diff --git a/.buildkite/ci_config.yaml b/.buildkite/ci_config.yaml
index b199e554a739..21ffa1b9b8d7 100644
--- a/.buildkite/ci_config.yaml
+++ b/.buildkite/ci_config.yaml
@@ -8,8 +8,9 @@ run_all_patterns:
   - "CMakeLists.txt"
   - "requirements/common.txt"
   - "requirements/cuda.txt"
-  - "requirements/build.txt"
-  - "requirements/test.txt"
+  - "requirements/kv_connectors.txt"
+  - "requirements/build/cuda.txt"
+  - "requirements/test/cuda.txt"
   - "setup.py"
   - "csrc/"
   - "cmake/"
diff --git a/.buildkite/ci_config_intel.yaml b/.buildkite/ci_config_intel.yaml
index 375be84a396a..a1c0091e0f10 100644
--- a/.buildkite/ci_config_intel.yaml
+++ b/.buildkite/ci_config_intel.yaml
@@ -6,8 +6,8 @@ run_all_patterns:
   - "CMakeLists.txt"
   - "requirements/common.txt"
   - "requirements/xpu.txt"
-  - "requirements/build.txt"
-  - "requirements/test.txt"
+  - "requirements/build/cuda.txt"
+  - "requirements/test/cuda.txt"
   - "setup.py"
   - "csrc/"
   - "cmake/"
diff --git a/.buildkite/hardware_tests/amd.yaml b/.buildkite/hardware_tests/amd.yaml
index 23a23723ad93..0c514647dc2b 100644
--- a/.buildkite/hardware_tests/amd.yaml
+++ b/.buildkite/hardware_tests/amd.yaml
@@ -20,11 +20,3 @@ steps:
     - docker push "rocm/vllm-ci:${BUILDKITE_COMMIT}"
     env:
       DOCKER_BUILDKIT: "1"
-    retry:
-      automatic:
-        - exit_status: -1  # Agent was lost
-          limit: 1
-        - exit_status: -10  # Agent was lost
-          limit: 1
-        - exit_status: 1  # Machine occasionally fail
-          limit: 1
diff --git a/.buildkite/hardware_tests/cpu.yaml b/.buildkite/hardware_tests/cpu.yaml
index acca2b368858..19716bab6de5 100644
--- a/.buildkite/hardware_tests/cpu.yaml
+++ b/.buildkite/hardware_tests/cpu.yaml
@@ -12,13 +12,19 @@ steps:
   - vllm/_custom_ops.py
   - tests/kernels/attention/test_cpu_attn.py
   - tests/kernels/moe/test_cpu_fused_moe.py
+  - tests/kernels/moe/test_cpu_quant_fused_moe.py
   - tests/kernels/test_onednn.py
+  - tests/kernels/test_awq_int4_to_int8.py
+  - tests/kernels/quantization/test_cpu_fp8_scaled_mm.py
   commands:
     - |
-      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
+      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 30m "
       pytest -x -v -s tests/kernels/attention/test_cpu_attn.py
       pytest -x -v -s tests/kernels/moe/test_cpu_fused_moe.py
-      pytest -x -v -s tests/kernels/test_onednn.py"
+      pytest -x -v -s tests/kernels/moe/test_cpu_quant_fused_moe.py
+      pytest -x -v -s tests/kernels/test_onednn.py
+      pytest -x -v -s tests/kernels/test_awq_int4_to_int8.py
+      pytest -x -v -s tests/kernels/quantization/test_cpu_fp8_scaled_mm.py"
 
 - label: CPU-Compatibility Tests
   depends_on: []
@@ -44,10 +50,24 @@ steps:
   - tests/models/language/pooling/
   commands:
     - |
-      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 30m "
+      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 40m "
       pytest -x -v -s tests/models/language/generation -m cpu_model
       pytest -x -v -s tests/models/language/pooling -m cpu_model"
 
+- label: CPU-ModelRunnerV2 Tests
+  depends_on: []
+  device: intel_cpu
+  no_plugin: true
+  soft_fail: true
+  source_file_dependencies:
+  - vllm/v1/worker/cpu/
+  - vllm/v1/worker/gpu/
+  commands:
+    - |
+      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 30m "
+      uv pip install git+https://github.com/triton-lang/triton-cpu.git@270e696d
+      VLLM_USE_V2_MODEL_RUNNER=1 pytest -x -v -s tests/models/language/generation/test_granite.py -m cpu_model"
+
 - label: CPU-Quantization Model Tests
   depends_on: []
   device: intel_cpu
@@ -55,23 +75,24 @@ steps:
   source_file_dependencies:
   - csrc/cpu/
   - vllm/model_executor/layers/quantization/cpu_wna16.py
-  - vllm/model_executor/layers/quantization/gptq_marlin.py
+  - vllm/model_executor/layers/quantization/auto_gptq.py
   - vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py
   - vllm/model_executor/layers/quantization/kernels/scaled_mm/cpu.py
   - vllm/model_executor/layers/quantization/kernels/mixed_precision/cpu.py
+  - vllm/model_executor/layers/fused_moe/experts/cpu_moe.py
   - tests/quantization/test_compressed_tensors.py
   - tests/quantization/test_cpu_wna16.py
   commands:
     - |
-      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
+      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 30m "
       pytest -x -v -s tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_logprobs
       pytest -x -v -s tests/quantization/test_cpu_wna16.py"
       
-- label: CPU-Distributed Tests
+- label: CPU-Distributed Tests (PP+TP)
   depends_on: []
   device: intel_cpu
   no_plugin: true
-  source_file_dependencies:
+  source_file_dependencies: &cpu_distributed_deps
   - csrc/cpu/shm.cpp
   - vllm/v1/worker/cpu_worker.py
   - vllm/v1/worker/gpu_worker.py
@@ -80,10 +101,21 @@ steps:
   - vllm/platforms/cpu.py
   - vllm/distributed/parallel_state.py
   - vllm/distributed/device_communicators/cpu_communicator.py
+  - .buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh
+  commands:
+    - |
+      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 10m "
+      bash .buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh tp_pp"
+
+- label: CPU-Distributed Tests (DP+TP)
+  depends_on: []
+  device: intel_cpu
+  no_plugin: true
+  source_file_dependencies: *cpu_distributed_deps
   commands:
     - |
       bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 10m "
-      bash .buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh"
+      bash .buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh dp_tp"
 
 - label: CPU-Multi-Modal Model Tests %N
   depends_on: []
@@ -97,7 +129,7 @@ steps:
     - |
       bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 45m "
       pytest -x -v -s tests/models/multimodal/generation --ignore=tests/models/multimodal/generation/test_pixtral.py -m cpu_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB"
-  parallelism: 2
+  parallelism: 3
 
 - label: "Arm CPU Test"
   depends_on: []
diff --git a/.buildkite/hardware_tests/intel.yaml b/.buildkite/hardware_tests/intel.yaml
index ba0088b3af62..d70ce28428d4 100644
--- a/.buildkite/hardware_tests/intel.yaml
+++ b/.buildkite/hardware_tests/intel.yaml
@@ -8,10 +8,3 @@ steps:
     commands: 
     - bash .buildkite/scripts/hardware_ci/run-hpu-test.sh
 
-  - label: "Intel GPU Test"
-    depends_on: []
-    soft_fail: true
-    device: intel_gpu
-    no_plugin: true
-    commands: 
-    - bash .buildkite/scripts/hardware_ci/run-xpu-test.sh
diff --git a/.buildkite/image_build/image_build.sh b/.buildkite/image_build/image_build.sh
index 9131dfc71a0a..10c03c3e1773 100755
--- a/.buildkite/image_build/image_build.sh
+++ b/.buildkite/image_build/image_build.sh
@@ -92,8 +92,8 @@ check_and_skip_if_image_exists() {
 }
 
 ecr_login() {
-    aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"
-    aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 936637512419.dkr.ecr.us-east-1.amazonaws.com
+    aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" || true
+    aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 936637512419.dkr.ecr.us-east-1.amazonaws.com || true
 }
 
 prepare_cache_tags() {
@@ -192,6 +192,7 @@ export BUILDKITE_COMMIT
 export PARENT_COMMIT
 export IMAGE_TAG
 export IMAGE_TAG_LATEST
+export COMMIT="${COMMIT:-${BUILDKITE_COMMIT}}"
 export CACHE_FROM
 export CACHE_FROM_BASE_BRANCH
 export CACHE_FROM_MAIN
diff --git a/.buildkite/image_build/image_build.yaml b/.buildkite/image_build/image_build.yaml
index 42eaed7ddaa0..e0ef7d592424 100644
--- a/.buildkite/image_build/image_build.yaml
+++ b/.buildkite/image_build/image_build.yaml
@@ -6,6 +6,48 @@ steps:
     timeout_in_minutes: 600
     commands:
     - if [[ "$BUILDKITE_BRANCH" == "main" ]]; then .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $IMAGE_TAG $IMAGE_TAG_LATEST; else .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $IMAGE_TAG; fi
+    # Non-root smoke 1: the default (root) image must still be importable
+    # under a non-root UID via `--user 2000:0`. Validates the `vllm` passwd
+    # entry + group-0-writable /home/vllm + uv path cleanup from #31959.
+    # Uses `import vllm` rather than `vllm serve --help` because the latter
+    # instantiates `VllmConfig` which requires a GPU attached to the
+    # container.
+    - docker run --rm --user 2000:0 --entrypoint python3 "$IMAGE_TAG" -c "import vllm; print(vllm.__version__)"
+    # Non-root smoke 2: assert the non-root enabling invariants are baked
+    # into the image. Runs as UID 2000:0 via a shell so we can verify
+    # filesystem perms + passwd/group file state + wrapper presence without
+    # triggering vLLM's GPU-requiring config-init path. The opt-in
+    # `vllm-openai-nonroot` target adds only `USER vllm`, `WORKDIR
+    # /home/vllm`, and an `ENTRYPOINT` override on top of these invariants;
+    # its build correctness is reviewed at the Dockerfile level. Wrapper
+    # logic is covered separately by the pre-commit hook
+    # `test-nonroot-entrypoint` (see .pre-commit-config.yaml).
+    - |
+      docker run --rm --user 2000:0 --entrypoint /bin/sh "$IMAGE_TAG" -ec '
+        if ! getent passwd 2000 | grep -q ^vllm:; then
+          echo FAIL: UID 2000 != vllm
+          exit 1
+        fi
+        if ! id -gn 2>/dev/null | grep -qx root; then
+          echo FAIL: GID 0 not root group
+          exit 1
+        fi
+        touch /home/vllm/.smoke && rm /home/vllm/.smoke
+        touch /opt/uv/cache/.smoke && rm /opt/uv/cache/.smoke
+        if ! test -x /usr/local/bin/vllm-nonroot-entrypoint.sh; then
+          echo FAIL: wrapper missing
+          exit 1
+        fi
+        if ! test -w /etc/passwd; then
+          echo FAIL: /etc/passwd not group-writable
+          exit 1
+        fi
+        if ! test -w /etc/group; then
+          echo FAIL: /etc/group not group-writable
+          exit 1
+        fi
+        echo non-root invariants OK
+      '
     retry:
       automatic:
         - exit_status: -1  # Agent was lost
diff --git a/.buildkite/image_build/image_build_cpu.sh b/.buildkite/image_build/image_build_cpu.sh
index ccfe155fa2b7..035f070ab891 100755
--- a/.buildkite/image_build/image_build_cpu.sh
+++ b/.buildkite/image_build/image_build_cpu.sh
@@ -11,7 +11,7 @@ REPO=$2
 BUILDKITE_COMMIT=$3
 
 # authenticate with AWS ECR
-aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"
+aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" || true
 
 # skip build if image already exists
 if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu) ]]; then
diff --git a/.buildkite/image_build/image_build_cpu_arm64.sh b/.buildkite/image_build/image_build_cpu_arm64.sh
index ff3d11c8d599..b561e2c2e463 100755
--- a/.buildkite/image_build/image_build_cpu_arm64.sh
+++ b/.buildkite/image_build/image_build_cpu_arm64.sh
@@ -11,7 +11,7 @@ REPO=$2
 BUILDKITE_COMMIT=$3
 
 # authenticate with AWS ECR
-aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"
+aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" || true
 
 # skip build if image already exists
 if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-arm64-cpu) ]]; then
diff --git a/.buildkite/image_build/image_build_hpu.sh b/.buildkite/image_build/image_build_hpu.sh
index 60fa1789fa06..df900dc60342 100755
--- a/.buildkite/image_build/image_build_hpu.sh
+++ b/.buildkite/image_build/image_build_hpu.sh
@@ -11,7 +11,7 @@ REPO=$2
 BUILDKITE_COMMIT=$3
 
 # authenticate with AWS ECR
-aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"
+aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" || true
 
 # skip build if image already exists
 if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-hpu) ]]; then
diff --git a/.buildkite/image_build/image_build_torch_nightly.sh b/.buildkite/image_build/image_build_torch_nightly.sh
new file mode 100755
index 000000000000..cbd08aa7bd0b
--- /dev/null
+++ b/.buildkite/image_build/image_build_torch_nightly.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+set -euo pipefail
+
+# Build a vLLM test image with PyTorch nightly installed.
+# Called by the pipeline generator's "vLLM Against PyTorch Nightly" group.
+
+if [[ $# -lt 5 ]]; then
+  echo "Usage: $0 <registry> <repo> <commit> <branch> <image_tag>"
+  exit 1
+fi
+
+REGISTRY=$1
+REPO=$2
+BUILDKITE_COMMIT=$3
+BRANCH=$4
+IMAGE_TAG=$5
+
+# --- Arguments ---
+echo "--- :mag: Arguments"
+echo "REGISTRY: ${REGISTRY}"
+echo "REPO: ${REPO}"
+echo "BUILDKITE_COMMIT: ${BUILDKITE_COMMIT}"
+echo "BRANCH: ${BRANCH}"
+echo "IMAGE_TAG: ${IMAGE_TAG}"
+
+# --- ECR login ---
+echo "--- :key: ECR login"
+aws ecr-public get-login-password --region us-east-1 \
+  | docker login --username AWS --password-stdin "$REGISTRY"
+aws ecr get-login-password --region us-east-1 \
+  | docker login --username AWS --password-stdin 936637512419.dkr.ecr.us-east-1.amazonaws.com
+
+# --- Set up buildx ---
+echo "--- :docker: Setting up buildx"
+docker buildx create --name vllm-builder --driver docker-container --use || true
+docker buildx inspect --bootstrap
+docker buildx ls
+
+# --- Skip if image already exists ---
+echo "--- :mag: Checking if image already exists"
+if docker manifest inspect "$IMAGE_TAG" >/dev/null 2>&1; then
+  echo "Image found: $IMAGE_TAG — skipping build"
+  exit 0
+fi
+echo "Image not found, proceeding with build..."
+
+# --- CUDA 13.0 for nightly builds ---
+# Nightly CI uses CUDA 13.0 while regular CI stays on CUDA 12.9
+NIGHTLY_CUDA_VERSION="13.0.2"
+NIGHTLY_BUILD_BASE_IMAGE="nvidia/cuda:${NIGHTLY_CUDA_VERSION}-devel-ubuntu22.04"
+NIGHTLY_FINAL_BASE_IMAGE="nvidia/cuda:${NIGHTLY_CUDA_VERSION}-base-ubuntu22.04"
+
+echo "--- :docker: Building torch nightly image (CUDA ${NIGHTLY_CUDA_VERSION})"
+docker buildx build --file docker/Dockerfile \
+  --build-arg max_jobs=16 \
+  --build-arg buildkite_commit="$BUILDKITE_COMMIT" \
+  --build-arg USE_SCCACHE=1 \
+  --build-arg PYTORCH_NIGHTLY=1 \
+  --build-arg CUDA_VERSION="${NIGHTLY_CUDA_VERSION}" \
+  --build-arg BUILD_BASE_IMAGE="${NIGHTLY_BUILD_BASE_IMAGE}" \
+  --build-arg FINAL_BASE_IMAGE="${NIGHTLY_FINAL_BASE_IMAGE}" \
+  --build-arg torch_cuda_arch_list="8.0 8.9 9.0 10.0 12.0" \
+  --tag "$IMAGE_TAG" \
+  --push \
+  --target test \
+  --progress plain .
+
+echo "--- :white_check_mark: Torch nightly image build complete: $IMAGE_TAG"
diff --git a/.buildkite/image_build/image_build_xpu.sh b/.buildkite/image_build/image_build_xpu.sh
index c3734dce13ca..45417b7339be 100755
--- a/.buildkite/image_build/image_build_xpu.sh
+++ b/.buildkite/image_build/image_build_xpu.sh
@@ -11,8 +11,8 @@ REPO=$2
 BUILDKITE_COMMIT=$3
 
 # authenticate with AWS ECR
-aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"
-aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 936637512419.dkr.ecr.us-east-1.amazonaws.com
+aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" || true
+aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 936637512419.dkr.ecr.us-east-1.amazonaws.com || true
 
 # skip build if image already exists
 if ! docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-xpu &> /dev/null; then
diff --git a/.buildkite/intel_jobs/engine_intel.yaml b/.buildkite/intel_jobs/engine_intel.yaml
new file mode 100644
index 000000000000..c66576d40991
--- /dev/null
+++ b/.buildkite/intel_jobs/engine_intel.yaml
@@ -0,0 +1,21 @@
+group: Engine Intel
+depends_on:
+  - image-build-xpu
+steps:
+- label: Engine (1 GPU)
+  timeout_in_minutes: 30
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/v1/engine/
+    - tests/v1/engine/
+  commands:
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      pytest -v -s v1/engine --ignore v1/engine/test_preprocess_error_handling.py'
diff --git a/.buildkite/intel_jobs/kernels_intel.yaml b/.buildkite/intel_jobs/kernels_intel.yaml
new file mode 100644
index 000000000000..66a8db25f02e
--- /dev/null
+++ b/.buildkite/intel_jobs/kernels_intel.yaml
@@ -0,0 +1,21 @@
+group: Kernels Intel
+depends_on: 
+  - image-build-xpu
+steps:
+- label: vLLM IR Tests
+  timeout_in_minutes: 30
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/ir
+    - vllm/kernels
+  commands:
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      pytest -v -s kernels/ir'
diff --git a/.buildkite/intel_jobs/lora_intel.yaml b/.buildkite/intel_jobs/lora_intel.yaml
new file mode 100644
index 000000000000..32a56ef59b3f
--- /dev/null
+++ b/.buildkite/intel_jobs/lora_intel.yaml
@@ -0,0 +1,135 @@
+group: LoRA Intel
+depends_on:
+  - image-build-xpu
+steps:
+- label: LoRA Runtime + Utils
+  timeout_in_minutes: 45
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/lora
+    - tests/lora
+  commands:
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      export VLLM_WORKER_MULTIPROC_METHOD=spawn &&
+      pytest -v -s lora/test_layers.py &&
+      pytest -v -s lora/test_lora_checkpoints.py &&
+      pytest -v -s lora/test_lora_functions.py &&
+      pytest -v -s lora/test_lora_huggingface.py &&
+      pytest -v -s lora/test_lora_manager.py &&
+      pytest -v -s lora/test_lora_utils.py &&
+      pytest -v -s lora/test_peft_helper.py &&
+      pytest -v -s lora/test_resolver.py &&
+      pytest -v -s lora/test_utils.py &&
+      pytest -v -s lora/test_add_lora.py  &&
+      pytest -v -s lora/test_worker.py'
+
+- label: LoRA Fused/MoE Kernels
+  timeout_in_minutes: 45
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/lora
+    - tests/lora
+  commands:
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      export VLLM_WORKER_MULTIPROC_METHOD=spawn &&
+      pytest -v -s lora/test_fused_moe_lora_kernel.py && 
+      pytest -v -s lora/test_moe_lora_align_sum.py --deselect="tests/lora/test_moe_lora_align_sum.py::test_moe_lora_align_block_size_mixed_base_and_lora[1]"'
+
+- label: LoRA Punica Kernels
+  timeout_in_minutes: 45
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/lora
+    - tests/lora
+  commands:
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      export VLLM_WORKER_MULTIPROC_METHOD=spawn &&
+      set -o pipefail &&
+      pytest -v -s lora/test_punica_ops.py --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[expand-0-xpu:0-dtype0-3-43264-32-4-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype1-1-2049-64-128-16]" --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype0-1-2049-128-1-32]" --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype0-1-2049-256-1-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype0-1-2049-256-8-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels[expand-0-xpu:0-dtype0-3-2049-128-8-16]" --deselect="tests/lora/test_punica_ops.py::test_kernels[shrink-0-xpu:0-dtype0-1-2049-128-8-32]" --deselect="tests/lora/test_punica_ops.py::test_kernels[expand-0-xpu:0-dtype1-1-2049-256-128-32]" --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[shrink-0-xpu:0-dtype0-3-64256-32-4-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[shrink-0-xpu:0-dtype1-2-29696-32-4-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[shrink-0-xpu:0-dtype1-3-49408-32-4-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[shrink-0-xpu:0-dtype0-2-16384-32-4-4]" --deselect="tests/lora/test_punica_ops.py::test_kernels_hidden_size[expand-0-xpu:0-dtype0-2-51328-32-4-4]"'
+
+- label: LoRA Punica FP8/XPU Ops
+  timeout_in_minutes: 45
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/lora
+    - tests/lora
+  commands:
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      export VLLM_WORKER_MULTIPROC_METHOD=spawn &&
+      pytest -v -s lora/test_punica_ops_fp8.py &&
+      pytest -v -s lora/test_punica_xpu_ops.py'
+
+- label: LoRA Models
+  timeout_in_minutes: 45
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/lora
+    - tests/lora
+  commands:
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      export VLLM_WORKER_MULTIPROC_METHOD=spawn &&
+      (pytest -v -s lora/test_mixtral.py --deselect="tests/lora/test_mixtral.py::test_mixtral_lora[4]" || true) &&
+      pytest -v -s lora/test_quant_model.py --deselect="tests/lora/test_quant_model.py::test_quant_model_lora[model0]" --deselect="tests/lora/test_quant_model.py::test_quant_model_lora[model1]" --deselect="tests/lora/test_quant_model.py::test_quant_model_tp_equality[model0]" &&
+      pytest -v -s lora/test_transformers_model.py &&
+      pytest -v -s lora/test_chatglm3_tp.py &&
+      pytest -s -v lora/test_minicpmv_tp.py'
+
+- label: LoRA Multimodal
+  timeout_in_minutes: 45
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/lora
+    - tests/lora
+  commands:
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'cd tests &&
+      export VLLM_WORKER_MULTIPROC_METHOD=spawn &&
+      pytest -v -s lora/test_default_mm_loras.py && 
+      pytest -v -s lora/test_whisper.py'
diff --git a/.buildkite/intel_jobs/misc_intel.yaml b/.buildkite/intel_jobs/misc_intel.yaml
new file mode 100644
index 000000000000..864128bb5338
--- /dev/null
+++ b/.buildkite/intel_jobs/misc_intel.yaml
@@ -0,0 +1,55 @@
+group: Miscellaneous Intel
+depends_on:
+  - image-build-xpu
+steps:
+- label: V1 Core + KV + Metrics
+  timeout_in_minutes: 30
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/
+    - tests/v1/core
+    - tests/v1/executor
+    - tests/v1/kv_offload
+    - tests/v1/worker
+    - tests/v1/kv_connector/unit
+    - tests/v1/metrics
+    - tests/entrypoints/openai/correctness/test_lmeval.py
+  commands:
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'pip install -r requirements/kv_connectors.txt &&
+      export VLLM_WORKER_MULTIPROC_METHOD=spawn &&
+      cd tests &&
+      pytest -v -s v1/executor'
+
+- label: V1 Sample + Logits
+  timeout_in_minutes: 30
+  device: intel_gpu
+  no_plugin: true
+  working_dir: "."
+  env:
+    REGISTRY: "public.ecr.aws/q9t5s3a7"
+    REPO: "vllm-ci-test-repo"
+    VLLM_TEST_DEVICE: "xpu"
+  source_file_dependencies:
+    - vllm/
+    - tests/v1/sample
+    - tests/v1/logits_processors
+    - tests/v1/test_oracle.py
+    - tests/v1/test_request.py
+    - tests/v1/test_outputs.py
+  commands:
+    - >-
+      bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+      'export VLLM_WORKER_MULTIPROC_METHOD=spawn &&
+      cd tests &&
+      pytest -v -s v1/logits_processors --ignore=v1/logits_processors/test_custom_online.py --ignore=v1/logits_processors/test_custom_offline.py &&
+      pytest -v -s v1/test_oracle.py &&
+      pytest -v -s v1/test_request.py &&
+      pytest -v -s v1/test_outputs.py'
diff --git a/.buildkite/intel_jobs/test-intel.yaml b/.buildkite/intel_jobs/test-intel.yaml
index 3aa75f4754f9..805b7e54f120 100644
--- a/.buildkite/intel_jobs/test-intel.yaml
+++ b/.buildkite/intel_jobs/test-intel.yaml
@@ -35,9 +35,13 @@ steps:
         python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager -tp 2 --distributed-executor-backend mp &&
         python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager --attention-backend=TRITON_ATTN &&
         python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager --quantization fp8 &&
+        python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager --kv-cache-dtype fp8 &&
+        python3 examples/basic/offline_inference/generate.py --model nvidia/Llama-3.1-8B-Instruct-FP8 --block-size 64 --enforce-eager --quantization modelopt --kv-cache-dtype fp8 --attention-backend TRITON_ATTN --max-model-len 4096 &&
         python3 examples/basic/offline_inference/generate.py --model superjob/Qwen3-4B-Instruct-2507-GPTQ-Int4 --block-size 64 --enforce-eager --max-model-len 8192 &&
         python3 examples/basic/offline_inference/generate.py --model ibm-research/PowerMoE-3b --block-size 64 --enforce-eager -tp 2 &&
-        python3 examples/basic/offline_inference/generate.py --model ibm-research/PowerMoE-3b --block-size 64 --enforce-eager -tp 2 --enable-expert-parallel'
+        python3 examples/basic/offline_inference/generate.py --model ibm-research/PowerMoE-3b --block-size 64 --enforce-eager -tp 2 --enable-expert-parallel &&
+        python3 examples/basic/offline_inference/generate.py --model superjob/Qwen3-4B-Instruct-2507-GPTQ-Int4 --max-model-len 8192
+        '
   - label: "XPU V1 test"
     depends_on:
       - image-build-xpu
@@ -56,9 +60,28 @@ steps:
         'cd tests &&
         pytest -v -s v1/core --ignore=v1/core/test_reset_prefix_cache_e2e.py --ignore=v1/core/test_scheduler_e2e.py &&
         pytest -v -s v1/engine --ignore=v1/engine/test_output_processor.py &&
-        pytest -v -s v1/sample --ignore=v1/sample/test_logprobs.py --ignore=v1/sample/test_logprobs_e2e.py &&
+        pytest -v -s v1/sample --ignore=v1/sample/test_logprobs.py --ignore=v1/sample/test_logprobs_e2e.py -k "not test_topk_only and not test_topp_only and not test_topk_and_topp" &&
         pytest -v -s v1/worker --ignore=v1/worker/test_gpu_model_runner.py --ignore=v1/worker/test_worker_memory_snapshot.py &&
         pytest -v -s v1/structured_output &&
         pytest -v -s v1/test_serial_utils.py &&
-        pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_tree_attention.py --ignore=v1/spec_decode/test_speculators_eagle3.py --ignore=v1/spec_decode/test_acceptance_length.py &&
-        pytest -v -s v1/kv_connector/unit --ignore=v1/kv_connector/unit/test_multi_connector.py --ignore=v1/kv_connector/unit/test_nixl_connector.py --ignore=v1/kv_connector/unit/test_example_connector.py --ignore=v1/kv_connector/unit/test_lmcache_integration.py'
+        pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_speculators_eagle3.py --ignore=v1/spec_decode/test_acceptance_length.py &&
+        pytest -v -s v1/kv_connector/unit --ignore=v1/kv_connector/unit/test_multi_connector.py --ignore=v1/kv_connector/unit/test_example_connector.py --ignore=v1/kv_connector/unit/test_lmcache_integration.py --ignore=v1/kv_connector/unit/test_hf3fs_client.py --ignore=v1/kv_connector/unit/test_hf3fs_connector.py --ignore=v1/kv_connector/unit/test_hf3fs_metadata_server.py --ignore=v1/kv_connector/unit/test_offloading_connector.py'
+  - label: "XPU server test"
+    depends_on:
+      - image-build-xpu
+    timeout_in_minutes: 30
+    device: intel_gpu
+    no_plugin: true
+    env:
+      REGISTRY: "public.ecr.aws/q9t5s3a7"
+      REPO: "vllm-ci-test-repo"
+    source_file_dependencies:
+      - vllm/
+      - .buildkite/intel_jobs/test-intel.yaml 
+    commands:
+      - >-
+        bash .buildkite/scripts/hardware_ci/run-intel-test.sh
+        'pip install av &&
+        cd tests &&
+        pytest -v -s entrypoints/openai/chat_completion/test_audio_in_video.py &&
+        pytest -v -s benchmarks/test_serve_cli.py'
diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-4-Maverick-17B-128E-Instruct-FP8.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-4-Maverick-17B-128E-Instruct-FP8.yaml
index 6c0b5540cbb6..9a5af8540118 100644
--- a/.buildkite/lm-eval-harness/configs/Meta-Llama-4-Maverick-17B-128E-Instruct-FP8.yaml
+++ b/.buildkite/lm-eval-harness/configs/Meta-Llama-4-Maverick-17B-128E-Instruct-FP8.yaml
@@ -1,6 +1,9 @@
 # For hf script, without -t option (tensor parallel size).
 # bash .buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh -m meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 -l 250 -t 8 -f 5
 model_name: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
+required_gpu_arch:
+  - gfx942
+  - gfx950
 tasks:
 - name: "mmlu_pro"
   metrics:
diff --git a/.buildkite/lm-eval-harness/configs/Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml b/.buildkite/lm-eval-harness/configs/Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml
index aa4fb9fa03d6..ff43fa187b0e 100644
--- a/.buildkite/lm-eval-harness/configs/Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml
+++ b/.buildkite/lm-eval-harness/configs/Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml
@@ -1,6 +1,9 @@
 # For vllm script, with -t option (tensor parallel size)
 # bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic -l 1319 -t 1
 model_name: "RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic"
+required_gpu_arch:
+  - gfx942
+  - gfx950
 tasks:
 - name: "gsm8k"
   metrics:
diff --git a/.buildkite/lm-eval-harness/configs/Qwen3-235B-A22B-Instruct-2507-FP8.yaml b/.buildkite/lm-eval-harness/configs/Qwen3-235B-A22B-Instruct-2507-FP8.yaml
index 514c15d6098e..84e4f3fe3349 100644
--- a/.buildkite/lm-eval-harness/configs/Qwen3-235B-A22B-Instruct-2507-FP8.yaml
+++ b/.buildkite/lm-eval-harness/configs/Qwen3-235B-A22B-Instruct-2507-FP8.yaml
@@ -1,4 +1,7 @@
 model_name: "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8"
+required_gpu_arch:
+  - gfx942
+  - gfx950
 tasks:
   - name: "mmlu_pro"
     metrics:
diff --git a/.buildkite/lm-eval-harness/configs/models-small-rocm.txt b/.buildkite/lm-eval-harness/configs/models-small-rocm.txt
index a3bb95e19e24..36e0543879b3 100644
--- a/.buildkite/lm-eval-harness/configs/models-small-rocm.txt
+++ b/.buildkite/lm-eval-harness/configs/models-small-rocm.txt
@@ -1,5 +1,6 @@
 Qwen2.5-1.5B-Instruct.yaml
 Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
+Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml
 Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
 Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml
 Qwen1.5-MoE-W4A16-compressed-tensors.yaml
diff --git a/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh
index 518af9a66018..b495c0d123a6 100755
--- a/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh
+++ b/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh
@@ -2,7 +2,7 @@
 # We can use this script to compute baseline accuracy on chartqa for vllm.
 #
 # Make sure you have lm-eval-harness installed:
-#   pip install "lm-eval[api]>=0.4.11"
+#   pip install "lm-eval[api]>=0.4.12"
 
 usage() {
     echo``
diff --git a/.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh
index f010ffe6752d..e430e6183b2d 100755
--- a/.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh
+++ b/.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh
@@ -2,7 +2,7 @@
 # We can use this script to compute baseline accuracy on GSM for transformers.
 #
 # Make sure you have lm-eval-harness installed:
-#   pip install "lm-eval[api]>=0.4.11"
+#   pip install "lm-eval[api]>=0.4.12"
 
 usage() {
     echo``
diff --git a/.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh
index fec4a94e63e4..f1a541ddbefc 100644
--- a/.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh
+++ b/.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh
@@ -3,7 +3,7 @@
 # We use this for fp8, which HF does not support.
 #
 # Make sure you have lm-eval-harness installed:
-#   pip install "lm-eval[api]>=0.4.11"
+#   pip install "lm-eval[api]>=0.4.12"
 
 usage() {
     echo``
diff --git a/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh
index e3c6e16bd6b3..ba8da9fc3f55 100644
--- a/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh
+++ b/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh
@@ -3,7 +3,7 @@
 # We use this for fp8, which HF does not support.
 #
 # Make sure you have lm-eval-harness installed:
-#   pip install "lm-eval[api]>=0.4.11"
+#   pip install "lm-eval[api]>=0.4.12"
 
 usage() {
     echo``
diff --git a/.buildkite/lm-eval-harness/test_lm_eval_correctness.py b/.buildkite/lm-eval-harness/test_lm_eval_correctness.py
index fad5f593be4f..d34e603b9e26 100644
--- a/.buildkite/lm-eval-harness/test_lm_eval_correctness.py
+++ b/.buildkite/lm-eval-harness/test_lm_eval_correctness.py
@@ -13,6 +13,7 @@
 from contextlib import contextmanager
 
 import lm_eval
+import pytest
 import yaml
 
 from vllm.platforms import current_platform
@@ -89,9 +90,40 @@ def launch_lm_eval(eval_config, tp_size):
     return results
 
 
+def _check_rocm_gpu_arch_requirement(eval_config):
+    """Skip the test if the model requires a ROCm GPU arch not present.
+
+    Model YAML configs can specify::
+
+        required_gpu_arch:
+          - gfx942
+          - gfx950
+
+    The check only applies on ROCm.  On other platforms (e.g. CUDA) the
+    field is ignored so that shared config files work for both NVIDIA and
+    AMD CI pipelines.
+    """
+    required_archs = eval_config.get("required_gpu_arch")
+    if not required_archs:
+        return
+
+    if not current_platform.is_rocm():
+        return
+
+    from vllm.platforms.rocm import _GCN_ARCH  # noqa: E402
+
+    if not any(arch in _GCN_ARCH for arch in required_archs):
+        pytest.skip(
+            f"Model requires GPU arch {required_archs}, "
+            f"but detected arch is '{_GCN_ARCH}'"
+        )
+
+
 def test_lm_eval_correctness_param(config_filename, tp_size):
     eval_config = yaml.safe_load(config_filename.read_text(encoding="utf-8"))
 
+    _check_rocm_gpu_arch_requirement(eval_config)
+
     results = launch_lm_eval(eval_config, tp_size)
 
     rtol = eval_config.get("rtol", DEFAULT_RTOL)
diff --git a/.buildkite/performance-benchmarks/tests/serving-tests-arm64-cpu.json b/.buildkite/performance-benchmarks/tests/serving-tests-arm64-cpu.json
index 63f1f8ab887b..9f226ef2f819 100644
--- a/.buildkite/performance-benchmarks/tests/serving-tests-arm64-cpu.json
+++ b/.buildkite/performance-benchmarks/tests/serving-tests-arm64-cpu.json
@@ -36,6 +36,7 @@
       "model": "meta-llama/Llama-3.1-8B-Instruct",
       "backend": "vllm",
       "ignore-eos": "",
+      "temperature": 0,
       "num_prompts": 200
     }
   },
@@ -127,4 +128,4 @@
       }
     }
   ]
-}
\ No newline at end of file
+}
diff --git a/.buildkite/performance-benchmarks/tests/serving-tests-cpu-asr.json b/.buildkite/performance-benchmarks/tests/serving-tests-cpu-asr.json
index f0dc3d5ec067..30879b5e9dc5 100644
--- a/.buildkite/performance-benchmarks/tests/serving-tests-cpu-asr.json
+++ b/.buildkite/performance-benchmarks/tests/serving-tests-cpu-asr.json
@@ -22,6 +22,7 @@
       "hf_split": "test",
       "no_stream": "",
       "no_oversample": "",
+      "temperature": 0,
       "num_prompts": 200
     }
   },
diff --git a/.buildkite/performance-benchmarks/tests/serving-tests-cpu-text.json b/.buildkite/performance-benchmarks/tests/serving-tests-cpu-text.json
index 0411b04e1bd5..34c2cc82d395 100644
--- a/.buildkite/performance-benchmarks/tests/serving-tests-cpu-text.json
+++ b/.buildkite/performance-benchmarks/tests/serving-tests-cpu-text.json
@@ -26,34 +26,14 @@
       "model": "meta-llama/Llama-3.1-8B-Instruct",
       "backend": "vllm",
       "ignore-eos": "",
+      "temperature": 0,
       "num_prompts": 200
     }
   },
   "tests": [
-    {
-      "test_name": "serving_llama8B_tp1_sharegpt",
-      "server_parameters": {
-        "tensor_parallel_size": 1
-      },
-      "client_parameters": {
-        "dataset_name": "sharegpt",
-        "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json"
-      }
-    },
-    {
-      "test_name": "serving_llama8B_tp2_sharegpt",
-      "server_parameters": {
-        "tensor_parallel_size": 2
-      },
-      "client_parameters": {
-        "dataset_name": "sharegpt",
-        "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json"
-      }
-    },
     {
       "test_name": "serving_llama8B_tp1_random_128_128",
       "server_parameters": {
-        "tensor_parallel_size": 1
       },
       "client_parameters": {
         "dataset_name": "random",
@@ -62,290 +42,244 @@
       }
     },
     {
-      "test_name": "serving_llama8B_tp2_random_128_128",
+      "test_name": "serving_llama8B_int4_tp1_random_128_128",
       "server_parameters": {
-        "tensor_parallel_size": 2
+        "model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
       },
       "client_parameters": {
+        "model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_llama8B_tp4_random_128_128",
+      "test_name": "serving_llama8B_int8_tp1_random_128_128",
       "server_parameters": {
-        "tensor_parallel_size": 4
+        "model": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8"
       },
       "client_parameters": {
+        "model": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_llama8B_tp1_random_128_2048",
-      "server_parameters": {
-        "tensor_parallel_size": 1
-      },
-      "client_parameters": {
-        "dataset_name": "random",
-        "random-input-len": 128,
-        "random-output-len": 2048
-      }
-    },
-    {
-      "test_name": "serving_llama8B_tp2_random_128_2048",
+      "test_name": "serving_llama1B_tp1_random_128_128",
       "server_parameters": {
-        "tensor_parallel_size": 2
+        "model": "meta-llama/Llama-3.2-1B"
       },
       "client_parameters": {
+        "model": "meta-llama/Llama-3.2-1B",
         "dataset_name": "random",
         "random-input-len": 128,
-        "random-output-len": 2048
+        "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_llama8B_tp4_random_128_2048",
+      "test_name": "serving_llama3B_tp1_random_128_128",
       "server_parameters": {
-        "tensor_parallel_size": 4
+        "model": "meta-llama/Llama-3.2-3B-Instruct"
       },
       "client_parameters": {
+        "model": "meta-llama/Llama-3.2-3B-Instruct",
         "dataset_name": "random",
         "random-input-len": 128,
-        "random-output-len": 2048
-      }
-    },
-    {
-      "test_name": "serving_llama8B_tp1_random_2048_128",
-      "server_parameters": {
-        "tensor_parallel_size": 1
-      },
-      "client_parameters": {
-        "dataset_name": "random",
-        "random-input-len": 2048,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_llama8B_tp2_random_2048_128",
+      "test_name": "serving_llama70B_tp1_random_128_128",
       "server_parameters": {
-        "tensor_parallel_size": 2
+        "model": "meta-llama/Llama-3.3-70B-Instruct"
       },
       "client_parameters": {
+        "model": "meta-llama/Llama-3.3-70B-Instruct",
         "dataset_name": "random",
-        "random-input-len": 2048,
+        "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_llama8B_tp4_random_2048_128",
+      "test_name": "serving_granite2B_tp1_random_128_128",
       "server_parameters": {
-        "tensor_parallel_size": 4
+        "model": "ibm-granite/granite-3.2-2b-instruct"
       },
       "client_parameters": {
+        "model": "ibm-granite/granite-3.2-2b-instruct",
         "dataset_name": "random",
-        "random-input-len": 2048,
+        "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_llama8B_tp1_random_2048_2048",
-      "server_parameters": {
-        "tensor_parallel_size": 1
-      },
-      "client_parameters": {
-        "dataset_name": "random",
-        "random-input-len": 2048,
-        "random-output-len": 2048
-      }
-    },
-    {
-      "test_name": "serving_llama8B_tp2_random_2048_2048",
-      "server_parameters": {
-        "tensor_parallel_size": 2
-      },
-      "client_parameters": {
-        "dataset_name": "random",
-        "random-input-len": 2048,
-        "random-output-len": 2048
-      }
-    },
-    {
-      "test_name": "serving_llama8B_tp4_random_2048_2048",
-      "server_parameters": {
-        "tensor_parallel_size": 4
-      },
-      "client_parameters": {
-        "dataset_name": "random",
-        "random-input-len": 2048,
-        "random-output-len": 2048
-      }
-    },
-    {
-      "test_name": "serving_llama8B_int4_tp1_random_128_128",
+      "test_name": "serving_qwen1.7B_tp1_random_128_128",
       "server_parameters": {
-        "model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
-        "tensor_parallel_size": 1
+        "model": "Qwen/Qwen3-1.7B"
       },
       "client_parameters": {
-        "model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
+        "model": "Qwen/Qwen3-1.7B",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_llama8B_int4_tp2_random_128_128",
+      "test_name": "serving_qwen4B_tp1_random_128_128",
       "server_parameters": {
-        "model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
-        "tensor_parallel_size": 2
+        "model": "Qwen/Qwen3-4B"
       },
       "client_parameters": {
-        "model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
+        "model": "Qwen/Qwen3-4B",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_llama8B_int4_tp4_random_128_128",
+      "test_name": "serving_qwen8B_tp1_random_128_128",
       "server_parameters": {
-        "model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
-        "tensor_parallel_size": 4
+        "model": "Qwen/Qwen3-8B"
       },
       "client_parameters": {
-        "model": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
+        "model": "Qwen/Qwen3-8B",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_llama8B_int8_tp1_random_128_128",
+      "test_name": "serving_qwen14B_tp1_random_128_128",
       "server_parameters": {
-        "model": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
-        "tensor_parallel_size": 1
+        "model": "Qwen/Qwen3-14B"
       },
       "client_parameters": {
-        "model": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+        "model": "Qwen/Qwen3-14B",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_llama8B_int8_tp2_random_128_128",
+      "test_name": "serving_qwen30B_tp1_random_128_128",
       "server_parameters": {
-        "model": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
-        "tensor_parallel_size": 2
+        "model": "Qwen/Qwen3-30B-A3B"
       },
       "client_parameters": {
-        "model": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+        "model": "Qwen/Qwen3-30B-A3B",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_llama8B_int8_tp4_random_128_128",
+      "test_name": "serving_glm9B_tp1_random_128_128",
       "server_parameters": {
-        "model": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
-        "tensor_parallel_size": 4
+        "model": "zai-org/glm-4-9b-hf"
       },
       "client_parameters": {
-        "model": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+        "model": "zai-org/glm-4-9b-hf",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_llama3B_tp1_random_128_128",
+      "test_name": "serving_gemma7B_tp1_random_128_128",
       "server_parameters": {
-        "model": "meta-llama/Llama-3.2-3B-Instruct",
-        "tensor_parallel_size": 1
+        "model": "google/gemma-7b"
       },
       "client_parameters": {
-        "model": "meta-llama/Llama-3.2-3B-Instruct",
+        "model": "google/gemma-7b",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_granite2B_tp1_random_128_128",
+      "test_name": "serving_gemma3-4b_tp1_random_128_128",
+      "server_environment_variables": {
+	"VLLM_CPU_SGL_KERNEL": 0
+      },
       "server_parameters": {
-        "model": "ibm-granite/granite-3.2-2b-instruct",
-        "tensor_parallel_size": 1
+        "model": "google/gemma-3-4b-it"
       },
       "client_parameters": {
-        "model": "ibm-granite/granite-3.2-2b-instruct",
+        "model": "google/gemma-3-4b-it",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_qwen1.7B_tp1_random_128_128",
+      "test_name": "serving_gemma3-12b_tp1_random_128_128",
+      "server_environment_variables": {
+	"VLLM_CPU_SGL_KERNEL": 0
+      },
       "server_parameters": {
-        "model": "Qwen/Qwen3-1.7B",
-        "tensor_parallel_size": 1
+        "model": "google/gemma-3-12b-it"
       },
       "client_parameters": {
-        "model": "Qwen/Qwen3-1.7B",
+        "model": "google/gemma-3-12b-it",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_qwen4B_tp1_random_128_128",
+      "test_name": "serving_gemma4-4b_tp1_random_128_128",
+      "server_environment_variables": {
+	"VLLM_CPU_SGL_KERNEL": 0
+      },
       "server_parameters": {
-        "model": "Qwen/Qwen3-4B",
-        "tensor_parallel_size": 1
+        "model": "google/gemma-4-E4B-it"
       },
       "client_parameters": {
-        "model": "Qwen/Qwen3-4B",
+        "model": "google/gemma-4-E4B-it",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_qwen8B_tp1_random_128_128",
+      "test_name": "serving_gemma4-2b_tp1_random_128_128",
+      "server_environment_variables": {
+	"VLLM_CPU_SGL_KERNEL": 0
+      },
       "server_parameters": {
-        "model": "Qwen/Qwen3-8B",
-        "tensor_parallel_size": 1
+        "model": "google/gemma-4-E2B-it"
       },
       "client_parameters": {
-        "model": "Qwen/Qwen3-8B",
+        "model": "google/gemma-4-E2B-it",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_glm9B_tp1_random_128_128",
+      "test_name": "serving_gemma4-26b_tp1_random_128_128",
+      "server_environment_variables": {
+	"VLLM_CPU_SGL_KERNEL": 0,
+        "VLLM_CPU_ATTN_SPLIT_KV": 0
+      },
       "server_parameters": {
-        "model": "zai-org/glm-4-9b-hf",
-        "tensor_parallel_size": 1
+        "model": "google/gemma-4-26B-A4B-it"
       },
       "client_parameters": {
-        "model": "zai-org/glm-4-9b-hf",
+        "model": "google/gemma-4-26B-A4B-it",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
       }
     },
     {
-      "test_name": "serving_gemma7B_tp1_random_128_128",
+      "test_name": "serving_phi4_tp1_random_128_128",
       "server_parameters": {
-        "model": "google/gemma-7b",
-        "tensor_parallel_size": 1
+        "model": "microsoft/Phi-4-reasoning"
       },
       "client_parameters": {
-        "model": "google/gemma-7b",
+        "model": "microsoft/Phi-4-reasoning",
         "dataset_name": "random",
         "random-input-len": 128,
         "random-output-len": 128
diff --git a/.buildkite/performance-benchmarks/tests/serving-tests-cpu.json b/.buildkite/performance-benchmarks/tests/serving-tests-cpu.json
index f66ef2af4bd6..c2d7768e2026 100644
--- a/.buildkite/performance-benchmarks/tests/serving-tests-cpu.json
+++ b/.buildkite/performance-benchmarks/tests/serving-tests-cpu.json
@@ -26,6 +26,7 @@
       "model": "meta-llama/Llama-3.1-8B-Instruct",
       "backend": "vllm",
       "ignore-eos": "",
+      "temperature": 0,
       "num_prompts": 200
     }
   },
diff --git a/.buildkite/performance-benchmarks/tests/serving-tests-hpu.json b/.buildkite/performance-benchmarks/tests/serving-tests-hpu.json
index 3929aa5fbbe0..d5ef981689dd 100644
--- a/.buildkite/performance-benchmarks/tests/serving-tests-hpu.json
+++ b/.buildkite/performance-benchmarks/tests/serving-tests-hpu.json
@@ -21,6 +21,7 @@
             "backend": "vllm",
             "dataset_name": "sharegpt",
             "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+            "temperature": 0,
             "num_prompts": 200
         }
     },
@@ -47,6 +48,7 @@
             "backend": "vllm",
             "dataset_name": "sharegpt",
             "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+            "temperature": 0,
             "num_prompts": 200
         }
     },
@@ -73,6 +75,7 @@
             "backend": "vllm",
             "dataset_name": "sharegpt",
             "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+            "temperature": 0,
             "num_prompts": 200
         }
     },
@@ -100,6 +103,7 @@
             "backend": "vllm",
             "dataset_name": "sharegpt",
             "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+            "temperature": 0,
             "num_prompts": 200
         }
     },
@@ -127,6 +131,7 @@
             "backend": "vllm",
             "dataset_name": "sharegpt",
             "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+            "temperature": 0,
             "num_prompts": 200
         }
     },
@@ -151,6 +156,7 @@
             "backend": "vllm",
             "dataset_name": "sharegpt",
             "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+            "temperature": 0,
             "num_prompts": 200
         }
     }
diff --git a/.buildkite/performance-benchmarks/tests/serving-tests.json b/.buildkite/performance-benchmarks/tests/serving-tests.json
index 66d52abc1206..2cbd472295e7 100644
--- a/.buildkite/performance-benchmarks/tests/serving-tests.json
+++ b/.buildkite/performance-benchmarks/tests/serving-tests.json
@@ -1,73 +1,112 @@
-[
+{
+  "defaults": {
+    "qps_list": [
+      "inf"
+    ],
+    "max_concurrency_list": [12, 16, 24, 32, 64, 128, 200],
+    "server_parameters": {
+      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "tensor_parallel_size": 1,
+      "dtype": "bfloat16"
+    },
+    "client_parameters": {
+      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "backend": "vllm",
+      "ignore-eos": "",
+      "temperature": 0,
+      "num_prompts": 200
+    }
+  },
+  "tests": [
+    {
+      "test_name": "serving_llama8B_tp1_sharegpt",
+      "server_parameters": {
+        "tensor_parallel_size": 1
+      },
+      "client_parameters": {
+        "dataset_name": "sharegpt",
+        "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json"
+      }
+    },
+    {
+        "dataset_name": "sharegpt",
+        "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json"
+      }
+    },
+    {
+      "test_name": "serving_llama8B_tp1_random_128_128",
+      "server_parameters": {
+        "tensor_parallel_size": 1
+      },
+      "client_parameters": {
+        "dataset_name": "random",
+        "random-input-len": 128,
+        "random-output-len": 128
+      }
+    },
+    {
+      "test_name": "serving_llama8B_tp1_random_128_2048",
+      "server_parameters": {
+        "tensor_parallel_size": 1
+      },
+      "client_parameters": {
+        "dataset_name": "random",
+        "random-input-len": 128,
+        "random-output-len": 2048
+      }
+    },
     {
-        "test_name": "serving_llama8B_tp1_sharegpt",
-        "qps_list": [1, 4, 16, "inf"],
-        "server_parameters": {
-            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
-            "tensor_parallel_size": 1,
-            "disable_log_stats": "",
-            "load_format": "dummy"
-        },
-        "client_parameters": {
-            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
-            "backend": "vllm",
-            "dataset_name": "sharegpt",
-            "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
-            "num_prompts": 200
-        }
+      "test_name": "serving_llama8B_tp1_random_2048_128",
+      "server_parameters": {
+        "tensor_parallel_size": 1
+      },
+      "client_parameters": {
+        "dataset_name": "random",
+        "random-input-len": 2048,
+        "random-output-len": 128
+      }
     },
     {
-        "test_name": "serving_llama70B_tp4_sharegpt",
-        "qps_list": [1, 4, 16, "inf"],
-        "server_parameters": {
-            "model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
-            "tensor_parallel_size": 4,
-            "disable_log_stats": "",
-            "load_format": "dummy"
-        },
-        "client_parameters": {
-            "model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
-            "backend": "vllm",
-            "dataset_name": "sharegpt",
-            "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
-            "num_prompts": 200
-        }
+      "test_name": "serving_llama8B_tp1_random_2048_2048",
+      "server_parameters": {
+        "tensor_parallel_size": 1
+      },
+      "client_parameters": {
+        "dataset_name": "random",
+        "random-input-len": 2048,
+        "random-output-len": 2048
+      }
     },
     {
-        "test_name": "serving_mixtral8x7B_tp2_sharegpt",
-        "qps_list": [1, 4, 16, "inf"],
-        "server_parameters": {
-            "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-            "tensor_parallel_size": 2,
-            "disable_log_stats": "",
-            "load_format": "dummy"
-        },
-        "client_parameters": {
-            "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-            "backend": "vllm",
-            "dataset_name": "sharegpt",
-            "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
-            "num_prompts": 200
-        }
+      "test_name": "serving_llama70B_tp4_random_128_128",
+      "server_parameters": {
+        "model": "meta-llama/Llama-3.3-70B-Instruct",
+        "async_scheduling": "",
+	"no_enable_prefix_caching": "",
+        "max_num_batched_tokens": 8192
+      },
+      "client_parameters": {
+        "model": "meta-llama/Llama-3.3-70B-Instruct",
+        "dataset_name": "random",
+        "random-input-len": 128,
+        "random-output-len": 128
+      }
     },
     {
-        "test_name": "serving_llama70B_tp4_sharegpt_specdecode",
-        "qps_list": [2],
-        "server_parameters": {
-            "model": "meta-llama/Meta-Llama-3.1-70B-Instruct", 
-            "tensor_parallel_size": 4,
-            "speculative_config": {
-                "model": "turboderp/Qwama-0.5B-Instruct",
-                "num_speculative_tokens": 4,
-                "draft_tensor_parallel_size": 1
-            }
-        },
-        "client_parameters": {
-            "model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
-            "backend": "vllm",
-            "dataset_name": "sharegpt",
-            "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
-            "num_prompts": 200 
-        }
+      "test_name": "serving_gemma4-e4b_tp1_random_128_128",
+      "server_parameters": {
+        "model": "google/gemma-4-E4B-it",
+	"enable_auto_tool_choice": "",
+	"tool_call_parser": "gemma4",
+	"chat_template": "examples/tool_chat_template_gemma4.jinja",
+	"reasoning_parser": "gemma4"
+      },
+      "client_parameters": {
+        "model": "google/gemma-4-E4B-it",
+        "dataset_name": "random",
+        "random-input-len": 128,
+        "random-output-len": 128
+      }
     }
-]
+  ]
+}
diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml
index 45b2996f7ead..df9b80f7f9a8 100644
--- a/.buildkite/release-pipeline.yaml
+++ b/.buildkite/release-pipeline.yaml
@@ -1,3 +1,16 @@
+# CUDA architecture lists — following PyTorch RELEASE.md
+# (https://github.com/pytorch/pytorch/blob/main/RELEASE.md)
+# SM86 included for broader Ampere coverage; SM89 for marlin fp8 support
+env:
+  CUDA_ARCH_X86: "7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX"
+  # aarch64 only architectures: 8.7 for Orin, 11.0 for Thor (since CUDA 13)
+  CUDA_ARCH_AARCH64: "8.0 8.7 8.9 9.0 10.0 11.0 12.0+PTX"
+  CUDA_ARCH_X86_CU129: "7.5 8.0 8.6 8.9 9.0 10.0 12.0"
+  CUDA_ARCH_AARCH64_CU129: "8.0 8.7 8.9 9.0 10.0 12.0"
+  MOONCAKE_WHEEL_AARCH64_2_35: "https://vllm-wheels.s3.amazonaws.com/mooncake/mooncake_transfer_engine-0.3.10.post2-0da9dfea3-cp312-cp312-manylinux_2_35_aarch64.whl"
+  MOONCAKE_WHEEL_AARCH64_2_39: "https://vllm-wheels.s3.amazonaws.com/mooncake/mooncake_transfer_engine-0.3.10.post2-0da9dfea3-cp312-cp312-manylinux_2_39_aarch64.whl"
+  MOONCAKE_WHEEL_X86_64: "https://vllm-wheels.s3.amazonaws.com/mooncake/mooncake_transfer_engine-0.3.10.post2-0da9dfea3-cp312-cp312-manylinux_2_35_x86_64.whl"
+
 steps:
   - input: "Provide Release version here"
     id: input-release-version
@@ -14,12 +27,11 @@ steps:
         agents:
           queue: arm64_cpu_queue_release
         commands:
-          # #NOTE: torch_cuda_arch_list is derived from upstream PyTorch build files here:
-          # https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7
-          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
+          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list=\"${CUDA_ARCH_AARCH64_CU129}\" --build-arg BUILD_OS=manylinux --build-arg BUILD_BASE_IMAGE=pytorch/manylinuxaarch64-builder:cuda12.9 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
           - "mkdir artifacts"
           - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
           - "bash .buildkite/scripts/upload-nightly-wheels.sh"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "s3://vllm-wheels/$$BUILDKITE_COMMIT/$(cd artifacts/dist && echo *.whl)"'
         env:
           DOCKER_BUILDKIT: "1"
 
@@ -29,12 +41,11 @@ steps:
         agents:
           queue: arm64_cpu_queue_release
         commands:
-          # #NOTE: torch_cuda_arch_list is derived from upstream PyTorch build files here:
-          # https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7
-          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04  --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
+          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.2 --build-arg torch_cuda_arch_list=\"${CUDA_ARCH_AARCH64}\" --build-arg BUILD_OS=manylinux --build-arg BUILD_BASE_IMAGE=pytorch/manylinuxaarch64-builder:cuda13.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
           - "mkdir artifacts"
           - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
-          - "bash .buildkite/scripts/upload-nightly-wheels.sh manylinux_2_35"
+          - "bash .buildkite/scripts/upload-nightly-wheels.sh"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "s3://vllm-wheels/$$BUILDKITE_COMMIT/$(cd artifacts/dist && echo *.whl)"'
         env:
           DOCKER_BUILDKIT: "1"
 
@@ -47,7 +58,8 @@ steps:
           - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_BUILD_ACL=ON --tag vllm-ci:build-image --target vllm-build --progress plain -f docker/Dockerfile.cpu ."
           - "mkdir artifacts"
           - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
-          - "bash .buildkite/scripts/upload-nightly-wheels.sh manylinux_2_35"
+          - "bash .buildkite/scripts/upload-nightly-wheels.sh"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "s3://vllm-wheels/$$BUILDKITE_COMMIT/$(cd artifacts/dist && echo *.whl)"'
         env:
           DOCKER_BUILDKIT: "1"
 
@@ -57,10 +69,11 @@ steps:
         agents:
           queue: cpu_queue_release
         commands:
-          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
+          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list=\"${CUDA_ARCH_X86_CU129}\" --build-arg BUILD_OS=manylinux --build-arg BUILD_BASE_IMAGE=pytorch/manylinux2_28-builder:cuda12.9 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
           - "mkdir artifacts"
           - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
-          - "bash .buildkite/scripts/upload-nightly-wheels.sh manylinux_2_31"
+          - "bash .buildkite/scripts/upload-nightly-wheels.sh"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "s3://vllm-wheels/$$BUILDKITE_COMMIT/$(cd artifacts/dist && echo *.whl)"'
         env:
           DOCKER_BUILDKIT: "1"
 
@@ -70,10 +83,11 @@ steps:
         agents:
           queue: cpu_queue_release
         commands:
-          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
+          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.2 --build-arg torch_cuda_arch_list=\"${CUDA_ARCH_X86}\" --build-arg BUILD_OS=manylinux --build-arg BUILD_BASE_IMAGE=pytorch/manylinux2_28-builder:cuda13.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
           - "mkdir artifacts"
           - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
-          - "bash .buildkite/scripts/upload-nightly-wheels.sh manylinux_2_35"
+          - "bash .buildkite/scripts/upload-nightly-wheels.sh"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "s3://vllm-wheels/$$BUILDKITE_COMMIT/$(cd artifacts/dist && echo *.whl)"'
         env:
           DOCKER_BUILDKIT: "1"
 
@@ -86,7 +100,8 @@ steps:
           - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_X86=true --tag vllm-ci:build-image --target vllm-build --progress plain -f docker/Dockerfile.cpu ."
           - "mkdir artifacts"
           - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
-          - "bash .buildkite/scripts/upload-nightly-wheels.sh manylinux_2_35"
+          - "bash .buildkite/scripts/upload-nightly-wheels.sh"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "s3://vllm-wheels/$$BUILDKITE_COMMIT/$(cd artifacts/dist && echo *.whl)"'
         env:
           DOCKER_BUILDKIT: "1"
 
@@ -98,105 +113,236 @@ steps:
     commands:
       - "bash .buildkite/scripts/generate-and-upload-nightly-index.sh"
 
+  - block: "Unblock to build release Docker images"
+    depends_on: ~
+    key: block-build-release-images
+    if: build.env("NIGHTLY") != "1"
+
   - group: "Build release Docker images"
     key: "build-release-images"
+    depends_on: block-build-release-images
+    allow_dependency_failure: true
     steps:
-      - label: "Build release image - x86_64 - CUDA 12.9"
+      - label: "Build release image - x86_64 - CUDA 13.0"
         depends_on: ~
         id: build-release-image-x86
         agents:
           queue: cpu_queue_release
         commands:
           - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
+          - |
+            DOCKER_BUILDKIT=1 docker build \
+              $(bash .buildkite/scripts/docker-build-metadata-args.sh) \
+              --build-arg max_jobs=16 \
+              --build-arg USE_SCCACHE=1 \
+              --build-arg GIT_REPO_CHECK=1 \
+              --build-arg CUDA_VERSION=13.0.2 \
+              --build-arg torch_cuda_arch_list="${CUDA_ARCH_X86}" \
+              --build-arg INSTALL_KV_CONNECTORS=true \
+              --build-arg MOONCAKE_WHEEL_AARCH64="${MOONCAKE_WHEEL_AARCH64_2_35}" \
+              --build-arg MOONCAKE_WHEEL_X86_64="${MOONCAKE_WHEEL_X86_64}" \
+              --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.2-devel-ubuntu22.04 \
+              --target vllm-openai \
+              --progress plain \
+              -f docker/Dockerfile .
           - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)"
           # re-tag to default image tag and push, just in case arm64 build fails
           - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
           - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)"'
 
-      - label: "Build release image - aarch64 - CUDA 12.9"
+      - label: "Build release image - aarch64 - CUDA 13.0"
         depends_on: ~
         id: build-release-image-arm64
         agents:
           queue: arm64_cpu_queue_release
         commands:
           - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
+          - |
+            DOCKER_BUILDKIT=1 docker build \
+              $(bash .buildkite/scripts/docker-build-metadata-args.sh) \
+              --build-arg max_jobs=16 \
+              --build-arg USE_SCCACHE=1 \
+              --build-arg GIT_REPO_CHECK=1 \
+              --build-arg CUDA_VERSION=13.0.2 \
+              --build-arg torch_cuda_arch_list="${CUDA_ARCH_AARCH64}" \
+              --build-arg INSTALL_KV_CONNECTORS=true \
+              --build-arg MOONCAKE_WHEEL_AARCH64="${MOONCAKE_WHEEL_AARCH64_2_35}" \
+              --build-arg MOONCAKE_WHEEL_X86_64="${MOONCAKE_WHEEL_X86_64}" \
+              --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.2-devel-ubuntu22.04 \
+              --target vllm-openai \
+              --progress plain \
+              -f docker/Dockerfile .
           - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)"'
 
-      - label: "Build release image - x86_64 - CUDA 13.0"
+      - label: "Build release image - x86_64 - CUDA 12.9"
         depends_on: ~
-        id: build-release-image-x86-cuda-13-0
+        id: build-release-image-x86-cuda-12-9
         agents:
           queue: cpu_queue_release
         commands:
           - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130 --target vllm-openai --progress plain -f docker/Dockerfile ."
-          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130"
+          - |
+            DOCKER_BUILDKIT=1 docker build \
+              $(bash .buildkite/scripts/docker-build-metadata-args.sh cu129) \
+              --build-arg max_jobs=16 \
+              --build-arg USE_SCCACHE=1 \
+              --build-arg GIT_REPO_CHECK=1 \
+              --build-arg CUDA_VERSION=12.9.1 \
+              --build-arg torch_cuda_arch_list="${CUDA_ARCH_X86_CU129}" \
+              --build-arg INSTALL_KV_CONNECTORS=true \
+              --build-arg MOONCAKE_WHEEL_AARCH64="${MOONCAKE_WHEEL_AARCH64_2_35}" \
+              --build-arg MOONCAKE_WHEEL_X86_64="${MOONCAKE_WHEEL_X86_64}" \
+              --target vllm-openai \
+              --progress plain \
+              -f docker/Dockerfile .
+          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu129"
           # re-tag to default image tag and push, just in case arm64 build fails
-          - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130"
-          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130"
+          - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu129 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu129"
+          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu129"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu129"'
 
-      - label: "Build release image - aarch64 - CUDA 13.0"
+      - label: "Build release image - aarch64 - CUDA 12.9"
         depends_on: ~
-        id: build-release-image-arm64-cuda-13-0
+        id: build-release-image-arm64-cuda-12-9
         agents:
           queue: arm64_cpu_queue_release
         commands:
           - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-          # compute capability 12.0 for RTX-50 series / RTX PRO 6000 Blackwell, 12.1 for DGX Spark
-          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0 12.1' --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130 --target vllm-openai --progress plain -f docker/Dockerfile ."
-          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130"
+          - |
+            DOCKER_BUILDKIT=1 docker build \
+              $(bash .buildkite/scripts/docker-build-metadata-args.sh cu129) \
+              --build-arg max_jobs=16 \
+              --build-arg USE_SCCACHE=1 \
+              --build-arg GIT_REPO_CHECK=1 \
+              --build-arg CUDA_VERSION=12.9.1 \
+              --build-arg torch_cuda_arch_list="${CUDA_ARCH_AARCH64_CU129}" \
+              --build-arg INSTALL_KV_CONNECTORS=true \
+              --build-arg MOONCAKE_WHEEL_AARCH64="${MOONCAKE_WHEEL_AARCH64_2_35}" \
+              --build-arg MOONCAKE_WHEEL_X86_64="${MOONCAKE_WHEEL_X86_64}" \
+              --target vllm-openai \
+              --progress plain \
+              -f docker/Dockerfile .
+          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu129"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu129"'
 
-      - label: "Build release image - x86_64 - CUDA 12.9 - Ubuntu 24.04"
+      - label: "Build release image - x86_64 - CUDA 13.0 - Ubuntu 24.04"
         depends_on: ~
         id: build-release-image-x86-ubuntu2404
         agents:
           queue: cpu_queue_release
         commands:
           - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg UBUNTU_VERSION=24.04 --build-arg GDRCOPY_OS_VERSION=Ubuntu24_04 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-ubuntu2404 --target vllm-openai --progress plain -f docker/Dockerfile ."
+          - |
+            DOCKER_BUILDKIT=1 docker build \
+              $(bash .buildkite/scripts/docker-build-metadata-args.sh ubuntu2404) \
+              --build-arg max_jobs=16 \
+              --build-arg USE_SCCACHE=1 \
+              --build-arg GIT_REPO_CHECK=1 \
+              --build-arg CUDA_VERSION=13.0.2 \
+              --build-arg UBUNTU_VERSION=24.04 \
+              --build-arg GDRCOPY_OS_VERSION=Ubuntu24_04 \
+              --build-arg torch_cuda_arch_list="${CUDA_ARCH_X86}" \
+              --build-arg INSTALL_KV_CONNECTORS=true \
+              --build-arg MOONCAKE_WHEEL_AARCH64="${MOONCAKE_WHEEL_AARCH64_2_39}" \
+              --build-arg MOONCAKE_WHEEL_X86_64="${MOONCAKE_WHEEL_X86_64}" \
+              --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.2-devel-ubuntu24.04 \
+              --target vllm-openai \
+              --progress plain \
+              -f docker/Dockerfile .
           - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-ubuntu2404"
           - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-ubuntu2404"
           - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-ubuntu2404"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-ubuntu2404"'
 
-      - label: "Build release image - aarch64 - CUDA 12.9 - Ubuntu 24.04"
+      - label: "Build release image - aarch64 - CUDA 13.0 - Ubuntu 24.04"
         depends_on: ~
         id: build-release-image-arm64-ubuntu2404
         agents:
           queue: arm64_cpu_queue_release
         commands:
           - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg UBUNTU_VERSION=24.04 --build-arg GDRCOPY_OS_VERSION=Ubuntu24_04 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-ubuntu2404 --target vllm-openai --progress plain -f docker/Dockerfile ."
+          - |
+            DOCKER_BUILDKIT=1 docker build \
+              $(bash .buildkite/scripts/docker-build-metadata-args.sh ubuntu2404) \
+              --build-arg max_jobs=16 \
+              --build-arg USE_SCCACHE=1 \
+              --build-arg GIT_REPO_CHECK=1 \
+              --build-arg CUDA_VERSION=13.0.2 \
+              --build-arg UBUNTU_VERSION=24.04 \
+              --build-arg GDRCOPY_OS_VERSION=Ubuntu24_04 \
+              --build-arg torch_cuda_arch_list="${CUDA_ARCH_AARCH64}" \
+              --build-arg INSTALL_KV_CONNECTORS=true \
+              --build-arg MOONCAKE_WHEEL_AARCH64="${MOONCAKE_WHEEL_AARCH64_2_39}" \
+              --build-arg MOONCAKE_WHEEL_X86_64="${MOONCAKE_WHEEL_X86_64}" \
+              --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.2-devel-ubuntu24.04 \
+              --target vllm-openai \
+              --progress plain \
+              -f docker/Dockerfile .
           - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-ubuntu2404"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-ubuntu2404"'
 
-      - label: "Build release image - x86_64 - CUDA 13.0 - Ubuntu 24.04"
+      - label: "Build release image - x86_64 - CUDA 12.9 - Ubuntu 24.04"
         depends_on: ~
-        id: build-release-image-x86-cuda-13-0-ubuntu2404
+        id: build-release-image-x86-cuda-12-9-ubuntu2404
         agents:
           queue: cpu_queue_release
         commands:
           - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg UBUNTU_VERSION=24.04 --build-arg GDRCOPY_OS_VERSION=Ubuntu24_04 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0 12.1' --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu24.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130-ubuntu2404 --target vllm-openai --progress plain -f docker/Dockerfile ."
-          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130-ubuntu2404"
-          - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130-ubuntu2404"
-          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130-ubuntu2404"
+          - |
+            DOCKER_BUILDKIT=1 docker build \
+              $(bash .buildkite/scripts/docker-build-metadata-args.sh cu129-ubuntu2404) \
+              --build-arg max_jobs=16 \
+              --build-arg USE_SCCACHE=1 \
+              --build-arg GIT_REPO_CHECK=1 \
+              --build-arg CUDA_VERSION=12.9.1 \
+              --build-arg UBUNTU_VERSION=24.04 \
+              --build-arg GDRCOPY_OS_VERSION=Ubuntu24_04 \
+              --build-arg torch_cuda_arch_list="${CUDA_ARCH_X86_CU129}" \
+              --build-arg INSTALL_KV_CONNECTORS=true \
+              --build-arg MOONCAKE_WHEEL_AARCH64="${MOONCAKE_WHEEL_AARCH64_2_39}" \
+              --build-arg MOONCAKE_WHEEL_X86_64="${MOONCAKE_WHEEL_X86_64}" \
+              --target vllm-openai \
+              --progress plain \
+              -f docker/Dockerfile .
+          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu129-ubuntu2404"
+          - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu129-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu129-ubuntu2404"
+          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu129-ubuntu2404"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu129-ubuntu2404"'
 
-      - label: "Build release image - aarch64 - CUDA 13.0 - Ubuntu 24.04"
+      - label: "Build release image - aarch64 - CUDA 12.9 - Ubuntu 24.04"
         depends_on: ~
-        id: build-release-image-arm64-cuda-13-0-ubuntu2404
+        id: build-release-image-arm64-cuda-12-9-ubuntu2404
         agents:
           queue: arm64_cpu_queue_release
         commands:
           - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg UBUNTU_VERSION=24.04 --build-arg GDRCOPY_OS_VERSION=Ubuntu24_04 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0 12.1' --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu24.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130-ubuntu2404 --target vllm-openai --progress plain -f docker/Dockerfile ."
-          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130-ubuntu2404"
+          - |
+            DOCKER_BUILDKIT=1 docker build \
+              $(bash .buildkite/scripts/docker-build-metadata-args.sh cu129-ubuntu2404) \
+              --build-arg max_jobs=16 \
+              --build-arg USE_SCCACHE=1 \
+              --build-arg GIT_REPO_CHECK=1 \
+              --build-arg CUDA_VERSION=12.9.1 \
+              --build-arg UBUNTU_VERSION=24.04 \
+              --build-arg GDRCOPY_OS_VERSION=Ubuntu24_04 \
+              --build-arg torch_cuda_arch_list="${CUDA_ARCH_AARCH64_CU129}" \
+              --build-arg INSTALL_KV_CONNECTORS=true \
+              --build-arg MOONCAKE_WHEEL_AARCH64="${MOONCAKE_WHEEL_AARCH64_2_39}" \
+              --build-arg MOONCAKE_WHEEL_X86_64="${MOONCAKE_WHEEL_X86_64}" \
+              --target vllm-openai \
+              --progress plain \
+              -f docker/Dockerfile .
+          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu129-ubuntu2404"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu129-ubuntu2404"'
 
       - block: "Build release image for x86_64 CPU"
         key: block-cpu-release-image-build
         depends_on: ~
 
       - label: "Build release image - x86_64 - CPU"
+        key: build-cpu-release-image-x86
         depends_on:
           - block-cpu-release-image-build
           - input-release-version
@@ -207,6 +353,7 @@ steps:
           - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_X86=true --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
           - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest"
           - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"'
         env:
           DOCKER_BUILDKIT: "1"
 
@@ -215,7 +362,8 @@ steps:
         depends_on: ~
 
       - label: "Build release image - arm64 - CPU"
-        depends_on: 
+        key: build-cpu-release-image-arm64
+        depends_on:
           - block-arm64-cpu-release-image-build
           - input-release-version
         agents:
@@ -225,13 +373,14 @@ steps:
           - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
           - "docker push public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:latest"
           - "docker push public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "$$BUILDKITE_LABEL" "public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:$(buildkite-agent meta-data get release-version)"'
         env:
           DOCKER_BUILDKIT: "1"
 
   - group: "Publish release images"
     key: "publish-release-images"
     steps:
-      - label: "Create multi-arch manifest - CUDA 12.9"
+      - label: "Create multi-arch manifest - CUDA 13.0"
         depends_on:
           - build-release-image-x86
           - build-release-image-arm64
@@ -242,29 +391,22 @@ steps:
           - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
           - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64 --amend"
           - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "Manifest: CUDA 13.0" "public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"'
 
-      - label: "Annotate release workflow - CUDA 12.9"
-        depends_on:
-          - create-multi-arch-manifest
-        id: annotate-release-workflow
-        agents:
-          queue: small_cpu_queue_release
-        commands:
-          - "bash .buildkite/scripts/annotate-release.sh"
-
-      - label: "Create multi-arch manifest - CUDA 13.0"
+      - label: "Create multi-arch manifest - CUDA 12.9"
         depends_on:
-          - build-release-image-x86-cuda-13-0
-          - build-release-image-arm64-cuda-13-0
-        id: create-multi-arch-manifest-cuda-13-0
+          - build-release-image-x86-cuda-12-9
+          - build-release-image-arm64-cuda-12-9
+        id: create-multi-arch-manifest-cuda-12-9
         agents:
           queue: small_cpu_queue_release
         commands:
           - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-          - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64-cu130 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64-cu130 --amend"
-          - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130"
+          - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu129 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64-cu129 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64-cu129 --amend"
+          - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu129"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "Manifest: CUDA 12.9" "public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu129"'
 
-      - label: "Create multi-arch manifest - CUDA 12.9 - Ubuntu 24.04"
+      - label: "Create multi-arch manifest - CUDA 13.0 - Ubuntu 24.04"
         depends_on:
           - build-release-image-x86-ubuntu2404
           - build-release-image-arm64-ubuntu2404
@@ -275,18 +417,20 @@ steps:
           - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
           - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64-ubuntu2404 --amend"
           - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-ubuntu2404"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "Manifest: CUDA 13.0 Ubuntu 24.04" "public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-ubuntu2404"'
 
-      - label: "Create multi-arch manifest - CUDA 13.0 - Ubuntu 24.04"
+      - label: "Create multi-arch manifest - CUDA 12.9 - Ubuntu 24.04"
         depends_on:
-          - build-release-image-x86-cuda-13-0-ubuntu2404
-          - build-release-image-arm64-cuda-13-0-ubuntu2404
-        id: create-multi-arch-manifest-cuda-13-0-ubuntu2404
+          - build-release-image-x86-cuda-12-9-ubuntu2404
+          - build-release-image-arm64-cuda-12-9-ubuntu2404
+        id: create-multi-arch-manifest-cuda-12-9-ubuntu2404
         agents:
           queue: small_cpu_queue_release
         commands:
           - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-          - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64-cu130-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64-cu130-ubuntu2404 --amend"
-          - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130-ubuntu2404"
+          - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu129-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64-cu129-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64-cu129-ubuntu2404 --amend"
+          - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu129-ubuntu2404"
+          - 'bash .buildkite/scripts/annotate-build-artifact.sh "Manifest: CUDA 12.9 Ubuntu 24.04" "public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu129-ubuntu2404"'
 
       - label: "Publish nightly multi-arch image to DockerHub"
         depends_on:
@@ -306,16 +450,16 @@ steps:
           DOCKER_BUILDKIT: "1"
           DOCKERHUB_USERNAME: "vllmbot"
 
-      - label: "Publish nightly multi-arch image to DockerHub - CUDA 13.0"
+      - label: "Publish nightly multi-arch image to DockerHub - CUDA 12.9"
         depends_on:
-          - create-multi-arch-manifest-cuda-13-0
+          - create-multi-arch-manifest-cuda-12-9
         if: build.env("NIGHTLY") == "1"
         agents:
           queue: small_cpu_queue_release
         commands:
-          - "bash .buildkite/scripts/push-nightly-builds.sh cu130"
+          - "bash .buildkite/scripts/push-nightly-builds.sh cu129"
           # Clean up old nightly builds (keep only last 14)
-          - "bash .buildkite/scripts/cleanup-nightly-builds.sh cu130-nightly-"
+          - "bash .buildkite/scripts/cleanup-nightly-builds.sh cu129-nightly-"
         plugins:
           - docker-login#v3.0.0:
               username: vllmbot
@@ -324,24 +468,6 @@ steps:
           DOCKER_BUILDKIT: "1"
           DOCKERHUB_USERNAME: "vllmbot"
 
-  - group: "Publish wheels"
-    key: "publish-wheels"
-    steps:
-      - block: "Confirm update release wheels to PyPI (experimental, use with caution)?"
-        key: block-upload-release-wheels
-        depends_on:
-          - input-release-version
-          - build-wheels
-
-      - label: "Upload release wheels to PyPI"
-        depends_on:
-          - block-upload-release-wheels
-        id: upload-release-wheels
-        agents:
-          queue: small_cpu_queue_release
-        commands:
-          - "bash .buildkite/scripts/upload-release-wheels-pypi.sh"
-
   # =============================================================================
   # ROCm Release Pipeline (x86_64 only)
   # =============================================================================
@@ -455,7 +581,7 @@ steps:
           echo ""
           echo " Build complete - Image and wheels cached"
         fi
-        
+
     artifact_paths:
       - "artifacts/rocm-base-wheels/*.whl"
     env:
@@ -611,12 +737,14 @@ steps:
       - "bash tools/vllm-rocm/generate-rocm-wheels-root-index.sh"
     env:
       S3_BUCKET: "vllm-wheels"
-      VARIANT: "rocm721"
+      VARIANT: "rocm722"
 
   # ROCm Job 6: Build ROCm Release Docker Image
   - label: ":docker: Build release image - x86_64 - ROCm"
     id: build-rocm-release-image
     depends_on:
+      - step: block-build-release-images
+        allow_failure: true
       - step: build-rocm-base-wheels
         allow_failure: false
     agents:
@@ -669,7 +797,7 @@ steps:
         
         # Push to ECR
         docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$${BUILDKITE_COMMIT}-rocm
-        
+
         echo ""
         echo " Successfully built and pushed ROCm release image"
         echo "   Image: public.ecr.aws/q9t5s3a7/vllm-release-repo:$${BUILDKITE_COMMIT}-rocm"
@@ -696,3 +824,60 @@ steps:
     env:
       DOCKER_BUILDKIT: "1"
       DOCKERHUB_USERNAME: "vllmbot"
+
+  # =============================================================================
+  # Publish to DockerHub and PyPI (at the end so all builds complete first)
+  # =============================================================================
+
+  - block: "Publish release images to DockerHub"
+    key: block-publish-release-images
+    depends_on:
+      - create-multi-arch-manifest
+      - create-multi-arch-manifest-cuda-12-9
+      - create-multi-arch-manifest-ubuntu2404
+      - create-multi-arch-manifest-cuda-12-9-ubuntu2404
+      - build-rocm-release-image
+      - input-release-version
+      # Wait for CPU builds if their block steps were unblocked, so publish
+      # doesn't race the in-progress CPU build. allow_failure lets publish
+      # proceed when the operator legitimately leaves the CPU block steps
+      # unblocked or the CPU build fails.
+      - step: build-cpu-release-image-x86
+        allow_failure: true
+      - step: build-cpu-release-image-arm64
+        allow_failure: true
+    if: build.env("NIGHTLY") != "1"
+
+  - label: "Publish release images to DockerHub"
+    depends_on:
+      - block-publish-release-images
+    key: publish-release-images-dockerhub
+    agents:
+      queue: small_cpu_queue_release
+    commands:
+      - "bash .buildkite/scripts/publish-release-images.sh"
+    plugins:
+      - docker-login#v3.0.0:
+          username: vllmbot
+          password-env: DOCKERHUB_TOKEN
+    env:
+      DOCKER_BUILDKIT: "1"
+      DOCKERHUB_USERNAME: "vllmbot"
+
+  - group: "Publish wheels"
+    key: "publish-wheels"
+    steps:
+      - block: "Confirm update release wheels to PyPI (experimental, use with caution)?"
+        key: block-upload-release-wheels
+        depends_on:
+          - input-release-version
+          - build-wheels
+
+      - label: "Upload release wheels to PyPI"
+        depends_on:
+          - block-upload-release-wheels
+        id: upload-release-wheels
+        agents:
+          queue: small_cpu_queue_release
+        commands:
+          - "bash .buildkite/scripts/upload-release-wheels-pypi.sh"
diff --git a/.buildkite/scripts/annotate-build-artifact.sh b/.buildkite/scripts/annotate-build-artifact.sh
new file mode 100755
index 000000000000..67cdf7923658
--- /dev/null
+++ b/.buildkite/scripts/annotate-build-artifact.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#
+# Append a build artifact line to the Buildkite annotation.
+# Usage: annotate-build-artifact.sh <label> <value>
+set -e
+echo "- **${1}**: \`${2}\`" | \
+  buildkite-agent annotate --append --style 'info' --context 'release-artifacts'
diff --git a/.buildkite/scripts/annotate-release.sh b/.buildkite/scripts/annotate-release.sh
deleted file mode 100755
index 2da9db2f2e5d..000000000000
--- a/.buildkite/scripts/annotate-release.sh
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-# Get release version, default to 1.0.0.dev for nightly/per-commit builds
-RELEASE_VERSION=$(buildkite-agent meta-data get release-version 2>/dev/null | sed 's/^v//')
-if [ -z "${RELEASE_VERSION}" ]; then
-  RELEASE_VERSION="1.0.0.dev"
-fi
-
-ROCM_BASE_CACHE_KEY=$(.buildkite/scripts/cache-rocm-base-wheels.sh key)
-
-buildkite-agent annotate --style 'info' --context 'release-workflow' << EOF
-To download the wheel (by commit):
-\`\`\`
-aws s3 cp s3://vllm-wheels/${BUILDKITE_COMMIT}/vllm-${RELEASE_VERSION}-cp38-abi3-manylinux_2_31_x86_64.whl .
-aws s3 cp s3://vllm-wheels/${BUILDKITE_COMMIT}/vllm-${RELEASE_VERSION}-cp38-abi3-manylinux_2_31_aarch64.whl .
-
-(Optional) For CUDA 13.0:
-aws s3 cp s3://vllm-wheels/${BUILDKITE_COMMIT}/vllm-${RELEASE_VERSION}+cu130-cp38-abi3-manylinux_2_35_x86_64.whl .
-aws s3 cp s3://vllm-wheels/${BUILDKITE_COMMIT}/vllm-${RELEASE_VERSION}+cu130-cp38-abi3-manylinux_2_35_aarch64.whl .
-
-(Optional) For CPU:
-aws s3 cp s3://vllm-wheels/${BUILDKITE_COMMIT}/vllm-${RELEASE_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl .
-aws s3 cp s3://vllm-wheels/${BUILDKITE_COMMIT}/vllm-${RELEASE_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl .
-\`\`\`
-
-
-To download and upload the image:
-
-\`\`\`
-# Download images:
-
-docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-x86_64
-docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-aarch64
-docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-x86_64-cu130
-docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-aarch64-cu130
-docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${ROCM_BASE_CACHE_KEY}-rocm-base
-docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-rocm
-docker pull public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION}
-docker pull public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION}
-
-# Tag and push images:
-
-## CUDA
-
-docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-x86_64 vllm/vllm-openai:x86_64
-docker tag vllm/vllm-openai:x86_64 vllm/vllm-openai:latest-x86_64
-docker tag vllm/vllm-openai:x86_64 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64
-docker push vllm/vllm-openai:latest-x86_64
-docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64
-
-docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-x86_64-cu130 vllm/vllm-openai:x86_64-cu130
-docker tag vllm/vllm-openai:x86_64-cu130 vllm/vllm-openai:latest-x86_64-cu130
-docker tag vllm/vllm-openai:x86_64-cu130 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu130
-docker push vllm/vllm-openai:latest-x86_64-cu130
-docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu130
-
-docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-aarch64 vllm/vllm-openai:aarch64
-docker tag vllm/vllm-openai:aarch64 vllm/vllm-openai:latest-aarch64
-docker tag vllm/vllm-openai:aarch64 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64
-docker push vllm/vllm-openai:latest-aarch64
-docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64
-
-docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-aarch64-cu130 vllm/vllm-openai:aarch64-cu130
-docker tag vllm/vllm-openai:aarch64-cu130 vllm/vllm-openai:latest-aarch64-cu130
-docker tag vllm/vllm-openai:aarch64-cu130 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu130
-docker push vllm/vllm-openai:latest-aarch64-cu130
-docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu130
-
-## ROCm
-
-docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-rocm vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}
-docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT} vllm/vllm-openai-rocm:latest
-docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT} vllm/vllm-openai-rocm:v${RELEASE_VERSION}
-docker push vllm/vllm-openai-rocm:latest
-docker push vllm/vllm-openai-rocm:v${RELEASE_VERSION}
-
-docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${ROCM_BASE_CACHE_KEY}-rocm-base vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}-base
-docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}-base vllm/vllm-openai-rocm:latest-base
-docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}-base vllm/vllm-openai-rocm:v${RELEASE_VERSION}-base
-docker push vllm/vllm-openai-rocm:latest-base
-docker push vllm/vllm-openai-rocm:v${RELEASE_VERSION}-base
-
-## CPU
-
-docker tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION} vllm/vllm-openai-cpu:x86_64
-docker tag vllm/vllm-openai-cpu:x86_64 vllm/vllm-openai-cpu:latest-x86_64
-docker tag vllm/vllm-openai-cpu:x86_64 vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64
-docker push vllm/vllm-openai-cpu:latest-x86_64
-docker push vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64
-
-docker tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION} vllm/vllm-openai-cpu:arm64
-docker tag vllm/vllm-openai-cpu:arm64 vllm/vllm-openai-cpu:latest-arm64
-docker tag vllm/vllm-openai-cpu:arm64 vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64
-docker push vllm/vllm-openai-cpu:latest-arm64
-docker push vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64
-
-# Create multi-arch manifest:
-
-docker manifest rm vllm/vllm-openai:latest
-docker manifest create vllm/vllm-openai:latest vllm/vllm-openai:latest-x86_64 vllm/vllm-openai:latest-aarch64
-docker manifest create vllm/vllm-openai:v${RELEASE_VERSION} vllm/vllm-openai:v${RELEASE_VERSION}-x86_64 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64
-docker manifest push vllm/vllm-openai:latest
-docker manifest push vllm/vllm-openai:v${RELEASE_VERSION}
-
-docker manifest rm vllm/vllm-openai:latest-cu130
-docker manifest create vllm/vllm-openai:latest-cu130 vllm/vllm-openai:latest-x86_64-cu130 vllm/vllm-openai:latest-aarch64-cu130
-docker manifest create vllm/vllm-openai:v${RELEASE_VERSION}-cu130 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu130 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu130
-docker manifest push vllm/vllm-openai:latest-cu130
-docker manifest push vllm/vllm-openai:v${RELEASE_VERSION}-cu130
-
-docker manifest rm vllm/vllm-openai-cpu:latest || true
-docker manifest create vllm/vllm-openai-cpu:latest vllm/vllm-openai-cpu:latest-x86_64 vllm/vllm-openai-cpu:latest-arm64
-docker manifest create vllm/vllm-openai-cpu:v${RELEASE_VERSION} vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64
-docker manifest push vllm/vllm-openai-cpu:latest
-docker manifest push vllm/vllm-openai-cpu:v${RELEASE_VERSION}
-\`\`\`
-EOF
diff --git a/.buildkite/scripts/check-ray-compatibility.sh b/.buildkite/scripts/check-ray-compatibility.sh
index 1572fe94168d..b056d4403dbb 100644
--- a/.buildkite/scripts/check-ray-compatibility.sh
+++ b/.buildkite/scripts/check-ray-compatibility.sh
@@ -29,7 +29,7 @@ if python3 -c "import torch; assert torch.version.hip" 2>/dev/null; then
         TORCH_INDEX_URL=""
     fi
 else
-    TORCH_INDEX_URL="https://download.pytorch.org/whl/cu129"
+    TORCH_INDEX_URL="https://download.pytorch.org/whl/cu130"
 fi
 echo ">>> Using PyTorch index: ${TORCH_INDEX_URL:-PyPI default}"
 
diff --git a/.buildkite/scripts/ci-fetch-log.sh b/.buildkite/scripts/ci-fetch-log.sh
new file mode 100755
index 000000000000..02798b56f4a9
--- /dev/null
+++ b/.buildkite/scripts/ci-fetch-log.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Usage: ./ci-fetch-log.sh <buildkite_job_url> [output_file]
+#        ./ci-fetch-log.sh <build_number> <job_uuid> [output_file]
+#
+# Downloads the raw log for a Buildkite job from the public, unauthenticated
+# /organizations/<org>/pipelines/<pipeline>/builds/<n>/jobs/<uuid>/download
+# endpoint, then strips ANSI/timestamps via ci-clean-log.sh.
+#
+# Find <build_number> and <job_uuid> via:
+#   gh pr checks <PR> --repo vllm-project/vllm
+# Each failing row's URL is .../builds/<build_number>#<job_uuid>.
+
+set -euo pipefail
+
+ORG="vllm"
+PIPELINE="ci"
+
+usage() {
+    echo "Usage: $0 <buildkite_job_url> [output_file]"
+    echo "       $0 <build_number> <job_uuid> [output_file]"
+    exit 1
+}
+
+if [ $# -lt 1 ]; then usage; fi
+
+if [[ "$1" == https://* ]]; then
+    BUILD=$(echo "$1" | sed -nE 's#.*/builds/([0-9]+).*#\1#p')
+    JOB=$(echo "$1" | grep -oE '[0-9a-f]{8}-[0-9a-f-]+' | head -n 1)
+    OUT="${2:-ci-${BUILD}-${JOB:0:8}.log}"
+else
+    if [ $# -lt 2 ]; then usage; fi
+    BUILD="$1"
+    JOB="$2"
+    OUT="${3:-ci-${BUILD}-${JOB:0:8}.log}"
+fi
+
+if [ -z "$BUILD" ] || [ -z "$JOB" ]; then
+    echo "Could not parse build number or job UUID from: $1" >&2
+    usage
+fi
+
+COOKIES=$(mktemp)
+trap 'rm -f "$COOKIES"' EXIT
+
+# Buildkite issues a session cookie on first hit; subsequent /download needs it.
+curl -fsSL -c "$COOKIES" -A "vllm-ci-fetch-log" \
+    "https://buildkite.com/${ORG}/${PIPELINE}/builds/${BUILD}" -o /dev/null
+
+curl -fsSL -b "$COOKIES" -A "vllm-ci-fetch-log" \
+    "https://buildkite.com/organizations/${ORG}/pipelines/${PIPELINE}/builds/${BUILD}/jobs/${JOB}/download" \
+    -o "$OUT"
+
+bash "$(dirname "$0")/ci-clean-log.sh" "$OUT"
+
+echo "$OUT"
diff --git a/.buildkite/scripts/detect-manylinux-tag.py b/.buildkite/scripts/detect-manylinux-tag.py
new file mode 100644
index 000000000000..40fa6c6ffbb7
--- /dev/null
+++ b/.buildkite/scripts/detect-manylinux-tag.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Detect the manylinux platform tag for a wheel and rename it in place.
+
+vLLM's build images produce wheels with the generic ``linux_<arch>`` platform
+tag, which installers like ``pip`` won't accept off PyPI/our index. We need to
+rewrite the platform tag to the appropriate ``manylinux_<major>_<minor>_<arch>``
+before uploading.
+
+Historically the tag was hard-coded per build (``manylinux_2_31`` for the
+Ubuntu 20.04-based image, ``manylinux_2_35`` for the Ubuntu 22.04-based
+images). That is brittle: bumping the base image silently produces wheels
+labelled with the wrong glibc requirement. This script asks ``auditwheel``
+to derive the tag from the symbol versions actually referenced by the
+binaries inside the wheel, so the label tracks reality.
+
+We can't simply call ``auditwheel repair`` -- it tries to graft external
+shared libraries into the wheel and fails on vLLM's CUDA/cuBLAS dependencies.
+Instead we use ``auditwheel.wheel_abi.analyze_wheel_abi`` directly, which is
+the same call that powers ``auditwheel show``, and read off
+``winfo.sym_policy.name``.
+
+Usage:
+    detect-manylinux-tag.py <wheel_path>
+
+The wheel is renamed in place; the new path is printed on stdout. All
+diagnostics go to stderr so callers can capture stdout safely.
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+from pathlib import Path
+
+from auditwheel.error import (
+    AuditwheelError,
+    NonPlatformWheelError,
+    WheelToolsError,
+)
+from auditwheel.wheel_abi import analyze_wheel_abi
+from auditwheel.wheeltools import get_wheel_architecture, get_wheel_libc
+
+
+def detect_platform_tag(wheel_path: Path) -> str:
+    """Return the most precise platform tag the wheel is consistent with.
+
+    Mirrors ``auditwheel show`` but returns ``sym_policy`` rather than
+    ``overall_policy``: we only care about the glibc symbol versions used,
+    not about other policy axes (ISA extensions, blacklist, etc.) that
+    ``overall_policy`` folds in.
+    """
+    fn = wheel_path.name
+
+    try:
+        arch = get_wheel_architecture(fn)
+    except (WheelToolsError, NonPlatformWheelError):
+        # Architecture isn't deducible from the filename; let auditwheel
+        # infer it from the ELF binaries inside the wheel.
+        arch = None
+
+    try:
+        libc = get_wheel_libc(fn)
+    except WheelToolsError:
+        # An unrepaired wheel uses ``linux_<arch>``, which doesn't encode
+        # libc. Let auditwheel infer it from the ELF binaries.
+        libc = None
+
+    winfo = analyze_wheel_abi(
+        libc,
+        arch,
+        wheel_path,
+        frozenset(),
+        disable_isa_ext_check=False,
+        allow_graft=False,
+    )
+    return winfo.sym_policy.name
+
+
+def rename_wheel(wheel_path: Path, new_platform_tag: str) -> Path:
+    """Rename the wheel in place, replacing only its platform tag."""
+    # Wheel filename per PEP 427:
+    #   {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl
+    # The platform tag is always the last ``-``-separated token before
+    # ``.whl``. Compound tags like ``manylinux_2_31_x86_64`` use ``_`` as the
+    # internal separator, so ``-``-splitting is unambiguous.
+    parts = wheel_path.stem.split("-")
+    if len(parts) < 5:
+        raise ValueError(f"Unrecognised wheel filename: {wheel_path.name}")
+    parts[-1] = new_platform_tag
+    new_path = wheel_path.with_name("-".join(parts) + ".whl")
+    if new_path != wheel_path:
+        wheel_path.rename(new_path)
+    return new_path
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(
+        description="Detect a wheel's manylinux platform tag with "
+        "auditwheel and rename the wheel in place."
+    )
+    parser.add_argument(
+        "wheel",
+        type=Path,
+        help="Path to the wheel to inspect and rename.",
+    )
+    args = parser.parse_args()
+
+    wheel_path: Path = args.wheel
+    if not wheel_path.is_file():
+        print(f"error: {wheel_path} is not a file", file=sys.stderr)
+        return 1
+
+    # Catch the things that ``analyze_wheel_abi`` and ``rename_wheel`` can
+    # raise: any subclass of ``AuditwheelError`` (pure-Python wheels,
+    # invalid libc, malformed wheels), filesystem errors, or our own
+    # ``ValueError`` for an unrecognised wheel filename. Print a single
+    # ``ERROR_TYPE: message`` line to stderr instead of a Python
+    # traceback, which is much friendlier in CI logs.
+    try:
+        new_tag = detect_platform_tag(wheel_path)
+        print(f"detected platform tag: {new_tag}", file=sys.stderr)
+        new_path = rename_wheel(wheel_path, new_tag)
+    except (AuditwheelError, ValueError, OSError) as e:
+        print(
+            f"error: failed to retag {wheel_path.name}: {type(e).__name__}: {e}",
+            file=sys.stderr,
+        )
+        return 2
+
+    if new_path != wheel_path:
+        print(f"renamed {wheel_path.name} -> {new_path.name}", file=sys.stderr)
+    else:
+        print(f"wheel already tagged {new_tag}", file=sys.stderr)
+
+    print(new_path)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.buildkite/scripts/docker-build-metadata-args.sh b/.buildkite/scripts/docker-build-metadata-args.sh
new file mode 100644
index 000000000000..9aa6fa9314f7
--- /dev/null
+++ b/.buildkite/scripts/docker-build-metadata-args.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Emit docker build flags for release image provenance metadata.
+# Keep this helper best-effort: missing Buildkite metadata should fall back to
+# local/default values instead of blocking the Docker build.
+
+# Variant examples: "", "cu129", "ubuntu2404", "cu129-ubuntu2404".
+variant="${1:-}"
+variant_suffix="${variant:+-${variant}}"
+
+image_name="${VLLM_DOCKER_IMAGE_NAME:-vllm/vllm-openai}"
+staging_repo="${VLLM_STAGING_IMAGE_REPO:-public.ecr.aws/q9t5s3a7/vllm-release-repo}"
+build_commit="${VLLM_BUILD_COMMIT:-${BUILDKITE_COMMIT:-unknown}}"
+build_pipeline="${VLLM_BUILD_PIPELINE:-${BUILDKITE_PIPELINE_ID:-${BUILDKITE_PIPELINE_SLUG:-local}}}"
+build_url="${VLLM_BUILD_URL:-${BUILDKITE_BUILD_URL:-}}"
+tag_commit="${BUILDKITE_COMMIT:-${build_commit}}"
+
+if [[ -n "${BUILDKITE:-}" || -n "${BUILDKITE_COMMIT:-}" ]]; then
+  release_version="${RELEASE_VERSION:-}"
+  if command -v buildkite-agent >/dev/null 2>&1; then
+    release_version="${release_version:-$(buildkite-agent meta-data get release-version 2>/dev/null)}"
+  fi
+  release_version="${release_version#v}"
+  release_version="${release_version:-${tag_commit}}"
+
+  staging_image_ref="${staging_repo}:${tag_commit}-$(uname -m)${variant_suffix}"
+
+  if [[ "${NIGHTLY:-}" == "1" ]]; then
+    if [[ -z "${variant}" ]]; then
+      image_tag="${image_name}:nightly-${tag_commit}"
+    elif [[ "${variant}" == cu* ]]; then
+      cuda_variant="${variant%%-*}"
+      remaining_variant="${variant#${cuda_variant}}"
+      image_tag="${image_name}:${cuda_variant}-nightly-${tag_commit}${remaining_variant}"
+    else
+      image_tag="${image_name}:nightly-${tag_commit}${variant_suffix}"
+    fi
+  else
+    image_tag="${image_name}:v${release_version}${variant_suffix}"
+  fi
+else
+  image_tag="${VLLM_IMAGE_TAG:-local/vllm-openai:dev}"
+  staging_image_ref="${image_tag}"
+fi
+
+emit_arg() {
+  printf -- "--build-arg %s=%s " "$1" "$2"
+}
+
+emit_arg VLLM_BUILD_COMMIT "${build_commit}"
+emit_arg VLLM_BUILD_PIPELINE "${build_pipeline}"
+emit_arg VLLM_BUILD_URL "${build_url}"
+# This is the intended public tag. The final digest is only known after push.
+emit_arg VLLM_IMAGE_TAG "${image_tag}"
+printf -- "--tag %s " "${staging_image_ref}"
diff --git a/.buildkite/scripts/generate-and-upload-nightly-index.sh b/.buildkite/scripts/generate-and-upload-nightly-index.sh
index fa6eb979af53..502ed0609310 100755
--- a/.buildkite/scripts/generate-and-upload-nightly-index.sh
+++ b/.buildkite/scripts/generate-and-upload-nightly-index.sh
@@ -9,21 +9,14 @@ set -ex
 
 BUCKET="vllm-wheels"
 INDICES_OUTPUT_DIR="indices"
-DEFAULT_VARIANT_ALIAS="cu129" # align with vLLM_MAIN_CUDA_VERSION in vllm/envs.py
-PYTHON="${PYTHON_PROG:-python3}" # try to read from env var, otherwise use python3
+DEFAULT_VARIANT_ALIAS="cu130" # align with vLLM_MAIN_CUDA_VERSION in vllm/envs.py
 SUBPATH=$BUILDKITE_COMMIT
 S3_COMMIT_PREFIX="s3://$BUCKET/$SUBPATH/"
 
-# detect if python3.12+ is available
-has_new_python=$($PYTHON -c "print(1 if __import__('sys').version_info >= (3,12) else 0)")
-if [[ "$has_new_python" -eq 0 ]]; then
-    # use new python from docker
-    docker pull python:3-slim
-    PYTHON="docker run --rm -v $(pwd):/app -w /app python:3-slim python3"
-fi
-
-echo "Using python interpreter: $PYTHON"
-echo "Python version: $($PYTHON --version)"
+# Select python3 (>= 3.12) -- local if available, else a docker fallback.
+# shellcheck source=lib/select-python.sh
+source .buildkite/scripts/lib/select-python.sh
+select_python
 
 # ======== generate and upload indices ========
 
diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh
index 472691d63c83..9008d7ba319d 100755
--- a/.buildkite/scripts/hardware_ci/run-amd-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh
@@ -35,23 +35,6 @@ export PYTHONPATH=".."
 # Helper Functions
 ###############################################################################
 
-wait_for_clean_gpus() {
-  local timeout=${1:-300}
-  local start=$SECONDS
-  echo "--- Waiting for clean GPU state (timeout: ${timeout}s)"
-  while true; do
-    if grep -q clean /opt/amdgpu/etc/gpu_state; then
-      echo "GPUs state is \"clean\""
-      return
-    fi
-    if (( SECONDS - start >= timeout )); then
-      echo "Error: GPUs did not reach clean state within ${timeout}s" >&2
-      exit 1
-    fi
-    sleep 3
-  done
-}
-
 cleanup_docker() {
   # Get Docker's root directory
   docker_root=$(docker info -f '{{.DockerRootDir}}')
@@ -131,8 +114,7 @@ handle_pytest_exit() {
 # unquoted since they have no spaces and work fine.
 #
 # Already-quoted expressions (containing literal single quotes) are passed
-# through untouched to avoid double-quoting values injected by
-# apply_rocm_test_overrides.
+# through untouched to avoid double-quoting well-formed shell fragments.
 #
 # NOTE: This ONLY fixes -m/-k flags. It cannot recover arbitrary inner
 # double-quotes stripped by the calling shell (see header comment).
@@ -264,120 +246,17 @@ re_quote_pytest_markers() {
   echo "${output% }"
 }
 
-###############################################################################
-# ROCm-specific pytest command rewrites
-#
-# These apply ignore flags and environment overrides for tests that are not
-# yet supported or behave differently on ROCm hardware. Kept as a single
-# function so new exclusions are easy to add in one place.
-###############################################################################
-
-apply_rocm_test_overrides() {
-  local cmds="$1"
-
-  # --- Model registry filter ---
-  if [[ $cmds == *"pytest -v -s models/test_registry.py"* ]]; then
-    cmds=${cmds//"pytest -v -s models/test_registry.py"/"pytest -v -s models/test_registry.py -k 'not BambaForCausalLM and not GritLM and not Mamba2ForCausalLM and not Zamba2ForCausalLM'"}
-  fi
-
-  # --- LoRA: disable custom paged attention ---
-  if [[ $cmds == *"pytest -v -s lora"* ]]; then
-    cmds=${cmds//"pytest -v -s lora"/"pytest -v -s lora"}
-  fi
-
-  # --- Kernel ignores ---
-  if [[ $cmds == *" kernels/core"* ]]; then
-    cmds="${cmds} \
-    --ignore=kernels/core/test_fused_quant_layernorm.py \
-    --ignore=kernels/core/test_permute_cols.py"
-  fi
-
-  if [[ $cmds == *" kernels/attention"* ]]; then
-    cmds="${cmds} \
-    --ignore=kernels/attention/test_attention_selector.py \
-    --ignore=kernels/attention/test_encoder_decoder_attn.py \
-    --ignore=kernels/attention/test_flash_attn.py \
-    --ignore=kernels/attention/test_flashinfer.py \
-    --ignore=kernels/attention/test_prefix_prefill.py \
-    --ignore=kernels/attention/test_cascade_flash_attn.py \
-    --ignore=kernels/attention/test_mha_attn.py \
-    --ignore=kernels/attention/test_lightning_attn.py \
-    --ignore=kernels/attention/test_attention.py"
-  fi
-
-  if [[ $cmds == *" kernels/quantization"* ]]; then
-    cmds="${cmds} \
-    --ignore=kernels/quantization/test_int8_quant.py \
-    --ignore=kernels/quantization/test_machete_mm.py \
-    --ignore=kernels/quantization/test_block_fp8.py \
-    --ignore=kernels/quantization/test_block_int8.py \
-    --ignore=kernels/quantization/test_marlin_gemm.py \
-    --ignore=kernels/quantization/test_cutlass_scaled_mm.py \
-    --ignore=kernels/quantization/test_int8_kernel.py"
-  fi
-
-  if [[ $cmds == *" kernels/mamba"* ]]; then
-    cmds="${cmds} \
-    --ignore=kernels/mamba/test_mamba_mixer2.py \
-    --ignore=kernels/mamba/test_causal_conv1d.py \
-    --ignore=kernels/mamba/test_mamba_ssm_ssd.py"
-  fi
-
-  if [[ $cmds == *" kernels/moe"* ]]; then
-    cmds="${cmds} \
-    --ignore=kernels/moe/test_moe.py \
-    --ignore=kernels/moe/test_cutlass_moe.py"
-  fi
-
-  # --- Entrypoint ignores ---
-  if [[ $cmds == *" entrypoints/openai "* ]]; then
-    cmds=${cmds//" entrypoints/openai "/" entrypoints/openai \
-    --ignore=entrypoints/openai/chat_completion/test_audio.py \
-    --ignore=entrypoints/openai/completion/test_shutdown.py \
-    --ignore=entrypoints/openai/test_completion.py \
-    --ignore=entrypoints/openai/models/test_models.py \
-    --ignore=entrypoints/openai/test_return_tokens_as_ids.py \
-    --ignore=entrypoints/openai/chat_completion/test_root_path.py \
-    --ignore=entrypoints/openai/completion/test_prompt_validation.py "}
-  fi
-
-  if [[ $cmds == *" entrypoints/serve"* ]]; then
-    cmds="${cmds} \
-    --ignore=entrypoints/serve/lora/test_lora_adapters.py"
-  fi
-
-  if [[ $cmds == *" entrypoints/llm "* ]]; then
-    cmds=${cmds//" entrypoints/llm "/" entrypoints/llm \
-    --ignore=entrypoints/llm/test_chat.py \
-    --ignore=entrypoints/llm/test_accuracy.py \
-    --ignore=entrypoints/llm/test_init.py \
-    --ignore=entrypoints/llm/test_prompt_validation.py "}
-  fi
-
-  # Clean up escaped newlines from --ignore appends
-  cmds=$(echo "$cmds" | sed 's/ \\ / /g')
-
-  echo "$cmds"
-}
-
 ###############################################################################
 # Main
 ###############################################################################
 
 # --- GPU initialization ---
-echo "--- Confirming Clean Initial State"
-wait_for_clean_gpus
-
 echo "--- ROCm info"
 rocminfo
 
 # --- Docker housekeeping ---
 cleanup_docker
 
-echo "--- Resetting GPUs"
-echo "reset" > /opt/amdgpu/etc/gpu_state
-wait_for_clean_gpus
-
 # --- Pull test image ---
 echo "--- Pulling container"
 image_name="rocm/vllm-ci:${BUILDKITE_COMMIT}"
@@ -402,9 +281,11 @@ HF_MOUNT="/root/.cache/huggingface"
 # double-quotes will have been stripped by the calling shell.
 if [[ -n "${VLLM_TEST_COMMANDS:-}" ]]; then
   commands="${VLLM_TEST_COMMANDS}"
+  commands_source="env"
   echo "Commands sourced from VLLM_TEST_COMMANDS (quoting preserved)"
 else
   commands="$*"
+  commands_source="argv"
   if [[ -z "$commands" ]]; then
     echo "Error: No test commands provided." >&2
     echo "Usage:" >&2
@@ -421,11 +302,16 @@ fi
 
 echo "Raw commands: $commands"
 
-# Fix quoting before ROCm overrides (so overrides see correct structure)
-commands=$(re_quote_pytest_markers "$commands")
-echo "After re-quoting: $commands"
+# Only try to repair stripped pytest -m/-k quoting in legacy argv mode.
+# VLLM_TEST_COMMANDS preserves inner quoting already, and re-quoting that path
+# can corrupt embedded echo strings or otherwise well-formed shell fragments.
+if [[ "$commands_source" == "argv" ]]; then
+  commands=$(re_quote_pytest_markers "$commands")
+  echo "After re-quoting: $commands"
+else
+  echo "Skipping re-quoting for VLLM_TEST_COMMANDS input"
+fi
 
-commands=$(apply_rocm_test_overrides "$commands")
 echo "Final commands: $commands"
 
 MYPYTHONPATH=".."
diff --git a/.buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh b/.buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh
index f289a43c6be4..8ac27ed6583a 100644
--- a/.buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh
@@ -1,43 +1,39 @@
 #!/bin/bash
 set -euox pipefail
 export VLLM_CPU_CI_ENV=0
+export VLLM_CPU_KVCACHE_SPACE=1 # avoid OOM
 
-echo "--- PP+TP"
-vllm serve meta-llama/Llama-3.2-3B-Instruct -tp=2 -pp=2 &
-server_pid=$!
-timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1
-vllm bench serve \
-    --backend vllm \
-    --dataset-name random \
-    --model meta-llama/Llama-3.2-3B-Instruct \
-    --num-prompts 20 \
-    --result-dir ./test_results \
-    --result-filename tp_pp.json \
-    --save-result \
-    --endpoint /v1/completions
-kill -s SIGTERM $server_pid; wait $server_pid || true
-failed_req=$(jq '.failed' ./test_results/tp_pp.json)
-if [ "$failed_req" -ne 0 ]; then
-  echo "Some requests were failed!"
-  exit 1
-fi
+MODE=${1:-all}
 
-echo "--- DP+TP"
-vllm serve meta-llama/Llama-3.2-3B-Instruct -tp=2 -dp=2 &
-server_pid=$!
-timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1
-vllm bench serve \
-    --backend vllm \
-    --dataset-name random \
-    --model meta-llama/Llama-3.2-3B-Instruct \
-    --num-prompts 20 \
-    --result-dir ./test_results \
-    --result-filename dp_pp.json \
-    --save-result \
-    --endpoint /v1/completions
-kill -s SIGTERM $server_pid; wait $server_pid || true
-failed_req=$(jq '.failed' ./test_results/dp_pp.json)
-if [ "$failed_req" -ne 0 ]; then
-  echo "Some requests were failed!"
-  exit 1
-fi
+run_scenario() {
+    local label="$1" result_file="$2"
+    shift 2
+    echo "--- $label"
+    vllm serve meta-llama/Llama-3.2-3B-Instruct "$@" --max-model-len=4096 &
+    local server_pid=$!
+    timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1
+    vllm bench serve \
+        --backend vllm \
+        --dataset-name random \
+        --model meta-llama/Llama-3.2-3B-Instruct \
+        --num-prompts 20 \
+        --result-dir ./test_results \
+        --result-filename "$result_file" \
+        --save-result \
+        --endpoint /v1/completions
+    kill -s SIGTERM "$server_pid"; wait "$server_pid" || true
+    if [ "$(jq '.failed' "./test_results/$result_file")" -ne 0 ]; then
+        echo "Some requests were failed in $label!"
+        exit 1
+    fi
+}
+
+case "$MODE" in
+    tp_pp) run_scenario "PP+TP" tp_pp.json -tp=2 -pp=2 ;;
+    dp_tp) run_scenario "DP+TP" dp_tp.json -tp=2 -dp=2 ;;
+    all)
+        run_scenario "PP+TP" tp_pp.json -tp=2 -pp=2
+        run_scenario "DP+TP" dp_tp.json -tp=2 -dp=2
+        ;;
+    *) echo "ERROR: unknown mode '$MODE' (expected: tp_pp | dp_tp | all)" >&2; exit 1 ;;
+esac
diff --git a/.buildkite/scripts/hardware_ci/run-cpu-test-arm.sh b/.buildkite/scripts/hardware_ci/run-cpu-test-arm.sh
index c2509a07b2c4..9c13fa79fcb2 100755
--- a/.buildkite/scripts/hardware_ci/run-cpu-test-arm.sh
+++ b/.buildkite/scripts/hardware_ci/run-cpu-test-arm.sh
@@ -31,6 +31,21 @@ function cpu_tests() {
     set -e
     pip list"
 
+  # Run kernel tests
+  docker exec cpu-test bash -c "
+    set -e
+    pytest -x -v -s tests/kernels/test_onednn.py
+    pytest -x -v -s tests/kernels/attention/test_cpu_attn.py
+    pytest -x -v -s tests/kernels/core/test_cpu_activation.py
+    pytest -x -v -s tests/kernels/moe/test_moe.py -k test_cpu_fused_moe_basic"
+
+  # skip tests requiring model downloads if HF_TOKEN is not set
+  # due to rate-limits
+  if [ -z "$HF_TOKEN" ]; then
+    echo "Warning: HF_TOKEN is not set. Skipping tests that require model downloads."
+    return
+  fi
+
   # offline inference
   docker exec cpu-test bash -c "
     set -e
@@ -46,12 +61,6 @@ function cpu_tests() {
     set -e
     pytest -x -v -s tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_logprobs"
 
-  # Run kernel tests
-  docker exec cpu-test bash -c "
-    set -e
-    pytest -x -v -s tests/kernels/test_onednn.py
-    pytest -x -v -s tests/kernels/attention/test_cpu_attn.py
-    pytest -x -v -s tests/kernels/moe/test_moe.py -k test_cpu_fused_moe_basic"
 
   # basic online serving
   docker exec cpu-test bash -c '
@@ -66,6 +75,21 @@ function cpu_tests() {
       --num-prompts 20 \
       --endpoint /v1/completions
     kill -s SIGTERM $server_pid &'
+
+  # smoke test for Gated DeltaNet
+  docker exec cpu-test bash -c '
+    set -e
+    VLLM_CPU_OMP_THREADS_BIND=$E2E_OMP_THREADS vllm serve Qwen/Qwen3.5-0.8B --max-model-len 2048 &
+    server_pid=$!
+    timeout 600 bash -c "until curl localhost:8000/v1/models; do sleep 1; done" || exit 1
+    vllm bench serve \
+      --backend vllm \
+      --dataset-name random \
+      --model Qwen/Qwen3.5-0.8B \
+      --num-prompts 20 \
+      --endpoint /v1/completions
+    kill -s SIGTERM $server_pid &'
+
 }
 
 # All of CPU tests are expected to be finished less than 40 mins.
diff --git a/.buildkite/scripts/hardware_ci/run-cpu-test.sh b/.buildkite/scripts/hardware_ci/run-cpu-test.sh
index db75ad3083b2..27ec0068668f 100644
--- a/.buildkite/scripts/hardware_ci/run-cpu-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-cpu-test.sh
@@ -16,5 +16,5 @@ echo "--- :docker: Building Docker image"
 docker build --progress plain --tag "$IMAGE_NAME" --target vllm-test -f docker/Dockerfile.cpu .
 
 # Run the image, setting --shm-size=4g for tensor parallel.
-docker run --rm --cpuset-cpus="$CORE_RANGE" --cpuset-mems="$NUMA_NODE" -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN -e VLLM_CPU_KVCACHE_SPACE=16 -e VLLM_CPU_CI_ENV=1 -e VLLM_CPU_SIM_MULTI_NUMA=1 --shm-size=4g "$IMAGE_NAME" \
+docker run --rm --cpuset-cpus="$CORE_RANGE" --cpuset-mems="$NUMA_NODE" -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN -e VLLM_CPU_KVCACHE_SPACE=16 -e VLLM_CPU_CI_ENV=1 -e VLLM_CPU_SIM_MULTI_NUMA=1 -e VLLM_CPU_ATTN_SPLIT_KV=0 --shm-size=4g "$IMAGE_NAME" \
         timeout "$TIMEOUT_VAL" bash -c "set -euox pipefail; echo \"--- Print packages\"; pip list; echo \"--- Running tests\"; ${TEST_COMMAND}"
diff --git a/.buildkite/scripts/hardware_ci/run-hpu-test.sh b/.buildkite/scripts/hardware_ci/run-hpu-test.sh
index 10df07b2000f..0b5d0af4b6f3 100644
--- a/.buildkite/scripts/hardware_ci/run-hpu-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-hpu-test.sh
@@ -42,7 +42,7 @@ WORKDIR /workspace/vllm
 ENV no_proxy=localhost,127.0.0.1
 ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true
 
-RUN bash -c 'pip install -r <(sed "/^torch/d" requirements/build.txt)'
+RUN bash -c 'pip install -r <(sed "/^torch/d" requirements/build/cuda.txt)'
 RUN VLLM_TARGET_DEVICE=empty pip install --no-build-isolation -e .
 RUN pip install git+https://github.com/vllm-project/vllm-gaudi.git
 
diff --git a/.buildkite/scripts/hardware_ci/run-intel-test.sh b/.buildkite/scripts/hardware_ci/run-intel-test.sh
index 7949f107caa2..0cbe1b5a0f09 100755
--- a/.buildkite/scripts/hardware_ci/run-intel-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-intel-test.sh
@@ -25,22 +25,100 @@ export PYTHONPATH=".."
 ###############################################################################
 
 cleanup_docker() {
+  # Share the same lock with image pull to avoid cleanup/pull races on one node.
+  local docker_lock="/tmp/docker-pull.lock"
+  exec 9>"$docker_lock"
+  flock 9
+
   docker_root=$(docker info -f '{{.DockerRootDir}}')
   if [ -z "$docker_root" ]; then
     echo "Failed to determine Docker root directory." >&2
-    exit 1
+    flock -u 9
+    return 1
   fi
   echo "Docker root directory: $docker_root"
 
   disk_usage=$(df "$docker_root" | tail -1 | awk '{print $5}' | sed 's/%//')
   threshold=70
   if [ "$disk_usage" -gt "$threshold" ]; then
-    echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..."
-    docker image prune -f
-    docker volume prune -f && docker system prune --force --filter "until=72h" --all
-    echo "Docker images and volumes cleanup completed."
+    echo "Disk usage is above $threshold%. Running aggressive CI image cleanup..."
+    cleanup_old_ci_images "${REGISTRY}/${REPO}" "${image_name}" "${DOCKER_IMAGE_CLEANUP_HOURS:-72}" 1
+  else
+    echo "Disk usage is below $threshold%. Checking old CI images anyway."
+    cleanup_old_ci_images "${REGISTRY}/${REPO}" "${image_name}" "${DOCKER_IMAGE_CLEANUP_HOURS:-72}" 0
+  fi
+  echo "Old CI image cleanup completed."
+
+  flock -u 9
+}
+
+cleanup_old_ci_images() {
+  local repo_prefix="$1"
+  local current_image_ref="$2"
+  local ttl_hours="$3"
+  local aggressive_cleanup="$4"
+
+  if [[ -z "$repo_prefix" || "$repo_prefix" == "/" ]]; then
+    echo "Skip old-image cleanup: invalid repo prefix '${repo_prefix}'"
+    return 0
+  fi
+
+  if ! [[ "$ttl_hours" =~ ^[0-9]+$ ]]; then
+    echo "Invalid DOCKER_IMAGE_CLEANUP_HOURS='${ttl_hours}', fallback to 72"
+    ttl_hours=72
+  fi
+
+  local now_epoch cutoff_epoch
+  now_epoch=$(date +%s)
+  cutoff_epoch=$((now_epoch - ttl_hours * 3600))
+
+  local -a used_image_ids
+  mapfile -t used_image_ids < <(docker ps -aq | xargs -r docker inspect --format '{{.Image}}' | sort -u)
+
+  local removed_count=0
+  local examined_count=0
+  declare -A seen_ids=()
+
+  while read -r image_ref image_id; do
+    [[ -z "$image_ref" || -z "$image_id" ]] && continue
+    ((examined_count++))
+
+    # Keep the image this job is going to use.
+    if [[ "$image_ref" == "$current_image_ref" ]]; then
+      continue
+    fi
+
+    # Avoid duplicate deletes when multiple tags point to same image id.
+    if [[ -n "${seen_ids[$image_id]:-}" ]]; then
+      continue
+    fi
+    seen_ids[$image_id]=1
+
+    # Never delete images that are used by any container on this node.
+    if printf '%s\n' "${used_image_ids[@]}" | grep -qx "$image_id"; then
+      continue
+    fi
+
+    local created created_epoch
+    created=$(docker image inspect -f '{{.Created}}' "$image_id" 2>/dev/null || true)
+    [[ -z "$created" ]] && continue
+    created_epoch=$(date -d "$created" +%s 2>/dev/null || true)
+    [[ -z "$created_epoch" ]] && continue
+
+    if (( created_epoch < cutoff_epoch )) || [[ "$aggressive_cleanup" == "1" ]]; then
+      if docker image rm -f "$image_id" >/dev/null 2>&1; then
+        ((removed_count++))
+      fi
+    fi
+  done < <(docker image ls --no-trunc "$repo_prefix" --format '{{.Repository}}:{{.Tag}} {{.ID}}')
+
+  # Also trim old dangling layers; this is safe and does not remove referenced images.
+  docker image prune -f --filter "until=${ttl_hours}h" >/dev/null 2>&1 || true
+
+  if [[ "$aggressive_cleanup" == "1" ]]; then
+    echo "Examined ${examined_count} images under ${repo_prefix}, removed ${removed_count} unused images under disk pressure."
   else
-    echo "Disk usage is below $threshold%. No cleanup needed."
+    echo "Examined ${examined_count} images under ${repo_prefix}, removed ${removed_count} old images (>${ttl_hours}h)."
   fi
 }
 
@@ -239,19 +317,32 @@ fi
 # --- Docker housekeeping ---
 cleanup_docker
 
+aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"
+
 # --- Build or pull test image ---
-if [[ -n "${IMAGE_TAG_XPU:-}" ]]; then
-  echo "Using prebuilt XPU image: ${IMAGE_TAG_XPU}"
-  docker pull "${IMAGE_TAG_XPU}"
+IMAGE="${IMAGE_TAG_XPU:-${image_name}}"
+
+echo "Using image: ${IMAGE}"
+
+if docker image inspect "${IMAGE}" >/dev/null 2>&1; then
+  echo "Image already exists locally, skipping pull"
 else
-  echo "Using prebuilt XPU image: ${image_name}"
-  docker pull "${image_name}"
+  echo "Image not found locally, waiting for lock..."
+
+  flock /tmp/docker-pull.lock bash -c "
+    if docker image inspect '${IMAGE}' >/dev/null 2>&1; then
+      echo 'Image already pulled by another runner'
+    else
+      echo 'Pulling image...'
+      timeout 900 docker pull '${IMAGE}'
+    fi
+  "
+
+  echo "Pull step completed"
 fi
 
 remove_docker_container() {
   docker rm -f "${container_name}" || true
-  docker image rm -f "${image_name}" || true
-  docker system prune -f || true
 }
 trap remove_docker_container EXIT
 
@@ -261,16 +352,31 @@ if [[ -z "${ZE_AFFINITY_MASK:-}" ]]; then
   echo "Warning: ZE_AFFINITY_MASK is not set. Proceeding without device affinity." >&2
 fi
 
-docker run \
+export CMDS="${commands}"
+export HF_TOKEN ZE_AFFINITY_MASK
+
+{
+  flock 9
+  if ! docker image inspect "${IMAGE}" >/dev/null 2>&1; then
+    echo 'Image missing before container creation, pulling again...'
+    timeout 900 docker pull "${IMAGE}"
+  fi
+
+  docker create \
     --device /dev/dri:/dev/dri \
     --net=host \
     --ipc=host \
     --privileged \
     -v /dev/dri/by-path:/dev/dri/by-path \
-    --entrypoint="" \
-    -e "HF_TOKEN=${HF_TOKEN:-}" \
-    -e "ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK:-}" \
-    -e "CMDS=${commands}" \
+    -v "${HOME}/.cache/huggingface:/root/.cache/huggingface" \
+    --entrypoint='' \
+    -e HF_TOKEN \
+    -e ZE_AFFINITY_MASK \
+    -e CMDS \
     --name "${container_name}" \
-    "${image_name}" \
-    bash -c 'set -e; echo "ZE_AFFINITY_MASK is ${ZE_AFFINITY_MASK:-}"; eval "$CMDS"'
+    "${IMAGE}" \
+    bash -c 'set -e; echo "ZE_AFFINITY_MASK is ${ZE_AFFINITY_MASK:-}"; eval "$CMDS"' \
+    >/dev/null
+} 9>/tmp/docker-pull.lock
+
+docker start -a "${container_name}"
diff --git a/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh b/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh
index 1def2c4682b1..f300b7f80991 100755
--- a/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh
+++ b/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh
@@ -61,7 +61,7 @@ echo "Results will be stored in: $RESULTS_DIR"
 echo "--- Installing Python dependencies ---"
 python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \
     && python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \
-    && python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.11" \
+    && python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.12" \
     && python3 -m pip install --progress-bar off hf-transfer tblib==3.1.0
 echo "--- Python dependencies installed ---"
 
diff --git a/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh b/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh
index feaf2b356267..6f4e0d37db7d 100755
--- a/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh
@@ -61,7 +61,7 @@ echo "Results will be stored in: $RESULTS_DIR"
 echo "--- Installing Python dependencies ---"
 python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \
     && python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \
-    && python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.11" \
+    && python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.12" \
     && python3 -m pip install --progress-bar off hf-transfer tblib==3.1.0
 echo "--- Python dependencies installed ---"
 
@@ -136,8 +136,6 @@ run_and_track_test 3 "test_accuracy.py::test_lm_eval_accuracy_v1_engine" \
     "python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine"
 run_and_track_test 4 "test_quantization_accuracy.py" \
     "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py"
-run_and_track_test 5 "examples/offline_inference/tpu.py" \
-    "python3 /workspace/vllm/examples/offline_inference/tpu.py"
 run_and_track_test 6 "test_tpu_model_runner.py" \
     "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/worker/test_tpu_model_runner.py"
 run_and_track_test 7 "test_sampler.py" \
diff --git a/.buildkite/scripts/hardware_ci/run-xpu-test.sh b/.buildkite/scripts/hardware_ci/run-xpu-test.sh
deleted file mode 100644
index a39bc3f17344..000000000000
--- a/.buildkite/scripts/hardware_ci/run-xpu-test.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-
-# This script build the CPU docker image and run the offline inference inside the container.
-# It serves a sanity check for compilation and basic model usage.
-set -ex
-
-image_name="xpu/vllm-ci:${BUILDKITE_COMMIT}"
-container_name="xpu_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"
-
-# Try building the docker image
-docker build -t "${image_name}" -f docker/Dockerfile.xpu .
-
-# Setup cleanup
-remove_docker_container() {
-  docker rm -f "${container_name}" || true;
-  docker image rm -f "${image_name}" || true;
-  docker system prune -f || true;
-}
-trap remove_docker_container EXIT
-
-# Run the image and test offline inference/tensor parallel
-docker run \
-    --device /dev/dri:/dev/dri \
-    --net=host \
-    --ipc=host \
-    --privileged \
-    -v /dev/dri/by-path:/dev/dri/by-path \
-    --entrypoint="" \
-    -e "HF_TOKEN=${HF_TOKEN}" \
-    -e "ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK}" \
-    --name "${container_name}" \
-    "${image_name}" \
-    bash -c '
-    set -e
-    echo $ZE_AFFINITY_MASK
-    python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager
-    python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 -O3 -cc.cudagraph_mode=NONE
-    python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager -tp 2 --distributed-executor-backend ray
-    python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager -tp 2 --distributed-executor-backend mp
-    python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager --attention-backend=TRITON_ATTN
-    python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager --quantization fp8
-    python3 examples/basic/offline_inference/generate.py --model superjob/Qwen3-4B-Instruct-2507-GPTQ-Int4  --block-size 64 --enforce-eager --max-model-len 8192
-    python3 examples/basic/offline_inference/generate.py --model ibm-research/PowerMoE-3b  --block-size 64 --enforce-eager -tp 2
-    python3 examples/basic/offline_inference/generate.py --model ibm-research/PowerMoE-3b  --block-size 64 --enforce-eager -tp 2 --enable-expert-parallel
-    cd tests
-    pytest -v -s v1/core --ignore=v1/core/test_reset_prefix_cache_e2e.py --ignore=v1/core/test_scheduler_e2e.py
-    pytest -v -s v1/engine
-    pytest -v -s v1/sample --ignore=v1/sample/test_logprobs.py --ignore=v1/sample/test_logprobs_e2e.py
-    pytest -v -s v1/worker --ignore=v1/worker/test_gpu_model_runner.py --ignore=v1/worker/test_worker_memory_snapshot.py
-    pytest -v -s v1/structured_output
-    pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_tree_attention.py --ignore=v1/spec_decode/test_speculators_eagle3.py --ignore=v1/spec_decode/test_acceptance_length.py
-    pytest -v -s v1/kv_connector/unit --ignore=v1/kv_connector/unit/test_multi_connector.py --ignore=v1/kv_connector/unit/test_nixl_connector.py --ignore=v1/kv_connector/unit/test_example_connector.py --ignore=v1/kv_connector/unit/test_lmcache_integration.py -k "not (test_register_kv_caches and FLASH_ATTN and True)"
-    pytest -v -s v1/test_serial_utils.py
-'
diff --git a/.buildkite/scripts/lib/manylinux.sh b/.buildkite/scripts/lib/manylinux.sh
new file mode 100644
index 000000000000..bde2dfe0a3dc
--- /dev/null
+++ b/.buildkite/scripts/lib/manylinux.sh
@@ -0,0 +1,127 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#
+# Shared helper for rewriting a wheel's platform tag from the generic
+# ``linux_<arch>`` to the correct ``manylinux_<major>_<minor>_<arch>``.
+# After sourcing, call ``apply_manylinux_tag <wheel>`` on each wheel
+# that still carries the generic tag; the renamed path is printed on
+# stdout (logs go to stderr).
+#
+# Why a pinned Docker container instead of using whatever Python
+# happens to be on the agent:
+#   - vLLM's release agents are heterogeneous -- they don't agree on
+#     a Python minor version, and we can't rely on a particular
+#     ``auditwheel`` being installed.
+#   - ``detect-manylinux-tag.py`` reads ``auditwheel.wheel_abi`` and
+#     ``Policy.sym_policy``, which are *internal* APIs without a
+#     stability promise. Pinning both Python and auditwheel makes the
+#     detected tag a function of the inputs alone, and shifts version
+#     bumps from "implicit drift" to "deliberate, retested change".
+#   - Other release scripts (``generate-and-upload-nightly-index.sh``,
+#     ``upload-rocm-wheels.sh``) already use the python:3-slim image
+#     when the agent's interpreter is too old; this is the same idea
+#     made stricter.
+#
+# To keep the per-wheel cost down (the ROCm upload retags ~10 wheels
+# each run), we install auditwheel into a long-lived helper container
+# once on source, then ``docker exec`` into it for each call.
+#
+# Trap behaviour:
+# - Sourcing installs an EXIT trap that calls ``manylinux_cleanup`` to
+#   tear down the helper container. Any EXIT trap that was already in
+#   place when this file was sourced is captured and run AFTER our
+#   cleanup, so we don't silently clobber it.
+# - If a caller sets a new EXIT trap *after* sourcing, that trap will
+#   replace ours; in that case the caller should call
+#   ``manylinux_cleanup`` from their own handler.
+
+if [[ -n "${_MANYLINUX_LIB_SOURCED:-}" ]]; then
+    return 0
+fi
+_MANYLINUX_LIB_SOURCED=1
+
+# Pin both sides. Bump these deliberately and re-run a representative
+# wheel from each build target through the detection.
+_MANYLINUX_PYTHON_IMAGE="python:3.12-slim"
+_MANYLINUX_AUDITWHEEL_VERSION="6.6.0"
+
+# Resolve our own directory (and the sibling detect script) using the
+# canonical, symlink-resolved path. The container mounts cwd at the
+# same absolute path on both sides, so all paths we hand to it -- the
+# script, the wheel -- must canonicalise to a location under cwd.
+_MANYLINUX_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
+_MANYLINUX_DETECT_SCRIPT="$(cd "${_MANYLINUX_LIB_DIR}/.." && pwd -P)/detect-manylinux-tag.py"
+_MANYLINUX_CWD="$(pwd -P)"
+
+docker pull --quiet "$_MANYLINUX_PYTHON_IMAGE" >/dev/null
+
+# Spin up a long-lived helper container so we install auditwheel once
+# and then ``docker exec`` into it for each wheel.
+#
+# The container runs as root so ``pip install`` can write into the
+# system site-packages; individual ``docker exec`` calls below pin
+# themselves to the host UID so any file rename happens with host
+# ownership, not root.
+_MANYLINUX_CONTAINER="$(docker run -d --rm \
+    -v "$_MANYLINUX_CWD:$_MANYLINUX_CWD" \
+    -w "$_MANYLINUX_CWD" \
+    "$_MANYLINUX_PYTHON_IMAGE" \
+    sleep infinity)"
+docker exec "$_MANYLINUX_CONTAINER" \
+    pip install --quiet --disable-pip-version-check \
+    --root-user-action=ignore \
+    "auditwheel==${_MANYLINUX_AUDITWHEEL_VERSION}"
+
+# Public cleanup -- safe to call multiple times.
+manylinux_cleanup() {
+    if [[ -n "${_MANYLINUX_CONTAINER:-}" ]]; then
+        docker rm -f "$_MANYLINUX_CONTAINER" >/dev/null 2>&1 || true
+        _MANYLINUX_CONTAINER=""
+    fi
+}
+
+# Capture any EXIT trap that was already in place so we can chain to
+# it rather than overwrite it. ``trap -p EXIT`` prints the handler in
+# eval-able form (``trap -- 'CMD' EXIT``) or nothing if unset; we
+# strip the wrapper to recover ``CMD``. Handles the common case --
+# CMDs without embedded single quotes -- and degrades gracefully (we
+# still run our own cleanup) for the pathological case.
+_manylinux_prev_exit_trap_cmd=""
+_manylinux_existing_exit_trap="$(trap -p EXIT)"
+if [[ -n "$_manylinux_existing_exit_trap" ]]; then
+    _tmp="${_manylinux_existing_exit_trap#trap -- \'}"
+    _manylinux_prev_exit_trap_cmd="${_tmp%\' EXIT}"
+    unset _tmp
+fi
+unset _manylinux_existing_exit_trap
+
+_manylinux_run_exit_chain() {
+    manylinux_cleanup
+    if [[ -n "$_manylinux_prev_exit_trap_cmd" ]]; then
+        eval "$_manylinux_prev_exit_trap_cmd"
+    fi
+}
+trap _manylinux_run_exit_chain EXIT
+
+# Detect the manylinux platform tag for a single wheel and rename it
+# in place, printing the renamed wheel path on stdout. Returns
+# non-zero on failure (which under ``set -e`` propagates to caller).
+#
+# The wheel must be reachable via a path under the host cwd so it's
+# visible inside the helper container; in CI the wheels always live
+# under ``artifacts/`` so this is fine.
+apply_manylinux_tag() {
+    local wheel="$1"
+    local abs_wheel
+    abs_wheel="$(realpath "$wheel")"
+    local new_wheel
+    new_wheel="$(docker exec -u "$(id -u):$(id -g)" \
+        "$_MANYLINUX_CONTAINER" \
+        python "$_MANYLINUX_DETECT_SCRIPT" "$abs_wheel")"
+    if [[ -z "$new_wheel" || ! -f "$new_wheel" ]]; then
+        echo "apply_manylinux_tag: detect-manylinux-tag.py did not produce a valid wheel path for $wheel" >&2
+        return 1
+    fi
+    printf '%s\n' "$new_wheel"
+}
diff --git a/.buildkite/scripts/lib/select-python.sh b/.buildkite/scripts/lib/select-python.sh
new file mode 100644
index 000000000000..bc53030a2b50
--- /dev/null
+++ b/.buildkite/scripts/lib/select-python.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#
+# Pick a Python interpreter for buildkite scripts: prefer a local
+# ``python3`` if it is recent enough (>= 3.12), otherwise fall back to
+# a one-shot Docker container running ``python:3-slim``. After
+# ``select_python`` returns, ``$PYTHON`` is set in the caller's shell
+# and is safe to use as a command (e.g. ``$PYTHON some_script.py``).
+#
+# The 3.12 threshold matches what the existing nightly-index work
+# expects -- typing features used by ``generate-nightly-index.py``.
+# This helper does not pin the *minor* version; if you need stricter
+# reproducibility (e.g. relying on auditwheel internals), invoke
+# Docker yourself with a pinned tag rather than calling this.
+
+if [[ -n "${_SELECT_PYTHON_LIB_SOURCED:-}" ]]; then
+    return 0
+fi
+_SELECT_PYTHON_LIB_SOURCED=1
+
+# Sets ``PYTHON`` in the caller's shell and exports it. Idempotent --
+# calling twice is safe and the second call simply re-runs the probe.
+select_python() {
+    local py="${PYTHON_PROG:-python3}"
+    local has_new_python
+    has_new_python=$("$py" -c \
+        "print(1 if __import__('sys').version_info >= (3,12) else 0)" \
+        2>/dev/null || echo 0)
+    if [[ "$has_new_python" -eq 0 ]]; then
+        # ``-u $(id -u):$(id -g)`` so files created via the container
+        # end up owned by the host user, not root.
+        docker pull python:3-slim
+        PYTHON="docker run --rm -u $(id -u):$(id -g) -v $(pwd):/app -w /app python:3-slim python3"
+    else
+        PYTHON="$py"
+    fi
+    export PYTHON
+    echo "Using python interpreter: $PYTHON"
+    echo "Python version: $($PYTHON --version)"
+}
diff --git a/.buildkite/scripts/publish-release-images.sh b/.buildkite/scripts/publish-release-images.sh
new file mode 100755
index 000000000000..ec319aa76006
--- /dev/null
+++ b/.buildkite/scripts/publish-release-images.sh
@@ -0,0 +1,180 @@
+#!/bin/bash
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#
+# Publish release Docker images from ECR to DockerHub.
+# Pulls per-arch images, tags with latest and versioned tags, pushes them,
+# then creates and pushes multi-arch manifests.
+
+set -euo pipefail
+
+RELEASE_VERSION=$(buildkite-agent meta-data get release-version --default "" | sed 's/^v//')
+if [ -z "${RELEASE_VERSION}" ]; then
+  echo "ERROR: release-version metadata not set"
+  exit 1
+fi
+
+COMMIT="$BUILDKITE_COMMIT"
+ROCM_BASE_CACHE_KEY=$(.buildkite/scripts/cache-rocm-base-wheels.sh key)
+
+echo "========================================"
+echo "Publishing release images v${RELEASE_VERSION}"
+echo "  Commit: ${COMMIT}"
+echo "  ROCm base cache key: ${ROCM_BASE_CACHE_KEY}"
+echo "========================================"
+
+# Login to ECR to pull staging images
+aws ecr-public get-login-password --region us-east-1 | \
+  docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7
+
+# ---- CUDA (default: 13.0) ----
+
+docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64
+docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64
+
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64 vllm/vllm-openai:latest-x86_64
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64
+docker push vllm/vllm-openai:latest-x86_64
+docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64
+
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64 vllm/vllm-openai:latest-aarch64
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64
+docker push vllm/vllm-openai:latest-aarch64
+docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64
+
+docker manifest rm vllm/vllm-openai:latest || true
+docker manifest rm vllm/vllm-openai:v${RELEASE_VERSION} || true
+docker manifest create vllm/vllm-openai:latest vllm/vllm-openai:latest-x86_64 vllm/vllm-openai:latest-aarch64
+docker manifest create vllm/vllm-openai:v${RELEASE_VERSION} vllm/vllm-openai:v${RELEASE_VERSION}-x86_64 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64
+docker manifest push vllm/vllm-openai:latest
+docker manifest push vllm/vllm-openai:v${RELEASE_VERSION}
+
+# ---- CUDA 12.9 ----
+
+docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-cu129
+docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-cu129
+
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-cu129 vllm/vllm-openai:latest-x86_64-cu129
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-cu129 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129
+docker push vllm/vllm-openai:latest-x86_64-cu129
+docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129
+
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-cu129 vllm/vllm-openai:latest-aarch64-cu129
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-cu129 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129
+docker push vllm/vllm-openai:latest-aarch64-cu129
+docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129
+
+docker manifest rm vllm/vllm-openai:latest-cu129 || true
+docker manifest rm vllm/vllm-openai:v${RELEASE_VERSION}-cu129 || true
+docker manifest create vllm/vllm-openai:latest-cu129 vllm/vllm-openai:latest-x86_64-cu129 vllm/vllm-openai:latest-aarch64-cu129
+docker manifest create vllm/vllm-openai:v${RELEASE_VERSION}-cu129 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129
+docker manifest push vllm/vllm-openai:latest-cu129
+docker manifest push vllm/vllm-openai:v${RELEASE_VERSION}-cu129
+
+# ---- Ubuntu 24.04 (CUDA 13.0) ----
+
+docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-ubuntu2404
+docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-ubuntu2404
+
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-ubuntu2404 vllm/vllm-openai:latest-x86_64-ubuntu2404
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-ubuntu2404
+docker push vllm/vllm-openai:latest-x86_64-ubuntu2404
+docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-ubuntu2404
+
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-ubuntu2404 vllm/vllm-openai:latest-aarch64-ubuntu2404
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-ubuntu2404
+docker push vllm/vllm-openai:latest-aarch64-ubuntu2404
+docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-ubuntu2404
+
+docker manifest rm vllm/vllm-openai:latest-ubuntu2404 || true
+docker manifest rm vllm/vllm-openai:v${RELEASE_VERSION}-ubuntu2404 || true
+docker manifest create vllm/vllm-openai:latest-ubuntu2404 vllm/vllm-openai:latest-x86_64-ubuntu2404 vllm/vllm-openai:latest-aarch64-ubuntu2404
+docker manifest create vllm/vllm-openai:v${RELEASE_VERSION}-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-ubuntu2404
+docker manifest push vllm/vllm-openai:latest-ubuntu2404
+docker manifest push vllm/vllm-openai:v${RELEASE_VERSION}-ubuntu2404
+
+# ---- Ubuntu 24.04 (CUDA 12.9) ----
+
+docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-cu129-ubuntu2404
+docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-cu129-ubuntu2404
+
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-cu129-ubuntu2404 vllm/vllm-openai:latest-x86_64-cu129-ubuntu2404
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-cu129-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129-ubuntu2404
+docker push vllm/vllm-openai:latest-x86_64-cu129-ubuntu2404
+docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129-ubuntu2404
+
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-cu129-ubuntu2404 vllm/vllm-openai:latest-aarch64-cu129-ubuntu2404
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-cu129-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129-ubuntu2404
+docker push vllm/vllm-openai:latest-aarch64-cu129-ubuntu2404
+docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129-ubuntu2404
+
+docker manifest rm vllm/vllm-openai:latest-cu129-ubuntu2404 || true
+docker manifest rm vllm/vllm-openai:v${RELEASE_VERSION}-cu129-ubuntu2404 || true
+docker manifest create vllm/vllm-openai:latest-cu129-ubuntu2404 vllm/vllm-openai:latest-x86_64-cu129-ubuntu2404 vllm/vllm-openai:latest-aarch64-cu129-ubuntu2404
+docker manifest create vllm/vllm-openai:v${RELEASE_VERSION}-cu129-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129-ubuntu2404
+docker manifest push vllm/vllm-openai:latest-cu129-ubuntu2404
+docker manifest push vllm/vllm-openai:v${RELEASE_VERSION}-cu129-ubuntu2404
+
+# ---- ROCm ----
+
+docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-rocm
+docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${ROCM_BASE_CACHE_KEY}-rocm-base
+
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-rocm vllm/vllm-openai-rocm:latest
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-rocm vllm/vllm-openai-rocm:v${RELEASE_VERSION}
+docker push vllm/vllm-openai-rocm:latest
+docker push vllm/vllm-openai-rocm:v${RELEASE_VERSION}
+
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${ROCM_BASE_CACHE_KEY}-rocm-base vllm/vllm-openai-rocm:latest-base
+docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${ROCM_BASE_CACHE_KEY}-rocm-base vllm/vllm-openai-rocm:v${RELEASE_VERSION}-base
+docker push vllm/vllm-openai-rocm:latest-base
+docker push vllm/vllm-openai-rocm:v${RELEASE_VERSION}-base
+
+# ---- CPU ----
+# CPU images are behind separate block steps and may not have been built.
+# All-or-nothing: inspect both arches first, then either publish everything
+# (per-arch + multi-arch manifest) or skip everything. Publishing only one
+# arch would leave `:latest-x86_64` pointing at the new release while the
+# `:latest` multi-arch manifest still resolves to the previous release.
+
+CPU_X86_TAG=public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION}
+CPU_ARM_TAG=public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION}
+
+CPU_X86_AVAILABLE=false
+CPU_ARM_AVAILABLE=false
+docker manifest inspect "${CPU_X86_TAG}" >/dev/null 2>&1 && CPU_X86_AVAILABLE=true
+docker manifest inspect "${CPU_ARM_TAG}" >/dev/null 2>&1 && CPU_ARM_AVAILABLE=true
+
+if [ "$CPU_X86_AVAILABLE" = "true" ] && [ "$CPU_ARM_AVAILABLE" = "true" ]; then
+  docker pull "${CPU_X86_TAG}"
+  docker tag "${CPU_X86_TAG}" vllm/vllm-openai-cpu:latest-x86_64
+  docker tag "${CPU_X86_TAG}" vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64
+  docker push vllm/vllm-openai-cpu:latest-x86_64
+  docker push vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64
+
+  docker pull "${CPU_ARM_TAG}"
+  docker tag "${CPU_ARM_TAG}" vllm/vllm-openai-cpu:latest-arm64
+  docker tag "${CPU_ARM_TAG}" vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64
+  docker push vllm/vllm-openai-cpu:latest-arm64
+  docker push vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64
+
+  docker manifest rm vllm/vllm-openai-cpu:latest || true
+  docker manifest rm vllm/vllm-openai-cpu:v${RELEASE_VERSION} || true
+  docker manifest create vllm/vllm-openai-cpu:latest vllm/vllm-openai-cpu:latest-x86_64 vllm/vllm-openai-cpu:latest-arm64
+  docker manifest create vllm/vllm-openai-cpu:v${RELEASE_VERSION} vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64
+  docker manifest push vllm/vllm-openai-cpu:latest
+  docker manifest push vllm/vllm-openai-cpu:v${RELEASE_VERSION}
+elif [ "$CPU_X86_AVAILABLE" = "false" ] && [ "$CPU_ARM_AVAILABLE" = "false" ]; then
+  echo "WARNING: Neither CPU image found in ECR, skipping CPU publish (ensure block-cpu-release-image-build and block-arm64-cpu-release-image-build were unblocked and the builds finished pushing)"
+else
+  # Partial state: one arch built, the other did not. Fail loudly rather than
+  # ship a Docker Hub state where `:latest-${arch}` and `:latest` (multi-arch)
+  # disagree on which release they point at.
+  echo "ERROR: Partial CPU build detected (x86_64=${CPU_X86_AVAILABLE}, arm64=${CPU_ARM_AVAILABLE})."
+  echo "       Refusing to publish to avoid split-tag drift between per-arch and multi-arch tags."
+  echo "       Re-run the missing CPU build and retry, or manually publish if a single-arch release is intended."
+  exit 1
+fi
+
+echo ""
+echo "Successfully published release images for v${RELEASE_VERSION}"
diff --git a/.buildkite/scripts/run-rust-frontend-cargo-ci.sh b/.buildkite/scripts/run-rust-frontend-cargo-ci.sh
new file mode 100755
index 000000000000..6ce9b5200c41
--- /dev/null
+++ b/.buildkite/scripts/run-rust-frontend-cargo-ci.sh
@@ -0,0 +1,156 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+MODE="${1:-}"
+
+if [[ "$MODE" != "style-clippy" && "$MODE" != "test" ]]; then
+  echo "Usage: $0 {style-clippy|test}" >&2
+  exit 2
+fi
+
+ROOT_DIR="$(git rev-parse --show-toplevel)"
+cd "$ROOT_DIR"
+
+export CARGO_TERM_COLOR="${CARGO_TERM_COLOR:-always}"
+export CARGO_HOME="${CARGO_HOME:-$HOME/.cargo}"
+export RUSTUP_HOME="${RUSTUP_HOME:-$HOME/.rustup}"
+export PATH="$CARGO_HOME/bin:$PATH"
+
+log_section() {
+  echo "--- $*"
+}
+
+install_protoc() {
+  if command -v protoc >/dev/null 2>&1; then
+    return
+  fi
+
+  local version="${PROTOC_VERSION:-31.1}"
+  local arch
+  case "$(uname -m)" in
+    x86_64)
+      arch="x86_64"
+      ;;
+    aarch64|arm64)
+      arch="aarch_64"
+      ;;
+    *)
+      echo "Unsupported protoc architecture: $(uname -m)" >&2
+      return 1
+      ;;
+  esac
+
+  local url="https://github.com/protocolbuffers/protobuf/releases/download/v${version}/protoc-${version}-linux-${arch}.zip"
+  local tmp_dir
+  tmp_dir="$(mktemp -d)"
+
+  log_section "Installing protoc ${version}"
+  curl -L --proto '=https' --tlsv1.2 -sSf "$url" -o "$tmp_dir/protoc.zip"
+  mkdir -p "$CARGO_HOME/bin"
+  unzip -q "$tmp_dir/protoc.zip" bin/protoc 'include/*' -d "$CARGO_HOME"
+  chmod +x "$CARGO_HOME/bin/protoc"
+  rm -rf "$tmp_dir"
+}
+
+rust_toolchain() {
+  awk -F '"' '/channel[[:space:]]*=/ { print $2; exit }' rust-toolchain.toml
+}
+
+install_rust_toolchain() {
+  log_section "Installing Rust toolchain"
+  if ! command -v rustup >/dev/null 2>&1; then
+    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
+      | sh -s -- -y --profile minimal --default-toolchain none
+  fi
+
+  local toolchain
+  toolchain="$(rust_toolchain)"
+  rustup toolchain install "$toolchain" --profile minimal --component rustfmt,clippy
+  rustup component add --toolchain "$toolchain" rustfmt clippy
+}
+
+install_cargo_binstall() {
+  if command -v cargo-binstall >/dev/null 2>&1; then
+    return
+  fi
+
+  log_section "Installing cargo-binstall"
+  curl -L --proto '=https' --tlsv1.2 -sSf \
+    https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh \
+    | bash
+}
+
+install_cargo_sort() {
+  if command -v cargo-sort >/dev/null 2>&1; then
+    return
+  fi
+
+  log_section "Installing cargo-sort"
+  install_cargo_binstall
+  cargo binstall --no-confirm cargo-sort
+}
+
+install_cargo_nextest() {
+  if command -v cargo-nextest >/dev/null 2>&1; then
+    return
+  fi
+
+  log_section "Installing cargo-nextest"
+  install_cargo_binstall
+  cargo binstall --no-confirm --secure cargo-nextest
+}
+
+install_uv() {
+  if command -v uv >/dev/null 2>&1; then
+    return
+  fi
+
+  log_section "Installing uv"
+  curl -LsSf --proto '=https' --tlsv1.2 https://astral.sh/uv/install.sh \
+    | env UV_INSTALL_DIR="$CARGO_HOME/bin" sh
+}
+
+run_style_clippy() {
+  install_cargo_sort
+
+  log_section "Checking Rust formatting"
+  cargo fmt --manifest-path rust/Cargo.toml --all -- --check
+
+  log_section "Checking Cargo.toml ordering"
+  cargo sort --workspace --check rust
+
+  log_section "Running clippy"
+  cargo clippy \
+    --manifest-path rust/Cargo.toml \
+    --workspace \
+    --all-targets \
+    --all-features \
+    --locked \
+    -- \
+    -D warnings
+}
+
+run_tests() {
+  install_uv
+  install_cargo_nextest
+
+  log_section "Running cargo nextest"
+  cargo nextest run \
+    --manifest-path rust/Cargo.toml \
+    --workspace \
+    --all-features \
+    --locked \
+    --no-fail-fast
+}
+
+install_protoc
+install_rust_toolchain
+
+case "$MODE" in
+  style-clippy)
+    run_style_clippy
+    ;;
+  test)
+    run_tests
+    ;;
+esac
diff --git a/.buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_prefetch_offload.sh b/.buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_prefetch_offload.sh
index de48eb282a65..0eadfa1f80b4 100755
--- a/.buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_prefetch_offload.sh
+++ b/.buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_prefetch_offload.sh
@@ -51,6 +51,7 @@ vllm serve "$MODEL" \
   --offload-num-in-group 2 \
   --offload-prefetch-step 1 \
   --offload-params w13_weight w2_weight \
+  --generation-config vllm \
   --port "$PORT" \
   ${EXTRA_ARGS+"${EXTRA_ARGS[@]}"} &
 SERVER_PID=$!
diff --git a/.buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_dp4_async_eplb.sh b/.buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_dp4_async_eplb.sh
new file mode 100755
index 000000000000..06743f16b687
--- /dev/null
+++ b/.buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_dp4_async_eplb.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+set -euxo pipefail
+
+# args: [THRESHOLD] [NUM_QUESTIONS] [START_PORT]
+THRESHOLD=${1:-0.8}
+NUM_Q=${2:-1319}
+PORT=${3:-8050}
+OUT_DIR=${OUT_DIR:-/tmp/vllm-scheduled}
+mkdir -p "${OUT_DIR}"
+
+wait_for_server() {
+  local port=$1
+  timeout 600 bash -c '
+    until curl -sf "http://127.0.0.1:'"$port"'/health" > /dev/null; do
+      sleep 1
+    done'
+}
+
+MODEL="Qwen/Qwen3-30B-A3B-FP8"
+BACK="allgather_reducescatter"
+
+cleanup() {
+  if [[ -n "${SERVER_PID:-}" ]] && kill -0 "${SERVER_PID}" 2>/dev/null; then
+    kill "${SERVER_PID}" 2>/dev/null || true
+    for _ in {1..20}; do
+      kill -0 "${SERVER_PID}" 2>/dev/null || break
+      sleep 0.5
+    done
+    kill -9 "${SERVER_PID}" 2>/dev/null || true
+  fi
+}
+trap cleanup EXIT
+
+VLLM_DEEP_GEMM_WARMUP=skip \
+vllm serve "$MODEL" \
+--enforce-eager \
+--data-parallel-size 4 \
+--enable-expert-parallel \
+--enable-eplb \
+--all2all-backend "$BACK" \
+--eplb-config '{"window_size":20, "step_interval":100, "use_async":true}' \
+--trust-remote-code \
+--max-model-len 2048 \
+--port "$PORT" &
+SERVER_PID=$!
+wait_for_server "$PORT"
+
+TAG=$(echo "$MODEL" | tr '/: \\n' '_____')
+OUT="${OUT_DIR}/${TAG}_${BACK}.json"
+python3 tests/evals/gsm8k/gsm8k_eval.py --host http://127.0.0.1 --port "$PORT" --num-questions "${NUM_Q}" --save-results "${OUT}"
+python3 - <<PY
+import json; acc=json.load(open('${OUT}'))['accuracy']
+print(f"${MODEL} ${BACK}: accuracy {acc:.3f}")
+assert acc >= ${THRESHOLD}, f"${MODEL} ${BACK} accuracy {acc}"
+PY
diff --git a/.buildkite/scripts/tool_call/run-bfcl-eval.sh b/.buildkite/scripts/tool_call/run-bfcl-eval.sh
index f3e5009e6fe3..3748cab62c7c 100755
--- a/.buildkite/scripts/tool_call/run-bfcl-eval.sh
+++ b/.buildkite/scripts/tool_call/run-bfcl-eval.sh
@@ -28,6 +28,7 @@
 #   BFCL_MAX_MODEL_LEN  - Max model length (default: 4096)
 #   BFCL_PORT           - Server port (default: 8000)
 #   BFCL_REASONING_PARSER - Reasoning parser name (default: disabled)
+#   BFCL_TEMPERATURE    - Temperature (default: 0.0)
 #   BFCL_EXTRA_ARGS     - Additional vLLM server args
 
 set -euo pipefail
@@ -43,6 +44,7 @@ TP_SIZE="${BFCL_TP_SIZE:-1}"
 MAX_MODEL_LEN="${BFCL_MAX_MODEL_LEN:-4096}"
 PORT="${BFCL_PORT:-8000}"
 REASONING_PARSER="${BFCL_REASONING_PARSER:-}"
+TEMPERATURE="${BFCL_TEMPERATURE:-0.0}"
 EXTRA_ARGS="${BFCL_EXTRA_ARGS:-}"
 
 # Set up output directory
@@ -139,7 +141,7 @@ echo "vLLM server is ready. (started in ${SECONDS_WAITED}s)"
 # be patched in-process so BFCL knows to use the OpenAI-compatible handler
 # against our local vLLM server.
 bfcl_exit_code=0
-python3 - "$MODEL" "$TEST_CATEGORY" "$NUM_THREADS" "$PORT" "$API_TYPE" "$OUTPUT_DIR" << 'PYEOF' || bfcl_exit_code=$?
+python3 - "$MODEL" "$TEST_CATEGORY" "$NUM_THREADS" "$PORT" "$API_TYPE" "$TEMPERATURE" "$OUTPUT_DIR" << 'PYEOF' || bfcl_exit_code=$?
 import os
 import sys
 
@@ -148,7 +150,8 @@ test_category = sys.argv[2]
 num_threads = int(sys.argv[3])
 port = sys.argv[4]
 api_type = sys.argv[5]
-output_dir = sys.argv[6] if len(sys.argv) > 6 and sys.argv[6] else os.getcwd()
+temperature = float(sys.argv[6])
+output_dir = sys.argv[7] if len(sys.argv) > 7 and sys.argv[7] else os.getcwd()
 
 os.environ["OPENAI_BASE_URL"] = f"http://localhost:{port}/v1"
 os.environ["OPENAI_API_KEY"] = "dummy"
@@ -204,6 +207,7 @@ gen_kwargs["model"] = [model]
 gen_kwargs["test_category"] = [c.strip() for c in test_category.split(",")]
 gen_kwargs["skip_server_setup"] = True
 gen_kwargs["num_threads"] = num_threads
+gen_kwargs["temperature"] = temperature
 generate(**gen_kwargs)
 
 # ---- evaluate ----
diff --git a/.buildkite/scripts/upload-nightly-wheels.sh b/.buildkite/scripts/upload-nightly-wheels.sh
index cc72cda7d505..8cef31908809 100644
--- a/.buildkite/scripts/upload-nightly-wheels.sh
+++ b/.buildkite/scripts/upload-nightly-wheels.sh
@@ -2,14 +2,18 @@
 
 set -ex
 
-# Upload a single wheel to S3 (rename linux -> manylinux).
+# Upload a single wheel to S3, after detecting and applying the appropriate
+# manylinux platform tag with auditwheel.
 # Index generation is handled separately by generate-and-upload-nightly-index.sh.
 
+# shellcheck source=lib/manylinux.sh
+source .buildkite/scripts/lib/manylinux.sh
+
 BUCKET="vllm-wheels"
 SUBPATH=$BUILDKITE_COMMIT
 S3_COMMIT_PREFIX="s3://$BUCKET/$SUBPATH/"
 
-# ========= collect, rename & upload the wheel ==========
+# ========= locate the wheel ==========
 
 # Assume wheels are in artifacts/dist/*.whl
 wheel_files=(artifacts/dist/*.whl)
@@ -21,19 +25,9 @@ if [[ ${#wheel_files[@]} -ne 1 ]]; then
 fi
 wheel="${wheel_files[0]}"
 
-# default build image uses ubuntu 20.04, which corresponds to manylinux_2_31
-# we also accept params as manylinux tag
-# refer to https://github.com/mayeut/pep600_compliance?tab=readme-ov-file#acceptable-distros-to-build-wheels
-manylinux_version="${1:-manylinux_2_31}"
+# ========= detect manylinux tag and rename ==========
 
-# Rename 'linux' to the appropriate manylinux version in the wheel filename
-if [[ "$wheel" != *"linux"* ]]; then
-  echo "Error: Wheel filename does not contain 'linux': $wheel"
-  exit 1
-fi
-new_wheel="${wheel/linux/$manylinux_version}"
-mv -- "$wheel" "$new_wheel"
-wheel="$new_wheel"
+wheel="$(apply_manylinux_tag "$wheel")"
 echo "Renamed wheel to: $wheel"
 
 # Extract the version from the wheel
diff --git a/.buildkite/scripts/upload-release-wheels-pypi.sh b/.buildkite/scripts/upload-release-wheels-pypi.sh
index 058e5bbe4f4c..a45d2aa3c9f6 100644
--- a/.buildkite/scripts/upload-release-wheels-pypi.sh
+++ b/.buildkite/scripts/upload-release-wheels-pypi.sh
@@ -39,10 +39,17 @@ fi
 
 set -x # avoid printing secrets above
 
-# install twine from pypi
-python3 -m venv /tmp/vllm-release-env
+# install uv if not already available
+if ! command -v uv &> /dev/null; then
+  curl -LsSf https://astral.sh/uv/install.sh | UV_VERSION=0.11.14 sh
+  export PATH="$HOME/.local/bin:$PATH"
+fi
+
+# install twine and sdist build prerequisites using uv with Python 3.12
+uv venv --python 3.12 /tmp/vllm-release-env
 source /tmp/vllm-release-env/bin/activate
-pip install twine
+uv pip install twine
+uv pip install -r requirements/build/cuda.txt
 python3 -m twine --version
 
 # copy release wheels to local directory
diff --git a/.buildkite/scripts/upload-rocm-wheels.sh b/.buildkite/scripts/upload-rocm-wheels.sh
index a42848a16ffe..1f3655631204 100755
--- a/.buildkite/scripts/upload-rocm-wheels.sh
+++ b/.buildkite/scripts/upload-rocm-wheels.sh
@@ -20,10 +20,6 @@ BUCKET="${S3_BUCKET:-vllm-wheels}"
 ROCM_SUBPATH="rocm/${BUILDKITE_COMMIT}"
 S3_COMMIT_PREFIX="s3://$BUCKET/$ROCM_SUBPATH/"
 INDICES_OUTPUT_DIR="rocm-indices"
-PYTHON="${PYTHON_PROG:-python3}"
-
-# ROCm uses manylinux_2_35 (Ubuntu 22.04 based)
-MANYLINUX_VERSION="manylinux_2_35"
 
 echo "========================================"
 echo "ROCm Wheel Upload Configuration"
@@ -34,19 +30,21 @@ echo "Commit: $BUILDKITE_COMMIT"
 echo "Branch: $BUILDKITE_BRANCH"
 echo "========================================"
 
-# ======== Part 0: Setup Python ========
+# ======== Part 0: Setup Python and helpers ========
 
-# Detect if python3.12+ is available
-has_new_python=$($PYTHON -c "print(1 if __import__('sys').version_info >= (3,12) else 0)" 2>/dev/null || echo 0)
-if [[ "$has_new_python" -eq 0 ]]; then
-    # Use new python from docker
-    # Use --user to ensure files are created with correct ownership (not root)
-    docker pull python:3-slim
-    PYTHON="docker run --rm --user $(id -u):$(id -g) -v $(pwd):/app -w /app python:3-slim python3"
-fi
+# Pick a Python interpreter for index generation -- local if recent
+# enough, else a one-shot docker fallback.
+# shellcheck source=lib/select-python.sh
+source .buildkite/scripts/lib/select-python.sh
+select_python
 
-echo "Using python interpreter: $PYTHON"
-echo "Python version: $($PYTHON --version)"
+# Set up auditwheel-in-a-container for the manylinux retagging step.
+# Distinct from select_python: ``manylinux.sh`` deliberately pins both
+# the Python and auditwheel versions (the script reads auditwheel
+# internals) and so always runs in a known-good container regardless
+# of what's on the agent.
+# shellcheck source=lib/manylinux.sh
+source .buildkite/scripts/lib/manylinux.sh
 
 # ======== Part 1: Collect and prepare wheels ========
 
@@ -63,11 +61,18 @@ if [ "$WHEEL_COUNT" -eq 0 ]; then
     exit 1
 fi
 
-# Rename linux to manylinux in wheel filenames
+# Detect the appropriate manylinux platform tag for any wheel that still
+# carries the generic ``linux_<arch>`` tag, and rename it in place. We use
+# auditwheel via ``apply_manylinux_tag`` (see lib/manylinux.sh) rather than
+# a hard-coded ``manylinux_2_35`` string so that the label tracks the actual
+# glibc symbol versions used by the binaries (and stays correct if the
+# rocm_base image is rebased).
+#
+# The ``linux``/``manylinux`` filter below skips both pre-tagged wheels
+# (e.g. upstream torch) and pure-Python ``-any.whl`` wheels.
 for wheel in all-rocm-wheels/*.whl; do
     if [[ "$wheel" == *"linux"* ]] && [[ "$wheel" != *"manylinux"* ]]; then
-        new_wheel="${wheel/linux/$MANYLINUX_VERSION}"
-        mv -- "$wheel" "$new_wheel"
+        new_wheel="$(apply_manylinux_tag "$wheel")"
         echo "Renamed: $(basename "$wheel") -> $(basename "$new_wheel")"
     fi
 done
diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index 49987880c1a9..3cadab548fb1 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -1,8 +1,8 @@
 # In this file, you can add more tests to run either by adding a new step or
 # adding a new command to an existing step. See different options here for examples.
 
-# This script will be feed into Jinja template in `test-template-aws.j2` at
-# https://github.com/vllm-project/buildkite-ci/blob/main/scripts/test-template-aws.j2
+# This script will be feed into Jinja template in `test-template-amd.j2` at
+# https://github.com/vllm-project/buildkite-ci/blob/main/scripts/test-template-amd.j2
 # to generate the final pipeline yaml file.
 
 # Documentation
@@ -39,8 +39,8 @@
 #####################################################################################################################################
 #                                                                                                                                   #
 # IMPORTANT:                                                                                                                        #
-#   * Currently AMD CI has MI250 agents, MI325 agents, and MI355 agents. All upcoming feature improvements are tracked in:          #
-#         https://github.com/vllm-project/vllm/issues/34994                                                                         #
+#   * Currently AMD CI has MI250 agents, MI300 agents, MI325 agents, and MI355 agents. All upcoming feature improvements are        #
+#         tracked in: https://github.com/vllm-project/vllm/issues/34994                                                             #
 #                                                                                                                                   #
 #-----------------------------------------------------------------------------------------------------------------------------------#
 #                                                                                                                                   #
@@ -104,193 +104,180 @@
 #####################################################################################################################################
 
 
-
-
 steps:
 
+#########################################################################################################################################
+#                                                                                                                                       #
+#                                                         MI250 (gfx90a) tests                                                          #
+#                                                                                                                                       #
+#########################################################################################################################################
 
-#####################################################################################################################################
-#                                                                                                                                   #
-#  MI250 test definitions ( currently the test set is completely mirrored // TBD which tests are to be routed there ultimately)     #
-#                                                                                                                                   #
-#####################################################################################################################################
+#-----------------------------------------------------  mi250 · basic_correctness  -----------------------------------------------------#
 
-- label: Pytorch Nightly Dependency Override Check # TBD
+- label: Distributed Model Tests (2 GPUs) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  optional: true
-  soft_fail: true
+  agent_pool: mi250_2
+  num_gpus: 2
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - requirements/nightly_torch_test.txt
+  - vllm/model_executor/model_loader/sharded_state_loader.py
+  - vllm/model_executor/models/
+  - vllm/model_executor/layers/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
+  - tests/basic_correctness/
+  - tests/model_executor/model_loader/test_sharded_state_loader.py
+  - tests/models/
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - bash standalone_tests/pytorch_nightly_dependency.sh
-
-
-- label: Async Engine, Inputs, Utils, Worker # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/
-  - tests/detokenizer
-  - tests/multimodal
-  - tests/utils_
-  commands:
-  - pytest -v -s detokenizer
-  - pytest -v -s -m 'not cpu_test' multimodal
-  - pytest -v -s utils_
+  - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
+  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s model_executor/model_loader/test_sharded_state_loader.py -m '(not slow_test)'
+  - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
+  - pytest models/language -v -s -m 'distributed(num_gpus=2)'
+  - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py
+  - VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)'
 
+#----------------------------------------------------------  mi250 · compile  ----------------------------------------------------------#
 
-- label: Async Engine, Inputs, Utils, Worker, Config (CPU) # TBD
+- label: PyTorch Compilation Unit Tests # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
+  torch_nightly: true
   optional: true
-  no_gpu: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/test_inputs.py
-  - tests/test_outputs.py
-  - tests/test_pooling_params.py
-  - tests/test_ray_env.py
-  - tests/multimodal
-  - tests/renderers
-  - tests/standalone_tests/lazy_imports.py
-  - tests/tokenizers_
-  - tests/tool_parsers
-  - tests/transformers_utils
-  - tests/config
+  - vllm/compilation/
+  - vllm/model_executor/layers/
+  - vllm/v1/worker/
+  - vllm/v1/attention/
+  - vllm/v1/cudagraph_dispatcher.py
+  - vllm/config/compilation.py
+  - csrc/
+  - tests/compile
+  - vllm/platforms/rocm.py
   commands:
-  - python3 standalone_tests/lazy_imports.py
-  - pytest -v -s test_inputs.py
-  - pytest -v -s test_outputs.py
-  - pytest -v -s test_pooling_params.py
-  - pytest -v -s test_ray_env.py
-  - pytest -v -s -m 'cpu_test' multimodal
-  - pytest -v -s renderers
-  - pytest -v -s tokenizers_
-  - pytest -v -s tool_parsers
-  - pytest -v -s transformers_utils
-  - pytest -v -s config
-
+  - "find compile/ -maxdepth 1 -name 'test_*.py' -print0 | xargs -0 -n1 -I{} pytest -s -v '{}'"
 
-- label: Python-only Installation # TBD
+- label: PyTorch Fullgraph # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
   optional: true
+  torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - tests/standalone_tests/python_only_compile.sh
-  - setup.py
+  - vllm/compilation/
+  - vllm/model_executor/
+  - vllm/v1/attention/
+  - vllm/config/compilation.py
+  - csrc/
+  - tests/compile
   - vllm/platforms/rocm.py
   commands:
-  - bash standalone_tests/python_only_compile.sh
-
+  - pytest -v -s compile/fullgraph/test_full_graph.py -k 'not test_fp8_kv_scale_compile'
 
-- label: Basic Correctness # TBD
+- label: PyTorch Fullgraph Smoke Test # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
-  fast_check: true
+  optional: true
   torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/basic_correctness/test_basic_correctness
-  - tests/basic_correctness/test_cpu_offload
-  - tests/basic_correctness/test_cumem.py
-  commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s basic_correctness/test_cumem.py
-  - pytest -v -s basic_correctness/test_basic_correctness.py
-  - pytest -v -s basic_correctness/test_cpu_offload.py
-
-
-- label: Entrypoints Unit Tests # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  fast_check: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/entrypoints
-  - tests/entrypoints/
+  - vllm/compilation/
+  - vllm/model_executor/
+  - vllm/v1/attention/
+  - vllm/config/compilation.py
+  - csrc/
+  - tests/compile
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s entrypoints/openai/tool_parsers
-  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/serve/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py  --ignore=entrypoints/pooling
-
+  - "find compile/fullgraph/ -name 'test_*.py' -not -name 'test_full_graph.py' -exec pytest -s -v {} \\\\;"
 
-- label: Entrypoints Integration (LLM) # TBD
+- label: Distributed Compile + RPC Tests (2 GPUs) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  fast_check: true
-  torch_nightly: true
+  agent_pool: mi250_2
+  num_gpus: 2
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/entrypoints/llm
-  - tests/entrypoints/offline_mode
+  - vllm/compilation/
+  - vllm/distributed/
+  - vllm/engine/
+  - vllm/executor/
+  - vllm/worker/worker_base.py
+  - vllm/v1/engine/
+  - vllm/v1/worker/
+  - tests/compile/fullgraph/test_basic_correctness.py
+  - tests/compile/test_wrapper.py
+  - tests/entrypoints/llm/test_collective_rpc.py
+  - vllm/platforms/rocm.py
   commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
-  - pytest -v -s entrypoints/llm/test_generate.py
-  - pytest -v -s entrypoints/offline_mode
+  - pytest -v -s entrypoints/llm/test_collective_rpc.py
+  - pytest -v -s ./compile/fullgraph/test_basic_correctness.py
+  - pytest -v -s ./compile/test_wrapper.py
 
+#--------------------------------------------------------  mi250 · distributed  --------------------------------------------------------#
 
-- label: Entrypoints Integration (API Server 2) # TBD
+- label: Distributed Comm Ops # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  fast_check: true
-  torch_nightly: true
+  agent_pool: mi250_2
+  num_gpus: 2
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/entrypoints/rpc
-  - tests/entrypoints/serve/instrumentator
-  - tests/tool_use
+  - vllm/distributed
+  - tests/distributed
+  - vllm/platforms/rocm.py
   commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/serve/instrumentator
-  - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
-  - pytest -v -s tool_use
-
+  - pytest -v -s distributed/test_comm_ops.py
+  - pytest -v -s distributed/test_shm_broadcast.py
+  - pytest -v -s distributed/test_shm_buffer.py
+  - pytest -v -s distributed/test_shm_storage.py
 
-- label: Entrypoints Integration (Responses API) # TBD
+- label: Distributed Torchrun + Shutdown Tests (2 GPUs) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  fast_check: true
-  torch_nightly: true
+  agent_pool: mi250_2
+  num_gpus: 2
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/entrypoints/openai/responses
+  - vllm/distributed/
+  - vllm/engine/
+  - vllm/executor/
+  - vllm/worker/worker_base.py
+  - vllm/v1/engine/
+  - vllm/v1/worker/
+  - tests/distributed/
+  - tests/v1/shutdown
+  - tests/v1/worker/test_worker_memory_snapshot.py
+  - vllm/platforms/rocm.py
   commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/openai/responses
-
+  - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
+  - VLLM_TEST_SAME_HOST=1 VLLM_TEST_WITH_DEFAULT_DEVICE_SET=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
+  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
+  - pytest -v -s v1/worker/test_worker_memory_snapshot.py
 
-- label: EPLB Algorithm # TBD
+- label: Elastic EP Scaling Test # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
+  agent_pool: mi250_4
+  num_gpus: 4
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/distributed/eplb
-  - tests/distributed/test_eplb_algo.py
+  - vllm/distributed/
+  - vllm/engine/
+  - vllm/executor/
+  - vllm/compilation/
+  - tests/distributed/
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s distributed/test_eplb_algo.py
-
+  - pytest -v -s distributed/test_elastic_ep.py
 
 - label: EPLB Execution # TBD
   timeout_in_minutes: 180
@@ -307,8 +294,7 @@ steps:
   - pytest -v -s distributed/test_eplb_execute.py
   - pytest -v -s distributed/test_eplb_spec_decode.py
 
-
-- label: Elastic EP Scaling Test # TBD
+- label: Pipeline + Context Parallelism (4 GPUs) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_4
@@ -318,1803 +304,1688 @@ steps:
   - vllm/distributed/
   - vllm/engine/
   - vllm/executor/
-  - vllm/compilation/
+  - vllm/model_executor/models/
+  - vllm/model_executor/layers/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
   - tests/distributed/
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s distributed/test_elastic_ep.py
+  - pytest -v -s distributed/test_pp_cudagraph.py
+  - pytest -v -s distributed/test_pipeline_parallel.py
 
+#----------------------------------------------------------  mi250 · engine  -----------------------------------------------------------#
 
-- label: Metrics, Tracing (2 GPUs) # TBD
+- label: Engine # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
-  num_gpus: 2
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/v1/tracing
+  - tests/engine
+  - tests/test_sequence
+  - tests/test_config
+  - tests/test_logger
+  - tests/test_vllm_port
   commands:
-  - "pip install \
-      'opentelemetry-sdk>=1.26.0' \
-      'opentelemetry-api>=1.26.0' \
-      'opentelemetry-exporter-otlp>=1.26.0' \
-      'opentelemetry-semantic-conventions-ai>=0.4.1'"
-  - pytest -v -s v1/tracing
+  - pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py
 
+#-----------------------------------------------------------  mi250 · evals  -----------------------------------------------------------#
 
-- label: Regression # TBD
+- label: Multi-Modal Accuracy Eval (Small Models) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
-  working_dir: "/vllm-workspace/tests"
+  optional: true
+  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
   source_file_dependencies:
-  - vllm/
-  - tests/test_regression
+  - vllm/multimodal/
+  - vllm/inputs/
+  - vllm/v1/core/
+  - vllm/platforms/rocm.py
+  - vllm/model_executor/model_loader/
   commands:
-  - pip install modelscope
-  - pytest -v -s test_regression.py
+  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-mm-small.txt --tp-size=1
 
+#---------------------------------------------------------  mi250 · examples  ----------------------------------------------------------#
 
-- label: Engine # TBD
+- label: Examples # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
   optional: true
-  working_dir: "/vllm-workspace/tests"
+  working_dir: "/vllm-workspace/examples"
   source_file_dependencies:
-  - vllm/
-  - tests/engine
-  - tests/test_sequence
-  - tests/test_config
-  - tests/test_logger
-  - tests/test_vllm_port
+  - vllm/entrypoints
+  - vllm/multimodal
+  - examples/
+  - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py
+    - pip install tensorizer
+    # Basic
+    - python3 basic/offline_inference/chat.py --attention-backend TRITON_ATTN
+    - python3 basic/offline_inference/generate.py --model facebook/opt-125m
+    - python3 basic/offline_inference/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
+    - python3 basic/offline_inference/classify.py
+    - python3 basic/offline_inference/embed.py
+    - python3 basic/offline_inference/score.py
+    # Multi-modal models
+    - python3 generate/multimodal/audio_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
+    - python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
+    # Pooling models
+    - python3 pooling/embed/vision_embedding_offline.py --seed 0
+    # Features demo
+    - python3 features/automatic_prefix_caching/prefix_caching_offline.py
+    - python3 deployment/llm_engine_example.py
+    - python3 features/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 features/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
+    - python3 features/speculative_decoding/spec_decode_offline.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
+    - python3 features/speculative_decoding/spec_decode_offline.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536
 
+#----------------------------------------------------------  mi250 · kernels  ----------------------------------------------------------#
 
-- label: Engine (1 GPU) # TBD
+- label: Kernels Core Operation Test # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/v1/
-  - tests/v1/engine/
+  - csrc/
+  - tests/kernels/core
+  - tests/kernels/test_top_k_per_row.py
+  - tests/kernels/test_concat_mla_q.py
+  - vllm/model_executor/layers/rotary_embedding/
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s v1/engine/test_preprocess_error_handling.py
-  - pytest -v -s v1/engine --ignore v1/engine/test_preprocess_error_handling.py
+  - pytest -v -s kernels/core --ignore=kernels/core/test_minimax_reduce_rms.py  kernels/test_concat_mla_q.py kernels/test_top_k_per_row.py
 
-
-- label: e2e Scheduling (1 GPU) # TBD
+- label: Kernels Helion Test # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/v1/
-  - tests/v1/e2e/general/
+  - vllm/utils/import_utils.py
+  - tests/kernels/helion/
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s v1/e2e/general/test_async_scheduling.py
-
+  - pip install helion==1.0.0
+  - pytest -v -s kernels/helion/
 
-- label: e2e Core (1 GPU) # TBD
+- label: Kernels Mamba Test # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/v1/
-  - tests/v1/e2e/general/
+  - csrc/mamba/
+  - tests/kernels/mamba
+  - vllm/model_executor/layers/mamba/ops
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s v1/e2e/general --ignore v1/e2e/general/test_async_scheduling.py
+  - pytest -v -s kernels/mamba
 
+#-----------------------------------------------------------  mi250 · lora  ------------------------------------------------------------#
 
-- label: Spec Decode Speculators + MTP # TBD
+- label: LoRA %N # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
+  parallelism: 4
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/v1/spec_decode/
-  - vllm/v1/worker/gpu/spec_decode/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/sample/
-  - vllm/model_executor/layers/
-  - vllm/transformers_utils/configs/speculators/
-  - tests/v1/e2e/spec_decode/
+  - vllm/lora
+  - tests/lora
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
+  - pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_qwen3_with_multi_loras.py --ignore=lora/test_olmoe_tp.py --ignore=lora/test_deepseekv2_tp.py --ignore=lora/test_gptoss_tp.py --ignore=lora/test_qwen3moe_tp.py --ignore=lora/test_qwen35_densemodel_lora.py
 
+#------------------------------------------------------  mi250 · model_executor  -------------------------------------------------------#
 
-- label: Spec Decode Ngram + Suffix # TBD
+- label: Model Executor # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
   optional: true
+  torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/v1/spec_decode/
-  - vllm/v1/worker/gpu/spec_decode/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/sample/
-  - vllm/model_executor/layers/
-  - tests/v1/e2e/spec_decode/
+  - vllm/engine/arg_utils.py
+  - vllm/config/model.py
+  - vllm/model_executor
+  - tests/model_executor
+  - tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s v1/e2e/spec_decode -k "ngram or suffix"
+  - apt-get update && apt-get install -y curl libsodium23
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s model_executor -m '(not slow_test)'
+  - pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py
 
+#------------------------------------------------------  mi250 · models / basic  -------------------------------------------------------#
 
-- label: Spec Decode Draft Model # TBD
+- label: Basic Models Test (Other CPU) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
+  no_gpu: true
   optional: true
+  torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/v1/spec_decode/
-  - vllm/v1/worker/gpu/spec_decode/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/sample/
+  - vllm/
+  - tests/models/test_utils.py
+  - tests/models/test_vision.py
+  commands:
+  - pytest -v -s models/test_utils.py models/test_vision.py
+
+- label: Basic Models Tests (Extra Initialization) %N # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
+  agent_pool: mi250_1
+  torch_nightly: true
+  parallelism: 2
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - vllm/model_executor/models/
   - vllm/model_executor/layers/
-  - tests/v1/e2e/spec_decode/
+  - tests/models/test_initialization.py
+  - tests/models/registry.py
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"
-
+  - pytest -v -s models/test_initialization.py -k 'not test_can_initialize_small_subset' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
 
-- label: V1 e2e (2 GPUs) # TBD
+- label: Basic Models Tests (Initialization) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
-  optional: true
+  torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/v1/e2e
+  - tests/models/test_initialization.py
+  - tests/models/registry.py
   commands:
-    - pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "tensor_parallelism"
-
+  - pytest -v -s models/test_initialization.py::test_can_initialize_small_subset
 
-- label: V1 Sample + Logits # TBD
-  timeout_in_minutes: 60
+- label: Basic Models Tests (Other) # TBD
+  timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
-  optional: true
+  torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/v1/sample
-  - tests/v1/logits_processors
-  - tests/v1/test_oracle.py
-  - tests/v1/test_request.py
-  - tests/v1/test_outputs.py
+  - tests/models/test_terratorch.py
+  - tests/models/test_transformers.py
+  - tests/models/test_registry.py
   commands:
-  - pytest -v -s v1/sample
-  - pytest -v -s v1/logits_processors
-  - pytest -v -s v1/test_oracle.py
-  - pytest -v -s v1/test_request.py
-  - pytest -v -s v1/test_outputs.py
+  - pytest -v -s models/test_terratorch.py models/test_transformers.py models/test_registry.py
 
+#-----------------------------------------------------  mi250 · models / language  -----------------------------------------------------#
 
-- label: V1 Core + KV + Metrics # TBD
-  timeout_in_minutes: 60
+- label: Language Models Test (MTEB) # TBD
+  timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
-  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/v1/core
-  - tests/v1/executor
-  - tests/v1/kv_offload
-  - tests/v1/worker
-  - tests/v1/kv_connector/unit
-  - tests/v1/metrics
-  - tests/entrypoints/openai/correctness/test_lmeval.py
+  - tests/models/language/pooling_mteb_test
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
-  - pytest -v -s -m 'not cpu_test' v1/core
-  - pytest -v -s v1/executor
-  - pytest -v -s v1/kv_offload
-  - pytest -v -s v1/worker
-  - pytest -v -s -m 'not cpu_test' v1/kv_connector/unit
-  - pytest -v -s -m 'not cpu_test' v1/metrics
-  - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
-  - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
+  - pytest -v -s models/language/pooling_mteb_test
 
+- label: Language Models Test (PPL) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
+  agent_pool: mi250_1
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - vllm/
+  - tests/models/language/generation_ppl_test
+  commands:
+  - pytest -v -s models/language/generation_ppl_test
 
-- label: V1 Speculative Decoding (slow) # TBD
+- label: Language Models Tests (Extra Standard) %N # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
+  torch_nightly: true
+  parallelism: 2
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/v1/spec_decode/
   - vllm/model_executor/models/
-  - vllm/v1/attention/
+  - vllm/model_executor/model_loader/
   - vllm/model_executor/layers/
-  - tests/v1/spec_decode/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
+  - tests/models/language/pooling/test_embedding.py
+  - tests/models/language/generation/test_common.py
+  - tests/models/language/pooling/test_classification.py
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_eagle.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_extract_hidden_states.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_max_len.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_mtp.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_ngram.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_speculators_eagle3.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_tree_attention.py
+  - pip freeze | grep -E 'torch'
+  - pytest -v -s models/language -m 'core_model and slow_test' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
 
+#----------------------------------------------------  mi250 · models / multimodal  ----------------------------------------------------#
 
-- label: V1 attention (H100-MI250) # TBD
+- label: Multi-Modal Models (Extended Generation 2) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/config/attention.py
-  - vllm/model_executor/layers/attention
-  - vllm/v1/attention
-  - tests/v1/attention
-  - vllm/_aiter_ops.py
-  - vllm/envs.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/models/multimodal/generation
   commands:
-  - pytest -v -s v1/attention
+  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+  - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model'
 
+- label: Multi-Modal Models (Extended Pooling) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
+  agent_pool: mi250_1
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - vllm/
+  - tests/models/multimodal/pooling
+  commands:
+  - pytest -v -s models/multimodal/pooling -m 'not core_model'
 
-- label: V1 others (CPU) # TBD
+- label: "Multi-Modal Models (Standard) 3: llava + qwen2_vl" # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
-  no_gpu: true
+  torch_nightly: true
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/v1
+  - tests/models/multimodal
   commands:
-  - pytest -v -s -m 'cpu_test' v1/core
-  - pytest -v -s v1/structured_output
-  - pytest -v -s v1/test_serial_utils.py
-  - pytest -v -s -m 'cpu_test' v1/kv_connector/unit
-  - pytest -v -s -m 'cpu_test' v1/metrics
+  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+  - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "not qwen2 and not qwen3 and not gemma"
+  - pytest -v -s models/multimodal/generation/test_qwen2_vl.py -m core_model
 
+#----------------------------------------------------------  mi250 · plugins  ----------------------------------------------------------#
 
-- label: Examples # TBD
+- label: Plugin Tests (2 GPUs) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
+  agent_pool: mi250_2
+  num_gpus: 2
   optional: true
-  working_dir: "/vllm-workspace/examples"
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/entrypoints
-  - vllm/multimodal
-  - examples/
+  - vllm/plugins/
+  - tests/plugins/
   - vllm/platforms/rocm.py
   commands:
-    - pip install tensorizer
-    # Basic
-    - python3 basic/offline_inference/chat.py --attention-backend TRITON_ATTN
-    - python3 basic/offline_inference/generate.py --model facebook/opt-125m
-    - python3 basic/offline_inference/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
-    - python3 basic/offline_inference/classify.py
-    - python3 basic/offline_inference/embed.py
-    - python3 basic/offline_inference/score.py
-    # Multi-modal models
-    - python3 offline_inference/audio_language.py --seed 0
-    - python3 offline_inference/vision_language.py --seed 0
-    - python3 offline_inference/vision_language_multi_image.py --seed 0
-    - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
-    # Pooling models
-    - python3 pooling/embed/vision_embedding_offline.py --seed 0
-    # Features demo
-    - python3 offline_inference/prefix_caching.py
-    - python3 offline_inference/llm_engine_example.py
-    - python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
-    - python3 offline_inference/spec_decode.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
-    - python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536
+  # BEGIN: platform plugin and general plugin tests, all the code in-between runs on dummy platform
+  - pip install -e ./plugins/vllm_add_dummy_platform
+  - pytest -v -s plugins_tests/test_platform_plugins.py
+  - pip uninstall vllm_add_dummy_platform -y
+  # END: platform plugin tests
+  # BEGIN: `io_processor` plugins test, all the code in between uses the `prithvi_io_processor` plugin
+  - pip install -e ./plugins/prithvi_io_processor_plugin
+  - pytest -v -s plugins_tests/test_io_processor_plugins.py
+  - pytest -v -s plugins_tests/test_terratorch_io_processor_plugins.py
+  - pip uninstall prithvi_io_processor_plugin -y
+  # END: `io_processor` plugins test
+  # BEGIN: `bge_m3_sparse io_processor` test
+  - pip install -e ./plugins/bge_m3_sparse_plugin
+  - pytest -v -s plugins_tests/test_bge_m3_sparse_io_processor_plugins.py
+  - pip uninstall bge_m3_sparse_plugin -y
+  # END: `bge_m3_sparse io_processor` test
+  # BEGIN: `stat_logger` plugins test
+  - pip install -e ./plugins/vllm_add_dummy_stat_logger
+  - pytest -v -s plugins_tests/test_stats_logger_plugins.py
+  - pip uninstall dummy_stat_logger -y
+  # END: `stat_logger` plugins test
+  # BEGIN: other tests
+  - pytest -v -s plugins_tests/test_scheduler_plugins.py
+  - pip install -e ./plugins/vllm_add_dummy_model
+  - pytest -v -s distributed/test_distributed_oot.py
+  - pytest -v -s entrypoints/openai/chat_completion/test_oot_registration.py
+  - pytest -v -s models/test_oot_registration.py
+  - pytest -v -s plugins/lora_resolvers
 
+#------------------------------------------------------------  mi250 · v1  -------------------------------------------------------------#
 
-- label: Platform Tests (CUDA) # TBD
+- label: Batch Invariance (H100-MI250) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/cuda
+  - vllm/v1/attention
+  - vllm/model_executor/layers
+  - tests/v1/determinism/
+  - vllm/_aiter_ops.py
+  - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s cuda/test_cuda_context.py
-  - pytest -v -s cuda/test_platform_no_cuda_init.py
-
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pip install pytest-timeout pytest-forked
+  - pytest -v -s v1/determinism/test_batch_invariance.py
+  - pytest -v -s v1/determinism/test_rms_norm_batch_invariant.py
 
-- label: Samplers Test # TBD
+- label: Cudagraph # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/model_executor/layers
-  - vllm/sampling_metadata.py
-  - vllm/v1/sample/
-  - vllm/beam_search.py
-  - tests/samplers
-  - tests/conftest.py
-  - vllm/_aiter_ops.py
+  - tests/v1/cudagraph
+  - vllm/v1/cudagraph_dispatcher.py
+  - vllm/config/compilation.py
+  - vllm/compilation
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s samplers
-
+  - pytest -v -s v1/cudagraph/test_cudagraph_dispatch.py
+  - pytest -v -s v1/cudagraph/test_cudagraph_mode.py
 
-- label: LoRA %N # TBD
+- label: e2e Core (1 GPU) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
-  parallelism: 4
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/lora
-  - tests/lora
+  - vllm/v1/
+  - tests/v1/e2e/general/
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_llm_with_multi_loras.py --ignore=lora/test_olmoe_tp.py --ignore=lora/test_deepseekv2_tp.py --ignore=lora/test_gptoss_tp.py --ignore=lora/test_qwen3moe_tp.py
-
+  - pytest -v -s v1/e2e/general --ignore v1/e2e/general/test_async_scheduling.py
 
-- label: PyTorch Compilation Unit Tests # TBD
+- label: e2e Scheduling (1 GPU) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
-  torch_nightly: true
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/compilation/
-  - vllm/model_executor/layers/
-  - vllm/v1/worker/
-  - vllm/v1/attention/
-  - vllm/v1/cudagraph_dispatcher.py
-  - vllm/config/compilation.py
-  - csrc/
-  - tests/compile
+  - vllm/v1/
+  - tests/v1/e2e/general/
   - vllm/platforms/rocm.py
   commands:
-  - "find compile/ -maxdepth 1 -name 'test_*.py' -print0 | xargs -0 -n1 -I{} pytest -s -v '{}'"
-
+  - pytest -v -s v1/e2e/general/test_async_scheduling.py
 
-- label: PyTorch Fullgraph Smoke Test # TBD
+- label: Engine (1 GPU) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
   optional: true
-  torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/compilation/
-  - vllm/model_executor/
-  - vllm/v1/attention/
-  - vllm/config/compilation.py
-  - csrc/
-  - tests/compile
+  - vllm/v1/
+  - tests/v1/engine/
   - vllm/platforms/rocm.py
   commands:
-  - "find compile/fullgraph/ -name 'test_*.py' -not -name 'test_full_graph.py' -exec pytest -s -v {} \\\\;"
-
+  - pytest -v -s v1/engine/test_preprocess_error_handling.py
+  - pytest -v -s v1/engine --ignore v1/engine/test_preprocess_error_handling.py
 
-- label: PyTorch Fullgraph # TBD
+- label: Spec Decode Draft Model # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
   optional: true
-  torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/compilation/
-  - vllm/model_executor/
-  - vllm/v1/attention/
-  - vllm/config/compilation.py
-  - csrc/
-  - tests/compile
+  - vllm/v1/spec_decode/
+  - vllm/v1/worker/gpu/spec_decode/
+  - vllm/model_executor/model_loader/
+  - vllm/v1/sample/
+  - vllm/model_executor/layers/
+  - tests/v1/e2e/spec_decode/
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s compile/fullgraph/test_full_graph.py -k 'not test_fp8_kv_scale_compile'
-
+  - pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"
 
-- label: Cudagraph # TBD
+- label: Spec Decode Speculators + MTP # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - tests/v1/cudagraph
-  - vllm/v1/cudagraph_dispatcher.py
-  - vllm/config/compilation.py
-  - vllm/compilation
+  - vllm/v1/spec_decode/
+  - vllm/v1/worker/gpu/spec_decode/
+  - vllm/model_executor/model_loader/
+  - vllm/v1/sample/
+  - vllm/model_executor/layers/
+  - vllm/transformers_utils/configs/speculators/
+  - tests/v1/e2e/spec_decode/
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s v1/cudagraph/test_cudagraph_dispatch.py
-  - pytest -v -s v1/cudagraph/test_cudagraph_mode.py
-
+  - pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
 
-- label: Kernels Core Operation Test # TBD
+- label: V1 attention (H100-MI250) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/
-  - tests/kernels/core
-  - tests/kernels/test_top_k_per_row.py
-  - tests/kernels/test_concat_mla_q.py
-  - vllm/model_executor/layers/rotary_embedding/
+  - vllm/config/attention.py
+  - vllm/model_executor/layers/attention
+  - vllm/v1/attention
+  - tests/v1/attention
   - vllm/_aiter_ops.py
+  - vllm/envs.py
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s kernels/core kernels/test_top_k_per_row.py
-
+  - pytest -v -s v1/attention
 
-- label: Kernels Mamba Test # TBD
-  timeout_in_minutes: 180
+- label: V1 Sample + Logits # TBD
+  timeout_in_minutes: 60
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/mamba/
-  - tests/kernels/mamba
-  - vllm/model_executor/layers/mamba/ops
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/v1/sample
+  - tests/v1/logits_processors
+  - tests/v1/test_oracle.py
+  - tests/v1/test_request.py
+  - tests/v1/test_outputs.py
   commands:
-  - pytest -v -s kernels/mamba
-
+  - pytest -v -s v1/sample
+  - pytest -v -s v1/logits_processors
+  - pytest -v -s v1/test_oracle.py
+  - pytest -v -s v1/test_request.py
+  - pytest -v -s v1/test_outputs.py
 
-- label: Kernels Helion Test # TBD
+- label: Distributed DP Tests (2 GPUs) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
+  agent_pool: mi250_2
+  num_gpus: 2
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/utils/import_utils.py
-  - tests/kernels/helion/
+  - vllm/distributed/
+  - vllm/engine/
+  - vllm/executor/
+  - vllm/worker/worker_base.py
+  - vllm/v1/engine/
+  - vllm/v1/worker/
+  - tests/v1/distributed
+  - tests/entrypoints/openai/test_multi_api_servers.py
   - vllm/platforms/rocm.py
   commands:
-  - pip install helion
-  - pytest -v -s kernels/helion/
-
+  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
+  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
+  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
+  - DP_SIZE=2 pytest -v -s entrypoints/openai/test_multi_api_servers.py
 
-- label: Model Executor # TBD
+- label: NixlConnector PD + Spec Decode acceptance (2 GPUs) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  optional: true
-  torch_nightly: true
+  agent_pool: mi250_2
+  num_gpus: 2
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/engine/arg_utils.py
-  - vllm/config/model.py
-  - vllm/model_executor
-  - tests/model_executor
-  - tests/entrypoints/openai/completion/test_tensorizer_entrypoint.py
-  - vllm/_aiter_ops.py
+  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+  - vllm/v1/worker/kv_connector_model_runner_mixin.py
+  - tests/v1/kv_connector/nixl_integration/
   - vllm/platforms/rocm.py
   commands:
-  - apt-get update && apt-get install -y curl libsodium23
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s model_executor -m '(not slow_test)'
-  - pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py
-
+  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
+  - ATTENTION_BACKEND=ROCM_ATTN bash v1/kv_connector/nixl_integration/spec_decode_acceptance_test.sh
 
-- label: Benchmarks # TBD
+- label: V1 e2e (2 GPUs) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  working_dir: "/vllm-workspace/.buildkite"
+  agent_pool: mi250_2
+  optional: true
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - benchmarks/
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/v1/e2e
   commands:
-  - bash scripts/run-benchmarks.sh
-
+    - pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "tensor_parallelism"
 
-- label: Benchmarks CLI Test # TBD
+- label: Distributed NixlConnector PD accuracy (4 GPUs)  # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
+  agent_pool: mi250_4
+  num_gpus: 4
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/benchmarks/
+  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+  - tests/v1/kv_connector/nixl_integration/
+  - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s benchmarks/
+  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
+  - ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
 
+#-------------------------------------------------------------  mi250 · misc  ------------------------------------------------------------#
 
-- label: OpenAI API correctness # TBD
+- label: Async Engine, Inputs, Utils, Worker, Config (CPU) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
   agent_pool: mi250_1
+  optional: true
+  no_gpu: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/
-  - vllm/entrypoints/openai/
-  - vllm/model_executor/models/whisper.py
-  - vllm/model_executor/layers/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
-  - vllm/model_executor/model_loader/
+  - vllm/
+  - tests/test_inputs.py
+  - tests/test_outputs.py
+  - tests/test_pooling_params.py
+  - tests/test_ray_env.py
+  - tests/multimodal
+  - tests/renderers
+  - tests/standalone_tests/lazy_imports.py
+  - tests/tokenizers_
+  - tests/reasoning
+  - tests/tool_parsers
+  - tests/parser
+  - tests/transformers_utils
+  - tests/config
   commands:
-  - bash ../tools/install_torchcodec_rocm.sh || exit 1
-  - pytest -s entrypoints/openai/correctness/
+  - python3 standalone_tests/lazy_imports.py
+  - pytest -v -s test_inputs.py
+  - pytest -v -s test_outputs.py
+  - pytest -v -s test_pooling_params.py
+  - pytest -v -s test_ray_env.py
+  - pytest -v -s -m 'cpu_test' multimodal
+  - pytest -v -s renderers
+  - pytest -v -s tokenizers_
+  - pytest -v -s reasoning --ignore=reasoning/test_seedoss_reasoning_parser.py --ignore=reasoning/test_glm4_moe_reasoning_parser.py
+  - pytest -v -s tool_parsers
+  - pytest -v -s parser
+  - pytest -v -s transformers_utils
+  - pytest -v -s config
 
+#########################################################################################################################################
+#                                                                                                                                       #
+#                                                         MI300 (gfx942) tests                                                          #
+#                                                                                                                                       #
+#########################################################################################################################################
 
-- label: Basic Models Tests (Initialization) # TBD
+#-----------------------------------------------------  mi300 · basic_correctness  -----------------------------------------------------#
+
+- label: Basic Correctness # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  fast_check: true
   torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/models/test_initialization.py
-  - tests/models/registry.py
+  - tests/basic_correctness/test_basic_correctness
+  - tests/basic_correctness/test_cpu_offload
+  - tests/basic_correctness/test_cumem.py
   commands:
-  - pytest -v -s models/test_initialization.py::test_can_initialize_small_subset
-
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s basic_correctness/test_cumem.py
+  - pytest -v -s basic_correctness/test_basic_correctness.py
+  - pytest -v -s basic_correctness/test_cpu_offload.py
 
-- label: Basic Models Tests (Extra Initialization) %N # TBD
+- label: Distributed Model Tests (2 GPUs) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  torch_nightly: true
-  parallelism: 2
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_2
+  num_gpus: 2
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
+  - vllm/model_executor/model_loader/sharded_state_loader.py
   - vllm/model_executor/models/
   - vllm/model_executor/layers/
-  - tests/models/test_initialization.py
-  - tests/models/registry.py
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
   - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
+  - tests/basic_correctness/
+  - tests/model_executor/model_loader/test_sharded_state_loader.py
+  - tests/models/
   commands:
-  - pytest -v -s models/test_initialization.py -k 'not test_can_initialize_small_subset' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
+  - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
+  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s model_executor/model_loader/test_sharded_state_loader.py -m '(not slow_test)'
+  - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
+  - pytest models/language -v -s -m 'distributed(num_gpus=2)'
+  - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/generation/test_phi4siglip.py
+  - pytest models/multimodal/generation/test_phi4siglip.py -v -s -m 'distributed(num_gpus=2)'
+  - VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)'
 
+#--------------------------------------------------------  mi300 · benchmarks  ---------------------------------------------------------#
 
-- label: Basic Models Tests (Other) # TBD
+- label: Benchmarks CLI Test # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  torch_nightly: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/models/test_terratorch.py
-  - tests/models/test_transformers.py
-  - tests/models/test_registry.py
+  - tests/benchmarks/
   commands:
-  - pytest -v -s models/test_terratorch.py models/test_transformers.py models/test_registry.py
+  - pytest -v -s benchmarks/
 
+#----------------------------------------------------------  mi300 · compile  ----------------------------------------------------------#
 
-- label: Basic Models Test (Other CPU) # TBD
+- label: Fusion E2E Config Sweep (H100-MI300) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  no_gpu: true
-  optional: true
-  torch_nightly: true
-  working_dir: "/vllm-workspace/tests"
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  num_gpus: 1
+  working_dir: "/vllm-workspace/"
   source_file_dependencies:
-  - vllm/
-  - tests/models/test_utils.py
-  - tests/models/test_vision.py
+  - csrc/quantization/
+  - vllm/compilation/
+  - vllm/model_executor/layers/layernorm.py
+  - vllm/model_executor/layers/activation.py
+  - vllm/model_executor/layers/attention/attention.py
+  - vllm/model_executor/layers/quantization/input_quant_fp8.py
+  - tests/compile/fusions_e2e/
+  - vllm/_aiter_ops.py
+  - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s models/test_utils.py models/test_vision.py
-
+  - rocm-smi
+  - pytest -v -s tests/compile/fusions_e2e/test_tp1_quant.py -k "llama-3"
 
-- label: Language Models Tests (Extra Standard) %N # TBD
+- label: Fusion E2E Quick (H100-MI300) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  torch_nightly: true
-  parallelism: 2
-  working_dir: "/vllm-workspace/tests"
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  num_gpus: 1
+  working_dir: "/vllm-workspace/"
   source_file_dependencies:
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/model_executor/layers/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - tests/models/language/pooling/test_embedding.py
-  - tests/models/language/generation/test_common.py
-  - tests/models/language/pooling/test_classification.py
+  - csrc/quantization/
+  - vllm/model_executor/
+  - vllm/v1/attention/
+  - vllm/compilation/
+  - tests/compile/fusions_e2e/
   - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - pip freeze | grep -E 'torch'
-  - export TORCH_NCCL_BLOCKING_WAIT=1
-  - pytest -v -s models/language -m 'core_model and slow_test' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
-
+  - rocm-smi
+  # Run all models and attn backends but only Inductor partition and native custom ops
+  - "pytest -v -s tests/compile/fusions_e2e/test_tp1_quant.py -k 'inductor_partition and not +rms_norm and not +quant_fp8'"
+  # Different from CUDA, Qwen requires +rms_norm and +quant_fp8 as rms+quant fusion is only supported on AITER
+  - "pytest -v -s tests/compile/fusions_e2e/test_tp1_quant.py -k 'inductor_partition and +rms_norm and +quant_fp8 and qwen3'"
 
-- label: Language Models Test (PPL) # TBD
+- label: PyTorch Compilation Passes Unit Tests # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/models/language/generation_ppl_test
+  - tests/compile/passes
   commands:
-  - pytest -v -s models/language/generation_ppl_test
-
+  - pytest -s -v compile/passes --ignore compile/passes/distributed
 
-- label: Language Models Test (Extended Pooling)  # TBD
+- label: Pytorch Nightly Dependency Override Check # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  optional: true
+  soft_fail: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/language/pooling
+  - requirements/test/nightly-torch.txt
+  - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s models/language/pooling -m 'not core_model'
+  - bash standalone_tests/pytorch_nightly_dependency.sh
+
+- label: Distributed Compile Unit Tests (2xH100-2xMI300) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_2
+  num_gpus: 2
+  working_dir: "/vllm-workspace/"
+  source_file_dependencies:
+  - vllm/compilation/
+  - vllm/model_executor/layers
+  - tests/compile/passes/distributed/
+  - tests/compile/fusions_e2e/
+  - vllm/_aiter_ops.py
+  - vllm/platforms/rocm.py
+  commands:
+  - export VLLM_TEST_CLEAN_GPU_MEMORY=1
+  - VLLM_TEST_CLEAN_GPU_MEMORY=1 pytest -v -s tests/compile/passes/distributed/test_async_tp.py
+  - pytest -v -s tests/compile/fusions_e2e/test_tp2_ar_rms.py::test_tp2_ar_rms_fusions
 
+#-----------------------------------------------------------  mi300 · cuda  ------------------------------------------------------------#
 
-- label: Language Models Test (MTEB) # TBD
+- label: Platform Tests (CUDA) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/models/language/pooling_mteb_test
+  - tests/cuda
   commands:
-  - pytest -v -s models/language/pooling_mteb_test
+  - pytest -v -s cuda/test_cuda_context.py
+  - pytest -v -s cuda/test_platform_no_cuda_init.py
 
+#--------------------------------------------------------  mi300 · detokenizer  --------------------------------------------------------#
 
-- label: Multi-Modal Processor (CPU) # TBD
+- label: Async Engine, Inputs, Utils, Worker # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  no_gpu: true
-  optional: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/models/multimodal
-  - tests/models/registry.py
+  - tests/detokenizer
+  - tests/multimodal
+  - tests/utils_
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py
+  - pytest -v -s detokenizer
+  - pytest -v -s -m 'not cpu_test' multimodal
+  - pytest -v -s utils_
 
+#--------------------------------------------------------  mi300 · distributed  --------------------------------------------------------#
 
-- label: Multi-Modal Accuracy Eval (Small Models) # TBD
+- label: EPLB Algorithm # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  optional: true
-  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/multimodal/
-  - vllm/inputs/
-  - vllm/v1/core/
+  - vllm/distributed/eplb
+  - tests/distributed/test_eplb_algo.py
   - vllm/platforms/rocm.py
-  - vllm/model_executor/model_loader/
   commands:
-  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-mm-small.txt --tp-size=1
-
+  - pytest -v -s distributed/test_eplb_algo.py
+  - pytest -v -s distributed/test_eplb_utils.py
 
-- label: "Multi-Modal Models (Standard) 1: qwen2" # TBD
+- label: Distributed Tests (2xH100-2xMI250) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  torch_nightly: true
-  working_dir: "/vllm-workspace/tests"
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_2
+  num_gpus: 2
+  working_dir: "/vllm-workspace/"
   source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal
+  - vllm/distributed/
+  - vllm/v1/distributed/
+  - vllm/model_executor/layers/fused_moe/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
+  - tests/distributed/test_context_parallel.py
+  - examples/features/data_parallel/data_parallel_offline.py
+  - vllm/_aiter_ops.py
+  - vllm/platforms/rocm.py
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "qwen2"
-  - pytest -v -s models/multimodal/generation/test_ultravox.py -m core_model
-
+  - pytest -v -s tests/distributed/test_context_parallel.py
+  - VLLM_LOGGING_LEVEL=DEBUG python3 examples/features/data_parallel/data_parallel_offline.py --model=Qwen/Qwen1.5-MoE-A2.7B -tp=1 -dp=2 --max-model-len=2048 --all2all-backend=allgather_reducescatter --disable-nccl-for-dp-synchronization
 
-- label: "Multi-Modal Models (Standard) 2: qwen3 + gemma" # TBD
+- label: Distributed Tests (4xA100-4xMI300) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  torch_nightly: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
+  num_gpus: 4
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/models/multimodal
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "qwen3 or gemma"
-  - pytest -v -s models/multimodal/generation/test_qwen2_5_vl.py -m core_model
-
+  - pytest -v -s distributed/test_custom_all_reduce.py
+  - torchrun --nproc_per_node=2 distributed/test_ca_buffer_sharing.py
+  - TARGET_TEST_SUITE=A100 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
+  - pytest -v -s -x lora/test_mixtral.py
 
-- label: "Multi-Modal Models (Standard) 3: llava + qwen2_vl" # TBD
+- label: Distributed Torchrun + Examples (4 GPUs) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  torch_nightly: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
+  num_gpus: 4
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal
+  - vllm/distributed/
+  - tests/distributed/test_torchrun_example.py
+  - tests/distributed/test_torchrun_example_moe.py
+  - examples/rl/
+  - tests/examples/features/data_parallel/data_parallel_offline.py
+  - vllm/platforms/rocm.py
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "not qwen2 and not qwen3 and not gemma"
-  - pytest -v -s models/multimodal/generation/test_qwen2_vl.py -m core_model
-
+  - torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
+  - PP_SIZE=2 torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
+  - TP_SIZE=4 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
+  - PP_SIZE=2 TP_SIZE=2 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
+  - DP_SIZE=4 ENABLE_EP=1 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
+  - TP_SIZE=2 DP_SIZE=2 ENABLE_EP=1 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
+  - python3 ../examples/features/data_parallel/data_parallel_offline.py --enforce-eager
+  # rlhf examples
+  - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 ../examples/rl/rlhf_nccl.py
+  - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 ../examples/rl/rlhf_ipc.py
 
-- label: "Multi-Modal Models (Standard) 4: other + whisper" # TBD
+- label: Elastic EP Scaling Test # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  torch_nightly: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
+  num_gpus: 4
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal/generation
-  - tests/models/multimodal/test_mapping.py
+  - vllm/distributed/
+  - vllm/engine/
+  - vllm/executor/
+  - vllm/compilation/
+  - tests/distributed/
+  - vllm/platforms/rocm.py
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/generation/test_ultravox.py --ignore models/multimodal/generation/test_qwen2_5_vl.py --ignore models/multimodal/generation/test_qwen2_vl.py --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing
-  - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model
+  - pytest -v -s distributed/test_elastic_ep.py
 
+- label: RayExecutorV2 (4 GPUs) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
+  num_gpus: 4
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - vllm/v1/executor/ray_executor_v2.py
+  - vllm/v1/executor/abstract.py
+  - vllm/v1/executor/multiproc_executor.py
+  - tests/distributed/test_ray_v2_executor.py
+  - tests/distributed/test_ray_v2_executor_e2e.py
+  - tests/distributed/test_pipeline_parallel.py
+  - tests/basic_correctness/test_basic_correctness.py
+  - vllm/platforms/rocm.py
+  commands:
+  - export VLLM_USE_RAY_V2_EXECUTOR_BACKEND=1
+  - pytest -v -s distributed/test_ray_v2_executor.py
+  - pytest -v -s distributed/test_ray_v2_executor_e2e.py
+  - pytest -v -s distributed/test_pipeline_parallel.py -k "ray"
+  - TARGET_TEST_SUITE=L4 pytest -v -s basic_correctness/test_basic_correctness.py -k "ray"
 
-- label: Multi-Modal Models (Extended Generation 1) # TBD
+- label: Distributed Tests (8xH100-8xMI300) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_8
+  num_gpus: 8
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal/generation
-  - tests/models/multimodal/test_mapping.py
+  - examples/features/torchrun/torchrun_dp_example_offline.py
+  - vllm/config/parallel.py
+  - vllm/distributed/
+  - vllm/v1/engine/llm_engine.py
+  - vllm/v1/executor/uniproc_executor.py
+  - vllm/v1/worker/gpu_worker.py
+  - vllm/platforms/rocm.py
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation -m 'not core_model' --ignore models/multimodal/generation/test_common.py
-  - pytest -v -s models/multimodal/test_mapping.py
+  - torchrun --nproc-per-node=8 ../examples/features/torchrun/torchrun_dp_example_offline.py --tp-size=2 --pp-size=1 --dp-size=4 --enable-ep
 
+#--------------------------------------------------------  mi300 · entrypoints  --------------------------------------------------------#
 
-- label: Multi-Modal Models (Extended Generation 2) # TBD
+- label: Entrypoints Integration (API Server 2) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  optional: true
+  fast_check: true
+  torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/models/multimodal/generation
+  - tests/entrypoints/rpc
+  - tests/entrypoints/serve/instrumentator
+  - tests/tool_use
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model'
-
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s entrypoints/serve/instrumentator
+  - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
+  - pytest -v -s tool_use
 
-- label: Multi-Modal Models (Extended Generation 3) # TBD
+- label: Entrypoints Integration (API Server openai - Part 1) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  fast_check: true
+  torch_nightly: true
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/models/multimodal/generation
+  - tests/entrypoints/openai
+  - tests/entrypoints/test_chat_utils
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model'
-
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s entrypoints/openai/chat_completion --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py
 
-- label: Multi-Modal Models (Extended Pooling) # TBD
+- label: Entrypoints Integration (API Server openai - Part 2) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  optional: true
+  fast_check: true
+  torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/models/multimodal/pooling
+  - tests/entrypoints/openai
+  - tests/entrypoints/test_chat_utils
   commands:
-  - pytest -v -s models/multimodal/pooling -m 'not core_model'
-
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s entrypoints/openai/completion --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py
+  - pytest -v -s entrypoints/test_chat_utils.py
 
-- label: Distributed Comm Ops # TBD
+- label: Entrypoints Integration (API Server openai - Part 3) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  num_gpus: 2
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  optional: true
+  fast_check: true
+  torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/distributed
-  - tests/distributed
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/entrypoints/openai
+  - tests/entrypoints/test_chat_utils
   commands:
-  - pytest -v -s distributed/test_comm_ops.py
-  - pytest -v -s distributed/test_shm_broadcast.py
-  - pytest -v -s distributed/test_shm_buffer.py
-  - pytest -v -s distributed/test_shm_storage.py
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion --ignore=entrypoints/openai/completion --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses --ignore=entrypoints/openai/test_multi_api_servers.py
 
+- label: Entrypoints Integration (Speech to Text) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  fast_check: true
+  torch_nightly: true
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - vllm/
+  - tests/entrypoints/speech_to_text
+  commands:
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s entrypoints/speech_to_text
 
-- label: Distributed DP Tests (2 GPUs) # TBD
+- label: Entrypoints Integration (LLM) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_2
-  num_gpus: 2
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
+  fast_check: true
+  torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/distributed/
-  - vllm/engine/
-  - vllm/executor/
-  - vllm/worker/worker_base.py
-  - vllm/v1/engine/
-  - vllm/v1/worker/
-  - tests/v1/distributed
-  - tests/entrypoints/openai/test_multi_api_servers.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/entrypoints/llm
+  - tests/entrypoints/offline_mode
   commands:
-  - export TORCH_NCCL_BLOCKING_WAIT=1
-  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
-  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
-  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
-  - DP_SIZE=2 pytest -v -s entrypoints/openai/test_multi_api_servers.py
-
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
+  - pytest -v -s entrypoints/llm/test_generate.py
+  - pytest -v -s entrypoints/offline_mode
 
-- label: Distributed Compile + RPC Tests (2 GPUs) # TBD
+- label: Entrypoints Integration (Pooling) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_2
-  num_gpus: 2
-  optional: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  fast_check: true
+  torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/compilation/
-  - vllm/distributed/
-  - vllm/engine/
-  - vllm/executor/
-  - vllm/worker/worker_base.py
-  - vllm/v1/engine/
-  - vllm/v1/worker/
-  - tests/compile/fullgraph/test_basic_correctness.py
-  - tests/compile/test_wrapper.py
-  - tests/entrypoints/llm/test_collective_rpc.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/entrypoints/pooling
   commands:
-  - export TORCH_NCCL_BLOCKING_WAIT=1
-  - pytest -v -s entrypoints/llm/test_collective_rpc.py
-  - pytest -v -s ./compile/fullgraph/test_basic_correctness.py
-  - pytest -v -s ./compile/test_wrapper.py
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s entrypoints/pooling
 
+- label: Entrypoints Integration (Responses API) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  fast_check: true
+  torch_nightly: true
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - vllm/
+  - tests/entrypoints/openai/responses
+  commands:
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s entrypoints/openai/responses
 
-- label: Distributed Torchrun + Shutdown Tests (2 GPUs) # TBD
+- label: Entrypoints Unit Tests # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_2
-  num_gpus: 2
-  optional: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  fast_check: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/distributed/
-  - vllm/engine/
-  - vllm/executor/
-  - vllm/worker/worker_base.py
-  - vllm/v1/engine/
-  - vllm/v1/worker/
-  - tests/distributed/
-  - tests/v1/shutdown
-  - tests/v1/worker/test_worker_memory_snapshot.py
+  - vllm/entrypoints
+  - tests/entrypoints/
   - vllm/platforms/rocm.py
   commands:
-  - export TORCH_NCCL_BLOCKING_WAIT=1
-  - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
-  - VLLM_TEST_SAME_HOST=1 VLLM_TEST_WITH_DEFAULT_DEVICE_SET=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
-  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
-  - pytest -v -s v1/worker/test_worker_memory_snapshot.py
-
+  - pytest -v -s entrypoints/openai/tool_parsers
+  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/serve/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py  --ignore=entrypoints/pooling
 
-- label: Distributed Model Tests (2 GPUs) # TBD
+- label: OpenAI API correctness # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_2
-  num_gpus: 2
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/model_executor/model_loader/sharded_state_loader.py
-  - vllm/model_executor/models/
+  - csrc/
+  - vllm/entrypoints/openai/
+  - vllm/model_executor/models/whisper.py
   - vllm/model_executor/layers/
   - vllm/v1/attention/backends/
   - vllm/v1/attention/selector.py
-  - tests/basic_correctness/
-  - tests/model_executor/model_loader/test_sharded_state_loader.py
-  - tests/models/
   - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
+  - vllm/model_executor/model_loader/
   commands:
-  - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
-  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s model_executor/model_loader/test_sharded_state_loader.py -m '(not slow_test)'
-  - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
-  - pytest models/language -v -s -m 'distributed(num_gpus=2)'
-  - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py
-  - VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)'
+  - bash ../tools/install_torchcodec_rocm.sh || exit 1
+  - pytest -s entrypoints/openai/correctness/
 
+#-----------------------------------------------------------  mi300 · evals  -----------------------------------------------------------#
 
-- label: Plugin Tests (2 GPUs) # TBD
+- label: DeepSeek V2-Lite Prefetch Offload Accuracy (H100-MI300) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_2
-  num_gpus: 2
-  optional: true
-  working_dir: "/vllm-workspace/tests"
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  num_gpus: 1
+  working_dir: "/vllm-workspace"
   source_file_dependencies:
-  - vllm/plugins/
-  - tests/plugins/
+  - vllm/model_executor/models/
+  - vllm/model_executor/model_loader/
+  - vllm/model_executor/layers/fused_moe/
+  - vllm/model_executor/layers/quantization/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/backends/mla/
+  - vllm/v1/attention/selector.py
+  - .buildkite/scripts/scheduled_integration_test/
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  # BEGIN: platform plugin and general plugin tests, all the code in-between runs on dummy platform
-  - pip install -e ./plugins/vllm_add_dummy_platform
-  - pytest -v -s plugins_tests/test_platform_plugins.py
-  - pip uninstall vllm_add_dummy_platform -y
-  # END: platform plugin tests
-  # BEGIN: `io_processor` plugins test, all the code in between uses the `prithvi_io_processor` plugin
-  - pip install -e ./plugins/prithvi_io_processor_plugin
-  - pytest -v -s plugins_tests/test_io_processor_plugins.py
-  - pip uninstall prithvi_io_processor_plugin -y
-  # END: `io_processor` plugins test
-  # BEGIN: `bge_m3_sparse io_processor` test
-  - pip install -e ./plugins/bge_m3_sparse_plugin
-  - pytest -v -s plugins_tests/test_bge_m3_sparse_io_processor_plugins.py
-  - pip uninstall bge_m3_sparse_plugin -y
-  # END: `bge_m3_sparse io_processor` test
-  # BEGIN: `stat_logger` plugins test
-  - pip install -e ./plugins/vllm_add_dummy_stat_logger
-  - pytest -v -s plugins_tests/test_stats_logger_plugins.py
-  - pip uninstall dummy_stat_logger -y
-  # END: `stat_logger` plugins test
-  # BEGIN: other tests
-  - pytest -v -s plugins_tests/test_scheduler_plugins.py
-  - pip install -e ./plugins/vllm_add_dummy_model
-  - pytest -v -s distributed/test_distributed_oot.py
-  - pytest -v -s entrypoints/openai/chat_completion/test_oot_registration.py
-  - pytest -v -s models/test_oot_registration.py
-  - pytest -v -s plugins/lora_resolvers
-  # END: other tests
-
+  - bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_prefetch_offload.sh 0.25 200 8030
 
-- label: Pipeline + Context Parallelism (4 GPUs) # TBD
+- label: LM Eval Small Models # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_4
-  num_gpus: 4
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/distributed/
-  - vllm/engine/
-  - vllm/executor/
+  - csrc/
+  - vllm/model_executor/layers/quantization
   - vllm/model_executor/models/
-  - vllm/model_executor/layers/
+  - vllm/model_executor/model_loader/
   - vllm/v1/attention/backends/
   - vllm/v1/attention/selector.py
-  - tests/distributed/
   - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s distributed/test_pp_cudagraph.py
-  - pytest -v -s distributed/test_pipeline_parallel.py
-
+  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-small.txt
 
-- label: Ray Dependency Compatibility Check # TBD
+- label: LM Eval Small Models (MI300) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_1
-  working_dir: "/"
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
   source_file_dependencies:
-  - requirements/
-  - setup.py
+  - csrc/
+  - vllm/model_executor/layers/quantization
+  - vllm/model_executor/models/
+  - vllm/model_executor/model_loader/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - bash /vllm-workspace/.buildkite/scripts/check-ray-compatibility.sh
+  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-small-rocm.txt
 
-
-- label: Distributed NixlConnector PD accuracy (4 GPUs)  # TBD
+- label: GPQA Eval (GPT-OSS) (2xH100-2xMI300) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_4
-  num_gpus: 4
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_2
+  num_gpus: 2
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-  - tests/v1/kv_connector/nixl_integration/
+  - csrc/
+  - vllm/model_executor/layers/quantization
+  - vllm/model_executor/models/
+  - vllm/model_executor/model_loader/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
+  - vllm/model_executor/layers/fused_moe/
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
+  - tests/evals/gpt_oss/
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
-  - ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
+    - uv pip install --system 'gpt-oss[eval]==0.0.5'
+    - pytest -s -v evals/gpt_oss/test_gpqa_correctness.py --config-list-file=configs/models-gfx942.txt
 
-- label: DP EP Distributed NixlConnector PD accuracy tests (4 GPUs) # TBD
+- label: LM Eval Small Models (2xB200-2xMI300) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_4
-  num_gpus: 4
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_2
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-  - tests/v1/kv_connector/nixl_integration/
+  - csrc/
+  - vllm/model_executor/layers/quantization
+  - vllm/model_executor/models/
+  - vllm/model_executor/model_loader/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
-  - DP_EP=1 ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
+  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-mi3xx-fp8-and-mixed.txt
 
-- label: NixlConnector PD + Spec Decode acceptance (2 GPUs) # TBD
+- label: DeepSeek V2-Lite Accuracy (4xH100-4xMI300) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_2
-  num_gpus: 2
-  working_dir: "/vllm-workspace/tests"
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
+  num_gpus: 4
+  optional: true
+  working_dir: "/vllm-workspace"
   source_file_dependencies:
-  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-  - vllm/v1/worker/kv_connector_model_runner_mixin.py
-  - tests/v1/kv_connector/nixl_integration/
+  - vllm/model_executor/models/
+  - vllm/model_executor/model_loader/
+  - vllm/distributed/eplb
+  - vllm/model_executor/layers/fused_moe/
+  - vllm/model_executor/layers/quantization/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/backends/mla/
+  - vllm/v1/attention/selector.py
+  - .buildkite/scripts/scheduled_integration_test/
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
-  - ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/spec_decode_acceptance_test.sh
-
+  - bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh 0.25 200 8010
 
-- label: CrossLayer KV layout Distributed NixlConnector PD accuracy tests (4 GPUs) # TBD
+- label: LM Eval Large Models (4xA100-4xMI300) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_4
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
   num_gpus: 4
-  working_dir: "/vllm-workspace/tests"
+  optional: true
+  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
   source_file_dependencies:
-  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-  - tests/v1/kv_connector/nixl_integration/
+  - csrc/
+  - vllm/model_executor/layers/quantization
+  - vllm/model_executor/models/
+  - vllm/model_executor/model_loader/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
-  - CROSS_LAYERS_BLOCKS=True ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
 
-- label: Hyrbid SSM NixlConnector PD accuracy tests (4 GPUs) # TBD
+- label: Qwen3-30B-A3B-FP8-block Accuracy (4xH100-4xMI300) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi250_4
-  num_gpus: 4
-  working_dir: "/vllm-workspace/tests"
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
+  optional: true
+  working_dir: "/vllm-workspace"
   source_file_dependencies:
-  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-  - tests/v1/kv_connector/nixl_integration/
+  - vllm/model_executor/models/
+  - vllm/model_executor/model_loader/
+  - vllm/model_executor/layers/quantization/
+  - vllm/distributed/eplb
+  - vllm/model_executor/layers/fused_moe/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
+  - .buildkite/scripts/scheduled_integration_test/
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
-  - HYBRID_SSM=1 ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
+  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020
 
-- label: Distributed Tests (2 GPUs)(H100-MI250) # TBD
+- label: Qwen3-Next-80B-A3B-Instruct MTP Async EPLB Accuracy # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
-  agent_pool: mi325_2
-  num_gpus: 2
-  working_dir: "/vllm-workspace/"
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
+  num_gpus: 4
+  optional: true
+  working_dir: "/vllm-workspace"
   source_file_dependencies:
-  - vllm/distributed/
-  - vllm/v1/distributed/
+  - vllm/model_executor/models/
+  - vllm/model_executor/model_loader/
+  - vllm/v1/spec_decode/
+  - vllm/distributed/eplb
   - vllm/model_executor/layers/fused_moe/
+  - vllm/model_executor/layers/quantization/
   - vllm/v1/attention/backends/
   - vllm/v1/attention/selector.py
-  - tests/distributed/test_context_parallel.py
-  - examples/offline_inference/data_parallel.py
+  - .buildkite/scripts/scheduled_integration_test/
   - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - export TORCH_NCCL_BLOCKING_WAIT=1
-  - pytest -v -s tests/distributed/test_context_parallel.py
-  - VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model=Qwen/Qwen1.5-MoE-A2.7B -tp=1 -dp=2 --max-model-len=2048 --all2all-backend=allgather_reducescatter --disable-nccl-for-dp-synchronization
-
-
-#####################################################################################################################################
-#                                                                                                                                   #
-#                                                             gfx942                                                                #
-#                                                                                                                                   #
-#####################################################################################################################################
-
+  - bash .buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh 0.8 1319 8040
 
-- label: Entrypoints Integration (LLM) # 13.1m
-  timeout_in_minutes: 22
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+- label: LM Eval Large Models (8xH200-8xMI300) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_8
   optional: true
-  fast_check: true
-  torch_nightly: true
+  num_gpus: 8
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/entrypoints/llm
-  - tests/entrypoints/offline_mode
+  - vllm/model_executor/models/
+  - vllm/model_executor/model_loader/
+  - vllm/model_executor/layers/quantization/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
+  - vllm/model_executor/layers/layernorm.py
+  - csrc/
+  - vllm/_aiter_ops.py
+  - vllm/platforms/rocm.py
+  - tests/evals/
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
-  - pytest -v -s entrypoints/llm/test_generate.py
-  - pytest -v -s entrypoints/offline_mode
+  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-mi3xx.txt
 
+#---------------------------------------------------------  mi300 · examples  ----------------------------------------------------------#
 
-- label: Entrypoints Integration (API Server openai - Part 1) # TBD
+- label: Examples # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  fast_check: true
-  torch_nightly: true
-  working_dir: "/vllm-workspace/tests"
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  optional: true
+  working_dir: "/vllm-workspace/examples"
   source_file_dependencies:
-  - vllm/
-  - tests/entrypoints/openai
-  - tests/entrypoints/test_chat_utils
+  - vllm/entrypoints
+  - vllm/multimodal
+  - examples/
+  - vllm/platforms/rocm.py
   commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/openai/chat_completion --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py
+    - pip install tensorizer
+    # Basic
+    - python3 basic/offline_inference/chat.py --attention-backend TRITON_ATTN
+    - python3 basic/offline_inference/generate.py --model facebook/opt-125m
+    - python3 basic/offline_inference/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
+    - python3 basic/offline_inference/classify.py
+    - python3 basic/offline_inference/embed.py
+    - python3 basic/offline_inference/score.py
+    # Multi-modal models
+    - python3 generate/multimodal/audio_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
+    - python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
+    # Pooling models
+    - python3 pooling/embed/vision_embedding_offline.py --seed 0
+    # Features demo
+    - python3 features/automatic_prefix_caching/prefix_caching_offline.py
+    - python3 deployment/llm_engine_example.py
+    - python3 features/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 features/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
+    - python3 features/speculative_decoding/spec_decode_offline.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
+    - python3 features/speculative_decoding/spec_decode_offline.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536
 
+#----------------------------------------------------------  mi300 · kernels  ----------------------------------------------------------#
 
-- label: Entrypoints Integration (API Server openai - Part 2) # TBD
+- label: Kernels Attention Test %N # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  fast_check: true
-  torch_nightly: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/
-  - tests/entrypoints/openai
-  - tests/entrypoints/test_chat_utils
-  commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/openai/completion --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py
-  - pytest -v -s entrypoints/openai/speech_to_text/
-  - pytest -v -s entrypoints/test_chat_utils.py
-
-
-- label: Entrypoints Integration (API Server openai - Part 3) # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  fast_check: true
-  torch_nightly: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/
-  - tests/entrypoints/openai
-  - tests/entrypoints/test_chat_utils
-  commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion --ignore=entrypoints/openai/completion --ignore=entrypoints/openai/speech_to_text/ --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
-
-
-- label: Entrypoints Integration (API Server 2) #26.9m
-  timeout_in_minutes: 45
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  optional: true
-  fast_check: true
-  torch_nightly: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/
-  - tests/entrypoints/rpc
-  - tests/entrypoints/serve/instrumentator
-  - tests/tool_use
-  commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/serve/instrumentator
-  - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
-  - pytest -v -s tool_use
-
-
-- label: Entrypoints Integration (Pooling) # 22.8m
-  timeout_in_minutes: 48
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  fast_check: true
-  torch_nightly: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/
-  - tests/entrypoints/pooling
-  commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/pooling
-
-
-- label: Distributed Torchrun + Examples (4 GPUs) # TBD
-  timeout_in_minutes: 80
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_4
-  num_gpus: 4
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/distributed/
-  - tests/distributed/test_torchrun_example.py
-  - tests/distributed/test_torchrun_example_moe.py
-  - examples/rl/
-  - tests/examples/offline_inference/data_parallel.py
-  - vllm/platforms/rocm.py
-  commands:
-  - export TORCH_NCCL_BLOCKING_WAIT=1
-  - torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
-  - PP_SIZE=2 torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
-  - TP_SIZE=4 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
-  - PP_SIZE=2 TP_SIZE=2 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
-  - DP_SIZE=4 ENABLE_EP=1 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
-  - TP_SIZE=2 DP_SIZE=2 ENABLE_EP=1 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
-  - python3 ../examples/offline_inference/data_parallel.py --enforce-eager
-  # rlhf examples
-  - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 ../examples/rl/rlhf_nccl.py
-  - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 ../examples/rl/rlhf_ipc.py
-
-
-- label: Distributed DP Tests (4 GPUs) # TBD
-  timeout_in_minutes: 60
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_4
-  num_gpus: 4
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/distributed/
-  - tests/v1/distributed
-  - tests/v1/engine/test_engine_core_client.py
-  - tests/distributed/test_utils
-  - vllm/platforms/rocm.py
-  commands:
-  - export TORCH_NCCL_BLOCKING_WAIT=1
-  - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
-  - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
-  - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
-  - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/distributed/test_internal_lb_dp.py
-  - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/distributed/test_hybrid_lb_dp.py
-  - pytest -v -s v1/engine/test_engine_core_client.py::test_kv_cache_events_dp
-  - pytest -v -s distributed/test_utils.py
-
-
-- label: Distributed Compile + Comm (4 GPUs) # TBD
-  timeout_in_minutes: 40
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_4
-  num_gpus: 4
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/distributed/
-  - tests/distributed/test_pynccl
-  - tests/distributed/test_events
-  - tests/compile/fullgraph/test_basic_correctness.py
-  - tests/distributed/test_symm_mem_allreduce.py
-  - tests/distributed/test_multiproc_executor.py
-  - vllm/platforms/rocm.py
-  commands:
-  - export TORCH_NCCL_BLOCKING_WAIT=1
-  - pytest -v -s compile/fullgraph/test_basic_correctness.py
-  - pytest -v -s distributed/test_pynccl.py
-  - pytest -v -s distributed/test_events.py
-  - pytest -v -s distributed/test_symm_mem_allreduce.py
-  - pytest -v -s distributed/test_multiproc_executor.py::test_multiproc_executor_multi_node
-
-
-- label: Distributed Tests (8 GPUs)(H100-MI325) # 6.4m
-  timeout_in_minutes: 10
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_8
-  num_gpus: 8
-  optional: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - examples/offline_inference/torchrun_dp_example.py
-  - vllm/config/parallel.py
-  - vllm/distributed/
-  - vllm/v1/engine/llm_engine.py
-  - vllm/v1/executor/uniproc_executor.py
-  - vllm/v1/worker/gpu_worker.py
-  - vllm/platforms/rocm.py
-  commands:
-  - export TORCH_NCCL_BLOCKING_WAIT=1
-  - torchrun --nproc-per-node=8 ../examples/offline_inference/torchrun_dp_example.py --tp-size=2 --pp-size=1 --dp-size=4 --enable-ep
-
-
-- label: Elastic EP Scaling Test # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_4
-  num_gpus: 4
-  optional: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/distributed/
-  - vllm/engine/
-  - vllm/executor/
-  - vllm/compilation/
-  - tests/distributed/
-  - vllm/platforms/rocm.py
-  commands:
-  - pytest -v -s distributed/test_elastic_ep.py
-
-
-- label: Engine # 11.3m
-  timeout_in_minutes: 35
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/
-  - tests/engine
-  - tests/test_sequence
-  - tests/test_config
-  - tests/test_logger
-  - tests/test_vllm_port
-  commands:
-  - pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py
-
-
-- label: Engine (1 GPU) # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  optional: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/v1/engine/
-  - tests/v1/engine/
-  - vllm/platforms/rocm.py
-  commands:
-  - pytest -v -s v1/engine/test_preprocess_error_handling.py
-  - pytest -v -s v1/engine --ignore v1/engine/test_preprocess_error_handling.py
-
-
-- label: e2e Scheduling (1 GPU) # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  optional: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/v1/
-  - tests/v1/e2e/general/
-  - vllm/platforms/rocm.py
-  commands:
-  - pytest -v -s v1/e2e/general/test_async_scheduling.py
-
-
-- label: e2e Core (1 GPU) # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  optional: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/v1/
-  - tests/v1/e2e/
-  - vllm/platforms/rocm.py
-  commands:
-  - pytest -v -s v1/e2e/general --ignore v1/e2e/general/test_async_scheduling.py
-
-
-- label: Spec Decode Eagle # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/v1/spec_decode/
-  - vllm/v1/worker/gpu/spec_decode/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/sample/
-  - vllm/model_executor/layers/
-  - tests/v1/e2e/spec_decode/
-  - vllm/platforms/rocm.py
-  commands:
-  - pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"
-
-
-- label: Spec Decode Speculators + MTP # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  optional: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  parallelism: 2
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/v1/spec_decode/
-  - vllm/v1/worker/gpu/spec_decode/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/sample/
-  - vllm/model_executor/layers/
-  - vllm/transformers_utils/configs/speculators/
-  - tests/v1/e2e/spec_decode/
+  - csrc/attention/
+  - vllm/v1/attention
+  - vllm/model_executor/layers/attention
+  - tests/kernels/attention
+  - vllm/_aiter_ops.py
+  - vllm/envs.py
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
-
+  - pytest -v -s kernels/attention --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
 
-- label: Spec Decode Ngram + Suffix # TBD
+- label: Kernels Core Operation Test # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  optional: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/v1/spec_decode/
-  - vllm/v1/worker/gpu/spec_decode/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/sample/
-  - vllm/model_executor/layers/
-  - tests/v1/e2e/spec_decode/
+  - csrc/
+  - tests/kernels/core
+  - tests/kernels/test_top_k_per_row.py
+  - tests/kernels/test_concat_mla_q.py
+  - vllm/model_executor/layers/rotary_embedding/
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s v1/e2e/spec_decode -k "ngram or suffix"
+  - pytest -v -s kernels/core --ignore=kernels/core/test_minimax_reduce_rms.py  kernels/test_concat_mla_q.py kernels/test_top_k_per_row.py
 
-
-- label: Spec Decode Draft Model # TBD
+- label: Kernels MoE Test %N # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  optional: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  parallelism: 4
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/v1/spec_decode/
-  - vllm/v1/worker/gpu/spec_decode/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/sample/
-  - vllm/model_executor/layers/
-  - tests/v1/e2e/spec_decode/
+  - csrc/quantization/cutlass_w8a8/moe/
+  - csrc/moe/
+  - tests/kernels/moe
+  - vllm/model_executor/layers/fused_moe/
+  - vllm/distributed/device_communicators/
+  - vllm/envs.py
+  - vllm/config
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
-  commands:
-  - pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"
-
+  commands:
+  - pytest -v -s kernels/moe --ignore=kernels/moe/test_modular_oai_triton_moe.py --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
+  - pytest -v -s kernels/moe/test_modular_oai_triton_moe.py --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
 
-- label: V1 e2e (2 GPUs) # 7.1m
-  timeout_in_minutes: 12
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_2
+- label: Kernels Quantization Test %N # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
+  parallelism: 2
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/v1/e2e
+  - csrc/quantization/
+  - vllm/model_executor/layers/quantization
+  - tests/kernels/quantization
+  - tests/kernels/quantization/test_rocm_skinny_gemms.py
+  - vllm/_aiter_ops.py
+  - vllm/platforms/rocm.py
+  - vllm/model_executor/kernels/
   commands:
-    - pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "tensor_parallelism"
-
+  - pytest -v -s kernels/quantization --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
 
-- label: V1 e2e (4 GPUs) # 52.6m
-  timeout_in_minutes: 106
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_4
+- label: Kernels FP8 MoE Test (2xH100-2xMI300) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_2
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/v1/e2e
+  - csrc/moe/
+  - csrc/quantization/w8a8/cutlass/moe/
+  - vllm/model_executor/layers/fused_moe/
+  - tests/kernels/moe/test_deepep_moe.py
+  - vllm/_aiter_ops.py
+  - vllm/platforms/rocm.py
+  - vllm/envs.py
   commands:
-    - pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "eagle_correctness_heavy"
+    - pytest -v -s kernels/moe/test_deepep_moe.py
 
+#-----------------------------------------------------------  mi300 · lora  ------------------------------------------------------------#
 
-- label: V1 Spec Decode # TBD
-  timeout_in_minutes: 40
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+- label: LoRA TP (Distributed) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
+  num_gpus: 4
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/v1/spec_decode
+  - vllm/lora
+  - tests/lora
+  - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s -m 'not slow_test' v1/spec_decode
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+  - pytest -v -s -x lora/test_chatglm3_tp.py
+  - pytest -v -s -x lora/test_llama_tp.py
+  - pytest -v -s -x lora/test_qwen3_with_multi_loras.py
+  - pytest -v -s -x lora/test_olmoe_tp.py
+  - pytest -v -s -x lora/test_gptoss_tp.py
+  - pytest -v -s -x lora/test_qwen35_densemodel_lora.py
 
+#-----------------------------------------------------  mi300 · models / language  -----------------------------------------------------#
 
-- label: V1 Sample + Logits # TBD
-  timeout_in_minutes: 40
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+- label: Language Models Test (Extended Pooling)  # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/v1/sample
-  - tests/v1/logits_processors
-  - tests/v1/test_oracle.py
-  - tests/v1/test_request.py
-  - tests/v1/test_outputs.py
+  - tests/models/language/pooling
   commands:
-  - pytest -v -s v1/sample
-  - pytest -v -s v1/logits_processors
-  - pytest -v -s v1/test_oracle.py
-  - pytest -v -s v1/test_request.py
-  - pytest -v -s v1/test_outputs.py
-
+  - pytest -v -s models/language/pooling -m 'not core_model'
 
-- label: V1 Core + KV + Metrics # TBD
-  timeout_in_minutes: 40
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+- label: Language Models Tests (Standard) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
+  torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/v1/core
-  - tests/v1/executor
-  - tests/v1/kv_offload
-  - tests/v1/worker
-  - tests/v1/kv_connector/unit
-  - tests/v1/metrics
-  - tests/entrypoints/openai/correctness/test_lmeval.py
+  - tests/models/language
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
-  - pytest -v -s -m 'not cpu_test' v1/core
-  - pytest -v -s v1/executor
-  - pytest -v -s v1/kv_offload
-  - pytest -v -s v1/worker
-  - pytest -v -s -m 'not cpu_test' v1/kv_connector/unit
-  - pytest -v -s -m 'not cpu_test' v1/metrics
-  - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
-  # - export HSA_NO_SCRATCH_RECLAIM=1
-  - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
+  - pip freeze | grep -E 'torch'
+  - pytest -v -s models/language -m 'core_model and (not slow_test)'
 
+#----------------------------------------------------  mi300 · models / multimodal  ----------------------------------------------------#
 
-- label: V1 Speculative Decoding (slow) # TBD
+- label: Multi-Modal Models (Extended Generation 1) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/v1/spec_decode/
-  - vllm/model_executor/models/
-  - vllm/v1/attention/
-  - vllm/model_executor/layers/
-  - tests/v1/spec_decode/
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/models/multimodal/generation
+  - tests/models/multimodal/test_mapping.py
   commands:
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_eagle.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_extract_hidden_states.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_max_len.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_mtp.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_ngram.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_speculators_eagle3.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_tree_attention.py
+  - uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@rocm-7.0-v2.3.0'
+  - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0'
+  - pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
 
 
-- label: Acceptance Length Test (Large Models) # TBD
+- label: Multi-Modal Models (Extended Generation 2) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/v1/spec_decode/
-  - vllm/model_executor/models/mlp_speculator.py
-  - tests/v1/spec_decode/test_acceptance_length.py
-  - vllm/platforms/rocm.py
-  commands:
-  - export VLLM_ALLOW_INSECURE_SERIALIZATION=1
-  - pytest -v -s v1/spec_decode/test_acceptance_length.py -m slow_test
-
-
-- label: V1 attention (H100-MI325) # 14.5m
-  timeout_in_minutes: 40
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/config/attention.py
-  - vllm/model_executor/layers/attention
-  - vllm/v1/attention
-  - tests/v1/attention
-  - vllm/_aiter_ops.py
-  - vllm/envs.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/models/multimodal/generation
   commands:
-  - pytest -v -s v1/attention
+  - uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@rocm-7.0-v2.3.0'
+  - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0'
+  - pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'
 
 
-- label: Batch Invariance (H100-MI325) # 5.2m
-  timeout_in_minutes: 12
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+- label: Multi-Modal Models (Extended Generation 3) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/v1/attention
-  - vllm/model_executor/layers
-  - tests/v1/determinism/
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/models/multimodal/generation
   commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pip install pytest-timeout pytest-forked
-  - pytest -v -s v1/determinism/test_batch_invariance.py
-  - pytest -v -s v1/determinism/test_rms_norm_batch_invariant.py
-
+  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+  - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model'
 
-- label: V1 others (CPU) # 10.4m
-  timeout_in_minutes: 28
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  no_gpu: true
+- label: "Multi-Modal Models (Standard) 1: qwen2" # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  torch_nightly: true
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/v1
-  commands:
-  - pytest -v -s -m 'cpu_test' v1/core
-  - pytest -v -s v1/structured_output
-  - pytest -v -s v1/test_serial_utils.py
-  - pytest -v -s -m 'cpu_test' v1/kv_connector/unit
-  - pytest -v -s -m 'cpu_test' v1/metrics
-
-
-- label: Examples # 24.5m
-  timeout_in_minutes: 55
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  optional: true
-  working_dir: "/vllm-workspace/examples"
-  source_file_dependencies:
-  - vllm/entrypoints
-  - vllm/multimodal
-  - examples/
-  - vllm/platforms/rocm.py
+  - tests/models/multimodal
   commands:
-    - pip install tensorizer
-    # Basic
-    - python3 basic/offline_inference/chat.py --attention-backend TRITON_ATTN
-    - python3 basic/offline_inference/generate.py --model facebook/opt-125m
-    - python3 basic/offline_inference/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
-    - python3 basic/offline_inference/classify.py
-    - python3 basic/offline_inference/embed.py
-    - python3 basic/offline_inference/score.py
-    # Multi-modal models
-    - python3 offline_inference/audio_language.py --seed 0
-    - python3 offline_inference/vision_language.py --seed 0
-    - python3 offline_inference/vision_language_multi_image.py --seed 0
-    - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
-    # Pooling models
-    - python3 pooling/embed/vision_embedding_offline.py --seed 0
-    # Features demo
-    - python3 offline_inference/prefix_caching.py
-    - python3 offline_inference/llm_engine_example.py
-    - python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
-    - python3 offline_inference/spec_decode.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
-    - python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536
-
+  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+  - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "qwen2"
+  - pytest -v -s models/multimodal/generation/test_ultravox.py -m core_model
 
-- label: Platform Tests (CUDA) # 5.0m
-  timeout_in_minutes: 9
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+- label: "Multi-Modal Models (Standard) 3: llava + qwen2_vl" # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  torch_nightly: true
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/cuda
+  - tests/models/multimodal/generation
+  - tests/models/multimodal/test_mapping.py
   commands:
-  - pytest -v -s cuda/test_cuda_context.py
-  - pytest -v -s cuda/test_platform_no_cuda_init.py
-
+  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+  - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "not qwen2 and not qwen3 and not gemma"
+  - pytest -v -s models/multimodal/generation/test_qwen2_vl.py -m core_model
 
-- label: PyTorch Compilation Passes Unit Tests # TBD
+- label: "Multi-Modal Models (Standard) 4: other + whisper" # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/compile/passes
+  - tests/models/multimodal/generation
+  - tests/models/multimodal/test_mapping.py
   commands:
-  - pytest -s -v compile/passes --ignore compile/passes/distributed
-
+  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+  - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/generation/test_ultravox.py --ignore models/multimodal/generation/test_qwen2_5_vl.py --ignore models/multimodal/generation/test_qwen2_vl.py --ignore models/multimodal/generation/test_whisper.py  --ignore models/multimodal/generation/test_memory_leak.py --ignore models/multimodal/processing
+  - pytest -v -s models/multimodal/generation/test_memory_leak.py -m core_model
+  - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model
 
-- label: Kernels Core Operation Test # 26.8m
-  timeout_in_minutes: 38
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+- label: Multi-Modal Processor # 1h 42m
+  timeout_in_minutes: 138
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/
-  - tests/kernels/core
-  - tests/kernels/test_top_k_per_row.py
-  - tests/kernels/test_concat_mla_q.py
-  - vllm/model_executor/layers/rotary_embedding/
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/models/multimodal
+  - tests/models/registry.py
   commands:
-  - pytest -v -s kernels/core kernels/test_top_k_per_row.py
-
+  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+  - pytest -v -s models/multimodal/processing/test_tensor_schema.py
 
-- label: Kernels Attention Test %N # 17.7m
-  timeout_in_minutes: 28
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  parallelism: 2
+- label: Multi-Modal Processor (CPU) %N # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  parallelism: 4
+  no_gpu: true
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/attention/
-  - vllm/v1/attention
-  - vllm/model_executor/layers/attention
-  - tests/kernels/attention
-  - vllm/_aiter_ops.py
-  - vllm/envs.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/models/multimodal
+  - tests/models/registry.py
   commands:
-  - pytest -v -s kernels/attention --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
+  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+  - pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
 
+#-----------------------------------------------------  mi300 · models / quantized  -----------------------------------------------------#
 
-- label: Kernels Quantization Test %N # 15.2m
-  timeout_in_minutes: 24
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  parallelism: 2
+- label: Quantized Models Test # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/quantization/
   - vllm/model_executor/layers/quantization
-  - tests/kernels/quantization
-  - tests/kernels/quantization/test_rocm_skinny_gemms.py
   - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
-  - vllm/model_executor/kernels/
+  - tests/models/quantization
+  - vllm/model_executor/model_loader/
   commands:
-  - pytest -v -s kernels/quantization --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
+  - pytest -v -s models/quantization
 
+#--------------------------------------------------  mi300 · models / transformers  ---------------------------------------------------#
 
-- label: Kernels MoE Test %N # TBD
-  timeout_in_minutes: 19
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+- label: Transformers Nightly Models (Shardable) %N # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   parallelism: 4
-  working_dir: "/vllm-workspace/tests"
+  optional: true
+  working_dir: "/vllm-workspace/"
   source_file_dependencies:
-  - csrc/quantization/cutlass_w8a8/moe/
-  - csrc/moe/
-  - tests/kernels/moe
-  - vllm/model_executor/layers/fused_moe/
-  - vllm/distributed/device_communicators/
-  - vllm/envs.py
-  - vllm/config
+  - vllm/model_executor/models/
+  - vllm/model_executor/model_loader/
+  - vllm/multimodal/
+  - vllm/model_executor/layers/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
   - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
+  - tests/models/
   commands:
-  - pytest -v -s kernels/moe --ignore=kernels/moe/test_modular_oai_triton_moe.py --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
-  - pytest -v -s kernels/moe/test_modular_oai_triton_moe.py --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
-
+  - pip install --upgrade git+https://github.com/huggingface/transformers
+  - pytest -v -s tests/models/test_initialization.py --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
+  - pytest -v -s tests/models/multimodal/processing/ --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
 
-- label: Kernels FP8 MoE Test # TBD
+- label: Transformers Nightly Models (Single) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_2
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
+  working_dir: "/vllm-workspace/"
+  source_file_dependencies:
+  - vllm/model_executor/models/
+  - vllm/model_executor/model_loader/
+  - vllm/multimodal/
+  - vllm/model_executor/layers/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
+  - vllm/_aiter_ops.py
+  - vllm/platforms/rocm.py
+  - tests/models/
+  - examples/
+  commands:
+  - pip install --upgrade git+https://github.com/huggingface/transformers
+  - pytest -v -s tests/models/test_transformers.py
+  - pytest -v -s tests/models/multimodal/test_mapping.py
+  - python3 examples/basic/offline_inference/chat.py
+  - python3 examples/generate/multimodal/vision_language_offline.py --model-type qwen2_5_vl
+  - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/generate/multimodal/audio_language_offline.py --model-type whisper
+
+#-------------------------------------------------------  mi300 · quantization  --------------------------------------------------------#
+
+- label: Quantization # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/moe/
-  - csrc/quantization/w8a8/cutlass/moe/
-  - vllm/model_executor/layers/fused_moe/
-  - tests/kernels/moe/test_deepep_moe.py
+  - csrc/
+  - vllm/model_executor/layers/quantization
   - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
-  - vllm/envs.py
+  - tests/quantization
   commands:
-    - pytest -v -s kernels/moe/test_deepep_moe.py
 
+  # temporary install here since we need nightly, will move to requirements/test.in
+  # after torchao 0.12 release, and pin a working version of torchao nightly here
+
+  # since torchao nightly is only compatible with torch nightly currently
+  # https://github.com/pytorch/ao/issues/2919, we'll have to skip new torchao tests for now
+  # we can only upgrade after this is resolved
+  # TODO(jerryzh168): resolve the above comment
+  - uv pip install --system torchao==0.17.0
+  - uv pip install --system conch-triton-kernels
+  - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py
+
+#-----------------------------------------------------------  mi300 · rocm  ------------------------------------------------------------#
 
 - label: ROCm AITER Ops Test # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/_aiter_ops.py
@@ -2126,291 +1997,275 @@ steps:
   commands:
   - pytest -v -s rocm/aiter/
 
+#---------------------------------------------------------  mi300 · samplers  ----------------------------------------------------------#
 
-- label: Benchmarks # 8.2m
-  timeout_in_minutes: 20
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+- label: Samplers Test # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
-  working_dir: "/vllm-workspace/.buildkite"
-  source_file_dependencies:
-  - benchmarks/
-  - vllm/platforms/rocm.py
-  commands:
-  - bash scripts/run-benchmarks.sh
-
-
-- label: Quantization # 36.1m
-  timeout_in_minutes: 60
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/
-  - vllm/model_executor/layers/quantization
+  - vllm/model_executor/layers
+  - vllm/sampling_metadata.py
+  - vllm/v1/sample/
+  - vllm/entrypoints/generate/beam_search/
+  - tests/samplers
+  - tests/conftest.py
   - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
-  - tests/quantization
   commands:
-  - uv pip install --system torchao==0.14.1
-  - uv pip install --system conch-triton-kernels
-  - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py
+  - pytest -v -s samplers
 
+#------------------------------------------------------------  mi300 · misc  ------------------------------------------------------------#
 
-- label: Language Models Tests (Standard) # 22.8m
-  timeout_in_minutes: 38
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+- label: Python-only Installation # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
-  torch_nightly: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/
-  - tests/models/language
-  commands:
-  - pip freeze | grep -E 'torch'
-  - pytest -v -s models/language -m 'core_model and (not slow_test)'
-
-
-- label: Language Models Tests (Hybrid) %N # 34.9m
-  timeout_in_minutes: 55
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  torch_nightly: true
-  parallelism: 2
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/language/generation
+  - tests/standalone_tests/python_only_compile.sh
+  - setup.py
+  - vllm/platforms/rocm.py
   commands:
-  - uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@fix-rocm-7.0-warp-size-constexpr'
-  - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
-  - pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
-
+  - bash standalone_tests/python_only_compile.sh
 
-- label: Language Models Test (Extended Generation) # 32.2m
-  timeout_in_minutes: 55
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+- label: Regression # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/models/language/generation
+  - tests/test_regression
   commands:
-  - uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@fix-rocm-7.0-warp-size-constexpr'
-  - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
-  - pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'
+  - pip install modelscope
+  - pytest -v -s test_regression.py
 
+#---------------------------------------------------------  mi300 · ray_compat  ---------------------------------------------------------#
 
-- label: Multi-Modal Processor # 1h 42m
-  timeout_in_minutes: 138
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  optional: true
-  working_dir: "/vllm-workspace/tests"
+- label: Ray Dependency Compatibility Check # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  working_dir: "/"
   source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal
-  - tests/models/registry.py
+  - requirements/
+  - setup.py
+  - vllm/platforms/rocm.py
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/processing/test_tensor_schema.py
+  - bash /vllm-workspace/.buildkite/scripts/check-ray-compatibility.sh
 
+#------------------------------------------------------------  mi300 · v1  -------------------------------------------------------------#
 
-- label: "Multi-Modal Models (Standard) 1: qwen2" # TBD
+- label: Acceptance Length Test (Large Models) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  torch_nightly: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal
+  - vllm/v1/spec_decode/
+  - vllm/model_executor/models/mlp_speculator.py
+  - tests/v1/spec_decode/test_acceptance_length.py
+  - vllm/platforms/rocm.py
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "qwen2"
-  - pytest -v -s models/multimodal/generation/test_ultravox.py -m core_model
-
+  - export VLLM_ALLOW_INSECURE_SERIALIZATION=1
+  - pytest -v -s v1/spec_decode/test_acceptance_length.py -m slow_test
 
-- label: "Multi-Modal Models (Standard) 2: qwen3 + gemma" # TBD
+- label: e2e Core (1 GPU) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  torch_nightly: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal
+  - vllm/v1/
+  - tests/v1/e2e/
+  - vllm/platforms/rocm.py
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "qwen3 or gemma"
-  - pytest -v -s models/multimodal/generation/test_qwen2_5_vl.py -m core_model
-
+  - pytest -v -s v1/e2e/general --ignore v1/e2e/general/test_async_scheduling.py
 
-- label: "Multi-Modal Models (Standard) 3: llava + qwen2_vl" # TBD
+- label: e2e Scheduling (1 GPU) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  torch_nightly: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal/generation
-  - tests/models/multimodal/test_mapping.py
+  - vllm/v1/
+  - tests/v1/e2e/general/
+  - vllm/platforms/rocm.py
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "not qwen2 and not qwen3 and not gemma"
-  - pytest -v -s models/multimodal/generation/test_qwen2_vl.py -m core_model
-
+  - pytest -v -s v1/e2e/general/test_async_scheduling.py
 
-- label: "Multi-Modal Models (Standard) 4: other + whisper" # TBD
+- label: Engine (1 GPU) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  torch_nightly: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal/generation
+  - vllm/v1/engine/
+  - tests/v1/engine/
+  - vllm/platforms/rocm.py
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/generation/test_ultravox.py --ignore models/multimodal/generation/test_qwen2_5_vl.py --ignore models/multimodal/generation/test_qwen2_vl.py --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing
-  - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model
-
+  - pytest -v -s v1/engine/test_preprocess_error_handling.py
+  - pytest -v -s v1/engine --ignore v1/engine/test_preprocess_error_handling.py
 
-- label: Multi-Modal Models (Extended Generation 1) # 1h 2m
-  timeout_in_minutes: 106
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+- label: Spec Decode Draft Model # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal/generation
-  - tests/models/multimodal/test_mapping.py
+  - vllm/v1/spec_decode/
+  - vllm/v1/worker/gpu/spec_decode/
+  - vllm/model_executor/model_loader/
+  - vllm/v1/sample/
+  - vllm/model_executor/layers/
+  - tests/v1/e2e/spec_decode/
+  - vllm/platforms/rocm.py
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation -m 'not core_model' --ignore models/multimodal/generation/test_common.py
-  - pytest -v -s models/multimodal/test_mapping.py
-
+  - pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"
 
-- label: Multi-Modal Models (Extended Generation 2) # TBD
+- label: Spec Decode Eagle # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  optional: true
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal/generation
+  - vllm/v1/spec_decode/
+  - vllm/v1/worker/gpu/spec_decode/
+  - vllm/model_executor/model_loader/
+  - vllm/v1/sample/
+  - vllm/model_executor/layers/
+  - tests/v1/e2e/spec_decode/
+  - vllm/platforms/rocm.py
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model'
-
+  - pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"
 
-- label: Multi-Modal Models (Extended Generation 3) # TBD
+- label: Spec Decode Ngram + Suffix # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal/generation
+  - vllm/v1/spec_decode/
+  - vllm/v1/worker/gpu/spec_decode/
+  - vllm/model_executor/model_loader/
+  - vllm/v1/sample/
+  - vllm/model_executor/layers/
+  - tests/v1/e2e/spec_decode/
+  - vllm/platforms/rocm.py
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model'
-
+  - pytest -v -s v1/e2e/spec_decode -k "ngram or suffix"
 
-- label: Multi-Modal Models (Extended Pooling) # TBD
+- label: Spec Decode Speculators + MTP # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal/pooling
+  - vllm/v1/spec_decode/
+  - vllm/v1/worker/gpu/spec_decode/
+  - vllm/model_executor/model_loader/
+  - vllm/v1/sample/
+  - vllm/model_executor/layers/
+  - vllm/transformers_utils/configs/speculators/
+  - tests/v1/e2e/spec_decode/
+  - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s models/multimodal/pooling -m 'not core_model'
-
+  - pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
 
-- label: Quantized Models Test # 21.4m
-  timeout_in_minutes: 38
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+- label: V1 attention (H100-MI300) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/model_executor/layers/quantization
+  - vllm/config/attention.py
+  - vllm/model_executor/layers/attention
+  - vllm/v1/attention
+  - tests/v1/attention
   - vllm/_aiter_ops.py
+  - vllm/envs.py
   - vllm/platforms/rocm.py
-  - tests/models/quantization
-  - vllm/model_executor/model_loader/
   commands:
-  - pytest -v -s models/quantization
+  - pytest -v -s v1/attention
 
+- label: V1 Core + KV + Metrics # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - vllm/
+  - tests/v1/core
+  - tests/v1/executor
+  - tests/v1/kv_offload
+  - tests/v1/worker
+  - tests/v1/kv_connector/unit
+  - tests/v1/metrics
+  - tests/entrypoints/openai/correctness/test_lmeval.py
+  commands:
+  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
+  - pytest -v -s -m 'not cpu_test' v1/core
+  - pytest -v -s v1/executor
+  - pytest -v -s v1/kv_offload
+  - pytest -v -s v1/worker
+  - pytest -v -s -m 'not cpu_test' v1/kv_connector/unit
+  - pytest -v -s -m 'not cpu_test' v1/metrics
+  - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
+  # - export HSA_NO_SCRATCH_RECLAIM=1
+  - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
 
-- label: Transformers Nightly Models # 50.9m
-  timeout_in_minutes: 102
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+- label: V1 others (CPU) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  no_gpu: true
   optional: true
-  working_dir: "/vllm-workspace/"
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/multimodal/
-  - vllm/model_executor/layers/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
-  - tests/models/
-  - examples/
+  - vllm/
+  - tests/v1
   commands:
-  - pip install --upgrade git+https://github.com/huggingface/transformers
-  - pytest -v -s tests/models/test_initialization.py
-  - pytest -v -s tests/models/test_transformers.py
-  - pytest -v -s tests/models/multimodal/processing/
-  - pytest -v -s tests/models/multimodal/test_mapping.py
-  - python3 examples/basic/offline_inference/chat.py
-  - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
-  - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
-
+  - pytest -v -s -m 'cpu_test' v1/core
+  - pytest -v -s v1/structured_output
+  - pytest -v -s v1/test_serial_utils.py
+  - pytest -v -s -m 'cpu_test' v1/kv_connector/unit
+  - pytest -v -s -m 'cpu_test' v1/metrics
 
-- label: Quantized MoE Test (B200-MI325) # TBD
+- label: V1 Sample + Logits # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  working_dir: "/vllm-workspace/"
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_1
+  optional: true
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - tests/quantization/test_gfx3xx_moe.py
-  - vllm/model_executor/models/deepseek_v2.py
-  - vllm/model_executor/models/gpt_oss.py
-  - vllm/model_executor/models/llama4.py
-  - vllm/model_executor/layers/fused_moe
-  - vllm/model_executor/layers/quantization/compressed_tensors
-  - vllm/model_executor/layers/quantization/modelopt.py
-  - vllm/model_executor/layers/quantization/mxfp4.py
-  - vllm/v1/attention/backends/triton_attn.py
-  - vllm/v1/attention/backends/rocm_attn.py
-  - vllm/v1/attention/backends/rocm_aiter_fa.py
-  - vllm/v1/attention/backends/mla/
-  - vllm/v1/attention/selector.py
-  - vllm/model_executor/layers/layernorm.py
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
-  - vllm/model_executor/model_loader/
+  - vllm/
+  - tests/v1/sample
+  - tests/v1/logits_processors
+  - tests/v1/test_oracle.py
+  - tests/v1/test_request.py
+  - tests/v1/test_outputs.py
   commands:
-  - pytest -s -v tests/quantization/test_gfx3xx_moe.py
-
+  - pytest -v -s v1/sample
+  - pytest -v -s v1/logits_processors
+  - pytest -v -s v1/test_oracle.py
+  - pytest -v -s v1/test_request.py
+  - pytest -v -s v1/test_outputs.py
 
-- label: Distributed DP Tests (2 GPUs) # 56.1m
-  timeout_in_minutes: 102
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_2
+- label: Distributed DP Tests (2 GPUs) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_2
   num_gpus: 2
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
@@ -2424,158 +2279,115 @@ steps:
   - tests/entrypoints/openai/test_multi_api_servers.py
   - vllm/platforms/rocm.py
   commands:
-  - export TORCH_NCCL_BLOCKING_WAIT=1
   - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
   - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
   - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
   - DP_SIZE=2 pytest -v -s entrypoints/openai/test_multi_api_servers.py
 
-
-- label: Distributed Compile + RPC Tests (2 GPUs) # 56.1m
-  timeout_in_minutes: 102
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_2
+- label: Distributed Tests (2xH100-2xMI300) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_2
   num_gpus: 2
-  working_dir: "/vllm-workspace/tests"
+  working_dir: "/vllm-workspace/"
   source_file_dependencies:
-  - vllm/compilation/
   - vllm/distributed/
-  - vllm/engine/
-  - vllm/executor/
-  - vllm/worker/worker_base.py
-  - vllm/v1/engine/
-  - vllm/v1/worker/
-  - tests/compile/fullgraph/test_basic_correctness.py
-  - tests/compile/test_wrapper.py
-  - tests/entrypoints/llm/test_collective_rpc.py
+  - vllm/v1/distributed/
+  - vllm/model_executor/layers/fused_moe/
+  - tests/v1/distributed/test_dbo.py
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - export TORCH_NCCL_BLOCKING_WAIT=1
-  - pytest -v -s entrypoints/llm/test_collective_rpc.py
-  - pytest -v -s ./compile/fullgraph/test_basic_correctness.py
-  - pytest -v -s ./compile/test_wrapper.py
-
+  - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 examples/rl/rlhf_async_new_apis.py
+  - VLLM_LOGGING_LEVEL=DEBUG python3 examples/features/data_parallel/data_parallel_offline.py --model=Qwen/Qwen1.5-MoE-A2.7B -tp=1 -dp=2 --max-model-len=2048 --all2all-backend=deepep_high_throughput
+  - pytest -v -s tests/v1/distributed/test_dbo.py
+  - VLLM_ALLOW_INSECURE_SERIALIZATION=1 pytest -v -s tests/distributed/test_weight_transfer.py
+  - pytest -v -s tests/distributed/test_packed_tensor.py
 
-- label: Distributed Torchrun + Shutdown Tests (2 GPUs) # 56.1m
-  timeout_in_minutes: 102
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_2
+- label: Metrics, Tracing (2 GPUs) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_2
   num_gpus: 2
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/distributed/
-  - vllm/engine/
-  - vllm/executor/
-  - vllm/worker/worker_base.py
-  - vllm/v1/engine/
-  - vllm/v1/worker/
-  - tests/distributed/
-  - tests/v1/shutdown
-  - tests/v1/worker/test_worker_memory_snapshot.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/v1/tracing
   commands:
-  - export TORCH_NCCL_BLOCKING_WAIT=1
-  - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
-  - VLLM_TEST_SAME_HOST=1 VLLM_TEST_WITH_DEFAULT_DEVICE_SET=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
-  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
-  - pytest -v -s v1/worker/test_worker_memory_snapshot.py
-
+  - "pip install \
+      'opentelemetry-sdk>=1.26.0' \
+      'opentelemetry-api>=1.26.0' \
+      'opentelemetry-exporter-otlp>=1.26.0' \
+      'opentelemetry-semantic-conventions-ai>=0.4.1'"
+  - pytest -v -s v1/tracing
 
-- label: Distributed Model Tests (2 GPUs) # 19.3m
-  timeout_in_minutes: 38
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_2
-  num_gpus: 2
+- label: V1 e2e (2 GPUs) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_2
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/model_executor/model_loader/sharded_state_loader.py
-  - vllm/model_executor/models/
-  - vllm/model_executor/layers/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
-  - tests/basic_correctness/
-  - tests/model_executor/model_loader/test_sharded_state_loader.py
-  - tests/models/
+  - vllm/
+  - tests/v1/e2e
   commands:
-  - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
-  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s model_executor/model_loader/test_sharded_state_loader.py -m '(not slow_test)'
-  - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
-  - pytest models/language -v -s -m 'distributed(num_gpus=2)'
-  - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py
-  - VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)'
-
+    - pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "tensor_parallelism"
 
-- label: LoRA TP (Distributed) # 9.8m
-  timeout_in_minutes: 18
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_4
+- label: CrossLayer KV layout Distributed NixlConnector PD accuracy tests (4 GPUs) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
   num_gpus: 4
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/lora
-  - tests/lora
+  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+  - tests/v1/kv_connector/nixl_integration/
   - vllm/platforms/rocm.py
   commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
-  - pytest -v -s -x lora/test_chatglm3_tp.py
-  - pytest -v -s -x lora/test_llama_tp.py
-  - pytest -v -s -x lora/test_llm_with_multi_loras.py
-  - pytest -v -s -x lora/test_olmoe_tp.py
-  - pytest -v -s -x lora/test_gptoss_tp.py
-  - pytest -v -s -x lora/test_qwen35_densemodel_lora.py
-
-
-- label: Weight Loading Multiple GPU # 7.5m
-  timeout_in_minutes: 14
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_2
-  num_gpus: 2
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/
-  - tests/weight_loading
-  commands:
-  - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-amd.txt
-
+  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
+  - CROSS_LAYERS_BLOCKS=True ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
 
-- label: Weight Loading Multiple GPU - Large Models # 12.6m
-  timeout_in_minutes: 26
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_2
-  num_gpus: 2
-  optional: true
+- label: Distributed DP Tests (4 GPUs) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
+  num_gpus: 4
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/weight_loading
+  - vllm/distributed/
+  - tests/v1/distributed
+  - tests/v1/engine/test_engine_core_client.py
+  - tests/distributed/test_utils
+  - vllm/platforms/rocm.py
   commands:
-  - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large-amd.txt
-
+  - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
+  - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
+  - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
+  - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/distributed/test_internal_lb_dp.py
+  - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/distributed/test_hybrid_lb_dp.py
+  - pytest -v -s v1/engine/test_engine_core_client.py::test_kv_cache_events_dp
+  - pytest -v -s distributed/test_utils.py
 
-- label: Ray Dependency Compatibility Check # TBD
+- label: Distributed NixlConnector PD accuracy (4 GPUs)  # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
+  num_gpus: 4
   optional: true
-  working_dir: "/"
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - requirements/
-  - setup.py
+  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+  - tests/v1/kv_connector/nixl_integration/
   - vllm/platforms/rocm.py
   commands:
-  - bash /vllm-workspace/.buildkite/scripts/check-ray-compatibility.sh
-
+  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
+  - ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
 
-- label: Distributed NixlConnector PD accuracy (4 GPUs)  # 27.4m
-  timeout_in_minutes: 44
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_4
+- label: DP EP Distributed NixlConnector PD accuracy tests (4 GPUs) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
   num_gpus: 4
-  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
@@ -2583,15 +2395,13 @@ steps:
   - vllm/platforms/rocm.py
   commands:
   - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
-  - ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
+  - DP_EP=1 ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
 
-- label: CrossLayer KV layout Distributed NixlConnector PD accuracy tests (4 GPUs) # TBD
+- label: Hybrid SSM NixlConnector PD accuracy tests (4 GPUs) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_4
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
   num_gpus: 4
-  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
@@ -2599,148 +2409,157 @@ steps:
   - vllm/platforms/rocm.py
   commands:
   - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
-  - CROSS_LAYERS_BLOCKS=True ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
+  - HYBRID_SSM=1 ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
 
-- label: Distributed Tests (4 GPUs)(A100-MI325) # 20.9m
-  timeout_in_minutes: 37
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_4
-  num_gpus: 4
+- label: V1 e2e (4 GPUs) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
+  - tests/v1/e2e
   commands:
-  - export TORCH_NCCL_BLOCKING_WAIT=1
-  - pytest -v -s distributed/test_custom_all_reduce.py
-  - torchrun --nproc_per_node=2 distributed/test_ca_buffer_sharing.py
-  - TARGET_TEST_SUITE=A100 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
-  - pytest -v -s -x lora/test_mixtral.py
+    - pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "eagle_correctness_heavy"
+
+- label: V1 e2e (4xH100-4xMI300) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_4
+  optional: true
+  source_file_dependencies:
+    - vllm/v1/attention/backends/utils.py
+    - vllm/v1/worker/gpu_model_runner.py
+    - tests/v1/e2e/test_hybrid_chunked_prefill.py
+  commands:
+    - pytest -v -s v1/e2e/test_hybrid_chunked_prefill.py
 
+#------------------------------------------------------  mi300 · weight_loading  -------------------------------------------------------#
 
-- label: Distributed Tests (2 GPUs)(H100-MI325) # TBD
+- label: Weight Loading Multiple GPU # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_2
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_2
   num_gpus: 2
-  working_dir: "/vllm-workspace/"
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/distributed/
-  - vllm/v1/distributed/
-  - vllm/model_executor/layers/fused_moe/
-  - tests/v1/distributed/test_dbo.py
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/weight_loading
   commands:
-  - export TORCH_NCCL_BLOCKING_WAIT=1
-  - pytest -v -s tests/v1/distributed/test_dbo.py
-
+  - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-amd.txt
 
-- label: Distributed Compile Unit Tests (2xH100-2xMI325) # 14.3m
-  timeout_in_minutes: 32
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_2
+- label: Weight Loading Multiple GPU - Large Models # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
+  agent_pool: mi300_2
   num_gpus: 2
-  working_dir: "/vllm-workspace/"
+  optional: true
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/compilation/
-  - vllm/model_executor/layers
-  - tests/compile/passes/distributed/
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/weight_loading
   commands:
-  - export VLLM_TEST_CLEAN_GPU_MEMORY=1
-  - VLLM_TEST_CLEAN_GPU_MEMORY=1 pytest -v -s tests/compile/passes/distributed/test_async_tp.py
-  - pytest -v -s tests/compile/passes/distributed/test_sequence_parallelism.py
-  # TODO: this test is not supported on ROCm, there are aiter kernels for this.
-  # - pytest -v -s tests/compile/passes/distributed/test_fusion_all_reduce.py
-  # - pytest -v -s tests/compile/distributed/test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm
-  # - "VLLM_TEST_CLEAN_GPU_MEMORY=1 pytest -v -s tests/compile/distributed/test_fusions_e2e.py -k 'not Llama-4'"
+  - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large-amd.txt
+
+#########################################################################################################################################
+#                                                                                                                                       #
+#                                                         MI325 (gfx942) tests                                                          #
+#                                                                                                                                       #
+#########################################################################################################################################
 
+#----------------------------------------------------------  mi325 · compile  ----------------------------------------------------------#
 
-- label: LM Eval Small Models # 13.3m
-  timeout_in_minutes: 23
+- label: Distributed Compile + RPC Tests (2 GPUs) # TBD
+  timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
+  agent_pool: mi325_2
+  num_gpus: 2
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/
-  - vllm/model_executor/layers/quantization
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - vllm/_aiter_ops.py
+  - vllm/compilation/
+  - vllm/distributed/
+  - vllm/engine/
+  - vllm/executor/
+  - vllm/worker/worker_base.py
+  - vllm/v1/engine/
+  - vllm/v1/worker/
+  - tests/compile/fullgraph/test_basic_correctness.py
+  - tests/compile/test_wrapper.py
+  - tests/entrypoints/llm/test_collective_rpc.py
   - vllm/platforms/rocm.py
   commands:
-  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-small.txt
+  - pytest -v -s entrypoints/llm/test_collective_rpc.py
+  - pytest -v -s ./compile/fullgraph/test_basic_correctness.py
+  - pytest -v -s ./compile/test_wrapper.py
 
+#--------------------------------------------------------  mi325 · distributed  --------------------------------------------------------#
 
-- label: LM Eval Small Models (B200-MI325) # TBD
+- label: Distributed Torchrun + Shutdown Tests (2 GPUs) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
   agent_pool: mi325_2
+  num_gpus: 2
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/
-  - vllm/model_executor/layers/quantization
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - vllm/_aiter_ops.py
+  - vllm/distributed/
+  - vllm/engine/
+  - vllm/executor/
+  - vllm/worker/worker_base.py
+  - vllm/v1/engine/
+  - vllm/v1/worker/
+  - tests/distributed/
+  - tests/v1/shutdown
+  - tests/v1/worker/test_worker_memory_snapshot.py
   - vllm/platforms/rocm.py
   commands:
-  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-mi3xx-fp8-and-mixed.txt
-
+  - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
+  - VLLM_TEST_SAME_HOST=1 VLLM_TEST_WITH_DEFAULT_DEVICE_SET=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
+  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
+  - pytest -v -s v1/worker/test_worker_memory_snapshot.py
 
-- label: LM Eval Large Models (H200-MI325) # TBD
+- label: Distributed Compile + Comm (4 GPUs) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_8
-  optional: true
-  num_gpus: 8
+  agent_pool: mi325_4
+  num_gpus: 4
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/model_executor/layers/quantization/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - vllm/model_executor/layers/layernorm.py
-  - csrc/
-  - vllm/_aiter_ops.py
+  - vllm/distributed/
+  - tests/distributed/test_pynccl
+  - tests/distributed/test_events
+  - tests/compile/fullgraph/test_basic_correctness.py
+  - tests/distributed/test_symm_mem_allreduce.py
+  - tests/distributed/test_multiproc_executor.py
   - vllm/platforms/rocm.py
-  - tests/evals/
   commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-mi3xx.txt
+  - pytest -v -s compile/fullgraph/test_basic_correctness.py
+  - pytest -v -s distributed/test_pynccl.py
+  - pytest -v -s distributed/test_events.py
+  - pytest -v -s distributed/test_symm_mem_allreduce.py
+  - pytest -v -s distributed/test_multiproc_executor.py::test_multiproc_executor_multi_node
 
+#----------------------------------------------------------  mi325 · engine  -----------------------------------------------------------#
 
-- label: LM Eval Large Models (4 GPUs)(FP8) # 24.8m
-  timeout_in_minutes: 42
+- label: Engine # TBD
+  timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_4
-  num_gpus: 4
-  optional: true
-  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
+  agent_pool: mi325_1
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/
-  - vllm/model_executor/layers/quantization
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/engine
+  - tests/test_sequence
+  - tests/test_config
+  - tests/test_logger
+  - tests/test_vllm_port
   commands:
-  - export VLLM_USE_DEEP_GEMM=0
-  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-rocm-fp8.txt --tp-size=4
+  - pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py
 
+#-----------------------------------------------------------  mi325 · evals  -----------------------------------------------------------#
 
-- label: LM Eval Large Models (4 GPUs)(A100-MI325) # 17.3m
-  timeout_in_minutes: 27
+- label: LM Eval Large Models (4xH100-4xMI325) # TBD
+  timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
   agent_pool: mi325_4
   num_gpus: 4
@@ -2756,11 +2575,10 @@ steps:
   - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
-
+  - export VLLM_USE_DEEP_GEMM=0
+  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-rocm-fp8.txt --tp-size=4
 
-- label: ROCm LM Eval Large Models (8 Card) # TBD
+- label: ROCm LM Eval Large Models (8 GPUs) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
   agent_pool: mi325_8
@@ -2781,214 +2599,144 @@ steps:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-rocm.txt --tp-size=8
 
+#-----------------------------------------------------  mi325 · models / language  -----------------------------------------------------#
 
-- label: GPQA Eval (GPT-OSS) (H100-MI325) # TBD
+- label: Language Models Test (Extended Generation) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_2
-  num_gpus: 2
-  optional: true
+  agent_pool: mi325_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/
-  - vllm/model_executor/layers/quantization
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - vllm/model_executor/layers/fused_moe/
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
-  - tests/evals/gpt_oss/
-  commands:
-    - uv pip install --system 'gpt-oss[eval]==0.0.5'
-    - pytest -s -v evals/gpt_oss/test_gpqa_correctness.py --config-list-file=configs/models-gfx942.txt
-
-
-- label: DeepSeek V2-Lite Accuracy # 6.7m
-  timeout_in_minutes: 12
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_4
-  num_gpus: 4
-  optional: true
-  working_dir: "/vllm-workspace"
-  source_file_dependencies:
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/distributed/eplb
-  - vllm/model_executor/layers/fused_moe/
-  - vllm/model_executor/layers/quantization/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/backends/mla/
-  - vllm/v1/attention/selector.py
-  - .buildkite/scripts/scheduled_integration_test/
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/models/language/generation
   commands:
-  - bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh 0.25 200 8010
-
+  - uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@fix-rocm-7.0-warp-size-constexpr'
+  - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0'
+  - pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'
 
-- label: DeepSeek V2-Lite Prefetch Offload Accuracy (H100-MI325) # TBD
+- label: Language Models Tests (Hybrid) %N # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
   agent_pool: mi325_1
-  num_gpus: 1
-  working_dir: "/vllm-workspace"
+  torch_nightly: true
+  parallelism: 2
+  optional: true
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/model_executor/layers/fused_moe/
-  - vllm/model_executor/layers/quantization/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/backends/mla/
-  - vllm/v1/attention/selector.py
-  - .buildkite/scripts/scheduled_integration_test/
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/models/language/generation
   commands:
-  - bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_prefetch_offload.sh 0.25 200 8030
+  - uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@fix-rocm-7.0-warp-size-constexpr'
+  - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0'
+  - pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
+
+#----------------------------------------------------  mi325 · models / multimodal  ----------------------------------------------------#
 
+- label: Multi-Modal Models (Extended Pooling) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
+  agent_pool: mi325_1
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - vllm/
+  - tests/models/multimodal/pooling
+  commands:
+  - pytest -v -s models/multimodal/pooling -m 'not core_model'
 
-- label: Qwen3-30B-A3B-FP8-block Accuracy # 6.4m
-  timeout_in_minutes: 11
+- label: "Multi-Modal Models (Standard) 2: qwen3 + gemma" # TBD
+  timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_4
+  agent_pool: mi325_1
+  torch_nightly: true
   optional: true
-  working_dir: "/vllm-workspace"
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/model_executor/layers/quantization/
-  - vllm/distributed/eplb
-  - vllm/model_executor/layers/fused_moe/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - .buildkite/scripts/scheduled_integration_test/
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/models/multimodal
   commands:
-  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020
+  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+  - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "qwen3 or gemma"
+  - pytest -v -s models/multimodal/generation/test_qwen2_5_vl.py -m core_model
 
+#------------------------------------------------------------  mi325 · v1  -------------------------------------------------------------#
 
-- label: Qwen3-Next-80B-A3B-Instruct MTP Async EPLB Accuracy # 10.9m
-  timeout_in_minutes: 22
+- label: V1 Spec Decode # TBD
+  timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_4
-  num_gpus: 4
-  optional: true
-  working_dir: "/vllm-workspace"
+  agent_pool: mi325_1
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/spec_decode/
-  - vllm/distributed/eplb
-  - vllm/model_executor/layers/fused_moe/
-  - vllm/model_executor/layers/quantization/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - .buildkite/scripts/scheduled_integration_test/
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/v1/spec_decode
   commands:
-  - bash .buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh 0.8 1319 8040
-
-##### .buildkite/test_areas/compile.yaml #####
-# Slowly setting up the tests so that it is also easier for the 
-# CI team to review and upstream to the pipelinev2.
-# The following tests are important for vLLM IR Ops refactoring,
-# which affects fusion passes on ROCm. So we have to 
-# enable them as as soon as possible.
+  - pytest -v -s -m 'not slow_test' v1/spec_decode
 
-## TODO: Enable the test in this group
-# # corresponds to .buildkite/test_areas/compile.yaml
-# - label: Fusion and Compile Unit Tests (2xB200-2xMI325) # TBD
-#   timeout_in_minutes: 180
-#   mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325, tj]
-#   agent_pool: mi325_1 # changed to 1 GPU until the fusion all reduce is enabled then only revert back to 2 GPUs
-#   num_gpus: 1
-#   working_dir: "/vllm-workspace/"
-#   source_file_dependencies:
-#   - csrc/quantization/fp4/
-#   - vllm/model_executor/layers/quantization/
-#   - vllm/model_executor/layers/layernorm.py
-#   - vllm/model_executor/layers/activation.py
-#   - vllm/model_executor/layers/attention/attention.py
-#   - vllm/v1/attention/backends/flashinfer.py
-#   - vllm/compilation/ # TODO(luka) limit to vllm/compilation/passes
-#   - tests/compile/test_fusion_attn.py
-#   - tests/compile/test_silu_mul_quant_fusion.py
-#   - tests/compile/distributed/test_fusion_all_reduce.py
-#   - tests/compile/fullgraph/test_full_graph.py
-#   commands:
-#     - rocm-smi
-#     # we run all backend tests on ROCm
-#     # These two tests are covered in "PyTorch Compilation Passes Unit Tests"
-#     # - "pytest -v -s tests/compile/passes/test_fusion_attn.py"
-#     # - "pytest -v -s tests/compile/passes/test_silu_mul_quant_fusion.py"
-#     # TODO: this test is not supported on ROCm, there are aiter kernels for this.
-#     # - pytest -v -s tests/compile/passes/distributed/test_fusion_all_reduce.py
-#     # TODO: find out more details
-#     # - pytest -v -s tests/compile/fullgraph/test_full_graph.py::test_fp8_kv_scale_compile
+#########################################################################################################################################
+#                                                                                                                                       #
+#                                                         MI355 (gfx950) tests                                                          #
+#                                                                                                                                       #
+#########################################################################################################################################
 
+#--------------------------------------------------------  mi355 · benchmarks  ---------------------------------------------------------#
 
-- label: Fusion E2E Quick (H100-MI325) # TBD
+- label: Attention Benchmarks Smoke Test (B200-MI355) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  num_gpus: 1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
+  agent_pool: mi355_2
+  num_gpus: 2
   working_dir: "/vllm-workspace/"
   source_file_dependencies:
-  - csrc/quantization/
-  - vllm/model_executor/
+  - benchmarks/attention_benchmarks/
   - vllm/v1/attention/
-  - vllm/compilation/
-  - tests/compile/fusions_e2e/
   - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - rocm-smi
-  # Run all models and attn backends but only Inductor partition and native custom ops
-  - "pytest -v -s tests/compile/fusions_e2e/test_tp1_quant.py -k 'inductor_partition and not +rms_norm and not +quant_fp8'"
-  # Different from CUDA, Qwen requires +rms_norm and +quant_fp8 as rms+quant fusion is only supported on AITER
-  - "pytest -v -s tests/compile/fusions_e2e/test_tp1_quant.py -k 'inductor_partition and +rms_norm and +quant_fp8 and qwen3'"
+  - python3 benchmarks/attention_benchmarks/benchmark.py --backends ROCM_ATTN ROCM_AITER_FA ROCM_AITER_UNIFIED_ATTN --batch-specs "8q1s1k" --repeats 1 --warmup-iters 1
 
+#--------------------------------------------------------  mi355 · distributed  --------------------------------------------------------#
 
-- label: Fusion E2E Config Sweep (H100-MI325) # TBD
+- label: Distributed Tests (2xH100-2xMI355) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
-  agent_pool: mi325_1
-  num_gpus: 1
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
+  agent_pool: mi355_2
+  num_gpus: 2
+  optional: true
   working_dir: "/vllm-workspace/"
   source_file_dependencies:
-  - csrc/quantization/
-  - vllm/compilation/
-  - vllm/model_executor/layers/layernorm.py
-  - vllm/model_executor/layers/activation.py
-  - vllm/model_executor/layers/attention/attention.py
-  - vllm/model_executor/layers/quantization/input_quant_fp8.py
-  - tests/compile/fusions_e2e/
+  - vllm/distributed/
+  - vllm/v1/distributed/
+  - vllm/model_executor/layers/fused_moe/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
+  - tests/distributed/test_context_parallel.py
+  - tests/v1/distributed/test_dbo.py
+  - examples/features/data_parallel/data_parallel_offline.py
   - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - rocm-smi
-  - pytest -v -s tests/compile/fusions_e2e/test_tp1_quant.py -k "llama-3"
-
-## There are no ops on ROCm for these tests.
-## The test still passes but the logs are not useful.
-## fused ops just call torch.ops.symm_mem which 
-## exists in ROCm even though they don't work
-# - label: AsyncTP Correctness Tests (2xH100-2xMI325)
-# - label: Fusion E2E TP2 Quick (H100-MI325)
-# - label: Fusion E2E TP2 AsyncTP Config Sweep (H100-MI325)
-# - label: Fusion E2E TP2 (B200-MI325)
-# - label: Sequence Parallel Correctness Tests (2xH100-2xMI325)
+  - pytest -v -s tests/distributed/test_context_parallel.py
+  - pytest -v -s tests/v1/distributed/test_dbo.py
 
+#--------------------------------------------------------  mi355 · entrypoints  --------------------------------------------------------#
 
-#####################################################################################################################################
-#                                                                                                                                   #
-#                                                             gfx950                                                                #
-#                                                                                                                                   #
-#####################################################################################################################################
+- label: Entrypoints Integration (API Server 2) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
+  agent_pool: mi355_1
+  optional: true
+  fast_check: true
+  torch_nightly: true
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - vllm/
+  - tests/entrypoints/rpc
+  - tests/entrypoints/serve/instrumentator
+  - tests/tool_use
+  commands:
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s entrypoints/serve/instrumentator
+  - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
+  - pytest -v -s tool_use
 
 - label: Entrypoints Integration (API Server openai - Part 1) # TBD
   timeout_in_minutes: 180
@@ -2996,6 +2744,7 @@ steps:
   agent_pool: mi355_1
   fast_check: true
   torch_nightly: true
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
@@ -3005,13 +2754,13 @@ steps:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - pytest -v -s entrypoints/openai/chat_completion --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py
 
-
 - label: Entrypoints Integration (API Server openai - Part 2) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
   agent_pool: mi355_1
   fast_check: true
   torch_nightly: true
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
@@ -3020,10 +2769,8 @@ steps:
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - pytest -v -s entrypoints/openai/completion --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py
-  - pytest -v -s entrypoints/openai/speech_to_text/
   - pytest -v -s entrypoints/test_chat_utils.py
 
-
 - label: Entrypoints Integration (API Server openai - Part 3) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
@@ -3037,28 +2784,21 @@ steps:
   - tests/entrypoints/test_chat_utils
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion --ignore=entrypoints/openai/completion --ignore=entrypoints/openai/speech_to_text/ --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion --ignore=entrypoints/openai/completion --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses --ignore=entrypoints/openai/test_multi_api_servers.py
 
-
-- label: Entrypoints Integration (API Server 2) # TBD
+- label: Entrypoints Integration (Speech to Text) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi355]
   agent_pool: mi355_1
-  optional: true
   fast_check: true
   torch_nightly: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/entrypoints/rpc
-  - tests/entrypoints/serve/instrumentator
-  - tests/tool_use
+  - tests/entrypoints/speech_to_text
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/serve/instrumentator
-  - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
-  - pytest -v -s tool_use
-
+  - pytest -v -s entrypoints/speech_to_text
 
 - label: Entrypoints Integration (Pooling) # TBD
   timeout_in_minutes: 180
@@ -3074,117 +2814,112 @@ steps:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - pytest -v -s entrypoints/pooling
 
+#-----------------------------------------------------------  mi355 · evals  -----------------------------------------------------------#
 
-- label: Regression # TBD
+- label: GPQA Eval (GPT-OSS) (2xB200-2xMI355) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_1
+  agent_pool: mi355_2
+  num_gpus: 2
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/test_regression
-  commands:
-  - pip install modelscope
-  - pytest -v -s test_regression.py
-
-
-- label: V1 Spec Decode # TBD
-  timeout_in_minutes: 60
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_1
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/
-  - tests/v1/spec_decode
+  - csrc/
+  - vllm/model_executor/layers/quantization
+  - vllm/model_executor/models/
+  - vllm/model_executor/model_loader/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
+  - vllm/model_executor/layers/fused_moe/
+  - tests/evals/gpt_oss/
+  - vllm/_aiter_ops.py
+  - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s -m 'not slow_test' v1/spec_decode
-
+    - uv pip install --system 'gpt-oss[eval]==0.0.5'
+    - pytest -s -v evals/gpt_oss/test_gpqa_correctness.py --config-list-file=configs/models-gfx950.txt
 
-- label: V1 Sample + Logits # TBD
-  timeout_in_minutes: 60
+- label: LM Eval Qwen3-5 Models (B200-MI355) # TBD
+  timeout_in_minutes: 120
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_1
+  agent_pool: mi355_2
+  num_gpus: 2
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/v1/sample
-  - tests/v1/logits_processors
-  - tests/v1/test_oracle.py
-  - tests/v1/test_request.py
-  - tests/v1/test_outputs.py
+  - vllm/model_executor/models/qwen3_5.py
+  - vllm/model_executor/models/qwen3_5_mtp.py
+  - vllm/transformers_utils/configs/qwen3_5.py
+  - vllm/transformers_utils/configs/qwen3_5_moe.py
+  - vllm/model_executor/models/qwen.py
+  - vllm/model_executor/models/qwen2.py
+  - vllm/model_executor/models/qwen3.py
+  - vllm/model_executor/models/qwen3_next.py
+  - vllm/model_executor/models/qwen3_next_mtp.py
+  - vllm/model_executor/layers/fla/ops/
+  - vllm/_aiter_ops.py
+  - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s v1/sample
-  - pytest -v -s v1/logits_processors
-  - pytest -v -s v1/test_oracle.py
-  - pytest -v -s v1/test_request.py
-  - pytest -v -s v1/test_outputs.py
-
+  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-qwen35-mi355.txt
 
-- label: V1 Core + KV + Metrics # TBD
-  timeout_in_minutes: 60
+- label: LM Eval Small Models (2xB200-2xMI355) # TBD
+  timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_1
+  agent_pool: mi355_2
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/v1/core
-  - tests/v1/executor
-  - tests/v1/kv_offload
-  - tests/v1/worker
-  - tests/v1/kv_connector/unit
-  - tests/v1/metrics
-  - tests/entrypoints/openai/correctness/test_lmeval.py
+  - csrc/
+  - vllm/model_executor/layers/quantization
+  - vllm/model_executor/models/
+  - vllm/model_executor/model_loader/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
+  - vllm/_aiter_ops.py
+  - vllm/platforms/rocm.py
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
-  - pytest -v -s -m 'not cpu_test' v1/core
-  - pytest -v -s v1/executor
-  - pytest -v -s v1/kv_offload
-  - pytest -v -s v1/worker
-  - pytest -v -s -m 'not cpu_test' v1/kv_connector/unit
-  - pytest -v -s -m 'not cpu_test' v1/metrics
-  - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
-  - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
-
+  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-mi3xx-fp8-and-mixed.txt
 
-- label: V1 Speculative Decoding (slow) # TBD
+- label: Qwen3-30B-A3B-FP8-block Accuracy (B200-MI355) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_1
-  working_dir: "/vllm-workspace/tests"
+  agent_pool: mi355_2
+  num_gpus: 2
+  working_dir: "/vllm-workspace"
   source_file_dependencies:
-  - vllm/v1/spec_decode/
   - vllm/model_executor/models/
-  - vllm/v1/attention/
-  - vllm/model_executor/layers/
-  - tests/v1/spec_decode/
+  - vllm/model_executor/model_loader/
+  - vllm/model_executor/layers/quantization/
+  - vllm/model_executor/layers/fused_moe/
+  - vllm/distributed/eplb
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
+  - .buildkite/scripts/scheduled_integration_test/
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_eagle.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_extract_hidden_states.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_max_len.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_mtp.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_ngram.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_speculators_eagle3.py
-  - pytest -v -s -m 'slow_test' v1/spec_decode/test_tree_attention.py
-
+  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020 2 1
 
-- label: V1 attention (B200-MI355) # TBD
+- label: LM Eval Large Models (4xH100-4xMI355) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_1
-  working_dir: "/vllm-workspace/tests"
+  agent_pool: mi355_4
+  num_gpus: 4
+  optional: true
+  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
   source_file_dependencies:
-  - vllm/config/attention.py
-  - vllm/model_executor/layers/attention
-  - vllm/v1/attention
-  - tests/v1/attention
+  - csrc/
+  - vllm/model_executor/layers/quantization
+  - vllm/model_executor/models/
+  - vllm/model_executor/model_loader/
+  - vllm/v1/attention/backends/
+  - vllm/v1/attention/selector.py
   - vllm/_aiter_ops.py
-  - vllm/envs.py
   - vllm/platforms/rocm.py
   commands:
-  - pytest -v -s v1/attention
+  - export VLLM_USE_DEEP_GEMM=0
+  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-rocm-fp8.txt --tp-size=4
 
+#---------------------------------------------------------  mi355 · examples  ----------------------------------------------------------#
 
 - label: Examples # TBD
   timeout_in_minutes: 180
@@ -3206,19 +2941,44 @@ steps:
   - python3 basic/offline_inference/embed.py
   - python3 basic/offline_inference/score.py
   # Multi-modal models
-  - python3 offline_inference/audio_language.py --seed 0
-  - python3 offline_inference/vision_language.py --seed 0
-  - python3 offline_inference/vision_language_multi_image.py --seed 0
-  - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
+  - python3 generate/multimodal/audio_language_offline.py --seed 0
+  - python3 generate/multimodal/vision_language_offline.py --seed 0
+  - python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
+  - python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
   # Pooling models
   - python3 pooling/embed/vision_embedding_offline.py --seed 0
   # Features demo
-  - python3 offline_inference/prefix_caching.py
-  - python3 offline_inference/llm_engine_example.py
-  - python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
-  - python3 offline_inference/spec_decode.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
-  - python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536
+  - python3 features/automatic_prefix_caching/prefix_caching_offline.py
+  - python3 deployment/llm_engine_example.py
+  - python3 features/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 features/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
+  - python3 features/speculative_decoding/spec_decode_offline.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
+  - python3 features/speculative_decoding/spec_decode_offline.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536
 
+#----------------------------------------------------------  mi355 · kernels  ----------------------------------------------------------#
+
+- label: Kernels (B200-MI355) # TBD
+  timeout_in_minutes: 180
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
+  agent_pool: mi355_1
+  working_dir: "/vllm-workspace/"
+  source_file_dependencies:
+  - csrc/quantization/fp4/
+  - csrc/attention/mla/
+  - csrc/quantization/cutlass_w8a8/moe/
+  - vllm/model_executor/layers/fused_moe/cutlass_moe.py
+  - vllm/v1/attention/backends/triton_attn.py
+  - vllm/v1/attention/backends/rocm_attn.py
+  - vllm/v1/attention/backends/rocm_aiter_fa.py
+  - vllm/v1/attention/backends/rocm_aiter_unified_attn.py
+  - vllm/v1/attention/backends/mla/aiter_triton_mla.py
+  - vllm/v1/attention/backends/mla/rocm_aiter_mla.py
+  - vllm/v1/attention/selector.py
+  - vllm/platforms/rocm.py
+  - vllm/_aiter_ops.py
+  commands:
+  - rocm-smi
+  - python3 examples/basic/offline_inference/chat.py
+  - pytest -v -s tests/kernels/attention/test_attention_selector.py
 
 - label: Kernels Attention Test %N # TBD
   timeout_in_minutes: 180
@@ -3238,25 +2998,6 @@ steps:
   commands:
   - pytest -v -s kernels/attention --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
 
-
-- label: Kernels Quantization Test %N # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_1
-  parallelism: 2
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - csrc/quantization/
-  - vllm/model_executor/layers/quantization
-  - tests/kernels/quantization
-  - tests/kernels/quantization/test_rocm_skinny_gemms.py
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
-  - vllm/model_executor/kernels/
-  commands:
-  - pytest -v -s kernels/quantization --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
-
-
 - label: Kernels MoE Test %N # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
@@ -3277,54 +3018,40 @@ steps:
   - pytest -v -s kernels/moe --ignore=kernels/moe/test_modular_oai_triton_moe.py --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
   - pytest -v -s kernels/moe/test_modular_oai_triton_moe.py --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
 
-
-- label: Kernels FP8 MoE Test # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_2
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - csrc/moe/
-  - csrc/quantization/w8a8/cutlass/moe/
-  - vllm/model_executor/layers/fused_moe/
-  - tests/kernels/moe/test_deepep_moe.py
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
-  - vllm/envs.py
-  commands:
-    - pytest -v -s kernels/moe/test_deepep_moe.py
-
-
-- label: Quantization # TBD
+- label: Kernels Quantization Test %N # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
   agent_pool: mi355_1
+  parallelism: 2
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/
+  - csrc/quantization/
   - vllm/model_executor/layers/quantization
-  - tests/quantization
+  - tests/kernels/quantization
+  - tests/kernels/quantization/test_rocm_skinny_gemms.py
   - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
+  - vllm/model_executor/kernels/
   commands:
-  - uv pip install --system torchao==0.14.1
-  - uv pip install --system conch-triton-kernels
-  - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py
-
+  - pytest -v -s kernels/quantization --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
 
-- label: Language Models Tests (Standard) # TBD
+- label: Kernels FP8 MoE Test (2xH100-2xMI355) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_1
-  torch_nightly: true
+  agent_pool: mi355_2
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/language
+  - csrc/moe/
+  - csrc/quantization/w8a8/cutlass/moe/
+  - vllm/model_executor/layers/fused_moe/
+  - tests/kernels/moe/test_deepep_moe.py
+  - vllm/_aiter_ops.py
+  - vllm/platforms/rocm.py
+  - vllm/envs.py
   commands:
-  - pip freeze | grep -E 'torch'
-  - pytest -v -s models/language -m 'core_model and (not slow_test)'
+    - pytest -v -s kernels/moe/test_deepep_moe.py
 
+#-----------------------------------------------------  mi355 · models / language  -----------------------------------------------------#
 
 - label: Language Models Test (Extended Generation) # TBD
   timeout_in_minutes: 180
@@ -3335,15 +3062,15 @@ steps:
   - vllm/
   - tests/models/language/generation
   commands:
-  - uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@fix-rocm-7.0-warp-size-constexpr'
-  - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
+  - uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@rocm-7.0-v2.3.0'
+  - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0'
   - pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'
 
-
 - label: Language Models Test (Extended Pooling) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
   agent_pool: mi355_1
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
@@ -3351,71 +3078,48 @@ steps:
   commands:
   - pytest -v -s models/language/pooling -m 'not core_model'
 
-
-- label: "Multi-Modal Models (Standard) 1: qwen2" # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_1
-  torch_nightly: true
-  optional: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal
-  commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "qwen2"
-  - pytest -v -s models/multimodal/generation/test_ultravox.py -m core_model
-
-
-- label: "Multi-Modal Models (Standard) 2: qwen3 + gemma" # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_1
-  torch_nightly: true
-  optional: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal
-  commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "qwen3 or gemma"
-  - pytest -v -s models/multimodal/generation/test_qwen2_5_vl.py -m core_model
-
-
-- label: "Multi-Modal Models (Standard) 3: llava + qwen2_vl" # TBD
+- label: Language Models Test (PPL) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
   agent_pool: mi355_1
-  torch_nightly: true
-  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal/generation
-  - tests/models/multimodal/test_mapping.py
+  - vllm/model_executor/models/qwen3_5.py
+  - vllm/model_executor/models/qwen3_5_mtp.py
+  - vllm/transformers_utils/configs/qwen3_5.py
+  - vllm/transformers_utils/configs/qwen3_5_moe.py
+  - vllm/model_executor/models/qwen.py
+  - vllm/model_executor/models/qwen2.py
+  - vllm/model_executor/models/qwen3.py
+  - vllm/model_executor/models/qwen3_next.py
+  - vllm/model_executor/models/qwen3_next_mtp.py
+  - vllm/model_executor/layers/fla/ops/
+  - vllm/_aiter_ops.py
+  - vllm/v1/attention/backends/triton_attn.py
+  - vllm/v1/attention/backends/rocm_attn.py
+  - vllm/v1/attention/backends/rocm_aiter_unified_attn.py
+  - vllm/v1/attention/backends/rocm_aiter_fa.py
+  - vllm/v1/attention/backends/flex_attention.py
+  - vllm/v1/attention/ops/
+  - vllm/platforms/rocm.py
+  - tests/models/language/generation_ppl_test
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "not qwen2 and not qwen3 and not gemma"
-  - pytest -v -s models/multimodal/generation/test_qwen2_vl.py -m core_model
-
+  - pytest -v -s models/language/generation_ppl_test
 
-- label: "Multi-Modal Models (Standard) 4: other + whisper" # TBD
+- label: Language Models Tests (Standard) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
   agent_pool: mi355_1
   torch_nightly: true
-  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/models/multimodal/generation
+  - tests/models/language
   commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/generation/test_ultravox.py --ignore models/multimodal/generation/test_qwen2_5_vl.py --ignore models/multimodal/generation/test_qwen2_vl.py --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing
-  - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model
+  - pip freeze | grep -E 'torch'
+  - pytest -v -s models/language -m 'core_model and (not slow_test)'
 
+#----------------------------------------------------  mi355 · models / multimodal  ----------------------------------------------------#
 
 - label: Multi-Modal Models (Extended Generation 1) # TBD
   timeout_in_minutes: 180
@@ -3432,21 +3136,6 @@ steps:
   - pytest -v -s models/multimodal/generation -m 'not core_model' --ignore models/multimodal/generation/test_common.py
   - pytest -v -s models/multimodal/test_mapping.py
 
-
-- label: Multi-Modal Models (Extended Generation 2) # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_1
-  optional: true
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/
-  - tests/models/multimodal/generation
-  commands:
-  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model'
-
-
 - label: Multi-Modal Models (Extended Generation 3) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
@@ -3460,7 +3149,6 @@ steps:
   - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
   - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model'
 
-
 - label: Multi-Modal Models (Extended Pooling) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
@@ -3473,303 +3161,256 @@ steps:
   commands:
   - pytest -v -s models/multimodal/pooling -m 'not core_model'
 
-
-- label: Quantized Models Test # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_1
-  working_dir: "/vllm-workspace/tests"
-  source_file_dependencies:
-  - vllm/model_executor/layers/quantization
-  - tests/models/quantization
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
-  - vllm/model_executor/model_loader/
-  commands:
-  - pytest -v -s models/quantization
-
-
-- label: Kernels (B200-MI355) # TBD
+- label: "Multi-Modal Models (Standard) 1: qwen2" # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
   agent_pool: mi355_1
-  working_dir: "/vllm-workspace/"
-  source_file_dependencies:
-  - csrc/quantization/fp4/
-  - csrc/attention/mla/
-  - csrc/quantization/cutlass_w8a8/moe/
-  - vllm/model_executor/layers/fused_moe/cutlass_moe.py
-  - vllm/v1/attention/backends/triton_attn.py
-  - vllm/v1/attention/backends/rocm_attn.py
-  - vllm/v1/attention/backends/rocm_aiter_fa.py
-  - vllm/v1/attention/backends/rocm_aiter_unified_attn.py
-  - vllm/v1/attention/backends/mla/aiter_triton_mla.py
-  - vllm/v1/attention/backends/mla/rocm_aiter_mla.py
-  - vllm/v1/attention/selector.py
-  - vllm/platforms/rocm.py
-  - vllm/_aiter_ops.py
-  commands:
-  - rocm-smi
-  - python3 examples/basic/offline_inference/chat.py
-  - pytest -v -s tests/kernels/attention/test_attention_selector.py
-
-
-- label: Weight Loading Multiple GPU # TBD
-  timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_2
-  num_gpus: 2
+  torch_nightly: true
+  optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/weight_loading
+  - tests/models/multimodal
   commands:
-  - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-amd.txt
-
+  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+  - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "qwen2"
+  - pytest -v -s models/multimodal/generation/test_ultravox.py -m core_model
 
-- label: Weight Loading Multiple GPU - Large Models # TBD
+- label: "Multi-Modal Models (Standard) 4: other + whisper" # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_2
-  working_dir: "/vllm-workspace/tests"
-  num_gpus: 2
+  agent_pool: mi355_1
+  torch_nightly: true
   optional: true
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/weight_loading
+  - tests/models/multimodal/generation
   commands:
-  - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large-amd.txt
+  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+  - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/generation/test_ultravox.py --ignore models/multimodal/generation/test_qwen2_5_vl.py --ignore models/multimodal/generation/test_qwen2_vl.py --ignore models/multimodal/generation/test_whisper.py  --ignore models/multimodal/generation/test_memory_leak.py --ignore models/multimodal/processing
+  - pytest -v -s models/multimodal/generation/test_memory_leak.py -m core_model
+  - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model
 
+#-----------------------------------------------------  mi355 · models / quantized  -----------------------------------------------------#
 
-- label: Ray Dependency Compatibility Check # TBD
+- label: Quantized Models Test # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
   agent_pool: mi355_1
-  optional: true
-  working_dir: "/"
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - requirements/
-  - setup.py
+  - vllm/model_executor/layers/quantization
+  - tests/models/quantization
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
+  - vllm/model_executor/model_loader/
   commands:
-  - bash /vllm-workspace/.buildkite/scripts/check-ray-compatibility.sh
+  - pytest -v -s models/quantization
 
+#-------------------------------------------------------  mi355 · quantization  --------------------------------------------------------#
 
-- label: Distributed NixlConnector PD accuracy (4 GPUs) # TBD
+- label: Quantization # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_4
-  num_gpus: 4
-  optional: true
+  agent_pool: mi355_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-  - tests/v1/kv_connector/nixl_integration/
+  - csrc/
+  - vllm/model_executor/layers/quantization
+  - tests/quantization
+  - vllm/_aiter_ops.py
   - vllm/platforms/rocm.py
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
-  - ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
+  - uv pip install --system torchao==0.17.0
+  - uv pip install --system conch-triton-kernels
+  - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py
+
+# - label: Quantized MoE Test (B200-MI355) # TBD
+#   timeout_in_minutes: 180
+#   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
+#   agent_pool: mi355_1
+#   working_dir: "/vllm-workspace/"
+#   source_file_dependencies:
+#   - tests/quantization/test_gfx950_moe.py
+#   - vllm/model_executor/models/deepseek_v2.py
+#   - vllm/model_executor/models/gpt_oss.py
+#   - vllm/model_executor/models/llama4.py
+#   - vllm/model_executor/layers/fused_moe
+#   - vllm/model_executor/layers/quantization/compressed_tensors
+#   - vllm/model_executor/layers/quantization/modelopt.py
+#   - vllm/model_executor/layers/quantization/mxfp4.py
+#   - vllm/v1/attention/backends/triton_attn.py
+#   - vllm/v1/attention/backends/rocm_attn.py
+#   - vllm/v1/attention/backends/rocm_aiter_fa.py
+#   - vllm/v1/attention/backends/mla/
+#   - vllm/v1/attention/selector.py
+#   - vllm/model_executor/layers/layernorm.py
+#   - vllm/_aiter_ops.py
+#   - vllm/platforms/rocm.py
+#   - vllm/model_executor/model_loader/
+#   commands:
+#   - pytest -s -v tests/quantization/test_gfx950_moe.py
 
+#------------------------------------------------------------  mi355 · v1  -------------------------------------------------------------#
 
-- label: DP EP Distributed NixlConnector PD accuracy tests (4 GPUs) # TBD
+- label: V1 attention (B200-MI355) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_4
-  num_gpus: 4
-  optional: true
+  agent_pool: mi355_1
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-  - tests/v1/kv_connector/nixl_integration/
+  - vllm/config/attention.py
+  - vllm/model_executor/layers/attention
+  - vllm/v1/attention
+  - tests/v1/attention
+  - vllm/_aiter_ops.py
+  - vllm/envs.py
   - vllm/platforms/rocm.py
   commands:
-  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
-  - DP_EP=1 ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
+  - pytest -v -s v1/attention
 
-- label: NixlConnector PD + Spec Decode acceptance (2 GPUs) # TBD
-  timeout_in_minutes: 180
+- label: V1 Core + KV + Metrics # TBD
+  timeout_in_minutes: 60
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_2
-  num_gpus: 2
+  agent_pool: mi355_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-  - vllm/v1/worker/kv_connector_model_runner_mixin.py
-  - tests/v1/kv_connector/nixl_integration/
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/v1/core
+  - tests/v1/executor
+  - tests/v1/kv_offload
+  - tests/v1/worker
+  - tests/v1/kv_connector/unit
+  - tests/v1/metrics
+  - tests/entrypoints/openai/correctness/test_lmeval.py
   commands:
   - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
-  - ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/spec_decode_acceptance_test.sh
-
+  - pytest -v -s -m 'not cpu_test' v1/core
+  - pytest -v -s v1/executor
+  - pytest -v -s v1/kv_offload
+  - pytest -v -s v1/worker
+  - pytest -v -s -m 'not cpu_test' v1/kv_connector/unit
+  - pytest -v -s -m 'not cpu_test' v1/metrics
+  - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
+  - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
 
-- label: Distributed Tests (2 GPUs)(H100-MI355) # TBD
-  timeout_in_minutes: 180
+- label: V1 Sample + Logits # TBD
+  timeout_in_minutes: 60
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_2
-  num_gpus: 2
+  agent_pool: mi355_1
   optional: true
-  working_dir: "/vllm-workspace/"
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/distributed/
-  - vllm/v1/distributed/
-  - vllm/model_executor/layers/fused_moe/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - tests/distributed/test_context_parallel.py
-  - tests/v1/distributed/test_dbo.py
-  - examples/offline_inference/data_parallel.py
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/v1/sample
+  - tests/v1/logits_processors
+  - tests/v1/test_oracle.py
+  - tests/v1/test_request.py
+  - tests/v1/test_outputs.py
   commands:
-  - export TORCH_NCCL_BLOCKING_WAIT=1
-  - pytest -v -s tests/distributed/test_context_parallel.py
-  - VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model=Qwen/Qwen1.5-MoE-A2.7B -tp=1 -dp=2 --max-model-len=2048 --all2all-backend=deepep_high_throughput
-  - pytest -v -s tests/v1/distributed/test_dbo.py
-
+  - pytest -v -s v1/sample
+  - pytest -v -s v1/logits_processors
+  - pytest -v -s v1/test_oracle.py
+  - pytest -v -s v1/test_request.py
+  - pytest -v -s v1/test_outputs.py
 
-- label: Distributed Compile Unit Tests (2xH100-2xMI355) # TBD
-  timeout_in_minutes: 180
+- label: V1 Spec Decode # TBD
+  timeout_in_minutes: 60
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_2
-  num_gpus: 2
-  optional: true
-  working_dir: "/vllm-workspace/"
+  agent_pool: mi355_1
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/compilation/
-  - vllm/model_executor/layers
-  - tests/compile/passes/distributed/
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/v1/spec_decode
   commands:
-  - export VLLM_TEST_CLEAN_GPU_MEMORY=1
-  - VLLM_TEST_CLEAN_GPU_MEMORY=1 pytest -v -s tests/compile/passes/distributed/test_async_tp.py
-  - pytest -v -s tests/compile/passes/distributed/test_sequence_parallelism.py
-  # TODO: this test is not supported on ROCm, there are aiter kernels for this.
-  # - pytest -v -s tests/compile/passes/distributed/test_fusion_all_reduce.py
-  # - pytest -v -s tests/compile/distributed/test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm
-  # - "VLLM_TEST_CLEAN_GPU_MEMORY=1 pytest -v -s tests/compile/distributed/test_fusions_e2e.py -k 'not Llama-4'"
-
+  - pytest -v -s -m 'not slow_test' v1/spec_decode
 
-- label: LM Eval Small Models (B200-MI355) # TBD
+- label: NixlConnector PD + Spec Decode acceptance (2 GPUs) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
   agent_pool: mi355_2
+  num_gpus: 2
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/
-  - vllm/model_executor/layers/quantization
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - vllm/_aiter_ops.py
+  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+  - vllm/v1/worker/kv_connector_model_runner_mixin.py
+  - tests/v1/kv_connector/nixl_integration/
   - vllm/platforms/rocm.py
   commands:
-  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-mi3xx-fp8-and-mixed.txt
-
+  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
+  - ATTENTION_BACKEND=ROCM_ATTN bash v1/kv_connector/nixl_integration/spec_decode_acceptance_test.sh
 
-- label: LM Eval Large Models (4 GPUs)(FP8) # TBD
+- label: Distributed NixlConnector PD accuracy (4 GPUs) # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
   agent_pool: mi355_4
   num_gpus: 4
   optional: true
-  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/
-  - vllm/model_executor/layers/quantization
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - vllm/_aiter_ops.py
+  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+  - tests/v1/kv_connector/nixl_integration/
   - vllm/platforms/rocm.py
   commands:
-  - export VLLM_USE_DEEP_GEMM=0
-  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-rocm-fp8.txt --tp-size=4
-
+  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
+  - ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
 
-- label: GPQA Eval (GPT-OSS) (B200-MI355) # TBD
+- label: DP EP Distributed NixlConnector PD accuracy tests (4 GPUs) # TBD
   timeout_in_minutes: 180
-  mirror_hardwares: [amdexperimental, amdproduction, amdgfx955nightly, amdmi355]
-  agent_pool: mi355_2
-  num_gpus: 2
+  mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
+  agent_pool: mi355_4
+  num_gpus: 4
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - csrc/
-  - vllm/model_executor/layers/quantization
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - vllm/model_executor/layers/fused_moe/
-  - tests/evals/gpt_oss/
-  - vllm/_aiter_ops.py
+  - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+  - tests/v1/kv_connector/nixl_integration/
   - vllm/platforms/rocm.py
   commands:
-    - uv pip install --system 'gpt-oss[eval]==0.0.5'
-    - pytest -s -v evals/gpt_oss/test_gpqa_correctness.py --config-list-file=configs/models-gfx950.txt
+  - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
+  - DP_EP=1 ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
 
+#------------------------------------------------------  mi355 · weight_loading  -------------------------------------------------------#
 
-- label: Qwen3-30B-A3B-FP8-block Accuracy (B200-MI355) # TBD
+- label: Weight Loading Multiple GPU # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
   agent_pool: mi355_2
   num_gpus: 2
-  working_dir: "/vllm-workspace"
+  working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/model_executor/models/
-  - vllm/model_executor/model_loader/
-  - vllm/model_executor/layers/quantization/
-  - vllm/model_executor/layers/fused_moe/
-  - vllm/distributed/eplb
-  - vllm/v1/attention/backends/
-  - vllm/v1/attention/selector.py
-  - .buildkite/scripts/scheduled_integration_test/
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/weight_loading
   commands:
-  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020 2 1
-
+  - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-amd.txt
 
-- label: Attention Benchmarks Smoke Test (B200-MI355) # TBD
+- label: Weight Loading Multiple GPU - Large Models # TBD
   timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
   agent_pool: mi355_2
+  working_dir: "/vllm-workspace/tests"
   num_gpus: 2
-  working_dir: "/vllm-workspace/"
+  optional: true
   source_file_dependencies:
-  - benchmarks/attention_benchmarks/
-  - vllm/v1/attention/
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/weight_loading
   commands:
-  - python3 benchmarks/attention_benchmarks/benchmark.py --backends ROCM_ATTN ROCM_AITER_FA ROCM_AITER_UNIFIED_ATTN --batch-specs "8q1s1k" --repeats 1 --warmup-iters 1
+  - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large-amd.txt
 
+#-----------------------------------------------------------  mi355 · misc  ------------------------------------------------------------#
 
-- label: LM Eval Qwen3-5 Models (B200-MI355) # TBD
-  timeout_in_minutes: 120
+- label: Regression # TBD
+  timeout_in_minutes: 180
   mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
-  agent_pool: mi355_2
-  num_gpus: 2
+  agent_pool: mi355_1
   optional: true
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - vllm/model_executor/models/qwen3_5.py
-  - vllm/model_executor/models/qwen3_5_mtp.py
-  - vllm/transformers_utils/configs/qwen3_5.py
-  - vllm/transformers_utils/configs/qwen3_5_moe.py
-  - vllm/model_executor/models/qwen.py
-  - vllm/model_executor/models/qwen2.py
-  - vllm/model_executor/models/qwen3.py
-  - vllm/model_executor/models/qwen3_next.py
-  - vllm/model_executor/models/qwen3_next_mtp.py
-  - vllm/model_executor/layers/fla/ops/
-  - vllm/_aiter_ops.py
-  - vllm/platforms/rocm.py
+  - vllm/
+  - tests/test_regression
   commands:
-  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-qwen35-mi355.txt
+  - pip install modelscope
+  - pytest -v -s test_regression.py
diff --git a/.buildkite/test_areas/attention.yaml b/.buildkite/test_areas/attention.yaml
index 4bcf116f2756..d3947a03162b 100644
--- a/.buildkite/test_areas/attention.yaml
+++ b/.buildkite/test_areas/attention.yaml
@@ -3,6 +3,7 @@ depends_on:
   - image-build
 steps:
 - label: V1 attention (H100)
+  key: v1-attention-h100
   timeout_in_minutes: 30
   device: h100
   source_file_dependencies:
@@ -14,8 +15,9 @@ steps:
     - pytest -v -s v1/attention
 
 - label: V1 attention (B200)
+  key: v1-attention-b200
   timeout_in_minutes: 30
-  device: b200
+  device: b200-k8s
   source_file_dependencies:
     - vllm/config/attention.py
     - vllm/model_executor/layers/attention
diff --git a/.buildkite/test_areas/basic_correctness.yaml b/.buildkite/test_areas/basic_correctness.yaml
index 759d2b535871..5d547cd48637 100644
--- a/.buildkite/test_areas/basic_correctness.yaml
+++ b/.buildkite/test_areas/basic_correctness.yaml
@@ -3,7 +3,9 @@ depends_on:
   - image-build
 steps:
 - label: Basic Correctness
+  key: basic-correctness
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/basic_correctness/test_basic_correctness
diff --git a/.buildkite/test_areas/benchmarks.yaml b/.buildkite/test_areas/benchmarks.yaml
index a30ec60ea960..85f804780179 100644
--- a/.buildkite/test_areas/benchmarks.yaml
+++ b/.buildkite/test_areas/benchmarks.yaml
@@ -2,16 +2,10 @@ group: Benchmarks
 depends_on: 
   - image-build
 steps:
-- label: Benchmarks
-  timeout_in_minutes: 20
-  working_dir: "/vllm-workspace/.buildkite"
-  source_file_dependencies:
-  - benchmarks/
-  commands:
-  - bash scripts/run-benchmarks.sh
-
 - label: Benchmarks CLI Test
+  key: benchmarks-cli-test
   timeout_in_minutes: 20
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/benchmarks/
@@ -19,7 +13,8 @@ steps:
   - pytest -v -s benchmarks/
 
 - label: Attention Benchmarks Smoke Test (B200)
-  device: b200
+  key: attention-benchmarks-smoke-test-b200
+  device: b200-k8s
   num_gpus: 2
   optional: true
   working_dir: "/vllm-workspace/"
diff --git a/.buildkite/test_areas/compile.yaml b/.buildkite/test_areas/compile.yaml
index c21b66552494..01248738d519 100644
--- a/.buildkite/test_areas/compile.yaml
+++ b/.buildkite/test_areas/compile.yaml
@@ -3,6 +3,7 @@ depends_on:
   - image-build
 steps:
 - label: Sequence Parallel Correctness Tests (2 GPUs)
+  key: sequence-parallel-correctness-tests-2-gpus
   timeout_in_minutes: 50
   working_dir: "/vllm-workspace/"
   num_devices: 2
@@ -17,6 +18,7 @@ steps:
   - pytest -v -s tests/compile/correctness_e2e/test_sequence_parallel.py
 
 - label: Sequence Parallel Correctness Tests (2xH100)
+  key: sequence-parallel-correctness-tests-2xh100
   timeout_in_minutes: 50
   working_dir: "/vllm-workspace/"
   device: h100
@@ -27,6 +29,7 @@ steps:
   - pytest -v -s tests/compile/correctness_e2e/test_sequence_parallel.py
 
 - label: AsyncTP Correctness Tests (2xH100)
+  key: asynctp-correctness-tests-2xh100
   timeout_in_minutes: 50
   working_dir: "/vllm-workspace/"
   device: h100
@@ -37,9 +40,10 @@ steps:
   - pytest -v -s tests/compile/correctness_e2e/test_async_tp.py
 
 - label: AsyncTP Correctness Tests (B200)
+  key: asynctp-correctness-tests-b200
   timeout_in_minutes: 50
   working_dir: "/vllm-workspace/"
-  device: b200
+  device: b200-k8s
   optional: true
   num_devices: 2
   commands:
@@ -47,6 +51,7 @@ steps:
   - pytest -v -s tests/compile/correctness_e2e/test_async_tp.py
 
 - label: Distributed Compile Unit Tests (2xH100)
+  key: distributed-compile-unit-tests-2xh100
   timeout_in_minutes: 20
   working_dir: "/vllm-workspace/"
   device: h100
@@ -60,9 +65,10 @@ steps:
   - pytest -s -v tests/compile/passes/distributed
 
 - label: Fusion and Compile Unit Tests (2xB200)
+  key: fusion-and-compile-unit-tests-2xb200
   timeout_in_minutes: 20
   working_dir: "/vllm-workspace/"
-  device: b200
+  device: b200-k8s
   source_file_dependencies:
   - csrc/quantization/fp4/
   - vllm/model_executor/layers/quantization/
@@ -72,6 +78,7 @@ steps:
   - vllm/v1/attention/backends/flashinfer.py
   - vllm/compilation/ # TODO(luka) limit to vllm/compilation/passes
   - tests/compile/passes/test_fusion_attn.py
+  - tests/compile/passes/test_mla_attn_quant_fusion.py
   - tests/compile/passes/test_silu_mul_quant_fusion.py
   - tests/compile/passes/distributed/test_fusion_all_reduce.py
   - tests/compile/fullgraph/test_full_graph.py
@@ -79,6 +86,7 @@ steps:
     # b200 runners are limited, so we limit the tests to the minimum set only supported on Blackwell
     - nvidia-smi
     - pytest -v -s tests/compile/passes/test_fusion_attn.py -k FLASHINFER
+    - pytest -v -s tests/compile/passes/test_mla_attn_quant_fusion.py
     - pytest -v -s tests/compile/passes/test_silu_mul_quant_fusion.py
     # this runner has 2 GPUs available even though num_devices=2 is not set
     - pytest -v -s tests/compile/passes/distributed/test_fusion_all_reduce.py
@@ -87,6 +95,7 @@ steps:
     - pytest -v -s tests/compile/fullgraph/test_full_graph.py::test_fp8_kv_scale_compile
 
 - label: Fusion E2E Quick (H100)
+  key: fusion-e2e-quick-h100
   timeout_in_minutes: 15
   working_dir: "/vllm-workspace/"
   device: h100
@@ -105,6 +114,7 @@ steps:
     - pytest -v -s tests/compile/fusions_e2e/test_tp1_quant.py -k "inductor_partition and not +rms_norm and +quant_fp8 and (qwen3 or deepseek)"
 
 - label: Fusion E2E Config Sweep (H100)
+  key: fusion-e2e-config-sweep-h100
   timeout_in_minutes: 30
   working_dir: "/vllm-workspace/"
   device: h100
@@ -124,9 +134,10 @@ steps:
     - pytest -v -s tests/compile/fusions_e2e/test_tp1_quant.py -k "llama-3"
 
 - label: Fusion E2E Config Sweep (B200)
+  key: fusion-e2e-config-sweep-b200
   timeout_in_minutes: 30
   working_dir: "/vllm-workspace/"
-  device: b200
+  device: b200-k8s
   num_devices: 1
   optional: true
   commands:
@@ -137,6 +148,7 @@ steps:
     - pytest -v -s tests/compile/fusions_e2e/test_tp1_quant.py -k "inductor_partition and (FLASHINFER and not +rms_norm and (not +quant_fp8 or +quant_fp8 and (qwen3 or deepseek)) or llama-3)"
 
 - label: Fusion E2E TP2 Quick (H100)
+  key: fusion-e2e-tp2-quick-h100
   timeout_in_minutes: 20
   working_dir: "/vllm-workspace/"
   device: h100
@@ -154,6 +166,7 @@ steps:
     - pytest -v -s tests/compile/fusions_e2e/test_tp2_async_tp.py -k "inductor_partition and not +rms_norm and (not +quant_fp8 or +quant_fp8 and (qwen3 or deepseek))"
 
 - label: Fusion E2E TP2 AR-RMS Config Sweep (H100)
+  key: fusion-e2e-tp2-ar-rms-config-sweep-h100
   timeout_in_minutes: 40
   working_dir: "/vllm-workspace/"
   device: h100
@@ -173,6 +186,7 @@ steps:
     - pytest -v -s tests/compile/fusions_e2e/test_tp2_ar_rms.py -k "llama-3"
 
 - label: Fusion E2E TP2 AsyncTP Config Sweep (H100)
+  key: fusion-e2e-tp2-asynctp-config-sweep-h100
   timeout_in_minutes: 40
   working_dir: "/vllm-workspace/"
   device: h100
@@ -192,9 +206,10 @@ steps:
     - pytest -v -s tests/compile/fusions_e2e/test_tp2_async_tp.py -k "llama-3"
 
 - label: Fusion E2E TP2 (B200)
+  key: fusion-e2e-tp2-b200
   timeout_in_minutes: 20
   working_dir: "/vllm-workspace/"
-  device: b200
+  device: b200-k8s
   num_devices: 2
   source_file_dependencies:
     - csrc/quantization/
diff --git a/.buildkite/test_areas/cuda.yaml b/.buildkite/test_areas/cuda.yaml
index b9bb3a2924e9..b56e635bea63 100644
--- a/.buildkite/test_areas/cuda.yaml
+++ b/.buildkite/test_areas/cuda.yaml
@@ -3,15 +3,22 @@ depends_on:
   - image-build
 steps:
 - label: Platform Tests (CUDA)
+  key: platform-tests-cuda
   timeout_in_minutes: 15
+  device: h200_18gb
   source_file_dependencies:
-  - vllm/
+  - vllm/envs.py
+  - vllm/logger.py
+  - vllm/platforms/
+  - vllm/plugins/
+  - vllm/utils/
   - tests/cuda
   commands:
     - pytest -v -s cuda/test_cuda_context.py
     - pytest -v -s cuda/test_platform_no_cuda_init.py
 
 - label: Cudagraph
+  key: cudagraph
   timeout_in_minutes: 20
   source_file_dependencies:
   - tests/v1/cudagraph
@@ -20,4 +27,5 @@ steps:
   - vllm/compilation
   commands:
     - pytest -v -s v1/cudagraph/test_cudagraph_dispatch.py
-    - pytest -v -s v1/cudagraph/test_cudagraph_mode.py
\ No newline at end of file
+    - pytest -v -s v1/cudagraph/test_cudagraph_mode.py
+    - pytest -v -s v1/cudagraph/test_breakable_cudagraph.py
\ No newline at end of file
diff --git a/.buildkite/test_areas/disaggregated.yaml b/.buildkite/test_areas/disaggregated.yaml
new file mode 100644
index 000000000000..d3e02be23981
--- /dev/null
+++ b/.buildkite/test_areas/disaggregated.yaml
@@ -0,0 +1,106 @@
+group: Disaggregated
+depends_on: 
+  - image-build
+steps:
+- label: Distributed NixlConnector PD accuracy (4 GPUs)
+  key: distributed-nixlconnector-pd-accuracy-4-gpus
+  timeout_in_minutes: 30
+  working_dir: "/vllm-workspace/tests"
+  num_devices: 4
+  source_file_dependencies:
+    - vllm/distributed/kv_transfer/kv_connector/v1/nixl/
+    - tests/v1/kv_connector/nixl_integration/
+  commands:
+    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+    - bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
+- label: Distributed FlashInfer NixlConnector PD accuracy (4 GPUs)
+  key: distributed-flashinfer-nixlconnector-pd-accuracy-4-gpus
+  timeout_in_minutes: 30
+  working_dir: "/vllm-workspace/tests"
+  num_devices: 4
+  source_file_dependencies:
+    - vllm/distributed/kv_transfer/kv_connector/v1/nixl/
+    - tests/v1/kv_connector/nixl_integration/
+  commands:
+    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+    - FLASHINFER=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
+
+- label: DP EP Distributed NixlConnector PD accuracy tests (4 GPUs)
+  key: dp-ep-distributed-nixlconnector-pd-accuracy-tests-4-gpus
+  timeout_in_minutes: 30
+  working_dir: "/vllm-workspace/tests"
+  num_devices: 4
+  source_file_dependencies:
+    - vllm/distributed/kv_transfer/kv_connector/v1/nixl/
+    - tests/v1/kv_connector/nixl_integration/
+  commands:
+    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+    - DP_EP=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
+
+- label: CrossLayer KV layout Distributed NixlConnector PD accuracy tests (4 GPUs)
+  key: crosslayer-kv-layout-distributed-nixlconnector-pd-accuracy-tests-4-gpus
+  timeout_in_minutes: 30
+  working_dir: "/vllm-workspace/tests"
+  num_devices: 4
+  source_file_dependencies:
+    - vllm/distributed/kv_transfer/kv_connector/v1/nixl/
+    - tests/v1/kv_connector/nixl_integration/
+  commands:
+    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+    - CROSS_LAYERS_BLOCKS=True bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
+
+- label: Hybrid SSM NixlConnector PD accuracy tests (4 GPUs)
+  key: hybrid-ssm-nixlconnector-pd-accuracy-tests-4-gpus
+  timeout_in_minutes: 25
+  working_dir: "/vllm-workspace/tests"
+  num_devices: 4
+  source_file_dependencies:
+    - vllm/distributed/kv_transfer/kv_connector/v1/nixl/
+    - tests/v1/kv_connector/nixl_integration/
+  commands:
+    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+    - HYBRID_SSM=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
+
+- label: MultiConnector (Nixl+Offloading) PD accuracy (2 GPUs)
+  key: multiconnector-nixl-offloading-pd-accuracy-2-gpus
+  timeout_in_minutes: 30
+  working_dir: "/vllm-workspace/tests"
+  num_devices: 2
+  source_file_dependencies:
+    - vllm/distributed/kv_transfer/kv_connector/v1/nixl/
+    - vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
+    - vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
+    - vllm/distributed/kv_transfer/kv_connector/v1/offloading/
+    - tests/v1/kv_connector/nixl_integration/
+  commands:
+    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+    - bash v1/kv_connector/nixl_integration/run_multi_connector_accuracy_test.sh
+
+- label: NixlConnector PD + Spec Decode acceptance (2 GPUs)
+  key: nixlconnector-pd-spec-decode-acceptance-2-gpus
+  timeout_in_minutes: 30
+  device: a100
+  working_dir: "/vllm-workspace/tests"
+  num_devices: 2
+  source_file_dependencies:
+    - vllm/distributed/kv_transfer/kv_connector/v1/nixl/
+    - vllm/v1/worker/kv_connector_model_runner_mixin.py
+    - tests/v1/kv_connector/nixl_integration/
+  commands:
+    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+    - bash v1/kv_connector/nixl_integration/config_sweep_spec_decode_test.sh
+
+- label: MultiConnector (Nixl+Offloading) PD edge cases (2 GPUs)
+  key: multiconnector-nixl-offloading-pd-edge-cases-2-gpus
+  timeout_in_minutes: 30
+  working_dir: "/vllm-workspace/tests"
+  num_devices: 2
+  source_file_dependencies:
+    - vllm/distributed/kv_transfer/kv_connector/v1/nixl/
+    - vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
+    - vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
+    - vllm/distributed/kv_transfer/kv_connector/v1/offloading/
+    - tests/v1/kv_connector/nixl_integration/
+  commands:
+    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
+    - bash v1/kv_connector/nixl_integration/run_multi_connector_edge_case_test.sh
\ No newline at end of file
diff --git a/.buildkite/test_areas/distributed.yaml b/.buildkite/test_areas/distributed.yaml
index cfa9b848e34c..8aa41a9a26ab 100644
--- a/.buildkite/test_areas/distributed.yaml
+++ b/.buildkite/test_areas/distributed.yaml
@@ -3,6 +3,7 @@ depends_on:
   - image-build
 steps:
 - label: Distributed Comm Ops
+  key: distributed-comm-ops
   timeout_in_minutes: 20
   working_dir: "/vllm-workspace/tests"
   num_devices: 2
@@ -16,6 +17,7 @@ steps:
   - pytest -v -s distributed/test_shm_storage.py
 
 - label: Distributed DP Tests (2 GPUs)
+  key: distributed-dp-tests-2-gpus
   timeout_in_minutes: 20
   working_dir: "/vllm-workspace/tests"
   num_devices: 2
@@ -37,6 +39,7 @@ steps:
   - DP_SIZE=2 pytest -v -s entrypoints/openai/test_multi_api_servers.py
 
 - label: Distributed Compile + RPC Tests (2 GPUs)
+  key: distributed-compile-rpc-tests-2-gpus
   timeout_in_minutes: 20
   working_dir: "/vllm-workspace/tests"
   num_devices: 2
@@ -59,6 +62,7 @@ steps:
   - pytest -v -s ./compile/test_wrapper.py
 
 - label: Distributed Torchrun + Shutdown Tests (2 GPUs)
+  key: distributed-torchrun-shutdown-tests-2-gpus
   timeout_in_minutes: 20
   working_dir: "/vllm-workspace/tests"
   num_devices: 2
@@ -81,6 +85,7 @@ steps:
   - pytest -v -s v1/worker/test_worker_memory_snapshot.py
 
 - label: Distributed Torchrun + Examples (4 GPUs)
+  key: distributed-torchrun-examples-4-gpus
   timeout_in_minutes: 30
   working_dir: "/vllm-workspace"
   num_devices: 4
@@ -88,9 +93,8 @@ steps:
   - vllm/distributed/
   - tests/distributed/test_torchrun_example.py
   - tests/distributed/test_torchrun_example_moe.py
-  - examples/offline_inference/rlhf_colocate.py
   - examples/rl/
-  - tests/examples/offline_inference/data_parallel.py
+  - tests/examples/features/data_parallel/data_parallel_offline.py
   commands:
   # https://github.com/NVIDIA/nccl/issues/1838
   - export NCCL_CUMEM_HOST_ENABLE=0
@@ -107,12 +111,13 @@ steps:
   # test with torchrun tp=2 and dp=2 with ep
   - TP_SIZE=2 DP_SIZE=2 ENABLE_EP=1 torchrun --nproc-per-node=4 tests/distributed/test_torchrun_example_moe.py
   # test with internal dp
-  - python3 examples/offline_inference/data_parallel.py --enforce-eager
+  - python3 examples/features/data_parallel/data_parallel_offline.py --enforce-eager
   # rlhf examples
   - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 examples/rl/rlhf_nccl.py
   - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 examples/rl/rlhf_ipc.py
 
 - label: Distributed DP Tests (4 GPUs)
+  key: distributed-dp-tests-4-gpus
   timeout_in_minutes: 30
   working_dir: "/vllm-workspace/tests"
   num_devices: 4
@@ -133,6 +138,7 @@ steps:
   - pytest -v -s distributed/test_utils.py
 
 - label: Distributed Compile + Comm (4 GPUs)
+  key: distributed-compile-comm-4-gpus
   timeout_in_minutes: 30
   working_dir: "/vllm-workspace/tests"
   num_devices: 4
@@ -154,24 +160,28 @@ steps:
   - pytest -v -s distributed/test_multiproc_executor.py::test_multiproc_executor_multi_node
 
 - label: Distributed Tests (8 GPUs)(H100)
+  key: distributed-tests-8-gpus-h100
   timeout_in_minutes: 10
   device: h100
   num_devices: 8
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
-  - examples/offline_inference/torchrun_dp_example.py
+  - examples/features/torchrun/torchrun_dp_example_offline.py
   - vllm/config/parallel.py
   - vllm/distributed/
   - vllm/v1/engine/llm_engine.py
   - vllm/v1/executor/uniproc_executor.py
   - vllm/v1/worker/gpu_worker.py
+  - tests/distributed/test_mnnvl_alltoall.py
+
   commands:
   # https://github.com/NVIDIA/nccl/issues/1838
   - export NCCL_CUMEM_HOST_ENABLE=0
   # test with torchrun tp=2 and dp=4 with ep
-  - torchrun --nproc-per-node=8 ../examples/offline_inference/torchrun_dp_example.py --tp-size=2 --pp-size=1 --dp-size=4 --enable-ep
+  - torchrun --nproc-per-node=8 ../examples/features/torchrun/torchrun_dp_example_offline.py --tp-size=2 --pp-size=1 --dp-size=4 --enable-ep
 
 - label: Distributed Tests (4 GPUs)(A100)
+  key: distributed-tests-4-gpus-a100
   device: a100
   optional: true
   num_devices: 4
@@ -186,6 +196,7 @@ steps:
   - pytest -v -s -x lora/test_mixtral.py
 
 - label: Distributed Tests (2 GPUs)(H100)
+  key: distributed-tests-2-gpus-h100
   timeout_in_minutes: 15
   device: h100
   optional: true
@@ -194,11 +205,14 @@ steps:
   commands:
     - pytest -v -s tests/distributed/test_context_parallel.py
     - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 examples/rl/rlhf_async_new_apis.py
-    - VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model=Qwen/Qwen1.5-MoE-A2.7B -tp=1 -dp=2 --max-model-len=2048 --all2all-backend=deepep_high_throughput
+    - VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/features/data_parallel/data_parallel_offline.py --model=Qwen/Qwen1.5-MoE-A2.7B -tp=1 -dp=2 --max-model-len=2048 --all2all-backend=deepep_high_throughput
     - pytest -v -s tests/v1/distributed/test_dbo.py
+    - VLLM_ALLOW_INSECURE_SERIALIZATION=1 pytest -v -s tests/distributed/test_weight_transfer.py
+    - pytest -v -s tests/distributed/test_packed_tensor.py
 
 - label: Distributed Tests (2 GPUs)(B200)
-  device: b200
+  key: distributed-tests-2-gpus-b200
+  device: b200-k8s
   optional: true
   working_dir: "/vllm-workspace/"
   num_devices: 2
@@ -206,8 +220,12 @@ steps:
     - pytest -v -s tests/distributed/test_context_parallel.py
     - pytest -v -s tests/distributed/test_nccl_symm_mem_allreduce.py
     - pytest -v -s tests/v1/distributed/test_dbo.py
+    - pytest -v -s tests/distributed/test_mnnvl_alltoall.py
+
+    
 
 - label: 2 Node Test (4 GPUs)
+  key: 2-node-test-4-gpus
   timeout_in_minutes: 30
   working_dir: "/vllm-workspace/tests"
   num_devices: 2
@@ -220,68 +238,12 @@ steps:
   - vllm/executor/
   - vllm/model_executor/models/
   - tests/distributed/
-  - tests/examples/offline_inference/data_parallel.py
-  commands:
-    - ./.buildkite/scripts/run-multi-node-test.sh /vllm-workspace/tests 2 2 $IMAGE_TAG "VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed' && NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed' && python3 ../examples/offline_inference/data_parallel.py -dp=2 -tp=1 --dp-num-nodes=2 --dp-node-rank=0 --dp-master-addr=192.168.10.10 --dp-master-port=12345 --enforce-eager --trust-remote-code && VLLM_MULTI_NODE=1 pytest -v -s distributed/test_multi_node_assignment.py && VLLM_MULTI_NODE=1 pytest -v -s distributed/test_pipeline_parallel.py" "VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed' && NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed' && python3 ../examples/offline_inference/data_parallel.py -dp=2 -tp=1 --dp-num-nodes=2 --dp-node-rank=1 --dp-master-addr=192.168.10.10 --dp-master-port=12345 --enforce-eager --trust-remote-code"
-
-- label: Distributed NixlConnector PD accuracy (4 GPUs)
-  timeout_in_minutes: 30
-  working_dir: "/vllm-workspace/tests"
-  num_devices: 4
-  source_file_dependencies:
-    - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-    - tests/v1/kv_connector/nixl_integration/
-  commands:
-    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
-    - bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
-- label: DP EP Distributed NixlConnector PD accuracy tests (4 GPUs)
-  timeout_in_minutes: 30
-  working_dir: "/vllm-workspace/tests"
-  num_devices: 4
-  source_file_dependencies:
-    - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-    - tests/v1/kv_connector/nixl_integration/
-  commands:
-    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
-    - DP_EP=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
-- label: CrossLayer KV layout Distributed NixlConnector PD accuracy tests (4 GPUs)
-  timeout_in_minutes: 30
-  working_dir: "/vllm-workspace/tests"
-  num_devices: 4
-  source_file_dependencies:
-    - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-    - tests/v1/kv_connector/nixl_integration/
+  - tests/examples/features/data_parallel/data_parallel_offline.py
   commands:
-    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
-    - CROSS_LAYERS_BLOCKS=True bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
-- label: Hyrbid SSM NixlConnector PD accuracy tests (4 GPUs)
-  timeout_in_minutes: 20
-  working_dir: "/vllm-workspace/tests"
-  num_devices: 4
-  source_file_dependencies:
-    - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-    - tests/v1/kv_connector/nixl_integration/
-  commands:
-    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
-    - HYBRID_SSM=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
-- label: NixlConnector PD + Spec Decode acceptance (2 GPUs)
-  timeout_in_minutes: 30
-  device: a100
-  working_dir: "/vllm-workspace/tests"
-  num_devices: 2
-  source_file_dependencies:
-    - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-    - vllm/v1/worker/kv_connector_model_runner_mixin.py
-    - tests/v1/kv_connector/nixl_integration/
-  commands:
-    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
-    - bash v1/kv_connector/nixl_integration/spec_decode_acceptance_test.sh
+    - ./.buildkite/scripts/run-multi-node-test.sh /vllm-workspace/tests 2 2 $IMAGE_TAG "VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed' && NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed' && python3 ../examples/features/data_parallel/data_parallel_offline.py -dp=2 -tp=1 --dp-num-nodes=2 --dp-node-rank=0 --dp-master-addr=192.168.10.10 --dp-master-port=12345 --enforce-eager --trust-remote-code && VLLM_MULTI_NODE=1 pytest -v -s distributed/test_multi_node_assignment.py && VLLM_MULTI_NODE=1 pytest -v -s distributed/test_pipeline_parallel.py" "VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed' && NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed' && python3 ../examples/features/data_parallel/data_parallel_offline.py -dp=2 -tp=1 --dp-num-nodes=2 --dp-node-rank=1 --dp-master-addr=192.168.10.10 --dp-master-port=12345 --enforce-eager --trust-remote-code"
 
 - label: Pipeline + Context Parallelism (4 GPUs)
+  key: pipeline-context-parallelism-4-gpus
   timeout_in_minutes: 60
   working_dir: "/vllm-workspace/tests"
   num_devices: 4
@@ -294,3 +256,24 @@ steps:
   commands:
   - pytest -v -s distributed/test_pp_cudagraph.py
   - pytest -v -s distributed/test_pipeline_parallel.py
+
+- label: RayExecutorV2 (4 GPUs)
+  key: rayexecutorv2-4-gpus
+  timeout_in_minutes: 60
+  working_dir: "/vllm-workspace/tests"
+  num_devices: 4
+  source_file_dependencies:
+  - vllm/v1/executor/ray_executor_v2.py
+  - vllm/v1/executor/abstract.py
+  - vllm/v1/executor/multiproc_executor.py
+  - tests/distributed/test_ray_v2_executor.py
+  - tests/distributed/test_ray_v2_executor_e2e.py
+  - tests/distributed/test_pipeline_parallel.py
+  - tests/basic_correctness/test_basic_correctness.py
+  commands:
+  - export VLLM_USE_RAY_V2_EXECUTOR_BACKEND=1
+  - export NCCL_CUMEM_HOST_ENABLE=0
+  - pytest -v -s distributed/test_ray_v2_executor.py
+  - pytest -v -s distributed/test_ray_v2_executor_e2e.py
+  - pytest -v -s distributed/test_pipeline_parallel.py -k "ray"
+  - TARGET_TEST_SUITE=L4 pytest -v -s basic_correctness/test_basic_correctness.py -k "ray"
diff --git a/.buildkite/test_areas/docker.yaml b/.buildkite/test_areas/docker.yaml
new file mode 100644
index 000000000000..9bf96221abe0
--- /dev/null
+++ b/.buildkite/test_areas/docker.yaml
@@ -0,0 +1,16 @@
+group: Docker
+depends_on:
+  - image-build-cpu
+steps:
+- label: Docker Build Metadata
+  timeout_in_minutes: 10
+  device: cpu-small
+  source_file_dependencies:
+    - .buildkite/release-pipeline.yaml
+    - .buildkite/scripts/docker-build-metadata-args.sh
+    - docker/Dockerfile
+    - docker/Dockerfile.cpu
+    - docker/docker-bake.hcl
+    - tests/tools/test_docker_build_metadata_args.py
+  commands:
+    - pytest -v -s tools/test_docker_build_metadata_args.py
diff --git a/.buildkite/test_areas/e2e_integration.yaml b/.buildkite/test_areas/e2e_integration.yaml
index 5b7f96bc7a26..bb8aa14eac18 100644
--- a/.buildkite/test_areas/e2e_integration.yaml
+++ b/.buildkite/test_areas/e2e_integration.yaml
@@ -3,6 +3,7 @@ depends_on:
   - image-build
 steps:
 - label: DeepSeek V2-Lite Accuracy
+  key: deepseek-v2-lite-accuracy
   timeout_in_minutes: 60
   device: h100
   optional: true
@@ -12,6 +13,7 @@ steps:
   - bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh 0.25 200 8010
 
 - label: Qwen3-30B-A3B-FP8-block Accuracy
+  key: qwen3-30b-a3b-fp8-block-accuracy
   timeout_in_minutes: 60
   device: h100
   optional: true
@@ -21,15 +23,27 @@ steps:
   - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020
 
 - label: Qwen3-30B-A3B-FP8-block Accuracy (B200)
+  key: qwen3-30b-a3b-fp8-block-accuracy-b200
   timeout_in_minutes: 60
-  device: b200
+  device: b200-k8s
   optional: true
   num_devices: 2
   working_dir: "/vllm-workspace"
   commands:
   - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020 2 1
 
+- label: Qwen3-30B-A3B-FP8 DP4 Async EPLB Accuracy
+  key: qwen3-30b-a3b-fp8-dp4-async-eplb-accuracy
+  timeout_in_minutes: 60
+  device: h100
+  optional: true
+  num_devices: 4
+  working_dir: "/vllm-workspace"
+  commands:
+  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_dp4_async_eplb.sh 0.8 200 8050
+
 - label: DeepSeek V2-Lite Prefetch Offload Accuracy (H100)
+  key: deepseek-v2-lite-prefetch-offload-accuracy-h100
   timeout_in_minutes: 60
   device: h100
   optional: true
diff --git a/.buildkite/test_areas/engine.yaml b/.buildkite/test_areas/engine.yaml
index ed0df3e4d879..67af8881f094 100644
--- a/.buildkite/test_areas/engine.yaml
+++ b/.buildkite/test_areas/engine.yaml
@@ -3,18 +3,32 @@ depends_on:
   - image-build
 steps:
 - label: Engine
+  key: engine
   timeout_in_minutes: 15
+  device: h200_18gb
   source_file_dependencies:
-  - vllm/
+  - vllm/compilation/
+  - vllm/config/
+  - vllm/engine/
+  - vllm/entrypoints/logger.py
+  - vllm/envs.py
+  - vllm/logger.py
+  - vllm/logging_utils/
+  - vllm/platforms/
+  - vllm/sequence.py
+  - vllm/triton_utils/
+  - vllm/utils/
   - tests/engine
   - tests/test_sequence
   - tests/test_config
   - tests/test_logger
   - tests/test_vllm_port
+  - tests/test_jit_monitor.py
   commands:
-  - pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py
+  - pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py test_jit_monitor.py
 
 - label: Engine (1 GPU)
+  key: engine-1-gpu
   timeout_in_minutes: 30
   source_file_dependencies:
     - vllm/v1/engine/
@@ -22,16 +36,32 @@ steps:
   commands:
     - pytest -v -s v1/engine/test_preprocess_error_handling.py
     - pytest -v -s v1/engine --ignore v1/engine/test_preprocess_error_handling.py
+  mirror:
+    amd:
+      device: mi300_1
+      timeout_in_minutes: 40
+      depends_on:
+      - image-build-amd
 
 - label: e2e Scheduling (1 GPU)
+  key: e2e-scheduling-1-gpu
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
     - vllm/v1/
     - tests/v1/e2e/general/
   commands:
     - pytest -v -s v1/e2e/general/test_async_scheduling.py
+  mirror:
+    amd:
+      device: mi250_1
+      timeout_in_minutes: 60
+      depends_on:
+      - image-build-amd
 
 - label: e2e Core (1 GPU)
+  device: h200_35gb
+  key: e2e-core-1-gpu
   timeout_in_minutes: 30
   source_file_dependencies:
     - vllm/v1/
@@ -40,38 +70,63 @@ steps:
     - pytest -v -s v1/e2e/general --ignore v1/e2e/general/test_async_scheduling.py
 
 - label: V1 e2e (2 GPUs)
+  key: v1-e2e-2-gpus
   timeout_in_minutes: 60 # TODO: Fix timeout after we have more confidence in the test stability
   optional: true
   num_devices: 2
   source_file_dependencies:
-    - vllm/
-    - tests/v1/e2e
+    - vllm/compilation/
+    - vllm/config/
+    - vllm/distributed/
+    - vllm/engine/
+    - vllm/envs.py
+    - vllm/forward_context.py
+    - vllm/inputs/
+    - vllm/logger.py
+    - vllm/logging_utils/
+    - vllm/model_executor/
+    - vllm/multimodal/
+    - vllm/platforms/
+    - vllm/sampling_params.py
+    - vllm/transformers_utils/
+    - vllm/triton_utils/
+    - vllm/utils/
+    - vllm/v1/
+    - tests/v1/e2e/spec_decode
   commands:
     # Only run tests that need exactly 2 GPUs
     - pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "tensor_parallelism"
-  mirror:
-    amd:
-      device: mi325_2
-      depends_on:
-      - image-build-amd
 
 - label: V1 e2e (4 GPUs)
+  key: v1-e2e-4-gpus
   timeout_in_minutes: 60 # TODO: Fix timeout after we have more confidence in the test stability
   optional: true
   num_devices: 4
   source_file_dependencies:
-    - vllm/
-    - tests/v1/e2e
+    - vllm/compilation/
+    - vllm/config/
+    - vllm/distributed/
+    - vllm/engine/
+    - vllm/envs.py
+    - vllm/forward_context.py
+    - vllm/inputs/
+    - vllm/logger.py
+    - vllm/logging_utils/
+    - vllm/model_executor/
+    - vllm/multimodal/
+    - vllm/platforms/
+    - vllm/sampling_params.py
+    - vllm/transformers_utils/
+    - vllm/triton_utils/
+    - vllm/utils/
+    - vllm/v1/
+    - tests/v1/e2e/spec_decode
   commands:
     # Only run tests that need 4 GPUs
     - pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "eagle_correctness_heavy"
-  mirror:
-    amd:
-      device: mi325_4
-      depends_on:
-      - image-build-amd
 
 - label: V1 e2e (4xH100)
+  key: v1-e2e-4xh100
   timeout_in_minutes: 60
   device: h100
   num_devices: 4
diff --git a/.buildkite/test_areas/entrypoints.yaml b/.buildkite/test_areas/entrypoints.yaml
index ebe6b9419fc2..5cc90028db31 100644
--- a/.buildkite/test_areas/entrypoints.yaml
+++ b/.buildkite/test_areas/entrypoints.yaml
@@ -2,7 +2,8 @@ group: Entrypoints
 depends_on: 
   - image-build
 steps:
-- label: Entrypoints Unit Tests  
+- label: Entrypoints Unit Tests
+  key: entrypoints-unit-tests
   timeout_in_minutes: 10
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
@@ -10,9 +11,10 @@ steps:
   - tests/entrypoints/
   commands:
   - pytest -v -s entrypoints/openai/tool_parsers
-  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/serve/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py  --ignore=entrypoints/pooling
+  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/serve/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py  --ignore=entrypoints/pooling --ignore=entrypoints/speech_to_text
 
 - label: Entrypoints Integration (LLM)
+  key: entrypoints-integration-llm
   timeout_in_minutes: 40
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
@@ -24,8 +26,14 @@ steps:
   - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
   - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
   - pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
+  mirror:
+    amd:
+      device: mi300_1
+      depends_on:
+      - image-build-amd
 
 - label: Entrypoints Integration (API Server openai - Part 1)
+  key: entrypoints-integration-api-server-openai-part-1
   timeout_in_minutes: 50
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
@@ -37,12 +45,13 @@ steps:
   - pytest -v -s entrypoints/openai/chat_completion --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py
   mirror:
     amd:
-      device: mi325_1
+      device: mi300_1
+      timeout_in_minutes: 80
       depends_on:
       - image-build-amd
 
-
 - label: Entrypoints Integration (API Server openai - Part 2)
+  key: entrypoints-integration-api-server-openai-part-2
   timeout_in_minutes: 50
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
@@ -51,16 +60,18 @@ steps:
   - tests/entrypoints/test_chat_utils
   commands:
   - pytest -v -s entrypoints/openai/completion --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py
-  - pytest -v -s entrypoints/openai/speech_to_text/
   - pytest -v -s entrypoints/test_chat_utils.py
   mirror:
     amd:
-      device: mi325_1
+      device: mi300_1
+      timeout_in_minutes: 60
       depends_on:
       - image-build-amd
 
 - label: Entrypoints Integration (API Server openai - Part 3)
+  key: entrypoints-integration-api-server-openai-part-3
   timeout_in_minutes: 50
+  device: h200_18gb
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
@@ -68,9 +79,17 @@ steps:
   - tests/entrypoints/test_chat_utils
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion --ignore=entrypoints/openai/completion --ignore=entrypoints/openai/speech_to_text/ --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses --ignore=entrypoints/openai/test_multi_api_servers.py
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion --ignore=entrypoints/openai/completion --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses --ignore=entrypoints/openai/test_multi_api_servers.py
+  mirror:
+    amd:
+      device: mi300_1
+      timeout_in_minutes: 60
+      depends_on:
+      - image-build-amd
 
 - label: Entrypoints Integration (API Server 2)
+  device: h200_35gb
+  key: entrypoints-integration-api-server-2
   timeout_in_minutes: 130
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
@@ -83,8 +102,26 @@ steps:
   - pytest -v -s entrypoints/serve/instrumentator
   - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
   - pytest -v -s tool_use
+  mirror:
+    amd:
+      device: mi300_1
+      depends_on:
+      - image-build-amd
+
+- label: Entrypoints Integration (Speech to Text)
+  device: h200_35gb
+  key: entrypoints-integration-speech_to_text
+  timeout_in_minutes: 50
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - vllm/
+  - tests/entrypoints/speech_to_text
+  commands:
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s entrypoints/speech_to_text
 
 - label: Entrypoints Integration (Pooling)
+  key: entrypoints-integration-pooling
   timeout_in_minutes: 50
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
@@ -95,6 +132,7 @@ steps:
   - pytest -v -s entrypoints/pooling
 
 - label: Entrypoints Integration (Responses API)
+  key: entrypoints-integration-responses-api
   timeout_in_minutes: 50
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
@@ -104,10 +142,12 @@ steps:
   - pytest -v -s entrypoints/openai/responses
 
 - label: OpenAI API Correctness
+  key: openai-api-correctness
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
   - csrc/
   - vllm/entrypoints/openai/
   - vllm/model_executor/models/whisper.py
-  commands: # LMEval+Transcription WER check
+  commands: # LMEval
   - pytest -s entrypoints/openai/correctness/
diff --git a/.buildkite/test_areas/expert_parallelism.yaml b/.buildkite/test_areas/expert_parallelism.yaml
index cf34a66a1012..0f7ab0d7157c 100644
--- a/.buildkite/test_areas/expert_parallelism.yaml
+++ b/.buildkite/test_areas/expert_parallelism.yaml
@@ -3,7 +3,9 @@ depends_on:
   - image-build
 steps:
 - label: EPLB Algorithm
+  key: eplb-algorithm
   timeout_in_minutes: 15
+  device: h200_18gb
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/distributed/eplb
@@ -13,8 +15,9 @@ steps:
   - pytest -v -s distributed/test_eplb_algo.py
   - pytest -v -s distributed/test_eplb_utils.py
 
-- label: EPLB Execution
-  timeout_in_minutes: 20
+- label: EPLB Execution # 17min
+  key: eplb-execution
+  timeout_in_minutes: 27
   working_dir: "/vllm-workspace/tests"
   num_devices: 4
   source_file_dependencies:
@@ -25,6 +28,7 @@ steps:
   - pytest -v -s distributed/test_eplb_spec_decode.py
 
 - label: Elastic EP Scaling Test
+  key: elastic-ep-scaling-test
   timeout_in_minutes: 20
   device: h100
   working_dir: "/vllm-workspace/tests"
diff --git a/.buildkite/test_areas/kernels.yaml b/.buildkite/test_areas/kernels.yaml
index 8eba8da0be85..70437d0ecfdd 100644
--- a/.buildkite/test_areas/kernels.yaml
+++ b/.buildkite/test_areas/kernels.yaml
@@ -2,29 +2,58 @@ group: Kernels
 depends_on: 
   - image-build
 steps:
+- label: vLLM IR Tests
+  key: vllm-ir-tests
+  timeout_in_minutes: 10
+  device: h200_18gb
+  working_dir: "/vllm-workspace/"
+  source_file_dependencies:
+    - vllm/ir
+    - vllm/kernels
+  commands:
+    - pytest -v -s tests/ir
+    - pytest -v -s tests/kernels/ir
+
 - label: Kernels Core Operation Test
+  key: kernels-core-operation-test
   timeout_in_minutes: 75
   source_file_dependencies:
   - csrc/
   - tests/kernels/core
-  - tests/kernels/test_top_k_per_row.py
   - tests/kernels/test_concat_mla_q.py
   commands:
-    - pytest -v -s kernels/core kernels/test_top_k_per_row.py kernels/test_concat_mla_q.py
+    - pytest -v -s kernels/core --ignore=kernels/core/test_minimax_reduce_rms.py  kernels/test_concat_mla_q.py
+
+- label: Kernels MiniMax Reduce RMS Test (2 GPUs)
+  key: kernels-minimax-reduce-rms-test-2-gpus
+  timeout_in_minutes: 15
+  num_devices: 2
+  device: h100
+  source_file_dependencies:
+  - csrc/minimax_reduce_rms_kernel.cu
+  - csrc/minimax_reduce_rms_kernel.h
+  - vllm/model_executor/layers/mamba/linear_attn.py
+  - vllm/model_executor/layers/mamba/lamport_workspace.py
+  - tests/kernels/core/test_minimax_reduce_rms.py
+  commands:
+    - pytest -v -s kernels/core/test_minimax_reduce_rms.py
 
 - label: Kernels Attention Test %N
+  key: kernels-attention-test
   timeout_in_minutes: 35
   source_file_dependencies:
   - csrc/attention/
   - vllm/v1/attention
     # TODO: remove this dependency (https://github.com/vllm-project/vllm/issues/32267)
   - vllm/model_executor/layers/attention
+  - vllm/utils/flashinfer.py
   - tests/kernels/attention
   commands:
     - pytest -v -s kernels/attention --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
   parallelism: 2
 
 - label: Kernels Quantization Test %N
+  key: kernels-quantization-test
   timeout_in_minutes: 90
   source_file_dependencies:
   - csrc/quantization/
@@ -33,8 +62,22 @@ steps:
   commands:
     - pytest -v -s kernels/quantization --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
   parallelism: 2
+  mirror:
+    amd:
+      device: mi300_1
+      source_file_dependencies:
+      - csrc/quantization/
+      - vllm/model_executor/layers/quantization
+      - tests/kernels/quantization
+      - tests/kernels/quantization/test_rocm_skinny_gemms.py
+      - vllm/_aiter_ops.py
+      - vllm/platforms/rocm.py
+      - vllm/model_executor/kernels/
+      depends_on:
+      - image-build-amd
 
 - label: Kernels MoE Test %N
+  key: kernels-moe-test
   timeout_in_minutes: 25
   source_file_dependencies:
   - csrc/quantization/cutlass_w8a8/moe/
@@ -50,6 +93,7 @@ steps:
   parallelism: 5
 
 - label: Kernels Mamba Test
+  key: kernels-mamba-test
   timeout_in_minutes: 45
   source_file_dependencies:
   - csrc/mamba/
@@ -58,12 +102,28 @@ steps:
   commands:
     - pytest -v -s kernels/mamba
 
+- label: Kernels KDA Test
+  timeout_in_minutes: 20
+  device: h200_18gb
+  source_file_dependencies:
+  - vllm/model_executor/layers/fla/ops/kda.py
+  - vllm/model_executor/layers/fla/ops/chunk_delta_h.py
+  - vllm/model_executor/layers/fla/ops/l2norm.py
+  - tests/kernels/test_kda.py
+  commands:
+    - pytest -v -s kernels/test_kda.py
+
 - label: Kernels DeepGEMM Test (H100)
+  key: kernels-deepgemm-test-h100
   timeout_in_minutes: 45
   device: h100
   num_devices: 1
   source_file_dependencies:
+  - cmake/external_projects/deepgemm.cmake
   - tools/install_deepgemm.sh
+  - tools/build_deepgemm_C.py
+  - tools/setup_deepgemm_pythons.sh
+  - tools/check_wheel_deepgemm.py
   - vllm/utils/deep_gemm.py
   - vllm/model_executor/layers/fused_moe
   - vllm/model_executor/layers/quantization
@@ -71,30 +131,38 @@ steps:
   - tests/kernels/moe/test_deepgemm.py
   - tests/kernels/moe/test_batched_deepgemm.py
   - tests/kernels/attention/test_deepgemm_attention.py
+  - tests/quantization/test_cutlass_w4a16.py
   commands:
+    - python3 ../tools/check_wheel_deepgemm.py
     - pytest -v -s kernels/quantization/test_block_fp8.py
     - pytest -v -s kernels/moe/test_deepgemm.py
     - pytest -v -s kernels/moe/test_batched_deepgemm.py
     - pytest -v -s kernels/attention/test_deepgemm_attention.py
+    - pytest -v -s quantization/test_cutlass_w4a16.py
 
 - label: Kernels (B200)
+  key: kernels-b200
   timeout_in_minutes: 30
   working_dir: "/vllm-workspace/"
-  device: b200
+  device: b200-k8s
   # optional: true
   source_file_dependencies:
   - csrc/quantization/fp4/
   - csrc/attention/mla/
   - csrc/quantization/cutlass_w8a8/moe/
-  - vllm/model_executor/layers/fused_moe/cutlass_moe.py
-  - vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py
-  - vllm/model_executor/layers/fused_moe/flashinfer_a2a_prepare_finalize.py
+  - vllm/model_executor/layers/fused_moe/experts/cutlass_moe.py
+  - vllm/model_executor/layers/fused_moe/experts/flashinfer_cutlass_moe.py
+  - vllm/model_executor/layers/fused_moe/experts/trtllm_nvfp4_moe.py
+  - vllm/model_executor/layers/fused_moe/oracle/nvfp4.py
+  - vllm/model_executor/layers/fused_moe/prepare_finalize/flashinfer_nvlink_one_sided.py
+  - vllm/model_executor/layers/fused_moe/prepare_finalize/flashinfer_nvlink_two_sided.py
   - vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
   - vllm/v1/attention/backends/flashinfer.py
   - vllm/v1/attention/backends/mla/cutlass_mla.py
   - vllm/v1/attention/backends/mla/flashinfer_mla.py
   - vllm/v1/attention/selector.py
   - vllm/platforms/cuda.py
+  - tests/kernels/test_top_k_per_row.py
   commands:
     - nvidia-smi
     - python3 examples/basic/offline_inference/chat.py
@@ -105,6 +173,7 @@ steps:
     - pytest -v -s tests/kernels/attention/test_flashinfer_trtllm_attention.py
     - pytest -v -s tests/kernels/attention/test_cutlass_mla_decode.py
     - pytest -v -s tests/kernels/attention/test_flashinfer_mla_decode.py
+    - pytest -v -s tests/kernels/test_top_k_per_row.py
     # Quantization
     - pytest -v -s tests/kernels/quantization/test_cutlass_scaled_mm.py -k 'fp8'
     - pytest -v -s tests/kernels/quantization/test_nvfp4_quant.py
@@ -115,25 +184,29 @@ steps:
     - pytest -v -s tests/kernels/quantization/test_nvfp4_qutlass.py
     - pytest -v -s tests/kernels/quantization/test_mxfp4_qutlass.py
     - pytest -v -s tests/kernels/moe/test_nvfp4_moe.py
+    - pytest -v -s tests/kernels/moe/test_mxfp4_moe.py
     - pytest -v -s tests/kernels/moe/test_ocp_mx_moe.py
     - pytest -v -s tests/kernels/moe/test_flashinfer.py
     - pytest -v -s tests/kernels/moe/test_flashinfer_moe.py
+    - pytest -v -s tests/kernels/moe/test_trtllm_nvfp4_moe.py
     - pytest -v -s tests/kernels/moe/test_cutedsl_moe.py
     # e2e
     - pytest -v -s tests/models/quantization/test_nvfp4.py
 
 - label: Kernels Helion Test
+  key: kernels-helion-test
   timeout_in_minutes: 30
   device: h100
   source_file_dependencies:
   - vllm/utils/import_utils.py
   - tests/kernels/helion/
   commands:
-    - pip install helion
+    - pip install helion==1.0.0
     - pytest -v -s kernels/helion/
 
  
 - label: Kernels FP8 MoE Test (1 H100)
+  key: kernels-fp8-moe-test-1-h100
   timeout_in_minutes: 90
   device: h100
   num_devices: 1
@@ -150,6 +223,7 @@ steps:
     - pytest -v -s kernels/moe/test_triton_moe_ptpc_fp8.py
 
 - label: Kernels FP8 MoE Test (2 H100s)
+  key: kernels-fp8-moe-test-2-h100s
   timeout_in_minutes: 90
   device: h100
   num_devices: 2
@@ -159,8 +233,9 @@ steps:
     - pytest -v -s kernels/moe/test_deepep_moe.py
 
 - label: Kernels Fp4 MoE Test (B200)
+  key: kernels-fp4-moe-test-b200
   timeout_in_minutes: 60
-  device: b200
+  device: b200-k8s
   num_devices: 1
   optional: true
   commands:
@@ -168,3 +243,37 @@ steps:
     - pytest -v -s kernels/moe/test_flashinfer_moe.py
     - pytest -v -s kernels/moe/test_nvfp4_moe.py
     - pytest -v -s kernels/moe/test_ocp_mx_moe.py
+
+
+- label: Kernels FusedMoE Layer Test (2 H100s)
+  key: kernels-fusedmoe-layer-test-2-h100s
+  timeout_in_minutes: 90
+  device: h100
+  num_devices: 2
+  source_file_dependencies:
+  - csrc/quantization/cutlass_w8a8/moe/
+  - csrc/moe/
+  - tests/kernels/moe
+  - vllm/model_executor/layers/fused_moe/
+  - vllm/model_executor/layers/quantization/
+  - vllm/distributed/device_communicators/
+  - vllm/config
+  commands:
+    - pytest -v -s kernels/moe/test_moe_layer.py
+
+
+- label: Kernels FusedMoE Layer Test (2 B200s)
+  key: kernels-fusedmoe-layer-test-2-b200s
+  timeout_in_minutes: 90
+  device: b200-k8s
+  num_devices: 2
+  source_file_dependencies:
+  - csrc/quantization/cutlass_w8a8/moe/
+  - csrc/moe/
+  - tests/kernels/moe
+  - vllm/model_executor/layers/fused_moe/
+  - vllm/model_executor/layers/quantization/
+  - vllm/distributed/device_communicators/
+  - vllm/config
+  commands:
+    - pytest -v -s kernels/moe/test_moe_layer.py
diff --git a/.buildkite/test_areas/lm_eval.yaml b/.buildkite/test_areas/lm_eval.yaml
index 39029efe9cd9..06f530ecc2a0 100644
--- a/.buildkite/test_areas/lm_eval.yaml
+++ b/.buildkite/test_areas/lm_eval.yaml
@@ -3,6 +3,8 @@ depends_on:
   - image-build
 steps:
 - label: LM Eval Small Models
+  device: h200_35gb
+  key: lm-eval-small-models
   timeout_in_minutes: 75
   source_file_dependencies:
   - csrc/
@@ -24,6 +26,7 @@ steps:
 #   - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
 
 - label: LM Eval Large Models (4 GPUs)(H100)
+  key: lm-eval-large-models-4-gpus-h100
   device: h100
   optional: true
   num_devices: 4
@@ -36,8 +39,9 @@ steps:
     - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-hopper.txt --tp-size=4
 
 - label: LM Eval Small Models (B200)
+  key: lm-eval-small-models-b200
   timeout_in_minutes: 120
-  device: b200
+  device: b200-k8s
   optional: true
   source_file_dependencies:
   - csrc/
@@ -45,9 +49,22 @@ steps:
   commands:
   - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-blackwell.txt
 
+- label: LM Eval Large Models (B200, EP)
+  key: lm-eval-large-models-b200-ep
+  timeout_in_minutes: 120
+  device: b200-k8s
+  optional: true
+  num_devices: 2
+  source_file_dependencies:
+  - csrc/
+  - vllm/model_executor/layers/quantization
+  commands:
+  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-blackwell-ep.txt
+
 - label: LM Eval Qwen3.5 Models (B200)
+  key: lm-eval-qwen3-5-models-b200
   timeout_in_minutes: 120
-  device: b200
+  device: b200-k8s
   optional: true
   num_devices: 2
   source_file_dependencies:
@@ -62,6 +79,7 @@ steps:
   - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-qwen35-blackwell.txt
 
 - label: LM Eval Large Models (H200)
+  key: lm-eval-large-models-h200
   timeout_in_minutes: 60
   device: h200
   optional: true
@@ -70,6 +88,7 @@ steps:
     - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-h200.txt
 
 - label: MoE Refactor Integration Test (H100 - TEMPORARY)
+  key: moe-refactor-integration-test-h100-temporary
   device: h100
   optional: true
   num_devices: 2
@@ -77,21 +96,36 @@ steps:
     - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=evals/gsm8k/configs/moe-refactor/config-h100.txt
   
 - label: MoE Refactor Integration Test (B200 - TEMPORARY)
-  device: b200
+  key: moe-refactor-integration-test-b200-temporary
+  device: b200-k8s
   optional: true
   num_devices: 2
   commands:
     - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=evals/gsm8k/configs/moe-refactor/config-b200.txt
 
 - label: MoE Refactor Integration Test (B200 DP - TEMPORARY)
-  device: b200
+  key: moe-refactor-integration-test-b200-dp-temporary
+  device: b200-k8s
   optional: true
   num_devices: 2
   commands:
     - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=evals/gsm8k/configs/moe-refactor-dp-ep/config-b200.txt
 
 
+- label: LM Eval TurboQuant KV Cache
+  key: lm-eval-turboquant-kv-cache
+  timeout_in_minutes: 75
+  device: h200_18gb
+  source_file_dependencies:
+  - vllm/model_executor/layers/quantization/turboquant/
+  - vllm/v1/attention/backends/turboquant_attn.py
+  - vllm/v1/attention/ops/triton_turboquant_decode.py
+  - vllm/v1/attention/ops/triton_turboquant_store.py
+  commands:
+  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=evals/gsm8k/configs/models-turboquant.txt
+
 - label: GPQA Eval (GPT-OSS) (H100)
+  key: gpqa-eval-gpt-oss-h100
   timeout_in_minutes: 120
   device: h100
   optional: true
@@ -105,8 +139,9 @@ steps:
     - pytest -s -v evals/gpt_oss/test_gpqa_correctness.py --config-list-file=configs/models-h100.txt
 
 - label: GPQA Eval (GPT-OSS) (B200)
+  key: gpqa-eval-gpt-oss-b200
   timeout_in_minutes: 120
-  device: b200
+  device: b200-k8s
   optional: true
   num_devices: 2
   source_file_dependencies:
@@ -116,3 +151,11 @@ steps:
   commands:
     - uv pip install --system 'gpt-oss[eval]==0.0.5'
     - pytest -s -v evals/gpt_oss/test_gpqa_correctness.py --config-list-file=configs/models-b200.txt
+
+- label: MRCR Eval Small Models
+  device: h200_35gb
+  timeout_in_minutes: 30
+  source_file_dependencies:
+  - tests/evals/mrcr/
+  commands:
+    - pytest -s -v evals/mrcr/test_mrcr_correctness.py --config-list-file=evals/mrcr/configs/models-small.txt
diff --git a/.buildkite/test_areas/lora.yaml b/.buildkite/test_areas/lora.yaml
index 21f392ff737b..3ccf92f9a7ad 100644
--- a/.buildkite/test_areas/lora.yaml
+++ b/.buildkite/test_areas/lora.yaml
@@ -3,20 +3,24 @@ depends_on:
   - image-build
 steps:
 - label: LoRA %N
+  device: h200_35gb
+  key: lora
   timeout_in_minutes: 30
   source_file_dependencies:
   - vllm/lora
   - tests/lora
   commands:
-    - pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_llm_with_multi_loras.py --ignore=lora/test_olmoe_tp.py --ignore=lora/test_deepseekv2_tp.py --ignore=lora/test_gptoss_tp.py --ignore=lora/test_qwen3moe_tp.py --ignore=lora/test_qwen35_densemodel_lora.py 
+    - pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_qwen3_with_multi_loras.py --ignore=lora/test_olmoe_tp.py --ignore=lora/test_deepseekv2_tp.py --ignore=lora/test_gptoss_tp.py --ignore=lora/test_qwen3moe_tp.py --ignore=lora/test_qwen35_densemodel_lora.py 
   parallelism: 4
 
 
 - label: LoRA TP (Distributed)
+  key: lora-tp-distributed
   timeout_in_minutes: 30
   num_devices: 4
   source_file_dependencies:
   - vllm/lora
+  - vllm/model_executor/layers/fused_moe/
   - tests/lora
   commands:
     # FIXIT: find out which code initialize cuda before running the test
@@ -28,7 +32,7 @@ steps:
     # requires multi-GPU testing for validation.
     - pytest -v -s -x lora/test_chatglm3_tp.py
     - pytest -v -s -x lora/test_llama_tp.py
-    - pytest -v -s -x lora/test_llm_with_multi_loras.py
+    - pytest -v -s -x lora/test_qwen3_with_multi_loras.py
     - pytest -v -s -x lora/test_olmoe_tp.py
     - pytest -v -s -x lora/test_gptoss_tp.py
     - pytest -v -s -x lora/test_qwen35_densemodel_lora.py
\ No newline at end of file
diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml
index 5c21e1a7961c..72660bc6b0fc 100644
--- a/.buildkite/test_areas/misc.yaml
+++ b/.buildkite/test_areas/misc.yaml
@@ -3,24 +3,41 @@ depends_on:
   - image-build
 steps:
 - label: V1 Spec Decode
+  device: h200_35gb
+  key: v1-spec-decode
   timeout_in_minutes: 30
   source_file_dependencies:
-    - vllm/
+    - vllm/config/
+    - vllm/distributed/
+    - vllm/inputs/
+    - vllm/model_executor/
+    - vllm/platforms/
+    - vllm/sampling_params.py
+    - vllm/transformers_utils/
+    - vllm/utils/
+    - vllm/v1/
     - tests/v1/spec_decode
   commands:
     - export VLLM_WORKER_MULTIPROC_METHOD=spawn
     # TODO: create another `optional` test group for slow tests
     - pytest -v -s -m 'not slow_test' v1/spec_decode
-  mirror:
-    amd:
-      device: mi325_1
-      depends_on:
-      - image-build-amd
 
 - label: V1 Sample + Logits
+  key: v1-sample-logits
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
-    - vllm/
+    - vllm/config/
+    - vllm/distributed/
+    - vllm/engine/
+    - vllm/inputs/
+    - vllm/logger.py
+    - vllm/model_executor/
+    - vllm/platforms/
+    - vllm/sampling_params.py
+    - vllm/transformers_utils/
+    - vllm/utils/
+    - vllm/v1/
     - tests/v1/sample
     - tests/v1/logits_processors
     - tests/v1/test_oracle.py
@@ -35,17 +52,35 @@ steps:
     - pytest -v -s v1/test_outputs.py
   mirror:
     amd:
-      device: mi325_1
+      device: mi300_1
       depends_on:
       - image-build-amd
 
 - label: V1 Core + KV + Metrics
+  key: v1-core-kv-metrics
   timeout_in_minutes: 30
   source_file_dependencies:
-    - vllm/
+    - vllm/config/
+    - vllm/distributed/
+    - vllm/engine/
+    - vllm/entrypoints/pooling/
+    - vllm/inputs/
+    - vllm/lora/
+    - vllm/model_executor/
+    - vllm/multimodal/
+    - vllm/outputs.py
+    - vllm/platforms/
+    - vllm/pooling_params.py
+    - vllm/profiler/
+    - vllm/sampling_params.py
+    - vllm/tokenizers/
+    - vllm/transformers_utils/
+    - vllm/utils/
+    - vllm/v1/
     - tests/v1/core
     - tests/v1/executor
     - tests/v1/kv_offload
+    - tests/v1/simple_kv_offload
     - tests/v1/worker
     - tests/v1/kv_connector/unit
     - tests/v1/metrics
@@ -57,23 +92,34 @@ steps:
     - pytest -v -s -m 'not cpu_test' v1/core
     - pytest -v -s v1/executor
     - pytest -v -s v1/kv_offload
+    - pytest -v -s v1/simple_kv_offload
     - pytest -v -s v1/worker
     - pytest -v -s -m 'not cpu_test' v1/kv_connector/unit
     - pytest -v -s -m 'not cpu_test' v1/metrics
     # Integration test for streaming correctness (requires special branch).
     - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
     - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
-  mirror:
-    amd:
-      device: mi325_1
-      depends_on:
-      - image-build-amd
 
 - label: V1 Others (CPU)
+  key: v1-others-cpu
   depends_on:
     - image-build-cpu
   source_file_dependencies:
-    - vllm/
+    - vllm/config/
+    - vllm/distributed/
+    - vllm/engine/
+    - vllm/inputs/
+    - vllm/lora/
+    - vllm/multimodal/
+    - vllm/outputs.py
+    - vllm/platforms/
+    - vllm/pooling_params.py
+    - vllm/profiler/
+    - vllm/sampling_params.py
+    - vllm/tokenizers/
+    - vllm/transformers_utils/
+    - vllm/utils/
+    - vllm/v1/
     - tests/v1
   device: cpu-small
   commands:
@@ -84,10 +130,35 @@ steps:
     - pytest -v -s -m 'cpu_test' v1/kv_connector/unit
     - pytest -v -s -m 'cpu_test' v1/metrics
 
+- label: Extract Hidden States Integration
+  key: extract-hidden-states-integration
+  timeout_in_minutes: 20
+  device: h200_18gb
+  source_file_dependencies:
+    - vllm/v1/spec_decode/extract_hidden_states.py
+    - vllm/model_executor/models/extract_hidden_states.py
+    - vllm/transformers_utils/configs/extract_hidden_states.py
+    - tests/v1/kv_connector/extract_hidden_states_integration
+  commands:
+    - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+    - pytest -v -s v1/kv_connector/extract_hidden_states_integration
+
 - label: Regression
+  key: regression
   timeout_in_minutes: 20
+  device: h200_18gb
   source_file_dependencies:
-  - vllm/
+  - vllm/config/
+  - vllm/distributed/
+  - vllm/engine/
+  - vllm/inputs/
+  - vllm/model_executor/
+  - vllm/multimodal/
+  - vllm/platforms/
+  - vllm/sampling_params.py
+  - vllm/transformers_utils/
+  - vllm/utils/
+  - vllm/v1/
   - tests/test_regression
   commands:
   - pip install modelscope
@@ -95,6 +166,8 @@ steps:
   working_dir: "/vllm-workspace/tests" # optional
 
 - label: Examples
+  device: h200_35gb
+  key: examples
   timeout_in_minutes: 45
   working_dir: "/vllm-workspace/examples"
   source_file_dependencies:
@@ -111,25 +184,37 @@ steps:
     - python3 basic/offline_inference/embed.py
     - python3 basic/offline_inference/score.py
     # for multi-modal models
-    - python3 offline_inference/audio_language.py --seed 0
-    - python3 offline_inference/vision_language.py --seed 0
-    - python3 offline_inference/vision_language_multi_image.py --seed 0
-    - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
+    - python3 generate/multimodal/audio_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
+    - python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
      # for pooling models
     - python3 pooling/embed/vision_embedding_offline.py --seed 0
     # for features demo
-    - python3 offline_inference/prefix_caching.py
-    - python3 offline_inference/llm_engine_example.py
-    - python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
-    - python3 offline_inference/spec_decode.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
+    - python3 features/automatic_prefix_caching/prefix_caching_offline.py
+    - python3 deployment/llm_engine_example.py
+    - python3 features/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 features/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
+    - python3 features/speculative_decoding/spec_decode_offline.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
     # https://github.com/vllm-project/vllm/pull/26682 uses slightly more memory in PyTorch 2.9+ causing this test to OOM in 1xL4 GPU
-    - python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536
+    - python3 features/speculative_decoding/spec_decode_offline.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536
 
 - label: Metrics, Tracing (2 GPUs)
+  key: metrics-tracing-2-gpus
   timeout_in_minutes: 20
   num_devices: 2
   source_file_dependencies:
-  - vllm/
+  - vllm/config/
+  - vllm/distributed/
+  - vllm/engine/
+  - vllm/inputs/
+  - vllm/model_executor/
+  - vllm/multimodal/
+  - vllm/platforms/
+  - vllm/sampling_params.py
+  - vllm/tracing/
+  - vllm/transformers_utils/
+  - vllm/utils/
+  - vllm/v1/
   - tests/v1/tracing
   commands:
   - "pip install \
@@ -140,8 +225,11 @@ steps:
   - pytest -v -s v1/tracing
 
 - label: Python-only Installation
+  key: python-only-installation
   depends_on: ~
+  optional: true
   timeout_in_minutes: 20
+  device: h200_18gb
   source_file_dependencies:
   - tests/standalone_tests/python_only_compile.sh
   - setup.py
@@ -149,9 +237,23 @@ steps:
   - bash standalone_tests/python_only_compile.sh
 
 - label: Async Engine, Inputs, Utils, Worker
+  device: h200_35gb
+  key: async-engine-inputs-utils-worker
   timeout_in_minutes: 50
   source_file_dependencies:
-  - vllm/
+  - vllm/assets/
+  - vllm/config/
+  - vllm/distributed/
+  - vllm/engine/
+  - vllm/inputs/
+  - vllm/model_executor/
+  - vllm/multimodal/
+  - vllm/platforms/
+  - vllm/sampling_params.py
+  - vllm/tokenizers/
+  - vllm/transformers_utils/
+  - vllm/utils/
+  - vllm/v1/
   - tests/detokenizer
   - tests/multimodal
   - tests/utils_
@@ -161,11 +263,35 @@ steps:
   - pytest -v -s utils_
 
 - label: Async Engine, Inputs, Utils, Worker, Config (CPU)
-  depends_on: 
+  key: async-engine-inputs-utils-worker-config-cpu
+  depends_on:
   - image-build-cpu
   timeout_in_minutes: 30
   source_file_dependencies:
-  - vllm/
+  - vllm/assets/
+  - vllm/config/
+  - vllm/engine/arg_utils.py
+  - vllm/entrypoints/chat_utils.py
+  - vllm/entrypoints/mcp/
+  - vllm/entrypoints/openai/chat_completion/protocol.py
+  - vllm/entrypoints/openai/engine/protocol.py
+  - vllm/envs.py
+  - vllm/exceptions.py
+  - vllm/inputs/
+  - vllm/model_executor/layers/quantization/quark/
+  - vllm/multimodal/
+  - vllm/outputs.py
+  - vllm/platforms/
+  - vllm/pooling_params.py
+  - vllm/ray/
+  - vllm/reasoning/
+  - vllm/renderers/
+  - vllm/sampling_params.py
+  - vllm/tokenizers/
+  - vllm/tool_parsers/
+  - vllm/transformers_utils/
+  - vllm/utils/
+  - vllm/v1/
   - tests/test_inputs.py
   - tests/test_outputs.py
   - tests/test_pooling_params.py
@@ -174,7 +300,9 @@ steps:
   - tests/renderers
   - tests/standalone_tests/lazy_imports.py
   - tests/tokenizers_
+  - tests/reasoning
   - tests/tool_parsers
+  - tests/parser
   - tests/transformers_utils
   - tests/config
   device: cpu-small
@@ -187,11 +315,28 @@ steps:
   - pytest -v -s -m 'cpu_test' multimodal
   - pytest -v -s renderers
   - pytest -v -s tokenizers_
+  - pytest -v -s reasoning --ignore=reasoning/test_seedoss_reasoning_parser.py --ignore=reasoning/test_glm4_moe_reasoning_parser.py
   - pytest -v -s tool_parsers
+  - pytest -v -s parser
   - pytest -v -s transformers_utils
   - pytest -v -s config
 
+- label: Batch Invariance (A100)
+  key: batch-invariance-a100
+  timeout_in_minutes: 30
+  device: a100
+  source_file_dependencies:
+    - vllm/v1/attention
+    - vllm/model_executor/layers
+    - tests/v1/determinism/
+  commands:
+    - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+    - pip install pytest-timeout pytest-forked
+    - pytest -v -s v1/determinism/test_batch_invariance.py
+    - VLLM_TEST_MODEL=deepseek-ai/DeepSeek-V2-Lite-Chat pytest -v -s v1/determinism/test_batch_invariance.py::test_v1_generation_is_deterministic_across_batch_sizes_with_needle[TRITON_MLA]
+
 - label: Batch Invariance (H100)
+  key: batch-invariance-h100
   timeout_in_minutes: 30
   device: h100
   source_file_dependencies:
@@ -207,8 +352,9 @@ steps:
     - VLLM_TEST_MODEL=Qwen/Qwen3-30B-A3B-Thinking-2507-FP8 pytest -v -s v1/determinism/test_batch_invariance.py::test_v1_generation_is_deterministic_across_batch_sizes_with_needle[FLASH_ATTN]
 
 - label: Batch Invariance (B200)
+  key: batch-invariance-b200
   timeout_in_minutes: 30
-  device: b200
+  device: b200-k8s
   source_file_dependencies:
     - vllm/v1/attention
     - vllm/model_executor/layers
@@ -220,8 +366,12 @@ steps:
     - pytest -v -s v1/determinism/test_rms_norm_batch_invariant.py
     - VLLM_TEST_MODEL=deepseek-ai/DeepSeek-V2-Lite-Chat pytest -v -s v1/determinism/test_batch_invariance.py::test_v1_generation_is_deterministic_across_batch_sizes_with_needle[TRITON_MLA]
     - VLLM_TEST_MODEL=Qwen/Qwen3-30B-A3B-Thinking-2507-FP8 pytest -v -s v1/determinism/test_batch_invariance.py::test_v1_generation_is_deterministic_across_batch_sizes_with_needle[FLASH_ATTN]
+    - pytest -v -s v1/determinism/test_nvfp4_batch_invariant.py
+    - pytest -v -s v1/determinism/test_nvfp4_batch_invariant_scaled_mm.py
   
 - label: Acceptance Length Test (Large Models) # optional
+  device: h200_35gb
+  key: acceptance-length-test-large-models
   timeout_in_minutes: 25
   gpu: h100
   optional: true
diff --git a/.buildkite/test_areas/model_executor.yaml b/.buildkite/test_areas/model_executor.yaml
index 212abfdbb906..c41ef8a7110d 100644
--- a/.buildkite/test_areas/model_executor.yaml
+++ b/.buildkite/test_areas/model_executor.yaml
@@ -3,6 +3,7 @@ depends_on:
   - image-build
 steps:
 - label: Model Executor
+  key: model-executor
   timeout_in_minutes: 35
   source_file_dependencies:
   - vllm/engine/arg_utils.py
diff --git a/.buildkite/test_areas/model_runner_v2.yaml b/.buildkite/test_areas/model_runner_v2.yaml
index b39b00d0c2ef..2964762b346c 100644
--- a/.buildkite/test_areas/model_runner_v2.yaml
+++ b/.buildkite/test_areas/model_runner_v2.yaml
@@ -3,6 +3,8 @@ depends_on:
   - image-build
 steps:
 - label: Model Runner V2 Core Tests
+  device: h200_35gb
+  key: model-runner-v2-core-tests
   timeout_in_minutes: 45
   source_file_dependencies:
   - vllm/v1/worker/gpu/
@@ -25,16 +27,19 @@ steps:
   - pytest -v -s entrypoints/llm/test_struct_output_generate.py -k "xgrammar and not speculative_config6 and not speculative_config7 and not speculative_config8 and not speculative_config0"
 
 - label: Model Runner V2 Examples
+  device: h200_35gb
+  key: model-runner-v2-examples
   timeout_in_minutes: 45
   working_dir: "/vllm-workspace/examples"
   source_file_dependencies:
     - vllm/v1/worker/gpu/
     - vllm/v1/core/sched/
     - vllm/v1/worker/gpu_worker.py
-    - examples/offline_inference/
     - examples/basic/offline_inference/
+    - examples/generate/multimodal/
+    - examples/features/
     - examples/pooling/embed/vision_embedding_offline.py
-    - examples/others/tensorize_vllm_model.py
+    - examples/features/tensorize_vllm_model.py
   commands:
     - set -x
     - export VLLM_USE_V2_MODEL_RUNNER=1
@@ -44,21 +49,22 @@ steps:
     #- python3 basic/offline_inference/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10  # TODO
     #- python3 basic/offline_inference/embed.py   # TODO
     # for multi-modal models
-    - python3 offline_inference/audio_language.py --seed 0
-    - python3 offline_inference/vision_language.py --seed 0
-    - python3 offline_inference/vision_language_multi_image.py --seed 0
-    - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
+    - python3 generate/multimodal/audio_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_offline.py --seed 0
+    - python3 generate/multimodal/vision_language_multi_image_offline.py --seed 0
+    - python3 generate/multimodal/encoder_decoder_multimodal_offline.py --model-type whisper --seed 0
     # for pooling models
     - python3 pooling/embed/vision_embedding_offline.py --seed 0
     # for features demo
-    - python3 offline_inference/prefix_caching.py
-    - python3 offline_inference/llm_engine_example.py
-    - python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
-    - python3 offline_inference/spec_decode.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
+    - python3 features/automatic_prefix_caching/prefix_caching_offline.py
+    - python3 deployment/llm_engine_example.py
+    - python3 features/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 features/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
+    - python3 features/speculative_decoding/spec_decode_offline.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
     # https://github.com/vllm-project/vllm/pull/26682 uses slightly more memory in PyTorch 2.9+ causing this test to OOM in 1xL4 GPU
-    - python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536
+    - python3 features/speculative_decoding/spec_decode_offline.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536
 
 - label: Model Runner V2 Distributed (2 GPUs)
+  key: model-runner-v2-distributed-2-gpus
   timeout_in_minutes: 45
   working_dir: "/vllm-workspace/tests"
   num_devices: 2
@@ -78,8 +84,8 @@ steps:
     - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py -k "not ray"
     - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
 
-# These require fix https://github.com/vllm-project/vllm/pull/36280
 - label: Model Runner V2 Pipeline Parallelism (4 GPUs)
+  key: model-runner-v2-pipeline-parallelism-4-gpus
   timeout_in_minutes: 60
   working_dir: "/vllm-workspace/tests"
   num_devices: 4
@@ -95,17 +101,20 @@ steps:
     - pytest -v -s distributed/test_pp_cudagraph.py -k "not ray"
 
 - label: Model Runner V2 Spec Decode
+  device: h200_35gb
+  key: model-runner-v2-spec-decode
   timeout_in_minutes: 30
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/v1/worker/gpu/
   - vllm/v1/worker/gpu_worker.py
   - tests/v1/spec_decode/test_max_len.py
-  - tests/v1/spec_decode/test_synthetic_rejection_sampler_utils.py
+  - tests/v1/spec_decode/test_rejection_sampler_utils.py
   - tests/v1/e2e/spec_decode/test_spec_decode.py
   commands:
   - set -x
   - export VLLM_USE_V2_MODEL_RUNNER=1
   - pytest -v -s v1/spec_decode/test_max_len.py -k "eagle or mtp"
+  - pytest -v -s v1/spec_decode/test_rejection_sampler_utils.py
   - pytest -v -s v1/spec_decode/test_synthetic_rejection_sampler_utils.py
   - pytest -v -s v1/e2e/spec_decode/test_spec_decode.py -k "eagle or mtp"
diff --git a/.buildkite/test_areas/models_basic.yaml b/.buildkite/test_areas/models_basic.yaml
index f4e14ff4a94f..ef4638c6c4f4 100644
--- a/.buildkite/test_areas/models_basic.yaml
+++ b/.buildkite/test_areas/models_basic.yaml
@@ -1,9 +1,11 @@
 group: Models - Basic
-depends_on: 
+depends_on:
   - image-build
 steps:
 - label: Basic Models Tests (Initialization)
+  key: basic-models-tests-initialization
   timeout_in_minutes: 45
+  device: h200_18gb
   torch_nightly: true
   source_file_dependencies:
   - vllm/
@@ -12,10 +14,13 @@ steps:
   commands:
     # Run a subset of model initialization tests
     - pytest -v -s models/test_initialization.py::test_can_initialize_small_subset
+  mirror:
+    torch_nightly: {}
 
 - label: Basic Models Tests (Extra Initialization) %N
+  device: h200_35gb
+  key: basic-models-tests-extra-initialization
   timeout_in_minutes: 45
-  torch_nightly: true
   source_file_dependencies:
   - vllm/model_executor/models/
   - tests/models/test_initialization.py
@@ -26,8 +31,12 @@ steps:
     # test.) Also run if model initialization test file is modified
     - pytest -v -s models/test_initialization.py -k 'not test_can_initialize_small_subset' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
   parallelism: 2
+  mirror:
+    torch_nightly: {}
 
 - label: Basic Models Tests (Other)
+  device: h200_35gb
+  key: basic-models-tests-other
   timeout_in_minutes: 45
   source_file_dependencies:
   - vllm/
@@ -36,15 +45,10 @@ steps:
   - tests/models/test_registry.py
   commands:
     - pytest -v -s models/test_terratorch.py models/test_transformers.py models/test_registry.py
-  mirror:
-    amd:
-      device: mi325_1
-      depends_on:
-      - image-build-amd
-    
 
 - label: Basic Models Test (Other CPU) # 5min
-  depends_on: 
+  key: basic-models-test-other-cpu
+  depends_on:
   - image-build-cpu
   timeout_in_minutes: 10
   source_file_dependencies:
@@ -56,6 +60,8 @@ steps:
     - pytest -v -s models/test_utils.py models/test_vision.py
 
 - label: Transformers Nightly Models
+  device: h200_35gb
+  key: transformers-nightly-models
   working_dir: "/vllm-workspace/"
   optional: true
   soft_fail: true
@@ -66,6 +72,23 @@ steps:
     - pytest -v -s tests/models/multimodal/processing/
     - pytest -v -s tests/models/multimodal/test_mapping.py
     - python3 examples/basic/offline_inference/chat.py
-    - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
+    - python3 examples/generate/multimodal/vision_language_offline.py --model-type qwen2_5_vl
+    # Whisper needs spawn method to avoid deadlock
+    - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/generate/multimodal/audio_language_offline.py --model-type whisper
+
+- label: Transformers Backward Compatibility Models Test
+  device: h200_35gb
+  key: transformers-backward-compatibility-models-test
+  working_dir: "/vllm-workspace/"
+  optional: true
+  soft_fail: true
+  commands:
+    - pip install transformers==4.57.5
+    - pytest -v -s tests/models/test_initialization.py
+    - pytest -v -s tests/models/test_transformers.py
+    - pytest -v -s tests/models/multimodal/processing/
+    - pytest -v -s tests/models/multimodal/test_mapping.py
+    - python3 examples/basic/offline_inference/chat.py
+    - python3 examples/generate/multimodal/vision_language_offline.py --model-type qwen2_5_vl
     # Whisper needs spawn method to avoid deadlock
-    - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
+    - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/generate/multimodal/audio_language_offline.py --model-type whisper
diff --git a/.buildkite/test_areas/models_distributed.yaml b/.buildkite/test_areas/models_distributed.yaml
index 9df1bf830c19..b5758c55affa 100644
--- a/.buildkite/test_areas/models_distributed.yaml
+++ b/.buildkite/test_areas/models_distributed.yaml
@@ -3,6 +3,7 @@ depends_on:
   - image-build
 steps:
 - label: Distributed Model Tests (2 GPUs)
+  key: distributed-model-tests-2-gpus
   timeout_in_minutes: 50
   working_dir: "/vllm-workspace/tests"
   num_devices: 2
@@ -18,5 +19,6 @@ steps:
   # Avoid importing model tests that cause CUDA reinitialization error
   - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
   - pytest models/language -v -s -m 'distributed(num_gpus=2)'
-  - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py
+  - pytest models/multimodal/generation/test_phi4siglip.py -v -s -m 'distributed(num_gpus=2)'
+  - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/generation/test_phi4siglip.py
   - VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)'
diff --git a/.buildkite/test_areas/models_language.yaml b/.buildkite/test_areas/models_language.yaml
index a3bd21ccff3c..5d357d60f8ae 100644
--- a/.buildkite/test_areas/models_language.yaml
+++ b/.buildkite/test_areas/models_language.yaml
@@ -1,10 +1,11 @@
 group: Models - Language
-depends_on: 
+depends_on:
   - image-build
 steps:
 - label: Language Models Tests (Standard)
+  key: language-models-tests-standard
   timeout_in_minutes: 25
-  torch_nightly: true
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/models/language
@@ -12,10 +13,12 @@ steps:
     # Test standard language models, excluding a subset of slow tests
     - pip freeze | grep -E 'torch'
     - pytest -v -s models/language -m 'core_model and (not slow_test)'
+  mirror:
+    torch_nightly: {}
 
 - label: Language Models Tests (Extra Standard) %N
+  key: language-models-tests-extra-standard
   timeout_in_minutes: 45
-  torch_nightly: true
   source_file_dependencies:
   - vllm/model_executor/models/
   - tests/models/language/pooling/test_embedding.py
@@ -27,10 +30,12 @@ steps:
     - pip freeze | grep -E 'torch'
     - pytest -v -s models/language -m 'core_model and slow_test' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
   parallelism: 2
+  mirror:
+    torch_nightly: {}
 
 - label: Language Models Tests (Hybrid) %N
+  key: language-models-tests-hybrid
   timeout_in_minutes: 75
-  torch_nightly: true
   source_file_dependencies:
   - vllm/
   - tests/models/language/generation
@@ -38,12 +43,24 @@ steps:
     # Install fast path packages for testing against transformers
     # Note: also needed to run plamo2 model in vLLM
     - uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.3.0'
-    - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
+    - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0'
     # Shard hybrid language model tests
     - pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
   parallelism: 2
+  mirror:
+    torch_nightly: {}
+    amd:
+      device: mi300_1
+      depends_on:
+      - image-build-amd
+      commands:
+      - uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@fix-rocm-7.0-warp-size-constexpr'
+      - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0'
+      - pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
 
 - label: Language Models Test (Extended Generation) # 80min
+  device: h200_35gb
+  key: language-models-test-extended-generation
   timeout_in_minutes: 110
   optional: true
   source_file_dependencies:
@@ -53,20 +70,13 @@ steps:
     # Install fast path packages for testing against transformers
     # Note: also needed to run plamo2 model in vLLM
     - uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.3.0'
-    - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
+    - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0'
     - pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'
-  mirror:
-    amd:
-      device: mi325_1
-      depends_on:
-      - image-build-amd
-      commands:
-      - uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@fix-rocm-7.0-warp-size-constexpr'
-      - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
-      - pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'
 
 - label: Language Models Test (PPL)
+  key: language-models-test-ppl
   timeout_in_minutes: 110
+  device: h200_18gb
   optional: true
   source_file_dependencies:
   - vllm/
@@ -75,6 +85,8 @@ steps:
     - pytest -v -s models/language/generation_ppl_test
 
 - label: Language Models Test (Extended Pooling)  # 36min
+  device: h200_35gb
+  key: language-models-test-extended-pooling
   timeout_in_minutes: 50
   optional: true
   source_file_dependencies:
@@ -84,12 +96,15 @@ steps:
     - pytest -v -s models/language/pooling -m 'not core_model'
   mirror:
     amd:
-      device: mi325_1
+      device: mi300_1
+      timeout_in_minutes: 100
       depends_on:
       - image-build-amd
 
 - label: Language Models Test (MTEB)
+  key: language-models-test-mteb
   timeout_in_minutes: 110
+  device: h200_18gb
   optional: true
   source_file_dependencies:
   - vllm/
diff --git a/.buildkite/test_areas/models_multimodal.yaml b/.buildkite/test_areas/models_multimodal.yaml
index a2bf550dfcdf..9540a540a2f6 100644
--- a/.buildkite/test_areas/models_multimodal.yaml
+++ b/.buildkite/test_areas/models_multimodal.yaml
@@ -3,7 +3,9 @@ depends_on:
   - image-build
 steps:
 - label: "Multi-Modal Models (Standard) 1: qwen2"
+  key: multi-modal-models-standard-1-qwen2
   timeout_in_minutes: 45
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/models/multimodal
@@ -13,12 +15,14 @@ steps:
     - pytest -v -s models/multimodal/generation/test_ultravox.py -m core_model
   mirror:
     amd:
-      device: mi325_1
+      device: mi300_1
       depends_on:
       - image-build-amd
 
 - label: "Multi-Modal Models (Standard) 2: qwen3 + gemma"
+  key: multi-modal-models-standard-2-qwen3-gemma
   timeout_in_minutes: 45
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/models/multimodal
@@ -26,13 +30,16 @@ steps:
     - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
     - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "qwen3 or gemma"
     - pytest -v -s models/multimodal/generation/test_qwen2_5_vl.py -m core_model
+    - pytest -v -s models/multimodal/generation/test_vit_cudagraph.py -m core_model
   mirror:
     amd:
-      device: mi325_1
+      device: mi300_1
       depends_on:
       - image-build-amd
 
 - label: "Multi-Modal Models (Standard) 3: llava + qwen2_vl"
+  device: h200_35gb
+  key: multi-modal-models-standard-3-llava-qwen2-vl
   timeout_in_minutes: 45
   source_file_dependencies:
   - vllm/
@@ -43,27 +50,26 @@ steps:
     - pytest -v -s models/multimodal/generation/test_qwen2_vl.py -m core_model
   mirror:
     amd:
-      device: mi325_1
+      device: mi300_1
       depends_on:
       - image-build-amd
 
 - label: "Multi-Modal Models (Standard) 4: other + whisper"
+  device: h200_35gb
+  key: multi-modal-models-standard-4-other-whisper
   timeout_in_minutes: 45
   source_file_dependencies:
   - vllm/
   - tests/models/multimodal
   commands:
     - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-    - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/generation/test_ultravox.py --ignore models/multimodal/generation/test_qwen2_5_vl.py --ignore models/multimodal/generation/test_qwen2_vl.py --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing
+    - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/generation/test_ultravox.py --ignore models/multimodal/generation/test_qwen2_5_vl.py --ignore models/multimodal/generation/test_qwen2_vl.py --ignore models/multimodal/generation/test_whisper.py  --ignore models/multimodal/generation/test_memory_leak.py --ignore models/multimodal/processing
+    - pytest models/multimodal/generation/test_memory_leak.py -m core_model
     - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model  # Otherwise, mp_method="spawn" doesn't work
-  mirror:
-    amd:
-      device: mi325_1
-      depends_on:
-      - image-build-amd
 
 - label: Multi-Modal Processor (CPU)
-  depends_on: 
+  key: multi-modal-processor-cpu
+  depends_on:
   - image-build-cpu
   timeout_in_minutes: 60
   source_file_dependencies:
@@ -76,7 +82,9 @@ steps:
     - pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py
 
 - label: Multi-Modal Processor # 44min
+  key: multi-modal-processor
   timeout_in_minutes: 60
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/models/multimodal
@@ -86,6 +94,8 @@ steps:
     - pytest -v -s models/multimodal/processing/test_tensor_schema.py
 
 - label: Multi-Modal Accuracy Eval (Small Models) # 50min
+  device: h200_35gb
+  key: multi-modal-accuracy-eval-small-models
   timeout_in_minutes: 70
   working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
   source_file_dependencies:
@@ -96,6 +106,7 @@ steps:
   - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-mm-small.txt --tp-size=1
 
 - label: Multi-Modal Models (Extended Generation 1)
+  key: multi-modal-models-extended-generation-1
   optional: true
   source_file_dependencies:
   - vllm/
@@ -107,11 +118,13 @@ steps:
     - pytest -v -s models/multimodal/test_mapping.py
   mirror:
     amd:
-      device: mi325_1
+      device: mi300_1
       depends_on:
       - image-build-amd
 
 - label: Multi-Modal Models (Extended Generation 2)
+  device: h200_35gb
+  key: multi-modal-models-extended-generation-2
   optional: true
   source_file_dependencies:
   - vllm/
@@ -121,6 +134,8 @@ steps:
     - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model'
 
 - label: Multi-Modal Models (Extended Generation 3)
+  device: h200_35gb
+  key: multi-modal-models-extended-generation-3
   optional: true
   source_file_dependencies:
   - vllm/
@@ -130,7 +145,9 @@ steps:
     - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model'
 
 - label: Multi-Modal Models (Extended Pooling)
+  key: multi-modal-models-extended-pooling
   optional: true
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/models/multimodal/pooling
diff --git a/.buildkite/test_areas/plugins.yaml b/.buildkite/test_areas/plugins.yaml
index 8e0eb0284019..0d23180f3ef7 100644
--- a/.buildkite/test_areas/plugins.yaml
+++ b/.buildkite/test_areas/plugins.yaml
@@ -3,6 +3,7 @@ depends_on:
   - image-build
 steps:
 - label: Plugin Tests (2 GPUs)
+  key: plugin-tests-2-gpus
   timeout_in_minutes: 60
   working_dir: "/vllm-workspace/tests"
   num_devices: 2
diff --git a/.buildkite/test_areas/pytorch.yaml b/.buildkite/test_areas/pytorch.yaml
index f9968e9a897b..6866d5e3695d 100644
--- a/.buildkite/test_areas/pytorch.yaml
+++ b/.buildkite/test_areas/pytorch.yaml
@@ -3,9 +3,34 @@ depends_on:
   - image-build
 steps:
 - label: PyTorch Compilation Unit Tests
+  device: h200_35gb
+  key: pytorch-compilation-unit-tests
   timeout_in_minutes: 10
   source_file_dependencies:
-    - vllm/
+    - vllm/__init__.py
+    - vllm/_aiter_ops.py
+    - vllm/_custom_ops.py
+    - vllm/compilation/
+    - vllm/config/
+    - vllm/distributed/
+    - vllm/engine/
+    - vllm/env_override.py
+    - vllm/envs.py
+    - vllm/forward_context.py
+    - vllm/inputs/
+    - vllm/ir/
+    - vllm/kernels/
+    - vllm/logger.py
+    - vllm/model_executor/
+    - vllm/multimodal/
+    - vllm/platforms/
+    - vllm/plugins/
+    - vllm/sampling_params.py
+    - vllm/sequence.py
+    - vllm/transformers_utils/
+    - vllm/triton_utils/
+    - vllm/utils/
+    - vllm/v1/
     - tests/compile
   commands:
   # Run unit tests defined directly under compile/,
@@ -18,27 +43,99 @@ steps:
   - "find compile/ -maxdepth 1 -name 'test_*.py' -print0 | xargs -0 -n1 -I{} pytest -s -v '{}'"
 
 - label: PyTorch Compilation Unit Tests (H100)
+  key: pytorch-compilation-unit-tests-h100
   timeout_in_minutes: 30
   device: h100
   num_devices: 1
   source_file_dependencies:
-    - vllm/
+    - vllm/__init__.py
+    - vllm/_aiter_ops.py
+    - vllm/_custom_ops.py
+    - vllm/compilation/
+    - vllm/config/
+    - vllm/distributed/
+    - vllm/engine/
+    - vllm/env_override.py
+    - vllm/envs.py
+    - vllm/forward_context.py
+    - vllm/inputs/
+    - vllm/ir/
+    - vllm/kernels/
+    - vllm/logger.py
+    - vllm/model_executor/
+    - vllm/multimodal/
+    - vllm/platforms/
+    - vllm/plugins/
+    - vllm/sampling_params.py
+    - vllm/sequence.py
+    - vllm/transformers_utils/
+    - vllm/triton_utils/
+    - vllm/utils/
+    - vllm/v1/
     - tests/compile/h100/
   commands:
   - "find compile/h100/ -name 'test_*.py' -print0 | xargs -0 -n1 -I{} pytest -s -v '{}'"
 
 - label: PyTorch Compilation Passes Unit Tests
+  key: pytorch-compilation-passes-unit-tests
   timeout_in_minutes: 20
   source_file_dependencies:
-    - vllm/
+    - vllm/__init__.py
+    - vllm/_aiter_ops.py
+    - vllm/_custom_ops.py
+    - vllm/compilation/
+    - vllm/config/
+    - vllm/distributed/
+    - vllm/engine/
+    - vllm/env_override.py
+    - vllm/envs.py
+    - vllm/forward_context.py
+    - vllm/inputs/
+    - vllm/ir/
+    - vllm/kernels/
+    - vllm/logger.py
+    - vllm/model_executor/
+    - vllm/multimodal/
+    - vllm/platforms/
+    - vllm/plugins/
+    - vllm/sampling_params.py
+    - vllm/sequence.py
+    - vllm/transformers_utils/
+    - vllm/triton_utils/
+    - vllm/utils/
+    - vllm/v1/
     - tests/compile/passes
   commands:
   - pytest -s -v compile/passes --ignore compile/passes/distributed
 
 - label: PyTorch Fullgraph Smoke Test
+  key: pytorch-fullgraph-smoke-test
   timeout_in_minutes: 35
   source_file_dependencies:
-  - vllm/
+  - vllm/__init__.py
+  - vllm/_aiter_ops.py
+  - vllm/_custom_ops.py
+  - vllm/compilation/
+  - vllm/config/
+  - vllm/distributed/
+  - vllm/engine/
+  - vllm/env_override.py
+  - vllm/envs.py
+  - vllm/forward_context.py
+  - vllm/inputs/
+  - vllm/ir/
+  - vllm/kernels/
+  - vllm/logger.py
+  - vllm/model_executor/
+  - vllm/multimodal/
+  - vllm/platforms/
+  - vllm/plugins/
+  - vllm/sampling_params.py
+  - vllm/sequence.py
+  - vllm/transformers_utils/
+  - vllm/triton_utils/
+  - vllm/utils/
+  - vllm/v1/
   - tests/compile
   commands:
   # Run smoke tests under fullgraph directory, except test_full_graph.py
@@ -48,20 +145,47 @@ steps:
   - "find compile/fullgraph/ -name 'test_*.py' -not -name 'test_full_graph.py' -print0 | xargs -0 -n1 -I{} pytest -s -v '{}'"
 
 - label: PyTorch Fullgraph
+  key: pytorch-fullgraph
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
-  - vllm/
+  - vllm/__init__.py
+  - vllm/_aiter_ops.py
+  - vllm/_custom_ops.py
+  - vllm/compilation/
+  - vllm/config/
+  - vllm/distributed/
+  - vllm/engine/
+  - vllm/env_override.py
+  - vllm/envs.py
+  - vllm/forward_context.py
+  - vllm/inputs/
+  - vllm/ir/
+  - vllm/kernels/
+  - vllm/logger.py
+  - vllm/model_executor/
+  - vllm/multimodal/
+  - vllm/platforms/
+  - vllm/plugins/
+  - vllm/sampling_params.py
+  - vllm/sequence.py
+  - vllm/transformers_utils/
+  - vllm/triton_utils/
+  - vllm/utils/
+  - vllm/v1/
   - tests/compile
   commands:
     # fp8 kv scales not supported on sm89, tested on Blackwell instead
   - pytest -v -s compile/fullgraph/test_full_graph.py -k 'not test_fp8_kv_scale_compile'
 
 - label: Pytorch Nightly Dependency Override Check # 2min
+  key: pytorch-nightly-dependency-override-check
   # if this test fails, it means the nightly torch version is not compatible with some
   # of the dependencies. Please check the error message and add the package to whitelist
   # in /vllm/tools/pre_commit/generate_nightly_torch_test.py
+  device: h200_18gb
   soft_fail: true
   source_file_dependencies:
-  - requirements/nightly_torch_test.txt
+  - requirements/test/nightly-torch.txt
   commands:
   - bash standalone_tests/pytorch_nightly_dependency.sh
diff --git a/.buildkite/test_areas/quantization.yaml b/.buildkite/test_areas/quantization.yaml
index 5ee2e5186966..8a9a36da4481 100644
--- a/.buildkite/test_areas/quantization.yaml
+++ b/.buildkite/test_areas/quantization.yaml
@@ -1,29 +1,31 @@
 group: Quantization
-depends_on: 
+depends_on:
   - image-build
 steps:
 - label: Quantization
+  key: quantization
   timeout_in_minutes: 90
   source_file_dependencies:
   - csrc/
   - vllm/model_executor/layers/quantization
   - tests/quantization
   commands:
-  # temporary install here since we need nightly, will move to requirements/test.in
+  # temporary install here since we need nightly, will move to requirements/test/cuda.in
   # after torchao 0.12 release, and pin a working version of torchao nightly here
 
   # since torchao nightly is only compatible with torch nightly currently
   # https://github.com/pytorch/ao/issues/2919, we'll have to skip new torchao tests for now
   # we can only upgrade after this is resolved
   # TODO(jerryzh168): resolve the above comment
-  - uv pip install --system torchao==0.14.1 --index-url https://download.pytorch.org/whl/cu129
+  - uv pip install --system torchao==0.17.0 --index-url https://download.pytorch.org/whl/cu130
   - uv pip install --system conch-triton-kernels
   - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py
 
 - label: Quantized MoE Test (B200)
+  key: quantized-moe-test-b200
   timeout_in_minutes: 60
   working_dir: "/vllm-workspace/"
-  device: b200
+  device: b200-k8s
   source_file_dependencies:
   - tests/quantization/test_blackwell_moe.py
   - vllm/model_executor/models/deepseek_v2.py
@@ -38,6 +40,7 @@ steps:
     - pytest -s -v tests/quantization/test_blackwell_moe.py
 
 - label: Quantized Models Test
+  key: quantized-models-test
   timeout_in_minutes: 60
   source_file_dependencies:
   - vllm/model_executor/layers/quantization
diff --git a/.buildkite/test_areas/ray_compat.yaml b/.buildkite/test_areas/ray_compat.yaml
index 7917b0a4ff8b..9207621a5830 100644
--- a/.buildkite/test_areas/ray_compat.yaml
+++ b/.buildkite/test_areas/ray_compat.yaml
@@ -3,10 +3,12 @@ depends_on:
   - image-build
 steps:
 - label: Ray Dependency Compatibility Check
+  key: ray-dependency-compatibility-check
   # Informational only — does not block the pipeline.
   # If this fails, it means the PR introduces a dependency that
   # conflicts with Ray's dependency constraints.
   # See https://github.com/vllm-project/vllm/issues/33599
+  device: h200_18gb
   soft_fail: true
   timeout_in_minutes: 10
   source_file_dependencies:
diff --git a/.buildkite/test_areas/rust_frontend.yaml b/.buildkite/test_areas/rust_frontend.yaml
new file mode 100644
index 000000000000..f750d58be586
--- /dev/null
+++ b/.buildkite/test_areas/rust_frontend.yaml
@@ -0,0 +1,107 @@
+group: Rust Frontend E2E
+depends_on:
+  - image-build
+steps:
+- label: Rust Frontend OpenAI Coverage
+  timeout_in_minutes: 90
+  device: h200_18gb
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - rust/
+  - vllm/benchmarks/
+  - vllm/entrypoints/openai/
+  - vllm/entrypoints/serve/
+  - vllm/v1/sample/
+  - tests/utils.py
+  - tests/benchmarks/test_serve_cli.py
+  - tests/entrypoints/openai/chat_completion/test_chat_completion.py
+  # - tests/entrypoints/openai/chat_completion/test_chat_logit_bias_validation.py
+  # - tests/entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py
+  # - tests/entrypoints/openai/completion/test_prompt_validation.py
+  - tests/entrypoints/openai/completion/test_shutdown.py
+  # - tests/entrypoints/openai/test_return_token_ids.py
+  # - tests/entrypoints/openai/test_uds.py
+  - tests/v1/sample/test_logprobs_e2e.py
+  commands:
+  - export VLLM_USE_RUST_FRONTEND=1
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s benchmarks/test_serve_cli.py -k "not insecure and not (test_bench_serve and not test_bench_serve_chat)"
+  - pytest -v -s entrypoints/openai/chat_completion/test_chat_completion.py
+  # - pytest -v -s entrypoints/openai/chat_completion/test_chat_logit_bias_validation.py -k "not invalid"
+  # - pytest -v -s entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py
+  # - pytest -v -s entrypoints/openai/completion/test_prompt_validation.py -k "not prompt_embeds"
+  - pytest -v -s entrypoints/openai/completion/test_shutdown.py -k "not engine_failure and not test_abort_timeout_exits_quickly"
+  # - pytest -v -s entrypoints/openai/test_return_token_ids.py
+  # - pytest -v -s entrypoints/openai/test_uds.py
+  - pytest -v -s v1/sample/test_logprobs_e2e.py -k "test_prompt_logprobs_e2e_server"
+
+- label: Rust Frontend Serve/Admin Coverage
+  timeout_in_minutes: 60
+  device: h200_18gb
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - rust/
+  - vllm/entrypoints/openai/
+  - vllm/entrypoints/serve/
+  - vllm/v1/engine/
+  - tests/utils.py
+  # - tests/entrypoints/rpc/test_collective_rpc.py
+  - tests/entrypoints/serve/disagg/test_serving_tokens.py
+  - tests/entrypoints/serve/instrumentator/test_basic.py
+  - tests/entrypoints/serve/instrumentator/test_metrics.py
+  # - tests/entrypoints/serve/instrumentator/test_sleep.py
+  commands:
+  - export VLLM_USE_RUST_FRONTEND=1
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  # - pytest -v -s entrypoints/rpc/test_collective_rpc.py
+  - pytest -v -s entrypoints/serve/instrumentator/test_basic.py -k "not show_version and not server_load"
+  - pytest -v -s entrypoints/serve/disagg/test_serving_tokens.py -k "not stream and not lora and not test_generate_logprobs and not stop_string_workflow"
+  - pytest -v -s entrypoints/serve/instrumentator/test_metrics.py -k "text and not show and not run_batch and not test_metrics_counts and not test_metrics_exist"
+  # - pytest -v -s entrypoints/serve/instrumentator/test_sleep.py
+
+- label: Rust Frontend Core Correctness
+  timeout_in_minutes: 30
+  device: h200_18gb
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - rust/
+  - vllm/entrypoints/openai/
+  - tests/utils.py
+  - tests/entrypoints/openai/correctness/test_lmeval.py
+  commands:
+  - export VLLM_USE_RUST_FRONTEND=1
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
+
+- label: Rust Frontend Tool Use
+  timeout_in_minutes: 60
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - rust/
+  - vllm/entrypoints/openai/
+  - vllm/tool_parsers/
+  - tests/utils.py
+  - tests/tool_use/
+  commands:
+  - export VLLM_USE_RUST_FRONTEND=1
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s tool_use --ignore=tool_use/mistral --models llama3.2 -k "not test_response_format_with_tool_choice_required and not test_parallel_tool_calls_false and not test_tool_call_and_choice"
+
+- label: Rust Frontend Distributed
+  timeout_in_minutes: 30
+  num_devices: 4
+  working_dir: "/vllm-workspace/tests"
+  source_file_dependencies:
+  - rust/
+  - vllm/distributed/
+  - vllm/engine/
+  - vllm/executor/
+  - vllm/v1/engine/
+  - vllm/v1/worker/
+  - tests/utils.py
+  - tests/v1/distributed/test_internal_lb_dp.py
+  commands:
+  - export VLLM_USE_RUST_FRONTEND=1
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - export NCCL_CUMEM_HOST_ENABLE=0
+  - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/distributed/test_internal_lb_dp.py -k "not 4 and not server_info"
diff --git a/.buildkite/test_areas/rust_frontend_cargo.yaml b/.buildkite/test_areas/rust_frontend_cargo.yaml
new file mode 100644
index 000000000000..06f9eb9c245d
--- /dev/null
+++ b/.buildkite/test_areas/rust_frontend_cargo.yaml
@@ -0,0 +1,30 @@
+group: Rust Frontend Cargo
+depends_on: []
+steps:
+- label: Rust Frontend Cargo Style + Clippy
+  key: rust-frontend-cargo-style-clippy
+  depends_on: []
+  timeout_in_minutes: 30
+  device: cpu-medium
+  no_plugin: true
+  source_file_dependencies:
+  - rust/
+  - rust-toolchain.toml
+  - .buildkite/test_areas/rust_frontend_cargo.yaml
+  - .buildkite/scripts/run-rust-frontend-cargo-ci.sh
+  commands:
+  - .buildkite/scripts/run-rust-frontend-cargo-ci.sh style-clippy
+
+- label: Rust Frontend Cargo Tests
+  key: rust-frontend-cargo-tests
+  depends_on: []
+  timeout_in_minutes: 30
+  device: cpu-medium
+  no_plugin: true
+  source_file_dependencies:
+  - rust/
+  - rust-toolchain.toml
+  - .buildkite/test_areas/rust_frontend_cargo.yaml
+  - .buildkite/scripts/run-rust-frontend-cargo-ci.sh
+  commands:
+  - .buildkite/scripts/run-rust-frontend-cargo-ci.sh test
diff --git a/.buildkite/test_areas/samplers.yaml b/.buildkite/test_areas/samplers.yaml
index 2052a379827a..6ec6f8efd351 100644
--- a/.buildkite/test_areas/samplers.yaml
+++ b/.buildkite/test_areas/samplers.yaml
@@ -3,18 +3,23 @@ depends_on:
   - image-build
 steps:
 - label: Samplers Test
+  device: h200_35gb
+  key: samplers-test
   timeout_in_minutes: 75
   source_file_dependencies:
   - vllm/model_executor/layers
   - vllm/sampling_metadata.py
   - tests/samplers
   - tests/conftest.py
+  - vllm/entrypoints/generate/beam_search
   commands:
-    - pytest -v -s samplers
+    # VLLM_USE_FLASHINFER_SAMPLER defaults to 1 now, so we need to pin both
+    # values explicitly to still cover the PyTorch-native (Triton) path.
+    - VLLM_USE_FLASHINFER_SAMPLER=0 pytest -v -s samplers
     - VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers
   mirror:
     amd:
-      device: mi325_1
+      device: mi250_1
       depends_on:
       - image-build-amd
       commands:
diff --git a/.buildkite/test_areas/spec_decode.yaml b/.buildkite/test_areas/spec_decode.yaml
index 8dba7a2f8c66..54ce9ed7e117 100644
--- a/.buildkite/test_areas/spec_decode.yaml
+++ b/.buildkite/test_areas/spec_decode.yaml
@@ -3,7 +3,21 @@ depends_on:
   - image-build
 steps:
 - label: Spec Decode Eagle
+  key: spec-decode-eagle
   timeout_in_minutes: 30
+  device: h200_18gb
+  source_file_dependencies:
+    - vllm/v1/spec_decode/
+    - vllm/v1/worker/gpu/spec_decode/
+    - tests/v1/e2e/spec_decode/
+  commands:
+    - pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"
+
+- label: Spec Decode Eagle Nightly B200
+  key: spec-decode-eagle-nightly-b200
+  timeout_in_minutes: 30
+  device: b200-k8s
+  optional: true
   source_file_dependencies:
     - vllm/v1/spec_decode/
     - vllm/v1/worker/gpu/spec_decode/
@@ -12,7 +26,9 @@ steps:
     - pytest -v -s v1/e2e/spec_decode -k "eagle_correctness"
 
 - label: Spec Decode Speculators + MTP
+  key: spec-decode-speculators-mtp
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
     - vllm/v1/spec_decode/
     - vllm/v1/worker/gpu/spec_decode/
@@ -21,8 +37,23 @@ steps:
   commands:
     - pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
 
+- label: Spec Decode Speculators + MTP Nightly B200
+  key: spec-decode-speculators-mtp-nightly-b200
+  timeout_in_minutes: 30
+  device: b200-k8s
+  optional: true
+  source_file_dependencies:
+    - vllm/v1/spec_decode/
+    - vllm/v1/worker/gpu/spec_decode/
+    - vllm/transformers_utils/configs/speculators/
+    - tests/v1/e2e/spec_decode/
+  commands:
+    - pytest -v -s v1/e2e/spec_decode -k "speculators or mtp_correctness"
+  
 - label: Spec Decode Ngram + Suffix
+  key: spec-decode-ngram-suffix
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
     - vllm/v1/spec_decode/
     - vllm/v1/worker/gpu/spec_decode/
@@ -31,10 +62,51 @@ steps:
     - pytest -v -s v1/e2e/spec_decode -k "ngram or suffix"
 
 - label: Spec Decode Draft Model
+  key: spec-decode-draft-model
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
     - vllm/v1/spec_decode/
     - vllm/v1/worker/gpu/spec_decode/
     - tests/v1/e2e/spec_decode/
   commands:
     - pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"
+
+- label: Spec Decode Draft Model Nightly B200
+  key: spec-decode-draft-model-nightly-b200
+  timeout_in_minutes: 30
+  device: b200-k8s
+  optional: true
+  source_file_dependencies:
+    - vllm/v1/spec_decode/
+    - vllm/v1/worker/gpu/spec_decode/
+    - tests/v1/e2e/spec_decode/
+  commands:
+    - pytest -v -s v1/e2e/spec_decode -k "draft_model or no_sync or batch_inference"
+
+- label: Speculators Correctness
+  key: speculators-correctness
+  timeout_in_minutes: 60
+  device: h100
+  optional: true
+  num_devices: 1
+  source_file_dependencies:
+    - vllm/v1/spec_decode/
+    - vllm/model_executor/models/qwen3_dflash.py
+    - tests/v1/spec_decode/test_speculators_correctness.py
+  commands:
+    - export VLLM_ALLOW_INSECURE_SERIALIZATION=1
+    - pytest -v -s v1/spec_decode/test_speculators_correctness.py -m slow_test
+
+- label: Spec Decode MTP hybrid (B200)
+  timeout_in_minutes: 30
+  device: b200-k8s
+  optional: true
+  source_file_dependencies:
+    - vllm/v1/spec_decode/
+    - vllm/v1/worker/gpu/spec_decode/
+    - vllm/model_executor/models/qwen3_5.py
+    - vllm/model_executor/models/qwen3_5_mtp.py
+    - tests/v1/e2e/spec_decode/
+  commands:
+    - pytest -v -s v1/e2e/spec_decode -k "qwen3_5-hybrid"
diff --git a/.buildkite/test_areas/weight_loading.yaml b/.buildkite/test_areas/weight_loading.yaml
index 8e86374a8ad0..01c6bb7809bc 100644
--- a/.buildkite/test_areas/weight_loading.yaml
+++ b/.buildkite/test_areas/weight_loading.yaml
@@ -3,6 +3,7 @@ depends_on:
   - image-build
 steps:
 - label: Weight Loading Multiple GPU  # 33min
+  key: weight-loading-multiple-gpu
   timeout_in_minutes: 45
   working_dir: "/vllm-workspace/tests"
   num_devices: 2
diff --git a/.dockerignore b/.dockerignore
index 3863656915d0..66447272e95a 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -2,6 +2,7 @@
 /build
 dist
 vllm/*.so
+vllm/vllm-rs
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
@@ -31,3 +32,4 @@ share/python-wheels/
 .installed.cfg
 *.egg
 MANIFEST
+rust/target/
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index c0ceae044d25..d65c736d4ae8 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -2,18 +2,24 @@
 # for more info about CODEOWNERS file
 
 # This lists cover the "core" components of vLLM that require careful review
-/vllm/compilation @zou3519 @youkaichao @ProExpertProg @BoyuanFeng
-/vllm/distributed/kv_transfer @NickLucche @ApostaC @orozery
+/vllm/compilation @zou3519 @youkaichao @ProExpertProg @BoyuanFeng @vadiklyutiy
+/vllm/distributed/kv_transfer @NickLucche @ApostaC @orozery @xuechendi
 /vllm/lora @jeejeelee
 /vllm/model_executor/layers/attention @LucasWilkinson @MatthewBonanni
-/vllm/model_executor/layers/fused_moe @mgoin @pavanimajety
-/vllm/model_executor/layers/quantization @mgoin @robertgshaw2-redhat @tlrmchlsmth @yewentao256 @pavanimajety
-/vllm/model_executor/layers/mamba @tdoublep
+/vllm/model_executor/layers/fused_moe @mgoin @pavanimajety @zyongye
+/vllm/model_executor/layers/quantization @mgoin @robertgshaw2-redhat @tlrmchlsmth @yewentao256 @pavanimajety @zyongye
+/vllm/model_executor/layers/mamba @tdoublep @tomeras91
+/vllm/model_executor/layers/mamba/gdn_linear_attn.py @tdoublep @ZJY0516 @vadiklyutiy
+/vllm/model_executor/layers/rotary_embedding.py @vadiklyutiy
 /vllm/model_executor/model_loader @22quinn
 /vllm/model_executor/layers/batch_invariant.py @yewentao256 
+/vllm/ir @ProExpertProg
+/vllm/kernels/ @ProExpertProg @tjtanaa
+/vllm/kernels/helion @ProExpertProg @zou3519
 /vllm/multimodal @DarkLight1337 @ywang96 @NickLucche @tjtanaa
 /vllm/vllm_flash_attn @LucasWilkinson @MatthewBonanni
-CMakeLists.txt @tlrmchlsmth @LucasWilkinson
+/CMakeLists.txt @tlrmchlsmth @LucasWilkinson @Harry-Chen
+/cmake @tlrmchlsmth @LucasWilkinson @Harry-Chen
 
 # Any change to the VllmConfig changes can have a large user-facing impact,
 # so spam a lot of people
@@ -25,8 +31,8 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson
 /vllm/entrypoints/cli @hmellor @mgoin @DarkLight1337 @russellb
 /vllm/entrypoints/mcp @heheda12345
 /vllm/entrypoints/openai @aarnphm @chaunceyjiang @DarkLight1337 @russellb
-/vllm/entrypoints/openai/realtime @njhill
-/vllm/entrypoints/openai/speech_to_text @NickLucche
+/vllm/entrypoints/speech_to_text/realtime @njhill
+/vllm/entrypoints/speech_to_text @NickLucche
 /vllm/entrypoints/pooling @noooop
 /vllm/entrypoints/sagemaker @DarkLight1337
 /vllm/entrypoints/serve @njhill
@@ -39,15 +45,17 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson
 /vllm/pooling_params.py @noooop @DarkLight1337
 /vllm/tokenizers @DarkLight1337 @njhill
 /vllm/renderers @DarkLight1337 @njhill
-/vllm/reasoning @aarnphm @chaunceyjiang
-/vllm/tool_parsers @aarnphm @chaunceyjiang
+/vllm/reasoning @aarnphm @chaunceyjiang @sfeng33 @bbrowning
+/vllm/tool_parsers @aarnphm @chaunceyjiang @sfeng33 @bbrowning
+/vllm/parser @aarnphm @chaunceyjiang @sfeng33 @bbrowning
 
 # vLLM V1
 /vllm/v1/attention @LucasWilkinson @MatthewBonanni
 /vllm/v1/attention/backend.py @WoosukKwon @zhuohan123 @youkaichao @alexm-redhat @njhill
 /vllm/v1/attention/backends/mla @pavanimajety
-/vllm/v1/attention/backends/flashinfer.py @mgoin @pavanimajety
+/vllm/v1/attention/backends/flashinfer.py @mgoin @pavanimajety @vadiklyutiy
 /vllm/v1/attention/backends/triton_attn.py @tdoublep
+/vllm/v1/attention/backends/gdn_attn.py @ZJY0516 @vadiklyutiy
 /vllm/v1/core @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @alexm-redhat @heheda12345 @ApostaC @orozery
 /vllm/v1/sample @22quinn @houseroad @njhill
 /vllm/v1/spec_decode @benchislett @luccafong @MatthewBonanni
@@ -63,28 +71,36 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson
 /vllm/v1/worker/gpu @WoosukKwon @njhill
 /vllm/v1/worker/gpu/kv_connector.py @orozery
 
+# CI & building
+/.buildkite @Harry-Chen
+/docker/Dockerfile @Harry-Chen
+
 # Test ownership
 /.buildkite/lm-eval-harness @mgoin 
 /tests/distributed/test_multi_node_assignment.py @youkaichao
 /tests/distributed/test_pipeline_parallel.py @youkaichao
 /tests/distributed/test_same_node.py @youkaichao
 /tests/entrypoints @DarkLight1337 @robertgshaw2-redhat @aarnphm @NickLucche
-/tests/evals @mgoin
-/tests/kernels @mgoin @tlrmchlsmth @WoosukKwon @yewentao256
+/tests/evals @mgoin @vadiklyutiy
+/tests/kernels @mgoin @tlrmchlsmth @WoosukKwon @yewentao256 @zyongye
+/tests/kernels/ir @ProExpertProg @tjtanaa
 /tests/models @DarkLight1337 @ywang96
 /tests/multimodal @DarkLight1337 @ywang96 @NickLucche
-/tests/quantization @mgoin @robertgshaw2-redhat @yewentao256 @pavanimajety
+/tests/quantization @mgoin @robertgshaw2-redhat @yewentao256 @pavanimajety @zyongye
 /tests/test_inputs.py @DarkLight1337 @ywang96
 /tests/entrypoints/llm/test_struct_output_generate.py @mgoin @russellb @aarnphm
 /tests/v1/structured_output @mgoin @russellb @aarnphm
 /tests/v1/core @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @alexm-redhat @heheda12345 @ApostaC @orozery
 /tests/weight_loading @mgoin @youkaichao @yewentao256
 /tests/lora @jeejeelee
-/tests/models/language/generation/test_hybrid.py @tdoublep
+/tests/models/language/generation/test_hybrid.py @tdoublep @tomeras91
 /tests/v1/kv_connector/nixl_integration @NickLucche
 /tests/v1/kv_connector @ApostaC @orozery
 /tests/v1/kv_offload @ApostaC @orozery
-/tests/v1/determinism @yewentao256 
+/tests/v1/determinism @yewentao256
+/tests/reasoning @aarnphm @chaunceyjiang @sfeng33 @bbrowning
+/tests/tool_parsers @aarnphm @chaunceyjiang @sfeng33 @bbrowning
+/tests/tool_use @aarnphm @chaunceyjiang @sfeng33 @bbrowning
 
 # Transformers modeling backend
 /vllm/model_executor/models/transformers @hmellor
@@ -113,24 +129,33 @@ mkdocs.yaml @hmellor
 /tools/pre_commit @hmellor
 
 # CPU
-/vllm/v1/worker/cpu* @bigPYJ1151
+/vllm/v1/worker/cpu* @bigPYJ1151 @xuechendi
 /csrc/cpu @bigPYJ1151
-/vllm/platforms/cpu.py @bigPYJ1151
+/vllm/platforms/cpu.py @bigPYJ1151 @xuechendi
 /cmake/cpu_extension.cmake @bigPYJ1151
-/docker/Dockerfile.cpu @bigPYJ1151
+/docker/Dockerfile.cpu @bigPYJ1151 @xuechendi
 
 # Intel GPU
-/vllm/v1/worker/xpu* @jikunshang
-/vllm/platforms/xpu.py @jikunshang
-/docker/Dockerfile.xpu @jikunshang
+/vllm/v1/worker/xpu* @jikunshang @xuechendi
+/vllm/platforms/xpu.py @jikunshang @xuechendi
+/docker/Dockerfile.xpu @jikunshang @xuechendi
+
+# Nemotron-specific files
+/vllm/model_executor/models/*nemotron* @tomeras91
+/vllm/transformers_utils/configs/*nemotron* @tomeras91
+/tests/**/*nemotron* @tomeras91
 
 # Qwen-specific files
-/vllm/attention/backends/dual_chunk_flash_attn.py @sighingnow
-/vllm/model_executor/models/qwen* @sighingnow
+/vllm/model_executor/models/qwen* @sighingnow @vadiklyutiy
+/vllm/transformers_utils/configs/qwen* @sighingnow @vadiklyutiy
 
 # MTP-specific files
 /vllm/model_executor/models/deepseek_mtp.py @luccafong
 
+# DeepseekV4-specific files
+/vllm/models/deepseek_v4 @zyongye
+/vllm/model_executor/layers/sparse_attn_indexer.py @zyongye
+
 # Mistral-specific files
 /vllm/model_executor/models/mistral*.py @patrickvonplaten
 /vllm/model_executor/models/mixtral*.py @patrickvonplaten
@@ -142,15 +167,16 @@ mkdocs.yaml @hmellor
 # Kernels
 /vllm/v1/attention/ops/chunked_prefill_paged_decode.py @tdoublep
 /vllm/v1/attention/ops/triton_unified_attention.py @tdoublep
+/vllm/model_executor/layers/fla @ZJY0516 @vadiklyutiy
 
 # ROCm related: specify owner with write access to notify AMD folks for careful code review
-/vllm/**/*rocm* @tjtanaa
-/docker/Dockerfile.rocm* @gshtras @tjtanaa
-/vllm/v1/attention/backends/rocm*.py @gshtras @tjtanaa
-/vllm/v1/attention/backends/mla/rocm*.py @gshtras @tjtanaa
-/vllm/v1/attention/ops/rocm*.py @gshtras @tjtanaa
-/vllm/model_executor/layers/fused_moe/rocm*.py @gshtras @tjtanaa
-/csrc/rocm @gshtras @tjtanaa
+/vllm/**/*rocm* @tjtanaa @dllehr-amd
+/docker/Dockerfile.rocm* @tjtanaa @dllehr-amd
+/vllm/v1/attention/backends/rocm*.py @tjtanaa @dllehr-amd
+/vllm/v1/attention/backends/mla/rocm*.py @tjtanaa @dllehr-amd
+/vllm/v1/attention/ops/rocm*.py @tjtanaa @dllehr-amd
+/vllm/model_executor/layers/fused_moe/rocm*.py @tjtanaa @dllehr-amd
+/csrc/rocm @tjtanaa @dllehr-amd
 /requirements/*rocm* @tjtanaa
 /tests/**/*rocm* @tjtanaa
 /docs/**/*rocm* @tjtanaa
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 8043df65d558..8a3934670e44 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -15,7 +15,6 @@ PLEASE FILL IN THE PR DESCRIPTION HERE ENSURING ALL CHECKLIST ITEMS (AT THE BOTT
 - [ ] The test plan, such as providing test command.
 - [ ] The test results, such as pasting the results comparison before and after, or e2e results
 - [ ] (Optional) The necessary documentation update, such as updating `supported_models.md` and `examples` for a new model.
-- [ ] (Optional) Release notes update. If your change is user facing, please update the release notes draft in the [Google Doc](https://docs.google.com/document/d/1YyVqrgX4gHTtrstbq8oWUImOyPCKSGnJ7xtTpmXzlRs/edit?tab=t.0).
 </details>
 
 **BEFORE SUBMITTING, PLEASE READ <https://docs.vllm.ai/en/latest/contributing>** (anything written below this line will be removed by GitHub Actions)
diff --git a/.github/mergify.yml b/.github/mergify.yml
index e8ef4d49dd00..2d36e3507028 100644
--- a/.github/mergify.yml
+++ b/.github/mergify.yml
@@ -18,7 +18,7 @@ pull_request_rules:
 - name: comment-pre-commit-failure
   description: Comment on PR when pre-commit check fails
   conditions:
-    - status-failure=pre-commit
+    - check-failure=pre-commit
     - -closed
     - -draft
   actions:
@@ -51,7 +51,7 @@ pull_request_rules:
 - name: comment-dco-failure
   description: Comment on PR when DCO check fails
   conditions:
-    - status-failure=dco
+    - check-failure=dco
     - -closed
     - -draft
   actions:
@@ -83,8 +83,8 @@ pull_request_rules:
     - or:
       - files~=^examples/.*deepseek.*\.py
       - files~=^tests/.*deepseek.*\.py
-      - files~=^vllm/entrypoints/openai/tool_parsers/.*deepseek.*\.py
       - files~=^vllm/model_executor/models/.*deepseek.*\.py
+      - files~=^vllm/tool_parsers/.*deepseek.*\.py
       - files~=^vllm/reasoning/.*deepseek.*\.py
       - files~=^vllm/transformers_utils/.*deepseek.*\.py
       - title~=(?i)DeepSeek
@@ -110,9 +110,10 @@ pull_request_rules:
     - or:
       - files~=^examples/.*llama.*\.py
       - files~=^tests/.*llama.*\.py
-      - files~=^vllm/entrypoints/openai/tool_parsers/llama.*\.py
       - files~=^vllm/model_executor/models/.*llama.*\.py
-      - files~=^vllm/transformers_utils/configs/.*llama.*\.py
+      - files~=^vllm/reasoning/.*llama.*\.py
+      - files~=^vllm/tool_parsers/.*llama.*\.py
+      - files~=^vllm/transformers_utils/.*llama.*\.py
       - title~=(?i)llama
   actions:
     label:
@@ -133,6 +134,23 @@ pull_request_rules:
       add:
         - multi-modality
 
+- name: label-mistral
+  description: Automatically apply mistral label
+  conditions:
+    - label != stale
+    - or:
+      - files~=^examples/.*mistral.*\.py
+      - files~=^tests/.*mistral.*\.py
+      - files~=^vllm/model_executor/models/.*mistral.*\.py
+      - files~=^vllm/reasoning/.*mistral.*\.py
+      - files~=^vllm/tool_parsers/.*mistral.*\.py
+      - files~=^vllm/transformers_utils/.*mistral.*\.py
+      - title~=(?i)Mistral
+  actions:
+    label:
+      add:
+        - mistral
+
 - name: label-new-model
   description: Automatically apply new-model label
   conditions:
@@ -167,7 +185,9 @@ pull_request_rules:
       - files~=^examples/.*qwen.*\.py
       - files~=^tests/.*qwen.*\.py
       - files~=^vllm/model_executor/models/.*qwen.*\.py
+      - files~=^vllm/tool_parsers/.*qwen.*\.py
       - files~=^vllm/reasoning/.*qwen.*\.py
+      - files~=^vllm/transformers_utils/.*qwen.*\.py
       - title~=(?i)Qwen
   actions:
     label:
@@ -242,8 +262,9 @@ pull_request_rules:
       - files~=^docker/Dockerfile.xpu
       - files~=^\\.buildkite/intel_jobs/
       - files=\.buildkite/ci_config_intel.yaml
-      - files=vllm/model_executor/layers/fused_moe/xpu_fused_moe.py
+      - files=vllm/model_executor/layers/fused_moe/experts/xpu_moe.py
       - files=vllm/model_executor/kernels/linear/mixed_precision/xpu.py
+      - files=vllm/model_executor/kernels/linear/mxfp8/xpu.py
       - files=vllm/model_executor/kernels/linear/scaled_mm/xpu.py
       - files=vllm/distributed/device_communicators/xpu_communicator.py
       - files=vllm/v1/attention/backends/mla/xpu_mla_sparse.py
@@ -251,6 +272,7 @@ pull_request_rules:
       - files=vllm/v1/worker/xpu_worker.py
       - files=vllm/v1/worker/xpu_model_runner.py
       - files=vllm/_xpu_ops.py
+      - files=vllm/kernels/xpu_ops.py
       - files~=^vllm/lora/ops/xpu_ops
       - files=vllm/lora/punica_wrapper/punica_xpu.py
       - files=vllm/platforms/xpu.py
@@ -258,7 +280,6 @@ pull_request_rules:
       - title~=(?i)XPU
       - title~=(?i)Intel
       - title~=(?i)BMG
-      - title~=(?i)Arc
   actions:
     label:
       add:
@@ -287,8 +308,7 @@ pull_request_rules:
       - files=benchmarks/benchmark_serving_structured_output.py
       - files=benchmarks/run_structured_output_benchmark.sh
       - files=docs/features/structured_outputs.md
-      - files=examples/offline_inference/structured_outputs.py
-      - files=examples/online_serving/structured_outputs/structured_outputs.py
+      - files=^examples/features/structured_outputs/
       - files~=^tests/v1/structured_output/
       - files=tests/entrypoints/llm/test_struct_output_generate.py
       - files~=^vllm/v1/structured_output/
@@ -304,7 +324,7 @@ pull_request_rules:
     - or:
       - files~=^vllm/v1/spec_decode/
       - files~=^tests/v1/spec_decode/
-      - files~=^examples/.*(spec_decode|mlpspeculator|eagle|speculation).*\.py
+      - files=^examples/features/speculative_decoding/
       - files~=^vllm/model_executor/models/.*eagle.*\.py
       - files=vllm/model_executor/models/mlp_speculator.py
       - files~=^vllm/transformers_utils/configs/(eagle|medusa|mlp_speculator)\.py
@@ -368,27 +388,24 @@ pull_request_rules:
       - files~=^tests/entrypoints/anthropic/.*tool.*
       - files~=^vllm/tool_parsers/
       - files=docs/features/tool_calling.md
-      - files~=^examples/tool_chat_*
-      - files=examples/offline_inference/chat_with_tools.py
-      - files=examples/online_serving/openai_chat_completion_client_with_tools_required.py
-      - files=examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py
-      - files=examples/online_serving/openai_chat_completion_client_with_tools.py
+      - files~=^examples/tool_calling/
   actions:
     label:
       add:
         - tool-calling
 
-- name: auto-rebase if approved, ready, and 40 commits behind main
+- name: auto-rebase to keep merge candidate within 1 day behind main
   conditions:
     - base = main
     - label=ready
     - "#approved-reviews-by >= 1"
-    - "#commits-behind >= 40"
+    - "#commits-behind >= 50"
+    - "#check-failure = 0"
     - -closed
     - -draft
     - -conflict
   actions:
-    rebase: {}
+    update: {}
 
 - name: ping author on conflicts and add 'needs-rebase' label
   conditions:
@@ -460,9 +477,7 @@ pull_request_rules:
   conditions:
     - label != stale
     - or:
-      - files~=^examples/online_serving/disaggregated[^/]*/.*
-      - files~=^examples/offline_inference/disaggregated[^/]*/.*
-      - files~=^examples/others/lmcache/
+      - files~=^examples/disaggregated/
       - files~=^tests/v1/kv_connector/
       - files~=^vllm/distributed/kv_transfer/
       - title~=(?i)\bP/?D\b
diff --git a/.github/workflows/issue_autolabel.yml b/.github/workflows/issue_autolabel.yml
index 2cb5c176ae0a..3efa582f6702 100644
--- a/.github/workflows/issue_autolabel.yml
+++ b/.github/workflows/issue_autolabel.yml
@@ -320,20 +320,25 @@ jobs:
           script: |
             // Configuration: Map labels to GitHub users to CC
             // You can add multiple users per label, and multiple label configurations
+            // {users} will be replaced with @mentions
             const ccConfig = {
               rocm: {
-                users: ['hongxiayang', 'tjtanaa', 'vllmellm'],  // Add more users as needed: ['user1', 'user2', 'user3']
-                message: 'CC {users} for ROCm-related issue'  // {users} will be replaced with @mentions
+                users: ['hongxiayang', 'tjtanaa', 'vllmellm'],
+                message: 'CC {users} for ROCm-related issue',
+              },
+              mistral: {
+                users: ['patrickvonplaten', 'juliendenize', 'andylolu2'],
+                message: 'CC {users} for Mistral-related issue',
               },
               // Add more label -> user mappings here
               // Example:
               // cuda: {
               //   users: ['user1', 'user2'],
-              //   message: 'CC {users} for CUDA-related issue'
+              //   message: 'CC {users} for CUDA-related issue',
               // },
               // performance: {
               //   users: ['perfexpert'],
-              //   message: 'CC {users} for performance issue'
+              //   message: 'CC {users} for performance issue',
               // },
             };
             
diff --git a/.github/workflows/macos-smoke-test.yml b/.github/workflows/macos-smoke-test.yml
index 3c1a50bf8085..ea1c8b0feac3 100644
--- a/.github/workflows/macos-smoke-test.yml
+++ b/.github/workflows/macos-smoke-test.yml
@@ -32,7 +32,7 @@ jobs:
 
       - name: Install dependencies and build vLLM
         run: |
-          uv pip install -r requirements/cpu-build.txt --index-strategy unsafe-best-match
+          uv pip install -r requirements/build/cpu.txt --index-strategy unsafe-best-match
           uv pip install -r requirements/cpu.txt --index-strategy unsafe-best-match
           uv pip install -e . --no-build-isolation
         env:
@@ -45,6 +45,7 @@ jobs:
       - name: Smoke test vllm serve
         run: |
           # Start server in background
+          VLLM_CPU_KVCACHE_SPACE=1 \
           vllm serve Qwen/Qwen3-0.6B \
             --max-model-len=2K \
             --load-format=dummy \
diff --git a/.github/workflows/new_pr_bot.yml b/.github/workflows/new_pr_bot.yml
index ef5e30952c62..27100f9f4da0 100644
--- a/.github/workflows/new_pr_bot.yml
+++ b/.github/workflows/new_pr_bot.yml
@@ -62,14 +62,14 @@ jobs:
             const prAuthor = context.payload.pull_request.user.login;
 
             const { data: searchResults } = await github.rest.search.issuesAndPullRequests({
-              q: `repo:${owner}/${repo} type:pr author:${prAuthor}`,
+              q: `repo:${owner}/${repo} type:pr is:merged author:${prAuthor}`,
               per_page: 1,
             });
 
-            const authorPRCount = searchResults.total_count;
-            console.log(`Found ${authorPRCount} PRs by ${prAuthor}`);
+            const mergedPRCount = searchResults.total_count;
+            console.log(`Found ${mergedPRCount} merged PRs by ${prAuthor}`);
 
-            if (authorPRCount === 1) {
+            if (mergedPRCount === 0) {
               console.log(`Posting welcome comment for first-time contributor: ${prAuthor}`);
               await github.rest.issues.createComment({
                 owner,
@@ -98,5 +98,5 @@ jobs:
                 ].join('\n'),
               });
             } else {
-              console.log(`Skipping comment for ${prAuthor} - not their first PR (${authorPRCount} PRs found)`);
+              console.log(`Skipping comment for ${prAuthor} - not a first-time contributor (${mergedPRCount} merged PRs)`);
             }
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index d64f6ef0f651..1dd31b0e50f6 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -2,6 +2,7 @@ name: pre-commit
 
 on:
   pull_request:
+    types: [opened, synchronize, reopened, labeled]
   push:
     branches: [main]
 
@@ -28,6 +29,7 @@ jobs:
           });
 
           const hasReadyLabel = pr.labels.some(l => l.name === 'ready');
+          const hasVerifiedLabel = pr.labels.some(l => l.name === 'verified');
 
           const { data: mergedPRs } = await github.rest.search.issuesAndPullRequests({
             q: `repo:${context.repo.owner}/${context.repo.repo} is:pr is:merged author:${pr.user.login}`,
@@ -35,10 +37,10 @@ jobs:
           });
           const mergedCount = mergedPRs.total_count;
 
-          if (hasReadyLabel || mergedCount >= 4) {
-            core.info(`Check passed: ready label=${hasReadyLabel}, 4+ merged PRs=${mergedCount >= 4}`);
+          if (hasReadyLabel || hasVerifiedLabel || mergedCount >= 4) {
+            core.info(`Check passed: verified label=${hasVerifiedLabel}, ready label=${hasReadyLabel}, 4+ merged PRs=${mergedCount >= 4}`);
           } else {
-            core.setFailed(`PR must have the 'ready' label or the author must have at least 4 merged PRs (found ${mergedCount}).`);
+            core.setFailed(`PR must have the 'verified' or 'ready' (which also triggers tests) label or the author must have at least 4 merged PRs (found ${mergedCount}).`);
           }
 
   pre-commit:
diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh
index c69ebbb42da5..eb3971c42bfc 100644
--- a/.github/workflows/scripts/build.sh
+++ b/.github/workflows/scripts/build.sh
@@ -9,12 +9,15 @@ PATH=${cuda_home}/bin:$PATH
 LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH
 
 # Install requirements
-$python_executable -m pip install -r requirements/build.txt -r requirements/cuda.txt
+if [ "$(echo $2 | cut -d. -f1)" = "12" ]; then
+    sed -i 's/^nvidia-cutlass-dsl\[cu13\]>=/nvidia-cutlass-dsl>=/' requirements/cuda.txt
+fi
+$python_executable -m pip install -r requirements/build/cuda.txt -r requirements/cuda.txt
 
 # Limit the number of parallel jobs to avoid OOM
 export MAX_JOBS=1
 # Make sure release wheels are built for the following architectures
-export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+export TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX"
 
 bash tools/check_repo.sh
 
diff --git a/.gitignore b/.gitignore
index 7b822165d3eb..2c4e135e58dc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,9 @@ vllm/third_party/triton_kernels/*
 # FlashMLA interface copied from source
 vllm/third_party/flashmla/flash_mla_interface.py
 
+# DeepGEMM vendored package built from source
+vllm/third_party/deep_gemm/
+
 # triton jit
 .triton
 
@@ -23,9 +26,13 @@ __pycache__/
 # C extensions
 *.so
 
+# Rust binaries
+vllm/vllm-rs
+
 # Distribution / packaging
 .Python
 build/
+!requirements/build/
 cmake-build-*/
 CMakeUserPresets.json
 develop-eggs/
@@ -233,6 +240,7 @@ ep_kernels_workspace/
 
 # Allow tracked library source folders under submodules (e.g., benchmarks/lib)
 !vllm/benchmarks/lib/
+!.buildkite/scripts/lib/
 
 # Generated gRPC protobuf files (compiled at build time from vllm_engine.proto)
 vllm/grpc/vllm_engine_pb2.py
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e53274480cc0..05625e8f6677 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -39,15 +39,24 @@ repos:
   rev: 0.11.1
   hooks:
     - id: pip-compile
-      args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cu129, --python-platform, x86_64-manylinux_2_28, --python-version, "3.12"]
-      files: ^requirements/test\.(in|txt)$
+      args: [
+        requirements/test/cuda.in,
+        -c, requirements/cuda.txt,
+        -o, requirements/test/cuda.txt,
+        --index-strategy, unsafe-best-match,
+        --torch-backend, cu130,
+        --python-platform, x86_64-manylinux_2_28,
+        --python-version, "3.12",
+      ]
+      files: ^requirements/(common|cuda|test/cuda)\.(in|txt)$
     - id: pip-compile
       alias: pip-compile-rocm
       name: pip-compile-rocm
       args: [
-        requirements/rocm-test.in, -o, requirements/rocm-test.txt,
-        --index-strategy, unsafe-best-match,
+        requirements/test/rocm.in,
         -c, requirements/rocm.txt,
+        -o, requirements/test/rocm.txt,
+        --index-strategy, unsafe-best-match,
         --python-platform, x86_64-manylinux_2_28,
         --python-version, "3.12",
         # Exclude torch and CUDA/NVIDIA packages
@@ -59,30 +68,86 @@ repos:
         --no-emit-package, cuda-pathfinder,
         --no-emit-package, cuda-toolkit,
         --no-emit-package, cupy-cuda12x,
+        # nvidia packages (unsuffixed / unified naming)
         --no-emit-package, nvidia-cublas,
         --no-emit-package, nvidia-cuda-cupti,
         --no-emit-package, nvidia-cuda-nvrtc,
         --no-emit-package, nvidia-cuda-runtime,
-        --no-emit-package, nvidia-cudnn-cu13,
+        --no-emit-package, nvidia-cudnn,
         --no-emit-package, nvidia-cufft,
         --no-emit-package, nvidia-cufile,
         --no-emit-package, nvidia-curand,
         --no-emit-package, nvidia-cusolver,
         --no-emit-package, nvidia-cusparse,
+        --no-emit-package, nvidia-cusparselt,
+        --no-emit-package, nvidia-nccl,
+        --no-emit-package, nvidia-nvjitlink,
+        --no-emit-package, nvidia-nvshmem,
+        --no-emit-package, nvidia-nvtx,
+        # nvidia cu12 packages
+        --no-emit-package, nvidia-cublas-cu12,
+        --no-emit-package, nvidia-cuda-cupti-cu12,
+        --no-emit-package, nvidia-cuda-nvrtc-cu12,
+        --no-emit-package, nvidia-cuda-runtime-cu12,
+        --no-emit-package, nvidia-cudnn-cu12,
+        --no-emit-package, nvidia-cufft-cu12,
+        --no-emit-package, nvidia-cufile-cu12,
+        --no-emit-package, nvidia-curand-cu12,
+        --no-emit-package, nvidia-cusolver-cu12,
+        --no-emit-package, nvidia-cusparse-cu12,
+        --no-emit-package, nvidia-cusparselt-cu12,
+        --no-emit-package, nvidia-nccl-cu12,
+        --no-emit-package, nvidia-nvjitlink-cu12,
+        --no-emit-package, nvidia-nvshmem-cu12,
+        --no-emit-package, nvidia-nvtx-cu12,
+        # nvidia cu13 packages
+        --no-emit-package, nvidia-cublas-cu13,
+        --no-emit-package, nvidia-cuda-cupti-cu13,
+        --no-emit-package, nvidia-cuda-nvrtc-cu13,
+        --no-emit-package, nvidia-cuda-runtime-cu13,
+        --no-emit-package, nvidia-cudnn-cu13,
+        --no-emit-package, nvidia-cufft-cu13,
+        --no-emit-package, nvidia-cufile-cu13,
+        --no-emit-package, nvidia-curand-cu13,
+        --no-emit-package, nvidia-cusolver-cu13,
+        --no-emit-package, nvidia-cusparse-cu13,
         --no-emit-package, nvidia-cusparselt-cu13,
         --no-emit-package, nvidia-nccl-cu13,
-        --no-emit-package, nvidia-nvjitlink,
+        --no-emit-package, nvidia-nvjitlink-cu13,
         --no-emit-package, nvidia-nvshmem-cu13,
-        --no-emit-package, nvidia-nvtx,
+        --no-emit-package, nvidia-nvtx-cu13,
       ]
-      files: ^requirements/rocm-test\.(in|txt)$
+      files: ^requirements/(common|rocm|test/rocm)\.(in|txt)$
+    - id: pip-compile
+      alias: pip-compile-xpu
+      name: pip-compile-xpu
+      args: [
+        requirements/test/xpu.in,
+        -c, requirements/xpu.txt,
+        -o, requirements/test/xpu.txt,
+        --index-strategy, unsafe-best-match,
+        --torch-backend, xpu,
+        --python-platform, x86_64-manylinux_2_39,
+        --python-version, "3.12",
+      ]
+      files: ^requirements/(common|xpu|test/xpu)\.(in|txt)$
+    - id: pip-compile
+      alias: pip-compile-docs
+      name: pip-compile-docs
+      args: [
+        requirements/docs.in,
+        -o, requirements/docs.txt,
+        --python-platform, x86_64-manylinux_2_28,
+        --python-version, "3.12",
+      ]
+      files: ^requirements/docs\.(in|txt)$
 - repo: local
   hooks:
   - id: format-torch-nightly-test
-    name: reformat nightly_torch_test.txt to be in sync with test.in
+    name: reformat test/nightly-torch.txt to be in sync with test/cuda.in
     language: python
     entry: python tools/pre_commit/generate_nightly_torch_test.py
-    files: ^requirements/test\.(in|txt)$
+    files: ^requirements/test/cuda\.(in|txt)$
   - id: mypy-local
     name: Run mypy locally for lowest supported Python version
     entry: python tools/pre_commit/mypy.py 0 "3.10"
@@ -157,6 +222,12 @@ repos:
     name: Update Dockerfile dependency graph
     entry: tools/pre_commit/update-dockerfile-graph.sh
     language: script
+  - id: test-nonroot-entrypoint
+    name: Test non-root entrypoint wrapper
+    entry: bash docker/entrypoints/test_vllm_nonroot_entrypoint.sh
+    language: system
+    pass_filenames: false
+    files: ^docker/entrypoints/(vllm-nonroot-entrypoint|test_vllm_nonroot_entrypoint)\.sh$
   - id: check-forbidden-imports
     name: Check for forbidden imports
     entry: python tools/pre_commit/check_forbidden_imports.py
@@ -191,6 +262,32 @@ repos:
     entry: python tools/pre_commit/check_boolean_context_manager.py
     language: python
     types: [python]
+  # Rust hooks. These shell out to `cargo`; tools/pre_commit/rust-check.sh
+  # skips with a warning when cargo is not installed.
+  - id: rust-cargo-autoinherit
+    name: Rust - Normalize Cargo manifests with autoinherit
+    entry: tools/pre_commit/rust-check.sh autoinherit --prefer-simple-dotted
+    language: script
+    pass_filenames: false
+    require_serial: true
+    stages: [pre-commit] # Only run locally as Buildkite will cover this
+    files: ^rust/(Cargo\.toml|src/.*/Cargo\.toml)$
+  - id: rust-cargo-sort
+    name: Rust - Sort Cargo manifest sections
+    entry: tools/pre_commit/rust-check.sh sort --workspace
+    language: script
+    pass_filenames: false
+    require_serial: true
+    stages: [pre-commit] # Only run locally as Buildkite will cover this
+    files: ^rust/(Cargo\.toml|src/.*/Cargo\.toml)$
+  - id: rust-cargo-fmt
+    name: Rust - Format code
+    entry: tools/pre_commit/rust-check.sh fmt
+    language: script
+    pass_filenames: false
+    require_serial: true
+    stages: [pre-commit] # Only run locally as Buildkite will cover this
+    files: ^rust/.*(\.rs|Cargo\.toml|rustfmt\.toml)$
   # Keep `suggestion` last
   - id: suggestion
     name: Suggestion
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 1e479fd03d91..1dabec70ba58 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -9,7 +9,7 @@ build:
     python: "3.12"
   jobs:
     post_checkout:
-      # - bash docs/maybe_skip_pr_build.sh
+      - bash docs/pre_run_check.sh
       - git fetch origin main --unshallow --no-tags --filter=blob:none || true
     pre_create_environment:
       - pip install uv
diff --git a/AGENTS.md b/AGENTS.md
index 61312b29ef7d..215e4195eb43 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -72,11 +72,11 @@ uv pip install -e . --torch-backend=auto
 
 ```bash
 # Install test dependencies.
-# requirements/test.txt is pinned to x86_64; on other platforms, use the
+# requirements/test/cuda.txt is pinned to x86_64; on other platforms, use the
 # unpinned source file instead:
-uv pip install -r requirements/test.in    # resolves for current platform
+uv pip install -r requirements/test/cuda.in    # resolves for current platform
 # Or on x86_64:
-uv pip install -r requirements/test.txt
+uv pip install -r requirements/test/cuda.txt
 
 # Run a specific test file (use .venv/bin/python directly;
 # `source activate` does not persist in non-interactive shells):
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cf59f18eb7e7..553a3738d9fa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,8 +13,12 @@ cmake_minimum_required(VERSION 3.26)
 # cmake --install . --component _C
 project(vllm_extensions LANGUAGES CXX)
 
-set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CUDA_STANDARD 20)
+set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+set(CMAKE_HIP_STANDARD 20)
+set(CMAKE_HIP_STANDARD_REQUIRED ON)
 
 
 # CUDA by default, can be overridden by using -DVLLM_TARGET_DEVICE=... (used by setup.py)
@@ -34,10 +38,10 @@ install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS)
 # Supported python versions.  These versions will be searched in order, the
 # first match will be selected.  These should be kept in sync with setup.py.
 #
-set(PYTHON_SUPPORTED_VERSIONS "3.10" "3.11" "3.12" "3.13")
+set(PYTHON_SUPPORTED_VERSIONS "3.10" "3.11" "3.12" "3.13" "3.14")
 
 # Supported AMD GPU architectures.
-set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1150;gfx1151;gfx1152;gfx1153;gfx1200;gfx1201")
+set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151;gfx1152;gfx1153;gfx1200;gfx1201")
 
 # ROCm installation prefix. Default to /opt/rocm but allow override via
 # -DROCM_PATH=/your/rocm/path when invoking cmake.
@@ -56,8 +60,8 @@ endif()
 # requirements.txt files and should be kept consistent.  The ROCm torch
 # versions are derived from docker/Dockerfile.rocm
 #
-set(TORCH_SUPPORTED_VERSION_CUDA "2.10.0")
-set(TORCH_SUPPORTED_VERSION_ROCM "2.10.0")
+set(TORCH_SUPPORTED_VERSION_CUDA "2.11.0")
+set(TORCH_SUPPORTED_VERSION_ROCM "2.11.0")
 
 #
 # Try to find python package with an executable that exactly matches
@@ -94,14 +98,35 @@ find_package(Torch REQUIRED)
 # This check must happen after find_package(Torch) because that's when CMAKE_CUDA_COMPILER_VERSION gets defined
 if(DEFINED CMAKE_CUDA_COMPILER_VERSION AND
    CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0)
-  set(CUDA_SUPPORTED_ARCHS "7.5;8.0;8.6;8.7;8.9;9.0;10.0;11.0;12.0;12.1")
+   # starting from CUDA 12.9 and Blackwell (10.0), we use family-specific targets (10.0f, 12.0f, etc)
+   # to support the whole generation without specifying all sub-architectures
+   # see: https://developer.nvidia.com/blog/nvidia-blackwell-and-nvidia-cuda-12-9-introduce-family-specific-architecture-features/
+  set(CUDA_SUPPORTED_ARCHS "7.5;8.0;8.6;8.7;8.9;9.0;10.0;11.0;12.0")
 elseif(DEFINED CMAKE_CUDA_COMPILER_VERSION AND
    CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8)
-  set(CUDA_SUPPORTED_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0;10.0;10.1;12.0;12.1")
+  set(CUDA_SUPPORTED_ARCHS "7.5;8.0;8.6;8.7;8.9;9.0;10.0;10.1;10.3;12.0;12.1")
 else()
-  set(CUDA_SUPPORTED_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0")
+  set(CUDA_SUPPORTED_ARCHS "7.0;7.5;8.0;8.6;8.7;8.9;9.0")
 endif()
 
+#
+# spinloop extension (pure CXX; must stay above the non-CUDA device branch so
+# CPU builds define the target before the early return)
+#
+set(VLLM_SPINLOOP_EXT_SRC "csrc/spinloop.cpp")
+set(SPINLOOP_COMPILE_FLAGS "")
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
+  list(APPEND SPINLOOP_COMPILE_FLAGS "-mmwaitx")
+endif()
+define_extension_target(
+  spinloop
+  DESTINATION vllm
+  LANGUAGE CXX
+  SOURCES ${VLLM_SPINLOOP_EXT_SRC}
+  COMPILE_FLAGS ${SPINLOOP_COMPILE_FLAGS}
+  USE_SABI 3.11
+  WITH_SOABI)
+
 #
 # Forward the non-CUDA device extensions to external CMake scripts.
 #
@@ -225,8 +250,8 @@ if(VLLM_GPU_LANG STREQUAL "HIP")
   # Certain HIP functions are marked as [[nodiscard]], yet vllm ignores the result which generates
   # a lot of warnings that always mask real issues. Suppressing until this is properly addressed.
   #
-  set(CMAKE_${VLLM_GPU_LANG}_FLAGS "${CMAKE_${VLLM_GPU_LANG}_FLAGS} -Wno-unused-result")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-result")
+  set(CMAKE_${VLLM_GPU_LANG}_FLAGS "${CMAKE_${VLLM_GPU_LANG}_FLAGS} -Wno-unused-result -Wno-unused-value")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-result -Wno-unused-value")
 endif()
 
 #
@@ -286,30 +311,24 @@ set(VLLM_EXT_SRC
   "csrc/attention/paged_attention_v1.cu"
   "csrc/attention/paged_attention_v2.cu"
   "csrc/attention/merge_attn_states.cu"
-  "csrc/attention/vertical_slash_index.cu"
-  "csrc/pos_encoding_kernels.cu"
-  "csrc/activation_kernels.cu"
-  "csrc/layernorm_kernels.cu"
-  "csrc/fused_qknorm_rope_kernel.cu"
-  "csrc/layernorm_quant_kernels.cu"
   "csrc/sampler.cu"
   "csrc/topk.cu"
   "csrc/cuda_view.cu"
-  "csrc/quantization/gptq/q_gemm.cu"
-  "csrc/quantization/w8a8/int8/scaled_quant.cu"
-  "csrc/quantization/w8a8/fp8/common.cu"
-  "csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu"
-  "csrc/quantization/gguf/gguf_kernel.cu"
+  "csrc/quantization/fused_kernels/fused_silu_mul_block_quant.cu"
   "csrc/quantization/activation_kernels.cu"
   "csrc/cuda_utils_kernels.cu"
   "csrc/custom_all_reduce.cu"
-  "csrc/torch_bindings.cpp")
+  "csrc/torch_bindings.cpp"
+  "csrc/fused_deepseek_v4_qnorm_rope_kv_insert_kernel.cu")
 
 if(VLLM_GPU_LANG STREQUAL "CUDA")
+  list(APPEND VLLM_EXT_SRC
+    "csrc/minimax_reduce_rms_kernel.cu")
+
   SET(CUTLASS_ENABLE_HEADERS_ONLY ON CACHE BOOL "Enable only the header library")
 
   # Set CUTLASS_REVISION. Used for FetchContent. Also fixes some bogus messages when building.
-  set(CUTLASS_REVISION "v4.2.1")
+  set(CUTLASS_REVISION "v4.4.2")
 
   # Use the specified CUTLASS source directory for compilation if VLLM_CUTLASS_SRC_DIR is provided
   if (DEFINED ENV{VLLM_CUTLASS_SRC_DIR})
@@ -339,10 +358,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
   FetchContent_MakeAvailable(cutlass)
 
   list(APPEND VLLM_EXT_SRC
-    "csrc/quantization/awq/gemm_kernels.cu"
-    "csrc/quantization/w8a8/cutlass/scaled_mm_entry.cu"
-    "csrc/quantization/fp4/nvfp4_quant_entry.cu"
-    "csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu"
     "csrc/cutlass_extensions/common.cpp")
 
   set_gencode_flags_for_srcs(
@@ -474,38 +489,220 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
                    " in CUDA target architectures")
   endif()
 
+  # Expert-specialization MXFP8 blockscaled grouped kernels (SM100+).
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
+    cuda_archs_loose_intersection(ES_MXFP8_GROUPED_MM_ARCHS "10.0f;11.0f" "${CUDA_ARCHS}")
+  else()
+    cuda_archs_loose_intersection(ES_MXFP8_GROUPED_MM_ARCHS "10.0a;10.1a;10.3a" "${CUDA_ARCHS}")
+  endif()
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND ES_MXFP8_GROUPED_MM_ARCHS)
+    set(SRCS
+      "csrc/moe/mxfp8_moe/cutlass_mxfp8_grouped_mm.cu"
+      "csrc/moe/mxfp8_moe/mxfp8_experts_quant.cu")
+    set_gencode_flags_for_srcs(
+      SRCS "${SRCS}"
+      CUDA_ARCHS "${ES_MXFP8_GROUPED_MM_ARCHS}")
+    list(APPEND VLLM_EXT_SRC "${SRCS}")
+    list(APPEND VLLM_GPU_FLAGS "-DENABLE_ES_MXFP8_GROUPED_MM_SM100=1")
+    message(STATUS "Building ES MXFP8 grouped kernels for archs: ${ES_MXFP8_GROUPED_MM_ARCHS}")
+  else()
+    if (NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8
+        AND ES_MXFP8_GROUPED_MM_ARCHS)
+      message(STATUS "Not building ES MXFP8 grouped kernels as CUDA Compiler version is "
+                     "not >= 12.8.")
+    else()
+      message(STATUS "Not building ES MXFP8 grouped kernels as no compatible archs found "
+                     "in CUDA target architectures.")
+    endif()
+  endif()
+
+  #
+  # Machete kernels
+
+  # The machete kernels only work on hopper and require CUDA 12.0 or later.
+  # Only build Machete kernels if we are building for something compatible with sm90a
+  cuda_archs_loose_intersection(MACHETE_ARCHS "9.0a" "${CUDA_ARCHS}")
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0 AND MACHETE_ARCHS)
+    #
+    # For the Machete kernels we automatically generate sources for various
+    # preselected input type pairs and schedules.
+    # Generate sources:
+    set(MACHETE_GEN_SCRIPT
+      ${CMAKE_CURRENT_SOURCE_DIR}/csrc/quantization/machete/generate.py)
+    file(MD5 ${MACHETE_GEN_SCRIPT} MACHETE_GEN_SCRIPT_HASH)
+
+    message(STATUS "Machete generation script hash: ${MACHETE_GEN_SCRIPT_HASH}")
+    message(STATUS "Last run machete generate script hash: $CACHE{MACHETE_GEN_SCRIPT_HASH}")
+
+    if (NOT DEFINED CACHE{MACHETE_GEN_SCRIPT_HASH}
+        OR NOT $CACHE{MACHETE_GEN_SCRIPT_HASH} STREQUAL ${MACHETE_GEN_SCRIPT_HASH})
+      execute_process(
+        COMMAND ${CMAKE_COMMAND} -E env
+        PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR}/csrc/cutlass_extensions/:${CUTLASS_DIR}/python/:${VLLM_PYTHON_PATH}:$ENV{PYTHONPATH}
+          ${Python_EXECUTABLE} ${MACHETE_GEN_SCRIPT}
+        RESULT_VARIABLE machete_generation_result
+        OUTPUT_VARIABLE machete_generation_output
+        OUTPUT_FILE ${CMAKE_CURRENT_BINARY_DIR}/machete_generation.log
+        ERROR_FILE ${CMAKE_CURRENT_BINARY_DIR}/machete_generation.log
+      )
+
+      if (NOT machete_generation_result EQUAL 0)
+        message(FATAL_ERROR "Machete generation failed."
+                            " Result: \"${machete_generation_result}\""
+                            "\nCheck the log for details: "
+                            "${CMAKE_CURRENT_BINARY_DIR}/machete_generation.log")
+      else()
+        set(MACHETE_GEN_SCRIPT_HASH ${MACHETE_GEN_SCRIPT_HASH}
+            CACHE STRING "Last run machete generate script hash" FORCE)
+        message(STATUS "Machete generation completed successfully.")
+      endif()
+    else()
+      message(STATUS "Machete generation script has not changed, skipping generation.")
+    endif()
+
+    # Add machete generated sources
+    file(GLOB MACHETE_GEN_SOURCES "csrc/quantization/machete/generated/*.cu")
+    list(APPEND VLLM_EXT_SRC ${MACHETE_GEN_SOURCES})
+
+    # forward compatible
+    set_gencode_flags_for_srcs(
+      SRCS "${MACHETE_GEN_SOURCES}"
+      CUDA_ARCHS "${MACHETE_ARCHS}")
+
+    list(APPEND VLLM_EXT_SRC
+      csrc/quantization/machete/machete_pytorch.cu)
+
+    message(STATUS "Building Machete kernels for archs: ${MACHETE_ARCHS}")
+  else()
+    if (NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0
+        AND MACHETE_ARCHS)
+      message(STATUS "Not building Machete kernels as CUDA Compiler version is "
+                     "not >= 12.0, we recommend upgrading to CUDA 12.0 or "
+                     "later if you intend on running w4a16 quantized models on "
+                     "Hopper.")
+    else()
+      message(STATUS "Not building Machete kernels as no compatible archs "
+                     "found in CUDA target architectures")
+    endif()
+  endif()
+
+
+
+# if CUDA endif
+endif()
+
+if (VLLM_GPU_LANG STREQUAL "HIP")
+  # Add QuickReduce kernels
+  list(APPEND VLLM_EXT_SRC
+    "csrc/custom_quickreduce.cu"
+  )
+# if ROCM endif
+endif()
+
+message(STATUS "Enabling C extension.")
+define_extension_target(
+  _C
+  DESTINATION vllm
+  LANGUAGE ${VLLM_GPU_LANG}
+  SOURCES ${VLLM_EXT_SRC}
+  COMPILE_FLAGS ${VLLM_GPU_FLAGS}
+  ARCHITECTURES ${VLLM_GPU_ARCHES}
+  INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR}
+  INCLUDE_DIRECTORIES ${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
+  USE_SABI 3
+  WITH_SOABI)
+
+# If CUTLASS is compiled on NVCC >= 12.5, it by default uses
+# cudaGetDriverEntryPointByVersion as a wrapper to avoid directly calling the
+# driver API. This causes problems when linking with earlier versions of CUDA.
+# Setting this variable sidesteps the issue by calling the driver directly.
+target_compile_definitions(_C PRIVATE CUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1)
+
+if(VLLM_GPU_LANG STREQUAL "CUDA" OR VLLM_GPU_LANG STREQUAL "HIP")
+  #
+  # _C_stable_libtorch extension (ops registered via STABLE_TORCH_LIBRARY)
+  #
+  set(VLLM_STABLE_EXT_SRC
+    "csrc/libtorch_stable/torch_bindings.cpp"
+    "csrc/libtorch_stable/activation_kernels.cu"
+    "csrc/libtorch_stable/quantization/w8a8/int8/scaled_quant.cu"
+    "csrc/libtorch_stable/quantization/w8a8/fp8/common.cu"
+    "csrc/libtorch_stable/quantization/gptq/q_gemm.cu"
+    "csrc/libtorch_stable/quantization/gguf/gguf_kernel.cu"
+    "csrc/libtorch_stable/pos_encoding_kernels.cu"
+    "csrc/libtorch_stable/fused_qknorm_rope_kernel.cu"
+    "csrc/libtorch_stable/layernorm_kernels.cu"
+    "csrc/libtorch_stable/layernorm_quant_kernels.cu"
+    "csrc/libtorch_stable/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu")
+
+  if(VLLM_GPU_LANG STREQUAL "CUDA")
+    list(APPEND VLLM_STABLE_EXT_SRC
+      "csrc/cuda_utils_kernels.cu"
+      "csrc/cutlass_extensions/common.cpp"
+      "csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_entry.cu"
+      "csrc/libtorch_stable/quantization/fp4/nvfp4_quant_entry.cu"
+      "csrc/libtorch_stable/quantization/fp4/nvfp4_scaled_mm_entry.cu"
+      "csrc/libtorch_stable/permute_cols.cu"
+      "csrc/libtorch_stable/quantization/w8a8/fp8/per_token_group_quant.cu"
+      "csrc/libtorch_stable/quantization/w8a8/int8/per_token_group_quant.cu"
+      "csrc/libtorch_stable/quantization/awq/gemm_kernels.cu")
+
+    set_gencode_flags_for_srcs(
+      SRCS "${VLLM_STABLE_EXT_SRC}"
+      CUDA_ARCHS "${CUDA_ARCHS}")
+
+  # DeepSeek V3 fused A GEMM kernel (requires SM 9.0+, Hopper and later)
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
+    cuda_archs_loose_intersection(DSV3_FUSED_A_GEMM_ARCHS "9.0a;10.0f;11.0f" "${CUDA_ARCHS}")
+  else()
+    cuda_archs_loose_intersection(DSV3_FUSED_A_GEMM_ARCHS "9.0a;10.0a;10.1a;10.3a" "${CUDA_ARCHS}")
+  endif()
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0 AND DSV3_FUSED_A_GEMM_ARCHS)
+    set(SRCS "csrc/libtorch_stable/dsv3_fused_a_gemm.cu")
+    set_gencode_flags_for_srcs(
+      SRCS "${SRCS}"
+      CUDA_ARCHS "${DSV3_FUSED_A_GEMM_ARCHS}")
+    list(APPEND VLLM_STABLE_EXT_SRC "${SRCS}")
+    message(STATUS "Building dsv3_fused_a_gemm for archs: ${DSV3_FUSED_A_GEMM_ARCHS}")
+  else()
+    message(STATUS "Not building dsv3_fused_a_gemm as no compatible archs found "
+                   "in CUDA target architectures.")
+  endif()
+
   # Only build AllSpark kernels if we are building for at least some compatible archs.
   cuda_archs_loose_intersection(ALLSPARK_ARCHS "8.0;8.6;8.7;8.9" "${CUDA_ARCHS}")
   if (ALLSPARK_ARCHS)
-    set(ALLSPARK_SRCS
-       "csrc/quantization/gptq_allspark/allspark_repack.cu"
-       "csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu")
+    set(SRCS
+       "csrc/libtorch_stable/quantization/gptq_allspark/allspark_repack.cu"
+       "csrc/libtorch_stable/quantization/gptq_allspark/allspark_qgemm_w8a16.cu")
     set_gencode_flags_for_srcs(
-      SRCS "${ALLSPARK_SRCS}"
+      SRCS "${SRCS}"
       CUDA_ARCHS "${ALLSPARK_ARCHS}")
-    list(APPEND VLLM_EXT_SRC "${ALLSPARK_SRCS}")
+    list(APPEND VLLM_STABLE_EXT_SRC "${SRCS}")
     message(STATUS "Building AllSpark kernels for archs: ${ALLSPARK_ARCHS}")
   else()
     message(STATUS "Not building AllSpark kernels as no compatible archs found"
                    " in CUDA target architectures")
   endif()
 
-
+  #
+  # CUTLASS scaled_mm kernels (moved from _C to _C_stable_libtorch)
+  #
   set(SCALED_MM_3X_ARCHS)
   # The cutlass_scaled_mm kernels for Hopper (c3x, i.e. CUTLASS 3.x) require
   # CUDA 12.0 or later
   cuda_archs_loose_intersection(SCALED_MM_ARCHS "9.0a;" "${CUDA_ARCHS}")
   if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0 AND SCALED_MM_ARCHS)
     set(SRCS
-       "csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm90.cu"
-       "csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8.cu"
-       "csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8.cu"
-       "csrc/quantization/w8a8/cutlass/c3x/scaled_mm_azp_sm90_int8.cu"
-       "csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8.cu")
+       "csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c3x_sm90.cu"
+       "csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8.cu"
+       "csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8.cu"
+       "csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_azp_sm90_int8.cu"
+       "csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8.cu")
     set_gencode_flags_for_srcs(
       SRCS "${SRCS}"
       CUDA_ARCHS "${SCALED_MM_ARCHS}")
-    list(APPEND VLLM_EXT_SRC "${SRCS}")
+    list(APPEND VLLM_STABLE_EXT_SRC "${SRCS}")
     list(APPEND VLLM_GPU_FLAGS "-DENABLE_SCALED_MM_SM90=1")
     # Let scaled_mm_c2x know it doesn't need to build these arches
     list(APPEND SCALED_MM_3X_ARCHS "${SCALED_MM_ARCHS}")
@@ -532,14 +729,14 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
   endif()
   if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND SCALED_MM_ARCHS)
     set(SRCS
-      "csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm120.cu"
-      "csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8.cu"
-      "csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8.cu"
+      "csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c3x_sm120.cu"
+      "csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8.cu"
+      "csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8.cu"
     )
     set_gencode_flags_for_srcs(
       SRCS "${SRCS}"
       CUDA_ARCHS "${SCALED_MM_ARCHS}")
-    list(APPEND VLLM_EXT_SRC "${SRCS}")
+    list(APPEND VLLM_STABLE_EXT_SRC "${SRCS}")
     list(APPEND VLLM_GPU_FLAGS "-DENABLE_SCALED_MM_SM120=1")
     # Let scaled_mm_c2x know it doesn't need to build these arches
     list(APPEND SCALED_MM_3X_ARCHS "${SCALED_MM_ARCHS}")
@@ -566,14 +763,14 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
   endif()
   if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND SCALED_MM_ARCHS)
     set(SRCS
-      "csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm100.cu"
-      "csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8.cu"
-      "csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8.cu"
+      "csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c3x_sm100.cu"
+      "csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8.cu"
+      "csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8.cu"
     )
     set_gencode_flags_for_srcs(
       SRCS "${SRCS}"
       CUDA_ARCHS "${SCALED_MM_ARCHS}")
-    list(APPEND VLLM_EXT_SRC "${SRCS}")
+    list(APPEND VLLM_STABLE_EXT_SRC "${SRCS}")
     list(APPEND VLLM_GPU_FLAGS "-DENABLE_SCALED_MM_SM100=1")
     # Let scaled_mm_c2x know it doesn't need to build these arches
     list(APPEND SCALED_MM_3X_ARCHS "${SCALED_MM_ARCHS}")
@@ -599,11 +796,11 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
   # subtract out the archs that are already built for 3x
   list(REMOVE_ITEM SCALED_MM_2X_ARCHS ${SCALED_MM_3X_ARCHS})
   if (SCALED_MM_2X_ARCHS)
-    set(SRCS "csrc/quantization/w8a8/cutlass/scaled_mm_c2x.cu")
+    set(SRCS "csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x.cu")
     set_gencode_flags_for_srcs(
       SRCS "${SRCS}"
       CUDA_ARCHS "${SCALED_MM_2X_ARCHS}")
-    list(APPEND VLLM_EXT_SRC "${SRCS}")
+    list(APPEND VLLM_STABLE_EXT_SRC "${SRCS}")
     list(APPEND VLLM_GPU_FLAGS "-DENABLE_SCALED_MM_C2X=1")
     message(STATUS "Building scaled_mm_c2x for archs: ${SCALED_MM_2X_ARCHS}")
   else()
@@ -616,95 +813,20 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
     endif()
   endif()
 
-  # The nvfp4_scaled_mm_sm120 kernels for Blackwell SM12x require
-  # CUDA 12.8 or later
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
-    cuda_archs_loose_intersection(FP4_ARCHS "12.0f" "${CUDA_ARCHS}")
-  else()
-    cuda_archs_loose_intersection(FP4_ARCHS "12.0a;12.1a" "${CUDA_ARCHS}")
-  endif()
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND FP4_ARCHS)
-    set(SRCS
-      "csrc/quantization/fp4/nvfp4_quant_kernels.cu"
-      "csrc/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu"
-      "csrc/quantization/fp4/nvfp4_experts_quant.cu"
-      "csrc/quantization/fp4/nvfp4_scaled_mm_sm120_kernels.cu"
-      "csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu")
-    set_gencode_flags_for_srcs(
-      SRCS "${SRCS}"
-      CUDA_ARCHS "${FP4_ARCHS}")
-    list(APPEND VLLM_EXT_SRC "${SRCS}")
-    list(APPEND VLLM_GPU_FLAGS "-DENABLE_NVFP4_SM120=1")
-    list(APPEND VLLM_GPU_FLAGS "-DENABLE_CUTLASS_MOE_SM120=1")
-    message(STATUS "Building NVFP4 for archs: ${FP4_ARCHS}")
-  else()
-    message(STATUS "Not building NVFP4 as no compatible archs were found.")
-    # clear FP4_ARCHS
-    set(FP4_ARCHS)
-  endif()
-
-  # FP4 Archs and flags
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
-    cuda_archs_loose_intersection(FP4_ARCHS "10.0f;11.0f" "${CUDA_ARCHS}")
-  else()
-    cuda_archs_loose_intersection(FP4_ARCHS "10.0a;10.1a;10.3a" "${CUDA_ARCHS}")
-  endif()
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND FP4_ARCHS)
-    set(SRCS
-      "csrc/quantization/fp4/nvfp4_quant_kernels.cu"
-      "csrc/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu"
-      "csrc/quantization/fp4/nvfp4_experts_quant.cu"
-      "csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu"
-      "csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu")
-    set_gencode_flags_for_srcs(
-      SRCS "${SRCS}"
-      CUDA_ARCHS "${FP4_ARCHS}")
-    list(APPEND VLLM_EXT_SRC "${SRCS}")
-    list(APPEND VLLM_GPU_FLAGS "-DENABLE_NVFP4_SM100=1")
-    list(APPEND VLLM_GPU_FLAGS "-DENABLE_CUTLASS_MOE_SM100=1")
-    message(STATUS "Building NVFP4 for archs: ${FP4_ARCHS}")
-  else()
-    message(STATUS "Not building NVFP4 as no compatible archs were found.")
-    # clear FP4_ARCHS
-    set(FP4_ARCHS)
-  endif()
-
-  # CUTLASS MLA Archs and flags
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
-    cuda_archs_loose_intersection(MLA_ARCHS "10.0f;11.0f;12.0f" "${CUDA_ARCHS}")
-  else()
-    cuda_archs_loose_intersection(MLA_ARCHS "10.0a;10.1a;10.3a;12.0a;12.1a" "${CUDA_ARCHS}")
-  endif()
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND MLA_ARCHS)
-    set(SRCS
-      "csrc/attention/mla/sm100_cutlass_mla_kernel.cu")
-    set_gencode_flags_for_srcs(
-      SRCS "${SRCS}"
-      CUDA_ARCHS "${MLA_ARCHS}")
-    list(APPEND VLLM_EXT_SRC "${SRCS}")
-    list(APPEND VLLM_GPU_FLAGS "-DENABLE_CUTLASS_MLA=1")
-    # Add MLA-specific include directories only to MLA source files
-    set_source_files_properties(${SRCS}
-      PROPERTIES INCLUDE_DIRECTORIES "${CUTLASS_DIR}/examples/77_blackwell_fmha;${CUTLASS_DIR}/examples/common")
-    message(STATUS "Building CUTLASS MLA for archs: ${MLA_ARCHS}")
-  else()
-    message(STATUS "Not building CUTLASS MLA as no compatible archs were found.")
-    # clear MLA_ARCHS
-    set(MLA_ARCHS)
-  endif()
-
-  # CUTLASS MoE kernels
+  #
+  # CUTLASS MoE kernels (moved from _C to _C_stable_libtorch)
+  #
 
   # The MoE kernel cutlass_moe_mm requires CUDA 12.3 or later (and ONLY works
   # on Hopper). get_cutlass_(batched_)moe_mm_data should only be compiled
   # if it's possible to compile MoE kernels that use its output.
   cuda_archs_loose_intersection(SCALED_MM_ARCHS "9.0a" "${CUDA_ARCHS}")
   if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.3 AND SCALED_MM_ARCHS)
-    set(SRCS "csrc/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm90.cu")
+    set(SRCS "csrc/libtorch_stable/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm90.cu")
     set_gencode_flags_for_srcs(
       SRCS "${SRCS}"
       CUDA_ARCHS "${SCALED_MM_ARCHS}")
-    list(APPEND VLLM_EXT_SRC "${SRCS}")
+    list(APPEND VLLM_STABLE_EXT_SRC "${SRCS}")
     list(APPEND VLLM_GPU_FLAGS "-DENABLE_CUTLASS_MOE_SM90=1")
     message(STATUS "Building grouped_mm_c3x for archs: ${SCALED_MM_ARCHS}")
   else()
@@ -724,11 +846,11 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
     cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a;10.1a;10.3a" "${CUDA_ARCHS}")
   endif()
   if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND SCALED_MM_ARCHS)
-    set(SRCS "csrc/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm100.cu")
+    set(SRCS "csrc/libtorch_stable/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm100.cu")
     set_gencode_flags_for_srcs(
       SRCS "${SRCS}"
       CUDA_ARCHS "${SCALED_MM_ARCHS}")
-    list(APPEND VLLM_EXT_SRC "${SRCS}")
+    list(APPEND VLLM_STABLE_EXT_SRC "${SRCS}")
     list(APPEND VLLM_GPU_FLAGS "-DENABLE_CUTLASS_MOE_SM100=1")
     message(STATUS "Building grouped_mm_c3x for archs: ${SCALED_MM_ARCHS}")
   else()
@@ -742,51 +864,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
     endif()
   endif()
 
-  # Expert-specialization MXFP8 blockscaled grouped kernels (SM100+).
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
-    cuda_archs_loose_intersection(ES_MXFP8_GROUPED_MM_ARCHS "10.0f;11.0f" "${CUDA_ARCHS}")
-  else()
-    cuda_archs_loose_intersection(ES_MXFP8_GROUPED_MM_ARCHS "10.0a;10.1a;10.3a" "${CUDA_ARCHS}")
-  endif()
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND ES_MXFP8_GROUPED_MM_ARCHS)
-    set(SRCS
-      "csrc/moe/mxfp8_moe/cutlass_mxfp8_grouped_mm.cu"
-      "csrc/moe/mxfp8_moe/mxfp8_experts_quant.cu")
-    set_gencode_flags_for_srcs(
-      SRCS "${SRCS}"
-      CUDA_ARCHS "${ES_MXFP8_GROUPED_MM_ARCHS}")
-    list(APPEND VLLM_EXT_SRC "${SRCS}")
-    list(APPEND VLLM_GPU_FLAGS "-DENABLE_ES_MXFP8_GROUPED_MM_SM100=1")
-    message(STATUS "Building ES MXFP8 grouped kernels for archs: ${ES_MXFP8_GROUPED_MM_ARCHS}")
-  else()
-    if (NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8
-        AND ES_MXFP8_GROUPED_MM_ARCHS)
-      message(STATUS "Not building ES MXFP8 grouped kernels as CUDA Compiler version is "
-                     "not >= 12.8.")
-    else()
-      message(STATUS "Not building ES MXFP8 grouped kernels as no compatible archs found "
-                     "in CUDA target architectures.")
-    endif()
-  endif()
-
-  # DeepSeek V3 fused A GEMM kernel (requires SM 9.0+, Hopper and later)
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
-    cuda_archs_loose_intersection(DSV3_FUSED_A_GEMM_ARCHS "9.0a;10.0f;11.0f" "${CUDA_ARCHS}")
-  else()
-    cuda_archs_loose_intersection(DSV3_FUSED_A_GEMM_ARCHS "9.0a;10.0a;10.1a;10.3a" "${CUDA_ARCHS}")
-  endif()
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0 AND DSV3_FUSED_A_GEMM_ARCHS)
-    set(DSV3_FUSED_A_GEMM_SRC "csrc/dsv3_fused_a_gemm.cu")
-    set_gencode_flags_for_srcs(
-      SRCS "${DSV3_FUSED_A_GEMM_SRC}"
-      CUDA_ARCHS "${DSV3_FUSED_A_GEMM_ARCHS}")
-    list(APPEND VLLM_EXT_SRC ${DSV3_FUSED_A_GEMM_SRC})
-    message(STATUS "Building dsv3_fused_a_gemm for archs: ${DSV3_FUSED_A_GEMM_ARCHS}")
-  else()
-    message(STATUS "Not building dsv3_fused_a_gemm as no compatible archs found "
-                   "in CUDA target architectures.")
-  endif()
-
   # moe_data.cu is used by all CUTLASS MoE kernels.
   if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
     cuda_archs_loose_intersection(CUTLASS_MOE_DATA_ARCHS "9.0a;10.0f;11.0f;12.0f" "${CUDA_ARCHS}")
@@ -794,11 +871,11 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
     cuda_archs_loose_intersection(CUTLASS_MOE_DATA_ARCHS "9.0a;10.0a;10.1a;10.3a;12.0a;12.1a" "${CUDA_ARCHS}")
   endif()
   if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.3 AND CUTLASS_MOE_DATA_ARCHS)
-    set(SRCS "csrc/quantization/w8a8/cutlass/moe/moe_data.cu")
+    set(SRCS "csrc/libtorch_stable/quantization/w8a8/cutlass/moe/moe_data.cu")
     set_gencode_flags_for_srcs(
       SRCS "${SRCS}"
       CUDA_ARCHS "${CUTLASS_MOE_DATA_ARCHS}")
-    list(APPEND VLLM_EXT_SRC "${SRCS}")
+    list(APPEND VLLM_STABLE_EXT_SRC "${SRCS}")
     message(STATUS "Building moe_data for archs: ${CUTLASS_MOE_DATA_ARCHS}")
   else()
     if (NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.3 AND CUTLASS_MOE_DATA_ARCHS)
@@ -811,96 +888,97 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
     endif()
   endif()
 
+  #
+  # FP4/NVFP4 kernels (moved from _C to _C_stable_libtorch)
+  #
+
+  # The nvfp4_scaled_mm_sm120 kernels for Blackwell SM12x require
+  # CUDA 12.8 or later
   if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
-    cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f" "${CUDA_ARCHS}")
+    cuda_archs_loose_intersection(FP4_ARCHS "12.0f" "${CUDA_ARCHS}")
   else()
-    cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a;10.1a;10.3a" "${CUDA_ARCHS}")
+    cuda_archs_loose_intersection(FP4_ARCHS "12.0a;12.1a" "${CUDA_ARCHS}")
   endif()
-
-  #
-  # Machete kernels
-
-  # The machete kernels only work on hopper and require CUDA 12.0 or later.
-  # Only build Machete kernels if we are building for something compatible with sm90a
-  cuda_archs_loose_intersection(MACHETE_ARCHS "9.0a" "${CUDA_ARCHS}")
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0 AND MACHETE_ARCHS)
-    #
-    # For the Machete kernels we automatically generate sources for various
-    # preselected input type pairs and schedules.
-    # Generate sources:
-    set(MACHETE_GEN_SCRIPT
-      ${CMAKE_CURRENT_SOURCE_DIR}/csrc/quantization/machete/generate.py)
-    file(MD5 ${MACHETE_GEN_SCRIPT} MACHETE_GEN_SCRIPT_HASH)
-
-    message(STATUS "Machete generation script hash: ${MACHETE_GEN_SCRIPT_HASH}")
-    message(STATUS "Last run machete generate script hash: $CACHE{MACHETE_GEN_SCRIPT_HASH}")
-
-    if (NOT DEFINED CACHE{MACHETE_GEN_SCRIPT_HASH}
-        OR NOT $CACHE{MACHETE_GEN_SCRIPT_HASH} STREQUAL ${MACHETE_GEN_SCRIPT_HASH})
-      execute_process(
-        COMMAND ${CMAKE_COMMAND} -E env
-        PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR}/csrc/cutlass_extensions/:${CUTLASS_DIR}/python/:${VLLM_PYTHON_PATH}:$ENV{PYTHONPATH}
-          ${Python_EXECUTABLE} ${MACHETE_GEN_SCRIPT}
-        RESULT_VARIABLE machete_generation_result
-        OUTPUT_VARIABLE machete_generation_output
-        OUTPUT_FILE ${CMAKE_CURRENT_BINARY_DIR}/machete_generation.log
-        ERROR_FILE ${CMAKE_CURRENT_BINARY_DIR}/machete_generation.log
-      )
-
-      if (NOT machete_generation_result EQUAL 0)
-        message(FATAL_ERROR "Machete generation failed."
-                            " Result: \"${machete_generation_result}\""
-                            "\nCheck the log for details: "
-                            "${CMAKE_CURRENT_BINARY_DIR}/machete_generation.log")
-      else()
-        set(MACHETE_GEN_SCRIPT_HASH ${MACHETE_GEN_SCRIPT_HASH}
-            CACHE STRING "Last run machete generate script hash" FORCE)
-        message(STATUS "Machete generation completed successfully.")
-      endif()
-    else()
-      message(STATUS "Machete generation script has not changed, skipping generation.")
-    endif()
-
-    # Add machete generated sources
-    file(GLOB MACHETE_GEN_SOURCES "csrc/quantization/machete/generated/*.cu")
-    list(APPEND VLLM_EXT_SRC ${MACHETE_GEN_SOURCES})
-
-    # forward compatible
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND FP4_ARCHS)
+    set(SRCS
+      "csrc/libtorch_stable/quantization/fp4/nvfp4_quant_kernels.cu"
+      "csrc/libtorch_stable/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu"
+      "csrc/libtorch_stable/quantization/fp4/nvfp4_experts_quant.cu"
+      "csrc/libtorch_stable/quantization/fp4/nvfp4_scaled_mm_sm120_kernels.cu"
+      "csrc/libtorch_stable/quantization/fp4/nvfp4_blockwise_moe_kernel.cu")
     set_gencode_flags_for_srcs(
-      SRCS "${MACHETE_GEN_SOURCES}"
-      CUDA_ARCHS "${MACHETE_ARCHS}")
-
-    list(APPEND VLLM_EXT_SRC
-      csrc/quantization/machete/machete_pytorch.cu)
+      SRCS "${SRCS}"
+      CUDA_ARCHS "${FP4_ARCHS}")
+    list(APPEND VLLM_STABLE_EXT_SRC "${SRCS}")
+    # nvfp4_kv_cache_kernels uses non-stable torch API and is called directly
+    # from cache_kernels.cu, so it belongs in _C rather than _C_stable.
+    set(NVFP4_KV_SRC "csrc/nvfp4_kv_cache_kernels.cu")
+    set_gencode_flags_for_srcs(
+      SRCS "${NVFP4_KV_SRC}"
+      CUDA_ARCHS "${FP4_ARCHS}")
+    target_sources(_C PRIVATE ${NVFP4_KV_SRC})
+    target_compile_definitions(_C PRIVATE ENABLE_NVFP4_SM120=1)
+    list(APPEND VLLM_GPU_FLAGS "-DENABLE_NVFP4_SM120=1")
+    list(APPEND VLLM_GPU_FLAGS "-DENABLE_CUTLASS_MOE_SM120=1")
+    message(STATUS "Building NVFP4 for archs: ${FP4_ARCHS}")
+  else()
+    message(STATUS "Not building NVFP4 as no compatible archs were found.")
+    # clear FP4_ARCHS
+    set(FP4_ARCHS)
+  endif()
 
-    message(STATUS "Building Machete kernels for archs: ${MACHETE_ARCHS}")
+  # FP4 Archs and flags
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
+    cuda_archs_loose_intersection(FP4_ARCHS "10.0f;11.0f" "${CUDA_ARCHS}")
   else()
-    if (NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0
-        AND MACHETE_ARCHS)
-      message(STATUS "Not building Machete kernels as CUDA Compiler version is "
-                     "not >= 12.0, we recommend upgrading to CUDA 12.0 or "
-                     "later if you intend on running w4a16 quantized models on "
-                     "Hopper.")
-    else()
-      message(STATUS "Not building Machete kernels as no compatible archs "
-                     "found in CUDA target architectures")
-    endif()
+    cuda_archs_loose_intersection(FP4_ARCHS "10.0a;10.1a;10.3a" "${CUDA_ARCHS}")
+  endif()
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND FP4_ARCHS)
+    set(SRCS
+      "csrc/libtorch_stable/quantization/fp4/nvfp4_quant_kernels.cu"
+      "csrc/libtorch_stable/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu"
+      "csrc/libtorch_stable/quantization/fp4/nvfp4_experts_quant.cu"
+      "csrc/libtorch_stable/quantization/fp4/nvfp4_scaled_mm_kernels.cu"
+      "csrc/libtorch_stable/quantization/fp4/nvfp4_blockwise_moe_kernel.cu"
+      "csrc/libtorch_stable/quantization/fp4/mxfp4_experts_quant.cu"
+      "csrc/libtorch_stable/quantization/fp4/mxfp4_blockwise_moe_kernel.cu")
+    set_gencode_flags_for_srcs(
+      SRCS "${SRCS}"
+      CUDA_ARCHS "${FP4_ARCHS}")
+    list(APPEND VLLM_STABLE_EXT_SRC "${SRCS}")
+    set(NVFP4_KV_SRC "csrc/nvfp4_kv_cache_kernels.cu")
+    set_gencode_flags_for_srcs(
+      SRCS "${NVFP4_KV_SRC}"
+      CUDA_ARCHS "${FP4_ARCHS}")
+    target_sources(_C PRIVATE ${NVFP4_KV_SRC})
+    target_compile_definitions(_C PRIVATE ENABLE_NVFP4_SM100=1)
+    list(APPEND VLLM_GPU_FLAGS "-DENABLE_NVFP4_SM100=1")
+    list(APPEND VLLM_GPU_FLAGS "-DENABLE_CUTLASS_MOE_SM100=1")
+    message(STATUS "Building NVFP4 for archs: ${FP4_ARCHS}")
+  else()
+    message(STATUS "Not building NVFP4 as no compatible archs were found.")
+    # clear FP4_ARCHS
+    set(FP4_ARCHS)
   endif()
 
+  #
+  # W4A8 kernels (moved from _C to _C_stable_libtorch)
+  #
+
   # Only build W4A8 kernels if we are building for something compatible with sm90a
   cuda_archs_loose_intersection(W4A8_ARCHS "9.0a" "${CUDA_ARCHS}")
   if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0 AND W4A8_ARCHS)
     set(SRCS
-       "csrc/quantization/cutlass_w4a8/w4a8_mm_entry.cu"
-       "csrc/quantization/cutlass_w4a8/w4a8_grouped_mm_entry.cu"
-       "csrc/quantization/cutlass_w4a8/w4a8_utils.cu"
+       "csrc/libtorch_stable/quantization/cutlass_w4a8/w4a8_mm_entry.cu"
+       "csrc/libtorch_stable/quantization/cutlass_w4a8/w4a8_grouped_mm_entry.cu"
+       "csrc/libtorch_stable/quantization/cutlass_w4a8/w4a8_utils.cu"
        )
 
     set_gencode_flags_for_srcs(
       SRCS "${SRCS}"
       CUDA_ARCHS "${W4A8_ARCHS}")
 
-    list(APPEND VLLM_EXT_SRC "${SRCS}")
+    list(APPEND VLLM_STABLE_EXT_SRC "${SRCS}")
 
     message(STATUS "Building W4A8 kernels for archs: ${W4A8_ARCHS}")
   else()
@@ -916,67 +994,42 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
     endif()
   endif()
 
+  # CUTLASS MLA Archs and flags
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
+    cuda_archs_loose_intersection(MLA_ARCHS "10.0f;11.0f;12.0f" "${CUDA_ARCHS}")
+  else()
+    cuda_archs_loose_intersection(MLA_ARCHS "10.0a;10.1a;10.3a;12.0a;12.1a" "${CUDA_ARCHS}")
+  endif()
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND MLA_ARCHS)
+    set(SRCS
+      "csrc/libtorch_stable/attention/mla/sm100_cutlass_mla_kernel.cu")
+    set_gencode_flags_for_srcs(
+      SRCS "${SRCS}"
+      CUDA_ARCHS "${MLA_ARCHS}")
+    list(APPEND VLLM_STABLE_EXT_SRC "${SRCS}")
+    list(APPEND VLLM_GPU_FLAGS "-DENABLE_CUTLASS_MLA=1")
+    # Add MLA-specific include directories only to MLA source files
+    set_source_files_properties(${SRCS}
+      PROPERTIES INCLUDE_DIRECTORIES "${CUTLASS_DIR}/examples/77_blackwell_fmha;${CUTLASS_DIR}/examples/common")
+    message(STATUS "Building CUTLASS MLA for archs: ${MLA_ARCHS}")
+  else()
+    message(STATUS "Not building CUTLASS MLA as no compatible archs were found.")
+    # clear MLA_ARCHS
+    set(MLA_ARCHS)
+  endif()
+
   # Hadacore kernels
   cuda_archs_loose_intersection(HADACORE_ARCHS "8.0+PTX;9.0+PTX" "${CUDA_ARCHS}")
   if(HADACORE_ARCHS)
-    set(SRCS "csrc/quantization/hadamard/hadacore/hadamard_transform_cuda.cu")
+    set(SRCS "csrc/libtorch_stable/quantization/hadamard/hadacore/hadamard_transform_cuda.cu")
     set_gencode_flags_for_srcs(
       SRCS "${SRCS}"
       CUDA_ARCHS "${HADACORE_ARCHS}")
-    list(APPEND VLLM_EXT_SRC "${SRCS}")
+    list(APPEND VLLM_STABLE_EXT_SRC "${SRCS}")
     message(STATUS "Building hadacore")
   endif()
 
-# if CUDA endif
-endif()
-
-if (VLLM_GPU_LANG STREQUAL "HIP")
-  # Add QuickReduce kernels
-  list(APPEND VLLM_EXT_SRC
-    "csrc/custom_quickreduce.cu"
-  )
-# if ROCM endif
-endif()
-
-message(STATUS "Enabling C extension.")
-define_extension_target(
-  _C
-  DESTINATION vllm
-  LANGUAGE ${VLLM_GPU_LANG}
-  SOURCES ${VLLM_EXT_SRC}
-  COMPILE_FLAGS ${VLLM_GPU_FLAGS}
-  ARCHITECTURES ${VLLM_GPU_ARCHES}
-  INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR}
-  INCLUDE_DIRECTORIES ${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
-  USE_SABI 3
-  WITH_SOABI)
-
-# If CUTLASS is compiled on NVCC >= 12.5, it by default uses
-# cudaGetDriverEntryPointByVersion as a wrapper to avoid directly calling the
-# driver API. This causes problems when linking with earlier versions of CUDA.
-# Setting this variable sidesteps the issue by calling the driver directly.
-target_compile_definitions(_C PRIVATE CUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1)
-
-# add OR VLLM_GPU_LANG STREQUAL "HIP" here once
-# https://github.com/vllm-project/vllm/issues/35163 is resolved
-if(VLLM_GPU_LANG STREQUAL "CUDA")
-  #
-  # _C_stable_libtorch extension (ops registered via STABLE_TORCH_LIBRARY)
-  #
-  set(VLLM_STABLE_EXT_SRC
-    "csrc/libtorch_stable/torch_bindings.cpp")
-
-  if(VLLM_GPU_LANG STREQUAL "CUDA")
-    list(APPEND VLLM_STABLE_EXT_SRC
-      "csrc/libtorch_stable/permute_cols.cu"
-      "csrc/libtorch_stable/quantization/w8a8/fp8/per_token_group_quant.cu"
-      "csrc/libtorch_stable/quantization/w8a8/int8/per_token_group_quant.cu")
-  endif()
-
-  if(VLLM_GPU_LANG STREQUAL "CUDA")
-    set_gencode_flags_for_srcs(
-      SRCS "${VLLM_STABLE_EXT_SRC}"
-      CUDA_ARCHS "${CUDA_ARCHS}")
+  # if CUDA endif
   endif()
 
   message(STATUS "Enabling C_stable extension.")
@@ -987,6 +1040,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
     SOURCES ${VLLM_STABLE_EXT_SRC}
     COMPILE_FLAGS ${VLLM_GPU_FLAGS}
     ARCHITECTURES ${VLLM_GPU_ARCHES}
+    INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR} ${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
     USE_SABI 3
     WITH_SOABI)
 
@@ -997,9 +1051,34 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
   target_compile_definitions(_C_stable_libtorch PRIVATE
     TORCH_TARGET_VERSION=0x020A000000000000ULL)
 
-  # Needed to use cuda APIs from C-shim
-  target_compile_definitions(_C_stable_libtorch PRIVATE
-    USE_CUDA)
+  # Needed to use cuda/hip APIs from C-shim
+  if(VLLM_GPU_LANG STREQUAL "CUDA")
+    target_compile_definitions(_C_stable_libtorch PRIVATE USE_CUDA)
+    # Needed by CUTLASS kernels
+    target_compile_definitions(_C_stable_libtorch PRIVATE
+      CUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1)
+  elseif(VLLM_GPU_LANG STREQUAL "HIP")
+    target_compile_definitions(_C_stable_libtorch PRIVATE USE_ROCM)
+  endif()
+
+  # On ROCm, _C_stable_libtorch calls raw HIP APIs (e.g. hipGetDevice in
+  # get_device_prop()) which must resolve to the same libamdhip64.so that
+  # PyTorch uses.  When PyTorch bundles its own copy (pip/conda wheels),
+  # the raw HIP calls would otherwise resolve to the system ROCm copy,
+  # initializing a second HIP runtime that corrupts device state (wrong
+  # device on DeviceGuard, core dumps on multi-GPU tests).
+  #
+  # If PyTorch doesn't bundle libamdhip64 (built from source against system
+  # ROCm), there is only one copy in the process and no action is needed —
+  # the HIP compiler already links the system libamdhip64 automatically.
+  if(VLLM_GPU_LANG STREQUAL "HIP")
+    find_library(_STABLE_TORCH_AMDHIP64 amdhip64
+      PATHS "${TORCH_INSTALL_PREFIX}/lib" NO_DEFAULT_PATH)
+    if(_STABLE_TORCH_AMDHIP64)
+      message(STATUS "Found PyTorch-bundled libamdhip64 at ${_STABLE_TORCH_AMDHIP64}")
+      target_link_libraries(_C_stable_libtorch PRIVATE ${_STABLE_TORCH_AMDHIP64})
+    endif()
+  endif()
 endif()
 
 #
@@ -1009,14 +1088,13 @@ endif()
 set(VLLM_MOE_EXT_SRC
   "csrc/moe/torch_bindings.cpp"
   "csrc/moe/moe_align_sum_kernels.cu"
-  "csrc/moe/topk_softmax_kernels.cu")
+  "csrc/moe/topk_softmax_kernels.cu"
+  "csrc/moe/topk_softplus_sqrt_kernels.cu")
 
 if(VLLM_GPU_LANG STREQUAL "CUDA")
   list(APPEND VLLM_MOE_EXT_SRC
     "csrc/moe/moe_wna16.cu"
-    "csrc/moe/grouped_topk_kernels.cu"
-    "csrc/moe/gpt_oss_router_gemm.cu"
-    "csrc/moe/router_gemm.cu")
+    "csrc/moe/grouped_topk_kernels.cu")
 endif()
 
 if(VLLM_GPU_LANG STREQUAL "CUDA")
@@ -1146,24 +1224,39 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
                    " in CUDA target architectures")
   endif()
 
-  # DeepSeek V3 router GEMM kernel - requires SM90+
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
-    cuda_archs_loose_intersection(DSV3_ROUTER_GEMM_ARCHS "9.0a;10.0f;11.0f" "${CUDA_ARCHS}")
-  else()
-    cuda_archs_loose_intersection(DSV3_ROUTER_GEMM_ARCHS "9.0a;10.0a;10.1a;10.3a" "${CUDA_ARCHS}")
-  endif()
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0 AND DSV3_ROUTER_GEMM_ARCHS)
+  # FP32 router GEMM (H=3072, E=256, M<=32), DeepSeek V3 router GEMM,
+  # and DeepSeek V4 norm+router GEMV all require SM90+ and CUDA >= 12.0.
+  cuda_archs_sm90plus(SM90PLUS_ROUTER_GEMM_ARCHS "${CUDA_ARCHS}")
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0 AND SM90PLUS_ROUTER_GEMM_ARCHS)
+    set(FP32_ROUTER_GEMM_SRC
+      "csrc/moe/fp32_router_gemm_entry.cu"
+      "csrc/moe/fp32_router_gemm.cu")
+    set_gencode_flags_for_srcs(
+      SRCS "${FP32_ROUTER_GEMM_SRC}"
+      CUDA_ARCHS "${SM90PLUS_ROUTER_GEMM_ARCHS}")
+    list(APPEND VLLM_MOE_EXT_SRC "${FP32_ROUTER_GEMM_SRC}")
+
     set(DSV3_ROUTER_GEMM_SRC
       "csrc/moe/dsv3_router_gemm_entry.cu"
       "csrc/moe/dsv3_router_gemm_float_out.cu"
       "csrc/moe/dsv3_router_gemm_bf16_out.cu")
     set_gencode_flags_for_srcs(
       SRCS "${DSV3_ROUTER_GEMM_SRC}"
-      CUDA_ARCHS "${DSV3_ROUTER_GEMM_ARCHS}")
+      CUDA_ARCHS "${SM90PLUS_ROUTER_GEMM_ARCHS}")
     list(APPEND VLLM_MOE_EXT_SRC "${DSV3_ROUTER_GEMM_SRC}")
-    message(STATUS "Building DSV3 router GEMM kernel for archs: ${DSV3_ROUTER_GEMM_ARCHS}")
+
+    # DeepSeek V4 fused RMSNorm + router GEMV - same arch gating as DSV3.
+    set(DSV4_NORM_ROUTER_GEMM_SRC
+      "csrc/moe/dsv4_norm_router_gemm_entry.cu"
+      "csrc/moe/dsv4_norm_router_gemm_kernel.cu")
+    set_gencode_flags_for_srcs(
+      SRCS "${DSV4_NORM_ROUTER_GEMM_SRC}"
+      CUDA_ARCHS "${SM90PLUS_ROUTER_GEMM_ARCHS}")
+    list(APPEND VLLM_MOE_EXT_SRC "${DSV4_NORM_ROUTER_GEMM_SRC}")
+
+    message(STATUS "Building FP32/DSV3/DSV4 router GEMM kernels for archs: ${SM90PLUS_ROUTER_GEMM_ARCHS}")
   else()
-    message(STATUS "Not building DSV3 router GEMM kernel as no compatible archs found"
+    message(STATUS "Not building FP32/DSV3/DSV4 router GEMM kernels as no compatible archs found"
                    " (requires SM90+ and CUDA >= 12.0)")
   endif()
 endif()
@@ -1201,6 +1294,12 @@ if(VLLM_GPU_LANG STREQUAL "HIP")
     WITH_SOABI)
 endif()
 
+# Must run after the last HIP `define_extension_target` so every extension
+# has registered its sources.
+if (VLLM_GPU_LANG STREQUAL "HIP")
+  vllm_finalize_hipify_target()
+endif()
+
 # For CUDA and HIP builds also build the triton_kernels external package.
 if(VLLM_GPU_LANG STREQUAL "CUDA" OR VLLM_GPU_LANG STREQUAL "HIP")
     include(cmake/external_projects/triton_kernels.cmake)
@@ -1208,6 +1307,7 @@ endif()
 
 # For CUDA we also build and ship some external projects.
 if (VLLM_GPU_LANG STREQUAL "CUDA")
+    include(cmake/external_projects/deepgemm.cmake)
     include(cmake/external_projects/flashmla.cmake)
     include(cmake/external_projects/qutlass.cmake)
 
diff --git a/README.md b/README.md
index 705fbcb9150b..42777436c63d 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ Easy, fast, and cheap LLM serving for everyone
 | <a href="https://docs.vllm.ai"><b>Documentation</b></a> | <a href="https://blog.vllm.ai/"><b>Blog</b></a> | <a href="https://arxiv.org/abs/2309.06180"><b>Paper</b></a> | <a href="https://x.com/vllm_project"><b>Twitter/X</b></a> | <a href="https://discuss.vllm.ai"><b>User Forum</b></a> | <a href="https://slack.vllm.ai"><b>Developer Slack</b></a> |
 </p>
 
-🔥 We have built a vllm website to help you get started with vllm. Please visit [vllm.ai](https://vllm.ai) to learn more.
+🔥 We have built a vLLM website to help you get started with vLLM. Please visit [vllm.ai](https://vllm.ai) to learn more.
 For events, please visit [vllm.ai/events](https://vllm.ai/events) to join us.
 
 ---
@@ -23,47 +23,54 @@ For events, please visit [vllm.ai/events](https://vllm.ai/events) to join us.
 
 vLLM is a fast and easy-to-use library for LLM inference and serving.
 
-Originally developed in the [Sky Computing Lab](https://sky.cs.berkeley.edu) at UC Berkeley, vLLM has evolved into a community-driven project with contributions from both academia and industry.
+Originally developed in the [Sky Computing Lab](https://sky.cs.berkeley.edu) at UC Berkeley, vLLM has grown into one of the most active open-source AI projects built and maintained by a diverse community of many dozens of academic institutions and companies from over 2000 contributors.
 
 vLLM is fast with:
 
 - State-of-the-art serving throughput
 - Efficient management of attention key and value memory with [**PagedAttention**](https://blog.vllm.ai/2023/06/20/vllm.html)
-- Continuous batching of incoming requests
-- Fast model execution with CUDA/HIP graph
-- Quantizations: [GPTQ](https://arxiv.org/abs/2210.17323), [AWQ](https://arxiv.org/abs/2306.00978), [AutoRound](https://arxiv.org/abs/2309.05516), INT4, INT8, and FP8
-- Optimized CUDA kernels, including integration with FlashAttention and FlashInfer
-- Speculative decoding
-- Chunked prefill
+- Continuous batching of incoming requests, chunked prefill, prefix caching
+- Fast and flexible model execution with piecewise and full CUDA/HIP graphs
+- Quantization: FP8, MXFP8/MXFP4, NVFP4, INT8, INT4, GPTQ/AWQ, GGUF, compressed-tensors, ModelOpt, TorchAO, and [more](https://docs.vllm.ai/en/latest/features/quantization/index.html)
+- Optimized attention kernels including FlashAttention, FlashInfer, TRTLLM-GEN, FlashMLA, and Triton
+- Optimized GEMM/MoE kernels for various precisions using CUTLASS, TRTLLM-GEN, CuTeDSL
+- Speculative decoding including n-gram, suffix, EAGLE, DFlash
+- Automatic kernel generation and graph-level transformations using torch.compile
+- Disaggregated prefill, decode, and encode
 
 vLLM is flexible and easy to use with:
 
 - Seamless integration with popular Hugging Face models
 - High-throughput serving with various decoding algorithms, including *parallel sampling*, *beam search*, and more
-- Tensor, pipeline, data and expert parallelism support for distributed inference
+- Tensor, pipeline, data, expert, and context parallelism for distributed inference
 - Streaming outputs
-- OpenAI-compatible API server
-- Support for NVIDIA GPUs, AMD CPUs and GPUs, Intel CPUs and GPUs, PowerPC CPUs, Arm CPUs, and TPU. Additionally, support for diverse hardware plugins such as Intel Gaudi, IBM Spyre and Huawei Ascend.
-- Prefix caching support
-- Multi-LoRA support
+- Generation of structured outputs using xgrammar or guidance
+- Tool calling and reasoning parsers
+- OpenAI-compatible API server, plus Anthropic Messages API and gRPC support
+- Efficient multi-LoRA support for dense and MoE layers
+- Support for NVIDIA GPUs, AMD GPUs, and x86/ARM/PowerPC CPUs. Additionally, diverse hardware plugins such as Google TPUs, Intel Gaudi, IBM Spyre, Huawei Ascend, Rebellions NPU, Apple Silicon, MetaX GPU, and more.
 
-vLLM seamlessly supports most popular open-source models on HuggingFace, including:
+vLLM seamlessly supports 200+ model architectures on Hugging Face, including:
 
-- Transformer-like LLMs (e.g., Llama)
-- Mixture-of-Expert LLMs (e.g., Mixtral, Deepseek-V2 and V3)
-- Embedding Models (e.g., E5-Mistral)
-- Multi-modal LLMs (e.g., LLaVA)
+- Decoder-only LLMs (e.g., Llama, Qwen, Gemma)
+- Mixture-of-Expert LLMs (e.g., Mixtral, DeepSeek-V3, Qwen-MoE, GPT-OSS)
+- Hybrid attention and state-space models (e.g., Mamba, Qwen3.5)
+- Multi-modal models (e.g., LLaVA, Qwen-VL, Pixtral)
+- Embedding and retrieval models (e.g., E5-Mistral, GTE, ColBERT)
+- Reward and classification models (e.g., Qwen-Math)
 
 Find the full list of supported models [here](https://docs.vllm.ai/en/latest/models/supported_models.html).
 
 ## Getting Started
 
-Install vLLM with `pip` or [from source](https://docs.vllm.ai/en/latest/getting_started/installation/gpu/index.html#build-wheel-from-source):
+Install vLLM with [`uv`](https://docs.astral.sh/uv/) (recommended) or `pip`:
 
 ```bash
-pip install vllm
+uv pip install vllm
 ```
 
+Or [build from source](https://docs.vllm.ai/en/latest/getting_started/installation/gpu/index.html#build-wheel-from-source) for development.
+
 Visit our [documentation](https://docs.vllm.ai/en/latest/) to learn more.
 
 - [Installation](https://docs.vllm.ai/en/latest/getting_started/installation.html)
diff --git a/tests/entrypoints/openai/realtime/__init__.py b/benchmarks/__init__.py
similarity index 100%
rename from tests/entrypoints/openai/realtime/__init__.py
rename to benchmarks/__init__.py
diff --git a/benchmarks/attention_benchmarks/configs/mla_decode.yaml b/benchmarks/attention_benchmarks/configs/mla_decode.yaml
index d758654dbe80..8f12ac723064 100644
--- a/benchmarks/attention_benchmarks/configs/mla_decode.yaml
+++ b/benchmarks/attention_benchmarks/configs/mla_decode.yaml
@@ -53,6 +53,7 @@ backends:
   - FLASHINFER_MLA
   - FLASH_ATTN_MLA  # Hopper only
   - FLASHMLA        # Hopper only
+  - TOKENSPEED_MLA  # Blackwell + R1 dims + FP8 KV (use --kv-cache-dtype fp8)
 
 device: "cuda:0"
 repeats: 100
diff --git a/benchmarks/attention_benchmarks/configs/mla_prefill.yaml b/benchmarks/attention_benchmarks/configs/mla_prefill.yaml
index 122dbd783c5b..1e1ab264bace 100644
--- a/benchmarks/attention_benchmarks/configs/mla_prefill.yaml
+++ b/benchmarks/attention_benchmarks/configs/mla_prefill.yaml
@@ -3,6 +3,7 @@
 # Compares all available MLA prefill backends:
 #   FA backends:  fa2, fa3, fa4 (FlashAttention versions)
 #   Non-FA:       flashinfer, cudnn, trtllm (Blackwell-only, require flashinfer)
+#   CuTe DSL:     tokenspeed (Blackwell + R1 dims, requires tokenspeed_mla)
 #
 # Uses cutlass_mla as the decode backend for impl construction
 # (only the prefill path is exercised).
@@ -120,6 +121,7 @@ prefill_backends:
   - flashinfer
   - cudnn
   - trtllm
+  - tokenspeed
 
 device: "cuda:0"
 repeats: 20
diff --git a/benchmarks/attention_benchmarks/mla_runner.py b/benchmarks/attention_benchmarks/mla_runner.py
index f8bc7b4a10ed..abab1e2edbac 100644
--- a/benchmarks/attention_benchmarks/mla_runner.py
+++ b/benchmarks/attention_benchmarks/mla_runner.py
@@ -29,6 +29,7 @@
     VllmConfig,
     set_current_vllm_config,
 )
+from vllm.v1.attention.backends.mla.prefill.registry import MLAPrefillBackendEnum
 
 # ============================================================================
 # VllmConfig Creation
@@ -79,8 +80,8 @@ def create_minimal_vllm_config(
         index_topk: Optional topk value for sparse MLA backends. If provided,
                     the config will include index_topk for sparse attention.
         prefill_backend: Prefill backend name (e.g., "fa3", "fa4", "flashinfer",
-                        "cudnn", "trtllm"). Configures the attention config to
-                        force the specified prefill backend.
+                        "trtllm"). Configures the attention config to force
+                        the specified prefill backend.
 
     Returns:
         VllmConfig for benchmarking
@@ -179,19 +180,13 @@ def create_minimal_vllm_config(
 
     if prefill_backend is not None:
         prefill_cfg = get_prefill_backend_config(prefill_backend)
+        vllm_config.attention_config.mla_prefill_backend = prefill_cfg[
+            "mla_prefill_backend"
+        ]
         if prefill_cfg["flash_attn_version"] is not None:
             vllm_config.attention_config.flash_attn_version = prefill_cfg[
                 "flash_attn_version"
             ]
-        vllm_config.attention_config.disable_flashinfer_prefill = prefill_cfg[
-            "disable_flashinfer_prefill"
-        ]
-        vllm_config.attention_config.use_cudnn_prefill = prefill_cfg[
-            "use_cudnn_prefill"
-        ]
-        vllm_config.attention_config.use_trtllm_ragged_deepseek_prefill = prefill_cfg[
-            "use_trtllm_ragged_deepseek_prefill"
-        ]
 
     return vllm_config
 
@@ -206,39 +201,27 @@ def create_minimal_vllm_config(
 _PREFILL_BACKEND_CONFIG: dict[str, dict] = {
     "fa2": {
         "flash_attn_version": 2,
-        "disable_flashinfer_prefill": True,
-        "use_cudnn_prefill": False,
-        "use_trtllm_ragged_deepseek_prefill": False,
+        "mla_prefill_backend": MLAPrefillBackendEnum.FLASH_ATTN,
     },
     "fa3": {
         "flash_attn_version": 3,
-        "disable_flashinfer_prefill": True,
-        "use_cudnn_prefill": False,
-        "use_trtllm_ragged_deepseek_prefill": False,
+        "mla_prefill_backend": MLAPrefillBackendEnum.FLASH_ATTN,
     },
     "fa4": {
         "flash_attn_version": 4,
-        "disable_flashinfer_prefill": True,
-        "use_cudnn_prefill": False,
-        "use_trtllm_ragged_deepseek_prefill": False,
+        "mla_prefill_backend": MLAPrefillBackendEnum.FLASH_ATTN,
     },
     "flashinfer": {
         "flash_attn_version": None,
-        "disable_flashinfer_prefill": False,
-        "use_cudnn_prefill": False,
-        "use_trtllm_ragged_deepseek_prefill": False,
+        "mla_prefill_backend": MLAPrefillBackendEnum.FLASHINFER,
     },
-    "cudnn": {
+    "trtllm": {
         "flash_attn_version": None,
-        "disable_flashinfer_prefill": True,
-        "use_cudnn_prefill": True,
-        "use_trtllm_ragged_deepseek_prefill": False,
+        "mla_prefill_backend": MLAPrefillBackendEnum.TRTLLM_RAGGED,
     },
-    "trtllm": {
+    "tokenspeed": {
         "flash_attn_version": None,
-        "disable_flashinfer_prefill": True,
-        "use_cudnn_prefill": False,
-        "use_trtllm_ragged_deepseek_prefill": True,
+        "mla_prefill_backend": MLAPrefillBackendEnum.TOKENSPEED_MLA,
     },
 }
 
@@ -404,6 +387,7 @@ def _build_attention_metadata(
         query_start_loc=q_start_gpu,
         query_start_loc_cpu=q_start_cpu,
         seq_lens=seq_lens_gpu,
+        seq_lens_cpu_upper_bound=seq_lens_cpu,
         _seq_lens_cpu=seq_lens_cpu,
         _num_computed_tokens_cpu=num_computed_tokens_cpu,
         slot_mapping=slot_mapping,
@@ -624,6 +608,21 @@ def _create_backend_impl(
     # Create mock layer
     layer = MockLayer(device, impl=impl, kv_cache_spec=kv_cache_spec)
 
+    # Attach a prefill backend (MLAAttention does this in __init__; the metadata
+    # builder reads layer.prefill_backend from static_forward_context).
+    from vllm.v1.attention.backends.mla.prefill import get_mla_prefill_backend
+
+    prefill_backend_cls = get_mla_prefill_backend(vllm_config)
+    layer.prefill_backend = prefill_backend_cls(
+        num_heads=mla_dims["num_q_heads"],
+        scale=(mla_dims["qk_nope_head_dim"] + mla_dims["qk_rope_head_dim"]) ** -0.5,
+        kv_lora_rank=mla_dims["kv_lora_rank"],
+        qk_nope_head_dim=mla_dims["qk_nope_head_dim"],
+        qk_rope_head_dim=mla_dims["qk_rope_head_dim"],
+        v_head_dim=mla_dims["v_head_dim"],
+        vllm_config=vllm_config,
+    )
+
     # Create builder instance if needed
     builder_instance = None
     if builder_class:
@@ -960,19 +959,6 @@ def _run_mla_benchmark_batched(
     results = []
 
     with set_current_vllm_config(vllm_config):
-        # Clear cached prefill backend detection functions so they re-evaluate
-        # with the current VllmConfig. These are @functools.cache decorated and
-        # would otherwise return stale results from a previous backend's config.
-        from vllm.model_executor.layers.attention.mla_attention import (
-            use_cudnn_prefill,
-            use_flashinfer_prefill,
-            use_trtllm_ragged_deepseek_prefill,
-        )
-
-        use_flashinfer_prefill.cache_clear()
-        use_cudnn_prefill.cache_clear()
-        use_trtllm_ragged_deepseek_prefill.cache_clear()
-
         # Create backend impl, layer, builder, and indexer (reused across benchmarks)
         impl, layer, builder_instance, indexer = _create_backend_impl(
             backend_cfg,
@@ -984,38 +970,36 @@ def _run_mla_benchmark_batched(
             kv_cache_dtype=kv_cache_dtype,
         )
 
-        # Verify the actual prefill backend matches what was requested
+        # Verify the actual prefill backend matches what was requested. The
+        # selector + impl construction already raise on misuse; here we just
+        # check the resolved class against the requested name as a sanity guard.
         if prefill_backend is not None:
-            prefill_cfg = get_prefill_backend_config(prefill_backend)
-            fa_version = prefill_cfg["flash_attn_version"]
-
-            if fa_version is not None:
-                # FA backend: verify the impl's FA version
-                actual_fa_version = getattr(impl, "vllm_flash_attn_version", None)
+            expected_class = {
+                "fa2": "FlashAttnPrefillBackend",
+                "fa3": "FlashAttnPrefillBackend",
+                "fa4": "FlashAttnPrefillBackend",
+                "flashinfer": "FlashInferPrefillBackend",
+                "trtllm": "TrtllmRaggedPrefillBackend",
+                "tokenspeed": "TokenspeedMLAPrefillBackend",
+            }.get(prefill_backend)
+            actual_class = type(getattr(layer, "prefill_backend", None)).__name__
+            if expected_class and actual_class != expected_class:
+                raise RuntimeError(
+                    f"Prefill backend '{prefill_backend}' requested "
+                    f"{expected_class}, got {actual_class}. Check "
+                    f"attention_config plumbing or installed deps."
+                )
+            if prefill_backend in {"fa2", "fa3", "fa4"}:
+                fa_version = int(prefill_backend[2:])
+                actual_fa_version = getattr(
+                    layer.prefill_backend, "vllm_flash_attn_version", None
+                )
                 if actual_fa_version != fa_version:
                     raise RuntimeError(
                         f"Prefill backend '{prefill_backend}' requested FA "
-                        f"version {fa_version}, but the impl is using FA "
-                        f"version {actual_fa_version}. Check "
-                        f"vllm/v1/attention/backends/fa_utils.py."
+                        f"version {fa_version}, got "
+                        f"{actual_fa_version} on {actual_class}."
                     )
-            else:
-                # Non-FA backend: verify the builder picked the right path
-                expected_flags = {
-                    "flashinfer": "_use_fi_prefill",
-                    "cudnn": "_use_cudnn_prefill",
-                    "trtllm": "_use_trtllm_ragged_prefill",
-                }
-                flag_name = expected_flags.get(prefill_backend)
-                if flag_name and not getattr(builder_instance, flag_name, False):
-                    raise RuntimeError(
-                        f"Prefill backend '{prefill_backend}' was requested "
-                        f"but the metadata builder did not enable it. This "
-                        f"usually means a dependency is missing (e.g., "
-                        f"flashinfer not installed) or the platform doesn't "
-                        f"support it."
-                    )
-
         # Run each benchmark with the shared impl
         for config, threshold, num_splits in configs_with_params:
             # Set threshold for this benchmark (FlashAttn/FlashMLA only)
diff --git a/benchmarks/benchmark_hidden_state_extraction.py b/benchmarks/benchmark_hidden_state_extraction.py
new file mode 100644
index 000000000000..6056fcdd072c
--- /dev/null
+++ b/benchmarks/benchmark_hidden_state_extraction.py
@@ -0,0 +1,415 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Benchmark hidden state extraction throughput.
+
+Measures two modes:
+  1. Baseline: bulk inference with max_tokens=1, no extraction.
+  2. Extract:  async hidden state extraction via ExampleHiddenStatesConnector
+               with N concurrent clients, each consuming hidden states as
+               soon as their request finishes (overlapping I/O with generation).
+
+Reports tokens/s and prompts/s for each mode.
+
+Usage:
+  python benchmarks/benchmark_hidden_state_extraction.py \
+      --model Qwen/Qwen3-0.6B \
+      --num-prompts 64 \
+      --num-clients 8 \
+      --prompt-len 8192 \
+      --layers 1 2 3 4
+"""
+
+import argparse
+import asyncio
+import time
+from concurrent.futures import ThreadPoolExecutor
+
+import torch
+from transformers import AutoConfig
+
+from vllm import LLM, SamplingParams
+from vllm.config.kv_transfer import KVTransferConfig
+from vllm.distributed.kv_transfer.kv_connector.v1 import (
+    example_hidden_states_connector,
+)
+from vllm.engine.arg_utils import AsyncEngineArgs
+from vllm.sampling_params import RequestOutputKind
+from vllm.v1.engine.async_llm import AsyncLLM
+
+
+def _make_profiler_config(profile_dir: str) -> dict:
+    """Build a profiler_config dict for torch profiling."""
+    return {
+        "profiler": "torch",
+        "torch_profiler_dir": profile_dir,
+        "torch_profiler_with_stack": True,
+    }
+
+
+def make_random_prompts(
+    num_prompts: int, prompt_len: int, vocab_size: int, seed: int = 42
+) -> list[list[int]]:
+    """Generate lists of random token IDs."""
+    # Set seed for reproducibility
+    torch.manual_seed(seed)
+    return [
+        torch.randint(0, vocab_size, (prompt_len,)).tolist() for _ in range(num_prompts)
+    ]
+
+
+def consume_hidden_states(path: str) -> float:
+    """Load hidden states from disk and compute per-position mean.
+
+    Returns a single float: the grand mean of all hidden state values.
+    This forces the benchmark to actually read and reduce the data.
+
+    Uses :func:`load_hidden_states` which acquires a shared flock,
+    blocking (without polling) until the async writer releases its
+    exclusive lock.
+    """
+    obj = example_hidden_states_connector.load_hidden_states(path)
+    hs = obj["hidden_states"]
+    total = hs.mean().item()
+
+    example_hidden_states_connector.cleanup_hidden_states(path)
+
+    return total
+
+
+def run_baseline(
+    model: str,
+    prompts: list[list[int]],
+    extra_args: dict,
+    profile_dir: str | None = None,
+) -> dict:
+    """Baseline: bulk inference, no hidden state extraction."""
+    if profile_dir:
+        extra_args = {
+            **extra_args,
+            "profiler_config": _make_profiler_config(profile_dir),
+        }
+    llm = LLM(
+        model=model,
+        enable_prefix_caching=False,
+        enable_chunked_prefill=False,
+        **extra_args,
+    )
+    sampling_params = SamplingParams(max_tokens=1)
+    prompt_inputs = [{"prompt_token_ids": p} for p in prompts]
+
+    # Warmup
+    llm.generate(prompt_inputs[:4], sampling_params, use_tqdm=False)
+
+    if profile_dir:
+        llm.start_profile()
+
+    t0 = time.perf_counter()
+    outputs = llm.generate(prompt_inputs, sampling_params, use_tqdm=True)
+    elapsed = time.perf_counter() - t0
+
+    if profile_dir:
+        llm.stop_profile()
+
+    total_prompt_tokens = sum(len(o.prompt_token_ids) for o in outputs)
+    num_prompts = len(outputs)
+
+    del llm
+    torch.accelerator.empty_cache()
+
+    return {
+        "mode": "baseline",
+        "elapsed_s": elapsed,
+        "num_prompts": num_prompts,
+        "total_prompt_tokens": total_prompt_tokens,
+        "tokens_per_s": total_prompt_tokens / elapsed,
+        "prompts_per_s": num_prompts / elapsed,
+    }
+
+
+# ---- Async extraction benchmark ----
+
+
+async def _client_loop(
+    engine: AsyncLLM,
+    prompt_queue: asyncio.Queue,
+    consume_pool: ThreadPoolExecutor,
+    results: list[dict],
+    client_id: int,
+):
+    """A single async client: pulls prompts, submits to engine, consumes
+    hidden states as soon as each request finishes."""
+    loop = asyncio.get_event_loop()
+    while True:
+        item = await prompt_queue.get()
+        if item is None:
+            prompt_queue.task_done()
+            break
+        idx, token_ids = item
+
+        request_id = f"req-{idx}"
+        sampling_params = SamplingParams(
+            max_tokens=1,
+            output_kind=RequestOutputKind.FINAL_ONLY,
+        )
+
+        final_output = None
+        async for output in engine.generate(
+            request_id=request_id,
+            prompt={"prompt_token_ids": token_ids},
+            sampling_params=sampling_params,
+        ):
+            if output.finished:
+                final_output = output
+
+        # Consume hidden states on a thread (disk I/O)
+        path = final_output.kv_transfer_params["hidden_states_path"]
+        mean_val = await loop.run_in_executor(consume_pool, consume_hidden_states, path)
+        num_tokens = len(final_output.prompt_token_ids)
+
+        results.append(
+            {
+                "request_id": request_id,
+                "num_prompt_tokens": num_tokens,
+                "mean_hidden_value": mean_val,
+            }
+        )
+        prompt_queue.task_done()
+
+
+async def _run_extraction_async(
+    model: str,
+    prompts: list[list[int]],
+    num_clients: int,
+    layers: list[int],
+    tmpdir: str,
+    extra_args: dict,
+    profile_dir: str | None = None,
+) -> dict:
+    if profile_dir:
+        extra_args = {
+            **extra_args,
+            "profiler_config": _make_profiler_config(profile_dir),
+        }
+    engine_args = AsyncEngineArgs(
+        model=model,
+        enable_prefix_caching=False,
+        enable_chunked_prefill=False,
+        max_num_batched_tokens=40960,
+        max_model_len=40960,
+        speculative_config={
+            "method": "extract_hidden_states",
+            "num_speculative_tokens": 1,
+            "draft_model_config": {
+                "hf_config": {
+                    "eagle_aux_hidden_state_layer_ids": layers,
+                },
+            },
+        },
+        kv_transfer_config=KVTransferConfig(
+            kv_connector="ExampleHiddenStatesConnector",
+            kv_role="kv_producer",
+            kv_connector_extra_config={
+                "shared_storage_path": tmpdir,
+            },
+        ),
+        **extra_args,
+    )
+    engine = AsyncLLM.from_engine_args(engine_args)
+
+    try:
+        # Warmup: run a few prompts sequentially, cleaning up generated files
+        for i in range(min(4, len(prompts))):
+            sp = SamplingParams(max_tokens=1, output_kind=RequestOutputKind.FINAL_ONLY)
+            final_output = None
+            async for output in engine.generate(
+                request_id=f"warmup-{i}",
+                prompt={"prompt_token_ids": prompts[i]},
+                sampling_params=sp,
+            ):
+                if output.finished:
+                    final_output = output
+            if final_output and final_output.kv_transfer_params:
+                path = final_output.kv_transfer_params.get("hidden_states_path")
+                if path:
+                    example_hidden_states_connector.cleanup_hidden_states(path)
+
+        if profile_dir:
+            await engine.start_profile()
+
+        # Fill prompt queue
+        prompt_queue: asyncio.Queue = asyncio.Queue()
+        for idx, token_ids in enumerate(prompts):
+            prompt_queue.put_nowait((idx, token_ids))
+        # Sentinel per client
+        for _ in range(num_clients):
+            prompt_queue.put_nowait(None)
+
+        results: list[dict] = []
+        consume_pool = ThreadPoolExecutor(max_workers=num_clients)
+
+        t0 = time.perf_counter()
+        tasks = [
+            asyncio.create_task(
+                _client_loop(engine, prompt_queue, consume_pool, results, i)
+            )
+            for i in range(num_clients)
+        ]
+        await asyncio.gather(*tasks)
+        elapsed = time.perf_counter() - t0
+
+        consume_pool.shutdown(wait=True)
+
+        if profile_dir:
+            await engine.stop_profile()
+
+        total_prompt_tokens = sum(r["num_prompt_tokens"] for r in results)
+        num_prompts = len(results)
+        mean_hidden = sum(r["mean_hidden_value"] for r in results) / max(
+            len(results), 1
+        )
+
+        return {
+            "mode": "extract",
+            "elapsed_s": elapsed,
+            "num_prompts": num_prompts,
+            "total_prompt_tokens": total_prompt_tokens,
+            "tokens_per_s": total_prompt_tokens / elapsed,
+            "prompts_per_s": num_prompts / elapsed,
+            "mean_hidden_value": mean_hidden,
+        }
+    finally:
+        engine.shutdown()
+
+
+def run_extraction(
+    model: str,
+    prompts: list[list[int]],
+    num_clients: int,
+    layers: list[int],
+    extra_args: dict,
+    profile_dir: str | None = None,
+) -> dict:
+    return asyncio.run(
+        _run_extraction_async(
+            model,
+            prompts,
+            num_clients,
+            layers,
+            "/dev/shm",
+            extra_args,
+            profile_dir=profile_dir,
+        )
+    )
+
+
+def print_results(results: dict):
+    mode = results["mode"]
+    print(f"\n{'=' * 60}")
+    print(f"  {mode.upper()} RESULTS")
+    print(f"{'=' * 60}")
+    print(f"  Prompts:             {results['num_prompts']}")
+    print(f"  Total prompt tokens: {results['total_prompt_tokens']:,}")
+    print(f"  Wall time:           {results['elapsed_s']:.2f}s")
+    print(f"  Tokens/s:            {results['tokens_per_s']:,.0f}")
+    print(f"  Prompts/s:           {results['prompts_per_s']:.2f}")
+    if mode == "extract":
+        print(f"  Mean hidden value:   {results['mean_hidden_value']:.6f}")
+    print(f"{'=' * 60}\n")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Benchmark hidden state extraction throughput"
+    )
+    parser.add_argument("--model", type=str, required=True)
+    parser.add_argument("--num-prompts", type=int, default=64)
+    parser.add_argument("--num-clients", type=int, default=8)
+    parser.add_argument("--prompt-len", type=int, default=8192)
+    parser.add_argument("--layers", type=int, nargs="+", default=[1, 2, 3, 4])
+    parser.add_argument("--skip-baseline", action="store_true")
+    parser.add_argument("--skip-extract", action="store_true")
+    parser.add_argument("--gpu-memory-utilization", type=float, default=0.9)
+    parser.add_argument("--max-num-batched-tokens", type=int, default=None)
+    parser.add_argument("--max-cudagraph-capture-size", type=int, default=None)
+    parser.add_argument("--max-model-len", type=int, default=None)
+    parser.add_argument("--enforce-eager", action="store_true")
+    parser.add_argument("--load-format", type=str, default=None)
+    parser.add_argument(
+        "--profile",
+        action="store_true",
+        help="Enable torch profiler for both baseline and extraction runs.",
+    )
+    parser.add_argument(
+        "--torch-profiler-dir",
+        type=str,
+        default="./vllm_profile",
+        help="Directory to save torch profiler traces (default: ./vllm_profile).",
+    )
+    parser.add_argument(
+        "--enable-flashinfer-autotune",
+        action="store_true",
+        default=False,
+        help="Enable FlashInfer autotuning (can be slow).",
+    )
+    args = parser.parse_args()
+
+    extra_args = {
+        "gpu_memory_utilization": args.gpu_memory_utilization,
+    }
+    if args.max_model_len is not None:
+        extra_args["max_model_len"] = args.max_model_len
+    if args.max_num_batched_tokens is not None:
+        extra_args["max_num_batched_tokens"] = args.max_num_batched_tokens
+        if args.max_model_len and args.max_num_batched_tokens < args.max_model_len:
+            raise ValueError(
+                "max_num_batched_tokens must be >= max_model_len since chunked prefill"
+                " is not supported by hidden state extraction."
+            )
+    if args.enforce_eager:
+        extra_args["enforce_eager"] = True
+    if args.load_format is not None:
+        extra_args["load_format"] = args.load_format
+    if args.max_cudagraph_capture_size is not None:
+        extra_args["max_cudagraph_capture_size"] = args.max_cudagraph_capture_size
+    extra_args["enable_flashinfer_autotune"] = args.enable_flashinfer_autotune
+
+    # Get vocab size from HF config without loading the full model
+    hf_config = AutoConfig.from_pretrained(args.model, trust_remote_code=True)
+    vocab_size = hf_config.vocab_size
+    prompts = make_random_prompts(args.num_prompts, args.prompt_len, vocab_size)
+    print(
+        f"Generated {args.num_prompts} prompts, "
+        f"{args.prompt_len} tokens each (vocab {vocab_size})"
+    )
+
+    profile_dir = args.torch_profiler_dir if args.profile else None
+    if profile_dir:
+        print(f"Torch profiler enabled, traces will be saved to {profile_dir}/")
+
+    if not args.skip_baseline:
+        baseline_profile_dir = f"{profile_dir}/baseline" if profile_dir else None
+        baseline = run_baseline(
+            args.model, prompts, extra_args, profile_dir=baseline_profile_dir
+        )
+        print_results(baseline)
+
+    if not args.skip_extract:
+        extract_profile_dir = f"{profile_dir}/extract" if profile_dir else None
+        extract = run_extraction(
+            args.model,
+            prompts,
+            args.num_clients,
+            args.layers,
+            extra_args,
+            profile_dir=extract_profile_dir,
+        )
+        print_results(extract)
+
+    if not args.skip_baseline and not args.skip_extract:
+        slowdown = baseline["tokens_per_s"] / extract["tokens_per_s"]
+        print("Extraction slowdown factor: {:.2f}x".format(slowdown))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py
index 33aca831883a..664fa58dd49f 100644
--- a/benchmarks/benchmark_serving_structured_output.py
+++ b/benchmarks/benchmark_serving_structured_output.py
@@ -115,6 +115,39 @@ class SampleRequest:
 def sample_requests(
     tokenizer: PreTrainedTokenizerBase, args: argparse.Namespace
 ) -> list[SampleRequest]:
+    def _apply_random_prefix(
+        tokenizer: PreTrainedTokenizerBase,
+        requests: list[SampleRequest],
+        prefix_len: int,
+        seed: int,
+    ) -> list[SampleRequest]:
+        if prefix_len <= 0:
+            return requests
+        rng = np.random.default_rng(seed)
+        vocab_size = tokenizer.vocab_size
+        prohibited = getattr(tokenizer, "all_special_ids", None) or []
+        allowed = np.array([i for i in range(vocab_size) if i not in prohibited])
+        if len(allowed) == 0:
+            return requests
+        prefix_ids = rng.integers(0, len(allowed), size=prefix_len)
+        prefix_token_ids = allowed[prefix_ids].tolist()
+        out = []
+        for req in requests:
+            prompt_ids = tokenizer(req.prompt, add_special_tokens=False).input_ids
+            full_ids = prefix_token_ids + prompt_ids
+            full_prompt = tokenizer.decode(full_ids, skip_special_tokens=False)
+            out.append(
+                SampleRequest(
+                    prompt=full_prompt,
+                    prompt_len=len(tokenizer(full_prompt).input_ids),
+                    expected_output_len=req.expected_output_len,
+                    schema=req.schema,
+                    structure_type=req.structure_type,
+                    completion=req.completion,
+                )
+            )
+        return out
+
     if args.dataset == "json" or args.dataset == "json-unique":
         if args.json_schema_path is None:
             dir_path = os.path.dirname(os.path.realpath(__file__))
@@ -261,6 +294,9 @@ def _filter_func(item):
                 )
             )
 
+    requests = _apply_random_prefix(
+        tokenizer, requests, args.random_prefix_len, args.seed
+    )
     return requests
 
 
@@ -945,6 +981,15 @@ def create_argument_parser():
         "results in a more uniform arrival of requests.",
     )
     parser.add_argument("--seed", type=int, default=0)
+    parser.add_argument(
+        "--random-prefix-len",
+        type=int,
+        default=0,
+        help=(
+            "Number of prefix tokens to prepend to every prompt. "
+            "The same prefix is used for all prompts to enable prefix caching."
+        ),
+    )
     parser.add_argument(
         "--trust-remote-code",
         action="store_true",
diff --git a/benchmarks/fused_kernels/merge_attn_states_benchmarks.py b/benchmarks/fused_kernels/merge_attn_states_benchmarks.py
new file mode 100644
index 000000000000..26b04299b353
--- /dev/null
+++ b/benchmarks/fused_kernels/merge_attn_states_benchmarks.py
@@ -0,0 +1,264 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Benchmark: Fused FP8 output quantization in merge_attn_states
+
+Compares fused vs unfused approaches for producing FP8-quantized merged
+attention output:
+  1. Fused CUDA     -- single CUDA kernel (merge + FP8 quant)
+  2. Fused Triton   -- single Triton kernel (merge + FP8 quant)
+  3. Unfused CUDA   -- CUDA merge + torch.compiled FP8 quant
+  4. Unfused Triton  -- Triton merge + torch.compiled FP8 quant
+
+Usage:
+    python benchmarks/fused_kernels/merge_attn_states_benchmarks.py
+    python benchmarks/fused_kernels/merge_attn_states_benchmarks.py --tp 1 4 8
+    python benchmarks/fused_kernels/merge_attn_states_benchmarks.py --dtype bfloat16
+"""
+
+import argparse
+import itertools
+
+import torch
+
+from vllm._custom_ops import merge_attn_states as merge_attn_states_cuda
+from vllm.benchmarks.lib.utils import default_vllm_config
+from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
+from vllm.model_executor.layers.quantization.utils.quant_utils import GroupShape
+from vllm.platforms import current_platform
+from vllm.triton_utils import triton
+from vllm.v1.attention.ops.triton_merge_attn_states import (
+    merge_attn_states as merge_attn_states_triton,
+)
+
+# ---------------------------------------------------------------------------
+# Configuration defaults
+# ---------------------------------------------------------------------------
+
+NUM_TOKENS_LIST = [1, 16, 64, 256, 1024, 4096]
+
+# (label, num_heads, head_size) — num_heads is for TP=1
+HEAD_CONFIGS = [
+    ("DeepSeek-V3 MLA", 128, 128),
+    ("Llama-70B", 64, 128),
+    ("Llama-8B", 32, 128),
+]
+
+TP_SIZES = [1, 2, 4, 8]
+
+INPUT_DTYPES = [torch.float32, torch.float16, torch.bfloat16]
+
+QUANTILES = [0.5, 0.2, 0.8]
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def short_dtype(dtype: torch.dtype) -> str:
+    return str(dtype).removeprefix("torch.")
+
+
+def make_inputs(
+    num_tokens: int,
+    num_heads: int,
+    head_size: int,
+    dtype: torch.dtype,
+):
+    """Create random prefix/suffix outputs and LSEs."""
+    prefix_output = torch.randn(
+        (num_tokens, num_heads, head_size), dtype=dtype, device="cuda"
+    )
+    suffix_output = torch.randn(
+        (num_tokens, num_heads, head_size), dtype=dtype, device="cuda"
+    )
+    prefix_lse = torch.randn(num_heads, num_tokens, dtype=torch.float32, device="cuda")
+    suffix_lse = torch.randn(num_heads, num_tokens, dtype=torch.float32, device="cuda")
+    # Sprinkle some inf values to exercise edge-case paths
+    mask = torch.rand(num_heads, num_tokens, device="cuda") < 0.05
+    prefix_lse[mask] = float("inf")
+    mask2 = torch.rand(num_heads, num_tokens, device="cuda") < 0.05
+    suffix_lse[mask2] = float("inf")
+    return prefix_output, suffix_output, prefix_lse, suffix_lse
+
+
+def build_configs(head_configs, num_tokens_list, input_dtypes, tp_sizes):
+    """Build (num_tokens, num_heads, head_size, dtype_str) config tuples,
+    applying TP division to num_heads and skipping invalid combos."""
+    configs = []
+    for (_, nh, hs), nt, dtype, tp in itertools.product(
+        head_configs, num_tokens_list, input_dtypes, tp_sizes
+    ):
+        nh_tp = nh // tp
+        if nh_tp >= 1:
+            configs.append((nt, nh_tp, hs, short_dtype(dtype)))
+    return configs
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Benchmark merge_attn_states fused FP8 quantization"
+    )
+    parser.add_argument(
+        "--num-tokens",
+        type=int,
+        nargs="+",
+        default=None,
+        help=f"Override token counts (default: {NUM_TOKENS_LIST})",
+    )
+    parser.add_argument(
+        "--tp",
+        type=int,
+        nargs="+",
+        default=None,
+        help=f"TP sizes to simulate (divides num_heads) (default: {TP_SIZES})",
+    )
+    parser.add_argument(
+        "--dtype",
+        type=str,
+        nargs="+",
+        default=None,
+        help="Input dtypes (e.g. bfloat16 float16 float32). "
+        f"Default: {[short_dtype(d) for d in INPUT_DTYPES]}",
+    )
+    return parser.parse_args()
+
+
+# ---------------------------------------------------------------------------
+# Parse args and build configs before decorators
+# ---------------------------------------------------------------------------
+
+args = parse_args()
+
+num_tokens_list = args.num_tokens if args.num_tokens else NUM_TOKENS_LIST
+tp_sizes = args.tp if args.tp else TP_SIZES
+
+if args.dtype:
+    from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
+
+    input_dtypes = [STR_DTYPE_TO_TORCH_DTYPE[d] for d in args.dtype]
+else:
+    input_dtypes = INPUT_DTYPES
+
+configs = build_configs(HEAD_CONFIGS, num_tokens_list, input_dtypes, tp_sizes)
+
+torch._dynamo.config.recompile_limit = 8888
+
+
+# ---------------------------------------------------------------------------
+# Benchmark function
+# ---------------------------------------------------------------------------
+
+
+@triton.testing.perf_report(
+    triton.testing.Benchmark(
+        x_names=["num_tokens", "num_heads", "head_size", "dtype_str"],
+        x_vals=configs,
+        line_arg="provider",
+        line_vals=["fused_cuda", "fused_triton", "unfused_cuda", "unfused_triton"],
+        line_names=["Fused CUDA", "Fused Triton", "Unfused CUDA", "Unfused Triton"],
+        styles=[("blue", "-"), ("green", "-"), ("blue", "--"), ("green", "--")],
+        ylabel="us",
+        plot_name="merge_attn_states FP8 (fused vs unfused)",
+        args={},
+    )
+)
+@default_vllm_config()
+def benchmark(num_tokens, num_heads, head_size, dtype_str, provider):
+    input_dtype = getattr(torch, dtype_str)
+    fp8_dtype = current_platform.fp8_dtype()
+    prefix_out, suffix_out, prefix_lse, suffix_lse = make_inputs(
+        num_tokens, num_heads, head_size, input_dtype
+    )
+    output_scale = torch.tensor([0.1], dtype=torch.float32, device="cuda")
+
+    if provider == "fused_cuda":
+        output = torch.empty(
+            (num_tokens, num_heads, head_size), dtype=fp8_dtype, device="cuda"
+        )
+        fn = lambda: merge_attn_states_cuda(
+            output,
+            prefix_out,
+            prefix_lse,
+            suffix_out,
+            suffix_lse,
+            output_scale=output_scale,
+        )
+    elif provider == "fused_triton":
+        output = torch.empty(
+            (num_tokens, num_heads, head_size), dtype=fp8_dtype, device="cuda"
+        )
+        fn = lambda: merge_attn_states_triton(
+            output,
+            prefix_out,
+            prefix_lse,
+            suffix_out,
+            suffix_lse,
+            output_scale=output_scale,
+        )
+    elif provider == "unfused_cuda":
+        merge_buf = torch.empty(
+            (num_tokens, num_heads, head_size), dtype=input_dtype, device="cuda"
+        )
+        quant_fp8 = QuantFP8(
+            static=True,
+            group_shape=GroupShape.PER_TENSOR,
+            column_major_scales=False,
+        )
+        quant_input = merge_buf.view(-1, head_size)
+        compiled_quant = torch.compile(
+            quant_fp8.forward_native, fullgraph=True, dynamic=False
+        )
+
+        def unfused_fn():
+            merge_attn_states_cuda(
+                merge_buf, prefix_out, prefix_lse, suffix_out, suffix_lse
+            )
+            compiled_quant(quant_input, output_scale)
+
+        fn = unfused_fn
+    else:  # unfused_triton
+        merge_buf = torch.empty(
+            (num_tokens, num_heads, head_size), dtype=input_dtype, device="cuda"
+        )
+        quant_fp8 = QuantFP8(
+            static=True,
+            group_shape=GroupShape.PER_TENSOR,
+            column_major_scales=False,
+        )
+        quant_input = merge_buf.view(-1, head_size)
+        compiled_quant = torch.compile(
+            quant_fp8.forward_native, fullgraph=True, dynamic=False
+        )
+
+        def unfused_fn():
+            merge_attn_states_triton(
+                merge_buf, prefix_out, prefix_lse, suffix_out, suffix_lse
+            )
+            compiled_quant(quant_input, output_scale)
+
+        fn = unfused_fn
+
+    ms, min_ms, max_ms = triton.testing.do_bench_cudagraph(fn, quantiles=QUANTILES)
+    return 1000 * ms, 1000 * max_ms, 1000 * min_ms  # us
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+def main():
+    device_name = current_platform.get_device_name()
+    print(f"Device: {device_name}")
+    print(f"Token counts: {num_tokens_list}")
+    print(f"TP sizes: {tp_sizes}")
+    print(f"Input dtypes: {[short_dtype(d) for d in input_dtypes]}")
+    print(f"Head configs: {[(c[0], c[1], c[2]) for c in HEAD_CONFIGS]}")
+    benchmark.run(print_data=True)
+
+
+if __name__ == "__main__":
+    with torch.inference_mode():
+        main()
diff --git a/benchmarks/fused_kernels/silu_mul_block_quant_benchmark.py b/benchmarks/fused_kernels/silu_mul_block_quant_benchmark.py
new file mode 100644
index 000000000000..4e8d787bf9c7
--- /dev/null
+++ b/benchmarks/fused_kernels/silu_mul_block_quant_benchmark.py
@@ -0,0 +1,211 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Callable, Iterable
+from dataclasses import dataclass
+from itertools import product
+
+import torch
+import torch.nn.functional as F
+import torch.utils.benchmark as TBenchmark
+from torch.utils.benchmark import Measurement as TMeasurement
+from tqdm import tqdm
+
+import vllm._custom_ops as ops
+from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+    per_token_group_quant_fp8,
+)
+
+
+@dataclass
+class bench_params_t:
+    num_tokens: int
+    hidden_size: int
+    dtype: torch.dtype
+    group_size: int  # Changed from list[int] to int
+
+    def description(self):
+        return (
+            f"N {self.num_tokens} "
+            f"x D {self.hidden_size} "
+            f"x DT {self.dtype} "
+            f"x GS {self.group_size}"
+        )
+
+
+def get_bench_params() -> list[bench_params_t]:
+    """Test configurations covering common model sizes."""
+    NUM_TOKENS = [16, 128, 512, 2048]
+    HIDDEN_SIZES = [1024, 2048, 4096, 5120, 14336]  # Common FFN sizes
+    DTYPES = [torch.float16, torch.bfloat16]
+    GROUP_SIZES = [64, 128]  # Changed from [[1, 64], [1, 128]]
+
+    combinations = product(NUM_TOKENS, HIDDEN_SIZES, DTYPES, GROUP_SIZES)
+    bench_params = list(
+        map(lambda x: bench_params_t(x[0], x[1], x[2], x[3]), combinations)
+    )
+    return bench_params
+
+
+# Reference implementations
+def unfused_fp8_impl(
+    x: torch.Tensor,
+    quant_dtype: torch.dtype,
+    group_size: int,  # Changed from list[int]
+):
+    """Unfused: SiLU+Mul then per-tensor quantize."""
+    hidden = x.shape[-1] // 2
+    gate, up = x.split(hidden, dim=-1)
+
+    # SiLU(gate) * up
+    silu_out = F.silu(gate) * up
+
+    # Per-tensor quantize (no group_size used here)
+    silu_out, _ = ops.scaled_fp8_quant(silu_out)
+
+
+def unfused_groupwise_fp8_impl(
+    x: torch.Tensor,
+    quant_dtype: torch.dtype,
+    group_size: int,  # Changed from list[int]
+):
+    """Unfused: SiLU+Mul then group-wise quantize."""
+    hidden = x.shape[-1] // 2
+    gate, up = x.split(hidden, dim=-1)
+
+    # SiLU(gate) * up
+    silu_out = F.silu(gate) * up
+
+    # Group quantize - use group_size directly
+    silu_out, _ = per_token_group_quant_fp8(
+        silu_out, group_size=group_size, use_ue8m0=False
+    )
+
+
+def fused_impl(
+    x: torch.Tensor,
+    quant_dtype: torch.dtype,
+    group_size: int,
+):
+    """Fused: SiLU+Mul+Block Quantization in single kernel."""
+    out, _ = ops.silu_and_mul_per_block_quant(
+        x,
+        group_size=group_size,
+        quant_dtype=quant_dtype,
+        is_scale_transposed=False,
+    )
+
+
+# Bench functions
+def bench_fn(
+    x: torch.Tensor,
+    quant_dtype: torch.dtype,
+    group_size: int,
+    label: str,
+    sub_label: str,
+    fn: Callable,
+    description: str,
+) -> TMeasurement:
+    min_run_time = 1
+
+    globals = {
+        "x": x,
+        "quant_dtype": quant_dtype,
+        "group_size": group_size,
+        "fn": fn,
+    }
+    return TBenchmark.Timer(
+        stmt="fn(x, quant_dtype, group_size)",
+        globals=globals,
+        label=label,
+        sub_label=sub_label,
+        description=description,
+    ).blocked_autorange(min_run_time=min_run_time)
+
+
+def bench(params: bench_params_t, label: str, sub_label: str) -> Iterable[TMeasurement]:
+    """Run benchmarks for all implementations."""
+    # Make inputs: [num_tokens, hidden_size * 2] for [gate || up]
+    scale = 1 / params.hidden_size
+    x = (
+        torch.randn(
+            params.num_tokens,
+            params.hidden_size * 2,
+            dtype=params.dtype,
+            device="cuda",
+        )
+        * scale
+    )
+
+    timers = []
+
+    # Unfused per-tensor FP8
+    timers.append(
+        bench_fn(
+            x,
+            torch.float8_e4m3fn,
+            params.group_size,
+            label,
+            sub_label,
+            unfused_fp8_impl,
+            "unfused_fp8_impl",
+        )
+    )
+
+    # Unfused group-wise FP8
+    timers.append(
+        bench_fn(
+            x,
+            torch.float8_e4m3fn,
+            params.group_size,
+            label,
+            sub_label,
+            unfused_groupwise_fp8_impl,
+            "unfused_groupwise_fp8_impl",
+        )
+    )
+
+    # Fused group-wise FP8
+    timers.append(
+        bench_fn(
+            x,
+            torch.float8_e4m3fn,
+            params.group_size,
+            label,
+            sub_label,
+            fused_impl,
+            "fused_groupwise_fp8_impl",
+        )
+    )
+
+    return timers
+
+
+def print_timers(timers: Iterable[TMeasurement]):
+    compare = TBenchmark.Compare(timers)
+    compare.print()
+
+
+def main():
+    torch.set_default_device("cuda")
+    bench_params = get_bench_params()
+
+    print(f"Running {len(bench_params)} benchmark configurations...")
+    print(
+        f"This will take approximately {len(bench_params) * 3} seconds (1s per variant)"
+    )
+    print()
+
+    timers = []
+    for bp in tqdm(bench_params):
+        result_timers = bench(bp, "silu-mul-block-quant", bp.description())
+        timers.extend(result_timers)
+
+    print("\n" + "=" * 80)
+    print("FINAL COMPARISON - ALL RESULTS")
+    print("=" * 80)
+    print_timers(timers)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/entrypoints/openai/speech_to_text/__init__.py b/benchmarks/kernels/__init__.py
similarity index 100%
rename from tests/entrypoints/openai/speech_to_text/__init__.py
rename to benchmarks/kernels/__init__.py
diff --git a/benchmarks/kernels/benchmark_block_fp8_gemm.py b/benchmarks/kernels/benchmark_block_fp8_gemm.py
index 8d50c3828206..9eddc907b937 100644
--- a/benchmarks/kernels/benchmark_block_fp8_gemm.py
+++ b/benchmarks/kernels/benchmark_block_fp8_gemm.py
@@ -9,11 +9,12 @@
 import torch
 
 from vllm.benchmarks.lib.utils import default_vllm_config
-from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-    W8A8BlockFp8LinearOp,
+from vllm.model_executor.kernels.linear import (
+    init_fp8_linear_kernel,
 )
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     GroupShape,
+    create_fp8_quant_key,
 )
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
     CUTLASS_BLOCK_FP8_SUPPORTED,
@@ -70,11 +71,15 @@ def build_w8a8_block_fp8_runner(M, N, K, block_size, device, use_cutlass):
     weight_group_shape = GroupShape(block_n, block_k)
     act_quant_group_shape = GroupShape(1, block_k)  # Per-token, per-group quantization
 
-    linear_op = W8A8BlockFp8LinearOp(
-        weight_group_shape=weight_group_shape,
-        act_quant_group_shape=act_quant_group_shape,
-        cutlass_block_fp8_supported=use_cutlass,
-        use_aiter_and_is_supported=False,
+    linear_op = init_fp8_linear_kernel(
+        weight_quant_key=create_fp8_quant_key(
+            static=True, group_shape=weight_group_shape
+        ),
+        activation_quant_key=create_fp8_quant_key(
+            static=False, group_shape=act_quant_group_shape
+        ),
+        out_dtype=torch.get_default_dtype(),
+        module_name="build_w8a8_block_fp8_runner",
     )
 
     def run():
diff --git a/benchmarks/kernels/benchmark_cutlass_moe_fp8.py b/benchmarks/kernels/benchmark_cutlass_moe_fp8.py
index 3f80b024e108..03d7fb386f74 100644
--- a/benchmarks/kernels/benchmark_cutlass_moe_fp8.py
+++ b/benchmarks/kernels/benchmark_cutlass_moe_fp8.py
@@ -16,7 +16,7 @@
     maybe_make_prepare_finalize,
 )
 from vllm.model_executor.layers.fused_moe.config import fp8_w8a8_moe_quant_config
-from vllm.model_executor.layers.fused_moe.cutlass_moe import CutlassExpertsFp8
+from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import CutlassExpertsFp8
 from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts, fused_topk
 from vllm.platforms import current_platform
 from vllm.utils.argparse_utils import FlexibleArgumentParser
diff --git a/benchmarks/kernels/benchmark_cutlass_moe_nvfp4.py b/benchmarks/kernels/benchmark_cutlass_moe_nvfp4.py
index 2d4afd38c097..7379bf858889 100644
--- a/benchmarks/kernels/benchmark_cutlass_moe_nvfp4.py
+++ b/benchmarks/kernels/benchmark_cutlass_moe_nvfp4.py
@@ -22,7 +22,7 @@
     fp8_w8a8_moe_quant_config,
     nvfp4_moe_quant_config,
 )
-from vllm.model_executor.layers.fused_moe.cutlass_moe import (
+from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import (
     CutlassExpertsFp4,
 )
 from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts, fused_topk
diff --git a/benchmarks/kernels/benchmark_grouped_gemm_cutlass.py b/benchmarks/kernels/benchmark_grouped_gemm_cutlass.py
index dd4060bbdb94..04fc2960d1e4 100644
--- a/benchmarks/kernels/benchmark_grouped_gemm_cutlass.py
+++ b/benchmarks/kernels/benchmark_grouped_gemm_cutlass.py
@@ -13,7 +13,7 @@
     maybe_make_prepare_finalize,
 )
 from vllm.model_executor.layers.fused_moe.config import fp8_w8a8_moe_quant_config
-from vllm.model_executor.layers.fused_moe.cutlass_moe import CutlassExpertsFp8
+from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import CutlassExpertsFp8
 from vllm.model_executor.layers.fused_moe.fused_moe import (
     fused_experts,
     fused_topk,
diff --git a/benchmarks/kernels/benchmark_moe.py b/benchmarks/kernels/benchmark_moe.py
index 65bc38c6c755..4463a23772ee 100644
--- a/benchmarks/kernels/benchmark_moe.py
+++ b/benchmarks/kernels/benchmark_moe.py
@@ -27,10 +27,10 @@
     RoutingMethodType,
     _get_config_dtype_str,
 )
-from vllm.model_executor.layers.fused_moe.fused_moe import *
-from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import (
+from vllm.model_executor.layers.fused_moe.experts.triton_deep_gemm_moe import (
     TritonOrDeepGemmExperts,
 )
+from vllm.model_executor.layers.fused_moe.fused_moe import *
 from vllm.transformers_utils.config import get_config
 from vllm.triton_utils import triton
 from vllm.utils.argparse_utils import FlexibleArgumentParser
diff --git a/benchmarks/kernels/benchmark_moe_align_block_size.py b/benchmarks/kernels/benchmark_moe_align_block_size.py
index 5f9a131f79b0..a340500379a0 100644
--- a/benchmarks/kernels/benchmark_moe_align_block_size.py
+++ b/benchmarks/kernels/benchmark_moe_align_block_size.py
@@ -9,6 +9,7 @@
     moe_align_block_size,
 )
 from vllm.triton_utils import triton
+from vllm.utils.torch_utils import set_random_seed
 
 
 def get_topk_ids(num_tokens: int, num_experts: int, topk: int) -> torch.Tensor:
@@ -44,7 +45,7 @@ def get_topk_ids(num_tokens: int, num_experts: int, topk: int) -> torch.Tensor:
 def benchmark(num_tokens, num_experts, topk, ep_size, provider):
     """Benchmark function for Triton."""
     block_size = 256
-    torch.cuda.manual_seed_all(0)
+    set_random_seed(0)
     topk_ids = get_topk_ids(num_tokens, num_experts, topk)
 
     e_map = None
diff --git a/benchmarks/kernels/benchmark_norm_router_gemm.py b/benchmarks/kernels/benchmark_norm_router_gemm.py
new file mode 100644
index 000000000000..cd50e9159961
--- /dev/null
+++ b/benchmarks/kernels/benchmark_norm_router_gemm.py
@@ -0,0 +1,183 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Benchmark and correctness check for ``ops.dsv4_norm_router_gemm``.
+
+Two implementations are compared:
+
+  1. ``unfused``   — ``vllm_ops.rms_norm`` then ``ops.dsv3_router_gemm``,
+                     i.e. the current vLLM hot path (two kernel launches).
+  2. ``fused``     — ``ops.dsv4_norm_router_gemm``, the new single-kernel
+                     fused path.
+
+Both produce ``(normed_x: bf16, router_logits: fp32)``.  The correctness
+check verifies that ``fused`` and ``unfused`` agree to within ~1 bf16
+ULP — that is the precision floor for this op.
+"""
+
+import argparse
+
+import torch
+
+from vllm import _custom_ops as vllm_ops
+from vllm.triton_utils import triton
+
+# The fused dsv4_norm_router_gemm kernel is templated only for DSV4-Pro
+# (hidden_size=7168, num_experts=384).  Other shapes fall back to the
+# unfused path on the Python side (NormGatedLinear), so benchmark only
+# the configuration that the fused kernel actually targets.
+HIDDEN_SIZE = 7168
+NUM_EXPERTS_CHOICES = (384,)
+RMS_EPS = 1e-6
+
+
+def unfused_norm_router_gemm(
+    x: torch.Tensor,
+    norm_weight: torch.Tensor,
+    gate_weight: torch.Tensor,
+    eps: float,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    # Call ``_C::rms_norm`` directly (mirroring ``_dsv4_pro_norm_gate``'s
+    # fallback path) so the benchmarked baseline doesn't inherit any
+    # Python wrapper overhead or risk falling through to the native
+    # eager-primitive ``RMSNorm.forward_native`` path.
+    normed = torch.empty_like(x)
+    torch.ops._C.rms_norm(normed, x, norm_weight, eps)
+    logits = vllm_ops.dsv3_router_gemm(normed, gate_weight, torch.float32)
+    return normed, logits
+
+
+def fused_norm_router_gemm(
+    x: torch.Tensor,
+    norm_weight: torch.Tensor,
+    gate_weight: torch.Tensor,
+    eps: float,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    return vllm_ops.dsv4_norm_router_gemm(x, norm_weight, gate_weight, eps)
+
+
+def _make_inputs(num_tokens: int, num_experts: int, hidden_size: int, seed: int = 0):
+    torch.manual_seed(seed)
+    device = "cuda"
+    x = torch.randn(num_tokens, hidden_size, dtype=torch.bfloat16, device=device)
+    norm_w = torch.randn(hidden_size, dtype=torch.bfloat16, device=device)
+    gate_w = torch.randn(num_experts, hidden_size, dtype=torch.bfloat16, device=device)
+    # Down-scale gate_w so the GEMV output stays in a representable range.
+    gate_w = gate_w / float(hidden_size) ** 0.5
+    norm_w = (norm_w * 0.1) + 1.0
+    return x, norm_w, gate_w
+
+
+def calculate_diff(
+    num_tokens: int,
+    num_experts: int,
+    hidden_size: int = HIDDEN_SIZE,
+    normed_atol: float = 2e-3,
+    logits_atol: float = 1e-2,
+    rtol: float = 1e-2,
+) -> None:
+    x, norm_w, gate_w = _make_inputs(num_tokens, num_experts, hidden_size)
+
+    normed_unfused, logits_unfused = unfused_norm_router_gemm(
+        x.clone(), norm_w, gate_w, RMS_EPS
+    )
+    normed_fused, logits_fused = fused_norm_router_gemm(
+        x.clone(), norm_w, gate_w, RMS_EPS
+    )
+
+    def _max_abs(a, b):
+        return (a.float() - b.float()).abs().max().item()
+
+    print(f"\n=== M={num_tokens} E={num_experts} H={hidden_size} ===")
+    print(f"normed_x  |fused - unfused| = {_max_abs(normed_fused, normed_unfused):.3e}")
+    print(f"logits    |fused - unfused| = {_max_abs(logits_fused, logits_unfused):.3e}")
+
+    ok_normed = torch.allclose(
+        normed_fused.float(),
+        normed_unfused.float(),
+        atol=normed_atol,
+        rtol=rtol,
+    )
+    ok_logits = torch.allclose(
+        logits_fused.float(),
+        logits_unfused.float(),
+        atol=logits_atol,
+        rtol=rtol,
+    )
+    if ok_normed and ok_logits:
+        print(
+            f"OK   fused vs unfused within "
+            f"normed_atol={normed_atol:.0e} logits_atol={logits_atol:.0e} "
+            f"rtol={rtol:.0e}"
+        )
+    else:
+        print(
+            f"FAIL normed_ok={ok_normed} logits_ok={ok_logits}; "
+            f"see max-abs values above"
+        )
+
+
+def get_benchmark():
+    # Only num_tokens varies (DSV4-Pro hard-codes E=384); single-axis
+    # sweep yields a clean line plot with M on the x-axis.
+    num_experts = NUM_EXPERTS_CHOICES[0]
+
+    @triton.testing.perf_report(
+        triton.testing.Benchmark(
+            x_names=["num_tokens"],
+            x_vals=list(range(1, 17)),
+            line_arg="provider",
+            line_vals=["unfused", "fused"],
+            line_names=["unfused (rms+dsv3)", "fused (dsv4)"],
+            styles=[("green", "-"), ("red", "-")],
+            ylabel="us",
+            plot_name=f"norm-router-gemm-E{num_experts}-H{HIDDEN_SIZE}",
+            args={},
+        )
+    )
+    def benchmark(num_tokens, provider):
+        x, norm_w, gate_w = _make_inputs(num_tokens, num_experts, HIDDEN_SIZE)
+
+        quantiles = [0.5, 0.2, 0.8]
+        if provider == "unfused":
+            fn = lambda: unfused_norm_router_gemm(  # noqa: E731
+                x, norm_w, gate_w, RMS_EPS
+            )
+        else:
+            fn = lambda: fused_norm_router_gemm(  # noqa: E731
+                x, norm_w, gate_w, RMS_EPS
+            )
+
+        ms, min_ms, max_ms = triton.testing.do_bench(fn, quantiles=quantiles)
+        return 1000 * ms, 1000 * max_ms, 1000 * min_ms
+
+    return benchmark
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--save-path",
+        type=str,
+        default="./configs/norm_router_gemm/",
+    )
+    parser.add_argument(
+        "--skip-bench",
+        action="store_true",
+        help="Run only the correctness check, not the perf sweep.",
+    )
+    args = parser.parse_args()
+
+    # Correctness sweep over the full fast-path range M=1..16.
+    for m in range(1, 17):
+        for e in NUM_EXPERTS_CHOICES:
+            calculate_diff(num_tokens=m, num_experts=e, hidden_size=HIDDEN_SIZE)
+
+    if args.skip_bench:
+        return
+
+    benchmark = get_benchmark()
+    benchmark.run(print_data=True, save_path=args.save_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/kernels/benchmark_router_gemm.py b/benchmarks/kernels/benchmark_router_gemm.py
deleted file mode 100644
index cc63f8904c27..000000000000
--- a/benchmarks/kernels/benchmark_router_gemm.py
+++ /dev/null
@@ -1,134 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import torch
-import torch.nn.functional as F
-
-from vllm import _custom_ops as ops
-from vllm.platforms import current_platform
-from vllm.transformers_utils.config import get_config
-from vllm.triton_utils import triton
-from vllm.utils.argparse_utils import FlexibleArgumentParser
-
-# Dimensions supported by the DSV3 specialized kernel
-DSV3_SUPPORTED_NUM_EXPERTS = [256, 384]
-DSV3_SUPPORTED_HIDDEN_SIZES = [7168]
-
-# Dimensions supported by the gpt-oss specialized kernel
-GPT_OSS_SUPPORTED_NUM_EXPERTS = [32, 128]
-GPT_OSS_SUPPORTED_HIDDEN_SIZES = [2880]
-
-
-def get_batch_size_range(max_batch_size):
-    return [2**x for x in range(14) if 2**x <= max_batch_size]
-
-
-def get_model_params(config):
-    if config.architectures[0] in (
-        "DeepseekV2ForCausalLM",
-        "DeepseekV3ForCausalLM",
-        "DeepseekV32ForCausalLM",
-    ):
-        num_experts = config.n_routed_experts
-        hidden_size = config.hidden_size
-    elif config.architectures[0] in ("GptOssForCausalLM",):
-        num_experts = config.num_local_experts
-        hidden_size = config.hidden_size
-    else:
-        raise ValueError(f"Unsupported architecture: {config.architectures}")
-    return num_experts, hidden_size
-
-
-def get_benchmark(model, max_batch_size, trust_remote_code):
-    @triton.testing.perf_report(
-        triton.testing.Benchmark(
-            x_names=["batch_size"],
-            x_vals=get_batch_size_range(max_batch_size),
-            x_log=False,
-            line_arg="provider",
-            line_vals=[
-                "torch",
-                "vllm",
-            ],
-            line_names=["PyTorch", "vLLM"],
-            styles=([("blue", "-"), ("red", "-")]),
-            ylabel="TFLOPs",
-            plot_name=f"{model} router gemm throughput",
-            args={},
-        )
-    )
-    def benchmark(batch_size, provider):
-        config = get_config(model=model, trust_remote_code=trust_remote_code)
-        num_experts, hidden_size = get_model_params(config)
-
-        mat_a = torch.randn(
-            (batch_size, hidden_size), dtype=torch.bfloat16, device="cuda"
-        ).contiguous()
-        mat_b = torch.randn(
-            (num_experts, hidden_size), dtype=torch.bfloat16, device="cuda"
-        ).contiguous()
-        bias = torch.randn(
-            num_experts, dtype=torch.bfloat16, device="cuda"
-        ).contiguous()
-
-        is_hopper_or_blackwell = current_platform.is_device_capability(
-            90
-        ) or current_platform.is_device_capability_family(100)
-        allow_dsv3_router_gemm = (
-            is_hopper_or_blackwell
-            and num_experts in DSV3_SUPPORTED_NUM_EXPERTS
-            and hidden_size in DSV3_SUPPORTED_HIDDEN_SIZES
-        )
-        allow_gpt_oss_router_gemm = (
-            is_hopper_or_blackwell
-            and num_experts in GPT_OSS_SUPPORTED_NUM_EXPERTS
-            and hidden_size in GPT_OSS_SUPPORTED_HIDDEN_SIZES
-        )
-
-        has_bias = False
-        if allow_gpt_oss_router_gemm:
-            has_bias = True
-
-        quantiles = [0.5, 0.2, 0.8]
-
-        if provider == "torch":
-
-            def runner():
-                if has_bias:
-                    F.linear(mat_a, mat_b, bias)
-                else:
-                    F.linear(mat_a, mat_b)
-        elif provider == "vllm":
-
-            def runner():
-                if allow_dsv3_router_gemm:
-                    ops.dsv3_router_gemm(mat_a, mat_b, torch.bfloat16)
-                elif allow_gpt_oss_router_gemm:
-                    ops.gpt_oss_router_gemm(mat_a, mat_b, bias)
-                else:
-                    raise ValueError("Unsupported router gemm")
-
-        ms, min_ms, max_ms = triton.testing.do_bench_cudagraph(
-            runner, quantiles=quantiles
-        )
-
-        def tflops(t_ms):
-            flops = 2 * batch_size * hidden_size * num_experts
-            return flops / (t_ms * 1e-3) / 1e12
-
-        return tflops(ms), tflops(max_ms), tflops(min_ms)
-
-    return benchmark
-
-
-if __name__ == "__main__":
-    parser = FlexibleArgumentParser()
-    parser.add_argument("--model", type=str, default="openai/gpt-oss-20b")
-    parser.add_argument("--max-batch-size", default=16, type=int)
-    parser.add_argument("--trust-remote-code", action="store_true")
-    args = parser.parse_args()
-
-    # Get the benchmark function
-    benchmark = get_benchmark(args.model, args.max_batch_size, args.trust_remote_code)
-    # Run performance benchmark
-    benchmark.run(print_data=True)
diff --git a/benchmarks/kernels/benchmark_selective_state_update.py b/benchmarks/kernels/benchmark_selective_state_update.py
new file mode 100644
index 000000000000..a8b73da2aa9a
--- /dev/null
+++ b/benchmarks/kernels/benchmark_selective_state_update.py
@@ -0,0 +1,774 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Benchmark and tuning script for the Mamba selective_state_update kernel.
+
+Mirrors the fused MoE tuning workflow: sweeps (BLOCK_SIZE_M, num_warps) across
+an effective_batch grid for a given (headdim, dstate, ngroups, cache_dtype) and
+saves the best config per effective_batch to JSON. Generated configs are picked
+up by selective_state_update at runtime.
+
+Usage:
+    python -m benchmarks.kernels.benchmark_selective_state_update \
+        --all-dstates --save-configs --compare
+"""
+
+import argparse
+import json
+import os
+import sys
+from io import StringIO
+from itertools import product
+from typing import Any
+
+import torch
+
+from tests.kernels.mamba.utils import selective_state_update_ref
+from vllm.model_executor.layers.mamba.ops.mamba_ssm import (
+    _CONFIGS_DIR,
+    _canonical_cache_dtype,
+    _get_default_ssm_launch_config,
+    get_ssm_config_file_name,
+    get_ssm_device_name,
+    override_ssm_config,
+    selective_state_update,
+)
+from vllm.triton_utils import triton
+
+# bf16 shares configs with fp16 - same bit width.
+_SSM_CACHE_DTYPE_MAP: dict[str, torch.dtype] = {
+    "float32": torch.float32,
+    "float16": torch.float16,
+    "bfloat16": torch.float16,
+}
+
+_RESULTS_DIR = os.path.dirname(os.path.realpath(__file__))
+
+# ---------------------------------------------------------------------------
+# Tuning search space
+# ---------------------------------------------------------------------------
+
+_BSM_CHOICES_ALL = [4, 8, 16, 32, 64, 128, 256]
+
+NUM_WARPS_CHOICES = [1, 2, 4, 8]
+
+
+def _block_size_m_choices(headdim: int) -> list[int]:
+    """BLOCK_SIZE_M candidates worth sweeping for a given headdim.
+
+    BLOCK_SIZE_M > next_pow2(headdim) wastes >=50% of each tile via masking
+    (offs_m >= dim rows are zeroed out), so we cap the sweep there.
+    """
+    ceiling = 1
+    while ceiling < headdim:
+        ceiling <<= 1
+    return [b for b in _BSM_CHOICES_ALL if b <= ceiling]
+
+
+# Default deployment shapes. effective_batch = batch * nheads scales the
+# kernel grid, so configs transfer across (model, TP) combos sharing
+# (headdim, dstate, cache_dtype).
+DEFAULT_BATCH_SIZES = [1, 8, 16, 32, 64, 128, 256, 512, 1024, 1536, 2048]
+DEFAULT_NHEADS = [128, 256]
+
+ALL_DSTATES = [16, 32, 64, 128, 256]
+
+# Default tuning shape — matches Nemotron-3-Super and Nemotron-3-Nano Mamba layers.
+# Override with CLI flags for other architectures.
+DEFAULT_HEADDIM = 64
+DEFAULT_NGROUPS = 8
+
+
+# ---------------------------------------------------------------------------
+# Benchmark helper
+# ---------------------------------------------------------------------------
+
+
+def _make_inputs(
+    batch: int,
+    nheads: int,
+    dim: int,
+    dstate: int,
+    ngroups: int,
+    dtype: torch.dtype,
+    state_dtype: torch.dtype | None = None,
+    device: str = "cuda",
+):
+    if state_dtype is None:
+        state_dtype = dtype
+    state = torch.randn(batch, nheads, dim, dstate, dtype=state_dtype, device=device)
+    x = torch.randn(batch, nheads, dim, dtype=dtype, device=device)
+    dt = torch.randn(batch, nheads, dim, dtype=dtype, device=device)
+    A = -torch.rand(nheads, dim, dstate, dtype=torch.float32, device=device)
+    B = torch.randn(batch, ngroups, dstate, dtype=dtype, device=device)
+    C = torch.randn(batch, ngroups, dstate, dtype=dtype, device=device)
+    D = torch.randn(nheads, dim, dtype=dtype, device=device)
+    dt_bias = torch.randn(nheads, dim, dtype=dtype, device=device)
+    out = torch.zeros(batch, nheads, dim, dtype=dtype, device=device)
+    return state, x, dt, A, B, C, D, dt_bias, out
+
+
+def benchmark_config(
+    batch: int,
+    nheads: int,
+    dim: int,
+    dstate: int,
+    ngroups: int,
+    block_size_m: int,
+    num_warps_val: int,
+    dtype: torch.dtype,
+    state_dtype: torch.dtype | None = None,
+    num_iters: int = 100,
+    num_warmup: int = 20,
+    graph_batch_size: int = 10,
+) -> float | None:
+    """
+    Time one (BLOCK_SIZE_M, num_warps) config for selective_state_update.
+    Returns elapsed time in microseconds, or None on error.
+
+    Uses CUDA graph capture-and-replay to isolate kernel time from Python
+    eager-mode dispatch / kwarg-resolution overhead, mirroring the timing
+    methodology in benchmarks/kernels/benchmark_moe.py.
+    """
+    state, x, dt, A, B, C, D, dt_bias, out = _make_inputs(
+        batch, nheads, dim, dstate, ngroups, dtype, state_dtype=state_dtype
+    )
+
+    def _call_kernel() -> None:
+        selective_state_update(
+            state,
+            x,
+            dt,
+            A,
+            B,
+            C,
+            D=D,
+            z=None,
+            dt_bias=dt_bias,
+            dt_softplus=True,
+            out=out,
+        )
+
+    try:
+        with override_ssm_config((block_size_m, num_warps_val)):
+            # Eager-mode warmup: triggers Triton autotune / JIT, primes caches.
+            for _ in range(num_warmup):
+                _call_kernel()
+            torch.accelerator.synchronize()
+
+            # Capture graph_batch_size invocations into a CUDA graph so the
+            # timed region runs without Python dispatch overhead per call.
+            graph = torch.cuda.CUDAGraph()
+            with torch.cuda.graph(graph):
+                for _ in range(graph_batch_size):
+                    _call_kernel()
+            torch.accelerator.synchronize()
+
+            # Warmup graph replays (let the runtime stabilize).
+            for _ in range(5):
+                graph.replay()
+            torch.accelerator.synchronize()
+
+            start = torch.cuda.Event(enable_timing=True)
+            end = torch.cuda.Event(enable_timing=True)
+            latencies: list[float] = []
+            for _ in range(num_iters):
+                start.record()
+                graph.replay()
+                end.record()
+                end.synchronize()
+                latencies.append(start.elapsed_time(end))
+            graph.reset()
+        # elapsed_time returns ms; each replay runs graph_batch_size kernels,
+        # so divide by (num_iters * graph_batch_size) and convert ms -> us.
+        return sum(latencies) / (num_iters * graph_batch_size) * 1000
+    except Exception as e:
+        if "OutOfResources" not in str(e):
+            print(
+                f"    Warning: config M={block_size_m},w={num_warps_val} "
+                f"raised {type(e).__name__}: {e}"
+            )
+        return None
+
+
+# ---------------------------------------------------------------------------
+# Tuning loop
+# ---------------------------------------------------------------------------
+
+
+# CUDA grid Y/Z dim limit — both `batch` and `nheads` must fit individually.
+_CUDA_MAX_GRID_DIM = 65535
+
+# Above this, kernel state-offset arithmetic (batch * nheads * headdim * dstate)
+# overflows int32 and the launch raises cudaErrorIllegalAddress.
+# 262144 covers Nemotron Super TP1 BS=2048.
+_MAX_EFFECTIVE_BATCH = 262144
+
+
+def expand_batch_x_nheads(
+    batch_sizes: list[int],
+    nheads_list: list[int],
+    ngroups: int,
+) -> list[tuple[int, int, int]]:
+    """Cross-product batch_sizes × nheads_list → sorted [(effective_batch,
+    batch, nheads)], deduped by effective_batch. Filters pairs that exceed
+    the CUDA grid dim limit, the effective_batch ceiling, or where nheads is
+    not a positive multiple of ngroups.
+    """
+    seen: dict[int, tuple[int, int]] = {}
+    skipped_grid: list[tuple[int, int]] = []
+    skipped_ngroups: list[tuple[int, int]] = []
+    skipped_eb: list[tuple[int, int]] = []
+    for b, n in product(batch_sizes, nheads_list):
+        if b <= 0 or n <= 0:
+            continue
+        if b > _CUDA_MAX_GRID_DIM or n > _CUDA_MAX_GRID_DIM:
+            skipped_grid.append((b, n))
+            continue
+        if n % ngroups != 0:
+            skipped_ngroups.append((b, n))
+            continue
+        if b * n > _MAX_EFFECTIVE_BATCH:
+            skipped_eb.append((b, n))
+            continue
+        seen.setdefault(b * n, (b, n))
+    if skipped_grid:
+        print(
+            f"  Note: skipping (batch, nheads) pairs exceeding CUDA grid dim "
+            f"{_CUDA_MAX_GRID_DIM}: {skipped_grid}"
+        )
+    if skipped_ngroups:
+        print(
+            f"  Note: skipping (batch, nheads) pairs where nheads % ngroups != 0 "
+            f"for ngroups={ngroups}: {skipped_ngroups}"
+        )
+    if skipped_eb:
+        print(
+            f"  Note: skipping (batch, nheads) pairs whose effective_batch "
+            f"exceeds {_MAX_EFFECTIVE_BATCH}: {skipped_eb}"
+        )
+    return sorted((eb, b, n) for eb, (b, n) in seen.items())
+
+
+def tune_dstate(
+    dstate: int,
+    headdim: int,
+    ngroups: int,
+    dtype: torch.dtype,
+    num_iters: int,
+    verbose: bool,
+    active: list[tuple[int, int, int]],
+    state_dtype: torch.dtype | None = None,
+) -> tuple[dict[int, dict], dict[int, dict[tuple[int, int], float]]]:
+    """For each (effective_batch, batch, nheads) in *active*, sweep
+    (BLOCK_SIZE_M, num_warps) and return
+    ({effective_batch: best_config}, {effective_batch: {(bsm, nw): us}}).
+    The second map is the full timing grid, used downstream so we don't
+    re-measure the same config in the comparison phase.
+    """
+    best_per_eb: dict[int, dict] = {}
+    timings: dict[int, dict[tuple[int, int], float]] = {}
+
+    print(f"\n{'=' * 74}")
+    effective_state_dtype = state_dtype if state_dtype is not None else dtype
+    print(
+        f"Tuning  headdim={headdim}  dstate={dstate}  ngroups={ngroups}  "
+        f"dtype={dtype}  ssm_cache_dtype={effective_state_dtype}"
+    )
+    print(f"{'=' * 74}")
+
+    bsm_choices = _block_size_m_choices(headdim)
+    print(f"BSM candidates (capped at next_pow2(headdim={headdim})): {bsm_choices}")
+
+    hdr = f"{'EffBatch':>8} | {'BLOCK_M':>7} | {'warps':>5} | {'us':>10} | note"
+    print(hdr)
+    print("-" * 52)
+
+    for eb, batch, nheads in active:
+        best_time = float("inf")
+        best_cfg: dict = {}
+        eb_timings: dict[tuple[int, int], float] = {}
+
+        for bsm, nw in product(bsm_choices, NUM_WARPS_CHOICES):
+            t = benchmark_config(
+                batch=batch,
+                nheads=nheads,
+                dim=headdim,
+                dstate=dstate,
+                ngroups=ngroups,
+                block_size_m=bsm,
+                num_warps_val=nw,
+                dtype=dtype,
+                state_dtype=state_dtype,
+                num_iters=num_iters,
+            )
+            if t is None:
+                continue
+            eb_timings[(bsm, nw)] = t
+            is_best = t < best_time
+            if is_best:
+                best_time = t
+                best_cfg = {"BLOCK_SIZE_M": bsm, "num_warps": nw}
+            if verbose:
+                marker = " <-- best" if is_best else ""
+                print(f"{eb:>8} | {bsm:>7} | {nw:>5} | {t:>10.2f} |{marker}")
+
+        timings[eb] = eb_timings
+
+        if not best_cfg:
+            print(
+                f"{eb:>8} | {'-':>7} | {'-':>5} | {'-':>10} | "
+                f"no working config (skipped)"
+            )
+            continue
+
+        if not verbose:
+            print(
+                f"{eb:>8} | {best_cfg['BLOCK_SIZE_M']:>7} | "
+                f"{best_cfg['num_warps']:>5} | {best_time:>10.2f} | best"
+            )
+
+        best_per_eb[eb] = best_cfg
+
+    return best_per_eb, timings
+
+
+# ---------------------------------------------------------------------------
+# Correctness validation
+# ---------------------------------------------------------------------------
+
+
+def validate_configs(
+    dstate: int,
+    headdim: int,
+    ngroups: int,
+    tuned: dict[int, dict],
+    active: list[tuple[int, int, int]],
+    dtype: torch.dtype,
+    atol: float = 1e-2,
+    rtol: float = 1e-2,
+    state_dtype: torch.dtype | None = None,
+) -> dict[int, bool]:
+    """
+    For every (effective_batch, batch, nheads) in *active* that has a tuned
+    config, run the kernel with that config and compare against the reference.
+    Returns {effective_batch: passed}.
+    """
+    # Disable TF32 so the reference's matmul matches the Triton kernel's
+    # fp32 accumulation; otherwise large ebs show bf16 rounding mismatches.
+    torch.set_float32_matmul_precision("highest")
+
+    print(f"\n{'=' * 74}")
+    effective_state_dtype = state_dtype if state_dtype is not None else dtype
+    print(
+        f"Validation  headdim={headdim}  dstate={dstate}  ngroups={ngroups}  "
+        f"dtype={dtype}  ssm_cache_dtype={effective_state_dtype}  atol={atol}"
+    )
+    print(f"{'=' * 74}")
+    print(f"{'EffBatch':>8} | {'MaxAbsErr':>12} | {'Status':>8}")
+    print("-" * 36)
+
+    results: dict[int, bool] = {}
+
+    for eb, batch, nheads in active:
+        cfg = tuned.get(eb)
+        if cfg is None:
+            continue
+        state, x, dt, A, B, C, D, dt_bias, out = _make_inputs(
+            batch=batch,
+            nheads=nheads,
+            dim=headdim,
+            dstate=dstate,
+            ngroups=ngroups,
+            dtype=dtype,
+            state_dtype=state_dtype,
+        )
+        # Clone state before GPU kernel modifies it in-place
+        state_ref = state.clone()
+
+        with override_ssm_config((cfg["BLOCK_SIZE_M"], cfg["num_warps"])):
+            selective_state_update(
+                state,
+                x,
+                dt,
+                A,
+                B,
+                C,
+                D=D,
+                z=None,
+                dt_bias=dt_bias,
+                dt_softplus=True,
+                out=out,
+            )
+        torch.accelerator.synchronize()
+        gpu_out = out.detach().cpu()
+
+        # Reference uses the original (unmodified) state
+        # Upcast to fp32 so the reference sums in fp32 (matches the Triton
+        # kernel); summing in bf16 over `dstate` blows up the error.
+        ref_out = (
+            selective_state_update_ref(
+                state_ref.float(),
+                x.float(),
+                dt.float(),
+                A.float(),
+                B.float(),
+                C.float(),
+                D=D.float(),
+                dt_bias=dt_bias.float(),
+                dt_softplus=True,
+            )
+            .to(out.dtype)
+            .cpu()
+        )
+
+        passed = torch.allclose(gpu_out.float(), ref_out.float(), atol=atol, rtol=rtol)
+        max_err = (gpu_out.float() - ref_out.float()).abs().max().item()
+        status = "PASS" if passed else "FAIL"
+        results[eb] = passed
+        print(f"{eb:>8} | {max_err:>12.6f} | {status:>8}")
+
+    n_pass = sum(results.values())
+    n_total = len(results)
+    print(f"\n  {n_pass}/{n_total} configs passed validation for dstate={dstate}")
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Save configs
+# ---------------------------------------------------------------------------
+
+
+def save_configs(
+    headdim: int,
+    dstate: int,
+    cache_dtype: str,
+    configs: dict[int, dict],
+    save_dir: str | None = None,
+) -> str:
+    # bf16 shares configs with fp16, use common filename for both
+    cache_dtype = _canonical_cache_dtype(cache_dtype)
+
+    base_dir = save_dir if save_dir else _CONFIGS_DIR
+    os.makedirs(base_dir, exist_ok=True)
+    file_path = os.path.join(
+        base_dir,
+        get_ssm_config_file_name(headdim, dstate, cache_dtype, get_ssm_device_name()),
+    )
+    # triton_version is informational only, the loader ignores it
+    payload: dict[str, Any] = {
+        "triton_version": triton.__version__,
+        **{str(k): v for k, v in sorted(configs.items())},
+    }
+    with open(file_path, "w") as f:
+        json.dump(payload, f, indent=4)
+    return file_path
+
+
+# ---------------------------------------------------------------------------
+# Comparison table
+# ---------------------------------------------------------------------------
+
+
+def current_heuristic(dstate: int, is_blackwell: bool = False) -> dict:
+    """Return the current hard-coded BLOCK_SIZE_M / num_warps for dstate."""
+    bsm, nw = _get_default_ssm_launch_config(dstate, is_blackwell)
+    return {"BLOCK_SIZE_M": bsm, "num_warps": nw}
+
+
+def compare_heuristic_vs_tuned(
+    dstate: int,
+    headdim: int,
+    ngroups: int,
+    tuned: dict[int, dict],
+    timings: dict[int, dict[tuple[int, int], float]],
+    active: list[tuple[int, int, int]],
+    dtype: torch.dtype,
+    num_iters: int,
+    is_blackwell: bool,
+    state_dtype: torch.dtype | None = None,
+):
+    heur_cfg = current_heuristic(dstate, is_blackwell)
+    heur_key = (heur_cfg["BLOCK_SIZE_M"], heur_cfg["num_warps"])
+
+    print(f"\n{'=' * 74}")
+    print(
+        f"Comparison  headdim={headdim}  dstate={dstate}  "
+        f"ngroups={ngroups}  —  heuristic vs tuned"
+    )
+    print(
+        f"Heuristic: BLOCK_SIZE_M={heur_cfg['BLOCK_SIZE_M']}, "
+        f"num_warps={heur_cfg['num_warps']}"
+    )
+    print(f"{'=' * 74}")
+    hdr = (
+        f"{'EffBatch':>8} | {'Heur(us)':>10} | {'Tuned(us)':>10} | "
+        f"{'Speedup':>8} | Best config"
+    )
+    print(hdr)
+    print("-" * len(hdr))
+
+    for eb, batch, nheads in active:
+        eb_timings = timings.get(eb, {})
+
+        # Heuristic timing: reuse the tuning measurement if the heuristic
+        # config was in the swept grid; otherwise measure it once.
+        t_h = eb_timings.get(heur_key)
+        if t_h is None:
+            t_h = benchmark_config(
+                batch=batch,
+                nheads=nheads,
+                dim=headdim,
+                dstate=dstate,
+                ngroups=ngroups,
+                block_size_m=heur_cfg["BLOCK_SIZE_M"],
+                num_warps_val=heur_cfg["num_warps"],
+                dtype=dtype,
+                state_dtype=state_dtype,
+                num_iters=num_iters,
+            )
+
+        # `tuned[eb]` may be missing if all configs failed in tune_dstate;
+        # in that case fall back to the heuristic so the table still prints.
+        best = tuned.get(eb) or heur_cfg
+        t_t = eb_timings.get((best["BLOCK_SIZE_M"], best["num_warps"]))
+
+        if t_h is None or t_t is None:
+            print(f"{eb:>8} | {'N/A':>10} | {'N/A':>10} | {'N/A':>8} |")
+            continue
+        speedup = t_h / t_t
+        marker = " <--" if speedup > 1.05 else ""
+        print(
+            f"{eb:>8} | {t_h:>10.2f} | {t_t:>10.2f} | "
+            f"{speedup:>7.2f}x | "
+            f"M={best['BLOCK_SIZE_M']},w={best['num_warps']}{marker}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
+def save_results(device_name: str, output: str, results_file: str | None = None) -> str:
+    """Save the full benchmark output to a results text file."""
+    if results_file is None:
+        results_file = os.path.join(
+            _RESULTS_DIR, f"ssm_benchmark_results_{device_name}.txt"
+        )
+    with open(results_file, "w") as f:
+        f.write(output)
+    return results_file
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Tune selective_state_update kernel for Mamba SSM"
+    )
+    parser.add_argument(
+        "--dstate",
+        type=int,
+        default=128,
+        help="SSM state size to tune for (default: 128)",
+    )
+    parser.add_argument(
+        "--all-dstates",
+        action="store_true",
+        help="Tune all common dstate values: " + str(ALL_DSTATES),
+    )
+    parser.add_argument(
+        "--dtype",
+        type=str,
+        default="bfloat16",
+        choices=["float16", "bfloat16"],
+        help="Activation / input data type (default: bfloat16)",
+    )
+    parser.add_argument(
+        "--mamba-ssm-cache-dtype",
+        type=str,
+        default="float32",
+        choices=list(_SSM_CACHE_DTYPE_MAP.keys()),
+        help="SSM state cache dtype (default: float32)",
+    )
+    parser.add_argument(
+        "--num-iters",
+        type=int,
+        default=100,
+        help="Number of timing iterations (default: 100)",
+    )
+    parser.add_argument(
+        "--save-configs",
+        action="store_true",
+        help=f"Save best configs to JSON in {_CONFIGS_DIR}",
+    )
+    parser.add_argument(
+        "--compare",
+        action="store_true",
+        help="Show comparison table: heuristic vs tuned",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Print every (BLOCK_SIZE_M, num_warps) result, not just best",
+    )
+    parser.add_argument(
+        "--results-file",
+        type=str,
+        default=None,
+        help="Path to save the benchmark results text file "
+        "(default: ssm_benchmark_results_<device>.txt alongside this script)",
+    )
+    parser.add_argument(
+        "--save-dir",
+        type=str,
+        default=None,
+        help=f"Directory to save JSON configs (default: {_CONFIGS_DIR})",
+    )
+    parser.add_argument(
+        "--headdim",
+        type=int,
+        default=DEFAULT_HEADDIM,
+        help=f"Per-head feature dim (default: {DEFAULT_HEADDIM})",
+    )
+    parser.add_argument(
+        "--ngroups",
+        type=int,
+        default=DEFAULT_NGROUPS,
+        help=f"Number of B/C groups (default: {DEFAULT_NGROUPS})",
+    )
+    parser.add_argument(
+        "--batch-sizes",
+        type=int,
+        nargs="+",
+        default=DEFAULT_BATCH_SIZES,
+        metavar="B",
+        help=f"Decoder batch sizes to sweep (default: {DEFAULT_BATCH_SIZES})",
+    )
+    parser.add_argument(
+        "--nheads",
+        type=int,
+        nargs="+",
+        default=DEFAULT_NHEADS,
+        metavar="N",
+        help=f"Number of heads per rank to sweep (default: {DEFAULT_NHEADS}). "
+        "effective_batch = batch * nheads; cross-product is deduped by eb.",
+    )
+    parser.add_argument(
+        "--validate",
+        action="store_true",
+        help="After tuning, verify each best config against a CPU reference "
+        "implementation. Configs that fail are flagged in the output.",
+    )
+    parser.add_argument(
+        "--atol",
+        type=float,
+        default=1e-2,
+        help="Absolute tolerance for --validate (default: 1e-2)",
+    )
+    args = parser.parse_args()
+
+    dtype = torch.bfloat16 if args.dtype == "bfloat16" else torch.float16
+    state_dtype = _SSM_CACHE_DTYPE_MAP[args.mamba_ssm_cache_dtype]
+    device_name = get_ssm_device_name()
+    cap = torch.cuda.get_device_capability()
+    is_blackwell = cap[0] >= 10
+
+    # Mirror all output to a results file (like Unix tee).
+    buf = StringIO()
+
+    class _Tee:
+        """Writes to both the original stdout and an in-memory buffer."""
+
+        def write(self, s):
+            buf.write(s)
+            sys.__stdout__.write(s)
+
+        def flush(self):
+            sys.__stdout__.flush()
+
+    sys.stdout = _Tee()  # type: ignore[assignment]
+
+    try:
+        print(f"Device : {device_name}  (sm_{cap[0]}{cap[1]})")
+        print(f"Blackwell: {is_blackwell}")
+        print(f"dtype  : {args.dtype}")
+        print(f"ssm_cache_dtype: {args.mamba_ssm_cache_dtype}")
+        print(f"headdim: {args.headdim}")
+        print(f"ngroups: {args.ngroups}")
+        print(f"triton : {triton.__version__}")
+
+        dstates = ALL_DSTATES if args.all_dstates else [args.dstate]
+        active = expand_batch_x_nheads(args.batch_sizes, args.nheads, args.ngroups)
+
+        for dstate in dstates:
+            tuned, timings = tune_dstate(
+                dstate=dstate,
+                headdim=args.headdim,
+                ngroups=args.ngroups,
+                dtype=dtype,
+                num_iters=args.num_iters,
+                verbose=args.verbose,
+                active=active,
+                state_dtype=state_dtype,
+            )
+
+            if args.compare:
+                compare_heuristic_vs_tuned(
+                    dstate=dstate,
+                    headdim=args.headdim,
+                    ngroups=args.ngroups,
+                    tuned=tuned,
+                    timings=timings,
+                    active=active,
+                    dtype=dtype,
+                    num_iters=args.num_iters,
+                    is_blackwell=is_blackwell,
+                    state_dtype=state_dtype,
+                )
+
+            if args.validate:
+                validity = validate_configs(
+                    dstate=dstate,
+                    headdim=args.headdim,
+                    ngroups=args.ngroups,
+                    tuned=tuned,
+                    active=active,
+                    dtype=dtype,
+                    atol=args.atol,
+                    state_dtype=state_dtype,
+                )
+                # Filter out any configs that failed correctness check
+                failed = [eb for eb, ok in validity.items() if not ok]
+                if failed:
+                    print(
+                        f"\n  WARNING: {len(failed)} config(s) failed validation "
+                        f"for dstate={dstate}: effective_batches {failed}"
+                    )
+                    print("  These will NOT be saved even with --save-configs.")
+                    tuned = {
+                        eb: cfg for eb, cfg in tuned.items() if validity.get(eb, True)
+                    }
+
+            if args.save_configs:
+                path = save_configs(
+                    headdim=args.headdim,
+                    dstate=dstate,
+                    cache_dtype=args.mamba_ssm_cache_dtype,
+                    configs=tuned,
+                    save_dir=args.save_dir,
+                )
+                print(f"\nSaved: {path}")
+            else:
+                print(f"\nBest configs for dstate={dstate}:")
+                for eb, cfg in sorted(tuned.items()):
+                    print(f"  effective_batch={eb:>6}: {cfg}")
+                print("\n(Re-run with --save-configs to persist to JSON)")
+    finally:
+        sys.stdout = sys.__stdout__
+        results_path = save_results(device_name, buf.getvalue(), args.results_file)
+        print(f"\nResults saved to: {results_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/kernels/benchmark_silu_mul_fp8_quant.py b/benchmarks/kernels/benchmark_silu_mul_fp8_quant.py
index 13b97b7696b3..9fcf278f2ef3 100644
--- a/benchmarks/kernels/benchmark_silu_mul_fp8_quant.py
+++ b/benchmarks/kernels/benchmark_silu_mul_fp8_quant.py
@@ -20,7 +20,7 @@
 import numpy as np
 import torch
 
-from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
+from vllm.model_executor.layers.fused_moe.experts.batched_deep_gemm_moe import (
     persistent_masked_m_silu_mul_quant,
 )
 from vllm.triton_utils import tl, triton
diff --git a/benchmarks/kernels/benchmark_vit_bilinear_pos_embed.py b/benchmarks/kernels/benchmark_vit_bilinear_pos_embed.py
new file mode 100644
index 000000000000..65171a1b2e10
--- /dev/null
+++ b/benchmarks/kernels/benchmark_vit_bilinear_pos_embed.py
@@ -0,0 +1,162 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+# Benchmarks the fused Triton bilinear position-embedding kernel against
+# the pure-PyTorch (native) implementation used in Qwen3-VL ViT models.
+#
+# == Usage Examples ==
+#
+# Default benchmark:
+#   python3 benchmark_vit_bilinear_pos_embed.py
+#
+# Custom parameters:
+#   python3 benchmark_vit_bilinear_pos_embed.py --hidden-dim 1152 \
+#       --num-grid-per-side 48 --save-path ./configs/vit_pos_embed/
+
+import itertools
+
+import torch
+
+from vllm.model_executor.models.qwen3_vl import (
+    pos_embed_interpolate_native,
+    triton_pos_embed_interpolate,
+)
+from vllm.triton_utils import HAS_TRITON, triton
+from vllm.utils.argparse_utils import FlexibleArgumentParser
+
+# (h, w) configurations to benchmark
+h_w_configs = [
+    (16, 16),
+    (32, 32),
+    (48, 48),
+    (64, 64),
+    (128, 128),
+    (32, 48),
+    (60, 80),
+]
+
+# Temporal dimensions
+t_range = [1]
+
+configs = list(itertools.product(t_range, h_w_configs))
+
+
+def get_benchmark(
+    num_grid_per_side: int,
+    spatial_merge_size: int,
+    hidden_dim: int,
+    dtype: torch.dtype,
+    device: str,
+):
+    @triton.testing.perf_report(
+        triton.testing.Benchmark(
+            x_names=["t", "h_w"],
+            x_vals=[list(_) for _ in configs],
+            line_arg="provider",
+            line_vals=["native", "triton"],
+            line_names=["Native (PyTorch)", "Triton"],
+            styles=[("blue", "-"), ("red", "-")],
+            ylabel="us",
+            plot_name=(
+                f"vit-bilinear-pos-embed-"
+                f"grid{num_grid_per_side}-"
+                f"dim{hidden_dim}-"
+                f"{dtype}"
+            ),
+            args={},
+        )
+    )
+    def benchmark(t, h_w, provider):
+        h, w = h_w
+
+        torch.manual_seed(42)
+        embed_weight = (
+            torch.randn(
+                num_grid_per_side * num_grid_per_side,
+                hidden_dim,
+                device=device,
+                dtype=dtype,
+            )
+            * 0.25
+        )
+
+        quantiles = [0.5, 0.2, 0.8]
+
+        if provider == "native":
+            ms, min_ms, max_ms = triton.testing.do_bench(
+                lambda: pos_embed_interpolate_native(
+                    embed_weight,
+                    t,
+                    h,
+                    w,
+                    num_grid_per_side,
+                    spatial_merge_size,
+                    dtype,
+                ),
+                quantiles=quantiles,
+            )
+        else:
+            assert HAS_TRITON, "Triton not available"
+            ms, min_ms, max_ms = triton.testing.do_bench(
+                lambda: triton_pos_embed_interpolate(
+                    embed_weight,
+                    t,
+                    h,
+                    w,
+                    num_grid_per_side,
+                    spatial_merge_size,
+                    dtype,
+                ),
+                quantiles=quantiles,
+            )
+
+        return 1000 * ms, 1000 * max_ms, 1000 * min_ms
+
+    return benchmark
+
+
+if __name__ == "__main__":
+    parser = FlexibleArgumentParser(
+        description="Benchmark bilinear position embedding interpolation."
+    )
+    parser.add_argument(
+        "--num-grid-per-side",
+        type=int,
+        default=48,
+        help="Position embedding grid size (default: 48 for Qwen3-VL)",
+    )
+    parser.add_argument(
+        "--spatial-merge-size",
+        type=int,
+        default=2,
+        help="Spatial merge size (default: 2)",
+    )
+    parser.add_argument(
+        "--hidden-dim",
+        type=int,
+        default=1152,
+        help="Embedding hidden dimension (default: 1152 for Qwen3-VL)",
+    )
+    parser.add_argument(
+        "--device",
+        type=str,
+        choices=["cuda:0", "cuda:1"],
+        default="cuda:0",
+    )
+    parser.add_argument(
+        "--save-path",
+        type=str,
+        default="./vit_pos_embed/",
+    )
+    args = parser.parse_args()
+
+    dtype = torch.bfloat16
+
+    bench = get_benchmark(
+        args.num_grid_per_side,
+        args.spatial_merge_size,
+        args.hidden_dim,
+        dtype,
+        args.device,
+    )
+    bench.run(print_data=True, save_path=args.save_path)
diff --git a/benchmarks/kernels/benchmark_vit_fp8_attn.py b/benchmarks/kernels/benchmark_vit_fp8_attn.py
new file mode 100644
index 000000000000..7d7a067dde9d
--- /dev/null
+++ b/benchmarks/kernels/benchmark_vit_fp8_attn.py
@@ -0,0 +1,324 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+# Benchmarks FP8 vs BF16 ViT attention via FlashInfer cuDNN backend.
+#
+# == Usage Examples ==
+#
+# Benchmark mode (default, FlashInfer CUDAGraph Bench)
+#   python3 benchmark_vit_fp8_attn.py
+#
+# Profile mode (PyTorch profiler, saves TensorBoard traces):
+#   python3 benchmark_vit_fp8_attn.py --profile
+#   python3 benchmark_vit_fp8_attn.py --profile --profile-output-dir ./profile_traces
+#
+# Custom seq_lens:
+#   python3 benchmark_vit_fp8_attn.py --seq-lens 4096 8192 16384
+
+from functools import partial
+
+import numpy as np
+import torch
+from torch.profiler import ProfilerActivity, profile, record_function
+
+from vllm.utils.argparse_utils import FlexibleArgumentParser
+
+# Qwen3-VL defaults
+NUM_HEADS = 16
+HEAD_DIM = 72
+DEFAULT_SEQ_LENS = [2304, 4096, 8192, 16384]
+
+
+def _setup_fp8_attention(num_heads: int, head_dim: int) -> tuple:
+    """Create FP8 and BF16 attention modules + workspace."""
+    from types import SimpleNamespace
+    from unittest.mock import patch
+
+    from vllm.config import VllmConfig, set_current_vllm_config
+    from vllm.config.multimodal import MultiModalConfig
+    from vllm.model_executor.layers.attention.mm_encoder_attention import (
+        MMEncoderAttention,
+        _get_flashinfer_workspace_buffer,
+    )
+    from vllm.v1.attention.backends.registry import AttentionBackendEnum
+
+    old_dtype = torch.get_default_dtype()
+    torch.set_default_dtype(torch.bfloat16)
+
+    backend_patch = patch(
+        "vllm.model_executor.layers.attention.mm_encoder_attention"
+        ".get_vit_attn_backend",
+        return_value=AttentionBackendEnum.FLASHINFER,
+    )
+
+    # FP8 attention
+    mm_config_fp8 = MultiModalConfig(mm_encoder_attn_dtype="fp8")
+    vllm_config_fp8 = VllmConfig()
+    vllm_config_fp8.model_config = SimpleNamespace(multimodal_config=mm_config_fp8)
+    with set_current_vllm_config(vllm_config_fp8), backend_patch:
+        attn_fp8 = MMEncoderAttention(
+            num_heads=num_heads,
+            head_size=head_dim,
+            prefix="visual.blocks.0.attn",
+        ).to("cuda")
+
+    # BF16 attention (no FP8)
+    with set_current_vllm_config(VllmConfig()), backend_patch:
+        attn_bf16 = MMEncoderAttention(
+            num_heads=num_heads,
+            head_size=head_dim,
+            prefix="visual.blocks.0.attn",
+        ).to("cuda")
+
+    torch.set_default_dtype(old_dtype)
+
+    workspace = _get_flashinfer_workspace_buffer()
+    return attn_fp8, attn_bf16, workspace
+
+
+def _build_meta(
+    seq_len: int,
+    num_heads: int,
+    head_dim: int,
+    fp8: bool,
+):
+    """Build cu_seqlens, max_seqlen, sequence_lengths."""
+    from vllm.model_executor.layers.attention.mm_encoder_attention import (
+        MMEncoderAttention,
+    )
+    from vllm.utils.math_utils import round_up
+    from vllm.v1.attention.backends.registry import AttentionBackendEnum
+
+    cu_np = np.array([0, seq_len], dtype=np.int32)
+    fp8_padded = num_heads * round_up(head_dim, 16) if fp8 else None
+
+    seq_lengths = MMEncoderAttention.maybe_compute_seq_lens(
+        AttentionBackendEnum.FLASHINFER, cu_np, torch.device("cuda")
+    )
+    max_seqlen = torch.tensor(
+        MMEncoderAttention.compute_max_seqlen(AttentionBackendEnum.FLASHINFER, cu_np),
+        dtype=torch.int32,
+    )
+    cu_seqlens = MMEncoderAttention.maybe_recompute_cu_seqlens(
+        AttentionBackendEnum.FLASHINFER,
+        cu_np,
+        num_heads * head_dim,
+        1,
+        torch.device("cuda"),
+        fp8_padded_hidden_size=fp8_padded,
+    )
+    return cu_seqlens, max_seqlen, seq_lengths
+
+
+def run_benchmark(
+    seq_lens: list[int],
+    num_heads: int,
+    head_dim: int,
+    method: str,
+):
+    """Benchmark FP8 vs BF16 attention across seq_lens.
+
+    Uses FlashInfer GPU-level timing to measure pure kernel time,
+    excluding CPU launch overhead.
+    """
+    if method == "cupti":
+        from flashinfer.testing import bench_gpu_time_with_cupti as bench_fn
+
+        bench_fn = partial(bench_fn, use_cuda_graph=True, cold_l2_cache=False)
+    elif method == "cudagraph":
+        from flashinfer.testing import (
+            bench_gpu_time_with_cudagraph as bench_fn,
+        )
+
+        bench_fn = partial(bench_fn, cold_l2_cache=False)
+    else:
+        raise ValueError(f"Invalid method: {method}")
+
+    attn_fp8, attn_bf16, workspace = _setup_fp8_attention(num_heads, head_dim)
+
+    print(f"Timing method: {method}")
+    print(f"{'seq_len':>8} {'BF16 (us)':>12} {'FP8 (us)':>12} {'Speedup':>10}")
+    print("-" * 46)
+
+    for seq_len in seq_lens:
+        torch.manual_seed(42)
+
+        q = torch.randn(
+            seq_len,
+            num_heads,
+            head_dim,
+            device="cuda",
+            dtype=torch.bfloat16,
+        )
+        k = torch.randn_like(q)
+        v = torch.randn_like(q)
+
+        cu_fp8, max_s, seq_l = _build_meta(seq_len, num_heads, head_dim, fp8=True)
+        # we can reuse cu_fp8 for cu_bf16 since q, k, and v are contiguous
+        cu_bf16 = cu_fp8.clone()
+
+        def bf16_fn(q=q, k=k, v=v, cu=cu_bf16, ms=max_s, sl=seq_l):
+            attn_bf16._forward_flashinfer(q, k, v, cu, ms, sl)
+
+        def fp8_fn(q=q, k=k, v=v, cu=cu_fp8, ms=max_s, sl=seq_l):
+            attn_fp8._forward_flashinfer(q, k, v, cu, ms, sl)
+
+        # bench_fn returns List[float] of per-iteration times in ms
+        bf16_times = bench_fn(bf16_fn)
+        fp8_times = bench_fn(fp8_fn)
+
+        bf16_us = np.median(bf16_times) * 1e3  # ms -> us
+        fp8_us = np.median(fp8_times) * 1e3
+        speedup = bf16_us / fp8_us if fp8_us > 0 else float("inf")
+
+        print(f"{seq_len:>8} {bf16_us:>12.1f} {fp8_us:>12.1f} {speedup:>9.2f}x")
+
+
+def _make_trace_handler(output_dir: str, worker_name: str, label: str):
+    """Create a trace handler that saves to TensorBoard and prints summary."""
+
+    def handler(prof):
+        torch.profiler.tensorboard_trace_handler(output_dir, worker_name)(prof)
+        print(f"\n{'=' * 80}")
+        print(label)
+        print(f"{'=' * 80}")
+        print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=20))
+
+    return handler
+
+
+def run_profile(
+    seq_len: int,
+    num_heads: int,
+    head_dim: int,
+    warmup: int,
+    output_dir: str,
+):
+    """Profile FP8 vs BF16 attention with PyTorch profiler."""
+    attn_fp8, attn_bf16, workspace = _setup_fp8_attention(num_heads, head_dim)
+
+    torch.manual_seed(42)
+    q = torch.randn(
+        seq_len,
+        num_heads,
+        head_dim,
+        device="cuda",
+        dtype=torch.bfloat16,
+    )
+    k = torch.randn_like(q)
+    v = torch.randn_like(q)
+
+    cu_fp8, max_s, seq_l = _build_meta(seq_len, num_heads, head_dim, fp8=True)
+    # we can reuse cu_fp8 for cu_bf16 since q, k, and v are contiguous
+    cu_bf16 = cu_fp8.clone()
+
+    sched = torch.profiler.schedule(wait=0, warmup=warmup, active=1)
+
+    # Profile BF16 (warmup handled by profiler schedule)
+    with profile(
+        activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
+        schedule=sched,
+        on_trace_ready=_make_trace_handler(
+            output_dir,
+            f"bf16_h{head_dim}_s{seq_len}",
+            f"BF16 Attention (seq_len={seq_len}, heads={num_heads}, "
+            f"head_dim={head_dim})",
+        ),
+    ) as prof_bf16:
+        for _ in range(warmup + 1):
+            with record_function("bf16_attention"):
+                attn_bf16._forward_flashinfer(
+                    q.clone(), k.clone(), v.clone(), cu_bf16, max_s, seq_l
+                )
+                torch.accelerator.synchronize()
+            prof_bf16.step()
+
+    # Profile FP8 (warmup handled by profiler schedule)
+    with profile(
+        activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
+        schedule=sched,
+        on_trace_ready=_make_trace_handler(
+            output_dir,
+            f"fp8_h{head_dim}_s{seq_len}",
+            f"FP8 Attention (seq_len={seq_len}, heads={num_heads}, "
+            f"head_dim={head_dim})",
+        ),
+    ) as prof_fp8:
+        for _ in range(warmup + 1):
+            with record_function("fp8_attention"):
+                attn_fp8._forward_flashinfer(
+                    q.clone(), k.clone(), v.clone(), cu_fp8, max_s, seq_l
+                )
+                torch.accelerator.synchronize()
+            prof_fp8.step()
+
+    print(f"\nTensorBoard traces saved to: {output_dir}")
+    print(f"View with: tensorboard --logdir={output_dir}")
+
+
+if __name__ == "__main__":
+    parser = FlexibleArgumentParser(description="Benchmark FP8 vs BF16 ViT attention.")
+    parser.add_argument(
+        "--seq-lens",
+        type=int,
+        nargs="+",
+        default=DEFAULT_SEQ_LENS,
+        help="Sequence lengths to benchmark",
+    )
+    parser.add_argument(
+        "--num-heads",
+        type=int,
+        default=NUM_HEADS,
+    )
+    parser.add_argument(
+        "--head-dim",
+        type=int,
+        default=HEAD_DIM,
+    )
+    parser.add_argument(
+        "--method",
+        choices=["cupti", "cudagraph"],
+        default="cudagraph",
+        help="GPU timing method: cupti (CUPTI kernel timing) or "
+        "cudagraph (CUDA graph capture/replay). Default: cudagraph",
+    )
+    parser.add_argument(
+        "--warmup",
+        type=int,
+        default=10,
+        help="Warmup iterations (profile mode only)",
+    )
+    parser.add_argument(
+        "--profile",
+        action="store_true",
+        help="Run PyTorch profiler instead of benchmark",
+    )
+    parser.add_argument(
+        "--profile-seq-len",
+        type=int,
+        default=8192,
+        help="Sequence length for profiling (default: 8192)",
+    )
+    parser.add_argument(
+        "--profile-output-dir",
+        type=str,
+        default="./profile_traces",
+        help="Output directory for TensorBoard traces (default: ./profile_traces)",
+    )
+    args = parser.parse_args()
+
+    if args.profile:
+        run_profile(
+            args.profile_seq_len,
+            args.num_heads,
+            args.head_dim,
+            args.warmup,
+            args.profile_output_dir,
+        )
+    else:
+        run_benchmark(
+            args.seq_lens,
+            args.num_heads,
+            args.head_dim,
+            args.method,
+        )
diff --git a/benchmarks/kernels/cpu/benchmark_cpu_attn.py b/benchmarks/kernels/cpu/benchmark_cpu_attn.py
index 63d034278c7e..08afd693c333 100644
--- a/benchmarks/kernels/cpu/benchmark_cpu_attn.py
+++ b/benchmarks/kernels/cpu/benchmark_cpu_attn.py
@@ -12,7 +12,6 @@
     cpu_attn_get_scheduler_metadata,
     cpu_attn_reshape_and_cache,
 )
-from vllm.platforms import CpuArchEnum, current_platform
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, set_random_seed
 from vllm.v1.attention.backends.cpu_attn import CPUAttentionBackend, _get_attn_isa
@@ -22,15 +21,14 @@ def get_attn_isa(
     block_size: int | None = None,
     dtype: torch.dtype | None = None,
 ):
-    if block_size and dtype:
-        return _get_attn_isa(dtype, block_size)
-    else:
-        if current_platform.get_cpu_architecture() == CpuArchEnum.ARM:
-            return "neon"
-        elif torch.cpu._is_amx_tile_supported():
-            return "amx"
-        else:
-            return "vec"
+    # Delegate to _get_attn_isa so the fallback path applies the same arch
+    # gating (e.g. RISC-V RVV is only chosen when the build's hardcoded
+    # VLEN=128 kernel is actually present; on VLEN=256 / scalar hosts it
+    # correctly falls through to vec/vec16).
+    return _get_attn_isa(
+        dtype if dtype is not None else torch.bfloat16,
+        block_size if block_size else 32,
+    )
 
 
 # rand number generation takes too much time, cache rand tensors
@@ -235,7 +233,7 @@ def rint(lo: int, hi: int) -> int:
     )
     parser.add_argument("--use-sink", action="store_true")
     parser.add_argument(
-        "--isa", type=str, choices=["vec", "neon", "amx", "vec16"], default=None
+        "--isa", type=str, choices=["vec", "neon", "amx", "vec16", "rvv"], default=None
     )
     parser.add_argument("--seed", type=int, default=0)
     parser.add_argument("--iters", type=int, default=20)
diff --git a/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py b/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py
index 4384d3e56828..c9aaef284d70 100644
--- a/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py
+++ b/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py
@@ -16,6 +16,7 @@
     fp8_gemm_nt,
     per_block_cast_to_fp8,
 )
+from vllm.utils.torch_utils import set_random_seed
 
 
 def benchmark_shape(
@@ -235,9 +236,7 @@ def run_benchmarks(verbose: bool = False):
     torch.backends.cudnn.allow_tf32 = True
 
     # Set seeds for reproducibility
-    torch.manual_seed(42)
-    torch.cuda.manual_seed(42)
-
+    set_random_seed(42)
     # Define benchmark shapes (m, n, k)
     shapes = [
         (8, 4096, 7168),
diff --git a/tests/entrypoints/pooling/pooling/__init__.py b/benchmarks/kernels/ir/__init__.py
similarity index 100%
rename from tests/entrypoints/pooling/pooling/__init__.py
rename to benchmarks/kernels/ir/__init__.py
diff --git a/benchmarks/kernels/ir/bench_ir_ops.py b/benchmarks/kernels/ir/bench_ir_ops.py
new file mode 100644
index 000000000000..b23c4e8ae327
--- /dev/null
+++ b/benchmarks/kernels/ir/bench_ir_ops.py
@@ -0,0 +1,378 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Generic benchmark harness for vLLM IR ops.
+
+Usage:
+    python benchmarks/kernels/ir/bench_ir_ops.py
+    python benchmarks/kernels/ir/bench_ir_ops.py --ops rms_norm
+    python benchmarks/kernels/ir/bench_ir_ops.py --ops rms_norm,silu_mul
+    python benchmarks/kernels/ir/bench_ir_ops.py --no-cuda-graph
+    python benchmarks/kernels/ir/bench_ir_ops.py --ops rms_norm --save-path ./results/
+"""
+
+import argparse
+import contextlib
+import csv
+import dataclasses
+import datetime
+import math
+import os
+import subprocess
+import sys
+import tempfile
+
+# Ensure repo root is on sys.path so `benchmarks` is importable as a package.
+_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
+if _REPO_ROOT not in sys.path:
+    sys.path.insert(0, _REPO_ROOT)
+
+# Suppress noisy C++ warnings from vllm kernel registration (written to fd 2
+# directly by the dynamic linker, so Python-level sys.stderr redirect won't
+# catch them).
+_saved_fd = os.dup(2)
+try:
+    with open(os.devnull, "w") as _devnull:
+        os.dup2(_devnull.fileno(), 2)
+        import torch
+
+        import vllm.kernels  # noqa: E402, F401
+finally:
+    os.dup2(_saved_fd, 2)
+    os.close(_saved_fd)
+
+from tqdm import tqdm  # noqa: E402
+
+from benchmarks.kernels.ir.shapes import SHAPE_CONFIGS  # noqa: E402  # isort: skip
+from vllm.ir.op import IrOp  # noqa: E402
+from vllm.platforms import current_platform  # noqa: E402
+from vllm.triton_utils import triton  # noqa: E402
+
+
+@dataclasses.dataclass(frozen=True)
+class BenchConfig:
+    use_cuda_graph: bool = True
+    warmup: int = 25
+    rep: int = 100
+
+
+def _pkg_version(name: str) -> str:
+    from importlib.metadata import PackageNotFoundError, version
+
+    with contextlib.suppress(PackageNotFoundError):
+        return version(name)
+    return "not installed"
+
+
+_METADATA_LABELS = {
+    "timestamp": "Timestamp",
+    "git_commit": "Git commit",
+    "vllm": "vLLM",
+    "pytorch": "PyTorch",
+    "cuda_runtime": "CUDA runtime",
+    "triton": "Triton",
+    "cutlass": "CUTLASS",
+    "helion": "Helion",
+    "device": "Device",
+    "bench_mode": "Bench mode",
+    "warmup": "Warmup",
+    "rep": "Repetitions",
+}
+
+
+def collect_env_metadata(cfg: BenchConfig) -> dict[str, str]:
+    from vllm.collect_env import get_env_info
+
+    env = get_env_info()
+
+    git_sha = "unknown"
+    with contextlib.suppress(subprocess.CalledProcessError, FileNotFoundError):
+        git_sha = (
+            subprocess.check_output(
+                ["git", "rev-parse", "--short", "HEAD"], stderr=subprocess.DEVNULL
+            )
+            .decode()
+            .strip()
+        )
+
+    device_name = current_platform.get_device_name()
+
+    warmup_note = " ms" if not cfg.use_cuda_graph else " ms (ignored)"
+    rep_note = " replays" if cfg.use_cuda_graph else " ms"
+
+    return {
+        "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        "git_commit": git_sha,
+        "vllm": str(env.vllm_version),
+        "pytorch": str(env.torch_version),
+        "cuda_runtime": str(env.cuda_runtime_version),
+        "triton": triton.__version__,
+        "cutlass": _pkg_version("nvidia-cutlass-dsl"),
+        "helion": _pkg_version("helion"),
+        "device": device_name,
+        "bench_mode": "cuda_graph" if cfg.use_cuda_graph else "eager",
+        "warmup": f"{cfg.warmup}{warmup_note}",
+        "rep": f"{cfg.rep}{rep_note}",
+    }
+
+
+def print_metadata(metadata: dict[str, str]):
+    print("=" * 60)
+    for key, val in metadata.items():
+        print(f"{_METADATA_LABELS.get(key, key) + ':':<16}{val}")
+    print("=" * 60)
+
+
+def _clone_args(args: tuple) -> tuple:
+    return tuple(a.clone() if isinstance(a, torch.Tensor) else a for a in args)
+
+
+# TODO(gmagogsfm): When the `maybe_inplace` PR lands, ops marked as
+# inplace=True will mutate bench_args across iterations. Both CUDA graph
+# and eager modes will accumulate drift from repeated in-place mutation.
+# We need to re-clone inputs per iteration for inplace ops.
+def _bench_one(fn, args, cfg: BenchConfig) -> float:
+    bench_args = _clone_args(args)
+    bench_fn = lambda: fn(*bench_args)
+
+    if cfg.use_cuda_graph:
+        ms = triton.testing.do_bench_cudagraph(bench_fn, rep=cfg.rep, quantiles=[0.5])
+    else:
+        ms = triton.testing.do_bench(
+            bench_fn, warmup=cfg.warmup, rep=cfg.rep, quantiles=[0.5]
+        )
+    return ms * 1000
+
+
+# TODO(gmagogsfm): Once compiled native implementation lands (#38775),
+# the benchmark baseline should be the compiled native (what vLLM runs by
+# default) rather than the uncompiled native implementation.
+def collect_timings(
+    op: IrOp, shape_configs: list[dict], cfg: BenchConfig
+) -> tuple[list[str], list[str], dict[str, dict[str, float]]]:
+    def fmt(v) -> str:
+        return str(v).split(".")[-1] if isinstance(v, torch.dtype) else str(v)
+
+    case_names = [
+        "_".join(f"{k}={fmt(v)}" for k, v in kwargs.items()) for kwargs in shape_configs
+    ]
+    providers = [n for n, impl in op.impls.items() if impl.supported]
+
+    results: dict[str, dict[str, float]] = {c: {} for c in case_names}
+    for provider in providers:
+        impl = op.impls[provider]
+        desc = f"{op.name} / {provider}"
+        for case_name, kwargs in tqdm(
+            zip(case_names, shape_configs),
+            desc=desc,
+            total=len(case_names),
+            unit=" cases",
+        ):
+            args = op.generate_inputs(**kwargs)
+            if impl.supports_args(*args):
+                results[case_name][provider] = _bench_one(impl.impl_fn, args, cfg)
+            else:
+                results[case_name][provider] = float("nan")
+
+    return case_names, providers, results
+
+
+def analyze_results(
+    op_name: str,
+    case_names: list[str],
+    providers: list[str],
+    results: dict[str, dict[str, float]],
+) -> tuple[list[dict[str, str]], list[dict[str, str]], list[str]]:
+    native_col = "native"
+    non_native = [p for p in providers if p != native_col]
+
+    header_cols = ["case"]
+    for p in providers:
+        header_cols.append(f"{p} (us)")
+    for p in non_native:
+        header_cols.append(f"{p} speedup")
+
+    detail_rows: list[dict[str, str]] = []
+    speedup_data: dict[str, list[tuple[float, str]]] = {p: [] for p in non_native}
+
+    for case_name in case_names:
+        timings = results[case_name]
+        row: dict[str, str] = {"case": case_name}
+
+        for p in providers:
+            val = timings.get(p, float("nan"))
+            row[f"{p} (us)"] = f"{val:.2f}" if not math.isnan(val) else "n/a"
+
+        native_us = timings.get(native_col, float("nan"))
+        for p in non_native:
+            p_us = timings.get(p, float("nan"))
+            if not math.isnan(native_us) and not math.isnan(p_us) and p_us > 0:
+                speedup = native_us / p_us
+                row[f"{p} speedup"] = f"{speedup:.2f}x"
+                speedup_data[p].append((speedup, case_name))
+            else:
+                row[f"{p} speedup"] = "n/a"
+
+        detail_rows.append(row)
+
+    summary_rows: list[dict[str, str]] = []
+    for p in non_native:
+        entries = speedup_data[p]
+        if not entries:
+            continue
+        speedups = [s for s, _ in entries]
+        geomean = math.exp(sum(math.log(s) for s in speedups) / len(speedups))
+        best_val, best_case = max(entries)
+        worst_val, worst_case = min(entries)
+        wins = sum(1 for s in speedups if s > 1.0)
+        losses = sum(1 for s in speedups if s < 1.0)
+        total = len(speedups)
+
+        print(f"\n{p} vs native ({wins}/{total} faster, {losses}/{total} slower):")
+        print(f"  geomean speedup: {geomean:.2f}x")
+        print(f"  best:            {best_val:.2f}x  ({best_case})")
+        print(f"  worst:           {worst_val:.2f}x  ({worst_case})")
+
+        summary_rows.append(
+            {
+                "op": op_name,
+                "provider": p,
+                "geomean_speedup": f"{geomean:.2f}",
+                "best_speedup": f"{best_val:.2f}",
+                "best_case": best_case,
+                "worst_speedup": f"{worst_val:.2f}",
+                "worst_case": worst_case,
+                "wins": str(wins),
+                "losses": str(losses),
+                "total": str(total),
+            }
+        )
+
+    return detail_rows, summary_rows, header_cols
+
+
+def write_csv(path: str, rows: list[dict[str, str]], fieldnames: list[str]):
+    with open(path, "w", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(rows)
+
+
+def save_results(
+    save_dir: str,
+    op_name: str,
+    detail_rows: list[dict[str, str]],
+    header_cols: list[str],
+    all_summary_rows: list[dict[str, str]],
+    metadata: dict[str, str],
+):
+    write_csv(
+        os.path.join(save_dir, f"{op_name}_detail.csv"),
+        detail_rows,
+        header_cols,
+    )
+    if all_summary_rows:
+        write_csv(
+            os.path.join(save_dir, "summary.csv"),
+            all_summary_rows,
+            list(all_summary_rows[0].keys()),
+        )
+    write_csv(
+        os.path.join(save_dir, "metadata.csv"),
+        [metadata],
+        list(metadata.keys()),
+    )
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="Benchmark vLLM IR ops")
+    parser.add_argument(
+        "--ops",
+        type=str,
+        default=None,
+        help="Comma-separated list of op names to benchmark (substring match)",
+    )
+    parser.add_argument(
+        "--no-cuda-graph",
+        action="store_true",
+        help="Disable CUDA graph; use do_bench with L2 cache flushing instead",
+    )
+    parser.add_argument(
+        "--warmup",
+        type=int,
+        default=25,
+        help="Warmup time in ms (do_bench) or ignored with CUDA graph (default: 25)",
+    )
+    parser.add_argument(
+        "--rep",
+        type=int,
+        default=100,
+        help="Repetition time in ms (do_bench) or number of graph replays "
+        "(do_bench_cudagraph) (default: 100)",
+    )
+    parser.add_argument(
+        "--save-path",
+        type=str,
+        default=None,
+        help="Directory to save results (default: auto-created temp dir)",
+    )
+    return parser.parse_args()
+
+
+def main():
+    args = parse_args()
+    cfg = BenchConfig(
+        use_cuda_graph=not args.no_cuda_graph,
+        warmup=args.warmup,
+        rep=args.rep,
+    )
+
+    torch.set_default_device(current_platform.device_type)
+
+    metadata = collect_env_metadata(cfg)
+    print_metadata(metadata)
+
+    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    save_dir = args.save_path or os.path.join(
+        tempfile.gettempdir(), f"vllm_ir_bench_{timestamp}"
+    )
+    os.makedirs(save_dir, exist_ok=True)
+
+    op_filters = [f.strip() for f in args.ops.split(",")] if args.ops else None
+    all_summary_rows: list[dict[str, str]] = []
+
+    for op in IrOp.registry.values():
+        if op_filters and not any(f in op.name for f in op_filters):
+            continue
+        if not op.has_input_generator:
+            print(f"Skipping op '{op.name}': no input generator registered")
+            continue
+        if op.name not in SHAPE_CONFIGS:
+            raise RuntimeError(
+                f"No benchmark shape config for op '{op.name}'. "
+                f"Add it to benchmarks/kernels/ir/shapes.py"
+            )
+
+        case_names, providers, results = collect_timings(
+            op, SHAPE_CONFIGS[op.name], cfg
+        )
+        detail_rows, summary_rows, header_cols = analyze_results(
+            op.name, case_names, providers, results
+        )
+        all_summary_rows.extend(summary_rows)
+
+        save_results(
+            save_dir,
+            op.name,
+            detail_rows,
+            header_cols,
+            all_summary_rows,
+            metadata,
+        )
+
+    print(f"\nResults saved to: {save_dir}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/kernels/ir/shapes.py b/benchmarks/kernels/ir/shapes.py
new file mode 100644
index 000000000000..6cc44cf6cec1
--- /dev/null
+++ b/benchmarks/kernels/ir/shapes.py
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Shape configurations for IR op benchmarks.
+"""
+
+import torch
+
+NUM_TOKENS = [1, 2, 4, 16, 64, 256, 1024, 4096, 16384]
+COMMON_HIDDEN_SIZES = [
+    2048,  # Llama 3.2 1B, Qwen 3 MoE 30B-A3B, Gemma 3n
+    3072,  # Gemma 7B/9B
+    4096,  # Llama 3 8B, Qwen 3 8B, Mistral 7B
+    5120,  # Llama 4 Scout 17B-16E
+    7168,  # DeepSeek V3
+    8192,  # Llama 3 70B
+    16384,  # Llama 3 405B
+]
+
+# Each entry maps an op name to a list of kwarg dicts that will be passed
+# to that op's registered input generator via op.generate_inputs(**kwargs).
+SHAPE_CONFIGS: dict[str, list[dict]] = {
+    "rms_norm": [
+        {"num_tokens": n, "hidden_size": d, "dtype": dtype}
+        for dtype in [torch.float16, torch.bfloat16, torch.float32]
+        for d in COMMON_HIDDEN_SIZES
+        for n in NUM_TOKENS
+    ],
+}
diff --git a/benchmarks/multi_turn/benchmark_serving_multi_turn.py b/benchmarks/multi_turn/benchmark_serving_multi_turn.py
index e23f6b923f1b..2f56099c66fd 100644
--- a/benchmarks/multi_turn/benchmark_serving_multi_turn.py
+++ b/benchmarks/multi_turn/benchmark_serving_multi_turn.py
@@ -217,6 +217,7 @@ async def send_request(
     min_tokens: int | None = None,
     max_tokens: int | None = None,
     timeout_sec: int = 120,
+    conversation_id: str | None = None,
 ) -> ServerResponse:
     payload = {
         "model": model,
@@ -225,6 +226,9 @@ async def send_request(
         "temperature": 0.0,
     }
 
+    if conversation_id is not None:
+        payload["conversation_id"] = conversation_id
+
     if stream:
         payload["stream"] = True
         payload["stream_options"] = {"include_usage": False}
@@ -419,6 +423,7 @@ async def send_turn(
         min_tokens,
         max_tokens,
         req_args.timeout_sec,
+        conversation_id=conv_id,
     )
 
     if response.valid is False:
@@ -1439,6 +1444,12 @@ async def main() -> None:
         action="store_true",
         help="Export summary to Excel file (optional)",
     )
+    parser.add_argument(
+        "--stats-json-output",
+        type=str,
+        default=None,
+        help="Export per-request stats (ttft_ms, tpot_ms, etc.) to a JSON file",
+    )
     parser.add_argument(
         "-v",
         "--verbose",
@@ -1462,6 +1473,12 @@ async def main() -> None:
         "(for example: --warmup-percentages=0%%,50%%)",
     )
 
+    parser.add_argument(
+        "--trust-remote-code",
+        action="store_true",
+        help="Trust remote code when loading the tokenizer.",
+    )
+
     args = parser.parse_args()
 
     logger.info(args)
@@ -1504,7 +1521,9 @@ async def main() -> None:
     np.random.seed(args.seed)
 
     logger.info("Loading tokenizer")
-    tokenizer = AutoTokenizer.from_pretrained(args.model)
+    tokenizer = AutoTokenizer.from_pretrained(
+        args.model, trust_remote_code=args.trust_remote_code
+    )
 
     await get_server_info(args.url)
 
@@ -1651,6 +1670,19 @@ async def main() -> None:
         warmup_runtime_sec=warmup_runtime_sec,
     )
 
+    if args.stats_json_output is not None:
+        # Export per-request metrics as a JSON array for downstream analysis.
+        stats_data = [s._asdict() for s in client_metrics]
+        logger.info(
+            f"{Color.GREEN}Writing per-request stats JSON: "
+            f"{args.stats_json_output}{Color.RESET}"
+        )
+        os.makedirs(
+            os.path.dirname(os.path.abspath(args.stats_json_output)), exist_ok=True
+        )
+        with open(args.stats_json_output, "w") as f:
+            json.dump(stats_data, f, indent=2)
+
     if args.output_file is not None:
         # Write a JSON file with the updated conversations
         # The "assistant" content will contain the answers from the tested LLM
diff --git a/build_rust.sh b/build_rust.sh
new file mode 100755
index 000000000000..98871ec8abcf
--- /dev/null
+++ b/build_rust.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Build the vllm-rs Rust frontend binary and install it into the vllm package.
+# Usage: ./build_rust.sh [--debug]
+#
+# By default builds in release mode. Pass --debug for faster compile times
+# during development.
+
+set -euo pipefail
+
+REPO_ROOT="$(cd "$(dirname "$0")" && pwd)"
+RUST_DIR="$REPO_ROOT/rust"
+TARGET_PATH="${VLLM_RS_TARGET_PATH:-$REPO_ROOT/vllm/vllm-rs}"
+
+# Read the required toolchain from rust-toolchain.toml.
+TOOLCHAIN=$(grep '^channel' "$REPO_ROOT/rust-toolchain.toml" | sed 's/.*= *"\(.*\)"/\1/')
+
+# Ensure rustup and the required toolchain are available.
+if ! command -v rustup &>/dev/null; then
+    echo "rustup not found, installing..."
+    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain none
+    source "$HOME/.cargo/env"
+fi
+
+if ! rustup run "$TOOLCHAIN" rustc --version &>/dev/null; then
+    echo "Installing Rust toolchain: $TOOLCHAIN"
+    rustup toolchain install "$TOOLCHAIN"
+fi
+
+if [[ "${1:-}" == "--debug" ]]; then
+    PROFILE_ARGS=()
+    PROFILE_DIR="debug"
+else
+    PROFILE_ARGS=(--release)
+    PROFILE_DIR="release"
+fi
+
+cargo +"$TOOLCHAIN" build "${PROFILE_ARGS[@]}" \
+    --manifest-path "$RUST_DIR/Cargo.toml" \
+    --bin vllm-rs \
+    --features native-tls-vendored
+
+mkdir -p "$(dirname "$TARGET_PATH")"
+cp "$RUST_DIR/target/$PROFILE_DIR/vllm-rs" "$TARGET_PATH"
+echo "Installed vllm-rs to $TARGET_PATH"
diff --git a/cmake/cpu_extension.cmake b/cmake/cpu_extension.cmake
index 8d74d6d5d96c..ffab4015f495 100644
--- a/cmake/cpu_extension.cmake
+++ b/cmake/cpu_extension.cmake
@@ -1,7 +1,7 @@
 include(FetchContent)
 
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
-set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_EXTENSIONS ON)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
@@ -30,6 +30,26 @@ else()
     list(APPEND CXX_COMPILE_FLAGS
         "-fopenmp"
         "-DVLLM_CPU_EXTENSION")
+
+    # locate PyTorch's libgomp (e.g. site-packages/torch.libs/libgomp-947d5fa1.so.1.0.0)
+    # and create a local shim dir with it. When PyTorch is built from source or packaged
+    # by a distro (common on RISC-V, s390x, Fedora/RHEL aarch64), no vendored libgomp
+    # exists and the shim dir is empty; fall back to the system libgomp in that case.
+    vllm_prepare_torch_gomp_shim(VLLM_TORCH_GOMP_SHIM_DIR)
+
+    if(VLLM_TORCH_GOMP_SHIM_DIR)
+        find_library(OPEN_MP
+            NAMES gomp
+            PATHS "${VLLM_TORCH_GOMP_SHIM_DIR}"
+            NO_DEFAULT_PATH
+            REQUIRED
+        )
+        # Use the same libgomp as PyTorch at runtime
+        set(ENV{LD_LIBRARY_PATH} "${VLLM_TORCH_GOMP_SHIM_DIR}:$ENV{LD_LIBRARY_PATH}")
+    else()
+        # Fall back to system / toolchain libgomp
+        find_library(OPEN_MP NAMES gomp REQUIRED)
+    endif()
 endif()
 
 if (NOT MACOSX_FOUND)
@@ -146,16 +166,51 @@ elseif (S390_FOUND)
         "-mtune=native")
 elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
     message(STATUS "RISC-V detected")
-    if(RVV_BF16_FOUND)
-        message(STATUS "BF16 extension detected")
-        set(MARCH_FLAGS -march=rv64gcv_zvfh_zfbfmin_zvfbfmin_zvl128b -mrvv-vector-bits=zvl -mabi=lp64d)
-        add_compile_definitions(RISCV_BF16_SUPPORT)
-    elseif (RVV_FP16_FOUND)
-        message(WARNING "BF16 functionality is not available")
-        set(MARCH_FLAGS -march=rv64gcv_zvfh_zvl128b -mrvv-vector-bits=zvl -mabi=lp64d)
+    # VLLM_RVV_VLEN selects the target VLEN. Auto-detected from /proc/cpuinfo
+    # by default; override with -DVLLM_RVV_VLEN=128 or -DVLLM_RVV_VLEN=256.
+    if(NOT DEFINED VLLM_RVV_VLEN)
+        # Auto-detect: find the largest zvl<N>b in /proc/cpuinfo isa line.
+        if(EXISTS /proc/cpuinfo)
+            file(READ /proc/cpuinfo _cpuinfo)
+            set(_best 0)
+            foreach(_n IN ITEMS 128 256 512 1024)
+                if(_cpuinfo MATCHES "zvl${_n}b")
+                    set(_best ${_n})
+                endif()
+            endforeach()
+            if(_best GREATER 0)
+                set(VLLM_RVV_VLEN ${_best})
+            endif()
+        endif()
+        # If auto-detect failed (no /proc/cpuinfo or no zvl<N>b reported)
+        # but the compiler supports RVV, require explicit specification.
+        if(NOT DEFINED VLLM_RVV_VLEN AND (RVV_FP16_FOUND OR RVV_BF16_FOUND))
+            message(FATAL_ERROR
+                "RISC-V RVV is available but VLEN could not be auto-detected. "
+                "Please specify VLEN explicitly:\n"
+                "  -DVLLM_RVV_VLEN=128   (for VLEN=128 hardware)\n"
+                "  -DVLLM_RVV_VLEN=256   (for VLEN=256 hardware, e.g. Spacemit X100)\n"
+                "  -DVLLM_RVV_VLEN=0     (force scalar, no RVV)")
+        endif()
+    endif()
+    if(VLLM_RVV_VLEN AND VLLM_RVV_VLEN GREATER 0)
+        message(STATUS "RISC-V RVV VLEN=${VLLM_RVV_VLEN}")
+        # Sources gate FP16/BF16 paths on the compiler-provided
+        # __riscv_zvfh / __riscv_zvfbfmin macros, which GCC and clang
+        # define automatically when those extensions appear in -march.
+        if(RVV_BF16_FOUND)
+            message(STATUS "BF16 extension detected")
+            set(MARCH_FLAGS -march=rv64gcv_zvfh_zfbfmin_zvfbfmin_zvl${VLLM_RVV_VLEN}b -mrvv-vector-bits=zvl -mabi=lp64d)
+        elseif(RVV_FP16_FOUND)
+            message(WARNING "BF16 functionality is not available")
+            set(MARCH_FLAGS -march=rv64gcv_zvfh_zvl${VLLM_RVV_VLEN}b -mrvv-vector-bits=zvl -mabi=lp64d)
+        else()
+            message(STATUS "compile riscv with scalar (no FP16/BF16)")
+            set(MARCH_FLAGS -march=rv64gc)
+        endif()
     else()
         message(STATUS "compile riscv with scalar")
-        list(APPEND CXX_COMPILE_FLAGS "-march=rv64gc")
+        set(MARCH_FLAGS -march=rv64gc)
     endif()
     list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
 else()
@@ -175,20 +230,6 @@ if (ENABLE_X86_ISA OR (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND) OR POWER9_FOUND
         if(NOT NPROC)
             set(NPROC 4)
         endif()
-        # locate PyTorch's libgomp (e.g. site-packages/torch.libs/libgomp-947d5fa1.so.1.0.0)
-        # and create a local shim dir with it
-        vllm_prepare_torch_gomp_shim(VLLM_TORCH_GOMP_SHIM_DIR)
-
-        find_library(OPEN_MP
-            NAMES gomp
-            PATHS ${VLLM_TORCH_GOMP_SHIM_DIR}
-            NO_DEFAULT_PATH
-            REQUIRED
-        )
-        # Set LD_LIBRARY_PATH to include the shim dir at build time to use the same libgomp as PyTorch
-        if (OPEN_MP)
-            set(ENV{LD_LIBRARY_PATH} "${VLLM_TORCH_GOMP_SHIM_DIR}:$ENV{LD_LIBRARY_PATH}")
-        endif()
 
         # Fetch and populate ACL
         if(DEFINED ENV{ACL_ROOT_DIR} AND IS_DIRECTORY "$ENV{ACL_ROOT_DIR}")
@@ -287,14 +328,6 @@ if (ENABLE_X86_ISA OR (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND) OR POWER9_FOUND
     set(ONEDNN_VERBOSE "ON")
     set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
 
-    # TODO: Refactor this
-    if (ENABLE_X86_ISA)
-        # Note: only enable oneDNN for AVX512
-        list(APPEND DNNL_COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512})
-    else()
-        list(APPEND DNNL_COMPILE_FLAGS ${CXX_COMPILE_FLAGS})
-    endif()
-
     set(VLLM_BUILD_TYPE ${CMAKE_BUILD_TYPE})
     set(CMAKE_BUILD_TYPE "Release") # remove oneDNN debug symbols to reduce size
     FetchContent_MakeAvailable(oneDNN)
@@ -307,8 +340,14 @@ if (ENABLE_X86_ISA OR (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND) OR POWER9_FOUND
         PRIVATE ${oneDNN_SOURCE_DIR}/src
     )
     target_link_libraries(dnnl_ext dnnl torch)
-    target_compile_options(dnnl_ext PRIVATE ${DNNL_COMPILE_FLAGS} -fPIC)
+    if (ENABLE_X86_ISA)
+        target_compile_options(dnnl_ext PRIVATE ${CXX_COMPILE_FLAGS_AVX2} -fPIC)
+    else()
+        target_compile_options(dnnl_ext PRIVATE ${CXX_COMPILE_FLAGS} -fPIC)
+    endif()
     list(APPEND LIBS dnnl_ext)
+
+
     set(USE_ONEDNN ON)
 else()
     set(USE_ONEDNN OFF)
@@ -349,6 +388,7 @@ endif()
 set(VLLM_EXT_SRC
     "csrc/cpu/activation.cpp"
     "csrc/cpu/utils.cpp"
+    "csrc/cpu/spec_decode_utils.cpp"
     "csrc/cpu/layernorm.cpp"
     "csrc/cpu/mla_decode.cpp"
     "csrc/cpu/pos_encoding.cpp"
@@ -359,6 +399,7 @@ set(VLLM_EXT_SRC
 if (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND)
     set(VLLM_EXT_SRC
         "csrc/cpu/shm.cpp"
+        "csrc/cpu/activation_lut_bf16.cpp"
         ${VLLM_EXT_SRC})
 endif()
 
@@ -370,11 +411,15 @@ endif()
 
 if (ENABLE_X86_ISA)
     set(VLLM_EXT_SRC_SGL
+        "csrc/cpu/sgl-kernels/fla.cpp"
+        "csrc/cpu/sgl-kernels/conv.cpp"
         "csrc/cpu/sgl-kernels/gemm.cpp"
         "csrc/cpu/sgl-kernels/gemm_int8.cpp"
         "csrc/cpu/sgl-kernels/gemm_fp8.cpp"
+        "csrc/cpu/sgl-kernels/gemm_int4.cpp"
         "csrc/cpu/sgl-kernels/moe.cpp"
         "csrc/cpu/sgl-kernels/moe_int8.cpp"
+        "csrc/cpu/sgl-kernels/moe_int4.cpp"
         "csrc/cpu/sgl-kernels/moe_fp8.cpp")
 
     set(VLLM_EXT_SRC_AVX512
@@ -382,6 +427,7 @@ if (ENABLE_X86_ISA)
         "csrc/cpu/cpu_wna16.cpp"
         "csrc/cpu/cpu_fused_moe.cpp"
         "csrc/cpu/utils.cpp"
+        "csrc/cpu/spec_decode_utils.cpp"
         "csrc/cpu/cpu_attn.cpp"
         "csrc/cpu/dnnl_kernels.cpp"
         "csrc/cpu/torch_bindings.cpp"
@@ -392,9 +438,11 @@ if (ENABLE_X86_ISA)
         "csrc/cpu/pos_encoding.cpp"
         "csrc/moe/dynamic_4bit_int_moe_cpu.cpp") 
 
-    set(VLLM_EXT_SRC_AVX2 
+    set(VLLM_EXT_SRC_AVX2
         "csrc/cpu/utils.cpp"
+        "csrc/cpu/spec_decode_utils.cpp"
         "csrc/cpu/cpu_attn.cpp"
+        "csrc/cpu/dnnl_kernels.cpp"
         "csrc/cpu/torch_bindings.cpp"
         # TODO: Remove these files
         "csrc/cpu/activation.cpp"
@@ -409,7 +457,7 @@ if (ENABLE_X86_ISA)
 
     set(_C_LIBS numa dnnl_ext)
     set(_C_AVX512_LIBS numa dnnl_ext)
-    set(_C_AVX2_LIBS numa)
+    set(_C_AVX2_LIBS numa dnnl_ext)
 
     # AMX + AVX512F + AVX512BF16 + AVX512VNNI
     define_extension_target(
diff --git a/cmake/external_projects/deepgemm.cmake b/cmake/external_projects/deepgemm.cmake
new file mode 100644
index 000000000000..183c42dc7953
--- /dev/null
+++ b/cmake/external_projects/deepgemm.cmake
@@ -0,0 +1,187 @@
+include(FetchContent)
+
+# If DEEPGEMM_SRC_DIR is set, DeepGEMM is built from that directory
+# instead of downloading.
+# It can be set as an environment variable or passed as a cmake argument.
+# The environment variable takes precedence.
+if (DEFINED ENV{DEEPGEMM_SRC_DIR})
+  set(DEEPGEMM_SRC_DIR $ENV{DEEPGEMM_SRC_DIR})
+endif()
+
+if(DEEPGEMM_SRC_DIR)
+  FetchContent_Declare(
+    deepgemm
+    SOURCE_DIR ${DEEPGEMM_SRC_DIR}
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND ""
+  )
+else()
+  # This ref should be kept in sync with tools/install_deepgemm.sh
+  FetchContent_Declare(
+    deepgemm
+    GIT_REPOSITORY https://github.com/deepseek-ai/DeepGEMM.git
+    GIT_TAG 891d57b4db1071624b5c8fa0d1e51cb317fa709f
+    GIT_SUBMODULES "third-party/cutlass" "third-party/fmt"
+    GIT_PROGRESS TRUE
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND ""
+  )
+endif()
+
+# Use FetchContent_Populate (not MakeAvailable) to avoid processing
+# DeepGEMM's own CMakeLists.txt which has incompatible find_package calls.
+FetchContent_GetProperties(deepgemm)
+if(NOT deepgemm_POPULATED)
+  FetchContent_Populate(deepgemm)
+endif()
+message(STATUS "DeepGEMM is available at ${deepgemm_SOURCE_DIR}")
+
+# DeepGEMM requires CUDA 12.3+ for SM90, 12.9+ for SM100
+set(DEEPGEMM_SUPPORT_ARCHS)
+if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.3)
+  list(APPEND DEEPGEMM_SUPPORT_ARCHS "9.0a")
+endif()
+if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.9)
+  list(APPEND DEEPGEMM_SUPPORT_ARCHS "10.0f")
+elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8)
+  list(APPEND DEEPGEMM_SUPPORT_ARCHS "10.0a")
+endif()
+
+cuda_archs_loose_intersection(DEEPGEMM_ARCHS
+  "${DEEPGEMM_SUPPORT_ARCHS}" "${CUDA_ARCHS}")
+
+if(DEEPGEMM_ARCHS)
+  message(STATUS "DeepGEMM CUDA architectures: ${DEEPGEMM_ARCHS}")
+
+  #
+  # DeepGEMM integration notes
+  # --------------------------
+  # We vendor DeepGEMM into vllm/third_party/deep_gemm/ and bundle a
+  # `_C.cpython-X.Y-*.so` for every CPython in `requires-python`. The
+  # per-Python build is delegated to tools/build_deepgemm_C.py.
+  #
+  # Why per-Python: DeepGEMM's binding uses PYBIND11_MODULE, which links
+  # private CPython symbols — a single `_C.abi3.so` is not viable today
+  # (see #41476 / #41512 for the failed attempt).
+  #
+  # TODOs (tracked in vllm-project/vllm#42431):
+  #   - Replace DeepGEMM's pybind11 binding with a TORCH_LIBRARY + shim
+  #     binding (cf. vllm-flash-attention/csrc/common/pytorch_shim.h) to
+  #     collapse to one `_C.abi3.so`. Needs either an upstream change or
+  #     a maintained binding fork in vLLM.
+  #   - AOT-compile DeepGEMM's CUDA kernels instead of runtime JIT to drop
+  #     the vendored CUTLASS/CCCL headers and the CUDA-toolkit-at-runtime
+  #     requirement.
+  #
+
+  # DEEPGEMM_PYTHON_INTERPRETERS: ":"-separated target Python paths.
+  # Empty/unset → fall back to the build interpreter (editable installs).
+  # (Empty-but-set env vars test as DEFINED in cmake — treat as unset.)
+  if(NOT "$ENV{DEEPGEMM_PYTHON_INTERPRETERS}" STREQUAL "")
+    string(REPLACE ":" ";" _dg_pythons "$ENV{DEEPGEMM_PYTHON_INTERPRETERS}")
+  else()
+    set(_dg_pythons "${Python_EXECUTABLE}")
+  endif()
+  message(STATUS "DeepGEMM _C will be built for: ${_dg_pythons}")
+
+  # add_custom_command does no implicit header scanning; glob explicitly so
+  # header-only edits in DeepGEMM/cutlass/fmt re-trigger the rebuild.
+  file(GLOB_RECURSE _dg_headers
+    "${deepgemm_SOURCE_DIR}/csrc/*.h"
+    "${deepgemm_SOURCE_DIR}/csrc/*.hpp"
+    "${deepgemm_SOURCE_DIR}/deep_gemm/include/*.h"
+    "${deepgemm_SOURCE_DIR}/deep_gemm/include/*.hpp"
+    "${deepgemm_SOURCE_DIR}/deep_gemm/include/*.cuh")
+
+  set(_dg_markers)
+  set(_dg_seen_soabis)
+  foreach(_pybin IN LISTS _dg_pythons)
+    execute_process(
+      COMMAND "${_pybin}" -c
+        "import sysconfig; print(sysconfig.get_config_var('SOABI'))"
+      OUTPUT_VARIABLE _dg_soabi
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+      COMMAND_ERROR_IS_FATAL ANY)
+    # Dedup interpreters that resolve to the same CPython.
+    if(_dg_soabi IN_LIST _dg_seen_soabis)
+      continue()
+    endif()
+    list(APPEND _dg_seen_soabis "${_dg_soabi}")
+    set(_dg_dir "${CMAKE_CURRENT_BINARY_DIR}/deepgemm_C_${_dg_soabi}")
+    set(_dg_marker "${_dg_dir}/.built")
+    add_custom_command(
+      OUTPUT "${_dg_marker}"
+      COMMAND "${Python_EXECUTABLE}"
+              "${CMAKE_SOURCE_DIR}/tools/build_deepgemm_C.py"
+              "${deepgemm_SOURCE_DIR}" "${_dg_dir}" "${_pybin}"
+      COMMAND "${CMAKE_COMMAND}" -E touch "${_dg_marker}"
+      DEPENDS "${CMAKE_SOURCE_DIR}/tools/build_deepgemm_C.py"
+              "${deepgemm_SOURCE_DIR}/csrc/python_api.cpp"
+              ${_dg_headers}
+      COMMENT "Building DeepGEMM _C for ${_pybin}"
+      VERBATIM)
+    list(APPEND _dg_markers "${_dg_marker}")
+    install(DIRECTORY "${_dg_dir}/"
+      DESTINATION vllm/third_party/deep_gemm
+      COMPONENT _deep_gemm_C
+      FILES_MATCHING PATTERN "_C.cpython-*.so")
+  endforeach()
+  add_custom_target(_deep_gemm_C ALL DEPENDS ${_dg_markers})
+
+  #
+  # Vendor DeepGEMM Python package files
+  #
+  install(FILES
+    "${deepgemm_SOURCE_DIR}/deep_gemm/__init__.py"
+    DESTINATION vllm/third_party/deep_gemm
+    COMPONENT _deep_gemm_C)
+
+  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/utils/"
+    DESTINATION vllm/third_party/deep_gemm/utils
+    COMPONENT _deep_gemm_C
+    FILES_MATCHING PATTERN "*.py")
+
+  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/testing/"
+    DESTINATION vllm/third_party/deep_gemm/testing
+    COMPONENT _deep_gemm_C
+    FILES_MATCHING PATTERN "*.py")
+
+  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/legacy/"
+    DESTINATION vllm/third_party/deep_gemm/legacy
+    COMPONENT _deep_gemm_C
+    FILES_MATCHING PATTERN "*.py")
+
+  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/mega/"
+    DESTINATION vllm/third_party/deep_gemm/mega
+    COMPONENT _deep_gemm_C
+    FILES_MATCHING PATTERN "*.py")
+
+  # Generate envs.py (normally generated by DeepGEMM's setup.py build step)
+  file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/deep_gemm_envs.py"
+    "# Pre-installed environment variables\npersistent_envs = dict()\n")
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/deep_gemm_envs.py"
+    DESTINATION vllm/third_party/deep_gemm
+    RENAME envs.py
+    COMPONENT _deep_gemm_C)
+
+  #
+  # Install include files needed for JIT compilation at runtime.
+  # The JIT compiler finds these relative to the package directory.
+  #
+
+  # DeepGEMM's own CUDA headers
+  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/include/"
+    DESTINATION vllm/third_party/deep_gemm/include
+    COMPONENT _deep_gemm_C)
+
+  # CUTLASS and CuTe headers (vendored for JIT, separate from vLLM's CUTLASS)
+  install(DIRECTORY "${deepgemm_SOURCE_DIR}/third-party/cutlass/include/"
+    DESTINATION vllm/third_party/deep_gemm/include
+    COMPONENT _deep_gemm_C)
+
+else()
+  message(STATUS "DeepGEMM will not compile: "
+    "unsupported CUDA architecture ${CUDA_ARCHS}")
+  # Create empty target so setup.py doesn't fail on unsupported systems
+  add_custom_target(_deep_gemm_C)
+endif()
diff --git a/cmake/external_projects/flashmla.cmake b/cmake/external_projects/flashmla.cmake
index 0f16b9161fa3..65986df55012 100644
--- a/cmake/external_projects/flashmla.cmake
+++ b/cmake/external_projects/flashmla.cmake
@@ -19,7 +19,7 @@ else()
   FetchContent_Declare(
         flashmla
         GIT_REPOSITORY https://github.com/vllm-project/FlashMLA
-        GIT_TAG 692917b1cda61b93ac9ee2d846ec54e75afe87b1
+        GIT_TAG a6ec2ba7bd0a7dff98b3f4d3e6b52b159c48d78b
         GIT_PROGRESS TRUE
         CONFIGURE_COMMAND ""
         BUILD_COMMAND ""
diff --git a/cmake/external_projects/triton_kernels.cmake b/cmake/external_projects/triton_kernels.cmake
index 1d8b9779c8f7..2966c78030bd 100644
--- a/cmake/external_projects/triton_kernels.cmake
+++ b/cmake/external_projects/triton_kernels.cmake
@@ -1,6 +1,6 @@
 # Install OpenAI triton_kernels from https://github.com/triton-lang/triton/tree/main/python/triton_kernels
 
-set(DEFAULT_TRITON_KERNELS_TAG "v3.6.0")
+set(DEFAULT_TRITON_KERNELS_TAG "v3.5.1")
 
 # Set TRITON_KERNELS_SRC_DIR for use with local development with vLLM. We expect TRITON_KERNELS_SRC_DIR to
 # be directly set to the triton_kernels python directory.
diff --git a/cmake/external_projects/vllm_flash_attn.cmake b/cmake/external_projects/vllm_flash_attn.cmake
index 443d41d5a21a..b38917a7b0b5 100644
--- a/cmake/external_projects/vllm_flash_attn.cmake
+++ b/cmake/external_projects/vllm_flash_attn.cmake
@@ -39,7 +39,7 @@ else()
   FetchContent_Declare(
           vllm-flash-attn
           GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
-          GIT_TAG 29210221863736a08f71a866459e368ad1ac4a95
+          GIT_TAG bce29425653ec0fbc579d329883030e832d15ada
           GIT_PROGRESS TRUE
           # Don't share the vllm-flash-attn build between build types
           BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn
@@ -87,18 +87,30 @@ endforeach()
 #
 add_custom_target(_vllm_fa4_cutedsl_C)
 
-# Copy flash_attn/cute directory (needed for FA4) and transform imports
-# The cute directory uses flash_attn.cute imports internally, which we replace
-# with vllm.vllm_flash_attn.cute to match our package structure.
-install(CODE "
-  file(GLOB_RECURSE CUTE_PY_FILES \"${vllm-flash-attn_SOURCE_DIR}/flash_attn/cute/*.py\")
-  foreach(SRC_FILE \${CUTE_PY_FILES})
-    file(RELATIVE_PATH REL_PATH \"${vllm-flash-attn_SOURCE_DIR}/flash_attn/cute\" \${SRC_FILE})
-    set(DST_FILE \"\${CMAKE_INSTALL_PREFIX}/vllm/vllm_flash_attn/cute/\${REL_PATH}\")
-    get_filename_component(DST_DIR \${DST_FILE} DIRECTORY)
-    file(MAKE_DIRECTORY \${DST_DIR})
-    file(READ \${SRC_FILE} FILE_CONTENTS)
-    string(REPLACE \"flash_attn.cute\" \"vllm.vllm_flash_attn.cute\" FILE_CONTENTS \"\${FILE_CONTENTS}\")
-    file(WRITE \${DST_FILE} \"\${FILE_CONTENTS}\")
-  endforeach()
-" COMPONENT _vllm_fa4_cutedsl_C)
+# Install flash_attn/cute directory (needed for FA4).
+# When using a local source dir (VLLM_FLASH_ATTN_SRC_DIR), create a symlink
+# so edits to cute-dsl Python files take effect immediately without rebuilding.
+# Otherwise, copy files and transform flash_attn.cute imports to
+# vllm.vllm_flash_attn.cute to match our package structure.
+if(VLLM_FLASH_ATTN_SRC_DIR)
+  install(CODE "
+    set(LINK_TARGET \"${vllm-flash-attn_SOURCE_DIR}/flash_attn/cute\")
+    set(LINK_NAME \"\${CMAKE_INSTALL_PREFIX}/vllm/vllm_flash_attn/cute\")
+    file(MAKE_DIRECTORY \"\${CMAKE_INSTALL_PREFIX}/vllm/vllm_flash_attn\")
+    file(REMOVE_RECURSE \"\${LINK_NAME}\")
+    file(CREATE_LINK \"\${LINK_TARGET}\" \"\${LINK_NAME}\" SYMBOLIC)
+  " COMPONENT _vllm_fa4_cutedsl_C)
+else()
+  install(CODE "
+    file(GLOB_RECURSE CUTE_PY_FILES \"${vllm-flash-attn_SOURCE_DIR}/flash_attn/cute/*.py\")
+    foreach(SRC_FILE \${CUTE_PY_FILES})
+      file(RELATIVE_PATH REL_PATH \"${vllm-flash-attn_SOURCE_DIR}/flash_attn/cute\" \${SRC_FILE})
+      set(DST_FILE \"\${CMAKE_INSTALL_PREFIX}/vllm/vllm_flash_attn/cute/\${REL_PATH}\")
+      get_filename_component(DST_DIR \${DST_FILE} DIRECTORY)
+      file(MAKE_DIRECTORY \${DST_DIR})
+      file(READ \${SRC_FILE} FILE_CONTENTS)
+      string(REPLACE \"flash_attn.cute\" \"vllm.vllm_flash_attn.cute\" FILE_CONTENTS \"\${FILE_CONTENTS}\")
+      file(WRITE \${DST_FILE} \"\${FILE_CONTENTS}\")
+    endforeach()
+  " COMPONENT _vllm_fa4_cutedsl_C)
+endif()
diff --git a/cmake/utils.cmake b/cmake/utils.cmake
index e95333457b57..f10ba93f7c65 100644
--- a/cmake/utils.cmake
+++ b/cmake/utils.cmake
@@ -47,12 +47,17 @@ macro (append_cmake_prefix_path PKG EXPR)
   list(APPEND CMAKE_PREFIX_PATH ${_PREFIX_PATH})
 endmacro()
 
-#
-# Add a target named `hipify${NAME}` that runs the hipify preprocessor on a set
-# of CUDA source files. The names of the corresponding "hipified" sources are
-# stored in `OUT_SRCS`.
-#
+# Resolve hipified output paths for `NAME` into `OUT_SRCS` and register the
+# `.cu` sources with the shared `hipify_all` target. Per-extension hipify
+# targets are unsafe to run in parallel against a shared csrc/ output dir, so
+# accumulation here is paired with a single finalize step.
 function (hipify_sources_target OUT_SRCS NAME ORIG_SRCS)
+  if (TARGET hipify_all)
+    message(FATAL_ERROR
+      "hipify_sources_target(${NAME}) called after vllm_finalize_hipify_target. "
+      "Add the new HIP extension before the finalizer call in CMakeLists.txt.")
+  endif()
+
   #
   # Split into C++ and non-C++ (i.e. CUDA) sources.
   #
@@ -73,19 +78,41 @@ function (hipify_sources_target OUT_SRCS NAME ORIG_SRCS)
     list(APPEND HIP_SRCS "${CMAKE_CURRENT_BINARY_DIR}/${SRC}")
   endforeach()
 
-  set(CSRC_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/csrc)
-  add_custom_target(
-    hipify${NAME}
-    COMMAND ${Python_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/hipify.py -p ${CMAKE_SOURCE_DIR}/csrc -o ${CSRC_BUILD_DIR} ${SRCS}
-    DEPENDS ${CMAKE_SOURCE_DIR}/cmake/hipify.py ${SRCS}
-    BYPRODUCTS ${HIP_SRCS}
-    COMMENT "Running hipify on ${NAME} extension source files.")
+  set_property(GLOBAL APPEND PROPERTY VLLM_HIPIFY_ALL_SRCS ${SRCS})
+  set_property(GLOBAL APPEND PROPERTY VLLM_HIPIFY_ALL_BYPRODUCTS ${HIP_SRCS})
 
   # Swap out original extension sources with hipified sources.
   list(APPEND HIP_SRCS ${CXX_SRCS})
   set(${OUT_SRCS} ${HIP_SRCS} PARENT_SCOPE)
 endfunction()
 
+# Define the single shared `hipify_all` custom target that runs hipify once
+# on the union of every HIP extension's sources. Call after the last HIP
+# `define_extension_target`.
+function (vllm_finalize_hipify_target)
+  if (TARGET hipify_all)
+    return()
+  endif()
+
+  get_property(ALL_SRCS GLOBAL PROPERTY VLLM_HIPIFY_ALL_SRCS)
+  get_property(ALL_BYPRODUCTS GLOBAL PROPERTY VLLM_HIPIFY_ALL_BYPRODUCTS)
+
+  if (NOT ALL_SRCS)
+    return()
+  endif()
+
+  list(REMOVE_DUPLICATES ALL_SRCS)
+  list(REMOVE_DUPLICATES ALL_BYPRODUCTS)
+
+  set(CSRC_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/csrc)
+  add_custom_target(
+    hipify_all
+    COMMAND ${Python_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/hipify.py -p ${CMAKE_SOURCE_DIR}/csrc -o ${CSRC_BUILD_DIR} ${ALL_SRCS}
+    DEPENDS ${CMAKE_SOURCE_DIR}/cmake/hipify.py ${ALL_SRCS}
+    BYPRODUCTS ${ALL_BYPRODUCTS}
+    COMMENT "Running hipify on all extension source files.")
+endfunction()
+
 #
 # Get additional GPU compiler flags from torch.
 #
@@ -449,6 +476,16 @@ function(cuda_archs_loose_intersection OUT_CUDA_ARCHS SRC_CUDA_ARCHS TGT_CUDA_AR
   set(${OUT_CUDA_ARCHS} ${_CUDA_ARCHS} PARENT_SCOPE)
 endfunction()
 
+
+function(cuda_archs_sm90plus OUT_CUDA_ARCHS TGT_CUDA_ARCHS)
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
+    cuda_archs_loose_intersection(_archs "9.0a;10.0f;11.0f" "${TGT_CUDA_ARCHS}")
+  else()
+    cuda_archs_loose_intersection(_archs "9.0a;10.0a;10.1a;10.3a" "${TGT_CUDA_ARCHS}")
+  endif()
+  set(${OUT_CUDA_ARCHS} ${_archs} PARENT_SCOPE)
+endfunction()
+
 #
 # Override the GPU architectures detected by cmake/torch and filter them by
 # `GPU_SUPPORTED_ARCHES`. Sets the final set of architectures in
@@ -551,7 +588,7 @@ function (define_extension_target MOD_NAME)
 
   if (ARG_LANGUAGE STREQUAL "HIP")
     # Make this target dependent on the hipify preprocessor step.
-    add_dependencies(${MOD_NAME} hipify${MOD_NAME})
+    add_dependencies(${MOD_NAME} hipify_all)
     # Make sure we include the hipified versions of the headers, and avoid conflicts with the ones in the original source folder
     target_include_directories(${MOD_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/csrc
       ${ARG_INCLUDE_DIRECTORIES})
diff --git a/csrc/async_util.cuh b/csrc/async_util.cuh
new file mode 100644
index 000000000000..392d78c53fdb
--- /dev/null
+++ b/csrc/async_util.cuh
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+namespace vllm {
+namespace cuda_async {
+
+__device__ __forceinline__ void cp_async_shared_global_16_cg(
+    void* smem_ptr, const void* glob_ptr) {
+#if defined(USE_ROCM)
+  *reinterpret_cast<int4*>(smem_ptr) = *reinterpret_cast<const int4*>(glob_ptr);
+#elif defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
+  uint32_t smem = static_cast<uint32_t>(__cvta_generic_to_shared(smem_ptr));
+  asm volatile("cp.async.cg.shared.global [%0], [%1], 16;\n"
+               :
+               : "r"(smem), "l"(glob_ptr));
+#elif defined(__CUDA_ARCH__)
+  *reinterpret_cast<int4*>(smem_ptr) = *reinterpret_cast<const int4*>(glob_ptr);
+#else
+  (void)smem_ptr;
+  (void)glob_ptr;
+#endif
+}
+
+__device__ __forceinline__ void cp_async_shared_global_ca(void* smem_ptr,
+                                                          const void* glob_ptr,
+                                                          int size_bytes) {
+#if defined(USE_ROCM)
+  if (size_bytes == 4) {
+    *reinterpret_cast<uint32_t*>(smem_ptr) =
+        *reinterpret_cast<const uint32_t*>(glob_ptr);
+  } else if (size_bytes == 8) {
+    *reinterpret_cast<uint64_t*>(smem_ptr) =
+        *reinterpret_cast<const uint64_t*>(glob_ptr);
+  } else {
+    *reinterpret_cast<int4*>(smem_ptr) =
+        *reinterpret_cast<const int4*>(glob_ptr);
+  }
+#elif defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
+  uint32_t smem = static_cast<uint32_t>(__cvta_generic_to_shared(smem_ptr));
+  if (size_bytes == 4) {
+    asm volatile("cp.async.ca.shared.global [%0], [%1], 4;\n"
+                 :
+                 : "r"(smem), "l"(glob_ptr));
+  } else if (size_bytes == 8) {
+    asm volatile("cp.async.ca.shared.global [%0], [%1], 8;\n"
+                 :
+                 : "r"(smem), "l"(glob_ptr));
+  } else {
+    asm volatile("cp.async.ca.shared.global [%0], [%1], 16;\n"
+                 :
+                 : "r"(smem), "l"(glob_ptr));
+  }
+#elif defined(__CUDA_ARCH__)
+  if (size_bytes == 4) {
+    *reinterpret_cast<uint32_t*>(smem_ptr) =
+        *reinterpret_cast<const uint32_t*>(glob_ptr);
+  } else if (size_bytes == 8) {
+    *reinterpret_cast<uint64_t*>(smem_ptr) =
+        *reinterpret_cast<const uint64_t*>(glob_ptr);
+  } else {
+    *reinterpret_cast<int4*>(smem_ptr) =
+        *reinterpret_cast<const int4*>(glob_ptr);
+  }
+#else
+  (void)smem_ptr;
+  (void)glob_ptr;
+  (void)size_bytes;
+#endif
+}
+
+__device__ __forceinline__ void cp_async_commit_group() {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 && !defined(USE_ROCM)
+  asm volatile("cp.async.commit_group;\n" ::);
+#endif
+}
+
+template <int n>
+__device__ __forceinline__ void cp_async_wait_group() {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 && !defined(USE_ROCM)
+  asm volatile("cp.async.wait_group %0;\n" : : "n"(n));
+#endif
+}
+
+}  // namespace cuda_async
+}  // namespace vllm
diff --git a/csrc/attention/dtype_fp8.cuh b/csrc/attention/dtype_fp8.cuh
index e714e321b0be..3d56859d8fc5 100644
--- a/csrc/attention/dtype_fp8.cuh
+++ b/csrc/attention/dtype_fp8.cuh
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "attention_generic.cuh"
+#include "torch_utils.h"
 
 #include <stdint.h>
 #ifdef ENABLE_FP8
@@ -17,6 +18,22 @@ enum class Fp8KVCacheDataType {
   kFp8E5M2 = 2,
 };
 
+inline Fp8KVCacheDataType get_fp8_kv_cache_data_type(
+    const std::string& dtype_str) {
+  // dtype_str refers to CacheDType at vllm.config.cache.CacheDType
+  if (dtype_str == "auto" || dtype_str == "float16" ||
+      dtype_str == "bfloat16") {
+    // unquantized kv cache
+    return Fp8KVCacheDataType::kAuto;
+  } else if (dtype_str == "fp8" || dtype_str == "fp8_ds_mla" ||
+             dtype_str == "fp8_e4m3") {
+    return Fp8KVCacheDataType::kFp8E4M3;
+  } else if (dtype_str == "fp8_e5m2") {
+    return Fp8KVCacheDataType::kFp8E5M2;
+  }
+  TORCH_UTILS_CHECK(false, "Unsupported fp8 kv cache data type: ", dtype_str);
+}
+
 // fp8 vector types for quantization of kv cache
 template <>
 struct Vec<uint8_t, 1> {
diff --git a/csrc/attention/merge_attn_states.cu b/csrc/attention/merge_attn_states.cu
index 27d1e990c611..75f066e80915 100644
--- a/csrc/attention/merge_attn_states.cu
+++ b/csrc/attention/merge_attn_states.cu
@@ -3,22 +3,33 @@
 #include <ATen/cuda/CUDAContext.h>
 #include <c10/cuda/CUDAGuard.h>
 #include <algorithm>
+#include <limits>
 
 #include "attention_dtypes.h"
 #include "attention_utils.cuh"
+#include "../quantization/w8a8/fp8/common.cuh"
+#include "../dispatch_utils.h"
 
 namespace vllm {
 
 // Implements section 2.2 of https://www.arxiv.org/pdf/2501.01005
 // can be used to combine partial attention results (in the split-KV case)
-template <typename scalar_t, const uint NUM_THREADS>
+template <typename scalar_t, typename output_t, const uint NUM_THREADS,
+          bool USE_FP8_OUTPUT>
 __global__ void merge_attn_states_kernel(
-    scalar_t* output, float* output_lse, const scalar_t* prefix_output,
+    output_t* output, float* output_lse, const scalar_t* prefix_output,
     const float* prefix_lse, const scalar_t* suffix_output,
     const float* suffix_lse, const uint num_tokens, const uint num_heads,
     const uint head_size, const uint prefix_head_stride,
-    const uint output_head_stride) {
-  using pack_128b_t = uint4;
+    const uint output_head_stride, const uint prefix_num_tokens,
+    const float* output_scale) {
+  // Inputs always load 128-bit packs (pack_size elements of scalar_t).
+  // Outputs store pack_size elements of output_t, which is smaller for FP8.
+  using input_pack_t = uint4;
+  using output_pack_t =
+      std::conditional_t<USE_FP8_OUTPUT,
+                         std::conditional_t<sizeof(scalar_t) == 4, uint, uint2>,
+                         uint4>;
   const uint pack_size = 16 / sizeof(scalar_t);
   const uint threads_per_head = head_size / pack_size;
 
@@ -41,8 +52,45 @@ __global__ void merge_attn_states_kernel(
                                head_idx * output_head_stride;
   const scalar_t* prefix_head_ptr = prefix_output + src_head_offset;
   const scalar_t* suffix_head_ptr = suffix_output + src_head_offset;
-  scalar_t* output_head_ptr = output + dst_head_offset;
+  output_t* output_head_ptr = output + dst_head_offset;
 
+  // Pre-invert scale: multiplication is faster than division
+  float fp8_scale_inv = 1.0f;
+  if constexpr (USE_FP8_OUTPUT) {
+    fp8_scale_inv = 1.0f / *output_scale;
+  }
+
+  // If token_idx >= prefix_num_tokens, just copy from suffix
+  if (token_idx >= prefix_num_tokens) {
+    if (pack_offset < head_size) {
+      input_pack_t s_out_pack = reinterpret_cast<const input_pack_t*>(
+          suffix_head_ptr)[pack_offset / pack_size];
+
+      if constexpr (USE_FP8_OUTPUT) {
+        output_t o_out_pack[pack_size];
+#pragma unroll
+        for (uint i = 0; i < pack_size; ++i) {
+          const float val =
+              vllm::to_float(reinterpret_cast<const scalar_t*>(&s_out_pack)[i]);
+          o_out_pack[i] =
+              vllm::scaled_fp8_conversion<true, output_t>(val, fp8_scale_inv);
+        }
+        reinterpret_cast<output_pack_t*>(
+            output_head_ptr)[pack_offset / pack_size] =
+            *reinterpret_cast<output_pack_t*>(o_out_pack);
+      } else {
+        reinterpret_cast<output_pack_t*>(
+            output_head_ptr)[pack_offset / pack_size] = s_out_pack;
+      }
+    }
+    if (output_lse != nullptr && pack_idx == 0) {
+      float s_lse = suffix_lse[head_idx * num_tokens + token_idx];
+      output_lse[head_idx * num_tokens + token_idx] = s_lse;
+    }
+    return;
+  }
+
+  // For tokens within prefix range, merge prefix and suffix
   float p_lse = prefix_lse[head_idx * num_tokens + token_idx];
   float s_lse = suffix_lse[head_idx * num_tokens + token_idx];
   p_lse = std::isinf(p_lse) ? -std::numeric_limits<float>::infinity() : p_lse;
@@ -53,20 +101,34 @@ __global__ void merge_attn_states_kernel(
   /* In certain edge cases, MLA can produce p_lse = s_lse = -inf;
      continuing the pipeline then yields NaN. Root cause: with chunked prefill
      a batch may be split into two chunks; if a request in that batch has no
-     prefix hit, every LSE entry for that request’s position is -inf, and at
+     prefix hit, every LSE entry for that request's position is -inf, and at
      this moment we merge cross-attention at first. For now we simply emit
      prefix_output (expected to be all zeros) and prefix_lse (-inf) to fix
      this problem.
   */
   if (std::isinf(max_lse)) {
     if (pack_offset < head_size) {
-      // Pack 128b load
-      pack_128b_t p_out_pack = reinterpret_cast<const pack_128b_t*>(
+      input_pack_t p_out_pack = reinterpret_cast<const input_pack_t*>(
           prefix_head_ptr)[pack_offset / pack_size];
 
-      // Pack 128b storage
-      reinterpret_cast<pack_128b_t*>(output_head_ptr)[pack_offset / pack_size] =
-          p_out_pack;
+      if constexpr (USE_FP8_OUTPUT) {
+        // Convert prefix values to FP8 (since -inf means no data,
+        // prefix_output is expected to be zeros)
+        output_t o_out_pack[pack_size];
+#pragma unroll
+        for (uint i = 0; i < pack_size; ++i) {
+          const float val =
+              vllm::to_float(reinterpret_cast<const scalar_t*>(&p_out_pack)[i]);
+          o_out_pack[i] =
+              vllm::scaled_fp8_conversion<true, output_t>(val, fp8_scale_inv);
+        }
+        reinterpret_cast<output_pack_t*>(
+            output_head_ptr)[pack_offset / pack_size] =
+            *reinterpret_cast<output_pack_t*>(o_out_pack);
+      } else {
+        reinterpret_cast<output_pack_t*>(
+            output_head_ptr)[pack_offset / pack_size] = p_out_pack;
+      }
     }
     // We only need to write to output_lse once per head.
     if (output_lse != nullptr && pack_idx == 0) {
@@ -84,30 +146,43 @@ __global__ void merge_attn_states_kernel(
   const float s_scale = s_se / out_se;
 
   if (pack_offset < head_size) {
-    // Pack 128b load
-    pack_128b_t p_out_pack = reinterpret_cast<const pack_128b_t*>(
+    input_pack_t p_out_pack = reinterpret_cast<const input_pack_t*>(
         prefix_head_ptr)[pack_offset / pack_size];
-    pack_128b_t s_out_pack = reinterpret_cast<const pack_128b_t*>(
+    input_pack_t s_out_pack = reinterpret_cast<const input_pack_t*>(
         suffix_head_ptr)[pack_offset / pack_size];
-    pack_128b_t o_out_pack;
 
+    // Compute merged values in float32
+    float o_out_f[pack_size];
 #pragma unroll
     for (uint i = 0; i < pack_size; ++i) {
-      // Always use float for FMA to keep high precision.
-      // half(uint16_t), bfloat16, float -> float.
       const float p_out_f =
           vllm::to_float(reinterpret_cast<const scalar_t*>(&p_out_pack)[i]);
       const float s_out_f =
           vllm::to_float(reinterpret_cast<const scalar_t*>(&s_out_pack)[i]);
-      // fma: a * b + c = p_out_f * p_scale + (s_out_f * s_scale)
-      const float o_out_f = p_out_f * p_scale + (s_out_f * s_scale);
-      // float -> half(uint16_t), bfloat16, float.
-      vllm::from_float(reinterpret_cast<scalar_t*>(&o_out_pack)[i], o_out_f);
+      o_out_f[i] = p_out_f * p_scale + (s_out_f * s_scale);
     }
 
-    // Pack 128b storage
-    reinterpret_cast<pack_128b_t*>(output_head_ptr)[pack_offset / pack_size] =
-        o_out_pack;
+    // Convert and store
+    if constexpr (USE_FP8_OUTPUT) {
+      output_t o_out_pack[pack_size];
+#pragma unroll
+      for (uint i = 0; i < pack_size; ++i) {
+        o_out_pack[i] = vllm::scaled_fp8_conversion<true, output_t>(
+            o_out_f[i], fp8_scale_inv);
+      }
+      reinterpret_cast<output_pack_t*>(
+          output_head_ptr)[pack_offset / pack_size] =
+          *reinterpret_cast<output_pack_t*>(o_out_pack);
+    } else {
+      output_pack_t o_out_pack;
+#pragma unroll
+      for (uint i = 0; i < pack_size; ++i) {
+        vllm::from_float(reinterpret_cast<scalar_t*>(&o_out_pack)[i],
+                         o_out_f[i]);
+      }
+      reinterpret_cast<output_pack_t*>(
+          output_head_ptr)[pack_offset / pack_size] = o_out_pack;
+    }
   }
   // We only need to write to output_lse once per head.
   if (output_lse != nullptr && pack_idx == 0) {
@@ -134,50 +209,73 @@ __global__ void merge_attn_states_kernel(
     }                                                                   \
   }
 
-#define LAUNCH_MERGE_ATTN_STATES(scalar_t, NUM_THREADS)                     \
+#define LAUNCH_MERGE_ATTN_STATES(scalar_t, output_t, NUM_THREADS,           \
+                                 USE_FP8_OUTPUT)                            \
   {                                                                         \
-    vllm::merge_attn_states_kernel<scalar_t, NUM_THREADS>                   \
+    vllm::merge_attn_states_kernel<scalar_t, output_t, NUM_THREADS,         \
+                                   USE_FP8_OUTPUT>                          \
         <<<grid, block, 0, stream>>>(                                       \
-            reinterpret_cast<scalar_t*>(output.data_ptr()), output_lse_ptr, \
+            reinterpret_cast<output_t*>(output.data_ptr()), output_lse_ptr, \
             reinterpret_cast<scalar_t*>(prefix_output.data_ptr()),          \
             reinterpret_cast<float*>(prefix_lse.data_ptr()),                \
             reinterpret_cast<scalar_t*>(suffix_output.data_ptr()),          \
             reinterpret_cast<float*>(suffix_lse.data_ptr()), num_tokens,    \
-            num_heads, head_size, prefix_head_stride, output_head_stride);  \
+            num_heads, head_size, prefix_head_stride, output_head_stride,   \
+            prefix_num_tokens, output_scale_ptr);                           \
   }
 
 /*@brief Merges the attention states from prefix and suffix
  * into the output tensor. NUM_TOKENS: n, NUM_HEADS: h, HEAD_SIZE: d
  *
  * @param output [n,h,d] The output tensor to store the merged attention states.
- * @param output_lse [h,d] Optional tensor to store the log-sum-exp values.
+ * @param output_lse [h,n] Optional tensor to store the log-sum-exp values.
  * @param prefix_output [n,h,d] The prefix attention states.
  * @param prefix_lse [h,n] The log-sum-exp values for the prefix attention
  * states.
  * @param suffix_output [n,h,d] The suffix attention states.
  * @param suffix_lse [h,n] The log-sum-exp values for the suffix attention
  * states.
+ * @param prefill_tokens_with_context Number of prefill tokens with context
+ * For the first p tokens (0 <= token_idx < prefill_tokens_with_context), output
+ * is computed by merging prefix_output and suffix_output. For remaining tokens
+ * (prefill_tokens_with_context <= token_idx < n), output is copied directly
+ * from suffix_output.
+ * @param output_scale Optional scalar tensor for FP8 static quantization.
+ * When provided, output must be FP8 dtype.
  */
 template <typename scalar_t>
-void merge_attn_states_launcher(torch::Tensor& output,
-                                std::optional<torch::Tensor> output_lse,
-                                const torch::Tensor& prefix_output,
-                                const torch::Tensor& prefix_lse,
-                                const torch::Tensor& suffix_output,
-                                const torch::Tensor& suffix_lse) {
+void merge_attn_states_launcher(
+    torch::Tensor& output, std::optional<torch::Tensor> output_lse,
+    const torch::Tensor& prefix_output, const torch::Tensor& prefix_lse,
+    const torch::Tensor& suffix_output, const torch::Tensor& suffix_lse,
+    const std::optional<int64_t> prefill_tokens_with_context,
+    const std::optional<torch::Tensor>& output_scale) {
   constexpr uint NUM_THREADS = 128;
   const uint num_tokens = output.size(0);
   const uint num_heads = output.size(1);
   const uint head_size = output.size(2);
   const uint prefix_head_stride = prefix_output.stride(1);
   const uint output_head_stride = output.stride(1);
+  // Thread mapping is based on input BF16 pack_size
   const uint pack_size = 16 / sizeof(scalar_t);
   TORCH_CHECK(head_size % pack_size == 0,
               "headsize must be multiple of pack_size:", pack_size);
+
+  const uint prefix_num_tokens =
+      prefill_tokens_with_context.has_value()
+          ? static_cast<uint>(prefill_tokens_with_context.value())
+          : num_tokens;
+  TORCH_CHECK(prefix_num_tokens <= num_tokens,
+              "prefix_num_tokens must be <= num_tokens");
+
   float* output_lse_ptr = nullptr;
   if (output_lse.has_value()) {
     output_lse_ptr = output_lse.value().data_ptr<float>();
   }
+  float* output_scale_ptr = nullptr;
+  if (output_scale.has_value()) {
+    output_scale_ptr = output_scale.value().data_ptr<float>();
+  }
   // Process one pack elements per thread. for float, the
   // pack_size is 4 for half/bf16, the pack_size is 8.
   const uint threads_per_head = head_size / pack_size;
@@ -189,14 +287,22 @@ void merge_attn_states_launcher(torch::Tensor& output,
   const c10::cuda::OptionalCUDAGuard device_guard(prefix_output.device());
   auto stream = at::cuda::getCurrentCUDAStream();
 
-  LAUNCH_MERGE_ATTN_STATES(scalar_t, NUM_THREADS);
+  if (output_scale.has_value()) {
+    // FP8 output path - dispatch on output FP8 type
+    VLLM_DISPATCH_FP8_TYPES(output.scalar_type(), "merge_attn_states_fp8", [&] {
+      LAUNCH_MERGE_ATTN_STATES(scalar_t, fp8_t, NUM_THREADS, true);
+    });
+  } else {
+    // Original BF16/FP16/FP32 output path
+    LAUNCH_MERGE_ATTN_STATES(scalar_t, scalar_t, NUM_THREADS, false);
+  }
 }
 
-#define CALL_MERGE_ATTN_STATES_LAUNCHER(scalar_t)                           \
-  {                                                                         \
-    merge_attn_states_launcher<scalar_t>(output, output_lse, prefix_output, \
-                                         prefix_lse, suffix_output,         \
-                                         suffix_lse);                       \
+#define CALL_MERGE_ATTN_STATES_LAUNCHER(scalar_t)                     \
+  {                                                                   \
+    merge_attn_states_launcher<scalar_t>(                             \
+        output, output_lse, prefix_output, prefix_lse, suffix_output, \
+        suffix_lse, prefill_tokens_with_context, output_scale);       \
   }
 
 void merge_attn_states(torch::Tensor& output,
@@ -204,6 +310,21 @@ void merge_attn_states(torch::Tensor& output,
                        const torch::Tensor& prefix_output,
                        const torch::Tensor& prefix_lse,
                        const torch::Tensor& suffix_output,
-                       const torch::Tensor& suffix_lse) {
-  DISPATCH_BY_SCALAR_DTYPE(output.dtype(), CALL_MERGE_ATTN_STATES_LAUNCHER);
+                       const torch::Tensor& suffix_lse,
+                       std::optional<int64_t> prefill_tokens_with_context,
+                       const std::optional<torch::Tensor>& output_scale) {
+  if (output_scale.has_value()) {
+    TORCH_CHECK(output.scalar_type() == at::ScalarType::Float8_e4m3fn ||
+                    output.scalar_type() == at::ScalarType::Float8_e4m3fnuz,
+                "output must be FP8 when output_scale is provided, got: ",
+                output.scalar_type());
+  } else {
+    TORCH_CHECK(output.scalar_type() == prefix_output.scalar_type(),
+                "output dtype (", output.scalar_type(),
+                ") must match prefix_output dtype (",
+                prefix_output.scalar_type(), ") when output_scale is not set");
+  }
+  // Always dispatch on prefix_output (input) dtype
+  DISPATCH_BY_SCALAR_DTYPE(prefix_output.dtype(),
+                           CALL_MERGE_ATTN_STATES_LAUNCHER);
 }
diff --git a/csrc/attention/vertical_slash_index.cu b/csrc/attention/vertical_slash_index.cu
deleted file mode 100644
index c1b45b143f4e..000000000000
--- a/csrc/attention/vertical_slash_index.cu
+++ /dev/null
@@ -1,401 +0,0 @@
-// Copyright (c) Microsoft Corporation.
-// Licensed under the MIT license.
-
-#include <assert.h>
-
-#include <cuda.h>
-
-#include <torch/all.h>
-
-__device__ int64_t save_blocks(int* block_offset, int64_t range_start,
-                               int64_t range_end, int64_t block_size,
-                               int64_t input_block_count, int64_t kv_seqlen) {
-  if (range_start >= kv_seqlen) {
-    return input_block_count;
-  }
-  if (range_end > kv_seqlen) {
-    range_end = kv_seqlen;
-  }
-  int64_t current_block_count = input_block_count;
-  for (int idx = range_start; idx < range_end; idx += block_size) {
-    block_offset[current_block_count++] = idx;
-  }
-  return current_block_count;
-}
-
-__global__ void convert_vertical_slash_indexes_kernel(
-    const int* q_seqlens,         // [BATCH, ]
-    const int* kv_seqlens,        // [BATCH, ]
-    const int* vertical_indexes,  // [BATCH, N_HEADS, NNZ_V]
-    const int* slash_indexes,     // [BATCH, N_HEADS, NNZ_S]
-    int* block_count,             // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M)]
-    int* block_offset,  // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M), NNZ_S]
-    int* column_count,  // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M)]
-    int* column_index,  // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M), NNZ_V]
-    int64_t N_HEADS, int64_t N_ROWS, int64_t BLOCK_SIZE_M, int64_t BLOCK_SIZE_N,
-    int64_t NNZ_V, int64_t NNZ_S,
-    bool causal  // True for intra, False for succ
-) {
-  const int batch_idx = blockIdx.y;
-  const int head_idx = blockIdx.x;
-  const int group_idx = blockIdx.z;
-
-  int64_t q_seqlen = q_seqlens[batch_idx];
-  int64_t kv_seqlen = kv_seqlens[batch_idx];
-  int64_t block_idx_m = group_idx * blockDim.x + threadIdx.x;
-  int64_t start_m = block_idx_m * BLOCK_SIZE_M;
-  if (start_m >= q_seqlen) {
-    return;
-  }
-  int64_t end_m = start_m + BLOCK_SIZE_M;
-  vertical_indexes += (batch_idx * N_HEADS + head_idx) * NNZ_V;
-  slash_indexes += (batch_idx * N_HEADS + head_idx) * NNZ_S;
-  int64_t row_offset = (batch_idx * N_HEADS + head_idx) * N_ROWS + block_idx_m;
-  block_count += row_offset;
-  block_offset += row_offset * NNZ_S;
-  column_count += row_offset;
-  column_index += row_offset * NNZ_V;
-
-  bool has_slash = true;
-  int64_t tmp_col_cnt = 0, tmp_blk_cnt = 0;
-  int64_t s = 0, v = 0;
-  int64_t v_idx = vertical_indexes[v++];
-  int64_t s_idx = slash_indexes[s++];
-  if (causal) {
-    while (s_idx >= end_m + (kv_seqlen - q_seqlen) && s < NNZ_S) {
-      s_idx = slash_indexes[s++];
-    }
-    if (s_idx > end_m + (kv_seqlen - q_seqlen)) has_slash = false;
-    s_idx = max((kv_seqlen - q_seqlen) + end_m - s_idx, BLOCK_SIZE_M);
-  } else {
-    while (s_idx >= end_m + kv_seqlen && s < NNZ_S) {
-      s_idx = slash_indexes[s++];
-    }
-    if (s_idx > end_m + kv_seqlen) has_slash = false;
-    s_idx = max(kv_seqlen + end_m - s_idx, BLOCK_SIZE_M);
-  }
-
-  int64_t range_start = s_idx - BLOCK_SIZE_M, range_end = s_idx;
-  if (!has_slash) {
-    if (causal) {
-      range_start = (kv_seqlen - q_seqlen) + end_m;
-      range_end = (kv_seqlen - q_seqlen) + end_m + BLOCK_SIZE_N;
-    } else {
-      range_start = kv_seqlen;
-      range_end = kv_seqlen + BLOCK_SIZE_N;
-    }
-  }
-
-  bool slash_finished = false;
-  while (1) {
-    if (v_idx < range_end) {
-      if (v_idx < range_start) {
-        column_index[tmp_col_cnt++] = v_idx;
-      }
-      if (v < NNZ_V) {
-        v_idx = vertical_indexes[v++];
-      } else {
-        if (causal)
-          v_idx = end_m + BLOCK_SIZE_N + (kv_seqlen - q_seqlen);
-        else
-          v_idx = end_m + BLOCK_SIZE_N + kv_seqlen;
-      }
-    } else {
-      if ((s < NNZ_S && causal) ||
-          (s < NNZ_S && !causal && slash_indexes[s] >= start_m)) {
-        if (causal)
-          s_idx = max((kv_seqlen - q_seqlen) + end_m - slash_indexes[s++],
-                      BLOCK_SIZE_M);
-        else
-          s_idx = max(kv_seqlen + end_m - slash_indexes[s++], BLOCK_SIZE_M);
-      } else {
-        if (v == NNZ_V || (v_idx > range_start && causal)) {
-          // add the last vertical if no more slash
-          if (v == NNZ_V && !causal && v_idx < kv_seqlen) {
-            column_index[tmp_col_cnt++] = v_idx;
-          }
-          tmp_blk_cnt = save_blocks(block_offset, range_start, range_end,
-                                    BLOCK_SIZE_N, tmp_blk_cnt, kv_seqlen);
-          break;
-        } else {
-          if (causal) {
-            range_start = (kv_seqlen - q_seqlen) + end_m;
-            range_end = (kv_seqlen - q_seqlen) + end_m + BLOCK_SIZE_N;
-          } else {
-            // if slash_finished but there are vertical left, save current
-            // blocks
-            tmp_blk_cnt = save_blocks(block_offset, range_start, range_end,
-                                      BLOCK_SIZE_N, tmp_blk_cnt, kv_seqlen);
-            range_start = kv_seqlen;
-            range_end = kv_seqlen + BLOCK_SIZE_N;
-          }
-          slash_finished = true;
-        }
-      }
-      if (!slash_finished) {
-        if (s_idx > range_end + BLOCK_SIZE_M) {
-          tmp_blk_cnt = save_blocks(block_offset, range_start, range_end,
-                                    BLOCK_SIZE_N, tmp_blk_cnt, kv_seqlen);
-          range_start = s_idx - BLOCK_SIZE_M;
-          range_end = s_idx;
-        } else if (s_idx > range_end) {
-          range_end += BLOCK_SIZE_M;
-        }
-      }
-    }
-  }
-
-  block_count[0] = tmp_blk_cnt;
-  column_count[0] = tmp_col_cnt;
-}
-
-void convert_vertical_slash_indexes_64x64(
-    const int* q_seqlens,         // [BATCH, ]
-    const int* kv_seqlens,        // [BATCH, ]
-    const int* vertical_indexes,  // [BATCH, N_HEADS, NNZ_V]
-    const int* slash_indexes,     // [BATCH, N_HEADS, NNZ_S]
-    int* block_count,             // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M)]
-    int* block_offset,  // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M), NNZ_S]
-    int* column_count,  // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M)]
-    int* column_index,  // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M), NNZ_V]
-    int64_t BATCH_SIZE, int64_t N_HEADS, int64_t N_ROWS, int64_t BLOCK_SIZE_M,
-    int64_t BLOCK_SIZE_N, int64_t NNZ_V, int64_t NNZ_S, bool causal) {
-  const int N_THREADS = 64;
-  const dim3 dimBlock(N_THREADS);
-  const dim3 dimGrid(N_HEADS, BATCH_SIZE, (N_ROWS + N_THREADS - 1) / N_THREADS);
-  convert_vertical_slash_indexes_kernel<<<dimGrid, dimBlock>>>(
-      q_seqlens, kv_seqlens, vertical_indexes, slash_indexes, block_count,
-      block_offset, column_count, column_index, N_HEADS, N_ROWS, BLOCK_SIZE_M,
-      BLOCK_SIZE_N, NNZ_V, NNZ_S, causal);
-}
-
-/**
- * Implements the Algorithm 4 in paper https://arxiv.org/abs/2407.02490.
- *
- * This function builds the index of each row of blocks from vertical indices
- * and slash indices. The vertical indices are treated as points, while the
- * slash indices are converted as ranges. The output consists of the merged
- * ranges and separate column indices, where the ranges are represented by
- * block indices.
- *
- * The implementation is referenced from the original MInference repo:
- * https://github.com/microsoft/MInference/blob/main/csrc/vertical_slash_index.cu.
- */
-void convert_vertical_slash_indexes(
-    torch::Tensor& block_count,      // [BATCH, N_HEADS, NUM_ROWS]
-    torch::Tensor& block_offset,     // [BATCH, N_HEADS, NUM_ROWS, NNZ_S]
-    torch::Tensor& column_count,     // [BATCH, N_HEADS, NUM_ROWS]
-    torch::Tensor& column_index,     // [BATCH, N_HEADS, NUM_ROWS, NNZ_V]
-    torch::Tensor q_seqlens,         // [BATCH, ]
-    torch::Tensor kv_seqlens,        // [BATCH, ]
-    torch::Tensor vertical_indexes,  // [BATCH, N_HEADS, NNZ_V]
-    torch::Tensor slash_indexes,     // [BATCH, N_HEADS, NNZ_S]
-    int64_t context_size, int64_t block_size_M, int64_t block_size_N,
-    bool causal) {
-  cudaSetDevice(q_seqlens.get_device());
-
-  int batch_size = slash_indexes.size(0);
-  int num_heads = slash_indexes.size(1);
-  int nnz_slash = slash_indexes.size(2);
-  int nnz_vertical = vertical_indexes.size(2);
-  int num_rows = (context_size + block_size_M - 1) / block_size_M;
-
-  convert_vertical_slash_indexes_64x64(
-      q_seqlens.data_ptr<int>(), kv_seqlens.data_ptr<int>(),
-      vertical_indexes.data_ptr<int>(), slash_indexes.data_ptr<int>(),
-      block_count.data_ptr<int>(), block_offset.data_ptr<int>(),
-      column_count.data_ptr<int>(), column_index.data_ptr<int>(), batch_size,
-      num_heads, num_rows, block_size_M, block_size_N, nnz_vertical, nnz_slash,
-      causal);
-}
-
-__global__ void convert_vertical_slash_indexes_kernel_mergehead(
-    const int* q_seqlens,         // [BATCH, ]
-    const int* kv_seqlens,        // [BATCH, ]
-    const int* vertical_indexes,  // [BATCH, N_HEADS, NNZ_V]
-    const int* slash_indexes,     // [BATCH, N_HEADS, NNZ_S]
-    const int* per_head_vertical_topkv, const int* per_head_slash_topkv,
-    int* block_count,   // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M)]
-    int* block_offset,  // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M), NNZ_S]
-    int* column_count,  // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M)]
-    int* column_index,  // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M), NNZ_V]
-    int64_t N_HEADS, int64_t N_ROWS, int64_t BLOCK_SIZE_M, int64_t BLOCK_SIZE_N,
-    int64_t NNZ_V, int64_t NNZ_S,
-    bool causal  // True for intra, False for succ
-) {
-  const int batch_idx = blockIdx.y;
-  const int head_idx = blockIdx.x;
-  const int group_idx = blockIdx.z;
-
-  int64_t q_seqlen = q_seqlens[batch_idx];
-  int64_t kv_seqlen = kv_seqlens[batch_idx];
-  int64_t block_idx_m = group_idx * blockDim.x + threadIdx.x;
-  int64_t start_m = block_idx_m * BLOCK_SIZE_M;
-  if (start_m >= q_seqlen) {
-    return;
-  }
-  int64_t end_m = start_m + BLOCK_SIZE_M;
-  vertical_indexes += (batch_idx * N_HEADS + head_idx) * NNZ_V;
-  slash_indexes += (batch_idx * N_HEADS + head_idx) * NNZ_S;
-  int64_t row_offset = (batch_idx * N_HEADS + head_idx) * N_ROWS + block_idx_m;
-  block_count += row_offset;
-  block_offset += row_offset * NNZ_S;
-  column_count += row_offset;
-  column_index += row_offset * NNZ_V;
-
-  // MergeHead: each head has it's unique max topk NNZ_V，NNZ_S. (NNZ_V，NNZ_S
-  // above is buffer size, use to compute offset)
-  NNZ_S = per_head_slash_topkv[head_idx];
-  NNZ_V = per_head_vertical_topkv[head_idx];
-
-  bool has_slash = true;
-  int64_t tmp_col_cnt = 0, tmp_blk_cnt = 0;
-  int64_t s = 0, v = 0;
-  int64_t v_idx = vertical_indexes[v++];
-  int64_t s_idx = slash_indexes[s++];
-  if (causal) {
-    while (s_idx >= end_m + (kv_seqlen - q_seqlen) && s < NNZ_S) {
-      s_idx = slash_indexes[s++];
-    }
-    if (s_idx > end_m + (kv_seqlen - q_seqlen)) has_slash = false;
-    s_idx = max((kv_seqlen - q_seqlen) + end_m - s_idx, BLOCK_SIZE_M);
-  } else {
-    while (s_idx >= end_m + kv_seqlen && s < NNZ_S) {
-      s_idx = slash_indexes[s++];
-    }
-    if (s_idx > end_m + kv_seqlen) has_slash = false;
-    s_idx = max(kv_seqlen + end_m - s_idx, BLOCK_SIZE_M);
-  }
-
-  int64_t range_start = s_idx - BLOCK_SIZE_M, range_end = s_idx;
-  if (!has_slash) {
-    if (causal) {
-      range_start = (kv_seqlen - q_seqlen) + end_m;
-      range_end = (kv_seqlen - q_seqlen) + end_m + BLOCK_SIZE_N;
-    } else {
-      range_start = kv_seqlen;
-      range_end = kv_seqlen + BLOCK_SIZE_N;
-    }
-  }
-
-  bool slash_finished = false;
-  while (1) {
-    if (v_idx < range_end) {
-      if (v_idx < range_start) {
-        column_index[tmp_col_cnt++] = v_idx;
-      }
-      if (v < NNZ_V) {
-        v_idx = vertical_indexes[v++];
-      } else {
-        if (causal)
-          v_idx = end_m + BLOCK_SIZE_N + (kv_seqlen - q_seqlen);
-        else
-          v_idx = end_m + BLOCK_SIZE_N + kv_seqlen;
-      }
-    } else {
-      if ((s < NNZ_S && causal) ||
-          (s < NNZ_S && !causal && slash_indexes[s] >= start_m)) {
-        if (causal)
-          s_idx = max((kv_seqlen - q_seqlen) + end_m - slash_indexes[s++],
-                      BLOCK_SIZE_M);
-        else
-          s_idx = max(kv_seqlen + end_m - slash_indexes[s++], BLOCK_SIZE_M);
-      } else {
-        if (v == NNZ_V || (v_idx > range_start && causal)) {
-          // add the last vertical if no more slash
-          if (v == NNZ_V && !causal && v_idx < kv_seqlen) {
-            column_index[tmp_col_cnt++] = v_idx;
-          }
-          tmp_blk_cnt = save_blocks(block_offset, range_start, range_end,
-                                    BLOCK_SIZE_N, tmp_blk_cnt, kv_seqlen);
-          break;
-        } else {
-          if (causal) {
-            range_start = (kv_seqlen - q_seqlen) + end_m;
-            range_end = (kv_seqlen - q_seqlen) + end_m + BLOCK_SIZE_N;
-          } else {
-            // if slash_finished but there are vertical left, save current
-            // blocks
-            tmp_blk_cnt = save_blocks(block_offset, range_start, range_end,
-                                      BLOCK_SIZE_N, tmp_blk_cnt, kv_seqlen);
-            range_start = kv_seqlen;
-            range_end = kv_seqlen + BLOCK_SIZE_N;
-          }
-          slash_finished = true;
-        }
-      }
-      if (!slash_finished) {
-        if (s_idx > range_end + BLOCK_SIZE_M) {
-          tmp_blk_cnt = save_blocks(block_offset, range_start, range_end,
-                                    BLOCK_SIZE_N, tmp_blk_cnt, kv_seqlen);
-          range_start = s_idx - BLOCK_SIZE_M;
-          range_end = s_idx;
-        } else if (s_idx > range_end) {
-          range_end += BLOCK_SIZE_M;
-        }
-      }
-    }
-  }
-
-  block_count[0] = tmp_blk_cnt;
-  column_count[0] = tmp_col_cnt;
-}
-
-void convert_vertical_slash_indexes_64x64_mergehead(
-    const int* q_seqlens,         // [BATCH, ]
-    const int* kv_seqlens,        // [BATCH, ]
-    const int* vertical_indexes,  // [BATCH, N_HEADS, NNZ_V]
-    const int* slash_indexes,     // [BATCH, N_HEADS, NNZ_S]
-    int* per_head_vertical_topkv, int* per_head_slash_topkv,
-    int* block_count,   // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M)]
-    int* block_offset,  // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M), NNZ_S]
-    int* column_count,  // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M)]
-    int* column_index,  // [BATCH, N_HEADS, cdiv(N_CTX, BLOCK_SIZE_M), NNZ_V]
-    int64_t BATCH_SIZE, int64_t N_HEADS, int64_t N_ROWS, int64_t BLOCK_SIZE_M,
-    int64_t BLOCK_SIZE_N, int64_t NNZ_V, int64_t NNZ_S, bool causal) {
-  const int N_THREADS = 64;
-  const dim3 dimBlock(N_THREADS);
-  const dim3 dimGrid(N_HEADS, BATCH_SIZE, (N_ROWS + N_THREADS - 1) / N_THREADS);
-  convert_vertical_slash_indexes_kernel_mergehead<<<dimGrid, dimBlock>>>(
-      q_seqlens, kv_seqlens, vertical_indexes, slash_indexes,
-      per_head_vertical_topkv, per_head_slash_topkv, block_count, block_offset,
-      column_count, column_index, N_HEADS, N_ROWS, BLOCK_SIZE_M, BLOCK_SIZE_N,
-      NNZ_V, NNZ_S, causal);
-}
-
-/**
- * Implements the Algorithm 4 in paper https://arxiv.org/abs/2407.02490.
- *
- * Like the above convert_vertical_slash_indexes, but with
- * pre-computed vertical and slash counts.
- */
-void convert_vertical_slash_indexes_mergehead(
-    torch::Tensor& block_count,            // [BATCH, N_HEADS, NUM_ROWS]
-    torch::Tensor& block_offset,           // [BATCH, N_HEADS, NUM_ROWS, NNZ_S]
-    torch::Tensor& column_count,           // [BATCH, N_HEADS, NUM_ROWS]
-    torch::Tensor& column_index,           // [BATCH, N_HEADS, NUM_ROWS, NNZ_V]
-    torch::Tensor q_seqlens,               // [BATCH, ]
-    torch::Tensor kv_seqlens,              // [BATCH, ]
-    torch::Tensor vertical_indexes,        // [BATCH, N_HEADS, NNZ_V]
-    torch::Tensor slash_indexes,           // [BATCH, N_HEADS, NNZ_S]
-    torch::Tensor vertical_indices_count,  // [N_HEADS, ]
-    torch::Tensor slash_indices_count,     // [N_HEADS, ]
-    int64_t context_size, int64_t block_size_M, int64_t block_size_N,
-    bool causal) {
-  cudaSetDevice(q_seqlens.get_device());
-
-  int batch_size = slash_indexes.size(0);
-  int num_heads = slash_indexes.size(1);
-  int nnz_slash = slash_indexes.size(2);
-  int nnz_vertical = vertical_indexes.size(2);
-  int num_rows = (context_size + block_size_M - 1) / block_size_M;
-
-  convert_vertical_slash_indexes_64x64_mergehead(
-      q_seqlens.data_ptr<int>(), kv_seqlens.data_ptr<int>(),
-      vertical_indexes.data_ptr<int>(), slash_indexes.data_ptr<int>(),
-      vertical_indices_count.data_ptr<int>(),
-      slash_indices_count.data_ptr<int>(), block_count.data_ptr<int>(),
-      block_offset.data_ptr<int>(), column_count.data_ptr<int>(),
-      column_index.data_ptr<int>(), batch_size, num_heads, num_rows,
-      block_size_M, block_size_N, nnz_vertical, nnz_slash, causal);
-}
diff --git a/csrc/cache.h b/csrc/cache.h
index 0188a568edc7..a9e74b0dc2df 100644
--- a/csrc/cache.h
+++ b/csrc/cache.h
@@ -10,6 +10,11 @@ void swap_blocks(torch::Tensor& src, torch::Tensor& dst,
                  int64_t block_size_in_bytes,
                  const torch::Tensor& block_mapping);
 
+void swap_blocks_batch(const torch::Tensor& src_ptrs,
+                       const torch::Tensor& dst_ptrs,
+                       const torch::Tensor& sizes,
+                       bool is_src_access_order_any);
+
 void reshape_and_cache(torch::Tensor& key, torch::Tensor& value,
                        torch::Tensor& key_cache, torch::Tensor& value_cache,
                        torch::Tensor& slot_mapping,
diff --git a/csrc/cache_kernels.cu b/csrc/cache_kernels.cu
index 2b3906df9ec5..9130dd2ccae7 100644
--- a/csrc/cache_kernels.cu
+++ b/csrc/cache_kernels.cu
@@ -24,6 +24,8 @@
 #ifdef USE_ROCM
   #include <hip/hip_bf16.h>
 typedef __hip_bfloat16 __nv_bfloat16;
+#else
+  #include <cuda.h>
 #endif
 
 #if defined(__gfx942__)
@@ -73,6 +75,104 @@ void swap_blocks(torch::Tensor& src, torch::Tensor& dst,
   }
 }
 
+void swap_blocks_batch(const torch::Tensor& src_ptrs,
+                       const torch::Tensor& dst_ptrs,
+                       const torch::Tensor& sizes,
+                       bool is_src_access_order_any) {
+  TORCH_CHECK(src_ptrs.device().is_cpu(), "src_ptrs must be on CPU");
+  TORCH_CHECK(dst_ptrs.device().is_cpu(), "dst_ptrs must be on CPU");
+  TORCH_CHECK(sizes.device().is_cpu(), "sizes must be on CPU");
+  TORCH_CHECK(src_ptrs.dtype() == torch::kInt64, "src_ptrs must be int64");
+  TORCH_CHECK(dst_ptrs.dtype() == torch::kInt64, "dst_ptrs must be int64");
+  TORCH_CHECK(sizes.dtype() == torch::kInt64, "sizes must be int64");
+
+  const int64_t n = src_ptrs.size(0);
+  TORCH_CHECK(dst_ptrs.size(0) == n, "dst_ptrs length must match src_ptrs");
+  TORCH_CHECK(sizes.size(0) == n, "sizes length must match src_ptrs");
+
+  if (n == 0) return;
+
+  int64_t* src_data = src_ptrs.mutable_data_ptr<int64_t>();
+  int64_t* dst_data = dst_ptrs.mutable_data_ptr<int64_t>();
+  int64_t* size_data = sizes.mutable_data_ptr<int64_t>();
+
+  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  // Use cuMemcpyBatchAsync / hipMemcpyBatchAsync to submit all copies in a
+  // single driver call, amortizing per-copy submission overhead. int64_t
+  // and CUdeviceptr/void*/size_t are all 8 bytes on 64-bit platforms, so we
+  // reinterpret_cast the tensor data directly to avoid copies.
+  static_assert(sizeof(size_t) == sizeof(int64_t));
+#if !defined(USE_ROCM) && defined(CUDA_VERSION) && CUDA_VERSION >= 12080
+  static_assert(sizeof(CUdeviceptr) == sizeof(int64_t));
+  // Resolve cuMemcpyBatchAsync at runtime via cuGetProcAddress so that
+  // binaries compiled with CUDA 12.8+ still work on older drivers, and
+  // we avoid the CUDA 13.0 header remapping (#define to _v2 signature).
+  // The function pointer is cached after the first call.
+  using BatchFn =
+      CUresult (*)(CUdeviceptr*, CUdeviceptr*, size_t*, size_t,
+                   CUmemcpyAttributes*, size_t*, size_t, size_t*, CUstream);
+  static BatchFn batch_fn = []() -> BatchFn {
+    CUdriverProcAddressQueryResult sym_status;
+    void* fn_ptr = nullptr;
+    CUresult res = cuGetProcAddress("cuMemcpyBatchAsync", &fn_ptr, 12080,
+                                    CU_GET_PROC_ADDRESS_DEFAULT, &sym_status);
+    if (res != CUDA_SUCCESS || fn_ptr == nullptr) {
+      return nullptr;
+    }
+    return reinterpret_cast<BatchFn>(fn_ptr);
+  }();
+
+  if (batch_fn != nullptr) {
+    CUmemcpyAttributes attr = {};
+    // ANY lets the DMA engine prefetch source bytes out of stream order,
+    // which is only safe when no GPU stream is concurrently writing the
+    // source.
+    attr.srcAccessOrder = is_src_access_order_any
+                              ? CU_MEMCPY_SRC_ACCESS_ORDER_ANY
+                              : CU_MEMCPY_SRC_ACCESS_ORDER_STREAM;
+    size_t attrs_idx = 0;
+    size_t fail_idx = 0;
+    CUresult result = batch_fn(reinterpret_cast<CUdeviceptr*>(dst_data),
+                               reinterpret_cast<CUdeviceptr*>(src_data),
+                               reinterpret_cast<size_t*>(size_data),
+                               static_cast<size_t>(n), &attr, &attrs_idx, 1,
+                               &fail_idx, static_cast<CUstream>(stream));
+    TORCH_CHECK(result == CUDA_SUCCESS, "cuMemcpyBatchAsync failed at index ",
+                fail_idx, " with error ", result);
+    return;
+  }
+#elif defined(USE_ROCM) && defined(HIP_VERSION) && HIP_VERSION >= 70100000
+  // ROCm 7.1+ exposes hipMemcpyBatchAsync. The 7.2.1 implementation early-
+  // returns hipErrorNotSupported whenever numAttrs > 0 (see ROCm/clr @
+  // rocm-7.2.1 hipamd/src/hip_memory.cpp:2819-2822), so call with
+  // numAttrs=0.
+  {
+    hipMemcpyAttributes attr = {};
+    size_t attrs_idx = 0;
+    size_t fail_idx = 0;
+    hipError_t result = hipMemcpyBatchAsync(
+        reinterpret_cast<void**>(dst_data), reinterpret_cast<void**>(src_data),
+        reinterpret_cast<size_t*>(size_data), static_cast<size_t>(n), &attr,
+        &attrs_idx, 0, &fail_idx, static_cast<hipStream_t>(stream));
+    TORCH_CHECK(result == hipSuccess, "hipMemcpyBatchAsync failed at index ",
+                fail_idx, " with error ", result);
+    return;
+  }
+#endif
+  {
+    // Fallback for CUDA < 12.8, older CUDA drivers, and ROCm < 7.1:
+    // individual async copies. cudaMemcpyDefault lets the driver infer
+    // direction from pointer types.
+    for (int64_t i = 0; i < n; i++) {
+      cudaMemcpyAsync(reinterpret_cast<void*>(dst_data[i]),
+                      reinterpret_cast<void*>(src_data[i]),
+                      static_cast<size_t>(size_data[i]), cudaMemcpyDefault,
+                      stream);
+    }
+  }
+}
+
 namespace vllm {
 
 // Grid: (num_layers, num_pairs)
@@ -523,6 +623,11 @@ __global__ void cp_gather_indexer_k_quant_cache_kernel(
   const int head_idx = (blockIdx.y * blockDim.x + threadIdx.x) * VEC_SIZE;
   // Find batch index within a block
   __shared__ int batch_idx[BLOCK_Y_SIZE];
+  if (threadIdx.x == 0) {
+    batch_idx[threadIdx.y] = -1;
+  }
+  __syncthreads();
+
   for (int iter = 0; iter < cuda_utils::ceil_div(batch_size, int(blockDim.x));
        iter++) {
     int tid = iter * blockDim.x + threadIdx.x;
@@ -535,16 +640,18 @@ __global__ void cp_gather_indexer_k_quant_cache_kernel(
     }
   }
 
-#ifndef USE_ROCM
-  __syncwarp();
-#endif
+  __syncthreads();
 
-  if (head_idx >= head_dim || token_idx >= num_tokens) {
+  // num_tokens may be an allocation upper bound when Python avoids a D2H sync.
+  // Only tokens covered by the exact device-side cu_seq_lens are valid to
+  // gather.
+  const int batch = batch_idx[threadIdx.y];
+  if (head_idx >= head_dim || token_idx >= num_tokens || batch < 0) {
     return;
   }
-  const int inbatch_seq_idx = token_idx - cu_seq_lens[batch_idx[threadIdx.y]];
-  const int block_idx = block_table[batch_idx[threadIdx.y] * num_blocks +
-                                    inbatch_seq_idx / cache_block_size];
+  const int inbatch_seq_idx = token_idx - cu_seq_lens[batch];
+  const int block_idx =
+      block_table[batch * num_blocks + inbatch_seq_idx / cache_block_size];
   const int64_t src_block_offset = block_idx * block_stride;
   const int64_t cache_inblock_offset =
       (inbatch_seq_idx % cache_block_size) * head_dim + head_idx;
@@ -648,6 +755,28 @@ void reshape_and_cache_flash(
   int num_tokens = slot_mapping.size(0);
   int num_heads = key.size(1);
   int head_size = key.size(2);
+
+  const at::cuda::OptionalCUDAGuard device_guard(device_of(key));
+  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  if (kv_cache_dtype == "nvfp4") {
+#if defined(ENABLE_NVFP4_SM100) || defined(ENABLE_NVFP4_SM120)
+    // NVFP4 dispatch is compiled separately for SM100+.
+    extern void reshape_and_cache_nvfp4_dispatch(
+        torch::Tensor & key, torch::Tensor & value, torch::Tensor & key_cache,
+        torch::Tensor & value_cache, torch::Tensor & slot_mapping,
+        torch::Tensor & k_scale, torch::Tensor & v_scale);
+    reshape_and_cache_nvfp4_dispatch(key, value, key_cache, value_cache,
+                                     slot_mapping, k_scale, v_scale);
+    return;
+#else
+    TORCH_CHECK(false,
+                "NVFP4 KV cache requires SM100+ (Blackwell). "
+                "Please rebuild vllm with a Blackwell-compatible CUDA target.");
+#endif
+  }
+
+  // Original FP8/auto path.
   int block_size = key_cache.size(1);
 
   int64_t key_stride = key.stride(0);
@@ -665,8 +794,6 @@ void reshape_and_cache_flash(
 
   dim3 grid(num_tokens);
   dim3 block(std::min(num_heads * head_size, 512));
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(key));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 
   DISPATCH_BY_KV_CACHE_DTYPE(key.dtype(), kv_cache_dtype,
                              CALL_RESHAPE_AND_CACHE_FLASH);
@@ -1394,6 +1521,9 @@ void concat_mla_q(torch::Tensor& ql_nope,  // [num_tokens, num_heads, nope_dim]
   TORCH_CHECK(ql_nope.stride(2) == 1, "ql_nope must have stride 1 in dim 2");
   TORCH_CHECK(q_pe.stride(2) == 1, "q_pe must have stride 1 in dim 2");
   TORCH_CHECK(q_out.stride(2) == 1, "q_out must have stride 1 in dim 2");
+  TORCH_CHECK(ql_nope.scalar_type() == at::ScalarType::Half ||
+                  ql_nope.scalar_type() == at::ScalarType::BFloat16,
+              "ql_nope must be float16 or bfloat16 dtype");
 
   if (num_tokens == 0) return;
 
@@ -1405,7 +1535,7 @@ void concat_mla_q(torch::Tensor& ql_nope,  // [num_tokens, num_heads, nope_dim]
   const at::cuda::OptionalCUDAGuard device_guard(device_of(ql_nope));
   const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 
-  VLLM_DISPATCH_FLOATING_TYPES(ql_nope.scalar_type(), "concat_mla_q", [&] {
+  VLLM_DISPATCH_HALF_TYPES(ql_nope.scalar_type(), "concat_mla_q", [&] {
     vllm::ConcatMLAQKernel<scalar_t, 512><<<grid_size, block_size, 0, stream>>>(
         q_out.data_ptr<scalar_t>(), ql_nope.data_ptr<scalar_t>(),
         q_pe.data_ptr<scalar_t>(), num_tokens, num_heads, q_out.stride(0),
diff --git a/csrc/cache_kernels_fused.cu b/csrc/cache_kernels_fused.cu
index be037b2fdec2..8687ebe1f14c 100644
--- a/csrc/cache_kernels_fused.cu
+++ b/csrc/cache_kernels_fused.cu
@@ -21,28 +21,33 @@ namespace vllm {
 
 // NOTE Be EXTRA careful with raw_kv_scalar_t, for __half and __nv_bfloat16 it's
 // using u16 as the backing type.
-template <typename qk_t, bool IS_NEOX, typename raw_kv_scalar_t,
-          typename cache_t, Fp8KVCacheDataType kv_dt>
+template <typename qk_t, typename cos_sin_t, bool IS_NEOX,
+          typename raw_kv_scalar_t, typename cache_t, Fp8KVCacheDataType kv_dt>
 __global__ void concat_and_cache_mla_rope_fused_kernel(
     const int64_t* __restrict__ positions,  // [num_tokens]
     qk_t* __restrict__ q_pe,        // [num_tokens, num_q_heads, rot_dim]
     qk_t* __restrict__ k_pe,        // [num_tokens, rot_dim]
     const qk_t* __restrict__ kv_c,  // [num_tokens, kv_lora_rank]
-    const qk_t* __restrict__ rope_cos_sin_cache,  // [max_position, 2,
-                                                  // rot_dim // 2]
+    const cos_sin_t* __restrict__ rope_cos_sin_cache,  // [max_position, 2,
+                                                       // rot_dim // 2]
     const int rot_dim, const int64_t q_pe_stride_token,
     const int64_t q_pe_stride_head, const int64_t k_pe_stride,
     const int64_t kv_c_stride, const int num_q_heads,
     cache_t* __restrict__ kv_cache,  // [num_blocks, block_size, (kv_lora_rank +
                                      // rot_dim)]
-    const int64_t* __restrict__ kv_cache_slot_mapping,  // [num_tokens]
+    const int64_t* __restrict__ slot_mapping,  // [num_tokens]
     const int block_stride, const int entry_stride, const int kv_lora_rank,
     const int block_size, const float* kv_cache_quant_scale) {
   // Each thread block is responsible for one token.
   const int64_t token_idx = blockIdx.x;
+  const int64_t slot_idx = slot_mapping[token_idx];
+  // NOTE: slot_idx can be -1 if the token is padded
+  if (slot_idx < 0) {
+    return;
+  }
   const int64_t pos = positions[token_idx];
 
-  const qk_t* cos_sin_ptr = rope_cos_sin_cache + pos * rot_dim;
+  const cos_sin_t* cos_sin_ptr = rope_cos_sin_cache + pos * rot_dim;
 
   const int embed_dim = rot_dim / 2;
 
@@ -54,8 +59,8 @@ __global__ void concat_and_cache_mla_rope_fused_kernel(
 
     // NOTE: Would be nice to have interleaved sin/cos so we could just load
     // both at the same time.
-    qk_t cos = VLLM_LDG(cos_sin_ptr + pair_idx);
-    qk_t sin = VLLM_LDG(cos_sin_ptr + pair_idx + embed_dim);
+    qk_t cos = static_cast<qk_t>(VLLM_LDG(cos_sin_ptr + pair_idx));
+    qk_t sin = static_cast<qk_t>(VLLM_LDG(cos_sin_ptr + pair_idx + embed_dim));
 
     qk_t* q_pe_head_ptr =
         q_pe + token_idx * q_pe_stride_token + head_idx * q_pe_stride_head;
@@ -81,21 +86,15 @@ __global__ void concat_and_cache_mla_rope_fused_kernel(
     q_pe_head_ptr[pair_idx_y] = y_dst;
   }
 
-  const int64_t slot_idx = kv_cache_slot_mapping[token_idx];
   const int64_t block_idx = slot_idx / block_size;
   const int64_t entry_idx = slot_idx % block_size;
 
-  // NOTE: slot_idx can be -1 if the token is padded
-  if (slot_idx < 0) {
-    return;
-  }
-
   // K with 1 HEAD
   for (int i = threadIdx.x; i < embed_dim; i += blockDim.x) {
     int pair_idx = i;
 
-    qk_t cos = VLLM_LDG(cos_sin_ptr + pair_idx);
-    qk_t sin = VLLM_LDG(cos_sin_ptr + pair_idx + embed_dim);
+    qk_t cos = static_cast<qk_t>(VLLM_LDG(cos_sin_ptr + pair_idx));
+    qk_t sin = static_cast<qk_t>(VLLM_LDG(cos_sin_ptr + pair_idx + embed_dim));
 
     qk_t* k_pe_head_ptr = k_pe + token_idx * k_pe_stride;
 
@@ -165,36 +164,43 @@ __global__ void concat_and_cache_mla_rope_fused_kernel(
 
 }  // namespace vllm
 
-#define CALL_CONCAT_AND_CACHE_MLA_ROPE_FUSED(RAW_KV_T, CACHE_T, KV_DTYPE)      \
-  do {                                                                         \
-    VLLM_DISPATCH_FLOATING_TYPES(q_pe.scalar_type(), "qk_scalar_type", [&] {   \
-      using qk_t = scalar_t;                                                   \
-      if (rope_is_neox) {                                                      \
-        vllm::concat_and_cache_mla_rope_fused_kernel<qk_t, true, RAW_KV_T,     \
-                                                     CACHE_T, KV_DTYPE>        \
-            <<<grid, block, 0, stream>>>(                                      \
-                positions.data_ptr<int64_t>(), q_pe.data_ptr<qk_t>(),          \
-                k_pe.data_ptr<qk_t>(), kv_c.data_ptr<qk_t>(),                  \
-                rope_cos_sin_cache.data_ptr<qk_t>(), rot_dim,                  \
-                q_pe_stride_token, q_pe_stride_head, k_pe_stride, kv_c_stride, \
-                num_q_heads, reinterpret_cast<CACHE_T*>(kv_cache.data_ptr()),  \
-                kv_cache_slot_mapping.data_ptr<int64_t>(), block_stride,       \
-                entry_stride, kv_lora_rank, block_size,                        \
-                kv_cache_quant_scale.data_ptr<float>());                       \
-      } else {                                                                 \
-        vllm::concat_and_cache_mla_rope_fused_kernel<qk_t, false, RAW_KV_T,    \
-                                                     CACHE_T, KV_DTYPE>        \
-            <<<grid, block, 0, stream>>>(                                      \
-                positions.data_ptr<int64_t>(), q_pe.data_ptr<qk_t>(),          \
-                k_pe.data_ptr<qk_t>(), kv_c.data_ptr<qk_t>(),                  \
-                rope_cos_sin_cache.data_ptr<qk_t>(), rot_dim,                  \
-                q_pe_stride_token, q_pe_stride_head, k_pe_stride, kv_c_stride, \
-                num_q_heads, reinterpret_cast<CACHE_T*>(kv_cache.data_ptr()),  \
-                kv_cache_slot_mapping.data_ptr<int64_t>(), block_stride,       \
-                entry_stride, kv_lora_rank, block_size,                        \
-                kv_cache_quant_scale.data_ptr<float>());                       \
-      }                                                                        \
-    });                                                                        \
+#define CALL_CONCAT_AND_CACHE_MLA_ROPE_FUSED(RAW_KV_T, CACHE_T, KV_DTYPE)     \
+  do {                                                                        \
+    VLLM_DISPATCH_FLOATING_TYPES(q_pe.scalar_type(), "qk_scalar_type", [&] {  \
+      using qk_t = scalar_t;                                                  \
+      VLLM_DISPATCH_FLOATING_TYPES(                                           \
+          rope_cos_sin_cache.scalar_type(), "rope_cos_sin_cache_scalar_type", \
+          [&] {                                                               \
+            using cos_sin_t = scalar_t;                                       \
+            if (rope_is_neox) {                                               \
+              vllm::concat_and_cache_mla_rope_fused_kernel<                   \
+                  qk_t, cos_sin_t, true, RAW_KV_T, CACHE_T, KV_DTYPE>         \
+                  <<<grid, block, 0, stream>>>(                               \
+                      positions.data_ptr<int64_t>(), q_pe.data_ptr<qk_t>(),   \
+                      k_pe.data_ptr<qk_t>(), kv_c.data_ptr<qk_t>(),           \
+                      rope_cos_sin_cache.data_ptr<cos_sin_t>(), rot_dim,      \
+                      q_pe_stride_token, q_pe_stride_head, k_pe_stride,       \
+                      kv_c_stride, num_q_heads,                               \
+                      reinterpret_cast<CACHE_T*>(kv_cache.data_ptr()),        \
+                      slot_mapping.data_ptr<int64_t>(), block_stride,         \
+                      entry_stride, kv_lora_rank, block_size,                 \
+                      kv_cache_quant_scale.data_ptr<float>());                \
+            } else {                                                          \
+              vllm::concat_and_cache_mla_rope_fused_kernel<                   \
+                  qk_t, cos_sin_t, false, RAW_KV_T, CACHE_T, KV_DTYPE>        \
+                  <<<grid, block, 0, stream>>>(                               \
+                      positions.data_ptr<int64_t>(), q_pe.data_ptr<qk_t>(),   \
+                      k_pe.data_ptr<qk_t>(), kv_c.data_ptr<qk_t>(),           \
+                      rope_cos_sin_cache.data_ptr<cos_sin_t>(), rot_dim,      \
+                      q_pe_stride_token, q_pe_stride_head, k_pe_stride,       \
+                      kv_c_stride, num_q_heads,                               \
+                      reinterpret_cast<CACHE_T*>(kv_cache.data_ptr()),        \
+                      slot_mapping.data_ptr<int64_t>(), block_stride,         \
+                      entry_stride, kv_lora_rank, block_size,                 \
+                      kv_cache_quant_scale.data_ptr<float>());                \
+            }                                                                 \
+          });                                                                 \
+    });                                                                       \
   } while (false)
 
 // Executes RoPE on q_pe and k_pe, then writes k_pe and kv_c in the kv cache.
@@ -208,43 +214,52 @@ void concat_and_cache_mla_rope_fused(
     torch::Tensor& kv_c,                // [num_tokens, kv_lora_rank]
     torch::Tensor& rope_cos_sin_cache,  // [max_position, rot_dim]
     bool rope_is_neox,
-    torch::Tensor&
-        kv_cache_slot_mapping,  // [num_tokens] or [num_actual_tokens]
+    torch::Tensor& slot_mapping,  // [num_tokens] or [num_actual_tokens]
     torch::Tensor&
         kv_cache,  // [num_blocks, block_size, (kv_lora_rank + rot_dim)]
     const std::string& kv_cache_dtype, torch::Tensor& kv_cache_quant_scale) {
-  const int64_t num_tokens = q_pe.size(0);
+  // NOTE(woosuk): In vLLM V1, query/key/position.size(0) can be different from
+  // slot_mapping.size(0) because of padding for CUDA graphs.
+  // In vLLM V0, key.size(0) is always equal to slot_mapping.size(0) because
+  // both include padding.
+  // In vLLM V1, however, key.size(0) can be larger than slot_mapping.size(0)
+  // since key includes padding for CUDA graphs, while slot_mapping does not.
+  // In this case, slot_mapping.size(0) represents the actual number of tokens
+  // before padding.
+  // For compatibility with both cases, we use slot_mapping.size(0) as the
+  // number of tokens.
+  int num_tokens = slot_mapping.size(0);
+  int num_padded_tokens = q_pe.size(0);
+  TORCH_CHECK_GE(num_padded_tokens, num_tokens);
 
   const int num_q_heads = q_pe.size(1);
   const int rot_dim = q_pe.size(2);
   const int kv_lora_rank = kv_c.size(1);
 
-  TORCH_CHECK(positions.size(0) >=
-              num_tokens);  // CUDA Graphs might pad this for us
+  TORCH_CHECK_EQ(positions.size(0), num_padded_tokens);
   TORCH_CHECK_EQ(positions.dim(), 1);
   TORCH_CHECK_EQ(positions.scalar_type(), c10::ScalarType::Long);
 
-  TORCH_CHECK_EQ(q_pe.size(0), num_tokens);
+  TORCH_CHECK_EQ(q_pe.dim(), 3);
+  TORCH_CHECK_EQ(q_pe.size(0), num_padded_tokens);
   TORCH_CHECK_EQ(q_pe.size(1), num_q_heads);
   TORCH_CHECK_EQ(q_pe.size(2), rot_dim);
-  TORCH_CHECK_EQ(q_pe.dim(), 3);
 
-  TORCH_CHECK_EQ(k_pe.size(0), num_tokens);
-  TORCH_CHECK_EQ(k_pe.size(1), rot_dim);
   TORCH_CHECK_EQ(k_pe.dim(), 2);
+  TORCH_CHECK_EQ(k_pe.size(0), num_padded_tokens);
+  TORCH_CHECK_EQ(k_pe.size(1), rot_dim);
   TORCH_CHECK_EQ(k_pe.scalar_type(), q_pe.scalar_type());
 
-  TORCH_CHECK_EQ(kv_c.size(0), num_tokens);
-  TORCH_CHECK_EQ(kv_c.size(1), kv_lora_rank);
   TORCH_CHECK_EQ(kv_c.dim(), 2);
+  TORCH_CHECK_EQ(kv_c.size(0), num_padded_tokens);
+  TORCH_CHECK_EQ(kv_c.size(1), kv_lora_rank);
   TORCH_CHECK_EQ(kv_c.scalar_type(), q_pe.scalar_type());
   TORCH_CHECK_EQ(kv_c.dtype(), q_pe.dtype());
 
   TORCH_CHECK_EQ(rope_cos_sin_cache.size(1), rot_dim);
-  TORCH_CHECK_EQ(rope_cos_sin_cache.scalar_type(), q_pe.scalar_type());
 
-  TORCH_CHECK_EQ(kv_cache_slot_mapping.size(0), num_tokens);
-  TORCH_CHECK_EQ(kv_cache_slot_mapping.scalar_type(), c10::ScalarType::Long);
+  TORCH_CHECK_EQ(slot_mapping.size(0), num_tokens);
+  TORCH_CHECK_EQ(slot_mapping.scalar_type(), c10::ScalarType::Long);
 
   TORCH_CHECK_EQ(kv_cache.size(2), kv_lora_rank + rot_dim);
   TORCH_CHECK_EQ(kv_cache.dim(), 3);
diff --git a/csrc/core/batch_invariant.hpp b/csrc/core/batch_invariant.hpp
index fffe96b86857..8273bc74b1ef 100644
--- a/csrc/core/batch_invariant.hpp
+++ b/csrc/core/batch_invariant.hpp
@@ -1,7 +1,6 @@
 #pragma once
 #include <cstdlib>
 #include <string>
-#include <cctype>
 
 namespace vllm {
 
diff --git a/csrc/core/scalar_type.hpp b/csrc/core/scalar_type.hpp
index 68a8750f583b..b6f39ed795f3 100644
--- a/csrc/core/scalar_type.hpp
+++ b/csrc/core/scalar_type.hpp
@@ -1,7 +1,13 @@
 #pragma once
 
-// For TORCH_CHECK
-#include <torch/library.h>
+#include <cstdint>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <variant>
+
+// For STD_TORCH_CHECK
+#include <torch/headeronly/util/Exception.h>
 
 namespace vllm {
 
@@ -45,7 +51,7 @@ class ScalarType {
   // IEEE 754 compliant floating point type
   static constexpr ScalarType float_IEEE754(uint8_t exponent,
                                             uint8_t mantissa) {
-    TORCH_CHECK(mantissa > 0 && exponent > 0);
+    STD_TORCH_CHECK(mantissa > 0 && exponent > 0);
     return ScalarType(exponent, mantissa, true, 0, false, NAN_IEEE_754);
   }
 
@@ -53,11 +59,12 @@ class ScalarType {
   static constexpr ScalarType float_(uint8_t exponent, uint8_t mantissa,
                                      bool finite_values_only,
                                      NanRepr nan_repr) {
-    TORCH_CHECK(nan_repr < NAN_REPR_ID_MAX, "Invalid NanRepr");
-    TORCH_CHECK(mantissa > 0 && exponent > 0);
-    TORCH_CHECK(nan_repr != NAN_IEEE_754,
-                "use `float_IEEE754` constructor for floating point types that "
-                "follow IEEE 754 conventions");
+    STD_TORCH_CHECK(nan_repr < NAN_REPR_ID_MAX, "Invalid NanRepr");
+    STD_TORCH_CHECK(mantissa > 0 && exponent > 0);
+    STD_TORCH_CHECK(
+        nan_repr != NAN_IEEE_754,
+        "use `float_IEEE754` constructor for floating point types that "
+        "follow IEEE 754 conventions");
     return ScalarType(exponent, mantissa, true, 0, finite_values_only,
                       nan_repr);
   }
@@ -176,8 +183,8 @@ class ScalarType {
 
  private:
   double _floating_point_max() const {
-    TORCH_CHECK(mantissa <= 52 && exponent <= 11,
-                "Cannot represent max/min as a double for type ", str());
+    STD_TORCH_CHECK(mantissa <= 52 && exponent <= 11,
+                    "Cannot represent max/min as a double for type ", str());
 
     uint64_t max_mantissa = (uint64_t(1) << mantissa) - 1;
     if (nan_repr == NAN_EXTD_RANGE_MAX_MIN) {
@@ -186,8 +193,8 @@ class ScalarType {
 
     uint64_t max_exponent = (uint64_t(1) << exponent) - 2;
     if (nan_repr == NAN_EXTD_RANGE_MAX_MIN || nan_repr == NAN_NONE) {
-      TORCH_CHECK(exponent < 11,
-                  "Cannot represent max/min as a double for type ", str());
+      STD_TORCH_CHECK(exponent < 11,
+                      "Cannot represent max/min as a double for type ", str());
       max_exponent += 1;
     }
 
@@ -216,16 +223,17 @@ class ScalarType {
     if (is_floating_point()) {
       return {_floating_point_max()};
     } else {
-      TORCH_CHECK(size_bits() < 64 || size_bits() == 64 && is_signed(),
-                  "Cannot represent max as a int64_t");
+      STD_TORCH_CHECK(size_bits() < 64 || size_bits() == 64 && is_signed(),
+                      "Cannot represent max as a int64_t");
       return {(int64_t(1) << mantissa) - 1};
     }
   }
 
   constexpr std::variant<int64_t, double> _raw_min() const {
     if (is_floating_point()) {
-      TORCH_CHECK(is_signed(),
-                  "We currently assume all floating point types are signed");
+      STD_TORCH_CHECK(
+          is_signed(),
+          "We currently assume all floating point types are signed");
       constexpr uint64_t sign_bit_double = (uint64_t(1) << 63);
 
       double max = _floating_point_max();
@@ -233,8 +241,8 @@ class ScalarType {
       uint64_t min_raw = max_raw | sign_bit_double;
       return {*reinterpret_cast<double*>(&min_raw)};
     } else {
-      TORCH_CHECK(!is_signed() || size_bits() <= 64,
-                  "Cannot represent min as a int64_t");
+      STD_TORCH_CHECK(!is_signed() || size_bits() <= 64,
+                      "Cannot represent min as a int64_t");
       if (is_signed()) {
         // set the top bit to 1 (i.e. INT64_MIN) and the rest to 0
         // then perform an arithmetic shift right to set all the bits above
diff --git a/csrc/cpu/activation_lut_bf16.cpp b/csrc/cpu/activation_lut_bf16.cpp
new file mode 100644
index 000000000000..0ff2567e1ee8
--- /dev/null
+++ b/csrc/cpu/activation_lut_bf16.cpp
@@ -0,0 +1,71 @@
+#include "cpu_types.hpp"
+
+#include <array>
+#include <cstdint>
+#include <mutex>
+#include <string>
+
+#include <ATen/ops/empty.h>
+#include <ATen/ops/gelu.h>
+#include <c10/util/BFloat16.h>
+
+constexpr uint32_t ActivationLutSize = 1u << 16;
+
+at::Tensor gelu_reference(const at::Tensor& x) { return at::gelu(x, "none"); }
+
+void maybe_init_activation_lut_bf16(
+    uint16_t* lut, std::once_flag& once,
+    at::Tensor (*activation)(const at::Tensor&)) {
+  std::call_once(once, [&]() {
+    auto lut_input =
+        at::empty({static_cast<int64_t>(ActivationLutSize)},
+                  at::TensorOptions().device(at::kCPU).dtype(at::kFloat));
+    auto* lut_input_ptr = lut_input.data_ptr<float>();
+#pragma omp parallel for
+    for (uint32_t i = 0; i < ActivationLutSize; ++i) {
+      lut_input_ptr[i] = c10::detail::f32_from_bits(static_cast<uint16_t>(i));
+    }
+
+    auto lut_output = activation(lut_input);
+    const auto* lut_output_ptr = lut_output.data_ptr<float>();
+#pragma omp parallel for
+    for (uint32_t i = 0; i < ActivationLutSize; ++i) {
+      lut[i] = c10::detail::round_to_nearest_even(lut_output_ptr[i]);
+    }
+  });
+}
+
+void activation_lut_bf16(torch::Tensor& out, torch::Tensor& input,
+                         const uint16_t* lut, const char* op_name) {
+  TORCH_CHECK(input.scalar_type() == at::kBFloat16, op_name,
+              ": input must be bfloat16");
+  TORCH_CHECK(out.scalar_type() == at::kBFloat16, op_name,
+              ": out must be bfloat16");
+  TORCH_CHECK(input.is_contiguous(), op_name, ": input must be contiguous");
+  TORCH_CHECK(out.is_contiguous(), op_name, ": out must be contiguous");
+
+  const auto* src =
+      reinterpret_cast<const uint16_t*>(input.data_ptr<at::BFloat16>());
+  auto* dst = reinterpret_cast<uint16_t*>(out.data_ptr<at::BFloat16>());
+  const int64_t n = input.numel();
+
+  CPU_KERNEL_GUARD_IN(activation_lut_bf16_impl)
+#pragma omp parallel for
+  for (int64_t i = 0; i < n; ++i) {
+    dst[i] = lut[src[i]];
+  }
+  CPU_KERNEL_GUARD_OUT(activation_lut_bf16_impl)
+}
+
+void activation_lut_bf16(torch::Tensor& out, torch::Tensor& input,
+                         const std::string& activation) {
+  if (activation == "gelu") {
+    static std::array<uint16_t, ActivationLutSize> lut{};
+    static std::once_flag once;
+    maybe_init_activation_lut_bf16(lut.data(), once, gelu_reference);
+    activation_lut_bf16(out, input, lut.data(), "gelu_lut");
+    return;
+  }
+
+  TORCH_CHECK(false, "Unsupported activation: ", activation);
+}
diff --git a/csrc/cpu/cpu_arch_macros.h b/csrc/cpu/cpu_arch_macros.h
index c73b62ecdec9..53ae70497c0f 100644
--- a/csrc/cpu/cpu_arch_macros.h
+++ b/csrc/cpu/cpu_arch_macros.h
@@ -61,8 +61,23 @@
 #endif
 
 #ifdef __aarch64__
-  // Implementation copied from Arm Optimized Routines (expf AdvSIMD)
+  // Implementation of neon_expf copied from Arm Optimized Routines (expf
+  // AdvSIMD)
   // https://github.com/ARM-software/optimized-routines/blob/master/math/aarch64/advsimd/expf.c
+  //
+  // Additional fast exponential intended for cases where outputs will be
+  // downcasted to FP16 / BF16 (e.g. attention softmax). Accurate within 1 ULP
+  // for FP16 Accurate within 1 ULP for BF16 for inputs in [-87.683, 88.376] &
+  // clamps inputs outside this range to 0 / inf. Implementation is similar to
+  // exp_u20, but:
+  // - uses a third degree polynomial approximation for exp(r) instead of a
+  // fifth degree one, with coefficients re-tuned.
+  // - does not split natural log (ln) into high / low parts
+  // - clamps exp(x) to 0 for x < -87.683113f and inf for x > 88.3762589f
+  // exp(x) = 2^n (exp(r))
+  // r = x - n*ln2, with n = round(x/ln2)
+  // exp(r) ~ poly(r) = 1 + r + r^2 * (c3 + c2 * r)
+  // n = round(x / ln2), r = x - n*ln2
   #include <limits>
   #define DEFINE_FAST_EXP                                                      \
     const float32x4_t inv_ln2 = vdupq_n_f32(0x1.715476p+0f);                   \
@@ -106,8 +121,55 @@
                           result.val[2] = neon_expf(vec.reg.val[2]);           \
                           result.val[3] = neon_expf(vec.reg.val[3]);           \
                           return vec_op::FP32Vec16(result);                    \
-                        };
+                        };                                                     \
+    const float32x4_t lower_bound = vdupq_n_f32(-0x1.5ebb82p+6f);              \
+    const float32x4_t upper_bound = vdupq_n_f32(0x1.61814ap+6f);               \
+    constexpr float ln2 = 0x1.62e43p-1f;                                       \
+    constexpr float f_c2 = 0x1.5592ecp-3f;                                     \
+    const float32x4_t f_c3 = vdupq_n_f32(0x1.017d34p-1f);                      \
+    auto neon_expf_f16 = [&](float32x4_t values) __attribute__((               \
+                             always_inline)) {                                 \
+      const uint32x4_t lt_lower = vcltq_f32(values, lower_bound);              \
+      const uint32x4_t gt_upper = vcgtq_f32(values, upper_bound);              \
+      float32x4_t n = vrndaq_f32(vmulq_f32(values, inv_ln2));                  \
+      float32x4_t r = vfmsq_n_f32(values, n, ln2);                             \
+      uint32x4_t e = vshlq_n_u32(vreinterpretq_u32_s32(vcvtq_s32_f32(n)), 23); \
+      float32x4_t r2 = vmulq_f32(r, r);                                        \
+      float32x4_t q = vfmaq_n_f32(f_c3, r, f_c2);                              \
+      float32x4_t s = vaddq_f32(vdupq_n_f32(1.0f), r);                         \
+      float32x4_t p = vfmaq_f32(s, q, r2);                                     \
+      float32x4_t y =                                                          \
+          vreinterpretq_f32_u32(vaddq_u32(vreinterpretq_u32_f32(p), e));       \
+      y = vbslq_f32(lt_lower, vdupq_n_f32(0.0f), y);                           \
+      y = vbslq_f32(gt_upper, vdupq_n_f32(INFINITY), y);                       \
+      return y;                                                                \
+    };                                                                         \
+    auto fast_exp_f16 = [&](const vec_op::FP32Vec16& vec)                      \
+                            __attribute__((always_inline)) {                   \
+                              float32x4x4_t result;                            \
+                              result.val[0] = neon_expf_f16(vec.reg.val[0]);   \
+                              result.val[1] = neon_expf_f16(vec.reg.val[1]);   \
+                              result.val[2] = neon_expf_f16(vec.reg.val[2]);   \
+                              result.val[3] = neon_expf_f16(vec.reg.val[3]);   \
+                              return vec_op::FP32Vec16(result);                \
+                            };
 
 #endif  // __aarch64__
 
+// RISC-V RVV
+#ifdef __riscv_v
+  #include <riscv_vector.h>
+
+  #ifdef __riscv_zihintpause
+    #define FAST_SPINNING __riscv_pause();
+  #endif
+
+  // FP32Vec16::exp() in cpu_types_riscv.hpp already implements the full
+  // polynomial approximation for RVV, so we simply delegate to it.
+  #define DEFINE_FAST_EXP                             \
+    auto fast_exp = [&](const vec_op::FP32Vec16& vec) \
+                        __attribute__((always_inline)) { return vec.exp(); };
+
+#endif  // __riscv_v
+
 #endif
diff --git a/csrc/cpu/cpu_attn.cpp b/csrc/cpu/cpu_attn.cpp
index a582b4b4d7cc..26b881f4f143 100644
--- a/csrc/cpu/cpu_attn.cpp
+++ b/csrc/cpu/cpu_attn.cpp
@@ -1,5 +1,16 @@
 #include "cpu_attn_dispatch_generated.h"
 
+// Maps kv_cache_dtype string to Fp8KVCacheDataType enum.
+// "auto" -> kAuto(0); "fp8"/"fp8_e4m3" -> kFp8E4M3; "fp8_e5m2" -> kFp8E5M2.
+static inline cpu_attention::Fp8KVCacheDataType parse_fp8_kv_dtype(
+    const std::string& kv_cache_dtype) {
+  if (kv_cache_dtype == "fp8_e5m2")
+    return cpu_attention::Fp8KVCacheDataType::kFp8E5M2;
+  if (kv_cache_dtype == "fp8_e4m3" || kv_cache_dtype == "fp8")
+    return cpu_attention::Fp8KVCacheDataType::kFp8E4M3;
+  return cpu_attention::Fp8KVCacheDataType::kAuto;
+}
+
 torch::Tensor get_scheduler_metadata(
     const int64_t num_req, const int64_t num_heads_q,
     const int64_t num_heads_kv, const int64_t head_dim,
@@ -18,6 +29,10 @@ torch::Tensor get_scheduler_metadata(
     isa = cpu_attention::ISA::NEON;
   } else if (isa_hint == "vxe") {
     isa = cpu_attention::ISA::VXE;
+  } else if (isa_hint == "rvv") {
+    isa = cpu_attention::ISA::RVV;
+  } else if (isa_hint == "vsx") {
+    isa = cpu_attention::ISA::VSX;
   } else {
     TORCH_CHECK(false, "Unsupported CPU attention ISA hint: " + isa_hint);
   }
@@ -49,7 +64,7 @@ torch::Tensor get_scheduler_metadata(
   input.enable_kv_split = enable_kv_split;
 
   VLLM_DISPATCH_FLOATING_TYPES(dtype, "get_scheduler_metadata", [&]() {
-    CPU_ATTN_DISPATCH(head_dim, isa, [&]() {
+    CPU_ATTN_DISPATCH(head_dim, isa, 0, [&]() {
       input.elem_size = sizeof(scalar_t);
       input.q_buffer_elem_size = sizeof(attn_impl::q_buffer_t);
       input.logits_buffer_elem_size = sizeof(attn_impl::logits_buffer_t);
@@ -72,7 +87,9 @@ void cpu_attn_reshape_and_cache(
         key_cache,  // [num_blocks, num_kv_heads, block_size, head_size]
     torch::Tensor&
         value_cache,  // [num_blocks, num_kv_heads, block_size, head_size]
-    const torch::Tensor& slot_mapping, const std::string& isa) {
+    const torch::Tensor& slot_mapping, const std::string& isa,
+    const double k_scale = 1.0, const double v_scale = 1.0,
+    const std::string& kv_cache_dtype = "auto") {
   TORCH_CHECK_EQ(key.dim(), 3);
   TORCH_CHECK_EQ(value.dim(), 3);
   TORCH_CHECK_EQ(key_cache.dim(), 4);
@@ -80,18 +97,30 @@ void cpu_attn_reshape_and_cache(
   TORCH_CHECK_EQ(key.stride(2), 1);
   TORCH_CHECK_EQ(value.stride(2), 1);
 
+  const int64_t kv_cache_idx =
+      static_cast<int64_t>(parse_fp8_kv_dtype(kv_cache_dtype));
+  const bool is_fp8 = (kv_cache_idx != 0);
+
+  if (is_fp8) {
+    TORCH_CHECK(key_cache.scalar_type() == at::ScalarType::Byte,
+                "key_cache must be uint8 for FP8 path");
+    TORCH_CHECK(value_cache.scalar_type() == at::ScalarType::Byte,
+                "value_cache must be uint8 for FP8 path");
+    TORCH_CHECK(k_scale > 0, "k_scale must be positive for FP8 path");
+    TORCH_CHECK(v_scale > 0, "v_scale must be positive for FP8 path");
+  }
+
+  const float k_inv = is_fp8 ? 1.0f / static_cast<float>(k_scale) : 0.0f;
+  const float v_inv = is_fp8 ? 1.0f / static_cast<float>(v_scale) : 0.0f;
+
   const int64_t token_num = key.size(0);
-  const int64_t key_token_num_stride = key.stride(0);
-  const int64_t value_token_num_stride = value.stride(0);
-  const int64_t head_num = value.size(1);
-  const int64_t key_head_num_stride = key.stride(1);
-  const int64_t value_head_num_stride = value.stride(1);
+  const int64_t head_num = key.size(1);
+  const int64_t head_dim = key.size(2);
   const int64_t num_blocks = key_cache.size(0);
   const int64_t num_blocks_stride = key_cache.stride(0);
   const int64_t cache_head_num_stride = key_cache.stride(1);
   const int64_t block_size = key_cache.size(2);
   const int64_t block_size_stride = key_cache.stride(2);
-  const int64_t head_dim = key.size(-1);
 
   cpu_attention::ISA isa_tag = [&]() {
     if (isa == "amx") {
@@ -104,21 +133,33 @@ void cpu_attn_reshape_and_cache(
       return cpu_attention::ISA::NEON;
     } else if (isa == "vxe") {
       return cpu_attention::ISA::VXE;
+    } else if (isa == "rvv") {
+      return cpu_attention::ISA::RVV;
+    } else if (isa == "vsx") {
+      return cpu_attention::ISA::VSX;
     } else {
       TORCH_CHECK(false, "Invalid ISA type: " + isa);
     }
   }();
 
+  if (is_fp8) {
+    TORCH_CHECK(isa_tag == cpu_attention::ISA::AMX ||
+                    isa_tag == cpu_attention::ISA::VEC,
+                "FP8 KV cache is only supported on x86 (AMX/VEC) ISA");
+  }
+
   VLLM_DISPATCH_FLOATING_TYPES(
       key.scalar_type(), "cpu_attn_reshape_and_cache", [&]() {
-        CPU_ATTN_DISPATCH(head_dim, isa_tag, [&]() {
+        CPU_ATTN_DISPATCH(head_dim, isa_tag, kv_cache_idx, [&]() {
+          using kv_t = typename attn_impl::kv_cache_t;
           attn_impl::reshape_and_cache(
               key.data_ptr<scalar_t>(), value.data_ptr<scalar_t>(),
-              key_cache.data_ptr<scalar_t>(), value_cache.data_ptr<scalar_t>(),
-              slot_mapping.data_ptr<int64_t>(), token_num, key_token_num_stride,
-              value_token_num_stride, head_num, key_head_num_stride,
-              value_head_num_stride, num_blocks, num_blocks_stride,
-              cache_head_num_stride, block_size, block_size_stride);
+              reinterpret_cast<kv_t*>(key_cache.data_ptr()),
+              reinterpret_cast<kv_t*>(value_cache.data_ptr()),
+              slot_mapping.data_ptr<int64_t>(), token_num, key.stride(0),
+              value.stride(0), head_num, key.stride(1), value.stride(1),
+              num_blocks, num_blocks_stride, cache_head_num_stride, block_size,
+              block_size_stride, k_inv, v_inv);
         });
       });
 }
@@ -137,13 +178,26 @@ void cpu_attention_with_kv_cache(
     const int64_t sliding_window_left, const int64_t sliding_window_right,
     const torch::Tensor& block_table,  // [num_tokens, max_block_num]
     const double softcap, const torch::Tensor& scheduler_metadata,
-    const std::optional<torch::Tensor>& s_aux  // [num_heads]
-) {
+    const std::optional<torch::Tensor>& s_aux,  // [num_heads]
+    const double k_scale = 1.0, const double v_scale = 1.0,
+    const std::string& kv_cache_dtype = "auto") {
   TORCH_CHECK_EQ(query.dim(), 3);
   TORCH_CHECK_EQ(query.stride(2), 1);
   TORCH_CHECK_EQ(key_cache.dim(), 4);
   TORCH_CHECK_EQ(value_cache.dim(), 4);
 
+  const int64_t kv_cache_idx =
+      static_cast<int64_t>(parse_fp8_kv_dtype(kv_cache_dtype));
+  const bool is_fp8 = (kv_cache_idx != 0);
+  if (is_fp8) {
+    TORCH_CHECK(key_cache.scalar_type() == at::ScalarType::Byte,
+                "key_cache must be uint8 for FP8 path");
+    TORCH_CHECK(value_cache.scalar_type() == at::ScalarType::Byte,
+                "value_cache must be uint8 for FP8 path");
+    TORCH_CHECK(k_scale > 0, "k_scale must be positive for FP8 path");
+    TORCH_CHECK(v_scale > 0, "v_scale must be positive for FP8 path");
+  }
+
   cpu_attention::AttentionInput input;
   input.metadata = reinterpret_cast<cpu_attention::AttentionMetadata*>(
       scheduler_metadata.data_ptr());
@@ -165,25 +219,32 @@ void cpu_attention_with_kv_cache(
   input.block_table = block_table.data_ptr<int32_t>();
   input.alibi_slopes =
       alibi_slopes.has_value() ? alibi_slopes->data_ptr<float>() : nullptr;
-  // For now sink must be bf16
   input.s_aux = s_aux.has_value() ? s_aux->data_ptr<c10::BFloat16>() : nullptr;
   input.scale = scale;
   input.causal = causal;
   input.sliding_window_left = sliding_window_left;
   input.sliding_window_right = sliding_window_right;
   if (input.causal) {
-    // to make boundary calculation easier
     input.sliding_window_right = 0;
   }
-  float softcap_fp32 = softcap;
-  input.softcap = softcap_fp32;
+  input.softcap = static_cast<float>(softcap);
+
+  if (is_fp8) {
+    input.k_scale_fp8 = static_cast<float>(k_scale);
+    input.v_scale_fp8 = static_cast<float>(v_scale);
+    TORCH_CHECK(input.metadata->isa == cpu_attention::ISA::AMX ||
+                    input.metadata->isa == cpu_attention::ISA::VEC,
+                "FP8 KV cache is only supported on x86 (AMX/VEC) ISA");
+  }
 
   VLLM_DISPATCH_FLOATING_TYPES(
       query.scalar_type(), "cpu_attention_with_kv_cache", [&]() {
-        CPU_ATTN_DISPATCH(query.size(2), input.metadata->isa, [&]() {
-          TORCH_CHECK_EQ(input.block_size % attn_impl::BlockSizeAlignment, 0);
-          cpu_attention::AttentionMainLoop<attn_impl> mainloop;
-          mainloop(&input);
-        });
+        CPU_ATTN_DISPATCH(
+            query.size(2), input.metadata->isa, kv_cache_idx, [&]() {
+              TORCH_CHECK_EQ(input.block_size % attn_impl::BlockSizeAlignment,
+                             0);
+              cpu_attention::AttentionMainLoop<attn_impl> mainloop;
+              mainloop(&input);
+            });
       });
 }
diff --git a/csrc/cpu/cpu_attn_amx.hpp b/csrc/cpu/cpu_attn_amx.hpp
index 1c8644d52329..6a0341085dce 100644
--- a/csrc/cpu/cpu_attn_amx.hpp
+++ b/csrc/cpu/cpu_attn_amx.hpp
@@ -1,6 +1,7 @@
 #ifndef CPU_ATTN_AMX_HPP
 #define CPU_ATTN_AMX_HPP
 
+#include "cpu_attn_fp8.hpp"
 #include "cpu_attn_impl.hpp"
 
 namespace cpu_attention {
@@ -21,9 +22,10 @@ typedef struct __tile_config {
 // 2-2-4 pattern, for 16 < m <= 32
 // TILE 0, 1: load A matrix, row num should be 16, m - 16
 // TILE 2, 3: load B matrix, row num should be 16
-// TILE 4, 5, 6, 7: store results C matrix, row num should be 16, 16, m - 16, m
-// - 16
-template <typename kv_cache_t>
+// TILE 4, 5, 6, 7: store results C matrix, row num should be 16, 16,
+// m - 16, m - 16
+// q_buffer_t: A (Q/P) tile type; kv_cache_t: B (K/V cache) tile type.
+template <typename q_buffer_t, typename kv_cache_t>
 class TileGemm224 {
  public:
   template <AttentionGemmPhase phase, int32_t k_size>
@@ -42,13 +44,56 @@ class TileGemm224 {
   }
 };
 
-template <>
-class TileGemm224<c10::BFloat16> {
+// Dequantize one FP8 tile (AMX_TILE_ROW_NUM rows x 32 cols) to BF16.
+template <typename kv_cache_t>
+FORCE_INLINE void deq_tile_amx(const uint8_t* src, c10::BFloat16* dst) {
+  for (int r = 0; r < AMX_TILE_ROW_NUM; ++r) {
+    if constexpr (std::is_same_v<kv_cache_t, c10::Float8_e4m3fn>) {
+      vec_op::BF16Vec32(src + r * 32, vec_op::fp8_bf16_e4m3_tag{})
+          .save(dst + r * 32);
+    } else {
+      vec_op::BF16Vec32(src + r * 32, vec_op::fp8_bf16_e5m2_tag{})
+          .save(dst + r * 32);
+    }
+  }
+}
+
+// For FP8: dequant src into scratch and return scratch.
+// For BF16: return src directly (scratch is unused; the compiler elides it).
+template <typename kv_cache_t>
+FORCE_INLINE const c10::BFloat16* prepare_b_tile(const kv_cache_t* src,
+                                                 c10::BFloat16* scratch) {
+  if constexpr (std::is_same_v<kv_cache_t, c10::Float8_e4m3fn> ||
+                std::is_same_v<kv_cache_t, c10::Float8_e5m2>) {
+    deq_tile_amx<kv_cache_t>(reinterpret_cast<const uint8_t*>(src), scratch);
+    return scratch;
+  } else {
+    return reinterpret_cast<const c10::BFloat16*>(src);
+  }
+}
+
+// Handles both BF16 and FP8 KV cache (2-2-4 pattern).
+template <typename kv_cache_t>
+class TileGemm224<c10::BFloat16, kv_cache_t> {
+  static_assert(std::is_same_v<kv_cache_t, c10::BFloat16> ||
+                    std::is_same_v<kv_cache_t, c10::Float8_e4m3fn> ||
+                    std::is_same_v<kv_cache_t, c10::Float8_e5m2>,
+                "kv_cache_t must be BFloat16, Float8_e4m3fn, or Float8_e5m2");
+
+  static constexpr bool fp8_kv =
+      std::is_same_v<kv_cache_t, c10::Float8_e4m3fn> ||
+      std::is_same_v<kv_cache_t, c10::Float8_e5m2>;
+
+  static constexpr int64_t tile_elems = AMX_TILE_BYTES / sizeof(c10::BFloat16);
+  // BF16 path: scratch_elems=1 so the scratch array is eliminated by the
+  // compiler.
+  static constexpr int64_t scratch_elems = fp8_kv ? tile_elems : 1;
+
  public:
   template <AttentionGemmPhase phase, int32_t k_size>
   FORCE_INLINE static void gemm(const int32_t m_size,
                                 c10::BFloat16* __restrict__ a_tile,
-                                c10::BFloat16* __restrict__ b_tile,
+                                kv_cache_t* __restrict__ b_tile,
                                 float* __restrict__ c_tile, const int64_t lda,
                                 const int64_t ldb, const int64_t ldc,
                                 const int32_t block_size,
@@ -56,6 +101,7 @@ class TileGemm224<c10::BFloat16> {
                                 const bool accum_c) {
     const int32_t k_times =
         dynamic_k_size / (AMX_TILE_ROW_NUM * 4 / sizeof(c10::BFloat16));
+
     c10::BFloat16* __restrict__ a_tile_0 = a_tile;
     c10::BFloat16* __restrict__ a_tile_1 = a_tile + lda * AMX_TILE_ROW_NUM;
     const int64_t a_tile_stride = [&]() {
@@ -70,8 +116,8 @@ class TileGemm224<c10::BFloat16> {
       }
     }();
 
-    c10::BFloat16* __restrict__ b_tile_2 = b_tile;
-    c10::BFloat16* __restrict__ b_tile_3 = [&]() {
+    kv_cache_t* __restrict__ b_tile_2 = b_tile;
+    kv_cache_t* __restrict__ b_tile_3 = [&]() {
       if constexpr (phase == AttentionGemmPhase::QK) {
         // k_cache is prepacked
         return b_tile + (k_size * AMX_TILE_ROW_BYTES / 4);
@@ -106,11 +152,16 @@ class TileGemm224<c10::BFloat16> {
       _tile_zero(7);
     }
 
+    alignas(64) c10::BFloat16 scratch_2[scratch_elems];
+    alignas(64) c10::BFloat16 scratch_3[scratch_elems];
     for (int32_t k = 0; k < k_times; ++k) {
+      const c10::BFloat16* load_2 = prepare_b_tile(b_tile_2, scratch_2);
+      const c10::BFloat16* load_3 = prepare_b_tile(b_tile_3, scratch_3);
+
       _tile_loadd(0, a_tile_0, a_tile_stride);
-      _tile_stream_loadd(2, b_tile_2, b_tile_stride);
+      _tile_stream_loadd(2, const_cast<c10::BFloat16*>(load_2), b_tile_stride);
       _tile_dpbf16ps(4, 0, 2);
-      _tile_stream_loadd(3, b_tile_3, b_tile_stride);
+      _tile_stream_loadd(3, const_cast<c10::BFloat16*>(load_3), b_tile_stride);
       _tile_dpbf16ps(5, 0, 3);
       _tile_loadd(1, a_tile_1, a_tile_stride);
       _tile_dpbf16ps(6, 1, 2);
@@ -154,13 +205,13 @@ class TileGemm224<c10::BFloat16> {
 };
 
 // 1-2-2 pattern, for 0 < m <= 16
-// TILE 0, (1): load A matrix, use extra 1 tile for prefetch, row num should be
-// m, m
-// TILE 2, 3, (4, 5): load B matrix, use extra 2 tiles for prefetch, row
-// num should be 16
-// TILE 6, 7, (6, 7): store results C matrix, row num should be
-// m
-template <typename kv_cache_t>
+// TILE 0, (1): load A matrix, use extra 1 tile for prefetch, row num should
+// be m, m
+// TILE 2, 3, (4, 5): load B matrix, use extra 2 tiles for prefetch, row num
+// should be 16
+// TILE 6, 7: store results C matrix, row num should be m
+// q_buffer_t: A (Q/P) tile type; kv_cache_t: B (K/V cache) tile type.
+template <typename q_buffer_t, typename kv_cache_t>
 class TileGemm122 {
  public:
   template <AttentionGemmPhase phase, int32_t k_size>
@@ -179,13 +230,26 @@ class TileGemm122 {
   }
 };
 
-template <>
-class TileGemm122<c10::BFloat16> {
+// Handles both BF16 and FP8 KV cache (1-2-2 pattern).
+template <typename kv_cache_t>
+class TileGemm122<c10::BFloat16, kv_cache_t> {
+  static_assert(std::is_same_v<kv_cache_t, c10::BFloat16> ||
+                    std::is_same_v<kv_cache_t, c10::Float8_e4m3fn> ||
+                    std::is_same_v<kv_cache_t, c10::Float8_e5m2>,
+                "kv_cache_t must be BFloat16, Float8_e4m3fn, or Float8_e5m2");
+
+  static constexpr bool fp8_kv =
+      std::is_same_v<kv_cache_t, c10::Float8_e4m3fn> ||
+      std::is_same_v<kv_cache_t, c10::Float8_e5m2>;
+
+  static constexpr int64_t tile_elems = AMX_TILE_BYTES / sizeof(c10::BFloat16);
+  static constexpr int64_t scratch_elems = fp8_kv ? tile_elems : 1;
+
  public:
   template <AttentionGemmPhase phase, int32_t k_size>
   FORCE_INLINE static void gemm(const int32_t m_size,
                                 c10::BFloat16* __restrict__ a_tile,
-                                c10::BFloat16* __restrict__ b_tile,
+                                kv_cache_t* __restrict__ b_tile,
                                 float* __restrict__ c_tile, const int64_t lda,
                                 const int64_t ldb, const int64_t ldc,
                                 const int32_t block_size,
@@ -215,21 +279,19 @@ class TileGemm122<c10::BFloat16> {
       }
     }();
 
-    c10::BFloat16* __restrict__ b_tile_2 = b_tile;
-    c10::BFloat16* __restrict__ b_tile_3 = [&]() {
+    kv_cache_t* __restrict__ b_tile_2 = b_tile;
+    kv_cache_t* __restrict__ b_tile_3 = [&]() {
       if constexpr (phase == AttentionGemmPhase::QK) {
-        // k_cache is prepacked
         return b_tile + (k_size * AMX_TILE_ROW_BYTES / 4);
       } else if constexpr (phase == AttentionGemmPhase::PV) {
-        // v_cache is prepacked
         return b_tile + (block_size * AMX_TILE_ROW_BYTES / 4);
       } else {
         TORCH_CHECK(false, "Unreachable");
       }
     }();
-    c10::BFloat16* __restrict__ b_tile_4 =
+    kv_cache_t* __restrict__ b_tile_4 =
         b_tile_2 + AMX_TILE_BYTES / sizeof(c10::BFloat16);
-    c10::BFloat16* __restrict__ b_tile_5 =
+    kv_cache_t* __restrict__ b_tile_5 =
         b_tile_3 + AMX_TILE_BYTES / sizeof(c10::BFloat16);
     int64_t b_stride = AMX_TILE_ROW_BYTES;
 
@@ -250,16 +312,25 @@ class TileGemm122<c10::BFloat16> {
       _tile_zero(7);
     }
 
+    alignas(64) c10::BFloat16 scratch_2[scratch_elems];
+    alignas(64) c10::BFloat16 scratch_3[scratch_elems];
+    alignas(64) c10::BFloat16 scratch_4[scratch_elems];
+    alignas(64) c10::BFloat16 scratch_5[scratch_elems];
     for (int32_t k = 0; k < k_group_times; ++k) {
+      const c10::BFloat16* load_2 = prepare_b_tile(b_tile_2, scratch_2);
+      const c10::BFloat16* load_3 = prepare_b_tile(b_tile_3, scratch_3);
+      const c10::BFloat16* load_4 = prepare_b_tile(b_tile_4, scratch_4);
+      const c10::BFloat16* load_5 = prepare_b_tile(b_tile_5, scratch_5);
+
       _tile_loadd(0, a_tile_0, a_tile_stride);
-      _tile_stream_loadd(2, b_tile_2, b_stride);
+      _tile_stream_loadd(2, const_cast<c10::BFloat16*>(load_2), b_stride);
       _tile_dpbf16ps(6, 0, 2);
-      _tile_stream_loadd(3, b_tile_3, b_stride);
+      _tile_stream_loadd(3, const_cast<c10::BFloat16*>(load_3), b_stride);
       _tile_dpbf16ps(7, 0, 3);
       _tile_loadd(1, a_tile_1, a_tile_stride);
-      _tile_stream_loadd(4, b_tile_4, b_stride);
+      _tile_stream_loadd(4, const_cast<c10::BFloat16*>(load_4), b_stride);
       _tile_dpbf16ps(6, 1, 4);
-      _tile_stream_loadd(5, b_tile_5, b_stride);
+      _tile_stream_loadd(5, const_cast<c10::BFloat16*>(load_5), b_stride);
       _tile_dpbf16ps(7, 1, 5);
 
       // update ptrs
@@ -279,10 +350,13 @@ class TileGemm122<c10::BFloat16> {
     }
 
     if (has_tail) {
+      const c10::BFloat16* load_2 = prepare_b_tile(b_tile_2, scratch_2);
+      const c10::BFloat16* load_3 = prepare_b_tile(b_tile_3, scratch_3);
+
       _tile_loadd(0, a_tile_0, a_tile_stride);
-      _tile_stream_loadd(2, b_tile_2, b_stride);
+      _tile_stream_loadd(2, const_cast<c10::BFloat16*>(load_2), b_stride);
       _tile_dpbf16ps(6, 0, 2);
-      _tile_stream_loadd(3, b_tile_3, b_stride);
+      _tile_stream_loadd(3, const_cast<c10::BFloat16*>(load_3), b_stride);
       _tile_dpbf16ps(7, 0, 3);
     }
 
@@ -302,21 +376,25 @@ class TileGemm122<c10::BFloat16> {
     _tile_loadconfig(&config);
   }
 };
+
 }  // namespace
 
-template <typename scalar_t, int64_t head_dim>
-class AttentionImpl<ISA::AMX, scalar_t, head_dim> {
+template <typename scalar_t, int64_t head_dim, typename kv_cache_scalar_t>
+class AttentionImpl<ISA::AMX, scalar_t, head_dim, kv_cache_scalar_t> {
+  static constexpr bool fp8_kv =
+      std::is_same_v<kv_cache_scalar_t, c10::Float8_e4m3fn> ||
+      std::is_same_v<kv_cache_scalar_t, c10::Float8_e5m2>;
+
  public:
   using query_t = scalar_t;
   using q_buffer_t = scalar_t;
-  using kv_cache_t = scalar_t;
+  using kv_cache_t = kv_cache_scalar_t;
   using logits_buffer_t = float;
   using partial_output_buffer_t = float;
   using prob_buffer_t = scalar_t;
 
   constexpr static int64_t BlockSizeAlignment =
-      AMX_TILE_ROW_BYTES /
-      sizeof(kv_cache_t);  // KV token num unit of QK and PV phases
+      32;  // AMX_TILE_ROW_NUM = 16 tokens/tile; 32 = 2 tiles
   constexpr static int64_t HeadDimAlignment =
       2 * (AMX_TILE_ROW_BYTES / 4);  // headdim num unit of PV phase
   constexpr static int64_t MaxQHeadNumPerIteration = 32;
@@ -324,6 +402,9 @@ class AttentionImpl<ISA::AMX, scalar_t, head_dim> {
   constexpr static ISA ISAType = ISA::AMX;
   constexpr static bool scale_on_logits = true;
 
+  float k_scale = 1.0f;
+  float v_scale = 1.0f;
+
  public:
   AttentionImpl() : current_q_head_num_(0) {
     // Use all columns in AMX tiles
@@ -332,21 +413,50 @@ class AttentionImpl<ISA::AMX, scalar_t, head_dim> {
 
   ~AttentionImpl() { _tile_release(); }
 
+  void init_from_input(const AttentionInput* input) {
+    if constexpr (fp8_kv) {
+      k_scale = input->k_scale_fp8;
+      v_scale = input->v_scale_fp8;
+    }
+  }
+
+  float get_output_v_scale() const noexcept {
+    if constexpr (fp8_kv) {
+      // AMX dequant places FP8 payload into a BF16 field (exponent bias 127).
+      // Correction = 2^(127 - FP8_bias): E4M3 bias=7 → 2^120, E5M2 bias=15 →
+      // 2^112.
+      constexpr float bias =
+          std::is_same_v<kv_cache_t, c10::Float8_e5m2> ? 0x1p112f : 0x1p120f;
+      return v_scale * bias;
+    }
+    return 1.0f;
+  }
+
   template <template <typename tile_gemm_t> typename attention>
   FORCE_INLINE void execute_attention(DEFINE_CPU_ATTENTION_PARAMS) {
+    if constexpr (fp8_kv) {
+      // Same bias correction as get_output_v_scale: AMX FP8→BF16 dequant
+      // shifts the exponent bias from FP8 to BF16 (127), so we multiply by
+      // 2^(127-FP8_bias) to recover the true value. E4M3: 2^120, E5M2: 2^112.
+      const float bias =
+          std::is_same_v<kv_cache_t, c10::Float8_e5m2> ? 0x1p112f : 0x1p120f;
+      scale *= k_scale * bias;
+    }
     if (q_head_num > AMX_TILE_ROW_NUM) {
       if (q_head_num != current_q_head_num_) {
         current_q_head_num_ = q_head_num;
-        TileGemm224<kv_cache_t>::init_tile_config(q_head_num, amx_tile_config_);
+        TileGemm224<q_buffer_t, kv_cache_t>::init_tile_config(q_head_num,
+                                                              amx_tile_config_);
       }
-      attention<TileGemm224<kv_cache_t>> attention_iteration;
+      attention<TileGemm224<q_buffer_t, kv_cache_t>> attention_iteration;
       attention_iteration(CPU_ATTENTION_PARAMS);
     } else {
       if (q_head_num != current_q_head_num_) {
         current_q_head_num_ = q_head_num;
-        TileGemm122<kv_cache_t>::init_tile_config(q_head_num, amx_tile_config_);
+        TileGemm122<q_buffer_t, kv_cache_t>::init_tile_config(q_head_num,
+                                                              amx_tile_config_);
       }
-      attention<TileGemm122<kv_cache_t>> attention_iteration;
+      attention<TileGemm122<q_buffer_t, kv_cache_t>> attention_iteration;
       attention_iteration(CPU_ATTENTION_PARAMS);
     }
   }
@@ -411,13 +521,26 @@ class AttentionImpl<ISA::AMX, scalar_t, head_dim> {
   // reshape KV to AMX friendly layout
   static void reshape_and_cache(
       const scalar_t* __restrict__ key, const scalar_t* __restrict__ value,
-      scalar_t* __restrict__ key_cache, scalar_t* __restrict__ value_cache,
+      kv_cache_t* __restrict__ key_cache, kv_cache_t* __restrict__ value_cache,
       const int64_t* __restrict__ slot_mapping, const int64_t token_num,
       const int64_t key_token_num_stride, const int64_t value_token_num_stride,
       const int64_t head_num, const int64_t key_head_num_stride,
       const int64_t value_head_num_stride, const int64_t num_blocks,
       const int64_t num_blocks_stride, const int64_t cache_head_num_stride,
-      const int64_t block_size, const int64_t block_size_stride) {
+      const int64_t block_size, const int64_t block_size_stride,
+      const float k_inv = 0.0f, const float v_inv = 0.0f) {
+    if constexpr (fp8_kv) {
+      constexpr auto qfn = select_fp8_quant_fn<kv_cache_t>();
+      reshape_and_cache_fp8_amx_impl<scalar_t, qfn>(
+          key, value, reinterpret_cast<uint8_t*>(key_cache),
+          reinterpret_cast<uint8_t*>(value_cache), slot_mapping, token_num,
+          head_num, head_dim, block_size, key_token_num_stride,
+          key_head_num_stride, value_token_num_stride, value_head_num_stride,
+          num_blocks_stride, cache_head_num_stride, num_blocks_stride,
+          cache_head_num_stride, k_inv, v_inv);
+      return;
+    }
+
     // For AMX 2D tiles, size of each line is 64 bytes
     constexpr int64_t amx_tile_row_size = AMX_TILE_ROW_BYTES;
     // For AMX B matrix, N always is 16
@@ -426,6 +549,9 @@ class AttentionImpl<ISA::AMX, scalar_t, head_dim> {
     // For now suppose block_size is divisible by amx_tile_column_num
     TORCH_CHECK_EQ(block_size % amx_b_tile_k_size, 0);
 
+    scalar_t* __restrict__ kc = reinterpret_cast<scalar_t*>(key_cache);
+    scalar_t* __restrict__ vc = reinterpret_cast<scalar_t*>(value_cache);
+
 #pragma omp parallel for collapse(2)
     for (int64_t token_idx = 0; token_idx < token_num; ++token_idx) {
       for (int64_t head_idx = 0; head_idx < head_num; ++head_idx) {
@@ -453,8 +579,7 @@ class AttentionImpl<ISA::AMX, scalar_t, head_dim> {
           constexpr int64_t quadword_num_per_group =
               token_num_per_group * quadword_num;
           int32_t* key_cache_start_ptr =
-              reinterpret_cast<int32_t*>(key_cache +
-                                         block_idx * num_blocks_stride +
+              reinterpret_cast<int32_t*>(kc + block_idx * num_blocks_stride +
                                          head_idx * cache_head_num_stride) +
               group_idx * quadword_num_per_group + group_offset;
 
@@ -483,7 +608,7 @@ class AttentionImpl<ISA::AMX, scalar_t, head_dim> {
                                             token_idx * value_token_num_stride +
                                             head_idx * value_head_num_stride;
           scalar_t* value_cache_start_ptr =
-              value_cache + block_idx * num_blocks_stride +
+              vc + block_idx * num_blocks_stride +
               head_idx * cache_head_num_stride +
               sub_group_idx * token_num_per_sub_group * amx_b_tile_n_size +
               sub_group_offset;
diff --git a/csrc/cpu/cpu_attn_fp8.hpp b/csrc/cpu/cpu_attn_fp8.hpp
new file mode 100644
index 000000000000..764b6ed7f84a
--- /dev/null
+++ b/csrc/cpu/cpu_attn_fp8.hpp
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#pragma once
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+
+#include "cpu/utils.hpp"
+
+typedef uint32_t __attribute__((__may_alias__)) u32_alias_t;
+typedef uint16_t __attribute__((__may_alias__)) u16_alias_t;
+typedef float __attribute__((__may_alias__)) f32_alias_t;
+
+// Reference scalar dequant — used to verify vectorized AMX dequant.
+inline float fp8e4m3_to_float_scalar(uint8_t b, float scale) noexcept {
+  // NaN encoding in E4M3
+  if ((b & 0x7F) == 0x7F) return std::numeric_limits<float>::quiet_NaN();
+  uint32_t b_u32 = static_cast<uint32_t>(b);
+  uint32_t sign = (b_u32 & 0x80) << 24;
+  uint32_t payload = (b_u32 & 0x7F) << 20;
+  uint32_t bits = sign | payload;
+  float b_f32_unscaled = *reinterpret_cast<const f32_alias_t*>(&bits);
+  float b_f32_scaled = b_f32_unscaled * scale * 0x1p120f;
+  return b_f32_scaled;
+}
+
+inline uint8_t float_to_fp8e4m3_scalar(float v, float inv_scale) noexcept {
+  v *= inv_scale;
+  constexpr float fp8_max = 448.0f;
+  v = std::max(-fp8_max, std::min(fp8_max, v));
+  if (v == 0.0f) return 0;
+
+  // Inverse mapping of fp8e4m3_to_float_scalar: shift the effective exponent
+  // bias from fp32 (127) back to fp8 e4m3 (7), then pack sign|payload.
+  float v_f32_unscaled = v * 0x1p-120f;
+  uint32_t bits = *reinterpret_cast<const u32_alias_t*>(&v_f32_unscaled);
+  uint8_t sign = static_cast<uint8_t>((bits >> 24) & 0x80);
+  uint8_t payload = static_cast<uint8_t>((bits >> 20) & 0x7F);
+  if (payload == 0) return sign;
+  payload = std::min<uint8_t>(payload, 0x7E);  // keep 0x7F as NaN encoding
+  return static_cast<uint8_t>(sign | payload);
+}
+
+// ---------------------------------------------------------------------------
+// AMX reshape impl — parameterised on the quantisation function.
+// Writes key/value into uint8 FP8 KV cache using the AMX tile-friendly layout.
+// K: halfword-packed (2 FP8 per uint16, token_num_per_group=16).
+// V: sub-group packing (token_num_per_sub_group=2, head_elems_per_group=16).
+// block_size must be divisible by 32.
+// ---------------------------------------------------------------------------
+template <typename scalar_t, uint8_t (*quant_fn)(float, float)>
+inline void reshape_and_cache_fp8_amx_impl(
+    const scalar_t* key_ptr, const scalar_t* value_ptr, uint8_t* key_cache_ptr,
+    uint8_t* value_cache_ptr, const int64_t* slot_ptr, int64_t token_num,
+    int64_t head_num, int64_t head_dim, int64_t block_size, int64_t k_stride0,
+    int64_t k_stride1, int64_t v_stride0, int64_t v_stride1, int64_t kc_stride0,
+    int64_t kc_stride1, int64_t vc_stride0, int64_t vc_stride1, float k_inv,
+    float v_inv) {
+  constexpr int64_t token_num_per_group = 16;  // AMX_TILE_ROW_NUM
+  const int64_t halfword_num = head_dim / 2;   // 2 FP8 per uint16
+  const int64_t halfword_num_per_group = token_num_per_group * halfword_num;
+  constexpr int64_t head_elems_per_group = 16;
+  constexpr int64_t token_num_per_sub_group = 2;  // = 4 / sizeof(BF16)
+  const int64_t group_num = head_dim / head_elems_per_group;
+  const int64_t group_size = block_size * head_elems_per_group;
+
+#pragma omp parallel for collapse(2) schedule(static)
+  for (int64_t tok = 0; tok < token_num; ++tok) {
+    for (int64_t h = 0; h < head_num; ++h) {
+      const int64_t slot = slot_ptr[tok];
+      if (slot < 0) continue;
+      const int64_t block_idx = slot / block_size;
+      const int64_t block_offset = slot % block_size;
+
+      // Key: halfword-packed, 2 FP8 per uint16
+      {
+        const scalar_t* ksrc = key_ptr + tok * k_stride0 + h * k_stride1;
+        const int64_t group_idx = block_offset / token_num_per_group;
+        const int64_t group_offset = block_offset % token_num_per_group;
+        uint16_t* kdst =
+            reinterpret_cast<uint16_t*>(key_cache_ptr + block_idx * kc_stride0 +
+                                        h * kc_stride1) +
+            group_idx * halfword_num_per_group + group_offset;
+        for (int64_t j = 0; j < halfword_num; ++j) {
+          uint8_t fp8_0 = quant_fn(static_cast<float>(ksrc[j * 2]), k_inv);
+          uint8_t fp8_1 = quant_fn(static_cast<float>(ksrc[j * 2 + 1]), k_inv);
+          uint8_t bytes[2] = {fp8_0, fp8_1};
+          uint16_t hw = *reinterpret_cast<const u16_alias_t*>(bytes);
+          kdst[j * token_num_per_group] = hw;
+        }
+      }
+
+      // Value: sub-group packing (token_num_per_sub_group = 2)
+      {
+        const scalar_t* vsrc = value_ptr + tok * v_stride0 + h * v_stride1;
+        const int64_t sub_group_idx = block_offset / token_num_per_sub_group;
+        const int64_t sub_group_offset = block_offset % token_num_per_sub_group;
+        uint8_t* vdst =
+            value_cache_ptr + block_idx * vc_stride0 + h * vc_stride1 +
+            sub_group_idx * token_num_per_sub_group * head_elems_per_group +
+            sub_group_offset;
+        for (int64_t i = 0; i < group_num; ++i) {
+          for (int64_t j = 0; j < head_elems_per_group; ++j)
+            vdst[j * token_num_per_sub_group] =
+                quant_fn(static_cast<float>(vsrc[j]), v_inv);
+          vsrc += head_elems_per_group;
+          vdst += group_size;
+        }
+      }
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// FP8 E5M2 scalar helpers
+// ---------------------------------------------------------------------------
+
+// Reference scalar dequant — used to verify vectorized AMX dequant.
+// FP8 E5M2: s[7] e[6:2] m[1:0], exponent bias = 15 (same as FP16).
+// Byte b → FP16 bits = b << 8 (no bias correction needed).
+inline float fp8e5m2_to_float_scalar(uint8_t b, float scale) noexcept {
+  const uint8_t exp_bits = (b >> 2) & 0x1F;
+  const uint8_t mant_bits = b & 0x03;
+  // NaN: exp=11111, mant!=00
+  if (exp_bits == 0x1F && mant_bits != 0)
+    return std::numeric_limits<float>::quiet_NaN();
+  const uint32_t sign = static_cast<uint32_t>(b & 0x80) << 24;
+  if (exp_bits == 0x1F)
+    return sign ? -std::numeric_limits<float>::infinity()
+                : std::numeric_limits<float>::infinity();
+  if (exp_bits == 0) {  // subnormal: (-1)^s * 2^-14 * mant/4
+    if (mant_bits == 0) return 0.0f;
+    float v = mant_bits * 0x1p-16f;
+    return (sign ? -v : v) * scale;
+  }
+  // Normal: FP32 exp = exp5 - 15 + 127, mantissa top 2 bits
+  uint32_t fp32_bits = sign |
+                       ((static_cast<uint32_t>(exp_bits) - 15 + 127) << 23) |
+                       (static_cast<uint32_t>(mant_bits) << 21);
+  float val = *reinterpret_cast<const f32_alias_t*>(&fp32_bits);
+  return val * scale;
+}
+
+inline uint8_t float_to_fp8e5m2_scalar(float v, float inv_scale) noexcept {
+  v *= inv_scale;
+  constexpr float fp8_e5m2_max = 57344.0f;
+  v = std::max(-fp8_e5m2_max, std::min(fp8_e5m2_max, v));
+  if (v == 0.0f) return 0;
+  uint32_t bits = *reinterpret_cast<const u32_alias_t*>(&v);
+  const uint8_t sign = static_cast<uint8_t>((bits >> 24) & 0x80);
+  const int32_t exp_fp32 = static_cast<int32_t>((bits >> 23) & 0xFF) - 127;
+  const uint8_t mant2 = static_cast<uint8_t>((bits >> 21) & 0x03);
+  if (exp_fp32 < -14) {  // subnormal in E5M2
+    const int shift = -14 - exp_fp32;
+    if (shift + 21 >= 32)
+      return sign;  // underflow: too small for E5M2 subnormal
+    const uint32_t m = (0x800000u | (bits & 0x7FFFFFu)) >> (shift + 21);
+    return sign | static_cast<uint8_t>(std::min<uint32_t>(m, 3u));
+  }
+  const uint8_t exp5 = static_cast<uint8_t>(exp_fp32 + 15);
+  return sign | (exp5 << 2) | mant2;
+}
+
+// ---------------------------------------------------------------------------
+// Select the FP8 quant function at compile time based on kv_cache_t.
+// ---------------------------------------------------------------------------
+template <typename kv_cache_t>
+constexpr auto select_fp8_quant_fn() {
+  if constexpr (std::is_same_v<kv_cache_t, c10::Float8_e5m2>)
+    return float_to_fp8e5m2_scalar;
+  else
+    return float_to_fp8e4m3_scalar;
+}
+
+// ---------------------------------------------------------------------------
+// VEC reshape impl — parameterised on the quantisation function.
+// Writes key (column-major) and value (row-major) into uint8 FP8 KV cache.
+// The pragma omp must live outside VLLM_DISPATCH_FLOATING_TYPES because
+// #pragma cannot appear inside variadic macro arguments.
+// ---------------------------------------------------------------------------
+template <typename scalar_t, uint8_t (*quant_fn)(float, float)>
+inline void reshape_and_cache_fp8_vec_impl(
+    const scalar_t* key_ptr, const scalar_t* value_ptr, uint8_t* key_cache_ptr,
+    uint8_t* value_cache_ptr, const int64_t* slot_ptr, int64_t token_num,
+    int64_t head_num, int64_t head_dim, int64_t block_size, int64_t k_stride0,
+    int64_t k_stride1, int64_t v_stride0, int64_t v_stride1, int64_t kc_stride0,
+    int64_t kc_stride1, int64_t vc_stride0, int64_t vc_stride1, float k_inv,
+    float v_inv) {
+#pragma omp parallel for collapse(2) schedule(static)
+  for (int64_t tok = 0; tok < token_num; ++tok) {
+    for (int64_t h = 0; h < head_num; ++h) {
+      const int64_t slot = slot_ptr[tok];
+      if (slot < 0) continue;
+      const int64_t block_idx = slot / block_size;
+      const int64_t block_offset = slot % block_size;
+
+      // Key layout: column-major within block
+      const scalar_t* ksrc = key_ptr + tok * k_stride0 + h * k_stride1;
+      uint8_t* kdst = key_cache_ptr + block_idx * kc_stride0 + h * kc_stride1 +
+                      block_offset;
+      for (int64_t i = 0; i < head_dim; ++i)
+        kdst[i * block_size] = quant_fn(static_cast<float>(ksrc[i]), k_inv);
+
+      // Value layout: row-major within block (contiguous head_dim bytes)
+      const scalar_t* vsrc = value_ptr + tok * v_stride0 + h * v_stride1;
+      uint8_t* vdst = value_cache_ptr + block_idx * vc_stride0 +
+                      h * vc_stride1 + block_offset * head_dim;
+      for (int64_t i = 0; i < head_dim; ++i)
+        vdst[i] = quant_fn(static_cast<float>(vsrc[i]), v_inv);
+    }
+  }
+}
diff --git a/csrc/cpu/cpu_attn_impl.hpp b/csrc/cpu/cpu_attn_impl.hpp
index c15799fa950d..2d0859a13dbb 100644
--- a/csrc/cpu/cpu_attn_impl.hpp
+++ b/csrc/cpu/cpu_attn_impl.hpp
@@ -12,10 +12,24 @@
 #include "cpu/utils.hpp"
 
 namespace cpu_attention {
-enum class ISA { AMX, VEC, VEC16, NEON, VXE };
+enum class ISA { AMX, VEC, VEC16, NEON, VXE, RVV, VSX };
 
-template <ISA isa, typename scalar_t, int64_t head_dim>
-class AttentionImpl {};
+// Mirrors csrc/attention/dtype_fp8.cuh Fp8KVCacheDataType exactly.
+enum class Fp8KVCacheDataType {
+  kAuto = 0,
+  kFp8E4M3 = 1,
+  kFp8E5M2 = 2,
+};
+
+struct AttentionInput;
+
+template <ISA isa, typename scalar_t, int64_t head_dim,
+          typename kv_cache_scalar_t = scalar_t>
+class AttentionImpl {
+ public:
+  void init_from_input(const AttentionInput*) {}
+  float get_output_v_scale() const noexcept { return 1.0f; }
+};
 
 struct AttentionWorkItemGroup {
   int32_t req_id;
@@ -147,6 +161,15 @@ struct AttentionMetadata {
       case ISA::NEON:
         ss << "NEON, ";
         break;
+      case ISA::VXE:
+        ss << "VXE, ";
+        break;
+      case ISA::RVV:
+        ss << "RVV, ";
+        break;
+      case ISA::VSX:
+        ss << "VSX, ";
+        break;
     }
     ss << "workitem_group_num: " << workitem_group_num
        << ", reduction_item_num: " << reduction_item_num
@@ -436,7 +459,7 @@ class AttentionScheduler {
     const int64_t kv_len_per_thread =
         (((total_kv_len / thread_num) + kv_len_alignment - 1) /
          kv_len_alignment) *
-        kv_len_alignment * (use_gqa ? input.num_heads_kv : input.num_heads_q);
+        kv_len_alignment;
     std::vector<AttentionWorkItemGroup> workitems;
     std::vector<ReductionWorkItemGroup> reduce_workitems;
     workitems.reserve(1024);
@@ -777,6 +800,9 @@ struct AttentionInput {
   int32_t sliding_window_left;
   int32_t sliding_window_right;
   float softcap;
+  // FP8 KV cache scales (used by FP8 attention implementations)
+  float k_scale_fp8 = 1.0f;
+  float v_scale_fp8 = 1.0f;
 };
 
 #define DEFINE_CPU_ATTENTION_PARAMS                                         \
@@ -1149,7 +1175,11 @@ class AttentionMainLoop {
                        bool use_sink) {
 #ifdef DEFINE_FAST_EXP
       DEFINE_FAST_EXP
+      bool constexpr IsReducedPrecision =
+          std::is_same_v<query_t, c10::BFloat16> ||
+          std::is_same_v<query_t, c10::Half>;
 #endif
+
       using prob_buffer_vec_t = typename VecTypeTrait<prob_buffer_t>::vec_t;
       static_assert(sizeof(prob_buffer_t) <= sizeof(logits_buffer_t));
 
@@ -1198,8 +1228,17 @@ class AttentionMainLoop {
             vec = vec - max_vec;
 
             // compute exp
-#ifdef DEFINE_FAST_EXP
-            vec = fast_exp(vec);
+
+#if defined(DEFINE_FAST_EXP)
+  #ifdef __aarch64__
+            if constexpr (IsReducedPrecision) {
+              vec = fast_exp_f16(vec);
+            } else
+  #endif
+            {
+              vec = fast_exp(vec);
+            }
+
             prob_buffer_vec_t output_vec(vec);
             output_vec.save(curr_prob_buffer_iter);
 #else
@@ -1255,7 +1294,11 @@ class AttentionMainLoop {
                        int32_t kv_tile_token_num, float softcap_scale) {
 #ifdef DEFINE_FAST_EXP
       DEFINE_FAST_EXP
+      bool constexpr IsReducedPrecision =
+          std::is_same_v<query_t, c10::BFloat16> ||
+          std::is_same_v<query_t, c10::Half>;
 #endif
+
       float inv_softcap_scale = 1.0 / softcap_scale;
       vec_op::FP32Vec16 softcap_scale_vec(softcap_scale);
       vec_op::FP32Vec16 inv_softcap_scale_vec(inv_softcap_scale);
@@ -1269,8 +1312,15 @@ class AttentionMainLoop {
           vec_op::FP32Vec16 vec(curr_logits_buffer_iter);
           vec = vec * inv_softcap_scale_vec;
 
-#ifdef DEFINE_FAST_EXP
-          vec = fast_exp(vec);
+#if defined(DEFINE_FAST_EXP)
+  #ifdef __aarch64__
+          if constexpr (IsReducedPrecision) {
+            vec = fast_exp_f16(vec);
+          } else
+  #endif
+          {
+            vec = fast_exp(vec);
+          }
           vec_op::FP32Vec16 inv_vec = ones_vec / vec;
           vec = (vec - inv_vec) / (vec + inv_vec);
 #else
@@ -1347,6 +1397,13 @@ class AttentionMainLoop {
       }
 
       attention_impl_t attn_impl;
+      constexpr bool fp8_kv = std::is_same_v<kv_cache_t, c10::Float8_e4m3fn> ||
+                              std::is_same_v<kv_cache_t, c10::Float8_e5m2>;
+      float output_v_scale = 1.0f;
+      if constexpr (fp8_kv) {
+        attn_impl.init_from_input(input);
+        output_v_scale = attn_impl.get_output_v_scale();
+      }
 
       // general information
       const int32_t q_head_num = input->num_heads;
@@ -1726,7 +1783,7 @@ class AttentionMainLoop {
                                reinterpret_cast<query_t*>(input->output) +
                                    output_buffer_offset,
                                sum_buffer, actual_q_heads_per_kv,
-                               actual_q_token_num, q_head_num);
+                               actual_q_token_num, q_head_num, output_v_scale);
                 } else {
                   const int32_t stride =
                       actual_q_heads_per_kv * split_kv_q_token_num_threshold;
@@ -1796,7 +1853,7 @@ class AttentionMainLoop {
               split_output_buffer,
               reinterpret_cast<query_t*>(input->output) + output_buffer_offset,
               split_sum_buffer, actual_q_heads_per_kv, curr_output_token_num,
-              q_head_num);
+              q_head_num, output_v_scale);
         }
       }
     }
@@ -1920,8 +1977,8 @@ class AttentionMainLoop {
                     query_t* __restrict__ curr_output_buffer,
                     float* __restrict__ sum_buffer,
                     const int32_t q_heads_per_kv,
-                    const int32_t actual_q_token_num,
-                    const int32_t q_head_num) {
+                    const int32_t actual_q_token_num, const int32_t q_head_num,
+                    const float v_scale = 1.0f) {
     // final output
     using output_vec_t = typename VecTypeTrait<query_t>::vec_t;
 
@@ -1935,7 +1992,7 @@ class AttentionMainLoop {
           curr_partial_output_buffer;
       query_t* __restrict__ curr_output_buffer_iter = curr_output_buffer;
       for (int32_t head_idx = 0; head_idx < q_heads_per_kv; ++head_idx) {
-        vec_op::FP32Vec16 inv_sum_scale_vec(1.0 / *curr_sum_buffer);
+        vec_op::FP32Vec16 inv_sum_scale_vec(v_scale / *curr_sum_buffer);
 
         for (int32_t i = 0; i < group_num_per_head; ++i) {
           vec_op::FP32Vec16 vec(curr_partial_output_buffer_iter);
diff --git a/csrc/cpu/cpu_attn_neon.hpp b/csrc/cpu/cpu_attn_neon.hpp
index 3523893c38c5..db4c5df2e88d 100644
--- a/csrc/cpu/cpu_attn_neon.hpp
+++ b/csrc/cpu/cpu_attn_neon.hpp
@@ -248,8 +248,8 @@ class TileGemmNeonFMLA {
 }  // namespace
 
 // this is similar to "ISA::VEC" at the moment
-template <typename scalar_t, int64_t head_dim>
-class AttentionImpl<ISA::NEON, scalar_t, head_dim> {
+template <typename scalar_t, int64_t head_dim, typename kv_cache_scalar_t>
+class AttentionImpl<ISA::NEON, scalar_t, head_dim, kv_cache_scalar_t> {
  public:
   using query_t = scalar_t;
   using q_buffer_t = float;
@@ -343,7 +343,8 @@ class AttentionImpl<ISA::NEON, scalar_t, head_dim> {
       const int64_t head_num, const int64_t key_head_num_stride,
       const int64_t value_head_num_stride, const int64_t num_blocks,
       const int64_t num_blocks_stride, const int64_t cache_head_num_stride,
-      const int64_t block_size, const int64_t block_size_stride) {
+      const int64_t block_size, const int64_t block_size_stride,
+      const float /*k_inv*/ = 0.0f, const float /*v_inv*/ = 0.0f) {
 #pragma omp parallel for collapse(2)
     for (int64_t token_idx = 0; token_idx < token_num; ++token_idx) {
       for (int64_t head_idx = 0; head_idx < head_num; ++head_idx) {
@@ -388,7 +389,7 @@ class AttentionImpl<ISA::NEON, scalar_t, head_dim> {
 #ifdef ARM_BF16_SUPPORT
 // For BF16 on Arm, reuse the BFMMLA kernels with 32-token alignment.
 template <int64_t head_dim>
-class AttentionImpl<ISA::NEON, c10::BFloat16, head_dim>
+class AttentionImpl<ISA::NEON, c10::BFloat16, head_dim, c10::BFloat16>
     : public AttentionImplNEONBFMMLA<BLOCK_SIZE_ALIGNMENT, ISA::NEON,
                                      head_dim> {};
 #endif
diff --git a/csrc/cpu/cpu_attn_neon_bfmmla.hpp b/csrc/cpu/cpu_attn_neon_bfmmla.hpp
index fb133aa13098..4e4578a74f5b 100644
--- a/csrc/cpu/cpu_attn_neon_bfmmla.hpp
+++ b/csrc/cpu/cpu_attn_neon_bfmmla.hpp
@@ -602,7 +602,8 @@ class AttentionImplNEONBFMMLA {
       [[maybe_unused]] const int64_t num_blocks,
       const int64_t num_blocks_stride, const int64_t cache_head_num_stride,
       const int64_t block_size,
-      [[maybe_unused]] const int64_t block_size_stride) {
+      [[maybe_unused]] const int64_t block_size_stride,
+      const float /*k_inv*/ = 0.0f, const float /*v_inv*/ = 0.0f) {
     const int64_t k_block_stride = (head_dim / TILE_K) * K_INNER_STRIDE;
     const int64_t v_pair_stride =
         (block_size / V_TOKENS_PER_ROW_BLOCK) * V_INNER_STRIDE;
diff --git a/csrc/cpu/cpu_attn_rvv.hpp b/csrc/cpu/cpu_attn_rvv.hpp
new file mode 100644
index 000000000000..396cc55c59e6
--- /dev/null
+++ b/csrc/cpu/cpu_attn_rvv.hpp
@@ -0,0 +1,412 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+#ifndef CPU_ATTN_RVV_HPP
+#define CPU_ATTN_RVV_HPP
+
+// RVV attention kernel using VLEN-agnostic RVVI() macros from
+// cpu_types_riscv_defs.hpp.  The Mx8 tile GEMM uses 8 FP32 elements
+// per vector (LMUL_256 bits of FP32 data), which maps to:
+//   VLEN=128: m2 (256 bits = 8 x FP32)
+//   VLEN=256: m1 (256 bits = 8 x FP32)
+// Only VLEN=128 and VLEN=256 are supported; other VLENs (512, 1024)
+// and scalar RISC-V builds fall back to VEC/VEC16.
+#if defined(__riscv_v_min_vlen) && \
+    (__riscv_v_min_vlen == 128 || __riscv_v_min_vlen == 256)
+
+  #include "cpu_attn_impl.hpp"
+  #include "cpu_types_riscv_defs.hpp"
+  #include <riscv_vector.h>
+  #include <type_traits>
+
+namespace cpu_attention {
+
+namespace {
+
+  #define BLOCK_SIZE_ALIGNMENT 32
+  #define HEAD_SIZE_ALIGNMENT 32
+  #define MAX_Q_HEAD_NUM_PER_ITER 16
+
+// ============================================================================
+// B-matrix row loading: load 8 elements as FP32
+// ============================================================================
+
+template <typename kv_cache_t>
+FORCE_INLINE fixed_fp32x8_t load_row8_B_as_f32(const kv_cache_t* p);
+
+template <>
+FORCE_INLINE fixed_fp32x8_t load_row8_B_as_f32<float>(const float* p) {
+  return RVVI(__riscv_vle32_v_f32, LMUL_256)(p, 8);
+}
+
+template <>
+FORCE_INLINE fixed_fp32x8_t load_row8_B_as_f32<c10::Half>(const c10::Half* p) {
+  #ifdef __riscv_zvfh
+  fixed_fp16x8_t h = RVVI(__riscv_vle16_v_f16, LMUL_128)(
+      reinterpret_cast<const _Float16*>(p), 8);
+  return RVVI(__riscv_vfwcvt_f_f_v_f32, LMUL_256)(h, 8);
+  #else
+  alignas(16) float tmp[8];
+  for (int i = 0; i < 8; ++i) {
+    tmp[i] = static_cast<float>(p[i]);
+  }
+  return RVVI(__riscv_vle32_v_f32, LMUL_256)(tmp, 8);
+  #endif
+}
+
+template <>
+FORCE_INLINE fixed_fp32x8_t
+load_row8_B_as_f32<c10::BFloat16>(const c10::BFloat16* p) {
+  #ifdef __riscv_zvfbfmin
+  fixed_bf16x8_t bf = RVVI(__riscv_vle16_v_bf16, LMUL_128)(
+      reinterpret_cast<const __bf16*>(p), 8);
+  return RVVI(__riscv_vfwcvtbf16_f_f_v_f32, LMUL_256)(bf, 8);
+  #else
+  fixed_u16x8_t raw = RVVI(__riscv_vle16_v_u16, LMUL_128)(
+      reinterpret_cast<const uint16_t*>(p), 8);
+  fixed_u32x8_t wide = RVVI(__riscv_vzext_vf2_u32, LMUL_256)(raw, 8);
+  fixed_u32x8_t shifted = RVVI(__riscv_vsll_vx_u32, LMUL_256)(wide, 16, 8);
+  return RVVI4(__riscv_vreinterpret_v_u32, LMUL_256, _f32, LMUL_256)(shifted);
+  #endif
+}
+
+// ============================================================================
+// Micro kernel: Mx8 tile, K unrolled by 4, RVV scalar-broadcast FMA
+// ============================================================================
+//
+// RVV has no lane-indexed FMA; instead we load A elements as scalars and
+// use vfmacc_vf (scalar * vector + accumulator).
+//
+// The 8-column tile uses LMUL_256 bits of FP32 data:
+//   VLEN=128: m2 (2 regs per accumulator), M=8 => 18 of 32 regs
+//   VLEN=256: m1 (1 reg  per accumulator), M=8 =>  9 of 32 regs
+
+template <int32_t M, typename kv_cache_t>
+FORCE_INLINE void gemm_micro_rvv_fma_Mx8_Ku4(
+    const float* __restrict A,       // [M x K]
+    const kv_cache_t* __restrict B,  // [K x 8]
+    float* __restrict C,             // [M x 8]
+    int64_t lda, int64_t ldb, int64_t ldc, int32_t K, bool accumulate) {
+  static_assert(1 <= M && M <= 8, "M must be in [1,8]");
+
+  constexpr size_t vl = 8;
+
+  #define ROWS_APPLY(OP) OP(0) OP(1) OP(2) OP(3) OP(4) OP(5) OP(6) OP(7)
+  #define IF_M(i) if constexpr (M > (i))
+
+  #define DECL_A(i) const float* a##i = A + (i) * lda;
+  ROWS_APPLY(DECL_A)
+  #undef DECL_A
+
+  #define DECL_ACC(i) fixed_fp32x8_t acc##i;
+  ROWS_APPLY(DECL_ACC)
+  #undef DECL_ACC
+
+  #define INIT_ACC(i)                                                    \
+    IF_M(i) {                                                            \
+      if (accumulate) {                                                  \
+        acc##i = RVVI(__riscv_vle32_v_f32, LMUL_256)(C + (i) * ldc, vl); \
+      } else {                                                           \
+        acc##i = RVVI(__riscv_vfmv_v_f_f32, LMUL_256)(0.f, vl);          \
+      }                                                                  \
+    }
+  ROWS_APPLY(INIT_ACC)
+  #undef INIT_ACC
+
+  int32_t k = 0;
+
+  for (; k + 3 < K; k += 4) {
+    {
+      fixed_fp32x8_t b =
+          load_row8_B_as_f32<kv_cache_t>(B + (int64_t)(k + 0) * ldb);
+  #define STEP_K0(i)                                                          \
+    IF_M(i) {                                                                 \
+      acc##i = RVVI(__riscv_vfmacc_vf_f32, LMUL_256)(acc##i, *(a##i + k + 0), \
+                                                     b, vl);                  \
+    }
+      ROWS_APPLY(STEP_K0)
+  #undef STEP_K0
+    }
+    {
+      fixed_fp32x8_t b =
+          load_row8_B_as_f32<kv_cache_t>(B + (int64_t)(k + 1) * ldb);
+  #define STEP_K1(i)                                                          \
+    IF_M(i) {                                                                 \
+      acc##i = RVVI(__riscv_vfmacc_vf_f32, LMUL_256)(acc##i, *(a##i + k + 1), \
+                                                     b, vl);                  \
+    }
+      ROWS_APPLY(STEP_K1)
+  #undef STEP_K1
+    }
+    {
+      fixed_fp32x8_t b =
+          load_row8_B_as_f32<kv_cache_t>(B + (int64_t)(k + 2) * ldb);
+  #define STEP_K2(i)                                                          \
+    IF_M(i) {                                                                 \
+      acc##i = RVVI(__riscv_vfmacc_vf_f32, LMUL_256)(acc##i, *(a##i + k + 2), \
+                                                     b, vl);                  \
+    }
+      ROWS_APPLY(STEP_K2)
+  #undef STEP_K2
+    }
+    {
+      fixed_fp32x8_t b =
+          load_row8_B_as_f32<kv_cache_t>(B + (int64_t)(k + 3) * ldb);
+  #define STEP_K3(i)                                                          \
+    IF_M(i) {                                                                 \
+      acc##i = RVVI(__riscv_vfmacc_vf_f32, LMUL_256)(acc##i, *(a##i + k + 3), \
+                                                     b, vl);                  \
+    }
+      ROWS_APPLY(STEP_K3)
+  #undef STEP_K3
+    }
+  }
+
+  for (; k < K; ++k) {
+    fixed_fp32x8_t b = load_row8_B_as_f32<kv_cache_t>(B + (int64_t)k * ldb);
+  #define TAIL_ROW(i)                                                        \
+    IF_M(i) {                                                                \
+      acc##i =                                                               \
+          RVVI(__riscv_vfmacc_vf_f32, LMUL_256)(acc##i, *(a##i + k), b, vl); \
+    }
+    ROWS_APPLY(TAIL_ROW)
+  #undef TAIL_ROW
+  }
+
+  #define STORE_ROW(i) \
+    IF_M(i) { RVVI(__riscv_vse32_v_f32, LMUL_256)(C + (i) * ldc, acc##i, vl); }
+  ROWS_APPLY(STORE_ROW)
+  #undef STORE_ROW
+
+  #undef ROWS_APPLY
+  #undef IF_M
+}
+
+// ============================================================================
+// Macro kernel: dispatch M tiles of {8,4,2,1}, step N by 8
+// ============================================================================
+
+template <int32_t N, typename kv_cache_t>
+FORCE_INLINE void gemm_macro_rvv_fma_Mx8_Ku4(const float* __restrict A,
+                                             const kv_cache_t* __restrict B,
+                                             float* __restrict C, int32_t M,
+                                             int32_t K, int64_t lda,
+                                             int64_t ldb, int64_t ldc,
+                                             bool accumulate) {
+  static_assert(N % 8 == 0, "N must be a multiple of 8");
+  for (int32_t m = 0; m < M;) {
+    int32_t mb = (M - m >= 8) ? 8 : (M - m >= 4) ? 4 : (M - m >= 2) ? 2 : 1;
+    const float* Ab = A + m * lda;
+    float* Cb = C + m * ldc;
+
+    for (int32_t n = 0; n < N; n += 8) {
+      const kv_cache_t* Bn = B + n;
+      float* Cn = Cb + n;
+      switch (mb) {
+        case 8:
+          gemm_micro_rvv_fma_Mx8_Ku4<8, kv_cache_t>(Ab, Bn, Cn, lda, ldb, ldc,
+                                                    K, accumulate);
+          break;
+        case 4:
+          gemm_micro_rvv_fma_Mx8_Ku4<4, kv_cache_t>(Ab, Bn, Cn, lda, ldb, ldc,
+                                                    K, accumulate);
+          break;
+        case 2:
+          gemm_micro_rvv_fma_Mx8_Ku4<2, kv_cache_t>(Ab, Bn, Cn, lda, ldb, ldc,
+                                                    K, accumulate);
+          break;
+        default:
+          gemm_micro_rvv_fma_Mx8_Ku4<1, kv_cache_t>(Ab, Bn, Cn, lda, ldb, ldc,
+                                                    K, accumulate);
+          break;
+      }
+    }
+    m += mb;
+  }
+}
+
+// ============================================================================
+// TileGemm wrapper — plugs into AttentionMainLoop
+// ============================================================================
+
+template <typename kv_cache_t>
+class TileGemmRVV {
+ public:
+  template <AttentionGemmPhase phase, int32_t k_size>
+  FORCE_INLINE static void gemm(const int32_t m_size,
+                                float* __restrict__ a_tile,
+                                kv_cache_t* __restrict__ b_tile,
+                                float* __restrict__ c_tile, const int64_t lda,
+                                const int64_t ldb, const int64_t ldc,
+                                const int32_t block_size,
+                                const int32_t dynamic_k_size,
+                                const bool accum_c) {
+    if constexpr (phase == AttentionGemmPhase::QK) {
+      gemm_macro_rvv_fma_Mx8_Ku4<BLOCK_SIZE_ALIGNMENT, kv_cache_t>(
+          a_tile, b_tile, c_tile, m_size, k_size, lda, ldb, ldc, accum_c);
+    } else {
+      gemm_macro_rvv_fma_Mx8_Ku4<HEAD_SIZE_ALIGNMENT, kv_cache_t>(
+          a_tile, b_tile, c_tile, m_size, dynamic_k_size, lda, ldb, ldc,
+          accum_c);
+    }
+  }
+};
+
+}  // namespace
+
+// ============================================================================
+// AttentionImpl<ISA::RVV> — mirrors ISA::NEON specialization
+// ============================================================================
+
+template <typename scalar_t, int64_t head_dim, typename kv_cache_scalar_t>
+class AttentionImpl<ISA::RVV, scalar_t, head_dim, kv_cache_scalar_t> {
+ public:
+  using query_t = scalar_t;
+  using q_buffer_t = float;
+  using kv_cache_t = scalar_t;
+  using logits_buffer_t = float;
+  using partial_output_buffer_t = float;
+  using prob_buffer_t = float;
+
+  constexpr static int64_t BlockSizeAlignment = BLOCK_SIZE_ALIGNMENT;
+  constexpr static int64_t HeadDimAlignment = HEAD_SIZE_ALIGNMENT;
+  constexpr static int64_t MaxQHeadNumPerIteration = MAX_Q_HEAD_NUM_PER_ITER;
+  constexpr static int64_t HeadDim = head_dim;
+  constexpr static ISA ISAType = ISA::RVV;
+  constexpr static bool scale_on_logits = false;
+
+  static_assert(HeadDim % HeadDimAlignment == 0);
+  static_assert(HeadDimAlignment % 8 == 0);
+  static_assert(BlockSizeAlignment % 8 == 0);
+
+ public:
+  template <template <typename tile_gemm_t> typename attention>
+  FORCE_INLINE void execute_attention(DEFINE_CPU_ATTENTION_PARAMS) {
+    attention<TileGemmRVV<kv_cache_t>> attention_iteration;
+    attention_iteration(CPU_ATTENTION_PARAMS);
+  }
+
+  constexpr static int64_t k_cache_token_group_stride(
+      const int32_t block_size) {
+    return BlockSizeAlignment;
+  }
+
+  constexpr static int64_t v_cache_token_group_stride(
+      const int32_t block_size) {
+    return head_dim * BlockSizeAlignment;
+  }
+
+  constexpr static int64_t v_cache_head_group_stride(const int32_t block_size) {
+    return HeadDimAlignment;
+  }
+
+  static void copy_q_heads_tile(scalar_t* __restrict__ src,
+                                float* __restrict__ q_buffer,
+                                const int32_t q_num,
+                                const int32_t q_heads_per_kv,
+                                const int64_t q_num_stride,
+                                const int64_t q_head_stride, float scale) {
+    static_assert(head_dim % 16 == 0);
+    constexpr int32_t unroll_size = head_dim / 16;
+    using load_vec_t = typename VecTypeTrait<scalar_t>::vec_t;
+
+    vec_op::FP32Vec16 scale_vec(scale);
+    for (int32_t q_num_idx = 0; q_num_idx < q_num; ++q_num_idx) {
+      for (int32_t q_head_idx = 0; q_head_idx < q_heads_per_kv; ++q_head_idx) {
+        scalar_t* __restrict__ curr_q =
+            src + q_num_idx * q_num_stride + q_head_idx * q_head_stride;
+        float* __restrict__ curr_q_buffer =
+            q_buffer + q_num_idx * q_heads_per_kv * head_dim +
+            q_head_idx * head_dim;
+
+        vec_op::unroll_loop<int32_t, unroll_size>([&](int32_t i) {
+          load_vec_t vec(curr_q);
+          vec_op::FP32Vec16 fp32_vec(vec);
+          fp32_vec = fp32_vec * scale_vec;
+          fp32_vec.save(curr_q_buffer);
+
+          curr_q += 16;
+          curr_q_buffer += 16;
+        });
+      }
+    }
+  }
+
+  static void reshape_and_cache(
+      const scalar_t* __restrict__ key, const scalar_t* __restrict__ value,
+      scalar_t* __restrict__ key_cache, scalar_t* __restrict__ value_cache,
+      const int64_t* __restrict__ slot_mapping, const int64_t token_num,
+      const int64_t key_token_num_stride, const int64_t value_token_num_stride,
+      const int64_t head_num, const int64_t key_head_num_stride,
+      const int64_t value_head_num_stride, const int64_t num_blocks,
+      const int64_t num_blocks_stride, const int64_t cache_head_num_stride,
+      const int64_t block_size, const int64_t block_size_stride,
+      const float /*k_inv*/ = 0.0f, const float /*v_inv*/ = 0.0f) {
+  #pragma omp parallel for collapse(2)
+    for (int64_t token_idx = 0; token_idx < token_num; ++token_idx) {
+      for (int64_t head_idx = 0; head_idx < head_num; ++head_idx) {
+        const int64_t pos = slot_mapping[token_idx];
+        if (pos < 0) {
+          continue;
+        }
+
+        const int64_t block_idx = pos / block_size;
+        const int64_t block_offset = pos % block_size;
+        {
+          const scalar_t* key_start_ptr = key +
+                                          token_idx * key_token_num_stride +
+                                          head_idx * key_head_num_stride;
+          scalar_t* key_cache_start_ptr =
+              key_cache + block_idx * num_blocks_stride +
+              head_idx * cache_head_num_stride + block_offset;
+
+          {
+            const ptrdiff_t byte_stride = block_size * sizeof(scalar_t);
+            int64_t i = 0;
+            for (; i < head_dim;) {
+              size_t vl;
+              if constexpr (std::is_same_v<scalar_t, float>) {
+                vl = __riscv_vsetvl_e32m2(head_dim - i);
+                vfloat32m2_t v = __riscv_vle32_v_f32m2(
+                    reinterpret_cast<const float*>(key_start_ptr + i), vl);
+                __riscv_vsse32_v_f32m2(
+                    reinterpret_cast<float*>(key_cache_start_ptr +
+                                             i * block_size),
+                    byte_stride, v, vl);
+              } else {
+                vl = __riscv_vsetvl_e16m1(head_dim - i);
+                vuint16m1_t v = __riscv_vle16_v_u16m1(
+                    reinterpret_cast<const uint16_t*>(key_start_ptr + i), vl);
+                __riscv_vsse16_v_u16m1(
+                    reinterpret_cast<uint16_t*>(key_cache_start_ptr +
+                                                i * block_size),
+                    byte_stride, v, vl);
+              }
+              i += vl;
+            }
+          }
+        }
+        {
+          const scalar_t* value_start_ptr = value +
+                                            token_idx * value_token_num_stride +
+                                            head_idx * value_head_num_stride;
+          scalar_t* value_cache_start_ptr =
+              value_cache + block_idx * num_blocks_stride +
+              head_idx * cache_head_num_stride + block_offset * head_dim;
+          std::memcpy(value_cache_start_ptr, value_start_ptr,
+                      sizeof(scalar_t) * head_dim);
+        }
+      }
+    }
+  }
+};
+
+}  // namespace cpu_attention
+
+  #undef BLOCK_SIZE_ALIGNMENT
+  #undef HEAD_SIZE_ALIGNMENT
+  #undef MAX_Q_HEAD_NUM_PER_ITER
+
+#endif  // __riscv_v_min_vlen == 128 || 256
+
+#endif  // CPU_ATTN_RVV_HPP
diff --git a/csrc/cpu/cpu_attn_vec.hpp b/csrc/cpu/cpu_attn_vec.hpp
index 479313f0e19f..c3983e0578a5 100644
--- a/csrc/cpu/cpu_attn_vec.hpp
+++ b/csrc/cpu/cpu_attn_vec.hpp
@@ -1,11 +1,37 @@
 #ifndef CPU_ATTN_VEC_HPP
 #define CPU_ATTN_VEC_HPP
 
+#include "cpu_attn_fp8.hpp"
 #include "cpu_attn_impl.hpp"
 
 namespace cpu_attention {
 
 namespace {
+
+// Load 32 kv_cache_t elements starting at ptr and return them as two FP32Vec16s
+// covering the lower 16 and upper 16 positions.
+// For FP8: both halves come from a single BF16Vec32 dequant of 32 bytes.
+// For BF16/FP16/FP32: two separate vector loads at ptr and ptr+16.
+template <typename kv_cache_t>
+FORCE_INLINE std::pair<vec_op::FP32Vec16, vec_op::FP32Vec16> load_b_pair_vec(
+    const kv_cache_t* ptr) {
+  if constexpr (std::is_same_v<kv_cache_t, c10::Float8_e4m3fn>) {
+    // BF16 container, but values are in the FP16 exponent range (bias 15 not
+    // 127).
+    vec_op::BF16Vec32 bf16_b_reg(reinterpret_cast<const uint8_t*>(ptr),
+                                 vec_op::fp8_e4m3_tag{});
+    return {vec_op::FP32Vec16(bf16_b_reg, 0), vec_op::FP32Vec16(bf16_b_reg, 1)};
+  } else if constexpr (std::is_same_v<kv_cache_t, c10::Float8_e5m2>) {
+    vec_op::BF16Vec32 bf16_b_reg(reinterpret_cast<const uint8_t*>(ptr),
+                                 vec_op::fp8_e5m2_tag{});
+    return {vec_op::FP32Vec16(bf16_b_reg, 0), vec_op::FP32Vec16(bf16_b_reg, 1)};
+  } else {
+    using load_vec_t = typename VecTypeTrait<kv_cache_t>::vec_t;
+    return std::make_pair(vec_op::FP32Vec16(load_vec_t(ptr)),
+                          vec_op::FP32Vec16(load_vec_t(ptr + 16)));
+  }
+}
+
 // 8-2-16 pattern, 8 regs for A, 2 regs for B, 16 regs for C, [8, K] @ [k, 32]
 template <typename kv_cache_t>
 class TileGemm82 {
@@ -53,11 +79,8 @@ class TileGemm82 {
                          const int64_t ldb, const int64_t ldc,
                          const int32_t block_size, const int32_t dynamic_k_size,
                          const bool accum_c) {
-    static_assert(0 < M <= 8);
-    using load_vec_t = typename VecTypeTrait<kv_cache_t>::vec_t;
+    static_assert(0 < M && M <= 8);
 
-    kv_cache_t* __restrict__ curr_b_0 = b_tile;
-    kv_cache_t* __restrict__ curr_b_1 = b_tile + 16;
     float* __restrict__ curr_c_0 = c_tile;
     float* __restrict__ curr_c_1 = c_tile + 16;
 
@@ -76,16 +99,14 @@ class TileGemm82 {
     }
 
     float* __restrict__ curr_a = a_tile;
+    kv_cache_t* __restrict__ curr_b = b_tile;
+
     for (int32_t k = 0; k < dynamic_k_size; ++k) {
-      load_vec_t b_0_reg(curr_b_0);
-      vec_op::FP32Vec16 fp32_b_0_reg(b_0_reg);
-      load_vec_t b_1_reg(curr_b_1);
-      vec_op::FP32Vec16 fp32_b_1_reg(b_1_reg);
+      auto [fp32_b_0_reg, fp32_b_1_reg] = load_b_pair_vec(curr_b);
 
       float* __restrict__ curr_m_a = curr_a;
       vec_op::unroll_loop<int32_t, M>([&](int32_t i) {
-        float v = *curr_m_a;
-        vec_op::FP32Vec16 a_reg(v);
+        vec_op::FP32Vec16 a_reg(*curr_m_a);
         c_regs[i * 2] = c_regs[i * 2] + a_reg * fp32_b_0_reg;
         c_regs[i * 2 + 1] = c_regs[i * 2 + 1] + a_reg * fp32_b_1_reg;
 
@@ -95,8 +116,7 @@ class TileGemm82 {
 
       // update
       curr_a += 1;
-      curr_b_0 += ldb;
-      curr_b_1 += ldb;
+      curr_b += ldb;
     }
 
     vec_op::unroll_loop<int32_t, M>([&](int32_t i) {
@@ -109,15 +129,20 @@ class TileGemm82 {
     });
   }
 };
+
 }  // namespace
 
 // This is a general but naive implementation based on vector instructions
-template <typename scalar_t, int64_t head_dim>
-class AttentionImpl<ISA::VEC, scalar_t, head_dim> {
+template <typename scalar_t, int64_t head_dim, typename kv_cache_scalar_t>
+class AttentionImpl<ISA::VEC, scalar_t, head_dim, kv_cache_scalar_t> {
+  static constexpr bool fp8_kv =
+      std::is_same_v<kv_cache_scalar_t, c10::Float8_e4m3fn> ||
+      std::is_same_v<kv_cache_scalar_t, c10::Float8_e5m2>;
+
  public:
   using query_t = scalar_t;
   using q_buffer_t = float;
-  using kv_cache_t = scalar_t;
+  using kv_cache_t = kv_cache_scalar_t;
   using logits_buffer_t = float;
   using partial_output_buffer_t = float;
   using prob_buffer_t = float;
@@ -129,11 +154,45 @@ class AttentionImpl<ISA::VEC, scalar_t, head_dim> {
   constexpr static int64_t MaxQHeadNumPerIteration = 8;
   constexpr static int64_t HeadDim = head_dim;
   constexpr static ISA ISAType = ISA::VEC;
-  constexpr static bool scale_on_logits = false;  // apply scale on q_buffer
+  constexpr static bool scale_on_logits = fp8_kv;
+
+  float k_scale = 1.0f;
+  float v_scale = 1.0f;
 
  public:
+  void init_from_input(const AttentionInput* input) {
+    if constexpr (fp8_kv) {
+      k_scale = input->k_scale_fp8;
+      v_scale = input->v_scale_fp8;
+    }
+  }
+
+  float get_output_v_scale() const noexcept {
+    if constexpr (fp8_kv) {
+      // VEC dequant unpacks FP8 into a pseudo-FP16 layout (exponent bias 15).
+      // E4M3 (bias=7) needs correction 2^(15-7) = 2^8; E5M2 bias matches FP16
+      // so no correction.
+      if constexpr (std::is_same_v<kv_cache_t, c10::Float8_e5m2>) {
+        return v_scale;
+      } else {
+        return v_scale * 0x1p8f;
+      }
+    }
+    return 1.0f;
+  }
+
   template <template <typename tile_gemm_t> typename attention>
   FORCE_INLINE void execute_attention(DEFINE_CPU_ATTENTION_PARAMS) {
+    if constexpr (fp8_kv) {
+      // Same bias correction as get_output_v_scale: VEC FP8→pseudo-FP16 dequant
+      // uses bias 15; E4M3 (bias=7) needs ×2^8, E5M2 (bias=15) needs no
+      // correction.
+      if constexpr (std::is_same_v<kv_cache_t, c10::Float8_e5m2>) {
+        scale *= k_scale;
+      } else {
+        scale *= k_scale * 0x1p8f;
+      }
+    }
     attention<TileGemm82<kv_cache_t>> attention_iteration;
     attention_iteration(CPU_ATTENTION_PARAMS);
   }
@@ -161,17 +220,19 @@ class AttentionImpl<ISA::VEC, scalar_t, head_dim> {
                               // row-major
   }
 
-  // Copy q to q_buffer and cast it to fp32
-  static void copy_q_heads_tile(
-      scalar_t* __restrict__ src,  // [q_num, q_heads_per_kv, head_size]
-      float* __restrict__ q_buffer, const int32_t q_num,
-      const int32_t q_heads_per_kv, const int64_t q_num_stride,
-      const int64_t q_head_stride, float scale) {
+  // Copy q to q_buffer and cast it to fp32.
+  // FP8: QK scale is folded into execute_attention; copy Q unscaled here.
+  void copy_q_heads_tile(scalar_t* __restrict__ src,
+                         float* __restrict__ q_buffer, const int32_t q_num,
+                         const int32_t q_heads_per_kv,
+                         const int64_t q_num_stride,
+                         const int64_t q_head_stride, float scale) {
     static_assert(head_dim % 16 == 0);
     constexpr int32_t unroll_size = head_dim / 16;
     using load_vec_t = typename VecTypeTrait<scalar_t>::vec_t;
 
-    vec_op::FP32Vec16 scale_vec(scale);
+    const float effective_scale = fp8_kv ? 1.0f : scale;
+    vec_op::FP32Vec16 scale_vec(effective_scale);
     for (int32_t q_num_idx = 0; q_num_idx < q_num; ++q_num_idx) {
       for (int32_t q_head_idx = 0; q_head_idx < q_heads_per_kv; ++q_head_idx) {
         scalar_t* __restrict__ curr_q =
@@ -196,13 +257,26 @@ class AttentionImpl<ISA::VEC, scalar_t, head_dim> {
   // reshape K as column-major and V as row-major
   static void reshape_and_cache(
       const scalar_t* __restrict__ key, const scalar_t* __restrict__ value,
-      scalar_t* __restrict__ key_cache, scalar_t* __restrict__ value_cache,
+      kv_cache_t* __restrict__ key_cache, kv_cache_t* __restrict__ value_cache,
       const int64_t* __restrict__ slot_mapping, const int64_t token_num,
       const int64_t key_token_num_stride, const int64_t value_token_num_stride,
       const int64_t head_num, const int64_t key_head_num_stride,
       const int64_t value_head_num_stride, const int64_t num_blocks,
       const int64_t num_blocks_stride, const int64_t cache_head_num_stride,
-      const int64_t block_size, const int64_t block_size_stride) {
+      const int64_t block_size, const int64_t block_size_stride,
+      const float k_inv = 0.0f, const float v_inv = 0.0f) {
+    if constexpr (fp8_kv) {
+      constexpr auto qfn = select_fp8_quant_fn<kv_cache_t>();
+      reshape_and_cache_fp8_vec_impl<scalar_t, qfn>(
+          key, value, reinterpret_cast<uint8_t*>(key_cache),
+          reinterpret_cast<uint8_t*>(value_cache), slot_mapping, token_num,
+          head_num, head_dim, block_size, key_token_num_stride,
+          key_head_num_stride, value_token_num_stride, value_head_num_stride,
+          num_blocks_stride, cache_head_num_stride, num_blocks_stride,
+          cache_head_num_stride, k_inv, v_inv);
+      return;
+    }
+
 #pragma omp parallel for collapse(2)
     for (int64_t token_idx = 0; token_idx < token_num; ++token_idx) {
       for (int64_t head_idx = 0; head_idx < head_num; ++head_idx) {
@@ -220,8 +294,9 @@ class AttentionImpl<ISA::VEC, scalar_t, head_dim> {
                                           token_idx * key_token_num_stride +
                                           head_idx * key_head_num_stride;
           scalar_t* key_cache_start_ptr =
-              key_cache + block_idx * num_blocks_stride +
-              head_idx * cache_head_num_stride + block_offset;
+              reinterpret_cast<scalar_t*>(key_cache) +
+              block_idx * num_blocks_stride + head_idx * cache_head_num_stride +
+              block_offset;
 
 #pragma GCC unroll 8
           for (int64_t i = 0, j = 0; i < head_dim; ++i, j += block_size) {
@@ -234,8 +309,9 @@ class AttentionImpl<ISA::VEC, scalar_t, head_dim> {
                                             token_idx * value_token_num_stride +
                                             head_idx * value_head_num_stride;
           scalar_t* value_cache_start_ptr =
-              value_cache + block_idx * num_blocks_stride +
-              head_idx * cache_head_num_stride + block_offset * head_dim;
+              reinterpret_cast<scalar_t*>(value_cache) +
+              block_idx * num_blocks_stride + head_idx * cache_head_num_stride +
+              block_offset * head_dim;
           std::memcpy(value_cache_start_ptr, value_start_ptr,
                       sizeof(scalar_t) * head_dim);
         }
@@ -243,6 +319,7 @@ class AttentionImpl<ISA::VEC, scalar_t, head_dim> {
     }
   }
 };
+
 }  // namespace cpu_attention
 
 #endif
diff --git a/csrc/cpu/cpu_attn_vec16.hpp b/csrc/cpu/cpu_attn_vec16.hpp
index 7402312c0924..bc15d614a7ed 100644
--- a/csrc/cpu/cpu_attn_vec16.hpp
+++ b/csrc/cpu/cpu_attn_vec16.hpp
@@ -68,7 +68,7 @@ class TileGemm161 {
                          const int64_t ldb, const int64_t ldc,
                          const int32_t block_size, const int32_t dynamic_k_size,
                          const bool accum_c) {
-    static_assert(0 < M <= 16);
+    static_assert(0 < M && M <= 16);
     using load_vec_t = typename VecTypeTrait<kv_cache_t>::vec_t;
 
     kv_cache_t* __restrict__ curr_b_0 = b_tile;
@@ -116,9 +116,9 @@ class TileGemm161 {
 }  // namespace
 
 // This is a general but naive implementation based on vector instructions
-template <typename scalar_t, int64_t head_dim>
-class AttentionImpl<ISA::VEC16, scalar_t, head_dim>
-    : public AttentionImpl<ISA::VEC, scalar_t, head_dim> {
+template <typename scalar_t, int64_t head_dim, typename kv_cache_scalar_t>
+class AttentionImpl<ISA::VEC16, scalar_t, head_dim, kv_cache_scalar_t>
+    : public AttentionImpl<ISA::VEC, scalar_t, head_dim, kv_cache_scalar_t> {
  public:
   using query_t = scalar_t;
   using q_buffer_t = float;
diff --git a/csrc/cpu/cpu_attn_vsx.hpp b/csrc/cpu/cpu_attn_vsx.hpp
new file mode 100644
index 000000000000..c7e1502bcb05
--- /dev/null
+++ b/csrc/cpu/cpu_attn_vsx.hpp
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#ifndef CPU_ATTN_VSX_HPP
+#define CPU_ATTN_VSX_HPP
+
+#include "cpu_attn_impl.hpp"
+#include <altivec.h>
+#include <type_traits>
+
+namespace cpu_attention {
+
+namespace {
+
+// ppc64le Vector = 16 bytes (128 bits)
+#define BLOCK_SIZE_ALIGNMENT 32
+#define HEAD_SIZE_ALIGNMENT 32
+#define MAX_Q_HEAD_NUM_PER_ITER 16
+
+template <typename kv_cache_t>
+FORCE_INLINE void load_row8_B_as_f32(const kv_cache_t* p, __vector float& b0,
+                                     __vector float& b1);
+
+// [1] Float Specialization
+template <>
+FORCE_INLINE void load_row8_B_as_f32<float>(const float* p, __vector float& b0,
+                                            __vector float& b1) {
+  b0 = vec_xl(0, const_cast<float*>(p));
+  b1 = vec_xl(0, const_cast<float*>(p + 4));
+}
+
+// [2] BFloat16 Specialization (Little Endian ppc64le)
+// On ppc64le (LE): BF16 bits should land in the HIGH 16 bits of each float32.
+// Byte layout of float32 on LE: [byte0(LSB), byte1, byte2, byte3(MSB)]
+// We need BF16 in bytes2-3 (high half) with bytes0-1 zeroed.
+// vec_mergeh on LE interleaves elements 0..3: result_i = {a[i], b[i]}
+// So vec_mergeh(zeros_u16, raw_u16) gives for each uint16 pair:
+//   uint16[2i]   = zeros[i]  -> low 16 bits of uint32  -> zeroed mantissa LSBs
+//   uint16[2i+1] = raw[i]    -> high 16 bits of uint32 -> BF16 bits
+// Cast to float32 gives exactly (bf16_bits << 16) per element.
+template <>
+FORCE_INLINE void load_row8_B_as_f32<c10::BFloat16>(const c10::BFloat16* p,
+                                                    __vector float& b0,
+                                                    __vector float& b1) {
+  __vector unsigned short raw = vec_xl(
+      0, reinterpret_cast<unsigned short*>(const_cast<c10::BFloat16*>(p)));
+  __vector unsigned short zeros = vec_splat_u16(0);
+
+  // LE: zeros in low 16 bits, raw in high 16 bits → bf16 << 16 == float32
+  b0 = (__vector float)vec_mergeh(zeros, raw);
+  b1 = (__vector float)vec_mergel(zeros, raw);
+}
+
+// Note: c10::Half (FP16) is not supported on PowerPC architecture
+
+template <int32_t M, typename kv_cache_t>
+FORCE_INLINE void gemm_micro_ppc64le_Mx8_Ku4(
+    const float* __restrict A,       // [M x K]
+    const kv_cache_t* __restrict B,  // [K x 8]
+    float* __restrict C,             // [M x 8]
+    int64_t lda, int64_t ldb, int64_t ldc, int32_t K, bool accumulate) {
+  static_assert(1 <= M && M <= 8, "M must be in [1,8]");
+
+#define ROWS_APPLY(OP) OP(0) OP(1) OP(2) OP(3) OP(4) OP(5) OP(6) OP(7)
+#define IF_M(i) if constexpr (M > (i))
+
+  // 1. Define A pointers
+#define DECL_A(i) const float* a##i = A + (i) * lda;
+  ROWS_APPLY(DECL_A)
+#undef DECL_A
+
+  // 2. Define Accumulators (2 vectors covers 8 columns)
+#define DECL_ACC(i) __vector float acc##i##_0, acc##i##_1;
+  ROWS_APPLY(DECL_ACC)
+#undef DECL_ACC
+
+  // 3. Initialize Accumulators (Load C or Zero)
+#define INIT_ACC(i)                                                  \
+  IF_M(i) {                                                          \
+    if (accumulate) {                                                \
+      acc##i##_0 = vec_xl(0, const_cast<float*>(C + (i) * ldc + 0)); \
+      acc##i##_1 = vec_xl(0, const_cast<float*>(C + (i) * ldc + 4)); \
+    } else {                                                         \
+      acc##i##_0 = vec_splats(0.0f);                                 \
+      acc##i##_1 = vec_splats(0.0f);                                 \
+    }                                                                \
+  }
+  ROWS_APPLY(INIT_ACC)
+#undef INIT_ACC
+
+  int32_t k = 0;
+
+  for (; k + 3 < K; k += 4) {
+    // Load 4 values of A for each Row M: A[k...k+3]
+#define LOAD_A4(i)        \
+  __vector float a##i##v; \
+  IF_M(i) a##i##v = vec_xl(0, const_cast<float*>(a##i + k));
+    ROWS_APPLY(LOAD_A4)
+#undef LOAD_A4
+
+    // FMA for specific lane L of A
+    // ppc64le: vec_madd(b, vec_splat(a, lane), acc)
+#define FMAS_LANE(i, aiv, L)                        \
+  IF_M(i) {                                         \
+    __vector float a_broad = vec_splat(aiv, L);     \
+    acc##i##_0 = vec_madd(b0, a_broad, acc##i##_0); \
+    acc##i##_1 = vec_madd(b1, a_broad, acc##i##_1); \
+  }
+
+    // Unroll K=0..3
+    {
+      __vector float b0, b1;
+      load_row8_B_as_f32<kv_cache_t>(B + (int64_t)(k + 0) * ldb, b0, b1);
+#define STEP_K0(i) FMAS_LANE(i, a##i##v, 0)
+      ROWS_APPLY(STEP_K0)
+#undef STEP_K0
+    }
+    {
+      __vector float b0, b1;
+      load_row8_B_as_f32<kv_cache_t>(B + (int64_t)(k + 1) * ldb, b0, b1);
+#define STEP_K1(i) FMAS_LANE(i, a##i##v, 1)
+      ROWS_APPLY(STEP_K1)
+#undef STEP_K1
+    }
+    {
+      __vector float b0, b1;
+      load_row8_B_as_f32<kv_cache_t>(B + (int64_t)(k + 2) * ldb, b0, b1);
+#define STEP_K2(i) FMAS_LANE(i, a##i##v, 2)
+      ROWS_APPLY(STEP_K2)
+#undef STEP_K2
+    }
+    {
+      __vector float b0, b1;
+      load_row8_B_as_f32<kv_cache_t>(B + (int64_t)(k + 3) * ldb, b0, b1);
+#define STEP_K3(i) FMAS_LANE(i, a##i##v, 3)
+      ROWS_APPLY(STEP_K3)
+#undef STEP_K3
+    }
+#undef FMAS_LANE
+  }
+
+  for (; k < K; ++k) {
+    __vector float b0, b1;
+    load_row8_B_as_f32<kv_cache_t>(B + (int64_t)k * ldb, b0, b1);
+#define TAIL_ROW(i)                              \
+  IF_M(i) {                                      \
+    __vector float ai = vec_splats(*(a##i + k)); \
+    acc##i##_0 = vec_madd(b0, ai, acc##i##_0);   \
+    acc##i##_1 = vec_madd(b1, ai, acc##i##_1);   \
+  }
+    ROWS_APPLY(TAIL_ROW)
+#undef TAIL_ROW
+  }
+
+#define STORE_ROW(i)                           \
+  IF_M(i) {                                    \
+    vec_xst(acc##i##_0, 0, C + (i) * ldc + 0); \
+    vec_xst(acc##i##_1, 0, C + (i) * ldc + 4); \
+  }
+  ROWS_APPLY(STORE_ROW)
+#undef STORE_ROW
+
+#undef ROWS_APPLY
+#undef IF_M
+}
+
+template <int32_t N, typename kv_cache_t>
+FORCE_INLINE void gemm_macro_ppc64le_Mx8_Ku4(const float* __restrict A,
+                                             const kv_cache_t* __restrict B,
+                                             float* __restrict C, int32_t M,
+                                             int32_t K, int64_t lda,
+                                             int64_t ldb, int64_t ldc,
+                                             bool accumulate) {
+  static_assert(N % 8 == 0, "N must be a multiple of 8");
+  for (int32_t m = 0; m < M;) {
+    int32_t mb = (M - m >= 8) ? 8 : (M - m >= 4) ? 4 : (M - m >= 2) ? 2 : 1;
+    const float* Ab = A + m * lda;
+    float* Cb = C + m * ldc;
+
+    for (int32_t n = 0; n < N; n += 8) {
+      const kv_cache_t* Bn = B + n;
+      float* Cn = Cb + n;
+      switch (mb) {
+        case 8:
+          gemm_micro_ppc64le_Mx8_Ku4<8, kv_cache_t>(Ab, Bn, Cn, lda, ldb, ldc,
+                                                    K, accumulate);
+          break;
+        case 4:
+          gemm_micro_ppc64le_Mx8_Ku4<4, kv_cache_t>(Ab, Bn, Cn, lda, ldb, ldc,
+                                                    K, accumulate);
+          break;
+        case 2:
+          gemm_micro_ppc64le_Mx8_Ku4<2, kv_cache_t>(Ab, Bn, Cn, lda, ldb, ldc,
+                                                    K, accumulate);
+          break;
+        default:
+          gemm_micro_ppc64le_Mx8_Ku4<1, kv_cache_t>(Ab, Bn, Cn, lda, ldb, ldc,
+                                                    K, accumulate);
+          break;
+      }
+    }
+    m += mb;
+  }
+}
+
+template <typename kv_cache_t>
+class TileGemmPPC64 {
+ public:
+  template <AttentionGemmPhase phase, int32_t k_size>
+  FORCE_INLINE static void gemm(const int32_t m_size,
+                                float* __restrict__ a_tile,
+                                kv_cache_t* __restrict__ b_tile,
+                                float* __restrict__ c_tile, const int64_t lda,
+                                const int64_t ldb, const int64_t ldc,
+                                const int32_t block_size,
+                                const int32_t dynamic_k_size,
+                                const bool accum_c) {
+    if constexpr (phase == AttentionGemmPhase::QK) {
+      gemm_macro_ppc64le_Mx8_Ku4<BLOCK_SIZE_ALIGNMENT, kv_cache_t>(
+          a_tile, b_tile, c_tile, m_size, k_size, lda, ldb, ldc, accum_c);
+    } else {
+      gemm_macro_ppc64le_Mx8_Ku4<HEAD_SIZE_ALIGNMENT, kv_cache_t>(
+          a_tile, b_tile, c_tile, m_size, dynamic_k_size, lda, ldb, ldc,
+          accum_c);
+    }
+  }
+};
+
+}  // namespace
+
+template <typename scalar_t, int64_t head_dim>
+class AttentionImpl<ISA::VSX, scalar_t, head_dim> {
+ public:
+  using query_t = scalar_t;
+  using q_buffer_t = float;
+  using kv_cache_t = scalar_t;
+  using logits_buffer_t = float;
+  using partial_output_buffer_t = float;
+  using prob_buffer_t = float;
+
+  constexpr static int64_t BlockSizeAlignment = BLOCK_SIZE_ALIGNMENT;
+  constexpr static int64_t HeadDimAlignment = HEAD_SIZE_ALIGNMENT;
+  constexpr static int64_t MaxQHeadNumPerIteration = MAX_Q_HEAD_NUM_PER_ITER;
+  constexpr static int64_t HeadDim = head_dim;
+  constexpr static ISA ISAType = ISA::VSX;
+  constexpr static bool scale_on_logits =
+      false;  // Scale is applied to Q during copy
+
+ public:
+  AttentionImpl() {}
+
+  template <template <typename tile_gemm_t> typename attention>
+  FORCE_INLINE void execute_attention(DEFINE_CPU_ATTENTION_PARAMS) {
+    attention<TileGemmPPC64<kv_cache_t>> attention_iteration;
+    attention_iteration(CPU_ATTENTION_PARAMS);
+  }
+
+  // Strides for Memory Layout
+  constexpr static int64_t k_cache_token_group_stride(
+      const int32_t block_size) {
+    return BlockSizeAlignment;  // [head_dim, block_size] layout
+  }
+
+  constexpr static int64_t v_cache_token_group_stride(
+      const int32_t block_size) {
+    return head_dim * BlockSizeAlignment;
+  }
+
+  constexpr static int64_t v_cache_head_group_stride(const int32_t block_size) {
+    return HeadDimAlignment;
+  }
+
+  static void copy_q_heads_tile(scalar_t* __restrict__ src,
+                                float* __restrict__ q_buffer,
+                                const int32_t q_num,
+                                const int32_t q_heads_per_kv,
+                                const int64_t q_num_stride,
+                                const int64_t q_head_stride, float scale) {
+    __vector float scale_vec = vec_splats(scale);
+    constexpr bool is_bf16 = std::is_same<scalar_t, c10::BFloat16>::value;
+
+    for (int32_t i = 0; i < q_num; ++i) {
+      for (int32_t h = 0; h < q_heads_per_kv; ++h) {
+        scalar_t* curr_src = src + i * q_num_stride + h * q_head_stride;
+        float* curr_dst =
+            q_buffer + i * q_heads_per_kv * head_dim + h * head_dim;
+
+        int32_t d = 0;
+        for (; d <= head_dim - 8; d += 8) {
+          __vector float v0, v1;
+          load_row8_B_as_f32<scalar_t>(curr_src + d, v0, v1);
+
+          v0 = vec_mul(v0, scale_vec);
+          v1 = vec_mul(v1, scale_vec);
+
+          vec_xst(v0, 0, curr_dst + d);
+          vec_xst(v1, 0, curr_dst + d + 4);
+        }
+
+        for (; d < head_dim; ++d) {
+          float val = static_cast<float>(curr_src[d]);
+          curr_dst[d] = val * scale;
+        }
+      }
+    }
+  }
+
+  static void reshape_and_cache(
+      const scalar_t* __restrict__ key, const scalar_t* __restrict__ value,
+      scalar_t* __restrict__ key_cache, scalar_t* __restrict__ value_cache,
+      const int64_t* __restrict__ slot_mapping, const int64_t token_num,
+      const int64_t key_token_num_stride, const int64_t value_token_num_stride,
+      const int64_t head_num, const int64_t key_head_num_stride,
+      const int64_t value_head_num_stride, const int64_t num_blocks,
+      const int64_t num_blocks_stride, const int64_t cache_head_num_stride,
+      const int64_t block_size, const int64_t block_size_stride,
+      const float k_inv = 0.0f, const float v_inv = 0.0f) {
+    // k_inv and v_inv are unused on VSX: FP8 KV cache is not supported on
+    // PowerPC. The parameters are present to match the common interface.
+#pragma omp parallel for collapse(2)
+    for (int64_t token_idx = 0; token_idx < token_num; ++token_idx) {
+      for (int64_t head_idx = 0; head_idx < head_num; ++head_idx) {
+        const int64_t pos = slot_mapping[token_idx];
+        if (pos < 0) continue;
+
+        const int64_t block_idx = pos / block_size;
+        const int64_t block_offset = pos % block_size;
+
+        {
+          const scalar_t* key_src = key + token_idx * key_token_num_stride +
+                                    head_idx * key_head_num_stride;
+          scalar_t* key_dst = key_cache + block_idx * num_blocks_stride +
+                              head_idx * cache_head_num_stride + block_offset;
+
+          for (int64_t i = 0, j = 0; i < head_dim; ++i, j += block_size) {
+            key_dst[j] = key_src[i];
+          }
+        }
+
+        {
+          const scalar_t* val_src = value + token_idx * value_token_num_stride +
+                                    head_idx * value_head_num_stride;
+          scalar_t* val_dst = value_cache + block_idx * num_blocks_stride +
+                              head_idx * cache_head_num_stride +
+                              block_offset * head_dim;
+
+          std::memcpy(val_dst, val_src, sizeof(scalar_t) * head_dim);
+        }
+      }
+    }
+  }
+};
+
+}  // namespace cpu_attention
+
+#undef BLOCK_SIZE_ALIGNMENT
+#undef HEAD_SIZE_ALIGNMENT
+#undef MAX_Q_HEAD_NUM_PER_ITER
+
+#endif  // CPU_ATTN_VSX_HPP
diff --git a/csrc/cpu/cpu_attn_vxe.hpp b/csrc/cpu/cpu_attn_vxe.hpp
index 45db4ebd7396..cbfda4cf7842 100644
--- a/csrc/cpu/cpu_attn_vxe.hpp
+++ b/csrc/cpu/cpu_attn_vxe.hpp
@@ -244,8 +244,8 @@ class TileGemmS390X {
 
 }  // namespace
 
-template <typename scalar_t, int64_t head_dim>
-class AttentionImpl<ISA::VXE, scalar_t, head_dim> {
+template <typename scalar_t, int64_t head_dim, typename kv_cache_scalar_t>
+class AttentionImpl<ISA::VXE, scalar_t, head_dim, kv_cache_scalar_t> {
  public:
   using query_t = scalar_t;
   using q_buffer_t = float;
@@ -342,7 +342,8 @@ class AttentionImpl<ISA::VXE, scalar_t, head_dim> {
       const int64_t head_num, const int64_t key_head_num_stride,
       const int64_t value_head_num_stride, const int64_t num_blocks,
       const int64_t num_blocks_stride, const int64_t cache_head_num_stride,
-      const int64_t block_size, const int64_t block_size_stride) {
+      const int64_t block_size, const int64_t block_size_stride,
+      const float /*k_inv*/ = 0.0f, const float /*v_inv*/ = 0.0f) {
 #pragma omp parallel for collapse(2)
     for (int64_t token_idx = 0; token_idx < token_num; ++token_idx) {
       for (int64_t head_idx = 0; head_idx < head_num; ++head_idx) {
diff --git a/csrc/cpu/cpu_fused_moe.cpp b/csrc/cpu/cpu_fused_moe.cpp
index 1a82645397b5..0dc5060fe99c 100644
--- a/csrc/cpu/cpu_fused_moe.cpp
+++ b/csrc/cpu/cpu_fused_moe.cpp
@@ -30,13 +30,15 @@
   }()
 
 namespace {
-enum class FusedMOEAct { SiluAndMul, SwigluOAIAndMul };
+enum class FusedMOEAct { SiluAndMul, SwigluOAIAndMul, GeluAndMul };
 
 FusedMOEAct get_act_type(const std::string& act) {
   if (act == "silu") {
     return FusedMOEAct::SiluAndMul;
   } else if (act == "swigluoai") {
     return FusedMOEAct::SwigluOAIAndMul;
+  } else if (act == "gelu") {
+    return FusedMOEAct::GeluAndMul;
   } else {
     TORCH_CHECK(false, "Invalid act type: " + act);
   }
@@ -104,6 +106,43 @@ void silu_and_mul(float* __restrict__ input, scalar_t* __restrict__ output,
   }
 }
 
+template <typename scalar_t>
+void gelu_and_mul(float* __restrict__ input, scalar_t* __restrict__ output,
+                  const int32_t m_size, const int32_t n_size,
+                  const int32_t input_stride, const int32_t output_stride) {
+  using scalar_vec_t = typename cpu_utils::VecTypeTrait<scalar_t>::vec_t;
+  const int32_t dim = n_size / 2;
+  float* __restrict__ gate = input;
+  float* __restrict__ up = input + dim;
+  vec_op::FP32Vec16 one_vec(1.0);
+  vec_op::FP32Vec16 w1_vec(M_SQRT1_2);
+  vec_op::FP32Vec16 w2_vec(0.5);
+  alignas(64) float temp[16];
+
+  DEFINE_FAST_EXP
+
+  for (int32_t m = 0; m < m_size; ++m) {
+    for (int32_t n = 0; n < dim; n += 16) {
+      vec_op::FP32Vec16 gate_vec(gate + n);
+      vec_op::FP32Vec16 up_vec(up + n);
+      auto er_input_vec = gate_vec * w1_vec;
+
+      er_input_vec.save(temp);
+      for (int32_t i = 0; i < 16; ++i) {
+        temp[i] = std::erf(temp[i]);
+      }
+      vec_op::FP32Vec16 er_vec(temp);
+      auto gelu = gate_vec * w2_vec * (one_vec + er_vec);
+      auto gated_output_fp32 = up_vec * gelu;
+      scalar_vec_t gated_output = scalar_vec_t(gated_output_fp32);
+      gated_output.save(output + n);
+    }
+    gate += input_stride;
+    up += input_stride;
+    output += output_stride;
+  }
+}
+
 template <typename scalar_t>
 FORCE_INLINE void apply_gated_act(const FusedMOEAct act,
                                   float* __restrict__ input,
@@ -118,6 +157,9 @@ FORCE_INLINE void apply_gated_act(const FusedMOEAct act,
     case FusedMOEAct::SiluAndMul:
       silu_and_mul(input, output, m, n, input_stride, output_stride);
       return;
+    case FusedMOEAct::GeluAndMul:
+      gelu_and_mul(input, output, m, n, input_stride, output_stride);
+      return;
     default:
       TORCH_CHECK(false, "Unsupported act type.");
   }
diff --git a/csrc/cpu/cpu_types_arm.hpp b/csrc/cpu/cpu_types_arm.hpp
index f9975b4e29cd..b408731f40d1 100644
--- a/csrc/cpu/cpu_types_arm.hpp
+++ b/csrc/cpu/cpu_types_arm.hpp
@@ -15,6 +15,9 @@ using namespace at::vec;
 
 namespace vec_op {
 
+struct fp8_e4m3_tag {};
+struct fp8_e5m2_tag {};
+
 #define VLLM_DISPATCH_CASE_FLOATING_TYPES(...)         \
   AT_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__) \
   AT_DISPATCH_CASE(at::ScalarType::Half, __VA_ARGS__)  \
@@ -322,6 +325,9 @@ struct BF16Vec32 : public VectorizedRegWrapper<BF16Vec32, 4, c10::BFloat16> {
     reg.val[2] = vec8_data.reg.val[0];
     reg.val[3] = vec8_data.reg.val[0];
   };
+
+  explicit BF16Vec32(const uint8_t*, fp8_e4m3_tag) : Base() {}
+  explicit BF16Vec32(const uint8_t*, fp8_e5m2_tag) : Base() {}
 };
 
 struct FP32Vec4 : public VectorizedRegWrapper<FP32Vec4, 1, float> {
@@ -480,6 +486,10 @@ struct FP32Vec16 : public VectorizedRegWrapper<FP32Vec16, 4, float> {
 
   explicit FP32Vec16(const BF16Vec8& v) : FP32Vec16(FP32Vec8(v)) {};
 
+  // FP8 stub: dead code on ARM (fp8 KV cache is x86-only), needed for
+  // load_b_pair_vec template to compile on all platforms.
+  explicit FP32Vec16(const BF16Vec32&, int) : Base() {}
+
   explicit FP32Vec16(const FP16Vec16& v) {
     reg.val[0] = Vectorized<float>(vcvt_f32_f16(vget_low_f16(v.reg.val[0])));
     reg.val[1] = Vectorized<float>(vcvt_f32_f16(vget_high_f16(v.reg.val[0])));
diff --git a/csrc/cpu/cpu_types_riscv.hpp b/csrc/cpu/cpu_types_riscv.hpp
index 910ee5c11331..e617d98dd002 100644
--- a/csrc/cpu/cpu_types_riscv.hpp
+++ b/csrc/cpu/cpu_types_riscv.hpp
@@ -1,832 +1,25 @@
 #ifndef CPU_TYPES_RISCV_HPP
 #define CPU_TYPES_RISCV_HPP
 
-#include <algorithm>
-#include <cmath>
-#include <cstring>
-#include <iostream>
-#include <limits>
-#include <riscv_vector.h>
-#include <torch/all.h>
+// RISC-V Vector (RVV) CPU type definitions for vLLM.
+//
+// Supports multiple VLENs via compile-time dispatch. The compiler defines
+// __riscv_v_min_vlen from the zvl<N>b extension in -march. The defs header
+// maps VLEN to the correct LMUL suffixes, and the impl header provides
+// VLEN-independent class implementations.
+//
+// To add support for a new VLEN, add the LMUL mapping in
+// cpu_types_riscv_defs.hpp (the impl header needs no changes).
 
-// ============================================================================
-// Vector Register Type Definitions (VLEN=128 bits)
-// ============================================================================
-
-typedef vfloat16m1_t fixed_vfloat16m1_t
-    __attribute__((riscv_rvv_vector_bits(128)));
-typedef vfloat16m2_t fixed_vfloat16m2_t
-    __attribute__((riscv_rvv_vector_bits(256)));
-
-typedef vfloat32m1_t fixed_vfloat32m1_t
-    __attribute__((riscv_rvv_vector_bits(128)));
-typedef vfloat32m2_t fixed_vfloat32m2_t
-    __attribute__((riscv_rvv_vector_bits(256)));
-typedef vfloat32m4_t fixed_vfloat32m4_t
-    __attribute__((riscv_rvv_vector_bits(512)));
-typedef vfloat32m8_t fixed_vfloat32m8_t
-    __attribute__((riscv_rvv_vector_bits(1024)));
-
-typedef vint32m2_t fixed_vint32m2_t __attribute__((riscv_rvv_vector_bits(256)));
-typedef vint32m4_t fixed_vint32m4_t __attribute__((riscv_rvv_vector_bits(512)));
-
-typedef vuint16m1_t fixed_vuint16m1_t
-    __attribute__((riscv_rvv_vector_bits(128)));
-typedef vuint16m2_t fixed_vuint16m2_t
-    __attribute__((riscv_rvv_vector_bits(256)));
-typedef vuint16m4_t fixed_vuint16m4_t
-    __attribute__((riscv_rvv_vector_bits(512)));
-
-#ifdef RISCV_BF16_SUPPORT
-typedef vbfloat16m1_t fixed_vbfloat16m1_t
-    __attribute__((riscv_rvv_vector_bits(128)));
-typedef vbfloat16m2_t fixed_vbfloat16m2_t
-    __attribute__((riscv_rvv_vector_bits(256)));
-typedef vbfloat16m4_t fixed_vbfloat16m4_t
-    __attribute__((riscv_rvv_vector_bits(512)));
-#endif
-
-namespace vec_op {
-
-#ifdef RISCV_BF16_SUPPORT
-  #define VLLM_DISPATCH_CASE_FLOATING_TYPES(...)         \
-    AT_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__) \
-    AT_DISPATCH_CASE(at::ScalarType::Half, __VA_ARGS__)  \
-    AT_DISPATCH_CASE(at::ScalarType::BFloat16, __VA_ARGS__)
-#else
-  #define VLLM_DISPATCH_CASE_FLOATING_TYPES(...)         \
-    AT_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__) \
-    AT_DISPATCH_CASE(at::ScalarType::Half, __VA_ARGS__)
+#ifndef __riscv_vector
+  #error "cpu_types_riscv.hpp included in a non-RVV translation unit"
 #endif
 
-#define VLLM_DISPATCH_FLOATING_TYPES(TYPE, NAME, ...) \
-  AT_DISPATCH_SWITCH(TYPE, NAME, VLLM_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__))
-
-#define FORCE_INLINE __attribute__((always_inline)) inline
-
-namespace {
-template <typename T, T... indexes, typename F>
-constexpr void unroll_loop_item(std::integer_sequence<T, indexes...>, F&& f) {
-  (f(std::integral_constant<T, indexes>{}), ...);
-};
-}  // namespace
-
-template <typename T, T count, typename F,
-          typename = std::enable_if_t<std::is_invocable_v<F, T>>>
-constexpr void unroll_loop(F&& f) {
-  unroll_loop_item(std::make_integer_sequence<T, count>{}, std::forward<F>(f));
-}
-
-template <typename T>
-struct Vec {
-  constexpr static int get_elem_num() { return T::VEC_ELEM_NUM; };
-};
-
-struct FP32Vec8;
-struct FP32Vec16;
-
-// ============================================================================
-// FP16 Implementation
-// ============================================================================
-
-struct FP16Vec8 : public Vec<FP16Vec8> {
-  constexpr static int VEC_ELEM_NUM = 8;
-  fixed_vfloat16m1_t reg;
-
-  explicit FP16Vec8(const void* ptr)
-      : reg(__riscv_vle16_v_f16m1(static_cast<const _Float16*>(ptr),
-                                  VEC_ELEM_NUM)) {};
-
-  explicit FP16Vec8(const FP32Vec8&);
-
-  void save(void* ptr) const {
-    __riscv_vse16_v_f16m1(static_cast<_Float16*>(ptr), reg, VEC_ELEM_NUM);
-  }
-  void save(void* ptr, int elem_num) const {
-    __riscv_vse16_v_f16m1(static_cast<_Float16*>(ptr), reg, elem_num);
-  }
-  void save_strided(void* ptr, ptrdiff_t stride) const {
-    ptrdiff_t byte_stride = stride * sizeof(_Float16);
-    __riscv_vsse16_v_f16m1(static_cast<_Float16*>(ptr), byte_stride, reg,
-                           VEC_ELEM_NUM);
-  }
-};
-
-struct FP16Vec16 : public Vec<FP16Vec16> {
-  constexpr static int VEC_ELEM_NUM = 16;
-  fixed_vfloat16m2_t reg;
-
-  explicit FP16Vec16(const void* ptr)
-      : reg(__riscv_vle16_v_f16m2(static_cast<const _Float16*>(ptr),
-                                  VEC_ELEM_NUM)) {};
-
-  explicit FP16Vec16(const FP32Vec16& vec);
-
-  void save(void* ptr) const {
-    __riscv_vse16_v_f16m2(static_cast<_Float16*>(ptr), reg, VEC_ELEM_NUM);
-  }
-  void save(void* ptr, int elem_num) const {
-    __riscv_vse16_v_f16m2(static_cast<_Float16*>(ptr), reg, elem_num);
-  }
-  void save_strided(void* ptr, ptrdiff_t stride) const {
-    ptrdiff_t byte_stride = stride * sizeof(_Float16);
-    __riscv_vsse16_v_f16m2(static_cast<_Float16*>(ptr), byte_stride, reg,
-                           VEC_ELEM_NUM);
-  }
-};
-
-// ============================================================================
-// BF16 Implementation
-// ============================================================================
-
-#ifdef RISCV_BF16_SUPPORT
-
-FORCE_INLINE fixed_vuint16m1_t bf16_to_u16(fixed_vbfloat16m1_t v) {
-  return __riscv_vreinterpret_v_bf16m1_u16m1(v);
-}
-FORCE_INLINE fixed_vuint16m2_t bf16_to_u16(fixed_vbfloat16m2_t v) {
-  return __riscv_vreinterpret_v_bf16m2_u16m2(v);
-}
-FORCE_INLINE fixed_vuint16m4_t bf16_to_u16(fixed_vbfloat16m4_t v) {
-  return __riscv_vreinterpret_v_bf16m4_u16m4(v);
-}
-
-struct BF16Vec8 : public Vec<BF16Vec8> {
-  constexpr static int VEC_ELEM_NUM = 8;
-  fixed_vbfloat16m1_t reg;
-
-  explicit BF16Vec8(const void* ptr)
-      : reg(__riscv_vreinterpret_v_u16m1_bf16m1(__riscv_vle16_v_u16m1(
-            reinterpret_cast<const uint16_t*>(ptr), VEC_ELEM_NUM))) {};
-
-  explicit BF16Vec8(fixed_vbfloat16m1_t data) : reg(data) {};
-  explicit BF16Vec8(const FP32Vec8&);
-
-  void save(void* ptr) const {
-    __riscv_vse16_v_u16m1(reinterpret_cast<uint16_t*>(ptr), bf16_to_u16(reg),
-                          VEC_ELEM_NUM);
-  }
-  void save(void* ptr, int elem_num) const {
-    __riscv_vse16_v_u16m1(reinterpret_cast<uint16_t*>(ptr), bf16_to_u16(reg),
-                          elem_num);
-  }
-  void save_strided(void* ptr, ptrdiff_t stride) const {
-    ptrdiff_t byte_stride = stride * sizeof(uint16_t);
-    __riscv_vsse16_v_u16m1(reinterpret_cast<uint16_t*>(ptr), byte_stride,
-                           bf16_to_u16(reg), VEC_ELEM_NUM);
-  }
-};
-
-struct BF16Vec16 : public Vec<BF16Vec16> {
-  constexpr static int VEC_ELEM_NUM = 16;
-  fixed_vbfloat16m2_t reg;
-
-  explicit BF16Vec16(const void* ptr)
-      : reg(__riscv_vreinterpret_v_u16m2_bf16m2(__riscv_vle16_v_u16m2(
-            reinterpret_cast<const uint16_t*>(ptr), VEC_ELEM_NUM))) {};
-
-  explicit BF16Vec16(fixed_vbfloat16m2_t data) : reg(data) {};
-  explicit BF16Vec16(const FP32Vec16&);
-
-  void save(void* ptr) const {
-    __riscv_vse16_v_u16m2(reinterpret_cast<uint16_t*>(ptr), bf16_to_u16(reg),
-                          VEC_ELEM_NUM);
-  }
-  void save(void* ptr, int elem_num) const {
-    __riscv_vse16_v_u16m2(reinterpret_cast<uint16_t*>(ptr), bf16_to_u16(reg),
-                          elem_num);
-  }
-  void save_strided(void* ptr, ptrdiff_t stride) const {
-    ptrdiff_t byte_stride = stride * sizeof(uint16_t);
-    __riscv_vsse16_v_u16m2(reinterpret_cast<uint16_t*>(ptr), byte_stride,
-                           bf16_to_u16(reg), VEC_ELEM_NUM);
-  }
-};
-
-struct BF16Vec32 : public Vec<BF16Vec32> {
-  constexpr static int VEC_ELEM_NUM = 32;
-  fixed_vbfloat16m4_t reg;
-
-  explicit BF16Vec32(const void* ptr)
-      : reg(__riscv_vreinterpret_v_u16m4_bf16m4(__riscv_vle16_v_u16m4(
-            reinterpret_cast<const uint16_t*>(ptr), VEC_ELEM_NUM))) {};
-
-  explicit BF16Vec32(fixed_vbfloat16m4_t data) : reg(data) {};
-
-  explicit BF16Vec32(const BF16Vec8& v) {
-    fixed_vuint16m1_t u16_val = bf16_to_u16(v.reg);
-    fixed_vuint16m4_t u16_combined =
-        __riscv_vcreate_v_u16m1_u16m4(u16_val, u16_val, u16_val, u16_val);
-    reg = __riscv_vreinterpret_v_u16m4_bf16m4(u16_combined);
-  };
-
-  void save(void* ptr) const {
-    __riscv_vse16_v_u16m4(reinterpret_cast<uint16_t*>(ptr), bf16_to_u16(reg),
-                          VEC_ELEM_NUM);
-  }
-  void save(void* ptr, int elem_num) const {
-    __riscv_vse16_v_u16m4(reinterpret_cast<uint16_t*>(ptr), bf16_to_u16(reg),
-                          elem_num);
-  }
-  void save_strided(void* ptr, ptrdiff_t stride) const {
-    ptrdiff_t byte_stride = stride * sizeof(uint16_t);
-    __riscv_vsse16_v_u16m4(reinterpret_cast<uint16_t*>(ptr), byte_stride,
-                           bf16_to_u16(reg), VEC_ELEM_NUM);
-  }
-};
-
-#else
-// ============================================================================
-// BF16 Fallback Implementation (FP32 Simulation)
-// ============================================================================
-
-struct BF16Vec8 : public Vec<BF16Vec8> {
-  constexpr static int VEC_ELEM_NUM = 8;
-  fixed_vfloat32m2_t reg_fp32;
-  explicit BF16Vec8(const void* ptr) {
-    const uint16_t* u16 = static_cast<const uint16_t*>(ptr);
-    float tmp[8];
-    for (int i = 0; i < 8; ++i) {
-      uint32_t v = static_cast<uint32_t>(u16[i]) << 16;
-      std::memcpy(&tmp[i], &v, 4);
-    }
-    reg_fp32 = __riscv_vle32_v_f32m2(tmp, 8);
-  }
-  explicit BF16Vec8(const FP32Vec8&);
-  void save(void* ptr) const {
-    float tmp[8];
-    __riscv_vse32_v_f32m2(tmp, reg_fp32, 8);
-    uint16_t* u16 = static_cast<uint16_t*>(ptr);
-    for (int i = 0; i < 8; ++i) {
-      uint32_t v;
-      std::memcpy(&v, &tmp[i], 4);
-      u16[i] = static_cast<uint16_t>(v >> 16);
-    }
-  }
-  void save(void* ptr, int elem_num) const {
-    float tmp[8];
-    __riscv_vse32_v_f32m2(tmp, reg_fp32, 8);
-    uint16_t* u16 = static_cast<uint16_t*>(ptr);
-    for (int i = 0; i < elem_num; ++i) {
-      uint32_t v;
-      std::memcpy(&v, &tmp[i], 4);
-      u16[i] = static_cast<uint16_t>(v >> 16);
-    }
-  }
-  void save_strided(void* ptr, ptrdiff_t stride) const {
-    float tmp[8];
-    __riscv_vse32_v_f32m2(tmp, reg_fp32, 8);
-    uint8_t* u8 = static_cast<uint8_t*>(ptr);
-    ptrdiff_t byte_stride = stride * sizeof(uint16_t);
-    for (int i = 0; i < 8; ++i) {
-      uint32_t v;
-      std::memcpy(&v, &tmp[i], 4);
-      uint16_t val = static_cast<uint16_t>(v >> 16);
-      *reinterpret_cast<uint16_t*>(u8 + i * byte_stride) = val;
-    }
-  }
-};
-
-struct BF16Vec16 : public Vec<BF16Vec16> {
-  constexpr static int VEC_ELEM_NUM = 16;
-  fixed_vfloat32m4_t reg_fp32;
-  explicit BF16Vec16(const void* ptr) {
-    const uint16_t* u16 = static_cast<const uint16_t*>(ptr);
-    float tmp[16];
-    for (int i = 0; i < 16; ++i) {
-      uint32_t v = static_cast<uint32_t>(u16[i]) << 16;
-      std::memcpy(&tmp[i], &v, 4);
-    }
-    reg_fp32 = __riscv_vle32_v_f32m4(tmp, 16);
-  }
-  explicit BF16Vec16(const FP32Vec16&);
-  void save(void* ptr) const {
-    float tmp[16];
-    __riscv_vse32_v_f32m4(tmp, reg_fp32, 16);
-    uint16_t* u16 = static_cast<uint16_t*>(ptr);
-    for (int i = 0; i < 16; ++i) {
-      uint32_t v;
-      std::memcpy(&v, &tmp[i], 4);
-      u16[i] = static_cast<uint16_t>(v >> 16);
-    }
-  }
-  void save(void* ptr, int elem_num) const {
-    float tmp[16];
-    __riscv_vse32_v_f32m4(tmp, reg_fp32, 16);
-    uint16_t* u16 = static_cast<uint16_t*>(ptr);
-    for (int i = 0; i < elem_num; ++i) {
-      uint32_t v;
-      std::memcpy(&v, &tmp[i], 4);
-      u16[i] = static_cast<uint16_t>(v >> 16);
-    }
-  }
-  void save_strided(void* ptr, ptrdiff_t stride) const {
-    float tmp[16];
-    __riscv_vse32_v_f32m4(tmp, reg_fp32, 16);
-    uint8_t* u8 = static_cast<uint8_t*>(ptr);
-    ptrdiff_t byte_stride = stride * sizeof(uint16_t);
-    for (int i = 0; i < 16; ++i) {
-      uint32_t v;
-      std::memcpy(&v, &tmp[i], 4);
-      uint16_t val = static_cast<uint16_t>(v >> 16);
-      *reinterpret_cast<uint16_t*>(u8 + i * byte_stride) = val;
-    }
-  }
-};
-
-struct BF16Vec32 : public Vec<BF16Vec32> {
-  constexpr static int VEC_ELEM_NUM = 32;
-  fixed_vfloat32m8_t reg_fp32;
-
-  explicit BF16Vec32(const void* ptr) {
-    const uint16_t* u16 = static_cast<const uint16_t*>(ptr);
-    float tmp[32];
-    for (int i = 0; i < 32; ++i) {
-      uint32_t v = static_cast<uint32_t>(u16[i]) << 16;
-      std::memcpy(&tmp[i], &v, 4);
-    }
-    reg_fp32 = __riscv_vle32_v_f32m8(tmp, 32);
-  }
-
-  explicit BF16Vec32(const BF16Vec8& v) {
-    float tmp_small[8];
-    __riscv_vse32_v_f32m2(tmp_small, v.reg_fp32, 8);
-    float tmp_large[32];
-    for (int i = 0; i < 4; ++i) {
-      std::memcpy(tmp_large + (i * 8), tmp_small, 8 * sizeof(float));
-    }
-    reg_fp32 = __riscv_vle32_v_f32m8(tmp_large, 32);
-  }
-
-  void save(void* ptr) const {
-    float tmp[32];
-    __riscv_vse32_v_f32m8(tmp, reg_fp32, 32);
-    uint16_t* u16 = static_cast<uint16_t*>(ptr);
-    for (int i = 0; i < 32; ++i) {
-      uint32_t v;
-      std::memcpy(&v, &tmp[i], 4);
-      u16[i] = static_cast<uint16_t>(v >> 16);
-    }
-  }
-
-  void save(void* ptr, int elem_num) const {
-    float tmp[32];
-    __riscv_vse32_v_f32m8(tmp, reg_fp32, 32);
-    uint16_t* u16 = static_cast<uint16_t*>(ptr);
-    for (int i = 0; i < elem_num; ++i) {
-      uint32_t v;
-      std::memcpy(&v, &tmp[i], 4);
-      u16[i] = static_cast<uint16_t>(v >> 16);
-    }
-  }
-
-  void save_strided(void* ptr, ptrdiff_t stride) const {
-    float tmp[32];
-    __riscv_vse32_v_f32m8(tmp, reg_fp32, 32);
-    uint8_t* u8 = static_cast<uint8_t*>(ptr);
-    ptrdiff_t byte_stride = stride * sizeof(uint16_t);
-    for (int i = 0; i < 32; ++i) {
-      uint32_t v;
-      std::memcpy(&v, &tmp[i], 4);
-      uint16_t val = static_cast<uint16_t>(v >> 16);
-      *reinterpret_cast<uint16_t*>(u8 + i * byte_stride) = val;
-    }
-  }
-};
-#endif
-
-// ============================================================================
-// FP32 Implementation
-// ============================================================================
-
-struct FP32Vec4 : public Vec<FP32Vec4> {
-  constexpr static int VEC_ELEM_NUM = 4;
-  fixed_vfloat32m1_t reg;
-  explicit FP32Vec4(float v) : reg(__riscv_vfmv_v_f_f32m1(v, VEC_ELEM_NUM)) {};
-  explicit FP32Vec4() : reg(__riscv_vfmv_v_f_f32m1(0.0f, VEC_ELEM_NUM)) {};
-  explicit FP32Vec4(const float* ptr)
-      : reg(__riscv_vle32_v_f32m1(ptr, VEC_ELEM_NUM)) {};
-  explicit FP32Vec4(fixed_vfloat32m1_t data) : reg(data) {};
-  explicit FP32Vec4(const FP32Vec4& data) : reg(data.reg) {};
-  void save(float* ptr) const { __riscv_vse32_v_f32m1(ptr, reg, VEC_ELEM_NUM); }
-  void save(float* ptr, int elem_num) const {
-    __riscv_vse32_v_f32m1(ptr, reg, elem_num);
-  }
-};
-
-struct FP32Vec8 : public Vec<FP32Vec8> {
-  constexpr static int VEC_ELEM_NUM = 8;
-  fixed_vfloat32m2_t reg;
-
-  explicit FP32Vec8(float v) : reg(__riscv_vfmv_v_f_f32m2(v, VEC_ELEM_NUM)) {};
-  explicit FP32Vec8() : reg(__riscv_vfmv_v_f_f32m2(0.0f, VEC_ELEM_NUM)) {};
-  explicit FP32Vec8(const float* ptr)
-      : reg(__riscv_vle32_v_f32m2(ptr, VEC_ELEM_NUM)) {};
-  explicit FP32Vec8(fixed_vfloat32m2_t data) : reg(data) {};
-  explicit FP32Vec8(const FP32Vec8& data) : reg(data.reg) {};
-  explicit FP32Vec8(const FP16Vec8& v)
-      : reg(__riscv_vfwcvt_f_f_v_f32m2(v.reg, VEC_ELEM_NUM)) {};
-  explicit FP32Vec8(fixed_vfloat16m1_t v)
-      : reg(__riscv_vfwcvt_f_f_v_f32m2(v, VEC_ELEM_NUM)) {};
-
-#ifdef RISCV_BF16_SUPPORT
-  explicit FP32Vec8(fixed_vbfloat16m1_t v)
-      : reg(__riscv_vfwcvtbf16_f_f_v_f32m2(v, VEC_ELEM_NUM)) {};
-  explicit FP32Vec8(const BF16Vec8& v)
-      : reg(__riscv_vfwcvtbf16_f_f_v_f32m2(v.reg, VEC_ELEM_NUM)) {};
-#else
-  explicit FP32Vec8(const BF16Vec8& v) : reg(v.reg_fp32) {};
+#ifndef __riscv_v_min_vlen
+  #error "compiler did not define __riscv_v_min_vlen; pass -march=...zvl<N>b"
 #endif
 
-  float reduce_sum() const {
-    fixed_vfloat32m1_t scalar = __riscv_vfmv_s_f_f32m1(0.0f, 1);
-    scalar = __riscv_vfredusum_vs_f32m2_f32m1(reg, scalar, VEC_ELEM_NUM);
-    return __riscv_vfmv_f_s_f32m1_f32(scalar);
-  }
-
-  FP32Vec8 operator*(const FP32Vec8& b) const {
-    return FP32Vec8(__riscv_vfmul_vv_f32m2(reg, b.reg, VEC_ELEM_NUM));
-  }
-  FP32Vec8 operator+(const FP32Vec8& b) const {
-    return FP32Vec8(__riscv_vfadd_vv_f32m2(reg, b.reg, VEC_ELEM_NUM));
-  }
-  FP32Vec8 operator-(const FP32Vec8& b) const {
-    return FP32Vec8(__riscv_vfsub_vv_f32m2(reg, b.reg, VEC_ELEM_NUM));
-  }
-  FP32Vec8 operator/(const FP32Vec8& b) const {
-    return FP32Vec8(__riscv_vfdiv_vv_f32m2(reg, b.reg, VEC_ELEM_NUM));
-  }
-
-  FP32Vec8 min(const FP32Vec8& b) const {
-    return FP32Vec8(__riscv_vfmin_vv_f32m2(reg, b.reg, VEC_ELEM_NUM));
-  }
-  FP32Vec8 max(const FP32Vec8& b) const {
-    return FP32Vec8(__riscv_vfmax_vv_f32m2(reg, b.reg, VEC_ELEM_NUM));
-  }
-  FP32Vec8 abs() const {
-    return FP32Vec8(__riscv_vfabs_v_f32m2(reg, VEC_ELEM_NUM));
-  }
-
-  FP32Vec8 min(const FP32Vec8& b, int elem_num) const {
-    return FP32Vec8(__riscv_vfmin_vv_f32m2(reg, b.reg, elem_num));
-  }
-  FP32Vec8 max(const FP32Vec8& b, int elem_num) const {
-    return FP32Vec8(__riscv_vfmax_vv_f32m2(reg, b.reg, elem_num));
-  }
-
-  FP32Vec8 clamp(const FP32Vec8& min_v, const FP32Vec8& max_v) const {
-    fixed_vfloat32m2_t temp =
-        __riscv_vfmax_vv_f32m2(min_v.reg, reg, VEC_ELEM_NUM);
-    return FP32Vec8(__riscv_vfmin_vv_f32m2(max_v.reg, temp, VEC_ELEM_NUM));
-  }
-
-  void save(float* ptr) const { __riscv_vse32_v_f32m2(ptr, reg, VEC_ELEM_NUM); }
-  void save(float* ptr, int elem_num) const {
-    __riscv_vse32_v_f32m2(ptr, reg, elem_num);
-  }
-  void save_strided(float* ptr, ptrdiff_t stride) const {
-    ptrdiff_t byte_stride = stride * sizeof(float);
-    __riscv_vsse32_v_f32m2(ptr, byte_stride, reg, VEC_ELEM_NUM);
-  }
-
-  FP32Vec8 exp() const {
-    const float inv_ln2 = 1.44269504088896341f;
-    fixed_vfloat32m2_t x_scaled =
-        __riscv_vfmul_vf_f32m2(reg, inv_ln2, VEC_ELEM_NUM);
-    fixed_vint32m2_t n_int = __riscv_vfcvt_x_f_v_i32m2(x_scaled, VEC_ELEM_NUM);
-    fixed_vfloat32m2_t n_float = __riscv_vfcvt_f_x_v_f32m2(n_int, VEC_ELEM_NUM);
-
-    fixed_vfloat32m2_t r =
-        __riscv_vfsub_vv_f32m2(x_scaled, n_float, VEC_ELEM_NUM);
-
-    fixed_vfloat32m2_t poly =
-        __riscv_vfmv_v_f_f32m2(0.001333355810164f, VEC_ELEM_NUM);
-    poly = __riscv_vfmul_vv_f32m2(poly, r, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m2(poly, 0.009618129107628f, VEC_ELEM_NUM);
-    poly = __riscv_vfmul_vv_f32m2(poly, r, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m2(poly, 0.055504108664821f, VEC_ELEM_NUM);
-    poly = __riscv_vfmul_vv_f32m2(poly, r, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m2(poly, 0.240226506959101f, VEC_ELEM_NUM);
-    poly = __riscv_vfmul_vv_f32m2(poly, r, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m2(poly, 0.693147180559945f, VEC_ELEM_NUM);
-    poly = __riscv_vfmul_vv_f32m2(poly, r, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m2(poly, 1.0f, VEC_ELEM_NUM);
-
-    fixed_vint32m2_t biased_exp =
-        __riscv_vadd_vx_i32m2(n_int, 127, VEC_ELEM_NUM);
-    biased_exp = __riscv_vmax_vx_i32m2(biased_exp, 0, VEC_ELEM_NUM);
-    fixed_vint32m2_t exponent_bits =
-        __riscv_vsll_vx_i32m2(biased_exp, 23, VEC_ELEM_NUM);
-    fixed_vfloat32m2_t scale =
-        __riscv_vreinterpret_v_i32m2_f32m2(exponent_bits);
-
-    return FP32Vec8(__riscv_vfmul_vv_f32m2(poly, scale, VEC_ELEM_NUM));
-  }
-
-  FP32Vec8 tanh() const {
-    fixed_vfloat32m2_t x_clamped = __riscv_vfmin_vf_f32m2(
-        __riscv_vfmax_vf_f32m2(reg, -9.0f, VEC_ELEM_NUM), 9.0f, VEC_ELEM_NUM);
-    fixed_vfloat32m2_t x2 =
-        __riscv_vfmul_vf_f32m2(x_clamped, 2.0f, VEC_ELEM_NUM);
-    FP32Vec8 exp_val = FP32Vec8(x2).exp();
-    fixed_vfloat32m2_t num =
-        __riscv_vfsub_vf_f32m2(exp_val.reg, 1.0f, VEC_ELEM_NUM);
-    fixed_vfloat32m2_t den =
-        __riscv_vfadd_vf_f32m2(exp_val.reg, 1.0f, VEC_ELEM_NUM);
-    return FP32Vec8(__riscv_vfdiv_vv_f32m2(num, den, VEC_ELEM_NUM));
-  }
-
-  FP32Vec8 er() const {
-    const float p = 0.3275911f, a1 = 0.254829592f, a2 = -0.284496736f,
-                a3 = 1.421413741f, a4 = -1.453152027f, a5 = 1.061405429f;
-    fixed_vfloat32m2_t abs_x = __riscv_vfabs_v_f32m2(reg, VEC_ELEM_NUM);
-
-    fixed_vfloat32m2_t t = __riscv_vfadd_vf_f32m2(
-        __riscv_vfmul_vf_f32m2(abs_x, p, VEC_ELEM_NUM), 1.0f, VEC_ELEM_NUM);
-    t = __riscv_vfrdiv_vf_f32m2(t, 1.0f, VEC_ELEM_NUM);
-
-    fixed_vfloat32m2_t poly = __riscv_vfmv_v_f_f32m2(a5, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m2(__riscv_vfmul_vv_f32m2(poly, t, VEC_ELEM_NUM),
-                                  a4, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m2(__riscv_vfmul_vv_f32m2(poly, t, VEC_ELEM_NUM),
-                                  a3, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m2(__riscv_vfmul_vv_f32m2(poly, t, VEC_ELEM_NUM),
-                                  a2, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m2(__riscv_vfmul_vv_f32m2(poly, t, VEC_ELEM_NUM),
-                                  a1, VEC_ELEM_NUM);
-    poly = __riscv_vfmul_vv_f32m2(poly, t, VEC_ELEM_NUM);
-
-    fixed_vfloat32m2_t exp_val =
-        FP32Vec8(__riscv_vfneg_v_f32m2(
-                     __riscv_vfmul_vv_f32m2(abs_x, abs_x, VEC_ELEM_NUM),
-                     VEC_ELEM_NUM))
-            .exp()
-            .reg;
-    fixed_vfloat32m2_t res = __riscv_vfrsub_vf_f32m2(
-        __riscv_vfmul_vv_f32m2(poly, exp_val, VEC_ELEM_NUM), 1.0f,
-        VEC_ELEM_NUM);
-
-    vbool16_t mask = __riscv_vmflt_vf_f32m2_b16(reg, 0.0f, VEC_ELEM_NUM);
-    return FP32Vec8(__riscv_vfneg_v_f32m2_m(mask, res, VEC_ELEM_NUM));
-  }
-};
-
-struct FP32Vec16 : public Vec<FP32Vec16> {
-  constexpr static int VEC_ELEM_NUM = 16;
-  fixed_vfloat32m4_t reg;
-
-  explicit FP32Vec16(float v) : reg(__riscv_vfmv_v_f_f32m4(v, VEC_ELEM_NUM)) {};
-  explicit FP32Vec16() : reg(__riscv_vfmv_v_f_f32m4(0.0f, VEC_ELEM_NUM)) {};
-  explicit FP32Vec16(const float* ptr)
-      : reg(__riscv_vle32_v_f32m4(ptr, VEC_ELEM_NUM)) {};
-  explicit FP32Vec16(fixed_vfloat32m4_t data) : reg(data) {};
-  explicit FP32Vec16(const FP32Vec8& data)
-      : reg(__riscv_vcreate_v_f32m2_f32m4(data.reg, data.reg)) {};
-  explicit FP32Vec16(const FP32Vec16& data) : reg(data.reg) {};
-  explicit FP32Vec16(const FP16Vec16& v);
-
-#ifdef RISCV_BF16_SUPPORT
-  explicit FP32Vec16(fixed_vbfloat16m2_t v)
-      : reg(__riscv_vfwcvtbf16_f_f_v_f32m4(v, VEC_ELEM_NUM)) {};
-  explicit FP32Vec16(const BF16Vec16& v)
-      : reg(__riscv_vfwcvtbf16_f_f_v_f32m4(v.reg, VEC_ELEM_NUM)) {};
-#else
-  explicit FP32Vec16(const BF16Vec16& v) : reg(v.reg_fp32) {};
-#endif
-
-  FP32Vec16 operator+(const FP32Vec16& b) const {
-    return FP32Vec16(__riscv_vfadd_vv_f32m4(reg, b.reg, VEC_ELEM_NUM));
-  }
-  FP32Vec16 operator-(const FP32Vec16& b) const {
-    return FP32Vec16(__riscv_vfsub_vv_f32m4(reg, b.reg, VEC_ELEM_NUM));
-  }
-  FP32Vec16 operator*(const FP32Vec16& b) const {
-    return FP32Vec16(__riscv_vfmul_vv_f32m4(reg, b.reg, VEC_ELEM_NUM));
-  }
-  FP32Vec16 operator/(const FP32Vec16& b) const {
-    return FP32Vec16(__riscv_vfdiv_vv_f32m4(reg, b.reg, VEC_ELEM_NUM));
-  }
-
-  FP32Vec16 fma(const FP32Vec16& a, const FP32Vec16& b) const {
-    return FP32Vec16(__riscv_vfmacc_vv_f32m4(reg, a.reg, b.reg, VEC_ELEM_NUM));
-  }
-
-  float reduce_sum() const {
-    fixed_vfloat32m1_t scalar = __riscv_vfmv_s_f_f32m1(0.0f, 1);
-    scalar = __riscv_vfredusum_vs_f32m4_f32m1(reg, scalar, VEC_ELEM_NUM);
-    return __riscv_vfmv_f_s_f32m1_f32(scalar);
-  }
-
-  float reduce_max() const {
-    fixed_vfloat32m1_t scalar =
-        __riscv_vfmv_s_f_f32m1(std::numeric_limits<float>::lowest(), 1);
-    scalar = __riscv_vfredmax_vs_f32m4_f32m1(reg, scalar, VEC_ELEM_NUM);
-    return __riscv_vfmv_f_s_f32m1_f32(scalar);
-  }
-
-  float reduce_min() const {
-    fixed_vfloat32m1_t scalar =
-        __riscv_vfmv_s_f_f32m1(std::numeric_limits<float>::max(), 1);
-    scalar = __riscv_vfredmin_vs_f32m4_f32m1(reg, scalar, VEC_ELEM_NUM);
-    return __riscv_vfmv_f_s_f32m1_f32(scalar);
-  }
-
-  template <int group_size>
-  float reduce_sub_sum(int idx) {
-    static_assert(VEC_ELEM_NUM % group_size == 0);
-    const int start = idx * group_size;
-    vuint32m4_t indices = __riscv_vid_v_u32m4(VEC_ELEM_NUM);
-    vbool8_t mask = __riscv_vmand_mm_b8(
-        __riscv_vmsgeu_vx_u32m4_b8(indices, start, VEC_ELEM_NUM),
-        __riscv_vmsltu_vx_u32m4_b8(indices, start + group_size, VEC_ELEM_NUM),
-        VEC_ELEM_NUM);
-    fixed_vfloat32m1_t scalar = __riscv_vfmv_s_f_f32m1(0.0f, 1);
-    scalar =
-        __riscv_vfredusum_vs_f32m4_f32m1_m(mask, reg, scalar, VEC_ELEM_NUM);
-    return __riscv_vfmv_f_s_f32m1_f32(scalar);
-  };
-
-  FP32Vec16 max(const FP32Vec16& b) const {
-    return FP32Vec16(__riscv_vfmax_vv_f32m4(reg, b.reg, VEC_ELEM_NUM));
-  }
-  FP32Vec16 min(const FP32Vec16& b) const {
-    return FP32Vec16(__riscv_vfmin_vv_f32m4(reg, b.reg, VEC_ELEM_NUM));
-  }
-  FP32Vec16 abs() const {
-    return FP32Vec16(__riscv_vfabs_v_f32m4(reg, VEC_ELEM_NUM));
-  }
-
-  FP32Vec16 clamp(const FP32Vec16& min_v, const FP32Vec16& max_v) const {
-    return FP32Vec16(__riscv_vfmin_vv_f32m4(
-        max_v.reg, __riscv_vfmax_vv_f32m4(min_v.reg, reg, VEC_ELEM_NUM),
-        VEC_ELEM_NUM));
-  }
-
-  void save(float* ptr) const { __riscv_vse32_v_f32m4(ptr, reg, VEC_ELEM_NUM); }
-  void save(float* ptr, int elem_num) const {
-    __riscv_vse32_v_f32m4(ptr, reg, elem_num);
-  }
-  void save_strided(float* ptr, ptrdiff_t stride) const {
-    ptrdiff_t byte_stride = stride * sizeof(float);
-    __riscv_vsse32_v_f32m4(ptr, byte_stride, reg, VEC_ELEM_NUM);
-  }
-
-  FP32Vec16 exp() const {
-    const float inv_ln2 = 1.44269504088896341f;
-    fixed_vfloat32m4_t x_scaled =
-        __riscv_vfmul_vf_f32m4(reg, inv_ln2, VEC_ELEM_NUM);
-    fixed_vint32m4_t n_int = __riscv_vfcvt_x_f_v_i32m4(x_scaled, VEC_ELEM_NUM);
-    fixed_vfloat32m4_t n_float = __riscv_vfcvt_f_x_v_f32m4(n_int, VEC_ELEM_NUM);
-    fixed_vfloat32m4_t r =
-        __riscv_vfsub_vv_f32m4(x_scaled, n_float, VEC_ELEM_NUM);
-
-    fixed_vfloat32m4_t poly =
-        __riscv_vfmv_v_f_f32m4(0.001333355810164f, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m4(__riscv_vfmul_vv_f32m4(poly, r, VEC_ELEM_NUM),
-                                  0.009618129107628f, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m4(__riscv_vfmul_vv_f32m4(poly, r, VEC_ELEM_NUM),
-                                  0.055504108664821f, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m4(__riscv_vfmul_vv_f32m4(poly, r, VEC_ELEM_NUM),
-                                  0.240226506959101f, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m4(__riscv_vfmul_vv_f32m4(poly, r, VEC_ELEM_NUM),
-                                  0.693147180559945f, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m4(__riscv_vfmul_vv_f32m4(poly, r, VEC_ELEM_NUM),
-                                  1.0f, VEC_ELEM_NUM);
-
-    fixed_vint32m4_t biased_exp = __riscv_vmax_vx_i32m4(
-        __riscv_vadd_vx_i32m4(n_int, 127, VEC_ELEM_NUM), 0, VEC_ELEM_NUM);
-    fixed_vfloat32m4_t scale = __riscv_vreinterpret_v_i32m4_f32m4(
-        __riscv_vsll_vx_i32m4(biased_exp, 23, VEC_ELEM_NUM));
-
-    return FP32Vec16(__riscv_vfmul_vv_f32m4(poly, scale, VEC_ELEM_NUM));
-  }
-
-  FP32Vec16 tanh() const {
-    fixed_vfloat32m4_t x_clamped = __riscv_vfmin_vf_f32m4(
-        __riscv_vfmax_vf_f32m4(reg, -9.0f, VEC_ELEM_NUM), 9.0f, VEC_ELEM_NUM);
-    FP32Vec16 exp_val =
-        FP32Vec16(__riscv_vfmul_vf_f32m4(x_clamped, 2.0f, VEC_ELEM_NUM)).exp();
-    return FP32Vec16(__riscv_vfdiv_vv_f32m4(
-        __riscv_vfsub_vf_f32m4(exp_val.reg, 1.0f, VEC_ELEM_NUM),
-        __riscv_vfadd_vf_f32m4(exp_val.reg, 1.0f, VEC_ELEM_NUM), VEC_ELEM_NUM));
-  }
-
-  FP32Vec16 er() const {
-    const float p = 0.3275911f, a1 = 0.254829592f, a2 = -0.284496736f,
-                a3 = 1.421413741f, a4 = -1.453152027f, a5 = 1.061405429f;
-    fixed_vfloat32m4_t abs_x = __riscv_vfabs_v_f32m4(reg, VEC_ELEM_NUM);
-    fixed_vfloat32m4_t t = __riscv_vfrdiv_vf_f32m4(
-        __riscv_vfadd_vf_f32m4(__riscv_vfmul_vf_f32m4(abs_x, p, VEC_ELEM_NUM),
-                               1.0f, VEC_ELEM_NUM),
-        1.0f, VEC_ELEM_NUM);
-
-    fixed_vfloat32m4_t poly = __riscv_vfmv_v_f_f32m4(a5, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m4(__riscv_vfmul_vv_f32m4(poly, t, VEC_ELEM_NUM),
-                                  a4, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m4(__riscv_vfmul_vv_f32m4(poly, t, VEC_ELEM_NUM),
-                                  a3, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m4(__riscv_vfmul_vv_f32m4(poly, t, VEC_ELEM_NUM),
-                                  a2, VEC_ELEM_NUM);
-    poly = __riscv_vfadd_vf_f32m4(__riscv_vfmul_vv_f32m4(poly, t, VEC_ELEM_NUM),
-                                  a1, VEC_ELEM_NUM);
-    poly = __riscv_vfmul_vv_f32m4(poly, t, VEC_ELEM_NUM);
-
-    fixed_vfloat32m4_t exp_val =
-        FP32Vec16(__riscv_vfneg_v_f32m4(
-                      __riscv_vfmul_vv_f32m4(abs_x, abs_x, VEC_ELEM_NUM),
-                      VEC_ELEM_NUM))
-            .exp()
-            .reg;
-    fixed_vfloat32m4_t res = __riscv_vfrsub_vf_f32m4(
-        __riscv_vfmul_vv_f32m4(poly, exp_val, VEC_ELEM_NUM), 1.0f,
-        VEC_ELEM_NUM);
-
-    vbool8_t mask = __riscv_vmflt_vf_f32m4_b8(reg, 0.0f, VEC_ELEM_NUM);
-    return FP32Vec16(__riscv_vfneg_v_f32m4_m(mask, res, VEC_ELEM_NUM));
-  }
-};
-
-// ============================================================================
-// Type Traits & Global Helpers
-// ============================================================================
-
-template <typename T>
-struct VecType {
-  using vec_type = void;
-  using vec_t = void;
-};
-
-template <typename T>
-using vec_t = typename VecType<T>::vec_type;
-
-template <>
-struct VecType<float> {
-  using vec_type = FP32Vec8;
-  using vec_t = FP32Vec8;
-};
-template <>
-struct VecType<c10::Half> {
-  using vec_type = FP16Vec8;
-  using vec_t = FP16Vec8;
-};
-template <>
-struct VecType<c10::BFloat16> {
-  using vec_type = BF16Vec8;
-  using vec_t = BF16Vec8;
-};
-
-template <typename T>
-void storeFP32(float v, T* ptr) {
-  *ptr = v;
-}
-template <>
-inline void storeFP32<c10::Half>(float v, c10::Half* ptr) {
-  *reinterpret_cast<_Float16*>(ptr) = static_cast<_Float16>(v);
-}
-
-inline FP16Vec16::FP16Vec16(const FP32Vec16& v) {
-  reg = __riscv_vfncvt_f_f_w_f16m2(v.reg, VEC_ELEM_NUM);
-}
-inline FP16Vec8::FP16Vec8(const FP32Vec8& v) {
-  reg = __riscv_vfncvt_f_f_w_f16m1(v.reg, VEC_ELEM_NUM);
-}
-inline FP32Vec16::FP32Vec16(const FP16Vec16& v) {
-  reg = __riscv_vfwcvt_f_f_v_f32m4(v.reg, VEC_ELEM_NUM);
-}
-inline void fma(FP32Vec16& acc, const FP32Vec16& a, const FP32Vec16& b) {
-  acc = acc.fma(a, b);
-}
-
-#ifdef RISCV_BF16_SUPPORT
-template <>
-inline void storeFP32<c10::BFloat16>(float v, c10::BFloat16* ptr) {
-  *ptr = static_cast<__bf16>(v);
-};
-inline BF16Vec8::BF16Vec8(const FP32Vec8& v)
-    : reg(__riscv_vfncvtbf16_f_f_w_bf16m1(v.reg, VEC_ELEM_NUM)) {};
-inline BF16Vec16::BF16Vec16(const FP32Vec16& v)
-    : reg(__riscv_vfncvtbf16_f_f_w_bf16m2(v.reg, VEC_ELEM_NUM)) {};
-#else
-template <>
-inline void storeFP32<c10::BFloat16>(float v, c10::BFloat16* ptr) {
-  uint32_t val;
-  std::memcpy(&val, &v, 4);
-  *reinterpret_cast<uint16_t*>(ptr) = static_cast<uint16_t>(val >> 16);
-}
-inline BF16Vec8::BF16Vec8(const FP32Vec8& v) : reg_fp32(v.reg) {}
-inline BF16Vec16::BF16Vec16(const FP32Vec16& v) : reg_fp32(v.reg) {}
-#endif
-
-inline void prefetch(const void* addr) { __builtin_prefetch(addr, 0, 1); }
-
-}  // namespace vec_op
-
-#ifndef CPU_KERNEL_GUARD_IN
-  #define CPU_KERNEL_GUARD_IN(NAME)
-#endif
-
-#ifndef CPU_KERNEL_GUARD_OUT
-  #define CPU_KERNEL_GUARD_OUT(NAME)
-#endif
+#include "cpu_types_riscv_defs.hpp"
+#include "cpu_types_riscv_impl.hpp"
 
-#endif  // CPU_TYPES_RISCV_HPP
\ No newline at end of file
+#endif  // CPU_TYPES_RISCV_HPP
diff --git a/csrc/cpu/cpu_types_riscv_defs.hpp b/csrc/cpu/cpu_types_riscv_defs.hpp
new file mode 100644
index 000000000000..8871617f05f2
--- /dev/null
+++ b/csrc/cpu/cpu_types_riscv_defs.hpp
@@ -0,0 +1,102 @@
+#ifndef CPU_TYPES_RISCV_DEFS_HPP
+#define CPU_TYPES_RISCV_DEFS_HPP
+
+// VLEN-to-LMUL mapping for RISC-V Vector extension.
+//
+// LMUL_<N> expands to the LMUL suffix giving N total bits of vector data:
+//   VLEN=128: LMUL_128=m1,  LMUL_256=m2,  LMUL_512=m4,  LMUL_1024=m8
+//   VLEN=256: LMUL_128=mf2, LMUL_256=m1,  LMUL_512=m2,  LMUL_1024=m4
+
+#include <riscv_vector.h>
+
+#if __riscv_v_min_vlen == 128
+  #define LMUL_128 m1
+  #define LMUL_256 m2
+  #define LMUL_512 m4
+  #define LMUL_1024 m8
+  #define BOOL_256 b16
+  #define BOOL_512 b8
+#elif __riscv_v_min_vlen == 256
+  #define LMUL_128 mf2
+  #define LMUL_256 m1
+  #define LMUL_512 m2
+  #define LMUL_1024 m4
+  #define BOOL_256 b32
+  #define BOOL_512 b16
+#else
+  #error "cpu_types_riscv_defs.hpp: unsupported __riscv_v_min_vlen"
+#endif
+
+// Token-paste helpers.
+#define _RVV_P2(a, b) a##b
+#define _RVV_P3(a, b, c) a##b##c
+#define _RVV_P4(a, b, c, d) a##b##c##d
+#define RVVTYPE(base, lmul, suffix) _RVV_P3(base, lmul, suffix)
+#define RVVI(base, lmul) _RVV_P2(base, lmul)
+#define RVVI3(base, lmul, suffix) _RVV_P3(base, lmul, suffix)
+#define RVVI4(a, b, c, d) _RVV_P4(a, b, c, d)
+// For mask intrinsics: RVVIB(base, LMUL_256, BOOL_256) → base##m2##_##b16
+#define _RVV_PB(base, lmul, btype) base##lmul##_##btype
+#define RVVIB(base, lmul, btype) _RVV_PB(base, lmul, btype)
+
+// ---- Semantic fixed-vector typedefs (named by element count) ----
+
+// float16
+typedef RVVTYPE(vfloat16, LMUL_128, _t) fixed_fp16x8_t
+    __attribute__((riscv_rvv_vector_bits(128)));
+typedef RVVTYPE(vfloat16, LMUL_256, _t) fixed_fp16x16_t
+    __attribute__((riscv_rvv_vector_bits(256)));
+
+// float32
+typedef RVVTYPE(vfloat32, LMUL_128, _t) fixed_fp32x4_t
+    __attribute__((riscv_rvv_vector_bits(128)));
+typedef RVVTYPE(vfloat32, LMUL_256, _t) fixed_fp32x8_t
+    __attribute__((riscv_rvv_vector_bits(256)));
+typedef RVVTYPE(vfloat32, LMUL_512, _t) fixed_fp32x16_t
+    __attribute__((riscv_rvv_vector_bits(512)));
+typedef RVVTYPE(vfloat32, LMUL_1024, _t) fixed_fp32x32_t
+    __attribute__((riscv_rvv_vector_bits(1024)));
+
+// int32
+typedef RVVTYPE(vint32, LMUL_256, _t) fixed_i32x8_t
+    __attribute__((riscv_rvv_vector_bits(256)));
+typedef RVVTYPE(vint32, LMUL_512, _t) fixed_i32x16_t
+    __attribute__((riscv_rvv_vector_bits(512)));
+
+// uint16
+typedef RVVTYPE(vuint16, LMUL_128, _t) fixed_u16x8_t
+    __attribute__((riscv_rvv_vector_bits(128)));
+typedef RVVTYPE(vuint16, LMUL_256, _t) fixed_u16x16_t
+    __attribute__((riscv_rvv_vector_bits(256)));
+typedef RVVTYPE(vuint16, LMUL_512, _t) fixed_u16x32_t
+    __attribute__((riscv_rvv_vector_bits(512)));
+
+// uint32
+typedef RVVTYPE(vuint32, LMUL_256, _t) fixed_u32x8_t
+    __attribute__((riscv_rvv_vector_bits(256)));
+
+// bfloat16
+#ifdef __riscv_zvfbfmin
+typedef RVVTYPE(vbfloat16, LMUL_128, _t) fixed_bf16x8_t
+    __attribute__((riscv_rvv_vector_bits(128)));
+typedef RVVTYPE(vbfloat16, LMUL_256, _t) fixed_bf16x16_t
+    __attribute__((riscv_rvv_vector_bits(256)));
+typedef RVVTYPE(vbfloat16, LMUL_512, _t) fixed_bf16x32_t
+    __attribute__((riscv_rvv_vector_bits(512)));
+#endif
+
+// ---- Reduction accumulator type (always m1 = one register of f32) ----
+// Used for scalar reductions; only element [0] is meaningful.
+typedef vfloat32m1_t rvv_f32_accum_t
+    __attribute__((riscv_rvv_vector_bits(__riscv_v_min_vlen)));
+
+// ---- Mask types for f32 elements ----
+#if __riscv_v_min_vlen == 128
+typedef vbool16_t rvv_mask_f32x8_t;
+typedef vbool8_t rvv_mask_f32x16_t;
+#elif __riscv_v_min_vlen == 256
+typedef vbool32_t rvv_mask_f32x8_t;
+typedef vbool16_t rvv_mask_f32x16_t;
+#endif
+
+#endif  // CPU_TYPES_RISCV_DEFS_HPP
diff --git a/csrc/cpu/cpu_types_riscv_impl.hpp b/csrc/cpu/cpu_types_riscv_impl.hpp
new file mode 100644
index 000000000000..d6cae76c45c5
--- /dev/null
+++ b/csrc/cpu/cpu_types_riscv_impl.hpp
@@ -0,0 +1,928 @@
+#ifndef CPU_TYPES_RISCV_IMPL_HPP
+#define CPU_TYPES_RISCV_IMPL_HPP
+
+// Shared implementation of RVV vector-type wrapper classes.
+// This file is VLEN-independent: it uses the semantic type names and
+// RVVI() intrinsic macros from cpu_types_riscv_defs.hpp.
+//
+// DO NOT include this file directly; include cpu_types_riscv.hpp instead.
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <iostream>
+#include <limits>
+#include <torch/all.h>
+namespace vec_op {
+
+// FP8 KV cache is not supported on RISC-V. These tag types and the
+// corresponding BF16Vec32 stub constructors below exist solely so that
+// templates referencing vec_op::fp8_*_tag in their bodies (e.g. in
+// cpu_attn_vec.hpp) compile under GCC's -Wtemplate-body lookup. The
+// stubs are never instantiated by CPU_ATTN_DISPATCH on __riscv.
+struct fp8_e4m3_tag {};
+struct fp8_e5m2_tag {};
+
+// BFloat16 is always supported on RISC-V: natively when __riscv_zvfbfmin
+// is defined (compiler-provided when -march includes zvfbfmin), otherwise
+// via the FP32-simulation fallback path.
+#define VLLM_DISPATCH_CASE_FLOATING_TYPES(...)         \
+  AT_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__) \
+  AT_DISPATCH_CASE(at::ScalarType::Half, __VA_ARGS__)  \
+  AT_DISPATCH_CASE(at::ScalarType::BFloat16, __VA_ARGS__)
+
+#define VLLM_DISPATCH_FLOATING_TYPES(TYPE, NAME, ...) \
+  AT_DISPATCH_SWITCH(TYPE, NAME, VLLM_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__))
+
+#define FORCE_INLINE __attribute__((always_inline)) inline
+
+namespace {
+template <typename T, T... indexes, typename F>
+constexpr void unroll_loop_item(std::integer_sequence<T, indexes...>, F&& f) {
+  (f(std::integral_constant<T, indexes>{}), ...);
+};
+}  // namespace
+
+template <typename T, T count, typename F,
+          typename = std::enable_if_t<std::is_invocable_v<F, T>>>
+constexpr void unroll_loop(F&& f) {
+  unroll_loop_item(std::make_integer_sequence<T, count>{}, std::forward<F>(f));
+}
+
+template <typename T>
+struct Vec {
+  constexpr static int get_elem_num() { return T::VEC_ELEM_NUM; };
+};
+
+struct FP32Vec8;
+struct FP32Vec16;
+
+// ============================================================================
+// FP16 Implementation
+// ============================================================================
+
+struct FP16Vec8 : public Vec<FP16Vec8> {
+  constexpr static int VEC_ELEM_NUM = 8;
+  fixed_fp16x8_t reg;
+
+  explicit FP16Vec8(const void* ptr)
+      : reg(RVVI(__riscv_vle16_v_f16, LMUL_128)(
+            static_cast<const _Float16*>(ptr), VEC_ELEM_NUM)) {};
+
+  explicit FP16Vec8(const FP32Vec8&);
+
+  void save(void* ptr) const {
+    RVVI(__riscv_vse16_v_f16, LMUL_128)(static_cast<_Float16*>(ptr), reg,
+                                        VEC_ELEM_NUM);
+  }
+  void save(void* ptr, int elem_num) const {
+    RVVI(__riscv_vse16_v_f16, LMUL_128)(static_cast<_Float16*>(ptr), reg,
+                                        elem_num);
+  }
+  void save_strided(void* ptr, ptrdiff_t stride) const {
+    ptrdiff_t byte_stride = stride * sizeof(_Float16);
+    RVVI(__riscv_vsse16_v_f16, LMUL_128)(static_cast<_Float16*>(ptr),
+                                         byte_stride, reg, VEC_ELEM_NUM);
+  }
+};
+
+struct FP16Vec16 : public Vec<FP16Vec16> {
+  constexpr static int VEC_ELEM_NUM = 16;
+  fixed_fp16x16_t reg;
+
+  explicit FP16Vec16(const void* ptr)
+      : reg(RVVI(__riscv_vle16_v_f16, LMUL_256)(
+            static_cast<const _Float16*>(ptr), VEC_ELEM_NUM)) {};
+
+  explicit FP16Vec16(const FP32Vec16& vec);
+
+  void save(void* ptr) const {
+    RVVI(__riscv_vse16_v_f16, LMUL_256)(static_cast<_Float16*>(ptr), reg,
+                                        VEC_ELEM_NUM);
+  }
+  void save(void* ptr, int elem_num) const {
+    RVVI(__riscv_vse16_v_f16, LMUL_256)(static_cast<_Float16*>(ptr), reg,
+                                        elem_num);
+  }
+  void save_strided(void* ptr, ptrdiff_t stride) const {
+    ptrdiff_t byte_stride = stride * sizeof(_Float16);
+    RVVI(__riscv_vsse16_v_f16, LMUL_256)(static_cast<_Float16*>(ptr),
+                                         byte_stride, reg, VEC_ELEM_NUM);
+  }
+};
+
+// ============================================================================
+// BF16 Implementation
+// ============================================================================
+
+#ifdef __riscv_zvfbfmin
+
+FORCE_INLINE fixed_u16x8_t bf16_to_u16(fixed_bf16x8_t v) {
+  return RVVI4(__riscv_vreinterpret_v_bf16, LMUL_128, _u16, LMUL_128)(v);
+}
+FORCE_INLINE fixed_u16x16_t bf16_to_u16(fixed_bf16x16_t v) {
+  return RVVI4(__riscv_vreinterpret_v_bf16, LMUL_256, _u16, LMUL_256)(v);
+}
+FORCE_INLINE fixed_u16x32_t bf16_to_u16(fixed_bf16x32_t v) {
+  return RVVI4(__riscv_vreinterpret_v_bf16, LMUL_512, _u16, LMUL_512)(v);
+}
+
+struct BF16Vec8 : public Vec<BF16Vec8> {
+  constexpr static int VEC_ELEM_NUM = 8;
+  fixed_bf16x8_t reg;
+
+  explicit BF16Vec8(const void* ptr)
+      : reg(RVVI4(__riscv_vreinterpret_v_u16, LMUL_128, _bf16,
+                  LMUL_128)(RVVI(__riscv_vle16_v_u16, LMUL_128)(
+            reinterpret_cast<const uint16_t*>(ptr), VEC_ELEM_NUM))) {};
+
+  explicit BF16Vec8(fixed_bf16x8_t data) : reg(data) {};
+  explicit BF16Vec8(const FP32Vec8&);
+
+  void save(void* ptr) const {
+    RVVI(__riscv_vse16_v_u16, LMUL_128)(reinterpret_cast<uint16_t*>(ptr),
+                                        bf16_to_u16(reg), VEC_ELEM_NUM);
+  }
+  void save(void* ptr, int elem_num) const {
+    RVVI(__riscv_vse16_v_u16, LMUL_128)(reinterpret_cast<uint16_t*>(ptr),
+                                        bf16_to_u16(reg), elem_num);
+  }
+  void save_strided(void* ptr, ptrdiff_t stride) const {
+    ptrdiff_t byte_stride = stride * sizeof(uint16_t);
+    RVVI(__riscv_vsse16_v_u16, LMUL_128)(reinterpret_cast<uint16_t*>(ptr),
+                                         byte_stride, bf16_to_u16(reg),
+                                         VEC_ELEM_NUM);
+  }
+};
+
+struct BF16Vec16 : public Vec<BF16Vec16> {
+  constexpr static int VEC_ELEM_NUM = 16;
+  fixed_bf16x16_t reg;
+
+  explicit BF16Vec16(const void* ptr)
+      : reg(RVVI4(__riscv_vreinterpret_v_u16, LMUL_256, _bf16,
+                  LMUL_256)(RVVI(__riscv_vle16_v_u16, LMUL_256)(
+            reinterpret_cast<const uint16_t*>(ptr), VEC_ELEM_NUM))) {};
+
+  explicit BF16Vec16(fixed_bf16x16_t data) : reg(data) {};
+  explicit BF16Vec16(const FP32Vec16&);
+
+  void save(void* ptr) const {
+    RVVI(__riscv_vse16_v_u16, LMUL_256)(reinterpret_cast<uint16_t*>(ptr),
+                                        bf16_to_u16(reg), VEC_ELEM_NUM);
+  }
+  void save(void* ptr, int elem_num) const {
+    RVVI(__riscv_vse16_v_u16, LMUL_256)(reinterpret_cast<uint16_t*>(ptr),
+                                        bf16_to_u16(reg), elem_num);
+  }
+  void save_strided(void* ptr, ptrdiff_t stride) const {
+    ptrdiff_t byte_stride = stride * sizeof(uint16_t);
+    RVVI(__riscv_vsse16_v_u16, LMUL_256)(reinterpret_cast<uint16_t*>(ptr),
+                                         byte_stride, bf16_to_u16(reg),
+                                         VEC_ELEM_NUM);
+  }
+};
+
+struct BF16Vec32 : public Vec<BF16Vec32> {
+  constexpr static int VEC_ELEM_NUM = 32;
+  fixed_bf16x32_t reg;
+
+  explicit BF16Vec32(const void* ptr)
+      : reg(RVVI4(__riscv_vreinterpret_v_u16, LMUL_512, _bf16,
+                  LMUL_512)(RVVI(__riscv_vle16_v_u16, LMUL_512)(
+            reinterpret_cast<const uint16_t*>(ptr), VEC_ELEM_NUM))) {};
+
+  explicit BF16Vec32(fixed_bf16x32_t data) : reg(data) {};
+
+  // FP8 KV cache stubs: never instantiated on RISC-V (CPU_ATTN_DISPATCH
+  // omits FP8 cases on __riscv); exist only so name lookup succeeds.
+  explicit BF16Vec32(const uint8_t* ptr, fp8_e4m3_tag)
+      : BF16Vec32(static_cast<const void*>(ptr)) {}
+  explicit BF16Vec32(const uint8_t* ptr, fp8_e5m2_tag)
+      : BF16Vec32(static_cast<const void*>(ptr)) {}
+
+  explicit BF16Vec32(const BF16Vec8& v) {
+    fixed_u16x8_t u16_val = bf16_to_u16(v.reg);
+    fixed_u16x32_t u16_combined =
+        RVVI4(__riscv_vcreate_v_u16, LMUL_128, _u16, LMUL_512)(
+            u16_val, u16_val, u16_val, u16_val);
+    reg = RVVI4(__riscv_vreinterpret_v_u16, LMUL_512, _bf16,
+                LMUL_512)(u16_combined);
+  };
+
+  void save(void* ptr) const {
+    RVVI(__riscv_vse16_v_u16, LMUL_512)(reinterpret_cast<uint16_t*>(ptr),
+                                        bf16_to_u16(reg), VEC_ELEM_NUM);
+  }
+  void save(void* ptr, int elem_num) const {
+    RVVI(__riscv_vse16_v_u16, LMUL_512)(reinterpret_cast<uint16_t*>(ptr),
+                                        bf16_to_u16(reg), elem_num);
+  }
+  void save_strided(void* ptr, ptrdiff_t stride) const {
+    ptrdiff_t byte_stride = stride * sizeof(uint16_t);
+    RVVI(__riscv_vsse16_v_u16, LMUL_512)(reinterpret_cast<uint16_t*>(ptr),
+                                         byte_stride, bf16_to_u16(reg),
+                                         VEC_ELEM_NUM);
+  }
+};
+
+#else
+// ============================================================================
+// BF16 Fallback Implementation (FP32 Simulation)
+// ============================================================================
+
+struct BF16Vec8 : public Vec<BF16Vec8> {
+  constexpr static int VEC_ELEM_NUM = 8;
+  fixed_fp32x8_t reg_fp32;
+  explicit BF16Vec8(const void* ptr) {
+    const uint16_t* u16 = static_cast<const uint16_t*>(ptr);
+    float tmp[8];
+    for (int i = 0; i < 8; ++i) {
+      uint32_t v = static_cast<uint32_t>(u16[i]) << 16;
+      std::memcpy(&tmp[i], &v, 4);
+    }
+    reg_fp32 = RVVI(__riscv_vle32_v_f32, LMUL_256)(tmp, 8);
+  }
+  explicit BF16Vec8(const FP32Vec8&);
+  void save(void* ptr) const {
+    float tmp[8];
+    RVVI(__riscv_vse32_v_f32, LMUL_256)(tmp, reg_fp32, 8);
+    uint16_t* u16 = static_cast<uint16_t*>(ptr);
+    for (int i = 0; i < 8; ++i) {
+      uint32_t v;
+      std::memcpy(&v, &tmp[i], 4);
+      u16[i] = static_cast<uint16_t>(v >> 16);
+    }
+  }
+  void save(void* ptr, int elem_num) const {
+    float tmp[8];
+    RVVI(__riscv_vse32_v_f32, LMUL_256)(tmp, reg_fp32, 8);
+    uint16_t* u16 = static_cast<uint16_t*>(ptr);
+    for (int i = 0; i < elem_num; ++i) {
+      uint32_t v;
+      std::memcpy(&v, &tmp[i], 4);
+      u16[i] = static_cast<uint16_t>(v >> 16);
+    }
+  }
+  void save_strided(void* ptr, ptrdiff_t stride) const {
+    float tmp[8];
+    RVVI(__riscv_vse32_v_f32, LMUL_256)(tmp, reg_fp32, 8);
+    uint8_t* u8 = static_cast<uint8_t*>(ptr);
+    ptrdiff_t byte_stride = stride * sizeof(uint16_t);
+    for (int i = 0; i < 8; ++i) {
+      uint32_t v;
+      std::memcpy(&v, &tmp[i], 4);
+      uint16_t val = static_cast<uint16_t>(v >> 16);
+      *reinterpret_cast<uint16_t*>(u8 + i * byte_stride) = val;
+    }
+  }
+};
+
+struct BF16Vec16 : public Vec<BF16Vec16> {
+  constexpr static int VEC_ELEM_NUM = 16;
+  fixed_fp32x16_t reg_fp32;
+  explicit BF16Vec16(const void* ptr) {
+    const uint16_t* u16 = static_cast<const uint16_t*>(ptr);
+    float tmp[16];
+    for (int i = 0; i < 16; ++i) {
+      uint32_t v = static_cast<uint32_t>(u16[i]) << 16;
+      std::memcpy(&tmp[i], &v, 4);
+    }
+    reg_fp32 = RVVI(__riscv_vle32_v_f32, LMUL_512)(tmp, 16);
+  }
+  explicit BF16Vec16(const FP32Vec16&);
+  void save(void* ptr) const {
+    float tmp[16];
+    RVVI(__riscv_vse32_v_f32, LMUL_512)(tmp, reg_fp32, 16);
+    uint16_t* u16 = static_cast<uint16_t*>(ptr);
+    for (int i = 0; i < 16; ++i) {
+      uint32_t v;
+      std::memcpy(&v, &tmp[i], 4);
+      u16[i] = static_cast<uint16_t>(v >> 16);
+    }
+  }
+  void save(void* ptr, int elem_num) const {
+    float tmp[16];
+    RVVI(__riscv_vse32_v_f32, LMUL_512)(tmp, reg_fp32, 16);
+    uint16_t* u16 = static_cast<uint16_t*>(ptr);
+    for (int i = 0; i < elem_num; ++i) {
+      uint32_t v;
+      std::memcpy(&v, &tmp[i], 4);
+      u16[i] = static_cast<uint16_t>(v >> 16);
+    }
+  }
+  void save_strided(void* ptr, ptrdiff_t stride) const {
+    float tmp[16];
+    RVVI(__riscv_vse32_v_f32, LMUL_512)(tmp, reg_fp32, 16);
+    uint8_t* u8 = static_cast<uint8_t*>(ptr);
+    ptrdiff_t byte_stride = stride * sizeof(uint16_t);
+    for (int i = 0; i < 16; ++i) {
+      uint32_t v;
+      std::memcpy(&v, &tmp[i], 4);
+      uint16_t val = static_cast<uint16_t>(v >> 16);
+      *reinterpret_cast<uint16_t*>(u8 + i * byte_stride) = val;
+    }
+  }
+};
+
+struct BF16Vec32 : public Vec<BF16Vec32> {
+  constexpr static int VEC_ELEM_NUM = 32;
+  fixed_fp32x32_t reg_fp32;
+
+  explicit BF16Vec32(const void* ptr) {
+    const uint16_t* u16 = static_cast<const uint16_t*>(ptr);
+    float tmp[32];
+    for (int i = 0; i < 32; ++i) {
+      uint32_t v = static_cast<uint32_t>(u16[i]) << 16;
+      std::memcpy(&tmp[i], &v, 4);
+    }
+    reg_fp32 = RVVI(__riscv_vle32_v_f32, LMUL_1024)(tmp, 32);
+  }
+
+  // FP8 KV cache stubs: never instantiated on RISC-V (CPU_ATTN_DISPATCH
+  // omits FP8 cases on __riscv); exist only so name lookup succeeds.
+  explicit BF16Vec32(const uint8_t* ptr, fp8_e4m3_tag)
+      : BF16Vec32(static_cast<const void*>(ptr)) {}
+  explicit BF16Vec32(const uint8_t* ptr, fp8_e5m2_tag)
+      : BF16Vec32(static_cast<const void*>(ptr)) {}
+
+  explicit BF16Vec32(const BF16Vec8& v) {
+    float tmp_small[8];
+    RVVI(__riscv_vse32_v_f32, LMUL_256)(tmp_small, v.reg_fp32, 8);
+    float tmp_large[32];
+    for (int i = 0; i < 4; ++i) {
+      std::memcpy(tmp_large + (i * 8), tmp_small, 8 * sizeof(float));
+    }
+    reg_fp32 = RVVI(__riscv_vle32_v_f32, LMUL_1024)(tmp_large, 32);
+  }
+
+  void save(void* ptr) const {
+    float tmp[32];
+    RVVI(__riscv_vse32_v_f32, LMUL_1024)(tmp, reg_fp32, 32);
+    uint16_t* u16 = static_cast<uint16_t*>(ptr);
+    for (int i = 0; i < 32; ++i) {
+      uint32_t v;
+      std::memcpy(&v, &tmp[i], 4);
+      u16[i] = static_cast<uint16_t>(v >> 16);
+    }
+  }
+
+  void save(void* ptr, int elem_num) const {
+    float tmp[32];
+    RVVI(__riscv_vse32_v_f32, LMUL_1024)(tmp, reg_fp32, 32);
+    uint16_t* u16 = static_cast<uint16_t*>(ptr);
+    for (int i = 0; i < elem_num; ++i) {
+      uint32_t v;
+      std::memcpy(&v, &tmp[i], 4);
+      u16[i] = static_cast<uint16_t>(v >> 16);
+    }
+  }
+
+  void save_strided(void* ptr, ptrdiff_t stride) const {
+    float tmp[32];
+    RVVI(__riscv_vse32_v_f32, LMUL_1024)(tmp, reg_fp32, 32);
+    uint8_t* u8 = static_cast<uint8_t*>(ptr);
+    ptrdiff_t byte_stride = stride * sizeof(uint16_t);
+    for (int i = 0; i < 32; ++i) {
+      uint32_t v;
+      std::memcpy(&v, &tmp[i], 4);
+      uint16_t val = static_cast<uint16_t>(v >> 16);
+      *reinterpret_cast<uint16_t*>(u8 + i * byte_stride) = val;
+    }
+  }
+};
+#endif
+
+// ============================================================================
+// FP32 Implementation
+// ============================================================================
+
+struct FP32Vec4 : public Vec<FP32Vec4> {
+  constexpr static int VEC_ELEM_NUM = 4;
+  fixed_fp32x4_t reg;
+  explicit FP32Vec4(float v)
+      : reg(RVVI(__riscv_vfmv_v_f_f32, LMUL_128)(v, VEC_ELEM_NUM)) {};
+  explicit FP32Vec4()
+      : reg(RVVI(__riscv_vfmv_v_f_f32, LMUL_128)(0.0f, VEC_ELEM_NUM)) {};
+  explicit FP32Vec4(const float* ptr)
+      : reg(RVVI(__riscv_vle32_v_f32, LMUL_128)(ptr, VEC_ELEM_NUM)) {};
+  explicit FP32Vec4(fixed_fp32x4_t data) : reg(data) {};
+  explicit FP32Vec4(const FP32Vec4& data) : reg(data.reg) {};
+  void save(float* ptr) const {
+    RVVI(__riscv_vse32_v_f32, LMUL_128)(ptr, reg, VEC_ELEM_NUM);
+  }
+  void save(float* ptr, int elem_num) const {
+    RVVI(__riscv_vse32_v_f32, LMUL_128)(ptr, reg, elem_num);
+  }
+};
+
+struct FP32Vec8 : public Vec<FP32Vec8> {
+  constexpr static int VEC_ELEM_NUM = 8;
+  fixed_fp32x8_t reg;
+
+  explicit FP32Vec8(float v)
+      : reg(RVVI(__riscv_vfmv_v_f_f32, LMUL_256)(v, VEC_ELEM_NUM)) {};
+  explicit FP32Vec8()
+      : reg(RVVI(__riscv_vfmv_v_f_f32, LMUL_256)(0.0f, VEC_ELEM_NUM)) {};
+  explicit FP32Vec8(const float* ptr)
+      : reg(RVVI(__riscv_vle32_v_f32, LMUL_256)(ptr, VEC_ELEM_NUM)) {};
+  explicit FP32Vec8(fixed_fp32x8_t data) : reg(data) {};
+  explicit FP32Vec8(const FP32Vec8& data) : reg(data.reg) {};
+  explicit FP32Vec8(const FP16Vec8& v)
+      : reg(RVVI(__riscv_vfwcvt_f_f_v_f32, LMUL_256)(v.reg, VEC_ELEM_NUM)) {};
+  explicit FP32Vec8(fixed_fp16x8_t v)
+      : reg(RVVI(__riscv_vfwcvt_f_f_v_f32, LMUL_256)(v, VEC_ELEM_NUM)) {};
+
+#ifdef __riscv_zvfbfmin
+  explicit FP32Vec8(fixed_bf16x8_t v)
+      : reg(RVVI(__riscv_vfwcvtbf16_f_f_v_f32, LMUL_256)(v, VEC_ELEM_NUM)) {};
+  explicit FP32Vec8(const BF16Vec8& v)
+      : reg(RVVI(__riscv_vfwcvtbf16_f_f_v_f32, LMUL_256)(v.reg, VEC_ELEM_NUM)) {
+        };
+#else
+  explicit FP32Vec8(const BF16Vec8& v) : reg(v.reg_fp32) {};
+#endif
+
+  float reduce_sum() const {
+    rvv_f32_accum_t scalar = __riscv_vfmv_s_f_f32m1(0.0f, 1);
+    scalar = RVVI3(__riscv_vfredusum_vs_f32, LMUL_256, _f32m1)(reg, scalar,
+                                                               VEC_ELEM_NUM);
+    return __riscv_vfmv_f_s_f32m1_f32(scalar);
+  }
+
+  FP32Vec8 operator*(const FP32Vec8& b) const {
+    return FP32Vec8(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_256)(reg, b.reg, VEC_ELEM_NUM));
+  }
+  FP32Vec8 operator+(const FP32Vec8& b) const {
+    return FP32Vec8(
+        RVVI(__riscv_vfadd_vv_f32, LMUL_256)(reg, b.reg, VEC_ELEM_NUM));
+  }
+  FP32Vec8 operator-(const FP32Vec8& b) const {
+    return FP32Vec8(
+        RVVI(__riscv_vfsub_vv_f32, LMUL_256)(reg, b.reg, VEC_ELEM_NUM));
+  }
+  FP32Vec8 operator/(const FP32Vec8& b) const {
+    return FP32Vec8(
+        RVVI(__riscv_vfdiv_vv_f32, LMUL_256)(reg, b.reg, VEC_ELEM_NUM));
+  }
+
+  FP32Vec8 min(const FP32Vec8& b) const {
+    return FP32Vec8(
+        RVVI(__riscv_vfmin_vv_f32, LMUL_256)(reg, b.reg, VEC_ELEM_NUM));
+  }
+  FP32Vec8 max(const FP32Vec8& b) const {
+    return FP32Vec8(
+        RVVI(__riscv_vfmax_vv_f32, LMUL_256)(reg, b.reg, VEC_ELEM_NUM));
+  }
+  FP32Vec8 abs() const {
+    return FP32Vec8(RVVI(__riscv_vfabs_v_f32, LMUL_256)(reg, VEC_ELEM_NUM));
+  }
+
+  FP32Vec8 min(const FP32Vec8& b, int elem_num) const {
+    return FP32Vec8(RVVI(__riscv_vfmin_vv_f32, LMUL_256)(reg, b.reg, elem_num));
+  }
+  FP32Vec8 max(const FP32Vec8& b, int elem_num) const {
+    return FP32Vec8(RVVI(__riscv_vfmax_vv_f32, LMUL_256)(reg, b.reg, elem_num));
+  }
+
+  FP32Vec8 clamp(const FP32Vec8& min_v, const FP32Vec8& max_v) const {
+    fixed_fp32x8_t temp =
+        RVVI(__riscv_vfmax_vv_f32, LMUL_256)(min_v.reg, reg, VEC_ELEM_NUM);
+    return FP32Vec8(
+        RVVI(__riscv_vfmin_vv_f32, LMUL_256)(max_v.reg, temp, VEC_ELEM_NUM));
+  }
+
+  void save(float* ptr) const {
+    RVVI(__riscv_vse32_v_f32, LMUL_256)(ptr, reg, VEC_ELEM_NUM);
+  }
+  void save(float* ptr, int elem_num) const {
+    RVVI(__riscv_vse32_v_f32, LMUL_256)(ptr, reg, elem_num);
+  }
+  void save_strided(float* ptr, ptrdiff_t stride) const {
+    ptrdiff_t byte_stride = stride * sizeof(float);
+    RVVI(__riscv_vsse32_v_f32, LMUL_256)(ptr, byte_stride, reg, VEC_ELEM_NUM);
+  }
+
+  FP32Vec8 exp() const {
+    // Clamp input to prevent NaN: exp(-inf) must return 0, not NaN.
+    // Without clamping, -inf * 0.0 = NaN in the final poly * scale step.
+    // Matches the clamping strategy used by x86 AVX-512 and ARM NEON.
+    constexpr float exp_lo = -87.3365447505f;  // ln(FLT_MIN)
+    constexpr float exp_hi = 88.7228391117f;   // ln(FLT_MAX)
+    fixed_fp32x8_t x = RVVI(__riscv_vfmin_vf_f32, LMUL_256)(
+        RVVI(__riscv_vfmax_vf_f32, LMUL_256)(reg, exp_lo, VEC_ELEM_NUM), exp_hi,
+        VEC_ELEM_NUM);
+
+    const float inv_ln2 = 1.44269504088896341f;
+    fixed_fp32x8_t x_scaled =
+        RVVI(__riscv_vfmul_vf_f32, LMUL_256)(x, inv_ln2, VEC_ELEM_NUM);
+    fixed_i32x8_t n_int =
+        RVVI(__riscv_vfcvt_x_f_v_i32, LMUL_256)(x_scaled, VEC_ELEM_NUM);
+    fixed_fp32x8_t n_float =
+        RVVI(__riscv_vfcvt_f_x_v_f32, LMUL_256)(n_int, VEC_ELEM_NUM);
+
+    fixed_fp32x8_t r =
+        RVVI(__riscv_vfsub_vv_f32, LMUL_256)(x_scaled, n_float, VEC_ELEM_NUM);
+
+    fixed_fp32x8_t poly =
+        RVVI(__riscv_vfmv_v_f_f32, LMUL_256)(0.001333355810164f, VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfmul_vv_f32, LMUL_256)(poly, r, VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_256)(poly, 0.009618129107628f,
+                                                VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfmul_vv_f32, LMUL_256)(poly, r, VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_256)(poly, 0.055504108664821f,
+                                                VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfmul_vv_f32, LMUL_256)(poly, r, VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_256)(poly, 0.240226506959101f,
+                                                VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfmul_vv_f32, LMUL_256)(poly, r, VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_256)(poly, 0.693147180559945f,
+                                                VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfmul_vv_f32, LMUL_256)(poly, r, VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_256)(poly, 1.0f, VEC_ELEM_NUM);
+
+    fixed_i32x8_t biased_exp =
+        RVVI(__riscv_vadd_vx_i32, LMUL_256)(n_int, 127, VEC_ELEM_NUM);
+    biased_exp =
+        RVVI(__riscv_vmax_vx_i32, LMUL_256)(biased_exp, 0, VEC_ELEM_NUM);
+    fixed_i32x8_t exponent_bits =
+        RVVI(__riscv_vsll_vx_i32, LMUL_256)(biased_exp, 23, VEC_ELEM_NUM);
+    fixed_fp32x8_t scale = RVVI4(__riscv_vreinterpret_v_i32, LMUL_256, _f32,
+                                 LMUL_256)(exponent_bits);
+
+    return FP32Vec8(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_256)(poly, scale, VEC_ELEM_NUM));
+  }
+
+  FP32Vec8 tanh() const {
+    fixed_fp32x8_t x_clamped = RVVI(__riscv_vfmin_vf_f32, LMUL_256)(
+        RVVI(__riscv_vfmax_vf_f32, LMUL_256)(reg, -9.0f, VEC_ELEM_NUM), 9.0f,
+        VEC_ELEM_NUM);
+    fixed_fp32x8_t x2 =
+        RVVI(__riscv_vfmul_vf_f32, LMUL_256)(x_clamped, 2.0f, VEC_ELEM_NUM);
+    FP32Vec8 exp_val = FP32Vec8(x2).exp();
+    fixed_fp32x8_t num =
+        RVVI(__riscv_vfsub_vf_f32, LMUL_256)(exp_val.reg, 1.0f, VEC_ELEM_NUM);
+    fixed_fp32x8_t den =
+        RVVI(__riscv_vfadd_vf_f32, LMUL_256)(exp_val.reg, 1.0f, VEC_ELEM_NUM);
+    return FP32Vec8(
+        RVVI(__riscv_vfdiv_vv_f32, LMUL_256)(num, den, VEC_ELEM_NUM));
+  }
+
+  FP32Vec8 er() const {
+    const float p = 0.3275911f, a1 = 0.254829592f, a2 = -0.284496736f,
+                a3 = 1.421413741f, a4 = -1.453152027f, a5 = 1.061405429f;
+    fixed_fp32x8_t abs_x =
+        RVVI(__riscv_vfabs_v_f32, LMUL_256)(reg, VEC_ELEM_NUM);
+
+    fixed_fp32x8_t t = RVVI(__riscv_vfadd_vf_f32, LMUL_256)(
+        RVVI(__riscv_vfmul_vf_f32, LMUL_256)(abs_x, p, VEC_ELEM_NUM), 1.0f,
+        VEC_ELEM_NUM);
+    t = RVVI(__riscv_vfrdiv_vf_f32, LMUL_256)(t, 1.0f, VEC_ELEM_NUM);
+
+    fixed_fp32x8_t poly =
+        RVVI(__riscv_vfmv_v_f_f32, LMUL_256)(a5, VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_256)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_256)(poly, t, VEC_ELEM_NUM), a4,
+        VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_256)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_256)(poly, t, VEC_ELEM_NUM), a3,
+        VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_256)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_256)(poly, t, VEC_ELEM_NUM), a2,
+        VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_256)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_256)(poly, t, VEC_ELEM_NUM), a1,
+        VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfmul_vv_f32, LMUL_256)(poly, t, VEC_ELEM_NUM);
+
+    fixed_fp32x8_t exp_val = FP32Vec8(RVVI(__riscv_vfneg_v_f32, LMUL_256)(
+                                          RVVI(__riscv_vfmul_vv_f32, LMUL_256)(
+                                              abs_x, abs_x, VEC_ELEM_NUM),
+                                          VEC_ELEM_NUM))
+                                 .exp()
+                                 .reg;
+    fixed_fp32x8_t res = RVVI(__riscv_vfrsub_vf_f32, LMUL_256)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_256)(poly, exp_val, VEC_ELEM_NUM), 1.0f,
+        VEC_ELEM_NUM);
+
+    rvv_mask_f32x8_t mask = RVVIB(__riscv_vmflt_vf_f32, LMUL_256, BOOL_256)(
+        reg, 0.0f, VEC_ELEM_NUM);
+    return FP32Vec8(
+        RVVI3(__riscv_vfneg_v_f32, LMUL_256, _m)(mask, res, VEC_ELEM_NUM));
+  }
+};
+
+struct FP32Vec16 : public Vec<FP32Vec16> {
+  constexpr static int VEC_ELEM_NUM = 16;
+  fixed_fp32x16_t reg;
+
+  explicit FP32Vec16(float v)
+      : reg(RVVI(__riscv_vfmv_v_f_f32, LMUL_512)(v, VEC_ELEM_NUM)) {};
+  explicit FP32Vec16()
+      : reg(RVVI(__riscv_vfmv_v_f_f32, LMUL_512)(0.0f, VEC_ELEM_NUM)) {};
+  explicit FP32Vec16(const float* ptr)
+      : reg(RVVI(__riscv_vle32_v_f32, LMUL_512)(ptr, VEC_ELEM_NUM)) {};
+  explicit FP32Vec16(fixed_fp32x16_t data) : reg(data) {};
+  explicit FP32Vec16(const FP32Vec8& data)
+      : reg(RVVI4(__riscv_vcreate_v_f32, LMUL_256, _f32, LMUL_512)(
+            data.reg, data.reg)) {};
+  explicit FP32Vec16(const FP32Vec16& data) : reg(data.reg) {};
+  explicit FP32Vec16(const FP16Vec16& v);
+
+#ifdef __riscv_zvfbfmin
+  explicit FP32Vec16(fixed_bf16x16_t v)
+      : reg(RVVI(__riscv_vfwcvtbf16_f_f_v_f32, LMUL_512)(v, VEC_ELEM_NUM)) {};
+  explicit FP32Vec16(const BF16Vec16& v)
+      : reg(RVVI(__riscv_vfwcvtbf16_f_f_v_f32, LMUL_512)(v.reg, VEC_ELEM_NUM)) {
+        };
+#else
+  explicit FP32Vec16(const BF16Vec16& v) : reg(v.reg_fp32) {};
+#endif
+
+  FP32Vec16 operator+(const FP32Vec16& b) const {
+    return FP32Vec16(
+        RVVI(__riscv_vfadd_vv_f32, LMUL_512)(reg, b.reg, VEC_ELEM_NUM));
+  }
+  FP32Vec16 operator-(const FP32Vec16& b) const {
+    return FP32Vec16(
+        RVVI(__riscv_vfsub_vv_f32, LMUL_512)(reg, b.reg, VEC_ELEM_NUM));
+  }
+  FP32Vec16 operator*(const FP32Vec16& b) const {
+    return FP32Vec16(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_512)(reg, b.reg, VEC_ELEM_NUM));
+  }
+  FP32Vec16 operator/(const FP32Vec16& b) const {
+    return FP32Vec16(
+        RVVI(__riscv_vfdiv_vv_f32, LMUL_512)(reg, b.reg, VEC_ELEM_NUM));
+  }
+
+  FP32Vec16 fma(const FP32Vec16& a, const FP32Vec16& b) const {
+    return FP32Vec16(
+        RVVI(__riscv_vfmacc_vv_f32, LMUL_512)(reg, a.reg, b.reg, VEC_ELEM_NUM));
+  }
+
+  float reduce_sum() const {
+    rvv_f32_accum_t scalar = __riscv_vfmv_s_f_f32m1(0.0f, 1);
+    scalar = RVVI3(__riscv_vfredusum_vs_f32, LMUL_512, _f32m1)(reg, scalar,
+                                                               VEC_ELEM_NUM);
+    return __riscv_vfmv_f_s_f32m1_f32(scalar);
+  }
+
+  float reduce_max() const {
+    rvv_f32_accum_t scalar =
+        __riscv_vfmv_s_f_f32m1(std::numeric_limits<float>::lowest(), 1);
+    scalar = RVVI3(__riscv_vfredmax_vs_f32, LMUL_512, _f32m1)(reg, scalar,
+                                                              VEC_ELEM_NUM);
+    return __riscv_vfmv_f_s_f32m1_f32(scalar);
+  }
+
+  float reduce_min() const {
+    rvv_f32_accum_t scalar =
+        __riscv_vfmv_s_f_f32m1(std::numeric_limits<float>::max(), 1);
+    scalar = RVVI3(__riscv_vfredmin_vs_f32, LMUL_512, _f32m1)(reg, scalar,
+                                                              VEC_ELEM_NUM);
+    return __riscv_vfmv_f_s_f32m1_f32(scalar);
+  }
+
+  template <int group_size>
+  float reduce_sub_sum(int idx) {
+    static_assert(VEC_ELEM_NUM % group_size == 0);
+    const int start = idx * group_size;
+    auto indices = RVVI(__riscv_vid_v_u32, LMUL_512)(VEC_ELEM_NUM);
+    rvv_mask_f32x16_t mask = RVVI(__riscv_vmand_mm_, BOOL_512)(
+        RVVIB(__riscv_vmsgeu_vx_u32, LMUL_512, BOOL_512)(indices, start,
+                                                         VEC_ELEM_NUM),
+        RVVIB(__riscv_vmsltu_vx_u32, LMUL_512, BOOL_512)(
+            indices, start + group_size, VEC_ELEM_NUM),
+        VEC_ELEM_NUM);
+    rvv_f32_accum_t scalar = __riscv_vfmv_s_f_f32m1(0.0f, 1);
+    scalar = RVVI3(__riscv_vfredusum_vs_f32, LMUL_512, _f32m1_m)(
+        mask, reg, scalar, VEC_ELEM_NUM);
+    return __riscv_vfmv_f_s_f32m1_f32(scalar);
+  };
+
+  FP32Vec16 max(const FP32Vec16& b) const {
+    return FP32Vec16(
+        RVVI(__riscv_vfmax_vv_f32, LMUL_512)(reg, b.reg, VEC_ELEM_NUM));
+  }
+  FP32Vec16 min(const FP32Vec16& b) const {
+    return FP32Vec16(
+        RVVI(__riscv_vfmin_vv_f32, LMUL_512)(reg, b.reg, VEC_ELEM_NUM));
+  }
+  FP32Vec16 abs() const {
+    return FP32Vec16(RVVI(__riscv_vfabs_v_f32, LMUL_512)(reg, VEC_ELEM_NUM));
+  }
+
+  FP32Vec16 clamp(const FP32Vec16& min_v, const FP32Vec16& max_v) const {
+    return FP32Vec16(RVVI(__riscv_vfmin_vv_f32, LMUL_512)(
+        max_v.reg,
+        RVVI(__riscv_vfmax_vv_f32, LMUL_512)(min_v.reg, reg, VEC_ELEM_NUM),
+        VEC_ELEM_NUM));
+  }
+
+  void save(float* ptr) const {
+    RVVI(__riscv_vse32_v_f32, LMUL_512)(ptr, reg, VEC_ELEM_NUM);
+  }
+  void save(float* ptr, int elem_num) const {
+    RVVI(__riscv_vse32_v_f32, LMUL_512)(ptr, reg, elem_num);
+  }
+  void save_strided(float* ptr, ptrdiff_t stride) const {
+    ptrdiff_t byte_stride = stride * sizeof(float);
+    RVVI(__riscv_vsse32_v_f32, LMUL_512)(ptr, byte_stride, reg, VEC_ELEM_NUM);
+  }
+
+  FP32Vec16 exp() const {
+    // Clamp input to prevent NaN: exp(-inf) must return 0, not NaN.
+    // Without clamping, -inf * 0.0 = NaN in the final poly * scale step.
+    // Matches the clamping strategy used by x86 AVX-512 and ARM NEON.
+    constexpr float exp_lo = -87.3365447505f;  // ln(FLT_MIN)
+    constexpr float exp_hi = 88.7228391117f;   // ln(FLT_MAX)
+    fixed_fp32x16_t x = RVVI(__riscv_vfmin_vf_f32, LMUL_512)(
+        RVVI(__riscv_vfmax_vf_f32, LMUL_512)(reg, exp_lo, VEC_ELEM_NUM), exp_hi,
+        VEC_ELEM_NUM);
+
+    const float inv_ln2 = 1.44269504088896341f;
+    fixed_fp32x16_t x_scaled =
+        RVVI(__riscv_vfmul_vf_f32, LMUL_512)(x, inv_ln2, VEC_ELEM_NUM);
+    fixed_i32x16_t n_int =
+        RVVI(__riscv_vfcvt_x_f_v_i32, LMUL_512)(x_scaled, VEC_ELEM_NUM);
+    fixed_fp32x16_t n_float =
+        RVVI(__riscv_vfcvt_f_x_v_f32, LMUL_512)(n_int, VEC_ELEM_NUM);
+    fixed_fp32x16_t r =
+        RVVI(__riscv_vfsub_vv_f32, LMUL_512)(x_scaled, n_float, VEC_ELEM_NUM);
+
+    fixed_fp32x16_t poly =
+        RVVI(__riscv_vfmv_v_f_f32, LMUL_512)(0.001333355810164f, VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_512)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_512)(poly, r, VEC_ELEM_NUM),
+        0.009618129107628f, VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_512)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_512)(poly, r, VEC_ELEM_NUM),
+        0.055504108664821f, VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_512)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_512)(poly, r, VEC_ELEM_NUM),
+        0.240226506959101f, VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_512)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_512)(poly, r, VEC_ELEM_NUM),
+        0.693147180559945f, VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_512)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_512)(poly, r, VEC_ELEM_NUM), 1.0f,
+        VEC_ELEM_NUM);
+
+    fixed_i32x16_t biased_exp = RVVI(__riscv_vmax_vx_i32, LMUL_512)(
+        RVVI(__riscv_vadd_vx_i32, LMUL_512)(n_int, 127, VEC_ELEM_NUM), 0,
+        VEC_ELEM_NUM);
+    fixed_fp32x16_t scale =
+        RVVI4(__riscv_vreinterpret_v_i32, LMUL_512, _f32, LMUL_512)(
+            RVVI(__riscv_vsll_vx_i32, LMUL_512)(biased_exp, 23, VEC_ELEM_NUM));
+
+    return FP32Vec16(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_512)(poly, scale, VEC_ELEM_NUM));
+  }
+
+  FP32Vec16 tanh() const {
+    fixed_fp32x16_t x_clamped = RVVI(__riscv_vfmin_vf_f32, LMUL_512)(
+        RVVI(__riscv_vfmax_vf_f32, LMUL_512)(reg, -9.0f, VEC_ELEM_NUM), 9.0f,
+        VEC_ELEM_NUM);
+    FP32Vec16 exp_val = FP32Vec16(RVVI(__riscv_vfmul_vf_f32, LMUL_512)(
+                                      x_clamped, 2.0f, VEC_ELEM_NUM))
+                            .exp();
+    return FP32Vec16(RVVI(__riscv_vfdiv_vv_f32, LMUL_512)(
+        RVVI(__riscv_vfsub_vf_f32, LMUL_512)(exp_val.reg, 1.0f, VEC_ELEM_NUM),
+        RVVI(__riscv_vfadd_vf_f32, LMUL_512)(exp_val.reg, 1.0f, VEC_ELEM_NUM),
+        VEC_ELEM_NUM));
+  }
+
+  FP32Vec16 er() const {
+    const float p = 0.3275911f, a1 = 0.254829592f, a2 = -0.284496736f,
+                a3 = 1.421413741f, a4 = -1.453152027f, a5 = 1.061405429f;
+    fixed_fp32x16_t abs_x =
+        RVVI(__riscv_vfabs_v_f32, LMUL_512)(reg, VEC_ELEM_NUM);
+    fixed_fp32x16_t t = RVVI(__riscv_vfrdiv_vf_f32, LMUL_512)(
+        RVVI(__riscv_vfadd_vf_f32, LMUL_512)(
+            RVVI(__riscv_vfmul_vf_f32, LMUL_512)(abs_x, p, VEC_ELEM_NUM), 1.0f,
+            VEC_ELEM_NUM),
+        1.0f, VEC_ELEM_NUM);
+
+    fixed_fp32x16_t poly =
+        RVVI(__riscv_vfmv_v_f_f32, LMUL_512)(a5, VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_512)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_512)(poly, t, VEC_ELEM_NUM), a4,
+        VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_512)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_512)(poly, t, VEC_ELEM_NUM), a3,
+        VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_512)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_512)(poly, t, VEC_ELEM_NUM), a2,
+        VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfadd_vf_f32, LMUL_512)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_512)(poly, t, VEC_ELEM_NUM), a1,
+        VEC_ELEM_NUM);
+    poly = RVVI(__riscv_vfmul_vv_f32, LMUL_512)(poly, t, VEC_ELEM_NUM);
+
+    fixed_fp32x16_t exp_val =
+        FP32Vec16(RVVI(__riscv_vfneg_v_f32, LMUL_512)(
+                      RVVI(__riscv_vfmul_vv_f32, LMUL_512)(abs_x, abs_x,
+                                                           VEC_ELEM_NUM),
+                      VEC_ELEM_NUM))
+            .exp()
+            .reg;
+    fixed_fp32x16_t res = RVVI(__riscv_vfrsub_vf_f32, LMUL_512)(
+        RVVI(__riscv_vfmul_vv_f32, LMUL_512)(poly, exp_val, VEC_ELEM_NUM), 1.0f,
+        VEC_ELEM_NUM);
+
+    rvv_mask_f32x16_t mask = RVVIB(__riscv_vmflt_vf_f32, LMUL_512, BOOL_512)(
+        reg, 0.0f, VEC_ELEM_NUM);
+    return FP32Vec16(
+        RVVI3(__riscv_vfneg_v_f32, LMUL_512, _m)(mask, res, VEC_ELEM_NUM));
+  }
+};
+
+// ============================================================================
+// Type Traits & Global Helpers
+// ============================================================================
+
+template <typename T>
+struct VecType {
+  using vec_type = void;
+  using vec_t = void;
+};
+
+template <typename T>
+using vec_t = typename VecType<T>::vec_type;
+
+template <>
+struct VecType<float> {
+  using vec_type = FP32Vec8;
+  using vec_t = FP32Vec8;
+};
+template <>
+struct VecType<c10::Half> {
+  using vec_type = FP16Vec8;
+  using vec_t = FP16Vec8;
+};
+template <>
+struct VecType<c10::BFloat16> {
+  using vec_type = BF16Vec8;
+  using vec_t = BF16Vec8;
+};
+
+template <typename T>
+void storeFP32(float v, T* ptr) {
+  *ptr = v;
+}
+template <>
+inline void storeFP32<c10::Half>(float v, c10::Half* ptr) {
+  *reinterpret_cast<_Float16*>(ptr) = static_cast<_Float16>(v);
+}
+
+inline FP16Vec16::FP16Vec16(const FP32Vec16& v) {
+  reg = RVVI(__riscv_vfncvt_f_f_w_f16, LMUL_256)(v.reg, VEC_ELEM_NUM);
+}
+inline FP16Vec8::FP16Vec8(const FP32Vec8& v) {
+  reg = RVVI(__riscv_vfncvt_f_f_w_f16, LMUL_128)(v.reg, VEC_ELEM_NUM);
+}
+inline FP32Vec16::FP32Vec16(const FP16Vec16& v) {
+  reg = RVVI(__riscv_vfwcvt_f_f_v_f32, LMUL_512)(v.reg, VEC_ELEM_NUM);
+}
+inline void fma(FP32Vec16& acc, const FP32Vec16& a, const FP32Vec16& b) {
+  acc = acc.fma(a, b);
+}
+
+#ifdef __riscv_zvfbfmin
+template <>
+inline void storeFP32<c10::BFloat16>(float v, c10::BFloat16* ptr) {
+  *ptr = static_cast<__bf16>(v);
+};
+inline BF16Vec8::BF16Vec8(const FP32Vec8& v)
+    : reg(RVVI(__riscv_vfncvtbf16_f_f_w_bf16, LMUL_128)(v.reg, VEC_ELEM_NUM)) {
+      };
+inline BF16Vec16::BF16Vec16(const FP32Vec16& v)
+    : reg(RVVI(__riscv_vfncvtbf16_f_f_w_bf16, LMUL_256)(v.reg, VEC_ELEM_NUM)) {
+      };
+#else
+template <>
+inline void storeFP32<c10::BFloat16>(float v, c10::BFloat16* ptr) {
+  uint32_t val;
+  std::memcpy(&val, &v, 4);
+  *reinterpret_cast<uint16_t*>(ptr) = static_cast<uint16_t>(val >> 16);
+}
+inline BF16Vec8::BF16Vec8(const FP32Vec8& v) : reg_fp32(v.reg) {}
+inline BF16Vec16::BF16Vec16(const FP32Vec16& v) : reg_fp32(v.reg) {}
+#endif
+
+inline void prefetch(const void* addr) { __builtin_prefetch(addr, 0, 1); }
+
+}  // namespace vec_op
+
+#ifndef CPU_KERNEL_GUARD_IN
+  #define CPU_KERNEL_GUARD_IN(NAME)
+#endif
+
+#ifndef CPU_KERNEL_GUARD_OUT
+  #define CPU_KERNEL_GUARD_OUT(NAME)
+#endif
+
+#endif  // CPU_TYPES_RISCV_IMPL_HPP
diff --git a/csrc/cpu/cpu_types_scalar.hpp b/csrc/cpu/cpu_types_scalar.hpp
index f9da78283da5..d1c2fc85933a 100644
--- a/csrc/cpu/cpu_types_scalar.hpp
+++ b/csrc/cpu/cpu_types_scalar.hpp
@@ -6,6 +6,9 @@
 
 namespace vec_op {
 
+struct fp8_e4m3_tag {};
+struct fp8_e5m2_tag {};
+
 #define VLLM_DISPATCH_CASE_FLOATING_TYPES(...)            \
   AT_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__)    \
   AT_DISPATCH_CASE(at::ScalarType::BFloat16, __VA_ARGS__) \
@@ -145,6 +148,9 @@ struct BF16Vec32 : public Vec<BF16Vec32> {
   }
 
   void save(void* ptr) const { *reinterpret_cast<f16x32_t*>(ptr) = reg; }
+
+  explicit BF16Vec32(const uint8_t*, fp8_e4m3_tag) : reg{} {}
+  explicit BF16Vec32(const uint8_t*, fp8_e5m2_tag) : reg{} {}
 };
 
 struct FP32Vec4 : public Vec<FP32Vec4> {
@@ -302,6 +308,10 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
 
   FP32Vec16(const BF16Vec8& v) : FP32Vec16(FP32Vec8(v)) {};
 
+  // FP8 stub: dead code on scalar path (fp8 KV cache is x86-only), needed for
+  // load_b_pair_vec template to compile on all platforms.
+  explicit FP32Vec16(const BF16Vec32&, int) : reg{} {}
+
   FP32Vec16 operator*(const FP32Vec16& b) const {
     f32x16_t ret;
     unroll_loop<int, VEC_ELEM_NUM>(
diff --git a/csrc/cpu/cpu_types_vsx.hpp b/csrc/cpu/cpu_types_vsx.hpp
index 089b9840ea2e..87c7a9dd51f4 100644
--- a/csrc/cpu/cpu_types_vsx.hpp
+++ b/csrc/cpu/cpu_types_vsx.hpp
@@ -9,6 +9,10 @@
 
 namespace vec_op {
 
+// FP8 tag types for tag dispatch (see cpu_attn_vec.hpp)
+struct fp8_e4m3_tag {};
+struct fp8_e5m2_tag {};
+
 // FIXME: FP16 is not fully supported in Torch-CPU
 #define VLLM_DISPATCH_CASE_FLOATING_TYPES(...)         \
   AT_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__) \
@@ -142,6 +146,9 @@ struct BF16Vec32 : public Vec<BF16Vec32> {
       : reg({vec8_data.reg, vec8_data.reg, vec8_data.reg, vec8_data.reg}) {}
 
   void save(void* ptr) const { *reinterpret_cast<ss16x8x4_t*>(ptr) = reg; }
+
+  explicit BF16Vec32(const uint8_t*, fp8_e4m3_tag) : reg{} {}
+  explicit BF16Vec32(const uint8_t*, fp8_e5m2_tag) : reg{} {}
 };
 
 struct FP32Vec4 : public Vec<FP32Vec4> {
@@ -404,6 +411,10 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
 
   explicit FP32Vec16(const BF16Vec8& v) : FP32Vec16(FP32Vec8(v)) {}
 
+  // FP8 stub: dead code on PowerPC (fp8 KV cache is x86-only), needed for
+  // load_b_pair_vec template to compile on all platforms.
+  explicit FP32Vec16(const BF16Vec32&, int) : reg{} {}
+
   explicit FP32Vec16(const INT32Vec16& v) {
     reg.val[0] = vec_ctf(v.reg.val[0], 0);
     reg.val[1] = vec_ctf(v.reg.val[1], 0);
diff --git a/csrc/cpu/cpu_types_vxe.hpp b/csrc/cpu/cpu_types_vxe.hpp
index 700ba0306239..2e0af466b649 100644
--- a/csrc/cpu/cpu_types_vxe.hpp
+++ b/csrc/cpu/cpu_types_vxe.hpp
@@ -8,6 +8,9 @@
 #include <torch/all.h>
 namespace vec_op {
 
+struct fp8_e4m3_tag {};
+struct fp8_e5m2_tag {};
+
 #define vec_neg(a) (-(a))
 #define vec_add(a, b) ((a) + (b))
 #define vec_sub(a, b) ((a) - (b))
@@ -241,6 +244,9 @@ struct BF16Vec32 : public Vec<BF16Vec32> {
   explicit BF16Vec32(const BF16Vec8& vec8_data)
       : reg({vec8_data.reg, vec8_data.reg, vec8_data.reg, vec8_data.reg}) {}
 
+  explicit BF16Vec32(const uint8_t*, fp8_e4m3_tag) : reg{} {}
+  explicit BF16Vec32(const uint8_t*, fp8_e5m2_tag) : reg{} {}
+
   void save(void* ptr) const { *reinterpret_cast<ss16x8x4_t*>(ptr) = reg; }
 };
 
@@ -682,6 +688,10 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
 
   explicit FP32Vec16(const BF16Vec8& v) : FP32Vec16(FP32Vec8(v)) {}
 
+  // FP8 stub: dead code on s390x (fp8 KV cache is x86-only), needed for
+  // load_b_pair_vec template to compile on all platforms.
+  explicit FP32Vec16(const BF16Vec32&, int) : reg{} {}
+
   FP32Vec16 operator*(const FP32Vec16& b) const {
     return FP32Vec16(f32x4x4_t({vec_mul(reg.val[0], b.reg.val[0]),
                                 vec_mul(reg.val[1], b.reg.val[1]),
diff --git a/csrc/cpu/cpu_types_x86.hpp b/csrc/cpu/cpu_types_x86.hpp
index d94af338ac1c..396b9b7e041f 100644
--- a/csrc/cpu/cpu_types_x86.hpp
+++ b/csrc/cpu/cpu_types_x86.hpp
@@ -11,6 +11,17 @@ static_assert(false, "AVX2 must be supported for the current implementation.");
 
 namespace vec_op {
 
+// Tags for FP8 BF16Vec32 constructors (avoid overload collision with
+// BF16Vec32(void*)).
+// VEC path (FP8 → pseudo-FP16 layout, scale correction applied later):
+struct fp8_e4m3_tag {};  // E4M3 → pseudo-FP16; BF16 value = true_E4M3 * 2^-8
+struct fp8_e5m2_tag {};  // E5M2 → FP16 bits directly (same exponent bias=15)
+// AMX path (FP8 → unscaled BF16, no FP32 round-trip):
+// BF16 value = true_E4M3 * 2^-120 (E4M3) or true_E5M2 * 2^-112 (E5M2).
+// Exponent rebiasing is folded into k/v scales by the caller.
+struct fp8_bf16_e4m3_tag {};
+struct fp8_bf16_e5m2_tag {};
+
 #define VLLM_DISPATCH_CASE_FLOATING_TYPES(...)            \
   AT_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__)    \
   AT_DISPATCH_CASE(at::ScalarType::BFloat16, __VA_ARGS__) \
@@ -111,9 +122,17 @@ struct FP16Vec16 : public Vec<FP16Vec16> {
   void save(void* ptr) const { _mm256_storeu_si256((__m256i*)ptr, reg); }
 
   void save(void* ptr, const int elem_num) const {
+#ifdef __AVX512BW__
     constexpr uint32_t M = 0xFFFFFFFF;
     __mmask16 mask = _cvtu32_mask16(M >> (32 - elem_num));
     _mm256_mask_storeu_epi16(ptr, mask, reg);
+#else
+    // Fallback for lack of 16-bit masked store
+    int16_t tmp[VEC_ELEM_NUM];
+    _mm256_storeu_si256((__m256i*)tmp, reg);
+    for (int i = 0; i < elem_num; ++i)
+      reinterpret_cast<int16_t*>(ptr)[i] = tmp[i];
+#endif
   }
 };
 
@@ -150,9 +169,17 @@ struct BF16Vec16 : public Vec<BF16Vec16> {
   void save(void* ptr) const { _mm256_storeu_si256((__m256i*)ptr, reg); }
 
   void save(void* ptr, const int elem_num) const {
+#ifdef __AVX512BW__
     constexpr uint32_t M = 0xFFFFFFFF;
     __mmask16 mask = _cvtu32_mask16(M >> (32 - elem_num));
     _mm256_mask_storeu_epi16(ptr, mask, reg);
+#else
+    // Fallback for lack of 16-bit masked store
+    int16_t tmp[VEC_ELEM_NUM];
+    _mm256_storeu_si256((__m256i*)tmp, reg);
+    for (int i = 0; i < elem_num; ++i)
+      reinterpret_cast<int16_t*>(ptr)[i] = tmp[i];
+#endif
   }
 };
 
@@ -176,6 +203,50 @@ struct BF16Vec32 : public Vec<BF16Vec32> {
                                (__m128i)vec8_data.reg, 2),
             (__m128i)vec8_data.reg, 3)) {}
 
+  // Decode 32 FP8-E4M3 bytes to pseudo-FP16 layout (stored in the BF16
+  // register).  Result = true_E4M3 * 2^-8; caller applies scale * 2^8.
+  explicit BF16Vec32(const uint8_t* ptr, fp8_e4m3_tag) {
+    __m256i b8 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
+    __m512i b16 = _mm512_cvtepu8_epi16(b8);
+    __m512i sign =
+        _mm512_slli_epi16(_mm512_and_si512(b16, _mm512_set1_epi16(0x80)), 8);
+    __m512i payload =
+        _mm512_slli_epi16(_mm512_and_si512(b16, _mm512_set1_epi16(0x7F)), 7);
+    reg = _mm512_or_si512(sign, payload);
+  }
+
+  // Decode 32 FP8-E5M2 bytes to FP16 layout.
+  // E5M2 and FP16 share the same 5-bit exponent bias (15), so FP8 byte b maps
+  // directly to FP16 bits by shifting left 8 — no sign/payload reconstruction.
+  explicit BF16Vec32(const uint8_t* ptr, fp8_e5m2_tag) {
+    __m256i b8 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
+    reg = _mm512_slli_epi16(_mm512_cvtepu8_epi16(b8), 8);
+  }
+
+  // Direct FP8-E4M3 → unscaled BF16 for AMX (no FP32 round-trip).
+  // BF16 value = true_E4M3 * 2^-120; exponent rebiasing folded into k/v scales.
+  explicit BF16Vec32(const uint8_t* ptr, fp8_bf16_e4m3_tag) {
+    __m256i b8 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
+    __m512i b16 = _mm512_cvtepu8_epi16(b8);
+    __m512i sign =
+        _mm512_slli_epi16(_mm512_and_si512(b16, _mm512_set1_epi16(0x80)), 8);
+    __m512i payload =
+        _mm512_slli_epi16(_mm512_and_si512(b16, _mm512_set1_epi16(0x7F)), 4);
+    reg = _mm512_or_si512(sign, payload);
+  }
+
+  // Direct FP8-E5M2 → unscaled BF16 for AMX (no FP32 round-trip).
+  // BF16 value = true_E5M2 * 2^-112; exponent rebiasing folded into k/v scales.
+  explicit BF16Vec32(const uint8_t* ptr, fp8_bf16_e5m2_tag) {
+    __m256i b8 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
+    __m512i b16 = _mm512_cvtepu8_epi16(b8);
+    __m512i sign =
+        _mm512_slli_epi16(_mm512_and_si512(b16, _mm512_set1_epi16(0x80)), 8);
+    __m512i payload =
+        _mm512_slli_epi16(_mm512_and_si512(b16, _mm512_set1_epi16(0x7F)), 5);
+    reg = _mm512_or_si512(sign, payload);
+  }
+
   void save(void* ptr) const { *reinterpret_cast<__m512i*>(ptr) = reg; }
 };
 #else
@@ -192,13 +263,83 @@ struct BF16Vec32 : public Vec<BF16Vec32> {
   explicit BF16Vec32(__m256i low, __m256i high)
       : reg_low(low), reg_high(high) {}
 
+  explicit BF16Vec32()
+      : reg_low(_mm256_setzero_si256()), reg_high(_mm256_setzero_si256()) {}
+
   explicit BF16Vec32(BF16Vec8& vec8_data)
-      : reg_low((__m256i)_mm256_inserti32x4(
-            _mm256_castsi128_si256((__m128i)vec8_data.reg),
-            (__m128i)vec8_data.reg, 1)),
-        reg_high((__m256i)_mm256_inserti32x4(
-            _mm256_castsi128_si256((__m128i)vec8_data.reg),
-            (__m128i)vec8_data.reg, 1)) {}
+      : reg_low(_mm256_broadcastsi128_si256((__m128i)vec8_data.reg)),
+        reg_high(_mm256_broadcastsi128_si256((__m128i)vec8_data.reg)) {}
+
+  // E4M3 decode (AVX2 path) — same bit-layout trick as the AVX512 variant
+  // above.  Result = true_E4M3 * 2^-8; caller applies scale * 2^8.
+  explicit BF16Vec32(const uint8_t* ptr, fp8_e4m3_tag) {
+    __m256i b8 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
+    __m128i b8_low = _mm256_extracti128_si256(b8, 0);
+    __m128i b8_high = _mm256_extracti128_si256(b8, 1);
+    __m256i b16_low = _mm256_cvtepu8_epi16(b8_low);
+    __m256i b16_high = _mm256_cvtepu8_epi16(b8_high);
+
+    __m256i sign_low = _mm256_slli_epi16(
+        _mm256_and_si256(b16_low, _mm256_set1_epi16(0x80)), 8);
+    __m256i payload_low = _mm256_slli_epi16(
+        _mm256_and_si256(b16_low, _mm256_set1_epi16(0x7F)), 7);
+    __m256i sign_high = _mm256_slli_epi16(
+        _mm256_and_si256(b16_high, _mm256_set1_epi16(0x80)), 8);
+    __m256i payload_high = _mm256_slli_epi16(
+        _mm256_and_si256(b16_high, _mm256_set1_epi16(0x7F)), 7);
+    reg_low = _mm256_or_si256(sign_low, payload_low);
+    reg_high = _mm256_or_si256(sign_high, payload_high);
+  }
+
+  // E5M2 decode (AVX2 path) — b << 8 maps to FP16 bits; see AVX512 variant
+  // above.
+  explicit BF16Vec32(const uint8_t* ptr, fp8_e5m2_tag) {
+    __m256i b8 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
+    __m128i b8_low = _mm256_extracti128_si256(b8, 0);
+    __m128i b8_high = _mm256_extracti128_si256(b8, 1);
+    reg_low = _mm256_slli_epi16(_mm256_cvtepu8_epi16(b8_low), 8);
+    reg_high = _mm256_slli_epi16(_mm256_cvtepu8_epi16(b8_high), 8);
+  }
+
+  // Direct FP8-E4M3 → unscaled BF16 for AMX (AVX2 path, no FP32 round-trip).
+  // BF16 value = true_E4M3 * 2^-120; exponent rebiasing folded into k/v scales.
+  explicit BF16Vec32(const uint8_t* ptr, fp8_bf16_e4m3_tag) {
+    __m256i b8 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
+    __m128i b8_low = _mm256_extracti128_si256(b8, 0);
+    __m128i b8_high = _mm256_extracti128_si256(b8, 1);
+    __m256i b16_low = _mm256_cvtepu8_epi16(b8_low);
+    __m256i b16_high = _mm256_cvtepu8_epi16(b8_high);
+    reg_low = _mm256_or_si256(
+        _mm256_slli_epi16(_mm256_and_si256(b16_low, _mm256_set1_epi16(0x80)),
+                          8),
+        _mm256_slli_epi16(_mm256_and_si256(b16_low, _mm256_set1_epi16(0x7F)),
+                          4));
+    reg_high = _mm256_or_si256(
+        _mm256_slli_epi16(_mm256_and_si256(b16_high, _mm256_set1_epi16(0x80)),
+                          8),
+        _mm256_slli_epi16(_mm256_and_si256(b16_high, _mm256_set1_epi16(0x7F)),
+                          4));
+  }
+
+  // Direct FP8-E5M2 → unscaled BF16 for AMX (AVX2 path, no FP32 round-trip).
+  // BF16 value = true_E5M2 * 2^-112; exponent rebiasing folded into k/v scales.
+  explicit BF16Vec32(const uint8_t* ptr, fp8_bf16_e5m2_tag) {
+    __m256i b8 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
+    __m128i b8_low = _mm256_extracti128_si256(b8, 0);
+    __m128i b8_high = _mm256_extracti128_si256(b8, 1);
+    __m256i b16_low = _mm256_cvtepu8_epi16(b8_low);
+    __m256i b16_high = _mm256_cvtepu8_epi16(b8_high);
+    reg_low = _mm256_or_si256(
+        _mm256_slli_epi16(_mm256_and_si256(b16_low, _mm256_set1_epi16(0x80)),
+                          8),
+        _mm256_slli_epi16(_mm256_and_si256(b16_low, _mm256_set1_epi16(0x7F)),
+                          5));
+    reg_high = _mm256_or_si256(
+        _mm256_slli_epi16(_mm256_and_si256(b16_high, _mm256_set1_epi16(0x80)),
+                          8),
+        _mm256_slli_epi16(_mm256_and_si256(b16_high, _mm256_set1_epi16(0x7F)),
+                          5));
+  }
 
   void save(void* ptr) const {
     _mm256_storeu_si256((__m256i*)ptr, reg_low);
@@ -390,6 +531,11 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
       : reg(_mm512_castsi512_ps(
             _mm512_bslli_epi128(_mm512_cvtepu16_epi32(v.reg), 2))) {}
 
+  explicit FP32Vec16(const BF16Vec32& v, int upper) {
+    __m256i v_half_i = _mm512_extracti32x8_epi32(v.reg, upper);
+    reg = _mm512_cvtph_ps(v_half_i);
+  }
+
   explicit FP32Vec16(const FP16Vec16& v) : reg(_mm512_cvtph_ps(v.reg)) {}
 
   explicit FP32Vec16(const FP16Vec8& v) : FP32Vec16(FP32Vec8(v)) {}
@@ -494,6 +640,14 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
   explicit FP32Vec16(const FP32Vec8& data)
       : reg_low(data.reg), reg_high(data.reg) {}
 
+  explicit FP32Vec16(const BF16Vec32& v, int upper) {
+    const __m256i& half = upper ? v.reg_high : v.reg_low;
+    __m128i lo = _mm256_extractf128_si256(half, 0);
+    __m128i hi = _mm256_extractf128_si256(half, 1);
+    reg_low = _mm256_cvtph_ps(lo);
+    reg_high = _mm256_cvtph_ps(hi);
+  }
+
   explicit FP32Vec16(const FP16Vec16& v) {
     __m128i low = _mm256_extractf128_si256(v.reg, 0);
     __m128i high = _mm256_extractf128_si256(v.reg, 1);
@@ -535,6 +689,11 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
                      _mm256_sub_ps(reg_high, b.reg_high));
   }
 
+  FP32Vec16 operator-() const {
+    const __m256 neg = _mm256_set1_ps(-0.0f);
+    return FP32Vec16(_mm256_xor_ps(reg_low, neg), _mm256_xor_ps(reg_high, neg));
+  }
+
   FP32Vec16 operator/(const FP32Vec16& b) const {
     return FP32Vec16(_mm256_div_ps(reg_low, b.reg_low),
                      _mm256_div_ps(reg_high, b.reg_high));
@@ -600,6 +759,85 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
     _mm256_storeu_ps(ptr, reg_low);
     _mm256_storeu_ps(ptr + 8, reg_high);
   }
+
+  void save(float* ptr, const int elem_num) const {
+    // Partial store: cmpgt produces a sign-bit mask (0xFFFFFFFF/0 per lane)
+    // for the first elem_num lanes, applied across the two 8-wide halves.
+    if (elem_num <= 8) {
+      __m256i mask =
+          _mm256_cmpgt_epi32(_mm256_set1_epi32(elem_num),
+                             _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7));
+      _mm256_maskstore_ps(ptr, mask, reg_low);
+    } else {
+      _mm256_storeu_ps(ptr, reg_low);
+      __m256i mask =
+          _mm256_cmpgt_epi32(_mm256_set1_epi32(elem_num - 8),
+                             _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7));
+      _mm256_maskstore_ps(ptr + 8, mask, reg_high);
+    }
+  }
+
+  FP32Vec16 clamp(const FP32Vec16& min, const FP32Vec16& max) const {
+    return FP32Vec16(
+        _mm256_min_ps(max.reg_low, _mm256_max_ps(min.reg_low, reg_low)),
+        _mm256_min_ps(max.reg_high, _mm256_max_ps(min.reg_high, reg_high)));
+  }
+
+  FP32Vec16 abs() const {
+    const __m256 sign_mask = _mm256_set1_ps(-0.0f);
+    return FP32Vec16(_mm256_andnot_ps(sign_mask, reg_low),
+                     _mm256_andnot_ps(sign_mask, reg_high));
+  }
+
+  FP32Vec16 min(const FP32Vec16& b) const {
+    return FP32Vec16(_mm256_min_ps(reg_low, b.reg_low),
+                     _mm256_min_ps(reg_high, b.reg_high));
+  }
+
+  // Partial element-wise min over the first elem_num lanes only (tail path).
+  // Scalar via AliasReg: AVX2 has no masked vminps, so we spill, loop, reload.
+  FP32Vec16 min(const FP32Vec16& b, const int elem_num) const {
+    AliasReg ar_this_low, ar_this_high, ar_b_low, ar_b_high;
+    ar_this_low.reg = reg_low;
+    ar_this_high.reg = reg_high;
+    ar_b_low.reg = b.reg_low;
+    ar_b_high.reg = b.reg_high;
+    for (int i = 0; i < elem_num && i < 8; ++i)
+      ar_this_low.values[i] =
+          std::min(ar_this_low.values[i], ar_b_low.values[i]);
+    for (int i = 0; i < elem_num - 8 && i < 8; ++i)
+      ar_this_high.values[i] =
+          std::min(ar_this_high.values[i], ar_b_high.values[i]);
+    return FP32Vec16(ar_this_low.reg, ar_this_high.reg);
+  }
+
+  // Partial element-wise max over the first elem_num lanes only (tail path).
+  // Scalar via AliasReg: AVX2 has no masked vmaxps, so we spill, loop, reload.
+  FP32Vec16 max(const FP32Vec16& b, const int elem_num) const {
+    AliasReg ar_this_low, ar_this_high, ar_b_low, ar_b_high;
+    ar_this_low.reg = reg_low;
+    ar_this_high.reg = reg_high;
+    ar_b_low.reg = b.reg_low;
+    ar_b_high.reg = b.reg_high;
+    for (int i = 0; i < elem_num && i < 8; ++i)
+      ar_this_low.values[i] =
+          std::max(ar_this_low.values[i], ar_b_low.values[i]);
+    for (int i = 0; i < elem_num - 8 && i < 8; ++i)
+      ar_this_high.values[i] =
+          std::max(ar_this_high.values[i], ar_b_high.values[i]);
+    return FP32Vec16(ar_this_low.reg, ar_this_high.reg);
+  }
+
+  float reduce_min() const {
+    __m256 v = _mm256_min_ps(reg_low, reg_high);
+    __m256 v_shuffled = _mm256_permute_ps(v, 0b00001011);
+    __m256 v_min = _mm256_min_ps(v, v_shuffled);
+    v_shuffled = _mm256_permute_ps(v_min, 0b00000001);
+    v_min = _mm256_min_ps(v_min, v_shuffled);
+    v_shuffled = _mm256_permute2f128_ps(v_min, v_min, 0b00000001);
+    v_min = _mm256_min_ps(v_min, v_shuffled);
+    return _mm256_cvtss_f32(v_min);
+  }
 };
 #endif
 
@@ -652,6 +890,34 @@ struct INT8Vec64 : public Vec<INT8Vec64> {
   // non-temporal save
   void nt_save(int8_t* ptr) { _mm512_stream_si512((__m512i*)ptr, reg); }
 };
+#else
+struct INT8Vec16 : public Vec<INT8Vec16> {
+  constexpr static int VEC_ELEM_NUM = 16;
+  union AliasReg {
+    __m128i reg;
+    int8_t values[VEC_ELEM_NUM];
+  };
+
+  __m128i reg;
+
+  explicit INT8Vec16(const FP32Vec16& vec) {
+    __m256i lo_i32 = _mm256_cvtps_epi32(vec.reg_low);
+    __m256i hi_i32 = _mm256_cvtps_epi32(vec.reg_high);
+    __m256i packed16 = _mm256_packs_epi32(lo_i32, hi_i32);
+    packed16 = _mm256_permute4x64_epi64(packed16, 0xD8);
+    __m256i packed8 = _mm256_packs_epi16(packed16, _mm256_setzero_si256());
+    packed8 = _mm256_permute4x64_epi64(packed8, 0xD8);
+    reg = _mm256_castsi256_si128(packed8);
+  }
+
+  void save(int8_t* ptr) const { _mm_storeu_si128((__m128i*)ptr, reg); }
+
+  void save(int8_t* ptr, const int elem_num) const {
+    AliasReg ar;
+    ar.reg = reg;
+    for (int i = 0; i < elem_num; ++i) ptr[i] = ar.values[i];
+  }
+};
 #endif
 
 template <typename T>
diff --git a/csrc/cpu/dnnl_kernels.cpp b/csrc/cpu/dnnl_kernels.cpp
index 80be42bb7639..058fe25b0e26 100644
--- a/csrc/cpu/dnnl_kernels.cpp
+++ b/csrc/cpu/dnnl_kernels.cpp
@@ -215,7 +215,7 @@ void dynamic_quant_epilogue(const float* input, scalar_t* output,
         float zp_scale_val = a_scale[i] * static_cast<float>(azp[i]);
         token_zp_scale_vec = cvt_vec_t(zp_scale_val);
       }
-      for (; j < hidden_size - vec_elem_num; ++j) {
+      for (; j < hidden_size - vec_elem_num; j += vec_elem_num) {
         cvt_vec_t elems_fp32(input_ptr + j);
         elems_fp32 = elems_fp32 * token_scale_vec;
         if constexpr (AZP) {
diff --git a/csrc/cpu/generate_cpu_attn_dispatch.py b/csrc/cpu/generate_cpu_attn_dispatch.py
index f1d08017feae..7c7123a6def5 100644
--- a/csrc/cpu/generate_cpu_attn_dispatch.py
+++ b/csrc/cpu/generate_cpu_attn_dispatch.py
@@ -8,7 +8,7 @@
 import os
 
 # Head dimensions divisible by 32 (support all ISAs)
-HEAD_DIMS_32 = [32, 64, 96, 128, 160, 192, 224, 256]
+HEAD_DIMS_32 = [32, 64, 96, 128, 160, 192, 224, 256, 512]
 
 # Head dimensions divisible by 16 but not 32 (VEC16 only)
 HEAD_DIMS_16 = [80, 112]
@@ -20,73 +20,99 @@
     "VEC16": 2,
     "NEON": 3,
     "VXE": 4,
+    "RVV": 5,
+    "VSX": 6,
+}
+
+# KV cache index: 0 = auto (same as scalar_t), 1 = fp8_e4m3, 2 = fp8_e5m2
+KV_CACHE_IDX = {
+    "auto": 0,
+    "fp8_e4m3": 1,
+    "fp8_e5m2": 2,
+}
+
+# C++ type for each kv_cache index
+KV_CACHE_CPP_TYPES = {
+    "auto": "scalar_t",
+    "fp8_e4m3": "c10::Float8_e4m3fn",
+    "fp8_e5m2": "c10::Float8_e5m2",
 }
 
 # ISAs supported for head_dims divisible by 32
-ISA_FOR_32 = ["AMX", "NEON", "VEC", "VEC16", "VXE"]
+ISA_FOR_32 = ["AMX", "NEON", "VEC", "VEC16", "VXE", "RVV", "VSX"]
 
 # ISAs supported for head_dims divisible by 16 only
 ISA_FOR_16 = ["VEC16"]
 
+# ISAs that support FP8 KV cache (x86 AVX2/AVX-512 required)
+ISA_FOR_FP8 = ["AMX", "VEC"]
 
-def encode_params(head_dim: int, isa_type: str) -> int:
-    """Encode head_dim and ISA type into a single int64_t."""
+
+def encode_params(head_dim: int, isa_type: str, kv_cache: str = "auto") -> int:
+    """Encode head_dim, ISA type, and KV cache type into a single int64_t."""
     isa_val = ISA_TYPES[isa_type]
-    # Encoding: (head_dim << 8) | isa_type
-    # This allows head_dim up to 2^56 - 1 and 256 ISA types
-    return (head_dim << 8) | isa_val
+    kv_val = KV_CACHE_IDX[kv_cache]
+    # Encoding: (head_dim << 16) | (kv_cache_idx << 8) | isa_type
+    # This allows head_dim up to 2^48 - 1, 256 KV cache types, and 256 ISA types
+    return (head_dim << 16) | (kv_val << 8) | isa_val
+
+
+def _make_case(
+    head_dim: int, isa: str, kv_cache: str = "auto", isa_override: str | None = None
+) -> str:
+    """Generate a single switch case line."""
+    encoded = encode_params(head_dim, isa, kv_cache)
+    actual_isa = isa_override if isa_override else isa
+    cpp_type = KV_CACHE_CPP_TYPES[kv_cache]
+    attn_impl = (
+        f"cpu_attention::AttentionImpl<"
+        f"cpu_attention::ISA::{actual_isa}, \\\n"
+        f"                                                       "
+        f"scalar_t, head_dim, {cpp_type}>"
+    )
+    comment = (
+        f"head_dim={head_dim}, isa={isa}"
+        if kv_cache == "auto"
+        else f"head_dim={head_dim}, isa={isa}, kv_cache={kv_cache}"
+    )
+    return (
+        f"""      case {encoded}LL: {{ """
+        f"""/* {comment} */ \\"""
+        f"""
+        constexpr size_t head_dim = {head_dim}; \\"""
+        f"""
+        using attn_impl = {attn_impl}; \\"""
+        f"""
+        return __VA_ARGS__(); \\"""
+        f"""
+      }} \\"""
+    )
 
 
-def generate_cases_for_isa_group(isa_list: list[str]) -> str:
+def generate_cases_for_isa_group(isa_list: list[str], include_fp8: bool = False) -> str:
     """Generate switch cases for a specific ISA group."""
     cases = []
 
-    # Generate cases for head_dims divisible by 32
+    # Non-FP8 cases for head_dims divisible by 32
     for head_dim in HEAD_DIMS_32:
         for isa in isa_list:
             if isa not in ISA_FOR_32:
                 continue
-            encoded = encode_params(head_dim, isa)
-            case_str = (
-                f"""      case {encoded}LL: {{ """
-                f"""/* head_dim={head_dim}, isa={isa} */ \\"""
-                f"""
-        constexpr size_t head_dim = {head_dim}; \\"""
-                f"""
-        using attn_impl = cpu_attention::AttentionImpl<"""
-                f"""cpu_attention::ISA::{isa}, \\"""
-                f"""
-                                                       """
-                f"""scalar_t, head_dim>; \\"""
-                f"""
-        return __VA_ARGS__(); \\"""
-                f"""
-      }} \\"""
-            )
-            cases.append(case_str)
+            cases.append(_make_case(head_dim, isa, "auto"))
 
-    # Generate cases for head_dims divisible by 16 only
+    # Non-FP8 cases for head_dims divisible by 16 only
     for head_dim in HEAD_DIMS_16:
         for isa in isa_list:
-            encoded = encode_params(head_dim, isa)
-            case_str = (
-                f"""      case {encoded}LL: {{ """
-                f"""/* head_dim={head_dim}, isa={isa} """
-                f"""(using VEC16) */ \\"""
-                f"""
-        constexpr size_t head_dim = {head_dim}; \\"""
-                f"""
-        using attn_impl = cpu_attention::AttentionImpl<"""
-                f"""cpu_attention::ISA::VEC16, \\"""
-                f"""
-                                                       """
-                f"""scalar_t, head_dim>; \\"""
-                f"""
-        return __VA_ARGS__(); \\"""
-                f"""
-      }} \\"""
-            )
-            cases.append(case_str)
+            cases.append(_make_case(head_dim, isa, "auto", isa_override="VEC16"))
+
+    # FP8 cases: only AMX and VEC, only head_dims divisible by 32
+    if include_fp8:
+        for fp8_type in ("fp8_e4m3", "fp8_e5m2"):
+            for head_dim in HEAD_DIMS_32:
+                for isa in isa_list:
+                    if isa not in ISA_FOR_FP8:
+                        continue
+                    cases.append(_make_case(head_dim, isa, fp8_type))
 
     return "\n".join(cases)
 
@@ -94,8 +120,9 @@ def generate_cases_for_isa_group(isa_list: list[str]) -> str:
 def generate_helper_function() -> str:
     """Generate helper function to encode parameters."""
     return """
-inline int64_t encode_cpu_attn_params(int64_t head_dim, cpu_attention::ISA isa) {
-  return (head_dim << 8) | static_cast<int64_t>(isa);
+inline int64_t encode_cpu_attn_params(int64_t head_dim, cpu_attention::ISA isa,
+                                      int64_t kv_cache_idx = 0) {
+  return (head_dim << 16) | (kv_cache_idx << 8) | static_cast<int64_t>(isa);
 }
 """
 
@@ -123,93 +150,115 @@ def generate_header_file() -> str:
   #include "cpu_attn_vxe.hpp"
 #endif
 
-"""
-
-    header += generate_helper_function()
-
-    # Generate dispatch macro with conditional compilation for different ISA sets
-    header += """
-// Dispatch macro using encoded parameters
-"""
-
-    # x86_64 with AMX
-    header += """#if defined(CPU_CAPABILITY_AMXBF16)
-#define CPU_ATTN_DISPATCH(HEAD_DIM, ISA_TYPE, ...) \\
-  [&] { \\
-    int64_t encoded_params = encode_cpu_attn_params(HEAD_DIM, ISA_TYPE); \\
-    switch (encoded_params) { \\
-"""
-    header += generate_cases_for_isa_group(["AMX", "VEC", "VEC16"])
-    header += """
-      default: { \\
-        TORCH_CHECK(false, "Unsupported CPU attention configuration: head_dim=" + \\
-                    std::to_string(HEAD_DIM) + " isa=" + \\
-                    std::to_string(static_cast<int>(ISA_TYPE))); \\
-      } \\
-    } \\
-  }()
+// cpu_attn_rvv.hpp supports VLEN=128 and VLEN=256 via RVVI() macros.
+// Other VLENs and scalar RISC-V builds skip it entirely.
+#if defined(__riscv) && defined(__riscv_v_min_vlen) && \
+    (__riscv_v_min_vlen == 128 || __riscv_v_min_vlen == 256)
+  #include "cpu_attn_rvv.hpp"
+#endif
 
-"""
+#ifdef __powerpc__
+  #include "cpu_attn_vsx.hpp"
+#endif
 
-    # ARM64 with NEON
-    header += """#elif defined(__aarch64__)
-#define CPU_ATTN_DISPATCH(HEAD_DIM, ISA_TYPE, ...) \\
-  [&] { \\
-    int64_t encoded_params = encode_cpu_attn_params(HEAD_DIM, ISA_TYPE); \\
-    switch (encoded_params) { \\
 """
-    header += generate_cases_for_isa_group(["NEON", "VEC", "VEC16"])
-    header += """
-      default: { \\
-        TORCH_CHECK(false, "Unsupported CPU attention configuration: head_dim=" + \\
-                    std::to_string(HEAD_DIM) + " isa=" + \\
-                    std::to_string(static_cast<int>(ISA_TYPE))); \\
-      } \\
-    } \\
-  }()
 
-"""
+    header += generate_helper_function()
 
-    # s390x with VXE
-    header += """#elif defined(__s390x__)
-#define CPU_ATTN_DISPATCH(HEAD_DIM, ISA_TYPE, ...) \\
-  [&] { \\
-    int64_t encoded_params = encode_cpu_attn_params(HEAD_DIM, ISA_TYPE); \\
-    switch (encoded_params) { \\
-"""
-    header += generate_cases_for_isa_group(["VXE", "VEC", "VEC16"])
+    # Generate dispatch macro with conditional compilation for different ISA sets
     header += """
-      default: { \\
-        TORCH_CHECK(false, "Unsupported CPU attention configuration: head_dim=" + \\
-                    std::to_string(HEAD_DIM) + " isa=" + \\
-                    std::to_string(static_cast<int>(ISA_TYPE))); \\
-      } \\
-    } \\
-  }()
-
-"""
-
-    # Fallback: VEC and VEC16 only
-    header += """#else
-#define CPU_ATTN_DISPATCH(HEAD_DIM, ISA_TYPE, ...) \\
-  [&] { \\
-    int64_t encoded_params = encode_cpu_attn_params(HEAD_DIM, ISA_TYPE); \\
-    switch (encoded_params) { \\
+// Dispatch macro using encoded parameters.
+// KV_CACHE_IDX: Fp8KVCacheDataType enum value (kAuto=0, kFp8E4M3=1, kFp8E5M2=2).
+// FP8 cases (kv_cache_idx != 0) are generated on x86 platforms with AVX2 or
+// AVX-512: BF16Vec32 FP8 constructors have both AVX-512 and AVX2 implementations
+// in cpu_types_x86.hpp. Non-x86 platforms (#else fallback) have fp8=False.
 """
-    header += generate_cases_for_isa_group(["VEC", "VEC16"])
-    header += """
-      default: { \\
-        TORCH_CHECK(false, "Unsupported CPU attention configuration: head_dim=" + \\
-                    std::to_string(HEAD_DIM) + " isa=" + \\
-                    std::to_string(static_cast<int>(ISA_TYPE))); \\
-      } \\
-    } \\
-  }()
-
-#endif  /* CPU_CAPABILITY_AMXBF16 / __aarch64__ / __s390x__ */
 
-#endif  // CPU_ATTN_DISPATCH_GENERATED_H
-"""
+    def _macro_block(guard: str, isa_list: list[str], fp8: bool) -> str:
+        """Return one CPU_ATTN_DISPATCH macro block for a given guard."""
+        enc = (
+            "    int64_t encoded_params = encode_cpu_attn_params("
+            "HEAD_DIM, ISA_TYPE, KV_CACHE_IDX); \\"
+        )
+        cases = generate_cases_for_isa_group(isa_list, include_fp8=fp8)
+        tail = (
+            "\n"
+            "      default: { \\\n"
+            "        TORCH_CHECK(false, "
+            '"Unsupported CPU attention configuration: head_dim=" + \\\n'
+            '                    std::to_string(HEAD_DIM) + " isa=" + \\\n'
+            "                    std::to_string(static_cast<int>(ISA_TYPE))"
+            " + \\\n"
+            '                    " kv_cache_idx=" + '
+            "std::to_string(KV_CACHE_IDX)); \\\n"
+            "      } \\\n"
+            "    } \\\n"
+            "  }()\n\n"
+        )
+        return (
+            f"{guard}\n"
+            "#define CPU_ATTN_DISPATCH(HEAD_DIM, ISA_TYPE, KV_CACHE_IDX, ...) \\\n"
+            "  [&] { \\\n"
+            f"{enc}\n"
+            "    switch (encoded_params) { \\\n"
+            f"{cases}"
+            f"{tail}"
+        )
+
+    header += _macro_block(
+        "#if defined(CPU_CAPABILITY_AMXBF16)",
+        ["AMX", "VEC", "VEC16"],
+        fp8=True,
+    )
+    header += _macro_block(
+        "#elif defined(__aarch64__)",
+        ["NEON", "VEC", "VEC16"],
+        fp8=False,
+    )
+    header += _macro_block(
+        "#elif defined(__s390x__)",
+        ["VXE", "VEC", "VEC16"],
+        fp8=False,
+    )
+    # RISC-V with RVV.  cpu_attn_rvv.hpp supports VLEN=128 and VLEN=256
+    # via RVVI() macros.  Builds with a supported VLEN get
+    # RVV+VEC+VEC16; other RISC-V builds fall back to VEC/VEC16 only.
+    header += _macro_block(
+        "#elif defined(__riscv) && defined(__riscv_v_min_vlen) "
+        "&& (__riscv_v_min_vlen == 128 || __riscv_v_min_vlen == 256)",
+        ["RVV", "VEC", "VEC16"],
+        fp8=False,
+    )
+    header += _macro_block(
+        "#elif defined(__riscv)",
+        ["VEC", "VEC16"],
+        fp8=False,
+    )
+    header += _macro_block(
+        "#elif defined(__powerpc__)",
+        ["VSX", "VEC", "VEC16"],
+        fp8=False,
+    )
+    header += _macro_block(
+        "#elif defined(__AVX512F__)",
+        ["VEC", "VEC16"],
+        fp8=True,
+    )
+    header += _macro_block(
+        "#elif defined(__AVX2__)",
+        ["VEC", "VEC16"],
+        fp8=False,
+    )
+    header += _macro_block(
+        "#else",
+        ["VEC", "VEC16"],
+        fp8=False,
+    )
+    header += (
+        "#endif  /* CPU_CAPABILITY_AMXBF16 / __aarch64__ / __s390x__ /"
+        " __riscv / __powerpc__ */\n\n"
+        "#endif  // CPU_ATTN_DISPATCH_GENERATED_H\n"
+    )
 
     return header
 
diff --git a/csrc/cpu/micro_gemm/cpu_micro_gemm_vec.hpp b/csrc/cpu/micro_gemm/cpu_micro_gemm_vec.hpp
index bdd3e85a1c52..1c605a2851d7 100644
--- a/csrc/cpu/micro_gemm/cpu_micro_gemm_vec.hpp
+++ b/csrc/cpu/micro_gemm/cpu_micro_gemm_vec.hpp
@@ -39,7 +39,7 @@ class TileGemm82 {
 
   template <int32_t M>
   static void gemm_micro(DEFINE_CPU_MICRO_GEMM_PARAMS) {
-    static_assert(0 < M <= 8);
+    static_assert(0 < M && M <= 8);
     using load_vec_t = typename cpu_utils::VecTypeTrait<scalar_t>::vec_t;
 
     scalar_t* __restrict__ curr_b_0 = b_ptr;
diff --git a/csrc/cpu/pos_encoding.cpp b/csrc/cpu/pos_encoding.cpp
index 74bb014cf39e..9f41e4e222bd 100644
--- a/csrc/cpu/pos_encoding.cpp
+++ b/csrc/cpu/pos_encoding.cpp
@@ -178,7 +178,12 @@ void rotary_embedding_gptj_impl(
 
 void rotary_embedding(torch::Tensor& positions, torch::Tensor& query,
                       std::optional<torch::Tensor> key, int64_t head_size,
-                      torch::Tensor& cos_sin_cache, bool is_neox) {
+                      torch::Tensor& cos_sin_cache, bool is_neox,
+                      int64_t rope_dim_offset, bool inverse) {
+  TORCH_CHECK(rope_dim_offset == 0,
+              "rope_dim_offset != 0 is not supported on CPU");
+  TORCH_CHECK(!inverse, "inverse rotary embedding is not supported on CPU");
+
   int num_tokens = positions.numel();
   int rot_dim = cos_sin_cache.size(1);
   int num_heads = query.size(-1) / head_size;
diff --git a/csrc/cpu/sgl-kernels/common.h b/csrc/cpu/sgl-kernels/common.h
index b96037e82c19..d501d2adb811 100644
--- a/csrc/cpu/sgl-kernels/common.h
+++ b/csrc/cpu/sgl-kernels/common.h
@@ -1,13 +1,12 @@
 // Adapted from
 // https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
 
+// clang-format off
+
 #pragma once
 
 #include <ATen/ATen.h>
 #include <ATen/Parallel.h>
-#include <ATen/record_function.h>
-
-// clang-format off
 
 #if defined(_OPENMP)
 #include <omp.h>
@@ -16,40 +15,157 @@
 namespace {
 
 // dispatch bool
-#define AT_DISPATCH_BOOL(BOOL_V, BOOL_NAME, ...)                                 \
-  [&] {                                                                          \
-    if (BOOL_V) {                                                                \
-      constexpr bool BOOL_NAME = true;                                           \
-      return __VA_ARGS__();                                                      \
-    } else {                                                                     \
-      constexpr bool BOOL_NAME = false;                                          \
-      return __VA_ARGS__();                                                      \
-    }                                                                            \
+#define AT_DISPATCH_BOOL(BOOL_V, BOOL_NAME, ...) \
+  [&] {                                          \
+    if (BOOL_V) {                                \
+      constexpr bool BOOL_NAME = true;           \
+      return __VA_ARGS__();                      \
+    } else {                                     \
+      constexpr bool BOOL_NAME = false;          \
+      return __VA_ARGS__();                      \
+    }                                            \
+  }()
+
+#define AT_DISPATCH_BOOL2(BOOL_V1, BOOL_NAME1, BOOL_V2, BOOL_NAME2, ...) \
+  [&] {                                                                  \
+    if (BOOL_V1) {                                                       \
+      constexpr bool BOOL_NAME1 = true;                                  \
+      if (BOOL_V2) {                                                     \
+        constexpr bool BOOL_NAME2 = true;                                \
+        return __VA_ARGS__();                                            \
+      } else {                                                           \
+        constexpr bool BOOL_NAME2 = false;                               \
+        return __VA_ARGS__();                                            \
+      }                                                                  \
+    } else {                                                             \
+      constexpr bool BOOL_NAME1 = false;                                 \
+      if (BOOL_V2) {                                                     \
+        constexpr bool BOOL_NAME2 = true;                                \
+        return __VA_ARGS__();                                            \
+      } else {                                                           \
+        constexpr bool BOOL_NAME2 = false;                               \
+        return __VA_ARGS__();                                            \
+      }                                                                  \
+    }                                                                    \
+  }()
+
+// dispatch: bfloat16, float16, int8_t, fp8_e4m3, uint8_t(mxfp4/int4)
+#define CPU_DISPATCH_PACKED_TYPES(TYPE, ...)                     \
+  [&] {                                                          \
+    switch (TYPE) {                                              \
+      case at::ScalarType::BFloat16: {                           \
+        using packed_t = at::BFloat16;                           \
+        return __VA_ARGS__();                                    \
+      }                                                          \
+      case at::ScalarType::Half: {                               \
+        using packed_t = at::Half;                               \
+        return __VA_ARGS__();                                    \
+      }                                                          \
+      case at::ScalarType::Char: {                               \
+        using packed_t = int8_t;                                 \
+        return __VA_ARGS__();                                    \
+      }                                                          \
+      case at::ScalarType::Float8_e4m3fn: {                      \
+        using packed_t = at::Float8_e4m3fn;                      \
+        return __VA_ARGS__();                                    \
+      }                                                          \
+      case at::ScalarType::Byte: {                               \
+        using packed_t = uint8_t;                                \
+        return __VA_ARGS__();                                    \
+      }                                                          \
+      default:                                                   \
+        TORCH_CHECK(false, "Unsupported floating data type.\n"); \
+    }                                                            \
+  }()
+
+// Helper MICRO for CPU_DISPATCH_FLOATING_TYPES_EXT:
+//   TYPE1: the primary dtype (input, output, weight);
+//   TYPE2: defined as PARAM_T input
+#define CPU_DISPATCH_TYPE1_WITH_PARAM(TYPE1, PARAM_T, ...)   \
+  switch (TYPE1) {                                           \
+    case at::ScalarType::BFloat16: {                         \
+      using scalar_t = at::BFloat16;                         \
+      using param_t = PARAM_T;                               \
+      return __VA_ARGS__();                                  \
+    }                                                        \
+    case at::ScalarType::Half: {                             \
+      using scalar_t = at::Half;                             \
+      using param_t = PARAM_T;                               \
+      return __VA_ARGS__();                                  \
+    }                                                        \
+    case at::ScalarType::Float: {                            \
+      using scalar_t = float;                                \
+      using param_t = PARAM_T;                               \
+      return __VA_ARGS__();                                  \
+    }                                                        \
+    default:                                                 \
+      TORCH_CHECK(false, "Unsupported floating data type."); \
+  }
+
+// Helper MICRO for CPU_DISPATCH_REDUCED_FLOATING_TYPES_EXT:
+//   TYPE1: the primary dtype (input, output, weight);
+//   TYPE2: defined as PARAM_T input
+#define CPU_DISPATCH_TYPE1_WITH_PARAM_REDUCED(TYPE1, PARAM_T, ...) \
+  switch (TYPE1) {                                                 \
+    case at::ScalarType::BFloat16: {                               \
+      using scalar_t = at::BFloat16;                               \
+      using param_t = PARAM_T;                                     \
+      return __VA_ARGS__();                                        \
+    }                                                              \
+    case at::ScalarType::Half: {                                   \
+      using scalar_t = at::Half;                                   \
+      using param_t = PARAM_T;                                     \
+      return __VA_ARGS__();                                        \
+    }                                                              \
+    default:                                                       \
+      TORCH_CHECK(false, "Unsupported floating data type.");       \
+  }
+
+// Helper MICRO for CPU_DISPATCH_REDUCED_FLOATING_TYPES_EXT:
+//   TYPE1: the dtype both for scalar_t and param_t
+#define CPU_DISPATCH_TYPE1_WITH_SAME_PARAM_REDUCED(TYPE1, ...)       \
+  switch (TYPE1) {                                                   \
+    case at::ScalarType::BFloat16: {                                 \
+      using scalar_t = at::BFloat16;                                 \
+      using param_t = at::BFloat16;                                  \
+      return __VA_ARGS__();                                          \
+    }                                                                \
+    case at::ScalarType::Half: {                                     \
+      using scalar_t = at::Half;                                     \
+      using param_t = at::Half;                                      \
+      return __VA_ARGS__();                                          \
+    }                                                                \
+    default:                                                         \
+      TORCH_CHECK(false, "Unsupported reduced floating data type."); \
+  }
+
+// dispatch with mixed dtypes (TYPE1, TYPE2):
+//   TYPE1: the primary dtype (input, output, weight);
+//   TYPE2: the secondary dtype (bias, etc.).
+#define CPU_DISPATCH_FLOATING_TYPES_EXT(TYPE1, TYPE2, ...)            \
+  [&] {                                                               \
+    if (TYPE2 == at::kFloat) {                                        \
+      CPU_DISPATCH_TYPE1_WITH_PARAM(TYPE1, float, __VA_ARGS__)        \
+    } else if (TYPE2 == at::ScalarType::BFloat16) {                   \
+      CPU_DISPATCH_TYPE1_WITH_PARAM(TYPE1, at::BFloat16, __VA_ARGS__) \
+    } else if (TYPE2 == at::ScalarType::Half) {                       \
+      CPU_DISPATCH_TYPE1_WITH_PARAM(TYPE1, at::Half, __VA_ARGS__)     \
+    } else {                                                          \
+      TORCH_CHECK(false, "Unsupported floating data type.");          \
+    }                                                                 \
   }()
 
-// dispatch: bfloat16, float16, int8_t, fp8_e4m3
-#define CPU_DISPATCH_PACKED_TYPES(TYPE, ...)                                    \
-  [&] {                                                                         \
-    switch (TYPE) {                                                             \
-      case at::ScalarType::BFloat16 : {                                         \
-        using packed_t = at::BFloat16;                                          \
-        return __VA_ARGS__();                                                   \
-      }                                                                         \
-      case at::ScalarType::Half: {                                              \
-        using packed_t = at::Half;                                              \
-        return __VA_ARGS__();                                                   \
-      }                                                                         \
-      case at::ScalarType::Char : {                                             \
-        using packed_t = int8_t;                                                \
-        return __VA_ARGS__();                                                   \
-      }                                                                         \
-      case at::ScalarType::Float8_e4m3fn : {                                    \
-        using packed_t = at::Float8_e4m3fn;                                     \
-        return __VA_ARGS__();                                                   \
-      }                                                                         \
-      default:                                                                  \
-        TORCH_CHECK(false, "Unsupported floating data type.\n");                \
-    }                                                                           \
+// dispatch with mixed dtypes (reduced one, no float for TYPE1) (TYPE1, TYPE2):
+//   TYPE1: the primary dtype (input, output, weight);
+//   TYPE2: the secondary dtype (bias, etc.).
+#define CPU_DISPATCH_REDUCED_FLOATING_TYPES_EXT(TYPE1, TYPE2, ...)     \
+  [&] {                                                                \
+    if (TYPE2 == at::kFloat) {                                         \
+      CPU_DISPATCH_TYPE1_WITH_PARAM_REDUCED(TYPE1, float, __VA_ARGS__) \
+    } else {                                                           \
+      TORCH_CHECK(TYPE1 == TYPE2);                                     \
+      CPU_DISPATCH_TYPE1_WITH_SAME_PARAM_REDUCED(TYPE1, __VA_ARGS__)   \
+    }                                                                  \
   }()
 
 #define UNUSED(x) (void)(x)
@@ -70,13 +186,51 @@ namespace {
 #define CHECK_DIM(d, x) TORCH_CHECK(x.dim() == d, #x " must be a " #d "D tensor")
 
 #define CHECK_EQ(a, b) TORCH_CHECK((a) == (b), "CHECK_EQ(" #a ", " #b ") failed. ", a, " vs ", b)
+#define CHECK_GT(a, b) TORCH_CHECK((a) > (b), "CHECK_GT(" #a ", " #b ") failed. ", a, " vs ", b)
+#define CHECK_GE(a, b) TORCH_CHECK((a) >= (b), "CHECK_GE(" #a ", " #b ") failed. ", a, " vs ", b)
+
+template <bool is_only_lastdim_contiguous>
+static inline void CHECK_INPUT_SHAPE_DTYPE(const at::Tensor& tensor, const at::IntArrayRef sizes, at::ScalarType st) {
+  TORCH_CHECK(tensor.sizes() == sizes, "Input tensor shape mismatch: expected ", sizes, ", got ", tensor.sizes());
+  TORCH_CHECK(tensor.scalar_type() == st, "Input tensor dtype mismatch");
+  if constexpr (is_only_lastdim_contiguous) {
+    CHECK_LAST_DIM_CONTIGUOUS_INPUT(tensor);
+  } else {
+    CHECK_INPUT(tensor);
+  }
+}
 
-// parallel routines
+// [NB] Parallel Routines
+//
+//  * at::parallel_for - applies for most of generic use cases, this will be compiled
+//                       against openmp in default torch release.
+//
+//  * parallel_for     - same function as above, can choose payload partition scheme in
+//                       balance211.
+//
+//  * parallel_2d      - parallel for 2 dimensions, used in GEMM, etc.
+//                       this one will do payload balance across 2 dimensions.
+//
+
+// grain size for each thread
 constexpr int GRAIN_SIZE = 1024;
 
 template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
-inline T div_up(T x, T y) { return (x + y - 1) / y; }
+inline T div_up(T x, T y) {
+  return (x + y - 1) / y;
+}
+
+// you can only use at::get_thread_num() with at::parallel_for()
+// as it is lazy initialized, otherwise it will always return 0.
+inline int get_thread_num() {
+#if defined(_OPENMP)
+  return omp_get_thread_num();
+#else
+  return 0;
+#endif
+}
 
+// balance payload across each thread
 template <typename T>
 inline void balance211(T n, T nth, T ith, T& n_start, T& n_end) {
 #if 0
@@ -94,10 +248,10 @@ inline void balance211(T n, T nth, T ith, T& n_start, T& n_end) {
     }
     n_end += n_start;
 #else
-    // pytorch aten partition pattern
-    T n_my = div_up(n, nth);
-    n_start = ith * n_my;
-    n_end = std::min(n_start + n_my, n);
+  // pytorch aten partition pattern
+  T n_my = div_up(n, nth);
+  n_start = ith * n_my;
+  n_end = std::min(n_start + n_my, n);
 #endif
 }
 
@@ -105,15 +259,15 @@ template <typename func_t>
 inline void parallel_for(int n, const func_t& f) {
 #if defined(_OPENMP)
 #pragma omp parallel
-{
+  {
     int nth = omp_get_num_threads();
     int ith = omp_get_thread_num();
     int tbegin, tend;
     balance211(n, nth, ith, tbegin, tend);
     f(tbegin, tend);
-}
+  }
 #else
-    f(0, n);
+  f(0, n);
 #endif
 }
 
@@ -129,7 +283,6 @@ int inline adjust_num_threads(int m) {
 
 template <typename func_t>
 inline void parallel_2d(int m, int n, const func_t& f) {
-
   // make sure we have even num_threads
   int nth = adjust_num_threads(m);
 
@@ -157,31 +310,66 @@ inline void parallel_2d(int m, int n, const func_t& f) {
 
 #if defined(_OPENMP)
 #pragma omp parallel num_threads(nth)
-{
-  int ith = omp_get_thread_num();
-  int ith_m = ith / nth_n;
-  int ith_n = ith % nth_n;
+  {
+    int ith = omp_get_thread_num();
+    int ith_m = ith / nth_n;
+    int ith_n = ith % nth_n;
 
-  int thread_block_m = div_up(m, nth_m);
-  int thread_block_n = div_up(n, nth_n);
+    int thread_block_m = div_up(m, nth_m);
+    int thread_block_n = div_up(n, nth_n);
 
-  int begin_m = ith_m * thread_block_m;
-  int end_m = std::min(m, begin_m + thread_block_m);
-  int begin_n = ith_n * thread_block_n;
-  int end_n = std::min(n, begin_n + thread_block_n);
+    int begin_m = ith_m * thread_block_m;
+    int end_m = std::min(m, begin_m + thread_block_m);
+    int begin_n = ith_n * thread_block_n;
+    int end_n = std::min(n, begin_n + thread_block_n);
 
-  f(begin_m, end_m, begin_n, end_n);
-}
+    f(begin_m, end_m, begin_n, end_n);
+  }
 #else
   f(0, m, 0, n);
 #endif
 }
 
+// limit max cache blocks
+// when we need to do pre-unpack for weights, e.g. fp8
+#define MAX_CACHE_BLOCK_SIZE 4
+
 template <typename T>
-int get_cache_blocks(int BLOCK_SIZE, int K) {
+inline int get_cache_blocks(int chunk_size) {
   // L2 2MB and ratio of 50%
   const int L2_size = 2048 * 1024 >> 1;
-  return std::max(1, int(L2_size / (BLOCK_SIZE * K * sizeof(T))));
+  return std::max(1, int(L2_size / (chunk_size * sizeof(T))));
+}
+
+template <>
+inline int get_cache_blocks<at::Float8_e4m3fn>(int chunk_size) {
+  // fp8 uses bf16 as accumulate type
+  int cache_block_size = get_cache_blocks<at::BFloat16>(chunk_size);
+  return std::min(MAX_CACHE_BLOCK_SIZE, cache_block_size);
+}
+
+template <>
+inline int get_cache_blocks<uint8_t>(int chunk_size) {
+  // mxfp4 uses bf16 as accumulate type
+  int cache_block_size = get_cache_blocks<at::BFloat16>(chunk_size);
+  return std::min(MAX_CACHE_BLOCK_SIZE, cache_block_size);
+}
+
+// 2d sequential loop in range : [mb0, mb1), [nb0, nb1)
+template <typename T, typename func_t>
+inline void loop_2d(int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1, int64_t chunk_size, const func_t& f) {
+  // get number of blocks for L2 in most inner loop
+  int64_t cache_blocks_nb = get_cache_blocks<T>(chunk_size);
+
+  // loop order: [NB / cache_blocks_nb, MB, cache_blocks_nb]
+  // TODO: implement reverse order of [MB / cache_blocks_mb, NB, cache_blocks_mb]
+  for (int64_t nbb = nb0; nbb < nb1; nbb += cache_blocks_nb) {
+    for (int64_t mb = mb0; mb < mb1; ++mb) {
+      for (int64_t nb = nbb; nb < std::min(nbb + cache_blocks_nb, nb1); ++nb) {
+        f(mb, nb, nb - nbb);
+      }
+    }
+  }
 }
 
 // data indexing for dimension collapse
@@ -235,4 +423,10 @@ struct Unroll<1> {
   }
 };
 
-} // anonymous namespace
+// conditional data ptr for optional tensor
+template <typename T>
+inline T* conditional_data_ptr(const std::optional<at::Tensor>& opt) {
+  return opt.has_value() ? opt.value().data_ptr<T>() : nullptr;
+}
+
+}  // anonymous namespace
diff --git a/csrc/cpu/sgl-kernels/conv.cpp b/csrc/cpu/sgl-kernels/conv.cpp
new file mode 100644
index 000000000000..15114732aac1
--- /dev/null
+++ b/csrc/cpu/sgl-kernels/conv.cpp
@@ -0,0 +1,720 @@
+// Adapted from
+// https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
+
+// clang-format off
+
+#include "common.h"
+#include "gemm.h"
+#include "vec.h"
+
+namespace {
+
+template <typename scalar_t>
+inline void copy_stub(scalar_t* __restrict__ y, const scalar_t* __restrict__ x, int64_t size) {
+  using Vec = at::vec::Vectorized<scalar_t>;
+  const bool is_padding = (x == nullptr);
+  for (int64_t d = 0; d < size; d += Vec::size()) {
+    Vec data_vec = is_padding ? Vec(0.f) : Vec::loadu(x + d);
+    data_vec.store(y + d);
+  }
+}
+
+// no remainder
+template <typename scalar_t>
+void inline update_conv_state(
+    scalar_t* __restrict__ conv_states,
+    const scalar_t* __restrict__ input,
+    int64_t width,
+    int64_t dim,
+    int64_t seqlen,
+    bool has_initial_states) {
+  // width for `conv_states`
+  int64_t width1 = width - 1;
+  int64_t w = 0;
+  for (; w < width1 - seqlen; ++w) {
+    scalar_t* y = conv_states + w * dim;
+    const scalar_t* x = has_initial_states ? conv_states + (w + seqlen) * dim : nullptr;
+    copy_stub(y, x, dim);
+  }
+  for (; w < width1; ++w) {
+    scalar_t* y = conv_states + w * dim;
+    const scalar_t* x = input + (w + seqlen - width1) * dim;
+    copy_stub(y, x, dim);
+  }
+}
+
+// A : [M, BLOCK_N]
+// B : [BLOCK_N, K], prepacked as [K/2, BLOCK_N, 2]
+// C : [M, BLOCK_N]
+// bias : [BLOCK_N]
+//
+// lda : leading dimension of `input` and `out`
+//
+template <typename scalar_t, int K, int BLOCK_N, bool has_bias, bool has_silu>
+struct tinygemm_kernel {
+  static inline void apply(
+      const scalar_t* __restrict__ A,
+      const scalar_t* __restrict__ B,
+      scalar_t* __restrict__ C,
+      const scalar_t* __restrict__ bias,
+      const scalar_t* __restrict__ conv_states,
+      bool has_initial_state,
+      int64_t M,
+      int64_t lda,
+      bool is_first_token) {
+    TORCH_CHECK(false, "tinygemm_kernel_nn: scalar path not implemented!");
+  }
+};
+
+#if defined(CPU_CAPABILITY_AVX512)
+template <int K, int BLOCK_N, bool has_bias, bool has_silu>
+struct tinygemm_kernel<at::BFloat16, K, BLOCK_N, has_bias, has_silu> {
+  static inline void apply(
+      const at::BFloat16* __restrict__ A,
+      const at::BFloat16* __restrict__ B,
+      at::BFloat16* __restrict__ C,
+      const at::BFloat16* __restrict__ bias,
+      const at::BFloat16* __restrict__ conv_states,
+      bool has_initial_state,
+      int64_t M,
+      int64_t lda,
+      bool is_first_token) {
+    assert(K == 4);
+    constexpr int ROWS = K;
+    constexpr int COLS = BLOCK_N / block_size_n();
+
+    // leading dimension size for b for next block [K/2, 32, 2]
+    constexpr int ldb = block_size_n() * K;
+
+    __m512bh va[ROWS * COLS];
+    __m512bh vb[ROWS * COLS];
+    __m512 vc[COLS * 2];
+
+    // k: {-3, -2, -1} -> {0, 1, 2}
+    auto set_conv_states = [&](int k, int col) -> __m512i {
+      return has_initial_state ? _mm512_loadu_si512(conv_states + (k + K - 1) * lda + col * 32)
+                               : _mm512_setzero_si512();
+    };
+
+#define MM512_LOAD_A(idx)                                                 \
+  ((idx) < 0 && is_first_token) ? (__m512bh)(set_conv_states((idx), col)) \
+                                : (__m512bh)(_mm512_loadu_si512(A + (idx) * lda + col * 32))
+
+#define MM512_PACK_A(ap, bp, a, b)                       \
+  do {                                                   \
+    __m512i r0 = (__m512i)(a);                           \
+    __m512i r1 = (__m512i)(b);                           \
+    __m512i d0 = _mm512_unpacklo_epi16(r0, r1);          \
+    __m512i d1 = _mm512_unpackhi_epi16(r0, r1);          \
+    r0 = _mm512_shuffle_i32x4(d0, d1, 0x88);             \
+    r1 = _mm512_shuffle_i32x4(d0, d1, 0xdd);             \
+    (ap) = (__m512bh)_mm512_shuffle_i32x4(r0, r1, 0x88); \
+    (bp) = (__m512bh)_mm512_shuffle_i32x4(r0, r1, 0xdd); \
+  } while (0)
+
+    // step 0 : preload a at time step [-3][-2][-1]
+    auto preloada = [&](auto i) {
+      constexpr int col = i;
+      int64_t m = 0;
+      va[1 * COLS + col] = MM512_LOAD_A(m - 3);
+      va[2 * COLS + col] = MM512_LOAD_A(m - 2);
+      va[3 * COLS + col] = MM512_LOAD_A(m - 1);
+    };
+    Unroll<COLS>{}(preloada);
+
+    auto loada = [&](auto i, int64_t m) {
+      constexpr int col = i;
+      // update previous time step
+      va[0 * COLS + col] = va[1 * COLS + col];
+      va[1 * COLS + col] = va[2 * COLS + col];
+      va[2 * COLS + col] = va[3 * COLS + col];
+      // load current time step
+      va[3 * COLS + col] = MM512_LOAD_A(m);
+    };
+
+    // step 1 : load weight for just once
+    auto loadb = [&](auto i) {
+      constexpr int row = i / COLS;
+      constexpr int col = i % COLS;
+      vb[row * COLS + col] = (__m512bh)(_mm512_loadu_si512(B + col * ldb + row * 32));
+    };
+    Unroll<ROWS * COLS>{}(loadb);
+
+    // [NB] accumulates 4x32 bfloat16 blocks
+    //
+    //   +------------+------------+
+    //   |    col0    |    col1    |
+    //   +------------+------------+
+    //   |  va0  va1  |  va0  va1  |
+    //   |  va2  va3  |  va2  va3  |
+    //   +------------+------------+
+    //   |  vc0  vc1  |  vc0  vc1  |
+    //   +------------+------------+
+    //
+    //  * va and vb shares the same memory layout
+    //  * block_n 32 with 4 rows equals to 4 registers
+    //  * 37 uops with avx512bf16 v.s. 57 uops with avx512f
+    //
+    auto compute = [&](auto i) {
+      constexpr int col = i;
+
+      // init accumulators
+      if constexpr (has_bias) {
+        __m512i b16 = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(bias + col * 32));
+        vc[col * 2 + 0] = CVT_BF16_TO_FP32(_mm512_extracti32x8_epi32(b16, 0));
+        vc[col * 2 + 1] = CVT_BF16_TO_FP32(_mm512_extracti32x8_epi32(b16, 1));
+      } else {
+        vc[col * 2 + 0] = _mm512_set1_ps(0.f);
+        vc[col * 2 + 1] = _mm512_set1_ps(0.f);
+      }
+
+      // convert to vnni2 format
+      __m512bh va0, va1, va2, va3;
+      MM512_PACK_A(va0, va1, va[0 * COLS + col], va[1 * COLS + col]);
+      MM512_PACK_A(va2, va3, va[2 * COLS + col], va[3 * COLS + col]);
+
+      // accumulate
+      vc[col * 2 + 0] = _mm512_dpbf16_ps(vc[col * 2 + 0], va0, vb[0 * COLS + col]);
+      vc[col * 2 + 0] = _mm512_dpbf16_ps(vc[col * 2 + 0], va2, vb[2 * COLS + col]);
+      vc[col * 2 + 1] = _mm512_dpbf16_ps(vc[col * 2 + 1], va1, vb[1 * COLS + col]);
+      vc[col * 2 + 1] = _mm512_dpbf16_ps(vc[col * 2 + 1], va3, vb[3 * COLS + col]);
+    };
+
+    using fVec = at::vec::Vectorized<float>;
+    using bVec = at::vec::Vectorized<at::BFloat16>;
+    const fVec one = fVec(1.f);
+    auto storec = [&](auto i, int64_t m) {
+      constexpr int col = i;
+      fVec x0 = fVec(vc[col * 2 + 0]);
+      fVec x1 = fVec(vc[col * 2 + 1]);
+      if constexpr (has_silu) {
+        x0 = x0 / (one + x0.neg().exp_u20());
+        x1 = x1 / (one + x1.neg().exp_u20());
+      }
+      bVec out_vec = convert_from_float_ext<at::BFloat16>(x0, x1);
+      out_vec.store(C + m * lda + col * 32);
+    };
+
+    for (int64_t m = 0; m < M; ++m) {
+      // step 3.a : load a at current time step
+      Unroll<COLS>{}(loada, m);
+      // step 3.b : accumulate for window size (4)
+      Unroll<COLS>{}(compute);
+      // step 3.c : store c at current time step
+      Unroll<COLS>{}(storec, m);
+    }
+  }
+};
+#endif
+
+#define LAUNCH_TINYGEMM_KERNEL(K, NB_SIZE)                                                   \
+  tinygemm_kernel<scalar_t, K, NB_SIZE, has_bias, has_silu>::apply(                          \
+      input + bs * seqlen * dim + mb_start * dim + nb_start,                                 \
+      weight + nb_start * width,                                                             \
+      out + bs * seqlen * dim + mb_start * dim + nb_start,                                   \
+      has_bias ? bias + nb_start : nullptr,                                                  \
+      has_conv_states ? conv_states + conv_state_index * conv_state_slot_stride + nb_start : nullptr, \
+      has_initial_states_value,                                                              \
+      mb_size,                                                                               \
+      dim,                                                                                   \
+      mb_start == 0);
+
+template <typename scalar_t>
+void causal_conv1d_fwd_kernel_impl(
+    scalar_t* __restrict__ out,
+    const scalar_t* __restrict__ input,
+    const scalar_t* __restrict__ weight,
+    const scalar_t* __restrict__ bias,
+    scalar_t* __restrict__ conv_states,
+    const int32_t* __restrict__ conv_indices,
+    const bool* __restrict__ has_initial_state,
+    bool silu_activation,
+    int64_t batch,
+    int64_t dim,
+    int64_t seqlen,
+    int64_t width,
+    int64_t num_seq_blocks,
+    int64_t conv_state_slot_stride) {
+  // handle 32 x 64 per block
+  constexpr int64_t BLOCK_M = block_size_m();
+  constexpr int64_t BLOCK_N = block_size_n() * 2;
+  const int64_t NB = div_up(dim, BLOCK_N);
+
+  const int64_t num_blocks_per_seq = div_up(seqlen, BLOCK_M);
+  const bool has_conv_states = conv_states != nullptr;
+  const bool has_conv_indices = conv_indices != nullptr;
+
+  // parallel on [batch, seq, NB]
+  AT_DISPATCH_BOOL2(bias != nullptr, has_bias, silu_activation, has_silu, [&] {
+    at::parallel_for(0, num_seq_blocks * NB, 0, [&](int64_t begin, int64_t end) {
+      int64_t mb{0}, nb{0};
+      data_index_init(begin, mb, num_seq_blocks, nb, NB);
+
+      for (int64_t i = begin; i < end; ++i) {
+        int64_t bs = mb / num_blocks_per_seq;
+
+        int64_t mb_start = (mb % num_blocks_per_seq) * BLOCK_M;
+        int64_t mb_size = std::min(seqlen - mb_start, BLOCK_M);
+        int64_t nb_start = nb * BLOCK_N;
+        int64_t nb_size = std::min(dim - nb_start, BLOCK_N);
+
+        const bool has_initial_states_value = has_conv_states ? has_initial_state[bs] : false;
+        int32_t conv_state_index = has_conv_indices ? conv_indices[bs] : bs;
+
+        switch (width << 4 | nb_size >> 4) {
+          case 0x42:
+            LAUNCH_TINYGEMM_KERNEL(4, 32);
+            break;
+          case 0x44:
+            LAUNCH_TINYGEMM_KERNEL(4, 64);
+            break;
+          default:
+            TORCH_CHECK(false, "Unexpected block size, ", width, " x ", nb_size);
+        }
+
+        // move to the next index
+        data_index_step(mb, num_seq_blocks, nb, NB);
+      }
+    });
+  });
+
+  // update conv_states if necessary
+  if (has_conv_states) {
+    at::parallel_for(0, batch, 0, [&](int64_t begin, int64_t end) {
+      for (int64_t bs = begin; bs < end; ++bs) {
+        update_conv_state(
+            conv_states + bs * conv_state_slot_stride, input + bs * seqlen * dim, width, dim, seqlen, has_initial_state[bs]);
+      }
+    });
+  }
+}
+
+#define LAUNCH_TINYGEMM_VARLEN_KERNEL(K, NB_SIZE)                   \
+  tinygemm_kernel<scalar_t, K, NB_SIZE, has_bias, has_silu>::apply( \
+      input + batch_offset * dim + mb_start * dim + nb_start,       \
+      weight + nb_start * width,                                    \
+      out + batch_offset * dim + mb_start * dim + nb_start,         \
+      has_bias ? bias + nb_start : nullptr,                         \
+      nullptr,                                                      \
+      false,                                                        \
+      mb_size,                                                      \
+      dim,                                                          \
+      mb_start == 0);
+
+// TODO: add `has_initial_state` support for varlen kernel
+template <typename scalar_t>
+void causal_conv1d_fwd_varlen_kernel_impl(
+    scalar_t* __restrict__ out,
+    const scalar_t* __restrict__ input,
+    const scalar_t* __restrict__ weight,
+    const scalar_t* __restrict__ bias,
+    scalar_t* __restrict__ conv_states,
+    const int32_t* __restrict__ query_start_loc,
+    const int32_t* __restrict__ conv_indices,
+    const bool* __restrict__ has_initial_state,
+    const int32_t* __restrict__ block_indices,
+    bool silu_activation,
+    int64_t batch,
+    int64_t dim,
+    int64_t width,
+    int64_t num_seq_blocks,
+    int64_t conv_state_slot_stride) {
+  // handle 32 x 64 per block
+  constexpr int64_t BLOCK_M = block_size_m();
+  constexpr int64_t BLOCK_N = block_size_n() * 2;
+  const int64_t NB = div_up(dim, BLOCK_N);
+
+  const bool has_conv_states = conv_states != nullptr;
+  const bool has_conv_indices = conv_indices != nullptr;
+
+  // parallel on [batch, seq, NB]
+  AT_DISPATCH_BOOL2(bias != nullptr, has_bias, silu_activation, has_silu, [&] {
+    at::parallel_for(0, num_seq_blocks * NB, 0, [&](int64_t begin, int64_t end) {
+      int64_t mb{0}, nb{0};
+      data_index_init(begin, mb, num_seq_blocks, nb, NB);
+
+      for (int64_t i = begin; i < end; ++i) {
+        int32_t bs = block_indices[mb * 2 + 0];
+        int32_t batch_offset = query_start_loc[bs];
+        int32_t seqlen = query_start_loc[bs + 1] - query_start_loc[bs];
+
+        int64_t mb_start = block_indices[mb * 2 + 1] * BLOCK_M;
+        int64_t mb_size = std::min(seqlen - mb_start, BLOCK_M);
+        int64_t nb_start = nb * BLOCK_N;
+        int64_t nb_size = std::min(dim - nb_start, BLOCK_N);
+
+        switch (width << 4 | nb_size >> 4) {
+          case 0x42:
+            LAUNCH_TINYGEMM_VARLEN_KERNEL(4, 32);
+            break;
+          case 0x44:
+            LAUNCH_TINYGEMM_VARLEN_KERNEL(4, 64);
+            break;
+          default:
+            TORCH_CHECK(false, "Unexpected block size, ", width, " x ", nb_size);
+        }
+
+        // move to the next index
+        data_index_step(mb, num_seq_blocks, nb, NB);
+      }
+    });
+  });
+
+  // update conv_states if necessary
+  if (has_conv_states) {
+    at::parallel_for(0, batch, 0, [&](int64_t begin, int64_t end) {
+      for (int64_t bs = begin; bs < end; ++bs) {
+        int32_t conv_state_index = has_conv_indices ? conv_indices[bs] : bs;
+        int32_t seqlen = query_start_loc[bs + 1] - query_start_loc[bs];
+        int32_t batch_offset = query_start_loc[bs];
+        update_conv_state(
+            conv_states + conv_state_index * conv_state_slot_stride,
+            input + batch_offset * dim,
+            width,
+            dim,
+            seqlen,
+            /* has_initial_state */ false);
+      }
+    });
+  }
+}
+
+template <typename scalar_t>
+void causal_conv1d_update_kernel_impl(
+    scalar_t* __restrict__ out,
+    const scalar_t* __restrict__ input,
+    scalar_t* __restrict__ conv_states,
+    const scalar_t* __restrict__ weight,
+    const scalar_t* __restrict__ bias,
+    const int32_t* __restrict__ conv_indices,
+    bool silu_activation,
+    int64_t batch,
+    int64_t dim,
+    int64_t seqlen,
+    int64_t width,
+    int64_t conv_state_slot_stride) {
+  // handle 32 x 64 per block
+  constexpr int64_t BLOCK_M = block_size_m();
+  constexpr int64_t BLOCK_N = block_size_n() * 2;
+  const int64_t NB = div_up(dim, BLOCK_N);
+
+  const bool has_conv_states = conv_states != nullptr;
+  const bool has_conv_indices = conv_indices != nullptr;
+
+  // parallel on [batch, NB]
+  AT_DISPATCH_BOOL2(bias != nullptr, has_bias, silu_activation, has_silu, [&] {
+    at::parallel_for(0, batch * NB, 0, [&](int64_t begin, int64_t end) {
+      int64_t bs{0}, nb{0};
+      data_index_init(begin, bs, batch, nb, NB);
+
+      for (int64_t i = begin; i < end; ++i) {
+        int64_t mb_start = 0;
+        int64_t mb_size = 1;
+        int64_t nb_start = nb * BLOCK_N;
+        int64_t nb_size = std::min(dim - nb_start, BLOCK_N);
+
+        const bool has_initial_states_value = true;
+        int32_t conv_state_index = has_conv_indices ? conv_indices[bs] : bs;
+
+        switch (width << 4 | nb_size >> 4) {
+          case 0x42:
+            LAUNCH_TINYGEMM_KERNEL(4, 32);
+            break;
+          case 0x44:
+            LAUNCH_TINYGEMM_KERNEL(4, 64);
+            break;
+          default:
+            TORCH_CHECK(false, "Unexpected block size, ", width, " x ", nb_size);
+        }
+
+        // move to the next index
+        data_index_step(bs, batch, nb, NB);
+      }
+    });
+  });
+
+#define CONV_STATE_INDEXR(w) conv_states + conv_state_index*conv_state_slot_stride + (w) * dim
+
+  // update conv_states
+  at::parallel_for(0, batch, 0, [&](int64_t begin, int64_t end) {
+    for (int64_t bs = begin; bs < end; ++bs) {
+      // update old states, range [1, width - 1)
+      int32_t conv_state_index = has_conv_indices ? conv_indices[bs] : bs;
+      for (int64_t w = 1; w < width - 1; ++w) {
+        std::memcpy(CONV_STATE_INDEXR(w - 1), CONV_STATE_INDEXR(w), dim * sizeof(scalar_t));
+      }
+      // copy new states
+      std::memcpy(CONV_STATE_INDEXR(width - 2), input + bs * dim, dim * sizeof(scalar_t));
+    }
+  });
+}
+
+}  // anonymous namespace
+
+// from [dim, width] or [N, K]
+// to [N/BLOCK_N, K/2, BLOCK_N, 2]
+at::Tensor causal_conv1d_weight_pack(const at::Tensor& weight) {
+  CHECK_INPUT(weight);
+
+  int64_t dim = weight.size(0);
+  int64_t width = weight.size(1);
+  constexpr int64_t BLOCK_N = block_size_n();
+  TORCH_CHECK(width == 4, "causal_conv1d_weight_pack: support only width of 4");
+  TORCH_CHECK(dim % BLOCK_N == 0, "causal_conv1d_weight_pack: invalid dim size ", dim);
+
+  const int64_t N = dim, K2 = width >> 1;
+  const int64_t NB = div_up(N, BLOCK_N);
+
+  auto packed_weight = at::empty_like(weight);
+  AT_DISPATCH_REDUCED_FLOATING_TYPES(weight.scalar_type(), "causal_conv1d_fwd_kernel_impl", [&] {
+    // cast to float32 as vnni size is 2
+    const float* w_data = reinterpret_cast<float*>(weight.data_ptr<scalar_t>());
+    float* packed_data = reinterpret_cast<float*>(packed_weight.data_ptr<scalar_t>());
+
+    at::parallel_for(0, NB * K2 * BLOCK_N, 0, [&](int64_t begin, int64_t end) {
+      int64_t nb{0}, k2{0}, n{0};
+      data_index_init(begin, nb, NB, k2, K2, n, BLOCK_N);
+
+      // TODO: optimize this if we need to online prepacking.
+      for (int64_t i = begin; i < end; ++i) {
+        packed_data[i] = w_data[nb * BLOCK_N * K2 + n * K2 + k2];
+
+        // move to the next index
+        data_index_step(nb, NB, k2, K2, n, BLOCK_N);
+      }
+    });
+  });
+  return packed_weight;
+}
+
+#define CHECK_OPTIONAL_SHAPE_DTYPE(OPT, SIZE, DTYPE) \
+  if (OPT.has_value()) {                             \
+    const auto tensor = OPT.value();                 \
+    CHECK_CONTIGUOUS(tensor);                        \
+    CHECK_EQ(tensor.size(0), SIZE);                  \
+    CHECK_EQ(tensor.scalar_type(), DTYPE);           \
+  }
+
+template <int BLOCK_M>
+int64_t get_block_count(const std::optional<at::Tensor>& offsets, int64_t batch, int64_t seqlen) {
+  if (offsets.has_value()) {
+    const int32_t* offsets_data = offsets.value().data_ptr<int32_t>();
+    int32_t num_seq_blocks = 0;
+    for (int64_t row = 0; row < batch; ++row) {
+      num_seq_blocks += div_up(offsets_data[row + 1] - offsets_data[row], BLOCK_M);
+    }
+    return num_seq_blocks;
+  }
+  return batch * div_up(seqlen, int64_t(BLOCK_M));
+}
+
+template <int BLOCK_M>
+at::Tensor get_block_indices(const std::optional<at::Tensor>& offsets, int64_t num_seq_blocks) {
+  if (!offsets.has_value()) {
+    return at::Tensor();
+  }
+
+  const at::Tensor& offsets_ = offsets.value();
+  at::Tensor indices = at::empty({num_seq_blocks, 2}, offsets_.options());
+
+  int64_t batch = offsets_.size(0) - 1;
+
+  const int32_t* offsets_data = offsets_.data_ptr<int32_t>();
+  int32_t* indices_data = indices.data_ptr<int32_t>();
+
+  int64_t idx = 0;
+  for (int32_t row = 0; row < batch; ++row) {
+    int32_t blocks = div_up(offsets_data[row + 1] - offsets_data[row], BLOCK_M);
+
+    for (int32_t col = 0; col < blocks; ++col) {
+      indices_data[idx * 2 + 0] = row;
+      indices_data[idx * 2 + 1] = col;
+      idx++;
+    }
+  }
+  return indices;
+}
+
+// API aligned with GPUs
+//
+//   x: (batch, dim, seqlen) or (dim, cu_seq_len) for varlen
+//   weight: (dim, width)
+//   bias: (dim,)
+//   query_start_loc: (batch + 1) int32
+//   cache_indices: (batch)  int32
+//   has_initial_state: (batch) bool
+//   conv_states: (..., dim, width - 1) itype
+//   activation: either None or "silu" or "swish"
+//   pad_slot_id: int
+//
+at::Tensor causal_conv1d_fwd_cpu(
+    const at::Tensor& x,
+    const at::Tensor& weight,
+    const std::optional<at::Tensor>& bias,
+    const std::optional<at::Tensor>& conv_states,
+    const std::optional<at::Tensor>& query_start_loc,
+    const std::optional<at::Tensor>& conv_state_indices,
+    const std::optional<at::Tensor>& has_initial_state,
+    bool silu_activation,
+    int64_t pad_slot_id,
+    bool is_vnni) {
+  CHECK_CONTIGUOUS(weight);
+  auto packed_w = is_vnni ? weight : causal_conv1d_weight_pack(weight);
+
+  const bool is_var_seqlen = query_start_loc.has_value();
+  const int64_t input_ndim = is_var_seqlen ? 2 : 3;
+  TORCH_CHECK(x.dim() == input_ndim, "causal_conv1d_fwd_cpu: expect x to be ", input_ndim, "D tensor.");
+  TORCH_CHECK(x.stride(-2) == 1 && x.stride(-1) == x.size(-2), "causal_conv1d_fwd_cpu: expect x to be transposed.");
+
+  const int64_t batch = is_var_seqlen ? query_start_loc.value().size(0) - 1 : x.size(0);
+  const int64_t dim = x.size(-2);
+  const int64_t seqlen = x.size(-1);
+  const int64_t width = weight.size(-1);
+
+  const auto scalar_type = x.scalar_type();
+  CHECK_EQ(weight.scalar_type(), scalar_type);
+  CHECK_OPTIONAL_SHAPE_DTYPE(bias, dim, scalar_type);
+  CHECK_OPTIONAL_SHAPE_DTYPE(query_start_loc, batch + 1, at::kInt);
+  CHECK_OPTIONAL_SHAPE_DTYPE(conv_state_indices, batch, at::kInt);
+  CHECK_OPTIONAL_SHAPE_DTYPE(has_initial_state, batch, at::kBool);
+
+  if (conv_states.has_value()) {
+    auto& conv_states_val = conv_states.value();
+    int64_t padded_batch = conv_states_val.size(0);
+    CHECK_EQ(conv_states_val.scalar_type(), scalar_type);
+    CHECK_GE(padded_batch, batch);
+    CHECK_EQ(conv_states_val.size(1), dim);
+    CHECK_EQ(conv_states_val.size(2), width - 1);
+
+    // adjust `conv_states` to be contiguous on `dim`
+    // should happen only once
+    if (conv_states_val.stride(-2) != 1) {
+      auto conv_states_copy = conv_states_val.clone();
+      conv_states_val.as_strided_({padded_batch, dim, width - 1}, {(width - 1) * dim, 1, dim});
+      conv_states_val.copy_(conv_states_copy);
+    }
+  }
+
+// IMPORTANT: To make the kernal compatible with vLLM KV cache layout 
+  int64_t conv_state_slot_stride = conv_states->stride(0);
+
+  // block size for sequence blocks, 32
+  constexpr int64_t BLOCK_M = block_size_m();
+
+  // total number of sequence blocks
+  int64_t num_seq_blocks = get_block_count<BLOCK_M>(query_start_loc, batch, seqlen);
+
+  at::Tensor out = at::empty_like(x);
+  AT_DISPATCH_REDUCED_FLOATING_TYPES(scalar_type, "causal_conv1d_fwd_kernel_impl", [&] {
+    if (is_var_seqlen) {
+      // record seq blocks in Coordinate format, aka [num_seq_blocks, 2]
+      at::Tensor block_indices = get_block_indices<BLOCK_M>(query_start_loc, num_seq_blocks);
+
+      causal_conv1d_fwd_varlen_kernel_impl(
+          out.data_ptr<scalar_t>(),
+          x.data_ptr<scalar_t>(),
+          packed_w.data_ptr<scalar_t>(),
+          conditional_data_ptr<scalar_t>(bias),
+          conditional_data_ptr<scalar_t>(conv_states),
+          conditional_data_ptr<int32_t>(query_start_loc),
+          conditional_data_ptr<int32_t>(conv_state_indices),
+          conditional_data_ptr<bool>(has_initial_state),
+          block_indices.data_ptr<int32_t>(),
+          silu_activation,
+          batch,
+          dim,
+          width,
+          num_seq_blocks,
+          conv_state_slot_stride);
+    } else {
+      causal_conv1d_fwd_kernel_impl<scalar_t>(
+          out.data_ptr<scalar_t>(),
+          x.data_ptr<scalar_t>(),
+          packed_w.data_ptr<scalar_t>(),
+          conditional_data_ptr<scalar_t>(bias),
+          conditional_data_ptr<scalar_t>(conv_states),
+          conditional_data_ptr<int32_t>(conv_state_indices),
+          conditional_data_ptr<bool>(has_initial_state),
+          silu_activation,
+          batch,
+          dim,
+          seqlen,
+          width,
+          num_seq_blocks,
+          conv_state_slot_stride);
+    }
+  });
+  return out;
+}
+
+// API aligned with GPUs
+//
+//   x: (batch, dim) or (batch, dim, seqlen)
+//   conv_state: (..., dim, state_len), where state_len >= width - 1
+//   weight: (dim, width)
+//   bias: (dim,)
+//   cache_seqlens: (batch,), dtype int32.
+//   conv_state_indices: (batch,), dtype int32
+//   pad_slot_id: int
+//   out: (batch, dim) or (batch, dim, seqlen)
+//
+at::Tensor causal_conv1d_update_cpu(
+    const at::Tensor& x,
+    const at::Tensor& conv_states,
+    const at::Tensor& weight,
+    const std::optional<at::Tensor>& bias,
+    bool silu_activation,
+    const std::optional<at::Tensor>& cache_seqlens,
+    const std::optional<at::Tensor>& conv_state_indices,
+    int64_t pad_slot_id,
+    bool is_vnni) {
+  CHECK_CONTIGUOUS(x);
+  CHECK_CONTIGUOUS(weight);
+  auto packed_w = is_vnni ? weight : causal_conv1d_weight_pack(weight);
+
+  // TODO: add multi-token prediction support
+  TORCH_CHECK(x.dim() == 2, "causal_conv1d_update_cpu: expect x to be 2D tensor.");
+  TORCH_CHECK(!cache_seqlens.has_value(), "causal_conv1d_update_cpu: don't support cache_seqlens.");
+
+  int64_t batch = x.size(0);
+  int64_t dim = x.size(1);
+  int64_t seqlen = 1;
+  int64_t width = weight.size(-1);
+
+  const auto scalar_type = x.scalar_type();
+  CHECK_EQ(weight.scalar_type(), scalar_type);
+  CHECK_OPTIONAL_SHAPE_DTYPE(bias, dim, scalar_type);
+  CHECK_OPTIONAL_SHAPE_DTYPE(conv_state_indices, batch, at::kInt);
+
+  CHECK_EQ(conv_states.scalar_type(), scalar_type);
+  CHECK_EQ(conv_states.size(1), dim);
+  CHECK_EQ(conv_states.size(2), width - 1);
+
+  // adjust `conv_states` to be contiguous on `dim`
+  if (conv_states.stride(-2) != 1) {
+    int64_t num_cache_lines = conv_states.size(0);
+    auto conv_states_copy = conv_states.clone();
+    conv_states.as_strided_({num_cache_lines, dim, width - 1}, {(width - 1) * dim, 1, dim});
+    conv_states.copy_(conv_states_copy);
+  }
+
+  // IMPORTANT: To make the kernal compatible with vLLM KV cache layout 
+  int64_t conv_state_slot_stride = conv_states.stride(0);
+  at::Tensor out = at::empty_like(x);
+  AT_DISPATCH_REDUCED_FLOATING_TYPES(scalar_type, "causal_conv1d_update_kernel_impl", [&] {
+    causal_conv1d_update_kernel_impl<scalar_t>(
+        out.data_ptr<scalar_t>(),
+        x.data_ptr<scalar_t>(),
+        conv_states.data_ptr<scalar_t>(),
+        packed_w.data_ptr<scalar_t>(),
+        conditional_data_ptr<scalar_t>(bias),
+        conditional_data_ptr<int32_t>(conv_state_indices),
+        silu_activation,
+        batch,
+        dim,
+        seqlen,
+        width,
+        conv_state_slot_stride);
+  });
+  return out;
+}
diff --git a/csrc/cpu/sgl-kernels/fla.cpp b/csrc/cpu/sgl-kernels/fla.cpp
new file mode 100644
index 000000000000..e939e1c52565
--- /dev/null
+++ b/csrc/cpu/sgl-kernels/fla.cpp
@@ -0,0 +1,1398 @@
+// Adapted from
+// https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
+
+// clang-format off
+
+#include "common.h"
+#include "gemm.h"
+#include "vec.h"
+#include "vec_pack.h"
+
+namespace {
+// For this cpu kernel, we have some innovations aside from the existing gpu kernels:
+// 1) Use less parallel loops, i.e. 4 including l2_norm.
+// 2) Fuse part of l2_norm with the rest of the computation.
+
+#define THREAD_BUFFER_ALLOC(dst, base_ptr, offset, type, size) \
+  type* dst = reinterpret_cast<type*>((base_ptr) + (offset));  \
+  offset += (size);
+
+template <typename scalar_t>
+inline void fill_stub(scalar_t* __restrict__ out, float val, int size) {
+  using Vec = at::vec::Vectorized<scalar_t>;
+  constexpr int kVecSize = Vec::size();
+  const Vec data_vec = Vec(static_cast<scalar_t>(val));
+  int d = 0;
+#pragma GCC unroll 4
+  for (; d <= size - kVecSize; d += kVecSize) {
+    data_vec.store(out + d);
+  }
+  if (size - d > 0) {
+    data_vec.store(out + d, size - d);
+  }
+}
+
+template <typename scalar_t, int64_t chunk_size = 64>
+void chunk_gated_delta_rule_kernel_impl(
+    scalar_t* __restrict__ out,                  // [B, T, HV, EV]
+    float* __restrict__ final_state_data,        // [N, HV, EK, EV]
+    const scalar_t* __restrict__ q_orig,         // [B, T, HK, EK]
+    const scalar_t* __restrict__ k_orig,         // [B, T, HK, EK]
+    const scalar_t* __restrict__ v_orig,         // [B, T, HV, EV]
+    const float* __restrict__ g_orig,            // [B, T, HV] FP32
+    const scalar_t* __restrict__ b_orig,         // [B, T, HV]
+    const int32_t* __restrict__ cu_seqlens_ptr,  // [N + 1] INT32
+    float* __restrict__ buff,
+    scalar_t* __restrict__ reduced_buff,
+    scalar_t* __restrict__ thread_buff,
+    const int32_t* __restrict__ chunk_offsets_ptr,
+    const int32_t* __restrict__ chunk_indices_ptr,
+    bool use_qk_l2norm_in_kernel,
+    const int64_t& batch_size,
+    const int64_t& global_seq_len,
+    const int64_t& qk_num_head,
+    const int64_t& v_num_head,
+    const int64_t& qk_head_size,
+    const int64_t& v_head_size,
+    const int64_t& qStrideH,
+    const int64_t& qStrideT,
+    const int64_t& kStrideH,
+    const int64_t& kStrideT,
+    const int64_t& vStrideH,
+    const int64_t& vStrideT,
+    const int64_t& oStrideH,
+    const int64_t& oStrideT,
+    const int64_t& global_total_seq_length,
+    const int64_t& global_num_chunk,
+    const int64_t& buff_size_16bit_per_thread,
+    double eps = 1e-5) {
+  int64_t gStrideH = 1;
+  int64_t gStrideT = v_num_head;
+  int64_t bStrideH = 1;
+  int64_t bStrideT = v_num_head;
+  int64_t final_state_StrideN = v_num_head * qk_head_size * v_head_size;
+  int64_t final_state_StrideH = qk_head_size * v_head_size;
+  int64_t final_state_StrideE = v_head_size;
+  int64_t head_group = v_num_head / qk_num_head;
+  float scale = 1.0 / std::sqrt(qk_head_size);
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  constexpr int64_t VecSize = bVec::size();
+  constexpr int64_t fVecSize = fVec::size();
+
+  // Data pointers
+  float* g_pad = buff;
+  float* core_attn_out = g_pad + v_num_head * global_total_seq_length;
+  float* decay_mask = core_attn_out + batch_size * v_num_head * global_total_seq_length * v_head_size;
+  float* v_beta_attn = decay_mask + v_num_head * global_total_seq_length * chunk_size;
+
+  scalar_t* q_pad = reduced_buff;
+  scalar_t* k_pad = q_pad + qk_num_head * global_total_seq_length * qk_head_size;
+  scalar_t* v_pad = k_pad + qk_num_head * global_total_seq_length * qk_head_size;
+  scalar_t* k_beta = v_pad + v_num_head * global_total_seq_length * v_head_size;
+  scalar_t* v_beta = k_beta + v_num_head * global_total_seq_length * qk_head_size;
+  scalar_t* k_cumdecay_reduced = v_beta + v_num_head * global_total_seq_length * v_head_size;
+  scalar_t* q_norm_sum = k_cumdecay_reduced + v_num_head * global_total_seq_length * qk_head_size;
+  scalar_t* k_norm_sum = q_norm_sum + qk_num_head * global_seq_len;
+
+  if (use_qk_l2norm_in_kernel) {
+    at::parallel_for(0, qk_num_head * global_seq_len, 0, [&](int64_t begin, int64_t end) {
+      int64_t h_qk = 0, l = 0;
+      data_index_init(begin, h_qk, qk_num_head, l, global_seq_len);
+      for (int64_t i = begin; i < end; ++i) {
+        auto q_norm_sum_ptr = q_norm_sum + h_qk * global_seq_len + l;
+        auto k_norm_sum_ptr = k_norm_sum + h_qk * global_seq_len + l;
+        float sum_q = float(0);
+        float sum_k = float(0);
+        fVec sum_q_fvec = fVec(float(0));
+        fVec sum_k_fvec = fVec(float(0));
+        int64_t q_offset = l * qStrideT + h_qk * qStrideH;
+        int64_t k_offset = l * qStrideT + h_qk * qStrideH;
+        int64_t d;
+        for (d = 0; d <= qk_head_size - VecSize; d += VecSize) {
+          bVec q_bvec = bVec::loadu(q_orig + q_offset + d);
+          fVec q_fvec0, q_fvec1;
+          std::tie(q_fvec0, q_fvec1) = at::vec::convert_to_float(q_bvec);
+          sum_q_fvec += q_fvec0 * q_fvec0;
+          sum_q_fvec += q_fvec1 * q_fvec1;
+          bVec k_bvec = bVec::loadu(k_orig + k_offset + d);
+          fVec k_fvec0, k_fvec1;
+          std::tie(k_fvec0, k_fvec1) = at::vec::convert_to_float(k_bvec);
+          sum_k_fvec += k_fvec0 * k_fvec0;
+          sum_k_fvec += k_fvec1 * k_fvec1;
+        }
+        sum_q += vec_reduce_sum(sum_q_fvec);
+        sum_k += vec_reduce_sum(sum_k_fvec);
+        q_norm_sum_ptr[0] = static_cast<scalar_t>(float(1) / std::sqrt(sum_q + eps));
+        k_norm_sum_ptr[0] = static_cast<scalar_t>(float(1) / std::sqrt(sum_k + eps));
+        data_index_step(h_qk, qk_num_head, l, global_seq_len);
+      }
+    });
+  }
+
+  // query = query * scale
+  // k_beta = key * beta.unsqueeze(-1)
+  // v_beta = value * beta.unsqueeze(-1)
+  // Padding for q/k/v/beta
+  at::parallel_for(0, qk_num_head * global_num_chunk, 1, [&](int64_t begin, int64_t end) {
+    int ompIdx = at::get_thread_num();
+    int64_t h_qk = 0, c = 0;
+    data_index_init(begin, h_qk, qk_num_head, c, global_num_chunk);
+    for ([[maybe_unused]] auto z : c10::irange(begin, end)) {
+      int64_t ib = chunk_indices_ptr[c * 2];      // idx_batch
+      int64_t ic = chunk_indices_ptr[c * 2 + 1];  // idx_chunk
+      int64_t l_orig = cu_seqlens_ptr[ib] + ic * chunk_size;
+      int64_t l = c * chunk_size;
+      bool is_tail = (c + 1 == chunk_offsets_ptr[ib + 1]);
+      int64_t seq_len = cu_seqlens_ptr[ib + 1] - cu_seqlens_ptr[ib];
+      int64_t real_chunk_size = is_tail ? seq_len - ic * chunk_size : chunk_size;
+      auto q_orig_ptr = q_orig + h_qk * qStrideH + l_orig * qStrideT;
+      auto k_orig_ptr = k_orig + h_qk * kStrideH + l_orig * kStrideT;
+      auto v_orig_ptr = v_orig + l_orig * vStrideT;
+      auto b_orig_ptr = b_orig + l_orig * bStrideT;
+      auto q_pad_ptr = q_pad + h_qk * global_total_seq_length * qk_head_size + l * qk_head_size;
+      auto k_pad_ptr = k_pad + h_qk * global_total_seq_length * qk_head_size + l * qk_head_size;
+      auto v_pad_ptr = v_pad + l * v_head_size;
+      auto k_beta_ptr = k_beta + l * qk_head_size;
+      auto v_beta_ptr = v_beta + l * v_head_size;
+
+      for (int64_t j = 0; j < real_chunk_size; j++) {
+        auto curr_q_orig = q_orig_ptr + j * qStrideT;
+        auto curr_k_orig = k_orig_ptr + j * kStrideT;
+        auto curr_q_pad = q_pad_ptr + j * qk_head_size;
+        auto curr_k_pad = k_pad_ptr + j * qk_head_size;
+        auto q_scale =
+            use_qk_l2norm_in_kernel ? *(q_norm_sum + h_qk * global_seq_len + l_orig + j) : static_cast<scalar_t>(1);
+        auto k_scale =
+            use_qk_l2norm_in_kernel ? *(k_norm_sum + h_qk * global_seq_len + l_orig + j) : static_cast<scalar_t>(1);
+        auto q_scale_vec = bVec(q_scale);
+        auto k_scale_vec = bVec(k_scale);
+        int64_t i = 0;
+        scalar_t scale_reduced = static_cast<scalar_t>(scale);
+        auto vec_scale_reduced = bVec(scale_reduced);
+        for (; i < fVecSize * (qk_head_size / fVecSize); i += fVecSize) {
+          auto tmp0 = bVec::loadu(curr_q_orig + i, fVecSize);
+          auto tmp1 = tmp0 * q_scale_vec * vec_scale_reduced;
+          tmp1.store(curr_q_pad + i, fVecSize);
+          auto tmp3 = bVec::loadu(curr_k_orig + i, fVecSize);
+          auto tmp4 = tmp3 * k_scale_vec;
+          tmp4.store(curr_k_pad + i, fVecSize);
+        }
+
+        for (auto hi = 0; hi < head_group; hi++) {
+          int64_t h = h_qk * head_group + hi;
+          auto curr_v_orig = v_orig_ptr + h * vStrideH + j * vStrideT;
+          auto curr_b_orig = b_orig_ptr + h * bStrideH + j * bStrideT;
+          scalar_t b_orig_val_reduced = *(curr_b_orig);
+          auto curr_v_pad = v_pad_ptr + h * global_total_seq_length * v_head_size + j * v_head_size;
+          auto curr_k_beta = k_beta_ptr + h * global_total_seq_length * qk_head_size + j * qk_head_size;
+          auto curr_v_beta = v_beta_ptr + h * global_total_seq_length * v_head_size + j * v_head_size;
+
+          // query = query * scale
+          // k_beta = key * beta.unsqueeze(-1)
+          int64_t i = 0;
+          auto vec_b_reduced = bVec(b_orig_val_reduced);
+          for (; i < fVecSize * (qk_head_size / fVecSize); i += fVecSize) {
+            auto tmp0 = bVec::loadu(curr_k_orig + i, fVecSize);
+            auto tmp2 = tmp0 * k_scale_vec * vec_b_reduced;
+            tmp2.store(curr_k_beta + i, fVecSize);
+          }
+          // v_beta = value * beta.unsqueeze(-1)
+          i = 0;
+          for (; i < VecSize * (v_head_size / VecSize); i += VecSize) {
+            auto tmp3 = bVec::loadu(curr_v_orig + i);
+            tmp3.store(curr_v_pad + i);
+            auto tmp5 = tmp3 * vec_b_reduced;
+            tmp5.store(curr_v_beta + i);
+          }
+        }
+      }
+
+      for (int64_t j = real_chunk_size; j < chunk_size; j++) {
+        auto curr_q_pad = q_pad_ptr + j * qk_head_size;
+        auto curr_k_pad = k_pad_ptr + j * qk_head_size;
+        int64_t i = 0;
+        auto vec_zero = bVec(0.0);
+        for (; i < VecSize * (qk_head_size / VecSize); i += VecSize) {
+          vec_zero.store(curr_q_pad + i);
+          vec_zero.store(curr_k_pad + i);
+        }
+        for (auto hi = 0; hi < head_group; hi++) {
+          int64_t h = h_qk * head_group + hi;
+          auto curr_v_pad = v_pad_ptr + h * global_total_seq_length * v_head_size + j * v_head_size;
+          auto curr_k_beta = k_beta_ptr + h * global_total_seq_length * qk_head_size + j * qk_head_size;
+          auto curr_v_beta = v_beta_ptr + h * global_total_seq_length * v_head_size + j * v_head_size;
+          int64_t i = 0;
+          for (; i < VecSize * (qk_head_size / VecSize); i += VecSize) {
+            vec_zero.store(curr_k_beta + i);
+          }
+          i = 0;
+          for (; i < VecSize * (v_head_size / VecSize); i += VecSize) {
+            vec_zero.store(curr_v_pad + i);
+            vec_zero.store(curr_v_beta + i);
+          }
+        }
+      }
+      // Move to the next query
+      data_index_step(h_qk, qk_num_head, c, global_num_chunk);
+    }
+  });
+
+  at::parallel_for(0, v_num_head * global_num_chunk, 1, [&](int64_t begin, int64_t end) {
+    int64_t h = 0, c = 0;
+    data_index_init(begin, h, v_num_head, c, global_num_chunk);
+    int ompIdx = at::get_thread_num();
+    int64_t offset = 0;
+    scalar_t* thread_buff_ptr = thread_buff + ompIdx * buff_size_16bit_per_thread;
+    THREAD_BUFFER_ALLOC(k_transpose, thread_buff_ptr, offset, scalar_t, qk_head_size * chunk_size);
+    THREAD_BUFFER_ALLOC(v_pack, thread_buff_ptr, offset, scalar_t, chunk_size * v_head_size);
+    THREAD_BUFFER_ALLOC(k_beta_g, thread_buff_ptr, offset, scalar_t, chunk_size * qk_head_size);
+    THREAD_BUFFER_ALLOC(k_beta_g_pack, thread_buff_ptr, offset, scalar_t, chunk_size * qk_head_size);
+    THREAD_BUFFER_ALLOC(curr_attn, thread_buff_ptr, offset, float, chunk_size* chunk_size * 2);
+    THREAD_BUFFER_ALLOC(curr_attn_reduced, thread_buff_ptr, offset, scalar_t, chunk_size * chunk_size);
+    THREAD_BUFFER_ALLOC(k_cumdecay, thread_buff_ptr, offset, float, chunk_size* qk_head_size * 2);
+    THREAD_BUFFER_ALLOC(row, thread_buff_ptr, offset, float, chunk_size * 2);
+    THREAD_BUFFER_ALLOC(updated, thread_buff_ptr, offset, float, chunk_size * 2);
+    for ([[maybe_unused]] auto z : c10::irange(begin, end)) {
+      int64_t ib = chunk_indices_ptr[c * 2];      // idx_batch
+      int64_t ic = chunk_indices_ptr[c * 2 + 1];  // idx_chunk
+      int64_t l_orig = cu_seqlens_ptr[ib] + ic * chunk_size;
+      int64_t seq_len = cu_seqlens_ptr[ib + 1] - cu_seqlens_ptr[ib];
+      int64_t h_qk = h / head_group;
+      auto curr_g_orig = g_orig + h * gStrideH + l_orig * gStrideT;
+      auto curr_g_pad = g_pad + h * global_total_seq_length + c * chunk_size;
+      auto curr_decay_mask = decay_mask + h * global_total_seq_length * chunk_size + c * chunk_size * chunk_size;
+      auto curr_k_pad = k_pad + h_qk * global_total_seq_length * qk_head_size + c * chunk_size * qk_head_size;
+      auto curr_k_beta = k_beta + h * global_total_seq_length * qk_head_size + c * chunk_size * qk_head_size;
+      auto curr_k_cumdecay_reduced =
+          k_cumdecay_reduced + h * global_total_seq_length * qk_head_size + c * chunk_size * qk_head_size;
+      auto curr_v_beta = v_beta + h * global_total_seq_length * v_head_size + c * chunk_size * v_head_size;
+      auto curr_value = v_beta_attn + h * global_total_seq_length * v_head_size + c * chunk_size * v_head_size;
+
+      float acc_val = 0;
+      for (int64_t i = 0; i < chunk_size; i++) {
+        // Padding for g
+        // g = g.cumsum(dim=-1)
+        // g: [B, HV, num_chunk, chunk_size]
+        if (ic * chunk_size + i < seq_len) {
+          acc_val += curr_g_orig[i * gStrideT];
+        }
+        curr_g_pad[i] = acc_val;
+        // decay_mask = ((g.unsqueeze(-1) - g.unsqueeze(-2)).tril().exp().float()).tril()
+        // decay_mask: [B, HV, num_chunk, chunk_size, chunk_size]
+        float curr_g_pad_i = static_cast<float>(curr_g_pad[i]);
+        auto vec_curr_g_pad_i = fVec(curr_g_pad_i);
+        int64_t j = 0;
+        int64_t len = i + 1;
+        for (; j < fVecSize * (len / fVecSize); j += fVecSize) {
+          auto tmp0 = fVec::loadu(curr_g_pad + j);
+          auto tmp1 = vec_curr_g_pad_i - tmp0;
+          auto tmp2 = tmp1.exp_u20();
+          tmp2.store(curr_decay_mask + i * chunk_size + j);
+        }
+        if (j < len) {
+          auto tmp0 = fVec::loadu(curr_g_pad + j, len - j);
+          auto tmp1 = vec_curr_g_pad_i - tmp0;
+          auto tmp2 = tmp1.exp_u20();
+          tmp2.store(curr_decay_mask + i * chunk_size + j, len - j);
+        }
+      }
+
+      // attn = k_beta @ key.transpose(-1, -2)
+      // attn: [B, HV, num_chunk, chunk_size, chunk_size]
+      // transpose and pack for key
+      pack_vnni<scalar_t>(
+          /*    dst */ k_transpose,
+          /*    src */ curr_k_pad,
+          /*     N  */ chunk_size,
+          /*     K  */ qk_head_size,
+          /* ld_src */ qk_head_size,
+          /* ld_dst */ chunk_size);
+      // k_beta @ key.transpose(-1, -2)
+      at::native::cpublas::brgemm(
+          /*     M */ chunk_size,
+          /*     N */ chunk_size,
+          /*     K */ qk_head_size,
+          /*   lda */ qk_head_size,
+          /*   ldb */ chunk_size,
+          /*   ldc */ chunk_size,
+          /* add_C */ false,
+          /*     A */ curr_k_beta,
+          /*     B */ k_transpose,
+          /*     C */ curr_attn);
+      // attn = attn * decay_mask
+      for (int64_t m = 0; m < chunk_size; m++) {
+        at::vec::map2<float>(
+            [](fVec x, fVec y) { return fVec(0) - x * y; },
+            curr_attn + m * chunk_size,
+            curr_attn + m * chunk_size,
+            curr_decay_mask + m * chunk_size,
+            chunk_size);
+      }
+
+      // chunk decay
+      // attn: [B, HV, num_chunk, chunk_size, chunk_size]
+      // mask = torch.triu(torch.ones(chunk_size, chunk_size, dtype=torch.bool, device=query.device), diagonal=0)
+      // attn = -attn.masked_fill(mask, 0)
+      // attn[..., i, :i] = row + (row.unsqueeze(-1) * sub).sum(-2) [B, HV, num_chunk, i]
+      // attn = attn + torch.eye(chunk_size, dtype=attn.dtype, device=attn.device)
+      // attn = -attn.masked_fill(mask, 0)
+      for (int i = 0; i < chunk_size; i++) {
+        const auto vec_zero = fVec(0);
+        int64_t len = chunk_size - i;
+        int64_t front = len % fVecSize;
+        int64_t j = i;
+        // first masked vec for alignment
+        if (front > 0) {
+          vec_zero.store(curr_attn + i * chunk_size + j, front);
+          j += front;
+        }
+        for (; j < fVecSize * (chunk_size / fVecSize); j += fVecSize) {
+          vec_zero.store(curr_attn + i * chunk_size + j);
+        }
+      }
+      for (int i = 1; i < chunk_size; i++) {
+        // row = attn[..., i, :i] [B, HK, num_chunk, i]
+        int64_t j = 0;
+        int64_t len = i;
+        for (; j < fVecSize * (len / fVecSize); j += fVecSize) {
+          auto tmp0 = fVec::loadu(curr_attn + i * chunk_size + j);
+          tmp0.store(row + j);
+        }
+        if (j < len) {
+          auto tmp0 = fVec::loadu(curr_attn + i * chunk_size + j, len - j);
+          tmp0.store(row + j, len - j);
+        }
+        // (row.unsqueeze(-1) * sub).sum(-2)
+        fill_stub(updated, 0, i);
+        for (int k = 0; k < i; k++) {
+          float row_k = row[k];
+          auto vec_row_k = fVec(row_k);
+          int64_t j = 0;
+          int64_t len = i;
+          for (; j < fVecSize * (len / fVecSize); j += fVecSize) {
+            auto tmp0 = fVec::loadu(curr_attn + k * chunk_size + j);
+            auto tmp1 = vec_row_k * tmp0;
+            auto tmp2 = fVec::loadu(updated + j);
+            auto tmp3 = tmp1 + tmp2;
+            tmp3.store(updated + j);
+          }
+          if (j < len) {
+            auto tmp0 = fVec::loadu(curr_attn + k * chunk_size + j, len - j);
+            auto tmp1 = vec_row_k * tmp0;
+            auto tmp2 = fVec::loadu(updated + j);
+            auto tmp3 = tmp1 + tmp2;
+            tmp3.store(updated + j, len - j);
+          }
+        }
+        // attn[..., i, :i] = row + sum(...)
+        j = 0;
+        len = i;
+        for (; j < fVecSize * (len / fVecSize); j += fVecSize) {
+          auto tmp0 = fVec::loadu(row + j);
+          auto tmp1 = fVec::loadu(updated + j);
+          auto tmp2 = tmp0 + tmp1;
+          tmp2.store(curr_attn + i * chunk_size + j);
+        }
+        if (j < len) {
+          auto tmp0 = fVec::loadu(row + j, len - j);
+          auto tmp1 = fVec::loadu(updated + j, len - j);
+          auto tmp2 = tmp0 + tmp1;
+          tmp2.store(curr_attn + i * chunk_size + j, len - j);
+        }
+      }
+      for (int i = 0; i < chunk_size; i++) {
+        curr_attn[i * chunk_size + i] += 1.0f;
+        at::vec::map<scalar_t>(
+            [](fVec x) { return x; }, curr_attn_reduced + i * chunk_size, curr_attn + i * chunk_size, chunk_size);
+      }
+
+      // v_beta_attn = attn @ v_beta
+      // k_cumdecay = attn @ (k_beta * g.exp().unsqueeze(-1))
+      // v_beta_attn: [B, HV, num_chunk, chunk_size, EV]
+      // k_beta_g = k_beta * g: [B, HV, num_chunk, chunk_size, EK]
+      // k_cumdecay: [B, HV, num_chunk, chunk_size, EK]
+      // pack for value
+      pack_vnni2<scalar_t>(
+          /*    dst */ v_pack,
+          /*    src */ curr_v_beta,
+          /*     N  */ chunk_size,
+          /*     K  */ v_head_size,
+          /* ld_src */ v_head_size,
+          /* ld_dst */ v_head_size);
+      // value = attn @ v_beta
+      at::native::cpublas::brgemm(
+          /*     M */ chunk_size,
+          /*     N */ v_head_size,
+          /*     K */ chunk_size,
+          /*   lda */ chunk_size,
+          /*   ldb */ v_head_size,
+          /*   ldc */ v_head_size,
+          /* add_C */ false,
+          /*     A */ curr_attn_reduced,
+          /*     B */ v_pack,
+          /*     C */ curr_value);
+      // k_beta_g = k_beta * g.exp().unsqueeze(-1)
+      for (int64_t j = 0; j < chunk_size; j++) {
+        int64_t i = 0;
+        float g_exp = std::exp(curr_g_pad[j]);
+        scalar_t g_exp_reduced = static_cast<scalar_t>(g_exp);
+        auto vec_g_exp_reduced = bVec(g_exp_reduced);
+        for (; i < VecSize * (qk_head_size / VecSize); i += VecSize) {
+          auto tmp0 = bVec::loadu(curr_k_beta + j * qk_head_size + i);
+          auto tmp1 = tmp0 * vec_g_exp_reduced;
+          tmp1.store(k_beta_g + j * qk_head_size + i);
+        }
+      }
+      // pack for k_beta_g
+      pack_vnni2<scalar_t>(
+          /*    dst */ k_beta_g_pack,
+          /*    src */ k_beta_g,
+          /*     N  */ chunk_size,
+          /*     K  */ qk_head_size,
+          /* ld_src */ qk_head_size,
+          /* ld_dst */ qk_head_size);
+      // k_cumdecay = attn @ k_beta_g
+      at::native::cpublas::brgemm(
+          /*     M */ chunk_size,
+          /*     N */ qk_head_size,
+          /*     K */ chunk_size,
+          /*   lda */ chunk_size,
+          /*   ldb */ qk_head_size,
+          /*   ldc */ qk_head_size,
+          /* add_C */ false,
+          /*     A */ curr_attn_reduced,
+          /*     B */ k_beta_g_pack,
+          /*     C */ k_cumdecay);
+      for (int i = 0; i < chunk_size; i++) {
+        at::vec::map<scalar_t>(
+            [](fVec x) { return x; },
+            curr_k_cumdecay_reduced + i * qk_head_size,
+            k_cumdecay + i * qk_head_size,
+            qk_head_size);
+      }
+
+      // Move to the next query
+      data_index_step(h, v_num_head, c, global_num_chunk);
+    }
+  });
+
+  // for each chunk
+  at::parallel_for(0, batch_size * v_num_head, 1, [&](int64_t begin, int64_t end) {
+    int64_t b = 0, h = 0;
+    data_index_init(begin, b, batch_size, h, v_num_head);
+    int ompIdx = at::get_thread_num();
+    int64_t offset =
+        /* k_transpose */ qk_head_size * chunk_size +
+        /* v_pack */ chunk_size * v_head_size +
+        /* k_beta_g  */ chunk_size * qk_head_size +
+        /* k_beta_g_pack  */ chunk_size * qk_head_size +
+        /* attn */ chunk_size * chunk_size * 2 +
+        /* attn_reduced */ chunk_size * chunk_size +
+        /* k_cumdecay */ chunk_size * qk_head_size * 2 +
+        /* row */ chunk_size * 2 +
+        /* updated */ chunk_size * 2;
+    scalar_t* thread_buff_ptr = thread_buff + ompIdx * buff_size_16bit_per_thread;
+    THREAD_BUFFER_ALLOC(
+        curr_last_recurrent_state_reduced, thread_buff_ptr, offset, scalar_t, qk_head_size * v_head_size);
+    THREAD_BUFFER_ALLOC(
+        curr_last_recurrent_state_pack_reduced, thread_buff_ptr, offset, scalar_t, qk_head_size * v_head_size);
+    THREAD_BUFFER_ALLOC(k_transpose_i, thread_buff_ptr, offset, scalar_t, qk_head_size * chunk_size);
+    THREAD_BUFFER_ALLOC(attn_i, thread_buff_ptr, offset, float, chunk_size* chunk_size * 2);
+    THREAD_BUFFER_ALLOC(attn_i_reduced, thread_buff_ptr, offset, scalar_t, chunk_size * chunk_size);
+    THREAD_BUFFER_ALLOC(v_prime, thread_buff_ptr, offset, float, chunk_size* v_head_size * 2);
+    THREAD_BUFFER_ALLOC(v_prime_reduced, thread_buff_ptr, offset, scalar_t, chunk_size * v_head_size);
+    THREAD_BUFFER_ALLOC(v_prime_pack_reduced, thread_buff_ptr, offset, scalar_t, chunk_size * v_head_size);
+    THREAD_BUFFER_ALLOC(qg, thread_buff_ptr, offset, scalar_t, chunk_size * qk_head_size);
+    THREAD_BUFFER_ALLOC(attn_inter, thread_buff_ptr, offset, float, chunk_size* v_head_size * 2);
+    THREAD_BUFFER_ALLOC(kg, thread_buff_ptr, offset, scalar_t, chunk_size * qk_head_size);
+    THREAD_BUFFER_ALLOC(kg_transpose, thread_buff_ptr, offset, scalar_t, qk_head_size * chunk_size);
+    THREAD_BUFFER_ALLOC(kgv, thread_buff_ptr, offset, float, qk_head_size* v_head_size * 2);
+
+    for ([[maybe_unused]] auto z : c10::irange(begin, end)) {
+      int64_t start_q = cu_seqlens_ptr[b];
+      int64_t seq_len = cu_seqlens_ptr[b + 1] - start_q;
+      int64_t num_chunk = chunk_offsets_ptr[b + 1] - chunk_offsets_ptr[b];
+      int64_t chunk_offset = chunk_offsets_ptr[b];
+      int64_t len_offset = chunk_offset * chunk_size;
+
+      int64_t h_qk = h / head_group;
+      auto out_ptr = out + start_q * oStrideT;
+      auto curr_q = q_pad + len_offset * qk_head_size +
+                    h_qk * global_total_seq_length * qk_head_size;  // [num_chunk, chunk_size, EK]
+      auto curr_k = k_pad + len_offset * qk_head_size +
+                    h_qk * global_total_seq_length * qk_head_size;            // [num_chunk, chunk_size, EK]
+      auto curr_v = v_beta_attn + h * global_total_seq_length * v_head_size;  // [num_chunk, chunk_size, EV]
+      auto curr_decay_mask =
+          decay_mask + h * global_total_seq_length * chunk_size;  // [num_chunk, chunk_size, chunk_size]
+      auto curr_k_cumdecay_reduced =
+          k_cumdecay_reduced + h * global_total_seq_length * qk_head_size;  // [num_chunk, chunk_size, EK]
+      auto curr_last_recurrent_state =
+          final_state_data + b * final_state_StrideN + h * final_state_StrideH;  // [EK, EV]
+      auto curr_g_pad = g_pad + len_offset + h * global_total_seq_length;        // [num_chunk, chunk_size]
+      auto curr_core_attn_out = core_attn_out + len_offset * v_head_size +
+                                h * global_total_seq_length * v_head_size;  // [num_chunk, chunk_size, EV]
+      for (int64_t c = 0; c < num_chunk; c++) {
+        for (int i = 0; i < qk_head_size; i++) {
+          at::vec::map<scalar_t>(
+              [](fVec x) { return x; },
+              curr_last_recurrent_state_reduced + i * v_head_size,
+              curr_last_recurrent_state + i * v_head_size,
+              v_head_size);
+        }
+        auto q_i = curr_q + c * chunk_size * qk_head_size;                                   // [chunk_size, EK]
+        auto k_i = curr_k + c * chunk_size * qk_head_size;                                   // [chunk_size, EK]
+        auto v_i = curr_v + (chunk_offset + c) * chunk_size * v_head_size;                   // [chunk_size, EV]
+        auto decay_mask_i = curr_decay_mask + (chunk_offset + c) * chunk_size * chunk_size;  // [chunk_size, chunk_size]
+        auto k_cumdecay_i_reduced =
+            curr_k_cumdecay_reduced + (chunk_offset + c) * chunk_size * qk_head_size;  // [chunk_size, EK]
+        auto g_pad_i = curr_g_pad + c * chunk_size;                                    // [chunk_size]
+        auto core_attn_out_i = curr_core_attn_out + c * chunk_size * v_head_size;      // [chunk_size, EV]
+
+        // attn_i = (q_i @ k_i.transpose(-1, -2) * decay_mask[:, :, i]).masked_fill_(mask, 0)
+        // k_transpose_i = k_i.transpose(-1, -2)
+        pack_vnni<scalar_t>(
+            /*    dst */ k_transpose_i,
+            /*    src */ k_i,
+            /*     N  */ chunk_size,
+            /*     K  */ qk_head_size,
+            /* ld_src */ qk_head_size,
+            /* ld_dst */ chunk_size);
+        // attn_i = q_i @ k_transpose_i
+        at::native::cpublas::brgemm(
+            /* M */ chunk_size,
+            /* N */ chunk_size,
+            /* K */ qk_head_size,
+            /* lda */ qk_head_size,
+            /* ldb */ chunk_size,
+            /* ldc */ chunk_size,
+            /* add_C */ false,
+            /* A */ q_i,
+            /* B */ k_transpose_i,
+            /* C */ attn_i);
+        // attn_i = attn_i * decay_mask_i
+        for (int64_t m = 0; m < chunk_size; m++) {
+          auto attn_i_m = attn_i + m * chunk_size;
+          auto attn_i_reduced_m = attn_i_reduced + m * chunk_size;
+          auto decay_mask_i_m = decay_mask_i + m * chunk_size;
+          int64_t n = 0;
+          for (; n < fVecSize * (chunk_size / fVecSize); n += fVecSize) {
+            auto tmp0 = fVec::loadu(attn_i_m + n);
+            auto tmp1 = fVec::loadu(decay_mask_i_m + n);
+            auto tmp2 = tmp0 * tmp1;
+            auto tmp3 = at::vec::convert<scalar_t>(tmp2);
+            tmp3.store(attn_i_reduced_m + n, fVecSize);
+          }
+          if (n < chunk_size) {
+            auto tmp0 = fVec::loadu(attn_i_m + n, chunk_size - n);
+            auto tmp1 = fVec::loadu(decay_mask_i_m + n, chunk_size - n);
+            auto tmp2 = tmp0 * tmp1;
+            auto tmp3 = at::vec::convert<scalar_t>(tmp2);
+            tmp3.store(attn_i_reduced_m + n, chunk_size - n);
+          }
+        }
+        // mask = torch.triu(torch.ones(chunk_size, chunk_size, dtype=torch.bool, device=query.device), diagonal=1)
+        // attn_i = attn_i.masked_fill_(mask, 0)
+        for (int i = 0; i < chunk_size - 1; i++) {
+          const auto vec_zero = bVec(0);
+          int64_t len = chunk_size - i - 1;
+          int64_t front = len % VecSize;
+          int64_t j = i + 1;
+          // first masked vec for alignment
+          if (front > 0) {
+            vec_zero.store(attn_i_reduced + i * chunk_size + j, front);
+            j += front;
+          }
+          for (; j < VecSize * (chunk_size / VecSize); j += VecSize) {
+            vec_zero.store(attn_i_reduced + i * chunk_size + j);
+          }
+        }
+
+        // pack for curr_last_recurrent_state
+        pack_vnni2<scalar_t>(
+            /*    dst */ curr_last_recurrent_state_pack_reduced,
+            /*    src */ curr_last_recurrent_state_reduced,
+            /*     N  */ qk_head_size,
+            /*     K  */ v_head_size,
+            /* ld_src */ v_head_size,
+            /* ld_dst */ v_head_size);
+
+        // v_prime = k_cumdecay_i @ curr_last_recurrent_state: [chunk_size, EV]
+        // k_cumdecay_i: [chunk_size, EK]
+        // curr_last_recurrent_state: [EK, EV]
+        at::native::cpublas::brgemm(
+            /*     M */ chunk_size,
+            /*     N */ v_head_size,
+            /*     K */ qk_head_size,
+            /*   lda */ qk_head_size,
+            /*   ldb */ v_head_size,
+            /*   ldc */ v_head_size,
+            /* add_C */ false,
+            /*     A */ k_cumdecay_i_reduced,
+            /*     B */ curr_last_recurrent_state_pack_reduced,
+            /*     C */ v_prime);
+
+        // v_new = v_prime = v_i - v_prime
+        // v_i: [chunk_size, EV]
+        for (int64_t m = 0; m < chunk_size; m++) {
+          int64_t i = 0;
+          for (; i < fVecSize * (v_head_size / fVecSize); i += fVecSize) {
+            auto tmp0 = fVec::loadu(v_i + m * v_head_size + i);
+            auto tmp1 = fVec::loadu(v_prime + m * v_head_size + i);
+            auto tmp2 = tmp0 - tmp1;
+            auto tmp3 = at::vec::convert<scalar_t>(tmp2);
+            tmp3.store(v_prime_reduced + m * v_head_size + i, fVecSize);
+          }
+        }
+
+        // attn_inter = (q_i * g[:, :, i, :, None].exp()) @ last_recurrent_state
+        // qg = q_i * g[:, :, i, :, None].exp(): [chunk_size, EK]
+        // q_i: [chunk_size, EK]
+        // g[:, :, i, :, None]: [chunk_size, 1]
+        for (int64_t m = 0; m < chunk_size; m++) {
+          auto g_pad_i_m = g_pad_i + m;
+          auto g_exp = std::exp(*g_pad_i_m);
+          int64_t i = 0;
+          scalar_t g_exp_reduced = static_cast<scalar_t>(g_exp);
+          auto vec_g_exp_reduced = bVec(g_exp_reduced);
+          for (; i < VecSize * (qk_head_size / VecSize); i += VecSize) {
+            auto tmp0 = bVec::loadu(q_i + m * qk_head_size + i);
+            auto tmp2 = tmp0 * vec_g_exp_reduced;
+            tmp2.store(qg + m * qk_head_size + i);
+          }
+        }
+        // attn_inter = qg @ curr_last_recurrent_state: [chunk_size, EV]
+        // curr_last_recurrent_state: [EK, EV]
+        at::native::cpublas::brgemm(
+            /* M */ chunk_size,
+            /* N */ v_head_size,
+            /* K */ qk_head_size,
+            /* lda */ qk_head_size,
+            /* ldb */ v_head_size,
+            /* ldc */ v_head_size,
+            /* add_C */ false,
+            /* A */ qg,
+            /* B */ curr_last_recurrent_state_pack_reduced,
+            /* C */ attn_inter);
+
+        // core_attn_out[:, :, i] = attn_inter + attn_i @ v_new
+        // pack for v_prime
+        pack_vnni2<scalar_t>(
+            /*    dst */ v_prime_pack_reduced,
+            /*    src */ v_prime_reduced,
+            /*     N  */ chunk_size,
+            /*     K  */ v_head_size,
+            /* ld_src */ v_head_size,
+            /* ld_dst */ v_head_size);
+        // attn_inter = attn_inter + attn_i @ v_new: [chunk_size, EV]
+        // attn_i: [chunk_size, chunk_size]
+        // v_new: [chunk_size, EV]
+        at::native::cpublas::brgemm(
+            /* M */ chunk_size,
+            /* N */ v_head_size,
+            /* K */ chunk_size,
+            /* lda */ chunk_size,
+            /* ldb */ v_head_size,
+            /* ldc */ v_head_size,
+            /* add_C */ true,
+            /* A */ attn_i_reduced,
+            /* B */ v_prime_pack_reduced,
+            /* C */ attn_inter);
+
+        // core_attn_out[:, :, i] = attn_inter
+        for (int64_t m = 0; m < chunk_size; m++) {
+          at::vec::map<float>(
+              [](fVec x) { return x; }, core_attn_out_i + m * v_head_size, attn_inter + m * v_head_size, v_head_size);
+        }
+
+        // last_recurrent_state = (
+        //     last_recurrent_state * g[:, :, i, -1, None, None].exp()
+        //     + (k_i * (g[:, :, i, -1, None] - g[:, :, i]).exp()[..., None]).transpose(-1, -2) @ v_new
+        // )
+        // 1) last_recurrent_state * g[:, :, i, -1, None, None].exp()
+        // curr_last_recurrent_state: [EK, EV]
+        // g[:, :, i, -1, None, None]: [1, 1]
+        // last_recurrent_state * g[:, :, i, -1, None, None].exp(): [EK, EV]
+        auto g_pad_i_last = g_pad_i + chunk_size - 1;
+        auto g_exp_last = std::exp(g_pad_i_last[0]);
+        for (int64_t m = 0; m < qk_head_size; m++) {
+          int64_t i = 0;
+          auto vec_g_exp_last = fVec(g_exp_last);
+          for (; i < fVecSize * (v_head_size / fVecSize); i += fVecSize) {
+            auto tmp0 = bVec::loadu(curr_last_recurrent_state_reduced + m * v_head_size + i);
+            auto tmp1 = at::vec::convert<float>(tmp0);
+            auto tmp2 = tmp1 * vec_g_exp_last;
+            tmp2.store(curr_last_recurrent_state + m * v_head_size + i);
+          }
+          if (i < v_head_size) {
+            auto tmp0 = bVec::loadu(curr_last_recurrent_state_reduced + m * v_head_size + i, v_head_size - i);
+            auto tmp1 = at::vec::convert<float>(tmp0);
+            auto tmp2 = tmp1 * vec_g_exp_last;
+            tmp2.store(curr_last_recurrent_state + m * v_head_size + i, v_head_size - i);
+          }
+        }
+        // 2) (k_i * (g[:, :, i, -1, None] - g[:, :, i]).exp()[..., None]).transpose(-1, -2) @ v_new
+        // k_i: [chunk_size, EK]
+        // g[:, :, i, -1, None]: [1]
+        // g[:, :, i]: [chunk_size]
+        // (g[:, :, i, -1, None] - g[:, :, i]).exp()[..., None]: [chunk_size, 1]
+        // kg = k_i * (g[:, :, i, -1, None] - g[:, :, i]).exp()[..., None]: [chunk_size, EK]
+        // (k_i * (g[:, :, i, -1, None] - g[:, :, i]).exp()[..., None]).transpose(-1, -2): [EK, chunk_size]
+        // v_new: [chunk_size, EV]
+        // (k_i * (g[:, :, i, -1, None] - g[:, :, i]).exp()[..., None]).transpose(-1, -2) @ v_new: [EK, EV]
+        // kg = k_i * (g[:, :, i, -1, None] - g[:, :, i]).exp()[..., None]
+        for (int64_t m = 0; m < chunk_size; m++) {
+          auto g_exp = std::exp((g_pad_i_last[0] - g_pad_i[m]));
+          int64_t i = 0;
+          scalar_t g_exp_reduced = static_cast<scalar_t>(g_exp);
+          auto vec_g_exp_reduced = bVec(g_exp_reduced);
+          for (; i < VecSize * (qk_head_size / VecSize); i += VecSize) {
+            auto tmp0 = bVec::loadu(k_i + m * qk_head_size + i);
+            auto tmp2 = tmp0 * vec_g_exp_reduced;
+            tmp2.store(kg + m * qk_head_size + i);
+          }
+        }
+        // kg.transpose(-1, -2): [EK, chunk_size]
+        at::native::utils::transpose<scalar_t>(
+            /* M */ chunk_size,
+            /* N */ qk_head_size,
+            /* src */ kg,
+            /* ld_src */ qk_head_size,
+            /* dst */ kg_transpose,
+            /* ld_dst */ chunk_size);
+        // kgv = kg.transpose(-1, -2) @ v_new
+        // v_new: [chunk_size, EV]
+        at::native::cpublas::brgemm(
+            /* M */ qk_head_size,
+            /* N */ v_head_size,
+            /* K */ chunk_size,
+            /* lda */ chunk_size,
+            /* ldb */ v_head_size,
+            /* ldc */ v_head_size,
+            /* add_C */ false,
+            /* A */ kg_transpose,
+            /* B */ v_prime_pack_reduced,
+            /* C */ kgv);
+        // last_recurrent_state = 1) + 2)
+        for (int64_t m = 0; m < qk_head_size; m++) {
+          at::vec::map2<float>(
+              [](fVec x, fVec y) { return x + y; },
+              curr_last_recurrent_state + m * v_head_size,
+              curr_last_recurrent_state + m * v_head_size,
+              kgv + m * v_head_size,
+              v_head_size);
+        }
+      }
+
+      // core_attn_out -> output
+      // output: [B, T, HV, EV]
+      // core_attn_out: [B, HV, padded_T, EV]
+      auto curr_out = out_ptr + h * oStrideH;
+      for (int64_t m = 0; m < seq_len; m++) {
+        at::vec::map<scalar_t>(
+            [](fVec x) { return x; }, curr_out + m * oStrideT, curr_core_attn_out + m * v_head_size, v_head_size);
+      }
+
+      // Move to the next query
+      data_index_step(b, batch_size, h, v_num_head);
+    }
+  });
+}
+
+inline float softplus(float x, double threshold = 20.0) {
+  if (x > threshold)
+    return x;
+  else if (x < -threshold)
+    return std::exp(x);
+  else
+    return std::log1p(std::exp(x));
+}
+
+inline at::vec::Vectorized<float> softplus(const at::vec::Vectorized<float>& x, double threshold = 20.0) {
+  using Vec = at::vec::Vectorized<float>;
+  Vec mask_hi = x > Vec(threshold);
+  Vec mask_lo = x < Vec(-threshold);
+
+  Vec expx = x.exp_u20();
+  Vec log1pex = (expx + Vec(1.0f)).log();
+
+  return Vec::blendv(Vec::blendv(log1pex, expx, mask_lo), x, mask_hi);
+}
+
+template <typename scalar_t, typename param_t>
+void fused_sigmoid_gating_delta_rule_update_kernel_impl(
+    const scalar_t* __restrict__ q_ptr,
+    const scalar_t* __restrict__ k_ptr,
+    const scalar_t* __restrict__ v_ptr,
+    const param_t* __restrict__ A_log_ptr,
+    const scalar_t* __restrict__ a_ptr,
+    const scalar_t* __restrict__ dt_bias_ptr,
+    const scalar_t* __restrict__ b_ptr,
+    const int32_t* __restrict__ indices_ptr,
+    float* __restrict__ state_ptr,
+    scalar_t* __restrict__ o_ptr,
+    float* __restrict__ qk_scale_buf,
+    int64_t seq_len,
+    int64_t batch_size,
+    int64_t num_heads,
+    int64_t head_dim,
+    int64_t v_num_heads,
+    int64_t v_head_dim,
+    int64_t q_strideB,
+    int64_t q_strideS,
+    int64_t q_strideH,
+    int64_t k_strideB,
+    int64_t k_strideS,
+    int64_t k_strideH,
+    int64_t v_strideB,
+    int64_t v_strideS,
+    int64_t v_strideH,
+    int64_t state_slot_stride,
+    bool use_qk_l2norm_in_kernel,
+    double softplus_threshold) {
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+
+  constexpr int64_t VecSize = bVec::size();
+  constexpr int64_t fVecSize = fVec::size();
+  int64_t group_size = v_num_heads / num_heads;
+  double scale = 1 / std::sqrt(head_dim);
+  fVec scale_vec = fVec(scale);
+  if (use_qk_l2norm_in_kernel) {
+    float eps = 1e-5;
+    at::parallel_for(0, batch_size * seq_len * num_heads, 0, [&](int64_t begin, int64_t end) {
+      int64_t bi{0}, si{0}, ni{0};
+      data_index_init(begin, bi, batch_size, si, seq_len, ni, num_heads);
+      for (int64_t i = begin; i < end; ++i) {
+        float sum_q = float(0);
+        float sum_k = float(0);
+        fVec sum_q_fvec = fVec(float(0));
+        fVec sum_k_fvec = fVec(float(0));
+        int64_t q_offset = bi * q_strideB + si * q_strideS + ni * q_strideH;
+        int64_t k_offset = bi * k_strideB + si * k_strideS + ni * k_strideH;
+        int64_t q_scale_offset = bi * seq_len * num_heads + si * num_heads + ni;
+        int64_t k_scale_offset = q_scale_offset + batch_size * seq_len * num_heads;
+        int64_t d;
+#pragma GCC unroll 4
+        for (d = 0; d <= head_dim - VecSize; d += VecSize) {
+          bVec q_bvec = bVec::loadu(q_ptr + q_offset + d);
+          fVec q_fvec0, q_fvec1;
+          std::tie(q_fvec0, q_fvec1) = at::vec::convert_to_float(q_bvec);
+          sum_q_fvec += q_fvec0 * q_fvec0;
+          sum_q_fvec += q_fvec1 * q_fvec1;
+          bVec k_bvec = bVec::loadu(k_ptr + k_offset + d);
+          fVec k_fvec0, k_fvec1;
+          std::tie(k_fvec0, k_fvec1) = at::vec::convert_to_float(k_bvec);
+          sum_k_fvec += k_fvec0 * k_fvec0;
+          sum_k_fvec += k_fvec1 * k_fvec1;
+        }
+#pragma GCC unroll 4
+        for (; d < head_dim; ++d) {
+          float q_val = static_cast<float>(q_ptr[q_offset + d]);
+          sum_q += q_val * q_val;
+          float k_val = static_cast<float>(k_ptr[k_offset + d]);
+          sum_k += k_val * k_val;
+        }
+
+        sum_q += vec_reduce_sum(sum_q_fvec);
+        sum_k += vec_reduce_sum(sum_k_fvec);
+        qk_scale_buf[q_scale_offset] = float(1) / std::sqrt(sum_q + eps);
+        qk_scale_buf[k_scale_offset] = float(1) / std::sqrt(sum_k + eps);
+
+        data_index_step(bi, batch_size, si, seq_len, ni, num_heads);
+      }
+    });
+  }
+  at::parallel_for(0, batch_size * seq_len * v_num_heads, 0, [&](int64_t begin, int64_t end) {
+    int64_t bi{0}, si{0}, ni{0};
+    data_index_init(begin, bi, batch_size, si, seq_len, ni, v_num_heads);
+    for (int64_t i = begin; i < end; ++i) {
+      int64_t cache_index = indices_ptr[bi];
+      int64_t state_offset = cache_index * state_slot_stride + ni * head_dim * v_head_dim;
+      float g_val = -std::exp(float(A_log_ptr[ni])) *
+                    softplus(float(a_ptr[bi * v_num_heads + ni]) + float(dt_bias_ptr[ni]), softplus_threshold);
+      float g_val_exp = std::exp(g_val);
+      fVec g_val_exp_vec = fVec(g_val_exp);
+      int64_t q_offset = si * q_strideS + bi * q_strideB + (ni / group_size) * q_strideH;
+      int64_t k_offset = si * k_strideS + bi * k_strideB + (ni / group_size) * k_strideH;
+      int64_t q_scale_offset = bi * seq_len * num_heads + si * num_heads + (ni / group_size);
+      int64_t k_scale_offset = q_scale_offset + batch_size * seq_len * num_heads;
+      float q_scale = use_qk_l2norm_in_kernel ? qk_scale_buf[q_scale_offset] : 1.0f;
+      float k_scale = use_qk_l2norm_in_kernel ? qk_scale_buf[k_scale_offset] : 1.0f;
+      int64_t v_offset = si * v_strideS + bi * v_strideB + ni * v_strideH;
+      int64_t o_offset = ((bi * seq_len + si) * v_num_heads + ni) * v_head_dim;
+      float beta_val = 1 / (1 + std::exp(-b_ptr[ni]));
+      fVec beta_vec = fVec(beta_val);
+      int64_t dvi = 0;
+      for (; dvi <= v_head_dim - VecSize; dvi += VecSize) {
+        fVec kv_mem_vec0 = fVec(float(0));
+        fVec kv_mem_vec1 = fVec(float(0));
+        for (int di = 0; di < head_dim; ++di) {
+          fVec k_val_vec = fVec(k_ptr[k_offset + di] * k_scale);
+          fVec state_vec0 = fVec::loadu(state_ptr + state_offset + di * v_head_dim + dvi);
+          fVec state_vec1 = fVec::loadu(state_ptr + state_offset + di * v_head_dim + dvi + fVecSize);
+          kv_mem_vec0 = kv_mem_vec0 + state_vec0 * g_val_exp_vec * k_val_vec;
+          kv_mem_vec1 = kv_mem_vec1 + state_vec1 * g_val_exp_vec * k_val_vec;
+        }
+        bVec v_bvec = bVec::loadu(v_ptr + v_offset + dvi);
+        fVec v_vec0, v_vec1;
+        std::tie(v_vec0, v_vec1) = at::vec::convert_to_float(v_bvec);
+        fVec dt_vec0 = (v_vec0 - kv_mem_vec0) * beta_vec;
+        fVec dt_vec1 = (v_vec1 - kv_mem_vec1) * beta_vec;
+        fVec o_vec0 = fVec(float(0));
+        fVec o_vec1 = fVec(float(0));
+        for (int di = 0; di < head_dim; ++di) {
+          fVec q_vec = fVec(q_ptr[q_offset + di] * q_scale);
+          fVec k_vec = fVec(k_ptr[k_offset + di] * k_scale);
+          fVec state_vec0 = fVec::loadu(state_ptr + state_offset + di * v_head_dim + dvi);
+          fVec state_vec1 = fVec::loadu(state_ptr + state_offset + di * v_head_dim + dvi + fVecSize);
+          state_vec0 = state_vec0 * g_val_exp_vec + k_vec * dt_vec0;
+          state_vec1 = state_vec1 * g_val_exp_vec + k_vec * dt_vec1;
+          o_vec0 = o_vec0 + state_vec0 * q_vec * scale_vec;
+          o_vec1 = o_vec1 + state_vec1 * q_vec * scale_vec;
+          state_vec0.store(state_ptr + state_offset + di * v_head_dim + dvi);
+          state_vec1.store(state_ptr + state_offset + di * v_head_dim + dvi + fVecSize);
+        }
+        bVec o_vec = at::vec::convert_from_float<scalar_t>(o_vec0, o_vec1);
+        o_vec.store(o_ptr + o_offset + dvi);
+      }
+      for (; dvi < v_head_dim; ++dvi) {
+        float kv_mem_val = 0;
+        for (int di = 0; di < head_dim; ++di) {
+          float k_val = k_ptr[k_offset + di] * k_scale;
+          state_ptr[state_offset + di * v_head_dim + dvi] *= g_val_exp;
+          kv_mem_val += state_ptr[state_offset + di * v_head_dim + dvi] * k_val;
+        }
+        float v_val = v_ptr[v_offset + dvi];
+        float dt_val = (v_val - kv_mem_val) * beta_val;
+        float o_val = 0;
+        for (int di = 0; di < head_dim; ++di) {
+          float q_val = q_ptr[q_offset + di] * q_scale;
+          float k_val = k_ptr[k_offset + di] * k_scale;
+          state_ptr[state_offset + di * v_head_dim + dvi] += k_val * dt_val;
+          o_val += state_ptr[state_offset + di * v_head_dim + dvi] * q_val * scale;
+        }
+        o_ptr[o_offset + dvi] = o_val;
+      }
+      data_index_step(bi, batch_size, si, seq_len, ni, v_num_heads);
+    }
+  });
+}
+
+template <typename scalar_t>
+void fused_gdn_gating_kernel_impl(
+    float* __restrict__ A_log,
+    const scalar_t* __restrict__ a,
+    const scalar_t* __restrict__ b,
+    const scalar_t* __restrict__ dt_bias,
+    float* __restrict__ out,
+    scalar_t* __restrict__ beta,
+    int64_t batch,
+    int64_t num_heads) {
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  constexpr int vec_size = bVec::size();
+  constexpr int fvec_size = fVec::size();
+  const fVec neg_one(-1.0f);
+  const fVec one(1.0f);
+  at::parallel_for(0, batch, 0, [&](int64_t begin, int64_t end) {
+    for (int64_t i = begin; i < end; ++i) {
+      int64_t j = 0;
+      for (; j < num_heads - (num_heads % vec_size); j += vec_size) {
+        fVec A_log_vec0 = fVec::loadu(A_log + j);
+        fVec A_log_vec1 = fVec::loadu(A_log + j + fvec_size);
+        bVec dt_bias_vec = bVec::loadu(dt_bias + j);
+        bVec a_bvec = bVec::loadu(a + i * num_heads + j);
+        bVec b_bvec = bVec::loadu(b + i * num_heads + j);
+        fVec a0, a1, dt_bias_vec0, dt_bias_vec1, b0, b1;
+        std::tie(a0, a1) = at::vec::convert_to_float(a_bvec);
+        std::tie(b0, b1) = at::vec::convert_to_float(b_bvec);
+        std::tie(dt_bias_vec0, dt_bias_vec1) = at::vec::convert_to_float(dt_bias_vec);
+
+        fVec g0 = neg_one * A_log_vec0.exp_u20() * softplus(a0 + dt_bias_vec0);
+        fVec g1 = neg_one * A_log_vec1.exp_u20() * softplus(a1 + dt_bias_vec1);
+        fVec beta0 = one / (one + (neg_one * b0).exp_u20());
+        fVec beta1 = one / (one + (neg_one * b1).exp_u20());
+
+        g0.store(out + i * num_heads + j);
+        g1.store(out + i * num_heads + j + fvec_size);
+        bVec beta_vec = at::vec::convert_from_float<scalar_t>(beta0, beta1);
+        beta_vec.store(beta + i * num_heads + j);
+      }
+      for (; j < num_heads; ++j) {
+        out[i * num_heads + j] = -std::exp(A_log[j]) * softplus(float(a[i * num_heads + j]) + float(dt_bias[j]));
+        beta[i * num_heads + j] = 1 / (1 + std::exp(-b[i * num_heads + j]));
+      }
+    }
+  });
+}
+
+template <typename scalar_t>
+void fused_gdn_gating_kernel_impl(
+    scalar_t* __restrict__ A_log,
+    const scalar_t* __restrict__ a,
+    const scalar_t* __restrict__ b,
+    const scalar_t* __restrict__ dt_bias,
+    float* __restrict__ out,
+    scalar_t* __restrict__ beta,
+    int64_t batch,
+    int64_t num_heads) {
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  constexpr int vec_size = bVec::size();
+  constexpr int fvec_size = fVec::size();
+  const fVec neg_one(-1.0f);
+  const fVec one(1.0f);
+  at::parallel_for(0, batch, 0, [&](int64_t begin, int64_t end) {
+    for (int64_t i = begin; i < end; ++i) {
+      int64_t j = 0;
+      for (; j < num_heads - (num_heads % vec_size); j += vec_size) {
+        bVec A_log_bvec = bVec::loadu(A_log + j);
+        fVec A_log_vec0, A_log_vec1;
+        std::tie(A_log_vec0, A_log_vec1) = at::vec::convert_to_float(A_log_bvec);
+        bVec dt_bias_vec = bVec::loadu(dt_bias + j);
+        bVec a_bvec = bVec::loadu(a + i * num_heads + j);
+        bVec b_bvec = bVec::loadu(b + i * num_heads + j);
+        fVec a0, a1, dt_bias_vec0, dt_bias_vec1, b0, b1;
+        std::tie(a0, a1) = at::vec::convert_to_float(a_bvec);
+        std::tie(b0, b1) = at::vec::convert_to_float(b_bvec);
+        std::tie(dt_bias_vec0, dt_bias_vec1) = at::vec::convert_to_float(dt_bias_vec);
+
+        fVec g0 = neg_one * A_log_vec0.exp_u20() * softplus(a0 + dt_bias_vec0);
+        fVec g1 = neg_one * A_log_vec1.exp_u20() * softplus(a1 + dt_bias_vec1);
+        fVec beta0 = one / (one + (neg_one * b0).exp_u20());
+        fVec beta1 = one / (one + (neg_one * b1).exp_u20());
+
+        g0.store(out + i * num_heads + j);
+        g1.store(out + i * num_heads + j + fvec_size);
+        bVec beta_vec = at::vec::convert_from_float<scalar_t>(beta0, beta1);
+        beta_vec.store(beta + i * num_heads + j);
+      }
+      for (; j < num_heads; ++j) {
+        out[i * num_heads + j] = -std::exp(float(A_log[j])) * softplus(float(a[i * num_heads + j]) + float(dt_bias[j]));
+        beta[i * num_heads + j] = 1 / (1 + std::exp(-b[i * num_heads + j]));
+      }
+    }
+  });
+}
+
+}  // anonymous namespace
+
+template <bool is_last_dim_contiguous>
+inline void
+CHECK_INPUT_SHAPE_DTYPE(const at::Tensor& tensor, const int64_t& dim, const at::IntArrayRef& sizes, at::ScalarType st) {
+  TORCH_CHECK(tensor.sizes() == sizes, "Input tensor shape mismatch: expected ", sizes, ", got ", tensor.sizes());
+  TORCH_CHECK(tensor.dtype() == st, "Input tensor dtype mismatch");
+  CHECK_DIM(dim, tensor);
+  if (is_last_dim_contiguous) {
+    CHECK_LAST_DIM_CONTIGUOUS_INPUT(tensor);
+  } else {
+    CHECK_CONTIGUOUS(tensor);
+  }
+}
+
+// query: [B, T, HK, EK]
+// key: [B, T, HK, EK]
+// value: [B, T, HV, EV]
+// g: [B, T, HV] FP32
+// beta: [B, T, HV]
+// initial_state: [N, HV, EK, EV] FP32
+// output_final_state: bool
+// cu_seqlens: [N + 1] INT32
+// head_first: bool
+// use_qk_l2norm_in_kernel: bool
+std::tuple<at::Tensor, at::Tensor> chunk_gated_delta_rule_cpu(
+    const at::Tensor& query,
+    const at::Tensor& key,
+    const at::Tensor& value,
+    const at::Tensor& g,
+    const at::Tensor& beta,
+    const at::Tensor& initial_state,
+    bool output_final_state,
+    const at::Tensor& cu_seqlens,
+    bool head_first,
+    bool use_qk_l2norm_in_kernel,
+    double eps = 1e-5) {
+  TORCH_CHECK(head_first == false, "chunk_gated_delta_rule_cpu does not support head first");
+  int64_t B = query.size(0);
+  int64_t global_seq_len = query.size(1);
+  int64_t qk_num_head = query.size(2);
+  int64_t qk_head_size = query.size(3);
+  int64_t v_num_head = value.size(2);
+  int64_t v_head_size = value.size(3);
+  int64_t batch_size = initial_state.size(0);
+  CHECK_EQ(B, 1);
+  TORCH_CHECK(v_num_head % qk_num_head == 0, "expect v_num_head multiple of qk_num_head.");
+  TORCH_CHECK(qk_head_size % 32 == 0, "expect qk_head_size to be multiples of 32.");
+  TORCH_CHECK(v_head_size % 32 == 0, "expect v_head_size to be multiples of 32.");
+  CHECK_INPUT_SHAPE_DTYPE<true>(query, 4, {B, global_seq_len, qk_num_head, qk_head_size}, at::kBFloat16);
+  CHECK_INPUT_SHAPE_DTYPE<true>(key, 4, {B, global_seq_len, qk_num_head, qk_head_size}, at::kBFloat16);
+  CHECK_INPUT_SHAPE_DTYPE<true>(value, 4, {B, global_seq_len, v_num_head, v_head_size}, at::kBFloat16);
+  CHECK_INPUT_SHAPE_DTYPE<false>(g, 3, {B, global_seq_len, v_num_head}, at::kFloat);
+  CHECK_INPUT_SHAPE_DTYPE<false>(beta, 3, {B, global_seq_len, v_num_head}, at::kBFloat16);
+  CHECK_INPUT_SHAPE_DTYPE<false>(cu_seqlens, 1, {batch_size + 1}, at::kInt);
+  CHECK_INPUT_SHAPE_DTYPE<false>(initial_state, 4, {batch_size, v_num_head, qk_head_size, v_head_size}, at::kFloat);
+
+  at::Tensor output = at::empty_like(value, value.options());  // [B, T, HV, EV]
+  at::Tensor final_state = initial_state.to(at::kFloat);       // [N, HV, EK, EV]
+
+  // Strides
+  int64_t qStrideH = query.stride(2);
+  int64_t qStrideT = query.stride(1);
+  int64_t kStrideH = key.stride(2);
+  int64_t kStrideT = key.stride(1);
+  int64_t vStrideH = value.stride(2);
+  int64_t vStrideT = value.stride(1);
+  int64_t oStrideH = output.stride(2);
+  int64_t oStrideT = output.stride(1);
+
+  constexpr int64_t chunk_size = 64;
+  // Deduce the global chunks
+  // e.g. cu_seqlens: [0, 5, 13, 16], chunk_size = 4
+  // chunk_offsets: [0, 2, 4, 5]
+  // chunk_indices (batch_id, local_chunk_id): [[0, 0], [0, 1], [1, 0], [1, 1], [2, 0]]
+  at::Tensor chunk_offsets = at::empty(batch_size + 1, cu_seqlens.options());
+  auto chunk_offsets_ptr = chunk_offsets.data_ptr<int32_t>();
+  chunk_offsets_ptr[0] = 0;
+  int32_t* cu_seqlens_ptr = cu_seqlens.data_ptr<int32_t>();
+  int64_t s = 0;
+  int64_t e = 0;
+  int64_t s_pad = 0;
+  int64_t e_pad = 0;
+  for (int64_t b = 0; b < batch_size; b++) {
+    e = cu_seqlens_ptr[b + 1];
+    int64_t seq_len = e - s;
+    int64_t pad_size = (chunk_size - seq_len % chunk_size) % chunk_size;
+    int64_t total_seq_length = seq_len + pad_size;
+    e_pad = s_pad + total_seq_length;
+    chunk_offsets[b + 1] = e_pad / chunk_size;
+    s = e;
+    s_pad = e_pad;
+  }
+  int64_t global_total_seq_length = e_pad;
+  int64_t global_num_chunk = chunk_offsets_ptr[batch_size];
+  at::Tensor chunk_indices = at::empty(global_num_chunk * 2, cu_seqlens.options());
+  auto chunk_indices_ptr = chunk_indices.data_ptr<int32_t>();
+  int64_t curr_c = 0;
+  for (int64_t b = 0; b < batch_size; b++) {
+    int64_t batch_chunk_num = chunk_offsets_ptr[b + 1] - chunk_offsets_ptr[b];
+    for (int64_t c = 0; c < batch_chunk_num; c++) {
+      chunk_indices_ptr[curr_c * 2] = b;
+      chunk_indices_ptr[curr_c * 2 + 1] = c;
+      curr_c += 1;
+    }
+  }
+
+  // Allocate buffer
+  int64_t buff_size = v_num_head * global_total_seq_length                               // g_pad_data
+                      + batch_size * v_num_head * global_total_seq_length * v_head_size  // core_attn
+                      + v_num_head * global_total_seq_length * chunk_size                // decay_mask
+                      + v_num_head * global_total_seq_length * v_head_size;              // v_beta_attn
+  at::Tensor buff_data = at::empty({buff_size}, query.options().dtype(at::kFloat));
+  int64_t reduced_buff_size = qk_num_head * global_total_seq_length * qk_head_size    // q_pad_data
+                              + qk_num_head * global_total_seq_length * qk_head_size  // k_pad_data
+                              + v_num_head * global_total_seq_length * v_head_size    // v_pad_data
+                              + v_num_head * global_total_seq_length * qk_head_size   // k_beta_data
+                              + v_num_head * global_total_seq_length * v_head_size    // v_beta_data
+                              + v_num_head * global_total_seq_length * qk_head_size   // k_cumdecay_reduced
+                              + qk_num_head * global_seq_len                          // q_norm_sum
+                              + qk_num_head * global_seq_len;                         // k_norm_sum
+  at::Tensor reduced_buff_data = at::empty({reduced_buff_size}, query.options());
+  int64_t num_thread = at::get_num_threads();
+  int64_t buff_size_16bit_per_thread =
+      /* k_transpose */ qk_head_size * chunk_size +
+      /* v_pack */ chunk_size * v_head_size +
+      /* k_beta_g  */ chunk_size * qk_head_size +
+      /* k_beta_g_pack  */ chunk_size * qk_head_size +
+      /* attn */ chunk_size * chunk_size * 2 +
+      /* attn_reduced */ chunk_size * chunk_size +
+      /* k_cumdecay */ chunk_size * qk_head_size * 2 +
+      /* row */ chunk_size * 2 +
+      /* updated */ chunk_size * 2 +
+      /* curr_last_recurrent_state_reduced  */ qk_head_size * v_head_size +
+      /* curr_last_recurrent_state_pack_reduced   */ qk_head_size * v_head_size +
+      /* k_transpose_i  */ qk_head_size * chunk_size +
+      /* attn_i   */ chunk_size * chunk_size * 2 +
+      /* attn_i_reduced     */ chunk_size * chunk_size +
+      /* v_prime */ chunk_size * v_head_size * 2 +
+      /* v_prime_reduced */ chunk_size * v_head_size +
+      /* v_prime_pack_reduced */ chunk_size * v_head_size +
+      /* qg */ chunk_size * qk_head_size +
+      /* attn_inter */ chunk_size * v_head_size * 2 +
+      /* kg */ chunk_size * qk_head_size +
+      /* kg_transpose */ qk_head_size * chunk_size +
+      /* kgv */ qk_head_size * v_head_size * 2;
+  at::Tensor thread_buff_data = at::empty({num_thread, buff_size_16bit_per_thread}, query.options());
+
+  AT_DISPATCH_REDUCED_FLOATING_TYPES(query.scalar_type(), "chunk_gated_delta_rule_kernel", [&] {
+    chunk_gated_delta_rule_kernel_impl<scalar_t, chunk_size>(
+        output.data_ptr<scalar_t>(),
+        final_state.data_ptr<float>(),
+        query.data_ptr<scalar_t>(),
+        key.data_ptr<scalar_t>(),
+        value.data_ptr<scalar_t>(),
+        g.data_ptr<float>(),
+        beta.data_ptr<scalar_t>(),
+        cu_seqlens_ptr,
+        buff_data.data_ptr<float>(),
+        reduced_buff_data.data_ptr<scalar_t>(),
+        thread_buff_data.data_ptr<scalar_t>(),
+        chunk_offsets_ptr,
+        chunk_indices_ptr,
+        use_qk_l2norm_in_kernel,
+        batch_size,
+        global_seq_len,
+        qk_num_head,
+        v_num_head,
+        qk_head_size,
+        v_head_size,
+        qStrideH,
+        qStrideT,
+        kStrideH,
+        kStrideT,
+        vStrideH,
+        vStrideT,
+        oStrideH,
+        oStrideT,
+        global_total_seq_length,
+        global_num_chunk,
+        buff_size_16bit_per_thread,
+        eps);
+  });
+  return std::make_tuple(std::move(output), std::move(final_state));
+}
+
+// A_log: [v_num_heads]
+// dt_bias: [v_num_heads]
+// query: [seq_len, batch_size, num_heads, head_dim]
+// key: [seq_len, batch_size, num_heads, head_dim]
+// value: [seq_len, batch_size, v_num_heads, v_head_dim]
+// a: [batch_size, v_num_heads]
+// b: [batch_size, v_num_heads]
+// initial_state_source:[num_tokens, v_num_heads, head_dim, v_head_dim]
+// initial_state_indices: [batch_size]
+// cu_seqlens: [batch_size + 1]
+at::Tensor fused_sigmoid_gating_delta_rule_update_cpu(
+    const at::Tensor& A_log,
+    const at::Tensor& dt_bias,
+    const at::Tensor& q,
+    const at::Tensor& k,
+    const at::Tensor& v,
+    const at::Tensor& a,
+    const at::Tensor& b,
+    at::Tensor& initial_state_source,
+    const at::Tensor& initial_state_indices,
+    const at::Tensor& cu_seqlens,
+    bool use_qk_l2norm_in_kernel,
+    double softplus_beta = 1.0,
+    double softplus_threshold = 20.0) {
+  CHECK_DIM(4, q);
+  CHECK_DIM(4, v);
+  CHECK_LAST_DIM_CONTIGUOUS_INPUT(q);
+  int64_t seq_len = q.size(0);
+  int64_t batch_size = q.size(1);
+  int64_t num_heads = q.size(2);
+  int64_t head_dim = q.size(3);
+  int64_t v_num_heads = v.size(2);
+  int64_t v_head_dim = v.size(3);
+  CHECK_INPUT_SHAPE_DTYPE<true>(k, {seq_len, batch_size, num_heads, head_dim}, q.scalar_type());
+  CHECK_INPUT_SHAPE_DTYPE<true>(v, {seq_len, batch_size, v_num_heads, v_head_dim}, q.scalar_type());
+  CHECK_INPUT_SHAPE_DTYPE<true>(a, {batch_size, v_num_heads}, q.scalar_type());
+  CHECK_INPUT_SHAPE_DTYPE<true>(dt_bias, {v_num_heads}, q.scalar_type());
+  CHECK_INPUT_SHAPE_DTYPE<true>(b, {batch_size, v_num_heads}, q.scalar_type());
+  CHECK_INPUT_SHAPE_DTYPE<true>(initial_state_indices, {batch_size}, at::kInt);
+  CHECK_INPUT_SHAPE_DTYPE<true>(cu_seqlens, {batch_size + 1}, at::kInt);
+  CHECK_INPUT_SHAPE_DTYPE<true>(
+      initial_state_source, {initial_state_source.size(0), v_num_heads, head_dim, v_head_dim}, at::kFloat);
+  CHECK(initial_state_source.size(0) >= batch_size);
+  CHECK_EQ(v_num_heads % num_heads, 0);
+  TORCH_CHECK(
+      A_log.sizes() == at::IntArrayRef({v_num_heads}),
+      "Input tensor shape mismatch: expected ",
+      at::IntArrayRef({v_num_heads}),
+      ", got ",
+      A_log.sizes());
+
+  int64_t q_strideB = q.stride(1);
+  int64_t q_strideS = q.stride(0);
+  int64_t q_strideH = q.stride(2);
+  int64_t k_strideB = k.stride(1);
+  int64_t k_strideS = k.stride(0);
+  int64_t k_strideH = k.stride(2);
+  int64_t v_strideB = v.stride(1);
+  int64_t v_strideS = v.stride(0);
+  int64_t v_strideH = v.stride(2);
+  // IMPORTANT: To make the kernal compatible with vLLM KV cache layout 
+  int64_t state_slot_stride = initial_state_source.stride(0);
+  at::Tensor core_attn_out = at::empty({batch_size, seq_len, v_num_heads, v_head_dim}, q.options());
+  at::Tensor qk_scale_buf = at::empty({2 * batch_size, seq_len, num_heads}, at::kFloat);
+
+  CPU_DISPATCH_REDUCED_FLOATING_TYPES_EXT(
+      q.scalar_type(), A_log.scalar_type(), "fused_sigmoid_gating_delta_rule_update_kernel_impl", [&] {
+        fused_sigmoid_gating_delta_rule_update_kernel_impl<scalar_t, param_t>(
+            q.data_ptr<scalar_t>(),
+            k.data_ptr<scalar_t>(),
+            v.data_ptr<scalar_t>(),
+            A_log.data_ptr<param_t>(),
+            a.data_ptr<scalar_t>(),
+            dt_bias.data_ptr<scalar_t>(),
+            b.data_ptr<scalar_t>(),
+            initial_state_indices.data_ptr<int32_t>(),
+            initial_state_source.data_ptr<float>(),
+            core_attn_out.data_ptr<scalar_t>(),
+            qk_scale_buf.data_ptr<float>(),
+            seq_len,
+            batch_size,
+            num_heads,
+            head_dim,
+            v_num_heads,
+            v_head_dim,
+            q_strideB,
+            q_strideS,
+            q_strideH,
+            k_strideB,
+            k_strideS,
+            k_strideH,
+            v_strideB,
+            v_strideS,
+            v_strideH,
+            state_slot_stride,
+            use_qk_l2norm_in_kernel,
+            softplus_threshold);
+      });
+  return core_attn_out;
+}
+
+// A_log: [num_v_heads]
+// a: [batch, num_v_heads]
+// b: [batch, num_v_heads]
+// dt_bias: [num_v_heads]
+// -A_log.float().exp() * F.softplus(a.float() + dt_bias)
+std::tuple<at::Tensor, at::Tensor>
+fused_gdn_gating_cpu(const at::Tensor& A_log, const at::Tensor& a, const at::Tensor& b, const at::Tensor& dt_bias) {
+  CHECK_DIM(1, A_log);
+  CHECK_DIM(2, a);
+  CHECK_DIM(2, b);
+  CHECK_DIM(1, dt_bias);
+  CHECK_CONTIGUOUS(a);
+  CHECK_EQ(A_log.size(0), a.size(1));
+  CHECK_EQ(A_log.size(0), dt_bias.size(0));
+  int batch = a.size(0);
+  int num_heads = a.size(1);
+  CHECK_EQ(b.size(0), batch);
+  CHECK_EQ(b.size(1), num_heads);
+  at::Tensor out = at::empty({1, batch, num_heads}, a.options().dtype(at::kFloat));
+  at::Tensor beta = at::empty({1, batch, num_heads}, b.options());
+  CPU_DISPATCH_REDUCED_FLOATING_TYPES_EXT(a.scalar_type(), A_log.scalar_type(), "fused_gdn_gating_kernel", [&] {
+    fused_gdn_gating_kernel_impl<scalar_t>(
+        A_log.data_ptr<param_t>(),
+        a.data_ptr<scalar_t>(),
+        b.data_ptr<scalar_t>(),
+        dt_bias.data_ptr<scalar_t>(),
+        out.data_ptr<float>(),
+        beta.data_ptr<scalar_t>(),
+        batch,
+        num_heads);
+  });
+  return std::make_tuple(out, beta);
+}
diff --git a/csrc/cpu/sgl-kernels/gemm.cpp b/csrc/cpu/sgl-kernels/gemm.cpp
index 65c56943c56f..38e6d9f4ce9d 100644
--- a/csrc/cpu/sgl-kernels/gemm.cpp
+++ b/csrc/cpu/sgl-kernels/gemm.cpp
@@ -1,11 +1,12 @@
 // Adapted from
 // https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
 
-#include "common.h"
-#include "vec.h"
+// clang-format off
+
 #include "gemm.h"
 
-// clang-format off
+#include "common.h"
+#include "vec.h"
 
 namespace {
 
@@ -26,13 +27,13 @@ inline void s8s8_compensation(int8_t* __restrict__ packed, int K) {
   const __m512i off = _mm512_set1_epi8(static_cast<char>(0x80));
   for (int k = 0; k < K / 4; ++k) {
     for (int col = 0; col < COLS; ++col) {
-      __m512i vb = _mm512_loadu_si512((const __m512i *)(packed + k * BLOCK_N * 4 + col * 64));
+      __m512i vb = _mm512_loadu_si512((const __m512i*)(packed + k * BLOCK_N * 4 + col * 64));
       vcomp[col] = _mm512_dpbusd_epi32(vcomp[col], off, vb);
     }
   }
 
   for (int col = 0; col < COLS; ++col) {
-    _mm512_storeu_si512((__m512i *)(packed + offset + col * 64), vcomp[col]);
+    _mm512_storeu_si512((__m512i*)(packed + offset + col * 64), vcomp[col]);
   }
 #else
   TORCH_CHECK(false, "s8s8_compensation not implemented!");
@@ -69,6 +70,43 @@ inline void pack_vnni<int8_t>(int8_t* __restrict__ packed, const int8_t* __restr
   s8s8_compensation<BLOCK_N>(packed, K);
 }
 
+// uint8_t: mxfp4 or int4
+// pack to vnni2 format as they are computed with bfloat16
+//
+// from [N, K'/2, 2] to [K'/2, N, 2], view 2x int4 as unit8:
+// from [N,    K   ] to [K,    N   ] where K = K'/2
+//
+template <>
+inline void pack_vnni<uint8_t>(uint8_t* __restrict__ packed, const uint8_t* __restrict__ weight, int N, int K) {
+  constexpr int BLOCK_N = block_size_n();
+
+  uint8_t unpacked[2 * BLOCK_N];
+
+  // 32-way pack (align with BLOCK_N), faster for avx512 unpacking
+  //
+  // for a range of (64):
+  //   {0, 1, 2, ..., 63}
+  //
+  // original format:
+  //   { 1|0,  3|2, ..., 63|62}
+  //
+  // packed format:
+  //   {32|0, 31|1, ..., 63|31}
+  //
+  for (int k = 0; k < K; ++k) {
+    // unpack first
+    for (int n = 0; n < N; ++n) {
+      uint8_t value = weight[n * K + k];
+      unpacked[n * 2 + 0] = value & 0xF;  // lower 4 bits
+      unpacked[n * 2 + 1] = value >> 4;   // higher 4 bits
+    }
+    // re-pack to 32-way
+    for (int n = 0; n < N; ++n) {
+      packed[k * N + n] = (unpacked[n + BLOCK_N] << 4) | unpacked[n];
+    }
+  }
+}
+
 template <typename scalar_t>
 inline void copy_stub(scalar_t* __restrict__ out, const float* __restrict__ input, int64_t size) {
   using bVec = at::vec::Vectorized<scalar_t>;
@@ -76,7 +114,7 @@ inline void copy_stub(scalar_t* __restrict__ out, const float* __restrict__ inpu
   constexpr int kVecSize = bVec::size();
 
   int64_t d;
-  #pragma GCC unroll 4
+#pragma GCC unroll 4
   for (d = 0; d <= size - kVecSize; d += kVecSize) {
     fVec data0 = fVec::loadu(input + d);
     fVec data1 = fVec::loadu(input + d + fVec::size());
@@ -89,13 +127,34 @@ inline void copy_stub(scalar_t* __restrict__ out, const float* __restrict__ inpu
 }
 
 template <typename scalar_t>
-inline void copy_add_stub(scalar_t* __restrict__ out, const float* __restrict__ input, const float* __restrict__ bias, int64_t size) {
+inline void copy_stub(float* __restrict__ out, const scalar_t* __restrict__ input, int64_t size) {
   using bVec = at::vec::Vectorized<scalar_t>;
   using fVec = at::vec::Vectorized<float>;
   constexpr int kVecSize = bVec::size();
 
   int64_t d;
-  #pragma GCC unroll 4
+#pragma GCC unroll 4
+  for (d = 0; d <= size - kVecSize; d += kVecSize) {
+    fVec data0, data1;
+    bVec b_vec = bVec::loadu(input + d);
+    std::tie(data0, data1) = at::vec::convert_to_float(b_vec);
+    data0.store(out + d);
+    data1.store(out + d + fVec::size());
+  }
+  for (; d < size; ++d) {
+    out[d] = static_cast<float>(input[d]);
+  }
+}
+
+template <typename scalar_t>
+inline void copy_add_stub(
+    scalar_t* __restrict__ out, const float* __restrict__ input, const float* __restrict__ bias, int64_t size) {
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  constexpr int kVecSize = bVec::size();
+
+  int64_t d;
+#pragma GCC unroll 4
   for (d = 0; d <= size - kVecSize; d += kVecSize) {
     fVec data0 = fVec::loadu(input + d) + fVec::loadu(bias + d);
     fVec data1 = fVec::loadu(input + d + fVec::size()) + fVec::loadu(bias + d + fVec::size());
@@ -107,11 +166,51 @@ inline void copy_add_stub(scalar_t* __restrict__ out, const float* __restrict__
   }
 }
 
+template <typename scalar_t, bool has_bias>
+inline void scalar_sigmoid_and_mul(
+    scalar_t* __restrict__ out,
+    const float* __restrict__ input,
+    const float* __restrict__ bias,
+    const scalar_t* __restrict__ mul,
+    int SIZE) {
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  // scalar sigmoid
+  const fVec one = fVec(1.f);
+  fVec X;
+  if constexpr (has_bias) {
+    assert(bias != nullptr);
+    X = fVec(input[0] + bias[0]);
+  } else {
+    X = fVec(input[0]);
+  }
+  X = one / (one + X.neg().exp_u20());
+
+  // vec mul
+  constexpr int kVecSize = bVec::size();
+  for (int d = 0; d < SIZE; d += kVecSize) {
+    bVec m_bvec = bVec::loadu(mul + d);
+    fVec m_fvec0, m_fvec1;
+    std::tie(m_fvec0, m_fvec1) = at::vec::convert_to_float(m_bvec);
+    m_fvec0 = m_fvec0 * X;
+    m_fvec1 = m_fvec1 * X;
+
+    bVec out_vec = convert_from_float_ext<scalar_t>(m_fvec0, m_fvec1);
+    out_vec.store(out + d);
+  }
+}
+
 template <typename scalar_t, bool has_bias, int BLOCK_M, int BLOCK_N>
 struct tinygemm_kernel_nn {
   static inline void apply(
-      const scalar_t* __restrict__ A, const scalar_t* __restrict__ B, scalar_t* __restrict__ C,
-      const float* __restrict__ bias, int64_t K, int64_t lda, int64_t ldb, int64_t ldc) {
+      const scalar_t* __restrict__ A,
+      const scalar_t* __restrict__ B,
+      scalar_t* __restrict__ C,
+      const float* __restrict__ bias,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc) {
     TORCH_CHECK(false, "tinygemm_kernel_nn: scalar path not implemented!");
   }
 };
@@ -120,9 +219,14 @@ struct tinygemm_kernel_nn {
 template <bool has_bias, int BLOCK_M, int BLOCK_N>
 struct tinygemm_kernel_nn<at::BFloat16, has_bias, BLOCK_M, BLOCK_N> {
   static inline void apply(
-      const at::BFloat16* __restrict__ A, const at::BFloat16* __restrict__ B, at::BFloat16* __restrict__ C,
-      const float* __restrict__ bias, int64_t K, int64_t lda, int64_t ldb, int64_t ldc) {
-
+      const at::BFloat16* __restrict__ A,
+      const at::BFloat16* __restrict__ B,
+      at::BFloat16* __restrict__ C,
+      const float* __restrict__ bias,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc) {
     constexpr int ROWS = BLOCK_M;
     constexpr int COLS = BLOCK_N / 16;
 
@@ -145,7 +249,7 @@ struct tinygemm_kernel_nn<at::BFloat16, has_bias, BLOCK_M, BLOCK_N> {
 
     const int64_t K2 = K >> 1;
     const int64_t lda2 = lda >> 1;
-    const int64_t ldb2 = ldb; // ldb * 2 >> 1;
+    const int64_t ldb2 = ldb;  // ldb * 2 >> 1;
     const float* a_ptr = reinterpret_cast<const float*>(A);
     const float* b_ptr = reinterpret_cast<const float*>(B);
 
@@ -180,9 +284,7 @@ struct tinygemm_kernel_nn<at::BFloat16, has_bias, BLOCK_M, BLOCK_N> {
               (__m512i)(_mm512_cvtne2ps_pbh(vc[row * COLS + col + 1], vc[row * COLS + col])));
         }
       } else {
-        _mm256_storeu_si256(
-            reinterpret_cast<__m256i*>(C + row * ldc + col * 16),
-            (__m256i)(_mm512_cvtneps_pbh(vc[i])));
+        _mm256_storeu_si256(reinterpret_cast<__m256i*>(C + row * ldc + col * 16), (__m256i)(_mm512_cvtneps_pbh(vc[i])));
       }
     };
     Unroll<ROWS * COLS>{}(storec);
@@ -190,22 +292,33 @@ struct tinygemm_kernel_nn<at::BFloat16, has_bias, BLOCK_M, BLOCK_N> {
 };
 #endif
 
-#define LAUNCH_TINYGEMM_KERNEL_NN(MB_SIZE, NB_SIZE)                          \
-    tinygemm_kernel_nn<scalar_t, has_bias, MB_SIZE, NB_SIZE>::apply(         \
-        A + mb_start * lda, B + nb_start * 2, C + mb_start * ldc + nb_start, \
-        has_bias ? bias + nb_start : nullptr, K, lda, ldb, ldc);
+#define LAUNCH_TINYGEMM_KERNEL_NN(MB_SIZE, NB_SIZE)                \
+  tinygemm_kernel_nn<scalar_t, has_bias, MB_SIZE, NB_SIZE>::apply( \
+      A + mb_start * lda,                                          \
+      B + nb_start * 2,                                            \
+      C + mb_start * ldc + nb_start,                               \
+      has_bias ? bias + nb_start : nullptr,                        \
+      K,                                                           \
+      lda,                                                         \
+      ldb,                                                         \
+      ldc);
 
 template <typename scalar_t, bool has_bias>
 struct brgemm {
   static inline void apply(
-      const scalar_t* __restrict__ A, const scalar_t* __restrict__ B, scalar_t* __restrict__ C,
-      float* __restrict__ Ctmp, const float* __restrict__ bias,
-      int64_t M, int64_t N, int64_t K, int64_t lda, int64_t ldb, int64_t ldc) {
-
+      const scalar_t* __restrict__ A,
+      const scalar_t* __restrict__ B,
+      scalar_t* __restrict__ C,
+      float* __restrict__ Ctmp,
+      const float* __restrict__ bias,
+      int64_t M,
+      int64_t N,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc) {
     constexpr int BLOCK_N = block_size_n();
-    at::native::cpublas::brgemm(
-        M, N, K, lda, ldb, BLOCK_N, /* add_C */false,
-        A, B, Ctmp);
+    at::native::cpublas::brgemm(M, N, K, lda, ldb, BLOCK_N, /* add_C */ false, A, B, Ctmp);
 
     // copy from Ctmp to C
     for (int64_t m = 0; m < M; ++m) {
@@ -216,6 +329,21 @@ struct brgemm {
       }
     }
   }
+  static inline void apply(
+      const float* __restrict__ A,
+      const float* __restrict__ B,
+      scalar_t* __restrict__ C,
+      float* __restrict__ Ctmp,
+      const float* __restrict__ bias,
+      int64_t M,
+      int64_t N,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc) {
+    constexpr int BLOCK_N = block_size_n();
+    at::native::cpublas::brgemm(M, N, K, lda, ldb, BLOCK_N, /* add_C */ false, A, B, Ctmp);
+  }
 };
 
 template <typename scalar_t, bool has_bias>
@@ -232,15 +360,12 @@ void tinygemm_kernel(
     int64_t ldb,
     int64_t ldc,
     bool brg) {
-
   if (brg) {
-    brgemm<scalar_t, has_bias>::apply(
-        A, B, C, Ctmp, bias,
-        M, N, K, lda, ldb, ldc);
+    brgemm<scalar_t, has_bias>::apply(A, B, C, Ctmp, bias, M, N, K, lda, ldb, ldc);
     return;
   }
 
-  // pattern: 1-4-16
+  // pattern: 1-4-16, N = 16, 32, 48, 64
   constexpr int64_t BLOCK_M = 4;
   constexpr int64_t BLOCK_N = 64;
   const int64_t MB = div_up(M, BLOCK_M);
@@ -252,25 +377,88 @@ void tinygemm_kernel(
       int64_t nb_start = nb * BLOCK_N;
       int64_t nb_size = std::min(BLOCK_N, N - nb_start);
 
-      switch(mb_size << 4 | nb_size >> 4) {
+      switch (mb_size << 4 | nb_size >> 4) {
         // mb_size = 1
-        case 0x12: LAUNCH_TINYGEMM_KERNEL_NN(1, 32); break;
-        case 0x14: LAUNCH_TINYGEMM_KERNEL_NN(1, 64); break;
+        case 0x11:
+          LAUNCH_TINYGEMM_KERNEL_NN(1, 16);
+          break;
+        case 0x12:
+          LAUNCH_TINYGEMM_KERNEL_NN(1, 32);
+          break;
+        case 0x13:
+          LAUNCH_TINYGEMM_KERNEL_NN(1, 48);
+          break;
+        case 0x14:
+          LAUNCH_TINYGEMM_KERNEL_NN(1, 64);
+          break;
         // mb_size = 2
-        case 0x22: LAUNCH_TINYGEMM_KERNEL_NN(2, 32); break;
-        case 0x24: LAUNCH_TINYGEMM_KERNEL_NN(2, 64); break;
+        case 0x21:
+          LAUNCH_TINYGEMM_KERNEL_NN(2, 16);
+          break;
+        case 0x22:
+          LAUNCH_TINYGEMM_KERNEL_NN(2, 32);
+          break;
+        case 0x23:
+          LAUNCH_TINYGEMM_KERNEL_NN(2, 48);
+          break;
+        case 0x24:
+          LAUNCH_TINYGEMM_KERNEL_NN(2, 64);
+          break;
         // mb_size = 3
-        case 0x32: LAUNCH_TINYGEMM_KERNEL_NN(3, 32); break;
-        case 0x34: LAUNCH_TINYGEMM_KERNEL_NN(3, 64); break;
+        case 0x31:
+          LAUNCH_TINYGEMM_KERNEL_NN(3, 16);
+          break;
+        case 0x32:
+          LAUNCH_TINYGEMM_KERNEL_NN(3, 32);
+          break;
+        case 0x33:
+          LAUNCH_TINYGEMM_KERNEL_NN(3, 48);
+          break;
+        case 0x34:
+          LAUNCH_TINYGEMM_KERNEL_NN(3, 64);
+          break;
         // mb_size = 4
-        case 0x42: LAUNCH_TINYGEMM_KERNEL_NN(4, 32); break;
-        case 0x44: LAUNCH_TINYGEMM_KERNEL_NN(4, 64); break;
-        default: TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", nb_size);
+        case 0x41:
+          LAUNCH_TINYGEMM_KERNEL_NN(4, 16);
+          break;
+        case 0x42:
+          LAUNCH_TINYGEMM_KERNEL_NN(4, 32);
+          break;
+        case 0x43:
+          LAUNCH_TINYGEMM_KERNEL_NN(4, 48);
+          break;
+        case 0x44:
+          LAUNCH_TINYGEMM_KERNEL_NN(4, 64);
+          break;
+        default:
+          TORCH_CHECK(false, "Unexpected block size, ", mb_size, " x ", nb_size);
       }
     }
   }
 }
 
+template <typename scalar_t, bool has_bias>
+void tinygemm_kernel(
+    const float* __restrict__ A,
+    const float* __restrict__ B,
+    scalar_t* __restrict__ C,
+    float* __restrict__ Ctmp,
+    const float* __restrict__ bias,
+    int64_t M,
+    int64_t N,
+    int64_t K,
+    int64_t lda,
+    int64_t ldb,
+    int64_t ldc,
+    bool brg) {
+  TORCH_CHECK(brg, "Expected to use fp32 brgemm for small N GEMM");
+  if (brg) {
+    brgemm<scalar_t, has_bias>::apply(A, B, C, Ctmp, bias, M, N, K, lda, ldb, ldc);
+    return;
+  }
+  // TODO : add intrinsic path
+}
+
 template <typename scalar_t>
 void weight_packed_linear_kernel_impl(
     scalar_t* __restrict__ out,
@@ -282,29 +470,20 @@ void weight_packed_linear_kernel_impl(
     int64_t K,
     int64_t mat1_strideM,
     int64_t out_strideM) {
-
   constexpr int64_t BLOCK_M = block_size_m();
   constexpr int64_t BLOCK_N = block_size_n();
   const int64_t MB = div_up(M, BLOCK_M);
   const int64_t NB = div_up(N, BLOCK_N);
 
-  // use avx512-bf16 when a) M is small; b) dtype is bfloat16, otherwise use amx
-  const bool use_brgemm = (M > 4) || (!std::is_same_v<scalar_t, at::BFloat16>);
-
-  // l2 cache block for n
-  int64_t cache_blocks_nb = get_cache_blocks<scalar_t>(BLOCK_N, K);
+  const bool use_brgemm = can_use_brgemm<scalar_t>(M);
 
   // parallel on [MB, NB]
   AT_DISPATCH_BOOL(bias != nullptr, has_bias, [&] {
-    parallel_2d(MB, NB, [&](int64_t begin_mb, int64_t end_mb, int64_t begin_nb, int64_t end_nb) {
-
+    parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
       // for brgemm, use float32 for accumulate
       alignas(64) float Ctmp[BLOCK_M * BLOCK_N];
 
-      for (int64_t nbb = begin_nb; nbb < end_nb; nbb += cache_blocks_nb) {
-      for (int64_t mb = begin_mb; mb < end_mb; ++mb) {
-      for (int64_t nb = nbb; nb < std::min(nbb + cache_blocks_nb, end_nb); ++nb) {
-
+      loop_2d<scalar_t>(mb0, mb1, nb0, nb1, BLOCK_N * K, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
         int64_t mb_start = mb * BLOCK_M;
         int64_t mb_size = std::min(M - mb_start, BLOCK_M);
         int64_t nb_start = nb * BLOCK_N;
@@ -323,7 +502,82 @@ void weight_packed_linear_kernel_impl(
             /* ldb */ nb_size,
             /* ldc */ out_strideM,
             /* brg */ use_brgemm);
-      }}}
+      });
+
+      if (use_brgemm) {
+        at::native::cpublas::brgemm_release();
+      }
+    });
+  });
+}
+
+template <typename scalar_t>
+void weight_packed_linear_kernel_impl(
+    scalar_t* __restrict__ out,
+    const scalar_t* __restrict__ mat1,
+    const float* __restrict__ mat2,
+    const float* __restrict__ bias,
+    const scalar_t* __restrict__ post_mul_mat,
+    int64_t M,
+    int64_t N,
+    int64_t K,
+    int64_t mat1_strideM,
+    int64_t out_strideM) {
+  constexpr int64_t BLOCK_M = block_size_m();
+  constexpr int64_t BLOCK_N = block_size_n();
+  const int64_t MB = div_up(M, BLOCK_M);
+  const int64_t NB = div_up(N, BLOCK_N);
+
+  const bool use_brgemm = true;  // TODO: add intrinsic path
+  // parallel on [MB, NB]
+  AT_DISPATCH_BOOL(bias != nullptr, has_bias, [&] {
+    parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
+      // for brgemm, use float32 for accumulate
+      alignas(64) float Atmp[BLOCK_M * K];
+      alignas(64) float Ctmp[BLOCK_M * BLOCK_N];
+
+      loop_2d<float>(mb0, mb1, nb0, nb1, BLOCK_N * K, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
+        int64_t mb_start = mb * BLOCK_M;
+        int64_t mb_size = std::min(M - mb_start, BLOCK_M);
+        int64_t nb_start = nb * BLOCK_N;
+        int64_t nb_size = std::min(N - nb_start, BLOCK_N);
+        for (int64_t m = 0; m < mb_size; ++m) {
+          copy_stub<scalar_t>(Atmp + m * K, mat1 + mb_start * mat1_strideM + m * K, K);
+        }
+        tinygemm_kernel<scalar_t, has_bias>(
+            /*   A */ Atmp,
+            /*   B */ mat2 + nb_start * K /* nb * BLOCK_N * K */,
+            /*   C */ out + mb_start * out_strideM + nb_start,
+            /* Ctmp*/ Ctmp,
+            /* bias*/ bias + nb_start,
+            /*   M */ mb_size,
+            /*   N */ nb_size,
+            /*   K */ K,
+            /* lda */ mat1_strideM,
+            /* ldb */ nb_size,
+            /* ldc */ out_strideM,
+            /* brg */ use_brgemm);
+
+        if (post_mul_mat != nullptr) {
+          for (int64_t m = 0; m < mb_size; ++m) {
+            scalar_sigmoid_and_mul<scalar_t, has_bias>(
+                out + mb_start * out_strideM + nb_start + m * out_strideM,
+                Ctmp + m * BLOCK_N,
+                bias + nb_start,
+                post_mul_mat + mb_start * out_strideM + m * out_strideM,
+                out_strideM);
+          }
+        } else {
+          for (int64_t m = 0; m < mb_size; ++m) {
+            if constexpr (has_bias) {
+              copy_add_stub(
+                  out + mb_start * out_strideM + nb_start + m * out_strideM, Ctmp + m * BLOCK_N, bias + nb_start, N);
+            } else {
+              copy_stub(out + mb_start * out_strideM + nb_start + m * out_strideM, Ctmp + m * BLOCK_N, N);
+            }
+          }
+        }
+      });
 
       if (use_brgemm) {
         at::native::cpublas::brgemm_release();
@@ -332,20 +586,38 @@ void weight_packed_linear_kernel_impl(
   });
 }
 
-} // anonymous namespace
+}  // anonymous namespace
 
 // tinygemm interface
 template <typename scalar_t>
-void tinygemm_kernel(const scalar_t* __restrict__ A, const scalar_t* __restrict__ B, scalar_t* __restrict__ C,
-    float* __restrict__ Ctmp, int64_t M, int64_t N, int64_t K, int64_t lda, int64_t ldb, int64_t ldc, bool brg) {
+void tinygemm_kernel(
+    const scalar_t* __restrict__ A,
+    const scalar_t* __restrict__ B,
+    scalar_t* __restrict__ C,
+    float* __restrict__ Ctmp,
+    int64_t M,
+    int64_t N,
+    int64_t K,
+    int64_t lda,
+    int64_t ldb,
+    int64_t ldc,
+    bool brg) {
   tinygemm_kernel<scalar_t, false>(A, B, C, Ctmp, nullptr, M, N, K, lda, ldb, ldc, brg);
 }
 
-#define INSTANTIATE_TINYGEMM_TEMPLATE(TYPE)                                             \
-    template void tinygemm_kernel<TYPE>(                                                \
-        const TYPE* __restrict__ A, const TYPE* __restrict__ B, TYPE* __restrict__ C,   \
-        float* __restrict__ Ctmp, int64_t M, int64_t N, int64_t K, int64_t lda,         \
-        int64_t ldb, int64_t ldc, bool brg)
+#define INSTANTIATE_TINYGEMM_TEMPLATE(TYPE) \
+  template void tinygemm_kernel<TYPE>(      \
+      const TYPE* __restrict__ A,           \
+      const TYPE* __restrict__ B,           \
+      TYPE* __restrict__ C,                 \
+      float* __restrict__ Ctmp,             \
+      int64_t M,                            \
+      int64_t N,                            \
+      int64_t K,                            \
+      int64_t lda,                          \
+      int64_t ldb,                          \
+      int64_t ldc,                          \
+      bool brg)
 
 INSTANTIATE_TINYGEMM_TEMPLATE(at::BFloat16);
 INSTANTIATE_TINYGEMM_TEMPLATE(at::Half);
@@ -359,14 +631,23 @@ at::Tensor convert_weight_packed(at::Tensor& weight) {
 
   const int64_t ndim = weight.ndimension();
   TORCH_CHECK(ndim == 2 || ndim == 3, "expect weight to be 2d or 3d, got ", ndim, "d tensor.");
+
+  if (ndim == 2 && weight.size(0) < TILE_N) {
+    // for 2D weight and small OC shape, we use fma linear path, which needs transpose not pack
+    return weight.to(at::kFloat).t().contiguous();
+  }
+
   const auto st = weight.scalar_type();
   const int64_t E = ndim == 3 ? weight.size(0) : 1;
   const int64_t OC = ndim == 3 ? weight.size(1) : weight.size(0);
   const int64_t IC = ndim == 3 ? weight.size(2) : weight.size(1);
 
+  // mxfp4 or int4 are packed with uint8
+  const int64_t actual_IC = st == at::kByte ? IC * 2 : IC;
+
   // we handle 2 TILE_N at a time.
   TORCH_CHECK(OC % TILE_N == 0, "invalid weight out features ", OC);
-  TORCH_CHECK(IC % TILE_K == 0, "invalid weight input features ", IC);
+  TORCH_CHECK(actual_IC % TILE_K == 0, "invalid weight input features ", actual_IC);
 
   constexpr int64_t BLOCK_N = block_size_n();
   const int64_t NB = div_up(OC, BLOCK_N);
@@ -375,12 +656,14 @@ at::Tensor convert_weight_packed(at::Tensor& weight) {
   auto packed_weight = at::empty({}, weight.options());
   const int64_t stride = OC * IC;
 
-  TORCH_CHECK(st == at::kBFloat16 || st == at::kHalf || st == at::kChar || st == at::kFloat8_e4m3fn,
-      "expect weight to be bfloat16, float16, int8 or fp8_e4m3.");
+  // Note: for `kByte` (uint8), it represents either `mxfp4` or `int4`.
+  TORCH_CHECK(
+      st == at::kBFloat16 || st == at::kHalf || st == at::kChar || st == at::kFloat8_e4m3fn || st == at::kByte,
+      "expect weight to be bfloat16, float16, int8, fp8_e4m3 or uint8(mxfp4 or int4).");
 
   CPU_DISPATCH_PACKED_TYPES(st, [&] {
     // adjust most inner dimension size
-    const int packed_row_size = get_row_size<packed_t>(IC);
+    const int packed_row_size = get_row_size<packed_t>(actual_IC);
     auto sizes = weight.sizes().vec();
     sizes[ndim - 1] = packed_row_size;
     packed_weight.resize_(sizes);
@@ -399,10 +682,7 @@ at::Tensor convert_weight_packed(at::Tensor& weight) {
         int64_t n = nb * BLOCK_N;
         int64_t n_size = std::min(BLOCK_N, OC - n);
         pack_vnni<packed_t>(
-            packed_data + e * OC * packed_row_size + n * packed_row_size,
-            w_data + e * stride + n * IC,
-            n_size,
-            IC);
+            packed_data + e * OC * packed_row_size + n * packed_row_size, w_data + e * stride + n * IC, n_size, IC);
 
         // move to the next index
         data_index_step(e, E, nb, NB);
@@ -412,33 +692,141 @@ at::Tensor convert_weight_packed(at::Tensor& weight) {
   return packed_weight;
 }
 
+at::Tensor convert_scale_packed(at::Tensor& scale) {
+  CHECK_INPUT(scale);
+
+  const int64_t ndim = scale.ndimension();
+  TORCH_CHECK(ndim == 2 || ndim == 3, "expect scale to be 2d or 3d, got ", ndim, "d tensor.");
+  const auto st = scale.scalar_type();
+  const int64_t E = ndim == 3 ? scale.size(0) : 1;
+  const int64_t N = ndim == 3 ? scale.size(1) : scale.size(0);
+  // number of groups, e.g. K/32
+  const int64_t G = ndim == 3 ? scale.size(2) : scale.size(1);
+
+  constexpr int64_t BLOCK_N = block_size_n();
+  TORCH_CHECK(N % BLOCK_N == 0, "invalid weight out features ", N);
+  const int64_t NB = N / BLOCK_N;
+
+  auto packed_scale = at::empty_like(scale);
+  TORCH_CHECK(st == at::kByte, "expect scale to be uint8.");
+
+  const uint8_t* s_data = scale.data_ptr<uint8_t>();
+  uint8_t* packed_data = packed_scale.data_ptr<uint8_t>();
+
+  // parallel on src {E, NB, BLOCK_N, G}, dst {E, NB, G, BLOCK_N}
+  at::parallel_for(0, E * NB * BLOCK_N * G, 0, [&](int64_t begin, int64_t end) {
+    int64_t e{0}, nb{0}, n{0}, g{0};
+    data_index_init(begin, e, E, nb, NB, n, BLOCK_N, g, G);
+
+    for (int64_t i = begin; i < end; ++i) {
+      packed_data[e * N * G + nb * G * BLOCK_N + g * BLOCK_N + n] = s_data[i];
+      // move to the next index
+      data_index_step(e, E, nb, NB, n, BLOCK_N, g, G);
+    }
+  });
+  return packed_scale;
+}
+
 // mat1 : [M, K]
-// mat2 : [N, K]
+// mat2 : [N, K] ([K, N] if use_fma_gemm)
 // bias : [N]
 // out  : [M, N]
 //
-at::Tensor weight_packed_linear(at::Tensor& mat1, at::Tensor& mat2,
-    const std::optional<at::Tensor>& bias, bool is_vnni) {
-  RECORD_FUNCTION(
-    "sgl-kernel::weight_packed_linear", std::vector<c10::IValue>({mat1, mat2, bias}));
-
+at::Tensor
+weight_packed_linear(at::Tensor& mat1, at::Tensor& mat2, const std::optional<at::Tensor>& bias, bool is_vnni) {
   auto packed_w = is_vnni ? mat2 : convert_weight_packed(mat2);
+  bool use_fma_gemm = false;
+  if (packed_w.scalar_type() == at::kFloat) {
+    use_fma_gemm = true;
+  }
+
+  int64_t M = mat1.size(0);
+  int64_t K = mat1.size(1);
+  int64_t N = use_fma_gemm ? mat2.size(1) : mat2.size(0);
 
   CHECK_LAST_DIM_CONTIGUOUS_INPUT(mat1);
   CHECK_INPUT(mat2);
-
-  int64_t M = mat1.size(0);
-  int64_t N = mat2.size(0);
-  int64_t K = mat2.size(1);
-  CHECK_EQ(mat1.size(1), K);
   CHECK_DIM(2, mat1);
   CHECK_DIM(2, mat2);
+  if (!use_fma_gemm) {
+    CHECK_EQ(mat1.size(1), K);
+  }
 
+  auto dispatch_type = mat1.scalar_type();
   auto out = at::empty({M, N}, mat1.options());
-
   // strides
-  int64_t mat1_strideM = mat1.stride(0);
   int64_t out_strideM = out.stride(0);
+  int64_t mat1_strideM = mat1.stride(0);
+
+  const bool has_bias = bias.has_value();
+  const float* bias_data = nullptr;
+  if (has_bias) {
+    CHECK_EQ(bias.value().size(0), N);
+    bias_data = bias.value().data_ptr<float>();
+  }
+
+  AT_DISPATCH_REDUCED_FLOATING_TYPES(dispatch_type, "weight_packed_linear_kernel_impl", [&] {
+    if (use_fma_gemm) {
+      weight_packed_linear_kernel_impl<scalar_t>(
+          out.data_ptr<scalar_t>(),
+          mat1.data_ptr<scalar_t>(),
+          packed_w.data_ptr<float>(),
+          bias_data,
+          nullptr,
+          M,
+          N,
+          K,
+          mat1_strideM,
+          out_strideM);
+    } else {
+      weight_packed_linear_kernel_impl<scalar_t>(
+          out.data_ptr<scalar_t>(),
+          mat1.data_ptr<scalar_t>(),
+          packed_w.data_ptr<scalar_t>(),
+          bias_data,
+          M,
+          N,
+          K,
+          mat1_strideM,
+          out_strideM);
+    }
+  });
+
+  return out;
+}
+
+// mat1         : [M, K]
+// mat2         : [K, 1]
+// post_mul_mat : [M, K]
+// bias         : [N]
+// out          : [M, N]
+//
+at::Tensor fused_linear_sigmoid_mul(
+    at::Tensor& mat1,
+    at::Tensor& mat2,
+    const std::optional<at::Tensor>& bias,
+    bool is_vnni,
+    const at::Tensor& post_mul_mat) {
+  auto packed_w = is_vnni ? mat2 : convert_weight_packed(mat2);
+  TORCH_CHECK(packed_w.scalar_type() == at::kFloat, "fused_linear_sigmoid_mul requires packed float weight")
+
+  int64_t M = mat1.size(0);
+  int64_t K = mat1.size(1);
+  int64_t N = mat2.size(1);
+
+  CHECK_LAST_DIM_CONTIGUOUS_INPUT(mat1);
+  CHECK_INPUT(mat2);
+  CHECK_DIM(2, mat1);
+  CHECK_DIM(2, mat2);
+
+  int64_t out_strideM = post_mul_mat.size(1);
+  int64_t mat1_strideM = mat1.stride(0);
+  auto dispatch_type = mat1.scalar_type();
+  auto out = at::empty({M, out_strideM}, mat1.options());
+
+  TORCH_CHECK(
+      N == 1 && out_strideM % 32 == 0,
+      "post_mul_mat tensor size(1) should be 32 dividable, and the mat2 OC=1 (Mx1 as linear output shape)")
 
   const bool has_bias = bias.has_value();
   const float* bias_data = nullptr;
@@ -447,12 +835,13 @@ at::Tensor weight_packed_linear(at::Tensor& mat1, at::Tensor& mat2,
     bias_data = bias.value().data_ptr<float>();
   }
 
-  AT_DISPATCH_REDUCED_FLOATING_TYPES(mat1.scalar_type(), "weight_packed_linear_kernel_impl", [&] {
+  AT_DISPATCH_REDUCED_FLOATING_TYPES(dispatch_type, "fused_linear_sigmoid_mul", [&] {
     weight_packed_linear_kernel_impl<scalar_t>(
         out.data_ptr<scalar_t>(),
         mat1.data_ptr<scalar_t>(),
-        packed_w.data_ptr<scalar_t>(),
+        packed_w.data_ptr<float>(),
         bias_data,
+        post_mul_mat.data_ptr<scalar_t>(),
         M,
         N,
         K,
diff --git a/csrc/cpu/sgl-kernels/gemm.h b/csrc/cpu/sgl-kernels/gemm.h
index fba5673323f5..f3fb37a5f615 100644
--- a/csrc/cpu/sgl-kernels/gemm.h
+++ b/csrc/cpu/sgl-kernels/gemm.h
@@ -1,8 +1,12 @@
-#pragma once
+// Adapted from
+// https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
+
+// clang-format off
 
+#pragma once
 #include <ATen/native/CPUBlas.h>
 
-// clang-format off
+#include "common.h"
 
 // amx-bf16
 #define TILE_M 16
@@ -10,20 +14,42 @@
 #define TILE_K 32
 
 // block size for AMX gemm
-constexpr int block_size_m() { return 2 * TILE_M; }
-constexpr int block_size_n() { return 2 * TILE_N; }
+constexpr int block_size_m() {
+  return 2 * TILE_M;
+}
+constexpr int block_size_n() {
+  return 2 * TILE_N;
+}
 
 // define threshold using brgemm (intel AMX)
-template <typename T> inline bool can_use_brgemm(int M);
-template <> inline bool can_use_brgemm<at::BFloat16>(int M) { return M > 4; }
-template <> inline bool can_use_brgemm<at::Half>(int M) { return true; }
-// TODO: add u8s8 brgemm, this requires PyTorch 2.7
-template <> inline bool can_use_brgemm<int8_t>(int M) { return false; }
-template <> inline bool can_use_brgemm<at::Float8_e4m3fn>(int M) { return M > 4; }
-template <> inline bool can_use_brgemm<at::quint4x2>(int M) { return M > 4; }
+template <typename T>
+inline bool can_use_brgemm(int M);
+template <>
+inline bool can_use_brgemm<at::BFloat16>(int M) {
+  return M > 4;
+}
+template <>
+inline bool can_use_brgemm<at::Half>(int M) {
+  return true;
+}
+// this requires PyTorch 2.7 or above
+template <>
+inline bool can_use_brgemm<int8_t>(int M) {
+  return M > 4;
+}
+
+template <>
+inline bool can_use_brgemm<uint8_t>(int M) {
+  return M > 4;
+}
+
+template <>
+inline bool can_use_brgemm<at::Float8_e4m3fn>(int M) {
+  return M > 4;
+}
 
 // work around compiler internal error
-#define BLOCK_K 128 // 4 * TILE_K
+#define BLOCK_K 128  // 4 * TILE_K
 
 // adjust leading dimension size for K
 template <typename T>
@@ -36,13 +62,60 @@ inline int64_t get_row_size<int8_t>(int64_t K) {
   return K + sizeof(int32_t);
 }
 
+// uint8: mxfp4 or int4
+template <>
+inline int64_t get_row_size<uint8_t>(int64_t K) {
+  return K >> 1;
+}
+
 inline int64_t get_row_size(int64_t K, bool use_int8_w8a8) {
   return use_int8_w8a8 ? K + sizeof(int32_t) : K;
 }
 
+enum class CPUAcTMethod : int { silu_and_mul = 0, swiglu = 1 };
+
+constexpr bool operator==(CPUAcTMethod a, int b) {
+  return static_cast<int>(a) == b;
+}
+
+constexpr bool operator==(int a, CPUAcTMethod b) {
+  return a == static_cast<int>(b);
+}
+
+enum class CPUQuantMethod : int64_t { BF16 = 0, INT8_W8A8 = 1, FP8_W8A16 = 2, INT4_W4A8 = 3, MXFP4 = 4 };
+
+constexpr bool operator==(CPUQuantMethod a, int64_t b) {
+  return static_cast<int64_t>(a) == b;
+}
+
+constexpr bool operator==(int64_t a, CPUQuantMethod b) {
+  return a == static_cast<int64_t>(b);
+}
+
+enum class CPUQuantAlgo : int64_t { AWQ = 0, GPTQ = 1 };
+
+constexpr bool operator==(CPUQuantAlgo a, int64_t b) {
+  return static_cast<int64_t>(a) == b;
+}
+
+constexpr bool operator==(int64_t a, CPUQuantAlgo b) {
+  return a == static_cast<int64_t>(b);
+}
+
+inline int64_t get_4bit_block_k_size(int64_t group_size) {
+  return group_size > 128 ? 128 : group_size;
+}
+
 // pack weight to vnni format
 at::Tensor convert_weight_packed(at::Tensor& weight);
 
+// pack scale to blocked format for mxfp4
+at::Tensor convert_scale_packed(at::Tensor& scale);
+
+// pack weight to vnni format for int4
+std::tuple<at::Tensor, at::Tensor, at::Tensor>
+convert_weight_packed_scale_zp(at::Tensor qweight, at::Tensor qzeros, at::Tensor scales);
+
 // moe implementations for int8 w8a8
 template <typename scalar_t>
 void fused_experts_int8_kernel_impl(
@@ -69,9 +142,9 @@ void fused_experts_int8_kernel_impl(
     int64_t topk,
     int64_t num_tokens_post_pad);
 
-// moe implementations for fp8 w8a16
-template <typename scalar_t>
-void fused_experts_fp8_kernel_impl(
+// moe implementations for fp8 w8a16 and mxfp4
+template <typename scalar_t, typename packed_t, typename param_t, bool is_mxfp4>
+void fused_experts_fp_kernel_impl(
     scalar_t* __restrict__ output,
     scalar_t* __restrict__ ic0,
     scalar_t* __restrict__ ic1,
@@ -80,10 +153,12 @@ void fused_experts_fp8_kernel_impl(
     scalar_t* __restrict__ B_tmp,
     float* __restrict__ C_tmp,
     const scalar_t* __restrict__ input,
-    const at::Float8_e4m3fn* __restrict__ packed_w1,
-    const at::Float8_e4m3fn* __restrict__ packed_w2,
-    const float* __restrict__ w1s,
-    const float* __restrict__ w2s,
+    const packed_t* __restrict__ packed_w1,
+    const packed_t* __restrict__ packed_w2,
+    const float* __restrict__ w1_bias,
+    const float* __restrict__ w2_bias,
+    const param_t* __restrict__ w1s,
+    const param_t* __restrict__ w2s,
     int64_t block_size_N,
     int64_t block_size_K,
     const float* __restrict__ topk_weights,
@@ -95,36 +170,11 @@ void fused_experts_fp8_kernel_impl(
     int64_t K,
     int64_t E,
     int64_t topk,
-    int64_t num_tokens_post_pad);
-
-// moe implementations for int4 w4a16
-template <typename scalar_t>
-void fused_experts_int4_w4a16_kernel_impl(
-    scalar_t* __restrict__ output,
-    scalar_t* __restrict__ ic0,
-    scalar_t* __restrict__ ic1,
-    scalar_t* __restrict__ ic2,
-    scalar_t* __restrict__ A_tmp,
-    scalar_t* __restrict__ B_tmp,
-    float* __restrict__ C_tmp,
-    const scalar_t* __restrict__ input,
-    const at::quint4x2* __restrict__ packed_w1,
-    const at::quint4x2* __restrict__ packed_w2,
-    const uint8_t* __restrict__ w1z,
-    const uint8_t* __restrict__ w2z,
-    const scalar_t* __restrict__ w1s,
-    const scalar_t* __restrict__ w2s,
-    int group_size,
-    const float* __restrict__ topk_weights,
-    const int32_t* __restrict__ sorted_ids,
-    const int32_t* __restrict__ expert_ids,
-    const int32_t* __restrict__ offsets,
-    int64_t M,
-    int64_t N,
-    int64_t K,
-    int64_t E,
-    int64_t topk,
-    int64_t num_tokens_post_pad);
+    int64_t num_tokens_post_pad,
+    float alpha,
+    float limit,
+    CPUAcTMethod act_func,
+    bool with_bias);
 
 // shared expert implementation for int8 w8a8
 template <typename scalar_t>
@@ -145,6 +195,37 @@ void shared_expert_int8_kernel_impl(
     int64_t N,
     int64_t K);
 
+template <typename scalar_t>
+void fused_experts_int4_w4a8_kernel_impl(
+    scalar_t* __restrict__ output,
+    scalar_t* __restrict__ ic0,
+    scalar_t* __restrict__ ic1,
+    scalar_t* __restrict__ ic2,
+    uint8_t* __restrict__ A_tmp,
+    uint8_t* __restrict__ Aq_tmp,
+    float* __restrict__ As_tmp,
+    int32_t* __restrict__ Azp_tmp,
+    float* __restrict__ C_tmp,
+    int8_t* __restrict__ dqB_tmp,
+    const scalar_t* __restrict__ input,
+    const uint8_t* __restrict__ packed_w1,
+    const uint8_t* __restrict__ packed_w2,
+    const int8_t* __restrict__ w1z,
+    const int8_t* __restrict__ w2z,
+    const float* __restrict__ w1s,
+    const float* __restrict__ w2s,
+    int group_size,
+    const float* __restrict__ topk_weights,
+    const int32_t* __restrict__ sorted_ids,
+    const int32_t* __restrict__ expert_ids,
+    const int32_t* __restrict__ offsets,
+    int64_t M,
+    int64_t N,
+    int64_t K,
+    int64_t E,
+    int64_t topk,
+    int64_t num_tokens_post_pad);
+
 template <typename scalar_t>
 void shared_expert_fp8_kernel_impl(
     scalar_t* __restrict__ output,
@@ -196,6 +277,7 @@ void tinygemm_kernel(
     int64_t ldc,
     bool brg);
 
+// block quantization
 template <typename scalar_t>
 void tinygemm_kernel(
     const scalar_t* __restrict__ A,
@@ -203,6 +285,7 @@ void tinygemm_kernel(
     scalar_t* __restrict__ C,
     scalar_t* __restrict__ Btmp,
     float* __restrict__ Ctmp,
+    const float* __restrict__ Bbias,
     const float* __restrict__ scale,
     int64_t M,
     int64_t N,
@@ -211,56 +294,81 @@ void tinygemm_kernel(
     int64_t ldb,
     int64_t ldc,
     bool brg,
-    int64_t block_size_K);
+    int64_t block_size_K,
+    bool do_unpack = true);
 
+// per tensor quantization
 template <typename scalar_t>
 void tinygemm_kernel(
     const scalar_t* __restrict__ A,
-    const at::quint4x2* __restrict__ B,
+    const at::Float8_e4m3fn* __restrict__ B,
     scalar_t* __restrict__ C,
-    const uint8_t* __restrict__ Bz,
-    const scalar_t* __restrict__ Bs,
     scalar_t* __restrict__ Btmp,
     float* __restrict__ Ctmp,
+    float scale,
     int64_t M,
     int64_t N,
     int64_t K,
-    int group_size,
     int64_t lda,
     int64_t ldb,
     int64_t ldc,
-    int64_t strideBz,
-    int64_t strideBs,
     bool brg);
 
-// TODO: debug print, remove me later
-inline void print_16x32i(const __m512i x) {
-  int32_t a[16];
-  _mm512_storeu_si512((__m512i *)a, x);
-
-  for (int i = 0; i < 16; i++){
-    std::cout << a[i] << " ";
-  }
-  std::cout << std::endl;
-}
-
-inline void print_16x32(const __m512 x) {
-  float a[16];
-  _mm512_storeu_ps((__m512 *)a, x);
-
-  for (int i = 0; i < 16; i++){
-    std::cout << a[i] << " ";
-  }
-  std::cout << std::endl;
-}
-
+// mxfp4
+template <typename scalar_t>
+void tinygemm_kernel(
+    const scalar_t* __restrict__ A,
+    const uint8_t* __restrict__ B,
+    scalar_t* __restrict__ C,
+    scalar_t* __restrict__ Btmp,
+    float* __restrict__ Ctmp,
+    const float* __restrict__ Bbias,
+    const uint8_t* __restrict__ scale,
+    int64_t M,
+    int64_t N,
+    int64_t K,
+    int64_t lda,
+    int64_t ldb,
+    int64_t ldc,
+    bool brg,
+    int64_t block_size_K,
+    bool do_unpack = true);
 
-inline void print_32x8u(const __m256i x) {
-  uint8_t a[32];
-  _mm256_storeu_si256((__m256i *)a, x);
+template <typename scalar_t>
+void tinygemm_kernel(
+    scalar_t* C,
+    float* C_temp,
+    const uint8_t* A,
+    const float* scales_a,
+    const int32_t* qzeros_a,
+    const uint8_t* B,
+    const float* scales_b,
+    const int8_t* qzeros_b,
+    const int32_t* compensation,
+    int8_t* dqB_tmp,
+    int64_t M,
+    int64_t K,
+    int64_t lda,
+    int64_t ldc_f,
+    int64_t ldc_s,
+    bool store_out,
+    bool use_brgemm);
 
-  for (int i = 0; i < 32; ++i) {
-    std::cout << int32_t(a[i]) << " ";
-  }
-  std::cout << std::endl;
-}
+// mxfp4
+template <typename scalar_t>
+void tinygemm_kernel(
+    const scalar_t* __restrict__ A,
+    const uint8_t* __restrict__ B,
+    scalar_t* __restrict__ C,
+    scalar_t* __restrict__ Btmp,
+    float* __restrict__ Ctmp,
+    const uint8_t* __restrict__ scale,
+    int64_t M,
+    int64_t N,
+    int64_t K,
+    int64_t lda,
+    int64_t ldb,
+    int64_t ldc,
+    bool brg,
+    int64_t block_size_K,
+    bool do_unpack = true);
diff --git a/csrc/cpu/sgl-kernels/gemm_fp8.cpp b/csrc/cpu/sgl-kernels/gemm_fp8.cpp
index ef29181cee56..b47eb9256e03 100644
--- a/csrc/cpu/sgl-kernels/gemm_fp8.cpp
+++ b/csrc/cpu/sgl-kernels/gemm_fp8.cpp
@@ -1,14 +1,11 @@
 // Adapted from
 // https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
 
-#include "common.h"
-#include "vec.h"
-#include "gemm.h"
-
 // clang-format off
 
-// we use 4x32 for BLOCK_M
-#define BLOCK_SIZE_M_SCALE 4
+#include "common.h"
+#include "gemm.h"
+#include "vec.h"
 
 namespace {
 
@@ -19,7 +16,7 @@ inline void copy_stub(scalar_t* __restrict__ out, const float* __restrict__ inpu
   constexpr int kVecSize = bVec::size();
 
   int64_t d;
-  #pragma GCC unroll 4
+#pragma GCC unroll 4
   for (d = 0; d <= size - kVecSize; d += kVecSize) {
     fVec data0 = fVec::loadu(input + d);
     fVec data1 = fVec::loadu(input + d + fVec::size());
@@ -32,13 +29,14 @@ inline void copy_stub(scalar_t* __restrict__ out, const float* __restrict__ inpu
 }
 
 template <typename scalar_t>
-inline void copy_add_stub(scalar_t* __restrict__ out, const float* __restrict__ input, const float* __restrict__ bias, int64_t size) {
+inline void copy_add_stub(
+    scalar_t* __restrict__ out, const float* __restrict__ input, const float* __restrict__ bias, int64_t size) {
   using bVec = at::vec::Vectorized<scalar_t>;
   using fVec = at::vec::Vectorized<float>;
   constexpr int kVecSize = bVec::size();
 
   int64_t d;
-  #pragma GCC unroll 4
+#pragma GCC unroll 4
   for (d = 0; d <= size - kVecSize; d += kVecSize) {
     fVec data0 = fVec::loadu(input + d) + fVec::loadu(bias + d);
     fVec data1 = fVec::loadu(input + d + fVec::size()) + fVec::loadu(bias + d + fVec::size());
@@ -49,21 +47,58 @@ inline void copy_add_stub(scalar_t* __restrict__ out, const float* __restrict__
     out[d] = static_cast<scalar_t>(input[d] + bias[d]);
   }
 }
+template <typename scalar_t>
+inline void copy_mul_stub(scalar_t* __restrict__ out, const float* __restrict__ input, int size, float scale) {
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  constexpr int kVecSize = bVec::size();
+  const fVec vscale = fVec(scale);
+
+  int d;
+#pragma GCC unroll 4
+  for (d = 0; d <= size - kVecSize; d += kVecSize) {
+    fVec data0 = fVec::loadu(input + d) * vscale;
+    fVec data1 = fVec::loadu(input + d + fVec::size()) * vscale;
+    bVec out_vec = convert_from_float_ext<scalar_t>(data0, data1);
+    out_vec.store(out + d);
+  }
+  for (; d < size; ++d) {
+    out[d] = static_cast<scalar_t>(input[d] * scale);
+  }
+}
+
+template <>
+inline void
+copy_add_stub(float* __restrict__ out, const float* __restrict__ input, const float* __restrict__ bias, int64_t size) {
+  using fVec = at::vec::Vectorized<float>;
+  constexpr int kVecSize = fVec::size();
+
+  int64_t d;
+#pragma GCC unroll 4
+  for (d = 0; d <= size - kVecSize; d += kVecSize) {
+    fVec data = fVec::loadu(input + d) + fVec::loadu(bias + d);
+    data.store(out + d);
+  }
+  for (; d < size; ++d) {
+    out[d] = input[d] + bias[d];
+  }
+}
 
 inline void unpack_B(
     at::BFloat16* __restrict__ Btmp,
     const at::Float8_e4m3fn* __restrict__ packed_B,
-    int N,
-    int K,
-    int ldb,
-    int ldb_tmp,
+    int64_t N,
+    int64_t K,
+    int64_t ldb,
+    int64_t ldb_tmp,
     float scale) {
 #if defined(CPU_CAPABILITY_AVX512)
   // [K/2, N, 2]
-  const int K2 = K >> 1;
-  const int ldb2 = ldb; // ldb * 2 >> 1;
+  const int64_t K2 = K >> 1;
+  const int64_t ldb2 = ldb;  // ldb * 2 >> 1;
   const uint16_t* b_ptr = reinterpret_cast<const uint16_t*>(packed_B);
-  const __m512 vd = _mm512_set1_ps(scale);
+  const __m512 vexp = _mm512_castsi512_ps(_mm512_set1_epi32(kFP8_BIAS));
+  const __m512 vd = _mm512_mul_ps(_mm512_set1_ps(scale), vexp);
 
   constexpr int BLOCK_N = block_size_n();
   static_assert(BLOCK_N == 32);
@@ -72,7 +107,7 @@ inline void unpack_B(
   constexpr int PREFETCH_SIZE_K = 64;
 
 #pragma GCC unroll 4
-  for (int k = 0; k < K2; ++k) {
+  for (int64_t k = 0; k < K2; ++k) {
     __m512i b8 = _mm512_loadu_si512(b_ptr + k * ldb2);
     if constexpr (PREFETCH_SIZE_K > 0) {
       _mm_prefetch(b_ptr + (k + PREFETCH_SIZE_K) * ldb2, _MM_HINT_T0);
@@ -81,8 +116,8 @@ inline void unpack_B(
     __m256i b8_0 = _mm512_extracti32x8_epi32(b8, 0);
     __m256i b8_1 = _mm512_extracti32x8_epi32(b8, 1);
 
-    __m512bh bf16_0 = CVT_FP8_TO_BF16(b8_0);
-    __m512bh bf16_1 = CVT_FP8_TO_BF16(b8_1);
+    __m512bh bf16_0 = CVT_FP8_TO_BF16_EXT(b8_0);
+    __m512bh bf16_1 = CVT_FP8_TO_BF16_EXT(b8_1);
 
     // Apply scale
     __m512 f0_lo = CVT_BF16_TO_FP32(_mm512_extracti32x8_epi32((__m512i)bf16_0, 0));
@@ -106,26 +141,139 @@ inline void unpack_B(
 #endif
 }
 
-template <typename scalar_t, typename packed_t, bool has_bias, int BLOCK_M, int BLOCK_N>
+inline void unpack_B(
+    at::BFloat16* __restrict__ Btmp,
+    const at::Float8_e4m3fn* __restrict__ packed_B,
+    int N,
+    int K,
+    int ldb,
+    int ldb_tmp) {
+#if defined(CPU_CAPABILITY_AVX512)
+  // [K/2, N, 2]
+  const int K2 = K >> 1;
+  const int ldb2 = ldb;  // ldb * 2 >> 1;
+  const uint16_t* b_ptr = reinterpret_cast<const uint16_t*>(packed_B);
+
+  // prefetch distance
+  constexpr int PREFETCH_SIZE_K = 64;
+#pragma GCC unroll 4
+  for (int k = 0; k < K2; ++k) {
+    __m512i b8 = _mm512_loadu_si512(b_ptr + k * ldb2);
+    if constexpr (PREFETCH_SIZE_K > 0) {
+      _mm_prefetch(b_ptr + (k + PREFETCH_SIZE_K) * ldb2, _MM_HINT_T0);
+    }
+
+    __m256i b8_0 = _mm512_extracti32x8_epi32(b8, 0);
+    __m256i b8_1 = _mm512_extracti32x8_epi32(b8, 1);
+
+    __m512bh bf16_0 = CVT_FP8_TO_BF16(b8_0);
+    __m512bh bf16_1 = CVT_FP8_TO_BF16(b8_1);
+    _mm512_storeu_si512(Btmp + k * ldb_tmp * 2 + 0, (__m512i)bf16_0);
+    _mm512_storeu_si512(Btmp + k * ldb_tmp * 2 + 32, (__m512i)bf16_1);
+  }
+#else
+  TORCH_CHECK(false, "unpack_B: scalar path not implemented!");
+#endif
+}
+
+// mxfp4
+inline void unpack_B(
+    at::BFloat16* __restrict__ Btmp,
+    const uint8_t* __restrict__ packed_B,
+    int64_t N,
+    int64_t K,
+    int64_t ldb,
+    int64_t ldb_tmp,
+    const uint8_t* __restrict__ scale) {
+#if defined(CPU_CAPABILITY_AVX512)
+  // [K/2, N, 2]
+  const int64_t K2 = K >> 1;
+  const int64_t ldb2 = ldb;                                           // ldb * 2 >> 1;
+  const uint8_t* b_ptr = reinterpret_cast<const uint8_t*>(packed_B);  // 2 * 4 bit = 8 bit
+
+  constexpr int BLOCK_N = block_size_n();
+  static_assert(BLOCK_N == 32);
+
+  // prefetch distance
+  constexpr int PREFETCH_SIZE_K = 64;
+
+  // exponent bias 127
+  const __m512i off = _mm512_set1_epi16(0x7F);
+
+  // load 32 bytes only once for each block
+  __m256i s8 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(scale));
+  __m512i s16 = _mm512_slli_epi16(_mm512_sub_epi16(_mm512_cvtepu8_epi16(s8), off), 0x7);
+
+  // holds Nx2(64) scales, interleaved as 2 belongs to K dimension
+  // e.g. vs0: { s0,  s0,  s1,  s1, ..., s15, s15}
+  //      vs1: {s16, s16, s17, s17, ..., s31, s31}
+  auto [vscale0, vscale1] = transpose_2x32_16bit(s16, s16);
+
+#pragma GCC unroll 4
+  for (int64_t k = 0; k < K2; ++k) {
+    __m256i b4 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(b_ptr + k * ldb2));
+    if constexpr (PREFETCH_SIZE_K > 0) {
+      _mm_prefetch(b_ptr + (k + PREFETCH_SIZE_K) * ldb2, _MM_HINT_T0);
+    }
+    auto [vb0, vb1] = CVT_MXFP4_TO_BF16(b4, vscale0, vscale1);
+
+    _mm512_storeu_si512(Btmp + k * ldb_tmp * 2 + 0, (__m512i)vb0);
+    _mm512_storeu_si512(Btmp + k * ldb_tmp * 2 + 32, (__m512i)vb1);
+  }
+#else
+  TORCH_CHECK(false, "unpack_B: scalar path not implemented!");
+#endif
+}
+
+template <typename scalar_t, typename packed_t, typename param_t, bool has_bias, int BLOCK_M, int BLOCK_N>
 struct tinygemm_kernel_nn {
   static inline void apply(
-      const scalar_t* __restrict__ A, const packed_t* __restrict__ B, scalar_t* __restrict__ C,
-      const float* __restrict__ bias, const float* __restrict__ scale, int K, int lda, int ldb, int ldc, int64_t block_size_K) {
+      const scalar_t* __restrict__ A,
+      const packed_t* __restrict__ B,
+      scalar_t* __restrict__ C,
+      const float* __restrict__ bias,
+      const param_t* __restrict__ scale,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc,
+      int64_t block_size_K) {
     TORCH_CHECK(false, "tinygemm_kernel_nn: scalar path not implemented!");
   }
 };
 
+template <typename scalar_t, int BLOCK_M, int BLOCK_N>
+struct tinygemm_kernel_nn2 {
+  static inline void apply(
+      const scalar_t* __restrict__ A,
+      const at::Float8_e4m3fn* __restrict__ B,
+      scalar_t* __restrict__ C,
+      float scale,
+      int K,
+      int lda,
+      int ldb,
+      int ldc) {
+    TORCH_CHECK(false, "tinygemm_kernel_nn: scalar path not implemented!");
+  }
+};
 #if defined(CPU_CAPABILITY_AVX512)
 template <bool has_bias, int BLOCK_M, int BLOCK_N>
-struct tinygemm_kernel_nn<at::BFloat16, at::Float8_e4m3fn, has_bias, BLOCK_M, BLOCK_N> {
+struct tinygemm_kernel_nn<at::BFloat16, at::Float8_e4m3fn, float, has_bias, BLOCK_M, BLOCK_N> {
   static inline void apply(
-      const at::BFloat16* __restrict__ A, const at::Float8_e4m3fn* __restrict__ B, at::BFloat16* __restrict__ C,
-      const float* __restrict__ bias, const float* __restrict__ scale, int K, int lda, int ldb, int ldc, int64_t block_size_K) {
-
+      const at::BFloat16* __restrict__ A,
+      const at::Float8_e4m3fn* __restrict__ B,
+      at::BFloat16* __restrict__ C,
+      const float* __restrict__ bias,
+      const float* __restrict__ scale,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc,
+      int64_t block_size_K) {
     constexpr int ROWS = BLOCK_M;
     constexpr int COLS = BLOCK_N / 16;
 
-    const int KB = div_up(K, BLOCK_K);
+    const int64_t KB = div_up(K, (int64_t)BLOCK_K);
 
     // prefetch distance
     constexpr int PREFETCH_SIZE_K = 64;
@@ -139,6 +287,8 @@ struct tinygemm_kernel_nn<at::BFloat16, at::Float8_e4m3fn, has_bias, BLOCK_M, BL
     // block quant scale
     __m512 vscale;
 
+    const __m512 vexp = _mm512_castsi512_ps(_mm512_set1_epi32(kFP8_BIAS));
+
     auto loadc = [&](auto i) {
       constexpr int col = i % COLS;
       if constexpr (has_bias) {
@@ -149,8 +299,8 @@ struct tinygemm_kernel_nn<at::BFloat16, at::Float8_e4m3fn, has_bias, BLOCK_M, BL
     };
     Unroll<ROWS * COLS>{}(loadc);
 
-    const int lda2 = lda >> 1;
-    const int ldb2 = ldb; // ldb * 2 >> 1;
+    const int64_t lda2 = lda >> 1;
+    const int64_t ldb2 = ldb;  // ldb * 2 >> 1;
     const float* a_ptr = reinterpret_cast<const float*>(A);
     const uint16_t* b_ptr = reinterpret_cast<const uint16_t*>(B);
 
@@ -170,34 +320,31 @@ struct tinygemm_kernel_nn<at::BFloat16, at::Float8_e4m3fn, has_bias, BLOCK_M, BL
           if constexpr (PREFETCH_SIZE_K > 0) {
             _mm_prefetch(b_ptr + (k + PREFETCH_SIZE_K) * ldb2 + col * 16, _MM_HINT_T0);
           }
-          vb[col + 0] = CVT_FP8_TO_BF16(_mm512_extracti32x8_epi32(b8, 0));
-          vb[col + 1] = CVT_FP8_TO_BF16(_mm512_extracti32x8_epi32(b8, 1));
+          vb[col + 0] = CVT_FP8_TO_BF16_EXT(_mm512_extracti32x8_epi32(b8, 0));
+          vb[col + 1] = CVT_FP8_TO_BF16_EXT(_mm512_extracti32x8_epi32(b8, 1));
         }
       }
       vsum[i] = _mm512_dpbf16_ps(vsum[i], va, vb[col]);
     };
 
-    constexpr int BLOCK_K2 = BLOCK_K >> 1;
-    for (int kb = 0; kb < KB; ++kb) {
-      int kb_start = kb * BLOCK_K2;
-      int kb_end = std::min(K, kb_start + BLOCK_K2);
+    constexpr int64_t BLOCK_K2 = BLOCK_K >> 1;
+    for (int64_t kb = 0; kb < KB; ++kb) {
+      int64_t kb_start = kb * BLOCK_K2;
+      int64_t kb_end = std::min(K >> 1, kb_start + BLOCK_K2);
       // 1. load scale vector
       vscale = _mm512_set1_ps(scale[kb]);
+      vscale = _mm512_mul_ps(vscale, vexp);
       if constexpr (PREFETCH_SIZE_KB > 0) {
         _mm_prefetch(scale + kb + PREFETCH_SIZE_KB, _MM_HINT_T0);
       }
       // 2. zero vsum for each block
-      Unroll<ROWS * COLS>{}([&](auto i) {
-        vsum[i] = _mm512_setzero_ps();
-      });
+      Unroll<ROWS * COLS>{}([&](auto i) { vsum[i] = _mm512_setzero_ps(); });
       // 3. accumulate across each block
       for (int k = kb_start; k < kb_end; ++k) {
         Unroll<ROWS * COLS>{}(compute, k);
       }
       // 4. apply scale
-      Unroll<ROWS * COLS>{}([&](auto i) {
-        vc[i] = _mm512_fmadd_ps(vsum[i], vscale, vc[i]);
-      });
+      Unroll<ROWS * COLS>{}([&](auto i) { vc[i] = _mm512_fmadd_ps(vsum[i], vscale, vc[i]); });
     }
 
     auto storec = [&](auto i) {
@@ -213,14 +360,195 @@ struct tinygemm_kernel_nn<at::BFloat16, at::Float8_e4m3fn, has_bias, BLOCK_M, BL
     Unroll<ROWS * COLS>{}(storec);
   }
 };
-#endif
+template <int BLOCK_M, int BLOCK_N>
+struct tinygemm_kernel_nn2<at::BFloat16, BLOCK_M, BLOCK_N> {
+  static inline void apply(
+      const at::BFloat16* __restrict__ A,
+      const at::Float8_e4m3fn* __restrict__ B,
+      at::BFloat16* __restrict__ C,
+      float scale,
+      int K,
+      int lda,
+      int ldb,
+      int ldc) {
+    constexpr int ROWS = BLOCK_M;
+    constexpr int COLS = BLOCK_N / 16;
+
+    // prefetch distance
+    constexpr int PREFETCH_SIZE_K = 64;
+
+    __m512bh va;
+    __m512bh vb[COLS];
+    __m512 vc[ROWS * COLS];
+
+    const __m512 vscale = _mm512_set1_ps(scale);
+
+    auto loadc = [&](auto i) { vc[i] = _mm512_setzero_ps(); };
+    Unroll<ROWS * COLS>{}(loadc);
+
+    const int K2 = K >> 1;
+    const int lda2 = lda >> 1;
+    const int ldb2 = ldb;  // ldb * 2 >> 1;
+    const float* a_ptr = reinterpret_cast<const float*>(A);
+    const uint16_t* b_ptr = reinterpret_cast<const uint16_t*>(B);
+
+    auto compute = [&](auto i, int k) {
+      constexpr int row = i / COLS;
+      constexpr int col = i % COLS;
+
+      if constexpr (col == 0) {
+        va = (__m512bh)(_mm512_set1_ps(a_ptr[row * lda2 + k]));
+      }
+      if constexpr (row == 0) {
+        if constexpr (col % 2 == 0) {
+          __m512i b8 = _mm512_loadu_si512(b_ptr + k * ldb2 + col * 16);
+          if constexpr (PREFETCH_SIZE_K > 0) {
+            _mm_prefetch(b_ptr + (k + PREFETCH_SIZE_K) * ldb2 + col * 16, _MM_HINT_T0);
+          }
+          vb[col + 0] = CVT_FP8_TO_BF16(_mm512_extracti32x8_epi32(b8, 0));
+          vb[col + 1] = CVT_FP8_TO_BF16(_mm512_extracti32x8_epi32(b8, 1));
+        }
+      }
+      vc[i] = _mm512_dpbf16_ps(vc[i], va, vb[col]);
+    };
+    for (int k = 0; k < K2; ++k) {
+      Unroll<ROWS * COLS>{}(compute, k);
+    }
+
+    auto storec = [&](auto i) {
+      constexpr int row = i / COLS;
+      constexpr int col = i % COLS;
+      // for COLS = 2, 4 use 512bit store
+      if constexpr (col % 2 == 0) {
+        __m512 vc0 = _mm512_mul_ps(vc[row * COLS + col + 0], vscale);
+        __m512 vc1 = _mm512_mul_ps(vc[row * COLS + col + 1], vscale);
+        _mm512_storeu_si512(
+            reinterpret_cast<__m512i*>((C + row * ldc + col * 16)), (__m512i)(_mm512_cvtne2ps_pbh(vc1, vc0)));
+      }
+    };
+    Unroll<ROWS * COLS>{}(storec);
+  }
+};
+
+template <bool has_bias, int BLOCK_M, int BLOCK_N>
+struct tinygemm_kernel_nn<at::BFloat16, uint8_t, uint8_t, has_bias, BLOCK_M, BLOCK_N> {
+  static inline void apply(
+      const at::BFloat16* __restrict__ A,
+      const uint8_t* __restrict__ B,
+      at::BFloat16* __restrict__ C,
+      const float* __restrict__ bias,
+      const uint8_t* __restrict__ scale,
+      int K,
+      int lda,
+      int ldb,
+      int ldc,
+      int64_t block_size_K) {
+    // mxfp4 supports only group size of 32
+    // expect weight packed in 32-way, vnni2 format Nx2(64)
+    assert(block_size_K == 32);
+    assert(BLOCK_N == 32);
+
+    constexpr int ROWS = BLOCK_M;
+    constexpr int COLS = BLOCK_N / 16;
+
+    // prefetch distance
+    constexpr int PREFETCH_SIZE_K = 64;
+    constexpr int PREFETCH_SIZE_KB = 1;
+
+    __m512bh va;
+    __m512bh vb[COLS];
+    __m512 vc[ROWS * COLS];
+
+    // holds Nx2(64) scales, interleaved as 2 belongs to K dimension
+    // e.g. vs0: { s0,  s0,  s1,  s1, ..., s15, s15}
+    //      vs1: {s16, s16, s17, s17, ..., s31, s31}
+    __m512i vscale[COLS];
+
+    // exponent bias 127
+    const __m512i off = _mm512_set1_epi16(0x7F);
+
+    auto loadc = [&](auto i) {
+      constexpr int col = i % COLS;
+      if constexpr (has_bias) {
+        vc[i] = _mm512_loadu_ps(bias + col * 16);
+      } else {
+        vc[i] = _mm512_setzero_ps();
+      }
+    };
+    Unroll<ROWS * COLS>{}(loadc);
+
+    const int64_t K2 = K >> 1;
+    const int64_t lda2 = lda >> 1;
+    const int64_t ldb2 = ldb;  // ldb * 2 >> 1;
+    const float* a_ptr = reinterpret_cast<const float*>(A);
+    const uint8_t* b_ptr = reinterpret_cast<const uint8_t*>(B);
+
+    auto compute = [&](auto i, int k) {
+      constexpr int row = i / COLS;
+      constexpr int col = i % COLS;
+
+      if constexpr (col == 0) {
+        va = (__m512bh)(_mm512_set1_ps(a_ptr[row * lda2 + k]));
+        if constexpr (PREFETCH_SIZE_K > 0) {
+          _mm_prefetch(a_ptr + row * lda2 + k + PREFETCH_SIZE_K, _MM_HINT_T0);
+        }
+      }
+      if constexpr (row == 0) {
+        // load 32 * 2 (64) int4 at a time
+        if constexpr (col % 2 == 0) {
+          __m256i b4 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(b_ptr + k * ldb2 + col * 16));
+          if constexpr (PREFETCH_SIZE_K > 0) {
+            _mm_prefetch(b_ptr + (k + PREFETCH_SIZE_K) * ldb2 + col * 16, _MM_HINT_T0);
+          }
+          std::tie(vb[col + 0], vb[col + 1]) = CVT_MXFP4_TO_BF16(b4, vscale[col + 0], vscale[col + 1]);
+        }
+      }
+      vc[i] = _mm512_dpbf16_ps(vc[i], va, vb[col]);
+    };
+
+    for (int64_t k = 0; k < K2; ++k) {
+      // update scales every 16x2 K
+      if ((k & 15) == 0) {
+        __m256i s8 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(scale + (k >> 4) * 32));
+        __m512i s16 = _mm512_slli_epi16(_mm512_sub_epi16(_mm512_cvtepu8_epi16(s8), off), 0x7);
+        std::tie(vscale[0], vscale[1]) = transpose_2x32_16bit(s16, s16);
+      }
+      Unroll<ROWS * COLS>{}(compute, k);
+    }
 
-#define LAUNCH_TINYGEMM_KERNEL_NN(MB_SIZE, NB_SIZE)                          \
-    tinygemm_kernel_nn<scalar_t, at::Float8_e4m3fn, has_bias, MB_SIZE, NB_SIZE>::apply(         \
-        A + mb_start * lda, B + nb_start * 2, C + mb_start * ldc + nb_start, \
-        has_bias ? bias + nb_start : nullptr, scale, K, lda, ldb, ldc, block_size_K);
+    auto storec = [&](auto i) {
+      constexpr int row = i / COLS;
+      constexpr int col = i % COLS;
+      // for COLS = 2,4 use 512bit store
+      if constexpr (col % 2 == 0) {
+        _mm512_storeu_si512(
+            reinterpret_cast<__m512i*>((C + row * ldc + col * 16)),
+            (__m512i)(_mm512_cvtne2ps_pbh(vc[row * COLS + col + 1], vc[row * COLS + col])));
+      }
+    };
+    Unroll<ROWS * COLS>{}(storec);
+  }
+};
+#endif
 
-template <typename scalar_t, typename packed_t, bool has_bias>
+#define LAUNCH_TINYGEMM_KERNEL_NN(MB_SIZE, NB_SIZE)                                   \
+  tinygemm_kernel_nn<scalar_t, packed_t, param_t, has_bias, MB_SIZE, NB_SIZE>::apply( \
+      A + mb_start * lda,                                                             \
+      B + nb_start * 2,                                                               \
+      C + mb_start * ldc + nb_start,                                                  \
+      has_bias ? bias + nb_start : nullptr,                                           \
+      scale,                                                                          \
+      K,                                                                              \
+      lda,                                                                            \
+      ldb,                                                                            \
+      ldc,                                                                            \
+      block_size_K);
+
+#define LAUNCH_TINYGEMM_KERNEL_NN2(MB_SIZE, NB_SIZE)      \
+  tinygemm_kernel_nn2<scalar_t, MB_SIZE, NB_SIZE>::apply( \
+      A + mb_start * lda, B + nb_start * 2, C + mb_start * ldc + nb_start, scale, K, lda, ldb, ldc);
+
+template <typename scalar_t, typename packed_t, typename param_t, bool has_bias>
 struct brgemm {
   static inline void apply(
       const scalar_t* __restrict__ A,
@@ -229,19 +557,22 @@ struct brgemm {
       scalar_t* __restrict__ Btmp,
       float* __restrict__ Ctmp,
       const float* __restrict__ bias,
-      const float* __restrict__ scale,
+      const param_t* __restrict__ scale,
       int M,
       int N,
       int K,
       int lda,
       int ldb,
-      int ldc) {
+      int ldc,
+      bool do_unpack = true) {
     TORCH_CHECK(false, "struct brgemm: primary template not implemented!");
   }
 };
+template <typename scalar_t>
+struct brgemm2 {};
 
 template <bool has_bias>
-struct brgemm<at::BFloat16, at::Float8_e4m3fn, has_bias> {
+struct brgemm<at::BFloat16, at::Float8_e4m3fn, float, has_bias> {
   static inline void apply(
       const at::BFloat16* __restrict__ A,
       const at::Float8_e4m3fn* __restrict__ B,
@@ -255,22 +586,101 @@ struct brgemm<at::BFloat16, at::Float8_e4m3fn, has_bias> {
       int K,
       int lda,
       int ldb,
-      int ldc) {
-
+      int ldc,
+      bool do_unpack = true) {
     constexpr int BLOCK_N = block_size_n();
 
     // [K, BLOCK_N] -> [K / 2, BLOCK_N * 2]
     const int ldb_tmp = BLOCK_N;
 
+    if (do_unpack) {
+      for (int k = 0; k < K; k += BLOCK_K) {
+        int kb_size = std::min(BLOCK_K, K - k);
+
+        int idx = k >> 7;  // k / BLOCK_K where BLOCK_K = 128
+        unpack_B(Btmp + k * ldb_tmp, B + k * ldb, N, kb_size, ldb, ldb_tmp, scale[idx]);
+      }
+    }
+
+    at::native::cpublas::brgemm(M, N, K, lda, ldb_tmp, BLOCK_N, /* add_C */ false, A, Btmp, Ctmp);
+
+    // copy from Ctmp to C
+    for (int m = 0; m < M; ++m) {
+      if constexpr (has_bias) {
+        copy_add_stub(C + m * ldc, Ctmp + m * BLOCK_N, bias, N);
+      } else {
+        copy_stub(C + m * ldc, Ctmp + m * BLOCK_N, N);
+      }
+    }
+  }
+};
+
+template <>
+struct brgemm2<at::BFloat16> {
+  static inline void apply(
+      const at::BFloat16* __restrict__ A,
+      const at::Float8_e4m3fn* __restrict__ B,
+      at::BFloat16* __restrict__ C,
+      at::BFloat16* __restrict__ Btmp,
+      float* __restrict__ Ctmp,
+      float scale,
+      int M,
+      int N,
+      int K,
+      int lda,
+      int ldb,
+      int ldc) {
+    constexpr int BLOCK_N = block_size_n();
+
+    // [BLOCK_K, BLOCK_N] -> [BLOCK_K / 2, BLOCK_N * 2]
+    const int ldb_tmp = block_size_n();
+
+    // accumulate across K per BLOCK_K
     for (int k = 0; k < K; k += BLOCK_K) {
       int kb_size = std::min(BLOCK_K, K - k);
+      unpack_B(Btmp, B + k * ldb, N, kb_size, ldb, ldb_tmp);
 
-      int idx = k >> 7; // k / BLOCK_K where BLOCK_K = 128
-      unpack_B(Btmp + k * ldb_tmp, B + k * ldb, N, kb_size, ldb, ldb_tmp, scale[idx]);
+      const bool add_C = (k != 0);
+      at::native::cpublas::brgemm(M, N, kb_size, lda, ldb_tmp, BLOCK_N, add_C, A + k, Btmp, Ctmp);
     }
 
-    at::native::cpublas::brgemm(
-        M, N, K, lda, ldb_tmp, BLOCK_N, /* add_C */ false, A, Btmp, Ctmp);
+    // copy from Ctmp to C and mul scale
+    for (int m = 0; m < M; ++m) {
+      copy_mul_stub(C + m * ldc, Ctmp + m * BLOCK_N, N, scale);
+    }
+  }
+};
+
+template <bool has_bias>
+struct brgemm<at::BFloat16, uint8_t, uint8_t, has_bias> {
+  static inline void apply(
+      const at::BFloat16* __restrict__ A,
+      const uint8_t* __restrict__ B,
+      at::BFloat16* __restrict__ C,
+      at::BFloat16* __restrict__ Btmp,
+      float* __restrict__ Ctmp,
+      const float* __restrict__ bias,
+      const uint8_t* __restrict__ scale,
+      int M,
+      int N,
+      int K,
+      int lda,
+      int ldb,
+      int ldc,
+      bool do_unpack = true) {
+    constexpr int BLOCK_N = block_size_n();
+
+    // [K, BLOCK_N] -> [K / 2, BLOCK_N * 2]
+    const int ldb_tmp = BLOCK_N;
+
+    if (do_unpack) {
+      // group size 32 for mxfp4
+      for (int k = 0; k < K; k += 32) {
+        unpack_B(Btmp + k * ldb_tmp, B + k * (ldb >> 1), N, 32, ldb, ldb_tmp, scale + (k >> 5) * BLOCK_N);
+      }
+    }
+
+    at::native::cpublas::brgemm(M, N, K, lda, ldb_tmp, BLOCK_N, /* add_C */ false, A, Btmp, Ctmp);
 
     // copy from Ctmp to C
     for (int m = 0; m < M; ++m) {
@@ -283,14 +693,14 @@ struct brgemm<at::BFloat16, at::Float8_e4m3fn, has_bias> {
   }
 };
 
-template <typename scalar_t, bool has_bias>
+template <typename scalar_t, typename packed_t, typename param_t, bool has_bias>
 void tinygemm_kernel(
     const scalar_t* __restrict__ A,
-    const at::Float8_e4m3fn* __restrict__ B,
+    const packed_t* __restrict__ B,
     scalar_t* __restrict__ C,
     scalar_t* __restrict__ Btmp,
     float* __restrict__ Ctmp,
-    const float* __restrict__ scale,
+    const param_t* __restrict__ scale,
     const float* __restrict__ bias,
     int64_t M,
     int64_t N,
@@ -299,11 +709,11 @@ void tinygemm_kernel(
     int64_t ldb,
     int64_t ldc,
     bool brg,
-    int64_t block_size_K) {
-
+    int64_t block_size_K,
+    bool do_unpack = true) {
   if (brg) {
-    brgemm<scalar_t, at::Float8_e4m3fn, has_bias>::apply(
-        A, B, C, Btmp, Ctmp, bias, scale, M, N, K, lda, ldb, ldc);
+    brgemm<scalar_t, packed_t, param_t, has_bias>::apply(
+        A, B, C, Btmp, Ctmp, bias, scale, M, N, K, lda, ldb, ldc, do_unpack);
     return;
   }
 
@@ -319,23 +729,136 @@ void tinygemm_kernel(
       int64_t nb_start = nb * BLOCK_N;
       int64_t nb_size = std::min(BLOCK_N, N - nb_start);
 
-      switch(mb_size << 4 | nb_size >> 4) {
-        case 0x12: LAUNCH_TINYGEMM_KERNEL_NN(1, 32); break;
-        case 0x22: LAUNCH_TINYGEMM_KERNEL_NN(2, 32); break;
-        case 0x32: LAUNCH_TINYGEMM_KERNEL_NN(3, 32); break;
-        case 0x42: LAUNCH_TINYGEMM_KERNEL_NN(4, 32); break;
-        default: TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", nb_size);
+      switch (mb_size << 4 | nb_size >> 4) {
+        case 0x12:
+          LAUNCH_TINYGEMM_KERNEL_NN(1, 32);
+          break;
+        case 0x22:
+          LAUNCH_TINYGEMM_KERNEL_NN(2, 32);
+          break;
+        case 0x32:
+          LAUNCH_TINYGEMM_KERNEL_NN(3, 32);
+          break;
+        case 0x42:
+          LAUNCH_TINYGEMM_KERNEL_NN(4, 32);
+          break;
+        default:
+          TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", "nb_size");
       }
     }
   }
 }
 
 template <typename scalar_t>
-void fp8_scaled_mm_kernel_impl(
+void tinygemm_kernel2(
+    const scalar_t* __restrict__ A,
+    const at::Float8_e4m3fn* __restrict__ B,
+    scalar_t* __restrict__ C,
+    scalar_t* __restrict__ Btmp,
+    float* __restrict__ Ctmp,
+    float scale,
+    int64_t M,
+    int64_t N,
+    int64_t K,
+    int64_t lda,
+    int64_t ldb,
+    int64_t ldc,
+    bool brg) {
+  if (brg) {
+    brgemm2<scalar_t>::apply(A, B, C, Btmp, Ctmp, scale, M, N, K, lda, ldb, ldc);
+    return;
+  }
+
+  // pattern: 1-8-8
+  if (M == 1) {
+    constexpr int64_t BLOCK_N = 128;
+    const int64_t NB = div_up(N, BLOCK_N);
+    int64_t mb_start = 0;
+
+    for (int64_t nb = 0; nb < NB; ++nb) {
+      int64_t nb_start = nb * BLOCK_N;
+      int64_t nb_size = std::min(BLOCK_N, N - nb_start);
+
+      switch (nb_size >> 4) {
+        case 2:
+          LAUNCH_TINYGEMM_KERNEL_NN2(1, 32);
+          break;
+        case 4:
+          LAUNCH_TINYGEMM_KERNEL_NN2(1, 64);
+          break;
+        case 6:
+          LAUNCH_TINYGEMM_KERNEL_NN2(1, 96);
+          break;
+        case 8:
+          LAUNCH_TINYGEMM_KERNEL_NN2(1, 128);
+          break;
+        default:
+          TORCH_CHECK(false, "Unexpected block size, 1x", "nb_size");
+      }
+    }
+    return;
+  }
+
+  // pattern: 1-4-16
+  constexpr int64_t BLOCK_M = 4;
+  constexpr int64_t BLOCK_N = 64;
+  const int64_t MB = div_up(M, BLOCK_M);
+  const int64_t NB = div_up(N, BLOCK_N);
+  for (int64_t mb = 0; mb < MB; ++mb) {
+    int64_t mb_start = mb * BLOCK_M;
+    int64_t mb_size = std::min(BLOCK_M, M - mb_start);
+    for (int64_t nb = 0; nb < NB; ++nb) {
+      int64_t nb_start = nb * BLOCK_N;
+      int64_t nb_size = std::min(BLOCK_N, N - nb_start);
+
+      switch (mb_size << 4 | nb_size >> 4) {
+        // mb_size = 1
+        case 0x12:
+          LAUNCH_TINYGEMM_KERNEL_NN2(1, 32);
+          break;
+        case 0x14:
+          LAUNCH_TINYGEMM_KERNEL_NN2(1, 64);
+          break;
+        // mb_size = 2
+        case 0x22:
+          LAUNCH_TINYGEMM_KERNEL_NN2(2, 32);
+          break;
+        case 0x24:
+          LAUNCH_TINYGEMM_KERNEL_NN2(2, 64);
+          break;
+        // mb_size = 3
+        case 0x32:
+          LAUNCH_TINYGEMM_KERNEL_NN2(3, 32);
+          break;
+        case 0x34:
+          LAUNCH_TINYGEMM_KERNEL_NN2(3, 64);
+          break;
+        // mb_size = 4
+        case 0x42:
+          LAUNCH_TINYGEMM_KERNEL_NN2(4, 32);
+          break;
+        case 0x44:
+          LAUNCH_TINYGEMM_KERNEL_NN2(4, 64);
+          break;
+        default:
+          TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", "nb_size");
+      }
+    }
+  }
+}
+
+// NB: fp8/fp4 scaled mm kernel implementation
+//
+//        scalar_t     packed_t     param_t
+//   FP8    BF16         FP8         FP32
+//  MXFP4   BF16          U8           U8
+//
+template <typename scalar_t, typename packed_t, typename param_t, typename func_t>
+void fp_scaled_mm_kernel_impl(
     scalar_t* __restrict__ out,
     const scalar_t* __restrict__ mat1,
-    const at::Float8_e4m3fn* __restrict__ mat2,
-    const float* __restrict__ scales2,
+    const packed_t* __restrict__ mat2,
+    const param_t* __restrict__ scales2,
     const float* __restrict__ bias,
     scalar_t* __restrict__ buffer,
     int64_t M,
@@ -345,42 +868,41 @@ void fp8_scaled_mm_kernel_impl(
     int64_t out_strideM,
     int64_t block_size_N,
     int64_t block_size_K,
-    int64_t buffer_size_per_thread) {
-
-  constexpr int64_t BLOCK_M = block_size_m() * BLOCK_SIZE_M_SCALE;
+    int64_t buffer_size_per_thread,
+    const func_t& scale_offset_per_block) {
+  constexpr int64_t BLOCK_M = block_size_m();
   constexpr int64_t BLOCK_N = block_size_n();
   const int64_t MB = div_up(M, BLOCK_M);
   const int64_t NB = div_up(N, BLOCK_N);
 
-  const int64_t scale_size_K = div_up(K, block_size_K);
-  const int64_t blocks_n_per_group = block_size_N / BLOCK_N;
+  const bool use_brgemm = can_use_brgemm<packed_t>(M);
 
-  const bool use_brgemm = can_use_brgemm<at::Float8_e4m3fn>(M);
+  // use K/2 for mxfp4 and K for fp8
+  const int64_t packed_K = get_row_size<packed_t>(K);
 
   // parallel on [MB, NB]
   AT_DISPATCH_BOOL(bias != nullptr, has_bias, [&] {
-    at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
-      int64_t mb{0}, nb{0};
-      data_index_init(begin, mb, MB, nb, NB);
-
-      int tid = at::get_thread_num();
+    parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
+      int tid = get_thread_num();
       scalar_t* __restrict__ Btmp = buffer + tid * buffer_size_per_thread;
-      float* __restrict__ Ctmp = (float*)((void*)(Btmp + BLOCK_N * K));
+      float* __restrict__ Ctmp = (float*)((void*)(Btmp + MAX_CACHE_BLOCK_SIZE * BLOCK_N * K));
 
-      for (int64_t i = begin; i < end; ++i) {
-        UNUSED(i);
-        const float* scale_ptr = scales2 + (nb / blocks_n_per_group) * scale_size_K;
+      loop_2d<packed_t>(mb0, mb1, nb0, nb1, BLOCK_N * K, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
+        const param_t* scale_ptr = scales2 + scale_offset_per_block(nb);
 
         int64_t mb_start = mb * BLOCK_M;
         int64_t mb_size = std::min(M - mb_start, BLOCK_M);
         int64_t nb_start = nb * BLOCK_N;
         int64_t nb_size = std::min(N - nb_start, BLOCK_N);
 
-        tinygemm_kernel<scalar_t, has_bias>(
+        // only do unpacking for the first row
+        bool do_unpack = (mb == mb0);
+
+        tinygemm_kernel<scalar_t, packed_t, param_t, has_bias>(
             /*   A            */ mat1 + mb_start * mat1_strideM,
-            /*   B            */ mat2 + nb_start * K, // nb * BLOCK_N * K
+            /*   B            */ mat2 + nb_start * packed_K,  // nb * BLOCK_N * K
             /*   C            */ out + mb_start * out_strideM + nb_start,
-            /*   Btmp         */ Btmp,
+            /*   Btmp         */ Btmp + nb_offset * BLOCK_N * K,
             /*   Ctmp         */ Ctmp,
             /*   scale        */ scale_ptr,
             /*   bias         */ bias + nb_start,
@@ -391,11 +913,9 @@ void fp8_scaled_mm_kernel_impl(
             /*   ldb          */ nb_size,
             /*   ldc          */ out_strideM,
             /*   brg          */ use_brgemm,
-            /*   block_size_K */ block_size_K);
-
-        // move to the next index
-        data_index_step(mb, MB, nb, NB);
-      }
+            /*   block_size_K */ block_size_K,
+            /*   do_unpack    */ do_unpack);
+      });
 
       if (use_brgemm) {
         at::native::cpublas::brgemm_release();
@@ -404,7 +924,7 @@ void fp8_scaled_mm_kernel_impl(
   });
 }
 
-} // anonymous namespace
+}  // anonymous namespace
 
 // tinygemm interface
 template <typename scalar_t>
@@ -414,6 +934,7 @@ void tinygemm_kernel(
     scalar_t* __restrict__ C,
     scalar_t* __restrict__ Btmp,
     float* __restrict__ Ctmp,
+    const float* __restrict__ Bbias,
     const float* __restrict__ scale,
     int64_t M,
     int64_t N,
@@ -422,42 +943,189 @@ void tinygemm_kernel(
     int64_t ldb,
     int64_t ldc,
     bool brg,
-    int64_t block_size_K) {
-  tinygemm_kernel<scalar_t, false>(A, B, C, Btmp, Ctmp, scale, nullptr, M, N, K, lda, ldb, ldc, brg, block_size_K);
+    int64_t block_size_K,
+    bool do_unpack) {
+  if (Bbias != nullptr) {
+    tinygemm_kernel<scalar_t, at::Float8_e4m3fn, float, true>(
+        A, B, C, Btmp, Ctmp, scale, Bbias, M, N, K, lda, ldb, ldc, brg, block_size_K, do_unpack);
+    return;
+  }
+  tinygemm_kernel<scalar_t, at::Float8_e4m3fn, float, false>(
+      A, B, C, Btmp, Ctmp, scale, nullptr, M, N, K, lda, ldb, ldc, brg, block_size_K, do_unpack);
+}
+
+template <typename scalar_t>
+void tinygemm_kernel(
+    const scalar_t* __restrict__ A,
+    const at::Float8_e4m3fn* __restrict__ B,
+    scalar_t* __restrict__ C,
+    scalar_t* __restrict__ Btmp,
+    float* __restrict__ Ctmp,
+    float scale,
+    int64_t M,
+    int64_t N,
+    int64_t K,
+    int64_t lda,
+    int64_t ldb,
+    int64_t ldc,
+    bool brg) {
+  tinygemm_kernel2<scalar_t>(A, B, C, Btmp, Ctmp, scale, M, N, K, lda, ldb, ldc, brg);
+}
+template <typename scalar_t>
+void tinygemm_kernel(
+    const scalar_t* __restrict__ A,
+    const uint8_t* __restrict__ B,
+    scalar_t* __restrict__ C,
+    scalar_t* __restrict__ Btmp,
+    float* __restrict__ Ctmp,
+    const float* __restrict__ Bbias,
+    const uint8_t* __restrict__ scale,
+    int64_t M,
+    int64_t N,
+    int64_t K,
+    int64_t lda,
+    int64_t ldb,
+    int64_t ldc,
+    bool brg,
+    int64_t block_size_K,
+    bool do_unpack) {
+  if (Bbias != nullptr) {
+    tinygemm_kernel<scalar_t, uint8_t, uint8_t, true>(
+        A, B, C, Btmp, Ctmp, scale, Bbias, M, N, K, lda, ldb, ldc, brg, block_size_K, do_unpack);
+    return;
+  }
+  tinygemm_kernel<scalar_t, uint8_t, uint8_t, false>(
+      A, B, C, Btmp, Ctmp, scale, nullptr, M, N, K, lda, ldb, ldc, brg, block_size_K, do_unpack);
 }
 
-#define INSTANTIATE_TINYGEMM_TEMPLATE(TYPE)    \
+// tinygemm interface
+template <typename scalar_t>
+void tinygemm_kernel(
+    const scalar_t* __restrict__ A,
+    const at::Float8_e4m3fn* __restrict__ B,
+    float* __restrict__ C,
+    scalar_t* __restrict__ Btmp,
+    const float* __restrict__ Bbias,
+    const float* __restrict__ scale,
+    int64_t M,
+    int64_t N,
+    int64_t K,
+    int64_t lda,
+    int64_t ldb,
+    int64_t ldc,
+    bool brg,
+    int64_t block_size_K,
+    bool do_unpack) {
+  if (Bbias != nullptr) {
+    tinygemm_kernel<scalar_t, at::Float8_e4m3fn, float, true>(
+        A, B, C, Btmp, scale, Bbias, M, N, K, lda, ldb, ldc, brg, block_size_K, do_unpack);
+    return;
+  }
+  tinygemm_kernel<scalar_t, at::Float8_e4m3fn, float, false>(
+      A, B, C, Btmp, scale, nullptr, M, N, K, lda, ldb, ldc, brg, block_size_K, do_unpack);
+}
+
+template <typename scalar_t>
+void tinygemm_kernel(
+    const scalar_t* __restrict__ A,
+    const uint8_t* __restrict__ B,
+    float* __restrict__ C,
+    scalar_t* __restrict__ Btmp,
+    const float* __restrict__ Bbias,
+    const uint8_t* __restrict__ scale,
+    int64_t M,
+    int64_t N,
+    int64_t K,
+    int64_t lda,
+    int64_t ldb,
+    int64_t ldc,
+    bool brg,
+    int64_t block_size_K,
+    bool do_unpack) {
+  if (Bbias != nullptr) {
+    tinygemm_kernel<scalar_t, uint8_t, uint8_t, true>(
+        A, B, C, Btmp, scale, Bbias, M, N, K, lda, ldb, ldc, brg, block_size_K, do_unpack);
+    return;
+  }
+  tinygemm_kernel<scalar_t, uint8_t, uint8_t, false>(
+      A, B, C, Btmp, scale, nullptr, M, N, K, lda, ldb, ldc, brg, block_size_K, do_unpack);
+}
+
+#define INSTANTIATE_TINYGEMM_TEMPLATE(TYPE_A, TYPE_B, TYPE_S) \
+  template void tinygemm_kernel<TYPE_A>(                      \
+      const TYPE_A* __restrict__ A,                           \
+      const TYPE_B* __restrict__ B,                           \
+      TYPE_A* __restrict__ C,                                 \
+      TYPE_A* __restrict__ Btmp,                              \
+      float* __restrict__ Ctmp,                               \
+      const float* __restrict__ Bbias,                        \
+      const TYPE_S* __restrict__ scale,                       \
+      int64_t M,                                              \
+      int64_t N,                                              \
+      int64_t K,                                              \
+      int64_t lda,                                            \
+      int64_t ldb,                                            \
+      int64_t ldc,                                            \
+      bool brg,                                               \
+      int64_t block_size_K,                                   \
+      bool do_unpack)
+
+INSTANTIATE_TINYGEMM_TEMPLATE(at::BFloat16, at::Float8_e4m3fn, float);
+INSTANTIATE_TINYGEMM_TEMPLATE(at::Half, at::Float8_e4m3fn, float);
+INSTANTIATE_TINYGEMM_TEMPLATE(at::BFloat16, uint8_t, uint8_t);
+INSTANTIATE_TINYGEMM_TEMPLATE(at::Half, uint8_t, uint8_t);
+
+#define INSTANTIATE_TINYGEMM_TEMPLATE2(TYPE)   \
   template void tinygemm_kernel<TYPE>(         \
       const TYPE* __restrict__ A,              \
       const at::Float8_e4m3fn* __restrict__ B, \
       TYPE* __restrict__ C,                    \
       TYPE* __restrict__ Btmp,                 \
       float* __restrict__ Ctmp,                \
-      const float* __restrict__ scale,         \
+      float scale,                             \
       int64_t M,                               \
       int64_t N,                               \
       int64_t K,                               \
       int64_t lda,                             \
       int64_t ldb,                             \
       int64_t ldc,                             \
-      bool brg,                                \
-      int64_t block_size_K)
+      bool brg)
 
-INSTANTIATE_TINYGEMM_TEMPLATE(at::BFloat16);
-INSTANTIATE_TINYGEMM_TEMPLATE(at::Half);
+INSTANTIATE_TINYGEMM_TEMPLATE2(at::BFloat16);
 
-at::Tensor fp8_scaled_mm_cpu(at::Tensor& mat1, at::Tensor& mat2, at::Tensor& scales2,
-    std::vector<int64_t> block_size, std::optional<at::Tensor>& bias,
-    at::ScalarType out_dtype, bool is_vnni) {
-  RECORD_FUNCTION("sgl-kernel::fp8_scaled_mm_cpu", std::vector<c10::IValue>({mat1, mat2, scales2, block_size, bias}));
+inline const float* get_bias_data(const std::optional<at::Tensor>& bias, int64_t N) {
+  if (bias.has_value()) {
+    const auto& bias_ref = bias.value();
+    CHECK_EQ(bias_ref.size(0), N);
+    return bias_ref.data_ptr<float>();
+  }
+  return nullptr;
+}
+// FP8 and MXFP4 WoQ uses the same pattern:
+//   Btmp : [T, BLOCK_N * K]
+//   Ctmp : [T, BLOCK_M * BLOCK_N]
+inline at::Tensor alloc_thread_buffer(const at::TensorOptions& options, int64_t K) {
+  constexpr int64_t BLOCK_M = block_size_m();
+  constexpr int64_t BLOCK_N = block_size_n();
+  int num_threads = at::get_num_threads();
+  int64_t size_per_thread = MAX_CACHE_BLOCK_SIZE * BLOCK_N * K + BLOCK_M * BLOCK_N * 2;
+  return at::empty({num_threads, size_per_thread}, options);
+}
 
+at::Tensor fp8_scaled_mm_cpu(
+    at::Tensor& mat1,
+    at::Tensor& mat2,
+    at::Tensor& scales2,
+    std::vector<int64_t> block_size,
+    const std::optional<at::Tensor>& bias,
+    at::ScalarType out_dtype,
+    bool is_vnni) {
   auto packed_w = is_vnni ? mat2 : convert_weight_packed(mat2);
 
   CHECK_LAST_DIM_CONTIGUOUS_INPUT(mat1);
   CHECK_INPUT(mat2);
   CHECK_INPUT(scales2);
-  TORCH_CHECK(scales2.scalar_type() == at::kFloat,
-      "fp8_scaled_mm_cpu: expect scales2 to be float32.");
+  TORCH_CHECK(scales2.scalar_type() == at::kFloat, "fp8_scaled_mm_cpu: expect scales2 to be float32.");
 
   int64_t M = mat1.size(0);
   int64_t N = mat2.size(0);
@@ -467,13 +1135,10 @@ at::Tensor fp8_scaled_mm_cpu(at::Tensor& mat1, at::Tensor& mat2, at::Tensor& sca
   CHECK_DIM(2, mat1);
   CHECK_DIM(2, mat2);
 
-  TORCH_CHECK(block_size.size() == 2,
-      "fp8_scaled_mm_cpu: expect block_size.size() to be 2.");
-
+  TORCH_CHECK(block_size.size() == 2, "fp8_scaled_mm_cpu: expect block_size.size() to be 2.");
   int64_t block_size_N = block_size[0];
   int64_t block_size_K = block_size[1];
 
-  constexpr int64_t BLOCK_M = block_size_m() * BLOCK_SIZE_M_SCALE;
   constexpr int64_t BLOCK_N = block_size_n();
   TORCH_CHECK(block_size_N % BLOCK_N == 0, "fp8_scaled_mm_cpu: expect block_size_N to be multiples of BLOCK_N");
   TORCH_CHECK(block_size_K == BLOCK_K, "fp8_scaled_mm_cpu: expect block_size_K equals to BLOCK_K");
@@ -481,49 +1146,94 @@ at::Tensor fp8_scaled_mm_cpu(at::Tensor& mat1, at::Tensor& mat2, at::Tensor& sca
   CHECK_EQ(scales2.size(1), div_up(K, block_size_K));
 
   const auto st = mat1.scalar_type();
-  TORCH_CHECK(st == at::kBFloat16 || st == at::kHalf,
-      "fp8_scaled_mm_cpu: expect A to be bfloat16 or half.");
-  TORCH_CHECK(st == out_dtype,
-      "fp8_scaled_mm_cpu: expect A has same dtype with out_dtype.");
-  TORCH_CHECK(mat2.scalar_type() == at::kFloat8_e4m3fn,
-      "fp8_scaled_mm_cpu: expect mat2 to be fp8_e4m3.");
-  TORCH_CHECK(scales2.scalar_type() == at::kFloat,
-      "fp8_scaled_mm_cpu: expect scales to be float32.");
+  TORCH_CHECK(st == at::kBFloat16 || st == at::kHalf, "fp8_scaled_mm_cpu: expect A to be bfloat16 or half.");
+  TORCH_CHECK(st == out_dtype, "fp8_scaled_mm_cpu: expect A has same dtype with out_dtype.");
+  TORCH_CHECK(mat2.scalar_type() == at::kFloat8_e4m3fn, "fp8_scaled_mm_cpu: expect mat2 to be fp8_e4m3.");
+  TORCH_CHECK(scales2.scalar_type() == at::kFloat, "fp8_scaled_mm_cpu: expect scales to be float32.");
   auto out = at::empty({M, N}, mat1.options().dtype(out_dtype));
 
-  // strides
-  int64_t mat1_strideM = mat1.stride(0);
-  int64_t out_strideM = out.stride(0);
-
-  const bool has_bias = bias.has_value();
-  const float* bias_data = nullptr;
-  if (has_bias) {
-    CHECK_EQ(bias.value().size(0), N);
-    bias_data = bias.value().data_ptr<float>();
-  }
-
-  // Btmp : [T, BLOCK_N * K]
-  // Ctmp : [T, BLOCK_M * BLOCK_N]
-  int num_threads = at::get_num_threads();
-  int64_t size_per_thread = BLOCK_N * K + BLOCK_M * BLOCK_N * 2;
-  auto buffer = at::empty({num_threads, size_per_thread}, mat1.options());
+  auto buffer = alloc_thread_buffer(mat1.options(), K);
 
   AT_DISPATCH_REDUCED_FLOATING_TYPES(out_dtype, "fp8_scaled_mm_kernel_impl", [&] {
-    fp8_scaled_mm_kernel_impl<scalar_t>(
+    // used for lambda computing scale offset for each block
+    //   fp8 block gemm sale shape: [N/128, K/128]
+    //   for each block: [1, K/128]
+    const int64_t scale_size_K = div_up(K, block_size_K);
+    const int64_t blocks_n_per_group = block_size_N / BLOCK_N;
+
+    fp_scaled_mm_kernel_impl<scalar_t, at::Float8_e4m3fn, float>(
         out.data_ptr<scalar_t>(),
         mat1.data_ptr<scalar_t>(),
         packed_w.data_ptr<at::Float8_e4m3fn>(),
         scales2.data_ptr<float>(),
-        bias_data,
+        get_bias_data(bias, N),
         buffer.data_ptr<scalar_t>(),
         M,
         N,
         K,
-        mat1_strideM,
-        out_strideM,
+        mat1.stride(0),
+        out.stride(0),
         block_size_N,
         block_size_K,
-        size_per_thread);
+        buffer.size(-1),
+        [&](int64_t nb) { return (nb / blocks_n_per_group) * scale_size_K; });
+  });
+
+  return out;
+}
+
+// mat1 : [M, K] bfloat16
+// mat2 : [N, K / 2] uint8, actual layout: [N / BLOCK_N, K / 2, BLOCK_N, 2]
+// scales2: [N, K / G], actual layout: [N / BLOCK_N, K / G, BLOCK_N]
+at::Tensor mxfp4_scaled_mm_cpu(
+    at::Tensor& mat1, at::Tensor& mat2, at::Tensor& scales2, const std::optional<at::Tensor>& bias, bool is_vnni) {
+  auto packed_w = is_vnni ? mat2 : convert_weight_packed(mat2);
+
+  CHECK_INPUT(mat1);
+  CHECK_INPUT(mat2);
+  CHECK_INPUT(scales2);
+
+  int64_t M = mat1.size(0);
+  int64_t N = mat2.size(0);
+  int64_t K = mat2.size(1) * 2;
+
+  // mxfp4 supports only group size of 32 (2^5)
+  constexpr int64_t group_size = 32;
+  constexpr int64_t BLOCK_N = block_size_n();
+
+  CHECK_EQ(mat1.size(1), K);
+  CHECK_EQ(scales2.numel(), N * K >> 5);
+
+  const auto st = mat1.scalar_type();
+  TORCH_CHECK(st == at::kBFloat16 || st == at::kHalf, "mxfp4_scaled_mm_cpu: expect A to be bfloat16 or half.");
+  TORCH_CHECK(mat2.scalar_type() == at::kByte, "mxfp4_scaled_mm_cpu: expect mat2 to be uint8.");
+  TORCH_CHECK(scales2.scalar_type() == at::kByte, "mxfp4_scaled_mm_cpu: expect scales to be uint8.");
+  auto out = at::empty({M, N}, mat1.options());
+
+  auto buffer = alloc_thread_buffer(mat1.options(), K);
+
+  AT_DISPATCH_REDUCED_FLOATING_TYPES(st, "mxfp4_scaled_mm_kernel_impl", [&] {
+    // used for lambda computing scale offset for each block
+    //   mxfp4 block gemm sale shape: [N/BLOCK_N, K/32, BLOCK_N]
+    //   for each block: [K/32, BLOCK_N]
+    const int64_t s_strideN = (K >> 5) * BLOCK_N;
+
+    fp_scaled_mm_kernel_impl<scalar_t, uint8_t, uint8_t>(
+        out.data_ptr<scalar_t>(),
+        mat1.data_ptr<scalar_t>(),
+        packed_w.data_ptr<uint8_t>(),
+        scales2.data_ptr<uint8_t>(),
+        get_bias_data(bias, N),
+        buffer.data_ptr<scalar_t>(),
+        M,
+        N,
+        K,
+        mat1.stride(0),
+        out.stride(0),
+        /* block_size_N */ 1,
+        /* block_size_K */ group_size,
+        buffer.size(-1),
+        [&](int64_t nb) { return nb * s_strideN; });
   });
 
   return out;
diff --git a/csrc/cpu/sgl-kernels/gemm_int4.cpp b/csrc/cpu/sgl-kernels/gemm_int4.cpp
new file mode 100644
index 000000000000..5b66b2a5aee7
--- /dev/null
+++ b/csrc/cpu/sgl-kernels/gemm_int4.cpp
@@ -0,0 +1,894 @@
+// Adapted from
+// https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
+
+// clang-format off
+
+#include <torch/all.h>
+
+#include "gemm.h"
+#include "vec.h"
+
+namespace {
+
+#define BLOCK_N block_size_n()
+#define BLOCK_M 128
+
+template <bool sym_quant_act>
+struct ActDtype;
+template <>
+struct ActDtype<true> {
+  using type = int8_t;
+};
+template <>
+struct ActDtype<false> {
+  using type = uint8_t;
+};
+
+#if defined(CPU_CAPABILITY_AVX512)
+struct alignas(32) m256i_wrapper {
+  __m256i data;
+};
+
+inline std::array<m256i_wrapper, 2> load_zps_4vnni(const int8_t* __restrict__ zps) {
+  // broadcast 01234567 to
+  // 01234567012345670123456701234567
+  __m256i vzps_low = _mm256_set1_epi64x(*reinterpret_cast<const long*>(zps));
+  __m256i vzps_high = _mm256_set1_epi64x(*reinterpret_cast<const long*>(zps + 8));
+  // shuffle from
+  // 01234567012345670123456701234567
+  // to
+  // 00001111222233334444555566667777
+  __m256i shuffle_mask =
+      _mm256_set_epi8(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0);
+  vzps_low = _mm256_shuffle_epi8(vzps_low, shuffle_mask);
+  vzps_high = _mm256_shuffle_epi8(vzps_high, shuffle_mask);
+  m256i_wrapper vzps_low_wp, vzps_high_wp;
+  vzps_low_wp.data = vzps_low;
+  vzps_high_wp.data = vzps_high;
+  return {vzps_low_wp, vzps_high_wp};
+}
+
+inline std::array<m256i_wrapper, 2> load_uint4_as_int8(const uint8_t* __restrict__ qB) {
+  __m256i packed = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(qB));
+  const __m256i low_mask = _mm256_set1_epi8(0x0f);
+  __m256i high = _mm256_srli_epi16(packed, 4);
+  high = _mm256_and_si256(high, low_mask);
+  __m256i low = _mm256_and_si256(packed, low_mask);
+  m256i_wrapper low_wp, high_wp;
+  low_wp.data = low;
+  high_wp.data = high;
+  return {low_wp, high_wp};
+}
+
+template <int64_t N, int64_t ldb>
+void _dequant_weight_zp_only(const uint8_t* __restrict__ B, int8_t* dqB, const int8_t* __restrict__ qzeros, int64_t K) {
+  // unpack weight int8 -> two int4
+  // subtract zero point
+  // B shape = [K, ldb] = [K, N / 2], actual shape = [K / 4, N / 2, 4]
+  // dqB shape = [K, N], actual shape = [K / 4, N, 4]
+#pragma GCC unroll 2
+  for (int n = 0; n < N; n += 16) {
+    auto [zps_low_wp, zps_high_wp] = load_zps_4vnni(&qzeros[n]);
+    auto zps_low = zps_low_wp.data;
+    auto zps_high = zps_high_wp.data;
+    for (int k = 0; k < K; k += 4) {
+      auto [vb_low_wp, vb_high_wp] = load_uint4_as_int8(B + ldb * k + n / 2 * 4);
+      auto vb_low = vb_low_wp.data;
+      auto vb_high = vb_high_wp.data;
+      vb_high = _mm256_sub_epi8(vb_high, zps_high);
+      vb_low = _mm256_sub_epi8(vb_low, zps_low);
+      // store vb to B
+      _mm256_storeu_si256(reinterpret_cast<__m256i_u*>(dqB + N * k + n * 4), vb_low);
+      _mm256_storeu_si256(reinterpret_cast<__m256i_u*>(dqB + N * k + (n + 8) * 4), vb_high);
+    }
+  }
+}
+
+template <bool accum, int64_t N, bool sym_quant_act>
+void _dequant_and_store(
+    float* __restrict__ output,
+    const int32_t* __restrict__ input,
+    const float* __restrict__ scale_a,
+    const int32_t* __restrict__ zp_a,
+    const float* __restrict__ scale_b,
+    const int32_t* __restrict__ comp_b,
+    int M,
+    int ldi,
+    int ldo,
+    int ldsa = 1) {
+  for (int m = 0; m < M; ++m) {
+    float a_scale = *(scale_a + m * ldsa);
+    __m512 va_scale = _mm512_set1_ps(a_scale);
+    int32_t a_zp;
+    __m512i va_zp;
+    if constexpr (!sym_quant_act) {
+      a_zp = *(zp_a + m * ldsa);
+      va_zp = _mm512_set1_epi32(a_zp);
+    }
+    int n = 0;
+#pragma GCC unroll 2
+    for (; n < N; n += 16) {
+      __m512i vc = _mm512_loadu_si512(input + m * ldi + n);
+      if constexpr (!sym_quant_act) {
+        __m512i vb_comp = _mm512_loadu_si512(comp_b + n);
+        vc = _mm512_sub_epi32(vc, _mm512_mullo_epi32(vb_comp, va_zp));
+      }
+      __m512 vc_f = _mm512_cvtepi32_ps(vc);
+      __m512 vc_f_mul = _mm512_mul_ps(vc_f, va_scale);
+      __m512 vb_s = _mm512_loadu_ps(scale_b + n);
+      vc_f_mul = _mm512_mul_ps(vc_f_mul, vb_s);
+      if constexpr (accum) {
+        __m512 vo = _mm512_loadu_ps(output + m * ldo + n);
+        _mm512_storeu_ps(output + m * ldo + n, _mm512_add_ps(vo, vc_f_mul));
+      } else {
+        _mm512_storeu_ps(output + m * ldo + n, vc_f_mul);
+      }
+    }
+    for (; n < N; ++n) {
+      float dq_val;
+      if constexpr (sym_quant_act) {
+        dq_val = (float)input[m * ldi + n] * a_scale * scale_b[n];
+      } else {
+        dq_val = (float)(input[m * ldi + n] - a_zp * comp_b[n]) * a_scale * scale_b[n];
+      }
+      if constexpr (accum) {
+        output[m * ldo + n] += dq_val;
+      } else {
+        output[m * ldo + n] = dq_val;
+      }
+    }
+  }
+}
+
+#else
+template <int64_t N, int64_t ldb>
+void _dequant_weight_zp_only(const uint8_t* B, int8_t* dqB, const int8_t* qzeros, int64_t K) {
+  // B shape = [K, N / 2]
+  // dqB shape = [K, N]
+  for (int k = 0; k < K; ++k) {
+    for (int n = 0; n < N / 2; ++n) {
+      int32_t b = (int32_t)B[k * ldb + n];
+      dqB[k * N + n * 2] = (b & 0xf) - qzeros[n];
+      dqB[k * N + n * 2 + 1] = (b >> 4) - qzeros[n];
+    }
+  }
+}
+#endif
+
+#if defined(CPU_CAPABILITY_AVX512)
+inline __m512i combine_m256i(__m256i a, __m256i b) {
+  __m512i c = _mm512_castsi256_si512(a);
+  return _mm512_inserti64x4(c, b, 1);
+}
+
+inline __m512i combine_m256i(std::array<m256i_wrapper, 2> two_256) {
+  return combine_m256i(two_256[0].data, two_256[1].data);
+}
+
+// negate elements in a according to b's sign
+static inline __m512i _mm512_sign_epi8(__m512i a, __m512i b) {
+  __m512i zero = _mm512_setzero_si512();
+  __mmask64 blt0 = _mm512_movepi8_mask(b);
+  return _mm512_mask_sub_epi8(a, blt0, zero, a);
+}
+
+template <int64_t M, int64_t N, int64_t ldb, bool sym_quant_act>
+void _dequant_gemm_accum_small_M(
+    float* __restrict__ C,
+    const uint8_t* A,
+    const float* scales_a,
+    const int32_t* qzeros_a,
+    const uint8_t* B,
+    const float* scales_b,
+    const int8_t* qzeros_b,
+    int64_t K,
+    int64_t lda,
+    int64_t ldc) {
+  // if sym_quant_act is true, A pointer type is passed in as uint8_t* but actually int8_t*.
+
+  constexpr int COLS = N / 16;
+  // Computing compensation is faster than loading it for small M
+  // because it's memory bound.
+  __m512i ones = _mm512_set1_epi8(1);  // used for computing compensation
+  __m512i va;
+  __m512i vb[COLS];
+  __m512i vc[M * COLS];
+  __m512 vscales[COLS];
+  __m512i vzps[COLS];
+  __m512i vcompensate[COLS];
+
+  // Load scales and zps
+  Unroll<COLS>{}([&](auto i) {
+    vscales[i] = _mm512_loadu_ps(scales_b + i * 16);
+    vzps[i] = combine_m256i(load_zps_4vnni(qzeros_b + i * 16));
+    if constexpr (!sym_quant_act) {
+      vcompensate[i] = _mm512_setzero_epi32();
+    }
+  });
+  Unroll<M * COLS>{}([&](auto i) { vc[i] = _mm512_setzero_epi32(); });
+
+  auto compute = [&](auto i, int k) {
+    constexpr const int row = i / COLS;
+    constexpr const int col = i % COLS;
+
+    if constexpr (col == 0) {
+      va = _mm512_set1_epi32(*(int32_t*)(A + row * lda + k));
+    }
+
+    if constexpr (row == 0) {
+      int B_offset = k * ldb + col * 16 * 2;
+      vb[col] = combine_m256i(load_uint4_as_int8(B + B_offset));
+      vb[col] = _mm512_sub_epi8(vb[col], vzps[col]);
+      if constexpr (!sym_quant_act) {
+        vcompensate[col] = _mm512_dpbusd_epi32(vcompensate[col], ones, vb[col]);
+      }
+      _mm_prefetch(B + B_offset + 128 * ldb, _MM_HINT_T0);
+    }
+    if constexpr (sym_quant_act) {
+      auto vsb = _mm512_sign_epi8(vb[col], va);
+      auto vabsa = _mm512_sign_epi8(va, va);
+      vc[i] = _mm512_dpbusds_epi32(vc[i], vabsa, vsb);
+    } else {
+      vc[i] = _mm512_dpbusd_epi32(vc[i], va, vb[col]);
+    }
+  };
+
+  // Accumulate along k
+  constexpr const int unroll = 4;
+  int k = 0;
+  for (; k < K / 4 / unroll; k++) {
+    Unroll<unroll>{}([&](auto i) { Unroll<M * COLS>{}(compute, 4 * (k * unroll + i)); });
+  }
+  k *= 4 * unroll;
+  for (; k < K; k += 4) {
+    Unroll<M * COLS>{}(compute, k);
+  }
+
+  // Store to C
+  auto store = [&](auto i) {
+    constexpr const int row = i / COLS;
+    constexpr const int col = i % COLS;
+    // compute (qC - compensate * zp_a) * scale_a * scale_b
+    __m512 vc_float;
+    if constexpr (!sym_quant_act) {
+      vc[i] = _mm512_sub_epi32(vc[i], _mm512_mullo_epi32(vcompensate[col], _mm512_set1_epi32(*(qzeros_a + row))));
+    }
+    vc_float = _mm512_cvtepi32_ps(vc[i]);
+    vc_float = _mm512_mul_ps(vc_float, _mm512_set1_ps(*(scales_a + row)));
+
+    vc_float = _mm512_mul_ps(vc_float, vscales[col]);
+    auto vc_old = _mm512_loadu_ps(C + row * ldc + col * 16);
+    vc_float = _mm512_add_ps(vc_float, vc_old);
+    _mm512_storeu_ps(C + row * ldc + col * 16, vc_float);
+  };
+  Unroll<M * COLS>{}(store);
+}
+
+#define CALL_DEQUANT_GEMM_ACCUM_SMALL_M(M) \
+  _dequant_gemm_accum_small_M<M, N, ldb, sym_quant_act>(C, A, scales_a, qzeros_a, B, scales_b, qzeros_b, K, lda, ldc);
+#endif
+
+template <int64_t N, int64_t ldb, bool sym_quant_act>
+void _dequant_gemm_accum(
+    float* C,
+    const uint8_t* A,
+    const float* scales_a,
+    const int32_t* qzeros_a,
+    const uint8_t* B,
+    const float* scales_b,
+    const int8_t* qzeros_b,
+    const int32_t* compensation,
+    int8_t* dqB,
+    int64_t M,
+    int64_t K,
+    int64_t lda,
+    int64_t ldc,
+    bool use_brgemm) {
+  // Compute GEMM int8 * int8 -> int32
+  // dequant result to float by applying scales/qzeros
+#if defined(CPU_CAPABILITY_AVX512)
+  if (!use_brgemm) {
+    switch (M) {
+      case 1:
+        CALL_DEQUANT_GEMM_ACCUM_SMALL_M(1);
+        break;
+      case 2:
+        CALL_DEQUANT_GEMM_ACCUM_SMALL_M(2);
+        break;
+      case 3:
+        CALL_DEQUANT_GEMM_ACCUM_SMALL_M(3);
+        break;
+      case 4:
+        CALL_DEQUANT_GEMM_ACCUM_SMALL_M(4);
+        break;
+      default:
+        TORCH_CHECK(false, "tinygemm_kernel: unexpected M for AVX path!");
+    }
+    return;
+  }
+
+  _dequant_weight_zp_only<N, ldb>(B, dqB, qzeros_b, K);
+  using Tin = typename ActDtype<sym_quant_act>::type;
+  Tin* A_ptr = (Tin*)A;
+  if (use_brgemm) {
+    int32_t C_i32[M * N];
+    at::native::cpublas::brgemm(
+        M, N, K, lda, N /*ldb*/, N /*ldc*/, false /* add_C */, A_ptr, dqB, C_i32, true /* is_vnni */);
+    _mm_prefetch(B + N * K / 2, _MM_HINT_T0);
+    _mm_prefetch(A + K, _MM_HINT_T0);
+    _dequant_and_store<true, N, sym_quant_act>(
+        C, C_i32, scales_a, qzeros_a, scales_b, compensation, M, N /*ldi*/, ldc, 1 /*ldsa*/);
+  } else
+#endif
+  {
+    TORCH_CHECK(false, "tinygemm_kernel: scalar path not implemented!");
+  }
+}
+
+template <int64_t N>
+inline void copy_bias(const float* bias_ptr, float* y_buf, int64_t m) {
+  if (bias_ptr) {
+    for (int i = 0; i < m; ++i) {
+      int j = 0;
+#if defined(CPU_CAPABILITY_AVX512)
+#pragma GCC unroll 2
+      for (; j < N; j += 16) {
+        __m512 bias_vec = _mm512_loadu_ps(bias_ptr + j);
+        _mm512_storeu_ps(y_buf + i * N + j, bias_vec);
+      }
+#endif
+      for (; j < N; ++j) {
+        y_buf[i * N + j] = bias_ptr[j];
+      }
+    }
+  } else {  // initialize to zero
+    for (int i = 0; i < m; ++i) {
+      int j = 0;
+#if defined(CPU_CAPABILITY_AVX512)
+#pragma GCC unroll 2
+      for (; j < N; j += 16) {
+        __m512 zero_vec = _mm512_setzero_ps();
+        _mm512_storeu_ps(y_buf + i * N + j, zero_vec);
+      }
+#endif
+      for (; j < N; ++j) {
+        y_buf[i * N + j] = 0;
+      }
+    }
+  }
+}
+
+template <typename out_dtype, int64_t N>
+inline void store_out(const float* y_buf, out_dtype* c_ptr, int64_t m, /* int64_t n, */ int64_t lda) {
+  for (int i = 0; i < m; ++i) {
+    int j = 0;
+    if constexpr (std::is_same<out_dtype, float>::value) {
+#if defined(CPU_CAPABILITY_AVX512)
+#pragma GCC unroll 2
+      for (; j < N; j += 16) {
+        __m512 y_vec = _mm512_loadu_ps(y_buf + i * N + j);
+        _mm512_storeu_ps(c_ptr + i * lda + j, y_vec);
+      }
+#endif
+      for (; j < N; ++j) {
+        c_ptr[i * lda + j] = y_buf[i * N + j];
+      }
+    } else if constexpr (std::is_same<out_dtype, at::BFloat16>::value) {
+#if defined(CPU_CAPABILITY_AVX512)
+#pragma GCC unroll 2
+      for (; j < N; j += 16) {
+        __m512 y_vec = _mm512_loadu_ps(y_buf + i * N + j);
+        __m256i y_bf16_vec = at::vec::cvtfp32_bf16(y_vec);
+        _mm256_storeu_si256(reinterpret_cast<__m256i*>(c_ptr + i * lda + j), y_bf16_vec);
+      }
+#endif
+      for (; j < N; ++j) {
+        c_ptr[i * lda + j] = at::BFloat16(y_buf[i * N + j]);
+      }
+    } else if constexpr (std::is_same<out_dtype, at::Half>::value) {
+#if defined(CPU_CAPABILITY_AVX512)
+#pragma GCC unroll 2
+      for (; j < N; j += 16) {
+        __m512 y_vec = _mm512_loadu_ps(y_buf + i * N + j);
+        __m256i y_fp16_vec = at::vec::cvtfp32_fp16(y_vec);
+        _mm256_storeu_si256(reinterpret_cast<__m256i*>(c_ptr + i * lda + j), y_fp16_vec);
+      }
+#endif
+      for (; j < N; ++j) {
+        c_ptr[i * lda + j] = at::Half(y_buf[i * N + j]);
+      }
+    } else {
+      TORCH_CHECK(false, "Unsupported output dtype");
+    }
+  }
+}
+
+void fill_val_stub(int32_t* __restrict__ output, int32_t value, int64_t size) {
+  using iVec = at::vec::Vectorized<int32_t>;
+  constexpr int VecSize = iVec::size();
+  const iVec fill_val_vec = iVec(value);
+  int64_t d;
+#pragma GCC unroll 4
+  for (d = 0; d <= size - VecSize; d += VecSize) {
+    fill_val_vec.store(output + d);
+  }
+  for (; d < size; ++d) {
+    output[d] = value;
+  }
+}
+
+template <typename act_dtype, typename out_dtype, bool sym_quant_act>
+void _da8w4_linear_impl(
+    act_dtype* __restrict__ input,
+    const float* __restrict__ input_scales,
+    const int32_t* __restrict__ input_qzeros,
+    const uint8_t* __restrict__ weight,
+    const float* __restrict__ weight_scales,
+    const int8_t* __restrict__ weight_qzeros,
+    const float* __restrict__ bias,
+    out_dtype* __restrict__ output,
+    float* __restrict__ output_temp,
+    int8_t* __restrict__ dequant_weight_temp,
+    int64_t M,
+    int64_t N,
+    int64_t K,
+    int64_t num_groups) {
+  // weight + compensation shape = [Nc, Kc, BLOCK_N * _block_k / 2 + BLOCK_N*sizeof(int32_t)]
+  // scales/qzeros shape = [Nc, G, BLOCK_N]
+  const bool use_brgemm = can_use_brgemm<int8_t>(M);
+  int64_t block_m = [&]() -> long {
+    if (M <= 48) {
+      return M;
+    } else if (M < 64) {
+      return 32;
+    } else if (M < 96) {
+      return 64;
+    } else {
+      return 128;
+    }
+  }();
+  int64_t Mc = div_up(M, block_m);
+  bool parallel_on_M = M > 128;
+  int64_t Nc = N / BLOCK_N;
+  int64_t num_blocks = parallel_on_M ? Mc * Nc : Nc;
+  int64_t group_size = div_up(K, num_groups);
+  int64_t _block_k = get_4bit_block_k_size(group_size);
+  int64_t Kc = K / _block_k;
+  int64_t block_per_group = group_size / _block_k;
+
+  at::parallel_for(0, num_blocks, 1, [&](int64_t begin, int64_t end) {
+    int tid = get_thread_num();
+    float* C_tmp = output_temp + tid * block_m * BLOCK_N;
+    int8_t* dqB_tmp = dequant_weight_temp + tid * _block_k * BLOCK_N;
+    for (const auto i : c10::irange(begin, end)) {
+      int64_t mc = parallel_on_M ? i / Nc : 0;
+      int64_t nc = parallel_on_M ? i % Nc : i;
+      int64_t mc_end = parallel_on_M ? mc + 1 : Mc;
+
+      for (int mci = mc; mci < mc_end; ++mci) {
+        int64_t m_size = mci * block_m + block_m > M ? M - mci * block_m : block_m;
+        // copy bias to y_buf if bias is not None
+        auto bias_data = bias ? bias + nc * BLOCK_N : nullptr;
+        copy_bias<BLOCK_N>(bias_data, C_tmp, m_size);
+        for (int kci = 0; kci < Kc; ++kci) {
+          int32_t* compensation_ptr =
+              sym_quant_act
+                  ? nullptr
+                  : (int32_t*)(void*)(weight + (nc * Kc + kci) * (BLOCK_N * (_block_k / 2 + sizeof(int32_t))) +
+                                      _block_k * BLOCK_N / 2) /*Bcomp*/;
+          _dequant_gemm_accum<BLOCK_N, BLOCK_N / 2, sym_quant_act>(
+              /*C*/ C_tmp,
+              /*A*/ (uint8_t*)input + mci * block_m * K + kci * _block_k,
+              /*scales_a*/ input_scales + mci * block_m,
+              /*qzeros_a*/ input_qzeros + mci * block_m,
+              /*B*/ weight + (nc * Kc + kci) * (BLOCK_N * (_block_k / 2 + sizeof(int32_t))),
+              /*scales_b*/ weight_scales + nc * BLOCK_N * num_groups + kci / block_per_group * BLOCK_N,
+              /*qzeros_b*/ weight_qzeros + nc * BLOCK_N * num_groups + kci / block_per_group * BLOCK_N,
+              /*Bcomp*/ compensation_ptr,
+              /*dqB_tmp*/ dqB_tmp,
+              /*M*/ m_size,
+              /*K*/ _block_k,
+              /*lda*/ K,
+              /*ldc*/ BLOCK_N,
+              /*use_brgemm*/ use_brgemm);
+        }
+        // store y_buf to output with dtype conversion
+        store_out<out_dtype, BLOCK_N>(C_tmp, output + mci * block_m * N + nc * BLOCK_N, m_size, N /*lda*/);
+      }
+    }
+    if (use_brgemm) {
+      at::native::cpublas::brgemm_release();
+    }
+  });
+}
+
+}  // anonymous namespace
+
+/*
+return: packed_weight, packed_scales, packed_qzeros
+*/
+std::tuple<at::Tensor, at::Tensor, at::Tensor> convert_int4_weight_packed_with_compensation(
+    const at::Tensor& weight, const at::Tensor& scales, const at::Tensor& qzeros) {
+  // weight shape = [N, K]
+  // scales shape = [N, G]
+  // qzeros shape = [N, G]
+  TORCH_CHECK(weight.dim() == 2, "DA8W4 CPU: Weight should be a 2D tensor for packing");
+  TORCH_CHECK(weight.size(1) % 2 == 0, "DA8W4 CPU: Weight should have even number of columns for packing");
+
+  auto new_scales = scales;
+  auto new_qzeros = qzeros;
+  if (new_scales.dim() == 1) {
+    new_scales.unsqueeze_(1);
+  }
+  new_scales = new_scales.to(at::kFloat);
+  if (new_qzeros.dim() == 1) {
+    new_qzeros.unsqueeze_(1);
+  }
+  new_qzeros = new_qzeros.to(at::kChar);
+  int64_t N = weight.size(0);
+  int64_t K = weight.size(1);
+  int64_t G = scales.size(1);
+  int64_t group_size = K / G;
+  int64_t _block_k = get_4bit_block_k_size(group_size);
+  constexpr int block_n = block_size_n();
+  int64_t Nc = N / block_n;
+  int64_t Kc = K / _block_k;
+
+  // Reorder weight to [N/block_n, K/_block_k, _block_k, block_n]
+  // Reorder scales/qzeros to [N/block_n, G, block_n]
+  // weight + compensation shape = [Nc, Kc, block_n * _block_k / 2 + block_n*sizeof(int32_t)]
+  // scales/qzeros shape = [Nc, G, block_n]
+  auto weight_view = weight.view({Nc, block_n, Kc, _block_k});
+  at::Tensor weight_reordered = weight_view.permute({0, 2, 3, 1}).contiguous();
+  at::Tensor blocked_weight;
+  at::Tensor blocked_scales = new_scales.view({Nc, block_n, G}).permute({0, 2, 1}).contiguous();
+  at::Tensor blocked_qzeros = new_qzeros.view({Nc, block_n, G}).permute({0, 2, 1}).contiguous();
+  // Compensation = Σ(k)(W[k][n] - ZP[n]) for each block.
+  auto weight_sub_qzero = weight.view({Nc, block_n, G, -1}).to(at::kInt) - new_qzeros.view({Nc, block_n, G, -1});
+  weight_sub_qzero = weight_sub_qzero.view({Nc, block_n, Kc, _block_k});
+  at::Tensor compensation = weight_sub_qzero.sum(-1);
+  compensation = compensation.permute({0, 2, 1}).contiguous().to(at::kInt);
+  int64_t buffer_size_nbytes = _block_k * block_n / 2 + block_n * sizeof(int32_t);
+  blocked_weight = at::empty({Nc, Kc, buffer_size_nbytes}, weight.options());
+
+  auto weight_ptr = weight_reordered.data_ptr<uint8_t>();
+  auto compensation_ptr = compensation.data_ptr<int32_t>();
+  auto blocked_weight_ptr = blocked_weight.data_ptr<uint8_t>();
+  int64_t num_blocks = Nc * Kc;
+  at::parallel_for(0, num_blocks, 1, [&](int64_t begin, int64_t end) {
+    for (const auto i : c10::irange(begin, end)) {
+      auto in_ptr = weight_ptr + i * _block_k * block_n;
+      auto out_ptr = blocked_weight_ptr + i * block_n * (_block_k / 2 + sizeof(int32_t));
+      int32_t* comp_in_prt = compensation_ptr + i * block_n;
+      int32_t* comp_out_prt = (int32_t*)(void*)(blocked_weight_ptr + i * block_n * (_block_k / 2 + sizeof(int32_t)) +
+                                                _block_k * block_n / 2);
+      // Reorder weight block to VNNI4 and pack two lanes along N
+      // N=16 viewed as two lanes: a0, ...a7, b0, ...b7
+      // pack two lanes: [a0, b0], ..., [a7, b7]
+      // plain shape = [_block_k, block_n]
+      // packed shape = [_block_k / 4, block_n / 2, 4] viewed as [_block_k, block_n / 2]
+      constexpr int n_group_size = 8;
+      constexpr int vnni_size = 4;
+      constexpr int n_group = block_n / n_group_size;  // 4
+      for (int nb = 0; nb < n_group; nb += 2) {
+        for (int k = 0; k < _block_k; k += vnni_size) {
+          for (int ni = 0; ni < n_group_size; ++ni) {
+            for (int ki = 0; ki < vnni_size; ++ki) {
+              int src_idx_1 = nb * n_group_size + ni + (k + ki) * block_n;
+              int src_idx_2 = (nb + 1) * n_group_size + ni + (k + ki) * block_n;
+              int dst_idx = (nb / 2 * n_group_size + ni) * vnni_size + k * block_n / 2 + ki;
+              uint8_t src_1 = *(in_ptr + src_idx_1);
+              uint8_t src_2 = *(in_ptr + src_idx_2);
+              uint8_t dst = (src_1 & 0x0f) | ((src_2 & 0x0f) << 4);
+              *(out_ptr + dst_idx) = dst;
+            }
+          }
+        }
+      }
+      // compensation [block_n]
+      for (int nb = 0; nb < block_n; nb++) {
+        *(comp_out_prt + nb) = *(comp_in_prt + nb);
+      }
+    }
+  });
+
+  return std::make_tuple(std::move(blocked_weight), std::move(blocked_scales), std::move(blocked_qzeros));
+}
+
+std::tuple<at::Tensor, at::Tensor> unpack_4bit_to_32bit_signed(const at::Tensor& qweight, const at::Tensor& qzeros) {
+  TORCH_CHECK(qweight.scalar_type() == at::kInt, "qweight must be int32");
+  TORCH_CHECK(qzeros.scalar_type() == at::kInt, "qzeros must be int32");
+  const auto W0 = qweight.size(0);
+  const auto W1 = qweight.size(1);
+  const auto Z0 = qzeros.size(0);
+  const auto Z1 = qzeros.size(1);
+
+  // unpacked_weights: (W0 * 8, W1), int8
+  auto unpacked_weights = at::zeros({W0 * 8, W1}, at::TensorOptions().dtype(at::kChar));
+  // unpacked_zeros: (Z0, Z1 * 8), int8
+  auto unpacked_zeros = at::zeros({Z0, Z1 * 8}, at::TensorOptions().dtype(at::kChar));
+
+  const int32_t* qw_ptr = qweight.data_ptr<int32_t>();
+  const int32_t* qz_ptr = qzeros.data_ptr<int32_t>();
+  int8_t* uw_ptr = unpacked_weights.data_ptr<int8_t>();
+  int8_t* uz_ptr = unpacked_zeros.data_ptr<int8_t>();
+
+  // ---- unpack qweight ----
+  for (int64_t row = 0; row < W0 * 8; ++row) {
+    const int i = row & 7;         // row % 8
+    const int src_row = row >> 3;  // row // 8
+    const int shift = 4 * i;
+    for (int64_t col = 0; col < W1; ++col) {
+      int32_t v = qw_ptr[src_row * W1 + col];
+      uw_ptr[row * W1 + col] = static_cast<int8_t>((v >> shift) & 0xF);
+    }
+  }
+  // ---- unpack qzeros ----
+  for (int64_t col = 0; col < Z1 * 8; ++col) {
+    const int i = col & 7;
+    const int src_col = col >> 3;
+    const int shift = 4 * i;
+
+    for (int64_t row = 0; row < Z0; ++row) {
+      int32_t v = qz_ptr[row * Z1 + src_col];
+      uz_ptr[row * (Z1 * 8) + col] = static_cast<int8_t>((v >> shift) & 0xF);
+    }
+  }
+
+  return std::make_tuple(unpacked_weights, unpacked_zeros + 1);
+}
+
+std::tuple<at::Tensor, at::Tensor>
+autogptq_to_int4pack(const at::Tensor& qweight_tensor, const at::Tensor& qzeros_tensor) {
+  TORCH_CHECK(qweight_tensor.scalar_type() == at::kInt, "qweight_tensor must be int32");
+  TORCH_CHECK(qzeros_tensor.scalar_type() == at::kInt, "qzeros_tensor must be int32");
+  TORCH_CHECK(qweight_tensor.is_cpu(), "CPU only implementation");
+  if (qweight_tensor.dim() == 3) {
+    const int64_t B = qweight_tensor.size(0);
+    std::vector<at::Tensor> qweight_list;
+    std::vector<at::Tensor> qzeros_list;
+    qweight_list.reserve(B);
+    qzeros_list.reserve(B);
+    for (int64_t i = 0; i < B; ++i) {
+      auto outputs = unpack_4bit_to_32bit_signed(qweight_tensor[i], qzeros_tensor[i]);
+      at::Tensor unpacked_qweight = std::get<0>(outputs);
+      at::Tensor unpacked_qzeros = std::get<1>(outputs);
+      qweight_list.push_back(unpacked_qweight.transpose(0, 1).contiguous().to(at::kByte));
+      qzeros_list.push_back(unpacked_qzeros.contiguous().to(at::kByte));
+    }
+    return std::make_tuple(at::stack(qweight_list).detach(), at::stack(qzeros_list).detach());
+  }
+  auto outputs = unpack_4bit_to_32bit_signed(qweight_tensor, qzeros_tensor);
+  at::Tensor unpacked_qweight = std::get<0>(outputs);
+  at::Tensor unpacked_qzeros = std::get<1>(outputs);
+  at::Tensor return_qweight = unpacked_qweight.transpose(0, 1).contiguous().to(at::kByte);
+  at::Tensor return_qzeros = unpacked_qzeros.contiguous().to(at::kByte);
+  return std::make_tuple(return_qweight, return_qzeros);
+}
+
+std::tuple<at::Tensor, at::Tensor> int4pack(at::Tensor qweight, at::Tensor qzeros, int64_t quant_method_4bit) {
+  if (quant_method_4bit == CPUQuantAlgo::AWQ) {
+    // autoawq unpacking
+    qweight = qweight.contiguous();
+    qzeros = qzeros.contiguous();
+    // bitshifts: [0, 4, 1, 5, 2, 6, 3, 7] * 4
+    auto bitshifts = at::tensor({0, 4, 1, 5, 2, 6, 3, 7}, at::kInt) * 4;
+    auto qweight_unsq = qweight.unsqueeze(-1);  // [..., K, N/8, 1]
+    auto unpacked = (at::bitwise_right_shift(qweight_unsq, bitshifts) & 0xF).contiguous();
+    auto qweight_final = unpacked.flatten(-2).transpose(-1, -2).to(at::kByte).clone();
+    auto qzeros_unsq = qzeros.unsqueeze(-1);
+    auto qzeros_unpacked = (at::bitwise_right_shift(qzeros_unsq, bitshifts) & 0xF).contiguous();
+    auto qzeros_final = qzeros_unpacked.flatten(-2).to(at::kByte).clone();
+    return std::make_tuple(qweight_final, qzeros_final);
+  } else if (quant_method_4bit == CPUQuantAlgo::GPTQ) {
+    // autogptq unpacking
+    auto outputs = autogptq_to_int4pack(qweight, qzeros);
+    at::Tensor unpacked_qweight = std::get<0>(outputs);
+    at::Tensor unpacked_qzeros = std::get<1>(outputs);
+    return std::make_tuple(unpacked_qweight, unpacked_qzeros);
+  } else {
+    TORCH_CHECK(false, "CPU int4 pack only support AWQ or GPTQ...");
+  }
+}
+
+std::tuple<at::Tensor, at::Tensor, at::Tensor> convert_weight_packed_scale_zp(
+    at::Tensor qweight,  // awq: (*, K, N / 8)  ||  gptq: (*, K / 8, N) , int32
+    at::Tensor qzeros,   // awq: (*, K / group_size, N / 8) ||  gptq: (*, K / group_size, N / 8) , int32
+    at::Tensor scales,   // awq: (*, K / group_size, N) ||  gptq: (*, K / group_size, N) , bfloat16
+    int64_t quant_method_4bit) {
+  at::Tensor _qweight;
+  at::Tensor _qzeros;
+
+  auto res = int4pack(qweight, qzeros, quant_method_4bit);
+  _qweight = std::get<0>(res);
+  _qzeros = std::get<1>(res);
+
+  auto _scales = scales;
+  _qzeros = _qzeros.transpose(-2, -1).contiguous();  // .T
+  _scales = _scales.transpose(-2, -1).contiguous();
+  if (_qweight.dim() == 3) {  // Dim=3 for MOE packing, TODO: refine a unified loop
+    int64_t E = _qweight.size(0);
+    int64_t K = _qweight.size(2);
+    int64_t G = _scales.size(2);
+    int64_t group_size = K / G;
+    int64_t _block_k = get_4bit_block_k_size(group_size);
+    int64_t block_n = block_size_n();
+    int64_t Nc = _qweight.size(1) / block_n;
+    int64_t Kc = K / _block_k;
+    int64_t buffer_size_nbytes = _block_k * block_n / 2 + block_n * sizeof(int32_t);
+    auto blocked_weight = at::empty({E, Nc, Kc, buffer_size_nbytes}, _qweight.options());
+    auto blocked_scales = at::empty({E, Nc, G, block_n}, _scales.options()).to(at::kFloat);
+    auto blocked_qzeros = at::empty({E, Nc, G, block_n}, _qzeros.options()).to(at::kChar);
+    for (int i = 0; i < _qweight.size(0); i++) {
+      auto res_ = convert_int4_weight_packed_with_compensation(_qweight[i], _scales[i], _qzeros[i]);
+      blocked_weight[i] = std::get<0>(res_);
+      blocked_scales[i] = std::get<1>(res_);
+      blocked_qzeros[i] = std::get<2>(res_);
+    }
+    _qweight = blocked_weight;
+    _scales = blocked_scales;
+    _qzeros = blocked_qzeros;
+  } else {
+    auto res_ = convert_int4_weight_packed_with_compensation(_qweight, _scales, _qzeros);
+    _qweight = std::get<0>(res_);
+    _scales = std::get<1>(res_);
+    _qzeros = std::get<2>(res_);
+  }
+
+  return std::make_tuple(_qweight, _qzeros, _scales);
+}
+
+at::Tensor int4_scaled_mm_cpu_with_quant(
+    const at::Tensor& input,
+    const at::Tensor& weight,
+    const at::Tensor& weight_scales,
+    const at::Tensor& weight_qzeros,
+    const std::optional<at::Tensor>& bias,
+    at::ScalarType output_dtype) {
+  int64_t M_a = input.size(0);
+  int64_t K_a = input.size(1);
+  int64_t lda = input.stride(0);
+
+  const auto st = input.scalar_type();
+  TORCH_CHECK(
+      st == at::kBFloat16 || st == at::kHalf, "int4_scaled_mm_cpu_with_quant: expect A to be bfloat16 or half.");
+
+  constexpr bool sym_quant_act = false;  // TODO: add sym quant path
+  using Tin = typename ActDtype<sym_quant_act>::type;
+  int64_t act_buffer_size = /* act quant */ M_a * K_a +
+                            /* act scale */ M_a * sizeof(float) +
+                            /* act zp */ M_a * sizeof(int32_t);
+  auto act_buffer = at::empty({act_buffer_size}, input.options().dtype(at::kByte));
+  // asym path, activation quants into uint8_t
+  auto Aq_data = act_buffer.data_ptr<uint8_t>();
+  auto As_data = reinterpret_cast<float*>(Aq_data + M_a * K_a);
+  auto Azp_data = reinterpret_cast<int32_t*>(As_data + M_a);
+  fill_val_stub(Azp_data, 128, M_a);  // sym_a s8s8 is unified to u8s8 with compensation (128)
+
+  auto out_sizes = input.sizes().vec();
+  int64_t N = weight_scales.size(0) * weight_scales.size(-1);
+  out_sizes.back() = N;
+  auto output = at::empty(out_sizes, input.options());
+  // weight + compensation shape = [Nc, Kc, BLOCK_N * _block_k / 2 + BLOCK_N*sizeof(int32_t)]
+  // scales/qzeros shape = [Nc, G, BLOCK_N]
+  int64_t Nc = weight.size(0);
+  int64_t Kc = weight.size(1);
+  int64_t _block_k = K_a / Kc;
+  TORCH_CHECK(N == Nc * BLOCK_N, "DA8W4: weight and input shapes mismatch");
+  // scales/qzeros shape = [Nc, G, BLOCK_N]
+  int64_t num_groups = weight_scales.size(1);
+
+  const uint8_t* b_ptr = weight.data_ptr<uint8_t>();
+  const float* b_scales_ptr = weight_scales.data_ptr<float>();
+  const int8_t* b_qzeros_ptr = weight_qzeros.data_ptr<int8_t>();
+  const float* bias_ptr = bias.has_value() ? bias.value().data_ptr<float>() : nullptr;
+  int num_threads = at::get_num_threads();
+  int64_t temp_buffer_size = /* output temp */ num_threads * BLOCK_M * BLOCK_N * sizeof(float) +
+                             /*  weight dequant temp */ num_threads * _block_k * BLOCK_N;
+  auto c_temp_buffer = at::empty({temp_buffer_size}, input.options().dtype(at::kChar));
+  float* c_temp_ptr = (float*)((void*)(c_temp_buffer.data_ptr<int8_t>()));
+  int8_t* dqB_temp_ptr = (int8_t*)((void*)(c_temp_ptr + num_threads * BLOCK_M * BLOCK_N));
+
+#define LAUNCH_DA8W4_LINEAR_WITH_QUANT_IMPL(sym_quant_act)                                                 \
+  AT_DISPATCH_FLOATING_TYPES_AND2(                                                                         \
+      at::ScalarType::BFloat16, at::ScalarType::Half, output_dtype, "int4_scaled_mm_cpu_with_quant", [&] { \
+        const scalar_t* __restrict__ A_data = input.data_ptr<scalar_t>();                                  \
+        scalar_t* __restrict__ c_ptr = output.data_ptr<scalar_t>();                                        \
+        at::parallel_for(0, M_a, 0, [&](int64_t begin, int64_t end) {                                      \
+          for (int64_t m = begin; m < end; ++m) {                                                          \
+            quantize_row_int8<scalar_t>(Aq_data + m * K_a, As_data[m], A_data + m * lda, K_a);             \
+          }                                                                                                \
+        });                                                                                                \
+        _da8w4_linear_impl<Tin, scalar_t, sym_quant_act>(                                                  \
+            Aq_data,                                                                                       \
+            As_data,                                                                                       \
+            Azp_data,                                                                                      \
+            b_ptr,                                                                                         \
+            b_scales_ptr,                                                                                  \
+            b_qzeros_ptr,                                                                                  \
+            bias_ptr,                                                                                      \
+            c_ptr,                                                                                         \
+            c_temp_ptr,                                                                                    \
+            dqB_temp_ptr,                                                                                  \
+            M_a,                                                                                           \
+            N,                                                                                             \
+            K_a,                                                                                           \
+            num_groups);                                                                                   \
+      });
+
+  LAUNCH_DA8W4_LINEAR_WITH_QUANT_IMPL(sym_quant_act);
+
+  return output;
+}
+template <typename scalar_t>
+inline void copy_stub(scalar_t* __restrict__ out, const float* __restrict__ input, int64_t size) {
+  using Vec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+// no remainder
+#pragma GCC unroll 4
+  for (int64_t d = 0; d < size; d += Vec::size()) {
+    fVec x0 = fVec::loadu(input + d);
+    fVec x1 = fVec::loadu(input + d + fVec::size());
+    Vec res = convert_from_float_ext<scalar_t>(x0, x1);
+    res.store(out + d);
+  }
+}
+
+template <typename scalar_t>
+void tinygemm_kernel(
+    scalar_t* C,
+    float* C_temp,
+    const uint8_t* A,
+    const float* scales_a,
+    const int32_t* qzeros_a,
+    const uint8_t* B,
+    const float* scales_b,
+    const int8_t* qzeros_b,
+    const int32_t* compensation,
+    int8_t* dqB_tmp,
+    int64_t M,
+    int64_t K,
+    int64_t lda,
+    int64_t ldc_f,
+    int64_t ldc_s,
+    bool store_out,
+    bool use_brgemm) {
+  // TODO: add sym quant act, now only asym
+  _dequant_gemm_accum<BLOCK_N, BLOCK_N / 2, false>(
+      C_temp, A, scales_a, qzeros_a, B, scales_b, qzeros_b, compensation, dqB_tmp, M, K, lda, ldc_f, use_brgemm);
+  if (store_out) {
+    // copy from Ctmp to C
+    for (int64_t m = 0; m < M; ++m) {
+      copy_stub<scalar_t>(C + m * ldc_s, C_temp + m * ldc_f, BLOCK_N);
+    }
+  }
+}
+
+#define INSTANTIATE_TINYGEMM_TEMPLATE(TYPE) \
+  template void tinygemm_kernel<TYPE>(      \
+      TYPE * C,                             \
+      float* C_temp,                        \
+      const uint8_t* A,                     \
+      const float* scales_a,                \
+      const int32_t* qzeros_a,              \
+      const uint8_t* B,                     \
+      const float* scales_b,                \
+      const int8_t* qzeros_b,               \
+      const int32_t* compensation,          \
+      int8_t* dqB_tmp,                      \
+      int64_t M,                            \
+      int64_t K,                            \
+      int64_t lda,                          \
+      int64_t ldc_f,                        \
+      int64_t ldc_s,                        \
+      bool store_out,                       \
+      bool use_brgemm)
+
+INSTANTIATE_TINYGEMM_TEMPLATE(at::BFloat16);
+INSTANTIATE_TINYGEMM_TEMPLATE(at::Half);
+
+// int4 gemm dispatch api register
+at::Tensor int4_scaled_mm_cpu(
+    at::Tensor& x, at::Tensor& w, at::Tensor& w_zeros, at::Tensor& w_scales, std::optional<at::Tensor> bias) {
+  return int4_scaled_mm_cpu_with_quant(x, w, w_scales, w_zeros, bias, x.scalar_type());
+}
diff --git a/csrc/cpu/sgl-kernels/gemm_int8.cpp b/csrc/cpu/sgl-kernels/gemm_int8.cpp
index 4d6560cceb1a..c9b79de80105 100644
--- a/csrc/cpu/sgl-kernels/gemm_int8.cpp
+++ b/csrc/cpu/sgl-kernels/gemm_int8.cpp
@@ -1,20 +1,83 @@
 // Adapted from
 // https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
 
+// clang-format off
+
 #include "common.h"
-#include "vec.h"
 #include "gemm.h"
-
-// clang-format off
+#include "vec.h"
 
 namespace {
 
+template <typename scalar_t, bool has_bias, int BLOCK_N>
+struct scale_C {
+  static inline void apply(
+      scalar_t* __restrict__ C,
+      const int32_t* __restrict__ Ctmp,
+      const int32_t* __restrict__ Bcomp,
+      const float* __restrict__ bias,
+      float As,
+      const float* __restrict__ Bs) {
+    TORCH_CHECK(false, "scale_C: scalar path not implemented!");
+  }
+};
+
+#if defined(CPU_CAPABILITY_AVX512)
+template <bool has_bias, int BLOCK_N>
+struct scale_C<at::BFloat16, has_bias, BLOCK_N> {
+  static inline void apply(
+      at::BFloat16* __restrict__ C,
+      const int32_t* __restrict__ Ctmp,
+      const int32_t* __restrict__ Bcomp,
+      const float* __restrict__ bias,
+      float As,
+      const float* __restrict__ Bs) {
+    constexpr int COLS = BLOCK_N / 16;
+    static_assert(COLS % 2 == 0);
+
+    __m512 vc[COLS];
+    __m512 vd0 = _mm512_set1_ps(As);
+
+    auto compute = [&](auto col) {
+      __m512 vd1 = _mm512_loadu_ps(Bs + col * 16);
+      __m512i vcomp = _mm512_loadu_si512(Bcomp + col * 16);
+      __m512i vc32 = _mm512_loadu_si512(Ctmp + col * 16);
+      vc[col] = _mm512_cvtepi32_ps(_mm512_sub_epi32(vc32, vcomp));
+      if constexpr (has_bias) {
+        __m512 vbias = _mm512_loadu_ps(bias + col * 16);
+        vc[col] = _mm512_fmadd_ps(_mm512_mul_ps(vc[col], vd0), vd1, vbias);
+      } else {
+        vc[col] = _mm512_mul_ps(_mm512_mul_ps(vc[col], vd0), vd1);
+      }
+    };
+    Unroll<COLS>{}(compute);
+
+    auto storec = [&](auto col) {
+      // for COLS = 2, 4 use 512bit store
+      if constexpr (col % 2 == 0) {
+        _mm512_storeu_si512(
+            reinterpret_cast<__m512i*>((C + col * 16)), (__m512i)(_mm512_cvtne2ps_pbh(vc[col + 1], vc[col + 0])));
+      }
+    };
+    Unroll<COLS>{}(storec);
+  }
+};
+#endif
+
 template <typename scalar_t, bool has_bias, int BLOCK_M, int BLOCK_N>
 struct tinygemm_kernel_nn {
   static inline void apply(
-      const uint8_t* __restrict__ A, const int8_t* __restrict__ B, scalar_t* __restrict__ C,
-      const float* __restrict__ As, const float* __restrict__ Bs, const int32_t* __restrict__ Bcomp,
-      const float* __restrict__ bias, int64_t K, int64_t lda, int64_t ldb, int64_t ldc) {
+      const uint8_t* __restrict__ A,
+      const int8_t* __restrict__ B,
+      scalar_t* __restrict__ C,
+      const float* __restrict__ As,
+      const float* __restrict__ Bs,
+      const int32_t* __restrict__ Bcomp,
+      const float* __restrict__ bias,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc) {
     TORCH_CHECK(false, "tinygemm_kernel_nn: scalar path not implemented!");
   }
 };
@@ -23,10 +86,17 @@ struct tinygemm_kernel_nn {
 template <bool has_bias, int BLOCK_M, int BLOCK_N>
 struct tinygemm_kernel_nn<at::BFloat16, has_bias, BLOCK_M, BLOCK_N> {
   static inline void apply(
-      const uint8_t* __restrict__ A, const int8_t* __restrict__ B, at::BFloat16* __restrict__ C,
-      const float* __restrict__ As, const float* __restrict__ Bs, const int32_t* __restrict__ Bcomp,
-      const float* __restrict__ bias, int64_t K, int64_t lda, int64_t ldb, int64_t ldc) {
-
+      const uint8_t* __restrict__ A,
+      const int8_t* __restrict__ B,
+      at::BFloat16* __restrict__ C,
+      const float* __restrict__ As,
+      const float* __restrict__ Bs,
+      const int32_t* __restrict__ Bcomp,
+      const float* __restrict__ bias,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc) {
     constexpr int ROWS = BLOCK_M;
     constexpr int COLS = BLOCK_N / 16;
     static_assert(COLS % 2 == 0);
@@ -38,10 +108,10 @@ struct tinygemm_kernel_nn<at::BFloat16, has_bias, BLOCK_M, BLOCK_N> {
     __m512i vb[COLS];
     __m512i vc[ROWS * COLS];
     __m512i vcomp[COLS];
-    __m512  vd0;
-    __m512  vd1[COLS];
+    __m512 vd0;
+    __m512 vd1[COLS];
 
-    // oops! 4x4 spills but luckily we use 4x2
+    // oops! 4x4 spills but we use 4x2
     __m512 vbias[COLS];
 
     // [NOTE]: s8s8 igemm compensation in avx512-vnni
@@ -54,14 +124,12 @@ struct tinygemm_kernel_nn<at::BFloat16, has_bias, BLOCK_M, BLOCK_N> {
     // 1) 128 * b is pre-computed when packing B to vnni formats
     // 2) a + 128 is fused when dynamically quantize A
     //
-    auto loadc = [&](auto i) {
-      vc[i] = _mm512_set1_epi32(0);
-    };
+    auto loadc = [&](auto i) { vc[i] = _mm512_set1_epi32(0); };
     Unroll<ROWS * COLS>{}(loadc);
 
     const int64_t K4 = K >> 2;
     const int64_t lda4 = lda >> 2;
-    const int64_t ldb4 = ldb; // ldb * 4 >> 2;
+    const int64_t ldb4 = ldb;  // ldb * 4 >> 2;
     const int32_t* a_ptr = reinterpret_cast<const int32_t*>(A);
     const int32_t* b_ptr = reinterpret_cast<const int32_t*>(B);
 
@@ -89,7 +157,7 @@ struct tinygemm_kernel_nn<at::BFloat16, has_bias, BLOCK_M, BLOCK_N> {
       constexpr int col = i % COLS;
 
       // load a scale
-      if constexpr(col == 0) {
+      if constexpr (col == 0) {
         vd0 = _mm512_set1_ps(As[row]);
       }
       // load b scale and vcomp per 2 vectors
@@ -120,8 +188,7 @@ struct tinygemm_kernel_nn<at::BFloat16, has_bias, BLOCK_M, BLOCK_N> {
         }
 
         _mm512_storeu_si512(
-            reinterpret_cast<__m512i*>((C + row * ldc + col * 16)),
-            (__m512i)(_mm512_cvtne2ps_pbh(vc1, vc0)));
+            reinterpret_cast<__m512i*>((C + row * ldc + col * 16)), (__m512i)(_mm512_cvtne2ps_pbh(vc1, vc0)));
       }
     };
     Unroll<ROWS * COLS>{}(storec);
@@ -129,11 +196,19 @@ struct tinygemm_kernel_nn<at::BFloat16, has_bias, BLOCK_M, BLOCK_N> {
 };
 #endif
 
-#define LAUNCH_TINYGEMM_KERNEL_NN(MB_SIZE, NB_SIZE)                          \
-    tinygemm_kernel_nn<scalar_t, has_bias, MB_SIZE, NB_SIZE>::apply(         \
-        A + mb_start * lda, B + nb_start * 4, C + mb_start * ldc + nb_start, \
-        As + mb_start, Bs + nb_start, Bcomp + nb_start,                      \
-        has_bias ? bias + nb_start : nullptr, K, lda, ldb, ldc);
+#define LAUNCH_TINYGEMM_KERNEL_NN(MB_SIZE, NB_SIZE)                \
+  tinygemm_kernel_nn<scalar_t, has_bias, MB_SIZE, NB_SIZE>::apply( \
+      A + mb_start * lda,                                          \
+      B + nb_start * 4,                                            \
+      C + mb_start * ldc + nb_start,                               \
+      As + mb_start,                                               \
+      Bs + nb_start,                                               \
+      Bcomp + nb_start,                                            \
+      has_bias ? bias + nb_start : nullptr,                        \
+      K,                                                           \
+      lda,                                                         \
+      ldb,                                                         \
+      ldc);
 
 template <typename scalar_t, bool has_bias>
 void tinygemm_kernel(
@@ -151,10 +226,20 @@ void tinygemm_kernel(
     int64_t ldb,
     int64_t ldc,
     bool brg) {
-
   // B compensation
   const int32_t* Bcomp = reinterpret_cast<const int32_t*>(B + block_size_n() * K);
 
+  if (brg) {
+    constexpr int BLOCK_N = block_size_n();
+    at::native::cpublas::brgemm(M, N, K, lda, ldb, BLOCK_N, /* add_C */ false, A, B, Ctmp);
+
+    // apply compensation and scale
+    for (int64_t m = 0; m < M; ++m) {
+      scale_C<scalar_t, has_bias, BLOCK_N>::apply(C + m * ldc, Ctmp + m * BLOCK_N, Bcomp, bias, As[m], Bs);
+    }
+    return;
+  }
+
   // pattern: 1-4-16
   constexpr int64_t BLOCK_M = 4;
   constexpr int64_t BLOCK_N = 64;
@@ -167,26 +252,43 @@ void tinygemm_kernel(
       int64_t nb_start = nb * BLOCK_N;
       int64_t nb_size = std::min(BLOCK_N, N - nb_start);
 
-      switch(mb_size << 4 | nb_size >> 4) {
+      switch (mb_size << 4 | nb_size >> 4) {
         // mb_size = 1
-        case 0x12: LAUNCH_TINYGEMM_KERNEL_NN(1, 32); break;
-        case 0x14: LAUNCH_TINYGEMM_KERNEL_NN(1, 64); break;
+        case 0x12:
+          LAUNCH_TINYGEMM_KERNEL_NN(1, 32);
+          break;
+        case 0x14:
+          LAUNCH_TINYGEMM_KERNEL_NN(1, 64);
+          break;
         // mb_size = 2
-        case 0x22: LAUNCH_TINYGEMM_KERNEL_NN(2, 32); break;
-        case 0x24: LAUNCH_TINYGEMM_KERNEL_NN(2, 64); break;
+        case 0x22:
+          LAUNCH_TINYGEMM_KERNEL_NN(2, 32);
+          break;
+        case 0x24:
+          LAUNCH_TINYGEMM_KERNEL_NN(2, 64);
+          break;
         // mb_size = 3
-        case 0x32: LAUNCH_TINYGEMM_KERNEL_NN(3, 32); break;
-        case 0x34: LAUNCH_TINYGEMM_KERNEL_NN(3, 64); break;
+        case 0x32:
+          LAUNCH_TINYGEMM_KERNEL_NN(3, 32);
+          break;
+        case 0x34:
+          LAUNCH_TINYGEMM_KERNEL_NN(3, 64);
+          break;
         // mb_size = 4
-        case 0x42: LAUNCH_TINYGEMM_KERNEL_NN(4, 32); break;
-        case 0x44: LAUNCH_TINYGEMM_KERNEL_NN(4, 64); break;
-        default: TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", nb_size);
+        case 0x42:
+          LAUNCH_TINYGEMM_KERNEL_NN(4, 32);
+          break;
+        case 0x44:
+          LAUNCH_TINYGEMM_KERNEL_NN(4, 64);
+          break;
+        default:
+          TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", "nb_size");
       }
     }
   }
 }
 
-template<typename scalar_t>
+template <typename scalar_t>
 void int8_scaled_mm_kernel_impl(
     scalar_t* __restrict__ out,
     const uint8_t* __restrict__ mat1,
@@ -197,28 +299,22 @@ void int8_scaled_mm_kernel_impl(
     int64_t M,
     int64_t N,
     int64_t K) {
-
   constexpr int64_t BLOCK_M = block_size_m();
   constexpr int64_t BLOCK_N = block_size_n();
   const int64_t MB = div_up(M, BLOCK_M);
   const int64_t NB = div_up(N, BLOCK_N);
 
-  // TODO: brgemm u8s8 depends on PyTorch 2.7 release.
-  const bool use_brgemm = false;
+  const bool use_brgemm = can_use_brgemm<int8_t>(M);
 
   // K + 4 after compensation
   const int64_t packed_row_size = get_row_size<int8_t>(K);
 
   AT_DISPATCH_BOOL(bias != nullptr, has_bias, [&] {
-    at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
-      int64_t mb{0}, nb{0};
-      data_index_init(begin, mb, MB, nb, NB);
-
+    parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
       // for brgemm, use int32_t for accumulate
       alignas(64) int32_t Ctmp[BLOCK_M * BLOCK_N];
 
-      for (int i = begin; i < end; ++i) {
-        UNUSED(i);
+      loop_2d<int8_t>(mb0, mb1, nb0, nb1, BLOCK_N * K, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
         int mb_start = mb * BLOCK_M;
         int mb_size = std::min(M - mb_start, BLOCK_M);
         int nb_start = nb * BLOCK_N;
@@ -239,10 +335,7 @@ void int8_scaled_mm_kernel_impl(
             /* ldb */ nb_size,
             /* ldc */ N,
             /* brg */ use_brgemm);
-
-        // move to the next index
-        data_index_step(mb, MB, nb, NB);
-      }
+      });
 
       if (use_brgemm) {
         at::native::cpublas::brgemm_release();
@@ -251,28 +344,47 @@ void int8_scaled_mm_kernel_impl(
   });
 }
 
-} // anonymous namespace
+}  // anonymous namespace
 
 // tinygemm interface
 template <typename scalar_t>
-void tinygemm_kernel(const uint8_t* __restrict__ A, const int8_t* __restrict__ B, scalar_t* __restrict__ C,
-    int32_t* __restrict__ Ctmp,  const float* __restrict__ As, const float* __restrict__ Bs,
-    int64_t M, int64_t N, int64_t K, int64_t lda, int64_t ldb, int64_t ldc, bool brg) {
+void tinygemm_kernel(
+    const uint8_t* __restrict__ A,
+    const int8_t* __restrict__ B,
+    scalar_t* __restrict__ C,
+    int32_t* __restrict__ Ctmp,
+    const float* __restrict__ As,
+    const float* __restrict__ Bs,
+    int64_t M,
+    int64_t N,
+    int64_t K,
+    int64_t lda,
+    int64_t ldb,
+    int64_t ldc,
+    bool brg) {
   tinygemm_kernel<scalar_t, false>(A, B, C, Ctmp, As, Bs, nullptr, M, N, K, lda, ldb, ldc, brg);
 }
 
-#define INSTANTIATE_TINYGEMM_TEMPLATE(TYPE)                                                     \
-    template void tinygemm_kernel<TYPE>(                                                        \
-        const uint8_t* __restrict__ A, const int8_t* __restrict__ B, TYPE* __restrict__ C,      \
-        int32_t* __restrict__ Ctmp, const float* __restrict__ As, const float* __restrict__ Bs, \
-        int64_t M, int64_t N, int64_t K, int64_t lda, int64_t ldb, int64_t ldc, bool brg)
+#define INSTANTIATE_TINYGEMM_TEMPLATE(TYPE) \
+  template void tinygemm_kernel<TYPE>(      \
+      const uint8_t* __restrict__ A,        \
+      const int8_t* __restrict__ B,         \
+      TYPE* __restrict__ C,                 \
+      int32_t* __restrict__ Ctmp,           \
+      const float* __restrict__ As,         \
+      const float* __restrict__ Bs,         \
+      int64_t M,                            \
+      int64_t N,                            \
+      int64_t K,                            \
+      int64_t lda,                          \
+      int64_t ldb,                          \
+      int64_t ldc,                          \
+      bool brg)
 
 INSTANTIATE_TINYGEMM_TEMPLATE(at::BFloat16);
 INSTANTIATE_TINYGEMM_TEMPLATE(at::Half);
 
 std::tuple<at::Tensor, at::Tensor> per_token_quant_int8_cpu(at::Tensor& A) {
-  RECORD_FUNCTION("sgl-kernel::per_token_quant_int8_cpu", std::vector<c10::IValue>({A}));
-
   CHECK_LAST_DIM_CONTIGUOUS_INPUT(A);
   CHECK_DIM(2, A);
 
@@ -281,8 +393,7 @@ std::tuple<at::Tensor, at::Tensor> per_token_quant_int8_cpu(at::Tensor& A) {
   int64_t lda = A.stride(0);
 
   const auto st = A.scalar_type();
-  TORCH_CHECK(st == at::kBFloat16 || st == at::kHalf,
-      "per_token_quant_int8: expect A to be bfloat16 or half.");
+  TORCH_CHECK(st == at::kBFloat16 || st == at::kHalf, "per_token_quant_int8: expect A to be bfloat16 or half.");
 
   auto Aq = at::empty({M, K}, A.options().dtype(at::kByte));
   auto As = at::empty({M}, A.options().dtype(at::kFloat));
@@ -292,13 +403,9 @@ std::tuple<at::Tensor, at::Tensor> per_token_quant_int8_cpu(at::Tensor& A) {
     float* __restrict__ As_data = As.data_ptr<float>();
     const scalar_t* __restrict__ A_data = A.data_ptr<scalar_t>();
 
-    at::parallel_for(0, M, 0, [&] (int64_t begin, int64_t end) {
+    at::parallel_for(0, M, 0, [&](int64_t begin, int64_t end) {
       for (int64_t m = begin; m < end; ++m) {
-        quantize_row_int8<scalar_t>(
-            Aq_data + m * K,
-            As_data[m],
-            A_data + m * lda,
-            K);
+        quantize_row_int8<scalar_t>(Aq_data + m * K, As_data[m], A_data + m * lda, K);
       }
     });
   });
@@ -315,11 +422,14 @@ std::tuple<at::Tensor, at::Tensor> per_token_quant_int8_cpu(at::Tensor& A) {
 // bias    : [N]
 // out     : [M, N]
 //
-at::Tensor int8_scaled_mm_cpu(at::Tensor& mat1, at::Tensor& mat2,
-    at::Tensor& scales1, at::Tensor& scales2,
-    std::optional<at::Tensor>& bias, at::ScalarType out_dtype, bool is_vnni) {
-  RECORD_FUNCTION("sgl-kernel::int8_scaled_mm_cpu", std::vector<c10::IValue>({mat1, mat2, scales1, scales2, bias}));
-
+at::Tensor int8_scaled_mm_cpu(
+    at::Tensor& mat1,
+    at::Tensor& mat2,
+    at::Tensor& scales1,
+    at::Tensor& scales2,
+    const std::optional<at::Tensor>& bias,
+    at::ScalarType out_dtype,
+    bool is_vnni) {
   auto packed_w = is_vnni ? mat2 : convert_weight_packed(mat2);
 
   CHECK_INPUT(mat1);
@@ -340,7 +450,8 @@ at::Tensor int8_scaled_mm_cpu(at::Tensor& mat1, at::Tensor& mat2,
 
   TORCH_CHECK(mat1.scalar_type() == at::kByte, "int8_scaled_mm: expect mat1 to be uint8.");
   TORCH_CHECK(mat2.scalar_type() == at::kChar, "int8_scaled_mm: expect mat2 to be int8.");
-  TORCH_CHECK(scales1.scalar_type() == at::kFloat && scales2.scalar_type() == at::kFloat,
+  TORCH_CHECK(
+      scales1.scalar_type() == at::kFloat && scales2.scalar_type() == at::kFloat,
       "int8_scaled_mm: expect scales to be float32.");
 
   auto out = at::empty({M, N}, mat1.options().dtype(out_dtype));
@@ -368,10 +479,13 @@ at::Tensor int8_scaled_mm_cpu(at::Tensor& mat1, at::Tensor& mat2,
 }
 
 // fused `per_token_quant_int8_cpu` and `int8_scaled_mm_cpu`
-at::Tensor int8_scaled_mm_with_quant(at::Tensor& mat1, at::Tensor& mat2, at::Tensor& scales2,
-    const std::optional<at::Tensor>& bias, at::ScalarType out_dtype, bool is_vnni) {
-  RECORD_FUNCTION("sgl-kernel::int8_scaled_mm_cpu", std::vector<c10::IValue>({mat1, mat2, scales2, bias}));
-
+at::Tensor int8_scaled_mm_with_quant(
+    at::Tensor& mat1,
+    at::Tensor& mat2,
+    at::Tensor& scales2,
+    const std::optional<at::Tensor>& bias,
+    at::ScalarType out_dtype,
+    bool is_vnni) {
   auto packed_w = is_vnni ? mat2 : convert_weight_packed(mat2);
 
   CHECK_LAST_DIM_CONTIGUOUS_INPUT(mat1);
@@ -390,14 +504,10 @@ at::Tensor int8_scaled_mm_with_quant(at::Tensor& mat1, at::Tensor& mat2, at::Ten
   CHECK_EQ(scales2.numel(), N);
 
   const auto st = mat1.scalar_type();
-  TORCH_CHECK(st == at::kBFloat16 || st == at::kHalf,
-      "int8_scaled_mm_with_quant: expect A to be bfloat16 or half.");
-  TORCH_CHECK(st == out_dtype,
-      "int8_scaled_mm_with_quant: expect A has same dtype with out_dtype.");
-  TORCH_CHECK(mat2.scalar_type() == at::kChar,
-      "int8_scaled_mm_with_quant: expect mat2 to be int8.");
-  TORCH_CHECK(scales2.scalar_type() == at::kFloat,
-      "int8_scaled_mm_with_quant: expect scales to be float32.");
+  TORCH_CHECK(st == at::kBFloat16 || st == at::kHalf, "int8_scaled_mm_with_quant: expect A to be bfloat16 or half.");
+  TORCH_CHECK(st == out_dtype, "int8_scaled_mm_with_quant: expect A has same dtype with out_dtype.");
+  TORCH_CHECK(mat2.scalar_type() == at::kChar, "int8_scaled_mm_with_quant: expect mat2 to be int8.");
+  TORCH_CHECK(scales2.scalar_type() == at::kFloat, "int8_scaled_mm_with_quant: expect scales to be float32.");
 
   const int64_t buffer_size = M * K + M * sizeof(float);
   auto buffer = at::empty({buffer_size}, mat1.options().dtype(at::kByte));
@@ -415,13 +525,9 @@ at::Tensor int8_scaled_mm_with_quant(at::Tensor& mat1, at::Tensor& mat2, at::Ten
     float* __restrict__ As_data = (float*)((void*)(Aq_data + M * K));
     const scalar_t* __restrict__ A_data = mat1.data_ptr<scalar_t>();
 
-    at::parallel_for(0, M, 0, [&] (int64_t begin, int64_t end) {
+    at::parallel_for(0, M, 0, [&](int64_t begin, int64_t end) {
       for (int64_t m = begin; m < end; ++m) {
-        quantize_row_int8<scalar_t>(
-            Aq_data + m * K,
-            As_data[m],
-            A_data + m * lda,
-            K);
+        quantize_row_int8<scalar_t>(Aq_data + m * K, As_data[m], A_data + m * lda, K);
       }
     });
 
diff --git a/csrc/cpu/sgl-kernels/moe.cpp b/csrc/cpu/sgl-kernels/moe.cpp
index c01bfd376d4f..06f9f7c37362 100644
--- a/csrc/cpu/sgl-kernels/moe.cpp
+++ b/csrc/cpu/sgl-kernels/moe.cpp
@@ -1,12 +1,13 @@
 // Adapted from
 // https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
 
+// clang-format off
+
+#include "moe.h"
+
 #include "common.h"
-#include "vec.h"
 #include "gemm.h"
 
-// clang-format off
-
 namespace {
 
 // [NOTE]: Fused MoE kernel with AMX
@@ -30,109 +31,6 @@ namespace {
 //     3. abstract at::native::cpublas::brgemm with WoQ gemm (M = 1 & M != 1)
 //
 
-template <typename scalar_t>
-inline void fill_stub(scalar_t* __restrict__ out, scalar_t val, int64_t size) {
-  using Vec = at::vec::Vectorized<scalar_t>;
-  const Vec data_vec(val);
-  at::vec::map<scalar_t>([data_vec](Vec out) { return out = data_vec; }, out, out, size);
-}
-
-template <typename scalar_t>
-inline void copy_stub(scalar_t* __restrict__ out, const scalar_t* __restrict__ input, int64_t size) {
-  using Vec = at::vec::Vectorized<scalar_t>;
-  // no remainder
-  #pragma GCC unroll 4
-  for (int64_t d = 0; d < size; d += Vec::size()) {
-    Vec data = Vec::loadu(input + d);
-    data.store(out + d);
-  }
-}
-
-template <typename scalar_t>
-inline void copy_mul_stub(scalar_t* __restrict__ out, const float* __restrict__ input, float weight, int64_t size) {
-  using bVec = at::vec::Vectorized<scalar_t>;
-  using fVec = at::vec::Vectorized<float>;
-  constexpr int kVecSize = bVec::size();
-  const fVec weight_vec = fVec(weight);
-  int64_t d;
-  #pragma GCC unroll 4
-  for (d = 0; d <= size - kVecSize; d += kVecSize) {
-    fVec data0 = fVec::loadu(input + d) * weight_vec;
-    fVec data1 = fVec::loadu(input + d + fVec::size()) * weight_vec;
-    bVec out_vec = convert_from_float_ext<scalar_t>(data0, data1);
-    out_vec.store(out + d);
-  }
-  for (; d < size; ++d) {
-    out[d] = static_cast<scalar_t>(input[d] * weight);
-  }
-}
-
-// acc from [topk, K] to [K]
-template <typename scalar_t>
-inline void sum_stub(scalar_t* __restrict__ out, const scalar_t* __restrict__ input, int64_t topk, int64_t K) {
-  using bVec = at::vec::Vectorized<scalar_t>;
-  using fVec = at::vec::Vectorized<float>;
-  constexpr int kVecSize = bVec::size();
-  if (topk == 1) {
-    // do copy for topk = 1
-    copy_stub(out, input, K);
-  } else {
-    // do sum for topk != 1
-    int64_t d;
-    #pragma GCC unroll 4
-    for (d = 0; d <= K - kVecSize; d += kVecSize) {
-      fVec sum_fvec0 = fVec(0.f);
-      fVec sum_fvec1 = fVec(0.f);
-      for (int t = 0; t < topk; ++t) {
-        bVec x_bvec = bVec::loadu(input + t * K + d);
-        fVec x_fvec0, x_fvec1;
-        std::tie(x_fvec0, x_fvec1) = at::vec::convert_to_float(x_bvec);
-
-        sum_fvec0 += x_fvec0;
-        sum_fvec1 += x_fvec1;
-      }
-      bVec out_bvec = convert_from_float_ext<scalar_t>(sum_fvec0, sum_fvec1);
-      out_bvec.store(out + d);
-    }
-    for (; d < K; ++d) {
-      float sum_val = 0.f;
-      for (int t = 0; t < topk; ++t) {
-        sum_val += static_cast<float>(input[t * K + d]);
-      }
-      out[d] = static_cast<scalar_t>(sum_val);
-    }
-  }
-}
-
-// out = input + input2 * scale
-template <typename scalar_t>
-inline void add_mul_stub(scalar_t* __restrict__ out, const float* __restrict__ input,
-    const scalar_t* __restrict__ input2, float scale, int64_t size) {
-
-  using bVec = at::vec::Vectorized<scalar_t>;
-  using fVec = at::vec::Vectorized<float>;
-  constexpr int kVecSize = bVec::size();
-  const fVec s_vec = fVec(scale);
-  int64_t d;
-  #pragma GCC unroll 4
-  for (d = 0; d <= size - kVecSize; d += kVecSize) {
-    fVec x0 = fVec::loadu(input + d);
-    fVec x1 = fVec::loadu(input + d + fVec::size());
-
-    bVec y_bvec = bVec::loadu(input2 + d);
-    fVec y0, y1;
-    std::tie(y0, y1) = at::vec::convert_to_float(y_bvec);
-
-    x0 = x0 + y0 * s_vec;
-    x1 = x1 + y1 * s_vec;
-    bVec out_vec = convert_from_float_ext<scalar_t>(x0, x1);
-    out_vec.store(out + d);
-  }
-  for (; d < size; ++d) {
-    out[d] = static_cast<scalar_t>(input[d] + float(input2[d]) * scale);
-  }
-}
-
 template <int BLOCK_M>
 int moe_align_block_size(
     int32_t* __restrict__ sorted_ids,
@@ -144,8 +42,7 @@ int moe_align_block_size(
     int num_experts,
     int numel,
     int num_threads) {
-
-  #define T_INDEX(tt) total_cnts + (tt) * num_experts
+#define T_INDEX(tt) total_cnts + (tt) * num_experts
 
   // accumulate count of expert ids locally
   at::parallel_for(0, numel, 0, [&](int begin, int end) {
@@ -160,8 +57,7 @@ int moe_align_block_size(
   using iVec = at::vec::Vectorized<int32_t>;
   for (int t = 0; t < num_threads; ++t) {
     at::vec::map2<int32_t>(
-        [](iVec x, iVec y) { return x + y; },
-        T_INDEX(t + 1), T_INDEX(t + 1), T_INDEX(t), num_experts);
+        [](iVec x, iVec y) { return x + y; }, T_INDEX(t + 1), T_INDEX(t + 1), T_INDEX(t), num_experts);
   }
 
   // the last row holds sums of each experts
@@ -201,7 +97,9 @@ int moe_align_block_size(
   // padding value for sorted_ids: numel
   auto sorted_id_size = [=](const int32_t* sorted_ids_ptr) {
     for (int d = 0; d < BLOCK_M; ++d) {
-      if (sorted_ids_ptr[d] == numel) { return d; }
+      if (sorted_ids_ptr[d] == numel) {
+        return d;
+      }
     }
     return BLOCK_M;
   };
@@ -215,7 +113,7 @@ int moe_align_block_size(
       offsets[mb + 1] = sorted_id_size(sorted_ids + mb * BLOCK_M);
     }
   });
-  // TODO: do we need to vectorize this ?
+  // TODO: do we need to vecterize this ?
   for (int mb = 0; mb < num_token_blocks; ++mb) {
     offsets[mb + 1] += offsets[mb];
   }
@@ -236,7 +134,6 @@ inline void silu_and_mul(
     const float* __restrict__ input1,  // y: y0, y1
     int64_t m_size,
     int64_t N) {
-
   using bVec = at::vec::Vectorized<scalar_t>;
   using fVec = at::vec::Vectorized<float>;
 
@@ -269,8 +166,14 @@ inline void silu_and_mul(
 template <typename scalar_t, int BLOCK_M, int BLOCK_N>
 struct tinygemm_kernel_nn2 {
   static inline void apply(
-      const scalar_t* __restrict__ A, const scalar_t* __restrict__ B0, const scalar_t* __restrict__ B1,
-      scalar_t* __restrict__ C, int64_t K, int64_t lda, int64_t ldb, int64_t ldc) {
+      const scalar_t* __restrict__ A,
+      const scalar_t* __restrict__ B0,
+      const scalar_t* __restrict__ B1,
+      scalar_t* __restrict__ C,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc) {
     TORCH_CHECK(false, "tinygemm_kernel_nn: scalar path not implemented!");
   }
 };
@@ -279,9 +182,14 @@ struct tinygemm_kernel_nn2 {
 template <int BLOCK_M, int BLOCK_N>
 struct tinygemm_kernel_nn2<at::BFloat16, BLOCK_M, BLOCK_N> {
   static inline void apply(
-      const at::BFloat16* __restrict__ A, const at::BFloat16* __restrict__ B0, const at::BFloat16* __restrict__ B1,
-      at::BFloat16* __restrict__ C, int64_t K, int64_t lda, int64_t ldb, int64_t ldc) {
-
+      const at::BFloat16* __restrict__ A,
+      const at::BFloat16* __restrict__ B0,
+      const at::BFloat16* __restrict__ B1,
+      at::BFloat16* __restrict__ C,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc) {
     constexpr int ROWS = BLOCK_M;
     constexpr int COLS = BLOCK_N / 16;
 
@@ -304,7 +212,7 @@ struct tinygemm_kernel_nn2<at::BFloat16, BLOCK_M, BLOCK_N> {
 
     const int64_t K2 = K >> 1;
     const int64_t lda2 = lda >> 1;
-    const int64_t ldb2 = ldb; // ldb * 2 >> 1;
+    const int64_t ldb2 = ldb;  // ldb * 2 >> 1;
     const float* a_ptr = reinterpret_cast<const float*>(A);
     const float* b0_ptr = reinterpret_cast<const float*>(B0);
     const float* b1_ptr = reinterpret_cast<const float*>(B1);
@@ -352,17 +260,16 @@ struct tinygemm_kernel_nn2<at::BFloat16, BLOCK_M, BLOCK_N> {
         _mm512_storeu_si512(
             reinterpret_cast<__m512i*>((C + row * ldc + col * 16)),
             (__m512i)(_mm512_cvtne2ps_pbh(__m512(x1), __m512(x0))));
-        }
+      }
     };
     Unroll<ROWS * COLS>{}(storec);
   }
 };
 #endif
 
-#define LAUNCH_TINYGEMM_KERNEL_NN(MB_SIZE, NB_SIZE)                          \
-    tinygemm_kernel_nn2<scalar_t, MB_SIZE, NB_SIZE>::apply(                  \
-        A + mb_start * lda, B0 + nb_start * 2, B1 + nb_start * 2,            \
-        C + mb_start * ldc + nb_start, K, lda, ldb, ldc);
+#define LAUNCH_TINYGEMM_KERNEL_NN(MB_SIZE, NB_SIZE)       \
+  tinygemm_kernel_nn2<scalar_t, MB_SIZE, NB_SIZE>::apply( \
+      A + mb_start * lda, B0 + nb_start * 2, B1 + nb_start * 2, C + mb_start * ldc + nb_start, K, lda, ldb, ldc);
 
 template <typename scalar_t>
 void tinygemm_kernel(
@@ -376,7 +283,6 @@ void tinygemm_kernel(
     int64_t lda,
     int64_t ldb,
     int64_t ldc) {
-
   // pattern: 1-(2+2)-(8+8)
   constexpr int64_t BLOCK_M = 4;
   constexpr int64_t BLOCK_N = 32;
@@ -389,16 +295,25 @@ void tinygemm_kernel(
       int64_t nb_start = nb * BLOCK_N;
       int64_t nb_size = std::min(BLOCK_N, N - nb_start);
 
-      switch(mb_size << 4 | nb_size >> 4) {
+      switch (mb_size << 4 | nb_size >> 4) {
         // mb_size = 1
-        case 0x12: LAUNCH_TINYGEMM_KERNEL_NN(1, 32); break;
+        case 0x12:
+          LAUNCH_TINYGEMM_KERNEL_NN(1, 32);
+          break;
         // mb_size = 2
-        case 0x22: LAUNCH_TINYGEMM_KERNEL_NN(2, 32); break;
+        case 0x22:
+          LAUNCH_TINYGEMM_KERNEL_NN(2, 32);
+          break;
         // mb_size = 3
-        case 0x32: LAUNCH_TINYGEMM_KERNEL_NN(3, 32); break;
+        case 0x32:
+          LAUNCH_TINYGEMM_KERNEL_NN(3, 32);
+          break;
         // mb_size = 4
-        case 0x42: LAUNCH_TINYGEMM_KERNEL_NN(4, 32); break;
-        default: TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", nb_size);
+        case 0x42:
+          LAUNCH_TINYGEMM_KERNEL_NN(4, 32);
+          break;
+        default:
+          TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", "nb_size");
       }
     }
   }
@@ -407,8 +322,13 @@ void tinygemm_kernel(
 template <typename scalar_t, int BLOCK_M, int BLOCK_N>
 struct tinygemm_kernel_nn {
   static inline void apply(
-      const scalar_t* __restrict__ A, const scalar_t* __restrict__ B, float* __restrict__ C,
-      int64_t K, int64_t lda, int64_t ldb, int64_t ldc) {
+      const scalar_t* __restrict__ A,
+      const scalar_t* __restrict__ B,
+      float* __restrict__ C,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc) {
     TORCH_CHECK(false, "tinygemm_kernel_nn: scalar path not implemented!");
   }
 };
@@ -417,9 +337,13 @@ struct tinygemm_kernel_nn {
 template <int BLOCK_M, int BLOCK_N>
 struct tinygemm_kernel_nn<at::BFloat16, BLOCK_M, BLOCK_N> {
   static inline void apply(
-      const at::BFloat16* __restrict__ A, const at::BFloat16* __restrict__ B, float* __restrict__ C,
-      int64_t K, int64_t lda, int64_t ldb, int64_t ldc) {
-
+      const at::BFloat16* __restrict__ A,
+      const at::BFloat16* __restrict__ B,
+      float* __restrict__ C,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc) {
     constexpr int ROWS = BLOCK_M;
     constexpr int COLS = BLOCK_N / 16;
 
@@ -432,14 +356,12 @@ struct tinygemm_kernel_nn<at::BFloat16, BLOCK_M, BLOCK_N> {
     __m512bh vb[COLS];
     __m512 vc[ROWS * COLS];
 
-    auto loadc = [&](auto i) {
-      vc[i] = _mm512_set1_ps(0.f);
-    };
+    auto loadc = [&](auto i) { vc[i] = _mm512_set1_ps(0.f); };
     Unroll<ROWS * COLS>{}(loadc);
 
     const int64_t K2 = K >> 1;
     const int64_t lda2 = lda >> 1;
-    const int64_t ldb2 = ldb; // ldb * 2 >> 1;
+    const int64_t ldb2 = ldb;  // ldb * 2 >> 1;
     const float* a_ptr = reinterpret_cast<const float*>(A);
     const float* b_ptr = reinterpret_cast<const float*>(B);
 
@@ -466,17 +388,15 @@ struct tinygemm_kernel_nn<at::BFloat16, BLOCK_M, BLOCK_N> {
       constexpr int row = i / COLS;
       constexpr int col = i % COLS;
       _mm512_storeu_ps(reinterpret_cast<__m512*>(C + row * ldc + col * 16), vc[i]);
-
     };
     Unroll<ROWS * COLS>{}(storec);
   }
 };
 #endif
 
-#define LAUNCH_TINYGEMM_KERNEL_NN2(MB_SIZE, NB_SIZE)                         \
-    tinygemm_kernel_nn<scalar_t, MB_SIZE, NB_SIZE>::apply(                   \
-        A + mb_start * lda, B + nb_start * 2, C + mb_start * ldc + nb_start, \
-        K, lda, ldb, ldc);
+#define LAUNCH_TINYGEMM_KERNEL_NN2(MB_SIZE, NB_SIZE)     \
+  tinygemm_kernel_nn<scalar_t, MB_SIZE, NB_SIZE>::apply( \
+      A + mb_start * lda, B + nb_start * 2, C + mb_start * ldc + nb_start, K, lda, ldb, ldc);
 
 template <typename scalar_t>
 void tinygemm_kernel(
@@ -489,7 +409,6 @@ void tinygemm_kernel(
     int64_t lda,
     int64_t ldb,
     int64_t ldc) {
-
   // pattern: 1-2-8
   constexpr int64_t BLOCK_M = 4;
   constexpr int64_t BLOCK_N = 32;
@@ -502,16 +421,25 @@ void tinygemm_kernel(
       int64_t nb_start = nb * BLOCK_N;
       int64_t nb_size = std::min(BLOCK_N, N - nb_start);
 
-      switch(mb_size << 4 | nb_size >> 4) {
+      switch (mb_size << 4 | nb_size >> 4) {
         // mb_size = 1
-        case 0x12: LAUNCH_TINYGEMM_KERNEL_NN2(1, 32); break;
+        case 0x12:
+          LAUNCH_TINYGEMM_KERNEL_NN2(1, 32);
+          break;
         // mb_size = 2
-        case 0x22: LAUNCH_TINYGEMM_KERNEL_NN2(2, 32); break;
+        case 0x22:
+          LAUNCH_TINYGEMM_KERNEL_NN2(2, 32);
+          break;
         // mb_size = 3
-        case 0x32: LAUNCH_TINYGEMM_KERNEL_NN2(3, 32); break;
+        case 0x32:
+          LAUNCH_TINYGEMM_KERNEL_NN2(3, 32);
+          break;
         // mb_size = 4
-        case 0x42: LAUNCH_TINYGEMM_KERNEL_NN2(4, 32); break;
-        default: TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", nb_size);
+        case 0x42:
+          LAUNCH_TINYGEMM_KERNEL_NN2(4, 32);
+          break;
+        default:
+          TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", "nb_size");
       }
     }
   }
@@ -537,7 +465,6 @@ void fused_experts_kernel_impl(
     int64_t E,
     int64_t topk,
     int64_t num_tokens_post_pad) {
-
   // handle 2 tiles per block
   constexpr int64_t BLOCK_M = block_size_m();
   constexpr int64_t BLOCK_N = block_size_n();
@@ -552,39 +479,36 @@ void fused_experts_kernel_impl(
   const int64_t stride_e = 2 * N * K;
   const int64_t stride_n = K;
 
+  int64_t avg_M = std::max(int64_t(1), M * topk / E);
+  const bool use_brgemm = can_use_brgemm<scalar_t>(avg_M);
+
   // here we only parallel on half of 2N to fuse silu_and_mul with gemm
-  at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
+  parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
     // get local pointers
-    int tid = at::get_thread_num();
+    int tid = get_thread_num();
     scalar_t* __restrict__ A = A_tmp + tid * BLOCK_M * K;
     float* __restrict__ C0 = C_tmp + tid * 2 * BLOCK_M * BLOCK_N;
     float* __restrict__ C1 = C0 + BLOCK_M * BLOCK_N;
 
-    bool is_brgemm_used = false;
-
-    for (int64_t i = begin; i < end; ++i) {
-      int64_t mb = i / NB;
-      int64_t nb = i % NB;
-
-      // nb0 from top half and nb1 from bottom half
-      int64_t nb0 = nb, nb1 = nb + NB;
-      int64_t n_size = std::min(N - nb0 * BLOCK_N, BLOCK_N);
+    loop_2d<scalar_t>(mb0, mb1, nb0, nb1, BLOCK_N * K * 2, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
+      // nb_upper from top half and nb_lower from bottom half
+      int64_t nb_upper = nb, nb_lower = nb + NB;
+      int64_t n_size = std::min(N - nb * BLOCK_N, BLOCK_N);
 
       // B shape [K, n_size] in vnni format
       int32_t expert_id = expert_ids[mb];
-      const scalar_t* __restrict__ B0 = packed_w1 + expert_id * stride_e + nb0 * BLOCK_N * stride_n;
-      const scalar_t* __restrict__ B1 = packed_w1 + expert_id * stride_e + nb1 * BLOCK_N * stride_n;
+      const scalar_t* __restrict__ B0 = packed_w1 + expert_id * stride_e + nb_upper * BLOCK_N * stride_n;
+      const scalar_t* __restrict__ B1 = packed_w1 + expert_id * stride_e + nb_lower * BLOCK_N * stride_n;
 
-      // 1.a load A
-      const int32_t* A_ids = sorted_ids + mb * BLOCK_M;
       int64_t m_size = offsets[mb + 1] - offsets[mb];
 
-      const bool use_brgemm = can_use_brgemm<scalar_t>(m_size);
-      is_brgemm_used = is_brgemm_used || use_brgemm;
-
-      for (int64_t m = 0; m < m_size; ++m) {
-        int32_t index = A_ids[m] / topk;
-        copy_stub(A + m * K, input + index * K, K);
+      if (nb_offset == 0) {
+        // 1.a load A
+        const int32_t* A_ids = sorted_ids + mb * BLOCK_M;
+        for (int64_t m = 0; m < m_size; ++m) {
+          int32_t index = A_ids[m] / topk;
+          copy_stub(A + m * K, input + index * K, K);
+        }
       }
 
       if (use_brgemm) {
@@ -616,12 +540,7 @@ void fused_experts_kernel_impl(
 
         // 1.d silu and mul
         const int64_t offset = offsets[mb];
-        silu_and_mul<scalar_t, BLOCK_N>(
-            ic1 + offset * N + nb * BLOCK_N,
-            C0,
-            C1,
-            m_size,
-            N);
+        silu_and_mul<scalar_t, BLOCK_N>(ic1 + offset * N + nb * BLOCK_N, C0, C1, m_size, N);
       } else {
         // fused 1.bcd: silu_and_mul(A @ B0, A @ B1)
         const int64_t offset = offsets[mb];
@@ -637,9 +556,9 @@ void fused_experts_kernel_impl(
             /* ldb   */ n_size,
             /* ldc   */ N);
       }
-    }
+    });
 
-    if (is_brgemm_used) {
+    if (use_brgemm) {
       at::native::cpublas::brgemm_release();
     }
   });
@@ -654,24 +573,16 @@ void fused_experts_kernel_impl(
   const int64_t stride_oc = IC;
 
   // parallel on [MB2, NB2]
-  at::parallel_for(0, MB2 * NB2, 0, [&](int64_t begin, int64_t end) {
+  parallel_2d(MB2, NB2, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
     // get local pointers
-    int tid = at::get_thread_num();
+    int tid = get_thread_num();
     // we won't be using C1 for gemm2
     float* __restrict__ C = C_tmp + tid * 2 * BLOCK_M * BLOCK_N;
 
-    bool is_brgemm_used = false;
-
-    for (int64_t i = begin; i < end; ++i) {
-      int64_t mb = i / NB2;
-      int64_t nb = i % NB2;
-
+    loop_2d<scalar_t>(mb0, mb1, nb0, nb1, BLOCK_N * IC, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
       int64_t m_size = offsets[mb + 1] - offsets[mb];
       int64_t n_size = std::min(OC - nb * BLOCK_N, BLOCK_N);
 
-      const bool use_brgemm = can_use_brgemm<scalar_t>(m_size);
-      is_brgemm_used = is_brgemm_used || use_brgemm;
-
       // A ptr from ic1 of [M * topk, N] in sorted order
       // so as to avoid copy A to tmp buffer again
       const scalar_t* __restrict__ A = ic1 + offsets[mb] * N;
@@ -714,9 +625,9 @@ void fused_experts_kernel_impl(
         float weight = topk_weights[index];
         copy_mul_stub(ic2 + index * K + nb * BLOCK_N, C + m * BLOCK_N, weight, n_size);
       }
-    }
+    });
 
-    if (is_brgemm_used) {
+    if (use_brgemm) {
       at::native::cpublas::brgemm_release();
     }
   });
@@ -743,7 +654,6 @@ void shared_expert_kernel_impl(
     int64_t M,
     int64_t N,
     int64_t K) {
-
   // handle 2 tiles per block
   constexpr int64_t BLOCK_M = block_size_m();
   constexpr int64_t BLOCK_N = block_size_n();
@@ -755,36 +665,29 @@ void shared_expert_kernel_impl(
   TORCH_CHECK(N % BLOCK_N == 0, "Fixme when N is not multiples of ", BLOCK_N);
   const int64_t stride_n = K;
 
+  const bool use_brgemm = can_use_brgemm<scalar_t>(M);
+
+  const bool apply_scaling_factor = fused_experts_out != nullptr;
+
   // here we only parallel on half of 2N to fuse silu_and_mul with gemm
-  at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
+  parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
     // get local pointers
-    int tid = at::get_thread_num();
+    int tid = get_thread_num();
     float* __restrict__ C0 = C_tmp + tid * 2 * BLOCK_M * BLOCK_N;
     float* __restrict__ C1 = C0 + BLOCK_M * BLOCK_N;
 
-    bool is_brgemm_used = false;
-
-    for (int64_t i = begin; i < end; ++i) {
-      int64_t mb = i / NB;
-      int64_t nb = i % NB;
-
-      // nb0 from top half and nb1 from bottom half
-      int64_t nb0 = nb, nb1 = nb + NB;
-      int64_t n_size = std::min(N - nb0 * BLOCK_N, BLOCK_N);
+    loop_2d<scalar_t>(mb0, mb1, nb0, nb1, BLOCK_N * K * 2, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
+      // nb_upper from top half and nb_lower from bottom half
+      int64_t nb_upper = nb, nb_lower = nb + NB;
+      int64_t n_size = std::min(N - nb * BLOCK_N, BLOCK_N);
       int64_t m_size = std::min(M - mb * BLOCK_M, BLOCK_M);
 
-      //int64_t mb_start = mb * BLOCK_M;
-      //int64_t mb_size = std::min(M - mb_start, BLOCK_M);
-
       // A shape [m_size, K]
       const scalar_t* A = input + mb * BLOCK_M * K;
 
       // B shape [K, n_size] in vnni format
-      const scalar_t* __restrict__ B0 = packed_w1 + nb0 * BLOCK_N * stride_n;
-      const scalar_t* __restrict__ B1 = packed_w1 + nb1 * BLOCK_N * stride_n;
-
-      const bool use_brgemm = can_use_brgemm<scalar_t>(m_size);
-      is_brgemm_used = is_brgemm_used || use_brgemm;
+      const scalar_t* __restrict__ B0 = packed_w1 + nb_upper * BLOCK_N * stride_n;
+      const scalar_t* __restrict__ B1 = packed_w1 + nb_lower * BLOCK_N * stride_n;
 
       if (use_brgemm) {
         // 1.b gemm: C0 = A @ B0
@@ -814,12 +717,7 @@ void shared_expert_kernel_impl(
             /* C     */ C1);
 
         // 1.d silu and mul
-        silu_and_mul<scalar_t, BLOCK_N>(
-            ic1 + mb * BLOCK_M * N + nb * BLOCK_N,
-            C0,
-            C1,
-            m_size,
-            N);
+        silu_and_mul<scalar_t, BLOCK_N>(ic1 + mb * BLOCK_M * N + nb * BLOCK_N, C0, C1, m_size, N);
       } else {
         // fused 1.bcd: silu_and_mul(A @ B0, A @ B1)
         tinygemm_kernel(
@@ -834,9 +732,9 @@ void shared_expert_kernel_impl(
             /* ldb   */ n_size,
             /* ldc   */ N);
       }
-    }
+    });
 
-    if (is_brgemm_used) {
+    if (use_brgemm) {
       at::native::cpublas::brgemm_release();
     }
   });
@@ -850,24 +748,16 @@ void shared_expert_kernel_impl(
   const int64_t stride_oc = IC;
 
   // parallel on [MB2, NB2]
-  at::parallel_for(0, MB2 * NB2, 0, [&](int64_t begin, int64_t end) {
+  parallel_2d(MB2, NB2, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
     // get local pointers
-    int tid = at::get_thread_num();
+    int tid = get_thread_num();
     // we won't be using C1 for gemm2
     float* __restrict__ C = C_tmp + tid * 2 * BLOCK_M * BLOCK_N;
 
-    bool is_brgemm_used = false;
-
-    for (int64_t i = begin; i < end; ++i) {
-      int64_t mb = i / NB2;
-      int64_t nb = i % NB2;
-
+    loop_2d<scalar_t>(mb0, mb1, nb0, nb1, BLOCK_N * IC, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
       int64_t m_size = std::min(M - mb * BLOCK_M, BLOCK_M);
       int64_t n_size = std::min(OC - nb * BLOCK_N, BLOCK_N);
 
-      const bool use_brgemm = can_use_brgemm<scalar_t>(m_size);
-      is_brgemm_used = is_brgemm_used || use_brgemm;
-
       // A shape [m_size, IC]
       const scalar_t* __restrict__ A = ic1 + mb * BLOCK_M * N;
 
@@ -902,34 +792,33 @@ void shared_expert_kernel_impl(
 
       // 2.b copy from C to output and add fused_experts_out
       scalar_t* __restrict__ out = output + mb * BLOCK_M * K + nb * BLOCK_N;
-      const scalar_t* __restrict__ fused_out = fused_experts_out + mb * BLOCK_M * K + nb * BLOCK_N;
+      const scalar_t* __restrict__ fused_out =
+          apply_scaling_factor ? fused_experts_out + mb * BLOCK_M * K + nb * BLOCK_N : nullptr;
       for (int64_t m = 0; m < m_size; ++m) {
-        add_mul_stub(out + m * K, C + m * BLOCK_N, fused_out + m * K, routed_scaling_factor, n_size);
+        const scalar_t* __restrict__ fused_out_row = apply_scaling_factor ? (fused_out + m * K) : nullptr;
+        add_mul_stub(out + m * K, C + m * BLOCK_N, fused_out_row, routed_scaling_factor, n_size);
       }
-    }
+    });
 
-    if (is_brgemm_used) {
+    if (use_brgemm) {
       at::native::cpublas::brgemm_release();
     }
   });
 }
 
-} // anonymous namespace
+}  // anonymous namespace
 
 // common checks
 static inline void check_moe_scales(
     bool use_int8_w8a8,
     bool use_fp8_w8a16,
+    bool use_mxfp4,
     const std::optional<at::Tensor>& w1_scale,
     const std::optional<at::Tensor>& w2_scale,
-    const std::optional<std::vector<int64_t>> block_size,
-    const std::optional<at::Tensor>& a1_scale,
-    const std::optional<at::Tensor>& a2_scale) {
+    const std::optional<std::vector<int64_t>> block_size) {
   if (use_int8_w8a8) {
     TORCH_CHECK(w1_scale.has_value(), "missing w1_scale for int8 w8a8.");
     TORCH_CHECK(w2_scale.has_value(), "missing w2_scale for int8 w8a8.");
-    TORCH_CHECK(!a1_scale.has_value(), "static quantization for activation not supported.");
-    TORCH_CHECK(!a2_scale.has_value(), "static quantization for activation not supported.");
   }
   if (use_fp8_w8a16) {
     TORCH_CHECK(w1_scale.has_value(), "missing w1_scale for fp8 w8a16.");
@@ -937,25 +826,32 @@ static inline void check_moe_scales(
     TORCH_CHECK(block_size.has_value(), "missing block_size for fp8 w8a16.");
     TORCH_CHECK(block_size.value().size() == 2, "expect block_size.size() to be 2.");
   }
+  if (use_mxfp4) {
+    TORCH_CHECK(w1_scale.has_value(), "missing w1_scale for mxfp4.");
+    TORCH_CHECK(w2_scale.has_value(), "missing w2_scale for mxfp4.");
+    TORCH_CHECK(w1_scale.value().scalar_type() == at::kByte, "expect w1_scale to be uint8.");
+    TORCH_CHECK(w2_scale.value().scalar_type() == at::kByte, "expect w2_scale to be uint8.");
+  }
 }
 
-#define CHECK_MOE_SCALES_FP8(DIM0, DIM1)                 \
-    auto w1s = w1_scale.value();                         \
-    auto w2s = w2_scale.value();                         \
-    auto block_size_val = block_size.value();            \
-    int64_t block_size_N = block_size_val[0];            \
-    int64_t block_size_K = block_size_val[1];            \
-    TORCH_CHECK(w1s.size(DIM0) == 2 * N / block_size_N); \
-    TORCH_CHECK(w1s.size(DIM1) == K / block_size_K);     \
-    TORCH_CHECK(w2s.size(DIM0) == K / block_size_N);     \
-    TORCH_CHECK(w2s.size(DIM1) == N / block_size_K)
+#define CHECK_MOE_SCALES_FP8(DIM0, DIM1)                      \
+  auto w1s = w1_scale.value();                                \
+  auto w2s = w2_scale.value();                                \
+  auto block_size_val = block_size.value();                   \
+  int64_t block_size_N = block_size_val[0];                   \
+  int64_t block_size_K = block_size_val[1];                   \
+  TORCH_CHECK(w1s.size(DIM0) == div_up(2 * N, block_size_N)); \
+  TORCH_CHECK(w1s.size(DIM1) == div_up(K, block_size_K));     \
+  TORCH_CHECK(w2s.size(DIM0) == div_up(K, block_size_N));     \
+  TORCH_CHECK(w2s.size(DIM1) == div_up(N, block_size_K))
 
 // hidden_states: [M, K]
-// w1: [E, 2N, K]
-// w2: [E, K, N]
+// w1: [E, 2N, K] or [E, 2N, K / 2] for uint8
+// w2: [E, K, N] or [E, K, N / 2] for uint8
 // topk_weights: [M, topk]
 // topk_ids: [M, topk] (int32_t)
 //
+
 at::Tensor fused_experts_cpu(
     at::Tensor& hidden_states,
     at::Tensor& w1,
@@ -963,16 +859,17 @@ at::Tensor fused_experts_cpu(
     at::Tensor& topk_weights,
     at::Tensor& topk_ids,
     bool inplace,
-    bool use_int8_w8a8,
-    bool use_fp8_w8a16,
+    int64_t moe_comp_method,
     const std::optional<at::Tensor>& w1_scale,
     const std::optional<at::Tensor>& w2_scale,
+    const std::optional<at::Tensor>& w1_zero,
+    const std::optional<at::Tensor>& w2_zero,
     const std::optional<std::vector<int64_t>> block_size,
-    const std::optional<at::Tensor>& a1_scale,
-    const std::optional<at::Tensor>& a2_scale,
+    const std::optional<at::Tensor>& w1_bias,
+    const std::optional<at::Tensor>& w2_bias,
+    const std::optional<double>& alpha,
+    const std::optional<double>& limit,
     bool is_vnni) {
-  RECORD_FUNCTION("sgl-kernel::fused_experts_cpu", std::vector<c10::IValue>({hidden_states, w1, w2, topk_weights, topk_ids}));
-
   auto packed_w1 = is_vnni ? w1 : convert_weight_packed(w1);
   auto packed_w2 = is_vnni ? w2 : convert_weight_packed(w2);
 
@@ -985,32 +882,54 @@ at::Tensor fused_experts_cpu(
   CHECK_INPUT(w2);
   CHECK_EQ(topk_weights.sizes(), topk_ids.sizes());
   CHECK_DIM(2, hidden_states);
-  CHECK_DIM(3, w1);
-  CHECK_DIM(3, w2);
+  if (moe_comp_method == CPUQuantMethod::INT4_W4A8 && is_vnni) {
+    CHECK_DIM(4, w1);
+    CHECK_DIM(4, w2);
+  } else {
+    CHECK_DIM(3, w1);
+    CHECK_DIM(3, w2);
+  }
   CHECK_DIM(2, topk_weights);
   CHECK_DIM(2, topk_ids);
 
   CHECK_EQ(topk_ids.scalar_type(), at::kInt);
-  CHECK_EQ(topk_weights.scalar_type(), at::kFloat);
+
+  // TODO: support topk_weights to be bf16 or fp16 in the kernel.
+  // The topk_weights of llama4 is computed via Llama4MoE:custom_routing_function and is bf16/fp16
+  // while the kernel currently only supports it to be float32
+  auto topk_weights_ = topk_weights.to(at::kFloat);
+  CHECK_EQ(topk_weights_.scalar_type(), at::kFloat);
 
   int64_t M = hidden_states.size(0);
   int64_t K = hidden_states.size(1);
-  int64_t N = w1.size(1) / 2;
+  int64_t N = moe_comp_method == CPUQuantMethod::INT4_W4A8 ? w1_scale.value().size(1) * w1_scale.value().size(3) / 2
+                                                           : w1.size(1) / 2;
   int64_t E = w1.size(0);
-  int64_t topk = topk_weights.size(1);
+  int64_t topk = topk_weights_.size(1);
 
   // we use int32_t compensation for int8 w8a8
-  int64_t packed_K = get_row_size(K, use_int8_w8a8);
-  int64_t packed_N = get_row_size(N, use_int8_w8a8);
+  int64_t packed_K = moe_comp_method == CPUQuantMethod::MXFP4
+                         ? get_row_size<uint8_t>(K)
+                         : get_row_size(K, moe_comp_method == CPUQuantMethod::INT8_W8A8);
+  int64_t packed_N = moe_comp_method == CPUQuantMethod::MXFP4
+                         ? get_row_size<uint8_t>(N)
+                         : get_row_size(N, moe_comp_method == CPUQuantMethod::INT8_W8A8);
 
   // check weight shapes
   CHECK_EQ(w2.size(0), E);
-  CHECK_EQ(w2.size(1), K);
-  CHECK_EQ(packed_w1.size(2), packed_K);
-  CHECK_EQ(packed_w2.size(2), packed_N);
-
+  if (!(moe_comp_method == CPUQuantMethod::INT4_W4A8)) {
+    CHECK_EQ(w2.size(1), K);
+    CHECK_EQ(packed_w1.size(2), packed_K / (moe_comp_method == CPUQuantMethod::INT4_W4A8 ? 2 : 1));
+    CHECK_EQ(packed_w2.size(2), packed_N / (moe_comp_method == CPUQuantMethod::INT4_W4A8 ? 2 : 1));
+  }
   // check scales
-  check_moe_scales(use_int8_w8a8, use_fp8_w8a16, w1_scale, w2_scale, block_size, a1_scale, a2_scale);
+  check_moe_scales(
+      moe_comp_method == CPUQuantMethod::INT8_W8A8,
+      moe_comp_method == CPUQuantMethod::FP8_W8A16,
+      moe_comp_method == CPUQuantMethod::MXFP4,
+      w1_scale,
+      w2_scale,
+      block_size);
 
   at::Tensor out_hidden_states = inplace ? hidden_states : at::empty_like(hidden_states);
 
@@ -1031,8 +950,8 @@ at::Tensor fused_experts_cpu(
   int32_t* __restrict__ sorted_ids = buffer.data_ptr<int32_t>();
   int32_t* __restrict__ expert_ids = sorted_ids + max_num_tokens_padded;
   int32_t* __restrict__ total_cnts = expert_ids + max_num_blocks;
-  int32_t* __restrict__ cumsums    = total_cnts + (num_threads + 1) * E;
-  int32_t* __restrict__ offsets    = cumsums    + (E + 1);
+  int32_t* __restrict__ cumsums = total_cnts + (num_threads + 1) * E;
+  int32_t* __restrict__ offsets = cumsums + (E + 1);
 
   // init sorted_ids with `numel` as the padding number
   // init expert_ids with `num_experts`
@@ -1062,28 +981,33 @@ at::Tensor fused_experts_cpu(
   //   5. Aq_tmp : [M, K] or [M * topk, N]
   //   6. As_tmp : [M * topk]
   //
-  // for fp8 w8a16:
+  // for fp8 w8a16 and mxfp4:
   //   7. intermediate_cache0 : [M * topk, 2N]
-  //   8. B_tmp : [T, BLOCK_N, std::max(K, N)]
+  //   8. B_tmp : [T, MAX_CACHE_BLOCK_SIZE, BLOCK_N, std::max(K, N)]
   //
-  int64_t buffer_size_nbytes = M * topk * N * 2 + M * topk * K * 2 +
-      num_threads * BLOCK_M * K * (use_int8_w8a8 ? 1 : 2) +
+  int64_t buffer_size_nbytes =
+      M * topk * N * 2 + M * topk * K * 2 +
+      num_threads * BLOCK_M * K *
+          (moe_comp_method == CPUQuantMethod::INT8_W8A8 | moe_comp_method == CPUQuantMethod::INT4_W4A8 ? 1 : 2) +
       num_threads * 2 * BLOCK_M * BLOCK_N * sizeof(float);
 
-  if (use_int8_w8a8) {
+  if (moe_comp_method == CPUQuantMethod::INT8_W8A8) {
     buffer_size_nbytes += std::max(M * K, M * topk * N) + M * topk * sizeof(float);
   }
-  if (use_fp8_w8a16) {
-    buffer_size_nbytes += M * topk * 2 * N * 2 + num_threads * BLOCK_N * std::max(K, N) * 2;
+  if (moe_comp_method == CPUQuantMethod::FP8_W8A16 || moe_comp_method == CPUQuantMethod::MXFP4) {
+    buffer_size_nbytes += M * topk * 2 * N * 2 + num_threads * MAX_CACHE_BLOCK_SIZE * BLOCK_N * std::max(K, N) * 2;
+  }
+  if (moe_comp_method == CPUQuantMethod::INT4_W4A8) {
+    buffer_size_nbytes += M * topk * 2 * N * 2 + std::max(M * K, M * topk * N) + M * topk * sizeof(float) +
+                          num_threads * 2 * get_4bit_block_k_size(K / w1_scale.value().size(2)) * BLOCK_N;
   }
-
   auto buffer2 = at::empty({buffer_size_nbytes}, hidden_states.options().dtype(at::kChar));
 
   AT_DISPATCH_REDUCED_FLOATING_TYPES(st, "fused_experts_kernel_impl", [&] {
     scalar_t* __restrict__ intermediate_cache1 = (scalar_t*)((void*)(buffer2.data_ptr<int8_t>()));
     scalar_t* __restrict__ intermediate_cache2 = intermediate_cache1 + M * topk * N;
 
-    if (use_int8_w8a8) {
+    if (moe_comp_method == CPUQuantMethod::INT8_W8A8) {
       uint8_t* __restrict__ A_tmp = (uint8_t*)((void*)(intermediate_cache2 + M * topk * K));
       float* __restrict__ C_tmp = (float*)((void*)(A_tmp + num_threads * BLOCK_M * K));
       uint8_t* __restrict__ Aq_tmp = (uint8_t*)((void*)(C_tmp + num_threads * 2 * BLOCK_M * BLOCK_N));
@@ -1107,7 +1031,7 @@ at::Tensor fused_experts_cpu(
           packed_w2.data_ptr<int8_t>(),
           w1s.data_ptr<float>(),
           w2s.data_ptr<float>(),
-          topk_weights.data_ptr<float>(),
+          topk_weights_.data_ptr<float>(),
           sorted_ids,
           expert_ids,
           offsets,
@@ -1117,15 +1041,17 @@ at::Tensor fused_experts_cpu(
           E,
           topk,
           num_tokens_post_pad);
-    } else if (use_fp8_w8a16) {
+    } else if (moe_comp_method == CPUQuantMethod::FP8_W8A16) {
       // here we just ignore C_tmp as it is not used
       scalar_t* __restrict__ A_tmp = (scalar_t*)((void*)(intermediate_cache2 + M * topk * K));
       float* __restrict__ C_tmp = (float*)((void*)(A_tmp + num_threads * BLOCK_M * K));
       scalar_t* __restrict__ intermediate_cache0 = (scalar_t*)((void*)(C_tmp + num_threads * 2 * BLOCK_M * BLOCK_N));
       scalar_t* __restrict__ B_tmp = (scalar_t*)((void*)(intermediate_cache0 + M * topk * 2 * N));
+      bool with_bias = w1_bias.has_value();
+      auto act_func = alpha.has_value() && limit.has_value() ? CPUAcTMethod::swiglu : CPUAcTMethod::silu_and_mul;
 
       CHECK_MOE_SCALES_FP8(1, 2);
-      fused_experts_fp8_kernel_impl(
+      fused_experts_fp_kernel_impl<scalar_t, at::Float8_e4m3fn, float, false>(
           out_hidden_states.data_ptr<scalar_t>(),
           intermediate_cache0,
           intermediate_cache1,
@@ -1136,10 +1062,103 @@ at::Tensor fused_experts_cpu(
           hidden_states.data_ptr<scalar_t>(),
           packed_w1.data_ptr<at::Float8_e4m3fn>(),
           packed_w2.data_ptr<at::Float8_e4m3fn>(),
+          with_bias ? w1_bias.value().data_ptr<float>() : nullptr,
+          with_bias ? w2_bias.value().data_ptr<float>() : nullptr,
           w1s.data_ptr<float>(),
           w2s.data_ptr<float>(),
           block_size_N,
           block_size_K,
+          topk_weights_.data_ptr<float>(),
+          sorted_ids,
+          expert_ids,
+          offsets,
+          M,
+          N,
+          K,
+          E,
+          topk,
+          num_tokens_post_pad,
+          alpha.has_value() ? float(alpha.value()) : 0,
+          limit.has_value() ? float(limit.value()) : 0,
+          act_func,
+          with_bias);
+    } else if (moe_comp_method == CPUQuantMethod::MXFP4) {
+      scalar_t* __restrict__ A_tmp = (scalar_t*)((void*)(intermediate_cache2 + M * topk * K));
+      float* __restrict__ C_tmp = (float*)((void*)(A_tmp + num_threads * BLOCK_M * K));
+      scalar_t* __restrict__ intermediate_cache0 = (scalar_t*)((void*)(C_tmp + num_threads * 2 * BLOCK_M * BLOCK_N));
+      scalar_t* __restrict__ B_tmp = (scalar_t*)((void*)(intermediate_cache0 + M * topk * 2 * N));
+      bool with_bias = w1_bias.has_value();
+      auto act_func = alpha.has_value() && limit.has_value() ? CPUAcTMethod::swiglu : CPUAcTMethod::silu_and_mul;
+
+      // mxfp4 supports only group size of 32 (2^5)
+      constexpr int64_t group_size = 32;
+      auto w1s = w1_scale.value();
+      auto w2s = w2_scale.value();
+      TORCH_CHECK(w1s.numel() == E * 2 * N * K / group_size, "w1_scale size mismatch");
+      TORCH_CHECK(w2s.numel() == E * K * N / group_size, "w2_scale size mismatch");
+      fused_experts_fp_kernel_impl<scalar_t, uint8_t, uint8_t, true>(
+          out_hidden_states.data_ptr<scalar_t>(),
+          intermediate_cache0,
+          intermediate_cache1,
+          intermediate_cache2,
+          A_tmp,
+          B_tmp,
+          C_tmp,
+          hidden_states.data_ptr<scalar_t>(),
+          packed_w1.data_ptr<uint8_t>(),
+          packed_w2.data_ptr<uint8_t>(),
+          with_bias ? w1_bias.value().data_ptr<float>() : nullptr,
+          with_bias ? w2_bias.value().data_ptr<float>() : nullptr,
+          w1s.data_ptr<uint8_t>(),
+          w2s.data_ptr<uint8_t>(),
+          /*block_size_N*/ 1,
+          /*block_size_K*/ group_size,
+          topk_weights_.data_ptr<float>(),
+          sorted_ids,
+          expert_ids,
+          offsets,
+          M,
+          N,
+          K,
+          E,
+          topk,
+          num_tokens_post_pad,
+          alpha.has_value() ? float(alpha.value()) : 0,
+          limit.has_value() ? float(limit.value()) : 0,
+          act_func,
+          with_bias);
+    } else if (moe_comp_method == CPUQuantMethod::INT4_W4A8) {
+      uint8_t* __restrict__ A_tmp = (uint8_t*)((void*)(intermediate_cache2 + M * topk * K));
+      float* __restrict__ C_tmp = (float*)((void*)(A_tmp + num_threads * BLOCK_M * K));
+      scalar_t* __restrict__ intermediate_cache0 = (scalar_t*)((void*)(C_tmp + num_threads * 2 * BLOCK_M * BLOCK_N));
+      uint8_t* __restrict__ Aq_tmp = (uint8_t*)((void*)(intermediate_cache0 + M * topk * 2 * N));
+      float* __restrict__ As_tmp = (float*)((void*)(Aq_tmp + std::max(M * K, M * topk * N)));
+      int8_t* __restrict__ dqB_tmp = (int8_t*)((void*)(As_tmp + M * topk));
+
+      // weight + compensation shape = [Nc, Kc, block_n * block_k / 2 + block_n*sizeof(int32_t)]
+      // scales/qzeros shape = [E, Nc, G, block_n]
+      int64_t num_groups = w1_scale.value().size(2);
+      const int group_size = K / num_groups;
+      // TODO: check scales and zeros
+      fused_experts_int4_w4a8_kernel_impl<scalar_t>(
+          out_hidden_states.data_ptr<scalar_t>(),
+          intermediate_cache0,
+          intermediate_cache1,
+          intermediate_cache2,
+          A_tmp,
+          Aq_tmp,
+          As_tmp,
+          nullptr,
+          C_tmp,
+          dqB_tmp,
+          hidden_states.data_ptr<scalar_t>(),
+          packed_w1.data_ptr<uint8_t>(),
+          packed_w2.data_ptr<uint8_t>(),
+          w1_zero.value().data_ptr<int8_t>(),
+          w2_zero.value().data_ptr<int8_t>(),
+          w1_scale.value().data_ptr<float>(),
+          w2_scale.value().data_ptr<float>(),
+          group_size,
           topk_weights.data_ptr<float>(),
           sorted_ids,
           expert_ids,
@@ -1163,7 +1182,7 @@ at::Tensor fused_experts_cpu(
           hidden_states.data_ptr<scalar_t>(),
           packed_w1.data_ptr<scalar_t>(),
           packed_w2.data_ptr<scalar_t>(),
-          topk_weights.data_ptr<float>(),
+          topk_weights_.data_ptr<float>(),
           sorted_ids,
           expert_ids,
           offsets,
@@ -1188,34 +1207,37 @@ at::Tensor shared_expert_cpu(
     at::Tensor& hidden_states,
     at::Tensor& w1,
     at::Tensor& w2,
-    at::Tensor& fused_experts_out,
-    double routed_scaling_factor,
+    const std::optional<at::Tensor>& fused_experts_out,
+    const std::optional<double> routed_scaling_factor,
     bool inplace,
     bool use_int8_w8a8,
     bool use_fp8_w8a16,
-    std::optional<at::Tensor>& w1_scale,
-    std::optional<at::Tensor>& w2_scale,
-    std::optional<std::vector<int64_t>> block_size,
-    std::optional<at::Tensor>& a1_scale,
-    std::optional<at::Tensor>& a2_scale,
+    const std::optional<at::Tensor>& w1_scale,
+    const std::optional<at::Tensor>& w2_scale,
+    const std::optional<std::vector<int64_t>> block_size,
     bool is_vnni) {
-  RECORD_FUNCTION("sgl-kernel::shared_expert_cpu", std::vector<c10::IValue>({hidden_states, w1, w2}));
-
   auto packed_w1 = is_vnni ? w1 : convert_weight_packed(w1);
   auto packed_w2 = is_vnni ? w2 : convert_weight_packed(w2);
 
   constexpr int64_t BLOCK_M = block_size_m();
   constexpr int64_t BLOCK_N = block_size_n();
 
+  double routed_scaling_factor_value = 0;
+  if (routed_scaling_factor.has_value()) {
+    TORCH_CHECK(fused_experts_out.has_value(), "shared_expert_cpu: expect fused_experts_out.");
+    const auto fused_experts_out_tensor = fused_experts_out.value();
+    routed_scaling_factor_value = routed_scaling_factor.value();
+    CHECK_INPUT(fused_experts_out_tensor);
+    CHECK_EQ(hidden_states.sizes(), fused_experts_out_tensor.sizes());
+  }
+
   const auto st = hidden_states.scalar_type();
   CHECK_INPUT(hidden_states);
-  CHECK_INPUT(fused_experts_out);
   CHECK_INPUT(w1);
   CHECK_INPUT(w2);
   CHECK_DIM(2, hidden_states);
   CHECK_DIM(2, w1);
   CHECK_DIM(2, w2);
-  CHECK_EQ(hidden_states.sizes(), fused_experts_out.sizes());
   CHECK_EQ(hidden_states.scalar_type(), st);
 
   int64_t M = hidden_states.size(0);
@@ -1232,7 +1254,7 @@ at::Tensor shared_expert_cpu(
   CHECK_EQ(packed_w2.size(1), packed_N);
 
   // check scales
-  check_moe_scales(use_int8_w8a8, use_fp8_w8a16, w1_scale, w2_scale, block_size, a1_scale, a2_scale);
+  check_moe_scales(use_int8_w8a8, use_fp8_w8a16, false, w1_scale, w2_scale, block_size);
 
   at::Tensor out_hidden_states = inplace ? hidden_states : at::empty_like(hidden_states);
 
@@ -1246,7 +1268,7 @@ at::Tensor shared_expert_cpu(
   //
   // for fp8 w8a16:
   //   5. intermediate_cache0 : [M, 2N]
-  //   6. B_tmp: [T, BLOCK_M, max(K, N)]
+  //   6. B_tmp: [T, MAX_CACHE_BLOCK_SIZE, BLOCK_M, max(K, N)]
   //
   int num_threads = at::get_num_threads();
   int64_t buffer_size_nbytes = M * N * 2 + num_threads * 2 * BLOCK_M * BLOCK_N * sizeof(float);
@@ -1255,7 +1277,7 @@ at::Tensor shared_expert_cpu(
     buffer_size_nbytes += std::max(M * K, M * N) + M * sizeof(float);
   }
   if (use_fp8_w8a16) {
-    buffer_size_nbytes += M * 2 * N * 2 + num_threads * BLOCK_M * std::max(K, N) * 2;
+    buffer_size_nbytes += M * 2 * N * 2 + num_threads * MAX_CACHE_BLOCK_SIZE * BLOCK_M * std::max(K, N) * 2;
   }
 
   auto buffer = at::empty({buffer_size_nbytes}, hidden_states.options().dtype(at::kChar));
@@ -1283,8 +1305,8 @@ at::Tensor shared_expert_cpu(
           packed_w2.data_ptr<int8_t>(),
           w1s.data_ptr<float>(),
           w2s.data_ptr<float>(),
-          fused_experts_out.data_ptr<scalar_t>(),
-          routed_scaling_factor,
+          conditional_data_ptr<scalar_t>(fused_experts_out),
+          routed_scaling_factor_value,
           M,
           N,
           K);
@@ -1306,8 +1328,8 @@ at::Tensor shared_expert_cpu(
           w2s.data_ptr<float>(),
           block_size_N,
           block_size_K,
-          fused_experts_out.data_ptr<scalar_t>(),
-          routed_scaling_factor,
+          conditional_data_ptr<scalar_t>(fused_experts_out),
+          routed_scaling_factor_value,
           M,
           N,
           K);
@@ -1319,8 +1341,8 @@ at::Tensor shared_expert_cpu(
           hidden_states.data_ptr<scalar_t>(),
           packed_w1.data_ptr<scalar_t>(),
           packed_w2.data_ptr<scalar_t>(),
-          fused_experts_out.data_ptr<scalar_t>(),
-          routed_scaling_factor,
+          conditional_data_ptr<scalar_t>(fused_experts_out),
+          routed_scaling_factor_value,
           M,
           N,
           K);
diff --git a/csrc/cpu/sgl-kernels/moe.h b/csrc/cpu/sgl-kernels/moe.h
new file mode 100644
index 000000000000..b6d2e9e7f6db
--- /dev/null
+++ b/csrc/cpu/sgl-kernels/moe.h
@@ -0,0 +1,284 @@
+// Adapted from
+// https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
+
+// clang-format off
+
+#pragma once
+#include "vec.h"
+
+template <typename scalar_t>
+inline void fill_stub(scalar_t* __restrict__ out, scalar_t val, int64_t size) {
+  using Vec = at::vec::Vectorized<scalar_t>;
+  const Vec data_vec(val);
+  at::vec::map<scalar_t>([data_vec](Vec out) { return out = data_vec; }, out, out, size);
+}
+
+template <typename scalar_t>
+inline void copy_stub(scalar_t* __restrict__ out, const scalar_t* __restrict__ input, int64_t size) {
+  using Vec = at::vec::Vectorized<scalar_t>;
+  constexpr int kVecSize = Vec::size();
+  int64_t d;
+#pragma GCC unroll 4
+  for (d = 0; d <= size - kVecSize; d += kVecSize) {
+    Vec data = Vec::loadu(input + d);
+    data.store(out + d);
+  }
+  for (; d < size; ++d) {
+    out[d] = input[d];
+  }
+}
+
+template <typename scalar_t>
+inline void copy_stub(scalar_t* __restrict__ out, const float* __restrict__ input, int64_t size) {
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  constexpr int kVecSize = bVec::size();
+  int64_t d;
+#pragma GCC unroll 4
+  for (d = 0; d <= size - kVecSize; d += kVecSize) {
+    auto [x0, x1] = load_float_vec2(input + d);
+    bVec out_vec = convert_from_float_ext<scalar_t>(x0, x1);
+    out_vec.store(out + d);
+  }
+  for (; d < size; ++d) {
+    out[d] = static_cast<scalar_t>(input[d]);
+  }
+}
+
+template <>
+inline void copy_stub<uint8_t>(uint8_t* __restrict__ out, const uint8_t* __restrict__ input, int64_t size) {
+  // size might be 64x + 32
+  std::memcpy(out, input, size * sizeof(uint8_t));
+}
+
+template <typename scalar_t, typename input_t>
+inline void copy_mul_stub(scalar_t* __restrict__ out, const input_t* __restrict__ input, float weight, int64_t size) {
+  static_assert(
+      std::is_same_v<input_t, float> || std::is_same_v<input_t, scalar_t>,
+      "copy_mul_stub only supports input_t == float or input_t == scalar_t");
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  constexpr int kVecSize = bVec::size();
+  const fVec weight_vec = fVec(weight);
+  int64_t d;
+#pragma GCC unroll 4
+  for (d = 0; d <= size - kVecSize; d += kVecSize) {
+    auto [x0, x1] = load_float_vec2(input + d);
+    x0 = x0 * weight_vec;
+    x1 = x1 * weight_vec;
+    bVec out_vec = convert_from_float_ext<scalar_t>(x0, x1);
+    out_vec.store(out + d);
+  }
+  for (; d < size; ++d) {
+    out[d] = static_cast<scalar_t>(input[d] * weight);
+  }
+}
+
+// acc from [topk, K] to [K]
+template <typename scalar_t>
+inline void sum_stub(scalar_t* __restrict__ out, const scalar_t* __restrict__ input, int64_t topk, int64_t K) {
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  constexpr int kVecSize = bVec::size();
+  if (topk == 1) {
+    // do copy for topk = 1
+    copy_stub(out, input, K);
+  } else {
+    // do sum for topk != 1
+    int64_t d;
+#pragma GCC unroll 4
+    for (d = 0; d <= K - kVecSize; d += kVecSize) {
+      fVec sum_fvec0 = fVec(0.f);
+      fVec sum_fvec1 = fVec(0.f);
+      for (int t = 0; t < topk; ++t) {
+        bVec x_bvec = bVec::loadu(input + t * K + d);
+        fVec x_fvec0, x_fvec1;
+        std::tie(x_fvec0, x_fvec1) = at::vec::convert_to_float(x_bvec);
+
+        sum_fvec0 += x_fvec0;
+        sum_fvec1 += x_fvec1;
+      }
+      bVec out_bvec = convert_from_float_ext<scalar_t>(sum_fvec0, sum_fvec1);
+      out_bvec.store(out + d);
+    }
+    for (; d < K; ++d) {
+      float sum_val = 0.f;
+      for (int t = 0; t < topk; ++t) {
+        sum_val += static_cast<float>(input[t * K + d]);
+      }
+      out[d] = static_cast<scalar_t>(sum_val);
+    }
+  }
+}
+
+// out = input + input2 * scale
+template <typename scalar_t, typename input_t>
+inline void add_mul_stub(
+    scalar_t* __restrict__ out,
+    const input_t* __restrict__ input,
+    const scalar_t* __restrict__ input2,
+    float scale,
+    int64_t size) {
+  static_assert(
+      std::is_same_v<input_t, float> || std::is_same_v<input_t, scalar_t>,
+      "add_mul_stub only supports input_t == float or input_t == scalar_t");
+
+  // out = input (without scale factor)
+  if (input2 == nullptr) {
+    copy_stub(out, input, size);
+    return;
+  }
+
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  constexpr int kVecSize = bVec::size();
+  const fVec s_vec = fVec(scale);
+  int64_t d;
+#pragma GCC unroll 4
+  for (d = 0; d <= size - kVecSize; d += kVecSize) {
+    auto [x0, x1] = load_float_vec2(input + d);
+
+    bVec y_bvec = bVec::loadu(input2 + d);
+    fVec y0, y1;
+    std::tie(y0, y1) = at::vec::convert_to_float(y_bvec);
+
+    x0 = x0 + y0 * s_vec;
+    x1 = x1 + y1 * s_vec;
+    bVec out_vec = convert_from_float_ext<scalar_t>(x0, x1);
+    out_vec.store(out + d);
+  }
+  for (; d < size; ++d) {
+    out[d] = static_cast<scalar_t>(input[d] + float(input2[d]) * scale);
+  }
+}
+
+template <typename scalar_t>
+inline void silu_and_mul_stub(
+    scalar_t* __restrict__ out, const scalar_t* __restrict__ input, const scalar_t* __restrict__ input2, int64_t size) {
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  const fVec one = fVec(1.f);
+
+  // no remainder
+#pragma GCC unroll 4
+  for (int64_t d = 0; d < size; d += bVec::size()) {
+    bVec x = bVec::loadu(input + d);
+    fVec x0, x1;
+    std::tie(x0, x1) = at::vec::convert_to_float(x);
+    bVec y = bVec::loadu(input2 + d);
+    fVec y0, y1;
+    std::tie(y0, y1) = at::vec::convert_to_float(y);
+    x0 = x0 / (one + x0.neg().exp_u20());
+    x1 = x1 / (one + x1.neg().exp_u20());
+    x0 = x0 * y0;
+    x1 = x1 * y1;
+    bVec out_vec = convert_from_float_ext<scalar_t>(x0, x1);
+    out_vec.store(out + d);
+  }
+}
+
+template <typename scalar_t>
+inline void copy_mul_stub(scalar_t* __restrict__ out, const float* __restrict__ input, float weight, int64_t size) {
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  constexpr int kVecSize = bVec::size();
+  const fVec weight_vec = fVec(weight);
+  int64_t d;
+#pragma GCC unroll 4
+  for (d = 0; d <= size - kVecSize; d += kVecSize) {
+    fVec data0 = fVec::loadu(input + d) * weight_vec;
+    fVec data1 = fVec::loadu(input + d + fVec::size()) * weight_vec;
+    bVec out_vec = convert_from_float_ext<scalar_t>(data0, data1);
+    out_vec.store(out + d);
+  }
+  for (; d < size; ++d) {
+    out[d] = static_cast<scalar_t>(input[d] * weight);
+  }
+}
+
+// input = input + input2
+inline void add_bias_stub(float* __restrict__ input, const float* __restrict__ input2, int64_t size) {
+  using fVec = at::vec::Vectorized<float>;
+  constexpr int kVecSize = fVec::size();
+  int64_t d;
+#pragma GCC unroll 4
+  for (d = 0; d <= size - kVecSize; d += kVecSize) {
+    fVec x_fvec = fVec::loadu(input + d);
+    fVec y_fvec = fVec::loadu(input2 + d);
+    x_fvec = x_fvec + y_fvec;
+    x_fvec.store(input + d);
+  }
+  for (; d < size; ++d) {
+    input[d] = input[d] + input2[d];
+  }
+}
+
+template <typename scalar_t>
+inline void copy_mul_stub(scalar_t* __restrict__ out, const scalar_t* __restrict__ input, float weight, int64_t size) {
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  constexpr int kVecSize = bVec::size();
+  const fVec weight_vec = fVec(weight);
+  int64_t d;
+#pragma GCC unroll 4
+  for (d = 0; d <= size - kVecSize; d += kVecSize) {
+    bVec x = bVec::loadu(input + d);
+    fVec x0, x1;
+    std::tie(x0, x1) = at::vec::convert_to_float(x);
+    x0 = x0 * weight_vec;
+    x1 = x1 * weight_vec;
+    bVec out_vec = convert_from_float_ext<scalar_t>(x0, x1);
+    out_vec.store(out + d);
+  }
+  for (; d < size; ++d) {
+    out[d] = static_cast<scalar_t>(input[d] * weight);
+  }
+}
+
+template <typename scalar_t>
+inline void clamp_sigmoid_and_mul_stub(
+    scalar_t* __restrict__ out,
+    const scalar_t* __restrict__ input,
+    int64_t size,
+    const float alpha,
+    const float limit) {
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  const fVec one = fVec(1.f);
+  const fVec zero = fVec(0.f);
+  const fVec limit_v = fVec(limit);
+  const fVec nlimit_v = fVec(-limit);
+  const fVec alpha_v = fVec(alpha);
+
+  // no remainder
+#pragma GCC unroll 4
+  for (int64_t d = 0; d < size; d += bVec::size()) {
+    bVec x = bVec::loadu(input + d);
+    fVec x0_, y0_;
+    std::tie(x0_, y0_) = at::vec::convert_to_float(x);
+    float tmp_buffer[fVec::size() * 2];  // 32
+    float tmp_glu[fVec::size()];         // 16
+    float tmp_linear[fVec::size()];      // 16
+    x0_.store(tmp_buffer);
+    y0_.store(tmp_buffer + fVec::size());
+    // interleaved: x[2i] = glu, x[2i+1] = linear
+    for (int j = 0; j < fVec::size(); ++j) {
+      // x0 [0,2,..30]
+      tmp_glu[j] = tmp_buffer[j * 2];
+      // y0 [1,3,...31]
+      tmp_linear[j] = tmp_buffer[j * 2 + 1];
+    }
+    fVec x0 = fVec::loadu(tmp_glu);
+    fVec y0 = fVec::loadu(tmp_linear);
+
+    // clamp
+    x0 = at::vec::minimum(x0, limit_v);
+    y0 = at::vec::minimum(limit_v, at::vec::maximum(nlimit_v, y0));
+    // x * sigmoid(x * alpha)
+    x0 = x0 / (one + (x0 * alpha_v).neg().exp_u20());
+    // (y + 1) * x
+    y0 = y0 + one;
+    x0 = x0 * y0;
+    convert_from_float_and_store<scalar_t>(out + d / 2, x0);
+  }
+}
diff --git a/csrc/cpu/sgl-kernels/moe_fp8.cpp b/csrc/cpu/sgl-kernels/moe_fp8.cpp
index 84a6af267740..7b33a6585594 100644
--- a/csrc/cpu/sgl-kernels/moe_fp8.cpp
+++ b/csrc/cpu/sgl-kernels/moe_fp8.cpp
@@ -1,150 +1,14 @@
 // Adapted from
 // https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
 
-#include "common.h"
-#include "gemm.h"
-#include "vec.h"
-
 // clang-format off
 
-namespace {
-
-template <typename scalar_t>
-inline void copy_stub(scalar_t* __restrict__ out, const scalar_t* __restrict__ input, int64_t size) {
-  using Vec = at::vec::Vectorized<scalar_t>;
-  // no remainder
-  #pragma GCC unroll 4
-  for (int64_t d = 0; d < size; d += Vec::size()) {
-    Vec data = Vec::loadu(input + d);
-    data.store(out + d);
-  }
-}
-
-template <typename scalar_t>
-inline void copy_mul_stub(scalar_t* __restrict__ out, const scalar_t* __restrict__ input, float weight, int64_t size) {
-  using bVec = at::vec::Vectorized<scalar_t>;
-  using fVec = at::vec::Vectorized<float>;
-  constexpr int kVecSize = bVec::size();
-  const fVec weight_vec = fVec(weight);
-  int64_t d;
-  #pragma GCC unroll 4
-  for (d = 0; d <= size - kVecSize; d += kVecSize) {
-    bVec x = bVec::loadu(input + d);
-    fVec x0, x1;
-    std::tie(x0, x1) = at::vec::convert_to_float(x);
-    x0 = x0 * weight_vec;
-    x1 = x1 * weight_vec;
-    bVec out_vec = convert_from_float_ext<scalar_t>(x0, x1);
-    out_vec.store(out + d);
-  }
-  for (; d < size; ++d) {
-    out[d] = static_cast<scalar_t>(input[d] * weight);
-  }
-}
-
-// acc from [topk, K] to [K]
-template <typename scalar_t>
-inline void sum_stub(scalar_t* __restrict__ out, const scalar_t* __restrict__ input, int64_t topk, int64_t K) {
-  using bVec = at::vec::Vectorized<scalar_t>;
-  using fVec = at::vec::Vectorized<float>;
-  constexpr int kVecSize = bVec::size();
-  if (topk == 1) {
-    // do copy for topk = 1
-    copy_stub(out, input, K);
-  } else {
-    // do sum for topk != 1
-    int64_t d;
-    #pragma GCC unroll 4
-    for (d = 0; d <= K - kVecSize; d += kVecSize) {
-      fVec sum_fvec0 = fVec(0.f);
-      fVec sum_fvec1 = fVec(0.f);
-      for (int t = 0; t < topk; ++t) {
-        bVec x_bvec = bVec::loadu(input + t * K + d);
-        fVec x_fvec0, x_fvec1;
-        std::tie(x_fvec0, x_fvec1) = at::vec::convert_to_float(x_bvec);
-
-        sum_fvec0 += x_fvec0;
-        sum_fvec1 += x_fvec1;
-      }
-      bVec out_bvec = convert_from_float_ext<scalar_t>(sum_fvec0, sum_fvec1);
-      out_bvec.store(out + d);
-    }
-    for (; d < K; ++d) {
-      float sum_val = 0.f;
-      for (int t = 0; t < topk; ++t) {
-        sum_val += static_cast<float>(input[t * K + d]);
-      }
-      out[d] = static_cast<scalar_t>(sum_val);
-    }
-  }
-}
-
-// out = input + input2 * scale
-template <typename scalar_t>
-inline void add_mul_stub(
-    scalar_t* __restrict__ out,
-    const scalar_t* __restrict__ input,
-    const scalar_t* __restrict__ input2,
-    float scale,
-    int64_t size) {
-  using bVec = at::vec::Vectorized<scalar_t>;
-  using fVec = at::vec::Vectorized<float>;
-  constexpr int kVecSize = bVec::size();
-  const fVec s_vec = fVec(scale);
-
-  int64_t d;
-#pragma GCC unroll 4
-  for (d = 0; d <= size - kVecSize; d += kVecSize) {
-    bVec x_bvec = bVec::loadu(input + d);
-    fVec x0, x1;
-    std::tie(x0, x1) = at::vec::convert_to_float(x_bvec);
-
-    bVec y_bvec = bVec::loadu(input2 + d);
-    fVec y0, y1;
-    std::tie(y0, y1) = at::vec::convert_to_float(y_bvec);
-
-    x0 = x0 + y0 * s_vec;
-    x1 = x1 + y1 * s_vec;
-    bVec out_vec = convert_from_float_ext<scalar_t>(x0, x1);
-    out_vec.store(out + d);
-  }
-  for (; d < size; ++d) {
-    out[d] = static_cast<scalar_t>(input[d] + float(input2[d]) * scale);
-  }
-}
-
-template <typename scalar_t>
-inline void silu_and_mul_stub(
-    scalar_t* __restrict__ out,
-    const scalar_t* __restrict__ input,
-    const scalar_t* __restrict__ input2,
-    int64_t size) {
-  using bVec = at::vec::Vectorized<scalar_t>;
-  using fVec = at::vec::Vectorized<float>;
-  const fVec one = fVec(1.f);
-
-  // no remainder
-#pragma GCC unroll 4
-  for (int64_t d = 0; d < size; d += bVec::size()) {
-    bVec x = bVec::loadu(input + d);
-    fVec x0, x1;
-    std::tie(x0, x1) = at::vec::convert_to_float(x);
-    bVec y = bVec::loadu(input2 + d);
-    fVec y0, y1;
-    std::tie(y0, y1) = at::vec::convert_to_float(y);
-    x0 = x0 / (one + x0.neg().exp_u20());
-    x1 = x1 / (one + x1.neg().exp_u20());
-    x0 = x0 * y0;
-    x1 = x1 * y1;
-    bVec out_vec = convert_from_float_ext<scalar_t>(x0, x1);
-    out_vec.store(out + d);
-  }
-}
-
-} // anonymous namespace
+#include "common.h"
+#include "gemm.h"
+#include "moe.h"
 
-template <typename scalar_t>
-void fused_experts_fp8_kernel_impl(
+template <typename scalar_t, typename packed_t, typename param_t, bool is_mxfp4>
+void fused_experts_fp_kernel_impl(
     scalar_t* __restrict__ output,
     scalar_t* __restrict__ ic0,
     scalar_t* __restrict__ ic1,
@@ -153,10 +17,12 @@ void fused_experts_fp8_kernel_impl(
     scalar_t* __restrict__ B_tmp,
     float* __restrict__ C_tmp,
     const scalar_t* __restrict__ input,
-    const at::Float8_e4m3fn* __restrict__ packed_w1,
-    const at::Float8_e4m3fn* __restrict__ packed_w2,
-    const float* __restrict__ w1s,
-    const float* __restrict__ w2s,
+    const packed_t* __restrict__ packed_w1,
+    const packed_t* __restrict__ packed_w2,
+    const float* __restrict__ w1_bias,
+    const float* __restrict__ w2_bias,
+    const param_t* __restrict__ w1s,
+    const param_t* __restrict__ w2s,
     int64_t block_size_N,
     int64_t block_size_K,
     const float* __restrict__ topk_weights,
@@ -168,8 +34,11 @@ void fused_experts_fp8_kernel_impl(
     int64_t K,
     int64_t E,
     int64_t topk,
-    int64_t num_tokens_post_pad) {
-
+    int64_t num_tokens_post_pad,
+    float alpha,
+    float limit,
+    CPUAcTMethod act_func,
+    bool with_bias) {
   constexpr int64_t BLOCK_M = block_size_m();
   constexpr int64_t BLOCK_N = block_size_n();
 
@@ -179,39 +48,52 @@ void fused_experts_fp8_kernel_impl(
   int64_t scale_size_N = div_up(2 * N, block_size_N);
   int64_t scale_size_K = div_up(K, block_size_K);
   int64_t blocks_n_per_group = block_size_N / BLOCK_N;
+  std::function<int64_t(int64_t)> scale_offset_per_block;
+  if constexpr (is_mxfp4) {
+    scale_offset_per_block = [&](int64_t a) { return a * BLOCK_N; };
+  } else {
+    scale_offset_per_block = [&](int64_t a) { return a / blocks_n_per_group; };
+  }
+
+  const int64_t packed_K = get_row_size<packed_t>(K);
+
+  const int64_t stride_e = 2 * N * packed_K;
+  const int64_t stride_n = packed_K;
 
-  const int64_t stride_e = 2 * N * K;
-  const int64_t stride_n = K;
+  int64_t avg_M = std::max(int64_t(1), M * topk / E);
+  const bool use_brgemm = can_use_brgemm<packed_t>(avg_M);
+
+  int64_t B_tmp_size_per_thread = MAX_CACHE_BLOCK_SIZE * BLOCK_N * std::max(K, N);
 
   // here we only parallel on half of 2N to fuse silu_and_mul with gemm
-  at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
+  parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
     // get local pointers
-    int tid = at::get_thread_num();
+    int tid = get_thread_num();
     scalar_t* __restrict__ A = A_tmp + tid * BLOCK_M * K;
 
-    bool is_brgemm_used = false;
-
-    for (int64_t i = begin; i < end; ++i) {
-      int64_t mb = i / NB;
-      int64_t nb = i % NB;
-
+    loop_2d<packed_t>(mb0, mb1, nb0, nb1, BLOCK_N * K, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
       int64_t n_size = std::min(2 * N - nb * BLOCK_N, BLOCK_N);
 
       // B shape [K, n_size] in vnni format
       int32_t expert_id = expert_ids[mb];
-      const at::Float8_e4m3fn* __restrict__ B = packed_w1 + expert_id * stride_e + nb * BLOCK_N * stride_n;
-      const float* __restrict__ Bs = w1s + expert_id * scale_size_N * scale_size_K + (nb / blocks_n_per_group) * scale_size_K;
+      const packed_t* __restrict__ B = packed_w1 + expert_id * stride_e + nb * BLOCK_N * stride_n;
+      const param_t* __restrict__ Bs =
+          w1s + expert_id * scale_size_N * scale_size_K + scale_offset_per_block(nb) * scale_size_K;
+      const float* __restrict__ B_bias = with_bias ? w1_bias + expert_id * 2 * N + nb * BLOCK_N : nullptr;
 
-      // 1.a load A
-      const int32_t* A_ids = sorted_ids + mb * BLOCK_M;
-      int64_t m_size = offsets[mb + 1] - offsets[mb];
+      // do unpacking for the first row or a new expert
+      int32_t pre_expert_id = mb == 0 ? -1 : expert_ids[mb - 1];
+      bool do_unpack = (mb == mb0) || (expert_id != pre_expert_id);
 
-      const bool use_brgemm = can_use_brgemm<at::Float8_e4m3fn>(m_size);
-      is_brgemm_used = is_brgemm_used || use_brgemm;
+      int64_t m_size = offsets[mb + 1] - offsets[mb];
 
-      for (int64_t m = 0; m < m_size; ++m) {
-        int32_t index = A_ids[m] / topk;
-        copy_stub(A + m * K, input + index * K, K);
+      if (nb_offset == 0) {
+        // 1.a load A
+        const int32_t* A_ids = sorted_ids + mb * BLOCK_M;
+        for (int64_t m = 0; m < m_size; ++m) {
+          int32_t index = A_ids[m] / topk;
+          copy_stub(A + m * K, input + index * K, K);
+        }
       }
 
       const int64_t offset = offsets[mb];
@@ -219,8 +101,9 @@ void fused_experts_fp8_kernel_impl(
           /*   A            */ A,
           /*   B            */ B,
           /*   C            */ ic0 + offset * 2 * N + nb * BLOCK_N,
-          /*   Btmp         */ B_tmp + tid * BLOCK_N * std::max(K, N),
+          /*   Btmp         */ B_tmp + tid * B_tmp_size_per_thread + nb_offset * BLOCK_N * K,
           /*   Ctmp         */ C_tmp + tid * 2 * BLOCK_M * BLOCK_N,
+          /*   Bbias        */ B_bias,
           /*   scale        */ Bs,
           /*   M            */ m_size,
           /*   N            */ n_size,
@@ -229,25 +112,30 @@ void fused_experts_fp8_kernel_impl(
           /*   ldb          */ n_size,
           /*   ldc          */ 2 * N,
           /*   brg          */ use_brgemm,
-          /*   block_size_K */ block_size_K);
-    }
+          /*   block_size_K */ block_size_K,
+          /*   do_unpack    */ do_unpack);
+    });
 
-    if (is_brgemm_used) {
+    if (use_brgemm) {
       at::native::cpublas::brgemm_release();
     }
   });
 
   // stage 1.5: intermediate_cache1 = silu(intermediate_cache0)
-  at::parallel_for(0, M * topk, 0, [&](int64_t begin, int64_t end) {
-    for (int64_t m = begin; m < end; ++m) {
-      silu_and_mul_stub(
-          ic1 + m * N,
-          ic0 + m * 2 * N,
-          ic0 + m * 2 * N + N,
-          N);
-    }
-  });
-
+  if (act_func == CPUAcTMethod::silu_and_mul) {
+    at::parallel_for(0, M * topk, 0, [&](int64_t begin, int64_t end) {
+      for (int64_t m = begin; m < end; ++m) {
+        silu_and_mul_stub(ic1 + m * N, ic0 + m * 2 * N, ic0 + m * 2 * N + N, N);
+      }
+    });
+  } else if (act_func == CPUAcTMethod::swiglu) {
+    at::parallel_for(0, M * topk, 0, [&](int64_t begin, int64_t end) {
+      for (int64_t m = begin; m < end; ++m) {
+        clamp_sigmoid_and_mul_stub(ic1 + m * N, ic0 + m * 2 * N, N, alpha, limit);
+        clamp_sigmoid_and_mul_stub(ic1 + m * N + N / 2, ic0 + m * 2 * N + N, N, alpha, limit);
+      }
+    });
+  }
   // stage 2: intermediate_cache2 = intermediate_cache1 @ w2
   //   w2 : [E, K, N] as [E, OC, IC]
   const int64_t OC = K;  // rename K as OC
@@ -256,26 +144,19 @@ void fused_experts_fp8_kernel_impl(
   const int64_t NB2 = div_up(OC, BLOCK_N);
   scale_size_N = div_up(K, block_size_N);
   scale_size_K = div_up(N, block_size_K);
-  const int64_t stride_e2 = OC * IC;
-  const int64_t stride_oc = IC;
+  const int64_t packed_IC = get_row_size<packed_t>(IC);
+  const int64_t stride_e2 = OC * packed_IC;
+  const int64_t stride_oc = packed_IC;
 
   // parallel on [MB2, NB2]
-  at::parallel_for(0, MB2 * NB2, 0, [&](int64_t begin, int64_t end) {
-    int tid = at::get_thread_num();
+  parallel_2d(MB2, NB2, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
+    int tid = get_thread_num();
     alignas(64) scalar_t C[BLOCK_M * BLOCK_K];
 
-    bool is_brgemm_used = false;
-
-    for (int64_t i = begin; i < end; ++i) {
-      int64_t mb = i / NB2;
-      int64_t nb = i % NB2;
-
+    loop_2d<packed_t>(mb0, mb1, nb0, nb1, BLOCK_N * IC, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
       int64_t m_size = offsets[mb + 1] - offsets[mb];
       int64_t n_size = std::min(OC - nb * BLOCK_N, BLOCK_N);
 
-      const bool use_brgemm = can_use_brgemm<at::Float8_e4m3fn>(m_size);
-      is_brgemm_used = is_brgemm_used || use_brgemm;
-
       // A ptr from ic1 of [M * topk, N] in sorted order
       // so as to avoid copy A to tmp buffer again
       const scalar_t* __restrict__ A = ic1 + offsets[mb] * N;
@@ -283,15 +164,22 @@ void fused_experts_fp8_kernel_impl(
 
       // B shape [IC, n_size] in vnni format
       int32_t expert_id = expert_ids[mb];
-      const at::Float8_e4m3fn* __restrict__ B = packed_w2 + expert_id * stride_e2 + nb * BLOCK_N * stride_oc;
-      const float* __restrict__ Bs = w2s + expert_id * scale_size_N * scale_size_K + (nb / blocks_n_per_group) * scale_size_K;
+      const packed_t* __restrict__ B = packed_w2 + expert_id * stride_e2 + nb * BLOCK_N * stride_oc;
+      const param_t* __restrict__ Bs =
+          w2s + expert_id * scale_size_N * scale_size_K + scale_offset_per_block(nb) * scale_size_K;
+      const float* __restrict__ B_bias = with_bias ? w2_bias + expert_id * OC + nb * BLOCK_N : nullptr;
+
+      // do unpacking for the first row or a new expert
+      int32_t pre_expert_id = mb == 0 ? -1 : expert_ids[mb - 1];
+      bool do_unpack = (mb == mb0) || (expert_id != pre_expert_id);
 
       tinygemm_kernel<scalar_t>(
           /*   A            */ A,
           /*   B            */ B,
           /*   C            */ C,
-          /*   Btmp         */ B_tmp + tid * BLOCK_N * std::max(K, N),
+          /*   Btmp         */ B_tmp + tid * B_tmp_size_per_thread + nb_offset * BLOCK_N * IC,
           /*   Ctmp         */ C_tmp + tid * 2 * BLOCK_M * BLOCK_N,
+          /*   Bbias        */ B_bias,
           /*   scale        */ Bs,
           /*   M            */ m_size,
           /*   N            */ n_size,
@@ -300,7 +188,8 @@ void fused_experts_fp8_kernel_impl(
           /*   ldb          */ n_size,
           /*   ldc          */ BLOCK_N,
           /*   brg          */ use_brgemm,
-          /*   block_size_K */ block_size_K);
+          /*   block_size_K */ block_size_K,
+          /*   do_unpack    */ do_unpack);
 
       // 2.b copy from C to ic2 in original order
       //   and also mul topk_weights in float32
@@ -309,13 +198,12 @@ void fused_experts_fp8_kernel_impl(
         float weight = topk_weights[index];
         copy_mul_stub(ic2 + index * K + nb * BLOCK_N, C + m * BLOCK_N, weight, n_size);
       }
-    }
+    });
 
-    if (is_brgemm_used) {
+    if (use_brgemm) {
       at::native::cpublas::brgemm_release();
     }
   });
-
   // stage 3: out = intermediate_cache2.sum(dim=1)
   //   from [M, topk, K] to [M, K]
   at::parallel_for(0, M, 0, [&](int64_t begin, int64_t end) {
@@ -325,35 +213,43 @@ void fused_experts_fp8_kernel_impl(
   });
 }
 
-#define INSTANTIATE_MOE_FP8_TEMPLATE(TYPE)             \
-  template void fused_experts_fp8_kernel_impl<TYPE>(   \
-      TYPE* __restrict__ output,                       \
-      TYPE* __restrict__ ic0,                          \
-      TYPE* __restrict__ ic1,                          \
-      TYPE* __restrict__ ic2,                          \
-      TYPE* __restrict__ A_tmp,                        \
-      TYPE* __restrict__ B_tmp,                        \
-      float* __restrict__ C_tmp,                       \
-      const TYPE* __restrict__ input,                  \
-      const at::Float8_e4m3fn* __restrict__ packed_w1, \
-      const at::Float8_e4m3fn* __restrict__ packed_w2, \
-      const float* __restrict__ w1s,                   \
-      const float* __restrict__ w2s,                   \
-      int64_t block_size_N,                            \
-      int64_t block_size_K,                            \
-      const float* __restrict__ topk_weights,          \
-      const int32_t* __restrict__ sorted_ids,          \
-      const int32_t* __restrict__ expert_ids,          \
-      const int32_t* __restrict__ offsets,             \
-      int64_t M,                                       \
-      int64_t N,                                       \
-      int64_t K,                                       \
-      int64_t E,                                       \
-      int64_t topk,                                    \
-      int64_t num_tokens_post_pad)
-
-INSTANTIATE_MOE_FP8_TEMPLATE(at::BFloat16);
-INSTANTIATE_MOE_FP8_TEMPLATE(at::Half);
+#define INSTANTIATE_MOE_FP_TEMPLATE(TYPE1, TYPE2, TYPE3, IS_MXFP4)           \
+  template void fused_experts_fp_kernel_impl<TYPE1, TYPE2, TYPE3, IS_MXFP4>( \
+      TYPE1* __restrict__ output,                                            \
+      TYPE1* __restrict__ ic0,                                               \
+      TYPE1* __restrict__ ic1,                                               \
+      TYPE1* __restrict__ ic2,                                               \
+      TYPE1* __restrict__ A_tmp,                                             \
+      TYPE1* __restrict__ B_tmp,                                             \
+      float* __restrict__ C_tmp,                                             \
+      const TYPE1* __restrict__ input,                                       \
+      const TYPE2* __restrict__ packed_w1,                                   \
+      const TYPE2* __restrict__ packed_w2,                                   \
+      const float* __restrict__ w1_bias,                                     \
+      const float* __restrict__ w2_bias,                                     \
+      const TYPE3* __restrict__ w1s,                                         \
+      const TYPE3* __restrict__ w2s,                                         \
+      int64_t block_size_N,                                                  \
+      int64_t block_size_K,                                                  \
+      const float* __restrict__ topk_weights,                                \
+      const int32_t* __restrict__ sorted_ids,                                \
+      const int32_t* __restrict__ expert_ids,                                \
+      const int32_t* __restrict__ offsets,                                   \
+      int64_t M,                                                             \
+      int64_t N,                                                             \
+      int64_t K,                                                             \
+      int64_t E,                                                             \
+      int64_t topk,                                                          \
+      int64_t num_tokens_post_pad,                                           \
+      float alpha,                                                           \
+      float limit,                                                           \
+      CPUAcTMethod act_func,                                                 \
+      bool with_bias)
+
+INSTANTIATE_MOE_FP_TEMPLATE(at::BFloat16, at::Float8_e4m3fn, float, false);
+INSTANTIATE_MOE_FP_TEMPLATE(at::Half, at::Float8_e4m3fn, float, false);
+INSTANTIATE_MOE_FP_TEMPLATE(at::BFloat16, uint8_t, uint8_t, true);
+INSTANTIATE_MOE_FP_TEMPLATE(at::Half, uint8_t, uint8_t, true);
 
 template <typename scalar_t>
 void shared_expert_fp8_kernel_impl(
@@ -374,7 +270,6 @@ void shared_expert_fp8_kernel_impl(
     int64_t M,
     int64_t N,
     int64_t K) {
-
   constexpr int64_t BLOCK_M = block_size_m();
   constexpr int64_t BLOCK_N = block_size_n();
 
@@ -385,22 +280,27 @@ void shared_expert_fp8_kernel_impl(
   int64_t blocks_n_per_group = block_size_N / BLOCK_N;
 
   const bool use_brgemm = can_use_brgemm<at::Float8_e4m3fn>(M);
+  const bool apply_scaling_factor = fused_experts_out != nullptr;
 
-  at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
-    int tid = at::get_thread_num();
+  int64_t B_tmp_size_per_thread = MAX_CACHE_BLOCK_SIZE * BLOCK_N * std::max(K, N);
 
-    for (int64_t i = begin; i < end; ++i) {
-      int64_t mb = i / NB;
-      int64_t nb = i % NB;
+  parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
+    int tid = get_thread_num();
+
+    loop_2d<at::Float8_e4m3fn>(mb0, mb1, nb0, nb1, BLOCK_N * K, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
       int64_t m_size = std::min(M - mb * BLOCK_M, BLOCK_M);
       int64_t n_size = std::min(2 * N - nb * BLOCK_N, BLOCK_N);
 
+      // do unpacking for the first row
+      bool do_unpack = (mb == mb0);
+
       tinygemm_kernel<scalar_t>(
           /*   A            */ input + mb * BLOCK_M * K,
           /*   B            */ packed_w1 + nb * BLOCK_N * K,
           /*   C            */ ic0 + mb * BLOCK_M * 2 * N + nb * BLOCK_N,
-          /*   Btmp         */ B_tmp + tid * BLOCK_N * std::max(K, N),
+          /*   Btmp         */ B_tmp + tid * B_tmp_size_per_thread + nb_offset * BLOCK_N * K,
           /*   Ctmp         */ C_tmp + tid * 2 * BLOCK_M * BLOCK_N,
+          /*   Bbias        */ nullptr,
           /*   scale        */ w1s + (nb / blocks_n_per_group) * scale_size_K,
           /*   M            */ m_size,
           /*   N            */ n_size,
@@ -409,8 +309,9 @@ void shared_expert_fp8_kernel_impl(
           /*   ldb          */ n_size,
           /*   ldc          */ 2 * N,
           /*   brg          */ use_brgemm,
-          /*   block_size_K */ block_size_K);
-    }
+          /*   block_size_K */ block_size_K,
+          /*   do_unpack    */ do_unpack);
+    });
 
     if (use_brgemm) {
       at::native::cpublas::brgemm_release();
@@ -420,11 +321,7 @@ void shared_expert_fp8_kernel_impl(
   // stage 1.5: intermediate_cache1 = silu(intermediate_cache0)
   at::parallel_for(0, M, 0, [&](int64_t begin, int64_t end) {
     for (int64_t m = begin; m < end; ++m) {
-      silu_and_mul_stub(
-          ic1 + m * N,
-          ic0 + m * 2 * N,
-          ic0 + m * 2 * N + N,
-          N);
+      silu_and_mul_stub(ic1 + m * N, ic0 + m * 2 * N, ic0 + m * 2 * N + N, N);
     }
   });
 
@@ -437,23 +334,25 @@ void shared_expert_fp8_kernel_impl(
   scale_size_K = div_up(N, block_size_K);
 
   // parallel on [MB2, NB2]
-  at::parallel_for(0, MB2 * NB2, 0, [&](int64_t begin, int64_t end) {
-    int tid = at::get_thread_num();
+  parallel_2d(MB2, NB2, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
+    int tid = get_thread_num();
     alignas(64) scalar_t C[BLOCK_M * BLOCK_K];
 
-    for (int64_t i = begin; i < end; ++i) {
-      int64_t mb = i / NB2;
-      int64_t nb = i % NB2;
+    loop_2d<at::Float8_e4m3fn>(mb0, mb1, nb0, nb1, BLOCK_N * IC, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
       int64_t m_size = std::min(M - mb * BLOCK_M, BLOCK_M);
       int64_t n_size = std::min(OC - nb * BLOCK_N, BLOCK_N);
 
+      // do unpacking for the first row
+      bool do_unpack = (mb == mb0);
+
       // 2.a gemm: C = A @ B
       tinygemm_kernel<scalar_t>(
           /*   A            */ ic1 + mb * BLOCK_M * N,
           /*   B            */ packed_w2 + nb * BLOCK_N * N,
           /*   C            */ C,
-          /*   Btmp         */ B_tmp + tid * BLOCK_N * std::max(K, N),
+          /*   Btmp         */ B_tmp + tid * B_tmp_size_per_thread + nb_offset * BLOCK_N * IC,
           /*   Ctmp         */ C_tmp + tid * 2 * BLOCK_M * BLOCK_N,
+          /*   Bbias        */ nullptr,
           /*   scale        */ w2s + (nb / blocks_n_per_group) * scale_size_K,
           /*   M            */ m_size,
           /*   N            */ n_size,
@@ -462,15 +361,18 @@ void shared_expert_fp8_kernel_impl(
           /*   ldb          */ n_size,
           /*   ldc          */ BLOCK_N,
           /*   brg          */ use_brgemm,
-          /*   block_size_K */ block_size_K);
+          /*   block_size_K */ block_size_K,
+          /*   do_unpack    */ do_unpack);
 
       // 2.b copy from C to output and add fused_experts_out
       scalar_t* __restrict__ out = output + mb * BLOCK_M * K + nb * BLOCK_N;
-      const scalar_t* __restrict__ fused_out = fused_experts_out + mb * BLOCK_M * K + nb * BLOCK_N;
+      const scalar_t* __restrict__ fused_out =
+          apply_scaling_factor ? fused_experts_out + mb * BLOCK_M * K + nb * BLOCK_N : nullptr;
       for (int64_t m = 0; m < m_size; ++m) {
-        add_mul_stub(out + m * K, C + m * BLOCK_N, fused_out + m * K, routed_scaling_factor, n_size);
+        const scalar_t* __restrict__ fused_out_row = apply_scaling_factor ? (fused_out + m * K) : nullptr;
+        add_mul_stub(out + m * K, C + m * BLOCK_N, fused_out_row, routed_scaling_factor, n_size);
       }
-    }
+    });
   });
 
   if (use_brgemm) {
diff --git a/csrc/cpu/sgl-kernels/moe_int4.cpp b/csrc/cpu/sgl-kernels/moe_int4.cpp
new file mode 100644
index 000000000000..c97784847a9e
--- /dev/null
+++ b/csrc/cpu/sgl-kernels/moe_int4.cpp
@@ -0,0 +1,323 @@
+// Adapted from
+// https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
+
+// clang-format off
+
+#include "common.h"
+#include "gemm.h"
+#include "moe.h"
+
+template <int64_t N>
+inline void copy_bias(const float* bias_ptr, float* y_buf, int64_t m, int64_t ldn) {
+  using Vec = at::vec::Vectorized<float>;
+  constexpr int kVecSize = Vec::size();
+  static_assert(N % kVecSize == 0, "copy_bias requires N to be a multiple of Vectorized<float>::size()");
+  const bool has_bias = bias_ptr != nullptr;
+  const Vec zero_vec(0.f);
+  for (int i = 0; i < m; ++i) {
+#pragma GCC unroll 2
+    for (int j = 0; j < N; j += kVecSize) {
+      Vec vec = has_bias ? Vec::loadu(bias_ptr + j) : zero_vec;
+      vec.store(y_buf + i * ldn + j);
+    }
+  }
+}
+
+template <typename scalar_t>
+void fused_experts_int4_w4a8_kernel_impl(
+    scalar_t* __restrict__ output,
+    scalar_t* __restrict__ ic0,
+    scalar_t* __restrict__ ic1,
+    scalar_t* __restrict__ ic2,
+    uint8_t* __restrict__ A_tmp,
+    uint8_t* __restrict__ Aq_tmp,
+    float* __restrict__ As_tmp,
+    int32_t* __restrict__ Azp_tmp,
+    float* __restrict__ C_tmp,
+    int8_t* __restrict__ dqB_tmp,
+    const scalar_t* __restrict__ input,
+    const uint8_t* __restrict__ packed_w1,
+    const uint8_t* __restrict__ packed_w2,
+    const int8_t* __restrict__ w1z,
+    const int8_t* __restrict__ w2z,
+    const float* __restrict__ w1s,
+    const float* __restrict__ w2s,
+    int group_size,
+    const float* __restrict__ topk_weights,
+    const int32_t* __restrict__ sorted_ids,
+    const int32_t* __restrict__ expert_ids,
+    const int32_t* __restrict__ offsets,
+    int64_t M,
+    int64_t N,
+    int64_t K,
+    int64_t E,
+    int64_t topk,
+    int64_t num_tokens_post_pad) {
+  constexpr int64_t BLOCK_M = block_size_m();
+  constexpr int64_t BLOCK_N = block_size_n();
+  int num_threads = at::get_num_threads();
+  // int64_t buffer_size_nbytes = M * topk * N * 2
+  //                              M * topk * K * 2 +
+  //                              num_threads * BLOCK_M * K +
+  //                              num_threads * 2 * BLOCK_M * BLOCK_N * sizeof(float)  +
+  //                              M * topk * 2 * N * 2 +
+  //                              max(M * K, M * topk * N)  +
+  //                              M * topk * sizeof(float);
+
+  // intermediate_cache1 (scalar_t):     START + M * topk * N
+  // intermediate_cache2 (scalar_t):     + M * topk * K
+  // A_tmp (uint8_t):                    + num_threads * BLOCK_M * K
+  // C_tmp (float):                      + num_threads * 2 * BLOCK_M * BLOCK_N
+  // intermediate_cache0 (scalar_t):     + M * topk * 2 * N
+  // Aq_tmp (uint8_t):                   + max(M * K, M * topk * N)
+  // As_tmp (float):                     + M * topk
+  // dqB_tmp (int8_t)                    + num_threads * _block_k * BlOCK_N
+
+  // stage 0: quantize input to uint8, [M, K]
+  at::parallel_for(0, M, 0, [&](int64_t begin, int64_t end) {
+    for (int64_t m = begin; m < end; ++m) {
+      quantize_row_int8<scalar_t>(Aq_tmp + m * K, As_tmp[m], input + m * K, K);
+    }
+  });
+  int64_t _block_k = get_4bit_block_k_size(group_size);
+  auto Azp = at::ones({M * topk}).to(at::kInt).mul(128);
+  auto Azp_ptr = Azp.data_ptr<int32_t>();
+  // stage 1: intermediate_cache0 = hidden_states @ w1
+  const int64_t MB = div_up(num_tokens_post_pad, BLOCK_M);
+  const int64_t NB = div_up(N, BLOCK_N);
+
+  int64_t block_per_group = group_size / _block_k;
+  int64_t Kc = K / _block_k;
+  int64_t num_groups = K / group_size;
+
+  const int64_t stride_e = 2 * NB * Kc * (BLOCK_N * (_block_k / 2 + sizeof(int32_t)));
+  const bool sym_quant_act = false;
+  // weight + compensation shape = [E, Nc, Kc, block_n * _block_k / 2 + block_n*sizeof(int32_t)]
+  // scales/qzeros shape = [E, Nc, G, block_n]
+
+  // here we only parallel on half of 2N to fuse silu_and_mul with gemm
+  at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
+    // get local pointers
+    int tid = at::get_thread_num();
+    int8_t* dqB_tmp1 = dqB_tmp + tid * 2 * _block_k * BLOCK_N;
+    int8_t* dqB_tmp2 = dqB_tmp1 + _block_k * BLOCK_N;
+    alignas(64) float As[BLOCK_M];
+    uint8_t* __restrict__ A = A_tmp + tid * BLOCK_M * K;
+    float* __restrict__ C0 = C_tmp + tid * 2 * BLOCK_M * BLOCK_N;
+    float* __restrict__ C1 = C0 + BLOCK_M * BLOCK_N;
+    bool is_brgemm_used = false;
+    for (int64_t i = begin; i < end; ++i) {
+      int64_t mb = i / NB;
+      int64_t nb = i % NB;
+      int64_t nb1 = nb + NB;
+      int64_t n_size = std::min(N - nb * BLOCK_N, BLOCK_N);
+      // B shape [K, n_size] in vnni format
+      int32_t expert_id = expert_ids[mb];
+      const uint8_t* __restrict__ B = packed_w1 + expert_id * stride_e;
+      // Bz and Bs: [E, K/gs, 2N]
+      const int8_t* __restrict__ Bz = w1z + expert_id * (num_groups) * (2 * N);
+      const float* __restrict__ Bs = w1s + expert_id * (num_groups) * (2 * N);
+
+      // 1.a load A
+      const int32_t* A_ids = sorted_ids + mb * BLOCK_M;
+      int64_t m_size = offsets[mb + 1] - offsets[mb];
+      const bool use_brgemm = can_use_brgemm<int8_t>(m_size);
+      is_brgemm_used = is_brgemm_used || use_brgemm;
+      // copy to A [BLOCK_M, K]
+      for (int64_t m = 0; m < m_size; ++m) {
+        int32_t index = A_ids[m] / topk;
+        copy_stub(A + m * K, Aq_tmp + index * K, K);
+        As[m] = As_tmp[index];
+      }
+      const int64_t offset = offsets[mb];
+      copy_bias<BLOCK_N>(nullptr, C0, m_size, BLOCK_N);
+      copy_bias<BLOCK_N>(nullptr, C1, m_size, BLOCK_N);
+      for (int kci = 0; kci < Kc; ++kci) {
+        int32_t* compensation_ptr =
+            sym_quant_act ? nullptr
+                          : (int32_t*)(void*)(B + (nb * Kc + kci) * (BLOCK_N * (_block_k / 2 + sizeof(int32_t))) +
+                                              _block_k * BLOCK_N / 2) /*Bcomp*/;
+        tinygemm_kernel<scalar_t>(
+            ic0 + offset * 2 * N + nb * BLOCK_N,
+            C0,
+            A + kci * _block_k,
+            As,
+            Azp_ptr,
+            B + (nb * Kc + kci) * (BLOCK_N * (_block_k / 2 + sizeof(int32_t))) /*B*/,
+            Bs + nb * BLOCK_N * num_groups + kci / block_per_group * BLOCK_N /*scales_b*/,
+            Bz + nb * BLOCK_N * num_groups + kci / block_per_group * BLOCK_N /*qzeros_b*/,
+            compensation_ptr,
+            dqB_tmp1,
+            m_size,
+            _block_k,
+            K,
+            BLOCK_N,
+            2 * N,
+            kci == Kc - 1,
+            use_brgemm);
+      }
+
+      for (int kci = 0; kci < Kc; ++kci) {
+        int32_t* compensation_ptr =
+            sym_quant_act ? nullptr
+                          : (int32_t*)(void*)(B + (nb1 * Kc + kci) * (BLOCK_N * (_block_k / 2 + sizeof(int32_t))) +
+                                              _block_k * BLOCK_N / 2) /*Bcomp*/;
+        tinygemm_kernel<scalar_t>(
+            ic0 + offset * 2 * N + nb1 * BLOCK_N,
+            C1,
+            A + kci * _block_k,
+            As,
+            Azp_ptr,
+            B + (nb1 * Kc + kci) * (BLOCK_N * (_block_k / 2 + sizeof(int32_t))) /*B*/,
+            Bs + nb1 * BLOCK_N * num_groups + kci / block_per_group * BLOCK_N /*scales_b*/,
+            Bz + nb1 * BLOCK_N * num_groups + kci / block_per_group * BLOCK_N /*qzeros_b*/,
+            compensation_ptr,
+            dqB_tmp2,
+            m_size,
+            _block_k,
+            K,
+            BLOCK_N,
+            2 * N,
+            kci == Kc - 1,
+            use_brgemm);
+      }
+    }
+
+    if (is_brgemm_used) {
+      at::native::cpublas::brgemm_release();
+    }
+  });
+
+  // stage 1.5: intermediate_cache1 = silu(intermediate_cache0)
+  at::parallel_for(0, M * topk, 0, [&](int64_t begin, int64_t end) {
+    for (int64_t m = begin; m < end; ++m) {
+      silu_and_mul_stub(ic1 + m * N, ic0 + m * 2 * N, ic0 + m * 2 * N + N, N);
+    }
+  });
+
+  // stage 1.5: quantize ic1 to uint8, [M * topk, N]
+  at::parallel_for(0, M * topk, 0, [&](int64_t begin, int64_t end) {
+    for (int64_t m = begin; m < end; ++m) {
+      quantize_row_int8<scalar_t>(Aq_tmp + m * N, As_tmp[m], ic1 + m * N, N);
+    }
+  });
+  // stage 2: intermediate_cache2 = intermediate_cache1 @ w2
+  //   w2 : [E, K, N] as [E, OC, IC]
+  const int64_t OC = K;  // rename K as OC
+  const int64_t IC = N;  // rename N as IC
+  const int64_t MB2 = MB;
+  const int64_t NB2 = div_up(OC, BLOCK_N);
+  const int64_t stride_oc = IC;
+  num_groups = IC / group_size;
+  Kc = IC / _block_k;
+  const int64_t stride_e2 = NB2 * Kc * (BLOCK_N * (_block_k / 2 + sizeof(int32_t)));
+  // parallel on [MB2, NB2]
+  at::parallel_for(0, MB2 * NB2, 0, [&](int64_t begin, int64_t end) {
+    int tid = at::get_thread_num();
+    int8_t* dqB_tmp1 = dqB_tmp + tid * 2 * _block_k * BLOCK_N;
+    float* __restrict__ C2 = C_tmp + tid * 2 * BLOCK_M * BLOCK_N;
+    bool is_brgemm_used = false;
+    for (int64_t i = begin; i < end; ++i) {
+      int64_t mb = i / NB2;
+      int64_t nb = i % NB2;
+
+      int64_t m_size = offsets[mb + 1] - offsets[mb];
+      int64_t n_size = std::min(OC - nb * BLOCK_N, BLOCK_N);
+      const bool use_brgemm = can_use_brgemm<int8_t>(m_size);
+      is_brgemm_used = is_brgemm_used || use_brgemm;
+      const int32_t* A_ids = sorted_ids + mb * BLOCK_M;
+
+      // B shape [IC, n_size] in vnni format
+      int32_t expert_id = expert_ids[mb];
+      const uint8_t* __restrict__ B = packed_w2 + expert_id * stride_e2;
+
+      // Bz and Bs: [E, IC/gs, OC]
+      const int8_t* __restrict__ Bz = w2z + expert_id * (num_groups)*OC;
+      const float* __restrict__ Bs = w2s + expert_id * (num_groups)*OC;
+
+      // A ptr from ic1 of [M * topk, N] in sorted order
+      // so as to avoid copy A to tmp buffer again
+      const uint8_t* __restrict__ A = Aq_tmp + offsets[mb] * IC;
+      const float* __restrict__ As = As_tmp + offsets[mb];
+      copy_bias<BLOCK_N>(nullptr, C2, m_size, BLOCK_N);
+      for (int kci = 0; kci < Kc; ++kci) {
+        int32_t* compensation_ptr =
+            sym_quant_act ? nullptr
+                          : (int32_t*)(void*)(B + (nb * Kc + kci) * (BLOCK_N * (_block_k / 2 + sizeof(int32_t))) +
+                                              _block_k * BLOCK_N / 2) /*Bcomp*/;
+        tinygemm_kernel<scalar_t>(
+            nullptr, /*store_out is false*/
+            C2,
+            A + kci * _block_k,
+            As,
+            Azp_ptr,
+            B + (nb * Kc + kci) * (BLOCK_N * (_block_k / 2 + sizeof(int32_t))),
+            Bs + nb * BLOCK_N * num_groups + kci / block_per_group * BLOCK_N /*scales_b*/,
+            Bz + nb * BLOCK_N * num_groups + kci / block_per_group * BLOCK_N /*zeros_b*/,
+            compensation_ptr,
+            dqB_tmp1,
+            m_size,
+            _block_k,
+            IC,
+            BLOCK_N,
+            BLOCK_N,
+            false,
+            use_brgemm);
+      }
+
+      // 2.b copy from C to ic2 in original order
+      //   and also mul topk_weights in float32
+      for (int64_t m = 0; m < m_size; ++m) {
+        int32_t index = A_ids[m];
+        float weight = topk_weights[index];
+        copy_mul_stub(ic2 + index * K + nb * BLOCK_N, C2 + m * BLOCK_N, weight, n_size);
+      }
+    }
+
+    if (is_brgemm_used) {
+      at::native::cpublas::brgemm_release();
+    }
+  });
+
+  // stage 3: out = intermediate_cache2.sum(dim=1)
+  //   from [M, topk, K] to [M, K]
+  at::parallel_for(0, M, 0, [&](int64_t begin, int64_t end) {
+    for (int64_t m = begin; m < end; ++m) {
+      sum_stub(output + m * K, ic2 + m * topk * K, topk, K);
+    }
+  });
+}
+
+#define INSTANTIATE_MOE_INT4_W4A8_TEMPLATE(TYPE)           \
+  template void fused_experts_int4_w4a8_kernel_impl<TYPE>( \
+      TYPE* __restrict__ output,                           \
+      TYPE* __restrict__ ic0,                              \
+      TYPE* __restrict__ ic1,                              \
+      TYPE* __restrict__ ic2,                              \
+      uint8_t* __restrict__ A_tmp,                         \
+      uint8_t* __restrict__ Aq_tmp,                        \
+      float* __restrict__ As_tmp,                          \
+      int32_t* __restrict__ Azp_tmp,                       \
+      float* __restrict__ C_tmp,                           \
+      int8_t* __restrict__ dqB_tmp,                        \
+      const TYPE* __restrict__ input,                      \
+      const uint8_t* __restrict__ packed_w1,               \
+      const uint8_t* __restrict__ packed_w2,               \
+      const int8_t* __restrict__ w1z,                      \
+      const int8_t* __restrict__ w2z,                      \
+      const float* __restrict__ w1s,                       \
+      const float* __restrict__ w2s,                       \
+      int group_size,                                      \
+      const float* __restrict__ topk_weights,              \
+      const int32_t* __restrict__ sorted_ids,              \
+      const int32_t* __restrict__ expert_ids,              \
+      const int32_t* __restrict__ offsets,                 \
+      int64_t M,                                           \
+      int64_t N,                                           \
+      int64_t K,                                           \
+      int64_t E,                                           \
+      int64_t topk,                                        \
+      int64_t num_tokens_post_pad)
+
+INSTANTIATE_MOE_INT4_W4A8_TEMPLATE(at::BFloat16);
+INSTANTIATE_MOE_INT4_W4A8_TEMPLATE(at::Half);
diff --git a/csrc/cpu/sgl-kernels/moe_int8.cpp b/csrc/cpu/sgl-kernels/moe_int8.cpp
index e28b4fc4ee59..3bdd5892d0b4 100644
--- a/csrc/cpu/sgl-kernels/moe_int8.cpp
+++ b/csrc/cpu/sgl-kernels/moe_int8.cpp
@@ -1,124 +1,145 @@
 // Adapted from
 // https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
 
+// clang-format off
+
 #include "common.h"
-#include "vec.h"
 #include "gemm.h"
-
-// clang-format off
+#include "moe.h"
 
 namespace {
 
-template <typename scalar_t>
-inline void copy_stub(scalar_t* __restrict__ out, const scalar_t* __restrict__ input, int64_t size) {
-  using Vec = at::vec::Vectorized<scalar_t>;
-  // no remainder
-  #pragma GCC unroll 4
-  for (int64_t d = 0; d < size; d += Vec::size()) {
-    Vec data = Vec::loadu(input + d);
-    data.store(out + d);
-  }
-}
-
-template <>
-inline void copy_stub<uint8_t>(uint8_t* __restrict__ out, const uint8_t* __restrict__ input, int64_t size) {
-  // size might be 64x + 32
-  std::memcpy(out, input, size * sizeof(uint8_t));
-}
-
-template <typename scalar_t>
-inline void copy_mul_stub(scalar_t* __restrict__ out, const float* __restrict__ input, float weight, int64_t size) {
-  using bVec = at::vec::Vectorized<scalar_t>;
-  using fVec = at::vec::Vectorized<float>;
-  constexpr int kVecSize = bVec::size();
-  const fVec weight_vec = fVec(weight);
-  int64_t d;
-  #pragma GCC unroll 4
-  for (d = 0; d <= size - kVecSize; d += kVecSize) {
-    fVec data0 = fVec::loadu(input + d) * weight_vec;
-    fVec data1 = fVec::loadu(input + d + fVec::size()) * weight_vec;
-    bVec out_vec = convert_from_float_ext<scalar_t>(data0, data1);
-    out_vec.store(out + d);
-  }
-  for (; d < size; ++d) {
-    out[d] = static_cast<scalar_t>(input[d] * weight);
-  }
-}
+template <typename scalar_t, int BLOCK_N>
+inline void silu_and_mul(
+    scalar_t* __restrict__ C,
+    const int32_t* __restrict__ C0,  // x: x0, x1
+    const int32_t* __restrict__ C1,  // y: y0, y1
+    const float* __restrict__ As,
+    const float* __restrict__ Bs0,
+    const float* __restrict__ Bs1,
+    const int32_t* __restrict__ Bcomp0,
+    const int32_t* __restrict__ Bcomp1,
+    int64_t m_size,
+    int64_t N) {
+#if defined(CPU_CAPABILITY_AVX512)
+  constexpr int COLS = BLOCK_N / 16;
+  static_assert(COLS % 2 == 0);
+
+  __m512 vc0[COLS];
+  __m512 vc1[COLS];
+  __m512i vcomp0[COLS];
+  __m512i vcomp1[COLS];
+  __m512 vas;
+  __m512 vbs0[COLS];
+  __m512 vbs1[COLS];
+
+  auto load_scale_and_comp = [&](auto col) {
+    vcomp0[col] = _mm512_loadu_si512(Bcomp0 + col * 16);
+    vcomp1[col] = _mm512_loadu_si512(Bcomp1 + col * 16);
+    vbs0[col] = _mm512_loadu_ps(Bs0 + col * 16);
+    vbs1[col] = _mm512_loadu_ps(Bs1 + col * 16);
+  };
+  Unroll<COLS>{}(load_scale_and_comp);
+
+  auto scalec = [&](auto col, int64_t m) {
+    // update As
+    vas = _mm512_set1_ps(As[m]);
+    // C = As * (C - Bcomp) * Bs
+    __m512i vc32_0 = _mm512_loadu_si512(C0 + m * BLOCK_N + col * 16);
+    __m512i vc32_1 = _mm512_loadu_si512(C1 + m * BLOCK_N + col * 16);
+    vc0[col] = _mm512_cvtepi32_ps(_mm512_sub_epi32(vc32_0, vcomp0[col]));
+    vc1[col] = _mm512_cvtepi32_ps(_mm512_sub_epi32(vc32_1, vcomp1[col]));
+    vc0[col] = _mm512_mul_ps(_mm512_mul_ps(vc0[col], vas), vbs0[col]);
+    vc1[col] = _mm512_mul_ps(_mm512_mul_ps(vc1[col], vas), vbs1[col]);
+  };
 
-// acc from [topk, K] to [K]
-template <typename scalar_t>
-inline void sum_stub(scalar_t* __restrict__ out, const scalar_t* __restrict__ input, int64_t topk, int64_t K) {
   using bVec = at::vec::Vectorized<scalar_t>;
   using fVec = at::vec::Vectorized<float>;
-  constexpr int kVecSize = bVec::size();
-  if (topk == 1) {
-    // do copy for topk = 1
-    copy_stub(out, input, K);
-  } else {
-    // do sum for topk != 1
-    int64_t d;
-    #pragma GCC unroll 4
-    for (d = 0; d <= K - kVecSize; d += kVecSize) {
-      fVec sum_fvec0 = fVec(0.f);
-      fVec sum_fvec1 = fVec(0.f);
-      for (int t = 0; t < topk; ++t) {
-        bVec x_bvec = bVec::loadu(input + t * K + d);
-        fVec x_fvec0, x_fvec1;
-        std::tie(x_fvec0, x_fvec1) = at::vec::convert_to_float(x_bvec);
-
-        sum_fvec0 += x_fvec0;
-        sum_fvec1 += x_fvec1;
-      }
-      bVec out_bvec = convert_from_float_ext<scalar_t>(sum_fvec0, sum_fvec1);
-      out_bvec.store(out + d);
-    }
-    for (; d < K; ++d) {
-      float sum_val = 0.f;
-      for (int t = 0; t < topk; ++t) {
-        sum_val += static_cast<float>(input[t * K + d]);
-      }
-      out[d] = static_cast<scalar_t>(sum_val);
+  const fVec one = fVec(1.f);
+  auto silu_and_mul = [&](auto col) {
+    fVec x = fVec(vc0[col]);
+    fVec y = fVec(vc1[col]);
+    x = x / (one + x.neg().exp_u20());
+    vc0[col] = x * y;
+  };
+
+  auto storec = [&](auto col, int64_t m) {
+    if constexpr (col % 2 == 0) {
+      fVec x0 = fVec(vc0[col + 0]);
+      fVec x1 = fVec(vc0[col + 1]);
+      bVec out_vec = convert_from_float_ext<scalar_t>(x0, x1);
+      out_vec.store(C + m * N + col * 16);
     }
+  };
+
+  for (int64_t m = 0; m < m_size; ++m) {
+    Unroll<COLS>{}(scalec, m);
+    Unroll<COLS>{}(silu_and_mul);
+    Unroll<COLS>{}(storec, m);
   }
+#else
+  TORCH_CHECK(false, "silu_and_mul: scalar path not implemented!");
+#endif
 }
 
-// out = input + input2 * scale
-template <typename scalar_t>
-inline void add_mul_stub(scalar_t* __restrict__ out, const float* __restrict__ input,
-    const scalar_t* __restrict__ input2, float scale, int64_t size) {
-
-  using bVec = at::vec::Vectorized<scalar_t>;
-  using fVec = at::vec::Vectorized<float>;
-  constexpr int kVecSize = bVec::size();
-  const fVec s_vec = fVec(scale);
-  int64_t d;
-  #pragma GCC unroll 4
-  for (d = 0; d <= size - kVecSize; d += kVecSize) {
-    fVec x0 = fVec::loadu(input + d);
-    fVec x1 = fVec::loadu(input + d + fVec::size());
-
-    bVec y_bvec = bVec::loadu(input2 + d);
-    fVec y0, y1;
-    std::tie(y0, y1) = at::vec::convert_to_float(y_bvec);
-
-    x0 = x0 + y0 * s_vec;
-    x1 = x1 + y1 * s_vec;
-    bVec out_vec = convert_from_float_ext<scalar_t>(x0, x1);
-    out_vec.store(out + d);
-  }
-  for (; d < size; ++d) {
-    out[d] = static_cast<scalar_t>(input[d] + float(input2[d]) * scale);
+template <int BLOCK_N>
+inline void scale_C(
+    float* __restrict__ C,
+    const int32_t* __restrict__ Ctmp,
+    const float* __restrict__ As,
+    const float* __restrict__ Bs,
+    const int32_t* __restrict__ Bcomp,
+    int64_t m_size) {
+#if defined(CPU_CAPABILITY_AVX512)
+  constexpr int COLS = BLOCK_N / 16;
+  static_assert(COLS % 2 == 0);
+
+  __m512 vc[COLS];
+  __m512i vcomp[COLS];
+  __m512 vas;
+  __m512 vbs[COLS];
+
+  auto load_scale_and_comp = [&](auto col) {
+    vcomp[col] = _mm512_loadu_si512(Bcomp + col * 16);
+    vbs[col] = _mm512_loadu_ps(Bs + col * 16);
+  };
+  Unroll<COLS>{}(load_scale_and_comp);
+
+  auto scalec = [&](auto col, int64_t m) {
+    // update As
+    vas = _mm512_set1_ps(As[m]);
+    // C = As * (C - Bcomp) * Bs
+    __m512i vc32 = _mm512_loadu_si512(Ctmp + m * BLOCK_N + col * 16);
+    vc[col] = _mm512_cvtepi32_ps(_mm512_sub_epi32(vc32, vcomp[col]));
+    vc[col] = _mm512_mul_ps(_mm512_mul_ps(vc[col], vas), vbs[col]);
+    _mm512_storeu_ps(C + m * BLOCK_N + col * 16, vc[col]);
+  };
+
+  for (int64_t m = 0; m < m_size; ++m) {
+    Unroll<COLS>{}(scalec, m);
   }
+#else
+  TORCH_CHECK(false, "scale_C: scalar path not implemented!");
+#endif
 }
 
 /// gemm for w13
 template <typename scalar_t, int BLOCK_M, int BLOCK_N>
 struct tinygemm_kernel_vnni {
   static inline void apply(
-      const uint8_t* __restrict__ A, const int8_t* __restrict__ B0, const int8_t* __restrict__ B1, scalar_t* __restrict__ C,
-      const float* __restrict__ As, const float* __restrict__ Bs0, const float* __restrict__ Bs1,
-      const int32_t* __restrict__ Bcomp0, const int32_t* __restrict__ Bcomp1,
-      int64_t K, int64_t lda, int64_t ldb, int64_t ldc) {
+      const uint8_t* __restrict__ A,
+      const int8_t* __restrict__ B0,
+      const int8_t* __restrict__ B1,
+      scalar_t* __restrict__ C,
+      const float* __restrict__ As,
+      const float* __restrict__ Bs0,
+      const float* __restrict__ Bs1,
+      const int32_t* __restrict__ Bcomp0,
+      const int32_t* __restrict__ Bcomp1,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc) {
     TORCH_CHECK(false, "tinygemm_kernel_nn: scalar path not implemented!");
   }
 };
@@ -127,11 +148,19 @@ struct tinygemm_kernel_vnni {
 template <int BLOCK_M, int BLOCK_N>
 struct tinygemm_kernel_vnni<at::BFloat16, BLOCK_M, BLOCK_N> {
   static inline void apply(
-      const uint8_t* __restrict__ A, const int8_t* __restrict__ B0, const int8_t* __restrict__ B1, at::BFloat16* __restrict__ C,
-      const float* __restrict__ As, const float* __restrict__ Bs0, const float* __restrict__ Bs1,
-      const int32_t* __restrict__ Bcomp0, const int32_t* __restrict__ Bcomp1,
-      int64_t K, int64_t lda, int64_t ldb, int64_t ldc) {
-
+      const uint8_t* __restrict__ A,
+      const int8_t* __restrict__ B0,
+      const int8_t* __restrict__ B1,
+      at::BFloat16* __restrict__ C,
+      const float* __restrict__ As,
+      const float* __restrict__ Bs0,
+      const float* __restrict__ Bs1,
+      const int32_t* __restrict__ Bcomp0,
+      const int32_t* __restrict__ Bcomp1,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc) {
     constexpr int ROWS = BLOCK_M;
     constexpr int COLS = BLOCK_N / 16;
     static_assert(COLS % 2 == 0);
@@ -143,9 +172,9 @@ struct tinygemm_kernel_vnni<at::BFloat16, BLOCK_M, BLOCK_N> {
     __m512i vc1[ROWS * COLS];
     __m512i vcomp0[COLS];
     __m512i vcomp1[COLS];
-    __m512  was;
-    __m512  vbs0[COLS];
-    __m512  vbs1[COLS];
+    __m512 vas;
+    __m512 vbs0[COLS];
+    __m512 vbs1[COLS];
 
     auto loadc = [&](auto i) {
       vc0[i] = _mm512_set1_epi32(0);
@@ -155,7 +184,7 @@ struct tinygemm_kernel_vnni<at::BFloat16, BLOCK_M, BLOCK_N> {
 
     const int64_t K4 = K >> 2;
     const int64_t lda4 = lda >> 2;
-    const int64_t ldb4 = ldb; // ldb * 4 >> 2;
+    const int64_t ldb4 = ldb;  // ldb * 4 >> 2;
     const int32_t* a_ptr = reinterpret_cast<const int32_t*>(A);
     const int32_t* b0_ptr = reinterpret_cast<const int32_t*>(B0);
     const int32_t* b1_ptr = reinterpret_cast<const int32_t*>(B1);
@@ -183,8 +212,8 @@ struct tinygemm_kernel_vnni<at::BFloat16, BLOCK_M, BLOCK_N> {
       constexpr int col = i % COLS;
 
       // load a scale
-      if constexpr(col == 0) {
-        was = _mm512_set1_ps(As[row]);
+      if constexpr (col == 0) {
+        vas = _mm512_set1_ps(As[row]);
       }
       // load b scale and vcomp
       if constexpr (row == 0) {
@@ -195,8 +224,8 @@ struct tinygemm_kernel_vnni<at::BFloat16, BLOCK_M, BLOCK_N> {
       }
       __m512 c0 = _mm512_cvtepi32_ps(_mm512_sub_epi32(vc0[i], vcomp0[col]));
       __m512 c1 = _mm512_cvtepi32_ps(_mm512_sub_epi32(vc1[i], vcomp1[col]));
-      vc0[i] = _mm512_castps_si512(_mm512_mul_ps(_mm512_mul_ps(c0, was), vbs0[col]));
-      vc1[i] = _mm512_castps_si512(_mm512_mul_ps(_mm512_mul_ps(c1, was), vbs1[col]));
+      vc0[i] = _mm512_castps_si512(_mm512_mul_ps(_mm512_mul_ps(c0, vas), vbs0[col]));
+      vc1[i] = _mm512_castps_si512(_mm512_mul_ps(_mm512_mul_ps(c1, vas), vbs1[col]));
     };
     Unroll<ROWS * COLS>{}(scalec);
 
@@ -221,19 +250,28 @@ struct tinygemm_kernel_vnni<at::BFloat16, BLOCK_M, BLOCK_N> {
         _mm512_storeu_si512(
             reinterpret_cast<__m512i*>((C + row * ldc + col * 16)),
             (__m512i)(_mm512_cvtne2ps_pbh(__m512(x1), __m512(x0))));
-        }
+      }
     };
     Unroll<ROWS * COLS>{}(storec);
   }
 };
 #endif
 
-#define LAUNCH_TINYGEMM_KERNEL_VNNI(MB_SIZE, NB_SIZE)                        \
-    tinygemm_kernel_vnni<scalar_t, MB_SIZE, NB_SIZE>::apply(                 \
-        A + mb_start * lda, B0 + nb_start * 4, B1 + nb_start * 4,            \
-        C + mb_start * ldc + nb_start, As + mb_start,                        \
-        Bs0 + nb_start, Bs1 + nb_start, Bcomp0 + nb_start, Bcomp1 + nb_start,\
-        K, lda, ldb, ldc);
+#define LAUNCH_TINYGEMM_KERNEL_VNNI(MB_SIZE, NB_SIZE)      \
+  tinygemm_kernel_vnni<scalar_t, MB_SIZE, NB_SIZE>::apply( \
+      A + mb_start * lda,                                  \
+      B0 + nb_start * 4,                                   \
+      B1 + nb_start * 4,                                   \
+      C + mb_start * ldc + nb_start,                       \
+      As + mb_start,                                       \
+      Bs0 + nb_start,                                      \
+      Bs1 + nb_start,                                      \
+      Bcomp0 + nb_start,                                   \
+      Bcomp1 + nb_start,                                   \
+      K,                                                   \
+      lda,                                                 \
+      ldb,                                                 \
+      ldc);
 
 template <typename scalar_t>
 void tinygemm_kernel(
@@ -250,7 +288,6 @@ void tinygemm_kernel(
     int64_t lda,
     int64_t ldb,
     int64_t ldc) {
-
   const int32_t* Bcomp0 = reinterpret_cast<const int32_t*>(B0 + block_size_n() * K);
   const int32_t* Bcomp1 = reinterpret_cast<const int32_t*>(B1 + block_size_n() * K);
 
@@ -266,12 +303,21 @@ void tinygemm_kernel(
       int64_t nb_start = nb * BLOCK_N;
       int64_t nb_size = std::min(BLOCK_N, N - nb_start);
 
-      switch(mb_size << 4 | nb_size >> 4) {
-        case 0x12: LAUNCH_TINYGEMM_KERNEL_VNNI(1, 32); break;
-        case 0x22: LAUNCH_TINYGEMM_KERNEL_VNNI(2, 32); break;
-        case 0x32: LAUNCH_TINYGEMM_KERNEL_VNNI(3, 32); break;
-        case 0x42: LAUNCH_TINYGEMM_KERNEL_VNNI(4, 32); break;
-        default: TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", nb_size);
+      switch (mb_size << 4 | nb_size >> 4) {
+        case 0x12:
+          LAUNCH_TINYGEMM_KERNEL_VNNI(1, 32);
+          break;
+        case 0x22:
+          LAUNCH_TINYGEMM_KERNEL_VNNI(2, 32);
+          break;
+        case 0x32:
+          LAUNCH_TINYGEMM_KERNEL_VNNI(3, 32);
+          break;
+        case 0x42:
+          LAUNCH_TINYGEMM_KERNEL_VNNI(4, 32);
+          break;
+        default:
+          TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", "nb_size");
       }
     }
   }
@@ -281,9 +327,16 @@ void tinygemm_kernel(
 template <typename scalar_t, int BLOCK_M, int BLOCK_N>
 struct tinygemm_kernel_vnni2 {
   static inline void apply(
-      const uint8_t* __restrict__ A, const int8_t* __restrict__ B, float* __restrict__ C,
-      const float* __restrict__ As, const float* __restrict__ Bs, const int32_t* __restrict__ Bcomp,
-      int64_t K, int64_t lda, int64_t ldb, int64_t ldc) {
+      const uint8_t* __restrict__ A,
+      const int8_t* __restrict__ B,
+      float* __restrict__ C,
+      const float* __restrict__ As,
+      const float* __restrict__ Bs,
+      const int32_t* __restrict__ Bcomp,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc) {
     TORCH_CHECK(false, "tinygemm_kernel_nn: scalar path not implemented!");
   }
 };
@@ -292,10 +345,16 @@ struct tinygemm_kernel_vnni2 {
 template <int BLOCK_M, int BLOCK_N>
 struct tinygemm_kernel_vnni2<at::BFloat16, BLOCK_M, BLOCK_N> {
   static inline void apply(
-      const uint8_t* __restrict__ A, const int8_t* __restrict__ B, float* __restrict__ C,
-      const float* __restrict__ As, const float* __restrict__ Bs, const int32_t* __restrict__ Bcomp,
-      int64_t K, int64_t lda, int64_t ldb, int64_t ldc) {
-
+      const uint8_t* __restrict__ A,
+      const int8_t* __restrict__ B,
+      float* __restrict__ C,
+      const float* __restrict__ As,
+      const float* __restrict__ Bs,
+      const int32_t* __restrict__ Bcomp,
+      int64_t K,
+      int64_t lda,
+      int64_t ldb,
+      int64_t ldc) {
     constexpr int ROWS = BLOCK_M;
     constexpr int COLS = BLOCK_N / 16;
     static_assert(COLS % 2 == 0);
@@ -304,17 +363,15 @@ struct tinygemm_kernel_vnni2<at::BFloat16, BLOCK_M, BLOCK_N> {
     __m512i vb[COLS];
     __m512i vc[ROWS * COLS];
     __m512i vcomp[COLS];
-    __m512  was;
-    __m512  vbs[COLS];
+    __m512 vas;
+    __m512 vbs[COLS];
 
-    auto loadc = [&](auto i) {
-      vc[i] = _mm512_set1_epi32(0);
-    };
+    auto loadc = [&](auto i) { vc[i] = _mm512_set1_epi32(0); };
     Unroll<ROWS * COLS>{}(loadc);
 
     const int64_t K4 = K >> 2;
     const int64_t lda4 = lda >> 2;
-    const int64_t ldb4 = ldb; // ldb * 4 >> 2;
+    const int64_t ldb4 = ldb;  // ldb * 4 >> 2;
     const int32_t* a_ptr = reinterpret_cast<const int32_t*>(A);
     const int32_t* b_ptr = reinterpret_cast<const int32_t*>(B);
 
@@ -339,8 +396,8 @@ struct tinygemm_kernel_vnni2<at::BFloat16, BLOCK_M, BLOCK_N> {
       constexpr int col = i % COLS;
 
       // load a scale
-      if constexpr(col == 0) {
-        was = _mm512_set1_ps(As[row]);
+      if constexpr (col == 0) {
+        vas = _mm512_set1_ps(As[row]);
       }
       // load b scale and vcomp per 2 vectors
       // also load bias if any
@@ -353,7 +410,7 @@ struct tinygemm_kernel_vnni2<at::BFloat16, BLOCK_M, BLOCK_N> {
         }
       }
       __m512 x = _mm512_cvtepi32_ps(_mm512_sub_epi32(vc[i], vcomp[col]));
-      x = _mm512_mul_ps(_mm512_mul_ps(x, was), vbs[col]);
+      x = _mm512_mul_ps(_mm512_mul_ps(x, vas), vbs[col]);
       _mm512_storeu_ps(reinterpret_cast<__m512*>(C + row * ldc + col * 16), x);
     };
     Unroll<ROWS * COLS>{}(storec);
@@ -361,11 +418,18 @@ struct tinygemm_kernel_vnni2<at::BFloat16, BLOCK_M, BLOCK_N> {
 };
 #endif
 
-#define LAUNCH_TINYGEMM_KERNEL_VNNI2(MB_SIZE, NB_SIZE)                       \
-    tinygemm_kernel_vnni2<scalar_t, MB_SIZE, NB_SIZE>::apply(                \
-        A + mb_start * lda, B + nb_start * 4, C + mb_start * ldc + nb_start, \
-        As + mb_start, Bs + nb_start, Bcomp + nb_start,                      \
-        K, lda, ldb, ldc);
+#define LAUNCH_TINYGEMM_KERNEL_VNNI2(MB_SIZE, NB_SIZE)      \
+  tinygemm_kernel_vnni2<scalar_t, MB_SIZE, NB_SIZE>::apply( \
+      A + mb_start * lda,                                   \
+      B + nb_start * 4,                                     \
+      C + mb_start * ldc + nb_start,                        \
+      As + mb_start,                                        \
+      Bs + nb_start,                                        \
+      Bcomp + nb_start,                                     \
+      K,                                                    \
+      lda,                                                  \
+      ldb,                                                  \
+      ldc);
 
 template <typename scalar_t>
 void tinygemm_kernel(
@@ -380,7 +444,6 @@ void tinygemm_kernel(
     int64_t lda,
     int64_t ldb,
     int64_t ldc) {
-
   // B compensation
   const int32_t* Bcomp = reinterpret_cast<const int32_t*>(B + block_size_n() * K);
 
@@ -396,18 +459,27 @@ void tinygemm_kernel(
       int64_t nb_start = nb * BLOCK_N;
       int64_t nb_size = std::min(BLOCK_N, N - nb_start);
 
-      switch(mb_size << 4 | nb_size >> 4) {
-        case 0x12: LAUNCH_TINYGEMM_KERNEL_VNNI2(1, 32); break;
-        case 0x22: LAUNCH_TINYGEMM_KERNEL_VNNI2(2, 32); break;
-        case 0x32: LAUNCH_TINYGEMM_KERNEL_VNNI2(3, 32); break;
-        case 0x42: LAUNCH_TINYGEMM_KERNEL_VNNI2(4, 32); break;
-        default: TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", nb_size);
+      switch (mb_size << 4 | nb_size >> 4) {
+        case 0x12:
+          LAUNCH_TINYGEMM_KERNEL_VNNI2(1, 32);
+          break;
+        case 0x22:
+          LAUNCH_TINYGEMM_KERNEL_VNNI2(2, 32);
+          break;
+        case 0x32:
+          LAUNCH_TINYGEMM_KERNEL_VNNI2(3, 32);
+          break;
+        case 0x42:
+          LAUNCH_TINYGEMM_KERNEL_VNNI2(4, 32);
+          break;
+        default:
+          TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", "nb_size");
       }
     }
   }
 }
 
-} // anonymous namespace
+}  // anonymous namespace
 
 template <typename scalar_t>
 void fused_experts_int8_kernel_impl(
@@ -433,7 +505,6 @@ void fused_experts_int8_kernel_impl(
     int64_t E,
     int64_t topk,
     int64_t num_tokens_post_pad) {
-
   // handle 2 tiles per block
   constexpr int64_t BLOCK_M = block_size_m();
   constexpr int64_t BLOCK_N = block_size_n();
@@ -441,11 +512,7 @@ void fused_experts_int8_kernel_impl(
   // stage 0: quantize input to uint8, [M, K]
   at::parallel_for(0, M, 0, [&](int64_t begin, int64_t end) {
     for (int64_t m = begin; m < end; ++m) {
-      quantize_row_int8<scalar_t>(
-          Aq_tmp + m * K,
-          As_tmp[m],
-          input + m * K,
-          K);
+      quantize_row_int8<scalar_t>(Aq_tmp + m * K, As_tmp[m], input + m * K, K);
     }
   });
 
@@ -462,66 +529,107 @@ void fused_experts_int8_kernel_impl(
 
   const int64_t stride_e = 2 * N * packed_K;
   const int64_t stride_n = packed_K;
+
+  int64_t avg_M = std::max(int64_t(1), M * topk / E);
+  const bool use_brgemm = can_use_brgemm<int8_t>(avg_M);
+
   // here we only parallel on half of 2N to fuse silu_and_mul with gemm
-  at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
+  parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
     // get local pointers
-    int tid = at::get_thread_num();
+    int tid = get_thread_num();
     uint8_t* __restrict__ A = A_tmp + tid * BLOCK_M * K;
+    int32_t* __restrict__ C0 = reinterpret_cast<int32_t*>(C_tmp) + tid * 2 * BLOCK_M * BLOCK_N;
+    int32_t* __restrict__ C1 = C0 + BLOCK_M * BLOCK_N;
 
     alignas(64) float As[BLOCK_M];
 
-    for (int64_t i = begin; i < end; ++i) {
-      int64_t mb = i / NB;
-      int64_t nb = i % NB;
-
-      // nb0 from top half and nb1 from bottom half
-      int64_t nb0 = nb, nb1 = nb + NB;
-      int64_t n_size = std::min(N - nb0 * BLOCK_N, BLOCK_N);
+    loop_2d<int8_t>(mb0, mb1, nb0, nb1, BLOCK_N * K * 2, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
+      // nb_upper from top half and nb_lower from bottom half
+      int64_t nb_upper = nb, nb_lower = nb + NB;
+      int64_t n_size = std::min(N - nb * BLOCK_N, BLOCK_N);
 
       // B shape [K, n_size] in vnni format
       int32_t expert_id = expert_ids[mb];
-      const int8_t* __restrict__ B0 = packed_w1 + expert_id * stride_e + nb0 * BLOCK_N * stride_n;
-      const int8_t* __restrict__ B1 = packed_w1 + expert_id * stride_e + nb1 * BLOCK_N * stride_n;
-      const float* __restrict__ Bs0 = w1s + expert_id * 2 * N + nb0 * BLOCK_N;
-      const float* __restrict__ Bs1 = w1s + expert_id * 2 * N + nb1 * BLOCK_N;
+      const int8_t* __restrict__ B0 = packed_w1 + expert_id * stride_e + nb_upper * BLOCK_N * stride_n;
+      const int8_t* __restrict__ B1 = packed_w1 + expert_id * stride_e + nb_lower * BLOCK_N * stride_n;
+      const float* __restrict__ Bs0 = w1s + expert_id * 2 * N + nb_upper * BLOCK_N;
+      const float* __restrict__ Bs1 = w1s + expert_id * 2 * N + nb_lower * BLOCK_N;
 
-      // 1.a load A
-      const int32_t* A_ids = sorted_ids + mb * BLOCK_M;
       int64_t m_size = offsets[mb + 1] - offsets[mb];
 
-      for (int64_t m = 0; m < m_size; ++m) {
-        int32_t index = A_ids[m] / topk;
-        copy_stub(A + m * K, Aq_tmp + index * K, K);
-        As[m] = As_tmp[index];
+      if (nb_offset == 0) {
+        // 1.a load A
+        const int32_t* A_ids = sorted_ids + mb * BLOCK_M;
+        for (int64_t m = 0; m < m_size; ++m) {
+          int32_t index = A_ids[m] / topk;
+          copy_stub(A + m * K, Aq_tmp + index * K, K);
+          As[m] = As_tmp[index];
+        }
       }
 
-      // fused 1.b: silu_and_mul(A @ B0, A @ B1)
-      const int64_t offset = offsets[mb];
-      tinygemm_kernel(
-          /* A     */ A,
-          /* B0    */ B0,
-          /* B1    */ B1,
-          /* C     */ ic1 + offset * N + nb * BLOCK_N,
-          /* As    */ As,
-          /* Bs0   */ Bs0,
-          /* Bs1   */ Bs1,
-          /* M     */ m_size,
-          /* N     */ n_size,
-          /* K     */ K,
-          /* lda   */ K,
-          /* ldb   */ n_size,
-          /* ldc   */ N);
+      if (use_brgemm) {
+        // 1.b gemm: C0 = A @ B0
+        at::native::cpublas::brgemm(
+            /* M     */ m_size,
+            /* N     */ n_size,
+            /* K     */ K,
+            /* lda   */ K,
+            /* ldb   */ n_size,
+            /* ldc   */ BLOCK_N,
+            /* add_C */ false,
+            /* A     */ A,
+            /* B     */ B0,
+            /* C     */ C0);
+
+        // 1.c gemm: C1 = A @ B1
+        at::native::cpublas::brgemm(
+            /* M     */ m_size,
+            /* N     */ n_size,
+            /* K     */ K,
+            /* lda   */ K,
+            /* ldb   */ n_size,
+            /* ldc   */ BLOCK_N,
+            /* add_C */ false,
+            /* A     */ A,
+            /* B     */ B1,
+            /* C     */ C1);
+
+        const int32_t* Bcomp0 = reinterpret_cast<const int32_t*>(B0 + block_size_n() * K);
+        const int32_t* Bcomp1 = reinterpret_cast<const int32_t*>(B1 + block_size_n() * K);
+
+        // 1.d silu and mul
+        const int64_t offset = offsets[mb];
+        silu_and_mul<scalar_t, BLOCK_N>(
+            ic1 + offset * N + nb * BLOCK_N, C0, C1, As, Bs0, Bs1, Bcomp0, Bcomp1, m_size, N);
+      } else {
+        // fused 1.bcd: silu_and_mul(A @ B0, A @ B1)
+        const int64_t offset = offsets[mb];
+        tinygemm_kernel(
+            /* A     */ A,
+            /* B0    */ B0,
+            /* B1    */ B1,
+            /* C     */ ic1 + offset * N + nb * BLOCK_N,
+            /* As    */ As,
+            /* Bs0   */ Bs0,
+            /* Bs1   */ Bs1,
+            /* M     */ m_size,
+            /* N     */ n_size,
+            /* K     */ K,
+            /* lda   */ K,
+            /* ldb   */ n_size,
+            /* ldc   */ N);
+      }
+    });
+
+    if (use_brgemm) {
+      at::native::cpublas::brgemm_release();
     }
   });
 
   // stage 1.5: quantize ic1 to uint8, [M * topk, N]
   at::parallel_for(0, M * topk, 0, [&](int64_t begin, int64_t end) {
     for (int64_t m = begin; m < end; ++m) {
-      quantize_row_int8<scalar_t>(
-          Aq_tmp + m * N,
-          As_tmp[m],
-          ic1 + m * N,
-          N);
+      quantize_row_int8<scalar_t>(Aq_tmp + m * N, As_tmp[m], ic1 + m * N, N);
     }
   });
 
@@ -535,16 +643,13 @@ void fused_experts_int8_kernel_impl(
   const int64_t stride_oc = packed_N;
 
   // parallel on [MB2, NB2]
-  at::parallel_for(0, MB2 * NB2, 0, [&](int64_t begin, int64_t end) {
+  parallel_2d(MB2, NB2, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
     // get local pointers
-    int tid = at::get_thread_num();
-    // we won't be using C1 for gemm2
+    int tid = get_thread_num();
     float* __restrict__ C = C_tmp + tid * 2 * BLOCK_M * BLOCK_N;
+    int32_t* __restrict__ C32 = reinterpret_cast<int32_t*>(C + BLOCK_M * BLOCK_N);
 
-    for (int64_t i = begin; i < end; ++i) {
-      int64_t mb = i / NB2;
-      int64_t nb = i % NB2;
-
+    loop_2d<int8_t>(mb0, mb1, nb0, nb1, BLOCK_N * IC, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
       int64_t m_size = offsets[mb + 1] - offsets[mb];
       int64_t n_size = std::min(OC - nb * BLOCK_N, BLOCK_N);
 
@@ -560,18 +665,36 @@ void fused_experts_int8_kernel_impl(
       const float* __restrict__ Bs = w2s + expert_id * K + nb * BLOCK_N;
 
       // 2.a gemm: C = A @ B
-      tinygemm_kernel<scalar_t>(
-          /* A     */ A,
-          /* B     */ B,
-          /* C     */ C,
-          /* As    */ As,
-          /* Bs    */ Bs,
-          /* M     */ m_size,
-          /* N     */ n_size,
-          /* K     */ IC,
-          /* lda   */ IC,
-          /* ldb   */ n_size,
-          /* ldc   */ BLOCK_N);
+      if (use_brgemm) {
+        at::native::cpublas::brgemm(
+            /* M     */ m_size,
+            /* N     */ n_size,
+            /* K     */ IC,
+            /* lda   */ IC,
+            /* ldb   */ n_size,
+            /* ldc   */ BLOCK_N,
+            /* add_C */ false,
+            /* A     */ A,
+            /* B     */ B,
+            /* C     */ C32);
+
+        // apply scales
+        const int32_t* Bcomp = reinterpret_cast<const int32_t*>(B + block_size_n() * IC);
+        scale_C<BLOCK_N>(C, C32, As, Bs, Bcomp, m_size);
+      } else {
+        tinygemm_kernel<scalar_t>(
+            /* A     */ A,
+            /* B     */ B,
+            /* C     */ C,
+            /* As    */ As,
+            /* Bs    */ Bs,
+            /* M     */ m_size,
+            /* N     */ n_size,
+            /* K     */ IC,
+            /* lda   */ IC,
+            /* ldb   */ n_size,
+            /* ldc   */ BLOCK_N);
+      }
 
       // 2.b copy from C to ic2 in original order
       //   and also mul topk_weights in float32
@@ -580,6 +703,10 @@ void fused_experts_int8_kernel_impl(
         float weight = topk_weights[index];
         copy_mul_stub(ic2 + index * K + nb * BLOCK_N, C + m * BLOCK_N, weight, n_size);
       }
+    });
+
+    if (use_brgemm) {
+      at::native::cpublas::brgemm_release();
     }
   });
 
@@ -592,17 +719,30 @@ void fused_experts_int8_kernel_impl(
   });
 }
 
-#define INSTANTIATE_MOE_INT8_TEMPLATE(TYPE)                                                  \
-  template void fused_experts_int8_kernel_impl<TYPE> (                                       \
-      TYPE* __restrict__ output, TYPE* __restrict__ ic1,                                     \
-      TYPE* __restrict__ ic2, uint8_t* __restrict__ A_tmp,                                   \
-      float* __restrict__ C_tmp, uint8_t* __restrict__ Aq_tmp,                               \
-      float* __restrict__ As_tmp, const TYPE* __restrict__ input,                            \
-      const int8_t* __restrict__ packed_w1, const int8_t* __restrict__ packed_w2,            \
-      const float* __restrict__ w1s, const float* __restrict__ w2s,                          \
-      const float* __restrict__ topk_weights, const int32_t* __restrict__ sorted_ids,        \
-      const int32_t* __restrict__ expert_ids, const int32_t* __restrict__ offsets,           \
-      int64_t M, int64_t N, int64_t K, int64_t E, int64_t topk, int64_t num_tokens_post_pad)
+#define INSTANTIATE_MOE_INT8_TEMPLATE(TYPE)           \
+  template void fused_experts_int8_kernel_impl<TYPE>( \
+      TYPE* __restrict__ output,                      \
+      TYPE* __restrict__ ic1,                         \
+      TYPE* __restrict__ ic2,                         \
+      uint8_t* __restrict__ A_tmp,                    \
+      float* __restrict__ C_tmp,                      \
+      uint8_t* __restrict__ Aq_tmp,                   \
+      float* __restrict__ As_tmp,                     \
+      const TYPE* __restrict__ input,                 \
+      const int8_t* __restrict__ packed_w1,           \
+      const int8_t* __restrict__ packed_w2,           \
+      const float* __restrict__ w1s,                  \
+      const float* __restrict__ w2s,                  \
+      const float* __restrict__ topk_weights,         \
+      const int32_t* __restrict__ sorted_ids,         \
+      const int32_t* __restrict__ expert_ids,         \
+      const int32_t* __restrict__ offsets,            \
+      int64_t M,                                      \
+      int64_t N,                                      \
+      int64_t K,                                      \
+      int64_t E,                                      \
+      int64_t topk,                                   \
+      int64_t num_tokens_post_pad)
 
 INSTANTIATE_MOE_INT8_TEMPLATE(at::BFloat16);
 INSTANTIATE_MOE_INT8_TEMPLATE(at::Half);
@@ -624,7 +764,6 @@ void shared_expert_int8_kernel_impl(
     int64_t M,
     int64_t N,
     int64_t K) {
-
   // handle 2 tiles per block
   constexpr int64_t BLOCK_M = block_size_m();
   constexpr int64_t BLOCK_N = block_size_n();
@@ -632,15 +771,11 @@ void shared_expert_int8_kernel_impl(
   // stage 0: quantize input to uint8, [M, K]
   at::parallel_for(0, M, 0, [&](int64_t begin, int64_t end) {
     for (int64_t m = begin; m < end; ++m) {
-      quantize_row_int8<scalar_t>(
-          Aq_tmp + m * K,
-          As_tmp[m],
-          input + m * K,
-          K);
+      quantize_row_int8<scalar_t>(Aq_tmp + m * K, As_tmp[m], input + m * K, K);
     }
   });
 
-   // stage 1: intermediate_cache1 = silu(hidden_states @ w1)
+  // stage 1: intermediate_cache1 = silu(hidden_states @ w1)
   const int64_t MB = div_up(M, BLOCK_M);
   const int64_t NB = div_up(N, BLOCK_N);
 
@@ -651,15 +786,20 @@ void shared_expert_int8_kernel_impl(
   const int64_t packed_N = get_row_size<int8_t>(N);
   const int64_t stride_n = packed_K;
 
+  const bool use_brgemm = can_use_brgemm<int8_t>(M);
+  const bool apply_scaling_factor = fused_experts_out != nullptr;
+
   // here we only parallel on half of 2N to fuse silu_and_mul with gemm
-  at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
-    for (int64_t i = begin; i < end; ++i) {
-      int64_t mb = i / NB;
-      int64_t nb = i % NB;
-
-      // nb0 from top half and nb1 from bottom half
-      int64_t nb0 = nb, nb1 = nb + NB;
-      int64_t n_size = std::min(N - nb0 * BLOCK_N, BLOCK_N);
+  parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
+    // get local pointers
+    int tid = get_thread_num();
+    int32_t* __restrict__ C0 = reinterpret_cast<int32_t*>(C_tmp) + tid * 2 * BLOCK_M * BLOCK_N;
+    int32_t* __restrict__ C1 = C0 + BLOCK_M * BLOCK_N;
+
+    loop_2d<int8_t>(mb0, mb1, nb0, nb1, BLOCK_N * K * 2, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
+      // nb_upper from top half and nb_lower from bottom half
+      int64_t nb_upper = nb, nb_lower = nb + NB;
+      int64_t n_size = std::min(N - nb * BLOCK_N, BLOCK_N);
       int64_t m_size = std::min(M - mb * BLOCK_M, BLOCK_M);
 
       // A shape [m_size, K]
@@ -667,37 +807,72 @@ void shared_expert_int8_kernel_impl(
       const float* As = As_tmp + mb * BLOCK_M;
 
       // B shape [K, n_size] in vnni format
-      const int8_t* __restrict__ B0 = packed_w1 + nb0 * BLOCK_N * stride_n;
-      const int8_t* __restrict__ B1 = packed_w1 + nb1 * BLOCK_N * stride_n;
-      const float* __restrict__ Bs0 = w1s + nb0 * BLOCK_N;
-      const float* __restrict__ Bs1 = w1s + nb1 * BLOCK_N;
-
-      // fused 1.b: silu_and_mul(A @ B0, A @ B1)
-      tinygemm_kernel(
-          /* A     */ A,
-          /* B0    */ B0,
-          /* B1    */ B1,
-          /* C     */ ic1 + mb * BLOCK_M * N + nb * BLOCK_N,
-          /* As    */ As,
-          /* Bs0   */ Bs0,
-          /* Bs1   */ Bs1,
-          /* M     */ m_size,
-          /* N     */ n_size,
-          /* K     */ K,
-          /* lda   */ K,
-          /* ldb   */ n_size,
-          /* ldc   */ N);
+      const int8_t* __restrict__ B0 = packed_w1 + nb_upper * BLOCK_N * stride_n;
+      const int8_t* __restrict__ B1 = packed_w1 + nb_lower * BLOCK_N * stride_n;
+      const float* __restrict__ Bs0 = w1s + nb_upper * BLOCK_N;
+      const float* __restrict__ Bs1 = w1s + nb_lower * BLOCK_N;
+
+      if (use_brgemm) {
+        // 1.b gemm: C0 = A @ B0
+        at::native::cpublas::brgemm(
+            /* M     */ m_size,
+            /* N     */ n_size,
+            /* K     */ K,
+            /* lda   */ K,
+            /* ldb   */ n_size,
+            /* ldc   */ BLOCK_N,
+            /* add_C */ false,
+            /* A     */ A,
+            /* B     */ B0,
+            /* C     */ C0);
+
+        // 1.c gemm: C1 = A @ B1
+        at::native::cpublas::brgemm(
+            /* M     */ m_size,
+            /* N     */ n_size,
+            /* K     */ K,
+            /* lda   */ K,
+            /* ldb   */ n_size,
+            /* ldc   */ BLOCK_N,
+            /* add_C */ false,
+            /* A     */ A,
+            /* B     */ B1,
+            /* C     */ C1);
+
+        const int32_t* Bcomp0 = reinterpret_cast<const int32_t*>(B0 + block_size_n() * K);
+        const int32_t* Bcomp1 = reinterpret_cast<const int32_t*>(B1 + block_size_n() * K);
+
+        // 1.d silu and mul
+        silu_and_mul<scalar_t, BLOCK_N>(
+            ic1 + mb * BLOCK_M * N + nb * BLOCK_N, C0, C1, As, Bs0, Bs1, Bcomp0, Bcomp1, m_size, N);
+      } else {
+        // fused 1.bcd: silu_and_mul(A @ B0, A @ B1)
+        tinygemm_kernel(
+            /* A     */ A,
+            /* B0    */ B0,
+            /* B1    */ B1,
+            /* C     */ ic1 + mb * BLOCK_M * N + nb * BLOCK_N,
+            /* As    */ As,
+            /* Bs0   */ Bs0,
+            /* Bs1   */ Bs1,
+            /* M     */ m_size,
+            /* N     */ n_size,
+            /* K     */ K,
+            /* lda   */ K,
+            /* ldb   */ n_size,
+            /* ldc   */ N);
+      }
+    });
+
+    if (use_brgemm) {
+      at::native::cpublas::brgemm_release();
     }
   });
 
   // stage 1.5: quantize ic1 to uint8, [M * topk, N]
   at::parallel_for(0, M, 0, [&](int64_t begin, int64_t end) {
     for (int64_t m = begin; m < end; ++m) {
-      quantize_row_int8<scalar_t>(
-          Aq_tmp + m * N,
-          As_tmp[m],
-          ic1 + m * N,
-          N);
+      quantize_row_int8<scalar_t>(Aq_tmp + m * N, As_tmp[m], ic1 + m * N, N);
     }
   });
 
@@ -710,16 +885,13 @@ void shared_expert_int8_kernel_impl(
   const int64_t stride_oc = packed_N;
 
   // parallel on [MB2, NB2]
-  at::parallel_for(0, MB2 * NB2, 0, [&](int64_t begin, int64_t end) {
+  parallel_2d(MB2, NB2, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
     // get local pointers
-    int tid = at::get_thread_num();
-    // we won't be using C1 for gemm2
+    int tid = get_thread_num();
     float* __restrict__ C = C_tmp + tid * 2 * BLOCK_M * BLOCK_N;
+    int32_t* __restrict__ C32 = reinterpret_cast<int32_t*>(C + BLOCK_M * BLOCK_N);
 
-    for (int64_t i = begin; i < end; ++i) {
-      int64_t mb = i / NB2;
-      int64_t nb = i % NB2;
-
+    loop_2d<int8_t>(mb0, mb1, nb0, nb1, BLOCK_N * IC, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
       int64_t m_size = std::min(M - mb * BLOCK_M, BLOCK_M);
       int64_t n_size = std::min(OC - nb * BLOCK_N, BLOCK_N);
 
@@ -731,39 +903,71 @@ void shared_expert_int8_kernel_impl(
       const int8_t* __restrict__ B = packed_w2 + nb * BLOCK_N * stride_oc;
       const float* __restrict__ Bs = w2s + nb * BLOCK_N;
 
-      // 2.a gemm: C = A @ B
-      tinygemm_kernel<scalar_t>(
-          /* A     */ A,
-          /* B     */ B,
-          /* C     */ C,
-          /* As    */ As,
-          /* Bs    */ Bs,
-          /* M     */ m_size,
-          /* N     */ n_size,
-          /* K     */ IC,
-          /* lda   */ IC,
-          /* ldb   */ n_size,
-          /* ldc   */ BLOCK_N);
+      if (use_brgemm) {
+        at::native::cpublas::brgemm(
+            /* M     */ m_size,
+            /* N     */ n_size,
+            /* K     */ IC,
+            /* lda   */ IC,
+            /* ldb   */ n_size,
+            /* ldc   */ BLOCK_N,
+            /* add_C */ false,
+            /* A     */ A,
+            /* B     */ B,
+            /* C     */ C32);
+
+        // apply scales
+        const int32_t* Bcomp = reinterpret_cast<const int32_t*>(B + block_size_n() * IC);
+        scale_C<BLOCK_N>(C, C32, As, Bs, Bcomp, m_size);
+      } else {
+        // 2.a gemm: C = A @ B
+        tinygemm_kernel<scalar_t>(
+            /* A     */ A,
+            /* B     */ B,
+            /* C     */ C,
+            /* As    */ As,
+            /* Bs    */ Bs,
+            /* M     */ m_size,
+            /* N     */ n_size,
+            /* K     */ IC,
+            /* lda   */ IC,
+            /* ldb   */ n_size,
+            /* ldc   */ BLOCK_N);
+      }
 
       // 2.b copy from C to output and add fused_experts_out
       scalar_t* __restrict__ out = output + mb * BLOCK_M * K + nb * BLOCK_N;
-      const scalar_t* __restrict__ fused_out = fused_experts_out + mb * BLOCK_M * K + nb * BLOCK_N;
+      const scalar_t* __restrict__ fused_out =
+          apply_scaling_factor ? fused_experts_out + mb * BLOCK_M * K + nb * BLOCK_N : nullptr;
       for (int64_t m = 0; m < m_size; ++m) {
-        add_mul_stub(out + m * K, C + m * BLOCK_N, fused_out + m * K, routed_scaling_factor, n_size);
+        const scalar_t* __restrict__ fused_out_row = apply_scaling_factor ? (fused_out + m * K) : nullptr;
+        add_mul_stub(out + m * K, C + m * BLOCK_N, fused_out_row, routed_scaling_factor, n_size);
       }
+    });
+
+    if (use_brgemm) {
+      at::native::cpublas::brgemm_release();
     }
   });
 }
 
-#define INSTANTIATE_SHARED_EXPERT_INT8_TEMPLATE(TYPE)                                        \
-  template void shared_expert_int8_kernel_impl<TYPE> (                                       \
-      TYPE* __restrict__ output, TYPE* __restrict__ ic1,                                     \
-      float* __restrict__ C_tmp, uint8_t* __restrict__ Aq_tmp,                               \
-      float* __restrict__ As_tmp, const TYPE* __restrict__ input,                            \
-      const int8_t* __restrict__ packed_w1, const int8_t* __restrict__ packed_w2,            \
-      const float* __restrict__ w1s, const float* __restrict__ w2s,                          \
-      const TYPE* __restrict__ fused_experts_out, float routed_scaling_factor,               \
-      int64_t M, int64_t N, int64_t K)
+#define INSTANTIATE_SHARED_EXPERT_INT8_TEMPLATE(TYPE) \
+  template void shared_expert_int8_kernel_impl<TYPE>( \
+      TYPE* __restrict__ output,                      \
+      TYPE* __restrict__ ic1,                         \
+      float* __restrict__ C_tmp,                      \
+      uint8_t* __restrict__ Aq_tmp,                   \
+      float* __restrict__ As_tmp,                     \
+      const TYPE* __restrict__ input,                 \
+      const int8_t* __restrict__ packed_w1,           \
+      const int8_t* __restrict__ packed_w2,           \
+      const float* __restrict__ w1s,                  \
+      const float* __restrict__ w2s,                  \
+      const TYPE* __restrict__ fused_experts_out,     \
+      float routed_scaling_factor,                    \
+      int64_t M,                                      \
+      int64_t N,                                      \
+      int64_t K)
 
 INSTANTIATE_SHARED_EXPERT_INT8_TEMPLATE(at::BFloat16);
 INSTANTIATE_SHARED_EXPERT_INT8_TEMPLATE(at::Half);
diff --git a/csrc/cpu/sgl-kernels/vec.h b/csrc/cpu/sgl-kernels/vec.h
index 160845c9b1cb..52b5ff7bedb1 100644
--- a/csrc/cpu/sgl-kernels/vec.h
+++ b/csrc/cpu/sgl-kernels/vec.h
@@ -1,53 +1,80 @@
 // Adapted from
 // https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
 
-#pragma once
-
 // clang-format off
 
+#pragma once
+
 #if defined(__AVX512F__) && defined(__AVX512BF16__) && defined(__AMX_BF16__)
 #define CPU_CAPABILITY_AVX512
 #endif
 
 #include <ATen/cpu/vec/functional.h>
 #include <ATen/cpu/vec/vec.h>
-
+#include <immintrin.h>
 namespace {
 
 using namespace at::vec;
 
-template <typename scalar_t,
-          typename std::enable_if_t<is_reduced_floating_point_v<scalar_t>, int> = 0>
+template <typename scalar_t, typename std::enable_if_t<is_reduced_floating_point_v<scalar_t>, int> = 0>
 inline Vectorized<scalar_t> convert_from_float_ext(const Vectorized<float>& a, const Vectorized<float>& b) {
   return at::vec::convert_from_float<scalar_t>(a, b);
 }
 
+template <typename scalar_t>
+inline void convert_from_float_and_store(scalar_t* out, const Vectorized<float>& a) {
+  float out_buffer[at::vec::Vectorized<float>::size()];
+  a.store(out_buffer);
+  for (int i = 0; i < 16; i++) {
+    out[i] = (scalar_t)out_buffer[i];
+  }
+}
+
+// allow f16, bf16
+template <typename scalar_t, typename std::enable_if_t<is_reduced_floating_point_v<scalar_t>, int> = 1>
+inline std::tuple<Vectorized<float>, Vectorized<float>> load_float_vec2(const scalar_t* __restrict__ data) {
+  using bVec = at::vec::Vectorized<scalar_t>;
+  using fVec = at::vec::Vectorized<float>;
+  bVec x_vec = bVec::loadu(data);
+  fVec x0, x1;
+  std::tie(x0, x1) = at::vec::convert_to_float(x_vec);
+  return std::make_tuple(x0, x1);
+}
+
+// allow  f32
+inline std::tuple<Vectorized<float>, Vectorized<float>> load_float_vec2(const float* __restrict__ data) {
+  using fVec = at::vec::Vectorized<float>;
+  fVec x0 = fVec::loadu(data);
+  fVec x1 = fVec::loadu(data + fVec::size());
+  return std::make_tuple(x0, x1);
+}
+
 #if defined(CPU_CAPABILITY_AVX512)
 
 // `at::vec::convert_from_float<>` from PyTorch doesn't have avx512-bf16 intrinsics
 // use native instruction for bfloat16->float32 conversion
 template <>
-inline Vectorized<at::BFloat16> convert_from_float_ext<at::BFloat16>(const Vectorized<float>& a, const Vectorized<float>& b) {
+inline Vectorized<at::BFloat16>
+convert_from_float_ext<at::BFloat16>(const Vectorized<float>& a, const Vectorized<float>& b) {
   return (__m512i)(_mm512_cvtne2ps_pbh(__m512(b), __m512(a)));
 }
 
-#define CVT_BF16_TO_FP32(a) \
-    _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_cvtepu16_epi32(a), 16))
+template <>
+inline void convert_from_float_and_store<at::BFloat16>(at::BFloat16* out, const Vectorized<float>& a) {
+  _mm256_storeu_si256((__m256i*)out, (__m256i)(_mm512_cvtneps_pbh(__m512(a))));
+}
+
+#define CVT_BF16_TO_FP32(a) _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_cvtepu16_epi32(a), 16))
 
-#define CVT_FP16_TO_FP32(a) \
-    _mm512_cvtps_ph(a, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC))
+#define CVT_FP16_TO_FP32(a) _mm512_cvtph_ps(a)
 
 // this doesn't handle NaN.
 inline __m512bh cvt_e4m3_bf16_intrinsic_no_nan(__m256i fp8_vec) {
   const __m512i x = _mm512_cvtepu8_epi16(fp8_vec);
-
-  const __m512i mant = _mm512_slli_epi16(_mm512_and_si512(x, _mm512_set1_epi16(0x07)), 4);
-  const __m512i raw_exp = _mm512_srli_epi16(_mm512_and_si512(x, _mm512_set1_epi16(0x78)), 3);
-  const __m512i exp = _mm512_slli_epi16(_mm512_add_epi16(raw_exp, _mm512_set1_epi16(120)), 7);
-  const __m512i nonsign = _mm512_or_si512(exp, mant);
-
-  const __m512i sign = _mm512_slli_epi16(_mm512_and_si512(x, _mm512_set1_epi16(0x80)), 8);
-  const __m512i combined = _mm512_or_si512(nonsign, sign);
+  __m512i combined = _mm512_add_epi16(x, _mm512_set1_epi16(0x0780));
+  combined = _mm512_slli_epi16(combined, 4);
+  combined = _mm512_and_si512(combined, _mm512_set1_epi16(0x87f0));
+  combined = _mm512_add_epi16(combined, _mm512_set1_epi16(0x3c00));
 
   const __mmask32 is_nonzero = _mm512_cmpneq_epi16_mask(x, _mm512_setzero_si512());
   return (__m512bh)_mm512_maskz_mov_epi16(is_nonzero, combined);
@@ -112,11 +139,77 @@ inline __m512bh CVT_FP8_TO_BF16(__m256i a) {
   return cvt_e4m3_bf16_intrinsic_with_denorm(a);
 #endif
 }
+// faster version of float8_e4m3fn conversion to bfloat16
+//
+// we mapped cuda implementation from below link and vectorized with avx512:
+// https://github.com/thu-pacman/chitu/blob/1ed2078ec26581ebdca05b7306d4385f86edaa7c/csrc/cuda/marlin/marlin_gemm/dequant.h#L387
+//
+inline __attribute__((always_inline)) __m512bh CVT_FP8_TO_BF16_EXT(__m256i a) {
+  const __m512i mask0 = _mm512_set1_epi16(0x80);  // sign bit
+  const __m512i mask1 = _mm512_set1_epi16(0x7F);  // exponent and mantissa
+  const __m512i mask2 = _mm512_set1_epi16(0x4000);
+
+  __m512i x = _mm512_cvtepu8_epi16(a);
+  __m512i vsign = _mm512_and_si512(x, mask0);
+  vsign = _mm512_slli_epi16(vsign, 8);
+
+  __m512i vexp_and_mant = _mm512_and_si512(x, mask1);
+  vexp_and_mant = _mm512_slli_epi16(vexp_and_mant, 4);
+
+  // _MM_TERNLOG_A | _MM_TERNLOG_B | _MM_TERNLOG_C: 0b11111110
+  return (__m512bh)(_mm512_ternarylogic_epi32(vsign, mask2, vexp_and_mant, 0b11111110));
+}
+
+// bias for conversion of fp8 to bf16 1/256 in float32
+#define kFP8_BIAS 0x3b800000
+
+// remove warning: ignoring attributes on template argument ‘__m512bh’ [-Wignored-attributes]
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wignored-attributes"
+
+#define MXFP4_VALUES \
+  -6.0f, -4.0f, -3.0f, -2.0f, -1.5f, -1.0f, -0.5f, -0.0f, 6.0f, 4.0f, 3.0f, 2.0f, 1.5f, 1.0f, 0.5f, 0.0f
+
+// convert 64 mxfp4 to 2x bf16 vectors, expect input 32-way packing
+inline std::tuple<__m512bh, __m512bh> cvt_mxfp4_e2m1_bf16_intrinsic_lut(__m256i a, __m512i s0, __m512i s1) {
+  // LUT
+  const __m512 values = _mm512_set_ps(MXFP4_VALUES);
+  const __m512i lut = (__m512i)(_mm512_cvtne2ps_pbh(values, values));
+
+  const __m512i abs_mask = _mm512_set1_epi16(0x7FFF);
+  const __m512i zero = _mm512_setzero_si512();
+
+  // expand values to 16-bit integers
+  __m512i x0 = _mm512_cvtepu8_epi16(a);
+  __m512i x1 = _mm512_srli_epi32(x0, 4);
+
+  // LUT to convert mxfp4 values to bf16
+  x0 = _mm512_permutexvar_epi16(x0, lut);
+  x1 = _mm512_permutexvar_epi16(x1, lut);
+
+  // check for zeros
+  __mmask32 mask0 = _mm512_cmp_epi16_mask(_mm512_and_si512(x0, abs_mask), zero, _MM_CMPINT_EQ);
+  __mmask32 mask1 = _mm512_cmp_epi16_mask(_mm512_and_si512(x1, abs_mask), zero, _MM_CMPINT_EQ);
+
+  // emulate bf16 mul with scale factor
+  x0 = _mm512_add_epi16(x0, s0);
+  x1 = _mm512_add_epi16(x1, s1);
+
+  // blend with zero
+  x0 = _mm512_mask_blend_epi16(mask0, x0, zero);
+  x1 = _mm512_mask_blend_epi16(mask1, x1, zero);
+
+  return std::make_tuple(__m512bh(x0), __m512bh(x1));
+}
+
+#define CVT_MXFP4_TO_BF16(a, s0, s1) cvt_mxfp4_e2m1_bf16_intrinsic_lut(a, s0, s1)
+
+#pragma GCC diagnostic pop
 
 #endif
 
 // vector to scalar reduction
-#if defined(CPU_CAPABILITY_AVX512) && 0
+#if defined(CPU_CAPABILITY_AVX512)
 inline float vec_reduce_sum(const Vectorized<float>& a) {
   return _mm512_reduce_add_ps(__m512(a));
 }
@@ -136,10 +229,9 @@ inline float vec_reduce_max(const Vectorized<float>& a) {
 
 // https://github.com/InternLM/lmdeploy/blob/086481ed84b59bee3b8e4274e5fc69620040c048/lmdeploy/pytorch/kernels/cuda/w8a8_triton_kernels.py#L282
 template <typename scalar_t>
-inline void quantize_row_int8(uint8_t* __restrict__ Aq, float& As,
-    const scalar_t* __restrict__ A, int64_t K, float eps = 1e-7) {
-
-  float amax = 0.f; // absolute max
+inline void
+quantize_row_int8(uint8_t* __restrict__ Aq, float& As, const scalar_t* __restrict__ A, int64_t K, float eps = 1e-7) {
+  float amax = 0.f;  // absolute max
   for (int64_t k = 0; k < K; ++k) {
     const float val = static_cast<float>(A[k]);
     amax = std::max(amax, std::abs(val));
@@ -158,9 +250,8 @@ inline void quantize_row_int8(uint8_t* __restrict__ Aq, float& As,
 
 #if defined(CPU_CAPABILITY_AVX512)
 template <>
-inline void quantize_row_int8<at::BFloat16>(uint8_t* __restrict__ Aq, float& As,
-    const at::BFloat16* __restrict__ A, int64_t K, float eps) {
-
+inline void quantize_row_int8<at::BFloat16>(
+    uint8_t* __restrict__ Aq, float& As, const at::BFloat16* __restrict__ A, int64_t K, float eps) {
   const __m512 signBit = _mm512_set1_ps(-0.0f);
   const __m512i off = _mm512_set1_epi32(128);
 
@@ -200,7 +291,7 @@ inline void quantize_row_int8<at::BFloat16>(uint8_t* __restrict__ Aq, float& As,
 // transpose utils
 // taken from my PR in ggml: https://github.com/ggml-org/llama.cpp/pull/8998
 #if defined(CPU_CAPABILITY_AVX512)
-inline void transpose_16x16_32bit(__m512i * v) {
+inline void transpose_16x16_32bit(__m512i* v) {
   __m512i v1[16];
   v1[0] = _mm512_unpacklo_epi32(v[0], v[1]);
   v1[1] = _mm512_unpackhi_epi32(v[0], v[1]);
@@ -293,16 +384,56 @@ inline std::tuple<__m512i, __m512i> transpose_2x32_16bit(__m512i r0, __m512i r1)
 }
 #pragma GCC diagnostic pop
 
-#endif
-
-// TODO: debug print, remove me later
-template<typename scalar_t>
-void print_array(scalar_t* ptr, int size) {
-  for (int d = 0; d < size; ++d) {
-    if (d % 16 == 0) { std::cout << std::endl; }
-    std::cout << ptr[d] << " ";
-  }
-  std::cout << std::endl;
+inline __attribute__((always_inline)) __m512 _mm512_fexp_u20_ps(const __m512 values) {
+  const __m512 vec_c0 = _mm512_set1_ps(0.00010703434948458272f);
+  const __m512 vec_c1 = _mm512_set1_ps(0.30354260500649682f);
+  const __m512 vec_c2 = _mm512_set1_ps(-0.22433836478672356);
+  const __m512 vec_c3 = _mm512_set1_ps(-0.079204240219773236);
+
+  const __m512 vec_exp_log2ef = _mm512_castsi512_ps(_mm512_set1_epi32(0x3fb8aa3b));  // log2(e)
+
+  const __m512 vec_a = _mm512_set1_ps(std::pow(2, 23) / std::log2(2));
+  const __m512 vec_b = _mm512_set1_ps(std::pow(2, 23) * 127.f);
+
+  const __m512 vec_ln_flt_min = _mm512_castsi512_ps(_mm512_set1_epi32(0xc2aeac50));
+  const __m512 vec_ln_flt_max = _mm512_castsi512_ps(_mm512_set1_epi32(0x42b17218));
+  __m512i vec_infinity = _mm512_set1_epi32(0x7F800000);
+  __m512i vec_zero = _mm512_setzero_epi32();
+
+  // Fast Exponential Computation on SIMD Architectures
+  // A. Cristiano I. Malossi, Yves Ineichen, Costas Bekas, and Alessandro
+  // Curioni exp(x) = 2**(x * log2(e))
+  //        = 2**xi * 2**xf   - TIPS we are using  the EEEE floating point
+  //        representation with identification to the exponent and the
+  //        mentissa
+  //  2**xf will be approximated to a polynomial of degree 3 computed with
+  //  Horner method
+  // mask for the boundary condition
+  auto min_mask = _mm512_cmp_ps_mask(values, vec_ln_flt_min, _CMP_LT_OS);
+  auto max_mask = _mm512_cmp_ps_mask(values, vec_ln_flt_max, _CMP_GT_OS);
+
+  // transformation with log2(e)
+  auto vec_src = _mm512_mul_ps(values, vec_exp_log2ef);
+  auto vec_fractional = _mm512_sub_ps(vec_src, _mm512_floor_ps(vec_src));
+
+  // compute polynomial using Horner Scheme, for superscalar processor
+  auto vec_res = _mm512_fmadd_ps(vec_fractional, vec_c3, vec_c2);
+  vec_res = _mm512_fmadd_ps(vec_fractional, vec_res, vec_c1);
+  vec_res = _mm512_fmadd_ps(vec_fractional, vec_res, vec_c0);
+
+  vec_src = _mm512_sub_ps(vec_src, vec_res);
+  // the tips is here, headache in perspective
+  auto tmp = _mm512_fmadd_ps(vec_a, vec_src, vec_b);
+  // headache bis - we loose precision with the cast but it "fits", but ok
+  // after f32 -> f16 later
+  __m512i casted_integer = _mm512_cvttps_epi32(tmp);
+  // boundary condition, lower than the min -> 0
+  casted_integer = _mm512_mask_mov_epi32(casted_integer, min_mask, vec_zero);
+  // boundary condition, larger than the max -> +oo
+  casted_integer = _mm512_mask_mov_epi32(casted_integer, max_mask, vec_infinity);
+  // final interpretation to float
+  return _mm512_castsi512_ps(casted_integer);
 }
+#endif
 
-} // anonymous namespace
+}  // anonymous namespace
diff --git a/csrc/cpu/sgl-kernels/vec_pack.h b/csrc/cpu/sgl-kernels/vec_pack.h
new file mode 100644
index 000000000000..d245d2a40743
--- /dev/null
+++ b/csrc/cpu/sgl-kernels/vec_pack.h
@@ -0,0 +1,299 @@
+// Adapted from
+// https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc/cpu
+
+// clang-format off
+
+// To use the transpose functions
+#include <ATen/native/cpu/utils.h>
+
+#include "vec.h"
+
+namespace {
+
+using namespace at::vec;
+
+template <typename index_t>
+inline index_t get_index(index_t* ind, int i) {
+  return (ind == nullptr) ? (index_t)i : ind[i];
+}
+
+#if defined(CPU_CAPABILITY_AVX512)
+// key: from [N, 32] to [32/2, N, 2]
+template <typename scalar_t, typename index_t>
+inline void pack_vnni_Nx32(
+    scalar_t* __restrict__ dst,
+    const scalar_t* __restrict__ src,
+    const index_t* __restrict__ ind,
+    int N,
+    int ld_src,
+    int ld_dst) {
+  __m512i vinputs[16];
+
+  int n = 0;
+  for (; n < N; ++n) {
+    index_t index = get_index(ind, n);
+    vinputs[n] = _mm512_loadu_si512(src + index * ld_src);
+  }
+  // padding with zero to avoid uninitialized vectors
+  for (; n < 16; ++n) {
+    vinputs[n] = _mm512_set1_epi32(0);
+  }
+
+  // pack key
+  transpose_16x16_32bit(vinputs);
+
+  const __mmask16 vmask = (1 << N) - 1;
+  for (int k = 0; k < 16; ++k) {
+    _mm512_mask_storeu_epi32(dst + k * ld_dst * 2, vmask, vinputs[k]);
+  }
+}
+
+template <typename scalar_t, typename index_t>
+inline void pack_vnni_N_remainder(
+    scalar_t* __restrict__ dst,
+    const scalar_t* __restrict__ src,
+    const index_t* __restrict__ ind,
+    int N,
+    int K,
+    int ld_src,
+    int ld_dst) {
+  __m512i vinputs[16];
+
+  int K2 = K >> 1;
+  const __mmask16 vmask = (1 << K2) - 1;
+
+  int n = 0;
+  for (; n < N; ++n) {
+    index_t index = get_index(ind, n);
+    vinputs[n] = _mm512_maskz_loadu_epi32(vmask, src + index * ld_src);
+  }
+  // padding with zero to avoid uninitialized vectors
+  for (; n < 16; ++n) {
+    vinputs[n] = _mm512_set1_epi32(0);
+  }
+
+  // pack key
+  transpose_16x16_32bit(vinputs);
+
+  const __mmask16 vmask2 = (1 << N) - 1;
+  for (int k = 0; k < K2; ++k) {
+    _mm512_mask_storeu_epi32(dst + k * ld_dst * 2, vmask2, vinputs[k]);
+  }
+}
+
+// value: from [K, 32] to [K/2, 32, 2]
+template <typename scalar_t, typename index_t>
+inline void pack_vnni_Kx32(
+    scalar_t* __restrict__ dst,
+    const scalar_t* __restrict__ src,
+    const index_t* __restrict__ ind,
+    int K,
+    int ld_src,
+    int ld_dst) {
+  __m512i vinputs[2];
+
+  int k = 0;
+  for (; k < K; ++k) {
+    index_t index = get_index(ind, k);
+    vinputs[k] = _mm512_loadu_si512(src + index * ld_src);
+  }
+  // padding with zero to avoid uninitialized vectors
+  for (; k < 2; ++k) {
+    vinputs[k] = _mm512_set1_epi32(0);
+  }
+
+  // pack value
+  __m512i d0, d1;
+  std::tie(d0, d1) = transpose_2x32_16bit(vinputs[0], vinputs[1]);
+  _mm512_storeu_si512(dst + 0 * ld_dst * 2, d0);
+  _mm512_storeu_si512(dst + 0 * ld_dst * 2 + 32, d1);
+}
+
+template <typename scalar_t, typename index_t>
+inline void pack_vnni_K_remainder(
+    scalar_t* __restrict__ dst,
+    const scalar_t* __restrict__ src,
+    const index_t* __restrict__ ind,
+    int K,
+    int N,
+    int ld_src,
+    int ld_dst) {
+  __m512i vinputs[2];
+
+  const __mmask32 vmask = (1 << N) - 1;
+
+  int k = 0;
+  for (; k < K; ++k) {
+    index_t index = get_index(ind, k);
+    vinputs[k] = _mm512_maskz_loadu_epi16(vmask, src + index * ld_src);
+  }
+  // padding with zero to avoid uninitialized vectors
+  for (; k < 2; ++k) {
+    vinputs[k] = _mm512_set1_epi32(0);
+  }
+
+  // pack value
+  __m512i d0, d1;
+  std::tie(d0, d1) = transpose_2x32_16bit(vinputs[0], vinputs[1]);
+
+  if (N <= 16) {
+    // 2N * 16bits: N * 32bits
+    const __mmask16 vmask2 = (1 << N) - 1;
+    _mm512_mask_storeu_epi32(dst + 0 * ld_dst * 2, vmask2, d0);
+  } else {
+    // 2(N-16) * 16bits: (N-16) * 32bits
+    const __mmask16 vmask2 = (1 << (N - 16)) - 1;
+    _mm512_storeu_epi32(dst + 0 * ld_dst * 2, d0);
+    _mm512_mask_storeu_epi32(dst + 0 * ld_dst * 2 + 32, vmask2, d1);
+  }
+}
+#endif
+
+// convert to vnni format
+// from [N, K/2, 2] to [K/2, N, 2] for bfloat16 and float16
+template <typename scalar_t, typename index_t, bool is_indexed>
+void pack_vnni(
+    scalar_t* __restrict__ dst,
+    const scalar_t* __restrict__ src,
+    const index_t* __restrict__ ind,
+    int N,
+    int K,
+    int ld_src,
+    int ld_dst) {
+#if defined(CPU_CAPABILITY_AVX512)
+  const int NB = div_up(N, 16);
+  const int KB = K / 32;
+  const int K_remainder = K - KB * 32;
+
+  for (int nb = 0; nb < NB; ++nb) {
+    int nb_size = std::min(N - nb * 16, 16);
+    for (int kb = 0; kb < KB; ++kb) {
+      // handle 16x512bits each block
+      pack_vnni_Nx32<scalar_t, index_t>(
+          /*    dst */ dst + ((kb * 32) >> 1) * ld_dst * 2 + nb * 16 * 2,
+          /*    src */ src + kb * 32 + (is_indexed ? 0 : nb * 16 * ld_src),
+          /*    ind */ is_indexed ? ind + nb * 16 : nullptr,
+          /*      N */ nb_size,
+          /* ld_src */ ld_src,
+          /* ld_dst */ ld_dst);
+    }
+    if (K_remainder > 0) {
+      pack_vnni_N_remainder<scalar_t, index_t>(
+          /*    dst */ dst + ((KB * 32) >> 1) * ld_dst * 2 + nb * 16 * 2,
+          /*    src */ src + KB * 32 + (is_indexed ? 0 : nb * 16 * ld_src),
+          /*    ind */ is_indexed ? ind + nb * 16 : nullptr,
+          /*      N */ nb_size,
+          /*      K */ K_remainder,
+          /* ld_src */ ld_src,
+          /* ld_dst */ ld_dst);
+    }
+  }
+#else
+  for (int n = 0; n < N; ++n) {
+    index_t index = get_index(ind, n);
+    for (int k = 0; k < K / 2; ++k) {
+      for (int d = 0; d < 2; ++d) {
+        dst[k * ld_dst * 2 + n * 2 + d] = src[index * ld_src + k * 2 + d];
+      }
+    }
+  }
+#endif
+}
+
+template <typename scalar_t>
+void pack_vnni(scalar_t* __restrict__ dst, const scalar_t* __restrict__ src, int N, int K, int ld_src, int ld_dst) {
+  pack_vnni<scalar_t, int32_t, false>(dst, src, nullptr, N, K, ld_src, ld_dst);
+}
+
+template <typename scalar_t, typename index_t>
+void pack_vnni(
+    scalar_t* __restrict__ dst,
+    const scalar_t* __restrict__ src,
+    const index_t* __restrict__ ind,
+    int N,
+    int K,
+    int ld_src,
+    int ld_dst) {
+  assert(ind != nullptr);
+  pack_vnni<scalar_t, index_t, true>(dst, src, ind, N, K, ld_src, ld_dst);
+}
+
+// convert to vnni format
+// from [K/2, 2, N] to [K/2, N, 2] for bfloat16 and float16
+template <typename scalar_t, typename index_t, bool is_indexed>
+void pack_vnni2(
+    scalar_t* __restrict__ dst,
+    const scalar_t* __restrict__ src,
+    const index_t* __restrict__ ind,
+    int K,
+    int N,
+    int ld_src,
+    int ld_dst) {
+#if defined(CPU_CAPABILITY_AVX512)
+  const int KB = div_up(K, 2);
+  const int NB = N / 32;
+  const int N_remainder = N - NB * 32;
+
+  for (int kb = 0; kb < KB; ++kb) {
+    int kb_size = std::min(K - kb * 2, 2);
+    for (int nb = 0; nb < NB; ++nb) {
+      // handle 2x512bits each block
+      pack_vnni_Kx32<scalar_t, index_t>(
+          /*    dst */ dst + ((kb * 2) >> 1) * ld_dst * 2 + nb * 32 * 2,
+          /*    src */ src + (is_indexed ? 0 : kb * 2 * ld_src) + nb * 32,
+          /*    ind */ is_indexed ? ind + kb * 2 : nullptr,
+          /*      K */ kb_size,
+          /* ld_src */ ld_src,
+          /* ld_dst */ ld_dst);
+    }
+    if (N_remainder > 0) {
+      pack_vnni_K_remainder(
+          /*    dst */ dst + ((kb * 2) >> 1) * ld_dst * 2 + NB * 32 * 2,
+          /*    src */ src + (is_indexed ? 0 : kb * 2 * ld_src) + NB * 32,
+          /*    ind */ is_indexed ? ind + kb * 2 : nullptr,
+          /*      K */ kb_size,
+          /*      N */ N_remainder,
+          /* ld_src */ ld_src,
+          /* ld_dst */ ld_dst);
+    }
+  }
+#else
+  int k = 0;
+  for (; k < (K >> 1) * 2; k += 2) {
+    index_t index0 = get_index(ind, k + 0);
+    index_t index1 = get_index(ind, k + 1);
+    for (int n = 0; n < N; ++n) {
+      dst[(k >> 1) * ld_dst * 2 + n * 2 + 0] = src[index0 * ld_src + n];
+      dst[(k >> 1) * ld_dst * 2 + n * 2 + 1] = src[index1 * ld_src + n];
+    }
+  }
+  if (K % 2 != 0) {
+    index_t index = get_index(ind, K - 1);
+    for (int n = 0; n < N; ++n) {
+      dst[(K >> 1) * ld_dst * 2 + n * 2 + 0] = src[index * ld_src + n];
+      dst[(K >> 1) * ld_dst * 2 + n * 2 + 1] = 0;
+    }
+    k += 2;
+  }
+#endif
+}
+
+template <typename scalar_t>
+void pack_vnni2(scalar_t* __restrict__ dst, const scalar_t* __restrict__ src, int K, int N, int ld_src, int ld_dst) {
+  pack_vnni2<scalar_t, int32_t, false>(dst, src, nullptr, K, N, ld_src, ld_dst);
+}
+
+template <typename scalar_t, typename index_t>
+void pack_vnni2(
+    scalar_t* __restrict__ dst,
+    const scalar_t* __restrict__ src,
+    const index_t* __restrict__ ind,
+    int K,
+    int N,
+    int ld_src,
+    int ld_dst) {
+  assert(ind != nullptr);
+  pack_vnni2<scalar_t, index_t, true>(dst, src, ind, K, N, ld_src, ld_dst);
+}
+
+}  // anonymous namespace
diff --git a/csrc/cpu/spec_decode_utils.cpp b/csrc/cpu/spec_decode_utils.cpp
new file mode 100644
index 000000000000..a76b8bc69376
--- /dev/null
+++ b/csrc/cpu/spec_decode_utils.cpp
@@ -0,0 +1,409 @@
+#include "cpu_types.hpp"
+
+#include <algorithm>
+
+namespace cpu_utils {
+
+void eagle_prepare_inputs_padded_kernel_impl(
+    const torch::Tensor& cu_num_draft_tokens,
+    const torch::Tensor& valid_sampled_tokens_count,
+    const torch::Tensor& query_start_loc_gpu,
+    torch::Tensor& token_indices_to_sample,
+    torch::Tensor& num_rejected_tokens_gpu, const int64_t num_reqs) {
+  const int64_t* cu_draft_ptr = cu_num_draft_tokens.data_ptr<int64_t>();
+  const int64_t* valid_count_ptr =
+      valid_sampled_tokens_count.data_ptr<int64_t>();
+  const int32_t* query_loc_ptr = query_start_loc_gpu.data_ptr<int32_t>();
+  int32_t* indices_out_ptr = token_indices_to_sample.data_ptr<int32_t>();
+  int64_t* rejected_out_ptr = num_rejected_tokens_gpu.data_ptr<int64_t>();
+
+#pragma omp parallel for
+  for (int64_t req_idx = 0; req_idx < num_reqs; ++req_idx) {
+    int64_t start_idx = req_idx == 0 ? 0 : cu_draft_ptr[req_idx - 1];
+    int64_t num_draft_tokens = cu_draft_ptr[req_idx] - start_idx;
+    int64_t num_valid_tokens = valid_count_ptr[req_idx];
+
+    int64_t num_rejected = 0;
+    if (num_draft_tokens > 0) {
+      num_rejected = num_draft_tokens + 1 - num_valid_tokens;
+    }
+
+    int32_t q_last_tok_idx = query_loc_ptr[req_idx + 1] - 1;
+    int32_t index_to_sample = q_last_tok_idx - num_rejected;
+
+    indices_out_ptr[req_idx] = index_to_sample;
+    rejected_out_ptr[req_idx] = num_rejected;
+  }
+}
+
+void eagle_prepare_next_token_padded_kernel_impl(
+    const torch::Tensor& sampled_token_ids,
+    const torch::Tensor& discard_request_mask,
+    const torch::Tensor& backup_next_token_ids, torch::Tensor& next_token_ids,
+    torch::Tensor& valid_sampled_tokens_count, const int64_t vocab_size,
+    const int64_t num_sampled_tokens_per_req, const int64_t num_reqs) {
+  const int64_t* sampled_ids_ptr = sampled_token_ids.data_ptr<int64_t>();
+  const bool* discard_mask_ptr = discard_request_mask.data_ptr<bool>();
+  const int64_t* backup_ids_ptr = backup_next_token_ids.data_ptr<int64_t>();
+  int64_t* next_ids_out_ptr = next_token_ids.data_ptr<int64_t>();
+  int64_t* valid_count_out_ptr = valid_sampled_tokens_count.data_ptr<int64_t>();
+
+  const int64_t stride = sampled_token_ids.stride(0);
+
+#pragma omp parallel for
+  for (int64_t req_idx = 0; req_idx < num_reqs; ++req_idx) {
+    const int64_t* row_ptr = sampled_ids_ptr + req_idx * stride;
+    int64_t valid_count = 0;
+    int64_t last_valid_token = -1;
+
+    for (int64_t pos = 0; pos < num_sampled_tokens_per_req; ++pos) {
+      int64_t token = row_ptr[pos];
+      if (token != -1 && token < vocab_size) {
+        valid_count++;
+        last_valid_token = token;
+      }
+    }
+
+    bool discard = discard_mask_ptr[req_idx];
+    if (discard) {
+      next_ids_out_ptr[req_idx] = backup_ids_ptr[req_idx];
+      valid_count_out_ptr[req_idx] = 0;
+    } else {
+      next_ids_out_ptr[req_idx] =
+          (valid_count > 0) ? last_valid_token : backup_ids_ptr[req_idx];
+      valid_count_out_ptr[req_idx] = valid_count;
+    }
+  }
+}
+
+void eagle_step_slot_mapping_metadata_kernel_impl(
+    const torch::Tensor& positions, const torch::Tensor& block_table,
+    torch::Tensor& seq_lens, torch::Tensor& out_clamped_positions,
+    torch::Tensor& out_slot_mapping, const int64_t block_size,
+    const int64_t max_model_len, const int64_t PAD_ID) {
+  const int64_t batch_size = positions.size(0);
+  const int64_t input_batch_size = out_slot_mapping.size(0);
+
+  const int64_t* pos_ptr = positions.data_ptr<int64_t>();
+  const int32_t* bt_ptr = block_table.data_ptr<int32_t>();
+  int32_t* seq_lens_ptr = seq_lens.data_ptr<int32_t>();
+  int64_t* out_clamped_ptr = out_clamped_positions.data_ptr<int64_t>();
+  int64_t* out_slot_ptr = out_slot_mapping.data_ptr<int64_t>();
+
+  const int64_t bt_stride = block_table.stride(0);
+  const int64_t n_blocks_per_req = block_table.size(1);
+
+#pragma omp parallel for
+  for (int64_t req_idx = 0; req_idx < input_batch_size; ++req_idx) {
+    if (req_idx >= batch_size) {
+      out_slot_ptr[req_idx] = PAD_ID;
+      continue;
+    }
+
+    int64_t position = pos_ptr[req_idx];
+    int64_t new_position = position + 1;
+    bool exceeds_max = new_position >= max_model_len;
+    int64_t clamped_position = exceeds_max ? 0 : new_position;
+
+    out_clamped_ptr[req_idx] = clamped_position;
+
+    int64_t block_number = clamped_position / block_size;
+    block_number = std::min(block_number, n_blocks_per_req - 1);
+    int32_t block_id = bt_ptr[req_idx * bt_stride + block_number];
+    int64_t slot_id = block_id * block_size + (clamped_position % block_size);
+    out_slot_ptr[req_idx] = exceeds_max ? PAD_ID : slot_id;
+
+    int32_t seq_len = seq_lens_ptr[req_idx];
+    int32_t new_seq_len = exceeds_max ? 1 : (seq_len + 1);
+    new_seq_len = std::min(new_seq_len, static_cast<int32_t>(max_model_len));
+    seq_lens_ptr[req_idx] = new_seq_len;
+  }
+}
+
+void copy_and_expand_eagle_inputs_kernel_impl(
+    const torch::Tensor& target_token_ids,
+    const torch::Tensor& target_positions, const torch::Tensor& next_token_ids,
+    torch::Tensor& out_input_ids, torch::Tensor& out_positions,
+    torch::Tensor& out_is_rejected_token_mask,
+    torch::Tensor& out_is_masked_token_mask,
+    torch::Tensor& out_new_token_indices,
+    torch::Tensor& out_hidden_state_mapping,
+    const torch::Tensor& query_start_loc, const torch::Tensor& query_end_loc,
+    const int64_t padding_token_id, const int64_t parallel_drafting_token_id,
+    const int64_t total_input_tokens,
+    const int64_t num_padding_slots_per_request, const bool shift_input_ids) {
+  const int64_t num_reqs = query_end_loc.size(0);
+
+  const int64_t* target_ids_ptr = target_token_ids.data_ptr<int64_t>();
+  const int64_t* target_pos_ptr = target_positions.data_ptr<int64_t>();
+  const int64_t* next_ids_ptr = next_token_ids.data_ptr<int64_t>();
+  const int32_t* query_start_ptr = query_start_loc.data_ptr<int32_t>();
+  const int32_t* query_end_ptr = query_end_loc.data_ptr<int32_t>();
+
+  int64_t* out_ids_ptr = out_input_ids.data_ptr<int64_t>();
+  int64_t* out_pos_ptr = out_positions.data_ptr<int64_t>();
+  bool* out_rej_mask_ptr = out_is_rejected_token_mask.data_ptr<bool>();
+  bool* out_mask_ptr = out_is_masked_token_mask.data_ptr<bool>();
+  int32_t* out_new_idx_ptr = out_new_token_indices.data_ptr<int32_t>();
+  int32_t* out_hidden_map_ptr = out_hidden_state_mapping.data_ptr<int32_t>();
+
+#pragma omp parallel for
+  for (int64_t req_idx = 0; req_idx < num_reqs; ++req_idx) {
+    int32_t q_start = query_start_ptr[req_idx];
+    int32_t next_q_start = query_start_ptr[req_idx + 1];
+    int32_t q_end = query_end_ptr[req_idx];
+
+    int64_t num_valid_tokens =
+        shift_input_ids ? (q_end - q_start) : (q_end - q_start + 1);
+    int64_t input_offset = shift_input_ids ? 1 : 0;
+
+    int64_t out_start = q_start + req_idx * (num_padding_slots_per_request -
+                                             (shift_input_ids ? 1 : 0));
+    int64_t num_rejected = next_q_start - q_end - 1;
+    int64_t total_output_tokens =
+        num_valid_tokens + num_padding_slots_per_request + num_rejected;
+
+    int64_t start_pos = target_pos_ptr[q_start];
+    int64_t bonus_token = next_ids_ptr[req_idx];
+
+    for (int64_t j = 0; j < total_output_tokens; ++j) {
+      int64_t out_idx = out_start + j;
+      bool is_valid = j < num_valid_tokens;
+      bool is_bonus = j == num_valid_tokens;
+      bool is_parallel = (j > num_valid_tokens) &&
+                         (j < num_valid_tokens + num_padding_slots_per_request);
+      bool is_rejected = j >= num_valid_tokens + num_padding_slots_per_request;
+
+      int64_t in_idx =
+          std::min(static_cast<int64_t>(q_start + input_offset + j),
+                   total_input_tokens - 1);
+
+      int64_t token_id = padding_token_id;
+      if (is_valid)
+        token_id = target_ids_ptr[in_idx];
+      else if (is_bonus)
+        token_id = bonus_token;
+      else if (is_parallel)
+        token_id = parallel_drafting_token_id;
+
+      out_ids_ptr[out_idx] = token_id;
+      out_pos_ptr[out_idx] = is_rejected ? 0 : (start_pos + j);
+      out_rej_mask_ptr[out_idx] = is_rejected;
+      out_mask_ptr[out_idx] = is_parallel;
+
+      if (is_bonus || is_parallel) {
+        int64_t new_token_local_idx = j - num_valid_tokens;
+        int64_t new_token_out_idx =
+            req_idx * num_padding_slots_per_request + new_token_local_idx;
+        out_new_idx_ptr[new_token_out_idx] = out_idx;
+      }
+    }
+
+    if (shift_input_ids) {
+      int64_t n_input = next_q_start - q_start;
+      for (int64_t j = 0; j < n_input; ++j) {
+        out_hidden_map_ptr[q_start + j] = out_start + j;
+      }
+    }
+  }
+}
+
+void rejection_greedy_sample_kernel_impl(
+    torch::Tensor& output_token_ids, const torch::Tensor& cu_num_draft_tokens,
+    const torch::Tensor& draft_token_ids, const torch::Tensor& target_argmax,
+    const torch::Tensor& bonus_token_ids,
+    const std::optional<torch::Tensor>& is_greedy, const int64_t max_spec_len) {
+  const int64_t batch_size = cu_num_draft_tokens.size(0);
+
+  int64_t* out_ptr = output_token_ids.data_ptr<int64_t>();
+  const int64_t* cu_draft_ptr = cu_num_draft_tokens.data_ptr<int64_t>();
+  const int64_t* draft_ids_ptr = draft_token_ids.data_ptr<int64_t>();
+  const int64_t* target_argmax_ptr = target_argmax.data_ptr<int64_t>();
+  const int64_t* bonus_ids_ptr = bonus_token_ids.data_ptr<int64_t>();
+  const bool* greedy_ptr =
+      is_greedy.has_value() ? is_greedy.value().data_ptr<bool>() : nullptr;
+
+  const int64_t out_stride = output_token_ids.stride(0);
+  const int64_t bonus_stride = bonus_token_ids.stride(0);
+
+#pragma omp parallel for
+  for (int64_t req_idx = 0; req_idx < batch_size; ++req_idx) {
+    if (greedy_ptr && !greedy_ptr[req_idx]) continue;
+
+    int64_t start_idx = req_idx == 0 ? 0 : cu_draft_ptr[req_idx - 1];
+    int64_t end_idx = cu_draft_ptr[req_idx];
+    int64_t num_draft_tokens = end_idx - start_idx;
+
+    bool rejected = false;
+    for (int64_t pos = 0; pos < num_draft_tokens; ++pos) {
+      int64_t target_id = target_argmax_ptr[start_idx + pos];
+      out_ptr[req_idx * out_stride + pos] = target_id;
+
+      if (draft_ids_ptr[start_idx + pos] != target_id) {
+        rejected = true;
+        break;
+      }
+    }
+
+    if (!rejected) {
+      out_ptr[req_idx * out_stride + num_draft_tokens] =
+          bonus_ids_ptr[req_idx * bonus_stride];
+    }
+  }
+}
+
+void rejection_random_sample_kernel_impl(
+    torch::Tensor& output_token_ids, const torch::Tensor& cu_num_draft_tokens,
+    const torch::Tensor& draft_token_ids,
+    const std::optional<torch::Tensor>& draft_probs,
+    const torch::Tensor& target_probs, const torch::Tensor& bonus_token_ids,
+    const torch::Tensor& recovered_token_ids,
+    const torch::Tensor& uniform_probs,
+    const std::optional<torch::Tensor>& is_greedy, const int64_t max_spec_len,
+    const int64_t vocab_size, const bool no_draft_probs) {
+  const int64_t batch_size = cu_num_draft_tokens.size(0);
+
+  int64_t* out_ptr = output_token_ids.data_ptr<int64_t>();
+  const int64_t* cu_draft_ptr = cu_num_draft_tokens.data_ptr<int64_t>();
+  const int64_t* draft_ids_ptr = draft_token_ids.data_ptr<int64_t>();
+  const float* draft_probs_ptr =
+      no_draft_probs ? nullptr : draft_probs.value().data_ptr<float>();
+  const float* target_probs_ptr = target_probs.data_ptr<float>();
+  const int64_t* bonus_ids_ptr = bonus_token_ids.data_ptr<int64_t>();
+  const int64_t* recovered_ids_ptr = recovered_token_ids.data_ptr<int64_t>();
+  const float* uniform_probs_ptr = uniform_probs.data_ptr<float>();
+  const bool* greedy_ptr =
+      is_greedy.has_value() ? is_greedy.value().data_ptr<bool>() : nullptr;
+
+  const int64_t out_stride = output_token_ids.stride(0);
+  const int64_t bonus_stride = bonus_token_ids.stride(0);
+  const int64_t target_stride = target_probs.stride(0);
+  const int64_t draft_probs_stride =
+      no_draft_probs ? 0 : draft_probs.value().stride(0);
+
+#pragma omp parallel for
+  for (int64_t req_idx = 0; req_idx < batch_size; ++req_idx) {
+    if (greedy_ptr && greedy_ptr[req_idx]) continue;
+
+    int64_t start_idx = req_idx == 0 ? 0 : cu_draft_ptr[req_idx - 1];
+    int64_t end_idx = cu_draft_ptr[req_idx];
+    int64_t num_draft_tokens = end_idx - start_idx;
+
+    bool rejected = false;
+    for (int64_t pos = 0; pos < num_draft_tokens; ++pos) {
+      int64_t token_idx = start_idx + pos;
+      int64_t draft_id = draft_ids_ptr[token_idx];
+
+      float p = target_probs_ptr[token_idx * target_stride + draft_id];
+      float q =
+          no_draft_probs
+              ? 1.0f
+              : draft_probs_ptr[token_idx * draft_probs_stride + draft_id];
+      float uniform_p = uniform_probs_ptr[token_idx];
+
+      float ratio = (q > 0.0f) ? (p / q) : 0.0f;
+
+      if (ratio >= uniform_p) {
+        out_ptr[req_idx * out_stride + pos] = draft_id;
+      } else {
+        out_ptr[req_idx * out_stride + pos] = recovered_ids_ptr[token_idx];
+        rejected = true;
+        break;
+      }
+    }
+
+    if (!rejected) {
+      out_ptr[req_idx * out_stride + num_draft_tokens] =
+          bonus_ids_ptr[req_idx * bonus_stride];
+    }
+  }
+}
+
+void expand_kernel_impl(torch::Tensor& output, const torch::Tensor& input,
+                        const torch::Tensor& cu_num_tokens,
+                        const int64_t replace_from, const int64_t replace_to) {
+  const int64_t batch_size = cu_num_tokens.size(0);
+  const int64_t* cu_tokens_ptr = cu_num_tokens.data_ptr<int64_t>();
+
+  int64_t* out_ptr = output.data_ptr<int64_t>();
+  const int64_t* in_ptr = input.data_ptr<int64_t>();
+
+#pragma omp parallel for
+  for (int64_t req_idx = 0; req_idx < batch_size; ++req_idx) {
+    int64_t start_idx = req_idx == 0 ? 0 : cu_tokens_ptr[req_idx - 1];
+    int64_t end_idx = cu_tokens_ptr[req_idx];
+    int64_t val = in_ptr[req_idx];
+
+    if (val == replace_from) {
+      val = replace_to;
+    }
+
+    for (int64_t i = start_idx; i < end_idx; ++i) {
+      out_ptr[i] = val;
+    }
+  }
+}
+
+void sample_recovered_tokens_kernel_impl(
+    torch::Tensor& output_token_ids, const torch::Tensor& cu_num_draft_tokens,
+    const torch::Tensor& draft_token_ids,
+    const std::optional<torch::Tensor>& draft_probs,
+    const torch::Tensor& target_probs, const torch::Tensor& inv_q,
+    const int64_t vocab_size, const bool no_draft_probs) {
+  const int64_t batch_size = cu_num_draft_tokens.size(0);
+
+  int64_t* out_ptr = output_token_ids.data_ptr<int64_t>();
+  const int64_t* cu_draft_ptr = cu_num_draft_tokens.data_ptr<int64_t>();
+  const int64_t* draft_ids_ptr = draft_token_ids.data_ptr<int64_t>();
+  const float* draft_probs_ptr =
+      no_draft_probs ? nullptr : draft_probs.value().data_ptr<float>();
+  const float* target_probs_ptr = target_probs.data_ptr<float>();
+  const float* inv_q_ptr = inv_q.data_ptr<float>();
+
+  const int64_t target_stride = target_probs.stride(0);
+  const int64_t draft_probs_stride =
+      no_draft_probs ? 0 : draft_probs.value().stride(0);
+  const int64_t inv_q_stride = inv_q.stride(0);
+
+#pragma omp parallel for
+  for (int64_t req_idx = 0; req_idx < batch_size; ++req_idx) {
+    int64_t start_idx = req_idx == 0 ? 0 : cu_draft_ptr[req_idx - 1];
+    int64_t end_idx = cu_draft_ptr[req_idx];
+    int64_t num_draft_tokens = end_idx - start_idx;
+
+    const float* req_inv_q = inv_q_ptr + req_idx * inv_q_stride;
+
+    for (int64_t pos = 0; pos < num_draft_tokens; ++pos) {
+      int64_t token_idx = start_idx + pos;
+      int64_t draft_id = draft_ids_ptr[token_idx];
+
+      const float* token_target_probs =
+          target_probs_ptr + token_idx * target_stride;
+      const float* token_draft_probs =
+          no_draft_probs ? nullptr
+                         : (draft_probs_ptr + token_idx * draft_probs_stride);
+
+      int64_t best_id = 0;
+      float best_val = -1.0f;
+
+      for (int64_t v = 0; v < vocab_size; ++v) {
+        float prob = token_target_probs[v];
+        if (no_draft_probs) {
+          if (v == draft_id) prob = 0.0f;
+        } else {
+          float diff = prob - token_draft_probs[v];
+          prob = diff > 0.0f ? diff : 0.0f;
+        }
+
+        float val = prob * req_inv_q[v];
+        if (val > best_val) {
+          best_val = val;
+          best_id = v;
+        }
+      }
+      out_ptr[token_idx] = best_id;
+    }
+  }
+}
+
+}  // namespace cpu_utils
diff --git a/csrc/cpu/torch_bindings.cpp b/csrc/cpu/torch_bindings.cpp
index 15b254662f0a..35350cf247e4 100644
--- a/csrc/cpu/torch_bindings.cpp
+++ b/csrc/cpu/torch_bindings.cpp
@@ -8,8 +8,6 @@
 // libraries use different ISAs.
 #define TORCH_EXTENSION_NAME _C
 
-std::string init_cpu_threads_env(const std::string& cpu_ids);
-
 void release_dnnl_matmul_handler(int64_t handler);
 
 int64_t create_onednn_scaled_mm_handler(const torch::Tensor& b,
@@ -58,27 +56,96 @@ void shm_send_tensor_list(int64_t handle,
 
 std::vector<torch::Tensor> shm_recv_tensor_list(int64_t handle, int64_t src);
 
+// SGL CPU kernels
+
 at::Tensor weight_packed_linear(at::Tensor& mat1, at::Tensor& mat2,
                                 const std::optional<at::Tensor>& bias,
                                 bool is_vnni);
 
 at::Tensor convert_weight_packed(at::Tensor& weight);
 
+at::Tensor convert_scale_packed(at::Tensor& scale);
+
 at::Tensor fused_experts_cpu(
     at::Tensor& hidden_states, at::Tensor& w1, at::Tensor& w2,
     at::Tensor& topk_weights, at::Tensor& topk_ids, bool inplace,
-    bool use_int8_w8a8, bool use_fp8_w8a16,
-    const std::optional<at::Tensor>& w1_scale,
+    int64_t moe_comp_method, const std::optional<at::Tensor>& w1_scale,
     const std::optional<at::Tensor>& w2_scale,
+    const std::optional<at::Tensor>& w1_zero,
+    const std::optional<at::Tensor>& w2_zero,
     const std::optional<std::vector<int64_t>> block_size,
-    const std::optional<at::Tensor>& a1_scale,
-    const std::optional<at::Tensor>& a2_scale, bool is_vnni);
+    const std::optional<at::Tensor>& w1_bias,
+    const std::optional<at::Tensor>& w2_bias,
+    const std::optional<double>& alpha, const std::optional<double>& limit,
+    bool is_vnni);
 
 at::Tensor int8_scaled_mm_with_quant(at::Tensor& mat1, at::Tensor& mat2,
                                      at::Tensor& scales2,
                                      const std::optional<at::Tensor>& bias,
                                      at::ScalarType out_dtype, bool is_vnni);
 
+// Adapted from sglang: FP8 W8A16 kernel
+at::Tensor fp8_scaled_mm_cpu(at::Tensor& mat1, at::Tensor& mat2,
+                             at::Tensor& scales2,
+                             std::vector<int64_t> block_size,
+                             const std::optional<at::Tensor>& bias,
+                             at::ScalarType out_dtype, bool is_vnni);
+
+// Adapted from sglang: INT4 W4A8 kernels
+std::tuple<at::Tensor, at::Tensor, at::Tensor> convert_weight_packed_scale_zp(
+    at::Tensor qweight,  // awq: (*, K, N / 8)  ||  gptq: (*, K / 8, N) , int32
+    at::Tensor qzeros,   // awq: (*, K / group_size, N / 8) ||  gptq: (*, K /
+                         // group_size, N / 8) , int32
+    at::Tensor scales,   // awq: (*, K / group_size, N) ||  gptq: (*, K /
+                         // group_size, N) , bfloat16
+    int64_t quant_method_4bit);
+
+at::Tensor int4_scaled_mm_cpu(at::Tensor& x, at::Tensor& w, at::Tensor& w_zeros,
+                              at::Tensor& w_scales,
+                              std::optional<at::Tensor> bias);
+
+// Adapted from sglang: GDN
+std::tuple<at::Tensor, at::Tensor> chunk_gated_delta_rule_cpu(
+    const at::Tensor& query, const at::Tensor& key, const at::Tensor& value,
+    const at::Tensor& g, const at::Tensor& beta,
+    const at::Tensor& initial_state, bool output_final_state,
+    const at::Tensor& cu_seqlens, bool head_first, bool use_qk_l2norm_in_kernel,
+    double eps = 1e-5);
+
+at::Tensor fused_sigmoid_gating_delta_rule_update_cpu(
+    const at::Tensor& A_log, const at::Tensor& dt_bias, const at::Tensor& q,
+    const at::Tensor& k, const at::Tensor& v, const at::Tensor& a,
+    const at::Tensor& b, at::Tensor& initial_state_source,
+    const at::Tensor& initial_state_indices, const at::Tensor& cu_seqlens,
+    bool use_qk_l2norm_in_kernel, double softplus_beta = 1.0,
+    double softplus_threshold = 20.0);
+
+std::tuple<at::Tensor, at::Tensor> fused_gdn_gating_cpu(
+    const at::Tensor& A_log, const at::Tensor& a, const at::Tensor& b,
+    const at::Tensor& dt_bias);
+
+// Adapted from sglang: casual_conv1d kernels
+at::Tensor causal_conv1d_weight_pack(const at::Tensor& weight);
+
+at::Tensor causal_conv1d_fwd_cpu(
+    const at::Tensor& x, const at::Tensor& weight,
+    const std::optional<at::Tensor>& bias,
+    const std::optional<at::Tensor>& conv_states,
+    const std::optional<at::Tensor>& query_start_loc,
+    const std::optional<at::Tensor>& cache_indices,
+    const std::optional<at::Tensor>& has_initial_state, bool silu_activation,
+    int64_t pad_slot_id, bool is_vnni);
+
+at::Tensor causal_conv1d_update_cpu(
+    const at::Tensor& x, const at::Tensor& conv_states,
+    const at::Tensor& weight, const std::optional<at::Tensor>& bias,
+    bool silu_activation, const std::optional<at::Tensor>& cache_seqlens,
+    const std::optional<at::Tensor>& conv_state_indices, int64_t pad_slot_id,
+    bool is_vnni);
+
+void activation_lut_bf16(torch::Tensor& out, torch::Tensor& input,
+                         const std::string& activation);
+
 torch::Tensor get_scheduler_metadata(
     const int64_t num_req, const int64_t num_heads_q,
     const int64_t num_heads_kv, const int64_t head_dim,
@@ -92,7 +159,9 @@ void cpu_attn_reshape_and_cache(const torch::Tensor& key,
                                 torch::Tensor& key_cache,
                                 torch::Tensor& value_cache,
                                 const torch::Tensor& slot_mapping,
-                                const std::string& isa);
+                                const std::string& isa, const double k_scale,
+                                const double v_scale,
+                                const std::string& kv_cache_dtype);
 
 void cpu_attention_with_kv_cache(
     const torch::Tensor& query, const torch::Tensor& key_cache,
@@ -103,7 +172,8 @@ void cpu_attention_with_kv_cache(
     const int64_t sliding_window_left, const int64_t sliding_window_right,
     const torch::Tensor& block_table, const double softcap,
     const torch::Tensor& scheduler_metadata,
-    const std::optional<torch::Tensor>& s_aux);
+    const std::optional<torch::Tensor>& s_aux, const double k_scale,
+    const double v_scale, const std::string& kv_cache_dtype);
 
 // Note: just for avoiding importing errors
 void placeholder_op() { TORCH_CHECK(false, "Unimplemented"); }
@@ -132,6 +202,63 @@ void compute_slot_mapping_kernel_impl(const torch::Tensor query_start_loc,
                                       torch::Tensor slot_mapping,
                                       const int64_t block_size);
 
+void init_cpu_memory_env(std::vector<int64_t> node_ids);
+
+namespace cpu_utils {
+void eagle_prepare_inputs_padded_kernel_impl(
+    const torch::Tensor& cu_num_draft_tokens,
+    const torch::Tensor& valid_sampled_tokens_count,
+    const torch::Tensor& query_start_loc_gpu,
+    torch::Tensor& token_indices_to_sample,
+    torch::Tensor& num_rejected_tokens_gpu, const int64_t num_reqs);
+void eagle_prepare_next_token_padded_kernel_impl(
+    const torch::Tensor& sampled_token_ids,
+    const torch::Tensor& discard_request_mask,
+    const torch::Tensor& backup_next_token_ids, torch::Tensor& next_token_ids,
+    torch::Tensor& valid_sampled_tokens_count, const int64_t vocab_size,
+    const int64_t num_sampled_tokens_per_req, const int64_t num_reqs);
+void eagle_step_slot_mapping_metadata_kernel_impl(
+    const torch::Tensor& positions, const torch::Tensor& block_table,
+    torch::Tensor& seq_lens, torch::Tensor& out_clamped_positions,
+    torch::Tensor& out_slot_mapping, const int64_t block_size,
+    const int64_t max_model_len, const int64_t PAD_ID);
+void copy_and_expand_eagle_inputs_kernel_impl(
+    const torch::Tensor& target_token_ids,
+    const torch::Tensor& target_positions, const torch::Tensor& next_token_ids,
+    torch::Tensor& out_input_ids, torch::Tensor& out_positions,
+    torch::Tensor& out_is_rejected_token_mask,
+    torch::Tensor& out_is_masked_token_mask,
+    torch::Tensor& out_new_token_indices,
+    torch::Tensor& out_hidden_state_mapping,
+    const torch::Tensor& query_start_loc, const torch::Tensor& query_end_loc,
+    const int64_t padding_token_id, const int64_t parallel_drafting_token_id,
+    const int64_t total_input_tokens,
+    const int64_t num_padding_slots_per_request, const bool shift_input_ids);
+void rejection_greedy_sample_kernel_impl(
+    torch::Tensor& output_token_ids, const torch::Tensor& cu_num_draft_tokens,
+    const torch::Tensor& draft_token_ids, const torch::Tensor& target_argmax,
+    const torch::Tensor& bonus_token_ids,
+    const std::optional<torch::Tensor>& is_greedy, const int64_t max_spec_len);
+void rejection_random_sample_kernel_impl(
+    torch::Tensor& output_token_ids, const torch::Tensor& cu_num_draft_tokens,
+    const torch::Tensor& draft_token_ids,
+    const std::optional<torch::Tensor>& draft_probs,
+    const torch::Tensor& target_probs, const torch::Tensor& bonus_token_ids,
+    const torch::Tensor& recovered_token_ids,
+    const torch::Tensor& uniform_probs,
+    const std::optional<torch::Tensor>& is_greedy, const int64_t max_spec_len,
+    const int64_t vocab_size, const bool no_draft_probs);
+void expand_kernel_impl(torch::Tensor& output, const torch::Tensor& input,
+                        const torch::Tensor& cu_num_tokens,
+                        const int64_t replace_from, const int64_t replace_to);
+void sample_recovered_tokens_kernel_impl(
+    torch::Tensor& output_token_ids, const torch::Tensor& cu_num_draft_tokens,
+    const torch::Tensor& draft_token_ids,
+    const std::optional<torch::Tensor>& draft_probs,
+    const torch::Tensor& target_probs, const torch::Tensor& inv_q,
+    const int64_t vocab_size, const bool no_draft_probs);
+}  // namespace cpu_utils
+
 TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
   // vLLM custom ops
 
@@ -170,6 +297,15 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
   ops.def("gelu_quick(Tensor! out, Tensor input) -> ()");
   ops.impl("gelu_quick", torch::kCPU, &gelu_quick);
 
+#if (defined(__aarch64__) && !defined(__APPLE__))
+
+  ops.def(
+      "activation_lut_bf16(Tensor! out, Tensor input, str activation)"
+      " -> ()");
+  ops.impl("activation_lut_bf16", torch::kCPU, &activation_lut_bf16);
+
+#endif  // (defined(__aarch64__) && !defined(__APPLE__))
+
   // Layernorm
   // Apply Root Mean Square (RMS) Normalization to the input tensor.
   ops.def(
@@ -188,12 +324,13 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
   ops.def(
       "rotary_embedding(Tensor positions, Tensor! query,"
       "                 Tensor!? key, int head_size,"
-      "                 Tensor cos_sin_cache, bool is_neox) -> ()");
+      "                 Tensor cos_sin_cache, bool is_neox, int "
+      "rope_dim_offset=0, bool inverse=False) -> ()");
   ops.impl("rotary_embedding", torch::kCPU, &rotary_embedding);
 
   // Quantization
-#if defined(__AVX512F__) || (defined(__aarch64__) && !defined(__APPLE__)) || \
-    defined(__powerpc64__)
+#if defined(__AVX512F__) || defined(__AVX2__) || \
+    (defined(__aarch64__) && !defined(__APPLE__)) || defined(__powerpc64__)
   // Helper function to release oneDNN handlers
   ops.def("release_dnnl_matmul_handler(int handler) -> ()",
           &release_dnnl_matmul_handler);
@@ -273,11 +410,15 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
   ops.impl("weight_packed_linear", torch::kCPU, &weight_packed_linear);
   ops.def("convert_weight_packed(Tensor! weight) -> Tensor");
   ops.impl("convert_weight_packed", torch::kCPU, &convert_weight_packed);
+  ops.def("convert_scale_packed(Tensor! scale) -> Tensor");
+  ops.impl("convert_scale_packed", torch::kCPU, &convert_scale_packed);
   ops.def(
-      "fused_experts_cpu(Tensor! hidden_states, Tensor w1, Tensor w2, Tensor "
-      "topk_weights, Tensor topk_ids, bool inplace, bool use_int8_w8a8, bool "
-      "use_fp8_w8a16, Tensor? w1_scale, Tensor? w2_scale, SymInt[]? "
-      "block_size, Tensor? a1_scale, Tensor? a2_scale, bool is_vnni) -> "
+      "fused_experts_cpu(Tensor hidden_states, Tensor w1, Tensor w2, Tensor "
+      "topk_weights, Tensor topk_ids, bool "
+      "inplace, int moe_comp_method, Tensor? w1_scale, Tensor? w2_scale, "
+      "Tensor? w1_zero, Tensor? w2_zero, int[]? block_size, "
+      "Tensor? w1_bias, Tensor? w2_bias, float? alpha, float? limit, "
+      "bool is_vnni) -> "
       "Tensor");
   ops.impl("fused_experts_cpu", torch::kCPU, &fused_experts_cpu);
   ops.def(
@@ -285,6 +426,67 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
       "Tensor? bias, ScalarType out_dtype, bool is_vnni) -> Tensor");
   ops.impl("int8_scaled_mm_with_quant", torch::kCPU,
            &int8_scaled_mm_with_quant);
+
+  // Adapted from sglang: INT4 W4A8 kernels
+  ops.def(
+      "convert_weight_packed_scale_zp(Tensor weight, Tensor qzeros, Tensor "
+      "scales, int quant_method_4bit) -> (Tensor, "
+      "Tensor, Tensor)");
+  ops.impl("convert_weight_packed_scale_zp", torch::kCPU,
+           &convert_weight_packed_scale_zp);
+
+  ops.def(
+      "int4_scaled_mm_cpu(Tensor(a0!) x, Tensor(a1!) w, Tensor(a2!) w_zeros, "
+      "Tensor(a3!) w_scales, Tensor? bias) -> Tensor");
+  ops.impl("int4_scaled_mm_cpu", torch::kCPU, &int4_scaled_mm_cpu);
+
+  // Adapted from sglang: FP8 W8A16 kernel
+  ops.def(
+      "fp8_scaled_mm_cpu(Tensor(a0!) mat1, Tensor(a1!) mat2, Tensor(a2!) "
+      "scales2, SymInt[] block_size, Tensor? bias, ScalarType out_dtype, "
+      "bool is_vnni) -> Tensor");
+  ops.impl("fp8_scaled_mm_cpu", torch::kCPU, &fp8_scaled_mm_cpu);
+
+  // Adapted from sglang: GDN kernels
+  ops.def(
+      "chunk_gated_delta_rule_cpu(Tensor query, Tensor key, Tensor value, "
+      "Tensor g, Tensor beta, "
+      "Tensor initial_state, bool output_final_state, Tensor cu_seqlens, bool "
+      "head_first, "
+      "bool use_qk_l2norm_in_kernel, float eps=1e-5) -> (Tensor, Tensor)");
+  ops.impl("chunk_gated_delta_rule_cpu", torch::kCPU,
+           &chunk_gated_delta_rule_cpu);
+  ops.def(
+      "fused_sigmoid_gating_delta_rule_update_cpu(Tensor A_log, Tensor "
+      "dt_bias, Tensor q, Tensor k, Tensor v, Tensor "
+      "a, Tensor b, Tensor(a!) initial_state_source, Tensor "
+      "initial_state_indices, Tensor cu_seqlens, bool "
+      "use_qk_l2norm_in_kernel, float softplus_beta=1.0, float "
+      "softplus_threshold=20.0) -> Tensor");
+  ops.impl("fused_sigmoid_gating_delta_rule_update_cpu", torch::kCPU,
+           &fused_sigmoid_gating_delta_rule_update_cpu);
+  ops.def(
+      "fused_gdn_gating_cpu(Tensor A_log, Tensor a, Tensor b, Tensor dt_bias) "
+      "-> (Tensor, Tensor)");
+  ops.impl("fused_gdn_gating_cpu", torch::kCPU, &fused_gdn_gating_cpu);
+
+  // Adapted from sglang: casual_conv1d kernels
+  ops.def("causal_conv1d_weight_pack(Tensor weight) -> Tensor");
+  ops.impl("causal_conv1d_weight_pack", torch::kCPU,
+           &causal_conv1d_weight_pack);
+  ops.def(
+      "causal_conv1d_fwd_cpu(Tensor x, Tensor weight, Tensor? bias, Tensor? "
+      "conv_states, Tensor? query_start_loc,"
+      "Tensor? cache_indices, Tensor? has_initial_state, bool silu_activation, "
+      "int pad_slot_id, bool is_vnni) -> "
+      "Tensor");
+  ops.impl("causal_conv1d_fwd_cpu", torch::kCPU, &causal_conv1d_fwd_cpu);
+  ops.def(
+      "causal_conv1d_update_cpu(Tensor x, Tensor(a!) conv_states, Tensor "
+      "weight, Tensor? bias, bool silu_activation,"
+      "Tensor? cache_seqlens, Tensor? conv_state_indices, int pad_slot_id, "
+      "bool is_vnni) -> Tensor");
+  ops.impl("causal_conv1d_update_cpu", torch::kCPU, &causal_conv1d_update_cpu);
 #endif
 
   // CPU attention kernels
@@ -296,15 +498,18 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
       &get_scheduler_metadata);
   ops.def(
       "cpu_attn_reshape_and_cache(Tensor key, Tensor value, Tensor(a2!) "
-      "key_cache, Tensor(a3!) value_cache, Tensor slot_mapping, str "
-      "isa) -> ()",
+      "key_cache, Tensor(a3!) value_cache, Tensor slot_mapping, str isa, "
+      "float k_scale=1.0, float v_scale=1.0, str kv_cache_dtype=\"auto\") -> "
+      "()",
       &cpu_attn_reshape_and_cache);
   ops.def(
       "cpu_attention_with_kv_cache(Tensor query, Tensor key_cache, Tensor "
       "value_cache, Tensor(a3!) output, Tensor query_start_loc, Tensor "
       "seq_lens, float scale, bool causal, Tensor? alibi_slopes, SymInt "
       "sliding_window_left, SymInt sliding_window_right, Tensor block_table, "
-      "float softcap, Tensor scheduler_metadata, Tensor? s_aux) -> ()",
+      "float softcap, Tensor scheduler_metadata, Tensor? s_aux, "
+      "float k_scale=1.0, float v_scale=1.0, str kv_cache_dtype=\"auto\") -> "
+      "()",
       &cpu_attention_with_kv_cache);
 
   // placeholders
@@ -334,7 +539,6 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
       "str act, str isa) -> ()");
   ops.impl("cpu_fused_moe", torch::kCPU, &cpu_fused_moe);
 #endif
-  ops.def("init_cpu_threads_env(str cpu_ids) -> str", &init_cpu_threads_env);
   ops.def(
       "mla_decode_kvcache("
       "   Tensor! out, Tensor query, Tensor kv_cache,"
@@ -346,6 +550,72 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
       "positions, Tensor block_table, Tensor(a3!) slot_mapping, SymInt "
       "block_size) -> ()",
       &compute_slot_mapping_kernel_impl);
+
+  ops.def("init_cpu_memory_env(SymInt[] node_ids) -> ()", &init_cpu_memory_env);
+
+  // Speculative decoding kernels
+  ops.def(
+      "eagle_prepare_inputs_padded_kernel_impl(Tensor cu_num_draft_tokens, "
+      "Tensor valid_sampled_tokens_count, Tensor query_start_loc_gpu, "
+      "Tensor(a3!) token_indices_to_sample, "
+      "Tensor(a4!) num_rejected_tokens_gpu, "
+      "SymInt num_reqs) -> ()",
+      &cpu_utils::eagle_prepare_inputs_padded_kernel_impl);
+  ops.def(
+      "eagle_prepare_next_token_padded_kernel_impl("
+      "Tensor sampled_token_ids, Tensor discard_request_mask, "
+      "Tensor backup_next_token_ids, Tensor(a3!) next_token_ids, "
+      "Tensor(a4!) valid_sampled_tokens_count, SymInt vocab_size, "
+      "SymInt num_sampled_tokens_per_req, SymInt num_reqs) -> ()",
+      &cpu_utils::eagle_prepare_next_token_padded_kernel_impl);
+  ops.def(
+      "eagle_step_slot_mapping_metadata_kernel_impl("
+      "Tensor positions, Tensor block_table, Tensor(a2!) seq_lens, "
+      "Tensor(a3!) out_clamped_positions, Tensor(a4!) out_slot_mapping, "
+      "SymInt block_size, SymInt max_model_len, SymInt PAD_ID) -> ()",
+      &cpu_utils::eagle_step_slot_mapping_metadata_kernel_impl);
+  ops.def(
+      "copy_and_expand_eagle_inputs_kernel_impl("
+      "Tensor target_token_ids, Tensor target_positions, "
+      "Tensor next_token_ids, Tensor(a3!) out_input_ids, "
+      "Tensor(a4!) out_positions, "
+      "Tensor(a5!) out_is_rejected_token_mask, "
+      "Tensor(a6!) out_is_masked_token_mask, "
+      "Tensor(a7!) out_new_token_indices, "
+      "Tensor(a8!) out_hidden_state_mapping, "
+      "Tensor query_start_loc, Tensor query_end_loc, "
+      "SymInt padding_token_id, SymInt parallel_drafting_token_id, "
+      "SymInt total_input_tokens, SymInt num_padding_slots_per_request, "
+      "bool shift_input_ids) -> ()",
+      &cpu_utils::copy_and_expand_eagle_inputs_kernel_impl);
+  ops.def(
+      "rejection_greedy_sample_kernel_impl("
+      "Tensor(a0!) output_token_ids, Tensor cu_num_draft_tokens, "
+      "Tensor draft_token_ids, Tensor target_argmax, "
+      "Tensor bonus_token_ids, Tensor? is_greedy, "
+      "SymInt max_spec_len) -> ()",
+      &cpu_utils::rejection_greedy_sample_kernel_impl);
+  ops.def(
+      "rejection_random_sample_kernel_impl("
+      "Tensor(a0!) output_token_ids, Tensor cu_num_draft_tokens, "
+      "Tensor draft_token_ids, Tensor? draft_probs, "
+      "Tensor target_probs, Tensor bonus_token_ids, "
+      "Tensor recovered_token_ids, Tensor uniform_probs, "
+      "Tensor? is_greedy, SymInt max_spec_len, SymInt vocab_size, "
+      "bool no_draft_probs) -> ()",
+      &cpu_utils::rejection_random_sample_kernel_impl);
+  ops.def(
+      "expand_kernel_impl(Tensor(a0!) output, Tensor input, "
+      "Tensor cu_num_tokens, SymInt replace_from, "
+      "SymInt replace_to) -> ()",
+      &cpu_utils::expand_kernel_impl);
+  ops.def(
+      "sample_recovered_tokens_kernel_impl("
+      "Tensor(a0!) output_token_ids, Tensor cu_num_draft_tokens, "
+      "Tensor draft_token_ids, Tensor? draft_probs, "
+      "Tensor target_probs, Tensor inv_q, SymInt vocab_size, "
+      "bool no_draft_probs) -> ()",
+      &cpu_utils::sample_recovered_tokens_kernel_impl);
 }
 
 REGISTER_EXTENSION(TORCH_EXTENSION_NAME)
diff --git a/csrc/cpu/utils.cpp b/csrc/cpu/utils.cpp
index 3c133a0c59cf..fd06e01e4d60 100644
--- a/csrc/cpu/utils.cpp
+++ b/csrc/cpu/utils.cpp
@@ -13,46 +13,11 @@
 #include "cpu/utils.hpp"
 
 #ifdef VLLM_NUMA_DISABLED
-std::string init_cpu_threads_env(const std::string& cpu_ids) {
-  return std::string(
-      "Warning: NUMA is not enabled in this build. `init_cpu_threads_env` has "
-      "no effect to setup thread affinity.");
-}
-
-#endif
-
-#ifndef VLLM_NUMA_DISABLED
-std::string init_cpu_threads_env(const std::string& cpu_ids) {
-  bitmask* omp_cpu_mask = numa_parse_cpustring_all(cpu_ids.c_str());
-  TORCH_CHECK(omp_cpu_mask != nullptr,
-              "Failed to parse CPU string: " + cpu_ids);
-  TORCH_CHECK(omp_cpu_mask->size > 0);
-  std::vector<int> omp_cpu_ids;
-  omp_cpu_ids.reserve(omp_cpu_mask->size);
-
-  constexpr int group_size = 8 * sizeof(*omp_cpu_mask->maskp);
-
-  for (int offset = 0; offset < omp_cpu_mask->size; offset += group_size) {
-    unsigned long group_mask = omp_cpu_mask->maskp[offset / group_size];
-    int i = 0;
-    while (group_mask) {
-      if (group_mask & 1) {
-        omp_cpu_ids.emplace_back(offset + i);
-      }
-      ++i;
-      group_mask >>= 1;
-    }
-  }
-
+void init_cpu_memory_env(std::vector<int64_t> node_ids) {}
+#else
+void init_cpu_memory_env(std::vector<int64_t> node_ids) {
   // Memory node binding
   if (numa_available() != -1) {
-    std::set<int> node_ids;
-    for (const auto& cpu_id : omp_cpu_ids) {
-      int node_id = numa_node_of_cpu(cpu_id);
-      if (node_id != -1) {
-        node_ids.insert(node_id);
-      }
-    }
     // Concatenate all node_ids into a single comma-separated string
     if (!node_ids.empty()) {
       std::string node_ids_str;
@@ -120,48 +85,6 @@ std::string init_cpu_threads_env(const std::string& cpu_ids) {
       }
     }
   }
-
-  // OMP threads binding
-  omp_set_num_threads((int)omp_cpu_ids.size());
-  torch::set_num_threads((int)omp_cpu_ids.size());
-  TORCH_CHECK_EQ(omp_cpu_ids.size(), torch::get_num_threads());
-  TORCH_CHECK_EQ(omp_cpu_ids.size(), omp_get_max_threads());
-
-  std::vector<std::pair<int, int>> thread_core_mapping;
-  thread_core_mapping.reserve(omp_cpu_ids.size());
-  omp_lock_t writelock;
-  omp_init_lock(&writelock);
-
-  #pragma omp parallel for schedule(static, 1)
-  for (size_t i = 0; i < omp_cpu_ids.size(); ++i) {
-    cpu_set_t mask;
-    CPU_ZERO(&mask);
-    CPU_SET(omp_cpu_ids[i], &mask);
-    int ret = sched_setaffinity(0, sizeof(cpu_set_t), &mask);
-    if (ret == -1) {
-      TORCH_CHECK(false,
-                  "sched_setaffinity failed. errno: " + std::to_string(errno));
-    }
-
-    omp_set_lock(&writelock);
-    thread_core_mapping.emplace_back(gettid(), omp_cpu_ids[i]);
-    omp_unset_lock(&writelock);
-  }
-
-  omp_destroy_lock(&writelock);
-
-  numa_free_nodemask(omp_cpu_mask);
-
-  std::stringstream ss;
-  ss << "OMP threads binding of Process " << getpid() << ":\n";
-  std::sort(thread_core_mapping.begin(), thread_core_mapping.end(),
-            [](auto&& a, auto&& b) { return a.second < b.second; });
-  for (auto&& item : thread_core_mapping) {
-    ss << "\t"
-       << "OMP tid: " << item.first << ", core " << item.second << "\n";
-  }
-
-  return ss.str();
 }
 #endif  // VLLM_NUMA_DISABLED
 
diff --git a/csrc/cpu/utils.hpp b/csrc/cpu/utils.hpp
index 4a4c50e67957..394e67e3a034 100644
--- a/csrc/cpu/utils.hpp
+++ b/csrc/cpu/utils.hpp
@@ -54,11 +54,34 @@ struct Counter {
 };
 
 inline int64_t get_available_l2_size() {
+#if defined(__s390x__) || defined(__powerpc__)
   static int64_t size = []() {
-    const uint32_t l2_cache_size = at::cpu::L2_cache_size();
+    uint32_t l2_cache_size = 0;
+    auto caps = at::cpu::get_cpu_capabilities();
+    auto it = caps.find("l2_cache_size");
+    if (it != caps.end()) {
+      l2_cache_size = static_cast<uint32_t>(it->second.toInt());
+    }
+    if (l2_cache_size == 0) {
+      long sys_l2 = sysconf(_SC_LEVEL2_CACHE_SIZE);
+      if (sys_l2 > 0) {
+        l2_cache_size = static_cast<uint32_t>(sys_l2);
+      }
+    }
+    if (l2_cache_size == 0) {
+      l2_cache_size = 256 * 1024;
+    }
+    return static_cast<int64_t>(l2_cache_size) >> 1;  // use 50% of L2 cache
+  }();
+  return size;
+#else
+  static int64_t size = []() {
+    auto caps = at::cpu::get_cpu_capabilities();
+    const uint32_t l2_cache_size = caps.at("l2_cache_size").toInt();
     return l2_cache_size >> 1;  // use 50% of L2 cache
   }();
   return size;
+#endif
 }
 
 template <int32_t alignment_v, typename T>
diff --git a/csrc/cuda_vec_utils.cuh b/csrc/cuda_vec_utils.cuh
index 5e2f51f933c6..efbb09994d25 100644
--- a/csrc/cuda_vec_utils.cuh
+++ b/csrc/cuda_vec_utils.cuh
@@ -3,12 +3,14 @@
 
 #pragma once
 
-#include <c10/util/BFloat16.h>
-#include <c10/util/Half.h>
+#include <torch/headeronly/util/BFloat16.h>
+#include <torch/headeronly/util/Half.h>
 #include <cassert>
 
 #ifdef USE_ROCM
   #include <hip/hip_runtime.h>
+  #include <hip/hip_bf16.h>
+  #include <hip/hip_fp16.h>
 #else
   #include <cuda_bf16.h>
   #include <cuda_fp16.h>
diff --git a/csrc/cumem_allocator.cpp b/csrc/cumem_allocator.cpp
index 9ef623bf7f1f..0b720d356e78 100644
--- a/csrc/cumem_allocator.cpp
+++ b/csrc/cumem_allocator.cpp
@@ -232,28 +232,6 @@ void unmap_and_release(unsigned long long device, ssize_t size,
     }
   }
 
-  // ROCm workaround: hipMemRelease does not return physical VRAM to the
-  // free pool while the virtual-address reservation is still held.
-  // Cycling cuMemAddressFree → cuMemAddressReserve (at the same address)
-  // forces the driver to actually release the physical pages while keeping
-  // the same VA available for a later create_and_map.
-  if (first_error == no_error) {
-    first_error = cuMemAddressFree(d_mem, size);
-    if (first_error == no_error) {
-      CUdeviceptr d_mem_new = 0;
-      first_error = cuMemAddressReserve(&d_mem_new, size, 0, d_mem, 0);
-      if (first_error == no_error && d_mem_new != d_mem) {
-        cuMemAddressFree(d_mem_new, size);
-        snprintf(error_msg, sizeof(error_msg),
-                 "ROCm: VA re-reserve got %p instead of %p", (void*)d_mem_new,
-                 (void*)d_mem);
-        error_code = CUresult(1);
-        std::cerr << error_msg << std::endl;
-        return;
-      }
-    }
-  }
-
   if (first_error != no_error) {
     CUDA_CHECK(first_error);
   }
diff --git a/csrc/cutlass_extensions/common.hpp b/csrc/cutlass_extensions/common.hpp
index 91c215071f6e..ca96b0ef3fef 100644
--- a/csrc/cutlass_extensions/common.hpp
+++ b/csrc/cutlass_extensions/common.hpp
@@ -6,14 +6,16 @@
 #include <cstdio>
 #include <cstdlib>
 
+#include <torch/headeronly/util/shim_utils.h>
+
 /**
  * Helper function for checking CUTLASS errors
  */
-#define CUTLASS_CHECK(status)                       \
-  {                                                 \
-    cutlass::Status error = status;                 \
-    TORCH_CHECK(error == cutlass::Status::kSuccess, \
-                cutlassGetStatusString(error));     \
+#define CUTLASS_CHECK(status)                           \
+  {                                                     \
+    cutlass::Status error = status;                     \
+    STD_TORCH_CHECK(error == cutlass::Status::kSuccess, \
+                    cutlassGetStatusString(error));     \
   }
 
 inline int get_cuda_max_shared_memory_per_block_opt_in(int const device) {
@@ -94,44 +96,14 @@ struct enable_sm90_or_later : Kernel {
 };
 
 template <typename Kernel>
-struct enable_sm90_only : Kernel {
+struct enable_sm100_to_sm120 : Kernel {
   template <typename... Args>
   CUTLASS_DEVICE void operator()(Args&&... args) {
 #if defined __CUDA_ARCH__
-  #if __CUDA_ARCH__ == 900
+  #if (__CUDA_ARCH__ >= 1000 && __CUDA_ARCH__ < 1200)
     Kernel::operator()(std::forward<Args>(args)...);
   #else
-    printf("This kernel only supports sm90.\n");
-    asm("trap;");
-  #endif
-#endif
-  }
-};
-
-template <typename Kernel>
-struct enable_sm100f_only : Kernel {
-  template <typename... Args>
-  CUTLASS_DEVICE void operator()(Args&&... args) {
-#if defined __CUDA_ARCH__
-  #if __CUDA_ARCH__ == 1000 || __CUDA_ARCH__ == 1030
-    Kernel::operator()(std::forward<Args>(args)...);
-  #else
-    printf("This kernel only supports sm100f.\n");
-    asm("trap;");
-  #endif
-#endif
-  }
-};
-
-template <typename Kernel>
-struct enable_sm100a_only : Kernel {
-  template <typename... Args>
-  CUTLASS_DEVICE void operator()(Args&&... args) {
-#if defined __CUDA_ARCH__
-  #if __CUDA_ARCH__ == 1000
-    Kernel::operator()(std::forward<Args>(args)...);
-  #else
-    printf("This kernel only supports sm100a.\n");
+    printf("This kernel only supports sm[100, 120).\n");
     asm("trap;");
   #endif
 #endif
@@ -146,7 +118,7 @@ struct enable_sm120_only : Kernel {
   #if __CUDA_ARCH__ == 1200
     Kernel::operator()(std::forward<Args>(args)...);
   #else
-    printf("This kernel only supports sm120.\n");
+    printf("This kernel only supports sm120a.\n");
     asm("trap;");
   #endif
 #endif
@@ -158,8 +130,13 @@ template <typename Kernel>
 struct enable_sm120_family : Kernel {
   template <typename... Args>
   CUTLASS_DEVICE void operator()(Args&&... args) {
-#if defined __CUDA_ARCH__ && (__CUDA_ARCH__ >= 1200 && __CUDA_ARCH__ < 1300)
+#if defined __CUDA_ARCH__
+  #if (__CUDA_ARCH__ >= 1200 && __CUDA_ARCH__ < 1300)
     Kernel::operator()(std::forward<Args>(args)...);
+  #else
+    printf("This kernel only supports sm120f.\n");
+    asm("trap;");
+  #endif
 #endif
   }
 };
diff --git a/csrc/cutlass_extensions/cute_utils.cuh b/csrc/cutlass_extensions/cute_utils.cuh
index f61fe3ceb978..116ce854d588 100644
--- a/csrc/cutlass_extensions/cute_utils.cuh
+++ b/csrc/cutlass_extensions/cute_utils.cuh
@@ -1,7 +1,6 @@
 #pragma once
 
 #include <cute/tensor.hpp>
-#include <torch/all.h>
 namespace cute {
 
 ////////////////////////////////////////////////////////////////////
diff --git a/csrc/cutlass_extensions/epilogue/broadcast_load_epilogue_array_c3x.hpp b/csrc/cutlass_extensions/epilogue/broadcast_load_epilogue_array_c3x.hpp
index 5c1d6e3f46be..0a3c9e9cc7f4 100644
--- a/csrc/cutlass_extensions/epilogue/broadcast_load_epilogue_array_c3x.hpp
+++ b/csrc/cutlass_extensions/epilogue/broadcast_load_epilogue_array_c3x.hpp
@@ -189,9 +189,9 @@ struct Sm90RowOrScalarBroadcastArray {
       }
 
       auto synchronize = [&] () { cutlass::arch::NamedBarrier::sync(thr_num, cutlass::arch::ReservedNamedBarriers::EpilogueBarrier); };
-      Tensor tGS_gRow_flt = filter_zeros(tGS_gRow);
-      Tensor tGS_sRow_flt = filter_zeros(tGS_sRow);
-      Tensor tGS_cRow_flt = make_tensor(tGS_cRow.data(), make_layout(tGS_gRow_flt.shape(), tGS_cRow.stride()));
+      cute::Tensor tGS_gRow_flt = filter_zeros(tGS_gRow);
+      cute::Tensor tGS_sRow_flt = filter_zeros(tGS_sRow);
+      cute::Tensor tGS_cRow_flt = make_tensor(tGS_cRow.data(), make_layout(tGS_gRow_flt.shape(), tGS_cRow.stride()));
 
       for (int i = 0; i < size(tGS_gRow_flt); ++i) {
         if (get<1>(tGS_cRow_flt(i)) >= size<1>(CtaTileShapeMNK{})) {
@@ -211,8 +211,8 @@ struct Sm90RowOrScalarBroadcastArray {
     begin_loop(int epi_m, int epi_n) {
       if (epi_m == 0) { // Assumes M-major subtile loop
         if (!params.row_broadcast) return; // Do not issue LDS when row is scalar 
-        Tensor tSR_sRow_flt = filter_zeros(tSR_sRow(_,_,_,epi_m,epi_n));
-        Tensor tSR_rRow_flt = filter_zeros(tSR_rRow);
+        cute::Tensor tSR_sRow_flt = filter_zeros(tSR_sRow(_,_,_,epi_m,epi_n));
+        cute::Tensor tSR_rRow_flt = filter_zeros(tSR_rRow);
         copy(tSR_sRow_flt, tSR_rRow_flt);
       }
     }
@@ -241,9 +241,9 @@ struct Sm90RowOrScalarBroadcastArray {
     auto [m, n, k, l] = args.tile_coord_mnkl;
     using ThreadCount = decltype(size(args.tiled_copy));
 
-    Tensor mRow = make_tensor(make_gmem_ptr(params.ptr_row_array[l]), make_shape(M,N,1), params.dRow);
-    Tensor gRow = local_tile(mRow(_,_,l), take<0,2>(args.tile_shape_mnk), make_coord(m, n));          // (CTA_M, CTA_N)
-    Tensor sRow = make_tensor(make_smem_ptr(smem), 
+    cute::Tensor mRow = make_tensor(make_gmem_ptr(params.ptr_row_array[l]), make_shape(M,N,1), params.dRow);
+    cute::Tensor gRow = local_tile(mRow(_,_,l), take<0,2>(args.tile_shape_mnk), make_coord(m, n));          // (CTA_M, CTA_N)
+    cute::Tensor sRow = make_tensor(make_smem_ptr(smem), 
         make_shape(size<0>(CtaTileShapeMNK{}), size<1>(CtaTileShapeMNK{})), make_shape(_0{}, _1{}));  // (CTA_M, CTA_N)
     //// G2S: Gmem to Smem
     auto tiled_g2s = make_tiled_copy(Copy_Atom<DefaultCopy, Element>{},
@@ -251,16 +251,16 @@ struct Sm90RowOrScalarBroadcastArray {
                                             Stride<_0,          _1>>{}, 
                                      Layout<_1>{});   
     auto thr_g2s = tiled_g2s.get_slice(args.thread_idx);
-    Tensor tGS_gRow = thr_g2s.partition_S(gRow);
-    Tensor tGS_sRow = thr_g2s.partition_D(sRow);
+    cute::Tensor tGS_gRow = thr_g2s.partition_S(gRow);
+    cute::Tensor tGS_sRow = thr_g2s.partition_D(sRow);
 
     //// G2S: Coord 
     auto cRow = make_identity_tensor(make_shape(size<0>(CtaTileShapeMNK{}), size<1>(CtaTileShapeMNK{})));
-    Tensor tGS_cRow = thr_g2s.partition_S(cRow);
+    cute::Tensor tGS_cRow = thr_g2s.partition_S(cRow);
 
     //// S2R: Smem to Reg
-    Tensor tSR_sRow = sm90_partition_for_epilogue<ReferenceSrc>(sRow, args.epi_tile, args.tiled_copy, args.thread_idx);
-    Tensor tSR_rRow = make_tensor_like(take<0,3>(tSR_sRow));                                           // (CPY,CPY_M,CPY_N)
+    cute::Tensor tSR_sRow = sm90_partition_for_epilogue<ReferenceSrc>(sRow, args.epi_tile, args.tiled_copy, args.thread_idx);
+    cute::Tensor tSR_rRow = make_tensor_like(take<0,3>(tSR_sRow));                                           // (CPY,CPY_M,CPY_N)
 
     return ConsumerStoreCallbacks<decltype(tGS_gRow), decltype(tGS_sRow), decltype(tGS_cRow), decltype(tiled_g2s), decltype(tSR_sRow), decltype(tSR_rRow), decltype(args.tCcD), decltype(args.residue_cD), ThreadCount>(
       tGS_gRow, 
@@ -389,27 +389,35 @@ struct Sm90ColOrScalarBroadcastArray {
 
     CUTLASS_DEVICE void
     begin() {
-      Tensor pred = make_tensor<bool>(shape(tCgCol));
-      CUTLASS_PRAGMA_UNROLL
-      for (int i = 0; i < size(pred); ++i) {
-        pred(i) = get<0>(tCcCol(i)) < m;
-      }
-
       if (!params.col_broadcast) {
         fill(tCrCol, *(params.ptr_col_array[group]));
         return;
       }
 
-      // Filter so we don't issue redundant copies over stride-0 modes
-      // (only works if 0-strides are in same location, which is by construction)
-      copy_if(pred, filter(tCgCol), filter(tCrCol));
+      // tCgCol has layout (CPY,CPY_M,CPY_N,EPI_M,EPI_N) where CPY_N and
+      // EPI_N are stride-0 for the column broadcast. Slice those modes at
+      // index 0 to avoid redundant copies AND ensure pred/data consistency
+      static_assert(decltype(stride<2>(tCgCol))::value == 0, "Expected stride-0 CPY_N for col broadcast");
+      static_assert(decltype(stride<4>(tCgCol))::value == 0, "Expected stride-0 EPI_N for col broadcast");
+
+      auto tCgCol_s = tCgCol(_,_,0,_,0);      // (CPY,CPY_M,EPI_M)
+      auto tCrCol_s = tCrCol(_,_,0,_,0);      // (CPY,CPY_M,EPI_M)
+      auto tCcCol_s = tCcCol(_,_,0,_,0);      // (CPY,CPY_M,EPI_M)
+
+      cute::Tensor pred = make_tensor<bool>(shape(tCgCol_s));
+      CUTLASS_PRAGMA_UNROLL
+      for (int i = 0; i < size(pred); ++i) {
+        pred(i) = get<0>(tCcCol_s(i)) < m;
+      }
+
+      copy_if(pred, tCgCol_s, tCrCol_s);
     }
 
     template <typename ElementAccumulator, int FragmentSize>
     CUTLASS_DEVICE Array<Element, FragmentSize>
     visit(Array<ElementAccumulator, FragmentSize> const& frg_acc, int epi_v, int epi_m, int epi_n) {
       Array<Element, FragmentSize> frg_col;
-      Tensor tCrCol_mn = tCrCol(_,_,_,epi_m,epi_n);
+      cute::Tensor tCrCol_mn = tCrCol(_,_,_,epi_m,epi_n);
 
       CUTLASS_PRAGMA_UNROLL
       for (int i = 0; i < FragmentSize; ++i) {
@@ -431,16 +439,16 @@ struct Sm90ColOrScalarBroadcastArray {
     auto [M, N, K, L] = args.problem_shape_mnkl;
     auto [m, n, k, l] = args.tile_coord_mnkl;
 
-    Tensor mCol = make_tensor(make_gmem_ptr(params.ptr_col_array[l]), make_shape(M,N,1), params.dCol);
-    Tensor tCgCol = sm90_partition_for_epilogue<ReferenceSrc>(                         // (CPY,CPY_M,CPY_N,EPI_M,EPI_N)
+    cute::Tensor mCol = make_tensor(make_gmem_ptr(params.ptr_col_array[l]), make_shape(M,N,1), params.dCol);
+    cute::Tensor tCgCol = sm90_partition_for_epilogue<ReferenceSrc>(                         // (CPY,CPY_M,CPY_N,EPI_M,EPI_N)
       mCol, args.tile_shape_mnk, args.tile_coord_mnkl, args.epi_tile, args.tiled_copy, args.thread_idx);
-    Tensor tCrCol = make_tensor_like(tCgCol);                                          // (CPY,CPY_M,CPY_N,EPI_M,EPI_N)
+    cute::Tensor tCrCol = make_tensor_like(tCgCol);                                          // (CPY,CPY_M,CPY_N,EPI_M,EPI_N)
 
     // Generate an identity tensor matching the shape of the global tensor and 
     //  partition the same way, this will be used to generate the predicate
     //  tensor for loading
-    Tensor cCol = make_identity_tensor(mCol.shape());
-    Tensor tCcCol = sm90_partition_for_epilogue<ReferenceSrc>(                         // (CPY,CPY_M,CPY_N,EPI_M,EPI_N)
+    cute::Tensor cCol = make_identity_tensor(mCol.shape());
+    cute::Tensor tCcCol = sm90_partition_for_epilogue<ReferenceSrc>(                         // (CPY,CPY_M,CPY_N,EPI_M,EPI_N)
       cCol, args.tile_shape_mnk, args.tile_coord_mnkl, args.epi_tile, args.tiled_copy, args.thread_idx);
 
     return ConsumerStoreCallbacks(
diff --git a/csrc/cutlass_extensions/epilogue/broadcast_load_epilogue_c3x.hpp b/csrc/cutlass_extensions/epilogue/broadcast_load_epilogue_c3x.hpp
index 58b1e8ff159f..29e6ec41e2a4 100644
--- a/csrc/cutlass_extensions/epilogue/broadcast_load_epilogue_c3x.hpp
+++ b/csrc/cutlass_extensions/epilogue/broadcast_load_epilogue_c3x.hpp
@@ -186,9 +186,9 @@ struct Sm90RowOrScalarBroadcast {
       }
 
       auto synchronize = [&] () { cutlass::arch::NamedBarrier::sync(thr_num, cutlass::arch::ReservedNamedBarriers::EpilogueBarrier); };
-      Tensor tGS_gRow_flt = filter_zeros(tGS_gRow);
-      Tensor tGS_sRow_flt = filter_zeros(tGS_sRow);
-      Tensor tGS_cRow_flt = make_tensor(tGS_cRow.data(), make_layout(tGS_gRow_flt.shape(), tGS_cRow.stride()));
+      cute::Tensor tGS_gRow_flt = filter_zeros(tGS_gRow);
+      cute::Tensor tGS_sRow_flt = filter_zeros(tGS_sRow);
+      cute::Tensor tGS_cRow_flt = make_tensor(tGS_cRow.data(), make_layout(tGS_gRow_flt.shape(), tGS_cRow.stride()));
 
       for (int i = 0; i < size(tGS_gRow_flt); ++i) {
         if (get<1>(tGS_cRow_flt(i)) >= size<1>(CtaTileShapeMNK{})) {
@@ -208,8 +208,8 @@ struct Sm90RowOrScalarBroadcast {
     begin_loop(int epi_m, int epi_n) {
       if (epi_m == 0) { // Assumes M-major subtile loop
         if (!params.row_broadcast) return; // Do not issue LDS when row is scalar 
-        Tensor tSR_sRow_flt = filter_zeros(tSR_sRow(_,_,_,epi_m,epi_n));
-        Tensor tSR_rRow_flt = filter_zeros(tSR_rRow);
+        cute::Tensor tSR_sRow_flt = filter_zeros(tSR_sRow(_,_,_,epi_m,epi_n));
+        cute::Tensor tSR_rRow_flt = filter_zeros(tSR_rRow);
         copy(tSR_sRow_flt, tSR_rRow_flt);
       }
     }
@@ -238,9 +238,9 @@ struct Sm90RowOrScalarBroadcast {
     auto [m, n, k, l] = args.tile_coord_mnkl;
     using ThreadCount = decltype(size(args.tiled_copy));
 
-    Tensor mRow = make_tensor(make_gmem_ptr(params.ptr_row), make_shape(M,N,L), params.dRow);
-    Tensor gRow = local_tile(mRow(_,_,l), take<0,2>(args.tile_shape_mnk), make_coord(m, n));          // (CTA_M, CTA_N)
-    Tensor sRow = make_tensor(make_smem_ptr(smem), 
+    cute::Tensor mRow = make_tensor(make_gmem_ptr(params.ptr_row), make_shape(M,N,L), params.dRow);
+    cute::Tensor gRow = local_tile(mRow(_,_,l), take<0,2>(args.tile_shape_mnk), make_coord(m, n));          // (CTA_M, CTA_N)
+    cute::Tensor sRow = make_tensor(make_smem_ptr(smem), 
         make_shape(size<0>(CtaTileShapeMNK{}), size<1>(CtaTileShapeMNK{})), make_shape(_0{}, _1{}));  // (CTA_M, CTA_N)
     //// G2S: Gmem to Smem
     auto tiled_g2s = make_tiled_copy(Copy_Atom<DefaultCopy, Element>{},
@@ -248,16 +248,16 @@ struct Sm90RowOrScalarBroadcast {
                                             Stride<_0,          _1>>{}, 
                                      Layout<_1>{});   
     auto thr_g2s = tiled_g2s.get_slice(args.thread_idx);
-    Tensor tGS_gRow = thr_g2s.partition_S(gRow);
-    Tensor tGS_sRow = thr_g2s.partition_D(sRow);
+    cute::Tensor tGS_gRow = thr_g2s.partition_S(gRow);
+    cute::Tensor tGS_sRow = thr_g2s.partition_D(sRow);
 
     //// G2S: Coord 
     auto cRow = make_identity_tensor(make_shape(size<0>(CtaTileShapeMNK{}), size<1>(CtaTileShapeMNK{})));
-    Tensor tGS_cRow = thr_g2s.partition_S(cRow);
+    cute::Tensor tGS_cRow = thr_g2s.partition_S(cRow);
 
     //// S2R: Smem to Reg
-    Tensor tSR_sRow = sm90_partition_for_epilogue<ReferenceSrc>(sRow, args.epi_tile, args.tiled_copy, args.thread_idx);
-    Tensor tSR_rRow = make_tensor_like(take<0,3>(tSR_sRow));                                           // (CPY,CPY_M,CPY_N)
+    cute::Tensor tSR_sRow = sm90_partition_for_epilogue<ReferenceSrc>(sRow, args.epi_tile, args.tiled_copy, args.thread_idx);
+    cute::Tensor tSR_rRow = make_tensor_like(take<0,3>(tSR_sRow));                                           // (CPY,CPY_M,CPY_N)
 
     return ConsumerStoreCallbacks<decltype(tGS_gRow), decltype(tGS_sRow), decltype(tGS_cRow), decltype(tiled_g2s), decltype(tSR_sRow), decltype(tSR_rRow), decltype(args.tCcD), decltype(args.residue_cD), ThreadCount>(
       tGS_gRow, 
@@ -382,27 +382,35 @@ struct Sm90ColOrScalarBroadcast {
 
     CUTLASS_DEVICE void
     begin() {
-      Tensor pred = make_tensor<bool>(shape(tCgCol));
-      CUTLASS_PRAGMA_UNROLL
-      for (int i = 0; i < size(pred); ++i) {
-        pred(i) = get<0>(tCcCol(i)) < m;
-      }
-
       if (!params.col_broadcast) {
         fill(tCrCol, *(params.ptr_col));
         return;
       }
 
-      // Filter so we don't issue redundant copies over stride-0 modes
-      // (only works if 0-strides are in same location, which is by construction)
-      copy_if(pred, filter(tCgCol), filter(tCrCol));
+      // tCgCol has layout (CPY,CPY_M,CPY_N,EPI_M,EPI_N) where CPY_N and
+      // EPI_N are stride-0 for the column broadcast. Slice those modes at
+      // index 0 to avoid redundant copies AND ensure pred/data consistency
+      static_assert(decltype(stride<2>(tCgCol))::value == 0, "Expected stride-0 CPY_N for col broadcast");
+      static_assert(decltype(stride<4>(tCgCol))::value == 0, "Expected stride-0 EPI_N for col broadcast");
+
+      auto tCgCol_s = tCgCol(_,_,0,_,0);      // (CPY,CPY_M,EPI_M)
+      auto tCrCol_s = tCrCol(_,_,0,_,0);      // (CPY,CPY_M,EPI_M)
+      auto tCcCol_s = tCcCol(_,_,0,_,0);      // (CPY,CPY_M,EPI_M)
+
+      cute::Tensor pred = make_tensor<bool>(shape(tCgCol_s));
+      CUTLASS_PRAGMA_UNROLL
+      for (int i = 0; i < size(pred); ++i) {
+        pred(i) = get<0>(tCcCol_s(i)) < m;
+      }
+
+      copy_if(pred, tCgCol_s, tCrCol_s);
     }
 
     template <typename ElementAccumulator, int FragmentSize>
     CUTLASS_DEVICE Array<Element, FragmentSize>
     visit(Array<ElementAccumulator, FragmentSize> const& frg_acc, int epi_v, int epi_m, int epi_n) {
       Array<Element, FragmentSize> frg_col;
-      Tensor tCrCol_mn = tCrCol(_,_,_,epi_m,epi_n);
+      cute::Tensor tCrCol_mn = tCrCol(_,_,_,epi_m,epi_n);
 
       CUTLASS_PRAGMA_UNROLL
       for (int i = 0; i < FragmentSize; ++i) {
@@ -422,16 +430,16 @@ struct Sm90ColOrScalarBroadcast {
   get_consumer_store_callbacks(ConsumerStoreArgs<Args...> const& args) {
 
     auto [M, N, K, L] = args.problem_shape_mnkl;
-    Tensor mCol = make_tensor(make_gmem_ptr(params.ptr_col), make_shape(M,N,L), params.dCol);
-    Tensor tCgCol = sm90_partition_for_epilogue<ReferenceSrc>(                         // (CPY,CPY_M,CPY_N,EPI_M,EPI_N)
+    cute::Tensor mCol = make_tensor(make_gmem_ptr(params.ptr_col), make_shape(M,N,L), params.dCol);
+    cute::Tensor tCgCol = sm90_partition_for_epilogue<ReferenceSrc>(                         // (CPY,CPY_M,CPY_N,EPI_M,EPI_N)
       mCol, args.tile_shape_mnk, args.tile_coord_mnkl, args.epi_tile, args.tiled_copy, args.thread_idx);
-    Tensor tCrCol = make_tensor_like(tCgCol);                                          // (CPY,CPY_M,CPY_N,EPI_M,EPI_N)
+    cute::Tensor tCrCol = make_tensor_like(tCgCol);                                          // (CPY,CPY_M,CPY_N,EPI_M,EPI_N)
 
     // Generate an identity tensor matching the shape of the global tensor and 
     //  partition the same way, this will be used to generate the predicate
     //  tensor for loading
-    Tensor cCol = make_identity_tensor(mCol.shape());
-    Tensor tCcCol = sm90_partition_for_epilogue<ReferenceSrc>(                         // (CPY,CPY_M,CPY_N,EPI_M,EPI_N)
+    cute::Tensor cCol = make_identity_tensor(mCol.shape());
+    cute::Tensor tCcCol = sm90_partition_for_epilogue<ReferenceSrc>(                         // (CPY,CPY_M,CPY_N,EPI_M,EPI_N)
       cCol, args.tile_shape_mnk, args.tile_coord_mnkl, args.epi_tile, args.tiled_copy, args.thread_idx);
 
     return ConsumerStoreCallbacks(
diff --git a/csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp b/csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp
index cf79507e1997..c2ddcea6d027 100644
--- a/csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp
+++ b/csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp
@@ -3,6 +3,14 @@
 #include "cutlass_extensions/epilogue/broadcast_load_epilogue_c3x.hpp"
 #include "cutlass_extensions/epilogue/broadcast_load_epilogue_array_c3x.hpp"
 
+// This header is shared by both _C (unstable ABI) and _C_stable_libtorch
+// (stable ABI) targets. When compiled under the stable ABI target,
+// TORCH_TARGET_VERSION is defined and Tensor is unavailable, so we
+// use torch::stable::Tensor instead.
+#ifdef TORCH_TARGET_VERSION
+  #include <torch/csrc/stable/tensor.h>
+#endif
+
 /*
    This file defines custom epilogues for fusing channel scales, token scales,
    bias, and activation zero-points onto a GEMM operation using the
@@ -15,6 +23,12 @@
 
 namespace vllm::c3x {
 
+#ifdef TORCH_TARGET_VERSION
+using TensorType = torch::stable::Tensor;
+#else
+using TensorType = torch::Tensor;
+#endif
+
 using namespace cute;
 
 template <typename T>
@@ -84,7 +98,7 @@ struct ScaledEpilogueBase {
   // from a tensor. It can handle both row and column, as well as row/column or
   // scalar cases.
   template <typename Descriptor, typename T>
-  static auto args_from_tensor(torch::Tensor const& tensor) {
+  static auto args_from_tensor(TensorType const& tensor) {
     using Arguments = typename Descriptor::Arguments;
     auto* data_ptr = static_cast<T*>(tensor.data_ptr());
     if constexpr (std::is_same_v<Descriptor, ColOrScalarLoad<T>> ||
@@ -100,7 +114,7 @@ struct ScaledEpilogueBase {
   // This overload handles the case where there might not be a tensor, in which
   // case a nullptr is passed and a constant (0) is used.
   template <typename Descriptor, typename T>
-  static auto args_from_tensor(std::optional<torch::Tensor> const& tensor) {
+  static auto args_from_tensor(std::optional<TensorType> const& tensor) {
     using Arguments = typename Descriptor::Arguments;
     auto* data_ptr = tensor ? static_cast<T*>(tensor->data_ptr()) : nullptr;
     static_assert(std::is_same_v<Descriptor, ColLoad<T, true>> ||
@@ -158,8 +172,8 @@ struct ScaledEpilogue
       cutlass::epilogue::fusion::Sm90EVT<Compute1, ScaleA, EVTCompute0>;
   using ArgumentType = typename EVTCompute::Arguments;
 
-  static ArgumentType prepare_args(torch::Tensor const& a_scales,
-                                   torch::Tensor const& b_scales) {
+  static ArgumentType prepare_args(TensorType const& a_scales,
+                                   TensorType const& b_scales) {
     auto a_args = SUPER::template args_from_tensor<ScaleA, float>(a_scales);
     auto b_args = SUPER::template args_from_tensor<ScaleB, float>(b_scales);
 
@@ -203,9 +217,9 @@ struct ScaledEpilogueBias
       cutlass::epilogue::fusion::Sm90EVT<Compute1, ScaleA, EVTCompute0, Bias>;
 
   using ArgumentType = typename EVTCompute::Arguments;
-  static ArgumentType prepare_args(torch::Tensor const& a_scales,
-                                   torch::Tensor const& b_scales,
-                                   torch::Tensor const& bias) {
+  static ArgumentType prepare_args(TensorType const& a_scales,
+                                   TensorType const& b_scales,
+                                   TensorType const& bias) {
     auto a_args = SUPER::template args_from_tensor<ScaleA, float>(a_scales);
     auto b_args = SUPER::template args_from_tensor<ScaleB, float>(b_scales);
     auto bias_args = SUPER::template args_from_tensor<Bias, ElementD>(bias);
@@ -246,9 +260,9 @@ struct ScaledEpilogueColumnBias
       cutlass::epilogue::fusion::Sm90EVT<Compute1, ScaleA, EVTCompute0, Bias>;
 
   using ArgumentType = typename EVTCompute::Arguments;
-  static ArgumentType prepare_args(torch::Tensor const& a_scales,
-                                   torch::Tensor const& b_scales,
-                                   torch::Tensor const& bias) {
+  static ArgumentType prepare_args(TensorType const& a_scales,
+                                   TensorType const& b_scales,
+                                   TensorType const& bias) {
     auto a_args = SUPER::template args_from_tensor<ScaleA, float>(a_scales);
     auto b_args = SUPER::template args_from_tensor<ScaleB, float>(b_scales);
     auto bias_args = SUPER::template args_from_tensor<Bias, ElementD>(bias);
@@ -304,10 +318,10 @@ struct ScaledEpilogueBiasAzp
                                          EVTComputeScaleB, Bias>;
   using ArgumentType = typename EVTCompute::Arguments;
 
-  static ArgumentType prepare_args(torch::Tensor const& a_scales,
-                                   torch::Tensor const& b_scales,
-                                   torch::Tensor const& azp_adj,
-                                   std::optional<torch::Tensor> const& bias) {
+  static ArgumentType prepare_args(TensorType const& a_scales,
+                                   TensorType const& b_scales,
+                                   TensorType const& azp_adj,
+                                   std::optional<TensorType> const& bias) {
     auto a_args = SUPER::template args_from_tensor<ScaleA, float>(a_scales);
     auto b_args = SUPER::template args_from_tensor<ScaleB, float>(b_scales);
     auto bias_args = SUPER::template args_from_tensor<Bias, ElementD>(bias);
@@ -380,11 +394,11 @@ struct ScaledEpilogueBiasAzpToken
                                          EVTComputeScaleB, Bias>;
   using ArgumentType = typename EVTCompute::Arguments;
 
-  static ArgumentType prepare_args(torch::Tensor const& a_scales,
-                                   torch::Tensor const& b_scales,
-                                   torch::Tensor const& azp_adj,
-                                   torch::Tensor const& azp,
-                                   std::optional<torch::Tensor> const& bias) {
+  static ArgumentType prepare_args(TensorType const& a_scales,
+                                   TensorType const& b_scales,
+                                   TensorType const& azp_adj,
+                                   TensorType const& azp,
+                                   std::optional<TensorType> const& bias) {
     auto a_args = SUPER::template args_from_tensor<ScaleA, float>(a_scales);
     auto b_args = SUPER::template args_from_tensor<ScaleB, float>(b_scales);
     auto bias_args = SUPER::template args_from_tensor<Bias, ElementD>(bias);
diff --git a/csrc/cutlass_extensions/torch_utils.hpp b/csrc/cutlass_extensions/torch_utils.hpp
index a1ff933cce63..5f973033fd72 100644
--- a/csrc/cutlass_extensions/torch_utils.hpp
+++ b/csrc/cutlass_extensions/torch_utils.hpp
@@ -1,6 +1,19 @@
 #pragma once
 
-#include <torch/all.h>
+#include "torch_utils.h"
+
+// This header is shared between _C (unstable ABI, used by machete) and
+// _C_stable_libtorch (stable ABI, used by W4A8/sparse). TORCH_TARGET_VERSION
+// is defined only for the stable target, so we switch includes and types
+// accordingly. TorchTensor (not Tensor) avoids ambiguity with cute::Tensor.
+#ifdef TORCH_TARGET_VERSION
+  #include <torch/csrc/stable/tensor.h>
+  #include <torch/headeronly/util/BFloat16.h>
+  #include <torch/headeronly/util/Half.h>
+using TorchTensor = torch::stable::Tensor;
+#else
+using TorchTensor = torch::Tensor;
+#endif
 
 #include "cute/layout.hpp"
 #include "cutlass/layout/matrix.h"
@@ -55,35 +68,35 @@ CUTE_HOST_DEVICE constexpr auto make_shape_from_idx(F&& f) {
 // If `tensor.dim() < rank(Stride{})`, the shape is padded with 1s and the extra
 // strides are set to be 0 or 1.
 template <typename Stride>
-static inline auto make_cute_layout(torch::Tensor const& tensor,
+static inline auto make_cute_layout(TorchTensor const& tensor,
                                     std::string_view name = "tensor") {
-  TORCH_CHECK(tensor.dim() <= rank(Stride{}));
-  auto stride = cute::transform_with_idx(
-      Stride{}, [&](auto const& stride_ele, auto const& idx) {
-        using StrideEle = std::decay_t<decltype(stride_ele)>;
-
-        if (idx < tensor.dim()) {
-          if constexpr (cute::is_static_v<StrideEle>) {
-            TORCH_CHECK(StrideEle::value == tensor.stride(idx), "Expected ",
-                        name, ".stride(", idx, ") to be ", StrideEle::value);
-            return StrideEle{};
-          } else {
-            if (tensor.size(idx) == 1) {
-              // use 0 stride for dim with size 1, this is easier for
-              // cute/cutlass to optimize (helps the TMA code flatten dims)
-              return StrideEle{0};
-            } else {
-              return tensor.stride(idx);
-            }
-          }
+  TORCH_UTILS_CHECK(tensor.dim() <= rank(Stride{}));
+  auto stride = cute::transform_with_idx(Stride{}, [&](auto const& stride_ele,
+                                                       auto const& idx) {
+    using StrideEle = std::decay_t<decltype(stride_ele)>;
+
+    if (idx < tensor.dim()) {
+      if constexpr (cute::is_static_v<StrideEle>) {
+        TORCH_UTILS_CHECK(StrideEle::value == tensor.stride(idx), "Expected ",
+                          name, ".stride(", idx, ") to be ", StrideEle::value);
+        return StrideEle{};
+      } else {
+        if (tensor.size(idx) == 1) {
+          // use 0 stride for dim with size 1, this is easier for
+          // cute/cutlass to optimize (helps the TMA code flatten dims)
+          return StrideEle{0};
         } else {
-          // Extra strides are assumed to be 0 or 1
-          if constexpr (cute::is_static_v<StrideEle>) {
-            static_assert(StrideEle::value == 0 || StrideEle::value == 1);
-          }
-          return StrideEle{};
+          return tensor.stride(idx);
         }
-      });
+      }
+    } else {
+      // Extra strides are assumed to be 0 or 1
+      if constexpr (cute::is_static_v<StrideEle>) {
+        static_assert(StrideEle::value == 0 || StrideEle::value == 1);
+      }
+      return StrideEle{};
+    }
+  });
 
   auto shape = cute::make_shape_from_idx<rank(Stride{})>([&](auto const& idx) {
     if (idx < tensor.dim())
@@ -97,7 +110,7 @@ static inline auto make_cute_layout(torch::Tensor const& tensor,
 
 template <typename Stride>
 static inline auto maybe_make_cute_layout(
-    std::optional<torch::Tensor> const& tensor,
+    std::optional<TorchTensor> const& tensor,
     std::string_view name = "tensor") {
   using Layout = decltype(make_cute_layout<Stride>(*tensor));
 
@@ -121,12 +134,12 @@ template <typename T>
 using equivalent_cutlass_type_t = typename equivalent_cutlass_type<T>::type;
 
 template <>
-struct equivalent_cutlass_type<c10::Half> {
+struct equivalent_cutlass_type<torch::headeronly::Half> {
   using type = cutlass::half_t;
 };
 
 template <>
-struct equivalent_cutlass_type<c10::BFloat16> {
+struct equivalent_cutlass_type<torch::headeronly::BFloat16> {
   using type = cutlass::bfloat16_t;
 };
 
@@ -134,8 +147,8 @@ struct equivalent_cutlass_type<c10::BFloat16> {
 // equivalent_scalar_t (basically inverse of equivalent_cutlass_type)
 //
 
-// Return a `c10::CppTypeToScalarType<T>` compatible type, i.e. get the C++ from
-// c10 that is equivalent to T, e.g.: `cutlass::half_t -> c10::Half`
+// Return a `torch::headeronly::CppTypeToScalarType<T>` compatible type, i.e.
+// get the C++ type equivalent to T, e.g.: `cutlass::half_t -> Half`
 template <typename T>
 struct equivalent_scalar_type {
   using type = T;
@@ -146,15 +159,15 @@ using equivalent_scalar_type_t = typename equivalent_scalar_type<T>::type;
 
 template <>
 struct equivalent_scalar_type<cutlass::half_t> {
-  using type = c10::Half;
+  using type = torch::headeronly::Half;
 };
 
 template <>
 struct equivalent_scalar_type<cutlass::bfloat16_t> {
-  using type = c10::BFloat16;
+  using type = torch::headeronly::BFloat16;
 };
 
-// get equivalent c10::ScalarType tag from compile time type
+// get equivalent torch::headeronly::ScalarType tag from compile time type
 template <typename T>
-static inline constexpr c10::ScalarType equivalent_scalar_type_v =
-    c10::CppTypeToScalarType<equivalent_scalar_type_t<T>>::value;
\ No newline at end of file
+static inline constexpr torch::headeronly::ScalarType equivalent_scalar_type_v =
+    torch::headeronly::CppTypeToScalarType<equivalent_scalar_type_t<T>>::value;
diff --git a/csrc/fused_deepseek_v4_qnorm_rope_kv_insert_kernel.cu b/csrc/fused_deepseek_v4_qnorm_rope_kv_insert_kernel.cu
new file mode 100644
index 000000000000..589e5f7bac04
--- /dev/null
+++ b/csrc/fused_deepseek_v4_qnorm_rope_kv_insert_kernel.cu
@@ -0,0 +1,622 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+ *
+ * Horizontally-fused DeepseekV4-MLA kernel:
+ *   - Q side:  per-head RMSNorm (no weight) + GPT-J RoPE on last ROPE_DIM
+ *   - KV side: GPT-J RoPE on last ROPE_DIM + UE8M0 FP8 quant on NoPE + paged
+ *              cache insert
+ *
+ * Structured after `applyMLARopeAndAssignQKVKernelGeneration` in
+ * TensorRT-LLM's mlaKernels.cu: one kernel, one grid, with head-slot
+ * dispatch choosing Q vs KV work per warp.  The per-warp RMSNorm/RoPE
+ * skeleton is adapted from vllm-deepseek_v4's existing
+ * `fusedQKNormRopeKernel` (csrc/fused_qknorm_rope_kernel.cu).
+ *
+ * Assumptions (hard-coded for DeepseekV4 attention):
+ *   HEAD_DIM  = 512
+ *   ROPE_DIM  = 64   (RoPE applied to dims [NOPE_DIM, HEAD_DIM))
+ *   NOPE_DIM  = 448
+ *   QUANT_BLOCK = 64 (UE8M0 FP8 quant block)
+ *   FP8_MAX   = 448.0f
+ *   is_neox=false (GPT-J interleaved pairs)
+ *   cos_sin_cache layout [max_pos, rope_dim] = cos || sin (cos first, sin
+ *     second along last dim; each half is rope_dim/2 = 32 values)
+ *
+ * Cache layout per paged-cache block (block_size tokens):
+ *   [0,            bs*576):          token data, 448 fp8 + 128 bf16 each
+ *   [bs*576,       bs*576 + bs*8):   UE8M0 scales, 7 real + 1 pad per token
+ */
+
+#include <cmath>
+#ifndef USE_ROCM
+  #include <cuda_fp8.h>
+#else
+  #include <hip/hip_fp8.h>
+#endif
+#include <cuda_runtime.h>
+#include <type_traits>
+
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <torch/cuda.h>
+
+#include "cuda_compat.h"
+#include "dispatch_utils.h"
+#include "type_convert.cuh"
+
+#ifndef FINAL_MASK
+  #ifdef USE_ROCM
+    #define FINAL_MASK 0xffffffffffffffffULL
+  #else
+    #define FINAL_MASK 0xffffffffu
+  #endif
+#endif
+
+#ifdef USE_ROCM
+// ROCm-compatible FP8 conversion helpers
+__device__ __forceinline__ uint8_t rocm_cvt_float_to_fp8_e4m3(float val) {
+  #if defined(HIP_FP8_TYPE_OCP)
+  __hip_fp8_e4m3 fp8_val(val);
+  #else
+  __hip_fp8_e4m3_fnuz fp8_val(val);
+  #endif
+  return reinterpret_cast<uint8_t&>(fp8_val);
+}
+#endif
+
+namespace vllm {
+namespace deepseek_v4_fused_ops {
+
+namespace {
+inline int getSMVersion() {
+  auto* props = at::cuda::getCurrentDeviceProperties();
+  return props->major * 10 + props->minor;
+}
+}  // namespace
+
+// ────────────────────────────────────────────────────────────────────────────
+// Constants
+// ────────────────────────────────────────────────────────────────────────────
+constexpr int kHeadDim = 512;
+constexpr int kRopeDim = 64;
+constexpr int kNopeDim = kHeadDim - kRopeDim;  // 448
+constexpr int kQuantBlock = 64;
+constexpr int kNumQuantBlocks = kNopeDim / kQuantBlock;   // 7
+constexpr int kScaleBytesPerToken = kNumQuantBlocks + 1;  // 8 (7 real + 1 pad)
+constexpr int kTokenDataBytes = kNopeDim + kRopeDim * 2;  // 448 + 128 = 576
+constexpr float kFp8Max = 448.0f;
+
+#ifndef USE_ROCM
+// When num_tokens is less than this threshold,
+// run the reduced grid variant on cuda
+constexpr float NUM_TOKEN_CUTOFF = 1024;
+#endif
+
+// Per-warp layout:  32 lanes × 16 elems/lane = 512 elems = HEAD_DIM.
+constexpr int kNumLanes = 32;
+constexpr int kElemsPerLane = kHeadDim / kNumLanes;  // 16
+
+// ────────────────────────────────────────────────────────────────────────────
+// Small inline helpers
+// ────────────────────────────────────────────────────────────────────────────
+__device__ __forceinline__ float warp4MaxAbs(float val) {
+  // Reduce absolute max across 4 consecutive lanes (lane id & 3 group).
+  float peer = __shfl_xor_sync(FINAL_MASK, val, 1);
+  val = fmaxf(val, peer);
+  peer = __shfl_xor_sync(FINAL_MASK, val, 2);
+  val = fmaxf(val, peer);
+  return val;
+}
+
+template <typename T>
+__device__ __forceinline__ float warpSum(float val) {
+#pragma unroll
+  for (int mask = 16; mask > 0; mask >>= 1) {
+    val += __shfl_xor_sync(FINAL_MASK, val, mask, 32);
+  }
+  return val;
+}
+
+// ────────────────────────────────────────────────────────────────────────────
+// Per-slot inner pipeline
+// ────────────────────────────────────────────────────────────────────────────
+// Shared by both kernel variants: 1 CTA per (token, head) pair vs. 1 CTA per
+// token
+template <typename scalar_t_in>
+__device__ __forceinline__ void processDeepseekV4Slot(
+    uint4 v0, uint4 v1, int const tokenIdx, int const slotIdx,
+    int const dim_base, int const laneId, int const num_heads_q,
+    float const eps, scalar_t_in* __restrict__ q_inout,
+    uint8_t* __restrict__ k_cache, int64_t const* __restrict__ slot_mapping,
+    int64_t const* __restrict__ position_ids,
+    float const* __restrict__ cos_sin_cache, int const cache_block_size,
+    int const kv_block_stride) {
+  using Converter = vllm::_typeConvert<scalar_t_in>;
+  bool const isKV = (slotIdx == num_heads_q);
+
+  // ── Decode the bf16 → 16 fp32 registers ─────────────────────────────
+  float elements[kElemsPerLane];
+  {
+    typename Converter::packed_hip_type const* p0 =
+        reinterpret_cast<typename Converter::packed_hip_type const*>(&v0);
+    typename Converter::packed_hip_type const* p1 =
+        reinterpret_cast<typename Converter::packed_hip_type const*>(&v1);
+#pragma unroll
+    for (int i = 0; i < 4; i++) {
+      float2 f2 = Converter::convert(p0[i]);
+      elements[2 * i] = f2.x;
+      elements[2 * i + 1] = f2.y;
+    }
+#pragma unroll
+    for (int i = 0; i < 4; i++) {
+      float2 f2 = Converter::convert(p1[i]);
+      elements[8 + 2 * i] = f2.x;
+      elements[8 + 2 * i + 1] = f2.y;
+    }
+  }
+
+  // ── Q branch: RMSNorm (no weight) ───────────────────────────────────
+  if (!isKV) {
+    float sumOfSquares = 0.0f;
+#pragma unroll
+    for (int i = 0; i < kElemsPerLane; i++) {
+      sumOfSquares += elements[i] * elements[i];
+    }
+    sumOfSquares = warpSum<float>(sumOfSquares);
+    float const rms_rcp =
+        rsqrtf(sumOfSquares / static_cast<float>(kHeadDim) + eps);
+#pragma unroll
+    for (int i = 0; i < kElemsPerLane; i++) {
+      elements[i] = elements[i] * rms_rcp;
+    }
+  }
+
+  // ── GPT-J RoPE on dims [NOPE_DIM, HEAD_DIM) ─────────────────────────────
+  // All math in fp32.  cos_sin_cache is loaded as fp32 (its native storage).
+  bool const is_rope_lane = dim_base >= kNopeDim;
+  if (is_rope_lane) {
+    int64_t const pos = position_ids[tokenIdx];
+    constexpr int kHalfRope = kRopeDim / 2;
+    float const* cos_ptr = cos_sin_cache + pos * kRopeDim;
+    float const* sin_ptr = cos_ptr + kHalfRope;
+
+    int const rope_local_base = dim_base - kNopeDim;
+    int const half_base = rope_local_base >> 1;
+
+    // Load phase: 4 vectorized LDGs issue back-to-back.
+    float4 const c0 = *reinterpret_cast<float4 const*>(cos_ptr + half_base);
+    float4 const c1 = *reinterpret_cast<float4 const*>(cos_ptr + half_base + 4);
+    float4 const s0 = *reinterpret_cast<float4 const*>(sin_ptr + half_base);
+    float4 const s1 = *reinterpret_cast<float4 const*>(sin_ptr + half_base + 4);
+    float const cos_arr[8] = {c0.x, c0.y, c0.z, c0.w, c1.x, c1.y, c1.z, c1.w};
+    float const sin_arr[8] = {s0.x, s0.y, s0.z, s0.w, s1.x, s1.y, s1.z, s1.w};
+
+#pragma unroll
+    for (int p = 0; p < kElemsPerLane / 2; p++) {
+      float const x_even = elements[2 * p];
+      float const x_odd = elements[2 * p + 1];
+      elements[2 * p] = x_even * cos_arr[p] - x_odd * sin_arr[p];
+      elements[2 * p + 1] = x_even * sin_arr[p] + x_odd * cos_arr[p];
+    }
+  }
+
+  // ═══════════════════════════════════════════════════════════════════
+  // Q / KV branch dispatch. Restructured as if/else (no early `return`)
+  // so every code path lands at the same exit point — callers own PDL
+  // triggering and per-iteration buffer rotation.
+  // ═══════════════════════════════════════════════════════════════════
+  if (!isKV) {
+    // ── Q: cast back to bf16 and store. ────────────────────────────
+    uint4 out0, out1;
+    typename Converter::packed_hip_type* po0 =
+        reinterpret_cast<typename Converter::packed_hip_type*>(&out0);
+    typename Converter::packed_hip_type* po1 =
+        reinterpret_cast<typename Converter::packed_hip_type*>(&out1);
+#pragma unroll
+    for (int i = 0; i < 4; i++) {
+      po0[i] =
+          Converter::convert(make_float2(elements[2 * i], elements[2 * i + 1]));
+    }
+#pragma unroll
+    for (int i = 0; i < 4; i++) {
+      po1[i] = Converter::convert(
+          make_float2(elements[8 + 2 * i], elements[8 + 2 * i + 1]));
+    }
+    scalar_t_in* dst =
+        q_inout +
+        (static_cast<int64_t>(tokenIdx) * num_heads_q + slotIdx) * kHeadDim +
+        dim_base;
+    *reinterpret_cast<uint4*>(dst) = out0;
+    *reinterpret_cast<uint4*>(dst + 8) = out1;
+  } else {
+    // ── KV: FP8 quant on NoPE + bf16 store on RoPE + cache insert.
+    int64_t const slot_id = slot_mapping[tokenIdx];
+    if (slot_id >= 0) {
+      int64_t const block_idx = slot_id / cache_block_size;
+      int64_t const pos_in_block = slot_id % cache_block_size;
+      uint8_t* block_base =
+          k_cache + block_idx * static_cast<int64_t>(kv_block_stride);
+      uint8_t* token_fp8_ptr = block_base + pos_in_block * kTokenDataBytes;
+      uint8_t* token_bf16_ptr = token_fp8_ptr + kNopeDim;
+      uint8_t* token_scale_ptr =
+          block_base +
+          static_cast<int64_t>(cache_block_size) * kTokenDataBytes +
+          pos_in_block * kScaleBytesPerToken;
+
+#pragma unroll
+      for (int i = 0; i < kElemsPerLane; i++) {
+        elements[i] = Converter::convert(Converter::convert(elements[i]));
+      }
+
+      float local_absmax = 0.0f;
+#pragma unroll
+      for (int i = 0; i < kElemsPerLane; i++) {
+        local_absmax = fmaxf(local_absmax, fabsf(elements[i]));
+      }
+      float const absmax = fmaxf(warp4MaxAbs(local_absmax), 1e-4f);
+      float const exponent = ceilf(log2f(absmax / kFp8Max));
+      float const inv_scale = exp2f(-exponent);
+
+      if (!is_rope_lane) {
+        uint8_t out_bytes[kElemsPerLane];
+#pragma unroll
+        for (int i = 0; i < kElemsPerLane; i++) {
+          float scaled = elements[i] * inv_scale;
+          scaled = fminf(fmaxf(scaled, -kFp8Max), kFp8Max);
+#ifndef USE_ROCM
+          __nv_fp8_storage_t s =
+              __nv_cvt_float_to_fp8(scaled, __NV_SATFINITE, __NV_E4M3);
+          out_bytes[i] = static_cast<uint8_t>(s);
+#else
+          out_bytes[i] = rocm_cvt_float_to_fp8_e4m3(scaled);
+#endif
+        }
+        *reinterpret_cast<uint4*>(token_fp8_ptr + dim_base) =
+            *reinterpret_cast<uint4 const*>(out_bytes);
+
+        if ((laneId & 3) == 0) {
+          int const q_block_idx = laneId >> 2;
+          float encoded = fmaxf(fminf(exponent + 127.0f, 255.0f), 0.0f);
+          token_scale_ptr[q_block_idx] = static_cast<uint8_t>(encoded);
+        }
+        if (laneId == 0) {
+          token_scale_ptr[kNumQuantBlocks] = 0;
+        }
+      } else {
+        uint4 out0, out1;
+        typename Converter::packed_hip_type* po0 =
+            reinterpret_cast<typename Converter::packed_hip_type*>(&out0);
+        typename Converter::packed_hip_type* po1 =
+            reinterpret_cast<typename Converter::packed_hip_type*>(&out1);
+#pragma unroll
+        for (int i = 0; i < 4; i++) {
+          po0[i] = Converter::convert(
+              make_float2(elements[2 * i], elements[2 * i + 1]));
+        }
+#pragma unroll
+        for (int i = 0; i < 4; i++) {
+          po1[i] = Converter::convert(
+              make_float2(elements[8 + 2 * i], elements[8 + 2 * i + 1]));
+        }
+        int const rope_local_base = dim_base - kNopeDim;
+        scalar_t_in* bf16_dst =
+            reinterpret_cast<scalar_t_in*>(token_bf16_ptr) + rope_local_base;
+        *reinterpret_cast<uint4*>(bf16_dst) = out0;
+        *reinterpret_cast<uint4*>(bf16_dst + 8) = out1;
+      }
+    }
+  }
+}
+
+// ────────────────────────────────────────────────────────────────────────────
+// Kernel
+// ────────────────────────────────────────────────────────────────────────────
+//
+// Grid: 1D, gridDim.x = ceil(num_tokens_full * (num_heads_q + 1) /
+// warps_per_block) Block: blockDim.x = 256 threads (8 warps per block) Each
+// warp handles one (token, head_slot) pair. head_slot < num_heads_q          →
+// Q branch (RMSNorm + RoPE, in place) head_slot == num_heads_q         → KV
+// branch (RoPE + UE8M0 quant + insert)
+//
+// With DP padding, q/kv/position_ids can have more rows than slot_mapping.
+// The Q branch covers all `num_tokens_full` rows (downstream attention uses
+// them).  The KV branch only inserts the first `num_tokens_insert` tokens
+// (= slot_mapping length) into the paged cache.
+//
+template <typename scalar_t_in>
+__global__ void fusedDeepseekV4QNormRopeKVRopeQuantInsertKernel(
+    scalar_t_in* __restrict__ q_inout,         // [N, H, 512] bf16, in place
+    scalar_t_in const* __restrict__ kv_in,     // [N, 512] bf16
+    uint8_t* __restrict__ k_cache,             // [num_blocks, block_stride]
+    int64_t const* __restrict__ slot_mapping,  // [num_tokens_insert] i64
+    int64_t const* __restrict__ position_ids,  // [N] i64
+    float const* __restrict__ cos_sin_cache,   // [max_pos, 64] fp32
+    float const eps,
+    int const num_tokens_full,    // = q.size(0) = kv.size(0)
+    int const num_tokens_insert,  // = slot_mapping.size(0), ≤ num_tokens_full
+    int const num_heads_q,        // H
+    int const cache_block_size,   // tokens per paged-cache block
+    int const kv_block_stride) {  // bytes per paged-cache block
+#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 800) && !defined(USE_ROCM)
+  // BF16 _typeConvert specialization is unavailable on pre-Ampere.  The
+  // DeepseekV4 kernel only runs with bf16 inputs in practice, so compile a
+  // no-op stub for sm_70/sm_75 to keep multi-arch builds happy.
+  if constexpr (std::is_same_v<scalar_t_in, c10::BFloat16>) {
+    return;
+  } else {
+#endif
+    int const warpsPerBlock = blockDim.x / 32;
+    int const warpId = threadIdx.x / 32;
+    int const laneId = threadIdx.x % 32;
+    int const globalWarpIdx = blockIdx.x * warpsPerBlock + warpId;
+
+    int const total_slots_per_token = num_heads_q + 1;
+    int const tokenIdx = globalWarpIdx / total_slots_per_token;
+    int const slotIdx = globalWarpIdx % total_slots_per_token;
+    if (tokenIdx >= num_tokens_full) return;
+
+    bool const isKV = (slotIdx == num_heads_q);
+    // KV branch: skip DP-padded tokens (no slot reserved for them).
+    if (isKV && tokenIdx >= num_tokens_insert) return;
+
+    // PDL: wait for predecessor kernel (upstream q/kv producer) to signal
+    // before touching any global memory.  No-op when PDL is not enabled on
+    // the launch.  The CUDA runtime wrapper emits the griddepcontrol.wait
+    // PTX with the required memory clobber internally.
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900)
+    cudaGridDependencySynchronize();
+#endif
+
+    // Dim range this lane owns within the 512-wide head.
+    int const dim_base = laneId * kElemsPerLane;  // in [0, 512) step 16
+
+    // Two 16-byte loads per thread (8 bf16 each).  Use uint4 as the vector
+    // type; the shared per-slot helper bitcasts to scalar_t_in packed pairs.
+    scalar_t_in const* src_ptr;
+    if (isKV) {
+      src_ptr = kv_in + static_cast<int64_t>(tokenIdx) * kHeadDim + dim_base;
+    } else {
+      int64_t const q_row_offset =
+          (static_cast<int64_t>(tokenIdx) * num_heads_q + slotIdx) * kHeadDim +
+          dim_base;
+      src_ptr = q_inout + q_row_offset;
+    }
+    uint4 const v0 = *reinterpret_cast<uint4 const*>(src_ptr);
+    uint4 const v1 = *reinterpret_cast<uint4 const*>(src_ptr + 8);
+
+    processDeepseekV4Slot<scalar_t_in>(
+        v0, v1, tokenIdx, slotIdx, dim_base, laneId, num_heads_q, eps, q_inout,
+        k_cache, slot_mapping, position_ids, cos_sin_cache, cache_block_size,
+        kv_block_stride);
+
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900)
+    cudaTriggerProgrammaticLaunchCompletion();
+#endif
+#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 800) && !defined(USE_ROCM)
+  }
+#endif
+}
+
+// ────────────────────────────────────────────────────────────────────────────
+// Kernel
+// ────────────────────────────────────────────────────────────────────────────
+//
+// Grid: 1D, gridDim.x = num_tokens_full
+// Block: blockDim.x = 256 threads (8 warps per block) Each
+// warp handles one token, iterating over each head.
+// Q branch (RMSNorm + RoPE, in place) head_slot == num_heads_q
+// KV branch (RoPE + UE8M0 quant + insert)
+//
+template <typename scalar_t_in>
+__global__ void fusedDeepseekV4QNormRopeKVRopeQuantInsertKernelReducedGrid(
+    scalar_t_in* __restrict__ q_inout,  // [N, H, 512] bf16, in place
+    scalar_t_in const* __restrict__ kv_in, uint8_t* __restrict__ k_cache,
+    int64_t const* __restrict__ slot_mapping,
+    int64_t const* __restrict__ position_ids,
+    float const* __restrict__ cos_sin_cache, float const eps,
+    int const num_tokens_full, int const num_tokens_insert,
+    int const num_heads_q, int const cache_block_size,
+    int const kv_block_stride) {
+#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 800) && !defined(USE_ROCM)
+  if constexpr (std::is_same_v<scalar_t_in, c10::BFloat16>) {
+    return;
+  } else {
+#endif
+    int const warpsPerBlock = blockDim.x / 32;
+    int const warpId = threadIdx.x / 32;
+    int const laneId = threadIdx.x % 32;
+
+    int const tokenIdx = blockIdx.x;
+    if (tokenIdx >= num_tokens_full) return;
+
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900)
+    cudaGridDependencySynchronize();
+#endif
+
+    int const dim_base = laneId * kElemsPerLane;  // in [0, 512) step 16
+    int const slot_end =
+        (tokenIdx >= num_tokens_insert) ? num_heads_q : (num_heads_q + 1);
+
+    auto src_for_slot = [&](int s) -> scalar_t_in const* {
+      if (s == num_heads_q) {
+        return kv_in + static_cast<int64_t>(tokenIdx) * kHeadDim + dim_base;
+      }
+      return q_inout +
+             (static_cast<int64_t>(tokenIdx) * num_heads_q +
+              static_cast<int64_t>(s)) *
+                 kHeadDim +
+             dim_base;
+    };
+
+    if (warpId < slot_end) {
+      int curr_slot = warpId;
+      scalar_t_in const* src_curr = src_for_slot(curr_slot);
+      uint4 v0_curr = *reinterpret_cast<uint4 const*>(src_curr);
+      uint4 v1_curr = *reinterpret_cast<uint4 const*>(src_curr + 8);
+
+      while (curr_slot < slot_end) {
+        int const next_slot = curr_slot + warpsPerBlock;
+        bool const has_next = (next_slot < slot_end);
+
+        // Prefetch src for the next slot
+        uint4 v0_next, v1_next;
+        if (has_next) {
+          scalar_t_in const* src_next = src_for_slot(next_slot);
+          v0_next = *reinterpret_cast<uint4 const*>(src_next);
+          v1_next = *reinterpret_cast<uint4 const*>(src_next + 8);
+        }
+
+        processDeepseekV4Slot<scalar_t_in>(
+            v0_curr, v1_curr, tokenIdx, curr_slot, dim_base, laneId,
+            num_heads_q, eps, q_inout, k_cache, slot_mapping, position_ids,
+            cos_sin_cache, cache_block_size, kv_block_stride);
+
+        // ── Buffer rotation: hand the prefetched LDGs to the next iter.
+        v0_curr = v0_next;
+        v1_curr = v1_next;
+        curr_slot = next_slot;
+      }  // while
+    }  // if (warpId < slot_end)
+
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900)
+    cudaTriggerProgrammaticLaunchCompletion();
+#endif
+#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 800) && !defined(USE_ROCM)
+  }
+#endif
+}
+
+// ────────────────────────────────────────────────────────────────────────────
+// Launch wrapper
+// ────────────────────────────────────────────────────────────────────────────
+template <typename scalar_t_in>
+void launchFusedDeepseekV4QNormRopeKVRopeQuantInsert(
+    scalar_t_in* q_inout, scalar_t_in const* kv_in, uint8_t* k_cache,
+    int64_t const* slot_mapping, int64_t const* position_ids,
+    float const* cos_sin_cache, float const eps, int const num_tokens_full,
+    int const num_tokens_insert, int const num_heads_q,
+    int const cache_block_size, int const kv_block_stride,
+    cudaStream_t stream) {
+  constexpr int kBlockSize = 256;
+  constexpr int kWarpsPerBlock = kBlockSize / 32;
+  int64_t const total_warps =
+      static_cast<int64_t>(num_tokens_full) * (num_heads_q + 1);
+  int const grid =
+      static_cast<int>((total_warps + kWarpsPerBlock - 1) / kWarpsPerBlock);
+
+  // PDL: enable programmatic stream serialization whenever the hardware
+  // supports it (SM90+).  On pre-Hopper GPUs the attribute is unavailable,
+  // so leave numAttrs = 0 and launch as a regular kernel.
+#ifndef USE_ROCM
+  static int const sm_version = getSMVersion();
+  // Host-side guard: the device kernel body is compiled as a no-op for
+  // bf16 on pre-Ampere (sm_70/sm_75) because _typeConvert<BFloat16> is
+  // unavailable there.  Refuse the launch loudly instead of silently
+  // skipping the work.
+  TORCH_CHECK(
+      sm_version >= 80,
+      "fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert requires sm_80+ "
+      "(Ampere or newer); got sm_",
+      sm_version);
+  cudaLaunchConfig_t config;
+  config.gridDim = dim3(grid);
+  config.blockDim = dim3(kBlockSize);
+  config.dynamicSmemBytes = 0;
+  config.stream = stream;
+  cudaLaunchAttribute attrs[1];
+  attrs[0].id = cudaLaunchAttributeProgrammaticStreamSerialization;
+  attrs[0].val.programmaticStreamSerializationAllowed = 1;
+  config.attrs = attrs;
+  config.numAttrs = (sm_version >= 90) ? 1 : 0;
+
+  if (num_tokens_full < NUM_TOKEN_CUTOFF) {
+    cudaLaunchKernelEx(
+        &config, fusedDeepseekV4QNormRopeKVRopeQuantInsertKernel<scalar_t_in>,
+        q_inout, kv_in, k_cache, slot_mapping, position_ids, cos_sin_cache, eps,
+        num_tokens_full, num_tokens_insert, num_heads_q, cache_block_size,
+        kv_block_stride);
+  } else {
+    config.gridDim = dim3(num_tokens_full);
+    cudaLaunchKernelEx(
+        &config,
+        fusedDeepseekV4QNormRopeKVRopeQuantInsertKernelReducedGrid<scalar_t_in>,
+        q_inout, kv_in, k_cache, slot_mapping, position_ids, cos_sin_cache, eps,
+        num_tokens_full, num_tokens_insert, num_heads_q, cache_block_size,
+        kv_block_stride);
+  }
+
+#else
+  // ROCm: use standard kernel launch syntax (no PDL/stream serialization)
+  // clang-format off
+  fusedDeepseekV4QNormRopeKVRopeQuantInsertKernel<scalar_t_in>
+      <<<grid, kBlockSize, 0, stream>>>(
+          q_inout, kv_in, k_cache, slot_mapping, position_ids, cos_sin_cache,
+          eps, num_tokens_full, num_tokens_insert, num_heads_q,
+          cache_block_size, kv_block_stride);
+#endif
+}
+
+}  // namespace deepseek_v4_fused_ops
+}  // namespace vllm
+
+// ────────────────────────────────────────────────────────────────────────────
+// Torch op wrapper
+// ────────────────────────────────────────────────────────────────────────────
+void fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert(
+    torch::Tensor& q,                    // [N, H, 512] bf16, in place
+    torch::Tensor const& kv,             // [N, 512] bf16 (read-only)
+    torch::Tensor& k_cache,              // [num_blocks, block_bytes] uint8
+    torch::Tensor const& slot_mapping,   // [N] int64
+    torch::Tensor const& position_ids,   // [N] int64
+    torch::Tensor const& cos_sin_cache,  // [max_pos, rope_dim] bf16
+    double eps, int64_t cache_block_size) {
+  TORCH_CHECK(q.is_cuda() && q.is_contiguous(), "q must be contiguous CUDA");
+  TORCH_CHECK(kv.is_cuda() && kv.is_contiguous(), "kv must be contiguous CUDA");
+  TORCH_CHECK(k_cache.is_cuda(), "k_cache must be CUDA");
+  TORCH_CHECK(slot_mapping.is_cuda() && slot_mapping.dtype() == torch::kInt64,
+              "slot_mapping must be int64 CUDA");
+  TORCH_CHECK(position_ids.is_cuda() && position_ids.dtype() == torch::kInt64,
+              "position_ids must be int64 CUDA");
+  TORCH_CHECK(cos_sin_cache.is_cuda(), "cos_sin_cache must be CUDA");
+  TORCH_CHECK(q.dim() == 3 && q.size(2) == 512, "q shape [N, H, 512]");
+  TORCH_CHECK(kv.dim() == 2 && kv.size(1) == 512, "kv shape [N, 512]");
+  TORCH_CHECK(q.dtype() == kv.dtype(), "q and kv dtype must match");
+  TORCH_CHECK(k_cache.dtype() == torch::kUInt8, "k_cache must be uint8");
+  TORCH_CHECK(cos_sin_cache.dim() == 2 && cos_sin_cache.size(1) == 64,
+              "cos_sin_cache shape [max_pos, 64]");
+  TORCH_CHECK(cos_sin_cache.dtype() == torch::kFloat32,
+              "cos_sin_cache must be float32");
+
+  // With DP padding, slot_mapping can be shorter than q/kv/positions.
+  // Q-norm+RoPE runs on all q.size(0) rows (downstream attention uses them);
+  // KV quant+insert runs only on the first slot_mapping.size(0) rows.
+  int const num_tokens_full = static_cast<int>(q.size(0));
+  int const num_tokens_insert = static_cast<int>(slot_mapping.size(0));
+  TORCH_CHECK(static_cast<int>(kv.size(0)) == num_tokens_full &&
+                  static_cast<int>(position_ids.size(0)) == num_tokens_full,
+              "q/kv/position_ids row counts must match");
+  TORCH_CHECK(num_tokens_insert <= num_tokens_full,
+              "slot_mapping must not exceed q row count");
+  int const num_heads_q = static_cast<int>(q.size(1));
+  int const cache_block_size_i = static_cast<int>(cache_block_size);
+  int const kv_block_stride = static_cast<int>(k_cache.stride(0));
+
+  at::cuda::OptionalCUDAGuard device_guard(device_of(q));
+  auto stream = at::cuda::getCurrentCUDAStream();
+
+  VLLM_DISPATCH_HALF_TYPES(
+      q.scalar_type(), "fused_deepseek_v4_qnorm_rope_kv_insert", [&] {
+        using qkv_scalar_t = scalar_t;
+        vllm::deepseek_v4_fused_ops::
+            launchFusedDeepseekV4QNormRopeKVRopeQuantInsert<qkv_scalar_t>(
+                reinterpret_cast<qkv_scalar_t*>(q.data_ptr()),
+                reinterpret_cast<qkv_scalar_t const*>(kv.data_ptr()),
+                reinterpret_cast<uint8_t*>(k_cache.data_ptr()),
+                reinterpret_cast<int64_t const*>(slot_mapping.data_ptr()),
+                reinterpret_cast<int64_t const*>(position_ids.data_ptr()),
+                cos_sin_cache.data_ptr<float>(), static_cast<float>(eps),
+                num_tokens_full, num_tokens_insert, num_heads_q,
+                cache_block_size_i, kv_block_stride, stream);
+      });
+}
\ No newline at end of file
diff --git a/csrc/fused_qknorm_rope_kernel.cu b/csrc/fused_qknorm_rope_kernel.cu
deleted file mode 100644
index a51e1a347e1d..000000000000
--- a/csrc/fused_qknorm_rope_kernel.cu
+++ /dev/null
@@ -1,436 +0,0 @@
-/*
- * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cmath>
-#include <cuda_runtime.h>
-#include <type_traits>
-
-#include <torch/cuda.h>
-#include <c10/cuda/CUDAGuard.h>
-
-#include "cuda_compat.h"
-#include "dispatch_utils.h"
-#include "type_convert.cuh"
-
-#define CHECK_TYPE(x, st)                                              \
-  TORCH_CHECK(x.scalar_type() == st, #x " dtype is ", x.scalar_type(), \
-              ", while ", st, " is expected")
-#define CHECK_TH_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor")
-#define CHECK_CONTIGUOUS(x) \
-  TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
-#define CHECK_INPUT(x) \
-  CHECK_TH_CUDA(x);    \
-  CHECK_CONTIGUOUS(x)
-
-#ifdef USE_ROCM
-  #define FINAL_MASK 0xffffffffffffffffULL
-
-  #if defined(HIP_VERSION) && HIP_VERSION < 70000000
-// On ROCm versions before 7.0, __syncwarp isn't defined. The below
-// implementation is copy/pasted from the implementation in ROCm 7.0
-__device__ inline void __syncwarp() {
-  __builtin_amdgcn_fence(__ATOMIC_RELEASE, "wavefront");
-  __builtin_amdgcn_wave_barrier();
-  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "wavefront");
-}
-  #endif
-#else
-  #define FINAL_MASK 0xffffffff
-#endif
-
-namespace tensorrt_llm::common {
-template <typename T, int num>
-struct packed_as;
-// Specialization for packed_as used in this kernel.
-template <>
-struct packed_as<uint, 1> {
-  using type = uint;
-};
-
-template <>
-struct packed_as<uint, 2> {
-  using type = uint2;
-};
-
-template <>
-struct packed_as<uint, 4> {
-  using type = uint4;
-};
-
-template <typename T>
-__inline__ __device__ T warpReduceSum(T val) {
-#pragma unroll
-  for (int mask = 16; mask > 0; mask >>= 1)
-    val += __shfl_xor_sync(FINAL_MASK, val, mask, 32);
-  return val;
-}
-
-template <typename T>
-inline __device__ __host__ T divUp(T m, T n) {
-  return (m + n - 1) / n;
-}
-
-}  // namespace tensorrt_llm::common
-
-namespace tensorrt_llm::kernels {
-// NOTE(zhuhaoran): This kernel is adapted from TensorRT-LLM implementation,
-// with added support for passing the cos_sin_cache as an input.
-// https://github.com/NVIDIA/TensorRT-LLM/blob/main/cpp/tensorrt_llm/kernels/fusedQKNormRopeKernel.cu
-
-// Perform per-head QK Norm and RoPE in a single kernel.
-// scalar_t_in: data type of QKV and RMSNorm weights
-// scalar_t_cache: data type of cos/sin cache
-// head_dim: the dimension of each head
-// interleave: interleave=!is_neox.
-template <typename scalar_t_in, typename scalar_t_cache, int head_dim,
-          bool interleave>
-__global__ void fusedQKNormRopeKernel(
-    void* qkv_void,                  // Combined QKV tensor
-    int const num_heads_q,           // Number of query heads
-    int const num_heads_k,           // Number of key heads
-    int const num_heads_v,           // Number of value heads
-    float const eps,                 // Epsilon for RMS normalization
-    void const* q_weight_void,       // RMSNorm weights for query
-    void const* k_weight_void,       // RMSNorm weights for key
-    void const* cos_sin_cache_void,  // Pre-computed cos/sin cache
-    int64_t const* position_ids,     // Position IDs for RoPE
-    int const num_tokens,            // Number of tokens
-    int const rotary_dim             // Dimension for RoPE
-) {
-#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 800) && !defined(USE_ROCM)
-  if constexpr ((std::is_same_v<scalar_t_in, c10::BFloat16>) ||
-                std::is_same_v<scalar_t_cache, c10::BFloat16>) {
-    return;
-  } else {
-#endif
-
-    using Converter = vllm::_typeConvert<scalar_t_in>;
-    static_assert(Converter::exists,
-                  "Input QKV data type is not supported for this CUDA "
-                  "architecture or toolkit version.");
-    using T_in = typename Converter::hip_type;
-    using T2_in = typename Converter::packed_hip_type;
-
-    using CacheConverter = vllm::_typeConvert<scalar_t_cache>;
-    static_assert(CacheConverter::exists,
-                  "Cache data type is not supported for this CUDA architecture "
-                  "or toolkit version.");
-    using T_cache = typename CacheConverter::hip_type;
-
-    T_in* qkv = reinterpret_cast<T_in*>(qkv_void);
-    T_in const* q_weight = reinterpret_cast<T_in const*>(q_weight_void);
-    T_in const* k_weight = reinterpret_cast<T_in const*>(k_weight_void);
-    T_cache const* cos_sin_cache =
-        reinterpret_cast<T_cache const*>(cos_sin_cache_void);
-
-    int const warpsPerBlock = blockDim.x / 32;
-    int const warpId = threadIdx.x / 32;
-    int const laneId = threadIdx.x % 32;
-
-    // Calculate global warp index to determine which head/token this warp
-    // processes
-    int const globalWarpIdx = blockIdx.x * warpsPerBlock + warpId;
-
-    // Total number of attention heads (Q and K)
-    int const total_qk_heads = num_heads_q + num_heads_k;
-
-    // Determine which token and head type (Q or K) this warp processes
-    int const tokenIdx = globalWarpIdx / total_qk_heads;
-    int const localHeadIdx = globalWarpIdx % total_qk_heads;
-
-    // Skip if this warp is assigned beyond the number of tokens
-    if (tokenIdx >= num_tokens) return;
-
-    bool const isQ = localHeadIdx < num_heads_q;
-    int const headIdx = isQ ? localHeadIdx : localHeadIdx - num_heads_q;
-
-    int const num_heads = num_heads_q + num_heads_k + num_heads_v;
-
-    static_assert(head_dim % (32 * 2) == 0,
-                  "head_dim must be divisible by 64 (each warp processes one "
-                  "head, and each thread gets even number of "
-                  "elements)");
-    constexpr int numElemsPerThread = head_dim / 32;
-    float elements[numElemsPerThread];
-    constexpr int elemSizeBytes = numElemsPerThread * sizeof(__nv_bfloat16);
-    static_assert(elemSizeBytes % 4 == 0,
-                  "numSizeBytes must be a multiple of 4");
-    constexpr int vecSize =
-        elemSizeBytes /
-        4;  // Use packed_as<uint, vecSize> to perform loading/saving.
-    using vec_T = typename tensorrt_llm::common::packed_as<uint, vecSize>::type;
-
-    int offsetWarp;  // Offset for the warp
-    if (isQ) {
-      // Q segment: token offset + head offset within Q segment
-      offsetWarp = tokenIdx * num_heads * head_dim + headIdx * head_dim;
-    } else {
-      // K segment: token offset + entire Q segment + head offset within K
-      // segment
-      offsetWarp = tokenIdx * num_heads * head_dim + num_heads_q * head_dim +
-                   headIdx * head_dim;
-    }
-    int offsetThread = offsetWarp + laneId * numElemsPerThread;
-
-    // Sum of squares for RMSNorm
-    float sumOfSquares = 0.0f;
-
-    // Load.
-    {
-      vec_T vec = *reinterpret_cast<vec_T const*>(&qkv[offsetThread]);
-      constexpr int num_packed_elems = elemSizeBytes / sizeof(T2_in);
-#pragma unroll
-      for (int i = 0; i < num_packed_elems; i++) {
-        // Interpret the generic vector chunk as the specific packed type
-        T2_in packed_val = *(reinterpret_cast<T2_in*>(&vec) + i);
-        // Convert to float2 for computation
-        float2 vals = Converter::convert(packed_val);
-        sumOfSquares += vals.x * vals.x;
-        sumOfSquares += vals.y * vals.y;
-
-        elements[2 * i] = vals.x;
-        elements[2 * i + 1] = vals.y;
-      }
-    }
-
-    // Reduce sum across warp using the utility function
-    sumOfSquares = tensorrt_llm::common::warpReduceSum(sumOfSquares);
-
-    // Compute RMS normalization factor
-    float rms_rcp = rsqrtf(sumOfSquares / static_cast<float>(head_dim) + eps);
-
-    // Normalize elements
-#pragma unroll
-    for (int i = 0; i < numElemsPerThread; i++) {
-      int dim = laneId * numElemsPerThread + i;
-      float weight = isQ ? Converter::convert(q_weight[dim])
-                         : Converter::convert(k_weight[dim]);
-      elements[i] *= rms_rcp * weight;
-    }
-
-    // Apply RoPE to normalized elements
-    float elements2[numElemsPerThread];  // Additional buffer required for RoPE.
-
-    int64_t pos_id = position_ids[tokenIdx];
-
-    // Calculate cache pointer for this position - similar to
-    // pos_encoding_kernels.cu
-    T_cache const* cache_ptr = cos_sin_cache + pos_id * rotary_dim;
-    int const embed_dim = rotary_dim / 2;
-    T_cache const* cos_ptr = cache_ptr;
-    T_cache const* sin_ptr = cache_ptr + embed_dim;
-    int const rotary_lanes = rotary_dim / numElemsPerThread;  // rotary range
-    if (laneId < rotary_lanes) {
-      if constexpr (interleave) {
-        // Perform interleaving. Use pre-computed cos/sin values.
-#pragma unroll
-        for (int i = 0; i < numElemsPerThread / 2; ++i) {
-          int const idx0 = 2 * i;
-          int const idx1 = 2 * i + 1;
-          // Global dimension index in the head
-          int const dim_idx = laneId * numElemsPerThread + idx0;
-
-          float const val0 = elements[idx0];
-          float const val1 = elements[idx1];
-
-          int const half_dim = dim_idx / 2;
-          float const cos_val =
-              CacheConverter::convert(VLLM_LDG(cos_ptr + half_dim));
-          float const sin_val =
-              CacheConverter::convert(VLLM_LDG(sin_ptr + half_dim));
-
-          elements[idx0] = val0 * cos_val - val1 * sin_val;
-          elements[idx1] = val0 * sin_val + val1 * cos_val;
-        }
-      } else {
-        // Before data exchange with in warp, we need to sync.
-        __syncwarp();
-        int pairOffset = (rotary_dim / 2) / numElemsPerThread;
-        // Get the data from the other half of the warp. Use pre-computed
-        // cos/sin values.
-#pragma unroll
-        for (int i = 0; i < numElemsPerThread; i++) {
-          elements2[i] = __shfl_xor_sync(FINAL_MASK, elements[i], pairOffset);
-
-          if (laneId < pairOffset) {
-            elements2[i] = -elements2[i];
-          }
-          int dim_idx = laneId * numElemsPerThread + i;
-
-          dim_idx = (dim_idx * 2) % rotary_dim;
-          int half_dim = dim_idx / 2;
-          float cos_val = CacheConverter::convert(VLLM_LDG(cos_ptr + half_dim));
-          float sin_val = CacheConverter::convert(VLLM_LDG(sin_ptr + half_dim));
-
-          elements[i] = elements[i] * cos_val + elements2[i] * sin_val;
-        }
-        // __shfl_xor_sync does not provide memfence. Need to sync again.
-        __syncwarp();
-      }
-    }
-    // Store.
-    {
-      vec_T vec;
-      constexpr int num_packed_elems = elemSizeBytes / sizeof(T2_in);
-#pragma unroll
-      for (int i = 0; i < num_packed_elems; i++) {
-        // Convert from float2 back to the specific packed type
-        T2_in packed_val = Converter::convert(
-            make_float2(elements[2 * i], elements[2 * i + 1]));
-        // Place it into the generic vector
-        *(reinterpret_cast<T2_in*>(&vec) + i) = packed_val;
-      }
-      *reinterpret_cast<vec_T*>(&qkv[offsetThread]) = vec;
-    }
-
-#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 800) && !defined(USE_ROCM)
-  }
-#endif
-}
-
-// Borrowed from
-// https://github.com/flashinfer-ai/flashinfer/blob/8125d079a43e9a0ba463a4ed1b639cefd084cec9/include/flashinfer/pos_enc.cuh#L568
-#define DISPATCH_INTERLEAVE(interleave, INTERLEAVE, ...) \
-  if (interleave) {                                      \
-    const bool INTERLEAVE = true;                        \
-    __VA_ARGS__                                          \
-  } else {                                               \
-    const bool INTERLEAVE = false;                       \
-    __VA_ARGS__                                          \
-  }
-
-template <typename scalar_t_in, typename scalar_t_cache>
-void launchFusedQKNormRope(void* qkv, int const num_tokens,
-                           int const num_heads_q, int const num_heads_k,
-                           int const num_heads_v, int const head_dim,
-                           int const rotary_dim, float const eps,
-                           void const* q_weight, void const* k_weight,
-                           void const* cos_sin_cache, bool const interleave,
-                           int64_t const* position_ids, cudaStream_t stream) {
-  constexpr int blockSize = 256;
-
-  int const warpsPerBlock = blockSize / 32;
-  int const totalQKHeads = num_heads_q + num_heads_k;
-  int const totalWarps = num_tokens * totalQKHeads;
-
-  int const gridSize = common::divUp(totalWarps, warpsPerBlock);
-  dim3 gridDim(gridSize);
-  dim3 blockDim(blockSize);
-
-  switch (head_dim) {
-    case 64:
-      DISPATCH_INTERLEAVE(interleave, INTERLEAVE, {
-        fusedQKNormRopeKernel<scalar_t_in, scalar_t_cache, 64, INTERLEAVE>
-            <<<gridDim, blockDim, 0, stream>>>(
-                qkv, num_heads_q, num_heads_k, num_heads_v, eps, q_weight,
-                k_weight, cos_sin_cache, position_ids, num_tokens, rotary_dim);
-      });
-      break;
-    case 128:
-      DISPATCH_INTERLEAVE(interleave, INTERLEAVE, {
-        fusedQKNormRopeKernel<scalar_t_in, scalar_t_cache, 128, INTERLEAVE>
-            <<<gridDim, blockDim, 0, stream>>>(
-                qkv, num_heads_q, num_heads_k, num_heads_v, eps, q_weight,
-                k_weight, cos_sin_cache, position_ids, num_tokens, rotary_dim);
-      });
-      break;
-    case 256:
-      DISPATCH_INTERLEAVE(interleave, INTERLEAVE, {
-        fusedQKNormRopeKernel<scalar_t_in, scalar_t_cache, 256, INTERLEAVE>
-            <<<gridDim, blockDim, 0, stream>>>(
-                qkv, num_heads_q, num_heads_k, num_heads_v, eps, q_weight,
-                k_weight, cos_sin_cache, position_ids, num_tokens, rotary_dim);
-      });
-      break;
-    default:
-      TORCH_CHECK(false,
-                  "Unsupported head dimension for fusedQKNormRope: ", head_dim);
-  }
-}
-}  // namespace tensorrt_llm::kernels
-
-void fused_qk_norm_rope(
-    torch::Tensor& qkv,       // Combined QKV tensor [num_tokens,
-                              // (num_heads_q+num_heads_k+num_heads_v)*head_dim]
-    int64_t num_heads_q,      // Number of query heads
-    int64_t num_heads_k,      // Number of key heads
-    int64_t num_heads_v,      // Number of value heads
-    int64_t head_dim,         // Dimension per head
-    double eps,               // Epsilon for RMS normalization
-    torch::Tensor& q_weight,  // RMSNorm weights for query [head_dim]
-    torch::Tensor& k_weight,  // RMSNorm weights for key [head_dim]
-    torch::Tensor& cos_sin_cache,  // Cos/sin cache [max_position, head_dim]
-    bool is_neox,                  // Whether RoPE is applied in Neox style
-    torch::Tensor& position_ids    // Position IDs for RoPE [num_tokens]
-) {
-  // Input validation
-  CHECK_INPUT(qkv);
-  CHECK_INPUT(position_ids);
-  CHECK_INPUT(q_weight);
-  CHECK_INPUT(k_weight);
-  CHECK_INPUT(cos_sin_cache);
-  CHECK_TYPE(position_ids, torch::kInt64);
-
-  TORCH_CHECK(qkv.dim() == 2,
-              "QKV tensor must be 2D: [num_tokens, "
-              "(num_heads_q+num_heads_k+num_heads_v)*head_dim]");
-  TORCH_CHECK(position_ids.dim() == 1, "Position IDs must be 1D: [num_tokens]");
-  TORCH_CHECK(q_weight.dim() == 1, "Query weights must be 1D: [head_dim]");
-  TORCH_CHECK(k_weight.dim() == 1, "Key weights must be 1D: [head_dim]");
-  TORCH_CHECK(cos_sin_cache.dim() == 2,
-              "Cos/sin cache must be 2D: [max_position, head_dim]");
-  TORCH_CHECK(q_weight.size(0) == head_dim,
-              "Query weights size must match head dimension");
-  TORCH_CHECK(k_weight.size(0) == head_dim,
-              "Key weights size must match head dimension");
-
-  TORCH_CHECK(cos_sin_cache.size(1) % 2 == 0, "rotary_dim must be even");
-  TORCH_CHECK(cos_sin_cache.size(1) <= head_dim,
-              "rotary_dim must be less than or equal to head_dim");
-
-  TORCH_CHECK(qkv.scalar_type() == q_weight.scalar_type() &&
-                  qkv.scalar_type() == k_weight.scalar_type(),
-              "qkv, q_weight and k_weight must have the same dtype");
-
-  int64_t num_tokens = qkv.size(0);
-  TORCH_CHECK(position_ids.size(0) == num_tokens,
-              "Number of tokens in position_ids must match QKV");
-
-  int64_t total_heads = num_heads_q + num_heads_k + num_heads_v;
-  TORCH_CHECK(
-      qkv.size(1) == total_heads * head_dim,
-      "QKV tensor size must match total number of heads and head dimension");
-
-  auto stream = at::cuda::getCurrentCUDAStream(qkv.get_device());
-
-  VLLM_DISPATCH_HALF_TYPES(qkv.scalar_type(), "fused_qk_norm_rope_kernel", [&] {
-    using qkv_scalar_t = scalar_t;
-    VLLM_DISPATCH_FLOATING_TYPES(
-        cos_sin_cache.scalar_type(), "fused_qk_norm_rope_kernel", [&] {
-          using cache_scalar_t = scalar_t;
-          tensorrt_llm::kernels::launchFusedQKNormRope<qkv_scalar_t,
-                                                       cache_scalar_t>(
-              qkv.data_ptr(), static_cast<int>(num_tokens),
-              static_cast<int>(num_heads_q), static_cast<int>(num_heads_k),
-              static_cast<int>(num_heads_v), static_cast<int>(head_dim),
-              static_cast<int>(cos_sin_cache.size(1)), static_cast<float>(eps),
-              q_weight.data_ptr(), k_weight.data_ptr(),
-              cos_sin_cache.data_ptr(), !is_neox,
-              reinterpret_cast<int64_t const*>(position_ids.data_ptr()),
-              stream);
-        });
-  });
-}
\ No newline at end of file
diff --git a/csrc/activation_kernels.cu b/csrc/libtorch_stable/activation_kernels.cu
similarity index 70%
rename from csrc/activation_kernels.cu
rename to csrc/libtorch_stable/activation_kernels.cu
index 758a77795553..28fdce5c3055 100644
--- a/csrc/activation_kernels.cu
+++ b/csrc/libtorch_stable/activation_kernels.cu
@@ -1,39 +1,84 @@
-#include <ATen/cuda/CUDAContext.h>
-#include <torch/all.h>
-#include <c10/cuda/CUDAGuard.h>
+#include <cuda.h>
+#include <torch/csrc/stable/tensor.h>
 
 #include <cmath>
 
-#include "cuda_compat.h"
-#include "cuda_vec_utils.cuh"
+#include "../cuda_compat.h"
+#include "../cuda_vec_utils.cuh"
 #include "dispatch_utils.h"
+#include "torch_utils.h"
 
 namespace vllm {
 
 template <typename scalar_t, scalar_t (*ACT_FN)(const scalar_t&),
-          bool act_first>
+          bool act_first, bool HAS_CLAMP>
 __device__ __forceinline__ scalar_t compute(const scalar_t& x,
-                                            const scalar_t& y) {
-  return act_first ? ACT_FN(x) * y : x * ACT_FN(y);
+                                            const scalar_t& y,
+                                            const float limit) {
+  if constexpr (act_first) {
+    scalar_t gate = x;
+    scalar_t up = y;
+    if constexpr (HAS_CLAMP) {
+      gate = (scalar_t)fminf((float)gate, limit);
+      up = (scalar_t)fmaxf(fminf((float)up, limit), -limit);
+    }
+    return ACT_FN(gate) * up;
+  } else {
+    scalar_t gate = x;
+    scalar_t up = y;
+    if constexpr (HAS_CLAMP) {
+      gate = (scalar_t)fmaxf(fminf((float)gate, limit), -limit);
+      up = (scalar_t)fminf((float)up, limit);
+    }
+    return gate * ACT_FN(up);
+  }
 }
 
 template <typename packed_t, packed_t (*PACKED_ACT_FN)(const packed_t&),
-          bool act_first>
+          bool act_first, bool HAS_CLAMP>
 __device__ __forceinline__ packed_t packed_compute(const packed_t& x,
-                                                   const packed_t& y) {
-  return act_first ? packed_mul(PACKED_ACT_FN(x), y)
-                   : packed_mul(x, PACKED_ACT_FN(y));
+                                                   const packed_t& y,
+                                                   const float limit) {
+  if constexpr (act_first) {
+    packed_t gate = x;
+    packed_t up = y;
+    if constexpr (HAS_CLAMP) {
+      float2 g = cast_to_float2(gate);
+      float2 u = cast_to_float2(up);
+      g.x = fminf(g.x, limit);
+      g.y = fminf(g.y, limit);
+      u.x = fmaxf(fminf(u.x, limit), -limit);
+      u.y = fmaxf(fminf(u.y, limit), -limit);
+      gate = cast_to_packed<packed_t>(g);
+      up = cast_to_packed<packed_t>(u);
+    }
+    return packed_mul(PACKED_ACT_FN(gate), up);
+  } else {
+    packed_t gate = x;
+    packed_t up = y;
+    if constexpr (HAS_CLAMP) {
+      float2 g = cast_to_float2(gate);
+      float2 u = cast_to_float2(up);
+      g.x = fmaxf(fminf(g.x, limit), -limit);
+      g.y = fmaxf(fminf(g.y, limit), -limit);
+      u.x = fminf(u.x, limit);
+      u.y = fminf(u.y, limit);
+      gate = cast_to_packed<packed_t>(g);
+      up = cast_to_packed<packed_t>(u);
+    }
+    return packed_mul(gate, PACKED_ACT_FN(up));
+  }
 }
 
 // Activation and gating kernel template.
 template <typename scalar_t, typename packed_t,
           scalar_t (*ACT_FN)(const scalar_t&),
           packed_t (*PACKED_ACT_FN)(const packed_t&), bool act_first,
-          bool use_vec, bool use_256b = false>
+          bool use_vec, bool HAS_CLAMP, bool use_256b = false>
 __global__ void act_and_mul_kernel(
     scalar_t* __restrict__ out,          // [..., d]
     const scalar_t* __restrict__ input,  // [..., 2, d]
-    const int d) {
+    const int d, const float limit) {
   const scalar_t* x_ptr = input + blockIdx.x * 2 * d;
   const scalar_t* y_ptr = x_ptr + d;
   scalar_t* out_ptr = out + blockIdx.x * d;
@@ -58,8 +103,9 @@ __global__ void act_and_mul_kernel(
       }
 #pragma unroll
       for (int j = 0; j < pvec_t::NUM_ELTS; j++) {
-        x.elts[j] = packed_compute<packed_t, PACKED_ACT_FN, act_first>(
-            x.elts[j], y.elts[j]);
+        x.elts[j] =
+            packed_compute<packed_t, PACKED_ACT_FN, act_first, HAS_CLAMP>(
+                x.elts[j], y.elts[j], limit);
       }
       if constexpr (use_256b) {
         st256(x, &out_vec[i]);
@@ -72,7 +118,8 @@ __global__ void act_and_mul_kernel(
     for (int64_t idx = threadIdx.x; idx < d; idx += blockDim.x) {
       const scalar_t x = VLLM_LDG(&x_ptr[idx]);
       const scalar_t y = VLLM_LDG(&y_ptr[idx]);
-      out_ptr[idx] = compute<scalar_t, ACT_FN, act_first>(x, y);
+      out_ptr[idx] =
+          compute<scalar_t, ACT_FN, act_first, HAS_CLAMP>(x, y, limit);
     }
   }
 }
@@ -151,8 +198,11 @@ packed_gelu_tanh_kernel(const packed_t& val) {
 
 // Launch activation and gating kernel.
 // Use ACT_FIRST (bool) indicating whether to apply the activation function
-// first.
-#define LAUNCH_ACTIVATION_GATE_KERNEL(KERNEL, PACKED_KERNEL, ACT_FIRST)        \
+// first. HAS_CLAMP (bool) enables pre-activation clamping: gate input is
+// clamped (max only) and up input is clamped (both sides) before the
+// activation function is applied.
+#define LAUNCH_ACTIVATION_GATE_KERNEL(KERNEL, PACKED_KERNEL, ACT_FIRST,        \
+                                      HAS_CLAMP, LIMIT)                        \
   auto dtype = input.scalar_type();                                            \
   int d = input.size(-1) / 2;                                                  \
   int64_t num_tokens = input.numel() / input.size(-1);                         \
@@ -160,76 +210,87 @@ packed_gelu_tanh_kernel(const packed_t& val) {
     return;                                                                    \
   }                                                                            \
   dim3 grid(num_tokens);                                                       \
-  int cc_major = at::cuda::getCurrentDeviceProperties()->major;                \
+  int cc_major = get_device_prop()->major;                                     \
   int support_vec =                                                            \
       (CUDA_VERSION >= 12090 && cc_major >= 10 && num_tokens > 128)            \
           ? vllm::VecTraits<true>::ARCH_MAX_VEC_SIZE                           \
           : vllm::VecTraits<false>::ARCH_MAX_VEC_SIZE;                         \
-  int vec_size = support_vec / at::elementSize(dtype);                         \
+  int vec_size = support_vec / input.element_size();                           \
   const bool use_vec = (d % vec_size == 0);                                    \
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));            \
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();                \
+  const torch::stable::accelerator::DeviceGuard device_guard(                  \
+      input.get_device_index());                                               \
+  const cudaStream_t stream = get_current_cuda_stream();                       \
   if (use_vec) {                                                               \
     dim3 block(std::min(d / vec_size, 1024));                                  \
     if (CUDA_VERSION >= 12090 && cc_major >= 10 && num_tokens > 128) {         \
-      VLLM_DISPATCH_FLOATING_TYPES(dtype, "act_and_mul_kernel", [&] {          \
+      VLLM_STABLE_DISPATCH_FLOATING_TYPES(dtype, "act_and_mul_kernel", [&] {   \
         vllm::act_and_mul_kernel<                                              \
             scalar_t, typename vllm::PackedTypeConverter<scalar_t>::Type,      \
             KERNEL<scalar_t>,                                                  \
             PACKED_KERNEL<typename vllm::PackedTypeConverter<scalar_t>::Type>, \
-            ACT_FIRST, true, true><<<grid, block, 0, stream>>>(                \
-            out.data_ptr<scalar_t>(), input.data_ptr<scalar_t>(), d);          \
+            ACT_FIRST, true, HAS_CLAMP, true><<<grid, block, 0, stream>>>(     \
+            out.mutable_data_ptr<scalar_t>(),                                  \
+            input.const_data_ptr<scalar_t>(), d, LIMIT);                       \
       });                                                                      \
     } else {                                                                   \
-      VLLM_DISPATCH_FLOATING_TYPES(dtype, "act_and_mul_kernel", [&] {          \
+      VLLM_STABLE_DISPATCH_FLOATING_TYPES(dtype, "act_and_mul_kernel", [&] {   \
         vllm::act_and_mul_kernel<                                              \
             scalar_t, typename vllm::PackedTypeConverter<scalar_t>::Type,      \
             KERNEL<scalar_t>,                                                  \
             PACKED_KERNEL<typename vllm::PackedTypeConverter<scalar_t>::Type>, \
-            ACT_FIRST, true, false><<<grid, block, 0, stream>>>(               \
-            out.data_ptr<scalar_t>(), input.data_ptr<scalar_t>(), d);          \
+            ACT_FIRST, true, HAS_CLAMP, false><<<grid, block, 0, stream>>>(    \
+            out.mutable_data_ptr<scalar_t>(),                                  \
+            input.const_data_ptr<scalar_t>(), d, LIMIT);                       \
       });                                                                      \
     }                                                                          \
   } else {                                                                     \
     dim3 block(std::min(d, 1024));                                             \
-    VLLM_DISPATCH_FLOATING_TYPES(dtype, "act_and_mul_kernel", [&] {            \
+    VLLM_STABLE_DISPATCH_FLOATING_TYPES(dtype, "act_and_mul_kernel", [&] {     \
       vllm::act_and_mul_kernel<                                                \
           scalar_t, typename vllm::PackedTypeConverter<scalar_t>::Type,        \
           KERNEL<scalar_t>,                                                    \
           PACKED_KERNEL<typename vllm::PackedTypeConverter<scalar_t>::Type>,   \
-          ACT_FIRST, false><<<grid, block, 0, stream>>>(                       \
-          out.data_ptr<scalar_t>(), input.data_ptr<scalar_t>(), d);            \
+          ACT_FIRST, false, HAS_CLAMP><<<grid, block, 0, stream>>>(            \
+          out.mutable_data_ptr<scalar_t>(), input.const_data_ptr<scalar_t>(),  \
+          d, LIMIT);                                                           \
     });                                                                        \
   }
 
-void silu_and_mul(torch::Tensor& out,    // [..., d]
-                  torch::Tensor& input)  // [..., 2 * d]
+void silu_and_mul(torch::stable::Tensor& out,    // [..., d]
+                  torch::stable::Tensor& input)  // [..., 2 * d]
 {
   LAUNCH_ACTIVATION_GATE_KERNEL(vllm::silu_kernel, vllm::packed_silu_kernel,
-                                true);
+                                true, false, 0.0f);
+}
+
+void silu_and_mul_clamp(torch::stable::Tensor& out,    // [..., d]
+                        torch::stable::Tensor& input,  // [..., 2 * d]
+                        double limit) {
+  LAUNCH_ACTIVATION_GATE_KERNEL(vllm::silu_kernel, vllm::packed_silu_kernel,
+                                true, true, (float)limit);
 }
 
-void mul_and_silu(torch::Tensor& out,    // [..., d]
-                  torch::Tensor& input)  // [..., 2 * d]
+void mul_and_silu(torch::stable::Tensor& out,    // [..., d]
+                  torch::stable::Tensor& input)  // [..., 2 * d]
 {
   // The difference between mul_and_silu and silu_and_mul is that mul_and_silu
   // applies the silu to the latter half of the input.
   LAUNCH_ACTIVATION_GATE_KERNEL(vllm::silu_kernel, vllm::packed_silu_kernel,
-                                false);
+                                false, false, 0.0f);
 }
 
-void gelu_and_mul(torch::Tensor& out,    // [..., d]
-                  torch::Tensor& input)  // [..., 2 * d]
+void gelu_and_mul(torch::stable::Tensor& out,    // [..., d]
+                  torch::stable::Tensor& input)  // [..., 2 * d]
 {
   LAUNCH_ACTIVATION_GATE_KERNEL(vllm::gelu_kernel, vllm::packed_gelu_kernel,
-                                true);
+                                true, false, 0.0f);
 }
 
-void gelu_tanh_and_mul(torch::Tensor& out,    // [..., d]
-                       torch::Tensor& input)  // [..., 2 * d]
+void gelu_tanh_and_mul(torch::stable::Tensor& out,    // [..., d]
+                       torch::stable::Tensor& input)  // [..., 2 * d]
 {
-  LAUNCH_ACTIVATION_GATE_KERNEL(vllm::gelu_tanh_kernel,
-                                vllm::packed_gelu_tanh_kernel, true);
+  LAUNCH_ACTIVATION_GATE_KERNEL(
+      vllm::gelu_tanh_kernel, vllm::packed_gelu_tanh_kernel, true, false, 0.0f);
 }
 
 namespace vllm {
@@ -377,19 +438,20 @@ __global__ void swigluoai_and_mul_kernel(
     return;                                                                    \
   }                                                                            \
   dim3 grid(num_tokens);                                                       \
-  int cc_major = at::cuda::getCurrentDeviceProperties()->major;                \
+  int cc_major = get_device_prop()->major;                                     \
   int support_vec =                                                            \
       (CUDA_VERSION >= 12090 && cc_major >= 10 && num_tokens > 128)            \
           ? vllm::VecTraits<true>::ARCH_MAX_VEC_SIZE                           \
           : vllm::VecTraits<false>::ARCH_MAX_VEC_SIZE;                         \
-  int vec_size = support_vec / at::elementSize(dtype);                         \
+  int vec_size = support_vec / input.element_size();                           \
   const bool use_vec = (d % vec_size == 0);                                    \
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));            \
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();                \
+  const torch::stable::accelerator::DeviceGuard device_guard(                  \
+      input.get_device_index());                                               \
+  const cudaStream_t stream = get_current_cuda_stream();                       \
   if (use_vec) {                                                               \
     dim3 block(std::min(d / vec_size, 1024));                                  \
     if (CUDA_VERSION >= 12090 && cc_major >= 10 && num_tokens > 128) {         \
-      VLLM_DISPATCH_FLOATING_TYPES(                                            \
+      VLLM_STABLE_DISPATCH_FLOATING_TYPES(                                     \
           dtype, "act_and_mul_kernel_with_param", [&] {                        \
             vllm::act_and_mul_kernel_with_param<                               \
                 scalar_t, typename vllm::PackedTypeConverter<scalar_t>::Type,  \
@@ -397,11 +459,11 @@ __global__ void swigluoai_and_mul_kernel(
                 PACKED_KERNEL<                                                 \
                     typename vllm::PackedTypeConverter<scalar_t>::Type>,       \
                 true, true><<<grid, block, 0, stream>>>(                       \
-                out.data_ptr<scalar_t>(), input.data_ptr<scalar_t>(), d,       \
-                PARAM);                                                        \
+                out.mutable_data_ptr<scalar_t>(),                              \
+                input.const_data_ptr<scalar_t>(), d, PARAM);                   \
           });                                                                  \
     } else {                                                                   \
-      VLLM_DISPATCH_FLOATING_TYPES(                                            \
+      VLLM_STABLE_DISPATCH_FLOATING_TYPES(                                     \
           dtype, "act_and_mul_kernel_with_param", [&] {                        \
             vllm::act_and_mul_kernel_with_param<                               \
                 scalar_t, typename vllm::PackedTypeConverter<scalar_t>::Type,  \
@@ -409,45 +471,49 @@ __global__ void swigluoai_and_mul_kernel(
                 PACKED_KERNEL<                                                 \
                     typename vllm::PackedTypeConverter<scalar_t>::Type>,       \
                 true, false><<<grid, block, 0, stream>>>(                      \
-                out.data_ptr<scalar_t>(), input.data_ptr<scalar_t>(), d,       \
-                PARAM);                                                        \
+                out.mutable_data_ptr<scalar_t>(),                              \
+                input.const_data_ptr<scalar_t>(), d, PARAM);                   \
           });                                                                  \
     }                                                                          \
   } else {                                                                     \
     dim3 block(std::min(d, 1024));                                             \
-    VLLM_DISPATCH_FLOATING_TYPES(dtype, "act_and_mul_kernel_with_param", [&] { \
-      vllm::act_and_mul_kernel_with_param<                                     \
-          scalar_t, typename vllm::PackedTypeConverter<scalar_t>::Type,        \
-          KERNEL<scalar_t>,                                                    \
-          PACKED_KERNEL<typename vllm::PackedTypeConverter<scalar_t>::Type>,   \
-          false><<<grid, block, 0, stream>>>(                                  \
-          out.data_ptr<scalar_t>(), input.data_ptr<scalar_t>(), d, PARAM);     \
-    });                                                                        \
+    VLLM_STABLE_DISPATCH_FLOATING_TYPES(                                       \
+        dtype, "act_and_mul_kernel_with_param", [&] {                          \
+          vllm::act_and_mul_kernel_with_param<                                 \
+              scalar_t, typename vllm::PackedTypeConverter<scalar_t>::Type,    \
+              KERNEL<scalar_t>,                                                \
+              PACKED_KERNEL<                                                   \
+                  typename vllm::PackedTypeConverter<scalar_t>::Type>,         \
+              false><<<grid, block, 0, stream>>>(                              \
+              out.mutable_data_ptr<scalar_t>(),                                \
+              input.const_data_ptr<scalar_t>(), d, PARAM);                     \
+        });                                                                    \
   }
 
-#define LAUNCH_SIGLUOAI_AND_MUL(KERNEL, ALPHA, LIMIT)                          \
-  int d = input.size(-1) / 2;                                                  \
-  int64_t num_tokens = input.numel() / input.size(-1);                         \
-  dim3 grid(num_tokens);                                                       \
-  dim3 block(std::min(d, 1024));                                               \
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));            \
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();                \
-  VLLM_DISPATCH_FLOATING_TYPES(                                                \
-      input.scalar_type(), "clamp_swiglu_kernel_with_params", [&] {            \
-        vllm::swigluoai_and_mul_kernel<scalar_t, KERNEL<scalar_t>>             \
-            <<<grid, block, 0, stream>>>(out.data_ptr<scalar_t>(),             \
-                                         input.data_ptr<scalar_t>(), d, ALPHA, \
-                                         LIMIT);                               \
+#define LAUNCH_SIGLUOAI_AND_MUL(KERNEL, ALPHA, LIMIT)                         \
+  int d = input.size(-1) / 2;                                                 \
+  int64_t num_tokens = input.numel() / input.size(-1);                        \
+  dim3 grid(num_tokens);                                                      \
+  dim3 block(std::min(d, 1024));                                              \
+  const torch::stable::accelerator::DeviceGuard device_guard(                 \
+      input.get_device_index());                                              \
+  const cudaStream_t stream = get_current_cuda_stream();                      \
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(                                        \
+      input.scalar_type(), "clamp_swiglu_kernel_with_params", [&] {           \
+        vllm::swigluoai_and_mul_kernel<scalar_t, KERNEL<scalar_t>>            \
+            <<<grid, block, 0, stream>>>(out.mutable_data_ptr<scalar_t>(),    \
+                                         input.const_data_ptr<scalar_t>(), d, \
+                                         ALPHA, LIMIT);                       \
       });
 
-void fatrelu_and_mul(torch::Tensor& out,    // [..., d],
-                     torch::Tensor& input,  // [..., 2 * d]
+void fatrelu_and_mul(torch::stable::Tensor& out,    // [..., d],
+                     torch::stable::Tensor& input,  // [..., 2 * d]
                      double threshold) {
   LAUNCH_ACTIVATION_GATE_KERNEL_WITH_PARAM(
       vllm::fatrelu_kernel, vllm::packed_fatrelu_kernel, threshold);
 }
-void swigluoai_and_mul(torch::Tensor& out,    // [..., d]
-                       torch::Tensor& input,  // [..., 2 * d]
+void swigluoai_and_mul(torch::stable::Tensor& out,    // [..., d]
+                       torch::stable::Tensor& input,  // [..., 2 * d]
                        double alpha, double limit) {
   LAUNCH_SIGLUOAI_AND_MUL(vllm::swigluoai_and_mul, alpha, limit);
 }
@@ -502,45 +568,46 @@ __global__ void activation_kernel(
 }  // namespace vllm
 
 // Launch element-wise activation kernel.
-#define LAUNCH_ACTIVATION_KERNEL(KERNEL)                                 \
-  auto dtype = input.scalar_type();                                      \
-  int d = input.size(-1);                                                \
-  int64_t num_tokens = input.numel() / input.size(-1);                   \
-  if (num_tokens == 0) {                                                 \
-    return;                                                              \
-  }                                                                      \
-  dim3 grid(num_tokens);                                                 \
-  int cc_major = at::cuda::getCurrentDeviceProperties()->major;          \
-  int support_vec =                                                      \
-      (CUDA_VERSION >= 12090 && cc_major >= 10 && num_tokens > 128)      \
-          ? vllm::VecTraits<true>::ARCH_MAX_VEC_SIZE                     \
-          : vllm::VecTraits<false>::ARCH_MAX_VEC_SIZE;                   \
-  int vec_size = support_vec / at::elementSize(dtype);                   \
-  const bool use_vec = (d % vec_size == 0);                              \
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));      \
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();          \
-  if (use_vec) {                                                         \
-    dim3 block(std::min(d / vec_size, 1024));                            \
-    if (CUDA_VERSION >= 12090 && cc_major >= 10 && num_tokens > 128) {   \
-      VLLM_DISPATCH_FLOATING_TYPES(dtype, "activation_kernel", [&] {     \
-        vllm::activation_kernel<scalar_t, KERNEL<scalar_t>, true, true>  \
-            <<<grid, block, 0, stream>>>(out.data_ptr<scalar_t>(),       \
-                                         input.data_ptr<scalar_t>(), d); \
-      });                                                                \
-    } else {                                                             \
-      VLLM_DISPATCH_FLOATING_TYPES(dtype, "activation_kernel", [&] {     \
-        vllm::activation_kernel<scalar_t, KERNEL<scalar_t>, true, false> \
-            <<<grid, block, 0, stream>>>(out.data_ptr<scalar_t>(),       \
-                                         input.data_ptr<scalar_t>(), d); \
-      });                                                                \
-    }                                                                    \
-  } else {                                                               \
-    dim3 block(std::min(d, 1024));                                       \
-    VLLM_DISPATCH_FLOATING_TYPES(dtype, "activation_kernel", [&] {       \
-      vllm::activation_kernel<scalar_t, KERNEL<scalar_t>, false>         \
-          <<<grid, block, 0, stream>>>(out.data_ptr<scalar_t>(),         \
-                                       input.data_ptr<scalar_t>(), d);   \
-    });                                                                  \
+#define LAUNCH_ACTIVATION_KERNEL(KERNEL)                                       \
+  auto dtype = input.scalar_type();                                            \
+  int d = input.size(-1);                                                      \
+  int64_t num_tokens = input.numel() / input.size(-1);                         \
+  if (num_tokens == 0) {                                                       \
+    return;                                                                    \
+  }                                                                            \
+  dim3 grid(num_tokens);                                                       \
+  int cc_major = get_device_prop()->major;                                     \
+  int support_vec =                                                            \
+      (CUDA_VERSION >= 12090 && cc_major >= 10 && num_tokens > 128)            \
+          ? vllm::VecTraits<true>::ARCH_MAX_VEC_SIZE                           \
+          : vllm::VecTraits<false>::ARCH_MAX_VEC_SIZE;                         \
+  int vec_size = support_vec / input.element_size();                           \
+  const bool use_vec = (d % vec_size == 0);                                    \
+  const torch::stable::accelerator::DeviceGuard device_guard(                  \
+      input.get_device_index());                                               \
+  const cudaStream_t stream = get_current_cuda_stream();                       \
+  if (use_vec) {                                                               \
+    dim3 block(std::min(d / vec_size, 1024));                                  \
+    if (CUDA_VERSION >= 12090 && cc_major >= 10 && num_tokens > 128) {         \
+      VLLM_STABLE_DISPATCH_FLOATING_TYPES(dtype, "activation_kernel", [&] {    \
+        vllm::activation_kernel<scalar_t, KERNEL<scalar_t>, true, true>        \
+            <<<grid, block, 0, stream>>>(out.mutable_data_ptr<scalar_t>(),     \
+                                         input.const_data_ptr<scalar_t>(), d); \
+      });                                                                      \
+    } else {                                                                   \
+      VLLM_STABLE_DISPATCH_FLOATING_TYPES(dtype, "activation_kernel", [&] {    \
+        vllm::activation_kernel<scalar_t, KERNEL<scalar_t>, true, false>       \
+            <<<grid, block, 0, stream>>>(out.mutable_data_ptr<scalar_t>(),     \
+                                         input.const_data_ptr<scalar_t>(), d); \
+      });                                                                      \
+    }                                                                          \
+  } else {                                                                     \
+    dim3 block(std::min(d, 1024));                                             \
+    VLLM_STABLE_DISPATCH_FLOATING_TYPES(dtype, "activation_kernel", [&] {      \
+      vllm::activation_kernel<scalar_t, KERNEL<scalar_t>, false>               \
+          <<<grid, block, 0, stream>>>(out.mutable_data_ptr<scalar_t>(),       \
+                                       input.const_data_ptr<scalar_t>(), d);   \
+    });                                                                        \
   }
 
 namespace vllm {
@@ -568,20 +635,20 @@ __device__ __forceinline__ T gelu_quick_kernel(const T& x) {
 
 }  // namespace vllm
 
-void gelu_new(torch::Tensor& out,    // [..., d]
-              torch::Tensor& input)  // [..., d]
+void gelu_new(torch::stable::Tensor& out,    // [..., d]
+              torch::stable::Tensor& input)  // [..., d]
 {
   LAUNCH_ACTIVATION_KERNEL(vllm::gelu_new_kernel);
 }
 
-void gelu_fast(torch::Tensor& out,    // [..., d]
-               torch::Tensor& input)  // [..., d]
+void gelu_fast(torch::stable::Tensor& out,    // [..., d]
+               torch::stable::Tensor& input)  // [..., d]
 {
   LAUNCH_ACTIVATION_KERNEL(vllm::gelu_fast_kernel);
 }
 
-void gelu_quick(torch::Tensor& out,    // [..., d]
-                torch::Tensor& input)  // [..., d]
+void gelu_quick(torch::stable::Tensor& out,    // [..., d]
+                torch::stable::Tensor& input)  // [..., d]
 {
   LAUNCH_ACTIVATION_KERNEL(vllm::gelu_quick_kernel);
 }
diff --git a/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp b/csrc/libtorch_stable/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp
similarity index 100%
rename from csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp
rename to csrc/libtorch_stable/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp
diff --git a/csrc/attention/mla/cutlass_sm100_mla/kernel/sm100_fmha_mla_reduction.hpp b/csrc/libtorch_stable/attention/mla/cutlass_sm100_mla/kernel/sm100_fmha_mla_reduction.hpp
similarity index 100%
rename from csrc/attention/mla/cutlass_sm100_mla/kernel/sm100_fmha_mla_reduction.hpp
rename to csrc/libtorch_stable/attention/mla/cutlass_sm100_mla/kernel/sm100_fmha_mla_reduction.hpp
diff --git a/csrc/attention/mla/cutlass_sm100_mla/kernel/sm100_fmha_mla_tma_warpspecialized.hpp b/csrc/libtorch_stable/attention/mla/cutlass_sm100_mla/kernel/sm100_fmha_mla_tma_warpspecialized.hpp
similarity index 100%
rename from csrc/attention/mla/cutlass_sm100_mla/kernel/sm100_fmha_mla_tma_warpspecialized.hpp
rename to csrc/libtorch_stable/attention/mla/cutlass_sm100_mla/kernel/sm100_fmha_mla_tma_warpspecialized.hpp
diff --git a/csrc/attention/mla/cutlass_sm100_mla/kernel/sm100_mla_tile_scheduler.hpp b/csrc/libtorch_stable/attention/mla/cutlass_sm100_mla/kernel/sm100_mla_tile_scheduler.hpp
similarity index 100%
rename from csrc/attention/mla/cutlass_sm100_mla/kernel/sm100_mla_tile_scheduler.hpp
rename to csrc/libtorch_stable/attention/mla/cutlass_sm100_mla/kernel/sm100_mla_tile_scheduler.hpp
diff --git a/csrc/attention/mla/sm100_cutlass_mla_kernel.cu b/csrc/libtorch_stable/attention/mla/sm100_cutlass_mla_kernel.cu
similarity index 77%
rename from csrc/attention/mla/sm100_cutlass_mla_kernel.cu
rename to csrc/libtorch_stable/attention/mla/sm100_cutlass_mla_kernel.cu
index d1874515cc8f..55d75383476e 100644
--- a/csrc/attention/mla/sm100_cutlass_mla_kernel.cu
+++ b/csrc/libtorch_stable/attention/mla/sm100_cutlass_mla_kernel.cu
@@ -18,13 +18,12 @@ limitations under the License.
  * Taken from SGLANG PR https://github.com/sgl-project/sglang/pull/6929
  * by Alcanderian JieXin Liang
  */
-#include "core/registration.h"
+#include "libtorch_stable/torch_utils.h"
+
+#include <torch/csrc/stable/library.h>
 
-#include <ATen/cuda/CUDAContext.h>
-#include <c10/cuda/CUDAGuard.h>
 #include <cutlass/cutlass.h>
 #include <cutlass/kernel_hardware_info.h>
-#include <torch/all.h>
 
 #include <cute/tensor.hpp>
 #include <iostream>
@@ -35,27 +34,27 @@ limitations under the License.
 // clang-format off
 #if !defined(CUDA_VERSION) || CUDA_VERSION < 12040
 void sm100_cutlass_mla_decode(
-    torch::Tensor const& out,
-    torch::Tensor const& lse,
-    torch::Tensor const& q_nope,
-    torch::Tensor const& q_pe,
-    torch::Tensor const& kv_c_and_k_pe_cache,
-    torch::Tensor const& seq_lens,
-    torch::Tensor const& page_table,
-    torch::Tensor const& workspace,
+    torch::stable::Tensor const& out,
+    torch::stable::Tensor const& lse,
+    torch::stable::Tensor const& q_nope,
+    torch::stable::Tensor const& q_pe,
+    torch::stable::Tensor const& kv_c_and_k_pe_cache,
+    torch::stable::Tensor const& seq_lens,
+    torch::stable::Tensor const& page_table,
+    torch::stable::Tensor const& workspace,
     double sm_scale,
     int64_t num_kv_splits) {
-  TORCH_CHECK(false, "CUDA version must be >= 12.4 for cutlass_mla_decode");
+  STD_TORCH_CHECK(false, "CUDA version must be >= 12.4 for cutlass_mla_decode");
 }
 int64_t sm100_cutlass_mla_get_workspace_size(int64_t max_seq_len, int64_t num_batches, int64_t sm_count, int64_t num_kv_splits) {
-  TORCH_CHECK(false, "CUDA version must be >= 12.4 for cutlass_mla_get_workspace_size");
+  STD_TORCH_CHECK(false, "CUDA version must be >= 12.4 for cutlass_mla_get_workspace_size");
 }
 #else
 
 #define CUTLASS_CHECK(status)                                                       \
   {                                                                                 \
     cutlass::Status error = status;                                                 \
-    TORCH_CHECK(error == cutlass::Status::kSuccess, cutlassGetStatusString(error)); \
+    STD_TORCH_CHECK(error == cutlass::Status::kSuccess, cutlassGetStatusString(error)); \
   }
 
 using namespace cute;
@@ -100,23 +99,23 @@ struct MlaSm100 {
 
 template <typename T>
 typename T::Fmha::Arguments args_from_options(
-    at::Tensor const& out,
-    at::Tensor const& lse,
-    at::Tensor const& q_nope,
-    at::Tensor const& q_pe,
-    at::Tensor const& kv_c_and_k_pe_cache,
-    at::Tensor const& seq_lens,
-    at::Tensor const& page_table,
+    torch::stable::Tensor const& out,
+    torch::stable::Tensor const& lse,
+    torch::stable::Tensor const& q_nope,
+    torch::stable::Tensor const& q_pe,
+    torch::stable::Tensor const& kv_c_and_k_pe_cache,
+    torch::stable::Tensor const& seq_lens,
+    torch::stable::Tensor const& page_table,
     double sm_scale,
     int64_t num_kv_splits) {
   cutlass::KernelHardwareInfo hw_info;
-  hw_info.device_id = q_nope.device().index();
+  hw_info.device_id = q_nope.get_device_index();
   hw_info.sm_count = cutlass::KernelHardwareInfo::query_device_multiprocessor_count(hw_info.device_id);
 
-  int batches = q_nope.sizes()[0];
-  int page_count_per_seq = page_table.sizes()[1];
-  int page_count_total = kv_c_and_k_pe_cache.sizes()[0];
-  int page_size = kv_c_and_k_pe_cache.sizes()[1];
+  int batches = q_nope.size(0);
+  int page_count_per_seq = page_table.size(1);
+  int page_count_total = kv_c_and_k_pe_cache.size(0);
+  int page_size = kv_c_and_k_pe_cache.size(1);
   int max_seq_len = page_size * page_count_per_seq;
   using TileShapeH = typename T::TileShapeH;
   using TileShapeD = typename T::TileShapeD;
@@ -186,14 +185,14 @@ typename T::Fmha::Arguments args_from_options(
 
 template <typename Element, typename ElementOut, bool IsPaged128, typename PersistenceOption>
 void runMla(
-    at::Tensor const& out,
-    at::Tensor const& lse,
-    at::Tensor const& q_nope,
-    at::Tensor const& q_pe,
-    at::Tensor const& kv_c_and_k_pe_cache,
-    at::Tensor const& seq_lens,
-    at::Tensor const& page_table,
-    at::Tensor const& workspace,
+    torch::stable::Tensor const& out,
+    torch::stable::Tensor const& lse,
+    torch::stable::Tensor const& q_nope,
+    torch::stable::Tensor const& q_pe,
+    torch::stable::Tensor const& kv_c_and_k_pe_cache,
+    torch::stable::Tensor const& seq_lens,
+    torch::stable::Tensor const& page_table,
+    torch::stable::Tensor const& workspace,
     double sm_scale,
     int64_t num_kv_splits,
     cudaStream_t stream) {
@@ -220,37 +219,37 @@ void runMla(
   }()
 
 void sm100_cutlass_mla_decode(
-    torch::Tensor const& out,
-    torch::Tensor const& lse,
-    torch::Tensor const& q_nope,
-    torch::Tensor const& q_pe,
-    torch::Tensor const& kv_c_and_k_pe_cache,
-    torch::Tensor const& seq_lens,
-    torch::Tensor const& page_table,
-    torch::Tensor const& workspace,
+    torch::stable::Tensor const& out,
+    torch::stable::Tensor const& lse,
+    torch::stable::Tensor const& q_nope,
+    torch::stable::Tensor const& q_pe,
+    torch::stable::Tensor const& kv_c_and_k_pe_cache,
+    torch::stable::Tensor const& seq_lens,
+    torch::stable::Tensor const& page_table,
+    torch::stable::Tensor const& workspace,
     double sm_scale,
     int64_t num_kv_splits) {
-  auto in_dtype = q_nope.dtype();
-  at::cuda::CUDAGuard device_guard{(char)q_nope.get_device()};
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream(q_nope.get_device());
-  const int page_size = kv_c_and_k_pe_cache.sizes()[1];
-  
+  auto in_dtype = q_nope.scalar_type();
+  torch::stable::accelerator::DeviceGuard device_guard(q_nope.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream(q_nope.get_device_index());
+  const int page_size = kv_c_and_k_pe_cache.size(1);
+
   // NOTE(alcanderian): IsPersistent has bug with manual split_kv.
   // Kernel will hang if batch is too large with large num_kv_splits. (for example bs=8, num_kv_splits=8)
   // Maybe per batch split kv will fix this.
   DISPATCH_BOOL(page_size == 128, IsPaged128, [&] {
     DISPATCH_BOOL(num_kv_splits <= 1, NotManualSplitKV, [&] {
-      if (in_dtype == at::ScalarType::Half) {
+      if (in_dtype == torch::headeronly::ScalarType::Half) {
         runMla<cutlass::half_t, cutlass::half_t, IsPaged128, IsPersistent<NotManualSplitKV>>(
           out, lse, q_nope, q_pe, kv_c_and_k_pe_cache, seq_lens, page_table, workspace, sm_scale, num_kv_splits, stream);
-      } else if (in_dtype == at::ScalarType::BFloat16) {
+      } else if (in_dtype == torch::headeronly::ScalarType::BFloat16) {
         runMla<cutlass::bfloat16_t, cutlass::bfloat16_t, IsPaged128, IsPersistent<NotManualSplitKV>>(
           out, lse, q_nope, q_pe, kv_c_and_k_pe_cache, seq_lens, page_table, workspace, sm_scale, num_kv_splits, stream);
-      } else if (in_dtype == at::ScalarType::Float8_e4m3fn) {
+      } else if (in_dtype == torch::headeronly::ScalarType::Float8_e4m3fn) {
         runMla<cutlass::float_e4m3_t, cutlass::bfloat16_t, IsPaged128, IsPersistent<NotManualSplitKV>>(
           out, lse, q_nope, q_pe, kv_c_and_k_pe_cache, seq_lens, page_table, workspace, sm_scale, num_kv_splits, stream);
       } else {
-        TORCH_CHECK(false, "Unsupported input data type of MLA");
+        STD_TORCH_CHECK(false, "Unsupported input data type of MLA");
       }
       return true;
     });
@@ -280,12 +279,12 @@ int64_t sm100_cutlass_mla_get_workspace_size(int64_t max_seq_len, int64_t num_ba
 
 #endif
 
-TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
-  m.impl("sm100_cutlass_mla_decode", &sm100_cutlass_mla_decode);
+STABLE_TORCH_LIBRARY_IMPL(_C, CUDA, m) {
+  m.impl("sm100_cutlass_mla_decode", TORCH_BOX(&sm100_cutlass_mla_decode));
 }
 
-TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CatchAll, m) {
-  m.impl("sm100_cutlass_mla_get_workspace_size", &sm100_cutlass_mla_get_workspace_size);
+STABLE_TORCH_LIBRARY_IMPL(_C, CompositeExplicitAutograd, m) {
+  m.impl("sm100_cutlass_mla_get_workspace_size", TORCH_BOX(&sm100_cutlass_mla_get_workspace_size));
 }
 
 // clang-format on
diff --git a/csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp b/csrc/libtorch_stable/cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp
similarity index 91%
rename from csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp
rename to csrc/libtorch_stable/cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp
index ad8c0067d4a9..f6737a73d48a 100644
--- a/csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp
+++ b/csrc/libtorch_stable/cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <torch/csrc/stable/tensor.h>
+
 #include "cutlass_extensions/epilogue/broadcast_load_epilogue_c2x.hpp"
 
 /*
@@ -52,7 +54,7 @@ struct ScaledEpilogueBase {
   // from a tensor. It can handle both row and column, as well as row/column or
   // scalar cases.
   template <typename Descriptor, typename T>
-  static auto args_from_tensor(torch::Tensor const& tensor) {
+  static auto args_from_tensor(torch::stable::Tensor const& tensor) {
     using Arguments = typename Descriptor::Arguments;
     auto* data_ptr = static_cast<T*>(tensor.data_ptr());
     if constexpr (std::is_same_v<Descriptor, ColOrScalarLoad<T>> ||
@@ -68,7 +70,8 @@ struct ScaledEpilogueBase {
   // This overload handles the case where there might not be a tensor, in which
   // case a nullptr is passed and a constant (0) is used.
   template <typename Descriptor, typename T>
-  static auto args_from_tensor(std::optional<torch::Tensor> const& tensor) {
+  static auto args_from_tensor(
+      std::optional<torch::stable::Tensor> const& tensor) {
     static_assert(std::is_same_v<Descriptor, RowOrZeroLoad<T>>);
     using Arguments = typename Descriptor::Arguments;
     auto* data_ptr = tensor ? static_cast<T*>(tensor->data_ptr()) : nullptr;
@@ -117,8 +120,8 @@ struct ScaledEpilogue
       cutlass::epilogue::threadblock::Sm80EVT<Compute1, ScaleA, EVTCompute0>;
   using ArgumentType = typename EVTCompute::Arguments;
 
-  static ArgumentType prepare_args(torch::Tensor const& a_scales,
-                                   torch::Tensor const& b_scales) {
+  static ArgumentType prepare_args(torch::stable::Tensor const& a_scales,
+                                   torch::stable::Tensor const& b_scales) {
     auto a_args = SUPER::template args_from_tensor<ScaleA, float>(a_scales);
     auto b_args = SUPER::template args_from_tensor<ScaleB, float>(b_scales);
 
@@ -160,9 +163,9 @@ struct ScaledEpilogueBias
   using EVTCompute = cutlass::epilogue::threadblock::Sm80EVT<Compute1, ScaleA,
                                                              EVTCompute0, Bias>;
   using ArgumentType = typename EVTCompute::Arguments;
-  static ArgumentType prepare_args(torch::Tensor const& a_scales,
-                                   torch::Tensor const& b_scales,
-                                   torch::Tensor const& bias) {
+  static ArgumentType prepare_args(torch::stable::Tensor const& a_scales,
+                                   torch::stable::Tensor const& b_scales,
+                                   torch::stable::Tensor const& bias) {
     auto a_args = SUPER::template args_from_tensor<ScaleA, float>(a_scales);
     auto b_args = SUPER::template args_from_tensor<ScaleB, float>(b_scales);
     auto bias_args = SUPER::template args_from_tensor<Bias, ElementD>(bias);
@@ -220,10 +223,11 @@ struct ScaledEpilogueBiasAzp
 
   using ArgumentType = typename EVTCompute::Arguments;
 
-  static ArgumentType prepare_args(torch::Tensor const& a_scales,
-                                   torch::Tensor const& b_scales,
-                                   torch::Tensor const& azp_adj,
-                                   std::optional<torch::Tensor> const& bias) {
+  static ArgumentType prepare_args(
+      torch::stable::Tensor const& a_scales,
+      torch::stable::Tensor const& b_scales,
+      torch::stable::Tensor const& azp_adj,
+      std::optional<torch::stable::Tensor> const& bias) {
     auto a_args = SUPER::template args_from_tensor<ScaleA, float>(a_scales);
     auto b_args = SUPER::template args_from_tensor<ScaleB, float>(b_scales);
     auto bias_args = SUPER::template args_from_tensor<Bias, ElementD>(bias);
@@ -298,11 +302,11 @@ struct ScaledEpilogueBiasAzpToken
 
   using ArgumentType = typename EVTCompute::Arguments;
 
-  static ArgumentType prepare_args(torch::Tensor const& a_scales,
-                                   torch::Tensor const& b_scales,
-                                   torch::Tensor const& azp_adj,
-                                   torch::Tensor const& azp,
-                                   std::optional<torch::Tensor> const& bias) {
+  static ArgumentType prepare_args(
+      torch::stable::Tensor const& a_scales,
+      torch::stable::Tensor const& b_scales,
+      torch::stable::Tensor const& azp_adj, torch::stable::Tensor const& azp,
+      std::optional<torch::stable::Tensor> const& bias) {
     auto a_args = SUPER::template args_from_tensor<ScaleA, float>(a_scales);
     auto b_args = SUPER::template args_from_tensor<ScaleB, float>(b_scales);
     auto bias_args = SUPER::template args_from_tensor<Bias, ElementD>(bias);
diff --git a/csrc/libtorch_stable/dispatch_utils.h b/csrc/libtorch_stable/dispatch_utils.h
index 5ebba72b15dd..e9478236a0e1 100644
--- a/csrc/libtorch_stable/dispatch_utils.h
+++ b/csrc/libtorch_stable/dispatch_utils.h
@@ -49,6 +49,44 @@
   THO_DISPATCH_SWITCH(TYPE, NAME,                       \
                       VLLM_STABLE_DISPATCH_CASE_FP8_TYPES(__VA_ARGS__))
 
+// Half types dispatch (Half + BFloat16)
+#define VLLM_STABLE_DISPATCH_CASE_HALF_TYPES(...)                     \
+  THO_DISPATCH_CASE(torch::headeronly::ScalarType::Half, __VA_ARGS__) \
+  THO_DISPATCH_CASE(torch::headeronly::ScalarType::BFloat16, __VA_ARGS__)
+
+#define VLLM_STABLE_DISPATCH_HALF_TYPES(TYPE, NAME, ...) \
+  THO_DISPATCH_SWITCH(TYPE, NAME,                        \
+                      VLLM_STABLE_DISPATCH_CASE_HALF_TYPES(__VA_ARGS__))
+
+// Quant type dispatch (FP8 + INT8)
+#ifdef USE_ROCM
+  #define VLLM_STABLE_DISPATCH_CASE_QUANT_TYPES(...)                  \
+    THO_DISPATCH_CASE(torch::headeronly::ScalarType::Float8_e4m3fn,   \
+                      __VA_ARGS__)                                    \
+    THO_DISPATCH_CASE(torch::headeronly::ScalarType::Float8_e4m3fnuz, \
+                      __VA_ARGS__)                                    \
+    THO_DISPATCH_CASE(torch::headeronly::ScalarType::Char, __VA_ARGS__)
+#else
+  #define VLLM_STABLE_DISPATCH_CASE_QUANT_TYPES(...)                \
+    THO_DISPATCH_CASE(torch::headeronly::ScalarType::Float8_e4m3fn, \
+                      __VA_ARGS__)                                  \
+    THO_DISPATCH_CASE(torch::headeronly::ScalarType::Char, __VA_ARGS__)
+#endif
+
+#define VLLM_STABLE_DISPATCH_QUANT_TYPES(TYPE, NAME, ...) \
+  THO_DISPATCH_SWITCH(TYPE, NAME,                         \
+                      VLLM_STABLE_DISPATCH_CASE_QUANT_TYPES(__VA_ARGS__))
+
+// Group size dispatch (pure C++ if/else, no ATen dependency)
+#define VLLM_STABLE_DISPATCH_GROUP_SIZE(group_size, const_group_size, ...) \
+  if (group_size == 128) {                                                 \
+    constexpr int const_group_size = 128;                                  \
+    __VA_ARGS__();                                                         \
+  } else if (group_size == 64) {                                           \
+    constexpr int const_group_size = 64;                                   \
+    __VA_ARGS__();                                                         \
+  }
+
 // Boolean dispatch
 #define VLLM_STABLE_DISPATCH_BOOL(expr, const_expr, ...) \
   if (expr) {                                            \
@@ -58,3 +96,56 @@
     constexpr bool const_expr = false;                   \
     __VA_ARGS__();                                       \
   }
+
+// Vec size dispatch (pure C++ switch, no ATen dependency)
+#define VLLM_STABLE_DISPATCH_VEC_SIZE(VEC_SIZE, ...) \
+  switch (VEC_SIZE) {                                \
+    case 16: {                                       \
+      constexpr int vec_size = 16;                   \
+      __VA_ARGS__();                                 \
+      break;                                         \
+    }                                                \
+    case 8: {                                        \
+      constexpr int vec_size = 8;                    \
+      __VA_ARGS__();                                 \
+      break;                                         \
+    }                                                \
+    case 4: {                                        \
+      constexpr int vec_size = 4;                    \
+      __VA_ARGS__();                                 \
+      break;                                         \
+    }                                                \
+    case 2: {                                        \
+      constexpr int vec_size = 2;                    \
+      __VA_ARGS__();                                 \
+      break;                                         \
+    }                                                \
+    default: {                                       \
+      constexpr int vec_size = 1;                    \
+      __VA_ARGS__();                                 \
+      break;                                         \
+    }                                                \
+  }
+
+// Tensor rank dispatch (2D, 3D, 4D)
+#define VLLM_STABLE_DISPATCH_RANK234(NUM_DIMS, ...)                          \
+  switch (NUM_DIMS) {                                                        \
+    case 2: {                                                                \
+      constexpr int tensor_rank = 2;                                         \
+      __VA_ARGS__();                                                         \
+      break;                                                                 \
+    }                                                                        \
+    case 3: {                                                                \
+      constexpr int tensor_rank = 3;                                         \
+      __VA_ARGS__();                                                         \
+      break;                                                                 \
+    }                                                                        \
+    case 4: {                                                                \
+      constexpr int tensor_rank = 4;                                         \
+      __VA_ARGS__();                                                         \
+      break;                                                                 \
+    }                                                                        \
+    default:                                                                 \
+      STD_TORCH_CHECK(                                                       \
+          false, "Expects rank 2, 3 or 4 tensors but got unsupported rank"); \
+  }
diff --git a/csrc/dsv3_fused_a_gemm.cu b/csrc/libtorch_stable/dsv3_fused_a_gemm.cu
similarity index 93%
rename from csrc/dsv3_fused_a_gemm.cu
rename to csrc/libtorch_stable/dsv3_fused_a_gemm.cu
index 65dff9c84bab..bdf749ddfcf9 100644
--- a/csrc/dsv3_fused_a_gemm.cu
+++ b/csrc/libtorch_stable/dsv3_fused_a_gemm.cu
@@ -20,13 +20,15 @@
  * limitations under the License.
  */
 
-#include <ATen/ATen.h>
-#include <ATen/cuda/CUDAContext.h>
-#include <cuda_bf16.h>
-#include <cuda_runtime.h>
-#include <torch/all.h>
+#include <torch/csrc/stable/library.h>
+#include <torch/csrc/stable/tensor.h>
+#include <torch/headeronly/core/ScalarType.h>
 
 #include "core/registration.h"
+#include "libtorch_stable/torch_utils.h"
+
+#include <cuda_bf16.h>
+#include <cuda_runtime.h>
 
 #include <cstdlib>
 #include <mutex>
@@ -34,7 +36,7 @@
 namespace {
 
 inline int getSMVersion() {
-  auto* props = at::cuda::getCurrentDeviceProperties();
+  auto* props = get_device_prop();
   return props->major * 10 + props->minor;
 }
 
@@ -700,37 +702,40 @@ template void invokeFusedAGemm<__nv_bfloat16, 7168, 2112, 16>(
     __nv_bfloat16*, __nv_bfloat16 const*, __nv_bfloat16 const*, int num_tokens,
     cudaStream_t);
 
-void dsv3_fused_a_gemm(torch::Tensor& output, torch::Tensor const& mat_a,
-                       torch::Tensor const& mat_b) {
-  TORCH_CHECK(mat_a.dim() == 2 && mat_b.dim() == 2 && output.dim() == 2);
+void dsv3_fused_a_gemm(torch::stable::Tensor& output,
+                       torch::stable::Tensor const& mat_a,
+                       torch::stable::Tensor const& mat_b) {
+  STD_TORCH_CHECK(mat_a.dim() == 2 && mat_b.dim() == 2 && output.dim() == 2);
   int const num_tokens = mat_a.size(0);
   int const hd_in = mat_a.size(1);
   int const hd_out = mat_b.size(1);
 
   constexpr int kHdIn = 7168;
   constexpr int kHdOut = 2112;
-  TORCH_CHECK(num_tokens >= 1 && num_tokens <= 16,
-              "required 1 <= mat_a.shape[0] <= 16")
-  TORCH_CHECK(hd_in == kHdIn, "required mat_a.shape[1] == 7168")
-  TORCH_CHECK(hd_out == kHdOut, "required mat_b.shape[1] == 2112")
-  TORCH_CHECK(output.size(0) == num_tokens,
-              "required output.shape[0] == mat_a.shape[0]")
-  TORCH_CHECK(output.size(1) == hd_out,
-              "required output.shape[1] == mat_b.shape[1]")
-
-  TORCH_CHECK(mat_a.stride(1) == 1, "mat_a must be a row major tensor");
-  TORCH_CHECK(output.stride(1) == 1, "output must be a row major tensor");
-  TORCH_CHECK(mat_b.stride(0) == 1, "mat_b must be a column major tensor");
-
-  TORCH_CHECK(mat_a.scalar_type() == torch::kBFloat16 &&
-                  mat_b.scalar_type() == torch::kBFloat16,
-              "Only BFloat16 input dtype is supported")
-  TORCH_CHECK(output.scalar_type() == torch::kBFloat16,
-              "Only BFloat16 output dtype is supported")
-
-  TORCH_CHECK(getSMVersion() >= 90, "required CUDA ARCH >= SM_90");
-
-  auto stream = at::cuda::getCurrentCUDAStream(mat_a.get_device());
+  STD_TORCH_CHECK(num_tokens >= 1 && num_tokens <= 16,
+                  "required 1 <= mat_a.shape[0] <= 16");
+  STD_TORCH_CHECK(hd_in == kHdIn, "required mat_a.shape[1] == 7168");
+  STD_TORCH_CHECK(hd_out == kHdOut, "required mat_b.shape[1] == 2112");
+  STD_TORCH_CHECK(output.size(0) == num_tokens,
+                  "required output.shape[0] == mat_a.shape[0]");
+  STD_TORCH_CHECK(output.size(1) == hd_out,
+                  "required output.shape[1] == mat_b.shape[1]");
+
+  STD_TORCH_CHECK(mat_a.stride(1) == 1, "mat_a must be a row major tensor");
+  STD_TORCH_CHECK(output.stride(1) == 1, "output must be a row major tensor");
+  STD_TORCH_CHECK(mat_b.stride(0) == 1, "mat_b must be a column major tensor");
+
+  STD_TORCH_CHECK(
+      mat_a.scalar_type() == torch::headeronly::ScalarType::BFloat16 &&
+          mat_b.scalar_type() == torch::headeronly::ScalarType::BFloat16,
+      "Only BFloat16 input dtype is supported");
+  STD_TORCH_CHECK(
+      output.scalar_type() == torch::headeronly::ScalarType::BFloat16,
+      "Only BFloat16 output dtype is supported");
+
+  STD_TORCH_CHECK(getSMVersion() >= 90, "required CUDA ARCH >= SM_90");
+
+  auto stream = get_current_cuda_stream(mat_a.get_device_index());
   if (num_tokens <= 8) {
     invokeFusedAGemm<__nv_bfloat16, kHdIn, kHdOut, 8>(
         reinterpret_cast<__nv_bfloat16*>(output.mutable_data_ptr()),
@@ -746,6 +751,6 @@ void dsv3_fused_a_gemm(torch::Tensor& output, torch::Tensor const& mat_a,
   }
 }
 
-TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
-  m.impl("dsv3_fused_a_gemm", &dsv3_fused_a_gemm);
+STABLE_TORCH_LIBRARY_IMPL(_C, CUDA, m) {
+  m.impl("dsv3_fused_a_gemm", TORCH_BOX(&dsv3_fused_a_gemm));
 }
diff --git a/csrc/libtorch_stable/fused_qknorm_rope_kernel.cu b/csrc/libtorch_stable/fused_qknorm_rope_kernel.cu
new file mode 100644
index 000000000000..bcf0ae585478
--- /dev/null
+++ b/csrc/libtorch_stable/fused_qknorm_rope_kernel.cu
@@ -0,0 +1,818 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cmath>
+#include <cuda_runtime.h>
+#include <type_traits>
+
+#include "torch_utils.h"
+
+#include "../async_util.cuh"
+#include "../cuda_compat.h"
+#include "../type_convert.cuh"
+#include "dispatch_utils.h"
+
+#define CHECK_TYPE(x, st)                                                  \
+  STD_TORCH_CHECK(x.scalar_type() == st, #x " dtype is ", x.scalar_type(), \
+                  ", while ", st, " is expected")
+#define CHECK_TH_CUDA(x) \
+  STD_TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor")
+#define CHECK_CONTIGUOUS(x) \
+  STD_TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
+#define CHECK_INPUT(x) \
+  CHECK_TH_CUDA(x);    \
+  CHECK_CONTIGUOUS(x)
+
+#ifdef USE_ROCM
+  #define FINAL_MASK 0xffffffffffffffffULL
+
+  #if defined(HIP_VERSION) && HIP_VERSION < 70000000
+// On ROCm versions before 7.0, __syncwarp isn't defined. The below
+// implementation is copy/pasted from the implementation in ROCm 7.0
+__device__ inline void __syncwarp() {
+  __builtin_amdgcn_fence(__ATOMIC_RELEASE, "wavefront");
+  __builtin_amdgcn_wave_barrier();
+  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "wavefront");
+}
+  #endif
+#else
+  #define FINAL_MASK 0xffffffff
+#endif
+
+namespace tensorrt_llm::common {
+template <typename T, int num>
+struct packed_as;
+// Specialization for packed_as used in this kernel.
+template <>
+struct packed_as<uint, 1> {
+  using type = uint;
+};
+
+template <>
+struct packed_as<uint, 2> {
+  using type = uint2;
+};
+
+template <>
+struct packed_as<uint, 4> {
+  using type = uint4;
+};
+
+template <typename T>
+__inline__ __device__ T warpReduceSum(T val) {
+#pragma unroll
+  for (int mask = 16; mask > 0; mask >>= 1)
+    val += __shfl_xor_sync(FINAL_MASK, val, mask, 32);
+  return val;
+}
+
+template <typename T>
+inline __device__ __host__ T divUp(T m, T n) {
+  return (m + n - 1) / n;
+}
+
+}  // namespace tensorrt_llm::common
+
+namespace tensorrt_llm::kernels {
+
+using namespace vllm::cuda_async;
+
+// NOTE(zhuhaoran): This kernel is adapted from TensorRT-LLM implementation,
+// with added support for passing the cos_sin_cache as an input.
+// https://github.com/NVIDIA/TensorRT-LLM/blob/main/cpp/tensorrt_llm/kernels/fusedQKNormRopeKernel.cu
+
+// Perform per-head QK Norm and RoPE in a single kernel.
+// scalar_t_in: data type of QKV and RMSNorm weights
+// scalar_t_cache: data type of cos/sin cache
+// head_dim: the dimension of each head
+// interleave: interleave=!is_neox.
+template <typename scalar_t_in, typename scalar_t_cache, int head_dim,
+          bool interleave>
+__global__ void fusedQKNormRopeKernel(
+    void* qkv_void,                  // Combined QKV tensor
+    int const num_heads_q,           // Number of query heads
+    int const num_heads_k,           // Number of key heads
+    int const num_heads_v,           // Number of value heads
+    float const eps,                 // Epsilon for RMS normalization
+    void const* q_weight_void,       // RMSNorm weights for query
+    void const* k_weight_void,       // RMSNorm weights for key
+    void const* cos_sin_cache_void,  // Pre-computed cos/sin cache
+    int64_t const* position_ids,     // Position IDs for RoPE
+    int const num_tokens,            // Number of tokens
+    int const rotary_dim             // Dimension for RoPE
+) {
+#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 800) && !defined(USE_ROCM)
+  if constexpr ((std::is_same_v<scalar_t_in, c10::BFloat16>) ||
+                std::is_same_v<scalar_t_cache, c10::BFloat16>) {
+    return;
+  } else {
+#endif
+
+    using Converter = vllm::_typeConvert<scalar_t_in>;
+    static_assert(Converter::exists,
+                  "Input QKV data type is not supported for this CUDA "
+                  "architecture or toolkit version.");
+    using T_in = typename Converter::hip_type;
+    using T2_in = typename Converter::packed_hip_type;
+
+    using CacheConverter = vllm::_typeConvert<scalar_t_cache>;
+    static_assert(CacheConverter::exists,
+                  "Cache data type is not supported for this CUDA architecture "
+                  "or toolkit version.");
+    using T_cache = typename CacheConverter::hip_type;
+
+    T_in* qkv = reinterpret_cast<T_in*>(qkv_void);
+    T_in const* q_weight = reinterpret_cast<T_in const*>(q_weight_void);
+    T_in const* k_weight = reinterpret_cast<T_in const*>(k_weight_void);
+    T_cache const* cos_sin_cache =
+        reinterpret_cast<T_cache const*>(cos_sin_cache_void);
+
+    int const warpsPerBlock = blockDim.x / 32;
+    int const warpId = threadIdx.x / 32;
+    int const laneId = threadIdx.x % 32;
+
+    // Calculate global warp index to determine which head/token this warp
+    // processes
+    int const globalWarpIdx = blockIdx.x * warpsPerBlock + warpId;
+
+    // Total number of attention heads (Q and K)
+    int const total_qk_heads = num_heads_q + num_heads_k;
+
+    // Determine which token and head type (Q or K) this warp processes
+    int const tokenIdx = globalWarpIdx / total_qk_heads;
+    int const localHeadIdx = globalWarpIdx % total_qk_heads;
+
+    // Skip if this warp is assigned beyond the number of tokens
+    if (tokenIdx >= num_tokens) return;
+
+    bool const isQ = localHeadIdx < num_heads_q;
+    int const headIdx = isQ ? localHeadIdx : localHeadIdx - num_heads_q;
+
+    int const num_heads = num_heads_q + num_heads_k + num_heads_v;
+
+    static_assert(head_dim % (32 * 2) == 0,
+                  "head_dim must be divisible by 64 (each warp processes one "
+                  "head, and each thread gets even number of "
+                  "elements)");
+    constexpr int numElemsPerThread = head_dim / 32;
+    float elements[numElemsPerThread];
+    constexpr int elemSizeBytes = numElemsPerThread * sizeof(__nv_bfloat16);
+    static_assert(elemSizeBytes % 4 == 0,
+                  "numSizeBytes must be a multiple of 4");
+    constexpr int vecSize =
+        elemSizeBytes /
+        4;  // Use packed_as<uint, vecSize> to perform loading/saving.
+    using vec_T = typename tensorrt_llm::common::packed_as<uint, vecSize>::type;
+
+    int offsetWarp;  // Offset for the warp
+    if (isQ) {
+      // Q segment: token offset + head offset within Q segment
+      offsetWarp = tokenIdx * num_heads * head_dim + headIdx * head_dim;
+    } else {
+      // K segment: token offset + entire Q segment + head offset within K
+      // segment
+      offsetWarp = tokenIdx * num_heads * head_dim + num_heads_q * head_dim +
+                   headIdx * head_dim;
+    }
+    int offsetThread = offsetWarp + laneId * numElemsPerThread;
+
+    // Sum of squares for RMSNorm
+    float sumOfSquares = 0.0f;
+
+    // Load.
+    {
+      vec_T vec = *reinterpret_cast<vec_T const*>(&qkv[offsetThread]);
+      constexpr int num_packed_elems = elemSizeBytes / sizeof(T2_in);
+#pragma unroll
+      for (int i = 0; i < num_packed_elems; i++) {
+        // Interpret the generic vector chunk as the specific packed type
+        T2_in packed_val = *(reinterpret_cast<T2_in*>(&vec) + i);
+        // Convert to float2 for computation
+        float2 vals = Converter::convert(packed_val);
+        sumOfSquares += vals.x * vals.x;
+        sumOfSquares += vals.y * vals.y;
+
+        elements[2 * i] = vals.x;
+        elements[2 * i + 1] = vals.y;
+      }
+    }
+
+    // Reduce sum across warp using the utility function
+    sumOfSquares = tensorrt_llm::common::warpReduceSum(sumOfSquares);
+
+    // Compute RMS normalization factor
+    float rms_rcp = rsqrtf(sumOfSquares / static_cast<float>(head_dim) + eps);
+
+    // Normalize elements
+#pragma unroll
+    for (int i = 0; i < numElemsPerThread; i++) {
+      int dim = laneId * numElemsPerThread + i;
+      float weight = isQ ? Converter::convert(q_weight[dim])
+                         : Converter::convert(k_weight[dim]);
+      elements[i] *= rms_rcp * weight;
+    }
+
+    // Apply RoPE to normalized elements
+    float elements2[numElemsPerThread];  // Additional buffer required for RoPE.
+
+    int64_t pos_id = position_ids[tokenIdx];
+
+    // Calculate cache pointer for this position - similar to
+    // pos_encoding_kernels.cu
+    T_cache const* cache_ptr = cos_sin_cache + pos_id * rotary_dim;
+    int const embed_dim = rotary_dim / 2;
+    T_cache const* cos_ptr = cache_ptr;
+    T_cache const* sin_ptr = cache_ptr + embed_dim;
+    int const rotary_lanes = rotary_dim / numElemsPerThread;  // rotary range
+    if (laneId < rotary_lanes) {
+      if constexpr (interleave) {
+        // Perform interleaving. Use pre-computed cos/sin values.
+#pragma unroll
+        for (int i = 0; i < numElemsPerThread / 2; ++i) {
+          int const idx0 = 2 * i;
+          int const idx1 = 2 * i + 1;
+          // Global dimension index in the head
+          int const dim_idx = laneId * numElemsPerThread + idx0;
+
+          float const val0 = elements[idx0];
+          float const val1 = elements[idx1];
+
+          int const half_dim = dim_idx / 2;
+          float const cos_val =
+              CacheConverter::convert(VLLM_LDG(cos_ptr + half_dim));
+          float const sin_val =
+              CacheConverter::convert(VLLM_LDG(sin_ptr + half_dim));
+
+          elements[idx0] = val0 * cos_val - val1 * sin_val;
+          elements[idx1] = val0 * sin_val + val1 * cos_val;
+        }
+      } else {
+        // Before data exchange with in warp, we need to sync.
+        __syncwarp();
+        int pairOffset = (rotary_dim / 2) / numElemsPerThread;
+        // Get the data from the other half of the warp. Use pre-computed
+        // cos/sin values.
+#pragma unroll
+        for (int i = 0; i < numElemsPerThread; i++) {
+          elements2[i] = __shfl_xor_sync(FINAL_MASK, elements[i], pairOffset);
+
+          if (laneId < pairOffset) {
+            elements2[i] = -elements2[i];
+          }
+          int dim_idx = laneId * numElemsPerThread + i;
+
+          dim_idx = (dim_idx * 2) % rotary_dim;
+          int half_dim = dim_idx / 2;
+          float cos_val = CacheConverter::convert(VLLM_LDG(cos_ptr + half_dim));
+          float sin_val = CacheConverter::convert(VLLM_LDG(sin_ptr + half_dim));
+
+          elements[i] = elements[i] * cos_val + elements2[i] * sin_val;
+        }
+        // __shfl_xor_sync does not provide memfence. Need to sync again.
+        __syncwarp();
+      }
+    }
+    // Store.
+    {
+      vec_T vec;
+      constexpr int num_packed_elems = elemSizeBytes / sizeof(T2_in);
+#pragma unroll
+      for (int i = 0; i < num_packed_elems; i++) {
+        // Convert from float2 back to the specific packed type
+        T2_in packed_val = Converter::convert(
+            make_float2(elements[2 * i], elements[2 * i + 1]));
+        // Place it into the generic vector
+        *(reinterpret_cast<T2_in*>(&vec) + i) = packed_val;
+      }
+      *reinterpret_cast<vec_T*>(&qkv[offsetThread]) = vec;
+    }
+
+#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 800) && !defined(USE_ROCM)
+  }
+#endif
+}
+
+// Multi-token-head kernel: one warp processes HEADS_PER_WARP token-heads for
+// the same token, sharing cos/sin from shared memory via cp.async.
+// When HEADS_PER_WARP > 1 the warp reuses the loaded cos/sin across all heads,
+// hiding global-memory latency and improving occupancy for large batches.
+template <typename scalar_t_in, typename scalar_t_cache, int head_dim,
+          bool interleave, int HEADS_PER_WARP>
+__global__ void fusedQKNormRopeKernelNTokenHeads(
+    void* qkv_void, int const num_heads_q, int const num_heads_k,
+    int const num_heads_v, float const eps, void const* q_weight_void,
+    void const* k_weight_void, void const* cos_sin_cache_void,
+    int64_t const* position_ids, int const num_tokens, int const rotary_dim) {
+#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 800) && !defined(USE_ROCM)
+  if constexpr ((std::is_same_v<scalar_t_in, c10::BFloat16>) ||
+                std::is_same_v<scalar_t_cache, c10::BFloat16>) {
+    return;
+  } else {
+#endif
+
+    using Converter = vllm::_typeConvert<scalar_t_in>;
+    static_assert(Converter::exists,
+                  "Input QKV data type is not supported for this CUDA "
+                  "architecture or toolkit version.");
+    using T_in = typename Converter::hip_type;
+    using T2_in = typename Converter::packed_hip_type;
+
+    using CacheConverter = vllm::_typeConvert<scalar_t_cache>;
+    static_assert(CacheConverter::exists,
+                  "Cache data type is not supported for this CUDA architecture "
+                  "or toolkit version.");
+    using T_cache = typename CacheConverter::hip_type;
+
+    extern __shared__ char smem_storage[];
+    // Shared memory layout:
+    //   [0, cos_sin_bytes)           : cos/sin for each warp  (warpsPerBlock *
+    //   rotary_dim * sizeof(T_cache))
+    // [cos_sin_bytes, ...)         : QKV tiles
+    //   per warp     (warpsPerBlock * HEADS_PER_WARP * 32 * elemSizeBytes)
+    T_cache* const smem = reinterpret_cast<T_cache*>(smem_storage);
+
+    T_in* qkv = reinterpret_cast<T_in*>(qkv_void);
+    T_in const* q_weight = reinterpret_cast<T_in const*>(q_weight_void);
+    T_in const* k_weight = reinterpret_cast<T_in const*>(k_weight_void);
+    T_cache const* cos_sin_cache =
+        reinterpret_cast<T_cache const*>(cos_sin_cache_void);
+
+    int const warpsPerBlock = blockDim.x / 32;
+    int const warpId = threadIdx.x / 32;
+    int const laneId = threadIdx.x % 32;
+
+    int const total_qk_heads = num_heads_q + num_heads_k;
+    int const num_heads = num_heads_q + num_heads_k + num_heads_v;
+    int const head_chunks_per_token =
+        (total_qk_heads + HEADS_PER_WARP - 1) / HEADS_PER_WARP;
+
+    int const warp_global = blockIdx.x * warpsPerBlock + warpId;
+    int const tokenIdx = warp_global / head_chunks_per_token;
+    int const headChunk = warp_global % head_chunks_per_token;
+    int const first_head = headChunk * HEADS_PER_WARP;
+    int const num_heads_this_warp =
+        (first_head + HEADS_PER_WARP <= total_qk_heads)
+            ? HEADS_PER_WARP
+            : (total_qk_heads - first_head);
+
+    if (tokenIdx >= num_tokens) return;
+
+    static_assert(head_dim % (32 * 2) == 0, "head_dim must be divisible by 64");
+    constexpr int numElemsPerThread = head_dim / 32;
+    constexpr int elemSizeBytes = numElemsPerThread * sizeof(__nv_bfloat16);
+    static_assert(elemSizeBytes % 4 == 0,
+                  "elemSizeBytes must be a multiple of 4");
+    constexpr int vecSize = elemSizeBytes / 4;
+    using vec_T = typename tensorrt_llm::common::packed_as<uint, vecSize>::type;
+
+    int const cos_sin_bytes =
+        warpsPerBlock * rotary_dim * static_cast<int>(sizeof(T_cache));
+    int const qkv_tile_bytes = 32 * elemSizeBytes;
+    char* const this_warp_head_smem =
+        smem_storage + cos_sin_bytes +
+        warpId * (HEADS_PER_WARP * qkv_tile_bytes);
+
+    // === Group 0: async load all heads' QKV into smem (issued first). ===
+    for (int k = 0; k < num_heads_this_warp; ++k) {
+      int const localHeadIdx = first_head + k;
+      bool const isQ = localHeadIdx < num_heads_q;
+      int const headIdx = isQ ? localHeadIdx : localHeadIdx - num_heads_q;
+      int offWarp;
+      if (isQ) {
+        offWarp = tokenIdx * num_heads * head_dim + headIdx * head_dim;
+      } else {
+        offWarp = tokenIdx * num_heads * head_dim + num_heads_q * head_dim +
+                  headIdx * head_dim;
+      }
+      int const offThread = offWarp + laneId * numElemsPerThread;
+      char* smem_dst =
+          this_warp_head_smem + k * qkv_tile_bytes + laneId * elemSizeBytes;
+      cp_async_shared_global_ca(smem_dst,
+                                reinterpret_cast<const char*>(&qkv[offThread]),
+                                elemSizeBytes);
+    }
+    cp_async_commit_group();  // commit group 0 (QKV)
+
+    // === Group 1: async load cos/sin into smem (issued second). ===
+    int64_t const pos_id = position_ids[tokenIdx];
+    T_cache const* const cache_ptr = cos_sin_cache + pos_id * rotary_dim;
+    int const copy_bytes = rotary_dim * static_cast<int>(sizeof(T_cache));
+    int const num_copies = (copy_bytes + 15) / 16;
+    for (int copyId = laneId; copyId < num_copies; copyId += 32) {
+      char* smem_ptr =
+          reinterpret_cast<char*>(&smem[warpId * rotary_dim]) + copyId * 16;
+      const char* glob_ptr =
+          reinterpret_cast<const char*>(cache_ptr) + copyId * 16;
+      cp_async_shared_global_16_cg(smem_ptr, glob_ptr);
+    }
+    cp_async_commit_group();  // commit group 1 (cos/sin)
+
+    // wait<1>: allow at most 1 pending group (group 1) → group 0 (QKV) is done.
+    cp_async_wait_group<1>();
+
+    float elements[numElemsPerThread];
+    float elements2[numElemsPerThread];
+    int const rotary_lanes = rotary_dim / numElemsPerThread;
+    int const embed_dim = rotary_dim / 2;
+    T_cache const* const cos_smem = &smem[warpId * rotary_dim];
+    T_cache const* const sin_smem = &smem[warpId * rotary_dim + embed_dim];
+
+    // Preload weights into registers once, reused across all heads.
+    float q_w[numElemsPerThread];
+    float k_w[numElemsPerThread];
+#pragma unroll
+    for (int i = 0; i < numElemsPerThread; i++) {
+      int const dim = laneId * numElemsPerThread + i;
+      q_w[i] = Converter::convert(q_weight[dim]);
+      k_w[i] = Converter::convert(k_weight[dim]);
+    }
+
+    for (int k = 0; k < num_heads_this_warp; ++k) {
+      int const localHeadIdx = first_head + k;
+      bool const isQ = localHeadIdx < num_heads_q;
+      int const headIdx = isQ ? localHeadIdx : localHeadIdx - num_heads_q;
+
+      int offsetWarp;
+      if (isQ) {
+        offsetWarp = tokenIdx * num_heads * head_dim + headIdx * head_dim;
+      } else {
+        offsetWarp = tokenIdx * num_heads * head_dim + num_heads_q * head_dim +
+                     headIdx * head_dim;
+      }
+      int const offsetThread = offsetWarp + laneId * numElemsPerThread;
+
+      // === Part 1: QK Norm (read from smem; group 0 already done). ===
+      float sumOfSquares = 0.0f;
+      {
+        char const* smem_src =
+            this_warp_head_smem + k * qkv_tile_bytes + laneId * elemSizeBytes;
+        vec_T vec = *reinterpret_cast<vec_T const*>(smem_src);
+        constexpr int num_packed_elems = elemSizeBytes / sizeof(T2_in);
+#pragma unroll
+        for (int i = 0; i < num_packed_elems; i++) {
+          T2_in packed_val = *(reinterpret_cast<T2_in*>(&vec) + i);
+          float2 vals = Converter::convert(packed_val);
+          sumOfSquares += vals.x * vals.x;
+          sumOfSquares += vals.y * vals.y;
+          elements[2 * i] = vals.x;
+          elements[2 * i + 1] = vals.y;
+        }
+      }
+
+      sumOfSquares = tensorrt_llm::common::warpReduceSum(sumOfSquares);
+      float rms_rcp = rsqrtf(sumOfSquares / static_cast<float>(head_dim) + eps);
+
+#pragma unroll
+      for (int i = 0; i < numElemsPerThread; i++) {
+        elements[i] *= rms_rcp * (isQ ? q_w[i] : k_w[i]);
+      }
+
+      // On first head: wait for group 1 (cos/sin) before RoPE.
+      if (k == 0) cp_async_wait_group<0>();
+
+      // === Part 2: RoPE using cos/sin from shared memory. ===
+      if (laneId < rotary_lanes) {
+        if constexpr (interleave) {
+#pragma unroll
+          for (int i = 0; i < numElemsPerThread / 2; ++i) {
+            int const idx0 = 2 * i;
+            int const idx1 = 2 * i + 1;
+            int const dim_idx = laneId * numElemsPerThread + idx0;
+            float const val0 = elements[idx0];
+            float const val1 = elements[idx1];
+            int const half_dim = dim_idx / 2;
+            float const cos_val = CacheConverter::convert(cos_smem[half_dim]);
+            float const sin_val = CacheConverter::convert(sin_smem[half_dim]);
+            elements[idx0] = val0 * cos_val - val1 * sin_val;
+            elements[idx1] = val0 * sin_val + val1 * cos_val;
+          }
+        } else {
+          __syncwarp();
+          int const pairOffset = (rotary_dim / 2) / numElemsPerThread;
+#pragma unroll
+          for (int i = 0; i < numElemsPerThread; i++) {
+            elements2[i] = __shfl_xor_sync(FINAL_MASK, elements[i], pairOffset);
+            if (laneId < pairOffset) elements2[i] = -elements2[i];
+            int dim_idx = laneId * numElemsPerThread + i;
+            dim_idx = (dim_idx * 2) % rotary_dim;
+            int const half_dim = dim_idx / 2;
+            float const cos_val = CacheConverter::convert(cos_smem[half_dim]);
+            float const sin_val = CacheConverter::convert(sin_smem[half_dim]);
+            elements[i] = elements[i] * cos_val + elements2[i] * sin_val;
+          }
+          __syncwarp();
+        }
+      }
+
+      // Store.
+      {
+        vec_T vec;
+        constexpr int num_packed_elems = elemSizeBytes / sizeof(T2_in);
+#pragma unroll
+        for (int i = 0; i < num_packed_elems; i++) {
+          T2_in packed_val = Converter::convert(
+              make_float2(elements[2 * i], elements[2 * i + 1]));
+          *(reinterpret_cast<T2_in*>(&vec) + i) = packed_val;
+        }
+        *reinterpret_cast<vec_T*>(&qkv[offsetThread]) = vec;
+      }
+    }
+
+#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 800) && !defined(USE_ROCM)
+  }
+#endif
+}
+
+// Borrowed from
+// https://github.com/flashinfer-ai/flashinfer/blob/8125d079a43e9a0ba463a4ed1b639cefd084cec9/include/flashinfer/pos_enc.cuh#L568
+#define DISPATCH_INTERLEAVE(interleave, INTERLEAVE, ...) \
+  if (interleave) {                                      \
+    const bool INTERLEAVE = true;                        \
+    __VA_ARGS__                                          \
+  } else {                                               \
+    const bool INTERLEAVE = false;                       \
+    __VA_ARGS__                                          \
+  }
+
+template <typename scalar_t_in, typename scalar_t_cache>
+void launchFusedQKNormRope(void* qkv, int const num_tokens,
+                           int const num_heads_q, int const num_heads_k,
+                           int const num_heads_v, int const head_dim,
+                           int const rotary_dim, float const eps,
+                           void const* q_weight, void const* k_weight,
+                           void const* cos_sin_cache, bool const interleave,
+                           int64_t const* position_ids, cudaStream_t stream) {
+  constexpr int blockSize = 256;
+  int const warpsPerBlock = blockSize / 32;
+  int const totalQKHeads = num_heads_q + num_heads_k;
+  int const totalWarps = num_tokens * totalQKHeads;
+  int const gridSize = common::divUp(totalWarps, warpsPerBlock);
+  dim3 gridDim(gridSize);
+  dim3 blockDim(blockSize);
+  switch (head_dim) {
+    case 64:
+      DISPATCH_INTERLEAVE(interleave, INTERLEAVE, {
+        fusedQKNormRopeKernel<scalar_t_in, scalar_t_cache, 64, INTERLEAVE>
+            <<<gridDim, blockDim, 0, stream>>>(
+                qkv, num_heads_q, num_heads_k, num_heads_v, eps, q_weight,
+                k_weight, cos_sin_cache, position_ids, num_tokens, rotary_dim);
+      });
+      break;
+    case 128:
+      DISPATCH_INTERLEAVE(interleave, INTERLEAVE, {
+        fusedQKNormRopeKernel<scalar_t_in, scalar_t_cache, 128, INTERLEAVE>
+            <<<gridDim, blockDim, 0, stream>>>(
+                qkv, num_heads_q, num_heads_k, num_heads_v, eps, q_weight,
+                k_weight, cos_sin_cache, position_ids, num_tokens, rotary_dim);
+      });
+      break;
+    case 256:
+      DISPATCH_INTERLEAVE(interleave, INTERLEAVE, {
+        fusedQKNormRopeKernel<scalar_t_in, scalar_t_cache, 256, INTERLEAVE>
+            <<<gridDim, blockDim, 0, stream>>>(
+                qkv, num_heads_q, num_heads_k, num_heads_v, eps, q_weight,
+                k_weight, cos_sin_cache, position_ids, num_tokens, rotary_dim);
+      });
+      break;
+    default:
+      STD_TORCH_CHECK(
+          false, "Unsupported head dimension for fusedQKNormRope: ", head_dim);
+  }
+}
+
+// Launch: one warp processes token_heads_per_warp token-heads (1, 2, 4, or 8).
+// When token_heads_per_warp == 1, delegates to the 1-head baseline above.
+template <typename scalar_t_in, typename scalar_t_cache>
+void launchFusedQKNormRopeNTokenHeads(
+    void* qkv, int const num_tokens, int const num_heads_q,
+    int const num_heads_k, int const num_heads_v, int const head_dim,
+    int const rotary_dim, float const eps, void const* q_weight,
+    void const* k_weight, void const* cos_sin_cache, bool const interleave,
+    int64_t const* position_ids, int const token_heads_per_warp,
+    cudaStream_t stream) {
+  STD_TORCH_CHECK(token_heads_per_warp == 1 || token_heads_per_warp == 2 ||
+                      token_heads_per_warp == 4 || token_heads_per_warp == 8,
+                  "token_heads_per_warp must be 1, 2, 4, or 8, got ",
+                  token_heads_per_warp);
+
+  // token_heads_per_warp == 1: delegate to the 1-head baseline kernel.
+  if (token_heads_per_warp == 1) {
+    launchFusedQKNormRope<scalar_t_in, scalar_t_cache>(
+        qkv, num_tokens, num_heads_q, num_heads_k, num_heads_v, head_dim,
+        rotary_dim, eps, q_weight, k_weight, cos_sin_cache, interleave,
+        position_ids, stream);
+    return;
+  }
+
+  // NTokenHeads kernel uses cp.async to load cos/sin in 16-byte chunks.
+  // If rotary_dim * sizeof(cache_dtype) is not a multiple of 16, the last
+  // cp.async would write past the shared memory allocation.
+  // Fall back to the base kernel instead of failing.
+  {
+    size_t const rotary_bytes =
+        static_cast<size_t>(rotary_dim) *
+        (std::is_same_v<scalar_t_cache, float> ? sizeof(float) : 2u);
+    if (rotary_bytes % 16 != 0) {
+      launchFusedQKNormRope<scalar_t_in, scalar_t_cache>(
+          qkv, num_tokens, num_heads_q, num_heads_k, num_heads_v, head_dim,
+          rotary_dim, eps, q_weight, k_weight, cos_sin_cache, interleave,
+          position_ids, stream);
+      return;
+    }
+  }
+
+  constexpr int blockSize = 256;
+  int const warpsPerBlock = blockSize / 32;
+  int const totalQKHeads = num_heads_q + num_heads_k;
+  // Grid: one warp per (token, head_chunk); same token → reuse cos/sin in smem.
+  int const head_chunks_per_token =
+      (totalQKHeads + token_heads_per_warp - 1) / token_heads_per_warp;
+  int const total_warps = num_tokens * head_chunks_per_token;
+  int const gridSize = common::divUp(total_warps, warpsPerBlock);
+  dim3 gridDim(gridSize);
+  dim3 blockDim(blockSize);
+  // Cache element size: float=4, bfloat16=2 (host-safe; kernel uses same
+  // layout).
+  size_t const cache_elem_size =
+      std::is_same_v<scalar_t_cache, float> ? sizeof(float) : 2u;
+  // QKV smem: token_heads_per_warp tiles per warp, each tile 32*(head_dim/32*2)
+  // = 2*head_dim bytes.
+  size_t const qkv_smem_per_warp = static_cast<size_t>(token_heads_per_warp) *
+                                   2u * static_cast<size_t>(head_dim);
+  size_t const smem_bytes =
+      warpsPerBlock * static_cast<size_t>(rotary_dim) * cache_elem_size +
+      warpsPerBlock * qkv_smem_per_warp;
+
+#define LAUNCH_N_TOKEN_HEADS(N)                                              \
+  do {                                                                       \
+    switch (head_dim) {                                                      \
+      case 64:                                                               \
+        DISPATCH_INTERLEAVE(interleave, INTERLEAVE, {                        \
+          fusedQKNormRopeKernelNTokenHeads<scalar_t_in, scalar_t_cache, 64,  \
+                                           INTERLEAVE, (N)>                  \
+              <<<gridDim, blockDim, smem_bytes, stream>>>(                   \
+                  qkv, num_heads_q, num_heads_k, num_heads_v, eps, q_weight, \
+                  k_weight, cos_sin_cache, position_ids, num_tokens,         \
+                  rotary_dim);                                               \
+        });                                                                  \
+        break;                                                               \
+      case 128:                                                              \
+        DISPATCH_INTERLEAVE(interleave, INTERLEAVE, {                        \
+          fusedQKNormRopeKernelNTokenHeads<scalar_t_in, scalar_t_cache, 128, \
+                                           INTERLEAVE, (N)>                  \
+              <<<gridDim, blockDim, smem_bytes, stream>>>(                   \
+                  qkv, num_heads_q, num_heads_k, num_heads_v, eps, q_weight, \
+                  k_weight, cos_sin_cache, position_ids, num_tokens,         \
+                  rotary_dim);                                               \
+        });                                                                  \
+        break;                                                               \
+      case 256:                                                              \
+        DISPATCH_INTERLEAVE(interleave, INTERLEAVE, {                        \
+          fusedQKNormRopeKernelNTokenHeads<scalar_t_in, scalar_t_cache, 256, \
+                                           INTERLEAVE, (N)>                  \
+              <<<gridDim, blockDim, smem_bytes, stream>>>(                   \
+                  qkv, num_heads_q, num_heads_k, num_heads_v, eps, q_weight, \
+                  k_weight, cos_sin_cache, position_ids, num_tokens,         \
+                  rotary_dim);                                               \
+        });                                                                  \
+        break;                                                               \
+      default:                                                               \
+        STD_TORCH_CHECK(false, "Unsupported head dimension: ", head_dim);    \
+    }                                                                        \
+  } while (0)
+
+  if (token_heads_per_warp == 2) {
+    LAUNCH_N_TOKEN_HEADS(2);
+  } else if (token_heads_per_warp == 4) {
+    LAUNCH_N_TOKEN_HEADS(4);
+  } else if (token_heads_per_warp == 8) {
+    LAUNCH_N_TOKEN_HEADS(8);
+  }
+#undef LAUNCH_N_TOKEN_HEADS
+}
+
+}  // namespace tensorrt_llm::kernels
+
+void fused_qk_norm_rope(
+    torch::stable::Tensor&
+        qkv,              // Combined QKV tensor [num_tokens,
+                          // (num_heads_q+num_heads_k+num_heads_v)*head_dim]
+    int64_t num_heads_q,  // Number of query heads
+    int64_t num_heads_k,  // Number of key heads
+    int64_t num_heads_v,  // Number of value heads
+    int64_t head_dim,     // Dimension per head
+    double eps,           // Epsilon for RMS normalization
+    torch::stable::Tensor& q_weight,  // RMSNorm weights for query [head_dim]
+    torch::stable::Tensor& k_weight,  // RMSNorm weights for key [head_dim]
+    torch::stable::Tensor& cos_sin_cache,  // Cos/sin cache [max_position,
+                                           // head_dim]
+    bool is_neox,  // Whether RoPE is applied in Neox style
+    torch::stable::Tensor& position_ids,  // Position IDs for RoPE [num_tokens]
+    int64_t forced_token_heads_per_warp   // -1 = auto-select, >0 = forced value
+) {
+  // Input validation
+  CHECK_INPUT(qkv);
+  CHECK_INPUT(position_ids);
+  CHECK_INPUT(q_weight);
+  CHECK_INPUT(k_weight);
+  CHECK_INPUT(cos_sin_cache);
+  CHECK_TYPE(position_ids, torch::headeronly::ScalarType::Long);
+
+  STD_TORCH_CHECK(qkv.dim() == 2,
+                  "QKV tensor must be 2D: [num_tokens, "
+                  "(num_heads_q+num_heads_k+num_heads_v)*head_dim]");
+  STD_TORCH_CHECK(position_ids.dim() == 1,
+                  "Position IDs must be 1D: [num_tokens]");
+  STD_TORCH_CHECK(q_weight.dim() == 1, "Query weights must be 1D: [head_dim]");
+  STD_TORCH_CHECK(k_weight.dim() == 1, "Key weights must be 1D: [head_dim]");
+  STD_TORCH_CHECK(cos_sin_cache.dim() == 2,
+                  "Cos/sin cache must be 2D: [max_position, head_dim]");
+  STD_TORCH_CHECK(q_weight.size(0) == head_dim,
+                  "Query weights size must match head dimension");
+  STD_TORCH_CHECK(k_weight.size(0) == head_dim,
+                  "Key weights size must match head dimension");
+
+  STD_TORCH_CHECK(cos_sin_cache.size(1) % 2 == 0, "rotary_dim must be even");
+  STD_TORCH_CHECK(cos_sin_cache.size(1) <= head_dim,
+                  "rotary_dim must be less than or equal to head_dim");
+
+  STD_TORCH_CHECK(qkv.scalar_type() == q_weight.scalar_type() &&
+                      qkv.scalar_type() == k_weight.scalar_type(),
+                  "qkv, q_weight and k_weight must have the same dtype");
+
+  int64_t num_tokens = qkv.size(0);
+  STD_TORCH_CHECK(position_ids.size(0) == num_tokens,
+                  "Number of tokens in position_ids must match QKV");
+
+  int64_t total_heads = num_heads_q + num_heads_k + num_heads_v;
+  STD_TORCH_CHECK(
+      qkv.size(1) == total_heads * head_dim,
+      "QKV tensor size must match total number of heads and head dimension");
+
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      qkv.get_device_index());
+  auto stream = get_current_cuda_stream(qkv.get_device_index());
+
+  // Select token_heads_per_warp: forced value if >0, else auto-select.
+  // Auto thresholds are calibrated on SM 9.0 (H100). On other architectures,
+  // fall back to token_heads_per_warp=1 (base kernel) until profiled.
+  int token_heads_per_warp;
+  if (forced_token_heads_per_warp > 0) {  // only support SM80+
+    token_heads_per_warp = static_cast<int>(forced_token_heads_per_warp);
+  } else {
+    token_heads_per_warp = 1;
+    int sm_version = get_device_prop()->major * 10 + get_device_prop()->minor;
+    int64_t total_qk_units = num_tokens * (num_heads_q + num_heads_k);
+    if (sm_version == 90) {
+      if (head_dim >= 256) {
+        if (total_qk_units < 4096LL) {
+          token_heads_per_warp = 1;
+        } else if (total_qk_units < 8192LL) {
+          token_heads_per_warp = 2;
+        } else {
+          token_heads_per_warp = 4;
+        }
+      } else {
+        if (total_qk_units < 10240LL) {
+          token_heads_per_warp = 1;
+        } else if (total_qk_units < 40960LL) {
+          token_heads_per_warp = 4;
+        } else {
+          token_heads_per_warp = 8;
+        }
+      }
+    }
+  }
+
+  VLLM_STABLE_DISPATCH_HALF_TYPES(
+      qkv.scalar_type(), "fused_qk_norm_rope_kernel", [&] {
+        using qkv_scalar_t = scalar_t;
+        VLLM_STABLE_DISPATCH_FLOATING_TYPES(
+            cos_sin_cache.scalar_type(), "fused_qk_norm_rope_kernel", [&] {
+              using cache_scalar_t = scalar_t;
+              tensorrt_llm::kernels::launchFusedQKNormRopeNTokenHeads<
+                  qkv_scalar_t, cache_scalar_t>(
+                  qkv.data_ptr(), static_cast<int>(num_tokens),
+                  static_cast<int>(num_heads_q), static_cast<int>(num_heads_k),
+                  static_cast<int>(num_heads_v), static_cast<int>(head_dim),
+                  static_cast<int>(cos_sin_cache.size(1)),
+                  static_cast<float>(eps), q_weight.data_ptr(),
+                  k_weight.data_ptr(), cos_sin_cache.data_ptr(), !is_neox,
+                  reinterpret_cast<int64_t const*>(position_ids.data_ptr()),
+                  token_heads_per_warp, stream);
+            });
+      });
+}
diff --git a/csrc/layernorm_kernels.cu b/csrc/libtorch_stable/layernorm_kernels.cu
similarity index 74%
rename from csrc/layernorm_kernels.cu
rename to csrc/libtorch_stable/layernorm_kernels.cu
index 9766103f7646..fb714b1b1e07 100644
--- a/csrc/layernorm_kernels.cu
+++ b/csrc/libtorch_stable/layernorm_kernels.cu
@@ -1,11 +1,12 @@
-#include "type_convert.cuh"
-#include "dispatch_utils.h"
-#include "cub_helpers.h"
-#include "core/batch_invariant.hpp"
-#include "libtorch_stable/quantization/vectorization_utils.cuh"
+#include <numeric>
+
+#include "torch_utils.h"
 
-#include <torch/cuda.h>
-#include <c10/cuda/CUDAGuard.h>
+#include "../cub_helpers.h"
+#include "../core/batch_invariant.hpp"
+#include "../type_convert.cuh"
+#include "dispatch_utils.h"
+#include "quantization/vectorization_utils.cuh"
 
 namespace vllm {
 
@@ -77,7 +78,8 @@ __global__ void rms_norm_kernel(
 #pragma unroll
     for (int j = 0; j < VEC_SIZE; j++) {
       float x = static_cast<float>(src1.val[j]);
-      dst.val[j] = ((scalar_t)(x * s_variance)) * src2.val[j];
+      float w = static_cast<float>(src2.val[j]);
+      dst.val[j] = static_cast<scalar_t>(x * s_variance * w);
     }
     v_out[i] = dst;
   }
@@ -134,10 +136,17 @@ fused_add_rms_norm_kernel(
   for (int idx = threadIdx.x; idx < vec_hidden_size; idx += blockDim.x) {
     int id = blockIdx.x * vec_hidden_size + idx;
     int64_t strided_id = blockIdx.x * vec_input_stride + idx;
-    _f16Vec<scalar_t, width> temp = residual_v[id];
-    temp *= s_variance;
-    temp *= weight_v[idx];
-    input_v[strided_id] = temp;
+    _f16Vec<scalar_t, width> res = residual_v[id];
+    _f16Vec<scalar_t, width> w = weight_v[idx];
+    _f16Vec<scalar_t, width> out;
+    using Converter = _typeConvert<scalar_t>;
+#pragma unroll
+    for (int j = 0; j < width; ++j) {
+      float x = Converter::convert(res.data[j]);
+      float wf = Converter::convert(w.data[j]);
+      out.data[j] = Converter::convert(x * s_variance * wf);
+    }
+    input_v[strided_id] = out;
   }
 }
 
@@ -174,23 +183,23 @@ fused_add_rms_norm_kernel(
 
   for (int idx = threadIdx.x; idx < hidden_size; idx += blockDim.x) {
     float x = (float)residual[blockIdx.x * hidden_size + idx];
-    input[blockIdx.x * input_stride + idx] =
-        ((scalar_t)(x * s_variance)) * weight[idx];
+    float w = (float)weight[idx];
+    input[blockIdx.x * input_stride + idx] = (scalar_t)(x * s_variance * w);
   }
 }
 
 }  // namespace vllm
 
-void rms_norm(torch::Tensor& out,     // [..., hidden_size]
-              torch::Tensor& input,   // [..., hidden_size]
-              torch::Tensor& weight,  // [hidden_size]
+void rms_norm(torch::stable::Tensor& out,     // [..., hidden_size]
+              torch::stable::Tensor& input,   // [..., hidden_size]
+              torch::stable::Tensor& weight,  // [hidden_size]
               double epsilon) {
-  TORCH_CHECK(out.is_contiguous());
+  STD_TORCH_CHECK(out.is_contiguous());
   if (input.stride(-1) != 1) {
-    input = input.contiguous();
+    input = torch::stable::contiguous(input);
   }
-  TORCH_CHECK(input.stride(-1) == 1);
-  TORCH_CHECK(weight.is_contiguous());
+  STD_TORCH_CHECK(input.stride(-1) == 1);
+  STD_TORCH_CHECK(weight.is_contiguous());
 
   int hidden_size = input.size(-1);
 
@@ -205,45 +214,49 @@ void rms_norm(torch::Tensor& out,     // [..., hidden_size]
   // For large num_tokens, use smaller blocks to increase SM concurrency.
   const int max_block_size = (num_tokens < 256) ? 1024 : 256;
   dim3 grid(num_tokens);
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  VLLM_DISPATCH_RANK234(num_dims, [&] {
-    VLLM_DISPATCH_FLOATING_TYPES(input.scalar_type(), "rms_norm_kernel", [&] {
-      const int calculated_vec_size =
-          std::gcd(16 / sizeof(scalar_t), hidden_size);
-      const int block_size =
-          std::min(hidden_size / calculated_vec_size, max_block_size);
-      dim3 block(block_size);
-      VLLM_DISPATCH_VEC_SIZE(calculated_vec_size, [&] {
-        vllm::rms_norm_kernel<scalar_t, vec_size, tensor_rank>
-            <<<grid, block, 0, stream>>>(
-                out.data_ptr<scalar_t>(), input.data_ptr<scalar_t>(),
-                input_stride_d2, input_stride_d3, input_stride_d4,
-                input_shape_d2, input_shape_d3, weight.data_ptr<scalar_t>(),
-                epsilon, num_tokens, hidden_size);
-      });
-    });
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream();
+  VLLM_STABLE_DISPATCH_RANK234(num_dims, [&] {
+    VLLM_STABLE_DISPATCH_FLOATING_TYPES(
+        input.scalar_type(), "rms_norm_kernel", [&] {
+          const int calculated_vec_size =
+              std::gcd(16 / sizeof(scalar_t), hidden_size);
+          const int block_size =
+              std::min(hidden_size / calculated_vec_size, max_block_size);
+          dim3 block(block_size);
+          VLLM_STABLE_DISPATCH_VEC_SIZE(calculated_vec_size, [&] {
+            vllm::rms_norm_kernel<scalar_t, vec_size, tensor_rank>
+                <<<grid, block, 0, stream>>>(
+                    out.mutable_data_ptr<scalar_t>(),
+                    input.const_data_ptr<scalar_t>(), input_stride_d2,
+                    input_stride_d3, input_stride_d4, input_shape_d2,
+                    input_shape_d3, weight.const_data_ptr<scalar_t>(), epsilon,
+                    num_tokens, hidden_size);
+          });
+        });
   });
 }
 
-#define LAUNCH_FUSED_ADD_RMS_NORM(width)                                    \
-  VLLM_DISPATCH_FLOATING_TYPES(                                             \
-      input.scalar_type(), "fused_add_rms_norm_kernel", [&] {               \
-        vllm::fused_add_rms_norm_kernel<scalar_t, width>                    \
-            <<<grid, block, 0, stream>>>(                                   \
-                input.data_ptr<scalar_t>(), input_stride,                   \
-                residual.data_ptr<scalar_t>(), weight.data_ptr<scalar_t>(), \
-                epsilon, num_tokens, hidden_size);                          \
+#define LAUNCH_FUSED_ADD_RMS_NORM(width)                                \
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(                                  \
+      input.scalar_type(), "fused_add_rms_norm_kernel", [&] {           \
+        vllm::fused_add_rms_norm_kernel<scalar_t, width>                \
+            <<<grid, block, 0, stream>>>(                               \
+                input.mutable_data_ptr<scalar_t>(), input_stride,       \
+                residual.mutable_data_ptr<scalar_t>(),                  \
+                weight.const_data_ptr<scalar_t>(), epsilon, num_tokens, \
+                hidden_size);                                           \
       });
 
-void fused_add_rms_norm(torch::Tensor& input,     // [..., hidden_size]
-                        torch::Tensor& residual,  // [..., hidden_size]
-                        torch::Tensor& weight,    // [hidden_size]
+void fused_add_rms_norm(torch::stable::Tensor& input,     // [..., hidden_size]
+                        torch::stable::Tensor& residual,  // [..., hidden_size]
+                        torch::stable::Tensor& weight,    // [hidden_size]
                         double epsilon) {
-  TORCH_CHECK(weight.scalar_type() == input.scalar_type());
-  TORCH_CHECK(input.scalar_type() == residual.scalar_type());
-  TORCH_CHECK(residual.is_contiguous());
-  TORCH_CHECK(weight.is_contiguous());
+  STD_TORCH_CHECK(weight.scalar_type() == input.scalar_type());
+  STD_TORCH_CHECK(input.scalar_type() == residual.scalar_type());
+  STD_TORCH_CHECK(residual.is_contiguous());
+  STD_TORCH_CHECK(weight.is_contiguous());
   int hidden_size = input.size(-1);
   int64_t input_stride = input.stride(-2);
   int num_tokens = input.numel() / hidden_size;
@@ -255,8 +268,9 @@ void fused_add_rms_norm(torch::Tensor& input,     // [..., hidden_size]
      hiding on global mem ops. */
   const int max_block_size = (num_tokens < 256) ? 1024 : 256;
   dim3 block(std::min(hidden_size, max_block_size));
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream();
   /*If the tensor types are FP16/BF16, try to use the optimized kernel
     with packed + vectorized ops.
     Max optimization is achieved with a width-8 vector of FP16/BF16s
diff --git a/csrc/layernorm_quant_kernels.cu b/csrc/libtorch_stable/layernorm_quant_kernels.cu
similarity index 69%
rename from csrc/layernorm_quant_kernels.cu
rename to csrc/libtorch_stable/layernorm_quant_kernels.cu
index f96386252c31..26ffa76d6e14 100644
--- a/csrc/layernorm_quant_kernels.cu
+++ b/csrc/libtorch_stable/layernorm_quant_kernels.cu
@@ -5,15 +5,16 @@
  * Currently, only static fp8 quantization is supported.
  */
 
-#include "type_convert.cuh"
-#include "quantization/w8a8/fp8/common.cuh"
-#include "dispatch_utils.h"
-#include "cub_helpers.h"
-#include "core/batch_invariant.hpp"
-#include "libtorch_stable/quantization/vectorization_utils.cuh"
+#include <numeric>
+
+#include "torch_utils.h"
 
-#include <torch/cuda.h>
-#include <c10/cuda/CUDAGuard.h>
+#include "../cub_helpers.h"
+#include "../core/batch_invariant.hpp"
+#include "../quantization/w8a8/fp8/common.cuh"
+#include "../type_convert.cuh"
+#include "dispatch_utils.h"
+#include "quantization/vectorization_utils.cuh"
 
 namespace vllm {
 
@@ -65,9 +66,16 @@ __global__ void rms_norm_static_fp8_quant_kernel(
 #pragma unroll
     for (int j = 0; j < VEC_SIZE; j++) {
       float x = static_cast<float>(src1.val[j]);
-      float const out_norm = ((scalar_t)(x * s_variance)) * src2.val[j];
+      float w = static_cast<float>(src2.val[j]);
+      // Round normalized result through scalar_t to match the precision of the
+      // unfused composite (rms_norm writes scalar_t, then
+      // static_scaled_fp8_quant re-loads it as float before FP8 conversion).
+      // Without this round, the fused path is strictly more accurate and
+      // disagrees with the composite at exact E4M3 quantization tie boundaries.
+      scalar_t out_norm = static_cast<scalar_t>(x * s_variance * w);
       out[blockIdx.x * hidden_size + idx * VEC_SIZE + j] =
-          scaled_fp8_conversion<true, fp8_type>(out_norm, scale_inv);
+          scaled_fp8_conversion<true, fp8_type>(static_cast<float>(out_norm),
+                                                scale_inv);
     }
   }
 }
@@ -127,13 +135,21 @@ fused_add_rms_norm_static_fp8_quant_kernel(
 
   for (int idx = threadIdx.x; idx < vec_hidden_size; idx += blockDim.x) {
     int id = blockIdx.x * vec_hidden_size + idx;
-    _f16Vec<scalar_t, width> temp = residual_v[id];
-    temp *= s_variance;
-    temp *= weight_v[idx];
+    _f16Vec<scalar_t, width> res = residual_v[id];
+    _f16Vec<scalar_t, width> w = weight_v[idx];
+    using Converter = _typeConvert<scalar_t>;
+    using HipT = typename Converter::hip_type;
 #pragma unroll
     for (int i = 0; i < width; ++i) {
-      out[id * width + i] =
-          scaled_fp8_conversion<true, fp8_type>(float(temp.data[i]), scale_inv);
+      float x = Converter::convert(res.data[i]);
+      float wf = Converter::convert(w.data[i]);
+      // See note in rms_norm_static_fp8_quant_kernel: round through scalar_t
+      // to match the unfused composite path at FP8 boundaries. We use the
+      // backend's hip_type for the intermediate since c10::Half/BFloat16 has
+      // ambiguous conversions on CUDA and no implicit conversion on ROCm.
+      HipT out_norm_h = Converter::convert(x * s_variance * wf);
+      out[id * width + i] = scaled_fp8_conversion<true, fp8_type>(
+          Converter::convert(out_norm_h), scale_inv);
     }
   }
 }
@@ -176,20 +192,24 @@ fused_add_rms_norm_static_fp8_quant_kernel(
 
   for (int idx = threadIdx.x; idx < hidden_size; idx += blockDim.x) {
     float x = (float)residual[blockIdx.x * hidden_size + idx];
-    float const out_norm = ((scalar_t)(x * s_variance)) * weight[idx];
-    out[blockIdx.x * hidden_size + idx] =
-        scaled_fp8_conversion<true, fp8_type>(out_norm, scale_inv);
+    float w = (float)weight[idx];
+    // See note in rms_norm_static_fp8_quant_kernel: round through scalar_t
+    // to match the unfused composite path at FP8 boundaries.
+    scalar_t out_norm = static_cast<scalar_t>(x * s_variance * w);
+    out[blockIdx.x * hidden_size + idx] = scaled_fp8_conversion<true, fp8_type>(
+        static_cast<float>(out_norm), scale_inv);
   }
 }
 
 }  // namespace vllm
 
-void rms_norm_static_fp8_quant(torch::Tensor& out,     // [..., hidden_size]
-                               torch::Tensor& input,   // [..., hidden_size]
-                               torch::Tensor& weight,  // [hidden_size]
-                               torch::Tensor& scale,   // [1]
-                               double epsilon) {
-  TORCH_CHECK(out.is_contiguous());
+void rms_norm_static_fp8_quant(
+    torch::stable::Tensor& out,     // [..., hidden_size]
+    torch::stable::Tensor& input,   // [..., hidden_size]
+    torch::stable::Tensor& weight,  // [hidden_size]
+    torch::stable::Tensor& scale,   // [1]
+    double epsilon) {
+  STD_TORCH_CHECK(out.is_contiguous());
   int hidden_size = input.size(-1);
   int input_stride = input.stride(-2);
   int num_tokens = input.numel() / hidden_size;
@@ -197,24 +217,26 @@ void rms_norm_static_fp8_quant(torch::Tensor& out,     // [..., hidden_size]
   // For large num_tokens, use smaller blocks to increase SM concurrency.
   const int max_block_size = (num_tokens < 256) ? 1024 : 256;
   dim3 grid(num_tokens);
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  VLLM_DISPATCH_FLOATING_TYPES(
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream();
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(
       input.scalar_type(), "rms_norm_kernel_scalar_type", [&] {
-        VLLM_DISPATCH_FP8_TYPES(
+        VLLM_STABLE_DISPATCH_FP8_TYPES(
             out.scalar_type(), "rms_norm_kernel_fp8_type", [&] {
               const int calculated_vec_size =
                   std::gcd(16 / sizeof(scalar_t), hidden_size);
               const int block_size =
                   std::min(hidden_size / calculated_vec_size, max_block_size);
               dim3 block(block_size);
-              VLLM_DISPATCH_VEC_SIZE(calculated_vec_size, [&] {
+              VLLM_STABLE_DISPATCH_VEC_SIZE(calculated_vec_size, [&] {
                 vllm::rms_norm_static_fp8_quant_kernel<scalar_t, fp8_t,
                                                        vec_size>
                     <<<grid, block, 0, stream>>>(
-                        out.data_ptr<fp8_t>(), input.data_ptr<scalar_t>(),
-                        input_stride, weight.data_ptr<scalar_t>(),
-                        scale.data_ptr<float>(), epsilon, num_tokens,
+                        out.mutable_data_ptr<fp8_t>(),
+                        input.const_data_ptr<scalar_t>(), input_stride,
+                        weight.const_data_ptr<scalar_t>(),
+                        scale.const_data_ptr<float>(), epsilon, num_tokens,
                         hidden_size);
               });
             });
@@ -222,30 +244,32 @@ void rms_norm_static_fp8_quant(torch::Tensor& out,     // [..., hidden_size]
 }
 
 #define LAUNCH_FUSED_ADD_RMS_NORM(width)                                     \
-  VLLM_DISPATCH_FLOATING_TYPES(                                              \
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(                                       \
       input.scalar_type(), "fused_add_rms_norm_kernel_scalar_type", [&] {    \
-        VLLM_DISPATCH_FP8_TYPES(                                             \
+        VLLM_STABLE_DISPATCH_FP8_TYPES(                                      \
             out.scalar_type(), "fused_add_rms_norm_kernel_fp8_type", [&] {   \
               vllm::fused_add_rms_norm_static_fp8_quant_kernel<scalar_t,     \
                                                                width, fp8_t> \
                   <<<grid, block, 0, stream>>>(                              \
-                      out.data_ptr<fp8_t>(), input.data_ptr<scalar_t>(),     \
-                      input_stride, residual.data_ptr<scalar_t>(),           \
-                      weight.data_ptr<scalar_t>(), scale.data_ptr<float>(),  \
-                      epsilon, num_tokens, hidden_size);                     \
+                      out.mutable_data_ptr<fp8_t>(),                         \
+                      input.mutable_data_ptr<scalar_t>(), input_stride,      \
+                      residual.mutable_data_ptr<scalar_t>(),                 \
+                      weight.const_data_ptr<scalar_t>(),                     \
+                      scale.const_data_ptr<float>(), epsilon, num_tokens,    \
+                      hidden_size);                                          \
             });                                                              \
       });
 void fused_add_rms_norm_static_fp8_quant(
-    torch::Tensor& out,       // [..., hidden_size],
-    torch::Tensor& input,     // [..., hidden_size]
-    torch::Tensor& residual,  // [..., hidden_size]
-    torch::Tensor& weight,    // [hidden_size]
-    torch::Tensor& scale,     // [1]
+    torch::stable::Tensor& out,       // [..., hidden_size],
+    torch::stable::Tensor& input,     // [..., hidden_size]
+    torch::stable::Tensor& residual,  // [..., hidden_size]
+    torch::stable::Tensor& weight,    // [hidden_size]
+    torch::stable::Tensor& scale,     // [1]
     double epsilon) {
-  TORCH_CHECK(out.is_contiguous());
-  TORCH_CHECK(residual.is_contiguous());
-  TORCH_CHECK(residual.scalar_type() == input.scalar_type());
-  TORCH_CHECK(weight.scalar_type() == input.scalar_type());
+  STD_TORCH_CHECK(out.is_contiguous());
+  STD_TORCH_CHECK(residual.is_contiguous());
+  STD_TORCH_CHECK(residual.scalar_type() == input.scalar_type());
+  STD_TORCH_CHECK(weight.scalar_type() == input.scalar_type());
   int hidden_size = input.size(-1);
   int input_stride = input.stride(-2);
   int num_tokens = input.numel() / hidden_size;
@@ -257,8 +281,9 @@ void fused_add_rms_norm_static_fp8_quant(
      hiding on global mem ops. */
   const int max_block_size = (num_tokens < 256) ? 1024 : 256;
   dim3 block(std::min(hidden_size, max_block_size));
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream();
   /*If the tensor types are FP16/BF16, try to use the optimized kernel
     with packed + vectorized ops.
     Max optimization is achieved with a width-8 vector of FP16/BF16s
diff --git a/csrc/libtorch_stable/ops.h b/csrc/libtorch_stable/ops.h
index b74c5c505f87..f99ff1d1db5b 100644
--- a/csrc/libtorch_stable/ops.h
+++ b/csrc/libtorch_stable/ops.h
@@ -27,4 +27,278 @@ void per_token_group_quant_int8(const torch::stable::Tensor& input,
                                 torch::stable::Tensor& output_s,
                                 int64_t group_size, double eps, double int8_min,
                                 double int8_max);
+
+bool cutlass_scaled_mm_supports_fp8(int64_t cuda_device_capability);
+bool cutlass_scaled_mm_supports_block_fp8(int64_t cuda_device_capability);
+bool cutlass_group_gemm_supported(int64_t cuda_device_capability);
+
+void cutlass_scaled_mm(torch::stable::Tensor& out,
+                       torch::stable::Tensor const& a,
+                       torch::stable::Tensor const& b,
+                       torch::stable::Tensor const& a_scales,
+                       torch::stable::Tensor const& b_scales,
+                       std::optional<torch::stable::Tensor> const& bias);
+
+void cutlass_moe_mm(torch::stable::Tensor& out_tensors,
+                    torch::stable::Tensor const& a_tensors,
+                    torch::stable::Tensor const& b_tensors,
+                    torch::stable::Tensor const& a_scales,
+                    torch::stable::Tensor const& b_scales,
+                    torch::stable::Tensor const& expert_offsets,
+                    torch::stable::Tensor const& problem_sizes,
+                    torch::stable::Tensor const& a_strides,
+                    torch::stable::Tensor const& b_strides,
+                    torch::stable::Tensor const& c_strides, bool per_act_token,
+                    bool per_out_ch);
+
+void cutlass_scaled_mm_azp(torch::stable::Tensor& out,
+                           torch::stable::Tensor const& a,
+                           torch::stable::Tensor const& b,
+                           torch::stable::Tensor const& a_scales,
+                           torch::stable::Tensor const& b_scales,
+                           torch::stable::Tensor const& azp_adj,
+                           std::optional<torch::stable::Tensor> const& azp,
+                           std::optional<torch::stable::Tensor> const& bias);
+
+void get_cutlass_moe_mm_data(
+    const torch::stable::Tensor& topk_ids,
+    torch::stable::Tensor& expert_offsets,
+    torch::stable::Tensor& problem_sizes1,
+    torch::stable::Tensor& problem_sizes2,
+    torch::stable::Tensor& input_permutation,
+    torch::stable::Tensor& output_permutation, const int64_t num_experts,
+    const int64_t n, const int64_t k,
+    const std::optional<torch::stable::Tensor>& blockscale_offsets,
+    const bool is_gated);
+
+void get_cutlass_moe_mm_problem_sizes_from_expert_offsets(
+    const torch::stable::Tensor& expert_first_token_offset,
+    torch::stable::Tensor& problem_sizes1,
+    torch::stable::Tensor& problem_sizes2, const int64_t n, const int64_t k,
+    const bool swap_ab);
+
+void get_cutlass_batched_moe_mm_data(
+    torch::stable::Tensor& expert_offsets,
+    torch::stable::Tensor& problem_sizes1,
+    torch::stable::Tensor& problem_sizes2,
+    const torch::stable::Tensor& expert_num_tokens,
+    const int64_t num_local_experts, const int64_t padded_m, const int64_t n,
+    const int64_t k);
+
+// FP4/NVFP4 ops
+bool cutlass_scaled_mm_supports_fp4(int64_t cuda_device_capability);
+
+void cutlass_scaled_fp4_mm(torch::stable::Tensor& D,
+                           torch::stable::Tensor const& A,
+                           torch::stable::Tensor const& B,
+                           torch::stable::Tensor const& A_sf,
+                           torch::stable::Tensor const& B_sf,
+                           torch::stable::Tensor const& alpha);
+
+void cutlass_fp4_group_mm(torch::stable::Tensor& output,
+                          const torch::stable::Tensor& a,
+                          const torch::stable::Tensor& b,
+                          const torch::stable::Tensor& a_blockscale,
+                          const torch::stable::Tensor& b_blockscales,
+                          const torch::stable::Tensor& alphas,
+                          const torch::stable::Tensor& problem_sizes,
+                          const torch::stable::Tensor& expert_offsets,
+                          const torch::stable::Tensor& sf_offsets);
+
+std::tuple<torch::stable::Tensor, torch::stable::Tensor> scaled_fp4_quant_func(
+    torch::stable::Tensor const& input,
+    torch::stable::Tensor const& input_scale, bool is_sf_swizzled_layout);
+
+void scaled_fp4_quant_out(torch::stable::Tensor const& input,
+                          torch::stable::Tensor const& input_scale,
+                          bool is_sf_swizzled_layout,
+                          torch::stable::Tensor& output,
+                          torch::stable::Tensor& output_scale);
+
+void scaled_fp4_experts_quant(
+    torch::stable::Tensor& output, torch::stable::Tensor& output_scale,
+    torch::stable::Tensor const& input,
+    torch::stable::Tensor const& input_global_scale,
+    torch::stable::Tensor const& input_offset_by_experts,
+    torch::stable::Tensor const& output_scale_offset_by_experts);
+
+void silu_and_mul_scaled_fp4_experts_quant(
+    torch::stable::Tensor& output, torch::stable::Tensor& output_scale,
+    torch::stable::Tensor const& input,
+    torch::stable::Tensor const& input_global_scale,
+    torch::stable::Tensor const& input_offset_by_experts,
+    torch::stable::Tensor const& output_scale_offset_by_experts);
+
+void silu_and_mul_nvfp4_quant(torch::stable::Tensor& out,
+                              torch::stable::Tensor& output_block_scale,
+                              torch::stable::Tensor& input,
+                              torch::stable::Tensor& input_global_scale);
+
+void cutlass_mxfp4_group_mm(torch::stable::Tensor& output,
+                            const torch::stable::Tensor& a,
+                            const torch::stable::Tensor& b,
+                            const torch::stable::Tensor& a_blockscale,
+                            const torch::stable::Tensor& b_blockscales,
+                            const torch::stable::Tensor& problem_sizes,
+                            const torch::stable::Tensor& expert_offsets,
+                            const torch::stable::Tensor& sf_offsets);
+
+// AWQ ops
+torch::stable::Tensor awq_gemm(torch::stable::Tensor _in_feats,
+                               torch::stable::Tensor _kernel,
+                               torch::stable::Tensor _scaling_factors,
+                               torch::stable::Tensor _zeros,
+                               int64_t split_k_iters);
+
+torch::stable::Tensor awq_dequantize(torch::stable::Tensor _kernel,
+                                     torch::stable::Tensor _scaling_factors,
+                                     torch::stable::Tensor _zeros,
+                                     int64_t split_k_iters, int64_t thx,
+                                     int64_t thy);
+
+// DSV3 fused A GEMM: conditionally compiled so declaration and impl
+// registration are in the source file (dsv3_fused_a_gemm.cu)
+
+// AllSpark ops: declarations are in the source files
+// (allspark_repack.cu and allspark_qgemm_w8a16.cu)
+
 #endif
+
+torch::stable::Tensor hadacore_transform(torch::stable::Tensor& x,
+                                         bool inplace);
+
+// Layernorm kernels (shared CUDA/ROCm)
+void rms_norm(torch::stable::Tensor& out, torch::stable::Tensor& input,
+              torch::stable::Tensor& weight, double epsilon);
+
+void fused_add_rms_norm(torch::stable::Tensor& input,
+                        torch::stable::Tensor& residual,
+                        torch::stable::Tensor& weight, double epsilon);
+
+// Layernorm-quant kernels (shared CUDA/ROCm)
+void rms_norm_static_fp8_quant(torch::stable::Tensor& out,
+                               torch::stable::Tensor& input,
+                               torch::stable::Tensor& weight,
+                               torch::stable::Tensor& scale, double epsilon);
+
+void fused_add_rms_norm_static_fp8_quant(torch::stable::Tensor& out,
+                                         torch::stable::Tensor& input,
+                                         torch::stable::Tensor& residual,
+                                         torch::stable::Tensor& weight,
+                                         torch::stable::Tensor& scale,
+                                         double epsilon);
+
+// Fused layernorm + dynamic per-token quant kernels (shared CUDA/ROCm)
+void rms_norm_dynamic_per_token_quant(
+    torch::stable::Tensor& out, torch::stable::Tensor const& input,
+    torch::stable::Tensor const& weight, torch::stable::Tensor& scales,
+    double const var_epsilon, std::optional<torch::stable::Tensor> scale_ub,
+    std::optional<torch::stable::Tensor> residual);
+
+void rms_norm_per_block_quant(torch::stable::Tensor& out,
+                              torch::stable::Tensor const& input,
+                              torch::stable::Tensor const& weight,
+                              torch::stable::Tensor& scales,
+                              double const var_epsilon,
+                              std::optional<torch::stable::Tensor> scale_ub,
+                              std::optional<torch::stable::Tensor> residual,
+                              int64_t group_size, bool is_scale_transposed);
+
+// Positional encoding kernels (shared CUDA/ROCm)
+void rotary_embedding(torch::stable::Tensor& positions,
+                      torch::stable::Tensor& query,
+                      std::optional<torch::stable::Tensor> key,
+                      int64_t head_size, torch::stable::Tensor& cos_sin_cache,
+                      bool is_neox, int64_t rope_dim_offset, bool inverse);
+
+void fused_qk_norm_rope(torch::stable::Tensor& qkv, int64_t num_heads_q,
+                        int64_t num_heads_k, int64_t num_heads_v,
+                        int64_t head_dim, double eps,
+                        torch::stable::Tensor& q_weight,
+                        torch::stable::Tensor& k_weight,
+                        torch::stable::Tensor& cos_sin_cache, bool is_neox,
+                        torch::stable::Tensor& position_ids,
+                        int64_t forced_token_heads_per_warp);
+
+// Activation kernels (shared CUDA/ROCm)
+void silu_and_mul(torch::stable::Tensor& out, torch::stable::Tensor& input);
+void silu_and_mul_clamp(torch::stable::Tensor& out,
+                        torch::stable::Tensor& input, double limit);
+void mul_and_silu(torch::stable::Tensor& out, torch::stable::Tensor& input);
+void gelu_and_mul(torch::stable::Tensor& out, torch::stable::Tensor& input);
+void gelu_tanh_and_mul(torch::stable::Tensor& out,
+                       torch::stable::Tensor& input);
+void fatrelu_and_mul(torch::stable::Tensor& out, torch::stable::Tensor& input,
+                     double threshold);
+void swigluoai_and_mul(torch::stable::Tensor& out, torch::stable::Tensor& input,
+                       double alpha = 1.702, double limit = 7.0);
+void gelu_new(torch::stable::Tensor& out, torch::stable::Tensor& input);
+void gelu_fast(torch::stable::Tensor& out, torch::stable::Tensor& input);
+void gelu_quick(torch::stable::Tensor& out, torch::stable::Tensor& input);
+
+// INT8 quantization kernels (shared CUDA/ROCm)
+void static_scaled_int8_quant(torch::stable::Tensor& out,
+                              torch::stable::Tensor const& input,
+                              torch::stable::Tensor const& scale,
+                              std::optional<torch::stable::Tensor> const& azp);
+
+void dynamic_scaled_int8_quant(torch::stable::Tensor& out,
+                               torch::stable::Tensor const& input,
+                               torch::stable::Tensor& scales,
+                               std::optional<torch::stable::Tensor> const& azp);
+
+// FP8 quantization kernels (shared CUDA/ROCm)
+void static_scaled_fp8_quant(
+    torch::stable::Tensor& out, torch::stable::Tensor const& input,
+    torch::stable::Tensor const& scale,
+    std::optional<torch::headeronly::IntHeaderOnlyArrayRef> group_shape =
+        std::nullopt);
+
+void dynamic_scaled_fp8_quant(torch::stable::Tensor& out,
+                              torch::stable::Tensor const& input,
+                              torch::stable::Tensor& scale);
+
+void dynamic_per_token_scaled_fp8_quant(
+    torch::stable::Tensor& out, torch::stable::Tensor const& input,
+    torch::stable::Tensor& scale,
+    std::optional<torch::stable::Tensor> const& scale_ub);
+
+// GPTQ kernels (shared CUDA/ROCm)
+torch::stable::Tensor gptq_gemm(torch::stable::Tensor a,
+                                torch::stable::Tensor b_q_weight,
+                                torch::stable::Tensor b_gptq_qzeros,
+                                torch::stable::Tensor b_gptq_scales,
+                                torch::stable::Tensor b_g_idx, bool use_exllama,
+                                bool use_v2_format, int64_t bit);
+
+void gptq_shuffle(torch::stable::Tensor q_weight, torch::stable::Tensor q_perm,
+                  int64_t bit);
+
+// GGML kernels (shared CUDA/ROCm)
+torch::stable::Tensor ggml_dequantize(
+    torch::stable::Tensor W, int64_t type, int64_t m, int64_t n,
+    std::optional<torch::headeronly::ScalarType> const& dtype);
+
+torch::stable::Tensor ggml_mul_mat_vec_a8(torch::stable::Tensor W,
+                                          torch::stable::Tensor X, int64_t type,
+                                          int64_t row);
+
+torch::stable::Tensor ggml_mul_mat_a8(torch::stable::Tensor W,
+                                      torch::stable::Tensor X, int64_t type,
+                                      int64_t row);
+
+torch::stable::Tensor ggml_moe_a8(torch::stable::Tensor X,
+                                  torch::stable::Tensor W,
+                                  torch::stable::Tensor sorted_token_ids,
+                                  torch::stable::Tensor expert_ids,
+                                  torch::stable::Tensor num_tokens_post_padded,
+                                  int64_t type, int64_t row, int64_t top_k,
+                                  int64_t tokens);
+
+torch::stable::Tensor ggml_moe_a8_vec(torch::stable::Tensor X,
+                                      torch::stable::Tensor W,
+                                      torch::stable::Tensor topk_ids,
+                                      int64_t top_k, int64_t type, int64_t row,
+                                      int64_t tokens);
+
+int64_t ggml_moe_get_block_size(int64_t type);
diff --git a/csrc/libtorch_stable/pos_encoding_kernels.cu b/csrc/libtorch_stable/pos_encoding_kernels.cu
new file mode 100644
index 000000000000..74af743b0961
--- /dev/null
+++ b/csrc/libtorch_stable/pos_encoding_kernels.cu
@@ -0,0 +1,201 @@
+#include "torch_utils.h"
+
+#include "../cuda_compat.h"
+#include "dispatch_utils.h"
+
+namespace vllm {
+
+template <typename scalar_t, typename cache_t, bool IS_NEOX>
+inline __device__ void apply_token_rotary_embedding(
+    scalar_t* __restrict__ arr, const cache_t* __restrict__ cos_ptr,
+    const cache_t* __restrict__ sin_ptr, int rot_offset, int embed_dim,
+    const bool inverse) {
+  int x_index, y_index;
+  float cos_f, sin_f;
+  if (IS_NEOX) {
+    x_index = rot_offset;
+    y_index = embed_dim + rot_offset;
+    cos_f = static_cast<float>(VLLM_LDG(cos_ptr + x_index));
+    sin_f = static_cast<float>(VLLM_LDG(sin_ptr + x_index));
+  } else {
+    x_index = 2 * rot_offset;
+    y_index = 2 * rot_offset + 1;
+    cos_f = static_cast<float>(VLLM_LDG(cos_ptr + x_index / 2));
+    sin_f = static_cast<float>(VLLM_LDG(sin_ptr + x_index / 2));
+  }
+  if (inverse) {
+    sin_f = -sin_f;
+  }
+  const float x_f = static_cast<float>(arr[x_index]);
+  const float y_f = static_cast<float>(arr[y_index]);
+  arr[x_index] = static_cast<scalar_t>(x_f * cos_f - y_f * sin_f);
+  arr[y_index] = static_cast<scalar_t>(y_f * cos_f + x_f * sin_f);
+}
+
+template <typename scalar_t, typename cache_t, bool IS_NEOX>
+inline __device__ void apply_rotary_embedding(
+    scalar_t* __restrict__ query,  // [batch_size, seq_len, num_heads,
+                                   // head_size] or [num_tokens, num_heads,
+                                   // head_size]
+    scalar_t* __restrict__ key,    // nullptr or
+                                   // [batch_size, seq_len, num_kv_heads,
+                                   // head_size] or [num_tokens, num_kv_heads,
+                                   // head_size]
+    const cache_t* cache_ptr, const int head_size, const int num_heads,
+    const int num_kv_heads, const int rot_dim, const int token_idx,
+    const int64_t query_stride, const int64_t key_stride,
+    const int64_t head_stride, const int64_t rope_dim_offset,
+    const bool inverse) {
+  const int embed_dim = rot_dim / 2;
+  const cache_t* cos_ptr = cache_ptr;
+  const cache_t* sin_ptr = cache_ptr + embed_dim;
+
+  const int nq = num_heads * embed_dim;
+  for (int i = threadIdx.x; i < nq; i += blockDim.x) {
+    const int head_idx = i / embed_dim;
+    const int64_t token_head =
+        token_idx * query_stride + head_idx * head_stride + rope_dim_offset;
+    const int rot_offset = i % embed_dim;
+    apply_token_rotary_embedding<scalar_t, cache_t, IS_NEOX>(
+        query + token_head, cos_ptr, sin_ptr, rot_offset, embed_dim, inverse);
+  }
+
+  if (key != nullptr) {
+    const int nk = num_kv_heads * embed_dim;
+    for (int i = threadIdx.x; i < nk; i += blockDim.x) {
+      const int head_idx = i / embed_dim;
+      const int64_t token_head =
+          token_idx * key_stride + head_idx * head_stride + rope_dim_offset;
+      const int rot_offset = i % embed_dim;
+      apply_token_rotary_embedding<scalar_t, cache_t, IS_NEOX>(
+          key + token_head, cos_ptr, sin_ptr, rot_offset, embed_dim, inverse);
+    }
+  }
+}
+
+template <typename scalar_t, typename cache_t, bool IS_NEOX>
+__global__ void rotary_embedding_kernel(
+    const int64_t* __restrict__ positions,  // [batch_size, seq_len] or
+                                            // [num_tokens]
+    scalar_t* __restrict__ query,           // [batch_size, seq_len, num_heads,
+                                   // head_size] or [num_tokens, num_heads,
+                                   // head_size]
+    scalar_t* __restrict__ key,  // nullptr or
+                                 // [batch_size, seq_len, num_kv_heads,
+                                 // head_size] or [num_tokens, num_kv_heads,
+                                 // head_size]
+    const cache_t* __restrict__ cos_sin_cache,  // [max_position, rot_dim]
+    const int rot_dim, const int64_t query_stride, const int64_t key_stride,
+    const int64_t head_stride, const int num_heads, const int num_kv_heads,
+    const int head_size, const int64_t rope_dim_offset, const bool inverse) {
+  const int token_idx = blockIdx.x;
+  int64_t pos = positions[token_idx];
+  const cache_t* cache_ptr = cos_sin_cache + pos * rot_dim;
+
+  apply_rotary_embedding<scalar_t, cache_t, IS_NEOX>(
+      query, key, cache_ptr, head_size, num_heads, num_kv_heads, rot_dim,
+      token_idx, query_stride, key_stride, head_stride, rope_dim_offset,
+      inverse);
+}
+
+}  // namespace vllm
+
+void rotary_embedding(
+    torch::stable::Tensor& positions,  // [batch_size, seq_len] or [num_tokens]
+    torch::stable::Tensor&
+        query,  // [batch_size, seq_len, num_heads * head_size] or
+                // [num_tokens, num_heads * head_size] or
+                // [batch_size, seq_len, num_heads, head_size] or
+                // [num_tokens, num_heads, head_size]
+    std::optional<torch::stable::Tensor> key,
+    // null or
+    // [batch_size, seq_len, num_kv_heads * head_size] or
+    // [num_tokens, num_kv_heads * head_size] or
+    // [batch_size, seq_len, num_heads, head_size] or
+    // [num_tokens, num_heads, head_size]
+    int64_t head_size,
+    torch::stable::Tensor& cos_sin_cache,  // [max_position, rot_dim]
+    bool is_neox, int64_t rope_dim_offset, bool inverse) {
+  // num_tokens = batch_size * seq_len
+  int64_t num_tokens = positions.numel();
+  int positions_ndim = positions.dim();
+
+  // Make sure num_tokens dim is consistent across positions, query, and key
+  STD_TORCH_CHECK(
+      positions_ndim == 1 || positions_ndim == 2,
+      "positions must have shape [num_tokens] or [batch_size, seq_len]");
+  if (positions_ndim == 1) {
+    STD_TORCH_CHECK(
+        query.size(0) == positions.size(0) &&
+            (!key.has_value() || key->size(0) == positions.size(0)),
+        "query, key and positions must have the same number of tokens");
+  }
+  if (positions_ndim == 2) {
+    STD_TORCH_CHECK(
+        query.size(0) == positions.size(0) &&
+            (!key.has_value() || key->size(0) == positions.size(0)) &&
+            query.size(1) == positions.size(1) &&
+            (!key.has_value() || key->size(1) == positions.size(1)),
+        "query, key and positions must have the same batch_size and seq_len");
+  }
+
+  // Make sure head_size is valid for query and key
+  // hidden_size = num_heads * head_size
+  int query_hidden_size = query.numel() / num_tokens;
+  int key_hidden_size = key.has_value() ? key->numel() / num_tokens : 0;
+  STD_TORCH_CHECK(query_hidden_size % head_size == 0);
+  STD_TORCH_CHECK(key_hidden_size % head_size == 0);
+
+  // Make sure query and key have consistent number of heads
+  int num_heads = query_hidden_size / head_size;
+  int num_kv_heads = key.has_value() ? key_hidden_size / head_size : num_heads;
+  STD_TORCH_CHECK(num_heads % num_kv_heads == 0);
+
+  int rot_dim = cos_sin_cache.size(1);
+  int seq_dim_idx = positions_ndim - 1;
+  int64_t query_stride = query.stride(seq_dim_idx);
+  int64_t key_stride = key.has_value() ? key->stride(seq_dim_idx) : 0;
+
+  STD_TORCH_CHECK((rot_dim + rope_dim_offset) <= head_size);
+  // Determine head stride: for [*, heads, head_size] use stride of last dim;
+  // for flat [*, heads*head_size], heads blocks are contiguous of size
+  // head_size
+  int query_ndim = query.dim();
+  int64_t head_stride =
+      (query_ndim == positions_ndim + 2) ? query.stride(-2) : head_size;
+
+  dim3 grid(num_tokens);
+  dim3 block(std::min<int64_t>(num_heads * rot_dim / 2, 512));
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      query.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream();
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(
+      query.scalar_type(), "rotary_embedding", [&] {
+        using query_t = scalar_t;
+        VLLM_STABLE_DISPATCH_FLOATING_TYPES(
+            cos_sin_cache.scalar_type(), "rotary_embedding_cache", [&] {
+              using cache_t = scalar_t;
+              if (is_neox) {
+                vllm::rotary_embedding_kernel<query_t, cache_t, true>
+                    <<<grid, block, 0, stream>>>(
+                        positions.const_data_ptr<int64_t>(),
+                        query.mutable_data_ptr<query_t>(),
+                        key.has_value() ? key->mutable_data_ptr<query_t>()
+                                        : nullptr,
+                        cos_sin_cache.const_data_ptr<cache_t>(), rot_dim,
+                        query_stride, key_stride, head_stride, num_heads,
+                        num_kv_heads, head_size, rope_dim_offset, inverse);
+              } else {
+                vllm::rotary_embedding_kernel<query_t, cache_t, false>
+                    <<<grid, block, 0, stream>>>(
+                        positions.const_data_ptr<int64_t>(),
+                        query.mutable_data_ptr<query_t>(),
+                        key.has_value() ? key->mutable_data_ptr<query_t>()
+                                        : nullptr,
+                        cos_sin_cache.const_data_ptr<cache_t>(), rot_dim,
+                        query_stride, key_stride, head_stride, num_heads,
+                        num_kv_heads, head_size, rope_dim_offset, inverse);
+              }
+            });
+      });
+}
diff --git a/csrc/quantization/awq/dequantize.cuh b/csrc/libtorch_stable/quantization/awq/dequantize.cuh
similarity index 100%
rename from csrc/quantization/awq/dequantize.cuh
rename to csrc/libtorch_stable/quantization/awq/dequantize.cuh
diff --git a/csrc/quantization/awq/gemm_kernels.cu b/csrc/libtorch_stable/quantization/awq/gemm_kernels.cu
similarity index 89%
rename from csrc/quantization/awq/gemm_kernels.cu
rename to csrc/libtorch_stable/quantization/awq/gemm_kernels.cu
index 53c47679cdd7..c3702c52efcb 100644
--- a/csrc/quantization/awq/gemm_kernels.cu
+++ b/csrc/libtorch_stable/quantization/awq/gemm_kernels.cu
@@ -7,10 +7,11 @@ Shang and Dang, Xingyu and Han, Song}, journal={arXiv}, year={2023}
 }
  */
 
-#include <torch/all.h>
-#include <c10/cuda/CUDAGuard.h>
+#include <torch/csrc/stable/tensor.h>
+#include <torch/csrc/stable/ops.h>
+#include "libtorch_stable/torch_utils.h"
 
-#include "dequantize.cuh"
+#include "libtorch_stable/quantization/awq/dequantize.cuh"
 
 #include <cuda_fp16.h>
 
@@ -410,10 +411,11 @@ __global__ void __launch_bounds__(64)
 }  // namespace awq
 }  // namespace vllm
 
-torch::Tensor awq_dequantize(torch::Tensor _kernel,
-                             torch::Tensor _scaling_factors,
-                             torch::Tensor _zeros, int64_t split_k_iters,
-                             int64_t thx, int64_t thy) {
+torch::stable::Tensor awq_dequantize(torch::stable::Tensor _kernel,
+                                     torch::stable::Tensor _scaling_factors,
+                                     torch::stable::Tensor _zeros,
+                                     int64_t split_k_iters, int64_t thx,
+                                     int64_t thy) {
   int in_c = _kernel.size(0);
   int qout_c = _kernel.size(1);
   int out_c = qout_c * 8;
@@ -437,23 +439,24 @@ torch::Tensor awq_dequantize(torch::Tensor _kernel,
     y_blocks = (int)(in_c / 8);
   }
 
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(_scaling_factors));
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      _scaling_factors.get_device_index());
 
-  auto options = torch::TensorOptions()
-                     .dtype(_scaling_factors.dtype())
-                     .device(_scaling_factors.device());
-  at::Tensor _de_kernel = torch::empty({in_c, out_c}, options);
+  auto _de_kernel =
+      torch::stable::empty({in_c, out_c}, _scaling_factors.scalar_type(),
+                           std::nullopt, _scaling_factors.device());
 
-  auto kernel = reinterpret_cast<int*>(_kernel.data_ptr<int>());
-  auto de_kernel = reinterpret_cast<half*>(_de_kernel.data_ptr<at::Half>());
-  auto scaling_factors =
-      reinterpret_cast<half*>(_scaling_factors.data_ptr<at::Half>());
-  auto zeros = reinterpret_cast<int*>(_zeros.data_ptr<int>());
+  auto kernel = reinterpret_cast<int*>(_kernel.mutable_data_ptr<int>());
+  auto de_kernel = reinterpret_cast<half*>(
+      _de_kernel.mutable_data_ptr<torch::headeronly::Half>());
+  auto scaling_factors = reinterpret_cast<half*>(
+      _scaling_factors.mutable_data_ptr<torch::headeronly::Half>());
+  auto zeros = reinterpret_cast<int*>(_zeros.mutable_data_ptr<int>());
 
   dim3 num_blocks(x_blocks, y_blocks);
   dim3 threads_per_block(x_thread, y_thread);
 
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  const cudaStream_t stream = get_current_cuda_stream();
   vllm::awq::dequantize_weights<<<num_blocks, threads_per_block, 0, stream>>>(
       kernel, scaling_factors, zeros, de_kernel, G);
 
@@ -466,27 +469,30 @@ torch::Tensor awq_dequantize(torch::Tensor _kernel,
 // zeros: IC // G, OC // 8 [int32] -> cast to IC // G, OC [uint4b]
 // assume that batch_size < 16 for now
 
-torch::Tensor awq_gemm(torch::Tensor _in_feats, torch::Tensor _kernel,
-                       torch::Tensor _scaling_factors, torch::Tensor _zeros,
-                       int64_t split_k_iters) {
+torch::stable::Tensor awq_gemm(torch::stable::Tensor _in_feats,
+                               torch::stable::Tensor _kernel,
+                               torch::stable::Tensor _scaling_factors,
+                               torch::stable::Tensor _zeros,
+                               int64_t split_k_iters) {
   int num_in_feats = _in_feats.size(0);
   int num_in_channels = _in_feats.size(1);
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(_in_feats));
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      _in_feats.get_device_index());
 
-  auto options = torch::TensorOptions()
-                     .dtype(_in_feats.dtype())
-                     .device(_in_feats.device());
-  at::Tensor _out_feats =
-      torch::empty({split_k_iters, num_in_feats, _kernel.size(1) * 8}, options);
+  auto _out_feats = torch::stable::empty(
+      {split_k_iters, num_in_feats, _kernel.size(1) * 8},
+      _in_feats.scalar_type(), std::nullopt, _in_feats.device());
   int num_out_feats = _out_feats.size(-2);
   int num_out_channels = _out_feats.size(-1);
 
-  auto in_feats = reinterpret_cast<half*>(_in_feats.data_ptr<at::Half>());
-  auto kernel = reinterpret_cast<int*>(_kernel.data_ptr<int>());
-  auto out_feats = reinterpret_cast<half*>(_out_feats.data_ptr<at::Half>());
-  auto scaling_factors =
-      reinterpret_cast<half*>(_scaling_factors.data_ptr<at::Half>());
-  auto zeros = reinterpret_cast<int*>(_zeros.data_ptr<int>());
+  auto in_feats = reinterpret_cast<half*>(
+      _in_feats.mutable_data_ptr<torch::headeronly::Half>());
+  auto kernel = reinterpret_cast<int*>(_kernel.mutable_data_ptr<int>());
+  auto out_feats = reinterpret_cast<half*>(
+      _out_feats.mutable_data_ptr<torch::headeronly::Half>());
+  auto scaling_factors = reinterpret_cast<half*>(
+      _scaling_factors.mutable_data_ptr<torch::headeronly::Half>());
+  auto zeros = reinterpret_cast<int*>(_zeros.mutable_data_ptr<int>());
   int group_size = num_in_channels / _scaling_factors.size(0);
 
   if (num_out_channels % 64 != 0)
@@ -498,7 +504,7 @@ torch::Tensor awq_gemm(torch::Tensor _in_feats, torch::Tensor _kernel,
   if (num_out_channels % group_size != 0)
     throw std::invalid_argument("OC is not multiple of Group size");
 
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  const cudaStream_t stream = get_current_cuda_stream();
   if (num_out_channels % 128 == 0) {
     int j_factors1 = num_out_channels / 128 / 1;
     dim3 num_blocks((num_out_feats + 16 - 1) / 16 * j_factors1 * split_k_iters);
@@ -522,5 +528,5 @@ torch::Tensor awq_gemm(torch::Tensor _in_feats, torch::Tensor _kernel,
             group_size, split_k_iters, in_feats, kernel, scaling_factors, zeros,
             num_in_feats, num_in_channels, num_out_channels, out_feats);
   }
-  return _out_feats.sum(0);
+  return torch::stable::sum(_out_feats, 0);
 }
diff --git a/csrc/quantization/cutlass_w4a8/get_group_starts.cuh b/csrc/libtorch_stable/quantization/cutlass_w4a8/get_group_starts.cuh
similarity index 63%
rename from csrc/quantization/cutlass_w4a8/get_group_starts.cuh
rename to csrc/libtorch_stable/quantization/cutlass_w4a8/get_group_starts.cuh
index fec142d0d87a..5cda4c9750a2 100644
--- a/csrc/quantization/cutlass_w4a8/get_group_starts.cuh
+++ b/csrc/libtorch_stable/quantization/cutlass_w4a8/get_group_starts.cuh
@@ -2,10 +2,9 @@
 #pragma once
 
 #include <cuda.h>
-#include <torch/all.h>
-#include <c10/cuda/CUDAStream.h>
+#include <torch/csrc/stable/tensor.h>
+#include "libtorch_stable/torch_utils.h"
 
-#include "core/scalar_type.hpp"
 #include "cutlass/bfloat16.h"
 #include "cutlass/float8.h"
 
@@ -41,7 +40,7 @@ __global__ void get_group_gemm_starts(
 }
 
 #define __CALL_GET_STARTS_KERNEL(TENSOR_C_TYPE, C_TYPE)                  \
-  else if (out_tensors.dtype() == TENSOR_C_TYPE) {                       \
+  else if (out_tensors.scalar_type() == TENSOR_C_TYPE) {                 \
     get_group_gemm_starts<cutlass::float_e4m3_t, int32_t, C_TYPE, float, \
                           cutlass::Array<cutlass::float_e4m3_t, 8>>      \
         <<<1, num_experts, 0, stream>>>(                                 \
@@ -66,23 +65,34 @@ __global__ void get_group_gemm_starts(
 namespace {
 
 void run_get_group_gemm_starts(
-    torch::Tensor const& expert_offsets, torch::Tensor& a_ptrs,
-    torch::Tensor& b_ptrs, torch::Tensor& out_ptrs,
-    torch::Tensor& a_scales_ptrs, torch::Tensor& b_scales_ptrs,
-    torch::Tensor& b_group_scales_ptrs, torch::Tensor const& a_tensors,
-    torch::Tensor const& b_tensors, torch::Tensor& out_tensors,
-    torch::Tensor const& a_scales, torch::Tensor const& b_scales,
-    torch::Tensor const& b_group_scales, const int64_t b_group_size) {
-  TORCH_CHECK(a_tensors.dtype() == torch::kFloat8_e4m3fn);
-  TORCH_CHECK(b_tensors.dtype() == torch::kInt32);  // int4 8x packed into int32
-  TORCH_CHECK(a_scales.dtype() == torch::kFloat32);
-  TORCH_CHECK(b_scales.dtype() == torch::kFloat32);
-  TORCH_CHECK(b_group_scales.dtype() ==
-              torch::kFloat8_e4m3fn);  // the underlying torch type is e4m3
-  TORCH_CHECK(out_tensors.dtype() ==
-              torch::kBFloat16);  // only support bf16 for now
+    torch::stable::Tensor const& expert_offsets, torch::stable::Tensor& a_ptrs,
+    torch::stable::Tensor& b_ptrs, torch::stable::Tensor& out_ptrs,
+    torch::stable::Tensor& a_scales_ptrs, torch::stable::Tensor& b_scales_ptrs,
+    torch::stable::Tensor& b_group_scales_ptrs,
+    torch::stable::Tensor const& a_tensors,
+    torch::stable::Tensor const& b_tensors, torch::stable::Tensor& out_tensors,
+    torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales,
+    torch::stable::Tensor const& b_group_scales, const int64_t b_group_size) {
+  STD_TORCH_CHECK(a_tensors.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(
+      b_tensors.scalar_type() ==
+      torch::headeronly::ScalarType::Int);  // int4 8x packed into int32
+  STD_TORCH_CHECK(a_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+  STD_TORCH_CHECK(b_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+  STD_TORCH_CHECK(
+      b_group_scales.scalar_type() ==
+      torch::headeronly::ScalarType::Float8_e4m3fn);  // the underlying torch
+                                                      // type is e4m3
+  STD_TORCH_CHECK(
+      out_tensors.scalar_type() ==
+      torch::headeronly::ScalarType::BFloat16);  // only support bf16 for now
   // expect int64_t to avoid overflow during offset calculations
-  TORCH_CHECK(expert_offsets.dtype() == torch::kInt64);
+  STD_TORCH_CHECK(expert_offsets.scalar_type() ==
+                  torch::headeronly::ScalarType::Long);
 
   int num_experts = static_cast<int>(expert_offsets.size(0));
   // logical k, n
@@ -90,15 +100,16 @@ void run_get_group_gemm_starts(
   int64_t k = a_tensors.size(1);
   int64_t scale_k = cutlass::ceil_div(k, b_group_size);
 
-  auto stream = at::cuda::getCurrentCUDAStream(a_tensors.device().index());
+  auto stream = get_current_cuda_stream(a_tensors.get_device_index());
 
   if (false) {
   }
-  __CALL_GET_STARTS_KERNEL(torch::kBFloat16, cutlass::bfloat16_t)
-  __CALL_GET_STARTS_KERNEL(torch::kFloat16, half)
+  __CALL_GET_STARTS_KERNEL(torch::headeronly::ScalarType::BFloat16,
+                           cutlass::bfloat16_t)
+  __CALL_GET_STARTS_KERNEL(torch::headeronly::ScalarType::Half, half)
   else {
-    TORCH_CHECK(false, "Invalid output type (must be float16 or bfloat16)");
+    STD_TORCH_CHECK(false, "Invalid output type (must be float16 or bfloat16)");
   }
 }
 
-}  // namespace
\ No newline at end of file
+}  // namespace
diff --git a/csrc/quantization/cutlass_w4a8/w4a8_grouped_mm_entry.cu b/csrc/libtorch_stable/quantization/cutlass_w4a8/w4a8_grouped_mm_entry.cu
similarity index 79%
rename from csrc/quantization/cutlass_w4a8/w4a8_grouped_mm_entry.cu
rename to csrc/libtorch_stable/quantization/cutlass_w4a8/w4a8_grouped_mm_entry.cu
index 4b425790dbac..1091d9d12308 100644
--- a/csrc/quantization/cutlass_w4a8/w4a8_grouped_mm_entry.cu
+++ b/csrc/libtorch_stable/quantization/cutlass_w4a8/w4a8_grouped_mm_entry.cu
@@ -14,13 +14,12 @@
 #include "cutlass/util/mixed_dtype_utils.hpp"
 
 // vllm includes
-#include <ATen/cuda/CUDAContext.h>
-#include <c10/cuda/CUDAGuard.h>
-#include <torch/all.h>
+#include <torch/csrc/stable/library.h>
+#include <torch/csrc/stable/tensor.h>
+#include "libtorch_stable/torch_utils.h"
 #include "cutlass_extensions/torch_utils.hpp"
 #include "cutlass_extensions/common.hpp"
 
-#include "core/registration.h"
 #include "get_group_starts.cuh"
 #include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
 #include "w4a8_utils.cuh"
@@ -168,31 +167,40 @@ struct W4A8GroupedGemmKernel {
   static_assert(sizeof(LayoutB_Reordered) % sizeof(int32_t) == 0,
                 "LayoutB_Reordered size must be divisible by 4 bytes");
 
-  static void grouped_mm(
-      torch::Tensor& out_tensors, const torch::Tensor& a_tensors,
-      const torch::Tensor& b_tensors, const torch::Tensor& a_scales,
-      const torch::Tensor& b_scales, const torch::Tensor& b_group_scales,
-      const int64_t b_group_size, const torch::Tensor& expert_offsets,
-      const torch::Tensor& problem_sizes_torch, const torch::Tensor& a_strides,
-      const torch::Tensor& b_strides, const torch::Tensor& c_strides,
-      const torch::Tensor& group_scale_strides) {
+  static void grouped_mm(torch::stable::Tensor& out_tensors,
+                         const torch::stable::Tensor& a_tensors,
+                         const torch::stable::Tensor& b_tensors,
+                         const torch::stable::Tensor& a_scales,
+                         const torch::stable::Tensor& b_scales,
+                         const torch::stable::Tensor& b_group_scales,
+                         const int64_t b_group_size,
+                         const torch::stable::Tensor& expert_offsets,
+                         const torch::stable::Tensor& problem_sizes_torch,
+                         const torch::stable::Tensor& a_strides,
+                         const torch::stable::Tensor& b_strides,
+                         const torch::stable::Tensor& c_strides,
+                         const torch::stable::Tensor& group_scale_strides) {
     auto device = a_tensors.device();
     auto device_id = device.index();
-    const at::cuda::OptionalCUDAGuard device_guard(device);
-    auto stream = at::cuda::getCurrentCUDAStream(device_id);
+    const torch::stable::accelerator::DeviceGuard device_guard(device_id);
+    auto stream = get_current_cuda_stream(device_id);
 
     int num_experts = static_cast<int>(expert_offsets.size(0));
     int n = static_cast<int>(b_tensors.size(1));
     int k = static_cast<int>(b_tensors.size(2)) * PackFactor;
 
-    auto options_int =
-        torch::TensorOptions().dtype(torch::kInt64).device(device);
-    torch::Tensor a_ptrs = torch::empty(num_experts, options_int);
-    torch::Tensor b_ptrs = torch::empty(num_experts, options_int);
-    torch::Tensor out_ptrs = torch::empty(num_experts, options_int);
-    torch::Tensor a_scales_ptrs = torch::empty(num_experts, options_int);
-    torch::Tensor b_scales_ptrs = torch::empty(num_experts, options_int);
-    torch::Tensor b_group_scales_ptrs = torch::empty(num_experts, options_int);
+    torch::stable::Tensor a_ptrs = torch::stable::empty(
+        num_experts, torch::headeronly::ScalarType::Long, std::nullopt, device);
+    torch::stable::Tensor b_ptrs = torch::stable::empty(
+        num_experts, torch::headeronly::ScalarType::Long, std::nullopt, device);
+    torch::stable::Tensor out_ptrs = torch::stable::empty(
+        num_experts, torch::headeronly::ScalarType::Long, std::nullopt, device);
+    torch::stable::Tensor a_scales_ptrs = torch::stable::empty(
+        num_experts, torch::headeronly::ScalarType::Long, std::nullopt, device);
+    torch::stable::Tensor b_scales_ptrs = torch::stable::empty(
+        num_experts, torch::headeronly::ScalarType::Long, std::nullopt, device);
+    torch::stable::Tensor b_group_scales_ptrs = torch::stable::empty(
+        num_experts, torch::headeronly::ScalarType::Long, std::nullopt, device);
 
     // get the correct offsets to pass to gemm
     run_get_group_gemm_starts(expert_offsets, a_ptrs, b_ptrs, out_ptrs,
@@ -247,9 +255,9 @@ struct W4A8GroupedGemmKernel {
 
     // Allocate workspace
     size_t workspace_size = GemmShuffled::get_workspace_size(arguments);
-    torch::Tensor workspace =
-        torch::empty(workspace_size,
-                     torch::TensorOptions().dtype(torch::kU8).device(device));
+    torch::stable::Tensor workspace = torch::stable::empty(
+        workspace_size, torch::headeronly::ScalarType::Byte, std::nullopt,
+        device);
 
     // Run GEMM
     GemmShuffled gemm;
@@ -294,14 +302,20 @@ using Kernel_256x128_2x1x1_Coop =
 using Kernel_128x256_2x1x1_Coop =
     W4A8GroupedGemmKernel<Shape<_128, _256>, Shape<_2, _1, _1>, Coop, CoopEpi>;
 
-void mm_dispatch(
-    torch::Tensor& out_tensors, const torch::Tensor& a_tensors,
-    const torch::Tensor& b_tensors, const torch::Tensor& a_scales,
-    const torch::Tensor& b_scales, const torch::Tensor& b_group_scales,
-    const int64_t b_group_size, const torch::Tensor& expert_offsets,
-    const torch::Tensor& problem_sizes, const torch::Tensor& a_strides,
-    const torch::Tensor& b_strides, const torch::Tensor& c_strides,
-    const torch::Tensor& group_scale_strides, const std::string& schedule) {
+void mm_dispatch(torch::stable::Tensor& out_tensors,
+                 const torch::stable::Tensor& a_tensors,
+                 const torch::stable::Tensor& b_tensors,
+                 const torch::stable::Tensor& a_scales,
+                 const torch::stable::Tensor& b_scales,
+                 const torch::stable::Tensor& b_group_scales,
+                 const int64_t b_group_size,
+                 const torch::stable::Tensor& expert_offsets,
+                 const torch::stable::Tensor& problem_sizes,
+                 const torch::stable::Tensor& a_strides,
+                 const torch::stable::Tensor& b_strides,
+                 const torch::stable::Tensor& c_strides,
+                 const torch::stable::Tensor& group_scale_strides,
+                 const std::string& schedule) {
   if (schedule == "Kernel_128x16_1x1x1_Coop") {
     Kernel_128x16_1x1x1_Coop::grouped_mm(
         out_tensors, a_tensors, b_tensors, a_scales, b_scales, b_group_scales,
@@ -358,18 +372,23 @@ void mm_dispatch(
         b_group_size, expert_offsets, problem_sizes, a_strides, b_strides,
         c_strides, group_scale_strides);
   } else {
-    TORCH_CHECK(false,
-                "cutlass_w4a8_moe_mm: unknown schedule string: ", schedule);
+    STD_TORCH_CHECK(false,
+                    "cutlass_w4a8_moe_mm: unknown schedule string: ", schedule);
   }
 }
 
-void mm(torch::Tensor& out_tensors, const torch::Tensor& a_tensors,
-        const torch::Tensor& b_tensors, const torch::Tensor& a_scales,
-        const torch::Tensor& b_scales, const torch::Tensor& b_group_scales,
-        const int64_t b_group_size, const torch::Tensor& expert_offsets,
-        const torch::Tensor& problem_sizes, const torch::Tensor& a_strides,
-        const torch::Tensor& b_strides, const torch::Tensor& c_strides,
-        const torch::Tensor& group_scale_strides,
+void mm(torch::stable::Tensor& out_tensors,
+        const torch::stable::Tensor& a_tensors,
+        const torch::stable::Tensor& b_tensors,
+        const torch::stable::Tensor& a_scales,
+        const torch::stable::Tensor& b_scales,
+        const torch::stable::Tensor& b_group_scales, const int64_t b_group_size,
+        const torch::stable::Tensor& expert_offsets,
+        const torch::stable::Tensor& problem_sizes,
+        const torch::stable::Tensor& a_strides,
+        const torch::stable::Tensor& b_strides,
+        const torch::stable::Tensor& c_strides,
+        const torch::stable::Tensor& group_scale_strides,
         std::optional<std::string> maybe_schedule) {
   // user has specified a schedule
   if (maybe_schedule) {
@@ -406,26 +425,27 @@ void mm(torch::Tensor& out_tensors, const torch::Tensor& a_tensors,
               a_strides, b_strides, c_strides, group_scale_strides, schedule);
 }
 
-std::tuple<torch::Tensor, torch::Tensor> encode_and_reorder_int4b(
-    torch::Tensor const& b_tensors) {
-  TORCH_CHECK(b_tensors.dtype() == torch::kInt32);
-  TORCH_CHECK(b_tensors.dim() == 3);  // (experts, n, k)
-  TORCH_CHECK(b_tensors.is_contiguous());
-  TORCH_CHECK(b_tensors.is_cuda());
+std::tuple<torch::stable::Tensor, torch::stable::Tensor>
+encode_and_reorder_int4b(torch::stable::Tensor const& b_tensors) {
+  STD_TORCH_CHECK(b_tensors.scalar_type() ==
+                  torch::headeronly::ScalarType::Int);
+  STD_TORCH_CHECK(b_tensors.dim() == 3);  // (experts, n, k)
+  STD_TORCH_CHECK(b_tensors.is_contiguous());
+  STD_TORCH_CHECK(b_tensors.is_cuda());
 
   int n = static_cast<int>(b_tensors.size(1));
   int k = static_cast<int>(b_tensors.size(2)) * PackFactor;  // logical k
 
   // CUTLASS reorder_tensor requires k % 256 == 0 and n % 16 == 0.
   // These misalignments cause silent OOB unless run under Compute Sanitizer.
-  TORCH_CHECK(k % 256 == 0, "logical k must be divisible by 256");
-  TORCH_CHECK(n % 16 == 0, "n must be divisible by 16");
+  STD_TORCH_CHECK(k % 256 == 0, "logical k must be divisible by 256");
+  STD_TORCH_CHECK(n % 16 == 0, "n must be divisible by 16");
 
   // we will store the layout to an int32 tensor;
   // this is the number of elements we need per layout
   constexpr size_t layout_width = sizeof(LayoutB_Reordered) / sizeof(int32_t);
 
-  torch::Tensor b_tensors_packed = torch::empty_like(b_tensors);
+  torch::stable::Tensor b_tensors_packed = torch::stable::empty_like(b_tensors);
   int num_experts = static_cast<int>(b_tensors.size(0));
 
   auto b_ptr = static_cast<QuantType const*>(b_tensors.const_data_ptr());
@@ -435,7 +455,7 @@ std::tuple<torch::Tensor, torch::Tensor> encode_and_reorder_int4b(
   size_t num_int4_elems = 1ull * num_experts * n * k;
   bool ok = vllm::cutlass_w4a8_utils::unified_encode_int4b(b_ptr, b_packed_ptr,
                                                            num_int4_elems);
-  TORCH_CHECK(ok, "unified_encode_int4b failed");
+  STD_TORCH_CHECK(ok, "unified_encode_int4b failed");
 
   // construct the layout once; assumes each expert has the same layout
   using LayoutType = LayoutB_Reordered;
@@ -456,28 +476,28 @@ std::tuple<torch::Tensor, torch::Tensor> encode_and_reorder_int4b(
   }
 
   // save the packed layout to torch tensor so we can re-use it
-  auto cpu_opts =
-      torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU);
-  torch::Tensor layout_cpu =
-      torch::empty({num_experts, layout_width}, cpu_opts);
+  torch::stable::Tensor layout_cpu = torch::stable::empty(
+      {num_experts, layout_width}, torch::headeronly::ScalarType::Int,
+      std::nullopt, torch::stable::Device(torch::stable::DeviceType::CPU));
 
-  int32_t* layout_data = layout_cpu.data_ptr<int32_t>();
+  int32_t* layout_data = layout_cpu.mutable_data_ptr<int32_t>();
   for (int i = 0; i < num_experts; ++i) {
     std::memcpy(layout_data + i * layout_width,  // dst (int32*)
                 &layout_B_reordered,             // src (LayoutType*)
                 sizeof(LayoutType));             // number of bytes
   }
 
-  torch::Tensor packed_layout =
-      layout_cpu.to(b_tensors.device(), /*non_blocking=*/false);
+  torch::stable::Tensor packed_layout =
+      torch::stable::to(layout_cpu, b_tensors.device(),
+                        /*non_blocking=*/false);
 
   return {b_tensors_packed, packed_layout};
 }
 
-TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
-  m.impl("cutlass_w4a8_moe_mm", &mm);
-  m.impl("cutlass_encode_and_reorder_int4b_grouped", &encode_and_reorder_int4b);
+STABLE_TORCH_LIBRARY_IMPL(_C, CUDA, m) {
+  m.impl("cutlass_w4a8_moe_mm", TORCH_BOX(&mm));
+  m.impl("cutlass_encode_and_reorder_int4b_grouped",
+         TORCH_BOX(&encode_and_reorder_int4b));
 }
 
 }  // namespace vllm::cutlass_w4a8_moe
-/////////////////////////////////////////////////////////////////////////////////////////////////
\ No newline at end of file
diff --git a/csrc/quantization/cutlass_w4a8/w4a8_mm_entry.cu b/csrc/libtorch_stable/quantization/cutlass_w4a8/w4a8_mm_entry.cu
similarity index 83%
rename from csrc/quantization/cutlass_w4a8/w4a8_mm_entry.cu
rename to csrc/libtorch_stable/quantization/cutlass_w4a8/w4a8_mm_entry.cu
index f77af06cd6c0..c2b8c0c00dea 100644
--- a/csrc/quantization/cutlass_w4a8/w4a8_mm_entry.cu
+++ b/csrc/libtorch_stable/quantization/cutlass_w4a8/w4a8_mm_entry.cu
@@ -3,14 +3,12 @@
 //   https://github.com/NVIDIA/cutlass/blob/main/examples/55_hopper_mixed_dtype_gemm/55_hopper_int4_fp8_gemm.cu
 //
 
-#include <ATen/cuda/CUDAContext.h>
-#include <c10/cuda/CUDAGuard.h>
-#include <torch/all.h>
+#include <torch/csrc/stable/library.h>
+#include <torch/csrc/stable/tensor.h>
+#include "libtorch_stable/torch_utils.h"
 #include "cutlass_extensions/torch_utils.hpp"
 #include "w4a8_utils.cuh"
 
-#include "core/registration.h"
-
 #include "cutlass/cutlass.h"
 #include <limits>
 
@@ -161,31 +159,31 @@ struct W4A8GemmKernel {
   using StrideD = typename GemmKernelShuffled::StrideD;
   using StrideS = typename CollectiveMainloopShuffled::StrideScale;
 
-  static torch::Tensor mm(torch::Tensor const& A,
-                          torch::Tensor const& B,             // already packed
-                          torch::Tensor const& group_scales,  // already packed
-                          int64_t group_size,
-                          torch::Tensor const& channel_scales,
-                          torch::Tensor const& token_scales,
-                          std::optional<at::ScalarType> const& maybe_out_type) {
+  static torch::stable::Tensor mm(
+      torch::stable::Tensor const& A,
+      torch::stable::Tensor const& B,             // already packed
+      torch::stable::Tensor const& group_scales,  // already packed
+      int64_t group_size, torch::stable::Tensor const& channel_scales,
+      torch::stable::Tensor const& token_scales,
+      std::optional<torch::headeronly::ScalarType> const& maybe_out_type) {
     // TODO: param validation
     int m = A.size(0);
     int k = A.size(1);
     int n = B.size(1);
 
     // safely cast group_size to int
-    TORCH_CHECK(group_size > 0 && group_size <= std::numeric_limits<int>::max(),
-                "group_size out of supported range for int: ", group_size);
+    STD_TORCH_CHECK(
+        group_size > 0 && group_size <= std::numeric_limits<int>::max(),
+        "group_size out of supported range for int: ", group_size);
     int const group_size_int = static_cast<int>(group_size);
 
     // Allocate output
-    const at::cuda::OptionalCUDAGuard device_guard(device_of(A));
+    const torch::stable::accelerator::DeviceGuard device_guard(
+        A.get_device_index());
     auto device = A.device();
-    auto stream = at::cuda::getCurrentCUDAStream(device.index());
-    torch::Tensor D =
-        torch::empty({m, n}, torch::TensorOptions()
-                                 .dtype(equivalent_scalar_type_v<ElementD>)
-                                 .device(device));
+    auto stream = get_current_cuda_stream(device.index());
+    torch::stable::Tensor D = torch::stable::empty(
+        {m, n}, equivalent_scalar_type_v<ElementD>, std::nullopt, device);
     // prepare arg pointers
     auto A_ptr = static_cast<MmaType const*>(A.const_data_ptr());
     auto B_ptr = static_cast<QuantType const*>(B.const_data_ptr());
@@ -237,9 +235,9 @@ struct W4A8GemmKernel {
 
     // Workspace
     size_t workspace_size = GemmShuffled::get_workspace_size(arguments);
-    torch::Tensor workspace =
-        torch::empty(workspace_size,
-                     torch::TensorOptions().dtype(torch::kU8).device(device));
+    torch::stable::Tensor workspace = torch::stable::empty(
+        workspace_size, torch::headeronly::ScalarType::Byte, std::nullopt,
+        device);
 
     // Run GEMM
     GemmShuffled gemm;
@@ -269,14 +267,14 @@ using Kernel_128x64_1x1x1 = W4A8GemmKernel<Shape<_128, _64>, Shape<_1, _1, _1>>;
 using Kernel_128x32_1x1x1 = W4A8GemmKernel<Shape<_128, _32>, Shape<_1, _1, _1>>;
 using Kernel_128x16_1x1x1 = W4A8GemmKernel<Shape<_128, _16>, Shape<_1, _1, _1>>;
 
-torch::Tensor mm_dispatch(torch::Tensor const& A,
-                          torch::Tensor const& B,             // already packed
-                          torch::Tensor const& group_scales,  // already packed
-                          int64_t group_size,
-                          torch::Tensor const& channel_scales,
-                          torch::Tensor const& token_scales,
-                          std::optional<at::ScalarType> const& maybe_out_type,
-                          const std::string& schedule) {
+torch::stable::Tensor mm_dispatch(
+    torch::stable::Tensor const& A,
+    torch::stable::Tensor const& B,             // already packed
+    torch::stable::Tensor const& group_scales,  // already packed
+    int64_t group_size, torch::stable::Tensor const& channel_scales,
+    torch::stable::Tensor const& token_scales,
+    std::optional<torch::headeronly::ScalarType> const& maybe_out_type,
+    const std::string& schedule) {
   if (schedule == "256x128_1x1x1") {
     return Kernel_256x128_1x1x1::mm(A, B, group_scales, group_size,
                                     channel_scales, token_scales,
@@ -318,17 +316,18 @@ torch::Tensor mm_dispatch(torch::Tensor const& A,
                                    channel_scales, token_scales,
                                    maybe_out_type);
   }
-  TORCH_CHECK(false, "Unknown W4A8 schedule: ", schedule);
+  STD_TORCH_CHECK(false, "Unknown W4A8 schedule: ", schedule);
   return {};
 }
 
-torch::Tensor mm(torch::Tensor const& A,
-                 torch::Tensor const& B,             // already packed
-                 torch::Tensor const& group_scales,  // already packed
-                 int64_t group_size, torch::Tensor const& channel_scales,
-                 torch::Tensor const& token_scales,
-                 std::optional<at::ScalarType> const& maybe_out_type,
-                 std::optional<std::string> maybe_schedule) {
+torch::stable::Tensor mm(
+    torch::stable::Tensor const& A,
+    torch::stable::Tensor const& B,             // already packed
+    torch::stable::Tensor const& group_scales,  // already packed
+    int64_t group_size, torch::stable::Tensor const& channel_scales,
+    torch::stable::Tensor const& token_scales,
+    std::optional<torch::headeronly::ScalarType> const& maybe_out_type,
+    std::optional<std::string> maybe_schedule) {
   // requested a specific schedule
   if (maybe_schedule) {
     return mm_dispatch(A, B, group_scales, group_size, channel_scales,
@@ -378,14 +377,15 @@ torch::Tensor mm(torch::Tensor const& A,
 // ----------------------------------------------------------------------------
 // Pre-processing utils
 // ----------------------------------------------------------------------------
-torch::Tensor pack_scale_fp8(torch::Tensor const& scales) {
-  TORCH_CHECK(scales.dtype() == torch::kFloat8_e4m3fn);
-  TORCH_CHECK(scales.is_contiguous());
-  TORCH_CHECK(scales.is_cuda());
-
-  auto packed_scales = torch::empty(
-      {scales.numel() * ScalePackSize},
-      torch::TensorOptions().dtype(scales.dtype()).device(scales.device()));
+torch::stable::Tensor pack_scale_fp8(torch::stable::Tensor const& scales) {
+  STD_TORCH_CHECK(scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(scales.is_contiguous());
+  STD_TORCH_CHECK(scales.is_cuda());
+
+  auto packed_scales =
+      torch::stable::empty({scales.numel() * ScalePackSize},
+                           scales.scalar_type(), std::nullopt, scales.device());
   auto scales_ptr = static_cast<MmaType const*>(scales.const_data_ptr());
   auto packed_scales_ptr =
       static_cast<cutlass::Array<ElementScale, ScalePackSize>*>(
@@ -396,15 +396,16 @@ torch::Tensor pack_scale_fp8(torch::Tensor const& scales) {
   return packed_scales;
 }
 
-torch::Tensor encode_and_reorder_int4b(torch::Tensor const& B) {
-  TORCH_CHECK(B.dtype() == torch::kInt32);
-  TORCH_CHECK(B.dim() == 2);
+torch::stable::Tensor encode_and_reorder_int4b(torch::stable::Tensor const& B) {
+  STD_TORCH_CHECK(B.scalar_type() == torch::headeronly::ScalarType::Int);
+  STD_TORCH_CHECK(B.dim() == 2);
 
-  torch::Tensor B_packed = torch::empty_like(B);
+  torch::stable::Tensor B_packed = torch::stable::empty_like(B);
 
   int k = B.size(0) * PackFactor;  // logical k
   int n = B.size(1);
-  TORCH_CHECK((n * k) % 32 == 0, "need multiples of 32 int4s for 16B chunks");
+  STD_TORCH_CHECK((n * k) % 32 == 0,
+                  "need multiples of 32 int4s for 16B chunks");
 
   auto B_ptr = static_cast<QuantType const*>(B.const_data_ptr());
   auto B_packed_ptr = static_cast<QuantType*>(B_packed.data_ptr());
@@ -415,16 +416,17 @@ torch::Tensor encode_and_reorder_int4b(torch::Tensor const& B) {
 
   bool ok = vllm::cutlass_w4a8_utils::unified_encode_int4b(B_ptr, B_packed_ptr,
                                                            n * k);
-  TORCH_CHECK(ok, "unified_encode_int4b failed");
+  STD_TORCH_CHECK(ok, "unified_encode_int4b failed");
   cutlass::reorder_tensor(B_packed_ptr, layout_B, layout_B_reordered);
 
   return B_packed;
 }
 
-TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
-  m.impl("cutlass_w4a8_mm", &mm);
-  m.impl("cutlass_pack_scale_fp8", &pack_scale_fp8);
-  m.impl("cutlass_encode_and_reorder_int4b", &encode_and_reorder_int4b);
+STABLE_TORCH_LIBRARY_IMPL(_C, CUDA, m) {
+  m.impl("cutlass_w4a8_mm", TORCH_BOX(&mm));
+  m.impl("cutlass_pack_scale_fp8", TORCH_BOX(&pack_scale_fp8));
+  m.impl("cutlass_encode_and_reorder_int4b",
+         TORCH_BOX(&encode_and_reorder_int4b));
 }
 
-}  // namespace vllm::cutlass_w4a8
\ No newline at end of file
+}  // namespace vllm::cutlass_w4a8
diff --git a/csrc/quantization/cutlass_w4a8/w4a8_utils.cu b/csrc/libtorch_stable/quantization/cutlass_w4a8/w4a8_utils.cu
similarity index 100%
rename from csrc/quantization/cutlass_w4a8/w4a8_utils.cu
rename to csrc/libtorch_stable/quantization/cutlass_w4a8/w4a8_utils.cu
diff --git a/csrc/quantization/cutlass_w4a8/w4a8_utils.cuh b/csrc/libtorch_stable/quantization/cutlass_w4a8/w4a8_utils.cuh
similarity index 100%
rename from csrc/quantization/cutlass_w4a8/w4a8_utils.cuh
rename to csrc/libtorch_stable/quantization/cutlass_w4a8/w4a8_utils.cuh
diff --git a/csrc/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu b/csrc/libtorch_stable/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu
similarity index 86%
rename from csrc/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu
rename to csrc/libtorch_stable/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu
index 3539096c9feb..49f2944f3fd6 100644
--- a/csrc/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu
+++ b/csrc/libtorch_stable/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu
@@ -14,16 +14,15 @@
  * limitations under the License.
  */
 
-#include <torch/all.h>
+#include <torch/csrc/stable/tensor.h>
+#include "libtorch_stable/torch_utils.h"
+#include "libtorch_stable/dispatch_utils.h"
+#include "cuda_vec_utils.cuh"
 
 #include <cuda_runtime_api.h>
 #include <cuda_runtime.h>
 
-#include <ATen/cuda/CUDAContext.h>
-#include <c10/cuda/CUDAGuard.h>
-
 #include <cuda_fp8.h>
-#include "dispatch_utils.h"
 
 #include "cuda_utils.h"
 #include "launch_bounds_utils.h"
@@ -118,17 +117,19 @@ __global__ void __launch_bounds__(512, VLLM_BLOCKS_PER_SM(512))
 
 }  // namespace vllm
 
-void silu_and_mul_nvfp4_quant_sm1xxa(torch::Tensor& output,  // [..., d]
-                                     torch::Tensor& output_sf,
-                                     torch::Tensor& input,  // [..., 2 * d]
-                                     torch::Tensor& input_sf) {
+void silu_and_mul_nvfp4_quant_sm1xxa(
+    torch::stable::Tensor& output,  // [..., d]
+    torch::stable::Tensor& output_sf,
+    torch::stable::Tensor& input,  // [..., 2 * d]
+    torch::stable::Tensor& input_sf) {
   int32_t m = input.size(0);
   int32_t n = input.size(1) / 2;
 
-  TORCH_CHECK(n % 16 == 0, "The N dimension must be multiple of 16.");
-  TORCH_CHECK(input.scalar_type() == at::ScalarType::Half ||
-                  input.scalar_type() == at::ScalarType::BFloat16,
-              "Unsupported input data type for quantize_to_fp4.");
+  STD_TORCH_CHECK(n % 16 == 0, "The N dimension must be multiple of 16.");
+  STD_TORCH_CHECK(
+      input.scalar_type() == torch::headeronly::ScalarType::Half ||
+          input.scalar_type() == torch::headeronly::ScalarType::BFloat16,
+      "Unsupported input data type for quantize_to_fp4.");
 
   int multiProcessorCount =
       get_device_attribute(cudaDevAttrMultiProcessorCount, -1);
@@ -136,8 +137,9 @@ void silu_and_mul_nvfp4_quant_sm1xxa(torch::Tensor& output,  // [..., d]
   auto input_sf_ptr = static_cast<float const*>(input_sf.data_ptr());
   auto sf_out = static_cast<int32_t*>(output_sf.data_ptr());
   auto output_ptr = static_cast<int64_t*>(output.data_ptr());
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  auto stream = at::cuda::getCurrentCUDAStream(input.get_device());
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  auto stream = get_current_cuda_stream(input.get_device_index());
   dim3 block(std::min(int(n / ELTS_PER_THREAD), 512));
   int const numBlocksPerSM =
       vllm_runtime_blocks_per_sm(static_cast<int>(block.x));
@@ -149,7 +151,7 @@ void silu_and_mul_nvfp4_quant_sm1xxa(torch::Tensor& output,  // [..., d]
       int(m), std::max(1, (multiProcessorCount * numBlocksPerSM) / grid_y));
   dim3 grid(grid_x, grid_y);
 
-  VLLM_DISPATCH_HALF_TYPES(
+  VLLM_STABLE_DISPATCH_HALF_TYPES(
       input.scalar_type(), "silu_and_mul_nvfp4_quant_kernel", [&] {
         using cuda_type = vllm::CUDATypeConverter<scalar_t>::Type;
         auto input_ptr = static_cast<cuda_type const*>(input.data_ptr());
diff --git a/csrc/libtorch_stable/quantization/fp4/mxfp4_blockwise_moe_kernel.cu b/csrc/libtorch_stable/quantization/fp4/mxfp4_blockwise_moe_kernel.cu
new file mode 100644
index 000000000000..8a493fdf22c3
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/fp4/mxfp4_blockwise_moe_kernel.cu
@@ -0,0 +1,468 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+ *
+ * MXFP4 x MXFP4 block-scaled grouped GEMM kernel for MoE on SM100.
+ * Uses Cutlass mx_float4_t operands, E8M0 block scales, and 32-element groups.
+ */
+
+#include <torch/csrc/stable/library.h>
+#include <torch/csrc/stable/tensor.h>
+#include "libtorch_stable/torch_utils.h"
+
+#include <cutlass/arch/arch.h>
+
+#include "cutlass_extensions/common.hpp"
+
+#include "cute/tensor.hpp"
+#include "cutlass/tensor_ref.h"
+#include "cutlass/epilogue/collective/default_epilogue.hpp"
+#include "cutlass/epilogue/thread/linear_combination.h"
+#include "cutlass/gemm/dispatch_policy.hpp"
+#include "cutlass/gemm/group_array_problem_shape.hpp"
+#include "cutlass/gemm/collective/collective_builder.hpp"
+#include "cutlass/epilogue/collective/collective_builder.hpp"
+#include "cutlass/gemm/device/gemm_universal_adapter.h"
+#include "cutlass/gemm/kernel/gemm_universal.hpp"
+
+#include "cutlass/util/packed_stride.hpp"
+#include <cassert>
+
+using namespace cute;
+
+// Offset-computation kernel for MXFP4 grouped GEMM (group size 32).
+template <typename ElementAB, typename ElementC, typename ElementSF,
+          typename LayoutSFA, typename LayoutSFB, typename ScaleConfig>
+__global__ void __mxfp4_get_group_gemm_starts(
+    ElementAB** a_offsets, ElementAB** b_offsets, ElementC** out_offsets,
+    ElementSF** a_scales_offsets, ElementSF** b_scales_offsets,
+    LayoutSFA* layout_sfa_base_as_int, LayoutSFB* layout_sfb_base_as_int,
+    ElementAB* a_base_as_int, ElementAB* b_base_as_int,
+    ElementC* out_base_as_int, ElementSF* a_scales_base_as_int,
+    ElementSF* b_scales_base_as_int, const int32_t* expert_offsets,
+    const int32_t* sf_offsets, const int32_t* problem_sizes_as_shapes,
+    int64_t* a_strides, int64_t* b_strides, int64_t* c_strides,
+    const int64_t a_stride_val, const int64_t b_stride_val,
+    const int64_t c_stride_val, const int K, const int N) {
+  int64_t expert_id = threadIdx.x;
+  if (expert_id >= gridDim.x * blockDim.x) {
+    return;
+  }
+  int64_t expert_offset = static_cast<int64_t>(expert_offsets[expert_id]);
+  int64_t sf_offset = static_cast<int64_t>(sf_offsets[expert_id]);
+  int64_t group_size = 32;
+  int64_t m = static_cast<int64_t>(problem_sizes_as_shapes[expert_id * 3]);
+  int64_t n = static_cast<int64_t>(problem_sizes_as_shapes[expert_id * 3 + 1]);
+  int64_t k = static_cast<int64_t>(problem_sizes_as_shapes[expert_id * 3 + 2]);
+  assert((m >= 0 && n == N && k == K && k % 2 == 0) &&
+         "unexpected problem sizes");
+
+  int64_t half_k = static_cast<int64_t>(k / 2);
+  int64_t group_k = static_cast<int64_t>(k / group_size);
+  // Shape of A as uint8/byte = [M, K // 2]
+  a_offsets[expert_id] = a_base_as_int + expert_offset * half_k;
+  // Shape of B as uint8/byte = [E, N, K // 2]
+  b_offsets[expert_id] = b_base_as_int + expert_id * n * half_k;
+  // Shape of C = [M, N]
+  out_offsets[expert_id] = out_base_as_int + expert_offset * n;
+  // Shape of a_scale = [sum(sf_sizes), K // group_size]
+  a_scales_offsets[expert_id] = a_scales_base_as_int + sf_offset * group_k;
+
+  assert((reinterpret_cast<uintptr_t>(a_scales_offsets[expert_id]) % 128) ==
+             0 &&
+         "TMA requires 128-byte alignment");
+
+  // Shape of B scale = [E, N, K // group_size]
+  b_scales_offsets[expert_id] = b_scales_base_as_int + expert_id * n * group_k;
+  assert((reinterpret_cast<uintptr_t>(b_scales_offsets[expert_id]) % 128) ==
+             0 &&
+         "TMA requires 128-byte alignment");
+
+  // Initialize strides
+  a_strides[expert_id] = a_stride_val;
+  b_strides[expert_id] = b_stride_val;
+  c_strides[expert_id] = c_stride_val;
+
+  LayoutSFA* layout_sfa_ptr = layout_sfa_base_as_int + expert_id;
+  LayoutSFB* layout_sfb_ptr = layout_sfb_base_as_int + expert_id;
+
+  *layout_sfa_ptr = ScaleConfig::tile_atom_to_shape_SFA(cute::make_shape(
+      static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), 1));
+  *layout_sfb_ptr = ScaleConfig::tile_atom_to_shape_SFB(cute::make_shape(
+      static_cast<int>(m), static_cast<int>(n), static_cast<int>(k), 1));
+}
+
+#define __CALL_MXFP4_GET_STARTS_KERNEL(ELEMENT_AB_TYPE, SF_TYPE,               \
+                                       TENSOR_C_TYPE, C_TYPE, LayoutSFA,       \
+                                       LayoutSFB, ScaleConfig)                 \
+  else if (out_tensors.scalar_type() == TENSOR_C_TYPE) {                       \
+    __mxfp4_get_group_gemm_starts<ELEMENT_AB_TYPE, C_TYPE, SF_TYPE, LayoutSFA, \
+                                  LayoutSFB, ScaleConfig>                      \
+        <<<1, num_experts, 0, stream>>>(                                       \
+            static_cast<ELEMENT_AB_TYPE**>(a_starts.data_ptr()),               \
+            static_cast<ELEMENT_AB_TYPE**>(b_starts.data_ptr()),               \
+            static_cast<C_TYPE**>(out_starts.data_ptr()),                      \
+            static_cast<SF_TYPE**>(a_scales_starts.data_ptr()),                \
+            static_cast<SF_TYPE**>(b_scales_starts.data_ptr()),                \
+            reinterpret_cast<LayoutSFA*>(layout_sfa.data_ptr()),               \
+            reinterpret_cast<LayoutSFB*>(layout_sfb.data_ptr()),               \
+            static_cast<ELEMENT_AB_TYPE*>(a_tensors.data_ptr()),               \
+            static_cast<ELEMENT_AB_TYPE*>(b_tensors.data_ptr()),               \
+            static_cast<C_TYPE*>(out_tensors.data_ptr()),                      \
+            static_cast<SF_TYPE*>(a_scales.data_ptr()),                        \
+            static_cast<SF_TYPE*>(b_scales.data_ptr()),                        \
+            static_cast<int32_t*>(expert_offsets.data_ptr()),                  \
+            static_cast<int32_t*>(sf_offsets.data_ptr()),                      \
+            static_cast<int32_t*>(problem_sizes.data_ptr()),                   \
+            static_cast<int64_t*>(a_strides.data_ptr()),                       \
+            static_cast<int64_t*>(b_strides.data_ptr()),                       \
+            static_cast<int64_t*>(c_strides.data_ptr()), a_stride_val,         \
+            b_stride_val, c_stride_val, K, N);                                 \
+  }
+
+template <typename LayoutSFA, typename LayoutSFB, typename ScaleConfig>
+void mxfp4_run_get_group_gemm_starts(
+    const torch::stable::Tensor& a_starts,
+    const torch::stable::Tensor& b_starts,
+    const torch::stable::Tensor& out_starts,
+    const torch::stable::Tensor& a_scales_starts,
+    const torch::stable::Tensor& b_scales_starts,
+    const torch::stable::Tensor& layout_sfa,
+    const torch::stable::Tensor& layout_sfb,
+    const torch::stable::Tensor& a_strides,
+    const torch::stable::Tensor& b_strides,
+    const torch::stable::Tensor& c_strides, int64_t a_stride_val,
+    int64_t b_stride_val, int64_t c_stride_val,
+    torch::stable::Tensor const& a_tensors,
+    torch::stable::Tensor const& b_tensors,
+    torch::stable::Tensor const& out_tensors,
+    torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales,
+    torch::stable::Tensor const& expert_offsets,
+    torch::stable::Tensor const& sf_offsets,
+    torch::stable::Tensor const& problem_sizes, int M, int N, int K) {
+  int num_experts = (int)expert_offsets.size(0);
+  auto stream = get_current_cuda_stream(a_tensors.get_device_index());
+
+  STD_TORCH_CHECK(out_tensors.size(1) == N,
+                  "Output tensor shape doesn't match expected shape");
+  STD_TORCH_CHECK(K / 2 == b_tensors.size(2),
+                  "b_tensors(dim = 2) and a_tensors(dim = 1) trailing"
+                  " dimension must match");
+  if (false) {
+  }
+  // MXFP4 uses E8M0 (float_ue8m0_t) scale factors
+  __CALL_MXFP4_GET_STARTS_KERNEL(cutlass::float_e2m1_t, cutlass::float_ue8m0_t,
+                                 torch::headeronly::ScalarType::BFloat16,
+                                 cutlass::bfloat16_t, LayoutSFA, LayoutSFB,
+                                 ScaleConfig)
+  __CALL_MXFP4_GET_STARTS_KERNEL(cutlass::float_e2m1_t, cutlass::float_ue8m0_t,
+                                 torch::headeronly::ScalarType::Half, half,
+                                 LayoutSFA, LayoutSFB, ScaleConfig)
+  else {
+    STD_TORCH_CHECK(false, "Invalid output type (must be float16 or bfloat16)");
+  }
+}
+
+template <typename OutType>
+void run_mxfp4_blockwise_scaled_group_mm_sm100(
+    torch::stable::Tensor& output, const torch::stable::Tensor& a,
+    const torch::stable::Tensor& b, const torch::stable::Tensor& a_blockscale,
+    const torch::stable::Tensor& b_blockscales,
+    const torch::stable::Tensor& problem_sizes,
+    const torch::stable::Tensor& expert_offsets,
+    const torch::stable::Tensor& sf_offsets, int M, int N, int K) {
+  using ProblemShape =
+      cutlass::gemm::GroupProblemShape<Shape<int32_t, int32_t, int32_t>>;
+  using ElementType = cutlass::float_e2m1_t;
+  using ElementSFType = cutlass::float_ue8m0_t;
+  using ElementA = cutlass::mx_float4_t<cutlass::float_e2m1_t>;
+  using ElementB = cutlass::mx_float4_t<cutlass::float_e2m1_t>;
+
+  using ElementC = OutType;
+  using ElementD = ElementC;
+  using ElementAccumulator = float;
+  // Layout definitions
+  using LayoutA = cutlass::layout::RowMajor;
+  using LayoutB = cutlass::layout::ColumnMajor;
+  using LayoutC = cutlass::layout::RowMajor;
+  using LayoutD = LayoutC;
+
+  static constexpr int AlignmentA = 32;
+  static constexpr int AlignmentB = 32;
+  static constexpr int AlignmentC = 128 / cutlass::sizeof_bits<ElementC>::value;
+  static constexpr int AlignmentD = 128 / cutlass::sizeof_bits<ElementD>::value;
+
+  // Architecture definitions
+  using ArchTag = cutlass::arch::Sm100;
+  using EpilogueOperatorClass = cutlass::arch::OpClassTensorOp;
+  using MainloopOperatorClass = cutlass::arch::OpClassBlockScaledTensorOp;
+  using StageCountType = cutlass::gemm::collective::StageCountAuto;
+
+  using ClusterShape = Shape<_1, _1, _1>;
+  struct MMA1SMConfig {
+    using MmaTileShape = Shape<_128, _128, _128>;
+    using KernelSchedule =
+        cutlass::gemm::KernelPtrArrayTmaWarpSpecialized1SmMxf4Sm100;
+    using EpilogueSchedule = cutlass::epilogue::PtrArrayTmaWarpSpecialized1Sm;
+  };
+
+  using CollectiveEpilogue =
+      typename cutlass::epilogue::collective::CollectiveBuilder<
+          ArchTag, EpilogueOperatorClass, typename MMA1SMConfig::MmaTileShape,
+          ClusterShape, Shape<_128, _64>, ElementAccumulator,
+          ElementAccumulator, ElementC, LayoutC*, AlignmentC, ElementD,
+          LayoutC*, AlignmentD,
+          typename MMA1SMConfig::EpilogueSchedule>::CollectiveOp;
+
+  using CollectiveMainloop =
+      typename cutlass::gemm::collective::CollectiveBuilder<
+          ArchTag, MainloopOperatorClass, ElementA, LayoutA*, AlignmentA,
+          ElementB, LayoutB*, AlignmentB, ElementAccumulator,
+          typename MMA1SMConfig::MmaTileShape, ClusterShape,
+          cutlass::gemm::collective::StageCountAutoCarveout<static_cast<int>(
+              sizeof(typename CollectiveEpilogue::SharedStorage))>,
+          typename MMA1SMConfig::KernelSchedule>::CollectiveOp;
+
+  using GemmKernel =
+      cutlass::gemm::kernel::GemmUniversal<ProblemShape, CollectiveMainloop,
+                                           CollectiveEpilogue>;
+
+  using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
+  using StrideA = typename Gemm::GemmKernel::InternalStrideA;
+  using StrideB = typename Gemm::GemmKernel::InternalStrideB;
+  using StrideC = typename Gemm::GemmKernel::InternalStrideC;
+  using StrideD = typename Gemm::GemmKernel::InternalStrideD;
+
+  using LayoutSFA =
+      typename Gemm::GemmKernel::CollectiveMainloop::InternalLayoutSFA;
+  using LayoutSFB =
+      typename Gemm::GemmKernel::CollectiveMainloop::InternalLayoutSFB;
+  using ScaleConfig =
+      typename Gemm::GemmKernel::CollectiveMainloop::Sm1xxBlkScaledConfig;
+
+  using UnderlyingProblemShape = ProblemShape::UnderlyingProblemShape;
+  int num_experts = static_cast<int>(expert_offsets.size(0));
+
+  torch::stable::Tensor a_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor b_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor out_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor a_scales_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor b_scales_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor layout_sfa = torch::stable::empty(
+      {num_experts, 5}, torch::headeronly::ScalarType::Long, std::nullopt,
+      a.device());
+  torch::stable::Tensor layout_sfb = torch::stable::empty(
+      {num_experts, 5}, torch::headeronly::ScalarType::Long, std::nullopt,
+      a.device());
+  torch::stable::Tensor a_strides1 =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor b_strides1 =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor c_strides1 =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+
+  mxfp4_run_get_group_gemm_starts<LayoutSFA, LayoutSFB, ScaleConfig>(
+      a_ptrs, b_ptrs, out_ptrs, a_scales_ptrs, b_scales_ptrs, layout_sfa,
+      layout_sfb, a_strides1, b_strides1, c_strides1, a.stride(0) * 2,
+      b.stride(1) * 2, output.stride(0), a, b, output, a_blockscale,
+      b_blockscales, expert_offsets, sf_offsets, problem_sizes, M, N, K);
+
+  // Create an instance of the GEMM
+  Gemm gemm_op;
+
+  UnderlyingProblemShape* problem_sizes_as_shapes =
+      static_cast<UnderlyingProblemShape*>(problem_sizes.data_ptr());
+
+  // Set the Scheduler info
+  cutlass::KernelHardwareInfo hw_info;
+  using RasterOrderOptions = typename cutlass::gemm::kernel::detail::
+      PersistentTileSchedulerSm100GroupParams<
+          typename ProblemShape::UnderlyingProblemShape>::RasterOrderOptions;
+  typename Gemm::GemmKernel::TileSchedulerArguments scheduler;
+  scheduler.raster_order = RasterOrderOptions::AlongM;
+  hw_info.device_id = a.get_device_index();
+  static std::unordered_map<int, int> cached_sm_counts;
+  if (cached_sm_counts.find(hw_info.device_id) == cached_sm_counts.end()) {
+    cached_sm_counts[hw_info.device_id] =
+        cutlass::KernelHardwareInfo::query_device_multiprocessor_count(
+            hw_info.device_id);
+  }
+  hw_info.sm_count = min(cached_sm_counts[hw_info.device_id], INT_MAX);
+
+  // Mainloop Arguments
+  typename GemmKernel::MainloopArguments mainloop_args{
+      static_cast<const ElementType**>(a_ptrs.data_ptr()),
+      static_cast<StrideA*>(a_strides1.data_ptr()),
+      static_cast<const ElementType**>(b_ptrs.data_ptr()),
+      static_cast<StrideB*>(b_strides1.data_ptr()),
+      static_cast<const ElementSFType**>(a_scales_ptrs.data_ptr()),
+      reinterpret_cast<LayoutSFA*>(layout_sfa.data_ptr()),
+      static_cast<const ElementSFType**>(b_scales_ptrs.data_ptr()),
+      reinterpret_cast<LayoutSFB*>(layout_sfb.data_ptr())};
+
+  // Epilogue Arguments
+  typename GemmKernel::EpilogueArguments epilogue_args{
+      {},  // epilogue.thread
+      nullptr,
+      static_cast<StrideC*>(c_strides1.data_ptr()),
+      static_cast<ElementD**>(out_ptrs.data_ptr()),
+      static_cast<StrideC*>(c_strides1.data_ptr())};
+  auto& fusion_args = epilogue_args.thread;
+  // Scalar epilogue (CUTLASS grouped GEMM): D = 1 * accum + 0 * C
+  fusion_args.alpha_ptr = nullptr;
+  fusion_args.beta_ptr = nullptr;
+  fusion_args.alpha = 1.0f;
+  fusion_args.alpha_ptr_array = nullptr;
+  fusion_args.dAlpha = {_0{}, _0{}, 0};
+  fusion_args.beta = 0.0f;
+  fusion_args.beta_ptr_array = nullptr;
+  fusion_args.dBeta = {_0{}, _0{}, 0};
+
+  // Gemm Arguments
+  typename GemmKernel::Arguments args{
+      cutlass::gemm::GemmUniversalMode::kGrouped,
+      {num_experts, problem_sizes_as_shapes, nullptr},
+      mainloop_args,
+      epilogue_args,
+      hw_info,
+      scheduler};
+
+  size_t workspace_size = Gemm::get_workspace_size(args);
+  auto workspace =
+      torch::stable::empty(workspace_size, torch::headeronly::ScalarType::Byte,
+                           std::nullopt, a.device());
+  const cudaStream_t stream = get_current_cuda_stream(a.get_device_index());
+
+  auto can_implement_status = gemm_op.can_implement(args);
+  STD_TORCH_CHECK(
+      can_implement_status == cutlass::Status::kSuccess,
+      "Failed to implement MXFP4 GEMM: status=", (int)can_implement_status);
+
+  // Run the GEMM
+  auto status = gemm_op.initialize(args, workspace.data_ptr());
+  STD_TORCH_CHECK(status == cutlass::Status::kSuccess,
+                  "Failed to initialize MXFP4 GEMM: status=", (int)status,
+                  " workspace_size=", workspace_size,
+                  " num_experts=", num_experts, " M=", M, " N=", N, " K=", K);
+
+  status = gemm_op.run(args, workspace.data_ptr(), stream);
+  STD_TORCH_CHECK(status == cutlass::Status::kSuccess,
+                  "Failed to run MXFP4 GEMM");
+}
+
+template <typename OutType>
+void run_mxfp4_blockwise_scaled_group_mm(
+    torch::stable::Tensor& output, const torch::stable::Tensor& a,
+    const torch::stable::Tensor& b, const torch::stable::Tensor& a_blockscale,
+    const torch::stable::Tensor& b_blockscales,
+    const torch::stable::Tensor& problem_sizes,
+    const torch::stable::Tensor& expert_offsets,
+    const torch::stable::Tensor& sf_offsets, int M, int N, int K) {
+  int32_t version_num = get_sm_version_num();
+#if defined ENABLE_NVFP4_SM100 && ENABLE_NVFP4_SM100
+  if (version_num >= 100 && version_num < 120) {
+    run_mxfp4_blockwise_scaled_group_mm_sm100<OutType>(
+        output, a, b, a_blockscale, b_blockscales, problem_sizes,
+        expert_offsets, sf_offsets, M, N, K);
+    return;
+  }
+#endif
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
+      false,
+      "No compiled cutlass_mxfp4_group_mm kernel for CUDA device capability: ",
+      version_num, ". Required capability: 100");
+}
+
+#if defined ENABLE_NVFP4_SM100 && ENABLE_NVFP4_SM100
+constexpr auto MXFP4_FLOAT4_E2M1X2 = torch::headeronly::ScalarType::Byte;
+// E8M0 scale factors stored as uint8
+constexpr auto MXFP4_SF_DTYPE = torch::headeronly::ScalarType::Byte;
+#endif
+
+#define CHECK_TYPE(x, st, m)             \
+  STD_TORCH_CHECK(x.scalar_type() == st, \
+                  ": Inconsistency of torch::stable::Tensor type:", m)
+#define CHECK_TH_CUDA(x, m) \
+  STD_TORCH_CHECK(x.is_cuda(), m, ": must be a CUDA tensor.")
+#define CHECK_CONTIGUOUS(x, m) \
+  STD_TORCH_CHECK(x.is_contiguous(), m, ": must be contiguous.")
+#define CHECK_INPUT(x, st, m) \
+  CHECK_TH_CUDA(x, m);        \
+  CHECK_CONTIGUOUS(x, m);     \
+  CHECK_TYPE(x, st, m)
+
+void cutlass_mxfp4_group_mm(torch::stable::Tensor& output,
+                            const torch::stable::Tensor& a,
+                            const torch::stable::Tensor& b,
+                            const torch::stable::Tensor& a_blockscale,
+                            const torch::stable::Tensor& b_blockscales,
+                            const torch::stable::Tensor& problem_sizes,
+                            const torch::stable::Tensor& expert_offsets,
+                            const torch::stable::Tensor& sf_offsets) {
+#if defined ENABLE_NVFP4_SM100 && ENABLE_NVFP4_SM100
+  // Input validation
+  CHECK_INPUT(a, MXFP4_FLOAT4_E2M1X2, "a");
+  CHECK_INPUT(b, MXFP4_FLOAT4_E2M1X2, "b");
+  // MXFP4 uses E8M0 scale factors (stored as uint8)
+  CHECK_INPUT(a_blockscale, MXFP4_SF_DTYPE, "a_blockscale");
+  CHECK_INPUT(b_blockscales, MXFP4_SF_DTYPE, "b_blockscales");
+
+  STD_TORCH_CHECK(
+      a_blockscale.dim() == 2,
+      "expected a_blockscale to be of shape [num_experts, rounded_m,"
+      " k // group_size], observed rank: ",
+      a_blockscale.dim())
+  STD_TORCH_CHECK(b_blockscales.dim() == 3,
+                  "expected b_blockscale to be of shape: "
+                  " [num_experts, n, k // group_size], observed rank: ",
+                  b_blockscales.dim())
+  STD_TORCH_CHECK(problem_sizes.dim() == 2,
+                  "problem_sizes must be a 2D tensor");
+  STD_TORCH_CHECK(problem_sizes.size(1) == 3,
+                  "problem_sizes must have the shape (num_experts, 3)");
+  STD_TORCH_CHECK(
+      problem_sizes.size(0) == expert_offsets.size(0),
+      "Number of experts in problem_sizes must match expert_offsets");
+  STD_TORCH_CHECK(
+      problem_sizes.scalar_type() == torch::headeronly::ScalarType::Int,
+      "problem_sizes must be int32.");
+
+  int M = static_cast<int>(a.size(0));
+  int N = static_cast<int>(b.size(1));
+  int E = static_cast<int>(b.size(0));
+  int K = static_cast<int>(2 * b.size(2));
+
+  if (output.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
+    run_mxfp4_blockwise_scaled_group_mm<cutlass::bfloat16_t>(
+        output, a, b, a_blockscale, b_blockscales, problem_sizes,
+        expert_offsets, sf_offsets, M, N, K);
+  } else {
+    run_mxfp4_blockwise_scaled_group_mm<cutlass::half_t>(
+        output, a, b, a_blockscale, b_blockscales, problem_sizes,
+        expert_offsets, sf_offsets, M, N, K);
+  }
+#else
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
+      false,
+      "No compiled cutlass_mxfp4_group_mm kernel; build vLLM with "
+      "SM100 block-scaled FP4 MoE (ENABLE_NVFP4_SM100) and CUDA 12.8+.");
+#endif
+}
+
+STABLE_TORCH_LIBRARY_IMPL(_C, CUDA, m) {
+  m.impl("cutlass_mxfp4_group_mm", TORCH_BOX(&cutlass_mxfp4_group_mm));
+}
diff --git a/csrc/libtorch_stable/quantization/fp4/mxfp4_experts_quant.cu b/csrc/libtorch_stable/quantization/fp4/mxfp4_experts_quant.cu
new file mode 100644
index 000000000000..78e4eda0c012
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/fp4/mxfp4_experts_quant.cu
@@ -0,0 +1,432 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+ *
+ * MXFP4 activation quantization kernel for MoE experts.
+ * Quantizes BF16/FP16 activations to MXFP4: E2M1 values with E8M0 block scales
+ * over 32-element groups.
+ *
+ * Uses PACK16 E2M1 conversion helpers (nvfp4_utils.cuh) configured for:
+ *   - Block size 32 (2 threads per SF in PACK16 mode)
+ *   - E8M0 (power-of-two) scale factors
+ *   - SF layout: [numMTiles, numKTiles, 32, 4, 4] where numKTiles=ceil(K/128)
+ */
+
+// MXFP4 requires PACK16 mode (16 elements per thread) so that
+// 2 threads cover 32-element blocks. This requires CUDA >= 12.9.
+// Must be defined before any header that (transitively) includes
+// nvfp4_utils.cuh.
+#define NVFP4_ENABLE_ELTS16 1
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cuda_runtime.h>
+#include <cuda_fp8.h>
+
+#include <torch/csrc/stable/library.h>
+#include <torch/csrc/stable/tensor.h>
+#include "libtorch_stable/torch_utils.h"
+#include "libtorch_stable/dispatch_utils.h"
+#include "cuda_vec_utils.cuh"
+#include "cuda_utils.h"
+
+#include "nvfp4_utils.cuh"
+static_assert(CVT_FP4_ELTS_PER_THREAD == 16,
+              "MXFP4 experts quant requires PACK16 mode (CUDA >= 12.9)");
+
+#include "launch_bounds_utils.h"
+
+namespace vllm {
+
+// MXFP4 block size constants
+static constexpr int MXFP4_SF_VEC_SIZE = 32;
+
+// For PACK16 mode (CVT_FP4_ELTS_PER_THREAD=16): 2 threads per SF
+// For PACK8 mode (CVT_FP4_ELTS_PER_THREAD=8): 4 threads per SF
+static constexpr int MXFP4_NUM_THREADS_PER_SF =
+    MXFP4_SF_VEC_SIZE / CVT_FP4_ELTS_PER_THREAD;
+
+// MXFP4 quantization kernel for experts.
+// Uses 32-element blocks with E8M0 (UE8M0) scale factors.
+// When FUSE_SILU_MUL=true, expects input with gate||up layout and fuses
+// SiLU(gate)*up before quantization.
+template <class Type, bool FUSE_SILU_MUL = false,
+          bool SMALL_NUM_EXPERTS = false>
+__global__ void __launch_bounds__(512, VLLM_BLOCKS_PER_SM(512))
+    mxfp4_cvt_fp16_to_fp4(int32_t numRows, int32_t numCols, Type const* in,
+                          fp4_packed_t* out, uint32_t* SFout,
+                          uint32_t* input_offset_by_experts,
+                          uint32_t* output_scale_offset_by_experts,
+                          int n_experts, bool low_latency) {
+  using PackedVec = PackedVec<Type, CVT_FP4_PACK16>;
+  static_assert(sizeof(PackedVec) == sizeof(Type) * CVT_FP4_ELTS_PER_THREAD,
+                "Vec size is not matched.");
+
+  // MXFP4: numKTiles = ceil(numCols / 128) since block_size=32, 4 SFs/tile
+  int32_t const numKTiles = (numCols + 127) / 128;
+
+  int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  int colsPerRow = numCols / CVT_FP4_ELTS_PER_THREAD;
+  int inColsPerRow = FUSE_SILU_MUL ? colsPerRow * 2 : colsPerRow;
+
+  for (int globalIdx = tid; globalIdx < numRows * colsPerRow;
+       globalIdx += gridDim.x * blockDim.x) {
+    int rowIdx = globalIdx / colsPerRow;
+    int colIdx = globalIdx % colsPerRow;
+
+    int rowIdx_in_expert = 0;
+    int expert_idx = 0;
+
+    if constexpr (SMALL_NUM_EXPERTS) {
+      for (int i = 0; i < n_experts; i++) {
+        uint32_t current_offset = __ldca(&input_offset_by_experts[i]);
+        uint32_t next_offset = __ldca(&input_offset_by_experts[i + 1]);
+        if (rowIdx >= current_offset && rowIdx < next_offset) {
+          rowIdx_in_expert = rowIdx - current_offset;
+          expert_idx = i;
+          break;
+        }
+      }
+    } else {
+      uint32_t local_offsets[17];
+      for (int chunk_start = 0; chunk_start < n_experts; chunk_start += 16) {
+        *reinterpret_cast<int4*>(local_offsets) =
+            __ldca(reinterpret_cast<const int4*>(
+                &input_offset_by_experts[chunk_start]));
+        *reinterpret_cast<int4*>(local_offsets + 4) =
+            __ldca(reinterpret_cast<const int4*>(
+                &input_offset_by_experts[chunk_start + 4]));
+        *reinterpret_cast<int4*>(local_offsets + 8) =
+            __ldca(reinterpret_cast<const int4*>(
+                &input_offset_by_experts[chunk_start + 8]));
+        *reinterpret_cast<int4*>(local_offsets + 12) =
+            __ldca(reinterpret_cast<const int4*>(
+                &input_offset_by_experts[chunk_start + 12]));
+        local_offsets[16] = __ldca(&input_offset_by_experts[chunk_start + 16]);
+
+#pragma unroll
+        for (int i = 0; i < 16; i++) {
+          if (rowIdx >= local_offsets[i] && rowIdx < local_offsets[i + 1]) {
+            rowIdx_in_expert = rowIdx - local_offsets[i];
+            expert_idx = chunk_start + i;
+            break;
+          }
+        }
+      }
+    }
+
+    // Load input and optionally apply fused SiLU+Mul
+    int64_t inOffset = rowIdx * inColsPerRow + colIdx;
+    PackedVec in_vec = reinterpret_cast<PackedVec const*>(in)[inOffset];
+    PackedVec quant_input;
+    if constexpr (FUSE_SILU_MUL) {
+      PackedVec in_vec_up =
+          reinterpret_cast<PackedVec const*>(in)[inOffset + colsPerRow];
+      quant_input = compute_silu_mul(in_vec, in_vec_up);
+    } else {
+      quant_input = in_vec;
+    }
+
+    // In PACK16 mode, each thread outputs 16 E2M1 values = u32x2
+    int64_t outOffset = rowIdx * colsPerRow + colIdx;
+    auto& out_pos = out[outOffset];
+
+    uint32_t* SFout_in_expert =
+        SFout + output_scale_offset_by_experts[expert_idx] * numKTiles;
+
+    // Use MXFP4_NUM_THREADS_PER_SF (2 for PACK16) for 32-element blocks
+    auto sf_out =
+        cvt_quant_to_fp4_get_sf_out_offset<uint32_t, MXFP4_NUM_THREADS_PER_SF>(
+            rowIdx_in_expert, colIdx, numKTiles, SFout_in_expert);
+
+    // Block E8M0 scales only; no extra tensor-level scale in this path
+    constexpr float SFScaleVal = 1.0f;
+    // UE8M0_SF=true for MXFP4 E8M0 scale factors
+    out_pos =
+        cvt_warp_fp16_to_fp4<Type, MXFP4_NUM_THREADS_PER_SF, /*UE8M0_SF=*/true>(
+            quant_input, SFScaleVal, sf_out);
+  }
+}
+
+// Large M_topk variant using shared memory for expert offsets
+template <class Type, bool FUSE_SILU_MUL = false,
+          bool SMALL_NUM_EXPERTS = false>
+__global__ void __launch_bounds__(1024, VLLM_BLOCKS_PER_SM(1024))
+    mxfp4_cvt_fp16_to_fp4(int32_t numRows, int32_t numCols, Type const* in,
+                          fp4_packed_t* out, uint32_t* SFout,
+                          uint32_t* input_offset_by_experts,
+                          uint32_t* output_scale_offset_by_experts,
+                          int n_experts) {
+  using PackedVec = PackedVec<Type, CVT_FP4_PACK16>;
+  static_assert(sizeof(PackedVec) == sizeof(Type) * CVT_FP4_ELTS_PER_THREAD,
+                "Vec size is not matched.");
+
+  // MXFP4: numKTiles = ceil(numCols / 128)
+  int32_t const numKTiles = (numCols + 127) / 128;
+
+  extern __shared__ uint32_t shared_input_offsets[];
+
+  if constexpr (SMALL_NUM_EXPERTS) {
+    for (int i = threadIdx.x; i < n_experts + 1; i += blockDim.x) {
+      shared_input_offsets[i] = input_offset_by_experts[i];
+    }
+  } else {
+    for (int i = threadIdx.x * 4; i < n_experts; i += blockDim.x * 4) {
+      *reinterpret_cast<int4*>(&shared_input_offsets[i]) =
+          *reinterpret_cast<const int4*>(&input_offset_by_experts[i]);
+    }
+    if (threadIdx.x == 0) {
+      shared_input_offsets[n_experts] = input_offset_by_experts[n_experts];
+    }
+  }
+
+  __syncthreads();
+
+  int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  int colsPerRow = numCols / CVT_FP4_ELTS_PER_THREAD;
+  int inColsPerRow = FUSE_SILU_MUL ? colsPerRow * 2 : colsPerRow;
+
+  for (int globalIdx = tid; globalIdx < numRows * colsPerRow;
+       globalIdx += gridDim.x * blockDim.x) {
+    int rowIdx = globalIdx / colsPerRow;
+    int colIdx = globalIdx % colsPerRow;
+
+    int rowIdx_in_expert = 0;
+    int expert_idx = 0;
+
+    // Binary search through experts using shared memory
+    int left = 0, right = n_experts - 1;
+    while (left <= right) {
+      int mid = (left + right) / 2;
+      uint32_t mid_offset = shared_input_offsets[mid];
+      uint32_t next_offset = shared_input_offsets[mid + 1];
+
+      if (rowIdx >= mid_offset && rowIdx < next_offset) {
+        rowIdx_in_expert = rowIdx - mid_offset;
+        expert_idx = mid;
+        break;
+      } else if (rowIdx < mid_offset) {
+        right = mid - 1;
+      } else {
+        left = mid + 1;
+      }
+    }
+
+    int64_t inOffset = rowIdx * inColsPerRow + colIdx;
+    PackedVec in_vec = reinterpret_cast<PackedVec const*>(in)[inOffset];
+    PackedVec quant_input;
+    if constexpr (FUSE_SILU_MUL) {
+      PackedVec in_vec_up =
+          reinterpret_cast<PackedVec const*>(in)[inOffset + colsPerRow];
+      quant_input = compute_silu_mul(in_vec, in_vec_up);
+    } else {
+      quant_input = in_vec;
+    }
+
+    int64_t outOffset = rowIdx * colsPerRow + colIdx;
+    auto& out_pos = out[outOffset];
+
+    // MXFP4 has no global scale - only block-level E8M0 scale factors
+    constexpr float SFScaleVal = 1.0f;
+
+    uint32_t* SFout_in_expert =
+        SFout + output_scale_offset_by_experts[expert_idx] * numKTiles;
+
+    auto sf_out =
+        cvt_quant_to_fp4_get_sf_out_offset<uint32_t, MXFP4_NUM_THREADS_PER_SF>(
+            rowIdx_in_expert, colIdx, numKTiles, SFout_in_expert);
+
+    out_pos =
+        cvt_warp_fp16_to_fp4<Type, MXFP4_NUM_THREADS_PER_SF, /*UE8M0_SF=*/true>(
+            quant_input, SFScaleVal, sf_out);
+  }
+}
+
+template <typename T, bool FUSE_SILU_MUL = false>
+void mxfp4_quant_impl(void* output, void* output_scale, void* input,
+                      void* input_offset_by_experts,
+                      void* output_scale_offset_by_experts, int m_topk, int k,
+                      int n_experts, cudaStream_t stream) {
+  int multiProcessorCount =
+      get_device_attribute(cudaDevAttrMultiProcessorCount, -1);
+
+  int const workSizePerRow = k / ELTS_PER_THREAD;
+  int const totalWorkSize = m_topk * workSizePerRow;
+  dim3 block(std::min(workSizePerRow, 512));
+  int const numBlocksPerSM =
+      vllm_runtime_blocks_per_sm(static_cast<int>(block.x));
+  dim3 grid(std::min(static_cast<int>((totalWorkSize + block.x - 1) / block.x),
+                     multiProcessorCount * numBlocksPerSM));
+  while (grid.x <= multiProcessorCount && block.x > 64) {
+    grid.x *= 2;
+    block.x = (block.x + 1) / 2;
+  }
+
+  int const blockRepeat =
+      (totalWorkSize + block.x * grid.x - 1) / (block.x * grid.x);
+  if (blockRepeat > 1) {
+    size_t shared_mem_size = (n_experts + 1) * sizeof(uint32_t);
+    if (n_experts >= 4) {
+      mxfp4_cvt_fp16_to_fp4<T, FUSE_SILU_MUL, false>
+          <<<grid, block, shared_mem_size, stream>>>(
+              m_topk, k, reinterpret_cast<T*>(input),
+              reinterpret_cast<fp4_packed_t*>(output),
+              reinterpret_cast<uint32_t*>(output_scale),
+              reinterpret_cast<uint32_t*>(input_offset_by_experts),
+              reinterpret_cast<uint32_t*>(output_scale_offset_by_experts),
+              n_experts);
+    } else {
+      mxfp4_cvt_fp16_to_fp4<T, FUSE_SILU_MUL, true>
+          <<<grid, block, shared_mem_size, stream>>>(
+              m_topk, k, reinterpret_cast<T*>(input),
+              reinterpret_cast<fp4_packed_t*>(output),
+              reinterpret_cast<uint32_t*>(output_scale),
+              reinterpret_cast<uint32_t*>(input_offset_by_experts),
+              reinterpret_cast<uint32_t*>(output_scale_offset_by_experts),
+              n_experts);
+    }
+  } else {
+    if (n_experts >= 16) {
+      mxfp4_cvt_fp16_to_fp4<T, FUSE_SILU_MUL, false>
+          <<<grid, block, 0, stream>>>(
+              m_topk, k, reinterpret_cast<T*>(input),
+              reinterpret_cast<fp4_packed_t*>(output),
+              reinterpret_cast<uint32_t*>(output_scale),
+              reinterpret_cast<uint32_t*>(input_offset_by_experts),
+              reinterpret_cast<uint32_t*>(output_scale_offset_by_experts),
+              n_experts, /* bool low_latency */ true);
+    } else {
+      mxfp4_cvt_fp16_to_fp4<T, FUSE_SILU_MUL, true><<<grid, block, 0, stream>>>(
+          m_topk, k, reinterpret_cast<T*>(input),
+          reinterpret_cast<fp4_packed_t*>(output),
+          reinterpret_cast<uint32_t*>(output_scale),
+          reinterpret_cast<uint32_t*>(input_offset_by_experts),
+          reinterpret_cast<uint32_t*>(output_scale_offset_by_experts),
+          n_experts, /* bool low_latency */ true);
+    }
+  }
+}
+
+}  // namespace vllm
+
+/*Quantization entry for mxfp4 experts quantization*/
+#define CHECK_TH_CUDA(x, m) \
+  STD_TORCH_CHECK(x.is_cuda(), m, "must be a CUDA tensor")
+#define CHECK_CONTIGUOUS(x, m) \
+  STD_TORCH_CHECK(x.is_contiguous(), m, "must be contiguous")
+#define CHECK_INPUT(x, m) \
+  CHECK_TH_CUDA(x, m);    \
+  CHECK_CONTIGUOUS(x, m);
+
+constexpr auto HALF = torch::headeronly::ScalarType::Half;
+constexpr auto BF16 = torch::headeronly::ScalarType::BFloat16;
+constexpr auto INT = torch::headeronly::ScalarType::Int;
+constexpr auto UINT8 = torch::headeronly::ScalarType::Byte;
+
+static constexpr int MXFP4_BLOCK_SIZE = 32;
+
+static void validate_mxfp4_experts_quant_inputs(
+    torch::stable::Tensor const& output,
+    torch::stable::Tensor const& output_scale,
+    torch::stable::Tensor const& input,
+    torch::stable::Tensor const& input_offset_by_experts,
+    torch::stable::Tensor const& output_scale_offset_by_experts,
+    int64_t n_experts, int64_t m_topk, int64_t k) {
+  CHECK_INPUT(output, "output");
+  CHECK_INPUT(output_scale, "output_scale");
+  CHECK_INPUT(input, "input");
+  CHECK_INPUT(input_offset_by_experts, "input_offset_by_experts");
+  CHECK_INPUT(output_scale_offset_by_experts, "output_scale_offset_by_experts");
+
+  STD_TORCH_CHECK(output.dim() == 2);
+  STD_TORCH_CHECK(output_scale.dim() == 2);
+  STD_TORCH_CHECK(input.dim() == 2);
+  STD_TORCH_CHECK(input_offset_by_experts.dim() == 1);
+  STD_TORCH_CHECK(output_scale_offset_by_experts.dim() == 1);
+
+  STD_TORCH_CHECK(input.scalar_type() == HALF || input.scalar_type() == BF16);
+  STD_TORCH_CHECK(input_offset_by_experts.scalar_type() == INT);
+  STD_TORCH_CHECK(output_scale_offset_by_experts.scalar_type() == INT);
+  // output is uint8 (two mxfp4 values packed into one uint8)
+  // output_scale is int32 (four E8M0 values packed into one int32)
+  STD_TORCH_CHECK(output.scalar_type() == UINT8);
+  STD_TORCH_CHECK(output_scale.scalar_type() == INT);
+
+  STD_TORCH_CHECK(k % MXFP4_BLOCK_SIZE == 0, "k must be a multiple of 32");
+  STD_TORCH_CHECK(input_offset_by_experts.size(0) == n_experts + 1);
+  STD_TORCH_CHECK(output_scale_offset_by_experts.size(0) == n_experts + 1);
+  STD_TORCH_CHECK(output.size(0) == m_topk);
+  STD_TORCH_CHECK(output.size(1) == k / 2);
+  int scales_k = k / MXFP4_BLOCK_SIZE;
+  // K-dimension scale columns padded to a multiple of 4 for swizzle layout
+  int padded_k = (scales_k + (4 - 1)) / 4 * 4;
+  // 4 = 4 E8M0 values packed into one int32
+  STD_TORCH_CHECK(output_scale.size(1) * 4 == padded_k);
+}
+
+void mxfp4_experts_quant(
+    torch::stable::Tensor& output, torch::stable::Tensor& output_scale,
+    torch::stable::Tensor const& input,
+    torch::stable::Tensor const& input_offset_by_experts,
+    torch::stable::Tensor const& output_scale_offset_by_experts,
+    int64_t n_experts) {
+  auto m_topk = input.size(0);
+  auto k = input.size(1);
+
+  validate_mxfp4_experts_quant_inputs(
+      output, output_scale, input, input_offset_by_experts,
+      output_scale_offset_by_experts, n_experts, m_topk, k);
+
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream(input.get_device_index());
+
+  VLLM_STABLE_DISPATCH_HALF_TYPES(
+      input.scalar_type(), "mxfp4_experts_quant_kernel", [&] {
+        using cuda_type = vllm::CUDATypeConverter<scalar_t>::Type;
+        vllm::mxfp4_quant_impl<cuda_type, /*FUSE_SILU_MUL=*/false>(
+            output.data_ptr(), output_scale.data_ptr(), input.data_ptr(),
+            input_offset_by_experts.data_ptr(),
+            output_scale_offset_by_experts.data_ptr(), m_topk, k, n_experts,
+            stream);
+      });
+}
+
+void silu_and_mul_mxfp4_experts_quant(
+    torch::stable::Tensor& output, torch::stable::Tensor& output_scale,
+    torch::stable::Tensor const& input,
+    torch::stable::Tensor const& input_offset_by_experts,
+    torch::stable::Tensor const& output_scale_offset_by_experts,
+    int64_t n_experts) {
+  auto m_topk = input.size(0);
+  auto k_times_2 = input.size(1);
+  STD_TORCH_CHECK(k_times_2 % 2 == 0, "input width must be even (gate || up)");
+  auto k = k_times_2 / 2;
+
+  validate_mxfp4_experts_quant_inputs(
+      output, output_scale, input, input_offset_by_experts,
+      output_scale_offset_by_experts, n_experts, m_topk, k);
+
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream(input.get_device_index());
+
+  VLLM_STABLE_DISPATCH_HALF_TYPES(
+      input.scalar_type(), "silu_mul_mxfp4_experts_quant_kernel", [&] {
+        using cuda_type = vllm::CUDATypeConverter<scalar_t>::Type;
+        vllm::mxfp4_quant_impl<cuda_type, /*FUSE_SILU_MUL=*/true>(
+            output.data_ptr(), output_scale.data_ptr(), input.data_ptr(),
+            input_offset_by_experts.data_ptr(),
+            output_scale_offset_by_experts.data_ptr(), m_topk, k, n_experts,
+            stream);
+      });
+}
+
+// Registered here (not torch_bindings.cpp) because VLLM_GPU_FLAGS is applied
+// only under COMPILE_LANGUAGE:CUDA, so ENABLE_NVFP4_SM100 is invisible to
+// .cpp files and cannot gate the registration from there.
+STABLE_TORCH_LIBRARY_IMPL(_C, CUDA, m) {
+  m.impl("mxfp4_experts_quant", TORCH_BOX(&mxfp4_experts_quant));
+  m.impl("silu_and_mul_mxfp4_experts_quant",
+         TORCH_BOX(&silu_and_mul_mxfp4_experts_quant));
+}
diff --git a/csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu b/csrc/libtorch_stable/quantization/fp4/nvfp4_blockwise_moe_kernel.cu
similarity index 66%
rename from csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu
rename to csrc/libtorch_stable/quantization/fp4/nvfp4_blockwise_moe_kernel.cu
index ae8ef1bf99d6..b22308d25cae 100644
--- a/csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu
+++ b/csrc/libtorch_stable/quantization/fp4/nvfp4_blockwise_moe_kernel.cu
@@ -14,14 +14,12 @@
  * limitations under the License.
  */
 
-#include "core/registration.h"
+#include <torch/csrc/stable/library.h>
+#include <torch/csrc/stable/tensor.h>
+#include "libtorch_stable/torch_utils.h"
 
-#include <torch/all.h>
 #include <cutlass/arch/arch.h>
 
-#include <ATen/cuda/CUDAContext.h>
-#include <c10/cuda/CUDAGuard.h>
-#include <c10/cuda/CUDAStream.h>
 #include "cutlass_extensions/common.hpp"
 
 #include "cute/tensor.hpp"
@@ -122,7 +120,7 @@ __global__ void __get_group_gemm_starts(
 #define __CALL_GET_STARTS_KERNEL_BLOCKSCALE(ELEMENT_AB_TYPE, SF_TYPE,         \
                                             TENSOR_C_TYPE, C_TYPE, LayoutSFA, \
                                             LayoutSFB, ScaleConfig)           \
-  else if (out_tensors.dtype() == TENSOR_C_TYPE) {                            \
+  else if (out_tensors.scalar_type() == TENSOR_C_TYPE) {                      \
     __get_group_gemm_starts<ELEMENT_AB_TYPE, C_TYPE, SF_TYPE, float,          \
                             LayoutSFA, LayoutSFB, ScaleConfig>                \
         <<<1, num_experts, 0, stream>>>(                                      \
@@ -150,50 +148,64 @@ __global__ void __get_group_gemm_starts(
   }
 
 template <typename LayoutSFA, typename LayoutSFB, typename ScaleConfig>
-void run_get_group_gemm_starts(
-    const torch::Tensor& a_starts, const torch::Tensor& b_starts,
-    const torch::Tensor& out_starts, const torch::Tensor& a_scales_starts,
-    const torch::Tensor& b_scales_starts, const torch::Tensor& alpha_starts,
-    const torch::Tensor& layout_sfa, const torch::Tensor& layout_sfb,
-    const torch::Tensor& a_strides, const torch::Tensor& b_strides,
-    const torch::Tensor& c_strides, int64_t a_stride_val, int64_t b_stride_val,
-    int64_t c_stride_val,
-    /*these are used for their base addresses*/
-    torch::Tensor const& a_tensors, torch::Tensor const& b_tensors,
-    torch::Tensor const& out_tensors, torch::Tensor const& a_scales,
-    torch::Tensor const& b_scales, torch::Tensor const& alphas,
-    torch::Tensor const& expert_offsets, torch::Tensor const& sf_offsets,
-    torch::Tensor const& problem_sizes, int M, int N, int K) {
+void run_get_group_gemm_starts(const torch::stable::Tensor& a_starts,
+                               const torch::stable::Tensor& b_starts,
+                               const torch::stable::Tensor& out_starts,
+                               const torch::stable::Tensor& a_scales_starts,
+                               const torch::stable::Tensor& b_scales_starts,
+                               const torch::stable::Tensor& alpha_starts,
+                               const torch::stable::Tensor& layout_sfa,
+                               const torch::stable::Tensor& layout_sfb,
+                               const torch::stable::Tensor& a_strides,
+                               const torch::stable::Tensor& b_strides,
+                               const torch::stable::Tensor& c_strides,
+                               int64_t a_stride_val, int64_t b_stride_val,
+                               int64_t c_stride_val,
+                               /*these are used for their base addresses*/
+                               torch::stable::Tensor const& a_tensors,
+                               torch::stable::Tensor const& b_tensors,
+                               torch::stable::Tensor const& out_tensors,
+                               torch::stable::Tensor const& a_scales,
+                               torch::stable::Tensor const& b_scales,
+                               torch::stable::Tensor const& alphas,
+                               torch::stable::Tensor const& expert_offsets,
+                               torch::stable::Tensor const& sf_offsets,
+                               torch::stable::Tensor const& problem_sizes,
+                               int M, int N, int K) {
   int num_experts = (int)expert_offsets.size(0);
-  auto stream = at::cuda::getCurrentCUDAStream(a_tensors.device().index());
+  auto stream = get_current_cuda_stream(a_tensors.get_device_index());
 
-  TORCH_CHECK(out_tensors.size(1) == N,
-              "Output tensor shape doesn't match expected shape");
-  TORCH_CHECK(K / 2 == b_tensors.size(2),
-              "b_tensors(dim = 2) and a_tensors(dim = 1) trailing"
-              " dimension must match");
+  STD_TORCH_CHECK(out_tensors.size(1) == N,
+                  "Output tensor shape doesn't match expected shape");
+  STD_TORCH_CHECK(K / 2 == b_tensors.size(2),
+                  "b_tensors(dim = 2) and a_tensors(dim = 1) trailing"
+                  " dimension must match");
   if (false) {
   }
   //(ELEMENT_AB_TYPE, BS_TYPE, TENSOR_C_TYPE, C_TYPE, LayoutSFA, LayoutSFB,
   // ScaleConfig)
   __CALL_GET_STARTS_KERNEL_BLOCKSCALE(
-      cutlass::float_e2m1_t, cutlass::float_ue4m3_t, torch::kBFloat16,
-      cutlass::bfloat16_t, LayoutSFA, LayoutSFB, ScaleConfig)
+      cutlass::float_e2m1_t, cutlass::float_ue4m3_t,
+      torch::headeronly::ScalarType::BFloat16, cutlass::bfloat16_t, LayoutSFA,
+      LayoutSFB, ScaleConfig)
   __CALL_GET_STARTS_KERNEL_BLOCKSCALE(cutlass::float_e2m1_t,
-                                      cutlass::float_ue4m3_t, torch::kFloat16,
-                                      half, LayoutSFA, LayoutSFB, ScaleConfig)
+                                      cutlass::float_ue4m3_t,
+                                      torch::headeronly::ScalarType::Half, half,
+                                      LayoutSFA, LayoutSFB, ScaleConfig)
   else {
-    TORCH_CHECK(false, "Invalid output type (must be float16 or bfloat16)");
+    STD_TORCH_CHECK(false, "Invalid output type (must be float16 or bfloat16)");
   }
 }
 
 template <typename OutType>
 void run_fp4_blockwise_scaled_group_mm_sm100(
-    torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b,
-    const torch::Tensor& a_blockscale, const torch::Tensor& b_blockscales,
-    const torch::Tensor& alphas, const torch::Tensor& problem_sizes,
-    const torch::Tensor& expert_offsets, const torch::Tensor& sf_offsets, int M,
-    int N, int K) {
+    torch::stable::Tensor& output, const torch::stable::Tensor& a,
+    const torch::stable::Tensor& b, const torch::stable::Tensor& a_blockscale,
+    const torch::stable::Tensor& b_blockscales,
+    const torch::stable::Tensor& alphas,
+    const torch::stable::Tensor& problem_sizes,
+    const torch::stable::Tensor& expert_offsets,
+    const torch::stable::Tensor& sf_offsets, int M, int N, int K) {
   using ProblemShape =
       cutlass::gemm::GroupProblemShape<Shape<int32_t, int32_t, int32_t>>;
   using ElementType = cutlass::float_e2m1_t;
@@ -272,20 +284,40 @@ void run_fp4_blockwise_scaled_group_mm_sm100(
 
   using UnderlyingProblemShape = ProblemShape::UnderlyingProblemShape;
   int num_experts = static_cast<int>(expert_offsets.size(0));
-  auto options_int =
-      torch::TensorOptions().dtype(torch::kInt64).device(a.device());
-
-  torch::Tensor a_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor b_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor out_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor a_scales_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor b_scales_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor alpha_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor layout_sfa = torch::empty({num_experts, 5}, options_int);
-  torch::Tensor layout_sfb = torch::empty({num_experts, 5}, options_int);
-  torch::Tensor a_strides1 = torch::empty(num_experts, options_int);
-  torch::Tensor b_strides1 = torch::empty(num_experts, options_int);
-  torch::Tensor c_strides1 = torch::empty(num_experts, options_int);
+
+  torch::stable::Tensor a_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor b_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor out_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor a_scales_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor b_scales_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor alpha_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor layout_sfa = torch::stable::empty(
+      {num_experts, 5}, torch::headeronly::ScalarType::Long, std::nullopt,
+      a.device());
+  torch::stable::Tensor layout_sfb = torch::stable::empty(
+      {num_experts, 5}, torch::headeronly::ScalarType::Long, std::nullopt,
+      a.device());
+  torch::stable::Tensor a_strides1 =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor b_strides1 =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor c_strides1 =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
 
   run_get_group_gemm_starts<LayoutSFA, LayoutSFB, ScaleConfig>(
       a_ptrs, b_ptrs, out_ptrs, a_scales_ptrs, b_scales_ptrs, alpha_ptrs,
@@ -308,7 +340,7 @@ void run_fp4_blockwise_scaled_group_mm_sm100(
           typename ProblemShape::UnderlyingProblemShape>::RasterOrderOptions;
   typename Gemm::GemmKernel::TileSchedulerArguments scheduler;
   scheduler.raster_order = RasterOrderOptions::AlongM;
-  hw_info.device_id = a.get_device();
+  hw_info.device_id = a.get_device_index();
   static std::unordered_map<int, int> cached_sm_counts;
   if (cached_sm_counts.find(hw_info.device_id) == cached_sm_counts.end()) {
     cached_sm_counts[hw_info.device_id] =
@@ -350,32 +382,35 @@ void run_fp4_blockwise_scaled_group_mm_sm100(
       scheduler};
 
   size_t workspace_size = Gemm::get_workspace_size(args);
-  auto const workspace_options =
-      torch::TensorOptions().dtype(torch::kUInt8).device(a.device());
-  auto workspace = torch::empty(workspace_size, workspace_options);
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream(a.get_device());
+  auto workspace =
+      torch::stable::empty(workspace_size, torch::headeronly::ScalarType::Byte,
+                           std::nullopt, a.device());
+  const cudaStream_t stream = get_current_cuda_stream(a.get_device_index());
 
   auto can_implement_status = gemm_op.can_implement(args);
-  TORCH_CHECK(can_implement_status == cutlass::Status::kSuccess,
-              "Failed to implement GEMM: status=", (int)can_implement_status);
+  STD_TORCH_CHECK(
+      can_implement_status == cutlass::Status::kSuccess,
+      "Failed to implement GEMM: status=", (int)can_implement_status);
 
   // Run the GEMM
   auto status = gemm_op.initialize(args, workspace.data_ptr());
-  TORCH_CHECK(status == cutlass::Status::kSuccess,
-              "Failed to initialize GEMM: status=", (int)status,
-              " workspace_size=", workspace_size, " num_experts=", num_experts,
-              " M=", M, " N=", N, " K=", K);
+  STD_TORCH_CHECK(status == cutlass::Status::kSuccess,
+                  "Failed to initialize GEMM: status=", (int)status,
+                  " workspace_size=", workspace_size,
+                  " num_experts=", num_experts, " M=", M, " N=", N, " K=", K);
 
   status = gemm_op.run(args, workspace.data_ptr(), stream);
-  TORCH_CHECK(status == cutlass::Status::kSuccess, "Failed to run GEMM");
+  STD_TORCH_CHECK(status == cutlass::Status::kSuccess, "Failed to run GEMM");
 }
 
 void run_fp4_blockwise_scaled_group_mm_sm120(
-    torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b,
-    const torch::Tensor& a_blockscale, const torch::Tensor& b_blockscales,
-    const torch::Tensor& alphas, const torch::Tensor& problem_sizes,
-    const torch::Tensor& expert_offsets, const torch::Tensor& sf_offsets, int M,
-    int N, int K) {
+    torch::stable::Tensor& output, const torch::stable::Tensor& a,
+    const torch::stable::Tensor& b, const torch::stable::Tensor& a_blockscale,
+    const torch::stable::Tensor& b_blockscales,
+    const torch::stable::Tensor& alphas,
+    const torch::stable::Tensor& problem_sizes,
+    const torch::stable::Tensor& expert_offsets,
+    const torch::stable::Tensor& sf_offsets, int M, int N, int K) {
   using ProblemShape =
       cutlass::gemm::GroupProblemShape<Shape<int32_t, int32_t, int32_t>>;
   using ElementType = cutlass::float_e2m1_t;
@@ -446,20 +481,40 @@ void run_fp4_blockwise_scaled_group_mm_sm120(
 
   using UnderlyingProblemShape = ProblemShape::UnderlyingProblemShape;
   int num_experts = static_cast<int>(expert_offsets.size(0));
-  auto options_int =
-      torch::TensorOptions().dtype(torch::kInt64).device(a.device());
-
-  torch::Tensor a_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor b_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor out_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor a_scales_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor b_scales_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor alpha_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor layout_sfa = torch::empty({num_experts, 5}, options_int);
-  torch::Tensor layout_sfb = torch::empty({num_experts, 5}, options_int);
-  torch::Tensor a_strides1 = torch::empty(num_experts, options_int);
-  torch::Tensor b_strides1 = torch::empty(num_experts, options_int);
-  torch::Tensor c_strides1 = torch::empty(num_experts, options_int);
+
+  torch::stable::Tensor a_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor b_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor out_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor a_scales_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor b_scales_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor alpha_ptrs =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor layout_sfa = torch::stable::empty(
+      {num_experts, 5}, torch::headeronly::ScalarType::Long, std::nullopt,
+      a.device());
+  torch::stable::Tensor layout_sfb = torch::stable::empty(
+      {num_experts, 5}, torch::headeronly::ScalarType::Long, std::nullopt,
+      a.device());
+  torch::stable::Tensor a_strides1 =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor b_strides1 =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
+  torch::stable::Tensor c_strides1 =
+      torch::stable::empty(num_experts, torch::headeronly::ScalarType::Long,
+                           std::nullopt, a.device());
 
   run_get_group_gemm_starts<LayoutSFA, LayoutSFB, ScaleConfig>(
       a_ptrs, b_ptrs, out_ptrs, a_scales_ptrs, b_scales_ptrs, alpha_ptrs,
@@ -480,7 +535,7 @@ void run_fp4_blockwise_scaled_group_mm_sm120(
   using RasterOrderOptions = cutlass::gemm::kernel::detail::RasterOrderOptions;
   typename Gemm::GemmKernel::TileSchedulerArguments scheduler;
   scheduler.raster_order = RasterOrderOptions::AlongM;
-  hw_info.device_id = a.get_device();
+  hw_info.device_id = a.get_device_index();
   static std::unordered_map<int, int> cached_sm_counts;
   if (cached_sm_counts.find(hw_info.device_id) == cached_sm_counts.end()) {
     cached_sm_counts[hw_info.device_id] =
@@ -523,33 +578,36 @@ void run_fp4_blockwise_scaled_group_mm_sm120(
       scheduler};
 
   size_t workspace_size = Gemm::get_workspace_size(args);
-  auto const workspace_options =
-      torch::TensorOptions().dtype(torch::kUInt8).device(a.device());
-  auto workspace = torch::empty(workspace_size, workspace_options);
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream(a.get_device());
+  auto workspace =
+      torch::stable::empty(workspace_size, torch::headeronly::ScalarType::Byte,
+                           std::nullopt, a.device());
+  const cudaStream_t stream = get_current_cuda_stream(a.get_device_index());
 
   auto can_implement_status = gemm_op.can_implement(args);
-  TORCH_CHECK(can_implement_status == cutlass::Status::kSuccess,
-              "Failed to implement GEMM: status=", (int)can_implement_status);
+  STD_TORCH_CHECK(
+      can_implement_status == cutlass::Status::kSuccess,
+      "Failed to implement GEMM: status=", (int)can_implement_status);
 
   // Run the GEMM
   auto status = gemm_op.initialize(args, workspace.data_ptr());
-  TORCH_CHECK(status == cutlass::Status::kSuccess,
-              "Failed to initialize GEMM: status=", (int)status,
-              " workspace_size=", workspace_size, " num_experts=", num_experts,
-              " M=", M, " N=", N, " K=", K);
+  STD_TORCH_CHECK(status == cutlass::Status::kSuccess,
+                  "Failed to initialize GEMM: status=", (int)status,
+                  " workspace_size=", workspace_size,
+                  " num_experts=", num_experts, " M=", M, " N=", N, " K=", K);
 
   status = gemm_op.run(args, workspace.data_ptr(), stream);
-  TORCH_CHECK(status == cutlass::Status::kSuccess, "Failed to run GEMM");
+  STD_TORCH_CHECK(status == cutlass::Status::kSuccess, "Failed to run GEMM");
 }
 
 template <typename OutType>
 void run_fp4_blockwise_scaled_group_mm(
-    torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b,
-    const torch::Tensor& a_blockscale, const torch::Tensor& b_blockscales,
-    const torch::Tensor& alphas, const torch::Tensor& problem_sizes,
-    const torch::Tensor& expert_offsets, const torch::Tensor& sf_offsets, int M,
-    int N, int K) {
+    torch::stable::Tensor& output, const torch::stable::Tensor& a,
+    const torch::stable::Tensor& b, const torch::stable::Tensor& a_blockscale,
+    const torch::stable::Tensor& b_blockscales,
+    const torch::stable::Tensor& alphas,
+    const torch::stable::Tensor& problem_sizes,
+    const torch::stable::Tensor& expert_offsets,
+    const torch::stable::Tensor& sf_offsets, int M, int N, int K) {
   int32_t version_num = get_sm_version_num();
 #if defined ENABLE_NVFP4_SM120 && ENABLE_NVFP4_SM120
   if (version_num >= 120 && version_num < 130) {
@@ -567,7 +625,7 @@ void run_fp4_blockwise_scaled_group_mm(
     return;
   }
 #endif
-  TORCH_CHECK_NOT_IMPLEMENTED(
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
       false,
       "No compiled cutlass_fp4_group_mm kernel for CUDA device capability: ",
       version_num, ". Required capability: 100 or 120");
@@ -575,26 +633,31 @@ void run_fp4_blockwise_scaled_group_mm(
 
 #if (defined ENABLE_NVFP4_SM100 && ENABLE_NVFP4_SM100) || \
     (defined ENABLE_NVFP4_SM120 && ENABLE_NVFP4_SM120)
-constexpr auto FLOAT4_E2M1X2 = at::ScalarType::Byte;
-constexpr auto SF_DTYPE = at::ScalarType::Float8_e4m3fn;
+constexpr auto FLOAT4_E2M1X2 = torch::headeronly::ScalarType::Byte;
+constexpr auto SF_DTYPE = torch::headeronly::ScalarType::Float8_e4m3fn;
 #endif
 
-#define CHECK_TYPE(x, st, m) \
-  TORCH_CHECK(x.scalar_type() == st, ": Inconsistency of Tensor type:", m)
+#define CHECK_TYPE(x, st, m)             \
+  STD_TORCH_CHECK(x.scalar_type() == st, \
+                  ": Inconsistency of torch::stable::Tensor type:", m)
 #define CHECK_TH_CUDA(x, m) \
-  TORCH_CHECK(x.is_cuda(), m, ": must be a CUDA tensor.")
+  STD_TORCH_CHECK(x.is_cuda(), m, ": must be a CUDA tensor.")
 #define CHECK_CONTIGUOUS(x, m) \
-  TORCH_CHECK(x.is_contiguous(), m, ": must be contiguous.")
+  STD_TORCH_CHECK(x.is_contiguous(), m, ": must be contiguous.")
 #define CHECK_INPUT(x, st, m) \
   CHECK_TH_CUDA(x, m);        \
   CHECK_CONTIGUOUS(x, m);     \
   CHECK_TYPE(x, st, m)
 
-void cutlass_fp4_group_mm(
-    torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b,
-    const torch::Tensor& a_blockscale, const torch::Tensor& b_blockscales,
-    const torch::Tensor& alphas, const torch::Tensor& problem_sizes,
-    const torch::Tensor& expert_offsets, const torch::Tensor& sf_offsets) {
+void cutlass_fp4_group_mm(torch::stable::Tensor& output,
+                          const torch::stable::Tensor& a,
+                          const torch::stable::Tensor& b,
+                          const torch::stable::Tensor& a_blockscale,
+                          const torch::stable::Tensor& b_blockscales,
+                          const torch::stable::Tensor& alphas,
+                          const torch::stable::Tensor& problem_sizes,
+                          const torch::stable::Tensor& expert_offsets,
+                          const torch::stable::Tensor& sf_offsets) {
 #if (defined ENABLE_NVFP4_SM100 && ENABLE_NVFP4_SM100) || \
     (defined ENABLE_NVFP4_SM120 && ENABLE_NVFP4_SM120)
   // Input validation
@@ -602,30 +665,34 @@ void cutlass_fp4_group_mm(
   CHECK_INPUT(b, FLOAT4_E2M1X2, "b");
   CHECK_INPUT(a_blockscale, SF_DTYPE, "a_blockscale");
   CHECK_INPUT(b_blockscales, SF_DTYPE, "b_blockscales");
-  CHECK_INPUT(alphas, at::ScalarType::Float, "alphas");
-
-  TORCH_CHECK(a_blockscale.dim() == 2,
-              "expected a_blockscale to be of shape [num_experts, rounded_m,"
-              " k // group_size], observed rank: ",
-              a_blockscale.dim())
-  TORCH_CHECK(b_blockscales.dim() == 3,
-              "expected b_blockscale to be of shape: "
-              " [num_experts, n, k // group_size], observed rank: ",
-              b_blockscales.dim())
-  TORCH_CHECK(problem_sizes.dim() == 2, "problem_sizes must be  a 2D tensor");
-  TORCH_CHECK(problem_sizes.size(1) == 3,
-              "problem_sizes must have the shape (num_experts, 3)");
-  TORCH_CHECK(problem_sizes.size(0) == expert_offsets.size(0),
-              "Number of experts in problem_sizes must match expert_offsets");
-  TORCH_CHECK(problem_sizes.dtype() == torch::kInt32,
-              "problem_sizes must be int32.");
+  CHECK_INPUT(alphas, torch::headeronly::ScalarType::Float, "alphas");
+
+  STD_TORCH_CHECK(
+      a_blockscale.dim() == 2,
+      "expected a_blockscale to be of shape [num_experts, rounded_m,"
+      " k // group_size], observed rank: ",
+      a_blockscale.dim())
+  STD_TORCH_CHECK(b_blockscales.dim() == 3,
+                  "expected b_blockscale to be of shape: "
+                  " [num_experts, n, k // group_size], observed rank: ",
+                  b_blockscales.dim())
+  STD_TORCH_CHECK(problem_sizes.dim() == 2,
+                  "problem_sizes must be  a 2D tensor");
+  STD_TORCH_CHECK(problem_sizes.size(1) == 3,
+                  "problem_sizes must have the shape (num_experts, 3)");
+  STD_TORCH_CHECK(
+      problem_sizes.size(0) == expert_offsets.size(0),
+      "Number of experts in problem_sizes must match expert_offsets");
+  STD_TORCH_CHECK(
+      problem_sizes.scalar_type() == torch::headeronly::ScalarType::Int,
+      "problem_sizes must be int32.");
 
   int M = static_cast<int>(a.size(0));
   int N = static_cast<int>(b.size(1));
   int E = static_cast<int>(b.size(0));
   int K = static_cast<int>(2 * b.size(2));
 
-  if (output.scalar_type() == torch::kBFloat16) {
+  if (output.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
     run_fp4_blockwise_scaled_group_mm<cutlass::bfloat16_t>(
         output, a, b, a_blockscale, b_blockscales, alphas, problem_sizes,
         expert_offsets, sf_offsets, M, N, K);
@@ -633,7 +700,7 @@ void cutlass_fp4_group_mm(
   #if defined ENABLE_NVFP4_SM120 && ENABLE_NVFP4_SM120
     int32_t version_num = get_sm_version_num();
     if (version_num >= 120 && version_num < 130) {
-      TORCH_CHECK_NOT_IMPLEMENTED(
+      STD_TORCH_CHECK_NOT_IMPLEMENTED(
           false, "SM120 NVFP4 MOE only supports bfloat16 output, got: ",
           output.scalar_type());
     }
@@ -643,7 +710,7 @@ void cutlass_fp4_group_mm(
         expert_offsets, sf_offsets, M, N, K);
   }
 #else
-  TORCH_CHECK_NOT_IMPLEMENTED(
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
       false,
       "No compiled cutlass_fp4_group_mm kernel, vLLM must "
       "be compiled with ENABLE_NVFP4_SM100 or ENABLE_NVFP4_SM120 for SM100/120 "
@@ -651,6 +718,6 @@ void cutlass_fp4_group_mm(
 #endif
 }
 
-TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
-  m.impl("cutlass_fp4_group_mm", &cutlass_fp4_group_mm);
+STABLE_TORCH_LIBRARY_IMPL(_C, CUDA, m) {
+  m.impl("cutlass_fp4_group_mm", TORCH_BOX(&cutlass_fp4_group_mm));
 }
diff --git a/csrc/quantization/fp4/nvfp4_experts_quant.cu b/csrc/libtorch_stable/quantization/fp4/nvfp4_experts_quant.cu
similarity index 82%
rename from csrc/quantization/fp4/nvfp4_experts_quant.cu
rename to csrc/libtorch_stable/quantization/fp4/nvfp4_experts_quant.cu
index 3162b6cdb8a9..744ae4f73112 100644
--- a/csrc/quantization/fp4/nvfp4_experts_quant.cu
+++ b/csrc/libtorch_stable/quantization/fp4/nvfp4_experts_quant.cu
@@ -14,16 +14,15 @@
  * limitations under the License.
  */
 
-#include <torch/all.h>
+#include <torch/csrc/stable/tensor.h>
+#include "libtorch_stable/torch_utils.h"
+#include "libtorch_stable/dispatch_utils.h"
+#include "cuda_vec_utils.cuh"
 
 #include <cuda_runtime_api.h>
 #include <cuda_runtime.h>
 
-#include <ATen/cuda/CUDAContext.h>
-#include <c10/cuda/CUDAGuard.h>
-
 #include <cuda_fp8.h>
-#include "dispatch_utils.h"
 
 #include "cuda_utils.h"
 #include "nvfp4_utils.cuh"
@@ -278,7 +277,9 @@ void quant_impl(void* output, void* output_scale, void* input,
       (totalWorkSize + block.x * grid.x - 1) / (block.x * grid.x);
   if (blockRepeat > 1) {
     size_t shared_mem_size = (n_experts + 1) * sizeof(uint32_t);
-    if (n_experts >= 4) {
+    // The shared-memory vectorized offset load only handles full 4-expert
+    // chunks. Use the scalar specialization for the remainder cases.
+    if (n_experts >= 4 && n_experts % 4 == 0) {
       cvt_fp16_to_fp4<T, FUSE_SILU_MUL, false, false>
           <<<grid, block, shared_mem_size, stream>>>(
               m_topk, k, reinterpret_cast<T*>(input),
@@ -300,7 +301,9 @@ void quant_impl(void* output, void* output_scale, void* input,
               n_experts);
     }
   } else {
-    if (n_experts >= 16) {
+    // The low-latency vectorized expert lookup only handles full 16-expert
+    // chunks. Fall back to the scalar lookup path for the remainder cases.
+    if (n_experts >= 16 && n_experts % 16 == 0) {
       cvt_fp16_to_fp4<T, FUSE_SILU_MUL, false, false>
           <<<grid, block, 0, stream>>>(
               m_topk, k, reinterpret_cast<T*>(input),
@@ -327,25 +330,28 @@ void quant_impl(void* output, void* output_scale, void* input,
 }  // namespace vllm
 
 /*Quantization entry for fp4 experts quantization*/
-#define CHECK_TH_CUDA(x, m) TORCH_CHECK(x.is_cuda(), m, "must be a CUDA tensor")
+#define CHECK_TH_CUDA(x, m) \
+  STD_TORCH_CHECK(x.is_cuda(), m, "must be a CUDA tensor")
 #define CHECK_CONTIGUOUS(x, m) \
-  TORCH_CHECK(x.is_contiguous(), m, "must be contiguous")
+  STD_TORCH_CHECK(x.is_contiguous(), m, "must be contiguous")
 #define CHECK_INPUT(x, m) \
   CHECK_TH_CUDA(x, m);    \
   CHECK_CONTIGUOUS(x, m);
 
-constexpr auto HALF = at::ScalarType::Half;
-constexpr auto BF16 = at::ScalarType::BFloat16;
-constexpr auto FLOAT = at::ScalarType::Float;
-constexpr auto INT = at::ScalarType::Int;
-constexpr auto UINT8 = at::ScalarType::Byte;
+constexpr auto HALF = torch::headeronly::ScalarType::Half;
+constexpr auto BF16 = torch::headeronly::ScalarType::BFloat16;
+constexpr auto FLOAT = torch::headeronly::ScalarType::Float;
+constexpr auto INT = torch::headeronly::ScalarType::Int;
+constexpr auto UINT8 = torch::headeronly::ScalarType::Byte;
 
 // Common validation for fp4 experts quantization entry points.
 static void validate_fp4_experts_quant_inputs(
-    torch::Tensor const& output, torch::Tensor const& output_scale,
-    torch::Tensor const& input, torch::Tensor const& input_global_scale,
-    torch::Tensor const& input_offset_by_experts,
-    torch::Tensor const& output_scale_offset_by_experts, int64_t m_topk,
+    torch::stable::Tensor const& output,
+    torch::stable::Tensor const& output_scale,
+    torch::stable::Tensor const& input,
+    torch::stable::Tensor const& input_global_scale,
+    torch::stable::Tensor const& input_offset_by_experts,
+    torch::stable::Tensor const& output_scale_offset_by_experts, int64_t m_topk,
     int64_t k) {
   CHECK_INPUT(output, "output");
   CHECK_INPUT(output_scale, "output_scale");
@@ -354,41 +360,42 @@ static void validate_fp4_experts_quant_inputs(
   CHECK_INPUT(input_offset_by_experts, "input_offset_by_experts");
   CHECK_INPUT(output_scale_offset_by_experts, "output_scale_offset_by_experts");
 
-  TORCH_CHECK(output.dim() == 2);
-  TORCH_CHECK(output_scale.dim() == 2);
-  TORCH_CHECK(input.dim() == 2);
-  TORCH_CHECK(input_global_scale.dim() == 1);
-  TORCH_CHECK(input_offset_by_experts.dim() == 1);
-  TORCH_CHECK(output_scale_offset_by_experts.dim() == 1);
-
-  TORCH_CHECK(input.scalar_type() == HALF || input.scalar_type() == BF16);
-  TORCH_CHECK(input_global_scale.scalar_type() == FLOAT);
-  TORCH_CHECK(input_offset_by_experts.scalar_type() == INT);
-  TORCH_CHECK(output_scale_offset_by_experts.scalar_type() == INT);
+  STD_TORCH_CHECK(output.dim() == 2);
+  STD_TORCH_CHECK(output_scale.dim() == 2);
+  STD_TORCH_CHECK(input.dim() == 2);
+  STD_TORCH_CHECK(input_global_scale.dim() == 1);
+  STD_TORCH_CHECK(input_offset_by_experts.dim() == 1);
+  STD_TORCH_CHECK(output_scale_offset_by_experts.dim() == 1);
+
+  STD_TORCH_CHECK(input.scalar_type() == HALF || input.scalar_type() == BF16);
+  STD_TORCH_CHECK(input_global_scale.scalar_type() == FLOAT);
+  STD_TORCH_CHECK(input_offset_by_experts.scalar_type() == INT);
+  STD_TORCH_CHECK(output_scale_offset_by_experts.scalar_type() == INT);
   // output is uint8 (two nvfp4 values are packed into one uint8)
   // output_scale is int32 (four fp8 values are packed into one int32)
-  TORCH_CHECK(output.scalar_type() == UINT8);
-  TORCH_CHECK(output_scale.scalar_type() == INT);
+  STD_TORCH_CHECK(output.scalar_type() == UINT8);
+  STD_TORCH_CHECK(output_scale.scalar_type() == INT);
 
   const int BLOCK_SIZE = 16;
-  TORCH_CHECK(k % BLOCK_SIZE == 0, "k must be a multiple of 16");
+  STD_TORCH_CHECK(k % BLOCK_SIZE == 0, "k must be a multiple of 16");
   auto n_experts = input_global_scale.size(0);
-  TORCH_CHECK(input_offset_by_experts.size(0) == n_experts + 1);
-  TORCH_CHECK(output_scale_offset_by_experts.size(0) == n_experts + 1);
-  TORCH_CHECK(output.size(0) == m_topk);
-  TORCH_CHECK(output.size(1) == k / 2);
+  STD_TORCH_CHECK(input_offset_by_experts.size(0) == n_experts + 1);
+  STD_TORCH_CHECK(output_scale_offset_by_experts.size(0) == n_experts + 1);
+  STD_TORCH_CHECK(output.size(0) == m_topk);
+  STD_TORCH_CHECK(output.size(1) == k / 2);
   int scales_k = k / BLOCK_SIZE;
   // 4 means the swizzle requirement by nvidia nvfp4.
   int padded_k = (scales_k + (4 - 1)) / 4 * 4;
   // 4 means 4 fp8 values are packed into one int32
-  TORCH_CHECK(output_scale.size(1) * 4 == padded_k);
+  STD_TORCH_CHECK(output_scale.size(1) * 4 == padded_k);
 }
 
 void scaled_fp4_experts_quant_sm1xxa(
-    torch::Tensor& output, torch::Tensor& output_scale,
-    torch::Tensor const& input, torch::Tensor const& input_global_scale,
-    torch::Tensor const& input_offset_by_experts,
-    torch::Tensor const& output_scale_offset_by_experts) {
+    torch::stable::Tensor& output, torch::stable::Tensor& output_scale,
+    torch::stable::Tensor const& input,
+    torch::stable::Tensor const& input_global_scale,
+    torch::stable::Tensor const& input_offset_by_experts,
+    torch::stable::Tensor const& output_scale_offset_by_experts) {
   auto m_topk = input.size(0);
   auto k = input.size(1);
 
@@ -397,11 +404,11 @@ void scaled_fp4_experts_quant_sm1xxa(
                                     output_scale_offset_by_experts, m_topk, k);
 
   auto n_experts = input_global_scale.size(0);
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  const cudaStream_t stream =
-      at::cuda::getCurrentCUDAStream(input.get_device());
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream(input.get_device_index());
 
-  VLLM_DISPATCH_HALF_TYPES(
+  VLLM_STABLE_DISPATCH_HALF_TYPES(
       input.scalar_type(), "nvfp4_experts_quant_kernel", [&] {
         using cuda_type = vllm::CUDATypeConverter<scalar_t>::Type;
         vllm::quant_impl<cuda_type, /*FUSE_SILU_MUL=*/false>(
@@ -413,14 +420,15 @@ void scaled_fp4_experts_quant_sm1xxa(
 }
 
 void silu_and_mul_scaled_fp4_experts_quant_sm1xxa(
-    torch::Tensor& output, torch::Tensor& output_scale,
-    torch::Tensor const& input, torch::Tensor const& input_global_scale,
-    torch::Tensor const& input_offset_by_experts,
-    torch::Tensor const& output_scale_offset_by_experts) {
+    torch::stable::Tensor& output, torch::stable::Tensor& output_scale,
+    torch::stable::Tensor const& input,
+    torch::stable::Tensor const& input_global_scale,
+    torch::stable::Tensor const& input_offset_by_experts,
+    torch::stable::Tensor const& output_scale_offset_by_experts) {
   auto m_topk = input.size(0);
   // Input has gate || up layout, so k = input.size(1) / 2
   auto k_times_2 = input.size(1);
-  TORCH_CHECK(k_times_2 % 2 == 0, "input width must be even (gate || up)");
+  STD_TORCH_CHECK(k_times_2 % 2 == 0, "input width must be even (gate || up)");
   auto k = k_times_2 / 2;
 
   validate_fp4_experts_quant_inputs(output, output_scale, input,
@@ -428,11 +436,11 @@ void silu_and_mul_scaled_fp4_experts_quant_sm1xxa(
                                     output_scale_offset_by_experts, m_topk, k);
 
   auto n_experts = input_global_scale.size(0);
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  const cudaStream_t stream =
-      at::cuda::getCurrentCUDAStream(input.get_device());
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream(input.get_device_index());
 
-  VLLM_DISPATCH_HALF_TYPES(
+  VLLM_STABLE_DISPATCH_HALF_TYPES(
       input.scalar_type(), "silu_mul_nvfp4_experts_quant_kernel", [&] {
         using cuda_type = vllm::CUDATypeConverter<scalar_t>::Type;
         vllm::quant_impl<cuda_type, /*FUSE_SILU_MUL=*/true>(
diff --git a/csrc/libtorch_stable/quantization/fp4/nvfp4_quant_entry.cu b/csrc/libtorch_stable/quantization/fp4/nvfp4_quant_entry.cu
new file mode 100644
index 000000000000..8d4ba1accc7c
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/fp4/nvfp4_quant_entry.cu
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <torch/csrc/stable/tensor.h>
+
+#include "libtorch_stable/torch_utils.h"
+
+#include "cutlass_extensions/common.hpp"
+#include "nvfp4_utils.cuh"
+
+#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
+    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
+void scaled_fp4_quant_sm1xxa(torch::stable::Tensor const& output,
+                             torch::stable::Tensor const& input,
+                             torch::stable::Tensor const& output_sf,
+                             torch::stable::Tensor const& input_sf,
+                             bool is_sf_swizzled_layout);
+#endif
+
+#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
+    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
+void scaled_fp4_experts_quant_sm1xxa(
+    torch::stable::Tensor& output, torch::stable::Tensor& output_scale,
+    torch::stable::Tensor const& input,
+    torch::stable::Tensor const& input_global_scale,
+    torch::stable::Tensor const& input_offset_by_experts,
+    torch::stable::Tensor const& output_scale_offset_by_experts);
+#endif
+
+#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
+    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
+void silu_and_mul_nvfp4_quant_sm1xxa(torch::stable::Tensor& output,
+                                     torch::stable::Tensor& output_sf,
+                                     torch::stable::Tensor& input,
+                                     torch::stable::Tensor& input_sf);
+#endif
+
+#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
+    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
+void silu_and_mul_scaled_fp4_experts_quant_sm1xxa(
+    torch::stable::Tensor& output, torch::stable::Tensor& output_scale,
+    torch::stable::Tensor const& input,
+    torch::stable::Tensor const& input_global_scale,
+    torch::stable::Tensor const& input_offset_by_experts,
+    torch::stable::Tensor const& output_scale_offset_by_experts);
+#endif
+
+#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
+    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
+static bool nvfp4_quant_sm_supported() {
+  const int32_t sm = get_sm_version_num();
+  #if defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100
+  if (sm >= 100 && sm < 120) return true;
+  #endif
+  #if defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120
+  if (sm >= 120 && sm < 130) return true;
+  #endif
+  return false;
+}
+#endif
+
+void scaled_fp4_quant_out(torch::stable::Tensor const& input,
+                          torch::stable::Tensor const& input_sf,
+                          bool is_sf_swizzled_layout,
+                          torch::stable::Tensor& output,
+                          torch::stable::Tensor& output_sf) {
+#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
+    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
+  STD_TORCH_CHECK(nvfp4_quant_sm_supported(),
+                  "No compiled nvfp4 quantization kernel for SM ",
+                  get_sm_version_num(),
+                  ". Recompile with the appropriate CUDA arch.");
+  return scaled_fp4_quant_sm1xxa(output, input, output_sf, input_sf,
+                                 is_sf_swizzled_layout);
+#endif
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(false,
+                                  "No compiled nvfp4 quantization kernel");
+}
+
+std::tuple<torch::stable::Tensor, torch::stable::Tensor> scaled_fp4_quant_func(
+    torch::stable::Tensor const& input, torch::stable::Tensor const& input_sf,
+    bool is_sf_swizzled_layout) {
+  int64_t n = input.size(-1);
+  int64_t m = input.numel() / n;
+  auto device = input.device();
+
+  // Two fp4 values packed into a uint8
+  auto output = torch::stable::empty(
+      {m, n / 2}, torch::headeronly::ScalarType::Byte, std::nullopt, device);
+
+  torch::stable::Tensor output_sf;
+  if (is_sf_swizzled_layout) {
+    auto [sf_m, sf_n] = vllm::computeSwizzledSFShape(m, n);
+    output_sf = torch::stable::empty(
+        {sf_m, sf_n}, torch::headeronly::ScalarType::Int, std::nullopt, device);
+  } else {
+    output_sf = torch::stable::empty({m, n / CVT_FP4_SF_VEC_SIZE},
+                                     torch::headeronly::ScalarType::Byte,
+                                     std::nullopt, device);
+  }
+
+  scaled_fp4_quant_out(input, input_sf, is_sf_swizzled_layout, output,
+                       output_sf);
+  return {output, output_sf};
+}
+
+void scaled_fp4_experts_quant(
+    torch::stable::Tensor& output, torch::stable::Tensor& output_scale,
+    torch::stable::Tensor const& input,
+    torch::stable::Tensor const& input_global_scale,
+    torch::stable::Tensor const& input_offset_by_experts,
+    torch::stable::Tensor const& output_scale_offset_by_experts) {
+#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
+    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
+  STD_TORCH_CHECK(nvfp4_quant_sm_supported(),
+                  "No compiled nvfp4 experts quantization kernel for SM ",
+                  get_sm_version_num(),
+                  ". Recompile with the appropriate CUDA arch.");
+  return scaled_fp4_experts_quant_sm1xxa(
+      output, output_scale, input, input_global_scale, input_offset_by_experts,
+      output_scale_offset_by_experts);
+#endif
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
+      false, "No compiled nvfp4 experts quantization kernel");
+}
+
+void silu_and_mul_nvfp4_quant(torch::stable::Tensor& output,
+                              torch::stable::Tensor& output_sf,
+                              torch::stable::Tensor& input,
+                              torch::stable::Tensor& input_sf) {
+#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
+    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
+  STD_TORCH_CHECK(nvfp4_quant_sm_supported(),
+                  "No compiled silu_and_mul nvfp4 quantization kernel for SM ",
+                  get_sm_version_num(),
+                  ". Recompile with the appropriate CUDA arch.");
+  return silu_and_mul_nvfp4_quant_sm1xxa(output, output_sf, input, input_sf);
+#endif
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
+      false, "No compiled silu_and_mul nvfp4 quantization kernel");
+}
+
+void silu_and_mul_scaled_fp4_experts_quant(
+    torch::stable::Tensor& output, torch::stable::Tensor& output_scale,
+    torch::stable::Tensor const& input,
+    torch::stable::Tensor const& input_global_scale,
+    torch::stable::Tensor const& input_offset_by_experts,
+    torch::stable::Tensor const& output_scale_offset_by_experts) {
+#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
+    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
+  STD_TORCH_CHECK(nvfp4_quant_sm_supported(),
+                  "No compiled silu_and_mul nvfp4 experts quantization kernel "
+                  "for SM ",
+                  get_sm_version_num(),
+                  ". Recompile with the appropriate CUDA arch.");
+  return silu_and_mul_scaled_fp4_experts_quant_sm1xxa(
+      output, output_scale, input, input_global_scale, input_offset_by_experts,
+      output_scale_offset_by_experts);
+#endif
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
+      false, "No compiled silu_and_mul nvfp4 experts quantization kernel");
+}
diff --git a/csrc/quantization/fp4/nvfp4_quant_kernels.cu b/csrc/libtorch_stable/quantization/fp4/nvfp4_quant_kernels.cu
similarity index 66%
rename from csrc/quantization/fp4/nvfp4_quant_kernels.cu
rename to csrc/libtorch_stable/quantization/fp4/nvfp4_quant_kernels.cu
index 773047c22500..e1d101f9d867 100644
--- a/csrc/quantization/fp4/nvfp4_quant_kernels.cu
+++ b/csrc/libtorch_stable/quantization/fp4/nvfp4_quant_kernels.cu
@@ -14,16 +14,16 @@
  * limitations under the License.
  */
 
-#include <torch/all.h>
+#include <torch/csrc/stable/tensor.h>
 
 #include <cuda_runtime_api.h>
 #include <cuda_runtime.h>
 
-#include <ATen/cuda/CUDAContext.h>
-#include <c10/cuda/CUDAGuard.h>
-
 #include <cuda_fp8.h>
-#include "dispatch_utils.h"
+
+#include "libtorch_stable/torch_utils.h"
+#include "libtorch_stable/dispatch_utils.h"
+#include "cuda_vec_utils.cuh"
 
 #include "cuda_utils.h"
 #include "launch_bounds_utils.h"
@@ -38,8 +38,8 @@ namespace vllm {
 // Use UE4M3 by default.
 template <class Type, bool UE8M0_SF = false>
 __global__ void __launch_bounds__(512, VLLM_BLOCKS_PER_SM(512))
-    cvt_fp16_to_fp4(int32_t numRows, int32_t numCols, int32_t num_padded_cols,
-                    Type const* __restrict__ in,
+    cvt_fp16_to_fp4(int32_t numRows, int32_t numCols, int32_t outputCols,
+                    int32_t num_padded_cols, Type const* __restrict__ in,
                     float const* __restrict__ SFScale,
                     uint32_t* __restrict__ out, uint32_t* __restrict__ SFout) {
   using PackedVec = vllm::PackedVec<Type, CVT_FP4_PACK16>;
@@ -50,7 +50,7 @@ __global__ void __launch_bounds__(512, VLLM_BLOCKS_PER_SM(512))
                 "Vec size is not matched.");
 
   // Precompute SF layout parameter (constant for entire kernel).
-  int32_t const numKTiles = (numCols + 63) / 64;
+  int32_t const numKTiles = (outputCols + 63) / 64;
 
   int sf_m = round_up<int>(numRows, 128);
   int32_t const colIdx = blockDim.x * blockIdx.y + threadIdx.x;
@@ -68,16 +68,17 @@ __global__ void __launch_bounds__(512, VLLM_BLOCKS_PER_SM(512))
       PackedVec in_vec;
       int64_t inOffset = rowIdx * (numCols / CVT_FP4_ELTS_PER_THREAD) + colIdx;
 
-      // If we are outside valid rows OR outside valid columns -> Use Zeros
-      bool valid = (rowIdx < numRows) && (elem_idx < numCols);
+      // If we are outside valid columns, feed zeros
+      bool valid_input = (rowIdx < numRows) && (elem_idx < numCols);
+      bool valid_output = (rowIdx < numRows) && (elem_idx < outputCols);
       if constexpr (CVT_FP4_PACK16) {
         ld256_cg_or_zero(reinterpret_cast<u32x8_t&>(in_vec),
                          &reinterpret_cast<const uint32_t*>(in)[inOffset * 8],
-                         valid);
+                         valid_input);
       } else {
         ld128_cg_or_zero(reinterpret_cast<uint4&>(in_vec),
                          &reinterpret_cast<const uint32_t*>(in)[inOffset * 4],
-                         valid);
+                         valid_input);
       }
 
       auto sf_out =
@@ -89,16 +90,16 @@ __global__ void __launch_bounds__(512, VLLM_BLOCKS_PER_SM(512))
           cvt_warp_fp16_to_fp4<Type, CVT_FP4_NUM_THREADS_PER_SF, UE8M0_SF>(
               in_vec, global_scale, sf_out);
 
-      // We do NOT write output for padding because the 'out' tensor is not
-      // padded.
-      if (valid) {
+      if (valid_output) {
         if constexpr (CVT_FP4_PACK16) {
-          int64_t outOffset = rowIdx * (numCols / 8) + colIdx * 2;
+          int64_t outOffset = rowIdx * (outputCols / 8) + colIdx * 2;
           uint64_t packed64 =
               (uint64_t(out_val.hi) << 32) | uint64_t(out_val.lo);
           reinterpret_cast<uint64_t*>(out)[outOffset >> 1] = packed64;
         } else {
-          out[inOffset] = out_val;
+          int64_t outOffset =
+              rowIdx * (outputCols / CVT_FP4_ELTS_PER_THREAD) + colIdx;
+          out[outOffset] = out_val;
         }
       }
     }
@@ -109,7 +110,8 @@ __global__ void __launch_bounds__(512, VLLM_BLOCKS_PER_SM(512))
 template <class Type, bool UE8M0_SF = false>
 __global__ void __launch_bounds__(512, VLLM_BLOCKS_PER_SM(512))
     cvt_fp16_to_fp4_sf_major(int32_t numRows, int32_t numCols,
-                             int32_t sf_n_unpadded, int32_t num_packed_cols,
+                             int32_t outputCols, int32_t sf_n_unpadded,
+                             int32_t num_packed_cols,
                              Type const* __restrict__ in,
                              float const* __restrict__ SFScale,
                              uint32_t* __restrict__ out,
@@ -136,7 +138,7 @@ __global__ void __launch_bounds__(512, VLLM_BLOCKS_PER_SM(512))
       PackedVec in_vec;
       int64_t inOffset = rowIdx * (numCols / CVT_FP4_ELTS_PER_THREAD) + colIdx;
 
-      // If we are outside valid rows OR outside valid columns -> Use Zeros
+      // If we are outside valid columns, feed zeros
       bool valid = (rowIdx < numRows) && (elem_idx < numCols);
       if constexpr (CVT_FP4_PACK16) {
         ld256_cg_or_zero(reinterpret_cast<u32x8_t&>(in_vec),
@@ -155,16 +157,16 @@ __global__ void __launch_bounds__(512, VLLM_BLOCKS_PER_SM(512))
           cvt_warp_fp16_to_fp4<Type, CVT_FP4_NUM_THREADS_PER_SF, UE8M0_SF>(
               in_vec, global_scale, sf_out);
 
-      // We do NOT write output for padding because the 'out' tensor is not
-      // padded.
-      if (valid) {
+      if (rowIdx < numRows) {
         if constexpr (CVT_FP4_PACK16) {
-          int64_t outOffset = rowIdx * (numCols / 8) + colIdx * 2;
+          int64_t outOffset = rowIdx * (outputCols / 8) + colIdx * 2;
           uint64_t packed64 =
               (uint64_t(out_val.hi) << 32) | uint64_t(out_val.lo);
           reinterpret_cast<uint64_t*>(out)[outOffset >> 1] = packed64;
         } else {
-          out[inOffset] = out_val;
+          int64_t outOffset =
+              rowIdx * (outputCols / CVT_FP4_ELTS_PER_THREAD) + colIdx;
+          out[outOffset] = out_val;
         }
       }
     }
@@ -173,18 +175,22 @@ __global__ void __launch_bounds__(512, VLLM_BLOCKS_PER_SM(512))
 
 }  // namespace vllm
 
-void scaled_fp4_quant_sm1xxa(torch::Tensor const& output,
-                             torch::Tensor const& input,
-                             torch::Tensor const& output_sf,
-                             torch::Tensor const& input_sf,
+void scaled_fp4_quant_sm1xxa(torch::stable::Tensor const& output,
+                             torch::stable::Tensor const& input,
+                             torch::stable::Tensor const& output_sf,
+                             torch::stable::Tensor const& input_sf,
                              bool is_sf_swizzled_layout) {
   int32_t m = input.size(0);
   int32_t n = input.size(1);
+  int32_t output_n = output.size(1) * 2;
 
-  TORCH_CHECK(n % 16 == 0, "The N dimension must be multiple of 16.");
-  TORCH_CHECK(input.scalar_type() == at::ScalarType::Half ||
-                  input.scalar_type() == at::ScalarType::BFloat16,
-              "Unsupported input data type for quantize_to_fp4.");
+  STD_TORCH_CHECK(n % 16 == 0, "The N dimension must be multiple of 16.");
+  STD_TORCH_CHECK(output_n % 16 == 0,
+                  "The output tensor width must be a multiple of 16.");
+  STD_TORCH_CHECK(
+      input.scalar_type() == torch::headeronly::ScalarType::Half ||
+          input.scalar_type() == torch::headeronly::ScalarType::BFloat16,
+      "Unsupported input data type for quantize_to_fp4.");
 
   int multiProcessorCount =
       get_device_attribute(cudaDevAttrMultiProcessorCount, -1);
@@ -192,10 +198,11 @@ void scaled_fp4_quant_sm1xxa(torch::Tensor const& output,
   auto input_sf_ptr = static_cast<float const*>(input_sf.data_ptr());
   auto sf_out = static_cast<int32_t*>(output_sf.data_ptr());
   auto output_ptr = static_cast<int64_t*>(output.data_ptr());
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  auto stream = at::cuda::getCurrentCUDAStream(input.get_device());
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  auto stream = get_current_cuda_stream(input.get_device_index());
 
-  int sf_n_unpadded = int(n / CVT_FP4_SF_VEC_SIZE);
+  int output_sf_n_unpadded = int(output_n / CVT_FP4_SF_VEC_SIZE);
 
   // Grid, Block size. Each thread converts 8 values.
   dim3 block(std::min(int(n / ELTS_PER_THREAD), 512));
@@ -203,7 +210,7 @@ void scaled_fp4_quant_sm1xxa(torch::Tensor const& output,
       vllm_runtime_blocks_per_sm(static_cast<int>(block.x));
 
   if (is_sf_swizzled_layout) {
-    int sf_n_int = int(vllm::round_up(sf_n_unpadded, 4) / 4);
+    int sf_n_int = int(vllm::round_up(output_sf_n_unpadded, 4) / 4);
     int32_t num_padded_cols =
         sf_n_int * 4 * CVT_FP4_SF_VEC_SIZE / CVT_FP4_ELTS_PER_THREAD;
 
@@ -213,31 +220,32 @@ void scaled_fp4_quant_sm1xxa(torch::Tensor const& output,
                  std::max(1, (multiProcessorCount * numBlocksPerSM) / grid_y));
     dim3 grid(grid_x, grid_y);
 
-    VLLM_DISPATCH_HALF_TYPES(input.scalar_type(), "nvfp4_quant_kernel", [&] {
-      using cuda_type = vllm::CUDATypeConverter<scalar_t>::Type;
-      auto input_ptr = static_cast<cuda_type const*>(input.data_ptr());
-      // NOTE: We don't support e8m0 scales at this moment.
-      vllm::cvt_fp16_to_fp4<cuda_type, false><<<grid, block, 0, stream>>>(
-          m, n, num_padded_cols, input_ptr, input_sf_ptr,
-          reinterpret_cast<uint32_t*>(output_ptr),
-          reinterpret_cast<uint32_t*>(sf_out));
-    });
+    VLLM_STABLE_DISPATCH_HALF_TYPES(
+        input.scalar_type(), "nvfp4_quant_kernel", [&] {
+          using cuda_type = vllm::CUDATypeConverter<scalar_t>::Type;
+          auto input_ptr = static_cast<cuda_type const*>(input.data_ptr());
+          vllm::cvt_fp16_to_fp4<cuda_type, false><<<grid, block, 0, stream>>>(
+              m, n, output_n, num_padded_cols, input_ptr, input_sf_ptr,
+              reinterpret_cast<uint32_t*>(output_ptr),
+              reinterpret_cast<uint32_t*>(sf_out));
+        });
   } else {
-    int num_packed_cols = n / CVT_FP4_ELTS_PER_THREAD;
+    int num_packed_cols = output_n / CVT_FP4_ELTS_PER_THREAD;
     int grid_y = vllm::div_round_up(num_packed_cols, static_cast<int>(block.x));
     int grid_x = std::min(
         m, std::max(1, (multiProcessorCount * numBlocksPerSM) / grid_y));
     dim3 grid(grid_x, grid_y);
 
-    VLLM_DISPATCH_HALF_TYPES(input.scalar_type(), "nvfp4_quant_kernel", [&] {
-      using cuda_type = vllm::CUDATypeConverter<scalar_t>::Type;
-      auto input_ptr = static_cast<cuda_type const*>(input.data_ptr());
-      // NOTE: We don't support e8m0 scales at this moment.
-      vllm::cvt_fp16_to_fp4_sf_major<cuda_type, false>
-          <<<grid, block, 0, stream>>>(m, n, sf_n_unpadded, num_packed_cols,
-                                       input_ptr, input_sf_ptr,
-                                       reinterpret_cast<uint32_t*>(output_ptr),
-                                       reinterpret_cast<uint32_t*>(sf_out));
-    });
+    VLLM_STABLE_DISPATCH_HALF_TYPES(
+        input.scalar_type(), "nvfp4_quant_kernel", [&] {
+          using cuda_type = vllm::CUDATypeConverter<scalar_t>::Type;
+          auto input_ptr = static_cast<cuda_type const*>(input.data_ptr());
+          vllm::cvt_fp16_to_fp4_sf_major<cuda_type, false>
+              <<<grid, block, 0, stream>>>(
+                  m, n, output_n, output_sf_n_unpadded, num_packed_cols,
+                  input_ptr, input_sf_ptr,
+                  reinterpret_cast<uint32_t*>(output_ptr),
+                  reinterpret_cast<uint32_t*>(sf_out));
+        });
   }
 }
diff --git a/csrc/libtorch_stable/quantization/fp4/nvfp4_scaled_mm_entry.cu b/csrc/libtorch_stable/quantization/fp4/nvfp4_scaled_mm_entry.cu
new file mode 100644
index 000000000000..d7b2a18e29cb
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/fp4/nvfp4_scaled_mm_entry.cu
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <torch/csrc/stable/tensor.h>
+
+#include "libtorch_stable/torch_utils.h"
+
+#include "cutlass_extensions/common.hpp"
+
+#if defined ENABLE_NVFP4_SM100 && ENABLE_NVFP4_SM100
+void cutlass_scaled_fp4_mm_sm100a(torch::stable::Tensor& D,
+                                  torch::stable::Tensor const& A,
+                                  torch::stable::Tensor const& B,
+                                  torch::stable::Tensor const& A_sf,
+                                  torch::stable::Tensor const& B_sf,
+                                  torch::stable::Tensor const& alpha);
+#endif
+
+#if defined ENABLE_NVFP4_SM120 && ENABLE_NVFP4_SM120
+void cutlass_scaled_fp4_mm_sm120a(torch::stable::Tensor& D,
+                                  torch::stable::Tensor const& A,
+                                  torch::stable::Tensor const& B,
+                                  torch::stable::Tensor const& A_sf,
+                                  torch::stable::Tensor const& B_sf,
+                                  torch::stable::Tensor const& alpha);
+#endif
+
+void cutlass_scaled_fp4_mm(torch::stable::Tensor& D,
+                           const torch::stable::Tensor& A,
+                           const torch::stable::Tensor& B,
+                           const torch::stable::Tensor& A_sf,
+                           const torch::stable::Tensor& B_sf,
+                           const torch::stable::Tensor& alpha) {
+  // Make sure we're on A's device.
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      A.get_device_index());
+  const int32_t sm = get_sm_version_num();
+
+#if defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100
+  if (sm >= 100 && sm < 120) {
+    cutlass_scaled_fp4_mm_sm100a(D, A, B, A_sf, B_sf, alpha);
+    return;
+  }
+#endif
+
+#if defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120
+  if (sm >= 120 && sm < 130) {
+    cutlass_scaled_fp4_mm_sm120a(D, A, B, A_sf, B_sf, alpha);
+    return;
+  }
+#endif
+
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
+      false, "No compiled nvfp4 mm kernel for SM ", sm,
+      ". Recompile with CUDA >= 12.8 and CC >= 100.");
+}
+
+bool cutlass_scaled_mm_supports_fp4(int64_t cuda_device_capability) {
+  int runtimeVersion;
+  cudaRuntimeGetVersion(&runtimeVersion);
+  if (runtimeVersion < 12080) return false;
+  // Only report support when the SM-specific kernel was actually compiled in,
+  // so the Python-side backend selector does not choose CUTLASS and then hit
+  // TORCH_CHECK_NOT_IMPLEMENTED (or worse, fall through to Marlin).
+#if defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100
+  if (cuda_device_capability >= 100 && cuda_device_capability < 120)
+    return true;
+#endif
+#if defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120
+  if (cuda_device_capability >= 120 && cuda_device_capability < 130)
+    return true;
+#endif
+  return false;
+}
diff --git a/csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu b/csrc/libtorch_stable/quantization/fp4/nvfp4_scaled_mm_kernels.cu
similarity index 58%
rename from csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu
rename to csrc/libtorch_stable/quantization/fp4/nvfp4_scaled_mm_kernels.cu
index 5bc4c38a275c..fc83c6e8d348 100644
--- a/csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu
+++ b/csrc/libtorch_stable/quantization/fp4/nvfp4_scaled_mm_kernels.cu
@@ -14,15 +14,16 @@
  * limitations under the License.
  */
 
-#include <torch/all.h>
+#include <torch/csrc/stable/tensor.h>
 
-#include <ATen/cuda/CUDAContext.h>
-#include <c10/cuda/CUDAGuard.h>
+#include "libtorch_stable/torch_utils.h"
 
 #include "cutlass_extensions/common.hpp"
 
 #include "cutlass/cutlass.h"
 
+#include <type_traits>
+
 #include "cutlass/gemm/collective/collective_builder.hpp"
 #include "cutlass/epilogue/collective/collective_builder.hpp"
 #include "cutlass/gemm/device/gemm_universal_adapter.h"
@@ -31,15 +32,21 @@
 #include "cutlass/util/packed_stride.hpp"
 
 #include "core/math.hpp"
+#include "core/batch_invariant.hpp"
 
 using namespace cute;
 
 #if defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
 
-// Configuration for M in (256, inf)
+// Configuration for M in (256, inf), also reused for batch-invariant mode
+// to keep a fixed large-M tiling across all batch sizes.
+// Do not change the tile K or tile scheduler here unless you are also
+// updating the batch-invariant behavior; if batch-invariant mode needs a
+// different schedule, add a dedicated batch-invariant config/path instead.
 struct sm100_fp4_config_default {
   using KernelSchedule = cutlass::gemm::collective::KernelScheduleAuto;
   using EpilogueSchedule = cutlass::epilogue::collective::EpilogueScheduleAuto;
+  using TileScheduler = cutlass::gemm::PersistentScheduler;
   using TileShape = Shape<_256, _256, _256>;
   using ClusterShape = Shape<_2, _1, _1>;
   using PerSmTileShape_MNK = Shape<_128, _256, _256>;
@@ -49,6 +56,7 @@ struct sm100_fp4_config_default {
 struct sm100_fp4_config_M256 {
   using KernelSchedule = cutlass::gemm::collective::KernelScheduleAuto;
   using EpilogueSchedule = cutlass::epilogue::collective::EpilogueScheduleAuto;
+  using TileScheduler = void;
   using TileShape = Shape<_256, _128, _256>;
   using ClusterShape = Shape<_2, _1, _1>;
   using PerSmTileShape_MNK = Shape<_128, _128, _256>;
@@ -58,6 +66,7 @@ struct sm100_fp4_config_M256 {
 struct sm100_fp4_config_M16 {
   using KernelSchedule = cutlass::gemm::collective::KernelScheduleAuto;
   using EpilogueSchedule = cutlass::epilogue::collective::EpilogueScheduleAuto;
+  using TileScheduler = void;
   using TileShape = Shape<_128, _128, _256>;
   using ClusterShape = Shape<_1, _1, _1>;
   using PerSmTileShape_MNK = Shape<_128, _128, _256>;
@@ -99,7 +108,7 @@ struct Fp4GemmSm100 {
           cutlass::epilogue::collective::EpilogueTileAuto, ElementAccumulator,
           ElementAccumulator, ElementC, LayoutCTag, AlignmentC, ElementD,
           LayoutDTag, AlignmentD,
-          cutlass::epilogue::collective::EpilogueScheduleAuto>::CollectiveOp;
+          typename Config::EpilogueSchedule>::CollectiveOp;
 
   using CollectiveMainloop =
       typename cutlass::gemm::collective::CollectiveBuilder<
@@ -108,10 +117,13 @@ struct Fp4GemmSm100 {
           ClusterShape,
           cutlass::gemm::collective::StageCountAutoCarveout<static_cast<int>(
               sizeof(typename CollectiveEpilogue::SharedStorage))>,
-          cutlass::gemm::collective::KernelScheduleAuto>::CollectiveOp;
+          typename Config::KernelSchedule>::CollectiveOp;
 
-  using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
-      Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue, void>;
+  using TileScheduler = typename Config::TileScheduler;
+  using GemmKernel =
+      cutlass::gemm::kernel::GemmUniversal<Shape<int, int, int, int>,
+                                           CollectiveMainloop,
+                                           CollectiveEpilogue, TileScheduler>;
   using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
   using StrideA = typename Gemm::GemmKernel::StrideA;
   using LayoutA = decltype(cute::make_layout(make_shape(0, 0, 0), StrideA{}));
@@ -127,8 +139,9 @@ struct Fp4GemmSm100 {
 
 template <typename Config>
 typename Config::Gemm::Arguments args_from_options(
-    at::Tensor& D, at::Tensor const& A, at::Tensor const& B,
-    at::Tensor const& A_sf, at::Tensor const& B_sf, at::Tensor const& alpha,
+    torch::stable::Tensor& D, torch::stable::Tensor const& A,
+    torch::stable::Tensor const& B, torch::stable::Tensor const& A_sf,
+    torch::stable::Tensor const& B_sf, torch::stable::Tensor const& alpha,
     int64_t M, int64_t N, int64_t K) {
   using ElementA = typename Config::Gemm::ElementA;
   using ElementB = typename Config::Gemm::ElementB;
@@ -174,19 +187,20 @@ typename Config::Gemm::Arguments args_from_options(
 }
 
 template <typename Config>
-void runGemm(at::Tensor& D, at::Tensor const& A, at::Tensor const& B,
-             at::Tensor const& A_sf, at::Tensor const& B_sf,
-             at::Tensor const& alpha, int64_t m, int64_t n, int64_t k,
-             cudaStream_t stream) {
+void runGemm(torch::stable::Tensor& D, torch::stable::Tensor const& A,
+             torch::stable::Tensor const& B, torch::stable::Tensor const& A_sf,
+             torch::stable::Tensor const& B_sf,
+             torch::stable::Tensor const& alpha, int64_t m, int64_t n,
+             int64_t k, cudaStream_t stream) {
   typename Config::Gemm gemm;
 
   auto arguments =
       args_from_options<Config>(D, A, B, A_sf, B_sf, alpha, m, n, k);
 
   size_t workspace_size = Config::Gemm::get_workspace_size(arguments);
-  auto const workspace_options =
-      torch::TensorOptions().dtype(torch::kUInt8).device(A.device());
-  auto workspace = torch::empty(workspace_size, workspace_options);
+  auto workspace =
+      torch::stable::empty(workspace_size, torch::headeronly::ScalarType::Byte,
+                           std::nullopt, A.device());
 
   CUTLASS_CHECK(gemm.can_implement(arguments));
 
@@ -197,12 +211,24 @@ void runGemm(at::Tensor& D, at::Tensor const& A, at::Tensor const& B,
 
 // Dispatch function to select appropriate config based on M
 template <typename OutType>
-void cutlass_fp4_gemm_dispatch(torch::Tensor& D, torch::Tensor const& A,
-                               torch::Tensor const& B,
-                               torch::Tensor const& A_sf,
-                               torch::Tensor const& B_sf,
-                               torch::Tensor const& alpha, int64_t m, int64_t n,
-                               int64_t k, cudaStream_t stream) {
+void cutlass_fp4_gemm_dispatch(torch::stable::Tensor& D,
+                               torch::stable::Tensor const& A,
+                               torch::stable::Tensor const& B,
+                               torch::stable::Tensor const& A_sf,
+                               torch::stable::Tensor const& B_sf,
+                               torch::stable::Tensor const& alpha, int64_t m,
+                               int64_t n, int64_t k, cudaStream_t stream) {
+  if (vllm::vllm_is_batch_invariant()) {
+    using BiGemm = Fp4GemmSm100<sm100_fp4_config_default, OutType>;
+    static_assert(
+        cute::is_same_v<typename BiGemm::TileScheduler,
+                        cutlass::gemm::PersistentScheduler>,
+        "batch_invariant requires a persistent tile scheduler; stream-K or "
+        "split-K would break numerical invariance");
+    runGemm<BiGemm>(D, A, B, A_sf, B_sf, alpha, m, n, k, stream);
+    return;
+  }
+
   uint32_t const mp2 = std::max(static_cast<uint32_t>(16), next_pow_2(m));
 
   if (mp2 <= 16) {
@@ -222,61 +248,65 @@ void cutlass_fp4_gemm_dispatch(torch::Tensor& D, torch::Tensor const& A,
 
 #else
 template <typename OutType>
-void cutlass_fp4_gemm_dispatch(torch::Tensor& D, torch::Tensor const& A,
-                               torch::Tensor const& B,
-                               torch::Tensor const& A_sf,
-                               torch::Tensor const& B_sf,
-                               torch::Tensor const& alpha, int64_t m, int64_t n,
-                               int64_t k, cudaStream_t stream) {
-  TORCH_CHECK(false,
-              "Unsupported CUTLASS version. Set VLLM_CUTLASS_SRC_DIR to "
-              "a CUTLASS 3.8 source directory to enable support.");
+void cutlass_fp4_gemm_dispatch(torch::stable::Tensor& D,
+                               torch::stable::Tensor const& A,
+                               torch::stable::Tensor const& B,
+                               torch::stable::Tensor const& A_sf,
+                               torch::stable::Tensor const& B_sf,
+                               torch::stable::Tensor const& alpha, int64_t m,
+                               int64_t n, int64_t k, cudaStream_t stream) {
+  STD_TORCH_CHECK(false,
+                  "Unsupported CUTLASS version. Set VLLM_CUTLASS_SRC_DIR to "
+                  "a CUTLASS 3.8 source directory to enable support.");
 }
 #endif  // defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
 
-#define CHECK_TYPE(x, st, m) \
-  TORCH_CHECK(x.scalar_type() == st, ": Inconsistency of Tensor type:", m)
+#define CHECK_TYPE(x, st, m)             \
+  STD_TORCH_CHECK(x.scalar_type() == st, \
+                  ": Inconsistency of torch::stable::Tensor type:", m)
 #define CHECK_TH_CUDA(x, m) \
-  TORCH_CHECK(x.is_cuda(), m, ": must be a CUDA tensor")
+  STD_TORCH_CHECK(x.is_cuda(), m, ": must be a CUDA tensor")
 #define CHECK_CONTIGUOUS(x, m) \
-  TORCH_CHECK(x.is_contiguous(), m, ": must be contiguous")
+  STD_TORCH_CHECK(x.is_contiguous(), m, ": must be contiguous")
 #define CHECK_INPUT(x, st, m) \
   CHECK_TH_CUDA(x, m);        \
   CHECK_CONTIGUOUS(x, m);     \
   CHECK_TYPE(x, st, m)
 
-constexpr auto FLOAT4_E2M1X2 = at::ScalarType::Byte;
-constexpr auto SF_DTYPE = at::ScalarType::Float8_e4m3fn;
+constexpr auto FLOAT4_E2M1X2 = torch::headeronly::ScalarType::Byte;
+constexpr auto SF_DTYPE = torch::headeronly::ScalarType::Float8_e4m3fn;
 
-void cutlass_scaled_fp4_mm_sm100a(torch::Tensor& D, torch::Tensor const& A,
-                                  torch::Tensor const& B,
-                                  torch::Tensor const& A_sf,
-                                  torch::Tensor const& B_sf,
-                                  torch::Tensor const& alpha) {
+void cutlass_scaled_fp4_mm_sm100a(torch::stable::Tensor& D,
+                                  torch::stable::Tensor const& A,
+                                  torch::stable::Tensor const& B,
+                                  torch::stable::Tensor const& A_sf,
+                                  torch::stable::Tensor const& B_sf,
+                                  torch::stable::Tensor const& alpha) {
   CHECK_INPUT(A, FLOAT4_E2M1X2, "a");
   CHECK_INPUT(B, FLOAT4_E2M1X2, "b");
 
   CHECK_INPUT(A_sf, SF_DTYPE, "scale_a");
   CHECK_INPUT(B_sf, SF_DTYPE, "scale_b");
 
-  CHECK_INPUT(alpha, at::ScalarType::Float, "alpha");
+  CHECK_INPUT(alpha, torch::headeronly::ScalarType::Float, "alpha");
 
-  TORCH_CHECK(A.dim() == 2, "a must be a matrix");
-  TORCH_CHECK(B.dim() == 2, "b must be a matrix");
-  TORCH_CHECK(A.sizes()[1] == B.sizes()[1],
-              "a and b shapes cannot be multiplied (", A.sizes()[0], "x",
-              A.sizes()[1], " and ", B.sizes()[0], "x", B.sizes()[1], ")");
+  STD_TORCH_CHECK(A.dim() == 2, "a must be a matrix");
+  STD_TORCH_CHECK(B.dim() == 2, "b must be a matrix");
+  STD_TORCH_CHECK(A.size(1) == B.size(1),
+                  "a and b shapes cannot be multiplied (", A.size(0), "x",
+                  A.size(1), " and ", B.size(0), "x", B.size(1), ")");
 
-  auto const m = A.sizes()[0];
-  auto const n = B.sizes()[0];
-  auto const k = A.sizes()[1] * 2;
+  auto const m = A.size(0);
+  auto const n = B.size(0);
+  auto const k = A.size(1) * 2;
 
   constexpr int alignment = 32;
-  TORCH_CHECK(k % alignment == 0, "Expected k to be divisible by ", alignment,
-              ", but got a shape: (", A.sizes()[0], "x", A.sizes()[1],
-              "), k: ", k, ".");
-  TORCH_CHECK(n % alignment == 0, "Expected n to be divisible by ", alignment,
-              ", but got b shape: (", B.sizes()[0], "x", B.sizes()[1], ").");
+  STD_TORCH_CHECK(k % alignment == 0, "Expected k to be divisible by ",
+                  alignment, ", but got a shape: (", A.size(0), "x", A.size(1),
+                  "), k: ", k, ".");
+  STD_TORCH_CHECK(n % alignment == 0, "Expected n to be divisible by ",
+                  alignment, ", but got b shape: (", B.size(0), "x", B.size(1),
+                  ").");
 
   auto round_up = [](int x, int y) { return (x + y - 1) / y * y; };
   int rounded_m = round_up(m, 128);
@@ -285,33 +315,34 @@ void cutlass_scaled_fp4_mm_sm100a(torch::Tensor& D, torch::Tensor const& A,
   // integer.
   int rounded_k = round_up(k / 16, 4);
 
-  TORCH_CHECK(A_sf.dim() == 2, "scale_a must be a matrix");
-  TORCH_CHECK(B_sf.dim() == 2, "scale_b must be a matrix");
-  TORCH_CHECK(A_sf.sizes()[1] == B_sf.sizes()[1],
-              "scale_a and scale_b shapes cannot be multiplied (",
-              A_sf.sizes()[0], "x", A_sf.sizes()[1], " and ", B_sf.sizes()[0],
-              "x", B_sf.sizes()[1], ")");
-  TORCH_CHECK(A_sf.sizes()[0] == rounded_m && A_sf.sizes()[1] == rounded_k,
-              "scale_a must be padded and swizzled to a shape (", rounded_m,
-              "x", rounded_k, "), but got a shape (", A_sf.sizes()[0], "x",
-              A_sf.sizes()[1], ")");
-  TORCH_CHECK(B_sf.sizes()[0] == rounded_n && B_sf.sizes()[1] == rounded_k,
-              "scale_b must be padded and swizzled to a shape (", rounded_n,
-              "x", rounded_k, "), but got a shape (", B_sf.sizes()[0], "x",
-              B_sf.sizes()[1], ")");
-
-  auto out_dtype = D.dtype();
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(A));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream(A.get_device());
-
-  if (out_dtype == at::ScalarType::Half) {
+  STD_TORCH_CHECK(A_sf.dim() == 2, "scale_a must be a matrix");
+  STD_TORCH_CHECK(B_sf.dim() == 2, "scale_b must be a matrix");
+  STD_TORCH_CHECK(A_sf.size(1) == B_sf.size(1),
+                  "scale_a and scale_b shapes cannot be multiplied (",
+                  A_sf.size(0), "x", A_sf.size(1), " and ", B_sf.size(0), "x",
+                  B_sf.size(1), ")");
+  STD_TORCH_CHECK(A_sf.size(0) == rounded_m && A_sf.size(1) == rounded_k,
+                  "scale_a must be padded and swizzled to a shape (", rounded_m,
+                  "x", rounded_k, "), but got a shape (", A_sf.size(0), "x",
+                  A_sf.size(1), ")");
+  STD_TORCH_CHECK(B_sf.size(0) == rounded_n && B_sf.size(1) == rounded_k,
+                  "scale_b must be padded and swizzled to a shape (", rounded_n,
+                  "x", rounded_k, "), but got a shape (", B_sf.size(0), "x",
+                  B_sf.size(1), ")");
+
+  auto out_dtype = D.scalar_type();
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      A.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream(A.get_device_index());
+
+  if (out_dtype == torch::headeronly::ScalarType::Half) {
     cutlass_fp4_gemm_dispatch<cutlass::half_t>(D, A, B, A_sf, B_sf, alpha, m, n,
                                                k, stream);
-  } else if (out_dtype == at::ScalarType::BFloat16) {
+  } else if (out_dtype == torch::headeronly::ScalarType::BFloat16) {
     cutlass_fp4_gemm_dispatch<cutlass::bfloat16_t>(D, A, B, A_sf, B_sf, alpha,
                                                    m, n, k, stream);
   } else {
-    TORCH_CHECK(false, "Unsupported output data type of nvfp4 mm (", out_dtype,
-                ")");
+    STD_TORCH_CHECK(false, "Unsupported output data type of nvfp4 mm (",
+                    out_dtype, ")");
   }
 }
diff --git a/csrc/libtorch_stable/quantization/fp4/nvfp4_scaled_mm_sm120_kernels.cu b/csrc/libtorch_stable/quantization/fp4/nvfp4_scaled_mm_sm120_kernels.cu
new file mode 100644
index 000000000000..2baa00caa824
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/fp4/nvfp4_scaled_mm_sm120_kernels.cu
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <torch/csrc/stable/tensor.h>
+
+#include "libtorch_stable/torch_utils.h"
+
+#include "cutlass_extensions/common.hpp"
+
+#include "cutlass/cutlass.h"
+
+#include <type_traits>
+
+#include "cutlass/gemm/collective/collective_builder.hpp"
+#include "cutlass/epilogue/collective/collective_builder.hpp"
+#include "cutlass/gemm/device/gemm_universal_adapter.h"
+#include "cutlass/gemm/kernel/gemm_universal.hpp"
+
+#include "cutlass/util/packed_stride.hpp"
+
+#include "core/math.hpp"
+#include "core/batch_invariant.hpp"
+
+using namespace cute;
+
+#define CHECK_TYPE(x, st, m)             \
+  STD_TORCH_CHECK(x.scalar_type() == st, \
+                  ": Inconsistency of torch::stable::Tensor type:", m)
+#define CHECK_TH_CUDA(x, m) \
+  STD_TORCH_CHECK(x.is_cuda(), m, ": must be a CUDA tensor")
+#define CHECK_CONTIGUOUS(x, m) \
+  STD_TORCH_CHECK(x.is_contiguous(), m, ": must be contiguous")
+#define CHECK_INPUT(x, st, m) \
+  CHECK_TH_CUDA(x, m);        \
+  CHECK_CONTIGUOUS(x, m);     \
+  CHECK_TYPE(x, st, m)
+
+constexpr auto FLOAT4_E2M1X2 = torch::headeronly::ScalarType::Byte;
+constexpr auto SF_DTYPE = torch::headeronly::ScalarType::Float8_e4m3fn;
+
+struct sm120_fp4_config_M256 {
+  using KernelSchedule = cutlass::gemm::collective::KernelScheduleAuto;
+  using EpilogueSchedule = cutlass::epilogue::collective::EpilogueScheduleAuto;
+  using TileScheduler = void;
+  using ClusterShape = Shape<_1, _1, _1>;
+  using MmaTileShape = Shape<_128, _128, _128>;
+  using PerSmTileShape_MNK = Shape<_128, _128, _128>;
+};
+
+struct sm120_fp4_config_default {
+  // Also used for batch-invariant mode.
+  // Do not change the tile K or tile scheduler here unless you are also
+  // updating the batch-invariant behavior; if batch-invariant mode needs a
+  // different schedule, add a dedicated batch-invariant config/path instead.
+  using KernelSchedule = cutlass::gemm::collective::KernelScheduleAuto;
+  using EpilogueSchedule = cutlass::epilogue::collective::EpilogueScheduleAuto;
+  using TileScheduler = cutlass::gemm::PersistentScheduler;
+  using ClusterShape = Shape<_1, _1, _1>;
+  using MmaTileShape = Shape<_256, _128, _128>;
+  using PerSmTileShape_MNK = Shape<_256, _128, _128>;
+};
+
+template <typename Config, typename OutType>
+struct Fp4GemmSm120 {
+  using ElementA = cutlass::nv_float4_t<cutlass::float_e2m1_t>;
+  using LayoutATag = cutlass::layout::RowMajor;
+  static constexpr int AlignmentA = 32;
+
+  using ElementB = cutlass::nv_float4_t<cutlass::float_e2m1_t>;
+  using LayoutBTag = cutlass::layout::ColumnMajor;
+  static constexpr int AlignmentB = 32;
+
+  using ElementD = OutType;
+  using ElementC = OutType;
+  using LayoutCTag = cutlass::layout::RowMajor;
+  using LayoutDTag = cutlass::layout::RowMajor;
+  static constexpr int AlignmentD = 128 / cutlass::sizeof_bits<ElementD>::value;
+  static constexpr int AlignmentC = 128 / cutlass::sizeof_bits<ElementC>::value;
+
+  using ElementAccumulator = float;
+  using ArchTag = cutlass::arch::Sm120;
+  using OperatorClass = cutlass::arch::OpClassBlockScaledTensorOp;
+
+  using MmaTileShape = typename Config::MmaTileShape;
+  using ClusterShape = typename Config::ClusterShape;
+  using PerSmTileShape_MNK = typename Config::PerSmTileShape_MNK;
+
+  using CollectiveEpilogue =
+      typename cutlass::epilogue::collective::CollectiveBuilder<
+          ArchTag, OperatorClass, PerSmTileShape_MNK, ClusterShape,
+          cutlass::epilogue::collective::EpilogueTileAuto, ElementAccumulator,
+          ElementAccumulator, ElementC, LayoutCTag, AlignmentC, ElementD,
+          LayoutDTag, AlignmentD,
+          typename Config::EpilogueSchedule>::CollectiveOp;
+
+  using CollectiveMainloop =
+      typename cutlass::gemm::collective::CollectiveBuilder<
+          ArchTag, OperatorClass, ElementA, LayoutATag, AlignmentA, ElementB,
+          LayoutBTag, AlignmentB, ElementAccumulator, MmaTileShape,
+          ClusterShape,
+          cutlass::gemm::collective::StageCountAutoCarveout<static_cast<int>(
+              sizeof(typename CollectiveEpilogue::SharedStorage))>,
+          typename Config::KernelSchedule>::CollectiveOp;
+
+  using TileScheduler = typename Config::TileScheduler;
+  using GemmKernel =
+      cutlass::gemm::kernel::GemmUniversal<Shape<int, int, int, int>,
+                                           CollectiveMainloop,
+                                           CollectiveEpilogue, TileScheduler>;
+
+  using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
+};
+
+template <typename Gemm>
+typename Gemm::Arguments args_from_options(torch::stable::Tensor& D,
+                                           torch::stable::Tensor const& A,
+                                           torch::stable::Tensor const& B,
+                                           torch::stable::Tensor const& A_sf,
+                                           torch::stable::Tensor const& B_sf,
+                                           torch::stable::Tensor const& alpha,
+                                           int M, int N, int K) {
+  using ElementA = typename Gemm::ElementA;
+  using ElementB = typename Gemm::ElementB;
+  using ElementD = typename Gemm::ElementD;
+  using ElementSFA = cutlass::float_ue4m3_t;
+  using ElementSFB = cutlass::float_ue4m3_t;
+  using ElementCompute = float;
+
+  using StrideA = typename Gemm::GemmKernel::StrideA;
+  using StrideB = typename Gemm::GemmKernel::StrideB;
+  using StrideC = typename Gemm::GemmKernel::StrideC;
+  using StrideD = typename Gemm::GemmKernel::StrideD;
+
+  using Sm1xxBlkScaledConfig =
+      typename Gemm::GemmKernel::CollectiveMainloop::Sm1xxBlkScaledConfig;
+
+  auto stride_A = cutlass::make_cute_packed_stride(StrideA{}, {M, K, 1});
+  auto stride_B = cutlass::make_cute_packed_stride(StrideB{}, {N, K, 1});
+  auto stride_D = cutlass::make_cute_packed_stride(StrideD{}, {M, N, 1});
+
+  auto layout_SFA = Sm1xxBlkScaledConfig::tile_atom_to_shape_SFA(
+      cute::make_shape(M, N, K, 1));
+  auto layout_SFB = Sm1xxBlkScaledConfig::tile_atom_to_shape_SFB(
+      cute::make_shape(M, N, K, 1));
+
+  typename Gemm::Arguments arguments{
+      cutlass::gemm::GemmUniversalMode::kGemm,
+      {M, N, K, 1},
+      {static_cast<ElementA const*>(A.data_ptr()), stride_A,
+       static_cast<ElementB const*>(B.data_ptr()), stride_B,
+       static_cast<ElementSFA const*>(A_sf.data_ptr()), layout_SFA,
+       static_cast<ElementSFB const*>(B_sf.data_ptr()), layout_SFB},
+      {{},
+       static_cast<ElementD const*>(D.data_ptr()),
+       stride_D,
+       static_cast<ElementD*>(D.data_ptr()),
+       stride_D}};
+  auto& fusion_args = arguments.epilogue.thread;
+  fusion_args.alpha_ptr = static_cast<ElementCompute const*>(alpha.data_ptr());
+
+  return arguments;
+}
+
+template <typename Gemm>
+void runGemm(torch::stable::Tensor& D, torch::stable::Tensor const& A,
+             torch::stable::Tensor const& B, torch::stable::Tensor const& A_sf,
+             torch::stable::Tensor const& B_sf,
+             torch::stable::Tensor const& alpha, int M, int N, int K,
+             cudaStream_t stream) {
+  Gemm gemm;
+
+  auto arguments = args_from_options<Gemm>(D, A, B, A_sf, B_sf, alpha, M, N, K);
+
+  size_t workspace_size = Gemm::get_workspace_size(arguments);
+  auto workspace =
+      torch::stable::empty(workspace_size, torch::headeronly::ScalarType::Byte,
+                           std::nullopt, A.device());
+
+  CUTLASS_CHECK(gemm.can_implement(arguments));
+
+  CUTLASS_CHECK(gemm.initialize(arguments, workspace.data_ptr(), stream));
+
+  CUTLASS_CHECK(gemm.run(arguments, workspace.data_ptr(), stream));
+}
+
+namespace {
+
+// Dispatch function to select appropriate config based on M (file-local;
+// internal linkage avoids clashing with SM100's cutlass_fp4_gemm_dispatch in
+// nvfp4_scaled_mm_kernels.cu).
+template <typename OutType>
+void cutlass_fp4_gemm_dispatch(torch::stable::Tensor& D,
+                               torch::stable::Tensor const& A,
+                               torch::stable::Tensor const& B,
+                               torch::stable::Tensor const& A_sf,
+                               torch::stable::Tensor const& B_sf,
+                               torch::stable::Tensor const& alpha, int m, int n,
+                               int k, cudaStream_t stream) {
+  if (vllm::vllm_is_batch_invariant()) {
+    using BiGemm = Fp4GemmSm120<sm120_fp4_config_default, OutType>;
+    static_assert(
+        cute::is_same_v<typename BiGemm::TileScheduler,
+                        cutlass::gemm::PersistentScheduler>,
+        "batch_invariant requires a persistent tile scheduler; stream-K or "
+        "split-K would break numerical invariance");
+    runGemm<typename BiGemm::Gemm>(D, A, B, A_sf, B_sf, alpha, m, n, k, stream);
+    return;
+  }
+
+  uint32_t const mp2 = std::max(static_cast<uint32_t>(16), next_pow_2(m));
+  if (mp2 <= 256) {
+    runGemm<typename Fp4GemmSm120<sm120_fp4_config_M256, OutType>::Gemm>(
+        D, A, B, A_sf, B_sf, alpha, m, n, k, stream);
+  } else {
+    runGemm<typename Fp4GemmSm120<sm120_fp4_config_default, OutType>::Gemm>(
+        D, A, B, A_sf, B_sf, alpha, m, n, k, stream);
+  }
+}
+
+}  // namespace
+
+void cutlass_scaled_fp4_mm_sm120a(torch::stable::Tensor& D,
+                                  torch::stable::Tensor const& A,
+                                  torch::stable::Tensor const& B,
+                                  torch::stable::Tensor const& A_sf,
+                                  torch::stable::Tensor const& B_sf,
+                                  torch::stable::Tensor const& alpha) {
+#if defined(CUTLASS_ARCH_MMA_SM120_SUPPORTED)
+  CHECK_INPUT(A, FLOAT4_E2M1X2, "a");
+  CHECK_INPUT(B, FLOAT4_E2M1X2, "b");
+
+  CHECK_INPUT(A_sf, SF_DTYPE, "scale_a");
+  CHECK_INPUT(B_sf, SF_DTYPE, "scale_b");
+
+  CHECK_INPUT(alpha, torch::headeronly::ScalarType::Float, "alpha");
+
+  STD_TORCH_CHECK(A.dim() == 2, "a must be a matrix");
+  STD_TORCH_CHECK(B.dim() == 2, "b must be a matrix");
+  STD_TORCH_CHECK(A.size(1) == B.size(1),
+                  "a and b shapes cannot be multiplied (", A.size(0), "x",
+                  A.size(1), " and ", B.size(0), "x", B.size(1), ")");
+
+  auto const m = A.size(0);
+  auto const n = B.size(0);
+  auto const k = A.size(1) * 2;
+
+  constexpr int alignment = 32;
+  STD_TORCH_CHECK(k % alignment == 0, "Expected k to be divisible by ",
+                  alignment, ", but got a shape: (", A.size(0), "x", A.size(1),
+                  "), k: ", k, ".");
+  STD_TORCH_CHECK(n % alignment == 0, "Expected n to be divisible by ",
+                  alignment, ", but got b shape: (", B.size(0), "x", B.size(1),
+                  ").");
+
+  auto round_up = [](int x, int y) { return (x + y - 1) / y * y; };
+  int rounded_m = round_up(m, 128);
+  int rounded_n = round_up(n, 128);
+  // Since k is divisible by 32 (alignment), k / 16 is guaranteed to be an
+  // integer.
+  int rounded_k = round_up(k / 16, 4);
+
+  STD_TORCH_CHECK(A_sf.dim() == 2, "scale_a must be a matrix");
+  STD_TORCH_CHECK(B_sf.dim() == 2, "scale_b must be a matrix");
+  STD_TORCH_CHECK(A_sf.size(1) == B_sf.size(1),
+                  "scale_a and scale_b shapes cannot be multiplied (",
+                  A_sf.size(0), "x", A_sf.size(1), " and ", B_sf.size(0), "x",
+                  B_sf.size(1), ")");
+  STD_TORCH_CHECK(A_sf.size(0) == rounded_m && A_sf.size(1) == rounded_k,
+                  "scale_a must be padded and swizzled to a shape (", rounded_m,
+                  "x", rounded_k, "), but got a shape (", A_sf.size(0), "x",
+                  A_sf.size(1), ")");
+  STD_TORCH_CHECK(B_sf.size(0) == rounded_n && B_sf.size(1) == rounded_k,
+                  "scale_b must be padded and swizzled to a shape (", rounded_n,
+                  "x", rounded_k, "), but got a shape (", B_sf.size(0), "x",
+                  B_sf.size(1), ")");
+
+  auto out_dtype = D.scalar_type();
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      A.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream(A.get_device_index());
+
+  if (out_dtype == torch::headeronly::ScalarType::BFloat16) {
+    return cutlass_fp4_gemm_dispatch<cutlass::bfloat16_t>(
+        D, A, B, A_sf, B_sf, alpha, m, n, k, stream);
+  } else if (out_dtype == torch::headeronly::ScalarType::Half) {
+    return cutlass_fp4_gemm_dispatch<cutlass::half_t>(D, A, B, A_sf, B_sf,
+                                                      alpha, m, n, k, stream);
+  } else {
+    STD_TORCH_CHECK(false, "Unsupported output data type of nvfp4 mm sm120 (",
+                    out_dtype, ")");
+  }
+#else
+  STD_TORCH_CHECK(false,
+                  "Unsupported CUTLASS version. Set VLLM_CUTLASS_SRC_DIR to "
+                  "a CUTLASS 3.8 source directory to enable support.");
+#endif  // defined(CUTLASS_ARCH_MMA_SM120_SUPPORTED)
+}
diff --git a/csrc/quantization/fp4/nvfp4_utils.cuh b/csrc/libtorch_stable/quantization/fp4/nvfp4_utils.cuh
similarity index 99%
rename from csrc/quantization/fp4/nvfp4_utils.cuh
rename to csrc/libtorch_stable/quantization/fp4/nvfp4_utils.cuh
index 0c04f010888d..590e4c06b62d 100644
--- a/csrc/quantization/fp4/nvfp4_utils.cuh
+++ b/csrc/libtorch_stable/quantization/fp4/nvfp4_utils.cuh
@@ -20,7 +20,7 @@
 #include <cuda_fp8.h>
 #include <utility>
 
-#include "../../cuda_vec_utils.cuh"
+#include "cuda_vec_utils.cuh"
 
 #if defined(NVFP4_ENABLE_ELTS16) && defined(CUDA_VERSION) && \
     CUDA_VERSION >= 12090
diff --git a/csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu b/csrc/libtorch_stable/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu
similarity index 54%
rename from csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu
rename to csrc/libtorch_stable/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu
index 723ca8142b82..2152e64dc962 100644
--- a/csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu
+++ b/csrc/libtorch_stable/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu
@@ -1,6 +1,5 @@
 
-#include <ATen/cuda/CUDAContext.h>
-#include <c10/cuda/CUDAGuard.h>
+#include "../../torch_utils.h"
 
 #include "../../dispatch_utils.h"
 #include "layernorm_utils.cuh"
@@ -134,63 +133,71 @@ __global__ void rms_norm_per_block_quant_kernel(
 // Residual add + RMS norm + dynamic per token
 template <typename scalar_in_t>
 void rms_norm_dynamic_per_token_quant_dispatch(
-    torch::Tensor& out,           // [..., hidden_size]
-    torch::Tensor const& input,   // [..., hidden_size]
-    torch::Tensor const& weight,  // [hidden_size]
-    torch::Tensor& scales,        // [num_tokens]
-    double const var_epsilon,     // Variance epsilon used in norm calculation
-    std::optional<at::Tensor> const& scale_ub,
-    std::optional<at::Tensor>& residual) {
+    torch::stable::Tensor& out,           // [..., hidden_size]
+    torch::stable::Tensor const& input,   // [..., hidden_size]
+    torch::stable::Tensor const& weight,  // [hidden_size]
+    torch::stable::Tensor& scales,        // [num_tokens]
+    double const var_epsilon,  // Variance epsilon used in norm calculation
+    std::optional<torch::stable::Tensor> const& scale_ub,
+    std::optional<torch::stable::Tensor>& residual) {
   int32_t hidden_size = input.size(-1);
-  int32_t input_stride = input.view({-1, hidden_size}).stride(0);
+  int32_t input_stride =
+      torch::stable::view(input, {-1, hidden_size}).stride(0);
   auto num_tokens = input.numel() / hidden_size;
 
   dim3 grid(num_tokens);
   dim3 block(std::min(hidden_size, 1024));
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream();
 
-  VLLM_DISPATCH_BOOL(residual.has_value(), has_residual, [&] {
-    VLLM_DISPATCH_QUANT_TYPES(
+  VLLM_STABLE_DISPATCH_BOOL(residual.has_value(), has_residual, [&] {
+    VLLM_STABLE_DISPATCH_QUANT_TYPES(
         out.scalar_type(), "rms_norm_dynamic_per_token_quant_kernel", [&] {
           vllm::rms_norm_dynamic_per_token_quant_kernel<scalar_in_t, scalar_t,
                                                         has_residual>
               <<<grid, block, 0, stream>>>(
-                  out.data_ptr<scalar_t>(), scales.data_ptr<float>(),
-                  input.data_ptr<scalar_in_t>(), weight.data_ptr<scalar_in_t>(),
-                  scale_ub.has_value() ? scale_ub->data_ptr<float>() : nullptr,
+                  out.mutable_data_ptr<scalar_t>(),
+                  scales.mutable_data_ptr<float>(),
+                  input.const_data_ptr<scalar_in_t>(),
+                  weight.const_data_ptr<scalar_in_t>(),
+                  scale_ub.has_value() ? scale_ub->const_data_ptr<float>()
+                                       : nullptr,
                   var_epsilon, hidden_size, input_stride,
-                  has_residual ? residual->data_ptr<scalar_in_t>() : nullptr);
+                  has_residual ? residual->mutable_data_ptr<scalar_in_t>()
+                               : nullptr);
         });
   });
 }
 
 void rms_norm_dynamic_per_token_quant(
-    torch::Tensor& out,           // [..., hidden_size]
-    torch::Tensor const& input,   // [..., hidden_size]
-    torch::Tensor const& weight,  // [hidden_size]
-    torch::Tensor& scales,        // [num_tokens]
-    double const var_epsilon,     // Variance epsilon used in norm calculation
-    std::optional<at::Tensor> scale_ub, std::optional<at::Tensor> residual) {
-  static c10::ScalarType kFp8Type = is_fp8_ocp()
-                                        ? c10::ScalarType::Float8_e4m3fn
-                                        : c10::ScalarType::Float8_e4m3fnuz;
-  TORCH_CHECK(out.dtype() == kFp8Type || out.dtype() == torch::kInt8);
-  TORCH_CHECK(out.is_contiguous());
-  TORCH_CHECK(input.stride(-1) == 1,
-              "Input must be contiguous in the last dimension");
+    torch::stable::Tensor& out,           // [..., hidden_size]
+    torch::stable::Tensor const& input,   // [..., hidden_size]
+    torch::stable::Tensor const& weight,  // [hidden_size]
+    torch::stable::Tensor& scales,        // [num_tokens]
+    double const var_epsilon,  // Variance epsilon used in norm calculation
+    std::optional<torch::stable::Tensor> scale_ub,
+    std::optional<torch::stable::Tensor> residual) {
+  static torch::headeronly::ScalarType kFp8Type =
+      is_fp8_ocp() ? torch::headeronly::ScalarType::Float8_e4m3fn
+                   : torch::headeronly::ScalarType::Float8_e4m3fnuz;
+  STD_TORCH_CHECK(out.scalar_type() == kFp8Type ||
+                  out.scalar_type() == torch::headeronly::ScalarType::Char);
+  STD_TORCH_CHECK(out.is_contiguous());
+  STD_TORCH_CHECK(input.stride(-1) == 1,
+                  "Input must be contiguous in the last dimension");
 
   if (scale_ub.has_value()) {
-    TORCH_CHECK(out.dtype() == kFp8Type);
+    STD_TORCH_CHECK(out.scalar_type() == kFp8Type);
   }
-  TORCH_CHECK(weight.dtype() == input.dtype());
-  TORCH_CHECK(scales.dtype() == torch::kFloat32);
+  STD_TORCH_CHECK(weight.scalar_type() == input.scalar_type());
+  STD_TORCH_CHECK(scales.scalar_type() == torch::headeronly::ScalarType::Float);
   if (residual) {
-    TORCH_CHECK(residual->scalar_type() == input.scalar_type());
-    TORCH_CHECK(residual->is_contiguous());
+    STD_TORCH_CHECK(residual->scalar_type() == input.scalar_type());
+    STD_TORCH_CHECK(residual->is_contiguous());
   }
 
-  VLLM_DISPATCH_FLOATING_TYPES(
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(
       input.scalar_type(), "rms_norm_dynamic_per_token_quant_dispatch", [&] {
         rms_norm_dynamic_per_token_quant_dispatch<scalar_t>(
             out, input, weight, scales, var_epsilon, scale_ub, residual);
@@ -199,103 +206,115 @@ void rms_norm_dynamic_per_token_quant(
 
 // Residual add + RMS norm + dynamic per token
 void rms_norm_per_block_quant_dispatch(
-    torch::Tensor& out,           // [..., hidden_size]
-    torch::Tensor const& input,   // [..., hidden_size]
-    torch::Tensor const& weight,  // [hidden_size]
-    torch::Tensor& scales,        // [num_tokens, hidden_size / group_size] or
-                                  // [hidden_size / group_size, num_tokens]
+    torch::stable::Tensor& out,           // [..., hidden_size]
+    torch::stable::Tensor const& input,   // [..., hidden_size]
+    torch::stable::Tensor const& weight,  // [hidden_size]
+    torch::stable::Tensor& scales,        // [num_tokens, hidden_size /
+                                          // group_size] or
+                                          // [hidden_size / group_size,
+                                          // num_tokens]
     int32_t group_size,
     double const var_epsilon,  // Variance epsilon used in norm calculation
-    std::optional<at::Tensor> const& scale_ub,
-    std::optional<at::Tensor>& residual, bool is_scale_transposed) {
+    std::optional<torch::stable::Tensor> const& scale_ub,
+    std::optional<torch::stable::Tensor>& residual, bool is_scale_transposed) {
   int32_t hidden_size = input.size(-1);
-  int32_t input_stride = input.view({-1, hidden_size}).stride(0);
+  int32_t input_stride =
+      torch::stable::view(input, {-1, hidden_size}).stride(0);
 
-  TORCH_CHECK(hidden_size % 4 == 0,
-              "Hidden size must be divisible by 4 for vectorized access");
-  TORCH_CHECK(input_stride % 4 == 0,
-              "Input stride must be divisible by 4 for vectorized access");
-  TORCH_CHECK(group_size % 4 == 0,
-              "Group size must be divisible by 4 for vectorized access");
+  STD_TORCH_CHECK(hidden_size % 4 == 0,
+                  "Hidden size must be divisible by 4 for vectorized access");
+  STD_TORCH_CHECK(input_stride % 4 == 0,
+                  "Input stride must be divisible by 4 for vectorized access");
+  STD_TORCH_CHECK(group_size % 4 == 0,
+                  "Group size must be divisible by 4 for vectorized access");
 
   auto num_tokens = input.numel() / hidden_size;
 
   dim3 grid(num_tokens);
   const int max_block_size = (num_tokens <= 256) ? 512 : 256;
   dim3 block(std::min(hidden_size, max_block_size));
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream();
 
-  VLLM_DISPATCH_FLOATING_TYPES(
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(
       input.scalar_type(), "rms_norm_per_block_quant_fp_dispatch", [&] {
         using scalar_in_t = scalar_t;
-        VLLM_DISPATCH_GROUP_SIZE(group_size, gs, [&] {
-          VLLM_DISPATCH_BOOL(residual.has_value(), has_residual, [&] {
-            VLLM_DISPATCH_BOOL(is_scale_transposed, transpose_scale, [&] {
-              VLLM_DISPATCH_QUANT_TYPES(
-                  out.scalar_type(), "rms_norm_per_block_quant_kernel", [&] {
-                    vllm::rms_norm_per_block_quant_kernel<scalar_in_t, scalar_t,
-                                                          has_residual,
-                                                          transpose_scale, gs>
-                        <<<grid, block, 0, stream>>>(
-                            out.data_ptr<scalar_t>(), scales.data_ptr<float>(),
-                            input.data_ptr<scalar_in_t>(),
-                            weight.data_ptr<scalar_in_t>(),
-                            scale_ub.has_value() ? scale_ub->data_ptr<float>()
-                                                 : nullptr,
+        VLLM_STABLE_DISPATCH_GROUP_SIZE(group_size, gs, [&] {
+          VLLM_STABLE_DISPATCH_BOOL(residual.has_value(), has_residual, [&] {
+            VLLM_STABLE_DISPATCH_BOOL(
+                is_scale_transposed, transpose_scale, [&] {
+                  VLLM_STABLE_DISPATCH_QUANT_TYPES(
+                      out.scalar_type(), "rms_norm_per_block_quant_kernel",
+                      [&] {
+                        vllm::rms_norm_per_block_quant_kernel<
+                            scalar_in_t, scalar_t, has_residual,
+                            transpose_scale, gs><<<grid, block, 0, stream>>>(
+                            out.mutable_data_ptr<scalar_t>(),
+                            scales.mutable_data_ptr<float>(),
+                            input.const_data_ptr<scalar_in_t>(),
+                            weight.const_data_ptr<scalar_in_t>(),
+                            scale_ub.has_value()
+                                ? scale_ub->const_data_ptr<float>()
+                                : nullptr,
                             var_epsilon, hidden_size, input_stride,
-                            has_residual ? residual->data_ptr<scalar_in_t>()
-                                         : nullptr,
+                            has_residual
+                                ? residual->mutable_data_ptr<scalar_in_t>()
+                                : nullptr,
                             scales.stride(1));
-                  });
-            });
+                      });
+                });
           });
         });
       });
 }
 
-void rms_norm_per_block_quant(torch::Tensor& out, torch::Tensor const& input,
-                              torch::Tensor const& weight,
-                              torch::Tensor& scales, double const var_epsilon,
-                              std::optional<torch::Tensor> scale_ub,
-                              std::optional<torch::Tensor> residual,
+void rms_norm_per_block_quant(torch::stable::Tensor& out,
+                              torch::stable::Tensor const& input,
+                              torch::stable::Tensor const& weight,
+                              torch::stable::Tensor& scales,
+                              double const var_epsilon,
+                              std::optional<torch::stable::Tensor> scale_ub,
+                              std::optional<torch::stable::Tensor> residual,
                               int64_t group_size, bool is_scale_transposed) {
-  static c10::ScalarType kFp8Type = is_fp8_ocp()
-                                        ? c10::ScalarType::Float8_e4m3fn
-                                        : c10::ScalarType::Float8_e4m3fnuz;
-  TORCH_CHECK(out.dtype() == kFp8Type || out.dtype() == torch::kInt8);
-  TORCH_CHECK(out.is_contiguous());
-  TORCH_CHECK(input.stride(-1) == 1,
-              "Input must be contiguous in the last dimension");
+  static torch::headeronly::ScalarType kFp8Type =
+      is_fp8_ocp() ? torch::headeronly::ScalarType::Float8_e4m3fn
+                   : torch::headeronly::ScalarType::Float8_e4m3fnuz;
+  STD_TORCH_CHECK(out.scalar_type() == kFp8Type ||
+                  out.scalar_type() == torch::headeronly::ScalarType::Char);
+  STD_TORCH_CHECK(out.is_contiguous());
+  STD_TORCH_CHECK(input.stride(-1) == 1,
+                  "Input must be contiguous in the last dimension");
 
   if (scale_ub.has_value()) {
-    TORCH_CHECK(out.dtype() == kFp8Type);
+    STD_TORCH_CHECK(out.scalar_type() == kFp8Type);
   }
-  TORCH_CHECK(weight.dtype() == input.dtype());
-  TORCH_CHECK(scales.dtype() == torch::kFloat32);
+  STD_TORCH_CHECK(weight.scalar_type() == input.scalar_type());
+  STD_TORCH_CHECK(scales.scalar_type() == torch::headeronly::ScalarType::Float);
   if (residual) {
-    TORCH_CHECK(residual->scalar_type() == input.scalar_type());
-    TORCH_CHECK(residual->is_contiguous());
+    STD_TORCH_CHECK(residual->scalar_type() == input.scalar_type());
+    STD_TORCH_CHECK(residual->is_contiguous());
   }
 
-  TORCH_CHECK(group_size == 128 || group_size == 64,
-              "Unsupported group size: ", group_size);
+  STD_TORCH_CHECK(group_size == 128 || group_size == 64,
+                  "Unsupported group size: ", group_size);
 
   if (scales.stride(1) > 1) {
-    TORCH_CHECK(is_scale_transposed,
-                "Outer scale stride must be 1 when scales are not transposed");
+    STD_TORCH_CHECK(
+        is_scale_transposed,
+        "Outer scale stride must be 1 when scales are not transposed");
   }
 
   int64_t hidden_size = input.size(-1);
-  TORCH_CHECK(hidden_size > 0 && hidden_size % group_size == 0,
-              "hidden_size must be a positive multiple of group_size");
+  STD_TORCH_CHECK(hidden_size > 0 && hidden_size % group_size == 0,
+                  "hidden_size must be a positive multiple of group_size");
   int64_t num_tokens = input.numel() / hidden_size;
   int64_t num_groups = hidden_size / group_size;
-  TORCH_CHECK(scales.numel() >= num_tokens * num_groups,
-              "scales buffer too small: need ", num_tokens * num_groups,
-              " elements, got ", scales.numel());
+  STD_TORCH_CHECK(scales.numel() >= num_tokens * num_groups,
+                  "scales buffer too small: need ", num_tokens * num_groups,
+                  " elements, got ", scales.numel());
 
   rms_norm_per_block_quant_dispatch(out, input, weight, scales, group_size,
                                     var_epsilon, scale_ub, residual,
                                     is_scale_transposed);
-}
\ No newline at end of file
+}
diff --git a/csrc/quantization/fused_kernels/layernorm_utils.cuh b/csrc/libtorch_stable/quantization/fused_kernels/layernorm_utils.cuh
similarity index 99%
rename from csrc/quantization/fused_kernels/layernorm_utils.cuh
rename to csrc/libtorch_stable/quantization/fused_kernels/layernorm_utils.cuh
index 48b615ebdd95..290abedcf940 100644
--- a/csrc/quantization/fused_kernels/layernorm_utils.cuh
+++ b/csrc/libtorch_stable/quantization/fused_kernels/layernorm_utils.cuh
@@ -8,8 +8,8 @@
 #include "quantization/utils.cuh"
 #include "quant_conversions.cuh"
 
-#include "../../cub_helpers.h"
-#include "../../cuda_compat.h"
+#include "../../../cub_helpers.h"
+#include "../../../cuda_compat.h"
 
 namespace vllm {
 
diff --git a/csrc/quantization/fused_kernels/quant_conversions.cuh b/csrc/libtorch_stable/quantization/fused_kernels/quant_conversions.cuh
similarity index 98%
rename from csrc/quantization/fused_kernels/quant_conversions.cuh
rename to csrc/libtorch_stable/quantization/fused_kernels/quant_conversions.cuh
index 3711c47edc8c..dbe38092f956 100644
--- a/csrc/quantization/fused_kernels/quant_conversions.cuh
+++ b/csrc/libtorch_stable/quantization/fused_kernels/quant_conversions.cuh
@@ -6,7 +6,7 @@
 
 #include "libtorch_stable/quantization/vectorization.cuh"
 // TODO(luka/varun):refactor common.cuh to use this file instead
-#include "quantization/w8a8/fp8/common.cuh"
+#include "../../../quantization/w8a8/fp8/common.cuh"
 
 namespace vllm {
 
diff --git a/csrc/quantization/gguf/gguf_kernel.cu b/csrc/libtorch_stable/quantization/gguf/gguf_kernel.cu
similarity index 61%
rename from csrc/quantization/gguf/gguf_kernel.cu
rename to csrc/libtorch_stable/quantization/gguf/gguf_kernel.cu
index 76fe73e95040..0fdfcafab8c0 100644
--- a/csrc/quantization/gguf/gguf_kernel.cu
+++ b/csrc/libtorch_stable/quantization/gguf/gguf_kernel.cu
@@ -1,17 +1,20 @@
 #include <cuda_fp16.h>
 #include <cuda_runtime.h>
 
-#include <torch/all.h>
-#include <c10/cuda/CUDAGuard.h>
+#include "../../../cuda_compat.h"
+#include "../../dispatch_utils.h"
+#include "../../torch_utils.h"
 
-#include "../../cuda_compat.h"
-#include "dispatch_utils.h"
+#include <torch/csrc/stable/ops.h>
 
-#include "ggml-common.h"
-#include "vecdotq.cuh"
-#include "dequantize.cuh"
-#include "mmvq.cuh"
-#include "mmq.cuh"
+// NOTE: These headers are intentionally kept in csrc/quantization/gguf/ (not
+// moved to libtorch_stable) to avoid unnecessary reformatting that would break
+// git rename detection and pollute blame history.
+#include "../../../quantization/gguf/ggml-common.h"
+#include "../../../quantization/gguf/vecdotq.cuh"
+#include "../../../quantization/gguf/dequantize.cuh"
+#include "../../../quantization/gguf/mmvq.cuh"
+#include "../../../quantization/gguf/mmq.cuh"
 #include "moe.cuh"
 #include "moe_vec.cuh"
 
@@ -71,16 +74,17 @@ static void quantize_row_q8_1_cuda(const scalar_t* x, void* vy, const int kx,
   }
 }
 
-torch::Tensor ggml_dequantize(torch::Tensor W,  // quant weight
-                              int64_t type, int64_t m, int64_t n,
-                              std::optional<at::ScalarType> const& dtype) {
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(W));
-  auto dtype_ = dtype.value_or(torch::kFloat16);
-  auto options = torch::TensorOptions().dtype(dtype_).device(W.device());
-  at::Tensor DW = torch::empty({m, n}, options);
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
+torch::stable::Tensor ggml_dequantize(
+    torch::stable::Tensor W,  // quant weight
+    int64_t type, int64_t m, int64_t n,
+    std::optional<torch::headeronly::ScalarType> const& dtype) {
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      W.get_device_index());
+  auto dtype_ = dtype.value_or(torch::headeronly::ScalarType::Half);
+  auto DW = torch::stable::empty({m, n}, dtype_, std::nullopt, W.device());
+  cudaStream_t stream = get_current_cuda_stream();
 
-  VLLM_DISPATCH_FLOATING_TYPES(DW.scalar_type(), "ggml_dequantize", [&] {
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(DW.scalar_type(), "ggml_dequantize", [&] {
     auto to_cuda = ggml_get_to_cuda<scalar_t>(type);
     to_cuda((void*)W.data_ptr(), (scalar_t*)DW.data_ptr(), m * n, stream);
   });
@@ -88,135 +92,142 @@ torch::Tensor ggml_dequantize(torch::Tensor W,  // quant weight
   return DW;
 }
 
-torch::Tensor ggml_mul_mat_vec_a8(torch::Tensor W,  // quant weight
-                                  torch::Tensor X,  // input
-                                  int64_t type, int64_t row) {
+torch::stable::Tensor ggml_mul_mat_vec_a8(
+    torch::stable::Tensor W,  // quant weight
+    torch::stable::Tensor X,  // input
+    int64_t type, int64_t row) {
   int col = X.sizes()[1];
   int vecs = X.sizes()[0];
   const int padded = (col + 512 - 1) / 512 * 512;
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(X));
-  auto options = torch::TensorOptions().dtype(X.dtype()).device(W.device());
-  at::Tensor Y = torch::empty({vecs, row}, options);
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
-  options = torch::TensorOptions().dtype(torch::kInt32).device(W.device());
-  at::Tensor quant_X = torch::empty({vecs, padded / 32 * 9}, options);
-  VLLM_DISPATCH_FLOATING_TYPES(X.scalar_type(), "ggml_mul_mat_vec_a8", [&] {
-    quantize_row_q8_1_cuda<scalar_t>(
-        (scalar_t*)X.data_ptr(), (void*)quant_X.data_ptr(), col, vecs, stream);
-    switch (type) {
-      case 2:
-        mul_mat_vec_q4_0_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 3:
-        mul_mat_vec_q4_1_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 6:
-        mul_mat_vec_q5_0_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 7:
-        mul_mat_vec_q5_1_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 8:
-        mul_mat_vec_q8_0_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 10:
-        mul_mat_vec_q2_K_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 11:
-        mul_mat_vec_q3_K_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 12:
-        mul_mat_vec_q4_K_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 13:
-        mul_mat_vec_q5_K_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 14:
-        mul_mat_vec_q6_K_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 16:
-        mul_mat_vec_iq2_xxs_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 17:
-        mul_mat_vec_iq2_xs_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 18:
-        mul_mat_vec_iq3_xxs_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 19:
-        mul_mat_vec_iq1_s_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 20:
-        mul_mat_vec_iq4_nl_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 21:
-        mul_mat_vec_iq3_s_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 22:
-        mul_mat_vec_iq2_s_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 23:
-        mul_mat_vec_iq4_xs_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-      case 29:
-        mul_mat_vec_iq1_m_q8_1_cuda<scalar_t>(
-            (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
-            (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
-        break;
-    }
-  });
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      X.get_device_index());
+  auto Y = torch::stable::empty({vecs, row}, X.scalar_type(), std::nullopt,
+                                W.device());
+  cudaStream_t stream = get_current_cuda_stream();
+  auto quant_X = torch::stable::empty({vecs, padded / 32 * 9},
+                                      torch::headeronly::ScalarType::Int,
+                                      std::nullopt, W.device());
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(
+      X.scalar_type(), "ggml_mul_mat_vec_a8", [&] {
+        quantize_row_q8_1_cuda<scalar_t>((scalar_t*)X.data_ptr(),
+                                         (void*)quant_X.data_ptr(), col, vecs,
+                                         stream);
+        switch (type) {
+          case 2:
+            mul_mat_vec_q4_0_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 3:
+            mul_mat_vec_q4_1_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 6:
+            mul_mat_vec_q5_0_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 7:
+            mul_mat_vec_q5_1_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 8:
+            mul_mat_vec_q8_0_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 10:
+            mul_mat_vec_q2_K_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 11:
+            mul_mat_vec_q3_K_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 12:
+            mul_mat_vec_q4_K_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 13:
+            mul_mat_vec_q5_K_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 14:
+            mul_mat_vec_q6_K_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 16:
+            mul_mat_vec_iq2_xxs_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 17:
+            mul_mat_vec_iq2_xs_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 18:
+            mul_mat_vec_iq3_xxs_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 19:
+            mul_mat_vec_iq1_s_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 20:
+            mul_mat_vec_iq4_nl_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 21:
+            mul_mat_vec_iq3_s_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 22:
+            mul_mat_vec_iq2_s_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 23:
+            mul_mat_vec_iq4_xs_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+          case 29:
+            mul_mat_vec_iq1_m_q8_1_cuda<scalar_t>(
+                (void*)W.data_ptr(), (void*)quant_X.data_ptr(),
+                (scalar_t*)Y.data_ptr(), col, row, vecs, stream);
+            break;
+        }
+      });
   return Y;
 }
 
-torch::Tensor ggml_mul_mat_a8(torch::Tensor W,  // quant weight
-                              torch::Tensor X,  // input
-                              int64_t type, int64_t row) {
+torch::stable::Tensor ggml_mul_mat_a8(torch::stable::Tensor W,  // quant weight
+                                      torch::stable::Tensor X,  // input
+                                      int64_t type, int64_t row) {
   int col = X.sizes()[1];
   int padded = (col + 512 - 1) / 512 * 512;
   int batch = X.sizes()[0];
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(X));
-  auto options = torch::TensorOptions().dtype(X.dtype()).device(W.device());
-  at::Tensor Y = torch::empty({batch, row}, options);
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
-  options = torch::TensorOptions().dtype(torch::kInt32).device(W.device());
-  at::Tensor quant_X = torch::empty({batch, padded / 32 * 9}, options);
-  VLLM_DISPATCH_FLOATING_TYPES(X.scalar_type(), "ggml_mul_mat_a8", [&] {
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      X.get_device_index());
+  auto Y = torch::stable::empty({batch, row}, X.scalar_type(), std::nullopt,
+                                W.device());
+  cudaStream_t stream = get_current_cuda_stream();
+  auto quant_X = torch::stable::empty({batch, padded / 32 * 9},
+                                      torch::headeronly::ScalarType::Int,
+                                      std::nullopt, W.device());
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(X.scalar_type(), "ggml_mul_mat_a8", [&] {
     quantize_row_q8_1_cuda((scalar_t*)X.data_ptr(), (void*)quant_X.data_ptr(),
                            col, batch, stream);
 
@@ -276,21 +287,24 @@ torch::Tensor ggml_mul_mat_a8(torch::Tensor W,  // quant weight
   return Y;
 }
 
-torch::Tensor ggml_moe_a8(torch::Tensor X,  // input
-                          torch::Tensor W,  // expert weights
-                          torch::Tensor sorted_token_ids,
-                          torch::Tensor expert_ids,
-                          torch::Tensor num_tokens_post_padded, int64_t type,
-                          int64_t row, int64_t top_k, int64_t tokens) {
+torch::stable::Tensor ggml_moe_a8(torch::stable::Tensor X,  // input
+                                  torch::stable::Tensor W,  // expert weights
+                                  torch::stable::Tensor sorted_token_ids,
+                                  torch::stable::Tensor expert_ids,
+                                  torch::stable::Tensor num_tokens_post_padded,
+                                  int64_t type, int64_t row, int64_t top_k,
+                                  int64_t tokens) {
   int col = X.sizes()[1];
   int padded = (col + 512 - 1) / 512 * 512;
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(X));
-  auto options = torch::TensorOptions().dtype(X.dtype()).device(W.device());
-  at::Tensor Y = torch::empty({tokens * top_k, row}, options);
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
-  options = torch::TensorOptions().dtype(torch::kInt32).device(W.device());
-  at::Tensor quant_X = torch::empty({tokens, padded / 32 * 9}, options);
-  VLLM_DISPATCH_FLOATING_TYPES(X.scalar_type(), "ggml_moe_a8", [&] {
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      X.get_device_index());
+  auto Y = torch::stable::empty({tokens * top_k, row}, X.scalar_type(),
+                                std::nullopt, W.device());
+  cudaStream_t stream = get_current_cuda_stream();
+  auto quant_X = torch::stable::empty({tokens, padded / 32 * 9},
+                                      torch::headeronly::ScalarType::Int,
+                                      std::nullopt, W.device());
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(X.scalar_type(), "ggml_moe_a8", [&] {
     quantize_row_q8_1_cuda((scalar_t*)X.data_ptr(), (void*)quant_X.data_ptr(),
                            col, tokens, stream);
     switch (type) {
@@ -379,19 +393,23 @@ torch::Tensor ggml_moe_a8(torch::Tensor X,  // input
   return Y;
 }
 
-torch::Tensor ggml_moe_a8_vec(torch::Tensor X,  // input
-                              torch::Tensor W,  // expert weights
-                              torch::Tensor topk_ids, int64_t top_k,
-                              int64_t type, int64_t row, int64_t tokens) {
+torch::stable::Tensor ggml_moe_a8_vec(
+    torch::stable::Tensor X,  // input
+    torch::stable::Tensor W,  // expert weights
+    torch::stable::Tensor topk_ids, int64_t top_k, int64_t type, int64_t row,
+    int64_t tokens) {
   int col = X.sizes()[1];
   const int padded = (col + 512 - 1) / 512 * 512;
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(X));
-  auto options = torch::TensorOptions().dtype(X.dtype()).device(W.device());
-  at::Tensor Y = torch::zeros({tokens * top_k, row}, options);
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
-  options = torch::TensorOptions().dtype(torch::kInt32).device(W.device());
-  at::Tensor quant_X = torch::empty({tokens, padded / 32 * 9}, options);
-  VLLM_DISPATCH_FLOATING_TYPES(X.scalar_type(), "ggml_moe_vec_a8", [&] {
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      X.get_device_index());
+  auto Y = torch::stable::empty({tokens * top_k, row}, X.scalar_type(),
+                                std::nullopt, W.device());
+  torch::stable::fill_(Y, 0.0);
+  cudaStream_t stream = get_current_cuda_stream();
+  auto quant_X = torch::stable::empty({tokens, padded / 32 * 9},
+                                      torch::headeronly::ScalarType::Int,
+                                      std::nullopt, W.device());
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(X.scalar_type(), "ggml_moe_vec_a8", [&] {
     quantize_row_q8_1_cuda<scalar_t>((scalar_t*)X.data_ptr(),
                                      (void*)quant_X.data_ptr(), col, tokens,
                                      stream);
diff --git a/csrc/quantization/gguf/moe.cuh b/csrc/libtorch_stable/quantization/gguf/moe.cuh
similarity index 99%
rename from csrc/quantization/gguf/moe.cuh
rename to csrc/libtorch_stable/quantization/gguf/moe.cuh
index df9b84abcc13..a2f9f46c8f89 100644
--- a/csrc/quantization/gguf/moe.cuh
+++ b/csrc/libtorch_stable/quantization/gguf/moe.cuh
@@ -1,7 +1,7 @@
 #include <cstdint>
 
 /* Adapted from ./csrc/quantization/gguf/mmq.cuh
-   based on ./vllm/model_executor/layers/fused_moe/fused_moe.py */
+   based on ./vllm/model_executor/layers/fused_moe/experts/triton_moe.py */
 template <typename scalar_t, int qk, int qr, int qi, bool need_sum,
           typename block_q_t, int mmq_x, int mmq_y, int nwarps,
           allocate_tiles_cuda_t allocate_tiles, load_tiles_cuda_t load_tiles,
diff --git a/csrc/quantization/gguf/moe_vec.cuh b/csrc/libtorch_stable/quantization/gguf/moe_vec.cuh
similarity index 100%
rename from csrc/quantization/gguf/moe_vec.cuh
rename to csrc/libtorch_stable/quantization/gguf/moe_vec.cuh
diff --git a/csrc/quantization/gptq/compat.cuh b/csrc/libtorch_stable/quantization/gptq/compat.cuh
similarity index 93%
rename from csrc/quantization/gptq/compat.cuh
rename to csrc/libtorch_stable/quantization/gptq/compat.cuh
index 1b3fb3d39103..f95f3fd95860 100644
--- a/csrc/quantization/gptq/compat.cuh
+++ b/csrc/libtorch_stable/quantization/gptq/compat.cuh
@@ -42,8 +42,8 @@ __device__ __forceinline__ void atomicAdd_half2(half2* address, half2 val) {
 }
 
 //
-
-#if defined(__CUDA_ARCH__) || defined(USE_ROCM)
+#if defined(__CUDA_ARCH__) || \
+    (defined(USE_ROCM) && (HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR) < 713)
   #if __CUDA_ARCH__ < 700 || defined(USE_ROCM)
 
 __device__ __forceinline__ void atomicAdd(half* address, half val) {
diff --git a/csrc/quantization/gptq/matrix_view.cuh b/csrc/libtorch_stable/quantization/gptq/matrix_view.cuh
similarity index 100%
rename from csrc/quantization/gptq/matrix_view.cuh
rename to csrc/libtorch_stable/quantization/gptq/matrix_view.cuh
diff --git a/csrc/quantization/gptq/q_gemm.cu b/csrc/libtorch_stable/quantization/gptq/q_gemm.cu
similarity index 97%
rename from csrc/quantization/gptq/q_gemm.cu
rename to csrc/libtorch_stable/quantization/gptq/q_gemm.cu
index 8a29ad5ab2dd..e3f79c5a6b8e 100644
--- a/csrc/quantization/gptq/q_gemm.cu
+++ b/csrc/libtorch_stable/quantization/gptq/q_gemm.cu
@@ -6,9 +6,8 @@ https://github.com/qwopqwop200/GPTQ-for-LLaMa
 #include <cstdint>
 #include <cstdio>
 
-#include <torch/all.h>
-#include <c10/cuda/CUDAGuard.h>
-#include <ATen/cuda/CUDAContext.h>
+#include "../../torch_utils.h"
+#include <torch/csrc/stable/ops.h>
 #include <cuda_runtime.h>
 #include <cuda_fp16.h>
 
@@ -735,7 +734,7 @@ void gemm_half_q_half_cuda_part(const half* a, const uint32_t* b_q_weight,
   fp_gemm_half_q_half_gptq_kernel kernel =
       pick_gemm_half_q_half_gptq_kernel(true, m_count, bit);
 
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  const cudaStream_t stream = get_current_cuda_stream();
   kernel<<<gridDim, blockDim, 0, stream>>>(
       a, b_q_weight, b_gptq_qzeros, b_gptq_scales, c, size_m, size_n, size_k,
       groups, use_v2_format, b_q_perm);
@@ -1164,7 +1163,7 @@ void reconstruct_exllama(const uint32_t* b_q_weight,
     reconstruct_exllama_kernel = reconstruct_exllama_8bit_kernel;
   }
 
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  const cudaStream_t stream = get_current_cuda_stream();
   reconstruct_exllama_kernel<<<gridDim, blockDim, 0, stream>>>(
       b_q_weight, b_q_perm, b_gptq_qzeros, b_gptq_scales, height, width, groups,
       use_v2_format, out);
@@ -1376,7 +1375,7 @@ void gemm_half_q_half_alt(const half* a, const uint32_t* b_q_weight,
     kernel = gemm_half_q_half_alt_8bit_kernel;
   }
 
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  const cudaStream_t stream = get_current_cuda_stream();
   kernel<<<gridDim, blockDim, 0, stream>>>(
       (const half2*)a, b_q_weight, c, b_gptq_scales, b_gptq_qzeros, b_g_idx,
       size_m, size_k / 32 * bit, size_n, use_v2_format);
@@ -1485,7 +1484,7 @@ void reconstruct_gptq(const uint32_t* b_q_weight, const uint32_t* b_gptq_qzeros,
     gridDim.y = DIVIDE(height, 32);
   }
 
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  const cudaStream_t stream = get_current_cuda_stream();
   kernel<<<gridDim, blockDim, 0, stream>>>(b_q_weight, b_gptq_scales,
                                            b_gptq_qzeros, b_g_idx, height,
                                            width, groups, use_v2_format, out);
@@ -1794,7 +1793,7 @@ void shuffle_exllama_weight(uint32_t* q_weight, int* q_perm, int height,
     } else if (bit == 8) {
       kernel = make_sequential_8bit_kernel;
     }
-    const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+    const cudaStream_t stream = get_current_cuda_stream();
     kernel<<<gridDim, blockDim, 0, stream>>>(q_weight, new_qweight, q_perm,
                                              width);
     // Replace qweights
@@ -1818,29 +1817,34 @@ void shuffle_exllama_weight(uint32_t* q_weight, int* q_perm, int height,
   } else if (bit == 8) {
     shuffle_kernel = shuffle_8bit_kernel;
   }
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  const cudaStream_t stream = get_current_cuda_stream();
   shuffle_kernel<<<gridDim, blockDim, 0, stream>>>(q_weight, height, width);
 }
 
 }  // namespace gptq
 }  // namespace vllm
 
-torch::Tensor gptq_gemm(torch::Tensor a, torch::Tensor b_q_weight,
-                        torch::Tensor b_gptq_qzeros,
-                        torch::Tensor b_gptq_scales, torch::Tensor b_g_idx,
-                        bool use_exllama, bool use_v2_format, int64_t bit) {
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(a));
-  auto options = torch::TensorOptions().dtype(a.dtype()).device(a.device());
-  at::Tensor c = torch::zeros({a.size(0), b_q_weight.size(1)}, options);
-  at::Tensor temp_dq = torch::empty(
-      {b_q_weight.size(0) * 32 / bit, b_q_weight.size(1)}, options);
+torch::stable::Tensor gptq_gemm(torch::stable::Tensor a,
+                                torch::stable::Tensor b_q_weight,
+                                torch::stable::Tensor b_gptq_qzeros,
+                                torch::stable::Tensor b_gptq_scales,
+                                torch::stable::Tensor b_g_idx, bool use_exllama,
+                                bool use_v2_format, int64_t bit) {
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      a.get_device_index());
+  auto c = torch::stable::new_zeros(a, {a.size(0), b_q_weight.size(1)});
+  auto temp_dq =
+      torch::stable::empty({b_q_weight.size(0) * 32 / bit, b_q_weight.size(1)},
+                           a.scalar_type(), std::nullopt, a.device());
 
   vllm::gptq::gemm_half_q_half_cuda(
-      at::cuda::getCurrentCUDABlasHandle(), (const half*)a.data_ptr(),
+      get_current_cuda_blas_handle(), (const half*)a.data_ptr(),
       (const uint32_t*)b_q_weight.data_ptr(),
       (const uint32_t*)b_gptq_qzeros.data_ptr(),
       (const half*)b_gptq_scales.data_ptr(),
-      b_g_idx.device().is_meta() ? NULL : (const int*)b_g_idx.data_ptr(),
+      b_g_idx.device().type() == torch::stable::DeviceType::Meta
+          ? NULL
+          : (const int*)b_g_idx.data_ptr(),
       (half*)c.data_ptr(), (half*)temp_dq.data_ptr(),
       c.size(0),              // m
       c.size(1),              // n
@@ -1850,11 +1854,14 @@ torch::Tensor gptq_gemm(torch::Tensor a, torch::Tensor b_q_weight,
   return c;
 }
 
-void gptq_shuffle(torch::Tensor q_weight, torch::Tensor q_perm, int64_t bit) {
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(q_weight));
+void gptq_shuffle(torch::stable::Tensor q_weight, torch::stable::Tensor q_perm,
+                  int64_t bit) {
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      q_weight.get_device_index());
   vllm::gptq::shuffle_exllama_weight(
       (uint32_t*)q_weight.data_ptr(),
-      q_perm.device().is_meta() || q_perm.numel() == 0
+      q_perm.device().type() == torch::stable::DeviceType::Meta ||
+              q_perm.numel() == 0
           ? NULL
           : (int*)q_perm.data_ptr(),
       q_weight.size(0) * 32 / bit, q_weight.size(1), bit);
diff --git a/csrc/quantization/gptq/qdq_2.cuh b/csrc/libtorch_stable/quantization/gptq/qdq_2.cuh
similarity index 100%
rename from csrc/quantization/gptq/qdq_2.cuh
rename to csrc/libtorch_stable/quantization/gptq/qdq_2.cuh
diff --git a/csrc/quantization/gptq/qdq_3.cuh b/csrc/libtorch_stable/quantization/gptq/qdq_3.cuh
similarity index 100%
rename from csrc/quantization/gptq/qdq_3.cuh
rename to csrc/libtorch_stable/quantization/gptq/qdq_3.cuh
diff --git a/csrc/quantization/gptq/qdq_4.cuh b/csrc/libtorch_stable/quantization/gptq/qdq_4.cuh
similarity index 100%
rename from csrc/quantization/gptq/qdq_4.cuh
rename to csrc/libtorch_stable/quantization/gptq/qdq_4.cuh
diff --git a/csrc/quantization/gptq/qdq_8.cuh b/csrc/libtorch_stable/quantization/gptq/qdq_8.cuh
similarity index 100%
rename from csrc/quantization/gptq/qdq_8.cuh
rename to csrc/libtorch_stable/quantization/gptq/qdq_8.cuh
diff --git a/csrc/quantization/gptq/qdq_util.cuh b/csrc/libtorch_stable/quantization/gptq/qdq_util.cuh
similarity index 100%
rename from csrc/quantization/gptq/qdq_util.cuh
rename to csrc/libtorch_stable/quantization/gptq/qdq_util.cuh
diff --git a/csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu b/csrc/libtorch_stable/quantization/gptq_allspark/allspark_qgemm_w8a16.cu
similarity index 92%
rename from csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu
rename to csrc/libtorch_stable/quantization/gptq_allspark/allspark_qgemm_w8a16.cu
index e306ff02605b..96dc3ecfc860 100644
--- a/csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu
+++ b/csrc/libtorch_stable/quantization/gptq_allspark/allspark_qgemm_w8a16.cu
@@ -1,20 +1,28 @@
 #include "allspark_utils.cuh"
-#include <torch/all.h>
-#include "core/registration.h"
+
+#include <torch/csrc/stable/c/shim.h>
+#include <torch/csrc/stable/library.h>
+#include <torch/csrc/stable/tensor.h>
+#include <torch/headeronly/core/ScalarType.h>
+
 #include <cublas_v2.h>
 
-at::Tensor as_g_workspace;
+#include "core/registration.h"
+#include "libtorch_stable/torch_utils.h"
+
+torch::stable::Tensor as_g_workspace;
 
 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 800
 
-torch::Tensor allspark_w8a16_gemm(
-    torch::Tensor const& a, torch::Tensor const& b_qweight,
-    torch::Tensor const& b_scales, std::optional<torch::Tensor> const& b_qzeros,
-    int64_t n, int64_t group_size, int64_t sm_count, int64_t sm_version,
+torch::stable::Tensor allspark_w8a16_gemm(
+    torch::stable::Tensor const& a, torch::stable::Tensor const& b_qweight,
+    torch::stable::Tensor const& b_scales,
+    std::optional<torch::stable::Tensor> const& b_qzeros, int64_t n,
+    int64_t group_size, int64_t sm_count, int64_t sm_version,
     int64_t CUBLAS_M_THRESHOLD, bool has_zp, bool n32k16_reorder) {
-  TORCH_CHECK_NOT_IMPLEMENTED(
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
       false, "allspark_w8a16_gemm(..) requires CUDA_ARCH >= 8.0");
-  return torch::empty({1, 1});
+  return torch::stable::empty({1, 1});
 }
 
 #else
@@ -848,8 +856,8 @@ void restore_N32_K16_dequantize_rhs_w8a16(const QT* qdata, const FT* scales,
                                           const int N_32align, const int N,
                                           const int K, const int GroupSize,
                                           cudaStream_t stream) {
-  TORCH_CHECK(N % 8 == 0 && K % 16 == 0 && N_32align % 32 == 0,
-              "Unsupported shape");
+  STD_TORCH_CHECK(N % 8 == 0 && K % 16 == 0 && N_32align % 32 == 0,
+                  "Unsupported shape");
   if (GroupSize == -1) {
     const int BLOCK = 128;
     dim3 grid(N_32align / 32, ((K / 16) + 3) / 4);
@@ -859,7 +867,7 @@ void restore_N32_K16_dequantize_rhs_w8a16(const QT* qdata, const FT* scales,
   }
   // TODO: Support SubChannel
   else {
-    TORCH_CHECK(false, "Now only support PerChannel");
+    STD_TORCH_CHECK(false, "Now only support PerChannel");
   }
 }
 
@@ -916,24 +924,27 @@ void allspark_qgemm_w8a16_perc_ampere(
 
 }  // namespace allspark
 
-torch::Tensor allspark_w8a16_gemm(
-    torch::Tensor const& a, torch::Tensor const& b_qweight,
-    torch::Tensor const& b_scales, std::optional<torch::Tensor> const& b_qzeros,
-    int64_t n, int64_t group_size, int64_t sm_count, int64_t sm_version,
+torch::stable::Tensor allspark_w8a16_gemm(
+    torch::stable::Tensor const& a, torch::stable::Tensor const& b_qweight,
+    torch::stable::Tensor const& b_scales,
+    std::optional<torch::stable::Tensor> const& b_qzeros, int64_t n,
+    int64_t group_size, int64_t sm_count, int64_t sm_version,
     int64_t CUBLAS_M_THRESHOLD, bool has_zp, bool n32k16_reorder) {
   // Verify device and strides
-  TORCH_CHECK(a.device().is_cuda(), "A is not on GPU");
-  TORCH_CHECK(a.is_contiguous(), "A is not contiguous");
+  STD_TORCH_CHECK(a.device().is_cuda(), "A is not on GPU");
+  STD_TORCH_CHECK(a.is_contiguous(), "A is not contiguous");
 
-  TORCH_CHECK(b_qweight.device().is_cuda(), "b_qweight is not on GPU");
-  TORCH_CHECK(b_qweight.is_contiguous(), "b_qweight is not contiguous");
+  STD_TORCH_CHECK(b_qweight.device().is_cuda(), "b_qweight is not on GPU");
+  STD_TORCH_CHECK(b_qweight.is_contiguous(), "b_qweight is not contiguous");
 
-  TORCH_CHECK(b_scales.device().is_cuda(), "b_scales is not on GPU");
-  TORCH_CHECK(b_scales.is_contiguous(), "b_scales is not contiguous");
+  STD_TORCH_CHECK(b_scales.device().is_cuda(), "b_scales is not on GPU");
+  STD_TORCH_CHECK(b_scales.is_contiguous(), "b_scales is not contiguous");
 
   if (has_zp) {
-    TORCH_CHECK(b_qzeros.value().device().is_cuda(), "b_qzeros is not on GPU");
-    TORCH_CHECK(b_qzeros.value().is_contiguous(), "b_qzeros is not contiguous");
+    STD_TORCH_CHECK(b_qzeros.value().device().is_cuda(),
+                    "b_qzeros is not on GPU");
+    STD_TORCH_CHECK(b_qzeros.value().is_contiguous(),
+                    "b_qzeros is not contiguous");
   }
 
   int m = a.size(0);
@@ -941,16 +952,17 @@ torch::Tensor allspark_w8a16_gemm(
   int k = a.size(1);
 
   // Verify shape
-  TORCH_CHECK(b_qweight.size(0) == n_32align,
-              "Shape mismatch: b_qweight.size(0) = ", b_qweight.size(0),
-              ", n_32align = ", n_32align);
-  TORCH_CHECK(b_qweight.size(1) == k,
-              "Shape mismatch: b_qweight.size(1) = ", b_qweight.size(1),
-              ", k = ", k);
+  STD_TORCH_CHECK(b_qweight.size(0) == n_32align,
+                  "Shape mismatch: b_qweight.size(0) = ", b_qweight.size(0),
+                  ", n_32align = ", n_32align);
+  STD_TORCH_CHECK(b_qweight.size(1) == k,
+                  "Shape mismatch: b_qweight.size(1) = ", b_qweight.size(1),
+                  ", k = ", k);
 
-  TORCH_CHECK(group_size == -1, "Currently only supports group_size = -1");
+  STD_TORCH_CHECK(group_size == -1, "Currently only supports group_size = -1");
 
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(a));
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      a.get_device_index());
   const void* a_ptr = reinterpret_cast<const void*>(a.data_ptr());
   const uint8_t* b_ptr = reinterpret_cast<const uint8_t*>(b_qweight.data_ptr());
   const void* b_scale_ptr = reinterpret_cast<const void*>(b_scales.data_ptr());
@@ -959,12 +971,12 @@ torch::Tensor allspark_w8a16_gemm(
     b_zero_ptr = reinterpret_cast<const void*>(b_qzeros.value().data_ptr());
   }
 
-  auto c_options = torch::TensorOptions().dtype(a.dtype()).device(a.device());
-  torch::Tensor c = torch::empty({m, n}, c_options);
-  void* c_ptr = reinterpret_cast<void*>(c.data_ptr());
+  auto c =
+      torch::stable::empty({m, n}, a.scalar_type(), std::nullopt, a.device());
+  void* c_ptr = reinterpret_cast<void*>(c.mutable_data_ptr());
 
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  cublasHandle_t handle = at::cuda::getCurrentCUDABlasHandle();
+  cudaStream_t stream = get_current_cuda_stream();
+  cublasHandle_t handle = get_current_cuda_blas_handle();
 
   allspark::BlockTileSplitkParams fused_gemm_params;
 
@@ -976,14 +988,15 @@ torch::Tensor allspark_w8a16_gemm(
         m, n, k, sm_count, fused_gemm_params);
   }
 
-  auto ws_options = torch::TensorOptions().dtype(at::kChar).device(a.device());
   if (as_g_workspace.numel() <
       ws_size) {  // ws_options: kChar, so numel() is bytes
-    as_g_workspace = torch::empty({long(ws_size)}, ws_options);
+    as_g_workspace = torch::stable::empty({static_cast<int64_t>(ws_size)},
+                                          torch::headeronly::ScalarType::Char,
+                                          std::nullopt, a.device());
   }
   void* ws = reinterpret_cast<void*>(as_g_workspace.data_ptr());
 
-  if (a.dtype() == at::ScalarType::Half) {
+  if (a.scalar_type() == torch::headeronly::ScalarType::Half) {
     allspark::allspark_qgemm_w8a16_perc_ampere<__half, uint8_t>(
         reinterpret_cast<const __half*>(a_ptr), b_ptr,
         reinterpret_cast<const __half*>(b_scale_ptr),
@@ -991,7 +1004,7 @@ torch::Tensor allspark_w8a16_gemm(
         reinterpret_cast<__half*>(c_ptr), m, n_32align, n, k, ws,
         fused_gemm_params, group_size, CUBLAS_M_THRESHOLD, sm_version, stream,
         handle);
-  } else if (a.dtype() == at::ScalarType::BFloat16) {
+  } else if (a.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
     allspark::allspark_qgemm_w8a16_perc_ampere<__nv_bfloat16, uint8_t>(
         reinterpret_cast<const __nv_bfloat16*>(a_ptr), b_ptr,
         reinterpret_cast<const __nv_bfloat16*>(b_scale_ptr),
@@ -1006,6 +1019,6 @@ torch::Tensor allspark_w8a16_gemm(
 
 #endif
 
-TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
-  m.impl("allspark_w8a16_gemm", &allspark_w8a16_gemm);
+STABLE_TORCH_LIBRARY_IMPL(_C, CUDA, m) {
+  m.impl("allspark_w8a16_gemm", TORCH_BOX(&allspark_w8a16_gemm));
 }
diff --git a/csrc/quantization/gptq_allspark/allspark_repack.cu b/csrc/libtorch_stable/quantization/gptq_allspark/allspark_repack.cu
similarity index 67%
rename from csrc/quantization/gptq_allspark/allspark_repack.cu
rename to csrc/libtorch_stable/quantization/gptq_allspark/allspark_repack.cu
index 7a5b2f95cc2e..b325d30a041a 100644
--- a/csrc/quantization/gptq_allspark/allspark_repack.cu
+++ b/csrc/libtorch_stable/quantization/gptq_allspark/allspark_repack.cu
@@ -1,6 +1,11 @@
 #include "allspark_utils.cuh"
-#include <torch/all.h>
+
+#include <torch/csrc/stable/library.h>
+#include <torch/csrc/stable/tensor.h>
+#include <torch/headeronly/core/ScalarType.h>
+
 #include "core/registration.h"
+#include "libtorch_stable/torch_utils.h"
 
 namespace allspark {
 
@@ -99,36 +104,40 @@ void rearrange_kn_weight_as_n32k16_order_ldg16(
 }  // namespace allspark
 
 void rearrange_kn_weight_as_n32k16_order(
-    torch::Tensor const& b_qweight, torch::Tensor const& b_scales,
-    std::optional<torch::Tensor> const& b_zeros, bool has_zp,
-    torch::Tensor& b_qweight_reorder, torch::Tensor& b_scales_reorder,
-    std::optional<torch::Tensor> const& b_zeros_reorder, const int64_t K,
-    const int64_t N, const int64_t N_32align) {
+    torch::stable::Tensor const& b_qweight,
+    torch::stable::Tensor const& b_scales,
+    std::optional<torch::stable::Tensor> const& b_zeros, bool has_zp,
+    torch::stable::Tensor& b_qweight_reorder,
+    torch::stable::Tensor& b_scales_reorder,
+    std::optional<torch::stable::Tensor> const& b_zeros_reorder,
+    const int64_t K, const int64_t N, const int64_t N_32align) {
   // Verify device and strides
-  TORCH_CHECK(b_qweight.device().is_cuda(), "b_qweight is not on GPU");
-  TORCH_CHECK(b_qweight.is_contiguous(), "b_qweight is not contiguous");
+  STD_TORCH_CHECK(b_qweight.device().is_cuda(), "b_qweight is not on GPU");
+  STD_TORCH_CHECK(b_qweight.is_contiguous(), "b_qweight is not contiguous");
 
-  TORCH_CHECK(b_scales.device().is_cuda(), "b_scales is not on GPU");
-  TORCH_CHECK(b_scales.is_contiguous(), "b_scales is not contiguous");
+  STD_TORCH_CHECK(b_scales.device().is_cuda(), "b_scales is not on GPU");
+  STD_TORCH_CHECK(b_scales.is_contiguous(), "b_scales is not contiguous");
 
-  TORCH_CHECK(b_qweight_reorder.device().is_cuda(),
-              "b_qweight_reorder is not on GPU");
-  TORCH_CHECK(b_qweight_reorder.is_contiguous(),
-              "b_qweight_reorder is not contiguous");
+  STD_TORCH_CHECK(b_qweight_reorder.device().is_cuda(),
+                  "b_qweight_reorder is not on GPU");
+  STD_TORCH_CHECK(b_qweight_reorder.is_contiguous(),
+                  "b_qweight_reorder is not contiguous");
 
-  TORCH_CHECK(b_scales_reorder.device().is_cuda(),
-              "b_scales_reorder is not on GPU");
-  TORCH_CHECK(b_scales_reorder.is_contiguous(),
-              "b_scales_reorder is not contiguous");
+  STD_TORCH_CHECK(b_scales_reorder.device().is_cuda(),
+                  "b_scales_reorder is not on GPU");
+  STD_TORCH_CHECK(b_scales_reorder.is_contiguous(),
+                  "b_scales_reorder is not contiguous");
 
   if (has_zp) {
-    TORCH_CHECK(b_zeros.value().device().is_cuda(), "b_zeros is not on GPU");
-    TORCH_CHECK(b_zeros.value().is_contiguous(), "b_zeros is not contiguous");
-
-    TORCH_CHECK(b_zeros_reorder.value().device().is_cuda(),
-                "b_zeros_reorder is not on GPU");
-    TORCH_CHECK(b_zeros_reorder.value().is_contiguous(),
-                "b_zeros_reorder is not contiguous");
+    STD_TORCH_CHECK(b_zeros.value().device().is_cuda(),
+                    "b_zeros is not on GPU");
+    STD_TORCH_CHECK(b_zeros.value().is_contiguous(),
+                    "b_zeros is not contiguous");
+
+    STD_TORCH_CHECK(b_zeros_reorder.value().device().is_cuda(),
+                    "b_zeros_reorder is not on GPU");
+    STD_TORCH_CHECK(b_zeros_reorder.value().is_contiguous(),
+                    "b_zeros_reorder is not contiguous");
   }
 
   const uint8_t* matB = reinterpret_cast<const uint8_t*>(b_qweight.data_ptr());
@@ -136,18 +145,20 @@ void rearrange_kn_weight_as_n32k16_order(
   const void* b_zero = has_zp ? b_zeros.value().data_ptr() : nullptr;
 
   uint8_t* matB_reorder =
-      reinterpret_cast<uint8_t*>(b_qweight_reorder.data_ptr());
-  void* b_scale_reorder = b_scales_reorder.data_ptr();
-  void* b_zero_reorder = has_zp ? b_zeros_reorder.value().data_ptr() : nullptr;
+      reinterpret_cast<uint8_t*>(b_qweight_reorder.mutable_data_ptr());
+  void* b_scale_reorder = b_scales_reorder.mutable_data_ptr();
+  void* b_zero_reorder =
+      has_zp ? b_zeros_reorder.value().mutable_data_ptr() : nullptr;
 
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  if (b_scales.dtype() == at::ScalarType::Half) {
+  cudaStream_t stream = get_current_cuda_stream();
+  if (b_scales.scalar_type() == torch::headeronly::ScalarType::Half) {
     allspark::rearrange_kn_weight_as_n32k16_order_ldg16<__half>(
         matB, reinterpret_cast<const __half*>(b_scale),
         reinterpret_cast<const __half*>(b_zero), matB_reorder,
         reinterpret_cast<__half*>(b_scale_reorder),
         reinterpret_cast<__half*>(b_zero_reorder), K, N, N_32align, stream);
-  } else if (b_scales.dtype() == at::ScalarType::BFloat16) {
+  } else if (b_scales.scalar_type() ==
+             torch::headeronly::ScalarType::BFloat16) {
     allspark::rearrange_kn_weight_as_n32k16_order_ldg16<__nv_bfloat16>(
         matB, reinterpret_cast<const __nv_bfloat16*>(b_scale),
         reinterpret_cast<const __nv_bfloat16*>(b_zero), matB_reorder,
@@ -157,7 +168,7 @@ void rearrange_kn_weight_as_n32k16_order(
   }
 }
 
-TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
+STABLE_TORCH_LIBRARY_IMPL(_C, CUDA, m) {
   m.impl("rearrange_kn_weight_as_n32k16_order",
-         &rearrange_kn_weight_as_n32k16_order);
+         TORCH_BOX(&rearrange_kn_weight_as_n32k16_order));
 }
diff --git a/csrc/quantization/gptq_allspark/allspark_utils.cuh b/csrc/libtorch_stable/quantization/gptq_allspark/allspark_utils.cuh
similarity index 99%
rename from csrc/quantization/gptq_allspark/allspark_utils.cuh
rename to csrc/libtorch_stable/quantization/gptq_allspark/allspark_utils.cuh
index c7a6e96aff4b..ce96c2d11fea 100644
--- a/csrc/quantization/gptq_allspark/allspark_utils.cuh
+++ b/csrc/libtorch_stable/quantization/gptq_allspark/allspark_utils.cuh
@@ -1,13 +1,12 @@
 #pragma once
 
-#include <torch/all.h>
-#include <c10/cuda/CUDAGuard.h>
-#include <ATen/cuda/CUDAContext.h>
-#include <cuda_runtime.h>
-#include <cuda_fp16.h>
 #include <cuda_bf16.h>
+#include <cuda_fp16.h>
+#include <cuda_runtime.h>
+
 #include <iostream>
-#include "../marlin/marlin_dtypes.cuh"
+
+#include "quantization/marlin/marlin_dtypes.cuh"
 using marlin::MarlinScalarType2;
 
 namespace allspark {
diff --git a/csrc/quantization/hadamard/hadacore/hadamard_transform_cuda.cu b/csrc/libtorch_stable/quantization/hadamard/hadacore/hadamard_transform_cuda.cu
similarity index 93%
rename from csrc/quantization/hadamard/hadacore/hadamard_transform_cuda.cu
rename to csrc/libtorch_stable/quantization/hadamard/hadacore/hadamard_transform_cuda.cu
index aff11326d78e..665585caa46c 100644
--- a/csrc/quantization/hadamard/hadacore/hadamard_transform_cuda.cu
+++ b/csrc/libtorch_stable/quantization/hadamard/hadacore/hadamard_transform_cuda.cu
@@ -11,18 +11,16 @@ Redistribution and use in source and binary forms, with or without modification,
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ***********/
 
-#include <torch/all.h>
+#include "libtorch_stable/torch_utils.h"
+#include "libtorch_stable/dispatch_utils.h"
+
+#include <torch/csrc/stable/library.h>
+#include <torch/csrc/stable/macros.h>
+
 #include <stdint.h>
 #include <cuda_runtime.h>
 #include <mma.h>
 #include <cuda/annotated_ptr>
-#include <c10/cuda/CUDAException.h>
-
-#include <ATen/cuda/CUDAContext.h>
-#include <c10/cuda/CUDAGuard.h>
-
-#include "core/registration.h"
-#include "dispatch_utils.h"
 
 namespace hadacore {
 
@@ -65,12 +63,12 @@ constexpr int launch_configs_big[7][3] = {
 };
 
 // a 4x2, b 2x2, c 2x2
-template <torch::ScalarType dtype>
+template <torch::headeronly::ScalarType dtype>
 __device__ __forceinline__ void mma_m16_n8_k16_b16_b16_b16_noacc(b32 a0, b32 a1, b32 a2, b32 a3, b32 b0, b32 b1, b32& c0, b32& c1){
-    static_assert(dtype == torch::ScalarType::Half || dtype == torch::ScalarType::BFloat16);
+    static_assert(dtype == torch::headeronly::ScalarType::Half || dtype == torch::headeronly::ScalarType::BFloat16);
     // d, a, b, c
     b32 zero = 0;
-    if constexpr(dtype == torch::ScalarType::Half) {
+    if constexpr(dtype == torch::headeronly::ScalarType::Half) {
         asm (
             "mma.sync.aligned.m16n8k16.row.col.f16.f16.f16.f16 "
             "{%0, %1}, {%2, %3, %4, %5}, {%6, %7}, {%8, %9};\n\t"
@@ -89,7 +87,7 @@ __device__ __forceinline__ void mma_m16_n8_k16_b16_b16_b16_noacc(b32 a0, b32 a1,
 }
 
 // a 4x2, b 4x2, c 4x2
-template <torch::ScalarType dtype>
+template <torch::headeronly::ScalarType dtype>
 __device__ __forceinline__ void mma_m16_n16_k16_b16_b16_b16_noacc(b32 a0, b32 a1, b32 a2, b32 a3, b32 b0, b32 b1, b32 b2, b32 b3, b32& c0, b32& c1, b32& c2, b32& c3){
     mma_m16_n8_k16_b16_b16_b16_noacc<dtype>(a0, a1, a2, a3, b0, b1, c0, c1);
     mma_m16_n8_k16_b16_b16_b16_noacc<dtype>(a0, a1, a2, a3, b2, b3, c2, c3);
@@ -108,11 +106,11 @@ __device__ __forceinline__ void matrix_transpose_m8_n8_b16_inplace(b32& a0) {
 #define n_p(i) ((val_1n[i] & 0x0000FFFF) | val_1p[i] << 16)
 #define n_n(i) ((val_1n[i] & 0x0000FFFF) | val_1n[i] << 16)
 
-template<int64_t num_chunks, int64_t warps_per_block, int64_t log_had_size, int64_t blocks_per_sm, bool enable_mask, torch::ScalarType dtype>
+template<int64_t num_chunks, int64_t warps_per_block, int64_t log_had_size, int64_t blocks_per_sm, bool enable_mask, torch::headeronly::ScalarType dtype>
 __global__ void __launch_bounds__(32 * warps_per_block, blocks_per_sm)
 // a is column major, b is row major
 hadamard_transform_kernel(b16* a, b16* out, int total_num_chunks) {
-    static_assert(dtype == torch::ScalarType::Half || dtype == torch::ScalarType::BFloat16, "Only fp16 and bf16 supported currently");
+    static_assert(dtype == torch::headeronly::ScalarType::Half || dtype == torch::headeronly::ScalarType::BFloat16, "Only fp16 and bf16 supported currently");
 
     b32 b_frag_all[num_chunks][4]; // for all chunks, holds matrix fragment (which takes 4 regs of b16x2 * 32 threads)
 
@@ -162,8 +160,8 @@ hadamard_transform_kernel(b16* a, b16* out, int total_num_chunks) {
     constexpr b16 bf16_1p[4] = {0b0011111100110101, 0b0011111100000000, 0b0011111010110101, 0b0011111010000000};
     constexpr b16 bf16_1n[4] = {0b1011111100110101, 0b1011111100000000, 0b1011111010110101, 0b1011111010000000};
 
-    #define val_type_1p(i) (((dtype) == torch::ScalarType::Half) ? (fp16_1p[i]) : (bf16_1p[i]))
-    #define val_type_1n(i) (((dtype) == torch::ScalarType::Half) ? (fp16_1n[i]) : (bf16_1n[i]))
+    #define val_type_1p(i) (((dtype) == torch::headeronly::ScalarType::Half) ? (fp16_1p[i]) : (bf16_1p[i]))
+    #define val_type_1n(i) (((dtype) == torch::headeronly::ScalarType::Half) ? (fp16_1n[i]) : (bf16_1n[i]))
     constexpr b16 val_1p[4] = {val_type_1p(0), val_type_1p(1), val_type_1p(2), val_type_1p(3)};
     constexpr b16 val_1n[4] = {val_type_1n(0), val_type_1n(1), val_type_1n(2), val_type_1n(3)};
 
@@ -684,14 +682,14 @@ constexpr int64_t ceil_div(int64_t a, int64_t b) {
     return (a + b - 1) / b;
 }
 
-template <torch::ScalarType dtype, int64_t chunks_per_warp, int64_t warps_per_block, int64_t log_had_size, int64_t blocks_per_sm, bool check_masking = false>
+template <torch::headeronly::ScalarType dtype, int64_t chunks_per_warp, int64_t warps_per_block, int64_t log_had_size, int64_t blocks_per_sm, bool check_masking = false>
 void __forceinline__ run_kernel(b16* a_mat, b16* out, int64_t num_chunks, cudaStream_t stream) {
     int64_t shared_size = chunks_per_warp * warps_per_block * 128 * 4;
     dim3 block_size = 32 * warps_per_block;
 
     #define CHECK_SHARED_LIM() {                                                                              \
         if (shared_size > 48 * 1024) {                                                                        \
-            C10_CUDA_CHECK(cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, 65536)); \
+            STD_CUDA_CHECK(cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, 65536)); \
         }                                                                                                     \
     }                                                                                                         \
 
@@ -714,10 +712,10 @@ void __forceinline__ run_kernel(b16* a_mat, b16* out, int64_t num_chunks, cudaSt
         kernel<<<dim3(grid_size), dim3(block_size), shared_size, stream>>>(a_mat, out, num_chunks);
     }
     
-    C10_CUDA_KERNEL_LAUNCH_CHECK();
+    STD_CUDA_KERNEL_LAUNCH_CHECK();
 }
 
-template <torch::ScalarType dtype>
+template <torch::headeronly::ScalarType dtype>
 void run_fht(void* a_mat_ptr, void* out_ptr, int64_t numel, int64_t had_size, cudaStream_t stream) {
     int64_t num_chunks = numel / 256; // caller required to ensure divisible by 256
     // for size 256, use (2, 1)
@@ -764,54 +762,54 @@ void run_fht(void* a_mat_ptr, void* out_ptr, int64_t numel, int64_t had_size, cu
     }
 }
 
-template void run_fht<torch::ScalarType::Half>(void* a_mat_ptr, void* out_ptr, int64_t numel, int64_t had_size, cudaStream_t stream);
-template void run_fht<torch::ScalarType::BFloat16>(void* a_mat_ptr, void* out_ptr, int64_t numel, int64_t had_size, cudaStream_t stream);
+template void run_fht<torch::headeronly::ScalarType::Half>(void* a_mat_ptr, void* out_ptr, int64_t numel, int64_t had_size, cudaStream_t stream);
+template void run_fht<torch::headeronly::ScalarType::BFloat16>(void* a_mat_ptr, void* out_ptr, int64_t numel, int64_t had_size, cudaStream_t stream);
 
 }  // namespace hadacore
 
 constexpr bool is_power_of_two(int x) { return x && !(x & (x - 1)); }
 
-torch::Tensor hadacore_transform(torch::Tensor& x, bool inplace) {
+torch::stable::Tensor hadacore_transform(torch::stable::Tensor& x, bool inplace) {
     auto dtype = x.scalar_type();
-    TORCH_CHECK(dtype == torch::ScalarType::Half || dtype == torch::ScalarType::BFloat16, "Only fp16 and bf16 supported currently");
-    TORCH_CHECK(x.is_cuda());
-    
+    STD_TORCH_CHECK(dtype == torch::headeronly::ScalarType::Half || dtype == torch::headeronly::ScalarType::BFloat16, "Only fp16 and bf16 supported currently");
+    STD_TORCH_CHECK(x.is_cuda());
+
     const int had_size = x.size(-1);
-    TORCH_CHECK(is_power_of_two(had_size) && (had_size <= (1U << 15)),
+    STD_TORCH_CHECK(is_power_of_two(had_size) && (had_size <= (1U << 15)),
         "Only power of two Hadamard sizes up to 2^15 are supported, got ", had_size);
-    
+
     const auto res_shape = x.sizes();
-    x = x.reshape({-1, had_size});
-    
+    x = torch::stable::reshape(x, {-1, had_size});
+
     auto numel = x.numel();
     if (numel % 256 != 0) {
-        x = torch::nn::functional::pad(x, torch::nn::functional::PadFuncOptions({0, 0, 0, (256 - numel % 256) / had_size}));
+        x = torch::stable::pad(x, {0, 0, 0, (256 - numel % 256) / had_size});
     }
-    
+
     if (x.stride(-1) != 1) {
-        x = x.contiguous();
+        x = torch::stable::contiguous(x);
     }
-    torch::Tensor out = inplace ? x : torch::empty_like(x);
+    torch::stable::Tensor out = inplace ? x : torch::stable::empty_like(x);
 
-    at::cuda::CUDAGuard device_guard{(char)x.get_device()};
-    auto stream = at::cuda::getCurrentCUDAStream().stream();
+    torch::stable::accelerator::DeviceGuard device_guard(x.get_device_index());
+    auto stream = get_current_cuda_stream();
 
-    VLLM_DISPATCH_HALF_TYPES(x.scalar_type(), "hadacore_transform_runfht", [&] {
-      auto constexpr SCALAR_TYPE = c10::CppTypeToScalarType<scalar_t>::value;
+    VLLM_STABLE_DISPATCH_HALF_TYPES(x.scalar_type(), "hadacore_transform_runfht", [&] {
+      auto constexpr SCALAR_TYPE = torch::headeronly::CppTypeToScalarType<scalar_t>::value;
       hadacore::run_fht<SCALAR_TYPE>(x.data_ptr(), x.data_ptr(), x.numel(), had_size, stream);
     });
 
     if (numel % 256 != 0) {
-        out = out.narrow(0, 0, numel / had_size);
+        out = torch::stable::narrow(out, 0, 0, numel / had_size);
     }
 
     if (inplace && out.data_ptr() != x.data_ptr()) {
-        x.copy_(out.view(res_shape));
+        torch::stable::copy_(x, torch::stable::view(out, res_shape));
         return x;
     }
-    return out.reshape(res_shape);
+    return torch::stable::reshape(out, res_shape);
 }
 
-TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
-    m.impl("hadacore_transform", &hadacore_transform);
+STABLE_TORCH_LIBRARY_IMPL(_C, CUDA, m) {
+    m.impl("hadacore_transform", TORCH_BOX(&hadacore_transform));
 }
diff --git a/csrc/quantization/w8a8/cutlass/c3x/cutlass_gemm_caller.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/cutlass_gemm_caller.cuh
similarity index 84%
rename from csrc/quantization/w8a8/cutlass/c3x/cutlass_gemm_caller.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/cutlass_gemm_caller.cuh
index 26de32ce2b16..ae40c0989e03 100644
--- a/csrc/quantization/w8a8/cutlass/c3x/cutlass_gemm_caller.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/cutlass_gemm_caller.cuh
@@ -2,9 +2,10 @@
 
 // clang-format will break include orders
 // clang-format off
-#include <torch/all.h>
+#include <torch/csrc/stable/tensor.h>
+#include <torch/csrc/stable/ops.h>
 
-#include <ATen/cuda/CUDAContext.h>
+#include "libtorch_stable/torch_utils.h"
 
 #include "cutlass/cutlass.h"
 
@@ -25,14 +26,14 @@
 namespace vllm::c3x {
 
 static inline cute::Shape<int, int, int, int> get_problem_shape(
-    torch::Tensor const& a, torch::Tensor const& b) {
+    torch::stable::Tensor const& a, torch::stable::Tensor const& b) {
   int32_t m = a.size(0), n = b.size(1), k = a.size(1);
   return {m, n, k, 1};
 }
 
 template <typename GemmKernel>
 void cutlass_gemm_caller(
-    torch::Device device, cute::Shape<int, int, int, int> prob_shape,
+    torch::stable::Device device, cute::Shape<int, int, int, int> prob_shape,
     typename GemmKernel::MainloopArguments mainloop_args,
     typename GemmKernel::EpilogueArguments epilogue_args,
     typename GemmKernel::TileSchedulerArguments scheduler = {}) {
@@ -50,19 +51,20 @@ void cutlass_gemm_caller(
   CUTLASS_CHECK(gemm_op.can_implement(args));
 
   size_t workspace_size = gemm_op.get_workspace_size(args);
-  auto const workspace_options =
-      torch::TensorOptions().dtype(torch::kUInt8).device(device);
-  auto workspace = torch::empty(workspace_size, workspace_options);
+  auto workspace =
+      torch::stable::empty(workspace_size, torch::headeronly::ScalarType::Byte,
+                           std::nullopt, device);
 
-  auto stream = at::cuda::getCurrentCUDAStream(device.index());
+  auto stream = get_current_cuda_stream(device.index());
 
   cutlass::Status status = gemm_op.run(args, workspace.data_ptr(), stream);
   CUTLASS_CHECK(status);
 }
 
 template <typename Gemm, typename... EpilogueArgs>
-void cutlass_gemm_caller(torch::Tensor& out, torch::Tensor const& a,
-                         torch::Tensor const& b,
+void cutlass_gemm_caller(torch::stable::Tensor& out,
+                         torch::stable::Tensor const& a,
+                         torch::stable::Tensor const& b,
                          EpilogueArgs&&... epilogue_params) {
   using ElementAB = typename Gemm::ElementAB;
   using ElementC = typename Gemm::ElementC;
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm.cuh
similarity index 97%
rename from csrc/quantization/w8a8/cutlass/c3x/scaled_mm.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm.cuh
index 546e1eec64bb..952931103c67 100644
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm.cuh
@@ -141,7 +141,7 @@ struct cutlass_3x_gemm_sm100 {
               sizeof(typename CollectiveEpilogue::SharedStorage))>,
           KernelSchedule>::CollectiveOp;
 
-  using GemmKernel = enable_sm100f_only<cutlass::gemm::kernel::GemmUniversal<
+  using GemmKernel = enable_sm100_to_sm120<cutlass::gemm::kernel::GemmUniversal<
       Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue, void>>;
 };
 
@@ -202,7 +202,7 @@ struct cutlass_3x_gemm_sm120 {
               sizeof(typename CollectiveEpilogue::SharedStorage))>,
           KernelSchedule>::CollectiveOp;
 
-  using GemmKernel = enable_sm120_only<cutlass::gemm::kernel::GemmUniversal<
+  using GemmKernel = enable_sm120_family<cutlass::gemm::kernel::GemmUniversal<
       Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue, void>>;
 };
 
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_azp_sm90_int8.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_azp_sm90_int8.cu
similarity index 51%
rename from csrc/quantization/w8a8/cutlass/c3x/scaled_mm_azp_sm90_int8.cu
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_azp_sm90_int8.cu
index 4cd38f4975df..bc088cf633f4 100644
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_azp_sm90_int8.cu
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_azp_sm90_int8.cu
@@ -4,13 +4,12 @@
 
 namespace vllm {
 
-void cutlass_scaled_mm_azp_sm90_int8(torch::Tensor& out, torch::Tensor const& a,
-                                     torch::Tensor const& b,
-                                     torch::Tensor const& a_scales,
-                                     torch::Tensor const& b_scales,
-                                     torch::Tensor const& azp_adj,
-                                     std::optional<torch::Tensor> const& azp,
-                                     std::optional<torch::Tensor> const& bias) {
+void cutlass_scaled_mm_azp_sm90_int8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, torch::stable::Tensor const& azp_adj,
+    std::optional<torch::stable::Tensor> const& azp,
+    std::optional<torch::stable::Tensor> const& bias) {
   if (azp) {
     return cutlass_scaled_mm_sm90_int8_epilogue<
         c3x::ScaledEpilogueBiasAzpToken>(out, a, b, a_scales, b_scales, azp_adj,
diff --git a/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8.cu
new file mode 100644
index 000000000000..f3df69850ec6
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8.cu
@@ -0,0 +1,22 @@
+#include "scaled_mm_kernels.hpp"
+#include "scaled_mm_blockwise_sm100_fp8_dispatch.cuh"
+#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
+
+namespace vllm {
+
+void cutlass_scaled_mm_blockwise_sm100_fp8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales) {
+  if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
+    cutlass_gemm_blockwise_sm100_fp8_dispatch<cutlass::bfloat16_t>(
+        out, a, b, a_scales, b_scales);
+
+  } else {
+    STD_TORCH_CHECK(out.scalar_type() == torch::headeronly::ScalarType::Half);
+    cutlass_gemm_blockwise_sm100_fp8_dispatch<cutlass::half_t>(
+        out, a, b, a_scales, b_scales);
+  }
+}
+
+}  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8_dispatch.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8_dispatch.cuh
similarity index 92%
rename from csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8_dispatch.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8_dispatch.cuh
index db3b26c084ee..8f74f2991469 100644
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8_dispatch.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8_dispatch.cuh
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <torch/headeronly/util/shim_utils.h>
+
 #include "cuda_utils.h"
 #include "cutlass/cutlass.h"
 #include "cutlass/numeric_types.h"
@@ -123,17 +125,17 @@ struct cutlass_3x_gemm_fp8_blockwise {
           MainloopScheduler
       >::CollectiveOp>;
 
-  using KernelType = enable_sm100f_only<cutlass::gemm::kernel::GemmUniversal<
+  using KernelType = enable_sm100_to_sm120<cutlass::gemm::kernel::GemmUniversal<
       Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue>>;
 
   struct GemmKernel : public KernelType {};
 };
 
 template <typename Gemm>
-void cutlass_gemm_caller_blockwise(torch::Tensor& out, torch::Tensor const& a,
-                                   torch::Tensor const& b,
-                                   torch::Tensor const& a_scales,
-                                   torch::Tensor const& b_scales) {
+void cutlass_gemm_caller_blockwise(torch::stable::Tensor& out, torch::stable::Tensor const& a,
+                                   torch::stable::Tensor const& b,
+                                   torch::stable::Tensor const& a_scales,
+                                   torch::stable::Tensor const& b_scales) {
   static constexpr bool swap_ab = Gemm::swap_ab;
   using GemmKernel = typename Gemm::GemmKernel;
   using StrideA = typename Gemm::GemmKernel::StrideA;
@@ -200,11 +202,11 @@ void cutlass_gemm_caller_blockwise(torch::Tensor& out, torch::Tensor const& a,
 }
 
 template <typename OutType>
-void cutlass_gemm_blockwise_sm100_fp8_dispatch(torch::Tensor& out,
-                                               torch::Tensor const& a,
-                                               torch::Tensor const& b,
-                                               torch::Tensor const& a_scales,
-                                               torch::Tensor const& b_scales) {
+void cutlass_gemm_blockwise_sm100_fp8_dispatch(torch::stable::Tensor& out,
+                                               torch::stable::Tensor const& a,
+                                               torch::stable::Tensor const& b,
+                                               torch::stable::Tensor const& a_scales,
+                                               torch::stable::Tensor const& b_scales) {
   int32_t m = a.size(0), n = b.size(1), k = a.size(1), sms;
   cudaDeviceGetAttribute(&sms, cudaDevAttrMultiProcessorCount, a.get_device());
 
diff --git a/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8.cu
new file mode 100644
index 000000000000..7ceb0697df2a
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8.cu
@@ -0,0 +1,22 @@
+#include "scaled_mm_kernels.hpp"
+#include "scaled_mm_blockwise_sm120_fp8_dispatch.cuh"
+#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
+
+namespace vllm {
+
+void cutlass_scaled_mm_blockwise_sm120_fp8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales) {
+  if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
+    cutlass_gemm_blockwise_sm120_fp8_dispatch<cutlass::bfloat16_t>(
+        out, a, b, a_scales, b_scales);
+
+  } else {
+    STD_TORCH_CHECK(out.scalar_type() == torch::headeronly::ScalarType::Half);
+    cutlass_gemm_blockwise_sm120_fp8_dispatch<cutlass::half_t>(
+        out, a, b, a_scales, b_scales);
+  }
+}
+
+}  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8_dispatch.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8_dispatch.cuh
similarity index 61%
rename from csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8_dispatch.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8_dispatch.cuh
index 0701cb5866cf..a9008ce44240 100644
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8_dispatch.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8_dispatch.cuh
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <torch/headeronly/util/shim_utils.h>
+
 #include "cuda_utils.h"
 #include "cutlass/cutlass.h"
 #include "cutlass/numeric_types.h"
@@ -24,8 +26,10 @@ using namespace cute;
 template <class OutType, int ScaleGranularityM,
           int ScaleGranularityN, int ScaleGranularityK,
           class MmaTileShape, class ClusterShape,
-          class EpilogueScheduler, class MainloopScheduler>
+          class EpilogueScheduler, class MainloopScheduler,
+          bool swap_ab_ = false>
 struct cutlass_3x_gemm_fp8_blockwise {
+  static constexpr bool swap_ab = swap_ab_;
   using ElementAB = cutlass::float_e4m3_t;
 
   using ElementA = ElementAB;
@@ -53,9 +57,13 @@ struct cutlass_3x_gemm_fp8_blockwise {
   using ElementCompute = float;
   using ElementBlockScale = float; 
 
-  using ScaleConfig = cutlass::detail::Sm120BlockwiseScaleConfig<
+  using ScaleConfig = conditional_t<swap_ab,
+      cutlass::detail::Sm120BlockwiseScaleConfig<
+        ScaleGranularityM, ScaleGranularityN, ScaleGranularityK,
+        cute::UMMA::Major::K, cute::UMMA::Major::MN>,
+      cutlass::detail::Sm120BlockwiseScaleConfig<
         ScaleGranularityM, ScaleGranularityN, ScaleGranularityK,
-        cute::UMMA::Major::MN, cute::UMMA::Major::K>;
+        cute::UMMA::Major::MN, cute::UMMA::Major::K>>;
 
   // layout_SFA and layout_SFB cannot be swapped since they are deduced.
   using LayoutSFA = decltype(ScaleConfig::deduce_layoutSFA());
@@ -76,17 +84,32 @@ struct cutlass_3x_gemm_fp8_blockwise {
       ElementAccumulator,
       ElementCompute,
       ElementC,
-      LayoutC,
+      conditional_t<swap_ab, LayoutC_Transpose, LayoutC>,
       AlignmentC,
       ElementD,
-      LayoutD,
+      conditional_t<swap_ab, LayoutD_Transpose, LayoutD>,
       AlignmentD,
       EpilogueScheduler,
       DefaultOperation
   >::CollectiveOp;
  
   using StageCountType = cutlass::gemm::collective::StageCountAuto; 
-  using CollectiveMainloop = 
+  using CollectiveMainloop = conditional_t<swap_ab,
+      typename cutlass::gemm::collective::CollectiveBuilder<
+          ArchTag,
+          OperatorClass,
+          ElementB,
+          cute::tuple<LayoutB_Transpose, LayoutSFA>,
+          AlignmentB,
+          ElementA,
+          cute::tuple<LayoutA_Transpose, LayoutSFB>,
+          AlignmentA,
+          ElementAccumulator,
+          MmaTileShape,
+          ClusterShape,
+          cutlass::gemm::collective::StageCountAutoCarveout<static_cast<int>(sizeof(typename CollectiveEpilogue::SharedStorage))>,
+          MainloopScheduler
+      >::CollectiveOp,
       typename cutlass::gemm::collective::CollectiveBuilder<
           ArchTag,
           OperatorClass,
@@ -101,7 +124,7 @@ struct cutlass_3x_gemm_fp8_blockwise {
           ClusterShape,
           cutlass::gemm::collective::StageCountAutoCarveout<static_cast<int>(sizeof(typename CollectiveEpilogue::SharedStorage))>,
           MainloopScheduler
-      >::CollectiveOp;
+      >::CollectiveOp>;
 
   // SM12x family to support both SM120 (RTX 5090) and SM121 (DGX Spark)
   using KernelType = enable_sm120_family<cutlass::gemm::kernel::GemmUniversal<
@@ -113,7 +136,7 @@ struct cutlass_3x_gemm_fp8_blockwise {
 // Tile configurations for different M ranges
 template <typename OutType>
 struct sm120_blockwise_fp8_config_default {
-  // M > 256: use 128x128x128 tile with Cooperative (Auto) schedule
+  // use 128x128x128 tile with Cooperative (Auto) schedule
   using KernelSchedule = cutlass::gemm::collective::KernelScheduleAuto;
   using EpilogueSchedule = cutlass::epilogue::collective::EpilogueScheduleAuto;
   using TileShape = Shape<_128, _128, _128>;
@@ -125,8 +148,8 @@ struct sm120_blockwise_fp8_config_default {
 };
 
 template <typename OutType>
-struct sm120_blockwise_fp8_config_M64 {
-  // M in [1, 256]: use 64x128x128 tile with Pingpong schedule
+struct sm120_blockwise_fp8_config_pingpong {
+  // use 64x128x128 tile with Pingpong schedule
   using KernelSchedule = cutlass::gemm::KernelTmaWarpSpecializedBlockwisePingpongSm120;
   using EpilogueSchedule = cutlass::epilogue::collective::EpilogueScheduleAuto;
   using TileShape = Shape<_64, _128, _128>;
@@ -137,11 +160,24 @@ struct sm120_blockwise_fp8_config_M64 {
       EpilogueSchedule, KernelSchedule>;
 };
 
+template <typename OutType>
+struct sm120_blockwise_fp8_config_swapab {
+  // use 128x32x128 tile with Cooperative schedule
+  using KernelSchedule = cutlass::gemm::KernelTmaWarpSpecializedBlockwiseCooperativeSm120;
+  using EpilogueSchedule = cutlass::epilogue::collective::EpilogueScheduleAuto;
+  using TileShape = Shape<_128, _32, _128>;
+  using ClusterShape = Shape<_1, _1, _1>;
+  using Gemm = cutlass_3x_gemm_fp8_blockwise<
+      OutType, 128, 1, 128, TileShape, ClusterShape,
+      EpilogueSchedule, KernelSchedule, true>;
+};
+
 template <typename Gemm>
-void cutlass_gemm_caller_blockwise(torch::Tensor& out, torch::Tensor const& a,
-                                   torch::Tensor const& b,
-                                   torch::Tensor const& a_scales,
-                                   torch::Tensor const& b_scales) {
+void cutlass_gemm_caller_blockwise(torch::stable::Tensor& out, torch::stable::Tensor const& a,
+                                   torch::stable::Tensor const& b,
+                                   torch::stable::Tensor const& a_scales,
+                                   torch::stable::Tensor const& b_scales) {
+  static constexpr bool swap_ab = Gemm::swap_ab;
   using GemmKernel = typename Gemm::GemmKernel;
   using StrideA = typename Gemm::GemmKernel::StrideA;
   using StrideB = typename Gemm::GemmKernel::StrideB;
@@ -165,11 +201,13 @@ void cutlass_gemm_caller_blockwise(torch::Tensor& out, torch::Tensor const& a,
   b_stride =
       cutlass::make_cute_packed_stride(StrideB{}, cute::make_shape(n, k, 1));
   c_stride =
-      cutlass::make_cute_packed_stride(StrideC{}, cute::make_shape(m, n, 1));
+      cutlass::make_cute_packed_stride(StrideC{}, swap_ab ? cute::make_shape(n, m, 1) : cute::make_shape(m, n, 1));
 
-  LayoutSFA layout_SFA = 
+  LayoutSFA layout_SFA = swap_ab ?
+      ScaleConfig::tile_atom_to_shape_SFA(make_shape(n, m, k, 1)) :
       ScaleConfig::tile_atom_to_shape_SFA(make_shape(m, n, k, 1));
-  LayoutSFB layout_SFB = 
+  LayoutSFB layout_SFB = swap_ab ?
+      ScaleConfig::tile_atom_to_shape_SFB(make_shape(n, m, k, 1)) :
       ScaleConfig::tile_atom_to_shape_SFB(make_shape(m, n, k, 1));
 
   auto a_ptr = static_cast<ElementAB const*>(a.data_ptr());
@@ -178,15 +216,24 @@ void cutlass_gemm_caller_blockwise(torch::Tensor& out, torch::Tensor const& a,
   auto b_scales_ptr = static_cast<ElementBlockScale const*>(b_scales.data_ptr());
 
   typename GemmKernel::MainloopArguments mainloop_args{};
-  mainloop_args.ptr_A = a_ptr;
-  mainloop_args.dA = a_stride;
-  mainloop_args.ptr_B = b_ptr;
-  mainloop_args.dB = b_stride;
-  mainloop_args.ptr_SFA = a_scales_ptr;
   mainloop_args.layout_SFA = layout_SFA;
-  mainloop_args.ptr_SFB = b_scales_ptr;
   mainloop_args.layout_SFB = layout_SFB;
-  auto prob_shape = cute::make_shape(m, n, k, 1);
+  if (swap_ab) {
+    mainloop_args.ptr_A = b_ptr;
+    mainloop_args.dA = b_stride;
+    mainloop_args.ptr_B = a_ptr;
+    mainloop_args.dB = a_stride;
+    mainloop_args.ptr_SFA = b_scales_ptr;
+    mainloop_args.ptr_SFB = a_scales_ptr;
+  } else {
+    mainloop_args.ptr_A = a_ptr;
+    mainloop_args.dA = a_stride;
+    mainloop_args.ptr_B = b_ptr;
+    mainloop_args.dB = b_stride;
+    mainloop_args.ptr_SFA = a_scales_ptr;
+    mainloop_args.ptr_SFB = b_scales_ptr;
+  }
+  auto prob_shape = swap_ab ? cute::make_shape(n, m, k, 1) : cute::make_shape(m, n, k, 1);
 
   auto c_ptr = static_cast<ElementD*>(out.data_ptr());
   typename GemmKernel::EpilogueArguments epilogue_args{
@@ -196,21 +243,32 @@ void cutlass_gemm_caller_blockwise(torch::Tensor& out, torch::Tensor const& a,
 }
 
 template <typename OutType>
-void cutlass_gemm_blockwise_sm120_fp8_dispatch(torch::Tensor& out,
-                                               torch::Tensor const& a,
-                                               torch::Tensor const& b,
-                                               torch::Tensor const& a_scales,
-                                               torch::Tensor const& b_scales) {
+void cutlass_gemm_blockwise_sm120_fp8_dispatch(torch::stable::Tensor& out,
+                                               torch::stable::Tensor const& a,
+                                               torch::stable::Tensor const& b,
+                                               torch::stable::Tensor const& a_scales,
+                                               torch::stable::Tensor const& b_scales) {
   int M = a.size(0);
-  if (M <= 256) {
-    using Gemm = typename sm120_blockwise_fp8_config_M64<OutType>::Gemm;
+  // more heuristic tuning can be done here by checking N/K dimensions as well
+  bool swap_ab = (M <= 64) || (M % 4 != 0);
+
+  if (!swap_ab) {
+    if (M <= 256) {
+      using Gemm = typename sm120_blockwise_fp8_config_pingpong<OutType>::Gemm;
+      return cutlass_gemm_caller_blockwise<Gemm>(
+          out, a, b, a_scales, b_scales);
+    }
+    // M > 256: use default 128x128x128 config with Cooperative (Auto) schedule
+    using Gemm = typename sm120_blockwise_fp8_config_default<OutType>::Gemm;
+    return cutlass_gemm_caller_blockwise<Gemm>(
+        out, a, b, a_scales, b_scales);
+  } else {
+    // Swap A/B for small M to improve performance
+    // Use TILE_N=32 as the minimum compatible tile size.
+    using Gemm = typename sm120_blockwise_fp8_config_swapab<OutType>::Gemm;
     return cutlass_gemm_caller_blockwise<Gemm>(
         out, a, b, a_scales, b_scales);
   }
-  // M > 256: use default 128x128x128 config with Cooperative (Auto) schedule
-  using Gemm = typename sm120_blockwise_fp8_config_default<OutType>::Gemm;
-  return cutlass_gemm_caller_blockwise<Gemm>(
-      out, a, b, a_scales, b_scales);
 }
 
 }  // namespace vllm
diff --git a/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8.cu
new file mode 100644
index 000000000000..d3318c487675
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8.cu
@@ -0,0 +1,23 @@
+
+#include "scaled_mm_kernels.hpp"
+#include "scaled_mm_blockwise_sm90_fp8_dispatch.cuh"
+#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
+
+namespace vllm {
+
+void cutlass_scaled_mm_blockwise_sm90_fp8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales) {
+  if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
+    cutlass_gemm_blockwise_sm90_fp8_dispatch<cutlass::bfloat16_t>(
+        out, a, b, a_scales, b_scales);
+
+  } else {
+    STD_TORCH_CHECK(out.scalar_type() == torch::headeronly::ScalarType::Half);
+    cutlass_gemm_blockwise_sm90_fp8_dispatch<cutlass::half_t>(
+        out, a, b, a_scales, b_scales);
+  }
+}
+
+}  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8_dispatch.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8_dispatch.cuh
similarity index 87%
rename from csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8_dispatch.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8_dispatch.cuh
index c40d49966271..cf62e81fd75b 100644
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8_dispatch.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8_dispatch.cuh
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <torch/headeronly/util/shim_utils.h>
+
 #include "cutlass/cutlass.h"
 #include "cutlass/numeric_types.h"
 
@@ -101,10 +103,10 @@ struct cutlass_3x_gemm_fp8_blockwise {
 };
 
 template <typename Gemm>
-void cutlass_gemm_caller_blockwise(torch::Tensor& out, torch::Tensor const& a,
-                                   torch::Tensor const& b,
-                                   torch::Tensor const& a_scales,
-                                   torch::Tensor const& b_scales) {
+void cutlass_gemm_caller_blockwise(torch::stable::Tensor& out, torch::stable::Tensor const& a,
+                                   torch::stable::Tensor const& b,
+                                   torch::stable::Tensor const& a_scales,
+                                   torch::stable::Tensor const& b_scales) {
   using GemmKernel = typename Gemm::GemmKernel;
   using StrideA = typename Gemm::GemmKernel::StrideA;
   using StrideB = typename Gemm::GemmKernel::StrideB;
@@ -120,7 +122,7 @@ void cutlass_gemm_caller_blockwise(torch::Tensor& out, torch::Tensor const& a,
 
   int32_t m = a.size(0), n = b.size(1), k = a.size(1);
 
-  TORCH_CHECK(m % 4 == 0, "m must be divisible by 4");
+  STD_TORCH_CHECK(m % 4 == 0, "m must be divisible by 4");
 
   StrideA a_stride;
   StrideB b_stride;
@@ -161,11 +163,11 @@ void cutlass_gemm_caller_blockwise(torch::Tensor& out, torch::Tensor const& a,
 }
 
 template <typename OutType>
-void cutlass_gemm_blockwise_sm90_fp8_dispatch(torch::Tensor& out,
-                                              torch::Tensor const& a,
-                                              torch::Tensor const& b,
-                                              torch::Tensor const& a_scales,
-                                              torch::Tensor const& b_scales) {
+void cutlass_gemm_blockwise_sm90_fp8_dispatch(torch::stable::Tensor& out,
+                                              torch::stable::Tensor const& a,
+                                              torch::stable::Tensor const& b,
+                                              torch::stable::Tensor const& a_scales,
+                                              torch::stable::Tensor const& b_scales) {
   // TODO: better heuristics
   cutlass_gemm_caller_blockwise<cutlass_3x_gemm_fp8_blockwise<
       OutType, 1, 128, 128, Shape<_128, _128, _128>,
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_helper.hpp b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_helper.hpp
similarity index 55%
rename from csrc/quantization/w8a8/cutlass/c3x/scaled_mm_helper.hpp
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_helper.hpp
index 2204a49257b0..adb3de50fc1b 100644
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_helper.hpp
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_helper.hpp
@@ -1,52 +1,57 @@
-#include <torch/all.h>
+#include <torch/csrc/stable/tensor.h>
+#include <torch/headeronly/core/ScalarType.h>
 #include "cuda_utils.h"
 #include "cutlass_extensions/common.hpp"
 
 template <typename Fp8Func, typename Int8Func, typename BlockwiseFunc>
-void dispatch_scaled_mm(torch::Tensor& c, torch::Tensor const& a,
-                        torch::Tensor const& b, torch::Tensor const& a_scales,
-                        torch::Tensor const& b_scales,
-                        std::optional<torch::Tensor> const& bias,
+void dispatch_scaled_mm(torch::stable::Tensor& c,
+                        torch::stable::Tensor const& a,
+                        torch::stable::Tensor const& b,
+                        torch::stable::Tensor const& a_scales,
+                        torch::stable::Tensor const& b_scales,
+                        std::optional<torch::stable::Tensor> const& bias,
                         Fp8Func fp8_func, Int8Func int8_func,
                         BlockwiseFunc blockwise_func) {
-  TORCH_CHECK(a_scales.dtype() == torch::kFloat32);
-  TORCH_CHECK(b_scales.dtype() == torch::kFloat32);
+  STD_TORCH_CHECK(a_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+  STD_TORCH_CHECK(b_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
 
   int M = a.size(0), N = b.size(1), K = a.size(1);
 
   if ((a_scales.numel() == 1 || a_scales.numel() == a.size(0)) &&
       (b_scales.numel() == 1 || b_scales.numel() == b.size(1))) {
     // Standard per-tensor/per-token/per-channel scaling
-    TORCH_CHECK(a_scales.is_contiguous() && b_scales.is_contiguous());
-    if (a.dtype() == torch::kFloat8_e4m3fn) {
+    STD_TORCH_CHECK(a_scales.is_contiguous() && b_scales.is_contiguous());
+    if (a.scalar_type() == torch::headeronly::ScalarType::Float8_e4m3fn) {
       fp8_func(c, a, b, a_scales, b_scales, bias);
     } else {
-      TORCH_CHECK(a.dtype() == torch::kInt8);
+      STD_TORCH_CHECK(a.scalar_type() == torch::headeronly::ScalarType::Char);
       if constexpr (!std::is_same_v<Int8Func, std::nullptr_t>) {
         int8_func(c, a, b, a_scales, b_scales, bias);
       } else {
         int32_t version_num = get_sm_version_num();
-        TORCH_CHECK(
+        STD_TORCH_CHECK(
             false, "Int8 not supported on SM", version_num,
             ". Use FP8 quantization instead, or run on older arch (SM < 100).");
       }
     }
   } else {
-    TORCH_CHECK(a_scales.dim() == 2, "a scale must be 2d tensor.");
-    TORCH_CHECK(b_scales.dim() == 2, "b scale must be 2d tensor.");
+    STD_TORCH_CHECK(a_scales.dim() == 2, "a scale must be 2d tensor.");
+    STD_TORCH_CHECK(b_scales.dim() == 2, "b scale must be 2d tensor.");
     int32_t version_num = get_sm_version_num();
     if (version_num >= 90) {
-      TORCH_CHECK(
+      STD_TORCH_CHECK(
           a.size(0) == a_scales.size(0) &&
               cuda_utils::ceil_div(a.size(1), int64_t(128)) == a_scales.size(1),
           "a_scale_group_shape must be [1, 128].");
-      TORCH_CHECK(
+      STD_TORCH_CHECK(
           cuda_utils::ceil_div(b.size(0), int64_t(128)) == b_scales.size(0) &&
               cuda_utils::ceil_div(b.size(1), int64_t(128)) == b_scales.size(1),
           "b_scale_group_shape must be [128, 128].");
     }
 
-    TORCH_CHECK(!bias, "Bias not yet supported blockwise scaled_mm");
+    STD_TORCH_CHECK(!bias, "Bias not yet supported blockwise scaled_mm");
     blockwise_func(c, a, b, a_scales, b_scales);
   }
 }
diff --git a/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_kernels.hpp b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_kernels.hpp
new file mode 100644
index 000000000000..a3a3eb3e2875
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_kernels.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <torch/csrc/stable/tensor.h>
+
+namespace vllm {
+
+void cutlass_scaled_mm_sm90_fp8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales,
+    std::optional<torch::stable::Tensor> const& bias);
+
+void cutlass_scaled_mm_sm90_int8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales,
+    std::optional<torch::stable::Tensor> const& bias);
+
+void cutlass_scaled_mm_azp_sm90_int8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, torch::stable::Tensor const& azp_adj,
+    std::optional<torch::stable::Tensor> const& azp,
+    std::optional<torch::stable::Tensor> const& bias);
+
+void cutlass_scaled_mm_blockwise_sm90_fp8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales);
+
+void cutlass_scaled_mm_sm100_fp8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales,
+    std::optional<torch::stable::Tensor> const& bias);
+
+void cutlass_scaled_mm_sm120_fp8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales,
+    std::optional<torch::stable::Tensor> const& bias);
+
+void cutlass_scaled_mm_blockwise_sm100_fp8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales);
+
+void cutlass_scaled_mm_blockwise_sm120_fp8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales);
+}  // namespace vllm
diff --git a/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8.cu
new file mode 100644
index 000000000000..84040a6a2218
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8.cu
@@ -0,0 +1,33 @@
+#include "scaled_mm_kernels.hpp"
+#include "scaled_mm_sm100_fp8_dispatch.cuh"
+#include "core/batch_invariant.hpp"
+
+namespace vllm {
+
+void cutlass_scaled_mm_sm100_fp8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales,
+    std::optional<torch::stable::Tensor> const& bias) {
+  STD_TORCH_CHECK(a_scales.is_contiguous() && b_scales.is_contiguous());
+  if (bias) {
+    STD_TORCH_CHECK(bias->scalar_type() == out.scalar_type(),
+                    "currently bias dtype must match output dtype ",
+                    out.scalar_type());
+    if (vllm_is_batch_invariant()) {
+      return cutlass_scaled_mm_sm100_fp8_batch_invariant_epilogue<true>(
+          out, a, b, a_scales, b_scales, *bias);
+    }
+    return cutlass_scaled_mm_sm100_fp8_epilogue<true>(out, a, b, a_scales,
+                                                      b_scales, *bias);
+  } else {
+    if (vllm_is_batch_invariant()) {
+      return cutlass_scaled_mm_sm100_fp8_batch_invariant_epilogue<false>(
+          out, a, b, a_scales, b_scales);
+    }
+    return cutlass_scaled_mm_sm100_fp8_epilogue<false>(out, a, b, a_scales,
+                                                       b_scales);
+  }
+}
+
+}  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8_dispatch.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8_dispatch.cuh
similarity index 76%
rename from csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8_dispatch.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8_dispatch.cuh
index 311cd4bd41c5..f790b3653d57 100644
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8_dispatch.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8_dispatch.cuh
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <torch/headeronly/util/shim_utils.h>
+
 #include "scaled_mm.cuh"
 #include "cutlass_gemm_caller.cuh"
 #include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
@@ -90,7 +92,7 @@ struct cutlass_3x_gemm_sm100_fp8 {
   // -----------------------------------------------------------
   // Kernel definition
   // -----------------------------------------------------------
-  using GemmKernel = enable_sm100f_only<cutlass::gemm::kernel::GemmUniversal<
+  using GemmKernel = enable_sm100_to_sm120<cutlass::gemm::kernel::GemmUniversal<
       Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue, void>>;
 };
 
@@ -192,8 +194,9 @@ struct sm100_fp8_config_M16_swap_ab {
 };
 
 template <typename Gemm, typename... EpilogueArgs>
-void cutlass_gemm_caller_sm100_fp8(torch::Tensor& out, torch::Tensor const& a,
-                                   torch::Tensor const& b,
+void cutlass_gemm_caller_sm100_fp8(torch::stable::Tensor& out,
+                                   torch::stable::Tensor const& a,
+                                   torch::stable::Tensor const& b,
                                    EpilogueArgs&&... epilogue_params) {
   static constexpr bool swap_ab = Gemm::swap_ab;
   using ElementAB = typename Gemm::ElementAB;
@@ -237,15 +240,15 @@ void cutlass_gemm_caller_sm100_fp8(torch::Tensor& out, torch::Tensor const& a,
 
 template <typename InType, typename OutType, bool EnableBias,
           typename... EpilogueArgs>
-inline void cutlass_gemm_sm100_fp8_dispatch(torch::Tensor& out,
-                                            torch::Tensor const& a,
-                                            torch::Tensor const& b,
-                                            torch::Tensor const& a_scales,
-                                            torch::Tensor const& b_scales,
-                                            EpilogueArgs&&... args) {
+inline void cutlass_gemm_sm100_fp8_dispatch(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, EpilogueArgs&&... args) {
   static_assert(std::is_same<InType, cutlass::float_e4m3_t>());
-  TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn);
-  TORCH_CHECK(b.dtype() == torch::kFloat8_e4m3fn);
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
 
   using Cutlass3xGemmDefault =
       typename sm100_fp8_config_default<InType, OutType,
@@ -291,23 +294,53 @@ inline void cutlass_gemm_sm100_fp8_dispatch(torch::Tensor& out,
   }
 }
 
+template <typename InType, typename OutType, bool EnableBias,
+          typename... EpilogueArgs>
+inline void cutlass_gemm_sm100_fp8_batch_invariant_dispatch(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, EpilogueArgs&&... args) {
+  static_assert(std::is_same<InType, cutlass::float_e4m3_t>());
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+
+  using Cutlass3xGemmM64SwapAB =
+      typename sm100_fp8_config_M64_swap_ab<InType, OutType,
+                                            EnableBias>::Cutlass3xGemm;
+  using Cutlass3xGemmM64 =
+      typename sm100_fp8_config_M64<InType, OutType, EnableBias>::Cutlass3xGemm;
+
+  // keep the CUTLASS config independent of M for batch invariance
+  uint32_t const k = a.size(1);
+  if (k < 4096) {
+    return cutlass_gemm_caller_sm100_fp8<Cutlass3xGemmM64>(
+        out, a, b, a_scales, b_scales, std::forward<EpilogueArgs>(args)...);
+  }
+  return cutlass_gemm_caller_sm100_fp8<Cutlass3xGemmM64SwapAB>(
+      out, a, b, b_scales, a_scales, std::forward<EpilogueArgs>(args)...);
+}
+
 template <bool EnableBias, typename... EpilogueArgs>
-void cutlass_scaled_mm_sm100_fp8_epilogue(torch::Tensor& out,
-                                          torch::Tensor const& a,
-                                          torch::Tensor const& b,
-                                          torch::Tensor const& a_scales,
-                                          torch::Tensor const& b_scales,
+void cutlass_scaled_mm_sm100_fp8_epilogue(torch::stable::Tensor& out,
+                                          torch::stable::Tensor const& a,
+                                          torch::stable::Tensor const& b,
+                                          torch::stable::Tensor const& a_scales,
+                                          torch::stable::Tensor const& b_scales,
                                           EpilogueArgs&&... epilogue_args) {
-  TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn);
-  TORCH_CHECK(b.dtype() == torch::kFloat8_e4m3fn);
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
 
-  if (out.dtype() == torch::kBFloat16) {
+  if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
     return cutlass_gemm_sm100_fp8_dispatch<cutlass::float_e4m3_t,
                                            cutlass::bfloat16_t, EnableBias>(
         out, a, b, a_scales, b_scales,
         std::forward<EpilogueArgs>(epilogue_args)...);
   } else {
-    TORCH_CHECK(out.dtype() == torch::kFloat16);
+    STD_TORCH_CHECK(out.scalar_type() == torch::headeronly::ScalarType::Half);
     return cutlass_gemm_sm100_fp8_dispatch<cutlass::float_e4m3_t,
                                            cutlass::half_t, EnableBias>(
         out, a, b, a_scales, b_scales,
@@ -315,4 +348,28 @@ void cutlass_scaled_mm_sm100_fp8_epilogue(torch::Tensor& out,
   }
 }
 
+template <bool EnableBias, typename... EpilogueArgs>
+void cutlass_scaled_mm_sm100_fp8_batch_invariant_epilogue(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, EpilogueArgs&&... epilogue_args) {
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+
+  if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
+    return cutlass_gemm_sm100_fp8_batch_invariant_dispatch<
+        cutlass::float_e4m3_t, cutlass::bfloat16_t, EnableBias>(
+        out, a, b, a_scales, b_scales,
+        std::forward<EpilogueArgs>(epilogue_args)...);
+  } else {
+    STD_TORCH_CHECK(out.scalar_type() == torch::headeronly::ScalarType::Half);
+    return cutlass_gemm_sm100_fp8_batch_invariant_dispatch<
+        cutlass::float_e4m3_t, cutlass::half_t, EnableBias>(
+        out, a, b, a_scales, b_scales,
+        std::forward<EpilogueArgs>(epilogue_args)...);
+  }
+}
+
 }  // namespace vllm
diff --git a/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8.cu
new file mode 100644
index 000000000000..972d6c626062
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8.cu
@@ -0,0 +1,34 @@
+#include "scaled_mm_kernels.hpp"
+#include "scaled_mm_sm120_fp8_dispatch.cuh"
+#include "core/batch_invariant.hpp"
+#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
+
+namespace vllm {
+
+void cutlass_scaled_mm_sm120_fp8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales,
+    std::optional<torch::stable::Tensor> const& bias) {
+  STD_TORCH_CHECK(a_scales.is_contiguous() && b_scales.is_contiguous());
+  if (bias) {
+    STD_TORCH_CHECK(bias->scalar_type() == out.scalar_type(),
+                    "currently bias dtype must match output dtype ",
+                    out.scalar_type());
+    if (vllm_is_batch_invariant()) {
+      return cutlass_scaled_mm_sm120_fp8_batch_invariant_epilogue<
+          c3x::ScaledEpilogueBias>(out, a, b, a_scales, b_scales, *bias);
+    }
+    return cutlass_scaled_mm_sm120_fp8_epilogue<c3x::ScaledEpilogueBias>(
+        out, a, b, a_scales, b_scales, *bias);
+  } else {
+    if (vllm_is_batch_invariant()) {
+      return cutlass_scaled_mm_sm120_fp8_batch_invariant_epilogue<
+          c3x::ScaledEpilogue>(out, a, b, a_scales, b_scales);
+    }
+    return cutlass_scaled_mm_sm120_fp8_epilogue<c3x::ScaledEpilogue>(
+        out, a, b, a_scales, b_scales);
+  }
+}
+
+}  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8_dispatch.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8_dispatch.cuh
similarity index 71%
rename from csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8_dispatch.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8_dispatch.cuh
index 37846a87bbfb..7a7229c95bae 100644
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8_dispatch.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8_dispatch.cuh
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <torch/headeronly/util/shim_utils.h>
+
 #include "scaled_mm.cuh"
 #include "cutlass_gemm_caller.cuh"
 
@@ -70,7 +72,7 @@ struct cutlass_3x_gemm_sm120_custom {
               sizeof(typename CollectiveEpilogue::SharedStorage))>,
           KernelSchedule, void>::CollectiveOp;
 
-  using GemmKernel = enable_sm120_only<cutlass::gemm::kernel::GemmUniversal<
+  using GemmKernel = enable_sm120_family<cutlass::gemm::kernel::GemmUniversal<
       Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue, void>>;
 };
 
@@ -138,13 +140,15 @@ struct sm120_fp8_config_M16 {
 template <typename InType, typename OutType,
           template <typename, typename, typename> typename Epilogue,
           typename... EpilogueArgs>
-inline void cutlass_gemm_sm120_fp8_dispatch(torch::Tensor& out,
-                                            torch::Tensor const& a,
-                                            torch::Tensor const& b,
+inline void cutlass_gemm_sm120_fp8_dispatch(torch::stable::Tensor& out,
+                                            torch::stable::Tensor const& a,
+                                            torch::stable::Tensor const& b,
                                             EpilogueArgs&&... args) {
   static_assert(std::is_same<InType, cutlass::float_e4m3_t>());
-  TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn);
-  TORCH_CHECK(b.dtype() == torch::kFloat8_e4m3fn);
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
 
   int M = a.size(0);
 
@@ -175,25 +179,69 @@ inline void cutlass_gemm_sm120_fp8_dispatch(torch::Tensor& out,
       out, a, b, std::forward<EpilogueArgs>(args)...);
 }
 
+template <typename InType, typename OutType,
+          template <typename, typename, typename> typename Epilogue,
+          typename... EpilogueArgs>
+inline void cutlass_gemm_sm120_fp8_batch_invariant_dispatch(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, EpilogueArgs&&... args) {
+  static_assert(std::is_same<InType, cutlass::float_e4m3_t>());
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+
+  using Cutlass3xGemmM64 =
+      typename sm120_fp8_config_M64<InType, OutType, Epilogue>::Cutlass3xGemm;
+
+  // keep the CUTLASS config independent of M for batch invariance
+  return cutlass_gemm_caller<Cutlass3xGemmM64>(
+      out, a, b, std::forward<EpilogueArgs>(args)...);
+}
+
 template <template <typename, typename, typename> typename Epilogue,
           typename... EpilogueArgs>
-void cutlass_scaled_mm_sm120_fp8_epilogue(torch::Tensor& out,
-                                          torch::Tensor const& a,
-                                          torch::Tensor const& b,
+void cutlass_scaled_mm_sm120_fp8_epilogue(torch::stable::Tensor& out,
+                                          torch::stable::Tensor const& a,
+                                          torch::stable::Tensor const& b,
                                           EpilogueArgs&&... epilogue_args) {
-  TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn);
-  TORCH_CHECK(b.dtype() == torch::kFloat8_e4m3fn);
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
 
-  if (out.dtype() == torch::kBFloat16) {
+  if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
     return cutlass_gemm_sm120_fp8_dispatch<cutlass::float_e4m3_t,
                                            cutlass::bfloat16_t, Epilogue>(
         out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
   } else {
-    TORCH_CHECK(out.dtype() == torch::kFloat16);
+    STD_TORCH_CHECK(out.scalar_type() == torch::headeronly::ScalarType::Half);
     return cutlass_gemm_sm120_fp8_dispatch<cutlass::float_e4m3_t,
                                            cutlass::half_t, Epilogue>(
         out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
   }
 }
 
+template <template <typename, typename, typename> typename Epilogue,
+          typename... EpilogueArgs>
+void cutlass_scaled_mm_sm120_fp8_batch_invariant_epilogue(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, EpilogueArgs&&... epilogue_args) {
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+
+  if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
+    return cutlass_gemm_sm120_fp8_batch_invariant_dispatch<
+        cutlass::float_e4m3_t, cutlass::bfloat16_t, Epilogue>(
+        out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
+  } else {
+    STD_TORCH_CHECK(out.scalar_type() == torch::headeronly::ScalarType::Half);
+    return cutlass_gemm_sm120_fp8_batch_invariant_dispatch<
+        cutlass::float_e4m3_t, cutlass::half_t, Epilogue>(
+        out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
+  }
+}
+
 }  // namespace vllm
diff --git a/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8.cu
new file mode 100644
index 000000000000..e86c9bd48d3f
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8.cu
@@ -0,0 +1,33 @@
+#include "scaled_mm_kernels.hpp"
+#include "scaled_mm_sm90_fp8_dispatch.cuh"
+#include "core/batch_invariant.hpp"
+
+namespace vllm {
+
+void cutlass_scaled_mm_sm90_fp8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales,
+    std::optional<torch::stable::Tensor> const& bias) {
+  STD_TORCH_CHECK(a_scales.is_contiguous() && b_scales.is_contiguous());
+  if (bias) {
+    STD_TORCH_CHECK(bias->scalar_type() == out.scalar_type(),
+                    "currently bias dtype must match output dtype ",
+                    out.scalar_type());
+    if (vllm_is_batch_invariant()) {
+      return cutlass_scaled_mm_sm90_fp8_batch_invariant_epilogue<true>(
+          out, a, b, a_scales, b_scales, *bias);
+    }
+    return cutlass_scaled_mm_sm90_fp8_epilogue<true>(out, a, b, a_scales,
+                                                     b_scales, *bias);
+  } else {
+    if (vllm_is_batch_invariant()) {
+      return cutlass_scaled_mm_sm90_fp8_batch_invariant_epilogue<false>(
+          out, a, b, a_scales, b_scales);
+    }
+    return cutlass_scaled_mm_sm90_fp8_epilogue<false>(out, a, b, a_scales,
+                                                      b_scales);
+  }
+}
+
+}  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8_dispatch.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8_dispatch.cuh
similarity index 79%
rename from csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8_dispatch.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8_dispatch.cuh
index b8433214be1b..f78b8daea510 100644
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8_dispatch.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8_dispatch.cuh
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <torch/headeronly/util/shim_utils.h>
+
 #include "scaled_mm.cuh"
 #include "cutlass_gemm_caller.cuh"
 #include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
@@ -235,8 +237,9 @@ struct sm90_fp8_config_M16_N8192 {
 };
 
 template <typename Gemm, typename... EpilogueArgs>
-void cutlass_gemm_caller_sm90_fp8(torch::Tensor& out, torch::Tensor const& a,
-                                  torch::Tensor const& b,
+void cutlass_gemm_caller_sm90_fp8(torch::stable::Tensor& out,
+                                  torch::stable::Tensor const& a,
+                                  torch::stable::Tensor const& b,
                                   EpilogueArgs&&... epilogue_params) {
   static constexpr bool swap_ab = Gemm::swap_ab;
   using ElementAB = typename Gemm::ElementAB;
@@ -280,15 +283,15 @@ void cutlass_gemm_caller_sm90_fp8(torch::Tensor& out, torch::Tensor const& a,
 
 template <typename InType, typename OutType, bool EnableBias,
           typename... EpilogueArgs>
-inline void cutlass_gemm_sm90_fp8_dispatch(torch::Tensor& out,
-                                           torch::Tensor const& a,
-                                           torch::Tensor const& b,
-                                           torch::Tensor const& a_scales,
-                                           torch::Tensor const& b_scales,
-                                           EpilogueArgs&&... args) {
+inline void cutlass_gemm_sm90_fp8_dispatch(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, EpilogueArgs&&... args) {
   static_assert(std::is_same<InType, cutlass::float_e4m3_t>());
-  TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn);
-  TORCH_CHECK(b.dtype() == torch::kFloat8_e4m3fn);
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
 
   using Cutlass3xGemmDefault =
       typename sm90_fp8_config_default<InType, OutType,
@@ -346,23 +349,54 @@ inline void cutlass_gemm_sm90_fp8_dispatch(torch::Tensor& out,
   }
 }
 
+template <typename InType, typename OutType, bool EnableBias,
+          typename... EpilogueArgs>
+inline void cutlass_gemm_sm90_fp8_batch_invariant_dispatch(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, EpilogueArgs&&... args) {
+  static_assert(std::is_same<InType, cutlass::float_e4m3_t>());
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+
+  using Cutlass3xGemmM64_N1280 =
+      typename sm90_fp8_config_M64_N1280<InType, OutType,
+                                         EnableBias>::Cutlass3xGemm;
+  using Cutlass3xGemmM64_N8192 =
+      typename sm90_fp8_config_M64_N8192<InType, OutType,
+                                         EnableBias>::Cutlass3xGemm;
+
+  // keep the CUTLASS config independent of M for batch invariance
+  uint32_t const n = b.size(1);
+  if (n <= 1280) {
+    return cutlass_gemm_caller_sm90_fp8<Cutlass3xGemmM64_N1280>(
+        out, a, b, b_scales, a_scales, std::forward<EpilogueArgs>(args)...);
+  }
+  return cutlass_gemm_caller_sm90_fp8<Cutlass3xGemmM64_N8192>(
+      out, a, b, b_scales, a_scales, std::forward<EpilogueArgs>(args)...);
+}
+
 template <bool EnableBias, typename... EpilogueArgs>
-void cutlass_scaled_mm_sm90_fp8_epilogue(torch::Tensor& out,
-                                         torch::Tensor const& a,
-                                         torch::Tensor const& b,
-                                         torch::Tensor const& a_scales,
-                                         torch::Tensor const& b_scales,
+void cutlass_scaled_mm_sm90_fp8_epilogue(torch::stable::Tensor& out,
+                                         torch::stable::Tensor const& a,
+                                         torch::stable::Tensor const& b,
+                                         torch::stable::Tensor const& a_scales,
+                                         torch::stable::Tensor const& b_scales,
                                          EpilogueArgs&&... epilogue_args) {
-  TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn);
-  TORCH_CHECK(b.dtype() == torch::kFloat8_e4m3fn);
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
 
-  if (out.dtype() == torch::kBFloat16) {
+  if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
     return cutlass_gemm_sm90_fp8_dispatch<cutlass::float_e4m3_t,
                                           cutlass::bfloat16_t, EnableBias>(
         out, a, b, a_scales, b_scales,
         std::forward<EpilogueArgs>(epilogue_args)...);
   } else {
-    TORCH_CHECK(out.dtype() == torch::kFloat16);
+    STD_TORCH_CHECK(out.scalar_type() == torch::headeronly::ScalarType::Half);
     return cutlass_gemm_sm90_fp8_dispatch<cutlass::float_e4m3_t,
                                           cutlass::half_t, EnableBias>(
         out, a, b, a_scales, b_scales,
@@ -370,4 +404,28 @@ void cutlass_scaled_mm_sm90_fp8_epilogue(torch::Tensor& out,
   }
 }
 
+template <bool EnableBias, typename... EpilogueArgs>
+void cutlass_scaled_mm_sm90_fp8_batch_invariant_epilogue(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, EpilogueArgs&&... epilogue_args) {
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+
+  if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
+    return cutlass_gemm_sm90_fp8_batch_invariant_dispatch<
+        cutlass::float_e4m3_t, cutlass::bfloat16_t, EnableBias>(
+        out, a, b, a_scales, b_scales,
+        std::forward<EpilogueArgs>(epilogue_args)...);
+  } else {
+    STD_TORCH_CHECK(out.scalar_type() == torch::headeronly::ScalarType::Half);
+    return cutlass_gemm_sm90_fp8_batch_invariant_dispatch<
+        cutlass::float_e4m3_t, cutlass::half_t, EnableBias>(
+        out, a, b, a_scales, b_scales,
+        std::forward<EpilogueArgs>(epilogue_args)...);
+  }
+}
+
 }  // namespace vllm
diff --git a/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8.cu
new file mode 100644
index 000000000000..717a2a588307
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8.cu
@@ -0,0 +1,25 @@
+#include "scaled_mm_kernels.hpp"
+#include "scaled_mm_sm90_int8_dispatch.cuh"
+#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
+
+namespace vllm {
+
+void cutlass_scaled_mm_sm90_int8(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales,
+    std::optional<torch::stable::Tensor> const& bias) {
+  STD_TORCH_CHECK(a_scales.is_contiguous() && b_scales.is_contiguous());
+  if (bias) {
+    STD_TORCH_CHECK(bias->scalar_type() == out.scalar_type(),
+                    "currently bias dtype must match output dtype ",
+                    out.scalar_type());
+    return cutlass_scaled_mm_sm90_int8_epilogue<c3x::ScaledEpilogueBias>(
+        out, a, b, a_scales, b_scales, *bias);
+  } else {
+    return cutlass_scaled_mm_sm90_int8_epilogue<c3x::ScaledEpilogue>(
+        out, a, b, a_scales, b_scales);
+  }
+}
+
+}  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8_dispatch.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8_dispatch.cuh
similarity index 86%
rename from csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8_dispatch.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8_dispatch.cuh
index c4fa18101956..a2ec816c9c93 100644
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8_dispatch.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8_dispatch.cuh
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <torch/headeronly/util/shim_utils.h>
+
 #include "scaled_mm.cuh"
 #include "cutlass_gemm_caller.cuh"
 
@@ -87,13 +89,13 @@ struct sm90_int8_config_M32_NSmall {
 template <typename InType, typename OutType,
           template <typename, typename, typename> typename Epilogue,
           typename... EpilogueArgs>
-inline void cutlass_gemm_sm90_int8_dispatch(torch::Tensor& out,
-                                            torch::Tensor const& a,
-                                            torch::Tensor const& b,
+inline void cutlass_gemm_sm90_int8_dispatch(torch::stable::Tensor& out,
+                                            torch::stable::Tensor const& a,
+                                            torch::stable::Tensor const& b,
                                             EpilogueArgs&&... args) {
   static_assert(std::is_same<InType, int8_t>());
-  TORCH_CHECK(a.dtype() == torch::kInt8);
-  TORCH_CHECK(b.dtype() == torch::kInt8);
+  STD_TORCH_CHECK(a.scalar_type() == torch::headeronly::ScalarType::Char);
+  STD_TORCH_CHECK(b.scalar_type() == torch::headeronly::ScalarType::Char);
 
   using Cutlass3xGemmDefault =
       typename sm90_int8_config_default<InType, OutType,
@@ -142,19 +144,19 @@ inline void cutlass_gemm_sm90_int8_dispatch(torch::Tensor& out,
 
 template <template <typename, typename, typename> typename Epilogue,
           typename... EpilogueArgs>
-void cutlass_scaled_mm_sm90_int8_epilogue(torch::Tensor& out,
-                                          torch::Tensor const& a,
-                                          torch::Tensor const& b,
+void cutlass_scaled_mm_sm90_int8_epilogue(torch::stable::Tensor& out,
+                                          torch::stable::Tensor const& a,
+                                          torch::stable::Tensor const& b,
                                           EpilogueArgs&&... epilogue_args) {
-  TORCH_CHECK(a.dtype() == torch::kInt8);
-  TORCH_CHECK(b.dtype() == torch::kInt8);
+  STD_TORCH_CHECK(a.scalar_type() == torch::headeronly::ScalarType::Char);
+  STD_TORCH_CHECK(b.scalar_type() == torch::headeronly::ScalarType::Char);
 
-  if (out.dtype() == torch::kBFloat16) {
+  if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
     return cutlass_gemm_sm90_int8_dispatch<int8_t, cutlass::bfloat16_t,
                                            Epilogue>(
         out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
   } else {
-    TORCH_CHECK(out.dtype() == torch::kFloat16);
+    STD_TORCH_CHECK(out.scalar_type() == torch::headeronly::ScalarType::Half);
     return cutlass_gemm_sm90_int8_dispatch<int8_t, cutlass::half_t, Epilogue>(
         out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
   }
diff --git a/csrc/quantization/w8a8/cutlass/moe/get_group_starts.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/moe/get_group_starts.cuh
similarity index 63%
rename from csrc/quantization/w8a8/cutlass/moe/get_group_starts.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/moe/get_group_starts.cuh
index 15bb2c300543..e073b4e64adb 100644
--- a/csrc/quantization/w8a8/cutlass/moe/get_group_starts.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/moe/get_group_starts.cuh
@@ -1,10 +1,10 @@
 #pragma once
 
 #include <cuda.h>
-#include <torch/all.h>
-#include <c10/cuda/CUDAStream.h>
+#include <torch/csrc/stable/tensor.h>
+#include <torch/headeronly/core/ScalarType.h>
+#include "libtorch_stable/torch_utils.h"
 
-#include "core/scalar_type.hpp"
 #include "cutlass/bfloat16.h"
 #include "cutlass/float8.h"
 
@@ -31,7 +31,7 @@ __global__ void get_group_gemm_starts(
 }
 
 #define __CALL_GET_STARTS_KERNEL(TENSOR_C_TYPE, C_TYPE)                    \
-  else if (out_tensors.dtype() == TENSOR_C_TYPE) {                         \
+  else if (out_tensors.scalar_type() == TENSOR_C_TYPE) {                   \
     get_group_gemm_starts<cutlass::float_e4m3_t, C_TYPE, float>            \
         <<<1, num_experts, 0, stream>>>(                                   \
             static_cast<int64_t*>(expert_offsets.data_ptr()),              \
@@ -51,32 +51,39 @@ __global__ void get_group_gemm_starts(
 namespace {
 
 void run_get_group_gemm_starts(
-    torch::Tensor const& expert_offsets, torch::Tensor& a_ptrs,
-    torch::Tensor& b_ptrs, torch::Tensor& out_ptrs,
-    torch::Tensor& a_scales_ptrs, torch::Tensor& b_scales_ptrs,
-    torch::Tensor const& a_tensors, torch::Tensor const& b_tensors,
-    torch::Tensor& out_tensors, torch::Tensor const& a_scales,
-    torch::Tensor const& b_scales) {
-  TORCH_CHECK(a_tensors.dtype() == torch::kFloat8_e4m3fn);
-  TORCH_CHECK(b_tensors.dtype() == torch::kFloat8_e4m3fn);
-  TORCH_CHECK(a_scales.dtype() == torch::kFloat32);
-  TORCH_CHECK(b_scales.dtype() == torch::kFloat32);
+    torch::stable::Tensor const& expert_offsets, torch::stable::Tensor& a_ptrs,
+    torch::stable::Tensor& b_ptrs, torch::stable::Tensor& out_ptrs,
+    torch::stable::Tensor& a_scales_ptrs, torch::stable::Tensor& b_scales_ptrs,
+    torch::stable::Tensor const& a_tensors,
+    torch::stable::Tensor const& b_tensors, torch::stable::Tensor& out_tensors,
+    torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales) {
+  STD_TORCH_CHECK(a_tensors.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b_tensors.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(a_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+  STD_TORCH_CHECK(b_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
   // expect int64_t to avoid overflow during offset calculations
-  TORCH_CHECK(expert_offsets.dtype() == torch::kInt64);
+  STD_TORCH_CHECK(expert_offsets.scalar_type() ==
+                  torch::headeronly::ScalarType::Long);
 
   int num_experts = static_cast<int>(expert_offsets.size(0));
   bool per_act_token = a_scales.numel() != 1;
   bool per_out_ch = b_scales.numel() != num_experts;
 
-  auto stream = at::cuda::getCurrentCUDAStream(a_tensors.device().index());
+  auto stream = get_current_cuda_stream(a_tensors.get_device_index());
 
   if (false) {
   }
-  __CALL_GET_STARTS_KERNEL(torch::kBFloat16, cutlass::bfloat16_t)
-  __CALL_GET_STARTS_KERNEL(torch::kFloat16, half)
+  __CALL_GET_STARTS_KERNEL(torch::headeronly::ScalarType::BFloat16,
+                           cutlass::bfloat16_t)
+  __CALL_GET_STARTS_KERNEL(torch::headeronly::ScalarType::Half, half)
   else {
-    TORCH_CHECK(false, "Invalid output type (must be float16 or bfloat16)");
+    STD_TORCH_CHECK(false, "Invalid output type (must be float16 or bfloat16)");
   }
 }
 
-}  // namespace
\ No newline at end of file
+}  // namespace
diff --git a/csrc/quantization/w8a8/cutlass/moe/grouped_mm_c3x.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/moe/grouped_mm_c3x.cuh
similarity index 78%
rename from csrc/quantization/w8a8/cutlass/moe/grouped_mm_c3x.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/moe/grouped_mm_c3x.cuh
index 659941de182e..49df3fa4e7f2 100644
--- a/csrc/quantization/w8a8/cutlass/moe/grouped_mm_c3x.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/moe/grouped_mm_c3x.cuh
@@ -6,6 +6,7 @@
 #include "cutlass/epilogue/collective/collective_builder.hpp"
 #include "cutlass/gemm/device/gemm_universal_adapter.h"
 
+#include <torch/csrc/stable/ops.h>
 #include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
 #include "cutlass_extensions/common.hpp"
 #include "get_group_starts.cuh"
@@ -84,13 +85,17 @@ struct cutlass_3x_group_gemm {
 };
 
 template <typename Gemm>
-void cutlass_group_gemm_caller(
-    torch::Tensor& out_tensors, torch::Tensor const& a_tensors,
-    torch::Tensor const& b_tensors, torch::Tensor const& a_scales,
-    torch::Tensor const& b_scales, torch::Tensor const& expert_offsets,
-    torch::Tensor const& problem_sizes, torch::Tensor const& a_strides,
-    torch::Tensor const& b_strides, torch::Tensor const& c_strides,
-    bool per_act_token, bool per_out_ch) {
+void cutlass_group_gemm_caller(torch::stable::Tensor& out_tensors,
+                               torch::stable::Tensor const& a_tensors,
+                               torch::stable::Tensor const& b_tensors,
+                               torch::stable::Tensor const& a_scales,
+                               torch::stable::Tensor const& b_scales,
+                               torch::stable::Tensor const& expert_offsets,
+                               torch::stable::Tensor const& problem_sizes,
+                               torch::stable::Tensor const& a_strides,
+                               torch::stable::Tensor const& b_strides,
+                               torch::stable::Tensor const& c_strides,
+                               bool per_act_token, bool per_out_ch) {
   static constexpr bool swap_ab = Gemm::swap_ab;
 
   using ElementAB = typename Gemm::ElementAB;
@@ -98,16 +103,20 @@ void cutlass_group_gemm_caller(
 
   int num_experts = static_cast<int>(expert_offsets.size(0));
 
-  auto stream = at::cuda::getCurrentCUDAStream(a_tensors.device().index());
+  auto stream = get_current_cuda_stream(a_tensors.get_device_index());
 
-  auto options_int =
-      torch::TensorOptions().dtype(torch::kInt64).device(a_tensors.device());
+  auto device = a_tensors.device();
 
-  torch::Tensor a_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor b_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor out_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor a_scales_ptrs = torch::empty(num_experts, options_int);
-  torch::Tensor b_scales_ptrs = torch::empty(num_experts, options_int);
+  torch::stable::Tensor a_ptrs = torch::stable::empty(
+      {num_experts}, torch::headeronly::ScalarType::Long, std::nullopt, device);
+  torch::stable::Tensor b_ptrs = torch::stable::empty(
+      {num_experts}, torch::headeronly::ScalarType::Long, std::nullopt, device);
+  torch::stable::Tensor out_ptrs = torch::stable::empty(
+      {num_experts}, torch::headeronly::ScalarType::Long, std::nullopt, device);
+  torch::stable::Tensor a_scales_ptrs = torch::stable::empty(
+      {num_experts}, torch::headeronly::ScalarType::Long, std::nullopt, device);
+  torch::stable::Tensor b_scales_ptrs = torch::stable::empty(
+      {num_experts}, torch::headeronly::ScalarType::Long, std::nullopt, device);
 
   run_get_group_gemm_starts(expert_offsets, a_ptrs, b_ptrs, out_ptrs,
                             a_scales_ptrs, b_scales_ptrs, a_tensors, b_tensors,
@@ -156,7 +165,7 @@ void cutlass_group_gemm_caller(
       static_cast<ElementD**>(out_ptrs.data_ptr()),
       static_cast<StrideC*>(c_strides.data_ptr())};
 
-  int device_id = a_tensors.device().index();
+  int device_id = a_tensors.get_device_index();
   static const cutlass::KernelHardwareInfo hw_info{
       device_id, cutlass::KernelHardwareInfo::query_device_multiprocessor_count(
                      device_id)};
@@ -170,9 +179,9 @@ void cutlass_group_gemm_caller(
   CUTLASS_CHECK(gemm_op.can_implement(args));
 
   size_t workspace_size = gemm_op.get_workspace_size(args);
-  auto const workspace_options =
-      torch::TensorOptions().dtype(torch::kUInt8).device(a_tensors.device());
-  auto workspace = torch::empty(workspace_size, workspace_options);
+  auto workspace =
+      torch::stable::empty(workspace_size, torch::headeronly::ScalarType::Byte,
+                           std::nullopt, device);
 
   cutlass::Status status = gemm_op.run(args, workspace.data_ptr(), stream);
   CUTLASS_CHECK(status);
diff --git a/csrc/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm100.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm100.cu
similarity index 59%
rename from csrc/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm100.cu
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm100.cu
index 641e5997f0fd..1f88bf140da5 100644
--- a/csrc/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm100.cu
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm100.cu
@@ -1,7 +1,8 @@
 #include <cudaTypedefs.h>
 
-#include <c10/cuda/CUDAGuard.h>
-#include <torch/all.h>
+#include "libtorch_stable/torch_utils.h"
+#include <torch/csrc/stable/tensor.h>
+#include <torch/headeronly/core/ScalarType.h>
 
 #include "cutlass/cutlass.h"
 #include "grouped_mm_c3x.cuh"
@@ -62,21 +63,27 @@ struct sm100_fp8_config_N8192 {
 };
 
 template <typename InType, typename OutType>
-void run_cutlass_moe_mm_sm100(
-    torch::Tensor& out_tensors, torch::Tensor const& a_tensors,
-    torch::Tensor const& b_tensors, torch::Tensor const& a_scales,
-    torch::Tensor const& b_scales, torch::Tensor const& expert_offsets,
-    torch::Tensor const& problem_sizes, torch::Tensor const& a_strides,
-    torch::Tensor const& b_strides, torch::Tensor const& c_strides,
-    bool per_act_token, bool per_out_ch) {
-  TORCH_CHECK(a_tensors.size(0) > 0, "No input A tensors provided.");
-  TORCH_CHECK(b_tensors.size(0) > 0, "No input B tensors provided.");
-  TORCH_CHECK(out_tensors.size(0) > 0, "No output tensors provided.");
-
-  TORCH_CHECK(a_tensors.dtype() == torch::kFloat8_e4m3fn,
-              "A tensors must be of type float8_e4m3fn.");
-  TORCH_CHECK(b_tensors.dtype() == torch::kFloat8_e4m3fn,
-              "B tensors must be of type float8_e4m3fn.");
+void run_cutlass_moe_mm_sm100(torch::stable::Tensor& out_tensors,
+                              torch::stable::Tensor const& a_tensors,
+                              torch::stable::Tensor const& b_tensors,
+                              torch::stable::Tensor const& a_scales,
+                              torch::stable::Tensor const& b_scales,
+                              torch::stable::Tensor const& expert_offsets,
+                              torch::stable::Tensor const& problem_sizes,
+                              torch::stable::Tensor const& a_strides,
+                              torch::stable::Tensor const& b_strides,
+                              torch::stable::Tensor const& c_strides,
+                              bool per_act_token, bool per_out_ch) {
+  STD_TORCH_CHECK(a_tensors.size(0) > 0, "No input A tensors provided.");
+  STD_TORCH_CHECK(b_tensors.size(0) > 0, "No input B tensors provided.");
+  STD_TORCH_CHECK(out_tensors.size(0) > 0, "No output tensors provided.");
+
+  STD_TORCH_CHECK(
+      a_tensors.scalar_type() == torch::headeronly::ScalarType::Float8_e4m3fn,
+      "A tensors must be of type float8_e4m3fn.");
+  STD_TORCH_CHECK(
+      b_tensors.scalar_type() == torch::headeronly::ScalarType::Float8_e4m3fn,
+      "B tensors must be of type float8_e4m3fn.");
 
   using Cutlass3xGemmDefault = typename sm100_fp8_config_default<
       InType, OutType, vllm::c3x::ScaledEpilogueArray>::Cutlass3xGemm;
@@ -107,14 +114,18 @@ void run_cutlass_moe_mm_sm100(
 }
 }  // namespace
 
-void dispatch_moe_mm_sm100(
-    torch::Tensor& out_tensors, torch::Tensor const& a_tensors,
-    torch::Tensor const& b_tensors, torch::Tensor const& a_scales,
-    torch::Tensor const& b_scales, torch::Tensor const& expert_offsets,
-    torch::Tensor const& problem_sizes, torch::Tensor const& a_strides,
-    torch::Tensor const& b_strides, torch::Tensor const& c_strides,
-    bool per_act_token, bool per_out_ch) {
-  if (out_tensors.dtype() == torch::kBFloat16) {
+void dispatch_moe_mm_sm100(torch::stable::Tensor& out_tensors,
+                           torch::stable::Tensor const& a_tensors,
+                           torch::stable::Tensor const& b_tensors,
+                           torch::stable::Tensor const& a_scales,
+                           torch::stable::Tensor const& b_scales,
+                           torch::stable::Tensor const& expert_offsets,
+                           torch::stable::Tensor const& problem_sizes,
+                           torch::stable::Tensor const& a_strides,
+                           torch::stable::Tensor const& b_strides,
+                           torch::stable::Tensor const& c_strides,
+                           bool per_act_token, bool per_out_ch) {
+  if (out_tensors.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
     run_cutlass_moe_mm_sm100<cutlass::float_e4m3_t, cutlass::bfloat16_t>(
         out_tensors, a_tensors, b_tensors, a_scales, b_scales, expert_offsets,
         problem_sizes, a_strides, b_strides, c_strides, per_act_token,
@@ -127,13 +138,17 @@ void dispatch_moe_mm_sm100(
   }
 }
 
-void cutlass_moe_mm_sm100(
-    torch::Tensor& out_tensors, torch::Tensor const& a_tensors,
-    torch::Tensor const& b_tensors, torch::Tensor const& a_scales,
-    torch::Tensor const& b_scales, torch::Tensor const& expert_offsets,
-    torch::Tensor const& problem_sizes, torch::Tensor const& a_strides,
-    torch::Tensor const& b_strides, torch::Tensor const& c_strides,
-    bool per_act_token, bool per_out_ch) {
+void cutlass_moe_mm_sm100(torch::stable::Tensor& out_tensors,
+                          torch::stable::Tensor const& a_tensors,
+                          torch::stable::Tensor const& b_tensors,
+                          torch::stable::Tensor const& a_scales,
+                          torch::stable::Tensor const& b_scales,
+                          torch::stable::Tensor const& expert_offsets,
+                          torch::stable::Tensor const& problem_sizes,
+                          torch::stable::Tensor const& a_strides,
+                          torch::stable::Tensor const& b_strides,
+                          torch::stable::Tensor const& c_strides,
+                          bool per_act_token, bool per_out_ch) {
   dispatch_moe_mm_sm100(out_tensors, a_tensors, b_tensors, a_scales, b_scales,
                         expert_offsets, problem_sizes, a_strides, b_strides,
                         c_strides, per_act_token, per_out_ch);
diff --git a/csrc/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm90.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm90.cu
similarity index 70%
rename from csrc/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm90.cu
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm90.cu
index 8f21623b52fa..d494bfb9d08c 100644
--- a/csrc/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm90.cu
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm90.cu
@@ -1,7 +1,8 @@
 #include <cudaTypedefs.h>
 
-#include <c10/cuda/CUDAGuard.h>
-#include <torch/all.h>
+#include "libtorch_stable/torch_utils.h"
+#include <torch/csrc/stable/tensor.h>
+#include <torch/headeronly/core/ScalarType.h>
 
 #include "cutlass/cutlass.h"
 #include "grouped_mm_c3x.cuh"
@@ -103,21 +104,27 @@ struct sm90_fp8_config_N8192 {
 };
 
 template <typename InType, typename OutType>
-void run_cutlass_moe_mm_sm90(
-    torch::Tensor& out_tensors, torch::Tensor const& a_tensors,
-    torch::Tensor const& b_tensors, torch::Tensor const& a_scales,
-    torch::Tensor const& b_scales, torch::Tensor const& expert_offsets,
-    torch::Tensor const& problem_sizes, torch::Tensor const& a_strides,
-    torch::Tensor const& b_strides, torch::Tensor const& c_strides,
-    bool per_act_token, bool per_out_ch) {
-  TORCH_CHECK(a_tensors.size(0) > 0, "No input A tensors provided.");
-  TORCH_CHECK(b_tensors.size(0) > 0, "No input B tensors provided.");
-  TORCH_CHECK(out_tensors.size(0) > 0, "No output tensors provided.");
-
-  TORCH_CHECK(a_tensors.dtype() == torch::kFloat8_e4m3fn,
-              "A tensors must be of type float8_e4m3fn.");
-  TORCH_CHECK(b_tensors.dtype() == torch::kFloat8_e4m3fn,
-              "B tensors must be of type float8_e4m3fn.");
+void run_cutlass_moe_mm_sm90(torch::stable::Tensor& out_tensors,
+                             torch::stable::Tensor const& a_tensors,
+                             torch::stable::Tensor const& b_tensors,
+                             torch::stable::Tensor const& a_scales,
+                             torch::stable::Tensor const& b_scales,
+                             torch::stable::Tensor const& expert_offsets,
+                             torch::stable::Tensor const& problem_sizes,
+                             torch::stable::Tensor const& a_strides,
+                             torch::stable::Tensor const& b_strides,
+                             torch::stable::Tensor const& c_strides,
+                             bool per_act_token, bool per_out_ch) {
+  STD_TORCH_CHECK(a_tensors.size(0) > 0, "No input A tensors provided.");
+  STD_TORCH_CHECK(b_tensors.size(0) > 0, "No input B tensors provided.");
+  STD_TORCH_CHECK(out_tensors.size(0) > 0, "No output tensors provided.");
+
+  STD_TORCH_CHECK(
+      a_tensors.scalar_type() == torch::headeronly::ScalarType::Float8_e4m3fn,
+      "A tensors must be of type float8_e4m3fn.");
+  STD_TORCH_CHECK(
+      b_tensors.scalar_type() == torch::headeronly::ScalarType::Float8_e4m3fn,
+      "B tensors must be of type float8_e4m3fn.");
 
   using Cutlass3xGemmN8192 = typename sm90_fp8_config_N8192<
       InType, OutType, vllm::c3x::ScaledEpilogueArray>::Cutlass3xGemm;
@@ -163,14 +170,18 @@ void run_cutlass_moe_mm_sm90(
   }
 }
 
-void dispatch_moe_mm_sm90(
-    torch::Tensor& out_tensors, torch::Tensor const& a_tensors,
-    torch::Tensor const& b_tensors, torch::Tensor const& a_scales,
-    torch::Tensor const& b_scales, torch::Tensor const& expert_offsets,
-    torch::Tensor const& problem_sizes, torch::Tensor const& a_strides,
-    torch::Tensor const& b_strides, torch::Tensor const& c_strides,
-    bool per_act_token, bool per_out_ch) {
-  if (out_tensors.dtype() == torch::kBFloat16) {
+void dispatch_moe_mm_sm90(torch::stable::Tensor& out_tensors,
+                          torch::stable::Tensor const& a_tensors,
+                          torch::stable::Tensor const& b_tensors,
+                          torch::stable::Tensor const& a_scales,
+                          torch::stable::Tensor const& b_scales,
+                          torch::stable::Tensor const& expert_offsets,
+                          torch::stable::Tensor const& problem_sizes,
+                          torch::stable::Tensor const& a_strides,
+                          torch::stable::Tensor const& b_strides,
+                          torch::stable::Tensor const& c_strides,
+                          bool per_act_token, bool per_out_ch) {
+  if (out_tensors.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
     run_cutlass_moe_mm_sm90<cutlass::float_e4m3_t, cutlass::bfloat16_t>(
         out_tensors, a_tensors, b_tensors, a_scales, b_scales, expert_offsets,
         problem_sizes, a_strides, b_strides, c_strides, per_act_token,
@@ -185,13 +196,17 @@ void dispatch_moe_mm_sm90(
 
 }  // namespace
 
-void cutlass_moe_mm_sm90(
-    torch::Tensor& out_tensors, torch::Tensor const& a_tensors,
-    torch::Tensor const& b_tensors, torch::Tensor const& a_scales,
-    torch::Tensor const& b_scales, torch::Tensor const& expert_offsets,
-    torch::Tensor const& problem_sizes, torch::Tensor const& a_strides,
-    torch::Tensor const& b_strides, torch::Tensor const& c_strides,
-    bool per_act_token, bool per_out_ch) {
+void cutlass_moe_mm_sm90(torch::stable::Tensor& out_tensors,
+                         torch::stable::Tensor const& a_tensors,
+                         torch::stable::Tensor const& b_tensors,
+                         torch::stable::Tensor const& a_scales,
+                         torch::stable::Tensor const& b_scales,
+                         torch::stable::Tensor const& expert_offsets,
+                         torch::stable::Tensor const& problem_sizes,
+                         torch::stable::Tensor const& a_strides,
+                         torch::stable::Tensor const& b_strides,
+                         torch::stable::Tensor const& c_strides,
+                         bool per_act_token, bool per_out_ch) {
   dispatch_moe_mm_sm90(out_tensors, a_tensors, b_tensors, a_scales, b_scales,
                        expert_offsets, problem_sizes, a_strides, b_strides,
                        c_strides, per_act_token, per_out_ch);
diff --git a/csrc/quantization/w8a8/cutlass/moe/moe_data.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/moe/moe_data.cu
similarity index 70%
rename from csrc/quantization/w8a8/cutlass/moe/moe_data.cu
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/moe/moe_data.cu
index 268c4e10d24e..2632989cc69d 100644
--- a/csrc/quantization/w8a8/cutlass/moe/moe_data.cu
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/moe/moe_data.cu
@@ -1,9 +1,11 @@
 #include <cudaTypedefs.h>
 
-#include <c10/cuda/CUDAGuard.h>
-#include <torch/all.h>
+#include "libtorch_stable/torch_utils.h"
+#include <torch/csrc/stable/tensor.h>
+#include <torch/csrc/stable/ops.h>
+#include <torch/headeronly/core/ScalarType.h>
 
-#include "dispatch_utils.h"
+#include "libtorch_stable/dispatch_utils.h"
 
 #include <iostream>
 
@@ -110,19 +112,22 @@ __global__ void compute_arg_sorts(const int32_t* __restrict__ topk_ids,
 }
 
 namespace {
-inline void launch_compute_problem_sizes(
-    const torch::Tensor& topk_ids, torch::Tensor& problem_sizes1,
-    torch::Tensor& problem_sizes2, torch::Tensor& atomic_buffer,
-    int64_t num_experts, int64_t n, int64_t k, cudaStream_t stream,
-    const bool swap_ab, const bool is_gated) {
+inline void launch_compute_problem_sizes(const torch::stable::Tensor& topk_ids,
+                                         torch::stable::Tensor& problem_sizes1,
+                                         torch::stable::Tensor& problem_sizes2,
+                                         torch::stable::Tensor& atomic_buffer,
+                                         int64_t num_experts, int64_t n,
+                                         int64_t k, cudaStream_t stream,
+                                         const bool swap_ab,
+                                         const bool is_gated) {
   int num_threads = min(THREADS_PER_EXPERT, topk_ids.numel());
 
-  auto const* topk_ptr = topk_ids.data_ptr<int32_t>();
-  auto* ps1_ptr = problem_sizes1.data_ptr<int32_t>();
-  auto* ps2_ptr = problem_sizes2.data_ptr<int32_t>();
-  auto* atomic_ptr = atomic_buffer.data_ptr<int32_t>();
+  auto const* topk_ptr = topk_ids.const_data_ptr<int32_t>();
+  auto* ps1_ptr = problem_sizes1.mutable_data_ptr<int32_t>();
+  auto* ps2_ptr = problem_sizes2.mutable_data_ptr<int32_t>();
+  auto* atomic_ptr = atomic_buffer.mutable_data_ptr<int32_t>();
 
-  VLLM_DISPATCH_BOOL(swap_ab, SwapAB, [&] {
+  VLLM_STABLE_DISPATCH_BOOL(swap_ab, SwapAB, [&] {
     compute_problem_sizes<SwapAB><<<num_experts, num_threads, 0, stream>>>(
         topk_ptr, ps1_ptr, ps2_ptr, atomic_ptr,
         static_cast<int>(topk_ids.numel()), static_cast<int>(n),
@@ -171,46 +176,53 @@ __global__ void compute_problem_sizes_from_expert_offsets(
 }
 
 void get_cutlass_moe_mm_problem_sizes_from_expert_offsets_caller(
-    const torch::Tensor& expert_first_token_offset,
-    torch::Tensor& problem_sizes1, torch::Tensor& problem_sizes2,
-    const int64_t n, const int64_t k, const bool swap_ab) {
-  TORCH_CHECK(expert_first_token_offset.is_cuda(),
-              "expert_first_token_offset must be a CUDA tensor");
-  TORCH_CHECK(expert_first_token_offset.dtype() == torch::kInt64,
-              "expert_first_token_offset must be int64");
-
-  TORCH_CHECK(problem_sizes1.is_cuda() && problem_sizes2.is_cuda(),
-              "problem_sizes must be CUDA tensors");
-  TORCH_CHECK(problem_sizes1.dtype() == torch::kInt32 &&
-                  problem_sizes2.dtype() == torch::kInt32,
-              "problem_sizes must be int32");
-  TORCH_CHECK(problem_sizes1.is_contiguous() && problem_sizes2.is_contiguous(),
-              "problem_sizes must be contiguous");
-  TORCH_CHECK(problem_sizes1.dim() == 2 && problem_sizes2.dim() == 2,
-              "problem_sizes must be 2D tensors");
-  TORCH_CHECK(problem_sizes1.size(1) == 3 && problem_sizes2.size(1) == 3,
-              "problem_sizes second dim must be 3");
-  TORCH_CHECK(problem_sizes1.sizes() == problem_sizes2.sizes(),
-              "problem_sizes1 and problem_sizes2 must have same shape");
+    const torch::stable::Tensor& expert_first_token_offset,
+    torch::stable::Tensor& problem_sizes1,
+    torch::stable::Tensor& problem_sizes2, const int64_t n, const int64_t k,
+    const bool swap_ab) {
+  STD_TORCH_CHECK(expert_first_token_offset.is_cuda(),
+                  "expert_first_token_offset must be a CUDA tensor");
+  STD_TORCH_CHECK(expert_first_token_offset.scalar_type() ==
+                      torch::headeronly::ScalarType::Long,
+                  "expert_first_token_offset must be int64");
+
+  STD_TORCH_CHECK(problem_sizes1.is_cuda() && problem_sizes2.is_cuda(),
+                  "problem_sizes must be CUDA tensors");
+  STD_TORCH_CHECK(
+      problem_sizes1.scalar_type() == torch::headeronly::ScalarType::Int &&
+          problem_sizes2.scalar_type() == torch::headeronly::ScalarType::Int,
+      "problem_sizes must be int32");
+  STD_TORCH_CHECK(
+      problem_sizes1.is_contiguous() && problem_sizes2.is_contiguous(),
+      "problem_sizes must be contiguous");
+  STD_TORCH_CHECK(problem_sizes1.dim() == 2 && problem_sizes2.dim() == 2,
+                  "problem_sizes must be 2D tensors");
+  STD_TORCH_CHECK(problem_sizes1.size(1) == 3 && problem_sizes2.size(1) == 3,
+                  "problem_sizes second dim must be 3");
+  STD_TORCH_CHECK(problem_sizes1.size(0) == problem_sizes2.size(0) &&
+                      problem_sizes1.size(1) == problem_sizes2.size(1),
+                  "problem_sizes1 and problem_sizes2 must have same shape");
 
   int64_t const num_experts64 = problem_sizes1.size(0);
-  TORCH_CHECK(expert_first_token_offset.numel() == num_experts64 + 1,
-              "expert_first_token_offset must have num_experts + 1 elements");
-  TORCH_CHECK(num_experts64 <= INT32_MAX, "num_experts must fit in int32");
-  TORCH_CHECK(n <= INT32_MAX && k <= INT32_MAX, "n and k must fit in int32");
+  STD_TORCH_CHECK(
+      expert_first_token_offset.numel() == num_experts64 + 1,
+      "expert_first_token_offset must have num_experts + 1 elements");
+  STD_TORCH_CHECK(num_experts64 <= INT32_MAX, "num_experts must fit in int32");
+  STD_TORCH_CHECK(n <= INT32_MAX && k <= INT32_MAX,
+                  "n and k must fit in int32");
 
   int const num_experts = static_cast<int>(num_experts64);
-  auto stream = at::cuda::getCurrentCUDAStream(
-      expert_first_token_offset.device().index());
+  auto stream =
+      get_current_cuda_stream(expert_first_token_offset.get_device_index());
 
   int const threads = (num_experts < 256) ? num_experts : 256;
   int const blocks = (num_experts + threads - 1) / threads;
 
-  auto const* offsets_ptr = expert_first_token_offset.data_ptr<int64_t>();
-  auto* ps1_ptr = problem_sizes1.data_ptr<int32_t>();
-  auto* ps2_ptr = problem_sizes2.data_ptr<int32_t>();
+  auto const* offsets_ptr = expert_first_token_offset.const_data_ptr<int64_t>();
+  auto* ps1_ptr = problem_sizes1.mutable_data_ptr<int32_t>();
+  auto* ps2_ptr = problem_sizes2.mutable_data_ptr<int32_t>();
 
-  VLLM_DISPATCH_BOOL(swap_ab, SwapAB, [&] {
+  VLLM_STABLE_DISPATCH_BOOL(swap_ab, SwapAB, [&] {
     compute_problem_sizes_from_expert_offsets<SwapAB>
         <<<blocks, threads, 0, stream>>>(offsets_ptr, ps1_ptr, ps2_ptr,
                                          num_experts, static_cast<int>(n),
@@ -219,16 +231,19 @@ void get_cutlass_moe_mm_problem_sizes_from_expert_offsets_caller(
 }
 
 void get_cutlass_moe_mm_data_caller(
-    const torch::Tensor& topk_ids, torch::Tensor& expert_offsets,
-    torch::Tensor& problem_sizes1, torch::Tensor& problem_sizes2,
-    torch::Tensor& input_permutation, torch::Tensor& output_permutation,
-    const int64_t num_experts, const int64_t n, const int64_t k,
-    const std::optional<torch::Tensor>& blockscale_offsets,
+    const torch::stable::Tensor& topk_ids,
+    torch::stable::Tensor& expert_offsets,
+    torch::stable::Tensor& problem_sizes1,
+    torch::stable::Tensor& problem_sizes2,
+    torch::stable::Tensor& input_permutation,
+    torch::stable::Tensor& output_permutation, const int64_t num_experts,
+    const int64_t n, const int64_t k,
+    const std::optional<torch::stable::Tensor>& blockscale_offsets,
     const bool is_gated) {
-  auto stream = at::cuda::getCurrentCUDAStream(topk_ids.device().index());
-  auto options_int32 =
-      torch::TensorOptions().dtype(torch::kInt32).device(topk_ids.device());
-  torch::Tensor atomic_buffer = torch::zeros(num_experts, options_int32);
+  auto device = topk_ids.device();
+  auto stream = get_current_cuda_stream(device.index());
+  torch::stable::Tensor atomic_buffer = torch::stable::new_zeros(
+      topk_ids, {num_experts}, torch::headeronly::ScalarType::Int);
 
   int num_threads = min(THREADS_PER_EXPERT, topk_ids.numel());
 
@@ -290,11 +305,13 @@ __global__ void compute_batched_moe_data(
 }
 
 void get_cutlass_batched_moe_mm_data_caller(
-    torch::Tensor& expert_offsets, torch::Tensor& problem_sizes1,
-    torch::Tensor& problem_sizes2, const torch::Tensor& expert_num_tokens,
+    torch::stable::Tensor& expert_offsets,
+    torch::stable::Tensor& problem_sizes1,
+    torch::stable::Tensor& problem_sizes2,
+    const torch::stable::Tensor& expert_num_tokens,
     const int64_t num_local_experts, const int64_t padded_m, const int64_t n,
     const int64_t k) {
-  auto stream = at::cuda::getCurrentCUDAStream(expert_offsets.device().index());
+  auto stream = get_current_cuda_stream(expert_offsets.get_device_index());
 
   if (num_local_experts * padded_m > SWAP_AB_THRESHOLD) {
     compute_batched_moe_data<false><<<1, num_local_experts, 0, stream>>>(
@@ -311,4 +328,4 @@ void get_cutlass_batched_moe_mm_data_caller(
         static_cast<const int32_t*>(expert_num_tokens.data_ptr()), padded_m, n,
         k);
   }
-}
\ No newline at end of file
+}
diff --git a/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x.cu
new file mode 100644
index 000000000000..184a26b491f8
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x.cu
@@ -0,0 +1,229 @@
+#include <stddef.h>
+#include <torch/csrc/stable/tensor.h>
+#include <torch/headeronly/core/ScalarType.h>
+#include "cutlass/cutlass.h"
+
+#include "scaled_mm_c2x.cuh"
+#include "scaled_mm_c2x_sm75_dispatch.cuh"
+#include "scaled_mm_c2x_sm80_dispatch.cuh"
+#include "scaled_mm_c2x_sm89_fp8_dispatch.cuh"
+#include "scaled_mm_c2x_sm89_int8_dispatch.cuh"
+
+#include "core/batch_invariant.hpp"
+#include "libtorch_stable/cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp"
+
+using namespace vllm;
+
+/*
+   This file defines quantized GEMM operations using the CUTLASS 2.x API, for
+   NVIDIA GPUs with SM versions prior to sm90 (Hopper).
+*/
+
+template <template <typename, typename> typename Epilogue,
+          typename... EpilogueArgs>
+void cutlass_scaled_mm_sm75_epilogue(torch::stable::Tensor& out,
+                                     torch::stable::Tensor const& a,
+                                     torch::stable::Tensor const& b,
+                                     EpilogueArgs&&... epilogue_args) {
+  STD_TORCH_CHECK(a.scalar_type() == torch::headeronly::ScalarType::Char);
+  STD_TORCH_CHECK(b.scalar_type() == torch::headeronly::ScalarType::Char);
+
+  if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
+    return cutlass_gemm_sm75_dispatch<int8_t, cutlass::bfloat16_t, Epilogue>(
+        out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
+  } else {
+    STD_TORCH_CHECK(out.scalar_type() == torch::headeronly::ScalarType::Half);
+    return cutlass_gemm_sm75_dispatch<int8_t, cutlass::half_t, Epilogue>(
+        out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
+  }
+}
+
+void cutlass_scaled_mm_sm75(torch::stable::Tensor& out,
+                            torch::stable::Tensor const& a,
+                            torch::stable::Tensor const& b,
+                            torch::stable::Tensor const& a_scales,
+                            torch::stable::Tensor const& b_scales,
+                            std::optional<torch::stable::Tensor> const& bias) {
+  STD_TORCH_CHECK(a_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+  STD_TORCH_CHECK(b_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+  if (bias) {
+    STD_TORCH_CHECK(bias->scalar_type() == out.scalar_type(),
+                    "currently bias dtype must match output dtype ",
+                    out.scalar_type());
+    return cutlass_scaled_mm_sm75_epilogue<c2x::ScaledEpilogueBias>(
+        out, a, b, a_scales, b_scales, *bias);
+  } else {
+    return cutlass_scaled_mm_sm75_epilogue<c2x::ScaledEpilogue>(
+        out, a, b, a_scales, b_scales);
+  }
+}
+
+void cutlass_scaled_mm_azp_sm75(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, torch::stable::Tensor const& azp_adj,
+    std::optional<torch::stable::Tensor> const& azp,
+    std::optional<torch::stable::Tensor> const& bias) {
+  STD_TORCH_CHECK(a_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+  STD_TORCH_CHECK(b_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+
+  if (azp) {
+    return cutlass_scaled_mm_sm75_epilogue<c2x::ScaledEpilogueBiasAzpToken>(
+        out, a, b, a_scales, b_scales, azp_adj, *azp, bias);
+  } else {
+    return cutlass_scaled_mm_sm75_epilogue<c2x::ScaledEpilogueBiasAzp>(
+        out, a, b, a_scales, b_scales, azp_adj, bias);
+  }
+}
+
+template <template <typename, typename> typename Epilogue,
+          typename... EpilogueArgs>
+void cutlass_scaled_mm_sm80_epilogue(torch::stable::Tensor& out,
+                                     torch::stable::Tensor const& a,
+                                     torch::stable::Tensor const& b,
+                                     EpilogueArgs&&... epilogue_args) {
+  STD_TORCH_CHECK(a.scalar_type() == torch::headeronly::ScalarType::Char);
+  STD_TORCH_CHECK(b.scalar_type() == torch::headeronly::ScalarType::Char);
+
+  if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
+    return cutlass_gemm_sm80_dispatch<int8_t, cutlass::bfloat16_t, Epilogue>(
+        out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
+  } else {
+    STD_TORCH_CHECK(out.scalar_type() == torch::headeronly::ScalarType::Half);
+    return cutlass_gemm_sm80_dispatch<int8_t, cutlass::half_t, Epilogue>(
+        out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
+  }
+}
+
+void cutlass_scaled_mm_sm80(torch::stable::Tensor& out,
+                            torch::stable::Tensor const& a,
+                            torch::stable::Tensor const& b,
+                            torch::stable::Tensor const& a_scales,
+                            torch::stable::Tensor const& b_scales,
+                            std::optional<torch::stable::Tensor> const& bias) {
+  STD_TORCH_CHECK(a_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+  STD_TORCH_CHECK(b_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+  if (bias) {
+    STD_TORCH_CHECK(bias->scalar_type() == out.scalar_type(),
+                    "currently bias dtype must match output dtype ",
+                    out.scalar_type());
+    return cutlass_scaled_mm_sm80_epilogue<c2x::ScaledEpilogueBias>(
+        out, a, b, a_scales, b_scales, *bias);
+  } else {
+    return cutlass_scaled_mm_sm80_epilogue<c2x::ScaledEpilogue>(
+        out, a, b, a_scales, b_scales);
+  }
+}
+
+void cutlass_scaled_mm_azp_sm80(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, torch::stable::Tensor const& azp_adj,
+    std::optional<torch::stable::Tensor> const& azp,
+    std::optional<torch::stable::Tensor> const& bias) {
+  STD_TORCH_CHECK(a_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+  STD_TORCH_CHECK(b_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+
+  if (azp) {
+    return cutlass_scaled_mm_sm80_epilogue<c2x::ScaledEpilogueBiasAzpToken>(
+        out, a, b, a_scales, b_scales, azp_adj, *azp, bias);
+  } else {
+    return cutlass_scaled_mm_sm80_epilogue<c2x::ScaledEpilogueBiasAzp>(
+        out, a, b, a_scales, b_scales, azp_adj, bias);
+  }
+}
+
+template <template <typename, typename> typename Epilogue,
+          typename... EpilogueArgs>
+void cutlass_scaled_mm_sm89_epilogue(torch::stable::Tensor& out,
+                                     torch::stable::Tensor const& a,
+                                     torch::stable::Tensor const& b,
+                                     EpilogueArgs&&... epilogue_args) {
+  if (a.scalar_type() == torch::headeronly::ScalarType::Char) {
+    STD_TORCH_CHECK(b.scalar_type() == torch::headeronly::ScalarType::Char);
+
+    if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
+      return cutlass_gemm_sm89_int8_dispatch<int8_t, cutlass::bfloat16_t,
+                                             Epilogue>(
+          out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
+    } else {
+      assert(out.scalar_type() == torch::headeronly::ScalarType::Half);
+      return cutlass_gemm_sm89_int8_dispatch<int8_t, cutlass::half_t, Epilogue>(
+          out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
+    }
+  } else {
+    STD_TORCH_CHECK(a.scalar_type() ==
+                    torch::headeronly::ScalarType::Float8_e4m3fn);
+    STD_TORCH_CHECK(b.scalar_type() ==
+                    torch::headeronly::ScalarType::Float8_e4m3fn);
+
+    if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
+      return cutlass_gemm_sm89_fp8_dispatch<cutlass::float_e4m3_t,
+                                            cutlass::bfloat16_t, Epilogue>(
+          out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
+    } else {
+      STD_TORCH_CHECK(out.scalar_type() == torch::headeronly::ScalarType::Half);
+      return cutlass_gemm_sm89_fp8_dispatch<cutlass::float_e4m3_t,
+                                            cutlass::half_t, Epilogue>(
+          out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
+    }
+  }
+}
+
+void cutlass_scaled_mm_sm89(torch::stable::Tensor& out,
+                            torch::stable::Tensor const& a,
+                            torch::stable::Tensor const& b,
+                            torch::stable::Tensor const& a_scales,
+                            torch::stable::Tensor const& b_scales,
+                            std::optional<torch::stable::Tensor> const& bias) {
+  STD_TORCH_CHECK(a_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+  STD_TORCH_CHECK(b_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+  if (bias) {
+    STD_TORCH_CHECK(bias->scalar_type() == out.scalar_type(),
+                    "currently bias dtype must match output dtype ",
+                    out.scalar_type());
+    if (vllm_is_batch_invariant()) {
+      return cutlass_scaled_mm_sm89_fp8_batch_invariant_epilogue<
+          c2x::ScaledEpilogueBias>(out, a, b, a_scales, b_scales, *bias);
+    }
+    return cutlass_scaled_mm_sm89_epilogue<c2x::ScaledEpilogueBias>(
+        out, a, b, a_scales, b_scales, *bias);
+  } else {
+    if (vllm_is_batch_invariant()) {
+      return cutlass_scaled_mm_sm89_fp8_batch_invariant_epilogue<
+          c2x::ScaledEpilogue>(out, a, b, a_scales, b_scales);
+    }
+    return cutlass_scaled_mm_sm89_epilogue<c2x::ScaledEpilogue>(
+        out, a, b, a_scales, b_scales);
+  }
+}
+
+void cutlass_scaled_mm_azp_sm89(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, torch::stable::Tensor const& azp_adj,
+    std::optional<torch::stable::Tensor> const& azp,
+    std::optional<torch::stable::Tensor> const& bias) {
+  STD_TORCH_CHECK(a_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+  STD_TORCH_CHECK(b_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+
+  if (azp) {
+    return cutlass_scaled_mm_sm89_epilogue<c2x::ScaledEpilogueBiasAzpToken>(
+        out, a, b, a_scales, b_scales, azp_adj, *azp, bias);
+  } else {
+    return cutlass_scaled_mm_sm89_epilogue<c2x::ScaledEpilogueBiasAzp>(
+        out, a, b, a_scales, b_scales, azp_adj, bias);
+  }
+}
diff --git a/csrc/quantization/w8a8/cutlass/scaled_mm_c2x.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x.cuh
similarity index 87%
rename from csrc/quantization/w8a8/cutlass/scaled_mm_c2x.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x.cuh
index 28d6d8ac8ec5..6eb2c051d00f 100644
--- a/csrc/quantization/w8a8/cutlass/scaled_mm_c2x.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x.cuh
@@ -1,8 +1,9 @@
 #pragma once
 #include <stddef.h>
-#include <torch/all.h>
+#include <torch/csrc/stable/tensor.h>
+#include <torch/csrc/stable/ops.h>
 
-#include <ATen/cuda/CUDAContext.h>
+#include "libtorch_stable/torch_utils.h"
 
 // clang-format will break include orders
 // clang-format off
@@ -95,8 +96,9 @@ struct cutlass_2x_gemm {
 };
 
 template <typename Gemm, typename... EpilogueArgs>
-inline void cutlass_gemm_caller(torch::Tensor& out, torch::Tensor const& a,
-                                torch::Tensor const& b,
+inline void cutlass_gemm_caller(torch::stable::Tensor& out,
+                                torch::stable::Tensor const& a,
+                                torch::stable::Tensor const& b,
                                 EpilogueArgs&&... epilogue_params) {
   using ElementAB = typename Gemm::ElementAB;
   using ElementD = typename Gemm::ElementD;
@@ -149,11 +151,12 @@ inline void cutlass_gemm_caller(torch::Tensor& out, torch::Tensor const& a,
   // Launch the CUTLASS GEMM kernel.
   typename Gemm::Op gemm_op;
   size_t workspace_size = gemm_op.get_workspace_size(args);
-  auto const workspace_options =
-      torch::TensorOptions().dtype(torch::kUInt8).device(a.device());
-  auto workspace = torch::empty(workspace_size, workspace_options);
+  auto device = a.device();
+  auto workspace =
+      torch::stable::empty(workspace_size, torch::headeronly::ScalarType::Byte,
+                           std::nullopt, device);
 
-  auto stream = at::cuda::getCurrentCUDAStream(a.get_device());
+  auto stream = get_current_cuda_stream(device.index());
 
   CUTLASS_CHECK(gemm_op.can_implement(args));
   cutlass::Status status = gemm_op(args, workspace.data_ptr(), stream);
@@ -161,9 +164,9 @@ inline void cutlass_gemm_caller(torch::Tensor& out, torch::Tensor const& a,
 }
 
 template <typename Gemm, typename FallbackGemm, typename... EpilogueArgs>
-inline void fallback_cutlass_gemm_caller(torch::Tensor& out,
-                                         torch::Tensor const& a,
-                                         torch::Tensor const& b,
+inline void fallback_cutlass_gemm_caller(torch::stable::Tensor& out,
+                                         torch::stable::Tensor const& a,
+                                         torch::stable::Tensor const& b,
                                          EpilogueArgs&&... args) {
   // In some cases, the GPU isn't able to accommodate the
   // shared memory requirements of the Gemm. In such cases, use
@@ -180,8 +183,8 @@ inline void fallback_cutlass_gemm_caller(torch::Tensor& out,
     return cutlass_gemm_caller<Gemm>(out, a, b,
                                      std::forward<EpilogueArgs>(args)...);
   } else {
-    TORCH_CHECK(fallback_gemm_shared_mem_size <=
-                max_shared_mem_per_block_opt_in);
+    STD_TORCH_CHECK(fallback_gemm_shared_mem_size <=
+                    max_shared_mem_per_block_opt_in);
     return cutlass_gemm_caller<FallbackGemm>(
         out, a, b, std::forward<EpilogueArgs>(args)...);
   }
diff --git a/csrc/quantization/w8a8/cutlass/scaled_mm_c2x_sm75_dispatch.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x_sm75_dispatch.cuh
similarity index 92%
rename from csrc/quantization/w8a8/cutlass/scaled_mm_c2x_sm75_dispatch.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x_sm75_dispatch.cuh
index a562fd896e54..4637e5d5ed8c 100644
--- a/csrc/quantization/w8a8/cutlass/scaled_mm_c2x_sm75_dispatch.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x_sm75_dispatch.cuh
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <torch/headeronly/util/shim_utils.h>
+
 #include "scaled_mm_c2x.cuh"
 
 /**
@@ -70,13 +72,13 @@ struct sm75_config_M32 {
 template <typename InType, typename OutType,
           template <typename, typename> typename Epilogue,
           typename... EpilogueArgs>
-inline void cutlass_gemm_sm75_dispatch(torch::Tensor& out,
-                                       torch::Tensor const& a,
-                                       torch::Tensor const& b,
+inline void cutlass_gemm_sm75_dispatch(torch::stable::Tensor& out,
+                                       torch::stable::Tensor const& a,
+                                       torch::stable::Tensor const& b,
                                        EpilogueArgs&&... args) {
   static_assert(std::is_same<InType, int8_t>());
-  TORCH_CHECK(a.dtype() == torch::kInt8);
-  TORCH_CHECK(b.dtype() == torch::kInt8);
+  STD_TORCH_CHECK(a.scalar_type() == torch::headeronly::ScalarType::Char);
+  STD_TORCH_CHECK(b.scalar_type() == torch::headeronly::ScalarType::Char);
 
   using Cutlass2xGemmDefault =
       typename sm75_config_default<InType, OutType, Epilogue>::Cutlass2xGemm;
diff --git a/csrc/quantization/w8a8/cutlass/scaled_mm_c2x_sm80_dispatch.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x_sm80_dispatch.cuh
similarity index 93%
rename from csrc/quantization/w8a8/cutlass/scaled_mm_c2x_sm80_dispatch.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x_sm80_dispatch.cuh
index 89d101b0ed82..fa2e3c5faa56 100644
--- a/csrc/quantization/w8a8/cutlass/scaled_mm_c2x_sm80_dispatch.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x_sm80_dispatch.cuh
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <torch/headeronly/util/shim_utils.h>
+
 #include "scaled_mm_c2x.cuh"
 
 /**
@@ -72,13 +74,13 @@ struct sm80_config_M16 {
 template <typename InType, typename OutType,
           template <typename, typename> typename Epilogue,
           typename... EpilogueArgs>
-inline void cutlass_gemm_sm80_dispatch(torch::Tensor& out,
-                                       torch::Tensor const& a,
-                                       torch::Tensor const& b,
+inline void cutlass_gemm_sm80_dispatch(torch::stable::Tensor& out,
+                                       torch::stable::Tensor const& a,
+                                       torch::stable::Tensor const& b,
                                        EpilogueArgs&&... args) {
   static_assert(std::is_same<InType, int8_t>());
-  TORCH_CHECK(a.dtype() == torch::kInt8);
-  TORCH_CHECK(b.dtype() == torch::kInt8);
+  STD_TORCH_CHECK(a.scalar_type() == torch::headeronly::ScalarType::Char);
+  STD_TORCH_CHECK(b.scalar_type() == torch::headeronly::ScalarType::Char);
 
   using Cutlass2xGemmDefault =
       typename sm80_config_default<InType, OutType, Epilogue>::Cutlass2xGemm;
diff --git a/csrc/quantization/w8a8/cutlass/scaled_mm_c2x_sm89_fp8_dispatch.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x_sm89_fp8_dispatch.cuh
similarity index 78%
rename from csrc/quantization/w8a8/cutlass/scaled_mm_c2x_sm89_fp8_dispatch.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x_sm89_fp8_dispatch.cuh
index c7e0039bef7f..d3424d980f9b 100644
--- a/csrc/quantization/w8a8/cutlass/scaled_mm_c2x_sm89_fp8_dispatch.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x_sm89_fp8_dispatch.cuh
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <torch/headeronly/util/shim_utils.h>
+
 #include "scaled_mm_c2x.cuh"
 #include "cutlass/float8.h"
 
@@ -34,10 +36,12 @@ struct sm89_fp8_config_default {
   template <typename InType, typename OutType,
             template <typename, typename> typename Epilogue,
             typename... EpilogueArgs>
-  static void dispatch(torch::Tensor& out, torch::Tensor const& a,
-                       torch::Tensor const& b, EpilogueArgs&&... args) {
+  static void dispatch(torch::stable::Tensor& out,
+                       torch::stable::Tensor const& a,
+                       torch::stable::Tensor const& b, EpilogueArgs&&... args) {
     static_assert(std::is_same<InType, cutlass::float_e4m3_t>());
-    TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn);
+    STD_TORCH_CHECK(a.scalar_type() ==
+                    torch::headeronly::ScalarType::Float8_e4m3fn);
 
     using FallbackGemm =
         typename sm89_fp8_fallback_gemm<InType, OutType,
@@ -84,10 +88,12 @@ struct sm89_fp8_config_M256 {
   template <typename InType, typename OutType,
             template <typename, typename> typename Epilogue,
             typename... EpilogueArgs>
-  static void dispatch(torch::Tensor& out, torch::Tensor const& a,
-                       torch::Tensor const& b, EpilogueArgs&&... args) {
+  static void dispatch(torch::stable::Tensor& out,
+                       torch::stable::Tensor const& a,
+                       torch::stable::Tensor const& b, EpilogueArgs&&... args) {
     static_assert(std::is_same<InType, cutlass::float_e4m3_t>());
-    TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn);
+    STD_TORCH_CHECK(a.scalar_type() ==
+                    torch::headeronly::ScalarType::Float8_e4m3fn);
 
     using FallbackGemm =
         typename sm89_fp8_fallback_gemm<InType, OutType,
@@ -125,10 +131,12 @@ struct sm89_fp8_config_M128 {
   template <typename InType, typename OutType,
             template <typename, typename> typename Epilogue,
             typename... EpilogueArgs>
-  static void dispatch(torch::Tensor& out, torch::Tensor const& a,
-                       torch::Tensor const& b, EpilogueArgs&&... args) {
+  static void dispatch(torch::stable::Tensor& out,
+                       torch::stable::Tensor const& a,
+                       torch::stable::Tensor const& b, EpilogueArgs&&... args) {
     static_assert(std::is_same<InType, cutlass::float_e4m3_t>());
-    TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn);
+    STD_TORCH_CHECK(a.scalar_type() ==
+                    torch::headeronly::ScalarType::Float8_e4m3fn);
 
     using FallbackGemm =
         typename sm89_fp8_fallback_gemm<InType, OutType,
@@ -173,10 +181,12 @@ struct sm89_fp8_config_M64 {
   template <typename InType, typename OutType,
             template <typename, typename> typename Epilogue,
             typename... EpilogueArgs>
-  static void dispatch(torch::Tensor& out, torch::Tensor const& a,
-                       torch::Tensor const& b, EpilogueArgs&&... args) {
+  static void dispatch(torch::stable::Tensor& out,
+                       torch::stable::Tensor const& a,
+                       torch::stable::Tensor const& b, EpilogueArgs&&... args) {
     static_assert(std::is_same<InType, cutlass::float_e4m3_t>());
-    TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn);
+    STD_TORCH_CHECK(a.scalar_type() ==
+                    torch::headeronly::ScalarType::Float8_e4m3fn);
 
     using FallbackGemm =
         typename sm89_fp8_fallback_gemm<InType, OutType,
@@ -227,10 +237,12 @@ struct sm89_fp8_config_M32 {
   template <typename InType, typename OutType,
             template <typename, typename> typename Epilogue,
             typename... EpilogueArgs>
-  static void dispatch(torch::Tensor& out, torch::Tensor const& a,
-                       torch::Tensor const& b, EpilogueArgs&&... args) {
+  static void dispatch(torch::stable::Tensor& out,
+                       torch::stable::Tensor const& a,
+                       torch::stable::Tensor const& b, EpilogueArgs&&... args) {
     static_assert(std::is_same<InType, cutlass::float_e4m3_t>());
-    TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn);
+    STD_TORCH_CHECK(a.scalar_type() ==
+                    torch::headeronly::ScalarType::Float8_e4m3fn);
 
     using FallbackGemm =
         typename sm89_fp8_fallback_gemm<InType, OutType,
@@ -280,10 +292,12 @@ struct sm89_fp8_config_M16 {
   template <typename InType, typename OutType,
             template <typename, typename> typename Epilogue,
             typename... EpilogueArgs>
-  static void dispatch(torch::Tensor& out, torch::Tensor const& a,
-                       torch::Tensor const& b, EpilogueArgs&&... args) {
+  static void dispatch(torch::stable::Tensor& out,
+                       torch::stable::Tensor const& a,
+                       torch::stable::Tensor const& b, EpilogueArgs&&... args) {
     static_assert(std::is_same<InType, cutlass::float_e4m3_t>());
-    TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn);
+    STD_TORCH_CHECK(a.scalar_type() ==
+                    torch::headeronly::ScalarType::Float8_e4m3fn);
 
     using FallbackGemm =
         typename sm89_fp8_fallback_gemm<InType, OutType,
@@ -326,13 +340,15 @@ struct sm89_fp8_config_M16 {
 template <typename InType, typename OutType,
           template <typename, typename> typename Epilogue,
           typename... EpilogueArgs>
-inline void cutlass_gemm_sm89_fp8_dispatch(torch::Tensor& out,
-                                           torch::Tensor const& a,
-                                           torch::Tensor const& b,
+inline void cutlass_gemm_sm89_fp8_dispatch(torch::stable::Tensor& out,
+                                           torch::stable::Tensor const& a,
+                                           torch::stable::Tensor const& b,
                                            EpilogueArgs&&... args) {
   static_assert(std::is_same<InType, cutlass::float_e4m3_t>());
-  TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn);
-  TORCH_CHECK(b.dtype() == torch::kFloat8_e4m3fn);
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
 
   uint32_t const m = a.size(0);
   uint32_t const mp2 =
@@ -365,4 +381,43 @@ inline void cutlass_gemm_sm89_fp8_dispatch(torch::Tensor& out,
   }
 }
 
+template <typename InType, typename OutType,
+          template <typename, typename> typename Epilogue,
+          typename... EpilogueArgs>
+inline void cutlass_gemm_sm89_fp8_batch_invariant_dispatch(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, EpilogueArgs&&... args) {
+  static_assert(std::is_same<InType, cutlass::float_e4m3_t>());
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+
+  // keep the CUTLASS config independent of M for batch invariance
+  return sm89_fp8_config_M64::dispatch<InType, OutType, Epilogue>(
+      out, a, b, std::forward<EpilogueArgs>(args)...);
+}
+
+template <template <typename, typename> typename Epilogue,
+          typename... EpilogueArgs>
+void cutlass_scaled_mm_sm89_fp8_batch_invariant_epilogue(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, EpilogueArgs&&... epilogue_args) {
+  STD_TORCH_CHECK(a.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+  STD_TORCH_CHECK(b.scalar_type() ==
+                  torch::headeronly::ScalarType::Float8_e4m3fn);
+
+  if (out.scalar_type() == torch::headeronly::ScalarType::BFloat16) {
+    return cutlass_gemm_sm89_fp8_batch_invariant_dispatch<
+        cutlass::float_e4m3_t, cutlass::bfloat16_t, Epilogue>(
+        out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
+  } else {
+    STD_TORCH_CHECK(out.scalar_type() == torch::headeronly::ScalarType::Half);
+    return cutlass_gemm_sm89_fp8_batch_invariant_dispatch<
+        cutlass::float_e4m3_t, cutlass::half_t, Epilogue>(
+        out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
+  }
+}
+
 }  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/scaled_mm_c2x_sm89_int8_dispatch.cuh b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x_sm89_int8_dispatch.cuh
similarity index 86%
rename from csrc/quantization/w8a8/cutlass/scaled_mm_c2x_sm89_int8_dispatch.cuh
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x_sm89_int8_dispatch.cuh
index ebbf3342e027..f4d24150a823 100644
--- a/csrc/quantization/w8a8/cutlass/scaled_mm_c2x_sm89_int8_dispatch.cuh
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c2x_sm89_int8_dispatch.cuh
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <torch/headeronly/util/shim_utils.h>
+
 #include "scaled_mm_c2x.cuh"
 
 /**
@@ -32,10 +34,11 @@ struct sm89_int8_config_default {
   template <typename InType, typename OutType,
             template <typename, typename> typename Epilogue,
             typename... EpilogueArgs>
-  static void dispatch(torch::Tensor& out, torch::Tensor const& a,
-                       torch::Tensor const& b, EpilogueArgs&&... args) {
+  static void dispatch(torch::stable::Tensor& out,
+                       torch::stable::Tensor const& a,
+                       torch::stable::Tensor const& b, EpilogueArgs&&... args) {
     static_assert(std::is_same<InType, int8_t>());
-    TORCH_CHECK(a.dtype() == torch::kInt8);
+    STD_TORCH_CHECK(a.scalar_type() == torch::headeronly::ScalarType::Char);
 
     using FallbackGemm =
         typename sm89_int8_fallback_gemm<InType, OutType,
@@ -88,10 +91,11 @@ struct sm89_int8_config_M256 {
   template <typename InType, typename OutType,
             template <typename, typename> typename Epilogue,
             typename... EpilogueArgs>
-  static void dispatch(torch::Tensor& out, torch::Tensor const& a,
-                       torch::Tensor const& b, EpilogueArgs&&... args) {
+  static void dispatch(torch::stable::Tensor& out,
+                       torch::stable::Tensor const& a,
+                       torch::stable::Tensor const& b, EpilogueArgs&&... args) {
     static_assert(std::is_same<InType, int8_t>());
-    TORCH_CHECK(a.dtype() == torch::kInt8);
+    STD_TORCH_CHECK(a.scalar_type() == torch::headeronly::ScalarType::Char);
 
     using FallbackGemm =
         typename sm89_int8_fallback_gemm<InType, OutType,
@@ -143,10 +147,11 @@ struct sm89_int8_config_M128 {
   template <typename InType, typename OutType,
             template <typename, typename> typename Epilogue,
             typename... EpilogueArgs>
-  static void dispatch(torch::Tensor& out, torch::Tensor const& a,
-                       torch::Tensor const& b, EpilogueArgs&&... args) {
+  static void dispatch(torch::stable::Tensor& out,
+                       torch::stable::Tensor const& a,
+                       torch::stable::Tensor const& b, EpilogueArgs&&... args) {
     static_assert(std::is_same<InType, int8_t>());
-    TORCH_CHECK(a.dtype() == torch::kInt8);
+    STD_TORCH_CHECK(a.scalar_type() == torch::headeronly::ScalarType::Char);
 
     using FallbackGemm =
         typename sm89_int8_fallback_gemm<InType, OutType,
@@ -193,10 +198,11 @@ struct sm89_int8_config_M64 {
   template <typename InType, typename OutType,
             template <typename, typename> typename Epilogue,
             typename... EpilogueArgs>
-  static void dispatch(torch::Tensor& out, torch::Tensor const& a,
-                       torch::Tensor const& b, EpilogueArgs&&... args) {
+  static void dispatch(torch::stable::Tensor& out,
+                       torch::stable::Tensor const& a,
+                       torch::stable::Tensor const& b, EpilogueArgs&&... args) {
     static_assert(std::is_same<InType, int8_t>());
-    TORCH_CHECK(a.dtype() == torch::kInt8);
+    STD_TORCH_CHECK(a.scalar_type() == torch::headeronly::ScalarType::Char);
 
     using FallbackGemm =
         typename sm89_int8_fallback_gemm<InType, OutType,
@@ -234,10 +240,11 @@ struct sm89_int8_config_M32 {
   template <typename InType, typename OutType,
             template <typename, typename> typename Epilogue,
             typename... EpilogueArgs>
-  static void dispatch(torch::Tensor& out, torch::Tensor const& a,
-                       torch::Tensor const& b, EpilogueArgs&&... args) {
+  static void dispatch(torch::stable::Tensor& out,
+                       torch::stable::Tensor const& a,
+                       torch::stable::Tensor const& b, EpilogueArgs&&... args) {
     static_assert(std::is_same<InType, int8_t>());
-    TORCH_CHECK(a.dtype() == torch::kInt8);
+    STD_TORCH_CHECK(a.scalar_type() == torch::headeronly::ScalarType::Char);
 
     using FallbackGemm =
         typename sm89_int8_fallback_gemm<InType, OutType,
@@ -276,10 +283,11 @@ struct sm89_int8_config_M16 {
   template <typename InType, typename OutType,
             template <typename, typename> typename Epilogue,
             typename... EpilogueArgs>
-  static void dispatch(torch::Tensor& out, torch::Tensor const& a,
-                       torch::Tensor const& b, EpilogueArgs&&... args) {
+  static void dispatch(torch::stable::Tensor& out,
+                       torch::stable::Tensor const& a,
+                       torch::stable::Tensor const& b, EpilogueArgs&&... args) {
     static_assert(std::is_same<InType, int8_t>());
-    TORCH_CHECK(a.dtype() == torch::kInt8);
+    STD_TORCH_CHECK(a.scalar_type() == torch::headeronly::ScalarType::Char);
 
     using FallbackGemm =
         typename sm89_int8_fallback_gemm<InType, OutType,
@@ -311,13 +319,13 @@ struct sm89_int8_config_M16 {
 template <typename InType, typename OutType,
           template <typename, typename> typename Epilogue,
           typename... EpilogueArgs>
-inline void cutlass_gemm_sm89_int8_dispatch(torch::Tensor& out,
-                                            torch::Tensor const& a,
-                                            torch::Tensor const& b,
+inline void cutlass_gemm_sm89_int8_dispatch(torch::stable::Tensor& out,
+                                            torch::stable::Tensor const& a,
+                                            torch::stable::Tensor const& b,
                                             EpilogueArgs&&... args) {
   static_assert(std::is_same<InType, int8_t>());
-  TORCH_CHECK(a.dtype() == torch::kInt8);
-  TORCH_CHECK(b.dtype() == torch::kInt8);
+  STD_TORCH_CHECK(a.scalar_type() == torch::headeronly::ScalarType::Char);
+  STD_TORCH_CHECK(b.scalar_type() == torch::headeronly::ScalarType::Char);
 
   uint32_t const m = a.size(0);
   uint32_t const mp2 =
diff --git a/csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm100.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c3x_sm100.cu
similarity index 56%
rename from csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm100.cu
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c3x_sm100.cu
index 0cbd5305e3c2..23816361edcb 100644
--- a/csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm100.cu
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c3x_sm100.cu
@@ -8,11 +8,12 @@
 
 #if defined ENABLE_SCALED_MM_SM100 && ENABLE_SCALED_MM_SM100
 
-void cutlass_scaled_mm_sm100(torch::Tensor& c, torch::Tensor const& a,
-                             torch::Tensor const& b,
-                             torch::Tensor const& a_scales,
-                             torch::Tensor const& b_scales,
-                             std::optional<torch::Tensor> const& bias) {
+void cutlass_scaled_mm_sm100(torch::stable::Tensor& c,
+                             torch::stable::Tensor const& a,
+                             torch::stable::Tensor const& b,
+                             torch::stable::Tensor const& a_scales,
+                             torch::stable::Tensor const& b_scales,
+                             std::optional<torch::stable::Tensor> const& bias) {
   dispatch_scaled_mm(c, a, b, a_scales, b_scales, bias,
                      vllm::cutlass_scaled_mm_sm100_fp8,
                      nullptr,  // int8 not supported on SM100
diff --git a/csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm120.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c3x_sm120.cu
similarity index 56%
rename from csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm120.cu
rename to csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c3x_sm120.cu
index dc87c5c35cb8..e7f136897f1d 100644
--- a/csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm120.cu
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c3x_sm120.cu
@@ -8,11 +8,12 @@
 
 #if defined ENABLE_SCALED_MM_SM120 && ENABLE_SCALED_MM_SM120
 
-void cutlass_scaled_mm_sm120(torch::Tensor& c, torch::Tensor const& a,
-                             torch::Tensor const& b,
-                             torch::Tensor const& a_scales,
-                             torch::Tensor const& b_scales,
-                             std::optional<torch::Tensor> const& bias) {
+void cutlass_scaled_mm_sm120(torch::stable::Tensor& c,
+                             torch::stable::Tensor const& a,
+                             torch::stable::Tensor const& b,
+                             torch::stable::Tensor const& a_scales,
+                             torch::stable::Tensor const& b_scales,
+                             std::optional<torch::stable::Tensor> const& bias) {
   dispatch_scaled_mm(c, a, b, a_scales, b_scales, bias,
                      vllm::cutlass_scaled_mm_sm120_fp8,
                      nullptr,  // int8 not supported on SM120
diff --git a/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c3x_sm90.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c3x_sm90.cu
new file mode 100644
index 000000000000..18b5fb96cbe1
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_c3x_sm90.cu
@@ -0,0 +1,38 @@
+#include "c3x/scaled_mm_helper.hpp"
+#include "c3x/scaled_mm_kernels.hpp"
+
+/*
+   This file defines quantized GEMM operations using the CUTLASS 3.x API, for
+   NVIDIA GPUs with sm90a (Hopper).
+*/
+
+#if defined ENABLE_SCALED_MM_SM90 && ENABLE_SCALED_MM_SM90
+
+void cutlass_scaled_mm_sm90(torch::stable::Tensor& c,
+                            torch::stable::Tensor const& a,
+                            torch::stable::Tensor const& b,
+                            torch::stable::Tensor const& a_scales,
+                            torch::stable::Tensor const& b_scales,
+                            std::optional<torch::stable::Tensor> const& bias) {
+  dispatch_scaled_mm(c, a, b, a_scales, b_scales, bias,
+                     vllm::cutlass_scaled_mm_sm90_fp8,
+                     vllm::cutlass_scaled_mm_sm90_int8,
+                     vllm::cutlass_scaled_mm_blockwise_sm90_fp8);
+}
+
+void cutlass_scaled_mm_azp_sm90(
+    torch::stable::Tensor& out, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, torch::stable::Tensor const& azp_adj,
+    std::optional<torch::stable::Tensor> const& azp,
+    std::optional<torch::stable::Tensor> const& bias) {
+  STD_TORCH_CHECK(a_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+  STD_TORCH_CHECK(b_scales.scalar_type() ==
+                  torch::headeronly::ScalarType::Float);
+
+  vllm::cutlass_scaled_mm_azp_sm90_int8(out, a, b, a_scales, b_scales, azp_adj,
+                                        azp, bias);
+}
+
+#endif
diff --git a/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_entry.cu b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_entry.cu
new file mode 100644
index 000000000000..2e5bbca4700a
--- /dev/null
+++ b/csrc/libtorch_stable/quantization/w8a8/cutlass/scaled_mm_entry.cu
@@ -0,0 +1,451 @@
+#include <cudaTypedefs.h>
+
+#include <torch/csrc/stable/tensor.h>
+
+#include "libtorch_stable/torch_utils.h"
+
+#include "cutlass_extensions/common.hpp"
+
+void cutlass_scaled_mm_sm75(torch::stable::Tensor& c,
+                            torch::stable::Tensor const& a,
+                            torch::stable::Tensor const& b,
+                            torch::stable::Tensor const& a_scales,
+                            torch::stable::Tensor const& b_scales,
+                            std::optional<torch::stable::Tensor> const& bias);
+
+void cutlass_scaled_mm_sm80(torch::stable::Tensor& c,
+                            torch::stable::Tensor const& a,
+                            torch::stable::Tensor const& b,
+                            torch::stable::Tensor const& a_scales,
+                            torch::stable::Tensor const& b_scales,
+                            std::optional<torch::stable::Tensor> const& bias);
+
+void cutlass_scaled_mm_sm89(torch::stable::Tensor& c,
+                            torch::stable::Tensor const& a,
+                            torch::stable::Tensor const& b,
+                            torch::stable::Tensor const& a_scales,
+                            torch::stable::Tensor const& b_scales,
+                            std::optional<torch::stable::Tensor> const& bias);
+
+#if defined ENABLE_SCALED_MM_SM90 && ENABLE_SCALED_MM_SM90
+void cutlass_scaled_mm_sm90(torch::stable::Tensor& c,
+                            torch::stable::Tensor const& a,
+                            torch::stable::Tensor const& b,
+                            torch::stable::Tensor const& a_scales,
+                            torch::stable::Tensor const& b_scales,
+                            std::optional<torch::stable::Tensor> const& bias);
+#endif
+#if defined ENABLE_CUTLASS_MOE_SM90 && ENABLE_CUTLASS_MOE_SM90
+void cutlass_moe_mm_sm90(torch::stable::Tensor& out_tensors,
+                         torch::stable::Tensor const& a_tensors,
+                         torch::stable::Tensor const& b_tensors,
+                         torch::stable::Tensor const& a_scales,
+                         torch::stable::Tensor const& b_scales,
+                         torch::stable::Tensor const& expert_offsets,
+                         torch::stable::Tensor const& problem_sizes,
+                         torch::stable::Tensor const& a_strides,
+                         torch::stable::Tensor const& b_strides,
+                         torch::stable::Tensor const& c_strides,
+                         bool per_act_token, bool per_out_ch);
+
+#endif
+
+#if defined ENABLE_CUTLASS_MOE_SM100 && ENABLE_CUTLASS_MOE_SM100
+void cutlass_moe_mm_sm100(torch::stable::Tensor& out_tensors,
+                          torch::stable::Tensor const& a_tensors,
+                          torch::stable::Tensor const& b_tensors,
+                          torch::stable::Tensor const& a_scales,
+                          torch::stable::Tensor const& b_scales,
+                          torch::stable::Tensor const& expert_offsets,
+                          torch::stable::Tensor const& problem_sizes,
+                          torch::stable::Tensor const& a_strides,
+                          torch::stable::Tensor const& b_strides,
+                          torch::stable::Tensor const& c_strides,
+                          bool per_act_token, bool per_out_ch);
+#endif
+
+#if defined ENABLE_SCALED_MM_SM120 && ENABLE_SCALED_MM_SM120
+void cutlass_scaled_mm_sm120(torch::stable::Tensor& c,
+                             torch::stable::Tensor const& a,
+                             torch::stable::Tensor const& b,
+                             torch::stable::Tensor const& a_scales,
+                             torch::stable::Tensor const& b_scales,
+                             std::optional<torch::stable::Tensor> const& bias);
+#endif
+
+#if defined ENABLE_SCALED_MM_SM100 && ENABLE_SCALED_MM_SM100
+void cutlass_scaled_mm_sm100(torch::stable::Tensor& c,
+                             torch::stable::Tensor const& a,
+                             torch::stable::Tensor const& b,
+                             torch::stable::Tensor const& a_scales,
+                             torch::stable::Tensor const& b_scales,
+                             std::optional<torch::stable::Tensor> const& bias);
+#endif
+
+#if (defined(ENABLE_CUTLASS_MOE_SM90) && ENABLE_CUTLASS_MOE_SM90) ||   \
+    (defined(ENABLE_CUTLASS_MOE_SM100) && ENABLE_CUTLASS_MOE_SM100) || \
+    (defined(ENABLE_CUTLASS_MOE_SM120) && ENABLE_CUTLASS_MOE_SM120)
+void get_cutlass_moe_mm_data_caller(
+    const torch::stable::Tensor& topk_ids,
+    torch::stable::Tensor& expert_offsets,
+    torch::stable::Tensor& problem_sizes1,
+    torch::stable::Tensor& problem_sizes2,
+    torch::stable::Tensor& input_permutation,
+    torch::stable::Tensor& output_permutation, const int64_t num_experts,
+    const int64_t n, const int64_t k,
+    const std::optional<torch::stable::Tensor>& blockscale_offsets,
+    const bool is_gated);
+
+void get_cutlass_moe_mm_problem_sizes_from_expert_offsets_caller(
+    const torch::stable::Tensor& expert_first_token_offset,
+    torch::stable::Tensor& problem_sizes1,
+    torch::stable::Tensor& problem_sizes2, const int64_t n, const int64_t k,
+    const bool swap_ab);
+
+void get_cutlass_batched_moe_mm_data_caller(
+    torch::stable::Tensor& expert_offsets,
+    torch::stable::Tensor& problem_sizes1,
+    torch::stable::Tensor& problem_sizes2,
+    const torch::stable::Tensor& expert_num_tokens,
+    const int64_t num_local_experts, const int64_t padded_m, const int64_t n,
+    const int64_t k);
+#endif
+
+void cutlass_scaled_mm_azp_sm75(
+    torch::stable::Tensor& c, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, torch::stable::Tensor const& azp_adj,
+    std::optional<torch::stable::Tensor> const& azp,
+    std::optional<torch::stable::Tensor> const& bias);
+
+void cutlass_scaled_mm_azp_sm80(
+    torch::stable::Tensor& c, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, torch::stable::Tensor const& azp_adj,
+    std::optional<torch::stable::Tensor> const& azp,
+    std::optional<torch::stable::Tensor> const& bias);
+
+void cutlass_scaled_mm_azp_sm89(
+    torch::stable::Tensor& c, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, torch::stable::Tensor const& azp_adj,
+    std::optional<torch::stable::Tensor> const& azp,
+    std::optional<torch::stable::Tensor> const& bias);
+
+#if defined ENABLE_SCALED_MM_SM90 && ENABLE_SCALED_MM_SM90
+void cutlass_scaled_mm_azp_sm90(
+    torch::stable::Tensor& c, torch::stable::Tensor const& a,
+    torch::stable::Tensor const& b, torch::stable::Tensor const& a_scales,
+    torch::stable::Tensor const& b_scales, torch::stable::Tensor const& azp_adj,
+    std::optional<torch::stable::Tensor> const& azp,
+    std::optional<torch::stable::Tensor> const& bias);
+#endif
+
+bool cutlass_scaled_mm_supports_fp8(int64_t cuda_device_capability) {
+  // CUTLASS FP8 kernels need at least
+  //   CUDA 12.0 on SM90 systems (Hopper)
+  //   CUDA 12.4 on SM89 systems (Lovelace)
+
+#if defined CUDA_VERSION
+  if (cuda_device_capability >= 90) {
+    return CUDA_VERSION >= 12000;
+  } else if (cuda_device_capability >= 89) {
+    return CUDA_VERSION >= 12040;
+  }
+#endif
+
+  return false;
+}
+
+bool cutlass_scaled_mm_supports_block_fp8(int64_t cuda_device_capability) {
+  // CUTLASS block-quantized FP8 kernels need at least CUDA 12.0
+  // and at least SM90 (Hopper)
+
+#if defined CUDA_VERSION
+  if (cuda_device_capability >= 100) {
+    return CUDA_VERSION >= 12080;
+  } else if (cuda_device_capability >= 90) {
+    return CUDA_VERSION >= 12000;
+  }
+#endif
+
+  return false;
+}
+
+bool cutlass_group_gemm_supported(int64_t cuda_device_capability) {
+  // CUTLASS grouped FP8 kernels need at least CUDA 12.3 and SM90 (Hopper)
+  // or CUDA 12.8 and SM100 (Blackwell)
+
+#if defined CUDA_VERSION
+  if (cuda_device_capability >= 100) {
+    return CUDA_VERSION >= 12080;
+  }
+  if (cuda_device_capability >= 90) {
+    return CUDA_VERSION >= 12030;
+  }
+#endif
+
+  return false;
+}
+
+void cutlass_scaled_mm(torch::stable::Tensor& c, torch::stable::Tensor const& a,
+                       torch::stable::Tensor const& b,
+                       torch::stable::Tensor const& a_scales,
+                       torch::stable::Tensor const& b_scales,
+                       std::optional<torch::stable::Tensor> const& bias) {
+  // Checks for conformality
+  STD_TORCH_CHECK(a.dim() == 2 && b.dim() == 2 && c.dim() == 2);
+  STD_TORCH_CHECK(c.size(0) == a.size(0) && a.size(1) == b.size(0) &&
+                  b.size(1) == c.size(1));
+
+  // Check for strides and alignment
+  STD_TORCH_CHECK(a.stride(1) == 1 && c.stride(1) == 1);  // Row-major
+  STD_TORCH_CHECK(b.stride(0) == 1);                      // Column-major
+  STD_TORCH_CHECK(c.stride(0) % 16 == 0 &&
+                  b.stride(1) % 16 == 0);  // 16 Byte Alignment
+
+  if (bias) {
+    STD_TORCH_CHECK(bias->numel() == b.size(1) && bias->is_contiguous() &&
+                    bias->dim() == 1);
+  }
+
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      a.get_device_index());
+  int32_t version_num = get_sm_version_num();
+
+#if defined ENABLE_SCALED_MM_SM120 && ENABLE_SCALED_MM_SM120
+  if (version_num >= 120) {
+    cutlass_scaled_mm_sm120(c, a, b, a_scales, b_scales, bias);
+    return;
+  }
+#endif
+
+#if defined ENABLE_SCALED_MM_SM100 && ENABLE_SCALED_MM_SM100
+  if (version_num >= 100 && version_num < 120) {
+    cutlass_scaled_mm_sm100(c, a, b, a_scales, b_scales, bias);
+    return;
+  }
+#endif
+
+  // Guard against compilation issues for sm90 kernels
+#if defined ENABLE_SCALED_MM_SM90 && ENABLE_SCALED_MM_SM90
+  if (version_num >= 90 && version_num < 100) {
+    // Hopper
+    cutlass_scaled_mm_sm90(c, a, b, a_scales, b_scales, bias);
+    return;
+  }
+#endif
+
+#if defined ENABLE_SCALED_MM_C2X && ENABLE_SCALED_MM_C2X
+  if (version_num == 89) {
+    // Ada Lovelace
+    cutlass_scaled_mm_sm89(c, a, b, a_scales, b_scales, bias);
+    return;
+  }
+
+  if (version_num >= 80) {
+    // Ampere
+    cutlass_scaled_mm_sm80(c, a, b, a_scales, b_scales, bias);
+    return;
+  }
+
+  if (version_num >= 75) {
+    // Turing
+    cutlass_scaled_mm_sm75(c, a, b, a_scales, b_scales, bias);
+    return;
+  }
+#endif
+
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
+      false,
+      "No compiled cutlass_scaled_mm for a compute capability less than "
+      "CUDA device capability: ",
+      version_num);
+}
+
+void cutlass_moe_mm(torch::stable::Tensor& out_tensors,
+                    torch::stable::Tensor const& a_tensors,
+                    torch::stable::Tensor const& b_tensors,
+                    torch::stable::Tensor const& a_scales,
+                    torch::stable::Tensor const& b_scales,
+                    torch::stable::Tensor const& expert_offsets,
+                    torch::stable::Tensor const& problem_sizes,
+                    torch::stable::Tensor const& a_strides,
+                    torch::stable::Tensor const& b_strides,
+                    torch::stable::Tensor const& c_strides, bool per_act_token,
+                    bool per_out_ch) {
+  int32_t version_num = get_sm_version_num();
+#if defined ENABLE_CUTLASS_MOE_SM100 && ENABLE_CUTLASS_MOE_SM100
+  if (version_num >= 100 && version_num < 110) {
+    cutlass_moe_mm_sm100(out_tensors, a_tensors, b_tensors, a_scales, b_scales,
+                         expert_offsets, problem_sizes, a_strides, b_strides,
+                         c_strides, per_act_token, per_out_ch);
+    return;
+  }
+#endif
+#if defined ENABLE_CUTLASS_MOE_SM90 && ENABLE_CUTLASS_MOE_SM90
+  if (version_num >= 90 && version_num < 100) {
+    cutlass_moe_mm_sm90(out_tensors, a_tensors, b_tensors, a_scales, b_scales,
+                        expert_offsets, problem_sizes, a_strides, b_strides,
+                        c_strides, per_act_token, per_out_ch);
+    return;
+  }
+#endif
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
+      false,
+      "No compiled cutlass_scaled_mm for CUDA device capability: ", version_num,
+      ". Required capability: 90 or 100");
+}
+
+void get_cutlass_moe_mm_data(
+    const torch::stable::Tensor& topk_ids,
+    torch::stable::Tensor& expert_offsets,
+    torch::stable::Tensor& problem_sizes1,
+    torch::stable::Tensor& problem_sizes2,
+    torch::stable::Tensor& input_permutation,
+    torch::stable::Tensor& output_permutation, const int64_t num_experts,
+    const int64_t n, const int64_t k,
+    const std::optional<torch::stable::Tensor>& blockscale_offsets,
+    const bool is_gated) {
+  // This function currently gets compiled only if we have a valid cutlass moe
+  // mm to run it for.
+  int32_t version_num = get_sm_version_num();
+#if (defined ENABLE_CUTLASS_MOE_SM90 && ENABLE_CUTLASS_MOE_SM90) ||   \
+    (defined ENABLE_CUTLASS_MOE_SM100 && ENABLE_CUTLASS_MOE_SM100) || \
+    (defined ENABLE_CUTLASS_MOE_SM120 && ENABLE_CUTLASS_MOE_SM120)
+  get_cutlass_moe_mm_data_caller(topk_ids, expert_offsets, problem_sizes1,
+                                 problem_sizes2, input_permutation,
+                                 output_permutation, num_experts, n, k,
+                                 blockscale_offsets, is_gated);
+  return;
+#endif
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
+      false,
+      "No compiled get_cutlass_moe_mm_data: no cutlass_scaled_mm kernel for "
+      "CUDA device capability: ",
+      version_num, ". Required capability: 90, 100, or 120");
+}
+
+void get_cutlass_moe_mm_problem_sizes_from_expert_offsets(
+    const torch::stable::Tensor& expert_first_token_offset,
+    torch::stable::Tensor& problem_sizes1,
+    torch::stable::Tensor& problem_sizes2, const int64_t n, const int64_t k,
+    const bool swap_ab) {
+  int32_t version_num = get_sm_version_num();
+#if (defined ENABLE_CUTLASS_MOE_SM90 && ENABLE_CUTLASS_MOE_SM90) ||   \
+    (defined ENABLE_CUTLASS_MOE_SM100 && ENABLE_CUTLASS_MOE_SM100) || \
+    (defined ENABLE_CUTLASS_MOE_SM120 && ENABLE_CUTLASS_MOE_SM120)
+  get_cutlass_moe_mm_problem_sizes_from_expert_offsets_caller(
+      expert_first_token_offset, problem_sizes1, problem_sizes2, n, k, swap_ab);
+  return;
+#endif
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
+      false,
+      "No compiled get_cutlass_moe_mm_problem_sizes_from_expert_offsets: "
+      "no cutlass_scaled_mm kernel for CUDA device capability: ",
+      version_num, ". Required capability: 90, 100, or 120");
+}
+
+void get_cutlass_batched_moe_mm_data(
+    torch::stable::Tensor& expert_offsets,
+    torch::stable::Tensor& problem_sizes1,
+    torch::stable::Tensor& problem_sizes2,
+    const torch::stable::Tensor& expert_num_tokens,
+    const int64_t num_local_experts, const int64_t padded_m, const int64_t n,
+    const int64_t k) {
+  // This function currently gets compiled only if we have a valid cutlass moe
+  // mm to run it for.
+  int32_t version_num = get_sm_version_num();
+#if (defined ENABLE_CUTLASS_MOE_SM90 && ENABLE_CUTLASS_MOE_SM90) ||   \
+    (defined ENABLE_CUTLASS_MOE_SM100 && ENABLE_CUTLASS_MOE_SM100) || \
+    (defined ENABLE_CUTLASS_MOE_SM120 && ENABLE_CUTLASS_MOE_SM120)
+  get_cutlass_batched_moe_mm_data_caller(expert_offsets, problem_sizes1,
+                                         problem_sizes2, expert_num_tokens,
+                                         num_local_experts, padded_m, n, k);
+  return;
+#endif
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
+      false,
+      "No compiled get_cutlass_batched_moe_mm_data: no "
+      "cutlass_scaled_mm kernel "
+      "for CUDA device capability: ",
+      version_num, ". Required capability: 90, 100, or 120");
+}
+
+void cutlass_scaled_mm_azp(torch::stable::Tensor& c,
+                           torch::stable::Tensor const& a,
+                           torch::stable::Tensor const& b,
+                           torch::stable::Tensor const& a_scales,
+                           torch::stable::Tensor const& b_scales,
+                           torch::stable::Tensor const& azp_adj,
+                           std::optional<torch::stable::Tensor> const& azp,
+                           std::optional<torch::stable::Tensor> const& bias) {
+  // Checks for conformality
+  STD_TORCH_CHECK(a.dim() == 2 && b.dim() == 2 && c.dim() == 2);
+  STD_TORCH_CHECK(c.size(0) == a.size(0) && a.size(1) == b.size(0) &&
+                  b.size(1) == c.size(1));
+  STD_TORCH_CHECK(a_scales.numel() == 1 || a_scales.numel() == a.size(0));
+  STD_TORCH_CHECK(b_scales.numel() == 1 || b_scales.numel() == b.size(1));
+
+  // Check for strides and alignment
+  STD_TORCH_CHECK(a.stride(1) == 1 && c.stride(1) == 1);  // Row-major
+  STD_TORCH_CHECK(b.stride(0) == 1);                      // Column-major
+  STD_TORCH_CHECK(c.stride(0) % 16 == 0 &&
+                  b.stride(1) % 16 == 0);  // 16 Byte Alignment
+  STD_TORCH_CHECK(a_scales.is_contiguous() && b_scales.is_contiguous());
+
+  // bias, azp, azp_adj are all 1d
+  // bias and azp_adj have n elements, azp has m elements
+  if (bias) {
+    STD_TORCH_CHECK(bias->numel() == b.size(1) && bias->is_contiguous());
+  }
+  if (azp) {
+    STD_TORCH_CHECK(azp->numel() == a.size(0) && azp->is_contiguous());
+  }
+  STD_TORCH_CHECK(azp_adj.numel() == b.size(1) && azp_adj.is_contiguous());
+
+  // azp & bias types
+  STD_TORCH_CHECK(azp_adj.scalar_type() == torch::headeronly::ScalarType::Int);
+  STD_TORCH_CHECK(!azp ||
+                  azp->scalar_type() == torch::headeronly::ScalarType::Int);
+  STD_TORCH_CHECK(!bias || bias->scalar_type() == c.scalar_type(),
+                  "currently bias dtype must match output dtype ",
+                  c.scalar_type());
+
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      a.get_device_index());
+
+  int32_t version_num = get_sm_version_num();
+
+#if defined ENABLE_SCALED_MM_SM90 && ENABLE_SCALED_MM_SM90
+  if (version_num >= 90) {
+    cutlass_scaled_mm_azp_sm90(c, a, b, a_scales, b_scales, azp_adj, azp, bias);
+    return;
+  }
+#endif
+
+#if defined ENABLE_SCALED_MM_C2X && ENABLE_SCALED_MM_C2X
+  if (version_num == 89) {
+    // Ada Lovelace
+    cutlass_scaled_mm_azp_sm89(c, a, b, a_scales, b_scales, azp_adj, azp, bias);
+    return;
+  }
+
+  if (version_num >= 80) {
+    // Ampere
+    cutlass_scaled_mm_azp_sm80(c, a, b, a_scales, b_scales, azp_adj, azp, bias);
+    return;
+  }
+
+  // Turing
+  STD_TORCH_CHECK(version_num >= 75);
+  cutlass_scaled_mm_azp_sm75(c, a, b, a_scales, b_scales, azp_adj, azp, bias);
+  return;
+#endif
+
+  STD_TORCH_CHECK_NOT_IMPLEMENTED(
+      false,
+      "No compiled cutlass_scaled_mm_azp for a compute capability less than "
+      "CUDA device capability: ",
+      version_num);
+}
diff --git a/csrc/quantization/w8a8/fp8/common.cu b/csrc/libtorch_stable/quantization/w8a8/fp8/common.cu
similarity index 66%
rename from csrc/quantization/w8a8/fp8/common.cu
rename to csrc/libtorch_stable/quantization/w8a8/fp8/common.cu
index 52e159d65010..d02fc2296e61 100644
--- a/csrc/quantization/w8a8/fp8/common.cu
+++ b/csrc/libtorch_stable/quantization/w8a8/fp8/common.cu
@@ -1,11 +1,9 @@
-#include "common.cuh"
-#include "dispatch_utils.h"
-#include "cub_helpers.h"
-#include "libtorch_stable/quantization/vectorization_utils.cuh"
-#include <c10/cuda/CUDAGuard.h>
-#include <ATen/cuda/Exceptions.h>
-#include <tuple>
-
+#include "../../../../quantization/w8a8/fp8/common.cuh"
+#include "../../../dispatch_utils.h"
+#include "../../../../cub_helpers.h"
+#include "../../vectorization_utils.cuh"
+#include "../../../torch_utils.h"
+#include <torch/csrc/stable/macros.h>
 namespace vllm {
 
 // STRIDE_I_ZERO: true if scale_stride_i == 0 (per-tensor or per-channel)
@@ -183,16 +181,16 @@ __global__ void dynamic_per_token_scaled_fp8_quant_kernel_strided(
 }  // namespace vllm
 
 void static_scaled_fp8_quant(
-    torch::Tensor& out,          // [..., d]
-    torch::Tensor const& input,  // [..., d]
-    torch::Tensor const& scale,  // various shapes
-    std::optional<std::tuple<int64_t, int64_t>>
-        opt_group_shape)  // optional explicit (group_m, group_n)
+    torch::stable::Tensor& out,          // [..., d]
+    torch::stable::Tensor const& input,  // [..., d]
+    torch::stable::Tensor const& scale,  // various shapes
+    std::optional<torch::headeronly::IntHeaderOnlyArrayRef>
+        opt_group_shape)  // optional explicit [group_m, group_n]
 {
-  TORCH_CHECK(input.stride(-1) == 1,
-              "last dimension of input must be contiguous");
-  TORCH_CHECK(out.stride(-1) == 1,
-              "last dimension of output must be contiguous");
+  STD_TORCH_CHECK(input.stride(-1) == 1,
+                  "last dimension of input must be contiguous");
+  STD_TORCH_CHECK(out.stride(-1) == 1,
+                  "last dimension of output must be contiguous");
 
   const int hidden_size = input.size(-1);              // N (columns)
   const int num_tokens = input.numel() / hidden_size;  // M (rows)
@@ -212,13 +210,18 @@ void static_scaled_fp8_quant(
   } else if (scale.dim() == 1) {
     // 1D scale: require explicit group_shape to disambiguate per-channel vs
     // per-token (avoids edge case where num_tokens == hidden_size)
-    TORCH_CHECK(opt_group_shape.has_value(),
-                "1D scale requires explicit group_shape to disambiguate "
-                "per-channel vs per-token quantization. "
-                "Use group_shape=(-1, 1) for per-channel or group_shape=(1, "
-                "-1) for per-token.");
-
-    const auto& [opt_group_m, opt_group_n] = opt_group_shape.value();
+    STD_TORCH_CHECK(
+        opt_group_shape.has_value(),
+        "1D scale requires explicit group_shape to disambiguate "
+        "per-channel vs per-token quantization. "
+        "Use group_shape=(-1, 1) for per-channel or group_shape=(1, "
+        "-1) for per-token.");
+    STD_TORCH_CHECK(opt_group_shape->size() == 2,
+                    "group_shape must have exactly 2 elements, got ",
+                    opt_group_shape->size());
+
+    const auto opt_group_m = (*opt_group_shape)[0];
+    const auto opt_group_n = (*opt_group_shape)[1];
     group_m = opt_group_m == -1 ? num_tokens : static_cast<int>(opt_group_m);
     group_n = opt_group_n == -1 ? hidden_size : static_cast<int>(opt_group_n);
 
@@ -228,11 +231,11 @@ void static_scaled_fp8_quant(
     const int64_t expected_scale_n = hidden_size / group_n;
     const int64_t expected_scale_numel = expected_scale_m * expected_scale_n;
 
-    TORCH_CHECK(scale_len == expected_scale_numel, "1D scale length (",
-                scale_len, ") does not match expected size (",
-                expected_scale_numel, ") for group_shape (", opt_group_m, ", ",
-                opt_group_n, ") with input shape (", num_tokens, ", ",
-                hidden_size, ")");
+    STD_TORCH_CHECK(scale_len == expected_scale_numel, "1D scale length (",
+                    scale_len, ") does not match expected size (",
+                    expected_scale_numel, ") for group_shape (", opt_group_m,
+                    ", ", opt_group_n, ") with input shape (", num_tokens, ", ",
+                    hidden_size, ")");
 
     // For 1D scale, determine strides based on which dim is trivial
     // Scale indexing: scale[gi * scale_stride_i + gj * scale_stride_j]
@@ -248,7 +251,7 @@ void static_scaled_fp8_quant(
       scale_stride_i = scale.stride(0);
       scale_stride_j = 0;
     } else {
-      TORCH_CHECK(
+      STD_TORCH_CHECK(
           false,
           "1D scale can only be used when one of the scale dimensions is 1. "
           "For 2D group scaling, use a 2D scale tensor.");
@@ -259,10 +262,12 @@ void static_scaled_fp8_quant(
     const int64_t scale_size_0 = scale.size(0);
     const int64_t scale_size_1 = scale.size(1);
 
-    TORCH_CHECK(num_tokens % scale_size_0 == 0, "num_tokens (", num_tokens,
-                ") must be divisible by scale.size(0) (", scale_size_0, ")");
-    TORCH_CHECK(hidden_size % scale_size_1 == 0, "hidden_size (", hidden_size,
-                ") must be divisible by scale.size(1) (", scale_size_1, ")");
+    STD_TORCH_CHECK(num_tokens % scale_size_0 == 0, "num_tokens (", num_tokens,
+                    ") must be divisible by scale.size(0) (", scale_size_0,
+                    ")");
+    STD_TORCH_CHECK(hidden_size % scale_size_1 == 0, "hidden_size (",
+                    hidden_size, ") must be divisible by scale.size(1) (",
+                    scale_size_1, ")");
 
     // Infer from 2D scale shape
     int inferred_group_m = num_tokens / scale_size_0;
@@ -270,16 +275,21 @@ void static_scaled_fp8_quant(
 
     // Use explicit if provided, otherwise use inferred
     if (opt_group_shape.has_value()) {
-      const auto& [opt_group_m, opt_group_n] = opt_group_shape.value();
+      STD_TORCH_CHECK(opt_group_shape->size() == 2,
+                      "group_shape must have exactly 2 elements, got ",
+                      opt_group_shape->size());
+      const auto opt_group_m = (*opt_group_shape)[0];
+      const auto opt_group_n = (*opt_group_shape)[1];
       group_m = opt_group_m == -1 ? num_tokens : static_cast<int>(opt_group_m);
       group_n = opt_group_n == -1 ? hidden_size : static_cast<int>(opt_group_n);
 
       // Validate explicit matches inferred
-      TORCH_CHECK(group_m == inferred_group_m && group_n == inferred_group_n,
-                  "Explicit group_shape (", opt_group_m, ", ", opt_group_n,
-                  ") does not match inferred group shape (", inferred_group_m,
-                  ", ", inferred_group_n, ") from 2D scale tensor shape (",
-                  scale_size_0, ", ", scale_size_1, ")");
+      STD_TORCH_CHECK(
+          group_m == inferred_group_m && group_n == inferred_group_n,
+          "Explicit group_shape (", opt_group_m, ", ", opt_group_n,
+          ") does not match inferred group shape (", inferred_group_m, ", ",
+          inferred_group_n, ") from 2D scale tensor shape (", scale_size_0,
+          ", ", scale_size_1, ")");
     } else {
       group_m = inferred_group_m;
       group_n = inferred_group_n;
@@ -288,8 +298,8 @@ void static_scaled_fp8_quant(
     scale_stride_i = scale.stride(0);
     scale_stride_j = scale.stride(1);
   } else {
-    TORCH_CHECK(false, "scale must be 0D, 1D, or 2D tensor, but got ",
-                scale.dim(), "D");
+    STD_TORCH_CHECK(false, "scale must be 0D, 1D, or 2D tensor, but got ",
+                    scale.dim(), "D");
   }
 
   const int block_size = 256;
@@ -299,37 +309,39 @@ void static_scaled_fp8_quant(
   const int64_t in_row_stride = input.stride(-2);
   const int64_t out_row_stride = out.stride(-2);
 
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream();
 
   // Dispatch to template-specialized kernel based on stride pattern
-  VLLM_DISPATCH_FLOATING_TYPES(
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(
       input.scalar_type(), "scaled_fp8_quant_kernel_scalar_type", [&] {
-        VLLM_DISPATCH_FP8_TYPES(
+        VLLM_STABLE_DISPATCH_FP8_TYPES(
             out.scalar_type(), "scaled_fp8_quant_kernel_fp8_type", [&] {
-              VLLM_DISPATCH_BOOL(scale_stride_i == 0, S0_ZERO, [&] {
-                VLLM_DISPATCH_BOOL(scale_stride_j == 0, S1_ZERO, [&] {
+              VLLM_STABLE_DISPATCH_BOOL(scale_stride_i == 0, S0_ZERO, [&] {
+                VLLM_STABLE_DISPATCH_BOOL(scale_stride_j == 0, S1_ZERO, [&] {
                   vllm::scaled_fp8_quant_kernel_strided_group_shape<
                       scalar_t, fp8_t, S0_ZERO, S1_ZERO>
                       <<<grid, block, 0, stream>>>(
-                          out.data_ptr<fp8_t>(), input.data_ptr<scalar_t>(),
-                          scale.data_ptr<float>(), hidden_size, in_row_stride,
-                          out_row_stride, group_m, group_n, scale_stride_i,
-                          scale_stride_j);
+                          out.mutable_data_ptr<fp8_t>(),
+                          input.const_data_ptr<scalar_t>(),
+                          scale.const_data_ptr<float>(), hidden_size,
+                          in_row_stride, out_row_stride, group_m, group_n,
+                          scale_stride_i, scale_stride_j);
                 });
               });
             });
       });
 }
 
-void dynamic_scaled_fp8_quant(torch::Tensor& out,          // [..., d]
-                              torch::Tensor const& input,  // [..., d]
-                              torch::Tensor& scale)        // [1]
+void dynamic_scaled_fp8_quant(torch::stable::Tensor& out,          // [..., d]
+                              torch::stable::Tensor const& input,  // [..., d]
+                              torch::stable::Tensor& scale)        // [1]
 {
-  TORCH_CHECK(input.stride(-1) == 1,
-              "last dimension of input must be contiguous");
-  TORCH_CHECK(out.stride(-1) == 1,
-              "last dimension of output must be contiguous");
+  STD_TORCH_CHECK(input.stride(-1) == 1,
+                  "last dimension of input must be contiguous");
+  STD_TORCH_CHECK(out.stride(-1) == 1,
+                  "last dimension of output must be contiguous");
 
   const int hidden_size = input.size(-1);
   const int num_tokens = input.numel() / hidden_size;
@@ -340,40 +352,43 @@ void dynamic_scaled_fp8_quant(torch::Tensor& out,          // [..., d]
   const int64_t in_row_stride = input.stride(-2);
   const int64_t out_row_stride = out.stride(-2);
 
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream();
 
   // scale tensor should be initialised to <=0 before reduction
-  AT_CUDA_CHECK(
-      cudaMemsetAsync(scale.data_ptr<float>(), 0, sizeof(float), stream));
+  STD_CUDA_CHECK(cudaMemsetAsync(scale.mutable_data_ptr<float>(), 0,
+                                 sizeof(float), stream));
 
-  VLLM_DISPATCH_FLOATING_TYPES(
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(
       input.scalar_type(), "scaled_fp8_quant_kernel_scalar_type", [&] {
-        VLLM_DISPATCH_FP8_TYPES(
+        VLLM_STABLE_DISPATCH_FP8_TYPES(
             out.scalar_type(), "scaled_fp8_quant_kernel_fp8_type", [&] {
               vllm::segmented_max_reduction_strided<scalar_t, fp8_t>
                   <<<grid, block, 0, stream>>>(
-                      scale.data_ptr<float>(), input.data_ptr<scalar_t>(),
-                      hidden_size, in_row_stride,
-                      static_cast<int64_t>(num_tokens));
+                      scale.mutable_data_ptr<float>(),
+                      input.const_data_ptr<scalar_t>(), hidden_size,
+                      in_row_stride, static_cast<int64_t>(num_tokens));
 
               vllm::scaled_fp8_quant_kernel_strided_dynamic<scalar_t, fp8_t>
-                  <<<grid, block, 0, stream>>>(
-                      out.data_ptr<fp8_t>(), input.data_ptr<scalar_t>(),
-                      scale.data_ptr<float>(), hidden_size, in_row_stride,
-                      out_row_stride);
+                  <<<grid, block, 0, stream>>>(out.mutable_data_ptr<fp8_t>(),
+                                               input.const_data_ptr<scalar_t>(),
+                                               scale.const_data_ptr<float>(),
+                                               hidden_size, in_row_stride,
+                                               out_row_stride);
             });
       });
 }
 
 void dynamic_per_token_scaled_fp8_quant(
-    torch::Tensor& out,          // [..., d]
-    torch::Tensor const& input,  // [..., d]
-    torch::Tensor& scales, std::optional<at::Tensor> const& scale_ub) {
-  TORCH_CHECK(input.stride(-1) == 1,
-              "last dimension of input must be contiguous");
-  TORCH_CHECK(out.stride(-1) == 1,
-              "last dimension of output must be contiguous");
+    torch::stable::Tensor& out,          // [..., d]
+    torch::stable::Tensor const& input,  // [..., d]
+    torch::stable::Tensor& scales,
+    std::optional<torch::stable::Tensor> const& scale_ub) {
+  STD_TORCH_CHECK(input.stride(-1) == 1,
+                  "last dimension of input must be contiguous");
+  STD_TORCH_CHECK(out.stride(-1) == 1,
+                  "last dimension of output must be contiguous");
 
   const int hidden_size = input.size(-1);
   const int num_tokens = input.numel() / hidden_size;
@@ -384,20 +399,24 @@ void dynamic_per_token_scaled_fp8_quant(
   const int64_t in_row_stride = input.stride(-2);
   const int64_t out_row_stride = out.stride(-2);
 
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  VLLM_DISPATCH_FLOATING_TYPES(
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream();
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(
       input.scalar_type(),
       "dynamic_per_token_scaled_fp8_quant_kernel_scalar_type", [&] {
-        VLLM_DISPATCH_FP8_TYPES(
+        VLLM_STABLE_DISPATCH_FP8_TYPES(
             out.scalar_type(),
             "dynamic_per_token_scaled_fp8_quant_kernel_fp8_type", [&] {
-              vllm::dynamic_per_token_scaled_fp8_quant_kernel_strided<
-                  scalar_t, fp8_t><<<grid, block, 0, stream>>>(
-                  out.data_ptr<fp8_t>(), scales.data_ptr<float>(),
-                  input.data_ptr<scalar_t>(),
-                  scale_ub.has_value() ? scale_ub->data_ptr<float>() : nullptr,
-                  hidden_size, in_row_stride, out_row_stride);
+              vllm::dynamic_per_token_scaled_fp8_quant_kernel_strided<scalar_t,
+                                                                      fp8_t>
+                  <<<grid, block, 0, stream>>>(
+                      out.mutable_data_ptr<fp8_t>(),
+                      scales.mutable_data_ptr<float>(),
+                      input.const_data_ptr<scalar_t>(),
+                      scale_ub.has_value() ? scale_ub->const_data_ptr<float>()
+                                           : nullptr,
+                      hidden_size, in_row_stride, out_row_stride);
             });
       });
 }
diff --git a/csrc/libtorch_stable/quantization/w8a8/fp8/per_token_group_quant.cu b/csrc/libtorch_stable/quantization/w8a8/fp8/per_token_group_quant.cu
index 69b6564be750..37a612b43944 100644
--- a/csrc/libtorch_stable/quantization/w8a8/fp8/per_token_group_quant.cu
+++ b/csrc/libtorch_stable/quantization/w8a8/fp8/per_token_group_quant.cu
@@ -156,6 +156,17 @@ inline int GetGroupsPerBlock(int64_t num_groups) {
   return 1;
 }
 
+// Largest divisor of padded_groups_per_row that is <= 16. ry = 16 / kx.
+inline int GetGroupsPerBlockX(int64_t padded_groups_per_row) {
+  if (padded_groups_per_row % 16 == 0) {
+    return 16;
+  }
+  if (padded_groups_per_row % 8 == 0) {
+    return 8;
+  }
+  return 4;
+}
+
 void per_token_group_quant_8bit(const torch::stable::Tensor& input,
                                 torch::stable::Tensor& output_q,
                                 torch::stable::Tensor& output_s,
@@ -236,138 +247,266 @@ void per_token_group_quant_8bit(const torch::stable::Tensor& input,
 #undef LAUNCH_KERNEL
 }
 
-template <typename T, typename DST_DTYPE>
-__global__ void per_token_group_quant_8bit_packed_kernel(
+// Register-resident fast path for group_size==128.
+//
+// Each thread holds 16 source elements (32 B = uint4 x 2) in registers across
+// the absmax reduce -> scale compute -> quantize pipeline. No shared memory.
+// UE8M0 scale extracted via bit math (bit-exact with exp2f(ceilf(log2f))).
+//
+// Loads two contiguous uint4s (16 B + 16 B = 32 B) per thread; on Blackwell
+// nvcc fuses these into a single 256-bit LDG.E.256.
+//
+// Constraints: GROUP_SIZE % (THREADS_PER_GROUP * VEC_SIZE) == 0; for
+// THREADS_PER_GROUP=8 and bf16/fp16 (VEC_SIZE=16), this means GROUP_SIZE=128.
+template <typename T, typename DST_DTYPE, int GROUP_SIZE, int kGroupsPerBlockX,
+          int kRowsPerBlock>
+__global__ void per_token_group_quant_8bit_packed_register_kernel(
     const T* __restrict__ input, void* __restrict__ output_q,
-    unsigned int* __restrict__ output_s_packed, const int group_size,
-    const int num_groups, const int groups_per_block, const int groups_per_row,
-    const int mn, const int tma_aligned_mn, const float eps,
+    unsigned int* __restrict__ output_s_packed, const int padded_groups_per_row,
+    const int groups_per_row, const int mn, const int output_q_mn_extent,
+    const int tma_aligned_mn, const int64_t num_scale_elems, const float eps,
     const float min_8bit, const float max_8bit) {
-  const int threads_per_group = 16;
-  const int64_t local_group_id = threadIdx.x / threads_per_group;
-  const int lane_id = threadIdx.x % threads_per_group;
-
-  const int64_t block_group_id = blockIdx.x * groups_per_block;
-  const int64_t global_group_id = block_group_id + local_group_id;
-  if (global_group_id >= num_groups) {
+  static_assert(GROUP_SIZE == 128, "fast path supports GROUP_SIZE==128");
+  constexpr int THREADS_PER_GROUP = 8;
+  constexpr int VEC_SIZE = 32 / sizeof(T);  // 16 for bf16/fp16
+  static_assert(GROUP_SIZE == THREADS_PER_GROUP * VEC_SIZE,
+                "GROUP_SIZE must equal THREADS_PER_GROUP * VEC_SIZE");
+  static_assert(32 % THREADS_PER_GROUP == 0,
+                "THREADS_PER_GROUP must divide warp size for the shuffle "
+                "mask to be valid");
+  static_assert(
+      kGroupsPerBlockX > 0 && (kGroupsPerBlockX & (kGroupsPerBlockX - 1)) == 0,
+      "kGroupsPerBlockX must be a positive power of 2");
+  static_assert(kRowsPerBlock > 0, "kRowsPerBlock must be positive");
+
+  const int local_group_id = threadIdx.x / THREADS_PER_GROUP;
+  const int lane_id = threadIdx.x % THREADS_PER_GROUP;
+
+  const int sf_k_local = local_group_id % kGroupsPerBlockX;
+  const int row_local = local_group_id / kGroupsPerBlockX;
+  const int sf_k_idx = blockIdx.x * kGroupsPerBlockX + sf_k_local;
+  const int mn_idx = blockIdx.y * kRowsPerBlock + row_local;
+
+  if (mn_idx >= tma_aligned_mn) {
     return;
   }
+  const bool is_valid_group = (mn_idx < mn) && (sf_k_idx < groups_per_row);
+
+  // Load 16 input elements (32 B) into registers as two adjacent uint4
+  // loads. nvcc keeps these as 2x LDG.E.128 on sm_100; the per-thread cost
+  // is dominated by HBM bandwidth at large MN, so a fused 256-bit load via
+  // inline PTX gave no measurable speedup.
+  // alignas(16) is required so the uint4* reinterpret_cast below is
+  // well-defined for T == bf16/fp16 (default alignof is 2).
+  alignas(16) T regs[VEC_SIZE];
+  float local_absmax = eps;
+  if (is_valid_group) {
+    const T* group_input =
+        input + static_cast<int64_t>(mn_idx) * groups_per_row * GROUP_SIZE +
+        sf_k_idx * GROUP_SIZE + lane_id * VEC_SIZE;
+    uint4* dst = reinterpret_cast<uint4*>(&regs[0]);
+    const uint4* src = reinterpret_cast<const uint4*>(group_input);
+    dst[0] = src[0];
+    dst[1] = src[1];
+#pragma unroll
+    for (int i = 0; i < VEC_SIZE; ++i) {
+      float v = fabsf(static_cast<float>(regs[i]));
+      local_absmax = fmaxf(local_absmax, v);
+    }
+  }
 
-  const int64_t block_group_offset = global_group_id * group_size;
-
-  const T* group_input = input + block_group_offset;
-  DST_DTYPE* group_output =
-      static_cast<DST_DTYPE*>(output_q) + block_group_offset;
-
-  // shared memory to cache each group's data to avoid double DRAM reads.
-  extern __shared__ __align__(16) char smem_raw[];
-  T* smem = reinterpret_cast<T*>(smem_raw);
-  T* smem_group = smem + local_group_id * group_size;
-  const float y_s =
-      ComputeGroupScale<T, true>(group_input, smem_group, group_size, lane_id,
-                                 threads_per_group, eps, max_8bit);
+  // 8-lane subgroup shuffle reduce (octet of the warp). The mask selects the
+  // 8 lanes within the warp that share a group.
+  unsigned mask = 0xffu << (threadIdx.x & 24u);
+  local_absmax = fmaxf(local_absmax, __shfl_xor_sync(mask, local_absmax, 4));
+  local_absmax = fmaxf(local_absmax, __shfl_xor_sync(mask, local_absmax, 2));
+  local_absmax = fmaxf(local_absmax, __shfl_xor_sync(mask, local_absmax, 1));
 
-  // pack 4 scales into a uint32
+  float y_s = local_absmax / max_8bit;
+  y_s = fmaxf(y_s, 1e-10f);
+  uint32_t bits = __float_as_uint(y_s);
+  uint32_t exp_bits = (bits >> 23) & 0xffu;
+  uint32_t mant_bits = bits & 0x7fffffu;
+  uint8_t exp_byte =
+      static_cast<uint8_t>(exp_bits + (mant_bits != 0u ? 1u : 0u));
+
+  // Lane 0 writes the packed scale byte.
   if (lane_id == 0) {
-    // map flat group id to 2D indices (mn_idx, sf_k_idx)
-    const int sf_k_idx = static_cast<int>(global_group_id % groups_per_row);
-    const int mn_idx = static_cast<int>(global_group_id / groups_per_row);
-
-    if (mn_idx < mn) {
-      // each uint32 in output_s_packed stores 4 packed scales
-      const int sf_k_pack_idx = sf_k_idx / 4;
-      const int pos = sf_k_idx % 4;
-
-      // reinterpret the UE8M0 scale y_s as IEEE bits, extract the 8-bit
-      // exponent, and place it into the correct byte of the 32-bit word.
-      const unsigned int bits = __float_as_uint(y_s);
-      const unsigned int exponent = (bits >> 23u) & 0xffu;
-      const unsigned int contrib = exponent << (pos * 8u);
-
-      const int out_idx = sf_k_pack_idx * tma_aligned_mn + mn_idx;
-      // atomically OR 8-bit exponent into the packed scales buffer
-      atomicOr(output_s_packed + out_idx, contrib);
+    const int sf_k_pack_idx = sf_k_idx / 4;
+    const int pos = sf_k_idx % 4;
+    const int out_idx = sf_k_pack_idx * tma_aligned_mn + mn_idx;
+    if (is_valid_group) {
+      reinterpret_cast<uint8_t*>(output_s_packed)[out_idx * 4 + pos] = exp_byte;
+    } else if (out_idx < num_scale_elems) {
+      reinterpret_cast<uint8_t*>(output_s_packed)[out_idx * 4 + pos] = 0;
     }
   }
 
-  __syncthreads();
+  // For padded mn rows that fall within output_q's allocated extent, write
+  // a uint4 of zeros to keep the buffer clean for downstream TMA loads.
+  // Skip writes for sf_k padding (those positions don't exist in output_q).
+  if (!is_valid_group) {
+    if (sf_k_idx < groups_per_row && mn_idx >= mn &&
+        mn_idx < output_q_mn_extent) {
+      DST_DTYPE* group_output =
+          static_cast<DST_DTYPE*>(output_q) +
+          static_cast<int64_t>(mn_idx) * groups_per_row * GROUP_SIZE +
+          sf_k_idx * GROUP_SIZE + lane_id * VEC_SIZE;
+      *reinterpret_cast<uint4*>(group_output) = make_uint4(0, 0, 0, 0);
+    }
+    return;
+  }
 
-  QuantizeGroup<T, DST_DTYPE>(smem_group, group_output, group_size, lane_id,
-                              threads_per_group, y_s, min_8bit, max_8bit);
+  // Reconstruct y_s as a power-of-2 float and use its reciprocal.
+  float y_s_q = __uint_as_float(static_cast<uint32_t>(exp_byte) << 23);
+  float inv_y = 1.0f / y_s_q;
+
+  // Quantize and pack into 16 fp8/int8 bytes (= uint4). VEC_SIZE==16 so we
+  // fill four 32-bit words, four bytes each.
+  uint32_t packed_lo = 0;
+  uint32_t packed_lo_hi = 0;
+  uint32_t packed_hi_lo = 0;
+  uint32_t packed_hi = 0;
+#pragma unroll
+  for (int i = 0; i < VEC_SIZE; ++i) {
+    float q =
+        fminf(fmaxf(static_cast<float>(regs[i]) * inv_y, min_8bit), max_8bit);
+    DST_DTYPE qb = DST_DTYPE(q);
+    uint8_t byte = *reinterpret_cast<uint8_t*>(&qb);
+    const int shift = (i & 3) * 8;
+    if (i < 4) {
+      packed_lo |= static_cast<uint32_t>(byte) << shift;
+    } else if (i < 8) {
+      packed_lo_hi |= static_cast<uint32_t>(byte) << shift;
+    } else if (i < 12) {
+      packed_hi_lo |= static_cast<uint32_t>(byte) << shift;
+    } else {
+      packed_hi |= static_cast<uint32_t>(byte) << shift;
+    }
+  }
+
+  uint4 packed_out =
+      make_uint4(packed_lo, packed_lo_hi, packed_hi_lo, packed_hi);
+  DST_DTYPE* group_output =
+      static_cast<DST_DTYPE*>(output_q) +
+      static_cast<int64_t>(mn_idx) * groups_per_row * GROUP_SIZE +
+      sf_k_idx * GROUP_SIZE + lane_id * VEC_SIZE;
+  *reinterpret_cast<uint4*>(group_output) = packed_out;
 }
 
+// Public entry point: register-resident packed quant kernel.
+// Constraints: group_size == 128 and bf16/fp16 input.
 void per_token_group_quant_8bit_packed(const torch::stable::Tensor& input,
                                        torch::stable::Tensor& output_q,
                                        torch::stable::Tensor& output_s_packed,
                                        int64_t group_size, double eps,
                                        double min_8bit, double max_8bit) {
+  STD_TORCH_CHECK(group_size == 128,
+                  "per_token_group_quant_8bit_packed only supports "
+                  "group_size==128, got ",
+                  group_size, ".");
+  const auto in_dtype = input.scalar_type();
+  STD_TORCH_CHECK(
+      in_dtype == torch::headeronly::ScalarType::Half ||
+          in_dtype == torch::headeronly::ScalarType::BFloat16,
+      "per_token_group_quant_8bit_packed only supports bf16/fp16 input.");
+
   STD_TORCH_CHECK(input.is_contiguous());
   STD_TORCH_CHECK(output_q.is_contiguous());
 
   const int64_t k = input.size(-1);
-  STD_TORCH_CHECK(k % group_size == 0, "Last dimension (", k,
-                  ") must be divisible by group_size (", group_size, ").");
+  STD_TORCH_CHECK(k % group_size == 0, "input last dim k=", k,
+                  " is not divisible by group_size=", group_size, ".");
 
   const int64_t mn = input.numel() / k;
   const int64_t groups_per_row = k / group_size;
-  const int64_t num_groups = mn * groups_per_row;
-
-  STD_TORCH_CHECK(output_s_packed.dim() == 2,
-                  "output_s_packed must be 2D, got dim=", output_s_packed.dim(),
-                  ".");
-
   const int64_t k_num_packed_sfk = (groups_per_row + 3) / 4;
   const int64_t tma_aligned_mn = ((mn + 3) / 4) * 4;
 
+  // output_q may be allocated with extra padded mn rows (e.g.,
+  // (tma_aligned_mn, k)) so the kernel can zero-fill them in-line and the
+  // caller can use torch.empty instead of torch.zeros. The grid only covers
+  // up to tma_aligned_mn, so we cap the extent there.
+  const int64_t output_q_mn_actual = output_q.numel() / k;
+  STD_TORCH_CHECK(output_q_mn_actual >= mn,
+                  "output_q must have at least mn rows; got ",
+                  output_q_mn_actual, " rows for mn=", mn, ".");
+  const int64_t output_q_mn_extent =
+      output_q_mn_actual < tma_aligned_mn ? output_q_mn_actual : tma_aligned_mn;
+
   STD_TORCH_CHECK(
       output_s_packed.scalar_type() == torch::headeronly::ScalarType::Int,
-      "output_s_packed must have dtype int32 for UE8M0-packed scales.");
-  // DeepGEMM expects SFA scales in MN-major form with shape
-  // [mn, ceil_div(K, 128 * 4)] and TMA-aligned stride on the last
-  // dimension.
+      "output_s_packed must be int32 for UE8M0-packed scales.");
   STD_TORCH_CHECK(output_s_packed.size(0) == mn &&
                       output_s_packed.size(1) == k_num_packed_sfk,
                   "output_s_packed shape must be [", mn, ", ", k_num_packed_sfk,
-                  "], but got [", output_s_packed.size(0), ", ",
+                  "]; got [", output_s_packed.size(0), ", ",
                   output_s_packed.size(1), "].");
+  STD_TORCH_CHECK(output_s_packed.stride(0) == 1 &&
+                      output_s_packed.stride(1) == tma_aligned_mn,
+                  "output_s_packed strides must be [1, ", tma_aligned_mn,
+                  "]; got [", output_s_packed.stride(0), ", ",
+                  output_s_packed.stride(1), "].");
 
   cudaStream_t stream = get_current_cuda_stream();
 
-  constexpr int THREADS_PER_GROUP = 16;
-
-  const int groups_per_block = GetGroupsPerBlock(num_groups);
+  constexpr int THREADS_PER_GROUP = 8;
+  const int64_t padded_groups_per_row = k_num_packed_sfk * 4;
+  const int64_t num_scale_elems = mn + (k_num_packed_sfk - 1) * tma_aligned_mn;
+
+  STD_TORCH_CHECK(padded_groups_per_row % 4 == 0,
+                  "padded_groups_per_row=", padded_groups_per_row,
+                  " is not a multiple of 4.");
+  const int kx = GetGroupsPerBlockX(padded_groups_per_row);
+  const int ry = 16 / kx;
+  const int64_t blocks_x = padded_groups_per_row / kx;
+  const int64_t blocks_y = (tma_aligned_mn + ry - 1) / ry;
+  const int num_threads = (kx * ry) * THREADS_PER_GROUP;
+  // CUDA caps grid.x and grid.y at 2^31 - 1; guard against pathological inputs.
+  STD_TORCH_CHECK(blocks_x <= static_cast<int64_t>(INT32_MAX) &&
+                      blocks_y <= static_cast<int64_t>(INT32_MAX),
+                  "per_token_group_quant_8bit_packed grid too large: (",
+                  blocks_x, ", ", blocks_y, ").");
 
   auto dst_type = output_q.scalar_type();
-  const int num_blocks = num_groups / groups_per_block;
-  const int num_threads = groups_per_block * THREADS_PER_GROUP;
 
-  // zero-initialize packed scales, since we use atomicOr to accumulate
-  // exponents from different groups.
-  torch::stable::zero_(output_s_packed);
-
-#define LAUNCH_PACKED_KERNEL(T, DST_DTYPE)                                \
-  do {                                                                    \
-    dim3 grid(num_blocks);                                                \
-    dim3 block(num_threads);                                              \
-    size_t smem_bytes =                                                   \
-        static_cast<size_t>(groups_per_block) * group_size * sizeof(T);   \
-    per_token_group_quant_8bit_packed_kernel<T, DST_DTYPE>                \
-        <<<grid, block, smem_bytes, stream>>>(                            \
-            static_cast<const T*>(input.data_ptr()), output_q.data_ptr(), \
-            reinterpret_cast<unsigned int*>(output_s_packed.data_ptr()),  \
-            static_cast<int>(group_size), static_cast<int>(num_groups),   \
-            groups_per_block, static_cast<int>(groups_per_row),           \
-            static_cast<int>(mn), static_cast<int>(tma_aligned_mn),       \
-            static_cast<float>(eps), static_cast<float>(min_8bit),        \
-            static_cast<float>(max_8bit));                                \
+#define LAUNCH_REG_KERNEL_INST(T, DST_DTYPE, KX, RY)                         \
+  do {                                                                       \
+    dim3 grid(static_cast<unsigned int>(blocks_x),                           \
+              static_cast<unsigned int>(blocks_y));                          \
+    dim3 block(num_threads);                                                 \
+    per_token_group_quant_8bit_packed_register_kernel<T, DST_DTYPE, 128, KX, \
+                                                      RY>                    \
+        <<<grid, block, 0, stream>>>(                                        \
+            static_cast<const T*>(input.data_ptr()), output_q.data_ptr(),    \
+            reinterpret_cast<unsigned int*>(output_s_packed.data_ptr()),     \
+            static_cast<int>(padded_groups_per_row),                         \
+            static_cast<int>(groups_per_row), static_cast<int>(mn),          \
+            static_cast<int>(output_q_mn_extent),                            \
+            static_cast<int>(tma_aligned_mn), num_scale_elems,               \
+            static_cast<float>(eps), static_cast<float>(min_8bit),           \
+            static_cast<float>(max_8bit));                                   \
   } while (0)
 
-  VLLM_STABLE_DISPATCH_FLOATING_TYPES(
-      input.scalar_type(), "per_token_group_quant_8bit_packed", ([&] {
+#define LAUNCH_REG_KERNEL(T, DST_DTYPE)                    \
+  do {                                                     \
+    if (kx == 16) {                                        \
+      LAUNCH_REG_KERNEL_INST(T, DST_DTYPE, 16, 1);         \
+    } else if (kx == 8) {                                  \
+      LAUNCH_REG_KERNEL_INST(T, DST_DTYPE, 8, 2);          \
+    } else if (kx == 4) {                                  \
+      LAUNCH_REG_KERNEL_INST(T, DST_DTYPE, 4, 4);          \
+    } else {                                               \
+      STD_TORCH_CHECK(false, "Unsupported kx value ", kx); \
+    }                                                      \
+  } while (0)
+
+  VLLM_STABLE_DISPATCH_HALF_TYPES(
+      input.scalar_type(), "per_token_group_quant_8bit_packed_register", ([&] {
         if (dst_type == torch::headeronly::ScalarType::Float8_e4m3fn) {
-          LAUNCH_PACKED_KERNEL(scalar_t, __nv_fp8_e4m3);
+          LAUNCH_REG_KERNEL(scalar_t, __nv_fp8_e4m3);
         } else if (dst_type == torch::headeronly::ScalarType::Char) {
-          LAUNCH_PACKED_KERNEL(scalar_t, int8_t);
+          LAUNCH_REG_KERNEL(scalar_t, int8_t);
         } else {
           STD_TORCH_CHECK(
               false,
@@ -376,7 +515,8 @@ void per_token_group_quant_8bit_packed(const torch::stable::Tensor& input,
         }
       }));
 
-#undef LAUNCH_PACKED_KERNEL
+#undef LAUNCH_REG_KERNEL
+#undef LAUNCH_REG_KERNEL_INST
 }
 
 void per_token_group_quant_fp8(const torch::stable::Tensor& input,
diff --git a/csrc/quantization/w8a8/int8/scaled_quant.cu b/csrc/libtorch_stable/quantization/w8a8/int8/scaled_quant.cu
similarity index 79%
rename from csrc/quantization/w8a8/int8/scaled_quant.cu
rename to csrc/libtorch_stable/quantization/w8a8/int8/scaled_quant.cu
index ae1395a363c7..ede7913a3558 100644
--- a/csrc/quantization/w8a8/int8/scaled_quant.cu
+++ b/csrc/libtorch_stable/quantization/w8a8/int8/scaled_quant.cu
@@ -1,12 +1,11 @@
-#include <ATen/cuda/CUDAContext.h>
-#include <torch/all.h>
-#include <c10/cuda/CUDAGuard.h>
+#include <torch/csrc/stable/tensor.h>
 
 #include <cmath>
 
-#include "dispatch_utils.h"
-#include "libtorch_stable/quantization/vectorization_utils.cuh"
-#include "cub_helpers.h"
+#include "../../../dispatch_utils.h"
+#include "../../../torch_utils.h"
+#include "../../vectorization_utils.cuh"
+#include "../../../../cub_helpers.h"
 
 static inline __device__ int8_t float_to_int8_rn(float x) {
 #ifdef USE_ROCM
@@ -263,66 +262,73 @@ __global__ void dynamic_scaled_int8_azp_quant_kernel(
 
 }  // namespace vllm
 
-void static_scaled_int8_quant(torch::Tensor& out,          // [..., hidden_size]
-                              torch::Tensor const& input,  // [..., hidden_size]
-                              torch::Tensor const& scale,
-                              std::optional<torch::Tensor> const& azp) {
-  TORCH_CHECK(input.is_contiguous());
-  TORCH_CHECK(out.is_contiguous());
-  TORCH_CHECK(scale.numel() == 1);
-  TORCH_CHECK(!azp || azp->numel() == 1);
+void static_scaled_int8_quant(
+    torch::stable::Tensor& out,          // [..., hidden_size]
+    torch::stable::Tensor const& input,  // [..., hidden_size]
+    torch::stable::Tensor const& scale,
+    std::optional<torch::stable::Tensor> const& azp) {
+  STD_TORCH_CHECK(input.is_contiguous());
+  STD_TORCH_CHECK(out.is_contiguous());
+  STD_TORCH_CHECK(scale.numel() == 1);
+  STD_TORCH_CHECK(!azp || azp->numel() == 1);
 
   int const hidden_size = input.size(-1);
   int const num_tokens = input.numel() / hidden_size;
   dim3 const grid(num_tokens);
   dim3 const block(std::min(hidden_size, 256));
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  VLLM_DISPATCH_FLOATING_TYPES(
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream();
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(
       input.scalar_type(), "static_scaled_int8_quant_kernel", [&] {
         if (!azp) {
           vllm::static_scaled_int8_quant_kernel<scalar_t, float>
-              <<<grid, block, 0, stream>>>(
-                  input.data_ptr<scalar_t>(), out.data_ptr<int8_t>(),
-                  scale.data_ptr<float>(), hidden_size);
+              <<<grid, block, 0, stream>>>(input.const_data_ptr<scalar_t>(),
+                                           out.mutable_data_ptr<int8_t>(),
+                                           scale.const_data_ptr<float>(),
+                                           hidden_size);
         } else {
           vllm::static_scaled_int8_azp_quant_kernel<scalar_t, float, int32_t>
               <<<grid, block, 0, stream>>>(
-                  input.data_ptr<scalar_t>(), out.data_ptr<int8_t>(),
-                  scale.data_ptr<float>(), azp->data_ptr<int32_t>(),
-                  hidden_size);
+                  input.const_data_ptr<scalar_t>(),
+                  out.mutable_data_ptr<int8_t>(), scale.const_data_ptr<float>(),
+                  azp->const_data_ptr<int32_t>(), hidden_size);
         }
       });
 }
 
 void dynamic_scaled_int8_quant(
-    torch::Tensor& out,          // [..., hidden_size]
-    torch::Tensor const& input,  // [..., hidden_size]
-    torch::Tensor& scales, std::optional<torch::Tensor> const& azp) {
-  TORCH_CHECK(input.is_contiguous());
-  TORCH_CHECK(out.is_contiguous());
-  TORCH_CHECK(scales.is_contiguous());
-  TORCH_CHECK(!azp || azp->is_contiguous());
+    torch::stable::Tensor& out,          // [..., hidden_size]
+    torch::stable::Tensor const& input,  // [..., hidden_size]
+    torch::stable::Tensor& scales,
+    std::optional<torch::stable::Tensor> const& azp) {
+  STD_TORCH_CHECK(input.is_contiguous());
+  STD_TORCH_CHECK(out.is_contiguous());
+  STD_TORCH_CHECK(scales.is_contiguous());
+  STD_TORCH_CHECK(!azp || azp->is_contiguous());
 
   int const hidden_size = input.size(-1);
   int const num_tokens = input.numel() / hidden_size;
   dim3 const grid(num_tokens);
   dim3 const block(std::min(hidden_size, 256));
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  VLLM_DISPATCH_FLOATING_TYPES(
+  const torch::stable::accelerator::DeviceGuard device_guard(
+      input.get_device_index());
+  const cudaStream_t stream = get_current_cuda_stream();
+  VLLM_STABLE_DISPATCH_FLOATING_TYPES(
       input.scalar_type(), "dynamic_scaled_int8_quant_kernel", [&] {
         if (!azp) {
           vllm::dynamic_scaled_int8_quant_kernel<scalar_t, float>
-              <<<grid, block, 0, stream>>>(
-                  input.data_ptr<scalar_t>(), out.data_ptr<int8_t>(),
-                  scales.data_ptr<float>(), hidden_size);
+              <<<grid, block, 0, stream>>>(input.const_data_ptr<scalar_t>(),
+                                           out.mutable_data_ptr<int8_t>(),
+                                           scales.mutable_data_ptr<float>(),
+                                           hidden_size);
         } else {
           vllm::dynamic_scaled_int8_azp_quant_kernel<scalar_t, float, int32_t>
-              <<<grid, block, 0, stream>>>(
-                  input.data_ptr<scalar_t>(), out.data_ptr<int8_t>(),
-                  scales.data_ptr<float>(), azp->data_ptr<int32_t>(),
-                  hidden_size);
+              <<<grid, block, 0, stream>>>(input.const_data_ptr<scalar_t>(),
+                                           out.mutable_data_ptr<int8_t>(),
+                                           scales.mutable_data_ptr<float>(),
+                                           azp->mutable_data_ptr<int32_t>(),
+                                           hidden_size);
         }
       });
 }
\ No newline at end of file
diff --git a/csrc/libtorch_stable/quantization/w8a8/per_token_group_quant_8bit.h b/csrc/libtorch_stable/quantization/w8a8/per_token_group_quant_8bit.h
index d67fd2b336ec..6630c0decee6 100644
--- a/csrc/libtorch_stable/quantization/w8a8/per_token_group_quant_8bit.h
+++ b/csrc/libtorch_stable/quantization/w8a8/per_token_group_quant_8bit.h
@@ -8,3 +8,13 @@ void per_token_group_quant_8bit(const torch::stable::Tensor& input,
                                 torch::stable::Tensor& output_s,
                                 int64_t group_size, double eps, double min_8bit,
                                 double max_8bit, bool scale_ue8m0 = false);
+
+// Public op: register-resident packed quant for the DeepGEMM Blackwell path.
+// Restricted to group_size == 128 and bf16/fp16 input; other configurations
+// raise STD_TORCH_CHECK. The legacy shared-memory fallback was removed because
+// no production caller (deep_gemm_moe / input_quant_fp8) uses other shapes.
+void per_token_group_quant_8bit_packed(const torch::stable::Tensor& input,
+                                       torch::stable::Tensor& output_q,
+                                       torch::stable::Tensor& output_s_packed,
+                                       int64_t group_size, double eps,
+                                       double min_8bit, double max_8bit);
diff --git a/csrc/libtorch_stable/torch_bindings.cpp b/csrc/libtorch_stable/torch_bindings.cpp
index d3b4c395b6f5..1601c3bd5bfa 100644
--- a/csrc/libtorch_stable/torch_bindings.cpp
+++ b/csrc/libtorch_stable/torch_bindings.cpp
@@ -31,7 +31,397 @@ STABLE_TORCH_LIBRARY_FRAGMENT(_C, ops) {
       "per_token_group_quant_int8(Tensor input, Tensor! output_q, Tensor! "
       "output_s, int group_size, float eps, float int8_min, float int8_max) -> "
       "()");
+
+  // CUTLASS w8a8 GEMM, supporting symmetric per-tensor or per-row/column
+  // quantization, as well as bias
+  ops.def(
+      "cutlass_scaled_mm(Tensor! out, Tensor a,"
+      "                  Tensor b, Tensor a_scales,"
+      "                  Tensor b_scales, Tensor? bias) -> ()");
+
+  // CUTLASS w8a8 GEMM, supporting asymmetric per-tensor or per-row/column
+  // quantization.
+  ops.def(
+      "cutlass_scaled_mm_azp(Tensor! out, Tensor a,"
+      "                  Tensor b, Tensor a_scales,"
+      "                  Tensor b_scales, Tensor azp_adj,"
+      "                  Tensor? azp, Tensor? bias) -> ()");
+
+  // Check if cutlass scaled_mm is supported for CUDA devices of the given
+  // capability
+  ops.def("cutlass_scaled_mm_supports_fp8(int cuda_device_capability) -> bool");
+
+  // Check if cutlass grouped gemm is supported for CUDA devices of the given
+  // capability
+  ops.def("cutlass_group_gemm_supported(int cuda_device_capability) -> bool");
+
+  // CUTLASS w8a8 grouped GEMM
+  ops.def(
+      "cutlass_moe_mm(Tensor! out_tensors, Tensor a_tensors, Tensor b_tensors, "
+      "               Tensor a_scales, Tensor b_scales, Tensor expert_offsets, "
+      "               Tensor problem_sizes, Tensor a_strides, "
+      "               Tensor b_strides, Tensor c_strides, bool per_act_token, "
+      "               bool per_out_ch) -> ()");
+
+  // A function that computes data required to run fused MoE with w8a8 grouped
+  // GEMM. It takes topk_ids as an input, and computes expert_offsets
+  // (token start indices of each expert). In addition to this, it computes
+  // problem sizes for each expert's multiplication used by the two mms called
+  // from fused MoE operation, and arrays with permutations required to shuffle
+  // and de-shuffle the input/output of the fused operation.
+  ops.def(
+      "get_cutlass_moe_mm_data(Tensor topk_ids, Tensor! expert_offsets, "
+      "                        Tensor! problem_sizes1, Tensor! problem_sizes2, "
+      "                        Tensor! input_permutation, "
+      "                        Tensor! output_permutation, int num_experts, "
+      "                        int n, int k, Tensor? blockscale_offsets, "
+      "                        bool is_gated) -> ()");
+
+  // compute per-expert problem sizes from expert_first_token_offset
+  // produced by vLLM's moe_permute kernel
+  ops.def(
+      "get_cutlass_moe_mm_problem_sizes_from_expert_offsets("
+      "    Tensor expert_first_token_offset, "
+      "    Tensor! problem_sizes1, "
+      "    Tensor! problem_sizes2, "
+      "    int n, int k, bool swap_ab) -> ()");
+
+  // A function that computes data required to run fused MoE with w8a8 grouped
+  // GEMM in batched expert format. It takes expert_num_tokens
+  // as an input, and computes expert_offsets (token start indices of each
+  // expert). In addition to this, it computes problem sizes for each expert's
+  // multiplication used by the two mms called from fused MoE operation.
+  ops.def(
+      "get_cutlass_batched_moe_mm_data(Tensor! expert_offsets, "
+      "                             Tensor! problem_sizes1, "
+      "                             Tensor! problem_sizes2, "
+      "                             Tensor expert_num_tokens, "
+      "                             int num_local_experts, int padded_m, "
+      "                             int n, int k) -> ()");
+
+  // Check if cutlass scaled_mm supports block quantization (used by DeepSeekV3)
+  ops.def(
+      "cutlass_scaled_mm_supports_block_fp8(int cuda_device_capability) -> "
+      "bool");
+
+  // CUTLASS nvfp4 block scaled GEMM
+  ops.def(
+      "cutlass_scaled_fp4_mm(Tensor! out, Tensor a, Tensor b,"
+      "                      Tensor block_scale_a, Tensor block_scale_b,"
+      "                      Tensor alpha) -> ()");
+
+  // cutlass nvfp4 block scaled group GEMM
+  ops.def(
+      "cutlass_fp4_group_mm(Tensor! out, Tensor a, Tensor b,"
+      " Tensor a_blockscale, Tensor b_blockscales, Tensor alphas,"
+      " Tensor problem_sizes, Tensor expert_offsets, Tensor sf_offsets) -> ()");
+
+  // cutlass mxfp4 block scaled group GEMM (MXFP4 x MXFP4 MoE)
+  ops.def(
+      "cutlass_mxfp4_group_mm(Tensor! out, Tensor a, Tensor b,"
+      " Tensor a_blockscale, Tensor b_blockscales,"
+      " Tensor problem_sizes, Tensor expert_offsets, Tensor sf_offsets) -> ()");
+
+  // Compute NVFP4 block quantized tensor.
+  ops.def(
+      "scaled_fp4_quant(Tensor input,"
+      "                 Tensor input_scale, bool "
+      "is_sf_swizzled_layout) -> (Tensor, Tensor)");
+
+  // Out variant
+  // TODO: Add out_variant tag once PyTorch supports it (added in 2.11)
+  // This registration is now migrated to stable ABI
+  // at::Tag::out_variant is not available in the stable ABI (enum_tag.h is not
+  // yet in torch/headeronly), the tag should be applied from Python
+  // via torch.library.Library.define(..., tags=(torch.Tag.out_variant,))
+  // with the .impl remaining in C++.
+  // See pytorch/pytorch#176117.
+  ops.def(
+      "scaled_fp4_quant.out(Tensor input,"
+      "                     Tensor input_scale, bool "
+      "is_sf_swizzled_layout, *, Tensor(a!) output, Tensor(b!) output_scale) "
+      "-> ()");
+
+  // Compute NVFP4 experts quantization.
+  ops.def(
+      "scaled_fp4_experts_quant(Tensor! output, Tensor! output_scale,"
+      "Tensor input, Tensor input_global_scale, Tensor input_offset_by_experts,"
+      "Tensor output_scale_offset_by_experts) -> ()");
+
+  // Fused SiLU+Mul+NVFP4 experts quantization.
+  ops.def(
+      "silu_and_mul_scaled_fp4_experts_quant(Tensor! output, Tensor! "
+      "output_scale,"
+      "Tensor input, Tensor input_global_scale, Tensor input_offset_by_experts,"
+      "Tensor output_scale_offset_by_experts) -> ()");
+
+  // Compute MXFP4 experts quantization (32-element blocks, E8M0 SFs).
+  ops.def(
+      "mxfp4_experts_quant(Tensor! output, Tensor! output_scale,"
+      "Tensor input, Tensor input_offset_by_experts,"
+      "Tensor output_scale_offset_by_experts, int n_experts) -> ()");
+
+  // Fused SiLU+Mul+MXFP4 experts quantization.
+  ops.def(
+      "silu_and_mul_mxfp4_experts_quant(Tensor! output, Tensor! "
+      "output_scale,"
+      "Tensor input, Tensor input_offset_by_experts,"
+      "Tensor output_scale_offset_by_experts, int n_experts) -> ()");
+
+  // Fused SiLU+Mul+NVFP4 quantization.
+  ops.def(
+      "silu_and_mul_nvfp4_quant(Tensor! result, Tensor! result_block_scale, "
+      "Tensor input, Tensor input_global_scale) -> ()");
+
+  // Check if cutlass_scaled_mm_fp4 is supported for CUDA devices
+  // of the given capability
+  ops.def("cutlass_scaled_mm_supports_fp4(int cuda_device_capability) -> bool");
+
+  // CUTLASS w4a8 GEMM
+  ops.def(
+      "cutlass_w4a8_mm("
+      "   Tensor A,"
+      "   Tensor B,"
+      "   Tensor group_scales,"
+      "   int    group_size,"
+      "   Tensor channel_scales,"
+      "   Tensor token_scales,"
+      "   ScalarType? out_type,"
+      "   str?   maybe_schedule"
+      ") -> Tensor");
+
+  // pack scales
+  ops.def("cutlass_pack_scale_fp8(Tensor scales) -> Tensor");
+
+  // encode and reorder weight matrix
+  ops.def("cutlass_encode_and_reorder_int4b(Tensor B) -> Tensor");
+
+  // CUTLASS w4a8 grouped GEMM
+  ops.def(
+      "cutlass_w4a8_moe_mm("
+      "   Tensor! out_tensors,"
+      "   Tensor a_tensors,"
+      "   Tensor b_tensors,"
+      "   Tensor a_scales,"
+      "   Tensor b_scales,"
+      "   Tensor b_group_scales,"
+      "   int b_group_size,"
+      "   Tensor expert_offsets,"
+      "   Tensor problem_sizes,"
+      "   Tensor a_strides,"
+      "   Tensor b_strides,"
+      "   Tensor c_strides,"
+      "   Tensor group_scale_strides,"
+      "   str? maybe_schedule"
+      ") -> ()");
+
+  ops.def(
+      "cutlass_encode_and_reorder_int4b_grouped(Tensor b_tensors) -> (Tensor, "
+      "Tensor)");
+
+  // SM100 CUTLASS MLA decode
+  // conditionally compiled so impl registrations are in source file
+  ops.def(
+      "sm100_cutlass_mla_decode(Tensor! out, Tensor! lse, Tensor q_nope,"
+      "                         Tensor q_pe, Tensor kv_c_and_k_pe_cache,"
+      "                         Tensor seq_lens, Tensor page_table,"
+      "                         Tensor workspace, float scale,"
+      "                         int num_kv_splits) -> ()");
+
+  ops.def(
+      "sm100_cutlass_mla_get_workspace_size(int max_seq_len, int num_batches,"
+      "                                     int sm_count, int num_kv_splits) "
+      "-> int");
+  // Quantized GEMM for AWQ.
+  ops.def(
+      "awq_gemm(Tensor _in_feats, Tensor _kernel, Tensor _scaling_factors, "
+      "Tensor _zeros, SymInt split_k_iters) -> Tensor");
+
+  // Dequantization for AWQ.
+  ops.def(
+      "awq_dequantize(Tensor _kernel, Tensor _scaling_factors, "
+      "Tensor _zeros, SymInt split_k_iters, int thx, int thy) -> Tensor");
+
+  // DeepSeek V3 fused A GEMM (SM 9.0+, bf16 only, 1-16 tokens).
+  // conditionally compiled so impl registration is in source file
+  ops.def(
+      "dsv3_fused_a_gemm(Tensor! output, Tensor mat_a, Tensor mat_b) -> ()");
+
+  // reorder weight for AllSpark Ampere W8A16 Fused Gemm kernel
+  ops.def(
+      "rearrange_kn_weight_as_n32k16_order(Tensor b_qweight, Tensor b_scales, "
+      "Tensor? b_zeros, "
+      "bool has_zp, Tensor! b_qweight_reorder, Tensor! b_scales_reorder, "
+      "Tensor!? b_zeros_reorder, "
+      "int K, int N, int N_32align) -> ()");
+
+  // AllSpark quantization ops
+  ops.def(
+      "allspark_w8a16_gemm(Tensor a, Tensor b_qweight, Tensor b_scales, "
+      "Tensor? b_qzeros, "
+      "SymInt n, SymInt group_size, SymInt sm_count, SymInt sm_version, SymInt "
+      "CUBLAS_M_THRESHOLD, bool has_zp, bool n32k16_reorder) -> Tensor");
 #endif
+
+  // Hadamard transforms
+  // conditionally compiled so impl registration is in source file
+  ops.def("hadacore_transform(Tensor! x, bool inplace) -> Tensor");
+
+  // Apply Root Mean Square (RMS) Normalization to the input tensor.
+  ops.def(
+      "rms_norm(Tensor! result, Tensor input, Tensor weight, float epsilon) -> "
+      "()");
+
+  // In-place fused Add and RMS Normalization.
+  ops.def(
+      "fused_add_rms_norm(Tensor! input, Tensor! residual, Tensor weight, "
+      "float epsilon) -> ()");
+
+  // Layernorm-quant
+  // Apply Root Mean Square (RMS) Normalization to the input tensor.
+  ops.def(
+      "rms_norm_static_fp8_quant(Tensor! result, Tensor input, Tensor weight, "
+      "Tensor scale, float epsilon) -> "
+      "()");
+
+  // In-place fused Add and RMS Normalization.
+  ops.def(
+      "fused_add_rms_norm_static_fp8_quant(Tensor! result, Tensor input, "
+      "Tensor! residual, Tensor weight, "
+      "Tensor scale, float epsilon) -> ()");
+
+  // Fused Layernorm + Quant kernels
+  ops.def(
+      "rms_norm_dynamic_per_token_quant(Tensor! result, Tensor input, "
+      "Tensor weight, Tensor! scale, float epsilon, "
+      "Tensor? scale_ub, Tensor!? residual) -> ()");
+
+  // Fused Layernorm + Block quant kernels
+  ops.def(
+      "rms_norm_per_block_quant(Tensor! result, Tensor input, "
+      "Tensor weight, Tensor! scale, float epsilon, "
+      "Tensor? scale_ub, Tensor!? residual, int group_size, "
+      "bool is_scale_transposed) -> ()");
+
+  // Rotary embedding
+  // Apply GPT-NeoX or GPT-J style rotary embedding to query and key.
+  ops.def(
+      "rotary_embedding(Tensor positions, Tensor! query,"
+      "                 Tensor!? key, int head_size,"
+      "                 Tensor cos_sin_cache, bool is_neox, int "
+      "rope_dim_offset=0, bool inverse=False) -> ()");
+
+  // Function for fused QK Norm and RoPE
+  ops.def(
+      "fused_qk_norm_rope(Tensor! qkv, int num_heads_q, "
+      "int num_heads_k, int num_heads_v, int head_dim, float eps, "
+      "Tensor q_weight, Tensor k_weight, Tensor cos_sin_cache, "
+      "bool is_neox, Tensor position_ids, "
+      "int forced_token_heads_per_warp=-1) -> ()");
+
+  // Activation ops
+  // Activation function used in SwiGLU.
+  ops.def("silu_and_mul(Tensor! result, Tensor input) -> ()");
+
+  ops.def("mul_and_silu(Tensor! out, Tensor input) -> ()");
+
+  // SwiGLU activation with input clamping.
+  ops.def(
+      "silu_and_mul_with_clamp(Tensor! result, Tensor input, float limit) "
+      "-> ()");
+
+  // Activation function used in GeGLU with `none` approximation.
+  ops.def("gelu_and_mul(Tensor! out, Tensor input) -> ()");
+
+  // Activation function used in GeGLU with `tanh` approximation.
+  ops.def("gelu_tanh_and_mul(Tensor! out, Tensor input) -> ()");
+
+  // FATReLU implementation.
+  ops.def("fatrelu_and_mul(Tensor! out, Tensor input, float threshold) -> ()");
+
+  ops.def(
+      "swigluoai_and_mul(Tensor! out, Tensor input, float alpha=1.702, float "
+      "limit=7.0) "
+      "-> ()");
+
+  // GELU implementation used in GPT-2.
+  ops.def("gelu_new(Tensor! out, Tensor input) -> ()");
+
+  // Approximate GELU implementation.
+  ops.def("gelu_fast(Tensor! out, Tensor input) -> ()");
+
+  // Quick GELU implementation.
+  ops.def("gelu_quick(Tensor! out, Tensor input) -> ()");
+
+  // Compute int8 quantized tensor for given scaling factor.
+  ops.def(
+      "static_scaled_int8_quant(Tensor! result, Tensor input, Tensor scale,"
+      "Tensor? azp) -> ()");
+
+  // Compute int8 quantized tensor and scaling factor
+  ops.def(
+      "dynamic_scaled_int8_quant(Tensor! result, Tensor input, Tensor! scale, "
+      "Tensor!? azp) -> ()");
+
+  // Compute FP8 quantized tensor for given scaling factor.
+  // Supports per-tensor, per-channel, per-token, and arbitrary 2D group
+  // scaling. Optional group_m/group_n specify the group shape explicitly;
+  // required for 1D scales to disambiguate per-channel vs per-token.
+  ops.def(
+      "static_scaled_fp8_quant(Tensor! result, Tensor input, Tensor scale, "
+      "int[]? group_shape=None) -> ()");
+
+  // Compute dynamic-per-tensor FP8 quantized tensor and scaling factor.
+  ops.def(
+      "dynamic_scaled_fp8_quant(Tensor! result, Tensor input, Tensor! scale) "
+      "-> "
+      "()");
+
+  // Compute dynamic-per-token FP8 quantized tensor and scaling factor.
+  ops.def(
+      "dynamic_per_token_scaled_fp8_quant(Tensor! result, Tensor input, "
+      "Tensor! scale, Tensor? scale_ub) -> "
+      "()");
+
+  // Quantized GEMM for GPTQ.
+  // Note: even though the C++ inferred schema is correct for this op, it seems
+  // to prevent the meta function registry.
+  ops.def(
+      "gptq_gemm(Tensor a, Tensor b_q_weight, Tensor b_gptq_qzeros, "
+      "Tensor b_gptq_scales, Tensor b_g_idx, bool use_exllama, bool "
+      "use_v2_format, int bit) "
+      "-> Tensor");
+
+  // Post processing for GPTQ.
+  ops.def("gptq_shuffle(Tensor! q_weight, Tensor q_perm, int bit) -> ()");
+
+  // Dequantization for GGML.
+  ops.def(
+      "ggml_dequantize(Tensor W, int type, SymInt m, SymInt n, ScalarType? "
+      "dtype) -> Tensor");
+
+  // mmvq kernel for GGML.
+  ops.def(
+      "ggml_mul_mat_vec_a8(Tensor W, Tensor X, int type, SymInt row) "
+      "-> Tensor");
+
+  // mmq kernel for GGML.
+  ops.def(
+      "ggml_mul_mat_a8(Tensor W, Tensor X, int type, SymInt row) -> Tensor");
+
+  // moe kernel for GGML.
+  ops.def(
+      "ggml_moe_a8(Tensor X, Tensor W, "
+      "Tensor sorted_token_ids, Tensor expert_ids, Tensor "
+      "num_tokens_post_padded, "
+      "int type, SymInt row, SymInt top_k, SymInt tokens) -> Tensor");
+
+  ops.def(
+      "ggml_moe_a8_vec(Tensor X, Tensor W, "
+      "Tensor topk_ids, int top_k, "
+      "int type, SymInt row, SymInt tokens) -> Tensor");
+
+  ops.def("ggml_moe_get_block_size(int type) -> int");
 }
 
 STABLE_TORCH_LIBRARY_IMPL(_C, CUDA, ops) {
@@ -46,7 +436,109 @@ STABLE_TORCH_LIBRARY_IMPL(_C, CUDA, ops) {
            TORCH_BOX(&per_token_group_quant_8bit_packed));
   ops.impl("per_token_group_quant_int8",
            TORCH_BOX(&per_token_group_quant_int8));
+
+  // CUTLASS scaled_mm ops
+  ops.impl("cutlass_scaled_mm", TORCH_BOX(&cutlass_scaled_mm));
+  ops.impl("cutlass_scaled_mm_azp", TORCH_BOX(&cutlass_scaled_mm_azp));
+  ops.impl("cutlass_moe_mm", TORCH_BOX(&cutlass_moe_mm));
+  ops.impl("get_cutlass_moe_mm_data", TORCH_BOX(&get_cutlass_moe_mm_data));
+  ops.impl("get_cutlass_moe_mm_problem_sizes_from_expert_offsets",
+           TORCH_BOX(&get_cutlass_moe_mm_problem_sizes_from_expert_offsets));
+  ops.impl("get_cutlass_batched_moe_mm_data",
+           TORCH_BOX(&get_cutlass_batched_moe_mm_data));
+
+  // FP4/NVFP4 ops
+  ops.impl("cutlass_scaled_fp4_mm", TORCH_BOX(&cutlass_scaled_fp4_mm));
+  ops.impl("scaled_fp4_quant", TORCH_BOX(&scaled_fp4_quant_func));
+  ops.impl("scaled_fp4_quant.out", TORCH_BOX(&scaled_fp4_quant_out));
+  ops.impl("scaled_fp4_experts_quant", TORCH_BOX(&scaled_fp4_experts_quant));
+  ops.impl("silu_and_mul_scaled_fp4_experts_quant",
+           TORCH_BOX(&silu_and_mul_scaled_fp4_experts_quant));
+  ops.impl("silu_and_mul_nvfp4_quant", TORCH_BOX(&silu_and_mul_nvfp4_quant));
+  // mxfp4_experts_quant: registered in mxfp4_experts_quant.cu (SM100 only).
+  // W4A8 ops: registered in w4a8_mm_entry.cu / w4a8_grouped_mm_entry.cu.
+
+  // AWQ ops
+  ops.impl("awq_gemm", TORCH_BOX(&awq_gemm));
+  ops.impl("awq_dequantize", TORCH_BOX(&awq_dequantize));
+
+  // DSV3 fused A GEMM: conditionally compiled so impl registration is in
+  // source file (dsv3_fused_a_gemm.cu)
+
+  // AllSpark ops: conditionally compiled so impl registrations are in source
+  // files (allspark_repack.cu and allspark_qgemm_w8a16.cu)
+#endif
+
+  // Layernorm kernels (shared CUDA/ROCm)
+  ops.impl("rms_norm", TORCH_BOX(&rms_norm));
+  ops.impl("fused_add_rms_norm", TORCH_BOX(&fused_add_rms_norm));
+
+  // Layernorm-quant kernels (shared CUDA/ROCm)
+  ops.impl("rms_norm_static_fp8_quant", TORCH_BOX(&rms_norm_static_fp8_quant));
+  ops.impl("fused_add_rms_norm_static_fp8_quant",
+           TORCH_BOX(&fused_add_rms_norm_static_fp8_quant));
+
+  // Fused layernorm + dynamic per-token quant kernels (shared CUDA/ROCm)
+  ops.impl("rms_norm_dynamic_per_token_quant",
+           TORCH_BOX(&rms_norm_dynamic_per_token_quant));
+  ops.impl("rms_norm_per_block_quant", TORCH_BOX(&rms_norm_per_block_quant));
+
+  // Positional encoding kernels (shared CUDA/ROCm)
+  ops.impl("rotary_embedding", TORCH_BOX(&rotary_embedding));
+  ops.impl("fused_qk_norm_rope", TORCH_BOX(&fused_qk_norm_rope));
+
+  // Activation kernels (shared CUDA/ROCm)
+  ops.impl("silu_and_mul", TORCH_BOX(&silu_and_mul));
+  ops.impl("mul_and_silu", TORCH_BOX(&mul_and_silu));
+  ops.impl("gelu_and_mul", TORCH_BOX(&gelu_and_mul));
+  ops.impl("gelu_tanh_and_mul", TORCH_BOX(&gelu_tanh_and_mul));
+  ops.impl("fatrelu_and_mul", TORCH_BOX(&fatrelu_and_mul));
+  ops.impl("swigluoai_and_mul", TORCH_BOX(&swigluoai_and_mul));
+  ops.impl("gelu_new", TORCH_BOX(&gelu_new));
+  ops.impl("gelu_fast", TORCH_BOX(&gelu_fast));
+  ops.impl("gelu_quick", TORCH_BOX(&gelu_quick));
+  ops.impl("silu_and_mul_with_clamp", TORCH_BOX(&silu_and_mul_clamp));
+
+  // INT8 quantization kernels
+  ops.impl("static_scaled_int8_quant", TORCH_BOX(&static_scaled_int8_quant));
+  ops.impl("dynamic_scaled_int8_quant", TORCH_BOX(&dynamic_scaled_int8_quant));
+
+  // FP8 quantization kernels
+  ops.impl("static_scaled_fp8_quant", TORCH_BOX(&static_scaled_fp8_quant));
+  ops.impl("dynamic_scaled_fp8_quant", TORCH_BOX(&dynamic_scaled_fp8_quant));
+  ops.impl("dynamic_per_token_scaled_fp8_quant",
+           TORCH_BOX(&dynamic_per_token_scaled_fp8_quant));
+
+  // GPTQ kernels
+  ops.impl("gptq_gemm", TORCH_BOX(&gptq_gemm));
+  ops.impl("gptq_shuffle", TORCH_BOX(&gptq_shuffle));
+
+  // GGML kernels
+  ops.impl("ggml_dequantize", TORCH_BOX(&ggml_dequantize));
+  ops.impl("ggml_mul_mat_vec_a8", TORCH_BOX(&ggml_mul_mat_vec_a8));
+  ops.impl("ggml_mul_mat_a8", TORCH_BOX(&ggml_mul_mat_a8));
+  ops.impl("ggml_moe_a8", TORCH_BOX(&ggml_moe_a8));
+  ops.impl("ggml_moe_a8_vec", TORCH_BOX(&ggml_moe_a8_vec));
+}
+
+// These capability-check functions take only primitive args (no tensors), so
+// there is no device to dispatch on. CompositeExplicitAutograd makes them
+// available for all backends. This is the stable ABI equivalent of calling
+// ops.impl("op_name", &func) without a dispatch key in the non-stable API.
+STABLE_TORCH_LIBRARY_IMPL(_C, CompositeExplicitAutograd, ops) {
+#ifndef USE_ROCM
+  ops.impl("cutlass_scaled_mm_supports_fp8",
+           TORCH_BOX(&cutlass_scaled_mm_supports_fp8));
+  ops.impl("cutlass_group_gemm_supported",
+           TORCH_BOX(&cutlass_group_gemm_supported));
+  ops.impl("cutlass_scaled_mm_supports_block_fp8",
+           TORCH_BOX(&cutlass_scaled_mm_supports_block_fp8));
+  ops.impl("cutlass_scaled_mm_supports_fp4",
+           TORCH_BOX(&cutlass_scaled_mm_supports_fp4));
 #endif
+
+  // GGML block size lookup (no tensor args)
+  ops.impl("ggml_moe_get_block_size", TORCH_BOX(&ggml_moe_get_block_size));
 }
 
 REGISTER_EXTENSION(_C_stable_libtorch)
diff --git a/csrc/libtorch_stable/torch_utils.h b/csrc/libtorch_stable/torch_utils.h
index 1bc744fee5f2..1adbb4d49866 100644
--- a/csrc/libtorch_stable/torch_utils.h
+++ b/csrc/libtorch_stable/torch_utils.h
@@ -1,9 +1,79 @@
 #pragma once
 
 #include <torch/csrc/inductor/aoti_torch/c/shim.h>
+#include <torch/csrc/stable/accelerator.h>
+#include <torch/csrc/stable/ops.h>
+#include <torch/csrc/stable/tensor.h>
 #include <torch/headeronly/util/shim_utils.h>
 
-#include <cuda_runtime.h>
+#ifndef USE_ROCM
+  #include <cuda_runtime.h>
+#else
+  #include <hip/hip_runtime.h>
+#endif
+#include <cublas_v2.h>
+
+#include <deque>
+#include <mutex>
+#include <string>
+#include <vector>
+
+// Stable ABI equivalent of TORCH_CHECK_NOT_IMPLEMENTED.
+#define STD_TORCH_CHECK_NOT_IMPLEMENTED(cond, ...) \
+  STD_TORCH_CHECK(cond, "NotImplementedError: ", __VA_ARGS__)
+
+// Device properties cache for stable ABI compatibility.
+// Uses raw CUDA/HIP APIs instead of ATen functions.
+// Using inline ensures a single instance across all translation units.
+inline std::deque<std::once_flag> device_flags;
+inline std::vector<cudaDeviceProp> device_properties;
+inline std::once_flag vectors_init_flag;
+
+inline void do_init_device_vectors() {
+  int device_count;
+  cudaError_t err = cudaGetDeviceCount(&device_count);
+  if (err != cudaSuccess) {
+    STD_TORCH_CHECK(false, "cudaGetDeviceCount failed: " +
+                               std::string(cudaGetErrorString(err)));
+  }
+  device_flags.resize(device_count);
+  device_properties.resize(device_count);
+}
+
+inline void initDeviceVectors() {
+  std::call_once(vectors_init_flag, do_init_device_vectors);
+}
+
+inline void initDeviceProperty(int device_index) {
+  cudaDeviceProp device_prop{};
+  cudaError_t err = cudaGetDeviceProperties(&device_prop, device_index);
+  if (err != cudaSuccess) {
+    STD_TORCH_CHECK(false, "cudaGetDeviceProperties failed: " +
+                               std::string(cudaGetErrorString(err)));
+  }
+  device_properties[device_index] = device_prop;
+}
+
+// Get device properties using raw CUDA/HIP APIs (stable ABI compatible).
+// Caches results per device so cudaGetDeviceProperties is called at most once
+// per device.
+inline cudaDeviceProp* get_device_prop() {
+  initDeviceVectors();
+  int device_index;
+  cudaError_t err = cudaGetDevice(&device_index);
+  if (err != cudaSuccess) {
+    STD_TORCH_CHECK(
+        false, "cudaGetDevice failed: " + std::string(cudaGetErrorString(err)));
+  }
+  STD_TORCH_CHECK(device_index >= 0 && static_cast<size_t>(device_index) <
+                                           device_properties.size(),
+                  "CUDA device index " + std::to_string(device_index) +
+                      " out of range [0, " +
+                      std::to_string(device_properties.size()) + ")");
+
+  std::call_once(device_flags[device_index], initDeviceProperty, device_index);
+  return &device_properties[device_index];
+}
 
 // Utility to get the current CUDA stream for a given device using stable APIs.
 // Returns a cudaStream_t for use in kernel launches.
@@ -13,3 +83,10 @@ inline cudaStream_t get_current_cuda_stream(int32_t device_index = -1) {
       aoti_torch_get_current_cuda_stream(device_index, &stream_ptr));
   return reinterpret_cast<cudaStream_t>(stream_ptr);
 }
+
+// Utility to get the current cuBLAS handle using stable APIs.
+inline cublasHandle_t get_current_cuda_blas_handle() {
+  void* blas_handle_ptr = nullptr;
+  TORCH_ERROR_CODE_CHECK(torch_get_current_cuda_blas_handle(&blas_handle_ptr));
+  return reinterpret_cast<cublasHandle_t>(blas_handle_ptr);
+}
diff --git a/csrc/mamba/mamba_ssm/selective_scan.h b/csrc/mamba/mamba_ssm/selective_scan.h
index 8f33c7cfa163..ff1d9528e0f6 100644
--- a/csrc/mamba/mamba_ssm/selective_scan.h
+++ b/csrc/mamba/mamba_ssm/selective_scan.h
@@ -21,7 +21,7 @@ struct SSMParamsBase {
     int dim_ngroups_ratio;
     bool is_variable_B;
     bool is_variable_C;
-    int64_t pad_slot_id;
+    int64_t null_block_id;
 
     bool delta_softplus;
     bool cache_enabled;
diff --git a/csrc/mamba/mamba_ssm/selective_scan_fwd.cu b/csrc/mamba/mamba_ssm/selective_scan_fwd.cu
index d852a0ed4928..ba2f0cc61942 100644
--- a/csrc/mamba/mamba_ssm/selective_scan_fwd.cu
+++ b/csrc/mamba/mamba_ssm/selective_scan_fwd.cu
@@ -118,9 +118,17 @@ void selective_scan_fwd_kernel(SSMParamsBase params) {
 
     const int* cache_indices = params.cache_indices_ptr == nullptr ? nullptr
         : reinterpret_cast<int *>(params.cache_indices_ptr);
-    const int cache_index = cache_indices == nullptr ? batch_id : cache_indices[batch_id]; 
-    // cache_index == params.pad_slot_id is defined as padding, so we exit early
-    if (cache_index == params.pad_slot_id){
+    int cache_index;
+    if (cache_indices == nullptr) {
+        cache_index = batch_id;
+    } else if (params.cache_enabled) {
+        const int* initial_state_idx = reinterpret_cast<const int*>(params.initial_state_idx_ptr);
+        cache_index = cache_indices[batch_id * params.cache_indices_stride + initial_state_idx[batch_id]];
+    } else {
+        cache_index = cache_indices[batch_id];
+    }
+    // Skip batch entries whose cache index maps to the null block (padding).
+    if (cache_indices != nullptr && cache_index == params.null_block_id){
         return;
     }
     input_t *u = reinterpret_cast<input_t *>(params.u_ptr) + sequence_start_index * params.u_batch_stride
@@ -527,7 +535,7 @@ void set_ssm_params_fwd(SSMParamsBase &params,
                         const std::optional<at::Tensor>& cache_indices,
                         const std::optional<at::Tensor>& has_initial_state,
                         bool varlen,
-                        int64_t pad_slot_id,
+                        int64_t null_block_id,
                         int64_t block_size,
                         const std::optional<torch::Tensor> &block_idx_first_scheduled_token,
                         const std::optional<torch::Tensor> &block_idx_last_scheduled_token,
@@ -544,7 +552,7 @@ void set_ssm_params_fwd(SSMParamsBase &params,
     params.dstate = dstate;
     params.n_groups = n_groups;
     params.dim_ngroups_ratio = dim / n_groups;
-    params.pad_slot_id = pad_slot_id;
+    params.null_block_id = null_block_id;
 
     params.delta_softplus = delta_softplus;
 
@@ -658,7 +666,7 @@ void selective_scan_fwd(const torch::Tensor &u, const torch::Tensor &delta,
                   const torch::Tensor &ssm_states,
                   // used to identify padding entries if cache_indices provided
                   // in case of padding, the kernel will return early
-                  int64_t pad_slot_id,
+                  int64_t null_block_id,
                   int64_t block_size,
                   const std::optional<torch::Tensor> &block_idx_first_scheduled_token,
                   const std::optional<torch::Tensor> &block_idx_last_scheduled_token,
@@ -805,7 +813,7 @@ void selective_scan_fwd(const torch::Tensor &u, const torch::Tensor &delta,
                        cache_indices,
                        has_initial_state,
                        varlen,
-                       pad_slot_id,
+                       null_block_id,
                        block_size,
                        block_idx_first_scheduled_token,
                        block_idx_last_scheduled_token,
diff --git a/csrc/minimax_reduce_rms_kernel.cu b/csrc/minimax_reduce_rms_kernel.cu
new file mode 100644
index 000000000000..6245b02d6e98
--- /dev/null
+++ b/csrc/minimax_reduce_rms_kernel.cu
@@ -0,0 +1,879 @@
+
+/*
+ * Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cooperative_groups.h>
+#include <cuda_runtime.h>
+
+#include <torch/cuda.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+
+#include "cuda_compat.h"
+#include "cuda_utils.h"
+#include "core/registration.h"
+#include "minimax_reduce_rms_kernel.h"
+
+#include <algorithm>
+
+#define FINAL_MASK 0xffffffff
+#define MINIMAX_REDUCE_RMS_WARP_SIZE 32
+
+namespace vllm {
+namespace tensorrt_llm {
+
+template <int NRanks>
+struct LamportComm {
+  __device__ __forceinline__ LamportComm(void** workspace, int rank) {
+    counter_ptr = &reinterpret_cast<int*>(workspace[NRanks * 3])[0];
+    flag_ptr = &reinterpret_cast<int*>(workspace[NRanks * 3])[2];
+    clear_ptr = &reinterpret_cast<int64_t*>(workspace[NRanks * 3 + 1])[0];
+    flag_value = *flag_ptr;
+    auto comm_size = reinterpret_cast<int64_t*>(workspace[NRanks * 3 + 1])[1];
+    clear_size = *clear_ptr;
+    int data_offset = flag_value % 3;
+    int clear_offset = (flag_value + 2) % 3;
+    for (int r = 0; r < NRanks; ++r) {
+      data_bufs[r] = reinterpret_cast<uint8_t*>(workspace[2 * NRanks + r]) +
+                     data_offset * comm_size;
+    }
+    clear_buf = reinterpret_cast<uint8_t*>(workspace[2 * NRanks + rank]) +
+                clear_offset * comm_size;
+    __syncthreads();
+    if (threadIdx.x == 0) {
+      atomicAdd(counter_ptr, 1);
+    }
+  }
+
+  __device__ __forceinline__ void update(int64_t new_clear_size) {
+    if (blockIdx.x == 0 && threadIdx.x == 0) {
+      while (*reinterpret_cast<int volatile*>(counter_ptr) != gridDim.x) {
+      }
+      *flag_ptr = (flag_value + 1) % 3;
+      *clear_ptr = new_clear_size;
+      *counter_ptr = 0;
+    }
+  }
+
+  int* counter_ptr;
+  int* flag_ptr;
+  int64_t* clear_ptr;
+  uint8_t* data_bufs[NRanks];
+  uint8_t* clear_buf;
+  int64_t clear_size;
+  int flag_value;
+};
+
+__device__ __forceinline__ bool is_neg_zero(float v) {
+  return *reinterpret_cast<uint32_t*>(&v) == 0x80000000;
+}
+
+__device__ __forceinline__ bool is_neg_zero(float4 v) {
+  return is_neg_zero(v.x) || is_neg_zero(v.y) || is_neg_zero(v.z) ||
+         is_neg_zero(v.w);
+}
+
+__device__ __forceinline__ float4 get_neg_zero() {
+  float4 vec;
+#pragma unroll
+  for (int i = 0; i < 4; ++i) {
+    reinterpret_cast<uint32_t*>(&vec)[i] = 0x80000000;
+  }
+  return vec;
+}
+
+template <int Dim>
+__device__ __forceinline__ float rms_rsqrt(float& v, float eps) {
+  constexpr float kInvDim = 1.0F / static_cast<float>(Dim);
+  v = rsqrtf((v * kInvDim) + eps);
+  return v;
+}
+
+template <int Dim>
+__device__ __forceinline__ float4 rms_rsqrt(float4& v, float eps) {
+  constexpr float kInvDim = 1.0F / static_cast<float>(Dim);
+  v.x = rsqrtf((v.x * kInvDim) + eps);
+  v.y = rsqrtf((v.y * kInvDim) + eps);
+  v.z = rsqrtf((v.z * kInvDim) + eps);
+  v.w = rsqrtf((v.w * kInvDim) + eps);
+  return v;
+}
+__device__ __forceinline__ float4 ld_global_volatile(float4* addr) {
+  float4 val;
+  asm volatile("ld.volatile.global.v4.f32 {%0, %1, %2, %3}, [%4];"
+               : "=f"(val.x), "=f"(val.y), "=f"(val.z), "=f"(val.w)
+               : "l"(addr));
+  return val;
+}
+
+__device__ __forceinline__ float ld_global_volatile(float* addr) {
+  float val;
+  asm volatile("ld.volatile.global.f32 %0, [%1];" : "=f"(val) : "l"(addr));
+  return val;
+}
+
+// Used by the scalar (non-float4) kernel only
+template <typename T, int NUM>
+__inline__ __device__ T warpReduceSumV2(T* val) {
+#pragma unroll
+  for (int i = 0; i < NUM; i++) {
+#pragma unroll
+    for (int mask = 16; mask > 0; mask >>= 1)
+      val[i] += __shfl_xor_sync(FINAL_MASK, val[i], mask, 32);
+  }
+  return (T)(0.0f);
+}
+
+template <typename T, int NUM>
+__inline__ __device__ T blockReduceSumV2(T* val) {
+  static __shared__ T shared[NUM][33];
+  int lane = threadIdx.x & 0x1f;
+  int wid = threadIdx.x >> 5;
+
+  warpReduceSumV2<T, NUM>(val);
+
+  if (lane == 0) {
+#pragma unroll
+    for (int i = 0; i < NUM; i++) {
+      shared[i][wid] = val[i];
+    }
+  }
+
+  __syncthreads();
+
+  bool is_mask = threadIdx.x < (blockDim.x / 32.f);
+#pragma unroll
+  for (int i = 0; i < NUM; i++) {
+    val[i] = is_mask ? shared[i][lane] : (T)(0.0f);
+  }
+  warpReduceSumV2<T, NUM>(val);
+  return (T)0.0f;
+}
+
+// for float4 version
+template <uint32_t kNumThreads, typename T, int ArraySize = 4>
+__device__ __forceinline__ void local_warp_reduce_sum_array(
+    T* value_ptr, uint32_t active_mask = 0xffffffffu) {
+  static_assert(kNumThreads >= 1 &&
+                kNumThreads <= MINIMAX_REDUCE_RMS_WARP_SIZE);
+#pragma unroll
+  for (int i = 0; i < ArraySize; ++i) {
+#pragma unroll
+    for (int mask = kNumThreads / 2; mask > 0; mask >>= 1) {
+      value_ptr[i] += __shfl_xor_sync(active_mask, value_ptr[i], mask,
+                                      MINIMAX_REDUCE_RMS_WARP_SIZE);
+    }
+  }
+}
+
+constexpr int next_pow2(int val) {
+  int result = 1;
+  while (result < val) {
+    result <<= 1;
+  }
+  return result;
+}
+
+// ---------------------------------------------------------------------------
+
+template <typename DType>
+class IndexHelper {
+ public:
+  __device__ __forceinline__ IndexHelper(MiniMaxReduceRMSParams const& params) {
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+    namespace cg = cooperative_groups;
+    cg::cluster_group cluster = cg::this_cluster();
+    cg::grid_group grid = cg::this_grid();
+    token_id = grid.cluster_rank();
+    access_id_in_token = cluster.thread_rank();
+    token_stride = grid.num_clusters();
+#else
+    token_id = blockIdx.x;
+    access_id_in_token = threadIdx.x;
+    token_stride = gridDim.x;
+#endif
+    access_id = token_id * params.hidden_dim / kElemsPerAccess<DType> +
+                access_id_in_token;
+    access_stride = token_stride * params.hidden_dim / kElemsPerAccess<DType>;
+    tot_access = params.size_q / kElemsPerAccess<DType>;
+  }
+
+  int token_id;
+  int access_id_in_token;
+  int token_stride;
+  int access_id;
+  int access_stride;
+  int tot_access;
+};
+
+/**
+* this kernel is used to for minimax attention module
+* input tensor [total_tokens, hidden_dim / tp_size], fp32
+* rms weight [hidden_dim / tp_size], bf16
+step 1: reduce from single rank to get the variance sum (reduce(input^2,
+dim=-1)) step 2: reduce from all ranks to get the variance sum
+(all_reduce(variance_sum)) step 3: calculate the rms norm (input *
+rsqrt(variance + eps)) in this case, max hidden_dim is 6144 (float data), for
+each token, we only need 6144 / 4 / tp_size = (1536 / tp_size) threads so we can
+assume cluster size is 1 (tp_size >= 2)
+ */
+template <typename DType, int NRanks>
+__global__ void __launch_bounds__(1024)
+    minimax_reduce_rms_kernel_lamport(MiniMaxReduceRMSParams params) {
+  IndexHelper<DType> index_helper(params);
+  int token_id = index_helper.token_id;
+  int access_id_in_token = index_helper.access_id_in_token;
+  int token_stride = index_helper.token_stride;
+  int access_id = index_helper.access_id;
+  int access_stride = index_helper.access_stride;
+  int tot_access = index_helper.tot_access;
+  int tot_tokens = params.size_q / params.hidden_dim;
+  float4 clear_vec = get_neg_zero();
+
+  LamportComm<NRanks> comm(params.workspace, params.rank);
+  int clear_access = comm.clear_size / kElemsPerAccess<DType>;
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+  asm volatile("griddepcontrol.wait;");
+#endif
+  for (int idx = access_id; idx < tot_access;
+       idx += access_stride, token_id += token_stride) {
+    alignas(16) DType vals[kElemsPerAccess<DType>];
+    float sum_variance = 0.F;
+    *reinterpret_cast<float4*>(vals) =
+        reinterpret_cast<float4*>(params.allreduce_in)[idx];
+#pragma unroll
+    for (int i = 0; i < kElemsPerAccess<DType>; ++i) {
+      sum_variance += static_cast<float>(vals[i]) * static_cast<float>(vals[i]);
+    }
+    blockReduceSumV2<float, 1>(&sum_variance);
+    if (is_neg_zero(sum_variance)) {
+      sum_variance = 0.F;
+    }
+    if (threadIdx.x == 0) {
+      for (int r = 0; r < NRanks; ++r) {
+        reinterpret_cast<float*>(
+            comm.data_bufs[r])[(params.rank * tot_tokens) + token_id] =
+            (sum_variance);
+      }
+    }
+
+    bool done = false;
+    float vars_all_ranks[NRanks];
+    while (!done) {
+      done = true;
+#pragma unroll
+      for (int r = 0; r < NRanks; ++r) {
+        vars_all_ranks[r] = ld_global_volatile(&reinterpret_cast<float*>(
+            comm.data_bufs[params.rank])[(r * tot_tokens) + token_id]);
+        done &= !is_neg_zero(vars_all_ranks[r]);
+      }
+    }
+    sum_variance = 0.F;
+#pragma unroll
+    for (int r = 0; r < NRanks; ++r) {
+      sum_variance += vars_all_ranks[r];
+    }
+
+    DType norm_weight[kElemsPerAccess<DType>];
+    *reinterpret_cast<typename ElemsPerAccess<DType>::vec_type*>(norm_weight) =
+        reinterpret_cast<typename ElemsPerAccess<DType>::vec_type*>(
+            params.rms_gamma)[access_id_in_token];
+
+#pragma unroll
+    for (int i = 0; i < kElemsPerAccess<DType>; ++i) {
+      vals[i] = static_cast<DType>(
+          static_cast<float>(vals[i]) *
+          rsqrtf(
+              (sum_variance / static_cast<float>(params.hidden_dim) / NRanks) +
+              params.rms_eps) *
+          static_cast<float>(norm_weight[i]));
+    }
+
+    reinterpret_cast<float4*>(params.rms_norm_out)[idx] =
+        *reinterpret_cast<float4*>(vals);
+  }
+  for (int idx = access_id; idx < clear_access; idx += access_stride) {
+    reinterpret_cast<float4*>(comm.clear_buf)[idx] = clear_vec;
+  }
+  comm.update(params.size_q * NRanks);
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+  asm volatile("griddepcontrol.launch_dependents;");
+#endif
+}
+
+/**
+ * Float4 variant: process 4 rows at once, allreduce variance sums as float4 for
+ * better memory coalescing. sum_variance is always float; applies to all DTypes
+ * (half, bf16, float). When tot_tokens % 4 != 0, the last group pads rows with
+ * zeros; padded rows are not written to rms_norm_out. IsQK: when true, process
+ * Q+K in one loop with doubled comm buffer; when false, single-matrix (Q only).
+ */
+template <typename DType, int NRanks, int OriginQDim, int OriginKDim>
+__global__ void __launch_bounds__(1024)
+    minimax_reduce_qk_rms_kernel_lamport_float4(MiniMaxReduceRMSParams params) {
+  // Compile-time per-rank dimensions
+  constexpr int RankQDim = OriginQDim / NRanks;
+  constexpr int RankKDim = OriginKDim / NRanks;
+  // Threads needed to cover one row of Q / K with float4 accesses
+  constexpr int ThreadsPerRowQ = RankQDim / kElemsPerAccess<DType>;
+  constexpr int ThreadsPerRowK = RankKDim / kElemsPerAccess<DType>;
+  // Number of warps dedicated to Q / K
+  constexpr int NumWarpQ = (ThreadsPerRowQ + MINIMAX_REDUCE_RMS_WARP_SIZE - 1) /
+                           MINIMAX_REDUCE_RMS_WARP_SIZE;
+  constexpr int NumWarpK = (ThreadsPerRowK + MINIMAX_REDUCE_RMS_WARP_SIZE - 1) /
+                           MINIMAX_REDUCE_RMS_WARP_SIZE;
+
+  int tot_tokens = params.size_q / RankQDim;
+  int tot_groups = (tot_tokens + 3) / 4;  // ceiling; last group may be partial
+
+  // Memory strides for strided qkv tensors (elements -> float4-access units)
+  int access_stride_q = (params.stride_q > 0 ? params.stride_q : RankQDim) /
+                        kElemsPerAccess<DType>;
+  int access_stride_k = (params.stride_k > 0 ? params.stride_k : RankKDim) /
+                        kElemsPerAccess<DType>;
+  // Output strides: default to contiguous (hidden_dim / hidden_dim_k)
+  int access_stride_q_out =
+      (params.stride_q_out > 0 ? params.stride_q_out : params.hidden_dim) /
+      kElemsPerAccess<DType>;
+  int access_stride_k_out =
+      (params.stride_k_out > 0 ? params.stride_k_out : params.hidden_dim_k) /
+      kElemsPerAccess<DType>;
+
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+  namespace cg = cooperative_groups;
+  cg::cluster_group cluster = cg::this_cluster();
+  cg::grid_group grid = cg::this_grid();
+  int group_id = grid.cluster_rank();
+  int access_id_in_token = cluster.thread_rank();
+  int group_stride = grid.num_clusters();
+#else
+  int group_id = blockIdx.x;
+  int access_id_in_token = threadIdx.x;
+  int group_stride = gridDim.x;
+#endif
+
+  bool is_q = (access_id_in_token < NumWarpQ * MINIMAX_REDUCE_RMS_WARP_SIZE);
+  int k_thread_idx =
+      access_id_in_token - (NumWarpQ * MINIMAX_REDUCE_RMS_WARP_SIZE);
+  bool is_valid_q = (access_id_in_token < ThreadsPerRowQ);
+  bool is_valid_k = (k_thread_idx >= 0 && k_thread_idx < ThreadsPerRowK);
+  float4 clear_vec = get_neg_zero();
+
+  // Shared memory for two-level block reduction and scale broadcast
+  __shared__ float block_reduce_sum[4][MINIMAX_REDUCE_RMS_WARP_SIZE + 1];
+  __shared__ float global_scale_q[4];
+  __shared__ float global_scale_k[4];
+
+  LamportComm<NRanks> comm(params.workspace, params.rank);
+
+  DType norm_weight[kElemsPerAccess<DType>]{};
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+  asm volatile("griddepcontrol.wait;");
+#endif
+  if (is_q) {
+    if (is_valid_q) {
+      *reinterpret_cast<typename ElemsPerAccess<DType>::vec_type*>(
+          norm_weight) =
+          reinterpret_cast<typename ElemsPerAccess<DType>::vec_type const*>(
+              params.rms_gamma)[access_id_in_token];
+    }
+  } else {
+    if (is_valid_k) {
+      *reinterpret_cast<typename ElemsPerAccess<DType>::vec_type*>(
+          norm_weight) =
+          reinterpret_cast<typename ElemsPerAccess<DType>::vec_type const*>(
+              params.rms_gamma_k)[k_thread_idx];
+    }
+  }
+
+  // Main loop: process one group of 4 tokens per iteration.
+  for (int g = group_id; g < tot_groups; g += group_stride) {
+    alignas(16) DType vals[4][kElemsPerAccess<DType>]{};
+    float warp_sum_variance[4]{0.F, 0.F, 0.F, 0.F};
+
+    if (is_q) {
+#pragma unroll
+      for (int row = 0; row < 4; ++row) {
+        int token_r = g * 4 + row;
+        if (token_r >= tot_tokens || !is_valid_q) {
+          continue;
+        }
+        int idx_r = token_r * access_stride_q + access_id_in_token;
+        *reinterpret_cast<float4*>(&vals[row][0]) =
+            reinterpret_cast<float4 const*>(params.allreduce_in)[idx_r];
+#pragma unroll
+        for (int i = 0; i < kElemsPerAccess<DType>; ++i) {
+          float x = static_cast<float>(vals[row][i]);
+          warp_sum_variance[row] += x * x;
+        }
+      }
+    } else {
+#pragma unroll
+      for (int row = 0; row < 4; ++row) {
+        int token_r = g * 4 + row;
+        if (token_r >= tot_tokens || !is_valid_k) {
+          continue;
+        }
+        int idx_r = token_r * access_stride_k + k_thread_idx;
+        *reinterpret_cast<float4*>(&vals[row][0]) =
+            reinterpret_cast<float4 const*>(params.allreduce_in_k)[idx_r];
+#pragma unroll
+        for (int i = 0; i < kElemsPerAccess<DType>; ++i) {
+          float x = static_cast<float>(vals[row][i]);
+          warp_sum_variance[row] += x * x;
+        }
+      }
+    }
+
+    local_warp_reduce_sum_array<MINIMAX_REDUCE_RMS_WARP_SIZE, float, 4>(
+        warp_sum_variance);
+    // Warp lane 0 writes its warp's partial sum to shared memory
+    int lane = threadIdx.x & (MINIMAX_REDUCE_RMS_WARP_SIZE - 1);
+    if (lane == 0) {
+#pragma unroll
+      for (int t = 0; t < 4; ++t) {
+        block_reduce_sum[t][threadIdx.x / MINIMAX_REDUCE_RMS_WARP_SIZE] =
+            warp_sum_variance[t];
+      }
+    }
+    __syncthreads();
+
+    int tid = threadIdx.x;
+
+    if (tid < MINIMAX_REDUCE_RMS_WARP_SIZE) {
+      constexpr int kNumWarpQPow2 =
+          (next_pow2(NumWarpQ) > NRanks) ? next_pow2(NumWarpQ) : NRanks;
+      float local_sum[4];
+#pragma unroll
+      for (int t = 0; t < 4; ++t) {
+        local_sum[t] = (tid < NumWarpQ) ? block_reduce_sum[t][tid] : 0.F;
+      }
+      // After this, all kNumWarpQPow2 lanes (including tid 0..NRanks-1) have
+      // the total Q sum-of-squares for all 4 tokens.
+      local_warp_reduce_sum_array<kNumWarpQPow2, float, 4>(local_sum);
+
+      if (tid < NRanks) {
+#pragma unroll
+        for (int t = 0; t < 4; ++t) {
+          if (is_neg_zero(local_sum[t])) {
+            local_sum[t] = 0.F;
+          }
+        }
+        // Parallel push: thread tid writes this rank's Q sum to rank tid's buf
+        reinterpret_cast<float4*>(
+            comm.data_bufs[tid])[(params.rank * tot_groups * 2) + (2 * g)] =
+            *reinterpret_cast<float4*>(local_sum);
+
+        // Parallel pull: thread tid reads rank tid's contribution from
+        // this rank's (params.rank's) buffer
+        bool done = false;
+        float4 var_all_ranks;
+        while (!done) {
+          done = true;
+          var_all_ranks = ld_global_volatile(&reinterpret_cast<float4*>(
+              comm.data_bufs[params.rank])[(tid * tot_groups * 2) + (2 * g)]);
+          done &= !is_neg_zero(var_all_ranks);
+        }
+
+        // Warp-level allreduce: each of the NRanks threads holds one rank's
+        // partial sum; after this all NRanks threads have the global total.
+        constexpr uint32_t kQActiveMask = (1u << NRanks) - 1u;
+        local_warp_reduce_sum_array<NRanks, float, 4>(
+            reinterpret_cast<float*>(&var_all_ranks), kQActiveMask);
+
+        // Thread 0 computes rsqrt with compile-time Dim and writes to smem
+        if (tid == 0) {
+          *reinterpret_cast<float4*>(global_scale_q) =
+              rms_rsqrt<OriginQDim>(var_all_ranks, params.rms_eps);
+        }
+      }
+    } else if (tid >= MINIMAX_REDUCE_RMS_WARP_SIZE * NumWarpQ &&
+               tid < MINIMAX_REDUCE_RMS_WARP_SIZE * (NumWarpQ + 1)) {
+      // --- K leader warp ---
+      constexpr int kNumWarpKPow2 =
+          (next_pow2(NumWarpK) > NRanks) ? next_pow2(NumWarpK) : NRanks;
+      float local_sum[4];
+#pragma unroll
+      for (int t = 0; t < 4; ++t) {
+        local_sum[t] = (k_thread_idx < NumWarpK)
+                           ? block_reduce_sum[t][NumWarpQ + k_thread_idx]
+                           : 0.F;
+      }
+      local_warp_reduce_sum_array<kNumWarpKPow2, float, 4>(local_sum);
+
+      if (k_thread_idx < NRanks) {
+#pragma unroll
+        for (int t = 0; t < 4; ++t) {
+          if (is_neg_zero(local_sum[t])) {
+            local_sum[t] = 0.F;
+          }
+        }
+        reinterpret_cast<float4*>(
+            comm.data_bufs[k_thread_idx])[(params.rank * tot_groups * 2) +
+                                          (2 * g + 1)] =
+            *reinterpret_cast<float4*>(local_sum);
+
+        bool done = false;
+        float4 var_all_ranks;
+        while (!done) {
+          done = true;
+          var_all_ranks = ld_global_volatile(&reinterpret_cast<float4*>(
+              comm.data_bufs[params.rank])[(k_thread_idx * tot_groups * 2) +
+                                           (2 * g + 1)]);
+          done &= !is_neg_zero(var_all_ranks);
+        }
+
+        constexpr uint32_t kKActiveMask = (1u << NRanks) - 1u;
+        local_warp_reduce_sum_array<NRanks, float, 4>(
+            reinterpret_cast<float*>(&var_all_ranks), kKActiveMask);
+
+        if (k_thread_idx == 0) {
+          *reinterpret_cast<float4*>(global_scale_k) =
+              rms_rsqrt<OriginKDim>(var_all_ranks, params.rms_eps);
+        }
+      }
+    }
+    __syncthreads();
+
+    if (is_q) {
+#pragma unroll
+      for (int t = 0; t < 4; ++t) {
+        warp_sum_variance[t] = global_scale_q[t];
+      }
+#pragma unroll
+      for (int r = 0; r < 4; ++r) {
+#pragma unroll
+        for (int i = 0; i < kElemsPerAccess<DType>; ++i) {
+          vals[r][i] = static_cast<DType>(static_cast<float>(vals[r][i]) *
+                                          warp_sum_variance[r] *
+                                          static_cast<float>(norm_weight[i]));
+        }
+        int token_r = g * 4 + r;
+        if (token_r >= tot_tokens || !is_valid_q) {
+          continue;
+        }
+        int idx_out = token_r * access_stride_q_out + access_id_in_token;
+        reinterpret_cast<float4*>(params.rms_norm_out)[idx_out] =
+            *reinterpret_cast<float4*>(&vals[r][0]);
+      }
+    } else {
+#pragma unroll
+      for (int t = 0; t < 4; ++t) {
+        warp_sum_variance[t] = global_scale_k[t];
+      }
+#pragma unroll
+      for (int r = 0; r < 4; ++r) {
+#pragma unroll
+        for (int i = 0; i < kElemsPerAccess<DType>; ++i) {
+          vals[r][i] = static_cast<DType>(static_cast<float>(vals[r][i]) *
+                                          warp_sum_variance[r] *
+                                          static_cast<float>(norm_weight[i]));
+        }
+        int token_r = g * 4 + r;
+        if (token_r >= tot_tokens || !is_valid_k) {
+          continue;
+        }
+        int idx_out = token_r * access_stride_k_out + k_thread_idx;
+        reinterpret_cast<float4*>(params.rms_norm_out_k)[idx_out] =
+            *reinterpret_cast<float4*>(&vals[r][0]);
+      }
+    }
+  }  // end group loop
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+  asm volatile("griddepcontrol.launch_dependents;");
+#endif
+
+  int clear_access = static_cast<int>(comm.clear_size / kElemsPerAccess<DType>);
+  int clear_stride = group_stride * blockDim.x;
+  for (int idx = group_id * blockDim.x + threadIdx.x; idx < clear_access;
+       idx += clear_stride) {
+    reinterpret_cast<float4*>(comm.clear_buf)[idx] = clear_vec;
+  }
+
+  comm.update(static_cast<int64_t>(2) * tot_groups * kElemsPerAccess<DType> *
+              NRanks);
+}
+
+int get_sm_count() {
+  static int sm_count = 0;
+  if (sm_count == 0) {
+    int device_id;
+    CUDA_CHECK(cudaGetDevice(&device_id));
+    cudaDeviceProp device_prop;
+    cudaGetDeviceProperties(&device_prop, device_id);
+    sm_count = device_prop.multiProcessorCount;
+  }
+  return sm_count;
+}
+
+inline int getSMVersion(bool queryRealSmArch = false) {
+  int device{-1};
+  CUDA_CHECK(cudaGetDevice(&device));
+  int sm_major = 0;
+  int sm_minor = 0;
+  CUDA_CHECK(cudaDeviceGetAttribute(&sm_major,
+                                    cudaDevAttrComputeCapabilityMajor, device));
+  CUDA_CHECK(cudaDeviceGetAttribute(&sm_minor,
+                                    cudaDevAttrComputeCapabilityMinor, device));
+  int sm = sm_major * 10 + sm_minor;
+  if (sm == 121 && !queryRealSmArch) {
+    return 120;
+  }
+  return sm;
+}
+
+template <typename KernelFunc>
+int get_max_active_blocks(KernelFunc kernel, int block_size,
+                          int dynamic_smem = 0) {
+  int max_active = 0;
+  CUDA_CHECK(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+      &max_active, kernel, block_size, dynamic_smem));
+  return std::max(max_active, 1);
+}
+
+template <typename DType, int NRanks>
+void minimax_reduce_rms_kernel_launcher(MiniMaxReduceRMSParams const& params) {
+  static int SM = getSMVersion();
+  int token_num = params.size_q / params.hidden_dim;
+  int sm_count = get_sm_count();
+  int cluster_size = 1;
+  int cluster_num = token_num;
+  int threads_per_token = params.hidden_dim / kElemsPerAccess<DType>;
+  int block_size = threads_per_token;
+
+  int max_blocks_per_sm = get_max_active_blocks(
+      minimax_reduce_rms_kernel_lamport<DType, NRanks>, block_size);
+  int max_grid = max_blocks_per_sm * sm_count;
+
+  int grid_size =
+      (std::min(max_grid, cluster_num * cluster_size) / cluster_size) *
+      cluster_size;
+
+  cudaLaunchConfig_t cfg;
+  cfg.gridDim = grid_size;
+  cfg.blockDim = block_size;
+  cfg.dynamicSmemBytes = 0;
+  cfg.stream = params.stream;
+
+  cudaLaunchAttribute attribute[2];
+  attribute[0].id = cudaLaunchAttributeProgrammaticStreamSerialization;
+  attribute[0].val.programmaticStreamSerializationAllowed = 1;
+  attribute[1].id = cudaLaunchAttributeClusterDimension;
+  attribute[1].val.clusterDim.x = cluster_size;
+  attribute[1].val.clusterDim.y = 1;
+  attribute[1].val.clusterDim.z = 1;
+  cfg.attrs = attribute;
+  cfg.numAttrs = SM >= 90 ? 2 : 0;
+
+  CUDA_CHECK(cudaLaunchKernelEx(
+      &cfg, minimax_reduce_rms_kernel_lamport<DType, NRanks>, params));
+}
+
+template <typename DType, int NRanks, int OriginQDim, int OriginKDim>
+void minimax_reduce_rms_kernel_launcher_float4(
+    MiniMaxReduceRMSParams const& params) {
+  TORCH_CHECK(params.size_q % params.hidden_dim == 0);
+  TORCH_CHECK(params.hidden_dim % kElemsPerAccess<DType> == 0);
+  if (params.stride_q > 0) {
+    TORCH_CHECK(params.stride_q % kElemsPerAccess<DType> == 0);
+  }
+  TORCH_CHECK(params.allreduce_in_k != nullptr,
+              "float4 QK kernel requires K input");
+  TORCH_CHECK(params.hidden_dim >= params.hidden_dim_k);
+  TORCH_CHECK(params.size_k % params.hidden_dim_k == 0);
+  TORCH_CHECK(params.hidden_dim_k % kElemsPerAccess<DType> == 0);
+  TORCH_CHECK(params.size_q / params.hidden_dim ==
+              params.size_k / params.hidden_dim_k);
+  if (params.stride_k > 0) {
+    TORCH_CHECK(params.stride_k % kElemsPerAccess<DType> == 0);
+  }
+
+  int token_num = params.size_q / params.hidden_dim;
+  int tot_groups = (token_num + 3) / 4;
+  if (tot_groups == 0) {
+    return;
+  }
+
+  static int SM = getSMVersion();
+  int sm_count = get_sm_count();
+  int cluster_size = 1;
+  int cluster_num = tot_groups;
+
+  int access_per_row_q = params.hidden_dim / kElemsPerAccess<DType>;
+  int access_per_row_k = params.hidden_dim_k / kElemsPerAccess<DType>;
+
+  // Round each section up to a warp boundary
+  auto divUp = [](int a, int b) { return (a + b - 1) / b * b; };
+  int block_size = divUp(access_per_row_q, MINIMAX_REDUCE_RMS_WARP_SIZE) +
+                   divUp(access_per_row_k, MINIMAX_REDUCE_RMS_WARP_SIZE);
+
+  auto kfn =
+      minimax_reduce_qk_rms_kernel_lamport_float4<DType, NRanks, OriginQDim,
+                                                  OriginKDim>;
+
+  int max_blocks_per_sm = get_max_active_blocks(kfn, block_size);
+  int max_grid = max_blocks_per_sm * sm_count;
+  int grid_size =
+      (std::min(max_grid, cluster_num * cluster_size) / cluster_size) *
+      cluster_size;
+
+  cudaLaunchConfig_t cfg;
+  cfg.gridDim = grid_size;
+  cfg.blockDim = block_size;
+  cfg.dynamicSmemBytes = 0;
+  cfg.stream = params.stream;
+
+  cudaLaunchAttribute attribute[2];
+  attribute[0].id = cudaLaunchAttributeProgrammaticStreamSerialization;
+  attribute[0].val.programmaticStreamSerializationAllowed = 1;
+  attribute[1].id = cudaLaunchAttributeClusterDimension;
+  attribute[1].val.clusterDim.x = cluster_size;
+  attribute[1].val.clusterDim.y = 1;
+  attribute[1].val.clusterDim.z = 1;
+  cfg.attrs = attribute;
+  cfg.numAttrs = SM >= 90 ? 2 : 0;
+
+  CUDA_CHECK(cudaLaunchKernelEx(&cfg, kfn, params));
+}
+
+template <int NRanks>
+void dispatch_dtype(MiniMaxReduceRMSParams const& params) {
+  // Use the optimized QK float4 kernel when:
+  //  - K input is present, AND
+  //  - the full (NRanks * per-rank) dimensions match the MiniMax M2 shape.
+  // Otherwise fall back to the scalar kernel.
+  bool use_float4 = (params.allreduce_in_k != nullptr) &&
+                    (params.hidden_dim * params.nranks == 6144) &&
+                    (params.hidden_dim_k * params.nranks == 1024);
+
+  if (params.dtype == at::ScalarType::Half) {
+    if (use_float4) {
+      minimax_reduce_rms_kernel_launcher_float4<half, NRanks, 6144, 1024>(
+          params);
+    } else {
+      minimax_reduce_rms_kernel_launcher<half, NRanks>(params);
+    }
+  } else if (params.dtype == at::ScalarType::BFloat16) {
+    if (use_float4) {
+      minimax_reduce_rms_kernel_launcher_float4<__nv_bfloat16, NRanks, 6144,
+                                                1024>(params);
+    } else {
+      minimax_reduce_rms_kernel_launcher<__nv_bfloat16, NRanks>(params);
+    }
+  } else if (params.dtype == at::ScalarType::Float) {
+    if (use_float4) {
+      minimax_reduce_rms_kernel_launcher_float4<float, NRanks, 6144, 1024>(
+          params);
+    } else {
+      minimax_reduce_rms_kernel_launcher<float, NRanks>(params);
+    }
+  } else {
+    TORCH_CHECK(false, "Unsupported data type for minimax_reduce_rms_op");
+  }
+}
+
+void minimax_reduce_rms_op(MiniMaxReduceRMSParams const& params) {
+  if (params.nranks == 2) {
+    dispatch_dtype<2>(params);
+  } else if (params.nranks == 4) {
+    dispatch_dtype<4>(params);
+  } else if (params.nranks == 8) {
+    dispatch_dtype<8>(params);
+  } else if (params.nranks == 16) {
+    dispatch_dtype<16>(params);
+  } else {
+    TORCH_CHECK(false, "minimax_reduce_rms_op: unsupported ranks number!");
+  }
+}
+}  // namespace tensorrt_llm
+}  // namespace vllm
+
+torch::Tensor minimax_allreduce_rms(torch::Tensor const& input,
+                                    torch::Tensor const& norm_weight,
+                                    torch::Tensor workspace, int64_t const rank,
+                                    int64_t const nranks, double const eps) {
+  auto allreduce_params = vllm::tensorrt_llm::MiniMaxReduceRMSParams();
+
+  allreduce_params.nranks = static_cast<int>(nranks);
+  allreduce_params.rank = static_cast<int>(rank);
+  allreduce_params.dtype = input.scalar_type();
+  allreduce_params.size_q = static_cast<int>(input.numel());
+  allreduce_params.hidden_dim = static_cast<int>(input.size(-1));
+  allreduce_params.stride_q = allreduce_params.hidden_dim;
+  allreduce_params.workspace =
+      reinterpret_cast<void**>(workspace.mutable_data_ptr());
+  allreduce_params.allreduce_in = input.data_ptr();
+  allreduce_params.rms_gamma = norm_weight.data_ptr();
+  allreduce_params.rms_eps = static_cast<float>(eps);
+  allreduce_params.stream = at::cuda::getCurrentCUDAStream(input.get_device());
+
+  torch::Tensor rms_norm_out = torch::empty_like(input);
+  allreduce_params.rms_norm_out = rms_norm_out.mutable_data_ptr();
+
+  vllm::tensorrt_llm::minimax_reduce_rms_op(allreduce_params);
+
+  return rms_norm_out;
+}
+
+std::tuple<torch::Tensor, torch::Tensor> minimax_allreduce_rms_qk(
+    torch::Tensor qkv, torch::Tensor const& norm_weight_q,
+    torch::Tensor const& norm_weight_k, torch::Tensor workspace,
+    int64_t const q_size, int64_t const kv_size, int64_t const rank,
+    int64_t const nranks, double const eps) {
+  TORCH_CHECK(qkv.dim() == 2, "minimax_allreduce_rms_qk: qkv must be 2D");
+  TORCH_CHECK(qkv.is_contiguous(),
+              "minimax_allreduce_rms_qk: qkv must be contiguous");
+  int64_t qkv_dim = qkv.size(-1);
+  TORCH_CHECK(qkv_dim == q_size + 2 * kv_size,
+              "minimax_allreduce_rms_qk: qkv last dim must equal "
+              "q_size + 2 * kv_size");
+  TORCH_CHECK(rank < nranks,
+              "minimax_allreduce_rms_qk: rank must be less than nranks");
+
+  int64_t num_tokens = qkv.size(0);
+  int elem_bytes = qkv.element_size();
+
+  torch::Tensor q_out = torch::empty({num_tokens, q_size}, qkv.options());
+  torch::Tensor k_out = torch::empty({num_tokens, kv_size}, qkv.options());
+
+  auto params = vllm::tensorrt_llm::MiniMaxReduceRMSParams();
+  params.nranks = static_cast<int>(nranks);
+  params.rank = static_cast<int>(rank);
+  params.dtype = qkv.scalar_type();
+  params.size_q = static_cast<int>(num_tokens * q_size);
+  params.hidden_dim = static_cast<int>(q_size);
+  params.size_k = static_cast<int>(num_tokens * kv_size);
+  params.hidden_dim_k = static_cast<int>(kv_size);
+  params.stride_q = static_cast<int>(qkv_dim);
+  params.stride_k = static_cast<int>(qkv_dim);
+  params.stride_q_out = 0;  // q_out is contiguous; kernel uses hidden_dim
+  params.stride_k_out = 0;  // k_out is contiguous; kernel uses hidden_dim_k
+  params.workspace = reinterpret_cast<void**>(workspace.mutable_data_ptr());
+
+  uint8_t* base = static_cast<uint8_t*>(qkv.data_ptr());
+  params.allreduce_in = base;
+  params.allreduce_in_k = base + q_size * elem_bytes;
+  params.rms_gamma = norm_weight_q.data_ptr();
+  params.rms_gamma_k = norm_weight_k.data_ptr();
+  params.rms_eps = static_cast<float>(eps);
+  params.stream = at::cuda::getCurrentCUDAStream(qkv.get_device());
+
+  params.rms_norm_out = q_out.mutable_data_ptr();
+  params.rms_norm_out_k = k_out.mutable_data_ptr();
+
+  vllm::tensorrt_llm::minimax_reduce_rms_op(params);
+  return {q_out, k_out};
+}
diff --git a/csrc/minimax_reduce_rms_kernel.h b/csrc/minimax_reduce_rms_kernel.h
new file mode 100644
index 000000000000..e8c2d012247b
--- /dev/null
+++ b/csrc/minimax_reduce_rms_kernel.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cuda_bf16.h>
+#include <cuda_fp16.h>
+
+#include <torch/types.h>
+
+namespace vllm {
+namespace tensorrt_llm {
+
+template <typename DType>
+struct ElemsPerAccess;
+
+template <>
+struct ElemsPerAccess<half> {
+  static constexpr int value = 8;
+  using vec_type = float4;
+};
+
+template <>
+struct ElemsPerAccess<nv_bfloat16> {
+  static constexpr int value = 8;
+  using vec_type = float4;
+};
+
+template <>
+struct ElemsPerAccess<float> {
+  static constexpr int value = 4;
+  using vec_type = float4;
+};
+
+template <typename DType>
+static constexpr int kElemsPerAccess = ElemsPerAccess<DType>::value;
+
+struct MiniMaxReduceRMSParams {
+  int nranks{};
+  int rank{};
+  at::ScalarType dtype{at::ScalarType::Undefined};
+  int size_q{};
+  int hidden_dim{};
+  int size_k{};
+  int hidden_dim_k{};
+  int stride_q{};  // row stride for q input (elements); when > hidden_dim,
+                   // q is part of a wider qkv tensor
+  int stride_k{};  // row stride for k input (elements); when > hidden_dim_k,
+                   // k is part of a wider qkv tensor
+  int stride_q_out{};  // row stride for q output (elements); 0 = contiguous
+  int stride_k_out{};  // row stride for k output (elements); 0 = contiguous
+  void** workspace{};
+  void* allreduce_in{};
+  void* rms_norm_out{};
+  void* rms_gamma{};
+  void* allreduce_in_k{};
+  void* rms_norm_out_k{};
+  void* rms_gamma_k{};
+  float rms_eps{};
+  cudaStream_t stream{};
+};
+
+void minimax_reduce_rms_op(MiniMaxReduceRMSParams const& params);
+
+}  // namespace tensorrt_llm
+}  // namespace vllm
diff --git a/csrc/moe/dsv4_norm_router_gemm.h b/csrc/moe/dsv4_norm_router_gemm.h
new file mode 100644
index 000000000000..7f66bfcb4aed
--- /dev/null
+++ b/csrc/moe/dsv4_norm_router_gemm.h
@@ -0,0 +1,30 @@
+/*
+ * Fused RMSNorm + router GEMV for DeepSeek V4.
+ *
+ * Computes in a single kernel:
+ *   normed_x[m,k]   = x[m,k] * rsqrt(mean(x[m]^2) + eps) * norm_weight[k]
+ *   router_logits[m,n] = sum_k(normed_x[m,k] * gate_weight[n,k])
+ *
+ * The GEMV body mirrors the algorithm in csrc/moe/dsv3_router_gemm_*.cu
+ * (warp butterfly + smem cross-warp reduction, fp32 accumulation, PDL on
+ * SM90+).  Blocks 0..kNumTokens-1 each materialize one token's normed_x
+ * row to global memory using the algebraic identity
+ *      logits[m,n] = rsqrt[m] * sum_k(x[m,k] * nw[k] * gw[n,k])
+ * which lets every block produce its column of logits before normed_x
+ * exists in gmem.
+ *
+ * Logits output is fp32 only — DeepSeek V4 router gate is hard-coded to
+ * fp32 (vllm/model_executor/models/deepseek_v4.py:749).
+ */
+
+#pragma once
+
+#include <cuda_bf16.h>
+#include <cuda_runtime.h>
+
+#include "dsv3_router_gemm_utils.h"
+
+template <typename T, int kNumTokens, int kNumExperts, int kHiddenDim>
+void invokeNormRouterGemm(float* logits, __nv_bfloat16* normed_x, T const* x,
+                          T const* norm_weight, T const* gate_weight, float eps,
+                          cudaStream_t stream);
diff --git a/csrc/moe/dsv4_norm_router_gemm_entry.cu b/csrc/moe/dsv4_norm_router_gemm_entry.cu
new file mode 100644
index 000000000000..1232248e6177
--- /dev/null
+++ b/csrc/moe/dsv4_norm_router_gemm_entry.cu
@@ -0,0 +1,130 @@
+/*
+ * TORCH op entry for the fused RMSNorm + router GEMV kernel
+ * (DeepSeek V4 Pro).  This op is DSV4-Pro-specific: the kernel is
+ * instantiated only for ``num_experts == 384`` and ``hidden_dim ==
+ * 7168``.  Other configurations (e.g. DSV4-Flash with H=4096) must
+ * fall back to the unfused ``rms_norm`` + ``dsv3_router_gemm`` path.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <torch/all.h>
+
+#include <cuda_bf16.h>
+#include <cuda_runtime.h>
+
+#include "core/registration.h"
+#include "dsv4_norm_router_gemm.h"
+
+namespace {
+
+// DSV4-Pro hard-coded shape constants.  Renamed from the earlier
+// ``kKimiK2NumExperts`` to avoid the misleading impression that this
+// kernel targets Kimi K2 — 384 happens to match Kimi K2's gate but the
+// intent here is DSV4-Pro.
+constexpr int kDsv4NumExperts = 384;
+constexpr int kDsv4HiddenDim = 7168;
+
+template <int kBegin, int kEnd>
+struct LoopUnroller {
+  static void unroll(int num_tokens, float* logits, __nv_bfloat16* normed_x,
+                     __nv_bfloat16 const* x, __nv_bfloat16 const* norm_weight,
+                     __nv_bfloat16 const* gate_weight, float eps,
+                     cudaStream_t stream) {
+    if (num_tokens == kBegin) {
+      invokeNormRouterGemm<__nv_bfloat16, kBegin, kDsv4NumExperts,
+                           kDsv4HiddenDim>(logits, normed_x, x, norm_weight,
+                                           gate_weight, eps, stream);
+    } else {
+      LoopUnroller<kBegin + 1, kEnd>::unroll(num_tokens, logits, normed_x, x,
+                                             norm_weight, gate_weight, eps,
+                                             stream);
+    }
+  }
+};
+
+template <int kEnd>
+struct LoopUnroller<kEnd, kEnd> {
+  static void unroll(int num_tokens, float* logits, __nv_bfloat16* normed_x,
+                     __nv_bfloat16 const* x, __nv_bfloat16 const* norm_weight,
+                     __nv_bfloat16 const* gate_weight, float eps,
+                     cudaStream_t stream) {
+    if (num_tokens == kEnd) {
+      invokeNormRouterGemm<__nv_bfloat16, kEnd, kDsv4NumExperts,
+                           kDsv4HiddenDim>(logits, normed_x, x, norm_weight,
+                                           gate_weight, eps, stream);
+    } else {
+      throw std::invalid_argument(
+          "Invalid num_tokens, only supports 1 to 16 for "
+          "dsv4_norm_router_gemm");
+    }
+  }
+};
+
+}  // namespace
+
+void dsv4_norm_router_gemm(at::Tensor& logits,    // [num_tokens, E] fp32
+                           at::Tensor& normed_x,  // [num_tokens, H] bf16
+                           at::Tensor const& x,   // [num_tokens, H] bf16
+                           at::Tensor const& norm_weight,  // [H] bf16
+                           at::Tensor const& gate_weight,  // [E, H] bf16
+                           double eps) {
+  TORCH_CHECK(x.dim() == 2 && norm_weight.dim() == 1 && gate_weight.dim() == 2,
+              "x must be 2D, norm_weight 1D, gate_weight 2D");
+  TORCH_CHECK(logits.dim() == 2 && normed_x.dim() == 2,
+              "logits and normed_x must be 2D");
+
+  int const num_tokens = x.size(0);
+  int const hidden_dim = x.size(1);
+  int const num_experts = gate_weight.size(0);
+
+  TORCH_CHECK(hidden_dim == kDsv4HiddenDim,
+              "Expected hidden_dim=", kDsv4HiddenDim,
+              " (DSV4-Pro), but got hidden_dim=", hidden_dim);
+  TORCH_CHECK(gate_weight.size(1) == hidden_dim,
+              "gate_weight.shape[1] must equal x.shape[1]");
+  TORCH_CHECK(norm_weight.size(0) == hidden_dim,
+              "norm_weight.shape[0] must equal x.shape[1]");
+  TORCH_CHECK(num_experts == kDsv4NumExperts,
+              "Expected num_experts=", kDsv4NumExperts,
+              " (DSV4-Pro), but got num_experts=", num_experts);
+  TORCH_CHECK(num_tokens >= 1 && num_tokens <= 16,
+              "num_tokens must be in [1, 16] for dsv4_norm_router_gemm");
+
+  TORCH_CHECK(x.dtype() == at::kBFloat16, "x must be bf16");
+  TORCH_CHECK(norm_weight.dtype() == at::kBFloat16, "norm_weight must be bf16");
+  TORCH_CHECK(gate_weight.dtype() == at::kBFloat16, "gate_weight must be bf16");
+  TORCH_CHECK(normed_x.dtype() == at::kBFloat16, "normed_x must be bf16");
+  TORCH_CHECK(logits.dtype() == at::kFloat,
+              "logits must be float32 (DSV4 router output is hard-coded fp32)");
+
+  TORCH_CHECK(normed_x.size(0) == num_tokens && normed_x.size(1) == hidden_dim,
+              "normed_x must be [num_tokens, hidden_dim]");
+  TORCH_CHECK(logits.size(0) == num_tokens && logits.size(1) == num_experts,
+              "logits must be [num_tokens, num_experts]");
+
+  TORCH_CHECK(x.is_contiguous() && norm_weight.is_contiguous() &&
+                  gate_weight.is_contiguous() && normed_x.is_contiguous() &&
+                  logits.is_contiguous(),
+              "all tensors must be contiguous");
+
+  auto const sm = getSMVersion();
+  TORCH_CHECK(sm >= 90 && sm <= 103,
+              "dsv4_norm_router_gemm requires SM_90 <= CUDA ARCH <= SM_103");
+
+  cudaStream_t const stream = at::cuda::getCurrentCUDAStream();
+
+  auto* logits_ptr = reinterpret_cast<float*>(logits.mutable_data_ptr());
+  auto* nx_ptr = reinterpret_cast<__nv_bfloat16*>(normed_x.mutable_data_ptr());
+  auto* x_ptr = reinterpret_cast<__nv_bfloat16 const*>(x.data_ptr());
+  auto* nw_ptr = reinterpret_cast<__nv_bfloat16 const*>(norm_weight.data_ptr());
+  auto* gw_ptr = reinterpret_cast<__nv_bfloat16 const*>(gate_weight.data_ptr());
+  float const eps_f = static_cast<float>(eps);
+
+  LoopUnroller<1, 16>::unroll(num_tokens, logits_ptr, nx_ptr, x_ptr, nw_ptr,
+                              gw_ptr, eps_f, stream);
+}
+
+TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
+  m.impl("dsv4_norm_router_gemm", &dsv4_norm_router_gemm);
+}
diff --git a/csrc/moe/dsv4_norm_router_gemm_kernel.cu b/csrc/moe/dsv4_norm_router_gemm_kernel.cu
new file mode 100644
index 000000000000..dc6e17f19b18
--- /dev/null
+++ b/csrc/moe/dsv4_norm_router_gemm_kernel.cu
@@ -0,0 +1,249 @@
+/*
+ * Fused RMSNorm + router GEMV for DeepSeek V4 (logits are fp32; bf16
+ * output is unsupported because DSV4 hard-codes fp32 logits).  See
+ * dsv4_norm_router_gemm.h for the math.
+ *
+ * The GEMV body mirrors csrc/moe/dsv3_router_gemm_float_out.cu (warp
+ * butterfly reduction + smem cross-warp reduction, fp32 accumulation,
+ * 128-thread block, PDL on SM90+).  RMSNorm is folded into the same
+ * pass via the identity
+ *   logits[m,n] = rsqrt[m] * sum_k(x[m,k] * nw[k] * gw[n,k])
+ * so x is read exactly once per block during the GEMV phase.  Blocks
+ * 0..kNumTokens-1 each materialize one row of normed_x for downstream
+ * experts / shared_experts to consume.
+ */
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+
+#include <cuda_bf16.h>
+#include <cuda_runtime.h>
+
+#include "dsv4_norm_router_gemm.h"
+
+namespace {
+
+// Convert 8 bf16 values packed in uint4 into 8 floats. Mirrors the helper
+// in dsv3_router_gemm_float_out.cu (kept local so the dsv3 file stays
+// untouched).
+template <int VPT>
+__device__ __forceinline__ void bf16_uint4_to_float8(uint4 const& vec,
+                                                     float* dst) {
+  __nv_bfloat16* bf16_ptr =
+      reinterpret_cast<__nv_bfloat16*>(const_cast<uint4*>(&vec));
+#pragma unroll
+  for (int i = 0; i < VPT; i++) {
+    dst[i] = __bfloat162float(bf16_ptr[i]);
+  }
+}
+
+template <typename T, int kBlockSize, int VPT, int kNumTokens, int kNumExperts,
+          int kHiddenDim>
+__global__ __launch_bounds__(128, 1) void norm_router_gemm_kernel(
+    float* __restrict__ logits, __nv_bfloat16* __restrict__ normed_x,
+    T const* __restrict__ x, T const* __restrict__ norm_weight,
+    T const* __restrict__ gate_weight, float eps) {
+  static_assert(kBlockSize == 128, "kernel assumes blockDim.x == 128");
+  static_assert(kHiddenDim % (VPT * kBlockSize) == 0,
+                "kHiddenDim must be a multiple of VPT * kBlockSize");
+
+  int const n_idx = blockIdx.x;
+  int const tid = threadIdx.x;
+  constexpr int kWarpSize = 32;
+  constexpr int kNumWarps = kBlockSize / kWarpSize;
+  constexpr int k_elems_per_iter = VPT * kBlockSize;
+  constexpr int k_iterations = kHiddenDim / k_elems_per_iter;
+
+  T const* gw_col = gate_weight + n_idx * kHiddenDim;
+
+  // Per-thread accumulators — fp32 throughout, matching dsv3 / layernorm.
+  float partial[kNumTokens] = {};
+  float ss[kNumTokens] = {};
+
+  // Cross-warp reduction scratch.
+  __shared__ float sm_partial[kNumTokens][kNumWarps];
+  __shared__ float sm_ss[kNumTokens][kNumWarps];
+  __shared__ float s_rsqrt[kNumTokens];
+
+  int k_bases[k_iterations];
+#pragma unroll
+  for (int ki = 0; ki < k_iterations; ki++) {
+    k_bases[ki] = ki * k_elems_per_iter + tid * VPT;
+  }
+
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+  asm volatile("griddepcontrol.wait;");
+#endif
+
+  // ---- Phase 1: single pass over x, accumulate partial GEMV and ss. ----
+#pragma unroll
+  for (int ki = 0; ki < k_iterations; ki++) {
+    int const k_base = k_bases[ki];
+
+    uint4 nw_vec = *reinterpret_cast<uint4 const*>(norm_weight + k_base);
+    float nw_f[VPT];
+    bf16_uint4_to_float8<VPT>(nw_vec, nw_f);
+
+    uint4 b_vec = *reinterpret_cast<uint4 const*>(gw_col + k_base);
+    float b_f[VPT];
+    bf16_uint4_to_float8<VPT>(b_vec, b_f);
+
+#pragma unroll
+    for (int m = 0; m < kNumTokens; m++) {
+      uint4 a_vec =
+          *reinterpret_cast<uint4 const*>(x + m * kHiddenDim + k_base);
+      float a_f[VPT];
+      bf16_uint4_to_float8<VPT>(a_vec, a_f);
+
+#pragma unroll
+      for (int k = 0; k < VPT; k++) {
+        float a = a_f[k];
+        ss[m] += a * a;
+        partial[m] += a * nw_f[k] * b_f[k];
+      }
+    }
+  }
+
+  // ---- Phase 2: warp butterfly reduction for both ss[] and partial[]. ----
+  int const warpId = tid / kWarpSize;
+  int const laneId = tid % kWarpSize;
+
+#pragma unroll
+  for (int m = 0; m < kNumTokens; m++) {
+    float p = partial[m];
+    float s = ss[m];
+
+    p += __shfl_xor_sync(0xffffffff, p, 16);
+    s += __shfl_xor_sync(0xffffffff, s, 16);
+    p += __shfl_xor_sync(0xffffffff, p, 8);
+    s += __shfl_xor_sync(0xffffffff, s, 8);
+    p += __shfl_xor_sync(0xffffffff, p, 4);
+    s += __shfl_xor_sync(0xffffffff, s, 4);
+    p += __shfl_xor_sync(0xffffffff, p, 2);
+    s += __shfl_xor_sync(0xffffffff, s, 2);
+    p += __shfl_xor_sync(0xffffffff, p, 1);
+    s += __shfl_xor_sync(0xffffffff, s, 1);
+
+    if (laneId == 0) {
+      sm_partial[m][warpId] = p;
+      sm_ss[m][warpId] = s;
+    }
+  }
+
+  __syncthreads();
+
+  // ---- Phase 3: tid 0 finalises the reduction, writes logits, stashes
+  //               rsqrt[m] in smem for phase 4. ----
+  if (tid == 0) {
+#pragma unroll
+    for (int m = 0; m < kNumTokens; m++) {
+      float p_sum = 0.0f;
+      float s_sum = 0.0f;
+#pragma unroll
+      for (int w = 0; w < kNumWarps; w++) {
+        p_sum += sm_partial[m][w];
+        s_sum += sm_ss[m][w];
+      }
+      // Order matches layernorm_kernels.cu: rsqrtf(variance / H + eps).
+      // Use division (not multiply-by-reciprocal) to avoid an extra ULP
+      // mismatch with the reference RMSNorm.
+      float rs = rsqrtf(s_sum / static_cast<float>(kHiddenDim) + eps);
+      s_rsqrt[m] = rs;
+      logits[m * kNumExperts + n_idx] = p_sum * rs;
+    }
+  }
+
+  __syncthreads();
+
+  // ---- Phase 4: spread normed_x writes across blocks 0..kNumTokens-1.
+  //              Each writer block handles exactly one token row,
+  //              avoiding the long tail of block 0 doing all M rows.
+  //              Every block has every token's rsqrt[] in s_rsqrt
+  //              already (computed independently in phase 3), so no
+  //              cross-block synchronization is required. ----
+  if (n_idx < kNumTokens) {
+    int const m_writer = n_idx;
+    float const rs = s_rsqrt[m_writer];
+    __nv_bfloat16 const* x_row = x + m_writer * kHiddenDim;
+    __nv_bfloat16* normed_row = normed_x + m_writer * kHiddenDim;
+
+#pragma unroll
+    for (int ki = 0; ki < k_iterations; ki++) {
+      int const k_base = k_bases[ki];
+
+      uint4 nw_vec = *reinterpret_cast<uint4 const*>(norm_weight + k_base);
+      float nw_f[VPT];
+      bf16_uint4_to_float8<VPT>(nw_vec, nw_f);
+
+      uint4 a_vec = *reinterpret_cast<uint4 const*>(x_row + k_base);
+      float a_f[VPT];
+      bf16_uint4_to_float8<VPT>(a_vec, a_f);
+
+      uint4 normed_vec;
+      __nv_bfloat16* np = reinterpret_cast<__nv_bfloat16*>(&normed_vec);
+#pragma unroll
+      for (int k = 0; k < VPT; k++) {
+        np[k] = __float2bfloat16(a_f[k] * rs * nw_f[k]);
+      }
+      *reinterpret_cast<uint4*>(normed_row + k_base) = normed_vec;
+    }
+  }
+
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+  asm volatile("griddepcontrol.launch_dependents;");
+#endif
+}
+
+}  // namespace
+
+template <typename T, int kNumTokens, int kNumExperts, int kHiddenDim>
+void invokeNormRouterGemm(float* logits, __nv_bfloat16* normed_x, T const* x,
+                          T const* norm_weight, T const* gate_weight, float eps,
+                          cudaStream_t stream) {
+  constexpr int VPT = 16 / sizeof(T);
+  constexpr int kBlockSize = 128;
+
+  cudaLaunchConfig_t config;
+  config.gridDim = kNumExperts;
+  config.blockDim = kBlockSize;
+  config.dynamicSmemBytes = 0;
+  config.stream = stream;
+
+  cudaLaunchAttribute attrs[1];
+  attrs[0].id = cudaLaunchAttributeProgrammaticStreamSerialization;
+  attrs[0].val.programmaticStreamSerializationAllowed = 1;
+  config.numAttrs = 1;
+  config.attrs = attrs;
+
+  cudaLaunchKernelEx(&config,
+                     norm_router_gemm_kernel<T, kBlockSize, VPT, kNumTokens,
+                                             kNumExperts, kHiddenDim>,
+                     logits, normed_x, x, norm_weight, gate_weight, eps);
+}
+
+// Template instantiations — DSV4-Pro is the only supported configuration:
+// num_experts=384, hidden_dim=7168.  Other shapes (e.g. DSV4-Flash with
+// hidden_dim=4096) fall back to the unfused path on the Python side.
+#define INSTANTIATE(M)                                                    \
+  template void invokeNormRouterGemm<__nv_bfloat16, M, 384, 7168>(        \
+      float*, __nv_bfloat16*, __nv_bfloat16 const*, __nv_bfloat16 const*, \
+      __nv_bfloat16 const*, float, cudaStream_t);
+
+INSTANTIATE(1)
+INSTANTIATE(2)
+INSTANTIATE(3)
+INSTANTIATE(4)
+INSTANTIATE(5)
+INSTANTIATE(6)
+INSTANTIATE(7)
+INSTANTIATE(8)
+INSTANTIATE(9)
+INSTANTIATE(10)
+INSTANTIATE(11)
+INSTANTIATE(12)
+INSTANTIATE(13)
+INSTANTIATE(14)
+INSTANTIATE(15)
+INSTANTIATE(16)
+
+#undef INSTANTIATE
diff --git a/csrc/moe/fp32_router_gemm.cu b/csrc/moe/fp32_router_gemm.cu
new file mode 100644
index 000000000000..7f0f9be9ac06
--- /dev/null
+++ b/csrc/moe/fp32_router_gemm.cu
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+//
+// Router GEMM: activation(T) x weight(fp32) -> fp32, H=3072, E=256, M<=32.
+// Supports bf16 or fp32 activation; weight is always fp32.
+// Adapted from dsv3_router_gemm_float_out.cu.
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <cuda_bf16.h>
+#include <cuda_runtime.h>
+
+#include "dsv3_router_gemm_utils.h"
+
+// ---------------------------------------------------------------------------
+// Load helpers
+// ---------------------------------------------------------------------------
+
+// Load VPT fp32 values from the weight matrix (always fp32).
+//   VPT=4 when activation is fp32 (one float4 load)
+//   VPT=8 when activation is bf16 (two float4 loads)
+template <int VPT>
+__device__ __forceinline__ void load_weight(float const* ptr, float* dst);
+
+template <>
+__device__ __forceinline__ void load_weight<4>(float const* ptr, float* dst) {
+  float4 v = *reinterpret_cast<float4 const*>(ptr);
+  dst[0] = v.x;
+  dst[1] = v.y;
+  dst[2] = v.z;
+  dst[3] = v.w;
+}
+
+template <>
+__device__ __forceinline__ void load_weight<8>(float const* ptr, float* dst) {
+  float4 v0 = *reinterpret_cast<float4 const*>(ptr);
+  float4 v1 = *reinterpret_cast<float4 const*>(ptr + 4);
+  dst[0] = v0.x;
+  dst[1] = v0.y;
+  dst[2] = v0.z;
+  dst[3] = v0.w;
+  dst[4] = v1.x;
+  dst[5] = v1.y;
+  dst[6] = v1.z;
+  dst[7] = v1.w;
+}
+
+// Load VPT activation values and convert to fp32.
+template <typename T, int VPT>
+__device__ __forceinline__ void load_activation(T const* ptr, float* dst);
+
+// fp32 activation: one float4 load, no conversion needed.
+template <>
+__device__ __forceinline__ void load_activation<float, 4>(float const* ptr,
+                                                          float* dst) {
+  float4 v = *reinterpret_cast<float4 const*>(ptr);
+  dst[0] = v.x;
+  dst[1] = v.y;
+  dst[2] = v.z;
+  dst[3] = v.w;
+}
+
+// bf16 activation: one uint4 load (8 × bf16) + element-wise conversion.
+template <>
+__device__ __forceinline__ void load_activation<__nv_bfloat16, 8>(
+    __nv_bfloat16 const* ptr, float* dst) {
+  uint4 v = *reinterpret_cast<uint4 const*>(ptr);
+  __nv_bfloat16 const* bf16_ptr = reinterpret_cast<__nv_bfloat16 const*>(&v);
+#pragma unroll
+  for (int i = 0; i < 8; i++) dst[i] = __bfloat162float(bf16_ptr[i]);
+}
+
+// ---------------------------------------------------------------------------
+// Kernel
+// ---------------------------------------------------------------------------
+
+// InputT : type of activation (float or __nv_bfloat16)
+// Weight is always fp32; output is always fp32.
+// VPT = 16 / sizeof(InputT):  4 for fp32, 8 for bf16
+template <typename InputT, int kBlockSize, int kNumTokens, int kNumExperts,
+          int kHiddenDim, bool ENABLE_PDL>
+__global__ __launch_bounds__(128, 1) void fp32_router_gemm_kernel(
+    float* out, InputT const* mat_a, float const* mat_b) {
+  constexpr int VPT = 16 / sizeof(InputT);
+  constexpr int k_elems_per_k_iteration = VPT * kBlockSize;
+  constexpr int k_iterations = kHiddenDim / k_elems_per_k_iteration;
+  constexpr int kWarpSize = 32;
+  constexpr int kNumWarps = kBlockSize / kWarpSize;
+
+  int const n_idx = blockIdx.x;
+  int const tid = threadIdx.x;
+  int const warpId = tid / kWarpSize;
+  int const laneId = tid % kWarpSize;
+
+  float acc[kNumTokens] = {};
+  __shared__ float sm_reduction[kNumTokens][kNumWarps];
+
+  float const* b_col = mat_b + n_idx * kHiddenDim;
+
+  int k_bases[k_iterations];
+#pragma unroll
+  for (int ki = 0; ki < k_iterations; ki++) {
+    k_bases[ki] = ki * k_elems_per_k_iteration + tid * VPT;
+  }
+
+#if defined(CUDA_VERSION) && (CUDA_VERSION >= 12000) && \
+    defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900)
+  if constexpr (ENABLE_PDL) {
+    asm volatile("griddepcontrol.wait;");
+  }
+#endif
+
+  for (int ki = 0; ki < k_iterations; ki++) {
+    int const k_base = k_bases[ki];
+
+    float b_float[VPT];
+    load_weight<VPT>(b_col + k_base, b_float);
+
+#pragma unroll
+    for (int m_idx = 0; m_idx < kNumTokens; m_idx++) {
+      float a_float[VPT];
+      load_activation<InputT, VPT>(mat_a + m_idx * kHiddenDim + k_base,
+                                   a_float);
+#pragma unroll
+      for (int k = 0; k < VPT; k++) {
+        acc[m_idx] += a_float[k] * b_float[k];
+      }
+    }
+  }
+
+  // Warp-level butterfly reduction
+#pragma unroll
+  for (int m = 0; m < kNumTokens; m++) {
+    float sum = acc[m];
+    sum += __shfl_xor_sync(0xffffffff, sum, 16);
+    sum += __shfl_xor_sync(0xffffffff, sum, 8);
+    sum += __shfl_xor_sync(0xffffffff, sum, 4);
+    sum += __shfl_xor_sync(0xffffffff, sum, 2);
+    sum += __shfl_xor_sync(0xffffffff, sum, 1);
+    if (laneId == 0) sm_reduction[m][warpId] = sum;
+  }
+
+  __syncthreads();
+
+  if (tid == 0) {
+#pragma unroll
+    for (int m = 0; m < kNumTokens; m++) {
+      float final_sum = 0.0f;
+#pragma unroll
+      for (int w = 0; w < kNumWarps; w++) final_sum += sm_reduction[m][w];
+      out[m * kNumExperts + n_idx] = final_sum;
+    }
+  }
+
+#if defined(CUDA_VERSION) && (CUDA_VERSION >= 12000) && \
+    defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900)
+  if constexpr (ENABLE_PDL) {
+    __syncthreads();
+    asm volatile("griddepcontrol.launch_dependents;");
+  }
+#endif
+}
+
+// ---------------------------------------------------------------------------
+// Launcher
+// ---------------------------------------------------------------------------
+
+template <typename InputT, int kNumTokens, int kNumExperts, int kHiddenDim>
+void invokeFp32RouterGemm(float* output, InputT const* mat_a,
+                          float const* mat_b, cudaStream_t stream) {
+  constexpr int kBlockSize = 128;
+#if defined(CUDA_VERSION) && (CUDA_VERSION >= 12000)
+  if (getEnvEnablePDL()) {
+    cudaLaunchConfig_t config;
+    config.gridDim = kNumExperts;
+    config.blockDim = kBlockSize;
+    config.dynamicSmemBytes = 0;
+    config.stream = stream;
+    cudaLaunchAttribute attrs[1];
+    attrs[0].id = cudaLaunchAttributeProgrammaticStreamSerialization;
+    attrs[0].val.programmaticStreamSerializationAllowed = 1;
+    config.numAttrs = 1;
+    config.attrs = attrs;
+    cudaLaunchKernelEx(&config,
+                       fp32_router_gemm_kernel<InputT, kBlockSize, kNumTokens,
+                                               kNumExperts, kHiddenDim, true>,
+                       output, mat_a, mat_b);
+    return;
+  }
+#endif
+
+  fp32_router_gemm_kernel<InputT, kBlockSize, kNumTokens, kNumExperts,
+                          kHiddenDim, false>
+      <<<kNumExperts, kBlockSize, 0, stream>>>(output, mat_a, mat_b);
+}
+
+// ---------------------------------------------------------------------------
+// Explicit instantiations: M=1..32, E=256, H=3072, for both input types
+// ---------------------------------------------------------------------------
+
+#define INSTANTIATE(T, M)                              \
+  template void invokeFp32RouterGemm<T, M, 256, 3072>( \
+      float*, T const*, float const*, cudaStream_t);
+
+#define INSTANTIATE_ALL(T) \
+  INSTANTIATE(T, 1)        \
+  INSTANTIATE(T, 2)        \
+  INSTANTIATE(T, 3)        \
+  INSTANTIATE(T, 4)        \
+  INSTANTIATE(T, 5)        \
+  INSTANTIATE(T, 6)        \
+  INSTANTIATE(T, 7)        \
+  INSTANTIATE(T, 8)        \
+  INSTANTIATE(T, 9)        \
+  INSTANTIATE(T, 10)       \
+  INSTANTIATE(T, 11)       \
+  INSTANTIATE(T, 12)       \
+  INSTANTIATE(T, 13)       \
+  INSTANTIATE(T, 14)       \
+  INSTANTIATE(T, 15)       \
+  INSTANTIATE(T, 16)       \
+  INSTANTIATE(T, 17)       \
+  INSTANTIATE(T, 18)       \
+  INSTANTIATE(T, 19)       \
+  INSTANTIATE(T, 20)       \
+  INSTANTIATE(T, 21)       \
+  INSTANTIATE(T, 22)       \
+  INSTANTIATE(T, 23)       \
+  INSTANTIATE(T, 24)       \
+  INSTANTIATE(T, 25)       \
+  INSTANTIATE(T, 26)       \
+  INSTANTIATE(T, 27)       \
+  INSTANTIATE(T, 28)       \
+  INSTANTIATE(T, 29)       \
+  INSTANTIATE(T, 30)       \
+  INSTANTIATE(T, 31)       \
+  INSTANTIATE(T, 32)
+
+INSTANTIATE_ALL(float)
+INSTANTIATE_ALL(__nv_bfloat16)
+
+#undef INSTANTIATE_ALL
+#undef INSTANTIATE
diff --git a/csrc/moe/fp32_router_gemm_entry.cu b/csrc/moe/fp32_router_gemm_entry.cu
new file mode 100644
index 000000000000..2ef85082eda5
--- /dev/null
+++ b/csrc/moe/fp32_router_gemm_entry.cu
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <torch/all.h>
+#include <cuda_bf16.h>
+#include <cuda_runtime.h>
+#include <stdexcept>
+
+#include "core/registration.h"
+#include "dsv3_router_gemm_utils.h"
+
+static constexpr int FP32_NUM_EXPERTS = 256;
+static constexpr int FP32_HIDDEN_DIM = 3072;
+static constexpr int FP32_MAX_TOKENS = 32;
+
+// Forward declarations — 4 template params must match fp32_router_gemm.cu
+template <typename InputT, int kNumTokens, int kNumExperts, int kHiddenDim>
+void invokeFp32RouterGemm(float* output, InputT const* mat_a,
+                          float const* mat_b, cudaStream_t stream);
+
+// LoopUnroller templated on InputT
+template <typename InputT, int kBegin, int kEnd>
+struct Fp32LoopUnroller {
+  static void unroll(int num_tokens, float* output, InputT const* mat_a,
+                     float const* mat_b, cudaStream_t stream) {
+    if (num_tokens == kBegin) {
+      invokeFp32RouterGemm<InputT, kBegin, FP32_NUM_EXPERTS, FP32_HIDDEN_DIM>(
+          output, mat_a, mat_b, stream);
+    } else {
+      Fp32LoopUnroller<InputT, kBegin + 1, kEnd>::unroll(num_tokens, output,
+                                                         mat_a, mat_b, stream);
+    }
+  }
+};
+
+template <typename InputT, int kEnd>
+struct Fp32LoopUnroller<InputT, kEnd, kEnd> {
+  static void unroll(int num_tokens, float* output, InputT const* mat_a,
+                     float const* mat_b, cudaStream_t stream) {
+    if (num_tokens == kEnd) {
+      invokeFp32RouterGemm<InputT, kEnd, FP32_NUM_EXPERTS, FP32_HIDDEN_DIM>(
+          output, mat_a, mat_b, stream);
+    } else {
+      throw std::invalid_argument(
+          "fp32_router_gemm: num_tokens must be in [1, 32]");
+    }
+  }
+};
+
+void fp32_router_gemm(at::Tensor& output,       // [num_tokens, num_experts]
+                      const at::Tensor& mat_a,  // [num_tokens, hidden_dim]
+                      const at::Tensor& mat_b   // [num_experts, hidden_dim]
+) {
+  TORCH_CHECK(output.dim() == 2 && mat_a.dim() == 2 && mat_b.dim() == 2);
+  TORCH_CHECK(output.is_cuda() && mat_a.is_cuda() && mat_b.is_cuda(),
+              "fp32_router_gemm: all tensors must be CUDA tensors");
+  TORCH_CHECK(output.get_device() == mat_a.get_device() &&
+                  output.get_device() == mat_b.get_device(),
+              "fp32_router_gemm: all tensors must be on the same CUDA device");
+  TORCH_CHECK(output.is_contiguous() && mat_a.is_contiguous() &&
+                  mat_b.is_contiguous(),
+              "fp32_router_gemm: all tensors must be contiguous");
+
+  const int num_tokens = mat_a.size(0);
+  const int num_experts = mat_b.size(0);
+  const int hidden_dim = mat_a.size(1);
+
+  TORCH_CHECK(output.size(0) == num_tokens && output.size(1) == num_experts,
+              "fp32_router_gemm: output must have shape [num_tokens, "
+              "num_experts], got [",
+              output.size(0), ", ", output.size(1), "], expected [",
+              num_tokens, ", ", num_experts, "]");
+  TORCH_CHECK(
+      mat_a.size(1) == mat_b.size(1),
+      "fp32_router_gemm: mat_a and mat_b must have the same hidden_dim");
+  TORCH_CHECK(hidden_dim == FP32_HIDDEN_DIM,
+              "fp32_router_gemm: expected hidden_dim=", FP32_HIDDEN_DIM,
+              ", got ", hidden_dim);
+  TORCH_CHECK(num_experts == FP32_NUM_EXPERTS,
+              "fp32_router_gemm: expected num_experts=", FP32_NUM_EXPERTS,
+              ", got ", num_experts);
+  TORCH_CHECK(num_tokens <= FP32_MAX_TOKENS,
+              "fp32_router_gemm: num_tokens must be in [0, ", FP32_MAX_TOKENS,
+              "], got ", num_tokens);
+  TORCH_CHECK(mat_a.dtype() == at::kFloat || mat_a.dtype() == at::kBFloat16,
+              "fp32_router_gemm: mat_a must be float32 or bfloat16");
+  TORCH_CHECK(mat_b.dtype() == at::kFloat,
+              "fp32_router_gemm: mat_b (weight) must be float32");
+  TORCH_CHECK(output.dtype() == at::kFloat,
+              "fp32_router_gemm: output must be float32");
+
+  if (num_tokens == 0) {
+    return;
+  }
+
+  const at::cuda::OptionalCUDAGuard device_guard(device_of(mat_a));
+  const int sm = getSMVersion();
+  TORCH_CHECK(sm >= 90, "fp32_router_gemm: requires SM90+, got SM", sm);
+
+  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  float* out_ptr = reinterpret_cast<float*>(output.mutable_data_ptr());
+  float const* mat_b_ptr = reinterpret_cast<float const*>(mat_b.data_ptr());
+
+  if (mat_a.dtype() == at::kBFloat16) {
+    auto const* mat_a_ptr =
+        reinterpret_cast<__nv_bfloat16 const*>(mat_a.data_ptr());
+    Fp32LoopUnroller<__nv_bfloat16, 1, FP32_MAX_TOKENS>::unroll(
+        num_tokens, out_ptr, mat_a_ptr, mat_b_ptr, stream);
+  } else {
+    auto const* mat_a_ptr = reinterpret_cast<float const*>(mat_a.data_ptr());
+    Fp32LoopUnroller<float, 1, FP32_MAX_TOKENS>::unroll(
+        num_tokens, out_ptr, mat_a_ptr, mat_b_ptr, stream);
+  }
+}
+
+TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
+  m.impl("fp32_router_gemm", &fp32_router_gemm);
+}
diff --git a/csrc/moe/gpt_oss_router_gemm.cu b/csrc/moe/gpt_oss_router_gemm.cu
deleted file mode 100644
index 0294cd36aa8f..000000000000
--- a/csrc/moe/gpt_oss_router_gemm.cu
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Adapted from
- * https://github.com/NVIDIA/TensorRT-LLM/blob/v1.3.0rc7/cpp/tensorrt_llm/kernels/tinygemm2/tinygemm2_cuda.cu
- * Copyright (c) 2025, The vLLM team.
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
- * All rights reserved. SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <ATen/cuda/CUDAContext.h>
-#include <c10/cuda/CUDAStream.h>
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <torch/all.h>
-#include "gpt_oss_router_gemm.cuh"
-
-void launch_gpt_oss_router_gemm(__nv_bfloat16* gA, __nv_bfloat16* gB,
-                                __nv_bfloat16* gC, __nv_bfloat16* bias,
-                                int batch_size, int output_features,
-                                int input_features, cudaStream_t stream) {
-  static int const WARP_TILE_M = 16;
-  static int const TILE_M = WARP_TILE_M;
-  static int const TILE_N = 8;
-  static int const TILE_K = 64;
-  static int const STAGES = 16;
-  static int const STAGE_UNROLL = 4;
-  static bool const PROFILE = false;
-
-  CUtensorMap weight_map{};
-  CUtensorMap activation_map{};
-
-  constexpr uint32_t rank = 2;
-  uint64_t size[rank] = {(uint64_t)input_features, (uint64_t)output_features};
-  uint64_t stride[rank - 1] = {input_features * sizeof(__nv_bfloat16)};
-  uint32_t box_size[rank] = {TILE_K, TILE_M};
-  uint32_t elem_stride[rank] = {1, 1};
-
-  CUresult res = cuTensorMapEncodeTiled(
-      &weight_map, CUtensorMapDataType::CU_TENSOR_MAP_DATA_TYPE_BFLOAT16, rank,
-      gB, size, stride, box_size, elem_stride,
-      CUtensorMapInterleave::CU_TENSOR_MAP_INTERLEAVE_NONE,
-      CUtensorMapSwizzle::CU_TENSOR_MAP_SWIZZLE_128B,
-      CUtensorMapL2promotion::CU_TENSOR_MAP_L2_PROMOTION_NONE,
-      CUtensorMapFloatOOBfill::CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE);
-  TORCH_CHECK(res == CUDA_SUCCESS,
-              "cuTensorMapEncodeTiled failed for weight_map, error code=",
-              static_cast<int>(res));
-
-  size[1] = batch_size;
-  box_size[1] = TILE_N;
-
-  res = cuTensorMapEncodeTiled(
-      &activation_map, CUtensorMapDataType::CU_TENSOR_MAP_DATA_TYPE_BFLOAT16,
-      rank, gA, size, stride, box_size, elem_stride,
-      CUtensorMapInterleave::CU_TENSOR_MAP_INTERLEAVE_NONE,
-      CUtensorMapSwizzle::CU_TENSOR_MAP_SWIZZLE_128B,
-      CUtensorMapL2promotion::CU_TENSOR_MAP_L2_PROMOTION_NONE,
-      CUtensorMapFloatOOBfill::CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE);
-  TORCH_CHECK(res == CUDA_SUCCESS,
-              "cuTensorMapEncodeTiled failed for activation_map, error code=",
-              static_cast<int>(res));
-
-  int smem_size = STAGES * STAGE_UNROLL *
-                  (TILE_M * TILE_K * sizeof(__nv_bfloat16) +
-                   TILE_N * TILE_K * sizeof(__nv_bfloat16));
-
-  gpuErrChk(cudaFuncSetAttribute(
-      gpt_oss_router_gemm_kernel<WARP_TILE_M, TILE_M, TILE_N, TILE_K, STAGES,
-                                 STAGE_UNROLL, PROFILE>,
-      cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size));
-
-  int tiles_m = (output_features + TILE_M - 1) / TILE_M;
-  int tiles_n = (batch_size + TILE_N - 1) / TILE_N;
-
-  dim3 grid(tiles_m, tiles_n);
-  dim3 block(384);
-
-  cudaLaunchConfig_t config;
-  cudaLaunchAttribute attrs[1];
-  config.gridDim = grid;
-  config.blockDim = block;
-  config.dynamicSmemBytes = smem_size;
-  config.stream = stream;
-  config.attrs = attrs;
-  attrs[0].id = cudaLaunchAttributeProgrammaticStreamSerialization;
-  attrs[0].val.programmaticStreamSerializationAllowed = 1;
-  config.numAttrs = 1;
-
-  cudaLaunchKernelEx(
-      &config,
-      &gpt_oss_router_gemm_kernel<WARP_TILE_M, TILE_M, TILE_N, TILE_K, STAGES,
-                                  STAGE_UNROLL, PROFILE>,
-      gC, gA, gB, bias, output_features, batch_size, input_features, weight_map,
-      activation_map, nullptr);
-}
-
-void gpt_oss_router_gemm_cuda_forward(torch::Tensor& output,
-                                      torch::Tensor input, torch::Tensor weight,
-                                      torch::Tensor bias) {
-  auto const batch_size = input.size(0);
-  auto const input_dim = input.size(1);
-  auto const output_dim = weight.size(0);
-
-  auto stream = at::cuda::getCurrentCUDAStream();
-
-  if (input.scalar_type() == at::ScalarType::BFloat16) {
-    launch_gpt_oss_router_gemm((__nv_bfloat16*)input.data_ptr(),
-                               (__nv_bfloat16*)weight.data_ptr(),
-                               (__nv_bfloat16*)output.mutable_data_ptr(),
-                               (__nv_bfloat16*)bias.data_ptr(), batch_size,
-                               output_dim, input_dim, stream);
-  } else {
-    throw std::invalid_argument("Unsupported dtype, only supports bfloat16");
-  }
-}
-
-void gpt_oss_router_gemm(torch::Tensor& output, torch::Tensor input,
-                         torch::Tensor weight, torch::Tensor bias) {
-  TORCH_CHECK(input.dim() == 2, "input must be 2D");
-  TORCH_CHECK(weight.dim() == 2, "weight must be 2D");
-  TORCH_CHECK(bias.dim() == 1, "bias must be 1D");
-  TORCH_CHECK(input.sizes()[1] == weight.sizes()[1],
-              "input.size(1) must match weight.size(1)");
-  TORCH_CHECK(weight.sizes()[0] == bias.sizes()[0],
-              "weight.size(0) must match bias.size(0)");
-  TORCH_CHECK(input.scalar_type() == at::ScalarType::BFloat16,
-              "input tensor must be bfloat16");
-  TORCH_CHECK(weight.scalar_type() == at::ScalarType::BFloat16,
-              "weight tensor must be bfloat16");
-  TORCH_CHECK(bias.scalar_type() == at::ScalarType::BFloat16,
-              "bias tensor must be bfloat16");
-  gpt_oss_router_gemm_cuda_forward(output, input, weight, bias);
-}
diff --git a/csrc/moe/gpt_oss_router_gemm.cuh b/csrc/moe/gpt_oss_router_gemm.cuh
deleted file mode 100644
index 5cc653f19cfb..000000000000
--- a/csrc/moe/gpt_oss_router_gemm.cuh
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * Adapted from
- * https://github.com/NVIDIA/TensorRT-LLM/blob/v1.3.0rc7/cpp/tensorrt_llm/kernels/tinygemm2/tinygemm2_kernel.cuh
- * Copyright (c) 2025, The vLLM team.
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
- * All rights reserved. SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "cuda_bf16.h"
-#include <stdint.h>
-#include <stdio.h>
-#include <vector>
-
-#include "cuda_pipeline.h"
-#include <cuda.h>
-#include <cuda/barrier>
-#include <cuda/std/utility>
-#include <cuda_runtime.h>
-
-using barrier = cuda::barrier<cuda::thread_scope_block>;
-namespace cde = cuda::device::experimental;
-namespace ptx = cuda::ptx;
-
-#define gpuErrChk(ans)                    \
-  {                                       \
-    gpuAssert((ans), __FILE__, __LINE__); \
-  }
-
-inline void gpuAssert(cudaError_t code, char const* file, int line,
-                      bool abort = true) {
-  if (code != cudaSuccess) {
-    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file,
-            line);
-    if (abort) {
-      throw std::runtime_error(cudaGetErrorString(code));
-    }
-  }
-}
-
-#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900)
-__device__ uint64_t gclock64() {
-  unsigned long long int rv;
-  asm volatile("mov.u64 %0, %%globaltimer;" : "=l"(rv));
-  return rv;
-}
-
-__device__ void ldmatrix(__nv_bfloat16 rv[2], uint32_t smem_ptr) {
-  int dst;
-  asm volatile("ldmatrix.sync.aligned.x1.m8n8.shared.b16 {%0}, [%1];\n"
-               : "=r"(dst)
-               : "r"(smem_ptr));
-  int* rvi = reinterpret_cast<int*>(&rv[0]);
-  rvi[0] = dst;
-}
-
-__device__ void ldmatrix2(__nv_bfloat16 rv[4], uint32_t smem_ptr) {
-  int x, y;
-  asm volatile("ldmatrix.sync.aligned.x2.m8n8.shared.b16 {%0, %1}, [%2];\n"
-               : "=r"(x), "=r"(y)
-               : "r"(smem_ptr));
-
-  int* rvi = reinterpret_cast<int*>(&rv[0]);
-  rvi[0] = x;
-  rvi[1] = y;
-}
-
-__device__ void ldmatrix4(__nv_bfloat16 rv[8], uint32_t smem_ptr) {
-  int x, y, z, w;
-  asm volatile(
-      "ldmatrix.sync.aligned.x4.m8n8.shared.b16 {%0, %1, %2, %3}, [%4];"
-      : "=r"(x), "=r"(y), "=r"(z), "=r"(w)
-      : "r"(smem_ptr));
-  int* rvi = reinterpret_cast<int*>(&rv[0]);
-  rvi[0] = x;
-  rvi[1] = y;
-  rvi[2] = z;
-  rvi[3] = w;
-}
-
-__device__ void HMMA_1688(float d[4], __nv_bfloat16 a[4], __nv_bfloat16 b[2],
-                          float c[4]) {
-  uint32_t const* A = reinterpret_cast<uint32_t const*>(&a[0]);
-  uint32_t const* B = reinterpret_cast<uint32_t const*>(&b[0]);
-  float const* C = reinterpret_cast<float const*>(&c[0]);
-  float* D = reinterpret_cast<float*>(&d[0]);
-
-  asm volatile(
-      "mma.sync.aligned.m16n8k8.row.col.f32.bf16.bf16.f32 "
-      "{%0,%1,%2,%3}, {%4,%5}, {%6}, {%7,%8,%9,%10};\n"
-      : "=f"(D[0]), "=f"(D[1]), "=f"(D[2]), "=f"(D[3])
-      : "r"(A[0]), "r"(A[1]), "r"(B[0]), "f"(C[0]), "f"(C[1]), "f"(C[2]),
-        "f"(C[3]));
-}
-
-__device__ void HMMA_16816(float d[4], __nv_bfloat16 a[8], __nv_bfloat16 b[4],
-                           float c[4]) {
-  uint32_t const* A = reinterpret_cast<uint32_t const*>(&a[0]);
-  uint32_t const* B = reinterpret_cast<uint32_t const*>(&b[0]);
-  float const* C = reinterpret_cast<float const*>(&c[0]);
-  float* D = reinterpret_cast<float*>(&d[0]);
-
-  asm volatile(
-      "mma.sync.aligned.m16n8k16.row.col.f32.bf16.bf16.f32 "
-      "{%0,%1,%2,%3}, {%4,%5,%6,%7}, {%8,%9}, {%10,%11,%12,%13};\n"
-      : "=f"(D[0]), "=f"(D[1]), "=f"(D[2]), "=f"(D[3])
-      : "r"(A[0]), "r"(A[1]), "r"(A[2]), "r"(A[3]), "r"(B[0]), "r"(B[1]),
-        "f"(C[0]), "f"(C[1]), "f"(C[2]), "f"(C[3]));
-}
-
-__device__ void bar_wait(uint32_t bar_ptr, int phase) {
-  asm volatile(
-      "{\n"
-      ".reg .pred                P1;\n"
-      "LAB_WAIT:\n"
-      "mbarrier.try_wait.parity.shared::cta.b64 P1, [%0], %1;\n"
-      "@P1                       bra.uni DONE;\n"
-      "bra.uni                   LAB_WAIT;\n"
-      "DONE:\n"
-      "}\n" ::"r"(bar_ptr),
-      "r"(phase));
-}
-
-__device__ bool bar_try_wait(uint32_t bar_ptr, int phase) {
-  uint32_t success;
-  #ifdef INTERNAL
-  asm volatile(".pragma \"set knob DontInsertYield\";\n" : : : "memory");
-  #endif
-  asm volatile(
-      "{\n\t"
-      ".reg .pred P1; \n\t"
-      "mbarrier.try_wait.parity.shared::cta.b64 P1, [%1], %2; \n\t"
-      "selp.b32 %0, 1, 0, P1; \n\t"
-      "}"
-      : "=r"(success)
-      : "r"(bar_ptr), "r"(phase));
-  return success;
-}
-
-__device__ uint32_t elect_one_sync() {
-  uint32_t pred = 0;
-  uint32_t laneid = 0;
-  asm volatile(
-      "{\n"
-      ".reg .b32 %%rx;\n"
-      ".reg .pred %%px;\n"
-      "     elect.sync %%rx|%%px, %2;\n"
-      "@%%px mov.s32 %1, 1;\n"
-      "     mov.s32 %0, %%rx;\n"
-      "}\n"
-      : "+r"(laneid), "+r"(pred)
-      : "r"(0xFFFFFFFF));
-  return pred;
-}
-#endif
-
-struct Profile {
-  uint64_t start;
-  uint64_t weight_load_start;
-  uint64_t act_load_start;
-  uint64_t compute_start;
-  uint64_t complete;
-};
-
-template <int WARP_TILE_M, int TILE_M, int TILE_N, int TILE_K, int STAGES,
-          int STAGE_UNROLL, bool PROFILE>
-__global__ __launch_bounds__(384, 1) void gpt_oss_router_gemm_kernel(
-    __nv_bfloat16* output, __nv_bfloat16* weights, __nv_bfloat16* activations,
-    __nv_bfloat16* bias, int M, int N, int K,
-    const __grid_constant__ CUtensorMap weight_map,
-    const __grid_constant__ CUtensorMap activation_map,
-    Profile* profile = nullptr) {
-#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900)
-
-  if (PROFILE && threadIdx.x == 0 && blockIdx.y == 0)
-    profile[blockIdx.x].start = gclock64();
-
-  extern __shared__ __align__(128) char smem[];
-
-  __nv_bfloat16* sh_weights = (__nv_bfloat16*)&smem[0];
-  __nv_bfloat16* sh_activations =
-      (__nv_bfloat16*)&smem[STAGES * STAGE_UNROLL * TILE_M * TILE_K *
-                            sizeof(__nv_bfloat16)];
-
-  #pragma nv_diag_suppress static_var_with_dynamic_init
-  __shared__ barrier bar_wt_ready[STAGES];
-  __shared__ barrier bar_act_ready[STAGES];
-  __shared__ barrier bar_data_consumed[STAGES];
-
-  __shared__ float4 reduction_buffer[128];
-
-  __shared__ nv_bfloat16 sh_bias[TILE_M];
-
-  if (threadIdx.x == 0) {
-    for (int i = 0; i < STAGES; i++) {
-      init(&bar_wt_ready[i], 1);
-      init(&bar_act_ready[i], 1);
-      init(&bar_data_consumed[i], 32);
-    }
-    ptx::fence_proxy_async(ptx::space_shared);
-    asm volatile("prefetch.tensormap [%0];"
-                 :
-                 : "l"(reinterpret_cast<uint64_t>(&weight_map))
-                 : "memory");
-    asm volatile("prefetch.tensormap [%0];"
-                 :
-                 : "l"(reinterpret_cast<uint64_t>(&activation_map))
-                 : "memory");
-  }
-  __syncthreads();
-
-  int warp_id = threadIdx.x / 32;
-  int lane_id = threadIdx.x % 32;
-
-  int phase = 0;
-
-  int mib = blockIdx.x * TILE_M;
-  int ni = blockIdx.y * TILE_N;
-
-  float accum[4];
-  for (int i = 0; i < 4; i++) accum[i] = 0.f;
-
-  int const K_LOOPS_DMA =
-      (K + 4 * TILE_K * STAGE_UNROLL - 1) / (4 * (TILE_K * STAGE_UNROLL));
-  int const K_LOOPS_COMPUTE = K_LOOPS_DMA;
-
-  // Data loading thread
-  if (warp_id >= 4 && elect_one_sync()) {
-    int stage = warp_id % 4;
-
-    bool weight_warp = warp_id < 8;
-    if (!weight_warp) {
-      cudaGridDependencySynchronize();
-      cudaTriggerProgrammaticLaunchCompletion();
-    }
-
-    for (int ki = 0; ki < K_LOOPS_DMA; ki++) {
-      int k = (ki * 4 + (warp_id % 4)) * TILE_K * STAGE_UNROLL;
-
-      uint64_t desc_ptr_wt = reinterpret_cast<uint64_t>(&weight_map);
-      uint64_t desc_ptr_act = reinterpret_cast<uint64_t>(&activation_map);
-
-      uint32_t bar_ptr_wt = __cvta_generic_to_shared(&bar_wt_ready[stage]);
-      uint32_t bar_ptr_act = __cvta_generic_to_shared(&bar_act_ready[stage]);
-      int bytes_wt = TILE_M * TILE_K * sizeof(__nv_bfloat16);
-      int bytes_act = TILE_N * TILE_K * sizeof(__nv_bfloat16);
-
-      bar_wait(__cvta_generic_to_shared(&bar_data_consumed[stage]), phase ^ 1);
-
-      if (weight_warp)
-        asm volatile("mbarrier.arrive.expect_tx.shared.b64 _, [%0], %1;"
-                     :
-                     : "r"(bar_ptr_wt), "r"(STAGE_UNROLL * bytes_wt));
-      if (!weight_warp)
-        asm volatile("mbarrier.arrive.expect_tx.shared.b64 _, [%0], %1;"
-                     :
-                     : "r"(bar_ptr_act), "r"(STAGE_UNROLL * bytes_act));
-
-      if (PROFILE && blockIdx.y == 0 && ki == 0 && weight_warp)
-        profile[blockIdx.x].weight_load_start = gclock64();
-      if (PROFILE && blockIdx.y == 0 && ki == 0 && !weight_warp)
-        profile[blockIdx.x].act_load_start = gclock64();
-
-      for (int i = 0; i < STAGE_UNROLL; i++) {
-        uint32_t smem_ptr_wt = __cvta_generic_to_shared(
-            &sh_weights[(stage * STAGE_UNROLL + i) * TILE_M * TILE_K]);
-        uint32_t crd0 = k + i * TILE_K;
-        uint32_t crd1 = mib;
-        if (weight_warp)
-          asm volatile(
-              "cp.async.bulk.tensor.2d.shared::cta.global.mbarrier::complete_"
-              "tx::bytes [%0], [%1, {%3,%4}], "
-              "[%2];"
-              :
-              : "r"(smem_ptr_wt), "l"(desc_ptr_wt), "r"(bar_ptr_wt), "r"(crd0),
-                "r"(crd1)
-              : "memory");
-
-        uint32_t smem_ptr_act = __cvta_generic_to_shared(
-            &sh_activations[(stage * STAGE_UNROLL + i) * TILE_N * TILE_K]);
-        crd0 = k + i * TILE_K;
-        crd1 = ni;
-        if (!weight_warp)
-          asm volatile(
-              "cp.async.bulk.tensor.2d.shared::cta.global.mbarrier::complete_"
-              "tx::bytes [%0], [%1, {%3,%4}], "
-              "[%2];"
-              :
-              : "r"(smem_ptr_act), "l"(desc_ptr_act), "r"(bar_ptr_act),
-                "r"(crd0), "r"(crd1)
-              : "memory");
-      }
-
-      stage += 4;
-      if (stage >= STAGES) {
-        stage = warp_id % 4;
-        phase ^= 1;
-      }
-    }
-    // Wait for pending loads to be consumed before exiting, to avoid race
-    for (int i = 0; i < (STAGES / 4) - 1; i++) {
-      bar_wait(__cvta_generic_to_shared(&bar_data_consumed[stage]), phase ^ 1);
-      stage += 4;
-      if (stage >= STAGES) {
-        stage = warp_id % 4;
-        phase ^= 1;
-      }
-    }
-  }
-  // Compute threads
-  else if (warp_id < 4) {
-    // Sneak the bias load into the compute warps since they're just waiting for
-    // stuff anyway
-    if (threadIdx.x < TILE_M) sh_bias[threadIdx.x] = bias[mib + threadIdx.x];
-
-    int stage = warp_id;
-
-    int phase = 0;
-    int lane_id_div8 = lane_id / 8;
-    int lane_id_mod8 = lane_id % 8;
-
-    int lane_row_offset_wt = (lane_id_div8 % 2) ? 8 : 0;
-    int lane_col_offset_wt = (lane_id_div8 / 2) ? 1 : 0;
-
-    int row_wt = lane_id_mod8 + lane_row_offset_wt;
-    int row_act = lane_id_mod8;
-
-    int row_offset_wt = (reinterpret_cast<uintptr_t>(sh_weights) / 128) % 8;
-    int row_offset_act = row_offset_wt;
-
-    uint32_t bar_ptr_wt = __cvta_generic_to_shared(&bar_wt_ready[stage]);
-    uint32_t bar_ptr_act = __cvta_generic_to_shared(&bar_act_ready[stage]);
-
-    bool weight_ready = bar_try_wait(bar_ptr_wt, phase);
-    bool act_ready = bar_try_wait(bar_ptr_act, phase);
-
-  #pragma unroll 2
-    for (int ki = 0; ki < K_LOOPS_COMPUTE; ki++) {
-      int next_stage = stage + 4;
-      int next_phase = phase;
-      if (next_stage >= STAGES) {
-        next_stage = warp_id;
-        next_phase ^= 1;
-      }
-
-      while (!weight_ready || !act_ready) {
-        weight_ready = bar_try_wait(bar_ptr_wt, phase);
-        act_ready = bar_try_wait(bar_ptr_act, phase);
-      }
-
-      if (PROFILE && blockIdx.y == 0 && threadIdx.x == 0 && ki == 0)
-        profile[blockIdx.x].compute_start = gclock64();
-
-      if (ki + 1 < K_LOOPS_COMPUTE) {
-        weight_ready = bar_try_wait(
-            __cvta_generic_to_shared(&bar_wt_ready[next_stage]), next_phase);
-        act_ready = bar_try_wait(
-            __cvta_generic_to_shared(&bar_act_ready[next_stage]), next_phase);
-      }
-
-  #pragma unroll
-      for (int su = 0; su < STAGE_UNROLL; su++) {
-        __nv_bfloat16* ptr_weights =
-            &sh_weights[(stage * STAGE_UNROLL + su) * TILE_M * TILE_K];
-        __nv_bfloat16* ptr_act =
-            &sh_activations[(stage * STAGE_UNROLL + su) * TILE_N * TILE_K];
-
-  #pragma unroll
-        for (int kii = 0; kii < TILE_K / 16; kii++) {
-          __nv_bfloat16 a[8];
-          __nv_bfloat16 b[4];
-
-          int col = 2 * kii + lane_col_offset_wt;
-          int col_sw = ((row_wt + row_offset_wt) % 8) ^ col;
-
-          ldmatrix4(a, __cvta_generic_to_shared(
-                           &ptr_weights[row_wt * TILE_K + col_sw * 8]));
-
-          col = 2 * kii + lane_id_div8;
-          col_sw = ((row_act + row_offset_act) % 8) ^ col;
-
-          ldmatrix2(b, __cvta_generic_to_shared(
-                           &ptr_act[row_act * TILE_K + 8 * col_sw]));
-
-          HMMA_16816(accum, a, b, accum);
-        }
-      }
-
-      uint32_t bar_c = __cvta_generic_to_shared(&bar_data_consumed[stage]);
-      asm volatile("mbarrier.arrive.shared::cta.b64 _, [%0];" : : "r"(bar_c));
-
-      stage = next_stage;
-      phase = next_phase;
-    }
-
-    float4 accum4;
-    accum4.x = accum[0];
-    accum4.y = accum[1];
-    accum4.z = accum[2];
-    accum4.w = accum[3];
-    reduction_buffer[threadIdx.x] = accum4;
-
-    __syncthreads();
-
-    if (warp_id == 0) {
-      int mi = mib + warp_id * WARP_TILE_M;
-      int tm = mi + lane_id / 4;
-      int tn = ni + 2 * (lane_id % 4);
-
-      float4 accum1 = reduction_buffer[32 + threadIdx.x];
-      float4 accum2 = reduction_buffer[64 + threadIdx.x];
-      float4 accum3 = reduction_buffer[96 + threadIdx.x];
-
-      accum[0] = accum[0] + accum1.x + accum2.x + accum3.x;
-      accum[1] = accum[1] + accum1.y + accum2.y + accum3.y;
-      accum[2] = accum[2] + accum1.z + accum2.z + accum3.z;
-      accum[3] = accum[3] + accum1.w + accum2.w + accum3.w;
-
-      float bias_lo = __bfloat162float(sh_bias[tm - mib]);
-      float bias_hi = __bfloat162float(sh_bias[tm + 8 - mib]);
-
-      if (tn < N && tm < M)
-        output[tn * M + tm] = __float2bfloat16(accum[0] + bias_lo);
-      if (tn + 1 < N && tm < M)
-        output[(tn + 1) * M + tm] = __float2bfloat16(accum[1] + bias_lo);
-      if (tn < N && tm + 8 < M)
-        output[tn * M + tm + 8] = __float2bfloat16(accum[2] + bias_hi);
-      if (tn + 1 < N && tm + 8 < M)
-        output[(tn + 1) * M + tm + 8] = __float2bfloat16(accum[3] + bias_hi);
-
-      if (PROFILE && blockIdx.y == 0 && threadIdx.x == 0)
-        profile[blockIdx.x].complete = gclock64();
-    }
-  }
-#endif  // end if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900)
-}
diff --git a/csrc/moe/marlin_moe_wna16/generate_kernels.py b/csrc/moe/marlin_moe_wna16/generate_kernels.py
index 52f266707bb9..6ddda1d51db5 100644
--- a/csrc/moe/marlin_moe_wna16/generate_kernels.py
+++ b/csrc/moe/marlin_moe_wna16/generate_kernels.py
@@ -15,11 +15,11 @@
 for arch in sys.argv[1].split(","):
     arch = arch[: arch.index(".") + 2].replace(".", "")
     arch = int(arch)
-    # only SM89 and SM120 fully support
-    # mma.sync.aligned.m16n8k32.row.col.f32.e4m3.e4m3.f32.
+    # SM89 and the SM12x family (SM120 RTX 5090, SM121 DGX Spark GB10)
+    # fully support mma.sync.aligned.m16n8k32.row.col.f32.e4m3.e4m3.f32.
     # SM90 and SM100 can use this PTX, but it’s simulated
     # with FP16 MMA, so it cannot achieve any acceleration.
-    if arch in [89, 120]:
+    if arch == 89 or arch // 10 == 12:
         SUPPORT_FP8 = True
     if arch >= 80:
         SUPPORT_SM80 = True
@@ -108,6 +108,15 @@
         "thread_m_blocks": THREAD_M_BLOCKS,
         "group_blocks": [2],
     },
+    # MXFP8
+    {
+        "a_type": ["kBFloat16"],
+        "b_type": "kFE4M3fn",
+        "s_type": "kFE8M0fnu",
+        "thread_configs": THREAD_CONFIGS,
+        "thread_m_blocks": THREAD_M_BLOCKS,
+        "group_blocks": [2],
+    },
     # AWQ-INT4 with INT8 activation
     {
         "a_type": ["kS8"],
diff --git a/csrc/moe/marlin_moe_wna16/marlin_template.h b/csrc/moe/marlin_moe_wna16/marlin_template.h
index f5685b898036..9858df94573e 100644
--- a/csrc/moe/marlin_moe_wna16/marlin_template.h
+++ b/csrc/moe/marlin_moe_wna16/marlin_template.h
@@ -343,6 +343,8 @@ __global__ void Marlin(
   if constexpr (b_type == vllm::kFE2M1f) {
     static_assert(s_type == vllm::kFE4M3fn && group_blocks == 1 ||
                   s_type == vllm::kFE8M0fnu && group_blocks == 2);
+  } else if constexpr (b_type == vllm::kFE4M3fn && s_type == vllm::kFE8M0fnu) {
+    static_assert(group_blocks == 2);
   } else if constexpr (std::is_same<scalar_t, nv_bfloat16>::value) {
     static_assert(s_type == vllm::kBFloat16);
   } else if constexpr (std::is_same<scalar_t, half>::value) {
@@ -357,9 +359,10 @@ __global__ void Marlin(
   constexpr bool is_int_type = b_type == vllm::kU4 || b_type == vllm::kU8 ||
                                b_type == vllm::kS4 || b_type == vllm::kS8 ||
                                b_type == vllm::kU4B8 || b_type == vllm::kU8B128;
+  constexpr bool is_8bit_scale = s_type.size_bits() == 8;
   // see comments of dequant.h for more details
   constexpr bool dequant_skip_flop =
-      is_a_8bit || b_type == vllm::kFE4M3fn ||
+      is_a_8bit || (b_type == vllm::kFE4M3fn && !(s_type == vllm::kFE8M0fnu)) ||
       b_type == vllm::kFE2M1f && s_type == vllm::kFE4M3fn ||
       has_zp && !is_zp_float && !std::is_same<scalar_t, nv_bfloat16>::value ||
       has_zp && !is_zp_float && !(b_type == vllm::kU8);
@@ -373,7 +376,7 @@ __global__ void Marlin(
   const int group_size =
       (!has_act_order && group_blocks == -1) ? prob_k : prob_k / num_groups;
   const int scales_expert_stride =
-      prob_n * prob_k / group_size / (b_type == vllm::kFE2M1f ? 16 : 8);
+      prob_n * prob_k / group_size / (is_8bit_scale ? 16 : 8);
   const int zp_expert_stride =
       is_zp_float ? prob_n * prob_k / group_size / 8
                   : prob_n * prob_k / group_size / (pack_factor * 4);
@@ -692,9 +695,8 @@ __global__ void Marlin(
   constexpr int b_sh_wr_iters = b_sh_stage / b_sh_wr_delta;
 
   // Scale sizes/strides without act_order
-  int s_gl_stride = prob_n / (b_type == vllm::kFE2M1f ? 16 : 8);
-  constexpr int s_sh_stride =
-      16 * thread_n_blocks / (b_type == vllm::kFE2M1f ? 16 : 8);
+  int s_gl_stride = prob_n / (is_8bit_scale ? 16 : 8);
+  constexpr int s_sh_stride = 16 * thread_n_blocks / (is_8bit_scale ? 16 : 8);
   constexpr int s_tb_groups =
       !has_act_order && group_blocks != -1 && group_blocks < thread_k_blocks
           ? thread_k_blocks / group_blocks
@@ -1131,7 +1133,7 @@ __global__ void Marlin(
 
           int4* sh_s_stage = sh_s + s_sh_stage * pipe;
 
-          if constexpr (b_type_id != vllm::kFE2M1f.id()) {
+          if constexpr (!is_8bit_scale) {
             reinterpret_cast<int4*>(&frag_s[k % 2])[0] =
                 sh_s_stage[s_sh_rd + cur_group_id * s_sh_stride];
           } else {
@@ -1140,7 +1142,7 @@ __global__ void Marlin(
                     sh_s_stage)[s_sh_rd + cur_group_id * (2 * s_sh_stride)];
           }
         } else if (group_blocks >= b_sh_wr_iters) {
-          if constexpr (b_type_id != vllm::kFE2M1f.id()) {
+          if constexpr (!is_8bit_scale) {
             reinterpret_cast<int4*>(&frag_s[1])[0] =
                 reinterpret_cast<int4*>(&frag_s[0])[0];
           } else {
@@ -1341,7 +1343,7 @@ __global__ void Marlin(
       }
     }
 
-    if constexpr (b_type == vllm::kFE2M1f) {
+    if constexpr (s_type == vllm::kFE4M3fn || s_type == vllm::kFE8M0fnu) {
       int s_quant_0 = reinterpret_cast<int*>(frag_s[k2])[0];
       int s_quant_1 = reinterpret_cast<int*>(frag_s[k2])[1];
 
diff --git a/csrc/moe/marlin_moe_wna16/ops.cu b/csrc/moe/marlin_moe_wna16/ops.cu
index 60681ad930ff..82cba2978b10 100644
--- a/csrc/moe/marlin_moe_wna16/ops.cu
+++ b/csrc/moe/marlin_moe_wna16/ops.cu
@@ -448,8 +448,8 @@ void marlin_mm(const void* A, const void* B, void* C, void* C_tmp, void* b_bias,
                 "FP8 only support Ada Lovelace or newer GPUs.");
     TORCH_CHECK(
         major_capability * 10 + minor_capability == 89 ||
-            major_capability * 10 + minor_capability == 120,
-        "Marlin W4A8-FP8 only support SM89 or SM120 device (It is slower than "
+            major_capability == 12,
+        "Marlin W4A8-FP8 only support SM89 or SM12x device (It is slower than "
         "Marlin W4A16 on other devices).");
   }
 
@@ -599,6 +599,9 @@ torch::Tensor moe_wna16_marlin_gemm(
                   "When b_type = float4_e2m1f, b_scale scalar type must be",
                   "float8_e4m3fn (for NVFP4) or float8_e8m0fnu (for MXFP4).");
     }
+  } else if (b_type_id == vllm::kFE4M3fn.id() &&
+             b_scales.scalar_type() == at::ScalarType::Float8_e8m0fnu) {
+    s_type_id = vllm::kFE8M0fnu.id();
   }
 
   vllm::ScalarType a_type = vllm::ScalarType::from_id(a_type_id);
diff --git a/csrc/moe/moe_align_sum_kernels.cu b/csrc/moe/moe_align_sum_kernels.cu
index b4b3c793b13e..a8fa59b19398 100644
--- a/csrc/moe/moe_align_sum_kernels.cu
+++ b/csrc/moe/moe_align_sum_kernels.cu
@@ -390,7 +390,13 @@ __global__ void moe_lora_align_block_size_kernel(
     int32_t* __restrict__ token_mask, bool has_expert_map) {
   int lora_idx = blockIdx.x / 2;
   int lora_id = lora_ids[lora_idx];
-  if (lora_id == -1 || adapter_enabled[lora_id] == 0) {
+  // Output buffers are indexed by lora_id (in [0, max_loras)). The grid
+  // iterates one extra slot to accommodate the "-1" entry that
+  // active_lora_ids may hold in position 0 for mixed base + LoRA batches;
+  // guard against any other unexpected lora_id >= max_loras to avoid
+  // out-of-bounds writes. This mirrors the `lora_id >= max_loras` guard in
+  // the Triton _fused_moe_lora_kernel.
+  if (lora_id == -1 || lora_id >= max_loras || adapter_enabled[lora_id] == 0) {
     return;
   }
 
@@ -420,10 +426,21 @@ __global__ void lora_count_and_sort_expert_tokens_kernel(
     int32_t* __restrict__ sorted_token_ids, int32_t* __restrict__ cumsum_buffer,
     int32_t* __restrict__ expert_map, size_t numel, int32_t num_experts,
     int32_t max_num_tokens_padded, int32_t topk_num, int32_t* token_mask,
-    int32_t* lora_ids, bool has_expert_map) {
+    int32_t max_loras, int32_t* lora_ids, int32_t* adapter_enabled,
+    bool has_expert_map) {
   int lora_idx = blockIdx.x;
   int lora_id = lora_ids[lora_idx];
-  if (lora_id == -1) {
+  // Same guard rationale as moe_lora_align_block_size_kernel. Additionally
+  // skip disabled adapter slots: moe_lora_align_block_size_kernel early-returns
+  // for them and leaves token_mask[lora_id, :] uninitialized (token_mask is
+  // allocated with torch::empty), so running the sort loop here would traverse
+  // garbage mask bits and pollute this slot's rows of sorted_token_ids and
+  // cumsum_buffer. Downstream consumers already skip disabled slots, so the
+  // pollution is dormant today, but the check keeps behavior symmetric with
+  // the other two align kernels and avoids O(numel) wasted work per disabled
+  // slot. Short-circuit evaluation ensures adapter_enabled is only indexed
+  // after lora_id is confirmed to be in [0, max_loras).
+  if (lora_id == -1 || lora_id >= max_loras || adapter_enabled[lora_id] == 0) {
     return;
   }
 
@@ -446,7 +463,8 @@ __global__ void moe_lora_align_block_size_small_batch_expert_kernel(
     int32_t* token_mask, bool has_expert_map) {
   int lora_idx = blockIdx.x;
   int lora_id = lora_ids[lora_idx];
-  if (lora_id == -1 || adapter_enabled[lora_id] == 0) {
+  // Same guard rationale as moe_lora_align_block_size_kernel.
+  if (lora_id == -1 || lora_id >= max_loras || adapter_enabled[lora_id] == 0) {
     return;
   }
 
@@ -698,7 +716,15 @@ void moe_lora_align_block_size(
                   scalar_t, fill_threads>;
           AT_CUDA_CHECK(VLLM_DevFuncAttribute_SET_MaxDynamicSharedMemorySize(
               (void*)kernel, shared_mem));
-          kernel<<<max_loras, blockDim, shared_mem, stream>>>(
+          // Grid size is (max_loras + 1) because active_lora_ids has length
+          // max_loras + 1: sorted-unique values of token_lora_mapping, which
+          // can include -1 (base-model tokens) in addition to up to max_loras
+          // real LoRA slots. Using max_loras would drop the real LoRA slot
+          // when -1 is present at position 0 and leave output buffers
+          // uninitialized, causing illegal memory accesses in downstream
+          // MoE-LoRA kernels. This mirrors the fix made for the Triton
+          // _fused_moe_lora_kernel grid in vllm-project/vllm#32277.
+          kernel<<<max_loras + 1, blockDim, shared_mem, stream>>>(
               topk_ids.data_ptr<scalar_t>(),
               token_lora_mapping.data_ptr<int32_t>(), block_size,
               expert_map.data_ptr<int32_t>(), num_experts, max_loras,
@@ -722,10 +748,17 @@ void moe_lora_align_block_size(
           auto align_kernel =
               vllm::moe::moe_lora_align_block_size_kernel<scalar_t>;
 
-          // launch two threadblocks for each lora
+          // Launch two threadblocks per LoRA slot, across max_loras + 1 slots
+          // to cover the extra "-1" (base-model tokens) entry that
+          // active_lora_ids may contain in addition to up to max_loras real
+          // LoRA slots. Using max_loras would drop the real LoRA slot when -1
+          // occupies position 0 and leave the output buffers uninitialized,
+          // causing illegal memory accesses downstream. Mirrors the grid fix
+          // applied to _fused_moe_lora_kernel in vllm-project/vllm#32277.
           // blockIdx.x % 2 == 0: counting experts and aligning
           // blockIdx.x % 2 == 1: filling sorted_token_ids
-          align_kernel<<<max_loras * 2, blockDim, shared_mem_size, stream>>>(
+          align_kernel<<<(max_loras + 1) * 2, blockDim, shared_mem_size,
+                         stream>>>(
               topk_ids.data_ptr<scalar_t>(),
               token_lora_mapping.data_ptr<int32_t>(), block_size,
               expert_map.data_ptr<int32_t>(), num_experts, max_loras,
@@ -744,7 +777,10 @@ void moe_lora_align_block_size(
           const int max_blocks = 65535;
           const int actual_blocks = std::min(num_blocks, max_blocks);
 
-          dim3 gridDims(max_loras, actual_blocks);
+          // Same rationale as align_kernel above: iterate over max_loras + 1
+          // slots so the sort kernel processes the real LoRA slot even when
+          // active_lora_ids has -1 at position 0.
+          dim3 gridDims(max_loras + 1, actual_blocks);
           auto sort_kernel =
               vllm::moe::lora_count_and_sort_expert_tokens_kernel<scalar_t>;
 
@@ -753,7 +789,8 @@ void moe_lora_align_block_size(
               sorted_token_ids.data_ptr<int32_t>(), cumsum.data_ptr<int32_t>(),
               expert_map.data_ptr<int32_t>(), topk_ids.numel(), num_experts,
               max_num_tokens_padded, topk_num, token_mask.data_ptr<int32_t>(),
-              lora_ids.data_ptr<int32_t>(), has_expert_map);
+              max_loras, lora_ids.data_ptr<int32_t>(),
+              adapter_enabled.data_ptr<int32_t>(), has_expert_map);
         }
       });
 }
\ No newline at end of file
diff --git a/csrc/moe/moe_ops.h b/csrc/moe/moe_ops.h
index de931dc76467..3a3ea8933083 100644
--- a/csrc/moe/moe_ops.h
+++ b/csrc/moe/moe_ops.h
@@ -5,12 +5,21 @@
 void topk_softmax(torch::Tensor& topk_weights, torch::Tensor& topk_indices,
                   torch::Tensor& token_expert_indices,
                   torch::Tensor& gating_output, bool renormalize,
-                  std::optional<torch::Tensor> bias);
+                  std::optional<torch::Tensor> bias, bool enable_pdl);
 
 void topk_sigmoid(torch::Tensor& topk_weights, torch::Tensor& topk_indices,
                   torch::Tensor& token_expert_indices,
                   torch::Tensor& gating_output, bool renormalize,
-                  std::optional<torch::Tensor> bias);
+                  std::optional<torch::Tensor> bias, bool enable_pdl);
+
+void topk_softplus_sqrt(torch::Tensor& topk_weights,
+                        torch::Tensor& topk_indices,
+                        torch::Tensor& token_expert_indices,
+                        torch::Tensor& gating_output, bool renormalize,
+                        double routed_scaling_factor,
+                        const c10::optional<torch::Tensor>& correction_bias,
+                        const c10::optional<torch::Tensor>& input_ids,
+                        const c10::optional<torch::Tensor>& tid2eid);
 
 void moe_sum(torch::Tensor& input, torch::Tensor& output);
 
@@ -58,10 +67,6 @@ void shuffle_rows(const torch::Tensor& input_tensor,
                   torch::Tensor& output_tensor);
 
 #ifndef USE_ROCM
-// cuBLAS bf16 x bf16 -> fp32 router GEMM (fallback for non-SM90 / batch > 16)
-torch::Tensor router_gemm_bf16_fp32(torch::Tensor const& input,
-                                    torch::Tensor const& weight);
-
 // DeepSeek V3 optimized router GEMM kernel for SM90+
 // Computes output = mat_a @ mat_b.T where:
 //   mat_a: [num_tokens, hidden_dim] in bf16
@@ -71,7 +76,16 @@ torch::Tensor router_gemm_bf16_fp32(torch::Tensor const& input,
 void dsv3_router_gemm(torch::Tensor& output, const torch::Tensor& mat_a,
                       const torch::Tensor& mat_b);
 
-// gpt-oss optimized router GEMM kernel for SM90+
-void gpt_oss_router_gemm(torch::Tensor& output, torch::Tensor input,
-                         torch::Tensor weight, torch::Tensor bias);
+// Fused RMSNorm + router GEMV for DeepSeek V4. Produces both:
+//   normed_x[m,k]      = x[m,k] * rsqrt(mean(x[m]^2) + eps) * norm_weight[k]
+//   logits[m,n]        = sum_k(normed_x[m,k] * gate_weight[n,k])
+// in a single kernel launch. Same dim/dtype constraints as dsv3_router_gemm.
+void dsv4_norm_router_gemm(at::Tensor& logits, at::Tensor& normed_x,
+                           at::Tensor const& x, at::Tensor const& norm_weight,
+                           at::Tensor const& gate_weight, double eps);
+
+// BF16/FP32 x FP32 -> FP32 router GEMM for H=3072, E=256, M<=32
+// (SM90+)
+void fp32_router_gemm(torch::Tensor& output, const torch::Tensor& mat_a,
+                      const torch::Tensor& mat_b);
 #endif
diff --git a/csrc/moe/router_gemm.cu b/csrc/moe/router_gemm.cu
deleted file mode 100644
index a939f8846ff1..000000000000
--- a/csrc/moe/router_gemm.cu
+++ /dev/null
@@ -1,52 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-// bf16 x bf16 -> fp32 router GEMM via cuBLAS.
-// Uses CUBLAS_COMPUTE_32F so bf16 operands accumulate into fp32,
-// matching TRT-LLM's cuBLAS fallback behaviour in dsv3RouterGemmOp.
-
-#include <torch/all.h>
-#include <ATen/cuda/CUDAContext.h>
-#include <cublas_v2.h>
-
-// cuBLAS column-major math for row-major PyTorch tensors:
-//   weight[N,K]_row  lda=K  -> cuBLAS sees (K,N) col-major; CUBLAS_OP_T ->
-//   (N,K) input[M,K]_row   ldb=K  -> cuBLAS sees (K,M) col-major; CUBLAS_OP_N
-//   -> (K,M) out[M,N]_row     ldc=N  -> cuBLAS sees (N,M) col-major (written as
-//   output^T)
-// cuBLAS: C(N,M) = weight(N,K) @ input(K,M)  =>  C^T = output[M,N]
-// params: m=N, n=M, k=K, lda=K (weight), ldb=K (input), ldc=N (output)
-
-torch::Tensor router_gemm_bf16_fp32(torch::Tensor const& input,
-                                    torch::Tensor const& weight) {
-  TORCH_CHECK(input.dtype() == torch::kBFloat16,
-              "router_gemm_bf16_fp32: input must be bfloat16");
-  TORCH_CHECK(weight.dtype() == torch::kBFloat16,
-              "router_gemm_bf16_fp32: weight must be bfloat16");
-  TORCH_CHECK(input.dim() == 2 && weight.dim() == 2,
-              "router_gemm_bf16_fp32: input and weight must be 2-D");
-  TORCH_CHECK(input.size(1) == weight.size(1),
-              "router_gemm_bf16_fp32: inner dimensions must match");
-
-  int64_t const M = input.size(0);
-  int64_t const N = weight.size(0);
-  int64_t const K = input.size(1);
-
-  auto out = torch::empty({M, N}, input.options().dtype(torch::kFloat32));
-
-  cublasHandle_t handle = at::cuda::getCurrentCUDABlasHandle();
-  TORCH_CUDABLAS_CHECK(
-      cublasSetStream(handle, at::cuda::getCurrentCUDAStream()));
-
-  float const alpha = 1.0f;
-  float const beta = 0.0f;
-
-  TORCH_CUDABLAS_CHECK(cublasGemmEx(
-      handle, CUBLAS_OP_T, CUBLAS_OP_N, static_cast<int>(N),
-      static_cast<int>(M), static_cast<int>(K), &alpha, weight.data_ptr(),
-      CUDA_R_16BF, static_cast<int>(K), input.data_ptr(), CUDA_R_16BF,
-      static_cast<int>(K), &beta, out.data_ptr(), CUDA_R_32F,
-      static_cast<int>(N), CUBLAS_COMPUTE_32F, CUBLAS_GEMM_DEFAULT));
-
-  return out;
-}
diff --git a/csrc/moe/topk_softmax_kernels.cu b/csrc/moe/topk_softmax_kernels.cu
index 833036da528e..8bb4f3167265 100644
--- a/csrc/moe/topk_softmax_kernels.cu
+++ b/csrc/moe/topk_softmax_kernels.cu
@@ -62,6 +62,13 @@ __device__ __forceinline__ float toFloat(T value) {
     }
 }
 
+#ifndef USE_ROCM
+inline bool supportsPdlOnCurrentDevice() {
+    const auto* props = at::cuda::getCurrentDeviceProperties();
+    return props != nullptr && props->major >= 9;
+}
+#endif
+
 // Scoring function enums
 enum ScoringFunc {
   SCORING_SOFTMAX = 0, // apply softmax
@@ -126,7 +133,9 @@ __launch_bounds__(TPB) __global__
     {
         const int idx = thread_row_offset + ii;
         const float val = toFloat(input[idx]);
-        const float softmax_val = expf(val - float_max) * normalizing_factor;
+        float softmax_val = expf(val - float_max) * normalizing_factor;
+        // Clamp NaN/Inf to 0 to prevent duplicate expert IDs downstream.
+        if (isnan(softmax_val) || isinf(softmax_val)) softmax_val = 0.f;
         output[idx] = softmax_val;
     }
 }
@@ -147,7 +156,9 @@ __launch_bounds__(TPB) __global__
     {
         const int idx = thread_row_offset + ii;
         const float val = toFloat(input[idx]);
-        const float sigmoid_val = 1.0f / (1.0f + __expf(-val));
+        float sigmoid_val = 1.0f / (1.0f + __expf(-val));
+        // Clamp NaN/Inf to 0 to prevent duplicate expert IDs downstream.
+        if (isnan(sigmoid_val) || isinf(sigmoid_val)) sigmoid_val = 0.f;
         output[idx] = sigmoid_val;
     }
 }
@@ -261,7 +272,7 @@ __launch_bounds__(TPB) __global__ void moeTopK(
 */
 
 template <int VPT, int NUM_EXPERTS, int WARPS_PER_CTA, int BYTES_PER_LDG, int WARP_SIZE_PARAM, typename IndType,
-          typename InputType = float, ScoringFunc SF>
+          typename InputType = float, ScoringFunc SF = SCORING_SOFTMAX, bool ENABLE_PDL = false>
 __launch_bounds__(WARPS_PER_CTA* WARP_SIZE_PARAM) __global__
     void topkGating(const InputType* input, const bool* finished, float* output, const int num_rows, IndType* indices,
         int* source_rows, const int k, const int start_expert, const int end_expert, const bool renormalize,
@@ -314,6 +325,14 @@ __launch_bounds__(WARPS_PER_CTA* WARP_SIZE_PARAM) __global__
     const int thread_row_in_warp = threadIdx.x / THREADS_PER_ROW;
     const int thread_row = warp_base_row + thread_row_in_warp;
 
+
+#if !defined(USE_ROCM) && defined(CUDA_VERSION) && (CUDA_VERSION >= 12000) && \
+    defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900)
+    if constexpr (ENABLE_PDL) {
+        asm volatile("griddepcontrol.wait;");
+    }
+#endif
+
     // Threads with indices out of bounds should early exit here.
     if (thread_row >= num_rows)
     {
@@ -442,6 +461,19 @@ __launch_bounds__(WARPS_PER_CTA* WARP_SIZE_PARAM) __global__
       }
     }
 
+    // Fix: clamp NaN/Inf values to 0 to prevent duplicate expert IDs.
+    // NaN gating (from degenerate hidden states in CUDA graph padding) causes
+    // softmax to produce all-NaN, which makes the argmax loop always pick
+    // expert 0 for every top-k slot, producing duplicate expert IDs that
+    // crash FlashInfer's three-step MoE sort.
+    // With 0s, the argmax uses index tie-breaking to pick [0,1,2,...,k-1].
+#pragma unroll
+    for (int ii = 0; ii < VPT; ++ii) {
+      if (isnan(row_chunk[ii]) || isinf(row_chunk[ii])) {
+        row_chunk[ii] = 0.f;
+      }
+    }
+
     static constexpr int COLS_PER_GROUP_LDG = ELTS_PER_LDG * THREADS_PER_ROW;
 
     // If bias is not null, use biased value for selection
@@ -560,6 +592,14 @@ __launch_bounds__(WARPS_PER_CTA* WARP_SIZE_PARAM) __global__
             }
         }
     }
+
+#if !defined(USE_ROCM) && defined(CUDA_VERSION) && (CUDA_VERSION >= 12000) && \
+    defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900)
+    if constexpr (ENABLE_PDL) {
+        asm volatile("griddepcontrol.launch_dependents;");
+    }
+#endif
+
 }
 
 namespace detail
@@ -580,7 +620,7 @@ struct TopkConstants
 template <int EXPERTS, int WARPS_PER_TB, int WARP_SIZE_PARAM, int MAX_BYTES_PER_LDG, typename IndType, typename InputType, ScoringFunc SF>
 void topkGatingLauncherHelper(const InputType* input, const bool* finished, float* output, IndType* indices,
     int* source_row, const int num_rows, const int k, const int start_expert, const int end_expert, const bool renormalize,
-    const float* bias, cudaStream_t stream)
+    const float* bias, bool enable_pdl, cudaStream_t stream)
 {
     static constexpr int BYTES_PER_LDG = MIN(MAX_BYTES_PER_LDG, sizeof(InputType) * EXPERTS);
     using Constants = detail::TopkConstants<EXPERTS, BYTES_PER_LDG, WARP_SIZE_PARAM, InputType>;
@@ -590,7 +630,27 @@ void topkGatingLauncherHelper(const InputType* input, const bool* finished, floa
     const int num_blocks = (num_warps + WARPS_PER_TB - 1) / WARPS_PER_TB;
 
     dim3 block_dim(WARP_SIZE_PARAM, WARPS_PER_TB);
-    topkGating<VPT, EXPERTS, WARPS_PER_TB, BYTES_PER_LDG, WARP_SIZE_PARAM, IndType, InputType, SF><<<num_blocks, block_dim, 0, stream>>>(
+#if !defined(USE_ROCM) && defined(CUDA_VERSION) && (CUDA_VERSION >= 12000)
+    if (enable_pdl && supportsPdlOnCurrentDevice()) {
+        cudaLaunchConfig_t config;
+        config.gridDim = num_blocks;
+        config.blockDim = block_dim;
+        config.dynamicSmemBytes = 0;
+        config.stream = stream;
+        cudaLaunchAttribute attrs[1];
+        attrs[0].id = cudaLaunchAttributeProgrammaticStreamSerialization;
+        attrs[0].val.programmaticStreamSerializationAllowed = 1;
+        config.numAttrs = 1;
+        config.attrs = attrs;
+        cudaLaunchKernelEx(
+            &config,
+            topkGating<VPT, EXPERTS, WARPS_PER_TB, BYTES_PER_LDG, WARP_SIZE_PARAM, IndType, InputType, SF, true>,
+            input, finished, output, num_rows, indices, source_row, k, start_expert, end_expert, renormalize, bias);
+        return;
+    }
+#endif
+    topkGating<VPT, EXPERTS, WARPS_PER_TB, BYTES_PER_LDG, WARP_SIZE_PARAM, IndType, InputType, SF, false>
+        <<<num_blocks, block_dim, 0, stream>>>(
         input, finished, output, num_rows, indices, source_row, k, start_expert, end_expert, renormalize, bias);
 }
 
@@ -602,7 +662,7 @@ void topkGatingLauncherHelper(const InputType* input, const bool* finished, floa
                              IndType, InputType, SF>(                         \
         gating_output, nullptr, topk_weights, topk_indices,                   \
         token_expert_indices, num_tokens, topk, 0, num_experts, renormalize,  \
-        bias, stream);
+        bias, enable_pdl, stream);
 #else
   #define LAUNCH_TOPK(NUM_EXPERTS, WARPS_PER_TB, MAX_BYTES)                    \
     if (WARP_SIZE == 64) {                                                     \
@@ -610,13 +670,13 @@ void topkGatingLauncherHelper(const InputType* input, const bool* finished, floa
                                IndType, InputType, SF>(                        \
           gating_output, nullptr, topk_weights, topk_indices,                  \
           token_expert_indices, num_tokens, topk, 0, num_experts, renormalize, \
-          bias, stream);                                                       \
+          bias, enable_pdl, stream);                                           \
     } else if (WARP_SIZE == 32) {                                              \
       topkGatingLauncherHelper<NUM_EXPERTS, WARPS_PER_TB, 32, MAX_BYTES,       \
                                IndType, InputType, SF>(                        \
           gating_output, nullptr, topk_weights, topk_indices,                  \
           token_expert_indices, num_tokens, topk, 0, num_experts, renormalize, \
-          bias, stream);                                                       \
+          bias, enable_pdl, stream);                                           \
     } else {                                                                   \
       assert(false &&                                                          \
              "Unsupported warp size. Only 32 and 64 are supported for ROCm");  \
@@ -635,6 +695,7 @@ void topkGatingKernelLauncher(
     const int topk,
     const bool renormalize,
     const float* bias,
+    bool enable_pdl,
     cudaStream_t stream) {
     static constexpr int WARPS_PER_TB = 4;
     static constexpr int BYTES_PER_LDG_POWER_OF_2 = 16;
@@ -728,6 +789,7 @@ void dispatch_topk_launch(
     torch::Tensor& softmax_workspace,
     int num_tokens, int num_experts, int topk, bool renormalize,
     std::optional<torch::Tensor> bias,
+    bool enable_pdl,
     cudaStream_t stream)
  {
     const float* bias_ptr = nullptr;
@@ -748,7 +810,7 @@ void dispatch_topk_launch(
             token_expert_indices.data_ptr<int>(),
             softmax_workspace.data_ptr<float>(),
             num_tokens, num_experts, topk, renormalize,
-            bias_ptr, stream);
+            bias_ptr, enable_pdl, stream);
     } else if (topk_indices.scalar_type() == at::ScalarType::UInt32) {
         vllm::moe::topkGatingKernelLauncher<uint32_t, ComputeType, SF>(
             reinterpret_cast<const ComputeType*>(gating_output.data_ptr()),
@@ -757,7 +819,7 @@ void dispatch_topk_launch(
             token_expert_indices.data_ptr<int>(),
             softmax_workspace.data_ptr<float>(),
             num_tokens, num_experts, topk, renormalize,
-            bias_ptr, stream);
+            bias_ptr, enable_pdl, stream);
     } else {
         TORCH_CHECK(topk_indices.scalar_type() == at::ScalarType::Long);
         vllm::moe::topkGatingKernelLauncher<int64_t, ComputeType, SF>(
@@ -767,7 +829,7 @@ void dispatch_topk_launch(
             token_expert_indices.data_ptr<int>(),
             softmax_workspace.data_ptr<float>(),
             num_tokens, num_experts, topk, renormalize,
-            bias_ptr, stream);
+            bias_ptr, enable_pdl, stream);
     }
 }
 
@@ -777,7 +839,8 @@ void topk_softmax(
     torch::Tensor& token_expert_indices,        // [num_tokens, topk]
     torch::Tensor& gating_output,               // [num_tokens, num_experts]
     bool renormalize,
-    std::optional<torch::Tensor> bias)
+    std::optional<torch::Tensor> bias,
+    bool enable_pdl)
 {
     const int num_experts = gating_output.size(-1);
     const auto num_tokens = gating_output.numel() / num_experts;
@@ -795,15 +858,15 @@ void topk_softmax(
     if (gating_output.scalar_type() == at::ScalarType::Float) {
         dispatch_topk_launch<float, vllm::moe::SCORING_SOFTMAX>(gating_output, topk_weights, topk_indices,
             token_expert_indices, softmax_workspace, num_tokens, num_experts, topk, renormalize,
-            bias, stream);
+            bias, enable_pdl, stream);
     } else if (gating_output.scalar_type() == at::ScalarType::Half) {
         dispatch_topk_launch<__half, vllm::moe::SCORING_SOFTMAX>(gating_output, topk_weights, topk_indices,
             token_expert_indices, softmax_workspace, num_tokens, num_experts, topk, renormalize,
-            bias, stream);
+            bias, enable_pdl, stream);
     } else if (gating_output.scalar_type() == at::ScalarType::BFloat16) {
         dispatch_topk_launch<__nv_bfloat16, vllm::moe::SCORING_SOFTMAX>(gating_output, topk_weights, topk_indices,
             token_expert_indices, softmax_workspace, num_tokens, num_experts, topk, renormalize,
-            bias, stream);
+            bias, enable_pdl, stream);
     } else {
         TORCH_CHECK(false, "Unsupported gating_output data type: ", gating_output.scalar_type());
     }
@@ -815,7 +878,8 @@ void topk_sigmoid(
     torch::Tensor& token_expert_indices,        // [num_tokens, topk]
     torch::Tensor& gating_output,               // [num_tokens, num_experts]
     bool renormalize,
-    std::optional<torch::Tensor> bias)
+    std::optional<torch::Tensor> bias,
+    bool enable_pdl)
 {
     const int num_experts = gating_output.size(-1);
     const auto num_tokens = gating_output.numel() / num_experts;
@@ -833,15 +897,15 @@ void topk_sigmoid(
     if (gating_output.scalar_type() == at::ScalarType::Float) {
         dispatch_topk_launch<float, vllm::moe::SCORING_SIGMOID>(gating_output, topk_weights, topk_indices,
             token_expert_indices, workspace, num_tokens, num_experts, topk, renormalize,
-            bias, stream);
+            bias, enable_pdl, stream);
     } else if (gating_output.scalar_type() == at::ScalarType::Half) {
         dispatch_topk_launch<__half, vllm::moe::SCORING_SIGMOID>(gating_output, topk_weights, topk_indices,
             token_expert_indices, workspace, num_tokens, num_experts, topk, renormalize,
-            bias, stream);
+            bias, enable_pdl, stream);
     } else if (gating_output.scalar_type() == at::ScalarType::BFloat16) {
         dispatch_topk_launch<__nv_bfloat16, vllm::moe::SCORING_SIGMOID>(gating_output, topk_weights, topk_indices,
             token_expert_indices, workspace, num_tokens, num_experts, topk, renormalize,
-            bias, stream);
+            bias, enable_pdl, stream);
     } else {
         TORCH_CHECK(false, "Unsupported gating_output data type: ", gating_output.scalar_type());
     }
diff --git a/csrc/moe/topk_softplus_sqrt_kernels.cu b/csrc/moe/topk_softplus_sqrt_kernels.cu
new file mode 100644
index 000000000000..d5bb8edadc67
--- /dev/null
+++ b/csrc/moe/topk_softplus_sqrt_kernels.cu
@@ -0,0 +1,727 @@
+/*
+ * Adapted from
+ * https://github.com/NVIDIA/TensorRT-LLM/blob/v0.7.1/cpp/tensorrt_llm/kernels/mixtureOfExperts/moe_kernels.cu
+ * Copyright (c) 2024, The vLLM team.
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION &
+ * AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <type_traits>
+#include <torch/all.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include "../cuda_compat.h"
+#include "../cub_helpers.h"
+#ifndef USE_ROCM
+  #include <cuda_bf16.h>
+  #include <cuda_fp16.h>
+#else
+  #include <hip/hip_bf16.h>
+  #include <hip/hip_fp16.h>
+typedef __hip_bfloat16 __nv_bfloat16;
+typedef __hip_bfloat162 __nv_bfloat162;
+#endif
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+namespace vllm {
+namespace moe {
+
+/// Aligned array type
+template <typename T,
+          /// Number of elements in the array
+          int N,
+          /// Alignment requirement in bytes
+          int Alignment = sizeof(T) * N>
+struct alignas(Alignment) AlignedArray {
+  T data[N];
+};
+
+template <typename T>
+__device__ __forceinline__ float toFloat(T value) {
+  if constexpr (std::is_same_v<T, float>) {
+    return value;
+  } else if constexpr (std::is_same_v<T, __nv_bfloat16>) {
+    return __bfloat162float(value);
+  } else if constexpr (std::is_same_v<T, __half>) {
+    return __half2float(value);
+  }
+}
+
+// ====================== TopK softplus_sqrt things
+// ===============================
+
+/*
+  A Top-K gating softplus_sqrt written to exploit when the number of experts in
+  the MoE layers are a small power of 2. This allows us to cleanly share the
+  rows among the threads in a single warp and eliminate communication between
+  warps (so no need to use shared mem).
+
+  It fuses the sigmoid, max and argmax into a single kernel.
+
+  Limitations:
+  1) This implementation is optimized for when the number of experts is a small
+  power of 2. Additionally it also supports when number of experts is multiple
+  of 64 which is still faster than the computing sigmoid and topK separately
+  (only tested on CUDA yet). 2) This implementation assumes k is small, but will
+  work for any k.
+*/
+
+template <int VPT, int NUM_EXPERTS, int WARPS_PER_CTA, int BYTES_PER_LDG,
+          int WARP_SIZE_PARAM, bool USE_HASH, typename IndType,
+          typename InputType = float>
+__launch_bounds__(WARPS_PER_CTA* WARP_SIZE_PARAM) __global__
+    void topkGatingSoftplusSqrt(
+        const InputType* input, const bool* finished, float* output,
+        const int num_rows, IndType* indices, int* source_rows, const int k,
+        const int start_expert, const int end_expert, const bool renormalize,
+        double routed_scaling_factor, const float* correction_bias,
+        const IndType* input_ids, const IndType* tid2eid) {
+  static_assert(std::is_same_v<InputType, float> ||
+                    std::is_same_v<InputType, __nv_bfloat16> ||
+                    std::is_same_v<InputType, __half>,
+                "InputType must be float, __nv_bfloat16, or __half");
+
+  // We begin by enforcing compile time assertions and setting up compile time
+  // constants.
+  static_assert(BYTES_PER_LDG == (BYTES_PER_LDG & -BYTES_PER_LDG),
+                "BYTES_PER_LDG must be power of 2");
+  static_assert(BYTES_PER_LDG <= 16, "BYTES_PER_LDG must be leq 16");
+
+  // Number of bytes each thread pulls in per load
+  static constexpr int ELTS_PER_LDG = BYTES_PER_LDG / sizeof(InputType);
+  static constexpr int ELTS_PER_ROW = NUM_EXPERTS;
+  static constexpr int THREADS_PER_ROW = ELTS_PER_ROW / VPT;
+  static constexpr int LDG_PER_THREAD = VPT / ELTS_PER_LDG;
+
+  if constexpr (std::is_same_v<InputType, __nv_bfloat16> ||
+                std::is_same_v<InputType, __half>) {
+    static_assert(ELTS_PER_LDG == 1 || ELTS_PER_LDG % 2 == 0,
+                  "ELTS_PER_LDG must be 1 or even for 16-bit conversion");
+  }
+
+  // Restrictions based on previous section.
+  static_assert(
+      VPT % ELTS_PER_LDG == 0,
+      "The elements per thread must be a multiple of the elements per ldg");
+  static_assert(WARP_SIZE_PARAM % THREADS_PER_ROW == 0,
+                "The threads per row must cleanly divide the threads per warp");
+  static_assert(THREADS_PER_ROW == (THREADS_PER_ROW & -THREADS_PER_ROW),
+                "THREADS_PER_ROW must be power of 2");
+  static_assert(THREADS_PER_ROW <= WARP_SIZE_PARAM,
+                "THREADS_PER_ROW can be at most warp size");
+
+  // We have NUM_EXPERTS elements per row. We specialize for small #experts
+  static constexpr int ELTS_PER_WARP = WARP_SIZE_PARAM * VPT;
+  static constexpr int ROWS_PER_WARP = ELTS_PER_WARP / ELTS_PER_ROW;
+  static constexpr int ROWS_PER_CTA = WARPS_PER_CTA * ROWS_PER_WARP;
+
+  // Restrictions for previous section.
+  static_assert(ELTS_PER_WARP % ELTS_PER_ROW == 0,
+                "The elts per row must cleanly divide the total elt per warp");
+
+  // ===================== From this point, we finally start computing run-time
+  // variables. ========================
+
+  // Compute CTA and warp rows. We pack multiple rows into a single warp, and a
+  // block contains WARPS_PER_CTA warps. This, each block processes a chunk of
+  // rows. We start by computing the start row for each block.
+  const int cta_base_row = blockIdx.x * ROWS_PER_CTA;
+
+  // Now, using the base row per thread block, we compute the base row per warp.
+  const int warp_base_row = cta_base_row + threadIdx.y * ROWS_PER_WARP;
+
+  // The threads in a warp are split into sub-groups that will work on a row.
+  // We compute row offset for each thread sub-group
+  const int thread_row_in_warp = threadIdx.x / THREADS_PER_ROW;
+  const int thread_row = warp_base_row + thread_row_in_warp;
+
+  // Threads with indices out of bounds should early exit here.
+  if (thread_row >= num_rows) {
+    return;
+  }
+  const bool row_is_active = finished ? !finished[thread_row] : true;
+
+  // We finally start setting up the read pointers for each thread. First, each
+  // thread jumps to the start of the row it will read.
+  const InputType* thread_row_ptr = input + thread_row * ELTS_PER_ROW;
+
+  // Now, we compute the group each thread belong to in order to determine the
+  // first column to start loads.
+  const int thread_group_idx = threadIdx.x % THREADS_PER_ROW;
+  const int first_elt_read_by_thread = thread_group_idx * ELTS_PER_LDG;
+  const InputType* thread_read_ptr = thread_row_ptr + first_elt_read_by_thread;
+
+  // Finally, we pull in the data from global mem
+  float row_chunk[VPT];
+
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+  asm volatile("griddepcontrol.wait;");
+#endif
+
+  // NOTE(zhuhaoran): dispatch different input types loading, BF16/FP16 convert
+  // to float
+  if constexpr (std::is_same_v<InputType, float>) {
+    using VecType = AlignedArray<float, ELTS_PER_LDG>;
+    VecType* row_chunk_vec_ptr = reinterpret_cast<VecType*>(&row_chunk);
+    const VecType* vec_thread_read_ptr =
+        reinterpret_cast<const VecType*>(thread_read_ptr);
+#pragma unroll
+    for (int ii = 0; ii < LDG_PER_THREAD; ++ii) {
+      row_chunk_vec_ptr[ii] = vec_thread_read_ptr[ii * THREADS_PER_ROW];
+    }
+  } else if constexpr (std::is_same_v<InputType, __nv_bfloat16>) {
+    if constexpr (ELTS_PER_LDG >= 2) {
+      using VecType = AlignedArray<__nv_bfloat16, ELTS_PER_LDG>;
+      float2* row_chunk_f2 = reinterpret_cast<float2*>(row_chunk);
+      const VecType* vec_thread_read_ptr =
+          reinterpret_cast<const VecType*>(thread_read_ptr);
+#pragma unroll
+      for (int ii = 0; ii < LDG_PER_THREAD; ++ii) {
+        VecType vec = vec_thread_read_ptr[ii * THREADS_PER_ROW];
+        int base_idx_f2 = ii * ELTS_PER_LDG / 2;
+#pragma unroll
+        for (int jj = 0; jj < ELTS_PER_LDG / 2; ++jj) {
+          row_chunk_f2[base_idx_f2 + jj] = __bfloat1622float2(
+              *reinterpret_cast<const __nv_bfloat162*>(vec.data + jj * 2));
+        }
+      }
+    } else {  // ELTS_PER_LDG == 1
+#pragma unroll
+      for (int ii = 0; ii < LDG_PER_THREAD; ++ii) {
+        const __nv_bfloat16* scalar_ptr =
+            thread_read_ptr + ii * THREADS_PER_ROW;
+        row_chunk[ii] = __bfloat162float(*scalar_ptr);
+      }
+    }
+  } else if constexpr (std::is_same_v<InputType, __half>) {
+    if constexpr (ELTS_PER_LDG >= 2) {
+      using VecType = AlignedArray<__half, ELTS_PER_LDG>;
+      float2* row_chunk_f2 = reinterpret_cast<float2*>(row_chunk);
+      const VecType* vec_thread_read_ptr =
+          reinterpret_cast<const VecType*>(thread_read_ptr);
+#pragma unroll
+      for (int ii = 0; ii < LDG_PER_THREAD; ++ii) {
+        VecType vec = vec_thread_read_ptr[ii * THREADS_PER_ROW];
+        int base_idx_f2 = ii * ELTS_PER_LDG / 2;
+#pragma unroll
+        for (int jj = 0; jj < ELTS_PER_LDG / 2; ++jj) {
+          row_chunk_f2[base_idx_f2 + jj] = __half22float2(
+              *reinterpret_cast<const __half2*>(vec.data + jj * 2));
+        }
+      }
+    } else {  // ELTS_PER_LDG == 1
+#pragma unroll
+      for (int ii = 0; ii < LDG_PER_THREAD; ++ii) {
+        const __half* scalar_ptr = thread_read_ptr + ii * THREADS_PER_ROW;
+        row_chunk[ii] = __half2float(*scalar_ptr);
+      }
+    }
+  }
+  constexpr float threshold = 20.0f;
+  constexpr float beta = 1.0f;
+
+  // Hash MoE path: indices are predetermined from lookup table
+  if constexpr (USE_HASH) {
+    const IndType token_id = input_ids[thread_row];
+    const IndType* expert_indices_for_token = tid2eid + token_id * k;
+#pragma unroll
+    for (int ii = 0; ii < VPT; ++ii) {
+      float val = row_chunk[ii];
+      float val_b = val * beta;
+      val = (val_b > threshold) ? val : (__logf(1.0f + __expf(val_b))) / beta;
+      row_chunk[ii] = sqrtf(val);
+    }
+    float selected_sum = 0.f;
+#pragma unroll
+    for (int k_idx = 0; k_idx < k; ++k_idx) {
+      const int expert = expert_indices_for_token[k_idx];
+      const int idx = k * thread_row + k_idx;
+      for (int ii = 0; ii < VPT; ++ii) {
+        const int group_id = ii / ELTS_PER_LDG;
+        const int local_id = ii % ELTS_PER_LDG;
+        const int expert_idx = first_elt_read_by_thread +
+                               group_id * THREADS_PER_ROW * ELTS_PER_LDG +
+                               local_id;
+        if (expert == expert_idx) {
+          indices[idx] = expert;
+          selected_sum += row_chunk[ii];
+          break;
+        }
+      }
+    }
+    // Compute per-thread scale (using warp reduction when renormalizing).
+    // THREADS_PER_ROW-parameterized butterfly works for both warp sizes (32
+    // on CUDA, 64 on ROCm CDNA) and any THREADS_PER_ROW the dispatch picks.
+    if (renormalize) {
+#pragma unroll
+      for (int mask = THREADS_PER_ROW / 2; mask > 0; mask /= 2) {
+        selected_sum +=
+            VLLM_SHFL_XOR_SYNC_WIDTH(selected_sum, mask, THREADS_PER_ROW);
+      }
+    }
+    float scale = static_cast<float>(routed_scaling_factor);
+    if (renormalize) {
+      const float denom = selected_sum > 0.f ? selected_sum : 1.f;
+      scale /= denom;
+    }
+
+#pragma unroll
+    for (int k_idx = 0; k_idx < k; ++k_idx) {
+      const int expert = expert_indices_for_token[k_idx];
+      const int idx = k * thread_row + k_idx;
+      for (int ii = 0; ii < VPT; ++ii) {
+        const int group_id = ii / ELTS_PER_LDG;
+        const int local_id = ii % ELTS_PER_LDG;
+        const int expert_idx = first_elt_read_by_thread +
+                               group_id * THREADS_PER_ROW * ELTS_PER_LDG +
+                               local_id;
+        if (expert == expert_idx) {
+          output[idx] = row_chunk[ii] * scale;
+          break;
+        }
+      }
+    }
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+    asm volatile("griddepcontrol.launch_dependents;");
+#endif
+    return;
+  } else {
+#pragma unroll
+    for (int ii = 0; ii < VPT; ++ii) {
+      float val = row_chunk[ii];
+      float val_b = val * beta;
+      // Compute softplus: log(1 + exp(val)) with numerical stability
+      // When val > threshold, softplus(x) ≈ x to avoid exp overflow
+      val = (val_b > threshold) ? val : (__logf(1.0f + __expf(val_b))) / beta;
+      val = sqrtf(val);
+      if (correction_bias) {
+        const int group_id = ii / ELTS_PER_LDG;
+        const int local_id = ii % ELTS_PER_LDG;
+        const int expert_idx = first_elt_read_by_thread +
+                               group_id * THREADS_PER_ROW * ELTS_PER_LDG +
+                               local_id;
+        val = val + correction_bias[expert_idx];
+      }
+      row_chunk[ii] = val;
+    }
+
+    // Original TopK path: find top-k experts by score
+    // Now, sigmoid_res contains the sigmoid of the row chunk. Now, I want to
+    // find the topk elements in each row, along with the max index.
+    int start_col = first_elt_read_by_thread;
+    static constexpr int COLS_PER_GROUP_LDG = ELTS_PER_LDG * THREADS_PER_ROW;
+
+    float selected_sum = 0.f;
+    for (int k_idx = 0; k_idx < k; ++k_idx) {
+      // First, each thread does the local argmax
+      float max_val = row_chunk[0];
+      int expert = start_col;
+#pragma unroll
+      for (int ldg = 0, col = start_col; ldg < LDG_PER_THREAD;
+           ++ldg, col += COLS_PER_GROUP_LDG) {
+#pragma unroll
+        for (int ii = 0; ii < ELTS_PER_LDG; ++ii) {
+          float val = row_chunk[ldg * ELTS_PER_LDG + ii];
+
+          // No check on the experts here since columns with the smallest index
+          // are processed first and only updated if > (not >=)
+          if (val > max_val) {
+            max_val = val;
+            expert = col + ii;
+          }
+        }
+      }
+
+// Now, we perform the argmax reduce. We use the butterfly pattern so threads
+// reach consensus about the max. This will be useful for K > 1 so that the
+// threads can agree on "who" had the max value. That thread can then blank out
+// their max with -inf and the warp can run more iterations...
+#pragma unroll
+      for (int mask = THREADS_PER_ROW / 2; mask > 0; mask /= 2) {
+        float other_max =
+            VLLM_SHFL_XOR_SYNC_WIDTH(max_val, mask, THREADS_PER_ROW);
+        int other_expert =
+            VLLM_SHFL_XOR_SYNC_WIDTH(expert, mask, THREADS_PER_ROW);
+
+        // We want lower indices to "win" in every thread so we break ties this
+        // way
+        if (other_max > max_val ||
+            (other_max == max_val && other_expert < expert)) {
+          max_val = other_max;
+          expert = other_expert;
+        }
+      }
+
+      // Write the max for this k iteration to global memory.
+      if (thread_group_idx == 0) {
+        // Add a guard to ignore experts not included by this node
+        const bool node_uses_expert =
+            expert >= start_expert && expert < end_expert;
+        const bool should_process_row = row_is_active && node_uses_expert;
+
+        // The lead thread from each sub-group will write out the final results
+        // to global memory. (This will be a single) thread per row of the
+        // input/output matrices.
+        const int idx = k * thread_row + k_idx;
+        if (correction_bias != nullptr) {
+          max_val -= correction_bias[expert];
+        }
+        output[idx] = max_val;
+        indices[idx] =
+            should_process_row ? (expert - start_expert) : NUM_EXPERTS;
+        source_rows[idx] = k_idx * num_rows + thread_row;
+        if (renormalize) {
+          selected_sum += max_val;
+        }
+      }
+
+      // Finally, we clear the value in the thread with the current max if there
+      // is another iteration to run.
+      if (k_idx + 1 < k) {
+        const int ldg_group_for_expert = expert / COLS_PER_GROUP_LDG;
+        const int thread_to_clear_in_group =
+            (expert / ELTS_PER_LDG) % THREADS_PER_ROW;
+
+        // Only the thread in the group which produced the max will reset the
+        // "winning" value to -inf.
+        if (thread_group_idx == thread_to_clear_in_group) {
+          const int offset_for_expert = expert % ELTS_PER_LDG;
+          // Safe to set to any negative value since row_chunk values must be
+          // between 0 and 1.
+          row_chunk[ldg_group_for_expert * ELTS_PER_LDG + offset_for_expert] =
+              -10000.f;
+        }
+      }
+    }
+
+    // Apply renormalization and routed scaling factor to final weights.
+    if (thread_group_idx == 0) {
+      float scale = static_cast<float>(routed_scaling_factor);
+      if (renormalize) {
+        const float denom = selected_sum > 0.f ? selected_sum : 1.f;
+        scale /= denom;
+      }
+      for (int k_idx = 0; k_idx < k; ++k_idx) {
+        const int idx = k * thread_row + k_idx;
+        output[idx] = output[idx] * scale;
+      }
+    }
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+    asm volatile("griddepcontrol.launch_dependents;");
+#endif
+  }
+}
+
+namespace detail {
+// Constructs some constants needed to partition the work across threads at
+// compile time.
+template <int EXPERTS, int BYTES_PER_LDG, int WARP_SIZE_PARAM,
+          typename InputType>
+struct TopkConstants {
+  static constexpr int ELTS_PER_LDG = BYTES_PER_LDG / sizeof(InputType);
+  static_assert(EXPERTS / (ELTS_PER_LDG * WARP_SIZE_PARAM) == 0 ||
+                    EXPERTS % (ELTS_PER_LDG * WARP_SIZE_PARAM) == 0,
+                "");
+  static constexpr int VECs_PER_THREAD =
+      MAX(1, EXPERTS / (ELTS_PER_LDG * WARP_SIZE_PARAM));
+  static constexpr int VPT = VECs_PER_THREAD * ELTS_PER_LDG;
+  static constexpr int THREADS_PER_ROW = EXPERTS / VPT;
+  static const int ROWS_PER_WARP = WARP_SIZE_PARAM / THREADS_PER_ROW;
+};
+}  // namespace detail
+
+#define DISPATCH_HASH(use_hash, USE_HASH, ...)                                 \
+  if (use_hash) {                                                              \
+    const bool USE_HASH = true;                                                \
+    static_assert(USE_HASH == true, "USE_HASH must be compile-time constant"); \
+    __VA_ARGS__                                                                \
+  } else {                                                                     \
+    const bool USE_HASH = false;                                               \
+    static_assert(USE_HASH == false,                                           \
+                  "USE_HASH must be compile-time constant");                   \
+    __VA_ARGS__                                                                \
+  }
+
+template <int EXPERTS, int WARPS_PER_TB, int WARP_SIZE_PARAM,
+          int MAX_BYTES_PER_LDG, typename IndType, typename InputType>
+void topkGatingSoftplusSqrtLauncherHelper(
+    const InputType* input, const bool* finished, float* output,
+    IndType* indices, int* source_row, const int num_rows, const int k,
+    const int start_expert, const int end_expert, const bool renormalize,
+    double routed_scaling_factor, const float* correction_bias,
+    const bool use_hash, const IndType* input_ids, const IndType* tid2eid,
+    cudaStream_t stream) {
+  static constexpr int BYTES_PER_LDG =
+      MIN(MAX_BYTES_PER_LDG, sizeof(InputType) * EXPERTS);
+  using Constants =
+      detail::TopkConstants<EXPERTS, BYTES_PER_LDG, WARP_SIZE_PARAM, InputType>;
+  static constexpr int VPT = Constants::VPT;
+  static constexpr int ROWS_PER_WARP = Constants::ROWS_PER_WARP;
+  const int num_warps = (num_rows + ROWS_PER_WARP - 1) / ROWS_PER_WARP;
+  const int num_blocks = (num_warps + WARPS_PER_TB - 1) / WARPS_PER_TB;
+  dim3 block_dim(WARP_SIZE_PARAM, WARPS_PER_TB);
+  DISPATCH_HASH(use_hash, USE_HASH, {
+    auto* kernel =
+        &topkGatingSoftplusSqrt<VPT, EXPERTS, WARPS_PER_TB, BYTES_PER_LDG,
+                                WARP_SIZE_PARAM, USE_HASH, IndType, InputType>;
+#ifndef USE_ROCM
+    cudaLaunchConfig_t config = {};
+    config.gridDim = num_blocks;
+    config.blockDim = block_dim;
+    config.dynamicSmemBytes = 0;
+    config.stream = stream;
+    cudaLaunchAttribute attrs[1];
+    attrs[0].id = cudaLaunchAttributeProgrammaticStreamSerialization;
+    attrs[0].val.programmaticStreamSerializationAllowed = 1;
+    config.numAttrs = 1;
+    config.attrs = attrs;
+    cudaLaunchKernelEx(&config, kernel, input, finished, output, num_rows,
+                       indices, source_row, k, start_expert, end_expert,
+                       renormalize, routed_scaling_factor, correction_bias,
+                       input_ids, tid2eid);
+#else
+    kernel<<<num_blocks, block_dim, 0, stream>>>(
+        input, finished, output, num_rows, indices, source_row, k, start_expert,
+        end_expert, renormalize, routed_scaling_factor, correction_bias,
+        input_ids, tid2eid);
+#endif
+  })
+}
+
+#ifndef USE_ROCM
+  #define LAUNCH_SOFTPLUS_SQRT(NUM_EXPERTS, WARPS_PER_TB, MAX_BYTES)           \
+    static_assert(WARP_SIZE == 32,                                             \
+                  "Unsupported warp size. Only 32 is supported for CUDA");     \
+    topkGatingSoftplusSqrtLauncherHelper<NUM_EXPERTS, WARPS_PER_TB, WARP_SIZE, \
+                                         MAX_BYTES>(                           \
+        gating_output, nullptr, topk_weights, topk_indices,                    \
+        token_expert_indices, num_tokens, topk, 0, num_experts, renormalize,   \
+        routed_scaling_factor, correction_bias, use_hash, input_ids, tid2eid,  \
+        stream);
+#else
+  #define LAUNCH_SOFTPLUS_SQRT(NUM_EXPERTS, WARPS_PER_TB, MAX_BYTES)           \
+    if (WARP_SIZE == 64) {                                                     \
+      topkGatingSoftplusSqrtLauncherHelper<NUM_EXPERTS, WARPS_PER_TB, 64,      \
+                                           MAX_BYTES>(                         \
+          gating_output, nullptr, topk_weights, topk_indices,                  \
+          token_expert_indices, num_tokens, topk, 0, num_experts, renormalize, \
+          routed_scaling_factor, correction_bias, use_hash, input_ids,         \
+          tid2eid, stream);                                                    \
+    } else if (WARP_SIZE == 32) {                                              \
+      topkGatingSoftplusSqrtLauncherHelper<NUM_EXPERTS, WARPS_PER_TB, 32,      \
+                                           MAX_BYTES>(                         \
+          gating_output, nullptr, topk_weights, topk_indices,                  \
+          token_expert_indices, num_tokens, topk, 0, num_experts, renormalize, \
+          routed_scaling_factor, correction_bias, use_hash, input_ids,         \
+          tid2eid, stream);                                                    \
+    } else {                                                                   \
+      assert(false &&                                                          \
+             "Unsupported warp size. Only 32 and 64 are supported for ROCm");  \
+    }
+#endif
+
+template <typename IndType, typename InputType>
+void topkGatingSoftplusSqrtKernelLauncher(
+    const InputType* gating_output, float* topk_weights, IndType* topk_indices,
+    int* token_expert_indices, const int num_tokens, const int num_experts,
+    const int topk, const bool renormalize, double routed_scaling_factor,
+    const float* correction_bias, const bool use_hash, const IndType* input_ids,
+    const IndType* tid2eid, cudaStream_t stream) {
+  static constexpr int WARPS_PER_TB = 4;
+  static constexpr int BYTES_PER_LDG_POWER_OF_2 = 16;
+  // for bfloat16 dtype, we need 4 bytes loading to make sure num_experts
+  // elements can be loaded by a warp
+  static constexpr int BYTES_PER_LDG_MULTIPLE_64 =
+      (std::is_same_v<InputType, __nv_bfloat16> ||
+       std::is_same_v<InputType, __half>)
+          ? 4
+          : 8;
+  // Narrower LDG (ELTS_PER_LDG=1) used by 192/320/448/576 on ROCm WARP_SIZE=64
+  // where ELTS_PER_LDG=2 fails the EXPERTS%(ELTS_PER_LDG*WARP_SIZE)==0 check.
+  // On CUDA WARP_SIZE=32 the wider LDG already aligns, so the alias collapses
+  // back to BYTES_PER_LDG_MULTIPLE_64 — no behavioral change for CUDA.
+#ifdef USE_ROCM
+  static constexpr int BYTES_PER_LDG_MULTIPLE_64_NARROW =
+      (std::is_same_v<InputType, __nv_bfloat16> ||
+       std::is_same_v<InputType, __half>)
+          ? 2
+          : 4;
+#else
+  static constexpr int BYTES_PER_LDG_MULTIPLE_64_NARROW =
+      BYTES_PER_LDG_MULTIPLE_64;
+#endif
+  switch (num_experts) {
+    case 1:
+      LAUNCH_SOFTPLUS_SQRT(1, WARPS_PER_TB, BYTES_PER_LDG_POWER_OF_2);
+      break;
+    case 2:
+      LAUNCH_SOFTPLUS_SQRT(2, WARPS_PER_TB, BYTES_PER_LDG_POWER_OF_2);
+      break;
+    case 4:
+      LAUNCH_SOFTPLUS_SQRT(4, WARPS_PER_TB, BYTES_PER_LDG_POWER_OF_2);
+      break;
+    case 8:
+      LAUNCH_SOFTPLUS_SQRT(8, WARPS_PER_TB, BYTES_PER_LDG_POWER_OF_2);
+      break;
+    case 16:
+      LAUNCH_SOFTPLUS_SQRT(16, WARPS_PER_TB, BYTES_PER_LDG_POWER_OF_2);
+      break;
+    case 32:
+      LAUNCH_SOFTPLUS_SQRT(32, WARPS_PER_TB, BYTES_PER_LDG_POWER_OF_2);
+      break;
+    case 64:
+      LAUNCH_SOFTPLUS_SQRT(64, WARPS_PER_TB, BYTES_PER_LDG_POWER_OF_2);
+      break;
+    case 128:
+      LAUNCH_SOFTPLUS_SQRT(128, WARPS_PER_TB, BYTES_PER_LDG_POWER_OF_2);
+      break;
+    case 256:
+      LAUNCH_SOFTPLUS_SQRT(256, WARPS_PER_TB, BYTES_PER_LDG_POWER_OF_2);
+      break;
+    case 512:
+      LAUNCH_SOFTPLUS_SQRT(512, WARPS_PER_TB, BYTES_PER_LDG_POWER_OF_2);
+      break;
+      // Multiples of 64 that are not powers of 2. The kernel requires
+      // EXPERTS % (ELTS_PER_LDG * WARP_SIZE) == 0. With ELTS_PER_LDG=2
+      // (BYTES_PER_LDG_MULTIPLE_64), this holds for all five values on CUDA
+      // WARP_SIZE=32 but only for 384 on ROCm WARP_SIZE=64. The other four
+      // use BYTES_PER_LDG_MULTIPLE_64_NARROW (ELTS_PER_LDG=1), which
+      // satisfies the assertion for any multiple of 64 on either backend;
+      // on CUDA the narrow alias collapses back to the wider load, so CUDA
+      // behavior is unchanged.
+    case 192:
+      LAUNCH_SOFTPLUS_SQRT(192, WARPS_PER_TB, BYTES_PER_LDG_MULTIPLE_64_NARROW);
+      break;
+    case 320:
+      LAUNCH_SOFTPLUS_SQRT(320, WARPS_PER_TB, BYTES_PER_LDG_MULTIPLE_64_NARROW);
+      break;
+    case 384:
+      LAUNCH_SOFTPLUS_SQRT(384, WARPS_PER_TB, BYTES_PER_LDG_MULTIPLE_64);
+      break;
+    case 448:
+      LAUNCH_SOFTPLUS_SQRT(448, WARPS_PER_TB, BYTES_PER_LDG_MULTIPLE_64_NARROW);
+      break;
+    case 576:
+      LAUNCH_SOFTPLUS_SQRT(576, WARPS_PER_TB, BYTES_PER_LDG_MULTIPLE_64_NARROW);
+      break;
+    default: {
+      TORCH_CHECK(false, "Unsupported expert number: ", num_experts);
+    }
+  }
+}
+
+}  // namespace moe
+}  // namespace vllm
+
+template <typename ComputeType>
+void dispatch_topk_softplus_sqrt_launch(
+    const ComputeType* gating_output, torch::Tensor& topk_weights,
+    torch::Tensor& topk_indices, torch::Tensor& token_expert_indices,
+    int num_tokens, int num_experts, int topk, bool renormalize,
+    double routed_scaling_factor,
+    const c10::optional<torch::Tensor>& correction_bias,
+    const c10::optional<torch::Tensor>& input_ids,
+    const c10::optional<torch::Tensor>& tid2eid, cudaStream_t stream) {
+  const float* bias_ptr = nullptr;
+  if (correction_bias.has_value()) {
+    bias_ptr = correction_bias.value().data_ptr<float>();
+  }
+  bool use_hash = false;
+  if (tid2eid.has_value()) {
+    TORCH_CHECK(input_ids.has_value(), "input_ids is required for hash MoE");
+    use_hash = true;
+  }
+  if (topk_indices.scalar_type() == at::ScalarType::Int) {
+    const int* input_ids_ptr = nullptr;
+    const int* tid2eid_ptr = nullptr;
+    if (tid2eid.has_value()) {
+      input_ids_ptr = input_ids.value().data_ptr<int>();
+      tid2eid_ptr = tid2eid.value().data_ptr<int>();
+    }
+
+    vllm::moe::topkGatingSoftplusSqrtKernelLauncher<int, ComputeType>(
+        gating_output, topk_weights.data_ptr<float>(),
+        topk_indices.data_ptr<int>(), token_expert_indices.data_ptr<int>(),
+        num_tokens, num_experts, topk, renormalize, routed_scaling_factor,
+        bias_ptr, use_hash, input_ids_ptr, tid2eid_ptr, stream);
+  } else if (topk_indices.scalar_type() == at::ScalarType::UInt32) {
+    const uint32_t* input_ids_ptr = nullptr;
+    const uint32_t* tid2eid_ptr = nullptr;
+    if (tid2eid.has_value()) {
+      input_ids_ptr = input_ids.value().data_ptr<uint32_t>();
+      tid2eid_ptr = tid2eid.value().data_ptr<uint32_t>();
+    }
+    vllm::moe::topkGatingSoftplusSqrtKernelLauncher<uint32_t, ComputeType>(
+        gating_output, topk_weights.data_ptr<float>(),
+        topk_indices.data_ptr<uint32_t>(), token_expert_indices.data_ptr<int>(),
+        num_tokens, num_experts, topk, renormalize, routed_scaling_factor,
+        bias_ptr, use_hash, input_ids_ptr, tid2eid_ptr, stream);
+  } else {
+    TORCH_CHECK(topk_indices.scalar_type() == at::ScalarType::Long);
+
+    const int64_t* input_ids_ptr = nullptr;
+    const int64_t* tid2eid_ptr = nullptr;
+    if (tid2eid.has_value()) {
+      input_ids_ptr = input_ids.value().data_ptr<int64_t>();
+      tid2eid_ptr = tid2eid.value().data_ptr<int64_t>();
+    }
+
+    vllm::moe::topkGatingSoftplusSqrtKernelLauncher<int64_t, ComputeType>(
+        gating_output, topk_weights.data_ptr<float>(),
+        topk_indices.data_ptr<int64_t>(), token_expert_indices.data_ptr<int>(),
+        num_tokens, num_experts, topk, renormalize, routed_scaling_factor,
+        bias_ptr, use_hash, input_ids_ptr, tid2eid_ptr, stream);
+  }
+}
+
+void topk_softplus_sqrt(
+    torch::Tensor& topk_weights,          // [num_tokens, topk]
+    torch::Tensor& topk_indices,          // [num_tokens, topk]
+    torch::Tensor& token_expert_indices,  // [num_tokens, topk]
+    torch::Tensor& gating_output,         // [num_tokens, num_experts]
+    bool renormalize, double routed_scaling_factor,
+    const c10::optional<torch::Tensor>& correction_bias,
+    const c10::optional<torch::Tensor>& input_ids,
+    const c10::optional<torch::Tensor>& tid2eid) {
+  const int num_experts = gating_output.size(-1);
+  const auto num_tokens = gating_output.numel() / num_experts;
+  const int topk = topk_weights.size(-1);
+  const at::cuda::OptionalCUDAGuard device_guard(device_of(gating_output));
+  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  if (gating_output.scalar_type() == at::ScalarType::Float) {
+    dispatch_topk_softplus_sqrt_launch<float>(
+        gating_output.data_ptr<float>(), topk_weights, topk_indices,
+        token_expert_indices, num_tokens, num_experts, topk, renormalize,
+        routed_scaling_factor, correction_bias, input_ids, tid2eid, stream);
+  } else if (gating_output.scalar_type() == at::ScalarType::Half) {
+    dispatch_topk_softplus_sqrt_launch<__half>(
+        reinterpret_cast<const __half*>(gating_output.data_ptr<at::Half>()),
+        topk_weights, topk_indices, token_expert_indices, num_tokens,
+        num_experts, topk, renormalize, routed_scaling_factor, correction_bias,
+        input_ids, tid2eid, stream);
+  } else if (gating_output.scalar_type() == at::ScalarType::BFloat16) {
+    dispatch_topk_softplus_sqrt_launch<__nv_bfloat16>(
+        reinterpret_cast<const __nv_bfloat16*>(
+            gating_output.data_ptr<at::BFloat16>()),
+        topk_weights, topk_indices, token_expert_indices, num_tokens,
+        num_experts, topk, renormalize, routed_scaling_factor, correction_bias,
+        input_ids, tid2eid, stream);
+  } else {
+    TORCH_CHECK(false, "Unsupported gating_output data type: ",
+                gating_output.scalar_type());
+  }
+}
\ No newline at end of file
diff --git a/csrc/moe/torch_bindings.cpp b/csrc/moe/torch_bindings.cpp
index 4cd74366ea4d..3cf5b0546024 100644
--- a/csrc/moe/torch_bindings.cpp
+++ b/csrc/moe/torch_bindings.cpp
@@ -6,16 +6,23 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, m) {
   m.def(
       "topk_softmax(Tensor! topk_weights, Tensor! topk_indices, Tensor! "
       "token_expert_indices, Tensor gating_output, bool renormalize, Tensor? "
-      "bias) -> ()");
+      "bias, bool enable_pdl=False) -> ()");
   m.impl("topk_softmax", torch::kCUDA, &topk_softmax);
 
   // Apply topk sigmoid to the gating outputs.
   m.def(
       "topk_sigmoid(Tensor! topk_weights, Tensor! topk_indices, Tensor! "
       "token_expert_indices, Tensor gating_output, bool renormalize, Tensor? "
-      "bias) -> ()");
+      "bias, bool enable_pdl=False) -> ()");
   m.impl("topk_sigmoid", torch::kCUDA, &topk_sigmoid);
 
+  m.def(
+      "topk_softplus_sqrt(Tensor! topk_weights, Tensor! topk_indices, Tensor! "
+      "token_expert_indices, Tensor gating_output, bool renormalize, float "
+      "routed_scaling_factor, Tensor? "
+      "bias, Tensor? input_ids, Tensor? tid2eid) -> ()");
+  m.impl("topk_softplus_sqrt", torch::kCUDA, &topk_softplus_sqrt);
+
   // Calculate the result of moe by summing up the partial results
   // from all selected experts.
   m.def("moe_sum(Tensor input, Tensor! output) -> ()");
@@ -85,16 +92,6 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, m) {
       "bool use_fp32_reduce, bool is_zp_float,"
       "int thread_k, int thread_n, int blocks_per_sm) -> Tensor");
 
-  m.def(
-      "marlin_gemm_moe(Tensor! a, Tensor! b_q_weights, Tensor! sorted_ids, "
-      "Tensor! topk_weights, Tensor! topk_ids, Tensor! b_scales, Tensor! "
-      "b_zeros, Tensor! g_idx, Tensor! perm, Tensor! workspace, "
-      "int b_q_type, SymInt size_m, "
-      "SymInt size_n, SymInt size_k, bool is_k_full, int num_experts, int "
-      "topk, "
-      "int moe_block_size, bool replicate_input, bool apply_weights)"
-      " -> Tensor");
-
   m.def(
       "moe_permute(Tensor input, Tensor topk_ids,"
       "Tensor token_expert_indices, Tensor? expert_map, int n_expert,"
@@ -125,19 +122,20 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, m) {
       "Tensor)");
   m.impl("grouped_topk", torch::kCUDA, &grouped_topk);
 
-  // cuBLAS bf16 x bf16 -> fp32 router GEMM (fallback for non-SM90 / batch > 16)
-  m.def("router_gemm_bf16_fp32(Tensor input, Tensor weight) -> Tensor");
-  m.impl("router_gemm_bf16_fp32", torch::kCUDA, &router_gemm_bf16_fp32);
-
   // DeepSeek V3 optimized router GEMM for SM90+
   m.def("dsv3_router_gemm(Tensor! output, Tensor mat_a, Tensor mat_b) -> ()");
   // conditionally compiled so impl registration is in source file
 
-  // gpt-oss optimized router GEMM kernel for SM90+
+  // DeepSeek V4 fused RMSNorm + router GEMV for SM90+
   m.def(
-      "gpt_oss_router_gemm(Tensor! output, Tensor input, Tensor weights, "
-      "Tensor bias) -> ()");
-  m.impl("gpt_oss_router_gemm", torch::kCUDA, &gpt_oss_router_gemm);
+      "dsv4_norm_router_gemm(Tensor! logits, Tensor! normed_x, Tensor x, "
+      "Tensor norm_weight, Tensor gate_weight, float eps) -> ()");
+  // conditionally compiled so impl registration is in source file
+
+  // BF16/FP32 x FP32 -> FP32 router GEMM for H=3072, E=256, M<=32
+  // (SM90+)
+  m.def("fp32_router_gemm(Tensor! output, Tensor mat_a, Tensor mat_b) -> ()");
+  // impl registration is in fp32_router_gemm_entry.cu
 #endif
 }
 
diff --git a/csrc/nvfp4_kv_cache_kernels.cu b/csrc/nvfp4_kv_cache_kernels.cu
new file mode 100644
index 000000000000..d6aa715c203d
--- /dev/null
+++ b/csrc/nvfp4_kv_cache_kernels.cu
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+// NVFP4 KV cache store kernel.
+// Quantizes bf16 key/value to packed FP4 + FP8 block scales and writes them
+// into the paged KV cache.
+//
+// Per page layout: [K_data | K_scale | V_data | V_scale]
+// Both data and scale regions are contiguous per head, enabling direct
+// TMA descriptor use.
+//
+// Reuses device functions from nvfp4_utils.cuh:
+//   - cvt_warp_fp16_to_fp4()  for bf16 → fp4 quantization + block scale
+//   - pack_fp4()              for packing float pairs to fp4
+//   - reciprocal_approximate_ftz() for fast reciprocal
+
+#define NVFP4_ENABLE_ELTS16 1
+#include "libtorch_stable/quantization/fp4/nvfp4_utils.cuh"
+
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <torch/all.h>
+
+#include "dispatch_utils.h"
+
+namespace vllm {
+
+// Compute swizzled scale offset for SM100 trtllm-gen MHA kernel.
+// The swizzle pattern for HND layout is:
+//   [T//4, 4, 4, S//4] → permute(0, 2, 3, 1) → reshape to [T, S]
+// where T = block_size (page_size), S = scale_dim = head_size // 16.
+//
+// For a linear (t, s) position, the swizzled position is:
+//   swizzled_t = (t / 4) * 4 + (s / (S / 4))
+//   swizzled_s = (s % (S / 4)) * 4 + (t % 4)
+__device__ __forceinline__ int swizzle_scale_offset(int t, int s,
+                                                    int scale_dim) {
+  int s_group = scale_dim / 4;
+  int swizzled_t = (t / 4) * 4 + (s / s_group);
+  int swizzled_s = (s % s_group) * 4 + (t % 4);
+  return swizzled_t * scale_dim + swizzled_s;
+}
+
+// Kernel: quantize bf16 key/value to NVFP4 and store in paged KV cache.
+//
+// Takes separate data and scale cache pointers for K and V.
+// Within each KV side, data and scale are separate contiguous regions.
+//
+// Threading: one CUDA block per token, threads process heads and
+// groups of 16 elements within each head.
+template <typename scalar_t>
+__global__ void reshape_and_cache_nvfp4_kernel(
+    const scalar_t* __restrict__ key,      // [num_tokens, num_heads, head_size]
+    const scalar_t* __restrict__ value,    // [num_tokens, num_heads, head_size]
+    uint8_t* __restrict__ key_data_cache,  // data region for K
+    uint8_t* __restrict__ value_data_cache,    // data region for V
+    uint8_t* __restrict__ key_scale_cache,     // scale region for K
+    uint8_t* __restrict__ value_scale_cache,   // scale region for V
+    const int64_t* __restrict__ slot_mapping,  // [num_actual_tokens]
+    const float* __restrict__ k_scale_ptr,     // pointer to checkpoint k_scale
+    const float* __restrict__ v_scale_ptr,     // pointer to checkpoint v_scale
+    const int64_t key_stride,                  // key.stride(0) in elements
+    const int64_t value_stride,                // value.stride(0) in elements
+    const int num_heads, const int head_size, const int block_size,
+    const int64_t data_block_stride,         // data cache stride for dim 0
+    const int64_t data_head_stride,          // data cache stride for heads
+    const int64_t data_block_offset_stride,  // data cache stride for tokens
+    const int64_t scale_block_stride,        // scale cache stride for dim 0
+    const int64_t scale_head_stride,         // scale cache stride for heads
+    const int64_t scale_block_offset_stride  // scale cache stride for tokens
+) {
+  using CudaType = typename CUDATypeConverter<scalar_t>::Type;
+  using PVec = PackedVec<CudaType, CVT_FP4_PACK16>;
+
+  static constexpr int ELTS = CVT_FP4_ELTS_PER_THREAD;  // 16 or 8
+  static constexpr int THREADS_PER_SF = CVT_FP4_SF_VEC_SIZE / ELTS;
+
+  const int64_t token_idx = blockIdx.x;
+  const int64_t slot_idx = slot_mapping[token_idx];
+  if (slot_idx < 0) return;
+
+  const int64_t block_idx = slot_idx / block_size;
+  const int block_offset = static_cast<int>(slot_idx % block_size);
+
+  const int scale_dim = head_size / 16;
+  const int groups_per_head = head_size / CVT_FP4_SF_VEC_SIZE;
+
+  const int total_groups = num_heads * groups_per_head;
+  const int tid = threadIdx.x;
+  const int num_thread_groups = blockDim.x / THREADS_PER_SF;
+  const int tg_id = tid / THREADS_PER_SF;
+  const int tg_lane = tid % THREADS_PER_SF;
+
+  // Process both K (kv=0) and V (kv=1)
+#pragma unroll
+  for (int kv = 0; kv < 2; kv++) {
+    const scalar_t* __restrict__ src = (kv == 0) ? key : value;
+    const float global_scale = 1.0f / ((kv == 0) ? *k_scale_ptr : *v_scale_ptr);
+    const int64_t src_stride = (kv == 0) ? key_stride : value_stride;
+    uint8_t* __restrict__ data_cache =
+        (kv == 0) ? key_data_cache : value_data_cache;
+    uint8_t* __restrict__ sc_cache =
+        (kv == 0) ? key_scale_cache : value_scale_cache;
+
+    // Source pointer for this token (use actual stride, not assumed contiguous)
+    const CudaType* __restrict__ token_src =
+        reinterpret_cast<const CudaType*>(src) + token_idx * src_stride;
+
+    // Destination bases in data and scale caches for this token's block
+    uint8_t* __restrict__ data_block =
+        data_cache + block_idx * data_block_stride;
+    uint8_t* __restrict__ scale_block =
+        sc_cache + block_idx * scale_block_stride;
+
+    for (int g = tg_id; g < total_groups; g += num_thread_groups) {
+      const int head = g / groups_per_head;
+      const int group_in_head = g % groups_per_head;
+
+      // Load 16 (or 8) bf16 elements from source
+      PVec in_vec;
+      const CudaType* __restrict__ src_ptr =
+          token_src + head * head_size + group_in_head * CVT_FP4_SF_VEC_SIZE +
+          tg_lane * ELTS;
+
+#pragma unroll
+      for (int i = 0; i < ELTS / 2; i++) {
+        in_vec.elts[i] = reinterpret_cast<
+            const typename PackedTypeConverter<CudaType>::Type*>(src_ptr)[i];
+      }
+
+      // Quantize: produces packed fp4 and writes scale factor.
+      uint8_t sf_val;
+      uint8_t* sf_out_ptr = (tg_lane == 0) ? &sf_val : nullptr;
+
+      fp4_packed_t packed = cvt_warp_fp16_to_fp4<CudaType, THREADS_PER_SF>(
+          in_vec, global_scale, sf_out_ptr);
+
+      // Write packed FP4 data to data cache
+      uint8_t* __restrict__ data_dst = data_block + head * data_head_stride +
+                                       block_offset * data_block_offset_stride;
+
+#if CVT_FP4_PACK16
+      {
+        // 16 elements → 8 bytes (u32x2)
+        int data_byte_offset = group_in_head * 8;
+        reinterpret_cast<uint64_t*>(data_dst + data_byte_offset)[0] =
+            (uint64_t(packed.hi) << 32) | uint64_t(packed.lo);
+      }
+#else
+      {
+        // 8 elements → 4 bytes (uint32_t)
+        int data_byte_offset =
+            group_in_head * CVT_FP4_SF_VEC_SIZE / 2 + tg_lane * ELTS / 2;
+        reinterpret_cast<uint32_t*>(data_dst + data_byte_offset)[0] = packed;
+      }
+#endif
+
+      // Write block scale to scale cache.
+      // K (kv==0): linear layout (no swizzle).
+      // V (kv==1): swizzled layout for SM100 trtllm-gen MHA kernel.
+      if (sf_out_ptr != nullptr) {
+        int scale_idx = group_in_head;
+        uint8_t* __restrict__ scale_dst;
+        if (kv == 0) {
+          scale_dst = scale_block + head * scale_head_stride +
+                      block_offset * scale_block_offset_stride + scale_idx;
+        } else {
+          int swizzled_offset =
+              swizzle_scale_offset(block_offset, scale_idx, scale_dim);
+          int swizzled_t = swizzled_offset / scale_dim;
+          int swizzled_s = swizzled_offset % scale_dim;
+          scale_dst = scale_block + head * scale_head_stride +
+                      swizzled_t * scale_block_offset_stride + swizzled_s;
+        }
+        *scale_dst = sf_val;
+      }
+    }
+  }
+}
+
+}  // namespace vllm
+
+// Non-template entry point callable from cache_kernels.cu.
+// Receives key_cache/value_cache as kv_cache[:, 0] and kv_cache[:, 1].
+// Each KV side contains both data and scale:
+//   page = [K_data | K_scale | V_data | V_scale]
+void reshape_and_cache_nvfp4_dispatch(torch::Tensor& key, torch::Tensor& value,
+                                      torch::Tensor& key_cache,
+                                      torch::Tensor& value_cache,
+                                      torch::Tensor& slot_mapping,
+                                      torch::Tensor& k_scale,
+                                      torch::Tensor& v_scale) {
+  int num_tokens = slot_mapping.size(0);
+  int num_heads = key.size(1);
+  int head_size = key.size(2);
+  int data_dim = head_size / 2;
+  int scale_dim = head_size / 16;
+  int full_dim = data_dim + scale_dim;
+
+  // key_cache is kv_cache[:, 0] with shape
+  // [num_blocks, block_size, num_heads, full_dim] in logical order.
+  // Strides encode the physical layout (HND or NHD).
+  TORCH_CHECK(key_cache.dim() == 4, "key_cache must be 4D");
+  TORCH_CHECK(key_cache.size(3) == full_dim,
+              "key_cache last dim must be data_dim + scale_dim, got ",
+              key_cache.size(3), " expected ", full_dim);
+
+  int block_size = key_cache.size(1);
+
+  TORCH_CHECK(head_size % 16 == 0,
+              "head_size must be divisible by 16 for NVFP4 KV cache");
+  TORCH_CHECK(block_size % 4 == 0,
+              "block_size must be divisible by 4 for NVFP4 KV cache swizzle");
+
+  // Detect physical layout from strides (based on full_dim).
+  // HND: head stride > block_offset stride.
+  bool is_hnd = key_cache.stride(2) > key_cache.stride(1);
+
+  int64_t data_block_stride = key_cache.stride(0);  // page_bytes
+  int64_t data_head_stride, data_block_offset_stride;
+  if (is_hnd) {
+    data_head_stride = (int64_t)block_size * data_dim;
+    data_block_offset_stride = data_dim;
+  } else {
+    data_head_stride = data_dim;
+    data_block_offset_stride = (int64_t)num_heads * data_dim;
+  }
+
+  // Page layout: [K_data | K_scale | V_data | V_scale]
+  // Scale follows data within each KV side.
+  int64_t data_per_kv = (int64_t)num_heads * block_size * data_dim;
+
+  uint8_t* key_scale_ptr = key_cache.data_ptr<uint8_t>() + data_per_kv;
+  uint8_t* value_scale_ptr = value_cache.data_ptr<uint8_t>() + data_per_kv;
+
+  // Scale strides: same page stride, inner strides from layout.
+  int64_t scale_block_stride = data_block_stride;
+  int64_t scale_head_stride, scale_block_offset_stride;
+  if (is_hnd) {
+    scale_head_stride = (int64_t)block_size * scale_dim;
+    scale_block_offset_stride = scale_dim;
+  } else {
+    scale_head_stride = scale_dim;
+    scale_block_offset_stride = (int64_t)num_heads * scale_dim;
+  }
+
+  const float* k_scale_ptr = k_scale.data_ptr<float>();
+  const float* v_scale_ptr = v_scale.data_ptr<float>();
+
+  int groups_per_head = head_size / CVT_FP4_SF_VEC_SIZE;
+  int total_groups = num_heads * groups_per_head;
+  constexpr int THREADS_PER_SF = CVT_FP4_SF_VEC_SIZE / CVT_FP4_ELTS_PER_THREAD;
+  int num_threads = std::min(total_groups * THREADS_PER_SF, 512);
+  num_threads = ((num_threads + 31) / 32) * 32;
+
+  dim3 grid(num_tokens);
+  dim3 block(num_threads);
+
+  const at::cuda::OptionalCUDAGuard device_guard(device_of(key));
+  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  AT_DISPATCH_REDUCED_FLOATING_TYPES(
+      key.scalar_type(), "reshape_and_cache_nvfp4", [&] {
+        vllm::reshape_and_cache_nvfp4_kernel<scalar_t>
+            <<<grid, block, 0, stream>>>(
+                key.data_ptr<scalar_t>(), value.data_ptr<scalar_t>(),
+                key_cache.data_ptr<uint8_t>(), value_cache.data_ptr<uint8_t>(),
+                key_scale_ptr, value_scale_ptr,
+                slot_mapping.data_ptr<int64_t>(), k_scale_ptr, v_scale_ptr,
+                key.stride(0), value.stride(0), num_heads, head_size,
+                block_size, data_block_stride, data_head_stride,
+                data_block_offset_stride, scale_block_stride, scale_head_stride,
+                scale_block_offset_stride);
+      });
+}
diff --git a/csrc/ops.h b/csrc/ops.h
index 2e16ef877004..3e2faac5b7a4 100644
--- a/csrc/ops.h
+++ b/csrc/ops.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <optional>
+#include <string>
 #include <torch/library.h>
 #include <tuple>
 
@@ -53,50 +54,26 @@ void paged_attention_v2(
     const int64_t blocksparse_vert_stride, const int64_t blocksparse_block_size,
     const int64_t blocksparse_head_sliding_step);
 
-void merge_attn_states(torch::Tensor& output,
-                       std::optional<torch::Tensor> output_lse,
-                       const torch::Tensor& prefix_output,
-                       const torch::Tensor& prefix_lse,
-                       const torch::Tensor& suffix_output,
-                       const torch::Tensor& suffix_lse);
-#ifndef USE_ROCM
-void convert_vertical_slash_indexes(
-    torch::Tensor& block_count,      // [BATCH, N_HEADS, NUM_ROWS]
-    torch::Tensor& block_offset,     // [BATCH, N_HEADS, NUM_ROWS, NNZ_S]
-    torch::Tensor& column_count,     // [BATCH, N_HEADS, NUM_ROWS]
-    torch::Tensor& column_index,     // [BATCH, N_HEADS, NUM_ROWS, NNZ_V]
-    torch::Tensor q_seqlens,         // [BATCH, ]
-    torch::Tensor kv_seqlens,        // [BATCH, ]
-    torch::Tensor vertical_indexes,  // [BATCH, N_HEADS, NNZ_V]
-    torch::Tensor slash_indexes,     // [BATCH, N_HEADS, NNZ_S]
-    int64_t context_size, int64_t block_size_M, int64_t block_size_N,
-    bool causal);
-
-void convert_vertical_slash_indexes_mergehead(
-    torch::Tensor& block_count,            // [BATCH, N_HEADS, NUM_ROWS]
-    torch::Tensor& block_offset,           // [BATCH, N_HEADS, NUM_ROWS, NNZ_S]
-    torch::Tensor& column_count,           // [BATCH, N_HEADS, NUM_ROWS]
-    torch::Tensor& column_index,           // [BATCH, N_HEADS, NUM_ROWS, NNZ_V]
-    torch::Tensor q_seqlens,               // [BATCH, ]
-    torch::Tensor kv_seqlens,              // [BATCH, ]
-    torch::Tensor vertical_indexes,        // [BATCH, N_HEADS, NNZ_V]
-    torch::Tensor slash_indexes,           // [BATCH, N_HEADS, NNZ_S]
-    torch::Tensor vertical_indices_count,  // [N_HEADS, ]
-    torch::Tensor slash_indices_count, int64_t context_size,
-    int64_t block_size_M, int64_t block_size_N, bool causal);
-#endif
+void merge_attn_states(
+    torch::Tensor& output, std::optional<torch::Tensor> output_lse,
+    const torch::Tensor& prefix_output, const torch::Tensor& prefix_lse,
+    const torch::Tensor& suffix_output, const torch::Tensor& suffix_lse,
+    const std::optional<int64_t> prefill_tokens_with_context,
+    const std::optional<torch::Tensor>& output_scale = std::nullopt);
 
+// rms_norm and fused_add_rms_norm declarations also exist in
+// csrc/libtorch_stable/ops.h (torch::stable ABI for CUDA). They remain here
+// because the CPU build still uses these torch::Tensor declarations.
 void rms_norm(torch::Tensor& out, torch::Tensor& input, torch::Tensor& weight,
               double epsilon);
 
 void fused_add_rms_norm(torch::Tensor& input, torch::Tensor& residual,
                         torch::Tensor& weight, double epsilon);
 
-void fused_qk_norm_rope(torch::Tensor& qkv, int64_t num_heads_q,
-                        int64_t num_heads_k, int64_t num_heads_v,
-                        int64_t head_dim, double eps, torch::Tensor& q_weight,
-                        torch::Tensor& k_weight, torch::Tensor& cos_sin_cache,
-                        bool is_neox, torch::Tensor& position_ids);
+void fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert(
+    torch::Tensor& q, torch::Tensor const& kv, torch::Tensor& k_cache,
+    torch::Tensor const& slot_mapping, torch::Tensor const& position_ids,
+    torch::Tensor const& cos_sin_cache, double eps, int64_t cache_block_size);
 
 void apply_repetition_penalties_(torch::Tensor& logits,
                                  const torch::Tensor& prompt_mask,
@@ -114,50 +91,31 @@ void top_k_per_row_decode(const torch::Tensor& logits, int64_t next_n,
                           int64_t numRows, int64_t stride0, int64_t stride1,
                           int64_t topK);
 
-void large_context_topk(const torch::Tensor& score, torch::Tensor& indices,
-                        const torch::Tensor& lengths,
-                        std::optional<torch::Tensor> row_starts_opt);
-
-void rms_norm_static_fp8_quant(torch::Tensor& out, torch::Tensor& input,
-                               torch::Tensor& weight, torch::Tensor& scale,
-                               double epsilon);
-
-void fused_add_rms_norm_static_fp8_quant(torch::Tensor& out,
-                                         torch::Tensor& input,
-                                         torch::Tensor& residual,
-                                         torch::Tensor& weight,
-                                         torch::Tensor& scale, double epsilon);
-
-void rms_norm_dynamic_per_token_quant(torch::Tensor& out,
-                                      torch::Tensor const& input,
-                                      torch::Tensor const& weight,
-                                      torch::Tensor& scales,
-                                      double const epsilon,
-                                      std::optional<torch::Tensor> scale_ub,
-                                      std::optional<torch::Tensor> residual);
-
-void rms_norm_per_block_quant(torch::Tensor& out, torch::Tensor const& input,
-                              torch::Tensor const& weight,
-                              torch::Tensor& scales, double const epsilon,
-                              std::optional<torch::Tensor> scale_ub,
-                              std::optional<torch::Tensor> residual,
-                              int64_t group_size, bool is_scale_transposed);
+void persistent_topk(const torch::Tensor& logits, const torch::Tensor& lengths,
+                     torch::Tensor& output, torch::Tensor& workspace, int64_t k,
+                     int64_t max_seq_len);
 
+void silu_and_mul_per_block_quant(torch::Tensor& out,
+                                  torch::Tensor const& input,
+                                  torch::Tensor& scales, int64_t group_size,
+                                  std::optional<torch::Tensor> scale_ub,
+                                  bool is_scale_transposed);
+
+// rotary_embedding also exist in csrc/libtorch_stable/ops.h (torch::stable
+// ABI for CUDA). It remains here because the CPU build still uses these
+// torch::Tensor declarations.
 void rotary_embedding(torch::Tensor& positions, torch::Tensor& query,
                       std::optional<torch::Tensor> key, int64_t head_size,
-                      torch::Tensor& cos_sin_cache, bool is_neox);
+                      torch::Tensor& cos_sin_cache, bool is_neox,
+                      int64_t rope_dim_offset, bool inverse);
 
 void silu_and_mul(torch::Tensor& out, torch::Tensor& input);
 
+void silu_and_mul_clamp(torch::Tensor& out, torch::Tensor& input, double limit);
+
 void silu_and_mul_quant(torch::Tensor& out, torch::Tensor& input,
                         torch::Tensor& scale);
 
-#ifndef USE_ROCM
-void silu_and_mul_nvfp4_quant(torch::Tensor& out,
-                              torch::Tensor& output_block_scale,
-                              torch::Tensor& input,
-                              torch::Tensor& input_global_scale);
-#endif
 void persistent_masked_m_silu_mul_quant(
     const at::Tensor& input,   // (E, T, 2*H)
     const at::Tensor& counts,  // (E)
@@ -165,17 +123,10 @@ void persistent_masked_m_silu_mul_quant(
     at::Tensor& y_s,           // (E, T, H//group_size) [OUT]
     bool use_ue8m0);
 
-void mul_and_silu(torch::Tensor& out, torch::Tensor& input);
-
 void gelu_and_mul(torch::Tensor& out, torch::Tensor& input);
 
 void gelu_tanh_and_mul(torch::Tensor& out, torch::Tensor& input);
 
-void fatrelu_and_mul(torch::Tensor& out, torch::Tensor& input,
-                     double threshold);
-void swigluoai_and_mul(torch::Tensor& out, torch::Tensor& input,
-                       double alpha = 1.702, double limit = 7.0);
-
 void gelu_new(torch::Tensor& out, torch::Tensor& input);
 
 void gelu_fast(torch::Tensor& out, torch::Tensor& input);
@@ -190,124 +141,6 @@ void cutlass_mla_decode(torch::Tensor const& out, torch::Tensor const& q_nope,
 
 torch::Tensor get_cuda_view_from_cpu_tensor(torch::Tensor& cpu_tensor);
 
-#ifndef USE_ROCM
-
-torch::Tensor awq_gemm(torch::Tensor _in_feats, torch::Tensor _kernel,
-                       torch::Tensor _scaling_factors, torch::Tensor _zeros,
-                       int64_t split_k_iters);
-
-torch::Tensor awq_dequantize(torch::Tensor _kernel,
-                             torch::Tensor _scaling_factors,
-                             torch::Tensor _zeros, int64_t split_k_iters,
-                             int64_t thx, int64_t thy);
-
-#endif
-
-torch::Tensor ggml_dequantize(torch::Tensor W, int64_t type, int64_t m,
-                              int64_t n,
-                              std::optional<at::ScalarType> const& dtype);
-
-torch::Tensor ggml_mul_mat_vec_a8(torch::Tensor W, torch::Tensor X,
-                                  int64_t type, int64_t row);
-
-torch::Tensor ggml_mul_mat_a8(torch::Tensor W, torch::Tensor X, int64_t type,
-                              int64_t row);
-
-torch::Tensor ggml_moe_a8(torch::Tensor X, torch::Tensor W,
-                          torch::Tensor sorted_token_ids,
-                          torch::Tensor expert_ids,
-                          torch::Tensor num_tokens_post_padded, int64_t type,
-                          int64_t row, int64_t top_k, int64_t tokens);
-
-torch::Tensor ggml_moe_a8_vec(torch::Tensor X, torch::Tensor W,
-                              torch::Tensor topk_ids, int64_t top_k,
-                              int64_t type, int64_t row, int64_t tokens);
-
-int64_t ggml_moe_get_block_size(int64_t type);
-
-#ifndef USE_ROCM
-
-bool cutlass_scaled_mm_supports_fp4(int64_t cuda_device_capability);
-bool cutlass_scaled_mm_supports_fp8(int64_t cuda_device_capability);
-bool cutlass_scaled_mm_supports_block_fp8(int64_t cuda_device_capability);
-bool cutlass_group_gemm_supported(int64_t cuda_device_capability);
-
-void cutlass_scaled_fp4_mm(torch::Tensor& D, torch::Tensor const& A,
-                           torch::Tensor const& B, torch::Tensor const& A_sf,
-                           torch::Tensor const& B_sf,
-                           torch::Tensor const& alpha);
-
-void cutlass_scaled_mm(torch::Tensor& out, torch::Tensor const& a,
-                       torch::Tensor const& b, torch::Tensor const& a_scales,
-                       torch::Tensor const& b_scales,
-                       std::optional<torch::Tensor> const& bias);
-
-void cutlass_moe_mm(
-    torch::Tensor& out_tensors, torch::Tensor const& a_tensors,
-    torch::Tensor const& b_tensors, torch::Tensor const& a_scales,
-    torch::Tensor const& b_scales, torch::Tensor const& expert_offsets,
-    torch::Tensor const& problem_sizes, torch::Tensor const& a_strides,
-    torch::Tensor const& b_strides, torch::Tensor const& c_strides,
-    bool per_act_token, bool per_out_ch);
-
-void cutlass_fp4_group_mm(
-    torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b,
-    const torch::Tensor& a_blockscale, const torch::Tensor& b_blockscales,
-    const torch::Tensor& alphas, const torch::Tensor& problem_sizes,
-    const torch::Tensor& expert_offsets, const torch::Tensor& sf_offsets);
-
-void get_cutlass_moe_mm_data(
-    const torch::Tensor& topk_ids, torch::Tensor& expert_offsets,
-    torch::Tensor& problem_sizes1, torch::Tensor& problem_sizes2,
-    torch::Tensor& input_permutation, torch::Tensor& output_permutation,
-    const int64_t num_experts, const int64_t n, const int64_t k,
-    const std::optional<torch::Tensor>& blockscale_offsets,
-    const bool is_gated);
-
-void get_cutlass_moe_mm_problem_sizes_from_expert_offsets(
-    const torch::Tensor& expert_first_token_offset,
-    torch::Tensor& problem_sizes1, torch::Tensor& problem_sizes2,
-    const int64_t n, const int64_t k, const bool swap_ab);
-
-void get_cutlass_batched_moe_mm_data(torch::Tensor& expert_offsets,
-                                     torch::Tensor& problem_sizes1,
-                                     torch::Tensor& problem_sizes2,
-                                     const torch::Tensor& expert_num_tokens,
-                                     const int64_t num_local_experts,
-                                     const int64_t padded_m, const int64_t n,
-                                     const int64_t k);
-
-void cutlass_scaled_mm_azp(torch::Tensor& out, torch::Tensor const& a,
-                           torch::Tensor const& b,
-                           torch::Tensor const& a_scales,
-                           torch::Tensor const& b_scales,
-                           torch::Tensor const& azp_adj,
-                           std::optional<torch::Tensor> const& azp,
-                           std::optional<torch::Tensor> const& bias);
-
-std::tuple<torch::Tensor, torch::Tensor> scaled_fp4_quant_func(
-    torch::Tensor const& input, torch::Tensor const& input_scale,
-    bool is_sf_swizzled_layout);
-
-void scaled_fp4_quant_out(torch::Tensor const& input,
-                          torch::Tensor const& input_scale,
-                          bool is_sf_swizzled_layout, torch::Tensor& output,
-                          torch::Tensor& output_scale);
-
-void scaled_fp4_experts_quant(
-    torch::Tensor& output, torch::Tensor& output_scale,
-    torch::Tensor const& input, torch::Tensor const& input_global_scale,
-    torch::Tensor const& input_offset_by_experts,
-    torch::Tensor const& output_scale_offset_by_experts);
-
-void silu_and_mul_scaled_fp4_experts_quant(
-    torch::Tensor& output, torch::Tensor& output_scale,
-    torch::Tensor const& input, torch::Tensor const& input_global_scale,
-    torch::Tensor const& input_offset_by_experts,
-    torch::Tensor const& output_scale_offset_by_experts);
-
-#endif
-
 void static_scaled_int8_quant(torch::Tensor& out, torch::Tensor const& input,
                               torch::Tensor const& scale,
                               std::optional<torch::Tensor> const& azp);
@@ -316,24 +149,6 @@ void dynamic_scaled_int8_quant(torch::Tensor& out, torch::Tensor const& input,
                                torch::Tensor& scales,
                                std::optional<torch::Tensor> const& azp);
 
-torch::Tensor gptq_gemm(torch::Tensor a, torch::Tensor b_q_weight,
-                        torch::Tensor b_gptq_qzeros,
-                        torch::Tensor b_gptq_scales, torch::Tensor b_g_idx,
-                        bool use_exllama, bool use_v2_format, int64_t bit);
-
-void gptq_shuffle(torch::Tensor q_weight, torch::Tensor q_perm, int64_t bit);
-
-void static_scaled_fp8_quant(
-    torch::Tensor& out, torch::Tensor const& input, torch::Tensor const& scale,
-    std::optional<std::tuple<int64_t, int64_t>> group_shape = std::nullopt);
-
-void dynamic_scaled_fp8_quant(torch::Tensor& out, torch::Tensor const& input,
-                              torch::Tensor& scale);
-
-void dynamic_per_token_scaled_fp8_quant(
-    torch::Tensor& out, torch::Tensor const& input, torch::Tensor& scale,
-    std::optional<torch::Tensor> const& scale_ub);
-
 void selective_scan_fwd(
     const torch::Tensor& u, const torch::Tensor& delta, const torch::Tensor& A,
     const torch::Tensor& B, const torch::Tensor& C,
@@ -343,7 +158,7 @@ void selective_scan_fwd(
     const std::optional<torch::Tensor>& query_start_loc,
     const std::optional<torch::Tensor>& cache_indices,
     const std::optional<torch::Tensor>& has_initial_state,
-    const torch::Tensor& ssm_states, int64_t pad_slot_id, int64_t block_size,
+    const torch::Tensor& ssm_states, int64_t null_block_id, int64_t block_size,
     const std::optional<torch::Tensor>& block_idx_first_scheduled_token,
     const std::optional<torch::Tensor>& block_idx_last_scheduled_token,
     const std::optional<torch::Tensor>& initial_state_idx,
@@ -375,8 +190,6 @@ std::tuple<int64_t, torch::Tensor> allocate_shared_buffer_and_handle(
 int64_t open_mem_handle(torch::Tensor& mem_handle);
 void free_shared_buffer(int64_t buffer);
 
-torch::Tensor hadacore_transform(torch::Tensor& x, bool inplace);
-
 #ifdef USE_ROCM
 fptr_t init_custom_qr(int64_t rank, int64_t world_size,
                       std::optional<int64_t> qr_max_size = std::nullopt);
@@ -389,6 +202,13 @@ int64_t qr_max_size();
 #endif
 
 #ifndef USE_ROCM
-void dsv3_fused_a_gemm(torch::Tensor& output, torch::Tensor const& mat_a,
-                       torch::Tensor const& mat_b);
-#endif
\ No newline at end of file
+torch::Tensor minimax_allreduce_rms(torch::Tensor const& input,
+                                    torch::Tensor const& norm_weight,
+                                    torch::Tensor workspace, int64_t const rank,
+                                    int64_t const nranks, double const eps);
+std::tuple<torch::Tensor, torch::Tensor> minimax_allreduce_rms_qk(
+    torch::Tensor qkv, torch::Tensor const& norm_weight_q,
+    torch::Tensor const& norm_weight_k, torch::Tensor workspace,
+    int64_t const q_size, int64_t const kv_size, int64_t const rank,
+    int64_t const nranks, double const eps);
+#endif
diff --git a/csrc/persistent_topk.cuh b/csrc/persistent_topk.cuh
new file mode 100644
index 000000000000..8b9d10ff83dd
--- /dev/null
+++ b/csrc/persistent_topk.cuh
@@ -0,0 +1,1309 @@
+/*
+ * Persistent TopK Scheduler for DSA Indexer
+ */
+
+#ifndef PERSISTENT_TOPK_CUH_
+#define PERSISTENT_TOPK_CUH_
+
+#include <cuda.h>
+#include <cuda_fp16.h>
+#include <cuda_runtime.h>
+#include <cub/cub.cuh>
+#include <cstdint>
+
+namespace vllm {
+namespace persistent {
+
+// ============================================================================
+// Constants
+// ============================================================================
+
+constexpr int kThreadsPerBlock = 1024;
+constexpr int RADIX = 256;
+
+// Medium path: all shared state in dynamic smem (no static __shared__,
+// which would inflate the kernel's smem footprint and kill occupancy
+// for the decode/trivial paths).
+constexpr size_t kMediumHistBytes = 2 * (RADIX + 128) * sizeof(int);  // 3072
+constexpr size_t kMediumScalarsBytes = 5 * sizeof(int);               // 20
+constexpr size_t kMediumHeaderSize =
+    (kMediumHistBytes + kMediumScalarsBytes + 127) & ~size_t(127);  // 3200
+constexpr int MAX_BUFFERED_ITEMS = 4096;
+constexpr size_t kSmemMedium =
+    kMediumHeaderSize + 2 * MAX_BUFFERED_ITEMS * sizeof(int);  // 35968
+constexpr uint32_t RADIX_THRESHOLD = 32768;
+
+// Decode path constants
+constexpr int kDecodeBins = 2048;
+constexpr uint32_t HIST2048_THRESHOLD = 8192;
+
+// Large path: fixed shared memory for histograms + scalars
+constexpr size_t kFixedSmemLarge =
+    ((RADIX + RADIX + 5) * sizeof(uint32_t) + 15) & ~size_t(15);
+
+// ============================================================================
+// Common helpers
+// ============================================================================
+
+__device__ __forceinline__ auto convert_to_uint32_v2(float x) -> uint32_t {
+  uint32_t bits = __float_as_uint(x);
+  return (bits & 0x80000000u) ? ~bits : (bits | 0x80000000u);
+}
+
+__device__ __forceinline__ auto convert_to_uint8(float x) -> uint8_t {
+  __half h = __float2half_rn(x);
+  uint16_t bits = __half_as_ushort(h);
+  uint16_t key = (bits & 0x8000) ? static_cast<uint16_t>(~bits)
+                                 : static_cast<uint16_t>(bits | 0x8000);
+  return static_cast<uint8_t>(key >> 8);
+}
+
+// ============================================================================
+// Vectorized load helpers
+// ============================================================================
+
+// Unconditional float4 load with cache hint (.cg = cache at global level only).
+__device__ __forceinline__ void load_float4(const float* ptr, float& v0,
+                                            float& v1, float& v2, float& v3) {
+  uint32_t r0, r1, r2, r3;
+  asm volatile("ld.global.cg.v4.u32 {%0,%1,%2,%3}, [%4];\n"
+               : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
+               : "l"(ptr));
+  v0 = __uint_as_float(r0);
+  v1 = __uint_as_float(r1);
+  v2 = __uint_as_float(r2);
+  v3 = __uint_as_float(r3);
+}
+
+// Per-element predicated scalar loads with -inf default.
+__device__ __forceinline__ void load_float4_predicated(const float* ptr,
+                                                       int base, int seq_len,
+                                                       float& v0, float& v1,
+                                                       float& v2, float& v3) {
+  uint32_t r0, r1, r2, r3;
+  int p0 = (base < seq_len);
+  int p1 = (base + 1 < seq_len);
+  int p2 = (base + 2 < seq_len);
+  int p3 = (base + 3 < seq_len);
+  asm volatile(
+      "{\n"
+      "  .reg .pred pr0, pr1, pr2, pr3;\n"
+      "  setp.ne.u32 pr0, %4, 0;\n"
+      "  setp.ne.u32 pr1, %5, 0;\n"
+      "  setp.ne.u32 pr2, %6, 0;\n"
+      "  setp.ne.u32 pr3, %7, 0;\n"
+      "  mov.u32 %0, 0xFF800000;\n"
+      "  mov.u32 %1, 0xFF800000;\n"
+      "  mov.u32 %2, 0xFF800000;\n"
+      "  mov.u32 %3, 0xFF800000;\n"
+      "  @pr0 ld.global.cg.u32 %0, [%8];\n"
+      "  @pr1 ld.global.cg.u32 %1, [%8+4];\n"
+      "  @pr2 ld.global.cg.u32 %2, [%8+8];\n"
+      "  @pr3 ld.global.cg.u32 %3, [%8+12];\n"
+      "}\n"
+      : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
+      : "r"(p0), "r"(p1), "r"(p2), "r"(p3), "l"(ptr));
+  v0 = __uint_as_float(r0);
+  v1 = __uint_as_float(r1);
+  v2 = __uint_as_float(r2);
+  v3 = __uint_as_float(r3);
+}
+
+// ============================================================================
+// Large path: inter-CTA coordination state (one per group)
+// ============================================================================
+
+struct RadixRowState {
+  uint32_t histogram[3][256];  // Triple-buffered histograms
+  uint32_t remaining_k;
+  uint32_t prefix;
+  int arrival_counter;
+  int output_counter;
+};
+
+// ============================================================================
+// Kernel parameters
+// ============================================================================
+
+struct PersistentTopKParams {
+  const float* __restrict__ input;  // [num_rows, stride]
+  int32_t* __restrict__ output;     // [num_rows, top_k]
+  int32_t* __restrict__ lengths;    // [num_rows]
+  RadixRowState* row_states;        // large path: per-group state
+  uint32_t num_rows;
+  uint32_t stride;
+  uint32_t top_k;           // actual k value for output stride
+  uint32_t chunk_size;      // large path: elements per CTA
+  uint32_t ctas_per_group;  // 1=medium, >1=large
+  uint32_t max_seq_len;     // max seq_len across all rows (for early CTA exit)
+};
+
+// ============================================================================
+// Decode path: 2048-bin histogram for short sequences (seq_len <= 8192)
+// Uses 11-bit half-precision bins for fine granularity.
+// One histogram pass typically suffices since 8192/2048 = 4 elements/bin avg.
+// ============================================================================
+
+// 11-bit bin from half-precision representation (ascending: high values -> high
+// bins)
+__device__ __forceinline__ uint32_t decode_bin(float x) {
+  __half hx = __float2half(x);
+  uint16_t bits = __half_as_ushort(hx);
+  uint16_t key = (bits & 0x8000) ? static_cast<uint16_t>(~bits)
+                                 : static_cast<uint16_t>(bits | 0x8000);
+  return key >> 5;
+}
+
+template <int TopK>
+__device__ __noinline__ void histogram_2048_topk(
+    const float* __restrict__ logits, int32_t* __restrict__ output_indices,
+    int32_t seq_len) {
+  extern __shared__ int decode_smem[];
+  const int tx = threadIdx.x;
+  const int lane = tx & 31;
+
+  // ---- Layout constants ----
+  constexpr int SBASE = 8192 - 8;           // 8184
+  constexpr int RHIST = RADIX + 128;        // 384
+  constexpr int BOFF = 2 * RHIST;           // 768
+  constexpr int DBUF = (SBASE - BOFF) / 2;  // 3708
+  constexpr int MAX_ITEMS_PER_THREAD =
+      (HIST2048_THRESHOLD + kThreadsPerBlock - 1) / kThreadsPerBlock;
+
+  enum : int { sTHR = 0, sOUT = 1, sREF = 2, sFIN = 3, sBUF0 = 4, sBUF1 = 5 };
+
+  // ---- Initialize scalars (prevents stale data from prior rows) ----
+  if (tx < 8) {
+    decode_smem[SBASE + tx] = 0;
+  }
+
+  // ---- Phase 1: Build 2048-bin histogram with float4 vectorized loads ----
+  int* histo = decode_smem;
+  uint16_t reg_bins[MAX_ITEMS_PER_THREAD];
+  int nitems = 0;
+
+  for (int i = tx; i < kDecodeBins; i += kThreadsPerBlock) {
+    histo[i] = 0;
+  }
+  __syncthreads();
+
+  const int n_vec = (seq_len + 3) >> 2;
+  const bool row_aligned = ((reinterpret_cast<uintptr_t>(logits) & 15) == 0);
+
+  for (int i = tx; i < n_vec; i += kThreadsPerBlock) {
+    const int base = i << 2;
+    float v0, v1, v2, v3;
+
+    if (row_aligned && base + 3 < seq_len) {
+      load_float4(logits + base, v0, v1, v2, v3);
+    } else {
+      load_float4_predicated(logits + base, base, seq_len, v0, v1, v2, v3);
+    }
+
+    const uint16_t b0 = static_cast<uint16_t>(decode_bin(v0));
+    const uint16_t b1 = static_cast<uint16_t>(decode_bin(v1));
+    const uint16_t b2 = static_cast<uint16_t>(decode_bin(v2));
+    const uint16_t b3 = static_cast<uint16_t>(decode_bin(v3));
+    reg_bins[nitems++] = b0;
+    reg_bins[nitems++] = b1;
+    reg_bins[nitems++] = b2;
+    reg_bins[nitems++] = b3;
+    atomicAdd(&histo[b0], 1);
+    atomicAdd(&histo[b1], 1);
+    atomicAdd(&histo[b2], 1);
+    atomicAdd(&histo[b3], 1);
+  }
+  __syncthreads();
+
+  // ---- CUB suffix sum ----
+  using BlockScanT = cub::BlockScan<int, kThreadsPerBlock>;
+  const int h0 = histo[2 * tx];
+  const int pair_sum = h0 + histo[2 * tx + 1];
+
+  auto& scan_storage = *reinterpret_cast<typename BlockScanT::TempStorage*>(
+      decode_smem + kDecodeBins);
+
+  int pair_prefix, total;
+  BlockScanT(scan_storage).ExclusiveSum(pair_sum, pair_prefix, total);
+
+  // Find threshold bin purely from registers
+  const int pair_suffix = total - pair_prefix;
+
+  if (pair_suffix >= TopK && (pair_suffix - h0) < TopK) {
+    decode_smem[SBASE + sTHR] = 2 * tx;
+  }
+  {
+    const int right_suf = pair_suffix - h0;
+    const int next_suf = pair_suffix - pair_sum;
+    if (right_suf >= TopK && next_suf < TopK) {
+      decode_smem[SBASE + sTHR] = 2 * tx + 1;
+    }
+  }
+  __syncthreads();
+
+  const int threshold = decode_smem[SBASE + sTHR];
+
+  // ---- Phase 2: Collection with warp-aggregated atomicAdds ----
+  int* bufs[2] = {decode_smem + BOFF, decode_smem + BOFF + DBUF};
+  const int sOUT_abs = SBASE + sOUT;
+  const int sBUF0_abs = SBASE + sBUF0;
+
+  {
+    const uint32_t uthr = static_cast<uint32_t>(threshold);
+    int item = 0;
+    const int n_vec_iters = (n_vec + kThreadsPerBlock - 1) / kThreadsPerBlock;
+
+    for (int iter = 0; iter < n_vec_iters; iter++) {
+      const int i = tx + iter * kThreadsPerBlock;
+      const bool vec_valid = (i < n_vec);
+      const int base_idx = i << 2;
+
+#pragma unroll 4
+      for (int sub = 0; sub < 4; sub++) {
+        const int elem_idx = base_idx + sub;
+        uint32_t bin = 0;
+        if (vec_valid) bin = reg_bins[item++];
+        const bool is_above = vec_valid && (bin > uthr);
+        const bool is_equal = vec_valid && (bin == uthr);
+
+        const uint32_t above_mask = __ballot_sync(0xffffffff, is_above);
+        if (above_mask) {
+          const int above_count = __popc(above_mask);
+          const int above_rank = __popc(above_mask & ((1u << lane) - 1));
+          int above_base;
+          if (lane == 0) {
+            above_base = atomicAdd(&decode_smem[sOUT_abs], above_count);
+          }
+          above_base = __shfl_sync(0xffffffff, above_base, 0);
+          if (is_above) {
+            output_indices[above_base + above_rank] = elem_idx;
+          }
+        }
+
+        const uint32_t equal_mask = __ballot_sync(0xffffffff, is_equal);
+        if (equal_mask) {
+          const int equal_count = __popc(equal_mask);
+          const int equal_rank = __popc(equal_mask & ((1u << lane) - 1));
+          int equal_base;
+          if (lane == 0) {
+            equal_base = atomicAdd(&decode_smem[sBUF0_abs], equal_count);
+          }
+          equal_base = __shfl_sync(0xffffffff, equal_base, 0);
+          if (is_equal && __builtin_expect(equal_base + equal_rank < DBUF, 1)) {
+            bufs[0][equal_base + equal_rank] = elem_idx;
+          }
+        }
+      }
+    }
+  }
+  __syncthreads();
+
+  int remaining_k = TopK - decode_smem[SBASE + sOUT];
+  if (remaining_k <= 0) return;
+
+  // If all buffered elements fit, output them all (common for short seqs)
+  const int raw_buf0 = decode_smem[SBASE + sBUF0];
+  if (raw_buf0 <= remaining_k) {
+    const int nb = (raw_buf0 < DBUF) ? raw_buf0 : DBUF;
+    const int base = decode_smem[SBASE + sOUT];
+    for (int i = tx; i < nb; i += kThreadsPerBlock) {
+      output_indices[base + i] = bufs[0][i];
+    }
+    __syncthreads();
+    return;
+  }
+
+  // ---- Phase 3: Deferred refinement (rare path) ----
+  int* refine[2] = {decode_smem, decode_smem + RHIST};
+  const int num_buf0 = (raw_buf0 < DBUF) ? raw_buf0 : DBUF;
+
+  for (int i = tx; i < RHIST; i += kThreadsPerBlock) {
+    refine[0][i] = 0;
+  }
+  __syncthreads();
+
+  for (int i = tx; i < num_buf0; i += kThreadsPerBlock) {
+    const uint32_t fp32 = convert_to_uint32_v2(logits[bufs[0][i]]);
+    atomicAdd(&refine[0][(fp32 >> 24) & 0xFF], 1);
+  }
+  __syncthreads();
+
+  auto compute_suffix_sum = [&]() {
+#pragma unroll 8
+    for (int i = 0; i < 8; ++i) {
+      if (tx < RADIX) {
+        const int stride = 1 << i;
+        const int s = i & 1;
+        const int d = s ^ 1;
+        int value = refine[s][tx];
+        if (tx < RADIX - stride) value += refine[s][tx + stride];
+        refine[d][tx] = value;
+      }
+      __syncthreads();
+    }
+  };
+
+#pragma unroll 4
+  for (int pass = 0; pass < 4; ++pass) {
+    const int src = pass & 1;
+    const int dst = src ^ 1;
+
+    const int raw_buf = decode_smem[SBASE + sBUF0 + src];
+    const int num_buffered = (raw_buf < DBUF) ? raw_buf : DBUF;
+
+    compute_suffix_sum();
+
+    if (tx < RADIX && refine[0][tx] > remaining_k &&
+        refine[0][tx + 1] <= remaining_k) {
+      decode_smem[SBASE + sREF] = tx;
+      decode_smem[SBASE + sBUF0 + dst] = 0;
+      decode_smem[SBASE + sFIN] = remaining_k - refine[0][tx + 1];
+    }
+    __syncthreads();
+
+    const int ref_thr = decode_smem[SBASE + sREF];
+    remaining_k -= refine[0][ref_thr + 1];
+    const int bit_offset = 24 - pass * 8;
+
+    if (remaining_k == 0) {
+      for (int i = tx; i < num_buffered; i += kThreadsPerBlock) {
+        const int idx = bufs[src][i];
+        const uint32_t fp32 = convert_to_uint32_v2(logits[idx]);
+        if (((fp32 >> bit_offset) & 0xFF) > static_cast<uint32_t>(ref_thr)) {
+          const int pos = atomicAdd(&decode_smem[SBASE + sOUT], 1);
+          output_indices[pos] = idx;
+        }
+      }
+      __syncthreads();
+      break;
+    }
+
+    __syncthreads();
+    if (tx < RADIX + 1) refine[0][tx] = 0;
+    __syncthreads();
+
+    for (int i = tx; i < num_buffered; i += kThreadsPerBlock) {
+      const int idx = bufs[src][i];
+      const float logit_val = logits[idx];
+      const uint32_t fp32 = convert_to_uint32_v2(logit_val);
+      const int bin = (fp32 >> bit_offset) & 0xFF;
+
+      if (bin > ref_thr) {
+        const int pos = atomicAdd(&decode_smem[SBASE + sOUT], 1);
+        output_indices[pos] = idx;
+      } else if (bin == ref_thr) {
+        if (pass == 3) {
+          const int slot = atomicAdd(&decode_smem[SBASE + sFIN], -1);
+          if (slot > 0) output_indices[TopK - slot] = idx;
+        } else {
+          const int bp = atomicAdd(&decode_smem[SBASE + sBUF0 + dst], 1);
+          if (__builtin_expect(bp < DBUF, 1)) {
+            bufs[dst][bp] = idx;
+            const int nbo = bit_offset - 8;
+            atomicAdd(&refine[0][(fp32 >> nbo) & 0xFF], 1);
+          }
+        }
+      }
+    }
+    __syncthreads();
+  }
+}
+
+// ============================================================================
+// Medium path: coarse FP16 histogram + 4-pass FP32 radix refinement
+// For sequences 8K < seq_len <= 64K.
+// ============================================================================
+
+// Adapted from:
+// https://github.com/sgl-project/sglang/blob/v0.5.8/sgl-kernel/csrc/elementwise/topk.cu#L87
+// by: DarkSharpness
+// which at the same time is an optimized topk kernel copied from tilelang
+// kernel
+template <int TopK>
+__device__ __noinline__ void histogram_256_topk(
+    const float* __restrict__ logits, int* __restrict__ output_indices,
+    int logits_offset, int seq_len) {
+  // All shared state lives in dynamic shared memory to avoid static
+  extern __shared__ char medium_smem[];
+
+  int (*shared_histogram)[RADIX + 128] =
+      reinterpret_cast<int (*)[RADIX + 128]>(medium_smem);
+  int* medium_scalars = reinterpret_cast<int*>(medium_smem + kMediumHistBytes);
+  int& shared_output_count = medium_scalars[0];
+  int& shared_threshold_bin = medium_scalars[1];
+  int* shared_buffered_count = &medium_scalars[2];
+  int& shared_final_k = medium_scalars[4];
+  int (*buffered_indices)[MAX_BUFFERED_ITEMS] =
+      reinterpret_cast<int (*)[MAX_BUFFERED_ITEMS]>(medium_smem +
+                                                    kMediumHeaderSize);
+
+  const int thread_id = threadIdx.x;
+  int remaining_k = TopK;
+
+  if (thread_id < RADIX + 1) {
+    shared_histogram[0][thread_id] = 0;
+  }
+  __syncthreads();
+
+  for (int idx = thread_id; idx < seq_len; idx += kThreadsPerBlock) {
+    const auto bin = convert_to_uint8(logits[idx + logits_offset]);
+    atomicAdd(&shared_histogram[0][bin], 1);
+  }
+  __syncthreads();
+
+  auto compute_cumulative_sum = [&]() {
+#pragma unroll 8
+    for (int i = 0; i < 8; ++i) {
+      if (__builtin_expect(thread_id < RADIX, 1)) {
+        const int stride = 1 << i;
+        const int src_buffer = i & 1;
+        const int dst_buffer = src_buffer ^ 1;
+        int value = shared_histogram[src_buffer][thread_id];
+        if (thread_id < RADIX - stride) {
+          value += shared_histogram[src_buffer][thread_id + stride];
+        }
+        shared_histogram[dst_buffer][thread_id] = value;
+      }
+      __syncthreads();
+    }
+  };
+
+  compute_cumulative_sum();
+
+  if (thread_id < RADIX && shared_histogram[0][thread_id] > remaining_k &&
+      shared_histogram[0][thread_id + 1] <= remaining_k) {
+    shared_threshold_bin = thread_id;
+    shared_buffered_count[0] = 0;
+    shared_output_count = 0;
+  }
+  __syncthreads();
+
+  const int threshold_bin = shared_threshold_bin;
+  remaining_k -= shared_histogram[0][threshold_bin + 1];
+
+  if (remaining_k == 0) {
+    for (int idx = thread_id; idx < seq_len; idx += kThreadsPerBlock) {
+      const int bin = convert_to_uint8(logits[idx + logits_offset]);
+      if (bin > threshold_bin) {
+        const int output_pos = atomicAdd(&shared_output_count, 1);
+        output_indices[output_pos] = idx;
+      }
+    }
+    __syncthreads();
+    return;
+  }
+
+  __syncthreads();
+  if (thread_id < RADIX + 1) {
+    shared_histogram[0][thread_id] = 0;
+  }
+  __syncthreads();
+
+  for (int idx = thread_id; idx < seq_len; idx += kThreadsPerBlock) {
+    const float logit_value = logits[idx + logits_offset];
+    const int bin = convert_to_uint8(logit_value);
+    if (bin > threshold_bin) {
+      const int output_pos = atomicAdd(&shared_output_count, 1);
+      output_indices[output_pos] = idx;
+    } else if (bin == threshold_bin) {
+      const int buffer_pos = atomicAdd(&shared_buffered_count[0], 1);
+      if (__builtin_expect(buffer_pos < MAX_BUFFERED_ITEMS, 1)) {
+        buffered_indices[0][buffer_pos] = idx;
+        const uint32_t fp32_bits = convert_to_uint32_v2(logit_value);
+        const int next_bin = (fp32_bits >> 24) & 0xFF;
+        atomicAdd(&shared_histogram[0][next_bin], 1);
+      }
+    }
+  }
+  __syncthreads();
+
+#pragma unroll 4
+  for (int pass = 0; pass < 4; ++pass) {
+    const int src_buffer = pass % 2;
+    const int dst_buffer = src_buffer ^ 1;
+    const int raw_buffered = shared_buffered_count[src_buffer];
+    const int num_buffered =
+        (raw_buffered < MAX_BUFFERED_ITEMS) ? raw_buffered : MAX_BUFFERED_ITEMS;
+
+    compute_cumulative_sum();
+
+    if (thread_id < RADIX && shared_histogram[0][thread_id] > remaining_k &&
+        shared_histogram[0][thread_id + 1] <= remaining_k) {
+      shared_threshold_bin = thread_id;
+      shared_buffered_count[dst_buffer] = 0;
+      shared_final_k = remaining_k - shared_histogram[0][thread_id + 1];
+    }
+    __syncthreads();
+
+    const int threshold_bin = shared_threshold_bin;
+    remaining_k -= shared_histogram[0][threshold_bin + 1];
+    const int bit_offset = 24 - pass * 8;
+
+    if (remaining_k == 0) {
+      for (int i = thread_id; i < num_buffered; i += kThreadsPerBlock) {
+        const int idx = buffered_indices[src_buffer][i];
+        const uint32_t fp32_bits =
+            convert_to_uint32_v2(logits[idx + logits_offset]);
+        const int bin = (fp32_bits >> bit_offset) & 0xFF;
+        if (bin > threshold_bin) {
+          const int output_pos = atomicAdd(&shared_output_count, 1);
+          output_indices[output_pos] = idx;
+        }
+      }
+      __syncthreads();
+      break;
+    }
+
+    __syncthreads();
+    if (thread_id < RADIX + 1) {
+      shared_histogram[0][thread_id] = 0;
+    }
+    __syncthreads();
+
+    for (int i = thread_id; i < num_buffered; i += kThreadsPerBlock) {
+      const int idx = buffered_indices[src_buffer][i];
+      const float logit_value = logits[idx + logits_offset];
+      const uint32_t fp32_bits = convert_to_uint32_v2(logit_value);
+      const int bin = (fp32_bits >> bit_offset) & 0xFF;
+      if (bin > threshold_bin) {
+        const int output_pos = atomicAdd(&shared_output_count, 1);
+        output_indices[output_pos] = idx;
+      } else if (bin == threshold_bin) {
+        if (pass == 3) {
+          const int slot = atomicAdd(&shared_final_k, -1);
+          if (slot > 0) {
+            output_indices[TopK - slot] = idx;
+          }
+        } else {
+          const int buffer_pos =
+              atomicAdd(&shared_buffered_count[dst_buffer], 1);
+          if (__builtin_expect(buffer_pos < MAX_BUFFERED_ITEMS, 1)) {
+            buffered_indices[dst_buffer][buffer_pos] = idx;
+            const int next_bit_offset = bit_offset - 8;
+            const int next_bin = (fp32_bits >> next_bit_offset) & 0xFF;
+            atomicAdd(&shared_histogram[0][next_bin], 1);
+          }
+        }
+      }
+    }
+    __syncthreads();
+  }
+}
+
+// ============================================================================
+// Inter-CTA sync primitives
+// ============================================================================
+
+__device__ __forceinline__ int ld_acquire(int* ptr) {
+  int state = 0;
+#if (__CUDA_ARCH__ >= 700)
+  asm volatile("ld.global.acquire.gpu.b32 %0, [%1];\n"
+               : "=r"(state)
+               : "l"(ptr));
+#else
+  asm volatile("ld.cg.global.b32 %0, [%1];\n" : "=r"(state) : "l"(ptr));
+#endif
+  return state;
+}
+
+__device__ __forceinline__ void red_release(int* ptr, int val) {
+#if (__CUDA_ARCH__ >= 700)
+  asm volatile("fence.acq_rel.gpu;\n");
+  asm volatile("red.relaxed.gpu.global.add.s32 [%0], %1;\n"
+               :
+               : "l"(ptr), "r"(val));
+#else
+  __threadfence();
+  atomicAdd(ptr, val);
+#endif
+}
+
+__device__ __forceinline__ void st_release(int* ptr, int val) {
+#if (__CUDA_ARCH__ >= 700)
+  asm volatile("fence.acq_rel.gpu;\n");
+  asm volatile("st.release.gpu.global.b32 [%0], %1;\n" : : "l"(ptr), "r"(val));
+#else
+  __threadfence();
+  atomicExch(ptr, val);
+#endif
+}
+
+__device__ __forceinline__ void wait_ge(int* ptr, int target_val,
+                                        int thread_idx) {
+  if (thread_idx == 0) {
+#pragma unroll 1
+    while (ld_acquire(ptr) < target_val) {
+    }
+  }
+  __syncthreads();
+}
+
+// ============================================================================
+// Large path: multi-CTA radix select for sequences > 64K
+//
+// Each row is processed by a group of CTAs. Each CTA loads its chunk into
+// shared memory as ordered uint32, then participates in 4 rounds of
+// coordinated radix select via global-memory histograms and barriers.
+// ============================================================================
+
+// ============================================================================
+// Multi-CTA cooperative RadixTopK for a single large row.
+// Adapted from https://github.com/flashinfer-ai/flashinfer/pull/2215
+// ============================================================================
+
+template <int TopK, uint32_t VEC_SIZE>
+__device__ void radix_topk(const float* __restrict__ row_input,
+                           int32_t* __restrict__ row_output, uint32_t seq_len,
+                           uint32_t my_chunk_start, uint32_t chunk_size,
+                           uint32_t* local_histogram, uint32_t* suffix_sum,
+                           uint32_t* shared_scalars, uint32_t* shared_ordered,
+                           RadixRowState* state, uint32_t cta_in_group,
+                           uint32_t ctas_per_group, int& barrier_phase,
+                           uint32_t iter, uint32_t tx) {
+  const uint32_t my_chunk_end = (my_chunk_start + chunk_size < seq_len)
+                                    ? my_chunk_start + chunk_size
+                                    : seq_len;
+  const uint32_t actual_chunk_size =
+      (my_chunk_start < seq_len) ? (my_chunk_end - my_chunk_start) : 0;
+
+  // -- Stage 1: Load chunk to shared memory as ordered uint32 --
+  {
+    const uint32_t aligned_size = (actual_chunk_size / VEC_SIZE) * VEC_SIZE;
+
+    for (uint32_t i = tx * VEC_SIZE; i < aligned_size;
+         i += kThreadsPerBlock * VEC_SIZE) {
+      const float* src = row_input + my_chunk_start + i;
+      if constexpr (VEC_SIZE == 4) {
+        float4 v = *reinterpret_cast<const float4*>(src);
+        shared_ordered[i] = convert_to_uint32_v2(v.x);
+        shared_ordered[i + 1] = convert_to_uint32_v2(v.y);
+        shared_ordered[i + 2] = convert_to_uint32_v2(v.z);
+        shared_ordered[i + 3] = convert_to_uint32_v2(v.w);
+      } else if constexpr (VEC_SIZE == 2) {
+        float2 v = *reinterpret_cast<const float2*>(src);
+        shared_ordered[i] = convert_to_uint32_v2(v.x);
+        shared_ordered[i + 1] = convert_to_uint32_v2(v.y);
+      } else {
+        shared_ordered[i] = convert_to_uint32_v2(*src);
+      }
+    }
+    for (uint32_t i = aligned_size + tx; i < actual_chunk_size;
+         i += kThreadsPerBlock) {
+      shared_ordered[i] = convert_to_uint32_v2(row_input[my_chunk_start + i]);
+    }
+  }
+  __syncthreads();
+
+  // -- Init radix select state --
+  if (tx == 0) {
+    shared_scalars[0] = 0;     // prefix
+    shared_scalars[1] = TopK;  // remaining_k
+  }
+  __syncthreads();
+
+  // -- Initial barrier --
+  if (tx == 0) {
+    red_release(&state->arrival_counter, 1);
+  }
+  wait_ge(&state->arrival_counter,
+          (barrier_phase + 1) * static_cast<int>(ctas_per_group), tx);
+  barrier_phase++;
+  __syncthreads();
+
+  if (cta_in_group == 0 && tx == 0) {
+    st_release(&state->output_counter, 0);
+  }
+
+  // -- Stage 2: 4 rounds of radix select --
+  for (uint32_t round = 0; round < 4; round++) {
+    const uint32_t global_round = iter * 4 + round;
+    const uint32_t shift = 24 - round * 8;
+    const uint32_t prefix = shared_scalars[0];
+    const uint32_t remaining_k = shared_scalars[1];
+
+    uint32_t* current_hist = state->histogram[global_round % 3];
+    uint32_t* next_hist = state->histogram[(global_round + 1) % 3];
+
+    for (uint32_t i = tx; i < RADIX; i += kThreadsPerBlock) {
+      local_histogram[i] = 0;
+    }
+    __syncthreads();
+
+    for (uint32_t i = tx; i < actual_chunk_size; i += kThreadsPerBlock) {
+      uint32_t ordered = shared_ordered[i];
+      uint32_t mask = (round == 0) ? 0u : (~0u << (32 - round * 8));
+      if ((ordered & mask) == prefix) {
+        uint32_t bucket = (ordered >> shift) & 0xFF;
+        atomicAdd(&local_histogram[bucket], 1);
+      }
+    }
+    __syncthreads();
+
+    for (uint32_t i = tx; i < RADIX; i += kThreadsPerBlock) {
+      if (local_histogram[i] > 0) {
+        atomicAdd(&current_hist[i], local_histogram[i]);
+      }
+    }
+
+    if (cta_in_group == 0) {
+      for (uint32_t i = tx; i < RADIX; i += kThreadsPerBlock) {
+        next_hist[i] = 0;
+      }
+    }
+
+    if (tx == 0) {
+      red_release(&state->arrival_counter, 1);
+    }
+    wait_ge(&state->arrival_counter,
+            (barrier_phase + 1) * static_cast<int>(ctas_per_group), tx);
+    barrier_phase++;
+    __syncthreads();
+
+    for (uint32_t i = tx; i < RADIX; i += kThreadsPerBlock) {
+      suffix_sum[i] = current_hist[i];
+    }
+    __syncthreads();
+
+    for (uint32_t stride = 1; stride < RADIX; stride *= 2) {
+      uint32_t val = 0;
+      if (tx < RADIX) {
+        val = suffix_sum[tx];
+        if (tx + stride < RADIX) val += suffix_sum[tx + stride];
+      }
+      __syncthreads();
+      if (tx < RADIX) suffix_sum[tx] = val;
+      __syncthreads();
+    }
+
+    if (tx == 0) {
+      shared_scalars[2] = 0;
+      shared_scalars[3] = remaining_k;
+    }
+    __syncthreads();
+
+    if (tx < RADIX) {
+      uint32_t count_ge = suffix_sum[tx];
+      uint32_t count_gt = (tx + 1 < RADIX) ? suffix_sum[tx + 1] : 0;
+      if (count_ge >= remaining_k && count_gt < remaining_k) {
+        shared_scalars[2] = tx;
+        shared_scalars[3] = remaining_k - count_gt;
+      }
+    }
+    __syncthreads();
+
+    if (tx == 0) {
+      shared_scalars[0] = prefix | (shared_scalars[2] << shift);
+      shared_scalars[1] = shared_scalars[3];
+    }
+    __syncthreads();
+  }  // end 4 radix rounds
+
+  // -- Count local > pivot elements --
+  const uint32_t ordered_pivot = shared_scalars[0];
+
+  if (tx == 0) suffix_sum[0] = 0;
+  __syncthreads();
+
+  uint32_t my_gt_count = 0;
+  for (uint32_t i = tx; i < actual_chunk_size; i += kThreadsPerBlock) {
+    if (shared_ordered[i] > ordered_pivot) my_gt_count++;
+  }
+  for (int offset = 16; offset > 0; offset /= 2) {
+    my_gt_count += __shfl_down_sync(0xffffffff, my_gt_count, offset);
+  }
+  if (tx % 32 == 0 && my_gt_count > 0) {
+    atomicAdd(&suffix_sum[0], my_gt_count);
+  }
+  __syncthreads();
+  const uint32_t local_gt_count = suffix_sum[0];
+
+  // -- Stage 3: Collect top-k indices --
+  if (tx == 0) {
+    local_histogram[0] = 0;
+    if (local_gt_count > 0) {
+      local_histogram[1] =
+          atomicAdd(&state->output_counter, static_cast<int>(local_gt_count));
+    }
+  }
+  __syncthreads();
+
+  for (uint32_t i = tx; i < actual_chunk_size; i += kThreadsPerBlock) {
+    if (shared_ordered[i] > ordered_pivot) {
+      uint32_t local_pos = atomicAdd(&local_histogram[0], 1);
+      int pos = static_cast<int>(local_histogram[1]) + local_pos;
+      row_output[pos] = static_cast<int32_t>(my_chunk_start + i);
+    }
+  }
+
+  if (tx == 0) {
+    red_release(&state->arrival_counter, 1);
+  }
+  wait_ge(&state->arrival_counter,
+          (barrier_phase + 1) * static_cast<int>(ctas_per_group), tx);
+  barrier_phase++;
+  __syncthreads();
+
+  for (uint32_t i = tx; i < actual_chunk_size; i += kThreadsPerBlock) {
+    if (shared_ordered[i] == ordered_pivot) {
+      int pos = atomicAdd(&state->output_counter, 1);
+      if (pos < TopK) {
+        row_output[pos] = static_cast<int32_t>(my_chunk_start + i);
+      }
+    }
+  }
+}
+
+// ============================================================================
+// Persistent kernel — BS≤32, decode/medium/large paths with RadixTopK
+// BS>32 uses standalone histogram_256_buffered_topk (separate kernel,
+// see filtered_topk.cuh)
+// ============================================================================
+
+template <int TopK = 2048, uint32_t VEC_SIZE = 1>
+__global__ void __launch_bounds__(kThreadsPerBlock, 2)
+    persistent_topk_kernel(PersistentTopKParams params) {
+  const uint32_t tx = threadIdx.x;
+  extern __shared__ uint8_t smem_raw[];
+
+  // ========================================================================
+  // Group mode: multi-CTA groups with static round-robin row assignment.
+  // Non-large rows: CTA-0 handles trivial/decode/medium.
+  // Large rows: all CTAs in the group cooperate via RadixTopK.
+  // ========================================================================
+  const uint32_t ctas_per_group = params.ctas_per_group;
+  const uint32_t group_id = blockIdx.x / ctas_per_group;
+  const uint32_t cta_in_group = blockIdx.x % ctas_per_group;
+  const uint32_t num_groups = gridDim.x / ctas_per_group;
+  const uint32_t chunk_size = params.chunk_size;
+
+  if (blockIdx.x >= num_groups * ctas_per_group) return;
+
+  // Early exit: non-CTA-0 threads are never needed if no large rows exist
+  if (cta_in_group != 0 && params.max_seq_len <= RADIX_THRESHOLD) return;
+
+  uint32_t* local_histogram = reinterpret_cast<uint32_t*>(smem_raw);
+  uint32_t* suffix_sum = local_histogram + RADIX;
+  uint32_t* shared_scalars = suffix_sum + RADIX;
+  uint32_t* shared_ordered =
+      reinterpret_cast<uint32_t*>(smem_raw + kFixedSmemLarge);
+
+  // RadixRowState for multi-CTA cooperative radix.
+  // Zero-initialization is done host-side via cudaMemsetAsync in topk.cu
+  // before launch — that gives a stream-ordered happens-before edge for all
+  // CTAs, which the previous in-kernel init (CTA-0 only + intra-CTA
+  // __syncthreads) did not provide and which manifested as a race against
+  // CTA-1+'s first red_release on arrival_counter.
+  RadixRowState* state = &params.row_states[group_id];
+
+  int barrier_phase = 0;
+  const uint32_t total_iters = (params.num_rows + num_groups - 1) / num_groups;
+
+  for (uint32_t iter = 0; iter < total_iters; iter++) {
+    // Static round-robin: all CTAs in the group implicitly agree on the row
+    uint32_t row_idx = group_id + iter * num_groups;
+    if (row_idx >= params.num_rows) break;
+
+    const uint32_t seq_len = params.lengths[row_idx];
+    int32_t* row_output = params.output + row_idx * params.top_k;
+    const float* row_input = params.input + row_idx * params.stride;
+
+    if (seq_len <= RADIX_THRESHOLD) {
+      if (cta_in_group == 0) {
+        if (seq_len <= static_cast<uint32_t>(TopK)) {
+          // Trivial case: seq_len <= TopK
+          for (uint32_t i = tx; i < static_cast<uint32_t>(TopK);
+               i += kThreadsPerBlock) {
+            row_output[i] = (i < seq_len) ? static_cast<int32_t>(i) : -1;
+          }
+        } else if (seq_len <= static_cast<uint32_t>(HIST2048_THRESHOLD)) {
+          histogram_2048_topk<TopK>(row_input, row_output, seq_len);
+        } else {
+          histogram_256_topk<TopK>(row_input, row_output, 0, seq_len);
+        }
+      }
+      continue;
+    }
+
+    const uint32_t my_chunk_start = cta_in_group * chunk_size;
+    radix_topk<TopK, VEC_SIZE>(
+        row_input, row_output, seq_len, my_chunk_start, chunk_size,
+        local_histogram, suffix_sum, shared_scalars, shared_ordered, state,
+        cta_in_group, ctas_per_group, barrier_phase, iter, tx);
+  }
+}
+
+}  // namespace persistent
+
+// ============================================================================
+// FlashInfer FilteredTopK (BS>32 dispatch) — float32 only.
+// Extracted from flashinfer_topk.cuh. Lives in namespace vllm (not persistent).
+// Adapted from https://github.com/flashinfer-ai/flashinfer/pull/2215
+// ============================================================================
+
+#define FLASHINFER_CUDA_CALL(func, ...) \
+  {                                     \
+    cudaError_t e = (func);             \
+    if (e != cudaSuccess) {             \
+      return e;                         \
+    }                                   \
+  }
+
+#define FLASHINFER_INLINE inline __attribute__((always_inline)) __device__
+
+template <typename T, size_t N>
+struct vec_t {
+  T data[N];
+
+  FLASHINFER_INLINE T& operator[](size_t i) { return data[i]; }
+  FLASHINFER_INLINE const T& operator[](size_t i) const { return data[i]; }
+
+  FLASHINFER_INLINE void cast_load(const T* ptr) {
+#pragma unroll
+    for (size_t i = 0; i < N; ++i) {
+      data[i] = ptr[i];
+    }
+  }
+
+  FLASHINFER_INLINE void cast_store(T* ptr) const {
+#pragma unroll
+    for (size_t i = 0; i < N; ++i) {
+      ptr[i] = data[i];
+    }
+  }
+};
+#undef FLASHINFER_INLINE
+
+// FilteredTopK traits for different data types
+template <typename DType>
+struct FilteredTopKTraits;
+
+// Specialization for float (32-bit): coarse histogram uses FP16 high 8 bits, 4
+// refinement rounds
+template <>
+struct FilteredTopKTraits<float> {
+  using OrderedType = uint32_t;
+  static constexpr int NUM_REFINE_ROUNDS = 4;
+  static constexpr int FIRST_REFINE_SHIFT = 24;
+
+  __device__ __forceinline__ static uint8_t ToCoarseKey(float x) {
+    // Convert to FP16 representation and extract high 8 bits
+    __half h = __float2half_rn(x);
+    uint16_t bits = __half_as_ushort(h);
+    uint16_t key = (bits & 0x8000) ? static_cast<uint16_t>(~bits)
+                                   : static_cast<uint16_t>(bits | 0x8000);
+    return static_cast<uint8_t>(key >> 8);
+  }
+
+  __device__ __forceinline__ static OrderedType ToOrdered(float x) {
+    uint32_t bits = __float_as_uint(x);
+    return (bits & 0x80000000u) ? ~bits : (bits | 0x80000000u);
+  }
+};
+
+constexpr uint32_t FILTERED_TOPK_BLOCK_THREADS = 1024;
+constexpr uint32_t FILTERED_TOPK_SMEM_INPUT_SIZE =
+    16 * 1024;  // 16K indices per buffer
+constexpr size_t FILTERED_TOPK_SMEM_DYNAMIC =
+    sizeof(int) * 2 * FILTERED_TOPK_SMEM_INPUT_SIZE;  // 128KB
+
+/*!
+ * \brief Filtered Top-K kernel for ragged sequences.
+ *
+ * \tparam DType Data type (float, half, nv_bfloat16)
+ * \tparam IdType Index type (int32_t)
+ * \tparam VEC_SIZE Vector size for input loads (1, 2, 4, or 8)
+ */
+template <typename DType, typename IdType, int VEC_SIZE, uint32_t MAX_K = 2048>
+__global__ void __launch_bounds__(FILTERED_TOPK_BLOCK_THREADS)
+    FilteredTopKUnifiedKernel(const DType* __restrict__ input,
+                              IdType* __restrict__ output,
+                              const IdType* __restrict__ lengths,
+                              uint32_t num_rows, uint32_t top_k,
+                              uint32_t max_len) {
+  constexpr uint32_t BLOCK_SIZE = FILTERED_TOPK_BLOCK_THREADS;
+  constexpr int RADIX = 256;
+  constexpr int SMEM_INPUT_SIZE = FILTERED_TOPK_SMEM_INPUT_SIZE;
+
+  const uint32_t bid = blockIdx.x;
+  const int tx = threadIdx.x;
+
+  if (bid >= num_rows) return;
+
+  const int length =
+      (lengths != nullptr) ? lengths[bid] : static_cast<int>(max_len);
+  const DType* score = input + bid * max_len;
+  IdType* dst = output + bid * top_k;
+
+  // Trivial case: length <= top_k
+  if (length <= static_cast<int>(top_k)) {
+    for (int i = tx; i < static_cast<int>(top_k); i += BLOCK_SIZE) {
+      dst[i] = (i < length) ? static_cast<IdType>(i) : static_cast<IdType>(-1);
+    }
+    return;
+  }
+
+  // Static shared memory
+  alignas(128) __shared__ int s_histogram_buf[2][RADIX + 128];
+  alignas(128) __shared__ int s_counter;
+  alignas(128) __shared__ int s_threshold_bin_id;
+  alignas(128) __shared__ int s_num_input[2];
+  alignas(128) __shared__ int s_indices[MAX_K];
+
+  auto& s_histogram = s_histogram_buf[0];
+
+  // Dynamic shared memory for input double buffer
+  extern __shared__ int s_input_idx[][SMEM_INPUT_SIZE];
+
+  using Traits = FilteredTopKTraits<DType>;
+  int topk = top_k;
+
+  // Stage 1: 8-bit coarse histogram with vectorized loads
+  if (tx < RADIX + 1) s_histogram[tx] = 0;
+  __syncthreads();
+
+  vec_t<DType, VEC_SIZE> score_vec;
+
+  const int aligned_length = (length / VEC_SIZE) * VEC_SIZE;
+#pragma unroll 2
+  for (int base = tx * VEC_SIZE; base < aligned_length;
+       base += BLOCK_SIZE * VEC_SIZE) {
+    score_vec.cast_load(&score[base]);
+#pragma unroll
+    for (int j = 0; j < VEC_SIZE; ++j) {
+      const auto bin = Traits::ToCoarseKey(score_vec[j]);
+      atomicAdd(&s_histogram[bin], 1);
+    }
+  }
+  // Handle tail
+  for (int i = aligned_length + tx; i < length; i += BLOCK_SIZE) {
+    const auto bin = Traits::ToCoarseKey(score[i]);
+    atomicAdd(&s_histogram[bin], 1);
+  }
+  __syncthreads();
+
+  // Suffix sum
+  const auto run_cumsum = [&]() {
+#pragma unroll 8
+    for (int i = 0; i < 8; ++i) {
+      if (tx < RADIX) {
+        const auto j = 1 << i;
+        const auto k = i & 1;
+        auto value = s_histogram_buf[k][tx];
+        if (tx < RADIX - j) {
+          value += s_histogram_buf[k][tx + j];
+        }
+        s_histogram_buf[k ^ 1][tx] = value;
+      }
+      __syncthreads();
+    }
+  };
+
+  run_cumsum();
+  if (tx < RADIX && s_histogram[tx] > topk && s_histogram[tx + 1] <= topk) {
+    s_threshold_bin_id = tx;
+    s_num_input[0] = 0;
+    s_counter = 0;
+  }
+  __syncthreads();
+
+  const auto threshold_bin = s_threshold_bin_id;
+  topk -= s_histogram[threshold_bin + 1];
+
+  constexpr int NUM_ROUNDS = Traits::NUM_REFINE_ROUNDS;
+  constexpr int FIRST_SHIFT = Traits::FIRST_REFINE_SHIFT;
+
+  if (topk == 0) {
+    // Collect indices where bin > threshold
+#pragma unroll 2
+    for (int base = tx * VEC_SIZE; base < aligned_length;
+         base += BLOCK_SIZE * VEC_SIZE) {
+      score_vec.cast_load(&score[base]);
+#pragma unroll
+      for (int j = 0; j < VEC_SIZE; ++j) {
+        const auto bin = static_cast<int>(Traits::ToCoarseKey(score_vec[j]));
+        if (bin > threshold_bin) {
+          const auto pos = atomicAdd(&s_counter, 1);
+          s_indices[pos] = base + j;
+        }
+      }
+    }
+    // Handle tail
+    for (int i = aligned_length + tx; i < length; i += BLOCK_SIZE) {
+      const auto bin = static_cast<int>(Traits::ToCoarseKey(score[i]));
+      if (bin > threshold_bin) {
+        const auto pos = atomicAdd(&s_counter, 1);
+        s_indices[pos] = i;
+      }
+    }
+    __syncthreads();
+  } else {
+    __syncthreads();
+    if (tx < RADIX + 1) s_histogram[tx] = 0;
+    __syncthreads();
+
+    // Filter + histogram for refinement
+    auto filter_and_add_to_histogram = [&](auto raw_input, int index) {
+      const auto bin = static_cast<int>(Traits::ToCoarseKey(raw_input));
+      if (bin > threshold_bin) {
+        const auto pos = atomicAdd(&s_counter, 1);
+        s_indices[pos] = index;
+      } else if (bin == threshold_bin) {
+        const auto pos = atomicAdd(&s_num_input[0], 1);
+        if (__builtin_expect(pos < SMEM_INPUT_SIZE, 1)) {
+          s_input_idx[0][pos] = index;
+          const auto ordered = Traits::ToOrdered(raw_input);
+          const auto sub_bin = (ordered >> FIRST_SHIFT) & 0xFF;
+          atomicAdd(&s_histogram[sub_bin], 1);
+        }
+      }
+    };
+#pragma unroll 2
+    for (int base = tx * VEC_SIZE; base < aligned_length;
+         base += BLOCK_SIZE * VEC_SIZE) {
+      score_vec.cast_load(&score[base]);
+#pragma unroll
+      for (int j = 0; j < VEC_SIZE; ++j) {
+        filter_and_add_to_histogram(score_vec[j], base + j);
+      }
+    }
+    // Handle tail
+    for (int i = aligned_length + tx; i < length; i += BLOCK_SIZE) {
+      filter_and_add_to_histogram(score[i], i);
+    }
+    __syncthreads();
+
+    // Stage 2: refine with 8bit radix passes
+#pragma unroll
+    for (int round = 0; round < NUM_ROUNDS; ++round) {
+      __shared__ int s_last_remain;
+      const auto r_idx = round % 2;
+
+      const auto _raw_num_input = s_num_input[r_idx];
+      const auto num_input =
+          (_raw_num_input < SMEM_INPUT_SIZE) ? _raw_num_input : SMEM_INPUT_SIZE;
+
+      run_cumsum();
+      if (tx < RADIX && s_histogram[tx] > topk && s_histogram[tx + 1] <= topk) {
+        s_threshold_bin_id = tx;
+        s_num_input[r_idx ^ 1] = 0;
+        s_last_remain = topk - s_histogram[tx + 1];
+      }
+      __syncthreads();
+
+      const auto threshold = s_threshold_bin_id;
+      topk -= s_histogram[threshold + 1];
+
+      const int offset = FIRST_SHIFT - round * 8;
+      const bool is_last_round = (round == NUM_ROUNDS - 1);
+
+      if (topk == 0) {
+        for (int i = tx; i < num_input; i += BLOCK_SIZE) {
+          const auto idx = s_input_idx[r_idx][i];
+          const auto bin = (Traits::ToOrdered(score[idx]) >> offset) & 0xFF;
+          if (static_cast<int>(bin) > threshold) {
+            const auto pos = atomicAdd(&s_counter, 1);
+            s_indices[pos] = idx;
+          }
+        }
+        __syncthreads();
+        break;
+      } else {
+        __syncthreads();
+        if (tx < RADIX + 1) s_histogram[tx] = 0;
+        __syncthreads();
+        for (int i = tx; i < num_input; i += BLOCK_SIZE) {
+          const auto idx = s_input_idx[r_idx][i];
+          const auto raw_input = score[idx];
+          const auto bin = (Traits::ToOrdered(raw_input) >> offset) & 0xFF;
+          if (static_cast<int>(bin) > threshold) {
+            const auto pos = atomicAdd(&s_counter, 1);
+            s_indices[pos] = idx;
+          } else if (static_cast<int>(bin) == threshold) {
+            if (is_last_round) {
+              const auto pos = atomicAdd(&s_last_remain, -1);
+              if (pos > 0) {
+                s_indices[top_k - pos] = idx;
+              }
+            } else {
+              const auto pos = atomicAdd(&s_num_input[r_idx ^ 1], 1);
+              if (__builtin_expect(pos < SMEM_INPUT_SIZE, 1)) {
+                s_input_idx[r_idx ^ 1][pos] = idx;
+                const auto bin32 = Traits::ToOrdered(raw_input);
+                const auto sub_bin = (bin32 >> (offset - 8)) & 0xFF;
+                atomicAdd(&s_histogram[sub_bin], 1);
+              }
+            }
+          }
+        }
+        __syncthreads();
+      }
+    }
+  }
+
+  // Output phase - mode-specific
+#pragma unroll 2
+  for (int base = tx; base < static_cast<int>(top_k); base += BLOCK_SIZE) {
+    const int idx = s_indices[base];
+    dst[base] = static_cast<IdType>(idx);
+  }
+}
+
+// Helper to compute GCD for VEC_SIZE selection
+constexpr uint32_t gcd(uint32_t a, uint32_t b) {
+  while (b != 0) {
+    uint32_t t = b;
+    b = a % b;
+    a = t;
+  }
+  return a;
+}
+
+// Compute optimal VEC_SIZE based on max_len and dtype
+// Returns 1, 2, 4, or 8
+template <typename DType>
+constexpr int ComputeFilteredTopKVecSize(uint32_t max_len) {
+  constexpr int MAX_VEC = 16 / sizeof(DType);  // 4 for float32, 8 for fp16/bf16
+  // Use GCD to find largest power-of-2 divisor
+  const uint32_t g = gcd(max_len, static_cast<uint32_t>(MAX_VEC));
+  return static_cast<int>(g);
+}
+
+template <typename DType, typename IdType, uint32_t MAX_K = 2048>
+cudaError_t FilteredTopKRaggedTransform(DType* input, IdType* output_indices,
+                                        IdType* lengths, uint32_t num_rows,
+                                        uint32_t top_k_val, uint32_t max_len,
+                                        cudaStream_t stream = 0) {
+  constexpr size_t smem_size = FILTERED_TOPK_SMEM_DYNAMIC;
+  constexpr int MAX_VEC = 16 / sizeof(DType);
+
+  dim3 grid(num_rows);
+  dim3 block(FILTERED_TOPK_BLOCK_THREADS);
+  void* args[] = {&input,    &output_indices, &lengths,
+                  &num_rows, &top_k_val,      &max_len};
+
+  const int vec_size = ComputeFilteredTopKVecSize<DType>(max_len);
+
+#define DISPATCH_VEC_SIZE(VS)                                               \
+  if (vec_size == VS) {                                                     \
+    auto kernel = FilteredTopKUnifiedKernel<DType, IdType, VS, MAX_K>;      \
+    FLASHINFER_CUDA_CALL(cudaFuncSetAttribute(                              \
+        kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size));   \
+    FLASHINFER_CUDA_CALL(cudaLaunchKernel((void*)kernel, grid, block, args, \
+                                          smem_size, stream));              \
+    return cudaSuccess;                                                     \
+  }
+
+  DISPATCH_VEC_SIZE(1)
+  DISPATCH_VEC_SIZE(2)
+  DISPATCH_VEC_SIZE(4)
+  if constexpr (MAX_VEC >= 8) {
+    DISPATCH_VEC_SIZE(8)
+  }
+#undef DISPATCH_VEC_SIZE
+
+  return cudaSuccess;
+}
+
+}  // namespace vllm
+
+#endif  // PERSISTENT_TOPK_CUH_
diff --git a/csrc/pos_encoding_kernels.cu b/csrc/pos_encoding_kernels.cu
deleted file mode 100644
index b5645b33b907..000000000000
--- a/csrc/pos_encoding_kernels.cu
+++ /dev/null
@@ -1,184 +0,0 @@
-#include <torch/all.h>
-#include <ATen/cuda/CUDAContext.h>
-#include <c10/cuda/CUDAGuard.h>
-
-#include "cuda_compat.h"
-#include "dispatch_utils.h"
-
-namespace vllm {
-
-template <typename scalar_t, bool IS_NEOX>
-inline __device__ void apply_token_rotary_embedding(
-    scalar_t* __restrict__ arr, const scalar_t* __restrict__ cos_ptr,
-    const scalar_t* __restrict__ sin_ptr, int rot_offset, int embed_dim) {
-  int x_index, y_index;
-  scalar_t cos, sin;
-  if (IS_NEOX) {
-    // GPT-NeoX style rotary embedding.
-    x_index = rot_offset;
-    y_index = embed_dim + rot_offset;
-    cos = VLLM_LDG(cos_ptr + x_index);
-    sin = VLLM_LDG(sin_ptr + x_index);
-  } else {
-    // GPT-J style rotary embedding.
-    x_index = 2 * rot_offset;
-    y_index = 2 * rot_offset + 1;
-    cos = VLLM_LDG(cos_ptr + x_index / 2);
-    sin = VLLM_LDG(sin_ptr + x_index / 2);
-  }
-
-  const scalar_t x = arr[x_index];
-  const scalar_t y = arr[y_index];
-  arr[x_index] = x * cos - y * sin;
-  arr[y_index] = y * cos + x * sin;
-}
-
-template <typename scalar_t, bool IS_NEOX>
-inline __device__ void apply_rotary_embedding(
-    scalar_t* __restrict__ query,  // [batch_size, seq_len, num_heads,
-                                   // head_size] or [num_tokens, num_heads,
-                                   // head_size]
-    scalar_t* __restrict__ key,    // nullptr or
-                                   // [batch_size, seq_len, num_kv_heads,
-                                   // head_size] or [num_tokens, num_kv_heads,
-                                   // head_size]
-    const scalar_t* cache_ptr, const int head_size, const int num_heads,
-    const int num_kv_heads, const int rot_dim, const int token_idx,
-    const int64_t query_stride, const int64_t key_stride,
-    const int64_t head_stride) {
-  const int embed_dim = rot_dim / 2;
-  const scalar_t* cos_ptr = cache_ptr;
-  const scalar_t* sin_ptr = cache_ptr + embed_dim;
-
-  const int nq = num_heads * embed_dim;
-  for (int i = threadIdx.x; i < nq; i += blockDim.x) {
-    const int head_idx = i / embed_dim;
-    const int64_t token_head =
-        token_idx * query_stride + head_idx * head_stride;
-    const int rot_offset = i % embed_dim;
-    apply_token_rotary_embedding<scalar_t, IS_NEOX>(
-        query + token_head, cos_ptr, sin_ptr, rot_offset, embed_dim);
-  }
-
-  if (key != nullptr) {
-    const int nk = num_kv_heads * embed_dim;
-    for (int i = threadIdx.x; i < nk; i += blockDim.x) {
-      const int head_idx = i / embed_dim;
-      const int64_t token_head =
-          token_idx * key_stride + head_idx * head_stride;
-      const int rot_offset = i % embed_dim;
-      apply_token_rotary_embedding<scalar_t, IS_NEOX>(
-          key + token_head, cos_ptr, sin_ptr, rot_offset, embed_dim);
-    }
-  }
-}
-
-template <typename scalar_t, bool IS_NEOX>
-__global__ void rotary_embedding_kernel(
-    const int64_t* __restrict__ positions,  // [batch_size, seq_len] or
-                                            // [num_tokens]
-    scalar_t* __restrict__ query,           // [batch_size, seq_len, num_heads,
-                                   // head_size] or [num_tokens, num_heads,
-                                   // head_size]
-    scalar_t* __restrict__ key,  // nullptr or
-                                 // [batch_size, seq_len, num_kv_heads,
-                                 // head_size] or [num_tokens, num_kv_heads,
-                                 // head_size]
-    const scalar_t* __restrict__ cos_sin_cache,  // [max_position, 2, rot_dim //
-                                                 // 2]
-    const int rot_dim, const int64_t query_stride, const int64_t key_stride,
-    const int64_t head_stride, const int num_heads, const int num_kv_heads,
-    const int head_size) {
-  // Each thread block is responsible for one token.
-  const int token_idx = blockIdx.x;
-  int64_t pos = positions[token_idx];
-  const scalar_t* cache_ptr = cos_sin_cache + pos * rot_dim;
-
-  apply_rotary_embedding<scalar_t, IS_NEOX>(
-      query, key, cache_ptr, head_size, num_heads, num_kv_heads, rot_dim,
-      token_idx, query_stride, key_stride, head_stride);
-}
-
-}  // namespace vllm
-
-void rotary_embedding(
-    torch::Tensor& positions,  // [batch_size, seq_len] or [num_tokens]
-    torch::Tensor& query,  // [batch_size, seq_len, num_heads * head_size] or
-                           // [num_tokens, num_heads * head_size] or
-                           // [batch_size, seq_len, num_heads, head_size] or
-                           // [num_tokens, num_heads, head_size]
-    std::optional<torch::Tensor> key,
-    // null or
-    // [batch_size, seq_len, num_kv_heads * head_size] or
-    // [num_tokens, num_kv_heads * head_size] or
-    // [batch_size, seq_len, num_heads, head_size] or
-    // [num_tokens, num_heads, head_size]
-    int64_t head_size,
-    torch::Tensor& cos_sin_cache,  // [max_position, rot_dim]
-    bool is_neox) {
-  // num_tokens = batch_size * seq_len
-  int64_t num_tokens = positions.numel();
-  int positions_ndim = positions.dim();
-
-  // Make sure num_tokens dim is consistent across positions, query, and key
-  TORCH_CHECK(
-      positions_ndim == 1 || positions_ndim == 2,
-      "positions must have shape [num_tokens] or [batch_size, seq_len]");
-  if (positions_ndim == 1) {
-    TORCH_CHECK(query.size(0) == positions.size(0) &&
-                    (!key.has_value() || key->size(0) == positions.size(0)),
-                "query, key and positions must have the same number of tokens");
-  }
-  if (positions_ndim == 2) {
-    TORCH_CHECK(
-        query.size(0) == positions.size(0) &&
-            (!key.has_value() || key->size(0) == positions.size(0)) &&
-            query.size(1) == positions.size(1) &&
-            (!key.has_value() || key->size(1) == positions.size(1)),
-        "query, key and positions must have the same batch_size and seq_len");
-  }
-
-  // Make sure head_size is valid for query and key
-  // hidden_size = num_heads * head_size
-  int query_hidden_size = query.numel() / num_tokens;
-  int key_hidden_size = key.has_value() ? key->numel() / num_tokens : 0;
-  TORCH_CHECK(query_hidden_size % head_size == 0);
-  TORCH_CHECK(key_hidden_size % head_size == 0);
-
-  // Make sure query and key have consistent number of heads
-  int num_heads = query_hidden_size / head_size;
-  int num_kv_heads = key.has_value() ? key_hidden_size / head_size : num_heads;
-  TORCH_CHECK(num_heads % num_kv_heads == 0);
-
-  int rot_dim = cos_sin_cache.size(1);
-  int seq_dim_idx = positions_ndim - 1;
-  int64_t query_stride = query.stride(seq_dim_idx);
-  int64_t key_stride = key.has_value() ? key->stride(seq_dim_idx) : 0;
-  // Determine head stride: for [*, heads, head_size] use stride of last dim;
-  // for flat [*, heads*head_size], heads blocks are contiguous of size
-  // head_size
-  int query_ndim = query.dim();
-  int64_t head_stride =
-      (query_ndim == positions_ndim + 2) ? query.stride(-2) : head_size;
-
-  dim3 grid(num_tokens);
-  dim3 block(std::min<int64_t>(num_heads * rot_dim / 2, 512));
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(query));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  VLLM_DISPATCH_FLOATING_TYPES(query.scalar_type(), "rotary_embedding", [&] {
-    if (is_neox) {
-      vllm::rotary_embedding_kernel<scalar_t, true><<<grid, block, 0, stream>>>(
-          positions.data_ptr<int64_t>(), query.data_ptr<scalar_t>(),
-          key.has_value() ? key->data_ptr<scalar_t>() : nullptr,
-          cos_sin_cache.data_ptr<scalar_t>(), rot_dim, query_stride, key_stride,
-          head_stride, num_heads, num_kv_heads, head_size);
-    } else {
-      vllm::rotary_embedding_kernel<scalar_t, false>
-          <<<grid, block, 0, stream>>>(
-              positions.data_ptr<int64_t>(), query.data_ptr<scalar_t>(),
-              key.has_value() ? key->data_ptr<scalar_t>() : nullptr,
-              cos_sin_cache.data_ptr<scalar_t>(), rot_dim, query_stride,
-              key_stride, head_stride, num_heads, num_kv_heads, head_size);
-    }
-  });
-}
diff --git a/csrc/quantization/fp4/nvfp4_quant_entry.cu b/csrc/quantization/fp4/nvfp4_quant_entry.cu
deleted file mode 100644
index 8b5a1fd22cb7..000000000000
--- a/csrc/quantization/fp4/nvfp4_quant_entry.cu
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <torch/all.h>
-
-#include "nvfp4_utils.cuh"
-
-#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
-    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
-void scaled_fp4_quant_sm1xxa(torch::Tensor const& output,
-                             torch::Tensor const& input,
-                             torch::Tensor const& output_sf,
-                             torch::Tensor const& input_sf,
-                             bool is_sf_swizzled_layout);
-#endif
-
-#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
-    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
-void scaled_fp4_experts_quant_sm1xxa(
-    torch::Tensor& output, torch::Tensor& output_scale,
-    torch::Tensor const& input, torch::Tensor const& input_global_scale,
-    torch::Tensor const& input_offset_by_experts,
-    torch::Tensor const& output_scale_offset_by_experts);
-#endif
-
-#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
-    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
-void silu_and_mul_nvfp4_quant_sm1xxa(torch::Tensor& output,
-                                     torch::Tensor& output_sf,
-                                     torch::Tensor& input,
-                                     torch::Tensor& input_sf);
-#endif
-
-#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
-    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
-void silu_and_mul_scaled_fp4_experts_quant_sm1xxa(
-    torch::Tensor& output, torch::Tensor& output_scale,
-    torch::Tensor const& input, torch::Tensor const& input_global_scale,
-    torch::Tensor const& input_offset_by_experts,
-    torch::Tensor const& output_scale_offset_by_experts);
-#endif
-
-void scaled_fp4_quant_out(torch::Tensor const& input,
-                          torch::Tensor const& input_sf,
-                          bool is_sf_swizzled_layout, torch::Tensor& output,
-                          torch::Tensor& output_sf) {
-#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
-    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
-  return scaled_fp4_quant_sm1xxa(output, input, output_sf, input_sf,
-                                 is_sf_swizzled_layout);
-#endif
-  TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled nvfp4 quantization kernel");
-}
-
-std::tuple<torch::Tensor, torch::Tensor> scaled_fp4_quant_func(
-    torch::Tensor const& input, torch::Tensor const& input_sf,
-    bool is_sf_swizzled_layout) {
-  int64_t n = input.size(-1);
-  int64_t m = input.numel() / n;
-  auto device = input.device();
-
-  // Two fp4 values packed into a uint8
-  auto output = torch::empty(
-      {m, n / 2}, torch::TensorOptions().device(device).dtype(torch::kUInt8));
-
-  torch::Tensor output_sf;
-  if (is_sf_swizzled_layout) {
-    auto [sf_m, sf_n] = vllm::computeSwizzledSFShape(m, n);
-    output_sf = torch::empty(
-        {sf_m, sf_n},
-        torch::TensorOptions().device(device).dtype(torch::kInt32));
-  } else {
-    output_sf = torch::empty(
-        {m, n / CVT_FP4_SF_VEC_SIZE},
-        torch::TensorOptions().device(device).dtype(torch::kUInt8));
-  }
-
-  scaled_fp4_quant_out(input, input_sf, is_sf_swizzled_layout, output,
-                       output_sf);
-  return {output, output_sf};
-}
-
-void scaled_fp4_experts_quant(
-    torch::Tensor& output, torch::Tensor& output_scale,
-    torch::Tensor const& input, torch::Tensor const& input_global_scale,
-    torch::Tensor const& input_offset_by_experts,
-    torch::Tensor const& output_scale_offset_by_experts) {
-#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
-    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
-  return scaled_fp4_experts_quant_sm1xxa(
-      output, output_scale, input, input_global_scale, input_offset_by_experts,
-      output_scale_offset_by_experts);
-#endif
-  TORCH_CHECK_NOT_IMPLEMENTED(false,
-                              "No compiled nvfp4 experts quantization kernel");
-}
-
-void silu_and_mul_nvfp4_quant(torch::Tensor& output, torch::Tensor& output_sf,
-                              torch::Tensor& input, torch::Tensor& input_sf) {
-#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
-    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
-  return silu_and_mul_nvfp4_quant_sm1xxa(output, output_sf, input, input_sf);
-#endif
-  TORCH_CHECK_NOT_IMPLEMENTED(
-      false, "No compiled silu_and_mul nvfp4 quantization kernel");
-}
-
-void silu_and_mul_scaled_fp4_experts_quant(
-    torch::Tensor& output, torch::Tensor& output_scale,
-    torch::Tensor const& input, torch::Tensor const& input_global_scale,
-    torch::Tensor const& input_offset_by_experts,
-    torch::Tensor const& output_scale_offset_by_experts) {
-#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
-    (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
-  return silu_and_mul_scaled_fp4_experts_quant_sm1xxa(
-      output, output_scale, input, input_global_scale, input_offset_by_experts,
-      output_scale_offset_by_experts);
-#endif
-  TORCH_CHECK_NOT_IMPLEMENTED(
-      false, "No compiled silu_and_mul nvfp4 experts quantization kernel");
-}
diff --git a/csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu b/csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu
deleted file mode 100644
index d9c4d24d8e1f..000000000000
--- a/csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <torch/all.h>
-#include <c10/cuda/CUDAGuard.h>
-#include "cutlass_extensions/common.hpp"
-
-#if defined ENABLE_NVFP4_SM100 && ENABLE_NVFP4_SM100
-void cutlass_scaled_fp4_mm_sm100a(torch::Tensor& D, torch::Tensor const& A,
-                                  torch::Tensor const& B,
-                                  torch::Tensor const& A_sf,
-                                  torch::Tensor const& B_sf,
-                                  torch::Tensor const& alpha);
-#endif
-
-#if defined ENABLE_NVFP4_SM120 && ENABLE_NVFP4_SM120
-void cutlass_scaled_fp4_mm_sm120a(torch::Tensor& D, torch::Tensor const& A,
-                                  torch::Tensor const& B,
-                                  torch::Tensor const& A_sf,
-                                  torch::Tensor const& B_sf,
-                                  torch::Tensor const& alpha);
-#endif
-
-void cutlass_scaled_fp4_mm(torch::Tensor& D, const torch::Tensor& A,
-                           const torch::Tensor& B, const torch::Tensor& A_sf,
-                           const torch::Tensor& B_sf,
-                           const torch::Tensor& alpha) {
-  // Make sure we’re on A’s device.
-  const c10::cuda::OptionalCUDAGuard device_guard(device_of(A));
-  const int32_t sm = get_sm_version_num();
-
-#if defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100
-  if (sm >= 100 && sm < 120) {
-    cutlass_scaled_fp4_mm_sm100a(D, A, B, A_sf, B_sf, alpha);
-    return;
-  }
-#endif
-
-#if defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120
-  if (sm >= 120 && sm < 130) {
-    cutlass_scaled_fp4_mm_sm120a(D, A, B, A_sf, B_sf, alpha);
-    return;
-  }
-#endif
-
-  TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled nvfp4 mm kernel for SM ", sm,
-                              ". Recompile with CUDA >= 12.8 and CC >= 100.");
-}
-
-bool cutlass_scaled_mm_supports_fp4(int64_t cuda_device_capability) {
-  int runtimeVersion;
-  cudaRuntimeGetVersion(&runtimeVersion);
-  return cuda_device_capability >= 100 && runtimeVersion >= 12080;
-}
diff --git a/csrc/quantization/fp4/nvfp4_scaled_mm_sm120_kernels.cu b/csrc/quantization/fp4/nvfp4_scaled_mm_sm120_kernels.cu
deleted file mode 100644
index 89de23b76e65..000000000000
--- a/csrc/quantization/fp4/nvfp4_scaled_mm_sm120_kernels.cu
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <torch/all.h>
-
-#include <ATen/cuda/CUDAContext.h>
-#include <c10/cuda/CUDAGuard.h>
-
-#include "cutlass_extensions/common.hpp"
-
-#include "cutlass/cutlass.h"
-
-#include "cutlass/gemm/collective/collective_builder.hpp"
-#include "cutlass/epilogue/collective/collective_builder.hpp"
-#include "cutlass/gemm/device/gemm_universal_adapter.h"
-#include "cutlass/gemm/kernel/gemm_universal.hpp"
-
-#include "cutlass/util/packed_stride.hpp"
-
-#include "core/math.hpp"
-
-using namespace cute;
-
-#define CHECK_TYPE(x, st, m) \
-  TORCH_CHECK(x.scalar_type() == st, ": Inconsistency of Tensor type:", m)
-#define CHECK_TH_CUDA(x, m) \
-  TORCH_CHECK(x.is_cuda(), m, ": must be a CUDA tensor")
-#define CHECK_CONTIGUOUS(x, m) \
-  TORCH_CHECK(x.is_contiguous(), m, ": must be contiguous")
-#define CHECK_INPUT(x, st, m) \
-  CHECK_TH_CUDA(x, m);        \
-  CHECK_CONTIGUOUS(x, m);     \
-  CHECK_TYPE(x, st, m)
-
-constexpr auto FLOAT4_E2M1X2 = at::ScalarType::Byte;
-constexpr auto SF_DTYPE = at::ScalarType::Float8_e4m3fn;
-
-struct sm120_fp4_config_M256 {
-  using ClusterShape = Shape<_1, _1, _1>;
-  using MmaTileShape = Shape<_128, _128, _128>;
-  using PerSmTileShape_MNK = Shape<_128, _128, _128>;
-};
-
-struct sm120_fp4_config_default {
-  using ClusterShape = Shape<_1, _1, _1>;
-  using MmaTileShape = Shape<_256, _128, _128>;
-  using PerSmTileShape_MNK = Shape<_256, _128, _128>;
-};
-
-template <typename Config, typename OutType>
-struct Fp4GemmSm120 {
-  using ElementA = cutlass::nv_float4_t<cutlass::float_e2m1_t>;
-  using LayoutATag = cutlass::layout::RowMajor;
-  static constexpr int AlignmentA = 32;
-
-  using ElementB = cutlass::nv_float4_t<cutlass::float_e2m1_t>;
-  using LayoutBTag = cutlass::layout::ColumnMajor;
-  static constexpr int AlignmentB = 32;
-
-  using ElementD = OutType;
-  using ElementC = OutType;
-  using LayoutCTag = cutlass::layout::RowMajor;
-  using LayoutDTag = cutlass::layout::RowMajor;
-  static constexpr int AlignmentD = 128 / cutlass::sizeof_bits<ElementD>::value;
-  static constexpr int AlignmentC = 128 / cutlass::sizeof_bits<ElementC>::value;
-
-  using ElementAccumulator = float;
-  using ArchTag = cutlass::arch::Sm120;
-  using OperatorClass = cutlass::arch::OpClassBlockScaledTensorOp;
-
-  using MmaTileShape = typename Config::MmaTileShape;
-  using ClusterShape = typename Config::ClusterShape;
-  using PerSmTileShape_MNK = typename Config::PerSmTileShape_MNK;
-
-  using CollectiveEpilogue =
-      typename cutlass::epilogue::collective::CollectiveBuilder<
-          ArchTag, OperatorClass, PerSmTileShape_MNK, ClusterShape,
-          cutlass::epilogue::collective::EpilogueTileAuto, ElementAccumulator,
-          ElementAccumulator, ElementC, LayoutCTag, AlignmentC, ElementD,
-          LayoutDTag, AlignmentD,
-          cutlass::epilogue::collective::EpilogueScheduleAuto>::CollectiveOp;
-
-  using CollectiveMainloop =
-      typename cutlass::gemm::collective::CollectiveBuilder<
-          ArchTag, OperatorClass, ElementA, LayoutATag, AlignmentA, ElementB,
-          LayoutBTag, AlignmentB, ElementAccumulator, MmaTileShape,
-          ClusterShape,
-          cutlass::gemm::collective::StageCountAutoCarveout<static_cast<int>(
-              sizeof(typename CollectiveEpilogue::SharedStorage))>,
-          cutlass::gemm::collective::KernelScheduleAuto>::CollectiveOp;
-
-  using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
-      Shape<int, int, int, int>, CollectiveMainloop, CollectiveEpilogue, void>;
-
-  using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
-};
-
-template <typename Gemm>
-typename Gemm::Arguments args_from_options(at::Tensor& D, at::Tensor const& A,
-                                           at::Tensor const& B,
-                                           at::Tensor const& A_sf,
-                                           at::Tensor const& B_sf,
-                                           torch::Tensor const& alpha, int M,
-                                           int N, int K) {
-  using ElementA = typename Gemm::ElementA;
-  using ElementB = typename Gemm::ElementB;
-  using ElementD = typename Gemm::ElementD;
-  using ElementSFA = cutlass::float_ue4m3_t;
-  using ElementSFB = cutlass::float_ue4m3_t;
-  using ElementCompute = float;
-
-  using StrideA = typename Gemm::GemmKernel::StrideA;
-  using StrideB = typename Gemm::GemmKernel::StrideB;
-  using StrideC = typename Gemm::GemmKernel::StrideC;
-  using StrideD = typename Gemm::GemmKernel::StrideD;
-
-  using Sm1xxBlkScaledConfig =
-      typename Gemm::GemmKernel::CollectiveMainloop::Sm1xxBlkScaledConfig;
-
-  auto stride_A = cutlass::make_cute_packed_stride(StrideA{}, {M, K, 1});
-  auto stride_B = cutlass::make_cute_packed_stride(StrideB{}, {N, K, 1});
-  auto stride_D = cutlass::make_cute_packed_stride(StrideD{}, {M, N, 1});
-
-  auto layout_SFA = Sm1xxBlkScaledConfig::tile_atom_to_shape_SFA(
-      cute::make_shape(M, N, K, 1));
-  auto layout_SFB = Sm1xxBlkScaledConfig::tile_atom_to_shape_SFB(
-      cute::make_shape(M, N, K, 1));
-
-  typename Gemm::Arguments arguments{
-      cutlass::gemm::GemmUniversalMode::kGemm,
-      {M, N, K, 1},
-      {static_cast<ElementA const*>(A.data_ptr()), stride_A,
-       static_cast<ElementB const*>(B.data_ptr()), stride_B,
-       static_cast<ElementSFA const*>(A_sf.data_ptr()), layout_SFA,
-       static_cast<ElementSFB const*>(B_sf.data_ptr()), layout_SFB},
-      {{},
-       static_cast<ElementD const*>(D.data_ptr()),
-       stride_D,
-       static_cast<ElementD*>(D.data_ptr()),
-       stride_D}};
-  auto& fusion_args = arguments.epilogue.thread;
-  fusion_args.alpha_ptr = static_cast<ElementCompute const*>(alpha.data_ptr());
-
-  return arguments;
-}
-
-template <typename Gemm>
-void runGemm(at::Tensor& D, at::Tensor const& A, at::Tensor const& B,
-             at::Tensor const& A_sf, at::Tensor const& B_sf,
-             torch::Tensor const& alpha, int M, int N, int K,
-             cudaStream_t stream) {
-  Gemm gemm;
-
-  auto arguments = args_from_options<Gemm>(D, A, B, A_sf, B_sf, alpha, M, N, K);
-
-  size_t workspace_size = Gemm::get_workspace_size(arguments);
-  auto const workspace_options =
-      torch::TensorOptions().dtype(torch::kUInt8).device(A.device());
-  auto workspace = torch::empty(workspace_size, workspace_options);
-
-  CUTLASS_CHECK(gemm.can_implement(arguments));
-
-  CUTLASS_CHECK(gemm.initialize(arguments, workspace.data_ptr(), stream));
-
-  CUTLASS_CHECK(gemm.run(arguments, workspace.data_ptr(), stream));
-}
-
-void cutlass_fp4_bf16_gemm_dispatch(torch::Tensor& D, torch::Tensor const& A,
-                                    torch::Tensor const& B,
-                                    torch::Tensor const& A_sf,
-                                    torch::Tensor const& B_sf,
-                                    torch::Tensor const& alpha, int m, int n,
-                                    int k, cudaStream_t stream) {
-  uint32_t const mp2 = std::max(static_cast<uint32_t>(16), next_pow_2(m));
-  if (mp2 <= 256) {
-    runGemm<Fp4GemmSm120<sm120_fp4_config_M256, cutlass::bfloat16_t>::Gemm>(
-        D, A, B, A_sf, B_sf, alpha, m, n, k, stream);
-  } else {
-    runGemm<Fp4GemmSm120<sm120_fp4_config_default, cutlass::bfloat16_t>::Gemm>(
-        D, A, B, A_sf, B_sf, alpha, m, n, k, stream);
-  }
-}
-
-void cutlass_fp4_f16_gemm_dispatch(torch::Tensor& D, torch::Tensor const& A,
-                                   torch::Tensor const& B,
-                                   torch::Tensor const& A_sf,
-                                   torch::Tensor const& B_sf,
-                                   torch::Tensor const& alpha, int m, int n,
-                                   int k, cudaStream_t stream) {
-  uint32_t const mp2 = std::max(static_cast<uint32_t>(16), next_pow_2(m));
-  if (mp2 <= 256) {
-    runGemm<Fp4GemmSm120<sm120_fp4_config_M256, cutlass::half_t>::Gemm>(
-        D, A, B, A_sf, B_sf, alpha, m, n, k, stream);
-  } else {
-    runGemm<Fp4GemmSm120<sm120_fp4_config_default, cutlass::half_t>::Gemm>(
-        D, A, B, A_sf, B_sf, alpha, m, n, k, stream);
-  }
-}
-
-void cutlass_scaled_fp4_mm_sm120a(torch::Tensor& D, torch::Tensor const& A,
-                                  torch::Tensor const& B,
-                                  torch::Tensor const& A_sf,
-                                  torch::Tensor const& B_sf,
-                                  torch::Tensor const& alpha) {
-#if defined(CUTLASS_ARCH_MMA_SM120_SUPPORTED)
-  CHECK_INPUT(A, FLOAT4_E2M1X2, "a");
-  CHECK_INPUT(B, FLOAT4_E2M1X2, "b");
-
-  CHECK_INPUT(A_sf, SF_DTYPE, "scale_a");
-  CHECK_INPUT(B_sf, SF_DTYPE, "scale_b");
-
-  CHECK_INPUT(alpha, at::ScalarType::Float, "alpha");
-
-  TORCH_CHECK(A.dim() == 2, "a must be a matrix");
-  TORCH_CHECK(B.dim() == 2, "b must be a matrix");
-  TORCH_CHECK(A.sizes()[1] == B.sizes()[1],
-              "a and b shapes cannot be multiplied (", A.sizes()[0], "x",
-              A.sizes()[1], " and ", B.sizes()[0], "x", B.sizes()[1], ")");
-
-  auto const m = A.sizes()[0];
-  auto const n = B.sizes()[0];
-  auto const k = A.sizes()[1] * 2;
-
-  constexpr int alignment = 32;
-  TORCH_CHECK(k % alignment == 0, "Expected k to be divisible by ", alignment,
-              ", but got a shape: (", A.sizes()[0], "x", A.sizes()[1],
-              "), k: ", k, ".");
-  TORCH_CHECK(n % alignment == 0, "Expected n to be divisible by ", alignment,
-              ", but got b shape: (", B.sizes()[0], "x", B.sizes()[1], ").");
-
-  auto round_up = [](int x, int y) { return (x + y - 1) / y * y; };
-  int rounded_m = round_up(m, 128);
-  int rounded_n = round_up(n, 128);
-  // Since k is divisible by 32 (alignment), k / 16 is guaranteed to be an
-  // integer.
-  int rounded_k = round_up(k / 16, 4);
-
-  TORCH_CHECK(A_sf.dim() == 2, "scale_a must be a matrix");
-  TORCH_CHECK(B_sf.dim() == 2, "scale_b must be a matrix");
-  TORCH_CHECK(A_sf.sizes()[1] == B_sf.sizes()[1],
-              "scale_a and scale_b shapes cannot be multiplied (",
-              A_sf.sizes()[0], "x", A_sf.sizes()[1], " and ", B_sf.sizes()[0],
-              "x", B_sf.sizes()[1], ")");
-  TORCH_CHECK(A_sf.sizes()[0] == rounded_m && A_sf.sizes()[1] == rounded_k,
-              "scale_a must be padded and swizzled to a shape (", rounded_m,
-              "x", rounded_k, "), but got a shape (", A_sf.sizes()[0], "x",
-              A_sf.sizes()[1], ")");
-  TORCH_CHECK(B_sf.sizes()[0] == rounded_n && B_sf.sizes()[1] == rounded_k,
-              "scale_b must be padded and swizzled to a shape (", rounded_n,
-              "x", rounded_k, "), but got a shape (", B_sf.sizes()[0], "x",
-              B_sf.sizes()[1], ")");
-
-  auto out_dtype = D.dtype();
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(A));
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream(A.get_device());
-
-  if (out_dtype == at::ScalarType::BFloat16) {
-    return cutlass_fp4_bf16_gemm_dispatch(D, A, B, A_sf, B_sf, alpha, m, n, k,
-                                          stream);
-  } else if (out_dtype == at::ScalarType::Half) {
-    return cutlass_fp4_f16_gemm_dispatch(D, A, B, A_sf, B_sf, alpha, m, n, k,
-                                         stream);
-  } else {
-    TORCH_CHECK(false, "Unsupported output data type of nvfp4 mm sm120 (",
-                out_dtype, ")");
-  }
-#else
-  TORCH_CHECK(false,
-              "Unsupported CUTLASS version. Set VLLM_CUTLASS_SRC_DIR to "
-              "a CUTLASS 3.8 source directory to enable support.");
-#endif  // defined(CUTLASS_ARCH_MMA_SM120_SUPPORTED)
-}
\ No newline at end of file
diff --git a/csrc/quantization/fused_kernels/fused_silu_mul_block_quant.cu b/csrc/quantization/fused_kernels/fused_silu_mul_block_quant.cu
new file mode 100644
index 000000000000..d5c76232599e
--- /dev/null
+++ b/csrc/quantization/fused_kernels/fused_silu_mul_block_quant.cu
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+
+#include "../../dispatch_utils.h"
+#include "libtorch_stable/quantization/fused_kernels/quant_conversions.cuh"
+
+namespace vllm {
+
+// Logic: one thread block per (token, group) pair
+
+template <typename scalar_t, typename scalar_out_t, bool is_scale_transposed,
+          int32_t group_size>
+__global__ void silu_and_mul_per_block_quant_kernel(
+    scalar_out_t* __restrict__ out,  // Output: [num_tokens, hidden_size] in
+                                     // FP8/INT8
+    float* __restrict__ scales,      // Output: [num_tokens, hidden_size /
+                                 // group_size] or [hidden_size / group_size,
+                                 // num_tokens]
+    scalar_t const* __restrict__ input,  // Input: [num_tokens, hidden_size * 2]
+    float const* scale_ub,               // Optional scale upper bound
+    int32_t const hidden_size  // Output hidden size (input is 2x this)
+) {
+  static_assert((group_size & (group_size - 1)) == 0,
+                "group_size must be a power of 2 for correct reduction");
+
+  // Grid: (num_tokens, num_groups)
+  int const token_idx = blockIdx.x;
+  int const group_idx = blockIdx.y;
+  int const tid = threadIdx.x;  // tid in [0, group_size)
+  int const num_tokens = gridDim.x;
+
+  // Input layout: [gate || up] concatenated along last dimension
+  int const input_stride = hidden_size * 2;
+  int const group_start = group_idx * group_size;
+
+  // Pointers to this token's data
+  scalar_t const* token_input_gate =
+      input + token_idx * input_stride + group_start;
+  scalar_t const* token_input_up = token_input_gate + hidden_size;
+  scalar_out_t* token_output = out + token_idx * hidden_size + group_start;
+
+  // Scale pointer for this group
+  int const num_groups = gridDim.y;
+  float* group_scale_ptr = is_scale_transposed
+                               ? scales + group_idx * num_tokens + token_idx
+                               : scales + token_idx * num_groups + group_idx;
+
+  // Shared memory for reduction (compile-time sized)
+  __shared__ float shared_max[group_size];
+
+  // Step 1: Each thread loads one element, computes SiLU, stores in register
+  float gate = static_cast<float>(token_input_gate[tid]);
+  float up = static_cast<float>(token_input_up[tid]);
+
+  // Compute SiLU(gate) * up
+  float sigmoid_gate = 1.0f / (1.0f + expf(-gate));
+  float silu_gate = gate * sigmoid_gate;
+  float result = silu_gate * up;  // Keep in register
+
+  // Step 2: Reduce to find group max
+  shared_max[tid] = fabsf(result);
+  __syncthreads();
+
+// Power-of-2 reduction (group_size guaranteed to be power of 2)
+#pragma unroll
+  for (int stride = group_size / 2; stride > 0; stride >>= 1) {
+    if (tid < stride) {
+      shared_max[tid] = fmaxf(shared_max[tid], shared_max[tid + stride]);
+    }
+    __syncthreads();
+  }
+
+  // Step 3: Compute scale (thread 0), broadcast via shared memory
+  if (tid == 0) {
+    float group_max = shared_max[0];
+
+    float const quant_range = quant_type_max_v<scalar_out_t>;
+    float group_scale = group_max / quant_range;
+
+    // Apply scale upper bound if provided
+    if (scale_ub != nullptr) {
+      group_scale = fminf(group_scale, *scale_ub);
+    }
+
+    // Use minimum safe scaling factor
+    group_scale = fmaxf(group_scale, min_scaling_factor<scalar_out_t>::val());
+
+    // Store scale to global memory
+    *group_scale_ptr = group_scale;
+
+    // Reuse shared_max[0] to broadcast scale
+    shared_max[0] = group_scale;
+  }
+  __syncthreads();
+
+  float group_scale = shared_max[0];
+
+  // Step 4: Quantize and write output
+  token_output[tid] =
+      vllm::ScaledQuant<scalar_out_t, false>::quant_fn(result, group_scale);
+}
+
+}  // namespace vllm
+
+void silu_and_mul_per_block_quant(torch::Tensor& out,
+                                  torch::Tensor const& input,
+                                  torch::Tensor& scales, int64_t group_size,
+                                  std::optional<torch::Tensor> scale_ub,
+                                  bool is_scale_transposed) {
+  static c10::ScalarType kFp8Type = is_fp8_ocp()
+                                        ? c10::ScalarType::Float8_e4m3fn
+                                        : c10::ScalarType::Float8_e4m3fnuz;
+
+  TORCH_CHECK(out.dtype() == kFp8Type || out.dtype() == torch::kInt8);
+  TORCH_CHECK(out.is_contiguous() && input.is_contiguous());
+  TORCH_CHECK(
+      input.dtype() == torch::kFloat16 || input.dtype() == torch::kBFloat16,
+      "Input must be FP16 or BF16");
+  TORCH_CHECK(scales.dtype() == torch::kFloat32, "Scales must be FP32");
+  TORCH_CHECK(group_size == 128 || group_size == 64,
+              "Unsupported group size: ", group_size);
+
+  if (scale_ub.has_value()) {
+    TORCH_CHECK(out.dtype() == kFp8Type);
+  }
+
+  int32_t hidden_size = out.size(-1);
+  auto num_tokens = input.size(0);
+  int32_t num_groups = hidden_size / group_size;
+
+  TORCH_CHECK(input.size(-1) == hidden_size * 2,
+              "input last dim must be 2x output hidden_size");
+  TORCH_CHECK(hidden_size % group_size == 0,
+              "hidden_size must be divisible by group_size");
+
+  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));
+  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  dim3 grid(num_tokens, num_groups);
+  dim3 block(group_size);
+
+  VLLM_DISPATCH_FLOATING_TYPES(
+      input.scalar_type(), "silu_and_mul_per_block_quant", [&] {
+        using scalar_in_t = scalar_t;
+
+        VLLM_DISPATCH_QUANT_TYPES(
+            out.scalar_type(), "silu_and_mul_per_block_quant", [&] {
+              using scalar_out_t = scalar_t;
+
+              VLLM_DISPATCH_GROUP_SIZE(group_size, gs, [&] {
+                VLLM_DISPATCH_BOOL(is_scale_transposed, transpose_scale, [&] {
+                  vllm::silu_and_mul_per_block_quant_kernel<
+                      scalar_in_t, scalar_out_t, transpose_scale, gs>
+                      <<<grid, block, 0, stream>>>(
+                          out.data_ptr<scalar_out_t>(),
+                          scales.data_ptr<float>(),
+                          input.data_ptr<scalar_in_t>(),
+                          scale_ub.has_value() ? scale_ub->data_ptr<float>()
+                                               : nullptr,
+                          hidden_size);
+                });
+              });
+            });
+      });
+}
\ No newline at end of file
diff --git a/csrc/quantization/machete/machete_mainloop.cuh b/csrc/quantization/machete/machete_mainloop.cuh
index 9f02f4f17974..a0d1ded8876a 100644
--- a/csrc/quantization/machete/machete_mainloop.cuh
+++ b/csrc/quantization/machete/machete_mainloop.cuh
@@ -154,6 +154,7 @@ struct MacheteCollectiveMma {
   struct DispatchPolicy {
     constexpr static int Stages = PipelineStages;
     using ClusterShape = ClusterShape_MNK;
+    using ArchTag = arch::Sm90;
     using Schedule = KernelScheduleType;
   };
 
diff --git a/csrc/quantization/marlin/generate_kernels.py b/csrc/quantization/marlin/generate_kernels.py
index 5ecbc6ac9990..7b316037ec63 100644
--- a/csrc/quantization/marlin/generate_kernels.py
+++ b/csrc/quantization/marlin/generate_kernels.py
@@ -15,11 +15,11 @@
 for arch in sys.argv[1].split(","):
     arch = arch[: arch.index(".") + 2].replace(".", "")
     arch = int(arch)
-    # only SM89 and SM120 fully support
-    # mma.sync.aligned.m16n8k32.row.col.f32.e4m3.e4m3.f32.
+    # SM89 and the SM12x family (SM120 RTX 5090, SM121 DGX Spark GB10)
+    # fully support mma.sync.aligned.m16n8k32.row.col.f32.e4m3.e4m3.f32.
     # SM90 and SM100 can use this PTX, but it’s simulated
     # with FP16 MMA, so it cannot achieve any acceleration.
-    if arch in [89, 120]:
+    if arch == 89 or arch // 10 == 12:
         SUPPORT_FP8 = True
     if arch >= 80:
         SUPPORT_SM80 = True
@@ -108,6 +108,15 @@
         "thread_m_blocks": THREAD_M_BLOCKS,
         "group_blocks": [2],
     },
+    # MXFP8
+    {
+        "a_type": ["kBFloat16"],
+        "b_type": "kFE4M3fn",
+        "s_type": "kFE8M0fnu",
+        "thread_configs": THREAD_CONFIGS,
+        "thread_m_blocks": THREAD_M_BLOCKS,
+        "group_blocks": [2],
+    },
     # AWQ-INT4 with INT8 activation
     {
         "a_type": ["kS8"],
diff --git a/csrc/quantization/marlin/marlin.cu b/csrc/quantization/marlin/marlin.cu
index fbdb619c27f0..5684f272e136 100644
--- a/csrc/quantization/marlin/marlin.cu
+++ b/csrc/quantization/marlin/marlin.cu
@@ -591,6 +591,9 @@ torch::Tensor marlin_gemm(
                   "When b_type = float4_e2m1f, b_scale scalar type must be",
                   "float8_e4m3fn (for NVFP4) or float8_e8m0fnu (for MXFP4).");
     }
+  } else if (b_type_id == vllm::kFE4M3fn.id() &&
+             b_scales.scalar_type() == at::ScalarType::Float8_e8m0fnu) {
+    s_type_id = vllm::kFE8M0fnu.id();
   }
 
   vllm::ScalarType a_type = vllm::ScalarType::from_id(a_type_id);
diff --git a/csrc/quantization/marlin/marlin.cuh b/csrc/quantization/marlin/marlin.cuh
index 33fe52f605b4..d3a91568349f 100644
--- a/csrc/quantization/marlin/marlin.cuh
+++ b/csrc/quantization/marlin/marlin.cuh
@@ -2,10 +2,14 @@
 
 #ifndef _marlin_cuh
   #define _marlin_cuh
-  #include <torch/all.h>
-
-  #include <ATen/cuda/CUDAContext.h>
-  #include <c10/cuda/CUDAGuard.h>
+  // These torch headers are only needed by non-stable callers (e.g. ops.cu).
+  // Guard them so that stable ABI targets can still include marlin.cuh
+  // for Vec, constants, and cp_async helpers without pulling in torch/all.h.
+  #ifndef TORCH_TARGET_VERSION
+    #include <torch/all.h>
+    #include <ATen/cuda/CUDAContext.h>
+    #include <c10/cuda/CUDAGuard.h>
+  #endif
   #include <cuda.h>
   #include <cuda_fp16.h>
   #include <cuda_runtime.h>
diff --git a/csrc/quantization/marlin/marlin_template.h b/csrc/quantization/marlin/marlin_template.h
index 9e625b645ee6..32b8f8bdd51e 100644
--- a/csrc/quantization/marlin/marlin_template.h
+++ b/csrc/quantization/marlin/marlin_template.h
@@ -327,6 +327,9 @@ __global__ void Marlin(
   if constexpr (b_type == vllm::kFE2M1f) {
     static_assert(s_type == vllm::kFE4M3fn && group_blocks == 1 ||
                   s_type == vllm::kFE8M0fnu && group_blocks == 2);
+  } else if constexpr (s_type == vllm::kFE8M0fnu) {
+    // MXFP8: FP8 weights with e8m0 microscaling block scales
+    static_assert(b_type == vllm::kFE4M3fn && group_blocks == 2);
   } else if constexpr (std::is_same<scalar_t, nv_bfloat16>::value) {
     static_assert(s_type == vllm::kBFloat16);
   } else if constexpr (std::is_same<scalar_t, half>::value) {
@@ -334,6 +337,7 @@ __global__ void Marlin(
   }
 
   constexpr bool is_a_8bit = a_type.size_bits() == 8;
+  constexpr bool is_8bit_scale = s_type.size_bits() == 8;
   if constexpr (!is_a_8bit) {
     static_assert(std::is_same<scalar_t, c_scalar_t>::value);
   }
@@ -343,7 +347,7 @@ __global__ void Marlin(
                                b_type == vllm::kU4B8 || b_type == vllm::kU8B128;
   // see comments of dequant.h for more details
   constexpr bool dequant_skip_flop =
-      is_a_8bit || b_type == vllm::kFE4M3fn ||
+      is_a_8bit || (b_type == vllm::kFE4M3fn && !(s_type == vllm::kFE8M0fnu)) ||
       b_type == vllm::kFE2M1f && s_type == vllm::kFE4M3fn ||
       has_zp && !is_zp_float && !std::is_same<scalar_t, nv_bfloat16>::value ||
       has_zp && !is_zp_float && !(b_type == vllm::kU8);
@@ -555,9 +559,8 @@ __global__ void Marlin(
   constexpr int b_sh_wr_iters = b_sh_stage / b_sh_wr_delta;
 
   // Scale sizes/strides without act_order
-  int s_gl_stride = prob_n / (b_type == vllm::kFE2M1f ? 16 : 8);
-  constexpr int s_sh_stride =
-      16 * thread_n_blocks / (b_type == vllm::kFE2M1f ? 16 : 8);
+  int s_gl_stride = prob_n / (is_8bit_scale ? 16 : 8);
+  constexpr int s_sh_stride = 16 * thread_n_blocks / (is_8bit_scale ? 16 : 8);
   constexpr int s_tb_groups =
       !has_act_order && group_blocks != -1 && group_blocks < thread_k_blocks
           ? thread_k_blocks / group_blocks
@@ -997,7 +1000,7 @@ __global__ void Marlin(
 
           int4* sh_s_stage = sh_s + s_sh_stage * pipe;
 
-          if constexpr (b_type_id != vllm::kFE2M1f.id()) {
+          if constexpr (!is_8bit_scale) {
             reinterpret_cast<int4*>(&frag_s[k % 2])[0] =
                 sh_s_stage[s_sh_rd + cur_group_id * s_sh_stride];
           } else {
@@ -1006,7 +1009,7 @@ __global__ void Marlin(
                     sh_s_stage)[s_sh_rd + cur_group_id * (2 * s_sh_stride)];
           }
         } else if (group_blocks >= b_sh_wr_iters) {
-          if constexpr (b_type_id != vllm::kFE2M1f.id()) {
+          if constexpr (!is_8bit_scale) {
             reinterpret_cast<int4*>(&frag_s[1])[0] =
                 reinterpret_cast<int4*>(&frag_s[0])[0];
           } else {
@@ -1207,7 +1210,7 @@ __global__ void Marlin(
       }
     }
 
-    if constexpr (b_type == vllm::kFE2M1f) {
+    if constexpr (s_type == vllm::kFE4M3fn || s_type == vllm::kFE8M0fnu) {
       int s_quant_0 = reinterpret_cast<int*>(frag_s[k2])[0];
       int s_quant_1 = reinterpret_cast<int*>(frag_s[k2])[1];
 
diff --git a/csrc/quantization/utils.cuh b/csrc/quantization/utils.cuh
index 73055a152874..6bb9b9fc5635 100644
--- a/csrc/quantization/utils.cuh
+++ b/csrc/quantization/utils.cuh
@@ -7,23 +7,23 @@
  */
 
 #include <cmath>
-#include <torch/types.h>
+#include <torch/headeronly/macros/Macros.h>
 
 #ifndef USE_ROCM
-  #include <c10/util/Float8_e4m3fn.h>
+  #include <torch/headeronly/util/Float8_e4m3fn.h>
   #define MAYBE_HOST_DEVICE C10_HOST_DEVICE
 #else
-  #include <ATen/hip/HIPContext.h>
-  #include <c10/util/Float8_e4m3fn.h>
-  #include <c10/util/Float8_e4m3fnuz.h>
+  #include <torch/headeronly/util/Float8_e4m3fn.h>
+  #include <torch/headeronly/util/Float8_e4m3fnuz.h>
   // ROCm doesn't seem to need C10_HOST_DEVICE for static constexpr
   #define MAYBE_HOST_DEVICE
 #endif
 
 template <typename T,
-          typename = std::enable_if_t<std::is_same_v<T, c10::Float8_e4m3fn> ||
-                                      std::is_same_v<T, c10::Float8_e4m3fnuz> ||
-                                      std::is_same_v<T, int8_t>>>
+          typename = std::enable_if_t<
+              std::is_same_v<T, torch::headeronly::Float8_e4m3fn> ||
+              std::is_same_v<T, torch::headeronly::Float8_e4m3fnuz> ||
+              std::is_same_v<T, int8_t>>>
 struct quant_type_max {
   static constexpr T val() { return std::numeric_limits<T>::max(); }
 };
@@ -31,9 +31,10 @@ struct quant_type_max {
 // Using the default max value from pytorch (240.0 0x7F) will cause accuracy
 // issues when running dynamic quantization. Here use 224.0 0x7E for rocm.
 template <>
-struct quant_type_max<c10::Float8_e4m3fnuz> {
-  static constexpr c10::Float8_e4m3fnuz val() {
-    return c10::Float8_e4m3fnuz(0x7E, c10::Float8_e4m3fnuz::from_bits());
+struct quant_type_max<torch::headeronly::Float8_e4m3fnuz> {
+  static constexpr torch::headeronly::Float8_e4m3fnuz val() {
+    return torch::headeronly::Float8_e4m3fnuz(
+        0x7E, torch::headeronly::Float8_e4m3fnuz::from_bits());
   }
 };
 
@@ -42,9 +43,10 @@ MAYBE_HOST_DEVICE static constexpr T quant_type_max_v =
     quant_type_max<T>::val();
 
 template <typename T,
-          typename = std::enable_if_t<std::is_same_v<T, c10::Float8_e4m3fn> ||
-                                      std::is_same_v<T, c10::Float8_e4m3fnuz> ||
-                                      std::is_same_v<T, int8_t>>>
+          typename = std::enable_if_t<
+              std::is_same_v<T, torch::headeronly::Float8_e4m3fn> ||
+              std::is_same_v<T, torch::headeronly::Float8_e4m3fnuz> ||
+              std::is_same_v<T, int8_t>>>
 struct min_scaling_factor {
   C10_DEVICE C10_ALWAYS_INLINE static float val() {
     return 1.0f / (quant_type_max_v<T> * 512.0f);
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8.cu b/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8.cu
deleted file mode 100644
index 4a8a5ed02d6c..000000000000
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8.cu
+++ /dev/null
@@ -1,23 +0,0 @@
-#include "scaled_mm_kernels.hpp"
-#include "scaled_mm_blockwise_sm100_fp8_dispatch.cuh"
-#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
-
-namespace vllm {
-
-void cutlass_scaled_mm_blockwise_sm100_fp8(torch::Tensor& out,
-                                           torch::Tensor const& a,
-                                           torch::Tensor const& b,
-                                           torch::Tensor const& a_scales,
-                                           torch::Tensor const& b_scales) {
-  if (out.dtype() == torch::kBFloat16) {
-    cutlass_gemm_blockwise_sm100_fp8_dispatch<cutlass::bfloat16_t>(
-        out, a, b, a_scales, b_scales);
-
-  } else {
-    TORCH_CHECK(out.dtype() == torch::kFloat16);
-    cutlass_gemm_blockwise_sm100_fp8_dispatch<cutlass::half_t>(
-        out, a, b, a_scales, b_scales);
-  }
-}
-
-}  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8.cu b/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8.cu
deleted file mode 100644
index 5515374a5759..000000000000
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8.cu
+++ /dev/null
@@ -1,23 +0,0 @@
-#include "scaled_mm_kernels.hpp"
-#include "scaled_mm_blockwise_sm120_fp8_dispatch.cuh"
-#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
-
-namespace vllm {
-
-void cutlass_scaled_mm_blockwise_sm120_fp8(torch::Tensor& out,
-                                           torch::Tensor const& a,
-                                           torch::Tensor const& b,
-                                           torch::Tensor const& a_scales,
-                                           torch::Tensor const& b_scales) {
-  if (out.dtype() == torch::kBFloat16) {
-    cutlass_gemm_blockwise_sm120_fp8_dispatch<cutlass::bfloat16_t>(
-        out, a, b, a_scales, b_scales);
-
-  } else {
-    TORCH_CHECK(out.dtype() == torch::kFloat16);
-    cutlass_gemm_blockwise_sm120_fp8_dispatch<cutlass::half_t>(
-        out, a, b, a_scales, b_scales);
-  }
-}
-
-}  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8.cu b/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8.cu
deleted file mode 100644
index 0501e6da160e..000000000000
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8.cu
+++ /dev/null
@@ -1,24 +0,0 @@
-
-#include "scaled_mm_kernels.hpp"
-#include "scaled_mm_blockwise_sm90_fp8_dispatch.cuh"
-#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
-
-namespace vllm {
-
-void cutlass_scaled_mm_blockwise_sm90_fp8(torch::Tensor& out,
-                                          torch::Tensor const& a,
-                                          torch::Tensor const& b,
-                                          torch::Tensor const& a_scales,
-                                          torch::Tensor const& b_scales) {
-  if (out.dtype() == torch::kBFloat16) {
-    cutlass_gemm_blockwise_sm90_fp8_dispatch<cutlass::bfloat16_t>(
-        out, a, b, a_scales, b_scales);
-
-  } else {
-    TORCH_CHECK(out.dtype() == torch::kFloat16);
-    cutlass_gemm_blockwise_sm90_fp8_dispatch<cutlass::half_t>(
-        out, a, b, a_scales, b_scales);
-  }
-}
-
-}  // namespace vllm
\ No newline at end of file
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_kernels.hpp b/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_kernels.hpp
deleted file mode 100644
index 9ceb3a3ece5d..000000000000
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_kernels.hpp
+++ /dev/null
@@ -1,56 +0,0 @@
-#pragma once
-
-#include <torch/all.h>
-
-namespace vllm {
-
-void cutlass_scaled_mm_sm90_fp8(torch::Tensor& out, torch::Tensor const& a,
-                                torch::Tensor const& b,
-                                torch::Tensor const& a_scales,
-                                torch::Tensor const& b_scales,
-                                std::optional<torch::Tensor> const& bias);
-
-void cutlass_scaled_mm_sm90_int8(torch::Tensor& out, torch::Tensor const& a,
-                                 torch::Tensor const& b,
-                                 torch::Tensor const& a_scales,
-                                 torch::Tensor const& b_scales,
-                                 std::optional<torch::Tensor> const& bias);
-
-void cutlass_scaled_mm_azp_sm90_int8(torch::Tensor& out, torch::Tensor const& a,
-                                     torch::Tensor const& b,
-                                     torch::Tensor const& a_scales,
-                                     torch::Tensor const& b_scales,
-                                     torch::Tensor const& azp_adj,
-                                     std::optional<torch::Tensor> const& azp,
-                                     std::optional<torch::Tensor> const& bias);
-
-void cutlass_scaled_mm_blockwise_sm90_fp8(torch::Tensor& out,
-                                          torch::Tensor const& a,
-                                          torch::Tensor const& b,
-                                          torch::Tensor const& a_scales,
-                                          torch::Tensor const& b_scales);
-
-void cutlass_scaled_mm_sm100_fp8(torch::Tensor& out, torch::Tensor const& a,
-                                 torch::Tensor const& b,
-                                 torch::Tensor const& a_scales,
-                                 torch::Tensor const& b_scales,
-                                 std::optional<torch::Tensor> const& bias);
-
-void cutlass_scaled_mm_sm120_fp8(torch::Tensor& out, torch::Tensor const& a,
-                                 torch::Tensor const& b,
-                                 torch::Tensor const& a_scales,
-                                 torch::Tensor const& b_scales,
-                                 std::optional<torch::Tensor> const& bias);
-
-void cutlass_scaled_mm_blockwise_sm100_fp8(torch::Tensor& out,
-                                           torch::Tensor const& a,
-                                           torch::Tensor const& b,
-                                           torch::Tensor const& a_scales,
-                                           torch::Tensor const& b_scales);
-
-void cutlass_scaled_mm_blockwise_sm120_fp8(torch::Tensor& out,
-                                           torch::Tensor const& a,
-                                           torch::Tensor const& b,
-                                           torch::Tensor const& a_scales,
-                                           torch::Tensor const& b_scales);
-}  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8.cu b/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8.cu
deleted file mode 100644
index 62aeb927ccdc..000000000000
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8.cu
+++ /dev/null
@@ -1,23 +0,0 @@
-#include "scaled_mm_kernels.hpp"
-#include "scaled_mm_sm100_fp8_dispatch.cuh"
-
-namespace vllm {
-
-void cutlass_scaled_mm_sm100_fp8(torch::Tensor& out, torch::Tensor const& a,
-                                 torch::Tensor const& b,
-                                 torch::Tensor const& a_scales,
-                                 torch::Tensor const& b_scales,
-                                 std::optional<torch::Tensor> const& bias) {
-  TORCH_CHECK(a_scales.is_contiguous() && b_scales.is_contiguous());
-  if (bias) {
-    TORCH_CHECK(bias->dtype() == out.dtype(),
-                "currently bias dtype must match output dtype ", out.dtype());
-    return cutlass_scaled_mm_sm100_fp8_epilogue<true>(out, a, b, a_scales,
-                                                      b_scales, *bias);
-  } else {
-    return cutlass_scaled_mm_sm100_fp8_epilogue<false>(out, a, b, a_scales,
-                                                       b_scales);
-  }
-}
-
-}  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8.cu b/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8.cu
deleted file mode 100644
index bc816cbdf86e..000000000000
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8.cu
+++ /dev/null
@@ -1,24 +0,0 @@
-#include "scaled_mm_kernels.hpp"
-#include "scaled_mm_sm120_fp8_dispatch.cuh"
-#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
-
-namespace vllm {
-
-void cutlass_scaled_mm_sm120_fp8(torch::Tensor& out, torch::Tensor const& a,
-                                 torch::Tensor const& b,
-                                 torch::Tensor const& a_scales,
-                                 torch::Tensor const& b_scales,
-                                 std::optional<torch::Tensor> const& bias) {
-  TORCH_CHECK(a_scales.is_contiguous() && b_scales.is_contiguous());
-  if (bias) {
-    TORCH_CHECK(bias->dtype() == out.dtype(),
-                "currently bias dtype must match output dtype ", out.dtype());
-    return cutlass_scaled_mm_sm120_fp8_epilogue<c3x::ScaledEpilogueBias>(
-        out, a, b, a_scales, b_scales, *bias);
-  } else {
-    return cutlass_scaled_mm_sm120_fp8_epilogue<c3x::ScaledEpilogue>(
-        out, a, b, a_scales, b_scales);
-  }
-}
-
-}  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8.cu b/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8.cu
deleted file mode 100644
index 1db6c41bf953..000000000000
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8.cu
+++ /dev/null
@@ -1,23 +0,0 @@
-#include "scaled_mm_kernels.hpp"
-#include "scaled_mm_sm90_fp8_dispatch.cuh"
-
-namespace vllm {
-
-void cutlass_scaled_mm_sm90_fp8(torch::Tensor& out, torch::Tensor const& a,
-                                torch::Tensor const& b,
-                                torch::Tensor const& a_scales,
-                                torch::Tensor const& b_scales,
-                                std::optional<torch::Tensor> const& bias) {
-  TORCH_CHECK(a_scales.is_contiguous() && b_scales.is_contiguous());
-  if (bias) {
-    TORCH_CHECK(bias->dtype() == out.dtype(),
-                "currently bias dtype must match output dtype ", out.dtype());
-    return cutlass_scaled_mm_sm90_fp8_epilogue<true>(out, a, b, a_scales,
-                                                     b_scales, *bias);
-  } else {
-    return cutlass_scaled_mm_sm90_fp8_epilogue<false>(out, a, b, a_scales,
-                                                      b_scales);
-  }
-}
-
-}  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8.cu b/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8.cu
deleted file mode 100644
index 021467b8bde8..000000000000
--- a/csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8.cu
+++ /dev/null
@@ -1,24 +0,0 @@
-#include "scaled_mm_kernels.hpp"
-#include "scaled_mm_sm90_int8_dispatch.cuh"
-#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
-
-namespace vllm {
-
-void cutlass_scaled_mm_sm90_int8(torch::Tensor& out, torch::Tensor const& a,
-                                 torch::Tensor const& b,
-                                 torch::Tensor const& a_scales,
-                                 torch::Tensor const& b_scales,
-                                 std::optional<torch::Tensor> const& bias) {
-  TORCH_CHECK(a_scales.is_contiguous() && b_scales.is_contiguous());
-  if (bias) {
-    TORCH_CHECK(bias->dtype() == out.dtype(),
-                "currently bias dtype must match output dtype ", out.dtype());
-    return cutlass_scaled_mm_sm90_int8_epilogue<c3x::ScaledEpilogueBias>(
-        out, a, b, a_scales, b_scales, *bias);
-  } else {
-    return cutlass_scaled_mm_sm90_int8_epilogue<c3x::ScaledEpilogue>(
-        out, a, b, a_scales, b_scales);
-  }
-}
-
-}  // namespace vllm
diff --git a/csrc/quantization/w8a8/cutlass/scaled_mm_c2x.cu b/csrc/quantization/w8a8/cutlass/scaled_mm_c2x.cu
deleted file mode 100644
index 865fef5aeea1..000000000000
--- a/csrc/quantization/w8a8/cutlass/scaled_mm_c2x.cu
+++ /dev/null
@@ -1,199 +0,0 @@
-#include <stddef.h>
-#include <torch/all.h>
-#include "cutlass/cutlass.h"
-
-#include "scaled_mm_c2x.cuh"
-#include "scaled_mm_c2x_sm75_dispatch.cuh"
-#include "scaled_mm_c2x_sm80_dispatch.cuh"
-#include "scaled_mm_c2x_sm89_fp8_dispatch.cuh"
-#include "scaled_mm_c2x_sm89_int8_dispatch.cuh"
-
-#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp"
-
-using namespace vllm;
-
-/*
-   This file defines quantized GEMM operations using the CUTLASS 2.x API, for
-   NVIDIA GPUs with SM versions prior to sm90 (Hopper).
-*/
-
-template <template <typename, typename> typename Epilogue,
-          typename... EpilogueArgs>
-void cutlass_scaled_mm_sm75_epilogue(torch::Tensor& out, torch::Tensor const& a,
-                                     torch::Tensor const& b,
-                                     EpilogueArgs&&... epilogue_args) {
-  TORCH_CHECK(a.dtype() == torch::kInt8);
-  TORCH_CHECK(b.dtype() == torch::kInt8);
-
-  if (out.dtype() == torch::kBFloat16) {
-    return cutlass_gemm_sm75_dispatch<int8_t, cutlass::bfloat16_t, Epilogue>(
-        out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
-  } else {
-    TORCH_CHECK(out.dtype() == torch::kFloat16);
-    return cutlass_gemm_sm75_dispatch<int8_t, cutlass::half_t, Epilogue>(
-        out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
-  }
-}
-
-void cutlass_scaled_mm_sm75(torch::Tensor& out, torch::Tensor const& a,
-                            torch::Tensor const& b,
-                            torch::Tensor const& a_scales,
-                            torch::Tensor const& b_scales,
-                            std::optional<torch::Tensor> const& bias) {
-  TORCH_CHECK(a_scales.dtype() == torch::kFloat32);
-  TORCH_CHECK(b_scales.dtype() == torch::kFloat32);
-  if (bias) {
-    TORCH_CHECK(bias->dtype() == out.dtype(),
-                "currently bias dtype must match output dtype ", out.dtype());
-    return cutlass_scaled_mm_sm75_epilogue<c2x::ScaledEpilogueBias>(
-        out, a, b, a_scales, b_scales, *bias);
-  } else {
-    return cutlass_scaled_mm_sm75_epilogue<c2x::ScaledEpilogue>(
-        out, a, b, a_scales, b_scales);
-  }
-}
-
-void cutlass_scaled_mm_azp_sm75(torch::Tensor& out, torch::Tensor const& a,
-                                torch::Tensor const& b,
-                                torch::Tensor const& a_scales,
-                                torch::Tensor const& b_scales,
-                                torch::Tensor const& azp_adj,
-                                std::optional<torch::Tensor> const& azp,
-                                std::optional<torch::Tensor> const& bias) {
-  TORCH_CHECK(a_scales.dtype() == torch::kFloat32);
-  TORCH_CHECK(b_scales.dtype() == torch::kFloat32);
-
-  if (azp) {
-    return cutlass_scaled_mm_sm75_epilogue<c2x::ScaledEpilogueBiasAzpToken>(
-        out, a, b, a_scales, b_scales, azp_adj, *azp, bias);
-  } else {
-    return cutlass_scaled_mm_sm75_epilogue<c2x::ScaledEpilogueBiasAzp>(
-        out, a, b, a_scales, b_scales, azp_adj, bias);
-  }
-}
-
-template <template <typename, typename> typename Epilogue,
-          typename... EpilogueArgs>
-void cutlass_scaled_mm_sm80_epilogue(torch::Tensor& out, torch::Tensor const& a,
-                                     torch::Tensor const& b,
-                                     EpilogueArgs&&... epilogue_args) {
-  TORCH_CHECK(a.dtype() == torch::kInt8);
-  TORCH_CHECK(b.dtype() == torch::kInt8);
-
-  if (out.dtype() == torch::kBFloat16) {
-    return cutlass_gemm_sm80_dispatch<int8_t, cutlass::bfloat16_t, Epilogue>(
-        out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
-  } else {
-    TORCH_CHECK(out.dtype() == torch::kFloat16);
-    return cutlass_gemm_sm80_dispatch<int8_t, cutlass::half_t, Epilogue>(
-        out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
-  }
-}
-
-void cutlass_scaled_mm_sm80(torch::Tensor& out, torch::Tensor const& a,
-                            torch::Tensor const& b,
-                            torch::Tensor const& a_scales,
-                            torch::Tensor const& b_scales,
-                            std::optional<torch::Tensor> const& bias) {
-  TORCH_CHECK(a_scales.dtype() == torch::kFloat32);
-  TORCH_CHECK(b_scales.dtype() == torch::kFloat32);
-  if (bias) {
-    TORCH_CHECK(bias->dtype() == out.dtype(),
-                "currently bias dtype must match output dtype ", out.dtype());
-    return cutlass_scaled_mm_sm80_epilogue<c2x::ScaledEpilogueBias>(
-        out, a, b, a_scales, b_scales, *bias);
-  } else {
-    return cutlass_scaled_mm_sm80_epilogue<c2x::ScaledEpilogue>(
-        out, a, b, a_scales, b_scales);
-  }
-}
-
-void cutlass_scaled_mm_azp_sm80(torch::Tensor& out, torch::Tensor const& a,
-                                torch::Tensor const& b,
-                                torch::Tensor const& a_scales,
-                                torch::Tensor const& b_scales,
-                                torch::Tensor const& azp_adj,
-                                std::optional<torch::Tensor> const& azp,
-                                std::optional<torch::Tensor> const& bias) {
-  TORCH_CHECK(a_scales.dtype() == torch::kFloat32);
-  TORCH_CHECK(b_scales.dtype() == torch::kFloat32);
-
-  if (azp) {
-    return cutlass_scaled_mm_sm80_epilogue<c2x::ScaledEpilogueBiasAzpToken>(
-        out, a, b, a_scales, b_scales, azp_adj, *azp, bias);
-  } else {
-    return cutlass_scaled_mm_sm80_epilogue<c2x::ScaledEpilogueBiasAzp>(
-        out, a, b, a_scales, b_scales, azp_adj, bias);
-  }
-}
-
-template <template <typename, typename> typename Epilogue,
-          typename... EpilogueArgs>
-void cutlass_scaled_mm_sm89_epilogue(torch::Tensor& out, torch::Tensor const& a,
-                                     torch::Tensor const& b,
-                                     EpilogueArgs&&... epilogue_args) {
-  if (a.dtype() == torch::kInt8) {
-    TORCH_CHECK(b.dtype() == torch::kInt8);
-
-    if (out.dtype() == torch::kBFloat16) {
-      return cutlass_gemm_sm89_int8_dispatch<int8_t, cutlass::bfloat16_t,
-                                             Epilogue>(
-          out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
-    } else {
-      assert(out.dtype() == torch::kFloat16);
-      return cutlass_gemm_sm89_int8_dispatch<int8_t, cutlass::half_t, Epilogue>(
-          out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
-    }
-  } else {
-    TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn);
-    TORCH_CHECK(b.dtype() == torch::kFloat8_e4m3fn);
-
-    if (out.dtype() == torch::kBFloat16) {
-      return cutlass_gemm_sm89_fp8_dispatch<cutlass::float_e4m3_t,
-                                            cutlass::bfloat16_t, Epilogue>(
-          out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
-    } else {
-      TORCH_CHECK(out.dtype() == torch::kFloat16);
-      return cutlass_gemm_sm89_fp8_dispatch<cutlass::float_e4m3_t,
-                                            cutlass::half_t, Epilogue>(
-          out, a, b, std::forward<EpilogueArgs>(epilogue_args)...);
-    }
-  }
-}
-
-void cutlass_scaled_mm_sm89(torch::Tensor& out, torch::Tensor const& a,
-                            torch::Tensor const& b,
-                            torch::Tensor const& a_scales,
-                            torch::Tensor const& b_scales,
-                            std::optional<torch::Tensor> const& bias) {
-  TORCH_CHECK(a_scales.dtype() == torch::kFloat32);
-  TORCH_CHECK(b_scales.dtype() == torch::kFloat32);
-  if (bias) {
-    TORCH_CHECK(bias->dtype() == out.dtype(),
-                "currently bias dtype must match output dtype ", out.dtype());
-    return cutlass_scaled_mm_sm89_epilogue<c2x::ScaledEpilogueBias>(
-        out, a, b, a_scales, b_scales, *bias);
-  } else {
-    return cutlass_scaled_mm_sm89_epilogue<c2x::ScaledEpilogue>(
-        out, a, b, a_scales, b_scales);
-  }
-}
-
-void cutlass_scaled_mm_azp_sm89(torch::Tensor& out, torch::Tensor const& a,
-                                torch::Tensor const& b,
-                                torch::Tensor const& a_scales,
-                                torch::Tensor const& b_scales,
-                                torch::Tensor const& azp_adj,
-                                std::optional<torch::Tensor> const& azp,
-                                std::optional<torch::Tensor> const& bias) {
-  TORCH_CHECK(a_scales.dtype() == torch::kFloat32);
-  TORCH_CHECK(b_scales.dtype() == torch::kFloat32);
-
-  if (azp) {
-    return cutlass_scaled_mm_sm89_epilogue<c2x::ScaledEpilogueBiasAzpToken>(
-        out, a, b, a_scales, b_scales, azp_adj, *azp, bias);
-  } else {
-    return cutlass_scaled_mm_sm89_epilogue<c2x::ScaledEpilogueBiasAzp>(
-        out, a, b, a_scales, b_scales, azp_adj, bias);
-  }
-}
diff --git a/csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm90.cu b/csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm90.cu
deleted file mode 100644
index 211302171f07..000000000000
--- a/csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm90.cu
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "c3x/scaled_mm_helper.hpp"
-#include "c3x/scaled_mm_kernels.hpp"
-
-/*
-   This file defines quantized GEMM operations using the CUTLASS 3.x API, for
-   NVIDIA GPUs with sm90a (Hopper).
-*/
-
-#if defined ENABLE_SCALED_MM_SM90 && ENABLE_SCALED_MM_SM90
-
-void cutlass_scaled_mm_sm90(torch::Tensor& c, torch::Tensor const& a,
-                            torch::Tensor const& b,
-                            torch::Tensor const& a_scales,
-                            torch::Tensor const& b_scales,
-                            std::optional<torch::Tensor> const& bias) {
-  dispatch_scaled_mm(c, a, b, a_scales, b_scales, bias,
-                     vllm::cutlass_scaled_mm_sm90_fp8,
-                     vllm::cutlass_scaled_mm_sm90_int8,
-                     vllm::cutlass_scaled_mm_blockwise_sm90_fp8);
-}
-
-void cutlass_scaled_mm_azp_sm90(torch::Tensor& out, torch::Tensor const& a,
-                                torch::Tensor const& b,
-                                torch::Tensor const& a_scales,
-                                torch::Tensor const& b_scales,
-                                torch::Tensor const& azp_adj,
-                                std::optional<torch::Tensor> const& azp,
-                                std::optional<torch::Tensor> const& bias) {
-  TORCH_CHECK(a_scales.dtype() == torch::kFloat32);
-  TORCH_CHECK(b_scales.dtype() == torch::kFloat32);
-
-  vllm::cutlass_scaled_mm_azp_sm90_int8(out, a, b, a_scales, b_scales, azp_adj,
-                                        azp, bias);
-}
-
-#endif
diff --git a/csrc/quantization/w8a8/cutlass/scaled_mm_entry.cu b/csrc/quantization/w8a8/cutlass/scaled_mm_entry.cu
deleted file mode 100644
index 87478a38b973..000000000000
--- a/csrc/quantization/w8a8/cutlass/scaled_mm_entry.cu
+++ /dev/null
@@ -1,420 +0,0 @@
-#include <cudaTypedefs.h>
-
-#include <c10/cuda/CUDAGuard.h>
-#include <torch/all.h>
-
-#include "cutlass_extensions/common.hpp"
-
-void cutlass_scaled_mm_sm75(torch::Tensor& c, torch::Tensor const& a,
-                            torch::Tensor const& b,
-                            torch::Tensor const& a_scales,
-                            torch::Tensor const& b_scales,
-                            std::optional<torch::Tensor> const& bias);
-
-void cutlass_scaled_mm_sm80(torch::Tensor& c, torch::Tensor const& a,
-                            torch::Tensor const& b,
-                            torch::Tensor const& a_scales,
-                            torch::Tensor const& b_scales,
-                            std::optional<torch::Tensor> const& bias);
-
-void cutlass_scaled_mm_sm89(torch::Tensor& c, torch::Tensor const& a,
-                            torch::Tensor const& b,
-                            torch::Tensor const& a_scales,
-                            torch::Tensor const& b_scales,
-                            std::optional<torch::Tensor> const& bias);
-
-#if defined ENABLE_SCALED_MM_SM90 && ENABLE_SCALED_MM_SM90
-void cutlass_scaled_mm_sm90(torch::Tensor& c, torch::Tensor const& a,
-                            torch::Tensor const& b,
-                            torch::Tensor const& a_scales,
-                            torch::Tensor const& b_scales,
-                            std::optional<torch::Tensor> const& bias);
-#endif
-#if defined ENABLE_CUTLASS_MOE_SM90 && ENABLE_CUTLASS_MOE_SM90
-void cutlass_moe_mm_sm90(
-    torch::Tensor& out_tensors, torch::Tensor const& a_tensors,
-    torch::Tensor const& b_tensors, torch::Tensor const& a_scales,
-    torch::Tensor const& b_scales, torch::Tensor const& expert_offsets,
-    torch::Tensor const& problem_sizes, torch::Tensor const& a_strides,
-    torch::Tensor const& b_strides, torch::Tensor const& c_strides,
-    bool per_act_token, bool per_out_ch);
-
-#endif
-
-#if defined ENABLE_CUTLASS_MOE_SM100 && ENABLE_CUTLASS_MOE_SM100
-void cutlass_moe_mm_sm100(
-    torch::Tensor& out_tensors, torch::Tensor const& a_tensors,
-    torch::Tensor const& b_tensors, torch::Tensor const& a_scales,
-    torch::Tensor const& b_scales, torch::Tensor const& expert_offsets,
-    torch::Tensor const& problem_sizes, torch::Tensor const& a_strides,
-    torch::Tensor const& b_strides, torch::Tensor const& c_strides,
-    bool per_act_token, bool per_out_ch);
-#endif
-
-#if defined ENABLE_SCALED_MM_SM120 && ENABLE_SCALED_MM_SM120
-void cutlass_scaled_mm_sm120(torch::Tensor& c, torch::Tensor const& a,
-                             torch::Tensor const& b,
-                             torch::Tensor const& a_scales,
-                             torch::Tensor const& b_scales,
-                             std::optional<torch::Tensor> const& bias);
-#endif
-
-#if defined ENABLE_SCALED_MM_SM100 && ENABLE_SCALED_MM_SM100
-void cutlass_scaled_mm_sm100(torch::Tensor& c, torch::Tensor const& a,
-                             torch::Tensor const& b,
-                             torch::Tensor const& a_scales,
-                             torch::Tensor const& b_scales,
-                             std::optional<torch::Tensor> const& bias);
-#endif
-
-#if (defined(ENABLE_CUTLASS_MOE_SM90) && ENABLE_CUTLASS_MOE_SM90) ||   \
-    (defined(ENABLE_CUTLASS_MOE_SM100) && ENABLE_CUTLASS_MOE_SM100) || \
-    (defined(ENABLE_CUTLASS_MOE_SM120) && ENABLE_CUTLASS_MOE_SM120)
-void get_cutlass_moe_mm_data_caller(
-    const torch::Tensor& topk_ids, torch::Tensor& expert_offsets,
-    torch::Tensor& problem_sizes1, torch::Tensor& problem_sizes2,
-    torch::Tensor& input_permutation, torch::Tensor& output_permutation,
-    const int64_t num_experts, const int64_t n, const int64_t k,
-    const std::optional<torch::Tensor>& blockscale_offsets,
-    const bool is_gated);
-
-void get_cutlass_moe_mm_problem_sizes_from_expert_offsets_caller(
-    const torch::Tensor& expert_first_token_offset,
-    torch::Tensor& problem_sizes1, torch::Tensor& problem_sizes2,
-    const int64_t n, const int64_t k, const bool swap_ab);
-
-void get_cutlass_batched_moe_mm_data_caller(
-    torch::Tensor& expert_offsets, torch::Tensor& problem_sizes1,
-    torch::Tensor& problem_sizes2, const torch::Tensor& expert_num_tokens,
-    const int64_t num_local_experts, const int64_t padded_m, const int64_t n,
-    const int64_t k);
-#endif
-
-void cutlass_scaled_mm_azp_sm75(torch::Tensor& c, torch::Tensor const& a,
-                                torch::Tensor const& b,
-                                torch::Tensor const& a_scales,
-                                torch::Tensor const& b_scales,
-                                torch::Tensor const& azp_adj,
-                                std::optional<torch::Tensor> const& azp,
-                                std::optional<torch::Tensor> const& bias);
-
-void cutlass_scaled_mm_azp_sm80(torch::Tensor& c, torch::Tensor const& a,
-                                torch::Tensor const& b,
-                                torch::Tensor const& a_scales,
-                                torch::Tensor const& b_scales,
-                                torch::Tensor const& azp_adj,
-                                std::optional<torch::Tensor> const& azp,
-                                std::optional<torch::Tensor> const& bias);
-
-void cutlass_scaled_mm_azp_sm89(torch::Tensor& c, torch::Tensor const& a,
-                                torch::Tensor const& b,
-                                torch::Tensor const& a_scales,
-                                torch::Tensor const& b_scales,
-                                torch::Tensor const& azp_adj,
-                                std::optional<torch::Tensor> const& azp,
-                                std::optional<torch::Tensor> const& bias);
-
-#if defined ENABLE_SCALED_MM_SM90 && ENABLE_SCALED_MM_SM90
-void cutlass_scaled_mm_azp_sm90(torch::Tensor& c, torch::Tensor const& a,
-                                torch::Tensor const& b,
-                                torch::Tensor const& a_scales,
-                                torch::Tensor const& b_scales,
-                                torch::Tensor const& azp_adj,
-                                std::optional<torch::Tensor> const& azp,
-                                std::optional<torch::Tensor> const& bias);
-#endif
-
-bool cutlass_scaled_mm_supports_fp8(int64_t cuda_device_capability) {
-  // CUTLASS FP8 kernels need at least
-  //   CUDA 12.0 on SM90 systems (Hopper)
-  //   CUDA 12.4 on SM89 systems (Lovelace)
-
-#if defined CUDA_VERSION
-  if (cuda_device_capability >= 90) {
-    return CUDA_VERSION >= 12000;
-  } else if (cuda_device_capability >= 89) {
-    return CUDA_VERSION >= 12040;
-  }
-#endif
-
-  return false;
-}
-
-bool cutlass_scaled_mm_supports_block_fp8(int64_t cuda_device_capability) {
-  // CUTLASS block-quantized FP8 kernels need at least CUDA 12.0
-  // and at least SM90 (Hopper)
-
-#if defined CUDA_VERSION
-  if (cuda_device_capability >= 100) {
-    return CUDA_VERSION >= 12080;
-  } else if (cuda_device_capability >= 90) {
-    return CUDA_VERSION >= 12000;
-  }
-#endif
-
-  return false;
-}
-
-bool cutlass_group_gemm_supported(int64_t cuda_device_capability) {
-  // CUTLASS grouped FP8 kernels need at least CUDA 12.3 and SM90 (Hopper)
-  // or CUDA 12.8 and SM100 (Blackwell)
-
-#if defined CUDA_VERSION
-  if (cuda_device_capability >= 100) {
-    return CUDA_VERSION >= 12080;
-  }
-  if (cuda_device_capability >= 90) {
-    return CUDA_VERSION >= 12030;
-  }
-#endif
-
-  return false;
-}
-
-void cutlass_scaled_mm(torch::Tensor& c, torch::Tensor const& a,
-                       torch::Tensor const& b, torch::Tensor const& a_scales,
-                       torch::Tensor const& b_scales,
-                       std::optional<torch::Tensor> const& bias) {
-  // Checks for conformality
-  TORCH_CHECK(a.dim() == 2 && b.dim() == 2 && c.dim() == 2);
-  TORCH_CHECK(c.size(0) == a.size(0) && a.size(1) == b.size(0) &&
-              b.size(1) == c.size(1));
-
-  // Check for strides and alignment
-  TORCH_CHECK(a.stride(1) == 1 && c.stride(1) == 1);  // Row-major
-  TORCH_CHECK(b.stride(0) == 1);                      // Column-major
-  TORCH_CHECK(c.stride(0) % 16 == 0 &&
-              b.stride(1) % 16 == 0);  // 16 Byte Alignment
-
-  if (bias) {
-    TORCH_CHECK(bias->numel() == b.size(1) && bias->is_contiguous() &&
-                bias->dim() == 1);
-  }
-
-  at::cuda::OptionalCUDAGuard const device_guard(device_of(a));
-  int32_t version_num = get_sm_version_num();
-
-#if defined ENABLE_SCALED_MM_SM120 && ENABLE_SCALED_MM_SM120
-  if (version_num >= 120) {
-    cutlass_scaled_mm_sm120(c, a, b, a_scales, b_scales, bias);
-    return;
-  }
-#endif
-
-#if defined ENABLE_SCALED_MM_SM100 && ENABLE_SCALED_MM_SM100
-  if (version_num >= 100 && version_num < 120) {
-    cutlass_scaled_mm_sm100(c, a, b, a_scales, b_scales, bias);
-    return;
-  }
-#endif
-
-  // Guard against compilation issues for sm90 kernels
-#if defined ENABLE_SCALED_MM_SM90 && ENABLE_SCALED_MM_SM90
-  if (version_num >= 90 && version_num < 100) {
-    // Hopper
-    cutlass_scaled_mm_sm90(c, a, b, a_scales, b_scales, bias);
-    return;
-  }
-#endif
-
-#if defined ENABLE_SCALED_MM_C2X && ENABLE_SCALED_MM_C2X
-  if (version_num == 89) {
-    // Ada Lovelace
-    cutlass_scaled_mm_sm89(c, a, b, a_scales, b_scales, bias);
-    return;
-  }
-
-  if (version_num >= 80) {
-    // Ampere
-    cutlass_scaled_mm_sm80(c, a, b, a_scales, b_scales, bias);
-    return;
-  }
-
-  if (version_num >= 75) {
-    // Turing
-    cutlass_scaled_mm_sm75(c, a, b, a_scales, b_scales, bias);
-    return;
-  }
-#endif
-
-  TORCH_CHECK_NOT_IMPLEMENTED(
-      false,
-      "No compiled cutlass_scaled_mm for a compute capability less than "
-      "CUDA device capability: ",
-      version_num);
-}
-
-void cutlass_moe_mm(
-    torch::Tensor& out_tensors, torch::Tensor const& a_tensors,
-    torch::Tensor const& b_tensors, torch::Tensor const& a_scales,
-    torch::Tensor const& b_scales, torch::Tensor const& expert_offsets,
-    torch::Tensor const& problem_sizes, torch::Tensor const& a_strides,
-    torch::Tensor const& b_strides, torch::Tensor const& c_strides,
-    bool per_act_token, bool per_out_ch) {
-  int32_t version_num = get_sm_version_num();
-#if defined ENABLE_CUTLASS_MOE_SM100 && ENABLE_CUTLASS_MOE_SM100
-  if (version_num >= 100 && version_num < 110) {
-    cutlass_moe_mm_sm100(out_tensors, a_tensors, b_tensors, a_scales, b_scales,
-                         expert_offsets, problem_sizes, a_strides, b_strides,
-                         c_strides, per_act_token, per_out_ch);
-    return;
-  }
-#endif
-#if defined ENABLE_CUTLASS_MOE_SM90 && ENABLE_CUTLASS_MOE_SM90
-  if (version_num >= 90 && version_num < 100) {
-    cutlass_moe_mm_sm90(out_tensors, a_tensors, b_tensors, a_scales, b_scales,
-                        expert_offsets, problem_sizes, a_strides, b_strides,
-                        c_strides, per_act_token, per_out_ch);
-    return;
-  }
-#endif
-  TORCH_CHECK_NOT_IMPLEMENTED(
-      false,
-      "No compiled cutlass_scaled_mm for CUDA device capability: ", version_num,
-      ". Required capability: 90 or 100");
-}
-
-void get_cutlass_moe_mm_data(
-    const torch::Tensor& topk_ids, torch::Tensor& expert_offsets,
-    torch::Tensor& problem_sizes1, torch::Tensor& problem_sizes2,
-    torch::Tensor& input_permutation, torch::Tensor& output_permutation,
-    const int64_t num_experts, const int64_t n, const int64_t k,
-    const std::optional<torch::Tensor>& blockscale_offsets,
-    const bool is_gated) {
-  // This function currently gets compiled only if we have a valid cutlass moe
-  // mm to run it for.
-  int32_t version_num = get_sm_version_num();
-#if (defined ENABLE_CUTLASS_MOE_SM90 && ENABLE_CUTLASS_MOE_SM90) ||   \
-    (defined ENABLE_CUTLASS_MOE_SM100 && ENABLE_CUTLASS_MOE_SM100) || \
-    (defined ENABLE_CUTLASS_MOE_SM120 && ENABLE_CUTLASS_MOE_SM120)
-  get_cutlass_moe_mm_data_caller(topk_ids, expert_offsets, problem_sizes1,
-                                 problem_sizes2, input_permutation,
-                                 output_permutation, num_experts, n, k,
-                                 blockscale_offsets, is_gated);
-  return;
-#endif
-  TORCH_CHECK_NOT_IMPLEMENTED(
-      false,
-      "No compiled get_cutlass_moe_mm_data: no cutlass_scaled_mm kernel for "
-      "CUDA device capability: ",
-      version_num, ". Required capability: 90, 100, or 120");
-}
-
-void get_cutlass_moe_mm_problem_sizes_from_expert_offsets(
-    const torch::Tensor& expert_first_token_offset,
-    torch::Tensor& problem_sizes1, torch::Tensor& problem_sizes2,
-    const int64_t n, const int64_t k, const bool swap_ab) {
-  int32_t version_num = get_sm_version_num();
-#if (defined ENABLE_CUTLASS_MOE_SM90 && ENABLE_CUTLASS_MOE_SM90) ||   \
-    (defined ENABLE_CUTLASS_MOE_SM100 && ENABLE_CUTLASS_MOE_SM100) || \
-    (defined ENABLE_CUTLASS_MOE_SM120 && ENABLE_CUTLASS_MOE_SM120)
-  get_cutlass_moe_mm_problem_sizes_from_expert_offsets_caller(
-      expert_first_token_offset, problem_sizes1, problem_sizes2, n, k, swap_ab);
-  return;
-#endif
-  TORCH_CHECK_NOT_IMPLEMENTED(
-      false,
-      "No compiled get_cutlass_moe_mm_problem_sizes_from_expert_offsets: "
-      "no cutlass_scaled_mm kernel for CUDA device capability: ",
-      version_num, ". Required capability: 90, 100, or 120");
-}
-
-void get_cutlass_batched_moe_mm_data(torch::Tensor& expert_offsets,
-                                     torch::Tensor& problem_sizes1,
-                                     torch::Tensor& problem_sizes2,
-                                     const torch::Tensor& expert_num_tokens,
-                                     const int64_t num_local_experts,
-                                     const int64_t padded_m, const int64_t n,
-                                     const int64_t k) {
-  // This function currently gets compiled only if we have a valid cutlass moe
-  // mm to run it for.
-  int32_t version_num = get_sm_version_num();
-#if (defined ENABLE_CUTLASS_MOE_SM90 && ENABLE_CUTLASS_MOE_SM90) ||   \
-    (defined ENABLE_CUTLASS_MOE_SM100 && ENABLE_CUTLASS_MOE_SM100) || \
-    (defined ENABLE_CUTLASS_MOE_SM120 && ENABLE_CUTLASS_MOE_SM120)
-  get_cutlass_batched_moe_mm_data_caller(expert_offsets, problem_sizes1,
-                                         problem_sizes2, expert_num_tokens,
-                                         num_local_experts, padded_m, n, k);
-  return;
-#endif
-  TORCH_CHECK_NOT_IMPLEMENTED(false,
-                              "No compiled get_cutlass_batched_moe_mm_data: no "
-                              "cutlass_scaled_mm kernel "
-                              "for CUDA device capability: ",
-                              version_num,
-                              ". Required capability: 90, 100, or 120");
-}
-
-void cutlass_scaled_mm_azp(torch::Tensor& c, torch::Tensor const& a,
-                           torch::Tensor const& b,
-                           torch::Tensor const& a_scales,
-                           torch::Tensor const& b_scales,
-                           torch::Tensor const& azp_adj,
-                           std::optional<torch::Tensor> const& azp,
-                           std::optional<torch::Tensor> const& bias) {
-  // Checks for conformality
-  TORCH_CHECK(a.dim() == 2 && b.dim() == 2 && c.dim() == 2);
-  TORCH_CHECK(c.size(0) == a.size(0) && a.size(1) == b.size(0) &&
-              b.size(1) == c.size(1));
-  TORCH_CHECK(a_scales.numel() == 1 || a_scales.numel() == a.size(0));
-  TORCH_CHECK(b_scales.numel() == 1 || b_scales.numel() == b.size(1));
-
-  // Check for strides and alignment
-  TORCH_CHECK(a.stride(1) == 1 && c.stride(1) == 1);  // Row-major
-  TORCH_CHECK(b.stride(0) == 1);                      // Column-major
-  TORCH_CHECK(c.stride(0) % 16 == 0 &&
-              b.stride(1) % 16 == 0);  // 16 Byte Alignment
-  TORCH_CHECK(a_scales.is_contiguous() && b_scales.is_contiguous());
-
-  // bias, azp, azp_adj are all 1d
-  // bias and azp_adj have n elements, azp has m elements
-  if (bias) {
-    TORCH_CHECK(bias->numel() == b.size(1) && bias->is_contiguous());
-  }
-  if (azp) {
-    TORCH_CHECK(azp->numel() == a.size(0) && azp->is_contiguous());
-  }
-  TORCH_CHECK(azp_adj.numel() == b.size(1) && azp_adj.is_contiguous());
-
-  // azp & bias types
-  TORCH_CHECK(azp_adj.dtype() == torch::kInt32);
-  TORCH_CHECK(!azp || azp->dtype() == torch::kInt32);
-  TORCH_CHECK(!bias || bias->dtype() == c.dtype(),
-              "currently bias dtype must match output dtype ", c.dtype());
-
-  at::cuda::OptionalCUDAGuard const device_guard(device_of(a));
-
-  int32_t version_num = get_sm_version_num();
-
-#if defined ENABLE_SCALED_MM_SM90 && ENABLE_SCALED_MM_SM90
-  if (version_num >= 90) {
-    cutlass_scaled_mm_azp_sm90(c, a, b, a_scales, b_scales, azp_adj, azp, bias);
-    return;
-  }
-#endif
-
-#if defined ENABLE_SCALED_MM_C2X && ENABLE_SCALED_MM_C2X
-  if (version_num == 89) {
-    // Ada Lovelace
-    cutlass_scaled_mm_azp_sm89(c, a, b, a_scales, b_scales, azp_adj, azp, bias);
-    return;
-  }
-
-  if (version_num >= 80) {
-    // Ampere
-    cutlass_scaled_mm_azp_sm80(c, a, b, a_scales, b_scales, azp_adj, azp, bias);
-    return;
-  }
-
-  // Turing
-  TORCH_CHECK(version_num >= 75);
-  cutlass_scaled_mm_azp_sm75(c, a, b, a_scales, b_scales, azp_adj, azp, bias);
-  return;
-#endif
-
-  TORCH_CHECK_NOT_IMPLEMENTED(
-      false,
-      "No compiled cutlass_scaled_mm_azp for a compute capability less than "
-      "CUDA device capability: ",
-      version_num);
-}
diff --git a/csrc/quantization/w8a8/fp8/amd/quant_utils.cuh b/csrc/quantization/w8a8/fp8/amd/quant_utils.cuh
index 81f5cb83f3e1..7ae644d81d48 100644
--- a/csrc/quantization/w8a8/fp8/amd/quant_utils.cuh
+++ b/csrc/quantization/w8a8/fp8/amd/quant_utils.cuh
@@ -639,7 +639,9 @@ __inline__ __device__ Tout scaled_convert(const Tin& x, const float scale) {
   // function with template<typename scalar_t, typename cache_t,
   // Fp8KVCacheDataType kv_dt>.
   #define DISPATCH_BY_KV_CACHE_DTYPE(SRC_DTYPE, KV_DTYPE, FN)                  \
-    if (KV_DTYPE == "auto") {                                                  \
+    vllm::Fp8KVCacheDataType KV_CACHE_DTYPE =                                  \
+        vllm::get_fp8_kv_cache_data_type(KV_DTYPE);                            \
+    if (KV_CACHE_DTYPE == vllm::Fp8KVCacheDataType::kAuto) {                   \
       if (SRC_DTYPE == at::ScalarType::Float) {                                \
         FN(float, float, vllm::Fp8KVCacheDataType::kAuto);                     \
       } else if (SRC_DTYPE == at::ScalarType::Half) {                          \
@@ -649,21 +651,18 @@ __inline__ __device__ Tout scaled_convert(const Tin& x, const float scale) {
       } else {                                                                 \
         TORCH_CHECK(false, "Unsupported input type of kv cache: ", SRC_DTYPE); \
       }                                                                        \
-    } else {                                                                   \
-      if (KV_DTYPE == "fp8" || KV_DTYPE == "fp8_e4m3") {                       \
-        if (SRC_DTYPE == at::ScalarType::Float) {                              \
-          FN(float, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);              \
-        } else if (SRC_DTYPE == at::ScalarType::Half) {                        \
-          FN(uint16_t, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);           \
-        } else if (SRC_DTYPE == at::ScalarType::BFloat16) {                    \
-          FN(__nv_bfloat16, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);      \
-        } else {                                                               \
-          TORCH_CHECK(false,                                                   \
-                      "Unsupported input type of kv cache: ", SRC_DTYPE);      \
-        }                                                                      \
+    } else if (KV_CACHE_DTYPE == vllm::Fp8KVCacheDataType::kFp8E4M3) {         \
+      if (SRC_DTYPE == at::ScalarType::Float) {                                \
+        FN(float, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);                \
+      } else if (SRC_DTYPE == at::ScalarType::Half) {                          \
+        FN(uint16_t, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);             \
+      } else if (SRC_DTYPE == at::ScalarType::BFloat16) {                      \
+        FN(__nv_bfloat16, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);        \
       } else {                                                                 \
-        TORCH_CHECK(false, "Unsupported data type of kv cache: ", KV_DTYPE);   \
+        TORCH_CHECK(false, "Unsupported input type of kv cache: ", SRC_DTYPE); \
       }                                                                        \
+    } else {                                                                   \
+      TORCH_CHECK(false, "Unsupported data type of kv cache: ", KV_DTYPE);     \
     }
 
 }  // namespace fp8
diff --git a/csrc/quantization/w8a8/fp8/common.cuh b/csrc/quantization/w8a8/fp8/common.cuh
index 7a385f5163ae..087f5099165a 100644
--- a/csrc/quantization/w8a8/fp8/common.cuh
+++ b/csrc/quantization/w8a8/fp8/common.cuh
@@ -1,10 +1,23 @@
 #pragma once
 
 #include "libtorch_stable/quantization/vectorization.cuh"
-#include "quantization/utils.cuh"
+#include "../../utils.cuh"
 
 #include <cmath>
 
+// This header is shared between _C and _C_stable_libtorch targets.
+// torch_utils.h provides get_device_prop(). We need to pass USE_CUDA
+// to the .so to expose some of the shims used by torch_utils.h. For now
+// this is only done for _C_stable_libtorch and not for _C, so we use the
+// non stable at::cuda::getCurrentDeviceProperties for _C for now.
+#ifdef TORCH_TARGET_VERSION
+  #include "../../../libtorch_stable/torch_utils.h"
+#else
+  #ifdef USE_ROCM
+    #include <ATen/hip/HIPContext.h>
+  #endif
+#endif
+
 #ifndef USE_ROCM
   #include "nvidia/quant_utils.cuh"
 #else
@@ -18,7 +31,11 @@ static bool is_fp8_ocp() {
 #ifndef USE_ROCM
   return true;
 #else
-  auto dprops = at::cuda::getCurrentDeviceProperties();
+  #ifdef TORCH_TARGET_VERSION
+  auto* dprops = get_device_prop();
+  #else
+  auto* dprops = at::cuda::getCurrentDeviceProperties();
+  #endif
   std::string device_arch = dprops->gcnArchName;
   size_t substring = device_arch.find("gfx94");
   return substring == std::string::npos;
diff --git a/csrc/quantization/w8a8/fp8/nvidia/quant_utils.cuh b/csrc/quantization/w8a8/fp8/nvidia/quant_utils.cuh
index 421e8092474b..3b7e25dc56be 100644
--- a/csrc/quantization/w8a8/fp8/nvidia/quant_utils.cuh
+++ b/csrc/quantization/w8a8/fp8/nvidia/quant_utils.cuh
@@ -543,7 +543,9 @@ __inline__ __device__ Tout scaled_convert(const Tin& x, const float scale) {
   // function with template<typename scalar_t, typename cache_t,
   // Fp8KVCacheDataType kv_dt>.
   #define DISPATCH_BY_KV_CACHE_DTYPE(SRC_DTYPE, KV_DTYPE, FN)                  \
-    if (KV_DTYPE == "auto") {                                                  \
+    vllm::Fp8KVCacheDataType KV_CACHE_DTYPE =                                  \
+        vllm::get_fp8_kv_cache_data_type(KV_DTYPE);                            \
+    if (KV_CACHE_DTYPE == vllm::Fp8KVCacheDataType::kAuto) {                   \
       if (SRC_DTYPE == at::ScalarType::Float) {                                \
         FN(float, float, vllm::Fp8KVCacheDataType::kAuto);                     \
       } else if (SRC_DTYPE == at::ScalarType::Half) {                          \
@@ -553,43 +555,28 @@ __inline__ __device__ Tout scaled_convert(const Tin& x, const float scale) {
       } else {                                                                 \
         TORCH_CHECK(false, "Unsupported input type of kv cache: ", SRC_DTYPE); \
       }                                                                        \
-    } else {                                                                   \
-      if (KV_DTYPE == "fp8" || KV_DTYPE == "fp8_e4m3") {                       \
-        if (SRC_DTYPE == at::ScalarType::Float) {                              \
-          FN(float, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);              \
-        } else if (SRC_DTYPE == at::ScalarType::Half) {                        \
-          FN(uint16_t, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);           \
-        } else if (SRC_DTYPE == at::ScalarType::BFloat16) {                    \
-          FN(__nv_bfloat16, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);      \
-        } else {                                                               \
-          TORCH_CHECK(false,                                                   \
-                      "Unsupported input type of kv cache: ", SRC_DTYPE);      \
-        }                                                                      \
-      } else if (KV_DTYPE == "fp8_e5m2") {                                     \
-        if (SRC_DTYPE == at::ScalarType::Float) {                              \
-          FN(float, uint8_t, vllm::Fp8KVCacheDataType::kFp8E5M2);              \
-        } else if (SRC_DTYPE == at::ScalarType::Half) {                        \
-          FN(uint16_t, uint8_t, vllm::Fp8KVCacheDataType::kFp8E5M2);           \
-        } else if (SRC_DTYPE == at::ScalarType::BFloat16) {                    \
-          FN(__nv_bfloat16, uint8_t, vllm::Fp8KVCacheDataType::kFp8E5M2);      \
-        } else {                                                               \
-          TORCH_CHECK(false,                                                   \
-                      "Unsupported input type of kv cache: ", SRC_DTYPE);      \
-        }                                                                      \
-      } else if (KV_DTYPE == "fp8_ds_mla") {                                   \
-        if (SRC_DTYPE == at::ScalarType::Float) {                              \
-          FN(float, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);              \
-        } else if (SRC_DTYPE == at::ScalarType::Half) {                        \
-          FN(uint16_t, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);           \
-        } else if (SRC_DTYPE == at::ScalarType::BFloat16) {                    \
-          FN(__nv_bfloat16, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);      \
-        } else {                                                               \
-          TORCH_CHECK(false,                                                   \
-                      "Unsupported input type of kv cache: ", SRC_DTYPE);      \
-        }                                                                      \
+    } else if (KV_CACHE_DTYPE == vllm::Fp8KVCacheDataType::kFp8E4M3) {         \
+      if (SRC_DTYPE == at::ScalarType::Float) {                                \
+        FN(float, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);                \
+      } else if (SRC_DTYPE == at::ScalarType::Half) {                          \
+        FN(uint16_t, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);             \
+      } else if (SRC_DTYPE == at::ScalarType::BFloat16) {                      \
+        FN(__nv_bfloat16, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3);        \
+      } else {                                                                 \
+        TORCH_CHECK(false, "Unsupported input type of kv cache: ", SRC_DTYPE); \
+      }                                                                        \
+    } else if (KV_CACHE_DTYPE == vllm::Fp8KVCacheDataType::kFp8E5M2) {         \
+      if (SRC_DTYPE == at::ScalarType::Float) {                                \
+        FN(float, uint8_t, vllm::Fp8KVCacheDataType::kFp8E5M2);                \
+      } else if (SRC_DTYPE == at::ScalarType::Half) {                          \
+        FN(uint16_t, uint8_t, vllm::Fp8KVCacheDataType::kFp8E5M2);             \
+      } else if (SRC_DTYPE == at::ScalarType::BFloat16) {                      \
+        FN(__nv_bfloat16, uint8_t, vllm::Fp8KVCacheDataType::kFp8E5M2);        \
       } else {                                                                 \
-        TORCH_CHECK(false, "Unsupported data type of kv cache: ", KV_DTYPE);   \
+        TORCH_CHECK(false, "Unsupported input type of kv cache: ", SRC_DTYPE); \
       }                                                                        \
+    } else {                                                                   \
+      TORCH_CHECK(false, "Unsupported data type of kv cache: ", KV_DTYPE);     \
     }
 
 }  // namespace fp8
diff --git a/csrc/rocm/attention.cu b/csrc/rocm/attention.cu
index a339c5641bb4..9e6c0726d19e 100644
--- a/csrc/rocm/attention.cu
+++ b/csrc/rocm/attention.cu
@@ -40,15 +40,6 @@ using __hip_fp8_e5m2 = __hip_fp8_e5m2_fnuz;
   #define __HIP__FP8MFMA__
 #endif
 
-#if defined(__HIPCC__) && (defined(__gfx1100__) || defined(__gfx1101__) || \
-                           defined(__gfx1150__) || defined(__gfx1151__))
-  #define __HIP__GFX11__
-#endif
-
-#if defined(__HIPCC__) && (defined(__gfx1200__) || defined(__gfx1201__))
-  #define __HIP__GFX12__
-#endif
-
 #if defined(NDEBUG)
   #undef NDEBUG
   #include <assert.h>
@@ -1629,7 +1620,7 @@ __launch_bounds__(NUM_THREADS) void paged_attention_ll4mi_reduce_kernel(
   }
 }
 
-#elif defined(__HIP__GFX11__)
+#elif defined(__GFX11__)
 
 using floatx8 = __attribute__((__vector_size__(8 * sizeof(float)))) float;
 
@@ -2388,7 +2379,7 @@ __launch_bounds__(NUM_THREADS) void paged_attention_ll4mi_reduce_kernel(
   out_ptr[threadIdx.x] = from_float<scalar_t>(acc);
 }
 
-#elif defined(__HIP__GFX12__)
+#elif defined(__GFX12__)
 
 using floatx8 = __attribute__((__vector_size__(8 * sizeof(float)))) float;
 
diff --git a/csrc/rocm/skinny_gemms.cu b/csrc/rocm/skinny_gemms.cu
index 60e10e53391a..3342db37be90 100644
--- a/csrc/rocm/skinny_gemms.cu
+++ b/csrc/rocm/skinny_gemms.cu
@@ -26,16 +26,11 @@
   #define __HIP__GFX9__
 #endif
 
-#if defined(__HIPCC__) &&                                                    \
-    (defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1150__) || \
-     defined(__gfx1151__) || defined(__gfx1200__) || defined(__gfx1201__))
+// Combined RDNA macro (gfx11 + gfx12) - both use 32-wide wavefronts
+#if defined(__GFX11__) || defined(__GFX12__)
   #define __HIP__GFX1X__
 #endif
 
-#if defined(__HIPCC__) && (defined(__gfx1200__) || defined(__gfx1201__))
-  #define __HIP__GFX12__
-#endif
-
 #if defined(__HIPCC__) && (defined(__gfx942__) || defined(__gfx950__))
   #define __HIP__MI3XX__
 #endif
@@ -1845,7 +1840,7 @@ torch::Tensor wvSplitKrc(const at::Tensor& in_a, const at::Tensor& in_b,
   return out_c;
 }
 
-#if defined(__HIP__MI3XX__) || defined(__HIP__GFX12__)
+#if defined(__HIP__MI3XX__) || defined(__GFX12__)
 template <typename scalar_t, typename fp8_t, int THRDS, int YTILE, int WvPrGrp,
           int A_CHUNK, int UNRL, int N>
 __global__ void __launch_bounds__(WvPrGrp* THRDS)
@@ -1893,7 +1888,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
   float sB = *s_B;
 
   while (m < M) {
-  #ifdef __HIP__GFX12__
+  #ifdef __GFX12__
     // gfx12: per-lane scalar accumulation via v_dot4_f32_fp8_fp8
     float sum[N][YTILE] = {};
   #else
@@ -1931,7 +1926,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
   #pragma unroll
       for (uint32_t k2 = 0; k2 < UNRL; k2++) {
         for (uint32_t n = 0; n < N; n++) {
-  #ifdef __HIP__GFX12__
+  #ifdef __GFX12__
           // gfx12: 4 x dot4 per A_CHUNK=16 bytes (4 FP8 per dot4)
           for (int y = 0; y < YTILE; ++y) {
     #pragma unroll
@@ -1955,7 +1950,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
     }
 
     // Final reduction
-  #ifdef __HIP__GFX12__
+  #ifdef __GFX12__
     // gfx12 wave32: DPP row_shr within 16-lane rows + cross-row shuffle
     for (int n = 0; n < N; n++) {
       for (int y = 0; y < YTILE; y++) {
@@ -1993,7 +1988,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
   #endif
 
     const bool writeback_lane =
-  #ifdef __HIP__GFX12__
+  #ifdef __GFX12__
         threadIdx.x == (THRDS - 1);
   #else
         threadIdx.x == 0;
@@ -2009,7 +2004,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
       for (int n = 0; n < N; n++) {
         for (int y = 0; y < YTILE; y++) {
           if (y + m >= M) break;  // To avoid mem access fault.
-  #ifdef __HIP__GFX12__
+  #ifdef __GFX12__
           float result = sum[n][y] * sA * sB;
   #else
           float result = sum[n][y][0] * sA * sB;
@@ -2027,7 +2022,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
     m += CuCount * _WvPrGrp * YTILE;
   }
 }
-#else   // !defined(__HIP__MI3XX__) && !defined(__HIP__GFX12__)
+#else   // !defined(__HIP__MI3XX__) && !defined(__GFX12__)
 template <typename scalar_t, typename fp8_t, int THRDS, int YTILE, int WvPrGrp,
           int A_CHUNK, int UNRL, int N>
 __global__ void wvSplitKQ_hf_sml_(const int K, const int Kap, const int Kbp,
@@ -2039,9 +2034,9 @@ __global__ void wvSplitKQ_hf_sml_(const int K, const int Kap, const int Kbp,
                                   const int _WvPrGrp, const int CuCount) {
   UNREACHABLE_CODE
 }
-#endif  // defined(__HIP__MI3XX__) || defined(__HIP__GFX12__)
+#endif  // defined(__HIP__MI3XX__) || defined(__GFX12__)
 
-#if defined(__HIP__MI3XX__) || defined(__HIP__GFX12__)
+#if defined(__HIP__MI3XX__) || defined(__GFX12__)
 template <typename scalar_t, typename fp8_t, int THRDS, int YTILE, int WvPrGrp,
           int A_CHUNK, int UNRL, int N>
 __global__ void __launch_bounds__(WvPrGrp* THRDS)
@@ -2088,7 +2083,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
   float sB = *s_B;
 
   while (m < M) {
-  #ifdef __HIP__GFX12__
+  #ifdef __GFX12__
     // gfx12: per-lane scalar accumulation via v_dot4_f32_fp8_fp8
     float sum[N][YTILE] = {};
   #else
@@ -2128,7 +2123,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
   #pragma unroll
       for (uint32_t k2 = 0; k2 < UNRL; k2++) {
         for (uint32_t n = 0; n < N; n++) {
-  #ifdef __HIP__GFX12__
+  #ifdef __GFX12__
           // gfx12: 4 x dot4 per A_CHUNK=16 bytes (4 FP8 per dot4)
           for (int y = 0; y < YTILE; ++y) {
     #pragma unroll
@@ -2152,7 +2147,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
     }
 
     // Final reduction
-  #ifdef __HIP__GFX12__
+  #ifdef __GFX12__
     // gfx12 wave32: DPP row_shr within 16-lane rows + cross-row shuffle
     for (int n = 0; n < N; n++) {
       for (int y = 0; y < YTILE; y++) {
@@ -2190,7 +2185,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
   #endif
 
     const bool writeback_lane =
-  #ifdef __HIP__GFX12__
+  #ifdef __GFX12__
         threadIdx.x == (THRDS - 1);
   #else
         threadIdx.x == 0;
@@ -2206,7 +2201,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
       for (int n = 0; n < N; n++) {
         for (int y = 0; y < YTILE; y++) {
           if (y + m >= M) break;  // To avoid mem access fault.
-  #ifdef __HIP__GFX12__
+  #ifdef __GFX12__
           float result = sum[n][y] * sA * sB;
   #else
           float result = sum[n][y][0] * sA * sB;
@@ -2224,7 +2219,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
     m += CuCount * _WvPrGrp * YTILE;
   }
 }
-#else   // !defined(__HIP__MI3XX__) && !defined(__HIP__GFX12__)
+#else   // !defined(__HIP__MI3XX__) && !defined(__GFX12__)
 template <typename scalar_t, typename fp8_t, int THRDS, int YTILE, int WvPrGrp,
           int A_CHUNK, int UNRL, int N>
 __global__ void wvSplitKQ_hf_(const int K, const int Kap, const int Kbp,
@@ -2236,7 +2231,7 @@ __global__ void wvSplitKQ_hf_(const int K, const int Kap, const int Kbp,
                               const int CuCount) {
   UNREACHABLE_CODE
 }
-#endif  // defined(__HIP__MI3XX__) || defined(__HIP__GFX12__)
+#endif  // defined(__HIP__MI3XX__) || defined(__GFX12__)
 
 void wvSplitKQ(const at::Tensor& in_b, const at::Tensor& in_a,
                const std::optional<at::Tensor>& in_bias, at::Tensor& out_c,
diff --git a/csrc/sampler.cu b/csrc/sampler.cu
index 2e76873c8f18..14d84013c08d 100644
--- a/csrc/sampler.cu
+++ b/csrc/sampler.cu
@@ -258,7 +258,13 @@ __device__ bool processHistogramStep(
   auto processBins = [&](float logit, int idx) {
     if (isPartialMatch<patternShift>(logit, logitPattern)) {
       uint32_t binIdx = extractBinIdx<step>(logit);
-      if (binIdx < thresholdBinIdx) {
+      // Only write elements with binIdx < thresholdBinIdx when:
+      // 1. This is step 0 and the threshold bin is small enough (no step 1)
+      // 2. This is step >= 1 (where pattern matching filters correctly)
+      // This prevents duplicates when step 0 and step 1 both run.
+      bool shouldWriteDirectly =
+          (step == 0 && smemFinalBinSize[0] <= kNumFinalItems) || (step >= 1);
+      if (binIdx < thresholdBinIdx && shouldWriteDirectly) {
         // The element is part of the top-k selection
         int dstIdx = atomicAdd(&smemFoundTopKValues[0], 1);
 
@@ -564,8 +570,9 @@ template <int kNumThreadsPerBlock, bool useRadixSort,
           bool multipleBlocksPerRow = false, bool mergeBlocks = false>
 static __global__ __launch_bounds__(kNumThreadsPerBlock) void topKPerRowDecode(
     const float* logits, const int* seqLens, int* outIndices, int stride0,
-    int stride1, const int topK, int next_n, float* outLogits = nullptr,
-    const int numBlocksToMerge = 0, const int* indices = nullptr) {
+    int stride1, const int topK, int next_n, int seqLensIs2D = 0,
+    float* outLogits = nullptr, const int numBlocksToMerge = 0,
+    const int* indices = nullptr) {
   // The number of bins in the histogram.
   static constexpr int kNumBins = 2048;
 
@@ -574,8 +581,16 @@ static __global__ __launch_bounds__(kNumThreadsPerBlock) void topKPerRowDecode(
 
   // The range of logits within the row.
   int rowStart = 0;
-  int seq_len = seqLens[rowIdx / next_n];
-  int rowEnd = max(0, seq_len - next_n + (rowIdx % next_n) + 1);
+  int batch_idx = rowIdx / next_n;
+  int next_n_idx = rowIdx % next_n;
+  // seqLensIs2D=0: 1D seqLens — all rows in a batch share the same seq_len;
+  //               kernel computes per-row effective length via offset.
+  // seqLensIs2D=1: 2D seqLens — each logit row has its own pre-computed
+  //               effective length (flat index rowIdx = b*next_n + j maps
+  //               directly to seqLens[b, j] in C-contiguous layout).
+  int seq_len = seqLensIs2D ? seqLens[rowIdx] : seqLens[batch_idx];
+  int rowEnd =
+      seqLensIs2D ? max(0, seq_len) : max(0, seq_len - next_n + next_n_idx + 1);
 
   // Local pointers to this block
   if constexpr (!multipleBlocksPerRow && !mergeBlocks) {
@@ -653,6 +668,11 @@ void top_k_per_row_decode(const torch::Tensor& logits, int64_t next_n,
   const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
   const auto numColumns = logits.size(1);
 
+  // True if seqLens is 2D (B, next_n): each logit row has its own pre-computed
+  // effective seq_len. False if seqLens is 1D (B,): all rows in a batch share
+  // the same seq_len and the kernel computes the per-row offset itself.
+  int seqLensIs2D = seqLens.dim() == 2 ? 1 : 0;
+
   if (numColumns < kSortingAlgorithmThreshold) {
     // Use insertion sort
     vllm::topKPerRowDecode<kNumThreadsPerBlock, false>
@@ -660,7 +680,7 @@ void top_k_per_row_decode(const torch::Tensor& logits, int64_t next_n,
             logits.data_ptr<float>(), seqLens.data_ptr<int>(),
             indices.data_ptr<int>(), static_cast<int>(stride0),
             static_cast<int>(stride1), static_cast<int>(topK),
-            static_cast<int>(next_n));
+            static_cast<int>(next_n), seqLensIs2D);
   } else if (numColumns < kSplitWorkThreshold) {
     // From this threshold, use radix sort instead
     vllm::topKPerRowDecode<kNumThreadsPerBlock, true>
@@ -668,7 +688,7 @@ void top_k_per_row_decode(const torch::Tensor& logits, int64_t next_n,
             logits.data_ptr<float>(), seqLens.data_ptr<int>(),
             indices.data_ptr<int>(), static_cast<int>(stride0),
             static_cast<int>(stride1), static_cast<int>(topK),
-            static_cast<int>(next_n));
+            static_cast<int>(next_n), seqLensIs2D);
   } else {
     // Long sequences are run in two steps
     constexpr auto multipleBlocksPerRowConfig = 10;
@@ -686,15 +706,16 @@ void top_k_per_row_decode(const torch::Tensor& logits, int64_t next_n,
             logits.data_ptr<float>(), seqLens.data_ptr<int>(),
             outIndicesAux.data_ptr<int>(), static_cast<int>(stride0),
             static_cast<int>(stride1), static_cast<int>(topK),
-            static_cast<int>(next_n), outLogitsAux.data_ptr<float>());
+            static_cast<int>(next_n), seqLensIs2D,
+            outLogitsAux.data_ptr<float>());
 
     constexpr int kNumThreadsPerBlockMerge = 1024;
     vllm::topKPerRowDecode<kNumThreadsPerBlockMerge, true, false, true>
         <<<numRows, kNumThreadsPerBlockMerge, topK * sizeof(int32_t), stream>>>(
             outLogitsAux.data_ptr<float>(), seqLens.data_ptr<int>(),
             indices.data_ptr<int>(), multipleBlocksPerRowConfig * topK, 1,
-            static_cast<int>(topK), static_cast<int>(next_n), nullptr,
-            multipleBlocksPerRowConfig, outIndicesAux.data_ptr<int>());
+            static_cast<int>(topK), static_cast<int>(next_n), seqLensIs2D,
+            nullptr, multipleBlocksPerRowConfig, outIndicesAux.data_ptr<int>());
   }
 }
 
diff --git a/csrc/spinloop.cpp b/csrc/spinloop.cpp
new file mode 100644
index 000000000000..c29e48a5f0ec
--- /dev/null
+++ b/csrc/spinloop.cpp
@@ -0,0 +1,204 @@
+#include <Python.h>
+
+extern "C" {
+
+#include <stdbool.h>
+#include <time.h>
+
+#if defined(__i386__) || defined(__x86_64__)
+  #include <cpuid.h>
+  #include <mwaitxintrin.h>
+#endif
+
+#if defined(CLOCK_MONOTONIC_RAW)
+  #define TIMEOUT_CLOCK CLOCK_MONOTONIC_RAW
+#else
+  #define TIMEOUT_CLOCK CLOCK_MONOTONIC
+#endif
+
+#define CPU_SUPPORT_NONE 0
+#define CPU_SUPPORT_MONITORX 1
+
+#define MWAITX_DEFAULT_TIMEOUT_CYCLES 1000000
+
+typedef struct {
+  unsigned int cpu_support;
+  unsigned int max_monitor_line_size;
+} spinloop_state_t;
+
+static void determine_cpu_support(spinloop_state_t* state) {
+  state->cpu_support = CPU_SUPPORT_NONE;
+  state->max_monitor_line_size = 0;
+
+#if defined(__i386__) || defined(__x86_64__)
+  unsigned int eax, ebx, ecx, edx;
+  if (__get_cpuid(0, &eax, &ebx, &ecx, &edx) == 1) {
+    // AMD CPU (possible monitorx/mwaitx support)
+    if (ebx == 0x68747541 && edx == 0x69746e65 && ecx == 0x444d4163) {
+      if (__get_cpuid(0x80000000, &eax, &ebx, &ecx, &edx) == 1 &&
+          eax >= 0x80000001 &&
+          __get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx) == 1) {
+        if ((ecx & (1 << 29)) != 0) {
+          state->cpu_support = CPU_SUPPORT_MONITORX;
+        }
+      }
+    }
+  }
+
+  if (state->cpu_support == CPU_SUPPORT_MONITORX) {
+    if (__get_cpuid(5, &eax, &ebx, &ecx, &edx) == 1) {
+      state->max_monitor_line_size = ebx & 0xff;
+    }
+  }
+#endif
+}
+
+static PyObject* method_spinloop(PyObject* self, PyObject* args,
+                                 PyObject* kwargs) {
+  Py_buffer buffer;
+  PyObject* callback;
+  double timeout = 0.;
+
+  spinloop_state_t* state = (spinloop_state_t*)PyModule_GetState(self);
+  if (state == NULL) {
+    PyErr_SetString(PyExc_TypeError, "Failed to retrieve module state!");
+    return NULL;
+  }
+
+  static const char* keywords[] = {"buffer", "callback", "timeout", NULL};
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*O|d", (char**)keywords,
+                                   &buffer, &callback, &timeout)) {
+    return NULL;
+  }
+
+  if (!PyCallable_Check(callback)) {
+    PyErr_SetString(PyExc_TypeError, "callback parameter must be callable!");
+    PyBuffer_Release(&buffer);
+    return NULL;
+  }
+
+  struct timespec t_start;
+  if (clock_gettime(TIMEOUT_CLOCK, &t_start) != 0) {
+    PyErr_SetString(PyExc_RuntimeError, "clock_gettime() failed!");
+    PyBuffer_Release(&buffer);
+    return NULL;
+  }
+
+  bool result = false;
+  bool error = false;
+  bool have_timeout = (timeout > 1e-9);
+  unsigned int iteration = 0;
+  const bool buffer_qualifies = (buffer.len <= state->max_monitor_line_size);
+
+  while (true) {
+    PyObject* res = PyObject_CallNoArgs(callback);
+    if (res == NULL) {
+      error = true;
+      break;
+    }
+    int ok = (res == Py_True);
+    Py_DECREF(res);
+
+    if (ok) {
+      result = true;
+      break;
+    }
+
+    // Check timeout at most every 16 iterations to avoid clock_gettime and
+    // comparison cost
+    if (have_timeout && (iteration & 15u) == 0) {
+      struct timespec t_now;
+      if (clock_gettime(TIMEOUT_CLOCK, &t_now) != 0) {
+        PyErr_SetString(PyExc_RuntimeError, "clock_gettime() failed!");
+        error = true;
+        break;
+      }
+
+      const double elapsed = (double)(t_now.tv_sec - t_start.tv_sec) +
+                             (t_now.tv_nsec - t_start.tv_nsec) * 1e-9;
+      if (elapsed >= timeout) {
+        result = false;
+        break;
+      }
+    }
+    ++iteration;
+
+#if defined(__i386__) || defined(__x86_64__)
+    // monitorx + mwaitx with qualified buffer
+    if (buffer_qualifies && state->cpu_support == CPU_SUPPORT_MONITORX) {
+      _mm_monitorx(buffer.buf, 0, 0);
+
+      // Check once more in case the buffer has been modified while we were
+      // arming the monitor hardware
+      res = PyObject_CallNoArgs(callback);
+      if (res == NULL) {
+        error = true;
+        break;
+      }
+      ok = (res == Py_True);
+      Py_DECREF(res);
+
+      if (ok) {
+        result = true;
+        break;
+      }
+
+      // Run mwaitx with enabled timeout (bit 1). The actual timeout value
+      // is not very important, we just want to ensure we don't lock up
+      // here for too long.
+      Py_BEGIN_ALLOW_THREADS _mm_mwaitx((1 << 1), 0,
+                                        MWAITX_DEFAULT_TIMEOUT_CYCLES);
+      Py_END_ALLOW_THREADS
+    }
+
+    // Fallback: Busy poll
+    else {
+#endif
+      // Give other threads a chance to be scheduled
+      Py_BEGIN_ALLOW_THREADS
+#if defined(__i386__) || defined(__x86_64__)
+      __builtin_ia32_pause();
+#elif defined(__aarch64__)
+        __asm__ volatile("yield" :: : "memory");
+#endif
+      Py_END_ALLOW_THREADS
+#if defined(__i386__) || defined(__x86_64__)
+    }
+#endif
+  }
+
+  PyBuffer_Release(&buffer);
+
+  if (error) {
+    return NULL;
+  }
+
+  if (result) {
+    Py_RETURN_TRUE;
+  }
+
+  Py_RETURN_FALSE;
+}
+
+static PyMethodDef spinloop_methods[] = {
+    {"spinloop", (PyCFunction)method_spinloop, METH_VARARGS | METH_KEYWORDS,
+     "Wait for store with callback"},
+    {NULL, NULL, 0, NULL}};
+
+static struct PyModuleDef spinloop_module = {
+    PyModuleDef_HEAD_INIT, "spinloop",
+    "Hardware-optimized spinloops for Python", sizeof(spinloop_state_t),
+    spinloop_methods};
+
+PyMODINIT_FUNC PyInit_spinloop(void) {
+  PyObject* m = PyModule_Create(&spinloop_module);
+  if (m != NULL) {
+    spinloop_state_t* state = (spinloop_state_t*)PyModule_GetState(m);
+    if (state != NULL) {
+      determine_cpu_support(state);
+    }
+  }
+  return m;
+}
+
+}  // extern "C"
diff --git a/csrc/topk.cu b/csrc/topk.cu
index a7850f5363b9..9ca9aa8824d8 100644
--- a/csrc/topk.cu
+++ b/csrc/topk.cu
@@ -1,373 +1,267 @@
-// Portions of this file are adapted from SGLang PR:
-// https://github.com/sgl-project/sglang/pull/11194
-// and
-// https://github.com/sgl-project/sglang/pull/17747
+// Persistent TopK kernel for DeepSeek V3 sparse attention indexer.
+// See persistent_topk.cuh for kernel implementation.
 
-#include "cuda_compat.h"
-#include "dispatch_utils.h"
-
-#include <torch/cuda.h>
-#include <c10/cuda/CUDAGuard.h>
+#include <torch/all.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <cuda_runtime.h>
+#include <algorithm>
 
 #ifndef USE_ROCM
-  #include <cub/cub.cuh>
-#else
-  #include <hipcub/hipcub.hpp>
+  #include "persistent_topk.cuh"
 #endif
 
-namespace vllm {
-
-constexpr int TopK = 2048;              // DeepSeek V3 sparse attention top-k
-constexpr int kThreadsPerBlock = 1024;  // Threads per block
-
-// Shared memory budget
-#if defined(USE_ROCM)
-constexpr size_t kSmem = 48 * 1024;  // ROCm default: 48KB
-#else
-// Reduced from 128KB to 32KB to improve occupancy.
-// Each radix pass needs at most ~TopK candidates in the threshold bin,
-// so 4K entries per round (2 rounds = 8K entries = 32KB) is sufficient.
-constexpr size_t kSmem = 8 * 1024 * sizeof(uint32_t);  // 32KB (bytes)
-#endif
-
-struct FastTopKParams {
-  const float* __restrict__ input;         // [batch, seq_len] Logits
-  const int32_t* __restrict__ row_starts;  // [batch] Offset into each row
-                                           // (optional)
-  int32_t* __restrict__ indices;           // [batch, TopK] Output top-k indices
-  int32_t* __restrict__ lengths;           // [batch] Sequence lengths per row
-  int64_t input_stride;                    // Stride between rows
-};
-
-__device__ __forceinline__ auto convert_to_uint32_v2(float x) -> uint32_t {
-  uint32_t bits = __float_as_uint(x);
-  return (bits & 0x80000000u) ? ~bits : (bits | 0x80000000u);
-}
-
-__device__ __forceinline__ auto convert_to_uint8(float x) -> uint8_t {
-  __half h = __float2half_rn(x);
-  uint16_t bits = __half_as_ushort(h);
-  uint16_t key = (bits & 0x8000) ? static_cast<uint16_t>(~bits)
-                                 : static_cast<uint16_t>(bits | 0x8000);
-  return static_cast<uint8_t>(key >> 8);
-}
-
-__device__ void naive_topk_cuda(const float* __restrict__ logits,
-                                int32_t* __restrict__ output_indices,
-                                int32_t seq_len) {
-  const int thread_id = threadIdx.x;
-  for (int i = thread_id; i < TopK; i += kThreadsPerBlock) {
-    output_indices[i] = (i < seq_len) ? i : -1;
-  }
-}
-
-// Adapted from:
-// https://github.com/sgl-project/sglang/blob/v0.5.8/sgl-kernel/csrc/elementwise/topk.cu#L87
-// by: DarkSharpness
-// which at the same time is an optimized topk kernel copied from tilelang
-// kernel
-__device__ void fast_topk_cuda_tl(
-    const float* __restrict__ logits,  // Input logits [seq_len]
-    int* __restrict__ output_indices,  // Output top-k indices [TopK]
-    int logits_offset,                 // Starting offset in logits array
-    int seq_len)                       // Number of valid logits to process
-{
-  constexpr int RADIX = 256;
-  constexpr int MAX_BUFFERED_ITEMS = kSmem / (2 * sizeof(int));
-
-  alignas(128) __shared__ int shared_histogram[2][RADIX + 128];
-  alignas(128) __shared__ int shared_output_count;
-  alignas(128) __shared__ int shared_threshold_bin;
-  alignas(128) __shared__ int shared_buffered_count[2];
-
-  extern __shared__ int buffered_indices[][MAX_BUFFERED_ITEMS];
+namespace {
 
-  const int thread_id = threadIdx.x;
-  int remaining_k = TopK;
-
-  // Pass 0: Build coarse 8-bit histogram using FP16 high bits
-  if (thread_id < RADIX + 1) {
-    shared_histogram[0][thread_id] = 0;
-  }
-  __syncthreads();
-
-  for (int idx = thread_id; idx < seq_len; idx += kThreadsPerBlock) {
-    const auto bin = convert_to_uint8(logits[idx + logits_offset]);
-    ::atomicAdd(&shared_histogram[0][bin], 1);
+#ifndef USE_ROCM
+template <int TopK>
+void launch_persistent_topk(const torch::Tensor& logits,
+                            const torch::Tensor& lengths, torch::Tensor& output,
+                            torch::Tensor& workspace, int64_t max_seq_len) {
+  namespace P = vllm::persistent;
+
+  const int64_t num_rows = logits.size(0);
+  const int64_t stride = logits.stride(0);
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+
+  static int num_sms = 0;
+  static int max_smem_per_block = 0;
+  if (num_sms == 0) {
+    int device;
+    cudaGetDevice(&device);
+    cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device);
+    cudaDeviceGetAttribute(&max_smem_per_block,
+                           cudaDevAttrMaxSharedMemoryPerBlockOptin, device);
   }
-  __syncthreads();
-
-  // Helper: Compute cumulative sum (suffix sum) over histogram using ping-pong
-  // buffers
-  auto compute_cumulative_sum = [&]() {
-    static_assert(1 << 8 == RADIX,
-                  "Radix must be 256 for 8 unrolled iterations");
-#pragma unroll 8
-    for (int i = 0; i < 8; ++i) {
-      if (C10_LIKELY(thread_id < RADIX)) {
-        const int stride = 1 << i;
-        const int src_buffer = i & 1;
-        const int dst_buffer = src_buffer ^ 1;
 
-        int value = shared_histogram[src_buffer][thread_id];
-        if (thread_id < RADIX - stride) {
-          value += shared_histogram[src_buffer][thread_id + stride];
-        }
-        shared_histogram[dst_buffer][thread_id] = value;
-      }
-      __syncthreads();
+  if (num_rows > 32 && max_smem_per_block >= 128 * 1024) {
+    cudaError_t status =
+        vllm::FilteredTopKRaggedTransform<float, int32_t, TopK>(
+            logits.data_ptr<float>(), output.data_ptr<int32_t>(),
+            lengths.data_ptr<int32_t>(), static_cast<uint32_t>(num_rows),
+            static_cast<uint32_t>(TopK), static_cast<uint32_t>(stride), stream);
+    TORCH_CHECK(status == cudaSuccess,
+                "FilteredTopK failed: ", cudaGetErrorString(status));
+  } else {
+    TORCH_CHECK(workspace.is_cuda(), "workspace must be CUDA tensor");
+    TORCH_CHECK(workspace.dtype() == torch::kUInt8, "workspace must be uint8");
+
+    int effective_max_smem;
+    if (num_rows <= 4) {
+      effective_max_smem =
+          std::min(max_smem_per_block, static_cast<int>(P::kSmemMedium));
+    } else if (num_rows <= 8) {
+      constexpr int kSmemCapMedium = 48 * 1024;
+      effective_max_smem = std::min(max_smem_per_block, kSmemCapMedium);
+    } else {
+      effective_max_smem = max_smem_per_block;
     }
-  };
 
-  compute_cumulative_sum();
-
-  // Find threshold bin where cumsum crosses remaining_k
-  if (thread_id < RADIX && shared_histogram[0][thread_id] > remaining_k &&
-      shared_histogram[0][thread_id + 1] <= remaining_k) {
-    shared_threshold_bin = thread_id;
-    shared_buffered_count[0] = 0;
-    shared_output_count = 0;
-  }
-  __syncthreads();
-
-  const int threshold_bin = shared_threshold_bin;
-  remaining_k -= shared_histogram[0][threshold_bin + 1];
-
-  // Early exit if threshold bin perfectly matches remaining_k
-  if (remaining_k == 0) {
-    for (int idx = thread_id; idx < seq_len; idx += kThreadsPerBlock) {
-      const int bin = convert_to_uint8(logits[idx + logits_offset]);
-      if (bin > threshold_bin) {
-        const int output_pos = ::atomicAdd(&shared_output_count, 1);
-        output_indices[output_pos] = idx;
-      }
+    size_t available_for_ordered =
+        static_cast<size_t>(effective_max_smem) - P::kFixedSmemLarge;
+    uint32_t max_chunk_elements =
+        static_cast<uint32_t>(available_for_ordered / sizeof(uint32_t));
+
+    uint32_t vec_size = 1;
+    if (stride % 4 == 0)
+      vec_size = 4;
+    else if (stride % 2 == 0)
+      vec_size = 2;
+
+    max_chunk_elements = (max_chunk_elements / vec_size) * vec_size;
+    uint32_t min_chunk = vec_size * P::kThreadsPerBlock;
+    if (max_chunk_elements < min_chunk) max_chunk_elements = min_chunk;
+
+    uint32_t ctas_per_group =
+        (static_cast<uint32_t>(stride) + max_chunk_elements - 1) /
+        max_chunk_elements;
+    uint32_t chunk_size =
+        (static_cast<uint32_t>(stride) + ctas_per_group - 1) / ctas_per_group;
+    chunk_size = ((chunk_size + vec_size - 1) / vec_size) * vec_size;
+    if (chunk_size > max_chunk_elements) chunk_size = max_chunk_elements;
+
+    size_t smem_size = P::kFixedSmemLarge + chunk_size * sizeof(uint32_t);
+    if (smem_size < P::kSmemMedium) smem_size = P::kSmemMedium;
+
+    // Query occupancy for the instantiation that will actually launch;
+    // overestimating it deadlocks the cooperative barrier.
+    int occupancy = 1;
+    cudaError_t occ_err = cudaSuccess;
+    if (vec_size == 4) {
+      occ_err = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+          &occupancy, P::persistent_topk_kernel<TopK, 4>, P::kThreadsPerBlock,
+          smem_size);
+    } else if (vec_size == 2) {
+      occ_err = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+          &occupancy, P::persistent_topk_kernel<TopK, 2>, P::kThreadsPerBlock,
+          smem_size);
+    } else {
+      occ_err = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+          &occupancy, P::persistent_topk_kernel<TopK, 1>, P::kThreadsPerBlock,
+          smem_size);
     }
-    __syncthreads();
-    return;
-  }
-
-  // Prepare for refinement passes: Process threshold bin
-  __syncthreads();
-  if (thread_id < RADIX + 1) {
-    shared_histogram[0][thread_id] = 0;
-  }
-  __syncthreads();
-
-  // Scan all elements and:
-  // 1. Write indices > threshold_bin to output
-  // 2. Buffer indices == threshold_bin for refinement
-  // 3. Build histogram for next refinement pass (fused optimization)
-  for (int idx = thread_id; idx < seq_len; idx += kThreadsPerBlock) {
-    const float logit_value = logits[idx + logits_offset];
-    const int bin = convert_to_uint8(logit_value);
-
-    if (bin > threshold_bin) {
-      // in top-k, write to output
-      const int output_pos = ::atomicAdd(&shared_output_count, 1);
-      output_indices[output_pos] = idx;
-    } else if (bin == threshold_bin) {
-      // Candidate for top-k, needs refinement
-      const int buffer_pos = ::atomicAdd(&shared_buffered_count[0], 1);
-      if (C10_LIKELY(buffer_pos < MAX_BUFFERED_ITEMS)) {
-        buffered_indices[0][buffer_pos] = idx;
-        // Fused: Build histogram for next pass
-        const uint32_t fp32_bits = convert_to_uint32_v2(logit_value);
-        const int next_bin = (fp32_bits >> 24) & 0xFF;
-        ::atomicAdd(&shared_histogram[0][next_bin], 1);
+    TORCH_CHECK(occ_err == cudaSuccess,
+                "persistent_topk occupancy query failed: ",
+                cudaGetErrorString(occ_err));
+    if (occupancy < 1) occupancy = 1;
+
+    // The cooperative spin-wait barrier only runs when at least one row hits
+    // the radix path (seq_len > RADIX_THRESHOLD). Below that, non-CTA-0 CTAs
+    // early-exit, so oversubscription can't deadlock and headroom is wasted.
+    const bool needs_cooperative =
+        static_cast<uint32_t>(max_seq_len) > P::RADIX_THRESHOLD;
+
+    const uint32_t hw_resident_cap =
+        static_cast<uint32_t>(num_sms) * static_cast<uint32_t>(occupancy);
+    uint32_t max_resident_ctas = hw_resident_cap;
+    if (needs_cooperative) {
+      // Reserve one CTA per SM when occupancy allows; fall back to a single
+      // CTA when occupancy == 1 (the most deadlock-prone case — any straggler
+      // kernel that takes the only slot on one SM hangs the barrier). Never
+      // drop below one full group's worth.
+      uint32_t headroom = (occupancy > 1) ? static_cast<uint32_t>(num_sms) : 1u;
+      if (max_resident_ctas >= headroom + ctas_per_group) {
+        max_resident_ctas -= headroom;
       }
     }
-  }
-  __syncthreads();
-
-  // ============================================================================
-  // Passes 1-4: Refine using 8-bit passes over FP32 bits
-  // ============================================================================
-  // FP32 bits [31:0] split into 4 bytes processed MSB-first:
-  // Pass 1: bits [31:24], Pass 2: bits [23:16], Pass 3: bits [15:8], Pass 4:
-  // bits [7:0]
-#pragma unroll 4
-  for (int pass = 0; pass < 4; ++pass) {
-    __shared__ int shared_final_k;  // For final pass: remaining slots to fill
-    const int src_buffer = pass % 2;
-    const int dst_buffer = src_buffer ^ 1;
-
-    // Clamp buffered count to prevent overflow
-    const int raw_buffered = shared_buffered_count[src_buffer];
-    const int num_buffered =
-        (raw_buffered < MAX_BUFFERED_ITEMS) ? raw_buffered : MAX_BUFFERED_ITEMS;
-
-    compute_cumulative_sum();
-
-    // Find threshold bin for this pass
-    if (thread_id < RADIX && shared_histogram[0][thread_id] > remaining_k &&
-        shared_histogram[0][thread_id + 1] <= remaining_k) {
-      shared_threshold_bin = thread_id;
-      shared_buffered_count[dst_buffer] = 0;
-      shared_final_k = remaining_k - shared_histogram[0][thread_id + 1];
+    uint32_t num_groups = std::min(max_resident_ctas / ctas_per_group,
+                                   static_cast<uint32_t>(num_rows));
+    if (num_groups == 0) num_groups = 1;
+    uint32_t total_ctas = num_groups * ctas_per_group;
+
+    // If the cooperative launch wouldn't fit, fall back to FilteredTopK
+    // instead of deadlocking. Only relevant when needs_cooperative.
+    if (needs_cooperative && total_ctas > hw_resident_cap) {
+      TORCH_CHECK(max_smem_per_block >= 128 * 1024,
+                  "persistent_topk would oversubscribe and the FilteredTopK "
+                  "fallback requires >=128KB smem per block (have ",
+                  max_smem_per_block, "). total_ctas=", total_ctas,
+                  " > num_sms*occupancy=", hw_resident_cap, " (TopK=", TopK,
+                  ", vec_size=", vec_size, ", ctas_per_group=", ctas_per_group,
+                  ", smem=", smem_size, ").");
+      cudaError_t status =
+          vllm::FilteredTopKRaggedTransform<float, int32_t, TopK>(
+              logits.data_ptr<float>(), output.data_ptr<int32_t>(),
+              lengths.data_ptr<int32_t>(), static_cast<uint32_t>(num_rows),
+              static_cast<uint32_t>(TopK), static_cast<uint32_t>(stride),
+              stream);
+      TORCH_CHECK(status == cudaSuccess,
+                  "FilteredTopK fallback failed: ", cudaGetErrorString(status));
+      return;
     }
-    __syncthreads();
 
-    const int threshold_bin = shared_threshold_bin;
-    remaining_k -= shared_histogram[0][threshold_bin + 1];
-
-    // Bit offset for this pass: 24, 16, 8, 0
-    const int bit_offset = 24 - pass * 8;
-
-    // Early exit if threshold bin perfectly matches
-    if (remaining_k == 0) {
-      for (int i = thread_id; i < num_buffered; i += kThreadsPerBlock) {
-        const int idx = buffered_indices[src_buffer][i];
-        const uint32_t fp32_bits =
-            convert_to_uint32_v2(logits[idx + logits_offset]);
-        const int bin = (fp32_bits >> bit_offset) & 0xFF;
-        if (bin > threshold_bin) {
-          const int output_pos = ::atomicAdd(&shared_output_count, 1);
-          output_indices[output_pos] = idx;
-        }
-      }
-      __syncthreads();
-      break;
+    size_t state_bytes = num_groups * sizeof(P::RadixRowState);
+    TORCH_CHECK(workspace.size(0) >= static_cast<int64_t>(state_bytes),
+                "workspace too small, need ", state_bytes, " bytes");
+
+    // Zero the per-group RadixRowState region before launch.
+    //
+    // Issued UNCONDITIONALLY so the memset is captured as its own node in
+    // the cudagraph (a separate cudaMemsetAsync node, sequenced before the
+    // persistent_topk_kernel launch on the same stream). The previous
+    // host-side guard `if (needs_cooperative)` was evaluated at capture time;
+    // when capture-time max_seq_len <= RADIX_THRESHOLD (always true under
+    // FULL_DECODE_ONLY with max_model_len < 32 K) the memset would NOT be
+    // captured, leaving the workspace state to accumulate across replays.
+    // That's a latent correctness bug if the runtime data ever takes the
+    // radix path, and removes one variable while debugging hangs in the
+    // decode/medium paths.
+    //
+    // Cost is sub-microsecond: state_bytes = num_groups * sizeof(RadixRowState)
+    // is ~3 KB per group, ~100 KB for the largest grids on this hardware.
+    //
+    // Why the memset is required (regardless of which path the kernel takes):
+    //   1. arrival_counter accumulates within a launch and is never reset,
+    //      so a prior call leaves it at a large positive value. Without this
+    //      reset, the very first wait_ge in the next call sees counter >>
+    //      target and returns instantly, breaking the barrier.
+    //   2. The previous in-kernel init only ran in CTA-0 with intra-CTA
+    //      __syncthreads(), so it had no happens-before edge to CTA-1+'s
+    //      first red_release. cudaMemsetAsync is stream-ordered: the zero
+    //      is globally visible before any CTA runs.
+    {
+      cudaError_t mz_err = cudaMemsetAsync(workspace.data_ptr<uint8_t>(), 0,
+                                           state_bytes, stream);
+      TORCH_CHECK(mz_err == cudaSuccess,
+                  "row_states memset failed: ", cudaGetErrorString(mz_err));
     }
 
-    // Continue refinement
-    __syncthreads();
-    if (thread_id < RADIX + 1) {
-      shared_histogram[0][thread_id] = 0;
+    P::PersistentTopKParams params;
+    params.input = logits.data_ptr<float>();
+    params.output = output.data_ptr<int32_t>();
+    params.lengths = lengths.data_ptr<int32_t>();
+    params.num_rows = static_cast<uint32_t>(num_rows);
+    params.stride = static_cast<uint32_t>(stride);
+    params.top_k = static_cast<uint32_t>(TopK);
+    params.chunk_size = chunk_size;
+    params.row_states =
+        reinterpret_cast<P::RadixRowState*>(workspace.data_ptr<uint8_t>());
+    params.ctas_per_group = ctas_per_group;
+    params.max_seq_len = static_cast<uint32_t>(max_seq_len);
+
+  #define LAUNCH_PERSISTENT(TOPK_VAL, VS)                                     \
+    do {                                                                      \
+      auto kernel = &P::persistent_topk_kernel<TOPK_VAL, VS>;                 \
+      cudaError_t err = cudaFuncSetAttribute(                                 \
+          kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size);    \
+      TORCH_CHECK(err == cudaSuccess,                                         \
+                  "Failed to set smem: ", cudaGetErrorString(err));           \
+      kernel<<<total_ctas, P::kThreadsPerBlock, smem_size, stream>>>(params); \
+    } while (0)
+
+    if (vec_size == 4) {
+      LAUNCH_PERSISTENT(TopK, 4);
+    } else if (vec_size == 2) {
+      LAUNCH_PERSISTENT(TopK, 2);
+    } else {
+      LAUNCH_PERSISTENT(TopK, 1);
     }
-    __syncthreads();
-
-    for (int i = thread_id; i < num_buffered; i += kThreadsPerBlock) {
-      const int idx = buffered_indices[src_buffer][i];
-      const float logit_value = logits[idx + logits_offset];
-      const uint32_t fp32_bits = convert_to_uint32_v2(logit_value);
-      const int bin = (fp32_bits >> bit_offset) & 0xFF;
-
-      if (bin > threshold_bin) {
-        // Definitely in top-k
-        const int output_pos = ::atomicAdd(&shared_output_count, 1);
-        output_indices[output_pos] = idx;
-      } else if (bin == threshold_bin) {
-        if (pass == 3) {
-          // Final pass (bits [7:0]): No more refinement possible
-          // Fill remaining slots in reverse order to maintain descending order
-          const int slot = ::atomicAdd(&shared_final_k, -1);
-          if (slot > 0) {
-            output_indices[TopK - slot] = idx;
-          }
-        } else {
-          // Buffer for next pass and build next histogram
-          const int buffer_pos =
-              ::atomicAdd(&shared_buffered_count[dst_buffer], 1);
-          if (C10_LIKELY(buffer_pos < MAX_BUFFERED_ITEMS)) {
-            buffered_indices[dst_buffer][buffer_pos] = idx;
-            // Fused: Build histogram for next pass
-            const int next_bit_offset = bit_offset - 8;
-            const int next_bin = (fp32_bits >> next_bit_offset) & 0xFF;
-            ::atomicAdd(&shared_histogram[0][next_bin], 1);
-          }
-        }
-      }
-    }
-    __syncthreads();
+  #undef LAUNCH_PERSISTENT
   }
-}
 
-__global__ __launch_bounds__(kThreadsPerBlock) void topk_kernel(
-    const FastTopKParams params) {
-  const auto& [input, row_starts, indices, lengths, input_stride] = params;
-  const uint64_t batch_idx = blockIdx.x;
-  const int logits_offset = row_starts == nullptr ? 0 : row_starts[batch_idx];
-  const int seq_len = lengths[batch_idx];
-  int* output_indices = indices + batch_idx * TopK;
-  const float* logits = input + batch_idx * input_stride;
-
-  if (seq_len <= TopK) {
-    // Shortcut: All elements are in top-k
-    return naive_topk_cuda(logits, output_indices, seq_len);
-  } else {
-    return fast_topk_cuda_tl(logits, output_indices, logits_offset, seq_len);
-  }
+  cudaError_t err = cudaGetLastError();
+  TORCH_CHECK(err == cudaSuccess,
+              "persistent_topk failed: ", cudaGetErrorString(err));
 }
+#endif
 
-FastTopKParams get_params(
-    const at::Tensor& score, const at::Tensor& lengths,
-    std::optional<at::Tensor> row_starts_opt = std::nullopt,
-    std::optional<at::Tensor> indices_opt = std::nullopt) {
-  const int64_t batch_size = score.size(0);
-
-  TORCH_CHECK(score.dim() == 2 && score.stride(1) == 1,
-              "score must be 2D with contiguous rows");
-  TORCH_CHECK(lengths.dim() == 1 && lengths.is_contiguous() &&
-                  lengths.size(0) == batch_size,
-              "lengths must be 1D contiguous with size matching batch");
-
-  const int32_t* row_starts_ptr = nullptr;
-  if (row_starts_opt.has_value()) {
-    const auto& row_starts = *row_starts_opt;
-    TORCH_CHECK(row_starts.dim() == 1 && row_starts.size(0) == batch_size,
-                "row_starts must be 1D with size matching batch");
-    row_starts_ptr = row_starts.data_ptr<int32_t>();
-  }
+}  // anonymous namespace
 
-  int32_t* indices_ptr = nullptr;
-  if (indices_opt.has_value()) {
-    const auto& indices = *indices_opt;
-    TORCH_CHECK(indices.dim() == 2 && indices.is_contiguous() &&
-                    indices.size(0) == batch_size && indices.size(1) == TopK,
-                "indices must be 2D contiguous [batch, TopK]");
-    indices_ptr = indices.data_ptr<int32_t>();
+void persistent_topk(const torch::Tensor& logits, const torch::Tensor& lengths,
+                     torch::Tensor& output, torch::Tensor& workspace, int64_t k,
+                     int64_t max_seq_len) {
+#ifndef USE_ROCM
+  TORCH_CHECK(logits.is_cuda(), "logits must be CUDA tensor");
+  TORCH_CHECK(lengths.is_cuda(), "lengths must be CUDA tensor");
+  TORCH_CHECK(output.is_cuda(), "output must be CUDA tensor");
+  TORCH_CHECK(logits.dtype() == torch::kFloat32, "Only float32 supported");
+  TORCH_CHECK(lengths.dtype() == torch::kInt32, "lengths must be int32");
+  TORCH_CHECK(output.dtype() == torch::kInt32, "output must be int32");
+  TORCH_CHECK(logits.dim() == 2, "logits must be 2D");
+  TORCH_CHECK(lengths.dim() == 1 || lengths.dim() == 2,
+              "lengths must be 1D or 2D");
+  TORCH_CHECK(lengths.is_contiguous(), "lengths must be contiguous");
+  TORCH_CHECK(output.dim() == 2, "output must be 2D");
+
+  const int64_t num_rows = logits.size(0);
+  const int64_t stride = logits.stride(0);
+
+  TORCH_CHECK(lengths.numel() == num_rows, "lengths size mismatch");
+  TORCH_CHECK(output.size(0) == num_rows && output.size(1) == k,
+              "output size mismatch");
+  TORCH_CHECK(k == 512 || k == 1024 || k == 2048,
+              "persistent_topk supports k=512, k=1024, or k=2048, got k=", k);
+
+  if (k == 512) {
+    launch_persistent_topk<512>(logits, lengths, output, workspace,
+                                max_seq_len);
+  } else if (k == 1024) {
+    launch_persistent_topk<1024>(logits, lengths, output, workspace,
+                                 max_seq_len);
+  } else {
+    launch_persistent_topk<2048>(logits, lengths, output, workspace,
+                                 max_seq_len);
   }
-
-  return FastTopKParams{
-      .input = score.data_ptr<float>(),
-      .row_starts = row_starts_ptr,
-      .indices = indices_ptr,
-      .lengths = lengths.data_ptr<int32_t>(),
-      .input_stride = score.stride(0),
-  };
-}
-
-template <auto* kernel_func, size_t smem_bytes>
-void setup_kernel_smem_once() {
-  static const cudaError_t result = []() -> cudaError_t {
-#ifdef USE_ROCM
-    auto func_ptr = reinterpret_cast<const void*>(kernel_func);
 #else
-    auto func_ptr = kernel_func;
+  TORCH_CHECK(false, "persistent_topk is not supported on ROCm");
 #endif
-    return cudaFuncSetAttribute(
-        func_ptr, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_bytes);
-  }();
-
-  TORCH_CHECK(
-      result == cudaSuccess,
-      "Failed to set kernel shared memory limit: ", cudaGetErrorString(result));
 }
-
-}  // namespace vllm
-
-void large_context_topk(
-    const torch::Tensor& logits, torch::Tensor& indices,
-    const torch::Tensor& seq_lens,
-    std::optional<torch::Tensor> row_starts = std::nullopt) {
-  TORCH_CHECK(logits.is_cuda(), "logits must be a CUDA tensor");
-  TORCH_CHECK(indices.is_cuda(), "indices must be a CUDA tensor");
-  TORCH_CHECK(seq_lens.is_cuda(), "seq_lens must be a CUDA tensor");
-  if (row_starts.has_value()) {
-    TORCH_CHECK(row_starts->is_cuda(), "row_starts must be a CUDA tensor");
-  }
-
-  const auto params = vllm::get_params(logits, seq_lens, row_starts, indices);
-  const int64_t batch_size = logits.size(0);
-
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
-  const dim3 grid(static_cast<uint32_t>(batch_size));
-  const dim3 block(vllm::kThreadsPerBlock);
-
-  vllm::setup_kernel_smem_once<vllm::topk_kernel, vllm::kSmem>();
-  vllm::topk_kernel<<<grid, block, vllm::kSmem, stream>>>(params);
-
-  const cudaError_t result = cudaGetLastError();
-  TORCH_CHECK(result == cudaSuccess,
-              "large_context_topk kernel failed: ", cudaGetErrorString(result));
-}
\ No newline at end of file
diff --git a/csrc/torch_bindings.cpp b/csrc/torch_bindings.cpp
index 3bc69c7bb892..78c2875644c8 100644
--- a/csrc/torch_bindings.cpp
+++ b/csrc/torch_bindings.cpp
@@ -2,7 +2,6 @@
 #include "cuda_utils.h"
 #include "ops.h"
 #include "core/registration.h"
-
 #include <torch/library.h>
 #include <torch/version.h>
 
@@ -73,102 +72,38 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
       "    Tensor prefix_output,"
       "    Tensor prefix_lse,"
       "    Tensor suffix_output,"
-      "    Tensor suffix_lse) -> ()");
+      "    Tensor suffix_lse,"
+      "    int!? prefill_tokens_with_context,"
+      "    Tensor? output_scale=None) -> ()");
   ops.impl("merge_attn_states", torch::kCUDA, &merge_attn_states);
-#ifndef USE_ROCM
-  ops.def(
-      "convert_vertical_slash_indexes("
-      "   Tensor! block_count, Tensor! block_offset, "
-      "   Tensor! column_count, Tensor! column_index, "
-      "   Tensor q_seqlens, Tensor q_seqlens, "
-      "   Tensor vertical_indexes, Tensor slash_indexes, "
-      "   int context_size, int block_size_M, int block_size_N, "
-      "   bool causal) -> ()");
-  ops.impl("convert_vertical_slash_indexes", torch::kCUDA,
-           &convert_vertical_slash_indexes);
-
-  ops.def(
-      "convert_vertical_slash_indexes_mergehead("
-      "   Tensor! block_count, Tensor! block_offset, "
-      "   Tensor! column_count, Tensor! column_index, "
-      "   Tensor q_seqlens, Tensor q_seqlens, "
-      "   Tensor vertical_indexes, Tensor slash_indexes, "
-      "   Tensor vertical_indices_count, Tensor slash_indices_count, "
-      "   int context_size, int block_size_M, int block_size_N, "
-      "   bool causal) -> ()");
-  ops.impl("convert_vertical_slash_indexes_mergehead", torch::kCUDA,
-           &convert_vertical_slash_indexes_mergehead);
-#endif
-
-  // Activation ops
-  // Activation function used in SwiGLU.
-  ops.def("silu_and_mul(Tensor! result, Tensor input) -> ()");
-  ops.impl("silu_and_mul", torch::kCUDA, &silu_and_mul);
 
+  // Activation ops (quantized only — basic ops moved to _C_stable_libtorch)
   ops.def(
       "silu_and_mul_quant(Tensor! result, Tensor input, Tensor scale) -> ()");
   ops.impl("silu_and_mul_quant", torch::kCUDA, &silu_and_mul_quant);
 
-#ifndef USE_ROCM
-  ops.def(
-      "silu_and_mul_nvfp4_quant(Tensor! result, Tensor! result_block_scale, "
-      "Tensor input, Tensor input_global_scale) -> ()");
-  ops.impl("silu_and_mul_nvfp4_quant", torch::kCUDA, &silu_and_mul_nvfp4_quant);
-#endif
-
-  ops.def("mul_and_silu(Tensor! out, Tensor input) -> ()");
-  ops.impl("mul_and_silu", torch::kCUDA, &mul_and_silu);
-
-  // Activation function used in GeGLU with `none` approximation.
-  ops.def("gelu_and_mul(Tensor! out, Tensor input) -> ()");
-  ops.impl("gelu_and_mul", torch::kCUDA, &gelu_and_mul);
-
-  // Activation function used in GeGLU with `tanh` approximation.
-  ops.def("gelu_tanh_and_mul(Tensor! out, Tensor input) -> ()");
-  ops.impl("gelu_tanh_and_mul", torch::kCUDA, &gelu_tanh_and_mul);
-
-  // FATReLU implementation.
-  ops.def("fatrelu_and_mul(Tensor! out, Tensor input, float threshold) -> ()");
-  ops.impl("fatrelu_and_mul", torch::kCUDA, &fatrelu_and_mul);
-
-  ops.def(
-      "swigluoai_and_mul(Tensor! out, Tensor input, float alpha=1.702, float "
-      "limit=7.0) "
-      "-> ()");
-  ops.impl("swigluoai_and_mul", torch::kCUDA, &swigluoai_and_mul);
-
-  // GELU implementation used in GPT-2.
-  ops.def("gelu_new(Tensor! out, Tensor input) -> ()");
-  ops.impl("gelu_new", torch::kCUDA, &gelu_new);
-
-  // Approximate GELU implementation.
-  ops.def("gelu_fast(Tensor! out, Tensor input) -> ()");
-  ops.impl("gelu_fast", torch::kCUDA, &gelu_fast);
-
-  // Quick GELU implementation.
-  ops.def("gelu_quick(Tensor! out, Tensor input) -> ()");
-  ops.impl("gelu_quick", torch::kCUDA, &gelu_quick);
-
-  // Layernorm
-  // Apply Root Mean Square (RMS) Normalization to the input tensor.
-  ops.def(
-      "rms_norm(Tensor! result, Tensor input, Tensor weight, float epsilon) -> "
-      "()");
-  ops.impl("rms_norm", torch::kCUDA, &rms_norm);
-
-  // In-place fused Add and RMS Normalization.
-  ops.def(
-      "fused_add_rms_norm(Tensor! input, Tensor! residual, Tensor weight, "
-      "float epsilon) -> ()");
-  ops.impl("fused_add_rms_norm", torch::kCUDA, &fused_add_rms_norm);
-
-  // Function for fused QK Norm and RoPE
-  ops.def(
-      "fused_qk_norm_rope(Tensor! qkv, int num_heads_q, "
-      "int num_heads_k, int num_heads_v, int head_dim, float eps, "
-      "Tensor q_weight, Tensor k_weight, Tensor cos_sin_cache, "
-      "bool is_neox, Tensor position_ids) -> ()");
-  ops.impl("fused_qk_norm_rope", torch::kCUDA, &fused_qk_norm_rope);
+  // Fused SiLU+Mul + per-block quantization
+  ops.def(
+      "silu_and_mul_per_block_quant("
+      "Tensor! out, "
+      "Tensor input, "
+      "Tensor! scales, "
+      "int group_size, "
+      "Tensor? scale_ub=None, "
+      "bool is_scale_transposed=False) -> ()");
+  ops.impl("silu_and_mul_per_block_quant", torch::kCUDA,
+           &silu_and_mul_per_block_quant);
+
+  // Horizontally-fused DeepseekV4-MLA: per-head RMSNorm + GPT-J RoPE for Q, and
+  // GPT-J RoPE + UE8M0 FP8 quant + paged cache insert for KV, all in one
+  // kernel launch.
+  ops.def(
+      "fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert("
+      "Tensor! q, Tensor kv, Tensor! k_cache, "
+      "Tensor slot_mapping, Tensor position_ids, Tensor cos_sin_cache, "
+      "float eps, int cache_block_size) -> ()");
+  ops.impl("fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert", torch::kCUDA,
+           &fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert);
 
   // Apply repetition penalties to logits in-place
   ops.def(
@@ -191,70 +126,12 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
   ops.impl("top_k_per_row_decode", torch::kCUDA, &top_k_per_row_decode);
 
   ops.def(
-      "large_context_topk(Tensor score, Tensor indices, Tensor lengths, "
-      "Tensor? "
-      "row_starts_opt) -> ()");
-  ops.impl("large_context_topk", torch::kCUDA, &large_context_topk);
-
-  // Layernorm-quant
-  // Apply Root Mean Square (RMS) Normalization to the input tensor.
-  ops.def(
-      "rms_norm_static_fp8_quant(Tensor! result, Tensor input, Tensor weight, "
-      "Tensor scale, float epsilon) -> "
-      "()");
-  ops.impl("rms_norm_static_fp8_quant", torch::kCUDA,
-           &rms_norm_static_fp8_quant);
-
-  // In-place fused Add and RMS Normalization.
-  ops.def(
-      "fused_add_rms_norm_static_fp8_quant(Tensor! result, Tensor input, "
-      "Tensor! residual, Tensor weight, "
-      "Tensor scale, float epsilon) -> ()");
-  ops.impl("fused_add_rms_norm_static_fp8_quant", torch::kCUDA,
-           &fused_add_rms_norm_static_fp8_quant);
-
-  // Fused Layernorm + Quant kernels
-  ops.def(
-      "rms_norm_dynamic_per_token_quant(Tensor! result, Tensor input, "
-      "Tensor weight, Tensor! scale, float epsilon, "
-      "Tensor? scale_ub, Tensor!? residual) -> ()");
-  ops.impl("rms_norm_dynamic_per_token_quant", torch::kCUDA,
-           &rms_norm_dynamic_per_token_quant);
-
-  // Fused Layernorm + Block quant kernels
-  ops.def(
-      "rms_norm_per_block_quant(Tensor! result, Tensor input, "
-      "Tensor weight, Tensor! scale, float epsilon, "
-      "Tensor? scale_ub, Tensor!? residual, int group_size, "
-      "bool is_scale_transposed) -> ()");
-  ops.impl("rms_norm_per_block_quant", torch::kCUDA, &rms_norm_per_block_quant);
-
-  // Rotary embedding
-  // Apply GPT-NeoX or GPT-J style rotary embedding to query and key.
-  ops.def(
-      "rotary_embedding(Tensor positions, Tensor! query,"
-      "                 Tensor!? key, int head_size,"
-      "                 Tensor cos_sin_cache, bool is_neox) -> ()");
-  ops.impl("rotary_embedding", torch::kCUDA, &rotary_embedding);
+      "persistent_topk(Tensor logits, Tensor lengths, Tensor! output, "
+      "Tensor workspace, int k, int max_seq_len) -> ()");
+  ops.impl("persistent_topk", torch::kCUDA, &persistent_topk);
 
   // Quantization ops
 #ifndef USE_ROCM
-  // DeepSeek V3 fused A GEMM (SM 9.0+, bf16 only, 1-16 tokens).
-  ops.def(
-      "dsv3_fused_a_gemm(Tensor! output, Tensor mat_a, Tensor mat_b) -> ()");
-  // conditionally compiled so impl registration is in source file
-
-  // Quantized GEMM for AWQ.
-  ops.def(
-      "awq_gemm(Tensor _in_feats, Tensor _kernel, Tensor _scaling_factors, "
-      "Tensor _zeros, SymInt split_k_iters) -> Tensor");
-  ops.impl("awq_gemm", torch::kCUDA, &awq_gemm);
-
-  // Dequantization for AWQ.
-  ops.def(
-      "awq_dequantize(Tensor _kernel, Tensor _scaling_factors, "
-      "Tensor _zeros, SymInt split_k_iters, int thx, int thy) -> Tensor");
-  ops.impl("awq_dequantize", torch::kCUDA, &awq_dequantize);
 
   // Note about marlin kernel 'workspace' arguments:
   // Technically these should be mutable since they are modified by the kernel.
@@ -332,97 +209,9 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
       "Tensor? qzeros_or_none, bool inplace) -> Tensor");
   // conditionally compiled so impl registrations are in source file
 
-  // CUTLASS w4a8 GEMM
-  ops.def(
-      "cutlass_w4a8_mm("
-      "   Tensor A,"
-      "   Tensor B,"
-      "   Tensor group_scales,"
-      "   int    group_size,"
-      "   Tensor channel_scales,"
-      "   Tensor token_scales,"
-      "   ScalarType? out_type,"
-      "   str?   maybe_schedule"
-      ") -> Tensor");
-  // pack scales
-  ops.def("cutlass_pack_scale_fp8(Tensor scales) -> Tensor");
-  // encode and reorder weight matrix
-  ops.def("cutlass_encode_and_reorder_int4b(Tensor B) -> Tensor");
-  // conditionally compiled so impl registration is in source file
-
-  // CUTLASS w4a8 grouped GEMM
-  ops.def(
-      "cutlass_w4a8_moe_mm("
-      "   Tensor! out_tensors,"
-      "   Tensor a_tensors,"
-      "   Tensor b_tensors,"
-      "   Tensor a_scales,"
-      "   Tensor b_scales,"
-      "   Tensor b_group_scales,"
-      "   int b_group_size,"
-      "   Tensor expert_offsets,"
-      "   Tensor problem_sizes,"
-      "   Tensor a_strides,"
-      "   Tensor b_strides,"
-      "   Tensor c_strides,"
-      "   Tensor group_scale_strides,"
-      "   str? maybe_schedule"
-      ") -> ()");
-  ops.def(
-      "cutlass_encode_and_reorder_int4b_grouped(Tensor b_tensors) -> (Tensor, "
-      "Tensor)");
-  // conditionally compiled so impl registration is in source file
-
 #endif
 
-  // Dequantization for GGML.
-  ops.def(
-      "ggml_dequantize(Tensor W, int type, SymInt m, SymInt n, ScalarType? "
-      "dtype) -> Tensor");
-  ops.impl("ggml_dequantize", torch::kCUDA, &ggml_dequantize);
-
-  // mmvq kernel for GGML.
-  ops.def(
-      "ggml_mul_mat_vec_a8(Tensor W, Tensor X, int type, SymInt row) "
-      "-> Tensor");
-  ops.impl("ggml_mul_mat_vec_a8", torch::kCUDA, &ggml_mul_mat_vec_a8);
-
-  // mmq kernel for GGML.
-  ops.def(
-      "ggml_mul_mat_a8(Tensor W, Tensor X, int type, SymInt row) -> Tensor");
-  ops.impl("ggml_mul_mat_a8", torch::kCUDA, &ggml_mul_mat_a8);
-
-  // moe kernel for GGML.
-  ops.def(
-      "ggml_moe_a8(Tensor X, Tensor W, "
-      "Tensor sorted_token_ids, Tensor expert_ids, Tensor "
-      "num_tokens_post_padded, "
-      "int type, SymInt row, SymInt top_k, SymInt tokens) -> Tensor");
-  ops.impl("ggml_moe_a8", torch::kCUDA, &ggml_moe_a8);
-
-  ops.def(
-      "ggml_moe_a8_vec(Tensor X, Tensor W, "
-      "Tensor topk_ids, int top_k, "
-      "int type, SymInt row, SymInt tokens) -> Tensor");
-  ops.impl("ggml_moe_a8_vec", torch::kCUDA, &ggml_moe_a8_vec);
-
-  ops.def("ggml_moe_get_block_size", &ggml_moe_get_block_size);
-
 #ifndef USE_ROCM
-  // CUTLASS nvfp4 block scaled GEMM
-  ops.def(
-      "cutlass_scaled_fp4_mm(Tensor! out, Tensor a, Tensor b,"
-      "                      Tensor block_scale_a, Tensor block_scale_b,"
-      "                      Tensor alpha) -> ()");
-  ops.impl("cutlass_scaled_fp4_mm", torch::kCUDA, &cutlass_scaled_fp4_mm);
-
-  // cutlass nvfp4 block scaled group GEMM
-  ops.def(
-      "cutlass_fp4_group_mm(Tensor! out, Tensor a, Tensor b,"
-      " Tensor a_blockscale, Tensor b_blockscales, Tensor alphas,"
-      " Tensor problem_sizes, Tensor expert_offsets, Tensor sf_offsets) -> ()");
-  // conditionally compiled so impl registration is in source file
-
   // Expert-specialization mxfp8 blockscaled grouped quantization (SM100+).
   ops.def(
       "mxfp8_experts_quant("
@@ -439,197 +228,8 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
       " -> ()");
   // conditionally compiled so impl registration is in source file
 
-  // CUTLASS w8a8 GEMM, supporting symmetric per-tensor or per-row/column
-  // quantization, as well as bias
-  ops.def(
-      "cutlass_scaled_mm(Tensor! out, Tensor a,"
-      "                  Tensor b, Tensor a_scales,"
-      "                  Tensor b_scales, Tensor? bias) -> ()");
-  ops.impl("cutlass_scaled_mm", torch::kCUDA, &cutlass_scaled_mm);
-
-  // CUTLASS w8a8 GEMM, supporting asymmetric per-tensor or per-row/column
-  // quantization.
-  ops.def(
-      "cutlass_scaled_mm_azp(Tensor! out, Tensor a,"
-      "                  Tensor b, Tensor a_scales,"
-      "                  Tensor b_scales, Tensor azp_adj,"
-      "                  Tensor? azp, Tensor? bias) -> ()");
-  ops.impl("cutlass_scaled_mm_azp", torch::kCUDA, &cutlass_scaled_mm_azp);
-
-  // Check if cutlass scaled_mm is supported for CUDA devices of the given
-  // capability
-  ops.def("cutlass_scaled_mm_supports_fp8(int cuda_device_capability) -> bool");
-  ops.impl("cutlass_scaled_mm_supports_fp8", &cutlass_scaled_mm_supports_fp8);
-
-  // Check if cutlass grouped gemm is supported for CUDA devices of the given
-  // capability
-  ops.def("cutlass_group_gemm_supported(int cuda_device_capability) -> bool");
-  ops.impl("cutlass_group_gemm_supported", &cutlass_group_gemm_supported);
-
-  // CUTLASS w8a8 grouped GEMM
-  ops.def(
-      "cutlass_moe_mm(Tensor! out_tensors, Tensor a_tensors, Tensor b_tensors, "
-      "               Tensor a_scales, Tensor b_scales, Tensor expert_offsets, "
-      "               Tensor problem_sizes, Tensor a_strides, "
-      "               Tensor b_strides, Tensor c_strides, bool per_act_token, "
-      "               bool per_out_ch) -> ()");
-  ops.impl("cutlass_moe_mm", torch::kCUDA, &cutlass_moe_mm);
-
-  // A function that computes data required to run fused MoE with w8a8 grouped
-  // GEMM. It takes topk_ids as an input, and computes expert_offsets
-  // (token start indices of each expert). In addition to this, it computes
-  // problem sizes for each expert's multiplication used by the two mms called
-  // from fused MoE operation, and arrays with permutations required to shuffle
-  // and de-shuffle the input/output of the fused operation.
-  ops.def(
-      "get_cutlass_moe_mm_data(Tensor topk_ids, Tensor! expert_offsets, "
-      "                        Tensor! problem_sizes1, Tensor! problem_sizes2, "
-      "                        Tensor! input_permutation, "
-      "                        Tensor! output_permutation, int num_experts, "
-      "                        int n, int k, Tensor? blockscale_offsets, "
-      "                        bool is_gated) -> ()");
-  ops.impl("get_cutlass_moe_mm_data", torch::kCUDA, &get_cutlass_moe_mm_data);
-
-  // compute per-expert problem sizes from expert_first_token_offset
-  // produced by vLLM's moe_permute kernel
-  ops.def(
-      "get_cutlass_moe_mm_problem_sizes_from_expert_offsets("
-      "    Tensor expert_first_token_offset, "
-      "    Tensor! problem_sizes1, "
-      "    Tensor! problem_sizes2, "
-      "    int n, int k, bool swap_ab) -> ()");
-  ops.impl("get_cutlass_moe_mm_problem_sizes_from_expert_offsets", torch::kCUDA,
-           &get_cutlass_moe_mm_problem_sizes_from_expert_offsets);
-
-  // A function that computes data required to run fused MoE with w8a8 grouped
-  // GEMM in batched expert format. It takes expert_num_tokens
-  // as an input, and computes expert_offsets (token start indices of each
-  // expert). In addition to this, it computes problem sizes for each expert's
-  // multiplication used by the two mms called from fused MoE operation.
-  ops.def(
-      "get_cutlass_batched_moe_mm_data(Tensor! expert_offsets, "
-      "                             Tensor! problem_sizes1, "
-      "                             Tensor! problem_sizes2, "
-      "                             Tensor expert_num_tokens, "
-      "                             int num_local_experts, int padded_m, "
-      "                             int n, int k) -> ()");
-  ops.impl("get_cutlass_batched_moe_mm_data", torch::kCUDA,
-           &get_cutlass_batched_moe_mm_data);
-
-  // Check if cutlass scaled_mm supports block quantization (used by DeepSeekV3)
-  ops.def(
-      "cutlass_scaled_mm_supports_block_fp8(int cuda_device_capability) -> "
-      "bool");
-  ops.impl("cutlass_scaled_mm_supports_block_fp8",
-           &cutlass_scaled_mm_supports_block_fp8);
-
-  // SM100 CUTLASS MLA decode
-  ops.def(
-      "sm100_cutlass_mla_decode(Tensor! out, Tensor! lse, Tensor q_nope,"
-      "                         Tensor q_pe, Tensor kv_c_and_k_pe_cache,"
-      "                         Tensor seq_lens, Tensor page_table,"
-      "                         Tensor workspace, float scale,"
-      "                         int num_kv_splits) -> ()");
-  // conditionally compiled so impl in source file
-
-  // SM100 CUTLASS MLA workspace
-  ops.def(
-      "sm100_cutlass_mla_get_workspace_size(int max_seq_len, int num_batches,"
-      "                                     int sm_count, int num_kv_splits) "
-      "-> int");
-  // conditionally compiled so impl in source file
-
-  // Compute NVFP4 block quantized tensor.
-  ops.def(
-      "scaled_fp4_quant(Tensor input,"
-      "                 Tensor input_scale, bool "
-      "is_sf_swizzled_layout) -> (Tensor, Tensor)");
-  ops.impl("scaled_fp4_quant", torch::kCUDA, &scaled_fp4_quant_func);
-
-  // Out variant
-  // TODO: Add {at::Tag::out_variant} tag and update all call sites
-  // to use the functional variant once vLLM upgrades PyTorch.
-  // See pytorch/pytorch#176117.
-  ops.def(
-      "scaled_fp4_quant.out(Tensor input,"
-      "                     Tensor input_scale, bool "
-      "is_sf_swizzled_layout, *, Tensor(a!) output, Tensor(b!) output_scale) "
-      "-> ()");
-  ops.impl("scaled_fp4_quant.out", torch::kCUDA, &scaled_fp4_quant_out);
-
-  // Compute NVFP4 experts quantization.
-  ops.def(
-      "scaled_fp4_experts_quant(Tensor! output, Tensor! output_scale,"
-      "Tensor input, Tensor input_global_scale, Tensor input_offset_by_experts,"
-      "Tensor output_scale_offset_by_experts) -> ()");
-  ops.impl("scaled_fp4_experts_quant", torch::kCUDA, &scaled_fp4_experts_quant);
-
-  // Fused SiLU+Mul+NVFP4 experts quantization.
-  ops.def(
-      "silu_and_mul_scaled_fp4_experts_quant(Tensor! output, Tensor! "
-      "output_scale,"
-      "Tensor input, Tensor input_global_scale, Tensor input_offset_by_experts,"
-      "Tensor output_scale_offset_by_experts) -> ()");
-  ops.impl("silu_and_mul_scaled_fp4_experts_quant", torch::kCUDA,
-           &silu_and_mul_scaled_fp4_experts_quant);
-
-  // Check if cutlass_scaled_mm_fp4 is supported for CUDA devices
-  // of the given capability
-  ops.def("cutlass_scaled_mm_supports_fp4(int cuda_device_capability) -> bool");
-  ops.impl("cutlass_scaled_mm_supports_fp4", &cutlass_scaled_mm_supports_fp4);
 #endif
 
-  // Quantized GEMM for GPTQ.
-  // Note: even though the C++ inferred schema is correct for this op, it seems
-  // to prevent the meta function registry.
-  ops.def(
-      "gptq_gemm(Tensor a, Tensor b_q_weight, Tensor b_gptq_qzeros, "
-      "Tensor b_gptq_scales, Tensor b_g_idx, bool use_exllama, bool "
-      "use_v2_format, int bit) "
-      "-> Tensor");
-  ops.impl("gptq_gemm", torch::kCUDA, &gptq_gemm);
-
-  // Post processing for GPTQ.
-  ops.def("gptq_shuffle(Tensor! q_weight, Tensor q_perm, int bit) -> ()");
-  ops.impl("gptq_shuffle", torch::kCUDA, &gptq_shuffle);
-
-  // Compute FP8 quantized tensor for given scaling factor.
-  // Supports per-tensor, per-channel, per-token, and arbitrary 2D group
-  // scaling. Optional group_m/group_n specify the group shape explicitly;
-  // required for 1D scales to disambiguate per-channel vs per-token.
-  ops.def(
-      "static_scaled_fp8_quant(Tensor! result, Tensor input, Tensor scale, "
-      "(int, int)? group_shape=None) -> ()");
-  ops.impl("static_scaled_fp8_quant", torch::kCUDA, &static_scaled_fp8_quant);
-
-  // Compute dynamic-per-tensor FP8 quantized tensor and scaling factor.
-  ops.def(
-      "dynamic_scaled_fp8_quant(Tensor! result, Tensor input, Tensor! scale) "
-      "-> "
-      "()");
-  ops.impl("dynamic_scaled_fp8_quant", torch::kCUDA, &dynamic_scaled_fp8_quant);
-
-  // Compute dynamic-per-token FP8 quantized tensor and scaling factor.
-  ops.def(
-      "dynamic_per_token_scaled_fp8_quant(Tensor! result, Tensor input, "
-      "Tensor! scale, Tensor? scale_ub) -> "
-      "()");
-  ops.impl("dynamic_per_token_scaled_fp8_quant", torch::kCUDA,
-           &dynamic_per_token_scaled_fp8_quant);
-
-  // Compute int8 quantized tensor for given scaling factor.
-  ops.def(
-      "static_scaled_int8_quant(Tensor! result, Tensor input, Tensor scale,"
-      "Tensor? azp) -> ()");
-  ops.impl("static_scaled_int8_quant", torch::kCUDA, &static_scaled_int8_quant);
-
-  // Compute int8 quantized tensor and scaling factor
-  ops.def(
-      "dynamic_scaled_int8_quant(Tensor! result, Tensor input, Tensor! scale, "
-      "Tensor!? azp) -> ()");
-  ops.impl("dynamic_scaled_int8_quant", torch::kCUDA,
-           &dynamic_scaled_int8_quant);
-
   // Mamba selective scan kernel
   ops.def(
       "selective_scan_fwd(Tensor! u, Tensor! delta,"
@@ -640,7 +240,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
       "Tensor? cache_indices,"
       "Tensor? has_initial_state,"
       "Tensor! ssm_states,"
-      "int pad_slot_id,"
+      "int null_block_id,"
       "int block_size,"
       "Tensor? block_idx_first_scheduled_token,"
       "Tensor? block_idx_last_scheduled_token,"
@@ -649,25 +249,29 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
       "Tensor? last_chunk_indices) -> ()");
   ops.impl("selective_scan_fwd", torch::kCUDA, &selective_scan_fwd);
 
-  // Hadamard transforms
-  ops.def("hadacore_transform(Tensor! x, bool inplace) -> Tensor");
-
 #ifndef USE_ROCM
-  // reorder weight for AllSpark Ampere W8A16 Fused Gemm kernel
   ops.def(
-      "rearrange_kn_weight_as_n32k16_order(Tensor b_qweight, Tensor b_scales, "
-      "Tensor? b_zeros, "
-      "bool has_zp, Tensor! b_qweight_reorder, Tensor! b_scales_reorder, "
-      "Tensor!? b_zeros_reorder, "
-      "int K, int N, int N_32align) -> ()");
-  //  conditionally compiled so impl in source file
+      "minimax_allreduce_rms("
+      "Tensor input,"
+      "Tensor norm_weight,"
+      "Tensor workspace,"
+      "int rank,"
+      "int nranks,"
+      "float eps) -> Tensor");
+  ops.impl("minimax_allreduce_rms", torch::kCUDA, &minimax_allreduce_rms);
+  ops.def(
+      "minimax_allreduce_rms_qk("
+      "Tensor qkv,"
+      "Tensor norm_weight_q,"
+      "Tensor norm_weight_k,"
+      "Tensor workspace,"
+      "int q_size,"
+      "int kv_size,"
+      "int rank,"
+      "int nranks,"
+      "float eps) -> (Tensor, Tensor)");
+  ops.impl("minimax_allreduce_rms_qk", torch::kCUDA, &minimax_allreduce_rms_qk);
 
-  // AllSpark quantization ops
-  ops.def(
-      "allspark_w8a16_gemm(Tensor a, Tensor b_qweight, Tensor b_scales, "
-      "Tensor? b_qzeros, "
-      "SymInt n, SymInt group_size, SymInt sm_count, SymInt sm_version, SymInt "
-      "CUBLAS_M_THRESHOLD, bool has_zp, bool n32k16_reorder) -> Tensor");
   //  conditionally compiled so impl in source file
 #endif
 }
@@ -680,6 +284,13 @@ TORCH_LIBRARY_EXPAND(CONCAT(TORCH_EXTENSION_NAME, _cache_ops), cache_ops) {
       "            int block_size_in_bytes, Tensor block_mapping) -> ()");
   cache_ops.impl("swap_blocks", torch::kCUDA, &swap_blocks);
 
+  // Batch swap: submit all block copies in a single driver call.
+  cache_ops.def(
+      "swap_blocks_batch(Tensor src_ptrs, Tensor dst_ptrs,"
+      "                  Tensor sizes,"
+      "                  bool is_src_access_order_any=False) -> ()");
+  cache_ops.impl("swap_blocks_batch", torch::kCPU, &swap_blocks_batch);
+
   // Reshape the key and value tensors and cache them.
   cache_ops.def(
       "reshape_and_cache(Tensor key, Tensor value,"
diff --git a/csrc/torch_utils.h b/csrc/torch_utils.h
new file mode 100644
index 000000000000..898b9e113e19
--- /dev/null
+++ b/csrc/torch_utils.h
@@ -0,0 +1,17 @@
+#pragma once
+
+// Shared TORCH_UTILS_CHECK across both libtorch stable and unstable source
+// files. Keep this header free of CUTLASS/CUTE so attention/quant headers can
+// use it.
+//
+// If TORCH_TARGET_VERSION is defined, we are building _C_stable_libtorch.so so
+// use STD_TORCH_CHECK via header-only.
+// Otherwise, use TORCH_CHECK via torch/all.h.
+
+#ifdef TORCH_TARGET_VERSION
+  #include <torch/headeronly/util/Exception.h>
+  #define TORCH_UTILS_CHECK STD_TORCH_CHECK
+#else
+  #include <torch/all.h>
+  #define TORCH_UTILS_CHECK TORCH_CHECK
+#endif
diff --git a/csrc/type_convert.cuh b/csrc/type_convert.cuh
index 2678f69e19b6..9d939bb828fc 100644
--- a/csrc/type_convert.cuh
+++ b/csrc/type_convert.cuh
@@ -1,8 +1,10 @@
 #pragma once
 
-#include <torch/all.h>
+#include <torch/headeronly/util/BFloat16.h>
+#include <torch/headeronly/util/Half.h>
 
 #ifndef USE_ROCM
+  #include <cuda.h>
   #include <cuda_bf16.h>
   #include <cuda_fp16.h>
 #else
@@ -191,4 +193,4 @@ struct alignas(16) _f16Vec {
     return result;
   }
 };
-}  // namespace vllm
\ No newline at end of file
+}  // namespace vllm
diff --git a/docker/Dockerfile b/docker/Dockerfile
index b26b82eb598e..cae909862b55 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -22,7 +22,7 @@
 #   docker buildx bake -f docker/docker-bake.hcl -f docker/versions.json
 # =============================================================================
 
-ARG CUDA_VERSION=12.9.1
+ARG CUDA_VERSION=13.0.2
 ARG PYTHON_VERSION=3.12
 ARG UBUNTU_VERSION=22.04
 
@@ -37,10 +37,17 @@ ARG UBUNTU_VERSION=22.04
 # compatibility with other Linux OSes. The main reason for this is that the
 # glibc version is baked into the distro, and binaries built with one glibc
 # version are not backwards compatible with OSes that use an earlier version.
-ARG BUILD_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
+ARG BUILD_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
 # Using cuda base image with minimal dependencies necessary for JIT compilation (FlashInfer, DeepGEMM, EP kernels)
 ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-base-ubuntu${UBUNTU_VERSION}
 
+# OS family of BUILD_BASE_IMAGE. Controls package manager (apt vs dnf) and
+# Python bootstrap. Set to "manylinux" alongside a manylinux build base such
+# as pytorch/manylinux2_28-builder:cuda13.0 to produce wheels with a glibc
+# 2.28 floor (matches PyTorch's own published wheels). Default stays on
+# Ubuntu for backwards compatibility.
+ARG BUILD_OS=ubuntu
+
 # By parameterizing the Deadsnakes repository URL, we allow third-party to use
 # their own mirror. When doing so, we don't benefit from the transparent
 # installation of the GPG key of the PPA, as done by add-apt-repository, so we
@@ -94,44 +101,93 @@ FROM ${BUILD_BASE_IMAGE} AS base
 
 ARG CUDA_VERSION
 ARG PYTHON_VERSION
+ARG BUILD_OS
 
 ENV DEBIAN_FRONTEND=noninteractive
 
-# Install system dependencies including build tools
-RUN apt-get update -y \
-    && apt-get install -y --no-install-recommends \
-        ccache \
-        software-properties-common \
-        git \
-        curl \
-        sudo \
-        python3-pip \
-        libibverbs-dev \
-        # Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
-        # as it was causing spam when compiling the CUTLASS kernels
-        gcc-10 \
-        g++-10 \
-    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 110 --slave /usr/bin/g++ g++ /usr/bin/g++-10 \
-    # Install python dev headers if available (needed for cmake FindPython on Ubuntu 24.04
-    # which ships cmake 3.28 and requires Development.SABIModule; silently skipped on
-    # Ubuntu 20.04/22.04 where python3.x-dev is not available without a PPA)
-    && (apt-get install -y --no-install-recommends python${PYTHON_VERSION}-dev 2>/dev/null || true) \
-    && rm -rf /var/lib/apt/lists/* \
+# Environment for uv
+# Declared BEFORE the installer + `uv venv` invocations below so the uv
+# binary, managed Python, download cache, and /opt/venv all land under
+# /opt/uv instead of /root/.local/. Without this, the venv created at
+# build time hardlinks back to /root/.local/share/uv/python and
+# descendants of this stage (`build`, `dev`, `csrc-build`,
+# `extensions-build`) inherit a root-owned, non-root-unreadable layout.
+# See #15174, #15359, #31959. Child stages inherit these via Dockerfile
+# `ENV` unless they override them explicitly.
+ENV UV_HTTP_TIMEOUT=500
+ENV UV_INDEX_STRATEGY="unsafe-best-match"
+ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
+ENV UV_CACHE_DIR=/opt/uv/cache
+ENV UV_INSTALL_DIR=/opt/uv/bin
+ENV PATH="/opt/venv/bin:/opt/uv/bin:$PATH"
+ENV VIRTUAL_ENV="/opt/venv"
+
+# Install system dependencies including build tools.
+# The Ubuntu path uses apt + deadsnakes-via-uv for Python; the manylinux path
+# (AlmaLinux 8, e.g. pytorch/manylinux2_28-builder) uses dnf and the Python
+# interpreters pre-installed at /opt/python/cpXY-cpXY/.
+RUN if [ "${BUILD_OS}" = "manylinux" ]; then \
+        # rdma-core-devel provides libibverbs headers; ccache lives in EPEL,
+        # which the pytorch manylinux image already enables. git/curl/sudo
+        # are typically pre-installed but listed defensively.
+        dnf install -y --setopt=install_weak_deps=False \
+            ccache \
+            git \
+            curl \
+            sudo \
+            rdma-core-devel \
+        && dnf clean all \
+        && rm -rf /var/cache/dnf; \
+    else \
+        apt-get update -y \
+        && apt-get install -y --no-install-recommends \
+            ccache \
+            software-properties-common \
+            git \
+            curl \
+            sudo \
+            python3-pip \
+            libibverbs-dev \
+            # Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
+            # as it was causing spam when compiling the CUTLASS kernels
+            gcc-10 \
+            g++-10 \
+        && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 110 --slave /usr/bin/g++ g++ /usr/bin/g++-10 \
+        # Install python dev headers if available (needed for cmake FindPython on Ubuntu 24.04
+        # which ships cmake 3.28 and requires Development.SABIModule; silently skipped on
+        # Ubuntu 20.04/22.04 where python3.x-dev is not available without a PPA)
+        && (apt-get install -y --no-install-recommends python${PYTHON_VERSION}-dev 2>/dev/null || true) \
+        && rm -rf /var/lib/apt/lists/*; \
+    fi
+
+# Install uv and bootstrap /opt/venv. Both paths converge on /opt/venv so all
+# downstream stages stay distro-agnostic.
+RUN mkdir -p "${UV_PYTHON_INSTALL_DIR}" "${UV_CACHE_DIR}" "${UV_INSTALL_DIR}" \
+    && chmod -R a+rX /opt/uv \
     && curl -LsSf https://astral.sh/uv/install.sh | sh \
-    && $HOME/.local/bin/uv venv /opt/venv --python ${PYTHON_VERSION} \
+    # `--seed` installs pip/setuptools/wheel into the venv so `python3 -m
+    # pip` works regardless of how uv happens to link the venv back to the
+    # managed Python install (which, at a non-default UV_PYTHON_INSTALL_DIR,
+    # doesn't always expose ensurepip via the default venv layout).
+    && if [ "${BUILD_OS}" = "manylinux" ]; then \
+           # manylinux images ship Python at /opt/python/cpXY-cpXY/; point uv
+           # at the matching interpreter rather than letting it fetch one.
+           PYV_NODOT=$(echo ${PYTHON_VERSION} | tr -d '.') \
+           && MANYLINUX_PY=/opt/python/cp${PYV_NODOT}-cp${PYV_NODOT}/bin/python${PYTHON_VERSION} \
+           && uv venv --seed /opt/venv --python "$MANYLINUX_PY"; \
+       else \
+           uv venv --seed /opt/venv --python ${PYTHON_VERSION}; \
+       fi \
     && rm -f /usr/bin/python3 /usr/bin/python3-config /usr/bin/pip \
-    && ln -s /opt/venv/bin/python3 /usr/bin/python3 \
-    && ln -s /opt/venv/bin/python3-config /usr/bin/python3-config \
-    && ln -s /opt/venv/bin/pip /usr/bin/pip \
+    && ln -sf /opt/venv/bin/python3 /usr/bin/python3 \
+    && ln -sf /opt/venv/bin/python3-config /usr/bin/python3-config \
+    && ln -sf /opt/venv/bin/pip /usr/bin/pip \
     && python3 --version && python3 -m pip --version
 
-# Activate virtual environment and add uv to PATH
-ENV PATH="/opt/venv/bin:/root/.local/bin:$PATH"
-ENV VIRTUAL_ENV="/opt/venv"
-
-# Environment for uv
-ENV UV_HTTP_TIMEOUT=500
-ENV UV_INDEX_STRATEGY="unsafe-best-match"
+# UV_LINK_MODE=copy applies to subsequent `uv pip install` RUNs (avoids
+# hardlink failures with BuildKit cache mounts); it must not be set during
+# `uv venv` above, which relies on hardlinking /opt/venv back to the
+# managed Python source so ensurepip / `python3 -m pip` still resolve.
 ENV UV_LINK_MODE=copy
 
 # Verify GCC version
@@ -162,8 +218,11 @@ COPY requirements/common.txt requirements/common.txt
 COPY requirements/cuda.txt requirements/cuda.txt
 COPY use_existing_torch.py use_existing_torch.py
 COPY pyproject.toml pyproject.toml
-RUN --mount=type=cache,target=/root/.cache/uv \
-    if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
+RUN --mount=type=cache,target=/opt/uv/cache \
+    if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "12" ]; then \
+        sed -i 's/^nvidia-cutlass-dsl\[cu13\]>=/nvidia-cutlass-dsl>=/' requirements/cuda.txt; \
+    fi \
+    && if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
         echo "Installing torch nightly..." \
         && uv pip install --python /opt/venv/bin/python3 torch torchaudio torchvision --pre \
         --index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
@@ -179,7 +238,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Track PyTorch lib versions used during build and match in downstream instances.
 # We do this for both nightly and release so we can strip dependencies/*.txt as needed.
 # Otherwise library dependencies can upgrade/downgrade torch incorrectly.
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     uv pip freeze | grep -i "^torch=\|^torchvision=\|^torchaudio=" > torch_lib_versions.txt \
     && TORCH_LIB_VERSIONS=$(cat torch_lib_versions.txt | xargs) \
     && echo "Installed torch libs: ${TORCH_LIB_VERSIONS}"
@@ -188,10 +247,60 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Explicitly set the list to avoid issues with torch 2.2
 # See https://github.com/pytorch/pytorch/pull/123243
 # From versions.json: .torch.cuda_arch_list
-ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
+ARG torch_cuda_arch_list='7.5 8.0 8.6 8.9 9.0 10.0 11.0 12.0+PTX'
 ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
 #################### BUILD BASE IMAGE ####################
 
+#################### RUST BUILD IMAGE ####################
+# Build the Rust frontend (`vllm-rs`) in a dedicated stage so the main wheel
+# build stage doesn't need the rust toolchain, protoc, or the rust source.
+# This stage runs in parallel with csrc-build/extensions-build.
+FROM ${BUILD_BASE_IMAGE} AS rust-build
+ARG BUILD_OS
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install a basic C toolchain (some rust crates compile C in their build.rs
+# scripts) and unzip (used to extract the pinned protoc release below).
+RUN if [ "${BUILD_OS}" = "manylinux" ]; then \
+        dnf install -y --setopt=install_weak_deps=False \
+            ca-certificates curl git gcc gcc-c++ make unzip \
+        && dnf clean all && rm -rf /var/cache/dnf; \
+    else \
+        apt-get update -y \
+        && apt-get install -y --no-install-recommends \
+            ca-certificates curl git build-essential unzip \
+        && rm -rf /var/lib/apt/lists/*; \
+    fi
+
+COPY tools/install_protoc.sh /tmp/install_protoc.sh
+RUN /tmp/install_protoc.sh && rm /tmp/install_protoc.sh
+
+# Install rustup; the toolchain itself is pinned by rust-toolchain.toml.
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | \
+    sh -s -- -y --profile minimal --default-toolchain none
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+WORKDIR /workspace
+
+# Copy only the rust workspace — the binary is the sole artifact we need.
+COPY rust rust
+COPY rust-toolchain.toml rust-toolchain.toml
+COPY build_rust.sh build_rust.sh
+
+# Cap cargo parallelism to avoid exhausting the CI host's open-file limit
+# (rustc spawns enough concurrent processes to hit RLIMIT_NOFILE otherwise).
+ENV CARGO_BUILD_JOBS=4
+
+# Build the release binary. Cache cargo registry/git and target/, but copy the
+# binary out of the target/ cache mount so it persists into the image layer
+# for later COPY --from=rust-build.
+RUN --mount=type=cache,target=/root/.cargo/registry \
+    --mount=type=cache,target=/root/.cargo/git \
+    --mount=type=cache,target=/workspace/rust/target \
+    VLLM_RS_TARGET_PATH=/workspace/vllm-rs bash build_rust.sh
+#################### RUST BUILD IMAGE ####################
+
 #################### CSRC BUILD IMAGE ####################
 FROM base AS csrc-build
 ARG TARGETPLATFORM
@@ -204,7 +313,7 @@ ARG PYTORCH_CUDA_INDEX_BASE_URL
 ARG PYTORCH_NIGHTLY
 
 # Install build dependencies
-COPY requirements/build.txt requirements/build.txt
+COPY requirements/build/cuda.txt requirements/build/cuda.txt
 COPY use_existing_torch.py use_existing_torch.py
 COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
 
@@ -215,17 +324,17 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
 # Use copy mode to avoid hardlink failures with Docker cache mounts
 ENV UV_LINK_MODE=copy
 
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
         echo "Installing build requirements without torch..." \
         && python3 use_existing_torch.py --prefix \
-        && uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
+        && uv pip install --python /opt/venv/bin/python3 -r requirements/build/cuda.txt \
         && echo "Installing torch nightly..." \
         && uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | grep -i "^torch=" | xargs) --pre \
         --index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
     else \
         echo "Installing build requirements..." \
-        && uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
+        && uv pip install --python /opt/venv/bin/python3 -r requirements/build/cuda.txt \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
     fi
 
@@ -260,15 +369,23 @@ ARG VLLM_MAIN_CUDA_VERSION=""
 ENV SETUPTOOLS_SCM_PRETEND_VERSION="0.0.0+csrc.build"
 
 # Use existing torch for nightly builds
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
         python3 use_existing_torch.py --prefix; \
     fi
 
+# Provision one bare Python per `requires-python` entry; cmake reads
+# DEEPGEMM_PYTHON_INTERPRETERS to build DeepGEMM `_C` for each. See
+# cmake/external_projects/deepgemm.cmake for the full picture.
+COPY tools/setup_deepgemm_pythons.sh tools/build_deepgemm_C.py tools/
+ENV DEEPGEMM_VENV_PREFIX=/opt/dgenv
+RUN --mount=type=cache,target=/root/.cache/uv \
+    tools/setup_deepgemm_pythons.sh > /tmp/dg_pythons.txt
+
 # Build the vLLM wheel
 # if USE_SCCACHE is set, use sccache to speed up compilation
 # AWS credentials mounted at ~/.aws/credentials for sccache S3 auth (optional)
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     --mount=type=secret,id=aws-credentials,target=/root/.aws/credentials,required=false \
     if [ "$USE_SCCACHE" = "1" ]; then \
         echo "Installing sccache..." \
@@ -292,6 +409,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
         && export VLLM_PRECOMPILED_WHEEL_COMMIT="${VLLM_MERGE_BASE_COMMIT}" \
         && export VLLM_MAIN_CUDA_VERSION="${VLLM_MAIN_CUDA_VERSION}" \
         && export VLLM_DOCKER_BUILD_CONTEXT=1 \
+        && export DEEPGEMM_PYTHON_INTERPRETERS=$(cat /tmp/dg_pythons.txt) \
         && sccache --show-stats \
         && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
         && sccache --show-stats; \
@@ -301,7 +419,7 @@ ARG vllm_target_device="cuda"
 ENV VLLM_TARGET_DEVICE=${vllm_target_device}
 ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/ccache \
-    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=cache,target=/opt/uv/cache \
     if [ "$USE_SCCACHE" != "1" ]; then \
         # Clean any existing CMake artifacts
         rm -rf .deps && \
@@ -309,13 +427,14 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
         export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" && \
         export VLLM_PRECOMPILED_WHEEL_COMMIT="${VLLM_MERGE_BASE_COMMIT}" && \
         export VLLM_DOCKER_BUILD_CONTEXT=1 && \
+        export DEEPGEMM_PYTHON_INTERPRETERS=$(cat /tmp/dg_pythons.txt) && \
         python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
     fi
 
 #################### CSRC BUILD IMAGE ####################
 
 #################### EXTENSIONS BUILD IMAGE ####################
-# Build DeepGEMM, DeepEP - runs in PARALLEL with csrc-build
+# Build DeepEP - runs in PARALLEL with csrc-build
 # This stage is independent and doesn't affect csrc cache
 FROM base AS extensions-build
 ARG CUDA_VERSION
@@ -327,27 +446,12 @@ ENV UV_LINK_MODE=copy
 
 WORKDIR /workspace
 
-# Build DeepGEMM wheel
-# Default moved here from tools/install_deepgemm.sh for centralized version management
-ARG DEEPGEMM_GIT_REF=477618cd51baffca09c4b0b87e97c03fe827ef03
-COPY tools/install_deepgemm.sh /tmp/install_deepgemm.sh
-RUN --mount=type=cache,target=/root/.cache/uv \
-    mkdir -p /tmp/deepgemm/dist && \
-    VLLM_DOCKER_BUILD_CONTEXT=1 TORCH_CUDA_ARCH_LIST="9.0a 10.0a" /tmp/install_deepgemm.sh \
-        --cuda-version "${CUDA_VERSION}" \
-        ${DEEPGEMM_GIT_REF:+--ref "$DEEPGEMM_GIT_REF"} \
-        --wheel-dir /tmp/deepgemm/dist || \
-    echo "DeepGEMM build skipped (CUDA version requirement not met)"
-
-# Ensure the wheel dir exists so COPY won't fail when DeepGEMM is skipped
-RUN mkdir -p /tmp/deepgemm/dist && touch /tmp/deepgemm/dist/.deepgemm_skipped
-
 # Build DeepEP wheels
 COPY tools/ep_kernels/install_python_libraries.sh /tmp/install_python_libraries.sh
 # Defaults moved here from tools/ep_kernels/install_python_libraries.sh for centralized version management
 ARG DEEPEP_COMMIT_HASH=73b6ea4
 ARG NVSHMEM_VER
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     mkdir -p /tmp/ep_kernels_workspace/dist && \
     export TORCH_CUDA_ARCH_LIST='9.0a 10.0a' && \
     /tmp/install_python_libraries.sh \
@@ -370,7 +474,7 @@ ARG PYTORCH_CUDA_INDEX_BASE_URL
 ARG PYTORCH_NIGHTLY
 
 # Install build dependencies
-COPY requirements/build.txt requirements/build.txt
+COPY requirements/build/cuda.txt requirements/build/cuda.txt
 COPY use_existing_torch.py use_existing_torch.py
 COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
 
@@ -381,17 +485,17 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
 # Use copy mode to avoid hardlink failures with Docker cache mounts
 ENV UV_LINK_MODE=copy
 
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
         echo "Installing build requirements without torch..." \
         && python3 use_existing_torch.py --prefix \
-        && uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
+        && uv pip install --python /opt/venv/bin/python3 -r requirements/build/cuda.txt \
         && echo "Installing torch nightly..." \
         && uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | grep -i "^torch=" | xargs) --pre \
         --index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
     else \
         echo "Installing build requirements..." \
-        && uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
+        && uv pip install --python /opt/venv/bin/python3 -r requirements/build/cuda.txt \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
     fi
 
@@ -401,6 +505,10 @@ WORKDIR /workspace
 COPY --from=csrc-build /workspace/dist /precompiled-wheels
 COPY . .
 
+# Drop the pre-built rust frontend binary into the source tree. setup.py
+# detects it and ships it as-is, skipping the local cargo build.
+COPY --from=rust-build /workspace/vllm-rs vllm/vllm-rs
+
 ARG GIT_REPO_CHECK=0
 RUN --mount=type=bind,source=.git,target=.git \
     if [ "$GIT_REPO_CHECK" != "0" ]; then bash tools/check_repo.sh ; fi
@@ -412,21 +520,21 @@ ENV VLLM_TARGET_DEVICE=${vllm_target_device}
 ENV VLLM_SKIP_PRECOMPILED_VERSION_SUFFIX=1
 
 # Use existing torch for nightly builds
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
         python3 use_existing_torch.py --prefix; \
     fi
 
 # Build the vLLM wheel
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     --mount=type=bind,source=.git,target=.git \
     if [ "${vllm_target_device}" = "cuda" ]; then \
+        export VLLM_USE_PRECOMPILED=1; \
         export VLLM_PRECOMPILED_WHEEL_LOCATION=$(ls /precompiled-wheels/*.whl); \
     fi && \
     python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
 
 # Copy extension wheels from extensions-build stage for later use
-COPY --from=extensions-build /tmp/deepgemm/dist /tmp/deepgemm/dist
 COPY --from=extensions-build /tmp/ep_kernels_workspace/dist /tmp/ep_kernels_workspace/dist
 
 # Check the size of the wheel if RUN_WHEEL_CHECK is true
@@ -449,6 +557,7 @@ FROM base AS dev
 ARG PIP_INDEX_URL UV_INDEX_URL
 ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
 ARG PYTORCH_CUDA_INDEX_BASE_URL
+ARG BUILD_OS
 
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
@@ -458,7 +567,11 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
 ENV UV_LINK_MODE=copy
 
 # Install libnuma-dev, required by fastsafetensors (fixes #20384)
-RUN apt-get update && apt-get install -y --no-install-recommends libnuma-dev && rm -rf /var/lib/apt/lists/*
+RUN if [ "${BUILD_OS}" = "manylinux" ]; then \
+        dnf install -y numactl-devel && dnf clean all && rm -rf /var/cache/dnf; \
+    else \
+        apt-get update && apt-get install -y --no-install-recommends libnuma-dev && rm -rf /var/lib/apt/lists/*; \
+    fi
 
 
 # We can specify the standard or nightly build of PyTorch
@@ -466,17 +579,17 @@ ARG PYTORCH_NIGHTLY
 
 # Install development dependencies
 COPY requirements/lint.txt requirements/lint.txt
-COPY requirements/test.in requirements/test.in
-COPY requirements/test.txt requirements/test.txt
+COPY requirements/test/cuda.in requirements/test/cuda.in
+COPY requirements/test/cuda.txt requirements/test/cuda.txt
 COPY requirements/dev.txt requirements/dev.txt
 COPY use_existing_torch.py use_existing_torch.py
 COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
         echo "Installing dev requirements plus torch nightly..." \
         && python3 use_existing_torch.py --prefix \
-        && cat torch_lib_versions.txt >> requirements/test.in \
-        && uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match \
+        && cat torch_lib_versions.txt >> requirements/test/cuda.in \
+        && uv pip compile requirements/test/cuda.in -o requirements/test/cuda.txt --index-strategy unsafe-best-match \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
         && uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | xargs) --pre \
         -r requirements/dev.txt \
@@ -546,26 +659,75 @@ RUN apt-get update -y \
 # Install CUDA development tools for runtime JIT compilation
 # (FlashInfer, DeepGEMM, EP kernels all require compilation at runtime)
 RUN CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-') && \
+    CUDA_VERSION_SHORT=$(echo $CUDA_VERSION | cut -d. -f1,2) && \
     apt-get update -y && \
-    apt-get install -y --no-install-recommends \
+    apt-get install -y --no-install-recommends --allow-change-held-packages \
         cuda-nvcc-${CUDA_VERSION_DASH} \
         cuda-cudart-${CUDA_VERSION_DASH} \
         cuda-nvrtc-${CUDA_VERSION_DASH} \
         cuda-cuobjdump-${CUDA_VERSION_DASH} \
         libcurand-dev-${CUDA_VERSION_DASH} \
-        libcublas-${CUDA_VERSION_DASH} \
-        # Fixes nccl_allocator requiring nccl.h at runtime
-        # https://github.com/vllm-project/vllm/blob/1336a1ea244fa8bfd7e72751cabbdb5b68a0c11a/vllm/distributed/device_communicators/pynccl_allocator.py#L22
-        libnccl-dev && \
+        libcublas-dev-${CUDA_VERSION_DASH} \
+        # Required by fastsafetensors (fixes #20384)
+        libnuma-dev \
+        # numactl CLI for NUMA binding at runtime
+        numactl && \
+    # Fixes nccl_allocator requiring nccl.h at runtime
+    # https://github.com/vllm-project/vllm/blob/1336a1ea244fa8bfd7e72751cabbdb5b68a0c11a/vllm/distributed/device_communicators/pynccl_allocator.py#L22
+    # NCCL packages don't use the cuda-MAJOR-MINOR naming convention,
+    # so we pin the version to match our CUDA version
+    NCCL_VER=$(apt-cache madison libnccl-dev | grep "+cuda${CUDA_VERSION_SHORT}" | head -1 | awk -F'|' '{gsub(/^ +| +$/, "", $2); print $2}') && \
+    apt-get install -y --no-install-recommends --allow-change-held-packages libnccl-dev=${NCCL_VER} libnccl2=${NCCL_VER} && \
     rm -rf /var/lib/apt/lists/*
 
 # Install uv for faster pip installs
 RUN python3 -m pip install uv
 
 # Environment for uv
+# Redirect uv's managed Python and download cache out of /root/ so downstream
+# images (`FROM vllm/vllm-openai` + `USER <uid>`) and direct non-root runs
+# (`docker run --user <uid>:<gid>`) can read and execute them. See #15174,
+# #15359, #31959.
 ENV UV_HTTP_TIMEOUT=500
 ENV UV_INDEX_STRATEGY="unsafe-best-match"
 ENV UV_LINK_MODE=copy
+ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
+ENV UV_CACHE_DIR=/opt/uv/cache
+RUN mkdir -p "${UV_PYTHON_INSTALL_DIR}" "${UV_CACHE_DIR}" \
+    && chgrp -R 0 /opt/uv \
+    && chmod -R g+rwX,a+rX /opt/uv
+
+# ----------------------------------------------------------------------
+# Non-root support (opt-in)
+# ----------------------------------------------------------------------
+# Create a conventional `vllm` user (UID 2000, GID 0) so the image can be
+# run under `--user 2000:0` or the opt-in `vllm-openai-nonroot` target.
+#
+# Design notes:
+#   * GID 0 + group-writable cache dirs follow the OpenShift arbitrary-UID
+#     pattern, so any UID that is a member of group 0 at runtime can write
+#     to /home/vllm and /opt/uv without additional chown work.
+#   * The default `vllm-openai` image keeps `USER root`, so every existing
+#     `docker run vllm/vllm-openai ...` / K8s manifest / `FROM vllm/vllm-openai`
+#     + `RUN uv pip install --system ...` flow is unchanged.
+#   * The entrypoint wrapper below is only used by `vllm-openai-nonroot`; it
+#     handles the OpenShift arbitrary-UID case (UID not in /etc/passwd).
+# See #31959 and docs/deployment/docker.md.
+RUN useradd --uid 2000 --gid 0 --create-home --home-dir /home/vllm \
+        --shell /bin/bash vllm \
+    && mkdir -p /home/vllm/.cache /home/vllm/.config \
+    && chown -R 2000:0 /home/vllm \
+    && chmod -R g+rwX /home/vllm \
+    # Allow the entrypoint wrapper to append a /etc/passwd entry for an
+    # arbitrary runtime UID that shares GID 0. Without this, `whoami`, bash's
+    # `\u` prompt, `id -un`, and anything else that calls `getpwuid()`
+    # directly return "I have no name!" for OpenShift-style arbitrary UIDs.
+    # This matches the convention used by Red Hat UBI base images.
+    && chgrp 0 /etc/passwd /etc/group \
+    && chmod g=u /etc/passwd /etc/group
+COPY docker/entrypoints/vllm-nonroot-entrypoint.sh \
+    /usr/local/bin/vllm-nonroot-entrypoint.sh
+RUN chmod 0755 /usr/local/bin/vllm-nonroot-entrypoint.sh
 
 # Enable CUDA forward compatibility by setting '-e VLLM_ENABLE_CUDA_COMPATIBILITY=1'
 # Only needed for datacenter/professional GPUs with older drivers.
@@ -582,7 +744,10 @@ ENV VLLM_ENABLE_CUDA_COMPATIBILITY=0
 ARG PYTORCH_CUDA_INDEX_BASE_URL
 COPY requirements/common.txt /tmp/common.txt
 COPY requirements/cuda.txt /tmp/requirements-cuda.txt
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
+    if [ "$(echo $CUDA_VERSION | cut -d. -f1)" = "12" ]; then \
+        sed -i 's/^nvidia-cutlass-dsl\[cu13\]>=/nvidia-cutlass-dsl>=/' /tmp/requirements-cuda.txt; \
+    fi && \
     uv pip install --system -r /tmp/requirements-cuda.txt \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && \
     rm /tmp/requirements-cuda.txt /tmp/common.txt
@@ -590,30 +755,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Install FlashInfer JIT cache (requires CUDA-version-specific index URL)
 # https://docs.flashinfer.ai/installation.html
 # From versions.json: .flashinfer.version
-ARG FLASHINFER_VERSION=0.6.6
-RUN --mount=type=cache,target=/root/.cache/uv \
+ARG FLASHINFER_VERSION=0.6.11.post2
+RUN --mount=type=cache,target=/opt/uv/cache \
     uv pip install --system flashinfer-jit-cache==${FLASHINFER_VERSION} \
-        --extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
-    && flashinfer show-config
-
-# Pre-download FlashInfer TRTLLM BMM headers for air-gapped environments.
-# At runtime, MoE JIT compilation downloads these from edge.urm.nvidia.com
-# which fails without internet. This step caches them at build time.
-RUN python3 <<'PYEOF'
-from flashinfer.jit import env as jit_env
-from flashinfer.jit.cubin_loader import download_trtllm_headers, get_cubin
-from flashinfer.artifacts import ArtifactPath, CheckSumHash
-
-download_trtllm_headers(
-    'bmm',
-    jit_env.FLASHINFER_CUBIN_DIR / 'flashinfer' / 'trtllm' / 'batched_gemm' / 'trtllmGen_bmm_export',
-    f'{ArtifactPath.TRTLLM_GEN_BMM}/include/trtllmGen_bmm_export',
-    ArtifactPath.TRTLLM_GEN_BMM,
-    get_cubin(f'{ArtifactPath.TRTLLM_GEN_BMM}/checksums.txt', CheckSumHash.TRTLLM_GEN_BMM),
-)
-
-print('FlashInfer TRTLLM BMM headers downloaded successfully')
-PYEOF
+        --extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 
 # ============================================================
 # OPENAI API SERVER DEPENDENCIES
@@ -643,13 +788,13 @@ ARG BITSANDBYTES_VERSION_X86=0.46.1
 ARG BITSANDBYTES_VERSION_ARM64=0.42.0
 ARG TIMM_VERSION=">=1.0.17"
 ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.7"
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
         BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; \
     else \
         BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \
     fi; \
-    uv pip install --system accelerate hf_transfer modelscope \
+    uv pip install --system accelerate modelscope \
         "bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs,azure]${RUNAI_MODEL_STREAMER_VERSION}"
 
 # ============================================================
@@ -668,7 +813,7 @@ ARG PYTORCH_NIGHTLY
 # Check whether to install torch nightly instead of release for this build.
 COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
-    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=cache,target=/opt/uv/cache \
     if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
         echo "Installing torch nightly..." \
         && uv pip install --system $(cat torch_lib_versions.txt | xargs) --pre \
@@ -682,28 +827,26 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
     fi
 
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
 . /etc/environment && \
 uv pip list
 
-# Install deepgemm wheel that has been built in the `build` stage
-RUN --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,from=build,source=/tmp/deepgemm/dist,target=/tmp/deepgemm/dist,ro \
-    sh -c 'if ls /tmp/deepgemm/dist/*.whl >/dev/null 2>&1; then \
-              uv pip install --system /tmp/deepgemm/dist/*.whl; \
-           else \
-              echo "No DeepGEMM wheels to install; skipping."; \
-           fi'
-
 # Pytorch now installs NVSHMEM, setting LD_LIBRARY_PATH
 ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
 
 # Install EP kernels wheels (DeepEP) that have been built in the `build` stage
 RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \
-    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=cache,target=/opt/uv/cache \
     uv pip install --system ep_kernels/dist/*.whl --verbose \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 
+# Download FlashInfer precompiled cubins AFTER all pip installs are done.
+# This must run after the vLLM wheel and EP kernels installs above, because
+# those can reinstall/touch flashinfer packages. Downloading cubins earlier
+# (in the flashinfer-jit-cache layer) causes ~2.5 GB of layer duplication
+# when a later pip install overwrites flashinfer package files.
+RUN flashinfer show-config && flashinfer download-cubin
+
 # CUDA image changed from /usr/local/nvidia to /usr/local/cuda in 12.8 but will
 # return to /usr/local/nvidia in 13.0 to allow container providers to mount drivers
 # consistently from the host (see https://github.com/vllm-project/vllm/issues/18859).
@@ -743,19 +886,19 @@ ARG PYTORCH_NIGHTLY
 
 # Install development dependencies (for testing)
 COPY requirements/lint.txt requirements/lint.txt
-COPY requirements/test.in requirements/test.in
-COPY requirements/test.txt requirements/test.txt
+COPY requirements/test/cuda.in requirements/test/cuda.in
+COPY requirements/test/cuda.txt requirements/test/cuda.txt
 COPY requirements/dev.txt requirements/dev.txt
 COPY use_existing_torch.py use_existing_torch.py
 COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
     if [ "$CUDA_MAJOR" -ge 12 ]; then \
         if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
             echo "Installing dev requirements plus torch nightly..." \
             && python3 use_existing_torch.py --prefix \
-            && cat torch_lib_versions.txt >> requirements/test.in \
-            && uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match \
+            && cat torch_lib_versions.txt >> requirements/test/cuda.in \
+            && uv pip compile requirements/test/cuda.in -o requirements/test/cuda.txt --index-strategy unsafe-best-match \
             --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
             && uv pip install --system $(cat torch_lib_versions.txt | xargs) --pre \
             -r requirements/dev.txt \
@@ -768,13 +911,14 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     fi
 
 # install development dependencies (for testing)
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     uv pip install --system -e tests/vllm_test_utils
 
 # enable fast downloads from hf (for testing)
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system hf_transfer
-ENV HF_HUB_ENABLE_HF_TRANSFER 1
+ENV HF_XET_HIGH_PERFORMANCE 1
+
+# increase timeout for hf downloads (for testing)
+ENV HF_HUB_DOWNLOAD_TIMEOUT 60
 
 # Copy in the v1 package for testing (it isn't distributed yet)
 COPY vllm/v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1
@@ -792,6 +936,10 @@ FROM vllm-base AS vllm-openai-base
 ARG TARGETPLATFORM
 ARG INSTALL_KV_CONNECTORS=false
 ARG CUDA_VERSION
+ARG VLLM_BUILD_COMMIT
+ARG VLLM_BUILD_PIPELINE
+ARG VLLM_BUILD_URL
+ARG VLLM_IMAGE_TAG
 
 ARG PIP_INDEX_URL UV_INDEX_URL
 ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
@@ -801,9 +949,9 @@ ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
 ENV UV_HTTP_TIMEOUT=500
 
 # install kv_connectors if requested
-ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
+ARG torch_cuda_arch_list='7.5 8.0 8.6 8.9 9.0 10.0 11.0 12.0+PTX'
 ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
-RUN --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/opt/uv/cache \
     --mount=type=bind,source=requirements/kv_connectors.txt,target=/tmp/kv_connectors.txt,ro \
     CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
     CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-'); \
@@ -813,30 +961,90 @@ RUN --mount=type=cache,target=/root/.cache/uv \
                 libcublas-dev-${CUDA_VERSION_DASH} \
                 libcusolver-dev-${CUDA_VERSION_DASH}"; \
     if [ "$INSTALL_KV_CONNECTORS" = "true" ]; then \
-        if [ "$CUDA_MAJOR" -ge 13 ]; then \
-            uv pip install --system nixl-cu13; \
-        fi; \
         uv pip install --system -r /tmp/kv_connectors.txt --no-build || ( \
             # if the above fails, install from source
             apt-get update -y && \
-            apt-get install -y --no-install-recommends ${BUILD_PKGS} && \
+            apt-get install -y --no-install-recommends --allow-change-held-packages ${BUILD_PKGS} && \
             uv pip install --system -r /tmp/kv_connectors.txt --no-build-isolation && \
             apt-get purge -y ${BUILD_PKGS} && \
             # clean up -dev packages, keep runtime libraries
             rm -rf /var/lib/apt/lists/* \
         ); \
+        # Force-reinstall the matching CUDA wheel so the correct nixl_ep_cpp.so is installed.
+        uv pip install --system --force-reinstall --no-deps nixl-cu${CUDA_MAJOR}; \
+    fi
+
+# Optional override: install mooncake-transfer-engine from a URL instead of the
+# PyPI release pulled in above. Use this for wheels built with non-default CMake
+# flags (e.g. `STORE_USE_ETCD=ON` for master HA). The URL's manylinux glibc
+# floor must be <= the FINAL_BASE_IMAGE's glibc.
+ARG MOONCAKE_WHEEL_AARCH64
+ARG MOONCAKE_WHEEL_X86_64
+RUN if [ "$INSTALL_KV_CONNECTORS" = "true" ]; then \
+        if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
+            WHEEL="${MOONCAKE_WHEEL_AARCH64}"; \
+        else \
+            WHEEL="${MOONCAKE_WHEEL_X86_64}"; \
+        fi && \
+        if [ -n "${WHEEL}" ]; then \
+            uv pip install --system "${WHEEL}" && \
+            CUDA_MAJOR="${CUDA_VERSION%%.*}" && \
+            if [ ! -f /usr/local/cuda/lib64/libcudart.so ] && \
+               [ -f "/usr/local/cuda/lib64/libcudart.so.${CUDA_MAJOR}" ]; then \
+                ln -s "libcudart.so.${CUDA_MAJOR}" /usr/local/cuda/lib64/libcudart.so; \
+            fi; \
+        fi; \
     fi
 
 ENV VLLM_USAGE_SOURCE production-docker-image
+ENV VLLM_BUILD_COMMIT=${VLLM_BUILD_COMMIT:-unknown} \
+    VLLM_BUILD_PIPELINE=${VLLM_BUILD_PIPELINE:-local} \
+    VLLM_BUILD_URL=${VLLM_BUILD_URL:-} \
+    VLLM_IMAGE_TAG=${VLLM_IMAGE_TAG:-local/vllm-openai:dev}
+LABEL org.opencontainers.image.source="https://github.com/vllm-project/vllm" \
+      org.opencontainers.image.revision="${VLLM_BUILD_COMMIT}" \
+      org.opencontainers.image.version="${VLLM_IMAGE_TAG}" \
+      org.opencontainers.image.url="${VLLM_BUILD_URL}" \
+      ai.vllm.build.commit="${VLLM_BUILD_COMMIT}" \
+      ai.vllm.build.pipeline="${VLLM_BUILD_PIPELINE}" \
+      ai.vllm.build.url="${VLLM_BUILD_URL}" \
+      ai.vllm.image.tag="${VLLM_IMAGE_TAG}"
 
 # define sagemaker first, so it is not default from `docker build`
 FROM vllm-openai-base AS vllm-sagemaker
 
-COPY examples/online_serving/sagemaker-entrypoint.sh .
+COPY examples/deployment/sagemaker-entrypoint.sh .
 RUN chmod +x sagemaker-entrypoint.sh
 ENTRYPOINT ["./sagemaker-entrypoint.sh"]
 
 FROM vllm-openai-base AS vllm-openai
 
+# To run the image as non-root, either build the `vllm-openai-nonroot` target
+# below, or in a derived Dockerfile uncomment the following line and ensure
+# any additional layers chgrp-0 / chmod-g+rwX paths they write to. The `vllm`
+# user (UID 2000, GID 0) is already created in the `vllm-base` stage.
+# See docs/deployment/docker.md.
+# USER vllm
 ENTRYPOINT ["vllm", "serve"]
 #################### OPENAI API SERVER ####################
+
+#################### OPENAI API SERVER (NON-ROOT, OPT-IN) ####################
+# Non-root-ready variant of `vllm-openai`. Built via:
+#   docker build --target vllm-openai-nonroot -t vllm:openai-nonroot \
+#       -f docker/Dockerfile .
+#
+# Runtime behavior:
+#   * Default USER is `vllm` (UID 2000, GID 0) created in `vllm-base`.
+#   * HOME is /home/vllm, pre-created group-0-writable so arbitrary UIDs in
+#     group 0 (OpenShift / `--user <uid>:0`) can also use the image.
+#   * Entrypoint wrapper handles the "UID not in /etc/passwd" case for truly
+#     arbitrary UIDs by falling back HOME/USER to sane writable defaults.
+#   * All cache/config envs (HF_HOME, VLLM_CACHE_ROOT, TRITON_CACHE_DIR, ...)
+#     remain unset so their library defaults resolve to $HOME/.cache/... ,
+#     which is writable.
+FROM vllm-openai AS vllm-openai-nonroot
+
+USER vllm
+WORKDIR /home/vllm
+ENTRYPOINT ["/usr/local/bin/vllm-nonroot-entrypoint.sh"]
+#################### OPENAI API SERVER (NON-ROOT, OPT-IN) ####################
diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu
index abae5d1bedcb..c2ce0e88f296 100644
--- a/docker/Dockerfile.cpu
+++ b/docker/Dockerfile.cpu
@@ -27,15 +27,19 @@ WORKDIR /workspace
 ARG PYTHON_VERSION=3.12
 ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
 
+ARG max_jobs=32
+ENV MAX_JOBS=${max_jobs}
+
 # Install minimal dependencies and uv
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
     --mount=type=cache,target=/var/lib/apt,sharing=locked \
     apt-get update -y \
-    && apt-get install -y --no-install-recommends sudo ccache git curl wget ca-certificates \
+    && apt-get install -y --no-install-recommends sudo ccache git curl wget ca-certificates zlib1g-dev \
     gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 jq lsof make xz-utils \
     && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
     && curl -LsSf https://astral.sh/uv/install.sh | sh
 
+# Compiler and linker environment
 ENV CC=/usr/bin/gcc-12 CXX=/usr/bin/g++-12
 ENV CCACHE_DIR=/root/.cache/ccache
 ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
@@ -80,12 +84,48 @@ FROM base-${TARGETARCH} AS base
 
 RUN echo 'ulimit -c 0' >> ~/.bashrc
 
+######################### RUST BUILD IMAGE #########################
+# Build the Rust frontend (`vllm-rs`) in a dedicated stage so the wheel build
+# stage doesn't need the rust toolchain or protoc. This stage runs in parallel
+# with the main vllm-build stage.
+FROM ubuntu:22.04 AS rust-build
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update -y \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates curl git build-essential unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY tools/install_protoc.sh /tmp/install_protoc.sh
+RUN /tmp/install_protoc.sh && rm /tmp/install_protoc.sh
+
+# Install rustup; the toolchain itself is pinned by rust-toolchain.toml.
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | \
+    sh -s -- -y --profile minimal --default-toolchain none
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+WORKDIR /workspace
+
+# Copy only the rust workspace — the binary is the sole artifact we need.
+COPY rust rust
+COPY rust-toolchain.toml rust-toolchain.toml
+COPY build_rust.sh build_rust.sh
+
+# Cap cargo parallelism to avoid exhausting the CI host's open-file limit
+# (rustc spawns enough concurrent processes to hit RLIMIT_NOFILE otherwise).
+ENV CARGO_BUILD_JOBS=4
+
+# Build the release binary. Cache cargo registry/git and target/, but copy the
+# binary out of the target/ cache mount so it persists into the image layer
+# for later COPY --from=rust-build.
+RUN --mount=type=cache,target=/root/.cargo/registry \
+    --mount=type=cache,target=/root/.cargo/git \
+    --mount=type=cache,target=/workspace/rust/target \
+    VLLM_RS_TARGET_PATH=/workspace/vllm-rs bash build_rust.sh
+
 ######################### BUILD IMAGE #########################
 FROM base AS vllm-build
 
-ARG max_jobs=32
-ENV MAX_JOBS=${max_jobs}
-
 ARG GIT_REPO_CHECK=0
 # Support for cross-compilation with x86 ISA including AVX2 and AVX512: docker build --build-arg VLLM_CPU_X86="true" ...
 ARG VLLM_CPU_X86=0
@@ -107,13 +147,17 @@ RUN if [ "$TARGETARCH" = "arm64" ] && [ "$VLLM_CPU_X86" != "0" ]; then \
     fi
 
 # Copy build requirements
-COPY requirements/cpu-build.txt requirements/build.txt
+COPY requirements/build/cpu.txt requirements/build/cpu.txt
 
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install -r requirements/build.txt
+    uv pip install -r requirements/build/cpu.txt
 
 COPY . .
 
+# Drop the pre-built rust frontend binary into the source tree. setup.py
+# detects it and ships it as-is, skipping the local cargo build.
+COPY --from=rust-build /workspace/vllm-rs vllm/vllm-rs
+
 RUN if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi
 
 RUN --mount=type=cache,target=/root/.cache/uv \
@@ -127,26 +171,28 @@ FROM base AS vllm-test-deps
 WORKDIR /vllm-workspace
 
 # Copy test requirements
-COPY requirements/test.in requirements/cpu-test.in
+COPY requirements/test/cuda.in requirements/test/cpu.in
 
 RUN \
-    sed -i '/mamba_ssm/d' requirements/cpu-test.in && \
+    sed -i '/mamba_ssm/d' requirements/test/cpu.in && \
     remove_packages_not_supported_on_aarch64() { \
     case "$(uname -m)" in \
     aarch64|arm64) \
-    sed -i '/decord/d' requirements/cpu-test.in; \
-    sed -i '/terratorch/d' requirements/cpu-test.in; \
+    sed -i '/decord/d' requirements/test/cpu.in; \
+    sed -i '/terratorch/d' requirements/test/cpu.in; \
     ;; \
     esac; \
     }; \
     remove_packages_not_supported_on_aarch64 && \
-    sed -i 's/^torch==.*/torch==2.10.0/g' requirements/cpu-test.in && \
-    sed -i 's/torchaudio.*/torchaudio/g' requirements/cpu-test.in && \
-    sed -i 's/torchvision.*/torchvision/g' requirements/cpu-test.in && \
-    uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu
+    sed -i 's/^torch==.*/torch==2.11.0/g' requirements/test/cpu.in && \
+    sed -i 's/torchaudio.*/torchaudio/g' requirements/test/cpu.in && \
+    sed -i 's/torchvision.*/torchvision/g' requirements/test/cpu.in && \
+    # Related issue: https://github.com/vllm-project/vllm/pull/38800#issuecomment-4228314305
+    sed -i 's/^sentence-transformers.*/sentence-transformers==5.3.0/g' requirements/test/cpu.in && \
+    uv pip compile requirements/test/cpu.in -o requirements/test/cpu.txt --index-strategy unsafe-best-match --torch-backend cpu
 
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install -r requirements/cpu-test.txt
+    uv pip install -r requirements/test/cpu.txt
 
 ######################### DEV IMAGE #########################
 FROM vllm-build AS vllm-dev
@@ -168,10 +214,11 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,source=.git,target=.git \
     VLLM_TARGET_DEVICE=cpu python3 setup.py develop
 
-COPY --from=vllm-test-deps /vllm-workspace/requirements/cpu-test.txt requirements/test.txt
+COPY --from=vllm-test-deps /vllm-workspace/requirements/test/cpu.txt requirements/test/cpu.txt
 
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install -r requirements/dev.txt && \
+    uv pip install -r requirements/lint.txt && \
+    uv pip install -r requirements/test/cpu.txt && \
     pre-commit install --hook-type pre-commit --hook-type commit-msg
 
 ENTRYPOINT ["bash"]
@@ -189,12 +236,19 @@ ADD ./tests/ ./tests/
 ADD ./examples/ ./examples/
 ADD ./benchmarks/ ./benchmarks/
 ADD ./vllm/collect_env.py .
+ADD ./docker/ ./docker/
 ADD ./.buildkite/ ./.buildkite/
 
 # install development dependencies (for testing)
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install -e tests/vllm_test_utils
 
+# enable fast downloads from hf (for testing)
+ENV HF_XET_HIGH_PERFORMANCE 1
+
+# increase timeout for hf downloads (for testing)
+ENV HF_HUB_DOWNLOAD_TIMEOUT 60
+
 ######################### RELEASE IMAGE #########################
 FROM base AS vllm-openai
 
@@ -203,7 +257,7 @@ WORKDIR /vllm-workspace
 RUN --mount=type=cache,target=/root/.cache/uv \
     --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=bind,from=vllm-build,src=/vllm-workspace/dist,target=dist \
-    uv pip install dist/*.whl
+    uv pip install "$(realpath dist/*.whl)[audio,triton-cpu]"
 
 # Add labels to document build configuration
 LABEL org.opencontainers.image.title="vLLM CPU"
diff --git a/docker/Dockerfile.nightly_torch b/docker/Dockerfile.nightly_torch
index 5c424980ee2d..0d5a9cc5f83a 100644
--- a/docker/Dockerfile.nightly_torch
+++ b/docker/Dockerfile.nightly_torch
@@ -77,7 +77,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system $pkgs --index-url https://download.pytorch.org/whl/nightly/cu128
 
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system numba==0.61.2
+    uv pip install --system numba==0.65.0
 
 RUN --mount=type=cache,target=/root/.cache/uv \
 uv pip install --system -r requirements/common.txt
@@ -94,6 +94,41 @@ RUN cat torch_build_versions.txt
 
 #################### BASE BUILD IMAGE ####################
 
+#################### RUST BUILD IMAGE ####################
+# Build the Rust frontend (`vllm-rs`) in a dedicated stage so the wheel build
+# stage doesn't need the rust toolchain or protoc.
+FROM ubuntu:22.04 AS rust-build
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update -y \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates curl git build-essential unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY tools/install_protoc.sh /tmp/install_protoc.sh
+RUN /tmp/install_protoc.sh && rm /tmp/install_protoc.sh
+
+# Install rustup; the toolchain itself is pinned by rust-toolchain.toml.
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | \
+    sh -s -- -y --profile minimal --default-toolchain none
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+WORKDIR /workspace
+
+COPY rust rust
+COPY rust-toolchain.toml rust-toolchain.toml
+COPY build_rust.sh build_rust.sh
+
+# Cap cargo parallelism to avoid exhausting the CI host's open-file limit
+# (rustc spawns enough concurrent processes to hit RLIMIT_NOFILE otherwise).
+ENV CARGO_BUILD_JOBS=4
+
+RUN --mount=type=cache,target=/root/.cargo/registry \
+    --mount=type=cache,target=/root/.cargo/git \
+    --mount=type=cache,target=/workspace/rust/target \
+    VLLM_RS_TARGET_PATH=/workspace/vllm-rs bash build_rust.sh
+#################### RUST BUILD IMAGE ####################
+
 #################### WHEEL BUILD IMAGE ####################
 FROM base AS build
 ARG TARGETPLATFORM
@@ -104,10 +139,14 @@ ENV UV_HTTP_TIMEOUT=500
 
 COPY . .
 
+# Drop the pre-built rust frontend binary into the source tree. setup.py
+# detects it and ships it as-is, skipping the local cargo build.
+COPY --from=rust-build /workspace/vllm-rs vllm/vllm-rs
+
 RUN python3 use_existing_torch.py
 
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system -r requirements/build.txt
+    uv pip install --system -r requirements/build/cuda.txt
 
 ARG GIT_REPO_CHECK=0
 RUN --mount=type=bind,source=.git,target=.git \
@@ -217,13 +256,13 @@ RUN pip install setuptools==75.6.0 packaging==23.2 ninja==1.11.1.3 build==1.2.2.
 
 
 # build flashinfer for torch nightly from source around 10 mins
-# release version: v0.6.6
+# release version: v0.6.11.post2
 # todo(elainewy): cache flashinfer build result for faster build
 ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/uv \
     echo "git clone flashinfer..." \
-    && git clone --depth 1 --branch v0.6.6 --recursive https://github.com/flashinfer-ai/flashinfer.git \
+    && git clone --depth 1 --branch v0.6.11.post2 --recursive https://github.com/flashinfer-ai/flashinfer.git \
     && cd flashinfer \
     && git submodule update --init --recursive \
     && echo "finish git clone flashinfer..." \
@@ -258,7 +297,7 @@ FROM vllm-base as test
 COPY tests/ tests/
 
 # install build and runtime dependencies without stable torch version
-COPY requirements/nightly_torch_test.txt requirements/nightly_torch_test.txt
+COPY requirements/test/nightly-torch.txt requirements/test/nightly-torch.txt
 
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
@@ -269,12 +308,13 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system -e tests/vllm_test_utils
 
 # enable fast downloads from hf (for testing)
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system hf_transfer
-ENV HF_HUB_ENABLE_HF_TRANSFER 1
+ENV HF_XET_HIGH_PERFORMANCE 1
+
+# increase timeout for hf downloads (for testing)
+ENV HF_HUB_DOWNLOAD_TIMEOUT 60
 
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system -r requirements/nightly_torch_test.txt
+    uv pip install --system -r requirements/test/nightly-torch.txt
 
 # Logging to confirm the torch versions
 RUN pip freeze | grep -E 'torch|vllm|flashinfer'
diff --git a/docker/Dockerfile.ppc64le b/docker/Dockerfile.ppc64le
index 07b64a509a4b..845d900c39cd 100644
--- a/docker/Dockerfile.ppc64le
+++ b/docker/Dockerfile.ppc64le
@@ -251,7 +251,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     make -C /numactl install && \
     # sentencepiece.pc is in some pkgconfig inside uv cache
     export PKG_CONFIG_PATH=$(find / -type d -name "pkgconfig" 2>/dev/null | tr '\n' ':') && \
-    nanobind_DIR=$(uv pip show nanobind | grep Location | sed 's/^Location: //;s/$/\/nanobind\/cmake/') && uv pip install -r /src/requirements/common.txt -r /src/requirements/cpu.txt -r /src/requirements/build.txt --no-build-isolation && \
+    nanobind_DIR=$(uv pip show nanobind | grep Location | sed 's/^Location: //;s/$/\/nanobind\/cmake/') && uv pip install -r /src/requirements/common.txt -r /src/requirements/cpu.txt -r /src/requirements/build/cuda.txt --no-build-isolation && \
     cd /src/ && \
     uv build --wheel --out-dir /vllmwheel/ --no-build-isolation && \
     uv pip install /vllmwheel/*.whl
diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm
index 6db6d8b83598..b2342200a682 100644
--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@@ -2,6 +2,19 @@
 ARG REMOTE_VLLM="0"
 ARG COMMON_WORKDIR=/app
 ARG BASE_IMAGE=rocm/vllm-dev:base
+# NIC backend for MoRI RDMA support.
+# By default (all), drivers and userspace libraries for all supported NIC types
+# (ainic and bnxt) are installed; MoRI selects the appropriate one at runtime.
+# To install drivers for a single NIC type only, set NIC_BACKEND explicitly:
+#   --build-arg NIC_BACKEND=ainic   # AMD AINIC (Pensando) only
+#   --build-arg NIC_BACKEND=bnxt    # Broadcom Thor-2 only
+#   --build-arg NIC_BACKEND=none    # Install nothing.
+ARG NIC_BACKEND=all
+# AMD AINIC apt repo settings
+# Users can specify a custom version compatible with their host drivers.
+# The default version has been tested with ioinic-dkms=25.11.1.001
+ARG AINIC_VERSION=1.117.3-hydra
+ARG UBUNTU_CODENAME=jammy
 
 # Sccache configuration (only used in release pipeline)
 ARG USE_SCCACHE
@@ -19,7 +32,8 @@ ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}}
 # Install some basic utilities
 RUN apt-get update -q -y && apt-get install -q -y \
     sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev \
-    apt-transport-https ca-certificates wget curl
+    apt-transport-https ca-certificates wget curl \
+    libnuma-dev
 RUN python3 -m pip install --upgrade pip
 # Remove sccache only if not using sccache (it exists in base image from Dockerfile.rocm_base)
 ARG USE_SCCACHE
@@ -97,9 +111,47 @@ ONBUILD RUN git clone ${VLLM_REPO} \
                && git fetch upstream ; fi
 FROM fetch_vllm_${REMOTE_VLLM} AS fetch_vllm
 
+# -----------------------
+# Rust build stage
+# Builds the `vllm-rs` frontend in a dedicated stage so the wheel build stages
+# don't need the rust toolchain or protoc. Runs in parallel with the main wheel
+# build for faster end-to-end builds.
+FROM fetch_vllm AS rust-build
+ARG COMMON_WORKDIR
+
+# protoc is used by tonic-build/prost-build.
+RUN apt-get update -q -y && apt-get install -q -y --no-install-recommends \
+        ca-certificates curl unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY tools/install_protoc.sh /tmp/install_protoc.sh
+RUN /tmp/install_protoc.sh && rm /tmp/install_protoc.sh
+
+# Install rustup; the toolchain itself is pinned by rust-toolchain.toml.
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | \
+    sh -s -- -y --profile minimal --default-toolchain none
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+# Cap cargo parallelism to avoid exhausting the AMD CI host's open-file limit
+# (rustc spawns enough concurrent processes to hit RLIMIT_NOFILE otherwise).
+ENV CARGO_BUILD_JOBS=4
+
+# Build the release binary. Cache cargo registry/git, and copy the binary out
+# so it persists into the image layer for later COPY --from=rust-build.
+RUN --mount=type=cache,target=/root/.cargo/registry \
+    --mount=type=cache,target=/root/.cargo/git \
+    cd ${COMMON_WORKDIR}/vllm \
+    && VLLM_RS_TARGET_PATH=/tmp/vllm-rs bash build_rust.sh
+
 # -----------------------
 # vLLM build stages
 FROM fetch_vllm AS build_vllm
+ARG COMMON_WORKDIR
+
+# Drop the pre-built rust frontend binary into the source tree. setup.py
+# detects it and ships it as-is, skipping the local cargo build.
+COPY --from=rust-build /tmp/vllm-rs ${COMMON_WORKDIR}/vllm/vllm/vllm-rs
+
 # Build vLLM (setup.py auto-detects sccache in PATH)
 RUN cd vllm \
     && python3 -m pip install -r requirements/rocm.txt \
@@ -118,10 +170,10 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1
 
 # RIXL/UCX build stages
 FROM base AS build_rixl
-ARG RIXL_BRANCH="f33a5599"
+ARG RIXL_BRANCH="39be1de8"
 ARG RIXL_REPO="https://github.com/ROCm/RIXL.git"
-ARG UCX_BRANCH="da3fac2a"
-ARG UCX_REPO="https://github.com/ROCm/ucx.git"
+ARG UCX_BRANCH="bfb51733"
+ARG UCX_REPO="https://github.com/openucx/ucx.git"
 ENV ROCM_PATH=/opt/rocm
 ENV UCX_HOME=/usr/local/ucx
 ENV RIXL_HOME=/usr/local/rixl
@@ -159,7 +211,7 @@ RUN cd /usr/local/src && \
         --disable-doxygen-doc \
         --enable-optimizations \
         --enable-devel-headers \
-        --with-rocm=/opt/rocm \
+        --with-rocm=${ROCM_PATH} \
         --with-verbs \
         --with-dm \
         --enable-mt && \
@@ -180,7 +232,13 @@ RUN git clone ${RIXL_REPO} /opt/rixl && \
     ninja install
 
 # Generate RIXL wheel
-RUN cd /opt/rixl && mkdir -p /app/install && \
+# Exclude libcore and libpull from auditwheel: transitive dependencies
+# that are not shipped in the wheel and vary across base images.
+RUN cd /opt/rixl && \
+    sed -i "s/--exclude 'libamdhip64\*'/--exclude 'libamdhip64*' --exclude 'libcore*' --exclude 'libpull*'/" \
+        contrib/build-wheel.sh && \
+    mkdir -p /app/install && \
+    _ucx_install_dir=${UCX_HOME} \
     ./contrib/build-wheel.sh \
         --output-dir /app/install \
         --rocm-dir ${ROCM_PATH} \
@@ -189,11 +247,12 @@ RUN cd /opt/rixl && mkdir -p /app/install && \
 
 # DeepEP build stage
 FROM base AS build_deep
-ARG ROCSHMEM_BRANCH="ba0bf0f3"
+ARG ROCSHMEM_BRANCH="f0acb0c6"
 ARG ROCSHMEM_REPO="https://github.com/ROCm/rocm-systems.git"
-ARG DEEPEP_BRANCH="e84464ec"
+ARG DEEPEP_BRANCH="a9ea9774"
 ARG DEEPEP_REPO="https://github.com/ROCm/DeepEP.git"
 ARG DEEPEP_NIC="cx7"
+ARG DEEPEP_ROCM_ARCH="gfx942;gfx950"
 ENV ROCSHMEM_DIR=/opt/rocshmem
 
 RUN git clone ${ROCSHMEM_REPO} \
@@ -201,20 +260,69 @@ RUN git clone ${ROCSHMEM_REPO} \
  && git checkout ${ROCSHMEM_BRANCH} \
  && mkdir -p projects/rocshmem/build \
  && cd projects/rocshmem/build \
- && cmake .. \
-    -DCMAKE_INSTALL_PREFIX="${ROCSHMEM_DIR}" \
-    -DROCM_PATH=/opt/rocm \
-    -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
-    -DUSE_EXTERNAL_MPI=OFF \
- && make -j \
- && make install
+ && INSTALL_PREFIX=${ROCSHMEM_DIR} \
+    ../scripts/build_configs/all_backends -DUSE_EXTERNAL_MPI=OFF
 
 # Build DeepEP wheel.
 # DeepEP looks for rocshmem at ROCSHMEM_DIR.
 RUN git clone ${DEEPEP_REPO} \
  && cd DeepEP \
  && git checkout ${DEEPEP_BRANCH} \
- && python3 setup.py --variant rocm --nic ${DEEPEP_NIC} bdist_wheel --dist-dir=/app/deep_install
+ && python3 setup.py --variant rocm --rocm-explicit-ctx --nic ${DEEPEP_NIC} bdist_wheel --dist-dir=/app/deep_install
+
+# MoRI runtime dependencies live in Dockerfile.rocm so NIC backend changes do
+# not force users to rebuild the long-lived Dockerfile.rocm_base image.
+FROM base AS mori_base
+ARG NIC_BACKEND
+ARG AINIC_VERSION
+ARG UBUNTU_CODENAME
+RUN /bin/bash -lc 'set -euo pipefail; \
+ \
+ install_ainic() { \
+   apt-get update && apt-get install -y --no-install-recommends ca-certificates curl gnupg apt-transport-https; \
+   rm -rf /var/lib/apt/lists/*; \
+   mkdir -p /etc/apt/keyrings; \
+   curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor > /etc/apt/keyrings/amdainic.gpg; \
+   echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/amdainic.gpg] https://repo.radeon.com/amdainic/pensando/ubuntu/${AINIC_VERSION} ${UBUNTU_CODENAME} main" \
+     > /etc/apt/sources.list.d/amdainic.list; \
+   apt-get update && apt-get install -y --no-install-recommends \
+     libionic-dev \
+     ionic-common \
+   ; \
+   rm -rf /var/lib/apt/lists/*; \
+ }; \
+ \
+ # NOTE: requires FW 235.2.86.0 and kernel drivers on the host: \
+ #   bnxt-en-dkms=1.10.3.235.2.86.0 bnxt-re-dkms=235.2.86.0 (from packages.broadcom.com PPA) \
+ install_bnxt() { \
+   install -m 0755 -d /etc/apt/keyrings; \
+   curl -fsSL https://packages.broadcom.com/artifactory/api/security/keypair/PackagesKey/public \
+     -o /etc/apt/keyrings/broadcom-nic.asc; \
+   chmod a+r /etc/apt/keyrings/broadcom-nic.asc; \
+   echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/broadcom-nic.asc] https://packages.broadcom.com/artifactory/ethernet-nic-debian-public jammy main" \
+     > /etc/apt/sources.list.d/broadcom-nic.list; \
+   apt-get update && apt-get install -y --no-install-recommends \
+     bnxt-rocelib=235.2.86.0 \
+   ; \
+   cp -a /usr/local/lib/x86_64-linux-gnu/libbnxt_re* /usr/local/lib/; \
+   ldconfig; \
+   rm -rf /var/lib/apt/lists/*; \
+ }; \
+ \
+ echo "[MORI] Install MoRI proxy deps"; \
+ pip install --quiet --ignore-installed blinker && \
+ pip install --quiet quart msgpack aiohttp pyzmq; \
+ echo "[MORI] NIC_BACKEND=${NIC_BACKEND}"; \
+ \
+ # NIC backend deps — mori auto-detects NIC at runtime (MORI_DEVICE_NIC env var override). \
+ # Only vendor packages are installed here for dlopen; no compile-time flags needed. \
+ case "${NIC_BACKEND}" in \
+   none)  ;; \
+   all)   install_ainic; install_bnxt ;; \
+   ainic) install_ainic ;; \
+   bnxt)  install_bnxt ;; \
+   *)     echo "ERROR: unknown NIC_BACKEND=${NIC_BACKEND}. Use one of: none, ainic, bnxt, all"; exit 2 ;; \
+ esac'
 
 # -----------------------
 # vLLM wheel release build stage (for building distributable wheels)
@@ -223,6 +331,10 @@ FROM fetch_vllm AS build_vllm_wheel_release
 
 ARG COMMON_WORKDIR
 
+# Drop the pre-built rust frontend binary into the source tree. setup.py
+# detects it and ships it as-is, skipping the local cargo build.
+COPY --from=rust-build /tmp/vllm-rs ${COMMON_WORKDIR}/vllm/vllm/vllm-rs
+
 # Create /install directory for custom wheels
 RUN mkdir -p /install
 
@@ -318,7 +430,7 @@ COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1
 
 # -----------------------
 # Test vLLM image
-FROM base AS test
+FROM mori_base AS test
 
 RUN python3 -m pip install --upgrade pip && rm -rf /var/lib/apt/lists/*
 
@@ -328,14 +440,24 @@ RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
     --mount=type=cache,target=/root/.cache/uv \
     cd /install \
     && uv pip install --system -r requirements/rocm.txt \
-    && uv pip install --system -r requirements/rocm-test.txt \
+    && uv pip install --system -r requirements/test/rocm.txt \
     && pip uninstall -y vllm \
     && uv pip install --system *.whl
 
-# Verify that PyTorch is the ROCm build, not CUDA
-RUN python3 -c "import torch; assert torch.version.hip is not None, \
-    f'Expected ROCm PyTorch but got CUDA (torch.version.cuda={torch.version.cuda}, torch.version.hip={torch.version.hip})'; \
-    print(f'Verified: PyTorch {torch.__version__} with ROCm (HIP {torch.version.hip})')"
+# Persist the built wheel in the image so python_only_compile_rocm.sh can
+# reinstall it after removing compilers. The bind-mounted /install contents
+# above are not available once that RUN step completes.
+COPY --from=export_vllm /*.whl /opt/vllm-wheels/
+
+# Update rdma-core to support latest rocshmem
+ARG DEEPEP_NIC
+RUN if [ "${DEEPEP_NIC}" = "cx7" ] || [ "${DEEPEP_NIC}" = "io" ]; then \
+    git clone --branch v62.0 --depth 1 https://github.com/linux-rdma/rdma-core.git /tmp/rdma-core && \
+    cd /tmp/rdma-core && \
+    mkdir -p build && cd build && \
+    cmake -GNinja -DCMAKE_INSTALL_PREFIX=/usr -DNO_MAN_PAGES=1 .. && \
+    ninja &&     ninja install &&     ldconfig &&     rm -rf /tmp/rdma-core; \
+fi
 
 # Install RIXL wheel
 RUN --mount=type=bind,from=build_rixl,src=/app/install,target=/rixl_install \
@@ -364,9 +486,10 @@ RUN cd /vllm-workspace \
     && python3 -m pip install pytest-shard
 
 # enable fast downloads from hf (for testing)
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system hf_transfer
-ENV HF_HUB_ENABLE_HF_TRANSFER=1
+ENV HF_XET_HIGH_PERFORMANCE=1
+
+# increase timeout for hf downloads (for testing)
+ENV HF_HUB_DOWNLOAD_TIMEOUT 60
 
 # install audio decode package `torchcodec` from source (required due to 
 # ROCm and torch version mismatch) for tests with datasets package
@@ -384,17 +507,35 @@ COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-pac
 ENV MIOPEN_DEBUG_CONV_DIRECT=0
 ENV MIOPEN_DEBUG_CONV_GEMM=0
 
+# Use legacy IPC mode for HSA to avoid GPU memory pinning issues with UCX rocm_ipc
+# See: https://github.com/ROCm/rocm-libraries/issues/6266
+ENV HSA_ENABLE_IPC_MODE_LEGACY=1
+
 # Source code is used in the `python_only_compile.sh` test
 # We hide it inside `src/` so that this source code
 # will not be imported by other tests
 RUN mkdir src && mv vllm src/vllm
 
 # This is a workaround to ensure pytest exits with the correct status code in CI tests.
-RUN echo "import os\n\ndef pytest_sessionfinish(session, exitstatus):\n    os._exit(int(exitstatus))" > /vllm-workspace/conftest.py
+RUN printf '%s\n' \
+    'import os' \
+    '' \
+    '_exit_code = 1' \
+    '' \
+    'def pytest_sessionfinish(session, exitstatus):' \
+    '    global _exit_code' \
+    '    _exit_code = int(exitstatus)' \
+    '' \
+    'def pytest_unconfigure(config):' \
+    '    import sys' \
+    '    sys.stdout.flush()' \
+    '    sys.stderr.flush()' \
+    '    os._exit(_exit_code)' \
+    > /vllm-workspace/conftest.py
 
 # -----------------------
 # Final vLLM image
-FROM base AS final
+FROM mori_base AS final
 
 RUN python3 -m pip install --upgrade pip && rm -rf /var/lib/apt/lists/*
 
@@ -429,14 +570,24 @@ RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
     && pip uninstall -y vllm \
     && uv pip install --system *.whl
 
+# Install RIXL wheel
+RUN --mount=type=bind,from=build_rixl,src=/app/install,target=/rixl_install \
+    uv pip install --system /rixl_install/*.whl
+
 ARG COMMON_WORKDIR
 ARG BASE_IMAGE
+ARG NIC_BACKEND
+ARG AINIC_VERSION
 
 # Copy over the benchmark scripts as well
 COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks
 COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples
 COPY --from=export_vllm /docker ${COMMON_WORKDIR}/vllm/docker
 
+# Use legacy IPC mode for HSA to avoid GPU memory pinning issues with UCX rocm_ipc
+# See: https://github.com/ROCm/rocm-libraries/issues/6266
+ENV HSA_ENABLE_IPC_MODE_LEGACY=1
+
 ENV TOKENIZERS_PARALLELISM=false
 
 # ENV that can improve safe tensor loading, and end-to-end time
@@ -448,7 +599,9 @@ ENV HIP_FORCE_DEV_KERNARG=1
 # Workaround for ROCm profiler limits
 RUN echo "ROCTRACER_MAX_EVENTS=10000000" > ${COMMON_WORKDIR}/libkineto.conf
 ENV KINETO_CONFIG="${COMMON_WORKDIR}/libkineto.conf"
-RUN echo "VLLM_BASE_IMAGE=${BASE_IMAGE}" >> ${COMMON_WORKDIR}/versions.txt
+RUN echo "VLLM_BASE_IMAGE=${BASE_IMAGE}" >> ${COMMON_WORKDIR}/versions.txt \
+    && echo "MORI_NIC_BACKEND=${NIC_BACKEND}" >> ${COMMON_WORKDIR}/versions.txt \
+    && echo "AINIC_VERSION=${AINIC_VERSION}" >> ${COMMON_WORKDIR}/versions.txt
 
 CMD ["/bin/bash"]
 
diff --git a/docker/Dockerfile.rocm_base b/docker/Dockerfile.rocm_base
index e77406728cb4..cb6d09f5bb6c 100644
--- a/docker/Dockerfile.rocm_base
+++ b/docker/Dockerfile.rocm_base
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:7.2.1-complete
+ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:7.2.2-complete
 ARG TRITON_BRANCH="ba5c1517"
 ARG TRITON_REPO="https://github.com/ROCm/triton.git"
 ARG PYTORCH_BRANCH="8514f051" # release/2.10 as of 3/17
@@ -9,9 +9,9 @@ ARG PYTORCH_AUDIO_BRANCH="v2.9.0"
 ARG PYTORCH_AUDIO_REPO="https://github.com/pytorch/audio.git"
 ARG FA_BRANCH="0e60e394"
 ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git"
-ARG AITER_BRANCH="v0.1.10.post2"
+ARG AITER_BRANCH="v0.1.13"
 ARG AITER_REPO="https://github.com/ROCm/aiter.git"
-ARG MORI_BRANCH="2d02c6a9"
+ARG MORI_BRANCH="v1.1.0"
 ARG MORI_REPO="https://github.com/ROCm/mori.git"
 
 # Sccache configuration (only used in release pipeline)
@@ -104,6 +104,28 @@ ENV SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION_NAME}}
 ENV SCCACHE_S3_NO_CREDENTIALS=${USE_SCCACHE:+${SCCACHE_S3_NO_CREDENTIALS}}
 ENV SCCACHE_IDLE_TIMEOUT=${USE_SCCACHE:+0}
 
+# torch profiler hotfix for 7.2.2: rebuild CLR with https://github.com/ROCm/rocm-systems/pull/5062
+# will be removed once we move to ROCm 7.2.3
+RUN apt-get update && apt-get install -y rocm-llvm-dev
+RUN pip install CppHeaderParser
+RUN git clone --no-checkout --filter=blob:none https://github.com/ROCm/rocm-systems /tmp/rocm-systems \
+    && cd /tmp/rocm-systems \
+    && git sparse-checkout init --cone \
+    && git sparse-checkout set projects/hip projects/clr \
+    && git checkout 35e8c7bf8911862e5389509800e65fdf125412b3 \
+    && export CLR_DIR=/tmp/rocm-systems/projects/clr \
+    && export HIP_DIR=/tmp/rocm-systems/projects/hip \
+    && mkdir -p $CLR_DIR/build && cd $CLR_DIR/build \
+    && cmake \
+        -DHIP_COMMON_DIR=$HIP_DIR \
+        -DCMAKE_PREFIX_PATH="/opt/rocm/" \
+        -DCLR_BUILD_HIP=ON \
+        -DCLR_BUILD_OCL=OFF \
+        -DHIP_PLATFORM=amd \
+        .. \
+    && make -j$(nproc) \
+    && make install \
+    && rm -rf /tmp/rocm-systems
 
 ###
 ### Triton Build
@@ -112,10 +134,14 @@ FROM base AS build_triton
 ARG TRITON_BRANCH
 ARG TRITON_REPO
 RUN git clone ${TRITON_REPO}
+# Cherry picking the following
+# https://github.com/triton-lang/triton/pull/8991
+# https://github.com/triton-lang/triton/pull/9541
 RUN cd triton \
     && git checkout ${TRITON_BRANCH} \
     && git config --global user.email "you@example.com" && git config --global user.name "Your Name" \
     && git cherry-pick 555d04f \
+    && git cherry-pick dd998b6 \
     && if [ ! -f setup.py ]; then cd python; fi \
     && python3 setup.py bdist_wheel --dist-dir=dist \
     && mkdir -p /app/install && cp dist/*.whl /app/install
@@ -149,8 +175,6 @@ RUN git clone ${PYTORCH_REPO} pytorch
 RUN cd pytorch && git checkout ${PYTORCH_BRANCH}
 RUN cd pytorch \
     && pip install -r requirements.txt && git submodule update --init --recursive
-RUN cd pytorch/third_party/kineto \
-    && git remote add rocm https://github.com/ROCm/kineto && git fetch rocm && git checkout 2d73be3 
 RUN cd pytorch && python3 tools/amd_build/build_amd.py \
     && if [ "$USE_SCCACHE" = "1" ]; then \
            export HIP_CLANG_PATH=/opt/sccache-wrappers \
diff --git a/docker/Dockerfile.s390x b/docker/Dockerfile.s390x
index 989c621d3a95..554a7257c236 100644
--- a/docker/Dockerfile.s390x
+++ b/docker/Dockerfile.s390x
@@ -42,7 +42,7 @@ FROM python-install AS pyarrow
 # Build Apache Arrow
 WORKDIR /tmp
 RUN --mount=type=cache,target=/root/.cache/uv \
-    git clone https://github.com/apache/arrow.git && \
+    git clone https://github.com/apache/arrow.git  -b maint-19.0.1 && \
     cd arrow/cpp && \
     mkdir release && cd release && \
     cmake -DCMAKE_BUILD_TYPE=Release \
@@ -68,19 +68,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install -r requirements-build.txt && \
     python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --bundle-arrow-cpp bdist_wheel
 
-FROM python-install AS numa-build
-# Install numactl (needed for numa.h dependency)
-WORKDIR /tmp
-RUN curl -LO https://github.com/numactl/numactl/archive/refs/tags/v2.0.16.tar.gz && \
-    tar -xvzf v2.0.16.tar.gz && \
-    cd numactl-2.0.16 && \
-    ./autogen.sh && \
-    ./configure && \
-    make
-
-# Set include path
-ENV C_INCLUDE_PATH="/usr/local/include:$C_INCLUDE_PATH"
-
 FROM python-install AS rust
 ENV CARGO_HOME=/root/.cargo
 ENV RUSTUP_HOME=/root/.rustup
@@ -91,15 +78,27 @@ RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && \
     rustup default stable && \
     rustup show
 
+FROM python-install AS numa-build
+WORKDIR /tmp
+RUN curl -LO https://github.com/numactl/numactl/archive/refs/tags/v2.0.19.tar.gz && \
+    tar -xvzf v2.0.19.tar.gz && \
+    cd numactl-2.0.19 && \
+    ./autogen.sh && \
+    ./configure && \
+    make
+
+# Set include path
+ENV C_INCLUDE_PATH="/usr/local/include:$C_INCLUDE_PATH"
+
 FROM python-install AS torch-vision
 # Install torchvision
-ARG TORCH_VISION_VERSION=v0.25.0
+ARG TORCH_VISION_VERSION=v0.26.0
 WORKDIR /tmp
 RUN --mount=type=cache,target=/root/.cache/uv \
     git clone https://github.com/pytorch/vision.git && \
     cd vision && \
     git checkout $TORCH_VISION_VERSION && \
-    uv pip install torch==2.10.0 --index-url https://download.pytorch.org/whl/cpu && \
+    uv pip install torch==2.11.0 --index-url https://download.pytorch.org/whl/cpu && \
     python setup.py bdist_wheel
 
 FROM python-install AS hf-xet-builder
@@ -133,7 +132,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     git clone --recursive https://github.com/numba/llvmlite.git -b v0.44.0 && \
     git clone --recursive https://github.com/numba/numba.git -b ${NUMBA_VERSION} && \
     cd llvm-project && mkdir build && cd  build && \
-    uv pip install 'cmake<4' setuptools numpy && \
+    uv pip install 'cmake<4' 'setuptools<70' numpy && \
     export PREFIX=/usr/local && CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_ENABLE_PROJECTS=lld;libunwind;compiler-rt" \
     CFLAGS="$(echo $CFLAGS | sed 's/-fno-plt //g')" \
     CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fno-plt //g')" \
@@ -193,27 +192,22 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     cd opencv-python && \
     python -m build --wheel --installer=uv --outdir /tmp/opencv-python/dist
 
-# Build Outlines Core
-FROM python-install AS outlines-core-builder
+## Todo(r3hankhan123): Remove guidance-builder stage once vLLM upgrades to new version of llguidance that fixes s390x issues. See https://github.com/guidance-ai/llguidance/issues/330
+FROM python-install AS guidance-builder
 WORKDIR /tmp
 ENV CARGO_HOME=/root/.cargo
 ENV RUSTUP_HOME=/root/.rustup
 ENV PATH="$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"
-COPY requirements/common.txt /tmp/requirements/common.txt
-ARG OUTLINES_CORE_VERSION
 RUN --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,from=rust,source=/root/.cargo,target=/root/.cargo,rw \
     --mount=type=bind,from=rust,source=/root/.rustup,target=/root/.rustup,rw \
-    OUTLINES_CORE_VERSION=${OUTLINES_CORE_VERSION:-$(grep -E '^outlines_core\s*==\s*[0-9.]+' /tmp/requirements/common.txt | grep -Eo '[0-9.]+')} && \
-    if [ -z "${OUTLINES_CORE_VERSION}" ]; then echo "ERROR: Could not determine outlines_core version"; exit 1; fi && \
-    git clone https://github.com/dottxt-ai/outlines-core.git && \
-    cd outlines-core && \
-    git checkout tags/${OUTLINES_CORE_VERSION} && \
-    sed -i "s/version = \"0.0.0\"/version = \"${OUTLINES_CORE_VERSION}\"/" Cargo.toml && \
+    git clone https://github.com/guidance-ai/llguidance.git && \
+    cd llguidance && \
+    git checkout s390x-fix-v2  && \
     uv pip install maturin && \
-    python -m maturin build --release --out dist
+    python -m maturin build --release --out dist --compatibility linux
 
-# Final build stage
+# # Final build stage
 FROM python-install AS vllm-cpu
 ARG PYTHON_VERSION
 ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
@@ -229,10 +223,12 @@ ENV PKG_CONFIG_PATH="/opt/rh/gcc-toolset-14/root/usr/lib64/pkgconfig:/usr/local/
 ENV PATH="${VIRTUAL_ENV:+${VIRTUAL_ENV}/bin}:/opt/rh/gcc-toolset-14/root/usr/bin:/usr/local/bin:$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"
 ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
 ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
+# Force pure Python protobuf to avoid s390x C++ extension crashes
+ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 COPY . /workspace/vllm
 WORKDIR /workspace/vllm
 
-RUN --mount=type=bind,from=numa-build,src=/tmp/numactl-2.0.16,target=/numactl \
+RUN --mount=type=bind,from=numa-build,src=/tmp/numactl-2.0.19,target=/numactl \
     make -C /numactl install
 
 # Install dependencies, including PyTorch and Apache Arrow
@@ -245,24 +241,24 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,from=numba-builder,source=/tmp/llvmlite/dist,target=/tmp/llvmlite-wheels/ \
     --mount=type=bind,from=numba-builder,source=/tmp/numba/dist,target=/tmp/numba-wheels/ \
     --mount=type=bind,from=opencv-builder,source=/tmp/opencv-python/dist,target=/tmp/opencv-wheels/ \
-    --mount=type=bind,from=outlines-core-builder,source=/tmp/outlines-core/dist,target=/tmp/outlines-core/dist/ \
-     ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/pyarrow-*.whl) && \
+    --mount=type=bind,from=guidance-builder,source=/tmp/llguidance/dist,target=/tmp/guidance-wheels/ \
+     ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/*.whl) && \
      VISION_WHL_FILE=$(ls /tmp/vision-wheels/*.whl) && \
      HF_XET_WHL_FILE=$(ls /tmp/hf-xet-wheels/*.whl) && \
      LLVM_WHL_FILE=$(ls /tmp/llvmlite-wheels/*.whl) && \
      NUMBA_WHL_FILE=$(ls /tmp/numba-wheels/*.whl) && \
      OPENCV_WHL_FILE=$(ls /tmp/opencv-wheels/*.whl) && \
-     OUTLINES_CORE_WHL_FILE=$(ls /tmp/outlines-core/dist/*.whl) && \
+     GUIDANCE_WHL_FILE=$(ls /tmp/guidance-wheels/*.whl) && \
      uv pip install -v \    
-        $ARROW_WHL_FILE  \
+        $ARROW_WHL_FILE \
         $VISION_WHL_FILE \
         $HF_XET_WHL_FILE \
         $LLVM_WHL_FILE \
         $NUMBA_WHL_FILE \
         $OPENCV_WHL_FILE \
-        $OUTLINES_CORE_WHL_FILE \
+        $GUIDANCE_WHL_FILE \
         --index-strategy unsafe-best-match \
-        -r requirements/cpu-build.txt \
+        -r requirements/build/cpu.txt \
         -r requirements/cpu.txt
 
 
@@ -271,6 +267,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     VLLM_TARGET_DEVICE=cpu VLLM_CPU_MOE_PREPACK=0 python setup.py bdist_wheel && \
     uv pip install "$(echo dist/*.whl)[tensorizer]"
 
+# Remove protobuf C++ extension that crashes on s390x
+RUN rm -rf /opt/vllm/lib64/python${PYTHON_VERSION}/site-packages/google/_upb/*.so \
+           /opt/vllm/lib64/python${PYTHON_VERSION}/site-packages/google/protobuf/pyext/*.so 2>/dev/null || true
+
 # setup non-root user for vllm
 RUN umask 002 && \
     /usr/sbin/useradd --uid 2000 --gid 0 vllm && \
diff --git a/docker/Dockerfile.xpu b/docker/Dockerfile.xpu
index d4c98bf7405d..ef05b4aa2e57 100644
--- a/docker/Dockerfile.xpu
+++ b/docker/Dockerfile.xpu
@@ -1,3 +1,37 @@
+######################### RUST BUILD IMAGE #########################
+# Build the Rust frontend (`vllm-rs`) in a dedicated stage so the main image
+# doesn't need the rust toolchain or protoc. Runs in parallel with vllm-base.
+FROM ubuntu:22.04 AS rust-build
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update -y \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates curl git build-essential unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY tools/install_protoc.sh /tmp/install_protoc.sh
+RUN /tmp/install_protoc.sh && rm /tmp/install_protoc.sh
+
+# Install rustup; the toolchain itself is pinned by rust-toolchain.toml.
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | \
+    sh -s -- -y --profile minimal --default-toolchain none
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+WORKDIR /workspace
+
+COPY rust rust
+COPY rust-toolchain.toml rust-toolchain.toml
+COPY build_rust.sh build_rust.sh
+
+# Cap cargo parallelism to avoid exhausting the CI host's open-file limit
+# (rustc spawns enough concurrent processes to hit RLIMIT_NOFILE otherwise).
+ENV CARGO_BUILD_JOBS=4
+
+RUN --mount=type=cache,target=/root/.cargo/registry \
+    --mount=type=cache,target=/root/.cargo/git \
+    --mount=type=cache,target=/workspace/rust/target \
+    VLLM_RS_TARGET_PATH=/workspace/vllm-rs bash build_rust.sh
+
 FROM intel/deep-learning-essentials:2025.3.2-0-devel-ubuntu24.04 AS vllm-base
 
 WORKDIR /workspace/
@@ -5,9 +39,6 @@ WORKDIR /workspace/
 ARG PYTHON_VERSION=3.12
 ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/xpu"
 
-RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
-    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list
-
 RUN apt clean && apt-get update -y && \
     apt-get install -y --no-install-recommends --fix-missing \
     curl \
@@ -26,8 +57,20 @@ RUN apt clean && apt-get update -y && \
     python3.12-dev \
     python3-pip
 
-RUN apt update && apt upgrade -y && \
-    apt install -y intel-oneapi-compiler-dpcpp-cpp-2025.3
+# Add oneAPI repo, pin oneAPI to 2025.3, then install pinned packages in one layer.
+RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
+    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
+    printf '%s\n' \
+    'Package: intel-oneapi-* intel-deep-learning-essentials* intel-pti*' \
+    'Pin: version 2025.3*' \
+    'Pin-Priority: 1001' \
+    > /etc/apt/preferences.d/oneapi-2025.3.pref && \
+    apt-get update -y && \
+    apt-get install -y --no-install-recommends \
+    intel-oneapi-compiler-dpcpp-cpp-2025.3 \
+    intel-oneapi-mkl-devel-2025.3 \
+    intel-oneapi-dnnl-devel-2025.3 && \
+    rm -rf /var/lib/apt/lists/*
 
 # Install UMD
 RUN mkdir neo && \
@@ -50,9 +93,9 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh
 RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
-# This oneccl contains the BMG support which is not the case for default version of oneapi 2025.2.
-ARG ONECCL_INSTALLER="intel-oneccl-2021.15.7.8_offline.sh"
-RUN wget "https://github.com/uxlfoundation/oneCCL/releases/download/2021.15.7/${ONECCL_INSTALLER}" && \
+# This oneccl contains the BMG support which is not the case for default version of oneapi 2025.3.
+ARG ONECCL_INSTALLER="intel-oneccl-2021.15.9.14_offline.sh"
+RUN wget "https://github.com/uxlfoundation/oneCCL/releases/download/2021.15.9/${ONECCL_INSTALLER}" && \
     bash "${ONECCL_INSTALLER}" -a --silent --eula accept && \
     rm "${ONECCL_INSTALLER}" && \
     echo "source /opt/intel/oneapi/setvars.sh --force" >> /root/.bashrc && \
@@ -76,26 +119,25 @@ ENV UV_LINK_MODE="copy"
 RUN --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,src=requirements/common.txt,target=/workspace/vllm/requirements/common.txt \
     --mount=type=bind,src=requirements/xpu.txt,target=/workspace/vllm/requirements/xpu.txt \
-    --mount=type=bind,src=requirements/xpu-test.in,target=/workspace/vllm/requirements/xpu-test.in \
+    --mount=type=bind,src=requirements/test/xpu.txt,target=/workspace/vllm/requirements/test/xpu.txt \
     uv pip install --upgrade pip && \
     uv pip install -r requirements/xpu.txt && \
-    uv pip compile /workspace/vllm/requirements/xpu-test.in \
-        -o /workspace/vllm/requirements/xpu-test.txt \
-        -c /workspace/vllm/requirements/xpu.txt \
-        --index-strategy unsafe-best-match \
-        --extra-index-url ${PIP_EXTRA_INDEX_URL} \
-        --python-version ${PYTHON_VERSION} && \
     uv pip install grpcio-tools protobuf nanobind && \
     source /opt/intel/oneapi/setvars.sh --force && \
     source /opt/intel/oneapi/ccl/2021.15/env/vars.sh --force && \
     export CMAKE_PREFIX_PATH="$(python3 -c 'import site; print(site.getsitepackages()[0])'):${CMAKE_PREFIX_PATH}" && \
-    uv pip install --no-build-isolation -r /workspace/vllm/requirements/xpu-test.txt
+    uv pip install --no-build-isolation -r /workspace/vllm/requirements/test/xpu.txt
 
 
 
 ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/"
 
 COPY . .
+
+# Drop the pre-built rust frontend binary into the source tree. setup.py
+# detects it and ships it as-is, skipping the local cargo build.
+COPY --from=rust-build /workspace/vllm-rs vllm/vllm-rs
+
 ARG GIT_REPO_CHECK=0
 RUN --mount=type=bind,source=.git,target=.git \
     if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh; fi
@@ -111,12 +153,9 @@ CMD ["/bin/bash"]
 
 FROM vllm-base AS vllm-openai
 
-# install additional dependencies for openai api server
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install accelerate hf_transfer pytest pytest_asyncio lm_eval[api] modelscope
-
 # install development dependencies (for testing)
-RUN uv pip install -e tests/vllm_test_utils
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install -e tests/vllm_test_utils
 
 # install NIXL and UCX from source code
 ARG UCX_VERSION=e5d98879705239d254ede40b4a52891850cb5349
@@ -173,7 +212,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # FIX triton
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip uninstall triton triton-xpu && \
-    uv pip install triton-xpu==3.6.0
+    uv pip install triton-xpu==3.7.0
 
 # remove torch bundled oneccl to avoid conflicts
 RUN --mount=type=cache,target=/root/.cache/uv \
diff --git a/docker/docker-bake.hcl b/docker/docker-bake.hcl
index e1c2fbba63a6..94ca8397561a 100644
--- a/docker/docker-bake.hcl
+++ b/docker/docker-bake.hcl
@@ -20,13 +20,29 @@ variable "NVCC_THREADS" {
 }
 
 variable "TORCH_CUDA_ARCH_LIST" {
-  default = "8.0 8.9 9.0 10.0"
+  default = "8.0 8.9 9.0 10.0 11.0 12.0"
 }
 
 variable "COMMIT" {
   default = ""
 }
 
+variable "VLLM_BUILD_COMMIT" {
+  default = "unknown"
+}
+
+variable "VLLM_BUILD_PIPELINE" {
+  default = "local"
+}
+
+variable "VLLM_BUILD_URL" {
+  default = ""
+}
+
+variable "VLLM_IMAGE_TAG" {
+  default = "local/vllm-openai:dev"
+}
+
 # Groups
 
 group "default" {
@@ -46,6 +62,10 @@ target "_common" {
     max_jobs             = MAX_JOBS
     nvcc_threads         = NVCC_THREADS
     torch_cuda_arch_list = TORCH_CUDA_ARCH_LIST
+    VLLM_BUILD_COMMIT    = VLLM_BUILD_COMMIT != "unknown" ? VLLM_BUILD_COMMIT : (COMMIT != "" ? COMMIT : "unknown")
+    VLLM_BUILD_PIPELINE  = VLLM_BUILD_PIPELINE
+    VLLM_BUILD_URL       = VLLM_BUILD_URL
+    VLLM_IMAGE_TAG       = VLLM_IMAGE_TAG
   }
 }
 
@@ -56,10 +76,16 @@ target "_labels" {
     "org.opencontainers.image.title"       = "vLLM"
     "org.opencontainers.image.description" = "vLLM: A high-throughput and memory-efficient inference and serving engine for LLMs"
     "org.opencontainers.image.licenses"    = "Apache-2.0"
-    "org.opencontainers.image.revision"    = COMMIT
+    "org.opencontainers.image.revision"    = VLLM_BUILD_COMMIT != "unknown" ? VLLM_BUILD_COMMIT : (COMMIT != "" ? COMMIT : "unknown")
+    "org.opencontainers.image.version"     = VLLM_IMAGE_TAG
+    "org.opencontainers.image.url"         = VLLM_BUILD_URL
+    "ai.vllm.build.commit"                 = VLLM_BUILD_COMMIT != "unknown" ? VLLM_BUILD_COMMIT : (COMMIT != "" ? COMMIT : "unknown")
+    "ai.vllm.build.pipeline"               = VLLM_BUILD_PIPELINE
+    "ai.vllm.build.url"                    = VLLM_BUILD_URL
+    "ai.vllm.image.tag"                    = VLLM_IMAGE_TAG
   }
   annotations = [
-      "index,manifest:org.opencontainers.image.revision=${COMMIT}",
+    "index,manifest:org.opencontainers.image.revision=${VLLM_BUILD_COMMIT != "unknown" ? VLLM_BUILD_COMMIT : (COMMIT != "" ? COMMIT : "unknown")}",
   ]
 }
 
@@ -88,7 +114,6 @@ target "test-ubuntu2404" {
   args = {
     UBUNTU_VERSION          = "24.04"
     GDRCOPY_OS_VERSION      = "Ubuntu24_04"
-    FLASHINFER_AOT_COMPILE  = "true"
   }
   output = ["type=docker"]
 }
@@ -100,7 +125,6 @@ target "openai-ubuntu2404" {
   args = {
     UBUNTU_VERSION          = "24.04"
     GDRCOPY_OS_VERSION      = "Ubuntu24_04"
-    FLASHINFER_AOT_COMPILE  = "true"
   }
   output = ["type=docker"]
 }
diff --git a/docker/entrypoints/test_vllm_nonroot_entrypoint.sh b/docker/entrypoints/test_vllm_nonroot_entrypoint.sh
new file mode 100755
index 000000000000..c136f0549199
--- /dev/null
+++ b/docker/entrypoints/test_vllm_nonroot_entrypoint.sh
@@ -0,0 +1,266 @@
+#!/bin/sh
+# Shell-level unit test for vllm-nonroot-entrypoint.sh.
+#
+# Runs on the host (no Docker, no GPU) by stubbing `vllm` with a shim that
+# dumps its env + argv instead of actually serving. Exercises the wrapper's
+# HOME/USER fallback behavior that can't be easily tested from buildkite
+# (which would need a GPU to run `vllm serve --help`).
+#
+# Usage:
+#   bash docker/entrypoints/test_vllm_nonroot_entrypoint.sh
+# Exits non-zero on the first failed assertion.
+
+set -eu
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+WRAPPER="${SCRIPT_DIR}/vllm-nonroot-entrypoint.sh"
+
+if [ ! -x "$WRAPPER" ]; then
+    echo "FAIL: wrapper not found or not executable: $WRAPPER" >&2
+    exit 1
+fi
+
+WORKDIR="$(mktemp -d)"
+trap 'rm -rf "$WORKDIR"' EXIT
+
+# Stub `vllm` on PATH. It dumps env + argv + cwd to stdout so we can assert.
+mkdir -p "$WORKDIR/bin"
+cat > "$WORKDIR/bin/vllm" <<'EOF'
+#!/bin/sh
+echo "ARGV=$*"
+echo "HOME=${HOME-__unset__}"
+echo "USER=${USER-__unset__}"
+echo "LOGNAME=${LOGNAME-__unset__}"
+echo "PWD=$(pwd)"
+EOF
+chmod +x "$WORKDIR/bin/vllm"
+
+run_wrapper() {
+    # Usage: run_wrapper <output_file> <env_kv>... -- <wrapper_arg>...
+    _out="$1"; shift
+    _env=""
+    while [ "${1:-}" != "--" ]; do
+        _env="$_env $1"; shift
+    done
+    shift
+    env -i PATH="$WORKDIR/bin:/usr/bin:/bin" $_env "$WRAPPER" "$@" > "$_out"
+}
+
+fail() { echo "FAIL: $*" >&2; echo "--- stdout ---" >&2; cat "$1" >&2; exit 1; }
+
+expect_default_home() {
+    _out="$1"
+    _case="$2"
+    if [ -w /home/vllm ]; then
+        expected_home="/home/vllm"
+        grep -q "^HOME=$expected_home\$" "$_out" \
+            || fail "$_out" "$_case: HOME not set to $expected_home"
+    else
+        expected_home="/tmp/vllm-home.XXXXXX"
+        grep -Eq '^HOME=/tmp/vllm-home\.[^/]+$' "$_out" \
+            || fail "$_out" "$_case: HOME not set to $expected_home"
+    fi
+}
+
+# -----------------------------------------------------------------------------
+# Case 1: writable HOME and USER both set -> wrapper must leave them alone.
+# -----------------------------------------------------------------------------
+case1_home="$WORKDIR/case1-home"
+mkdir -p "$case1_home"
+out="$WORKDIR/case1.out"
+run_wrapper "$out" "HOME=$case1_home" "USER=alice" "LOGNAME=alice" -- --model foo
+grep -q "^HOME=$case1_home\$" "$out" || fail "$out" "case1: HOME not preserved"
+grep -q "^USER=alice\$" "$out" || fail "$out" "case1: USER not preserved"
+grep -q "^LOGNAME=alice\$" "$out" || fail "$out" "case1: LOGNAME not preserved"
+grep -q "^ARGV=serve --model foo\$" "$out" || fail "$out" "case1: ARGV wrong"
+echo "PASS: case1 (writable HOME + USER preserved)"
+
+# -----------------------------------------------------------------------------
+# Case 2: HOME unset -> falls back to /home/vllm if writable, else
+# /tmp/vllm-home.XXXXXX.
+# -----------------------------------------------------------------------------
+# The wrapper checks whether the real /home/vllm exists and is writable. On
+# dev machines /home/vllm typically does NOT exist, so the
+# wrapper should fall to /tmp/vllm-home.XXXXXX.
+out="$WORKDIR/case2.out"
+run_wrapper "$out" -- --model bar
+expect_default_home "$out" "case2"
+grep -q "^USER=vllm\$" "$out" || fail "$out" "case2: USER not defaulted to vllm"
+grep -q "^LOGNAME=vllm\$" "$out" || fail "$out" "case2: LOGNAME not defaulted to vllm"
+grep -q "^ARGV=serve --model bar\$" "$out" || fail "$out" "case2: ARGV wrong"
+echo "PASS: case2 (unset HOME falls back to $expected_home, USER defaulted)"
+
+# -----------------------------------------------------------------------------
+# Case 3: HOME set but unwritable -> must also fall back.
+# -----------------------------------------------------------------------------
+ro_home="$WORKDIR/ro-home"
+mkdir -p "$ro_home"
+chmod 0500 "$ro_home"
+out="$WORKDIR/case3.out"
+run_wrapper "$out" "HOME=$ro_home" -- --model baz
+expect_default_home "$out" "case3"
+grep -q "^USER=vllm\$" "$out" || fail "$out" "case3: USER not defaulted"
+chmod 0700 "$ro_home"
+echo "PASS: case3 (unwritable HOME overridden)"
+
+# -----------------------------------------------------------------------------
+# Case 4: USER set but LOGNAME unset -> LOGNAME mirrors USER.
+# -----------------------------------------------------------------------------
+case4_home="$WORKDIR/case4-home"
+mkdir -p "$case4_home"
+out="$WORKDIR/case4.out"
+run_wrapper "$out" "HOME=$case4_home" "USER=carol" -- --model qux
+grep -q "^USER=carol\$" "$out" || fail "$out" "case4: USER not preserved"
+grep -q "^LOGNAME=carol\$" "$out" || fail "$out" "case4: LOGNAME not mirrored from USER"
+echo "PASS: case4 (LOGNAME mirrors USER when unset)"
+
+# -----------------------------------------------------------------------------
+# Case 5: /etc/passwd is writable AND the current UID is not in it -> wrapper
+# appends a synthetic entry. Uses the VLLM_PASSWD_FILE test hook so we don't
+# touch the real /etc/passwd.
+# -----------------------------------------------------------------------------
+fake_passwd="$WORKDIR/fake-passwd"
+: > "$fake_passwd"  # empty file, current UID definitely not present
+case5_home="$WORKDIR/case5-home"
+mkdir -p "$case5_home"
+out="$WORKDIR/case5.out"
+run_wrapper "$out" "HOME=$case5_home" "VLLM_PASSWD_FILE=$fake_passwd" -- --model foo
+current_uid="$(id -u)"
+current_gid="$(id -g)"
+expected_line="vllm:x:${current_uid}:${current_gid}:vllm:${case5_home}:/bin/bash"
+grep -Fx "$expected_line" "$fake_passwd" > /dev/null \
+    || { echo "FAIL: case5: expected line not found in fake passwd:"; echo "  expected: $expected_line"; echo "  file contents:"; cat "$fake_passwd"; exit 1; }
+echo "PASS: case5 (passwd entry appended for arbitrary UID)"
+
+# -----------------------------------------------------------------------------
+# Case 6: /etc/passwd is writable but current UID already has an entry ->
+# wrapper must NOT duplicate the entry.
+# -----------------------------------------------------------------------------
+fake_passwd="$WORKDIR/fake-passwd-prepopulated"
+printf 'vllm:x:%s:%s:vllm:/home/vllm:/bin/bash\n' "$current_uid" "$current_gid" > "$fake_passwd"
+out="$WORKDIR/case6.out"
+run_wrapper "$out" "HOME=$case5_home" "VLLM_PASSWD_FILE=$fake_passwd" -- --model foo
+line_count="$(wc -l < "$fake_passwd")"
+# NOTE: wc may count 0 or 1 depending on trailing newline; accept 1.
+# More robust: count lines matching our UID.
+uid_lines="$(grep -c ":${current_uid}:" "$fake_passwd" || true)"
+[ "$uid_lines" = "1" ] \
+    || { echo "FAIL: case6: expected exactly one entry for UID $current_uid, got $uid_lines"; cat "$fake_passwd"; exit 1; }
+echo "PASS: case6 (existing passwd entry not duplicated)"
+
+# -----------------------------------------------------------------------------
+# Case 7: /etc/passwd is NOT writable -> wrapper must NOT crash, just skip.
+# Skipped when running as root, because root's DAC override means [ -w ... ]
+# is always true regardless of mode bits -- the case can't be simulated.
+# In the real deployment (non-root UID inside the container) this IS the
+# relevant behavior and is what `_passwd_file is not writable` encodes.
+# -----------------------------------------------------------------------------
+if [ "$(id -u)" = "0" ]; then
+    echo "SKIP: case7 (running as root; DAC override makes unwritable check meaningless)"
+else
+    fake_passwd="$WORKDIR/ro-passwd"
+    : > "$fake_passwd"
+    chmod 0444 "$fake_passwd"
+    out="$WORKDIR/case7.out"
+    run_wrapper "$out" "HOME=$case5_home" "VLLM_PASSWD_FILE=$fake_passwd" -- --model foo
+    # File must remain empty (no write happened) and the wrapper exec'd
+    # `vllm serve` successfully (stdout contains ARGV line).
+    [ ! -s "$fake_passwd" ] \
+        || { echo "FAIL: case7: RO passwd file was modified"; cat "$fake_passwd"; exit 1; }
+    grep -q "^ARGV=serve --model foo\$" "$out" || fail "$out" "case7: wrapper didn't exec vllm"
+    chmod 0600 "$fake_passwd"
+    echo "PASS: case7 (unwritable passwd file tolerated)"
+fi
+
+# -----------------------------------------------------------------------------
+# Case 8: caller's writable CWD is preserved — wrapper must NOT chdir to HOME
+# when cwd is usable. Protects relative-path workflows like
+# `docker run -w /models ... --model ./llama.gguf`.
+# -----------------------------------------------------------------------------
+case8_home="$WORKDIR/case8-home"
+mkdir -p "$case8_home"
+case8_cwd="$WORKDIR/case8-cwd"
+mkdir -p "$case8_cwd"
+out="$WORKDIR/case8.out"
+(cd "$case8_cwd" && run_wrapper "$out" "HOME=$case8_home" "USER=alice" "LOGNAME=alice" -- --model ./relpath)
+grep -q "^PWD=$case8_cwd\$" "$out" \
+    || fail "$out" "case8: writable cwd not preserved (got $(grep '^PWD=' "$out"))"
+grep -q "^ARGV=serve --model \\./relpath\$" "$out" \
+    || fail "$out" "case8: relative argv not preserved"
+echo "PASS: case8 (writable cwd preserved; relative argv still resolves from caller's cwd)"
+
+# -----------------------------------------------------------------------------
+# Case 9: read-only cwd is ALSO preserved. A caller who mounts a read-only
+# model directory at the container's cwd (e.g. `docker run -w /models` with
+# /models bind-mounted ro) expects relative argv like `--model ./foo.gguf`
+# to resolve against /models. An earlier version of this wrapper rewrote
+# read-only cwd to $HOME and broke that workflow; this case guards against
+# the regression returning.
+# -----------------------------------------------------------------------------
+case9_home="$WORKDIR/case9-home"
+mkdir -p "$case9_home"
+case9_ro="$WORKDIR/case9-ro"
+mkdir -p "$case9_ro"
+chmod 0555 "$case9_ro"
+out="$WORKDIR/case9.out"
+(cd "$case9_ro" && run_wrapper "$out" "HOME=$case9_home" "USER=alice" "LOGNAME=alice" -- --model ./foo)
+grep -q "^PWD=$case9_ro\$" "$out" \
+    || fail "$out" "case9: read-only cwd was rewritten (got $(grep '^PWD=' "$out"))"
+grep -q "^ARGV=serve --model \\./foo\$" "$out" \
+    || fail "$out" "case9: relative argv not preserved"
+chmod 0700 "$case9_ro"
+echo "PASS: case9 (read-only cwd preserved; relative argv still resolves from caller's cwd)"
+
+# -----------------------------------------------------------------------------
+# Case 10: truly inaccessible cwd (no search bit) DOES fall back to $HOME.
+# Skipped as root because DAC override lets root cd into 0000 directories.
+# -----------------------------------------------------------------------------
+if [ "$(id -u)" = "0" ]; then
+    echo "SKIP: case10 (running as root; DAC override makes inaccessible cwd untestable)"
+else
+    case10_home="$WORKDIR/case10-home"
+    mkdir -p "$case10_home"
+    case10_cwd="$WORKDIR/case10-cwd"
+    mkdir -p "$case10_cwd"
+    out="$WORKDIR/case10.out"
+    # Make cwd genuinely inaccessible (mode 0000 = no search bit -> cd .
+    # fails with EACCES). Use absolute paths for chmod so our own test
+    # cleanup still works without needing search perm on the dir.
+    (
+        cd "$case10_cwd"
+        chmod 0000 "$case10_cwd"
+        run_wrapper "$out" "HOME=$case10_home" "USER=alice" "LOGNAME=alice" -- --model foo
+    )
+    chmod 0700 "$case10_cwd"
+    grep -q "^PWD=$case10_home\$" "$out" \
+        || fail "$out" "case10: inaccessible cwd not overridden to HOME (got $(grep '^PWD=' "$out"))"
+    echo "PASS: case10 (inaccessible cwd falls back to \$HOME)"
+fi
+
+# -----------------------------------------------------------------------------
+# Case 11: if /tmp cannot create a private fallback dir, wrapper uses /tmp as
+# the last-resort HOME instead of leaving HOME empty under set -eu.
+# -----------------------------------------------------------------------------
+if [ -w /home/vllm ]; then
+    echo "SKIP: case11 (/home/vllm is writable; mktemp fallback path is not used)"
+else
+    cat > "$WORKDIR/bin/mktemp" <<'EOF'
+#!/bin/sh
+exit 1
+EOF
+    chmod +x "$WORKDIR/bin/mktemp"
+
+    out="$WORKDIR/case11.out"
+    run_wrapper "$out" -- --model no-mktemp
+    rm -f "$WORKDIR/bin/mktemp"
+
+    grep -q "^HOME=/tmp\$" "$out" \
+        || fail "$out" "case11: mktemp failure did not fall back to /tmp"
+    grep -q "^USER=vllm\$" "$out" || fail "$out" "case11: USER not defaulted"
+    grep -q "^LOGNAME=vllm\$" "$out" || fail "$out" "case11: LOGNAME not defaulted"
+    grep -q "^ARGV=serve --model no-mktemp\$" "$out" || fail "$out" "case11: ARGV wrong"
+    echo "PASS: case11 (mktemp failure falls back to /tmp)"
+fi
+
+echo ""
+echo "ALL CASES PASSED."
diff --git a/docker/entrypoints/vllm-nonroot-entrypoint.sh b/docker/entrypoints/vllm-nonroot-entrypoint.sh
new file mode 100755
index 000000000000..0972ed990977
--- /dev/null
+++ b/docker/entrypoints/vllm-nonroot-entrypoint.sh
@@ -0,0 +1,87 @@
+#!/bin/sh
+# Entrypoint wrapper for the opt-in `vllm-openai-nonroot` image.
+#
+# The image also ships a `vllm` user (UID 2000, GID 0) with HOME /home/vllm
+# and a group-0-writable home directory. When the container is launched with
+# `--user 2000:0` (or any other UID in group 0) the passwd entry is enough on
+# its own: Docker picks up HOME=/home/vllm, getpass.getuser() resolves to
+# "vllm", and every cache dir (HF, Triton, Inductor, vLLM, Numba, Outlines)
+# that defaults to `$HOME/.cache/...` lands in a writable location.
+#
+# This wrapper exists for the *arbitrary-UID* case (e.g. OpenShift's
+# `runAsUser: 1000540000` Restricted Pod Security Standard) where the caller
+# UID is not in /etc/passwd at all. In that case:
+#   * $HOME may be unset or resolve to "/" (unwritable).
+#   * getpass.getuser() falls back to pwd.getpwuid() -> KeyError.
+#
+# The wrapper re-points $HOME to /home/vllm when writable, /tmp/vllm-home.XXXXXX
+# otherwise, and defaults $USER to "vllm" so the pwd-lookup path is never
+# taken. Everything else is forwarded to `vllm serve`.
+#
+# Non-empty caller-set env vars (HOME, USER, LOGNAME) are preserved, so
+# existing K8s manifests and `docker run -e ...` keep working unchanged.
+# Unset or empty values fall through to the wrapper's defaults, matching
+# what shell code typically expects from "unset".
+
+set -eu
+
+if [ -z "${HOME:-}" ] || [ ! -w "${HOME}" ]; then
+    if [ -w /home/vllm ]; then
+        export HOME=/home/vllm
+    else
+        if _h="$(mktemp -d /tmp/vllm-home.XXXXXX 2>/dev/null)"; then
+            export HOME="$_h"
+            chmod 0700 "$HOME" 2>/dev/null || true
+        else
+            export HOME=/tmp
+        fi
+        unset _h
+    fi
+fi
+
+# Preserve the caller's cwd whenever it's still usable. A read-only mount
+# (e.g. `docker run -w /models ... --model ./llama.gguf` where /models is
+# the user's model share) is a legitimate, usable cwd — vllm only needs to
+# *read* relative paths from there. We only fall back to $HOME when the
+# cwd itself is truly inaccessible (no search bit, deleted inode, mount
+# gone, etc.), which is when `cd .` actually fails.
+#
+# This is the accessibility check, not a writability check; the latter
+# would silently rewrite cwd for any read-only workflow and break relative
+# argv like `--model ./llama.gguf`, `--chat-template ./t.jinja`, relative
+# TLS cert paths, etc.
+if ! cd . 2>/dev/null; then
+    cd "$HOME"
+fi
+
+# getpass.getuser() prefers $USER/$LOGNAME/etc. before hitting getpwuid();
+# setting it here makes the "UID not in passwd" path a no-op for everything
+# in the process tree.
+if [ -z "${USER:-}" ]; then
+    export USER=vllm
+fi
+if [ -z "${LOGNAME:-}" ]; then
+    export LOGNAME="$USER"
+fi
+
+# Shell-level tooling (`whoami`, bash's `\u` prompt, `id -un`, `sudo`) does
+# NOT consult $USER; it calls getpwuid(geteuid()) directly. For arbitrary
+# runtime UIDs in OpenShift-style deploys this returns "I have no name!".
+# If /etc/passwd is group-0 writable (set at build time) and doesn't yet
+# have an entry for this UID, append a synthetic one so every downstream
+# consumer sees a consistent "vllm" identity.
+#
+# We parse the passwd file directly instead of calling `getent` because
+# the container's NSS is typically just files anyway, and this lets us
+# unit-test via the VLLM_PASSWD_FILE hook (undocumented; production uses
+# /etc/passwd).
+_passwd_file="${VLLM_PASSWD_FILE:-/etc/passwd}"
+_uid="$(id -u)"
+if [ -w "$_passwd_file" ] \
+    && ! awk -F: -v u="$_uid" '$3==u {found=1; exit} END {exit !found}' "$_passwd_file" 2>/dev/null; then
+    printf 'vllm:x:%s:%s:vllm:%s:/bin/bash\n' \
+        "$_uid" "$(id -g)" "$HOME" >> "$_passwd_file"
+fi
+unset _uid _passwd_file
+
+exec vllm serve "$@"
diff --git a/docker/versions.json b/docker/versions.json
index 582d1bd54279..ee23b5baf04f 100644
--- a/docker/versions.json
+++ b/docker/versions.json
@@ -2,7 +2,7 @@
   "_comment": "Auto-generated from Dockerfile ARGs. Do not edit manually. Run: python tools/generate_versions_json.py",
   "variable": {
     "CUDA_VERSION": {
-      "default": "12.9.1"
+      "default": "13.0.2"
     },
     "PYTHON_VERSION": {
       "default": "3.12"
@@ -11,10 +11,13 @@
       "default": "22.04"
     },
     "BUILD_BASE_IMAGE": {
-      "default": "nvidia/cuda:12.9.1-devel-ubuntu20.04"
+      "default": "nvidia/cuda:13.0.2-devel-ubuntu22.04"
     },
     "FINAL_BASE_IMAGE": {
-      "default": "nvidia/cuda:12.9.1-base-ubuntu22.04"
+      "default": "nvidia/cuda:13.0.2-base-ubuntu22.04"
+    },
+    "BUILD_OS": {
+      "default": "ubuntu"
     },
     "GET_PIP_URL": {
       "default": "https://bootstrap.pypa.io/get-pip.py"
@@ -32,7 +35,7 @@
       "default": "false"
     },
     "TORCH_CUDA_ARCH_LIST": {
-      "default": "7.0 7.5 8.0 8.9 9.0 10.0 12.0"
+      "default": "7.5 8.0 8.6 8.9 9.0 10.0 11.0 12.0+PTX"
     },
     "MAX_JOBS": {
       "default": "2"
@@ -52,9 +55,6 @@
     "vllm_target_device": {
       "default": "cuda"
     },
-    "DEEPGEMM_GIT_REF": {
-      "default": "477618cd51baffca09c4b0b87e97c03fe827ef03"
-    },
     "DEEPEP_COMMIT_HASH": {
       "default": "73b6ea4"
     },
@@ -68,7 +68,7 @@
       "default": "true"
     },
     "FLASHINFER_VERSION": {
-      "default": "0.6.6"
+      "default": "0.6.11.post2"
     },
     "GDRCOPY_CUDA_VERSION": {
       "default": "12.8"
diff --git a/docs/.nav.yml b/docs/.nav.yml
index 89584442e390..7d985fdeb58c 100644
--- a/docs/.nav.yml
+++ b/docs/.nav.yml
@@ -11,7 +11,7 @@ nav:
       - usage/*
     - Inference and Serving:
       - serving/offline_inference.md
-      - serving/openai_compatible_server.md
+      - Online Serving: serving/online_serving
       - serving/*
       - serving/integrations
     - Deployment:
diff --git a/docs/README.md b/docs/README.md
index 4b480c463abb..2bdc3e1a3e4c 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -25,7 +25,7 @@ hide:
 
 vLLM is a fast and easy-to-use library for LLM inference and serving.
 
-Originally developed in the [Sky Computing Lab](https://sky.cs.berkeley.edu) at UC Berkeley, vLLM has evolved into a community-driven project with contributions from both academia and industry.
+Originally developed in the [Sky Computing Lab](https://sky.cs.berkeley.edu) at UC Berkeley, vLLM has grown into one of the most active open-source AI projects built and maintained by a diverse community of many dozens of academic institutions and companies from over 2000 contributors.
 
 Where to get started with vLLM depends on the type of user. If you are looking to:
 
@@ -42,23 +42,37 @@ vLLM is fast with:
 
 - State-of-the-art serving throughput
 - Efficient management of attention key and value memory with [**PagedAttention**](https://blog.vllm.ai/2023/06/20/vllm.html)
-- Continuous batching of incoming requests
-- Fast model execution with CUDA/HIP graph
-- Quantization: [GPTQ](https://arxiv.org/abs/2210.17323), [AWQ](https://arxiv.org/abs/2306.00978), INT4, INT8, and FP8
-- Optimized CUDA kernels, including integration with FlashAttention and FlashInfer.
-- Speculative decoding
-- Chunked prefill
+- Continuous batching of incoming requests, chunked prefill, prefix caching
+- Fast and flexible model execution with piecewise and full CUDA/HIP graphs
+- Quantization: FP8, MXFP8/MXFP4, NVFP4, INT8, INT4, GPTQ/AWQ, GGUF, compressed-tensors, ModelOpt, TorchAO, and [more](https://docs.vllm.ai/en/latest/features/quantization/index.html)
+- Optimized attention kernels including FlashAttention, FlashInfer, TRTLLM-GEN, FlashMLA, and Triton
+- Optimized GEMM/MoE kernels for various precisions using CUTLASS, TRTLLM-GEN, CuTeDSL
+- Speculative decoding including n-gram, suffix, EAGLE, DFlash
+- Automatic kernel generation and graph-level transformations using torch.compile
+- Disaggregated prefill, decode, and encode
 
 vLLM is flexible and easy to use with:
 
-- Seamless integration with popular HuggingFace models
+- Seamless integration with popular Hugging Face models
 - High-throughput serving with various decoding algorithms, including *parallel sampling*, *beam search*, and more
-- Tensor, pipeline, data and expert parallelism support for distributed inference
+- Tensor, pipeline, data, expert, and context parallelism for distributed inference
 - Streaming outputs
-- OpenAI-compatible API server
-- Support for NVIDIA GPUs, AMD CPUs and GPUs, Intel CPUs and GPUs, PowerPC CPUs, Arm CPUs, and TPU. Additionally, support for diverse hardware plugins such as Intel Gaudi, IBM Spyre and Huawei Ascend.
-- Prefix caching support
-- Multi-LoRA support
+- Generation of structured outputs using xgrammar or guidance
+- Tool calling and reasoning parsers
+- OpenAI-compatible API server, plus Anthropic Messages API and gRPC support
+- Efficient multi-LoRA support for dense and MoE layers
+- Support for NVIDIA GPUs, AMD GPUs, and x86/ARM/PowerPC CPUs. Additionally, diverse hardware plugins such as Google TPUs, Intel Gaudi, IBM Spyre, Huawei Ascend, Rebellions NPU, Apple Silicon, MetaX GPU, and more.
+
+vLLM seamlessly supports 200+ model architectures on HuggingFace, including:
+
+- Decoder-only LLMs (e.g., Llama, Qwen, Gemma)
+- Mixture-of-Expert LLMs (e.g., Mixtral, DeepSeek-V3, Qwen-MoE, GPT-OSS)
+- Hybrid attention and state-space models (e.g., Mamba, Qwen3.5)
+- Multi-modal models (e.g., LLaVA, Qwen-VL, Pixtral)
+- Embedding and retrieval models (e.g., E5-Mistral, GTE, ColBERT)
+- Reward and classification models (e.g., Qwen-Math)
+
+Find the full list of supported models [here](./models/supported_models.md).
 
 For more information, check out the following:
 
diff --git a/docs/assets/contributing/dockerfile-stages-dependency.png b/docs/assets/contributing/dockerfile-stages-dependency.png
index 9ac394d4c9f8..b4f505493add 100644
Binary files a/docs/assets/contributing/dockerfile-stages-dependency.png and b/docs/assets/contributing/dockerfile-stages-dependency.png differ
diff --git a/docs/assets/contributing/vllm_bench_serve_dataset_stats.png b/docs/assets/contributing/vllm_bench_serve_dataset_stats.png
new file mode 100644
index 000000000000..72c19d3d7c07
Binary files /dev/null and b/docs/assets/contributing/vllm_bench_serve_dataset_stats.png differ
diff --git a/docs/assets/contributing/vllm_bench_serve_timeline.html b/docs/assets/contributing/vllm_bench_serve_timeline.html
new file mode 100644
index 000000000000..d463e202b6d1
--- /dev/null
+++ b/docs/assets/contributing/vllm_bench_serve_timeline.html
@@ -0,0 +1,3888 @@
+<html>
+<head><meta charset="utf-8" /></head>
+<body>
+    <div>                        <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
+        <script type="text/javascript">/**
+* plotly.js v3.3.1
+* Copyright 2012-2025, Plotly, Inc.
+* All rights reserved.
+* Licensed under the MIT license
+*/
+(
+ function(root, factory) {
+  if (typeof module === "object" && module.exports) {
+   module.exports = factory();
+  } else {
+   root.moduleName = factory();
+  }
+} (typeof self !== "undefined" ? self : this, () => {
+"use strict";var Plotly=(()=>{var ctt=Object.create;var LS=Object.defineProperty,ftt=Object.defineProperties,htt=Object.getOwnPropertyDescriptor,dtt=Object.getOwnPropertyDescriptors,vtt=Object.getOwnPropertyNames,s6=Object.getOwnPropertySymbols,ptt=Object.getPrototypeOf,xO=Object.prototype.hasOwnProperty,lee=Object.prototype.propertyIsEnumerable;var see=(e,t,r)=>t in e?LS(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r,mg=(e,t)=>{for(var r in t||(t={}))xO.call(t,r)&&see(e,r,t[r]);if(s6)for(var r of s6(t))lee.call(t,r)&&see(e,r,t[r]);return e},q1=(e,t)=>ftt(e,dtt(t));var uee=(e,t)=>{var r={};for(var n in e)xO.call(e,n)&&t.indexOf(n)<0&&(r[n]=e[n]);if(e!=null&&s6)for(var n of s6(e))t.indexOf(n)<0&&lee.call(e,n)&&(r[n]=e[n]);return r};var gu=(e,t)=>()=>(e&&(t=e(e=0)),t);var ye=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),cee=(e,t)=>{for(var r in t)LS(e,r,{get:t[r],enumerable:!0})},fee=(e,t,r,n)=>{if(t&&typeof t=="object"||typeof t=="function")for(let i of vtt(t))!xO.call(e,i)&&i!==r&&LS(e,i,{get:()=>t[i],enumerable:!(n=htt(t,i))||n.enumerable});return e};var gtt=(e,t,r)=>(r=e!=null?ctt(ptt(e)):{},fee(t||!e||!e.__esModule?LS(r,"default",{value:e,enumerable:!0}):r,e)),ob=e=>fee(LS({},"__esModule",{value:!0}),e);var l6=ye(hee=>{"use strict";hee.version="3.3.1"});var vee=ye((dee,u6)=>{(function(t,r,n){r[t]=r[t]||n(),typeof u6!="undefined"&&u6.exports&&(u6.exports=r[t])})("Promise",typeof window!="undefined"?window:dee,function(){"use strict";var t,r,n,i=Object.prototype.toString,a=typeof setImmediate!="undefined"?function(k){return setImmediate(k)}:setTimeout;try{Object.defineProperty({},"x",{}),t=function(k,E,S,L){return Object.defineProperty(k,E,{value:S,writable:!0,configurable:L!==!1})}}catch(p){t=function(E,S,L){return E[S]=L,E}}n=function(){var k,E,S;function L(x,C){this.fn=x,this.self=C,this.next=void 0}return{add:function(C,M){S=new L(C,M),E?E.next=S:k=S,E=S,S=void 0},drain:function(){var C=k;for(k=E=r=void 0;C;)C.fn.call(C.self),C=C.next}}}();function o(p,k){n.add(p,k),r||(r=a(n.drain))}function s(p){var k,E=typeof p;return p!=null&&(E=="object"||E=="function")&&(k=p.then),typeof k=="function"?k:!1}function l(){for(var p=0;p<this.chain.length;p++)u(this,this.state===1?this.chain[p].success:this.chain[p].failure,this.chain[p]);this.chain.length=0}function u(p,k,E){var S,L;try{k===!1?E.reject(p.msg):(k===!0?S=p.msg:S=k.call(void 0,p.msg),S===E.promise?E.reject(TypeError("Promise-chain cycle")):(L=s(S))?L.call(S,E.resolve,E.reject):E.resolve(S))}catch(x){E.reject(x)}}function c(p){var k,E=this;if(!E.triggered){E.triggered=!0,E.def&&(E=E.def);try{(k=s(p))?o(function(){var S=new d(E);try{k.call(p,function(){c.apply(S,arguments)},function(){f.apply(S,arguments)})}catch(L){f.call(S,L)}}):(E.msg=p,E.state=1,E.chain.length>0&&o(l,E))}catch(S){f.call(new d(E),S)}}}function f(p){var k=this;k.triggered||(k.triggered=!0,k.def&&(k=k.def),k.msg=p,k.state=2,k.chain.length>0&&o(l,k))}function h(p,k,E,S){for(var L=0;L<k.length;L++)(function(C){p.resolve(k[C]).then(function(g){E(C,g)},S)})(L)}function d(p){this.def=p,this.triggered=!1}function v(p){this.promise=p,this.state=0,this.triggered=!1,this.chain=[],this.msg=void 0}function _(p){if(typeof p!="function")throw TypeError("Not a function");if(this.__NPO__!==0)throw TypeError("Not a promise");this.__NPO__=1;var k=new v(this);this.then=function(S,L){var x={success:typeof S=="function"?S:!0,failure:typeof L=="function"?L:!1};return x.promise=new this.constructor(function(M,g){if(typeof M!="function"||typeof g!="function")throw TypeError("Not a function");x.resolve=M,x.reject=g}),k.chain.push(x),k.state!==0&&o(l,k),x.promise},this.catch=function(S){return this.then(void 0,S)};try{p.call(void 0,function(S){c.call(k,S)},function(S){f.call(k,S)})}catch(E){f.call(k,E)}}var b=t({},"constructor",_,!1);return _.prototype=b,t(b,"__NPO__",0,!1),t(_,"resolve",function(k){var E=this;return k&&typeof k=="object"&&k.__NPO__===1?k:new E(function(L,x){if(typeof L!="function"||typeof x!="function")throw TypeError("Not a function");L(k)})}),t(_,"reject",function(k){return new this(function(S,L){if(typeof S!="function"||typeof L!="function")throw TypeError("Not a function");L(k)})}),t(_,"all",function(k){var E=this;return i.call(k)!="[object Array]"?E.reject(TypeError("Not an array")):k.length===0?E.resolve([]):new E(function(L,x){if(typeof L!="function"||typeof x!="function")throw TypeError("Not a function");var C=k.length,M=Array(C),g=0;h(E,k,function(T,z){M[T]=z,++g===C&&L(M)},x)})}),t(_,"race",function(k){var E=this;return i.call(k)!="[object Array]"?E.reject(TypeError("Not an array")):new E(function(L,x){if(typeof L!="function"||typeof x!="function")throw TypeError("Not a function");h(E,k,function(M,g){L(g)},x)})}),_})});var Oa=ye((arr,c6)=>{(function(){var e={version:"3.8.2"},t=[].slice,r=function(X){return t.call(X)},n=self.document;function i(X){return X&&(X.ownerDocument||X.document||X).documentElement}function a(X){return X&&(X.ownerDocument&&X.ownerDocument.defaultView||X.document&&X||X.defaultView)}if(n)try{r(n.documentElement.childNodes)[0].nodeType}catch(X){r=function(se){for(var Te=se.length,Ne=new Array(Te);Te--;)Ne[Te]=se[Te];return Ne}}if(Date.now||(Date.now=function(){return+new Date}),n)try{n.createElement("DIV").style.setProperty("opacity",0,"")}catch(X){var o=this.Element.prototype,s=o.setAttribute,l=o.setAttributeNS,u=this.CSSStyleDeclaration.prototype,c=u.setProperty;o.setAttribute=function(se,Te){s.call(this,se,Te+"")},o.setAttributeNS=function(se,Te,Ne){l.call(this,se,Te,Ne+"")},u.setProperty=function(se,Te,Ne){c.call(this,se,Te+"",Ne)}}e.ascending=f;function f(X,se){return X<se?-1:X>se?1:X>=se?0:NaN}e.descending=function(X,se){return se<X?-1:se>X?1:se>=X?0:NaN},e.min=function(X,se){var Te=-1,Ne=X.length,He,Ye;if(arguments.length===1){for(;++Te<Ne;)if((Ye=X[Te])!=null&&Ye>=Ye){He=Ye;break}for(;++Te<Ne;)(Ye=X[Te])!=null&&He>Ye&&(He=Ye)}else{for(;++Te<Ne;)if((Ye=se.call(X,X[Te],Te))!=null&&Ye>=Ye){He=Ye;break}for(;++Te<Ne;)(Ye=se.call(X,X[Te],Te))!=null&&He>Ye&&(He=Ye)}return He},e.max=function(X,se){var Te=-1,Ne=X.length,He,Ye;if(arguments.length===1){for(;++Te<Ne;)if((Ye=X[Te])!=null&&Ye>=Ye){He=Ye;break}for(;++Te<Ne;)(Ye=X[Te])!=null&&Ye>He&&(He=Ye)}else{for(;++Te<Ne;)if((Ye=se.call(X,X[Te],Te))!=null&&Ye>=Ye){He=Ye;break}for(;++Te<Ne;)(Ye=se.call(X,X[Te],Te))!=null&&Ye>He&&(He=Ye)}return He},e.extent=function(X,se){var Te=-1,Ne=X.length,He,Ye,kt;if(arguments.length===1){for(;++Te<Ne;)if((Ye=X[Te])!=null&&Ye>=Ye){He=kt=Ye;break}for(;++Te<Ne;)(Ye=X[Te])!=null&&(He>Ye&&(He=Ye),kt<Ye&&(kt=Ye))}else{for(;++Te<Ne;)if((Ye=se.call(X,X[Te],Te))!=null&&Ye>=Ye){He=kt=Ye;break}for(;++Te<Ne;)(Ye=se.call(X,X[Te],Te))!=null&&(He>Ye&&(He=Ye),kt<Ye&&(kt=Ye))}return[He,kt]};function h(X){return X===null?NaN:+X}function d(X){return!isNaN(X)}e.sum=function(X,se){var Te=0,Ne=X.length,He,Ye=-1;if(arguments.length===1)for(;++Ye<Ne;)d(He=+X[Ye])&&(Te+=He);else for(;++Ye<Ne;)d(He=+se.call(X,X[Ye],Ye))&&(Te+=He);return Te},e.mean=function(X,se){var Te=0,Ne=X.length,He,Ye=-1,kt=Ne;if(arguments.length===1)for(;++Ye<Ne;)d(He=h(X[Ye]))?Te+=He:--kt;else for(;++Ye<Ne;)d(He=h(se.call(X,X[Ye],Ye)))?Te+=He:--kt;if(kt)return Te/kt},e.quantile=function(X,se){var Te=(X.length-1)*se+1,Ne=Math.floor(Te),He=+X[Ne-1],Ye=Te-Ne;return Ye?He+Ye*(X[Ne]-He):He},e.median=function(X,se){var Te=[],Ne=X.length,He,Ye=-1;if(arguments.length===1)for(;++Ye<Ne;)d(He=h(X[Ye]))&&Te.push(He);else for(;++Ye<Ne;)d(He=h(se.call(X,X[Ye],Ye)))&&Te.push(He);if(Te.length)return e.quantile(Te.sort(f),.5)},e.variance=function(X,se){var Te=X.length,Ne=0,He,Ye,kt=0,nt=-1,jt=0;if(arguments.length===1)for(;++nt<Te;)d(He=h(X[nt]))&&(Ye=He-Ne,Ne+=Ye/++jt,kt+=Ye*(He-Ne));else for(;++nt<Te;)d(He=h(se.call(X,X[nt],nt)))&&(Ye=He-Ne,Ne+=Ye/++jt,kt+=Ye*(He-Ne));if(jt>1)return kt/(jt-1)},e.deviation=function(){var X=e.variance.apply(this,arguments);return X&&Math.sqrt(X)};function v(X){return{left:function(se,Te,Ne,He){for(arguments.length<3&&(Ne=0),arguments.length<4&&(He=se.length);Ne<He;){var Ye=Ne+He>>>1;X(se[Ye],Te)<0?Ne=Ye+1:He=Ye}return Ne},right:function(se,Te,Ne,He){for(arguments.length<3&&(Ne=0),arguments.length<4&&(He=se.length);Ne<He;){var Ye=Ne+He>>>1;X(se[Ye],Te)>0?He=Ye:Ne=Ye+1}return Ne}}}var _=v(f);e.bisectLeft=_.left,e.bisect=e.bisectRight=_.right,e.bisector=function(X){return v(X.length===1?function(se,Te){return f(X(se),Te)}:X)},e.shuffle=function(X,se,Te){(Ne=arguments.length)<3&&(Te=X.length,Ne<2&&(se=0));for(var Ne=Te-se,He,Ye;Ne;)Ye=Math.random()*Ne--|0,He=X[Ne+se],X[Ne+se]=X[Ye+se],X[Ye+se]=He;return X},e.permute=function(X,se){for(var Te=se.length,Ne=new Array(Te);Te--;)Ne[Te]=X[se[Te]];return Ne},e.pairs=function(X){for(var se=0,Te=X.length-1,Ne,He=X[0],Ye=new Array(Te<0?0:Te);se<Te;)Ye[se]=[Ne=He,He=X[++se]];return Ye},e.transpose=function(X){if(!(Ye=X.length))return[];for(var se=-1,Te=e.min(X,b),Ne=new Array(Te);++se<Te;)for(var He=-1,Ye,kt=Ne[se]=new Array(Ye);++He<Ye;)kt[He]=X[He][se];return Ne};function b(X){return X.length}e.zip=function(){return e.transpose(arguments)},e.keys=function(X){var se=[];for(var Te in X)se.push(Te);return se},e.values=function(X){var se=[];for(var Te in X)se.push(X[Te]);return se},e.entries=function(X){var se=[];for(var Te in X)se.push({key:Te,value:X[Te]});return se},e.merge=function(X){for(var se=X.length,Te,Ne=-1,He=0,Ye,kt;++Ne<se;)He+=X[Ne].length;for(Ye=new Array(He);--se>=0;)for(kt=X[se],Te=kt.length;--Te>=0;)Ye[--He]=kt[Te];return Ye};var p=Math.abs;e.range=function(X,se,Te){if(arguments.length<3&&(Te=1,arguments.length<2&&(se=X,X=0)),(se-X)/Te===1/0)throw new Error("infinite range");var Ne=[],He=k(p(Te)),Ye=-1,kt;if(X*=He,se*=He,Te*=He,Te<0)for(;(kt=X+Te*++Ye)>se;)Ne.push(kt/He);else for(;(kt=X+Te*++Ye)<se;)Ne.push(kt/He);return Ne};function k(X){for(var se=1;X*se%1;)se*=10;return se}function E(X,se){for(var Te in se)Object.defineProperty(X.prototype,Te,{value:se[Te],enumerable:!1})}e.map=function(X,se){var Te=new S;if(X instanceof S)X.forEach(function(nt,jt){Te.set(nt,jt)});else if(Array.isArray(X)){var Ne=-1,He=X.length,Ye;if(arguments.length===1)for(;++Ne<He;)Te.set(Ne,X[Ne]);else for(;++Ne<He;)Te.set(se.call(X,Ye=X[Ne],Ne),Ye)}else for(var kt in X)Te.set(kt,X[kt]);return Te};function S(){this._=Object.create(null)}var L="__proto__",x="\0";E(S,{has:g,get:function(X){return this._[C(X)]},set:function(X,se){return this._[C(X)]=se},remove:P,keys:T,values:function(){var X=[];for(var se in this._)X.push(this._[se]);return X},entries:function(){var X=[];for(var se in this._)X.push({key:M(se),value:this._[se]});return X},size:z,empty:O,forEach:function(X){for(var se in this._)X.call(this,M(se),this._[se])}});function C(X){return(X+="")===L||X[0]===x?x+X:X}function M(X){return(X+="")[0]===x?X.slice(1):X}function g(X){return C(X)in this._}function P(X){return(X=C(X))in this._&&delete this._[X]}function T(){var X=[];for(var se in this._)X.push(M(se));return X}function z(){var X=0;for(var se in this._)++X;return X}function O(){for(var X in this._)return!1;return!0}e.nest=function(){var X={},se=[],Te=[],Ne,He;function Ye(nt,jt,gr){if(gr>=se.length)return He?He.call(X,jt):Ne?jt.sort(Ne):jt;for(var yr=-1,Hr=jt.length,qr=se[gr++],_i,bi,Zr,ai=new S,gi;++yr<Hr;)(gi=ai.get(_i=qr(bi=jt[yr])))?gi.push(bi):ai.set(_i,[bi]);return nt?(bi=nt(),Zr=function(Ii,Si){bi.set(Ii,Ye(nt,Si,gr))}):(bi={},Zr=function(Ii,Si){bi[Ii]=Ye(nt,Si,gr)}),ai.forEach(Zr),bi}function kt(nt,jt){if(jt>=se.length)return nt;var gr=[],yr=Te[jt++];return nt.forEach(function(Hr,qr){gr.push({key:Hr,values:kt(qr,jt)})}),yr?gr.sort(function(Hr,qr){return yr(Hr.key,qr.key)}):gr}return X.map=function(nt,jt){return Ye(jt,nt,0)},X.entries=function(nt){return kt(Ye(e.map,nt,0),0)},X.key=function(nt){return se.push(nt),X},X.sortKeys=function(nt){return Te[se.length-1]=nt,X},X.sortValues=function(nt){return Ne=nt,X},X.rollup=function(nt){return He=nt,X},X},e.set=function(X){var se=new V;if(X)for(var Te=0,Ne=X.length;Te<Ne;++Te)se.add(X[Te]);return se};function V(){this._=Object.create(null)}E(V,{has:g,add:function(X){return this._[C(X+="")]=!0,X},remove:P,values:T,size:z,empty:O,forEach:function(X){for(var se in this._)X.call(this,M(se))}}),e.behavior={};function G(X){return X}e.rebind=function(X,se){for(var Te=1,Ne=arguments.length,He;++Te<Ne;)X[He=arguments[Te]]=Z(X,se,se[He]);return X};function Z(X,se,Te){return function(){var Ne=Te.apply(se,arguments);return Ne===se?X:Ne}}function j(X,se){if(se in X)return se;se=se.charAt(0).toUpperCase()+se.slice(1);for(var Te=0,Ne=N.length;Te<Ne;++Te){var He=N[Te]+se;if(He in X)return He}}var N=["webkit","ms","moz","Moz","o","O"];function H(){}e.dispatch=function(){for(var X=new te,se=-1,Te=arguments.length;++se<Te;)X[arguments[se]]=oe(X);return X};function te(){}te.prototype.on=function(X,se){var Te=X.indexOf("."),Ne="";if(Te>=0&&(Ne=X.slice(Te+1),X=X.slice(0,Te)),X)return arguments.length<2?this[X].on(Ne):this[X].on(Ne,se);if(arguments.length===2){if(se==null)for(X in this)this.hasOwnProperty(X)&&this[X].on(Ne,null);return this}};function oe(X){var se=[],Te=new S;function Ne(){for(var He=se,Ye=-1,kt=He.length,nt;++Ye<kt;)(nt=He[Ye].on)&&nt.apply(this,arguments);return X}return Ne.on=function(He,Ye){var kt=Te.get(He),nt;return arguments.length<2?kt&&kt.on:(kt&&(kt.on=null,se=se.slice(0,nt=se.indexOf(kt)).concat(se.slice(nt+1)),Te.remove(He)),Ye&&se.push(Te.set(He,{on:Ye})),X)},Ne}e.event=null;function _e(){e.event.preventDefault()}function Ee(){for(var X=e.event,se;se=X.sourceEvent;)X=se;return X}function Ce(X){for(var se=new te,Te=0,Ne=arguments.length;++Te<Ne;)se[arguments[Te]]=oe(se);return se.of=function(He,Ye){return function(kt){try{var nt=kt.sourceEvent=e.event;kt.target=X,e.event=kt,se[kt.type].apply(He,Ye)}finally{e.event=nt}}},se}e.requote=function(X){return X.replace(me,"\\$&")};var me=/[\\\^\$\*\+\?\|\[\]\(\)\.\{\}]/g,ie={}.__proto__?function(X,se){X.__proto__=se}:function(X,se){for(var Te in se)X[Te]=se[Te]};function Se(X){return ie(X,Pe),X}var Le=function(X,se){return se.querySelector(X)},Ae=function(X,se){return se.querySelectorAll(X)},Fe=function(X,se){var Te=X.matches||X[j(X,"matchesSelector")];return Fe=function(Ne,He){return Te.call(Ne,He)},Fe(X,se)};typeof Sizzle=="function"&&(Le=function(X,se){return Sizzle(X,se)[0]||null},Ae=Sizzle,Fe=Sizzle.matchesSelector),e.selection=function(){return e.select(n.documentElement)};var Pe=e.selection.prototype=[];Pe.select=function(X){var se=[],Te,Ne,He,Ye;X=ge(X);for(var kt=-1,nt=this.length;++kt<nt;){se.push(Te=[]),Te.parentNode=(He=this[kt]).parentNode;for(var jt=-1,gr=He.length;++jt<gr;)(Ye=He[jt])?(Te.push(Ne=X.call(Ye,Ye.__data__,jt,kt)),Ne&&"__data__"in Ye&&(Ne.__data__=Ye.__data__)):Te.push(null)}return Se(se)};function ge(X){return typeof X=="function"?X:function(){return Le(X,this)}}Pe.selectAll=function(X){var se=[],Te,Ne;X=Re(X);for(var He=-1,Ye=this.length;++He<Ye;)for(var kt=this[He],nt=-1,jt=kt.length;++nt<jt;)(Ne=kt[nt])&&(se.push(Te=r(X.call(Ne,Ne.__data__,nt,He))),Te.parentNode=Ne);return Se(se)};function Re(X){return typeof X=="function"?X:function(){return Ae(X,this)}}var ce="http://www.w3.org/1999/xhtml",Ze={svg:"http://www.w3.org/2000/svg",xhtml:ce,xlink:"http://www.w3.org/1999/xlink",xml:"http://www.w3.org/XML/1998/namespace",xmlns:"http://www.w3.org/2000/xmlns/"};e.ns={prefix:Ze,qualify:function(X){var se=X.indexOf(":"),Te=X;return se>=0&&(Te=X.slice(0,se))!=="xmlns"&&(X=X.slice(se+1)),Ze.hasOwnProperty(Te)?{space:Ze[Te],local:X}:X}},Pe.attr=function(X,se){if(arguments.length<2){if(typeof X=="string"){var Te=this.node();return X=e.ns.qualify(X),X.local?Te.getAttributeNS(X.space,X.local):Te.getAttribute(X)}for(se in X)this.each(ut(se,X[se]));return this}return this.each(ut(X,se))};function ut(X,se){X=e.ns.qualify(X);function Te(){this.removeAttribute(X)}function Ne(){this.removeAttributeNS(X.space,X.local)}function He(){this.setAttribute(X,se)}function Ye(){this.setAttributeNS(X.space,X.local,se)}function kt(){var jt=se.apply(this,arguments);jt==null?this.removeAttribute(X):this.setAttribute(X,jt)}function nt(){var jt=se.apply(this,arguments);jt==null?this.removeAttributeNS(X.space,X.local):this.setAttributeNS(X.space,X.local,jt)}return se==null?X.local?Ne:Te:typeof se=="function"?X.local?nt:kt:X.local?Ye:He}function pt(X){return X.trim().replace(/\s+/g," ")}Pe.classed=function(X,se){if(arguments.length<2){if(typeof X=="string"){var Te=this.node(),Ne=(X=st(X)).length,He=-1;if(se=Te.classList){for(;++He<Ne;)if(!se.contains(X[He]))return!1}else for(se=Te.getAttribute("class");++He<Ne;)if(!Zt(X[He]).test(se))return!1;return!0}for(se in X)this.each(lt(se,X[se]));return this}return this.each(lt(X,se))};function Zt(X){return new RegExp("(?:^|\\s+)"+e.requote(X)+"(?:\\s+|$)","g")}function st(X){return(X+"").trim().split(/^|\s+/)}function lt(X,se){X=st(X).map(Gt);var Te=X.length;function Ne(){for(var Ye=-1;++Ye<Te;)X[Ye](this,se)}function He(){for(var Ye=-1,kt=se.apply(this,arguments);++Ye<Te;)X[Ye](this,kt)}return typeof se=="function"?He:Ne}function Gt(X){var se=Zt(X);return function(Te,Ne){if(He=Te.classList)return Ne?He.add(X):He.remove(X);var He=Te.getAttribute("class")||"";Ne?(se.lastIndex=0,se.test(He)||Te.setAttribute("class",pt(He+" "+X))):Te.setAttribute("class",pt(He.replace(se," ")))}}Pe.style=function(X,se,Te){var Ne=arguments.length;if(Ne<3){if(typeof X!="string"){Ne<2&&(se="");for(Te in X)this.each(Nt(Te,X[Te],se));return this}if(Ne<2){var He=this.node();return a(He).getComputedStyle(He,null).getPropertyValue(X)}Te=""}return this.each(Nt(X,se,Te))};function Nt(X,se,Te){function Ne(){this.style.removeProperty(X)}function He(){this.style.setProperty(X,se,Te)}function Ye(){var kt=se.apply(this,arguments);kt==null?this.style.removeProperty(X):this.style.setProperty(X,kt,Te)}return se==null?Ne:typeof se=="function"?Ye:He}Pe.property=function(X,se){if(arguments.length<2){if(typeof X=="string")return this.node()[X];for(se in X)this.each(Jt(se,X[se]));return this}return this.each(Jt(X,se))};function Jt(X,se){function Te(){delete this[X]}function Ne(){this[X]=se}function He(){var Ye=se.apply(this,arguments);Ye==null?delete this[X]:this[X]=Ye}return se==null?Te:typeof se=="function"?He:Ne}Pe.text=function(X){return arguments.length?this.each(typeof X=="function"?function(){var se=X.apply(this,arguments);this.textContent=se==null?"":se}:X==null?function(){this.textContent=""}:function(){this.textContent=X}):this.node().textContent},Pe.html=function(X){return arguments.length?this.each(typeof X=="function"?function(){var se=X.apply(this,arguments);this.innerHTML=se==null?"":se}:X==null?function(){this.innerHTML=""}:function(){this.innerHTML=X}):this.node().innerHTML},Pe.append=function(X){return X=sr(X),this.select(function(){return this.appendChild(X.apply(this,arguments))})};function sr(X){function se(){var Ne=this.ownerDocument,He=this.namespaceURI;return He===ce&&Ne.documentElement.namespaceURI===ce?Ne.createElement(X):Ne.createElementNS(He,X)}function Te(){return this.ownerDocument.createElementNS(X.space,X.local)}return typeof X=="function"?X:(X=e.ns.qualify(X)).local?Te:se}Pe.insert=function(X,se){return X=sr(X),se=ge(se),this.select(function(){return this.insertBefore(X.apply(this,arguments),se.apply(this,arguments)||null)})},Pe.remove=function(){return this.each(wr)};function wr(){var X=this.parentNode;X&&X.removeChild(this)}Pe.data=function(X,se){var Te=-1,Ne=this.length,He,Ye;if(!arguments.length){for(X=new Array(Ne=(He=this[0]).length);++Te<Ne;)(Ye=He[Te])&&(X[Te]=Ye.__data__);return X}function kt(yr,Hr){var qr,_i=yr.length,bi=Hr.length,Zr=Math.min(_i,bi),ai=new Array(bi),gi=new Array(bi),Ii=new Array(_i),Si,ei;if(se){var Ln=new S,En=new Array(_i),Un;for(qr=-1;++qr<_i;)(Si=yr[qr])&&(Ln.has(Un=se.call(Si,Si.__data__,qr))?Ii[qr]=Si:Ln.set(Un,Si),En[qr]=Un);for(qr=-1;++qr<bi;)(Si=Ln.get(Un=se.call(Hr,ei=Hr[qr],qr)))?Si!==!0&&(ai[qr]=Si,Si.__data__=ei):gi[qr]=cr(ei),Ln.set(Un,!0);for(qr=-1;++qr<_i;)qr in En&&Ln.get(En[qr])!==!0&&(Ii[qr]=yr[qr])}else{for(qr=-1;++qr<Zr;)Si=yr[qr],ei=Hr[qr],Si?(Si.__data__=ei,ai[qr]=Si):gi[qr]=cr(ei);for(;qr<bi;++qr)gi[qr]=cr(Hr[qr]);for(;qr<_i;++qr)Ii[qr]=yr[qr]}gi.update=ai,gi.parentNode=ai.parentNode=Ii.parentNode=yr.parentNode,nt.push(gi),jt.push(ai),gr.push(Ii)}var nt=Vt([]),jt=Se([]),gr=Se([]);if(typeof X=="function")for(;++Te<Ne;)kt(He=this[Te],X.call(He,He.parentNode.__data__,Te));else for(;++Te<Ne;)kt(He=this[Te],X);return jt.enter=function(){return nt},jt.exit=function(){return gr},jt};function cr(X){return{__data__:X}}Pe.datum=function(X){return arguments.length?this.property("__data__",X):this.property("__data__")},Pe.filter=function(X){var se=[],Te,Ne,He;typeof X!="function"&&(X=$e(X));for(var Ye=0,kt=this.length;Ye<kt;Ye++){se.push(Te=[]),Te.parentNode=(Ne=this[Ye]).parentNode;for(var nt=0,jt=Ne.length;nt<jt;nt++)(He=Ne[nt])&&X.call(He,He.__data__,nt,Ye)&&Te.push(He)}return Se(se)};function $e(X){return function(){return Fe(this,X)}}Pe.order=function(){for(var X=-1,se=this.length;++X<se;)for(var Te=this[X],Ne=Te.length-1,He=Te[Ne],Ye;--Ne>=0;)(Ye=Te[Ne])&&(He&&He!==Ye.nextSibling&&He.parentNode.insertBefore(Ye,He),He=Ye);return this},Pe.sort=function(X){X=St.apply(this,arguments);for(var se=-1,Te=this.length;++se<Te;)this[se].sort(X);return this.order()};function St(X){return arguments.length||(X=f),function(se,Te){return se&&Te?X(se.__data__,Te.__data__):!se-!Te}}Pe.each=function(X){return Qt(this,function(se,Te,Ne){X.call(se,se.__data__,Te,Ne)})};function Qt(X,se){for(var Te=0,Ne=X.length;Te<Ne;Te++)for(var He=X[Te],Ye=0,kt=He.length,nt;Ye<kt;Ye++)(nt=He[Ye])&&se(nt,Ye,Te);return X}Pe.call=function(X){var se=r(arguments);return X.apply(se[0]=this,se),this},Pe.empty=function(){return!this.node()},Pe.node=function(){for(var X=0,se=this.length;X<se;X++)for(var Te=this[X],Ne=0,He=Te.length;Ne<He;Ne++){var Ye=Te[Ne];if(Ye)return Ye}return null},Pe.size=function(){var X=0;return Qt(this,function(){++X}),X};function Vt(X){return ie(X,_t),X}var _t=[];e.selection.enter=Vt,e.selection.enter.prototype=_t,_t.append=Pe.append,_t.empty=Pe.empty,_t.node=Pe.node,_t.call=Pe.call,_t.size=Pe.size,_t.select=function(X){for(var se=[],Te,Ne,He,Ye,kt,nt=-1,jt=this.length;++nt<jt;){He=(Ye=this[nt]).update,se.push(Te=[]),Te.parentNode=Ye.parentNode;for(var gr=-1,yr=Ye.length;++gr<yr;)(kt=Ye[gr])?(Te.push(He[gr]=Ne=X.call(Ye.parentNode,kt.__data__,gr,nt)),Ne.__data__=kt.__data__):Te.push(null)}return Se(se)},_t.insert=function(X,se){return arguments.length<2&&(se=It(this)),Pe.insert.call(this,X,se)};function It(X){var se,Te;return function(Ne,He,Ye){var kt=X[Ye].update,nt=kt.length,jt;for(Ye!=Te&&(Te=Ye,se=0),He>=se&&(se=He+1);!(jt=kt[se])&&++se<nt;);return jt}}e.select=function(X){var se;return typeof X=="string"?(se=[Le(X,n)],se.parentNode=n.documentElement):(se=[X],se.parentNode=i(X)),Se([se])},e.selectAll=function(X){var se;return typeof X=="string"?(se=r(Ae(X,n)),se.parentNode=n.documentElement):(se=r(X),se.parentNode=null),Se([se])},Pe.on=function(X,se,Te){var Ne=arguments.length;if(Ne<3){if(typeof X!="string"){Ne<2&&(se=!1);for(Te in X)this.each(mt(Te,X[Te],se));return this}if(Ne<2)return(Ne=this.node()["__on"+X])&&Ne._;Te=!1}return this.each(mt(X,se,Te))};function mt(X,se,Te){var Ne="__on"+X,He=X.indexOf("."),Ye=lr;He>0&&(X=X.slice(0,He));var kt=er.get(X);kt&&(X=kt,Ye=Tr);function nt(){var yr=this[Ne];yr&&(this.removeEventListener(X,yr,yr.$),delete this[Ne])}function jt(){var yr=Ye(se,r(arguments));nt.call(this),this.addEventListener(X,this[Ne]=yr,yr.$=Te),yr._=se}function gr(){var yr=new RegExp("^__on([^.]+)"+e.requote(X)+"$"),Hr;for(var qr in this)if(Hr=qr.match(yr)){var _i=this[qr];this.removeEventListener(Hr[1],_i,_i.$),delete this[qr]}}return He?se?jt:nt:se?H:gr}var er=e.map({mouseenter:"mouseover",mouseleave:"mouseout"});n&&er.forEach(function(X){"on"+X in n&&er.remove(X)});function lr(X,se){return function(Te){var Ne=e.event;e.event=Te,se[0]=this.__data__;try{X.apply(this,se)}finally{e.event=Ne}}}function Tr(X,se){var Te=lr(X,se);return function(Ne){var He=this,Ye=Ne.relatedTarget;(!Ye||Ye!==He&&!(Ye.compareDocumentPosition(He)&8))&&Te.call(He,Ne)}}var Lr,ti=0;function Br(X){var se=".dragsuppress-"+ ++ti,Te="click"+se,Ne=e.select(a(X)).on("touchmove"+se,_e).on("dragstart"+se,_e).on("selectstart"+se,_e);if(Lr==null&&(Lr="onselectstart"in X?!1:j(X.style,"userSelect")),Lr){var He=i(X).style,Ye=He[Lr];He[Lr]="none"}return function(kt){if(Ne.on(se,null),Lr&&(He[Lr]=Ye),kt){var nt=function(){Ne.on(Te,null)};Ne.on(Te,function(){_e(),nt()},!0),setTimeout(nt,0)}}}e.mouse=function(X){return dt(X,Ee())};var Vr=this.navigator&&/WebKit/.test(this.navigator.userAgent)?-1:0;function dt(X,se){se.changedTouches&&(se=se.changedTouches[0]);var Te=X.ownerSVGElement||X;if(Te.createSVGPoint){var Ne=Te.createSVGPoint();if(Vr<0){var He=a(X);if(He.scrollX||He.scrollY){Te=e.select("body").append("svg").style({position:"absolute",top:0,left:0,margin:0,padding:0,border:"none"},"important");var Ye=Te[0][0].getScreenCTM();Vr=!(Ye.f||Ye.e),Te.remove()}}return Vr?(Ne.x=se.pageX,Ne.y=se.pageY):(Ne.x=se.clientX,Ne.y=se.clientY),Ne=Ne.matrixTransform(X.getScreenCTM().inverse()),[Ne.x,Ne.y]}var kt=X.getBoundingClientRect();return[se.clientX-kt.left-X.clientLeft,se.clientY-kt.top-X.clientTop]}e.touch=function(X,se,Te){if(arguments.length<3&&(Te=se,se=Ee().changedTouches),se){for(var Ne=0,He=se.length,Ye;Ne<He;++Ne)if((Ye=se[Ne]).identifier===Te)return dt(X,Ye)}},e.behavior.drag=function(){var X=Ce(He,"drag","dragstart","dragend"),se=null,Te=Ye(H,e.mouse,a,"mousemove","mouseup"),Ne=Ye(Ge,e.touch,G,"touchmove","touchend");function He(){this.on("mousedown.drag",Te).on("touchstart.drag",Ne)}function Ye(kt,nt,jt,gr,yr){return function(){var Hr=this,qr=e.event.target.correspondingElement||e.event.target,_i=Hr.parentNode,bi=X.of(Hr,arguments),Zr=0,ai=kt(),gi=".drag"+(ai==null?"":"-"+ai),Ii,Si=e.select(jt(qr)).on(gr+gi,En).on(yr+gi,Un),ei=Br(qr),Ln=nt(_i,ai);se?(Ii=se.apply(Hr,arguments),Ii=[Ii.x-Ln[0],Ii.y-Ln[1]]):Ii=[0,0],bi({type:"dragstart"});function En(){var ia=nt(_i,ai),Ea,Ia;ia&&(Ea=ia[0]-Ln[0],Ia=ia[1]-Ln[1],Zr|=Ea|Ia,Ln=ia,bi({type:"drag",x:ia[0]+Ii[0],y:ia[1]+Ii[1],dx:Ea,dy:Ia}))}function Un(){nt(_i,ai)&&(Si.on(gr+gi,null).on(yr+gi,null),ei(Zr),bi({type:"dragend"}))}}}return He.origin=function(kt){return arguments.length?(se=kt,He):se},e.rebind(He,X,"on")};function Ge(){return e.event.changedTouches[0].identifier}e.touches=function(X,se){return arguments.length<2&&(se=Ee().touches),se?r(se).map(function(Te){var Ne=dt(X,Te);return Ne.identifier=Te.identifier,Ne}):[]};var Je=1e-6,je=Je*Je,tt=Math.PI,xt=2*tt,Ie=xt-Je,xe=tt/2,ke=tt/180,vt=180/tt;function ir(X){return X>0?1:X<0?-1:0}function ar(X,se,Te){return(se[0]-X[0])*(Te[1]-X[1])-(se[1]-X[1])*(Te[0]-X[0])}function vr(X){return X>1?0:X<-1?tt:Math.acos(X)}function ii(X){return X>1?xe:X<-1?-xe:Math.asin(X)}function pi(X){return((X=Math.exp(X))-1/X)/2}function $r(X){return((X=Math.exp(X))+1/X)/2}function di(X){return((X=Math.exp(2*X))-1)/(X+1)}function ji(X){return(X=Math.sin(X/2))*X}var In=Math.SQRT2,wi=2,On=4;e.interpolateZoom=function(X,se){var Te=X[0],Ne=X[1],He=X[2],Ye=se[0],kt=se[1],nt=se[2],jt=Ye-Te,gr=kt-Ne,yr=jt*jt+gr*gr,Hr,qr;if(yr<je)qr=Math.log(nt/He)/In,Hr=function(Ii){return[Te+Ii*jt,Ne+Ii*gr,He*Math.exp(In*Ii*qr)]};else{var _i=Math.sqrt(yr),bi=(nt*nt-He*He+On*yr)/(2*He*wi*_i),Zr=(nt*nt-He*He-On*yr)/(2*nt*wi*_i),ai=Math.log(Math.sqrt(bi*bi+1)-bi),gi=Math.log(Math.sqrt(Zr*Zr+1)-Zr);qr=(gi-ai)/In,Hr=function(Ii){var Si=Ii*qr,ei=$r(ai),Ln=He/(wi*_i)*(ei*di(In*Si+ai)-pi(ai));return[Te+Ln*jt,Ne+Ln*gr,He*ei/$r(In*Si+ai)]}}return Hr.duration=qr*1e3,Hr},e.behavior.zoom=function(){var X={x:0,y:0,k:1},se,Te,Ne,He=[960,500],Ye=qn,kt=250,nt=0,jt="mousedown.zoom",gr="mousemove.zoom",yr="mouseup.zoom",Hr,qr="touchstart.zoom",_i,bi=Ce(Si,"zoomstart","zoom","zoomend"),Zr,ai,gi,Ii;ra||(ra="onwheel"in n?(Fn=function(){return-e.event.deltaY*(e.event.deltaMode?120:1)},"wheel"):"onmousewheel"in n?(Fn=function(){return e.event.wheelDelta},"mousewheel"):(Fn=function(){return-e.event.detail},"MozMousePixelScroll"));function Si(Gn){Gn.on(jt,go).on(ra+".zoom",Ms).on("dblclick.zoom",Xs).on(qr,Is)}Si.event=function(Gn){Gn.each(function(){var ja=bi.of(this,arguments),Fo=X;Bo?e.select(this).transition().each("start.zoom",function(){X=this.__chart__||{x:0,y:0,k:1},Ia(ja)}).tween("zoom:zoom",function(){var Uo=He[0],$s=He[1],Sl=Te?Te[0]:Uo/2,bu=Te?Te[1]:$s/2,dl=e.interpolateZoom([(Sl-X.x)/X.k,(bu-X.y)/X.k,Uo/X.k],[(Sl-Fo.x)/Fo.k,(bu-Fo.y)/Fo.k,Uo/Fo.k]);return function(Sc){var Me=dl(Sc),bt=Uo/Me[2];this.__chart__=X={x:Sl-Me[0]*bt,y:bu-Me[1]*bt,k:bt},yo(ja)}}).each("interrupt.zoom",function(){Da(ja)}).each("end.zoom",function(){Da(ja)}):(this.__chart__=X,Ia(ja),yo(ja),Da(ja))})},Si.translate=function(Gn){return arguments.length?(X={x:+Gn[0],y:+Gn[1],k:X.k},Ea(),Si):[X.x,X.y]},Si.scale=function(Gn){return arguments.length?(X={x:X.x,y:X.y,k:null},En(+Gn),Ea(),Si):X.k},Si.scaleExtent=function(Gn){return arguments.length?(Ye=Gn==null?qn:[+Gn[0],+Gn[1]],Si):Ye},Si.center=function(Gn){return arguments.length?(Ne=Gn&&[+Gn[0],+Gn[1]],Si):Ne},Si.size=function(Gn){return arguments.length?(He=Gn&&[+Gn[0],+Gn[1]],Si):He},Si.duration=function(Gn){return arguments.length?(kt=+Gn,Si):kt},Si.x=function(Gn){return arguments.length?(ai=Gn,Zr=Gn.copy(),X={x:0,y:0,k:1},Si):ai},Si.y=function(Gn){return arguments.length?(Ii=Gn,gi=Gn.copy(),X={x:0,y:0,k:1},Si):Ii};function ei(Gn){return[(Gn[0]-X.x)/X.k,(Gn[1]-X.y)/X.k]}function Ln(Gn){return[Gn[0]*X.k+X.x,Gn[1]*X.k+X.y]}function En(Gn){X.k=Math.max(Ye[0],Math.min(Ye[1],Gn))}function Un(Gn,ja){ja=Ln(ja),X.x+=Gn[0]-ja[0],X.y+=Gn[1]-ja[1]}function ia(Gn,ja,Fo,Uo){Gn.__chart__={x:X.x,y:X.y,k:X.k},En(Math.pow(2,Uo)),Un(Te=ja,Fo),Gn=e.select(Gn),kt>0&&(Gn=Gn.transition().duration(kt)),Gn.call(Si.event)}function Ea(){ai&&ai.domain(Zr.range().map(function(Gn){return(Gn-X.x)/X.k}).map(Zr.invert)),Ii&&Ii.domain(gi.range().map(function(Gn){return(Gn-X.y)/X.k}).map(gi.invert))}function Ia(Gn){nt++||Gn({type:"zoomstart"})}function yo(Gn){Ea(),Gn({type:"zoom",scale:X.k,translate:[X.x,X.y]})}function Da(Gn){--nt||(Gn({type:"zoomend"}),Te=null)}function go(){var Gn=this,ja=bi.of(Gn,arguments),Fo=0,Uo=e.select(a(Gn)).on(gr,bu).on(yr,dl),$s=ei(e.mouse(Gn)),Sl=Br(Gn);fa.call(Gn),Ia(ja);function bu(){Fo=1,Un(e.mouse(Gn),$s),yo(ja)}function dl(){Uo.on(gr,null).on(yr,null),Sl(Fo),Da(ja)}}function Is(){var Gn=this,ja=bi.of(Gn,arguments),Fo={},Uo=0,$s,Sl=".zoom-"+e.event.changedTouches[0].identifier,bu="touchmove"+Sl,dl="touchend"+Sl,Sc=[],Me=e.select(Gn),bt=Br(Gn);Rr(),Ia(ja),Me.on(jt,null).on(qr,Rr);function zt(){var Gr=e.touches(Gn);return $s=X.k,Gr.forEach(function(mi){mi.identifier in Fo&&(Fo[mi.identifier]=ei(mi))}),Gr}function Rr(){var Gr=e.event.target;e.select(Gr).on(bu,jr).on(dl,Nr),Sc.push(Gr);for(var mi=e.event.changedTouches,Ui=0,qi=mi.length;Ui<qi;++Ui)Fo[mi[Ui].identifier]=null;var Ei=zt(),Hn=Date.now();if(Ei.length===1){if(Hn-_i<500){var en=Ei[0];ia(Gn,en,Fo[en.identifier],Math.floor(Math.log(X.k)/Math.LN2)+1),_e()}_i=Hn}else if(Ei.length>1){var en=Ei[0],Wi=Ei[1],si=en[0]-Wi[0],Mr=en[1]-Wi[1];Uo=si*si+Mr*Mr}}function jr(){var Gr=e.touches(Gn),mi,Ui,qi,Ei;fa.call(Gn);for(var Hn=0,en=Gr.length;Hn<en;++Hn,Ei=null)if(qi=Gr[Hn],Ei=Fo[qi.identifier]){if(Ui)break;mi=qi,Ui=Ei}if(Ei){var Wi=(Wi=qi[0]-mi[0])*Wi+(Wi=qi[1]-mi[1])*Wi,si=Uo&&Math.sqrt(Wi/Uo);mi=[(mi[0]+qi[0])/2,(mi[1]+qi[1])/2],Ui=[(Ui[0]+Ei[0])/2,(Ui[1]+Ei[1])/2],En(si*$s)}_i=null,Un(mi,Ui),yo(ja)}function Nr(){if(e.event.touches.length){for(var Gr=e.event.changedTouches,mi=0,Ui=Gr.length;mi<Ui;++mi)delete Fo[Gr[mi].identifier];for(var qi in Fo)return void zt()}e.selectAll(Sc).on(Sl,null),Me.on(jt,go).on(qr,Is),bt(),Da(ja)}}function Ms(){var Gn=bi.of(this,arguments);Hr?clearTimeout(Hr):(fa.call(this),se=ei(Te=Ne||e.mouse(this)),Ia(Gn)),Hr=setTimeout(function(){Hr=null,Da(Gn)},50),_e(),En(Math.pow(2,Fn()*.002)*X.k),Un(Te,se),yo(Gn)}function Xs(){var Gn=e.mouse(this),ja=Math.log(X.k)/Math.LN2;ia(this,Gn,ei(Gn),e.event.shiftKey?Math.ceil(ja)-1:Math.floor(ja)+1)}return e.rebind(Si,bi,"on")};var qn=[0,1/0],Fn,ra;e.color=la;function la(){}la.prototype.toString=function(){return this.rgb()+""},e.hsl=Ut;function Ut(X,se,Te){return this instanceof Ut?(this.h=+X,this.s=+se,void(this.l=+Te)):arguments.length<2?X instanceof Ut?new Ut(X.h,X.s,X.l):Ha(""+X,vo,Ut):new Ut(X,se,Te)}var wt=Ut.prototype=new la;wt.brighter=function(X){return X=Math.pow(.7,arguments.length?X:1),new Ut(this.h,this.s,this.l/X)},wt.darker=function(X){return X=Math.pow(.7,arguments.length?X:1),new Ut(this.h,this.s,X*this.l)},wt.rgb=function(){return rr(this.h,this.s,this.l)};function rr(X,se,Te){var Ne,He;X=isNaN(X)?0:(X%=360)<0?X+360:X,se=isNaN(se)||se<0?0:se>1?1:se,Te=Te<0?0:Te>1?1:Te,He=Te<=.5?Te*(1+se):Te+se-Te*se,Ne=2*Te-He;function Ye(nt){return nt>360?nt-=360:nt<0&&(nt+=360),nt<60?Ne+(He-Ne)*nt/60:nt<180?He:nt<240?Ne+(He-Ne)*(240-nt)/60:Ne}function kt(nt){return Math.round(Ye(nt)*255)}return new Wa(kt(X+120),kt(X),kt(X-120))}e.hcl=nr;function nr(X,se,Te){return this instanceof nr?(this.h=+X,this.c=+se,void(this.l=+Te)):arguments.length<2?X instanceof nr?new nr(X.h,X.c,X.l):X instanceof ri?Sn(X.l,X.a,X.b):Sn((X=jn((X=e.rgb(X)).r,X.g,X.b)).l,X.a,X.b):new nr(X,se,Te)}var Er=nr.prototype=new la;Er.brighter=function(X){return new nr(this.h,this.c,Math.min(100,this.l+Qr*(arguments.length?X:1)))},Er.darker=function(X){return new nr(this.h,this.c,Math.max(0,this.l-Qr*(arguments.length?X:1)))},Er.rgb=function(){return Xr(this.h,this.c,this.l).rgb()};function Xr(X,se,Te){return isNaN(X)&&(X=0),isNaN(se)&&(se=0),new ri(Te,Math.cos(X*=ke)*se,Math.sin(X)*se)}e.lab=ri;function ri(X,se,Te){return this instanceof ri?(this.l=+X,this.a=+se,void(this.b=+Te)):arguments.length<2?X instanceof ri?new ri(X.l,X.a,X.b):X instanceof nr?Xr(X.h,X.c,X.l):jn((X=Wa(X)).r,X.g,X.b):new ri(X,se,Te)}var Qr=18,Oi=.95047,$i=1,tn=1.08883,fn=ri.prototype=new la;fn.brighter=function(X){return new ri(Math.min(100,this.l+Qr*(arguments.length?X:1)),this.a,this.b)},fn.darker=function(X){return new ri(Math.max(0,this.l-Qr*(arguments.length?X:1)),this.a,this.b)},fn.rgb=function(){return yn(this.l,this.a,this.b)};function yn(X,se,Te){var Ne=(X+16)/116,He=Ne+se/500,Ye=Ne-Te/200;return He=Ba(He)*Oi,Ne=Ba(Ne)*$i,Ye=Ba(Ye)*tn,new Wa(ma(3.2404542*He-1.5371385*Ne-.4985314*Ye),ma(-.969266*He+1.8760108*Ne+.041556*Ye),ma(.0556434*He-.2040259*Ne+1.0572252*Ye))}function Sn(X,se,Te){return X>0?new nr(Math.atan2(Te,se)*vt,Math.sqrt(se*se+Te*Te),X):new nr(NaN,NaN,X)}function Ba(X){return X>.206893034?X*X*X:(X-4/29)/7.787037}function ua(X){return X>.008856?Math.pow(X,1/3):7.787037*X+4/29}function ma(X){return Math.round(255*(X<=.00304?12.92*X:1.055*Math.pow(X,1/2.4)-.055))}e.rgb=Wa;function Wa(X,se,Te){return this instanceof Wa?(this.r=~~X,this.g=~~se,void(this.b=~~Te)):arguments.length<2?X instanceof Wa?new Wa(X.r,X.g,X.b):Ha(""+X,Wa,rr):new Wa(X,se,Te)}function Fa(X){return new Wa(X>>16,X>>8&255,X&255)}function Wo(X){return Fa(X)+""}var da=Wa.prototype=new la;da.brighter=function(X){X=Math.pow(.7,arguments.length?X:1);var se=this.r,Te=this.g,Ne=this.b,He=30;return!se&&!Te&&!Ne?new Wa(He,He,He):(se&&se<He&&(se=He),Te&&Te<He&&(Te=He),Ne&&Ne<He&&(Ne=He),new Wa(Math.min(255,se/X),Math.min(255,Te/X),Math.min(255,Ne/X)))},da.darker=function(X){return X=Math.pow(.7,arguments.length?X:1),new Wa(X*this.r,X*this.g,X*this.b)},da.hsl=function(){return vo(this.r,this.g,this.b)},da.toString=function(){return"#"+Wn(this.r)+Wn(this.g)+Wn(this.b)};function Wn(X){return X<16?"0"+Math.max(0,X).toString(16):Math.min(255,X).toString(16)}function Ha(X,se,Te){var Ne=0,He=0,Ye=0,kt,nt,jt;if(kt=/([a-z]+)\((.*)\)/.exec(X=X.toLowerCase()),kt)switch(nt=kt[2].split(","),kt[1]){case"hsl":return Te(parseFloat(nt[0]),parseFloat(nt[1])/100,parseFloat(nt[2])/100);case"rgb":return se(kr(nt[0]),kr(nt[1]),kr(nt[2]))}return(jt=Jr.get(X))?se(jt.r,jt.g,jt.b):(X!=null&&X.charAt(0)==="#"&&!isNaN(jt=parseInt(X.slice(1),16))&&(X.length===4?(Ne=(jt&3840)>>4,Ne=Ne>>4|Ne,He=jt&240,He=He>>4|He,Ye=jt&15,Ye=Ye<<4|Ye):X.length===7&&(Ne=(jt&16711680)>>16,He=(jt&65280)>>8,Ye=jt&255)),se(Ne,He,Ye))}function vo(X,se,Te){var Ne=Math.min(X/=255,se/=255,Te/=255),He=Math.max(X,se,Te),Ye=He-Ne,kt,nt,jt=(He+Ne)/2;return Ye?(nt=jt<.5?Ye/(He+Ne):Ye/(2-He-Ne),X==He?kt=(se-Te)/Ye+(se<Te?6:0):se==He?kt=(Te-X)/Ye+2:kt=(X-se)/Ye+4,kt*=60):(kt=NaN,nt=jt>0&&jt<1?0:kt),new Ut(kt,nt,jt)}function jn(X,se,Te){X=Mt(X),se=Mt(se),Te=Mt(Te);var Ne=ua((.4124564*X+.3575761*se+.1804375*Te)/Oi),He=ua((.2126729*X+.7151522*se+.072175*Te)/$i),Ye=ua((.0193339*X+.119192*se+.9503041*Te)/tn);return ri(116*He-16,500*(Ne-He),200*(He-Ye))}function Mt(X){return(X/=255)<=.04045?X/12.92:Math.pow((X+.055)/1.055,2.4)}function kr(X){var se=parseFloat(X);return X.charAt(X.length-1)==="%"?Math.round(se*2.55):se}var Jr=e.map({aliceblue:15792383,antiquewhite:16444375,aqua:65535,aquamarine:8388564,azure:15794175,beige:16119260,bisque:16770244,black:0,blanchedalmond:16772045,blue:255,blueviolet:9055202,brown:10824234,burlywood:14596231,cadetblue:6266528,chartreuse:8388352,chocolate:13789470,coral:16744272,cornflowerblue:6591981,cornsilk:16775388,crimson:14423100,cyan:65535,darkblue:139,darkcyan:35723,darkgoldenrod:12092939,darkgray:11119017,darkgreen:25600,darkgrey:11119017,darkkhaki:12433259,darkmagenta:9109643,darkolivegreen:5597999,darkorange:16747520,darkorchid:10040012,darkred:9109504,darksalmon:15308410,darkseagreen:9419919,darkslateblue:4734347,darkslategray:3100495,darkslategrey:3100495,darkturquoise:52945,darkviolet:9699539,deeppink:16716947,deepskyblue:49151,dimgray:6908265,dimgrey:6908265,dodgerblue:2003199,firebrick:11674146,floralwhite:16775920,forestgreen:2263842,fuchsia:16711935,gainsboro:14474460,ghostwhite:16316671,gold:16766720,goldenrod:14329120,gray:8421504,green:32768,greenyellow:11403055,grey:8421504,honeydew:15794160,hotpink:16738740,indianred:13458524,indigo:4915330,ivory:16777200,khaki:15787660,lavender:15132410,lavenderblush:16773365,lawngreen:8190976,lemonchiffon:16775885,lightblue:11393254,lightcoral:15761536,lightcyan:14745599,lightgoldenrodyellow:16448210,lightgray:13882323,lightgreen:9498256,lightgrey:13882323,lightpink:16758465,lightsalmon:16752762,lightseagreen:2142890,lightskyblue:8900346,lightslategray:7833753,lightslategrey:7833753,lightsteelblue:11584734,lightyellow:16777184,lime:65280,limegreen:3329330,linen:16445670,magenta:16711935,maroon:8388608,mediumaquamarine:6737322,mediumblue:205,mediumorchid:12211667,mediumpurple:9662683,mediumseagreen:3978097,mediumslateblue:8087790,mediumspringgreen:64154,mediumturquoise:4772300,mediumvioletred:13047173,midnightblue:1644912,mintcream:16121850,mistyrose:16770273,moccasin:16770229,navajowhite:16768685,navy:128,oldlace:16643558,olive:8421376,olivedrab:7048739,orange:16753920,orangered:16729344,orchid:14315734,palegoldenrod:15657130,palegreen:10025880,paleturquoise:11529966,palevioletred:14381203,papayawhip:16773077,peachpuff:16767673,peru:13468991,pink:16761035,plum:14524637,powderblue:11591910,purple:8388736,rebeccapurple:6697881,red:16711680,rosybrown:12357519,royalblue:4286945,saddlebrown:9127187,salmon:16416882,sandybrown:16032864,seagreen:3050327,seashell:16774638,sienna:10506797,silver:12632256,skyblue:8900331,slateblue:6970061,slategray:7372944,slategrey:7372944,snow:16775930,springgreen:65407,steelblue:4620980,tan:13808780,teal:32896,thistle:14204888,tomato:16737095,turquoise:4251856,violet:15631086,wheat:16113331,white:16777215,whitesmoke:16119285,yellow:16776960,yellowgreen:10145074});Jr.forEach(function(X,se){Jr.set(X,Fa(se))});function vi(X){return typeof X=="function"?X:function(){return X}}e.functor=vi,e.xhr=hn(G);function hn(X){return function(se,Te,Ne){return arguments.length===2&&typeof Te=="function"&&(Ne=Te,Te=null),An(se,Te,X,Ne)}}function An(X,se,Te,Ne){var He={},Ye=e.dispatch("beforesend","progress","load","error"),kt={},nt=new XMLHttpRequest,jt=null;self.XDomainRequest&&!("withCredentials"in nt)&&/^(http(s)?:)?\/\//.test(X)&&(nt=new XDomainRequest),"onload"in nt?nt.onload=nt.onerror=gr:nt.onreadystatechange=function(){nt.readyState>3&&gr()};function gr(){var yr=nt.status,Hr;if(!yr&&Li(nt)||yr>=200&&yr<300||yr===304){try{Hr=Te.call(He,nt)}catch(qr){Ye.error.call(He,qr);return}Ye.load.call(He,Hr)}else Ye.error.call(He,nt)}return nt.onprogress=function(yr){var Hr=e.event;e.event=yr;try{Ye.progress.call(He,nt)}finally{e.event=Hr}},He.header=function(yr,Hr){return yr=(yr+"").toLowerCase(),arguments.length<2?kt[yr]:(Hr==null?delete kt[yr]:kt[yr]=Hr+"",He)},He.mimeType=function(yr){return arguments.length?(se=yr==null?null:yr+"",He):se},He.responseType=function(yr){return arguments.length?(jt=yr,He):jt},He.response=function(yr){return Te=yr,He},["get","post"].forEach(function(yr){He[yr]=function(){return He.send.apply(He,[yr].concat(r(arguments)))}}),He.send=function(yr,Hr,qr){if(arguments.length===2&&typeof Hr=="function"&&(qr=Hr,Hr=null),nt.open(yr,X,!0),se!=null&&!("accept"in kt)&&(kt.accept=se+",*/*"),nt.setRequestHeader)for(var _i in kt)nt.setRequestHeader(_i,kt[_i]);return se!=null&&nt.overrideMimeType&&nt.overrideMimeType(se),jt!=null&&(nt.responseType=jt),qr!=null&&He.on("error",qr).on("load",function(bi){qr(null,bi)}),Ye.beforesend.call(He,nt),nt.send(Hr==null?null:Hr),He},He.abort=function(){return nt.abort(),He},e.rebind(He,Ye,"on"),Ne==null?He:He.get(Mn(Ne))}function Mn(X){return X.length===1?function(se,Te){X(se==null?Te:null)}:X}function Li(X){var se=X.responseType;return se&&se!=="text"?X.response:X.responseText}e.dsv=function(X,se){var Te=new RegExp('["'+X+`
+]`),Ne=X.charCodeAt(0);function He(gr,yr,Hr){arguments.length<3&&(Hr=yr,yr=null);var qr=An(gr,se,yr==null?Ye:kt(yr),Hr);return qr.row=function(_i){return arguments.length?qr.response((yr=_i)==null?Ye:kt(_i)):yr},qr}function Ye(gr){return He.parse(gr.responseText)}function kt(gr){return function(yr){return He.parse(yr.responseText,gr)}}He.parse=function(gr,yr){var Hr;return He.parseRows(gr,function(qr,_i){if(Hr)return Hr(qr,_i-1);var bi=function(Zr){for(var ai={},gi=qr.length,Ii=0;Ii<gi;++Ii)ai[qr[Ii]]=Zr[Ii];return ai};Hr=yr?function(Zr,ai){return yr(bi(Zr),ai)}:bi})},He.parseRows=function(gr,yr){var Hr={},qr={},_i=[],bi=gr.length,Zr=0,ai=0,gi,Ii;function Si(){if(Zr>=bi)return qr;if(Ii)return Ii=!1,Hr;var Ln=Zr;if(gr.charCodeAt(Ln)===34){for(var En=Ln;En++<bi;)if(gr.charCodeAt(En)===34){if(gr.charCodeAt(En+1)!==34)break;++En}Zr=En+2;var Un=gr.charCodeAt(En+1);return Un===13?(Ii=!0,gr.charCodeAt(En+2)===10&&++Zr):Un===10&&(Ii=!0),gr.slice(Ln+1,En).replace(/""/g,'"')}for(;Zr<bi;){var Un=gr.charCodeAt(Zr++),ia=1;if(Un===10)Ii=!0;else if(Un===13)Ii=!0,gr.charCodeAt(Zr)===10&&(++Zr,++ia);else if(Un!==Ne)continue;return gr.slice(Ln,Zr-ia)}return gr.slice(Ln)}for(;(gi=Si())!==qr;){for(var ei=[];gi!==Hr&&gi!==qr;)ei.push(gi),gi=Si();yr&&(ei=yr(ei,ai++))==null||_i.push(ei)}return _i},He.format=function(gr){if(Array.isArray(gr[0]))return He.formatRows(gr);var yr=new V,Hr=[];return gr.forEach(function(qr){for(var _i in qr)yr.has(_i)||Hr.push(yr.add(_i))}),[Hr.map(jt).join(X)].concat(gr.map(function(qr){return Hr.map(function(_i){return jt(qr[_i])}).join(X)})).join(`
+`)},He.formatRows=function(gr){return gr.map(nt).join(`
+`)};function nt(gr){return gr.map(jt).join(X)}function jt(gr){return Te.test(gr)?'"'+gr.replace(/\"/g,'""')+'"':gr}return He},e.csv=e.dsv(",","text/csv"),e.tsv=e.dsv("	","text/tab-separated-values");var _n,ya,Jn,Ma,_o=this[j(this,"requestAnimationFrame")]||function(X){setTimeout(X,17)};e.timer=function(){No.apply(this,arguments)};function No(X,se,Te){var Ne=arguments.length;Ne<2&&(se=0),Ne<3&&(Te=Date.now());var He=Te+se,Ye={c:X,t:He,n:null};return ya?ya.n=Ye:_n=Ye,ya=Ye,Jn||(Ma=clearTimeout(Ma),Jn=1,_o(po)),Ye}function po(){var X=Lo(),se=ko()-X;se>24?(isFinite(se)&&(clearTimeout(Ma),Ma=setTimeout(po,se)),Jn=0):(Jn=1,_o(po))}e.timer.flush=function(){Lo(),ko()};function Lo(){for(var X=Date.now(),se=_n;se;)X>=se.t&&se.c(X-se.t)&&(se.c=null),se=se.n;return X}function ko(){for(var X,se=_n,Te=1/0;se;)se.c?(se.t<Te&&(Te=se.t),se=(X=se).n):se=X?X.n=se.n:_n=se.n;return ya=X,Te}e.round=function(X,se){return se?Math.round(X*(se=Math.pow(10,se)))/se:Math.round(X)},e.geom={};function Ds(X){return X[0]}function Fs(X){return X[1]}e.geom.hull=function(X){var se=Ds,Te=Fs;if(arguments.length)return Ne(X);function Ne(He){if(He.length<3)return[];var Ye=vi(se),kt=vi(Te),nt,jt=He.length,gr=[],yr=[];for(nt=0;nt<jt;nt++)gr.push([+Ye.call(this,He[nt],nt),+kt.call(this,He[nt],nt),nt]);for(gr.sort(ul),nt=0;nt<jt;nt++)yr.push([gr[nt][0],-gr[nt][1]]);var Hr=ll(gr),qr=ll(yr),_i=qr[0]===Hr[0],bi=qr[qr.length-1]===Hr[Hr.length-1],Zr=[];for(nt=Hr.length-1;nt>=0;--nt)Zr.push(He[gr[Hr[nt]][2]]);for(nt=+_i;nt<qr.length-bi;++nt)Zr.push(He[gr[qr[nt]][2]]);return Zr}return Ne.x=function(He){return arguments.length?(se=He,Ne):se},Ne.y=function(He){return arguments.length?(Te=He,Ne):Te},Ne};function ll(X){for(var se=X.length,Te=[0,1],Ne=2,He=2;He<se;He++){for(;Ne>1&&ar(X[Te[Ne-2]],X[Te[Ne-1]],X[He])<=0;)--Ne;Te[Ne++]=He}return Te.slice(0,Ne)}function ul(X,se){return X[0]-se[0]||X[1]-se[1]}e.geom.polygon=function(X){return ie(X,zl),X};var zl=e.geom.polygon.prototype=[];zl.area=function(){for(var X=-1,se=this.length,Te,Ne=this[se-1],He=0;++X<se;)Te=Ne,Ne=this[X],He+=Te[1]*Ne[0]-Te[0]*Ne[1];return He*.5},zl.centroid=function(X){var se=-1,Te=this.length,Ne=0,He=0,Ye,kt=this[Te-1],nt;for(arguments.length||(X=-1/(6*this.area()));++se<Te;)Ye=kt,kt=this[se],nt=Ye[0]*kt[1]-kt[0]*Ye[1],Ne+=(Ye[0]+kt[0])*nt,He+=(Ye[1]+kt[1])*nt;return[Ne*X,He*X]},zl.clip=function(X){for(var se,Te=As(X),Ne=-1,He=this.length-As(this),Ye,kt,nt=this[He-1],jt,gr,yr;++Ne<He;){for(se=X.slice(),X.length=0,jt=this[Ne],gr=se[(kt=se.length-Te)-1],Ye=-1;++Ye<kt;)yr=se[Ye],us(yr,nt,jt)?(us(gr,nt,jt)||X.push(il(gr,yr,nt,jt)),X.push(yr)):us(gr,nt,jt)&&X.push(il(gr,yr,nt,jt)),gr=yr;Te&&X.push(X[0]),nt=jt}return X};function us(X,se,Te){return(Te[0]-se[0])*(X[1]-se[1])<(Te[1]-se[1])*(X[0]-se[0])}function il(X,se,Te,Ne){var He=X[0],Ye=Te[0],kt=se[0]-He,nt=Ne[0]-Ye,jt=X[1],gr=Te[1],yr=se[1]-jt,Hr=Ne[1]-gr,qr=(nt*(jt-gr)-Hr*(He-Ye))/(Hr*kt-nt*yr);return[He+qr*kt,jt+qr*yr]}function As(X){var se=X[0],Te=X[X.length-1];return!(se[0]-Te[0]||se[1]-Te[1])}var cl,Ks,zs,Io=[],ls,Zl,Su=[];function nc(){Os(this),this.edge=this.site=this.circle=null}function bs(X){var se=Io.pop()||new nc;return se.site=X,se}function Rn(X){Oo(X),zs.remove(X),Io.push(X),Os(X)}function _a(X){var se=X.circle,Te=se.x,Ne=se.cy,He={x:Te,y:Ne},Ye=X.P,kt=X.N,nt=[X];Rn(X);for(var jt=Ye;jt.circle&&p(Te-jt.circle.x)<Je&&p(Ne-jt.circle.cy)<Je;)Ye=jt.P,nt.unshift(jt),Rn(jt),jt=Ye;nt.unshift(jt),Oo(jt);for(var gr=kt;gr.circle&&p(Te-gr.circle.x)<Je&&p(Ne-gr.circle.cy)<Je;)kt=gr.N,nt.push(gr),Rn(gr),gr=kt;nt.push(gr),Oo(gr);var yr=nt.length,Hr;for(Hr=1;Hr<yr;++Hr)gr=nt[Hr],jt=nt[Hr-1],pl(gr.edge,jt.site,gr.site,He);jt=nt[0],gr=nt[yr-1],gr.edge=rf(jt.site,gr.site,null,He),aa(jt),aa(gr)}function Vu(X){for(var se=X.x,Te=X.y,Ne,He,Ye,kt,nt=zs._;nt;)if(Ye=Ol(nt,Te)-se,Ye>Je)nt=nt.L;else if(kt=se-xo(nt,Te),kt>Je){if(!nt.R){Ne=nt;break}nt=nt.R}else{Ye>-Je?(Ne=nt.P,He=nt):kt>-Je?(Ne=nt,He=nt.N):Ne=He=nt;break}var jt=bs(X);if(zs.insert(Ne,jt),!(!Ne&&!He)){if(Ne===He){Oo(Ne),He=bs(Ne.site),zs.insert(jt,He),jt.edge=He.edge=rf(Ne.site,jt.site),aa(Ne),aa(He);return}if(!He){jt.edge=rf(Ne.site,jt.site);return}Oo(Ne),Oo(He);var gr=Ne.site,yr=gr.x,Hr=gr.y,qr=X.x-yr,_i=X.y-Hr,bi=He.site,Zr=bi.x-yr,ai=bi.y-Hr,gi=2*(qr*ai-_i*Zr),Ii=qr*qr+_i*_i,Si=Zr*Zr+ai*ai,ei={x:(ai*Ii-_i*Si)/gi+yr,y:(qr*Si-Zr*Ii)/gi+Hr};pl(He.edge,gr,bi,ei),jt.edge=rf(gr,X,null,ei),He.edge=rf(X,bi,null,ei),aa(Ne),aa(He)}}function Ol(X,se){var Te=X.site,Ne=Te.x,He=Te.y,Ye=He-se;if(!Ye)return Ne;var kt=X.P;if(!kt)return-1/0;Te=kt.site;var nt=Te.x,jt=Te.y,gr=jt-se;if(!gr)return nt;var yr=nt-Ne,Hr=1/Ye-1/gr,qr=yr/gr;return Hr?(-qr+Math.sqrt(qr*qr-2*Hr*(yr*yr/(-2*gr)-jt+gr/2+He-Ye/2)))/Hr+Ne:(Ne+nt)/2}function xo(X,se){var Te=X.N;if(Te)return Ol(Te,se);var Ne=X.site;return Ne.y===se?Ne.x:1/0}function Yl(X){this.site=X,this.edges=[]}Yl.prototype.prepare=function(){for(var X=this.edges,se=X.length,Te;se--;)Te=X[se].edge,(!Te.b||!Te.a)&&X.splice(se,1);return X.sort(Hl),X.length};function Ns(X){for(var se=X[0][0],Te=X[1][0],Ne=X[0][1],He=X[1][1],Ye,kt,nt,jt,gr=Ks,yr=gr.length,Hr,qr,_i,bi,Zr,ai;yr--;)if(Hr=gr[yr],!(!Hr||!Hr.prepare()))for(_i=Hr.edges,bi=_i.length,qr=0;qr<bi;)ai=_i[qr].end(),nt=ai.x,jt=ai.y,Zr=_i[++qr%bi].start(),Ye=Zr.x,kt=Zr.y,(p(nt-Ye)>Je||p(jt-kt)>Je)&&(_i.splice(qr,0,new Zc(Uf(Hr.site,ai,p(nt-se)<Je&&He-jt>Je?{x:se,y:p(Ye-se)<Je?kt:He}:p(jt-He)<Je&&Te-nt>Je?{x:p(kt-He)<Je?Ye:Te,y:He}:p(nt-Te)<Je&&jt-Ne>Je?{x:Te,y:p(Ye-Te)<Je?kt:Ne}:p(jt-Ne)<Je&&nt-se>Je?{x:p(kt-Ne)<Je?Ye:se,y:Ne}:null),Hr.site,null)),++bi)}function Hl(X,se){return se.angle-X.angle}function ac(){Os(this),this.x=this.y=this.arc=this.site=this.cy=null}function aa(X){var se=X.P,Te=X.N;if(!(!se||!Te)){var Ne=se.site,He=X.site,Ye=Te.site;if(Ne!==Ye){var kt=He.x,nt=He.y,jt=Ne.x-kt,gr=Ne.y-nt,yr=Ye.x-kt,ai=Ye.y-nt,Hr=2*(jt*ai-gr*yr);if(!(Hr>=-je)){var qr=jt*jt+gr*gr,_i=yr*yr+ai*ai,bi=(ai*qr-gr*_i)/Hr,Zr=(jt*_i-yr*qr)/Hr,ai=Zr+nt,gi=Su.pop()||new ac;gi.arc=X,gi.site=He,gi.x=bi+kt,gi.y=ai+Math.sqrt(bi*bi+Zr*Zr),gi.cy=ai,X.circle=gi;for(var Ii=null,Si=Zl._;Si;)if(gi.y<Si.y||gi.y===Si.y&&gi.x<=Si.x)if(Si.L)Si=Si.L;else{Ii=Si.P;break}else if(Si.R)Si=Si.R;else{Ii=Si;break}Zl.insert(Ii,gi),Ii||(ls=gi)}}}}function Oo(X){var se=X.circle;se&&(se.P||(ls=se.N),Zl.remove(se),Su.push(se),Os(se),X.circle=null)}function qo(X,se,Te,Ne){return function(He){var Ye=He.a,kt=He.b,nt=Ye.x,jt=Ye.y,gr=kt.x,yr=kt.y,Hr=0,qr=1,_i=gr-nt,bi=yr-jt,Zr;if(Zr=X-nt,!(!_i&&Zr>0)){if(Zr/=_i,_i<0){if(Zr<Hr)return;Zr<qr&&(qr=Zr)}else if(_i>0){if(Zr>qr)return;Zr>Hr&&(Hr=Zr)}if(Zr=Te-nt,!(!_i&&Zr<0)){if(Zr/=_i,_i<0){if(Zr>qr)return;Zr>Hr&&(Hr=Zr)}else if(_i>0){if(Zr<Hr)return;Zr<qr&&(qr=Zr)}if(Zr=se-jt,!(!bi&&Zr>0)){if(Zr/=bi,bi<0){if(Zr<Hr)return;Zr<qr&&(qr=Zr)}else if(bi>0){if(Zr>qr)return;Zr>Hr&&(Hr=Zr)}if(Zr=Ne-jt,!(!bi&&Zr<0)){if(Zr/=bi,bi<0){if(Zr>qr)return;Zr>Hr&&(Hr=Zr)}else if(bi>0){if(Zr<Hr)return;Zr<qr&&(qr=Zr)}return Hr>0&&(He.a={x:nt+Hr*_i,y:jt+Hr*bi}),qr<1&&(He.b={x:nt+qr*_i,y:jt+qr*bi}),He}}}}}}function ql(X){for(var se=cl,Te=qo(X[0][0],X[0][1],X[1][0],X[1][1]),Ne=se.length,He;Ne--;)He=se[Ne],(!Pc(He,X)||!Te(He)||p(He.a.x-He.b.x)<Je&&p(He.a.y-He.b.y)<Je)&&(He.a=He.b=null,se.splice(Ne,1))}function Pc(X,se){var Te=X.b;if(Te)return!0;var Ne=X.a,He=se[0][0],Ye=se[1][0],kt=se[0][1],nt=se[1][1],jt=X.l,gr=X.r,yr=jt.x,Hr=jt.y,qr=gr.x,_i=gr.y,bi=(yr+qr)/2,Zr=(Hr+_i)/2,ai,gi;if(_i===Hr){if(bi<He||bi>=Ye)return;if(yr>qr){if(!Ne)Ne={x:bi,y:kt};else if(Ne.y>=nt)return;Te={x:bi,y:nt}}else{if(!Ne)Ne={x:bi,y:nt};else if(Ne.y<kt)return;Te={x:bi,y:kt}}}else if(ai=(yr-qr)/(_i-Hr),gi=Zr-ai*bi,ai<-1||ai>1)if(yr>qr){if(!Ne)Ne={x:(kt-gi)/ai,y:kt};else if(Ne.y>=nt)return;Te={x:(nt-gi)/ai,y:nt}}else{if(!Ne)Ne={x:(nt-gi)/ai,y:nt};else if(Ne.y<kt)return;Te={x:(kt-gi)/ai,y:kt}}else if(Hr<_i){if(!Ne)Ne={x:He,y:ai*He+gi};else if(Ne.x>=Ye)return;Te={x:Ye,y:ai*Ye+gi}}else{if(!Ne)Ne={x:Ye,y:ai*Ye+gi};else if(Ne.x<He)return;Te={x:He,y:ai*He+gi}}return X.a=Ne,X.b=Te,!0}function Do(X,se){this.l=X,this.r=se,this.a=this.b=null}function rf(X,se,Te,Ne){var He=new Do(X,se);return cl.push(He),Te&&pl(He,X,se,Te),Ne&&pl(He,se,X,Ne),Ks[X.i].edges.push(new Zc(He,X,se)),Ks[se.i].edges.push(new Zc(He,se,X)),He}function Uf(X,se,Te){var Ne=new Do(X,null);return Ne.a=se,Ne.b=Te,cl.push(Ne),Ne}function pl(X,se,Te,Ne){!X.a&&!X.b?(X.a=Ne,X.l=se,X.r=Te):X.l===Te?X.b=Ne:X.a=Ne}function Zc(X,se,Te){var Ne=X.a,He=X.b;this.edge=X,this.site=se,this.angle=Te?Math.atan2(Te.y-se.y,Te.x-se.x):X.l===se?Math.atan2(He.x-Ne.x,Ne.y-He.y):Math.atan2(Ne.x-He.x,He.y-Ne.y)}Zc.prototype={start:function(){return this.edge.l===this.site?this.edge.a:this.edge.b},end:function(){return this.edge.l===this.site?this.edge.b:this.edge.a}};function Kl(){this._=null}function Os(X){X.U=X.C=X.L=X.R=X.P=X.N=null}Kl.prototype={insert:function(X,se){var Te,Ne,He;if(X){if(se.P=X,se.N=X.N,X.N&&(X.N.P=se),X.N=se,X.R){for(X=X.R;X.L;)X=X.L;X.L=se}else X.R=se;Te=X}else this._?(X=Cf(this._),se.P=null,se.N=X,X.P=X.L=se,Te=X):(se.P=se.N=null,this._=se,Te=null);for(se.L=se.R=null,se.U=Te,se.C=!0,X=se;Te&&Te.C;)Ne=Te.U,Te===Ne.L?(He=Ne.R,He&&He.C?(Te.C=He.C=!1,Ne.C=!0,X=Ne):(X===Te.R&&(yu(this,Te),X=Te,Te=X.U),Te.C=!1,Ne.C=!0,oc(this,Ne))):(He=Ne.L,He&&He.C?(Te.C=He.C=!1,Ne.C=!0,X=Ne):(X===Te.L&&(oc(this,Te),X=Te,Te=X.U),Te.C=!1,Ne.C=!0,yu(this,Ne))),Te=X.U;this._.C=!1},remove:function(X){X.N&&(X.N.P=X.P),X.P&&(X.P.N=X.N),X.N=X.P=null;var se=X.U,Te,Ne=X.L,He=X.R,Ye,kt;if(Ne?He?Ye=Cf(He):Ye=Ne:Ye=He,se?se.L===X?se.L=Ye:se.R=Ye:this._=Ye,Ne&&He?(kt=Ye.C,Ye.C=X.C,Ye.L=Ne,Ne.U=Ye,Ye!==He?(se=Ye.U,Ye.U=X.U,X=Ye.R,se.L=X,Ye.R=He,He.U=Ye):(Ye.U=se,se=Ye,X=Ye.R)):(kt=X.C,X=Ye),X&&(X.U=se),!kt){if(X&&X.C){X.C=!1;return}do{if(X===this._)break;if(X===se.L){if(Te=se.R,Te.C&&(Te.C=!1,se.C=!0,yu(this,se),Te=se.R),Te.L&&Te.L.C||Te.R&&Te.R.C){(!Te.R||!Te.R.C)&&(Te.L.C=!1,Te.C=!0,oc(this,Te),Te=se.R),Te.C=se.C,se.C=Te.R.C=!1,yu(this,se),X=this._;break}}else if(Te=se.L,Te.C&&(Te.C=!1,se.C=!0,oc(this,se),Te=se.L),Te.L&&Te.L.C||Te.R&&Te.R.C){(!Te.L||!Te.L.C)&&(Te.R.C=!1,Te.C=!0,yu(this,Te),Te=se.L),Te.C=se.C,se.C=Te.L.C=!1,oc(this,se),X=this._;break}Te.C=!0,X=se,se=se.U}while(!X.C);X&&(X.C=!1)}}};function yu(X,se){var Te=se,Ne=se.R,He=Te.U;He?He.L===Te?He.L=Ne:He.R=Ne:X._=Ne,Ne.U=He,Te.U=Ne,Te.R=Ne.L,Te.R&&(Te.R.U=Te),Ne.L=Te}function oc(X,se){var Te=se,Ne=se.L,He=Te.U;He?He.L===Te?He.L=Ne:He.R=Ne:X._=Ne,Ne.U=He,Te.U=Ne,Te.L=Ne.R,Te.L&&(Te.L.U=Te),Ne.R=Te}function Cf(X){for(;X.L;)X=X.L;return X}function sc(X,se){var Te=X.sort(Vh).pop(),Ne,He,Ye;for(cl=[],Ks=new Array(X.length),zs=new Kl,Zl=new Kl;;)if(Ye=ls,Te&&(!Ye||Te.y<Ye.y||Te.y===Ye.y&&Te.x<Ye.x))(Te.x!==Ne||Te.y!==He)&&(Ks[Te.i]=new Yl(Te),Vu(Te),Ne=Te.x,He=Te.y),Te=X.pop();else if(Ye)_a(Ye.arc);else break;se&&(ql(se),Ns(se));var kt={cells:Ks,edges:cl};return zs=Zl=cl=Ks=null,kt}function Vh(X,se){return se.y-X.y||se.x-X.x}e.geom.voronoi=function(X){var se=Ds,Te=Fs,Ne=se,He=Te,Ye=Lf;if(X)return kt(X);function kt(jt){var gr=new Array(jt.length),yr=Ye[0][0],Hr=Ye[0][1],qr=Ye[1][0],_i=Ye[1][1];return sc(nt(jt),Ye).cells.forEach(function(bi,Zr){var ai=bi.edges,gi=bi.site,Ii=gr[Zr]=ai.length?ai.map(function(Si){var ei=Si.start();return[ei.x,ei.y]}):gi.x>=yr&&gi.x<=qr&&gi.y>=Hr&&gi.y<=_i?[[yr,_i],[qr,_i],[qr,Hr],[yr,Hr]]:[];Ii.point=jt[Zr]}),gr}function nt(jt){return jt.map(function(gr,yr){return{x:Math.round(Ne(gr,yr)/Je)*Je,y:Math.round(He(gr,yr)/Je)*Je,i:yr}})}return kt.links=function(jt){return sc(nt(jt)).edges.filter(function(gr){return gr.l&&gr.r}).map(function(gr){return{source:jt[gr.l.i],target:jt[gr.r.i]}})},kt.triangles=function(jt){var gr=[];return sc(nt(jt)).cells.forEach(function(yr,Hr){for(var qr=yr.site,_i=yr.edges.sort(Hl),bi=-1,Zr=_i.length,ai,gi,Ii=_i[Zr-1].edge,Si=Ii.l===qr?Ii.r:Ii.l;++bi<Zr;)ai=Ii,gi=Si,Ii=_i[bi].edge,Si=Ii.l===qr?Ii.r:Ii.l,Hr<gi.i&&Hr<Si.i&&cs(qr,gi,Si)<0&&gr.push([jt[Hr],jt[gi.i],jt[Si.i]])}),gr},kt.x=function(jt){return arguments.length?(Ne=vi(se=jt),kt):se},kt.y=function(jt){return arguments.length?(He=vi(Te=jt),kt):Te},kt.clipExtent=function(jt){return arguments.length?(Ye=jt==null?Lf:jt,kt):Ye===Lf?null:Ye},kt.size=function(jt){return arguments.length?kt.clipExtent(jt&&[[0,0],jt]):Ye===Lf?null:Ye&&Ye[1]},kt};var Lf=[[-1e6,-1e6],[1e6,1e6]];function cs(X,se,Te){return(X.x-Te.x)*(se.y-X.y)-(X.x-se.x)*(Te.y-X.y)}e.geom.delaunay=function(X){return e.geom.voronoi().triangles(X)},e.geom.quadtree=function(X,se,Te,Ne,He){var Ye=Ds,kt=Fs,nt;if(nt=arguments.length)return Ye=nf,kt=Vf,nt===3&&(He=Te,Ne=se,Te=se=0),jt(X);function jt(gr){var yr,Hr=vi(Ye),qr=vi(kt),_i,bi,Zr,ai,gi,Ii,Si,ei;if(se!=null)gi=se,Ii=Te,Si=Ne,ei=He;else if(Si=ei=-(gi=Ii=1/0),_i=[],bi=[],ai=gr.length,nt)for(Zr=0;Zr<ai;++Zr)yr=gr[Zr],yr.x<gi&&(gi=yr.x),yr.y<Ii&&(Ii=yr.y),yr.x>Si&&(Si=yr.x),yr.y>ei&&(ei=yr.y),_i.push(yr.x),bi.push(yr.y);else for(Zr=0;Zr<ai;++Zr){var Ln=+Hr(yr=gr[Zr],Zr),En=+qr(yr,Zr);Ln<gi&&(gi=Ln),En<Ii&&(Ii=En),Ln>Si&&(Si=Ln),En>ei&&(ei=En),_i.push(Ln),bi.push(En)}var Un=Si-gi,ia=ei-Ii;Un>ia?ei=Ii+Un:Si=gi+ia;function Ea(Da,go,Is,Ms,Xs,Gn,ja,Fo){if(!(isNaN(Is)||isNaN(Ms)))if(Da.leaf){var Uo=Da.x,$s=Da.y;if(Uo!=null)if(p(Uo-Is)+p($s-Ms)<.01)Ia(Da,go,Is,Ms,Xs,Gn,ja,Fo);else{var Sl=Da.point;Da.x=Da.y=Da.point=null,Ia(Da,Sl,Uo,$s,Xs,Gn,ja,Fo),Ia(Da,go,Is,Ms,Xs,Gn,ja,Fo)}else Da.x=Is,Da.y=Ms,Da.point=go}else Ia(Da,go,Is,Ms,Xs,Gn,ja,Fo)}function Ia(Da,go,Is,Ms,Xs,Gn,ja,Fo){var Uo=(Xs+ja)*.5,$s=(Gn+Fo)*.5,Sl=Is>=Uo,bu=Ms>=$s,dl=bu<<1|Sl;Da.leaf=!1,Da=Da.nodes[dl]||(Da.nodes[dl]=Jl()),Sl?Xs=Uo:ja=Uo,bu?Gn=$s:Fo=$s,Ea(Da,go,Is,Ms,Xs,Gn,ja,Fo)}var yo=Jl();if(yo.add=function(Da){Ea(yo,Da,+Hr(Da,++Zr),+qr(Da,Zr),gi,Ii,Si,ei)},yo.visit=function(Da){fl(Da,yo,gi,Ii,Si,ei)},yo.find=function(Da){return lc(yo,Da[0],Da[1],gi,Ii,Si,ei)},Zr=-1,se==null){for(;++Zr<ai;)Ea(yo,gr[Zr],_i[Zr],bi[Zr],gi,Ii,Si,ei);--Zr}else gr.forEach(yo.add);return _i=bi=gr=yr=null,yo}return jt.x=function(gr){return arguments.length?(Ye=gr,jt):Ye},jt.y=function(gr){return arguments.length?(kt=gr,jt):kt},jt.extent=function(gr){return arguments.length?(gr==null?se=Te=Ne=He=null:(se=+gr[0][0],Te=+gr[0][1],Ne=+gr[1][0],He=+gr[1][1]),jt):se==null?null:[[se,Te],[Ne,He]]},jt.size=function(gr){return arguments.length?(gr==null?se=Te=Ne=He=null:(se=Te=0,Ne=+gr[0],He=+gr[1]),jt):se==null?null:[Ne-se,He-Te]},jt};function nf(X){return X.x}function Vf(X){return X.y}function Jl(){return{leaf:!0,nodes:[],point:null,x:null,y:null}}function fl(X,se,Te,Ne,He,Ye){if(!X(se,Te,Ne,He,Ye)){var kt=(Te+He)*.5,nt=(Ne+Ye)*.5,jt=se.nodes;jt[0]&&fl(X,jt[0],Te,Ne,kt,nt),jt[1]&&fl(X,jt[1],kt,Ne,He,nt),jt[2]&&fl(X,jt[2],Te,nt,kt,Ye),jt[3]&&fl(X,jt[3],kt,nt,He,Ye)}}function lc(X,se,Te,Ne,He,Ye,kt){var nt=1/0,jt;return function gr(yr,Hr,qr,_i,bi){if(!(Hr>Ye||qr>kt||_i<Ne||bi<He)){if(Zr=yr.point){var Zr,ai=se-yr.x,gi=Te-yr.y,Ii=ai*ai+gi*gi;if(Ii<nt){var Si=Math.sqrt(nt=Ii);Ne=se-Si,He=Te-Si,Ye=se+Si,kt=Te+Si,jt=Zr}}for(var ei=yr.nodes,Ln=(Hr+_i)*.5,En=(qr+bi)*.5,Un=se>=Ln,ia=Te>=En,Ea=ia<<1|Un,Ia=Ea+4;Ea<Ia;++Ea)if(yr=ei[Ea&3])switch(Ea&3){case 0:gr(yr,Hr,qr,Ln,En);break;case 1:gr(yr,Ln,qr,_i,En);break;case 2:gr(yr,Hr,En,Ln,bi);break;case 3:gr(yr,Ln,En,_i,bi);break}}}(X,Ne,He,Ye,kt),jt}e.interpolateRgb=Fu;function Fu(X,se){X=e.rgb(X),se=e.rgb(se);var Te=X.r,Ne=X.g,He=X.b,Ye=se.r-Te,kt=se.g-Ne,nt=se.b-He;return function(jt){return"#"+Wn(Math.round(Te+Ye*jt))+Wn(Math.round(Ne+kt*jt))+Wn(Math.round(He+nt*jt))}}e.interpolateObject=Es;function Es(X,se){var Te={},Ne={},He;for(He in X)He in se?Te[He]=xl(X[He],se[He]):Ne[He]=X[He];for(He in se)He in X||(Ne[He]=se[He]);return function(Ye){for(He in Te)Ne[He]=Te[He](Ye);return Ne}}e.interpolateNumber=Hs;function Hs(X,se){return X=+X,se=+se,function(Te){return X*(1-Te)+se*Te}}e.interpolateString=Go;function Go(X,se){var Te=ps.lastIndex=uc.lastIndex=0,Ne,He,Ye,kt=-1,nt=[],jt=[];for(X=X+"",se=se+"";(Ne=ps.exec(X))&&(He=uc.exec(se));)(Ye=He.index)>Te&&(Ye=se.slice(Te,Ye),nt[kt]?nt[kt]+=Ye:nt[++kt]=Ye),(Ne=Ne[0])===(He=He[0])?nt[kt]?nt[kt]+=He:nt[++kt]=He:(nt[++kt]=null,jt.push({i:kt,x:Hs(Ne,He)})),Te=uc.lastIndex;return Te<se.length&&(Ye=se.slice(Te),nt[kt]?nt[kt]+=Ye:nt[++kt]=Ye),nt.length<2?jt[0]?(se=jt[0].x,function(gr){return se(gr)+""}):function(){return se}:(se=jt.length,function(gr){for(var yr=0,Hr;yr<se;++yr)nt[(Hr=jt[yr]).i]=Hr.x(gr);return nt.join("")})}var ps=/[-+]?(?:\d+\.?\d*|\.?\d+)(?:[eE][-+]?\d+)?/g,uc=new RegExp(ps.source,"g");e.interpolate=xl;function xl(X,se){for(var Te=e.interpolators.length,Ne;--Te>=0&&!(Ne=e.interpolators[Te](X,se)););return Ne}e.interpolators=[function(X,se){var Te=typeof se;return(Te==="string"?Jr.has(se.toLowerCase())||/^(#|rgb\(|hsl\()/i.test(se)?Fu:Go:se instanceof la?Fu:Array.isArray(se)?Gu:Te==="object"&&isNaN(se)?Es:Hs)(X,se)}],e.interpolateArray=Gu;function Gu(X,se){var Te=[],Ne=[],He=X.length,Ye=se.length,kt=Math.min(X.length,se.length),nt;for(nt=0;nt<kt;++nt)Te.push(xl(X[nt],se[nt]));for(;nt<He;++nt)Ne[nt]=X[nt];for(;nt<Ye;++nt)Ne[nt]=se[nt];return function(jt){for(nt=0;nt<kt;++nt)Ne[nt]=Te[nt](jt);return Ne}}var qs=function(){return G},ad=e.map({linear:qs,poly:Gf,quad:function(){return af},cubic:function(){return Hu},sin:function(){return Ic},exp:function(){return yf},circle:function(){return Bl},elastic:wh,back:Qf,bounce:function(){return _f}}),Po=e.map({in:G,out:Yo,"in-out":Pa,"out-in":function(X){return Pa(Yo(X))}});e.ease=function(X){var se=X.indexOf("-"),Te=se>=0?X.slice(0,se):X,Ne=se>=0?X.slice(se+1):"in";return Te=ad.get(Te)||qs,Ne=Po.get(Ne)||G,od(Ne(Te.apply(null,t.call(arguments,1))))};function od(X){return function(se){return se<=0?0:se>=1?1:X(se)}}function Yo(X){return function(se){return 1-X(1-se)}}function Pa(X){return function(se){return .5*(se<.5?X(2*se):2-X(2-2*se))}}function af(X){return X*X}function Hu(X){return X*X*X}function bl(X){if(X<=0)return 0;if(X>=1)return 1;var se=X*X,Te=se*X;return 4*(X<.5?Te:3*(X-se)+Te-.75)}function Gf(X){return function(se){return Math.pow(se,X)}}function Ic(X){return 1-Math.cos(X*xe)}function yf(X){return Math.pow(2,10*(X-1))}function Bl(X){return 1-Math.sqrt(1-X*X)}function wh(X,se){var Te;return arguments.length<2&&(se=.45),arguments.length?Te=se/xt*Math.asin(1/X):(X=1,Te=se/4),function(Ne){return 1+X*Math.pow(2,-10*Ne)*Math.sin((Ne-Te)*xt/se)}}function Qf(X){return X||(X=1.70158),function(se){return se*se*((X+1)*se-X)}}function _f(X){return X<1/2.75?7.5625*X*X:X<2/2.75?7.5625*(X-=1.5/2.75)*X+.75:X<2.5/2.75?7.5625*(X-=2.25/2.75)*X+.9375:7.5625*(X-=2.625/2.75)*X+.984375}e.interpolateHcl=Yc;function Yc(X,se){X=e.hcl(X),se=e.hcl(se);var Te=X.h,Ne=X.c,He=X.l,Ye=se.h-Te,kt=se.c-Ne,nt=se.l-He;return isNaN(kt)&&(kt=0,Ne=isNaN(Ne)?se.c:Ne),isNaN(Ye)?(Ye=0,Te=isNaN(Te)?se.h:Te):Ye>180?Ye-=360:Ye<-180&&(Ye+=360),function(jt){return Xr(Te+Ye*jt,Ne+kt*jt,He+nt*jt)+""}}e.interpolateHsl=eh;function eh(X,se){X=e.hsl(X),se=e.hsl(se);var Te=X.h,Ne=X.s,He=X.l,Ye=se.h-Te,kt=se.s-Ne,nt=se.l-He;return isNaN(kt)&&(kt=0,Ne=isNaN(Ne)?se.s:Ne),isNaN(Ye)?(Ye=0,Te=isNaN(Te)?se.h:Te):Ye>180?Ye-=360:Ye<-180&&(Ye+=360),function(jt){return rr(Te+Ye*jt,Ne+kt*jt,He+nt*jt)+""}}e.interpolateLab=th;function th(X,se){X=e.lab(X),se=e.lab(se);var Te=X.l,Ne=X.a,He=X.b,Ye=se.l-Te,kt=se.a-Ne,nt=se.b-He;return function(jt){return yn(Te+Ye*jt,Ne+kt*jt,He+nt*jt)+""}}e.interpolateRound=ju;function ju(X,se){return se-=X,function(Te){return Math.round(X+se*Te)}}e.transform=function(X){var se=n.createElementNS(e.ns.prefix.svg,"g");return(e.transform=function(Te){if(Te!=null){se.setAttribute("transform",Te);var Ne=se.transform.baseVal.consolidate()}return new Hf(Ne?Ne.matrix:Kc)})(X)};function Hf(X){var se=[X.a,X.b],Te=[X.c,X.d],Ne=of(se),He=cc(se,Te),Ye=of(Nl(Te,se,-He))||0;se[0]*Te[1]<Te[0]*se[1]&&(se[0]*=-1,se[1]*=-1,Ne*=-1,He*=-1),this.rotate=(Ne?Math.atan2(se[1],se[0]):Math.atan2(-Te[0],Te[1]))*vt,this.translate=[X.e,X.f],this.scale=[Ne,Ye],this.skew=Ye?Math.atan2(He,Ye)*vt:0}Hf.prototype.toString=function(){return"translate("+this.translate+")rotate("+this.rotate+")skewX("+this.skew+")scale("+this.scale+")"};function cc(X,se){return X[0]*se[0]+X[1]*se[1]}function of(X){var se=Math.sqrt(cc(X,X));return se&&(X[0]/=se,X[1]/=se),se}function Nl(X,se,Te){return X[0]+=Te*se[0],X[1]+=Te*se[1],X}var Kc={a:1,b:0,c:0,d:1,e:0,f:0};e.interpolateTransform=sf;function Rc(X){return X.length?X.pop()+",":""}function gs(X,se,Te,Ne){if(X[0]!==se[0]||X[1]!==se[1]){var He=Te.push("translate(",null,",",null,")");Ne.push({i:He-4,x:Hs(X[0],se[0])},{i:He-2,x:Hs(X[1],se[1])})}else(se[0]||se[1])&&Te.push("translate("+se+")")}function jf(X,se,Te,Ne){X!==se?(X-se>180?se+=360:se-X>180&&(X+=360),Ne.push({i:Te.push(Rc(Te)+"rotate(",null,")")-2,x:Hs(X,se)})):se&&Te.push(Rc(Te)+"rotate("+se+")")}function Gh(X,se,Te,Ne){X!==se?Ne.push({i:Te.push(Rc(Te)+"skewX(",null,")")-2,x:Hs(X,se)}):se&&Te.push(Rc(Te)+"skewX("+se+")")}function rh(X,se,Te,Ne){if(X[0]!==se[0]||X[1]!==se[1]){var He=Te.push(Rc(Te)+"scale(",null,",",null,")");Ne.push({i:He-4,x:Hs(X[0],se[0])},{i:He-2,x:Hs(X[1],se[1])})}else(se[0]!==1||se[1]!==1)&&Te.push(Rc(Te)+"scale("+se+")")}function sf(X,se){var Te=[],Ne=[];return X=e.transform(X),se=e.transform(se),gs(X.translate,se.translate,Te,Ne),jf(X.rotate,se.rotate,Te,Ne),Gh(X.skew,se.skew,Te,Ne),rh(X.scale,se.scale,Te,Ne),X=se=null,function(He){for(var Ye=-1,kt=Ne.length,nt;++Ye<kt;)Te[(nt=Ne[Ye]).i]=nt.x(He);return Te.join("")}}function Th(X,se){return se=(se-=X=+X)||1/se,function(Te){return(Te-X)/se}}function Mu(X,se){return se=(se-=X=+X)||1/se,function(Te){return Math.max(0,Math.min(1,(Te-X)/se))}}e.layout={},e.layout.bundle=function(){return function(X){for(var se=[],Te=-1,Ne=X.length;++Te<Ne;)se.push(ih(X[Te]));return se}};function ih(X){for(var se=X.source,Te=X.target,Ne=Eu(se,Te),He=[se];se!==Ne;)se=se.parent,He.push(se);for(var Ye=He.length;Te!==Ne;)He.splice(Ye,0,Te),Te=Te.parent;return He}function js(X){for(var se=[],Te=X.parent;Te!=null;)se.push(X),X=Te,Te=Te.parent;return se.push(X),se}function Eu(X,se){if(X===se)return X;for(var Te=js(X),Ne=js(se),He=Te.pop(),Ye=Ne.pop(),kt=null;He===Ye;)kt=He,He=Te.pop(),Ye=Ne.pop();return kt}e.layout.chord=function(){var X={},se,Te,Ne,He,Ye=0,kt,nt,jt;function gr(){var Hr={},qr=[],_i=e.range(He),bi=[],Zr,ai,gi,Ii,Si;for(se=[],Te=[],Zr=0,Ii=-1;++Ii<He;){for(ai=0,Si=-1;++Si<He;)ai+=Ne[Ii][Si];qr.push(ai),bi.push(e.range(He)),Zr+=ai}for(kt&&_i.sort(function(yo,Da){return kt(qr[yo],qr[Da])}),nt&&bi.forEach(function(yo,Da){yo.sort(function(go,Is){return nt(Ne[Da][go],Ne[Da][Is])})}),Zr=(xt-Ye*He)/Zr,ai=0,Ii=-1;++Ii<He;){for(gi=ai,Si=-1;++Si<He;){var ei=_i[Ii],Ln=bi[ei][Si],En=Ne[ei][Ln],Un=ai,ia=ai+=En*Zr;Hr[ei+"-"+Ln]={index:ei,subindex:Ln,startAngle:Un,endAngle:ia,value:En}}Te[ei]={index:ei,startAngle:gi,endAngle:ai,value:qr[ei]},ai+=Ye}for(Ii=-1;++Ii<He;)for(Si=Ii-1;++Si<He;){var Ea=Hr[Ii+"-"+Si],Ia=Hr[Si+"-"+Ii];(Ea.value||Ia.value)&&se.push(Ea.value<Ia.value?{source:Ia,target:Ea}:{source:Ea,target:Ia})}jt&&yr()}function yr(){se.sort(function(Hr,qr){return jt((Hr.source.value+Hr.target.value)/2,(qr.source.value+qr.target.value)/2)})}return X.matrix=function(Hr){return arguments.length?(He=(Ne=Hr)&&Ne.length,se=Te=null,X):Ne},X.padding=function(Hr){return arguments.length?(Ye=Hr,se=Te=null,X):Ye},X.sortGroups=function(Hr){return arguments.length?(kt=Hr,se=Te=null,X):kt},X.sortSubgroups=function(Hr){return arguments.length?(nt=Hr,se=null,X):nt},X.sortChords=function(Hr){return arguments.length?(jt=Hr,se&&yr(),X):jt},X.chords=function(){return se||gr(),se},X.groups=function(){return Te||gr(),Te},X},e.layout.force=function(){var X={},se=e.dispatch("start","tick","end"),Te,Ne=[1,1],He,Ye,kt=.9,nt=nl,jt=nh,gr=-30,yr=Ah,Hr=.1,qr=.64,_i=[],bi=[],Zr,ai,gi;function Ii(ei){return function(Ln,En,Un,ia){if(Ln.point!==ei){var Ea=Ln.cx-ei.x,Ia=Ln.cy-ei.y,yo=ia-En,Da=Ea*Ea+Ia*Ia;if(yo*yo/qr<Da){if(Da<yr){var go=Ln.charge/Da;ei.px-=Ea*go,ei.py-=Ia*go}return!0}if(Ln.point&&Da&&Da<yr){var go=Ln.pointCharge/Da;ei.px-=Ea*go,ei.py-=Ia*go}}return!Ln.charge}}X.tick=function(){if((Ye*=.99)<.005)return Te=null,se.end({type:"end",alpha:Ye=0}),!0;var ei=_i.length,Ln=bi.length,En,Un,ia,Ea,Ia,yo,Da,go,Is;for(Un=0;Un<Ln;++Un)ia=bi[Un],Ea=ia.source,Ia=ia.target,go=Ia.x-Ea.x,Is=Ia.y-Ea.y,(yo=go*go+Is*Is)&&(yo=Ye*ai[Un]*((yo=Math.sqrt(yo))-Zr[Un])/yo,go*=yo,Is*=yo,Ia.x-=go*(Da=Ea.weight+Ia.weight?Ea.weight/(Ea.weight+Ia.weight):.5),Ia.y-=Is*Da,Ea.x+=go*(Da=1-Da),Ea.y+=Is*Da);if((Da=Ye*Hr)&&(go=Ne[0]/2,Is=Ne[1]/2,Un=-1,Da))for(;++Un<ei;)ia=_i[Un],ia.x+=(go-ia.x)*Da,ia.y+=(Is-ia.y)*Da;if(gr)for(_u(En=e.geom.quadtree(_i),Ye,gi),Un=-1;++Un<ei;)(ia=_i[Un]).fixed||En.visit(Ii(ia));for(Un=-1;++Un<ei;)ia=_i[Un],ia.fixed?(ia.x=ia.px,ia.y=ia.py):(ia.x-=(ia.px-(ia.px=ia.x))*kt,ia.y-=(ia.py-(ia.py=ia.y))*kt);se.tick({type:"tick",alpha:Ye})},X.nodes=function(ei){return arguments.length?(_i=ei,X):_i},X.links=function(ei){return arguments.length?(bi=ei,X):bi},X.size=function(ei){return arguments.length?(Ne=ei,X):Ne},X.linkDistance=function(ei){return arguments.length?(nt=typeof ei=="function"?ei:+ei,X):nt},X.distance=X.linkDistance,X.linkStrength=function(ei){return arguments.length?(jt=typeof ei=="function"?ei:+ei,X):jt},X.friction=function(ei){return arguments.length?(kt=+ei,X):kt},X.charge=function(ei){return arguments.length?(gr=typeof ei=="function"?ei:+ei,X):gr},X.chargeDistance=function(ei){return arguments.length?(yr=ei*ei,X):Math.sqrt(yr)},X.gravity=function(ei){return arguments.length?(Hr=+ei,X):Hr},X.theta=function(ei){return arguments.length?(qr=ei*ei,X):Math.sqrt(qr)},X.alpha=function(ei){return arguments.length?(ei=+ei,Ye?ei>0?Ye=ei:(Te.c=null,Te.t=NaN,Te=null,se.end({type:"end",alpha:Ye=0})):ei>0&&(se.start({type:"start",alpha:Ye=ei}),Te=No(X.tick)),X):Ye},X.start=function(){var ei,Ln=_i.length,En=bi.length,Un=Ne[0],ia=Ne[1],Ea,Ia;for(ei=0;ei<Ln;++ei)(Ia=_i[ei]).index=ei,Ia.weight=0;for(ei=0;ei<En;++ei)Ia=bi[ei],typeof Ia.source=="number"&&(Ia.source=_i[Ia.source]),typeof Ia.target=="number"&&(Ia.target=_i[Ia.target]),++Ia.source.weight,++Ia.target.weight;for(ei=0;ei<Ln;++ei)Ia=_i[ei],isNaN(Ia.x)&&(Ia.x=yo("x",Un)),isNaN(Ia.y)&&(Ia.y=yo("y",ia)),isNaN(Ia.px)&&(Ia.px=Ia.x),isNaN(Ia.py)&&(Ia.py=Ia.y);if(Zr=[],typeof nt=="function")for(ei=0;ei<En;++ei)Zr[ei]=+nt.call(this,bi[ei],ei);else for(ei=0;ei<En;++ei)Zr[ei]=nt;if(ai=[],typeof jt=="function")for(ei=0;ei<En;++ei)ai[ei]=+jt.call(this,bi[ei],ei);else for(ei=0;ei<En;++ei)ai[ei]=jt;if(gi=[],typeof gr=="function")for(ei=0;ei<Ln;++ei)gi[ei]=+gr.call(this,_i[ei],ei);else for(ei=0;ei<Ln;++ei)gi[ei]=gr;function yo(Da,go){if(!Ea){for(Ea=new Array(Ln),Xs=0;Xs<Ln;++Xs)Ea[Xs]=[];for(Xs=0;Xs<En;++Xs){var Is=bi[Xs];Ea[Is.source.index].push(Is.target),Ea[Is.target.index].push(Is.source)}}for(var Ms=Ea[ei],Xs=-1,Gn=Ms.length,ja;++Xs<Gn;)if(!isNaN(ja=Ms[Xs][Da]))return ja;return Math.random()*go}return X.resume()},X.resume=function(){return X.alpha(.1)},X.stop=function(){return X.alpha(0)},X.drag=function(){if(He||(He=e.behavior.drag().origin(G).on("dragstart.force",Dc).on("drag.force",Si).on("dragend.force",ks)),!arguments.length)return He;this.on("mouseover.force",bc).on("mouseout.force",hu).call(He)};function Si(ei){ei.px=e.event.x,ei.py=e.event.y,X.resume()}return e.rebind(X,se,"on")};function Dc(X){X.fixed|=2}function ks(X){X.fixed&=-7}function bc(X){X.fixed|=4,X.px=X.x,X.py=X.y}function hu(X){X.fixed&=-5}function _u(X,se,Te){var Ne=0,He=0;if(X.charge=0,!X.leaf)for(var Ye=X.nodes,kt=Ye.length,nt=-1,jt;++nt<kt;)jt=Ye[nt],jt!=null&&(_u(jt,se,Te),X.charge+=jt.charge,Ne+=jt.charge*jt.cx,He+=jt.charge*jt.cy);if(X.point){X.leaf||(X.point.x+=Math.random()-.5,X.point.y+=Math.random()-.5);var gr=se*Te[X.point.index];X.charge+=X.pointCharge=gr,Ne+=gr*X.point.x,He+=gr*X.point.y}X.cx=Ne/X.charge,X.cy=He/X.charge}var nl=20,nh=1,Ah=1/0;e.layout.hierarchy=function(){var X=Pf,se=bd,Te=xf;function Ne(He){var Ye=[He],kt=[],nt;for(He.depth=0;(nt=Ye.pop())!=null;)if(kt.push(nt),(gr=se.call(Ne,nt,nt.depth))&&(jt=gr.length)){for(var jt,gr,yr;--jt>=0;)Ye.push(yr=gr[jt]),yr.parent=nt,yr.depth=nt.depth+1;Te&&(nt.value=0),nt.children=gr}else Te&&(nt.value=+Te.call(Ne,nt,nt.depth)||0),delete nt.children;return wc(He,function(Hr){var qr,_i;X&&(qr=Hr.children)&&qr.sort(X),Te&&(_i=Hr.parent)&&(_i.value+=Hr.value)}),kt}return Ne.sort=function(He){return arguments.length?(X=He,Ne):X},Ne.children=function(He){return arguments.length?(se=He,Ne):se},Ne.value=function(He){return arguments.length?(Te=He,Ne):Te},Ne.revalue=function(He){return Te&&(Fc(He,function(Ye){Ye.children&&(Ye.value=0)}),wc(He,function(Ye){var kt;Ye.children||(Ye.value=+Te.call(Ne,Ye,Ye.depth)||0),(kt=Ye.parent)&&(kt.value+=Ye.value)})),He},Ne};function zu(X,se){return e.rebind(X,se,"sort","children","value"),X.nodes=X,X.links=Ou,X}function Fc(X,se){for(var Te=[X];(X=Te.pop())!=null;)if(se(X),(He=X.children)&&(Ne=He.length))for(var Ne,He;--Ne>=0;)Te.push(He[Ne])}function wc(X,se){for(var Te=[X],Ne=[];(X=Te.pop())!=null;)if(Ne.push(X),(kt=X.children)&&(Ye=kt.length))for(var He=-1,Ye,kt;++He<Ye;)Te.push(kt[He]);for(;(X=Ne.pop())!=null;)se(X)}function bd(X){return X.children}function xf(X){return X.value}function Pf(X,se){return se.value-X.value}function Ou(X){return e.merge(X.map(function(se){return(se.children||[]).map(function(Te){return{source:se,target:Te}})}))}e.layout.partition=function(){var X=e.layout.hierarchy(),se=[1,1];function Te(Ye,kt,nt,jt){var gr=Ye.children;if(Ye.x=kt,Ye.y=Ye.depth*jt,Ye.dx=nt,Ye.dy=jt,gr&&(Hr=gr.length)){var yr=-1,Hr,qr,_i;for(nt=Ye.value?nt/Ye.value:0;++yr<Hr;)Te(qr=gr[yr],kt,_i=qr.value*nt,jt),kt+=_i}}function Ne(Ye){var kt=Ye.children,nt=0;if(kt&&(gr=kt.length))for(var jt=-1,gr;++jt<gr;)nt=Math.max(nt,Ne(kt[jt]));return 1+nt}function He(Ye,kt){var nt=X.call(this,Ye,kt);return Te(nt[0],0,se[0],se[1]/Ne(nt[0])),nt}return He.size=function(Ye){return arguments.length?(se=Ye,He):se},zu(He,X)},e.layout.pie=function(){var X=Number,se=bf,Te=0,Ne=xt,He=0;function Ye(kt){var nt=kt.length,jt=kt.map(function(Ii,Si){return+X.call(Ye,Ii,Si)}),gr=+(typeof Te=="function"?Te.apply(this,arguments):Te),yr=(typeof Ne=="function"?Ne.apply(this,arguments):Ne)-gr,Hr=Math.min(Math.abs(yr)/nt,+(typeof He=="function"?He.apply(this,arguments):He)),qr=Hr*(yr<0?-1:1),_i=e.sum(jt),bi=_i?(yr-nt*qr)/_i:0,Zr=e.range(nt),ai=[],gi;return se!=null&&Zr.sort(se===bf?function(Ii,Si){return jt[Si]-jt[Ii]}:function(Ii,Si){return se(kt[Ii],kt[Si])}),Zr.forEach(function(Ii){ai[Ii]={data:kt[Ii],value:gi=jt[Ii],startAngle:gr,endAngle:gr+=gi*bi+qr,padAngle:Hr}}),ai}return Ye.value=function(kt){return arguments.length?(X=kt,Ye):X},Ye.sort=function(kt){return arguments.length?(se=kt,Ye):se},Ye.startAngle=function(kt){return arguments.length?(Te=kt,Ye):Te},Ye.endAngle=function(kt){return arguments.length?(Ne=kt,Ye):Ne},Ye.padAngle=function(kt){return arguments.length?(He=kt,Ye):He},Ye};var bf={};e.layout.stack=function(){var X=G,se=du,Te=ku,Ne=Hh,He=jl,Ye=lf;function kt(nt,jt){if(!(bi=nt.length))return nt;var gr=nt.map(function(Ii,Si){return X.call(kt,Ii,Si)}),yr=gr.map(function(Ii){return Ii.map(function(Si,ei){return[He.call(kt,Si,ei),Ye.call(kt,Si,ei)]})}),Hr=se.call(kt,yr,jt);gr=e.permute(gr,Hr),yr=e.permute(yr,Hr);var qr=Te.call(kt,yr,jt),_i=gr[0].length,bi,Zr,ai,gi;for(ai=0;ai<_i;++ai)for(Ne.call(kt,gr[0][ai],gi=qr[ai],yr[0][ai][1]),Zr=1;Zr<bi;++Zr)Ne.call(kt,gr[Zr][ai],gi+=yr[Zr-1][ai][1],yr[Zr][ai][1]);return nt}return kt.values=function(nt){return arguments.length?(X=nt,kt):X},kt.order=function(nt){return arguments.length?(se=typeof nt=="function"?nt:If.get(nt)||du,kt):se},kt.offset=function(nt){return arguments.length?(Te=typeof nt=="function"?nt:Cs.get(nt)||ku,kt):Te},kt.x=function(nt){return arguments.length?(He=nt,kt):He},kt.y=function(nt){return arguments.length?(Ye=nt,kt):Ye},kt.out=function(nt){return arguments.length?(Ne=nt,kt):Ne},kt};function jl(X){return X.x}function lf(X){return X.y}function Hh(X,se,Te){X.y0=se,X.y=Te}var If=e.map({"inside-out":function(X){var se=X.length,Te,Ne,He=X.map(Wf),Ye=X.map(Us),kt=e.range(se).sort(function(Hr,qr){return He[Hr]-He[qr]}),nt=0,jt=0,gr=[],yr=[];for(Te=0;Te<se;++Te)Ne=kt[Te],nt<jt?(nt+=Ye[Ne],gr.push(Ne)):(jt+=Ye[Ne],yr.push(Ne));return yr.reverse().concat(gr)},reverse:function(X){return e.range(X.length).reverse()},default:du}),Cs=e.map({silhouette:function(X){var se=X.length,Te=X[0].length,Ne=[],He=0,Ye,kt,nt,jt=[];for(kt=0;kt<Te;++kt){for(Ye=0,nt=0;Ye<se;Ye++)nt+=X[Ye][kt][1];nt>He&&(He=nt),Ne.push(nt)}for(kt=0;kt<Te;++kt)jt[kt]=(He-Ne[kt])/2;return jt},wiggle:function(X){var se=X.length,Te=X[0],Ne=Te.length,He,Ye,kt,nt,jt,gr,yr,Hr,qr,_i=[];for(_i[0]=Hr=qr=0,Ye=1;Ye<Ne;++Ye){for(He=0,nt=0;He<se;++He)nt+=X[He][Ye][1];for(He=0,jt=0,yr=Te[Ye][0]-Te[Ye-1][0];He<se;++He){for(kt=0,gr=(X[He][Ye][1]-X[He][Ye-1][1])/(2*yr);kt<He;++kt)gr+=(X[kt][Ye][1]-X[kt][Ye-1][1])/yr;jt+=gr*X[He][Ye][1]}_i[Ye]=Hr-=nt?jt/nt*yr:0,Hr<qr&&(qr=Hr)}for(Ye=0;Ye<Ne;++Ye)_i[Ye]-=qr;return _i},expand:function(X){var se=X.length,Te=X[0].length,Ne=1/se,He,Ye,kt,nt=[];for(Ye=0;Ye<Te;++Ye){for(He=0,kt=0;He<se;He++)kt+=X[He][Ye][1];if(kt)for(He=0;He<se;He++)X[He][Ye][1]/=kt;else for(He=0;He<se;He++)X[He][Ye][1]=Ne}for(Ye=0;Ye<Te;++Ye)nt[Ye]=0;return nt},zero:ku});function du(X){return e.range(X.length)}function ku(X){for(var se=-1,Te=X[0].length,Ne=[];++se<Te;)Ne[se]=0;return Ne}function Wf(X){for(var se=1,Te=0,Ne=X[0][1],He,Ye=X.length;se<Ye;++se)(He=X[se][1])>Ne&&(Te=se,Ne=He);return Te}function Us(X){return X.reduce(wf,0)}function wf(X,se){return X+se[1]}e.layout.histogram=function(){var X=!0,se=Number,Te=Rf,Ne=zc;function He(Ye,qr){for(var nt=[],jt=Ye.map(se,this),gr=Te.call(this,jt,qr),yr=Ne.call(this,gr,jt,qr),Hr,qr=-1,_i=jt.length,bi=yr.length-1,Zr=X?1:1/_i,ai;++qr<bi;)Hr=nt[qr]=[],Hr.dx=yr[qr+1]-(Hr.x=yr[qr]),Hr.y=0;if(bi>0)for(qr=-1;++qr<_i;)ai=jt[qr],ai>=gr[0]&&ai<=gr[1]&&(Hr=nt[e.bisect(yr,ai,1,bi)-1],Hr.y+=Zr,Hr.push(Ye[qr]));return nt}return He.value=function(Ye){return arguments.length?(se=Ye,He):se},He.range=function(Ye){return arguments.length?(Te=vi(Ye),He):Te},He.bins=function(Ye){return arguments.length?(Ne=typeof Ye=="number"?function(kt){return Wu(kt,Ye)}:vi(Ye),He):Ne},He.frequency=function(Ye){return arguments.length?(X=!!Ye,He):X},He};function zc(X,se){return Wu(X,Math.ceil(Math.log(se.length)/Math.LN2+1))}function Wu(X,se){for(var Te=-1,Ne=+X[0],He=(X[1]-Ne)/se,Ye=[];++Te<=se;)Ye[Te]=He*Te+Ne;return Ye}function Rf(X){return[e.min(X),e.max(X)]}e.layout.pack=function(){var X=e.layout.hierarchy().sort(Xu),se=0,Te=[1,1],Ne;function He(Ye,kt){var nt=X.call(this,Ye,kt),jt=nt[0],gr=Te[0],yr=Te[1],Hr=Ne==null?Math.sqrt:typeof Ne=="function"?Ne:function(){return Ne};if(jt.x=jt.y=0,wc(jt,function(_i){_i.r=+Hr(_i.value)}),wc(jt,ah),se){var qr=se*(Ne?1:Math.max(2*jt.r/gr,2*jt.r/yr))/2;wc(jt,function(_i){_i.r+=qr}),wc(jt,ah),wc(jt,function(_i){_i.r-=qr})}return Tc(jt,gr/2,yr/2,Ne?1:1/Math.max(2*jt.r/gr,2*jt.r/yr)),nt}return He.size=function(Ye){return arguments.length?(Te=Ye,He):Te},He.radius=function(Ye){return arguments.length?(Ne=Ye==null||typeof Ye=="function"?Ye:+Ye,He):Ne},He.padding=function(Ye){return arguments.length?(se=+Ye,He):se},zu(He,X)};function Xu(X,se){return X.value-se.value}function uf(X,se){var Te=X._pack_next;X._pack_next=se,se._pack_prev=X,se._pack_next=Te,Te._pack_prev=se}function Xf(X,se){X._pack_next=se,se._pack_prev=X}function Wl(X,se){var Te=se.x-X.x,Ne=se.y-X.y,He=X.r+se.r;return .999*He*He>Te*Te+Ne*Ne}function ah(X){if(!(se=X.children)||!(qr=se.length))return;var se,Te=1/0,Ne=-1/0,He=1/0,Ye=-1/0,kt,nt,jt,gr,yr,Hr,qr;function _i(ei){Te=Math.min(ei.x-ei.r,Te),Ne=Math.max(ei.x+ei.r,Ne),He=Math.min(ei.y-ei.r,He),Ye=Math.max(ei.y+ei.r,Ye)}if(se.forEach(Zu),kt=se[0],kt.x=-kt.r,kt.y=0,_i(kt),qr>1&&(nt=se[1],nt.x=nt.r,nt.y=0,_i(nt),qr>2))for(jt=se[2],wl(kt,nt,jt),_i(jt),uf(kt,jt),kt._pack_prev=jt,uf(jt,nt),nt=kt._pack_next,gr=3;gr<qr;gr++){wl(kt,nt,jt=se[gr]);var bi=0,Zr=1,ai=1;for(yr=nt._pack_next;yr!==nt;yr=yr._pack_next,Zr++)if(Wl(yr,jt)){bi=1;break}if(bi==1)for(Hr=kt._pack_prev;Hr!==yr._pack_prev&&!Wl(Hr,jt);Hr=Hr._pack_prev,ai++);bi?(Zr<ai||Zr==ai&&nt.r<kt.r?Xf(kt,nt=yr):Xf(kt=Hr,nt),gr--):(uf(kt,jt),nt=jt,_i(jt))}var gi=(Te+Ne)/2,Ii=(He+Ye)/2,Si=0;for(gr=0;gr<qr;gr++)jt=se[gr],jt.x-=gi,jt.y-=Ii,Si=Math.max(Si,jt.r+Math.sqrt(jt.x*jt.x+jt.y*jt.y));X.r=Si,se.forEach(Oc)}function Zu(X){X._pack_next=X._pack_prev=X}function Oc(X){delete X._pack_next,delete X._pack_prev}function Tc(X,se,Te,Ne){var He=X.children;if(X.x=se+=Ne*X.x,X.y=Te+=Ne*X.y,X.r*=Ne,He)for(var Ye=-1,kt=He.length;++Ye<kt;)Tc(He[Ye],se,Te,Ne)}function wl(X,se,Te){var Ne=X.r+Te.r,He=se.x-X.x,Ye=se.y-X.y;if(Ne&&(He||Ye)){var kt=se.r+Te.r,nt=He*He+Ye*Ye;kt*=kt,Ne*=Ne;var jt=.5+(Ne-kt)/(2*nt),gr=Math.sqrt(Math.max(0,2*kt*(Ne+nt)-(Ne-=nt)*Ne-kt*kt))/(2*nt);Te.x=X.x+jt*He+gr*Ye,Te.y=X.y+jt*Ye-gr*He}else Te.x=X.x+Ne,Te.y=X.y}e.layout.tree=function(){var X=e.layout.hierarchy().sort(null).value(null),se=vu,Te=[1,1],Ne=null;function He(yr,Hr){var qr=X.call(this,yr,Hr),_i=qr[0],bi=Ye(_i);if(wc(bi,kt),bi.parent.m=-bi.z,Fc(bi,nt),Ne)Fc(_i,gr);else{var Zr=_i,ai=_i,gi=_i;Fc(_i,function(Ln){Ln.x<Zr.x&&(Zr=Ln),Ln.x>ai.x&&(ai=Ln),Ln.depth>gi.depth&&(gi=Ln)});var Ii=se(Zr,ai)/2-Zr.x,Si=Te[0]/(ai.x+se(ai,Zr)/2+Ii),ei=Te[1]/(gi.depth||1);Fc(_i,function(Ln){Ln.x=(Ln.x+Ii)*Si,Ln.y=Ln.depth*ei})}return qr}function Ye(yr){for(var Hr={A:null,children:[yr]},qr=[Hr],_i;(_i=qr.pop())!=null;)for(var bi=_i.children,Zr,ai=0,gi=bi.length;ai<gi;++ai)qr.push((bi[ai]=Zr={_:bi[ai],parent:_i,children:(Zr=bi[ai].children)&&Zr.slice()||[],A:null,a:null,z:0,m:0,c:0,s:0,t:null,i:ai}).a=Zr);return Hr.children[0]}function kt(yr){var Hr=yr.children,qr=yr.parent.children,_i=yr.i?qr[yr.i-1]:null;if(Hr.length){Bc(yr);var bi=(Hr[0].z+Hr[Hr.length-1].z)/2;_i?(yr.z=_i.z+se(yr._,_i._),yr.m=yr.z-bi):yr.z=bi}else _i&&(yr.z=_i.z+se(yr._,_i._));yr.parent.A=jt(yr,_i,yr.parent.A||qr[0])}function nt(yr){yr._.x=yr.z+yr.parent.m,yr.m+=yr.parent.m}function jt(yr,Hr,qr){if(Hr){for(var _i=yr,bi=yr,Zr=Hr,ai=_i.parent.children[0],gi=_i.m,Ii=bi.m,Si=Zr.m,ei=ai.m,Ln;Zr=cf(Zr),_i=qc(_i),Zr&&_i;)ai=qc(ai),bi=cf(bi),bi.a=yr,Ln=Zr.z+Si-_i.z-gi+se(Zr._,_i._),Ln>0&&(fc(At(Zr,yr,qr),yr,Ln),gi+=Ln,Ii+=Ln),Si+=Zr.m,gi+=_i.m,ei+=ai.m,Ii+=bi.m;Zr&&!cf(bi)&&(bi.t=Zr,bi.m+=Si-Ii),_i&&!qc(ai)&&(ai.t=_i,ai.m+=gi-ei,qr=yr)}return qr}function gr(yr){yr.x*=Te[0],yr.y=yr.depth*Te[1]}return He.separation=function(yr){return arguments.length?(se=yr,He):se},He.size=function(yr){return arguments.length?(Ne=(Te=yr)==null?gr:null,He):Ne?null:Te},He.nodeSize=function(yr){return arguments.length?(Ne=(Te=yr)==null?null:gr,He):Ne?Te:null},zu(He,X)};function vu(X,se){return X.parent==se.parent?1:2}function qc(X){var se=X.children;return se.length?se[0]:X.t}function cf(X){var se=X.children,Te;return(Te=se.length)?se[Te-1]:X.t}function fc(X,se,Te){var Ne=Te/(se.i-X.i);se.c-=Ne,se.s+=Te,X.c+=Ne,se.z+=Te,se.m+=Te}function Bc(X){for(var se=0,Te=0,Ne=X.children,He=Ne.length,Ye;--He>=0;)Ye=Ne[He],Ye.z+=se,Ye.m+=se,se+=Ye.s+(Te+=Ye.c)}function At(X,se,Te){return X.a.parent===se.parent?X.a:Te}e.layout.cluster=function(){var X=e.layout.hierarchy().sort(null).value(null),se=vu,Te=[1,1],Ne=!1;function He(Ye,kt){var nt=X.call(this,Ye,kt),jt=nt[0],gr,yr=0;wc(jt,function(Zr){var ai=Zr.children;ai&&ai.length?(Zr.x=Cr(ai),Zr.y=Wt(ai)):(Zr.x=gr?yr+=se(Zr,gr):0,Zr.y=0,gr=Zr)});var Hr=Ar(jt),qr=Kr(jt),_i=Hr.x-se(Hr,qr)/2,bi=qr.x+se(qr,Hr)/2;return wc(jt,Ne?function(Zr){Zr.x=(Zr.x-jt.x)*Te[0],Zr.y=(jt.y-Zr.y)*Te[1]}:function(Zr){Zr.x=(Zr.x-_i)/(bi-_i)*Te[0],Zr.y=(1-(jt.y?Zr.y/jt.y:1))*Te[1]}),nt}return He.separation=function(Ye){return arguments.length?(se=Ye,He):se},He.size=function(Ye){return arguments.length?(Ne=(Te=Ye)==null,He):Ne?null:Te},He.nodeSize=function(Ye){return arguments.length?(Ne=(Te=Ye)!=null,He):Ne?Te:null},zu(He,X)};function Wt(X){return 1+e.max(X,function(se){return se.y})}function Cr(X){return X.reduce(function(se,Te){return se+Te.x},0)/X.length}function Ar(X){var se=X.children;return se&&se.length?Ar(se[0]):X}function Kr(X){var se=X.children,Te;return se&&(Te=se.length)?Kr(se[Te-1]):X}e.layout.treemap=function(){var X=e.layout.hierarchy(),se=Math.round,Te=[1,1],Ne=null,He=ki,Ye=!1,kt,nt="squarify",jt=.5*(1+Math.sqrt(5));function gr(Zr,ai){for(var gi=-1,Ii=Zr.length,Si,ei;++gi<Ii;)ei=(Si=Zr[gi]).value*(ai<0?0:ai),Si.area=isNaN(ei)||ei<=0?0:ei}function yr(Zr){var ai=Zr.children;if(ai&&ai.length){var gi=He(Zr),Ii=[],Si=ai.slice(),ei,Ln=1/0,En,Un=nt==="slice"?gi.dx:nt==="dice"?gi.dy:nt==="slice-dice"?Zr.depth&1?gi.dy:gi.dx:Math.min(gi.dx,gi.dy),ia;for(gr(Si,gi.dx*gi.dy/Zr.value),Ii.area=0;(ia=Si.length)>0;)Ii.push(ei=Si[ia-1]),Ii.area+=ei.area,nt!=="squarify"||(En=qr(Ii,Un))<=Ln?(Si.pop(),Ln=En):(Ii.area-=Ii.pop().area,_i(Ii,Un,gi,!1),Un=Math.min(gi.dx,gi.dy),Ii.length=Ii.area=0,Ln=1/0);Ii.length&&(_i(Ii,Un,gi,!0),Ii.length=Ii.area=0),ai.forEach(yr)}}function Hr(Zr){var ai=Zr.children;if(ai&&ai.length){var gi=He(Zr),Ii=ai.slice(),Si,ei=[];for(gr(Ii,gi.dx*gi.dy/Zr.value),ei.area=0;Si=Ii.pop();)ei.push(Si),ei.area+=Si.area,Si.z!=null&&(_i(ei,Si.z?gi.dx:gi.dy,gi,!Ii.length),ei.length=ei.area=0);ai.forEach(Hr)}}function qr(Zr,ai){for(var gi=Zr.area,Ii,Si=0,ei=1/0,Ln=-1,En=Zr.length;++Ln<En;)(Ii=Zr[Ln].area)&&(Ii<ei&&(ei=Ii),Ii>Si&&(Si=Ii));return gi*=gi,ai*=ai,gi?Math.max(ai*Si*jt/gi,gi/(ai*ei*jt)):1/0}function _i(Zr,ai,gi,Ii){var Si=-1,ei=Zr.length,Ln=gi.x,En=gi.y,Un=ai?se(Zr.area/ai):0,ia;if(ai==gi.dx){for((Ii||Un>gi.dy)&&(Un=gi.dy);++Si<ei;)ia=Zr[Si],ia.x=Ln,ia.y=En,ia.dy=Un,Ln+=ia.dx=Math.min(gi.x+gi.dx-Ln,Un?se(ia.area/Un):0);ia.z=!0,ia.dx+=gi.x+gi.dx-Ln,gi.y+=Un,gi.dy-=Un}else{for((Ii||Un>gi.dx)&&(Un=gi.dx);++Si<ei;)ia=Zr[Si],ia.x=Ln,ia.y=En,ia.dx=Un,En+=ia.dy=Math.min(gi.y+gi.dy-En,Un?se(ia.area/Un):0);ia.z=!1,ia.dy+=gi.y+gi.dy-En,gi.x+=Un,gi.dx-=Un}}function bi(Zr){var ai=kt||X(Zr),gi=ai[0];return gi.x=gi.y=0,gi.value?(gi.dx=Te[0],gi.dy=Te[1]):gi.dx=gi.dy=0,kt&&X.revalue(gi),gr([gi],gi.dx*gi.dy/gi.value),(kt?Hr:yr)(gi),Ye&&(kt=ai),ai}return bi.size=function(Zr){return arguments.length?(Te=Zr,bi):Te},bi.padding=function(Zr){if(!arguments.length)return Ne;function ai(Si){var ei=Zr.call(bi,Si,Si.depth);return ei==null?ki(Si):Xi(Si,typeof ei=="number"?[ei,ei,ei,ei]:ei)}function gi(Si){return Xi(Si,Zr)}var Ii;return He=(Ne=Zr)==null?ki:(Ii=typeof Zr)=="function"?ai:(Ii==="number"&&(Zr=[Zr,Zr,Zr,Zr]),gi),bi},bi.round=function(Zr){return arguments.length?(se=Zr?Math.round:Number,bi):se!=Number},bi.sticky=function(Zr){return arguments.length?(Ye=Zr,kt=null,bi):Ye},bi.ratio=function(Zr){return arguments.length?(jt=Zr,bi):jt},bi.mode=function(Zr){return arguments.length?(nt=Zr+"",bi):nt},zu(bi,X)};function ki(X){return{x:X.x,y:X.y,dx:X.dx,dy:X.dy}}function Xi(X,se){var Te=X.x+se[3],Ne=X.y+se[0],He=X.dx-se[1]-se[3],Ye=X.dy-se[0]-se[2];return He<0&&(Te+=He/2,He=0),Ye<0&&(Ne+=Ye/2,Ye=0),{x:Te,y:Ne,dx:He,dy:Ye}}e.random={normal:function(X,se){var Te=arguments.length;return Te<2&&(se=1),Te<1&&(X=0),function(){var Ne,He,Ye;do Ne=Math.random()*2-1,He=Math.random()*2-1,Ye=Ne*Ne+He*He;while(!Ye||Ye>1);return X+se*Ne*Math.sqrt(-2*Math.log(Ye)/Ye)}},logNormal:function(){var X=e.random.normal.apply(e,arguments);return function(){return Math.exp(X())}},bates:function(X){var se=e.random.irwinHall(X);return function(){return se()/X}},irwinHall:function(X){return function(){for(var se=0,Te=0;Te<X;Te++)se+=Math.random();return se}}},e.scale={};function dn(X){var se=X[0],Te=X[X.length-1];return se<Te?[se,Te]:[Te,se]}function wn(X){return X.rangeExtent?X.rangeExtent():dn(X.range())}function Nn(X,se,Te,Ne){var He=Te(X[0],X[1]),Ye=Ne(se[0],se[1]);return function(kt){return Ye(He(kt))}}function Yi(X,se){var Te=0,Ne=X.length-1,He=X[Te],Ye=X[Ne],kt;return Ye<He&&(kt=Te,Te=Ne,Ne=kt,kt=He,He=Ye,Ye=kt),X[Te]=se.floor(He),X[Ne]=se.ceil(Ye),X}function Qi(X){return X?{floor:function(se){return Math.floor(se/X)*X},ceil:function(se){return Math.ceil(se/X)*X}}:on}var on={floor:G,ceil:G};function Fi(X,se,Te,Ne){var He=[],Ye=[],kt=0,nt=Math.min(X.length,se.length)-1;for(X[nt]<X[0]&&(X=X.slice().reverse(),se=se.slice().reverse());++kt<=nt;)He.push(Te(X[kt-1],X[kt])),Ye.push(Ne(se[kt-1],se[kt]));return function(jt){var gr=e.bisect(X,jt,1,nt)-1;return Ye[gr](He[gr](jt))}}e.scale.linear=function(){return $n([0,1],[0,1],xl,!1)};function $n(X,se,Te,Ne){var He,Ye;function kt(){var jt=Math.min(X.length,se.length)>2?Fi:Nn,gr=Ne?Mu:Th;return He=jt(X,se,gr,Te),Ye=jt(se,X,gr,xl),nt}function nt(jt){return He(jt)}return nt.invert=function(jt){return Ye(jt)},nt.domain=function(jt){return arguments.length?(X=jt.map(Number),kt()):X},nt.range=function(jt){return arguments.length?(se=jt,kt()):se},nt.rangeRound=function(jt){return nt.range(jt).interpolate(ju)},nt.clamp=function(jt){return arguments.length?(Ne=jt,kt()):Ne},nt.interpolate=function(jt){return arguments.length?(Te=jt,kt()):Te},nt.ticks=function(jt){return Na(X,jt)},nt.tickFormat=function(jt,gr){return d3_scale_linearTickFormat(X,jt,gr)},nt.nice=function(jt){return Ra(X,jt),kt()},nt.copy=function(){return $n(X,se,Te,Ne)},kt()}function Ca(X,se){return e.rebind(X,se,"range","rangeRound","interpolate","clamp")}function Ra(X,se){return Yi(X,Qi(La(X,se)[2])),Yi(X,Qi(La(X,se)[2])),X}function La(X,se){se==null&&(se=10);var Te=dn(X),Ne=Te[1]-Te[0],He=Math.pow(10,Math.floor(Math.log(Ne/se)/Math.LN10)),Ye=se/Ne*He;return Ye<=.15?He*=10:Ye<=.35?He*=5:Ye<=.75&&(He*=2),Te[0]=Math.ceil(Te[0]/He)*He,Te[1]=Math.floor(Te[1]/He)*He+He*.5,Te[2]=He,Te}function Na(X,se){return e.range.apply(e,La(X,se))}var Yn={s:1,g:1,p:1,r:1,e:1};function Dn(X){return-Math.floor(Math.log(X)/Math.LN10+.01)}function Ka(X,se){var Te=Dn(se[2]);return X in Yn?Math.abs(Te-Dn(Math.max(p(se[0]),p(se[1]))))+ +(X!=="e"):Te-(X==="%")*2}e.scale.log=function(){return bo(e.scale.linear().domain([0,1]),10,!0,[1,10])};function bo(X,se,Te,Ne){function He(nt){return(Te?Math.log(nt<0?0:nt):-Math.log(nt>0?0:-nt))/Math.log(se)}function Ye(nt){return Te?Math.pow(se,nt):-Math.pow(se,-nt)}function kt(nt){return X(He(nt))}return kt.invert=function(nt){return Ye(X.invert(nt))},kt.domain=function(nt){return arguments.length?(Te=nt[0]>=0,X.domain((Ne=nt.map(Number)).map(He)),kt):Ne},kt.base=function(nt){return arguments.length?(se=+nt,X.domain(Ne.map(He)),kt):se},kt.nice=function(){var nt=Yi(Ne.map(He),Te?Math:Xo);return X.domain(nt),Ne=nt.map(Ye),kt},kt.ticks=function(){var nt=dn(Ne),jt=[],gr=nt[0],yr=nt[1],Hr=Math.floor(He(gr)),qr=Math.ceil(He(yr)),_i=se%1?2:se;if(isFinite(qr-Hr)){if(Te){for(;Hr<qr;Hr++)for(var bi=1;bi<_i;bi++)jt.push(Ye(Hr)*bi);jt.push(Ye(Hr))}else for(jt.push(Ye(Hr));Hr++<qr;)for(var bi=_i-1;bi>0;bi--)jt.push(Ye(Hr)*bi);for(Hr=0;jt[Hr]<gr;Hr++);for(qr=jt.length;jt[qr-1]>yr;qr--);jt=jt.slice(Hr,qr)}return jt},kt.copy=function(){return bo(X.copy(),se,Te,Ne)},Ca(kt,X)}var Xo={floor:function(X){return-Math.ceil(-X)},ceil:function(X){return-Math.floor(-X)}};e.scale.pow=function(){return Ss(e.scale.linear(),1,[0,1])};function Ss(X,se,Te){var Ne=as(se),He=as(1/se);function Ye(kt){return X(Ne(kt))}return Ye.invert=function(kt){return He(X.invert(kt))},Ye.domain=function(kt){return arguments.length?(X.domain((Te=kt.map(Number)).map(Ne)),Ye):Te},Ye.ticks=function(kt){return Na(Te,kt)},Ye.tickFormat=function(kt,nt){return d3_scale_linearTickFormat(Te,kt,nt)},Ye.nice=function(kt){return Ye.domain(Ra(Te,kt))},Ye.exponent=function(kt){return arguments.length?(Ne=as(se=kt),He=as(1/se),X.domain(Te.map(Ne)),Ye):se},Ye.copy=function(){return Ss(X.copy(),se,Te)},Ca(Ye,X)}function as(X){return function(se){return se<0?-Math.pow(-se,X):Math.pow(se,X)}}e.scale.sqrt=function(){return e.scale.pow().exponent(.5)},e.scale.ordinal=function(){return ws([],{t:"range",a:[[]]})};function ws(X,se){var Te,Ne,He;function Ye(nt){return Ne[((Te.get(nt)||(se.t==="range"?Te.set(nt,X.push(nt)):NaN))-1)%Ne.length]}function kt(nt,jt){return e.range(X.length).map(function(gr){return nt+jt*gr})}return Ye.domain=function(nt){if(!arguments.length)return X;X=[],Te=new S;for(var jt=-1,gr=nt.length,yr;++jt<gr;)Te.has(yr=nt[jt])||Te.set(yr,X.push(yr));return Ye[se.t].apply(Ye,se.a)},Ye.range=function(nt){return arguments.length?(Ne=nt,He=0,se={t:"range",a:arguments},Ye):Ne},Ye.rangePoints=function(nt,jt){arguments.length<2&&(jt=0);var gr=nt[0],yr=nt[1],Hr=X.length<2?(gr=(gr+yr)/2,0):(yr-gr)/(X.length-1+jt);return Ne=kt(gr+Hr*jt/2,Hr),He=0,se={t:"rangePoints",a:arguments},Ye},Ye.rangeRoundPoints=function(nt,jt){arguments.length<2&&(jt=0);var gr=nt[0],yr=nt[1],Hr=X.length<2?(gr=yr=Math.round((gr+yr)/2),0):(yr-gr)/(X.length-1+jt)|0;return Ne=kt(gr+Math.round(Hr*jt/2+(yr-gr-(X.length-1+jt)*Hr)/2),Hr),He=0,se={t:"rangeRoundPoints",a:arguments},Ye},Ye.rangeBands=function(nt,jt,gr){arguments.length<2&&(jt=0),arguments.length<3&&(gr=jt);var yr=nt[1]<nt[0],Hr=nt[yr-0],qr=nt[1-yr],_i=(qr-Hr)/(X.length-jt+2*gr);return Ne=kt(Hr+_i*gr,_i),yr&&Ne.reverse(),He=_i*(1-jt),se={t:"rangeBands",a:arguments},Ye},Ye.rangeRoundBands=function(nt,jt,gr){arguments.length<2&&(jt=0),arguments.length<3&&(gr=jt);var yr=nt[1]<nt[0],Hr=nt[yr-0],qr=nt[1-yr],_i=Math.floor((qr-Hr)/(X.length-jt+2*gr));return Ne=kt(Hr+Math.round((qr-Hr-(X.length-jt)*_i)/2),_i),yr&&Ne.reverse(),He=Math.round(_i*(1-jt)),se={t:"rangeRoundBands",a:arguments},Ye},Ye.rangeBand=function(){return He},Ye.rangeExtent=function(){return dn(se.a[0])},Ye.copy=function(){return ws(X,se)},Ye.domain(X)}e.scale.category10=function(){return e.scale.ordinal().range(Ho)},e.scale.category20=function(){return e.scale.ordinal().range(ml)},e.scale.category20b=function(){return e.scale.ordinal().range(Ws)},e.scale.category20c=function(){return e.scale.ordinal().range(Ls)};var Ho=[2062260,16744206,2924588,14034728,9725885,9197131,14907330,8355711,12369186,1556175].map(Wo),ml=[2062260,11454440,16744206,16759672,2924588,10018698,14034728,16750742,9725885,12955861,9197131,12885140,14907330,16234194,8355711,13092807,12369186,14408589,1556175,10410725].map(Wo),Ws=[3750777,5395619,7040719,10264286,6519097,9216594,11915115,13556636,9202993,12426809,15186514,15190932,8666169,11356490,14049643,15177372,8077683,10834324,13528509,14589654].map(Wo),Ls=[3244733,7057110,10406625,13032431,15095053,16616764,16625259,16634018,3253076,7652470,10607003,13101504,7695281,10394312,12369372,14342891,6513507,9868950,12434877,14277081].map(Wo);e.scale.quantile=function(){return va([],[])};function va(X,se){var Te;function Ne(){var Ye=0,kt=se.length;for(Te=[];++Ye<kt;)Te[Ye-1]=e.quantile(X,Ye/kt);return He}function He(Ye){if(!isNaN(Ye=+Ye))return se[e.bisect(Te,Ye)]}return He.domain=function(Ye){return arguments.length?(X=Ye.map(h).filter(d).sort(f),Ne()):X},He.range=function(Ye){return arguments.length?(se=Ye,Ne()):se},He.quantiles=function(){return Te},He.invertExtent=function(Ye){return Ye=se.indexOf(Ye),Ye<0?[NaN,NaN]:[Ye>0?Te[Ye-1]:X[0],Ye<Te.length?Te[Ye]:X[X.length-1]]},He.copy=function(){return va(X,se)},Ne()}e.scale.quantize=function(){return no(0,1,[0,1])};function no(X,se,Te){var Ne,He;function Ye(nt){return Te[Math.max(0,Math.min(He,Math.floor(Ne*(nt-X))))]}function kt(){return Ne=Te.length/(se-X),He=Te.length-1,Ye}return Ye.domain=function(nt){return arguments.length?(X=+nt[0],se=+nt[nt.length-1],kt()):[X,se]},Ye.range=function(nt){return arguments.length?(Te=nt,kt()):Te},Ye.invertExtent=function(nt){return nt=Te.indexOf(nt),nt=nt<0?NaN:nt/Ne+X,[nt,nt+1/Ne]},Ye.copy=function(){return no(X,se,Te)},kt()}e.scale.threshold=function(){return ys([.5],[0,1])};function ys(X,se){function Te(Ne){if(Ne<=Ne)return se[e.bisect(X,Ne)]}return Te.domain=function(Ne){return arguments.length?(X=Ne,Te):X},Te.range=function(Ne){return arguments.length?(se=Ne,Te):se},Te.invertExtent=function(Ne){return Ne=se.indexOf(Ne),[X[Ne-1],X[Ne]]},Te.copy=function(){return ys(X,se)},Te}e.scale.identity=function(){return rs([0,1])};function rs(X){function se(Te){return+Te}return se.invert=se,se.domain=se.range=function(Te){return arguments.length?(X=Te.map(se),se):X},se.ticks=function(Te){return Na(X,Te)},se.tickFormat=function(Te,Ne){return d3_scale_linearTickFormat(X,Te,Ne)},se.copy=function(){return rs(X)},se}e.svg={};function $l(){return 0}e.svg.arc=function(){var X=Yu,se=Nc,Te=$l,Ne=Cu,He=pu,Ye=Uc,kt=xu;function nt(){var gr=Math.max(0,+X.apply(this,arguments)),yr=Math.max(0,+se.apply(this,arguments)),Hr=He.apply(this,arguments)-xe,qr=Ye.apply(this,arguments)-xe,_i=Math.abs(qr-Hr),bi=Hr>qr?0:1;if(yr<gr&&(Zr=yr,yr=gr,gr=Zr),_i>=Ie)return jt(yr,bi)+(gr?jt(gr,1-bi):"")+"Z";var Zr,ai,gi,Ii,Si=0,ei=0,Ln,En,Un,ia,Ea,Ia,yo,Da,go=[];if((Ii=(+kt.apply(this,arguments)||0)/2)&&(gi=Ne===Cu?Math.sqrt(gr*gr+yr*yr):+Ne.apply(this,arguments),bi||(ei*=-1),yr&&(ei=ii(gi/yr*Math.sin(Ii))),gr&&(Si=ii(gi/gr*Math.sin(Ii)))),yr){Ln=yr*Math.cos(Hr+ei),En=yr*Math.sin(Hr+ei),Un=yr*Math.cos(qr-ei),ia=yr*Math.sin(qr-ei);var Is=Math.abs(qr-Hr-2*ei)<=tt?0:1;if(ei&&Ac(Ln,En,Un,ia)===bi^Is){var Ms=(Hr+qr)/2;Ln=yr*Math.cos(Ms),En=yr*Math.sin(Ms),Un=ia=null}}else Ln=En=0;if(gr){Ea=gr*Math.cos(qr-Si),Ia=gr*Math.sin(qr-Si),yo=gr*Math.cos(Hr+Si),Da=gr*Math.sin(Hr+Si);var Xs=Math.abs(Hr-qr+2*Si)<=tt?0:1;if(Si&&Ac(Ea,Ia,yo,Da)===1-bi^Xs){var Gn=(Hr+qr)/2;Ea=gr*Math.cos(Gn),Ia=gr*Math.sin(Gn),yo=Da=null}}else Ea=Ia=0;if(_i>Je&&(Zr=Math.min(Math.abs(yr-gr)/2,+Te.apply(this,arguments)))>.001){ai=gr<yr^bi?0:1;var ja=Zr,Fo=Zr;if(_i<tt){var Uo=yo==null?[Ea,Ia]:Un==null?[Ln,En]:il([Ln,En],[yo,Da],[Un,ia],[Ea,Ia]),$s=Ln-Uo[0],Sl=En-Uo[1],bu=Un-Uo[0],dl=ia-Uo[1],Sc=1/Math.sin(Math.acos(($s*bu+Sl*dl)/(Math.sqrt($s*$s+Sl*Sl)*Math.sqrt(bu*bu+dl*dl)))/2),Me=Math.sqrt(Uo[0]*Uo[0]+Uo[1]*Uo[1]);Fo=Math.min(Zr,(gr-Me)/(Sc-1)),ja=Math.min(Zr,(yr-Me)/(Sc+1))}if(Un!=null){var bt=Ua(yo==null?[Ea,Ia]:[yo,Da],[Ln,En],yr,ja,bi),zt=Ua([Un,ia],[Ea,Ia],yr,ja,bi);Zr===ja?go.push("M",bt[0],"A",ja,",",ja," 0 0,",ai," ",bt[1],"A",yr,",",yr," 0 ",1-bi^Ac(bt[1][0],bt[1][1],zt[1][0],zt[1][1]),",",bi," ",zt[1],"A",ja,",",ja," 0 0,",ai," ",zt[0]):go.push("M",bt[0],"A",ja,",",ja," 0 1,",ai," ",zt[0])}else go.push("M",Ln,",",En);if(yo!=null){var Rr=Ua([Ln,En],[yo,Da],gr,-Fo,bi),jr=Ua([Ea,Ia],Un==null?[Ln,En]:[Un,ia],gr,-Fo,bi);Zr===Fo?go.push("L",jr[0],"A",Fo,",",Fo," 0 0,",ai," ",jr[1],"A",gr,",",gr," 0 ",bi^Ac(jr[1][0],jr[1][1],Rr[1][0],Rr[1][1]),",",1-bi," ",Rr[1],"A",Fo,",",Fo," 0 0,",ai," ",Rr[0]):go.push("L",jr[0],"A",Fo,",",Fo," 0 0,",ai," ",Rr[0])}else go.push("L",Ea,",",Ia)}else go.push("M",Ln,",",En),Un!=null&&go.push("A",yr,",",yr," 0 ",Is,",",bi," ",Un,",",ia),go.push("L",Ea,",",Ia),yo!=null&&go.push("A",gr,",",gr," 0 ",Xs,",",1-bi," ",yo,",",Da);return go.push("Z"),go.join("")}function jt(gr,yr){return"M0,"+gr+"A"+gr+","+gr+" 0 1,"+yr+" 0,"+-gr+"A"+gr+","+gr+" 0 1,"+yr+" 0,"+gr}return nt.innerRadius=function(gr){return arguments.length?(X=vi(gr),nt):X},nt.outerRadius=function(gr){return arguments.length?(se=vi(gr),nt):se},nt.cornerRadius=function(gr){return arguments.length?(Te=vi(gr),nt):Te},nt.padRadius=function(gr){return arguments.length?(Ne=gr==Cu?Cu:vi(gr),nt):Ne},nt.startAngle=function(gr){return arguments.length?(He=vi(gr),nt):He},nt.endAngle=function(gr){return arguments.length?(Ye=vi(gr),nt):Ye},nt.padAngle=function(gr){return arguments.length?(kt=vi(gr),nt):kt},nt.centroid=function(){var gr=(+X.apply(this,arguments)+ +se.apply(this,arguments))/2,yr=(+He.apply(this,arguments)+ +Ye.apply(this,arguments))/2-xe;return[Math.cos(yr)*gr,Math.sin(yr)*gr]},nt};var Cu="auto";function Yu(X){return X.innerRadius}function Nc(X){return X.outerRadius}function pu(X){return X.startAngle}function Uc(X){return X.endAngle}function xu(X){return X&&X.padAngle}function Ac(X,se,Te,Ne){return(X-Te)*se-(se-Ne)*X>0?0:1}function Ua(X,se,Te,Ne,He){var Ye=X[0]-se[0],kt=X[1]-se[1],nt=(He?Ne:-Ne)/Math.sqrt(Ye*Ye+kt*kt),jt=nt*kt,gr=-nt*Ye,yr=X[0]+jt,Hr=X[1]+gr,qr=se[0]+jt,_i=se[1]+gr,bi=(yr+qr)/2,Zr=(Hr+_i)/2,ai=qr-yr,gi=_i-Hr,Ii=ai*ai+gi*gi,Si=Te-Ne,ei=yr*_i-qr*Hr,Ln=(gi<0?-1:1)*Math.sqrt(Math.max(0,Si*Si*Ii-ei*ei)),En=(ei*gi-ai*Ln)/Ii,Un=(-ei*ai-gi*Ln)/Ii,ia=(ei*gi+ai*Ln)/Ii,Ea=(-ei*ai+gi*Ln)/Ii,Ia=En-bi,yo=Un-Zr,Da=ia-bi,go=Ea-Zr;return Ia*Ia+yo*yo>Da*Da+go*go&&(En=ia,Un=Ea),[[En-jt,Un-gr],[En*Te/Si,Un*Te/Si]]}function oo(){return!0}function Vc(X){var se=Ds,Te=Fs,Ne=oo,He=Ku,Ye=He.key,kt=.7;function nt(jt){var gr=[],yr=[],Hr=-1,qr=jt.length,_i,bi=vi(se),Zr=vi(Te);function ai(){gr.push("M",He(X(yr),kt))}for(;++Hr<qr;)Ne.call(this,_i=jt[Hr],Hr)?yr.push([+bi.call(this,_i,Hr),+Zr.call(this,_i,Hr)]):yr.length&&(ai(),yr=[]);return yr.length&&ai(),gr.length?gr.join(""):null}return nt.x=function(jt){return arguments.length?(se=jt,nt):se},nt.y=function(jt){return arguments.length?(Te=jt,nt):Te},nt.defined=function(jt){return arguments.length?(Ne=jt,nt):Ne},nt.interpolate=function(jt){return arguments.length?(typeof jt=="function"?Ye=He=jt:Ye=(He=hc.get(jt)||Ku).key,nt):Ye},nt.tension=function(jt){return arguments.length?(kt=jt,nt):kt},nt}e.svg.line=function(){return Vc(G)};var hc=e.map({linear:Ku,"linear-closed":ue,step:w,"step-before":B,"step-after":Q,basis:Tt,"basis-open":Yt,"basis-closed":Kt,bundle:xr,cardinal:qe,"cardinal-open":ee,"cardinal-closed":le,monotone:Ft});hc.forEach(function(X,se){se.key=X,se.closed=/-closed$/.test(X)});function Ku(X){return X.length>1?X.join("L"):X+"Z"}function ue(X){return X.join("L")+"Z"}function w(X){for(var se=0,Te=X.length,Ne=X[0],He=[Ne[0],",",Ne[1]];++se<Te;)He.push("H",(Ne[0]+(Ne=X[se])[0])/2,"V",Ne[1]);return Te>1&&He.push("H",Ne[0]),He.join("")}function B(X){for(var se=0,Te=X.length,Ne=X[0],He=[Ne[0],",",Ne[1]];++se<Te;)He.push("V",(Ne=X[se])[1],"H",Ne[0]);return He.join("")}function Q(X){for(var se=0,Te=X.length,Ne=X[0],He=[Ne[0],",",Ne[1]];++se<Te;)He.push("H",(Ne=X[se])[0],"V",Ne[1]);return He.join("")}function ee(X,se){return X.length<4?Ku(X):X[1]+Xe(X.slice(1,-1),ot(X,se))}function le(X,se){return X.length<3?ue(X):X[0]+Xe((X.push(X[0]),X),ot([X[X.length-2]].concat(X,[X[1]]),se))}function qe(X,se){return X.length<3?Ku(X):X[0]+Xe(X,ot(X,se))}function Xe(X,se){if(se.length<1||X.length!=se.length&&X.length!=se.length+2)return Ku(X);var Te=X.length!=se.length,Ne="",He=X[0],Ye=X[1],kt=se[0],nt=kt,jt=1;if(Te&&(Ne+="Q"+(Ye[0]-kt[0]*2/3)+","+(Ye[1]-kt[1]*2/3)+","+Ye[0]+","+Ye[1],He=X[1],jt=2),se.length>1){nt=se[1],Ye=X[jt],jt++,Ne+="C"+(He[0]+kt[0])+","+(He[1]+kt[1])+","+(Ye[0]-nt[0])+","+(Ye[1]-nt[1])+","+Ye[0]+","+Ye[1];for(var gr=2;gr<se.length;gr++,jt++)Ye=X[jt],nt=se[gr],Ne+="S"+(Ye[0]-nt[0])+","+(Ye[1]-nt[1])+","+Ye[0]+","+Ye[1]}if(Te){var yr=X[jt];Ne+="Q"+(Ye[0]+nt[0]*2/3)+","+(Ye[1]+nt[1]*2/3)+","+yr[0]+","+yr[1]}return Ne}function ot(X,se){for(var Te=[],Ne=(1-se)/2,He,Ye=X[0],kt=X[1],nt=1,jt=X.length;++nt<jt;)He=Ye,Ye=kt,kt=X[nt],Te.push([Ne*(kt[0]-He[0]),Ne*(kt[1]-He[1])]);return Te}function Tt(X){if(X.length<3)return Ku(X);var se=1,Te=X.length,Ne=X[0],He=Ne[0],Ye=Ne[1],kt=[He,He,He,(Ne=X[1])[0]],nt=[Ye,Ye,Ye,Ne[1]],jt=[He,",",Ye,"L",Ir(De,kt),",",Ir(De,nt)];for(X.push(X[Te-1]);++se<=Te;)Ne=X[se],kt.shift(),kt.push(Ne[0]),nt.shift(),nt.push(Ne[1]),Be(jt,kt,nt);return X.pop(),jt.push("L",Ne),jt.join("")}function Yt(X){if(X.length<4)return Ku(X);for(var se=[],Te=-1,Ne=X.length,He,Ye=[0],kt=[0];++Te<3;)He=X[Te],Ye.push(He[0]),kt.push(He[1]);for(se.push(Ir(De,Ye)+","+Ir(De,kt)),--Te;++Te<Ne;)He=X[Te],Ye.shift(),Ye.push(He[0]),kt.shift(),kt.push(He[1]),Be(se,Ye,kt);return se.join("")}function Kt(X){for(var se,Te=-1,Ne=X.length,He=Ne+4,Ye,kt=[],nt=[];++Te<4;)Ye=X[Te%Ne],kt.push(Ye[0]),nt.push(Ye[1]);for(se=[Ir(De,kt),",",Ir(De,nt)],--Te;++Te<He;)Ye=X[Te%Ne],kt.shift(),kt.push(Ye[0]),nt.shift(),nt.push(Ye[1]),Be(se,kt,nt);return se.join("")}function xr(X,se){var Te=X.length-1;if(Te)for(var Ne=X[0][0],He=X[0][1],Ye=X[Te][0]-Ne,kt=X[Te][1]-He,nt=-1,jt,gr;++nt<=Te;)jt=X[nt],gr=nt/Te,jt[0]=se*jt[0]+(1-se)*(Ne+gr*Ye),jt[1]=se*jt[1]+(1-se)*(He+gr*kt);return Tt(X)}function Ir(X,se){return X[0]*se[0]+X[1]*se[1]+X[2]*se[2]+X[3]*se[3]}var ve=[0,2/3,1/3,0],be=[0,1/3,2/3,0],De=[0,1/6,2/3,1/6];function Be(X,se,Te){X.push("C",Ir(ve,se),",",Ir(ve,Te),",",Ir(be,se),",",Ir(be,Te),",",Ir(De,se),",",Ir(De,Te))}function et(X,se){return(se[1]-X[1])/(se[0]-X[0])}function We(X){for(var se=0,Te=X.length-1,Ne=[],He=X[0],Ye=X[1],kt=Ne[0]=et(He,Ye);++se<Te;)Ne[se]=(kt+(kt=et(He=Ye,Ye=X[se+1])))/2;return Ne[se]=kt,Ne}function it(X){for(var se=[],Te,Ne,He,Ye,kt=We(X),nt=-1,jt=X.length-1;++nt<jt;)Te=et(X[nt],X[nt+1]),p(Te)<Je?kt[nt]=kt[nt+1]=0:(Ne=kt[nt]/Te,He=kt[nt+1]/Te,Ye=Ne*Ne+He*He,Ye>9&&(Ye=Te*3/Math.sqrt(Ye),kt[nt]=Ye*Ne,kt[nt+1]=Ye*He));for(nt=-1;++nt<=jt;)Ye=(X[Math.min(jt,nt+1)][0]-X[Math.max(0,nt-1)][0])/(6*(1+kt[nt]*kt[nt])),se.push([Ye||0,kt[nt]*Ye||0]);return se}function Ft(X){return X.length<3?Ku(X):X[0]+Xe(X,it(X))}e.svg.line.radial=function(){var X=Vc(Ht);return X.radius=X.x,delete X.x,X.angle=X.y,delete X.y,X};function Ht(X){for(var se,Te=-1,Ne=X.length,He,Ye;++Te<Ne;)se=X[Te],He=se[0],Ye=se[1]-xe,se[0]=He*Math.cos(Ye),se[1]=He*Math.sin(Ye);return X}function tr(X){var se=Ds,Te=Ds,Ne=0,He=Fs,Ye=oo,kt=Ku,nt=kt.key,jt=kt,gr="L",yr=.7;function Hr(qr){var _i=[],bi=[],Zr=[],ai=-1,gi=qr.length,Ii,Si=vi(se),ei=vi(Ne),Ln=se===Te?function(){return Un}:vi(Te),En=Ne===He?function(){return ia}:vi(He),Un,ia;function Ea(){_i.push("M",kt(X(Zr),yr),gr,jt(X(bi.reverse()),yr),"Z")}for(;++ai<gi;)Ye.call(this,Ii=qr[ai],ai)?(bi.push([Un=+Si.call(this,Ii,ai),ia=+ei.call(this,Ii,ai)]),Zr.push([+Ln.call(this,Ii,ai),+En.call(this,Ii,ai)])):bi.length&&(Ea(),bi=[],Zr=[]);return bi.length&&Ea(),_i.length?_i.join(""):null}return Hr.x=function(qr){return arguments.length?(se=Te=qr,Hr):Te},Hr.x0=function(qr){return arguments.length?(se=qr,Hr):se},Hr.x1=function(qr){return arguments.length?(Te=qr,Hr):Te},Hr.y=function(qr){return arguments.length?(Ne=He=qr,Hr):He},Hr.y0=function(qr){return arguments.length?(Ne=qr,Hr):Ne},Hr.y1=function(qr){return arguments.length?(He=qr,Hr):He},Hr.defined=function(qr){return arguments.length?(Ye=qr,Hr):Ye},Hr.interpolate=function(qr){return arguments.length?(typeof qr=="function"?nt=kt=qr:nt=(kt=hc.get(qr)||Ku).key,jt=kt.reverse||kt,gr=kt.closed?"M":"L",Hr):nt},Hr.tension=function(qr){return arguments.length?(yr=qr,Hr):yr},Hr}B.reverse=Q,Q.reverse=B,e.svg.area=function(){return tr(G)},e.svg.area.radial=function(){var X=tr(Ht);return X.radius=X.x,delete X.x,X.innerRadius=X.x0,delete X.x0,X.outerRadius=X.x1,delete X.x1,X.angle=X.y,delete X.y,X.startAngle=X.y0,delete X.y0,X.endAngle=X.y1,delete X.y1,X};function dr(X){return X.source}function Sr(X){return X.target}e.svg.chord=function(){var X=dr,se=Sr,Te=Or,Ne=pu,He=Uc;function Ye(yr,Hr){var qr=kt(this,X,yr,Hr),_i=kt(this,se,yr,Hr);return"M"+qr.p0+jt(qr.r,qr.p1,qr.a1-qr.a0)+(nt(qr,_i)?gr(qr.r,qr.p1,qr.r,qr.p0):gr(qr.r,qr.p1,_i.r,_i.p0)+jt(_i.r,_i.p1,_i.a1-_i.a0)+gr(_i.r,_i.p1,qr.r,qr.p0))+"Z"}function kt(yr,Hr,qr,_i){var bi=Hr.call(yr,qr,_i),Zr=Te.call(yr,bi,_i),ai=Ne.call(yr,bi,_i)-xe,gi=He.call(yr,bi,_i)-xe;return{r:Zr,a0:ai,a1:gi,p0:[Zr*Math.cos(ai),Zr*Math.sin(ai)],p1:[Zr*Math.cos(gi),Zr*Math.sin(gi)]}}function nt(yr,Hr){return yr.a0==Hr.a0&&yr.a1==Hr.a1}function jt(yr,Hr,qr){return"A"+yr+","+yr+" 0 "+ +(qr>tt)+",1 "+Hr}function gr(yr,Hr,qr,_i){return"Q 0,0 "+_i}return Ye.radius=function(yr){return arguments.length?(Te=vi(yr),Ye):Te},Ye.source=function(yr){return arguments.length?(X=vi(yr),Ye):X},Ye.target=function(yr){return arguments.length?(se=vi(yr),Ye):se},Ye.startAngle=function(yr){return arguments.length?(Ne=vi(yr),Ye):Ne},Ye.endAngle=function(yr){return arguments.length?(He=vi(yr),Ye):He},Ye};function Or(X){return X.radius}e.svg.diagonal=function(){var X=dr,se=Sr,Te=Wr;function Ne(He,Ye){var kt=X.call(this,He,Ye),nt=se.call(this,He,Ye),jt=(kt.y+nt.y)/2,gr=[kt,{x:kt.x,y:jt},{x:nt.x,y:jt},nt];return gr=gr.map(Te),"M"+gr[0]+"C"+gr[1]+" "+gr[2]+" "+gr[3]}return Ne.source=function(He){return arguments.length?(X=vi(He),Ne):X},Ne.target=function(He){return arguments.length?(se=vi(He),Ne):se},Ne.projection=function(He){return arguments.length?(Te=He,Ne):Te},Ne};function Wr(X){return[X.x,X.y]}e.svg.diagonal.radial=function(){var X=e.svg.diagonal(),se=Wr,Te=X.projection;return X.projection=function(Ne){return arguments.length?Te(ni(se=Ne)):se},X};function ni(X){return function(){var se=X.apply(this,arguments),Te=se[0],Ne=se[1]-xe;return[Te*Math.cos(Ne),Te*Math.sin(Ne)]}}e.svg.symbol=function(){var X=cn,se=Pi;function Te(Ne,He){return(Cn.get(X.call(this,Ne,He))||ln)(se.call(this,Ne,He))}return Te.type=function(Ne){return arguments.length?(X=vi(Ne),Te):X},Te.size=function(Ne){return arguments.length?(se=vi(Ne),Te):se},Te};function Pi(){return 64}function cn(){return"circle"}function ln(X){var se=Math.sqrt(X/tt);return"M0,"+se+"A"+se+","+se+" 0 1,1 0,"+-se+"A"+se+","+se+" 0 1,1 0,"+se+"Z"}var Cn=e.map({circle:ln,cross:function(X){var se=Math.sqrt(X/5)/2;return"M"+-3*se+","+-se+"H"+-se+"V"+-3*se+"H"+se+"V"+-se+"H"+3*se+"V"+se+"H"+se+"V"+3*se+"H"+-se+"V"+se+"H"+-3*se+"Z"},diamond:function(X){var se=Math.sqrt(X/(2*Ta)),Te=se*Ta;return"M0,"+-se+"L"+Te+",0 0,"+se+" "+-Te+",0Z"},square:function(X){var se=Math.sqrt(X)/2;return"M"+-se+","+-se+"L"+se+","+-se+" "+se+","+se+" "+-se+","+se+"Z"},"triangle-down":function(X){var se=Math.sqrt(X/Kn),Te=se*Kn/2;return"M0,"+Te+"L"+se+","+-Te+" "+-se+","+-Te+"Z"},"triangle-up":function(X){var se=Math.sqrt(X/Kn),Te=se*Kn/2;return"M0,"+-Te+"L"+se+","+Te+" "+-se+","+Te+"Z"}});e.svg.symbolTypes=Cn.keys();var Kn=Math.sqrt(3),Ta=Math.tan(30*ke);Pe.transition=function(X){for(var se=Bo||++mo,Te=To(X),Ne=[],He,Ye,kt=Ps||{time:Date.now(),ease:bl,delay:0,duration:250},nt=-1,jt=this.length;++nt<jt;){Ne.push(He=[]);for(var gr=this[nt],yr=-1,Hr=gr.length;++yr<Hr;)(Ye=gr[yr])&&hl(Ye,yr,Te,se,kt),He.push(Ye)}return Co(Ne,Te,se)},Pe.interrupt=function(X){return this.each(X==null?fa:$a(To(X)))};var fa=$a(To());function $a(X){return function(){var se,Te,Ne;(se=this[X])&&(Ne=se[Te=se.active])&&(Ne.timer.c=null,Ne.timer.t=NaN,--se.count?delete se[Te]:delete this[X],se.active+=.5,Ne.event&&Ne.event.interrupt.call(this,this.__data__,Ne.index))}}function Co(X,se,Te){return ie(X,Qa),X.namespace=se,X.id=Te,X}var Qa=[],mo=0,Bo,Ps;Qa.call=Pe.call,Qa.empty=Pe.empty,Qa.node=Pe.node,Qa.size=Pe.size,e.transition=function(X,se){return X&&X.transition?Bo?X.transition(se):X:e.selection().transition(X)},e.transition.prototype=Qa,Qa.select=function(X){var se=this.id,Te=this.namespace,Ne=[],He,Ye,kt;X=ge(X);for(var nt=-1,jt=this.length;++nt<jt;){Ne.push(He=[]);for(var gr=this[nt],yr=-1,Hr=gr.length;++yr<Hr;)(kt=gr[yr])&&(Ye=X.call(kt,kt.__data__,yr,nt))?("__data__"in kt&&(Ye.__data__=kt.__data__),hl(Ye,yr,Te,se,kt[Te][se]),He.push(Ye)):He.push(null)}return Co(Ne,Te,se)},Qa.selectAll=function(X){var se=this.id,Te=this.namespace,Ne=[],He,Ye,kt,nt,jt;X=Re(X);for(var gr=-1,yr=this.length;++gr<yr;)for(var Hr=this[gr],qr=-1,_i=Hr.length;++qr<_i;)if(kt=Hr[qr]){jt=kt[Te][se],Ye=X.call(kt,kt.__data__,qr,gr),Ne.push(He=[]);for(var bi=-1,Zr=Ye.length;++bi<Zr;)(nt=Ye[bi])&&hl(nt,bi,Te,se,jt),He.push(nt)}return Co(Ne,Te,se)},Qa.filter=function(X){var se=[],Te,Ne,He;typeof X!="function"&&(X=$e(X));for(var Ye=0,kt=this.length;Ye<kt;Ye++){se.push(Te=[]);for(var Ne=this[Ye],nt=0,jt=Ne.length;nt<jt;nt++)(He=Ne[nt])&&X.call(He,He.__data__,nt,Ye)&&Te.push(He)}return Co(se,this.namespace,this.id)},Qa.tween=function(X,se){var Te=this.id,Ne=this.namespace;return arguments.length<2?this.node()[Ne][Te].tween.get(X):Qt(this,se==null?function(He){He[Ne][Te].tween.remove(X)}:function(He){He[Ne][Te].tween.set(X,se)})};function Ts(X,se,Te,Ne){var He=X.id,Ye=X.namespace;return Qt(X,typeof Te=="function"?function(kt,nt,jt){kt[Ye][He].tween.set(se,Ne(Te.call(kt,kt.__data__,nt,jt)))}:(Te=Ne(Te),function(kt){kt[Ye][He].tween.set(se,Te)}))}Qa.attr=function(X,se){if(arguments.length<2){for(se in X)this.attr(se,X[se]);return this}var Te=X=="transform"?sf:xl,Ne=e.ns.qualify(X);function He(){this.removeAttribute(Ne)}function Ye(){this.removeAttributeNS(Ne.space,Ne.local)}function kt(jt){return jt==null?He:(jt+="",function(){var gr=this.getAttribute(Ne),yr;return gr!==jt&&(yr=Te(gr,jt),function(Hr){this.setAttribute(Ne,yr(Hr))})})}function nt(jt){return jt==null?Ye:(jt+="",function(){var gr=this.getAttributeNS(Ne.space,Ne.local),yr;return gr!==jt&&(yr=Te(gr,jt),function(Hr){this.setAttributeNS(Ne.space,Ne.local,yr(Hr))})})}return Ts(this,"attr."+X,se,Ne.local?nt:kt)},Qa.attrTween=function(X,se){var Te=e.ns.qualify(X);function Ne(Ye,kt){var nt=se.call(this,Ye,kt,this.getAttribute(Te));return nt&&function(jt){this.setAttribute(Te,nt(jt))}}function He(Ye,kt){var nt=se.call(this,Ye,kt,this.getAttributeNS(Te.space,Te.local));return nt&&function(jt){this.setAttributeNS(Te.space,Te.local,nt(jt))}}return this.tween("attr."+X,Te.local?He:Ne)},Qa.style=function(X,se,Te){var Ne=arguments.length;if(Ne<3){if(typeof X!="string"){Ne<2&&(se="");for(Te in X)this.style(Te,X[Te],se);return this}Te=""}function He(){this.style.removeProperty(X)}function Ye(kt){return kt==null?He:(kt+="",function(){var nt=a(this).getComputedStyle(this,null).getPropertyValue(X),jt;return nt!==kt&&(jt=xl(nt,kt),function(gr){this.style.setProperty(X,jt(gr),Te)})})}return Ts(this,"style."+X,se,Ye)},Qa.styleTween=function(X,se,Te){arguments.length<3&&(Te="");function Ne(He,Ye){var kt=se.call(this,He,Ye,a(this).getComputedStyle(this,null).getPropertyValue(X));return kt&&function(nt){this.style.setProperty(X,kt(nt),Te)}}return this.tween("style."+X,Ne)},Qa.text=function(X){return Ts(this,"text",X,wo)};function wo(X){return X==null&&(X=""),function(){this.textContent=X}}Qa.remove=function(){var X=this.namespace;return this.each("end.transition",function(){var se;this[X].count<2&&(se=this.parentNode)&&se.removeChild(this)})},Qa.ease=function(X){var se=this.id,Te=this.namespace;return arguments.length<1?this.node()[Te][se].ease:(typeof X!="function"&&(X=e.ease.apply(e,arguments)),Qt(this,function(Ne){Ne[Te][se].ease=X}))},Qa.delay=function(X){var se=this.id,Te=this.namespace;return arguments.length<1?this.node()[Te][se].delay:Qt(this,typeof X=="function"?function(Ne,He,Ye){Ne[Te][se].delay=+X.call(Ne,Ne.__data__,He,Ye)}:(X=+X,function(Ne){Ne[Te][se].delay=X}))},Qa.duration=function(X){var se=this.id,Te=this.namespace;return arguments.length<1?this.node()[Te][se].duration:Qt(this,typeof X=="function"?function(Ne,He,Ye){Ne[Te][se].duration=Math.max(1,X.call(Ne,Ne.__data__,He,Ye))}:(X=Math.max(1,X),function(Ne){Ne[Te][se].duration=X}))},Qa.each=function(X,se){var Te=this.id,Ne=this.namespace;if(arguments.length<2){var He=Ps,Ye=Bo;try{Bo=Te,Qt(this,function(kt,nt,jt){Ps=kt[Ne][Te],X.call(kt,kt.__data__,nt,jt)})}finally{Ps=He,Bo=Ye}}else Qt(this,function(kt){var nt=kt[Ne][Te];(nt.event||(nt.event=e.dispatch("start","end","interrupt"))).on(X,se)});return this},Qa.transition=function(){for(var X=this.id,se=++mo,Te=this.namespace,Ne=[],He,Ye,kt,nt,jt=0,gr=this.length;jt<gr;jt++){Ne.push(He=[]);for(var Ye=this[jt],yr=0,Hr=Ye.length;yr<Hr;yr++)(kt=Ye[yr])&&(nt=kt[Te][X],hl(kt,yr,Te,se,{time:nt.time,ease:nt.ease,delay:nt.delay+nt.duration,duration:nt.duration})),He.push(kt)}return Co(Ne,Te,se)};function To(X){return X==null?"__transition__":"__transition_"+X+"__"}function hl(X,se,Te,Ne,He){var Ye=X[Te]||(X[Te]={active:0,count:0}),kt=Ye[Ne],nt,jt,gr,yr,Hr;function qr(Zr){var ai=kt.delay;if(jt.t=ai+nt,ai<=Zr)return _i(Zr-ai);jt.c=_i}function _i(Zr){var ai=Ye.active,gi=Ye[ai];gi&&(gi.timer.c=null,gi.timer.t=NaN,--Ye.count,delete Ye[ai],gi.event&&gi.event.interrupt.call(X,X.__data__,gi.index));for(var Ii in Ye)if(+Ii<Ne){var Si=Ye[Ii];Si.timer.c=null,Si.timer.t=NaN,--Ye.count,delete Ye[Ii]}jt.c=bi,No(function(){return jt.c&&bi(Zr||1)&&(jt.c=null,jt.t=NaN),1},0,nt),Ye.active=Ne,kt.event&&kt.event.start.call(X,X.__data__,se),Hr=[],kt.tween.forEach(function(ei,Ln){(Ln=Ln.call(X,X.__data__,se))&&Hr.push(Ln)}),yr=kt.ease,gr=kt.duration}function bi(Zr){for(var ai=Zr/gr,gi=yr(ai),Ii=Hr.length;Ii>0;)Hr[--Ii].call(X,gi);if(ai>=1)return kt.event&&kt.event.end.call(X,X.__data__,se),--Ye.count?delete Ye[Ne]:delete X[Te],1}kt||(nt=He.time,jt=No(qr,0,nt),kt=Ye[Ne]={tween:new S,time:nt,timer:jt,delay:He.delay,duration:He.duration,ease:He.ease,index:se},He=null,++Ye.count)}e.svg.axis=function(){var X=e.scale.linear(),se=Ul,Te=6,Ne=6,He=3,Ye=[10],kt=null,nt;function jt(gr){gr.each(function(){var yr=e.select(this),Hr=this.__chart__||X,qr=this.__chart__=X.copy(),_i=kt==null?qr.ticks?qr.ticks.apply(qr,Ye):qr.domain():kt,bi=nt==null?qr.tickFormat?qr.tickFormat.apply(qr,Ye):G:nt,Zr=yr.selectAll(".tick").data(_i,qr),ai=Zr.enter().insert("g",".domain").attr("class","tick").style("opacity",Je),gi=e.transition(Zr.exit()).style("opacity",Je).remove(),Ii=e.transition(Zr.order()).style("opacity",1),Si=Math.max(Te,0)+He,ei,Ln=wn(qr),En=yr.selectAll(".domain").data([0]),Un=(En.enter().append("path").attr("class","domain"),e.transition(En));ai.append("line"),ai.append("text");var ia=ai.select("line"),Ea=Ii.select("line"),Ia=Zr.select("text").text(bi),yo=ai.select("text"),Da=Ii.select("text"),go=se==="top"||se==="left"?-1:1,Is,Ms,Xs,Gn;if(se==="bottom"||se==="top"?(ei=au,Is="x",Xs="y",Ms="x2",Gn="y2",Ia.attr("dy",go<0?"0em":".71em").style("text-anchor","middle"),Un.attr("d","M"+Ln[0]+","+go*Ne+"V0H"+Ln[1]+"V"+go*Ne)):(ei=Js,Is="y",Xs="x",Ms="y2",Gn="x2",Ia.attr("dy",".32em").style("text-anchor",go<0?"end":"start"),Un.attr("d","M"+go*Ne+","+Ln[0]+"H0V"+Ln[1]+"H"+go*Ne)),ia.attr(Gn,go*Te),yo.attr(Xs,go*Si),Ea.attr(Ms,0).attr(Gn,go*Te),Da.attr(Is,0).attr(Xs,go*Si),qr.rangeBand){var ja=qr,Fo=ja.rangeBand()/2;Hr=qr=function(Uo){return ja(Uo)+Fo}}else Hr.rangeBand?Hr=qr:gi.call(ei,qr,Hr);ai.call(ei,Hr,qr),Ii.call(ei,qr,qr)})}return jt.scale=function(gr){return arguments.length?(X=gr,jt):X},jt.orient=function(gr){return arguments.length?(se=gr in Lu?gr+"":Ul,jt):se},jt.ticks=function(){return arguments.length?(Ye=r(arguments),jt):Ye},jt.tickValues=function(gr){return arguments.length?(kt=gr,jt):kt},jt.tickFormat=function(gr){return arguments.length?(nt=gr,jt):nt},jt.tickSize=function(gr){var yr=arguments.length;return yr?(Te=+gr,Ne=+arguments[yr-1],jt):Te},jt.innerTickSize=function(gr){return arguments.length?(Te=+gr,jt):Te},jt.outerTickSize=function(gr){return arguments.length?(Ne=+gr,jt):Ne},jt.tickPadding=function(gr){return arguments.length?(He=+gr,jt):He},jt.tickSubdivide=function(){return arguments.length&&jt},jt};var Ul="bottom",Lu={top:1,right:1,bottom:1,left:1};function au(X,se,Te){X.attr("transform",function(Ne){var He=se(Ne);return"translate("+(isFinite(He)?He:Te(Ne))+",0)"})}function Js(X,se,Te){X.attr("transform",function(Ne){var He=se(Ne);return"translate(0,"+(isFinite(He)?He:Te(Ne))+")"})}e.svg.brush=function(){var X=Ce(yr,"brushstart","brush","brushend"),se=null,Te=null,Ne=[0,0],He=[0,0],Ye,kt,nt=!0,jt=!0,gr=dc[0];function yr(Zr){Zr.each(function(){var ai=e.select(this).style("pointer-events","all").style("-webkit-tap-highlight-color","rgba(0,0,0,0)").on("mousedown.brush",bi).on("touchstart.brush",bi),gi=ai.selectAll(".background").data([0]);gi.enter().append("rect").attr("class","background").style("visibility","hidden").style("cursor","crosshair"),ai.selectAll(".extent").data([0]).enter().append("rect").attr("class","extent").style("cursor","move");var Ii=ai.selectAll(".resize").data(gr,G);Ii.exit().remove(),Ii.enter().append("g").attr("class",function(En){return"resize "+En}).style("cursor",function(En){return Ql[En]}).append("rect").attr("x",function(En){return/[ew]$/.test(En)?-3:null}).attr("y",function(En){return/^[ns]/.test(En)?-3:null}).attr("width",6).attr("height",6).style("visibility","hidden"),Ii.style("display",yr.empty()?"none":null);var Si=e.transition(ai),ei=e.transition(gi),Ln;se&&(Ln=wn(se),ei.attr("x",Ln[0]).attr("width",Ln[1]-Ln[0]),qr(Si)),Te&&(Ln=wn(Te),ei.attr("y",Ln[0]).attr("height",Ln[1]-Ln[0]),_i(Si)),Hr(Si)})}yr.event=function(Zr){Zr.each(function(){var ai=X.of(this,arguments),gi={x:Ne,y:He,i:Ye,j:kt},Ii=this.__chart__||gi;this.__chart__=gi,Bo?e.select(this).transition().each("start.brush",function(){Ye=Ii.i,kt=Ii.j,Ne=Ii.x,He=Ii.y,ai({type:"brushstart"})}).tween("brush:brush",function(){var Si=Gu(Ne,gi.x),ei=Gu(He,gi.y);return Ye=kt=null,function(Ln){Ne=gi.x=Si(Ln),He=gi.y=ei(Ln),ai({type:"brush",mode:"resize"})}}).each("end.brush",function(){Ye=gi.i,kt=gi.j,ai({type:"brush",mode:"resize"}),ai({type:"brushend"})}):(ai({type:"brushstart"}),ai({type:"brush",mode:"resize"}),ai({type:"brushend"}))})};function Hr(Zr){Zr.selectAll(".resize").attr("transform",function(ai){return"translate("+Ne[+/e$/.test(ai)]+","+He[+/^s/.test(ai)]+")"})}function qr(Zr){Zr.select(".extent").attr("x",Ne[0]),Zr.selectAll(".extent,.n>rect,.s>rect").attr("width",Ne[1]-Ne[0])}function _i(Zr){Zr.select(".extent").attr("y",He[0]),Zr.selectAll(".extent,.e>rect,.w>rect").attr("height",He[1]-He[0])}function bi(){var Zr=this,ai=e.select(e.event.target),gi=X.of(Zr,arguments),Ii=e.select(Zr),Si=ai.datum(),ei=!/^(n|s)$/.test(Si)&&se,Ln=!/^(e|w)$/.test(Si)&&Te,En=ai.classed("extent"),Un=Br(Zr),ia,Ea=e.mouse(Zr),Ia,yo=e.select(a(Zr)).on("keydown.brush",Is).on("keyup.brush",Ms);if(e.event.changedTouches?yo.on("touchmove.brush",Xs).on("touchend.brush",ja):yo.on("mousemove.brush",Xs).on("mouseup.brush",ja),Ii.interrupt().selectAll("*").interrupt(),En)Ea[0]=Ne[0]-Ea[0],Ea[1]=He[0]-Ea[1];else if(Si){var Da=+/w$/.test(Si),go=+/^n/.test(Si);Ia=[Ne[1-Da]-Ea[0],He[1-go]-Ea[1]],Ea[0]=Ne[Da],Ea[1]=He[go]}else e.event.altKey&&(ia=Ea.slice());Ii.style("pointer-events","none").selectAll(".resize").style("display",null),e.select("body").style("cursor",ai.style("cursor")),gi({type:"brushstart"}),Xs();function Is(){e.event.keyCode==32&&(En||(ia=null,Ea[0]-=Ne[1],Ea[1]-=He[1],En=2),_e())}function Ms(){e.event.keyCode==32&&En==2&&(Ea[0]+=Ne[1],Ea[1]+=He[1],En=0,_e())}function Xs(){var Fo=e.mouse(Zr),Uo=!1;Ia&&(Fo[0]+=Ia[0],Fo[1]+=Ia[1]),En||(e.event.altKey?(ia||(ia=[(Ne[0]+Ne[1])/2,(He[0]+He[1])/2]),Ea[0]=Ne[+(Fo[0]<ia[0])],Ea[1]=He[+(Fo[1]<ia[1])]):ia=null),ei&&Gn(Fo,se,0)&&(qr(Ii),Uo=!0),Ln&&Gn(Fo,Te,1)&&(_i(Ii),Uo=!0),Uo&&(Hr(Ii),gi({type:"brush",mode:En?"move":"resize"}))}function Gn(Fo,Uo,$s){var Sl=wn(Uo),bu=Sl[0],dl=Sl[1],Sc=Ea[$s],Me=$s?He:Ne,bt=Me[1]-Me[0],zt,Rr;if(En&&(bu-=Sc,dl-=bt+Sc),zt=($s?jt:nt)?Math.max(bu,Math.min(dl,Fo[$s])):Fo[$s],En?Rr=(zt+=Sc)+bt:(ia&&(Sc=Math.max(bu,Math.min(dl,2*ia[$s]-zt))),Sc<zt?(Rr=zt,zt=Sc):Rr=Sc),Me[0]!=zt||Me[1]!=Rr)return $s?kt=null:Ye=null,Me[0]=zt,Me[1]=Rr,!0}function ja(){Xs(),Ii.style("pointer-events","all").selectAll(".resize").style("display",yr.empty()?"none":null),e.select("body").style("cursor",null),yo.on("mousemove.brush",null).on("mouseup.brush",null).on("touchmove.brush",null).on("touchend.brush",null).on("keydown.brush",null).on("keyup.brush",null),Un(),gi({type:"brushend"})}}return yr.x=function(Zr){return arguments.length?(se=Zr,gr=dc[!se<<1|!Te],yr):se},yr.y=function(Zr){return arguments.length?(Te=Zr,gr=dc[!se<<1|!Te],yr):Te},yr.clamp=function(Zr){return arguments.length?(se&&Te?(nt=!!Zr[0],jt=!!Zr[1]):se?nt=!!Zr:Te&&(jt=!!Zr),yr):se&&Te?[nt,jt]:se?nt:Te?jt:null},yr.extent=function(Zr){var ai,gi,Ii,Si,ei;return arguments.length?(se&&(ai=Zr[0],gi=Zr[1],Te&&(ai=ai[0],gi=gi[0]),Ye=[ai,gi],se.invert&&(ai=se(ai),gi=se(gi)),gi<ai&&(ei=ai,ai=gi,gi=ei),(ai!=Ne[0]||gi!=Ne[1])&&(Ne=[ai,gi])),Te&&(Ii=Zr[0],Si=Zr[1],se&&(Ii=Ii[1],Si=Si[1]),kt=[Ii,Si],Te.invert&&(Ii=Te(Ii),Si=Te(Si)),Si<Ii&&(ei=Ii,Ii=Si,Si=ei),(Ii!=He[0]||Si!=He[1])&&(He=[Ii,Si])),yr):(se&&(Ye?(ai=Ye[0],gi=Ye[1]):(ai=Ne[0],gi=Ne[1],se.invert&&(ai=se.invert(ai),gi=se.invert(gi)),gi<ai&&(ei=ai,ai=gi,gi=ei))),Te&&(kt?(Ii=kt[0],Si=kt[1]):(Ii=He[0],Si=He[1],Te.invert&&(Ii=Te.invert(Ii),Si=Te.invert(Si)),Si<Ii&&(ei=Ii,Ii=Si,Si=ei))),se&&Te?[[ai,Ii],[gi,Si]]:se?[ai,gi]:Te&&[Ii,Si])},yr.clear=function(){return yr.empty()||(Ne=[0,0],He=[0,0],Ye=kt=null),yr},yr.empty=function(){return!!se&&Ne[0]==Ne[1]||!!Te&&He[0]==He[1]},e.rebind(yr,X,"on")};var Ql={n:"ns-resize",e:"ew-resize",s:"ns-resize",w:"ew-resize",nw:"nwse-resize",ne:"nesw-resize",se:"nwse-resize",sw:"nesw-resize"},dc=[["n","e","s","w","nw","ne","se","sw"],["e","w"],["n","s"],[]];e.text=hn(function(X){return X.responseText}),e.json=function(X,se){return An(X,"application/json",Tl,se)};function Tl(X){return JSON.parse(X.responseText)}e.html=function(X,se){return An(X,"text/html",Al,se)};function Al(X){var se=n.createRange();return se.selectNode(n.body),se.createContextualFragment(X.responseText)}e.xml=hn(function(X){return X.responseXML}),typeof c6=="object"&&c6.exports?c6.exports=e:this.d3=e}).apply(self)});var bO=ye((f6,pee)=>{(function(e,t){typeof f6=="object"&&typeof pee!="undefined"?t(f6):(e=e||self,t(e.d3=e.d3||{}))})(f6,function(e){"use strict";var t=new Date,r=new Date;function n($e,St,Qt,Vt){function _t(It){return $e(It=arguments.length===0?new Date:new Date(+It)),It}return _t.floor=function(It){return $e(It=new Date(+It)),It},_t.ceil=function(It){return $e(It=new Date(It-1)),St(It,1),$e(It),It},_t.round=function(It){var mt=_t(It),er=_t.ceil(It);return It-mt<er-It?mt:er},_t.offset=function(It,mt){return St(It=new Date(+It),mt==null?1:Math.floor(mt)),It},_t.range=function(It,mt,er){var lr=[],Tr;if(It=_t.ceil(It),er=er==null?1:Math.floor(er),!(It<mt)||!(er>0))return lr;do lr.push(Tr=new Date(+It)),St(It,er),$e(It);while(Tr<It&&It<mt);return lr},_t.filter=function(It){return n(function(mt){if(mt>=mt)for(;$e(mt),!It(mt);)mt.setTime(mt-1)},function(mt,er){if(mt>=mt)if(er<0)for(;++er<=0;)for(;St(mt,-1),!It(mt););else for(;--er>=0;)for(;St(mt,1),!It(mt););})},Qt&&(_t.count=function(It,mt){return t.setTime(+It),r.setTime(+mt),$e(t),$e(r),Math.floor(Qt(t,r))},_t.every=function(It){return It=Math.floor(It),!isFinite(It)||!(It>0)?null:It>1?_t.filter(Vt?function(mt){return Vt(mt)%It===0}:function(mt){return _t.count(0,mt)%It===0}):_t}),_t}var i=n(function(){},function($e,St){$e.setTime(+$e+St)},function($e,St){return St-$e});i.every=function($e){return $e=Math.floor($e),!isFinite($e)||!($e>0)?null:$e>1?n(function(St){St.setTime(Math.floor(St/$e)*$e)},function(St,Qt){St.setTime(+St+Qt*$e)},function(St,Qt){return(Qt-St)/$e}):i};var a=i.range,o=1e3,s=6e4,l=36e5,u=864e5,c=6048e5,f=n(function($e){$e.setTime($e-$e.getMilliseconds())},function($e,St){$e.setTime(+$e+St*o)},function($e,St){return(St-$e)/o},function($e){return $e.getUTCSeconds()}),h=f.range,d=n(function($e){$e.setTime($e-$e.getMilliseconds()-$e.getSeconds()*o)},function($e,St){$e.setTime(+$e+St*s)},function($e,St){return(St-$e)/s},function($e){return $e.getMinutes()}),v=d.range,_=n(function($e){$e.setTime($e-$e.getMilliseconds()-$e.getSeconds()*o-$e.getMinutes()*s)},function($e,St){$e.setTime(+$e+St*l)},function($e,St){return(St-$e)/l},function($e){return $e.getHours()}),b=_.range,p=n(function($e){$e.setHours(0,0,0,0)},function($e,St){$e.setDate($e.getDate()+St)},function($e,St){return(St-$e-(St.getTimezoneOffset()-$e.getTimezoneOffset())*s)/u},function($e){return $e.getDate()-1}),k=p.range;function E($e){return n(function(St){St.setDate(St.getDate()-(St.getDay()+7-$e)%7),St.setHours(0,0,0,0)},function(St,Qt){St.setDate(St.getDate()+Qt*7)},function(St,Qt){return(Qt-St-(Qt.getTimezoneOffset()-St.getTimezoneOffset())*s)/c})}var S=E(0),L=E(1),x=E(2),C=E(3),M=E(4),g=E(5),P=E(6),T=S.range,z=L.range,O=x.range,V=C.range,G=M.range,Z=g.range,j=P.range,N=n(function($e){$e.setDate(1),$e.setHours(0,0,0,0)},function($e,St){$e.setMonth($e.getMonth()+St)},function($e,St){return St.getMonth()-$e.getMonth()+(St.getFullYear()-$e.getFullYear())*12},function($e){return $e.getMonth()}),H=N.range,te=n(function($e){$e.setMonth(0,1),$e.setHours(0,0,0,0)},function($e,St){$e.setFullYear($e.getFullYear()+St)},function($e,St){return St.getFullYear()-$e.getFullYear()},function($e){return $e.getFullYear()});te.every=function($e){return!isFinite($e=Math.floor($e))||!($e>0)?null:n(function(St){St.setFullYear(Math.floor(St.getFullYear()/$e)*$e),St.setMonth(0,1),St.setHours(0,0,0,0)},function(St,Qt){St.setFullYear(St.getFullYear()+Qt*$e)})};var oe=te.range,_e=n(function($e){$e.setUTCSeconds(0,0)},function($e,St){$e.setTime(+$e+St*s)},function($e,St){return(St-$e)/s},function($e){return $e.getUTCMinutes()}),Ee=_e.range,Ce=n(function($e){$e.setUTCMinutes(0,0,0)},function($e,St){$e.setTime(+$e+St*l)},function($e,St){return(St-$e)/l},function($e){return $e.getUTCHours()}),me=Ce.range,ie=n(function($e){$e.setUTCHours(0,0,0,0)},function($e,St){$e.setUTCDate($e.getUTCDate()+St)},function($e,St){return(St-$e)/u},function($e){return $e.getUTCDate()-1}),Se=ie.range;function Le($e){return n(function(St){St.setUTCDate(St.getUTCDate()-(St.getUTCDay()+7-$e)%7),St.setUTCHours(0,0,0,0)},function(St,Qt){St.setUTCDate(St.getUTCDate()+Qt*7)},function(St,Qt){return(Qt-St)/c})}var Ae=Le(0),Fe=Le(1),Pe=Le(2),ge=Le(3),Re=Le(4),ce=Le(5),Ze=Le(6),ut=Ae.range,pt=Fe.range,Zt=Pe.range,st=ge.range,lt=Re.range,Gt=ce.range,Nt=Ze.range,Jt=n(function($e){$e.setUTCDate(1),$e.setUTCHours(0,0,0,0)},function($e,St){$e.setUTCMonth($e.getUTCMonth()+St)},function($e,St){return St.getUTCMonth()-$e.getUTCMonth()+(St.getUTCFullYear()-$e.getUTCFullYear())*12},function($e){return $e.getUTCMonth()}),sr=Jt.range,wr=n(function($e){$e.setUTCMonth(0,1),$e.setUTCHours(0,0,0,0)},function($e,St){$e.setUTCFullYear($e.getUTCFullYear()+St)},function($e,St){return St.getUTCFullYear()-$e.getUTCFullYear()},function($e){return $e.getUTCFullYear()});wr.every=function($e){return!isFinite($e=Math.floor($e))||!($e>0)?null:n(function(St){St.setUTCFullYear(Math.floor(St.getUTCFullYear()/$e)*$e),St.setUTCMonth(0,1),St.setUTCHours(0,0,0,0)},function(St,Qt){St.setUTCFullYear(St.getUTCFullYear()+Qt*$e)})};var cr=wr.range;e.timeDay=p,e.timeDays=k,e.timeFriday=g,e.timeFridays=Z,e.timeHour=_,e.timeHours=b,e.timeInterval=n,e.timeMillisecond=i,e.timeMilliseconds=a,e.timeMinute=d,e.timeMinutes=v,e.timeMonday=L,e.timeMondays=z,e.timeMonth=N,e.timeMonths=H,e.timeSaturday=P,e.timeSaturdays=j,e.timeSecond=f,e.timeSeconds=h,e.timeSunday=S,e.timeSundays=T,e.timeThursday=M,e.timeThursdays=G,e.timeTuesday=x,e.timeTuesdays=O,e.timeWednesday=C,e.timeWednesdays=V,e.timeWeek=S,e.timeWeeks=T,e.timeYear=te,e.timeYears=oe,e.utcDay=ie,e.utcDays=Se,e.utcFriday=ce,e.utcFridays=Gt,e.utcHour=Ce,e.utcHours=me,e.utcMillisecond=i,e.utcMilliseconds=a,e.utcMinute=_e,e.utcMinutes=Ee,e.utcMonday=Fe,e.utcMondays=pt,e.utcMonth=Jt,e.utcMonths=sr,e.utcSaturday=Ze,e.utcSaturdays=Nt,e.utcSecond=f,e.utcSeconds=h,e.utcSunday=Ae,e.utcSundays=ut,e.utcThursday=Re,e.utcThursdays=lt,e.utcTuesday=Pe,e.utcTuesdays=Zt,e.utcWednesday=ge,e.utcWednesdays=st,e.utcWeek=Ae,e.utcWeeks=ut,e.utcYear=wr,e.utcYears=cr,Object.defineProperty(e,"__esModule",{value:!0})})});var r3=ye((h6,gee)=>{(function(e,t){typeof h6=="object"&&typeof gee!="undefined"?t(h6,bO()):(e=e||self,t(e.d3=e.d3||{},e.d3))})(h6,function(e,t){"use strict";function r(Ge){if(0<=Ge.y&&Ge.y<100){var Je=new Date(-1,Ge.m,Ge.d,Ge.H,Ge.M,Ge.S,Ge.L);return Je.setFullYear(Ge.y),Je}return new Date(Ge.y,Ge.m,Ge.d,Ge.H,Ge.M,Ge.S,Ge.L)}function n(Ge){if(0<=Ge.y&&Ge.y<100){var Je=new Date(Date.UTC(-1,Ge.m,Ge.d,Ge.H,Ge.M,Ge.S,Ge.L));return Je.setUTCFullYear(Ge.y),Je}return new Date(Date.UTC(Ge.y,Ge.m,Ge.d,Ge.H,Ge.M,Ge.S,Ge.L))}function i(Ge,Je,je){return{y:Ge,m:Je,d:je,H:0,M:0,S:0,L:0}}function a(Ge){var Je=Ge.dateTime,je=Ge.date,tt=Ge.time,xt=Ge.periods,Ie=Ge.days,xe=Ge.shortDays,ke=Ge.months,vt=Ge.shortMonths,ir=h(xt),ar=d(xt),vr=h(Ie),ii=d(Ie),pi=h(xe),$r=d(xe),di=h(ke),ji=d(ke),In=h(vt),wi=d(vt),On={a:$i,A:tn,b:fn,B:yn,c:null,d:N,e:N,f:Ee,H,I:te,j:oe,L:_e,m:Ce,M:me,p:Sn,q:Ba,Q:mt,s:er,S:ie,u:Se,U:Le,V:Ae,w:Fe,W:Pe,x:null,X:null,y:ge,Y:Re,Z:ce,"%":It},qn={a:ua,A:ma,b:Wa,B:Fa,c:null,d:Ze,e:Ze,f:lt,H:ut,I:pt,j:Zt,L:st,m:Gt,M:Nt,p:Wo,q:da,Q:mt,s:er,S:Jt,u:sr,U:wr,V:cr,w:$e,W:St,x:null,X:null,y:Qt,Y:Vt,Z:_t,"%":It},Fn={a:rr,A:nr,b:Er,B:Xr,c:ri,d:M,e:M,f:V,H:P,I:P,j:g,L:O,m:C,M:T,p:wt,q:x,Q:Z,s:j,S:z,u:_,U:b,V:p,w:v,W:k,x:Qr,X:Oi,y:S,Y:E,Z:L,"%":G};On.x=ra(je,On),On.X=ra(tt,On),On.c=ra(Je,On),qn.x=ra(je,qn),qn.X=ra(tt,qn),qn.c=ra(Je,qn);function ra(Wn,Ha){return function(vo){var jn=[],Mt=-1,kr=0,Jr=Wn.length,vi,hn,An;for(vo instanceof Date||(vo=new Date(+vo));++Mt<Jr;)Wn.charCodeAt(Mt)===37&&(jn.push(Wn.slice(kr,Mt)),(hn=o[vi=Wn.charAt(++Mt)])!=null?vi=Wn.charAt(++Mt):hn=vi==="e"?" ":"0",(An=Ha[vi])&&(vi=An(vo,hn)),jn.push(vi),kr=Mt+1);return jn.push(Wn.slice(kr,Mt)),jn.join("")}}function la(Wn,Ha){return function(vo){var jn=i(1900,void 0,1),Mt=Ut(jn,Wn,vo+="",0),kr,Jr;if(Mt!=vo.length)return null;if("Q"in jn)return new Date(jn.Q);if("s"in jn)return new Date(jn.s*1e3+("L"in jn?jn.L:0));if(Ha&&!("Z"in jn)&&(jn.Z=0),"p"in jn&&(jn.H=jn.H%12+jn.p*12),jn.m===void 0&&(jn.m="q"in jn?jn.q:0),"V"in jn){if(jn.V<1||jn.V>53)return null;"w"in jn||(jn.w=1),"Z"in jn?(kr=n(i(jn.y,0,1)),Jr=kr.getUTCDay(),kr=Jr>4||Jr===0?t.utcMonday.ceil(kr):t.utcMonday(kr),kr=t.utcDay.offset(kr,(jn.V-1)*7),jn.y=kr.getUTCFullYear(),jn.m=kr.getUTCMonth(),jn.d=kr.getUTCDate()+(jn.w+6)%7):(kr=r(i(jn.y,0,1)),Jr=kr.getDay(),kr=Jr>4||Jr===0?t.timeMonday.ceil(kr):t.timeMonday(kr),kr=t.timeDay.offset(kr,(jn.V-1)*7),jn.y=kr.getFullYear(),jn.m=kr.getMonth(),jn.d=kr.getDate()+(jn.w+6)%7)}else("W"in jn||"U"in jn)&&("w"in jn||(jn.w="u"in jn?jn.u%7:"W"in jn?1:0),Jr="Z"in jn?n(i(jn.y,0,1)).getUTCDay():r(i(jn.y,0,1)).getDay(),jn.m=0,jn.d="W"in jn?(jn.w+6)%7+jn.W*7-(Jr+5)%7:jn.w+jn.U*7-(Jr+6)%7);return"Z"in jn?(jn.H+=jn.Z/100|0,jn.M+=jn.Z%100,n(jn)):r(jn)}}function Ut(Wn,Ha,vo,jn){for(var Mt=0,kr=Ha.length,Jr=vo.length,vi,hn;Mt<kr;){if(jn>=Jr)return-1;if(vi=Ha.charCodeAt(Mt++),vi===37){if(vi=Ha.charAt(Mt++),hn=Fn[vi in o?Ha.charAt(Mt++):vi],!hn||(jn=hn(Wn,vo,jn))<0)return-1}else if(vi!=vo.charCodeAt(jn++))return-1}return jn}function wt(Wn,Ha,vo){var jn=ir.exec(Ha.slice(vo));return jn?(Wn.p=ar[jn[0].toLowerCase()],vo+jn[0].length):-1}function rr(Wn,Ha,vo){var jn=pi.exec(Ha.slice(vo));return jn?(Wn.w=$r[jn[0].toLowerCase()],vo+jn[0].length):-1}function nr(Wn,Ha,vo){var jn=vr.exec(Ha.slice(vo));return jn?(Wn.w=ii[jn[0].toLowerCase()],vo+jn[0].length):-1}function Er(Wn,Ha,vo){var jn=In.exec(Ha.slice(vo));return jn?(Wn.m=wi[jn[0].toLowerCase()],vo+jn[0].length):-1}function Xr(Wn,Ha,vo){var jn=di.exec(Ha.slice(vo));return jn?(Wn.m=ji[jn[0].toLowerCase()],vo+jn[0].length):-1}function ri(Wn,Ha,vo){return Ut(Wn,Je,Ha,vo)}function Qr(Wn,Ha,vo){return Ut(Wn,je,Ha,vo)}function Oi(Wn,Ha,vo){return Ut(Wn,tt,Ha,vo)}function $i(Wn){return xe[Wn.getDay()]}function tn(Wn){return Ie[Wn.getDay()]}function fn(Wn){return vt[Wn.getMonth()]}function yn(Wn){return ke[Wn.getMonth()]}function Sn(Wn){return xt[+(Wn.getHours()>=12)]}function Ba(Wn){return 1+~~(Wn.getMonth()/3)}function ua(Wn){return xe[Wn.getUTCDay()]}function ma(Wn){return Ie[Wn.getUTCDay()]}function Wa(Wn){return vt[Wn.getUTCMonth()]}function Fa(Wn){return ke[Wn.getUTCMonth()]}function Wo(Wn){return xt[+(Wn.getUTCHours()>=12)]}function da(Wn){return 1+~~(Wn.getUTCMonth()/3)}return{format:function(Wn){var Ha=ra(Wn+="",On);return Ha.toString=function(){return Wn},Ha},parse:function(Wn){var Ha=la(Wn+="",!1);return Ha.toString=function(){return Wn},Ha},utcFormat:function(Wn){var Ha=ra(Wn+="",qn);return Ha.toString=function(){return Wn},Ha},utcParse:function(Wn){var Ha=la(Wn+="",!0);return Ha.toString=function(){return Wn},Ha}}}var o={"-":"",_:" ",0:"0"},s=/^\s*\d+/,l=/^%/,u=/[\\^$*+?|[\]().{}]/g;function c(Ge,Je,je){var tt=Ge<0?"-":"",xt=(tt?-Ge:Ge)+"",Ie=xt.length;return tt+(Ie<je?new Array(je-Ie+1).join(Je)+xt:xt)}function f(Ge){return Ge.replace(u,"\\$&")}function h(Ge){return new RegExp("^(?:"+Ge.map(f).join("|")+")","i")}function d(Ge){for(var Je={},je=-1,tt=Ge.length;++je<tt;)Je[Ge[je].toLowerCase()]=je;return Je}function v(Ge,Je,je){var tt=s.exec(Je.slice(je,je+1));return tt?(Ge.w=+tt[0],je+tt[0].length):-1}function _(Ge,Je,je){var tt=s.exec(Je.slice(je,je+1));return tt?(Ge.u=+tt[0],je+tt[0].length):-1}function b(Ge,Je,je){var tt=s.exec(Je.slice(je,je+2));return tt?(Ge.U=+tt[0],je+tt[0].length):-1}function p(Ge,Je,je){var tt=s.exec(Je.slice(je,je+2));return tt?(Ge.V=+tt[0],je+tt[0].length):-1}function k(Ge,Je,je){var tt=s.exec(Je.slice(je,je+2));return tt?(Ge.W=+tt[0],je+tt[0].length):-1}function E(Ge,Je,je){var tt=s.exec(Je.slice(je,je+4));return tt?(Ge.y=+tt[0],je+tt[0].length):-1}function S(Ge,Je,je){var tt=s.exec(Je.slice(je,je+2));return tt?(Ge.y=+tt[0]+(+tt[0]>68?1900:2e3),je+tt[0].length):-1}function L(Ge,Je,je){var tt=/^(Z)|([+-]\d\d)(?::?(\d\d))?/.exec(Je.slice(je,je+6));return tt?(Ge.Z=tt[1]?0:-(tt[2]+(tt[3]||"00")),je+tt[0].length):-1}function x(Ge,Je,je){var tt=s.exec(Je.slice(je,je+1));return tt?(Ge.q=tt[0]*3-3,je+tt[0].length):-1}function C(Ge,Je,je){var tt=s.exec(Je.slice(je,je+2));return tt?(Ge.m=tt[0]-1,je+tt[0].length):-1}function M(Ge,Je,je){var tt=s.exec(Je.slice(je,je+2));return tt?(Ge.d=+tt[0],je+tt[0].length):-1}function g(Ge,Je,je){var tt=s.exec(Je.slice(je,je+3));return tt?(Ge.m=0,Ge.d=+tt[0],je+tt[0].length):-1}function P(Ge,Je,je){var tt=s.exec(Je.slice(je,je+2));return tt?(Ge.H=+tt[0],je+tt[0].length):-1}function T(Ge,Je,je){var tt=s.exec(Je.slice(je,je+2));return tt?(Ge.M=+tt[0],je+tt[0].length):-1}function z(Ge,Je,je){var tt=s.exec(Je.slice(je,je+2));return tt?(Ge.S=+tt[0],je+tt[0].length):-1}function O(Ge,Je,je){var tt=s.exec(Je.slice(je,je+3));return tt?(Ge.L=+tt[0],je+tt[0].length):-1}function V(Ge,Je,je){var tt=s.exec(Je.slice(je,je+6));return tt?(Ge.L=Math.floor(tt[0]/1e3),je+tt[0].length):-1}function G(Ge,Je,je){var tt=l.exec(Je.slice(je,je+1));return tt?je+tt[0].length:-1}function Z(Ge,Je,je){var tt=s.exec(Je.slice(je));return tt?(Ge.Q=+tt[0],je+tt[0].length):-1}function j(Ge,Je,je){var tt=s.exec(Je.slice(je));return tt?(Ge.s=+tt[0],je+tt[0].length):-1}function N(Ge,Je){return c(Ge.getDate(),Je,2)}function H(Ge,Je){return c(Ge.getHours(),Je,2)}function te(Ge,Je){return c(Ge.getHours()%12||12,Je,2)}function oe(Ge,Je){return c(1+t.timeDay.count(t.timeYear(Ge),Ge),Je,3)}function _e(Ge,Je){return c(Ge.getMilliseconds(),Je,3)}function Ee(Ge,Je){return _e(Ge,Je)+"000"}function Ce(Ge,Je){return c(Ge.getMonth()+1,Je,2)}function me(Ge,Je){return c(Ge.getMinutes(),Je,2)}function ie(Ge,Je){return c(Ge.getSeconds(),Je,2)}function Se(Ge){var Je=Ge.getDay();return Je===0?7:Je}function Le(Ge,Je){return c(t.timeSunday.count(t.timeYear(Ge)-1,Ge),Je,2)}function Ae(Ge,Je){var je=Ge.getDay();return Ge=je>=4||je===0?t.timeThursday(Ge):t.timeThursday.ceil(Ge),c(t.timeThursday.count(t.timeYear(Ge),Ge)+(t.timeYear(Ge).getDay()===4),Je,2)}function Fe(Ge){return Ge.getDay()}function Pe(Ge,Je){return c(t.timeMonday.count(t.timeYear(Ge)-1,Ge),Je,2)}function ge(Ge,Je){return c(Ge.getFullYear()%100,Je,2)}function Re(Ge,Je){return c(Ge.getFullYear()%1e4,Je,4)}function ce(Ge){var Je=Ge.getTimezoneOffset();return(Je>0?"-":(Je*=-1,"+"))+c(Je/60|0,"0",2)+c(Je%60,"0",2)}function Ze(Ge,Je){return c(Ge.getUTCDate(),Je,2)}function ut(Ge,Je){return c(Ge.getUTCHours(),Je,2)}function pt(Ge,Je){return c(Ge.getUTCHours()%12||12,Je,2)}function Zt(Ge,Je){return c(1+t.utcDay.count(t.utcYear(Ge),Ge),Je,3)}function st(Ge,Je){return c(Ge.getUTCMilliseconds(),Je,3)}function lt(Ge,Je){return st(Ge,Je)+"000"}function Gt(Ge,Je){return c(Ge.getUTCMonth()+1,Je,2)}function Nt(Ge,Je){return c(Ge.getUTCMinutes(),Je,2)}function Jt(Ge,Je){return c(Ge.getUTCSeconds(),Je,2)}function sr(Ge){var Je=Ge.getUTCDay();return Je===0?7:Je}function wr(Ge,Je){return c(t.utcSunday.count(t.utcYear(Ge)-1,Ge),Je,2)}function cr(Ge,Je){var je=Ge.getUTCDay();return Ge=je>=4||je===0?t.utcThursday(Ge):t.utcThursday.ceil(Ge),c(t.utcThursday.count(t.utcYear(Ge),Ge)+(t.utcYear(Ge).getUTCDay()===4),Je,2)}function $e(Ge){return Ge.getUTCDay()}function St(Ge,Je){return c(t.utcMonday.count(t.utcYear(Ge)-1,Ge),Je,2)}function Qt(Ge,Je){return c(Ge.getUTCFullYear()%100,Je,2)}function Vt(Ge,Je){return c(Ge.getUTCFullYear()%1e4,Je,4)}function _t(){return"+0000"}function It(){return"%"}function mt(Ge){return+Ge}function er(Ge){return Math.floor(+Ge/1e3)}var lr;Tr({dateTime:"%x, %X",date:"%-m/%-d/%Y",time:"%-I:%M:%S %p",periods:["AM","PM"],days:["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],shortDays:["Sun","Mon","Tue","Wed","Thu","Fri","Sat"],months:["January","February","March","April","May","June","July","August","September","October","November","December"],shortMonths:["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]});function Tr(Ge){return lr=a(Ge),e.timeFormat=lr.format,e.timeParse=lr.parse,e.utcFormat=lr.utcFormat,e.utcParse=lr.utcParse,lr}var Lr="%Y-%m-%dT%H:%M:%S.%LZ";function ti(Ge){return Ge.toISOString()}var Br=Date.prototype.toISOString?ti:e.utcFormat(Lr);function Vr(Ge){var Je=new Date(Ge);return isNaN(Je)?null:Je}var dt=+new Date("2000-01-01T00:00:00.000Z")?Vr:e.utcParse(Lr);e.isoFormat=Br,e.isoParse=dt,e.timeFormatDefaultLocale=Tr,e.timeFormatLocale=a,Object.defineProperty(e,"__esModule",{value:!0})})});var wO=ye((d6,mee)=>{(function(e,t){typeof d6=="object"&&typeof mee!="undefined"?t(d6):(e=typeof globalThis!="undefined"?globalThis:e||self,t(e.d3=e.d3||{}))})(d6,function(e){"use strict";function t(C){return Math.abs(C=Math.round(C))>=1e21?C.toLocaleString("en").replace(/,/g,""):C.toString(10)}function r(C,M){if((g=(C=M?C.toExponential(M-1):C.toExponential()).indexOf("e"))<0)return null;var g,P=C.slice(0,g);return[P.length>1?P[0]+P.slice(2):P,+C.slice(g+1)]}function n(C){return C=r(Math.abs(C)),C?C[1]:NaN}function i(C,M){return function(g,P){for(var T=g.length,z=[],O=0,V=C[0],G=0;T>0&&V>0&&(G+V+1>P&&(V=Math.max(1,P-G)),z.push(g.substring(T-=V,T+V)),!((G+=V+1)>P));)V=C[O=(O+1)%C.length];return z.reverse().join(M)}}function a(C){return function(M){return M.replace(/[0-9]/g,function(g){return C[+g]})}}var o=/^(?:(.)?([<>=^]))?([+\-( ])?([$#])?(0)?(\d+)?(,)?(\.\d+)?(~)?([a-z%])?$/i;function s(C){if(!(M=o.exec(C)))throw new Error("invalid format: "+C);var M;return new l({fill:M[1],align:M[2],sign:M[3],symbol:M[4],zero:M[5],width:M[6],comma:M[7],precision:M[8]&&M[8].slice(1),trim:M[9],type:M[10]})}s.prototype=l.prototype;function l(C){this.fill=C.fill===void 0?" ":C.fill+"",this.align=C.align===void 0?">":C.align+"",this.sign=C.sign===void 0?"-":C.sign+"",this.symbol=C.symbol===void 0?"":C.symbol+"",this.zero=!!C.zero,this.width=C.width===void 0?void 0:+C.width,this.comma=!!C.comma,this.precision=C.precision===void 0?void 0:+C.precision,this.trim=!!C.trim,this.type=C.type===void 0?"":C.type+""}l.prototype.toString=function(){return this.fill+this.align+this.sign+this.symbol+(this.zero?"0":"")+(this.width===void 0?"":Math.max(1,this.width|0))+(this.comma?",":"")+(this.precision===void 0?"":"."+Math.max(0,this.precision|0))+(this.trim?"~":"")+this.type};function u(C){e:for(var M=C.length,g=1,P=-1,T;g<M;++g)switch(C[g]){case".":P=T=g;break;case"0":P===0&&(P=g),T=g;break;default:if(!+C[g])break e;P>0&&(P=0);break}return P>0?C.slice(0,P)+C.slice(T+1):C}var c;function f(C,M){var g=r(C,M);if(!g)return C+"";var P=g[0],T=g[1],z=T-(c=Math.max(-8,Math.min(8,Math.floor(T/3)))*3)+1,O=P.length;return z===O?P:z>O?P+new Array(z-O+1).join("0"):z>0?P.slice(0,z)+"."+P.slice(z):"0."+new Array(1-z).join("0")+r(C,Math.max(0,M+z-1))[0]}function h(C,M){var g=r(C,M);if(!g)return C+"";var P=g[0],T=g[1];return T<0?"0."+new Array(-T).join("0")+P:P.length>T+1?P.slice(0,T+1)+"."+P.slice(T+1):P+new Array(T-P.length+2).join("0")}var d={"%":function(C,M){return(C*100).toFixed(M)},b:function(C){return Math.round(C).toString(2)},c:function(C){return C+""},d:t,e:function(C,M){return C.toExponential(M)},f:function(C,M){return C.toFixed(M)},g:function(C,M){return C.toPrecision(M)},o:function(C){return Math.round(C).toString(8)},p:function(C,M){return h(C*100,M)},r:h,s:f,X:function(C){return Math.round(C).toString(16).toUpperCase()},x:function(C){return Math.round(C).toString(16)}};function v(C){return C}var _=Array.prototype.map,b=["y","z","a","f","p","n","\xB5","m","","k","M","G","T","P","E","Z","Y"];function p(C){var M=C.grouping===void 0||C.thousands===void 0?v:i(_.call(C.grouping,Number),C.thousands+""),g=C.currency===void 0?"":C.currency[0]+"",P=C.currency===void 0?"":C.currency[1]+"",T=C.decimal===void 0?".":C.decimal+"",z=C.numerals===void 0?v:a(_.call(C.numerals,String)),O=C.percent===void 0?"%":C.percent+"",V=C.minus===void 0?"-":C.minus+"",G=C.nan===void 0?"NaN":C.nan+"";function Z(N){N=s(N);var H=N.fill,te=N.align,oe=N.sign,_e=N.symbol,Ee=N.zero,Ce=N.width,me=N.comma,ie=N.precision,Se=N.trim,Le=N.type;Le==="n"?(me=!0,Le="g"):d[Le]||(ie===void 0&&(ie=12),Se=!0,Le="g"),(Ee||H==="0"&&te==="=")&&(Ee=!0,H="0",te="=");var Ae=_e==="$"?g:_e==="#"&&/[boxX]/.test(Le)?"0"+Le.toLowerCase():"",Fe=_e==="$"?P:/[%p]/.test(Le)?O:"",Pe=d[Le],ge=/[defgprs%]/.test(Le);ie=ie===void 0?6:/[gprs]/.test(Le)?Math.max(1,Math.min(21,ie)):Math.max(0,Math.min(20,ie));function Re(ce){var Ze=Ae,ut=Fe,pt,Zt,st;if(Le==="c")ut=Pe(ce)+ut,ce="";else{ce=+ce;var lt=ce<0||1/ce<0;if(ce=isNaN(ce)?G:Pe(Math.abs(ce),ie),Se&&(ce=u(ce)),lt&&+ce==0&&oe!=="+"&&(lt=!1),Ze=(lt?oe==="("?oe:V:oe==="-"||oe==="("?"":oe)+Ze,ut=(Le==="s"?b[8+c/3]:"")+ut+(lt&&oe==="("?")":""),ge){for(pt=-1,Zt=ce.length;++pt<Zt;)if(st=ce.charCodeAt(pt),48>st||st>57){ut=(st===46?T+ce.slice(pt+1):ce.slice(pt))+ut,ce=ce.slice(0,pt);break}}}me&&!Ee&&(ce=M(ce,1/0));var Gt=Ze.length+ce.length+ut.length,Nt=Gt<Ce?new Array(Ce-Gt+1).join(H):"";switch(me&&Ee&&(ce=M(Nt+ce,Nt.length?Ce-ut.length:1/0),Nt=""),te){case"<":ce=Ze+ce+ut+Nt;break;case"=":ce=Ze+Nt+ce+ut;break;case"^":ce=Nt.slice(0,Gt=Nt.length>>1)+Ze+ce+ut+Nt.slice(Gt);break;default:ce=Nt+Ze+ce+ut;break}return z(ce)}return Re.toString=function(){return N+""},Re}function j(N,H){var te=Z((N=s(N),N.type="f",N)),oe=Math.max(-8,Math.min(8,Math.floor(n(H)/3)))*3,_e=Math.pow(10,-oe),Ee=b[8+oe/3];return function(Ce){return te(_e*Ce)+Ee}}return{format:Z,formatPrefix:j}}var k;E({decimal:".",thousands:",",grouping:[3],currency:["$",""],minus:"-"});function E(C){return k=p(C),e.format=k.format,e.formatPrefix=k.formatPrefix,k}function S(C){return Math.max(0,-n(Math.abs(C)))}function L(C,M){return Math.max(0,Math.max(-8,Math.min(8,Math.floor(n(M)/3)))*3-n(Math.abs(C)))}function x(C,M){return C=Math.abs(C),M=Math.abs(M)-C,Math.max(0,n(M)-n(C))+1}e.FormatSpecifier=l,e.formatDefaultLocale=E,e.formatLocale=p,e.formatSpecifier=s,e.precisionFixed=S,e.precisionPrefix=L,e.precisionRound=x,Object.defineProperty(e,"__esModule",{value:!0})})});var _ee=ye((orr,yee)=>{"use strict";yee.exports=function(e){for(var t=e.length,r,n=0;n<t;n++)if(r=e.charCodeAt(n),(r<9||r>13)&&r!==32&&r!==133&&r!==160&&r!==5760&&r!==6158&&(r<8192||r>8205)&&r!==8232&&r!==8233&&r!==8239&&r!==8287&&r!==8288&&r!==12288&&r!==65279)return!1;return!0}});var Eo=ye((srr,xee)=>{"use strict";var mtt=_ee();xee.exports=function(e){var t=typeof e;if(t==="string"){var r=e;if(e=+e,e===0&&mtt(r))return!1}else if(t!=="number")return!1;return e-e<1}});var fs=ye((lrr,bee)=>{"use strict";bee.exports={BADNUM:void 0,FP_SAFE:Number.MAX_VALUE*1e-4,ONEMAXYEAR:316224e5,ONEAVGYEAR:315576e5,ONEMINYEAR:31536e6,ONEMAXQUARTER:79488e5,ONEAVGQUARTER:78894e5,ONEMINQUARTER:76896e5,ONEMAXMONTH:26784e5,ONEAVGMONTH:26298e5,ONEMINMONTH:24192e5,ONEWEEK:6048e5,ONEDAY:864e5,ONEHOUR:36e5,ONEMIN:6e4,ONESEC:1e3,ONEMILLI:1,ONEMICROSEC:.001,EPOCHJD:24405875e-1,ALMOST_EQUAL:1-1e-6,LOG_CLIP:10,MINUS_SIGN:"\u2212"}});var TO=ye((v6,wee)=>{(function(e,t){typeof v6=="object"&&typeof wee!="undefined"?t(v6):(e=typeof globalThis!="undefined"?globalThis:e||self,t(e["base64-arraybuffer"]={}))})(v6,function(e){"use strict";for(var t="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",r=typeof Uint8Array=="undefined"?[]:new Uint8Array(256),n=0;n<t.length;n++)r[t.charCodeAt(n)]=n;var i=function(o){var s=new Uint8Array(o),l,u=s.length,c="";for(l=0;l<u;l+=3)c+=t[s[l]>>2],c+=t[(s[l]&3)<<4|s[l+1]>>4],c+=t[(s[l+1]&15)<<2|s[l+2]>>6],c+=t[s[l+2]&63];return u%3===2?c=c.substring(0,c.length-1)+"=":u%3===1&&(c=c.substring(0,c.length-2)+"=="),c},a=function(o){var s=o.length*.75,l=o.length,u,c=0,f,h,d,v;o[o.length-1]==="="&&(s--,o[o.length-2]==="="&&s--);var _=new ArrayBuffer(s),b=new Uint8Array(_);for(u=0;u<l;u+=4)f=r[o.charCodeAt(u)],h=r[o.charCodeAt(u+1)],d=r[o.charCodeAt(u+2)],v=r[o.charCodeAt(u+3)],b[c++]=f<<2|h>>4,b[c++]=(h&15)<<4|d>>2,b[c++]=(d&3)<<6|v&63;return _};e.decode=a,e.encode=i,Object.defineProperty(e,"__esModule",{value:!0})})});var my=ye((urr,Tee)=>{"use strict";Tee.exports=function(t){return window&&window.process&&window.process.versions?Object.prototype.toString.call(t)==="[object Object]":Object.prototype.toString.call(t)==="[object Object]"&&Object.getPrototypeOf(t).hasOwnProperty("hasOwnProperty")}});var vv=ye(yg=>{"use strict";var ytt=TO().decode,_tt=my(),AO=Array.isArray,xtt=ArrayBuffer,btt=DataView;function Aee(e){return xtt.isView(e)&&!(e instanceof btt)}yg.isTypedArray=Aee;function p6(e){return AO(e)||Aee(e)}yg.isArrayOrTypedArray=p6;function wtt(e){return!p6(e[0])}yg.isArray1D=wtt;yg.ensureArray=function(e,t){return AO(e)||(e=[]),e.length=t,e};var Ld={u1c:typeof Uint8ClampedArray=="undefined"?void 0:Uint8ClampedArray,i1:typeof Int8Array=="undefined"?void 0:Int8Array,u1:typeof Uint8Array=="undefined"?void 0:Uint8Array,i2:typeof Int16Array=="undefined"?void 0:Int16Array,u2:typeof Uint16Array=="undefined"?void 0:Uint16Array,i4:typeof Int32Array=="undefined"?void 0:Int32Array,u4:typeof Uint32Array=="undefined"?void 0:Uint32Array,f4:typeof Float32Array=="undefined"?void 0:Float32Array,f8:typeof Float64Array=="undefined"?void 0:Float64Array};Ld.uint8c=Ld.u1c;Ld.uint8=Ld.u1;Ld.int8=Ld.i1;Ld.uint16=Ld.u2;Ld.int16=Ld.i2;Ld.uint32=Ld.u4;Ld.int32=Ld.i4;Ld.float32=Ld.f4;Ld.float64=Ld.f8;function SO(e){return e.constructor===ArrayBuffer}yg.isArrayBuffer=SO;yg.decodeTypedArraySpec=function(e){var t=[],r=Ttt(e),n=r.dtype,i=Ld[n];if(!i)throw new Error('Error in dtype: "'+n+'"');var a=i.BYTES_PER_ELEMENT,o=r.bdata;SO(o)||(o=ytt(o));var s=r.shape===void 0?[o.byteLength/a]:(""+r.shape).split(",");s.reverse();var l=s.length,u,c,f=+s[0],h=a*f,d=0;if(l===1)t=new i(o);else if(l===2)for(u=+s[1],c=0;c<u;c++)t[c]=new i(o,d,f),d+=h;else if(l===3){u=+s[1];for(var v=+s[2],_=0;_<v;_++)for(t[_]=[],c=0;c<u;c++)t[_][c]=new i(o,d,f),d+=h}else throw new Error("ndim: "+l+'is not supported with the shape:"'+r.shape+'"');return t.bdata=r.bdata,t.dtype=r.dtype,t.shape=s.reverse().join(","),e._inputArray=t,t};yg.isTypedArraySpec=function(e){return _tt(e)&&e.hasOwnProperty("dtype")&&typeof e.dtype=="string"&&e.hasOwnProperty("bdata")&&(typeof e.bdata=="string"||SO(e.bdata))&&(e.shape===void 0||e.hasOwnProperty("shape")&&(typeof e.shape=="string"||typeof e.shape=="number"))};function Ttt(e){return{bdata:e.bdata,dtype:e.dtype,shape:e.shape}}yg.concat=function(){var e=[],t=!0,r=0,n,i,a,o,s,l,u,c;for(a=0;a<arguments.length;a++)o=arguments[a],l=o.length,l&&(i?e.push(o):(i=o,s=l),AO(o)?n=!1:(t=!1,r?n!==o.constructor&&(n=!1):n=o.constructor),r+=l);if(!r)return[];if(!e.length)return i;if(t)return i.concat.apply(i,e);if(n){for(u=new n(r),u.set(i),a=0;a<e.length;a++)o=e[a],u.set(o,s),s+=o.length;return u}for(u=new Array(r),c=0;c<i.length;c++)u[c]=i[c];for(a=0;a<e.length;a++){for(o=e[a],c=0;c<o.length;c++)u[s+c]=o[c];s+=c}return u};yg.maxRowLength=function(e){return See(e,Math.max,0)};yg.minRowLength=function(e){return See(e,Math.min,1/0)};function See(e,t,r){if(p6(e))if(p6(e[0])){for(var n=r,i=0;i<e.length;i++)n=t(n,e[i].length);return n}else return e.length;return 0}});var PS=ye((frr,Lee)=>{"use strict";var Mee=Eo(),EO=vv().isArrayOrTypedArray;Lee.exports=function(t,r){if(Mee(r))r=String(r);else if(typeof r!="string"||r.slice(-4)==="[-1]")throw"bad property string";var n=r.split("."),i,a,o,s;for(s=0;s<n.length;s++)if(String(n[s]).slice(0,2)==="__")throw"bad property string";for(s=0;s<n.length;){if(i=String(n[s]).match(/^([^\[\]]*)((\[\-?[0-9]*\])+)$/),i){if(i[1])n[s]=i[1];else if(s===0)n.splice(0,1);else throw"bad property string";for(a=i[2].slice(1,-1).split("]["),o=0;o<a.length;o++)s++,n.splice(s,0,Number(a[o]))}s++}return typeof t!="object"?Ett(t,r,n):{set:kee(t,n,r),get:Eee(t,n),astr:r,parts:n,obj:t}};function Eee(e,t){return function(r){var n=e,i,a,o,s,l;for(s=0;s<t.length-1;s++){if(i=t[s],i===-1){for(a=!0,o=[],l=0;l<n.length;l++)o[l]=Eee(n[l],t.slice(s+1))(r),o[l]!==o[0]&&(a=!1);return a?o[0]:o}if(typeof i=="number"&&!EO(n)||(n=n[i],typeof n!="object"||n===null))return}if(!(typeof n!="object"||n===null)&&(o=n[t[s]],!(!r&&o===null)))return o}}var Att=/(^|\.)args\[/;function MO(e,t){return e===void 0||e===null&&!t.match(Att)}function kee(e,t,r){return function(n){var i=e,a="",o=[[e,a]],s=MO(n,r),l,u;for(u=0;u<t.length-1;u++){if(l=t[u],typeof l=="number"&&!EO(i))throw"array index but container is not an array";if(l===-1){if(s=!Mtt(i,t.slice(u+1),n,r),s)break;return}if(!Cee(i,l,t[u+1],s))break;if(i=i[l],typeof i!="object"||i===null)throw"container is not an object";a=Stt(a,l),o.push([i,a])}if(s){if(u===t.length-1&&(delete i[t[u]],Array.isArray(i)&&+t[u]===i.length-1))for(;i.length&&i[i.length-1]===void 0;)i.pop()}else i[t[u]]=n}}function Stt(e,t){var r=t;return Mee(t)?r="["+t+"]":e&&(r="."+t),e+r}function Mtt(e,t,r,n){var i=EO(r),a=!0,o=r,s=n.replace("-1",0),l=i?!1:MO(r,s),u=t[0],c;for(c=0;c<e.length;c++)s=n.replace("-1",c),i&&(o=r[c%r.length],l=MO(o,s)),l&&(a=!1),Cee(e,c,u,l)&&kee(e[c],t,n.replace("-1",c))(o);return a}function Cee(e,t,r,n){if(e[t]===void 0){if(n)return!1;typeof r=="number"?e[t]=[]:e[t]={}}return!0}function Ett(e,t,r){return{set:function(){throw"bad container"},get:function(){},astr:t,parts:r,obj:e}}});var Dee=ye((hrr,Ree)=>{"use strict";var i3=PS(),ktt=/^\w*$/,Ctt=0,Pee=1,g6=2,Iee=3,sb=4;Ree.exports=function(t,r,n,i){n=n||"name",i=i||"value";var a,o,s,l={};r&&r.length?(s=i3(t,r),o=s.get()):o=t,r=r||"";var u={};if(o)for(a=0;a<o.length;a++)u[o[a][n]]=a;var c=ktt.test(i),f={set:function(h,d){var v=d===null?sb:Ctt;if(!o){if(!s||v===sb)return;o=[],s.set(o)}var _=u[h];if(_===void 0){if(v===sb)return;v=v|Iee,_=o.length,u[h]=_}else d!==(c?o[_][i]:i3(o[_],i).get())&&(v=v|g6);var b=o[_]=o[_]||{};return b[n]=h,c?b[i]=d:i3(b,i).set(d),d!==null&&(v=v&~sb),l[_]=l[_]|v,f},get:function(h){if(o){var d=u[h];if(d!==void 0)return c?o[d][i]:i3(o[d],i).get()}},rename:function(h,d){var v=u[h];return v===void 0||(l[v]=l[v]|Pee,u[d]=v,delete u[h],o[v][n]=d),f},remove:function(h){var d=u[h];if(d===void 0)return f;var v=o[d];if(Object.keys(v).length>2)return l[d]=l[d]|g6,f.set(h,null);if(c){for(a=d;a<o.length;a++)l[a]=l[a]|Iee;for(a=d;a<o.length;a++)u[o[a][n]]--;o.splice(d,1),delete u[h]}else i3(v,i).set(null),l[d]=l[d]|g6|sb;return f},constructUpdate:function(){for(var h,d,v={},_=Object.keys(l),b=0;b<_.length;b++)d=_[b],h=r+"["+d+"]",o[d]?(l[d]&Pee&&(v[h+"."+n]=o[d][n]),l[d]&g6&&(c?v[h+"."+i]=l[d]&sb?null:o[d][i]:v[h+"."+i]=l[d]&sb?null:i3(o[d],i).get())):v[h]=null;return v}};return f}});var zee=ye((drr,Fee)=>{"use strict";var Ltt=/^(.*)(\.[^\.\[\]]+|\[\d\])$/,Ptt=/^[^\.\[\]]+$/;Fee.exports=function(e,t){for(;t;){var r=e.match(Ltt);if(r)e=r[1];else if(e.match(Ptt))e="";else throw new Error("bad relativeAttr call:"+[e,t]);if(t.charAt(0)==="^")t=t.slice(1);else break}return e&&t.charAt(0)!=="["?e+"."+t:e+t}});var m6=ye((vrr,Oee)=>{"use strict";var Itt=Eo();Oee.exports=function(t,r){if(t>0)return Math.log(t)/Math.LN10;var n=Math.log(Math.min(r[0],r[1]))/Math.LN10;return Itt(n)||(n=Math.log(Math.max(r[0],r[1]))/Math.LN10-6),n}});var Nee=ye((prr,Bee)=>{"use strict";var qee=vv().isArrayOrTypedArray,IS=my();Bee.exports=function e(t,r){for(var n in r){var i=r[n],a=t[n];if(a!==i)if(n.charAt(0)==="_"||typeof i=="function"){if(n in t)continue;t[n]=i}else if(qee(i)&&qee(a)&&IS(i[0])){if(n==="customdata"||n==="ids")continue;for(var o=Math.min(i.length,a.length),s=0;s<o;s++)a[s]!==i[s]&&IS(i[s])&&IS(a[s])&&e(a[s],i[s])}else IS(i)&&IS(a)&&(e(a,i),Object.keys(a).length||delete t[n])}}});var n3=ye((grr,Uee)=>{"use strict";function Rtt(e,t){var r=e%t;return r<0?r+t:r}function Dtt(e,t){return Math.abs(e)>t/2?e-Math.round(e/t)*t:e}Uee.exports={mod:Rtt,modHalf:Dtt}});var cd=ye((mrr,y6)=>{(function(e){var t=/^\s+/,r=/\s+$/,n=0,i=e.round,a=e.min,o=e.max,s=e.random;function l(ge,Re){if(ge=ge||"",Re=Re||{},ge instanceof l)return ge;if(!(this instanceof l))return new l(ge,Re);var ce=u(ge);this._originalInput=ge,this._r=ce.r,this._g=ce.g,this._b=ce.b,this._a=ce.a,this._roundA=i(100*this._a)/100,this._format=Re.format||ce.format,this._gradientType=Re.gradientType,this._r<1&&(this._r=i(this._r)),this._g<1&&(this._g=i(this._g)),this._b<1&&(this._b=i(this._b)),this._ok=ce.ok,this._tc_id=n++}l.prototype={isDark:function(){return this.getBrightness()<128},isLight:function(){return!this.isDark()},isValid:function(){return this._ok},getOriginalInput:function(){return this._originalInput},getFormat:function(){return this._format},getAlpha:function(){return this._a},getBrightness:function(){var ge=this.toRgb();return(ge.r*299+ge.g*587+ge.b*114)/1e3},getLuminance:function(){var ge=this.toRgb(),Re,ce,Ze,ut,pt,Zt;return Re=ge.r/255,ce=ge.g/255,Ze=ge.b/255,Re<=.03928?ut=Re/12.92:ut=e.pow((Re+.055)/1.055,2.4),ce<=.03928?pt=ce/12.92:pt=e.pow((ce+.055)/1.055,2.4),Ze<=.03928?Zt=Ze/12.92:Zt=e.pow((Ze+.055)/1.055,2.4),.2126*ut+.7152*pt+.0722*Zt},setAlpha:function(ge){return this._a=N(ge),this._roundA=i(100*this._a)/100,this},toHsv:function(){var ge=d(this._r,this._g,this._b);return{h:ge.h*360,s:ge.s,v:ge.v,a:this._a}},toHsvString:function(){var ge=d(this._r,this._g,this._b),Re=i(ge.h*360),ce=i(ge.s*100),Ze=i(ge.v*100);return this._a==1?"hsv("+Re+", "+ce+"%, "+Ze+"%)":"hsva("+Re+", "+ce+"%, "+Ze+"%, "+this._roundA+")"},toHsl:function(){var ge=f(this._r,this._g,this._b);return{h:ge.h*360,s:ge.s,l:ge.l,a:this._a}},toHslString:function(){var ge=f(this._r,this._g,this._b),Re=i(ge.h*360),ce=i(ge.s*100),Ze=i(ge.l*100);return this._a==1?"hsl("+Re+", "+ce+"%, "+Ze+"%)":"hsla("+Re+", "+ce+"%, "+Ze+"%, "+this._roundA+")"},toHex:function(ge){return _(this._r,this._g,this._b,ge)},toHexString:function(ge){return"#"+this.toHex(ge)},toHex8:function(ge){return b(this._r,this._g,this._b,this._a,ge)},toHex8String:function(ge){return"#"+this.toHex8(ge)},toRgb:function(){return{r:i(this._r),g:i(this._g),b:i(this._b),a:this._a}},toRgbString:function(){return this._a==1?"rgb("+i(this._r)+", "+i(this._g)+", "+i(this._b)+")":"rgba("+i(this._r)+", "+i(this._g)+", "+i(this._b)+", "+this._roundA+")"},toPercentageRgb:function(){return{r:i(H(this._r,255)*100)+"%",g:i(H(this._g,255)*100)+"%",b:i(H(this._b,255)*100)+"%",a:this._a}},toPercentageRgbString:function(){return this._a==1?"rgb("+i(H(this._r,255)*100)+"%, "+i(H(this._g,255)*100)+"%, "+i(H(this._b,255)*100)+"%)":"rgba("+i(H(this._r,255)*100)+"%, "+i(H(this._g,255)*100)+"%, "+i(H(this._b,255)*100)+"%, "+this._roundA+")"},toName:function(){return this._a===0?"transparent":this._a<1?!1:Z[_(this._r,this._g,this._b,!0)]||!1},toFilter:function(ge){var Re="#"+p(this._r,this._g,this._b,this._a),ce=Re,Ze=this._gradientType?"GradientType = 1, ":"";if(ge){var ut=l(ge);ce="#"+p(ut._r,ut._g,ut._b,ut._a)}return"progid:DXImageTransform.Microsoft.gradient("+Ze+"startColorstr="+Re+",endColorstr="+ce+")"},toString:function(ge){var Re=!!ge;ge=ge||this._format;var ce=!1,Ze=this._a<1&&this._a>=0,ut=!Re&&Ze&&(ge==="hex"||ge==="hex6"||ge==="hex3"||ge==="hex4"||ge==="hex8"||ge==="name");return ut?ge==="name"&&this._a===0?this.toName():this.toRgbString():(ge==="rgb"&&(ce=this.toRgbString()),ge==="prgb"&&(ce=this.toPercentageRgbString()),(ge==="hex"||ge==="hex6")&&(ce=this.toHexString()),ge==="hex3"&&(ce=this.toHexString(!0)),ge==="hex4"&&(ce=this.toHex8String(!0)),ge==="hex8"&&(ce=this.toHex8String()),ge==="name"&&(ce=this.toName()),ge==="hsl"&&(ce=this.toHslString()),ge==="hsv"&&(ce=this.toHsvString()),ce||this.toHexString())},clone:function(){return l(this.toString())},_applyModification:function(ge,Re){var ce=ge.apply(null,[this].concat([].slice.call(Re)));return this._r=ce._r,this._g=ce._g,this._b=ce._b,this.setAlpha(ce._a),this},lighten:function(){return this._applyModification(L,arguments)},brighten:function(){return this._applyModification(x,arguments)},darken:function(){return this._applyModification(C,arguments)},desaturate:function(){return this._applyModification(k,arguments)},saturate:function(){return this._applyModification(E,arguments)},greyscale:function(){return this._applyModification(S,arguments)},spin:function(){return this._applyModification(M,arguments)},_applyCombination:function(ge,Re){return ge.apply(null,[this].concat([].slice.call(Re)))},analogous:function(){return this._applyCombination(O,arguments)},complement:function(){return this._applyCombination(g,arguments)},monochromatic:function(){return this._applyCombination(V,arguments)},splitcomplement:function(){return this._applyCombination(z,arguments)},triad:function(){return this._applyCombination(P,arguments)},tetrad:function(){return this._applyCombination(T,arguments)}},l.fromRatio=function(ge,Re){if(typeof ge=="object"){var ce={};for(var Ze in ge)ge.hasOwnProperty(Ze)&&(Ze==="a"?ce[Ze]=ge[Ze]:ce[Ze]=me(ge[Ze]));ge=ce}return l(ge,Re)};function u(ge){var Re={r:0,g:0,b:0},ce=1,Ze=null,ut=null,pt=null,Zt=!1,st=!1;return typeof ge=="string"&&(ge=Fe(ge)),typeof ge=="object"&&(Ae(ge.r)&&Ae(ge.g)&&Ae(ge.b)?(Re=c(ge.r,ge.g,ge.b),Zt=!0,st=String(ge.r).substr(-1)==="%"?"prgb":"rgb"):Ae(ge.h)&&Ae(ge.s)&&Ae(ge.v)?(Ze=me(ge.s),ut=me(ge.v),Re=v(ge.h,Ze,ut),Zt=!0,st="hsv"):Ae(ge.h)&&Ae(ge.s)&&Ae(ge.l)&&(Ze=me(ge.s),pt=me(ge.l),Re=h(ge.h,Ze,pt),Zt=!0,st="hsl"),ge.hasOwnProperty("a")&&(ce=ge.a)),ce=N(ce),{ok:Zt,format:ge.format||st,r:a(255,o(Re.r,0)),g:a(255,o(Re.g,0)),b:a(255,o(Re.b,0)),a:ce}}function c(ge,Re,ce){return{r:H(ge,255)*255,g:H(Re,255)*255,b:H(ce,255)*255}}function f(ge,Re,ce){ge=H(ge,255),Re=H(Re,255),ce=H(ce,255);var Ze=o(ge,Re,ce),ut=a(ge,Re,ce),pt,Zt,st=(Ze+ut)/2;if(Ze==ut)pt=Zt=0;else{var lt=Ze-ut;switch(Zt=st>.5?lt/(2-Ze-ut):lt/(Ze+ut),Ze){case ge:pt=(Re-ce)/lt+(Re<ce?6:0);break;case Re:pt=(ce-ge)/lt+2;break;case ce:pt=(ge-Re)/lt+4;break}pt/=6}return{h:pt,s:Zt,l:st}}function h(ge,Re,ce){var Ze,ut,pt;ge=H(ge,360),Re=H(Re,100),ce=H(ce,100);function Zt(Gt,Nt,Jt){return Jt<0&&(Jt+=1),Jt>1&&(Jt-=1),Jt<1/6?Gt+(Nt-Gt)*6*Jt:Jt<1/2?Nt:Jt<2/3?Gt+(Nt-Gt)*(2/3-Jt)*6:Gt}if(Re===0)Ze=ut=pt=ce;else{var st=ce<.5?ce*(1+Re):ce+Re-ce*Re,lt=2*ce-st;Ze=Zt(lt,st,ge+1/3),ut=Zt(lt,st,ge),pt=Zt(lt,st,ge-1/3)}return{r:Ze*255,g:ut*255,b:pt*255}}function d(ge,Re,ce){ge=H(ge,255),Re=H(Re,255),ce=H(ce,255);var Ze=o(ge,Re,ce),ut=a(ge,Re,ce),pt,Zt,st=Ze,lt=Ze-ut;if(Zt=Ze===0?0:lt/Ze,Ze==ut)pt=0;else{switch(Ze){case ge:pt=(Re-ce)/lt+(Re<ce?6:0);break;case Re:pt=(ce-ge)/lt+2;break;case ce:pt=(ge-Re)/lt+4;break}pt/=6}return{h:pt,s:Zt,v:st}}function v(ge,Re,ce){ge=H(ge,360)*6,Re=H(Re,100),ce=H(ce,100);var Ze=e.floor(ge),ut=ge-Ze,pt=ce*(1-Re),Zt=ce*(1-ut*Re),st=ce*(1-(1-ut)*Re),lt=Ze%6,Gt=[ce,Zt,pt,pt,st,ce][lt],Nt=[st,ce,ce,Zt,pt,pt][lt],Jt=[pt,pt,st,ce,ce,Zt][lt];return{r:Gt*255,g:Nt*255,b:Jt*255}}function _(ge,Re,ce,Ze){var ut=[Ce(i(ge).toString(16)),Ce(i(Re).toString(16)),Ce(i(ce).toString(16))];return Ze&&ut[0].charAt(0)==ut[0].charAt(1)&&ut[1].charAt(0)==ut[1].charAt(1)&&ut[2].charAt(0)==ut[2].charAt(1)?ut[0].charAt(0)+ut[1].charAt(0)+ut[2].charAt(0):ut.join("")}function b(ge,Re,ce,Ze,ut){var pt=[Ce(i(ge).toString(16)),Ce(i(Re).toString(16)),Ce(i(ce).toString(16)),Ce(ie(Ze))];return ut&&pt[0].charAt(0)==pt[0].charAt(1)&&pt[1].charAt(0)==pt[1].charAt(1)&&pt[2].charAt(0)==pt[2].charAt(1)&&pt[3].charAt(0)==pt[3].charAt(1)?pt[0].charAt(0)+pt[1].charAt(0)+pt[2].charAt(0)+pt[3].charAt(0):pt.join("")}function p(ge,Re,ce,Ze){var ut=[Ce(ie(Ze)),Ce(i(ge).toString(16)),Ce(i(Re).toString(16)),Ce(i(ce).toString(16))];return ut.join("")}l.equals=function(ge,Re){return!ge||!Re?!1:l(ge).toRgbString()==l(Re).toRgbString()},l.random=function(){return l.fromRatio({r:s(),g:s(),b:s()})};function k(ge,Re){Re=Re===0?0:Re||10;var ce=l(ge).toHsl();return ce.s-=Re/100,ce.s=te(ce.s),l(ce)}function E(ge,Re){Re=Re===0?0:Re||10;var ce=l(ge).toHsl();return ce.s+=Re/100,ce.s=te(ce.s),l(ce)}function S(ge){return l(ge).desaturate(100)}function L(ge,Re){Re=Re===0?0:Re||10;var ce=l(ge).toHsl();return ce.l+=Re/100,ce.l=te(ce.l),l(ce)}function x(ge,Re){Re=Re===0?0:Re||10;var ce=l(ge).toRgb();return ce.r=o(0,a(255,ce.r-i(255*-(Re/100)))),ce.g=o(0,a(255,ce.g-i(255*-(Re/100)))),ce.b=o(0,a(255,ce.b-i(255*-(Re/100)))),l(ce)}function C(ge,Re){Re=Re===0?0:Re||10;var ce=l(ge).toHsl();return ce.l-=Re/100,ce.l=te(ce.l),l(ce)}function M(ge,Re){var ce=l(ge).toHsl(),Ze=(ce.h+Re)%360;return ce.h=Ze<0?360+Ze:Ze,l(ce)}function g(ge){var Re=l(ge).toHsl();return Re.h=(Re.h+180)%360,l(Re)}function P(ge){var Re=l(ge).toHsl(),ce=Re.h;return[l(ge),l({h:(ce+120)%360,s:Re.s,l:Re.l}),l({h:(ce+240)%360,s:Re.s,l:Re.l})]}function T(ge){var Re=l(ge).toHsl(),ce=Re.h;return[l(ge),l({h:(ce+90)%360,s:Re.s,l:Re.l}),l({h:(ce+180)%360,s:Re.s,l:Re.l}),l({h:(ce+270)%360,s:Re.s,l:Re.l})]}function z(ge){var Re=l(ge).toHsl(),ce=Re.h;return[l(ge),l({h:(ce+72)%360,s:Re.s,l:Re.l}),l({h:(ce+216)%360,s:Re.s,l:Re.l})]}function O(ge,Re,ce){Re=Re||6,ce=ce||30;var Ze=l(ge).toHsl(),ut=360/ce,pt=[l(ge)];for(Ze.h=(Ze.h-(ut*Re>>1)+720)%360;--Re;)Ze.h=(Ze.h+ut)%360,pt.push(l(Ze));return pt}function V(ge,Re){Re=Re||6;for(var ce=l(ge).toHsv(),Ze=ce.h,ut=ce.s,pt=ce.v,Zt=[],st=1/Re;Re--;)Zt.push(l({h:Ze,s:ut,v:pt})),pt=(pt+st)%1;return Zt}l.mix=function(ge,Re,ce){ce=ce===0?0:ce||50;var Ze=l(ge).toRgb(),ut=l(Re).toRgb(),pt=ce/100,Zt={r:(ut.r-Ze.r)*pt+Ze.r,g:(ut.g-Ze.g)*pt+Ze.g,b:(ut.b-Ze.b)*pt+Ze.b,a:(ut.a-Ze.a)*pt+Ze.a};return l(Zt)},l.readability=function(ge,Re){var ce=l(ge),Ze=l(Re);return(e.max(ce.getLuminance(),Ze.getLuminance())+.05)/(e.min(ce.getLuminance(),Ze.getLuminance())+.05)},l.isReadable=function(ge,Re,ce){var Ze=l.readability(ge,Re),ut,pt;switch(pt=!1,ut=Pe(ce),ut.level+ut.size){case"AAsmall":case"AAAlarge":pt=Ze>=4.5;break;case"AAlarge":pt=Ze>=3;break;case"AAAsmall":pt=Ze>=7;break}return pt},l.mostReadable=function(ge,Re,ce){var Ze=null,ut=0,pt,Zt,st,lt;ce=ce||{},Zt=ce.includeFallbackColors,st=ce.level,lt=ce.size;for(var Gt=0;Gt<Re.length;Gt++)pt=l.readability(ge,Re[Gt]),pt>ut&&(ut=pt,Ze=l(Re[Gt]));return l.isReadable(ge,Ze,{level:st,size:lt})||!Zt?Ze:(ce.includeFallbackColors=!1,l.mostReadable(ge,["#fff","#000"],ce))};var G=l.names={aliceblue:"f0f8ff",antiquewhite:"faebd7",aqua:"0ff",aquamarine:"7fffd4",azure:"f0ffff",beige:"f5f5dc",bisque:"ffe4c4",black:"000",blanchedalmond:"ffebcd",blue:"00f",blueviolet:"8a2be2",brown:"a52a2a",burlywood:"deb887",burntsienna:"ea7e5d",cadetblue:"5f9ea0",chartreuse:"7fff00",chocolate:"d2691e",coral:"ff7f50",cornflowerblue:"6495ed",cornsilk:"fff8dc",crimson:"dc143c",cyan:"0ff",darkblue:"00008b",darkcyan:"008b8b",darkgoldenrod:"b8860b",darkgray:"a9a9a9",darkgreen:"006400",darkgrey:"a9a9a9",darkkhaki:"bdb76b",darkmagenta:"8b008b",darkolivegreen:"556b2f",darkorange:"ff8c00",darkorchid:"9932cc",darkred:"8b0000",darksalmon:"e9967a",darkseagreen:"8fbc8f",darkslateblue:"483d8b",darkslategray:"2f4f4f",darkslategrey:"2f4f4f",darkturquoise:"00ced1",darkviolet:"9400d3",deeppink:"ff1493",deepskyblue:"00bfff",dimgray:"696969",dimgrey:"696969",dodgerblue:"1e90ff",firebrick:"b22222",floralwhite:"fffaf0",forestgreen:"228b22",fuchsia:"f0f",gainsboro:"dcdcdc",ghostwhite:"f8f8ff",gold:"ffd700",goldenrod:"daa520",gray:"808080",green:"008000",greenyellow:"adff2f",grey:"808080",honeydew:"f0fff0",hotpink:"ff69b4",indianred:"cd5c5c",indigo:"4b0082",ivory:"fffff0",khaki:"f0e68c",lavender:"e6e6fa",lavenderblush:"fff0f5",lawngreen:"7cfc00",lemonchiffon:"fffacd",lightblue:"add8e6",lightcoral:"f08080",lightcyan:"e0ffff",lightgoldenrodyellow:"fafad2",lightgray:"d3d3d3",lightgreen:"90ee90",lightgrey:"d3d3d3",lightpink:"ffb6c1",lightsalmon:"ffa07a",lightseagreen:"20b2aa",lightskyblue:"87cefa",lightslategray:"789",lightslategrey:"789",lightsteelblue:"b0c4de",lightyellow:"ffffe0",lime:"0f0",limegreen:"32cd32",linen:"faf0e6",magenta:"f0f",maroon:"800000",mediumaquamarine:"66cdaa",mediumblue:"0000cd",mediumorchid:"ba55d3",mediumpurple:"9370db",mediumseagreen:"3cb371",mediumslateblue:"7b68ee",mediumspringgreen:"00fa9a",mediumturquoise:"48d1cc",mediumvioletred:"c71585",midnightblue:"191970",mintcream:"f5fffa",mistyrose:"ffe4e1",moccasin:"ffe4b5",navajowhite:"ffdead",navy:"000080",oldlace:"fdf5e6",olive:"808000",olivedrab:"6b8e23",orange:"ffa500",orangered:"ff4500",orchid:"da70d6",palegoldenrod:"eee8aa",palegreen:"98fb98",paleturquoise:"afeeee",palevioletred:"db7093",papayawhip:"ffefd5",peachpuff:"ffdab9",peru:"cd853f",pink:"ffc0cb",plum:"dda0dd",powderblue:"b0e0e6",purple:"800080",rebeccapurple:"663399",red:"f00",rosybrown:"bc8f8f",royalblue:"4169e1",saddlebrown:"8b4513",salmon:"fa8072",sandybrown:"f4a460",seagreen:"2e8b57",seashell:"fff5ee",sienna:"a0522d",silver:"c0c0c0",skyblue:"87ceeb",slateblue:"6a5acd",slategray:"708090",slategrey:"708090",snow:"fffafa",springgreen:"00ff7f",steelblue:"4682b4",tan:"d2b48c",teal:"008080",thistle:"d8bfd8",tomato:"ff6347",turquoise:"40e0d0",violet:"ee82ee",wheat:"f5deb3",white:"fff",whitesmoke:"f5f5f5",yellow:"ff0",yellowgreen:"9acd32"},Z=l.hexNames=j(G);function j(ge){var Re={};for(var ce in ge)ge.hasOwnProperty(ce)&&(Re[ge[ce]]=ce);return Re}function N(ge){return ge=parseFloat(ge),(isNaN(ge)||ge<0||ge>1)&&(ge=1),ge}function H(ge,Re){_e(ge)&&(ge="100%");var ce=Ee(ge);return ge=a(Re,o(0,parseFloat(ge))),ce&&(ge=parseInt(ge*Re,10)/100),e.abs(ge-Re)<1e-6?1:ge%Re/parseFloat(Re)}function te(ge){return a(1,o(0,ge))}function oe(ge){return parseInt(ge,16)}function _e(ge){return typeof ge=="string"&&ge.indexOf(".")!=-1&&parseFloat(ge)===1}function Ee(ge){return typeof ge=="string"&&ge.indexOf("%")!=-1}function Ce(ge){return ge.length==1?"0"+ge:""+ge}function me(ge){return ge<=1&&(ge=ge*100+"%"),ge}function ie(ge){return e.round(parseFloat(ge)*255).toString(16)}function Se(ge){return oe(ge)/255}var Le=function(){var ge="[-\\+]?\\d+%?",Re="[-\\+]?\\d*\\.\\d+%?",ce="(?:"+Re+")|(?:"+ge+")",Ze="[\\s|\\(]+("+ce+")[,|\\s]+("+ce+")[,|\\s]+("+ce+")\\s*\\)?",ut="[\\s|\\(]+("+ce+")[,|\\s]+("+ce+")[,|\\s]+("+ce+")[,|\\s]+("+ce+")\\s*\\)?";return{CSS_UNIT:new RegExp(ce),rgb:new RegExp("rgb"+Ze),rgba:new RegExp("rgba"+ut),hsl:new RegExp("hsl"+Ze),hsla:new RegExp("hsla"+ut),hsv:new RegExp("hsv"+Ze),hsva:new RegExp("hsva"+ut),hex3:/^#?([0-9a-fA-F]{1})([0-9a-fA-F]{1})([0-9a-fA-F]{1})$/,hex6:/^#?([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})$/,hex4:/^#?([0-9a-fA-F]{1})([0-9a-fA-F]{1})([0-9a-fA-F]{1})([0-9a-fA-F]{1})$/,hex8:/^#?([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})$/}}();function Ae(ge){return!!Le.CSS_UNIT.exec(ge)}function Fe(ge){ge=ge.replace(t,"").replace(r,"").toLowerCase();var Re=!1;if(G[ge])ge=G[ge],Re=!0;else if(ge=="transparent")return{r:0,g:0,b:0,a:0,format:"name"};var ce;return(ce=Le.rgb.exec(ge))?{r:ce[1],g:ce[2],b:ce[3]}:(ce=Le.rgba.exec(ge))?{r:ce[1],g:ce[2],b:ce[3],a:ce[4]}:(ce=Le.hsl.exec(ge))?{h:ce[1],s:ce[2],l:ce[3]}:(ce=Le.hsla.exec(ge))?{h:ce[1],s:ce[2],l:ce[3],a:ce[4]}:(ce=Le.hsv.exec(ge))?{h:ce[1],s:ce[2],v:ce[3]}:(ce=Le.hsva.exec(ge))?{h:ce[1],s:ce[2],v:ce[3],a:ce[4]}:(ce=Le.hex8.exec(ge))?{r:oe(ce[1]),g:oe(ce[2]),b:oe(ce[3]),a:Se(ce[4]),format:Re?"name":"hex8"}:(ce=Le.hex6.exec(ge))?{r:oe(ce[1]),g:oe(ce[2]),b:oe(ce[3]),format:Re?"name":"hex"}:(ce=Le.hex4.exec(ge))?{r:oe(ce[1]+""+ce[1]),g:oe(ce[2]+""+ce[2]),b:oe(ce[3]+""+ce[3]),a:Se(ce[4]+""+ce[4]),format:Re?"name":"hex8"}:(ce=Le.hex3.exec(ge))?{r:oe(ce[1]+""+ce[1]),g:oe(ce[2]+""+ce[2]),b:oe(ce[3]+""+ce[3]),format:Re?"name":"hex"}:!1}function Pe(ge){var Re,ce;return ge=ge||{level:"AA",size:"small"},Re=(ge.level||"AA").toUpperCase(),ce=(ge.size||"small").toLowerCase(),Re!=="AA"&&Re!=="AAA"&&(Re="AA"),ce!=="small"&&ce!=="large"&&(ce="small"),{level:Re,size:ce}}typeof y6!="undefined"&&y6.exports?y6.exports=l:window.tinycolor=l})(Math)});var Ao=ye(FS=>{"use strict";var Vee=my(),RS=Array.isArray;function Ftt(e,t){var r,n;for(r=0;r<e.length;r++){if(n=e[r],n!==null&&typeof n=="object")return!1;n!==void 0&&(t[r]=n)}return!0}FS.extendFlat=function(){return DS(arguments,!1,!1,!1)};FS.extendDeep=function(){return DS(arguments,!0,!1,!1)};FS.extendDeepAll=function(){return DS(arguments,!0,!0,!1)};FS.extendDeepNoArrays=function(){return DS(arguments,!0,!1,!0)};function DS(e,t,r,n){var i=e[0],a=e.length,o,s,l,u,c,f,h;if(a===2&&RS(i)&&RS(e[1])&&i.length===0){if(h=Ftt(e[1],i),h)return i;i.splice(0,i.length)}for(var d=1;d<a;d++){o=e[d];for(s in o)l=i[s],u=o[s],n&&RS(u)?i[s]=u:t&&u&&(Vee(u)||(c=RS(u)))?(c?(c=!1,f=l&&RS(l)?l:[]):f=l&&Vee(l)?l:{},i[s]=DS([f,u],t,r,n)):(typeof u!="undefined"||r)&&(i[s]=u)}return i}});var ec=ye((_rr,Gee)=>{"use strict";Gee.exports=function(e){var t=e.variantValues,r=e.editType,n=e.colorEditType;n===void 0&&(n=r);var i={editType:r,valType:"integer",min:1,max:1e3,extras:["normal","bold"],dflt:"normal"};e.noNumericWeightValues&&(i.valType="enumerated",i.values=i.extras,i.extras=void 0,i.min=void 0,i.max=void 0);var a={family:{valType:"string",noBlank:!0,strict:!0,editType:r},size:{valType:"number",min:1,editType:r},color:{valType:"color",editType:n},weight:i,style:{editType:r,valType:"enumerated",values:["normal","italic"],dflt:"normal"},variant:e.noFontVariant?void 0:{editType:r,valType:"enumerated",values:t||["normal","small-caps","all-small-caps","all-petite-caps","petite-caps","unicase"],dflt:"normal"},textcase:e.noFontTextcase?void 0:{editType:r,valType:"enumerated",values:["normal","word caps","upper","lower"],dflt:"normal"},lineposition:e.noFontLineposition?void 0:{editType:r,valType:"flaglist",flags:["under","over","through"],extras:["none"],dflt:"none"},shadow:e.noFontShadow?void 0:{editType:r,valType:"string",dflt:e.autoShadowDflt?"auto":"none"},editType:r};return e.autoSize&&(a.size.dflt="auto"),e.autoColor&&(a.color.dflt="auto"),e.arrayOk&&(a.family.arrayOk=!0,a.weight.arrayOk=!0,a.style.arrayOk=!0,e.noFontVariant||(a.variant.arrayOk=!0),e.noFontTextcase||(a.textcase.arrayOk=!0),e.noFontLineposition||(a.lineposition.arrayOk=!0),e.noFontShadow||(a.shadow.arrayOk=!0),a.size.arrayOk=!0,a.color.arrayOk=!0),a}});var zS=ye((xrr,Hee)=>{"use strict";Hee.exports={YANGLE:60,HOVERARROWSIZE:6,HOVERTEXTPAD:3,HOVERFONTSIZE:13,HOVERFONT:"Arial, sans-serif",HOVERMINTIME:50,HOVERID:"-hover"}});var B1=ye((brr,Xee)=>{"use strict";var jee=zS(),Wee=ec(),kO=Wee({editType:"none"});kO.family.dflt=jee.HOVERFONT;kO.size.dflt=jee.HOVERFONTSIZE;Xee.exports={clickmode:{valType:"flaglist",flags:["event","select"],dflt:"event",editType:"plot",extras:["none"]},dragmode:{valType:"enumerated",values:["zoom","pan","select","lasso","drawclosedpath","drawopenpath","drawline","drawrect","drawcircle","orbit","turntable",!1],dflt:"zoom",editType:"modebar"},hovermode:{valType:"enumerated",values:["x","y","closest",!1,"x unified","y unified"],dflt:"closest",editType:"modebar"},hoversubplots:{valType:"enumerated",values:["single","overlaying","axis"],dflt:"overlaying",editType:"none"},hoverdistance:{valType:"integer",min:-1,dflt:20,editType:"none"},spikedistance:{valType:"integer",min:-1,dflt:-1,editType:"none"},hoverlabel:{bgcolor:{valType:"color",editType:"none"},bordercolor:{valType:"color",editType:"none"},font:kO,grouptitlefont:Wee({editType:"none"}),align:{valType:"enumerated",values:["left","right","auto"],dflt:"auto",editType:"none"},namelength:{valType:"integer",min:-1,dflt:15,editType:"none"},showarrow:{valType:"boolean",dflt:!0,editType:"none"},editType:"none"},selectdirection:{valType:"enumerated",values:["h","v","d","any"],dflt:"any",editType:"none"}}});var a3=ye((wrr,Zee)=>{"use strict";var ztt=ec(),OS=B1().hoverlabel,qS=Ao().extendFlat;Zee.exports={hoverlabel:{bgcolor:qS({},OS.bgcolor,{arrayOk:!0}),bordercolor:qS({},OS.bordercolor,{arrayOk:!0}),font:ztt({arrayOk:!0,editType:"none"}),align:qS({},OS.align,{arrayOk:!0}),namelength:qS({},OS.namelength,{arrayOk:!0}),showarrow:qS({},OS.showarrow),editType:"none"}}});var Gl=ye((Trr,Yee)=>{"use strict";var Ott=ec(),qtt=a3();Yee.exports={type:{valType:"enumerated",values:[],dflt:"scatter",editType:"calc+clearAxisTypes",_noTemplating:!0},visible:{valType:"enumerated",values:[!0,!1,"legendonly"],dflt:!0,editType:"calc"},showlegend:{valType:"boolean",dflt:!0,editType:"style"},legend:{valType:"subplotid",dflt:"legend",editType:"style"},legendgroup:{valType:"string",dflt:"",editType:"style"},legendgrouptitle:{text:{valType:"string",dflt:"",editType:"style"},font:Ott({editType:"style"}),editType:"style"},legendrank:{valType:"number",dflt:1e3,editType:"style"},legendwidth:{valType:"number",min:0,editType:"style"},opacity:{valType:"number",min:0,max:1,dflt:1,editType:"style"},name:{valType:"string",editType:"style"},uid:{valType:"string",editType:"plot",anim:!0},ids:{valType:"data_array",editType:"calc",anim:!0},customdata:{valType:"data_array",editType:"calc"},meta:{valType:"any",arrayOk:!0,editType:"plot"},selectedpoints:{valType:"any",editType:"calc"},hoverinfo:{valType:"flaglist",flags:["x","y","z","text","name"],extras:["all","none","skip"],arrayOk:!0,dflt:"all",editType:"none"},hoverlabel:qtt.hoverlabel,stream:{token:{valType:"string",noBlank:!0,strict:!0,editType:"calc"},maxpoints:{valType:"number",min:0,max:1e4,dflt:500,editType:"calc"},editType:"calc"},uirevision:{valType:"any",editType:"none"}}});var lb=ye((Arr,$ee)=>{"use strict";var Btt=cd(),_6={Greys:[[0,"rgb(0,0,0)"],[1,"rgb(255,255,255)"]],YlGnBu:[[0,"rgb(8,29,88)"],[.125,"rgb(37,52,148)"],[.25,"rgb(34,94,168)"],[.375,"rgb(29,145,192)"],[.5,"rgb(65,182,196)"],[.625,"rgb(127,205,187)"],[.75,"rgb(199,233,180)"],[.875,"rgb(237,248,217)"],[1,"rgb(255,255,217)"]],Greens:[[0,"rgb(0,68,27)"],[.125,"rgb(0,109,44)"],[.25,"rgb(35,139,69)"],[.375,"rgb(65,171,93)"],[.5,"rgb(116,196,118)"],[.625,"rgb(161,217,155)"],[.75,"rgb(199,233,192)"],[.875,"rgb(229,245,224)"],[1,"rgb(247,252,245)"]],YlOrRd:[[0,"rgb(128,0,38)"],[.125,"rgb(189,0,38)"],[.25,"rgb(227,26,28)"],[.375,"rgb(252,78,42)"],[.5,"rgb(253,141,60)"],[.625,"rgb(254,178,76)"],[.75,"rgb(254,217,118)"],[.875,"rgb(255,237,160)"],[1,"rgb(255,255,204)"]],Bluered:[[0,"rgb(0,0,255)"],[1,"rgb(255,0,0)"]],RdBu:[[0,"rgb(5,10,172)"],[.35,"rgb(106,137,247)"],[.5,"rgb(190,190,190)"],[.6,"rgb(220,170,132)"],[.7,"rgb(230,145,90)"],[1,"rgb(178,10,28)"]],Reds:[[0,"rgb(220,220,220)"],[.2,"rgb(245,195,157)"],[.4,"rgb(245,160,105)"],[1,"rgb(178,10,28)"]],Blues:[[0,"rgb(5,10,172)"],[.35,"rgb(40,60,190)"],[.5,"rgb(70,100,245)"],[.6,"rgb(90,120,245)"],[.7,"rgb(106,137,247)"],[1,"rgb(220,220,220)"]],Picnic:[[0,"rgb(0,0,255)"],[.1,"rgb(51,153,255)"],[.2,"rgb(102,204,255)"],[.3,"rgb(153,204,255)"],[.4,"rgb(204,204,255)"],[.5,"rgb(255,255,255)"],[.6,"rgb(255,204,255)"],[.7,"rgb(255,153,255)"],[.8,"rgb(255,102,204)"],[.9,"rgb(255,102,102)"],[1,"rgb(255,0,0)"]],Rainbow:[[0,"rgb(150,0,90)"],[.125,"rgb(0,0,200)"],[.25,"rgb(0,25,255)"],[.375,"rgb(0,152,255)"],[.5,"rgb(44,255,150)"],[.625,"rgb(151,255,0)"],[.75,"rgb(255,234,0)"],[.875,"rgb(255,111,0)"],[1,"rgb(255,0,0)"]],Portland:[[0,"rgb(12,51,131)"],[.25,"rgb(10,136,186)"],[.5,"rgb(242,211,56)"],[.75,"rgb(242,143,56)"],[1,"rgb(217,30,30)"]],Jet:[[0,"rgb(0,0,131)"],[.125,"rgb(0,60,170)"],[.375,"rgb(5,255,255)"],[.625,"rgb(255,255,0)"],[.875,"rgb(250,0,0)"],[1,"rgb(128,0,0)"]],Hot:[[0,"rgb(0,0,0)"],[.3,"rgb(230,0,0)"],[.6,"rgb(255,210,0)"],[1,"rgb(255,255,255)"]],Blackbody:[[0,"rgb(0,0,0)"],[.2,"rgb(230,0,0)"],[.4,"rgb(230,210,0)"],[.7,"rgb(255,255,255)"],[1,"rgb(160,200,255)"]],Earth:[[0,"rgb(0,0,130)"],[.1,"rgb(0,180,180)"],[.2,"rgb(40,210,40)"],[.4,"rgb(230,230,50)"],[.6,"rgb(120,70,20)"],[1,"rgb(255,255,255)"]],Electric:[[0,"rgb(0,0,0)"],[.15,"rgb(30,0,100)"],[.4,"rgb(120,0,100)"],[.6,"rgb(160,90,0)"],[.8,"rgb(230,200,0)"],[1,"rgb(255,250,220)"]],Viridis:[[0,"#440154"],[.06274509803921569,"#48186a"],[.12549019607843137,"#472d7b"],[.18823529411764706,"#424086"],[.25098039215686274,"#3b528b"],[.3137254901960784,"#33638d"],[.3764705882352941,"#2c728e"],[.4392156862745098,"#26828e"],[.5019607843137255,"#21918c"],[.5647058823529412,"#1fa088"],[.6274509803921569,"#28ae80"],[.6901960784313725,"#3fbc73"],[.7529411764705882,"#5ec962"],[.8156862745098039,"#84d44b"],[.8784313725490196,"#addc30"],[.9411764705882353,"#d8e219"],[1,"#fde725"]],Cividis:[[0,"rgb(0,32,76)"],[.058824,"rgb(0,42,102)"],[.117647,"rgb(0,52,110)"],[.176471,"rgb(39,63,108)"],[.235294,"rgb(60,74,107)"],[.294118,"rgb(76,85,107)"],[.352941,"rgb(91,95,109)"],[.411765,"rgb(104,106,112)"],[.470588,"rgb(117,117,117)"],[.529412,"rgb(131,129,120)"],[.588235,"rgb(146,140,120)"],[.647059,"rgb(161,152,118)"],[.705882,"rgb(176,165,114)"],[.764706,"rgb(192,177,109)"],[.823529,"rgb(209,191,102)"],[.882353,"rgb(225,204,92)"],[.941176,"rgb(243,219,79)"],[1,"rgb(255,233,69)"]]},Kee=_6.RdBu;function Ntt(e,t){if(t||(t=Kee),!e)return t;function r(){try{e=_6[e]||JSON.parse(e)}catch(n){e=t}}return typeof e=="string"&&(r(),typeof e=="string"&&r()),Jee(e)?e:t}function Jee(e){var t=0;if(!Array.isArray(e)||e.length<2||!e[0]||!e[e.length-1]||+e[0][0]!=0||+e[e.length-1][0]!=1)return!1;for(var r=0;r<e.length;r++){var n=e[r];if(n.length!==2||+n[0]<t||!Btt(n[1]).isValid())return!1;t=+n[0]}return!0}function Utt(e){return _6[e]!==void 0?!0:Jee(e)}$ee.exports={scales:_6,defaultScale:Kee,get:Ntt,isValid:Utt}});var Lh=ye(ub=>{"use strict";ub.defaults=["#1f77b4","#ff7f0e","#2ca02c","#d62728","#9467bd","#8c564b","#e377c2","#7f7f7f","#bcbd22","#17becf"];ub.defaultLine="#444";ub.lightLine="#eee";ub.background="#fff";ub.borderLine="#BEC8D9";ub.lightFraction=100*10/11});var ka=ye((Mrr,Qee)=>{"use strict";var bp=cd(),Vtt=Eo(),Gtt=vv().isTypedArray,fd=Qee.exports={},x6=Lh();fd.defaults=x6.defaults;var Htt=fd.defaultLine=x6.defaultLine;fd.lightLine=x6.lightLine;var LO=fd.background=x6.background;fd.tinyRGB=function(e){var t=e.toRgb();return"rgb("+Math.round(t.r)+", "+Math.round(t.g)+", "+Math.round(t.b)+")"};fd.rgb=function(e){return fd.tinyRGB(bp(e))};fd.opacity=function(e){return e?bp(e).getAlpha():0};fd.addOpacity=function(e,t){var r=bp(e).toRgb();return"rgba("+Math.round(r.r)+", "+Math.round(r.g)+", "+Math.round(r.b)+", "+t+")"};fd.combine=function(e,t){var r=bp(e).toRgb();if(r.a===1)return bp(e).toRgbString();var n=bp(t||LO).toRgb(),i=n.a===1?n:{r:255*(1-n.a)+n.r*n.a,g:255*(1-n.a)+n.g*n.a,b:255*(1-n.a)+n.b*n.a},a={r:i.r*(1-r.a)+r.r*r.a,g:i.g*(1-r.a)+r.g*r.a,b:i.b*(1-r.a)+r.b*r.a};return bp(a).toRgbString()};fd.interpolate=function(e,t,r){var n=bp(e).toRgb(),i=bp(t).toRgb(),a={r:r*n.r+(1-r)*i.r,g:r*n.g+(1-r)*i.g,b:r*n.b+(1-r)*i.b};return bp(a).toRgbString()};fd.contrast=function(e,t,r){var n=bp(e);n.getAlpha()!==1&&(n=bp(fd.combine(e,LO)));var i=n.isDark()?t?n.lighten(t):LO:r?n.darken(r):Htt;return i.toString()};fd.stroke=function(e,t){var r=bp(t);e.style({stroke:fd.tinyRGB(r),"stroke-opacity":r.getAlpha()})};fd.fill=function(e,t){var r=bp(t);e.style({fill:fd.tinyRGB(r),"fill-opacity":r.getAlpha()})};fd.clean=function(e){if(!(!e||typeof e!="object")){var t=Object.keys(e),r,n,i,a;for(r=0;r<t.length;r++)if(i=t[r],a=e[i],i.slice(-5)==="color")if(Array.isArray(a))for(n=0;n<a.length;n++)a[n]=CO(a[n]);else e[i]=CO(a);else if(i.slice(-10)==="colorscale"&&Array.isArray(a))for(n=0;n<a.length;n++)Array.isArray(a[n])&&(a[n][1]=CO(a[n][1]));else if(Array.isArray(a)){var o=a[0];if(!Array.isArray(o)&&o&&typeof o=="object")for(n=0;n<a.length;n++)fd.clean(a[n])}else a&&typeof a=="object"&&!Gtt(a)&&fd.clean(a)}};function CO(e){if(Vtt(e)||typeof e!="string")return e;var t=e.trim();if(t.slice(0,3)!=="rgb")return e;var r=t.match(/^rgba?\s*\(([^()]*)\)$/);if(!r)return e;var n=r[1].trim().split(/\s*[\s,]\s*/),i=t.charAt(3)==="a"&&n.length===4;if(!i&&n.length!==3)return e;for(var a=0;a<n.length;a++){if(!n[a].length||(n[a]=Number(n[a]),!(n[a]>=0)))return e;if(a===3)n[a]>1&&(n[a]=1);else if(n[a]>=1)return e}var o=Math.round(n[0]*255)+", "+Math.round(n[1]*255)+", "+Math.round(n[2]*255);return i?"rgba("+o+", "+n[3]+")":"rgb("+o+")"}});var N1=ye((Err,ete)=>{"use strict";ete.exports={SHOW_PLACEHOLDER:100,HIDE_PLACEHOLDER:1e3,DESELECTDIM:.2}});var o3=ye(tte=>{"use strict";tte.counter=function(e,t,r,n){var i=(t||"")+(r?"":"$"),a=n===!1?"":"^";return e==="xy"?new RegExp(a+"x([2-9]|[1-9][0-9]+)?y([2-9]|[1-9][0-9]+)?"+i):new RegExp(a+e+"([2-9]|[1-9][0-9]+)?"+i)}});var ate=ye(wp=>{"use strict";var PO=Eo(),rte=cd(),ite=Ao().extendFlat,jtt=Gl(),Wtt=lb(),Xtt=ka(),Ztt=N1().DESELECTDIM,s3=PS(),nte=o3().counter,Ytt=n3().modHalf,_g=vv().isArrayOrTypedArray,U1=vv().isTypedArraySpec,V1=vv().decodeTypedArraySpec;wp.valObjectMeta={data_array:{coerceFunction:function(e,t,r){t.set(_g(e)?e:U1(e)?V1(e):r)}},enumerated:{coerceFunction:function(e,t,r,n){n.coerceNumber&&(e=+e),n.values.indexOf(e)===-1?t.set(r):t.set(e)},validateFunction:function(e,t){t.coerceNumber&&(e=+e);for(var r=t.values,n=0;n<r.length;n++){var i=String(r[n]);if(i.charAt(0)==="/"&&i.charAt(i.length-1)==="/"){var a=new RegExp(i.slice(1,-1));if(a.test(e))return!0}else if(e===r[n])return!0}return!1}},boolean:{coerceFunction:function(e,t,r,n){let i=a=>a===!0||a===!1;i(e)||n.arrayOk&&Array.isArray(e)&&e.length>0&&e.every(i)?t.set(e):t.set(r)}},number:{coerceFunction:function(e,t,r,n){U1(e)&&(e=V1(e)),!PO(e)||n.min!==void 0&&e<n.min||n.max!==void 0&&e>n.max?t.set(r):t.set(+e)}},integer:{coerceFunction:function(e,t,r,n){if((n.extras||[]).indexOf(e)!==-1){t.set(e);return}U1(e)&&(e=V1(e)),e%1||!PO(e)||n.min!==void 0&&e<n.min||n.max!==void 0&&e>n.max?t.set(r):t.set(+e)}},string:{coerceFunction:function(e,t,r,n){if(typeof e!="string"){var i=typeof e=="number";n.strict===!0||!i?t.set(r):t.set(String(e))}else n.noBlank&&!e?t.set(r):t.set(e)}},color:{coerceFunction:function(e,t,r){U1(e)&&(e=V1(e)),rte(e).isValid()?t.set(e):t.set(r)}},colorlist:{coerceFunction:function(e,t,r){function n(i){return rte(i).isValid()}!Array.isArray(e)||!e.length?t.set(r):e.every(n)?t.set(e):t.set(r)}},colorscale:{coerceFunction:function(e,t,r){t.set(Wtt.get(e,r))}},angle:{coerceFunction:function(e,t,r){U1(e)&&(e=V1(e)),e==="auto"?t.set("auto"):PO(e)?t.set(Ytt(+e,360)):t.set(r)}},subplotid:{coerceFunction:function(e,t,r,n){var i=n.regex||nte(r);let a=o=>typeof o=="string"&&i.test(o);a(e)||n.arrayOk&&_g(e)&&e.length>0&&e.every(a)?t.set(e):t.set(r)},validateFunction:function(e,t){var r=t.dflt;return e===r?!0:typeof e!="string"?!1:!!nte(r).test(e)}},flaglist:{coerceFunction:function(e,t,r,n){if((n.extras||[]).indexOf(e)!==-1){t.set(e);return}if(typeof e!="string"){t.set(r);return}for(var i=e.split("+"),a=0;a<i.length;){var o=i[a];n.flags.indexOf(o)===-1||i.indexOf(o)<a?i.splice(a,1):a++}i.length?t.set(i.join("+")):t.set(r)}},any:{coerceFunction:function(e,t,r){e===void 0?t.set(r):t.set(U1(e)?V1(e):e)}},info_array:{coerceFunction:function(e,t,r,n){function i(k,E,S){var L,x={set:function(C){L=C}};return S===void 0&&(S=E.dflt),wp.valObjectMeta[E.valType].coerceFunction(k,x,S,E),L}if(U1(e)&&(e=V1(e)),!_g(e)){t.set(r);return}var a=n.dimensions===2||n.dimensions==="1-2"&&Array.isArray(e)&&_g(e[0]),o=n.items,s=[],l=Array.isArray(o),u=l&&a&&_g(o[0]),c=a&&l&&!u,f=l&&!c?o.length:e.length,h,d,v,_,b,p;if(r=Array.isArray(r)?r:[],a)for(h=0;h<f;h++)for(s[h]=[],v=_g(e[h])?e[h]:[],c?b=o.length:l?b=o[h].length:b=v.length,d=0;d<b;d++)c?_=o[d]:l?_=o[h][d]:_=o,p=i(v[d],_,(r[h]||[])[d]),p!==void 0&&(s[h][d]=p);else for(h=0;h<f;h++)p=i(e[h],l?o[h]:o,r[h]),p!==void 0&&(s[h]=p);t.set(s)},validateFunction:function(e,t){if(!_g(e))return!1;var r=t.items,n=Array.isArray(r),i=t.dimensions===2;if(!t.freeLength&&e.length!==r.length)return!1;for(var a=0;a<e.length;a++)if(i){if(!_g(e[a])||!t.freeLength&&e[a].length!==r[a].length)return!1;for(var o=0;o<e[a].length;o++)if(!b6(e[a][o],n?r[a][o]:r))return!1}else if(!b6(e[a],n?r[a]:r))return!1;return!0}}};wp.coerce=function(e,t,r,n,i){var a=s3(r,n).get(),o=s3(e,n),s=s3(t,n),l=o.get(),u=t._template;if(l===void 0&&u&&(l=s3(u,n).get(),u=0),i===void 0&&(i=a.dflt),a.arrayOk){if(_g(l))return s.set(l),l;if(U1(l))return l=V1(l),s.set(l),l}var c=wp.valObjectMeta[a.valType].coerceFunction;c(l,s,i,a);var f=s.get();return u&&f===i&&!b6(l,a)&&(l=s3(u,n).get(),c(l,s,i,a),f=s.get()),f};wp.coerce2=function(e,t,r,n,i){var a=s3(e,n),o=wp.coerce(e,t,r,n,i),s=a.get();return s!=null?o:!1};wp.coerceFont=function(e,t,r,n){n||(n={}),r=ite({},r),r=ite(r,n.overrideDflt||{});var i={family:e(t+".family",r.family),size:e(t+".size",r.size),color:e(t+".color",r.color),weight:e(t+".weight",r.weight),style:e(t+".style",r.style)};if(n.noFontVariant||(i.variant=e(t+".variant",r.variant)),n.noFontLineposition||(i.lineposition=e(t+".lineposition",r.lineposition)),n.noFontTextcase||(i.textcase=e(t+".textcase",r.textcase)),!n.noFontShadow){var a=r.shadow;a==="none"&&n.autoShadowDflt&&(a="auto"),i.shadow=e(t+".shadow",a)}return i};wp.coercePattern=function(e,t,r,n){var i=e(t+".shape"),a;if(i||(a=e(t+".path")),i||a){i&&e(t+".solidity"),e(t+".size");var o=e(t+".fillmode"),s=o==="overlay";if(!n){var l=e(t+".bgcolor",s?r:void 0);e(t+".fgcolor",s?Xtt.contrast(l):r)}e(t+".fgopacity",s?.5:1)}};wp.coerceHoverinfo=function(e,t,r){var n=t._module.attributes,i=n.hoverinfo?n:jtt,a=i.hoverinfo,o;if(r._dataLength===1){var s=a.dflt==="all"?a.flags.slice():a.dflt.split("+");s.splice(s.indexOf("name"),1),o=s.join("+")}return wp.coerce(e,t,i,"hoverinfo",o)};wp.coerceSelectionMarkerOpacity=function(e,t){if(e.marker){var r=e.marker.opacity;if(r!==void 0){var n,i;!_g(r)&&!e.selected&&!e.unselected&&(n=r,i=Ztt*r),t("selected.marker.opacity",n),t("unselected.marker.opacity",i)}}};function b6(e,t){var r=wp.valObjectMeta[t.valType];if(t.arrayOk&&_g(e))return!0;if(r.validateFunction)return r.validateFunction(e,t);var n={},i=n,a={set:function(o){i=o}};return r.coerceFunction(e,a,n,t),i!==n}wp.validate=b6});var cb=ye((Lrr,ute)=>{"use strict";var ote={staticPlot:{valType:"boolean",dflt:!1},typesetMath:{valType:"boolean",dflt:!0},plotlyServerURL:{valType:"string",dflt:""},editable:{valType:"boolean",dflt:!1},edits:{annotationPosition:{valType:"boolean",dflt:!1},annotationTail:{valType:"boolean",dflt:!1},annotationText:{valType:"boolean",dflt:!1},axisTitleText:{valType:"boolean",dflt:!1},colorbarPosition:{valType:"boolean",dflt:!1},colorbarTitleText:{valType:"boolean",dflt:!1},legendPosition:{valType:"boolean",dflt:!1},legendText:{valType:"boolean",dflt:!1},shapePosition:{valType:"boolean",dflt:!1},titleText:{valType:"boolean",dflt:!1}},editSelection:{valType:"boolean",dflt:!0},autosizable:{valType:"boolean",dflt:!1},responsive:{valType:"boolean",dflt:!1},fillFrame:{valType:"boolean",dflt:!1},frameMargins:{valType:"number",dflt:0,min:0,max:.5},scrollZoom:{valType:"flaglist",flags:["cartesian","gl3d","geo","mapbox","map"],extras:[!0,!1],dflt:"gl3d+geo+map"},doubleClick:{valType:"enumerated",values:[!1,"reset","autosize","reset+autosize"],dflt:"reset+autosize"},doubleClickDelay:{valType:"number",dflt:300,min:0},showAxisDragHandles:{valType:"boolean",dflt:!0},showAxisRangeEntryBoxes:{valType:"boolean",dflt:!0},showTips:{valType:"boolean",dflt:!0},showLink:{valType:"boolean",dflt:!1},linkText:{valType:"string",dflt:"Edit chart",noBlank:!0},sendData:{valType:"boolean",dflt:!0},showSources:{valType:"any",dflt:!1},displayModeBar:{valType:"enumerated",values:["hover",!0,!1],dflt:"hover"},showSendToCloud:{valType:"boolean",dflt:!1},showEditInChartStudio:{valType:"boolean",dflt:!1},modeBarButtonsToRemove:{valType:"any",dflt:[]},modeBarButtonsToAdd:{valType:"any",dflt:[]},modeBarButtons:{valType:"any",dflt:!1},toImageButtonOptions:{valType:"any",dflt:{}},displaylogo:{valType:"boolean",dflt:!0},watermark:{valType:"boolean",dflt:!1},plotGlPixelRatio:{valType:"number",dflt:2,min:1,max:4},setBackground:{valType:"any",dflt:"transparent"},topojsonURL:{valType:"string",noBlank:!0,dflt:"https://cdn.plot.ly/un/"},mapboxAccessToken:{valType:"string",dflt:null},logging:{valType:"integer",min:0,max:2,dflt:1},notifyOnLogging:{valType:"integer",min:0,max:2,dflt:0},queueLength:{valType:"integer",min:0,dflt:0},locale:{valType:"string",dflt:"en-US"},locales:{valType:"any",dflt:{}}},ste={};function lte(e,t){for(var r in e){var n=e[r];n.valType?t[r]=n.dflt:(t[r]||(t[r]={}),lte(n,t[r]))}}lte(ote,ste);ute.exports={configAttributes:ote,dfltConfig:ste}});var RO=ye((Prr,cte)=>{"use strict";var IO=Oa(),Ktt=Eo(),BS=[];cte.exports=function(e,t){if(BS.indexOf(e)!==-1)return;BS.push(e);var r=1e3;Ktt(t)?r=t:t==="long"&&(r=3e3);var n=IO.select("body").selectAll(".plotly-notifier").data([0]);n.enter().append("div").classed("plotly-notifier",!0);var i=n.selectAll(".notifier-note").data(BS);function a(o){o.duration(700).style("opacity",0).each("end",function(s){var l=BS.indexOf(s);l!==-1&&BS.splice(l,1),IO.select(this).remove()})}i.enter().append("div").classed("notifier-note",!0).style("opacity",0).each(function(o){var s=IO.select(this);s.append("button").classed("notifier-close",!0).html("&times;").on("click",function(){s.transition().call(a)});for(var l=s.append("p"),u=o.split(/<br\s*\/?>/g),c=0;c<u.length;c++)c&&l.append("br"),l.append("span").text(u[c]);t==="stick"?s.transition().duration(350).style("opacity",1):s.transition().duration(700).style("opacity",1).transition().delay(r).call(a)})}});var G1=ye((Irr,fte)=>{"use strict";var l3=cb().dfltConfig,DO=RO(),FO=fte.exports={};FO.log=function(){var e;if(l3.logging>1){var t=["LOG:"];for(e=0;e<arguments.length;e++)t.push(arguments[e]);console.trace.apply(console,t)}if(l3.notifyOnLogging>1){var r=[];for(e=0;e<arguments.length;e++)r.push(arguments[e]);DO(r.join("<br>"),"long")}};FO.warn=function(){var e;if(l3.logging>0){var t=["WARN:"];for(e=0;e<arguments.length;e++)t.push(arguments[e]);console.trace.apply(console,t)}if(l3.notifyOnLogging>0){var r=[];for(e=0;e<arguments.length;e++)r.push(arguments[e]);DO(r.join("<br>"),"stick")}};FO.error=function(){var e;if(l3.logging>0){var t=["ERROR:"];for(e=0;e<arguments.length;e++)t.push(arguments[e]);console.error.apply(console,t)}if(l3.notifyOnLogging>0){var r=[];for(e=0;e<arguments.length;e++)r.push(arguments[e]);DO(r.join("<br>"),"stick")}}});var w6=ye((Rrr,hte)=>{"use strict";hte.exports=function(){}});var zO=ye((Drr,dte)=>{"use strict";dte.exports=function(t,r){if(r instanceof RegExp){for(var n=r.toString(),i=0;i<t.length;i++)if(t[i]instanceof RegExp&&t[i].toString()===n)return t;t.push(r)}else(r||r===0)&&t.indexOf(r)===-1&&t.push(r);return t}});var pte=ye((Frr,vte)=>{vte.exports=Jtt;function Jtt(){var e=new Float32Array(16);return e[0]=1,e[1]=0,e[2]=0,e[3]=0,e[4]=0,e[5]=1,e[6]=0,e[7]=0,e[8]=0,e[9]=0,e[10]=1,e[11]=0,e[12]=0,e[13]=0,e[14]=0,e[15]=1,e}});var mte=ye((zrr,gte)=>{gte.exports=$tt;function $tt(e){var t=new Float32Array(16);return t[0]=e[0],t[1]=e[1],t[2]=e[2],t[3]=e[3],t[4]=e[4],t[5]=e[5],t[6]=e[6],t[7]=e[7],t[8]=e[8],t[9]=e[9],t[10]=e[10],t[11]=e[11],t[12]=e[12],t[13]=e[13],t[14]=e[14],t[15]=e[15],t}});var _te=ye((Orr,yte)=>{yte.exports=Qtt;function Qtt(e,t){return e[0]=t[0],e[1]=t[1],e[2]=t[2],e[3]=t[3],e[4]=t[4],e[5]=t[5],e[6]=t[6],e[7]=t[7],e[8]=t[8],e[9]=t[9],e[10]=t[10],e[11]=t[11],e[12]=t[12],e[13]=t[13],e[14]=t[14],e[15]=t[15],e}});var OO=ye((qrr,xte)=>{xte.exports=ert;function ert(e){return e[0]=1,e[1]=0,e[2]=0,e[3]=0,e[4]=0,e[5]=1,e[6]=0,e[7]=0,e[8]=0,e[9]=0,e[10]=1,e[11]=0,e[12]=0,e[13]=0,e[14]=0,e[15]=1,e}});var wte=ye((Brr,bte)=>{bte.exports=trt;function trt(e,t){if(e===t){var r=t[1],n=t[2],i=t[3],a=t[6],o=t[7],s=t[11];e[1]=t[4],e[2]=t[8],e[3]=t[12],e[4]=r,e[6]=t[9],e[7]=t[13],e[8]=n,e[9]=a,e[11]=t[14],e[12]=i,e[13]=o,e[14]=s}else e[0]=t[0],e[1]=t[4],e[2]=t[8],e[3]=t[12],e[4]=t[1],e[5]=t[5],e[6]=t[9],e[7]=t[13],e[8]=t[2],e[9]=t[6],e[10]=t[10],e[11]=t[14],e[12]=t[3],e[13]=t[7],e[14]=t[11],e[15]=t[15];return e}});var Ate=ye((Nrr,Tte)=>{Tte.exports=rrt;function rrt(e,t){var r=t[0],n=t[1],i=t[2],a=t[3],o=t[4],s=t[5],l=t[6],u=t[7],c=t[8],f=t[9],h=t[10],d=t[11],v=t[12],_=t[13],b=t[14],p=t[15],k=r*s-n*o,E=r*l-i*o,S=r*u-a*o,L=n*l-i*s,x=n*u-a*s,C=i*u-a*l,M=c*_-f*v,g=c*b-h*v,P=c*p-d*v,T=f*b-h*_,z=f*p-d*_,O=h*p-d*b,V=k*O-E*z+S*T+L*P-x*g+C*M;return V?(V=1/V,e[0]=(s*O-l*z+u*T)*V,e[1]=(i*z-n*O-a*T)*V,e[2]=(_*C-b*x+p*L)*V,e[3]=(h*x-f*C-d*L)*V,e[4]=(l*P-o*O-u*g)*V,e[5]=(r*O-i*P+a*g)*V,e[6]=(b*S-v*C-p*E)*V,e[7]=(c*C-h*S+d*E)*V,e[8]=(o*z-s*P+u*M)*V,e[9]=(n*P-r*z-a*M)*V,e[10]=(v*x-_*S+p*k)*V,e[11]=(f*S-c*x-d*k)*V,e[12]=(s*g-o*T-l*M)*V,e[13]=(r*T-n*g+i*M)*V,e[14]=(_*E-v*L-b*k)*V,e[15]=(c*L-f*E+h*k)*V,e):null}});var Mte=ye((Urr,Ste)=>{Ste.exports=irt;function irt(e,t){var r=t[0],n=t[1],i=t[2],a=t[3],o=t[4],s=t[5],l=t[6],u=t[7],c=t[8],f=t[9],h=t[10],d=t[11],v=t[12],_=t[13],b=t[14],p=t[15];return e[0]=s*(h*p-d*b)-f*(l*p-u*b)+_*(l*d-u*h),e[1]=-(n*(h*p-d*b)-f*(i*p-a*b)+_*(i*d-a*h)),e[2]=n*(l*p-u*b)-s*(i*p-a*b)+_*(i*u-a*l),e[3]=-(n*(l*d-u*h)-s*(i*d-a*h)+f*(i*u-a*l)),e[4]=-(o*(h*p-d*b)-c*(l*p-u*b)+v*(l*d-u*h)),e[5]=r*(h*p-d*b)-c*(i*p-a*b)+v*(i*d-a*h),e[6]=-(r*(l*p-u*b)-o*(i*p-a*b)+v*(i*u-a*l)),e[7]=r*(l*d-u*h)-o*(i*d-a*h)+c*(i*u-a*l),e[8]=o*(f*p-d*_)-c*(s*p-u*_)+v*(s*d-u*f),e[9]=-(r*(f*p-d*_)-c*(n*p-a*_)+v*(n*d-a*f)),e[10]=r*(s*p-u*_)-o*(n*p-a*_)+v*(n*u-a*s),e[11]=-(r*(s*d-u*f)-o*(n*d-a*f)+c*(n*u-a*s)),e[12]=-(o*(f*b-h*_)-c*(s*b-l*_)+v*(s*h-l*f)),e[13]=r*(f*b-h*_)-c*(n*b-i*_)+v*(n*h-i*f),e[14]=-(r*(s*b-l*_)-o*(n*b-i*_)+v*(n*l-i*s)),e[15]=r*(s*h-l*f)-o*(n*h-i*f)+c*(n*l-i*s),e}});var kte=ye((Vrr,Ete)=>{Ete.exports=nrt;function nrt(e){var t=e[0],r=e[1],n=e[2],i=e[3],a=e[4],o=e[5],s=e[6],l=e[7],u=e[8],c=e[9],f=e[10],h=e[11],d=e[12],v=e[13],_=e[14],b=e[15],p=t*o-r*a,k=t*s-n*a,E=t*l-i*a,S=r*s-n*o,L=r*l-i*o,x=n*l-i*s,C=u*v-c*d,M=u*_-f*d,g=u*b-h*d,P=c*_-f*v,T=c*b-h*v,z=f*b-h*_;return p*z-k*T+E*P+S*g-L*M+x*C}});var Lte=ye((Grr,Cte)=>{Cte.exports=art;function art(e,t,r){var n=t[0],i=t[1],a=t[2],o=t[3],s=t[4],l=t[5],u=t[6],c=t[7],f=t[8],h=t[9],d=t[10],v=t[11],_=t[12],b=t[13],p=t[14],k=t[15],E=r[0],S=r[1],L=r[2],x=r[3];return e[0]=E*n+S*s+L*f+x*_,e[1]=E*i+S*l+L*h+x*b,e[2]=E*a+S*u+L*d+x*p,e[3]=E*o+S*c+L*v+x*k,E=r[4],S=r[5],L=r[6],x=r[7],e[4]=E*n+S*s+L*f+x*_,e[5]=E*i+S*l+L*h+x*b,e[6]=E*a+S*u+L*d+x*p,e[7]=E*o+S*c+L*v+x*k,E=r[8],S=r[9],L=r[10],x=r[11],e[8]=E*n+S*s+L*f+x*_,e[9]=E*i+S*l+L*h+x*b,e[10]=E*a+S*u+L*d+x*p,e[11]=E*o+S*c+L*v+x*k,E=r[12],S=r[13],L=r[14],x=r[15],e[12]=E*n+S*s+L*f+x*_,e[13]=E*i+S*l+L*h+x*b,e[14]=E*a+S*u+L*d+x*p,e[15]=E*o+S*c+L*v+x*k,e}});var Ite=ye((Hrr,Pte)=>{Pte.exports=ort;function ort(e,t,r){var n=r[0],i=r[1],a=r[2],o,s,l,u,c,f,h,d,v,_,b,p;return t===e?(e[12]=t[0]*n+t[4]*i+t[8]*a+t[12],e[13]=t[1]*n+t[5]*i+t[9]*a+t[13],e[14]=t[2]*n+t[6]*i+t[10]*a+t[14],e[15]=t[3]*n+t[7]*i+t[11]*a+t[15]):(o=t[0],s=t[1],l=t[2],u=t[3],c=t[4],f=t[5],h=t[6],d=t[7],v=t[8],_=t[9],b=t[10],p=t[11],e[0]=o,e[1]=s,e[2]=l,e[3]=u,e[4]=c,e[5]=f,e[6]=h,e[7]=d,e[8]=v,e[9]=_,e[10]=b,e[11]=p,e[12]=o*n+c*i+v*a+t[12],e[13]=s*n+f*i+_*a+t[13],e[14]=l*n+h*i+b*a+t[14],e[15]=u*n+d*i+p*a+t[15]),e}});var Dte=ye((jrr,Rte)=>{Rte.exports=srt;function srt(e,t,r){var n=r[0],i=r[1],a=r[2];return e[0]=t[0]*n,e[1]=t[1]*n,e[2]=t[2]*n,e[3]=t[3]*n,e[4]=t[4]*i,e[5]=t[5]*i,e[6]=t[6]*i,e[7]=t[7]*i,e[8]=t[8]*a,e[9]=t[9]*a,e[10]=t[10]*a,e[11]=t[11]*a,e[12]=t[12],e[13]=t[13],e[14]=t[14],e[15]=t[15],e}});var zte=ye((Wrr,Fte)=>{Fte.exports=lrt;function lrt(e,t,r,n){var i=n[0],a=n[1],o=n[2],s=Math.sqrt(i*i+a*a+o*o),l,u,c,f,h,d,v,_,b,p,k,E,S,L,x,C,M,g,P,T,z,O,V,G;return Math.abs(s)<1e-6?null:(s=1/s,i*=s,a*=s,o*=s,l=Math.sin(r),u=Math.cos(r),c=1-u,f=t[0],h=t[1],d=t[2],v=t[3],_=t[4],b=t[5],p=t[6],k=t[7],E=t[8],S=t[9],L=t[10],x=t[11],C=i*i*c+u,M=a*i*c+o*l,g=o*i*c-a*l,P=i*a*c-o*l,T=a*a*c+u,z=o*a*c+i*l,O=i*o*c+a*l,V=a*o*c-i*l,G=o*o*c+u,e[0]=f*C+_*M+E*g,e[1]=h*C+b*M+S*g,e[2]=d*C+p*M+L*g,e[3]=v*C+k*M+x*g,e[4]=f*P+_*T+E*z,e[5]=h*P+b*T+S*z,e[6]=d*P+p*T+L*z,e[7]=v*P+k*T+x*z,e[8]=f*O+_*V+E*G,e[9]=h*O+b*V+S*G,e[10]=d*O+p*V+L*G,e[11]=v*O+k*V+x*G,t!==e&&(e[12]=t[12],e[13]=t[13],e[14]=t[14],e[15]=t[15]),e)}});var qte=ye((Xrr,Ote)=>{Ote.exports=urt;function urt(e,t,r){var n=Math.sin(r),i=Math.cos(r),a=t[4],o=t[5],s=t[6],l=t[7],u=t[8],c=t[9],f=t[10],h=t[11];return t!==e&&(e[0]=t[0],e[1]=t[1],e[2]=t[2],e[3]=t[3],e[12]=t[12],e[13]=t[13],e[14]=t[14],e[15]=t[15]),e[4]=a*i+u*n,e[5]=o*i+c*n,e[6]=s*i+f*n,e[7]=l*i+h*n,e[8]=u*i-a*n,e[9]=c*i-o*n,e[10]=f*i-s*n,e[11]=h*i-l*n,e}});var Nte=ye((Zrr,Bte)=>{Bte.exports=crt;function crt(e,t,r){var n=Math.sin(r),i=Math.cos(r),a=t[0],o=t[1],s=t[2],l=t[3],u=t[8],c=t[9],f=t[10],h=t[11];return t!==e&&(e[4]=t[4],e[5]=t[5],e[6]=t[6],e[7]=t[7],e[12]=t[12],e[13]=t[13],e[14]=t[14],e[15]=t[15]),e[0]=a*i-u*n,e[1]=o*i-c*n,e[2]=s*i-f*n,e[3]=l*i-h*n,e[8]=a*n+u*i,e[9]=o*n+c*i,e[10]=s*n+f*i,e[11]=l*n+h*i,e}});var Vte=ye((Yrr,Ute)=>{Ute.exports=frt;function frt(e,t,r){var n=Math.sin(r),i=Math.cos(r),a=t[0],o=t[1],s=t[2],l=t[3],u=t[4],c=t[5],f=t[6],h=t[7];return t!==e&&(e[8]=t[8],e[9]=t[9],e[10]=t[10],e[11]=t[11],e[12]=t[12],e[13]=t[13],e[14]=t[14],e[15]=t[15]),e[0]=a*i+u*n,e[1]=o*i+c*n,e[2]=s*i+f*n,e[3]=l*i+h*n,e[4]=u*i-a*n,e[5]=c*i-o*n,e[6]=f*i-s*n,e[7]=h*i-l*n,e}});var Hte=ye((Krr,Gte)=>{Gte.exports=hrt;function hrt(e,t,r){var n,i,a,o=r[0],s=r[1],l=r[2],u=Math.sqrt(o*o+s*s+l*l);return Math.abs(u)<1e-6?null:(u=1/u,o*=u,s*=u,l*=u,n=Math.sin(t),i=Math.cos(t),a=1-i,e[0]=o*o*a+i,e[1]=s*o*a+l*n,e[2]=l*o*a-s*n,e[3]=0,e[4]=o*s*a-l*n,e[5]=s*s*a+i,e[6]=l*s*a+o*n,e[7]=0,e[8]=o*l*a+s*n,e[9]=s*l*a-o*n,e[10]=l*l*a+i,e[11]=0,e[12]=0,e[13]=0,e[14]=0,e[15]=1,e)}});var Wte=ye((Jrr,jte)=>{jte.exports=drt;function drt(e,t,r){var n=t[0],i=t[1],a=t[2],o=t[3],s=n+n,l=i+i,u=a+a,c=n*s,f=n*l,h=n*u,d=i*l,v=i*u,_=a*u,b=o*s,p=o*l,k=o*u;return e[0]=1-(d+_),e[1]=f+k,e[2]=h-p,e[3]=0,e[4]=f-k,e[5]=1-(c+_),e[6]=v+b,e[7]=0,e[8]=h+p,e[9]=v-b,e[10]=1-(c+d),e[11]=0,e[12]=r[0],e[13]=r[1],e[14]=r[2],e[15]=1,e}});var Zte=ye(($rr,Xte)=>{Xte.exports=vrt;function vrt(e,t){return e[0]=t[0],e[1]=0,e[2]=0,e[3]=0,e[4]=0,e[5]=t[1],e[6]=0,e[7]=0,e[8]=0,e[9]=0,e[10]=t[2],e[11]=0,e[12]=0,e[13]=0,e[14]=0,e[15]=1,e}});var Kte=ye((Qrr,Yte)=>{Yte.exports=prt;function prt(e,t){return e[0]=1,e[1]=0,e[2]=0,e[3]=0,e[4]=0,e[5]=1,e[6]=0,e[7]=0,e[8]=0,e[9]=0,e[10]=1,e[11]=0,e[12]=t[0],e[13]=t[1],e[14]=t[2],e[15]=1,e}});var $te=ye((eir,Jte)=>{Jte.exports=grt;function grt(e,t){var r=Math.sin(t),n=Math.cos(t);return e[0]=1,e[1]=0,e[2]=0,e[3]=0,e[4]=0,e[5]=n,e[6]=r,e[7]=0,e[8]=0,e[9]=-r,e[10]=n,e[11]=0,e[12]=0,e[13]=0,e[14]=0,e[15]=1,e}});var ere=ye((tir,Qte)=>{Qte.exports=mrt;function mrt(e,t){var r=Math.sin(t),n=Math.cos(t);return e[0]=n,e[1]=0,e[2]=-r,e[3]=0,e[4]=0,e[5]=1,e[6]=0,e[7]=0,e[8]=r,e[9]=0,e[10]=n,e[11]=0,e[12]=0,e[13]=0,e[14]=0,e[15]=1,e}});var rre=ye((rir,tre)=>{tre.exports=yrt;function yrt(e,t){var r=Math.sin(t),n=Math.cos(t);return e[0]=n,e[1]=r,e[2]=0,e[3]=0,e[4]=-r,e[5]=n,e[6]=0,e[7]=0,e[8]=0,e[9]=0,e[10]=1,e[11]=0,e[12]=0,e[13]=0,e[14]=0,e[15]=1,e}});var nre=ye((iir,ire)=>{ire.exports=_rt;function _rt(e,t){var r=t[0],n=t[1],i=t[2],a=t[3],o=r+r,s=n+n,l=i+i,u=r*o,c=n*o,f=n*s,h=i*o,d=i*s,v=i*l,_=a*o,b=a*s,p=a*l;return e[0]=1-f-v,e[1]=c+p,e[2]=h-b,e[3]=0,e[4]=c-p,e[5]=1-u-v,e[6]=d+_,e[7]=0,e[8]=h+b,e[9]=d-_,e[10]=1-u-f,e[11]=0,e[12]=0,e[13]=0,e[14]=0,e[15]=1,e}});var ore=ye((nir,are)=>{are.exports=xrt;function xrt(e,t,r,n,i,a,o){var s=1/(r-t),l=1/(i-n),u=1/(a-o);return e[0]=a*2*s,e[1]=0,e[2]=0,e[3]=0,e[4]=0,e[5]=a*2*l,e[6]=0,e[7]=0,e[8]=(r+t)*s,e[9]=(i+n)*l,e[10]=(o+a)*u,e[11]=-1,e[12]=0,e[13]=0,e[14]=o*a*2*u,e[15]=0,e}});var lre=ye((air,sre)=>{sre.exports=brt;function brt(e,t,r,n,i){var a=1/Math.tan(t/2),o=1/(n-i);return e[0]=a/r,e[1]=0,e[2]=0,e[3]=0,e[4]=0,e[5]=a,e[6]=0,e[7]=0,e[8]=0,e[9]=0,e[10]=(i+n)*o,e[11]=-1,e[12]=0,e[13]=0,e[14]=2*i*n*o,e[15]=0,e}});var cre=ye((oir,ure)=>{ure.exports=wrt;function wrt(e,t,r,n){var i=Math.tan(t.upDegrees*Math.PI/180),a=Math.tan(t.downDegrees*Math.PI/180),o=Math.tan(t.leftDegrees*Math.PI/180),s=Math.tan(t.rightDegrees*Math.PI/180),l=2/(o+s),u=2/(i+a);return e[0]=l,e[1]=0,e[2]=0,e[3]=0,e[4]=0,e[5]=u,e[6]=0,e[7]=0,e[8]=-((o-s)*l*.5),e[9]=(i-a)*u*.5,e[10]=n/(r-n),e[11]=-1,e[12]=0,e[13]=0,e[14]=n*r/(r-n),e[15]=0,e}});var hre=ye((sir,fre)=>{fre.exports=Trt;function Trt(e,t,r,n,i,a,o){var s=1/(t-r),l=1/(n-i),u=1/(a-o);return e[0]=-2*s,e[1]=0,e[2]=0,e[3]=0,e[4]=0,e[5]=-2*l,e[6]=0,e[7]=0,e[8]=0,e[9]=0,e[10]=2*u,e[11]=0,e[12]=(t+r)*s,e[13]=(i+n)*l,e[14]=(o+a)*u,e[15]=1,e}});var vre=ye((lir,dre)=>{var Art=OO();dre.exports=Srt;function Srt(e,t,r,n){var i,a,o,s,l,u,c,f,h,d,v=t[0],_=t[1],b=t[2],p=n[0],k=n[1],E=n[2],S=r[0],L=r[1],x=r[2];return Math.abs(v-S)<1e-6&&Math.abs(_-L)<1e-6&&Math.abs(b-x)<1e-6?Art(e):(c=v-S,f=_-L,h=b-x,d=1/Math.sqrt(c*c+f*f+h*h),c*=d,f*=d,h*=d,i=k*h-E*f,a=E*c-p*h,o=p*f-k*c,d=Math.sqrt(i*i+a*a+o*o),d?(d=1/d,i*=d,a*=d,o*=d):(i=0,a=0,o=0),s=f*o-h*a,l=h*i-c*o,u=c*a-f*i,d=Math.sqrt(s*s+l*l+u*u),d?(d=1/d,s*=d,l*=d,u*=d):(s=0,l=0,u=0),e[0]=i,e[1]=s,e[2]=c,e[3]=0,e[4]=a,e[5]=l,e[6]=f,e[7]=0,e[8]=o,e[9]=u,e[10]=h,e[11]=0,e[12]=-(i*v+a*_+o*b),e[13]=-(s*v+l*_+u*b),e[14]=-(c*v+f*_+h*b),e[15]=1,e)}});var gre=ye((uir,pre)=>{pre.exports=Mrt;function Mrt(e){return"mat4("+e[0]+", "+e[1]+", "+e[2]+", "+e[3]+", "+e[4]+", "+e[5]+", "+e[6]+", "+e[7]+", "+e[8]+", "+e[9]+", "+e[10]+", "+e[11]+", "+e[12]+", "+e[13]+", "+e[14]+", "+e[15]+")"}});var qO=ye((cir,mre)=>{mre.exports={create:pte(),clone:mte(),copy:_te(),identity:OO(),transpose:wte(),invert:Ate(),adjoint:Mte(),determinant:kte(),multiply:Lte(),translate:Ite(),scale:Dte(),rotate:zte(),rotateX:qte(),rotateY:Nte(),rotateZ:Vte(),fromRotation:Hte(),fromRotationTranslation:Wte(),fromScaling:Zte(),fromTranslation:Kte(),fromXRotation:$te(),fromYRotation:ere(),fromZRotation:rre(),fromQuat:nre(),frustum:ore(),perspective:lre(),perspectiveFromFieldOfView:cre(),ortho:hre(),lookAt:vre(),str:gre()}});var T6=ye(fh=>{"use strict";var Ert=qO();fh.init2dArray=function(e,t){for(var r=new Array(e),n=0;n<e;n++)r[n]=new Array(t);return r};fh.transposeRagged=function(e){var t=0,r=e.length,n,i;for(n=0;n<r;n++)t=Math.max(t,e[n].length);var a=new Array(t);for(n=0;n<t;n++)for(a[n]=new Array(r),i=0;i<r;i++)a[n][i]=e[i][n];return a};fh.dot=function(e,t){if(!(e.length&&t.length)||e.length!==t.length)return null;var r=e.length,n,i;if(e[0].length)for(n=new Array(r),i=0;i<r;i++)n[i]=fh.dot(e[i],t);else if(t[0].length){var a=fh.transposeRagged(t);for(n=new Array(a.length),i=0;i<a.length;i++)n[i]=fh.dot(e,a[i])}else for(n=0,i=0;i<r;i++)n+=e[i]*t[i];return n};fh.translationMatrix=function(e,t){return[[1,0,e],[0,1,t],[0,0,1]]};fh.rotationMatrix=function(e){var t=e*Math.PI/180;return[[Math.cos(t),-Math.sin(t),0],[Math.sin(t),Math.cos(t),0],[0,0,1]]};fh.rotationXYMatrix=function(e,t,r){return fh.dot(fh.dot(fh.translationMatrix(t,r),fh.rotationMatrix(e)),fh.translationMatrix(-t,-r))};fh.apply3DTransform=function(e){return function(){var t=arguments,r=arguments.length===1?t[0]:[t[0],t[1],t[2]||0];return fh.dot(e,[r[0],r[1],r[2],1]).slice(0,3)}};fh.apply2DTransform=function(e){return function(){var t=arguments;t.length===3&&(t=t[0]);var r=arguments.length===1?t[0]:[t[0],t[1]];return fh.dot(e,[r[0],r[1],1]).slice(0,2)}};fh.apply2DTransform2=function(e){var t=fh.apply2DTransform(e);return function(r){return t(r.slice(0,2)).concat(t(r.slice(2,4)))}};fh.convertCssMatrix=function(e){if(e){var t=e.length;if(t===16)return e;if(t===6)return[e[0],e[1],0,0,e[2],e[3],0,0,0,0,1,0,e[4],e[5],0,1]}return[1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1]};fh.inverseTransformMatrix=function(e){var t=[];return Ert.invert(t,e),[[t[0],t[1],t[2],t[3]],[t[4],t[5],t[6],t[7]],[t[8],t[9],t[10],t[11]],[t[12],t[13],t[14],t[15]]]}});var NS=ye((hir,Tre)=>{"use strict";var krt=Oa(),yre=G1(),Crt=T6(),Lrt=qO();function Prt(e){var t;if(typeof e=="string"){if(t=document.getElementById(e),t===null)throw new Error("No DOM element with id '"+e+"' exists on the page.");return t}else if(e==null)throw new Error("DOM element provided is null or undefined");return e}function Irt(e){var t=krt.select(e);return t.node()instanceof HTMLElement&&t.size()&&t.classed("js-plotly-plot")}function _re(e){var t=e&&e.parentNode;t&&t.removeChild(e)}function Rrt(e,t){xre("global",e,t)}function xre(e,t,r){var n="plotly.js-style-"+e,i=document.getElementById(n);if(!(i&&i.matches(".no-inline-styles"))){i||(i=document.createElement("style"),i.setAttribute("id",n),i.appendChild(document.createTextNode("")),document.head.appendChild(i));var a=i.sheet;a?a.insertRule?a.insertRule(t+"{"+r+"}",0):a.addRule?a.addRule(t,r,0):yre.warn("addStyleRule failed"):yre.warn("Cannot addRelatedStyleRule, probably due to strict CSP...")}}function Drt(e){var t="plotly.js-style-"+e,r=document.getElementById(t);r&&_re(r)}function Frt(e,t,r,n,i,a){var o=n.split(":"),s=i.split(":"),l="data-btn-style-event-added";a||(a=document),a.querySelectorAll(e).forEach(function(u){u.getAttribute(l)||(u.addEventListener("mouseenter",function(){var c=this.querySelector(r);c&&(c.style[o[0]]=o[1])}),u.addEventListener("mouseleave",function(){var c=this.querySelector(r);c&&(t&&this.matches(t)?c.style[o[0]]=o[1]:c.style[s[0]]=s[1])}),u.setAttribute(l,!0))})}function zrt(e){var t=wre(e),r=[1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1];return t.forEach(function(n){var i=bre(n);if(i){var a=Crt.convertCssMatrix(i);r=Lrt.multiply(r,r,a)}}),r}function bre(e){var t=window.getComputedStyle(e,null),r=t.getPropertyValue("-webkit-transform")||t.getPropertyValue("-moz-transform")||t.getPropertyValue("-ms-transform")||t.getPropertyValue("-o-transform")||t.getPropertyValue("transform");return r==="none"?null:r.replace("matrix","").replace("3d","").slice(1,-1).split(",").map(function(n){return+n})}function wre(e){for(var t=[];Ort(e);)t.push(e),e=e.parentNode,typeof ShadowRoot=="function"&&e instanceof ShadowRoot&&(e=e.host);return t}function Ort(e){return e&&(e instanceof Element||e instanceof HTMLElement)}function qrt(e,t){return e&&t&&e.top===t.top&&e.left===t.left&&e.right===t.right&&e.bottom===t.bottom}Tre.exports={getGraphDiv:Prt,isPlotDiv:Irt,removeElement:_re,addStyleRule:Rrt,addRelatedStyleRule:xre,deleteRelatedStyleRule:Drt,setStyleOnHover:Frt,getFullTransformMatrix:zrt,getElementTransformMatrix:bre,getElementAndAncestors:wre,equalDomRects:qrt}});var US=ye((dir,Are)=>{"use strict";Are.exports={mode:{valType:"enumerated",dflt:"afterall",values:["immediate","next","afterall"]},direction:{valType:"enumerated",values:["forward","reverse"],dflt:"forward"},fromcurrent:{valType:"boolean",dflt:!1},frame:{duration:{valType:"number",min:0,dflt:500},redraw:{valType:"boolean",dflt:!0}},transition:{duration:{valType:"number",min:0,dflt:500,editType:"none"},easing:{valType:"enumerated",dflt:"cubic-in-out",values:["linear","quad","cubic","sin","exp","circle","elastic","back","bounce","linear-in","quad-in","cubic-in","sin-in","exp-in","circle-in","elastic-in","back-in","bounce-in","linear-out","quad-out","cubic-out","sin-out","exp-out","circle-out","elastic-out","back-out","bounce-out","linear-in-out","quad-in-out","cubic-in-out","sin-in-out","exp-in-out","circle-in-out","elastic-in-out","back-in-out","bounce-in-out"],editType:"none"},ordering:{valType:"enumerated",values:["layout first","traces first"],dflt:"layout first",editType:"none"}}}});var mc=ye((vir,Pre)=>{"use strict";var Mre=Ao().extendFlat,Brt=my(),Ere={valType:"flaglist",extras:["none"],flags:["calc","clearAxisTypes","plot","style","markerSize","colorbars"]},kre={valType:"flaglist",extras:["none"],flags:["calc","plot","legend","ticks","axrange","layoutstyle","modebar","camera","arraydraw","colorbars"]},Nrt=Ere.flags.slice().concat(["fullReplot"]),Urt=kre.flags.slice().concat("layoutReplot");Pre.exports={traces:Ere,layout:kre,traceFlags:function(){return Sre(Nrt)},layoutFlags:function(){return Sre(Urt)},update:function(e,t){var r=t.editType;if(r&&r!=="none")for(var n=r.split("+"),i=0;i<n.length;i++)e[n[i]]=!0},overrideAll:Cre};function Sre(e){for(var t={},r=0;r<e.length;r++)t[e[r]]=!1;return t}function Cre(e,t,r){var n=Mre({},e);for(var i in n){var a=n[i];Brt(a)&&(n[i]=Lre(a,t,r,i))}return r==="from-root"&&(n.editType=t),n}function Lre(e,t,r,n){if(e.valType){var i=Mre({},e);if(i.editType=t,Array.isArray(e.items)){i.items=new Array(e.items.length);for(var a=0;a<e.items.length;a++)i.items[a]=Lre(e.items[a],t,"from-root")}return i}else return Cre(e,t,n.charAt(0)==="_"?"nested":"from-root")}});var Pd=ye(BO=>{"use strict";BO.dash={valType:"string",values:["solid","dot","dash","longdash","dashdot","longdashdot"],dflt:"solid",editType:"style"};BO.pattern={shape:{valType:"enumerated",values:["","/","\\","x","-","|","+","."],dflt:"",arrayOk:!0,editType:"style"},path:{valType:"string",arrayOk:!0,editType:"style"},fillmode:{valType:"enumerated",values:["replace","overlay"],dflt:"replace",editType:"style"},bgcolor:{valType:"color",arrayOk:!0,editType:"style"},fgcolor:{valType:"color",arrayOk:!0,editType:"style"},fgopacity:{valType:"number",editType:"style",min:0,max:1},size:{valType:"number",min:0,dflt:8,arrayOk:!0,editType:"style"},solidity:{valType:"number",min:0,max:1,dflt:.3,arrayOk:!0,editType:"style"},editType:"style"}});var NO=ye((gir,Ire)=>{"use strict";Ire.exports={FORMAT_LINK:"https://github.com/d3/d3-format/tree/v1.4.5#d3-format",DATE_FORMAT_LINK:"https://github.com/d3/d3-time-format/tree/v2.2.3#locale_format"}});var Ll=ye(u3=>{"use strict";var{DATE_FORMAT_LINK:Vrt,FORMAT_LINK:Grt}=NO(),Hrt=["Variables that can't be found will be replaced with the specifier.",'For example, a template of "data: %{x}, %{y}" will result in a value of "data: 1, %{y}" if x is 1 and y is missing.',"Variables with an undefined value will be replaced with the fallback value."].join(" ");function jrt({supportOther:e}={}){return["Variables are inserted using %{variable},",'for example "y: %{y}"'+(e?" as well as %{xother}, {%_xother}, {%_xother_}, {%xother_}. When showing info for several points, *xother* will be added to those with different x positions from the first point. An underscore before or after *(x|y)other* will add a space on that side, only when this field is shown.":"."),`Numbers are formatted using d3-format's syntax %{variable:d3-format}, for example "Price: %{y:$.2f}".`,Grt,"for details on the formatting syntax.",`Dates are formatted using d3-time-format's syntax %{variable|d3-time-format}, for example "Day: %{2019-01-01|%A}".`,Vrt,"for details on the date formatting syntax.",Hrt].join(" ")}u3.templateFormatStringDescription=jrt;u3.hovertemplateAttrs=({editType:e="none",arrayOk:t}={},r={})=>mg({valType:"string",dflt:"",editType:e},t!==!1?{arrayOk:!0}:{});u3.texttemplateAttrs=({editType:e="calc",arrayOk:t}={},r={})=>mg({valType:"string",dflt:"",editType:e},t!==!1?{arrayOk:!0}:{});u3.shapeTexttemplateAttrs=({editType:e="arraydraw",newshape:t}={},r={})=>({valType:"string",dflt:"",editType:e});u3.templatefallbackAttrs=({editType:e="none"}={})=>({valType:"any",dflt:"-",editType:e})});var M6=ye((_ir,qre)=>{"use strict";function H1(e,t){return t?t.d2l(e):e}function Rre(e,t){return t?t.l2d(e):e}function Wrt(e){return e.x0}function Xrt(e){return e.x1}function Zrt(e){return e.y0}function Yrt(e){return e.y1}function Dre(e){return e.x0shift||0}function Fre(e){return e.x1shift||0}function zre(e){return e.y0shift||0}function Ore(e){return e.y1shift||0}function A6(e,t){return H1(e.x1,t)+Fre(e)-H1(e.x0,t)-Dre(e)}function S6(e,t,r){return H1(e.y1,r)+Ore(e)-H1(e.y0,r)-zre(e)}function Krt(e,t){return Math.abs(A6(e,t))}function Jrt(e,t,r){return Math.abs(S6(e,t,r))}function $rt(e,t,r){return e.type!=="line"?void 0:Math.sqrt(Math.pow(A6(e,t),2)+Math.pow(S6(e,t,r),2))}function Qrt(e,t){return Rre((H1(e.x1,t)+Fre(e)+H1(e.x0,t)+Dre(e))/2,t)}function eit(e,t,r){return Rre((H1(e.y1,r)+Ore(e)+H1(e.y0,r)+zre(e))/2,r)}function tit(e,t,r){return e.type!=="line"?void 0:S6(e,t,r)/A6(e,t)}qre.exports={x0:Wrt,x1:Xrt,y0:Zrt,y1:Yrt,slope:tit,dx:A6,dy:S6,width:Krt,height:Jrt,length:$rt,xcenter:Qrt,ycenter:eit}});var Ure=ye((xir,Nre)=>{"use strict";var rit=mc().overrideAll,fb=Gl(),Bre=ec(),iit=Pd().dash,j1=Ao().extendFlat,{shapeTexttemplateAttrs:nit,templatefallbackAttrs:ait}=Ll(),oit=M6();Nre.exports=rit({newshape:{visible:j1({},fb.visible,{}),showlegend:{valType:"boolean",dflt:!1},legend:j1({},fb.legend,{}),legendgroup:j1({},fb.legendgroup,{}),legendgrouptitle:{text:j1({},fb.legendgrouptitle.text,{}),font:Bre({})},legendrank:j1({},fb.legendrank,{}),legendwidth:j1({},fb.legendwidth,{}),line:{color:{valType:"color"},width:{valType:"number",min:0,dflt:4},dash:j1({},iit,{dflt:"solid"})},fillcolor:{valType:"color",dflt:"rgba(0,0,0,0)"},fillrule:{valType:"enumerated",values:["evenodd","nonzero"],dflt:"evenodd"},opacity:{valType:"number",min:0,max:1,dflt:1},layer:{valType:"enumerated",values:["below","above","between"],dflt:"above"},drawdirection:{valType:"enumerated",values:["ortho","horizontal","vertical","diagonal"],dflt:"diagonal"},name:j1({},fb.name,{}),label:{text:{valType:"string",dflt:""},texttemplate:nit({newshape:!0},{keys:Object.keys(oit)}),texttemplatefallback:ait({editType:"arraydraw"}),font:Bre({}),textposition:{valType:"enumerated",values:["top left","top center","top right","middle left","middle center","middle right","bottom left","bottom center","bottom right","start","middle","end"]},textangle:{valType:"angle",dflt:"auto"},xanchor:{valType:"enumerated",values:["auto","left","center","right"],dflt:"auto"},yanchor:{valType:"enumerated",values:["top","middle","bottom"]},padding:{valType:"number",dflt:3,min:0}}},activeshape:{fillcolor:{valType:"color",dflt:"rgb(255,0,255)",description:"Sets the color filling the active shape' interior."},opacity:{valType:"number",min:0,max:1,dflt:.5}}},"none","from-root")});var Gre=ye((bir,Vre)=>{"use strict";var sit=Pd().dash,lit=Ao().extendFlat;Vre.exports={newselection:{mode:{valType:"enumerated",values:["immediate","gradual"],dflt:"immediate",editType:"none"},line:{color:{valType:"color",editType:"none"},width:{valType:"number",min:1,dflt:1,editType:"none"},dash:lit({},sit,{dflt:"dot",editType:"none"}),editType:"none"},editType:"none"},activeselection:{fillcolor:{valType:"color",dflt:"rgba(0,0,0,0)",editType:"none"},opacity:{valType:"number",min:0,max:1,dflt:.5,editType:"none"},editType:"none"}}});var E6=ye((wir,Hre)=>{"use strict";Hre.exports=function(e){var t=e.editType;return{t:{valType:"number",dflt:0,editType:t},r:{valType:"number",dflt:0,editType:t},b:{valType:"number",dflt:0,editType:t},l:{valType:"number",dflt:0,editType:t},editType:t}}});var c3=ye((Tir,Zre)=>{"use strict";var UO=ec(),uit=US(),k6=Lh(),jre=Ure(),Wre=Gre(),cit=E6(),Xre=Ao().extendFlat,C6=UO({editType:"calc"});C6.family.dflt='"Open Sans", verdana, arial, sans-serif';C6.size.dflt=12;C6.color.dflt=k6.defaultLine;Zre.exports={font:C6,title:{text:{valType:"string",editType:"layoutstyle"},font:UO({editType:"layoutstyle"}),subtitle:{text:{valType:"string",editType:"layoutstyle"},font:UO({editType:"layoutstyle"}),editType:"layoutstyle"},xref:{valType:"enumerated",dflt:"container",values:["container","paper"],editType:"layoutstyle"},yref:{valType:"enumerated",dflt:"container",values:["container","paper"],editType:"layoutstyle"},x:{valType:"number",min:0,max:1,dflt:.5,editType:"layoutstyle"},y:{valType:"number",min:0,max:1,dflt:"auto",editType:"layoutstyle"},xanchor:{valType:"enumerated",dflt:"auto",values:["auto","left","center","right"],editType:"layoutstyle"},yanchor:{valType:"enumerated",dflt:"auto",values:["auto","top","middle","bottom"],editType:"layoutstyle"},pad:Xre(cit({editType:"layoutstyle"}),{}),automargin:{valType:"boolean",dflt:!1,editType:"plot"},editType:"layoutstyle"},uniformtext:{mode:{valType:"enumerated",values:[!1,"hide","show"],dflt:!1,editType:"plot"},minsize:{valType:"number",min:0,dflt:0,editType:"plot"},editType:"plot"},autosize:{valType:"boolean",dflt:!1,editType:"none"},width:{valType:"number",min:10,dflt:700,editType:"plot"},height:{valType:"number",min:10,dflt:450,editType:"plot"},minreducedwidth:{valType:"number",min:2,dflt:64,editType:"plot"},minreducedheight:{valType:"number",min:2,dflt:64,editType:"plot"},margin:{l:{valType:"number",min:0,dflt:80,editType:"plot"},r:{valType:"number",min:0,dflt:80,editType:"plot"},t:{valType:"number",min:0,dflt:100,editType:"plot"},b:{valType:"number",min:0,dflt:80,editType:"plot"},pad:{valType:"number",min:0,dflt:0,editType:"plot"},autoexpand:{valType:"boolean",dflt:!0,editType:"plot"},editType:"plot"},computed:{valType:"any",editType:"none"},paper_bgcolor:{valType:"color",dflt:k6.background,editType:"plot"},plot_bgcolor:{valType:"color",dflt:k6.background,editType:"layoutstyle"},autotypenumbers:{valType:"enumerated",values:["convert types","strict"],dflt:"convert types",editType:"calc"},separators:{valType:"string",editType:"plot"},hidesources:{valType:"boolean",dflt:!1,editType:"plot"},showlegend:{valType:"boolean",editType:"legend"},colorway:{valType:"colorlist",dflt:k6.defaults,editType:"calc"},datarevision:{valType:"any",editType:"calc"},uirevision:{valType:"any",editType:"none"},editrevision:{valType:"any",editType:"none"},selectionrevision:{valType:"any",editType:"none"},template:{valType:"any",editType:"calc"},newshape:jre.newshape,activeshape:jre.activeshape,newselection:Wre.newselection,activeselection:Wre.activeselection,meta:{valType:"any",arrayOk:!0,editType:"plot"},transition:Xre({},uit.transition,{editType:"none"})}});var Yre=ye(()=>{(function(){if(!document.getElementById("696e55e75aaafa12d45b3ff634eadc8348f9c3015fc94984dac1ff824773eb97")){var e=document.createElement("style");e.id="696e55e75aaafa12d45b3ff634eadc8348f9c3015fc94984dac1ff824773eb97",e.textContent=`.maplibregl-map{font:12px/20px Helvetica Neue,Arial,Helvetica,sans-serif;overflow:hidden;position:relative;-webkit-tap-highlight-color:rgb(0 0 0/0)}.maplibregl-canvas{left:0;position:absolute;top:0}.maplibregl-map:fullscreen{height:100%;width:100%}.maplibregl-ctrl-group button.maplibregl-ctrl-compass{touch-action:none}.maplibregl-canvas-container.maplibregl-interactive,.maplibregl-ctrl-group button.maplibregl-ctrl-compass{cursor:grab;-webkit-user-select:none;-moz-user-select:none;user-select:none}.maplibregl-canvas-container.maplibregl-interactive.maplibregl-track-pointer{cursor:pointer}.maplibregl-canvas-container.maplibregl-interactive:active,.maplibregl-ctrl-group button.maplibregl-ctrl-compass:active{cursor:grabbing}.maplibregl-canvas-container.maplibregl-touch-zoom-rotate,.maplibregl-canvas-container.maplibregl-touch-zoom-rotate .maplibregl-canvas{touch-action:pan-x pan-y}.maplibregl-canvas-container.maplibregl-touch-drag-pan,.maplibregl-canvas-container.maplibregl-touch-drag-pan .maplibregl-canvas{touch-action:pinch-zoom}.maplibregl-canvas-container.maplibregl-touch-zoom-rotate.maplibregl-touch-drag-pan,.maplibregl-canvas-container.maplibregl-touch-zoom-rotate.maplibregl-touch-drag-pan .maplibregl-canvas{touch-action:none}.maplibregl-canvas-container.maplibregl-touch-drag-pan.maplibregl-cooperative-gestures,.maplibregl-canvas-container.maplibregl-touch-drag-pan.maplibregl-cooperative-gestures .maplibregl-canvas{touch-action:pan-x pan-y}.maplibregl-ctrl-bottom-left,.maplibregl-ctrl-bottom-right,.maplibregl-ctrl-top-left,.maplibregl-ctrl-top-right{pointer-events:none;position:absolute;z-index:2}.maplibregl-ctrl-top-left{left:0;top:0}.maplibregl-ctrl-top-right{right:0;top:0}.maplibregl-ctrl-bottom-left{bottom:0;left:0}.maplibregl-ctrl-bottom-right{bottom:0;right:0}.maplibregl-ctrl{clear:both;pointer-events:auto;transform:translate(0)}.maplibregl-ctrl-top-left .maplibregl-ctrl{float:left;margin:10px 0 0 10px}.maplibregl-ctrl-top-right .maplibregl-ctrl{float:right;margin:10px 10px 0 0}.maplibregl-ctrl-bottom-left .maplibregl-ctrl{float:left;margin:0 0 10px 10px}.maplibregl-ctrl-bottom-right .maplibregl-ctrl{float:right;margin:0 10px 10px 0}.maplibregl-ctrl-group{background:#fff;border-radius:4px}.maplibregl-ctrl-group:not(:empty){box-shadow:0 0 0 2px rgba(0,0,0,.1)}@media (forced-colors:active){.maplibregl-ctrl-group:not(:empty){box-shadow:0 0 0 2px ButtonText}}.maplibregl-ctrl-group button{background-color:transparent;border:0;box-sizing:border-box;cursor:pointer;display:block;height:29px;outline:none;padding:0;width:29px}.maplibregl-ctrl-group button+button{border-top:1px solid #ddd}.maplibregl-ctrl button .maplibregl-ctrl-icon{background-position:50%;background-repeat:no-repeat;display:block;height:100%;width:100%}@media (forced-colors:active){.maplibregl-ctrl-icon{background-color:transparent}.maplibregl-ctrl-group button+button{border-top:1px solid ButtonText}}.maplibregl-ctrl button::-moz-focus-inner{border:0;padding:0}.maplibregl-ctrl-attrib-button:focus,.maplibregl-ctrl-group button:focus{box-shadow:0 0 2px 2px #0096ff}.maplibregl-ctrl button:disabled{cursor:not-allowed}.maplibregl-ctrl button:disabled .maplibregl-ctrl-icon{opacity:.25}.maplibregl-ctrl button:not(:disabled):hover{background-color:rgb(0 0 0/5%)}.maplibregl-ctrl-group button:focus:focus-visible{box-shadow:0 0 2px 2px #0096ff}.maplibregl-ctrl-group button:focus:not(:focus-visible){box-shadow:none}.maplibregl-ctrl-group button:focus:first-child{border-radius:4px 4px 0 0}.maplibregl-ctrl-group button:focus:last-child{border-radius:0 0 4px 4px}.maplibregl-ctrl-group button:focus:only-child{border-radius:inherit}.maplibregl-ctrl button.maplibregl-ctrl-zoom-out .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23333' viewBox='0 0 29 29'%3E%3Cpath d='M10 13c-.75 0-1.5.75-1.5 1.5S9.25 16 10 16h9c.75 0 1.5-.75 1.5-1.5S19.75 13 19 13z'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-zoom-in .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23333' viewBox='0 0 29 29'%3E%3Cpath d='M14.5 8.5c-.75 0-1.5.75-1.5 1.5v3h-3c-.75 0-1.5.75-1.5 1.5S9.25 16 10 16h3v3c0 .75.75 1.5 1.5 1.5S16 19.75 16 19v-3h3c.75 0 1.5-.75 1.5-1.5S19.75 13 19 13h-3v-3c0-.75-.75-1.5-1.5-1.5'/%3E%3C/svg%3E")}@media (forced-colors:active){.maplibregl-ctrl button.maplibregl-ctrl-zoom-out .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23fff' viewBox='0 0 29 29'%3E%3Cpath d='M10 13c-.75 0-1.5.75-1.5 1.5S9.25 16 10 16h9c.75 0 1.5-.75 1.5-1.5S19.75 13 19 13z'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-zoom-in .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23fff' viewBox='0 0 29 29'%3E%3Cpath d='M14.5 8.5c-.75 0-1.5.75-1.5 1.5v3h-3c-.75 0-1.5.75-1.5 1.5S9.25 16 10 16h3v3c0 .75.75 1.5 1.5 1.5S16 19.75 16 19v-3h3c.75 0 1.5-.75 1.5-1.5S19.75 13 19 13h-3v-3c0-.75-.75-1.5-1.5-1.5'/%3E%3C/svg%3E")}}@media (forced-colors:active) and (prefers-color-scheme:light){.maplibregl-ctrl button.maplibregl-ctrl-zoom-out .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' viewBox='0 0 29 29'%3E%3Cpath d='M10 13c-.75 0-1.5.75-1.5 1.5S9.25 16 10 16h9c.75 0 1.5-.75 1.5-1.5S19.75 13 19 13z'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-zoom-in .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' viewBox='0 0 29 29'%3E%3Cpath d='M14.5 8.5c-.75 0-1.5.75-1.5 1.5v3h-3c-.75 0-1.5.75-1.5 1.5S9.25 16 10 16h3v3c0 .75.75 1.5 1.5 1.5S16 19.75 16 19v-3h3c.75 0 1.5-.75 1.5-1.5S19.75 13 19 13h-3v-3c0-.75-.75-1.5-1.5-1.5'/%3E%3C/svg%3E")}}.maplibregl-ctrl button.maplibregl-ctrl-fullscreen .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23333' viewBox='0 0 29 29'%3E%3Cpath d='M24 16v5.5c0 1.75-.75 2.5-2.5 2.5H16v-1l3-1.5-4-5.5 1-1 5.5 4 1.5-3zM6 16l1.5 3 5.5-4 1 1-4 5.5 3 1.5v1H7.5C5.75 24 5 23.25 5 21.5V16zm7-11v1l-3 1.5 4 5.5-1 1-5.5-4L6 13H5V7.5C5 5.75 5.75 5 7.5 5zm11 2.5c0-1.75-.75-2.5-2.5-2.5H16v1l3 1.5-4 5.5 1 1 5.5-4 1.5 3h1z'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-shrink .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' viewBox='0 0 29 29'%3E%3Cpath d='M18.5 16c-1.75 0-2.5.75-2.5 2.5V24h1l1.5-3 5.5 4 1-1-4-5.5 3-1.5v-1zM13 18.5c0-1.75-.75-2.5-2.5-2.5H5v1l3 1.5L4 24l1 1 5.5-4 1.5 3h1zm3-8c0 1.75.75 2.5 2.5 2.5H24v-1l-3-1.5L25 5l-1-1-5.5 4L17 5h-1zM10.5 13c1.75 0 2.5-.75 2.5-2.5V5h-1l-1.5 3L5 4 4 5l4 5.5L5 12v1z'/%3E%3C/svg%3E")}@media (forced-colors:active){.maplibregl-ctrl button.maplibregl-ctrl-fullscreen .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23fff' viewBox='0 0 29 29'%3E%3Cpath d='M24 16v5.5c0 1.75-.75 2.5-2.5 2.5H16v-1l3-1.5-4-5.5 1-1 5.5 4 1.5-3zM6 16l1.5 3 5.5-4 1 1-4 5.5 3 1.5v1H7.5C5.75 24 5 23.25 5 21.5V16zm7-11v1l-3 1.5 4 5.5-1 1-5.5-4L6 13H5V7.5C5 5.75 5.75 5 7.5 5zm11 2.5c0-1.75-.75-2.5-2.5-2.5H16v1l3 1.5-4 5.5 1 1 5.5-4 1.5 3h1z'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-shrink .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23fff' viewBox='0 0 29 29'%3E%3Cpath d='M18.5 16c-1.75 0-2.5.75-2.5 2.5V24h1l1.5-3 5.5 4 1-1-4-5.5 3-1.5v-1zM13 18.5c0-1.75-.75-2.5-2.5-2.5H5v1l3 1.5L4 24l1 1 5.5-4 1.5 3h1zm3-8c0 1.75.75 2.5 2.5 2.5H24v-1l-3-1.5L25 5l-1-1-5.5 4L17 5h-1zM10.5 13c1.75 0 2.5-.75 2.5-2.5V5h-1l-1.5 3L5 4 4 5l4 5.5L5 12v1z'/%3E%3C/svg%3E")}}@media (forced-colors:active) and (prefers-color-scheme:light){.maplibregl-ctrl button.maplibregl-ctrl-fullscreen .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' viewBox='0 0 29 29'%3E%3Cpath d='M24 16v5.5c0 1.75-.75 2.5-2.5 2.5H16v-1l3-1.5-4-5.5 1-1 5.5 4 1.5-3zM6 16l1.5 3 5.5-4 1 1-4 5.5 3 1.5v1H7.5C5.75 24 5 23.25 5 21.5V16zm7-11v1l-3 1.5 4 5.5-1 1-5.5-4L6 13H5V7.5C5 5.75 5.75 5 7.5 5zm11 2.5c0-1.75-.75-2.5-2.5-2.5H16v1l3 1.5-4 5.5 1 1 5.5-4 1.5 3h1z'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-shrink .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' viewBox='0 0 29 29'%3E%3Cpath d='M18.5 16c-1.75 0-2.5.75-2.5 2.5V24h1l1.5-3 5.5 4 1-1-4-5.5 3-1.5v-1zM13 18.5c0-1.75-.75-2.5-2.5-2.5H5v1l3 1.5L4 24l1 1 5.5-4 1.5 3h1zm3-8c0 1.75.75 2.5 2.5 2.5H24v-1l-3-1.5L25 5l-1-1-5.5 4L17 5h-1zM10.5 13c1.75 0 2.5-.75 2.5-2.5V5h-1l-1.5 3L5 4 4 5l4 5.5L5 12v1z'/%3E%3C/svg%3E")}}.maplibregl-ctrl button.maplibregl-ctrl-compass .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23333' viewBox='0 0 29 29'%3E%3Cpath d='m10.5 14 4-8 4 8z'/%3E%3Cpath fill='%23ccc' d='m10.5 16 4 8 4-8z'/%3E%3C/svg%3E")}@media (forced-colors:active){.maplibregl-ctrl button.maplibregl-ctrl-compass .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23fff' viewBox='0 0 29 29'%3E%3Cpath d='m10.5 14 4-8 4 8z'/%3E%3Cpath fill='%23ccc' d='m10.5 16 4 8 4-8z'/%3E%3C/svg%3E")}}@media (forced-colors:active) and (prefers-color-scheme:light){.maplibregl-ctrl button.maplibregl-ctrl-compass .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' viewBox='0 0 29 29'%3E%3Cpath d='m10.5 14 4-8 4 8z'/%3E%3Cpath fill='%23ccc' d='m10.5 16 4 8 4-8z'/%3E%3C/svg%3E")}}.maplibregl-ctrl button.maplibregl-ctrl-terrain .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='22' height='22' fill='%23333' viewBox='0 0 22 22'%3E%3Cpath d='m1.754 13.406 4.453-4.851 3.09 3.09 3.281 3.277.969-.969-3.309-3.312 3.844-4.121 6.148 6.886h1.082v-.855l-7.207-8.07-4.84 5.187L6.169 6.57l-5.48 5.965v.871ZM.688 16.844h20.625v1.375H.688Zm0 0'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-terrain-enabled .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='22' height='22' fill='%2333b5e5' viewBox='0 0 22 22'%3E%3Cpath d='m1.754 13.406 4.453-4.851 3.09 3.09 3.281 3.277.969-.969-3.309-3.312 3.844-4.121 6.148 6.886h1.082v-.855l-7.207-8.07-4.84 5.187L6.169 6.57l-5.48 5.965v.871ZM.688 16.844h20.625v1.375H.688Zm0 0'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-geolocate .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23333' viewBox='0 0 20 20'%3E%3Cpath d='M10 4C9 4 9 5 9 5v.1A5 5 0 0 0 5.1 9H5s-1 0-1 1 1 1 1 1h.1A5 5 0 0 0 9 14.9v.1s0 1 1 1 1-1 1-1v-.1a5 5 0 0 0 3.9-3.9h.1s1 0 1-1-1-1-1-1h-.1A5 5 0 0 0 11 5.1V5s0-1-1-1m0 2.5a3.5 3.5 0 1 1 0 7 3.5 3.5 0 1 1 0-7'/%3E%3Ccircle cx='10' cy='10' r='2'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-geolocate:disabled .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23aaa' viewBox='0 0 20 20'%3E%3Cpath d='M10 4C9 4 9 5 9 5v.1A5 5 0 0 0 5.1 9H5s-1 0-1 1 1 1 1 1h.1A5 5 0 0 0 9 14.9v.1s0 1 1 1 1-1 1-1v-.1a5 5 0 0 0 3.9-3.9h.1s1 0 1-1-1-1-1-1h-.1A5 5 0 0 0 11 5.1V5s0-1-1-1m0 2.5a3.5 3.5 0 1 1 0 7 3.5 3.5 0 1 1 0-7'/%3E%3Ccircle cx='10' cy='10' r='2'/%3E%3Cpath fill='red' d='m14 5 1 1-9 9-1-1z'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-geolocate.maplibregl-ctrl-geolocate-active .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%2333b5e5' viewBox='0 0 20 20'%3E%3Cpath d='M10 4C9 4 9 5 9 5v.1A5 5 0 0 0 5.1 9H5s-1 0-1 1 1 1 1 1h.1A5 5 0 0 0 9 14.9v.1s0 1 1 1 1-1 1-1v-.1a5 5 0 0 0 3.9-3.9h.1s1 0 1-1-1-1-1-1h-.1A5 5 0 0 0 11 5.1V5s0-1-1-1m0 2.5a3.5 3.5 0 1 1 0 7 3.5 3.5 0 1 1 0-7'/%3E%3Ccircle cx='10' cy='10' r='2'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-geolocate.maplibregl-ctrl-geolocate-active-error .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23e58978' viewBox='0 0 20 20'%3E%3Cpath d='M10 4C9 4 9 5 9 5v.1A5 5 0 0 0 5.1 9H5s-1 0-1 1 1 1 1 1h.1A5 5 0 0 0 9 14.9v.1s0 1 1 1 1-1 1-1v-.1a5 5 0 0 0 3.9-3.9h.1s1 0 1-1-1-1-1-1h-.1A5 5 0 0 0 11 5.1V5s0-1-1-1m0 2.5a3.5 3.5 0 1 1 0 7 3.5 3.5 0 1 1 0-7'/%3E%3Ccircle cx='10' cy='10' r='2'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-geolocate.maplibregl-ctrl-geolocate-background .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%2333b5e5' viewBox='0 0 20 20'%3E%3Cpath d='M10 4C9 4 9 5 9 5v.1A5 5 0 0 0 5.1 9H5s-1 0-1 1 1 1 1 1h.1A5 5 0 0 0 9 14.9v.1s0 1 1 1 1-1 1-1v-.1a5 5 0 0 0 3.9-3.9h.1s1 0 1-1-1-1-1-1h-.1A5 5 0 0 0 11 5.1V5s0-1-1-1m0 2.5a3.5 3.5 0 1 1 0 7 3.5 3.5 0 1 1 0-7'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-geolocate.maplibregl-ctrl-geolocate-background-error .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23e54e33' viewBox='0 0 20 20'%3E%3Cpath d='M10 4C9 4 9 5 9 5v.1A5 5 0 0 0 5.1 9H5s-1 0-1 1 1 1 1 1h.1A5 5 0 0 0 9 14.9v.1s0 1 1 1 1-1 1-1v-.1a5 5 0 0 0 3.9-3.9h.1s1 0 1-1-1-1-1-1h-.1A5 5 0 0 0 11 5.1V5s0-1-1-1m0 2.5a3.5 3.5 0 1 1 0 7 3.5 3.5 0 1 1 0-7'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-geolocate.maplibregl-ctrl-geolocate-waiting .maplibregl-ctrl-icon{animation:maplibregl-spin 2s linear infinite}@media (forced-colors:active){.maplibregl-ctrl button.maplibregl-ctrl-geolocate .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23fff' viewBox='0 0 20 20'%3E%3Cpath d='M10 4C9 4 9 5 9 5v.1A5 5 0 0 0 5.1 9H5s-1 0-1 1 1 1 1 1h.1A5 5 0 0 0 9 14.9v.1s0 1 1 1 1-1 1-1v-.1a5 5 0 0 0 3.9-3.9h.1s1 0 1-1-1-1-1-1h-.1A5 5 0 0 0 11 5.1V5s0-1-1-1m0 2.5a3.5 3.5 0 1 1 0 7 3.5 3.5 0 1 1 0-7'/%3E%3Ccircle cx='10' cy='10' r='2'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-geolocate:disabled .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23999' viewBox='0 0 20 20'%3E%3Cpath d='M10 4C9 4 9 5 9 5v.1A5 5 0 0 0 5.1 9H5s-1 0-1 1 1 1 1 1h.1A5 5 0 0 0 9 14.9v.1s0 1 1 1 1-1 1-1v-.1a5 5 0 0 0 3.9-3.9h.1s1 0 1-1-1-1-1-1h-.1A5 5 0 0 0 11 5.1V5s0-1-1-1m0 2.5a3.5 3.5 0 1 1 0 7 3.5 3.5 0 1 1 0-7'/%3E%3Ccircle cx='10' cy='10' r='2'/%3E%3Cpath fill='red' d='m14 5 1 1-9 9-1-1z'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-geolocate.maplibregl-ctrl-geolocate-active .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%2333b5e5' viewBox='0 0 20 20'%3E%3Cpath d='M10 4C9 4 9 5 9 5v.1A5 5 0 0 0 5.1 9H5s-1 0-1 1 1 1 1 1h.1A5 5 0 0 0 9 14.9v.1s0 1 1 1 1-1 1-1v-.1a5 5 0 0 0 3.9-3.9h.1s1 0 1-1-1-1-1-1h-.1A5 5 0 0 0 11 5.1V5s0-1-1-1m0 2.5a3.5 3.5 0 1 1 0 7 3.5 3.5 0 1 1 0-7'/%3E%3Ccircle cx='10' cy='10' r='2'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-geolocate.maplibregl-ctrl-geolocate-active-error .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23e58978' viewBox='0 0 20 20'%3E%3Cpath d='M10 4C9 4 9 5 9 5v.1A5 5 0 0 0 5.1 9H5s-1 0-1 1 1 1 1 1h.1A5 5 0 0 0 9 14.9v.1s0 1 1 1 1-1 1-1v-.1a5 5 0 0 0 3.9-3.9h.1s1 0 1-1-1-1-1-1h-.1A5 5 0 0 0 11 5.1V5s0-1-1-1m0 2.5a3.5 3.5 0 1 1 0 7 3.5 3.5 0 1 1 0-7'/%3E%3Ccircle cx='10' cy='10' r='2'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-geolocate.maplibregl-ctrl-geolocate-background .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%2333b5e5' viewBox='0 0 20 20'%3E%3Cpath d='M10 4C9 4 9 5 9 5v.1A5 5 0 0 0 5.1 9H5s-1 0-1 1 1 1 1 1h.1A5 5 0 0 0 9 14.9v.1s0 1 1 1 1-1 1-1v-.1a5 5 0 0 0 3.9-3.9h.1s1 0 1-1-1-1-1-1h-.1A5 5 0 0 0 11 5.1V5s0-1-1-1m0 2.5a3.5 3.5 0 1 1 0 7 3.5 3.5 0 1 1 0-7'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-geolocate.maplibregl-ctrl-geolocate-background-error .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23e54e33' viewBox='0 0 20 20'%3E%3Cpath d='M10 4C9 4 9 5 9 5v.1A5 5 0 0 0 5.1 9H5s-1 0-1 1 1 1 1 1h.1A5 5 0 0 0 9 14.9v.1s0 1 1 1 1-1 1-1v-.1a5 5 0 0 0 3.9-3.9h.1s1 0 1-1-1-1-1-1h-.1A5 5 0 0 0 11 5.1V5s0-1-1-1m0 2.5a3.5 3.5 0 1 1 0 7 3.5 3.5 0 1 1 0-7'/%3E%3C/svg%3E")}}@media (forced-colors:active) and (prefers-color-scheme:light){.maplibregl-ctrl button.maplibregl-ctrl-geolocate .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' viewBox='0 0 20 20'%3E%3Cpath d='M10 4C9 4 9 5 9 5v.1A5 5 0 0 0 5.1 9H5s-1 0-1 1 1 1 1 1h.1A5 5 0 0 0 9 14.9v.1s0 1 1 1 1-1 1-1v-.1a5 5 0 0 0 3.9-3.9h.1s1 0 1-1-1-1-1-1h-.1A5 5 0 0 0 11 5.1V5s0-1-1-1m0 2.5a3.5 3.5 0 1 1 0 7 3.5 3.5 0 1 1 0-7'/%3E%3Ccircle cx='10' cy='10' r='2'/%3E%3C/svg%3E")}.maplibregl-ctrl button.maplibregl-ctrl-geolocate:disabled .maplibregl-ctrl-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='29' height='29' fill='%23666' viewBox='0 0 20 20'%3E%3Cpath d='M10 4C9 4 9 5 9 5v.1A5 5 0 0 0 5.1 9H5s-1 0-1 1 1 1 1 1h.1A5 5 0 0 0 9 14.9v.1s0 1 1 1 1-1 1-1v-.1a5 5 0 0 0 3.9-3.9h.1s1 0 1-1-1-1-1-1h-.1A5 5 0 0 0 11 5.1V5s0-1-1-1m0 2.5a3.5 3.5 0 1 1 0 7 3.5 3.5 0 1 1 0-7'/%3E%3Ccircle cx='10' cy='10' r='2'/%3E%3Cpath fill='red' d='m14 5 1 1-9 9-1-1z'/%3E%3C/svg%3E")}}@keyframes maplibregl-spin{0%{transform:rotate(0deg)}to{transform:rotate(1turn)}}a.maplibregl-ctrl-logo{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='88' height='23' fill='none'%3E%3Cpath fill='%23000' fill-opacity='.4' fill-rule='evenodd' d='M17.408 16.796h-1.827l2.501-12.095h.198l3.324 6.533.988 2.19.988-2.19 3.258-6.533h.181l2.6 12.095h-1.81l-1.218-5.644-.362-1.71-.658 1.71-2.929 5.644h-.098l-2.914-5.644-.757-1.71-.345 1.71zm1.958-3.42-.726 3.663a1.255 1.255 0 0 1-1.232 1.011h-1.827a1.255 1.255 0 0 1-1.229-1.509l2.501-12.095a1.255 1.255 0 0 1 1.23-1.001h.197a1.25 1.25 0 0 1 1.12.685l3.19 6.273 3.125-6.263a1.25 1.25 0 0 1 1.123-.695h.181a1.255 1.255 0 0 1 1.227.991l1.443 6.71a5 5 0 0 1 .314-.787l.009-.016a4.6 4.6 0 0 1 1.777-1.887c.782-.46 1.668-.667 2.611-.667a4.6 4.6 0 0 1 1.7.32l.306.134c.21-.16.474-.256.759-.256h1.694a1.255 1.255 0 0 1 1.212.925 1.255 1.255 0 0 1 1.212-.925h1.711c.284 0 .545.094.755.252.613-.3 1.312-.45 2.075-.45 1.356 0 2.557.445 3.482 1.4q.47.48.763 1.064V4.701a1.255 1.255 0 0 1 1.255-1.255h1.86A1.255 1.255 0 0 1 54.44 4.7v9.194h2.217c.19 0 .37.043.532.118v-4.77c0-.356.147-.678.385-.906a2.42 2.42 0 0 1-.682-1.71c0-.665.267-1.253.735-1.7a2.45 2.45 0 0 1 1.722-.674 2.43 2.43 0 0 1 1.705.675q.318.302.504.683V4.7a1.255 1.255 0 0 1 1.255-1.255h1.744A1.255 1.255 0 0 1 65.812 4.7v3.335a4.8 4.8 0 0 1 1.526-.246c.938 0 1.817.214 2.59.69a4.47 4.47 0 0 1 1.67 1.743v-.98a1.255 1.255 0 0 1 1.256-1.256h1.777c.233 0 .451.064.639.174a3.4 3.4 0 0 1 1.567-.372c.346 0 .861.02 1.285.232a1.25 1.25 0 0 1 .689 1.004 4.7 4.7 0 0 1 .853-.588c.795-.44 1.675-.647 2.61-.647 1.385 0 2.65.39 3.525 1.396.836.938 1.168 2.173 1.168 3.528q-.001.515-.056 1.051a1.255 1.255 0 0 1-.947 1.09l.408.952a1.255 1.255 0 0 1-.477 1.552c-.418.268-.92.463-1.458.612-.613.171-1.304.244-2.049.244-1.06 0-2.043-.207-2.886-.698l-.015-.008c-.798-.48-1.419-1.135-1.818-1.963l-.004-.008a5.8 5.8 0 0 1-.548-2.512q0-.429.053-.843a1.3 1.3 0 0 1-.333-.086l-.166-.004c-.223 0-.426.062-.643.228-.03.024-.142.139-.142.59v3.883a1.255 1.255 0 0 1-1.256 1.256h-1.777a1.255 1.255 0 0 1-1.256-1.256V15.69l-.032.057a4.8 4.8 0 0 1-1.86 1.833 5.04 5.04 0 0 1-2.484.634 4.5 4.5 0 0 1-1.935-.424 1.25 1.25 0 0 1-.764.258h-1.71a1.255 1.255 0 0 1-1.256-1.255V7.687a2.4 2.4 0 0 1-.428.625c.253.23.412.561.412.93v7.553a1.255 1.255 0 0 1-1.256 1.255h-1.843a1.25 1.25 0 0 1-.894-.373c-.228.23-.544.373-.894.373H51.32a1.255 1.255 0 0 1-1.256-1.255v-1.251l-.061.117a4.7 4.7 0 0 1-1.782 1.884 4.77 4.77 0 0 1-2.485.67 5.6 5.6 0 0 1-1.485-.188l.009 2.764a1.255 1.255 0 0 1-1.255 1.259h-1.729a1.255 1.255 0 0 1-1.255-1.255v-3.537a1.255 1.255 0 0 1-1.167.793h-1.679a1.25 1.25 0 0 1-.77-.263 4.5 4.5 0 0 1-1.945.429c-.885 0-1.724-.21-2.495-.632l-.017-.01a5 5 0 0 1-1.081-.836 1.255 1.255 0 0 1-1.254 1.312h-1.81a1.255 1.255 0 0 1-1.228-.99l-.782-3.625-2.044 3.939a1.25 1.25 0 0 1-1.115.676h-.098a1.25 1.25 0 0 1-1.116-.68l-2.061-3.994zM35.92 16.63l.207-.114.223-.15q.493-.356.735-.785l.061-.118.033 1.332h1.678V9.242h-1.694l-.033 1.267q-.133-.329-.526-.658l-.032-.028a3.2 3.2 0 0 0-.668-.428l-.27-.12a3.3 3.3 0 0 0-1.235-.23q-1.136-.001-1.974.493a3.36 3.36 0 0 0-1.3 1.382q-.445.89-.444 2.074 0 1.2.51 2.107a3.8 3.8 0 0 0 1.382 1.381 3.9 3.9 0 0 0 1.893.477q.795 0 1.455-.33zm-2.789-5.38q-.576.675-.575 1.762 0 1.102.559 1.794.576.675 1.645.675a2.25 2.25 0 0 0 .934-.19 2.2 2.2 0 0 0 .468-.29l.178-.161a2.2 2.2 0 0 0 .397-.561q.244-.5.244-1.15v-.115q0-.708-.296-1.267l-.043-.077a2.2 2.2 0 0 0-.633-.709l-.13-.086-.047-.028a2.1 2.1 0 0 0-1.073-.285q-1.052 0-1.629.692zm2.316 2.706c.163-.17.28-.407.28-.83v-.114c0-.292-.06-.508-.15-.68a.96.96 0 0 0-.353-.389.85.85 0 0 0-.464-.127c-.4 0-.56.114-.664.239l-.01.012c-.148.174-.275.45-.275.945 0 .506.122.801.27.99.097.11.266.224.68.224.303 0 .504-.09.687-.269zm7.545 1.705a2.6 2.6 0 0 0 .331.423q.319.33.755.548l.173.074q.65.255 1.49.255 1.02 0 1.844-.493a3.45 3.45 0 0 0 1.316-1.4q.493-.904.493-2.089 0-1.909-.988-2.913-.988-1.02-2.584-1.02-.898 0-1.575.347a3 3 0 0 0-.415.262l-.199.166a3.4 3.4 0 0 0-.64.82V9.242h-1.712v11.553h1.729l-.017-5.134zm.53-1.138q.206.29.48.5l.155.11.053.034q.51.296 1.119.297 1.07 0 1.645-.675.577-.69.576-1.762 0-1.119-.576-1.777-.558-.675-1.645-.675-.435 0-.835.16a2 2 0 0 0-.284.136 2 2 0 0 0-.363.254 2.2 2.2 0 0 0-.46.569l-.082.162a2.6 2.6 0 0 0-.213 1.072v.115q0 .707.296 1.267l.135.211zm.964-.818a1.1 1.1 0 0 0 .367.385.94.94 0 0 0 .476.118c.423 0 .59-.117.687-.23.159-.194.28-.478.28-.95 0-.53-.133-.8-.266-.952l-.021-.025c-.078-.094-.231-.221-.68-.221a1 1 0 0 0-.503.135l-.012.007a.86.86 0 0 0-.335.343c-.073.133-.132.324-.132.614v.115a1.4 1.4 0 0 0 .14.66zm15.7-6.222q.347-.346.346-.856a1.05 1.05 0 0 0-.345-.79 1.18 1.18 0 0 0-.84-.329q-.51 0-.855.33a1.05 1.05 0 0 0-.346.79q0 .51.346.855.345.346.856.346.51 0 .839-.346zm4.337 9.314.033-1.332q.191.403.59.747l.098.081a4 4 0 0 0 .316.224l.223.122a3.2 3.2 0 0 0 1.44.322 3.8 3.8 0 0 0 1.875-.477 3.5 3.5 0 0 0 1.382-1.366q.527-.89.526-2.09 0-1.184-.444-2.073a3.24 3.24 0 0 0-1.283-1.399q-.823-.51-1.942-.51a3.5 3.5 0 0 0-1.527.344l-.086.043-.165.09a3 3 0 0 0-.33.214q-.432.315-.656.707a2 2 0 0 0-.099.198l.082-1.283V4.701h-1.744v12.095zm.473-2.509a2.5 2.5 0 0 0 .566.7q.117.098.245.18l.144.08a2.1 2.1 0 0 0 .975.232q1.07 0 1.645-.675.576-.69.576-1.778 0-1.102-.576-1.777-.56-.691-1.645-.692a2.2 2.2 0 0 0-1.015.235q-.22.113-.415.282l-.15.142a2.1 2.1 0 0 0-.42.594q-.223.479-.223 1.1v.115q0 .705.293 1.26zm2.616-.293c.157-.191.28-.479.28-.967 0-.51-.13-.79-.276-.961l-.021-.026c-.082-.1-.232-.225-.67-.225a.87.87 0 0 0-.681.279l-.012.011c-.154.155-.274.38-.274.807v.115c0 .285.057.499.144.669a1.1 1.1 0 0 0 .367.405c.137.082.28.123.455.123.423 0 .59-.118.686-.23zm8.266-3.013q.345-.13.724-.14l.069-.002q.493 0 .642.099l.247-1.794q-.196-.099-.717-.099a2.3 2.3 0 0 0-.545.063 2 2 0 0 0-.411.148 2.2 2.2 0 0 0-.4.249 2.5 2.5 0 0 0-.485.499 2.7 2.7 0 0 0-.32.581l-.05.137v-1.48h-1.778v7.553h1.777v-3.884q0-.546.159-.943a1.5 1.5 0 0 1 .466-.636 2.5 2.5 0 0 1 .399-.253 2 2 0 0 1 .224-.099zm9.784 2.656.05-.922q0-1.743-.856-2.698-.838-.97-2.584-.97-1.119-.001-2.007.493a3.46 3.46 0 0 0-1.4 1.382q-.493.906-.493 2.106 0 1.07.428 1.975.428.89 1.332 1.432.906.526 2.255.526.973 0 1.668-.185l.044-.012.135-.04q.613-.184.984-.421l-.542-1.267q-.3.162-.642.274l-.297.087q-.51.131-1.3.131-.954 0-1.497-.444a1.6 1.6 0 0 1-.192-.193q-.366-.44-.512-1.234l-.004-.021zm-5.427-1.256-.003.022h3.752v-.138q-.011-.727-.288-1.118a1 1 0 0 0-.156-.176q-.46-.428-1.316-.428-.986 0-1.494.604-.379.45-.494 1.234zm-27.053 2.77V4.7h-1.86v12.095h5.333V15.15zm7.103-5.908v7.553h-1.843V9.242h1.843z'/%3E%3Cpath fill='%23fff' d='m19.63 11.151-.757-1.71-.345 1.71-1.12 5.644h-1.827L18.083 4.7h.197l3.325 6.533.988 2.19.988-2.19L26.839 4.7h.181l2.6 12.095h-1.81l-1.218-5.644-.362-1.71-.658 1.71-2.93 5.644h-.098l-2.913-5.644zm14.836 5.81q-1.02 0-1.893-.478a3.8 3.8 0 0 1-1.381-1.382q-.51-.906-.51-2.106 0-1.185.444-2.074a3.36 3.36 0 0 1 1.3-1.382q.839-.494 1.974-.494a3.3 3.3 0 0 1 1.234.231 3.3 3.3 0 0 1 .97.575q.396.33.527.659l.033-1.267h1.694v7.553H37.18l-.033-1.332q-.279.593-1.02 1.053a3.17 3.17 0 0 1-1.662.444zm.296-1.482q.938 0 1.58-.642.642-.66.642-1.711v-.115q0-.708-.296-1.267a2.2 2.2 0 0 0-.807-.872 2.1 2.1 0 0 0-1.119-.313q-1.053 0-1.629.692-.575.675-.575 1.76 0 1.103.559 1.795.577.675 1.645.675zm6.521-6.237h1.711v1.4q.906-1.597 2.83-1.597 1.596 0 2.584 1.02.988 1.005.988 2.914 0 1.185-.493 2.09a3.46 3.46 0 0 1-1.316 1.399 3.5 3.5 0 0 1-1.844.493q-.954 0-1.662-.329a2.67 2.67 0 0 1-1.086-.97l.017 5.134h-1.728zm4.048 6.22q1.07 0 1.645-.674.577-.69.576-1.762 0-1.119-.576-1.777-.558-.675-1.645-.675-.592 0-1.12.296-.51.28-.822.823-.296.527-.296 1.234v.115q0 .708.296 1.267.313.543.823.855.51.296 1.119.297z'/%3E%3Cpath fill='%23e1e3e9' d='M51.325 4.7h1.86v10.45h3.473v1.646h-5.333zm7.12 4.542h1.843v7.553h-1.843zm.905-1.415a1.16 1.16 0 0 1-.856-.346 1.17 1.17 0 0 1-.346-.856 1.05 1.05 0 0 1 .346-.79q.346-.329.856-.329.494 0 .839.33a1.05 1.05 0 0 1 .345.79 1.16 1.16 0 0 1-.345.855q-.33.346-.84.346zm7.875 9.133a3.17 3.17 0 0 1-1.662-.444q-.723-.46-1.004-1.053l-.033 1.332h-1.71V4.701h1.743v4.657l-.082 1.283q.279-.658 1.086-1.119a3.5 3.5 0 0 1 1.778-.477q1.119 0 1.942.51a3.24 3.24 0 0 1 1.283 1.4q.445.888.444 2.072 0 1.201-.526 2.09a3.5 3.5 0 0 1-1.382 1.366 3.8 3.8 0 0 1-1.876.477zm-.296-1.481q1.069 0 1.645-.675.577-.69.577-1.778 0-1.102-.577-1.776-.56-.691-1.645-.692a2.12 2.12 0 0 0-1.58.659q-.642.641-.642 1.694v.115q0 .71.296 1.267a2.4 2.4 0 0 0 .807.872 2.1 2.1 0 0 0 1.119.313zm5.927-6.237h1.777v1.481q.263-.757.856-1.217a2.14 2.14 0 0 1 1.349-.46q.527 0 .724.098l-.247 1.794q-.149-.099-.642-.099-.774 0-1.416.494-.626.493-.626 1.58v3.883h-1.777V9.242zm9.534 7.718q-1.35 0-2.255-.526-.904-.543-1.332-1.432a4.6 4.6 0 0 1-.428-1.975q0-1.2.493-2.106a3.46 3.46 0 0 1 1.4-1.382q.889-.495 2.007-.494 1.744 0 2.584.97.855.956.856 2.7 0 .444-.05.92h-5.43q.18 1.005.708 1.45.542.443 1.497.443.79 0 1.3-.131a4 4 0 0 0 .938-.362l.542 1.267q-.411.263-1.119.46-.708.198-1.711.197zm1.596-4.558q.016-1.02-.444-1.432-.46-.428-1.316-.428-1.728 0-1.991 1.86z'/%3E%3Cpath d='M5.074 15.948a.484.657 0 0 0-.486.659v1.84a.484.657 0 0 0 .486.659h4.101a.484.657 0 0 0 .486-.659v-1.84a.484.657 0 0 0-.486-.659zm3.56 1.16H5.617v.838h3.017z' style='fill:%23fff;fill-rule:evenodd;stroke-width:1.03600001'/%3E%3Cg style='stroke-width:1.12603545'%3E%3Cpath d='M-9.408-1.416c-3.833-.025-7.056 2.912-7.08 6.615-.02 3.08 1.653 4.832 3.107 6.268.903.892 1.721 1.74 2.32 2.902l-.525-.004c-.543-.003-.992.304-1.24.639a1.87 1.87 0 0 0-.362 1.121l-.011 1.877c-.003.402.104.787.347 1.125.244.338.688.653 1.23.656l4.142.028c.542.003.99-.306 1.238-.641a1.87 1.87 0 0 0 .363-1.121l.012-1.875a1.87 1.87 0 0 0-.348-1.127c-.243-.338-.688-.653-1.23-.656l-.518-.004c.597-1.145 1.425-1.983 2.348-2.87 1.473-1.414 3.18-3.149 3.2-6.226-.016-3.59-2.923-6.684-6.993-6.707m-.006 1.1v.002c3.274.02 5.92 2.532 5.9 5.6-.017 2.706-1.39 4.026-2.863 5.44-1.034.994-2.118 2.033-2.814 3.633-.018.041-.052.055-.075.065q-.013.004-.02.01a.34.34 0 0 1-.226.084.34.34 0 0 1-.224-.086l-.092-.077c-.699-1.615-1.768-2.669-2.781-3.67-1.454-1.435-2.797-2.762-2.78-5.478.02-3.067 2.7-5.545 5.975-5.523m-.02 2.826c-1.62-.01-2.944 1.315-2.955 2.96-.01 1.646 1.295 2.988 2.916 2.999h.002c1.621.01 2.943-1.316 2.953-2.961.011-1.646-1.294-2.988-2.916-2.998m-.005 1.1c1.017.006 1.829.83 1.822 1.89s-.83 1.874-1.848 1.867c-1.018-.006-1.829-.83-1.822-1.89s.83-1.874 1.848-1.868m-2.155 11.857 4.14.025c.271.002.49.305.487.676l-.013 1.875c-.003.37-.224.67-.495.668l-4.14-.025c-.27-.002-.487-.306-.485-.676l.012-1.875c.003-.37.224-.67.494-.668' style='color:%23000;font-style:normal;font-variant:normal;font-weight:400;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:%23000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:evenodd;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:%23000;solid-opacity:1;vector-effect:none;fill:%23000;fill-opacity:.4;fill-rule:evenodd;stroke:none;stroke-width:2.47727823;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto' transform='translate(15.553 2.85)scale(.88807)'/%3E%3Cpath d='M-9.415-.316C-12.69-.338-15.37 2.14-15.39 5.207c-.017 2.716 1.326 4.041 2.78 5.477 1.013 1 2.081 2.055 2.78 3.67l.092.076a.34.34 0 0 0 .225.086.34.34 0 0 0 .227-.083l.019-.01c.022-.009.057-.024.074-.064.697-1.6 1.78-2.64 2.814-3.634 1.473-1.414 2.847-2.733 2.864-5.44.02-3.067-2.627-5.58-5.901-5.601m-.057 8.784c1.621.011 2.944-1.315 2.955-2.96.01-1.646-1.295-2.988-2.916-2.999-1.622-.01-2.945 1.315-2.955 2.96s1.295 2.989 2.916 3' style='clip-rule:evenodd;fill:%23e1e3e9;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:2.47727823;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:.4' transform='translate(15.553 2.85)scale(.88807)'/%3E%3Cpath d='M-11.594 15.465c-.27-.002-.492.297-.494.668l-.012 1.876c-.003.371.214.673.485.675l4.14.027c.271.002.492-.298.495-.668l.012-1.877c.003-.37-.215-.672-.485-.674z' style='clip-rule:evenodd;fill:%23fff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:2.47727823;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:.4' transform='translate(15.553 2.85)scale(.88807)'/%3E%3C/g%3E%3C/svg%3E");background-repeat:no-repeat;cursor:pointer;display:block;height:23px;margin:0 0 -4px -4px;overflow:hidden;width:88px}a.maplibregl-ctrl-logo.maplibregl-compact{width:14px}@media (forced-colors:active){a.maplibregl-ctrl-logo{background-color:transparent;background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='88' height='23' fill='none'%3E%3Cpath fill='%23000' fill-opacity='.4' fill-rule='evenodd' d='M17.408 16.796h-1.827l2.501-12.095h.198l3.324 6.533.988 2.19.988-2.19 3.258-6.533h.181l2.6 12.095h-1.81l-1.218-5.644-.362-1.71-.658 1.71-2.929 5.644h-.098l-2.914-5.644-.757-1.71-.345 1.71zm1.958-3.42-.726 3.663a1.255 1.255 0 0 1-1.232 1.011h-1.827a1.255 1.255 0 0 1-1.229-1.509l2.501-12.095a1.255 1.255 0 0 1 1.23-1.001h.197a1.25 1.25 0 0 1 1.12.685l3.19 6.273 3.125-6.263a1.25 1.25 0 0 1 1.123-.695h.181a1.255 1.255 0 0 1 1.227.991l1.443 6.71a5 5 0 0 1 .314-.787l.009-.016a4.6 4.6 0 0 1 1.777-1.887c.782-.46 1.668-.667 2.611-.667a4.6 4.6 0 0 1 1.7.32l.306.134c.21-.16.474-.256.759-.256h1.694a1.255 1.255 0 0 1 1.212.925 1.255 1.255 0 0 1 1.212-.925h1.711c.284 0 .545.094.755.252.613-.3 1.312-.45 2.075-.45 1.356 0 2.557.445 3.482 1.4q.47.48.763 1.064V4.701a1.255 1.255 0 0 1 1.255-1.255h1.86A1.255 1.255 0 0 1 54.44 4.7v9.194h2.217c.19 0 .37.043.532.118v-4.77c0-.356.147-.678.385-.906a2.42 2.42 0 0 1-.682-1.71c0-.665.267-1.253.735-1.7a2.45 2.45 0 0 1 1.722-.674 2.43 2.43 0 0 1 1.705.675q.318.302.504.683V4.7a1.255 1.255 0 0 1 1.255-1.255h1.744A1.255 1.255 0 0 1 65.812 4.7v3.335a4.8 4.8 0 0 1 1.526-.246c.938 0 1.817.214 2.59.69a4.47 4.47 0 0 1 1.67 1.743v-.98a1.255 1.255 0 0 1 1.256-1.256h1.777c.233 0 .451.064.639.174a3.4 3.4 0 0 1 1.567-.372c.346 0 .861.02 1.285.232a1.25 1.25 0 0 1 .689 1.004 4.7 4.7 0 0 1 .853-.588c.795-.44 1.675-.647 2.61-.647 1.385 0 2.65.39 3.525 1.396.836.938 1.168 2.173 1.168 3.528q-.001.515-.056 1.051a1.255 1.255 0 0 1-.947 1.09l.408.952a1.255 1.255 0 0 1-.477 1.552c-.418.268-.92.463-1.458.612-.613.171-1.304.244-2.049.244-1.06 0-2.043-.207-2.886-.698l-.015-.008c-.798-.48-1.419-1.135-1.818-1.963l-.004-.008a5.8 5.8 0 0 1-.548-2.512q0-.429.053-.843a1.3 1.3 0 0 1-.333-.086l-.166-.004c-.223 0-.426.062-.643.228-.03.024-.142.139-.142.59v3.883a1.255 1.255 0 0 1-1.256 1.256h-1.777a1.255 1.255 0 0 1-1.256-1.256V15.69l-.032.057a4.8 4.8 0 0 1-1.86 1.833 5.04 5.04 0 0 1-2.484.634 4.5 4.5 0 0 1-1.935-.424 1.25 1.25 0 0 1-.764.258h-1.71a1.255 1.255 0 0 1-1.256-1.255V7.687a2.4 2.4 0 0 1-.428.625c.253.23.412.561.412.93v7.553a1.255 1.255 0 0 1-1.256 1.255h-1.843a1.25 1.25 0 0 1-.894-.373c-.228.23-.544.373-.894.373H51.32a1.255 1.255 0 0 1-1.256-1.255v-1.251l-.061.117a4.7 4.7 0 0 1-1.782 1.884 4.77 4.77 0 0 1-2.485.67 5.6 5.6 0 0 1-1.485-.188l.009 2.764a1.255 1.255 0 0 1-1.255 1.259h-1.729a1.255 1.255 0 0 1-1.255-1.255v-3.537a1.255 1.255 0 0 1-1.167.793h-1.679a1.25 1.25 0 0 1-.77-.263 4.5 4.5 0 0 1-1.945.429c-.885 0-1.724-.21-2.495-.632l-.017-.01a5 5 0 0 1-1.081-.836 1.255 1.255 0 0 1-1.254 1.312h-1.81a1.255 1.255 0 0 1-1.228-.99l-.782-3.625-2.044 3.939a1.25 1.25 0 0 1-1.115.676h-.098a1.25 1.25 0 0 1-1.116-.68l-2.061-3.994zM35.92 16.63l.207-.114.223-.15q.493-.356.735-.785l.061-.118.033 1.332h1.678V9.242h-1.694l-.033 1.267q-.133-.329-.526-.658l-.032-.028a3.2 3.2 0 0 0-.668-.428l-.27-.12a3.3 3.3 0 0 0-1.235-.23q-1.136-.001-1.974.493a3.36 3.36 0 0 0-1.3 1.382q-.445.89-.444 2.074 0 1.2.51 2.107a3.8 3.8 0 0 0 1.382 1.381 3.9 3.9 0 0 0 1.893.477q.795 0 1.455-.33zm-2.789-5.38q-.576.675-.575 1.762 0 1.102.559 1.794.576.675 1.645.675a2.25 2.25 0 0 0 .934-.19 2.2 2.2 0 0 0 .468-.29l.178-.161a2.2 2.2 0 0 0 .397-.561q.244-.5.244-1.15v-.115q0-.708-.296-1.267l-.043-.077a2.2 2.2 0 0 0-.633-.709l-.13-.086-.047-.028a2.1 2.1 0 0 0-1.073-.285q-1.052 0-1.629.692zm2.316 2.706c.163-.17.28-.407.28-.83v-.114c0-.292-.06-.508-.15-.68a.96.96 0 0 0-.353-.389.85.85 0 0 0-.464-.127c-.4 0-.56.114-.664.239l-.01.012c-.148.174-.275.45-.275.945 0 .506.122.801.27.99.097.11.266.224.68.224.303 0 .504-.09.687-.269zm7.545 1.705a2.6 2.6 0 0 0 .331.423q.319.33.755.548l.173.074q.65.255 1.49.255 1.02 0 1.844-.493a3.45 3.45 0 0 0 1.316-1.4q.493-.904.493-2.089 0-1.909-.988-2.913-.988-1.02-2.584-1.02-.898 0-1.575.347a3 3 0 0 0-.415.262l-.199.166a3.4 3.4 0 0 0-.64.82V9.242h-1.712v11.553h1.729l-.017-5.134zm.53-1.138q.206.29.48.5l.155.11.053.034q.51.296 1.119.297 1.07 0 1.645-.675.577-.69.576-1.762 0-1.119-.576-1.777-.558-.675-1.645-.675-.435 0-.835.16a2 2 0 0 0-.284.136 2 2 0 0 0-.363.254 2.2 2.2 0 0 0-.46.569l-.082.162a2.6 2.6 0 0 0-.213 1.072v.115q0 .707.296 1.267l.135.211zm.964-.818a1.1 1.1 0 0 0 .367.385.94.94 0 0 0 .476.118c.423 0 .59-.117.687-.23.159-.194.28-.478.28-.95 0-.53-.133-.8-.266-.952l-.021-.025c-.078-.094-.231-.221-.68-.221a1 1 0 0 0-.503.135l-.012.007a.86.86 0 0 0-.335.343c-.073.133-.132.324-.132.614v.115a1.4 1.4 0 0 0 .14.66zm15.7-6.222q.347-.346.346-.856a1.05 1.05 0 0 0-.345-.79 1.18 1.18 0 0 0-.84-.329q-.51 0-.855.33a1.05 1.05 0 0 0-.346.79q0 .51.346.855.345.346.856.346.51 0 .839-.346zm4.337 9.314.033-1.332q.191.403.59.747l.098.081a4 4 0 0 0 .316.224l.223.122a3.2 3.2 0 0 0 1.44.322 3.8 3.8 0 0 0 1.875-.477 3.5 3.5 0 0 0 1.382-1.366q.527-.89.526-2.09 0-1.184-.444-2.073a3.24 3.24 0 0 0-1.283-1.399q-.823-.51-1.942-.51a3.5 3.5 0 0 0-1.527.344l-.086.043-.165.09a3 3 0 0 0-.33.214q-.432.315-.656.707a2 2 0 0 0-.099.198l.082-1.283V4.701h-1.744v12.095zm.473-2.509a2.5 2.5 0 0 0 .566.7q.117.098.245.18l.144.08a2.1 2.1 0 0 0 .975.232q1.07 0 1.645-.675.576-.69.576-1.778 0-1.102-.576-1.777-.56-.691-1.645-.692a2.2 2.2 0 0 0-1.015.235q-.22.113-.415.282l-.15.142a2.1 2.1 0 0 0-.42.594q-.223.479-.223 1.1v.115q0 .705.293 1.26zm2.616-.293c.157-.191.28-.479.28-.967 0-.51-.13-.79-.276-.961l-.021-.026c-.082-.1-.232-.225-.67-.225a.87.87 0 0 0-.681.279l-.012.011c-.154.155-.274.38-.274.807v.115c0 .285.057.499.144.669a1.1 1.1 0 0 0 .367.405c.137.082.28.123.455.123.423 0 .59-.118.686-.23zm8.266-3.013q.345-.13.724-.14l.069-.002q.493 0 .642.099l.247-1.794q-.196-.099-.717-.099a2.3 2.3 0 0 0-.545.063 2 2 0 0 0-.411.148 2.2 2.2 0 0 0-.4.249 2.5 2.5 0 0 0-.485.499 2.7 2.7 0 0 0-.32.581l-.05.137v-1.48h-1.778v7.553h1.777v-3.884q0-.546.159-.943a1.5 1.5 0 0 1 .466-.636 2.5 2.5 0 0 1 .399-.253 2 2 0 0 1 .224-.099zm9.784 2.656.05-.922q0-1.743-.856-2.698-.838-.97-2.584-.97-1.119-.001-2.007.493a3.46 3.46 0 0 0-1.4 1.382q-.493.906-.493 2.106 0 1.07.428 1.975.428.89 1.332 1.432.906.526 2.255.526.973 0 1.668-.185l.044-.012.135-.04q.613-.184.984-.421l-.542-1.267q-.3.162-.642.274l-.297.087q-.51.131-1.3.131-.954 0-1.497-.444a1.6 1.6 0 0 1-.192-.193q-.366-.44-.512-1.234l-.004-.021zm-5.427-1.256-.003.022h3.752v-.138q-.011-.727-.288-1.118a1 1 0 0 0-.156-.176q-.46-.428-1.316-.428-.986 0-1.494.604-.379.45-.494 1.234zm-27.053 2.77V4.7h-1.86v12.095h5.333V15.15zm7.103-5.908v7.553h-1.843V9.242h1.843z'/%3E%3Cpath fill='%23fff' d='m19.63 11.151-.757-1.71-.345 1.71-1.12 5.644h-1.827L18.083 4.7h.197l3.325 6.533.988 2.19.988-2.19L26.839 4.7h.181l2.6 12.095h-1.81l-1.218-5.644-.362-1.71-.658 1.71-2.93 5.644h-.098l-2.913-5.644zm14.836 5.81q-1.02 0-1.893-.478a3.8 3.8 0 0 1-1.381-1.382q-.51-.906-.51-2.106 0-1.185.444-2.074a3.36 3.36 0 0 1 1.3-1.382q.839-.494 1.974-.494a3.3 3.3 0 0 1 1.234.231 3.3 3.3 0 0 1 .97.575q.396.33.527.659l.033-1.267h1.694v7.553H37.18l-.033-1.332q-.279.593-1.02 1.053a3.17 3.17 0 0 1-1.662.444zm.296-1.482q.938 0 1.58-.642.642-.66.642-1.711v-.115q0-.708-.296-1.267a2.2 2.2 0 0 0-.807-.872 2.1 2.1 0 0 0-1.119-.313q-1.053 0-1.629.692-.575.675-.575 1.76 0 1.103.559 1.795.577.675 1.645.675zm6.521-6.237h1.711v1.4q.906-1.597 2.83-1.597 1.596 0 2.584 1.02.988 1.005.988 2.914 0 1.185-.493 2.09a3.46 3.46 0 0 1-1.316 1.399 3.5 3.5 0 0 1-1.844.493q-.954 0-1.662-.329a2.67 2.67 0 0 1-1.086-.97l.017 5.134h-1.728zm4.048 6.22q1.07 0 1.645-.674.577-.69.576-1.762 0-1.119-.576-1.777-.558-.675-1.645-.675-.592 0-1.12.296-.51.28-.822.823-.296.527-.296 1.234v.115q0 .708.296 1.267.313.543.823.855.51.296 1.119.297z'/%3E%3Cpath fill='%23e1e3e9' d='M51.325 4.7h1.86v10.45h3.473v1.646h-5.333zm7.12 4.542h1.843v7.553h-1.843zm.905-1.415a1.16 1.16 0 0 1-.856-.346 1.17 1.17 0 0 1-.346-.856 1.05 1.05 0 0 1 .346-.79q.346-.329.856-.329.494 0 .839.33a1.05 1.05 0 0 1 .345.79 1.16 1.16 0 0 1-.345.855q-.33.346-.84.346zm7.875 9.133a3.17 3.17 0 0 1-1.662-.444q-.723-.46-1.004-1.053l-.033 1.332h-1.71V4.701h1.743v4.657l-.082 1.283q.279-.658 1.086-1.119a3.5 3.5 0 0 1 1.778-.477q1.119 0 1.942.51a3.24 3.24 0 0 1 1.283 1.4q.445.888.444 2.072 0 1.201-.526 2.09a3.5 3.5 0 0 1-1.382 1.366 3.8 3.8 0 0 1-1.876.477zm-.296-1.481q1.069 0 1.645-.675.577-.69.577-1.778 0-1.102-.577-1.776-.56-.691-1.645-.692a2.12 2.12 0 0 0-1.58.659q-.642.641-.642 1.694v.115q0 .71.296 1.267a2.4 2.4 0 0 0 .807.872 2.1 2.1 0 0 0 1.119.313zm5.927-6.237h1.777v1.481q.263-.757.856-1.217a2.14 2.14 0 0 1 1.349-.46q.527 0 .724.098l-.247 1.794q-.149-.099-.642-.099-.774 0-1.416.494-.626.493-.626 1.58v3.883h-1.777V9.242zm9.534 7.718q-1.35 0-2.255-.526-.904-.543-1.332-1.432a4.6 4.6 0 0 1-.428-1.975q0-1.2.493-2.106a3.46 3.46 0 0 1 1.4-1.382q.889-.495 2.007-.494 1.744 0 2.584.97.855.956.856 2.7 0 .444-.05.92h-5.43q.18 1.005.708 1.45.542.443 1.497.443.79 0 1.3-.131a4 4 0 0 0 .938-.362l.542 1.267q-.411.263-1.119.46-.708.198-1.711.197zm1.596-4.558q.016-1.02-.444-1.432-.46-.428-1.316-.428-1.728 0-1.991 1.86z'/%3E%3Cpath d='M5.074 15.948a.484.657 0 0 0-.486.659v1.84a.484.657 0 0 0 .486.659h4.101a.484.657 0 0 0 .486-.659v-1.84a.484.657 0 0 0-.486-.659zm3.56 1.16H5.617v.838h3.017z' style='fill:%23fff;fill-rule:evenodd;stroke-width:1.03600001'/%3E%3Cg style='stroke-width:1.12603545'%3E%3Cpath d='M-9.408-1.416c-3.833-.025-7.056 2.912-7.08 6.615-.02 3.08 1.653 4.832 3.107 6.268.903.892 1.721 1.74 2.32 2.902l-.525-.004c-.543-.003-.992.304-1.24.639a1.87 1.87 0 0 0-.362 1.121l-.011 1.877c-.003.402.104.787.347 1.125.244.338.688.653 1.23.656l4.142.028c.542.003.99-.306 1.238-.641a1.87 1.87 0 0 0 .363-1.121l.012-1.875a1.87 1.87 0 0 0-.348-1.127c-.243-.338-.688-.653-1.23-.656l-.518-.004c.597-1.145 1.425-1.983 2.348-2.87 1.473-1.414 3.18-3.149 3.2-6.226-.016-3.59-2.923-6.684-6.993-6.707m-.006 1.1v.002c3.274.02 5.92 2.532 5.9 5.6-.017 2.706-1.39 4.026-2.863 5.44-1.034.994-2.118 2.033-2.814 3.633-.018.041-.052.055-.075.065q-.013.004-.02.01a.34.34 0 0 1-.226.084.34.34 0 0 1-.224-.086l-.092-.077c-.699-1.615-1.768-2.669-2.781-3.67-1.454-1.435-2.797-2.762-2.78-5.478.02-3.067 2.7-5.545 5.975-5.523m-.02 2.826c-1.62-.01-2.944 1.315-2.955 2.96-.01 1.646 1.295 2.988 2.916 2.999h.002c1.621.01 2.943-1.316 2.953-2.961.011-1.646-1.294-2.988-2.916-2.998m-.005 1.1c1.017.006 1.829.83 1.822 1.89s-.83 1.874-1.848 1.867c-1.018-.006-1.829-.83-1.822-1.89s.83-1.874 1.848-1.868m-2.155 11.857 4.14.025c.271.002.49.305.487.676l-.013 1.875c-.003.37-.224.67-.495.668l-4.14-.025c-.27-.002-.487-.306-.485-.676l.012-1.875c.003-.37.224-.67.494-.668' style='color:%23000;font-style:normal;font-variant:normal;font-weight:400;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:%23000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:evenodd;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:%23000;solid-opacity:1;vector-effect:none;fill:%23000;fill-opacity:.4;fill-rule:evenodd;stroke:none;stroke-width:2.47727823;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto' transform='translate(15.553 2.85)scale(.88807)'/%3E%3Cpath d='M-9.415-.316C-12.69-.338-15.37 2.14-15.39 5.207c-.017 2.716 1.326 4.041 2.78 5.477 1.013 1 2.081 2.055 2.78 3.67l.092.076a.34.34 0 0 0 .225.086.34.34 0 0 0 .227-.083l.019-.01c.022-.009.057-.024.074-.064.697-1.6 1.78-2.64 2.814-3.634 1.473-1.414 2.847-2.733 2.864-5.44.02-3.067-2.627-5.58-5.901-5.601m-.057 8.784c1.621.011 2.944-1.315 2.955-2.96.01-1.646-1.295-2.988-2.916-2.999-1.622-.01-2.945 1.315-2.955 2.96s1.295 2.989 2.916 3' style='clip-rule:evenodd;fill:%23e1e3e9;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:2.47727823;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:.4' transform='translate(15.553 2.85)scale(.88807)'/%3E%3Cpath d='M-11.594 15.465c-.27-.002-.492.297-.494.668l-.012 1.876c-.003.371.214.673.485.675l4.14.027c.271.002.492-.298.495-.668l.012-1.877c.003-.37-.215-.672-.485-.674z' style='clip-rule:evenodd;fill:%23fff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:2.47727823;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:.4' transform='translate(15.553 2.85)scale(.88807)'/%3E%3C/g%3E%3C/svg%3E")}}@media (forced-colors:active) and (prefers-color-scheme:light){a.maplibregl-ctrl-logo{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='88' height='23' fill='none'%3E%3Cpath fill='%23000' fill-opacity='.4' fill-rule='evenodd' d='M17.408 16.796h-1.827l2.501-12.095h.198l3.324 6.533.988 2.19.988-2.19 3.258-6.533h.181l2.6 12.095h-1.81l-1.218-5.644-.362-1.71-.658 1.71-2.929 5.644h-.098l-2.914-5.644-.757-1.71-.345 1.71zm1.958-3.42-.726 3.663a1.255 1.255 0 0 1-1.232 1.011h-1.827a1.255 1.255 0 0 1-1.229-1.509l2.501-12.095a1.255 1.255 0 0 1 1.23-1.001h.197a1.25 1.25 0 0 1 1.12.685l3.19 6.273 3.125-6.263a1.25 1.25 0 0 1 1.123-.695h.181a1.255 1.255 0 0 1 1.227.991l1.443 6.71a5 5 0 0 1 .314-.787l.009-.016a4.6 4.6 0 0 1 1.777-1.887c.782-.46 1.668-.667 2.611-.667a4.6 4.6 0 0 1 1.7.32l.306.134c.21-.16.474-.256.759-.256h1.694a1.255 1.255 0 0 1 1.212.925 1.255 1.255 0 0 1 1.212-.925h1.711c.284 0 .545.094.755.252.613-.3 1.312-.45 2.075-.45 1.356 0 2.557.445 3.482 1.4q.47.48.763 1.064V4.701a1.255 1.255 0 0 1 1.255-1.255h1.86A1.255 1.255 0 0 1 54.44 4.7v9.194h2.217c.19 0 .37.043.532.118v-4.77c0-.356.147-.678.385-.906a2.42 2.42 0 0 1-.682-1.71c0-.665.267-1.253.735-1.7a2.45 2.45 0 0 1 1.722-.674 2.43 2.43 0 0 1 1.705.675q.318.302.504.683V4.7a1.255 1.255 0 0 1 1.255-1.255h1.744A1.255 1.255 0 0 1 65.812 4.7v3.335a4.8 4.8 0 0 1 1.526-.246c.938 0 1.817.214 2.59.69a4.47 4.47 0 0 1 1.67 1.743v-.98a1.255 1.255 0 0 1 1.256-1.256h1.777c.233 0 .451.064.639.174a3.4 3.4 0 0 1 1.567-.372c.346 0 .861.02 1.285.232a1.25 1.25 0 0 1 .689 1.004 4.7 4.7 0 0 1 .853-.588c.795-.44 1.675-.647 2.61-.647 1.385 0 2.65.39 3.525 1.396.836.938 1.168 2.173 1.168 3.528q-.001.515-.056 1.051a1.255 1.255 0 0 1-.947 1.09l.408.952a1.255 1.255 0 0 1-.477 1.552c-.418.268-.92.463-1.458.612-.613.171-1.304.244-2.049.244-1.06 0-2.043-.207-2.886-.698l-.015-.008c-.798-.48-1.419-1.135-1.818-1.963l-.004-.008a5.8 5.8 0 0 1-.548-2.512q0-.429.053-.843a1.3 1.3 0 0 1-.333-.086l-.166-.004c-.223 0-.426.062-.643.228-.03.024-.142.139-.142.59v3.883a1.255 1.255 0 0 1-1.256 1.256h-1.777a1.255 1.255 0 0 1-1.256-1.256V15.69l-.032.057a4.8 4.8 0 0 1-1.86 1.833 5.04 5.04 0 0 1-2.484.634 4.5 4.5 0 0 1-1.935-.424 1.25 1.25 0 0 1-.764.258h-1.71a1.255 1.255 0 0 1-1.256-1.255V7.687a2.4 2.4 0 0 1-.428.625c.253.23.412.561.412.93v7.553a1.255 1.255 0 0 1-1.256 1.255h-1.843a1.25 1.25 0 0 1-.894-.373c-.228.23-.544.373-.894.373H51.32a1.255 1.255 0 0 1-1.256-1.255v-1.251l-.061.117a4.7 4.7 0 0 1-1.782 1.884 4.77 4.77 0 0 1-2.485.67 5.6 5.6 0 0 1-1.485-.188l.009 2.764a1.255 1.255 0 0 1-1.255 1.259h-1.729a1.255 1.255 0 0 1-1.255-1.255v-3.537a1.255 1.255 0 0 1-1.167.793h-1.679a1.25 1.25 0 0 1-.77-.263 4.5 4.5 0 0 1-1.945.429c-.885 0-1.724-.21-2.495-.632l-.017-.01a5 5 0 0 1-1.081-.836 1.255 1.255 0 0 1-1.254 1.312h-1.81a1.255 1.255 0 0 1-1.228-.99l-.782-3.625-2.044 3.939a1.25 1.25 0 0 1-1.115.676h-.098a1.25 1.25 0 0 1-1.116-.68l-2.061-3.994zM35.92 16.63l.207-.114.223-.15q.493-.356.735-.785l.061-.118.033 1.332h1.678V9.242h-1.694l-.033 1.267q-.133-.329-.526-.658l-.032-.028a3.2 3.2 0 0 0-.668-.428l-.27-.12a3.3 3.3 0 0 0-1.235-.23q-1.136-.001-1.974.493a3.36 3.36 0 0 0-1.3 1.382q-.445.89-.444 2.074 0 1.2.51 2.107a3.8 3.8 0 0 0 1.382 1.381 3.9 3.9 0 0 0 1.893.477q.795 0 1.455-.33zm-2.789-5.38q-.576.675-.575 1.762 0 1.102.559 1.794.576.675 1.645.675a2.25 2.25 0 0 0 .934-.19 2.2 2.2 0 0 0 .468-.29l.178-.161a2.2 2.2 0 0 0 .397-.561q.244-.5.244-1.15v-.115q0-.708-.296-1.267l-.043-.077a2.2 2.2 0 0 0-.633-.709l-.13-.086-.047-.028a2.1 2.1 0 0 0-1.073-.285q-1.052 0-1.629.692zm2.316 2.706c.163-.17.28-.407.28-.83v-.114c0-.292-.06-.508-.15-.68a.96.96 0 0 0-.353-.389.85.85 0 0 0-.464-.127c-.4 0-.56.114-.664.239l-.01.012c-.148.174-.275.45-.275.945 0 .506.122.801.27.99.097.11.266.224.68.224.303 0 .504-.09.687-.269zm7.545 1.705a2.6 2.6 0 0 0 .331.423q.319.33.755.548l.173.074q.65.255 1.49.255 1.02 0 1.844-.493a3.45 3.45 0 0 0 1.316-1.4q.493-.904.493-2.089 0-1.909-.988-2.913-.988-1.02-2.584-1.02-.898 0-1.575.347a3 3 0 0 0-.415.262l-.199.166a3.4 3.4 0 0 0-.64.82V9.242h-1.712v11.553h1.729l-.017-5.134zm.53-1.138q.206.29.48.5l.155.11.053.034q.51.296 1.119.297 1.07 0 1.645-.675.577-.69.576-1.762 0-1.119-.576-1.777-.558-.675-1.645-.675-.435 0-.835.16a2 2 0 0 0-.284.136 2 2 0 0 0-.363.254 2.2 2.2 0 0 0-.46.569l-.082.162a2.6 2.6 0 0 0-.213 1.072v.115q0 .707.296 1.267l.135.211zm.964-.818a1.1 1.1 0 0 0 .367.385.94.94 0 0 0 .476.118c.423 0 .59-.117.687-.23.159-.194.28-.478.28-.95 0-.53-.133-.8-.266-.952l-.021-.025c-.078-.094-.231-.221-.68-.221a1 1 0 0 0-.503.135l-.012.007a.86.86 0 0 0-.335.343c-.073.133-.132.324-.132.614v.115a1.4 1.4 0 0 0 .14.66zm15.7-6.222q.347-.346.346-.856a1.05 1.05 0 0 0-.345-.79 1.18 1.18 0 0 0-.84-.329q-.51 0-.855.33a1.05 1.05 0 0 0-.346.79q0 .51.346.855.345.346.856.346.51 0 .839-.346zm4.337 9.314.033-1.332q.191.403.59.747l.098.081a4 4 0 0 0 .316.224l.223.122a3.2 3.2 0 0 0 1.44.322 3.8 3.8 0 0 0 1.875-.477 3.5 3.5 0 0 0 1.382-1.366q.527-.89.526-2.09 0-1.184-.444-2.073a3.24 3.24 0 0 0-1.283-1.399q-.823-.51-1.942-.51a3.5 3.5 0 0 0-1.527.344l-.086.043-.165.09a3 3 0 0 0-.33.214q-.432.315-.656.707a2 2 0 0 0-.099.198l.082-1.283V4.701h-1.744v12.095zm.473-2.509a2.5 2.5 0 0 0 .566.7q.117.098.245.18l.144.08a2.1 2.1 0 0 0 .975.232q1.07 0 1.645-.675.576-.69.576-1.778 0-1.102-.576-1.777-.56-.691-1.645-.692a2.2 2.2 0 0 0-1.015.235q-.22.113-.415.282l-.15.142a2.1 2.1 0 0 0-.42.594q-.223.479-.223 1.1v.115q0 .705.293 1.26zm2.616-.293c.157-.191.28-.479.28-.967 0-.51-.13-.79-.276-.961l-.021-.026c-.082-.1-.232-.225-.67-.225a.87.87 0 0 0-.681.279l-.012.011c-.154.155-.274.38-.274.807v.115c0 .285.057.499.144.669a1.1 1.1 0 0 0 .367.405c.137.082.28.123.455.123.423 0 .59-.118.686-.23zm8.266-3.013q.345-.13.724-.14l.069-.002q.493 0 .642.099l.247-1.794q-.196-.099-.717-.099a2.3 2.3 0 0 0-.545.063 2 2 0 0 0-.411.148 2.2 2.2 0 0 0-.4.249 2.5 2.5 0 0 0-.485.499 2.7 2.7 0 0 0-.32.581l-.05.137v-1.48h-1.778v7.553h1.777v-3.884q0-.546.159-.943a1.5 1.5 0 0 1 .466-.636 2.5 2.5 0 0 1 .399-.253 2 2 0 0 1 .224-.099zm9.784 2.656.05-.922q0-1.743-.856-2.698-.838-.97-2.584-.97-1.119-.001-2.007.493a3.46 3.46 0 0 0-1.4 1.382q-.493.906-.493 2.106 0 1.07.428 1.975.428.89 1.332 1.432.906.526 2.255.526.973 0 1.668-.185l.044-.012.135-.04q.613-.184.984-.421l-.542-1.267q-.3.162-.642.274l-.297.087q-.51.131-1.3.131-.954 0-1.497-.444a1.6 1.6 0 0 1-.192-.193q-.366-.44-.512-1.234l-.004-.021zm-5.427-1.256-.003.022h3.752v-.138q-.011-.727-.288-1.118a1 1 0 0 0-.156-.176q-.46-.428-1.316-.428-.986 0-1.494.604-.379.45-.494 1.234zm-27.053 2.77V4.7h-1.86v12.095h5.333V15.15zm7.103-5.908v7.553h-1.843V9.242h1.843z'/%3E%3Cpath fill='%23fff' d='m19.63 11.151-.757-1.71-.345 1.71-1.12 5.644h-1.827L18.083 4.7h.197l3.325 6.533.988 2.19.988-2.19L26.839 4.7h.181l2.6 12.095h-1.81l-1.218-5.644-.362-1.71-.658 1.71-2.93 5.644h-.098l-2.913-5.644zm14.836 5.81q-1.02 0-1.893-.478a3.8 3.8 0 0 1-1.381-1.382q-.51-.906-.51-2.106 0-1.185.444-2.074a3.36 3.36 0 0 1 1.3-1.382q.839-.494 1.974-.494a3.3 3.3 0 0 1 1.234.231 3.3 3.3 0 0 1 .97.575q.396.33.527.659l.033-1.267h1.694v7.553H37.18l-.033-1.332q-.279.593-1.02 1.053a3.17 3.17 0 0 1-1.662.444zm.296-1.482q.938 0 1.58-.642.642-.66.642-1.711v-.115q0-.708-.296-1.267a2.2 2.2 0 0 0-.807-.872 2.1 2.1 0 0 0-1.119-.313q-1.053 0-1.629.692-.575.675-.575 1.76 0 1.103.559 1.795.577.675 1.645.675zm6.521-6.237h1.711v1.4q.906-1.597 2.83-1.597 1.596 0 2.584 1.02.988 1.005.988 2.914 0 1.185-.493 2.09a3.46 3.46 0 0 1-1.316 1.399 3.5 3.5 0 0 1-1.844.493q-.954 0-1.662-.329a2.67 2.67 0 0 1-1.086-.97l.017 5.134h-1.728zm4.048 6.22q1.07 0 1.645-.674.577-.69.576-1.762 0-1.119-.576-1.777-.558-.675-1.645-.675-.592 0-1.12.296-.51.28-.822.823-.296.527-.296 1.234v.115q0 .708.296 1.267.313.543.823.855.51.296 1.119.297z'/%3E%3Cpath fill='%23e1e3e9' d='M51.325 4.7h1.86v10.45h3.473v1.646h-5.333zm7.12 4.542h1.843v7.553h-1.843zm.905-1.415a1.16 1.16 0 0 1-.856-.346 1.17 1.17 0 0 1-.346-.856 1.05 1.05 0 0 1 .346-.79q.346-.329.856-.329.494 0 .839.33a1.05 1.05 0 0 1 .345.79 1.16 1.16 0 0 1-.345.855q-.33.346-.84.346zm7.875 9.133a3.17 3.17 0 0 1-1.662-.444q-.723-.46-1.004-1.053l-.033 1.332h-1.71V4.701h1.743v4.657l-.082 1.283q.279-.658 1.086-1.119a3.5 3.5 0 0 1 1.778-.477q1.119 0 1.942.51a3.24 3.24 0 0 1 1.283 1.4q.445.888.444 2.072 0 1.201-.526 2.09a3.5 3.5 0 0 1-1.382 1.366 3.8 3.8 0 0 1-1.876.477zm-.296-1.481q1.069 0 1.645-.675.577-.69.577-1.778 0-1.102-.577-1.776-.56-.691-1.645-.692a2.12 2.12 0 0 0-1.58.659q-.642.641-.642 1.694v.115q0 .71.296 1.267a2.4 2.4 0 0 0 .807.872 2.1 2.1 0 0 0 1.119.313zm5.927-6.237h1.777v1.481q.263-.757.856-1.217a2.14 2.14 0 0 1 1.349-.46q.527 0 .724.098l-.247 1.794q-.149-.099-.642-.099-.774 0-1.416.494-.626.493-.626 1.58v3.883h-1.777V9.242zm9.534 7.718q-1.35 0-2.255-.526-.904-.543-1.332-1.432a4.6 4.6 0 0 1-.428-1.975q0-1.2.493-2.106a3.46 3.46 0 0 1 1.4-1.382q.889-.495 2.007-.494 1.744 0 2.584.97.855.956.856 2.7 0 .444-.05.92h-5.43q.18 1.005.708 1.45.542.443 1.497.443.79 0 1.3-.131a4 4 0 0 0 .938-.362l.542 1.267q-.411.263-1.119.46-.708.198-1.711.197zm1.596-4.558q.016-1.02-.444-1.432-.46-.428-1.316-.428-1.728 0-1.991 1.86z'/%3E%3Cpath d='M5.074 15.948a.484.657 0 0 0-.486.659v1.84a.484.657 0 0 0 .486.659h4.101a.484.657 0 0 0 .486-.659v-1.84a.484.657 0 0 0-.486-.659zm3.56 1.16H5.617v.838h3.017z' style='fill:%23fff;fill-rule:evenodd;stroke-width:1.03600001'/%3E%3Cg style='stroke-width:1.12603545'%3E%3Cpath d='M-9.408-1.416c-3.833-.025-7.056 2.912-7.08 6.615-.02 3.08 1.653 4.832 3.107 6.268.903.892 1.721 1.74 2.32 2.902l-.525-.004c-.543-.003-.992.304-1.24.639a1.87 1.87 0 0 0-.362 1.121l-.011 1.877c-.003.402.104.787.347 1.125.244.338.688.653 1.23.656l4.142.028c.542.003.99-.306 1.238-.641a1.87 1.87 0 0 0 .363-1.121l.012-1.875a1.87 1.87 0 0 0-.348-1.127c-.243-.338-.688-.653-1.23-.656l-.518-.004c.597-1.145 1.425-1.983 2.348-2.87 1.473-1.414 3.18-3.149 3.2-6.226-.016-3.59-2.923-6.684-6.993-6.707m-.006 1.1v.002c3.274.02 5.92 2.532 5.9 5.6-.017 2.706-1.39 4.026-2.863 5.44-1.034.994-2.118 2.033-2.814 3.633-.018.041-.052.055-.075.065q-.013.004-.02.01a.34.34 0 0 1-.226.084.34.34 0 0 1-.224-.086l-.092-.077c-.699-1.615-1.768-2.669-2.781-3.67-1.454-1.435-2.797-2.762-2.78-5.478.02-3.067 2.7-5.545 5.975-5.523m-.02 2.826c-1.62-.01-2.944 1.315-2.955 2.96-.01 1.646 1.295 2.988 2.916 2.999h.002c1.621.01 2.943-1.316 2.953-2.961.011-1.646-1.294-2.988-2.916-2.998m-.005 1.1c1.017.006 1.829.83 1.822 1.89s-.83 1.874-1.848 1.867c-1.018-.006-1.829-.83-1.822-1.89s.83-1.874 1.848-1.868m-2.155 11.857 4.14.025c.271.002.49.305.487.676l-.013 1.875c-.003.37-.224.67-.495.668l-4.14-.025c-.27-.002-.487-.306-.485-.676l.012-1.875c.003-.37.224-.67.494-.668' style='color:%23000;font-style:normal;font-variant:normal;font-weight:400;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:%23000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:evenodd;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:%23000;solid-opacity:1;vector-effect:none;fill:%23000;fill-opacity:.4;fill-rule:evenodd;stroke:none;stroke-width:2.47727823;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto' transform='translate(15.553 2.85)scale(.88807)'/%3E%3Cpath d='M-9.415-.316C-12.69-.338-15.37 2.14-15.39 5.207c-.017 2.716 1.326 4.041 2.78 5.477 1.013 1 2.081 2.055 2.78 3.67l.092.076a.34.34 0 0 0 .225.086.34.34 0 0 0 .227-.083l.019-.01c.022-.009.057-.024.074-.064.697-1.6 1.78-2.64 2.814-3.634 1.473-1.414 2.847-2.733 2.864-5.44.02-3.067-2.627-5.58-5.901-5.601m-.057 8.784c1.621.011 2.944-1.315 2.955-2.96.01-1.646-1.295-2.988-2.916-2.999-1.622-.01-2.945 1.315-2.955 2.96s1.295 2.989 2.916 3' style='clip-rule:evenodd;fill:%23e1e3e9;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:2.47727823;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:.4' transform='translate(15.553 2.85)scale(.88807)'/%3E%3Cpath d='M-11.594 15.465c-.27-.002-.492.297-.494.668l-.012 1.876c-.003.371.214.673.485.675l4.14.027c.271.002.492-.298.495-.668l.012-1.877c.003-.37-.215-.672-.485-.674z' style='clip-rule:evenodd;fill:%23fff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:2.47727823;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:.4' transform='translate(15.553 2.85)scale(.88807)'/%3E%3C/g%3E%3C/svg%3E")}}.maplibregl-ctrl.maplibregl-ctrl-attrib{background-color:hsla(0,0%,100%,.5);margin:0;padding:0 5px}@media screen{.maplibregl-ctrl-attrib.maplibregl-compact{background-color:#fff;border-radius:12px;box-sizing:content-box;color:#000;margin:10px;min-height:20px;padding:2px 24px 2px 0;position:relative}.maplibregl-ctrl-attrib.maplibregl-compact-show{padding:2px 28px 2px 8px;visibility:visible}.maplibregl-ctrl-bottom-left>.maplibregl-ctrl-attrib.maplibregl-compact-show,.maplibregl-ctrl-top-left>.maplibregl-ctrl-attrib.maplibregl-compact-show{border-radius:12px;padding:2px 8px 2px 28px}.maplibregl-ctrl-attrib.maplibregl-compact .maplibregl-ctrl-attrib-inner{display:none}.maplibregl-ctrl-attrib-button{background-color:hsla(0,0%,100%,.5);background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' fill-rule='evenodd' viewBox='0 0 20 20'%3E%3Cpath d='M4 10a6 6 0 1 0 12 0 6 6 0 1 0-12 0m5-3a1 1 0 1 0 2 0 1 1 0 1 0-2 0m0 3a1 1 0 1 1 2 0v3a1 1 0 1 1-2 0'/%3E%3C/svg%3E");border:0;border-radius:12px;box-sizing:border-box;cursor:pointer;display:none;height:24px;outline:none;position:absolute;right:0;top:0;width:24px}.maplibregl-ctrl-attrib summary.maplibregl-ctrl-attrib-button{-webkit-appearance:none;-moz-appearance:none;appearance:none;list-style:none}.maplibregl-ctrl-attrib summary.maplibregl-ctrl-attrib-button::-webkit-details-marker{display:none}.maplibregl-ctrl-bottom-left .maplibregl-ctrl-attrib-button,.maplibregl-ctrl-top-left .maplibregl-ctrl-attrib-button{left:0}.maplibregl-ctrl-attrib.maplibregl-compact .maplibregl-ctrl-attrib-button,.maplibregl-ctrl-attrib.maplibregl-compact-show .maplibregl-ctrl-attrib-inner{display:block}.maplibregl-ctrl-attrib.maplibregl-compact-show .maplibregl-ctrl-attrib-button{background-color:rgb(0 0 0/5%)}.maplibregl-ctrl-bottom-right>.maplibregl-ctrl-attrib.maplibregl-compact:after{bottom:0;right:0}.maplibregl-ctrl-top-right>.maplibregl-ctrl-attrib.maplibregl-compact:after{right:0;top:0}.maplibregl-ctrl-top-left>.maplibregl-ctrl-attrib.maplibregl-compact:after{left:0;top:0}.maplibregl-ctrl-bottom-left>.maplibregl-ctrl-attrib.maplibregl-compact:after{bottom:0;left:0}}@media screen and (forced-colors:active){.maplibregl-ctrl-attrib.maplibregl-compact:after{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' fill='%23fff' fill-rule='evenodd' viewBox='0 0 20 20'%3E%3Cpath d='M4 10a6 6 0 1 0 12 0 6 6 0 1 0-12 0m5-3a1 1 0 1 0 2 0 1 1 0 1 0-2 0m0 3a1 1 0 1 1 2 0v3a1 1 0 1 1-2 0'/%3E%3C/svg%3E")}}@media screen and (forced-colors:active) and (prefers-color-scheme:light){.maplibregl-ctrl-attrib.maplibregl-compact:after{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' fill-rule='evenodd' viewBox='0 0 20 20'%3E%3Cpath d='M4 10a6 6 0 1 0 12 0 6 6 0 1 0-12 0m5-3a1 1 0 1 0 2 0 1 1 0 1 0-2 0m0 3a1 1 0 1 1 2 0v3a1 1 0 1 1-2 0'/%3E%3C/svg%3E")}}.maplibregl-ctrl-attrib a{color:rgba(0,0,0,.75);text-decoration:none}.maplibregl-ctrl-attrib a:hover{color:inherit;text-decoration:underline}.maplibregl-attrib-empty{display:none}.maplibregl-ctrl-scale{background-color:hsla(0,0%,100%,.75);border:2px solid #333;border-top:#333;box-sizing:border-box;color:#333;font-size:10px;padding:0 5px}.maplibregl-popup{display:flex;left:0;pointer-events:none;position:absolute;top:0;will-change:transform}.maplibregl-popup-anchor-top,.maplibregl-popup-anchor-top-left,.maplibregl-popup-anchor-top-right{flex-direction:column}.maplibregl-popup-anchor-bottom,.maplibregl-popup-anchor-bottom-left,.maplibregl-popup-anchor-bottom-right{flex-direction:column-reverse}.maplibregl-popup-anchor-left{flex-direction:row}.maplibregl-popup-anchor-right{flex-direction:row-reverse}.maplibregl-popup-tip{border:10px solid transparent;height:0;width:0;z-index:1}.maplibregl-popup-anchor-top .maplibregl-popup-tip{align-self:center;border-bottom-color:#fff;border-top:none}.maplibregl-popup-anchor-top-left .maplibregl-popup-tip{align-self:flex-start;border-bottom-color:#fff;border-left:none;border-top:none}.maplibregl-popup-anchor-top-right .maplibregl-popup-tip{align-self:flex-end;border-bottom-color:#fff;border-right:none;border-top:none}.maplibregl-popup-anchor-bottom .maplibregl-popup-tip{align-self:center;border-bottom:none;border-top-color:#fff}.maplibregl-popup-anchor-bottom-left .maplibregl-popup-tip{align-self:flex-start;border-bottom:none;border-left:none;border-top-color:#fff}.maplibregl-popup-anchor-bottom-right .maplibregl-popup-tip{align-self:flex-end;border-bottom:none;border-right:none;border-top-color:#fff}.maplibregl-popup-anchor-left .maplibregl-popup-tip{align-self:center;border-left:none;border-right-color:#fff}.maplibregl-popup-anchor-right .maplibregl-popup-tip{align-self:center;border-left-color:#fff;border-right:none}.maplibregl-popup-close-button{background-color:transparent;border:0;border-radius:0 3px 0 0;cursor:pointer;position:absolute;right:0;top:0}.maplibregl-popup-close-button:hover{background-color:rgb(0 0 0/5%)}.maplibregl-popup-content{background:#fff;border-radius:3px;box-shadow:0 1px 2px rgba(0,0,0,.1);padding:15px 10px;pointer-events:auto;position:relative}.maplibregl-popup-anchor-top-left .maplibregl-popup-content{border-top-left-radius:0}.maplibregl-popup-anchor-top-right .maplibregl-popup-content{border-top-right-radius:0}.maplibregl-popup-anchor-bottom-left .maplibregl-popup-content{border-bottom-left-radius:0}.maplibregl-popup-anchor-bottom-right .maplibregl-popup-content{border-bottom-right-radius:0}.maplibregl-popup-track-pointer{display:none}.maplibregl-popup-track-pointer *{pointer-events:none;-webkit-user-select:none;-moz-user-select:none;user-select:none}.maplibregl-map:hover .maplibregl-popup-track-pointer{display:flex}.maplibregl-map:active .maplibregl-popup-track-pointer{display:none}.maplibregl-marker{left:0;position:absolute;top:0;transition:opacity .2s;will-change:transform}.maplibregl-user-location-dot,.maplibregl-user-location-dot:before{background-color:#1da1f2;border-radius:50%;height:15px;width:15px}.maplibregl-user-location-dot:before{animation:maplibregl-user-location-dot-pulse 2s infinite;content:"";position:absolute}.maplibregl-user-location-dot:after{border:2px solid #fff;border-radius:50%;box-shadow:0 0 3px rgba(0,0,0,.35);box-sizing:border-box;content:"";height:19px;left:-2px;position:absolute;top:-2px;width:19px}@keyframes maplibregl-user-location-dot-pulse{0%{opacity:1;transform:scale(1)}70%{opacity:0;transform:scale(3)}to{opacity:0;transform:scale(1)}}.maplibregl-user-location-dot-stale{background-color:#aaa}.maplibregl-user-location-dot-stale:after{display:none}.maplibregl-user-location-accuracy-circle{background-color:#1da1f233;border-radius:100%;height:1px;width:1px}.maplibregl-crosshair,.maplibregl-crosshair .maplibregl-interactive,.maplibregl-crosshair .maplibregl-interactive:active{cursor:crosshair}.maplibregl-boxzoom{background:#fff;border:2px dotted #202020;height:0;left:0;opacity:.5;position:absolute;top:0;width:0}.maplibregl-cooperative-gesture-screen{align-items:center;background:rgba(0,0,0,.4);color:#fff;display:flex;font-size:1.4em;inset:0;justify-content:center;line-height:1.2;opacity:0;padding:1rem;pointer-events:none;position:absolute;transition:opacity 1s ease 1s;z-index:99999}.maplibregl-cooperative-gesture-screen.maplibregl-show{opacity:1;transition:opacity .05s}.maplibregl-cooperative-gesture-screen .maplibregl-mobile-message{display:none}@media (hover:none),(width <= 480px){.maplibregl-cooperative-gesture-screen .maplibregl-desktop-message{display:none}.maplibregl-cooperative-gesture-screen .maplibregl-mobile-message{display:block}}.maplibregl-pseudo-fullscreen{height:100%!important;left:0!important;position:fixed!important;top:0!important;width:100%!important;z-index:99999}`,document.head.appendChild(e)}})()});var qa=ye(el=>{"use strict";var f3=G1(),Kre=w6(),Jre=zO(),fit=my(),hit=NS().addStyleRule,$re=Ao(),dit=Gl(),vit=c3(),pit=$re.extendFlat,VO=$re.extendDeepAll;el.modules={};el.allCategories={};el.allTypes=[];el.subplotsRegistry={};el.componentsRegistry={};el.layoutArrayContainers=[];el.layoutArrayRegexes=[];el.traceLayoutAttributes={};el.localeRegistry={};el.apiMethodRegistry={};el.collectableSubplotTypes=null;el.register=function(t){if(el.collectableSubplotTypes=null,t)t&&!Array.isArray(t)&&(t=[t]);else throw new Error("No argument passed to Plotly.register.");for(var r=0;r<t.length;r++){var n=t[r];if(!n)throw new Error("Invalid module was attempted to be registered!");switch(n.moduleType){case"trace":git(n);break;case"transform":_it(n);break;case"component":yit(n);break;case"locale":xit(n);break;case"apiMethod":var i=n.name;el.apiMethodRegistry[i]=n.fn;break;default:throw new Error("Invalid module was attempted to be registered!")}}};el.getModule=function(e){var t=el.modules[rie(e)];return t?t._module:!1};el.traceIs=function(e,t){if(e=rie(e),e==="various")return!1;var r=el.modules[e];return r||(e&&f3.log("Unrecognized trace type "+e+"."),r=el.modules[dit.type.dflt]),!!r.categories[t]};el.getComponentMethod=function(e,t){var r=el.componentsRegistry[e];return r&&r[t]||Kre};el.call=function(){var e=arguments[0],t=[].slice.call(arguments,1);return el.apiMethodRegistry[e].apply(null,t)};function git(e){var t=e.name,r=e.categories,n=e.meta;if(el.modules[t]){f3.log("Type "+t+" already registered");return}el.subplotsRegistry[e.basePlotModule.name]||mit(e.basePlotModule);for(var i={},a=0;a<r.length;a++)i[r[a]]=!0,el.allCategories[r[a]]=!0;el.modules[t]={_module:e,categories:i},n&&Object.keys(n).length&&(el.modules[t].meta=n),el.allTypes.push(t);for(var o in el.componentsRegistry)eie(o,t);e.layoutAttributes&&pit(el.traceLayoutAttributes,e.layoutAttributes);var s=e.basePlotModule,l=s.name;if(l==="mapbox"){var u=s.constants.styleRules;for(var c in u)hit(".js-plotly-plot .plotly .mapboxgl-"+c,u[c])}l==="map"&&Yre(),(l==="geo"||l==="mapbox"||l==="map")&&window.PlotlyGeoAssets===void 0&&(window.PlotlyGeoAssets={topojson:{}})}function mit(e){var t=e.name;if(el.subplotsRegistry[t]){f3.log("Plot type "+t+" already registered.");return}Qre(e),el.subplotsRegistry[t]=e;for(var r in el.componentsRegistry)tie(r,e.name)}function yit(e){if(typeof e.name!="string")throw new Error("Component module *name* must be a string.");var t=e.name;el.componentsRegistry[t]=e,e.layoutAttributes&&(e.layoutAttributes._isLinkedToArray&&Jre(el.layoutArrayContainers,t),Qre(e));for(var r in el.modules)eie(t,r);for(var n in el.subplotsRegistry)tie(t,n);e.schema&&e.schema.layout&&VO(vit,e.schema.layout)}function _it(e){if(typeof e.name!="string")throw new Error("Transform module *name* must be a string.");var t="Transform module "+e.name,r=typeof e.transform=="function",n=typeof e.calcTransform=="function";if(!r&&!n)throw new Error(t+" is missing a *transform* or *calcTransform* method.");r&&n&&f3.log([t+" has both a *transform* and *calcTransform* methods.","Please note that all *transform* methods are executed","before all *calcTransform* methods."].join(" ")),fit(e.attributes)||f3.log(t+" registered without an *attributes* object."),typeof e.supplyDefaults!="function"&&f3.log(t+" registered without a *supplyDefaults* method.")}function xit(e){var t=e.name,r=t.split("-")[0],n=e.dictionary,i=e.format,a=n&&Object.keys(n).length,o=i&&Object.keys(i).length,s=el.localeRegistry,l=s[t];if(l||(s[t]=l={}),r!==t){var u=s[r];u||(s[r]=u={}),a&&u.dictionary===l.dictionary&&(u.dictionary=n),o&&u.format===l.format&&(u.format=i)}a&&(l.dictionary=n),o&&(l.format=i)}function Qre(e){if(e.layoutAttributes){var t=e.layoutAttributes._arrayAttrRegexps;if(t)for(var r=0;r<t.length;r++)Jre(el.layoutArrayRegexes,t[r])}}function eie(e,t){var r=el.componentsRegistry[e].schema;if(!(!r||!r.traces)){var n=r.traces[t];n&&VO(el.modules[t]._module.attributes,n)}}function tie(e,t){var r=el.componentsRegistry[e].schema;if(!(!r||!r.subplots)){var n=el.subplotsRegistry[t],i=n.layoutAttributes,a=n.attr==="subplot"?n.name:n.attr;Array.isArray(a)&&(a=a[0]);var o=r.subplots[a];i&&o&&VO(i,o)}}function rie(e){return typeof e=="object"&&(e=e.type),e}});var fie=ye(hh=>{"use strict";var bit=r3().timeFormat,uie=Eo(),GO=G1(),X1=n3().mod,v3=fs(),y0=v3.BADNUM,Tp=v3.ONEDAY,VS=v3.ONEHOUR,W1=v3.ONEMIN,d3=v3.ONESEC,GS=v3.EPOCHJD,yy=qa(),iie=r3().utcFormat,wit=/^\s*(-?\d\d\d\d|\d\d)(-(\d?\d)(-(\d?\d)([ Tt]([01]?\d|2[0-3])(:([0-5]\d)(:([0-5]\d(\.\d+)?))?(Z|z|[+\-]\d\d(:?\d\d)?)?)?)?)?)?\s*$/m,Tit=/^\s*(-?\d\d\d\d|\d\d)(-(\d?\di?)(-(\d?\d)([ Tt]([01]?\d|2[0-3])(:([0-5]\d)(:([0-5]\d(\.\d+)?))?(Z|z|[+\-]\d\d(:?\d\d)?)?)?)?)?)?\s*$/m,nie=new Date().getFullYear()-70;function _y(e){return e&&yy.componentsRegistry.calendars&&typeof e=="string"&&e!=="gregorian"}hh.dateTick0=function(e,t){var r=Ait(e,!!t);if(t<2)return r;var n=hh.dateTime2ms(r,e);return n+=Tp*(t-1),hh.ms2DateTime(n,0,e)};function Ait(e,t){return _y(e)?t?yy.getComponentMethod("calendars","CANONICAL_SUNDAY")[e]:yy.getComponentMethod("calendars","CANONICAL_TICK")[e]:t?"2000-01-02":"2000-01-01"}hh.dfltRange=function(e){return _y(e)?yy.getComponentMethod("calendars","DFLTRANGE")[e]:["2000-01-01","2001-01-01"]};hh.isJSDate=function(e){return typeof e=="object"&&e!==null&&typeof e.getTime=="function"};var P6,I6;hh.dateTime2ms=function(e,t){if(hh.isJSDate(e)){var r=e.getTimezoneOffset()*W1,n=(e.getUTCMinutes()-e.getMinutes())*W1+(e.getUTCSeconds()-e.getSeconds())*d3+(e.getUTCMilliseconds()-e.getMilliseconds());if(n){var i=3*W1;r=r-i/2+X1(n-r+i/2,i)}return e=Number(e)-r,e>=P6&&e<=I6?e:y0}if(typeof e!="string"&&typeof e!="number")return y0;e=String(e);var a=_y(t),o=e.charAt(0);a&&(o==="G"||o==="g")&&(e=e.slice(1),t="");var s=a&&t.slice(0,7)==="chinese",l=e.match(s?Tit:wit);if(!l)return y0;var u=l[1],c=l[3]||"1",f=Number(l[5]||1),h=Number(l[7]||0),d=Number(l[9]||0),v=Number(l[11]||0);if(a){if(u.length===2)return y0;u=Number(u);var _;try{var b=yy.getComponentMethod("calendars","getCal")(t);if(s){var p=c.charAt(c.length-1)==="i";c=parseInt(c,10),_=b.newDate(u,b.toMonthIndex(u,c,p),f)}else _=b.newDate(u,Number(c),f)}catch(E){return y0}return _?(_.toJD()-GS)*Tp+h*VS+d*W1+v*d3:y0}u.length===2?u=(Number(u)+2e3-nie)%100+nie:u=Number(u),c-=1;var k=new Date(Date.UTC(2e3,c,f,h,d));return k.setUTCFullYear(u),k.getUTCMonth()!==c||k.getUTCDate()!==f?y0:k.getTime()+v*d3};P6=hh.MIN_MS=hh.dateTime2ms("-9999");I6=hh.MAX_MS=hh.dateTime2ms("9999-12-31 23:59:59.9999");hh.isDateTime=function(e,t){return hh.dateTime2ms(e,t)!==y0};function h3(e,t){return String(e+Math.pow(10,t)).slice(1)}var L6=90*Tp,aie=3*VS,oie=5*W1;hh.ms2DateTime=function(e,t,r){if(typeof e!="number"||!(e>=P6&&e<=I6))return y0;t||(t=0);var n=Math.floor(X1(e+.05,1)*10),i=Math.round(e-n/10),a,o,s,l,u,c;if(_y(r)){var f=Math.floor(i/Tp)+GS,h=Math.floor(X1(e,Tp));try{a=yy.getComponentMethod("calendars","getCal")(r).fromJD(f).formatDate("yyyy-mm-dd")}catch(d){a=iie("G%Y-%m-%d")(new Date(i))}if(a.charAt(0)==="-")for(;a.length<11;)a="-0"+a.slice(1);else for(;a.length<10;)a="0"+a;o=t<L6?Math.floor(h/VS):0,s=t<L6?Math.floor(h%VS/W1):0,l=t<aie?Math.floor(h%W1/d3):0,u=t<oie?h%d3*10+n:0}else c=new Date(i),a=iie("%Y-%m-%d")(c),o=t<L6?c.getUTCHours():0,s=t<L6?c.getUTCMinutes():0,l=t<aie?c.getUTCSeconds():0,u=t<oie?c.getUTCMilliseconds()*10+n:0;return cie(a,o,s,l,u)};hh.ms2DateTimeLocal=function(e){if(!(e>=P6+Tp&&e<=I6-Tp))return y0;var t=Math.floor(X1(e+.05,1)*10),r=new Date(Math.round(e-t/10)),n=bit("%Y-%m-%d")(r),i=r.getHours(),a=r.getMinutes(),o=r.getSeconds(),s=r.getUTCMilliseconds()*10+t;return cie(n,i,a,o,s)};function cie(e,t,r,n,i){if((t||r||n||i)&&(e+=" "+h3(t,2)+":"+h3(r,2),(n||i)&&(e+=":"+h3(n,2),i))){for(var a=4;i%10===0;)a-=1,i/=10;e+="."+h3(i,a)}return e}hh.cleanDate=function(e,t,r){if(e===y0)return t;if(hh.isJSDate(e)||typeof e=="number"&&isFinite(e)){if(_y(r))return GO.error("JS Dates and milliseconds are incompatible with world calendars",e),t;if(e=hh.ms2DateTimeLocal(+e),!e&&t!==void 0)return t}else if(!hh.isDateTime(e,r))return GO.error("unrecognized date",e),t;return e};var Sit=/%\d?f/g,Mit=/%h/g,Eit={1:"1",2:"1",3:"2",4:"2"};function sie(e,t,r,n){e=e.replace(Sit,function(a){var o=Math.min(+a.charAt(1)||6,6),s=(t/1e3%1+2).toFixed(o).slice(2).replace(/0+$/,"")||"0";return s});var i=new Date(Math.floor(t+.05));if(e=e.replace(Mit,function(){return Eit[r("%q")(i)]}),_y(n))try{e=yy.getComponentMethod("calendars","worldCalFmt")(e,t,n)}catch(a){return"Invalid"}return r(e)(i)}var kit=[59,59.9,59.99,59.999,59.9999];function Cit(e,t){var r=X1(e+.05,Tp),n=h3(Math.floor(r/VS),2)+":"+h3(X1(Math.floor(r/W1),60),2);if(t!=="M"){uie(t)||(t=0);var i=Math.min(X1(e/d3,60),kit[t]),a=(100+i).toFixed(t).slice(1);t>0&&(a=a.replace(/0+$/,"").replace(/[\.]$/,"")),n+=":"+a}return n}hh.formatDate=function(e,t,r,n,i,a){if(i=_y(i)&&i,!t)if(r==="y")t=a.year;else if(r==="m")t=a.month;else if(r==="d")t=a.dayMonth+`
+`+a.year;else return Cit(e,r)+`
+`+sie(a.dayMonthYear,e,n,i);return sie(t,e,n,i)};var lie=3*Tp;hh.incrementMonth=function(e,t,r){r=_y(r)&&r;var n=X1(e,Tp);if(e=Math.round(e-n),r)try{var i=Math.round(e/Tp)+GS,a=yy.getComponentMethod("calendars","getCal")(r),o=a.fromJD(i);return t%12?a.add(o,t,"m"):a.add(o,t/12,"y"),(o.toJD()-GS)*Tp+n}catch(l){GO.error("invalid ms "+e+" in calendar "+r)}var s=new Date(e+lie);return s.setUTCMonth(s.getUTCMonth()+t)+n-lie};hh.findExactDates=function(e,t){for(var r=0,n=0,i=0,a=0,o,s,l=_y(t)&&yy.getComponentMethod("calendars","getCal")(t),u=0;u<e.length;u++){if(s=e[u],!uie(s)){a++;continue}if(!(s%Tp))if(l)try{o=l.fromJD(s/Tp+GS),o.day()===1?o.month()===1?r++:n++:i++}catch(f){}else o=new Date(s),o.getUTCDate()===1?o.getUTCMonth()===0?r++:n++:i++}n+=r,i+=n;var c=e.length-a;return{exactYears:r/c,exactMonths:n/c,exactDays:i/c}}});var HS=ye((kir,hie)=>{"use strict";hie.exports=function(t){return t}});var R6=ye(xy=>{"use strict";var Lit=Eo(),Pit=G1(),Iit=HS(),Rit=fs().BADNUM,HO=1e-9;xy.findBin=function(e,t,r){if(Lit(t.start))return r?Math.ceil((e-t.start)/t.size-HO)-1:Math.floor((e-t.start)/t.size+HO);var n=0,i=t.length,a=0,o=i>1?(t[i-1]-t[0])/(i-1):1,s,l;for(o>=0?l=r?Dit:Fit:l=r?Oit:zit,e+=o*HO*(r?-1:1)*(o>=0?1:-1);n<i&&a++<100;)s=Math.floor((n+i)/2),l(t[s],e)?n=s+1:i=s;return a>90&&Pit.log("Long binary search..."),n-1};function Dit(e,t){return e<t}function Fit(e,t){return e<=t}function zit(e,t){return e>t}function Oit(e,t){return e>=t}xy.sorterAsc=function(e,t){return e-t};xy.sorterDes=function(e,t){return t-e};xy.distinctVals=function(e){var t=e.slice();t.sort(xy.sorterAsc);var r;for(r=t.length-1;r>-1&&t[r]===Rit;r--);for(var n=t[r]-t[0]||1,i=n/(r||1)/1e4,a=[],o,s=0;s<=r;s++){var l=t[s],u=l-o;o===void 0?(a.push(l),o=l):u>i&&(n=Math.min(n,u),a.push(l),o=l)}return{vals:a,minDiff:n}};xy.roundUp=function(e,t,r){for(var n=0,i=t.length-1,a,o=0,s=r?0:1,l=r?1:0,u=r?Math.ceil:Math.floor;n<i&&o++<100;)a=u((n+i)/2),t[a]<=e?n=a+s:i=a-l;return t[n]};xy.sort=function(e,t){for(var r=0,n=0,i=1;i<e.length;i++){var a=t(e[i],e[i-1]);if(a<0?r=1:a>0&&(n=1),r&&n)return e.sort(t)}return n?e:e.reverse()};xy.findIndexOfMin=function(e,t){t=t||Iit;for(var r=1/0,n,i=0;i<e.length;i++){var a=t(e[i]);a<r&&(r=a,n=i)}return n}});var Z1=ye((Lir,die)=>{"use strict";die.exports=function(t){return Object.keys(t).sort()}});var vie=ye(dh=>{"use strict";var jS=Eo(),qit=vv().isArrayOrTypedArray;dh.aggNums=function(e,t,r,n){var i,a;if((!n||n>r.length)&&(n=r.length),jS(t)||(t=!1),qit(r[0])){for(a=new Array(n),i=0;i<n;i++)a[i]=dh.aggNums(e,t,r[i]);r=a}for(i=0;i<n;i++)jS(t)?jS(r[i])&&(t=e(+t,+r[i])):t=r[i];return t};dh.len=function(e){return dh.aggNums(function(t){return t+1},0,e)};dh.mean=function(e,t){return t||(t=dh.len(e)),dh.aggNums(function(r,n){return r+n},0,e)/t};dh.geometricMean=function(e,t){return t||(t=dh.len(e)),Math.pow(dh.aggNums(function(r,n){return r*n},1,e),1/t)};dh.midRange=function(e){if(!(e===void 0||e.length===0))return(dh.aggNums(Math.max,null,e)+dh.aggNums(Math.min,null,e))/2};dh.variance=function(e,t,r){return t||(t=dh.len(e)),jS(r)||(r=dh.mean(e,t)),dh.aggNums(function(n,i){return n+Math.pow(i-r,2)},0,e)/t};dh.stdev=function(e,t,r){return Math.sqrt(dh.variance(e,t,r))};dh.median=function(e){var t=e.slice().sort();return dh.interp(t,.5)};dh.interp=function(e,t){if(!jS(t))throw"n should be a finite number";if(t=t*e.length-.5,t<0)return e[0];if(t>e.length-1)return e[e.length-1];var r=t%1;return r*e[Math.ceil(t)]+(1-r)*e[Math.floor(t)]}});var _ie=ye((Iir,yie)=>{"use strict";var pie=n3(),jO=pie.mod,Bit=pie.modHalf,WS=Math.PI,Y1=2*WS;function Nit(e){return e/180*WS}function Uit(e){return e/WS*180}function WO(e){return Math.abs(e[1]-e[0])>Y1-1e-14}function gie(e,t){return Bit(t-e,Y1)}function Vit(e,t){return Math.abs(gie(e,t))}function mie(e,t){if(WO(t))return!0;var r,n;t[0]<t[1]?(r=t[0],n=t[1]):(r=t[1],n=t[0]),r=jO(r,Y1),n=jO(n,Y1),r>n&&(n+=Y1);var i=jO(e,Y1),a=i+Y1;return i>=r&&i<=n||a>=r&&a<=n}function Git(e,t,r,n){if(!mie(t,n))return!1;var i,a;return r[0]<r[1]?(i=r[0],a=r[1]):(i=r[1],a=r[0]),e>=i&&e<=a}function XO(e,t,r,n,i,a,o){i=i||0,a=a||0;var s=WO([r,n]),l,u,c,f,h;s?(l=0,u=WS,c=Y1):r<n?(l=r,c=n):(l=n,c=r),e<t?(f=e,h=t):(f=t,h=e);function d(p,k){return[p*Math.cos(k)+i,a-p*Math.sin(k)]}var v=Math.abs(c-l)<=WS?0:1;function _(p,k,E){return"A"+[p,p]+" "+[0,v,E]+" "+d(p,k)}var b;return s?f===null?b="M"+d(h,l)+_(h,u,0)+_(h,c,0)+"Z":b="M"+d(f,l)+_(f,u,0)+_(f,c,0)+"ZM"+d(h,l)+_(h,u,1)+_(h,c,1)+"Z":f===null?(b="M"+d(h,l)+_(h,c,0),o&&(b+="L0,0Z")):b="M"+d(f,l)+"L"+d(h,l)+_(h,c,0)+"L"+d(f,c)+_(f,l,1)+"Z",b}function Hit(e,t,r,n,i){return XO(null,e,t,r,n,i,0)}function jit(e,t,r,n,i){return XO(null,e,t,r,n,i,1)}function Wit(e,t,r,n,i,a){return XO(e,t,r,n,i,a,1)}yie.exports={deg2rad:Nit,rad2deg:Uit,angleDelta:gie,angleDist:Vit,isFullCircle:WO,isAngleInsideSector:mie,isPtInsideSector:Git,pathArc:Hit,pathSector:jit,pathAnnulus:Wit}});var xie=ye(hb=>{"use strict";hb.isLeftAnchor=function(t){return t.xanchor==="left"||t.xanchor==="auto"&&t.x<=1/3};hb.isCenterAnchor=function(t){return t.xanchor==="center"||t.xanchor==="auto"&&t.x>1/3&&t.x<2/3};hb.isRightAnchor=function(t){return t.xanchor==="right"||t.xanchor==="auto"&&t.x>=2/3};hb.isTopAnchor=function(t){return t.yanchor==="top"||t.yanchor==="auto"&&t.y>=2/3};hb.isMiddleAnchor=function(t){return t.yanchor==="middle"||t.yanchor==="auto"&&t.y>1/3&&t.y<2/3};hb.isBottomAnchor=function(t){return t.yanchor==="bottom"||t.yanchor==="auto"&&t.y<=1/3}});var Tie=ye(db=>{"use strict";var ZO=n3().mod;db.segmentsIntersect=wie;function wie(e,t,r,n,i,a,o,s){var l=r-e,u=i-e,c=o-i,f=n-t,h=a-t,d=s-a,v=l*d-c*f;if(v===0)return null;var _=(u*d-c*h)/v,b=(u*f-l*h)/v;return b<0||b>1||_<0||_>1?null:{x:e+l*_,y:t+f*_}}db.segmentDistance=function(t,r,n,i,a,o,s,l){if(wie(t,r,n,i,a,o,s,l))return 0;var u=n-t,c=i-r,f=s-a,h=l-o,d=u*u+c*c,v=f*f+h*h,_=Math.min(D6(u,c,d,a-t,o-r),D6(u,c,d,s-t,l-r),D6(f,h,v,t-a,r-o),D6(f,h,v,n-a,i-o));return Math.sqrt(_)};function D6(e,t,r,n,i){var a=n*e+i*t;if(a<0)return n*n+i*i;if(a>r){var o=n-e,s=i-t;return o*o+s*s}else{var l=n*t-i*e;return l*l/r}}var F6,YO,bie;db.getTextLocation=function(t,r,n,i){if((t!==YO||i!==bie)&&(F6={},YO=t,bie=i),F6[n])return F6[n];var a=t.getPointAtLength(ZO(n-i/2,r)),o=t.getPointAtLength(ZO(n+i/2,r)),s=Math.atan((o.y-a.y)/(o.x-a.x)),l=t.getPointAtLength(ZO(n,r)),u=(l.x*4+a.x+o.x)/6,c=(l.y*4+a.y+o.y)/6,f={x:u,y:c,theta:s};return F6[n]=f,f};db.clearLocationCache=function(){YO=null};db.getVisibleSegment=function(t,r,n){var i=r.left,a=r.right,o=r.top,s=r.bottom,l=0,u=t.getTotalLength(),c=u,f,h;function d(_){var b=t.getPointAtLength(_);_===0?f=b:_===u&&(h=b);var p=b.x<i?i-b.x:b.x>a?b.x-a:0,k=b.y<o?o-b.y:b.y>s?b.y-s:0;return Math.sqrt(p*p+k*k)}for(var v=d(l);v;){if(l+=v+n,l>c)return;v=d(l)}for(v=d(c);v;){if(c-=v+n,l>c)return;v=d(c)}return{min:l,max:c,len:c-l,total:u,isClosed:l===0&&c===u&&Math.abs(f.x-h.x)<.1&&Math.abs(f.y-h.y)<.1}};db.findPointOnPath=function(t,r,n,i){i=i||{};for(var a=i.pathLength||t.getTotalLength(),o=i.tolerance||.001,s=i.iterationLimit||30,l=t.getPointAtLength(0)[n]>t.getPointAtLength(a)[n]?-1:1,u=0,c=0,f=a,h,d,v;u<s;){if(h=(c+f)/2,d=t.getPointAtLength(h),v=d[n]-r,Math.abs(v)<o)return d;l*v>0?f=h:c=h,u++}return d}});var z6=ye(XS=>{"use strict";var by={};XS.throttle=function(t,r,n){var i=by[t],a=Date.now();if(!i){for(var o in by)by[o].ts<a-6e4&&delete by[o];i=by[t]={ts:0,timer:null}}Aie(i);function s(){n(),i.ts=Date.now(),i.onDone&&(i.onDone(),i.onDone=null)}if(a>i.ts+r){s();return}i.timer=setTimeout(function(){s(),i.timer=null},r)};XS.done=function(e){var t=by[e];return!t||!t.timer?Promise.resolve():new Promise(function(r){var n=t.onDone;t.onDone=function(){n&&n(),r(),t.onDone=null}})};XS.clear=function(e){if(e)Aie(by[e]),delete by[e];else for(var t in by)XS.clear(t)};function Aie(e){e&&e.timer!==null&&(clearTimeout(e.timer),e.timer=null)}});var Mie=ye((zir,Sie)=>{"use strict";Sie.exports=function(t){t._responsiveChartHandler&&(window.removeEventListener("resize",t._responsiveChartHandler),delete t._responsiveChartHandler)}});var Eie=ye((Oir,O6)=>{"use strict";O6.exports=KO;O6.exports.isMobile=KO;O6.exports.default=KO;var Xit=/(android|bb\d+|meego).+mobile|armv7l|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series[46]0|samsungbrowser.*mobile|symbian|treo|up\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino/i,Zit=/CrOS/,Yit=/android|ipad|playbook|silk/i;function KO(e){e||(e={});let t=e.ua;if(!t&&typeof navigator!="undefined"&&(t=navigator.userAgent),t&&t.headers&&typeof t.headers["user-agent"]=="string"&&(t=t.headers["user-agent"]),typeof t!="string")return!1;let r=Xit.test(t)&&!Zit.test(t)||!!e.tablet&&Yit.test(t);return!r&&e.tablet&&e.featureDetect&&navigator&&navigator.maxTouchPoints>1&&t.indexOf("Macintosh")!==-1&&t.indexOf("Safari")!==-1&&(r=!0),r}});var Cie=ye((qir,kie)=>{"use strict";var Kit=Eo(),Jit=Eie();kie.exports=function(t){var r;if(t&&t.hasOwnProperty("userAgent")?r=t.userAgent:r=$it(),typeof r!="string")return!0;var n=Jit({ua:{headers:{"user-agent":r}},tablet:!0,featureDetect:!1});if(!n)for(var i=r.split(" "),a=1;a<i.length;a++){var o=i[a];if(o.indexOf("Safari")!==-1)for(var s=a-1;s>-1;s--){var l=i[s];if(l.slice(0,8)==="Version/"){var u=l.slice(8).split(".")[0];if(Kit(u)&&(u=+u),u>=13)return!0}}}return n};function $it(){var e;return typeof navigator!="undefined"&&(e=navigator.userAgent),e&&e.headers&&typeof e.headers["user-agent"]=="string"&&(e=e.headers["user-agent"]),e}});var Pie=ye((Bir,Lie)=>{"use strict";var Qit=Oa();Lie.exports=function(t,r,n){var i=t.selectAll("g."+n.replace(/\s/g,".")).data(r,function(o){return o[0].trace.uid});i.exit().remove(),i.enter().append("g").attr("class",n),i.order();var a=t.classed("rangeplot")?"nodeRangePlot3":"node3";return i.each(function(o){o[0][a]=Qit.select(this)}),i}});var Rie=ye((Nir,Iie)=>{"use strict";var ent=qa();Iie.exports=function(t,r){for(var n=t._context.locale,i=0;i<2;i++){for(var a=t._context.locales,o=0;o<2;o++){var s=(a[n]||{}).dictionary;if(s){var l=s[r];if(l)return l}a=ent.localeRegistry}var u=n.split("-")[0];if(u===n)break;n=u}return r}});var JO=ye((Uir,Die)=>{"use strict";Die.exports=function(t){for(var r={},n=[],i=0,a=0;a<t.length;a++){var o=t[a];r[o]!==1&&(r[o]=1,n[i++]=o)}return n}});var zie=ye((Vir,Fie)=>{"use strict";Fie.exports=function(t){for(var r=int(t)?rnt:tnt,n=[],i=0;i<t.length;i++){var a=t[i];r(a)&&n.push(a)}return n};function tnt(e){return e.visible===!0}function rnt(e){var t=e[0].trace;return t.visible===!0&&t._length!==0}function int(e){return Array.isArray(e)&&Array.isArray(e[0])&&e[0][0]&&e[0][0].trace}});var qie=ye((Gir,Oie)=>{"use strict";Oie.exports=function(t,r){if(!r)return t;var n=1/Math.abs(r),i=n>1?(n*t+n*r)/n:t+r,a=String(i).length;if(a>16){var o=String(r).length,s=String(t).length;if(a>=s+o){var l=parseFloat(i).toPrecision(12);l.indexOf("e+")===-1&&(i=+l)}}return i}});var Nie=ye((Hir,Bie)=>{"use strict";var nnt=Eo(),ant=fs().BADNUM,ont=/^['"%,$#\s']+|[, ]|['"%,$#\s']+$/g;Bie.exports=function(t){return typeof t=="string"&&(t=t.replace(ont,"")),nnt(t)?Number(t):ant}});var Dr=ye((jir,Qie)=>{"use strict";var ZS=Oa(),snt=r3().utcFormat,lnt=wO().format,Wie=Eo(),Xie=fs(),Zie=Xie.FP_SAFE,unt=-Zie,Uie=Xie.BADNUM,Mi=Qie.exports={};Mi.adjustFormat=function(t){return!t||/^\d[.]\df/.test(t)||/[.]\d%/.test(t)?t:t==="0.f"?"~f":/^\d%/.test(t)?"~%":/^\ds/.test(t)?"~s":!/^[~,.0$]/.test(t)&&/[&fps]/.test(t)?"~"+t:t};var Vie={};Mi.warnBadFormat=function(e){var t=String(e);Vie[t]||(Vie[t]=1,Mi.warn('encountered bad format: "'+t+'"'))};Mi.noFormat=function(e){return String(e)};Mi.numberFormat=function(e){var t;try{t=lnt(Mi.adjustFormat(e))}catch(r){return Mi.warnBadFormat(e),Mi.noFormat}return t};Mi.nestedProperty=PS();Mi.keyedContainer=Dee();Mi.relativeAttr=zee();Mi.isPlainObject=my();Mi.toLogRange=m6();Mi.relinkPrivateKeys=Nee();var K1=vv();Mi.isArrayBuffer=K1.isArrayBuffer;Mi.isTypedArray=K1.isTypedArray;Mi.isArrayOrTypedArray=K1.isArrayOrTypedArray;Mi.isArray1D=K1.isArray1D;Mi.ensureArray=K1.ensureArray;Mi.concat=K1.concat;Mi.maxRowLength=K1.maxRowLength;Mi.minRowLength=K1.minRowLength;var Yie=n3();Mi.mod=Yie.mod;Mi.modHalf=Yie.modHalf;var J1=ate();Mi.valObjectMeta=J1.valObjectMeta;Mi.coerce=J1.coerce;Mi.coerce2=J1.coerce2;Mi.coerceFont=J1.coerceFont;Mi.coercePattern=J1.coercePattern;Mi.coerceHoverinfo=J1.coerceHoverinfo;Mi.coerceSelectionMarkerOpacity=J1.coerceSelectionMarkerOpacity;Mi.validate=J1.validate;var jp=fie();Mi.dateTime2ms=jp.dateTime2ms;Mi.isDateTime=jp.isDateTime;Mi.ms2DateTime=jp.ms2DateTime;Mi.ms2DateTimeLocal=jp.ms2DateTimeLocal;Mi.cleanDate=jp.cleanDate;Mi.isJSDate=jp.isJSDate;Mi.formatDate=jp.formatDate;Mi.incrementMonth=jp.incrementMonth;Mi.dateTick0=jp.dateTick0;Mi.dfltRange=jp.dfltRange;Mi.findExactDates=jp.findExactDates;Mi.MIN_MS=jp.MIN_MS;Mi.MAX_MS=jp.MAX_MS;var vb=R6();Mi.findBin=vb.findBin;Mi.sorterAsc=vb.sorterAsc;Mi.sorterDes=vb.sorterDes;Mi.distinctVals=vb.distinctVals;Mi.roundUp=vb.roundUp;Mi.sort=vb.sort;Mi.findIndexOfMin=vb.findIndexOfMin;Mi.sortObjectKeys=Z1();var wy=vie();Mi.aggNums=wy.aggNums;Mi.len=wy.len;Mi.mean=wy.mean;Mi.geometricMean=wy.geometricMean;Mi.median=wy.median;Mi.midRange=wy.midRange;Mi.variance=wy.variance;Mi.stdev=wy.stdev;Mi.interp=wy.interp;var xg=T6();Mi.init2dArray=xg.init2dArray;Mi.transposeRagged=xg.transposeRagged;Mi.dot=xg.dot;Mi.translationMatrix=xg.translationMatrix;Mi.rotationMatrix=xg.rotationMatrix;Mi.rotationXYMatrix=xg.rotationXYMatrix;Mi.apply3DTransform=xg.apply3DTransform;Mi.apply2DTransform=xg.apply2DTransform;Mi.apply2DTransform2=xg.apply2DTransform2;Mi.convertCssMatrix=xg.convertCssMatrix;Mi.inverseTransformMatrix=xg.inverseTransformMatrix;var vm=_ie();Mi.deg2rad=vm.deg2rad;Mi.rad2deg=vm.rad2deg;Mi.angleDelta=vm.angleDelta;Mi.angleDist=vm.angleDist;Mi.isFullCircle=vm.isFullCircle;Mi.isAngleInsideSector=vm.isAngleInsideSector;Mi.isPtInsideSector=vm.isPtInsideSector;Mi.pathArc=vm.pathArc;Mi.pathSector=vm.pathSector;Mi.pathAnnulus=vm.pathAnnulus;var g3=xie();Mi.isLeftAnchor=g3.isLeftAnchor;Mi.isCenterAnchor=g3.isCenterAnchor;Mi.isRightAnchor=g3.isRightAnchor;Mi.isTopAnchor=g3.isTopAnchor;Mi.isMiddleAnchor=g3.isMiddleAnchor;Mi.isBottomAnchor=g3.isBottomAnchor;var m3=Tie();Mi.segmentsIntersect=m3.segmentsIntersect;Mi.segmentDistance=m3.segmentDistance;Mi.getTextLocation=m3.getTextLocation;Mi.clearLocationCache=m3.clearLocationCache;Mi.getVisibleSegment=m3.getVisibleSegment;Mi.findPointOnPath=m3.findPointOnPath;var N6=Ao();Mi.extendFlat=N6.extendFlat;Mi.extendDeep=N6.extendDeep;Mi.extendDeepAll=N6.extendDeepAll;Mi.extendDeepNoArrays=N6.extendDeepNoArrays;var $O=G1();Mi.log=$O.log;Mi.warn=$O.warn;Mi.error=$O.error;var cnt=o3();Mi.counterRegex=cnt.counter;var QO=z6();Mi.throttle=QO.throttle;Mi.throttleDone=QO.done;Mi.clearThrottle=QO.clear;var bg=NS();Mi.getGraphDiv=bg.getGraphDiv;Mi.isPlotDiv=bg.isPlotDiv;Mi.removeElement=bg.removeElement;Mi.addStyleRule=bg.addStyleRule;Mi.addRelatedStyleRule=bg.addRelatedStyleRule;Mi.deleteRelatedStyleRule=bg.deleteRelatedStyleRule;Mi.setStyleOnHover=bg.setStyleOnHover;Mi.getFullTransformMatrix=bg.getFullTransformMatrix;Mi.getElementTransformMatrix=bg.getElementTransformMatrix;Mi.getElementAndAncestors=bg.getElementAndAncestors;Mi.equalDomRects=bg.equalDomRects;Mi.clearResponsive=Mie();Mi.preserveDrawingBuffer=Cie();Mi.makeTraceGroups=Pie();Mi._=Rie();Mi.notifier=RO();Mi.filterUnique=JO();Mi.filterVisible=zie();Mi.pushUnique=zO();Mi.increment=qie();Mi.cleanNumber=Nie();Mi.ensureNumber=function(t){return Wie(t)?(t=Number(t),t>Zie||t<unt?Uie:t):Uie};Mi.isIndex=function(e,t){return t!==void 0&&e>=t?!1:Wie(e)&&e>=0&&e%1===0};Mi.noop=w6();Mi.identity=HS();Mi.repeat=function(e,t){for(var r=new Array(t),n=0;n<t;n++)r[n]=e;return r};Mi.swapAttrs=function(e,t,r,n){r||(r="x"),n||(n="y");for(var i=0;i<t.length;i++){var a=t[i],o=Mi.nestedProperty(e,a.replace("?",r)),s=Mi.nestedProperty(e,a.replace("?",n)),l=o.get();o.set(s.get()),s.set(l)}};Mi.raiseToTop=function(t){t.parentNode.appendChild(t)};Mi.cancelTransition=function(e){return e.transition().duration(0)};Mi.constrain=function(e,t,r){return t>r?Math.max(r,Math.min(t,e)):Math.max(t,Math.min(r,e))};Mi.bBoxIntersect=function(e,t,r){return r=r||0,e.left<=t.right+r&&t.left<=e.right+r&&e.top<=t.bottom+r&&t.top<=e.bottom+r};Mi.simpleMap=function(e,t,r,n,i){for(var a=e.length,o=new Array(a),s=0;s<a;s++)o[s]=t(e[s],r,n,i);return o};Mi.randstr=function e(t,r,n,i){if(n||(n=16),r===void 0&&(r=24),r<=0)return"0";var a=Math.log(Math.pow(2,r))/Math.log(n),o="",s,l,u;for(s=2;a===1/0;s*=2)a=Math.log(Math.pow(2,r/s))/Math.log(n)*s;var c=a-Math.floor(a);for(s=0;s<Math.floor(a);s++)u=Math.floor(Math.random()*n).toString(n),o=u+o;c&&(l=Math.pow(n,c),u=Math.floor(Math.random()*l).toString(n),o=u+o);var f=parseInt(o,n);return t&&t[o]||f!==1/0&&f>=Math.pow(2,r)?i>10?(Mi.warn("randstr failed uniqueness"),o):e(t,r,n,(i||0)+1):o};Mi.OptionControl=function(e,t){e||(e={}),t||(t="opt");var r={};return r.optionList=[],r._newoption=function(n){n[t]=e,r[n.name]=n,r.optionList.push(n)},r["_"+t]=e,r};Mi.smooth=function(e,t){if(t=Math.round(t)||0,t<2)return e;var r=e.length,n=2*r,i=2*t-1,a=new Array(i),o=new Array(r),s,l,u,c;for(s=0;s<i;s++)a[s]=(1-Math.cos(Math.PI*(s+1)/t))/(2*t);for(s=0;s<r;s++){for(c=0,l=0;l<i;l++)u=s+l+1-t,u<-r?u-=n*Math.round(u/n):u>=n&&(u-=n*Math.floor(u/n)),u<0?u=-1-u:u>=r&&(u=n-1-u),c+=e[u]*a[l];o[s]=c}return o};Mi.syncOrAsync=function(e,t,r){var n,i;function a(){return Mi.syncOrAsync(e,t,r)}for(;e.length;)if(i=e.splice(0,1)[0],n=i(t),n&&n.then)return n.then(a);return r&&r(t)};Mi.stripTrailingSlash=function(e){return e.slice(-1)==="/"?e.slice(0,-1):e};Mi.noneOrAll=function(e,t,r){if(e){var n=!1,i=!0,a,o;for(a=0;a<r.length;a++)o=e[r[a]],o!=null?n=!0:i=!1;if(n&&!i)for(a=0;a<r.length;a++)e[r[a]]=t[r[a]]}};Mi.mergeArray=function(e,t,r,n){var i=typeof n=="function";if(Mi.isArrayOrTypedArray(e))for(var a=Math.min(e.length,t.length),o=0;o<a;o++){var s=e[o];t[o][r]=i?n(s):s}};Mi.mergeArrayCastPositive=function(e,t,r){return Mi.mergeArray(e,t,r,function(n){var i=+n;return isFinite(i)&&i>0?i:0})};Mi.fillArray=function(e,t,r,n){if(n=n||Mi.identity,Mi.isArrayOrTypedArray(e))for(var i=0;i<t.length;i++)t[i][r]=n(e[i])};Mi.castOption=function(e,t,r,n){n=n||Mi.identity;var i=Mi.nestedProperty(e,r).get();return Mi.isArrayOrTypedArray(i)?Array.isArray(t)&&Mi.isArrayOrTypedArray(i[t[0]])?n(i[t[0]][t[1]]):n(i[t]):i};Mi.extractOption=function(e,t,r,n){if(r in e)return e[r];var i=Mi.nestedProperty(t,n).get();if(!Array.isArray(i))return i};function Kie(e){var t={};for(var r in e)for(var n=e[r],i=0;i<n.length;i++)t[n[i]]=+r;return t}Mi.tagSelected=function(e,t,r){var n=t.selectedpoints,i=t._indexToPoints,a;i&&(a=Kie(i));function o(f){return f!==void 0&&f<e.length}for(var s=0;s<n.length;s++){var l=n[s];if(Mi.isIndex(l)||Mi.isArrayOrTypedArray(l)&&Mi.isIndex(l[0])&&Mi.isIndex(l[1])){var u=a?a[l]:l,c=r?r[u]:u;o(c)&&(e[c].selected=1)}}};Mi.selIndices2selPoints=function(e){var t=e.selectedpoints,r=e._indexToPoints;if(r){for(var n=Kie(r),i=[],a=0;a<t.length;a++){var o=t[a];if(Mi.isIndex(o)){var s=n[o];Mi.isIndex(s)&&i.push(s)}}return i}else return t};Mi.getTargetArray=function(e,t){var r=t.target;if(typeof r=="string"&&r){var n=Mi.nestedProperty(e,r).get();return Mi.isArrayOrTypedArray(n)?n:!1}else if(Mi.isArrayOrTypedArray(r))return r;return!1};function Jie(e,t,r){var n={};typeof t!="object"&&(t={});var i=r==="pieLike"?-1:3,a=Object.keys(e),o,s,l;for(o=0;o<a.length;o++)s=a[o],l=e[s],!(s.charAt(0)==="_"||typeof l=="function")&&(s==="module"?n[s]=l:Array.isArray(l)?s==="colorscale"||i===-1?n[s]=l.slice():n[s]=l.slice(0,i):Mi.isTypedArray(l)?i===-1?n[s]=l.subarray():n[s]=l.subarray(0,i):l&&typeof l=="object"?n[s]=Jie(e[s],t[s],r):n[s]=l);for(a=Object.keys(t),o=0;o<a.length;o++)s=a[o],l=t[s],(typeof l!="object"||!(s in n)||typeof n[s]!="object")&&(n[s]=l);return n}Mi.minExtend=Jie;Mi.titleCase=function(e){return e.charAt(0).toUpperCase()+e.slice(1)};Mi.containsAny=function(e,t){for(var r=0;r<t.length;r++)if(e.indexOf(t[r])!==-1)return!0;return!1};var fnt=/Version\/[\d\.]+.*Safari/;Mi.isSafari=function(){return fnt.test(window.navigator.userAgent)};var hnt=/iPad|iPhone|iPod/;Mi.isIOS=function(){return hnt.test(window.navigator.userAgent)};var dnt=/Macintosh.+AppleWebKit.+Gecko\)$/;Mi.isMacWKWebView=()=>dnt.test(window.navigator.userAgent);var vnt=/Firefox\/(\d+)\.\d+/;Mi.getFirefoxVersion=function(){var e=vnt.exec(window.navigator.userAgent);if(e&&e.length===2){var t=parseInt(e[1]);if(!isNaN(t))return t}return null};Mi.isD3Selection=function(e){return e instanceof ZS.selection};Mi.ensureSingle=function(e,t,r,n){var i=e.select(t+(r?"."+r:""));if(i.size())return i;var a=e.append(t);return r&&a.classed(r,!0),n&&a.call(n),a};Mi.ensureSingleById=function(e,t,r,n){var i=e.select(t+"#"+r);if(i.size())return i;var a=e.append(t).attr("id",r);return n&&a.call(n),a};Mi.objectFromPath=function(e,t){for(var r=e.split("."),n,i=n={},a=0;a<r.length;a++){var o=r[a],s=null,l=r[a].match(/(.*)\[([0-9]+)\]/);l?(o=l[1],s=l[2],n=n[o]=[],a===r.length-1?n[s]=t:n[s]={},n=n[s]):(a===r.length-1?n[o]=t:n[o]={},n=n[o])}return i};var pnt=/^([^\[\.]+)\.(.+)?/,gnt=/^([^\.]+)\[([0-9]+)\](\.)?(.+)?/;function q6(e){return e.slice(0,2)==="__"}Mi.expandObjectPaths=function(e){var t,r,n,i,a,o,s;if(typeof e=="object"&&!Array.isArray(e)){for(r in e)if(e.hasOwnProperty(r))if(t=r.match(pnt)){if(i=e[r],n=t[1],q6(n))continue;delete e[r],e[n]=Mi.extendDeepNoArrays(e[n]||{},Mi.objectFromPath(r,Mi.expandObjectPaths(i))[n])}else if(t=r.match(gnt)){if(i=e[r],n=t[1],q6(n))continue;if(a=parseInt(t[2]),delete e[r],e[n]=e[n]||[],t[3]===".")s=t[4],o=e[n][a]=e[n][a]||{},Mi.extendDeepNoArrays(o,Mi.objectFromPath(s,Mi.expandObjectPaths(i)));else{if(q6(n))continue;e[n][a]=Mi.expandObjectPaths(i)}}else{if(q6(r))continue;e[r]=Mi.expandObjectPaths(e[r])}}return e};Mi.numSeparate=function(e,t,r){if(r||(r=!1),typeof t!="string"||t.length===0)throw new Error("Separator string required for formatting!");typeof e=="number"&&(e=String(e));var n=/(\d+)(\d{3})/,i=t.charAt(0),a=t.charAt(1),o=e.split("."),s=o[0],l=o.length>1?i+o[1]:"";if(a&&(o.length>1||s.length>4||r))for(;n.test(s);)s=s.replace(n,"$1"+a+"$2");return s+l};Mi.TEMPLATE_STRING_REGEX=/%{([^\s%{}:]*)([:|\|][^}]*)?}/g;var $ie=/^\w*$/;Mi.templateString=function(e,t){var r={};return e.replace(Mi.TEMPLATE_STRING_REGEX,function(n,i){var a;return $ie.test(i)?a=t[i]:(r[i]=r[i]||Mi.nestedProperty(t,i).get,a=r[i](!0)),a!==void 0?a:""})};var mnt={max:10,count:0,name:"hovertemplate"};Mi.hovertemplateString=e=>eq(q1(mg({},e),{opts:mnt}));var ynt={max:10,count:0,name:"texttemplate"};Mi.texttemplateString=e=>eq(q1(mg({},e),{opts:ynt}));var _nt=/^(\S+)([\*\/])(-?\d+(\.\d+)?)$/;function xnt(e){var t=e.match(_nt);return t?{key:t[1],op:t[2],number:Number(t[3])}:{key:e,op:null,number:null}}var bnt={max:10,count:0,name:"texttemplate",parseMultDiv:!0};Mi.texttemplateStringForShapes=e=>eq(q1(mg({},e),{opts:bnt}));var Gie=/^[:|\|]/;function eq({data:e=[],locale:t,fallback:r,labels:n={},opts:i,template:a}){return a.replace(Mi.TEMPLATE_STRING_REGEX,(o,s,l)=>{let u=["xother","yother"].includes(s),c=["_xother","_yother"].includes(s),f=["_xother_","_yother_"].includes(s),h=["xother_","yother_"].includes(s),d=u||c||h||f;(c||f)&&(s=s.substring(1)),(h||f)&&(s=s.substring(0,s.length-1));let v=null,_=null;if(i.parseMultDiv){var b=xnt(s);s=b.key,v=b.op,_=b.number}let p;if(d){if(n[s]===void 0)return"";p=n[s]}else for(let L of e)if(L){if(L.hasOwnProperty(s)){p=L[s];break}if($ie.test(s)||(p=Mi.nestedProperty(L,s).get(!0)),p!==void 0)break}if(p===void 0){let{count:L,max:x,name:C}=i,M=r===!1?o:r;return L<x&&Mi.warn([`Variable '${s}' in ${C} could not be found!`,"Please verify that the template is correct.",`Using value: '${M}'.`].join(" ")),L===x&&Mi.warn(`Too many '${C}' warnings - additional warnings will be suppressed.`),i.count++,M}if(v==="*"&&(p*=_),v==="/"&&(p/=_),l){var k;if(l[0]===":"&&(k=t?t.numberFormat:Mi.numberFormat,p!==""&&(p=k(l.replace(Gie,""))(p))),l[0]==="|"){k=t?t.timeFormat:snt;var E=Mi.dateTime2ms(p);p=Mi.formatDate(E,l.replace(Gie,""),!1,k)}}else{var S=s+"Label";n.hasOwnProperty(S)&&(p=n[S])}return d&&(p="("+p+")",(c||f)&&(p=" "+p),(h||f)&&(p=p+" ")),p})}var B6=48,Hie=57;Mi.subplotSort=function(e,t){for(var r=Math.min(e.length,t.length)+1,n=0,i=0,a=0;a<r;a++){var o=e.charCodeAt(a)||0,s=t.charCodeAt(a)||0,l=o>=B6&&o<=Hie,u=s>=B6&&s<=Hie;if(l&&(n=10*n+o-B6),u&&(i=10*i+s-B6),!l||!u){if(n!==i)return n-i;if(o!==s)return o-s}}return i-n};var p3=2e9;Mi.seedPseudoRandom=function(){p3=2e9};Mi.pseudoRandom=function(){var e=p3;return p3=(69069*p3+1)%4294967296,Math.abs(p3-e)<429496729?Mi.pseudoRandom():p3/4294967296};Mi.fillText=function(e,t,r){var n=Array.isArray(r)?function(o){r.push(o)}:function(o){r.text=o},i=Mi.extractOption(e,t,"htx","hovertext");if(Mi.isValidTextValue(i))return n(i);var a=Mi.extractOption(e,t,"tx","text");if(Mi.isValidTextValue(a))return n(a)};Mi.isValidTextValue=function(e){return e||e===0};Mi.formatPercent=function(e,t){t=t||0;for(var r=(Math.round(100*e*Math.pow(10,t))*Math.pow(.1,t)).toFixed(t)+"%",n=0;n<t;n++)r.indexOf(".")!==-1&&(r=r.replace("0%","%"),r=r.replace(".%","%"));return r};Mi.isHidden=function(e){var t=window.getComputedStyle(e).display;return!t||t==="none"};Mi.strTranslate=function(e,t){return e||t?"translate("+e+","+t+")":""};Mi.strRotate=function(e){return e?"rotate("+e+")":""};Mi.strScale=function(e){return e!==1?"scale("+e+")":""};Mi.getTextTransform=function(e){var t=e.noCenter,r=e.textX,n=e.textY,i=e.targetX,a=e.targetY,o=e.anchorX||0,s=e.anchorY||0,l=e.rotate,u=e.scale;return u?u>1&&(u=1):u=0,Mi.strTranslate(i-u*(r+o),a-u*(n+s))+Mi.strScale(u)+(l?"rotate("+l+(t?"":" "+r+" "+n)+")":"")};Mi.setTransormAndDisplay=function(e,t){e.attr("transform",Mi.getTextTransform(t)),e.style("display",t.scale?null:"none")};Mi.ensureUniformFontSize=function(e,t){var r=Mi.extendFlat({},t);return r.size=Math.max(t.size,e._fullLayout.uniformtext.minsize||0),r};Mi.join2=function(e,t,r){var n=e.length;return n>1?e.slice(0,-1).join(t)+r+e[n-1]:e.join(t)};Mi.bigFont=function(e){return Math.round(1.2*e)};var jie=Mi.getFirefoxVersion(),wnt=jie!==null&&jie<86;Mi.getPositionFromD3Event=function(){return wnt?[ZS.event.layerX,ZS.event.layerY]:[ZS.event.offsetX,ZS.event.offsetY]}});var rne=ye(()=>{"use strict";var Tnt=Dr(),ene={"X,X div":'direction:ltr;font-family:"Open Sans",verdana,arial,sans-serif;margin:0;padding:0;border:0;',"X input,X button":'font-family:"Open Sans",verdana,arial,sans-serif;',"X input:focus,X button:focus":"outline:none;","X a":"text-decoration:none;","X a:hover":"text-decoration:none;","X .crisp":"shape-rendering:crispEdges;","X .user-select-none":"-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;user-select:none;","X svg a":"fill:#447adb;","X svg a:hover":"fill:#3c6dc5;","X .main-svg":"position:absolute;top:0;left:0;pointer-events:none;","X .main-svg .draglayer":"pointer-events:all;","X .cursor-default":"cursor:default;","X .cursor-pointer":"cursor:pointer;","X .cursor-crosshair":"cursor:crosshair;","X .cursor-move":"cursor:move;","X .cursor-col-resize":"cursor:col-resize;","X .cursor-row-resize":"cursor:row-resize;","X .cursor-ns-resize":"cursor:ns-resize;","X .cursor-ew-resize":"cursor:ew-resize;","X .cursor-sw-resize":"cursor:sw-resize;","X .cursor-s-resize":"cursor:s-resize;","X .cursor-se-resize":"cursor:se-resize;","X .cursor-w-resize":"cursor:w-resize;","X .cursor-e-resize":"cursor:e-resize;","X .cursor-nw-resize":"cursor:nw-resize;","X .cursor-n-resize":"cursor:n-resize;","X .cursor-ne-resize":"cursor:ne-resize;","X .cursor-grab":"cursor:-webkit-grab;cursor:grab;","X .modebar":"position:absolute;top:2px;right:2px;","X .ease-bg":"-webkit-transition:background-color .3s ease 0s;-moz-transition:background-color .3s ease 0s;-ms-transition:background-color .3s ease 0s;-o-transition:background-color .3s ease 0s;transition:background-color .3s ease 0s;","X .modebar--hover>:not(.watermark)":"opacity:0;-webkit-transition:opacity .3s ease 0s;-moz-transition:opacity .3s ease 0s;-ms-transition:opacity .3s ease 0s;-o-transition:opacity .3s ease 0s;transition:opacity .3s ease 0s;","X:hover .modebar--hover .modebar-group":"opacity:1;","X:focus-within .modebar--hover .modebar-group":"opacity:1;","X .modebar-group":"float:left;display:inline-block;box-sizing:border-box;padding-left:8px;position:relative;vertical-align:middle;white-space:nowrap;","X .modebar-group a":"display:grid;place-content:center;","X .modebar-btn":"position:relative;font-size:16px;padding:3px 4px;height:22px;cursor:pointer;line-height:normal;box-sizing:border-box;border:none;background:rgba(0,0,0,0);","X .modebar-btn svg":"position:relative;","X .modebar-btn:focus-visible":"outline:1px solid #000;outline-offset:1px;border-radius:3px;","X .modebar.vertical":"display:flex;flex-direction:column;flex-wrap:wrap;align-content:flex-end;max-height:100%;","X .modebar.vertical svg":"top:-1px;","X .modebar.vertical .modebar-group":"display:block;float:none;padding-left:0px;padding-bottom:8px;","X .modebar.vertical .modebar-group .modebar-btn":"display:block;text-align:center;","X [data-title]:before,X [data-title]:after":"position:absolute;-webkit-transform:translate3d(0, 0, 0);-moz-transform:translate3d(0, 0, 0);-ms-transform:translate3d(0, 0, 0);-o-transform:translate3d(0, 0, 0);transform:translate3d(0, 0, 0);display:none;opacity:0;z-index:1001;pointer-events:none;top:110%;right:50%;","X [data-title]:hover:before,X [data-title]:hover:after":"display:block;opacity:1;","X [data-title]:before":'content:"";position:absolute;background:rgba(0,0,0,0);border:6px solid rgba(0,0,0,0);z-index:1002;margin-top:-12px;border-bottom-color:#69738a;margin-right:-6px;',"X [data-title]:after":"content:attr(data-title);background:#69738a;color:#fff;padding:8px 10px;font-size:12px;line-height:12px;white-space:nowrap;margin-right:-18px;border-radius:2px;","X .vertical [data-title]:before,X .vertical [data-title]:after":"top:0%;right:200%;","X .vertical [data-title]:before":"border:6px solid rgba(0,0,0,0);border-left-color:#69738a;margin-top:8px;margin-right:-30px;",Y:'font-family:"Open Sans",verdana,arial,sans-serif;position:fixed;top:50px;right:20px;z-index:10000;font-size:10pt;max-width:180px;',"Y p":"margin:0;","Y .notifier-note":"min-width:180px;max-width:250px;border:1px solid #fff;z-index:3000;margin:0;background-color:#8c97af;background-color:rgba(140,151,175,.9);color:#fff;padding:10px;overflow-wrap:break-word;word-wrap:break-word;-ms-hyphens:auto;-webkit-hyphens:auto;hyphens:auto;","Y .notifier-close":"color:#fff;opacity:.8;float:right;padding:0 5px;background:none;border:none;font-size:20px;font-weight:bold;line-height:20px;","Y .notifier-close:hover":"color:#444;text-decoration:none;cursor:pointer;"};for(tq in ene)tne=tq.replace(/^,/," ,").replace(/X/g,".js-plotly-plot .plotly").replace(/Y/g,".plotly-notifier"),Tnt.addStyleRule(tne,ene[tq]);var tne,tq});var rq=ye((Yir,ine)=>{ine.exports=!0});var nq=ye((Kir,nne)=>{"use strict";var Ant=rq(),iq;typeof window.matchMedia=="function"?iq=!window.matchMedia("(hover: none)").matches:iq=Ant;nne.exports=iq});var pb=ye((Jir,aq)=>{"use strict";var y3=typeof Reflect=="object"?Reflect:null,ane=y3&&typeof y3.apply=="function"?y3.apply:function(t,r,n){return Function.prototype.apply.call(t,r,n)},U6;y3&&typeof y3.ownKeys=="function"?U6=y3.ownKeys:Object.getOwnPropertySymbols?U6=function(t){return Object.getOwnPropertyNames(t).concat(Object.getOwnPropertySymbols(t))}:U6=function(t){return Object.getOwnPropertyNames(t)};function Snt(e){console&&console.warn&&console.warn(e)}var sne=Number.isNaN||function(t){return t!==t};function Jc(){Jc.init.call(this)}aq.exports=Jc;aq.exports.once=Cnt;Jc.EventEmitter=Jc;Jc.prototype._events=void 0;Jc.prototype._eventsCount=0;Jc.prototype._maxListeners=void 0;var one=10;function V6(e){if(typeof e!="function")throw new TypeError('The "listener" argument must be of type Function. Received type '+typeof e)}Object.defineProperty(Jc,"defaultMaxListeners",{enumerable:!0,get:function(){return one},set:function(e){if(typeof e!="number"||e<0||sne(e))throw new RangeError('The value of "defaultMaxListeners" is out of range. It must be a non-negative number. Received '+e+".");one=e}});Jc.init=function(){(this._events===void 0||this._events===Object.getPrototypeOf(this)._events)&&(this._events=Object.create(null),this._eventsCount=0),this._maxListeners=this._maxListeners||void 0};Jc.prototype.setMaxListeners=function(t){if(typeof t!="number"||t<0||sne(t))throw new RangeError('The value of "n" is out of range. It must be a non-negative number. Received '+t+".");return this._maxListeners=t,this};function lne(e){return e._maxListeners===void 0?Jc.defaultMaxListeners:e._maxListeners}Jc.prototype.getMaxListeners=function(){return lne(this)};Jc.prototype.emit=function(t){for(var r=[],n=1;n<arguments.length;n++)r.push(arguments[n]);var i=t==="error",a=this._events;if(a!==void 0)i=i&&a.error===void 0;else if(!i)return!1;if(i){var o;if(r.length>0&&(o=r[0]),o instanceof Error)throw o;var s=new Error("Unhandled error."+(o?" ("+o.message+")":""));throw s.context=o,s}var l=a[t];if(l===void 0)return!1;if(typeof l=="function")ane(l,this,r);else for(var u=l.length,c=dne(l,u),n=0;n<u;++n)ane(c[n],this,r);return!0};function une(e,t,r,n){var i,a,o;if(V6(r),a=e._events,a===void 0?(a=e._events=Object.create(null),e._eventsCount=0):(a.newListener!==void 0&&(e.emit("newListener",t,r.listener?r.listener:r),a=e._events),o=a[t]),o===void 0)o=a[t]=r,++e._eventsCount;else if(typeof o=="function"?o=a[t]=n?[r,o]:[o,r]:n?o.unshift(r):o.push(r),i=lne(e),i>0&&o.length>i&&!o.warned){o.warned=!0;var s=new Error("Possible EventEmitter memory leak detected. "+o.length+" "+String(t)+" listeners added. Use emitter.setMaxListeners() to increase limit");s.name="MaxListenersExceededWarning",s.emitter=e,s.type=t,s.count=o.length,Snt(s)}return e}Jc.prototype.addListener=function(t,r){return une(this,t,r,!1)};Jc.prototype.on=Jc.prototype.addListener;Jc.prototype.prependListener=function(t,r){return une(this,t,r,!0)};function Mnt(){if(!this.fired)return this.target.removeListener(this.type,this.wrapFn),this.fired=!0,arguments.length===0?this.listener.call(this.target):this.listener.apply(this.target,arguments)}function cne(e,t,r){var n={fired:!1,wrapFn:void 0,target:e,type:t,listener:r},i=Mnt.bind(n);return i.listener=r,n.wrapFn=i,i}Jc.prototype.once=function(t,r){return V6(r),this.on(t,cne(this,t,r)),this};Jc.prototype.prependOnceListener=function(t,r){return V6(r),this.prependListener(t,cne(this,t,r)),this};Jc.prototype.removeListener=function(t,r){var n,i,a,o,s;if(V6(r),i=this._events,i===void 0)return this;if(n=i[t],n===void 0)return this;if(n===r||n.listener===r)--this._eventsCount===0?this._events=Object.create(null):(delete i[t],i.removeListener&&this.emit("removeListener",t,n.listener||r));else if(typeof n!="function"){for(a=-1,o=n.length-1;o>=0;o--)if(n[o]===r||n[o].listener===r){s=n[o].listener,a=o;break}if(a<0)return this;a===0?n.shift():Ent(n,a),n.length===1&&(i[t]=n[0]),i.removeListener!==void 0&&this.emit("removeListener",t,s||r)}return this};Jc.prototype.off=Jc.prototype.removeListener;Jc.prototype.removeAllListeners=function(t){var r,n,i;if(n=this._events,n===void 0)return this;if(n.removeListener===void 0)return arguments.length===0?(this._events=Object.create(null),this._eventsCount=0):n[t]!==void 0&&(--this._eventsCount===0?this._events=Object.create(null):delete n[t]),this;if(arguments.length===0){var a=Object.keys(n),o;for(i=0;i<a.length;++i)o=a[i],o!=="removeListener"&&this.removeAllListeners(o);return this.removeAllListeners("removeListener"),this._events=Object.create(null),this._eventsCount=0,this}if(r=n[t],typeof r=="function")this.removeListener(t,r);else if(r!==void 0)for(i=r.length-1;i>=0;i--)this.removeListener(t,r[i]);return this};function fne(e,t,r){var n=e._events;if(n===void 0)return[];var i=n[t];return i===void 0?[]:typeof i=="function"?r?[i.listener||i]:[i]:r?knt(i):dne(i,i.length)}Jc.prototype.listeners=function(t){return fne(this,t,!0)};Jc.prototype.rawListeners=function(t){return fne(this,t,!1)};Jc.listenerCount=function(e,t){return typeof e.listenerCount=="function"?e.listenerCount(t):hne.call(e,t)};Jc.prototype.listenerCount=hne;function hne(e){var t=this._events;if(t!==void 0){var r=t[e];if(typeof r=="function")return 1;if(r!==void 0)return r.length}return 0}Jc.prototype.eventNames=function(){return this._eventsCount>0?U6(this._events):[]};function dne(e,t){for(var r=new Array(t),n=0;n<t;++n)r[n]=e[n];return r}function Ent(e,t){for(;t+1<e.length;t++)e[t]=e[t+1];e.pop()}function knt(e){for(var t=new Array(e.length),r=0;r<t.length;++r)t[r]=e[r].listener||e[r];return t}function Cnt(e,t){return new Promise(function(r,n){function i(o){e.removeListener(t,a),n(o)}function a(){typeof e.removeListener=="function"&&e.removeListener("error",i),r([].slice.call(arguments))}vne(e,t,a,{once:!0}),t!=="error"&&Lnt(e,i,{once:!0})})}function Lnt(e,t,r){typeof e.on=="function"&&vne(e,"error",t,r)}function vne(e,t,r,n){if(typeof e.on=="function")n.once?e.once(t,r):e.on(t,r);else if(typeof e.addEventListener=="function")e.addEventListener(t,function i(a){n.once&&e.removeEventListener(t,i),r(a)});else throw new TypeError('The "emitter" argument must be of type EventEmitter. Received type '+typeof e)}});var _3=ye(($ir,pne)=>{"use strict";var oq=pb().EventEmitter,Pnt={init:function(e){if(e._ev instanceof oq)return e;var t=new oq,r=new oq;return e._ev=t,e._internalEv=r,e.on=t.on.bind(t),e.once=t.once.bind(t),e.removeListener=t.removeListener.bind(t),e.removeAllListeners=t.removeAllListeners.bind(t),e._internalOn=r.on.bind(r),e._internalOnce=r.once.bind(r),e._removeInternalListener=r.removeListener.bind(r),e._removeAllInternalListeners=r.removeAllListeners.bind(r),e.emit=function(n,i){t.emit(n,i),r.emit(n,i)},typeof e.addEventListener=="function"&&e.addEventListener("wheel",()=>{},{passive:!0}),e},triggerHandler:function(e,t,r){var n,i=e._ev;if(!i)return;var a=i._events[t];if(!a)return;function o(l){if(l.listener){if(i.removeListener(t,l.listener),!l.fired)return l.fired=!0,l.listener.apply(i,[r])}else return l.apply(i,[r])}a=Array.isArray(a)?a:[a];var s;for(s=0;s<a.length-1;s++)o(a[s]);return n=o(a[s]),n},purge:function(e){return delete e._ev,delete e.on,delete e.once,delete e.removeListener,delete e.removeAllListeners,delete e.emit,delete e._ev,delete e._internalEv,delete e._internalOn,delete e._internalOnce,delete e._removeInternalListener,delete e._removeAllInternalListeners,e}};pne.exports=Pnt});var yne=ye((Qir,mne)=>{"use strict";var gne=Dr(),Int=cb().dfltConfig;function Rnt(e,t){for(var r=[],n,i=0;i<t.length;i++)n=t[i],n===e?r[i]=n:typeof n=="object"?r[i]=Array.isArray(n)?gne.extendDeep([],n):gne.extendDeepAll({},n):r[i]=n;return r}var Ty={};Ty.add=function(e,t,r,n,i){var a,o;if(e.undoQueue=e.undoQueue||{index:0,queue:[],sequence:!1},o=e.undoQueue.index,e.autoplay){e.undoQueue.inSequence||(e.autoplay=!1);return}!e.undoQueue.sequence||e.undoQueue.beginSequence?(a={undo:{calls:[],args:[]},redo:{calls:[],args:[]}},e.undoQueue.queue.splice(o,e.undoQueue.queue.length-o,a),e.undoQueue.index+=1):a=e.undoQueue.queue[o-1],e.undoQueue.beginSequence=!1,a&&(a.undo.calls.unshift(t),a.undo.args.unshift(r),a.redo.calls.push(n),a.redo.args.push(i)),e.undoQueue.queue.length>Int.queueLength&&(e.undoQueue.queue.shift(),e.undoQueue.index--)};Ty.startSequence=function(e){e.undoQueue=e.undoQueue||{index:0,queue:[],sequence:!1},e.undoQueue.sequence=!0,e.undoQueue.beginSequence=!0};Ty.stopSequence=function(e){e.undoQueue=e.undoQueue||{index:0,queue:[],sequence:!1},e.undoQueue.sequence=!1,e.undoQueue.beginSequence=!1};Ty.undo=function(t){var r,n;if(!(t.undoQueue===void 0||isNaN(t.undoQueue.index)||t.undoQueue.index<=0)){for(t.undoQueue.index--,r=t.undoQueue.queue[t.undoQueue.index],t.undoQueue.inSequence=!0,n=0;n<r.undo.calls.length;n++)Ty.plotDo(t,r.undo.calls[n],r.undo.args[n]);t.undoQueue.inSequence=!1,t.autoplay=!1}};Ty.redo=function(t){var r,n;if(!(t.undoQueue===void 0||isNaN(t.undoQueue.index)||t.undoQueue.index>=t.undoQueue.queue.length)){for(r=t.undoQueue.queue[t.undoQueue.index],t.undoQueue.inSequence=!0,n=0;n<r.redo.calls.length;n++)Ty.plotDo(t,r.redo.calls[n],r.redo.args[n]);t.undoQueue.inSequence=!1,t.autoplay=!1,t.undoQueue.index++}};Ty.plotDo=function(e,t,r){e.autoplay=!0,r=Rnt(e,r),t.apply(null,r)};mne.exports=Ty});var sq=ye((enr,_ne)=>{"use strict";_ne.exports={_isLinkedToArray:"frames_entry",group:{valType:"string"},name:{valType:"string"},traces:{valType:"any"},baseframe:{valType:"string"},data:{valType:"any"},layout:{valType:"any"}}});var w3=ye(Jh=>{"use strict";var _0=qa(),YS=Dr(),H6=Gl(),lq=c3(),Dnt=sq(),Fnt=US(),znt=cb().configAttributes,xne=mc(),wg=YS.extendDeepAll,x3=YS.isPlainObject,Ont=YS.isArrayOrTypedArray,j6=YS.nestedProperty,qnt=YS.valObjectMeta,uq="_isSubplotObj",W6="_isLinkedToArray",Bnt="_arrayAttrRegexps",wne="_deprecated",cq=[uq,W6,Bnt,wne];Jh.IS_SUBPLOT_OBJ=uq;Jh.IS_LINKED_TO_ARRAY=W6;Jh.DEPRECATED=wne;Jh.UNDERSCORE_ATTRS=cq;Jh.get=function(){var e={};return _0.allTypes.forEach(function(t){e[t]=Unt(t)}),{defs:{valObjects:qnt,metaKeys:cq.concat(["description","role","editType","impliedEdits"]),editType:{traces:xne.traces,layout:xne.layout},impliedEdits:{}},traces:e,layout:Vnt(),frames:Gnt(),animation:b3(Fnt),config:b3(znt)}};Jh.crawl=function(e,t,r,n){var i=r||0;n=n||"",Object.keys(e).forEach(function(a){var o=e[a];if(cq.indexOf(a)===-1){var s=(n?n+".":"")+a;t(o,a,e,i,s),!Jh.isValObject(o)&&x3(o)&&a!=="impliedEdits"&&Jh.crawl(o,t,i+1,s)}})};Jh.isValObject=function(e){return e&&e.valType!==void 0};Jh.findArrayAttributes=function(e){var t=[],r=[],n=[],i,a;function o(l,u,c,f){r=r.slice(0,f).concat([u]),n=n.slice(0,f).concat([l&&l._isLinkedToArray]);var h=l&&(l.valType==="data_array"||l.arrayOk===!0)&&!(r[f-1]==="colorbar"&&(u==="ticktext"||u==="tickvals"));h&&s(i,0,"")}function s(l,u,c){var f=l[r[u]],h=c+r[u];if(u===r.length-1)Ont(f)&&t.push(a+h);else if(n[u]){if(Array.isArray(f))for(var d=0;d<f.length;d++)x3(f[d])&&s(f[d],u+1,h+"["+d+"].")}else x3(f)&&s(f,u+1,h+".")}return i=e,a="",Jh.crawl(H6,o),e._module&&e._module.attributes&&Jh.crawl(e._module.attributes,o),t};Jh.getTraceValObject=function(e,t){var r=t[0],n=1,i,a,o=e._module;if(o||(o=(_0.modules[e.type||H6.type.dflt]||{})._module),!o)return!1;if(i=o.attributes,a=i&&i[r],!a){var s=o.basePlotModule;s&&s.attributes&&(a=s.attributes[r])}return a||(a=H6[r]),Tne(a,t,n)};Jh.getLayoutValObject=function(e,t){var r=Nnt(e,t[0]);return Tne(r,t,1)};function Nnt(e,t){var r,n,i,a,o=e._basePlotModules;if(o){var s;for(r=0;r<o.length;r++){if(i=o[r],i.attrRegex&&i.attrRegex.test(t)){if(i.layoutAttrOverrides)return i.layoutAttrOverrides;!s&&i.layoutAttributes&&(s=i.layoutAttributes)}var l=i.baseLayoutAttrOverrides;if(l&&t in l)return l[t]}if(s)return s}var u=e._modules;if(u){for(r=0;r<u.length;r++)if(a=u[r].layoutAttributes,a&&t in a)return a[t]}for(n in _0.componentsRegistry){if(i=_0.componentsRegistry[n],i.name==="colorscale"&&t.indexOf("coloraxis")===0)return i.layoutAttributes[t];if(!i.schema&&t===i.name)return i.layoutAttributes}return t in lq?lq[t]:!1}function Tne(e,t,r){if(!e)return!1;if(e._isLinkedToArray){if(G6(t[r]))r++;else if(r<t.length)return!1}for(;r<t.length;r++){var n=e[t[r]];if(x3(n))e=n;else break;if(r===t.length-1)break;if(e._isLinkedToArray){if(r++,!G6(t[r]))return!1}else if(e.valType==="info_array"){r++;var i=t[r];if(!G6(i))return!1;var a=e.items;if(Array.isArray(a)){if(i>=a.length)return!1;if(e.dimensions===2){if(r++,t.length===r)return e;var o=t[r];if(!G6(o))return!1;e=a[i][o]}else e=a[i]}else e=a}}return e}function G6(e){return e===Math.round(e)&&e>=0}function Unt(e){var t,r;t=_0.modules[e]._module,r=t.basePlotModule;var n={};n.type=null;var i=wg({},H6),a=wg({},t.attributes);Jh.crawl(a,function(l,u,c,f,h){j6(i,h).set(void 0),l===void 0&&j6(a,h).set(void 0)}),wg(n,i),_0.traceIs(e,"noOpacity")&&delete n.opacity,_0.traceIs(e,"showLegend")||(delete n.showlegend,delete n.legendgroup),_0.traceIs(e,"noHover")&&(delete n.hoverinfo,delete n.hoverlabel),t.selectPoints||delete n.selectedpoints,wg(n,a),r.attributes&&wg(n,r.attributes),n.type=e;var o={meta:t.meta||{},categories:t.categories||{},animatable:!!t.animatable,type:e,attributes:b3(n)};if(t.layoutAttributes){var s={};wg(s,t.layoutAttributes),o.layoutAttributes=b3(s)}return t.animatable||Jh.crawl(o,function(l){Jh.isValObject(l)&&"anim"in l&&delete l.anim}),o}function Vnt(){var e={},t,r;wg(e,lq);for(t in _0.subplotsRegistry)if(r=_0.subplotsRegistry[t],!!r.layoutAttributes)if(Array.isArray(r.attr))for(var n=0;n<r.attr.length;n++)bne(e,r,r.attr[n]);else{var i=r.attr==="subplot"?r.name:r.attr;bne(e,r,i)}for(t in _0.componentsRegistry){r=_0.componentsRegistry[t];var a=r.schema;if(a&&(a.subplots||a.layout)){var o=a.subplots;if(o&&o.xaxis&&!o.yaxis)for(var s in o.xaxis)delete e.yaxis[s];delete e.xaxis.shift,delete e.xaxis.autoshift}else r.name==="colorscale"?wg(e,r.layoutAttributes):r.layoutAttributes&&Xnt(e,r.layoutAttributes,r.name)}return{layoutAttributes:b3(e)}}function Gnt(){var e={frames:wg({},Dnt)};return b3(e),e.frames}function b3(e){return Hnt(e),jnt(e),Wnt(e),e}function Hnt(e){function t(n){return{valType:"string",editType:"none"}}function r(n,i,a){Jh.isValObject(n)?(n.arrayOk===!0||n.valType==="data_array")&&(a[i+"src"]=t(i)):x3(n)&&(n.role="object")}Jh.crawl(e,r)}function jnt(e){function t(r,n,i){if(r){var a=r[W6];a&&(delete r[W6],i[n]={items:{}},i[n].items[a]=r,i[n].role="object")}}Jh.crawl(e,t)}function Wnt(e){function t(r){for(var n in r)if(x3(r[n]))t(r[n]);else if(Array.isArray(r[n]))for(var i=0;i<r[n].length;i++)t(r[n][i]);else r[n]instanceof RegExp&&(r[n]=r[n].toString())}t(e)}function bne(e,t,r){var n=j6(e,r),i=wg({},t.layoutAttributes);i[uq]=!0,n.set(i)}function Xnt(e,t,r){var n=j6(e,r);n.set(wg(n.get()||{},t))}});var vl=ye(gb=>{"use strict";var T3=Dr(),Znt=Gl(),$1="templateitemname",fq={name:{valType:"string",editType:"none"}};fq[$1]={valType:"string",editType:"calc"};gb.templatedArray=function(e,t){return t._isLinkedToArray=e,t.name=fq.name,t[$1]=fq[$1],t};gb.traceTemplater=function(e){var t={},r,n;for(r in e)n=e[r],Array.isArray(n)&&n.length&&(t[r]=0);function i(a){r=T3.coerce(a,{},Znt,"type");var o={type:r,_template:null};if(r in t){n=e[r];var s=t[r]%n.length;t[r]++,o._template=n[s]}return o}return{newTrace:i}};gb.newContainer=function(e,t,r){var n=e._template,i=n&&(n[t]||r&&n[r]);T3.isPlainObject(i)||(i=null);var a=e[t]={_template:i};return a};gb.arrayTemplater=function(e,t,r){var n=e._template,i=n&&n[Sne(t)],a=n&&n[t];(!Array.isArray(a)||!a.length)&&(a=[]);var o={};function s(u){var c={name:u.name,_input:u},f=c[$1]=u[$1];if(!Ane(f))return c._template=i,c;for(var h=0;h<a.length;h++){var d=a[h];if(d.name===f)return o[f]=1,c._template=d,c}return c[r]=u[r]||!1,c._template=!1,c}function l(){for(var u=[],c=0;c<a.length;c++){var f=a[c],h=f.name;if(Ane(h)&&!o[h]){var d={_template:f,name:h,_input:{_templateitemname:h}};d[$1]=f[$1],u.push(d),o[h]=1}}return u}return{newItem:s,defaultItems:l}};function Ane(e){return e&&typeof e=="string"}function Sne(e){var t=e.length-1;return e.charAt(t)!=="s"&&T3.warn("bad argument to arrayDefaultKey: "+e),e.slice(0,-1)+"defaults"}gb.arrayDefaultKey=Sne;gb.arrayEditor=function(e,t,r){var n=(T3.nestedProperty(e,t).get()||[]).length,i=r._index,a=i>=n&&(r._input||{})._templateitemname;a&&(i=n);var o=t+"["+i+"]",s;function l(){s={},a&&(s[o]={},s[o][$1]=a)}l();function u(d,v){s[d]=v}function c(d,v){a?T3.nestedProperty(s[o],d).set(v):s[o+"."+d]=v}function f(){var d=s;return l(),d}function h(d,v){d&&c(d,v);var _=f();for(var b in _)T3.nestedProperty(e,b).set(_[b])}return{modifyBase:u,modifyItem:c,getUpdateObj:f,applyUpdate:h}}});var hd=ye((inr,Mne)=>{"use strict";var KS=o3().counter;Mne.exports={idRegex:{x:KS("x","( domain)?"),y:KS("y","( domain)?")},attrRegex:KS("[xy]axis"),xAxisMatch:KS("xaxis"),yAxisMatch:KS("yaxis"),AX_ID_PATTERN:/^[xyz][0-9]*( domain)?$/,AX_NAME_PATTERN:/^[xyz]axis[0-9]*$/,SUBPLOT_PATTERN:/^x([0-9]*)y([0-9]*)$/,HOUR_PATTERN:"hour",WEEKDAY_PATTERN:"day of week",MINDRAG:8,MINZOOM:20,DRAGGERSIZE:20,REDRAWDELAY:50,DFLTRANGEX:[-1,6],DFLTRANGEY:[-1,4],traceLayerClasses:["imagelayer","heatmaplayer","contourcarpetlayer","contourlayer","funnellayer","waterfalllayer","barlayer","carpetlayer","violinlayer","boxlayer","ohlclayer","scattercarpetlayer","scatterlayer"],clipOnAxisFalseQuery:[".scatterlayer",".barlayer",".funnellayer",".waterfalllayer"],layerValue2layerClass:{"above traces":"above","below traces":"below"},zindexSeparator:"z"}});var hf=ye(Ap=>{"use strict";var Ynt=qa(),hq=hd();Ap.id2name=function(t){if(!(typeof t!="string"||!t.match(hq.AX_ID_PATTERN))){var r=t.split(" ")[0].slice(1);return r==="1"&&(r=""),t.charAt(0)+"axis"+r}};Ap.name2id=function(t){if(t.match(hq.AX_NAME_PATTERN)){var r=t.slice(5);return r==="1"&&(r=""),t.charAt(0)+r}};Ap.cleanId=function(t,r,n){var i=/( domain)$/.test(t);if(!(typeof t!="string"||!t.match(hq.AX_ID_PATTERN))&&!(r&&t.charAt(0)!==r)&&!(i&&!n)){var a=t.split(" ")[0].slice(1).replace(/^0+/,"");return a==="1"&&(a=""),t.charAt(0)+a+(i&&n?" domain":"")}};Ap.list=function(e,t,r){var n=e._fullLayout;if(!n)return[];var i=Ap.listIds(e,t),a=new Array(i.length),o;for(o=0;o<i.length;o++){var s=i[o];a[o]=n[s.charAt(0)+"axis"+s.slice(1)]}if(!r){var l=n._subplots.gl3d||[];for(o=0;o<l.length;o++){var u=n[l[o]];t?a.push(u[t+"axis"]):a.push(u.xaxis,u.yaxis,u.zaxis)}}return a};Ap.listIds=function(e,t){var r=e._fullLayout;if(!r)return[];var n=r._subplots;return t?n[t+"axis"]:n.xaxis.concat(n.yaxis)};Ap.getFromId=function(e,t,r){var n=e._fullLayout;return t=t===void 0||typeof t!="string"?t:t.replace(" domain",""),r==="x"?t=t.replace(/y[0-9]*/,""):r==="y"&&(t=t.replace(/x[0-9]*/,"")),n[Ap.id2name(t)]};Ap.getFromTrace=function(e,t,r){var n=e._fullLayout,i=null;if(Ynt.traceIs(t,"gl3d")){var a=t.scene;a.slice(0,5)==="scene"&&(i=n[a][r+"axis"])}else i=Ap.getFromId(e,t[r+"axis"]||r);return i};Ap.idSort=function(e,t){var r=e.charAt(0),n=t.charAt(0);return r!==n?r>n?1:-1:+(e.slice(1)||1)-+(t.slice(1)||1)};Ap.ref2id=function(e){return/^[xyz]/.test(e)?e.split(" ")[0]:!1};function Ene(e,t){if(t&&t.length){for(var r=0;r<t.length;r++)if(t[r][e])return!0}return!1}Ap.isLinked=function(e,t){return Ene(t,e._axisMatchGroups)||Ene(t,e._axisConstraintGroups)}});var Q1=ye((anr,kne)=>{"use strict";function Knt(e){var t=e._fullLayout._zoomlayer;t&&t.selectAll(".outline-controllers").remove()}function Jnt(e){var t=e._fullLayout._zoomlayer;t&&t.selectAll(".select-outline").remove(),e._fullLayout._outlining=!1}kne.exports={clearOutlineControllers:Knt,clearOutline:Jnt}});var X6=ye((onr,Cne)=>{"use strict";Cne.exports={scattermode:{valType:"enumerated",values:["group","overlay"],dflt:"overlay",editType:"calc"},scattergap:{valType:"number",min:0,max:1,editType:"calc"}}});var Id=ye(Y6=>{"use strict";var Z6=qa(),snr=hd().SUBPLOT_PATTERN;Y6.getSubplotCalcData=function(e,t,r){var n=Z6.subplotsRegistry[t];if(!n)return[];for(var i=n.attr,a=[],o=0;o<e.length;o++){var s=e[o],l=s[0].trace;l[i]===r&&a.push(s)}return a};Y6.getModuleCalcData=function(e,t,r){var n=[],i=[],a;if(typeof t=="string"?a=Z6.getModule(t).plot:typeof t=="function"?a=t:a=t.plot,!a)return[n,e];for(var o=r,s=0;s<e.length;s++){var l=e[s],u=l[0].trace,c=u.zorder!==void 0;u.visible!==!0||u._length===0||(u._module&&u._module.plot===a&&(!c||u.zorder===o)?n.push(l):i.push(l))}return[n,i]};Y6.getSubplotData=function(t,r,n){if(!Z6.subplotsRegistry[r])return[];for(var i=Z6.subplotsRegistry[r].attr,a=[],o,s,l,u=0;u<t.length;u++)o=t[u],o[i]===n&&a.push(o);return a}});var Rne=ye(mb=>{"use strict";var $nt=qa(),A3=Dr();mb.manageCommandObserver=function(e,t,r,n){var i={},a=!0;t&&t._commandObserver&&(i=t._commandObserver),i.cache||(i.cache={}),i.lookupTable={};var o=mb.hasSimpleAPICommandBindings(e,r,i.lookupTable);if(t&&t._commandObserver){if(o)return i;if(t._commandObserver.remove)return t._commandObserver.remove(),t._commandObserver=null,i}if(o){Lne(e,o,i.cache),i.check=function(){if(a){var c=Lne(e,o,i.cache);return c.changed&&n&&i.lookupTable[c.value]!==void 0&&(i.disable(),Promise.resolve(n({value:c.value,type:o.type,prop:o.prop,traces:o.traces,index:i.lookupTable[c.value]})).then(i.enable,i.enable)),c.changed}};for(var s=["plotly_relayout","plotly_redraw","plotly_restyle","plotly_update","plotly_animatingframe","plotly_afterplot"],l=0;l<s.length;l++)e._internalOn(s[l],i.check);i.remove=function(){for(var u=0;u<s.length;u++)e._removeInternalListener(s[u],i.check)}}else A3.log("Unable to automatically bind plot updates to API command"),i.lookupTable={},i.remove=function(){};return i.disable=function(){a=!1},i.enable=function(){a=!0},t&&(t._commandObserver=i),i};mb.hasSimpleAPICommandBindings=function(e,t,r){var n,i=t.length,a;for(n=0;n<i;n++){var o,s=t[n],l=s.method,u=s.args;if(Array.isArray(u)||(u=[]),!l)return!1;var c=mb.computeAPICommandBindings(e,l,u);if(c.length!==1)return!1;if(!a)a=c[0],Array.isArray(a.traces)&&a.traces.sort();else{if(o=c[0],o.type!==a.type||o.prop!==a.prop)return!1;if(Array.isArray(a.traces))if(Array.isArray(o.traces)){o.traces.sort();for(var f=0;f<a.traces.length;f++)if(a.traces[f]!==o.traces[f])return!1}else return!1;else if(o.prop!==a.prop)return!1}o=c[0];var h=o.value;if(Array.isArray(h))if(h.length===1)h=h[0];else return!1;r&&(r[h]=n)}return a};function Lne(e,t,r){var n,i,a,o=!1;if(t.type==="data")n=e._fullData[t.traces!==null?t.traces[0]:0];else if(t.type==="layout")n=e._fullLayout;else return!1;return i=A3.nestedProperty(n,t.prop).get(),a=r[t.type]=r[t.type]||{},a.hasOwnProperty(t.prop)&&a[t.prop]!==i&&(o=!0),a[t.prop]=i,{changed:o,value:i}}mb.executeAPICommand=function(e,t,r){if(t==="skip")return Promise.resolve();var n=$nt.apiMethodRegistry[t],i=[e];Array.isArray(r)||(r=[]);for(var a=0;a<r.length;a++)i.push(r[a]);return n.apply(null,i).catch(function(o){return A3.warn("API call to Plotly."+t+" rejected.",o),Promise.reject(o)})};mb.computeAPICommandBindings=function(e,t,r){var n;switch(Array.isArray(r)||(r=[]),t){case"restyle":n=Ine(e,r);break;case"relayout":n=Pne(e,r);break;case"update":n=Ine(e,[r[0],r[2]]).concat(Pne(e,[r[1]]));break;case"animate":n=Qnt(e,r);break;default:n=[]}return n};function Qnt(e,t){return Array.isArray(t[0])&&t[0].length===1&&["string","number"].indexOf(typeof t[0][0])!==-1?[{type:"layout",prop:"_currentFrame",value:t[0][0].toString()}]:[]}function Pne(e,t){var r=[],n=t[0],i={};if(typeof n=="string")i[n]=t[1];else if(A3.isPlainObject(n))i=n;else return r;return dq(i,function(a,o,s){r.push({type:"layout",prop:a,value:s})},"",0),r}function Ine(e,t){var r,n,i,a,o=[];if(n=t[0],i=t[1],r=t[2],a={},typeof n=="string")a[n]=i;else if(A3.isPlainObject(n))a=n,r===void 0&&(r=i);else return o;return r===void 0&&(r=null),dq(a,function(s,l,u){var c,f;if(Array.isArray(u)){f=u.slice();var h=Math.min(f.length,e.data.length);r&&(h=Math.min(h,r.length)),c=[];for(var d=0;d<h;d++)c[d]=r?r[d]:d}else f=u,c=r?r.slice():null;if(c===null)Array.isArray(f)&&(f=f[0]);else if(Array.isArray(c)){if(!Array.isArray(f)){var v=f;f=[];for(var _=0;_<c.length;_++)f[_]=v}f.length=Math.min(c.length,f.length)}o.push({type:"data",prop:s,traces:c,value:f})},"",0),o}function dq(e,t,r,n){Object.keys(e).forEach(function(i){var a=e[i];if(i[0]!=="_"){var o=r+(n>0?".":"")+i;A3.isPlainObject(a)?dq(a,t,o,n+1):t(o,i,a)}})}});var Mc=ye((cnr,Xne)=>{"use strict";var Nne=Oa(),eat=r3().timeFormatLocale,tat=wO().formatLocale,JS=Eo(),rat=TO(),Xl=qa(),Une=w3(),iat=vl(),Ga=Dr(),Vne=ka(),Dne=fs().BADNUM,Sp=hf(),nat=Q1().clearOutline,aat=X6(),vq=US(),oat=sq(),sat=Id().getModuleCalcData,Fne=Ga.relinkPrivateKeys,yb=Ga._,xa=Xne.exports={};Ga.extendFlat(xa,Xl);xa.attributes=Gl();xa.attributes.type.values=xa.allTypes;xa.fontAttrs=ec();xa.layoutAttributes=c3();var J6=Rne();xa.executeAPICommand=J6.executeAPICommand;xa.computeAPICommandBindings=J6.computeAPICommandBindings;xa.manageCommandObserver=J6.manageCommandObserver;xa.hasSimpleAPICommandBindings=J6.hasSimpleAPICommandBindings;xa.redrawText=function(e){return e=Ga.getGraphDiv(e),new Promise(function(t){setTimeout(function(){e._fullLayout&&(Xl.getComponentMethod("annotations","draw")(e),Xl.getComponentMethod("legend","draw")(e),Xl.getComponentMethod("colorbar","draw")(e),t(xa.previousPromises(e)))},300)})};xa.resize=function(e){e=Ga.getGraphDiv(e);var t,r=new Promise(function(n,i){(!e||Ga.isHidden(e))&&i(new Error("Resize must be passed a displayed plot div element.")),e._redrawTimer&&clearTimeout(e._redrawTimer),e._resolveResize&&(t=e._resolveResize),e._resolveResize=n,e._redrawTimer=setTimeout(function(){if(!e.layout||e.layout.width&&e.layout.height||Ga.isHidden(e)){n(e);return}delete e.layout.width,delete e.layout.height;var a=e.changed;e.autoplay=!0,Xl.call("relayout",e,{autosize:!0}).then(function(){e.changed=a,e._resolveResize===n&&(delete e._resolveResize,n(e))})},100)});return t&&t(r),r};xa.previousPromises=function(e){if((e._promises||[]).length)return Promise.all(e._promises).then(function(){e._promises=[]})};xa.addLinks=function(e){if(!(!e._context.showLink&&!e._context.showSources)){var t=e._fullLayout,r=Ga.ensureSingle(t._paper,"text","js-plot-link-container",function(l){l.style({"font-family":'"Open Sans", Arial, sans-serif',"font-size":"12px",fill:Vne.defaultLine,"pointer-events":"all"}).each(function(){var u=Nne.select(this);u.append("tspan").classed("js-link-to-tool",!0),u.append("tspan").classed("js-link-spacer",!0),u.append("tspan").classed("js-sourcelinks",!0)})}),n=r.node(),i={y:t._paper.attr("height")-9};document.body.contains(n)&&n.getComputedTextLength()>=t.width-20?(i["text-anchor"]="start",i.x=5):(i["text-anchor"]="end",i.x=t._paper.attr("width")-7),r.attr(i);var a=r.select(".js-link-to-tool"),o=r.select(".js-link-spacer"),s=r.select(".js-sourcelinks");e._context.showSources&&e._context.showSources(e),e._context.showLink&&lat(e,a),o.text(a.text()&&s.text()?" - ":"")}};function lat(e,t){t.text("");var r=t.append("a").attr({"xlink:xlink:href":"#",class:"link--impt link--embedview","font-weight":"bold"}).text(e._context.linkText+" \xBB");if(e._context.sendData)r.on("click",function(){xa.sendDataToCloud(e)});else{var n=window.location.pathname.split("/"),i=window.location.search;r.attr({"xlink:xlink:show":"new","xlink:xlink:href":"/"+n[2].split(".")[0]+"/"+n[1]+i})}}xa.sendDataToCloud=function(e){var t=(window.PLOTLYENV||{}).BASE_URL||e._context.plotlyServerURL;if(t){e.emit("plotly_beforeexport");var r=Nne.select(e).append("div").attr("id","hiddenform").style("display","none"),n=r.append("form").attr({action:t+"/external",method:"post",target:"_blank"}),i=n.append("input").attr({type:"text",name:"data"});return i.node().value=xa.graphJson(e,!1,"keepdata"),n.node().submit(),r.remove(),e.emit("plotly_afterexport"),!1}};var uat=["days","shortDays","months","shortMonths","periods","dateTime","date","time","decimal","thousands","grouping","currency"],cat=["year","month","dayMonth","dayMonthYear"];xa.supplyDefaults=function(e,t){var r=t&&t.skipUpdateCalc,n=e._fullLayout||{};if(n._skipDefaults){delete n._skipDefaults;return}var i=e._fullLayout={},a=e.layout||{},o=e._fullData||[],s=e._fullData=[],l=e.data||[],u=e.calcdata||[],c=e._context||{},f;e._transitionData||xa.createTransitionData(e),i._dfltTitle={plot:yb(e,"Click to enter Plot title"),subtitle:yb(e,"Click to enter Plot subtitle"),x:yb(e,"Click to enter X axis title"),y:yb(e,"Click to enter Y axis title"),colorbar:yb(e,"Click to enter Colorscale title"),annotation:yb(e,"new text")},i._traceWord=yb(e,"trace");var h=zne(e,uat);if(i._mapboxAccessToken=c.mapboxAccessToken,n._initialAutoSizeIsDone){var d=n.width,v=n.height;xa.supplyLayoutGlobalDefaults(a,i,h),a.width||(i.width=d),a.height||(i.height=v),xa.sanitizeMargins(i)}else{xa.supplyLayoutGlobalDefaults(a,i,h);var _=!a.width||!a.height,b=i.autosize,p=c.autosizable,k=_&&(b||p);k?xa.plotAutoSize(e,a,i):_&&xa.sanitizeMargins(i),!b&&_&&(a.width=i.width,a.height=i.height)}i._d3locale=dat(h,i.separators),i._extraFormat=zne(e,cat),i._initialAutoSizeIsDone=!0,i._dataLength=l.length,i._modules=[],i._visibleModules=[],i._basePlotModules=[];var E=i._subplots=hat(),S=i._splomAxes={x:{},y:{}},L=i._splomSubplots={};i._splomGridDflt={},i._scatterStackOpts={},i._firstScatter={},i._alignmentOpts={},i._colorAxes={},i._requestRangeslider={},i._traceUids=fat(o,l),xa.supplyDataDefaults(l,s,a,i);var x=Object.keys(S.x),C=Object.keys(S.y);if(x.length>1&&C.length>1){for(Xl.getComponentMethod("grid","sizeDefaults")(a,i),f=0;f<x.length;f++)Ga.pushUnique(E.xaxis,x[f]);for(f=0;f<C.length;f++)Ga.pushUnique(E.yaxis,C[f]);for(var M in L)Ga.pushUnique(E.cartesian,M)}if(i._has=xa._hasPlotType.bind(i),o.length===s.length)for(f=0;f<s.length;f++)Fne(s[f],o[f]);xa.supplyLayoutModuleDefaults(a,i,s,e._transitionData);var g=i._visibleModules,P=[];for(f=0;f<g.length;f++){var T=g[f].crossTraceDefaults;T&&Ga.pushUnique(P,T)}for(f=0;f<P.length;f++)P[f](s,i);i._hasOnlyLargeSploms=i._basePlotModules.length===1&&i._basePlotModules[0].name==="splom"&&x.length>15&&C.length>15&&i.shapes.length===0&&i.images.length===0,xa.linkSubplots(s,i,o,n),xa.cleanPlot(s,i,o,n);var z=!!(n._has&&n._has("cartesian")),O=!!(i._has&&i._has("cartesian")),V=z,G=O;V&&!G?n._bgLayer.remove():G&&!V&&(i._shouldCreateBgLayer=!0),n._zoomlayer&&!e._dragging&&nat({_fullLayout:n}),vat(s,i),Fne(i,n),Xl.getComponentMethod("colorscale","crossTraceDefaults")(s,i),i._preGUI||(i._preGUI={}),i._tracePreGUI||(i._tracePreGUI={});var Z=i._tracePreGUI,j={},N;for(N in Z)j[N]="old";for(f=0;f<s.length;f++)N=s[f]._fullInput.uid,j[N]||(Z[N]={}),j[N]="new";for(N in j)j[N]==="old"&&delete Z[N];Gne(i),Xl.getComponentMethod("rangeslider","makeData")(i),!r&&u.length===s.length&&xa.supplyDefaultsUpdateCalc(u,s)};xa.supplyDefaultsUpdateCalc=function(e,t){for(var r=0;r<t.length;r++){var n=t[r],i=(e[r]||[])[0];if(i&&i.trace){var a=i.trace;if(a._hasCalcTransform){var o=a._arrayAttrs,s,l,u;for(s=0;s<o.length;s++)l=o[s],u=Ga.nestedProperty(a,l).get().slice(),Ga.nestedProperty(n,l).set(u)}i.trace=n}}};function fat(e,t){var r=t.length,n=[],i,a;for(i=0;i<e.length;i++){var o=e[i]._fullInput;o!==a&&n.push(o),a=o}var s=n.length,l=new Array(r),u={};function c(d,v){l[v]=d,u[d]=1}function f(d,v){if(d&&typeof d=="string"&&!u[d])return c(d,v),!0}for(i=0;i<r;i++){var h=t[i].uid;typeof h=="number"&&(h=String(h)),!f(h,i)&&(i<s&&f(n[i].uid,i)||c(Ga.randstr(u),i))}return l}function hat(){var e=Xl.collectableSubplotTypes,t={},r,n;if(!e){e=[];var i=Xl.subplotsRegistry;for(var a in i){var o=i[a],s=o.attr;if(s&&(e.push(a),Array.isArray(s)))for(n=0;n<s.length;n++)Ga.pushUnique(e,s[n])}}for(r=0;r<e.length;r++)t[e[r]]=[];return t}function zne(e,t){var r=e._context.locale;r||(r="en-US");var n=!1,i={};function a(f){for(var h=!0,d=0;d<t.length;d++){var v=t[d];i[v]||(f[v]?i[v]=f[v]:h=!1)}h&&(n=!0)}for(var o=0;o<2;o++){for(var s=e._context.locales,l=0;l<2;l++){var u=(s[r]||{}).format;if(u&&(a(u),n))break;s=Xl.localeRegistry}var c=r.split("-")[0];if(n||c===r)break;r=c}return n||a(Xl.localeRegistry.en.format),i}function dat(e,t){return e.decimal=t.charAt(0),e.thousands=t.charAt(1),{numberFormat:function(r){try{r=tat(e).format(Ga.adjustFormat(r))}catch(n){return Ga.warnBadFormat(r),Ga.noFormat}return r},timeFormat:eat(e).utcFormat}}function vat(e,t){var r,n=[];t.meta&&(r=t._meta={meta:t.meta,layout:{meta:t.meta}});for(var i=0;i<e.length;i++){var a=e[i];a.meta?n[a.index]=a._meta={meta:a.meta}:t.meta&&(a._meta={meta:t.meta}),t.meta&&(a._meta.layout={meta:t.meta})}n.length&&(r||(r=t._meta={}),r.data=n)}xa.createTransitionData=function(e){e._transitionData||(e._transitionData={}),e._transitionData._frames||(e._transitionData._frames=[]),e._transitionData._frameHash||(e._transitionData._frameHash={}),e._transitionData._counter||(e._transitionData._counter=0),e._transitionData._interruptCallbacks||(e._transitionData._interruptCallbacks=[])};xa._hasPlotType=function(e){var t,r=this._basePlotModules||[];for(t=0;t<r.length;t++)if(r[t].name===e)return!0;var n=this._modules||[];for(t=0;t<n.length;t++){var i=n[t].name;if(i===e)return!0;var a=Xl.modules[i];if(a&&a.categories[e])return!0}return!1};xa.cleanPlot=function(e,t,r,n){var i,a,o=n._basePlotModules||[];for(i=0;i<o.length;i++){var s=o[i];s.clean&&s.clean(e,t,r,n)}var l=n._has&&n._has("gl"),u=t._has&&t._has("gl");l&&!u&&n._glcontainer!==void 0&&(n._glcontainer.selectAll(".gl-canvas").remove(),n._glcontainer.selectAll(".no-webgl").remove(),n._glcanvas=null);var c=!!n._infolayer;e:for(i=0;i<r.length;i++){var f=r[i],h=f.uid;for(a=0;a<e.length;a++){var d=e[a];if(h===d.uid)continue e}c&&n._infolayer.select(".cb"+h).remove()}};xa.linkSubplots=function(e,t,r,n){var i,a,o=n._plots||{},s=t._plots={},l=t._subplots,u={_fullData:e,_fullLayout:t},c=l.cartesian||[];for(i=0;i<c.length;i++){var f=c[i],h=o[f],d=Sp.getFromId(u,f,"x"),v=Sp.getFromId(u,f,"y"),_;for(h?_=s[f]=h:(_=s[f]={},_.id=f),d._counterAxes.push(v._id),v._counterAxes.push(d._id),d._subplotsWith.push(f),v._subplotsWith.push(f),_.xaxis=d,_.yaxis=v,_._hasClipOnAxisFalse=!1,a=0;a<e.length;a++){var b=e[a];if(b.xaxis===_.xaxis._id&&b.yaxis===_.yaxis._id&&b.cliponaxis===!1){_._hasClipOnAxisFalse=!0;break}}}var p=Sp.list(u,null,!0),k;for(i=0;i<p.length;i++){k=p[i];var E=null;k.overlaying&&(E=Sp.getFromId(u,k.overlaying),E&&E.overlaying&&(k.overlaying=!1,E=null)),k._mainAxis=E||k,E&&(k.domain=E.domain.slice()),k._anchorAxis=k.anchor==="free"?null:Sp.getFromId(u,k.anchor)}for(i=0;i<p.length;i++)if(k=p[i],k._counterAxes.sort(Sp.idSort),k._subplotsWith.sort(Ga.subplotSort),k._mainSubplot=pat(k,t),k._counterAxes.length&&(k.spikemode&&k.spikemode.indexOf("across")!==-1||k.automargin&&k.mirror&&k.anchor!=="free"||Xl.getComponentMethod("rangeslider","isVisible")(k))){var S=1,L=0;for(a=0;a<k._counterAxes.length;a++){var x=Sp.getFromId(u,k._counterAxes[a]);S=Math.min(S,x.domain[0]),L=Math.max(L,x.domain[1])}S<L&&(k._counterDomainMin=S,k._counterDomainMax=L)}};function pat(e,t){var r={_fullLayout:t},n=e._id.charAt(0)==="x",i=e._mainAxis._anchorAxis,a="",o="",s="";if(i&&(s=i._mainAxis._id,a=n?e._id+s:s+e._id),!a||!t._plots[a]){a="";for(var l=e._counterAxes,u=0;u<l.length;u++){var c=l[u],f=n?e._id+c:c+e._id;o||(o=f);var h=Sp.getFromId(r,c);if(s&&h.overlaying===s){a=f;break}}}return a||o}xa.clearExpandedTraceDefaultColors=function(e){var t,r,n;function i(o,s,l,u){r[u]=s,r.length=u+1,o.valType==="color"&&o.dflt===void 0&&t.push(r.join("."))}for(r=[],t=e._module._colorAttrs,t||(e._module._colorAttrs=t=[],Une.crawl(e._module.attributes,i)),n=0;n<t.length;n++){var a=Ga.nestedProperty(e,"_input."+t[n]);a.get()||Ga.nestedProperty(e,t[n]).set(null)}};xa.supplyDataDefaults=function(e,t,r,n){var i=n._modules,a=n._visibleModules,o=n._basePlotModules,s=0,l=0,u,c,f;n._transformModules=[];function h(k){t.push(k);var E=k._module;E&&(Ga.pushUnique(i,E),k.visible===!0&&Ga.pushUnique(a,E),Ga.pushUnique(o,k._module.basePlotModule),s++,k._input.visible!==!1&&l++)}var d={},v=[],_=(r.template||{}).data||{},b=iat.traceTemplater(_);for(u=0;u<e.length;u++)f=e[u],c=b.newTrace(f),c.uid=n._traceUids[u],xa.supplyTraceDefaults(f,c,l,n,u),c.index=u,c._input=f,c._fullInput=c,h(c),Xl.traceIs(c,"carpetAxis")&&(d[c.carpet]=c),Xl.traceIs(c,"carpetDependent")&&v.push(u);for(u=0;u<v.length;u++)if(c=t[v[u]],!!c.visible){var p=d[c.carpet];if(c._carpet=p,!p||!p.visible){c.visible=!1;continue}c.xaxis=p.xaxis,c.yaxis=p.yaxis}};xa.supplyAnimationDefaults=function(e){e=e||{};var t,r={};function n(i,a){return Ga.coerce(e||{},r,vq,i,a)}if(n("mode"),n("direction"),n("fromcurrent"),Array.isArray(e.frame))for(r.frame=[],t=0;t<e.frame.length;t++)r.frame[t]=xa.supplyAnimationFrameDefaults(e.frame[t]||{});else r.frame=xa.supplyAnimationFrameDefaults(e.frame||{});if(Array.isArray(e.transition))for(r.transition=[],t=0;t<e.transition.length;t++)r.transition[t]=xa.supplyAnimationTransitionDefaults(e.transition[t]||{});else r.transition=xa.supplyAnimationTransitionDefaults(e.transition||{});return r};xa.supplyAnimationFrameDefaults=function(e){var t={};function r(n,i){return Ga.coerce(e||{},t,vq.frame,n,i)}return r("duration"),r("redraw"),t};xa.supplyAnimationTransitionDefaults=function(e){var t={};function r(n,i){return Ga.coerce(e||{},t,vq.transition,n,i)}return r("duration"),r("easing"),t};xa.supplyFrameDefaults=function(e){var t={};function r(n,i){return Ga.coerce(e,t,oat,n,i)}return r("group"),r("name"),r("traces"),r("baseframe"),r("data"),r("layout"),t};xa.supplyTraceDefaults=function(e,t,r,n,i){var a=n.colorway||Vne.defaults,o=a[r%a.length],s;function l(E,S){return Ga.coerce(e,t,xa.attributes,E,S)}var u=l("visible");l("type"),l("name",n._traceWord+" "+i),l("uirevision",n.uirevision);var c=xa.getModule(t);if(t._module=c,c){var f=c.basePlotModule,h=f.attr,d=f.attributes;if(h&&d){var v=n._subplots,_="";if(Array.isArray(h))for(s=0;s<h.length;s++){var b=h[s],p=Ga.coerce(e,t,d,b);v[b]&&Ga.pushUnique(v[b],p),_+=p}else _=Ga.coerce(e,t,d,h);v[f.name]&&Ga.pushUnique(v[f.name],_)}}if(u&&(l("customdata"),l("ids"),l("meta"),Xl.traceIs(t,"showLegend")?(Ga.coerce(e,t,c.attributes.showlegend?c.attributes:xa.attributes,"showlegend"),Ga.coerce(e,t,c.attributes.legend?c.attributes:xa.attributes,"legend"),l("legendwidth"),l("legendgroup"),l("legendgrouptitle.text"),l("legendrank"),t._dfltShowLegend=!0):t._dfltShowLegend=!1,c&&c.supplyDefaults(e,t,o,n),Xl.traceIs(t,"noOpacity")||l("opacity"),Xl.traceIs(t,"notLegendIsolatable")&&(t.visible=!!t.visible),Xl.traceIs(t,"noHover")||(t.hovertemplate||Ga.coerceHoverinfo(e,t,n),t.type!=="parcats"&&Xl.getComponentMethod("fx","supplyDefaults")(e,t,o,n)),c&&c.selectPoints)){var k=l("selectedpoints");Ga.isTypedArray(k)&&(t.selectedpoints=Array.from(k))}return t};xa.supplyLayoutGlobalDefaults=function(e,t,r){function n(f,h){return Ga.coerce(e,t,xa.layoutAttributes,f,h)}var i=e.template;Ga.isPlainObject(i)&&(t.template=i,t._template=i.layout,t._dataTemplate=i.data),n("autotypenumbers");var a=Ga.coerceFont(n,"font"),o=a.size;Ga.coerceFont(n,"title.font",a,{overrideDflt:{size:Math.round(o*1.4)}}),n("title.text",t._dfltTitle.plot),n("title.xref");var s=n("title.yref");n("title.pad.t"),n("title.pad.r"),n("title.pad.b"),n("title.pad.l");var l=n("title.automargin");n("title.x"),n("title.xanchor"),n("title.y"),n("title.yanchor"),n("title.subtitle.text",t._dfltTitle.subtitle),Ga.coerceFont(n,"title.subtitle.font",a,{overrideDflt:{size:Math.round(t.title.font.size*.7)}}),l&&(s==="paper"&&(t.title.y!==0&&(t.title.y=1),t.title.yanchor==="auto"&&(t.title.yanchor=t.title.y===0?"top":"bottom")),s==="container"&&(t.title.y==="auto"&&(t.title.y=1),t.title.yanchor==="auto"&&(t.title.yanchor=t.title.y<.5?"bottom":"top")));var u=n("uniformtext.mode");u&&n("uniformtext.minsize"),n("autosize",!(e.width&&e.height)),n("width"),n("height"),n("minreducedwidth"),n("minreducedheight"),n("margin.l"),n("margin.r"),n("margin.t"),n("margin.b"),n("margin.pad"),n("margin.autoexpand"),e.width&&e.height&&xa.sanitizeMargins(t),Xl.getComponentMethod("grid","sizeDefaults")(e,t),n("paper_bgcolor"),n("separators",r.decimal+r.thousands),n("hidesources"),n("colorway"),n("datarevision");var c=n("uirevision");n("editrevision",c),n("selectionrevision",c),Xl.getComponentMethod("modebar","supplyLayoutDefaults")(e,t),Xl.getComponentMethod("shapes","supplyDrawNewShapeDefaults")(e,t,n),Xl.getComponentMethod("selections","supplyDrawNewSelectionDefaults")(e,t,n),n("meta"),Ga.isPlainObject(e.transition)&&(n("transition.duration"),n("transition.easing"),n("transition.ordering")),Xl.getComponentMethod("calendars","handleDefaults")(e,t,"calendar"),Xl.getComponentMethod("fx","supplyLayoutGlobalDefaults")(e,t,n),Ga.coerce(e,t,aat,"scattermode")};function K6(e){return typeof e=="string"&&e.slice(-2)==="px"&&parseFloat(e)}xa.plotAutoSize=function(t,r,n){var i=t._context||{},a=i.frameMargins,o,s,l=Ga.isPlotDiv(t);if(l&&t.emit("plotly_autosize"),i.fillFrame)o=window.innerWidth,s=window.innerHeight,document.body.style.overflow="hidden";else{var u=l?window.getComputedStyle(t):{};if(o=K6(u.width)||K6(u.maxWidth)||n.width,s=K6(u.height)||K6(u.maxHeight)||n.height,JS(a)&&a>0){var c=1-2*a;o=Math.round(c*o),s=Math.round(c*s)}}var f=xa.layoutAttributes.width.min,h=xa.layoutAttributes.height.min;o<f&&(o=f),s<h&&(s=h);var d=!r.width&&Math.abs(n.width-o)>1,v=!r.height&&Math.abs(n.height-s)>1;(v||d)&&(d&&(n.width=o),v&&(n.height=s)),t._initialAutoSize||(t._initialAutoSize={width:o,height:s}),xa.sanitizeMargins(n)};xa.supplyLayoutModuleDefaults=function(e,t,r,n){var i=Xl.componentsRegistry,a=t._basePlotModules,o,s,l,u=Xl.subplotsRegistry.cartesian;for(o in i)l=i[o],l.includeBasePlot&&l.includeBasePlot(e,t);a.length||a.push(u),t._has("cartesian")&&(Xl.getComponentMethod("grid","contentDefaults")(e,t),u.finalizeSubplots(e,t));for(var c in t._subplots)t._subplots[c].sort(Ga.subplotSort);for(s=0;s<a.length;s++)l=a[s],l.supplyLayoutDefaults&&l.supplyLayoutDefaults(e,t,r);var f=t._modules;for(s=0;s<f.length;s++)l=f[s],l.supplyLayoutDefaults&&l.supplyLayoutDefaults(e,t,r);var h=t._transformModules;for(s=0;s<h.length;s++)l=h[s],l.supplyLayoutDefaults&&l.supplyLayoutDefaults(e,t,r,n);for(o in i)l=i[o],l.supplyLayoutDefaults&&l.supplyLayoutDefaults(e,t,r)};xa.purge=function(e){var t=e._fullLayout||{};t._glcontainer!==void 0&&(t._glcontainer.selectAll(".gl-canvas").remove(),t._glcontainer.remove(),t._glcanvas=null),t._modeBar&&t._modeBar.destroy(),e._transitionData&&(e._transitionData._interruptCallbacks&&(e._transitionData._interruptCallbacks.length=0),e._transitionData._animationRaf&&window.cancelAnimationFrame(e._transitionData._animationRaf)),Ga.clearThrottle(),Ga.clearResponsive(e),delete e.data,delete e.layout,delete e._fullData,delete e._fullLayout,delete e.calcdata,delete e.empty,delete e.fid,delete e.undoqueue,delete e.undonum,delete e.autoplay,delete e.changed,delete e._promises,delete e._redrawTimer,delete e._hmlumcount,delete e._hmpixcount,delete e._transitionData,delete e._transitioning,delete e._initialAutoSize,delete e._transitioningWithDuration,delete e._dragging,delete e._dragged,delete e._dragdata,delete e._hoverdata,delete e._snapshotInProgress,delete e._editing,delete e._mouseDownTime,delete e._legendMouseDownTime,e.removeAllListeners&&e.removeAllListeners()};xa.style=function(e){var t=e._fullLayout._visibleModules,r=[],n;for(n=0;n<t.length;n++){var i=t[n];i.style&&Ga.pushUnique(r,i.style)}for(n=0;n<r.length;n++)r[n](e)};xa.sanitizeMargins=function(e){if(!(!e||!e.margin)){var t=e.width,r=e.height,n=e.margin,i=t-(n.l+n.r),a=r-(n.t+n.b),o;i<0&&(o=(t-1)/(n.l+n.r),n.l=Math.floor(o*n.l),n.r=Math.floor(o*n.r)),a<0&&(o=(r-1)/(n.t+n.b),n.t=Math.floor(o*n.t),n.b=Math.floor(o*n.b))}};xa.clearAutoMarginIds=function(e){e._fullLayout._pushmarginIds={}};xa.allowAutoMargin=function(e,t){e._fullLayout._pushmarginIds[t]=1};function Gne(e){var t=e.margin;if(!e._size){var r=e._size={l:Math.round(t.l),r:Math.round(t.r),t:Math.round(t.t),b:Math.round(t.b),p:Math.round(t.pad)};r.w=Math.round(e.width)-r.l-r.r,r.h=Math.round(e.height)-r.t-r.b}e._pushmargin||(e._pushmargin={}),e._pushmarginIds||(e._pushmarginIds={}),e._reservedMargin||(e._reservedMargin={})}var Hne=2,jne=2;xa.autoMargin=function(e,t,r){var n=e._fullLayout,i=n.width,a=n.height,o=n.margin,s=n.minreducedwidth,l=n.minreducedheight,u=Ga.constrain(i-o.l-o.r,Hne,s),c=Ga.constrain(a-o.t-o.b,jne,l),f=Math.max(0,i-u),h=Math.max(0,a-c),d=n._pushmargin,v=n._pushmarginIds;if(o.autoexpand!==!1){if(!r)delete d[t],delete v[t];else{var _=r.pad;if(_===void 0&&(_=Math.min(12,o.l,o.r,o.t,o.b)),f){var b=(r.l+r.r)/f;b>1&&(r.l/=b,r.r/=b)}if(h){var p=(r.t+r.b)/h;p>1&&(r.t/=p,r.b/=p)}var k=r.xl!==void 0?r.xl:r.x,E=r.xr!==void 0?r.xr:r.x,S=r.yt!==void 0?r.yt:r.y,L=r.yb!==void 0?r.yb:r.y;d[t]={l:{val:k,size:r.l+_},r:{val:E,size:r.r+_},b:{val:L,size:r.b+_},t:{val:S,size:r.t+_}},v[t]=1}if(!n._replotting)return xa.doAutoMargin(e)}};function gat(e){if("_redrawFromAutoMarginCount"in e._fullLayout)return!1;var t=Sp.list(e,"",!0);for(var r in t)if(t[r].autoshift||t[r].shift)return!0;return!1}xa.doAutoMargin=function(e){var t=e._fullLayout,r=t.width,n=t.height;t._size||(t._size={}),Gne(t);var i=t._size,a=t.margin,o={t:0,b:0,l:0,r:0},s=Ga.extendFlat({},i),l=a.l,u=a.r,c=a.t,f=a.b,h=t._pushmargin,d=t._pushmarginIds,v=t.minreducedwidth,_=t.minreducedheight;if(a.autoexpand!==!1){for(var b in h)d[b]||delete h[b];var p=e._fullLayout._reservedMargin;for(var k in p)for(var E in p[k]){var S=p[k][E];o[E]=Math.max(o[E],S)}h.base={l:{val:0,size:l},r:{val:1,size:u},t:{val:1,size:c},b:{val:0,size:f}};for(var L in o){var x=0;for(var C in h)C!=="base"&&JS(h[C][L].size)&&(x=h[C][L].size>x?h[C][L].size:x);var M=Math.max(0,a[L]-x);o[L]=Math.max(0,o[L]-M)}for(var g in h){var P=h[g].l||{},T=h[g].b||{},z=P.val,O=P.size,V=T.val,G=T.size,Z=r-o.r-o.l,j=n-o.t-o.b;for(var N in h){if(JS(O)&&h[N].r){var H=h[N].r.val,te=h[N].r.size;if(H>z){var oe=(O*H+(te-Z)*z)/(H-z),_e=(te*(1-z)+(O-Z)*(1-H))/(H-z);oe+_e>l+u&&(l=oe,u=_e)}}if(JS(G)&&h[N].t){var Ee=h[N].t.val,Ce=h[N].t.size;if(Ee>V){var me=(G*Ee+(Ce-j)*V)/(Ee-V),ie=(Ce*(1-V)+(G-j)*(1-Ee))/(Ee-V);me+ie>f+c&&(f=me,c=ie)}}}}}var Se=Ga.constrain(r-a.l-a.r,Hne,v),Le=Ga.constrain(n-a.t-a.b,jne,_),Ae=Math.max(0,r-Se),Fe=Math.max(0,n-Le);if(Ae){var Pe=(l+u)/Ae;Pe>1&&(l/=Pe,u/=Pe)}if(Fe){var ge=(f+c)/Fe;ge>1&&(f/=ge,c/=ge)}if(i.l=Math.round(l)+o.l,i.r=Math.round(u)+o.r,i.t=Math.round(c)+o.t,i.b=Math.round(f)+o.b,i.p=Math.round(a.pad),i.w=Math.round(r)-i.l-i.r,i.h=Math.round(n)-i.t-i.b,!t._replotting&&(xa.didMarginChange(s,i)||gat(e))){"_redrawFromAutoMarginCount"in t?t._redrawFromAutoMarginCount++:t._redrawFromAutoMarginCount=1;var Re=3*(1+Object.keys(d).length);if(t._redrawFromAutoMarginCount<Re)return Xl.call("_doPlot",e);t._size=s,Ga.warn("Too many auto-margin redraws.")}mat(e)};function mat(e){var t=Sp.list(e,"",!0);["_adjustTickLabelsOverflow","_hideCounterAxisInsideTickLabels"].forEach(function(r){for(var n=0;n<t.length;n++){var i=t[n][r];i&&i()}})}var One=["l","r","t","b","p","w","h"];xa.didMarginChange=function(e,t){for(var r=0;r<One.length;r++){var n=One[r],i=e[n],a=t[n];if(!JS(i)||Math.abs(a-i)>1)return!0}return!1};xa.graphJson=function(e,t,r,n,i,a){(i&&t&&!e._fullData||i&&!t&&!e._fullLayout)&&xa.supplyDefaults(e);var o=i?e._fullData:e.data,s=i?e._fullLayout:e.layout,l=(e._transitionData||{})._frames;function u(h,d){if(typeof h=="function")return d?"_function_":null;if(Ga.isPlainObject(h)){var v={},_;return Object.keys(h).sort().forEach(function(E){if(["_","["].indexOf(E.charAt(0))===-1){if(typeof h[E]=="function"){d&&(v[E]="_function");return}if(r==="keepdata"){if(E.slice(-3)==="src")return}else if(r==="keepstream"){if(_=h[E+"src"],typeof _=="string"&&_.indexOf(":")>0&&!Ga.isPlainObject(h.stream))return}else if(r!=="keepall"&&(_=h[E+"src"],typeof _=="string"&&_.indexOf(":")>0))return;v[E]=u(h[E],d)}}),v}var b=Array.isArray(h),p=Ga.isTypedArray(h);if((b||p)&&h.dtype&&h.shape){var k=h.bdata;return u({dtype:h.dtype,shape:h.shape,bdata:Ga.isArrayBuffer(k)?rat.encode(k):k},d)}return b?h.map(function(E){return u(E,d)}):p?Ga.simpleMap(h,Ga.identity):Ga.isJSDate(h)?Ga.ms2DateTimeLocal(+h):h}var c={data:(o||[]).map(function(h){var d=u(h);return t&&delete d.fit,d})};if(!t&&(c.layout=u(s),i)){var f=s._size;c.layout.computed={margin:{b:f.b,l:f.l,r:f.r,t:f.t}}}return l&&(c.frames=u(l)),a&&(c.config=u(e._context,!0)),n==="object"?c:JSON.stringify(c)};xa.modifyFrames=function(e,t){var r,n,i,a=e._transitionData._frames,o=e._transitionData._frameHash;for(r=0;r<t.length;r++)switch(n=t[r],n.type){case"replace":i=n.value;var s=(a[n.index]||{}).name,l=i.name;a[n.index]=o[l]=i,l!==s&&(delete o[s],o[l]=i);break;case"insert":i=n.value,o[i.name]=i,a.splice(n.index,0,i);break;case"delete":i=a[n.index],delete o[i.name],a.splice(n.index,1);break}return Promise.resolve()};xa.computeFrame=function(e,t){var r=e._transitionData._frameHash,n,i,a,o;if(!t)throw new Error("computeFrame must be given a string frame name");var s=r[t.toString()];if(!s)return!1;for(var l=[s],u=[s.name];s.baseframe&&(s=r[s.baseframe.toString()])&&u.indexOf(s.name)===-1;)l.push(s),u.push(s.name);for(var c={};s=l.pop();)if(s.layout&&(c.layout=xa.extendLayout(c.layout,s.layout)),s.data){if(c.data||(c.data=[]),i=s.traces,!i)for(i=[],n=0;n<s.data.length;n++)i[n]=n;for(c.traces||(c.traces=[]),n=0;n<s.data.length;n++)a=i[n],a!=null&&(o=c.traces.indexOf(a),o===-1&&(o=c.data.length,c.traces[o]=a),c.data[o]=xa.extendTrace(c.data[o],s.data[n]))}return c};xa.recomputeFrameHash=function(e){for(var t=e._transitionData._frameHash={},r=e._transitionData._frames,n=0;n<r.length;n++){var i=r[n];i&&i.name&&(t[i.name]=i)}};xa.extendObjectWithContainers=function(e,t,r){var n,i,a,o,s,l,u,c,f=Ga.extendDeepNoArrays({},t||{}),h=Ga.expandObjectPaths(f),d={};if(r&&r.length)for(a=0;a<r.length;a++)n=Ga.nestedProperty(h,r[a]),i=n.get(),i===void 0?Ga.nestedProperty(d,r[a]).set(null):(n.set(null),Ga.nestedProperty(d,r[a]).set(i));if(e=Ga.extendDeepNoArrays(e||{},h),r&&r.length){for(a=0;a<r.length;a++)if(s=Ga.nestedProperty(d,r[a]),u=s.get(),!!u){for(l=Ga.nestedProperty(e,r[a]),c=l.get(),Array.isArray(c)||(c=[],l.set(c)),o=0;o<u.length;o++){var v=u[o];v===null?c[o]=null:c[o]=xa.extendObjectWithContainers(c[o],v)}l.set(c)}}return e};xa.dataArrayContainers=["transforms","dimensions"];xa.layoutArrayContainers=Xl.layoutArrayContainers;xa.extendTrace=function(e,t){return xa.extendObjectWithContainers(e,t,xa.dataArrayContainers)};xa.extendLayout=function(e,t){return xa.extendObjectWithContainers(e,t,xa.layoutArrayContainers)};xa.transition=function(e,t,r,n,i,a){var o={redraw:i.redraw},s={},l=[];return o.prepareFn=function(){for(var u=Array.isArray(t)?t.length:0,c=n.slice(0,u),f=0;f<c.length;f++){var h=c[f],d=e._fullData[h],v=d._module;if(v){if(v.animatable){var _=v.basePlotModule.name;s[_]||(s[_]=[]),s[_].push(h)}e.data[c[f]]=xa.extendTrace(e.data[c[f]],t[f])}}var b=Ga.expandObjectPaths(Ga.extendDeepNoArrays({},r)),p=/^[xy]axis[0-9]*$/;for(var k in b)p.test(k)&&delete b[k].range;xa.extendLayout(e.layout,b),delete e.calcdata,xa.supplyDefaults(e),xa.doCalcdata(e);var E=Ga.expandObjectPaths(r);if(E){var S=e._fullLayout._plots;for(var L in S){var x=S[L],C=x.xaxis,M=x.yaxis,g=C.range.slice(),P=M.range.slice(),T=null,z=null,O=null,V=null;Array.isArray(E[C._name+".range"])?T=E[C._name+".range"].slice():Array.isArray((E[C._name]||{}).range)&&(T=E[C._name].range.slice()),Array.isArray(E[M._name+".range"])?z=E[M._name+".range"].slice():Array.isArray((E[M._name]||{}).range)&&(z=E[M._name].range.slice()),g&&T&&(C.r2l(g[0])!==C.r2l(T[0])||C.r2l(g[1])!==C.r2l(T[1]))&&(O={xr0:g,xr1:T}),P&&z&&(M.r2l(P[0])!==M.r2l(z[0])||M.r2l(P[1])!==M.r2l(z[1]))&&(V={yr0:P,yr1:z}),(O||V)&&l.push(Ga.extendFlat({plotinfo:x},O,V))}}return Promise.resolve()},o.runFn=function(u){var c,f=e._fullLayout._basePlotModules,h=l.length,d;if(r)for(d=0;d<f.length;d++)f[d].transitionAxes&&f[d].transitionAxes(e,l,a,u);h?(c=Ga.extendFlat({},a),c.duration=0,delete s.cartesian):c=a;for(var v in s){var _=s[v],b=e._fullData[_[0]]._module;b.basePlotModule.plot(e,_,c,u)}},Wne(e,a,o)};xa.transitionFromReact=function(e,t,r,n){var i=e._fullLayout,a=i.transition,o={},s=[];return o.prepareFn=function(){var l=i._plots;o.redraw=!1,t.anim==="some"&&(o.redraw=!0),r.anim==="some"&&(o.redraw=!0);for(var u in l){var c=l[u],f=c.xaxis,h=c.yaxis,d=n[f._name].range.slice(),v=n[h._name].range.slice(),_=f.range.slice(),b=h.range.slice();f.setScale(),h.setScale();var p=null,k=null;(f.r2l(d[0])!==f.r2l(_[0])||f.r2l(d[1])!==f.r2l(_[1]))&&(p={xr0:d,xr1:_}),(h.r2l(v[0])!==h.r2l(b[0])||h.r2l(v[1])!==h.r2l(b[1]))&&(k={yr0:v,yr1:b}),(p||k)&&s.push(Ga.extendFlat({plotinfo:c},p,k))}return Promise.resolve()},o.runFn=function(l){for(var u=e._fullData,c=e._fullLayout,f=c._basePlotModules,h,d,v,_=[],b=0;b<u.length;b++)_.push(b);function p(){if(e._fullLayout)for(var E=0;E<f.length;E++)f[E].transitionAxes&&f[E].transitionAxes(e,s,h,l)}function k(){if(e._fullLayout)for(var E=0;E<f.length;E++)f[E].plot(e,v,d,l)}s.length&&t.anim?a.ordering==="traces first"?(h=Ga.extendFlat({},a,{duration:0}),v=_,d=a,setTimeout(p,a.duration),k()):(h=a,v=null,d=Ga.extendFlat({},a,{duration:0}),setTimeout(k,h.duration),p()):s.length?(h=a,p()):t.anim&&(v=_,d=a,k())},Wne(e,a,o)};function Wne(e,t,r){var n=!1;function i(f){var h=Promise.resolve();if(!f)return h;for(;f.length;)h=h.then(f.shift());return h}function a(f){if(f)for(;f.length;)f.shift()}function o(){return e.emit("plotly_transitioning",[]),new Promise(function(f){e._transitioning=!0,t.duration>0&&(e._transitioningWithDuration=!0),e._transitionData._interruptCallbacks.push(function(){n=!0}),r.redraw&&e._transitionData._interruptCallbacks.push(function(){return Xl.call("redraw",e)}),e._transitionData._interruptCallbacks.push(function(){e.emit("plotly_transitioninterrupted",[])});var h=0,d=0;function v(){return h++,function(){d++,!n&&d===h&&s(f)}}r.runFn(v),setTimeout(v())})}function s(f){if(e._transitionData)return a(e._transitionData._interruptCallbacks),Promise.resolve().then(function(){if(r.redraw)return Xl.call("redraw",e)}).then(function(){e._transitioning=!1,e._transitioningWithDuration=!1,e.emit("plotly_transitioned",[])}).then(f)}function l(){if(e._transitionData)return e._transitioning=!1,i(e._transitionData._interruptCallbacks)}var u=[xa.previousPromises,l,r.prepareFn,xa.rehover,xa.reselect,o],c=Ga.syncOrAsync(u,e);return(!c||!c.then)&&(c=Promise.resolve()),c.then(function(){return e})}xa.doCalcdata=function(e,t){var r=Sp.list(e),n=e._fullData,i=e._fullLayout,a,o,s,l,u=new Array(n.length),c=(e.calcdata||[]).slice();for(e.calcdata=u,i._numBoxes=0,i._numViolins=0,i._violinScaleGroupStats={},e._hmpixcount=0,e._hmlumcount=0,i._piecolormap={},i._sunburstcolormap={},i._treemapcolormap={},i._iciclecolormap={},i._funnelareacolormap={},s=0;s<n.length;s++)if(Array.isArray(t)&&t.indexOf(s)===-1){u[s]=c[s];continue}for(s=0;s<n.length;s++)a=n[s],a._arrayAttrs=Une.findArrayAttributes(a),a._extremes={};var f=i._subplots.polar||[];for(s=0;s<f.length;s++)r.push(i[f[s]].radialaxis,i[f[s]].angularaxis);for(var h in i._colorAxes){var d=i[h];d.cauto!==!1&&(delete d.cmin,delete d.cmax)}var v=!1;function _(k){if(a=n[k],o=a._module,a.visible===!0&&a.transforms){if(o&&o.calc){var E=o.calc(e,a);E[0]&&E[0].t&&E[0].t._scene&&delete E[0].t._scene.dirty}for(l=0;l<a.transforms.length;l++){var S=a.transforms[l];o=transformsRegistry[S.type],o&&o.calcTransform&&(a._hasCalcTransform=!0,v=!0,o.calcTransform(e,a,S))}}}function b(k,E){if(a=n[k],o=a._module,!!o.isContainer===E){var S=[];if(a.visible===!0&&a._length!==0){delete a._indexToPoints;var L=a.transforms||[];for(l=L.length-1;l>=0;l--)if(L[l].enabled){a._indexToPoints=L[l]._indexToPoints;break}o&&o.calc&&(S=o.calc(e,a))}(!Array.isArray(S)||!S[0])&&(S=[{x:Dne,y:Dne}]),S[0].t||(S[0].t={}),S[0].trace=a,u[k]=S}}for(qne(r,n,i),s=0;s<n.length;s++)b(s,!0);for(s=0;s<n.length;s++)_(s);for(v&&qne(r,n,i),s=0;s<n.length;s++)b(s,!0);for(s=0;s<n.length;s++)b(s,!1);Bne(e);var p=_at(r,e);if(p.length){for(i._numBoxes=0,i._numViolins=0,s=0;s<p.length;s++)b(p[s],!0);for(s=0;s<p.length;s++)b(p[s],!1);Bne(e)}Xl.getComponentMethod("fx","calc")(e),Xl.getComponentMethod("errorbars","calc")(e)};var yat=/(total|sum|min|max|mean|geometric mean|median) (ascending|descending)/;function _at(e,t){var r=[],n,i,a,o,s;function l(N,H,te){var oe=H._id.charAt(0);if(N==="histogram2dcontour"){var _e=H._counterAxes[0],Ee=Sp.getFromId(t,_e),Ce=oe==="x"||_e==="x"&&Ee.type==="category",me=oe==="y"||_e==="y"&&Ee.type==="category";return function(ie,Se){return ie===0||Se===0||Ce&&ie===te[Se].length-1||me&&Se===te.length-1?-1:(oe==="y"?Se:ie)-1}}else return function(ie,Se){return oe==="y"?Se:ie}}var u={min:function(N){return Ga.aggNums(Math.min,null,N)},max:function(N){return Ga.aggNums(Math.max,null,N)},sum:function(N){return Ga.aggNums(function(H,te){return H+te},null,N)},total:function(N){return Ga.aggNums(function(H,te){return H+te},null,N)},mean:function(N){return Ga.mean(N)},"geometric mean":function(N){return Ga.geometricMean(N)},median:function(N){return Ga.median(N)}};function c(N,H){return N[1]-H[1]}function f(N,H){return H[1]-N[1]}for(n=0;n<e.length;n++){var h=e[n];if(h.type==="category"){var d=h.categoryorder.match(yat);if(d){var v=d[1],_=d[2],b=h._id.charAt(0),p=b==="x",k=[];for(i=0;i<h._categories.length;i++)k.push([h._categories[i],[]]);for(i=0;i<h._traceIndices.length;i++){var E=h._traceIndices[i],S=t._fullData[E];if(S.visible===!0){var L=S.type;Xl.traceIs(S,"histogram")&&(delete S._xautoBinFinished,delete S._yautoBinFinished);var x=L==="splom",C=L==="scattergl",M=t.calcdata[E];for(a=0;a<M.length;a++){var g=M[a],P,T;if(x){var z=S._axesDim[h._id];if(!p){var O=S._diag[z][0];O&&(h=t._fullLayout[Sp.id2name(O)])}var V=g.trace.dimensions[z].values;for(o=0;o<V.length;o++)for(P=h._categoriesMap[V[o]],s=0;s<g.trace.dimensions.length;s++)if(s!==z){var G=g.trace.dimensions[s];k[P][1].push(G.values[o])}}else if(C){for(o=0;o<g.t.x.length;o++)p?(P=g.t.x[o],T=g.t.y[o]):(P=g.t.y[o],T=g.t.x[o]),k[P][1].push(T);g.t&&g.t._scene&&delete g.t._scene.dirty}else if(g.hasOwnProperty("z")){T=g.z;var Z=l(S.type,h,T);for(o=0;o<T.length;o++)for(s=0;s<T[o].length;s++)P=Z(s,o),P+1&&k[P][1].push(T[o][s])}else for(P=g.p,P===void 0&&(P=g[b]),T=g.s,T===void 0&&(T=g.v),T===void 0&&(T=p?g.y:g.x),Array.isArray(T)||(T===void 0?T=[]:T=[T]),o=0;o<T.length;o++)k[P][1].push(T[o])}}}h._categoriesValue=k;var j=[];for(i=0;i<k.length;i++)j.push([k[i][0],u[v](k[i][1])]);j.sort(_==="descending"?f:c),h._categoriesAggregatedValue=j,h._initialCategories=j.map(function(N){return N[0]}),r=r.concat(h.sortByInitialCategories())}}}return r}function qne(e,t,r){var n={};function i(l){l.clearCalc(),l.type==="multicategory"&&l.setupMultiCategory(t),n[l._id]=1}Ga.simpleMap(e,i);for(var a=r._axisMatchGroups||[],o=0;o<a.length;o++)for(var s in a[o])n[s]||i(r[Sp.id2name(s)])}function Bne(e){var t=e._fullLayout,r=t._visibleModules,n={},i,a,o;for(a=0;a<r.length;a++){var s=r[a],l=s.crossTraceCalc;if(l){var u=s.basePlotModule.name;n[u]?Ga.pushUnique(n[u],l):n[u]=[l]}}for(o in n){var c=n[o],f=t._subplots[o];if(Array.isArray(f))for(i=0;i<f.length;i++){var h=f[i],d=o==="cartesian"?t._plots[h]:t[h];for(a=0;a<c.length;a++)c[a](e,d,h)}else for(a=0;a<c.length;a++)c[a](e)}}xa.rehover=function(e){e._fullLayout._rehover&&e._fullLayout._rehover()};xa.redrag=function(e){e._fullLayout._redrag&&e._fullLayout._redrag()};xa.reselect=function(e){var t=e._fullLayout,r=(e.layout||{}).selections,n=t._previousSelections;t._previousSelections=r;var i=t._reselect||JSON.stringify(r)!==JSON.stringify(n);Xl.getComponentMethod("selections","reselect")(e,i)};xa.generalUpdatePerTraceModule=function(e,t,r,n){var i=t.traceHash,a={},o;for(o=0;o<r.length;o++){var s=r[o],l=s[0].trace;l.visible&&(a[l.type]=a[l.type]||[],a[l.type].push(s))}for(var u in i)if(!a[u]){var c=i[u][0],f=c[0].trace;f.visible=!1,a[u]=[c]}for(var h in a){var d=a[h],v=d[0][0].trace._module;v.plot(e,t,Ga.filterVisible(d),n)}t.traceHash=a};xa.plotBasePlot=function(e,t,r,n,i){var a=Xl.getModule(e),o=sat(t.calcdata,a)[0];a.plot(t,o,n,i)};xa.cleanBasePlot=function(e,t,r,n,i){var a=i._has&&i._has(e),o=r._has&&r._has(e);a&&!o&&i["_"+e+"layer"].selectAll("g.trace").remove()}});var Wp=ye(_b=>{"use strict";_b.xmlns="http://www.w3.org/2000/xmlns/";_b.svg="http://www.w3.org/2000/svg";_b.xlink="http://www.w3.org/1999/xlink";_b.svgAttrs={xmlns:_b.svg,"xmlns:xlink":_b.xlink}});var $h=ye((hnr,Zne)=>{"use strict";Zne.exports={FROM_BL:{left:0,center:.5,right:1,bottom:0,middle:.5,top:1},FROM_TL:{left:0,center:.5,right:1,bottom:1,middle:.5,top:0},FROM_BR:{left:1,center:.5,right:0,bottom:0,middle:.5,top:1},LINE_SPACING:1.3,CAP_SHIFT:.7,MID_SHIFT:.35,OPPOSITE_SIDE:{left:"right",right:"left",top:"bottom",bottom:"top"}}});var ru=ye(x0=>{"use strict";var Ph=Oa(),Ay=Dr(),xat=Ay.strTranslate,pq=Wp(),bat=$h().LINE_SPACING,wat=/([^$]*)([$]+[^$]*[$]+)([^$]*)/;x0.convertToTspans=function(e,t,r){var n=e.text(),i=!e.attr("data-notex")&&t&&t._context.typesetMath&&typeof MathJax!="undefined"&&n.match(wat),a=Ph.select(e.node().parentNode);if(a.empty())return;var o=e.attr("class")?e.attr("class").split(" ")[0]:"text";o+="-math",a.selectAll("svg."+o).remove(),a.selectAll("g."+o+"-group").remove(),e.style("display",null).attr({"data-unformatted":n,"data-math":"N"});function s(){a.empty()||(o=e.attr("class")+"-math",a.select("svg."+o).remove()),e.text("").style("white-space","pre");var l=Fat(e.node(),n);l&&e.style("pointer-events","all"),x0.positionText(e),r&&r.call(e)}return i?(t&&t._promises||[]).push(new Promise(function(l){e.style("display","none");var u=parseInt(e.node().style.fontSize,10),c={fontSize:u};Mat(i[2],c,function(f,h,d){a.selectAll("svg."+o).remove(),a.selectAll("g."+o+"-group").remove();var v=f&&f.select("svg");if(!v||!v.node()){s(),l();return}var _=a.append("g").classed(o+"-group",!0).attr({"pointer-events":"none","data-unformatted":n,"data-math":"Y"});_.node().appendChild(v.node()),h&&h.node()&&v.node().insertBefore(h.node().cloneNode(!0),v.node().firstChild);var b=d.width,p=d.height;v.attr({class:o,height:p,preserveAspectRatio:"xMinYMin meet"}).style({overflow:"visible","pointer-events":"none"});var k=e.node().style.fill||"black",E=v.select("g");E.attr({fill:k,stroke:k});var S=E.node().getBoundingClientRect(),L=S.width,x=S.height;(L>b||x>p)&&(v.style("overflow","hidden"),S=v.node().getBoundingClientRect(),L=S.width,x=S.height);var C=+e.attr("x"),M=+e.attr("y"),g=u||e.node().getBoundingClientRect().height,P=-g/4;if(o[0]==="y")_.attr({transform:"rotate("+[-90,C,M]+")"+xat(-L/2,P-x/2)});else if(o[0]==="l")M=P-x/2;else if(o[0]==="a"&&o.indexOf("atitle")!==0)C=0,M=P;else{var T=e.attr("text-anchor");C=C-L*(T==="middle"?.5:T==="end"?1:0),M=M+P-x/2}v.attr({x:C,y:M}),r&&r.call(e,_),l(_)})})):s(),e};var Tat=/(<|&lt;|&#60;)/g,Aat=/(>|&gt;|&#62;)/g;function Sat(e){return e.replace(Tat,"\\lt ").replace(Aat,"\\gt ")}var Yne=[["$","$"],["\\(","\\)"]];function Mat(e,t,r){var n=parseInt((MathJax.version||"").split(".")[0]);if(n!==2&&n!==3){Ay.warn("No MathJax version:",MathJax.version);return}var i,a,o,s,l=function(){return a=Ay.extendDeepAll({},MathJax.Hub.config),o=MathJax.Hub.processSectionDelay,MathJax.Hub.processSectionDelay!==void 0&&(MathJax.Hub.processSectionDelay=0),MathJax.Hub.Config({messageStyle:"none",tex2jax:{inlineMath:Yne},displayAlign:"left"})},u=function(){a=Ay.extendDeepAll({},MathJax.config),MathJax.config.tex||(MathJax.config.tex={}),MathJax.config.tex.inlineMath=Yne},c=function(){if(i=MathJax.Hub.config.menuSettings.renderer,i!=="SVG")return MathJax.Hub.setRenderer("SVG")},f=function(){i=MathJax.config.startup.output,i!=="svg"&&(MathJax.config.startup.output="svg")},h=function(){var k="math-output-"+Ay.randstr({},64);s=Ph.select("body").append("div").attr({id:k}).style({visibility:"hidden",position:"absolute","font-size":t.fontSize+"px"}).text(Sat(e));var E=s.node();return n===2?MathJax.Hub.Typeset(E):MathJax.typeset([E])},d=function(){var k=s.select(n===2?".MathJax_SVG":".MathJax"),E=!k.empty()&&s.select("svg").node();if(!E)Ay.log("There was an error in the tex syntax.",e),r();else{var S=E.getBoundingClientRect(),L;n===2?L=Ph.select("body").select("#MathJax_SVG_glyphs"):L=k.select("defs"),r(k,L,S)}s.remove()},v=function(){if(i!=="SVG")return MathJax.Hub.setRenderer(i)},_=function(){i!=="svg"&&(MathJax.config.startup.output=i)},b=function(){return o!==void 0&&(MathJax.Hub.processSectionDelay=o),MathJax.Hub.Config(a)},p=function(){MathJax.config=a};n===2?MathJax.Hub.Queue(l,c,h,d,v,b):n===3&&(u(),f(),MathJax.startup.defaultReady(),MathJax.startup.promise.then(function(){h(),d(),_(),p()}))}var Qne={sup:"font-size:70%",sub:"font-size:70%",s:"text-decoration:line-through",u:"text-decoration:underline",b:"font-weight:bold",i:"font-style:italic",a:"cursor:pointer",span:"",em:"font-style:italic;font-weight:bold"},Eat={sub:"0.3em",sup:"-0.6em"},kat={sub:"-0.21em",sup:"0.42em"},Kne="\u200B",Jne=["http:","https:","mailto:","",void 0,":"],eae=x0.NEWLINES=/(\r\n?|\n)/g,mq=/(<[^<>]*>)/,yq=/<(\/?)([^ >]*)(\s+(.*))?>/i,Cat=/<br(\s+.*)?>/i;x0.BR_TAG_ALL=/<br(\s+.*)?>/gi;var tae=/(^|[\s"'])style\s*=\s*("([^"]*);?"|'([^']*);?')/i,rae=/(^|[\s"'])href\s*=\s*("([^"]*)"|'([^']*)')/i,iae=/(^|[\s"'])target\s*=\s*("([^"\s]*)"|'([^'\s]*)')/i,Lat=/(^|[\s"'])popup\s*=\s*("([\w=,]*)"|'([\w=,]*)')/i;function xb(e,t){if(!e)return null;var r=e.match(t),n=r&&(r[3]||r[4]);return n&&$6(n)}var Pat=/(^|;)\s*color:/;x0.plainText=function(e,t){t=t||{};for(var r=t.len!==void 0&&t.len!==-1?t.len:1/0,n=t.allowedTags!==void 0?t.allowedTags:["br"],i="...",a=i.length,o=e.split(mq),s=[],l="",u=0,c=0;c<o.length;c++){var f=o[c],h=f.match(yq),d=h&&h[2].toLowerCase();if(d)n.indexOf(d)!==-1&&(s.push(f),l=d);else{var v=f.length;if(u+v<r)s.push(f),u+=v;else if(u<r){var _=r-u;l&&(l!=="br"||_<=a||v<=a)&&s.pop(),r>a?s.push(f.slice(0,Math.max(0,_-a))+i):s.push(f.slice(0,_));break}l=""}}return s.join("")};var Iat={mu:"\u03BC",amp:"&",lt:"<",gt:">",nbsp:"\xA0",times:"\xD7",plusmn:"\xB1",deg:"\xB0"},Rat=/&(#\d+|#x[\da-fA-F]+|[a-z]+);/g;function $6(e){return e.replace(Rat,function(t,r){var n;return r.charAt(0)==="#"?n=Dat(r.charAt(1)==="x"?parseInt(r.slice(2),16):parseInt(r.slice(1),10)):n=Iat[r],n||t})}x0.convertEntities=$6;function Dat(e){if(!(e>1114111)){var t=String.fromCodePoint;if(t)return t(e);var r=String.fromCharCode;return e<=65535?r(e):r((e>>10)+55232,e%1024+56320)}}function Fat(e,t){t=t.replace(eae," ");var r=!1,n=[],i,a=-1;function o(){a++;var x=document.createElementNS(pq.svg,"tspan");Ph.select(x).attr({class:"line",dy:a*bat+"em"}),e.appendChild(x),i=x;var C=n;if(n=[{node:x}],C.length>1)for(var M=1;M<C.length;M++)s(C[M])}function s(x){var C=x.type,M={},g;if(C==="a"){g="a";var P=x.target,T=x.href,z=x.popup;T&&(M={"xlink:xlink:show":P==="_blank"||P.charAt(0)!=="_"?"new":"replace",target:P,"xlink:xlink:href":T},z&&(M.onclick='window.open(this.href.baseVal,this.target.baseVal,"'+z+'");return false;'))}else g="tspan";x.style&&(M.style=x.style);var O=document.createElementNS(pq.svg,g);if(C==="sup"||C==="sub"){l(i,Kne),i.appendChild(O);var V=document.createElementNS(pq.svg,"tspan");l(V,Kne),Ph.select(V).attr("dy",kat[C]),M.dy=Eat[C],i.appendChild(O),i.appendChild(V)}else i.appendChild(O);Ph.select(O).attr(M),i=x.node=O,n.push(x)}function l(x,C){x.appendChild(document.createTextNode(C))}function u(x){if(n.length===1){Ay.log("Ignoring unexpected end tag </"+x+">.",t);return}var C=n.pop();x!==C.type&&Ay.log("Start tag <"+C.type+"> doesnt match end tag <"+x+">. Pretending it did match.",t),i=n[n.length-1].node}var c=Cat.test(t);c?o():(i=e,n=[{node:e}]);for(var f=t.split(mq),h=0;h<f.length;h++){var d=f[h],v=d.match(yq),_=v&&v[2].toLowerCase(),b=Qne[_];if(_==="br")o();else if(b===void 0)l(i,$6(d));else if(v[1])u(_);else{var p=v[4],k={type:_},E=xb(p,tae);if(E?(E=E.replace(Pat,"$1 fill:"),b&&(E+=";"+b)):b&&(E=b),E&&(k.style=E),_==="a"){r=!0;var S=xb(p,rae);if(S){var L=nae(S);L&&(k.href=L,k.target=xb(p,iae)||"_blank",k.popup=xb(p,Lat))}}s(k)}}return r}function nae(e){var t=encodeURI(decodeURI(e)),r=document.createElement("a"),n=document.createElement("a");r.href=e,n.href=t;var i=r.protocol,a=n.protocol;return Jne.indexOf(i)!==-1&&Jne.indexOf(a)!==-1?t:""}x0.sanitizeHTML=function(t){t=t.replace(eae," ");for(var r=document.createElement("p"),n=r,i=[],a=t.split(mq),o=0;o<a.length;o++){var s=a[o],l=s.match(yq),u=l&&l[2].toLowerCase();if(u in Qne)if(l[1])i.length&&(n=i.pop());else{var c=l[4],f=xb(c,tae),h=f?{style:f}:{};if(u==="a"){var d=xb(c,rae);if(d){var v=nae(d);if(v){h.href=v;var _=xb(c,iae);_&&(h.target=_)}}}var b=document.createElement(u);n.appendChild(b),Ph.select(b).attr(h),n=b,i.push(b)}else n.appendChild(document.createTextNode($6(s)))}var p="innerHTML";return r[p]};x0.lineCount=function(t){return t.selectAll("tspan.line").size()||1};x0.positionText=function(t,r,n){return t.each(function(){var i=Ph.select(this);function a(l,u){return u===void 0?(u=i.attr(l),u===null&&(i.attr(l,0),u=0)):i.attr(l,u),u}var o=a("x",r),s=a("y",n);this.nodeName==="text"&&i.selectAll("tspan.line").attr({x:o,y:s})})};function $ne(e,t,r){var n=r.horizontalAlign,i=r.verticalAlign||"top",a=e.node().getBoundingClientRect(),o=t.node().getBoundingClientRect(),s,l,u;return i==="bottom"?l=function(){return a.bottom-s.height}:i==="middle"?l=function(){return a.top+(a.height-s.height)/2}:l=function(){return a.top},n==="right"?u=function(){return a.right-s.width}:n==="center"?u=function(){return a.left+(a.width-s.width)/2}:u=function(){return a.left},function(){s=this.node().getBoundingClientRect();var c=u()-o.left,f=l()-o.top,h=r.gd||{};if(r.gd){h._fullLayout._calcInverseTransform(h);var d=Ay.apply3DTransform(h._fullLayout._invTransform)(c,f);c=d[0],f=d[1]}return this.style({top:f+"px",left:c+"px","z-index":1e3}),this}}var gq="1px ";x0.makeTextShadow=function(e){var t=gq,r=gq,n=gq;return t+r+n+e+", -"+t+"-"+r+n+e+", "+t+"-"+r+n+e+", -"+t+r+n+e};x0.makeEditable=function(e,t){var r=t.gd,n=t.delegate,i=Ph.dispatch("edit","input","cancel"),a=n||e;if(e.style({"pointer-events":n?"none":"all"}),e.size()!==1)throw new Error("boo");function o(){l(),e.style({opacity:0});var u=a.attr("class"),c;u?c="."+u.split(" ")[0]+"-math-group":c="[class*=-math-group]",c&&Ph.select(e.node().parentNode).select(c).style({opacity:0})}function s(u){var c=u.node(),f=document.createRange();f.selectNodeContents(c);var h=window.getSelection();h.removeAllRanges(),h.addRange(f),c.focus()}function l(){var u=Ph.select(r),c=u.select(".svg-container"),f=c.append("div"),h=e.node().style,d=parseFloat(h.fontSize||12),v=t.text;v===void 0&&(v=e.attr("data-unformatted")),f.classed("plugin-editable editable",!0).style({position:"absolute","font-family":h.fontFamily||"Arial","font-size":d,color:t.fill||h.fill||"black",opacity:1,"background-color":t.background||"transparent",outline:"#ffffff33 1px solid",margin:[-d/8+1,0,0,-1].join("px ")+"px",padding:"0","box-sizing":"border-box"}).attr({contenteditable:!0}).text(v).call($ne(e,c,t)).on("blur",function(){r._editing=!1,e.text(this.textContent).style({opacity:1});var _=Ph.select(this).attr("class"),b;_?b="."+_.split(" ")[0]+"-math-group":b="[class*=-math-group]",b&&Ph.select(e.node().parentNode).select(b).style({opacity:0});var p=this.textContent;Ph.select(this).transition().duration(0).remove(),Ph.select(document).on("mouseup",null),i.edit.call(e,p)}).on("focus",function(){var _=this;r._editing=!0,Ph.select(document).on("mouseup",function(){if(Ph.event.target===_)return!1;document.activeElement===f.node()&&f.node().blur()})}).on("keyup",function(){Ph.event.which===27?(r._editing=!1,e.style({opacity:1}),Ph.select(this).style({opacity:0}).on("blur",function(){return!1}).transition().remove(),i.cancel.call(e,this.textContent)):(i.input.call(e,this.textContent),Ph.select(this).call($ne(e,c,t)))}).on("keydown",function(){Ph.event.which===13&&this.blur()}).call(s)}return t.immediate?o():a.on("click",o),Ph.rebind(e,i,"on")}});var pv=ye((vnr,hae)=>{"use strict";var zat=Oa(),eL=cd(),QS=Eo(),Q6=Dr(),aae=ka(),Oat=lb().isValid;function qat(e,t,r){var n=t?Q6.nestedProperty(e,t).get()||{}:e,i=n[r||"color"];i&&i._inputArray&&(i=i._inputArray);var a=!1;if(Q6.isArrayOrTypedArray(i)){for(var o=0;o<i.length;o++)if(QS(i[o])){a=!0;break}}return Q6.isPlainObject(n)&&(a||n.showscale===!0||QS(n.cmin)&&QS(n.cmax)||Oat(n.colorscale)||Q6.isPlainObject(n.colorbar))}var oae=["showscale","autocolorscale","colorscale","reversescale","colorbar"],$S=["min","max","mid","auto"];function lae(e){var t=e._colorAx,r=t||e,n={},i,a,o;for(a=0;a<oae.length;a++)o=oae[a],n[o]=r[o];if(t)for(i="c",a=0;a<$S.length;a++)o=$S[a],n[o]=r["c"+o];else{var s;for(a=0;a<$S.length;a++){if(o=$S[a],s="c"+o,s in r){n[o]=r[s];continue}s="z"+o,s in r&&(n[o]=r[s])}i=s.charAt(0)}return n._sync=function(l,u){var c=$S.indexOf(l)!==-1?i+l:l;r[c]=r["_"+c]=u},n}function uae(e){for(var t=lae(e),r=t.min,n=t.max,i=t.reversescale?cae(t.colorscale):t.colorscale,a=i.length,o=new Array(a),s=new Array(a),l=0;l<a;l++){var u=i[l];o[l]=r+u[0]*(n-r),s[l]=u[1]}return{domain:o,range:s}}function cae(e){for(var t=e.length,r=new Array(t),n=t-1,i=0;n>=0;n--,i++){var a=e[n];r[i]=[1-a[0],a[1]]}return r}function fae(e,t){t=t||{};for(var r=e.domain,n=e.range,i=n.length,a=new Array(i),o=0;o<i;o++){var s=eL(n[o]).toRgb();a[o]=[s.r,s.g,s.b,s.a]}var l=zat.scale.linear().domain(r).range(a).clamp(!0),u=t.noNumericCheck,c=t.returnArray,f;return u&&c?f=l:u?f=function(h){return sae(l(h))}:c?f=function(h){return QS(h)?l(h):eL(h).isValid()?h:aae.defaultLine}:f=function(h){return QS(h)?sae(l(h)):eL(h).isValid()?h:aae.defaultLine},f.domain=l.domain,f.range=function(){return n},f}function Bat(e,t){return fae(uae(e),t)}function sae(e){var t={r:e[0],g:e[1],b:e[2],a:e[3]};return eL(t).toRgbString()}hae.exports={hasColorscale:qat,extractOpts:lae,extractScale:uae,flipScale:cae,makeColorScaleFunc:fae,makeColorScaleFuncFromTrace:Bat}});var df=ye((pnr,pae)=>{"use strict";var dae=NO(),Nat=dae.FORMAT_LINK,Uat=dae.DATE_FORMAT_LINK;function Vat(e,t){return{valType:"string",dflt:"",editType:"none",description:(t?_q:vae)("hover text",e)+["By default the values are formatted using "+(t?"generic number format":"`"+e+"axis.hoverformat`")+"."].join(" ")}}function _q(e,t){return["Sets the "+e+" formatting rule"+(t?"for `"+t+"` ":""),"using d3 formatting mini-languages","which are very similar to those in Python. For numbers, see: "+Nat+"."].join(" ")}function vae(e,t){return _q(e,t)+[" And for dates see: "+Uat+".","We add two items to d3's date formatter:","*%h* for half of the year as a decimal number as well as","*%{n}f* for fractional seconds","with n digits. For example, *2016-10-13 09:15:23.456* with tickformat","*%H~%M~%S.%2f* would display *09~15~23.46*"].join(" ")}pae.exports={axisHoverFormat:Vat,descriptionOnlyNumbers:_q,descriptionWithDates:vae}});var Rd=ye((mnr,Iae)=>{"use strict";var gae=ec(),S3=Lh(),Pae=Pd().dash,bq=Ao().extendFlat,mae=vl().templatedArray,gnr=Ll().templateFormatStringDescription,yae=df().descriptionWithDates,Gat=fs().ONEDAY,pm=hd(),Hat=pm.HOUR_PATTERN,jat=pm.WEEKDAY_PATTERN,xq={valType:"enumerated",values:["auto","linear","array"],editType:"ticks",impliedEdits:{tick0:void 0,dtick:void 0}},Wat=bq({},xq,{values:xq.values.slice().concat(["sync"])});function _ae(e){return{valType:"integer",min:0,dflt:e?5:0,editType:"ticks"}}var xae={valType:"any",editType:"ticks",impliedEdits:{tickmode:"linear"}},bae={valType:"any",editType:"ticks",impliedEdits:{tickmode:"linear"}},wae={valType:"data_array",editType:"ticks"},Tae={valType:"enumerated",values:["outside","inside",""],editType:"ticks"};function Aae(e){var t={valType:"number",min:0,editType:"ticks"};return e||(t.dflt=5),t}function Sae(e){var t={valType:"number",min:0,editType:"ticks"};return e||(t.dflt=1),t}var Mae={valType:"color",dflt:S3.defaultLine,editType:"ticks"},Eae={valType:"color",dflt:S3.lightLine,editType:"ticks"};function kae(e){var t={valType:"number",min:0,editType:"ticks"};return e||(t.dflt=1),t}var Cae=bq({},Pae,{editType:"ticks"}),Lae={valType:"boolean",editType:"ticks"};Iae.exports={visible:{valType:"boolean",editType:"plot"},color:{valType:"color",dflt:S3.defaultLine,editType:"ticks"},title:{text:{valType:"string",editType:"ticks"},font:gae({editType:"ticks"}),standoff:{valType:"number",min:0,editType:"ticks"},editType:"ticks"},type:{valType:"enumerated",values:["-","linear","log","date","category","multicategory"],dflt:"-",editType:"calc",_noTemplating:!0},autotypenumbers:{valType:"enumerated",values:["convert types","strict"],dflt:"convert types",editType:"calc"},autorange:{valType:"enumerated",values:[!0,!1,"reversed","min reversed","max reversed","min","max"],dflt:!0,editType:"axrange",impliedEdits:{"range[0]":void 0,"range[1]":void 0}},autorangeoptions:{minallowed:{valType:"any",editType:"plot",impliedEdits:{"range[0]":void 0,"range[1]":void 0}},maxallowed:{valType:"any",editType:"plot",impliedEdits:{"range[0]":void 0,"range[1]":void 0}},clipmin:{valType:"any",editType:"plot",impliedEdits:{"range[0]":void 0,"range[1]":void 0}},clipmax:{valType:"any",editType:"plot",impliedEdits:{"range[0]":void 0,"range[1]":void 0}},include:{valType:"any",arrayOk:!0,editType:"plot",impliedEdits:{"range[0]":void 0,"range[1]":void 0}},editType:"plot"},rangemode:{valType:"enumerated",values:["normal","tozero","nonnegative"],dflt:"normal",editType:"plot"},range:{valType:"info_array",items:[{valType:"any",editType:"axrange",impliedEdits:{"^autorange":!1},anim:!0},{valType:"any",editType:"axrange",impliedEdits:{"^autorange":!1},anim:!0}],editType:"axrange",impliedEdits:{autorange:!1},anim:!0},minallowed:{valType:"any",editType:"plot",impliedEdits:{"^autorange":!1}},maxallowed:{valType:"any",editType:"plot",impliedEdits:{"^autorange":!1}},fixedrange:{valType:"boolean",dflt:!1,editType:"calc"},modebardisable:{valType:"flaglist",flags:["autoscale","zoominout"],extras:["none"],dflt:"none",editType:"modebar"},insiderange:{valType:"info_array",items:[{valType:"any",editType:"plot"},{valType:"any",editType:"plot"}],editType:"plot"},scaleanchor:{valType:"enumerated",values:[pm.idRegex.x.toString(),pm.idRegex.y.toString(),!1],editType:"plot"},scaleratio:{valType:"number",min:0,dflt:1,editType:"plot"},constrain:{valType:"enumerated",values:["range","domain"],editType:"plot"},constraintoward:{valType:"enumerated",values:["left","center","right","top","middle","bottom"],editType:"plot"},matches:{valType:"enumerated",values:[pm.idRegex.x.toString(),pm.idRegex.y.toString()],editType:"calc"},rangebreaks:mae("rangebreak",{enabled:{valType:"boolean",dflt:!0,editType:"calc"},bounds:{valType:"info_array",items:[{valType:"any",editType:"calc"},{valType:"any",editType:"calc"}],editType:"calc"},pattern:{valType:"enumerated",values:[jat,Hat,""],editType:"calc"},values:{valType:"info_array",freeLength:!0,editType:"calc",items:{valType:"any",editType:"calc"}},dvalue:{valType:"number",editType:"calc",min:0,dflt:Gat},editType:"calc"}),tickmode:Wat,nticks:_ae(),tick0:xae,dtick:bae,ticklabelstep:{valType:"integer",min:1,dflt:1,editType:"ticks"},tickvals:wae,ticktext:{valType:"data_array",editType:"ticks"},ticks:Tae,tickson:{valType:"enumerated",values:["labels","boundaries"],dflt:"labels",editType:"ticks"},ticklabelmode:{valType:"enumerated",values:["instant","period"],dflt:"instant",editType:"ticks"},ticklabelposition:{valType:"enumerated",values:["outside","inside","outside top","inside top","outside left","inside left","outside right","inside right","outside bottom","inside bottom"],dflt:"outside",editType:"calc"},ticklabeloverflow:{valType:"enumerated",values:["allow","hide past div","hide past domain"],editType:"calc"},ticklabelshift:{valType:"integer",dflt:0,editType:"ticks"},ticklabelstandoff:{valType:"integer",dflt:0,editType:"ticks"},ticklabelindex:{valType:"integer",arrayOk:!0,editType:"calc"},mirror:{valType:"enumerated",values:[!0,"ticks",!1,"all","allticks"],dflt:!1,editType:"ticks+layoutstyle"},ticklen:Aae(),tickwidth:Sae(),tickcolor:Mae,showticklabels:{valType:"boolean",dflt:!0,editType:"ticks"},labelalias:{valType:"any",dflt:!1,editType:"ticks"},automargin:{valType:"flaglist",flags:["height","width","left","right","top","bottom"],extras:[!0,!1],dflt:!1,editType:"ticks"},showspikes:{valType:"boolean",dflt:!1,editType:"modebar"},spikecolor:{valType:"color",dflt:null,editType:"none"},spikethickness:{valType:"number",dflt:3,editType:"none"},spikedash:bq({},Pae,{dflt:"dash",editType:"none"}),spikemode:{valType:"flaglist",flags:["toaxis","across","marker"],dflt:"toaxis",editType:"none"},spikesnap:{valType:"enumerated",values:["data","cursor","hovered data"],dflt:"hovered data",editType:"none"},tickfont:gae({editType:"ticks"}),tickangle:{valType:"angle",dflt:"auto",editType:"ticks"},autotickangles:{valType:"info_array",freeLength:!0,items:{valType:"angle"},dflt:[0,30,90],editType:"ticks"},tickprefix:{valType:"string",dflt:"",editType:"ticks"},showtickprefix:{valType:"enumerated",values:["all","first","last","none"],dflt:"all",editType:"ticks"},ticksuffix:{valType:"string",dflt:"",editType:"ticks"},showticksuffix:{valType:"enumerated",values:["all","first","last","none"],dflt:"all",editType:"ticks"},showexponent:{valType:"enumerated",values:["all","first","last","none"],dflt:"all",editType:"ticks"},exponentformat:{valType:"enumerated",values:["none","e","E","power","SI","B","SI extended"],dflt:"B",editType:"ticks"},minexponent:{valType:"number",dflt:3,min:0,editType:"ticks"},separatethousands:{valType:"boolean",dflt:!1,editType:"ticks"},tickformat:{valType:"string",dflt:"",editType:"ticks",description:yae("tick label")},tickformatstops:mae("tickformatstop",{enabled:{valType:"boolean",dflt:!0,editType:"ticks"},dtickrange:{valType:"info_array",items:[{valType:"any",editType:"ticks"},{valType:"any",editType:"ticks"}],editType:"ticks"},value:{valType:"string",dflt:"",editType:"ticks"},editType:"ticks"}),hoverformat:{valType:"string",dflt:"",editType:"none",description:yae("hover text")},unifiedhovertitle:{text:{valType:"string",dflt:"",editType:"none"},editType:"none"},showline:{valType:"boolean",dflt:!1,editType:"ticks+layoutstyle"},linecolor:{valType:"color",dflt:S3.defaultLine,editType:"layoutstyle"},linewidth:{valType:"number",min:0,dflt:1,editType:"ticks+layoutstyle"},showgrid:Lae,gridcolor:Eae,gridwidth:kae(),griddash:Cae,zeroline:{valType:"boolean",editType:"ticks"},zerolinecolor:{valType:"color",dflt:S3.defaultLine,editType:"ticks"},zerolinelayer:{valType:"enumerated",values:["above traces","below traces"],dflt:"below traces",editType:"plot"},zerolinewidth:{valType:"number",dflt:1,editType:"ticks"},showdividers:{valType:"boolean",dflt:!0,editType:"ticks"},dividercolor:{valType:"color",dflt:S3.defaultLine,editType:"ticks"},dividerwidth:{valType:"number",dflt:1,editType:"ticks"},anchor:{valType:"enumerated",values:["free",pm.idRegex.x.toString(),pm.idRegex.y.toString()],editType:"plot"},side:{valType:"enumerated",values:["top","bottom","left","right"],editType:"plot"},overlaying:{valType:"enumerated",values:["free",pm.idRegex.x.toString(),pm.idRegex.y.toString()],editType:"plot"},minor:{tickmode:xq,nticks:_ae("minor"),tick0:xae,dtick:bae,tickvals:wae,ticks:Tae,ticklen:Aae("minor"),tickwidth:Sae("minor"),tickcolor:Mae,gridcolor:Eae,gridwidth:kae("minor"),griddash:Cae,showgrid:Lae,editType:"ticks"},minorloglabels:{valType:"enumerated",values:["small digits","complete","none"],dflt:"small digits",editType:"calc"},layer:{valType:"enumerated",values:["above traces","below traces"],dflt:"above traces",editType:"plot"},domain:{valType:"info_array",items:[{valType:"number",min:0,max:1,editType:"plot"},{valType:"number",min:0,max:1,editType:"plot"}],dflt:[0,1],editType:"plot"},position:{valType:"number",min:0,max:1,dflt:0,editType:"plot"},autoshift:{valType:"boolean",dflt:!1,editType:"plot"},shift:{valType:"number",editType:"plot"},categoryorder:{valType:"enumerated",values:["trace","category ascending","category descending","array","total ascending","total descending","min ascending","min descending","max ascending","max descending","sum ascending","sum descending","mean ascending","mean descending","geometric mean ascending","geometric mean descending","median ascending","median descending"],dflt:"trace",editType:"calc"},categoryarray:{valType:"data_array",editType:"calc"},uirevision:{valType:"any",editType:"none"},editType:"calc"}});var tL=ye((ynr,Fae)=>{"use strict";var $c=Rd(),Rae=ec(),Dae=Ao().extendFlat,Xat=mc().overrideAll;Fae.exports=Xat({orientation:{valType:"enumerated",values:["h","v"],dflt:"v"},thicknessmode:{valType:"enumerated",values:["fraction","pixels"],dflt:"pixels"},thickness:{valType:"number",min:0,dflt:30},lenmode:{valType:"enumerated",values:["fraction","pixels"],dflt:"fraction"},len:{valType:"number",min:0,dflt:1},x:{valType:"number"},xref:{valType:"enumerated",dflt:"paper",values:["container","paper"],editType:"layoutstyle"},xanchor:{valType:"enumerated",values:["left","center","right"]},xpad:{valType:"number",min:0,dflt:10},y:{valType:"number"},yref:{valType:"enumerated",dflt:"paper",values:["container","paper"],editType:"layoutstyle"},yanchor:{valType:"enumerated",values:["top","middle","bottom"]},ypad:{valType:"number",min:0,dflt:10},outlinecolor:$c.linecolor,outlinewidth:$c.linewidth,bordercolor:$c.linecolor,borderwidth:{valType:"number",min:0,dflt:0},bgcolor:{valType:"color",dflt:"rgba(0,0,0,0)"},tickmode:$c.minor.tickmode,nticks:$c.nticks,tick0:$c.tick0,dtick:$c.dtick,tickvals:$c.tickvals,ticktext:$c.ticktext,ticks:Dae({},$c.ticks,{dflt:""}),ticklabeloverflow:Dae({},$c.ticklabeloverflow,{}),ticklabelposition:{valType:"enumerated",values:["outside","inside","outside top","inside top","outside left","inside left","outside right","inside right","outside bottom","inside bottom"],dflt:"outside"},ticklen:$c.ticklen,tickwidth:$c.tickwidth,tickcolor:$c.tickcolor,ticklabelstep:$c.ticklabelstep,showticklabels:$c.showticklabels,labelalias:$c.labelalias,tickfont:Rae({}),tickangle:$c.tickangle,tickformat:$c.tickformat,tickformatstops:$c.tickformatstops,tickprefix:$c.tickprefix,showtickprefix:$c.showtickprefix,ticksuffix:$c.ticksuffix,showticksuffix:$c.showticksuffix,separatethousands:$c.separatethousands,exponentformat:$c.exponentformat,minexponent:$c.minexponent,showexponent:$c.showexponent,title:{text:{valType:"string"},font:Rae({}),side:{valType:"enumerated",values:["right","top","bottom"]}}},"colorbars","from-root")});var Tu=ye((xnr,Oae)=>{"use strict";var Zat=tL(),Yat=o3().counter,Kat=Z1(),zae=lb().scales,_nr=Kat(zae);function rL(e){return"`"+e+"`"}Oae.exports=function(t,r){t=t||"",r=r||{};var n=r.cLetter||"c",i="onlyIfNumerical"in r?r.onlyIfNumerical:!!t,a="noScale"in r?r.noScale:t==="marker.line",o="showScaleDflt"in r?r.showScaleDflt:n==="z",s=typeof r.colorscaleDflt=="string"?zae[r.colorscaleDflt]:null,l=r.editTypeOverride||"",u=t?t+".":"",c,f;"colorAttr"in r?(c=r.colorAttr,f=r.colorAttr):(c={z:"z",c:"color"}[n],f="in "+rL(u+c));var h=i?" Has an effect only if "+f+" is set to a numerical array.":"",d=n+"auto",v=n+"min",_=n+"max",b=n+"mid",p=rL(u+d),k=rL(u+v),E=rL(u+_),S=k+" and "+E,L={};L[v]=L[_]=void 0;var x={};x[d]=!1;var C={};return c==="color"&&(C.color={valType:"color",arrayOk:!0,editType:l||"style"},r.anim&&(C.color.anim=!0)),C[d]={valType:"boolean",dflt:!0,editType:"calc",impliedEdits:L},C[v]={valType:"number",dflt:null,editType:l||"plot",impliedEdits:x},C[_]={valType:"number",dflt:null,editType:l||"plot",impliedEdits:x},C[b]={valType:"number",dflt:null,editType:"calc",impliedEdits:L},C.colorscale={valType:"colorscale",editType:"calc",dflt:s,impliedEdits:{autocolorscale:!1}},C.autocolorscale={valType:"boolean",dflt:r.autoColorDflt!==!1,editType:"calc",impliedEdits:{colorscale:void 0}},C.reversescale={valType:"boolean",dflt:!1,editType:"plot"},a||(C.showscale={valType:"boolean",dflt:o,editType:"calc"},C.colorbar=Zat),r.noColorAxis||(C.coloraxis={valType:"subplotid",regex:Yat("coloraxis"),dflt:null,editType:"calc"}),C}});var Tq=ye((bnr,qae)=>{"use strict";var Jat=Ao().extendFlat,$at=Tu(),wq=lb().scales;qae.exports={editType:"calc",colorscale:{editType:"calc",sequential:{valType:"colorscale",dflt:wq.Reds,editType:"calc"},sequentialminus:{valType:"colorscale",dflt:wq.Blues,editType:"calc"},diverging:{valType:"colorscale",dflt:wq.RdBu,editType:"calc"}},coloraxis:Jat({_isSubplotObj:!0,editType:"calc"},$at("",{colorAttr:"corresponding trace color array(s)",noColorAxis:!0,showScaleDflt:!0}))}});var Aq=ye((wnr,Bae)=>{"use strict";var Qat=Dr();Bae.exports=function(t){return Qat.isPlainObject(t.colorbar)}});var Eq=ye(Mq=>{"use strict";var Sq=Eo(),Nae=Dr(),Uae=fs(),eot=Uae.ONEDAY,tot=Uae.ONEWEEK;Mq.dtick=function(e,t){var r=t==="log",n=t==="date",i=t==="category",a=n?eot:1;if(!e)return a;if(Sq(e))return e=Number(e),e<=0?a:i?Math.max(1,Math.round(e)):n?Math.max(.1,e):e;if(typeof e!="string"||!(n||r))return a;var o=e.charAt(0),s=e.slice(1);return s=Sq(s)?Number(s):0,s<=0||!(n&&o==="M"&&s===Math.round(s)||r&&o==="L"||r&&o==="D"&&(s===1||s===2))?a:e};Mq.tick0=function(e,t,r,n){if(t==="date")return Nae.cleanDate(e,Nae.dateTick0(r,n%tot===0?1:0));if(!(n==="D1"||n==="D2"))return Sq(e)?Number(e):0}});var bb=ye((Anr,Gae)=>{"use strict";var Vae=Eq(),rot=Dr().isArrayOrTypedArray,iot=vv().isTypedArraySpec,not=vv().decodeTypedArraySpec;Gae.exports=function(t,r,n,i,a){a||(a={});var o=a.isMinor,s=o?t.minor||{}:t,l=o?r.minor:r,u=o?"minor.":"";function c(k){var E=s[k];return iot(E)&&(E=not(E)),E!==void 0?E:(l._template||{})[k]}var f=c("tick0"),h=c("dtick"),d=c("tickvals"),v=rot(d)?"array":h?"linear":"auto",_=n(u+"tickmode",v);if(_==="auto"||_==="sync")n(u+"nticks");else if(_==="linear"){var b=l.dtick=Vae.dtick(h,i);l.tick0=Vae.tick0(f,i,r.calendar,b)}else if(i!=="multicategory"){var p=n(u+"tickvals");p===void 0?l.tickmode="auto":o||n("ticktext")}}});var M3=ye((Snr,jae)=>{"use strict";var kq=Dr(),Hae=Rd();jae.exports=function(t,r,n,i){var a=i.isMinor,o=a?t.minor||{}:t,s=a?r.minor:r,l=a?Hae.minor:Hae,u=a?"minor.":"",c=kq.coerce2(o,s,l,"ticklen",a?(r.ticklen||5)*.6:void 0),f=kq.coerce2(o,s,l,"tickwidth",a?r.tickwidth||1:void 0),h=kq.coerce2(o,s,l,"tickcolor",(a?r.tickcolor:void 0)||s.color),d=n(u+"ticks",!a&&i.outerTicks||c||f||h?"outside":"");d||(delete s.ticklen,delete s.tickwidth,delete s.tickcolor)}});var Cq=ye((Mnr,Wae)=>{"use strict";Wae.exports=function(t){var r=["showexponent","showtickprefix","showticksuffix"],n=r.filter(function(a){return t[a]!==void 0}),i=function(a){return t[a]===t[n[0]]};if(n.every(i)||n.length===1)return t[n[0]]}});var Yd=ye((Enr,Xae)=>{"use strict";var iL=Dr(),aot=vl();Xae.exports=function(t,r,n){var i=n.name,a=n.inclusionAttr||"visible",o=r[i],s=iL.isArrayOrTypedArray(t[i])?t[i]:[],l=r[i]=[],u=aot.arrayTemplater(r,i,a),c,f;for(c=0;c<s.length;c++){var h=s[c];iL.isPlainObject(h)?f=u.newItem(h):(f=u.newItem({}),f[a]=!1),f._index=c,f[a]!==!1&&n.handleItemDefaults(h,f,r,n),l.push(f)}var d=u.defaultItems();for(c=0;c<d.length;c++)f=d[c],f._index=l.length,n.handleItemDefaults({},f,r,n,{}),l.push(f);if(iL.isArrayOrTypedArray(o)){var v=Math.min(o.length,l.length);for(c=0;c<v;c++)iL.relinkPrivateKeys(l[c],o[c])}return l}});var e_=ye((knr,Yae)=>{"use strict";var Lq=Dr(),oot=ka().contrast,Zae=Rd(),sot=Cq(),lot=Yd();Yae.exports=function(t,r,n,i,a){a||(a={});var o=n("labelalias");Lq.isPlainObject(o)||delete r.labelalias;var s=sot(t),l=n("showticklabels");if(l){a.noTicklabelshift||n("ticklabelshift"),a.noTicklabelstandoff||n("ticklabelstandoff");var u=a.font||{},c=r.color,f=r.ticklabelposition||"",h=f.indexOf("inside")!==-1?oot(a.bgColor):c&&c!==Zae.color.dflt?c:u.color;if(Lq.coerceFont(n,"tickfont",u,{overrideDflt:{color:h}}),!a.noTicklabelstep&&i!=="multicategory"&&i!=="log"&&n("ticklabelstep"),!a.noAng){var d=n("tickangle");!a.noAutotickangles&&d==="auto"&&n("autotickangles")}if(i!=="category"){var v=n("tickformat");lot(t,r,{name:"tickformatstops",inclusionAttr:"enabled",handleItemDefaults:uot}),r.tickformatstops.length||delete r.tickformatstops,!a.noExp&&!v&&i!=="date"&&(n("showexponent",s),n("exponentformat"),n("minexponent"),n("separatethousands"))}!a.noMinorloglabels&&i==="log"&&n("minorloglabels")}};function uot(e,t){function r(i,a){return Lq.coerce(e,t,Zae.tickformatstops,i,a)}var n=r("enabled");n&&(r("dtickrange"),r("value"))}});var t_=ye((Cnr,Kae)=>{"use strict";var cot=Cq();Kae.exports=function(t,r,n,i,a){a||(a={});var o=a.tickSuffixDflt,s=cot(t),l=n("tickprefix");l&&n("showtickprefix",s);var u=n("ticksuffix",o);u&&n("showticksuffix",s)}});var Pq=ye((Lnr,Jae)=>{"use strict";var r_=Dr(),fot=vl(),hot=bb(),dot=M3(),vot=e_(),pot=t_(),got=tL();Jae.exports=function(t,r,n){var i=fot.newContainer(r,"colorbar"),a=t.colorbar||{};function o(T,z){return r_.coerce(a,i,got,T,z)}var s=n.margin||{t:0,b:0,l:0,r:0},l=n.width-s.l-s.r,u=n.height-s.t-s.b,c=o("orientation"),f=c==="v",h=o("thicknessmode");o("thickness",h==="fraction"?30/(f?l:u):30);var d=o("lenmode");o("len",d==="fraction"?1:f?u:l);var v=o("yref"),_=o("xref"),b=v==="paper",p=_==="paper",k,E,S,L="left";f?(S="middle",L=p?"left":"right",k=p?1.02:1,E=.5):(S=b?"bottom":"top",L="center",k=.5,E=b?1.02:1),r_.coerce(a,i,{x:{valType:"number",min:p?-2:0,max:p?3:1,dflt:k}},"x"),r_.coerce(a,i,{y:{valType:"number",min:b?-2:0,max:b?3:1,dflt:E}},"y"),o("xanchor",L),o("xpad"),o("yanchor",S),o("ypad"),r_.noneOrAll(a,i,["x","y"]),o("outlinecolor"),o("outlinewidth"),o("bordercolor"),o("borderwidth"),o("bgcolor");var x=r_.coerce(a,i,{ticklabelposition:{valType:"enumerated",dflt:"outside",values:f?["outside","inside","outside top","inside top","outside bottom","inside bottom"]:["outside","inside","outside left","inside left","outside right","inside right"]}},"ticklabelposition");o("ticklabeloverflow",x.indexOf("inside")!==-1?"hide past domain":"hide past div"),hot(a,i,o,"linear");var C=n.font,M={noAutotickangles:!0,noTicklabelshift:!0,noTicklabelstandoff:!0,outerTicks:!1,font:C};x.indexOf("inside")!==-1&&(M.bgColor="black"),pot(a,i,o,"linear",M),vot(a,i,o,"linear",M),dot(a,i,o,"linear",M),o("title.text",n._dfltTitle.colorbar);var g=i.showticklabels?i.tickfont:C,P=r_.extendFlat({},C,{family:g.family,size:r_.bigFont(g.size)});r_.coerceFont(o,"title.font",P),o("title.side",f?"top":"right")}});var Qh=ye((Pnr,eoe)=>{"use strict";var $ae=Eo(),Rq=Dr(),mot=Aq(),yot=Pq(),Qae=lb().isValid,_ot=qa().traceIs;function Iq(e,t){var r=t.slice(0,t.length-1);return t?Rq.nestedProperty(e,r).get()||{}:e}eoe.exports=function e(t,r,n,i,a){var o=a.prefix,s=a.cLetter,l="_module"in r,u=Iq(t,o),c=Iq(r,o),f=Iq(r._template||{},o)||{},h=function(){return delete t.coloraxis,delete r.coloraxis,e(t,r,n,i,a)};if(l){var d=n._colorAxes||{},v=i(o+"coloraxis");if(v){var _=_ot(r,"contour")&&Rq.nestedProperty(r,"contours.coloring").get()||"heatmap",b=d[v];b?(b[2].push(h),b[0]!==_&&(b[0]=!1,Rq.warn(["Ignoring coloraxis:",v,"setting","as it is linked to incompatible colorscales."].join(" ")))):d[v]=[_,r,[h]];return}}var p=u[s+"min"],k=u[s+"max"],E=$ae(p)&&$ae(k)&&p<k,S=i(o+s+"auto",!E);S?i(o+s+"mid"):(i(o+s+"min"),i(o+s+"max"));var L=u.colorscale,x=f.colorscale,C;if(L!==void 0&&(C=!Qae(L)),x!==void 0&&(C=!Qae(x)),i(o+"autocolorscale",C),i(o+"colorscale"),i(o+"reversescale"),o!=="marker.line."){var M;o&&l&&(M=mot(u));var g=i(o+"showscale",M);g&&(o&&f&&(c._template=f),yot(u,c,n))}}});var noe=ye((Inr,ioe)=>{"use strict";var toe=Dr(),xot=vl(),roe=Tq(),bot=Qh();ioe.exports=function(t,r){function n(f,h){return toe.coerce(t,r,roe,f,h)}n("colorscale.sequential"),n("colorscale.sequentialminus"),n("colorscale.diverging");var i=r._colorAxes,a,o;function s(f,h){return toe.coerce(a,o,roe.coloraxis,f,h)}for(var l in i){var u=i[l];if(u[0])a=t[l]||{},o=xot.newContainer(r,l,"coloraxis"),o._name=l,bot(a,o,r,s,{prefix:"",cLetter:"c"});else{for(var c=0;c<u[2].length;c++)u[2][c]();delete r._colorAxes[l]}}}});var ooe=ye((Rnr,aoe)=>{"use strict";var wot=Dr(),Tot=pv().hasColorscale,Aot=pv().extractOpts;aoe.exports=function(t,r){function n(c,f){var h=c["_"+f];h!==void 0&&(c[f]=h)}function i(c,f){var h=f.container?wot.nestedProperty(c,f.container).get():c;if(h)if(h.coloraxis)h._colorAx=r[h.coloraxis];else{var d=Aot(h),v=d.auto;(v||d.min===void 0)&&n(h,f.min),(v||d.max===void 0)&&n(h,f.max),d.autocolorscale&&n(h,"colorscale")}}for(var a=0;a<t.length;a++){var o=t[a],s=o._module.colorbar;if(s)if(Array.isArray(s))for(var l=0;l<s.length;l++)i(o,s[l]);else i(o,s);Tot(o,"marker.line")&&i(o,{container:"marker.line",min:"cmin",max:"cmax"})}for(var u in r._colorAxes)i(r[u],{min:"cmin",max:"cmax"})}});var gv=ye((Dnr,loe)=>{"use strict";var soe=Eo(),Dq=Dr(),Sot=pv().extractOpts;loe.exports=function(t,r,n){var i=t._fullLayout,a=n.vals,o=n.containerStr,s=o?Dq.nestedProperty(r,o).get():r,l=Sot(s),u=l.auto!==!1,c=l.min,f=l.max,h=l.mid,d=function(){return Dq.aggNums(Math.min,null,a)},v=function(){return Dq.aggNums(Math.max,null,a)};if(c===void 0?c=d():u&&(s._colorAx&&soe(c)?c=Math.min(c,d()):c=d()),f===void 0?f=v():u&&(s._colorAx&&soe(f)?f=Math.max(f,v()):f=v()),u&&h!==void 0&&(f-h>h-c?c=h-(f-h):f-h<h-c&&(f=h+(h-c))),c===f&&(c-=.5,f+=.5),l._sync("min",c),l._sync("max",f),l.autocolorscale){var _;c*f<0?_=i.colorscale.diverging:c>=0?_=i.colorscale.sequential:_=i.colorscale.sequentialminus,l._sync("colorscale",_)}}});var tc=ye((Fnr,uoe)=>{"use strict";var nL=lb(),E3=pv();uoe.exports={moduleType:"component",name:"colorscale",attributes:Tu(),layoutAttributes:Tq(),supplyLayoutDefaults:noe(),handleDefaults:Qh(),crossTraceDefaults:ooe(),calc:gv(),scales:nL.scales,defaultScale:nL.defaultScale,getScale:nL.get,isValidScale:nL.isValid,hasColorscale:E3.hasColorscale,extractOpts:E3.extractOpts,extractScale:E3.extractScale,flipScale:E3.flipScale,makeColorScaleFunc:E3.makeColorScaleFunc,makeColorScaleFuncFromTrace:E3.makeColorScaleFuncFromTrace}});var Ru=ye((znr,foe)=>{"use strict";var coe=Dr(),Mot=vv().isTypedArraySpec;foe.exports={hasLines:function(e){return e.visible&&e.mode&&e.mode.indexOf("lines")!==-1},hasMarkers:function(e){return e.visible&&(e.mode&&e.mode.indexOf("markers")!==-1||e.type==="splom")},hasText:function(e){return e.visible&&e.mode&&e.mode.indexOf("text")!==-1},isBubble:function(e){var t=e.marker;return coe.isPlainObject(t)&&(coe.isArrayOrTypedArray(t.size)||Mot(t.size))}}});var k3=ye((Onr,hoe)=>{"use strict";var Eot=Eo();hoe.exports=function(t,r){r||(r=2);var n=t.marker,i=n.sizeref||1,a=n.sizemin||0,o=n.sizemode==="area"?function(s){return Math.sqrt(s/i)}:function(s){return s/i};return function(s){var l=o(s/r);return Eot(l)&&l>0?Math.max(l,a):0}}});var ip=ye(mv=>{"use strict";var aL=Dr();mv.getSubplot=function(e){return e.subplot||e.xaxis+e.yaxis||e.geo};mv.isTraceInSubplots=function(e,t){if(e.type==="splom"){for(var r=e.xaxes||[],n=e.yaxes||[],i=0;i<r.length;i++)for(var a=0;a<n.length;a++)if(t.indexOf(r[i]+n[a])!==-1)return!0;return!1}return t.indexOf(mv.getSubplot(e))!==-1};mv.flat=function(e,t){for(var r=new Array(e.length),n=0;n<e.length;n++)r[n]=t;return r};mv.p2c=function(e,t){for(var r=new Array(e.length),n=0;n<e.length;n++)r[n]=e[n].p2c(t);return r};mv.getDistanceFunction=function(e,t,r,n){return e==="closest"?n||mv.quadrature(t,r):e.charAt(0)==="x"?t:r};mv.getClosest=function(e,t,r){if(r.index!==!1)r.index>=0&&r.index<e.length?r.distance=0:r.index=!1;else for(var n=1/0,i=e.length,a=0;a<i;a++)n=t(e[a]),n<=r.distance&&(r.index=a,r.distance=n);return r};mv.inbox=function(e,t,r){return e*t<0||e===0?r:1/0};mv.quadrature=function(e,t){return function(r){var n=e(r),i=t(r);return Math.sqrt(n*n+i*i)}};mv.makeEventData=function(e,t,r){var n="index"in e?e.index:e.pointNumber,i={data:t._input,fullData:t,curveNumber:t.index,pointNumber:n};if(t._indexToPoints){var a=t._indexToPoints[n];a.length===1?i.pointIndex=a[0]:i.pointIndices=a}else i.pointIndex=n;return t._module.eventData?i=t._module.eventData(i,e,t,r,n):("xVal"in e?i.x=e.xVal:"x"in e&&(i.x=e.x),"yVal"in e?i.y=e.yVal:"y"in e&&(i.y=e.y),e.xa&&(i.xaxis=e.xa),e.ya&&(i.yaxis=e.ya),e.zLabelVal!==void 0&&(i.z=e.zLabelVal)),mv.appendArrayPointValue(i,t,n),i};mv.appendArrayPointValue=function(e,t,r){var n=t._arrayAttrs;if(n)for(var i=0;i<n.length;i++){var a=n[i],o=doe(a);if(e[o]===void 0){var s=aL.nestedProperty(t,a).get(),l=voe(s,r);l!==void 0&&(e[o]=l)}}};mv.appendArrayMultiPointValues=function(e,t,r){var n=t._arrayAttrs;if(n)for(var i=0;i<n.length;i++){var a=n[i],o=doe(a);if(e[o]===void 0){for(var s=aL.nestedProperty(t,a).get(),l=new Array(r.length),u=0;u<r.length;u++)l[u]=voe(s,r[u]);e[o]=l}}};var kot={ids:"id",locations:"location",labels:"label",values:"value","marker.colors":"color",parents:"parent"};function doe(e){return kot[e]||e}function voe(e,t){if(Array.isArray(t)){if(aL.isArrayOrTypedArray(e)&&aL.isArrayOrTypedArray(e[t[0]]))return e[t[0]][t[1]]}else return e[t]}var Cot={x:!0,y:!0},Lot={"x unified":!0,"y unified":!0};mv.isUnifiedHover=function(e){return typeof e!="string"?!1:!!Lot[e]};mv.isXYhover=function(e){return typeof e!="string"?!1:!!Cot[e]}});var eM=ye((Bnr,poe)=>{poe.exports=Iot;var Fq={a:7,c:6,h:1,l:2,m:2,q:4,s:4,t:2,v:1,z:0},Pot=/([astvzqmhlc])([^astvzqmhlc]*)/ig;function Iot(e){var t=[];return e.replace(Pot,function(r,n,i){var a=n.toLowerCase();for(i=Dot(i),a=="m"&&i.length>2&&(t.push([n].concat(i.splice(0,2))),a="l",n=n=="m"?"l":"L");;){if(i.length==Fq[a])return i.unshift(n),t.push(i);if(i.length<Fq[a])throw new Error("malformed path data");t.push([n].concat(i.splice(0,Fq[a])))}}),t}var Rot=/-?[0-9]*\.?[0-9]+(?:e[-+]?\d+)?/ig;function Dot(e){var t=e.match(Rot);return t?t.map(Number):[]}});var woe=ye((Nnr,boe)=>{"use strict";var Fot=eM(),ca=function(e,t){return t?Math.round(e*(t=Math.pow(10,t)))/t:Math.round(e)},hs="M0,0Z",goe=Math.sqrt(2),i_=Math.sqrt(3),zq=Math.PI,Oq=Math.cos,qq=Math.sin;boe.exports={circle:{n:0,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2),i="M"+n+",0A"+n+","+n+" 0 1,1 0,-"+n+"A"+n+","+n+" 0 0,1 "+n+",0Z";return r?vs(t,r,i):i}},square:{n:1,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2);return vs(t,r,"M"+n+","+n+"H-"+n+"V-"+n+"H"+n+"Z")}},diamond:{n:2,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*1.3,2);return vs(t,r,"M"+n+",0L0,"+n+"L-"+n+",0L0,-"+n+"Z")}},cross:{n:3,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*.4,2),i=ca(e*1.2,2);return vs(t,r,"M"+i+","+n+"H"+n+"V"+i+"H-"+n+"V"+n+"H-"+i+"V-"+n+"H-"+n+"V-"+i+"H"+n+"V-"+n+"H"+i+"Z")}},x:{n:4,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*.8/goe,2),i="l"+n+","+n,a="l"+n+",-"+n,o="l-"+n+",-"+n,s="l-"+n+","+n;return vs(t,r,"M0,"+n+i+a+o+a+o+s+o+s+i+s+i+"Z")}},"triangle-up":{n:5,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*2/i_,2),i=ca(e/2,2),a=ca(e,2);return vs(t,r,"M-"+n+","+i+"H"+n+"L0,-"+a+"Z")}},"triangle-down":{n:6,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*2/i_,2),i=ca(e/2,2),a=ca(e,2);return vs(t,r,"M-"+n+",-"+i+"H"+n+"L0,"+a+"Z")}},"triangle-left":{n:7,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*2/i_,2),i=ca(e/2,2),a=ca(e,2);return vs(t,r,"M"+i+",-"+n+"V"+n+"L-"+a+",0Z")}},"triangle-right":{n:8,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*2/i_,2),i=ca(e/2,2),a=ca(e,2);return vs(t,r,"M-"+i+",-"+n+"V"+n+"L"+a+",0Z")}},"triangle-ne":{n:9,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*.6,2),i=ca(e*1.2,2);return vs(t,r,"M-"+i+",-"+n+"H"+n+"V"+i+"Z")}},"triangle-se":{n:10,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*.6,2),i=ca(e*1.2,2);return vs(t,r,"M"+n+",-"+i+"V"+n+"H-"+i+"Z")}},"triangle-sw":{n:11,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*.6,2),i=ca(e*1.2,2);return vs(t,r,"M"+i+","+n+"H-"+n+"V-"+i+"Z")}},"triangle-nw":{n:12,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*.6,2),i=ca(e*1.2,2);return vs(t,r,"M-"+n+","+i+"V-"+n+"H"+i+"Z")}},pentagon:{n:13,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*.951,2),i=ca(e*.588,2),a=ca(-e,2),o=ca(e*-.309,2),s=ca(e*.809,2);return vs(t,r,"M"+n+","+o+"L"+i+","+s+"H-"+i+"L-"+n+","+o+"L0,"+a+"Z")}},hexagon:{n:14,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2),i=ca(e/2,2),a=ca(e*i_/2,2);return vs(t,r,"M"+a+",-"+i+"V"+i+"L0,"+n+"L-"+a+","+i+"V-"+i+"L0,-"+n+"Z")}},hexagon2:{n:15,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2),i=ca(e/2,2),a=ca(e*i_/2,2);return vs(t,r,"M-"+i+","+a+"H"+i+"L"+n+",0L"+i+",-"+a+"H-"+i+"L-"+n+",0Z")}},octagon:{n:16,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*.924,2),i=ca(e*.383,2);return vs(t,r,"M-"+i+",-"+n+"H"+i+"L"+n+",-"+i+"V"+i+"L"+i+","+n+"H-"+i+"L-"+n+","+i+"V-"+i+"Z")}},star:{n:17,f:function(e,t,r){if(ds(t))return hs;var n=e*1.4,i=ca(n*.225,2),a=ca(n*.951,2),o=ca(n*.363,2),s=ca(n*.588,2),l=ca(-n,2),u=ca(n*-.309,2),c=ca(n*.118,2),f=ca(n*.809,2),h=ca(n*.382,2);return vs(t,r,"M"+i+","+u+"H"+a+"L"+o+","+c+"L"+s+","+f+"L0,"+h+"L-"+s+","+f+"L-"+o+","+c+"L-"+a+","+u+"H-"+i+"L0,"+l+"Z")}},hexagram:{n:18,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*.66,2),i=ca(e*.38,2),a=ca(e*.76,2);return vs(t,r,"M-"+a+",0l-"+i+",-"+n+"h"+a+"l"+i+",-"+n+"l"+i+","+n+"h"+a+"l-"+i+","+n+"l"+i+","+n+"h-"+a+"l-"+i+","+n+"l-"+i+",-"+n+"h-"+a+"Z")}},"star-triangle-up":{n:19,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*i_*.8,2),i=ca(e*.8,2),a=ca(e*1.6,2),o=ca(e*4,2),s="A "+o+","+o+" 0 0 1 ";return vs(t,r,"M-"+n+","+i+s+n+","+i+s+"0,-"+a+s+"-"+n+","+i+"Z")}},"star-triangle-down":{n:20,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*i_*.8,2),i=ca(e*.8,2),a=ca(e*1.6,2),o=ca(e*4,2),s="A "+o+","+o+" 0 0 1 ";return vs(t,r,"M"+n+",-"+i+s+"-"+n+",-"+i+s+"0,"+a+s+n+",-"+i+"Z")}},"star-square":{n:21,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*1.1,2),i=ca(e*2,2),a="A "+i+","+i+" 0 0 1 ";return vs(t,r,"M-"+n+",-"+n+a+"-"+n+","+n+a+n+","+n+a+n+",-"+n+a+"-"+n+",-"+n+"Z")}},"star-diamond":{n:22,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*1.4,2),i=ca(e*1.9,2),a="A "+i+","+i+" 0 0 1 ";return vs(t,r,"M-"+n+",0"+a+"0,"+n+a+n+",0"+a+"0,-"+n+a+"-"+n+",0Z")}},"diamond-tall":{n:23,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*.7,2),i=ca(e*1.4,2);return vs(t,r,"M0,"+i+"L"+n+",0L0,-"+i+"L-"+n+",0Z")}},"diamond-wide":{n:24,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*1.4,2),i=ca(e*.7,2);return vs(t,r,"M0,"+i+"L"+n+",0L0,-"+i+"L-"+n+",0Z")}},hourglass:{n:25,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2);return vs(t,r,"M"+n+","+n+"H-"+n+"L"+n+",-"+n+"H-"+n+"Z")},noDot:!0},bowtie:{n:26,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2);return vs(t,r,"M"+n+","+n+"V-"+n+"L-"+n+","+n+"V-"+n+"Z")},noDot:!0},"circle-cross":{n:27,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2);return vs(t,r,"M0,"+n+"V-"+n+"M"+n+",0H-"+n+"M"+n+",0A"+n+","+n+" 0 1,1 0,-"+n+"A"+n+","+n+" 0 0,1 "+n+",0Z")},needLine:!0,noDot:!0},"circle-x":{n:28,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2),i=ca(e/goe,2);return vs(t,r,"M"+i+","+i+"L-"+i+",-"+i+"M"+i+",-"+i+"L-"+i+","+i+"M"+n+",0A"+n+","+n+" 0 1,1 0,-"+n+"A"+n+","+n+" 0 0,1 "+n+",0Z")},needLine:!0,noDot:!0},"square-cross":{n:29,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2);return vs(t,r,"M0,"+n+"V-"+n+"M"+n+",0H-"+n+"M"+n+","+n+"H-"+n+"V-"+n+"H"+n+"Z")},needLine:!0,noDot:!0},"square-x":{n:30,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2);return vs(t,r,"M"+n+","+n+"L-"+n+",-"+n+"M"+n+",-"+n+"L-"+n+","+n+"M"+n+","+n+"H-"+n+"V-"+n+"H"+n+"Z")},needLine:!0,noDot:!0},"diamond-cross":{n:31,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*1.3,2);return vs(t,r,"M"+n+",0L0,"+n+"L-"+n+",0L0,-"+n+"ZM0,-"+n+"V"+n+"M-"+n+",0H"+n)},needLine:!0,noDot:!0},"diamond-x":{n:32,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*1.3,2),i=ca(e*.65,2);return vs(t,r,"M"+n+",0L0,"+n+"L-"+n+",0L0,-"+n+"ZM-"+i+",-"+i+"L"+i+","+i+"M-"+i+","+i+"L"+i+",-"+i)},needLine:!0,noDot:!0},"cross-thin":{n:33,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*1.4,2);return vs(t,r,"M0,"+n+"V-"+n+"M"+n+",0H-"+n)},needLine:!0,noDot:!0,noFill:!0},"x-thin":{n:34,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2);return vs(t,r,"M"+n+","+n+"L-"+n+",-"+n+"M"+n+",-"+n+"L-"+n+","+n)},needLine:!0,noDot:!0,noFill:!0},asterisk:{n:35,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*1.2,2),i=ca(e*.85,2);return vs(t,r,"M0,"+n+"V-"+n+"M"+n+",0H-"+n+"M"+i+","+i+"L-"+i+",-"+i+"M"+i+",-"+i+"L-"+i+","+i)},needLine:!0,noDot:!0,noFill:!0},hash:{n:36,f:function(e,t,r){if(ds(t))return hs;var n=ca(e/2,2),i=ca(e,2);return vs(t,r,"M"+n+","+i+"V-"+i+"M"+(n-i)+",-"+i+"V"+i+"M"+i+","+n+"H-"+i+"M-"+i+","+(n-i)+"H"+i)},needLine:!0,noFill:!0},"y-up":{n:37,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*1.2,2),i=ca(e*1.6,2),a=ca(e*.8,2);return vs(t,r,"M-"+n+","+a+"L0,0M"+n+","+a+"L0,0M0,-"+i+"L0,0")},needLine:!0,noDot:!0,noFill:!0},"y-down":{n:38,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*1.2,2),i=ca(e*1.6,2),a=ca(e*.8,2);return vs(t,r,"M-"+n+",-"+a+"L0,0M"+n+",-"+a+"L0,0M0,"+i+"L0,0")},needLine:!0,noDot:!0,noFill:!0},"y-left":{n:39,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*1.2,2),i=ca(e*1.6,2),a=ca(e*.8,2);return vs(t,r,"M"+a+","+n+"L0,0M"+a+",-"+n+"L0,0M-"+i+",0L0,0")},needLine:!0,noDot:!0,noFill:!0},"y-right":{n:40,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*1.2,2),i=ca(e*1.6,2),a=ca(e*.8,2);return vs(t,r,"M-"+a+","+n+"L0,0M-"+a+",-"+n+"L0,0M"+i+",0L0,0")},needLine:!0,noDot:!0,noFill:!0},"line-ew":{n:41,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*1.4,2);return vs(t,r,"M"+n+",0H-"+n)},needLine:!0,noDot:!0,noFill:!0},"line-ns":{n:42,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*1.4,2);return vs(t,r,"M0,"+n+"V-"+n)},needLine:!0,noDot:!0,noFill:!0},"line-ne":{n:43,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2);return vs(t,r,"M"+n+",-"+n+"L-"+n+","+n)},needLine:!0,noDot:!0,noFill:!0},"line-nw":{n:44,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2);return vs(t,r,"M"+n+","+n+"L-"+n+",-"+n)},needLine:!0,noDot:!0,noFill:!0},"arrow-up":{n:45,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2),i=ca(e*2,2);return vs(t,r,"M0,0L-"+n+","+i+"H"+n+"Z")},backoff:1,noDot:!0},"arrow-down":{n:46,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2),i=ca(e*2,2);return vs(t,r,"M0,0L-"+n+",-"+i+"H"+n+"Z")},noDot:!0},"arrow-left":{n:47,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*2,2),i=ca(e,2);return vs(t,r,"M0,0L"+n+",-"+i+"V"+i+"Z")},noDot:!0},"arrow-right":{n:48,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*2,2),i=ca(e,2);return vs(t,r,"M0,0L-"+n+",-"+i+"V"+i+"Z")},noDot:!0},"arrow-bar-up":{n:49,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2),i=ca(e*2,2);return vs(t,r,"M-"+n+",0H"+n+"M0,0L-"+n+","+i+"H"+n+"Z")},backoff:1,needLine:!0,noDot:!0},"arrow-bar-down":{n:50,f:function(e,t,r){if(ds(t))return hs;var n=ca(e,2),i=ca(e*2,2);return vs(t,r,"M-"+n+",0H"+n+"M0,0L-"+n+",-"+i+"H"+n+"Z")},needLine:!0,noDot:!0},"arrow-bar-left":{n:51,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*2,2),i=ca(e,2);return vs(t,r,"M0,-"+i+"V"+i+"M0,0L"+n+",-"+i+"V"+i+"Z")},needLine:!0,noDot:!0},"arrow-bar-right":{n:52,f:function(e,t,r){if(ds(t))return hs;var n=ca(e*2,2),i=ca(e,2);return vs(t,r,"M0,-"+i+"V"+i+"M0,0L-"+n+",-"+i+"V"+i+"Z")},needLine:!0,noDot:!0},arrow:{n:53,f:function(e,t,r){if(ds(t))return hs;var n=zq/2.5,i=2*e*Oq(n),a=2*e*qq(n);return vs(t,r,"M0,0L"+-i+","+a+"L"+i+","+a+"Z")},backoff:.9,noDot:!0},"arrow-wide":{n:54,f:function(e,t,r){if(ds(t))return hs;var n=zq/4,i=2*e*Oq(n),a=2*e*qq(n);return vs(t,r,"M0,0L"+-i+","+a+"A "+2*e+","+2*e+" 0 0 1 "+i+","+a+"Z")},backoff:.4,noDot:!0}};function ds(e){return e===null}var moe,yoe,_oe,xoe;function vs(e,t,r){if((!e||e%360===0)&&!t)return r;if(_oe===e&&xoe===t&&moe===r)return yoe;_oe=e,xoe=t,moe=r;function n(b,p){var k=Oq(b),E=qq(b),S=p[0],L=p[1]+(t||0);return[S*k-L*E,S*E+L*k]}for(var i=e/180*zq,a=0,o=0,s=Fot(r),l="",u=0;u<s.length;u++){var c=s[u],f=c[0],h=a,d=o;if(f==="M"||f==="L")a=+c[1],o=+c[2];else if(f==="m"||f==="l")a+=+c[1],o+=+c[2];else if(f==="H")a=+c[1];else if(f==="h")a+=+c[1];else if(f==="V")o=+c[1];else if(f==="v")o+=+c[1];else if(f==="A"){a=+c[1],o=+c[2];var v=n(i,[+c[6],+c[7]]);c[6]=v[0],c[7]=v[1],c[3]=+c[3]+e}(f==="H"||f==="V")&&(f="L"),(f==="h"||f==="v")&&(f="l"),(f==="m"||f==="l")&&(a-=h,o-=d);var _=n(i,[a,o]);(f==="H"||f==="V")&&(f="L"),(f==="M"||f==="L"||f==="m"||f==="l")&&(c[1]=_[0],c[2]=_[1]),c[0]=f,l+=c[0]+c.slice(1).join(",")}return yoe=l,l}});var So=ye((Unr,Voe)=>{"use strict";var dd=Oa(),Du=Dr(),zot=Du.numberFormat,Sb=Eo(),Hq=cd(),sL=qa(),Kd=ka(),Oot=tc(),rM=Du.strTranslate,lL=ru(),qot=Wp(),Bot=$h(),Not=Bot.LINE_SPACING,Ioe=N1().DESELECTDIM,Uot=Ru(),Vot=k3(),Got=ip().appendArrayPointValue,Aa=Voe.exports={};Aa.font=function(e,t){var r=t.variant,n=t.style,i=t.weight,a=t.color,o=t.size,s=t.family,l=t.shadow,u=t.lineposition,c=t.textcase;s&&e.style("font-family",s),o+1&&e.style("font-size",o+"px"),a&&e.call(Kd.fill,a),i&&e.style("font-weight",i),n&&e.style("font-style",n),r&&e.style("font-variant",r),c&&e.style("text-transform",Bq(jot(c))),l&&e.style("text-shadow",l==="auto"?lL.makeTextShadow(Kd.contrast(a)):Bq(l)),u&&e.style("text-decoration-line",Bq(Wot(u)))};function Bq(e){return e==="none"?void 0:e}var Hot={normal:"none",lower:"lowercase",upper:"uppercase","word caps":"capitalize"};function jot(e){return Hot[e]}function Wot(e){return e.replace("under","underline").replace("over","overline").replace("through","line-through").split("+").join(" ")}Aa.setPosition=function(e,t,r){e.attr("x",t).attr("y",r)};Aa.setSize=function(e,t,r){e.attr("width",t).attr("height",r)};Aa.setRect=function(e,t,r,n,i){e.call(Aa.setPosition,t,r).call(Aa.setSize,n,i)};Aa.translatePoint=function(e,t,r,n){var i=r.c2p(e.x),a=n.c2p(e.y);if(Sb(i)&&Sb(a)&&t.node())t.node().nodeName==="text"?t.attr("x",i).attr("y",a):t.attr("transform",rM(i,a));else return!1;return!0};Aa.translatePoints=function(e,t,r){e.each(function(n){var i=dd.select(this);Aa.translatePoint(n,i,t,r)})};Aa.hideOutsideRangePoint=function(e,t,r,n,i,a){t.attr("display",r.isPtWithinRange(e,i)&&n.isPtWithinRange(e,a)?null:"none")};Aa.hideOutsideRangePoints=function(e,t){if(t._hasClipOnAxisFalse){var r=t.xaxis,n=t.yaxis;e.each(function(i){var a=i[0].trace,o=a.xcalendar,s=a.ycalendar,l=sL.traceIs(a,"bar-like")?".bartext":".point,.textpoint";e.selectAll(l).each(function(u){Aa.hideOutsideRangePoint(u,dd.select(this),r,n,o,s)})})}};Aa.crispRound=function(e,t,r){return!t||!Sb(t)?r||0:e._context.staticPlot?t:t<1?1:Math.round(t)};Aa.singleLineStyle=function(e,t,r,n,i){t.style("fill","none");var a=(((e||[])[0]||{}).trace||{}).line||{},o=r||a.width||0,s=i||a.dash||"";Kd.stroke(t,n||a.color),Aa.dashLine(t,s,o)};Aa.lineGroupStyle=function(e,t,r,n){e.style("fill","none").each(function(i){var a=(((i||[])[0]||{}).trace||{}).line||{},o=t||a.width||0,s=n||a.dash||"";dd.select(this).call(Kd.stroke,r||a.color).call(Aa.dashLine,s,o)})};Aa.dashLine=function(e,t,r){r=+r||0,t=Aa.dashStyle(t,r),e.style({"stroke-dasharray":t,"stroke-width":r+"px"})};Aa.dashStyle=function(e,t){t=+t||1;var r=Math.max(t,3);return e==="solid"?e="":e==="dot"?e=r+"px,"+r+"px":e==="dash"?e=3*r+"px,"+3*r+"px":e==="longdash"?e=5*r+"px,"+5*r+"px":e==="dashdot"?e=3*r+"px,"+r+"px,"+r+"px,"+r+"px":e==="longdashdot"&&(e=5*r+"px,"+2*r+"px,"+r+"px,"+2*r+"px"),e};function Roe(e,t,r,n){var i=t.fillpattern,a=t.fillgradient,o=Aa.getPatternAttr,s=i&&(o(i.shape,0,"")||o(i.path,0,""));if(s){var l=o(i.bgcolor,0,null),u=o(i.fgcolor,0,null),c=i.fgopacity,f=o(i.size,0,8),h=o(i.solidity,0,.3),d=t.uid;Aa.pattern(e,"point",r,d,s,f,h,void 0,i.fillmode,l,u,c)}else if(a&&a.type!=="none"){var v=a.type,_="scatterfill-"+t.uid;if(n&&(_="legendfill-"+t.uid),!n&&(a.start!==void 0||a.stop!==void 0)){var b,p;v==="horizontal"?(b={x:a.start,y:0},p={x:a.stop,y:0}):v==="vertical"&&(b={x:0,y:a.start},p={x:0,y:a.stop}),b.x=t._xA.c2p(b.x===void 0?t._extremes.x.min[0].val:b.x,!0),b.y=t._yA.c2p(b.y===void 0?t._extremes.y.min[0].val:b.y,!0),p.x=t._xA.c2p(p.x===void 0?t._extremes.x.max[0].val:p.x,!0),p.y=t._yA.c2p(p.y===void 0?t._extremes.y.max[0].val:p.y,!0),e.call(zoe,r,_,"linear",a.colorscale,"fill",b,p,!0,!1)}else v==="horizontal"&&(v=v+"reversed"),e.call(Aa.gradient,r,_,v,a.colorscale,"fill")}else t.fillcolor&&e.call(Kd.fill,t.fillcolor)}Aa.singleFillStyle=function(e,t){var r=dd.select(e.node()),n=r.data(),i=((n[0]||[])[0]||{}).trace||{};Roe(e,i,t,!1)};Aa.fillGroupStyle=function(e,t,r){e.style("stroke-width",0).each(function(n){var i=dd.select(this);n[0].trace&&Roe(i,n[0].trace,t,r)})};var Toe=woe();Aa.symbolNames=[];Aa.symbolFuncs=[];Aa.symbolBackOffs=[];Aa.symbolNeedLines={};Aa.symbolNoDot={};Aa.symbolNoFill={};Aa.symbolList=[];Object.keys(Toe).forEach(function(e){var t=Toe[e],r=t.n;Aa.symbolList.push(r,String(r),e,r+100,String(r+100),e+"-open"),Aa.symbolNames[r]=e,Aa.symbolFuncs[r]=t.f,Aa.symbolBackOffs[r]=t.backoff||0,t.needLine&&(Aa.symbolNeedLines[r]=!0),t.noDot?Aa.symbolNoDot[r]=!0:Aa.symbolList.push(r+200,String(r+200),e+"-dot",r+300,String(r+300),e+"-open-dot"),t.noFill&&(Aa.symbolNoFill[r]=!0)});var Xot=Aa.symbolNames.length,Zot="M0,0.5L0.5,0L0,-0.5L-0.5,0Z";Aa.symbolNumber=function(e){if(Sb(e))e=+e;else if(typeof e=="string"){var t=0;e.indexOf("-open")>0&&(t=100,e=e.replace("-open","")),e.indexOf("-dot")>0&&(t+=200,e=e.replace("-dot","")),e=Aa.symbolNames.indexOf(e),e>=0&&(e+=t)}return e%100>=Xot||e>=400?0:Math.floor(Math.max(e,0))};function Doe(e,t,r,n){var i=e%100;return Aa.symbolFuncs[i](t,r,n)+(e>=200?Zot:"")}var Aoe=zot("~f"),Foe={radial:{type:"radial"},radialreversed:{type:"radial",reversed:!0},horizontal:{type:"linear",start:{x:1,y:0},stop:{x:0,y:0}},horizontalreversed:{type:"linear",start:{x:1,y:0},stop:{x:0,y:0},reversed:!0},vertical:{type:"linear",start:{x:0,y:1},stop:{x:0,y:0}},verticalreversed:{type:"linear",start:{x:0,y:1},stop:{x:0,y:0},reversed:!0}};Aa.gradient=function(e,t,r,n,i,a){var o=Foe[n];return zoe(e,t,r,o.type,i,a,o.start,o.stop,!1,o.reversed)};function zoe(e,t,r,n,i,a,o,s,l,u){var c=i.length,f;n==="linear"?f={node:"linearGradient",attrs:{x1:o.x,y1:o.y,x2:s.x,y2:s.y,gradientUnits:l?"userSpaceOnUse":"objectBoundingBox"},reversed:u}:n==="radial"&&(f={node:"radialGradient",reversed:u});for(var h=new Array(c),d=0;d<c;d++)f.reversed?h[c-1-d]=[Aoe((1-i[d][0])*100),i[d][1]]:h[d]=[Aoe(i[d][0]*100),i[d][1]];var v=t._fullLayout,_="g"+v._uid+"-"+r,b=v._defs.select(".gradients").selectAll("#"+_).data([n+h.join(";")],Du.identity);b.exit().remove(),b.enter().append(f.node).each(function(){var p=dd.select(this);f.attrs&&p.attr(f.attrs),p.attr("id",_);var k=p.selectAll("stop").data(h);k.exit().remove(),k.enter().append("stop"),k.each(function(E){var S=Hq(E[1]);dd.select(this).attr({offset:E[0]+"%","stop-color":Kd.tinyRGB(S),"stop-opacity":S.getAlpha()})})}),e.style(a,jq(_,t)).style(a+"-opacity",null),e.classed("gradient_filled",!0)}Aa.pattern=function(e,t,r,n,i,a,o,s,l,u,c,f){var h=t==="legend";s&&(l==="overlay"?(u=s,c=Kd.contrast(u)):(u=void 0,c=s));var d=r._fullLayout,v="p"+d._uid+"-"+n,_,b,p=function(O,V,G,Z,j){return Z+(j-Z)*(O-V)/(G-V)},k,E,S,L,x={},C=Hq(c),M=Kd.tinyRGB(C),g=C.getAlpha(),P=f*g;switch(i){case"/":_=a*Math.sqrt(2),b=a*Math.sqrt(2),k="M-"+_/4+","+b/4+"l"+_/2+",-"+b/2+"M0,"+b+"L"+_+",0M"+_/4*3+","+b/4*5+"l"+_/2+",-"+b/2,E=o*a,L="path",x={d:k,opacity:P,stroke:M,"stroke-width":E+"px"};break;case"\\":_=a*Math.sqrt(2),b=a*Math.sqrt(2),k="M"+_/4*3+",-"+b/4+"l"+_/2+","+b/2+"M0,0L"+_+","+b+"M-"+_/4+","+b/4*3+"l"+_/2+","+b/2,E=o*a,L="path",x={d:k,opacity:P,stroke:M,"stroke-width":E+"px"};break;case"x":_=a*Math.sqrt(2),b=a*Math.sqrt(2),k="M-"+_/4+","+b/4+"l"+_/2+",-"+b/2+"M0,"+b+"L"+_+",0M"+_/4*3+","+b/4*5+"l"+_/2+",-"+b/2+"M"+_/4*3+",-"+b/4+"l"+_/2+","+b/2+"M0,0L"+_+","+b+"M-"+_/4+","+b/4*3+"l"+_/2+","+b/2,E=a-a*Math.sqrt(1-o),L="path",x={d:k,opacity:P,stroke:M,"stroke-width":E+"px"};break;case"|":_=a,b=a,L="path",k="M"+_/2+",0L"+_/2+","+b,E=o*a,L="path",x={d:k,opacity:P,stroke:M,"stroke-width":E+"px"};break;case"-":_=a,b=a,L="path",k="M0,"+b/2+"L"+_+","+b/2,E=o*a,L="path",x={d:k,opacity:P,stroke:M,"stroke-width":E+"px"};break;case"+":_=a,b=a,L="path",k="M"+_/2+",0L"+_/2+","+b+"M0,"+b/2+"L"+_+","+b/2,E=a-a*Math.sqrt(1-o),L="path",x={d:k,opacity:P,stroke:M,"stroke-width":E+"px"};break;case".":_=a,b=a,o<Math.PI/4?S=Math.sqrt(o*a*a/Math.PI):S=p(o,Math.PI/4,1,a/2,a/Math.sqrt(2)),L="circle",x={cx:_/2,cy:b/2,r:S,opacity:P,fill:M};break;default:_=a,b=a,L="path",x={d:i,opacity:P,fill:M};break}var T=[i||"noSh",u||"noBg",c||"noFg",a,o].join(";"),z=d._defs.select(".patterns").selectAll("#"+v).data([T],Du.identity);z.exit().remove(),z.enter().append("pattern").each(function(){var O=dd.select(this);if(O.attr({id:v,width:_+"px",height:b+"px",patternUnits:"userSpaceOnUse",patternTransform:h?"scale(0.8)":""}),u){var V=Hq(u),G=Kd.tinyRGB(V),Z=V.getAlpha(),j=O.selectAll("rect").data([0]);j.exit().remove(),j.enter().append("rect").attr({width:_+"px",height:b+"px",fill:G,"fill-opacity":Z})}var N=O.selectAll(L).data([0]);N.exit().remove(),N.enter().append(L).attr(x)}),e.style("fill",jq(v,r)).style("fill-opacity",null),e.classed("pattern_filled",!0)};Aa.initGradients=function(e){var t=e._fullLayout,r=Du.ensureSingle(t._defs,"g","gradients");r.selectAll("linearGradient,radialGradient").remove(),dd.select(e).selectAll(".gradient_filled").classed("gradient_filled",!1)};Aa.initPatterns=function(e){var t=e._fullLayout,r=Du.ensureSingle(t._defs,"g","patterns");r.selectAll("pattern").remove(),dd.select(e).selectAll(".pattern_filled").classed("pattern_filled",!1)};Aa.getPatternAttr=function(e,t,r){return e&&Du.isArrayOrTypedArray(e)?t<e.length?e[t]:r:e};Aa.pointStyle=function(e,t,r,n){if(e.size()){var i=Aa.makePointStyleFns(t);e.each(function(a){Aa.singlePointStyle(a,dd.select(this),t,i,r,n)})}};Aa.singlePointStyle=function(e,t,r,n,i,a){var o=r.marker,s=o.line;if(a&&a.i>=0&&e.i===void 0&&(e.i=a.i),t.style("opacity",n.selectedOpacityFn?n.selectedOpacityFn(e):e.mo===void 0?o.opacity:e.mo),n.ms2mrc){var l;e.ms==="various"||o.size==="various"?l=3:l=n.ms2mrc(e.ms),e.mrc=l,n.selectedSizeFn&&(l=e.mrc=n.selectedSizeFn(e));var u=Aa.symbolNumber(e.mx||o.symbol)||0;e.om=u%200>=100;var c=Xq(e,r),f=Wq(e,r);t.attr("d",Doe(u,l,c,f))}var h=!1,d,v,_;if(e.so)_=s.outlierwidth,v=s.outliercolor,d=o.outliercolor;else{var b=(s||{}).width;_=(e.mlw+1||b+1||(e.trace?(e.trace.marker.line||{}).width:0)+1)-1||0,"mlc"in e?v=e.mlcc=n.lineScale(e.mlc):Du.isArrayOrTypedArray(s.color)?v=Kd.defaultLine:v=s.color,Du.isArrayOrTypedArray(o.color)&&(d=Kd.defaultLine,h=!0),"mc"in e?d=e.mcc=n.markerScale(e.mc):d=o.color||o.colors||"rgba(0,0,0,0)",n.selectedColorFn&&(d=n.selectedColorFn(e))}if(e.om)t.call(Kd.stroke,d).style({"stroke-width":(_||1)+"px",fill:"none"});else{t.style("stroke-width",(e.isBlank?0:_)+"px");var p=o.gradient,k=e.mgt;k?h=!0:k=p&&p.type,Du.isArrayOrTypedArray(k)&&(k=k[0],Foe[k]||(k=0));var E=o.pattern,S=Aa.getPatternAttr,L=E&&(S(E.shape,e.i,"")||S(E.path,e.i,""));if(k&&k!=="none"){var x=e.mgc;x?h=!0:x=p.color;var C=r.uid;h&&(C+="-"+e.i),Aa.gradient(t,i,C,k,[[0,x],[1,d]],"fill")}else if(L){var M=!1,g=E.fgcolor;!g&&a&&a.color&&(g=a.color,M=!0);var P=S(g,e.i,a&&a.color||null),T=S(E.bgcolor,e.i,null),z=E.fgopacity,O=S(E.size,e.i,8),V=S(E.solidity,e.i,.3);M=M||e.mcc||Du.isArrayOrTypedArray(E.shape)||Du.isArrayOrTypedArray(E.path)||Du.isArrayOrTypedArray(E.bgcolor)||Du.isArrayOrTypedArray(E.fgcolor)||Du.isArrayOrTypedArray(E.size)||Du.isArrayOrTypedArray(E.solidity);var G=r.uid;M&&(G+="-"+e.i),Aa.pattern(t,"point",i,G,L,O,V,e.mcc,E.fillmode,T,P,z)}else Du.isArrayOrTypedArray(d)?Kd.fill(t,d[e.i]):Kd.fill(t,d);_&&Kd.stroke(t,v)}};Aa.makePointStyleFns=function(e){var t={},r=e.marker;return t.markerScale=Aa.tryColorscale(r,""),t.lineScale=Aa.tryColorscale(r,"line"),sL.traceIs(e,"symbols")&&(t.ms2mrc=Uot.isBubble(e)?Vot(e):function(){return(r.size||6)/2}),e.selectedpoints&&Du.extendFlat(t,Aa.makeSelectedPointStyleFns(e)),t};Aa.makeSelectedPointStyleFns=function(e){var t={},r=e.selected||{},n=e.unselected||{},i=e.marker||{},a=r.marker||{},o=n.marker||{},s=i.opacity,l=a.opacity,u=o.opacity,c=l!==void 0,f=u!==void 0;(Du.isArrayOrTypedArray(s)||c||f)&&(t.selectedOpacityFn=function(S){var L=S.mo===void 0?i.opacity:S.mo;return S.selected?c?l:L:f?u:Ioe*L});var h=i.color,d=a.color,v=o.color;(d||v)&&(t.selectedColorFn=function(S){var L=S.mcc||h;return S.selected?d||L:v||L});var _=i.size,b=a.size,p=o.size,k=b!==void 0,E=p!==void 0;return sL.traceIs(e,"symbols")&&(k||E)&&(t.selectedSizeFn=function(S){var L=S.mrc||_/2;return S.selected?k?b/2:L:E?p/2:L}),t};Aa.makeSelectedTextStyleFns=function(e){var t={},r=e.selected||{},n=e.unselected||{},i=e.textfont||{},a=r.textfont||{},o=n.textfont||{},s=i.color,l=a.color,u=o.color;return t.selectedTextColorFn=function(c){var f=c.tc||s;return c.selected?l||f:u||(l?f:Kd.addOpacity(f,Ioe))},t};Aa.selectedPointStyle=function(e,t){if(!(!e.size()||!t.selectedpoints)){var r=Aa.makeSelectedPointStyleFns(t),n=t.marker||{},i=[];r.selectedOpacityFn&&i.push(function(a,o){a.style("opacity",r.selectedOpacityFn(o))}),r.selectedColorFn&&i.push(function(a,o){Kd.fill(a,r.selectedColorFn(o))}),r.selectedSizeFn&&i.push(function(a,o){var s=o.mx||n.symbol||0,l=r.selectedSizeFn(o);a.attr("d",Doe(Aa.symbolNumber(s),l,Xq(o,t),Wq(o,t))),o.mrc2=l}),i.length&&e.each(function(a){for(var o=dd.select(this),s=0;s<i.length;s++)i[s](o,a)})}};Aa.tryColorscale=function(e,t){var r=t?Du.nestedProperty(e,t).get():e;if(r){var n=r.color;if((r.colorscale||r._colorAx)&&Du.isArrayOrTypedArray(n))return Oot.makeColorScaleFuncFromTrace(r)}return Du.identity};var Nq={start:1,end:-1,middle:0,bottom:1,top:-1};function Ooe(e,t,r,n,i){var a=dd.select(e.node().parentNode),o=t.indexOf("top")!==-1?"top":t.indexOf("bottom")!==-1?"bottom":"middle",s=t.indexOf("left")!==-1?"end":t.indexOf("right")!==-1?"start":"middle",l=n?n/.8+1:0,u=(lL.lineCount(e)-1)*Not+1,c=Nq[s]*l,f=r*.75+Nq[o]*l+(Nq[o]-1)*u*r/2;e.attr("text-anchor",s),i||a.attr("transform",rM(c,f))}function qoe(e,t){var r=e.ts||t.textfont.size;return Sb(r)&&r>0?r:0}Aa.textPointStyle=function(e,t,r){if(e.size()){var n;if(t.selectedpoints){var i=Aa.makeSelectedTextStyleFns(t);n=i.selectedTextColorFn}var a=t.texttemplate,o=r._fullLayout;e.each(function(s){var l=dd.select(this),u=a?Du.extractOption(s,t,"txt","texttemplate"):Du.extractOption(s,t,"tx","text");if(!u&&u!==0){l.remove();return}if(a){var c=t._module.formatLabels,f=c?c(s,t,o):{},h={};Got(h,t,s.i),u=Du.texttemplateString({data:[h,s,t._meta],fallback:t.texttemplatefallback,labels:f,locale:o._d3locale,template:u})}var d=s.tp||t.textposition,v=qoe(s,t),_=n?n(s):s.tc||t.textfont.color;l.call(Aa.font,{family:s.tf||t.textfont.family,weight:s.tw||t.textfont.weight,style:s.ty||t.textfont.style,variant:s.tv||t.textfont.variant,textcase:s.tC||t.textfont.textcase,lineposition:s.tE||t.textfont.lineposition,shadow:s.tS||t.textfont.shadow,size:v,color:_}).text(u).call(lL.convertToTspans,r).call(Ooe,d,v,s.mrc)})}};Aa.selectedTextStyle=function(e,t){if(!(!e.size()||!t.selectedpoints)){var r=Aa.makeSelectedTextStyleFns(t);e.each(function(n){var i=dd.select(this),a=r.selectedTextColorFn(n),o=n.tp||t.textposition,s=qoe(n,t);Kd.fill(i,a);var l=sL.traceIs(t,"bar-like");Ooe(i,o,s,n.mrc2||n.mrc,l)})}};var Soe=.5;Aa.smoothopen=function(e,t){if(e.length<3)return"M"+e.join("L");var r="M"+e[0],n=[],i;for(i=1;i<e.length-1;i++)n.push(oL(e[i-1],e[i],e[i+1],t));for(r+="Q"+n[0][0]+" "+e[1],i=2;i<e.length-1;i++)r+="C"+n[i-2][1]+" "+n[i-1][0]+" "+e[i];return r+="Q"+n[e.length-3][1]+" "+e[e.length-1],r};Aa.smoothclosed=function(e,t){if(e.length<3)return"M"+e.join("L")+"Z";var r="M"+e[0],n=e.length-1,i=[oL(e[n],e[0],e[1],t)],a;for(a=1;a<n;a++)i.push(oL(e[a-1],e[a],e[a+1],t));for(i.push(oL(e[n-1],e[n],e[0],t)),a=1;a<=n;a++)r+="C"+i[a-1][1]+" "+i[a][0]+" "+e[a];return r+="C"+i[n][1]+" "+i[0][0]+" "+e[0]+"Z",r};var Boe,Noe;function C3(e,t,r){return r&&(e=Uoe(e)),t?Ab(e[1]):Tb(e[0])}function Tb(e){var t=dd.round(e,2);return Boe=t,t}function Ab(e){var t=dd.round(e,2);return Noe=t,t}function oL(e,t,r,n){var i=e[0]-t[0],a=e[1]-t[1],o=r[0]-t[0],s=r[1]-t[1],l=Math.pow(i*i+a*a,Soe/2),u=Math.pow(o*o+s*s,Soe/2),c=(u*u*i-l*l*o)*n,f=(u*u*a-l*l*s)*n,h=3*u*(l+u),d=3*l*(l+u);return[[Tb(t[0]+(h&&c/h)),Ab(t[1]+(h&&f/h))],[Tb(t[0]-(d&&c/d)),Ab(t[1]-(d&&f/d))]]}var Yot={hv:function(e,t,r){return"H"+Tb(t[0])+"V"+C3(t,1,r)},vh:function(e,t,r){return"V"+Ab(t[1])+"H"+C3(t,0,r)},hvh:function(e,t,r){return"H"+Tb((e[0]+t[0])/2)+"V"+Ab(t[1])+"H"+C3(t,0,r)},vhv:function(e,t,r){return"V"+Ab((e[1]+t[1])/2)+"H"+Tb(t[0])+"V"+C3(t,1,r)}},Kot=function(e,t,r){return"L"+C3(t,0,r)+","+C3(t,1,r)};Aa.steps=function(e){var t=Yot[e]||Kot;return function(r){for(var n="M"+Tb(r[0][0])+","+Ab(r[0][1]),i=r.length,a=1;a<i;a++)n+=t(r[a-1],r[a],a===i-1);return n}};function Uoe(e,t){var r=e.backoff,n=e.trace,i=e.d,a=e.i;if(r&&n&&n.marker&&n.marker.angle%360===0&&n.line&&n.line.shape!=="spline"){var o=Du.isArrayOrTypedArray(r),s=e,l=t?t[0]:Boe||0,u=t?t[1]:Noe||0,c=s[0],f=s[1],h=c-l,d=f-u,v=Math.atan2(d,h),_=o?r[a]:r;if(_==="auto"){var b=s.i;n.type==="scatter"&&b--;var p=s.marker,k=p.symbol;Du.isArrayOrTypedArray(k)&&(k=k[b]);var E=p.size;Du.isArrayOrTypedArray(E)&&(E=E[b]),_=p?Aa.symbolBackOffs[Aa.symbolNumber(k)]*E:0,_+=Aa.getMarkerStandoff(i[b],n)||0}var S=c-_*Math.cos(v),L=f-_*Math.sin(v);(S<=c&&S>=l||S>=c&&S<=l)&&(L<=f&&L>=u||L>=f&&L<=u)&&(e=[S,L])}return e}Aa.applyBackoff=Uoe;Aa.makeTester=function(){var e=Du.ensureSingleById(dd.select("body"),"svg","js-plotly-tester",function(r){r.attr(qot.svgAttrs).style({position:"absolute",left:"-10000px",top:"-10000px",width:"9000px",height:"9000px","z-index":"1"})}),t=Du.ensureSingle(e,"path","js-reference-point",function(r){r.attr("d","M0,0H1V1H0Z").style({"stroke-width":0,fill:"black"})});Aa.tester=e,Aa.testref=t};Aa.savedBBoxes={};var Uq=0,Jot=1e4;Aa.bBox=function(e,t,r){r||(r=Moe(e));var n;if(r){if(n=Aa.savedBBoxes[r],n)return Du.extendFlat({},n)}else if(e.childNodes.length===1){var i=e.childNodes[0];if(r=Moe(i),r){var a=+i.getAttribute("x")||0,o=+i.getAttribute("y")||0,s=i.getAttribute("transform");if(!s){var l=Aa.bBox(i,!1,r);return a&&(l.left+=a,l.right+=a),o&&(l.top+=o,l.bottom+=o),l}if(r+="~"+a+"~"+o+"~"+s,n=Aa.savedBBoxes[r],n)return Du.extendFlat({},n)}}var u,c;t?u=e:(c=Aa.tester.node(),u=e.cloneNode(!0),c.appendChild(u)),dd.select(u).attr("transform",null).call(lL.positionText,0,0);var f=u.getBoundingClientRect(),h=Aa.testref.node().getBoundingClientRect();t||c.removeChild(u);var d={height:f.height,width:f.width,left:f.left-h.left,top:f.top-h.top,right:f.right-h.left,bottom:f.bottom-h.top};return Uq>=Jot&&(Aa.savedBBoxes={},Uq=0),r&&(Aa.savedBBoxes[r]=d),Uq++,Du.extendFlat({},d)};function Moe(e){var t=e.getAttribute("data-unformatted");if(t!==null)return t+e.getAttribute("data-math")+e.getAttribute("text-anchor")+e.getAttribute("style")}Aa.setClipUrl=function(e,t,r){e.attr("clip-path",jq(t,r))};function jq(e,t){if(!e)return null;var r=t._context,n=r._exportedPlot?"":r._baseUrl||"";return n?"url('"+n+"#"+e+"')":"url(#"+e+")"}Aa.getTranslate=function(e){var t=/.*\btranslate\((-?\d*\.?\d*)[^-\d]*(-?\d*\.?\d*)[^\d].*/,r=e.attr?"attr":"getAttribute",n=e[r]("transform")||"",i=n.replace(t,function(a,o,s){return[o,s].join(" ")}).split(" ");return{x:+i[0]||0,y:+i[1]||0}};Aa.setTranslate=function(e,t,r){var n=/(\btranslate\(.*?\);?)/,i=e.attr?"attr":"getAttribute",a=e.attr?"attr":"setAttribute",o=e[i]("transform")||"";return t=t||0,r=r||0,o=o.replace(n,"").trim(),o+=rM(t,r),o=o.trim(),e[a]("transform",o),o};Aa.getScale=function(e){var t=/.*\bscale\((\d*\.?\d*)[^\d]*(\d*\.?\d*)[^\d].*/,r=e.attr?"attr":"getAttribute",n=e[r]("transform")||"",i=n.replace(t,function(a,o,s){return[o,s].join(" ")}).split(" ");return{x:+i[0]||1,y:+i[1]||1}};Aa.setScale=function(e,t,r){var n=/(\bscale\(.*?\);?)/,i=e.attr?"attr":"getAttribute",a=e.attr?"attr":"setAttribute",o=e[i]("transform")||"";return t=t||1,r=r||1,o=o.replace(n,"").trim(),o+="scale("+t+","+r+")",o=o.trim(),e[a]("transform",o),o};var $ot=/\s*sc.*/;Aa.setPointGroupScale=function(e,t,r){if(t=t||1,r=r||1,!!e){var n=t===1&&r===1?"":"scale("+t+","+r+")";e.each(function(){var i=(this.getAttribute("transform")||"").replace($ot,"");i+=n,i=i.trim(),this.setAttribute("transform",i)})}};var Qot=/translate\([^)]*\)\s*$/;Aa.setTextPointsScale=function(e,t,r){e&&e.each(function(){var n,i=dd.select(this),a=i.select("text");if(a.node()){var o=parseFloat(a.attr("x")||0),s=parseFloat(a.attr("y")||0),l=(i.attr("transform")||"").match(Qot);t===1&&r===1?n=[]:n=[rM(o,s),"scale("+t+","+r+")",rM(-o,-s)],l&&n.push(l),i.attr("transform",n.join(""))}})};function Wq(e,t){var r;return e&&(r=e.mf),r===void 0&&(r=t.marker&&t.marker.standoff||0),!t._geo&&!t._xA?-r:r}Aa.getMarkerStandoff=Wq;var tM=Math.atan2,wb=Math.cos,L3=Math.sin;function Eoe(e,t){var r=t[0],n=t[1];return[r*wb(e)-n*L3(e),r*L3(e)+n*wb(e)]}var koe,Coe,Loe,Poe,Vq,Gq;function Xq(e,t){var r=e.ma;r===void 0&&(r=t.marker.angle,(!r||Du.isArrayOrTypedArray(r))&&(r=0));var n,i,a=t.marker.angleref;if(a==="previous"||a==="north"){if(t._geo){var o=t._geo.project(e.lonlat);n=o[0],i=o[1]}else{var s=t._xA,l=t._yA;if(s&&l)n=s.c2p(e.x),i=l.c2p(e.y);else return 90}if(t._geo){var u=e.lonlat[0],c=e.lonlat[1],f=t._geo.project([u,c+1e-5]),h=t._geo.project([u+1e-5,c]),d=tM(h[1]-i,h[0]-n),v=tM(f[1]-i,f[0]-n),_;if(a==="north")_=r/180*Math.PI;else if(a==="previous"){var b=u/180*Math.PI,p=c/180*Math.PI,k=koe/180*Math.PI,E=Coe/180*Math.PI,S=k-b,L=wb(E)*L3(S),x=L3(E)*wb(p)-wb(E)*L3(p)*wb(S);_=-tM(L,x)-Math.PI,koe=u,Coe=c}var C=Eoe(d,[wb(_),0]),M=Eoe(v,[L3(_),0]);r=tM(C[1]+M[1],C[0]+M[0])/Math.PI*180,a==="previous"&&!(Gq===t.uid&&e.i===Vq+1)&&(r=null)}if(a==="previous"&&!t._geo)if(Gq===t.uid&&e.i===Vq+1&&Sb(n)&&Sb(i)){var g=n-Loe,P=i-Poe,T=t.line&&t.line.shape||"",z=T.slice(T.length-1);z==="h"&&(P=0),z==="v"&&(g=0),r+=tM(P,g)/Math.PI*180+90}else r=null}return Loe=n,Poe=i,Vq=e.i,Gq=t.uid,r}Aa.getMarkerAngle=Xq});var Eb=ye((Vnr,Woe)=>{"use strict";var P3=Oa(),est=Eo(),tst=Mc(),Zq=qa(),Mb=Dr(),Goe=Mb.strTranslate,uL=So(),cL=ka(),I3=ru(),Hoe=N1(),rst=$h().OPPOSITE_SIDE,joe=/ [XY][0-9]* /,Yq=1.6,Kq=1.6;function ist(e,t,r){var n=e._fullLayout,i=r.propContainer,a=r.propName,o=r.placeholder,s=r.traceIndex,l=r.avoid||{},u=r.attributes,c=r.transform,f=r.containerGroup,h=1,d=i.title,v=(d&&d.text?d.text:"").trim(),_=!1,b=d&&d.font?d.font:{},p=b.family,k=b.size,E=b.color,S=b.weight,L=b.style,x=b.variant,C=b.textcase,M=b.lineposition,g=b.shadow,P=r.subtitlePropName,T=!!P,z=r.subtitlePlaceholder,O=(i.title||{}).subtitle||{text:"",font:{}},V=(O.text||"").trim(),G=!1,Z=1,j=O.font,N=j.family,H=j.size,te=j.color,oe=j.weight,_e=j.style,Ee=j.variant,Ce=j.textcase,me=j.lineposition,ie=j.shadow,Se;a==="title.text"?Se="titleText":a.indexOf("axis")!==-1?Se="axisTitleText":a.indexOf("colorbar")!==-1&&(Se="colorbarTitleText");var Le=e._context.edits[Se];function Ae(Nt,Jt){return Nt===void 0||Jt===void 0?!1:Nt.replace(joe," % ")===Jt.replace(joe," % ")}v===""?h=0:Ae(v,o)&&(Le||(v=""),h=.2,_=!0),T&&(V===""?Z=0:Ae(V,z)&&(Le||(V=""),Z=.2,G=!0)),r._meta?v=Mb.templateString(v,r._meta):n._meta&&(v=Mb.templateString(v,n._meta));var Fe=v||V||Le,Pe;f||(f=Mb.ensureSingle(n._infolayer,"g","g-"+t),Pe=n._hColorbarMoveTitle);var ge=f.selectAll("text."+t).data(Fe?[0]:[]);ge.enter().append("text"),ge.text(v).attr("class",t),ge.exit().remove();var Re=null,ce=t+"-subtitle",Ze=V||Le;if(T&&(Re=f.selectAll("text."+ce).data(Ze?[0]:[]),Re.enter().append("text"),Re.text(V).attr("class",ce),Re.exit().remove()),!Fe)return f;function ut(Nt,Jt){Mb.syncOrAsync([pt,Zt],{title:Nt,subtitle:Jt})}function pt(Nt){var Jt=Nt.title,sr=Nt.subtitle,wr;!c&&Pe&&(c={}),c?(wr="",c.rotate&&(wr+="rotate("+[c.rotate,u.x,u.y]+")"),(c.offset||Pe)&&(wr+=Goe(0,(c.offset||0)-(Pe||0)))):wr=null,Jt.attr("transform",wr);function cr(It){if(It){var mt=P3.select(It.node().parentNode).select("."+ce);if(!mt.empty()){var er=It.node().getBBox();if(er.height){var lr=er.y+er.height+Yq*H;mt.attr("y",lr)}}}}if(Jt.style("opacity",h*cL.opacity(E)).call(uL.font,{color:cL.rgb(E),size:P3.round(k,2),family:p,weight:S,style:L,variant:x,textcase:C,shadow:g,lineposition:M}).attr(u).call(I3.convertToTspans,e,cr),sr&&!sr.empty()){var $e=f.select("."+t+"-math-group"),St=Jt.node().getBBox(),Qt=$e.node()?$e.node().getBBox():void 0,Vt=Qt?Qt.y+Qt.height+Yq*H:St.y+St.height+Kq*H,_t=Mb.extendFlat({},u,{y:Vt});sr.attr("transform",wr),sr.style("opacity",Z*cL.opacity(te)).call(uL.font,{color:cL.rgb(te),size:P3.round(H,2),family:N,weight:oe,style:_e,variant:Ee,textcase:Ce,shadow:ie,lineposition:me}).attr(_t).call(I3.convertToTspans,e)}return tst.previousPromises(e)}function Zt(Nt){var Jt=Nt.title,sr=P3.select(Jt.node().parentNode);if(l&&l.selection&&l.side&&v){sr.attr("transform",null);var wr=rst[l.side],cr=l.side==="left"||l.side==="top"?-1:1,$e=est(l.pad)?l.pad:2,St=uL.bBox(sr.node()),Qt={t:0,b:0,l:0,r:0},Vt=e._fullLayout._reservedMargin;for(var _t in Vt)for(var It in Vt[_t]){var mt=Vt[_t][It];Qt[It]=Math.max(Qt[It],mt)}var er={left:Qt.l,top:Qt.t,right:n.width-Qt.r,bottom:n.height-Qt.b},lr=l.maxShift||cr*(er[l.side]-St[l.side]),Tr=0;if(lr<0)Tr=lr;else{var Lr=l.offsetLeft||0,ti=l.offsetTop||0;St.left-=Lr,St.right-=Lr,St.top-=ti,St.bottom-=ti,l.selection.each(function(){var Vr=uL.bBox(this);Mb.bBoxIntersect(St,Vr,$e)&&(Tr=Math.max(Tr,cr*(Vr[l.side]-St[wr])+$e))}),Tr=Math.min(lr,Tr),i._titleScoot=Math.abs(Tr)}if(Tr>0||lr<0){var Br={left:[-Tr,0],right:[Tr,0],top:[0,-Tr],bottom:[0,Tr]}[l.side];sr.attr("transform",Goe(Br[0],Br[1]))}}}ge.call(ut,Re);function st(Nt,Jt){Nt.text(Jt).on("mouseover.opacity",function(){P3.select(this).transition().duration(Hoe.SHOW_PLACEHOLDER).style("opacity",1)}).on("mouseout.opacity",function(){P3.select(this).transition().duration(Hoe.HIDE_PLACEHOLDER).style("opacity",0)})}if(Le&&(v?ge.on(".opacity",null):(st(ge,o),_=!0),ge.call(I3.makeEditable,{gd:e}).on("edit",function(Nt){s!==void 0?Zq.call("_guiRestyle",e,a,Nt,s):Zq.call("_guiRelayout",e,a,Nt)}).on("cancel",function(){this.text(this.attr("data-unformatted")).call(ut)}).on("input",function(Nt){this.text(Nt||" ").call(I3.positionText,u.x,u.y)}),T)){if(T&&!v){var lt=ge.node().getBBox(),Gt=lt.y+lt.height+Kq*H;Re.attr("y",Gt)}V?Re.on(".opacity",null):(st(Re,z),G=!0),Re.call(I3.makeEditable,{gd:e}).on("edit",function(Nt){Zq.call("_guiRelayout",e,"title.subtitle.text",Nt)}).on("cancel",function(){this.text(this.attr("data-unformatted")).call(ut)}).on("input",function(Nt){this.text(Nt||" ").call(I3.positionText,Re.attr("x"),Re.attr("y"))})}return ge.classed("js-placeholder",_),Re&&!Re.empty()&&Re.classed("js-placeholder",G),f}Woe.exports={draw:ist,SUBTITLE_PADDING_EM:Kq,SUBTITLE_PADDING_MATHJAX_EM:Yq}});var ym=ye((Gnr,Joe)=>{"use strict";var nst=Oa(),ast=r3().utcFormat,yc=Dr(),ost=yc.numberFormat,gm=Eo(),n_=yc.cleanNumber,sst=yc.ms2DateTime,Xoe=yc.dateTime2ms,mm=yc.ensureNumber,Zoe=yc.isArrayOrTypedArray,a_=fs(),fL=a_.FP_SAFE,Tg=a_.BADNUM,lst=a_.LOG_CLIP,ust=a_.ONEWEEK,hL=a_.ONEDAY,dL=a_.ONEHOUR,Yoe=a_.ONEMIN,Koe=a_.ONESEC,vL=hf(),mL=hd(),pL=mL.HOUR_PATTERN,gL=mL.WEEKDAY_PATTERN;function iM(e){return Math.pow(10,e)}function Jq(e){return e!=null}Joe.exports=function(t,r){r=r||{};var n=t._id||"x",i=n.charAt(0);function a(S,L){if(S>0)return Math.log(S)/Math.LN10;if(S<=0&&L&&t.range&&t.range.length===2){var x=t.range[0],C=t.range[1];return .5*(x+C-2*lst*Math.abs(x-C))}else return Tg}function o(S,L,x,C){if((C||{}).msUTC&&gm(S))return+S;var M=Xoe(S,x||t.calendar);if(M===Tg)if(gm(S)){S=+S;var g=Math.floor(yc.mod(S+.05,1)*10),P=Math.round(S-g/10);M=Xoe(new Date(P))+g/10}else return Tg;return M}function s(S,L,x){return sst(S,L,x||t.calendar)}function l(S){return t._categories[Math.round(S)]}function u(S){if(Jq(S)){if(t._categoriesMap===void 0&&(t._categoriesMap={}),t._categoriesMap[S]!==void 0)return t._categoriesMap[S];t._categories.push(typeof S=="number"?String(S):S);var L=t._categories.length-1;return t._categoriesMap[S]=L,L}return Tg}function c(S,L){for(var x=new Array(L),C=0;C<L;C++){var M=(S[0]||[])[C],g=(S[1]||[])[C];x[C]=f([M,g])}return x}function f(S){if(t._categoriesMap)return t._categoriesMap[S]}function h(S){var L=f(S);if(L!==void 0)return L;if(gm(S))return+S}function d(S){return gm(S)?+S:f(S)}function v(S,L,x){return nst.round(x+L*S,2)}function _(S,L,x){return(S-x)/L}var b=function(L){return gm(L)?v(L,t._m,t._b):Tg},p=function(S){return _(S,t._m,t._b)};if(t.rangebreaks){var k=i==="y";b=function(S){if(!gm(S))return Tg;var L=t._rangebreaks.length;if(!L)return v(S,t._m,t._b);var x=k;t.range[0]>t.range[1]&&(x=!x);for(var C=x?-1:1,M=C*S,g=0,P=0;P<L;P++){var T=C*t._rangebreaks[P].min,z=C*t._rangebreaks[P].max;if(M<T)break;if(M>z)g=P+1;else{g=M<(T+z)/2?P:P+1;break}}var O=t._B[g]||0;return isFinite(O)?v(S,t._m2,O):0},p=function(S){var L=t._rangebreaks.length;if(!L)return _(S,t._m,t._b);for(var x=0,C=0;C<L&&!(S<t._rangebreaks[C].pmin);C++)S>t._rangebreaks[C].pmax&&(x=C+1);return _(S,t._m2,t._B[x])}}t.c2l=t.type==="log"?a:mm,t.l2c=t.type==="log"?iM:mm,t.l2p=b,t.p2l=p,t.c2p=t.type==="log"?function(S,L){return b(a(S,L))}:b,t.p2c=t.type==="log"?function(S){return iM(p(S))}:p,["linear","-"].indexOf(t.type)!==-1?(t.d2r=t.r2d=t.d2c=t.r2c=t.d2l=t.r2l=n_,t.c2d=t.c2r=t.l2d=t.l2r=mm,t.d2p=t.r2p=function(S){return t.l2p(n_(S))},t.p2d=t.p2r=p,t.cleanPos=mm):t.type==="log"?(t.d2r=t.d2l=function(S,L){return a(n_(S),L)},t.r2d=t.r2c=function(S){return iM(n_(S))},t.d2c=t.r2l=n_,t.c2d=t.l2r=mm,t.c2r=a,t.l2d=iM,t.d2p=function(S,L){return t.l2p(t.d2r(S,L))},t.p2d=function(S){return iM(p(S))},t.r2p=function(S){return t.l2p(n_(S))},t.p2r=p,t.cleanPos=mm):t.type==="date"?(t.d2r=t.r2d=yc.identity,t.d2c=t.r2c=t.d2l=t.r2l=o,t.c2d=t.c2r=t.l2d=t.l2r=s,t.d2p=t.r2p=function(S,L,x){return t.l2p(o(S,0,x))},t.p2d=t.p2r=function(S,L,x){return s(p(S),L,x)},t.cleanPos=function(S){return yc.cleanDate(S,Tg,t.calendar)}):t.type==="category"?(t.d2c=t.d2l=u,t.r2d=t.c2d=t.l2d=l,t.d2r=t.d2l_noadd=h,t.r2c=function(S){var L=d(S);return L!==void 0?L:t.fraction2r(.5)},t.l2r=t.c2r=mm,t.r2l=d,t.d2p=function(S){return t.l2p(t.r2c(S))},t.p2d=function(S){return l(p(S))},t.r2p=t.d2p,t.p2r=p,t.cleanPos=function(S){return typeof S=="string"&&S!==""?S:mm(S)}):t.type==="multicategory"&&(t.r2d=t.c2d=t.l2d=l,t.d2r=t.d2l_noadd=h,t.r2c=function(S){var L=h(S);return L!==void 0?L:t.fraction2r(.5)},t.r2c_just_indices=f,t.l2r=t.c2r=mm,t.r2l=h,t.d2p=function(S){return t.l2p(t.r2c(S))},t.p2d=function(S){return l(p(S))},t.r2p=t.d2p,t.p2r=p,t.cleanPos=function(S){return Array.isArray(S)||typeof S=="string"&&S!==""?S:mm(S)},t.setupMultiCategory=function(S){var L=t._traceIndices,x,C,M=t._matchGroup;if(M&&t._categories.length===0){for(var g in M)if(g!==n){var P=r[vL.id2name(g)];L=L.concat(P._traceIndices)}}var T=[[0,{}],[0,{}]],z=[];for(x=0;x<L.length;x++){var O=S[L[x]];if(i in O){var V=O[i],G=O._length||yc.minRowLength(V);if(Zoe(V[0])&&Zoe(V[1]))for(C=0;C<G;C++){var Z=V[0][C],j=V[1][C];Jq(Z)&&Jq(j)&&(z.push([Z,j]),Z in T[0][1]||(T[0][1][Z]=T[0][0]++),j in T[1][1]||(T[1][1][j]=T[1][0]++))}}}for(z.sort(function(N,H){var te=T[0][1],oe=te[N[0]]-te[H[0]];if(oe)return oe;var _e=T[1][1];return _e[N[1]]-_e[H[1]]}),x=0;x<z.length;x++)u(z[x])}),t.fraction2r=function(S){var L=t.r2l(t.range[0]),x=t.r2l(t.range[1]);return t.l2r(L+S*(x-L))},t.r2fraction=function(S){var L=t.r2l(t.range[0]),x=t.r2l(t.range[1]);return(t.r2l(S)-L)/(x-L)},t.limitRange=function(S){var L=t.minallowed,x=t.maxallowed;if(!(L===void 0&&x===void 0)){S||(S="range");var C=yc.nestedProperty(t,S).get(),M=yc.simpleMap(C,t.r2l),g=M[1]<M[0];g&&M.reverse();var P=yc.simpleMap([L,x],t.r2l);if(L!==void 0&&M[0]<P[0]&&(C[g?1:0]=L),x!==void 0&&M[1]>P[1]&&(C[g?0:1]=x),C[0]===C[1]){var T=t.l2r(L),z=t.l2r(x);if(L!==void 0){var O=T+1;x!==void 0&&(O=Math.min(O,z)),C[g?1:0]=O}if(x!==void 0){var V=z+1;L!==void 0&&(V=Math.max(V,T)),C[g?0:1]=V}}}},t.cleanRange=function(S,L){t._cleanRange(S,L),t.limitRange(S)},t._cleanRange=function(S,L){L||(L={}),S||(S="range");var x=yc.nestedProperty(t,S).get(),C,M;if(t.type==="date"?M=yc.dfltRange(t.calendar):i==="y"?M=mL.DFLTRANGEY:t._name==="realaxis"?M=[0,1]:M=L.dfltRange||mL.DFLTRANGEX,M=M.slice(),(t.rangemode==="tozero"||t.rangemode==="nonnegative")&&(M[0]=0),!x||x.length!==2){yc.nestedProperty(t,S).set(M);return}var g=x[0]===null,P=x[1]===null;for(t.type==="date"&&!t.autorange&&(x[0]=yc.cleanDate(x[0],Tg,t.calendar),x[1]=yc.cleanDate(x[1],Tg,t.calendar)),C=0;C<2;C++)if(t.type==="date"){if(!yc.isDateTime(x[C],t.calendar)){t[S]=M;break}if(t.r2l(x[0])===t.r2l(x[1])){var T=yc.constrain(t.r2l(x[0]),yc.MIN_MS+1e3,yc.MAX_MS-1e3);x[0]=t.l2r(T-1e3),x[1]=t.l2r(T+1e3);break}}else{if(!gm(x[C]))if(!(g||P)&&gm(x[1-C]))x[C]=x[1-C]*(C?10:.1);else{t[S]=M;break}if(x[C]<-fL?x[C]=-fL:x[C]>fL&&(x[C]=fL),x[0]===x[1]){var z=Math.max(1,Math.abs(x[0]*1e-6));x[0]-=z,x[1]+=z}}},t.setScale=function(S){var L=r._size;if(t.overlaying){var x=vL.getFromId({_fullLayout:r},t.overlaying);t.domain=x.domain}var C=S&&t._r?"_r":"range",M=t.calendar;t.cleanRange(C);var g=t.r2l(t[C][0],M),P=t.r2l(t[C][1],M),T=i==="y";if(T?(t._offset=L.t+(1-t.domain[1])*L.h,t._length=L.h*(t.domain[1]-t.domain[0]),t._m=t._length/(g-P),t._b=-t._m*P):(t._offset=L.l+t.domain[0]*L.w,t._length=L.w*(t.domain[1]-t.domain[0]),t._m=t._length/(P-g),t._b=-t._m*g),t._rangebreaks=[],t._lBreaks=0,t._m2=0,t._B=[],t.rangebreaks){var z,O;if(t._rangebreaks=t.locateBreaks(Math.min(g,P),Math.max(g,P)),t._rangebreaks.length){for(z=0;z<t._rangebreaks.length;z++)O=t._rangebreaks[z],t._lBreaks+=Math.abs(O.max-O.min);var V=T;g>P&&(V=!V),V&&t._rangebreaks.reverse();var G=V?-1:1;for(t._m2=G*t._length/(Math.abs(P-g)-t._lBreaks),t._B.push(-t._m2*(T?P:g)),z=0;z<t._rangebreaks.length;z++)O=t._rangebreaks[z],t._B.push(t._B[t._B.length-1]-G*t._m2*(O.max-O.min));for(z=0;z<t._rangebreaks.length;z++)O=t._rangebreaks[z],O.pmin=b(O.min),O.pmax=b(O.max)}}if(!isFinite(t._m)||!isFinite(t._b)||t._length<0)throw r._replotting=!1,new Error("Something went wrong with axis scaling")},t.maskBreaks=function(S){var L=t.rangebreaks||[],x,C,M,g,P;L._cachedPatterns||(L._cachedPatterns=L.map(function(te){return te.enabled&&te.bounds?yc.simpleMap(te.bounds,te.pattern?n_:t.d2c):null})),L._cachedValues||(L._cachedValues=L.map(function(te){return te.enabled&&te.values?yc.simpleMap(te.values,t.d2c).sort(yc.sorterAsc):null}));for(var T=0;T<L.length;T++){var z=L[T];if(z.enabled){if(z.bounds){var O=z.pattern;switch(x=L._cachedPatterns[T],C=x[0],M=x[1],O){case gL:P=new Date(S),g=P.getUTCDay(),C>M&&(M+=7,g<C&&(g+=7));break;case pL:P=new Date(S);var V=P.getUTCHours(),G=P.getUTCMinutes(),Z=P.getUTCSeconds(),j=P.getUTCMilliseconds();g=V+(G/60+Z/3600+j/36e5),C>M&&(M+=24,g<C&&(g+=24));break;case"":g=S;break}if(g>=C&&g<M)return Tg}else for(var N=L._cachedValues[T],H=0;H<N.length;H++)if(C=N[H],M=C+z.dvalue,S>=C&&S<M)return Tg}}return S},t.locateBreaks=function(S,L){var x,C,M,g,P=[];if(!t.rangebreaks)return P;var T=t.rangebreaks.slice().sort(function(_e,Ee){return _e.pattern===gL&&Ee.pattern===pL?-1:Ee.pattern===gL&&_e.pattern===pL?1:0}),z=function(_e,Ee){if(_e=yc.constrain(_e,S,L),Ee=yc.constrain(Ee,S,L),_e!==Ee){for(var Ce=!0,me=0;me<P.length;me++){var ie=P[me];_e<ie.max&&Ee>=ie.min&&(_e<ie.min&&(ie.min=_e),Ee>ie.max&&(ie.max=Ee),Ce=!1)}Ce&&P.push({min:_e,max:Ee})}};for(x=0;x<T.length;x++){var O=T[x];if(O.enabled)if(O.bounds){var V=S,G=L;O.pattern&&(V=Math.floor(V)),C=yc.simpleMap(O.bounds,O.pattern?n_:t.r2l),M=C[0],g=C[1];var Z=new Date(V),j,N;switch(O.pattern){case gL:N=ust,j=((g<M?7:0)+(g-M))*hL,V+=M*hL-(Z.getUTCDay()*hL+Z.getUTCHours()*dL+Z.getUTCMinutes()*Yoe+Z.getUTCSeconds()*Koe+Z.getUTCMilliseconds());break;case pL:N=hL,j=((g<M?24:0)+(g-M))*dL,V+=M*dL-(Z.getUTCHours()*dL+Z.getUTCMinutes()*Yoe+Z.getUTCSeconds()*Koe+Z.getUTCMilliseconds());break;default:V=Math.min(C[0],C[1]),G=Math.max(C[0],C[1]),N=G-V,j=N}for(var H=V;H<G;H+=N)z(H,H+j)}else for(var te=yc.simpleMap(O.values,t.d2c),oe=0;oe<te.length;oe++)M=te[oe],g=M+O.dvalue,z(M,g)}return P.sort(function(_e,Ee){return _e.min-Ee.min}),P},t.makeCalcdata=function(S,L,x){var C,M,g,P,T=t.type,z=T==="date"&&S[L+"calendar"];if(L in S){if(C=S[L],P=S._length||yc.minRowLength(C),yc.isTypedArray(C)&&(T==="linear"||T==="log")){if(P===C.length)return C;if(C.subarray)return C.subarray(0,P)}if(T==="multicategory")return c(C,P);for(M=new Array(P),g=0;g<P;g++)M[g]=t.d2c(C[g],0,z,x)}else{var O=L+"0"in S?t.d2c(S[L+"0"],0,z):0,V=S["d"+L]?Number(S["d"+L]):1;for(C=S[{x:"y",y:"x"}[L]],P=S._length||C.length,M=new Array(P),g=0;g<P;g++)M[g]=O+g*V}if(t.rangebreaks)for(g=0;g<P;g++)M[g]=t.maskBreaks(M[g]);return M},t.isValidRange=function(S,L){return Array.isArray(S)&&S.length===2&&(L&&S[0]===null||gm(t.r2l(S[0])))&&(L&&S[1]===null||gm(t.r2l(S[1])))},t.getAutorangeDflt=function(S,L){var x=!t.isValidRange(S,"nullOk");return x&&L&&L.reverseDflt?x="reversed":S&&(S[0]===null&&S[1]===null?x=!0:S[0]===null&&S[1]!==null?x="min":S[0]!==null&&S[1]===null&&(x="max")),x},t.isReversed=function(){var S=t.autorange;return S==="reversed"||S==="min reversed"||S==="max reversed"},t.isPtWithinRange=function(S,L){var x=t.c2l(S[i],null,L),C=t.r2l(t.range[0]),M=t.r2l(t.range[1]);return C<M?C<=x&&x<=M:M<=x&&x<=C},t._emptyCategories=function(){t._categories=[],t._categoriesMap={}},t.clearCalc=function(){var S=t._matchGroup;if(S){var L=null,x=null;for(var C in S){var M=r[vL.id2name(C)];if(M._categories){L=M._categories,x=M._categoriesMap;break}}L&&x?(t._categories=L,t._categoriesMap=x):t._emptyCategories()}else t._emptyCategories();if(t._initialCategories)for(var g=0;g<t._initialCategories.length;g++)u(t._initialCategories[g])},t.sortByInitialCategories=function(){var S=[];if(t._emptyCategories(),t._initialCategories)for(var L=0;L<t._initialCategories.length;L++)u(t._initialCategories[L]);S=S.concat(t._traceIndices);var x=t._matchGroup;for(var C in x)if(n!==C){var M=r[vL.id2name(C)];M._categories=t._categories,M._categoriesMap=t._categoriesMap,S=S.concat(M._traceIndices)}return S};var E=r._d3locale;t.type==="date"&&(t._dateFormat=E?E.timeFormat:ast,t._extraFormat=r._extraFormat),t._separators=r.separators,t._numFormat=E?E.numberFormat:ost,delete t._minDtick,delete t._forceTick0}});var R3=ye((Hnr,tse)=>{"use strict";var $oe=Eo(),$q=Dr(),cst=fs().BADNUM,yL=$q.isArrayOrTypedArray,fst=$q.isDateTime,hst=$q.cleanNumber,Qoe=Math.round;tse.exports=function(t,r,n){var i=t,a=n.noMultiCategory;if(yL(i)&&!i.length)return"-";if(!a&&mst(i))return"multicategory";if(a&&Array.isArray(i[0])){for(var o=[],s=0;s<i.length;s++)if(yL(i[s]))for(var l=0;l<i[s].length;l++)o.push(i[s][l]);i=o}if(pst(i,r))return"date";var u=n.autotypenumbers!=="strict";return gst(i,u)?"category":vst(i,u)?"linear":"-"};function dst(e,t){return t?$oe(e):typeof e=="number"}function vst(e,t){for(var r=e.length,n=0;n<r;n++)if(dst(e[n],t))return!0;return!1}function pst(e,t){for(var r=e.length,n=ese(r),i=0,a=0,o={},s=0;s<r;s+=n){var l=Qoe(s),u=e[l],c=String(u);o[c]||(o[c]=1,fst(u,t)&&i++,$oe(u)&&a++)}return i>a*2}function ese(e){return Math.max(1,(e-1)/1e3)}function gst(e,t){for(var r=e.length,n=ese(r),i=0,a=0,o={},s=0;s<r;s+=n){var l=Qoe(s),u=e[l],c=String(u);if(!o[c]){o[c]=1;var f=typeof u;f==="boolean"?a++:(t?hst(u)!==cst:f==="number")?i++:f==="string"&&a++}}return a>i*2}function mst(e){return yL(e[0])&&yL(e[1])}});var Ag=ye((jnr,use)=>{"use strict";var yst=Oa(),ase=Eo(),o_=Dr(),_L=fs().FP_SAFE,_st=qa(),xst=So(),ose=hf(),bst=ose.getFromId,wst=ose.isLinked;use.exports={applyAutorangeOptions:lse,getAutoRange:Qq,makePadFn:eB,doAutoRange:Ast,findExtremes:Sst,concatExtremes:iB};function Qq(e,t){var r,n,i=[],a=e._fullLayout,o=eB(a,t,0),s=eB(a,t,1),l=iB(e,t),u=l.min,c=l.max;if(u.length===0||c.length===0)return o_.simpleMap(t.range,t.r2l);var f=u[0].val,h=c[0].val;for(r=1;r<u.length&&f===h;r++)f=Math.min(f,u[r].val);for(r=1;r<c.length&&f===h;r++)h=Math.max(h,c[r].val);var d=t.autorange,v=d==="reversed"||d==="min reversed"||d==="max reversed";if(!v&&t.range){var _=o_.simpleMap(t.range,t.r2l);v=_[1]<_[0]}t.autorange==="reversed"&&(t.autorange=!0);var b=t.rangemode,p=b==="tozero",k=b==="nonnegative",E=t._length,S=E/10,L=0,x,C,M,g,P,T;for(r=0;r<u.length;r++)for(x=u[r],n=0;n<c.length;n++)C=c[n],T=C.val-x.val-rse(t,x.val,C.val),T>0&&(P=E-o(x)-s(C),P>S?T/P>L&&(M=x,g=C,L=T/P):T/E>L&&(M={val:x.val,nopad:1},g={val:C.val,nopad:1},L=T/E));function z(j,N){return Math.max(j,s(N))}if(f===h){var O=f-1,V=f+1;if(p)if(f===0)i=[0,1];else{var G=(f>0?c:u).reduce(z,0),Z=f/(1-Math.min(.5,G/E));i=f>0?[0,Z]:[Z,0]}else k?i=[Math.max(0,O),Math.max(1,V)]:i=[O,V]}else p?(M.val>=0&&(M={val:0,nopad:1}),g.val<=0&&(g={val:0,nopad:1})):k&&(M.val-L*o(M)<0&&(M={val:0,nopad:1}),g.val<=0&&(g={val:1,nopad:1})),L=(g.val-M.val-rse(t,x.val,C.val))/(E-o(M)-s(g)),i=[M.val-L*o(M),g.val+L*s(g)];return i=lse(i,t),t.limitRange&&t.limitRange(),v&&i.reverse(),o_.simpleMap(i,t.l2r||Number)}function rse(e,t,r){var n=0;if(e.rangebreaks)for(var i=e.locateBreaks(t,r),a=0;a<i.length;a++){var o=i[a];n+=o.max-o.min}return n}function eB(e,t,r){var n=.05*t._length,i=t._anchorAxis||{};if((t.ticklabelposition||"").indexOf("inside")!==-1||(i.ticklabelposition||"").indexOf("inside")!==-1){var a=t.isReversed();if(!a){var o=o_.simpleMap(t.range,t.r2l);a=o[1]<o[0]}a&&(r=!r)}var s=0;return wst(e,t._id)||(s=Tst(e,t,r)),n=Math.max(s,n),t.constrain==="domain"&&t._inputDomain&&(n*=(t._inputDomain[1]-t._inputDomain[0])/(t.domain[1]-t.domain[0])),function(u){return u.nopad?0:u.pad+(u.extrapad?n:s)}}var ise=3;function Tst(e,t,r){var n=0,i=t._id.charAt(0)==="x";for(var a in e._plots){var o=e._plots[a];if(!(t._id!==o.xaxis._id&&t._id!==o.yaxis._id)){var s=(i?o.yaxis:o.xaxis)||{};if((s.ticklabelposition||"").indexOf("inside")!==-1&&(!r&&(s.side==="left"||s.side==="bottom")||r&&(s.side==="top"||s.side==="right"))){if(s._vals){var l=o_.deg2rad(s._tickAngles[s._id+"tick"]||0),u=Math.abs(Math.cos(l)),c=Math.abs(Math.sin(l));if(!s._vals[0].bb){var f=s._id+"tick",h=s._selections[f];h.each(function(k){var E=yst.select(this),S=E.select(".text-math-group");S.empty()&&(k.bb=xst.bBox(E.node()))})}for(var d=0;d<s._vals.length;d++){var v=s._vals[d],_=v.bb;if(_){var b=2*ise+_.width,p=2*ise+_.height;n=Math.max(n,i?Math.max(b*u,p*c):Math.max(p*u,b*c))}}}s.ticks==="inside"&&s.ticklabelposition==="inside"&&(n+=s.ticklen||0)}}}return n}function iB(e,t,r){var n=t._id,i=e._fullData,a=e._fullLayout,o=[],s=[],l,u,c;function f(b,p){for(l=0;l<p.length;l++){var k=b[p[l]],E=(k._extremes||{})[n];if(k.visible===!0&&E){for(u=0;u<E.min.length;u++)c=E.min[u],tB(o,c.val,c.pad,{extrapad:c.extrapad});for(u=0;u<E.max.length;u++)c=E.max[u],rB(s,c.val,c.pad,{extrapad:c.extrapad})}}}if(f(i,t._traceIndices),f(a.annotations||[],t._annIndices||[]),f(a.shapes||[],t._shapeIndices||[]),t._matchGroup&&!r){for(var h in t._matchGroup)if(h!==t._id){var d=bst(e,h),v=iB(e,d,!0),_=t._length/d._length;for(u=0;u<v.min.length;u++)c=v.min[u],tB(o,c.val,c.pad*_,{extrapad:c.extrapad});for(u=0;u<v.max.length;u++)c=v.max[u],rB(s,c.val,c.pad*_,{extrapad:c.extrapad})}}return{min:o,max:s}}function Ast(e,t,r){if(t.setScale(),t.autorange){t.range=r?r.slice():Qq(e,t),t._r=t.range.slice(),t._rl=o_.simpleMap(t._r,t.r2l);var n=t._input,i={};i[t._attr+".range"]=t.range,i[t._attr+".autorange"]=t.autorange,_st.call("_storeDirectGUIEdit",e.layout,e._fullLayout._preGUI,i),n.range=t.range.slice(),n.autorange=t.autorange}var a=t._anchorAxis;if(a&&a.rangeslider){var o=a.rangeslider[t._name];o&&o.rangemode==="auto"&&(o.range=Qq(e,t)),a._input.rangeslider[t._name]=o_.extendFlat({},o)}}function Sst(e,t,r){r||(r={}),e._m||e.setScale();var n=[],i=[],a=t.length,o=r.padded||!1,s=r.tozero&&(e.type==="linear"||e.type==="-"),l=e.type==="log",u=!1,c=r.vpadLinearized||!1,f,h,d,v,_,b,p,k,E;function S(z){if(Array.isArray(z))return u=!0,function(V){return Math.max(Number(z[V]||0),0)};var O=Math.max(Number(z||0),0);return function(){return O}}var L=S((e._m>0?r.ppadplus:r.ppadminus)||r.ppad||0),x=S((e._m>0?r.ppadminus:r.ppadplus)||r.ppad||0),C=S(r.vpadplus||r.vpad),M=S(r.vpadminus||r.vpad);if(!u){if(k=1/0,E=-1/0,l)for(f=0;f<a;f++)h=t[f],h<k&&h>0&&(k=h),h>E&&h<_L&&(E=h);else for(f=0;f<a;f++)h=t[f],h<k&&h>-_L&&(k=h),h>E&&h<_L&&(E=h);t=[k,E],a=2}var g={tozero:s,extrapad:o};function P(z){d=t[z],ase(d)&&(b=L(z),p=x(z),c?(v=e.c2l(d)-M(z),_=e.c2l(d)+C(z)):(k=d-M(z),E=d+C(z),l&&k<E/10&&(k=E/10),v=e.c2l(k),_=e.c2l(E)),s&&(v=Math.min(0,v),_=Math.max(0,_)),nse(v)&&tB(n,v,p,g),nse(_)&&rB(i,_,b,g))}var T=Math.min(6,a);for(f=0;f<T;f++)P(f);for(f=a-1;f>=T;f--)P(f);return{min:n,max:i,opts:r}}function tB(e,t,r,n){sse(e,t,r,n,Mst)}function rB(e,t,r,n){sse(e,t,r,n,Est)}function sse(e,t,r,n,i){for(var a=n.tozero,o=n.extrapad,s=!0,l=0;l<e.length&&s;l++){var u=e[l];if(i(u.val,t)&&u.pad>=r&&(u.extrapad||!o)){s=!1;break}else i(t,u.val)&&u.pad<=r&&(o||!u.extrapad)&&(e.splice(l,1),l--)}if(s){var c=a&&t===0;e.push({val:t,pad:c?0:r,extrapad:c?!1:o})}}function nse(e){return ase(e)&&Math.abs(e)<_L}function Mst(e,t){return e<=t}function Est(e,t){return e>=t}function kst(e,t){var r=t.autorangeoptions;return r&&r.minallowed!==void 0&&xL(t,r.minallowed,r.maxallowed)?r.minallowed:r&&r.clipmin!==void 0&&xL(t,r.clipmin,r.clipmax)?Math.max(e,t.d2l(r.clipmin)):e}function Cst(e,t){var r=t.autorangeoptions;return r&&r.maxallowed!==void 0&&xL(t,r.minallowed,r.maxallowed)?r.maxallowed:r&&r.clipmax!==void 0&&xL(t,r.clipmin,r.clipmax)?Math.min(e,t.d2l(r.clipmax)):e}function xL(e,t,r){return t!==void 0&&r!==void 0?(t=e.d2l(t),r=e.d2l(r),t<r):!0}function lse(e,t){if(!t||!t.autorangeoptions)return e;var r=e[0],n=e[1],i=t.autorangeoptions.include;if(i!==void 0){var a=t.d2l(r),o=t.d2l(n);o_.isArrayOrTypedArray(i)||(i=[i]);for(var s=0;s<i.length;s++){var l=t.d2l(i[s]);a>=l&&(a=l,r=l),o<=l&&(o=l,n=l)}}return r=kst(r,t),n=Cst(n,t),[r,n]}});var ho=ye((Wnr,Ise)=>{"use strict";var b0=Oa(),Ih=Eo(),D3=Mc(),aM=qa(),Jo=Dr(),F3=Jo.strTranslate,kb=ru(),Lst=Eb(),oM=ka(),Xp=So(),Pst=Rd(),cse=Eq(),Jd=fs(),Ist=Jd.ONEMAXYEAR,TL=Jd.ONEAVGYEAR,AL=Jd.ONEMINYEAR,Rst=Jd.ONEMAXQUARTER,sB=Jd.ONEAVGQUARTER,SL=Jd.ONEMINQUARTER,Dst=Jd.ONEMAXMONTH,z3=Jd.ONEAVGMONTH,ML=Jd.ONEMINMONTH,Zp=Jd.ONEWEEK,Ov=Jd.ONEDAY,s_=Ov/2,xm=Jd.ONEHOUR,sM=Jd.ONEMIN,EL=Jd.ONESEC,Fst=Jd.ONEMILLI,zst=Jd.ONEMICROSEC,Cb=Jd.MINUS_SIGN,CL=Jd.BADNUM,lB={K:"zeroline"},uB={K:"gridline",L:"path"},cB={K:"minor-gridline",L:"path"},bse={K:"tick",L:"path"},fse={K:"tick",L:"text"},hse={width:["x","r","l","xl","xr"],height:["y","t","b","yt","yb"],right:["r","xr"],left:["l","xl"],top:["t","yt"],bottom:["b","yb"]},LL=$h(),nM=LL.MID_SHIFT,Lb=LL.CAP_SHIFT,lM=LL.LINE_SPACING,Ost=LL.OPPOSITE_SIDE,kL=3,Qn=Ise.exports={};Qn.setConvert=ym();var qst=R3(),Sy=hf(),Bst=Sy.idSort,Nst=Sy.isLinked;Qn.id2name=Sy.id2name;Qn.name2id=Sy.name2id;Qn.cleanId=Sy.cleanId;Qn.list=Sy.list;Qn.listIds=Sy.listIds;Qn.getFromId=Sy.getFromId;Qn.getFromTrace=Sy.getFromTrace;var wse=Ag();Qn.getAutoRange=wse.getAutoRange;Qn.findExtremes=wse.findExtremes;var Ust=1e-4;function vB(e){var t=(e[1]-e[0])*Ust;return[e[0]-t,e[1]+t]}Qn.coerceRef=function(e,t,r,n,i,a){var o=n.charAt(n.length-1),s=r._fullLayout._subplots[o+"axis"],l=n+"ref",u={};return i||(i=s[0]||(typeof a=="string"?a:a[0])),a||(a=i),s=s.concat(s.map(function(c){return c+" domain"})),u[l]={valType:"enumerated",values:s.concat(a?typeof a=="string"?[a]:a:[]),dflt:i},Jo.coerce(e,t,u,l)};Qn.getRefType=function(e){return e===void 0?e:e==="paper"?"paper":e==="pixel"?"pixel":/( domain)$/.test(e)?"domain":"range"};Qn.coercePosition=function(e,t,r,n,i,a){var o,s,l=Qn.getRefType(n);if(l!=="range")o=Jo.ensureNumber,s=r(i,a);else{var u=Qn.getFromId(t,n);a=u.fraction2r(a),s=r(i,a),o=u.cleanPos}e[i]=o(s)};Qn.cleanPosition=function(e,t,r){var n=r==="paper"||r==="pixel"?Jo.ensureNumber:Qn.getFromId(t,r).cleanPos;return n(e)};Qn.redrawComponents=function(e,t){t=t||Qn.listIds(e);var r=e._fullLayout;function n(i,a,o,s){for(var l=aM.getComponentMethod(i,a),u={},c=0;c<t.length;c++)for(var f=r[Qn.id2name(t[c])],h=f[o],d=0;d<h.length;d++){var v=h[d];if(!u[v]&&(l(e,v),u[v]=1,s))return}}n("annotations","drawOne","_annIndices"),n("shapes","drawOne","_shapeIndices"),n("images","draw","_imgIndices",!0),n("selections","drawOne","_selectionIndices")};var Vst=Qn.getDataConversions=function(e,t,r,n){var i,a=r==="x"||r==="y"||r==="z"?r:n;if(Jo.isArrayOrTypedArray(a)){if(i={type:qst(n,void 0,{autotypenumbers:e._fullLayout.autotypenumbers}),_categories:[]},Qn.setConvert(i),i.type==="category")for(var o=0;o<n.length;o++)i.d2c(n[o])}else i=Qn.getFromTrace(e,t,a);return i?{d2c:i.d2c,c2d:i.c2d}:a==="ids"?{d2c:vse,c2d:vse}:{d2c:dse,c2d:dse}};function dse(e){return+e}function vse(e){return String(e)}Qn.getDataToCoordFunc=function(e,t,r,n){return Vst(e,t,r,n).d2c};Qn.counterLetter=function(e){var t=e.charAt(0);if(t==="x")return"y";if(t==="y")return"x"};Qn.minDtick=function(e,t,r,n){["log","category","multicategory"].indexOf(e.type)!==-1||!n?e._minDtick=0:e._minDtick===void 0?(e._minDtick=t,e._forceTick0=r):e._minDtick&&((e._minDtick/t+1e-6)%1<2e-6&&((r-e._forceTick0)/t%1+1.000001)%1<2e-6?(e._minDtick=t,e._forceTick0=r):((t/e._minDtick+1e-6)%1>2e-6||((r-e._forceTick0)/e._minDtick%1+1.000001)%1>2e-6)&&(e._minDtick=0))};Qn.saveRangeInitial=function(e,t){for(var r=Qn.list(e,"",!0),n=!1,i=0;i<r.length;i++){var a=r[i],o=a._rangeInitial0===void 0&&a._rangeInitial1===void 0,s=o||a.range[0]!==a._rangeInitial0||a.range[1]!==a._rangeInitial1,l=a.autorange;(o&&l!==!0||t&&s)&&(a._rangeInitial0=l==="min"||l==="max reversed"?void 0:a.range[0],a._rangeInitial1=l==="max"||l==="min reversed"?void 0:a.range[1],a._autorangeInitial=l,n=!0)}return n};Qn.saveShowSpikeInitial=function(e,t){for(var r=Qn.list(e,"",!0),n=!1,i="on",a=0;a<r.length;a++){var o=r[a],s=o._showSpikeInitial===void 0,l=s||o.showspikes!==o._showspikes;(s||t&&l)&&(o._showSpikeInitial=o.showspikes,n=!0),i==="on"&&!o.showspikes&&(i="off")}return e._fullLayout._cartesianSpikesEnabled=i,n};Qn.autoBin=function(e,t,r,n,i,a){var o=Jo.aggNums(Math.min,null,e),s=Jo.aggNums(Math.max,null,e);if(t.type==="category"||t.type==="multicategory")return{start:o-.5,end:s+.5,size:Math.max(1,Math.round(a)||1),_dataSpan:s-o};i||(i=t.calendar);var l;if(t.type==="log"?l={type:"linear",range:[o,s]}:l={type:t.type,range:Jo.simpleMap([o,s],t.c2r,0,i),calendar:i},Qn.setConvert(l),a=a&&cse.dtick(a,l.type),a)l.dtick=a,l.tick0=cse.tick0(void 0,l.type,i);else{var u;if(r)u=(s-o)/r;else{var c=Jo.distinctVals(e),f=Math.pow(10,Math.floor(Math.log(c.minDiff)/Math.LN10)),h=f*Jo.roundUp(c.minDiff/f,[.9,1.9,4.9,9.9],!0);u=Math.max(h,2*Jo.stdev(e)/Math.pow(e.length,n?.25:.4)),Ih(u)||(u=1)}Qn.autoTicks(l,u)}var d=l.dtick,v=Qn.tickIncrement(Qn.tickFirst(l),d,"reverse",i),_,b;if(typeof d=="number")v=Gst(v,e,l,o,s),b=1+Math.floor((s-v)/d),_=v+b*d;else for(l.dtick.charAt(0)==="M"&&(v=Hst(v,e,d,o,i)),_=v,b=0;_<=s;)_=Qn.tickIncrement(_,d,!1,i),b++;return{start:t.c2r(v,0,i),end:t.c2r(_,0,i),size:d,_dataSpan:s-o}};function Gst(e,t,r,n,i){var a=0,o=0,s=0,l=0;function u(d){return(1+(d-e)*100/r.dtick)%100<2}for(var c=0;c<t.length;c++)t[c]%1===0?s++:Ih(t[c])||l++,u(t[c])&&a++,u(t[c]+r.dtick/2)&&o++;var f=t.length-l;if(s===f&&r.type!=="date")r.dtick<1?e=n-.5*r.dtick:(e-=.5,e+r.dtick<n&&(e+=r.dtick));else if(o<f*.1&&(a>f*.3||u(n)||u(i))){var h=r.dtick/2;e+=e+h<n?h:-h}return e}function Hst(e,t,r,n,i){var a=Jo.findExactDates(t,i),o=.8;if(a.exactDays>o){var s=Number(r.slice(1));a.exactYears>o&&s%12===0?e=Qn.tickIncrement(e,"M6","reverse")+Ov*1.5:a.exactMonths>o?e=Qn.tickIncrement(e,"M1","reverse")+Ov*15.5:e-=s_;var l=Qn.tickIncrement(e,r);if(l<=n)return l}return e}Qn.prepMinorTicks=function(e,t,r){if(!t.minor.dtick){delete e.dtick;var n=t.dtick&&Ih(t._tmin),i;if(n){var a=Qn.tickIncrement(t._tmin,t.dtick,!0);i=[t._tmin,a*.99+t._tmin*.01]}else{var o=Jo.simpleMap(t.range,t.r2l);i=[o[0],.8*o[0]+.2*o[1]]}if(e.range=Jo.simpleMap(i,t.l2r),e._isMinor=!0,Qn.prepTicks(e,r),n){var s=Ih(t.dtick),l=Ih(e.dtick),u=s?t.dtick:+t.dtick.substring(1),c=l?e.dtick:+e.dtick.substring(1);s&&l?nB(u,c)?u===2*Zp&&c===2*Ov&&(e.dtick=Zp):u===2*Zp&&c===3*Ov?e.dtick=Zp:u===Zp&&!(t._input.minor||{}).nticks?e.dtick=Ov:pse(u/c,2.5)?e.dtick=u/2:e.dtick=u:String(t.dtick).charAt(0)==="M"?l?e.dtick="M1":nB(u,c)?u>=12&&c===2&&(e.dtick="M3"):e.dtick=t.dtick:String(e.dtick).charAt(0)==="L"?String(t.dtick).charAt(0)==="L"?nB(u,c)||(e.dtick=pse(u/c,2.5)?t.dtick/2:t.dtick):e.dtick="D1":e.dtick==="D2"&&+t.dtick>1&&(e.dtick=1)}e.range=t.range}t.minor._tick0Init===void 0&&(e.tick0=t.tick0)};function nB(e,t){return Math.abs((e/t+.5)%1-.5)<.001}function pse(e,t){return Math.abs(e/t-1)<.001}Qn.prepTicks=function(e,t){var r=Jo.simpleMap(e.range,e.r2l,void 0,void 0,t);if(e.tickmode==="auto"||!e.dtick){var n=e.nticks,i;n||(e.type==="category"||e.type==="multicategory"?(i=e.tickfont?Jo.bigFont(e.tickfont.size||12):15,n=e._length/i):(i=e._id.charAt(0)==="y"?40:80,n=Jo.constrain(e._length/i,4,9)+1),e._name==="radialaxis"&&(n*=2)),e.minor&&e.minor.tickmode!=="array"||e.tickmode==="array"&&(n*=100),e._roughDTick=Math.abs(r[1]-r[0])/n,Qn.autoTicks(e,e._roughDTick),e._minDtick>0&&e.dtick<e._minDtick*2&&(e.dtick=e._minDtick,e.tick0=e.l2r(e._forceTick0))}e.ticklabelmode==="period"&&jst(e),e.tick0||(e.tick0=e.type==="date"?"2000-01-01":0),e.type==="date"&&e.dtick<.1&&(e.dtick=.1),Mse(e)};function aB(e){return+e.substring(1)}function jst(e){var t;function r(){return!(Ih(e.dtick)||e.dtick.charAt(0)!=="M")}var n=r(),i=Qn.getTickFormat(e);if(i){var a=e._dtickInit!==e.dtick;/%[fLQsSMX]/.test(i)||(/%[HI]/.test(i)?(t=xm,a&&!n&&e.dtick<xm&&(e.dtick=xm)):/%p/.test(i)?(t=s_,a&&!n&&e.dtick<s_&&(e.dtick=s_)):/%[Aadejuwx]/.test(i)?(t=Ov,a&&!n&&e.dtick<Ov&&(e.dtick=Ov)):/%[UVW]/.test(i)?(t=Zp,a&&!n&&e.dtick<Zp&&(e.dtick=Zp)):/%[Bbm]/.test(i)?(t=z3,a&&(n?aB(e.dtick)<1:e.dtick<ML)&&(e.dtick="M1")):/%[q]/.test(i)?(t=sB,a&&(n?aB(e.dtick)<3:e.dtick<SL)&&(e.dtick="M3")):/%[Yy]/.test(i)&&(t=TL,a&&(n?aB(e.dtick)<12:e.dtick<AL)&&(e.dtick="M12")))}n=r(),n&&e.tick0===e._dowTick0&&(e.tick0=e._rawTick0),e._definedDelta=t}function Wst(e,t,r){for(var n=0;n<e.length;n++){var i=e[n].value,a=n,o=n+1;n<e.length-1?(a=n,o=n+1):n>0?(a=n-1,o=n):(a=n,o=n);var s=e[a].value,l=e[o].value,u=Math.abs(l-s),c=r||u,f=0;c>=AL?u>=AL&&u<=Ist?f=u:f=TL:r===sB&&c>=SL?u>=SL&&u<=Rst?f=u:f=sB:c>=ML?u>=ML&&u<=Dst?f=u:f=z3:r===Zp&&c>=Zp?f=Zp:c>=Ov?f=Ov:r===s_&&c>=s_?f=s_:r===xm&&c>=xm&&(f=xm);var h;f>=u&&(f=u,h=!0);var d=i+f;if(t.rangebreaks&&f>0){for(var v=84,_=0,b=0;b<v;b++){var p=(b+.5)/v;t.maskBreaks(i*(1-p)+p*d)!==CL&&_++}f*=_/v,f||(e[n].drop=!0),h&&u>Zp&&(f=u)}(f>0||n===0)&&(e[n].periodX=i+f/2)}}Qn.calcTicks=function(t,r){for(var n=t.type,i=t.calendar,a=t.ticklabelstep,o=t.ticklabelmode==="period",s=t.range[0]>t.range[1],l=!t.ticklabelindex||Jo.isArrayOrTypedArray(t.ticklabelindex)?t.ticklabelindex:[t.ticklabelindex],u=Jo.simpleMap(t.range,t.r2l,void 0,void 0,r),c=u[1]<u[0],f=Math.min(u[0],u[1]),h=Math.max(u[0],u[1]),d=Math.max(1e3,t._length||0),v=[],_=[],b=[],p=[],k=[],E=t.minor&&(t.minor.ticks||t.minor.showgrid),S=1;S>=(E?0:1);S--){var L=!S;S?(t._dtickInit=t.dtick,t._tick0Init=t.tick0):(t.minor._dtickInit=t.minor.dtick,t.minor._tick0Init=t.minor.tick0);var x=S?t:Jo.extendFlat({},t,t.minor);if(L?Qn.prepMinorTicks(x,t,r):Qn.prepTicks(x,r),x.tickmode==="array"){S?(b=[],v=gse(t,!L)):(p=[],_=gse(t,!L));continue}if(x.tickmode==="sync"){b=[],v=Xst(t);continue}var C=vB(u),M=C[0],g=C[1],P=Ih(x.dtick),T=n==="log"&&!(P||x.dtick.charAt(0)==="L"),z=Qn.tickFirst(x,r);if(S){if(t._tmin=z,z<M!==c)break;(n==="category"||n==="multicategory")&&(g=c?Math.max(-.5,g):Math.min(t._categories.length-.5,g))}var O=null,V=z,G;if(S){var Z;P?Z=t.dtick:n==="date"?typeof t.dtick=="string"&&t.dtick.charAt(0)==="M"&&(Z=z3*t.dtick.substring(1)):Z=t._roughDTick,G=Math.round((t.r2l(V)-t.r2l(t.tick0))/Z)-1}var j=x.dtick;for(x.rangebreaks&&x._tick0Init!==x.tick0&&(V=oB(V,t),c||(V=Qn.tickIncrement(V,j,!c,i))),S&&o&&(V=Qn.tickIncrement(V,j,!c,i),G--);c?V>=g:V<=g;V=Qn.tickIncrement(V,j,c,i)){if(S&&G++,x.rangebreaks&&!c){if(V<M)continue;if(x.maskBreaks(V)===CL&&oB(V,x)>=h)break}if(b.length>d||V===O)break;O=V;var N={value:V};S?(T&&V!==(V|0)&&(N.simpleLabel=!0),a>1&&G%a&&(N.skipLabel=!0),b.push(N)):(N.minor=!0,p.push(N))}}if(!p||p.length<2)l=!1;else{var H=(p[1].value-p[0].value)*(s?-1:1);ylt(H,t.tickformat)||(l=!1)}if(!l)k=b;else{var te=b.concat(p);o&&b.length&&(te=te.slice(1)),te=te.sort(function(Gt,Nt){return Gt.value-Nt.value}).filter(function(Gt,Nt,Jt){return Nt===0||Gt.value!==Jt[Nt-1].value});var oe=te.map(function(Gt,Nt){return Gt.minor===void 0&&!Gt.skipLabel?Nt:null}).filter(function(Gt){return Gt!==null});oe.forEach(function(Gt){l.map(function(Nt){var Jt=Gt+Nt;Jt>=0&&Jt<te.length&&Jo.pushUnique(k,te[Jt])})})}if(E){var _e=t.minor.ticks==="inside"&&t.ticks==="outside"||t.minor.ticks==="outside"&&t.ticks==="inside";if(!_e){for(var Ee=b.map(function(Gt){return Gt.value}),Ce=[],me=0;me<p.length;me++){var ie=p[me],Se=ie.value;if(Ee.indexOf(Se)===-1){for(var Le=!1,Ae=0;!Le&&Ae<b.length;Ae++)1e7+b[Ae].value===1e7+Se&&(Le=!0);Le||Ce.push(ie)}}p=Ce}}o&&Wst(k,t,t._definedDelta);var Fe;if(t.rangebreaks){var Pe=t._id.charAt(0)==="y",ge=1;t.tickmode==="auto"&&(ge=t.tickfont?t.tickfont.size:12);var Re=NaN;for(Fe=b.length-1;Fe>-1;Fe--){if(b[Fe].drop){b.splice(Fe,1);continue}b[Fe].value=oB(b[Fe].value,t);var ce=t.c2p(b[Fe].value);(Pe?Re>ce-ge:Re<ce+ge)?b.splice(c?Fe+1:Fe,1):Re=ce}}yB(t)&&Math.abs(u[1]-u[0])===360&&b.pop(),t._tmax=(b[b.length-1]||{}).value,t._prevDateHead="",t._inCalcTicks=!0;var Ze,ut=function(Gt){Gt.text="",t._prevDateHead=Ze};b=b.concat(p);function pt(Gt,Nt){var Jt=Qn.tickText(Gt,Nt.value,!1,Nt.simpleLabel),sr=Nt.periodX;return sr!==void 0&&(Jt.periodX=sr,(sr>h||sr<f)&&(sr>h&&(Jt.periodX=h),sr<f&&(Jt.periodX=f),ut(Jt))),Jt}var Zt;for(Fe=0;Fe<b.length;Fe++){var st=b[Fe].minor,lt=b[Fe].value;st?(l&&k.indexOf(b[Fe])!==-1?Zt=pt(t,b[Fe]):Zt={x:lt},Zt.minor=!0,_.push(Zt)):(Ze=t._prevDateHead,Zt=pt(t,b[Fe]),(b[Fe].skipLabel||l&&k.indexOf(b[Fe])===-1)&&ut(Zt),v.push(Zt))}return v=v.concat(_),t._inCalcTicks=!1,o&&v.length&&(v[0].noTick=!0),v};function Tse(e,t){return e.rangebreaks&&(t=t.filter(function(r){return e.maskBreaks(r.x)!==CL})),t}function Xst(e){var t=e._mainAxis,r=[];if(t._vals){for(var n=0;n<t._vals.length;n++)if(!t._vals[n].noTick){var i=t.l2p(t._vals[n].x),a=e.p2l(i),o=Qn.tickText(e,a);t._vals[n].minor&&(o.minor=!0,o.text=""),r.push(o)}}return r=Tse(e,r),r}function gse(e,t){var r=Jo.simpleMap(e.range,e.r2l),n=vB(r),i=Math.min(n[0],n[1]),a=Math.max(n[0],n[1]),o=e.type==="category"?e.d2l_noadd:e.d2l;e.type==="log"&&String(e.dtick).charAt(0)!=="L"&&(e.dtick="L"+Math.pow(10,Math.floor(Math.min(e.range[0],e.range[1]))-1));for(var s=[],l=0;l<=1;l++)if(!(t!==void 0&&(t&&l||t===!1&&!l))&&!(l&&!e.minor)){var u=l?e.minor.tickvals:e.tickvals,c=l?[]:e.ticktext;if(u){Jo.isArrayOrTypedArray(c)||(c=[]);for(var f=0;f<u.length;f++){var h=o(u[f]);if(h>i&&h<a){var d=Qn.tickText(e,h,!1,String(c[f]));l&&(d.minor=!0,d.text=""),s.push(d)}}}}return s=Tse(e,s),s}var bL=[2,5,10],mse=[1,2,3,6,12],yse=[1,2,5,10,15,30],Zst=[1,2,3,7,14],Ase=[-.046,0,.301,.477,.602,.699,.778,.845,.903,.954,1],Sse=[-.301,0,.301,.699,1],Yst=[15,30,45,90,180];function _m(e,t,r){return t*Jo.roundUp(e/t,r)}Qn.autoTicks=function(e,t,r){var n;function i(f){return Math.pow(f,Math.floor(Math.log(t)/Math.LN10))}if(e.type==="date"){e.tick0=Jo.dateTick0(e.calendar,0);var a=2*t;if(a>TL)t/=TL,n=i(10),e.dtick="M"+12*_m(t,n,bL);else if(a>z3)t/=z3,e.dtick="M"+_m(t,1,mse);else if(a>Ov){if(e.dtick=_m(t,Ov,e._hasDayOfWeekBreaks?[1,2,7,14]:Zst),!r){var o=Qn.getTickFormat(e),s=e.ticklabelmode==="period";s&&(e._rawTick0=e.tick0),/%[uVW]/.test(o)?e.tick0=Jo.dateTick0(e.calendar,2):e.tick0=Jo.dateTick0(e.calendar,1),s&&(e._dowTick0=e.tick0)}}else a>xm?e.dtick=_m(t,xm,mse):a>sM?e.dtick=_m(t,sM,yse):a>EL?e.dtick=_m(t,EL,yse):(n=i(10),e.dtick=_m(t,n,bL))}else if(e.type==="log"){e.tick0=0;var l=Jo.simpleMap(e.range,e.r2l);if(e._isMinor&&(t*=1.5),t>.7)e.dtick=Math.ceil(t);else if(Math.abs(l[1]-l[0])<1){var u=1.5*Math.abs((l[1]-l[0])/t);t=Math.abs(Math.pow(10,l[1])-Math.pow(10,l[0]))/u,n=i(10),e.dtick="L"+_m(t,n,bL)}else e.dtick=t>.3?"D2":"D1"}else e.type==="category"||e.type==="multicategory"?(e.tick0=0,e.dtick=Math.ceil(Math.max(t,1))):yB(e)?(e.tick0=0,n=1,e.dtick=_m(t,n,Yst)):(e.tick0=0,n=i(10),e.dtick=_m(t,n,bL));if(e.dtick===0&&(e.dtick=1),!Ih(e.dtick)&&typeof e.dtick!="string"){var c=e.dtick;throw e.dtick=1,"ax.dtick error: "+String(c)}};function Mse(e){var t=e.dtick;if(e._tickexponent=0,!Ih(t)&&typeof t!="string"&&(t=1),(e.type==="category"||e.type==="multicategory")&&(e._tickround=null),e.type==="date"){var r=e.r2l(e.tick0),n=e.l2r(r).replace(/(^-|i)/g,""),i=n.length;if(String(t).charAt(0)==="M")i>10||n.slice(5)!=="01-01"?e._tickround="d":e._tickround=+t.slice(1)%12===0?"y":"m";else if(t>=Ov&&i<=10||t>=Ov*15)e._tickround="d";else if(t>=sM&&i<=16||t>=xm)e._tickround="M";else if(t>=EL&&i<=19||t>=sM)e._tickround="S";else{var a=e.l2r(r+t).replace(/^-/,"").length;e._tickround=Math.max(i,a)-20,e._tickround<0&&(e._tickround=4)}}else if(Ih(t)||t.charAt(0)==="L"){var o=e.range.map(e.r2d||Number);Ih(t)||(t=Number(t.slice(1))),e._tickround=2-Math.floor(Math.log(t)/Math.LN10+.01);var s=Math.max(Math.abs(o[0]),Math.abs(o[1])),l=Math.floor(Math.log(s)/Math.LN10+.01),u=e.minexponent===void 0?3:e.minexponent;Math.abs(l)>u&&(O3(e.exponentformat)&&e.exponentformat!=="SI extended"&&!pB(l)||O3(e.exponentformat)&&e.exponentformat==="SI extended"&&!gB(l)?e._tickexponent=3*Math.round((l-1)/3):e._tickexponent=l)}else e._tickround=null}Qn.tickIncrement=function(e,t,r,n){var i=r?-1:1;if(Ih(t))return Jo.increment(e,i*t);var a=t.charAt(0),o=i*Number(t.slice(1));if(a==="M")return Jo.incrementMonth(e,o,n);if(a==="L")return Math.log(Math.pow(10,e)+o)/Math.LN10;if(a==="D"){var s=t==="D2"?Sse:Ase,l=e+i*.01,u=Jo.roundUp(Jo.mod(l,1),s,r);return Math.floor(l)+Math.log(b0.round(Math.pow(10,u),1))/Math.LN10}throw"unrecognized dtick "+String(t)};Qn.tickFirst=function(e,t){var r=e.r2l||Number,n=Jo.simpleMap(e.range,r,void 0,void 0,t),i=n[1]<n[0],a=i?Math.floor:Math.ceil,o=vB(n)[0],s=e.dtick,l=r(e.tick0);if(Ih(s)){var u=a((o-l)/s)*s+l;return(e.type==="category"||e.type==="multicategory")&&(u=Jo.constrain(u,0,e._categories.length-1)),u}var c=s.charAt(0),f=Number(s.slice(1));if(c==="M"){for(var h=0,d=l,v,_,b;h<10;){if(v=Qn.tickIncrement(d,s,i,e.calendar),(v-o)*(d-o)<=0)return i?Math.min(d,v):Math.max(d,v);_=(o-(d+v)/2)/(v-d),b=c+(Math.abs(Math.round(_))||1)*f,d=Qn.tickIncrement(d,b,_<0?!i:i,e.calendar),h++}return Jo.error("tickFirst did not converge",e),d}else{if(c==="L")return Math.log(a((Math.pow(10,o)-l)/f)*f+l)/Math.LN10;if(c==="D"){var p=s==="D2"?Sse:Ase,k=Jo.roundUp(Jo.mod(o,1),p,i);return Math.floor(o)+Math.log(b0.round(Math.pow(10,k),1))/Math.LN10}else throw"unrecognized dtick "+String(s)}};Qn.tickText=function(e,t,r,n){var i=Ese(e,t),a=e.tickmode==="array",o=r||a,s=e.type,l=s==="category"?e.d2l_noadd:e.d2l,u,c=function(b){var p=e.l2p(b);return p>=0&&p<=e._length?b:null};if(a&&Jo.isArrayOrTypedArray(e.ticktext)){var f=Jo.simpleMap(e.range,e.r2l),h=(Math.abs(f[1]-f[0])-(e._lBreaks||0))/1e4;for(u=0;u<e.ticktext.length&&!(Math.abs(t-l(e.tickvals[u]))<h);u++);if(u<e.ticktext.length)return i.text=String(e.ticktext[u]),i.xbnd=[c(i.x-.5),c(i.x+e.dtick-.5)],i}function d(b){if(b===void 0)return!0;if(r)return b==="none";var p={first:e._tmin,last:e._tmax}[b];return b!=="all"&&t!==p}var v=r?"never":e.exponentformat!=="none"&&d(e.showexponent)?"hide":"";if(s==="date"?Kst(e,i,r,o):s==="log"?Jst(e,i,r,o,v):s==="category"?$st(e,i):s==="multicategory"?Qst(e,i,r):yB(e)?tlt(e,i,r,o,v):elt(e,i,r,o,v),n||(e.tickprefix&&!d(e.showtickprefix)&&(i.text=e.tickprefix+i.text),e.ticksuffix&&!d(e.showticksuffix)&&(i.text+=e.ticksuffix)),e.labelalias&&e.labelalias.hasOwnProperty(i.text)){var _=e.labelalias[i.text];typeof _=="string"&&(i.text=_)}return(e.tickson==="boundaries"||e.showdividers)&&(i.xbnd=[c(i.x-.5),c(i.x+e.dtick-.5)]),i};Qn.hoverLabelText=function(e,t,r){r&&(e=Jo.extendFlat({},e,{hoverformat:r}));var n=Jo.isArrayOrTypedArray(t)?t[0]:t,i=Jo.isArrayOrTypedArray(t)?t[1]:void 0;if(i!==void 0&&i!==n)return Qn.hoverLabelText(e,n,r)+" - "+Qn.hoverLabelText(e,i,r);var a=e.type==="log"&&n<=0,o=Qn.tickText(e,e.c2l(a?-n:n),"hover").text;return a?n===0?"0":Cb+o:o};function Ese(e,t,r){var n=e.tickfont||{};return{x:t,dx:0,dy:0,text:r||"",fontSize:n.size,font:n.family,fontWeight:n.weight,fontStyle:n.style,fontVariant:n.variant,fontTextcase:n.textcase,fontLineposition:n.lineposition,fontShadow:n.shadow,fontColor:n.color}}function Kst(e,t,r,n){var i=e._tickround,a=r&&e.hoverformat||Qn.getTickFormat(e);n=!a&&n,n&&(Ih(i)?i=4:i={y:"m",m:"d",d:"M",M:"S",S:4}[i]);var o=Jo.formatDate(t.x,a,i,e._dateFormat,e.calendar,e._extraFormat),s,l=o.indexOf(`
+`);if(l!==-1&&(s=o.slice(l+1),o=o.slice(0,l)),n&&(s!==void 0&&(o==="00:00:00"||o==="00:00")?(o=s,s=""):o.length===8&&(o=o.replace(/:00$/,""))),s)if(r)i==="d"?o+=", "+s:o=s+(o?", "+o:"");else if(!e._inCalcTicks||e._prevDateHead!==s)e._prevDateHead=s,o+="<br>"+s;else{var u=cM(e),c=e._trueSide||e.side;(!u&&c==="top"||u&&c==="bottom")&&(o+="<br> ")}t.text=o}function Jst(e,t,r,n,i){var a=e.dtick,o=t.x,s=e.tickformat,l=typeof a=="string"&&a.charAt(0);if(i==="never"&&(i=""),n&&l!=="L"&&(a="L3",l="L"),s||l==="L")t.text=uM(Math.pow(10,o),e,i,n);else if(Ih(a)||l==="D"&&(e.minorloglabels==="complete"||Jo.mod(o+.01,1)<.1)){var u;e.minorloglabels==="complete"&&!(Jo.mod(o+.01,1)<.1)&&(u=!0,t.fontSize*=.75);var c=Math.pow(10,o).toExponential(0),f=c.split("e"),h=+f[1],d=Math.abs(h),v=e.exponentformat;v==="power"||O3(v)&&v!=="SI extended"&&pB(h)||O3(v)&&v==="SI extended"&&gB(h)?(t.text=f[0],d>0&&(t.text+="x10"),t.text==="1x10"&&(t.text="10"),h!==0&&h!==1&&(t.text+="<sup>"+(h>0?"":Cb)+d+"</sup>"),t.fontSize*=1.25):(v==="e"||v==="E")&&d>2?t.text=f[0]+v+(h>0?"+":Cb)+d:(t.text=uM(Math.pow(10,o),e,"","fakehover"),a==="D1"&&e._id.charAt(0)==="y"&&(t.dy-=t.fontSize/6))}else if(l==="D")t.text=e.minorloglabels==="none"?"":String(Math.round(Math.pow(10,Jo.mod(o,1)))),t.fontSize*=.75;else throw"unrecognized dtick "+String(a);if(e.dtick==="D1"){var _=String(t.text).charAt(0);(_==="0"||_==="1")&&(e._id.charAt(0)==="y"?t.dx-=t.fontSize/4:(t.dy+=t.fontSize/2,t.dx+=(e.range[1]>e.range[0]?1:-1)*t.fontSize*(o<0?.5:.25)))}}function $st(e,t){var r=e._categories[Math.round(t.x)];r===void 0&&(r=""),t.text=String(r)}function Qst(e,t,r){var n=Math.round(t.x),i=e._categories[n]||[],a=i[1]===void 0?"":String(i[1]),o=i[0]===void 0?"":String(i[0]);r?t.text=o+" - "+a:(t.text=a,t.text2=o)}function elt(e,t,r,n,i){i==="never"?i="":e.showexponent==="all"&&Math.abs(t.x/e.dtick)<1e-6&&(i="hide"),t.text=uM(t.x,e,i,n)}function tlt(e,t,r,n,i){if(e.thetaunit==="radians"&&!r){var a=t.x/180;if(a===0)t.text="0";else{var o=rlt(a);if(o[1]>=100)t.text=uM(Jo.deg2rad(t.x),e,i,n);else{var s=t.x<0;o[1]===1?o[0]===1?t.text="\u03C0":t.text=o[0]+"\u03C0":t.text=["<sup>",o[0],"</sup>","\u2044","<sub>",o[1],"</sub>","\u03C0"].join(""),s&&(t.text=Cb+t.text)}}}else t.text=uM(t.x,e,i,n)}function rlt(e){function t(s,l){return Math.abs(s-l)<=1e-6}function r(s,l){return t(l,0)?s:r(l,s%l)}function n(s){for(var l=1;!t(Math.round(s*l)/l,s);)l*=10;return l}var i=n(e),a=e*i,o=Math.abs(r(a,i));return[Math.round(a/o),Math.round(i/o)]}var kse=["f","p","n","\u03BC","m","","k","M","G","T"],ilt=["q","r","y","z","a",...kse,"P","E","Z","Y","R","Q"],O3=e=>["SI","SI extended","B"].includes(e);function pB(e){return e>14||e<-15}function gB(e){return e>32||e<-30}function nlt(e,t){return O3(t)?!!(t==="SI extended"&&gB(e)||t!=="SI extended"&&pB(e)):!1}function uM(e,t,r,n){var i=e<0,a=t._tickround,o=r||t.exponentformat||"B",s=t._tickexponent,l=Qn.getTickFormat(t),u=t.separatethousands;if(n){var c={exponentformat:o,minexponent:t.minexponent,dtick:t.showexponent==="none"?t.dtick:Ih(e)&&Math.abs(e)||1,range:t.showexponent==="none"?t.range.map(t.r2d):[0,e||1]};Mse(c),a=(Number(c._tickround)||0)+4,s=c._tickexponent,t.hoverformat&&(l=t.hoverformat)}if(l)return t._numFormat(l)(e).replace(/-/g,Cb);var f=Math.pow(10,-a)/2;if(o==="none"&&(s=0),e=Math.abs(e),e<f)e="0",i=!1;else{if(e+=f,s&&(e*=Math.pow(10,-s),a+=s),a===0)e=String(Math.floor(e));else if(a<0){e=String(Math.round(e)),e=e.slice(0,Math.max(0,e.length+a));for(var h=a;h<0;h++)e+="0"}else{e=String(e);var d=e.indexOf(".")+1;d&&(e=e.slice(0,d+a).replace(/\.?0+$/,""))}e=Jo.numSeparate(e,t._separators,u)}if(s&&o!=="hide"){nlt(s,o)&&(o="power");var v;s<0?v=Cb+-s:o!=="power"?v="+"+s:v=String(s),o==="e"||o==="E"?e+=o+v:o==="power"?e+="\xD710<sup>"+v+"</sup>":o==="B"&&s===9?e+="B":O3(o)&&(e+=o==="SI extended"?ilt[s/3+10]:kse[s/3+5])}return i?Cb+e:e}Qn.getTickFormat=function(e){var t;function r(l){return typeof l!="string"?l:Number(l.replace("M",""))*z3}function n(l,u){var c=["L","D"];if(typeof l==typeof u){if(typeof l=="number")return l-u;var f=c.indexOf(l.charAt(0)),h=c.indexOf(u.charAt(0));return f===h?Number(l.replace(/(L|D)/g,""))-Number(u.replace(/(L|D)/g,"")):f-h}else return typeof l=="number"?1:-1}function i(l,u,c){var f=c||function(v){return v},h=u[0],d=u[1];return(!h&&typeof h!="number"||f(h)<=f(l))&&(!d&&typeof d!="number"||f(d)>=f(l))}function a(l,u){var c=u[0]===null,f=u[1]===null,h=n(l,u[0])>=0,d=n(l,u[1])<=0;return(c||h)&&(f||d)}var o,s;if(e.tickformatstops&&e.tickformatstops.length>0)switch(e.type){case"date":case"linear":{for(t=0;t<e.tickformatstops.length;t++)if(s=e.tickformatstops[t],s.enabled&&i(e.dtick,s.dtickrange,r)){o=s;break}break}case"log":{for(t=0;t<e.tickformatstops.length;t++)if(s=e.tickformatstops[t],s.enabled&&a(e.dtick,s.dtickrange)){o=s;break}break}default:}return o?o.value:e.tickformat};Qn.getSubplots=function(e,t){var r=e._fullLayout._subplots,n=r.cartesian.concat(r.gl2d||[]),i=t?Qn.findSubplotsWithAxis(n,t):n;return i.sort(function(a,o){var s=a.slice(1).split("y"),l=o.slice(1).split("y");return s[0]===l[0]?+s[1]-+l[1]:+s[0]-+l[0]}),i};Qn.findSubplotsWithAxis=function(e,t){for(var r=new RegExp(t._id.charAt(0)==="x"?"^"+t._id+"y":t._id+"$"),n=[],i=0;i<e.length;i++){var a=e[i];r.test(a)&&n.push(a)}return n};Qn.makeClipPaths=function(e){var t=e._fullLayout;if(!t._hasOnlyLargeSploms){var r={_offset:0,_length:t.width,_id:""},n={_offset:0,_length:t.height,_id:""},i=Qn.list(e,"x",!0),a=Qn.list(e,"y",!0),o=[],s,l;for(s=0;s<i.length;s++)for(o.push({x:i[s],y:n}),l=0;l<a.length;l++)s===0&&o.push({x:r,y:a[l]}),o.push({x:i[s],y:a[l]});var u=t._clips.selectAll(".axesclip").data(o,function(c){return c.x._id+c.y._id});u.enter().append("clipPath").classed("axesclip",!0).attr("id",function(c){return"clip"+t._uid+c.x._id+c.y._id}).append("rect"),u.exit().remove(),u.each(function(c){b0.select(this).select("rect").attr({x:c.x._offset||0,y:c.y._offset||0,width:c.x._length||1,height:c.y._length||1})})}};Qn.draw=function(e,t,r){var n=e._fullLayout;t==="redraw"&&n._paper.selectAll("g.subplot").each(function(l){var u=l[0],c=n._plots[u];if(c){var f=c.xaxis,h=c.yaxis;c.xaxislayer.selectAll("."+f._id+"tick").remove(),c.yaxislayer.selectAll("."+h._id+"tick").remove(),c.xaxislayer.selectAll("."+f._id+"tick2").remove(),c.yaxislayer.selectAll("."+h._id+"tick2").remove(),c.xaxislayer.selectAll("."+f._id+"divider").remove(),c.yaxislayer.selectAll("."+h._id+"divider").remove(),c.minorGridlayer&&c.minorGridlayer.selectAll("path").remove(),c.gridlayer&&c.gridlayer.selectAll("path").remove(),c.zerolinelayer&&c.zerolinelayer.selectAll("path").remove(),c.zerolinelayerAbove&&c.zerolinelayerAbove.selectAll("path").remove(),n._infolayer.select(".g-"+f._id+"title").remove(),n._infolayer.select(".g-"+h._id+"title").remove()}});var i=!t||t==="redraw"?Qn.listIds(e):t,a=Qn.list(e),o=a.filter(function(l){return l.autoshift}).map(function(l){return l.overlaying});i.map(function(l){var u=Qn.getFromId(e,l);if(u.tickmode==="sync"&&u.overlaying){var c=i.findIndex(function(f){return f===u.overlaying});c>=0&&i.unshift(i.splice(c,1).shift())}});var s={false:{left:0,right:0}};return Jo.syncOrAsync(i.map(function(l){return function(){if(l){var u=Qn.getFromId(e,l);r||(r={}),r.axShifts=s,r.overlayingShiftedAx=o;var c=Qn.drawOne(e,u,r);return u._shiftPusher&&dB(u,u._fullDepth||0,s,!0),u._r=u.range.slice(),u._rl=Jo.simpleMap(u._r,u.r2l),c}}}))};Qn.drawOne=function(e,t,r){r=r||{};var n=r.axShifts||{},i=r.overlayingShiftedAx||[],a,o,s;t.setScale();var l=e._fullLayout,u=t._id,c=u.charAt(0),f=Qn.counterLetter(u),h=l._plots[t._mainSubplot],d=t.zerolinelayer==="above traces";if(!h)return;if(t._shiftPusher=t.autoshift||i.indexOf(t._id)!==-1||i.indexOf(t.overlaying)!==-1,t._shiftPusher&t.anchor==="free"){var v=t.linewidth/2||0;t.ticks==="inside"&&(v+=t.ticklen),dB(t,v,n,!0),dB(t,t.shift||0,n,!1)}(r.skipTitle!==!0||t._shift===void 0)&&(t._shift=mlt(t,n));var _=h[c+"axislayer"],b=t._mainLinePosition,p=b+=t._shift,k=t._mainMirrorPosition,E=t._vals=Qn.calcTicks(t),S=[t.mirror,p,k].join("_");for(a=0;a<E.length;a++)E[a].axInfo=S;t._selections={},t._tickAngles&&(t._prevTickAngles=t._tickAngles),t._tickAngles={},t._depth=null;var L={};function x(lt){var Gt=u+(lt||"tick");return L[Gt]||(L[Gt]=llt(t,Gt,p)),L[Gt]}if(t.visible){var C=Qn.makeTransTickFn(t),M=Qn.makeTransTickLabelFn(t),g,P,T=t.ticks==="inside",z=t.ticks==="outside";if(t.tickson==="boundaries"){var O=alt(t,E);P=Qn.clipEnds(t,O),g=T?P:O}else P=Qn.clipEnds(t,E),g=T&&t.ticklabelmode!=="period"?P:E;var V=t._gridVals=P,G=slt(t,E);if(!l._hasOnlyLargeSploms){var Z=t._subplotsWith,j={};for(a=0;a<Z.length;a++){o=Z[a],s=l._plots[o];var N=s[f+"axis"],H=N._mainAxis._id;if(!j[H]){j[H]=1;var te=c==="x"?"M0,"+N._offset+"v"+N._length:"M"+N._offset+",0h"+N._length;Qn.drawGrid(e,t,{vals:V,counterAxis:N,layer:s.gridlayer.select("."+u),minorLayer:s.minorGridlayer.select("."+u),path:te,transFn:C}),Qn.drawZeroLine(e,t,{counterAxis:N,layer:d?s.zerolinelayerAbove:s.zerolinelayer,path:te,transFn:C})}}}var oe,_e=Qn.getTickSigns(t),Ee=Qn.getTickSigns(t,"minor");if(t.ticks||t.minor&&t.minor.ticks){var Ce=Qn.makeTickPath(t,p,_e[2]),me=Qn.makeTickPath(t,p,Ee[2],{minor:!0}),ie,Se,Le,Ae;if(t._anchorAxis&&t.mirror&&t.mirror!==!0?(ie=Qn.makeTickPath(t,k,_e[3]),Se=Qn.makeTickPath(t,k,Ee[3],{minor:!0}),Le=Ce+ie,Ae=me+Se):(ie="",Se="",Le=Ce,Ae=me),t.showdividers&&z&&t.tickson==="boundaries"){var Fe={};for(a=0;a<G.length;a++)Fe[G[a].x]=1;oe=function(lt){return Fe[lt.x]?ie:Le}}else oe=function(lt){return lt.minor?Ae:Le}}if(Qn.drawTicks(e,t,{vals:g,layer:_,path:oe,transFn:C}),t.mirror==="allticks"){var Pe=Object.keys(t._linepositions||{});for(a=0;a<Pe.length;a++){o=Pe[a],s=l._plots[o];var ge=t._linepositions[o]||[],Re=ge[0],ce=ge[1],Ze=ge[2],ut=Qn.makeTickPath(t,Re,Ze?_e[0]:Ee[0],{minor:Ze})+Qn.makeTickPath(t,ce,Ze?_e[1]:Ee[1],{minor:Ze});Qn.drawTicks(e,t,{vals:g,layer:s[c+"axislayer"],path:ut,transFn:C})}}var pt=[];if(pt.push(function(){return Qn.drawLabels(e,t,{vals:E,layer:_,plotinfo:s,transFn:M,labelFns:Qn.makeLabelFns(t,p)})}),t.type==="multicategory"){var Zt={x:2,y:10}[c];pt.push(function(){var lt={x:"height",y:"width"}[c],Gt=x()[lt]+Zt+(t._tickAngles[u+"tick"]?t.tickfont.size*lM:0);return Qn.drawLabels(e,t,{vals:olt(t,E),layer:_,cls:u+"tick2",repositionOnUpdate:!0,secondary:!0,transFn:C,labelFns:Qn.makeLabelFns(t,p+Gt*_e[4])})}),pt.push(function(){return t._depth=_e[4]*(x("tick2")[t.side]-p),clt(e,t,{vals:G,layer:_,path:Qn.makeTickPath(t,p,_e[4],{len:t._depth}),transFn:C})})}else t.title.hasOwnProperty("standoff")&&pt.push(function(){t._depth=_e[4]*(x()[t.side]-p)});var st=aM.getComponentMethod("rangeslider","isVisible")(t);return!r.skipTitle&&!(st&&t.side==="bottom")&&pt.push(function(){return flt(e,t)}),pt.push(function(){var lt=t.side.charAt(0),Gt=Ost[t.side].charAt(0),Nt=Qn.getPxPosition(e,t),Jt=z?t.ticklen:0,sr,wr,cr,$e;(t.automargin||st||t._shiftPusher)&&(t.type==="multicategory"?sr=x("tick2"):(sr=x(),c==="x"&&lt==="b"&&(t._depth=Math.max(sr.width>0?sr.bottom-Nt:0,Jt))));var St=0,Qt=0;if(t._shiftPusher&&(St=Math.max(Jt,sr.height>0?lt==="l"?Nt-sr.left:sr.right-Nt:0),t.title.text!==l._dfltTitle[c]&&(Qt=(t._titleStandoff||0)+(t._titleScoot||0),lt==="l"&&(Qt+=xse(t))),t._fullDepth=Math.max(St,Qt)),t.automargin){wr={x:0,y:0,r:0,l:0,t:0,b:0};var Vt=[0,1],_t=typeof t._shift=="number"?t._shift:0;if(c==="x"){if(lt==="b"?wr[lt]=t._depth:(wr[lt]=t._depth=Math.max(sr.width>0?Nt-sr.top:0,Jt),Vt.reverse()),sr.width>0){var It=sr.right-(t._offset+t._length);It>0&&(wr.xr=1,wr.r=It);var mt=t._offset-sr.left;mt>0&&(wr.xl=0,wr.l=mt)}}else if(lt==="l"?(t._depth=Math.max(sr.height>0?Nt-sr.left:0,Jt),wr[lt]=t._depth-_t):(t._depth=Math.max(sr.height>0?sr.right-Nt:0,Jt),wr[lt]=t._depth+_t,Vt.reverse()),sr.height>0){var er=sr.bottom-(t._offset+t._length);er>0&&(wr.yb=0,wr.b=er);var lr=t._offset-sr.top;lr>0&&(wr.yt=1,wr.t=lr)}wr[f]=t.anchor==="free"?t.position:t._anchorAxis.domain[Vt[0]],t.title.text!==l._dfltTitle[c]&&(wr[lt]+=xse(t)+(t.title.standoff||0)),t.mirror&&t.anchor!=="free"&&(cr={x:0,y:0,r:0,l:0,t:0,b:0},cr[Gt]=t.linewidth,t.mirror&&t.mirror!==!0&&(cr[Gt]+=Jt),t.mirror===!0||t.mirror==="ticks"?cr[f]=t._anchorAxis.domain[Vt[1]]:(t.mirror==="all"||t.mirror==="allticks")&&(cr[f]=[t._counterDomainMin,t._counterDomainMax][Vt[1]]))}st&&($e=aM.getComponentMethod("rangeslider","autoMarginOpts")(e,t)),typeof t.automargin=="string"&&(_se(wr,t.automargin),_se(cr,t.automargin)),D3.autoMargin(e,mB(t),wr),D3.autoMargin(e,Lse(t),cr),D3.autoMargin(e,Pse(t),$e)}),Jo.syncOrAsync(pt)}};function _se(e,t){if(e){var r=Object.keys(hse).reduce(function(n,i){return t.indexOf(i)!==-1&&hse[i].forEach(function(a){n[a]=1}),n},{});Object.keys(e).forEach(function(n){r[n]||(n.length===1?e[n]=0:delete e[n])})}}function alt(e,t){var r=[],n,i=function(a,o){var s=a.xbnd[o];s!==null&&r.push(Jo.extendFlat({},a,{x:s}))};if(t.length){for(n=0;n<t.length;n++)i(t[n],0);i(t[n-1],1)}return r}function olt(e,t){for(var r=[],n={},i=0;i<t.length;i++){var a=t[i];n[a.text2]?n[a.text2].push(a.x):n[a.text2]=[a.x]}for(var o in n)r.push(Ese(e,Jo.interp(n[o],.5),o));return r}function slt(e,t){var r=[],n,i,a=t.length&&t[t.length-1].x<t[0].x,o=function(l,u){var c=l.xbnd[u];c!==null&&r.push(Jo.extendFlat({},l,{x:c}))};if(e.showdividers&&t.length){for(n=0;n<t.length;n++){var s=t[n];s.text2!==i&&o(s,a?1:0),i=s.text2}o(t[n-1],a?0:1)}return r}function llt(e,t,r){var n,i,a,o;if(e._selections[t].size())n=1/0,i=-1/0,a=1/0,o=-1/0,e._selections[t].each(function(){var l=hB(this);if(l.node().style.display!=="none"){var u=Xp.bBox(l.node().parentNode);n=Math.min(n,u.top),i=Math.max(i,u.bottom),a=Math.min(a,u.left),o=Math.max(o,u.right)}});else{var s=Qn.makeLabelFns(e,r);n=i=s.yFn({dx:0,dy:0,fontSize:0}),a=o=s.xFn({dx:0,dy:0,fontSize:0})}return{top:n,bottom:i,left:a,right:o,height:i-n,width:o-a}}Qn.getTickSigns=function(e,t){var r=e._id.charAt(0),n={x:"top",y:"right"}[r],i=e.side===n?1:-1,a=[-1,1,i,-i],o=t?(e.minor||{}).ticks:e.ticks;return o!=="inside"==(r==="x")&&(a=a.map(function(s){return-s})),e.side&&a.push({l:-1,t:-1,r:1,b:1}[e.side.charAt(0)]),a};Qn.makeTransTickFn=function(e){return e._id.charAt(0)==="x"?function(t){return F3(e._offset+e.l2p(t.x),0)}:function(t){return F3(0,e._offset+e.l2p(t.x))}};Qn.makeTransTickLabelFn=function(e){var t=ult(e),r=e.ticklabelshift||0,n=e.ticklabelstandoff||0,i=t[0],a=t[1],o=e.range[0]>e.range[1],s=e.ticklabelposition&&e.ticklabelposition.indexOf("inside")!==-1,l=!s;if(r){var u=o?-1:1;r=r*u}if(n){var c=e.side,f=s&&(c==="top"||c==="left")||l&&(c==="bottom"||c==="right")?1:-1;n=n*f}return e._id.charAt(0)==="x"?function(h){return F3(i+e._offset+e.l2p(fB(h))+r,a+n)}:function(h){return F3(a+n,i+e._offset+e.l2p(fB(h))+r)}};function fB(e){return e.periodX!==void 0?e.periodX:e.x}function ult(e){var t=e.ticklabelposition||"",r=e.tickson||"",n=function(v){return t.indexOf(v)!==-1},i=n("top"),a=n("left"),o=n("right"),s=n("bottom"),l=n("inside"),u=r!=="boundaries"&&(s||a||i||o);if(!u&&!l)return[0,0];var c=e.side,f=u?(e.tickwidth||0)/2:0,h=kL,d=e.tickfont?e.tickfont.size:12;return(s||i)&&(f+=d*Lb,h+=(e.linewidth||0)/2),(a||o)&&(f+=(e.linewidth||0)/2,h+=kL),l&&c==="top"&&(h-=d*(1-Lb)),(a||i)&&(f=-f),(c==="bottom"||c==="right")&&(h=-h),[u?f:0,l?h:0]}Qn.makeTickPath=function(e,t,r,n){n||(n={});var i=n.minor;if(i&&!e.minor)return"";var a=n.len!==void 0?n.len:i?e.minor.ticklen:e.ticklen,o=e._id.charAt(0),s=(e.linewidth||1)/2;return o==="x"?"M0,"+(t+s*r)+"v"+a*r:"M"+(t+s*r)+",0h"+a*r};Qn.makeLabelFns=function(e,t,r){var n=e.ticklabelposition||"",i=e.tickson||"",a=function(O){return n.indexOf(O)!==-1},o=a("top"),s=a("left"),l=a("right"),u=a("bottom"),c=i!=="boundaries"&&(u||s||o||l),f=a("inside"),h=n==="inside"&&e.ticks==="inside"||!f&&e.ticks==="outside"&&i!=="boundaries",d=0,v=0,_=h?e.ticklen:0;if(f?_*=-1:c&&(_=0),h&&(d+=_,r)){var b=Jo.deg2rad(r);d=_*Math.cos(b)+1,v=_*Math.sin(b)}e.showticklabels&&(h||e.showline)&&(d+=.2*e.tickfont.size),d+=(e.linewidth||1)/2*(f?-1:1);var p={labelStandoff:d,labelShift:v},k,E,S,L,x=0,C=e.side,M=e._id.charAt(0),g=e.tickangle,P;if(M==="x")P=!f&&C==="bottom"||f&&C==="top",L=P?1:-1,f&&(L*=-1),k=v*L,E=t+d*L,S=P?1:-.2,Math.abs(g)===90&&(f?S+=nM:g===-90&&C==="bottom"?S=Lb:g===90&&C==="top"?S=nM:S=.5,x=nM/2*(g/90)),p.xFn=function(O){return O.dx+k+x*O.fontSize},p.yFn=function(O){return O.dy+E+O.fontSize*S},p.anchorFn=function(O,V){if(c){if(s)return"end";if(l)return"start"}return!Ih(V)||V===0||V===180?"middle":V*L<0!==f?"end":"start"},p.heightFn=function(O,V,G){return V<-60||V>60?-.5*G:e.side==="top"!==f?-G:0};else if(M==="y"){if(P=!f&&C==="left"||f&&C==="right",L=P?1:-1,f&&(L*=-1),k=d,E=v*L,S=0,!f&&Math.abs(g)===90&&(g===-90&&C==="left"||g===90&&C==="right"?S=Lb:S=.5),f){var T=Ih(g)?+g:0;if(T!==0){var z=Jo.deg2rad(T);x=Math.abs(Math.sin(z))*Lb*L,S=0}}p.xFn=function(O){return O.dx+t-(k+O.fontSize*S)*L+x*O.fontSize},p.yFn=function(O){return O.dy+E+O.fontSize*nM},p.anchorFn=function(O,V){return Ih(V)&&Math.abs(V)===90?"middle":P?"end":"start"},p.heightFn=function(O,V,G){return e.side==="right"&&(V*=-1),V<-30?-G:V<30?-.5*G:0}}return p};function PL(e){return[e.text,e.x,e.axInfo,e.font,e.fontSize,e.fontColor].join("_")}Qn.drawTicks=function(e,t,r){r=r||{};var n=t._id+"tick",i=[].concat(t.minor&&t.minor.ticks?r.vals.filter(function(o){return o.minor&&!o.noTick}):[]).concat(t.ticks?r.vals.filter(function(o){return!o.minor&&!o.noTick}):[]),a=r.layer.selectAll("path."+n).data(i,PL);a.exit().remove(),a.enter().append("path").classed(n,1).classed("ticks",1).classed("crisp",r.crisp!==!1).each(function(o){return oM.stroke(b0.select(this),o.minor?t.minor.tickcolor:t.tickcolor)}).style("stroke-width",function(o){return Xp.crispRound(e,o.minor?t.minor.tickwidth:t.tickwidth,1)+"px"}).attr("d",r.path).style("display",null),IL(t,[bse]),a.attr("transform",r.transFn)};Qn.drawGrid=function(e,t,r){if(r=r||{},t.tickmode!=="sync"){var n=t._id+"grid",i=t.minor&&t.minor.showgrid,a=i?r.vals.filter(function(p){return p.minor}):[],o=t.showgrid?r.vals.filter(function(p){return!p.minor}):[],s=r.counterAxis;if(s&&Qn.shouldShowZeroLine(e,t,s))for(var l=t.tickmode==="array",u=0;u<o.length;u++){var c=o[u].x;if(l?!c:Math.abs(c)<t.dtick/100)if(o=o.slice(0,u).concat(o.slice(u+1)),l)u--;else break}t._gw=Xp.crispRound(e,t.gridwidth,1);for(var f=i?Xp.crispRound(e,t.minor.gridwidth,1):0,h=r.layer,d=r.minorLayer,v=1;v>=0;v--){var _=v?h:d;if(_){var b=_.selectAll("path."+n).data(v?o:a,PL);b.exit().remove(),b.enter().append("path").classed(n,1).classed("crisp",r.crisp!==!1),b.attr("transform",r.transFn).attr("d",r.path).each(function(p){return oM.stroke(b0.select(this),p.minor?t.minor.gridcolor:t.gridcolor||"#ddd")}).style("stroke-dasharray",function(p){return Xp.dashStyle(p.minor?t.minor.griddash:t.griddash,p.minor?t.minor.gridwidth:t.gridwidth)}).style("stroke-width",function(p){return(p.minor?f:t._gw)+"px"}).style("display",null),typeof r.path=="function"&&b.attr("d",r.path)}}IL(t,[uB,cB])}};Qn.drawZeroLine=function(e,t,r){r=r||r;var n=t._id+"zl",i=Qn.shouldShowZeroLine(e,t,r.counterAxis),a=r.layer.selectAll("path."+n).data(i?[{x:0,id:t._id}]:[]);a.exit().remove(),a.enter().append("path").classed(n,1).classed("zl",1).classed("crisp",r.crisp!==!1).each(function(){r.layer.selectAll("path").sort(function(o,s){return Bst(o.id,s.id)})}),a.attr("transform",r.transFn).attr("d",r.path).call(oM.stroke,t.zerolinecolor||oM.defaultLine).style("stroke-width",Xp.crispRound(e,t.zerolinewidth,t._gw||1)+"px").style("display",null),IL(t,[lB])};Qn.drawLabels=function(e,t,r){r=r||{};var n=e._fullLayout,i=t._id,a=t.zerolinelayer==="above traces",o=r.cls||i+"tick",s=r.vals.filter(function(H){return H.text}),l=r.labelFns,u=r.secondary?0:t.tickangle,c=(t._prevTickAngles||{})[o],f=r.layer.selectAll("g."+o).data(t.showticklabels?s:[],PL),h=[];f.enter().append("g").classed(o,1).append("text").attr("text-anchor","middle").each(function(H){var te=b0.select(this),oe=e._promises.length;te.call(kb.positionText,l.xFn(H),l.yFn(H)).call(Xp.font,{family:H.font,size:H.fontSize,color:H.fontColor,weight:H.fontWeight,style:H.fontStyle,variant:H.fontVariant,textcase:H.fontTextcase,lineposition:H.fontLineposition,shadow:H.fontShadow}).text(H.text).call(kb.convertToTspans,e),e._promises[oe]?h.push(e._promises.pop().then(function(){d(te,u)})):d(te,u)}),IL(t,[fse]),f.exit().remove(),r.repositionOnUpdate&&f.each(function(H){b0.select(this).select("text").call(kb.positionText,l.xFn(H),l.yFn(H))});function d(H,te){H.each(function(oe){var _e=b0.select(this),Ee=_e.select(".text-math-group"),Ce=l.anchorFn(oe,te),me=r.transFn.call(_e.node(),oe)+(Ih(te)&&+te!=0?" rotate("+te+","+l.xFn(oe)+","+(l.yFn(oe)-oe.fontSize/2)+")":""),ie=kb.lineCount(_e),Se=lM*oe.fontSize,Le=l.heightFn(oe,Ih(te)?+te:0,(ie-1)*Se);if(Le&&(me+=F3(0,Le)),Ee.empty()){var Ae=_e.select("text");Ae.attr({transform:me,"text-anchor":Ce}),Ae.style("display",null),t._adjustTickLabelsOverflow&&t._adjustTickLabelsOverflow()}else{var Fe=Xp.bBox(Ee.node()).width,Pe=Fe*{end:-.5,start:.5}[Ce];Ee.attr("transform",me+F3(Pe,0))}})}t._adjustTickLabelsOverflow=function(){var H=t.ticklabeloverflow;if(!(!H||H==="allow")){var te=H.indexOf("hide")!==-1,oe=t._id.charAt(0)==="x",_e=0,Ee=oe?e._fullLayout.width:e._fullLayout.height;if(H.indexOf("domain")!==-1){var Ce=Jo.simpleMap(t.range,t.r2l);_e=t.l2p(Ce[0])+t._offset,Ee=t.l2p(Ce[1])+t._offset}var me=Math.min(_e,Ee),ie=Math.max(_e,Ee),Se=t.side,Le=1/0,Ae=-1/0;f.each(function(Re){var ce=b0.select(this),Ze=ce.select(".text-math-group");if(Ze.empty()){var ut=Xp.bBox(ce.node()),pt=0;oe?(ut.right>ie||ut.left<me)&&(pt=1):(ut.bottom>ie||ut.top+(t.tickangle?0:Re.fontSize/4)<me)&&(pt=1);var Zt=ce.select("text");pt?te&&Zt.style("display","none"):Zt.node().style.display!=="none"&&(Zt.style("display",null),Se==="bottom"||Se==="right"?Le=Math.min(Le,oe?ut.top:ut.left):Le=-1/0,Se==="top"||Se==="left"?Ae=Math.max(Ae,oe?ut.bottom:ut.right):Ae=1/0)}});for(var Fe in n._plots){var Pe=n._plots[Fe];if(!(t._id!==Pe.xaxis._id&&t._id!==Pe.yaxis._id)){var ge=oe?Pe.yaxis:Pe.xaxis;ge&&(ge["_visibleLabelMin_"+t._id]=Le,ge["_visibleLabelMax_"+t._id]=Ae)}}}},t._hideCounterAxisInsideTickLabels=function(H){var te=t._id.charAt(0)==="x",oe=[];for(var _e in n._plots){var Ee=n._plots[_e];t._id!==Ee.xaxis._id&&t._id!==Ee.yaxis._id||oe.push(te?Ee.yaxis:Ee.xaxis)}oe.forEach(function(Ce,me){Ce&&cM(Ce)&&(H||[lB,cB,uB,bse,fse]).forEach(function(ie){var Se=ie.K==="tick"&&ie.L==="text"&&t.ticklabelmode==="period",Le=n._plots[t._mainSubplot],Ae;if(ie.K===lB.K){var Fe=a?Le.zerolinelayerAbove:Le.zerolinelayer;Ae=Fe.selectAll("."+t._id+"zl")}else ie.K===cB.K?Ae=Le.minorGridlayer.selectAll("."+t._id):ie.K===uB.K?Ae=Le.gridlayer.selectAll("."+t._id):Ae=Le[t._id.charAt(0)+"axislayer"];Ae.each(function(){var Pe=b0.select(this);ie.L&&(Pe=Pe.selectAll(ie.L)),Pe.each(function(ge){var Re=t.l2p(Se?fB(ge):ge.x)+t._offset,ce=b0.select(this);Re<t["_visibleLabelMax_"+Ce._id]&&Re>t["_visibleLabelMin_"+Ce._id]?ce.style("display","none"):ie.K==="tick"&&!me&&ce.node().style.display!=="none"&&ce.style("display",null)})})})})},d(f,c+1?c:u);function v(){return h.length&&Promise.all(h)}var _=null;function b(){if(d(f,u),s.length&&t.autotickangles&&(t.type!=="log"||String(t.dtick).charAt(0)!=="D")){_=t.autotickangles[0];var H=0,te=[],oe,_e=1;f.each(function(wr){H=Math.max(H,wr.fontSize);var cr=t.l2p(wr.x),$e=hB(this),St=Xp.bBox($e.node());_e=Math.max(_e,kb.lineCount($e)),te.push({top:0,bottom:10,height:10,left:cr-St.width/2,right:cr+St.width/2+2,width:St.width+2})});var Ee=(t.tickson==="boundaries"||t.showdividers)&&!r.secondary,Ce=s.length,me=Math.abs((s[Ce-1].x-s[0].x)*t._m)/(Ce-1),ie=Ee?me/2:me,Se=Ee?t.ticklen:H*1.25*_e,Le=Math.sqrt(Math.pow(ie,2)+Math.pow(Se,2)),Ae=ie/Le,Fe=t.autotickangles.map(function(wr){return wr*Math.PI/180}),Pe=Fe.find(function(wr){return Math.abs(Math.cos(wr))<=Ae});Pe===void 0&&(Pe=Fe.reduce(function(wr,cr){return Math.abs(Math.cos(wr))<Math.abs(Math.cos(cr))?wr:cr},Fe[0]));var ge=Pe*(180/Math.PI);if(Ee){var Re=2;for(t.ticks&&(Re+=t.tickwidth/2),oe=0;oe<te.length;oe++){var ce=s[oe].xbnd,Ze=te[oe];if(ce[0]!==null&&Ze.left-t.l2p(ce[0])<Re||ce[1]!==null&&t.l2p(ce[1])-Ze.right<Re){_=ge;break}}}else{var ut=t.ticklabelposition||"",pt=t.tickson||"",Zt=function(wr){return ut.indexOf(wr)!==-1},st=Zt("top"),lt=Zt("left"),Gt=Zt("right"),Nt=Zt("bottom"),Jt=pt!=="boundaries"&&(Nt||lt||st||Gt),sr=Jt?(t.tickwidth||0)+2*kL:0;for(oe=0;oe<te.length-1;oe++)if(Jo.bBoxIntersect(te[oe],te[oe+1],sr)){_=ge;break}}_&&d(f,_)}}t._selections&&(t._selections[o]=f);var p=[v];t.automargin&&n._redrawFromAutoMarginCount&&c===90?(_=c,p.push(function(){d(f,c)})):p.push(b),t._tickAngles&&p.push(function(){t._tickAngles[o]=_===null?Ih(u)?u:0:_});var k=function(){var H=0,te=0;return f.each(function(oe,_e){var Ee=hB(this),Ce=Ee.select(".text-math-group");if(Ce.empty()){var me;t._vals[_e]&&(me=t._vals[_e].bb||Xp.bBox(Ee.node()),t._vals[_e].bb=me),H=Math.max(H,me.width),te=Math.max(te,me.height)}}),{labelsMaxW:H,labelsMaxH:te}},E=t._anchorAxis;if(E&&(E.autorange||E.insiderange)&&cM(t)&&!Nst(n,t._id)&&(n._insideTickLabelsUpdaterange||(n._insideTickLabelsUpdaterange={}),E.autorange&&(n._insideTickLabelsUpdaterange[E._name+".autorange"]=E.autorange,p.push(k)),E.insiderange)){var S=k(),L=t._id.charAt(0)==="y"?S.labelsMaxW:S.labelsMaxH;L+=2*kL,t.ticklabelposition==="inside"&&(L+=t.ticklen||0);var x=t.side==="right"||t.side==="top"?1:-1,C=x===1?1:0,M=x===1?0:1,g=[];g[M]=E.range[M];var P=E.range,T=E.r2p(P[C]),z=E.r2p(P[M]),O=n._insideTickLabelsUpdaterange[E._name+".range"];if(O){var V=E.r2p(O[C]),G=E.r2p(O[M]),Z=x*(t._id.charAt(0)==="y"?1:-1);Z*T<Z*V&&(T=V,g[C]=P[C]=O[C]),Z*z>Z*G&&(z=G,g[M]=P[M]=O[M])}var j=Math.abs(z-T);j-L>0?(j-=L,L*=1+L/j):L=0,t._id.charAt(0)!=="y"&&(L=-L),g[C]=E.p2r(E.r2p(P[C])+x*L),E.autorange==="min"||E.autorange==="max reversed"?(g[0]=null,E._rangeInitial0=void 0,E._rangeInitial1=void 0):(E.autorange==="max"||E.autorange==="min reversed")&&(g[1]=null,E._rangeInitial0=void 0,E._rangeInitial1=void 0),n._insideTickLabelsUpdaterange[E._name+".range"]=g}var N=Jo.syncOrAsync(p);return N&&N.then&&e._promises.push(N),N};function clt(e,t,r){var n=t._id+"divider",i=r.vals,a=r.layer.selectAll("path."+n).data(i,PL);a.exit().remove(),a.enter().insert("path",":first-child").classed(n,1).classed("crisp",1).call(oM.stroke,t.dividercolor).style("stroke-width",Xp.crispRound(e,t.dividerwidth,1)+"px"),a.attr("transform",r.transFn).attr("d",r.path)}Qn.getPxPosition=function(e,t){var r=e._fullLayout._size,n=t._id.charAt(0),i=t.side,a;if(t.anchor!=="free"?a=t._anchorAxis:n==="x"?a={_offset:r.t+(1-(t.position||0))*r.h,_length:0}:n==="y"&&(a={_offset:r.l+(t.position||0)*r.w+t._shift,_length:0}),i==="top"||i==="left")return a._offset;if(i==="bottom"||i==="right")return a._offset+a._length};function xse(e){var t=e.title.font.size,r=(e.title.text.match(kb.BR_TAG_ALL)||[]).length;return e.title.hasOwnProperty("standoff")?t*(Lb+r*lM):r?t*(r+1)*lM:t}function flt(e,t){var r=e._fullLayout,n=t._id,i=n.charAt(0),a=t.title.font.size,o,s=(t.title.text.match(kb.BR_TAG_ALL)||[]).length;if(t.title.hasOwnProperty("standoff"))t.side==="bottom"||t.side==="right"?o=t._depth+t.title.standoff+a*Lb:(t.side==="top"||t.side==="left")&&(o=t._depth+t.title.standoff+a*(nM+s*lM));else{var l=cM(t);if(t.type==="multicategory")o=t._depth;else{var u=1.5*a;l&&(u=.5*a,t.ticks==="outside"&&(u+=t.ticklen)),o=10+u+(t.linewidth?t.linewidth-1:0)}l||(i==="x"?o+=t.side==="top"?a*(t.showticklabels?1:0):a*(t.showticklabels?1.5:.5):o+=t.side==="right"?a*(t.showticklabels?1:.5):a*(t.showticklabels?.5:0))}var c=Qn.getPxPosition(e,t),f,h,d;i==="x"?(h=t._offset+t._length/2,d=t.side==="top"?c-o:c+o):(d=t._offset+t._length/2,h=t.side==="right"?c+o:c-o,f={rotate:"-90",offset:0});var v;if(t.type!=="multicategory"){var _=t._selections[t._id+"tick"];if(v={selection:_,side:t.side},_&&_.node()&&_.node().parentNode){var b=Xp.getTranslate(_.node().parentNode);v.offsetLeft=b.x,v.offsetTop=b.y}t.title.hasOwnProperty("standoff")&&(v.pad=0)}return t._titleStandoff=o,Lst.draw(e,n+"title",{propContainer:t,propName:t._name+".title.text",placeholder:r._dfltTitle[i],avoid:v,transform:f,attributes:{x:h,y:d,"text-anchor":"middle"}})}Qn.shouldShowZeroLine=function(e,t,r){var n=Jo.simpleMap(t.range,t.r2l);return n[0]*n[1]<=0&&t.zeroline&&(t.type==="linear"||t.type==="-")&&!(t.rangebreaks&&t.maskBreaks(0)===CL)&&(Cse(t,0)||!hlt(e,t,r,n)||dlt(e,t))};Qn.clipEnds=function(e,t){return t.filter(function(r){return Cse(e,r.x)})};function Cse(e,t){var r=e.l2p(t);return r>1&&r<e._length-1}function hlt(e,t,r,n){var i=r._mainAxis;if(!i)return;var a=e._fullLayout,o=t._id.charAt(0),s=Qn.counterLetter(t._id),l=t._offset+(Math.abs(n[0])<Math.abs(n[1])==(o==="x")?0:t._length);function u(v){if(!v.showline||!v.linewidth)return!1;var _=Math.max((v.linewidth+t.zerolinewidth)/2,1);function b(E){return typeof E=="number"&&Math.abs(E-l)<_}if(b(v._mainLinePosition)||b(v._mainMirrorPosition))return!0;var p=v._linepositions||{};for(var k in p)if(b(p[k][0])||b(p[k][1]))return!0}var c=a._plots[r._mainSubplot];if(!(c.mainplotinfo||c).overlays.length)return u(r,l);for(var f=Qn.list(e,s),h=0;h<f.length;h++){var d=f[h];if(d._mainAxis===i&&u(d,l))return!0}}function dlt(e,t){for(var r=e._fullData,n=t._mainSubplot,i=t._id.charAt(0),a=0;a<r.length;a++){var o=r[a];if(o.visible===!0&&o.xaxis+o.yaxis===n&&(aM.traceIs(o,"bar-like")&&o.orientation==={x:"h",y:"v"}[i]||o.fill&&o.fill.charAt(o.fill.length-1)===i))return!0}return!1}function hB(e){var t=b0.select(e),r=t.select(".text-math-group");return r.empty()?t.select("text"):r}Qn.allowAutoMargin=function(e){for(var t=Qn.list(e,"",!0),r=0;r<t.length;r++){var n=t[r];n.automargin&&(D3.allowAutoMargin(e,mB(n)),n.mirror&&D3.allowAutoMargin(e,Lse(n))),aM.getComponentMethod("rangeslider","isVisible")(n)&&D3.allowAutoMargin(e,Pse(n))}};function mB(e){return e._id+".automargin"}function Lse(e){return mB(e)+".mirror"}function Pse(e){return e._id+".rangeslider"}Qn.swap=function(e,t){for(var r=vlt(e,t),n=0;n<r.length;n++)plt(e,r[n].x,r[n].y)};function vlt(e,t){var r=[],n,i;for(n=0;n<t.length;n++){var a=[],o=e._fullData[t[n]].xaxis,s=e._fullData[t[n]].yaxis;if(!(!o||!s)){for(i=0;i<r.length;i++)(r[i].x.indexOf(o)!==-1||r[i].y.indexOf(s)!==-1)&&a.push(i);if(!a.length){r.push({x:[o],y:[s]});continue}var l=r[a[0]],u;if(a.length>1)for(i=1;i<a.length;i++)u=r[a[i]],wL(l.x,u.x),wL(l.y,u.y);wL(l.x,[o]),wL(l.y,[s])}}return r}function wL(e,t){for(var r=0;r<t.length;r++)e.indexOf(t[r])===-1&&e.push(t[r])}function plt(e,t,r){var n=[],i=[],a=e.layout,o,s;for(o=0;o<t.length;o++)n.push(Qn.getFromId(e,t[o]));for(o=0;o<r.length;o++)i.push(Qn.getFromId(e,r[o]));var l=Object.keys(Pst),u=["anchor","domain","overlaying","position","side","tickangle","editType"],c=["linear","log"];for(o=0;o<l.length;o++){var f=l[o],h=n[0][f],d=i[0][f],v=!0,_=!1,b=!1;if(!(f.charAt(0)==="_"||typeof h=="function"||u.indexOf(f)!==-1)){for(s=1;s<n.length&&v;s++){var p=n[s][f];f==="type"&&c.indexOf(h)!==-1&&c.indexOf(p)!==-1&&h!==p?_=!0:p!==h&&(v=!1)}for(s=1;s<i.length&&v;s++){var k=i[s][f];f==="type"&&c.indexOf(d)!==-1&&c.indexOf(k)!==-1&&d!==k?b=!0:i[s][f]!==d&&(v=!1)}v&&(_&&(a[n[0]._name].type="linear"),b&&(a[i[0]._name].type="linear"),glt(a,f,n,i,e._fullLayout._dfltTitle))}}for(o=0;o<e._fullLayout.annotations.length;o++){var E=e._fullLayout.annotations[o];t.indexOf(E.xref)!==-1&&r.indexOf(E.yref)!==-1&&Jo.swapAttrs(a.annotations[o],["?"])}}function glt(e,t,r,n,i){var a=Jo.nestedProperty,o=a(e[r[0]._name],t).get(),s=a(e[n[0]._name],t).get(),l;for(t==="title"&&(o&&o.text===i.x&&(o.text=i.y),s&&s.text===i.y&&(s.text=i.x)),l=0;l<r.length;l++)a(e,r[l]._name+"."+t).set(s);for(l=0;l<n.length;l++)a(e,n[l]._name+"."+t).set(o)}function yB(e){return e._id==="angularaxis"}function oB(e,t){for(var r=t._rangebreaks.length,n=0;n<r;n++){var i=t._rangebreaks[n];if(e>=i.min&&e<i.max)return i.max}return e}function cM(e){return(e.ticklabelposition||"").indexOf("inside")!==-1}function IL(e,t){cM(e._anchorAxis||{})&&e._hideCounterAxisInsideTickLabels&&e._hideCounterAxisInsideTickLabels(t)}function dB(e,t,r,n){var i=e.anchor!=="free"&&(e.overlaying===void 0||e.overlaying===!1)?e._id:e.overlaying,a;n?a=e.side==="right"?t:-t:a=t,i in r||(r[i]={}),e.side in r[i]||(r[i][e.side]=0),r[i][e.side]+=a}function mlt(e,t){return e.autoshift?t[e.overlaying][e.side]:e.shift||0}function ylt(e,t){return/%f/.test(t)?e>=zst:/%L/.test(t)?e>=Fst:/%[SX]/.test(t)?e>=EL:/%M/.test(t)?e>=sM:/%[HI]/.test(t)?e>=xm:/%p/.test(t)?e>=s_:/%[Aadejuwx]/.test(t)?e>=Ov:/%[UVW]/.test(t)?e>=Zp:/%[Bbm]/.test(t)?e>=ML:/%[q]/.test(t)?e>=SL:/%[Yy]/.test(t)?e>=AL:!0}});var _B=ye((Xnr,Rse)=>{"use strict";Rse.exports=function(t,r,n){var i,a;if(n){var o=r==="reversed"||r==="min reversed"||r==="max reversed";i=n[o?1:0],a=n[o?0:1]}var s=t("autorangeoptions.minallowed",a===null?i:void 0),l=t("autorangeoptions.maxallowed",i===null?a:void 0);s===void 0&&t("autorangeoptions.clipmin"),l===void 0&&t("autorangeoptions.clipmax"),t("autorangeoptions.include")}});var xB=ye((Znr,Dse)=>{"use strict";var _lt=_B();Dse.exports=function(t,r,n,i){var a=r._template||{},o=r.type||a.type||"-";n("minallowed"),n("maxallowed");var s=n("range");if(!s){var l;!i.noInsiderange&&o!=="log"&&(l=n("insiderange"),l&&(l[0]===null||l[1]===null)&&(r.insiderange=!1,l=void 0),l&&(s=n("range",l)))}var u=r.getAutorangeDflt(s,i),c=n("autorange",u),f;s&&(s[0]===null&&s[1]===null||(s[0]===null||s[1]===null)&&(c==="reversed"||c===!0)||s[0]!==null&&(c==="min"||c==="max reversed")||s[1]!==null&&(c==="max"||c==="min reversed"))&&(s=void 0,delete r.range,r.autorange=!0,f=!0),f||(u=r.getAutorangeDflt(s,i),c=n("autorange",u)),c&&(_lt(n,c,s),(o==="linear"||o==="-")&&n("rangemode")),r.cleanRange()}});var zse=ye((Ynr,Fse)=>{var xlt={left:0,top:0};Fse.exports=blt;function blt(e,t,r){t=t||e.currentTarget||e.srcElement,Array.isArray(r)||(r=[0,0]);var n=e.clientX||0,i=e.clientY||0,a=wlt(t);return r[0]=n-a.left,r[1]=i-a.top,r}function wlt(e){return e===window||e===document||e===document.body?xlt:e.getBoundingClientRect()}});var RL=ye((Knr,Ose)=>{"use strict";var Tlt=rq();function Alt(){var e=!1;try{var t=Object.defineProperty({},"passive",{get:function(){e=!0}});window.addEventListener("test",null,t),window.removeEventListener("test",null,t)}catch(r){e=!1}return e}Ose.exports=Tlt&&Alt()});var Bse=ye((Jnr,qse)=>{"use strict";qse.exports=function(t,r,n,i,a){var o=(t-n)/(i-n),s=o+r/(i-n),l=(o+s)/2;return a==="left"||a==="bottom"?o:a==="center"||a==="middle"?l:a==="right"||a==="top"?s:o<2/3-l?o:s>4/3-l?s:l}});var Vse=ye(($nr,Use)=>{"use strict";var Nse=Dr(),Slt=[["sw-resize","s-resize","se-resize"],["w-resize","move","e-resize"],["nw-resize","n-resize","ne-resize"]];Use.exports=function(t,r,n,i){return n==="left"?t=0:n==="center"?t=1:n==="right"?t=2:t=Nse.constrain(Math.floor(t*3),0,2),i==="bottom"?r=0:i==="middle"?r=1:i==="top"?r=2:r=Nse.constrain(Math.floor(r*3),0,2),Slt[r][t]}});var Hse=ye((Qnr,Gse)=>{"use strict";var Mlt=_3(),Elt=z6(),klt=NS().getGraphDiv,Clt=zS(),bB=Gse.exports={};bB.wrapped=function(e,t,r){e=klt(e),e._fullLayout&&Elt.clear(e._fullLayout._uid+Clt.HOVERID),bB.raw(e,t,r)};bB.raw=function(t,r){var n=t._fullLayout,i=t._hoverdata;r||(r={}),!(r.target&&!t._dragged&&Mlt.triggerHandler(t,"plotly_beforehover",r)===!1)&&(n._hoverlayer.selectAll("g").remove(),n._hoverlayer.selectAll("line").remove(),n._hoverlayer.selectAll("circle").remove(),t._hoverdata=void 0,r.target&&i&&t.emit("plotly_unhover",{event:r,points:i}))}});var yv=ye((ear,Zse)=>{"use strict";var Llt=zse(),wB=nq(),Plt=RL(),Ilt=Dr().removeElement,Rlt=hd(),Pb=Zse.exports={};Pb.align=Bse();Pb.getCursor=Vse();var Wse=Hse();Pb.unhover=Wse.wrapped;Pb.unhoverRaw=Wse.raw;Pb.init=function(t){var r=t.gd,n=1,i=r._context.doubleClickDelay,a=t.element,o,s,l,u,c,f,h,d;r._mouseDownTime||(r._mouseDownTime=0),a.style.pointerEvents="all",a.onmousedown=b,Plt?(a._ontouchstart&&a.removeEventListener("touchstart",a._ontouchstart),a._ontouchstart=b,a.addEventListener("touchstart",b,{passive:!1})):a.ontouchstart=b;function v(E,S,L){return Math.abs(E)<L&&(E=0),Math.abs(S)<L&&(S=0),[E,S]}var _=t.clampFn||v;function b(E){r._dragged=!1,r._dragging=!0;var S=jse(E);o=S[0],s=S[1],h=E.target,f=E,d=E.buttons===2||E.ctrlKey,typeof E.clientX=="undefined"&&typeof E.clientY=="undefined"&&(E.clientX=o,E.clientY=s),l=new Date().getTime(),l-r._mouseDownTime<i?n+=1:(n=1,r._mouseDownTime=l),t.prepFn&&t.prepFn(E,o,s),wB&&!d?(c=Xse(),c.style.cursor=window.getComputedStyle(a).cursor):wB||(c=document,u=window.getComputedStyle(document.documentElement).cursor,document.documentElement.style.cursor=window.getComputedStyle(a).cursor),document.addEventListener("mouseup",k),document.addEventListener("touchend",k),t.dragmode!==!1&&(E.preventDefault(),document.addEventListener("mousemove",p),document.addEventListener("touchmove",p,{passive:!1}))}function p(E){E.preventDefault();var S=jse(E),L=t.minDrag||Rlt.MINDRAG,x=_(S[0]-o,S[1]-s,L),C=x[0],M=x[1];(C||M)&&(r._dragged=!0,Pb.unhover(r,E)),r._dragged&&t.moveFn&&!d&&(r._dragdata={element:a,dx:C,dy:M},t.moveFn(C,M))}function k(E){if(delete r._dragdata,t.dragmode!==!1&&(E.preventDefault(),document.removeEventListener("mousemove",p),document.removeEventListener("touchmove",p)),document.removeEventListener("mouseup",k),document.removeEventListener("touchend",k),wB?Ilt(c):u&&(c.documentElement.style.cursor=u,u=null),!r._dragging){r._dragged=!1;return}if(r._dragging=!1,new Date().getTime()-r._mouseDownTime>i&&(n=Math.max(n-1,1)),r._dragged)t.doneFn&&t.doneFn();else{var S;f.target===h?S=f:(S={target:h,srcElement:h,toElement:h},Object.keys(f).concat(Object.keys(f.__proto__)).forEach(L=>{var x=f[L];!S[L]&&typeof x!="function"&&(S[L]=x)})),t.clickFn&&t.clickFn(n,S),d||h.dispatchEvent(new MouseEvent("click",E))}r._dragging=!1,r._dragged=!1}};function Xse(){var e=document.createElement("div");e.className="dragcover";var t=e.style;return t.position="fixed",t.left=0,t.right=0,t.top=0,t.bottom=0,t.zIndex=999999999,t.background="none",document.body.appendChild(e),e}Pb.coverSlip=Xse;function jse(e){return Llt(e.changedTouches?e.changedTouches[0]:e,document.body)}});var Sg=ye((tar,Yse)=>{"use strict";Yse.exports=function(t,r){(t.attr("class")||"").split(" ").forEach(function(n){n.indexOf("cursor-")===0&&t.classed(n,!1)}),r&&t.classed("cursor-"+r,!0)}});var $se=ye((rar,Jse)=>{"use strict";var TB=Sg(),fM="data-savedcursor",Kse="!!";Jse.exports=function(t,r){var n=t.attr(fM);if(r){if(!n){for(var i=(t.attr("class")||"").split(" "),a=0;a<i.length;a++){var o=i[a];o.indexOf("cursor-")===0&&t.attr(fM,o.slice(7)).classed(o,!1)}t.attr(fM)||t.attr(fM,Kse)}TB(t,r)}else n&&(t.attr(fM,null),n===Kse?TB(t):TB(t,n))}});var SB=ye((iar,Qse)=>{"use strict";var AB=ec(),Dlt=Lh();Qse.exports={_isSubplotObj:!0,visible:{valType:"boolean",dflt:!0,editType:"legend"},bgcolor:{valType:"color",editType:"legend"},bordercolor:{valType:"color",dflt:Dlt.defaultLine,editType:"legend"},maxheight:{valType:"number",min:0,editType:"legend"},borderwidth:{valType:"number",min:0,dflt:0,editType:"legend"},font:AB({editType:"legend"}),grouptitlefont:AB({editType:"legend"}),orientation:{valType:"enumerated",values:["v","h"],dflt:"v",editType:"legend"},traceorder:{valType:"flaglist",flags:["reversed","grouped"],extras:["normal"],editType:"legend"},tracegroupgap:{valType:"number",min:0,dflt:10,editType:"legend"},entrywidth:{valType:"number",min:0,editType:"legend"},entrywidthmode:{valType:"enumerated",values:["fraction","pixels"],dflt:"pixels",editType:"legend"},indentation:{valType:"number",min:-15,dflt:0,editType:"legend"},itemsizing:{valType:"enumerated",values:["trace","constant"],dflt:"trace",editType:"legend"},itemwidth:{valType:"number",min:30,dflt:30,editType:"legend"},itemclick:{valType:"enumerated",values:["toggle","toggleothers",!1],dflt:"toggle",editType:"legend"},itemdoubleclick:{valType:"enumerated",values:["toggle","toggleothers",!1],dflt:"toggleothers",editType:"legend"},groupclick:{valType:"enumerated",values:["toggleitem","togglegroup"],dflt:"togglegroup",editType:"legend"},x:{valType:"number",editType:"legend"},xref:{valType:"enumerated",dflt:"paper",values:["container","paper"],editType:"layoutstyle"},xanchor:{valType:"enumerated",values:["auto","left","center","right"],dflt:"left",editType:"legend"},y:{valType:"number",editType:"legend"},yref:{valType:"enumerated",dflt:"paper",values:["container","paper"],editType:"layoutstyle"},yanchor:{valType:"enumerated",values:["auto","top","middle","bottom"],editType:"legend"},uirevision:{valType:"any",editType:"none"},valign:{valType:"enumerated",values:["top","middle","bottom"],dflt:"middle",editType:"legend"},title:{text:{valType:"string",dflt:"",editType:"legend"},font:AB({editType:"legend"}),side:{valType:"enumerated",values:["top","left","top left","top center","top right"],editType:"legend"},editType:"legend"},editType:"legend"}});var FL=ye(DL=>{"use strict";DL.isGrouped=function(t){return(t.traceorder||"").indexOf("grouped")!==-1};DL.isVertical=function(t){return t.orientation!=="h"};DL.isReversed=function(t){return(t.traceorder||"").indexOf("reversed")!==-1}});var EB=ye((aar,ele)=>{"use strict";var zL=qa(),Yp=Dr(),Flt=vl(),zlt=Gl(),Olt=SB(),qlt=c3(),MB=FL();function Blt(e,t,r,n){var i=t[e]||{},a=Flt.newContainer(r,e);function o(N,H){return Yp.coerce(i,a,Olt,N,H)}var s=Yp.coerceFont(o,"font",r.font);o("bgcolor",r.paper_bgcolor),o("bordercolor");var l=o("visible");if(!l)return;var u,c=function(N,H){var te=u._input,oe=u;return Yp.coerce(te,oe,zlt,N,H)},f=r.font||{},h=Yp.coerceFont(o,"grouptitlefont",f,{overrideDflt:{size:Math.round(f.size*1.1)}}),d=0,v=!1,_="normal",b=(r.shapes||[]).filter(function(N){return N.showlegend});function p(N){return zL.traceIs(N,"pie-like")&&N._length!=null&&(Array.isArray(N.legend)||Array.isArray(N.showlegend))}n.filter(p).forEach(function(N){N.visible&&d++;for(var H=0;H<N._length;H++){var te=(Array.isArray(N.legend)?N.legend[H]:N.legend)||"legend";te===e&&((Array.isArray(N.showlegend)?N.showlegend[H]:N.showlegend)||N._dfltShowLegend)&&(v=!0,d++)}if(e==="legend"&&N._length>N.legend.length)for(var oe=N.legend.length;oe<N._length;oe++)v=!0,d++});for(var k=n.concat(b).filter(function(N){return!p(u)&&e===(N.legend||"legend")}),E=0;E<k.length;E++)if(u=k[E],!!u.visible){var S=u._isShape;(u.showlegend||u._dfltShowLegend&&!(u._module&&u._module.attributes&&u._module.attributes.showlegend&&u._module.attributes.showlegend.dflt===!1))&&(d++,u.showlegend&&(v=!0,(!S&&zL.traceIs(u,"pie-like")||u._input.showlegend===!0)&&d++),Yp.coerceFont(c,"legendgrouptitle.font",h)),(!S&&zL.traceIs(u,"bar")&&r.barmode==="stack"||["tonextx","tonexty"].indexOf(u.fill)!==-1)&&(_=MB.isGrouped({traceorder:_})?"grouped+reversed":"reversed"),u.legendgroup!==void 0&&u.legendgroup!==""&&(_=MB.isReversed({traceorder:_})?"reversed+grouped":"grouped")}var L=Yp.coerce(t,r,qlt,"showlegend",r.showlegend||v&&d>(e==="legend"?1:0));if(L===!1&&(r[e]=void 0),!(L===!1&&!i.uirevision)&&(o("uirevision",r.uirevision),L!==!1)){o("borderwidth");var x=o("orientation"),C=o("yref"),M=o("xref"),g=x==="h",P=C==="paper",T=M==="paper",z,O,V,G="left";g?(z=0,zL.getComponentMethod("rangeslider","isVisible")(t.xaxis)?P?(O=1.1,V="bottom"):(O=1,V="top"):P?(O=-.1,V="top"):(O=0,V="bottom")):(O=1,V="auto",T?z=1.02:(z=1,G="right")),Yp.coerce(i,a,{x:{valType:"number",editType:"legend",min:T?-2:0,max:T?3:1,dflt:z}},"x"),Yp.coerce(i,a,{y:{valType:"number",editType:"legend",min:P?-2:0,max:P?3:1,dflt:O}},"y"),o("traceorder",_),MB.isGrouped(r[e])&&o("tracegroupgap"),o("entrywidth"),o("entrywidthmode"),o("indentation"),o("itemsizing"),o("itemwidth"),o("itemclick"),o("itemdoubleclick"),o("groupclick"),o("xanchor",G),o("yanchor",V),o("maxheight"),o("valign"),Yp.noneOrAll(i,a,["x","y"]);var Z=o("title.text");if(Z){o("title.side",g?"left":"top");var j=Yp.extendFlat({},s,{size:Yp.bigFont(s.size)});Yp.coerceFont(o,"title.font",j)}}}ele.exports=function(t,r,n){var i,a=n.slice(),o=r.shapes;if(o)for(i=0;i<o.length;i++){var s=o[i];if(s.showlegend){var l={_input:s._input,visible:s.visible,showlegend:s.showlegend,legend:s.legend};a.push(l)}}var u=["legend"];for(i=0;i<a.length;i++)Array.isArray(a[i].legend)?u=u.concat(a[i].legend):Yp.pushUnique(u,a[i].legend);for(r._legends=[],i=0;i<u.length;i++){var c=u[i];Blt(c,t,r,a),r[c]&&(r[c]._id=c),r._legends.push(c)}}});var rle=ye((oar,tle)=>{"use strict";var q3=qa(),CB=Dr(),Nlt=CB.pushUnique,kB=!0;tle.exports=function(t,r,n){var i=r._fullLayout;if(r._dragged||r._editing)return;var a=i.legend.itemclick,o=i.legend.itemdoubleclick,s=i.legend.groupclick;n===1&&a==="toggle"&&o==="toggleothers"&&kB&&r.data&&r._context.showTips&&CB.notifier(CB._(r,"Double-click on legend to isolate one trace"),"long"),kB=!1;var l;if(n===1?l=a:n===2&&(l=o),!l)return;var u=s==="togglegroup",c=i.hiddenlabels?i.hiddenlabels.slice():[],f=t.data()[0][0];if(f.groupTitle&&f.noClick)return;var h=r._fullData,d=(i.shapes||[]).filter(function(Gt){return Gt.showlegend}),v=h.concat(d),_=f.trace;_._isShape&&(_=_._fullInput);var b=_.legendgroup,p,k,E,S,L,x,C={},M=[],g=[],P=[];function T(Gt,Nt){var Jt=M.indexOf(Gt),sr=C.visible;return sr||(sr=C.visible=[]),M.indexOf(Gt)===-1&&(M.push(Gt),Jt=M.length-1),sr[Jt]=Nt,Jt}var z=(i.shapes||[]).map(function(Gt){return Gt._input}),O=!1;function V(Gt,Nt){z[Gt].visible=Nt,O=!0}function G(Gt,Nt){if(!(f.groupTitle&&!u)){var Jt=Gt._fullInput||Gt,sr=Jt._isShape,wr=Jt.index;wr===void 0&&(wr=Jt._index);var cr=Jt.visible===!1?!1:Nt;sr?V(wr,cr):T(wr,cr)}}var Z=_.legend,j=_._fullInput,N=j&&j._isShape;if(!N&&q3.traceIs(_,"pie-like")){var H=f.label,te=c.indexOf(H);if(l==="toggle")te===-1?c.push(H):c.splice(te,1);else if(l==="toggleothers"){var oe=te!==-1,_e=[];for(p=0;p<r.calcdata.length;p++){var Ee=r.calcdata[p];for(k=0;k<Ee.length;k++){var Ce=Ee[k],me=Ce.label;Z===Ee[0].trace.legend&&H!==me&&(c.indexOf(me)===-1&&(oe=!0),Nlt(c,me),_e.push(me))}}if(!oe)for(var ie=0;ie<_e.length;ie++){var Se=c.indexOf(_e[ie]);Se!==-1&&c.splice(Se,1)}}q3.call("_guiRelayout",r,"hiddenlabels",c)}else{var Le=b&&b.length,Ae=[],Fe;if(Le)for(p=0;p<v.length;p++)Fe=v[p],Fe.visible&&Fe.legendgroup===b&&Ae.push(p);if(l==="toggle"){var Pe;switch(_.visible){case!0:Pe="legendonly";break;case!1:Pe=!1;break;case"legendonly":Pe=!0;break}if(Le)if(u)for(p=0;p<v.length;p++){var ge=v[p];ge.visible!==!1&&ge.legendgroup===b&&G(ge,Pe)}else G(_,Pe);else G(_,Pe)}else if(l==="toggleothers"){var Re,ce,Ze,ut,pt,Zt=!0;for(p=0;p<v.length;p++)if(pt=v[p],Re=pt===_,Ze=pt.showlegend!==!0,!(Re||Ze)&&(ce=Le&&pt.legendgroup===b,!ce&&pt.legend===Z&&pt.visible===!0&&!q3.traceIs(pt,"notLegendIsolatable"))){Zt=!1;break}for(p=0;p<v.length;p++)if(pt=v[p],!(pt.visible===!1||pt.legend!==Z)&&!q3.traceIs(pt,"notLegendIsolatable"))switch(_.visible){case"legendonly":G(pt,!0);break;case!0:ut=Zt?!0:"legendonly",Re=pt===_,Ze=pt.showlegend!==!0&&!pt.legendgroup,ce=Re||Le&&pt.legendgroup===b,G(pt,ce||Ze?!0:ut);break}}for(p=0;p<g.length;p++)if(E=g[p],!!E){var st=E.constructUpdate(),lt=Object.keys(st);for(k=0;k<lt.length;k++)S=lt[k],x=C[S]=C[S]||[],x[P[p]]=st[S]}for(L=Object.keys(C),p=0;p<L.length;p++)for(S=L[p],k=0;k<M.length;k++)C[S].hasOwnProperty(k)||(C[S][k]=void 0);O?q3.call("_guiUpdate",r,C,{shapes:z},M):q3.call("_guiRestyle",r,C,M)}}});var LB=ye((sar,ile)=>{"use strict";ile.exports={scrollBarWidth:6,scrollBarMinHeight:20,scrollBarColor:"#808BA4",scrollBarMargin:4,scrollBarEnterAttrs:{rx:20,ry:3,width:0,height:0},titlePad:2,itemGap:5}});var ole=ye((lar,ale)=>{"use strict";var nle=qa(),PB=FL();ale.exports=function(t,r,n){var i=r._inHover,a=PB.isGrouped(r),o=PB.isReversed(r),s={},l=[],u=!1,c={},f=0,h=0,d,v;function _(H,te,oe){if(r.visible!==!1&&!(n&&H!==r._id))if(te===""||!PB.isGrouped(r)){var _e="~~i"+f;l.push(_e),s[_e]=[oe],f++}else l.indexOf(te)===-1?(l.push(te),u=!0,s[te]=[oe]):s[te].push(oe)}for(d=0;d<t.length;d++){var b=t[d],p=b[0],k=p.trace,E=k.legend,S=k.legendgroup;if(!(!i&&(!k.visible||!k.showlegend)))if(nle.traceIs(k,"pie-like")){var L=Array.isArray(k.legend),x=Array.isArray(k.showlegend);for(c[S]||(c[S]={}),v=0;v<b.length;v++)if(!(x&&k.showlegend[b[v].i]===!1)){L&&(E=k.legend[b[v].i]||"legend");var C=b[v].label;c[S][C]||(_(E,S,{label:C,color:b[v].color,i:b[v].i,trace:k,pts:b[v].pts}),c[S][C]=!0,h=Math.max(h,(C||"").length))}}else _(E,S,p),h=Math.max(h,(k.name||"").length)}if(!l.length)return[];var M=!u||!a,g=[];for(d=0;d<l.length;d++){var P=s[l[d]];M?g.push(P[0]):g.push(P)}for(M&&(g=[g]),d=0;d<g.length;d++){var T=1/0;for(v=0;v<g[d].length;v++){var z=g[d][v].trace.legendrank;T>z&&(T=z)}g[d][0]._groupMinRank=T,g[d][0]._preGroupSort=d}var O=function(H,te){return H[0]._groupMinRank-te[0]._groupMinRank||H[0]._preGroupSort-te[0]._preGroupSort},V=function(H,te){return H.trace.legendrank-te.trace.legendrank||H._preSort-te._preSort};for(g.forEach(function(H,te){H[0]._preGroupSort=te}),g.sort(O),d=0;d<g.length;d++){g[d].forEach(function(H,te){H._preSort=te}),g[d].sort(V);var G=g[d][0].trace,Z=null;for(v=0;v<g[d].length;v++){var j=g[d][v].trace.legendgrouptitle;if(j&&j.text){Z=j,i&&(j.font=r._groupTitleFont);break}}if(o&&g[d].reverse(),Z){var N=!1;for(v=0;v<g[d].length;v++)if(nle.traceIs(g[d][v].trace,"pie-like")){N=!0;break}g[d].unshift({i:-1,groupTitle:Z,noClick:N,trace:{showlegend:G.showlegend,legendgroup:G.legendgroup,visible:r.groupclick==="toggleitem"?!0:G.visible}})}for(v=0;v<g[d].length;v++)g[d][v]=[g[d][v]]}return r._lgroupsLength=g.length,r._maxNameLength=h,g}});var l_=ye(Ib=>{"use strict";var OL=Dr();function sle(e){return e.indexOf("e")!==-1?e.replace(/[.]?0+e/,"e"):e.indexOf(".")!==-1?e.replace(/[.]?0+$/,""):e}Ib.formatPiePercent=function(t,r){var n=sle((t*100).toPrecision(3));return OL.numSeparate(n,r)+"%"};Ib.formatPieValue=function(t,r){var n=sle(t.toPrecision(10));return OL.numSeparate(n,r)};Ib.getFirstFilled=function(t,r){if(OL.isArrayOrTypedArray(t))for(var n=0;n<r.length;n++){var i=t[r[n]];if(i||i===0||i==="")return i}};Ib.castOption=function(t,r){if(OL.isArrayOrTypedArray(t))return Ib.getFirstFilled(t,r);if(t)return t};Ib.getRotationAngle=function(e){return(e==="auto"?0:e)*Math.PI/180}});var ule=ye((car,lle)=>{"use strict";var Ult=So(),Vlt=ka();lle.exports=function(t,r,n,i){var a=n.marker.pattern;a&&a.shape?Ult.pointStyle(t,n,i,r):Vlt.fill(t,r.color)}});var B3=ye((far,hle)=>{"use strict";var cle=ka(),fle=l_().castOption,Glt=ule();hle.exports=function(t,r,n,i){var a=n.marker.line,o=fle(a.color,r.pts)||cle.defaultLine,s=fle(a.width,r.pts)||0;t.call(Glt,r,n,i).style("stroke-width",s).call(cle.stroke,o)}});var FB=ye((har,yle)=>{"use strict";var qv=Oa(),IB=qa(),_v=Dr(),dle=_v.strTranslate,Kp=So(),w0=ka(),RB=pv().extractOpts,qL=Ru(),Hlt=B3(),jlt=l_().castOption,Wlt=LB(),vle=12,ple=5,Rb=2,Xlt=10,N3=5;yle.exports=function(t,r,n){var i=r._fullLayout;n||(n=i.legend);var a=n.itemsizing==="constant",o=n.itemwidth,s=(o+Wlt.itemGap*2)/2,l=dle(s,0),u=function(C,M,g,P){var T;if(C+1)T=C;else if(M&&M.width>0)T=M.width;else return 0;return a?P:Math.min(T,g)};t.each(function(C){var M=qv.select(this),g=_v.ensureSingle(M,"g","layers");g.style("opacity",C[0].trace.opacity);var P=n.indentation,T=n.valign,z=C[0].lineHeight,O=C[0].height;if(T==="middle"&&P===0||!z||!O)g.attr("transform",null);else{var V={top:1,bottom:-1}[T],G=V*(.5*(z-O+3))||0,Z=n.indentation;g.attr("transform",dle(Z,G))}var j=g.selectAll("g.legendfill").data([C]);j.enter().append("g").classed("legendfill",!0);var N=g.selectAll("g.legendlines").data([C]);N.enter().append("g").classed("legendlines",!0);var H=g.selectAll("g.legendsymbols").data([C]);H.enter().append("g").classed("legendsymbols",!0),H.selectAll("g.legendpoints").data([C]).enter().append("g").classed("legendpoints",!0)}).each(x).each(h).each(v).each(d).each(b).each(S).each(E).each(c).each(f).each(p).each(k);function c(C){var M=gle(C),g=M.showFill,P=M.showLine,T=M.showGradientLine,z=M.showGradientFill,O=M.anyFill,V=M.anyLine,G=C[0],Z=G.trace,j,N,H=RB(Z),te=H.colorscale,oe=H.reversescale,_e=function(Ae){if(Ae.size())if(g)Kp.fillGroupStyle(Ae,r,!0);else{var Fe="legendfill-"+Z.uid;Kp.gradient(Ae,r,Fe,DB(oe),te,"fill")}},Ee=function(Ae){if(Ae.size()){var Fe="legendline-"+Z.uid;Kp.lineGroupStyle(Ae),Kp.gradient(Ae,r,Fe,DB(oe),te,"stroke")}},Ce=qL.hasMarkers(Z)||!O?"M5,0":V?"M5,-2":"M5,-3",me=qv.select(this),ie=me.select(".legendfill").selectAll("path").data(g||z?[C]:[]);if(ie.enter().append("path").classed("js-fill",!0),ie.exit().remove(),ie.attr("d",Ce+"h"+o+"v6h-"+o+"z").call(_e),P||T){var Se=u(void 0,Z.line,Xlt,ple);N=_v.minExtend(Z,{line:{width:Se}}),j=[_v.minExtend(G,{trace:N})]}var Le=me.select(".legendlines").selectAll("path").data(P||T?[j]:[]);Le.enter().append("path").classed("js-line",!0),Le.exit().remove(),Le.attr("d",Ce+(T?"l"+o+",0.0001":"h"+o)).call(P?Kp.lineGroupStyle:Ee)}function f(C){var M=gle(C),g=M.anyFill,P=M.anyLine,T=M.showLine,z=M.showMarker,O=C[0],V=O.trace,G=!z&&!P&&!g&&qL.hasText(V),Z,j;function N(ie,Se,Le,Ae){var Fe=_v.nestedProperty(V,ie).get(),Pe=_v.isArrayOrTypedArray(Fe)&&Se?Se(Fe):Fe;if(a&&Pe&&Ae!==void 0&&(Pe=Ae),Le){if(Pe<Le[0])return Le[0];if(Pe>Le[1])return Le[1]}return Pe}function H(ie){return O._distinct&&O.index&&ie[O.index]?ie[O.index]:ie[0]}if(z||G||T){var te={},oe={};if(z){te.mc=N("marker.color",H),te.mx=N("marker.symbol",H),te.mo=N("marker.opacity",_v.mean,[.2,1]),te.mlc=N("marker.line.color",H),te.mlw=N("marker.line.width",_v.mean,[0,5],Rb),oe.marker={sizeref:1,sizemin:1,sizemode:"diameter"};var _e=N("marker.size",_v.mean,[2,16],vle);te.ms=_e,oe.marker.size=_e}T&&(oe.line={width:N("line.width",H,[0,10],ple)}),G&&(te.tx="Aa",te.tp=N("textposition",H),te.ts=10,te.tc=N("textfont.color",H),te.tf=N("textfont.family",H),te.tw=N("textfont.weight",H),te.ty=N("textfont.style",H),te.tv=N("textfont.variant",H),te.tC=N("textfont.textcase",H),te.tE=N("textfont.lineposition",H),te.tS=N("textfont.shadow",H)),Z=[_v.minExtend(O,te)],j=_v.minExtend(V,oe),j.selectedpoints=null,j.texttemplate=null}var Ee=qv.select(this).select("g.legendpoints"),Ce=Ee.selectAll("path.scatterpts").data(z?Z:[]);Ce.enter().insert("path",":first-child").classed("scatterpts",!0).attr("transform",l),Ce.exit().remove(),Ce.call(Kp.pointStyle,j,r),z&&(Z[0].mrc=3);var me=Ee.selectAll("g.pointtext").data(G?Z:[]);me.enter().append("g").classed("pointtext",!0).append("text").attr("transform",l),me.exit().remove(),me.selectAll("text").call(Kp.textPointStyle,j,r)}function h(C){var M=C[0].trace,g=M.type==="waterfall";if(C[0]._distinct&&g){var P=C[0].trace[C[0].dir].marker;return C[0].mc=P.color,C[0].mlw=P.line.width,C[0].mlc=P.line.color,_(C,this,"waterfall")}var T=[];M.visible&&g&&(T=C[0].hasTotals?[["increasing","M-6,-6V6H0Z"],["totals","M6,6H0L-6,-6H-0Z"],["decreasing","M6,6V-6H0Z"]]:[["increasing","M-6,-6V6H6Z"],["decreasing","M6,6V-6H-6Z"]]);var z=qv.select(this).select("g.legendpoints").selectAll("path.legendwaterfall").data(T);z.enter().append("path").classed("legendwaterfall",!0).attr("transform",l).style("stroke-miterlimit",1),z.exit().remove(),z.each(function(O){var V=qv.select(this),G=M[O[0]].marker,Z=u(void 0,G.line,N3,Rb);V.attr("d",O[1]).style("stroke-width",Z+"px").call(w0.fill,G.color),Z&&V.call(w0.stroke,G.line.color)})}function d(C){_(C,this)}function v(C){_(C,this,"funnel")}function _(C,M,g){var P=C[0].trace,T=P.marker||{},z=T.line||{},O=T.cornerradius?"M6,3a3,3,0,0,1-3,3H-3a3,3,0,0,1-3-3V-3a3,3,0,0,1,3-3H3a3,3,0,0,1,3,3Z":"M6,6H-6V-6H6Z",V=g?P.visible&&P.type===g:IB.traceIs(P,"bar"),G=qv.select(M).select("g.legendpoints").selectAll("path.legend"+g).data(V?[C]:[]);G.enter().append("path").classed("legend"+g,!0).attr("d",O).attr("transform",l),G.exit().remove(),G.each(function(Z){var j=qv.select(this),N=Z[0],H=u(N.mlw,T.line,N3,Rb);j.style("stroke-width",H+"px");var te=N.mcc;if(!n._inHover&&"mc"in N){var oe=RB(T),_e=oe.mid;_e===void 0&&(_e=(oe.max+oe.min)/2),te=Kp.tryColorscale(T,"")(_e)}var Ee=te||N.mc||T.color,Ce=T.pattern,me=Kp.getPatternAttr,ie=Ce&&(me(Ce.shape,0,"")||me(Ce.path,0,""));if(ie){var Se=me(Ce.bgcolor,0,null),Le=me(Ce.fgcolor,0,null),Ae=Ce.fgopacity,Fe=mle(Ce.size,8,10),Pe=mle(Ce.solidity,.5,1),ge="legend-"+P.uid;j.call(Kp.pattern,"legend",r,ge,ie,Fe,Pe,te,Ce.fillmode,Se,Le,Ae)}else j.call(w0.fill,Ee);H&&w0.stroke(j,N.mlc||z.color)})}function b(C){var M=C[0].trace,g=qv.select(this).select("g.legendpoints").selectAll("path.legendbox").data(M.visible&&IB.traceIs(M,"box-violin")?[C]:[]);g.enter().append("path").classed("legendbox",!0).attr("d","M6,6H-6V-6H6Z").attr("transform",l),g.exit().remove(),g.each(function(){var P=qv.select(this);if((M.boxpoints==="all"||M.points==="all")&&w0.opacity(M.fillcolor)===0&&w0.opacity((M.line||{}).color)===0){var T=_v.minExtend(M,{marker:{size:a?vle:_v.constrain(M.marker.size,2,16),sizeref:1,sizemin:1,sizemode:"diameter"}});g.call(Kp.pointStyle,T,r)}else{var z=u(void 0,M.line,N3,Rb);P.style("stroke-width",z+"px").call(w0.fill,M.fillcolor),z&&w0.stroke(P,M.line.color)}})}function p(C){var M=C[0].trace,g=qv.select(this).select("g.legendpoints").selectAll("path.legendcandle").data(M.visible&&M.type==="candlestick"?[C,C]:[]);g.enter().append("path").classed("legendcandle",!0).attr("d",function(P,T){return T?"M-15,0H-8M-8,6V-6H8Z":"M15,0H8M8,-6V6H-8Z"}).attr("transform",l).style("stroke-miterlimit",1),g.exit().remove(),g.each(function(P,T){var z=qv.select(this),O=M[T?"increasing":"decreasing"],V=u(void 0,O.line,N3,Rb);z.style("stroke-width",V+"px").call(w0.fill,O.fillcolor),V&&w0.stroke(z,O.line.color)})}function k(C){var M=C[0].trace,g=qv.select(this).select("g.legendpoints").selectAll("path.legendohlc").data(M.visible&&M.type==="ohlc"?[C,C]:[]);g.enter().append("path").classed("legendohlc",!0).attr("d",function(P,T){return T?"M-15,0H0M-8,-6V0":"M15,0H0M8,6V0"}).attr("transform",l).style("stroke-miterlimit",1),g.exit().remove(),g.each(function(P,T){var z=qv.select(this),O=M[T?"increasing":"decreasing"],V=u(void 0,O.line,N3,Rb);z.style("fill","none").call(Kp.dashLine,O.line.dash,V),V&&w0.stroke(z,O.line.color)})}function E(C){L(C,this,"pie")}function S(C){L(C,this,"funnelarea")}function L(C,M,g){var P=C[0],T=P.trace,z=g?T.visible&&T.type===g:IB.traceIs(T,g),O=qv.select(M).select("g.legendpoints").selectAll("path.legend"+g).data(z?[C]:[]);if(O.enter().append("path").classed("legend"+g,!0).attr("d","M6,6H-6V-6H6Z").attr("transform",l),O.exit().remove(),O.size()){var V=T.marker||{},G=u(jlt(V.line.width,P.pts),V.line,N3,Rb),Z="pieLike",j=_v.minExtend(T,{marker:{line:{width:G}}},Z),N=_v.minExtend(P,{trace:j},Z);Hlt(O,N,j,r)}}function x(C){var M=C[0].trace,g,P=[];if(M.visible)switch(M.type){case"histogram2d":case"heatmap":P=[["M-15,-2V4H15V-2Z"]],g=!0;break;case"choropleth":case"choroplethmapbox":case"choroplethmap":P=[["M-6,-6V6H6V-6Z"]],g=!0;break;case"densitymapbox":case"densitymap":P=[["M-6,0 a6,6 0 1,0 12,0 a 6,6 0 1,0 -12,0"]],g="radial";break;case"cone":P=[["M-6,2 A2,2 0 0,0 -6,6 V6L6,4Z"],["M-6,-6 A2,2 0 0,0 -6,-2 L6,-4Z"],["M-6,-2 A2,2 0 0,0 -6,2 L6,0Z"]],g=!1;break;case"streamtube":P=[["M-6,2 A2,2 0 0,0 -6,6 H6 A2,2 0 0,1 6,2 Z"],["M-6,-6 A2,2 0 0,0 -6,-2 H6 A2,2 0 0,1 6,-6 Z"],["M-6,-2 A2,2 0 0,0 -6,2 H6 A2,2 0 0,1 6,-2 Z"]],g=!1;break;case"surface":P=[["M-6,-6 A2,3 0 0,0 -6,0 H6 A2,3 0 0,1 6,-6 Z"],["M-6,1 A2,3 0 0,1 -6,6 H6 A2,3 0 0,0 6,0 Z"]],g=!0;break;case"mesh3d":P=[["M-6,6H0L-6,-6Z"],["M6,6H0L6,-6Z"],["M-6,-6H6L0,6Z"]],g=!1;break;case"volume":P=[["M-6,6H0L-6,-6Z"],["M6,6H0L6,-6Z"],["M-6,-6H6L0,6Z"]],g=!0;break;case"isosurface":P=[["M-6,6H0L-6,-6Z"],["M6,6H0L6,-6Z"],["M-6,-6 A12,24 0 0,0 6,-6 L0,6Z"]],g=!1;break}var T=qv.select(this).select("g.legendpoints").selectAll("path.legend3dandfriends").data(P);T.enter().append("path").classed("legend3dandfriends",!0).attr("transform",l).style("stroke-miterlimit",1),T.exit().remove(),T.each(function(z,O){var V=qv.select(this),G=RB(M),Z=G.colorscale,j=G.reversescale,N=function(_e){if(_e.size()){var Ee="legendfill-"+M.uid;Kp.gradient(_e,r,Ee,DB(j,g==="radial"),Z,"fill")}},H;if(Z){if(!g){var oe=Z.length;H=O===0?Z[j?oe-1:0][1]:O===1?Z[j?0:oe-1][1]:Z[Math.floor((oe-1)/2)][1]}}else{var te=M.vertexcolor||M.facecolor||M.color;H=_v.isArrayOrTypedArray(te)?te[O]||te[0]:te}V.attr("d",z[0]),H?V.call(w0.fill,H):V.call(N)})}};function DB(e,t){var r=t?"radial":"horizontal";return r+(e?"":"reversed")}function gle(e){var t=e[0].trace,r=t.contours,n=qL.hasLines(t),i=qL.hasMarkers(t),a=t.visible&&t.fill&&t.fill!=="none",o=!1,s=!1;if(r){var l=r.coloring;l==="lines"?o=!0:n=l==="none"||l==="heatmap"||r.showlines,r.type==="constraint"?a=r._operation!=="=":(l==="fill"||l==="heatmap")&&(s=!0)}return{showMarker:i,showLine:n,showFill:a,showGradientLine:o,showGradientFill:s,anyLine:n||o,anyFill:a||s}}function mle(e,t,r){return e&&_v.isArrayOrTypedArray(e)?t:e>r?r:e}});var BB=ye((dar,kle)=>{"use strict";var Mp=Oa(),Rh=Dr(),OB=Mc(),G3=qa(),_le=_3(),zB=yv(),Dh=So(),NL=ka(),Db=ru(),xle=rle(),ed=LB(),qB=$h(),Mle=qB.LINE_SPACING,V3=qB.FROM_TL,ble=qB.FROM_BR,wle=ole(),Zlt=FB(),Tle=FL(),U3=1,Ylt=/^legend[0-9]*$/;kle.exports=function(t,r){if(r)Ale(t,r);else{var n=t._fullLayout,i=n._legends,a=n._infolayer.selectAll('[class^="legend"]');a.each(function(){var u=Mp.select(this),c=u.attr("class"),f=c.split(" ")[0];f.match(Ylt)&&i.indexOf(f)===-1&&u.remove()});for(var o=0;o<i.length;o++){var s=i[o],l=t._fullLayout[s];Ale(t,l)}}};function Klt(e,t,r){if(!(t.title.side!=="top center"&&t.title.side!=="top right")){var n=t.title.font,i=n.size*Mle,a=0,o=e.node(),s=Dh.bBox(o).width;t.title.side==="top center"?a=.5*(t._width-2*r-2*ed.titlePad-s):t.title.side==="top right"&&(a=t._width-2*r-2*ed.titlePad-s),Db.positionText(e,r+ed.titlePad+a,r+i)}}function Ale(e,t){var r=t||{},n=e._fullLayout,i=HL(r),a,o,s=r._inHover;if(s?(o=r.layer,a="hover"):(o=n._infolayer,a=i),!!o){a+=n._uid,e._legendMouseDownTime||(e._legendMouseDownTime=0);var l;if(s){if(!r.entries)return;l=wle(r.entries,r)}else{for(var u=(e.calcdata||[]).slice(),c=n.shapes,f=0;f<c.length;f++){var h=c[f];if(h.showlegend){var d={_isShape:!0,_fullInput:h,index:h._index,name:h.name||h.label.text||"shape "+h._index,legend:h.legend,legendgroup:h.legendgroup,legendgrouptitle:h.legendgrouptitle,legendrank:h.legendrank,legendwidth:h.legendwidth,showlegend:h.showlegend,visible:h.visible,opacity:h.opacity,mode:h.type==="line"?"lines":"markers",line:h.line,marker:{line:h.line,color:h.fillcolor,size:12,symbol:h.type==="rect"?"square":h.type==="circle"?"circle":"hexagon2"}};u.push([{trace:d}])}}l=n.showlegend&&wle(u,r,n._legends.length>1)}var v=n.hiddenlabels||[];if(!s&&(!n.showlegend||!l.length))return o.selectAll("."+i).remove(),n._topdefs.select("#"+a).remove(),OB.autoMargin(e,i);var _=Rh.ensureSingle(o,"g",i,function(M){s||M.attr("pointer-events","all")}),b=Rh.ensureSingleById(n._topdefs,"clipPath",a,function(M){M.append("rect")}),p=Rh.ensureSingle(_,"rect","bg",function(M){M.attr("shape-rendering","crispEdges")});p.call(NL.stroke,r.bordercolor).call(NL.fill,r.bgcolor).style("stroke-width",r.borderwidth+"px");var k=Rh.ensureSingle(_,"g","scrollbox"),E=r.title;r._titleWidth=0,r._titleHeight=0;var S;E.text?(S=Rh.ensureSingle(k,"text",i+"titletext"),S.attr("text-anchor","start").call(Dh.font,E.font).text(E.text),UL(S,k,e,r,U3)):k.selectAll("."+i+"titletext").remove();var L=Rh.ensureSingle(_,"rect","scrollbar",function(M){M.attr(ed.scrollBarEnterAttrs).call(NL.fill,ed.scrollBarColor)}),x=k.selectAll("g.groups").data(l);x.enter().append("g").attr("class","groups"),x.exit().remove();var C=x.selectAll("g.traces").data(Rh.identity);C.enter().append("g").attr("class","traces"),C.exit().remove(),C.style("opacity",function(M){var g=M[0].trace;return G3.traceIs(g,"pie-like")?v.indexOf(M[0].label)!==-1?.5:1:g.visible==="legendonly"?.5:1}).each(function(){Mp.select(this).call(Jlt,e,r)}).call(Zlt,e,r).each(function(){s||Mp.select(this).call($lt,e,i)}),Rh.syncOrAsync([OB.previousPromises,function(){return tut(e,x,C,r)},function(){var M=n._size,g=r.borderwidth,P=r.xref==="paper",T=r.yref==="paper";if(E.text&&Klt(S,r,g),!s){var z,O;P?z=M.l+M.w*r.x-V3[VL(r)]*r._width:z=n.width*r.x-V3[VL(r)]*r._width,T?O=M.t+M.h*(1-r.y)-V3[GL(r)]*r._effHeight:O=n.height*(1-r.y)-V3[GL(r)]*r._effHeight;var V=rut(e,i,z,O);if(V)return;if(n.margin.autoexpand){var G=z,Z=O;z=P?Rh.constrain(z,0,n.width-r._width):G,O=T?Rh.constrain(O,0,n.height-r._effHeight):Z,z!==G&&Rh.log("Constrain "+i+".x to make legend fit inside graph"),O!==Z&&Rh.log("Constrain "+i+".y to make legend fit inside graph")}Dh.setTranslate(_,z,O)}if(L.on(".drag",null),_.on("wheel",null),s||r._height<=r._maxHeight||e._context.staticPlot){var j=r._effHeight;s&&(j=r._height),p.attr({width:r._width-g,height:j-g,x:g/2,y:g/2}),Dh.setTranslate(k,0,0),b.select("rect").attr({width:r._width-2*g,height:j-2*g,x:g,y:g}),Dh.setClipUrl(k,a,e),Dh.setRect(L,0,0,0,0),delete r._scrollY}else{var N=Math.max(ed.scrollBarMinHeight,r._effHeight*r._effHeight/r._height),H=r._effHeight-N-2*ed.scrollBarMargin,te=r._height-r._effHeight,oe=H/te,_e=Math.min(r._scrollY||0,te);p.attr({width:r._width-2*g+ed.scrollBarWidth+ed.scrollBarMargin,height:r._effHeight-g,x:g/2,y:g/2}),b.select("rect").attr({width:r._width-2*g+ed.scrollBarWidth+ed.scrollBarMargin,height:r._effHeight-2*g,x:g,y:g+_e}),Dh.setClipUrl(k,a,e),Fe(_e,N,oe),_.on("wheel",function(){_e=Rh.constrain(r._scrollY+Mp.event.deltaY/te*H,0,te),Fe(_e,N,oe),_e!==0&&_e!==te&&Mp.event.preventDefault()});var Ee,Ce,me,ie=function(Ze,ut,pt){var Zt=(pt-ut)/oe+Ze;return Rh.constrain(Zt,0,te)},Se=function(Ze,ut,pt){var Zt=(ut-pt)/oe+Ze;return Rh.constrain(Zt,0,te)},Le=Mp.behavior.drag().on("dragstart",function(){var Ze=Mp.event.sourceEvent;Ze.type==="touchstart"?Ee=Ze.changedTouches[0].clientY:Ee=Ze.clientY,me=_e}).on("drag",function(){var Ze=Mp.event.sourceEvent;Ze.buttons===2||Ze.ctrlKey||(Ze.type==="touchmove"?Ce=Ze.changedTouches[0].clientY:Ce=Ze.clientY,_e=ie(me,Ee,Ce),Fe(_e,N,oe))});L.call(Le);var Ae=Mp.behavior.drag().on("dragstart",function(){var Ze=Mp.event.sourceEvent;Ze.type==="touchstart"&&(Ee=Ze.changedTouches[0].clientY,me=_e)}).on("drag",function(){var Ze=Mp.event.sourceEvent;Ze.type==="touchmove"&&(Ce=Ze.changedTouches[0].clientY,_e=Se(me,Ee,Ce),Fe(_e,N,oe))});k.call(Ae)}function Fe(Ze,ut,pt){r._scrollY=e._fullLayout[i]._scrollY=Ze,Dh.setTranslate(k,0,-Ze),Dh.setRect(L,r._width,ed.scrollBarMargin+Ze*pt,ed.scrollBarWidth,ut),b.select("rect").attr("y",g+Ze)}if(e._context.edits.legendPosition){var Pe,ge,Re,ce;_.classed("cursor-move",!0),zB.init({element:_.node(),gd:e,prepFn:function(Ze){if(Ze.target!==L.node()){var ut=Dh.getTranslate(_);Re=ut.x,ce=ut.y}},moveFn:function(Ze,ut){if(Re!==void 0&&ce!==void 0){var pt=Re+Ze,Zt=ce+ut;Dh.setTranslate(_,pt,Zt),Pe=zB.align(pt,r._width,M.l,M.l+M.w,r.xanchor),ge=zB.align(Zt+r._height,-r._height,M.t+M.h,M.t,r.yanchor)}},doneFn:function(){if(Pe!==void 0&&ge!==void 0){var Ze={};Ze[i+".x"]=Pe,Ze[i+".y"]=ge,G3.call("_guiRelayout",e,Ze)}},clickFn:function(Ze,ut){var pt=o.selectAll("g.traces").filter(function(){var Zt=this.getBoundingClientRect();return ut.clientX>=Zt.left&&ut.clientX<=Zt.right&&ut.clientY>=Zt.top&&ut.clientY<=Zt.bottom});pt.size()>0&&Ele(e,_,pt,Ze,ut)}})}}],e)}}function BL(e,t,r){var n=e[0],i=n.width,a=t.entrywidthmode,o=n.trace.legendwidth||t.entrywidth;return a==="fraction"?t._maxWidth*o:r+(o||i)}function Ele(e,t,r,n,i){var a=r.data()[0][0].trace,o={event:i,node:r.node(),curveNumber:a.index,expandedIndex:a.index,data:e.data,layout:e.layout,frames:e._transitionData._frames,config:e._context,fullData:e._fullData,fullLayout:e._fullLayout};a._group&&(o.group=a._group),G3.traceIs(a,"pie-like")&&(o.label=r.datum()[0].label);var s=_le.triggerHandler(e,"plotly_legendclick",o);if(n===1){if(s===!1)return;t._clickTimeout=setTimeout(function(){e._fullLayout&&xle(r,e,n)},e._context.doubleClickDelay)}else if(n===2){t._clickTimeout&&clearTimeout(t._clickTimeout),e._legendMouseDownTime=0;var l=_le.triggerHandler(e,"plotly_legenddoubleclick",o);l!==!1&&s!==!1&&xle(r,e,n)}}function Jlt(e,t,r){var n=HL(r),i=e.data()[0][0],a=i.trace,o=G3.traceIs(a,"pie-like"),s=!r._inHover&&t._context.edits.legendText&&!o,l=r._maxNameLength,u,c;i.groupTitle?(u=i.groupTitle.text,c=i.groupTitle.font):(c=r.font,r.entries?u=i.text:(u=o?i.label:a.name,a._meta&&(u=Rh.templateString(u,a._meta))));var f=Rh.ensureSingle(e,"text",n+"text");f.attr("text-anchor","start").call(Dh.font,c).text(s?Sle(u,l):u);var h=r.indentation+r.itemwidth+ed.itemGap*2;Db.positionText(f,h,0),s?f.call(Db.makeEditable,{gd:t,text:u}).call(UL,e,t,r).on("edit",function(d){this.text(Sle(d,l)).call(UL,e,t,r);var v=i.trace._fullInput||{},_={};return _.name=d,v._isShape?G3.call("_guiRelayout",t,"shapes["+a.index+"].name",_.name):G3.call("_guiRestyle",t,_,a.index)}):UL(f,e,t,r)}function Sle(e,t){var r=Math.max(4,t);if(e&&e.trim().length>=r/2)return e;e=e||"";for(var n=r-e.length;n>0;n--)e+=" ";return e}function $lt(e,t,r){var n=t._context.doubleClickDelay,i,a=1,o=Rh.ensureSingle(e,"rect",r+"toggle",function(s){t._context.staticPlot||s.style("cursor","pointer").attr("pointer-events","all"),s.call(NL.fill,"rgba(0,0,0,0)")});t._context.staticPlot||(o.on("mousedown",function(){i=new Date().getTime(),i-t._legendMouseDownTime<n?a+=1:(a=1,t._legendMouseDownTime=i)}),o.on("mouseup",function(){if(!(t._dragged||t._editing)){var s=t._fullLayout[r];new Date().getTime()-t._legendMouseDownTime>n&&(a=Math.max(a-1,1)),Ele(t,s,e,a,Mp.event)}}))}function UL(e,t,r,n,i){n._inHover&&e.attr("data-notex",!0),Db.convertToTspans(e,r,function(){Qlt(t,r,n,i)})}function Qlt(e,t,r,n){var i=e.data()[0][0],a=i&&i.trace.showlegend;if(Array.isArray(a)&&(a=a[i.i]!==!1),!r._inHover&&i&&!a){e.remove();return}var o=e.select("g[class*=math-group]"),s=o.node(),l=HL(r);r||(r=t._fullLayout[l]);var u=r.borderwidth,c;n===U3?c=r.title.font:i.groupTitle?c=i.groupTitle.font:c=r.font;var f=c.size*Mle,h,d;if(s){var v=Dh.bBox(s);h=v.height,d=v.width,n===U3?Dh.setTranslate(o,u,u+h*.75):Dh.setTranslate(o,0,h*.25)}else{var _="."+l+(n===U3?"title":"")+"text",b=e.select(_),p=Db.lineCount(b),k=b.node();if(h=f*p,d=k?Dh.bBox(k).width:0,n===U3)r.title.side==="left"&&(d+=ed.itemGap*2),Db.positionText(b,u+ed.titlePad,u+f);else{var E=ed.itemGap*2+r.indentation+r.itemwidth;i.groupTitle&&(E=ed.itemGap,d-=r.indentation+r.itemwidth),Db.positionText(b,E,-f*((p-1)/2-.3))}}n===U3?(r._titleWidth=d,r._titleHeight=h):(i.lineHeight=f,i.height=Math.max(h,16)+3,i.width=d)}function eut(e){var t=0,r=0,n=e.title.side;return n&&(n.indexOf("left")!==-1&&(t=e._titleWidth),n.indexOf("top")!==-1&&(r=e._titleHeight)),[t,r]}function tut(e,t,r,n){var i=e._fullLayout,a=HL(n);n||(n=i[a]);var o=i._size,s=Tle.isVertical(n),l=Tle.isGrouped(n),u=n.entrywidthmode==="fraction",c=n.borderwidth,f=2*c,h=ed.itemGap,d=n.indentation+n.itemwidth+h*2,v=2*(c+h),_=GL(n),b=n.y<0||n.y===0&&_==="top",p=n.y>1||n.y===1&&_==="bottom",k=n.tracegroupgap,E={};let{orientation:S,yref:L}=n,{maxheight:x}=n,C=b||p||S!=="v"||L!=="paper";x||(x=C?.5:1);let M=C?i.height:o.h;n._maxHeight=Math.max(x>1?x:x*M,30);var g=0;n._width=0,n._height=0;var P=eut(n);if(s)r.each(function(Fe){var Pe=Fe[0].height;Dh.setTranslate(this,c+P[0],c+P[1]+n._height+Pe/2+h),n._height+=Pe,n._width=Math.max(n._width,Fe[0].width)}),g=d+n._width,n._width+=h+d+f,n._height+=v,l&&(t.each(function(Fe,Pe){Dh.setTranslate(this,0,Pe*n.tracegroupgap)}),n._height+=(n._lgroupsLength-1)*n.tracegroupgap);else{var T=VL(n),z=n.x<0||n.x===0&&T==="right",O=n.x>1||n.x===1&&T==="left",V=p||b,G=i.width/2;n._maxWidth=Math.max(z?V&&T==="left"?o.l+o.w:G:O?V&&T==="right"?o.r+o.w:G:o.w,2*d);var Z=0,j=0;r.each(function(Fe){var Pe=BL(Fe,n,d);Z=Math.max(Z,Pe),j+=Pe}),g=null;var N=0;if(l){var H=0,te=0,oe=0;t.each(function(){var Fe=0,Pe=0;Mp.select(this).selectAll("g.traces").each(function(Re){var ce=BL(Re,n,d),Ze=Re[0].height;Dh.setTranslate(this,P[0],P[1]+c+h+Ze/2+Pe),Pe+=Ze,Fe=Math.max(Fe,ce),E[Re[0].trace.legendgroup]=Fe});var ge=Fe+h;te>0&&ge+c+te>n._maxWidth?(N=Math.max(N,te),te=0,oe+=H+k,H=Pe):H=Math.max(H,Pe),Dh.setTranslate(this,te,oe),te+=ge}),n._width=Math.max(N,te)+c,n._height=oe+H+v}else{var _e=r.size(),Ee=j+f+(_e-1)*h<n._maxWidth,Ce=0,me=0,ie=0,Se=0;r.each(function(Fe){var Pe=Fe[0].height,ge=BL(Fe,n,d,l),Re=Ee?ge:Z;u||(Re+=h),Re+c+me-h>=n._maxWidth&&(N=Math.max(N,Se),me=0,ie+=Ce,n._height+=Ce,Ce=0),Dh.setTranslate(this,P[0]+c+me,P[1]+c+ie+Pe/2+h),Se=me+ge+h,me+=Re,Ce=Math.max(Ce,Pe)}),Ee?(n._width=me+f,n._height=Ce+v):(n._width=Math.max(N,Se)+f,n._height+=Ce+v)}}n._width=Math.ceil(Math.max(n._width+P[0],n._titleWidth+2*(c+ed.titlePad))),n._height=Math.ceil(Math.max(n._height+P[1],n._titleHeight+2*(c+ed.itemGap))),n._effHeight=Math.min(n._height,n._maxHeight);var Le=e._context.edits,Ae=Le.legendText||Le.legendPosition;r.each(function(Fe){var Pe=Mp.select(this).select("."+a+"toggle"),ge=Fe[0].height,Re=Fe[0].trace.legendgroup,ce=BL(Fe,n,d);l&&Re!==""&&(ce=E[Re]);var Ze=Ae?d:g||ce;!s&&!u&&(Ze+=h/2),Dh.setRect(Pe,0,-ge/2,Ze,ge)})}function rut(e,t,r,n){var i=e._fullLayout,a=i[t],o=VL(a),s=GL(a),l=a.xref==="paper",u=a.yref==="paper";e._fullLayout._reservedMargin[t]={};var c=a.y<.5?"b":"t",f=a.x<.5?"l":"r",h={r:i.width-r,l:r+a._width,b:i.height-n,t:n+a._effHeight};if(l&&u)return OB.autoMargin(e,t,{x:a.x,y:a.y,l:a._width*V3[o],r:a._width*ble[o],b:a._effHeight*ble[s],t:a._effHeight*V3[s]});l?e._fullLayout._reservedMargin[t][c]=h[c]:u||a.orientation==="v"?e._fullLayout._reservedMargin[t][f]=h[f]:e._fullLayout._reservedMargin[t][c]=h[c]}function VL(e){return Rh.isRightAnchor(e)?"right":Rh.isCenterAnchor(e)?"center":"left"}function GL(e){return Rh.isBottomAnchor(e)?"bottom":Rh.isMiddleAnchor(e)?"middle":"top"}function HL(e){return e._id||"legend"}});var GB=ye(VB=>{"use strict";var Fb=Oa(),My=Eo(),Cle=cd(),zf=Dr(),iut=zf.pushUnique,NB=zf.strTranslate,nut=zf.strRotate,aut=_3(),T0=ru(),out=$se(),bm=So(),vd=ka(),jL=yv(),wm=ho(),sut=hd().zindexSeparator,j3=qa(),Mg=ip(),zb=zS(),lut=EB(),uut=BB(),Ole=zb.YANGLE,UB=Math.PI*Ole/180,cut=1/Math.sin(UB),fut=Math.cos(UB),hut=Math.sin(UB),Qc=zb.HOVERARROWSIZE,sl=zb.HOVERTEXTPAD,Lle={box:!0,ohlc:!0,violin:!0,candlestick:!0},dut={scatter:!0,scattergl:!0,splom:!0};function Ple(e,t){return e.distance-t.distance}VB.hover=function(t,r,n,i){t=zf.getGraphDiv(t);var a=r.target;zf.throttle(t._fullLayout._uid+zb.HOVERID,zb.HOVERMINTIME,function(){vut(t,r,n,i,a)})};VB.loneHover=function(t,r){var n=!0;Array.isArray(t)||(n=!1,t=[t]);var i=r.gd,a=Vle(i),o=Gle(i),s=t.map(function(b){var p=b._x0||b.x0||b.x||0,k=b._x1||b.x1||b.x||0,E=b._y0||b.y0||b.y||0,S=b._y1||b.y1||b.y||0,L=b.eventData;if(L){var x=Math.min(p,k),C=Math.max(p,k),M=Math.min(E,S),g=Math.max(E,S),P=b.trace;if(j3.traceIs(P,"gl3d")){var T=i._fullLayout[P.scene]._scene.container,z=T.offsetLeft,O=T.offsetTop;x+=z,C+=z,M+=O,g+=O}L.bbox={x0:x+o,x1:C+o,y0:M+a,y1:g+a},r.inOut_bbox&&r.inOut_bbox.push(L.bbox)}else L=!1;return{color:b.color||vd.defaultLine,x0:b.x0||b.x||0,x1:b.x1||b.x||0,y0:b.y0||b.y||0,y1:b.y1||b.y||0,xLabel:b.xLabel,yLabel:b.yLabel,zLabel:b.zLabel,text:b.text,name:b.name,idealAlign:b.idealAlign,borderColor:b.borderColor,fontFamily:b.fontFamily,fontSize:b.fontSize,fontColor:b.fontColor,fontWeight:b.fontWeight,fontStyle:b.fontStyle,fontVariant:b.fontVariant,nameLength:b.nameLength,textAlign:b.textAlign,trace:b.trace||{index:0,hoverinfo:""},xa:{_offset:0},ya:{_offset:0},index:0,hovertemplate:b.hovertemplate||!1,hovertemplateLabels:b.hovertemplateLabels||!1,eventData:L}}),l=!1,u=Ble(s,{gd:i,hovermode:"closest",rotateLabels:l,bgColor:r.bgColor||vd.background,container:Fb.select(r.container),outerContainer:r.outerContainer||r.container}),c=u.hoverLabels,f=5,h=0,d=0;c.sort(function(b,p){return b.y0-p.y0}).each(function(b,p){var k=b.y0-b.by/2;k-f<h?b.offset=h-k+f:b.offset=0,h=k+b.by+b.offset,p===r.anchorIndex&&(d=b.offset)}).each(function(b){b.offset-=d});var v=i._fullLayout._invScaleX,_=i._fullLayout._invScaleY;return Ule(c,l,v,_),n?c:c.node()};function vut(e,t,r,n,i){r||(r="xy"),typeof r=="string"&&(r=r.split(sut)[0]);var a=Array.isArray(r)?r:[r],o,s=e._fullLayout,l=s.hoversubplots,u=s._plots||[],c=u[r],f=s._has("cartesian"),h=t.hovermode||s.hovermode,d=(h||"").charAt(0)==="x",v=(h||"").charAt(0)==="y",_,b;if(f&&(d||v)&&l==="axis"){for(var p=a.length,k=0;k<p;k++)if(o=a[k],u[o]){_=wm.getFromId(e,o,"x"),b=wm.getFromId(e,o,"y");var E=(d?_:b)._subplotsWith;if(E&&E.length)for(var S=0;S<E.length;S++)iut(a,E[S])}}if(c&&l!=="single"){var L=c.overlays.map(function(wi){return wi.id});a=a.concat(L)}for(var x=a.length,C=new Array(x),M=new Array(x),g=!1,P=0;P<x;P++)if(o=a[P],u[o])g=!0,C[P]=u[o].xaxis,M[P]=u[o].yaxis;else if(s[o]&&s[o]._subplot){var T=s[o]._subplot;C[P]=T.xaxis,M[P]=T.yaxis}else{zf.warn("Unrecognized subplot: "+o);return}if(h&&!g&&(h="closest"),["x","y","closest","x unified","y unified"].indexOf(h)===-1||!e.calcdata||e.querySelector(".zoombox")||e._dragging)return jL.unhoverRaw(e,t);var z=s.hoverdistance;z===-1&&(z=1/0);var O=s.spikedistance;O===-1&&(O=1/0);var V=[],G=[],Z,j,N,H,te,oe,_e,Ee,Ce,me,ie,Se,Le,Ae={hLinePoint:null,vLinePoint:null},Fe=!1;if(Array.isArray(t))for(h="array",N=0;N<t.length;N++)te=e.calcdata[t[N].curveNumber||0],te&&(oe=te[0].trace,te[0].trace.hoverinfo!=="skip"&&(G.push(te),oe.orientation==="h"&&(Fe=!0)));else{var Pe=e.calcdata.slice();for(Pe.sort(function(wi,On){var qn=wi[0].trace.zorder||0,Fn=On[0].trace.zorder||0;return qn-Fn}),H=0;H<Pe.length;H++)te=Pe[H],oe=te[0].trace,oe.hoverinfo!=="skip"&&Mg.isTraceInSubplots(oe,a)&&(G.push(te),oe.orientation==="h"&&(Fe=!0));var ge=!i,Re,ce;if(ge)"xpx"in t?Re=t.xpx:Re=C[0]._length/2,"ypx"in t?ce=t.ypx:ce=M[0]._length/2;else{if(aut.triggerHandler(e,"plotly_beforehover",t)===!1)return;var Ze=i.getBoundingClientRect();Re=t.clientX-Ze.left,ce=t.clientY-Ze.top,s._calcInverseTransform(e);var ut=zf.apply3DTransform(s._invTransform)(Re,ce);if(Re=ut[0],ce=ut[1],Re<0||Re>C[0]._length||ce<0||ce>M[0]._length)return jL.unhoverRaw(e,t)}if(t.pointerX=Re+C[0]._offset,t.pointerY=ce+M[0]._offset,"xval"in t?Z=Mg.flat(a,t.xval):Z=Mg.p2c(C,Re),"yval"in t?j=Mg.flat(a,t.yval):j=Mg.p2c(M,ce),!My(Z[0])||!My(j[0]))return zf.warn("Fx.hover failed",t,e),jL.unhoverRaw(e,t)}var pt=1/0;function Zt(wi,On){for(H=0;H<G.length;H++)if(te=G[H],!(!te||!te[0]||!te[0].trace)&&(oe=te[0].trace,!(oe.visible!==!0||oe._length===0)&&["carpet","contourcarpet"].indexOf(oe._module.name)===-1)){if(Ce=h,Mg.isUnifiedHover(Ce)&&(Ce=Ce.charAt(0)),oe.type==="splom"?(Ee=0,_e=a[Ee]):(_e=Mg.getSubplot(oe),Ee=a.indexOf(_e)),Se={cd:te,trace:oe,xa:C[Ee],ya:M[Ee],maxHoverDistance:z,maxSpikeDistance:O,index:!1,distance:Math.min(pt,z),spikeDistance:1/0,xSpike:void 0,ySpike:void 0,color:vd.defaultLine,name:oe.name,x0:void 0,x1:void 0,y0:void 0,y1:void 0,xLabelVal:void 0,yLabelVal:void 0,zLabelVal:void 0,text:void 0},s[_e]&&(Se.subplot=s[_e]._subplot),s._splomScenes&&s._splomScenes[oe.uid]&&(Se.scene=s._splomScenes[oe.uid]),Ce==="array"){var qn=t[H];"pointNumber"in qn?(Se.index=qn.pointNumber,Ce="closest"):(Ce="","xval"in qn&&(me=qn.xval,Ce="x"),"yval"in qn&&(ie=qn.yval,Ce=Ce?"closest":"y"))}else wi!==void 0&&On!==void 0?(me=wi,ie=On):(me=Z[Ee],ie=j[Ee]);if(Le=V.length,z!==0)if(oe._module&&oe._module.hoverPoints){var Fn=oe._module.hoverPoints(Se,me,ie,Ce,{finiteRange:!0,hoverLayer:s._hoverlayer,hoversubplots:l,gd:e});if(Fn)for(var ra,la=0;la<Fn.length;la++)ra=Fn[la],My(ra.x0)&&My(ra.y0)&&V.push(yut(ra,h))}else zf.log("Unrecognized trace type in hover:",oe);if(h==="closest"&&V.length>Le&&(V.splice(0,Le),pt=V[0].distance),f&&O!==0&&V.length===0){Se.distance=O,Se.index=!1;var Ut=oe._module.hoverPoints(Se,me,ie,"closest",{hoverLayer:s._hoverlayer});if(Ut&&(Ut=Ut.filter(function(ri){return ri.spikeDistance<=O})),Ut&&Ut.length){var wt,rr=Ut.filter(function(ri){return ri.xa.showspikes&&ri.xa.spikesnap!=="hovered data"});if(rr.length){var nr=rr[0];My(nr.x0)&&My(nr.y0)&&(wt=lt(nr),(!Ae.vLinePoint||Ae.vLinePoint.spikeDistance>wt.spikeDistance)&&(Ae.vLinePoint=wt))}var Er=Ut.filter(function(ri){return ri.ya.showspikes&&ri.ya.spikesnap!=="hovered data"});if(Er.length){var Xr=Er[0];My(Xr.x0)&&My(Xr.y0)&&(wt=lt(Xr),(!Ae.hLinePoint||Ae.hLinePoint.spikeDistance>wt.spikeDistance)&&(Ae.hLinePoint=wt))}}}}}Zt();function st(wi,On,qn){for(var Fn=null,ra=1/0,la,Ut=0;Ut<wi.length;Ut++)_&&_._id!==wi[Ut].xa._id||b&&b._id!==wi[Ut].ya._id||(la=wi[Ut].spikeDistance,qn&&Ut===0&&(la=-1/0),la<=ra&&la<=On&&(Fn=wi[Ut],ra=la));return Fn}function lt(wi){return wi?{xa:wi.xa,ya:wi.ya,x:wi.xSpike!==void 0?wi.xSpike:(wi.x0+wi.x1)/2,y:wi.ySpike!==void 0?wi.ySpike:(wi.y0+wi.y1)/2,distance:wi.distance,spikeDistance:wi.spikeDistance,curveNumber:wi.trace.index,color:wi.color,pointNumber:wi.index}:null}var Gt={fullLayout:s,container:s._hoverlayer,event:t},Nt=e._spikepoints,Jt={vLinePoint:Ae.vLinePoint,hLinePoint:Ae.hLinePoint};e._spikepoints=Jt;var sr=function(){var wi=V.filter(function(qn){return _&&_._id===qn.xa._id&&b&&b._id===qn.ya._id}),On=V.filter(function(qn){return!(_&&_._id===qn.xa._id&&b&&b._id===qn.ya._id)});wi.sort(Ple),On.sort(Ple),V=wi.concat(On),V=xut(V,h)};sr();var wr=h.charAt(0),cr=(wr==="x"||wr==="y")&&V[0]&&dut[V[0].trace.type];if(f&&O!==0&&V.length!==0){var $e=V.filter(function(wi){return wi.ya.showspikes}),St=st($e,O,cr);Ae.hLinePoint=lt(St);var Qt=V.filter(function(wi){return wi.xa.showspikes}),Vt=st(Qt,O,cr);Ae.vLinePoint=lt(Vt)}if(V.length===0){var _t=jL.unhoverRaw(e,t);return f&&(Ae.hLinePoint!==null||Ae.vLinePoint!==null)&&Dle(Nt)&&Rle(e,Ae,Gt),_t}if(f&&Dle(Nt)&&Rle(e,Ae,Gt),Mg.isXYhover(Ce)&&V[0].length!==0&&V[0].trace.type!=="splom"){var It=V[0];Lle[It.trace.type]?V=V.filter(wi=>wi.trace.index===It.trace.index):V=[It];var mt=V.length,er=zle("x",It,s),lr=zle("y",It,s);Zt(er,lr);var Tr=[],Lr={},ti=0,Br=function(wi){var On=Lle[wi.trace.type]?qle(wi):wi.trace.index;if(!Lr[On])ti++,Lr[On]=ti,Tr.push(wi);else{var qn=Lr[On]-1,Fn=Tr[qn];qn>0&&Math.abs(wi.distance)<Math.abs(Fn.distance)&&(Tr[qn]=wi)}},Vr;for(Vr=0;Vr<mt;Vr++)Br(V[Vr]);for(Vr=V.length-1;Vr>mt-1;Vr--)Br(V[Vr]);V=Tr,sr()}var dt=e._hoverdata,Ge=[],Je=Vle(e),je=Gle(e);for(let wi of V){var tt=Mg.makeEventData(wi,wi.trace,wi.cd);if(wi.hovertemplate!==!1){var xt=!1;wi.cd[wi.index]&&wi.cd[wi.index].ht&&(xt=wi.cd[wi.index].ht),wi.hovertemplate=xt||wi.trace.hovertemplate||!1}if(wi.xa&&wi.ya){var Ie=wi.x0+wi.xa._offset,xe=wi.x1+wi.xa._offset,ke=wi.y0+wi.ya._offset,vt=wi.y1+wi.ya._offset,ir=Math.min(Ie,xe),ar=Math.max(Ie,xe),vr=Math.min(ke,vt),ii=Math.max(ke,vt);tt.bbox={x0:ir+je,x1:ar+je,y0:vr+Je,y1:ii+Je}}wi.eventData=[tt],Ge.push(tt)}e._hoverdata=Ge;var pi=h==="y"&&(G.length>1||V.length>1)||h==="closest"&&Fe&&V.length>1,$r=vd.combine(s.plot_bgcolor||vd.background,s.paper_bgcolor),di=Ble(V,{gd:e,hovermode:h,rotateLabels:pi,bgColor:$r,container:s._hoverlayer,outerContainer:s._paper.node(),commonLabelOpts:s.hoverlabel,hoverdistance:s.hoverdistance}),ji=di.hoverLabels;if(Mg.isUnifiedHover(h)||(gut(ji,pi,s,di.commonLabelBoundingBox),Ule(ji,pi,s._invScaleX,s._invScaleY)),i&&i.tagName){var In=j3.getComponentMethod("annotations","hasClickToShow")(e,Ge);out(Fb.select(i),In?"pointer":"")}!i||n||!_ut(e,t,dt)||(dt&&e.emit("plotly_unhover",{event:t,points:dt}),e.emit("plotly_hover",{event:t,points:e._hoverdata,xaxes:C,yaxes:M,xvals:Z,yvals:j}))}function qle(e){return[e.trace.index,e.index,e.x0,e.y0,e.name,e.attr,e.xa?e.xa._id:"",e.ya?e.ya._id:""].join(",")}var put=/<extra>([\s\S]*)<\/extra>/;function Ble(e,t){var r=t.gd,n=r._fullLayout,i=t.hovermode,a=t.rotateLabels,o=t.bgColor,s=t.container,l=t.outerContainer,u=t.commonLabelOpts||{};if(e.length===0)return[[]];var c=t.fontFamily||zb.HOVERFONT,f=t.fontSize||zb.HOVERFONTSIZE,h=t.fontWeight||n.font.weight,d=t.fontStyle||n.font.style,v=t.fontVariant||n.font.variant,_=t.fontTextcase||n.font.textcase,b=t.fontLineposition||n.font.lineposition,p=t.fontShadow||n.font.shadow,k=e[0],E=k.xa,S=k.ya,L=i.charAt(0),x=L+"Label",C=k[x];if(C===void 0&&E.type==="multicategory")for(var M=0;M<e.length&&(C=e[M][x],C===void 0);M++);var g=H3(r,l),P=g.top,T=g.width,z=g.height,O=C!==void 0&&k.distance<=t.hoverdistance&&(i==="x"||i==="y");if(O){var V=!0,G,Z;for(G=0;G<e.length;G++)if(V&&e[G].zLabel===void 0&&(V=!1),Z=e[G].hoverinfo||e[G].trace.hoverinfo,Z){var j=Array.isArray(Z)?Z:Z.split("+");if(j.indexOf("all")===-1&&j.indexOf(i)===-1){O=!1;break}}V&&(O=!1)}var N=s.selectAll("g.axistext").data(O?[0]:[]);N.enter().append("g").classed("axistext",!0),N.exit().remove();var H={minX:0,maxX:0,minY:0,maxY:0};if(N.each(function(){var _t=Fb.select(this),It=zf.ensureSingle(_t,"path","",function(ii){ii.style({"stroke-width":"1px"})}),mt=zf.ensureSingle(_t,"text","",function(ii){ii.attr("data-notex",1)}),er=u.bgcolor||vd.defaultLine,lr=u.bordercolor||vd.contrast(er),Tr=vd.contrast(er),Lr=u.font,ti={weight:Lr.weight||h,style:Lr.style||d,variant:Lr.variant||v,textcase:Lr.textcase||_,lineposition:Lr.lineposition||b,shadow:Lr.shadow||p,family:Lr.family||c,size:Lr.size||f,color:Lr.color||Tr};It.style({fill:er,stroke:lr}),mt.text(C).call(bm.font,ti).call(T0.positionText,0,0).call(T0.convertToTspans,r),_t.attr("transform","");var Br=H3(r,mt.node()),Vr,dt;if(i==="x"){var Ge=E.side==="top"?"-":"";mt.attr("text-anchor","middle").call(T0.positionText,0,E.side==="top"?P-Br.bottom-Qc-sl:P-Br.top+Qc+sl),Vr=E._offset+(k.x0+k.x1)/2,dt=S._offset+(E.side==="top"?0:S._length);var Je=Br.width/2+sl,je=Vr;Vr<Je?je=Je:Vr>n.width-Je&&(je=n.width-Je),It.attr("d","M"+(Vr-je)+",0L"+(Vr-je+Qc)+","+Ge+Qc+"H"+Je+"v"+Ge+(sl*2+Br.height)+"H"+-Je+"V"+Ge+Qc+"H"+(Vr-je-Qc)+"Z"),Vr=je,H.minX=Vr-Je,H.maxX=Vr+Je,E.side==="top"?(H.minY=dt-(sl*2+Br.height),H.maxY=dt-sl):(H.minY=dt+sl,H.maxY=dt+(sl*2+Br.height))}else{var tt,xt,Ie;S.side==="right"?(tt="start",xt=1,Ie="",Vr=E._offset+E._length):(tt="end",xt=-1,Ie="-",Vr=E._offset),dt=S._offset+(k.y0+k.y1)/2,mt.attr("text-anchor",tt),It.attr("d","M0,0L"+Ie+Qc+","+Qc+"V"+(sl+Br.height/2)+"h"+Ie+(sl*2+Br.width)+"V-"+(sl+Br.height/2)+"H"+Ie+Qc+"V-"+Qc+"Z"),H.minY=dt-(sl+Br.height/2),H.maxY=dt+(sl+Br.height/2),S.side==="right"?(H.minX=Vr+Qc,H.maxX=Vr+Qc+(sl*2+Br.width)):(H.minX=Vr-Qc-(sl*2+Br.width),H.maxX=Vr-Qc);var xe=Br.height/2,ke=P-Br.top-xe,vt="clip"+n._uid+"commonlabel"+S._id,ir;if(Vr<Br.width+2*sl+Qc){ir="M-"+(Qc+sl)+"-"+xe+"h-"+(Br.width-sl)+"V"+xe+"h"+(Br.width-sl)+"Z";var ar=Br.width-Vr+sl;T0.positionText(mt,ar,ke),tt==="end"&&mt.selectAll("tspan").each(function(){var ii=Fb.select(this),pi=bm.tester.append("text").text(ii.text()).call(bm.font,ti),$r=H3(r,pi.node());Math.round($r.width)<Math.round(Br.width)&&ii.attr("x",ar-$r.width),pi.remove()})}else T0.positionText(mt,xt*(sl+Qc),ke),ir=null;var vr=n._topclips.selectAll("#"+vt).data(ir?[0]:[]);vr.enter().append("clipPath").attr("id",vt).append("path"),vr.exit().remove(),vr.select("path").attr("d",ir),bm.setClipUrl(mt,ir?vt:null,r)}_t.attr("transform",NB(Vr,dt))}),Mg.isUnifiedHover(i)){s.selectAll("g.hovertext").remove();let _t=e.filter(It=>It.hoverinfo!=="none");if(_t.length===0)return[];var te=n.hoverlabel,oe=te.font,_e=_t[0],Ee=((i==="x unified"?_e.xa:_e.ya).unifiedhovertitle||{}).text,Ce=Ee?zf.hovertemplateString({data:i==="x unified"?[{xa:_e.xa,x:_e.xVal}]:[{ya:_e.ya,y:_e.yVal}],fallback:_e.trace.hovertemplatefallback,locale:n._d3locale,template:Ee}):C,me={showlegend:!0,legend:{title:{text:Ce,font:oe},font:oe,bgcolor:te.bgcolor,bordercolor:te.bordercolor,borderwidth:1,tracegroupgap:7,traceorder:n.legend?n.legend.traceorder:void 0,orientation:"v"}},ie={font:oe};lut(me,ie,r._fullData);var Se=ie.legend;Se.entries=[];for(var Le=0;Le<_t.length;Le++){var Ae=_t[Le];if(Ae.hoverinfo!=="none"){var Fe=Ile(Ae,!0,i,n,C),Pe=Fe[0],ge=Fe[1];Ae.name=ge,ge!==""?Ae.text=ge+" : "+Pe:Ae.text=Pe;var Re=Ae.cd[Ae.index];Re&&(Re.mc&&(Ae.mc=Re.mc),Re.mcc&&(Ae.mc=Re.mcc),Re.mlc&&(Ae.mlc=Re.mlc),Re.mlcc&&(Ae.mlc=Re.mlcc),Re.mlw&&(Ae.mlw=Re.mlw),Re.mrc&&(Ae.mrc=Re.mrc),Re.dir&&(Ae.dir=Re.dir)),Ae._distinct=!0,Se.entries.push([Ae])}}Se.entries.sort(function(It,mt){return It[0].trace.index-mt[0].trace.index}),Se.layer=s,Se._inHover=!0,Se._groupTitleFont=te.grouptitlefont,uut(r,Se);var ce=s.select("g.legend"),Ze=H3(r,ce.node()),ut=Ze.width+2*sl,pt=Ze.height+2*sl,Zt=_t[0],st=(Zt.x0+Zt.x1)/2,lt=(Zt.y0+Zt.y1)/2,Gt=!(j3.traceIs(Zt.trace,"bar-like")||j3.traceIs(Zt.trace,"box-violin")),Nt,Jt;L==="y"?Gt?(Jt=lt-sl,Nt=lt+sl):(Jt=Math.min.apply(null,_t.map(function(It){return Math.min(It.y0,It.y1)})),Nt=Math.max.apply(null,_t.map(function(It){return Math.max(It.y0,It.y1)}))):Jt=Nt=zf.mean(_t.map(function(It){return(It.y0+It.y1)/2}))-pt/2;var sr,wr;L==="x"?Gt?(sr=st+sl,wr=st-sl):(sr=Math.max.apply(null,_t.map(function(It){return Math.max(It.x0,It.x1)})),wr=Math.min.apply(null,_t.map(function(It){return Math.min(It.x0,It.x1)}))):sr=wr=zf.mean(_t.map(function(It){return(It.x0+It.x1)/2}))-ut/2;var cr=E._offset,$e=S._offset;Nt+=$e,sr+=cr,wr+=cr-ut,Jt+=$e-pt;var St,Qt;return sr+ut<T&&sr>=0?St=sr:wr+ut<T&&wr>=0?St=wr:cr+ut<T?St=cr:sr-st<st-wr+ut?St=T-ut:St=0,St+=sl,Nt+pt<z&&Nt>=0?Qt=Nt:Jt+pt<z&&Jt>=0?Qt=Jt:$e+pt<z?Qt=$e:Nt-lt<lt-Jt+pt?Qt=z-pt:Qt=0,Qt+=sl,ce.attr("transform",NB(St-1,Qt-1)),ce}var Vt=s.selectAll("g.hovertext").data(e,function(_t){return qle(_t)});return Vt.enter().append("g").classed("hovertext",!0).each(function(){var _t=Fb.select(this);_t.append("rect").call(vd.fill,vd.addOpacity(o,.8)),_t.append("text").classed("name",!0),_t.append("path").style("stroke-width","1px"),_t.append("text").classed("nums",!0).call(bm.font,{weight:h,style:d,variant:v,textcase:_,lineposition:b,shadow:p,family:c,size:f})}),Vt.exit().remove(),Vt.each(function(_t){var It=Fb.select(this).attr("transform",""),mt=_t.color;Array.isArray(mt)&&(mt=mt[_t.eventData[0].pointNumber]);var er=_t.bgcolor||mt,lr=vd.combine(vd.opacity(er)?er:vd.defaultLine,o),Tr=vd.combine(vd.opacity(mt)?mt:vd.defaultLine,o),Lr=_t.borderColor||vd.contrast(lr),ti=Ile(_t,O,i,n,C,It),Br=ti[0],Vr=ti[1],dt=It.select("text.nums").call(bm.font,{family:_t.fontFamily||c,size:_t.fontSize||f,color:_t.fontColor||Lr,weight:_t.fontWeight||h,style:_t.fontStyle||d,variant:_t.fontVariant||v,textcase:_t.fontTextcase||_,lineposition:_t.fontLineposition||b,shadow:_t.fontShadow||p}).text(Br).attr("data-notex",1).call(T0.positionText,0,0).call(T0.convertToTspans,r),Ge=It.select("text.name"),Je=0,je=0;if(Vr&&Vr!==Br){Ge.call(bm.font,{family:_t.fontFamily||c,size:_t.fontSize||f,color:Tr,weight:_t.fontWeight||h,style:_t.fontStyle||d,variant:_t.fontVariant||v,textcase:_t.fontTextcase||_,lineposition:_t.fontLineposition||b,shadow:_t.fontShadow||p}).text(Vr).attr("data-notex",1).call(T0.positionText,0,0).call(T0.convertToTspans,r);var tt=H3(r,Ge.node());Je=tt.width+2*sl,je=tt.height+2*sl}else Ge.remove(),It.select("rect").remove();It.select("path").style({fill:lr,stroke:Lr});var xt=_t.xa._offset+(_t.x0+_t.x1)/2,Ie=_t.ya._offset+(_t.y0+_t.y1)/2,xe=Math.abs(_t.x1-_t.x0),ke=Math.abs(_t.y1-_t.y0),vt=H3(r,dt.node()),ir=vt.width/n._invScaleX,ar=vt.height/n._invScaleY;_t.ty0=(P-vt.top)/n._invScaleY,_t.bx=ir+2*sl,_t.by=Math.max(ar+2*sl,je),_t.anchor="start",_t.txwidth=ir,_t.tx2width=Je,_t.offset=0;var vr=(ir+Qc+sl+Je)*n._invScaleX,ii,pi;if(a)_t.pos=xt,ii=Ie+ke/2+vr<=z,pi=Ie-ke/2-vr>=0,(_t.idealAlign==="top"||!ii)&&pi?(Ie-=ke/2,_t.anchor="end"):ii?(Ie+=ke/2,_t.anchor="start"):_t.anchor="middle",_t.crossPos=Ie;else{if(_t.pos=Ie,ii=xt+xe/2+vr<=T,pi=xt-xe/2-vr>=0,(_t.idealAlign==="left"||!ii)&&pi)xt-=xe/2,_t.anchor="end";else if(ii)xt+=xe/2,_t.anchor="start";else{_t.anchor="middle";var $r=vr/2,di=xt+$r-T,ji=xt-$r;di>0&&(xt-=di),ji<0&&(xt+=-ji)}_t.crossPos=xt}dt.attr("text-anchor",_t.anchor),Je&&Ge.attr("text-anchor",_t.anchor),It.attr("transform",NB(xt,Ie)+(a?nut(Ole):""))}),{hoverLabels:Vt,commonLabelBoundingBox:H}}function Ile(e,t,r,n,i,a){var f,h;var o="",s="";e.nameOverride!==void 0&&(e.name=e.nameOverride),e.name&&(e.trace._meta&&(e.name=zf.templateString(e.name,e.trace._meta)),o=Fle(e.name,e.nameLength));var l=r.charAt(0),u=l==="x"?"y":"x";e.zLabel!==void 0?(e.xLabel!==void 0&&(s+="x: "+e.xLabel+"<br>"),e.yLabel!==void 0&&(s+="y: "+e.yLabel+"<br>"),e.trace.type!=="choropleth"&&e.trace.type!=="choroplethmapbox"&&e.trace.type!=="choroplethmap"&&(s+=(s?"z: ":"")+e.zLabel)):t&&e[l+"Label"]===i?s=e[u+"Label"]||"":e.xLabel===void 0?e.yLabel!==void 0&&e.trace.type!=="scattercarpet"&&(s=e.yLabel):e.yLabel===void 0?s=e.xLabel:s="("+e.xLabel+", "+e.yLabel+")",(e.text||e.text===0)&&!Array.isArray(e.text)&&(s+=(s?"<br>":"")+e.text),e.extraText!==void 0&&(s+=(s?"<br>":"")+e.extraText),a&&s===""&&!e.hovertemplate&&(o===""&&a.remove(),s=o),(h=(f=e.trace)==null?void 0:f.hoverlabel)!=null&&h.split&&(e.hovertemplate="");let{hovertemplate:c=!1}=e;if(c){let d=e.hovertemplateLabels||e;e[l+"Label"]!==i&&(d[l+"other"]=d[l+"Val"],d[l+"otherLabel"]=d[l+"Label"]),s=zf.hovertemplateString({data:[e.eventData[0]||{},e.trace._meta],fallback:e.trace.hovertemplatefallback,labels:d,locale:n._d3locale,template:c}),s=s.replace(put,(v,_)=>(o=Fle(_,e.nameLength),""))}return[s,o]}function gut(e,t,r,n){var i=t?"xa":"ya",a=t?"ya":"xa",o=0,s=1,l=e.size(),u=new Array(l),c=0,f=n.minX,h=n.maxX,d=n.minY,v=n.maxY,_=function(Z){return Z*r._invScaleX},b=function(Z){return Z*r._invScaleY};e.each(function(Z){var j=Z[i],N=Z[a],H=j._id.charAt(0)==="x",te=j.range;c===0&&te&&te[0]>te[1]!==H&&(s=-1);var oe=0,_e=H?r.width:r.height;if(r.hovermode==="x"||r.hovermode==="y"){var Ee=Nle(Z,t),Ce=Z.anchor,me=Ce==="end"?-1:1,ie,Se;if(Ce==="middle")ie=Z.crossPos+(H?b(Ee.y-Z.by/2):_(Z.bx/2+Z.tx2width/2)),Se=ie+(H?b(Z.by):_(Z.bx));else if(H)ie=Z.crossPos+b(Qc+Ee.y)-b(Z.by/2-Qc),Se=ie+b(Z.by);else{var Le=_(me*Qc+Ee.x),Ae=Le+_(me*Z.bx);ie=Z.crossPos+Math.min(Le,Ae),Se=Z.crossPos+Math.max(Le,Ae)}H?d!==void 0&&v!==void 0&&Math.min(Se,v)-Math.max(ie,d)>1&&(N.side==="left"?(oe=N._mainLinePosition,_e=r.width):_e=N._mainLinePosition):f!==void 0&&h!==void 0&&Math.min(Se,h)-Math.max(ie,f)>1&&(N.side==="top"?(oe=N._mainLinePosition,_e=r.height):_e=N._mainLinePosition)}u[c++]=[{datum:Z,traceIndex:Z.trace.index,dp:0,pos:Z.pos,posref:Z.posref,size:Z.by*(H?cut:1)/2,pmin:oe,pmax:_e}]}),u.sort(function(Z,j){return Z[0].posref-j[0].posref||s*(j[0].traceIndex-Z[0].traceIndex)});var p,k,E,S,L,x,C;function M(Z){var j=Z[0],N=Z[Z.length-1];if(k=j.pmin-j.pos-j.dp+j.size,E=N.pos+N.dp+N.size-j.pmax,k>.01){for(L=Z.length-1;L>=0;L--)Z[L].dp+=k;p=!1}if(!(E<.01)){if(k<-.01){for(L=Z.length-1;L>=0;L--)Z[L].dp-=E;p=!1}if(p){var H=0;for(S=0;S<Z.length;S++)x=Z[S],x.pos+x.dp+x.size>j.pmax&&H++;for(S=Z.length-1;S>=0&&!(H<=0);S--)x=Z[S],x.pos>j.pmax-1&&(x.del=!0,H--);for(S=0;S<Z.length&&!(H<=0);S++)if(x=Z[S],x.pos<j.pmin+1)for(x.del=!0,H--,E=x.size*2,L=Z.length-1;L>=0;L--)Z[L].dp-=E;for(S=Z.length-1;S>=0&&!(H<=0);S--)x=Z[S],x.pos+x.dp+x.size>j.pmax&&(x.del=!0,H--)}}}for(;!p&&o<=l;){for(o++,p=!0,S=0;S<u.length-1;){var g=u[S],P=u[S+1],T=g[g.length-1],z=P[0];if(k=T.pos+T.dp+T.size-z.pos-z.dp+z.size,k>.01){for(L=P.length-1;L>=0;L--)P[L].dp+=k;for(g.push.apply(g,P),u.splice(S+1,1),C=0,L=g.length-1;L>=0;L--)C+=g[L].dp;for(E=C/g.length,L=g.length-1;L>=0;L--)g[L].dp-=E;p=!1}else S++}u.forEach(M)}for(S=u.length-1;S>=0;S--){var O=u[S];for(L=O.length-1;L>=0;L--){var V=O[L],G=V.datum;G.offset=V.dp,G.del=V.del}}}function Nle(e,t){var r=0,n=e.offset;return t&&(n*=-hut,r=e.offset*fut),{x:r,y:n}}function mut(e){var t={start:1,end:-1,middle:0}[e.anchor],r=t*(Qc+sl),n=r+t*(e.txwidth+sl),i=e.anchor==="middle";return i&&(r-=e.tx2width/2,n+=e.txwidth/2+sl),{alignShift:t,textShiftX:r,text2ShiftX:n}}function Ule(e,t,r,n){var i=function(o){return o*r},a=function(o){return o*n};e.each(function(o){var s=Fb.select(this);if(o.del)return s.remove();var l=s.select("text.nums"),u=o.anchor,c=u==="end"?-1:1,f=mut(o),h=Nle(o,t),d=h.x,v=h.y,_=u==="middle",b="hoverlabel"in o.trace?o.trace.hoverlabel.showarrow:!0,p;_?p="M-"+i(o.bx/2+o.tx2width/2)+","+a(v-o.by/2)+"h"+i(o.bx)+"v"+a(o.by)+"h-"+i(o.bx)+"Z":b?p="M0,0L"+i(c*Qc+d)+","+a(Qc+v)+"v"+a(o.by/2-Qc)+"h"+i(c*o.bx)+"v-"+a(o.by)+"H"+i(c*Qc+d)+"V"+a(v-Qc)+"Z":p="M"+i(c*Qc+d)+","+a(v-o.by/2)+"h"+i(c*o.bx)+"v"+a(o.by)+"h"+i(-c*o.bx)+"Z",s.select("path").attr("d",p);var k=d+f.textShiftX,E=v+o.ty0-o.by/2+sl,S=o.textAlign||"auto";S!=="auto"&&(S==="left"&&u!=="start"?(l.attr("text-anchor","start"),k=_?-o.bx/2-o.tx2width/2+sl:-o.bx-sl):S==="right"&&u!=="end"&&(l.attr("text-anchor","end"),k=_?o.bx/2-o.tx2width/2-sl:o.bx+sl)),l.call(T0.positionText,i(k),a(E)),o.tx2width&&(s.select("text.name").call(T0.positionText,i(f.text2ShiftX+f.alignShift*sl+d),a(v+o.ty0-o.by/2+sl)),s.select("rect").call(bm.setRect,i(f.text2ShiftX+(f.alignShift-1)*o.tx2width/2+d),a(v-o.by/2-1),i(o.tx2width),a(o.by+2)))})}function yut(e,t){var r=e.index,n=e.trace||{},i=e.cd[0],a=e.cd[r]||{};function o(h){return h||My(h)&&h===0}var s=Array.isArray(r)?function(h,d){var v=zf.castOption(i,r,h);return o(v)?v:zf.extractOption({},n,"",d)}:function(h,d){return zf.extractOption(a,n,h,d)};function l(h,d,v){var _=s(d,v);o(_)&&(e[h]=_)}if(l("hoverinfo","hi","hoverinfo"),l("bgcolor","hbg","hoverlabel.bgcolor"),l("borderColor","hbc","hoverlabel.bordercolor"),l("fontFamily","htf","hoverlabel.font.family"),l("fontSize","hts","hoverlabel.font.size"),l("fontColor","htc","hoverlabel.font.color"),l("fontWeight","htw","hoverlabel.font.weight"),l("fontStyle","hty","hoverlabel.font.style"),l("fontVariant","htv","hoverlabel.font.variant"),l("nameLength","hnl","hoverlabel.namelength"),l("textAlign","hta","hoverlabel.align"),e.posref=t==="y"||t==="closest"&&n.orientation==="h"?e.xa._offset+(e.x0+e.x1)/2:e.ya._offset+(e.y0+e.y1)/2,e.x0=zf.constrain(e.x0,0,e.xa._length),e.x1=zf.constrain(e.x1,0,e.xa._length),e.y0=zf.constrain(e.y0,0,e.ya._length),e.y1=zf.constrain(e.y1,0,e.ya._length),e.xLabelVal!==void 0&&(e.xLabel="xLabel"in e?e.xLabel:wm.hoverLabelText(e.xa,e.xLabelVal,n.xhoverformat),e.xVal=e.xa.c2d(e.xLabelVal)),e.yLabelVal!==void 0&&(e.yLabel="yLabel"in e?e.yLabel:wm.hoverLabelText(e.ya,e.yLabelVal,n.yhoverformat),e.yVal=e.ya.c2d(e.yLabelVal)),e.zLabelVal!==void 0&&e.zLabel===void 0&&(e.zLabel=String(e.zLabelVal)),!isNaN(e.xerr)&&!(e.xa.type==="log"&&e.xerr<=0)){var u=wm.tickText(e.xa,e.xa.c2l(e.xerr),"hover").text;e.xerrneg!==void 0?e.xLabel+=" +"+u+" / -"+wm.tickText(e.xa,e.xa.c2l(e.xerrneg),"hover").text:e.xLabel+=" \xB1 "+u,t==="x"&&(e.distance+=1)}if(!isNaN(e.yerr)&&!(e.ya.type==="log"&&e.yerr<=0)){var c=wm.tickText(e.ya,e.ya.c2l(e.yerr),"hover").text;e.yerrneg!==void 0?e.yLabel+=" +"+c+" / -"+wm.tickText(e.ya,e.ya.c2l(e.yerrneg),"hover").text:e.yLabel+=" \xB1 "+c,t==="y"&&(e.distance+=1)}var f=e.hoverinfo||e.trace.hoverinfo;return f&&f!=="all"&&(f=Array.isArray(f)?f:f.split("+"),f.indexOf("x")===-1&&(e.xLabel=void 0),f.indexOf("y")===-1&&(e.yLabel=void 0),f.indexOf("z")===-1&&(e.zLabel=void 0),f.indexOf("text")===-1&&(e.text=void 0),f.indexOf("name")===-1&&(e.name=void 0)),e}function Rle(e,t,r){var n=r.container,i=r.fullLayout,a=i._size,o=r.event,s=!!t.hLinePoint,l=!!t.vLinePoint,u,c;if(n.selectAll(".spikeline").remove(),!!(l||s)){var f=vd.combine(i.plot_bgcolor,i.paper_bgcolor);if(s){var h=t.hLinePoint,d,v;u=h&&h.xa,c=h&&h.ya;var _=c.spikesnap;_==="cursor"?(d=o.pointerX,v=o.pointerY):(d=u._offset+h.x,v=c._offset+h.y);var b=Cle.readability(h.color,f)<1.5?vd.contrast(f):h.color,p=c.spikemode,k=c.spikethickness,E=c.spikecolor||b,S=wm.getPxPosition(e,c),L,x;if(p.indexOf("toaxis")!==-1||p.indexOf("across")!==-1){if(p.indexOf("toaxis")!==-1&&(L=S,x=d),p.indexOf("across")!==-1){var C=c._counterDomainMin,M=c._counterDomainMax;c.anchor==="free"&&(C=Math.min(C,c.position),M=Math.max(M,c.position)),L=a.l+C*a.w,x=a.l+M*a.w}n.insert("line",":first-child").attr({x1:L,x2:x,y1:v,y2:v,"stroke-width":k,stroke:E,"stroke-dasharray":bm.dashStyle(c.spikedash,k)}).classed("spikeline",!0).classed("crisp",!0),n.insert("line",":first-child").attr({x1:L,x2:x,y1:v,y2:v,"stroke-width":k+2,stroke:f}).classed("spikeline",!0).classed("crisp",!0)}p.indexOf("marker")!==-1&&n.insert("circle",":first-child").attr({cx:S+(c.side!=="right"?k:-k),cy:v,r:k,fill:E}).classed("spikeline",!0)}if(l){var g=t.vLinePoint,P,T;u=g&&g.xa,c=g&&g.ya;var z=u.spikesnap;z==="cursor"?(P=o.pointerX,T=o.pointerY):(P=u._offset+g.x,T=c._offset+g.y);var O=Cle.readability(g.color,f)<1.5?vd.contrast(f):g.color,V=u.spikemode,G=u.spikethickness,Z=u.spikecolor||O,j=wm.getPxPosition(e,u),N,H;if(V.indexOf("toaxis")!==-1||V.indexOf("across")!==-1){if(V.indexOf("toaxis")!==-1&&(N=j,H=T),V.indexOf("across")!==-1){var te=u._counterDomainMin,oe=u._counterDomainMax;u.anchor==="free"&&(te=Math.min(te,u.position),oe=Math.max(oe,u.position)),N=a.t+(1-oe)*a.h,H=a.t+(1-te)*a.h}n.insert("line",":first-child").attr({x1:P,x2:P,y1:N,y2:H,"stroke-width":G,stroke:Z,"stroke-dasharray":bm.dashStyle(u.spikedash,G)}).classed("spikeline",!0).classed("crisp",!0),n.insert("line",":first-child").attr({x1:P,x2:P,y1:N,y2:H,"stroke-width":G+2,stroke:f}).classed("spikeline",!0).classed("crisp",!0)}V.indexOf("marker")!==-1&&n.insert("circle",":first-child").attr({cx:P,cy:j-(u.side!=="top"?G:-G),r:G,fill:Z}).classed("spikeline",!0)}}}function _ut(e,t,r){if(!r||r.length!==e._hoverdata.length)return!0;for(var n=r.length-1;n>=0;n--){var i=r[n],a=e._hoverdata[n];if(i.curveNumber!==a.curveNumber||String(i.pointNumber)!==String(a.pointNumber)||String(i.pointNumbers)!==String(a.pointNumbers)||i.binNumber!==a.binNumber)return!0}return!1}function Dle(e,t){return!t||t.vLinePoint!==e._spikepoints.vLinePoint||t.hLinePoint!==e._spikepoints.hLinePoint}function Fle(e,t){return T0.plainText(e||"",{len:t,allowedTags:["br","sub","sup","b","i","em","s","u"]})}function xut(e,t){for(var r=t.charAt(0),n=[],i=[],a=[],o=0;o<e.length;o++){var s=e[o];j3.traceIs(s.trace,"bar-like")||j3.traceIs(s.trace,"box-violin")?a.push(s):s.trace[r+"period"]?i.push(s):n.push(s)}return n.concat(i).concat(a)}function zle(e,t,r){var n=t[e+"a"],i=t[e+"Val"],a=t.cd[0];if(n.type==="category"||n.type==="multicategory")i=n._categoriesMap[i];else if(n.type==="date"){var o=t.trace[e+"periodalignment"];if(o){var s=t.cd[t.index],l=s[e+"Start"];l===void 0&&(l=s[e]);var u=s[e+"End"];u===void 0&&(u=s[e]);var c=u-l;o==="end"?i+=c:o==="middle"&&(i+=c/2)}i=n.d2c(i)}return a&&a.t&&a.t.posLetter===n._id&&(r.boxmode==="group"||r.violinmode==="group")&&(i+=a.t.dPos),i}var Vle=e=>e.offsetTop+e.clientTop,Gle=e=>e.offsetLeft+e.clientLeft;function H3(e,t){var r=e._fullLayout,n=t.getBoundingClientRect(),i=n.left,a=n.top,o=i+n.width,s=a+n.height,l=zf.apply3DTransform(r._invTransform)(i,a),u=zf.apply3DTransform(r._invTransform)(o,s),c=l[0],f=l[1],h=u[0],d=u[1];return{x:c,y:f,width:h-c,height:d-f,top:Math.min(f,d),left:Math.min(c,h),right:Math.max(c,h),bottom:Math.max(f,d)}}});var hM=ye((gar,Hle)=>{"use strict";var but=Dr(),wut=ka(),Tut=ip().isUnifiedHover;Hle.exports=function(t,r,n,i){i=i||{};var a=r.legend;function o(s){i.font[s]||(i.font[s]=a?r.legend.font[s]:r.font[s])}r&&Tut(r.hovermode)&&(i.font||(i.font={}),o("size"),o("family"),o("color"),o("weight"),o("style"),o("variant"),a?(i.bgcolor||(i.bgcolor=wut.combine(r.legend.bgcolor,r.paper_bgcolor)),i.bordercolor||(i.bordercolor=r.legend.bordercolor)):i.bgcolor||(i.bgcolor=r.paper_bgcolor)),n("hoverlabel.bgcolor",i.bgcolor),n("hoverlabel.bordercolor",i.bordercolor),n("hoverlabel.namelength",i.namelength),n("hoverlabel.showarrow",i.showarrow),but.coerceFont(n,"hoverlabel.font",i.font),n("hoverlabel.align",i.align)}});var Wle=ye((mar,jle)=>{"use strict";var Aut=Dr(),Sut=hM(),Mut=B1();jle.exports=function(t,r){function n(i,a){return Aut.coerce(t,r,Mut,i,a)}Sut(t,r,n)}});var Yle=ye((yar,Zle)=>{"use strict";var Xle=Dr(),Eut=a3(),kut=hM();Zle.exports=function(t,r,n,i){function a(s,l){return Xle.coerce(t,r,Eut,s,l)}var o=Xle.extendFlat({},i.hoverlabel);r.hovertemplate&&(o.namelength=-1),kut(t,r,a,o)}});var HB=ye((_ar,Kle)=>{"use strict";var Cut=Dr(),Lut=B1();Kle.exports=function(t,r){function n(i,a){return r[i]!==void 0?r[i]:Cut.coerce(t,r,Lut,i,a)}return n("clickmode"),n("hoversubplots"),n("hovermode")}});var Qle=ye((xar,$le)=>{"use strict";var Jle=Dr(),Put=B1(),Iut=HB(),Rut=hM();$le.exports=function(t,r){function n(c,f){return Jle.coerce(t,r,Put,c,f)}var i=Iut(t,r);i&&(n("hoverdistance"),n("spikedistance"));var a=n("dragmode");a==="select"&&n("selectdirection");var o=r._has("mapbox"),s=r._has("map"),l=r._has("geo"),u=r._basePlotModules.length;r.dragmode==="zoom"&&((o||s||l)&&u===1||(o||s)&&l&&u===2)&&(r.dragmode="pan"),Rut(t,r,n),Jle.coerceFont(n,"hoverlabel.grouptitlefont",r.hoverlabel.font)}});var rue=ye((bar,tue)=>{"use strict";var jB=Dr(),eue=qa();tue.exports=function(t){var r=t.calcdata,n=t._fullLayout;function i(u){return function(c){return jB.coerceHoverinfo({hoverinfo:c},{_module:u._module},n)}}for(var a=0;a<r.length;a++){var o=r[a],s=o[0].trace;if(!eue.traceIs(s,"pie-like")){var l=eue.traceIs(s,"2dMap")?Dut:jB.fillArray;l(s.hoverinfo,o,"hi",i(s)),s.hovertemplate&&l(s.hovertemplate,o,"ht"),s.hoverlabel&&(l(s.hoverlabel.bgcolor,o,"hbg"),l(s.hoverlabel.bordercolor,o,"hbc"),l(s.hoverlabel.font.size,o,"hts"),l(s.hoverlabel.font.color,o,"htc"),l(s.hoverlabel.font.family,o,"htf"),l(s.hoverlabel.font.weight,o,"htw"),l(s.hoverlabel.font.style,o,"hty"),l(s.hoverlabel.font.variant,o,"htv"),l(s.hoverlabel.namelength,o,"hnl"),l(s.hoverlabel.align,o,"hta"),l(s.hoverlabel.showarrow,o,"htsa"))}}};function Dut(e,t,r,n){n=n||jB.identity,Array.isArray(e)&&(t[0][r]=n(e))}});var nue=ye((war,iue)=>{"use strict";var Fut=qa(),zut=GB().hover;iue.exports=function(t,r,n){var i=Fut.getComponentMethod("annotations","onClick")(t,t._hoverdata);n!==void 0&&zut(t,r,n,!0);function a(){t.emit("plotly_click",{points:t._hoverdata,event:r})}t._hoverdata&&r&&r.target&&(i&&i.then?i.then(a):a(),r.stopImmediatePropagation&&r.stopImmediatePropagation())}});var vf=ye((Tar,sue)=>{"use strict";var Out=Oa(),WL=Dr(),qut=yv(),dM=ip(),aue=B1(),oue=GB();sue.exports={moduleType:"component",name:"fx",constants:zS(),schema:{layout:aue},attributes:a3(),layoutAttributes:aue,supplyLayoutGlobalDefaults:Wle(),supplyDefaults:Yle(),supplyLayoutDefaults:Qle(),calc:rue(),getDistanceFunction:dM.getDistanceFunction,getClosest:dM.getClosest,inbox:dM.inbox,quadrature:dM.quadrature,appendArrayPointValue:dM.appendArrayPointValue,castHoverOption:Nut,castHoverinfo:Uut,hover:oue.hover,unhover:qut.unhover,loneHover:oue.loneHover,loneUnhover:But,click:nue()};function But(e){var t=WL.isD3Selection(e)?e:Out.select(e);t.selectAll("g.hovertext").remove(),t.selectAll(".spikeline").remove()}function Nut(e,t,r){return WL.castOption(e,t,"hoverlabel."+r)}function Uut(e,t,r){function n(i){return WL.coerceHoverinfo({hoverinfo:i},{_module:e._module},t)}return WL.castOption(e,r,"hoverinfo",n)}});var Eg=ye(Ey=>{"use strict";Ey.selectMode=function(e){return e==="lasso"||e==="select"};Ey.drawMode=function(e){return e==="drawclosedpath"||e==="drawopenpath"||e==="drawline"||e==="drawrect"||e==="drawcircle"};Ey.openMode=function(e){return e==="drawline"||e==="drawopenpath"};Ey.rectMode=function(e){return e==="select"||e==="drawline"||e==="drawrect"||e==="drawcircle"};Ey.freeMode=function(e){return e==="lasso"||e==="drawclosedpath"||e==="drawopenpath"};Ey.selectingOrDrawing=function(e){return Ey.freeMode(e)||Ey.rectMode(e)}});var vM=ye((Sar,lue)=>{"use strict";lue.exports=function(t){var r=t._fullLayout;r._glcanvas&&r._glcanvas.size()&&r._glcanvas.each(function(n){n.regl&&n.regl.clear({color:!0,depth:!0})})}});var XL=ye((Mar,uue)=>{"use strict";uue.exports={undo:{width:857.1,height:1e3,path:"m857 350q0-87-34-166t-91-137-137-92-166-34q-96 0-183 41t-147 114q-4 6-4 13t5 11l76 77q6 5 14 5 9-1 13-7 41-53 100-82t126-29q58 0 110 23t92 61 61 91 22 111-22 111-61 91-92 61-110 23q-55 0-105-20t-90-57l77-77q17-16 8-38-10-23-33-23h-250q-15 0-25 11t-11 25v250q0 24 22 33 22 10 39-8l72-72q60 57 137 88t159 31q87 0 166-34t137-92 91-137 34-166z",transform:"matrix(1 0 0 -1 0 850)"},home:{width:928.6,height:1e3,path:"m786 296v-267q0-15-11-26t-25-10h-214v214h-143v-214h-214q-15 0-25 10t-11 26v267q0 1 0 2t0 2l321 264 321-264q1-1 1-4z m124 39l-34-41q-5-5-12-6h-2q-7 0-12 3l-386 322-386-322q-7-4-13-4-7 2-12 7l-35 41q-4 5-3 13t6 12l401 334q18 15 42 15t43-15l136-114v109q0 8 5 13t13 5h107q8 0 13-5t5-13v-227l122-102q5-5 6-12t-4-13z",transform:"matrix(1 0 0 -1 0 850)"},"camera-retro":{width:1e3,height:1e3,path:"m518 386q0 8-5 13t-13 5q-37 0-63-27t-26-63q0-8 5-13t13-5 12 5 5 13q0 23 16 38t38 16q8 0 13 5t5 13z m125-73q0-59-42-101t-101-42-101 42-42 101 42 101 101 42 101-42 42-101z m-572-320h858v71h-858v-71z m643 320q0 89-62 152t-152 62-151-62-63-152 63-151 151-63 152 63 62 151z m-571 358h214v72h-214v-72z m-72-107h858v143h-462l-36-71h-360v-72z m929 143v-714q0-30-21-51t-50-21h-858q-29 0-50 21t-21 51v714q0 30 21 51t50 21h858q29 0 50-21t21-51z",transform:"matrix(1 0 0 -1 0 850)"},zoombox:{width:1e3,height:1e3,path:"m1000-25l-250 251c40 63 63 138 63 218 0 224-182 406-407 406-224 0-406-182-406-406s183-406 407-406c80 0 155 22 218 62l250-250 125 125z m-812 250l0 438 437 0 0-438-437 0z m62 375l313 0 0-312-313 0 0 312z",transform:"matrix(1 0 0 -1 0 850)"},pan:{width:1e3,height:1e3,path:"m1000 350l-187 188 0-125-250 0 0 250 125 0-188 187-187-187 125 0 0-250-250 0 0 125-188-188 186-187 0 125 252 0 0-250-125 0 187-188 188 188-125 0 0 250 250 0 0-126 187 188z",transform:"matrix(1 0 0 -1 0 850)"},zoom_plus:{width:875,height:1e3,path:"m1 787l0-875 875 0 0 875-875 0z m687-500l-187 0 0-187-125 0 0 187-188 0 0 125 188 0 0 187 125 0 0-187 187 0 0-125z",transform:"matrix(1 0 0 -1 0 850)"},zoom_minus:{width:875,height:1e3,path:"m0 788l0-876 875 0 0 876-875 0z m688-500l-500 0 0 125 500 0 0-125z",transform:"matrix(1 0 0 -1 0 850)"},autoscale:{width:1e3,height:1e3,path:"m250 850l-187 0-63 0 0-62 0-188 63 0 0 188 187 0 0 62z m688 0l-188 0 0-62 188 0 0-188 62 0 0 188 0 62-62 0z m-875-938l0 188-63 0 0-188 0-62 63 0 187 0 0 62-187 0z m875 188l0-188-188 0 0-62 188 0 62 0 0 62 0 188-62 0z m-125 188l-1 0-93-94-156 156 156 156 92-93 2 0 0 250-250 0 0-2 93-92-156-156-156 156 94 92 0 2-250 0 0-250 0 0 93 93 157-156-157-156-93 94 0 0 0-250 250 0 0 0-94 93 156 157 156-157-93-93 0 0 250 0 0 250z",transform:"matrix(1 0 0 -1 0 850)"},tooltip_basic:{width:1500,height:1e3,path:"m375 725l0 0-375-375 375-374 0-1 1125 0 0 750-1125 0z",transform:"matrix(1 0 0 -1 0 850)"},tooltip_compare:{width:1125,height:1e3,path:"m187 786l0 2-187-188 188-187 0 0 937 0 0 373-938 0z m0-499l0 1-187-188 188-188 0 0 937 0 0 376-938-1z",transform:"matrix(1 0 0 -1 0 850)"},plotlylogo:{width:1542,height:1e3,path:"m0-10h182v-140h-182v140z m228 146h183v-286h-183v286z m225 714h182v-1000h-182v1000z m225-285h182v-715h-182v715z m225 142h183v-857h-183v857z m231-428h182v-429h-182v429z m225-291h183v-138h-183v138z",transform:"matrix(1 0 0 -1 0 850)"},"z-axis":{width:1e3,height:1e3,path:"m833 5l-17 108v41l-130-65 130-66c0 0 0 38 0 39 0-1 36-14 39-25 4-15-6-22-16-30-15-12-39-16-56-20-90-22-187-23-279-23-261 0-341 34-353 59 3 60 228 110 228 110-140-8-351-35-351-116 0-120 293-142 474-142 155 0 477 22 477 142 0 50-74 79-163 96z m-374 94c-58-5-99-21-99-40 0-24 65-43 144-43 79 0 143 19 143 43 0 19-42 34-98 40v216h87l-132 135-133-135h88v-216z m167 515h-136v1c16 16 31 34 46 52l84 109v54h-230v-71h124v-1c-16-17-28-32-44-51l-89-114v-51h245v72z",transform:"matrix(1 0 0 -1 0 850)"},"3d_rotate":{width:1e3,height:1e3,path:"m922 660c-5 4-9 7-14 11-359 263-580-31-580-31l-102 28 58-400c0 1 1 1 2 2 118 108 351 249 351 249s-62 27-100 42c88 83 222 183 347 122 16-8 30-17 44-27-2 1-4 2-6 4z m36-329c0 0 64 229-88 296-62 27-124 14-175-11 157-78 225-208 249-266 8-19 11-31 11-31 2 5 6 15 11 32-5-13-8-20-8-20z m-775-239c70-31 117-50 198-32-121 80-199 346-199 346l-96-15-58-12c0 0 55-226 155-287z m603 133l-317-139c0 0 4-4 19-14 7-5 24-15 24-15s-177-147-389 4c235-287 536-112 536-112l31-22 100 299-4-1z m-298-153c6-4 14-9 24-15 0 0-17 10-24 15z",transform:"matrix(1 0 0 -1 0 850)"},camera:{width:1e3,height:1e3,path:"m500 450c-83 0-150-67-150-150 0-83 67-150 150-150 83 0 150 67 150 150 0 83-67 150-150 150z m400 150h-120c-16 0-34 13-39 29l-31 93c-6 15-23 28-40 28h-340c-16 0-34-13-39-28l-31-94c-6-15-23-28-40-28h-120c-55 0-100-45-100-100v-450c0-55 45-100 100-100h800c55 0 100 45 100 100v450c0 55-45 100-100 100z m-400-550c-138 0-250 112-250 250 0 138 112 250 250 250 138 0 250-112 250-250 0-138-112-250-250-250z m365 380c-19 0-35 16-35 35 0 19 16 35 35 35 19 0 35-16 35-35 0-19-16-35-35-35z",transform:"matrix(1 0 0 -1 0 850)"},movie:{width:1e3,height:1e3,path:"m938 413l-188-125c0 37-17 71-44 94 64 38 107 107 107 187 0 121-98 219-219 219-121 0-219-98-219-219 0-61 25-117 66-156h-115c30 33 49 76 49 125 0 103-84 187-187 187s-188-84-188-187c0-57 26-107 65-141-38-22-65-62-65-109v-250c0-70 56-126 125-126h500c69 0 125 56 125 126l188-126c34 0 62 28 62 63v375c0 35-28 63-62 63z m-750 0c-69 0-125 56-125 125s56 125 125 125 125-56 125-125-56-125-125-125z m406-1c-87 0-157 70-157 157 0 86 70 156 157 156s156-70 156-156-70-157-156-157z",transform:"matrix(1 0 0 -1 0 850)"},question:{width:857.1,height:1e3,path:"m500 82v107q0 8-5 13t-13 5h-107q-8 0-13-5t-5-13v-107q0-8 5-13t13-5h107q8 0 13 5t5 13z m143 375q0 49-31 91t-77 65-95 23q-136 0-207-119-9-14 4-24l74-55q4-4 10-4 9 0 14 7 30 38 48 51 19 14 48 14 27 0 48-15t21-33q0-21-11-34t-38-25q-35-16-65-48t-29-70v-20q0-8 5-13t13-5h107q8 0 13 5t5 13q0 10 12 27t30 28q18 10 28 16t25 19 25 27 16 34 7 45z m214-107q0-117-57-215t-156-156-215-58-216 58-155 156-58 215 58 215 155 156 216 58 215-58 156-156 57-215z",transform:"matrix(1 0 0 -1 0 850)"},disk:{width:857.1,height:1e3,path:"m214-7h429v214h-429v-214z m500 0h72v500q0 8-6 21t-11 20l-157 156q-5 6-19 12t-22 5v-232q0-22-15-38t-38-16h-322q-22 0-37 16t-16 38v232h-72v-714h72v232q0 22 16 38t37 16h465q22 0 38-16t15-38v-232z m-214 518v178q0 8-5 13t-13 5h-107q-7 0-13-5t-5-13v-178q0-8 5-13t13-5h107q7 0 13 5t5 13z m357-18v-518q0-22-15-38t-38-16h-750q-23 0-38 16t-16 38v750q0 22 16 38t38 16h517q23 0 50-12t42-26l156-157q16-15 27-42t11-49z",transform:"matrix(1 0 0 -1 0 850)"},drawopenpath:{width:70,height:70,path:"M33.21,85.65a7.31,7.31,0,0,1-2.59-.48c-8.16-3.11-9.27-19.8-9.88-41.3-.1-3.58-.19-6.68-.35-9-.15-2.1-.67-3.48-1.43-3.79-2.13-.88-7.91,2.32-12,5.86L3,32.38c1.87-1.64,11.55-9.66,18.27-6.9,2.13.87,4.75,3.14,5.17,9,.17,2.43.26,5.59.36,9.25a224.17,224.17,0,0,0,1.5,23.4c1.54,10.76,4,12.22,4.48,12.4.84.32,2.79-.46,5.76-3.59L43,80.07C41.53,81.57,37.68,85.64,33.21,85.65ZM74.81,69a11.34,11.34,0,0,0,6.09-6.72L87.26,44.5,74.72,32,56.9,38.35c-2.37.86-5.57,3.42-6.61,6L38.65,72.14l8.42,8.43ZM55,46.27a7.91,7.91,0,0,1,3.64-3.17l14.8-5.3,8,8L76.11,60.6l-.06.19a6.37,6.37,0,0,1-3,3.43L48.25,74.59,44.62,71Zm16.57,7.82A6.9,6.9,0,1,0,64.64,61,6.91,6.91,0,0,0,71.54,54.09Zm-4.05,0a2.85,2.85,0,1,1-2.85-2.85A2.86,2.86,0,0,1,67.49,54.09Zm-4.13,5.22L60.5,56.45,44.26,72.7l2.86,2.86ZM97.83,35.67,84.14,22l-8.57,8.57L89.26,44.24Zm-13.69-8,8,8-2.85,2.85-8-8Z",transform:"matrix(1 0 0 1 -15 -15)"},drawclosedpath:{width:90,height:90,path:"M88.41,21.12a26.56,26.56,0,0,0-36.18,0l-2.07,2-2.07-2a26.57,26.57,0,0,0-36.18,0,23.74,23.74,0,0,0,0,34.8L48,90.12a3.22,3.22,0,0,0,4.42,0l36-34.21a23.73,23.73,0,0,0,0-34.79ZM84,51.24,50.16,83.35,16.35,51.25a17.28,17.28,0,0,1,0-25.47,20,20,0,0,1,27.3,0l4.29,4.07a3.23,3.23,0,0,0,4.44,0l4.29-4.07a20,20,0,0,1,27.3,0,17.27,17.27,0,0,1,0,25.46ZM66.76,47.68h-33v6.91h33ZM53.35,35H46.44V68h6.91Z",transform:"matrix(1 0 0 1 -5 -5)"},lasso:{width:1031,height:1e3,path:"m1018 538c-36 207-290 336-568 286-277-48-473-256-436-463 10-57 36-108 76-151-13-66 11-137 68-183 34-28 75-41 114-42l-55-70 0 0c-2-1-3-2-4-3-10-14-8-34 5-45 14-11 34-8 45 4 1 1 2 3 2 5l0 0 113 140c16 11 31 24 45 40 4 3 6 7 8 11 48-3 100 0 151 9 278 48 473 255 436 462z m-624-379c-80 14-149 48-197 96 42 42 109 47 156 9 33-26 47-66 41-105z m-187-74c-19 16-33 37-39 60 50-32 109-55 174-68-42-25-95-24-135 8z m360 75c-34-7-69-9-102-8 8 62-16 128-68 170-73 59-175 54-244-5-9 20-16 40-20 61-28 159 121 317 333 354s407-60 434-217c28-159-121-318-333-355z",transform:"matrix(1 0 0 -1 0 850)"},selectbox:{width:1e3,height:1e3,path:"m0 850l0-143 143 0 0 143-143 0z m286 0l0-143 143 0 0 143-143 0z m285 0l0-143 143 0 0 143-143 0z m286 0l0-143 143 0 0 143-143 0z m-857-286l0-143 143 0 0 143-143 0z m857 0l0-143 143 0 0 143-143 0z m-857-285l0-143 143 0 0 143-143 0z m857 0l0-143 143 0 0 143-143 0z m-857-286l0-143 143 0 0 143-143 0z m286 0l0-143 143 0 0 143-143 0z m285 0l0-143 143 0 0 143-143 0z m286 0l0-143 143 0 0 143-143 0z",transform:"matrix(1 0 0 -1 0 850)"},drawline:{width:70,height:70,path:"M60.64,62.3a11.29,11.29,0,0,0,6.09-6.72l6.35-17.72L60.54,25.31l-17.82,6.4c-2.36.86-5.57,3.41-6.6,6L24.48,65.5l8.42,8.42ZM40.79,39.63a7.89,7.89,0,0,1,3.65-3.17l14.79-5.31,8,8L61.94,54l-.06.19a6.44,6.44,0,0,1-3,3.43L34.07,68l-3.62-3.63Zm16.57,7.81a6.9,6.9,0,1,0-6.89,6.9A6.9,6.9,0,0,0,57.36,47.44Zm-4,0a2.86,2.86,0,1,1-2.85-2.85A2.86,2.86,0,0,1,53.32,47.44Zm-4.13,5.22L46.33,49.8,30.08,66.05l2.86,2.86ZM83.65,29,70,15.34,61.4,23.9,75.09,37.59ZM70,21.06l8,8-2.84,2.85-8-8ZM87,80.49H10.67V87H87Z",transform:"matrix(1 0 0 1 -15 -15)"},drawrect:{width:80,height:80,path:"M78,22V79H21V22H78m9-9H12V88H87V13ZM68,46.22H31V54H68ZM53,32H45.22V69H53Z",transform:"matrix(1 0 0 1 -10 -10)"},drawcircle:{width:80,height:80,path:"M50,84.72C26.84,84.72,8,69.28,8,50.3S26.84,15.87,50,15.87,92,31.31,92,50.3,73.16,84.72,50,84.72Zm0-60.59c-18.6,0-33.74,11.74-33.74,26.17S31.4,76.46,50,76.46,83.74,64.72,83.74,50.3,68.6,24.13,50,24.13Zm17.15,22h-34v7.11h34Zm-13.8-13H46.24v34h7.11Z",transform:"matrix(1 0 0 1 -10 -10)"},eraseshape:{width:80,height:80,path:"M82.77,78H31.85L6,49.57,31.85,21.14H82.77a8.72,8.72,0,0,1,8.65,8.77V69.24A8.72,8.72,0,0,1,82.77,78ZM35.46,69.84H82.77a.57.57,0,0,0,.49-.6V29.91a.57.57,0,0,0-.49-.61H35.46L17,49.57Zm32.68-34.7-24,24,5,5,24-24Zm-19,.53-5,5,24,24,5-5Z",transform:"matrix(1 0 0 1 -10 -10)"},spikeline:{width:1e3,height:1e3,path:"M512 409c0-57-46-104-103-104-57 0-104 47-104 104 0 57 47 103 104 103 57 0 103-46 103-103z m-327-39l92 0 0 92-92 0z m-185 0l92 0 0 92-92 0z m370-186l92 0 0 93-92 0z m0-184l92 0 0 92-92 0z",transform:"matrix(1.5 0 0 -1.5 0 850)"},pencil:{width:1792,height:1792,path:"M491 1536l91-91-235-235-91 91v107h128v128h107zm523-928q0-22-22-22-10 0-17 7l-542 542q-7 7-7 17 0 22 22 22 10 0 17-7l542-542q7-7 7-17zm-54-192l416 416-832 832h-416v-416zm683 96q0 53-37 90l-166 166-416-416 166-165q36-38 90-38 53 0 91 38l235 234q37 39 37 91z",transform:"matrix(1 0 0 1 0 1)"},newplotlylogo:{name:"newplotlylogo",svg:["<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 132 132'>"," <title>plotly-logomark</title>"," <g id='symbol'>","  <rect fill='#000' x='0' y='0' width='132' height='132' rx='18' ry='18'/>","  <circle fill='#9EF' cx='102' cy='30' r='6'/>","  <circle fill='#BAC' cx='78' cy='30' r='6'/>","  <circle fill='#BAC' cx='78' cy='54' r='6'/>","  <circle fill='#D69' cx='54' cy='30' r='6'/>","  <circle fill='#F26' cx='30' cy='30' r='6'/>","  <circle fill='#F26' cx='30' cy='54' r='6'/>","  <path fill='#FFF' d='M30,72a6,6,0,0,0-6,6v24a6,6,0,0,0,12,0V78A6,6,0,0,0,30,72Z'/>","  <path fill='#FFF' d='M78,72a6,6,0,0,0-6,6v24a6,6,0,0,0,12,0V78A6,6,0,0,0,78,72Z'/>","  <path fill='#FFF' d='M54,48a6,6,0,0,0-6,6v48a6,6,0,0,0,12,0V54A6,6,0,0,0,54,48Z'/>","  <path fill='#FFF' d='M102,48a6,6,0,0,0-6,6v48a6,6,0,0,0,12,0V54A6,6,0,0,0,102,48Z'/>"," </g>","</svg>"].join("")}}});var YL=ye((Ear,cue)=>{"use strict";var ZL=32;cue.exports={CIRCLE_SIDES:ZL,i000:0,i090:ZL/4,i180:ZL/2,i270:ZL/4*3,cos45:Math.cos(Math.PI/4),sin45:Math.sin(Math.PI/4),SQRT2:Math.sqrt(2)}});var KL=ye((kar,hue)=>{"use strict";var Vut=Dr().strTranslate;function fue(e,t){switch(e.type){case"log":return e.p2d(t);case"date":return e.p2r(t,0,e.calendar);default:return e.p2r(t)}}function Gut(e,t){switch(e.type){case"log":return e.d2p(t);case"date":return e.r2p(t,0,e.calendar);default:return e.r2p(t)}}function Hut(e){var t=e._id.charAt(0)==="y"?1:0;return function(r){return fue(e,r[t])}}function jut(e){return Vut(e.xaxis._offset,e.yaxis._offset)}hue.exports={p2r:fue,r2p:Gut,axValue:Hut,getTransform:jut}});var u_=ye(ky=>{"use strict";var Wut=eM(),pue=YL(),W3=pue.CIRCLE_SIDES,WB=pue.SQRT2,gue=KL(),due=gue.p2r,vue=gue.r2p,Xut=[0,3,4,5,6,1,2],Zut=[0,3,4,1,2];ky.writePaths=function(e){var t=e.length;if(!t)return"M0,0Z";for(var r="",n=0;n<t;n++)for(var i=e[n].length,a=0;a<i;a++){var o=e[n][a][0];if(o==="Z")r+="Z";else for(var s=e[n][a].length,l=0;l<s;l++){var u=l;o==="Q"||o==="S"?u=Zut[l]:o==="C"&&(u=Xut[l]),r+=e[n][a][u],l>0&&l<s-1&&(r+=",")}}return r};ky.readPaths=function(e,t,r,n){var i=Wut(e),a=[],o=-1,s=function(){o++,a[o]=[]},l,u=0,c=0,f,h,d=function(){f=u,h=c};d();for(var v=0;v<i.length;v++){var _=[],b,p,k,E,S=i[v][0],L=S;switch(S){case"M":s(),u=+i[v][1],c=+i[v][2],_.push([L,u,c]),d();break;case"Q":case"S":b=+i[v][1],k=+i[v][2],u=+i[v][3],c=+i[v][4],_.push([L,u,c,b,k]);break;case"C":b=+i[v][1],k=+i[v][2],p=+i[v][3],E=+i[v][4],u=+i[v][5],c=+i[v][6],_.push([L,u,c,b,k,p,E]);break;case"T":case"L":u=+i[v][1],c=+i[v][2],_.push([L,u,c]);break;case"H":L="L",u=+i[v][1],_.push([L,u,c]);break;case"V":L="L",c=+i[v][1],_.push([L,u,c]);break;case"A":L="L";var x=+i[v][1],C=+i[v][2];+i[v][4]||(x=-x,C=-C);var M=u-x,g=c;for(l=1;l<=W3/2;l++){var P=2*Math.PI*l/W3;_.push([L,M+x*Math.cos(P),g+C*Math.sin(P)])}break;case"Z":(u!==f||c!==h)&&(u=f,c=h,_.push([L,u,c]));break}for(var T=(r||{}).domain,z=t._fullLayout._size,O=r&&r.xsizemode==="pixel",V=r&&r.ysizemode==="pixel",G=n===!1,Z=0;Z<_.length;Z++){for(l=0;l+2<7;l+=2){var j=_[Z][l+1],N=_[Z][l+2];j===void 0||N===void 0||(u=j,c=N,r&&(r.xaxis&&r.xaxis.p2r?(G&&(j-=r.xaxis._offset),O?j=vue(r.xaxis,r.xanchor)+j:j=due(r.xaxis,j)):(G&&(j-=z.l),T?j=T.x[0]+j/z.w:j=j/z.w),r.yaxis&&r.yaxis.p2r?(G&&(N-=r.yaxis._offset),V?N=vue(r.yaxis,r.yanchor)-N:N=due(r.yaxis,N)):(G&&(N-=z.t),T?N=T.y[1]-N/z.h:N=1-N/z.h)),_[Z][l+1]=j,_[Z][l+2]=N)}a[o].push(_[Z].slice())}}return a};function pM(e,t){return Math.abs(e-t)<=1e-6}function JL(e,t){var r=t[1]-e[1],n=t[2]-e[2];return Math.sqrt(r*r+n*n)}ky.pointsOnRectangle=function(e){var t=e.length;if(t!==5)return!1;for(var r=1;r<3;r++){var n=e[0][r]-e[1][r],i=e[3][r]-e[2][r];if(!pM(n,i))return!1;var a=e[0][r]-e[3][r],o=e[1][r]-e[2][r];if(!pM(a,o))return!1}return!pM(e[0][1],e[1][1])&&!pM(e[0][1],e[3][1])?!1:!!(JL(e[0],e[1])*JL(e[0],e[3]))};ky.pointsOnEllipse=function(e){var t=e.length;if(t!==W3+1)return!1;t=W3;for(var r=0;r<t;r++){var n=(t*2-r)%t,i=(t/2+n)%t,a=(t/2+r)%t;if(!pM(JL(e[r],e[a]),JL(e[n],e[i])))return!1}return!0};ky.handleEllipse=function(e,t,r){if(!e)return[t,r];var n=ky.ellipseOver({x0:t[0],y0:t[1],x1:r[0],y1:r[1]}),i=(n.x1+n.x0)/2,a=(n.y1+n.y0)/2,o=(n.x1-n.x0)/2,s=(n.y1-n.y0)/2;o||(o=s=s/WB),s||(s=o=o/WB);for(var l=[],u=0;u<W3;u++){var c=u*2*Math.PI/W3;l.push([i+o*Math.cos(c),a+s*Math.sin(c)])}return l};ky.ellipseOver=function(e){var t=e.x0,r=e.y0,n=e.x1,i=e.y1,a=n-t,o=i-r;t-=a,r-=o;var s=(t+n)/2,l=(r+i)/2,u=WB;return a*=u,o*=u,{x0:s-a,y0:l-o,x1:s+a,y1:l+o}};ky.fixDatesForPaths=function(e,t,r){var n=t.type==="date",i=r.type==="date";if(!n&&!i)return e;for(var a=0;a<e.length;a++)for(var o=0;o<e[a].length;o++)for(var s=0;s+2<e[a][o].length;s+=2)n&&(e[a][o][s+1]=e[a][o][s+1].replace(" ","_")),i&&(e[a][o][s+2]=e[a][o][s+2].replace(" ","_"));return e}});var eP=ye((Lar,Sue)=>{"use strict";var mue=hf(),wue=Eg(),Yut=wue.drawMode,Kut=wue.openMode,X3=YL(),yue=X3.i000,_ue=X3.i090,xue=X3.i180,bue=X3.i270,Jut=X3.cos45,$ut=X3.sin45,Tue=KL(),$L=Tue.p2r,c_=Tue.r2p,Qut=Q1(),ect=Qut.clearOutline,QL=u_(),tct=QL.readPaths,rct=QL.writePaths,ict=QL.ellipseOver,nct=QL.fixDatesForPaths;function act(e,t){if(e.length){var r=e[0][0];if(r){var n=t.gd,i=t.isActiveShape,a=t.dragmode,o=(n.layout||{}).shapes||[];if(!Yut(a)&&i!==void 0){var s=n._fullLayout._activeShapeIndex;if(s<o.length)switch(n._fullLayout.shapes[s].type){case"rect":a="drawrect";break;case"circle":a="drawcircle";break;case"line":a="drawline";break;case"path":var l=o[s].path||"";l[l.length-1]==="Z"?a="drawclosedpath":a="drawopenpath";break}}var u=Aue(e,t,a);ect(n);for(var c=t.editHelpers,f=(c||{}).modifyItem,h=[],d=0;d<o.length;d++){var v=n._fullLayout.shapes[d];if(h[d]=v._input,i!==void 0&&d===n._fullLayout._activeShapeIndex){var _=u;switch(v.type){case"line":case"rect":case"circle":var b=mue.getFromId(n,v.xref);v.xref.charAt(0)==="x"&&b.type.includes("category")?(f("x0",_.x0-(v.x0shift||0)),f("x1",_.x1-(v.x1shift||0))):(f("x0",_.x0),f("x1",_.x1));var p=mue.getFromId(n,v.yref);v.yref.charAt(0)==="y"&&p.type.includes("category")?(f("y0",_.y0-(v.y0shift||0)),f("y1",_.y1-(v.y1shift||0))):(f("y0",_.y0),f("y1",_.y1));break;case"path":f("path",_.path);break}}}return i===void 0?(h.push(u),h):c?c.getUpdateObj():{}}}}function Aue(e,t,r){var n=e[0][0],i=t.gd,a=n.getAttribute("d"),o=i._fullLayout.newshape,s=t.plotinfo,l=t.isActiveShape,u=s.xaxis,c=s.yaxis,f=!!s.domain||!s.xaxis,h=!!s.domain||!s.yaxis,d=Kut(r),v=tct(a,i,s,l),_={editable:!0,visible:o.visible,name:o.name,showlegend:o.showlegend,legend:o.legend,legendwidth:o.legendwidth,legendgroup:o.legendgroup,legendgrouptitle:{text:o.legendgrouptitle.text,font:o.legendgrouptitle.font},legendrank:o.legendrank,label:o.label,xref:f?"paper":u._id,yref:h?"paper":c._id,layer:o.layer,opacity:o.opacity,line:{color:o.line.color,width:o.line.width,dash:o.line.dash}};d||(_.fillcolor=o.fillcolor,_.fillrule=o.fillrule);var b;if(v.length===1&&(b=v[0]),b&&b.length===5&&r==="drawrect")_.type="rect",_.x0=b[0][1],_.y0=b[0][2],_.x1=b[2][1],_.y1=b[2][2];else if(b&&r==="drawline")_.type="line",_.x0=b[0][1],_.y0=b[0][2],_.x1=b[1][1],_.y1=b[1][2];else if(b&&r==="drawcircle"){_.type="circle";var p=b[yue][1],k=b[_ue][1],E=b[xue][1],S=b[bue][1],L=b[yue][2],x=b[_ue][2],C=b[xue][2],M=b[bue][2],g=s.xaxis&&(s.xaxis.type==="date"||s.xaxis.type==="log"),P=s.yaxis&&(s.yaxis.type==="date"||s.yaxis.type==="log");g&&(p=c_(s.xaxis,p),k=c_(s.xaxis,k),E=c_(s.xaxis,E),S=c_(s.xaxis,S)),P&&(L=c_(s.yaxis,L),x=c_(s.yaxis,x),C=c_(s.yaxis,C),M=c_(s.yaxis,M));var T=(k+S)/2,z=(L+C)/2,O=(S-k+E-p)/2,V=(M-x+C-L)/2,G=ict({x0:T,y0:z,x1:T+O*Jut,y1:z+V*$ut});g&&(G.x0=$L(s.xaxis,G.x0),G.x1=$L(s.xaxis,G.x1)),P&&(G.y0=$L(s.yaxis,G.y0),G.y1=$L(s.yaxis,G.y1)),_.x0=G.x0,_.y0=G.y0,_.x1=G.x1,_.y1=G.y1}else _.type="path",u&&c&&nct(v,u,c),_.path=rct(v),b=null;return _}Sue.exports={newShapes:act,createShapeObj:Aue}});var ZB=ye((Par,Mue)=>{"use strict";var oct=Eg(),sct=oct.selectMode,lct=Q1(),uct=lct.clearOutline,XB=u_(),cct=XB.readPaths,fct=XB.writePaths,hct=XB.fixDatesForPaths;Mue.exports=function(t,r){if(t.length){var n=t[0][0];if(n){var i=n.getAttribute("d"),a=r.gd,o=a._fullLayout.newselection,s=r.plotinfo,l=s.xaxis,u=s.yaxis,c=r.isActiveSelection,f=r.dragmode,h=(a.layout||{}).selections||[];if(!sct(f)&&c!==void 0){var d=a._fullLayout._activeSelectionIndex;if(d<h.length)switch(a._fullLayout.selections[d].type){case"rect":f="select";break;case"path":f="lasso";break}}var v=cct(i,a,s,c),_={xref:l._id,yref:u._id,opacity:o.opacity,line:{color:o.line.color,width:o.line.width,dash:o.line.dash}},b;v.length===1&&(b=v[0]),b&&b.length===5&&f==="select"?(_.type="rect",_.x0=b[0][1],_.y0=b[0][2],_.x1=b[2][1],_.y1=b[2][2]):(_.type="path",l&&u&&hct(v,l,u),_.path=fct(v),b=null),uct(a);for(var p=r.editHelpers,k=(p||{}).modifyItem,E=[],S=0;S<h.length;S++){var L=a._fullLayout.selections[S];if(!L){E[S]=L;continue}if(E[S]=L._input,c!==void 0&&S===a._fullLayout._activeSelectionIndex){var x=_;switch(L.type){case"rect":k("x0",x.x0),k("x1",x.x1),k("y0",x.y0),k("y1",x.y1);break;case"path":k("path",x.path);break}}}return c===void 0?(E.push(_),E):p?p.getUpdateObj():{}}}}});var gM=ye((Iar,Eue)=>{"use strict";Eue.exports={segmentRE:/[MLHVQCTSZ][^MLHVQCTSZ]*/g,paramRE:/[^\s,]+/g,paramIsX:{M:{0:!0,drawn:0},L:{0:!0,drawn:0},H:{0:!0,drawn:0},V:{},Q:{0:!0,2:!0,drawn:2},C:{0:!0,2:!0,4:!0,drawn:4},T:{0:!0,drawn:0},S:{0:!0,2:!0,drawn:2},Z:{}},paramIsY:{M:{1:!0,drawn:1},L:{1:!0,drawn:1},H:{},V:{0:!0,drawn:0},Q:{1:!0,3:!0,drawn:3},C:{1:!0,3:!0,5:!0,drawn:5},T:{1:!0,drawn:1},S:{1:!0,3:!0,drawn:5},Z:{}},numParams:{M:2,L:2,H:1,V:1,Q:4,C:6,T:2,S:4,Z:0}}});var f_=ye(Dd=>{"use strict";var Ob=gM(),kue=Dr(),tP=ho();Dd.rangeToShapePosition=function(e){return e.type==="log"?e.r2d:function(t){return t}};Dd.shapePositionToRange=function(e){return e.type==="log"?e.d2r:function(t){return t}};Dd.decodeDate=function(e){return function(t){return t.replace&&(t=t.replace("_"," ")),e(t)}};Dd.encodeDate=function(e){return function(t){return e(t).replace(" ","_")}};Dd.extractPathCoords=function(e,t,r){var n=[],i=e.match(Ob.segmentRE);return i.forEach(function(a){var o=t[a.charAt(0)].drawn;if(o!==void 0){var s=a.slice(1).match(Ob.paramRE);if(!(!s||s.length<o)){var l=s[o],u=r?l:kue.cleanNumber(l);n.push(u)}}}),n};Dd.getDataToPixel=function(e,t,r,n,i){var a=e._fullLayout._size,o;if(t)if(i==="domain")o=function(l){return t._length*(n?1-l:l)+t._offset};else{var s=Dd.shapePositionToRange(t);o=function(l){var u=mM(t,r);return t._offset+t.r2p(s(l,!0))+u},t.type==="date"&&(o=Dd.decodeDate(o))}else n?o=function(l){return a.t+a.h*(1-l)}:o=function(l){return a.l+a.w*l};return o};Dd.getPixelToData=function(e,t,r,n){var i=e._fullLayout._size,a;if(t)if(n==="domain")a=function(s){var l=(s-t._offset)/t._length;return r?1-l:l};else{var o=Dd.rangeToShapePosition(t);a=function(s){return o(t.p2r(s-t._offset))}}else r?a=function(s){return 1-(s-i.t)/i.h}:a=function(s){return(s-i.l)/i.w};return a};Dd.roundPositionForSharpStrokeRendering=function(e,t){var r=Math.round(t%2)===1,n=Math.round(e);return r?n+.5:n};Dd.makeShapesOptionsAndPlotinfo=function(e,t){var r=e._fullLayout.shapes[t]||{},n=e._fullLayout._plots[r.xref+r.yref],i=!!n;return i?n._hadPlotinfo=!0:(n={},r.xref&&r.xref!=="paper"&&(n.xaxis=e._fullLayout[r.xref+"axis"]),r.yref&&r.yref!=="paper"&&(n.yaxis=e._fullLayout[r.yref+"axis"])),n.xsizemode=r.xsizemode,n.ysizemode=r.ysizemode,n.xanchor=r.xanchor,n.yanchor=r.yanchor,{options:r,plotinfo:n}};Dd.makeSelectionsOptionsAndPlotinfo=function(e,t){var r=e._fullLayout.selections[t]||{},n=e._fullLayout._plots[r.xref+r.yref],i=!!n;return i?n._hadPlotinfo=!0:(n={},r.xref&&(n.xaxis=e._fullLayout[r.xref+"axis"]),r.yref&&(n.yaxis=e._fullLayout[r.yref+"axis"])),{options:r,plotinfo:n}};Dd.getPathString=function(e,t){var r=t.type,n=tP.getRefType(t.xref),i=tP.getRefType(t.yref),a=tP.getFromId(e,t.xref),o=tP.getFromId(e,t.yref),s=e._fullLayout._size,l,u,c,f,h=mM(a,t.x0shift),d=mM(a,t.x1shift),v=mM(o,t.y0shift),_=mM(o,t.y1shift),b,p,k,E;if(a?n==="domain"?u=function(O){return a._offset+a._length*O}:(l=Dd.shapePositionToRange(a),u=function(O){return a._offset+a.r2p(l(O,!0))}):u=function(O){return s.l+s.w*O},o?i==="domain"?f=function(O){return o._offset+o._length*(1-O)}:(c=Dd.shapePositionToRange(o),f=function(O){return o._offset+o.r2p(c(O,!0))}):f=function(O){return s.t+s.h*(1-O)},r==="path")return a&&a.type==="date"&&(u=Dd.decodeDate(u)),o&&o.type==="date"&&(f=Dd.decodeDate(f)),dct(t,u,f);if(t.xsizemode==="pixel"){var S=u(t.xanchor);b=S+t.x0+h,p=S+t.x1+d}else b=u(t.x0)+h,p=u(t.x1)+d;if(t.ysizemode==="pixel"){var L=f(t.yanchor);k=L-t.y0+v,E=L-t.y1+_}else k=f(t.y0)+v,E=f(t.y1)+_;if(r==="line")return"M"+b+","+k+"L"+p+","+E;if(r==="rect")return"M"+b+","+k+"H"+p+"V"+E+"H"+b+"Z";var x=(b+p)/2,C=(k+E)/2,M=Math.abs(x-b),g=Math.abs(C-k),P="A"+M+","+g,T=x+M+","+C,z=x+","+(C-g);return"M"+T+P+" 0 1,1 "+z+P+" 0 0,1 "+T+"Z"};function dct(e,t,r){var n=e.path,i=e.xsizemode,a=e.ysizemode,o=e.xanchor,s=e.yanchor;return n.replace(Ob.segmentRE,function(l){var u=0,c=l.charAt(0),f=Ob.paramIsX[c],h=Ob.paramIsY[c],d=Ob.numParams[c],v=l.slice(1).replace(Ob.paramRE,function(_){return f[u]?i==="pixel"?_=t(o)+Number(_):_=t(_):h[u]&&(a==="pixel"?_=r(s)-Number(_):_=r(_)),u++,u>d&&(_="X"),_});return u>d&&(v=v.replace(/[\s,]*X.*/,""),kue.log("Ignoring extra params in segment "+l)),c+v})}function mM(e,t){t=t||0;var r=0;return t&&e&&(e.type==="category"||e.type==="multicategory")&&(r=(e.r2p(1)-e.r2p(0))*t),r}});var KB=ye((Dar,Iue)=>{"use strict";var vct=Dr(),Z3=ho(),Cue=ru(),Lue=So(),pct=u_().readPaths,YB=f_(),gct=YB.getPathString,Pue=M6(),mct=$h().FROM_TL;Iue.exports=function(t,r,n,i){if(i.selectAll(".shape-label").remove(),!!(n.label.text||n.label.texttemplate)){var a;if(n.label.texttemplate){var o={};if(n.type!=="path"){var s=Z3.getFromId(t,n.xref),l=Z3.getFromId(t,n.yref);for(var u in Pue){var c=Pue[u](n,s,l);c!==void 0&&(o[u]=c)}}a=vct.texttemplateStringForShapes({data:[o],fallback:n.label.texttemplatefallback,locale:t._fullLayout._d3locale,template:n.label.texttemplate})}else a=n.label.text;var f={"data-index":r},h=n.label.font,d={"data-notex":1},v=i.append("g").attr(f).classed("shape-label",!0),_=v.append("text").attr(d).classed("shape-label-text",!0).text(a),b,p,k,E;if(n.path){var S=gct(t,n),L=pct(S,t);b=1/0,k=1/0,p=-1/0,E=-1/0;for(var x=0;x<L.length;x++)for(var C=0;C<L[x].length;C++)for(var M=L[x][C],g=1;g<M.length;g+=2){var P=M[g],T=M[g+1];b=Math.min(b,P),p=Math.max(p,P),k=Math.min(k,T),E=Math.max(E,T)}}else{var z=Z3.getFromId(t,n.xref),O=n.x0shift,V=n.x1shift,G=Z3.getRefType(n.xref),Z=Z3.getFromId(t,n.yref),j=n.y0shift,N=n.y1shift,H=Z3.getRefType(n.yref),te=function(Le,Ae){var Fe=YB.getDataToPixel(t,z,Ae,!1,G);return Fe(Le)},oe=function(Le,Ae){var Fe=YB.getDataToPixel(t,Z,Ae,!0,H);return Fe(Le)};b=te(n.x0,O),p=te(n.x1,V),k=oe(n.y0,j),E=oe(n.y1,N)}var _e=n.label.textangle;_e==="auto"&&(n.type==="line"?_e=yct(b,k,p,E):_e=0),_.call(function(Le){return Le.call(Lue.font,h).attr({}),Cue.convertToTspans(Le,t),Le});var Ee=Lue.bBox(_.node()),Ce=_ct(b,k,p,E,n,_e,Ee),me=Ce.textx,ie=Ce.texty,Se=Ce.xanchor;_.attr({"text-anchor":{left:"start",center:"middle",right:"end"}[Se],y:ie,x:me,transform:"rotate("+_e+","+me+","+ie+")"}).call(Cue.positionText,me,ie)}};function yct(e,t,r,n){var i,a;return a=Math.abs(r-e),r>=e?i=t-n:i=n-t,-180/Math.PI*Math.atan2(i,a)}function _ct(e,t,r,n,i,a,o){var s=i.label.textposition,l=i.label.textangle,u=i.label.padding,c=i.type,f=Math.PI/180*a,h=Math.sin(f),d=Math.cos(f),v=i.label.xanchor,_=i.label.yanchor,b,p,k,E;if(c==="line"){s==="start"?(b=e,p=t):s==="end"?(b=r,p=n):(b=(e+r)/2,p=(t+n)/2),v==="auto"&&(s==="start"?l==="auto"?r>e?v="left":r<e?v="right":v="center":r>e?v="right":r<e?v="left":v="center":s==="end"?l==="auto"?r>e?v="right":r<e?v="left":v="center":r>e?v="left":r<e?v="right":v="center":v="center");var S={left:1,center:0,right:-1},L={bottom:-1,middle:0,top:1};if(l==="auto"){var x=L[_];k=-u*h*x,E=u*d*x}else{var C=S[v],M=L[_];k=u*C,E=u*M}b=b+k,p=p+E}else k=u+3,s.indexOf("right")!==-1?(b=Math.max(e,r)-k,v==="auto"&&(v="right")):s.indexOf("left")!==-1?(b=Math.min(e,r)+k,v==="auto"&&(v="left")):(b=(e+r)/2,v==="auto"&&(v="center")),s.indexOf("top")!==-1?p=Math.min(t,n):s.indexOf("bottom")!==-1?p=Math.max(t,n):p=(t+n)/2,E=u,_==="bottom"?p=p-E:_==="top"&&(p=p+E);var g=mct[_],P=i.label.font.size,T=o.height,z=(T*g-P)*h,O=-(T*g-P)*d;return{textx:b+z,texty:p+O,xanchor:v}}});var nP=ye((Far,Nue)=>{"use strict";var xct=Dr(),bct=xct.strTranslate,Rue=yv(),zue=Eg(),wct=zue.drawMode,Oue=zue.selectMode,que=qa(),Due=ka(),iP=YL(),Tct=iP.i000,Act=iP.i090,Sct=iP.i180,Mct=iP.i270,Ect=Q1(),Bue=Ect.clearOutlineControllers,$B=u_(),rP=$B.pointsOnRectangle,JB=$B.pointsOnEllipse,kct=$B.writePaths,Cct=eP().newShapes,Lct=eP().createShapeObj,Pct=ZB(),Ict=KB();Nue.exports=function e(t,r,n,i){i||(i=0);var a=n.gd;function o(){e(t,r,n,i++),(JB(t[0])||n.hasText)&&s({redrawing:!0})}function s(j){var N={};n.isActiveShape!==void 0&&(n.isActiveShape=!1,N=Cct(r,n)),n.isActiveSelection!==void 0&&(n.isActiveSelection=!1,N=Pct(r,n),a._fullLayout._reselect=!0),Object.keys(N).length&&que.call((j||{}).redrawing?"relayout":"_guiRelayout",a,N)}var l=a._fullLayout,u=l._zoomlayer,c=n.dragmode,f=wct(c),h=Oue(c);(f||h)&&(a._fullLayout._outlining=!0),Bue(a),r.attr("d",kct(t));var d,v,_,b,p;if(!i&&(n.isActiveShape||n.isActiveSelection)){p=Rct([],t);var k=u.append("g").attr("class","outline-controllers");P(k),Z()}if(f&&n.hasText){var E=u.select(".label-temp"),S=Lct(r,n,n.dragmode);Ict(a,"label-temp",S,E)}function L(j){_=+j.srcElement.getAttribute("data-i"),b=+j.srcElement.getAttribute("data-j"),d[_][b].moveFn=x}function x(j,N){if(t.length){var H=p[_][b][1],te=p[_][b][2],oe=t[_],_e=oe.length;if(rP(oe)){var Ee=j,Ce=N;if(n.isActiveSelection){var me=Fue(oe,b);me[1]===oe[b][1]?Ce=0:Ee=0}for(var ie=0;ie<_e;ie++)if(ie!==b){var Se=oe[ie];Se[1]===oe[b][1]&&(Se[1]=H+Ee),Se[2]===oe[b][2]&&(Se[2]=te+Ce)}if(oe[b][1]=H+Ee,oe[b][2]=te+Ce,!rP(oe))for(var Le=0;Le<_e;Le++)for(var Ae=0;Ae<oe[Le].length;Ae++)oe[Le][Ae]=p[_][Le][Ae]}else oe[b][1]=H+j,oe[b][2]=te+N;o()}}function C(){s()}function M(){if(t.length&&t[_]&&t[_].length){for(var j=[],N=0;N<t[_].length;N++)N!==b&&j.push(t[_][N]);j.length>1&&!(j.length===2&&j[1][0]==="Z")&&(b===0&&(j[0][0]="M"),t[_]=j,o(),s())}}function g(j,N){if(j===2){_=+N.srcElement.getAttribute("data-i"),b=+N.srcElement.getAttribute("data-j");var H=t[_];!rP(H)&&!JB(H)&&M()}}function P(j){d=[];for(var N=0;N<t.length;N++){var H=t[N],te=rP(H),oe=!te&&JB(H);d[N]=[];for(var _e=H.length,Ee=0;Ee<_e;Ee++)if(H[Ee][0]!=="Z"&&!(oe&&Ee!==Tct&&Ee!==Act&&Ee!==Sct&&Ee!==Mct)){var Ce=te&&n.isActiveSelection,me;Ce&&(me=Fue(H,Ee));var ie=H[Ee][1],Se=H[Ee][2],Le=j.append(Ce?"rect":"circle").attr("data-i",N).attr("data-j",Ee).style({fill:Due.background,stroke:Due.defaultLine,"stroke-width":1,"shape-rendering":"crispEdges"});if(Ce){var Ae=me[1]-ie,Fe=me[2]-Se,Pe=Fe?5:Math.max(Math.min(25,Math.abs(Ae)-5),5),ge=Ae?5:Math.max(Math.min(25,Math.abs(Fe)-5),5);Le.classed(Fe?"cursor-ew-resize":"cursor-ns-resize",!0).attr("width",Pe).attr("height",ge).attr("x",ie-Pe/2).attr("y",Se-ge/2).attr("transform",bct(Ae/2,Fe/2))}else Le.classed("cursor-grab",!0).attr("r",5).attr("cx",ie).attr("cy",Se);d[N][Ee]={element:Le.node(),gd:a,prepFn:L,doneFn:C,clickFn:g},Rue.init(d[N][Ee])}}}function T(j,N){if(t.length)for(var H=0;H<t.length;H++)for(var te=0;te<t[H].length;te++)for(var oe=0;oe+2<t[H][te].length;oe+=2)t[H][te][oe+1]=p[H][te][oe+1]+j,t[H][te][oe+2]=p[H][te][oe+2]+N}function z(j,N){T(j,N),o()}function O(j){_=+j.srcElement.getAttribute("data-i"),_||(_=0),v[_].moveFn=z}function V(){s()}function G(j){j===2&&Dct(a)}function Z(){if(v=[],!!t.length){var j=0;v[j]={element:r[0][0],gd:a,prepFn:O,doneFn:V,clickFn:G},Rue.init(v[j])}}};function Rct(e,t){for(var r=0;r<t.length;r++){var n=t[r];e[r]=[];for(var i=0;i<n.length;i++){e[r][i]=[];for(var a=0;a<n[i].length;a++)e[r][i][a]=n[i][a]}}return e}function Fue(e,t){var r=e[t][1],n=e[t][2],i=e.length,a,o,s;return a=(t+1)%i,o=e[a][1],s=e[a][2],o===r&&s===n&&(a=(t+2)%i,o=e[a][1],s=e[a][2]),[a,o,s]}function Dct(e){if(Oue(e._fullLayout.dragmode)){Bue(e);var t=e._fullLayout._activeSelectionIndex,r=(e.layout||{}).selections||[];if(t<r.length){for(var n=[],i=0;i<r.length;i++)i!==t&&n.push(r[i]);delete e._fullLayout._activeSelectionIndex;var a=e._fullLayout.selections[t];e._fullLayout._deselect={xref:a.xref,yref:a.yref},que.call("_guiRelayout",e,{selections:n})}}}});var lP=ye((zar,Kue)=>{"use strict";var Fct=Oa(),Wue=qa(),Uue=Dr(),Y3=ho(),zct=u_().readPaths,Oct=nP(),oP=KB(),Xue=Q1().clearOutlineControllers,QB=ka(),tN=So(),qct=vl().arrayEditor,Vue=yv(),Gue=Sg(),qb=gM(),Ep=f_(),eN=Ep.getPathString;Kue.exports={draw:rN,drawOne:Zue,eraseActiveShape:Uct,drawLabel:oP};function rN(e){var t=e._fullLayout;t._shapeUpperLayer.selectAll("path").remove(),t._shapeLowerLayer.selectAll("path").remove(),t._shapeUpperLayer.selectAll("text").remove(),t._shapeLowerLayer.selectAll("text").remove();for(var r in t._plots){var n=t._plots[r].shapelayer;n&&(n.selectAll("path").remove(),n.selectAll("text").remove())}for(var i=0;i<t.shapes.length;i++)t.shapes[i].visible===!0&&Zue(e,i)}function aP(e){return!!e._fullLayout._outlining}function sP(e){return!e._context.edits.shapePosition}function Zue(e,t){e._fullLayout._paperdiv.selectAll('.shapelayer [data-index="'+t+'"]').remove();var r=Ep.makeShapesOptionsAndPlotinfo(e,t),n=r.options,i=r.plotinfo;if(!n._input||n.visible!==!0)return;if(n.layer==="above")o(e._fullLayout._shapeUpperLayer);else if(n.xref==="paper"||n.yref==="paper")o(e._fullLayout._shapeLowerLayer);else if(n.layer==="between")o(i.shapelayerBetween);else if(i._hadPlotinfo){var a=i.mainplotinfo||i;o(a.shapelayer)}else o(e._fullLayout._shapeLowerLayer);function o(s){var l=eN(e,n),u={"data-index":t,"fill-rule":n.fillrule,d:l},c=n.opacity,f=n.fillcolor,h=n.line.width?n.line.color:"rgba(0,0,0,0)",d=n.line.width,v=n.line.dash;!d&&n.editable===!0&&(d=5,v="solid");var _=l[l.length-1]!=="Z",b=sP(e)&&n.editable&&e._fullLayout._activeShapeIndex===t;b&&(f=_?"rgba(0,0,0,0)":e._fullLayout.activeshape.fillcolor,c=e._fullLayout.activeshape.opacity);var p=s.append("g").classed("shape-group",!0).attr({"data-index":t}),k=p.append("path").attr(u).style("opacity",c).call(QB.stroke,h).call(QB.fill,f).call(tN.dashLine,v,d);Yue(p,e,n),oP(e,t,n,p);var E;if((b||e._context.edits.shapePosition)&&(E=qct(e.layout,"shapes",n)),b){k.style({cursor:"move"});var S={element:k.node(),plotinfo:i,gd:e,editHelpers:E,hasText:n.label.text||n.label.texttemplate,isActiveShape:!0},L=zct(l,e);Oct(L,k,S)}else e._context.edits.shapePosition?Bct(e,k,n,t,s,E):n.editable===!0&&k.style("pointer-events",_||QB.opacity(f)*c<=.5?"stroke":"all");k.node().addEventListener("click",function(){return Nct(e,k)})}}function Yue(e,t,r){var n=(r.xref+r.yref).replace(/paper/g,"").replace(/[xyz][0-9]* *domain/g,"");tN.setClipUrl(e,n?"clip"+t._fullLayout._uid+n:null,t)}function Bct(e,t,r,n,i,a){var o=10,s=10,l=r.xsizemode==="pixel",u=r.ysizemode==="pixel",c=r.type==="line",f=r.type==="path",h=a.modifyItem,d,v,_,b,p,k,E,S,L,x,C,M,g,P,T,z=Fct.select(t.node().parentNode),O=Y3.getFromId(e,r.xref),V=Y3.getRefType(r.xref),G=Y3.getFromId(e,r.yref),Z=Y3.getRefType(r.yref),j=r.x0shift,N=r.x1shift,H=r.y0shift,te=r.y1shift,oe=function(st,lt){var Gt=Ep.getDataToPixel(e,O,lt,!1,V);return Gt(st)},_e=function(st,lt){var Gt=Ep.getDataToPixel(e,G,lt,!0,Z);return Gt(st)},Ee=Ep.getPixelToData(e,O,!1,V),Ce=Ep.getPixelToData(e,G,!0,Z),me=Le(),ie={element:me.node(),gd:e,prepFn:Pe,doneFn:ge,clickFn:Re},Se;Vue.init(ie),me.node().onmousemove=Fe;function Le(){return c?Ae():t}function Ae(){var st=10,lt=Math.max(r.line.width,st),Gt=i.append("g").attr("data-index",n).attr("drag-helper",!0);Gt.append("path").attr("d",t.attr("d")).style({cursor:"move","stroke-width":lt,"stroke-opacity":"0"});var Nt={"fill-opacity":"0"},Jt=Math.max(lt/2,st);return Gt.append("circle").attr({"data-line-point":"start-point",cx:l?oe(r.xanchor)+r.x0:oe(r.x0,j),cy:u?_e(r.yanchor)-r.y0:_e(r.y0,H),r:Jt}).style(Nt).classed("cursor-grab",!0),Gt.append("circle").attr({"data-line-point":"end-point",cx:l?oe(r.xanchor)+r.x1:oe(r.x1,N),cy:u?_e(r.yanchor)-r.y1:_e(r.y1,te),r:Jt}).style(Nt).classed("cursor-grab",!0),Gt}function Fe(st){if(aP(e)){Se=null;return}if(c)st.target.tagName==="path"?Se="move":Se=st.target.attributes["data-line-point"].value==="start-point"?"resize-over-start-point":"resize-over-end-point";else{var lt=ie.element.getBoundingClientRect(),Gt=lt.right-lt.left,Nt=lt.bottom-lt.top,Jt=st.clientX-lt.left,sr=st.clientY-lt.top,wr=!f&&Gt>o&&Nt>s&&!st.shiftKey?Vue.getCursor(Jt/Gt,1-sr/Nt):"move";Gue(t,wr),Se=wr.split("-")[0]}}function Pe(st){aP(e)||(l&&(p=oe(r.xanchor)),u&&(k=_e(r.yanchor)),r.type==="path"?T=r.path:(d=l?r.x0:oe(r.x0),v=u?r.y0:_e(r.y0),_=l?r.x1:oe(r.x1),b=u?r.y1:_e(r.y1)),d<_?(L=d,g="x0",x=_,P="x1"):(L=_,g="x1",x=d,P="x0"),!u&&v<b||u&&v>b?(E=v,C="y0",S=b,M="y1"):(E=b,C="y1",S=v,M="y0"),Fe(st),ut(i,r),Zt(t,r,e),ie.moveFn=Se==="move"?ce:Ze,ie.altKey=st.altKey)}function ge(){aP(e)||(Gue(t),pt(i),Yue(t,e,r),Wue.call("_guiRelayout",e,a.getUpdateObj()))}function Re(){aP(e)||pt(i)}function ce(st,lt){if(r.type==="path"){var Gt=function(sr){return sr},Nt=Gt,Jt=Gt;l?h("xanchor",r.xanchor=Ee(p+st)):(Nt=function(wr){return Ee(oe(wr)+st)},O&&O.type==="date"&&(Nt=Ep.encodeDate(Nt))),u?h("yanchor",r.yanchor=Ce(k+lt)):(Jt=function(wr){return Ce(_e(wr)+lt)},G&&G.type==="date"&&(Jt=Ep.encodeDate(Jt))),h("path",r.path=Hue(T,Nt,Jt))}else l?h("xanchor",r.xanchor=Ee(p+st)):(h("x0",r.x0=Ee(d+st)),h("x1",r.x1=Ee(_+st))),u?h("yanchor",r.yanchor=Ce(k+lt)):(h("y0",r.y0=Ce(v+lt)),h("y1",r.y1=Ce(b+lt)));t.attr("d",eN(e,r)),ut(i,r),oP(e,n,r,z)}function Ze(st,lt){if(f){var Gt=function(Lr){return Lr},Nt=Gt,Jt=Gt;l?h("xanchor",r.xanchor=Ee(p+st)):(Nt=function(ti){return Ee(oe(ti)+st)},O&&O.type==="date"&&(Nt=Ep.encodeDate(Nt))),u?h("yanchor",r.yanchor=Ce(k+lt)):(Jt=function(ti){return Ce(_e(ti)+lt)},G&&G.type==="date"&&(Jt=Ep.encodeDate(Jt))),h("path",r.path=Hue(T,Nt,Jt))}else if(c){if(Se==="resize-over-start-point"){var sr=d+st,wr=u?v-lt:v+lt;h("x0",r.x0=l?sr:Ee(sr)),h("y0",r.y0=u?wr:Ce(wr))}else if(Se==="resize-over-end-point"){var cr=_+st,$e=u?b-lt:b+lt;h("x1",r.x1=l?cr:Ee(cr)),h("y1",r.y1=u?$e:Ce($e))}}else{var St=function(Lr){return Se.indexOf(Lr)!==-1},Qt=St("n"),Vt=St("s"),_t=St("w"),It=St("e"),mt=Qt?E+lt:E,er=Vt?S+lt:S,lr=_t?L+st:L,Tr=It?x+st:x;u&&(Qt&&(mt=E-lt),Vt&&(er=S-lt)),(!u&&er-mt>s||u&&mt-er>s)&&(h(C,r[C]=u?mt:Ce(mt)),h(M,r[M]=u?er:Ce(er))),Tr-lr>o&&(h(g,r[g]=l?lr:Ee(lr)),h(P,r[P]=l?Tr:Ee(Tr)))}t.attr("d",eN(e,r)),ut(i,r),oP(e,n,r,z)}function ut(st,lt){(l||u)&&Gt();function Gt(){var Nt=lt.type!=="path",Jt=st.selectAll(".visual-cue").data([0]),sr=1;Jt.enter().append("path").attr({fill:"#fff","fill-rule":"evenodd",stroke:"#000","stroke-width":sr}).classed("visual-cue",!0);var wr=oe(l?lt.xanchor:Uue.midRange(Nt?[lt.x0,lt.x1]:Ep.extractPathCoords(lt.path,qb.paramIsX))),cr=_e(u?lt.yanchor:Uue.midRange(Nt?[lt.y0,lt.y1]:Ep.extractPathCoords(lt.path,qb.paramIsY)));if(wr=Ep.roundPositionForSharpStrokeRendering(wr,sr),cr=Ep.roundPositionForSharpStrokeRendering(cr,sr),l&&u){var $e="M"+(wr-1-sr)+","+(cr-1-sr)+"h-8v2h8 v8h2v-8 h8v-2h-8 v-8h-2 Z";Jt.attr("d",$e)}else if(l){var St="M"+(wr-1-sr)+","+(cr-9-sr)+"v18 h2 v-18 Z";Jt.attr("d",St)}else{var Qt="M"+(wr-9-sr)+","+(cr-1-sr)+"h18 v2 h-18 Z";Jt.attr("d",Qt)}}}function pt(st){st.selectAll(".visual-cue").remove()}function Zt(st,lt,Gt){var Nt=lt.xref,Jt=lt.yref,sr=Y3.getFromId(Gt,Nt),wr=Y3.getFromId(Gt,Jt),cr="";Nt!=="paper"&&!sr.autorange&&(cr+=Nt),Jt!=="paper"&&!wr.autorange&&(cr+=Jt),tN.setClipUrl(st,cr?"clip"+Gt._fullLayout._uid+cr:null,Gt)}}function Hue(e,t,r){return e.replace(qb.segmentRE,function(n){var i=0,a=n.charAt(0),o=qb.paramIsX[a],s=qb.paramIsY[a],l=qb.numParams[a],u=n.slice(1).replace(qb.paramRE,function(c){return i>=l||(o[i]?c=t(c):s[i]&&(c=r(c)),i++),c});return a+u})}function Nct(e,t){if(sP(e)){var r=t.node(),n=+r.getAttribute("data-index");if(n>=0){if(n===e._fullLayout._activeShapeIndex){jue(e);return}e._fullLayout._activeShapeIndex=n,e._fullLayout._deactivateShape=jue,rN(e)}}}function jue(e){if(sP(e)){var t=e._fullLayout._activeShapeIndex;t>=0&&(Xue(e),delete e._fullLayout._activeShapeIndex,rN(e))}}function Uct(e){if(sP(e)){Xue(e);var t=e._fullLayout._activeShapeIndex,r=(e.layout||{}).shapes||[];if(t<r.length){for(var n=[],i=0;i<r.length;i++)i!==t&&n.push(r[i]);return delete e._fullLayout._activeShapeIndex,Wue.call("_guiRelayout",e,{shapes:n})}}}});var aN=ye((Oar,ace)=>{"use strict";var A0=qa(),Jue=Mc(),$ue=hf(),Pl=XL(),Vct=lP().eraseActiveShape,uP=Dr(),tl=uP._,Il=ace.exports={};Il.toImage={name:"toImage",title:function(e){var t=e._context.toImageButtonOptions||{},r=t.format||"png";return r==="png"?tl(e,"Download plot as a PNG"):tl(e,"Download plot")},icon:Pl.camera,click:function(e){var t=e._context.toImageButtonOptions,r={format:t.format||"png"};uP.notifier(tl(e,"Taking snapshot - this may take a few seconds"),"long"),["filename","width","height","scale"].forEach(function(n){n in t&&(r[n]=t[n])}),A0.call("downloadImage",e,r).then(function(n){uP.notifier(tl(e,"Snapshot succeeded")+" - "+n,"long")}).catch(function(){uP.notifier(tl(e,"Sorry, there was a problem downloading your snapshot!"),"long")})}};Il.sendDataToCloud={name:"sendDataToCloud",title:function(e){return tl(e,"Edit in Chart Studio")},icon:Pl.disk,click:function(e){Jue.sendDataToCloud(e)}};Il.editInChartStudio={name:"editInChartStudio",title:function(e){return tl(e,"Edit in Chart Studio")},icon:Pl.pencil,click:function(e){Jue.sendDataToCloud(e)}};Il.zoom2d={name:"zoom2d",_cat:"zoom",title:function(e){return tl(e,"Zoom")},attr:"dragmode",val:"zoom",icon:Pl.zoombox,click:Bv};Il.pan2d={name:"pan2d",_cat:"pan",title:function(e){return tl(e,"Pan")},attr:"dragmode",val:"pan",icon:Pl.pan,click:Bv};Il.select2d={name:"select2d",_cat:"select",title:function(e){return tl(e,"Box Select")},attr:"dragmode",val:"select",icon:Pl.selectbox,click:Bv};Il.lasso2d={name:"lasso2d",_cat:"lasso",title:function(e){return tl(e,"Lasso Select")},attr:"dragmode",val:"lasso",icon:Pl.lasso,click:Bv};Il.drawclosedpath={name:"drawclosedpath",title:function(e){return tl(e,"Draw closed freeform")},attr:"dragmode",val:"drawclosedpath",icon:Pl.drawclosedpath,click:Bv};Il.drawopenpath={name:"drawopenpath",title:function(e){return tl(e,"Draw open freeform")},attr:"dragmode",val:"drawopenpath",icon:Pl.drawopenpath,click:Bv};Il.drawline={name:"drawline",title:function(e){return tl(e,"Draw line")},attr:"dragmode",val:"drawline",icon:Pl.drawline,click:Bv};Il.drawrect={name:"drawrect",title:function(e){return tl(e,"Draw rectangle")},attr:"dragmode",val:"drawrect",icon:Pl.drawrect,click:Bv};Il.drawcircle={name:"drawcircle",title:function(e){return tl(e,"Draw circle")},attr:"dragmode",val:"drawcircle",icon:Pl.drawcircle,click:Bv};Il.eraseshape={name:"eraseshape",title:function(e){return tl(e,"Erase active shape")},icon:Pl.eraseshape,click:Vct};Il.zoomIn2d={name:"zoomIn2d",_cat:"zoomin",title:function(e){return tl(e,"Zoom in")},attr:"zoom",val:"in",icon:Pl.zoom_plus,click:Bv};Il.zoomOut2d={name:"zoomOut2d",_cat:"zoomout",title:function(e){return tl(e,"Zoom out")},attr:"zoom",val:"out",icon:Pl.zoom_minus,click:Bv};Il.autoScale2d={name:"autoScale2d",_cat:"autoscale",title:function(e){return tl(e,"Autoscale")},attr:"zoom",val:"auto",icon:Pl.autoscale,click:Bv};Il.resetScale2d={name:"resetScale2d",_cat:"resetscale",title:function(e){return tl(e,"Reset axes")},attr:"zoom",val:"reset",icon:Pl.home,click:Bv};Il.hoverClosestCartesian={name:"hoverClosestCartesian",_cat:"hoverclosest",title:function(e){return tl(e,"Show closest data on hover")},attr:"hovermode",val:"closest",icon:Pl.tooltip_basic,gravity:"ne",click:Bv};Il.hoverCompareCartesian={name:"hoverCompareCartesian",_cat:"hoverCompare",title:function(e){return tl(e,"Compare data on hover")},attr:"hovermode",val:function(e){return e._fullLayout._isHoriz?"y":"x"},icon:Pl.tooltip_compare,gravity:"ne",click:Bv};function Bv(e,t){var r=t.currentTarget,n=r.getAttribute("data-attr"),i=r.getAttribute("data-val")||!0,a=e._fullLayout,o={},s=$ue.list(e,null,!0),l=a._cartesianSpikesEnabled,u,c;if(n==="zoom"){var f=i==="in"?.5:2,h=(1+f)/2,d=(1-f)/2,v,_;for(c=0;c<s.length;c++)if(u=s[c],_=u.modebardisable==="none"||u.modebardisable.indexOf(i==="auto"||i==="reset"?"autoscale":"zoominout")===-1,_&&!u.fixedrange)if(v=u._name,i==="auto")o[v+".autorange"]=!0;else if(i==="reset")u._rangeInitial0===void 0&&u._rangeInitial1===void 0?o[v+".autorange"]=!0:u._rangeInitial0===void 0?(o[v+".autorange"]=u._autorangeInitial,o[v+".range"]=[null,u._rangeInitial1]):u._rangeInitial1===void 0?(o[v+".range"]=[u._rangeInitial0,null],o[v+".autorange"]=u._autorangeInitial):o[v+".range"]=[u._rangeInitial0,u._rangeInitial1],u._showSpikeInitial!==void 0&&(o[v+".showspikes"]=u._showSpikeInitial,l==="on"&&!u._showSpikeInitial&&(l="off"));else{var b=[u.r2l(u.range[0]),u.r2l(u.range[1])],p=[h*b[0]+d*b[1],h*b[1]+d*b[0]];o[v+".range[0]"]=u.l2r(p[0]),o[v+".range[1]"]=u.l2r(p[1])}}else n==="hovermode"&&(i==="x"||i==="y")&&(i=a._isHoriz?"y":"x",r.setAttribute("data-val",i)),o[n]=i;a._cartesianSpikesEnabled=l,A0.call("_guiRelayout",e,o)}Il.zoom3d={name:"zoom3d",_cat:"zoom",title:function(e){return tl(e,"Zoom")},attr:"scene.dragmode",val:"zoom",icon:Pl.zoombox,click:cP};Il.pan3d={name:"pan3d",_cat:"pan",title:function(e){return tl(e,"Pan")},attr:"scene.dragmode",val:"pan",icon:Pl.pan,click:cP};Il.orbitRotation={name:"orbitRotation",title:function(e){return tl(e,"Orbital rotation")},attr:"scene.dragmode",val:"orbit",icon:Pl["3d_rotate"],click:cP};Il.tableRotation={name:"tableRotation",title:function(e){return tl(e,"Turntable rotation")},attr:"scene.dragmode",val:"turntable",icon:Pl["z-axis"],click:cP};function cP(e,t){for(var r=t.currentTarget,n=r.getAttribute("data-attr"),i=r.getAttribute("data-val")||!0,a=e._fullLayout._subplots.gl3d||[],o={},s=n.split("."),l=0;l<a.length;l++)o[a[l]+"."+s[1]]=i;var u=i==="pan"?i:"zoom";o.dragmode=u,A0.call("_guiRelayout",e,o)}Il.resetCameraDefault3d={name:"resetCameraDefault3d",_cat:"resetCameraDefault",title:function(e){return tl(e,"Reset camera to default")},attr:"resetDefault",icon:Pl.home,click:iN};Il.resetCameraLastSave3d={name:"resetCameraLastSave3d",_cat:"resetCameraLastSave",title:function(e){return tl(e,"Reset camera to last save")},attr:"resetLastSave",icon:Pl.movie,click:iN};function iN(e,t){for(var r=t.currentTarget,n=r.getAttribute("data-attr"),i=n==="resetLastSave",a=n==="resetDefault",o=e._fullLayout,s=o._subplots.gl3d||[],l={},u=0;u<s.length;u++){var c=s[u],f=c+".camera",h=c+".aspectratio",d=c+".aspectmode",v=o[c]._scene,_;i?(l[f+".up"]=v.viewInitial.up,l[f+".eye"]=v.viewInitial.eye,l[f+".center"]=v.viewInitial.center,_=!0):a&&(l[f+".up"]=null,l[f+".eye"]=null,l[f+".center"]=null,_=!0),_&&(l[h+".x"]=v.viewInitial.aspectratio.x,l[h+".y"]=v.viewInitial.aspectratio.y,l[h+".z"]=v.viewInitial.aspectratio.z,l[d]=v.viewInitial.aspectmode)}A0.call("_guiRelayout",e,l)}Il.hoverClosest3d={name:"hoverClosest3d",_cat:"hoverclosest",title:function(e){return tl(e,"Toggle show closest data on hover")},attr:"hovermode",val:null,toggle:!0,icon:Pl.tooltip_basic,gravity:"ne",click:Gct};function Que(e,t){var r=t.currentTarget,n=r._previousVal,i=e._fullLayout,a=i._subplots.gl3d||[],o=["xaxis","yaxis","zaxis"],s={},l={};if(n)l=n,r._previousVal=null;else{for(var u=0;u<a.length;u++){var c=a[u],f=i[c],h=c+".hovermode";s[h]=f.hovermode,l[h]=!1;for(var d=0;d<3;d++){var v=o[d],_=c+"."+v+".showspikes";l[_]=!1,s[_]=f[v].showspikes}}r._previousVal=s}return l}function Gct(e,t){var r=Que(e,t);A0.call("_guiRelayout",e,r)}Il.zoomInGeo={name:"zoomInGeo",_cat:"zoomin",title:function(e){return tl(e,"Zoom in")},attr:"zoom",val:"in",icon:Pl.zoom_plus,click:nN};Il.zoomOutGeo={name:"zoomOutGeo",_cat:"zoomout",title:function(e){return tl(e,"Zoom out")},attr:"zoom",val:"out",icon:Pl.zoom_minus,click:nN};Il.resetGeo={name:"resetGeo",_cat:"reset",title:function(e){return tl(e,"Reset")},attr:"reset",val:null,icon:Pl.autoscale,click:nN};Il.hoverClosestGeo={name:"hoverClosestGeo",_cat:"hoverclosest",title:function(e){return tl(e,"Toggle show closest data on hover")},attr:"hovermode",val:null,toggle:!0,icon:Pl.tooltip_basic,gravity:"ne",click:tce};function nN(e,t){for(var r=t.currentTarget,n=r.getAttribute("data-attr"),i=r.getAttribute("data-val")||!0,a=e._fullLayout,o=a._subplots.geo||[],s=0;s<o.length;s++){var l=o[s],u=a[l];if(n==="zoom"){var c=u.projection.scale,f=i==="in"?2*c:.5*c;A0.call("_guiRelayout",e,l+".projection.scale",f)}}n==="reset"&&K3(e,"geo")}Il.hoverClosestPie={name:"hoverClosestPie",_cat:"hoverclosest",title:function(e){return tl(e,"Toggle show closest data on hover")},attr:"hovermode",val:"closest",icon:Pl.tooltip_basic,gravity:"ne",click:tce};function ece(e){var t=e._fullLayout;return t.hovermode?!1:t._has("cartesian")?t._isHoriz?"y":"x":"closest"}function tce(e){var t=ece(e);A0.call("_guiRelayout",e,"hovermode",t)}Il.resetViewSankey={name:"resetSankeyGroup",title:function(e){return tl(e,"Reset view")},icon:Pl.home,click:function(e){for(var t={"node.groups":[],"node.x":[],"node.y":[]},r=0;r<e._fullData.length;r++){var n=e._fullData[r]._viewInitial;t["node.groups"].push(n.node.groups.slice()),t["node.x"].push(n.node.x.slice()),t["node.y"].push(n.node.y.slice())}A0.call("restyle",e,t)}};Il.toggleHover={name:"toggleHover",title:function(e){return tl(e,"Toggle show closest data on hover")},attr:"hovermode",val:null,toggle:!0,icon:Pl.tooltip_basic,gravity:"ne",click:function(e,t){var r=Que(e,t);r.hovermode=ece(e),A0.call("_guiRelayout",e,r)}};Il.resetViews={name:"resetViews",title:function(e){return tl(e,"Reset views")},icon:Pl.home,click:function(e,t){var r=t.currentTarget;r.setAttribute("data-attr","zoom"),r.setAttribute("data-val","reset"),Bv(e,t),r.setAttribute("data-attr","resetLastSave"),iN(e,t),K3(e,"geo"),K3(e,"mapbox"),K3(e,"map")}};Il.toggleSpikelines={name:"toggleSpikelines",title:function(e){return tl(e,"Toggle Spike Lines")},icon:Pl.spikeline,attr:"_cartesianSpikesEnabled",val:"on",click:function(e){var t=e._fullLayout,r=t._cartesianSpikesEnabled;t._cartesianSpikesEnabled=r==="on"?"off":"on",A0.call("_guiRelayout",e,Hct(e))}};function Hct(e){for(var t=e._fullLayout,r=t._cartesianSpikesEnabled==="on",n=$ue.list(e,null,!0),i={},a=0;a<n.length;a++){var o=n[a];i[o._name+".showspikes"]=r?!0:o._showSpikeInitial}return i}Il.resetViewMapbox={name:"resetViewMapbox",_cat:"resetView",title:function(e){return tl(e,"Reset view")},attr:"reset",icon:Pl.home,click:function(e){K3(e,"mapbox")}};Il.resetViewMap={name:"resetViewMap",_cat:"resetView",title:function(e){return tl(e,"Reset view")},attr:"reset",icon:Pl.home,click:function(e){K3(e,"map")}};Il.zoomInMapbox={name:"zoomInMapbox",_cat:"zoomin",title:function(e){return tl(e,"Zoom in")},attr:"zoom",val:"in",icon:Pl.zoom_plus,click:rce};Il.zoomInMap={name:"zoomInMap",_cat:"zoomin",title:function(e){return tl(e,"Zoom in")},attr:"zoom",val:"in",icon:Pl.zoom_plus,click:ice};Il.zoomOutMapbox={name:"zoomOutMapbox",_cat:"zoomout",title:function(e){return tl(e,"Zoom out")},attr:"zoom",val:"out",icon:Pl.zoom_minus,click:rce};Il.zoomOutMap={name:"zoomOutMap",_cat:"zoomout",title:function(e){return tl(e,"Zoom out")},attr:"zoom",val:"out",icon:Pl.zoom_minus,click:ice};function rce(e,t){nce(e,t,"mapbox")}function ice(e,t){nce(e,t,"map")}function nce(e,t,r){for(var n=t.currentTarget,i=n.getAttribute("data-val"),a=e._fullLayout,o=a._subplots[r]||[],s=1.05,l={},u=0;u<o.length;u++){var c=o[u],f=a[c].zoom,h=i==="in"?s*f:f/s;l[c+".zoom"]=h}A0.call("_guiRelayout",e,l)}function K3(e,t){for(var r=e._fullLayout,n=r._subplots[t]||[],i={},a=0;a<n.length;a++)for(var o=n[a],s=r[o]._subplot,l=s.viewInitial,u=Object.keys(l),c=0;c<u.length;c++){var f=u[c];i[o+"."+f]=l[f]}A0.call("_guiRelayout",e,i)}});var oN=ye((qar,uce)=>{"use strict";var oce=aN(),jct=Object.keys(oce),sce=["drawline","drawopenpath","drawclosedpath","drawcircle","drawrect","eraseshape"],lce=["v1hovermode","hoverclosest","hovercompare","togglehover","togglespikelines"].concat(sce),J3=[],Wct=function(e){if(lce.indexOf(e._cat||e.name)===-1){var t=e.name,r=(e._cat||e.name).toLowerCase();J3.indexOf(t)===-1&&J3.push(t),J3.indexOf(r)===-1&&J3.push(r)}};jct.forEach(function(e){Wct(oce[e])});J3.sort();uce.exports={DRAW_MODES:sce,backButtons:lce,foreButtons:J3}});var sN=ye((Nar,cce)=>{"use strict";var Bar=oN();cce.exports={editType:"modebar",orientation:{valType:"enumerated",values:["v","h"],dflt:"h",editType:"modebar"},bgcolor:{valType:"color",editType:"modebar"},color:{valType:"color",editType:"modebar"},activecolor:{valType:"color",editType:"modebar"},uirevision:{valType:"any",editType:"none"},add:{valType:"string",arrayOk:!0,dflt:"",editType:"modebar"},remove:{valType:"string",arrayOk:!0,dflt:"",editType:"modebar"}}});var hce=ye((Uar,fce)=>{"use strict";var Xct=Dr(),yM=ka(),Zct=vl(),Yct=sN();fce.exports=function(t,r){var n=t.modebar||{},i=Zct.newContainer(r,"modebar");function a(s,l){return Xct.coerce(n,i,Yct,s,l)}a("orientation"),a("bgcolor",yM.addOpacity(r.paper_bgcolor,.5));var o=yM.contrast(yM.rgb(r.modebar.bgcolor));a("color",yM.addOpacity(o,.3)),a("activecolor",yM.addOpacity(o,.7)),a("uirevision",r.uirevision),a("add"),a("remove")}});var gce=ye((Var,pce)=>{"use strict";var lN=Oa(),Kct=Eo(),fP=Dr(),dce=XL(),Jct=l6().version,$ct=new DOMParser;function vce(e){this.container=e.container,this.element=document.createElement("div"),this.update(e.graphInfo,e.buttons),this.container.appendChild(this.element)}var Tm=vce.prototype;Tm.update=function(e,t){this.graphInfo=e;var r=this.graphInfo._context,n=this.graphInfo._fullLayout,i="modebar-"+n._uid;this.element.setAttribute("id",i),this.element.setAttribute("role","toolbar"),this._uid=i,this.element.className="modebar modebar--custom",r.displayModeBar==="hover"&&(this.element.className+=" modebar--hover ease-bg"),n.modebar.orientation==="v"&&(this.element.className+=" vertical",t=t.reverse());var a=n.modebar,o="#"+i+" .modebar-group";document.querySelectorAll(o).forEach(function(f){f.style.backgroundColor=a.bgcolor});var s=!this.hasButtons(t),l=this.hasLogo!==r.displaylogo,u=this.locale!==r.locale;if(this.locale=r.locale,(s||l||u)&&(this.removeAllButtons(),this.updateButtons(t),r.watermark||r.displaylogo)){var c=this.getLogo();r.watermark&&(c.className=c.className+" watermark"),n.modebar.orientation==="v"?this.element.insertBefore(c,this.element.childNodes[0]):this.element.appendChild(c),this.hasLogo=!0}this.updateActiveButton(),fP.setStyleOnHover("#"+i+" .modebar-btn",".active",".icon path","fill: "+a.activecolor,"fill: "+a.color,this.element)};Tm.updateButtons=function(e){var t=this;this.buttons=e,this.buttonElements=[],this.buttonsNames=[],this.buttons.forEach(function(r){var n=t.createGroup();r.forEach(function(i){var a=i.name;if(!a)throw new Error("must provide button 'name' in button config");if(t.buttonsNames.indexOf(a)!==-1)throw new Error("button name '"+a+"' is taken");t.buttonsNames.push(a);var o=t.createButton(i);t.buttonElements.push(o),n.appendChild(o)}),t.element.appendChild(n)})};Tm.createGroup=function(){var e=document.createElement("div");e.className="modebar-group";var t=this.graphInfo._fullLayout.modebar;return e.style.backgroundColor=t.bgcolor,e};Tm.createButton=function(e){var t=this,r=document.createElement("button");r.setAttribute("type","button"),r.setAttribute("rel","tooltip"),r.className="modebar-btn";var n=e.title;n===void 0?n=e.name:typeof n=="function"&&(n=n(this.graphInfo)),(n||n===0)&&(r.setAttribute("data-title",n),r.setAttribute("aria-label",n)),e.attr!==void 0&&r.setAttribute("data-attr",e.attr);var i=e.val;i!==void 0&&(typeof i=="function"&&(i=i(this.graphInfo)),r.setAttribute("data-val",i));var a=e.click;if(typeof a!="function")throw new Error("must provide button 'click' function in button config");r.addEventListener("click",function(s){e.click(t.graphInfo,s),t.updateActiveButton(s.currentTarget)}),r.setAttribute("data-toggle",e.toggle||!1),e.toggle&&lN.select(r).classed("active",!0);var o=e.icon;return typeof o=="function"?r.appendChild(o()):r.appendChild(this.createIcon(o||dce.question)),r.setAttribute("data-gravity",e.gravity||"n"),r};Tm.createIcon=function(e){var t=Kct(e.height)?Number(e.height):e.ascent-e.descent,r="http://www.w3.org/2000/svg",n;if(e.path){n=document.createElementNS(r,"svg"),n.setAttribute("viewBox",[0,0,e.width,t].join(" ")),n.setAttribute("class","icon");var i=document.createElementNS(r,"path");i.setAttribute("d",e.path),e.transform?i.setAttribute("transform",e.transform):e.ascent!==void 0&&i.setAttribute("transform","matrix(1 0 0 -1 0 "+e.ascent+")"),n.appendChild(i)}if(e.svg){var a=$ct.parseFromString(e.svg,"application/xml");n=a.childNodes[0]}return n.setAttribute("height","1em"),n.setAttribute("width","1em"),n};Tm.updateActiveButton=function(e){var t=this.graphInfo._fullLayout,r=e!==void 0?e.getAttribute("data-attr"):null;this.buttonElements.forEach(function(n){var i=n.getAttribute("data-val")||!0,a=n.getAttribute("data-attr"),o=n.getAttribute("data-toggle")==="true",s=lN.select(n),l=function(f,h){var d=t.modebar,v=f.querySelector(".icon path");v&&(h||f.matches(":hover")?v.style.fill=d.activecolor:v.style.fill=d.color)};if(o){if(a===r){var u=!s.classed("active");s.classed("active",u),l(n,u)}}else{var c=a===null?a:fP.nestedProperty(t,a).get();s.classed("active",c===i),l(n,c===i)}})};Tm.hasButtons=function(e){var t=this.buttons;if(!t||e.length!==t.length)return!1;for(var r=0;r<e.length;++r){if(e[r].length!==t[r].length)return!1;for(var n=0;n<e[r].length;n++)if(e[r][n].name!==t[r][n].name)return!1}return!0};function Qct(e){return e+" (v"+Jct+")"}Tm.getLogo=function(){var e=this.createGroup(),t=document.createElement("a");return t.href="https://plotly.com/",t.target="_blank",t.setAttribute("data-title",Qct(fP._(this.graphInfo,"Produced with Plotly.js"))),t.className="modebar-btn plotlyjsicon modebar-btn--logo",t.appendChild(this.createIcon(dce.newplotlylogo)),e.appendChild(t),e};Tm.removeAllButtons=function(){for(;this.element.firstChild;)this.element.removeChild(this.element.firstChild);this.hasLogo=!1};Tm.destroy=function(){fP.removeElement(this.container.querySelector(".modebar"))};function eft(e,t){var r=e._fullLayout,n=new vce({graphInfo:e,container:r._modebardiv.node(),buttons:t});return r._privateplot&&lN.select(n.element).append("span").classed("badge-private float--left",!0).text("PRIVATE"),n}pce.exports=eft});var _ce=ye((Gar,yce)=>{"use strict";var tft=hf(),mce=Ru(),uN=qa(),rft=ip().isUnifiedHover,ift=gce(),hP=aN(),nft=oN().DRAW_MODES,aft=Dr().extendDeep;yce.exports=function(t){var r=t._fullLayout,n=t._context,i=r._modeBar;if(!n.displayModeBar&&!n.watermark){i&&(i.destroy(),delete r._modeBar);return}if(!Array.isArray(n.modeBarButtonsToRemove))throw new Error(["*modeBarButtonsToRemove* configuration options","must be an array."].join(" "));if(!Array.isArray(n.modeBarButtonsToAdd))throw new Error(["*modeBarButtonsToAdd* configuration options","must be an array."].join(" "));var a=n.modeBarButtons,o;Array.isArray(a)&&a.length?o=fft(a):!n.displayModeBar&&n.watermark?o=[]:o=oft(t),i?i.update(t,o):r._modeBar=ift(t,o)};function oft(e){var t=e._fullLayout,r=e._fullData,n=e._context;function i(N,H){if(typeof H=="string"){if(H.toLowerCase()===N.toLowerCase())return!0}else{var te=H.name,oe=H._cat||H.name;if(te===N||oe===N.toLowerCase())return!0}return!1}var a=t.modebar.add;typeof a=="string"&&(a=[a]);var o=t.modebar.remove;typeof o=="string"&&(o=[o]);var s=n.modeBarButtonsToAdd.concat(a.filter(function(N){for(var H=0;H<n.modeBarButtonsToRemove.length;H++)if(i(N,n.modeBarButtonsToRemove[H]))return!1;return!0})),l=n.modeBarButtonsToRemove.concat(o.filter(function(N){for(var H=0;H<n.modeBarButtonsToAdd.length;H++)if(i(N,n.modeBarButtonsToAdd[H]))return!1;return!0})),u=t._has("cartesian"),c=t._has("gl3d"),f=t._has("geo"),h=t._has("pie"),d=t._has("funnelarea"),v=t._has("ternary"),_=t._has("mapbox"),b=t._has("map"),p=t._has("polar"),k=t._has("smith"),E=t._has("sankey"),S=sft(t),L=rft(t.hovermode),x=[];function C(N){if(N.length){for(var H=[],te=0;te<N.length;te++){for(var oe=N[te],_e=hP[oe],Ee=_e.name.toLowerCase(),Ce=(_e._cat||_e.name).toLowerCase(),me=!1,ie=0;ie<l.length;ie++){var Se=l[ie].toLowerCase();if(Se===Ee||Se===Ce){me=!0;break}}me||H.push(hP[oe])}x.push(H)}}var M=["toImage"];n.showEditInChartStudio?M.push("editInChartStudio"):n.showSendToCloud&&M.push("sendDataToCloud"),C(M);var g=[],P=[],T=[],z=[];(u||h||d||v)+f+c+_+b+p+k>1?(P=["toggleHover"],T=["resetViews"]):f?(g=["zoomInGeo","zoomOutGeo"],P=["hoverClosestGeo"],T=["resetGeo"]):c?(P=["hoverClosest3d"],T=["resetCameraDefault3d","resetCameraLastSave3d"]):_?(g=["zoomInMapbox","zoomOutMapbox"],P=["toggleHover"],T=["resetViewMapbox"]):b?(g=["zoomInMap","zoomOutMap"],P=["toggleHover"],T=["resetViewMap"]):h?P=["hoverClosestPie"]:E?(P=["hoverClosestCartesian","hoverCompareCartesian"],T=["resetViewSankey"]):P=["toggleHover"],u&&P.push("toggleSpikelines","hoverClosestCartesian","hoverCompareCartesian"),(uft(r)||L)&&(P=[]),u&&!S&&(g=["zoomIn2d","zoomOut2d","autoScale2d"],T[0]!=="resetViews"&&(T=["resetScale2d"])),c?z=["zoom3d","pan3d","orbitRotation","tableRotation"]:u&&!S||v?z=["zoom2d","pan2d"]:_||b||f?z=["pan2d"]:p&&(z=["zoom2d"]),lft(r)&&z.push("select2d","lasso2d");var O=[],V=function(N){O.indexOf(N)===-1&&P.indexOf(N)!==-1&&O.push(N)};if(Array.isArray(s)){for(var G=[],Z=0;Z<s.length;Z++){var j=s[Z];typeof j=="string"?(j=j.toLowerCase(),nft.indexOf(j)!==-1?(t._has("mapbox")||t._has("map")||t._has("cartesian"))&&z.push(j):j==="togglespikelines"?V("toggleSpikelines"):j==="togglehover"?V("toggleHover"):j==="hovercompare"?V("hoverCompareCartesian"):j==="hoverclosest"?(V("hoverClosestCartesian"),V("hoverClosestGeo"),V("hoverClosest3d"),V("hoverClosestPie")):j==="v1hovermode"&&(V("hoverClosestCartesian"),V("hoverCompareCartesian"),V("hoverClosestGeo"),V("hoverClosest3d"),V("hoverClosestPie"))):G.push(j)}s=G}return C(z),C(g.concat(T)),C(O),cft(x,s)}function sft(e){for(var t=tft.list({_fullLayout:e},null,!0),r=0;r<t.length;r++){var n=t[r].modebardisable;if(!t[r].fixedrange&&n!=="autoscale+zoominout"&&n!=="zoominout+autoscale")return!1}return!0}function lft(e){for(var t=!1,r=0;r<e.length&&!t;r++){var n=e[r];!n._module||!n._module.selectPoints||(uN.traceIs(n,"scatter-like")?(mce.hasMarkers(n)||mce.hasText(n))&&(t=!0):uN.traceIs(n,"box-violin")?(n.boxpoints==="all"||n.points==="all")&&(t=!0):t=!0)}return t}function uft(e){for(var t=0;t<e.length;t++)if(!uN.traceIs(e[t],"noHover"))return!1;return!0}function cft(e,t){if(t.length)if(Array.isArray(t[0]))for(var r=0;r<t.length;r++)e.push(t[r]);else e.push(t);return e}function fft(e){for(var t=aft([],e),r=0;r<t.length;r++)for(var n=t[r],i=0;i<n.length;i++){var a=n[i];if(typeof a=="string")if(hP[a]!==void 0)t[r][i]=hP[a];else throw new Error(["*modeBarButtons* configuration options","invalid button name"].join(" "))}return t}});var cN=ye((Har,xce)=>{"use strict";xce.exports={moduleType:"component",name:"modebar",layoutAttributes:sN(),supplyLayoutDefaults:hce(),manage:_ce()}});var fN=ye((jar,bce)=>{"use strict";var hft=$h().FROM_BL;bce.exports=function(t,r,n){n===void 0&&(n=hft[t.constraintoward||"center"]);var i=[t.r2l(t.range[0]),t.r2l(t.range[1])],a=i[0]+(i[1]-i[0])*n;t.range=t._input.range=[t.l2r(a+(i[0]-a)*r),t.l2r(a+(i[1]-a)*r)],t.setScale()}});var Nb=ye(_M=>{"use strict";var Bb=Dr(),hN=Ag(),kg=hf().id2name,dft=Rd(),wce=fN(),vft=ym(),pft=fs().ALMOST_EQUAL,gft=$h().FROM_BL;_M.handleDefaults=function(e,t,r){var n=r.axIds,i=r.axHasImage,a=t._axisConstraintGroups=[],o=t._axisMatchGroups=[],s,l,u,c,f,h,d,v;for(s=0;s<n.length;s++)c=kg(n[s]),f=e[c],h=t[c],mft(f,h,{axIds:n,layoutOut:t,hasImage:i[c]});function _(M,g){for(s=0;s<M.length;s++){l=M[s];for(u in l)t[kg(u)][g]=l}}for(_(o,"_matchGroup"),s=0;s<a.length;s++){l=a[s];for(u in l)if(h=t[kg(u)],h.fixedrange){for(var b in l){var p=kg(b);(e[p]||{}).fixedrange===!1&&Bb.warn("fixedrange was specified as false for axis "+p+" but was overridden because another axis in its constraint group has fixedrange true"),t[p].fixedrange=!0}break}}for(s=0;s<a.length;){l=a[s];for(u in l){h=t[kg(u)],h._matchGroup&&Object.keys(h._matchGroup).length===Object.keys(l).length&&(a.splice(s,1),s--);break}s++}_(a,"_constraintGroup");var k=["constrain","range","autorange","rangemode","rangebreaks","categoryorder","categoryarray"],E=!1,S=!1;function L(){v=h[d],d==="rangebreaks"&&(S=h._hasDayOfWeekBreaks)}for(s=0;s<o.length;s++){l=o[s];for(var x=0;x<k.length;x++){d=k[x],v=null;var C;for(u in l)if(c=kg(u),f=e[c],h=t[c],d in h){if(!h.matches&&(C=h,d in f)){L();break}v===null&&d in f&&L()}if(d==="range"&&v&&f.range&&f.range.length===2&&f.range[0]!==null&&f.range[1]!==null&&(E=!0),d==="autorange"&&v===null&&E&&(v=!1),v===null&&d in C&&(v=C[d]),v!==null)for(u in l)h=t[kg(u)],h[d]=d==="range"?v.slice():v,d==="rangebreaks"&&(h._hasDayOfWeekBreaks=S,vft(h,t))}}};function mft(e,t,r){var n=r.axIds,i=r.layoutOut,a=r.hasImage,o=i._axisConstraintGroups,s=i._axisMatchGroups,l=t._id,u=l.charAt(0),c=((i._splomAxes||{})[u]||{})[l]||{},f=t._id,h=f.charAt(0)==="x";t._matchGroup=null,t._constraintGroup=null;function d(z,O){return Bb.coerce(e,t,dft,z,O)}d("constrain",a?"domain":"range"),Bb.coerce(e,t,{constraintoward:{valType:"enumerated",values:h?["left","center","right"]:["bottom","middle","top"],dflt:h?"center":"middle"}},"constraintoward");var v=t.type,_,b,p=[];for(_=0;_<n.length;_++)if(b=n[_],b!==f){var k=i[kg(b)];k.type===v&&p.push(b)}var E=Sce(o,f);if(E){var S=[];for(_=0;_<p.length;_++)b=p[_],E[b]||S.push(b);p=S}var L=p.length,x,C;L&&(e.matches||c.matches)&&(x=Bb.coerce(e,t,{matches:{valType:"enumerated",values:p,dflt:p.indexOf(c.matches)!==-1?c.matches:void 0}},"matches"));var M=a&&!h?t.anchor:void 0;if(L&&!x&&(e.scaleanchor||M)&&(C=Bb.coerce(e,t,{scaleanchor:{valType:"enumerated",values:p.concat([!1])}},"scaleanchor",M)),x){t._matchGroup=dN(s,f,x,1);var g=i[kg(x)],P=Tce(i,t)/Tce(i,g);h!==(x.charAt(0)==="x")&&(P=(h?"x":"y")+P),dN(o,f,x,P)}else e.matches&&n.indexOf(e.matches)!==-1&&Bb.warn("ignored "+t._name+'.matches: "'+e.matches+'" to avoid an infinite loop');if(C){var T=d("scaleratio");T||(T=t.scaleratio=1),dN(o,f,C,T)}else e.scaleanchor&&n.indexOf(e.scaleanchor)!==-1&&Bb.warn("ignored "+t._name+'.scaleanchor: "'+e.scaleanchor+'" to avoid either an infinite loop and possibly inconsistent scaleratios, or because this axis declares a *matches* constraint.')}function Tce(e,t){var r=t.domain;return r||(r=e[kg(t.overlaying)].domain),r[1]-r[0]}function Sce(e,t){for(var r=0;r<e.length;r++)if(e[r][t])return e[r];return null}function dN(e,t,r,n){var i,a,o,s,l,u=Sce(e,t);u===null?(u={},u[t]=1,l=e.length,e.push(u)):l=e.indexOf(u);var c=Object.keys(u);for(i=0;i<e.length;i++)if(o=e[i],i!==l&&o[r]){var f=o[r];for(a=0;a<c.length;a++)s=c[a],o[s]=vN(f,vN(n,u[s]));e.splice(l,1);return}if(n!==1)for(a=0;a<c.length;a++){var h=c[a];u[h]=vN(n,u[h])}u[r]=1}function vN(e,t){var r="",n="",i,a;typeof e=="string"&&(r=e.match(/^[xy]*/)[0],i=r.length,e=+e.slice(i)),typeof t=="string"&&(n=t.match(/^[xy]*/)[0],a=n.length,t=+t.slice(a));var o=e*t;return!i&&!a?o:!i||!a||r.charAt(0)===n.charAt(0)?r+n+e*t:i===a?o:(i>a?r.slice(a):n.slice(i))+o}function yft(e,t){for(var r=t._size,n=r.h/r.w,i={},a=Object.keys(e),o=0;o<a.length;o++){var s=a[o],l=e[s];if(typeof l=="string"){var u=l.match(/^[xy]*/)[0],c=u.length;l=+l.slice(c);for(var f=u.charAt(0)==="y"?n:1/n,h=0;h<c;h++)l*=f}i[s]=l}return i}_M.enforce=function(t){var r=t._fullLayout,n=r._axisConstraintGroups||[],i,a,o,s,l,u,c,f;for(i=0;i<n.length;i++){o=yft(n[i],r);var h=Object.keys(o),d=1/0,v=0,_=1/0,b={},p={},k=!1;for(a=0;a<h.length;a++)s=h[a],p[s]=l=r[kg(s)],l._inputDomain?l.domain=l._inputDomain.slice():l._inputDomain=l.domain.slice(),l._inputRange||(l._inputRange=l.range.slice()),l.setScale(),b[s]=u=Math.abs(l._m)/o[s],d=Math.min(d,u),(l.constrain==="domain"||!l._constraintShrinkable)&&(_=Math.min(_,u)),delete l._constraintShrinkable,v=Math.max(v,u),l.constrain==="domain"&&(k=!0);if(!(d>pft*v&&!k)){for(a=0;a<h.length;a++)if(s=h[a],u=b[s],l=p[s],c=l.constrain,u!==_||c==="domain")if(f=u/_,c==="range")wce(l,f);else{var E=l._inputDomain,S=(l.domain[1]-l.domain[0])/(E[1]-E[0]),L=(l.r2l(l.range[1])-l.r2l(l.range[0]))/(l.r2l(l._inputRange[1])-l.r2l(l._inputRange[0]));if(f/=S,f*L<1){l.domain=l._input.domain=E.slice(),wce(l,f);continue}if(L<1&&(l.range=l._input.range=l._inputRange.slice(),f*=L),l.autorange){var x=l.r2l(l.range[0]),C=l.r2l(l.range[1]),M=(x+C)/2,g=M,P=M,T=Math.abs(C-M),z=M-T*f*1.0001,O=M+T*f*1.0001,V=hN.makePadFn(r,l,0),G=hN.makePadFn(r,l,1);Ace(l,f);var Z=Math.abs(l._m),j=hN.concatExtremes(t,l),N=j.min,H=j.max,te,oe;for(oe=0;oe<N.length;oe++)te=N[oe].val-V(N[oe])/Z,te>z&&te<g&&(g=te);for(oe=0;oe<H.length;oe++)te=H[oe].val+G(H[oe])/Z,te<O&&te>P&&(P=te);var _e=(P-g)/(2*T);f/=_e,g=l.l2r(g),P=l.l2r(P),l.range=l._input.range=x<C?[g,P]:[P,g]}Ace(l,f)}}}};_M.getAxisGroup=function(t,r){for(var n=t._axisMatchGroups,i=0;i<n.length;i++){var a=n[i];if(a[r])return"g"+i}return r};_M.clean=function(t,r){if(r._inputDomain){for(var n=!1,i=r._id,a=t._fullLayout._axisConstraintGroups,o=0;o<a.length;o++)if(a[o][i]){n=!0;break}(!n||r.constrain!=="domain")&&(r._input.domain=r.domain=r._inputDomain,delete r._inputDomain)}};function Ace(e,t){var r=e._inputDomain,n=gft[e.constraintoward],i=r[0]+(r[1]-r[0])*n;e.domain=e._input.domain=[i+(r[0]-i)/t,i+(r[1]-i)/t],e.setScale()}});var wM=ye(pd=>{"use strict";var vP=Oa(),Nv=qa(),Jp=Mc(),S0=Dr(),pN=ru(),gN=vM(),xM=ka(),$3=So(),Mce=Eb(),Pce=cN(),bM=ho(),Cy=$h(),Ice=Nb(),_ft=Ice.enforce,xft=Ice.clean,Ece=Ag().doAutoRange,Rce="start",bft="middle",Dce="end",wft=hd().zindexSeparator;pd.layoutStyles=function(e){return S0.syncOrAsync([Jp.doAutoMargin,Aft],e)};function Tft(e,t,r){for(var n=0;n<r.length;n++){var i=r[n][0],a=r[n][1];if(!(i[0]>=e[1]||i[1]<=e[0])&&a[0]<t[1]&&a[1]>t[0])return!0}return!1}function Aft(e){var t=e._fullLayout,r=t._size,n=r.p,i=bM.list(e,"",!0),a,o,s,l,u,c;if(t._paperdiv.style({width:e._context.responsive&&t.autosize&&!e._context._hasZeroWidth&&!e.layout.width?"100%":t.width+"px",height:e._context.responsive&&t.autosize&&!e._context._hasZeroHeight&&!e.layout.height?"100%":t.height+"px"}).selectAll(".main-svg").call($3.setSize,t.width,t.height),e._context.setBackground(e,t.paper_bgcolor),pd.drawMainTitle(e),Pce.manage(e),!t._has("cartesian"))return Jp.previousPromises(e);function f(Pe,ge,Re){var ce=Pe._lw/2;if(Pe._id.charAt(0)==="x"){if(ge){if(Re==="top")return ge._offset-n-ce}else return r.t+r.h*(1-(Pe.position||0))+ce%1;return ge._offset+ge._length+n+ce}if(ge){if(Re==="right")return ge._offset+ge._length+n+ce}else return r.l+r.w*(Pe.position||0)+ce%1;return ge._offset-n-ce}for(a=0;a<i.length;a++){l=i[a];var h=l._anchorAxis;l._linepositions={},l._lw=$3.crispRound(e,l.linewidth,1),l._mainLinePosition=f(l,h,l.side),l._mainMirrorPosition=l.mirror&&h?f(l,h,Cy.OPPOSITE_SIDE[l.side]):null}var d=[],v=[],_=[],b=xM.opacity(t.paper_bgcolor)===1&&xM.opacity(t.plot_bgcolor)===1&&t.paper_bgcolor===t.plot_bgcolor;for(o in t._plots)if(s=t._plots[o],s.mainplot)s.bg&&s.bg.remove(),s.bg=void 0;else{var p=s.xaxis.domain,k=s.yaxis.domain,E=s.plotgroup;if(Tft(p,k,_)&&o.indexOf(wft)===-1){var S=E.node(),L=s.bg=S0.ensureSingle(E,"rect","bg");S.insertBefore(L.node(),S.childNodes[0]),v.push(o)}else E.select("rect.bg").remove(),_.push([p,k]),b||(d.push(o),v.push(o))}var x=t._bgLayer.selectAll(".bg").data(d);for(x.enter().append("rect").classed("bg",!0),x.exit().remove(),x.each(function(Pe){t._plots[Pe].bg=vP.select(this)}),a=0;a<v.length;a++)s=t._plots[v[a]],u=s.xaxis,c=s.yaxis,s.bg&&u._offset!==void 0&&c._offset!==void 0&&s.bg.call($3.setRect,u._offset-n,c._offset-n,u._length+2*n,c._length+2*n).call(xM.fill,t.plot_bgcolor).style("stroke-width",0);if(!t._hasOnlyLargeSploms)for(o in t._plots){s=t._plots[o],u=s.xaxis,c=s.yaxis;var C=s.clipId="clip"+t._uid+o+"plot",M=S0.ensureSingleById(t._clips,"clipPath",C,function(Pe){Pe.classed("plotclip",!0).append("rect")});s.clipRect=M.select("rect").attr({width:u._length,height:c._length}),$3.setTranslate(s.plot,u._offset,c._offset);var g,P;s._hasClipOnAxisFalse?(g=null,P=C):(g=C,P=null),$3.setClipUrl(s.plot,g,e),s.layerClipId=P}var T,z,O,V,G,Z,j,N,H,te,oe,_e,Ee;function Ce(Pe){return"M"+T+","+Pe+"H"+z}function me(Pe){return"M"+u._offset+","+Pe+"h"+u._length}function ie(Pe){return"M"+Pe+","+N+"V"+j}function Se(Pe){return c._shift!==void 0&&(Pe+=c._shift),"M"+Pe+","+c._offset+"v"+c._length}function Le(Pe,ge,Re){if(!Pe.showline||o!==Pe._mainSubplot)return"";if(!Pe._anchorAxis)return Re(Pe._mainLinePosition);var ce=ge(Pe._mainLinePosition);return Pe.mirror&&(ce+=ge(Pe._mainMirrorPosition)),ce}for(o in t._plots){s=t._plots[o],u=s.xaxis,c=s.yaxis;var Ae="M0,0";kce(u,o)&&(G=dP(u,"left",c,i),T=u._offset-(G?n+G:0),Z=dP(u,"right",c,i),z=u._offset+u._length+(Z?n+Z:0),O=f(u,c,"bottom"),V=f(u,c,"top"),Ee=!u._anchorAxis||o!==u._mainSubplot,Ee&&(u.mirror==="allticks"||u.mirror==="all")&&(u._linepositions[o]=[O,V]),Ae=Le(u,Ce,me),Ee&&u.showline&&(u.mirror==="all"||u.mirror==="allticks")&&(Ae+=Ce(O)+Ce(V)),s.xlines.style("stroke-width",u._lw+"px").call(xM.stroke,u.showline?u.linecolor:"rgba(0,0,0,0)")),s.xlines.attr("d",Ae);var Fe="M0,0";kce(c,o)&&(oe=dP(c,"bottom",u,i),j=c._offset+c._length+(oe?n:0),_e=dP(c,"top",u,i),N=c._offset-(_e?n:0),H=f(c,u,"left"),te=f(c,u,"right"),Ee=!c._anchorAxis||o!==c._mainSubplot,Ee&&(c.mirror==="allticks"||c.mirror==="all")&&(c._linepositions[o]=[H,te]),Fe=Le(c,ie,Se),Ee&&c.showline&&(c.mirror==="all"||c.mirror==="allticks")&&(Fe+=ie(H)+ie(te)),s.ylines.style("stroke-width",c._lw+"px").call(xM.stroke,c.showline?c.linecolor:"rgba(0,0,0,0)")),s.ylines.attr("d",Fe)}return bM.makeClipPaths(e),Jp.previousPromises(e)}function kce(e,t){return(e.ticks||e.showline)&&(t===e._mainSubplot||e.mirror==="all"||e.mirror==="allticks")}function Cce(e,t,r){if(!r.showline||!r._lw)return!1;if(r.mirror==="all"||r.mirror==="allticks")return!0;var n=r._anchorAxis;if(!n)return!1;var i=Cy.FROM_BL[t];return r.side===t?n.domain[i]===e.domain[i]:r.mirror&&n.domain[1-i]===e.domain[1-i]}function dP(e,t,r,n){if(Cce(e,t,r))return r._lw;for(var i=0;i<n.length;i++){var a=n[i];if(a._mainAxis===r._mainAxis&&Cce(e,t,a))return a._lw}return 0}pd.drawMainTitle=function(e){var t=e._fullLayout.title,r=e._fullLayout,n=Pft(r),i=Ift(r),a=Lft(r,i),o=Cft(r,n);if(Mce.draw(e,"gtitle",{propContainer:r,propName:"title.text",subtitlePropName:"title.subtitle.text",placeholder:r._dfltTitle.plot,subtitlePlaceholder:r._dfltTitle.subtitle,attributes:{x:o,y:a,"text-anchor":n,dy:i}}),t.text&&t.automargin){var s=vP.select(e).selectAll(".gtitle"),l=$3.bBox(vP.select(e).selectAll(".g-gtitle").node()).height,u=Eft(e,t,l);if(u>0){kft(e,a,u,l),s.attr({x:o,y:a,"text-anchor":n,dy:Lce(t.yanchor)}).call(pN.positionText,o,a);var c=(t.text.match(pN.BR_TAG_ALL)||[]).length;if(c){var f=Cy.LINE_SPACING*c+Cy.MID_SHIFT;t.y===0&&(f=-f),s.selectAll(".line").each(function(){var b=+this.getAttribute("dy").slice(0,-2)-f+"em";this.setAttribute("dy",b)})}var h=vP.select(e).selectAll(".gtitle-subtitle");if(h.node()){var d=s.node().getBBox(),v=d.y+d.height,_=v+Mce.SUBTITLE_PADDING_EM*t.subtitle.font.size;h.attr({x:o,y:_,"text-anchor":n,dy:Lce(t.yanchor)}).call(pN.positionText,o,_)}}}};function Sft(e,t,r,n,i){var a=t.yref==="paper"?e._fullLayout._size.h:e._fullLayout.height,o=S0.isTopAnchor(t)?n:n-i,s=r==="b"?a-o:o;return S0.isTopAnchor(t)&&r==="t"||S0.isBottomAnchor(t)&&r==="b"?!1:s<i}function Mft(e,t,r,n,i){var a=0;return r==="middle"&&(a+=i/2),e==="t"?(r==="top"&&(a+=i),a+=n-t*n):(r==="bottom"&&(a+=i),a+=t*n),a}function Eft(e,t,r){var n=t.y,i=t.yanchor,a=n>.5?"t":"b",o=e._fullLayout.margin[a],s=0;return t.yref==="paper"?s=r+t.pad.t+t.pad.b:t.yref==="container"&&(s=Mft(a,n,i,e._fullLayout.height,r)+t.pad.t+t.pad.b),s>o?s:0}function kft(e,t,r,n){var i="title.automargin",a=e._fullLayout.title,o=a.y>.5?"t":"b",s={x:a.x,y:a.y,t:0,b:0},l={};a.yref==="paper"&&Sft(e,a,o,t,n)?s[o]=r:a.yref==="container"&&(l[o]=r,e._fullLayout._reservedMargin[i]=l),Jp.allowAutoMargin(e,i),Jp.autoMargin(e,i,s)}function Cft(e,t){var r=e.title,n=e._size,i=0;switch(t===Rce?i=r.pad.l:t===Dce&&(i=-r.pad.r),r.xref){case"paper":return n.l+n.w*r.x+i;case"container":default:return e.width*r.x+i}}function Lft(e,t){var r=e.title,n=e._size,i=0;if(t==="0em"||!t?i=-r.pad.b:t===Cy.CAP_SHIFT+"em"&&(i=r.pad.t),r.y==="auto")return n.t/2;switch(r.yref){case"paper":return n.t+n.h-n.h*r.y+i;case"container":default:return e.height-e.height*r.y+i}}function Lce(e){return e==="top"?Cy.CAP_SHIFT+.3+"em":e==="bottom"?"-0.3em":Cy.MID_SHIFT+"em"}function Pft(e){var t=e.title,r=bft;return S0.isRightAnchor(t)?r=Dce:S0.isLeftAnchor(t)&&(r=Rce),r}function Ift(e){var t=e.title,r="0em";return S0.isTopAnchor(t)?r=Cy.CAP_SHIFT+"em":S0.isMiddleAnchor(t)&&(r=Cy.MID_SHIFT+"em"),r}pd.doTraceStyle=function(e){var t=e.calcdata,r=[],n;for(n=0;n<t.length;n++){var i=t[n],a=i[0]||{},o=a.trace||{},s=o._module||{},l=s.arraysToCalcdata;l&&l(i,o);var u=s.editStyle;u&&r.push({fn:u,cd0:a})}if(r.length){for(n=0;n<r.length;n++){var c=r[n];c.fn(e,c.cd0)}gN(e),pd.redrawReglTraces(e)}return Jp.style(e),Nv.getComponentMethod("legend","draw")(e),Jp.previousPromises(e)};pd.doColorBars=function(e){return Nv.getComponentMethod("colorbar","draw")(e),Jp.previousPromises(e)};pd.layoutReplot=function(e){var t=e.layout;return e.layout=void 0,Nv.call("_doPlot",e,"",t)};pd.doLegend=function(e){return Nv.getComponentMethod("legend","draw")(e),Jp.previousPromises(e)};pd.doTicksRelayout=function(e){return bM.draw(e,"redraw"),e._fullLayout._hasOnlyLargeSploms&&(Nv.subplotsRegistry.splom.updateGrid(e),gN(e),pd.redrawReglTraces(e)),pd.drawMainTitle(e),Jp.previousPromises(e)};pd.doModeBar=function(e){var t=e._fullLayout;Pce.manage(e);for(var r=0;r<t._basePlotModules.length;r++){var n=t._basePlotModules[r].updateFx;n&&n(e)}return Jp.previousPromises(e)};pd.doCamera=function(e){for(var t=e._fullLayout,r=t._subplots.gl3d,n=0;n<r.length;n++){var i=t[r[n]],a=i._scene;a.setViewport(i)}};pd.drawData=function(e){var t=e._fullLayout;gN(e);for(var r=t._basePlotModules,n=0;n<r.length;n++)r[n].plot(e);return pd.redrawReglTraces(e),Jp.style(e),Nv.getComponentMethod("selections","draw")(e),Nv.getComponentMethod("shapes","draw")(e),Nv.getComponentMethod("annotations","draw")(e),Nv.getComponentMethod("images","draw")(e),t._replotting=!1,Jp.previousPromises(e)};pd.redrawReglTraces=function(e){var t=e._fullLayout;if(t._has("regl")){var r=e._fullData,n=[],i=[],a,o;for(t._hasOnlyLargeSploms&&t._splomGrid.draw(),a=0;a<r.length;a++){var s=r[a];s.visible===!0&&s._length!==0&&(s.type==="splom"?t._splomScenes[s.uid].draw():s.type==="scattergl"?S0.pushUnique(n,s.xaxis+s.yaxis):s.type==="scatterpolargl"&&S0.pushUnique(i,s.subplot))}for(a=0;a<n.length;a++)o=t._plots[n[a]],o._scene&&o._scene.draw();for(a=0;a<i.length;a++)o=t[i[a]]._subplot,o._scene&&o._scene.draw()}};pd.doAutoRangeAndConstraints=function(e){for(var t=bM.list(e,"",!0),r,n={},i=0;i<t.length;i++)if(r=t[i],!n[r._id]){n[r._id]=1,xft(e,r),Ece(e,r);var a=r._matchGroup;if(a)for(var o in a){var s=bM.getFromId(e,o);Ece(e,s,r.range),n[o]=1}}_ft(e)};pd.finalDraw=function(e){Nv.getComponentMethod("rangeslider","draw")(e),Nv.getComponentMethod("rangeselector","draw")(e)};pd.drawMarginPushers=function(e){Nv.getComponentMethod("legend","draw")(e),Nv.getComponentMethod("rangeselector","draw")(e),Nv.getComponentMethod("sliders","draw")(e),Nv.getComponentMethod("updatemenus","draw")(e),Nv.getComponentMethod("colorbar","draw")(e)}});var _N=ye((Zar,Bce)=>{"use strict";var Rft=u_().readPaths,Dft=nP(),Fce=Q1().clearOutlineControllers,mN=ka(),zce=So(),Fft=vl().arrayEditor,Oce=f_(),zft=Oce.getPathString;Bce.exports={draw:pP,drawOne:qce,activateLastSelection:Bft};function pP(e){var t=e._fullLayout;Fce(e),t._selectionLayer.selectAll("path").remove();for(var r in t._plots){var n=t._plots[r].selectionLayer;n&&n.selectAll("path").remove()}for(var i=0;i<t.selections.length;i++)qce(e,i)}function gP(e){return e._context.editSelection}function qce(e,t){e._fullLayout._paperdiv.selectAll('.selectionlayer [data-index="'+t+'"]').remove();var r=Oce.makeSelectionsOptionsAndPlotinfo(e,t),n=r.options,i=r.plotinfo;if(!n._input)return;a(e._fullLayout._selectionLayer);function a(o){var s=zft(e,n),l={"data-index":t,"fill-rule":"evenodd",d:s},u=n.opacity,c="rgba(0,0,0,0)",f=n.line.color||mN.contrast(e._fullLayout.plot_bgcolor),h=n.line.width,d=n.line.dash;h||(h=5,d="solid");var v=gP(e)&&e._fullLayout._activeSelectionIndex===t;v&&(c=e._fullLayout.activeselection.fillcolor,u=e._fullLayout.activeselection.opacity);for(var _=[],b=1;b>=0;b--){var p=o.append("path").attr(l).style("opacity",b?.1:u).call(mN.stroke,f).call(mN.fill,c).call(zce.dashLine,b?"solid":d,b?4+h:h);if(Oft(p,e,n),v){var k=Fft(e.layout,"selections",n);p.style({cursor:"move"});var E={element:p.node(),plotinfo:i,gd:e,editHelpers:k,isActiveSelection:!0},S=Rft(s,e);Dft(S,p,E)}else p.style("pointer-events",b?"all":"none");_[b]=p}var L=_[0],x=_[1];x.node().addEventListener("click",function(){return qft(e,L)})}}function Oft(e,t,r){var n=r.xref+r.yref;zce.setClipUrl(e,"clip"+t._fullLayout._uid+n,t)}function qft(e,t){if(gP(e)){var r=t.node(),n=+r.getAttribute("data-index");if(n>=0){if(n===e._fullLayout._activeSelectionIndex){yN(e);return}e._fullLayout._activeSelectionIndex=n,e._fullLayout._deactivateSelection=yN,pP(e)}}}function Bft(e){if(gP(e)){var t=e._fullLayout.selections.length-1;e._fullLayout._activeSelectionIndex=t,e._fullLayout._deactivateSelection=yN,pP(e)}}function yN(e){if(gP(e)){var t=e._fullLayout._activeSelectionIndex;t>=0&&(Fce(e),delete e._fullLayout._activeSelectionIndex,pP(e))}}});var Uce=ye((Yar,Nce)=>{function Nft(){var e,t=0,r=!1;function n(i,a){return e.list.push({type:i,data:a?JSON.parse(JSON.stringify(a)):void 0}),e}return e={list:[],segmentId:function(){return t++},checkIntersection:function(i,a){return n("check",{seg1:i,seg2:a})},segmentChop:function(i,a){return n("div_seg",{seg:i,pt:a}),n("chop",{seg:i,pt:a})},statusRemove:function(i){return n("pop_seg",{seg:i})},segmentUpdate:function(i){return n("seg_update",{seg:i})},segmentNew:function(i,a){return n("new_seg",{seg:i,primary:a})},segmentRemove:function(i){return n("rem_seg",{seg:i})},tempStatus:function(i,a,o){return n("temp_status",{seg:i,above:a,below:o})},rewind:function(i){return n("rewind",{seg:i})},status:function(i,a,o){return n("status",{seg:i,above:a,below:o})},vert:function(i){return i===r?e:(r=i,n("vert",{x:i}))},log:function(i){return typeof i!="string"&&(i=JSON.stringify(i,!1,"  ")),n("log",{txt:i})},reset:function(){return n("reset")},selected:function(i){return n("selected",{segs:i})},chainStart:function(i){return n("chain_start",{seg:i})},chainRemoveHead:function(i,a){return n("chain_rem_head",{index:i,pt:a})},chainRemoveTail:function(i,a){return n("chain_rem_tail",{index:i,pt:a})},chainNew:function(i,a){return n("chain_new",{pt1:i,pt2:a})},chainMatch:function(i){return n("chain_match",{index:i})},chainClose:function(i){return n("chain_close",{index:i})},chainAddHead:function(i,a){return n("chain_add_head",{index:i,pt:a})},chainAddTail:function(i,a){return n("chain_add_tail",{index:i,pt:a})},chainConnect:function(i,a){return n("chain_con",{index1:i,index2:a})},chainReverse:function(i){return n("chain_rev",{index:i})},chainJoin:function(i,a){return n("chain_join",{index1:i,index2:a})},done:function(){return n("done")}},e}Nce.exports=Nft});var Gce=ye((Kar,Vce)=>{function Uft(e){typeof e!="number"&&(e=1e-10);var t={epsilon:function(r){return typeof r=="number"&&(e=r),e},pointAboveOrOnLine:function(r,n,i){var a=n[0],o=n[1],s=i[0],l=i[1],u=r[0],c=r[1];return(s-a)*(c-o)-(l-o)*(u-a)>=-e},pointBetween:function(r,n,i){var a=r[1]-n[1],o=i[0]-n[0],s=r[0]-n[0],l=i[1]-n[1],u=s*o+a*l;if(u<e)return!1;var c=o*o+l*l;return!(u-c>-e)},pointsSameX:function(r,n){return Math.abs(r[0]-n[0])<e},pointsSameY:function(r,n){return Math.abs(r[1]-n[1])<e},pointsSame:function(r,n){return t.pointsSameX(r,n)&&t.pointsSameY(r,n)},pointsCompare:function(r,n){return t.pointsSameX(r,n)?t.pointsSameY(r,n)?0:r[1]<n[1]?-1:1:r[0]<n[0]?-1:1},pointsCollinear:function(r,n,i){var a=r[0]-n[0],o=r[1]-n[1],s=n[0]-i[0],l=n[1]-i[1];return Math.abs(a*l-s*o)<e},linesIntersect:function(r,n,i,a){var o=n[0]-r[0],s=n[1]-r[1],l=a[0]-i[0],u=a[1]-i[1],c=o*u-s*l;if(Math.abs(c)<e)return!1;var f=r[0]-i[0],h=r[1]-i[1],d=(l*h-u*f)/c,v=(o*h-s*f)/c,_={alongA:0,alongB:0,pt:[r[0]+d*o,r[1]+d*s]};return d<=-e?_.alongA=-2:d<e?_.alongA=-1:d-1<=-e?_.alongA=0:d-1<e?_.alongA=1:_.alongA=2,v<=-e?_.alongB=-2:v<e?_.alongB=-1:v-1<=-e?_.alongB=0:v-1<e?_.alongB=1:_.alongB=2,_},pointInsideRegion:function(r,n){for(var i=r[0],a=r[1],o=n[n.length-1][0],s=n[n.length-1][1],l=!1,u=0;u<n.length;u++){var c=n[u][0],f=n[u][1];f-a>e!=s-a>e&&(o-c)*(a-f)/(s-f)+c-i>e&&(l=!l),o=c,s=f}return l}};return t}Vce.exports=Uft});var jce=ye((Jar,Hce)=>{var Vft={create:function(){var e={root:{root:!0,next:null},exists:function(t){return!(t===null||t===e.root)},isEmpty:function(){return e.root.next===null},getHead:function(){return e.root.next},insertBefore:function(t,r){for(var n=e.root,i=e.root.next;i!==null;){if(r(i)){t.prev=i.prev,t.next=i,i.prev.next=t,i.prev=t;return}n=i,i=i.next}n.next=t,t.prev=n,t.next=null},findTransition:function(t){for(var r=e.root,n=e.root.next;n!==null&&!t(n);)r=n,n=n.next;return{before:r===e.root?null:r,after:n,insert:function(i){return i.prev=r,i.next=n,r.next=i,n!==null&&(n.prev=i),i}}}};return e},node:function(e){return e.prev=null,e.next=null,e.remove=function(){e.prev.next=e.next,e.next&&(e.next.prev=e.prev),e.prev=null,e.next=null},e}};Hce.exports=Vft});var Xce=ye(($ar,Wce)=>{var TM=jce();function Gft(e,t,r){function n(v,_){return{id:r?r.segmentId():-1,start:v,end:_,myFill:{above:null,below:null},otherFill:null}}function i(v,_,b){return{id:r?r.segmentId():-1,start:v,end:_,myFill:{above:b.myFill.above,below:b.myFill.below},otherFill:null}}var a=TM.create();function o(v,_,b,p,k,E){var S=t.pointsCompare(_,k);return S!==0?S:t.pointsSame(b,E)?0:v!==p?v?1:-1:t.pointAboveOrOnLine(b,p?k:E,p?E:k)?1:-1}function s(v,_){a.insertBefore(v,function(b){var p=o(v.isStart,v.pt,_,b.isStart,b.pt,b.other.pt);return p<0})}function l(v,_){var b=TM.node({isStart:!0,pt:v.start,seg:v,primary:_,other:null,status:null});return s(b,v.end),b}function u(v,_,b){var p=TM.node({isStart:!1,pt:_.end,seg:_,primary:b,other:v,status:null});v.other=p,s(p,v.pt)}function c(v,_){var b=l(v,_);return u(b,v,_),b}function f(v,_){r&&r.segmentChop(v.seg,_),v.other.remove(),v.seg.end=_,v.other.pt=_,s(v.other,v.pt)}function h(v,_){var b=i(_,v.seg.end,v.seg);return f(v,_),c(b,v.primary)}function d(v,_){var b=TM.create();function p(G,Z){var j=G.seg.start,N=G.seg.end,H=Z.seg.start,te=Z.seg.end;return t.pointsCollinear(j,H,te)?t.pointsCollinear(N,H,te)||t.pointAboveOrOnLine(N,H,te)?1:-1:t.pointAboveOrOnLine(j,H,te)?1:-1}function k(G){return b.findTransition(function(Z){var j=p(G,Z.ev);return j>0})}function E(G,Z){var j=G.seg,N=Z.seg,H=j.start,te=j.end,oe=N.start,_e=N.end;r&&r.checkIntersection(j,N);var Ee=t.linesIntersect(H,te,oe,_e);if(Ee===!1){if(!t.pointsCollinear(H,te,oe)||t.pointsSame(H,_e)||t.pointsSame(te,oe))return!1;var Ce=t.pointsSame(H,oe),me=t.pointsSame(te,_e);if(Ce&&me)return Z;var ie=!Ce&&t.pointBetween(H,oe,_e),Se=!me&&t.pointBetween(te,oe,_e);if(Ce)return Se?h(Z,te):h(G,_e),Z;ie&&(me||(Se?h(Z,te):h(G,_e)),h(Z,H))}else Ee.alongA===0&&(Ee.alongB===-1?h(G,oe):Ee.alongB===0?h(G,Ee.pt):Ee.alongB===1&&h(G,_e)),Ee.alongB===0&&(Ee.alongA===-1?h(Z,H):Ee.alongA===0?h(Z,Ee.pt):Ee.alongA===1&&h(Z,te));return!1}for(var S=[];!a.isEmpty();){var L=a.getHead();if(r&&r.vert(L.pt[0]),L.isStart){let G=function(){if(C){var Z=E(L,C);if(Z)return Z}return M?E(L,M):!1};var V=G;r&&r.segmentNew(L.seg,L.primary);var x=k(L),C=x.before?x.before.ev:null,M=x.after?x.after.ev:null;r&&r.tempStatus(L.seg,C?C.seg:!1,M?M.seg:!1);var g=G();if(g){if(e){var P;L.seg.myFill.below===null?P=!0:P=L.seg.myFill.above!==L.seg.myFill.below,P&&(g.seg.myFill.above=!g.seg.myFill.above)}else g.seg.otherFill=L.seg.myFill;r&&r.segmentUpdate(g.seg),L.other.remove(),L.remove()}if(a.getHead()!==L){r&&r.rewind(L.seg);continue}if(e){var P;L.seg.myFill.below===null?P=!0:P=L.seg.myFill.above!==L.seg.myFill.below,M?L.seg.myFill.below=M.seg.myFill.above:L.seg.myFill.below=v,P?L.seg.myFill.above=!L.seg.myFill.below:L.seg.myFill.above=L.seg.myFill.below}else if(L.seg.otherFill===null){var T;M?L.primary===M.primary?T=M.seg.otherFill.above:T=M.seg.myFill.above:T=L.primary?_:v,L.seg.otherFill={above:T,below:T}}r&&r.status(L.seg,C?C.seg:!1,M?M.seg:!1),L.other.status=x.insert(TM.node({ev:L}))}else{var z=L.status;if(z===null)throw new Error("PolyBool: Zero-length segment detected; your epsilon is probably too small or too large");if(b.exists(z.prev)&&b.exists(z.next)&&E(z.prev.ev,z.next.ev),r&&r.statusRemove(z.ev.seg),z.remove(),!L.primary){var O=L.seg.myFill;L.seg.myFill=L.seg.otherFill,L.seg.otherFill=O}S.push(L.seg)}a.getHead().remove()}return r&&r.done(),S}return e?{addRegion:function(v){for(var _,b=v[v.length-1],p=0;p<v.length;p++){_=b,b=v[p];var k=t.pointsCompare(_,b);k!==0&&c(n(k<0?_:b,k<0?b:_),!0)}},calculate:function(v){return d(v,!1)}}:{calculate:function(v,_,b,p){return v.forEach(function(k){c(i(k.start,k.end,k),!0)}),b.forEach(function(k){c(i(k.start,k.end,k),!1)}),d(_,p)}}}Wce.exports=Gft});var Yce=ye((Qar,Zce)=>{function Hft(e,t,r){var n=[],i=[];return e.forEach(function(a){var o=a.start,s=a.end;if(t.pointsSame(o,s)){console.warn("PolyBool: Warning: Zero-length segment detected; your epsilon is probably too small or too large");return}r&&r.chainStart(a);var l={index:0,matches_head:!1,matches_pt1:!1},u={index:0,matches_head:!1,matches_pt1:!1},c=l;function f(V,G,Z){return c.index=V,c.matches_head=G,c.matches_pt1=Z,c===l?(c=u,!1):(c=null,!0)}for(var h=0;h<n.length;h++){var d=n[h],v=d[0],_=d[1],b=d[d.length-1],p=d[d.length-2];if(t.pointsSame(v,o)){if(f(h,!0,!0))break}else if(t.pointsSame(v,s)){if(f(h,!0,!1))break}else if(t.pointsSame(b,o)){if(f(h,!1,!0))break}else if(t.pointsSame(b,s)&&f(h,!1,!1))break}if(c===l){n.push([o,s]),r&&r.chainNew(o,s);return}if(c===u){r&&r.chainMatch(l.index);var k=l.index,E=l.matches_pt1?s:o,S=l.matches_head,d=n[k],L=S?d[0]:d[d.length-1],x=S?d[1]:d[d.length-2],C=S?d[d.length-1]:d[0],M=S?d[d.length-2]:d[1];if(t.pointsCollinear(x,L,E)&&(S?(r&&r.chainRemoveHead(l.index,E),d.shift()):(r&&r.chainRemoveTail(l.index,E),d.pop()),L=x),t.pointsSame(C,E)){n.splice(k,1),t.pointsCollinear(M,C,L)&&(S?(r&&r.chainRemoveTail(l.index,L),d.pop()):(r&&r.chainRemoveHead(l.index,L),d.shift())),r&&r.chainClose(l.index),i.push(d);return}S?(r&&r.chainAddHead(l.index,E),d.unshift(E)):(r&&r.chainAddTail(l.index,E),d.push(E));return}function g(V){r&&r.chainReverse(V),n[V].reverse()}function P(V,G){var Z=n[V],j=n[G],N=Z[Z.length-1],H=Z[Z.length-2],te=j[0],oe=j[1];t.pointsCollinear(H,N,te)&&(r&&r.chainRemoveTail(V,N),Z.pop(),N=H),t.pointsCollinear(N,te,oe)&&(r&&r.chainRemoveHead(G,te),j.shift()),r&&r.chainJoin(V,G),n[V]=Z.concat(j),n.splice(G,1)}var T=l.index,z=u.index;r&&r.chainConnect(T,z);var O=n[T].length<n[z].length;l.matches_head?u.matches_head?O?(g(T),P(T,z)):(g(z),P(z,T)):P(z,T):u.matches_head?P(T,z):O?(g(T),P(z,T)):(g(z),P(T,z))}),i}Zce.exports=Hft});var Jce=ye((eor,Kce)=>{function AM(e,t,r){var n=[];return e.forEach(function(i){var a=(i.myFill.above?8:0)+(i.myFill.below?4:0)+(i.otherFill&&i.otherFill.above?2:0)+(i.otherFill&&i.otherFill.below?1:0);t[a]!==0&&n.push({id:r?r.segmentId():-1,start:i.start,end:i.end,myFill:{above:t[a]===1,below:t[a]===2},otherFill:null})}),r&&r.selected(n),n}var jft={union:function(e,t){return AM(e,[0,2,1,0,2,2,0,0,1,0,1,0,0,0,0,0],t)},intersect:function(e,t){return AM(e,[0,0,0,0,0,2,0,2,0,0,1,1,0,2,1,0],t)},difference:function(e,t){return AM(e,[0,0,0,0,2,0,2,0,1,1,0,0,0,1,2,0],t)},differenceRev:function(e,t){return AM(e,[0,2,1,0,0,0,1,1,0,2,0,2,0,0,0,0],t)},xor:function(e,t){return AM(e,[0,2,1,0,2,0,0,1,1,0,0,2,0,1,2,0],t)}};Kce.exports=jft});var Qce=ye((tor,$ce)=>{var Wft={toPolygon:function(e,t){function r(a){if(a.length<=0)return e.segments({inverted:!1,regions:[]});function o(u){var c=u.slice(0,u.length-1);return e.segments({inverted:!1,regions:[c]})}for(var s=o(a[0]),l=1;l<a.length;l++)s=e.selectDifference(e.combine(s,o(a[l])));return s}if(t.type==="Polygon")return e.polygon(r(t.coordinates));if(t.type==="MultiPolygon"){for(var n=e.segments({inverted:!1,regions:[]}),i=0;i<t.coordinates.length;i++)n=e.selectUnion(e.combine(n,r(t.coordinates[i])));return e.polygon(n)}throw new Error("PolyBool: Cannot convert GeoJSON object to PolyBool polygon")},fromPolygon:function(e,t,r){r=e.polygon(e.segments(r));function n(d,v){return t.pointInsideRegion([(d[0][0]+d[1][0])*.5,(d[0][1]+d[1][1])*.5],v)}function i(d){return{region:d,children:[]}}var a=i(null);function o(d,v){for(var _=0;_<d.children.length;_++){var b=d.children[_];if(n(v,b.region)){o(b,v);return}}for(var p=i(v),_=0;_<d.children.length;_++){var b=d.children[_];n(b.region,v)&&(p.children.push(b),d.children.splice(_,1),_--)}d.children.push(p)}for(var s=0;s<r.regions.length;s++){var l=r.regions[s];l.length<3||o(a,l)}function u(d,v){for(var _=0,b=d[d.length-1][0],p=d[d.length-1][1],k=[],E=0;E<d.length;E++){var S=d[E][0],L=d[E][1];k.push([S,L]),_+=L*b-S*p,b=S,p=L}var x=_<0;return x!==v&&k.reverse(),k.push([k[0][0],k[0][1]]),k}var c=[];function f(d){var v=[u(d.region,!1)];c.push(v);for(var _=0;_<d.children.length;_++)v.push(h(d.children[_]))}function h(d){for(var v=0;v<d.children.length;v++)f(d.children[v]);return u(d.region,!0)}for(var s=0;s<a.children.length;s++)f(a.children[s]);return c.length<=0?{type:"Polygon",coordinates:[]}:c.length==1?{type:"Polygon",coordinates:c[0]}:{type:"MultiPolygon",coordinates:c}}};$ce.exports=Wft});var ife=ye((ror,rfe)=>{var Xft=Uce(),Zft=Gce(),efe=Xce(),Yft=Yce(),SM=Jce(),tfe=Qce(),M0=!1,MM=Zft(),kp;kp={buildLog:function(e){return e===!0?M0=Xft():e===!1&&(M0=!1),M0===!1?!1:M0.list},epsilon:function(e){return MM.epsilon(e)},segments:function(e){var t=efe(!0,MM,M0);return e.regions.forEach(t.addRegion),{segments:t.calculate(e.inverted),inverted:e.inverted}},combine:function(e,t){var r=efe(!1,MM,M0);return{combined:r.calculate(e.segments,e.inverted,t.segments,t.inverted),inverted1:e.inverted,inverted2:t.inverted}},selectUnion:function(e){return{segments:SM.union(e.combined,M0),inverted:e.inverted1||e.inverted2}},selectIntersect:function(e){return{segments:SM.intersect(e.combined,M0),inverted:e.inverted1&&e.inverted2}},selectDifference:function(e){return{segments:SM.difference(e.combined,M0),inverted:e.inverted1&&!e.inverted2}},selectDifferenceRev:function(e){return{segments:SM.differenceRev(e.combined,M0),inverted:!e.inverted1&&e.inverted2}},selectXor:function(e){return{segments:SM.xor(e.combined,M0),inverted:e.inverted1!==e.inverted2}},polygon:function(e){return{regions:Yft(e.segments,MM,M0),inverted:e.inverted}},polygonFromGeoJSON:function(e){return tfe.toPolygon(kp,e)},polygonToGeoJSON:function(e){return tfe.fromPolygon(kp,MM,e)},union:function(e,t){return EM(e,t,kp.selectUnion)},intersect:function(e,t){return EM(e,t,kp.selectIntersect)},difference:function(e,t){return EM(e,t,kp.selectDifference)},differenceRev:function(e,t){return EM(e,t,kp.selectDifferenceRev)},xor:function(e,t){return EM(e,t,kp.selectXor)}};function EM(e,t,r){var n=kp.segments(e),i=kp.segments(t),a=kp.combine(n,i),o=r(a);return kp.polygon(o)}typeof window=="object"&&(window.PolyBool=kp);rfe.exports=kp});var afe=ye((ior,nfe)=>{nfe.exports=function(t,r,n,i){var a=t[0],o=t[1],s=!1;n===void 0&&(n=0),i===void 0&&(i=r.length);for(var l=i-n,u=0,c=l-1;u<l;c=u++){var f=r[u+n][0],h=r[u+n][1],d=r[c+n][0],v=r[c+n][1],_=h>o!=v>o&&a<(d-f)*(o-h)/(v-h)+f;_&&(s=!s)}return s}});var kM=ye((nor,ofe)=>{"use strict";var xN=T6().dot,mP=fs().BADNUM,yP=ofe.exports={};yP.tester=function(t){var r=t.slice(),n=r[0][0],i=n,a=r[0][1],o=a,s;for((r[r.length-1][0]!==r[0][0]||r[r.length-1][1]!==r[0][1])&&r.push(r[0]),s=1;s<r.length;s++)n=Math.min(n,r[s][0]),i=Math.max(i,r[s][0]),a=Math.min(a,r[s][1]),o=Math.max(o,r[s][1]);var l=!1,u;r.length===5&&(r[0][0]===r[1][0]?r[2][0]===r[3][0]&&r[0][1]===r[3][1]&&r[1][1]===r[2][1]&&(l=!0,u=function(v){return v[0]===r[0][0]}):r[0][1]===r[1][1]&&r[2][1]===r[3][1]&&r[0][0]===r[3][0]&&r[1][0]===r[2][0]&&(l=!0,u=function(v){return v[1]===r[0][1]}));function c(v,_){var b=v[0],p=v[1];return!(b===mP||b<n||b>i||p===mP||p<a||p>o||_&&u(v))}function f(v,_){var b=v[0],p=v[1];if(b===mP||b<n||b>i||p===mP||p<a||p>o)return!1;var k=r.length,E=r[0][0],S=r[0][1],L=0,x,C,M,g,P;for(x=1;x<k;x++)if(C=E,M=S,E=r[x][0],S=r[x][1],g=Math.min(C,E),!(b<g||b>Math.max(C,E)||p>Math.max(M,S)))if(p<Math.min(M,S))b!==g&&L++;else{if(E===C?P=p:P=M+(b-C)*(S-M)/(E-C),p===P)return!(x===1&&_);p<=P&&b!==g&&L++}return L%2===1}var h=!0,d=r[0];for(s=1;s<r.length;s++)if(d[0]!==r[s][0]||d[1]!==r[s][1]){h=!1;break}return{xmin:n,xmax:i,ymin:a,ymax:o,pts:r,contains:l?c:f,isRect:l,degenerate:h}};yP.isSegmentBent=function(t,r,n,i){var a=t[r],o=[t[n][0]-a[0],t[n][1]-a[1]],s=xN(o,o),l=Math.sqrt(s),u=[-o[1]/l,o[0]/l],c,f,h;for(c=r+1;c<n;c++)if(f=[t[c][0]-a[0],t[c][1]-a[1]],h=xN(f,o),h<0||h>s||Math.abs(xN(f,u))>i)return!0;return!1};yP.filter=function(t,r){var n=[t[0]],i=0,a=0;function o(l){t.push(l);var u=n.length,c=i;n.splice(a+1);for(var f=c+1;f<t.length;f++)(f===t.length-1||yP.isSegmentBent(t,c,f+1,r))&&(n.push(t[f]),n.length<u-2&&(i=f,a=n.length-1),c=f)}if(t.length>1){var s=t.pop();o(s)}return{addPt:o,raw:t,filtered:n}}});var lfe=ye((aor,sfe)=>{"use strict";sfe.exports={BENDPX:1.5,MINSELECT:12,SELECTDELAY:100,SELECTID:"-select"}});var Lfe=ye((oor,Cfe)=>{"use strict";var ufe=ife(),Kft=afe(),PM=qa(),Jft=So().dashStyle,CM=ka(),$ft=vf(),Qft=ip().makeEventData,zM=Eg(),eht=zM.freeMode,tht=zM.rectMode,IM=zM.drawMode,AN=zM.openMode,SN=zM.selectMode,cfe=f_(),ffe=gM(),gfe=nP(),mfe=Q1().clearOutline,yfe=u_(),bN=yfe.handleEllipse,rht=yfe.readPaths,iht=eP().newShapes,nht=ZB(),aht=_N().activateLastSelection,xP=Dr(),oht=xP.sorterAsc,_fe=kM(),LM=z6(),E0=hf().getFromId,sht=vM(),lht=wM().redrawReglTraces,bP=lfe(),Am=bP.MINSELECT,uht=_fe.filter,MN=_fe.tester,EN=KL(),hfe=EN.p2r,cht=EN.axValue,fht=EN.getTransform;function kN(e){return e.subplot!==void 0}function hht(e,t,r,n,i){var a=!kN(n),o=eht(i),s=tht(i),l=AN(i),u=IM(i),c=SN(i),f=i==="drawline",h=i==="drawcircle",d=f||h,v=n.gd,_=v._fullLayout,b=c&&_.newselection.mode==="immediate"&&a,p=_._zoomlayer,k=n.element.getBoundingClientRect(),E=n.plotinfo,S=fht(E),L=t-k.left,x=r-k.top;_._calcInverseTransform(v);var C=xP.apply3DTransform(_._invTransform)(L,x);L=C[0],x=C[1];var M=_._invScaleX,g=_._invScaleY,P=L,T=x,z="M"+L+","+x,O=n.xaxes[0],V=n.yaxes[0],G=O._length,Z=V._length,j=e.altKey&&!(IM(i)&&l),N,H,te,oe,_e,Ee,Ce;bfe(e,v,n),o&&(N=uht([[L,x]],bP.BENDPX));var me=p.selectAll("path.select-outline-"+E.id).data([1]),ie=u?_.newshape:_.newselection;u&&(n.hasText=ie.label.text||ie.label.texttemplate);var Se=u&&!l?ie.fillcolor:"rgba(0,0,0,0)",Le=ie.line.color||(a?CM.contrast(v._fullLayout.plot_bgcolor):"#7f7f7f");me.enter().append("path").attr("class","select-outline select-outline-"+E.id).style({opacity:u?ie.opacity/2:1,"stroke-dasharray":Jft(ie.line.dash,ie.line.width),"stroke-width":ie.line.width+"px","shape-rendering":"crispEdges"}).call(CM.stroke,Le).call(CM.fill,Se).attr("fill-rule","evenodd").classed("cursor-move",!!u).attr("transform",S).attr("d",z+"Z");var Ae=p.append("path").attr("class","zoombox-corners").style({fill:CM.background,stroke:CM.defaultLine,"stroke-width":1}).attr("transform",S).attr("d","M0,0Z");if(u&&n.hasText){var Fe=p.select(".label-temp");Fe.empty()&&(Fe=p.append("g").classed("label-temp",!0).classed("select-outline",!0).style({opacity:.8}))}var Pe=_._uid+bP.SELECTID,ge=[],Re=wP(v,n.xaxes,n.yaxes,n.subplot);b&&!e.shiftKey&&(n._clearSubplotSelections=function(){if(a){var Ze=O._id,ut=V._id;Mfe(v,Ze,ut,Re);for(var pt=(v.layout||{}).selections||[],Zt=[],st=!1,lt=0;lt<pt.length;lt++){var Gt=_.selections[lt];!Gt||Gt.xref!==Ze||Gt.yref!==ut?Zt.push(pt[lt]):st=!0}st&&(v._fullLayout._noEmitSelectedAtStart=!0,PM.call("_guiRelayout",v,{selections:Zt}))}});var ce=Mht(n);n.moveFn=function(Ze,ut){n._clearSubplotSelections&&(n._clearSubplotSelections(),n._clearSubplotSelections=void 0),P=Math.max(0,Math.min(G,M*Ze+L)),T=Math.max(0,Math.min(Z,g*ut+x));var pt=Math.abs(P-L),Zt=Math.abs(T-x);if(s){var st,lt,Gt;if(c){var Nt=_.selectdirection;switch(Nt==="any"?Zt<Math.min(pt*.6,Am)?st="h":pt<Math.min(Zt*.6,Am)?st="v":st="d":st=Nt,st){case"h":lt=h?Z/2:0,Gt=Z;break;case"v":lt=h?G/2:0,Gt=G;break}}if(u)switch(_.newshape.drawdirection){case"vertical":st="h",lt=h?Z/2:0,Gt=Z;break;case"horizontal":st="v",lt=h?G/2:0,Gt=G;break;case"ortho":pt<Zt?(st="h",lt=x,Gt=T):(st="v",lt=L,Gt=P);break;default:st="d"}st==="h"?(oe=d?bN(h,[P,lt],[P,Gt]):[[L,lt],[L,Gt],[P,Gt],[P,lt]],oe.xmin=d?P:Math.min(L,P),oe.xmax=d?P:Math.max(L,P),oe.ymin=Math.min(lt,Gt),oe.ymax=Math.max(lt,Gt),Ae.attr("d","M"+oe.xmin+","+(x-Am)+"h-4v"+2*Am+"h4ZM"+(oe.xmax-1)+","+(x-Am)+"h4v"+2*Am+"h-4Z")):st==="v"?(oe=d?bN(h,[lt,T],[Gt,T]):[[lt,x],[lt,T],[Gt,T],[Gt,x]],oe.xmin=Math.min(lt,Gt),oe.xmax=Math.max(lt,Gt),oe.ymin=d?T:Math.min(x,T),oe.ymax=d?T:Math.max(x,T),Ae.attr("d","M"+(L-Am)+","+oe.ymin+"v-4h"+2*Am+"v4ZM"+(L-Am)+","+(oe.ymax-1)+"v4h"+2*Am+"v-4Z")):st==="d"&&(oe=d?bN(h,[L,x],[P,T]):[[L,x],[L,T],[P,T],[P,x]],oe.xmin=Math.min(L,P),oe.xmax=Math.max(L,P),oe.ymin=Math.min(x,T),oe.ymax=Math.max(x,T),Ae.attr("d","M0,0Z"))}else o&&(N.addPt([P,T]),oe=N.filtered);if(n.selectionDefs&&n.selectionDefs.length?(te=wfe(n.mergedPolygons,oe,j),oe.subtract=j,H=CN(n.selectionDefs.concat([oe]))):(te=[oe],H=MN(oe)),gfe(Afe(te,l),me,n),c){var Jt=TN(v,!1),sr=Jt.eventData?Jt.eventData.points.slice():[];Jt=TN(v,!1,H,Re,n),H=Jt.selectionTesters,Ce=Jt.eventData;var wr;N?wr=N.filtered:wr=Efe(te),LM.throttle(Pe,bP.SELECTDELAY,function(){ge=Sfe(H,Re);for(var cr=ge.slice(),$e=0;$e<sr.length;$e++){for(var St=sr[$e],Qt=!1,Vt=0;Vt<cr.length;Vt++)if(cr[Vt].curveNumber===St.curveNumber&&cr[Vt].pointNumber===St.pointNumber){Qt=!0;break}Qt||cr.push(St)}cr.length&&(Ce||(Ce={}),Ce.points=cr),ce(Ce,wr),Eht(v,Ce)})}},n.clickFn=function(Ze,ut){if(Ae.remove(),v._fullLayout._activeShapeIndex>=0){v._fullLayout._deactivateShape(v);return}if(!u){var pt=_.clickmode;LM.done(Pe).then(function(){if(LM.clear(Pe),Ze===2){for(me.remove(),_e=0;_e<Re.length;_e++)Ee=Re[_e],Ee._module.selectPoints(Ee,!1);if(DM(v,Re),RM(n),PN(v),Re.length){var Zt=Re[0].xaxis,st=Re[0].yaxis;if(Zt&&st){for(var lt=[],Gt=v._fullLayout.selections,Nt=0;Nt<Gt.length;Nt++){var Jt=Gt[Nt];Jt&&(Jt.xref!==Zt._id||Jt.yref!==st._id)&&lt.push(Jt)}lt.length<Gt.length&&(v._fullLayout._noEmitSelectedAtStart=!0,PM.call("_guiRelayout",v,{selections:lt}))}}}else pt.indexOf("select")>-1&&xfe(ut,v,n.xaxes,n.yaxes,n.subplot,n,me),pt==="event"&&FM(v,void 0);$ft.click(v,ut,E.id)}).catch(xP.error)}},n.doneFn=function(){Ae.remove(),LM.done(Pe).then(function(){LM.clear(Pe),!b&&oe&&n.selectionDefs&&(oe.subtract=j,n.selectionDefs.push(oe),n.mergedPolygons.length=0,[].push.apply(n.mergedPolygons,te)),(b||u)&&RM(n,b),n.doneFnCompleted&&n.doneFnCompleted(ge),c&&FM(v,Ce)}).catch(xP.error)}}function xfe(e,t,r,n,i,a,o){var s=t._hoverdata,l=t._fullLayout,u=l.clickmode,c=u.indexOf("event")>-1,f=[],h,d,v,_,b,p,k,E,S,L;if(mht(s)){bfe(e,t,a),h=wP(t,r,n,i);var x=yht(s,h),C=x.pointNumbers.length>0;if(C?_ht(h,x):xht(h)&&(k=vfe(x))){for(o&&o.remove(),L=0;L<h.length;L++)d=h[L],d._module.selectPoints(d,!1);DM(t,h),RM(a),c&&PN(t)}else{E=e.shiftKey&&(k!==void 0?k:vfe(x)),v=dht(x.pointNumber,x.searchInfo,E);var M=a.selectionDefs.concat([v]);for(_=CN(M,_),L=0;L<h.length;L++)if(b=h[L]._module.selectPoints(h[L],_),p=Tfe(b,h[L]),f.length)for(var g=0;g<p.length;g++)f.push(p[g]);else f=p;if(S={points:f},DM(t,h,S),v&&a&&a.selectionDefs.push(v),o){var P=a.mergedPolygons,T=AN(a.dragmode);gfe(Afe(P,T),o,a)}c&&FM(t,S)}}}function dht(e,t,r){return{pointNumber:e,searchInfo:t,subtract:!!r}}function wN(e){return"pointNumber"in e&&"searchInfo"in e}function vht(e){return{xmin:0,xmax:0,ymin:0,ymax:0,pts:[],contains:function(t,r,n,i){var a=e.searchInfo.cd[0].trace.index,o=i.cd[0].trace.index;return o===a&&n===e.pointNumber},isRect:!1,degenerate:!1,subtract:!!e.subtract}}function CN(e){if(!e.length)return;for(var t=[],r=wN(e[0])?0:e[0][0][0],n=r,i=wN(e[0])?0:e[0][0][1],a=i,o=0;o<e.length;o++)if(wN(e[o]))t.push(vht(e[o]));else{var s=MN(e[o]);s.subtract=!!e[o].subtract,t.push(s),r=Math.min(r,s.xmin),n=Math.max(n,s.xmax),i=Math.min(i,s.ymin),a=Math.max(a,s.ymax)}function l(u,c,f,h){for(var d=!1,v=0;v<t.length;v++)t[v].contains(u,c,f,h)&&(d=!t[v].subtract);return d}return{xmin:r,xmax:n,ymin:i,ymax:a,pts:[],contains:l,isRect:!1,degenerate:!1}}function bfe(e,t,r){var n=t._fullLayout,i=r.plotinfo,a=r.dragmode,o=n._lastSelectedSubplot&&n._lastSelectedSubplot===i.id,s=(e.shiftKey||e.altKey)&&!(IM(a)&&AN(a));o&&s&&i.selection&&i.selection.selectionDefs&&!r.selectionDefs?(r.selectionDefs=i.selection.selectionDefs,r.mergedPolygons=i.selection.mergedPolygons):(!s||!i.selection)&&RM(r),o||(mfe(t),n._lastSelectedSubplot=i.id)}function pht(e){return e._fullLayout._activeShapeIndex>=0}function ght(e){return e._fullLayout._activeSelectionIndex>=0}function RM(e,t){var r=e.dragmode,n=e.plotinfo,i=e.gd;pht(i)&&i._fullLayout._deactivateShape(i),ght(i)&&i._fullLayout._deactivateSelection(i);var a=i._fullLayout,o=a._zoomlayer,s=IM(r),l=SN(r);if(s||l){var u=o.selectAll(".select-outline-"+n.id);if(u&&i._fullLayout._outlining){var c;s&&(c=iht(u,e)),c&&PM.call("_guiRelayout",i,{shapes:c});var f;l&&!kN(e)&&(f=nht(u,e)),f&&(i._fullLayout._noEmitSelectedAtStart=!0,PM.call("_guiRelayout",i,{selections:f}).then(function(){t&&aht(i)})),i._fullLayout._outlining=!1}}n.selection={},n.selection.selectionDefs=e.selectionDefs=[],n.selection.mergedPolygons=e.mergedPolygons=[]}function dfe(e){return e._id}function wP(e,t,r,n){if(!e.calcdata)return[];var i=[],a=t.map(dfe),o=r.map(dfe),s,l,u;for(u=0;u<e.calcdata.length;u++)if(s=e.calcdata[u],l=s[0].trace,!(l.visible!==!0||!l._module||!l._module.selectPoints))if(kN({subplot:n})&&(l.subplot===n||l.geo===n))i.push(_P(l._module,s,t[0],r[0]));else if(l.type==="splom"){if(l._xaxes[a[0]]&&l._yaxes[o[0]]){var c=_P(l._module,s,t[0],r[0]);c.scene=e._fullLayout._splomScenes[l.uid],i.push(c)}}else if(l.type==="sankey"){var f=_P(l._module,s,t[0],r[0]);i.push(f)}else{if(a.indexOf(l.xaxis)===-1&&(!l._xA||!l._xA.overlaying)||o.indexOf(l.yaxis)===-1&&(!l._yA||!l._yA.overlaying))continue;i.push(_P(l._module,s,E0(e,l.xaxis),E0(e,l.yaxis)))}return i}function _P(e,t,r,n){return{_module:e,cd:t,xaxis:r,yaxis:n}}function mht(e){return e&&Array.isArray(e)&&e[0].hoverOnBox!==!0}function yht(e,t){var r=e[0],n=-1,i=[],a,o;for(o=0;o<t.length;o++)if(a=t[o],r.fullData.index===a.cd[0].trace.index){if(r.hoverOnBox===!0)break;r.pointNumber!==void 0?n=r.pointNumber:r.binNumber!==void 0&&(n=r.binNumber,i=r.pointNumbers);break}return{pointNumber:n,pointNumbers:i,searchInfo:a}}function vfe(e){var t=e.searchInfo.cd[0].trace,r=e.pointNumber,n=e.pointNumbers,i=n.length>0,a=i?n[0]:r;return t.selectedpoints?t.selectedpoints.indexOf(a)>-1:!1}function _ht(e,t){var r=[],n,i,a,o;for(o=0;o<e.length;o++)n=e[o],n.cd[0].trace.selectedpoints&&n.cd[0].trace.selectedpoints.length>0&&r.push(n);if(r.length===1&&(a=r[0]===t.searchInfo,a&&(i=t.searchInfo.cd[0].trace,i.selectedpoints.length===t.pointNumbers.length))){for(o=0;o<t.pointNumbers.length;o++)if(i.selectedpoints.indexOf(t.pointNumbers[o])<0)return!1;return!0}return!1}function xht(e){var t=0,r,n,i;for(i=0;i<e.length;i++)if(r=e[i],n=r.cd[0].trace,n.selectedpoints&&(n.selectedpoints.length>1||(t+=n.selectedpoints.length,t>1)))return!1;return t===1}function DM(e,t,r){var n;for(n=0;n<t.length;n++){var i=t[n].cd[0].trace._fullInput,a=e._fullLayout._tracePreGUI[i.uid]||{};a.selectedpoints===void 0&&(a.selectedpoints=i._input.selectedpoints||null)}var o;if(r){var s=r.points||[];for(n=0;n<t.length;n++)o=t[n].cd[0].trace,o._input.selectedpoints=o._fullInput.selectedpoints=[],o._fullInput!==o&&(o.selectedpoints=[]);for(var l=0;l<s.length;l++){var u=s[l],c=u.data,f=u.fullData,h=u.pointIndex,d=u.pointIndices;d?([].push.apply(c.selectedpoints,d),o._fullInput!==o&&[].push.apply(f.selectedpoints,d)):(c.selectedpoints.push(h),o._fullInput!==o&&f.selectedpoints.push(h))}}else for(n=0;n<t.length;n++)o=t[n].cd[0].trace,delete o.selectedpoints,delete o._input.selectedpoints,o._fullInput!==o&&delete o._fullInput.selectedpoints;bht(e,t)}function bht(e,t){for(var r=!1,n=0;n<t.length;n++){var i=t[n],a=i.cd;PM.traceIs(a[0].trace,"regl")&&(r=!0);var o=i._module,s=o.styleOnSelect||o.style;s&&(s(e,a,a[0].node3),a[0].nodeRangePlot3&&s(e,a,a[0].nodeRangePlot3))}r&&(sht(e),lht(e))}function wfe(e,t,r){for(var n=r?ufe.difference:ufe.union,i=n({regions:e},{regions:[t]}),a=i.regions.reverse(),o=0;o<a.length;o++){var s=a[o];s.subtract=LN(s,a.slice(0,o))}return a}function Tfe(e,t){if(Array.isArray(e))for(var r=t.cd,n=t.cd[0].trace,i=0;i<e.length;i++)e[i]=Qft(e[i],n,r);return e}function Afe(e,t){for(var r=[],n=0;n<e.length;n++){r[n]=[];for(var i=0;i<e[n].length;i++){r[n][i]=[],r[n][i][0]=i?"L":"M";for(var a=0;a<e[n][i].length;a++)r[n][i].push(e[n][i][a])}t||r[n].push(["Z",r[n][0][1],r[n][0][2]])}return r}function Sfe(e,t){for(var r=[],n,i=[],a,o=0;o<t.length;o++){var s=t[o];a=s._module.selectPoints(s,e),i.push(a),n=Tfe(a,s),r=r.concat(n)}return r}function TN(e,t,r,n,i){var a=!!n,o,s,l;i&&(o=i.plotinfo,s=i.xaxes[0]._id,l=i.yaxes[0]._id);var u=[],c=[],f=pfe(e),h=e._fullLayout;if(o){var d=h._zoomlayer,v=h.dragmode,_=IM(v),b=SN(v);if(_||b){var p=E0(e,s,"x"),k=E0(e,l,"y");if(p&&k){var E=d.selectAll(".select-outline-"+o.id);if(E&&e._fullLayout._outlining&&E.length){for(var S=E[0][0],L=S.getAttribute("d"),x=rht(L,e,o),C=[],M=0;M<x.length;M++){for(var g=x[M],P=[],T=0;T<g.length;T++)P.push([h_(p,g[T][1]),h_(k,g[T][2])]);P.xref=s,P.yref=l,P.subtract=LN(P,C),C.push(P)}f=f.concat(C)}}}}var z=s&&l?[s+l]:h._subplots.cartesian;wht(e);for(var O={},V=0;V<z.length;V++){var G=z[V],Z=G.indexOf("y"),j=G.slice(0,Z),N=G.slice(Z),H=s&&l?r:void 0;if(H=Aht(f,j,N,H),H){var te=n;if(!a){var oe=E0(e,j,"x"),_e=E0(e,N,"y");te=wP(e,[oe],[_e],G);for(var Ee=0;Ee<te.length;Ee++){var Ce=te[Ee],me=Ce.cd[0],ie=me.trace;if(Ce._module.name==="scattergl"&&!me.t.xpx){var Se=ie.x,Le=ie.y,Ae=ie._length;me.t.xpx=[],me.t.ypx=[];for(var Fe=0;Fe<Ae;Fe++)me.t.xpx[Fe]=oe.c2p(Se[Fe]),me.t.ypx[Fe]=_e.c2p(Le[Fe])}Ce._module.name==="splom"&&(O[ie.uid]||(O[ie.uid]=!0))}}var Pe=Sfe(H,te);u=u.concat(Pe),c=c.concat(te)}}var ge={points:u};DM(e,c,ge);var Re=h.clickmode,ce=Re.indexOf("event")>-1&&t;if(!o&&t){var Ze=pfe(e,!0);if(Ze.length){var ut=Ze[0].xref,pt=Ze[0].yref;if(ut&&pt){var Zt=Efe(Ze),st=kfe([E0(e,ut,"x"),E0(e,pt,"y")]);st(ge,Zt)}}e._fullLayout._noEmitSelectedAtStart?e._fullLayout._noEmitSelectedAtStart=!1:ce&&FM(e,ge),h._reselect=!1}if(!o&&h._deselect){var lt=h._deselect;s=lt.xref,l=lt.yref,Tht(s,l,c)||Mfe(e,s,l,n),ce&&(ge.points.length?FM(e,ge):PN(e)),h._deselect=!1}return{eventData:ge,selectionTesters:r}}function wht(e){var t=e.calcdata;if(t)for(var r=0;r<t.length;r++){var n=t[r][0],i=n.trace,a=e._fullLayout._splomScenes;if(a){var o=a[i.uid];o&&(o.selectBatch=[])}}}function Tht(e,t,r){for(var n=0;n<r.length;n++){var i=r[n];if(i.xaxis&&i.xaxis._id===e&&i.yaxis&&i.yaxis._id===t)return!0}return!1}function Mfe(e,t,r,n){n=wP(e,[E0(e,t,"x")],[E0(e,r,"y")],t+r);for(var i=0;i<n.length;i++){var a=n[i];a._module.selectPoints(a,!1)}DM(e,n)}function Aht(e,t,r,n){for(var i,a=0;a<e.length;a++){var o=e[a];if(!(t!==o.xref||r!==o.yref))if(i){var s=!!o.subtract;i=wfe(i,o,s),n=CN(i)}else i=[o],n=MN(o)}return n}function pfe(e,t){for(var r=[],n=e._fullLayout,i=n.selections,a=i.length,o=0;o<a;o++)if(!(t&&o!==n._activeSelectionIndex)){var s=i[o];if(s){var l=s.xref,u=s.yref,c=E0(e,l,"x"),f=E0(e,u,"y"),h,d,v,_,b;if(s.type==="rect"){b=[];var p=h_(c,s.x0),k=h_(c,s.x1),E=h_(f,s.y0),S=h_(f,s.y1);b=[[p,E],[p,S],[k,S],[k,E]],h=Math.min(p,k),d=Math.max(p,k),v=Math.min(E,S),_=Math.max(E,S),b.xmin=h,b.xmax=d,b.ymin=v,b.ymax=_,b.xref=l,b.yref=u,b.subtract=!1,b.isRect=!0,r.push(b)}else if(s.type==="path")for(var L=s.path.split("Z"),x=[],C=0;C<L.length;C++){var M=L[C];if(M){M+="Z";var g=cfe.extractPathCoords(M,ffe.paramIsX,"raw"),P=cfe.extractPathCoords(M,ffe.paramIsY,"raw");h=1/0,d=-1/0,v=1/0,_=-1/0,b=[];for(var T=0;T<g.length;T++){var z=h_(c,g[T]),O=h_(f,P[T]);b.push([z,O]),h=Math.min(z,h),d=Math.max(z,d),v=Math.min(O,v),_=Math.max(O,_)}b.xmin=h,b.xmax=d,b.ymin=v,b.ymax=_,b.xref=l,b.yref=u,b.subtract=LN(b,x),x.push(b),r.push(b)}}}}return r}function LN(e,t){for(var r=!1,n=0;n<t.length;n++)for(var i=t[n],a=0;a<e.length;a++)if(Kft(e[a],i)){r=!r;break}return r}function h_(e,t){return e.type==="date"&&(t=t.replace("_"," ")),e.type==="log"?e.c2p(t):e.r2p(t,null,e.calendar)}function Efe(e){for(var t=e.length,r=[],n=0;n<t;n++){var i=e[n];r=r.concat(i),r=r.concat([i[0]])}return Sht(r)}function Sht(e){return e.isRect=e.length===5&&e[0][0]===e[4][0]&&e[0][1]===e[4][1]&&e[0][0]===e[1][0]&&e[2][0]===e[3][0]&&e[0][1]===e[3][1]&&e[1][1]===e[2][1]||e[0][1]===e[1][1]&&e[2][1]===e[3][1]&&e[0][0]===e[3][0]&&e[1][0]===e[2][0],e.isRect&&(e.xmin=Math.min(e[0][0],e[2][0]),e.xmax=Math.max(e[0][0],e[2][0]),e.ymin=Math.min(e[0][1],e[2][1]),e.ymax=Math.max(e[0][1],e[2][1])),e}function kfe(e){return function(t,r){for(var n,i,a=0;a<e.length;a++){var o=e[a],s=o._id,l=s.charAt(0);if(r.isRect){n||(n={});var u=r[l+"min"],c=r[l+"max"];u!==void 0&&c!==void 0&&(n[s]=[hfe(o,u),hfe(o,c)].sort(oht))}else i||(i={}),i[s]=r.map(cht(o))}n&&(t.range=n),i&&(t.lassoPoints=i)}}function Mht(e){var t=e.plotinfo;return t.fillRangeItems||kfe(e.xaxes.concat(e.yaxes))}function Eht(e,t){e.emit("plotly_selecting",t)}function FM(e,t){t&&(t.selections=(e.layout||{}).selections||[]),e.emit("plotly_selected",t)}function PN(e){e.emit("plotly_deselect",null)}Cfe.exports={reselect:TN,prepSelect:hht,clearOutline:mfe,clearSelectionsCache:RM,selectOnClick:xfe}});var IN=ye((sor,Pfe)=>{"use strict";Pfe.exports=[{path:"",backoff:0},{path:"M-2.4,-3V3L0.6,0Z",backoff:.6},{path:"M-3.7,-2.5V2.5L1.3,0Z",backoff:1.3},{path:"M-4.45,-3L-1.65,-0.2V0.2L-4.45,3L1.55,0Z",backoff:1.55},{path:"M-2.2,-2.2L-0.2,-0.2V0.2L-2.2,2.2L-1.4,3L1.6,0L-1.4,-3Z",backoff:1.6},{path:"M-4.4,-2.1L-0.6,-0.2V0.2L-4.4,2.1L-4,3L2,0L-4,-3Z",backoff:2},{path:"M2,0A2,2 0 1,1 0,-2A2,2 0 0,1 2,0Z",backoff:0,noRotate:!0},{path:"M2,2V-2H-2V2Z",backoff:0,noRotate:!0}]});var OM=ye((lor,Ife)=>{"use strict";Ife.exports={axisRefDescription:function(e,t,r){return["If set to a",e,"axis id (e.g. *"+e+"* or","*"+e+"2*), the `"+e+"` position refers to a",e,"coordinate. If set to *paper*, the `"+e+"`","position refers to the distance from the",t,"of the plotting","area in normalized coordinates where *0* (*1*) corresponds to the",t,"("+r+"). If set to a",e,"axis ID followed by","*domain* (separated by a space), the position behaves like for","*paper*, but refers to the distance in fractions of the domain","length from the",t,"of the domain of that axis: e.g.,","*"+e+"2 domain* refers to the domain of the second",e," axis and a",e,"position of 0.5 refers to the","point between the",t,"and the",r,"of the domain of the","second",e,"axis."].join(" ")}}});var Ub=ye((cor,Ffe)=>{"use strict";var Rfe=IN(),Dfe=ec(),TP=hd(),kht=vl().templatedArray,uor=OM();Ffe.exports=kht("annotation",{visible:{valType:"boolean",dflt:!0,editType:"calc+arraydraw"},text:{valType:"string",editType:"calc+arraydraw"},textangle:{valType:"angle",dflt:0,editType:"calc+arraydraw"},font:Dfe({editType:"calc+arraydraw",colorEditType:"arraydraw"}),width:{valType:"number",min:1,dflt:null,editType:"calc+arraydraw"},height:{valType:"number",min:1,dflt:null,editType:"calc+arraydraw"},opacity:{valType:"number",min:0,max:1,dflt:1,editType:"arraydraw"},align:{valType:"enumerated",values:["left","center","right"],dflt:"center",editType:"arraydraw"},valign:{valType:"enumerated",values:["top","middle","bottom"],dflt:"middle",editType:"arraydraw"},bgcolor:{valType:"color",dflt:"rgba(0,0,0,0)",editType:"arraydraw"},bordercolor:{valType:"color",dflt:"rgba(0,0,0,0)",editType:"arraydraw"},borderpad:{valType:"number",min:0,dflt:1,editType:"calc+arraydraw"},borderwidth:{valType:"number",min:0,dflt:1,editType:"calc+arraydraw"},showarrow:{valType:"boolean",dflt:!0,editType:"calc+arraydraw"},arrowcolor:{valType:"color",editType:"arraydraw"},arrowhead:{valType:"integer",min:0,max:Rfe.length,dflt:1,editType:"arraydraw"},startarrowhead:{valType:"integer",min:0,max:Rfe.length,dflt:1,editType:"arraydraw"},arrowside:{valType:"flaglist",flags:["end","start"],extras:["none"],dflt:"end",editType:"arraydraw"},arrowsize:{valType:"number",min:.3,dflt:1,editType:"calc+arraydraw"},startarrowsize:{valType:"number",min:.3,dflt:1,editType:"calc+arraydraw"},arrowwidth:{valType:"number",min:.1,editType:"calc+arraydraw"},standoff:{valType:"number",min:0,dflt:0,editType:"calc+arraydraw"},startstandoff:{valType:"number",min:0,dflt:0,editType:"calc+arraydraw"},ax:{valType:"any",editType:"calc+arraydraw"},ay:{valType:"any",editType:"calc+arraydraw"},axref:{valType:"enumerated",dflt:"pixel",values:["pixel",TP.idRegex.x.toString()],editType:"calc"},ayref:{valType:"enumerated",dflt:"pixel",values:["pixel",TP.idRegex.y.toString()],editType:"calc"},xref:{valType:"enumerated",values:["paper",TP.idRegex.x.toString()],editType:"calc"},x:{valType:"any",editType:"calc+arraydraw"},xanchor:{valType:"enumerated",values:["auto","left","center","right"],dflt:"auto",editType:"calc+arraydraw"},xshift:{valType:"number",dflt:0,editType:"calc+arraydraw"},yref:{valType:"enumerated",values:["paper",TP.idRegex.y.toString()],editType:"calc"},y:{valType:"any",editType:"calc+arraydraw"},yanchor:{valType:"enumerated",values:["auto","top","middle","bottom"],dflt:"auto",editType:"calc+arraydraw"},yshift:{valType:"number",dflt:0,editType:"calc+arraydraw"},clicktoshow:{valType:"enumerated",values:[!1,"onoff","onout"],dflt:!1,editType:"arraydraw"},xclick:{valType:"any",editType:"arraydraw"},yclick:{valType:"any",editType:"arraydraw"},hovertext:{valType:"string",editType:"arraydraw"},hoverlabel:{bgcolor:{valType:"color",editType:"arraydraw"},bordercolor:{valType:"color",editType:"arraydraw"},font:Dfe({editType:"arraydraw"}),editType:"arraydraw"},captureevents:{valType:"boolean",editType:"arraydraw"},editType:"calc"})});var Sm=ye((hor,zfe)=>{"use strict";zfe.exports={PTS_LINESONLY:20,minTolerance:.2,toleranceGrowth:10,maxScreensAway:20,eventDataKeys:[]}});var Cg=ye((dor,Ofe)=>{"use strict";Ofe.exports=function(t){return{valType:"color",editType:"style",anim:!0}}});var pf=ye((vor,Hfe)=>{"use strict";var qfe=df().axisHoverFormat,{hovertemplateAttrs:Cht,texttemplateAttrs:Lht,templatefallbackAttrs:Bfe}=Ll(),Nfe=Tu(),Pht=ec(),Iht=Pd().dash,Rht=Pd().pattern,Dht=So(),Fht=Sm(),AP=Ao().extendFlat,zht=Cg();function Ufe(e){return{valType:"any",dflt:0,editType:"calc"}}function Vfe(e){return{valType:"any",editType:"calc"}}function Gfe(e){return{valType:"enumerated",values:["start","middle","end"],dflt:"middle",editType:"calc"}}Hfe.exports={x:{valType:"data_array",editType:"calc+clearAxisTypes",anim:!0},x0:{valType:"any",dflt:0,editType:"calc+clearAxisTypes",anim:!0},dx:{valType:"number",dflt:1,editType:"calc",anim:!0},y:{valType:"data_array",editType:"calc+clearAxisTypes",anim:!0},y0:{valType:"any",dflt:0,editType:"calc+clearAxisTypes",anim:!0},dy:{valType:"number",dflt:1,editType:"calc",anim:!0},xperiod:Ufe("x"),yperiod:Ufe("y"),xperiod0:Vfe("x0"),yperiod0:Vfe("y0"),xperiodalignment:Gfe("x"),yperiodalignment:Gfe("y"),xhoverformat:qfe("x"),yhoverformat:qfe("y"),offsetgroup:{valType:"string",dflt:"",editType:"calc"},alignmentgroup:{valType:"string",dflt:"",editType:"calc"},stackgroup:{valType:"string",dflt:"",editType:"calc"},orientation:{valType:"enumerated",values:["v","h"],editType:"calc"},groupnorm:{valType:"enumerated",values:["","fraction","percent"],dflt:"",editType:"calc"},stackgaps:{valType:"enumerated",values:["infer zero","interpolate"],dflt:"infer zero",editType:"calc"},text:{valType:"string",dflt:"",arrayOk:!0,editType:"calc"},texttemplate:Lht(),texttemplatefallback:Bfe({editType:"calc"}),hovertext:{valType:"string",dflt:"",arrayOk:!0,editType:"style"},mode:{valType:"flaglist",flags:["lines","markers","text"],extras:["none"],editType:"calc"},hoveron:{valType:"flaglist",flags:["points","fills"],editType:"style"},hovertemplate:Cht({},{keys:Fht.eventDataKeys}),hovertemplatefallback:Bfe(),line:{color:{valType:"color",editType:"style",anim:!0},width:{valType:"number",min:0,dflt:2,editType:"style",anim:!0},shape:{valType:"enumerated",values:["linear","spline","hv","vh","hvh","vhv"],dflt:"linear",editType:"plot"},smoothing:{valType:"number",min:0,max:1.3,dflt:1,editType:"plot"},dash:AP({},Iht,{editType:"style"}),backoff:{valType:"number",min:0,dflt:"auto",arrayOk:!0,editType:"plot"},simplify:{valType:"boolean",dflt:!0,editType:"plot"},editType:"plot"},connectgaps:{valType:"boolean",dflt:!1,editType:"calc"},cliponaxis:{valType:"boolean",dflt:!0,editType:"plot"},fill:{valType:"enumerated",values:["none","tozeroy","tozerox","tonexty","tonextx","toself","tonext"],editType:"calc"},fillcolor:zht(!0),fillgradient:AP({type:{valType:"enumerated",values:["radial","horizontal","vertical","none"],dflt:"none",editType:"calc"},start:{valType:"number",editType:"calc"},stop:{valType:"number",editType:"calc"},colorscale:{valType:"colorscale",editType:"style"},editType:"calc"}),fillpattern:Rht,marker:AP({symbol:{valType:"enumerated",values:Dht.symbolList,dflt:"circle",arrayOk:!0,editType:"style"},opacity:{valType:"number",min:0,max:1,arrayOk:!0,editType:"style",anim:!0},angle:{valType:"angle",dflt:0,arrayOk:!0,editType:"plot",anim:!1},angleref:{valType:"enumerated",values:["previous","up"],dflt:"up",editType:"plot",anim:!1},standoff:{valType:"number",min:0,dflt:0,arrayOk:!0,editType:"plot",anim:!0},size:{valType:"number",min:0,dflt:6,arrayOk:!0,editType:"calc",anim:!0},maxdisplayed:{valType:"number",min:0,dflt:0,editType:"plot"},sizeref:{valType:"number",dflt:1,editType:"calc"},sizemin:{valType:"number",min:0,dflt:0,editType:"calc"},sizemode:{valType:"enumerated",values:["diameter","area"],dflt:"diameter",editType:"calc"},line:AP({width:{valType:"number",min:0,arrayOk:!0,editType:"style",anim:!0},editType:"calc"},Nfe("marker.line",{anim:!0})),gradient:{type:{valType:"enumerated",values:["radial","horizontal","vertical","none"],arrayOk:!0,dflt:"none",editType:"calc"},color:{valType:"color",arrayOk:!0,editType:"calc"},editType:"calc"},editType:"calc"},Nfe("marker",{anim:!0})),selected:{marker:{opacity:{valType:"number",min:0,max:1,editType:"style"},color:{valType:"color",editType:"style"},size:{valType:"number",min:0,editType:"style"},editType:"style"},textfont:{color:{valType:"color",editType:"style"},editType:"style"},editType:"style"},unselected:{marker:{opacity:{valType:"number",min:0,max:1,editType:"style"},color:{valType:"color",editType:"style"},size:{valType:"number",min:0,editType:"style"},editType:"style"},textfont:{color:{valType:"color",editType:"style"},editType:"style"},editType:"style"},textposition:{valType:"enumerated",values:["top left","top center","top right","middle left","middle center","middle right","bottom left","bottom center","bottom right"],dflt:"middle center",arrayOk:!0,editType:"calc"},textfont:Pht({editType:"calc",colorEditType:"style",arrayOk:!0}),zorder:{valType:"integer",dflt:0,editType:"plot"}}});var RN=ye((gor,Xfe)=>{"use strict";var jfe=Ub(),Wfe=pf().line,Oht=Pd().dash,SP=Ao().extendFlat,qht=mc().overrideAll,Bht=vl().templatedArray,por=OM();Xfe.exports=qht(Bht("selection",{type:{valType:"enumerated",values:["rect","path"]},xref:SP({},jfe.xref,{}),yref:SP({},jfe.yref,{}),x0:{valType:"any"},x1:{valType:"any"},y0:{valType:"any"},y1:{valType:"any"},path:{valType:"string",editType:"arraydraw"},opacity:{valType:"number",min:0,max:1,dflt:.7,editType:"arraydraw"},line:{color:Wfe.color,width:SP({},Wfe.width,{min:1,dflt:1}),dash:SP({},Oht,{dflt:"dot"})}}),"arraydraw","from-root")});var Jfe=ye((mor,Kfe)=>{"use strict";var Zfe=Dr(),MP=ho(),Nht=Yd(),Uht=RN(),Yfe=f_();Kfe.exports=function(t,r){Nht(t,r,{name:"selections",handleItemDefaults:Vht});for(var n=r.selections,i=0;i<n.length;i++){var a=n[i];a&&a.path===void 0&&(a.x0===void 0||a.x1===void 0||a.y0===void 0||a.y1===void 0)&&(r.selections[i]=null)}};function Vht(e,t,r){function n(x,C){return Zfe.coerce(e,t,Uht,x,C)}var i=n("path"),a=i?"path":"rect",o=n("type",a),s=o!=="path";s&&delete t.path,n("opacity"),n("line.color"),n("line.width"),n("line.dash");for(var l=["x","y"],u=0;u<2;u++){var c=l[u],f={_fullLayout:r},h,d,v,_=MP.coerceRef(e,t,f,c);if(h=MP.getFromId(f,_),h._selectionIndices.push(t._index),v=Yfe.rangeToShapePosition(h),d=Yfe.shapePositionToRange(h),s){var b=c+"0",p=c+"1",k=e[b],E=e[p];e[b]=d(e[b],!0),e[p]=d(e[p],!0),MP.coercePosition(t,f,n,_,b),MP.coercePosition(t,f,n,_,p);var S=t[b],L=t[p];S!==void 0&&L!==void 0&&(t[b]=v(S),t[p]=v(L),e[b]=k,e[p]=E)}}s&&Zfe.noneOrAll(e,t,["x0","x1","y0","y1"])}});var Qfe=ye((yor,$fe)=>{"use strict";$fe.exports=function(t,r,n){n("newselection.mode");var i=n("newselection.line.width");i&&(n("newselection.line.color"),n("newselection.line.dash")),n("activeselection.fillcolor"),n("activeselection.opacity")}});var qM=ye((_or,rhe)=>{"use strict";var Ght=qa(),ehe=Dr(),the=hf();rhe.exports=function(t){return function(n,i){var a=n[t];if(Array.isArray(a))for(var o=Ght.subplotsRegistry.cartesian,s=o.idRegex,l=i._subplots,u=l.xaxis,c=l.yaxis,f=l.cartesian,h=i._has("cartesian"),d=0;d<a.length;d++){var v=a[d];if(ehe.isPlainObject(v)){var _=the.cleanId(v.xref,"x",!1),b=the.cleanId(v.yref,"y",!1),p=s.x.test(_),k=s.y.test(b);if(p||k){h||ehe.pushUnique(i._basePlotModules,o);var E=!1;p&&u.indexOf(_)===-1&&(u.push(_),E=!0),k&&c.indexOf(b)===-1&&(c.push(b),E=!0),E&&p&&k&&f.push(_+b)}}}}}});var Of=ye((xor,nhe)=>{"use strict";var ihe=_N(),BM=Lfe();nhe.exports={moduleType:"component",name:"selections",layoutAttributes:RN(),supplyLayoutDefaults:Jfe(),supplyDrawNewSelectionDefaults:Qfe(),includeBasePlot:qM()("selections"),draw:ihe.draw,drawOne:ihe.drawOne,reselect:BM.reselect,prepSelect:BM.prepSelect,clearOutline:BM.clearOutline,clearSelectionsCache:BM.clearSelectionsCache,selectOnClick:BM.selectOnClick}});var NN=ye((bor,Ahe)=>{"use strict";var qN=Oa(),k0=Dr(),ahe=k0.numberFormat,Hht=cd(),jht=RL(),EP=qa(),vhe=k0.strTranslate,Wht=ru(),ohe=ka(),d_=So(),Xht=vf(),she=ho(),Zht=Sg(),Yht=yv(),phe=Eg(),kP=phe.selectingOrDrawing,Kht=phe.freeMode,Jht=$h().FROM_TL,$ht=vM(),Qht=wM().redrawReglTraces,edt=Mc(),FN=hf().getFromId,tdt=Of().prepSelect,rdt=Of().clearOutline,idt=Of().selectOnClick,DN=fN(),BN=hd(),lhe=BN.MINDRAG,np=BN.MINZOOM,uhe=!0;function ndt(e,t,r,n,i,a,o,s){var l=e._fullLayout._zoomlayer,u=o+s==="nsew",c=(o+s).length===1,f,h,d,v,_,b,p,k,E,S,L,x,C,M,g,P,T,z,O,V,G,Z,j;r+=t.yaxis._shift;function N(){if(f=t.xaxis,h=t.yaxis,E=f._length,S=h._length,p=f._offset,k=h._offset,d={},d[f._id]=f,v={},v[h._id]=h,o&&s)for(var It=t.overlays,mt=0;mt<It.length;mt++){var er=It[mt].xaxis;d[er._id]=er;var lr=It[mt].yaxis;v[lr._id]=lr}_=dhe(d),b=dhe(v),C=che(_,s),M=che(b,o),g=!M&&!C,x=hhe(e,e._fullLayout._axisMatchGroups,d,v),L=hhe(e,e._fullLayout._axisConstraintGroups,d,v,x);var Tr=L.isSubplotConstrained||x.isSubplotConstrained;P=s||Tr,T=o||Tr;var Lr=e._fullLayout;z=Lr._has("scattergl"),O=Lr._has("splom"),V=Lr._has("svg")}N();var H=sdt(M+C,e._fullLayout.dragmode,u),te=mhe(t,o+s+"drag",H,r,n,i,a);if(g&&!u)return te.onmousedown=null,te.style.pointerEvents="none",te;var oe={element:te,gd:e,plotinfo:t};oe.prepFn=function(It,mt,er){var lr=oe.dragmode,Tr=e._fullLayout.dragmode;Tr!==lr&&(oe.dragmode=Tr),N(),Z=e._fullLayout._invScaleX,j=e._fullLayout._invScaleY,g||(u?It.shiftKey?Tr==="pan"?Tr="zoom":kP(Tr)||(Tr="pan"):It.ctrlKey&&(Tr="pan"):Tr="pan"),Kht(Tr)?oe.minDrag=1:oe.minDrag=void 0,kP(Tr)?(oe.xaxes=_,oe.yaxes=b,tdt(It,mt,er,oe,Tr)):(oe.clickFn=Ee,kP(lr)&&_e(),g||(Tr==="zoom"?(oe.moveFn=Ze,oe.doneFn=pt,oe.minDrag=1,ce(It,mt,er)):Tr==="pan"&&(oe.moveFn=Jt,oe.doneFn=$e))),e._fullLayout._redrag=function(){var Lr=e._dragdata;if(Lr&&Lr.element===te){var ti=e._fullLayout.dragmode;kP(ti)||(N(),St([0,0,E,S]),oe.moveFn(Lr.dx,Lr.dy))}}};function _e(){oe.plotinfo.selection=!1,rdt(e)}function Ee(It,mt){var er=oe.gd;if(er._fullLayout._activeShapeIndex>=0){er._fullLayout._deactivateShape(er);return}var lr=er._fullLayout.clickmode;if(ON(er),It===2&&!c&&cr(),u)lr.indexOf("select")>-1&&idt(mt,er,_,b,t.id,oe),lr.indexOf("event")>-1&&Xht.click(er,mt,t.id);else if(It===1&&c){var Tr=o?h:f,Lr=o==="s"||s==="w"?0:1,ti=Tr._name+".range["+Lr+"]",Br=adt(Tr,Lr),Vr="left",dt="middle";if(Tr.fixedrange)return;o?(dt=o==="n"?"top":"bottom",Tr.side==="right"&&(Vr="right")):s==="e"&&(Vr="right"),er._context.showAxisRangeEntryBoxes&&qN.select(te).call(Wht.makeEditable,{gd:er,immediate:!0,background:er._fullLayout.paper_bgcolor,text:String(Br),fill:Tr.tickfont?Tr.tickfont.color:"#444",horizontalAlign:Vr,verticalAlign:dt}).on("edit",function(Ge){var Je=Tr.d2r(Ge);Je!==void 0&&EP.call("_guiRelayout",er,ti,Je)})}}Yht.init(oe);var Ce,me,ie,Se,Le,Ae,Fe,Pe,ge,Re;function ce(It,mt,er){var lr=te.getBoundingClientRect();Ce=mt-lr.left,me=er-lr.top,e._fullLayout._calcInverseTransform(e);var Tr=k0.apply3DTransform(e._fullLayout._invTransform)(Ce,me);Ce=Tr[0],me=Tr[1],ie={l:Ce,r:Ce,w:0,t:me,b:me,h:0},Se=e._hmpixcount?e._hmlumcount/e._hmpixcount:Hht(e._fullLayout.plot_bgcolor).getLuminance(),Le="M0,0H"+E+"V"+S+"H0V0",Ae=!1,Fe="xy",Re=!1,Pe=yhe(l,Se,p,k,Le),ge=_he(l,p,k)}function Ze(It,mt){if(e._transitioningWithDuration)return!1;var er=Math.max(0,Math.min(E,Z*It+Ce)),lr=Math.max(0,Math.min(S,j*mt+me)),Tr=Math.abs(er-Ce),Lr=Math.abs(lr-me);ie.l=Math.min(Ce,er),ie.r=Math.max(Ce,er),ie.t=Math.min(me,lr),ie.b=Math.max(me,lr);function ti(){Fe="",ie.r=ie.l,ie.t=ie.b,ge.attr("d","M0,0Z")}if(L.isSubplotConstrained)Tr>np||Lr>np?(Fe="xy",Tr/E>Lr/S?(Lr=Tr*S/E,me>lr?ie.t=me-Lr:ie.b=me+Lr):(Tr=Lr*E/S,Ce>er?ie.l=Ce-Tr:ie.r=Ce+Tr),ge.attr("d",CP(ie))):ti();else if(x.isSubplotConstrained)if(Tr>np||Lr>np){Fe="xy";var Br=Math.min(ie.l/E,(S-ie.b)/S),Vr=Math.max(ie.r/E,(S-ie.t)/S);ie.l=Br*E,ie.r=Vr*E,ie.b=(1-Br)*S,ie.t=(1-Vr)*S,ge.attr("d",CP(ie))}else ti();else!M||Lr<Math.min(Math.max(Tr*.6,lhe),np)?Tr<lhe||!C?ti():(ie.t=0,ie.b=S,Fe="x",ge.attr("d",ldt(ie,me))):!C||Tr<Math.min(Lr*.6,np)?(ie.l=0,ie.r=E,Fe="y",ge.attr("d",udt(ie,Ce))):(Fe="xy",ge.attr("d",CP(ie)));ie.w=ie.r-ie.l,ie.h=ie.b-ie.t,Fe&&(Re=!0),e._dragged=Re,xhe(Pe,ge,ie,Le,Ae,Se),ut(),e.emit("plotly_relayouting",G),Ae=!0}function ut(){G={},(Fe==="xy"||Fe==="x")&&(zN(_,ie.l/E,ie.r/E,G,L.xaxes),sr("x",G)),(Fe==="xy"||Fe==="y")&&(zN(b,(S-ie.b)/S,(S-ie.t)/S,G,L.yaxes),sr("y",G))}function pt(){ut(),ON(e),$e(),whe(e)}var Zt=[0,0,E,S],st=null,lt=BN.REDRAWDELAY,Gt=t.mainplot?e._fullLayout._plots[t.mainplot]:t;function Nt(It){if(!e._context._scrollZoom.cartesian&&!e._fullLayout._enablescrollzoom)return;if(_e(),e._transitioningWithDuration){It.preventDefault(),It.stopPropagation();return}N(),clearTimeout(st);var mt=-It.deltaY;if(isFinite(mt)||(mt=It.wheelDelta/10),!isFinite(mt)){k0.log("Did not find wheel motion attributes: ",It);return}var er=Math.exp(-Math.min(Math.max(mt,-20),20)/200),lr=Gt.draglayer.select(".nsewdrag").node().getBoundingClientRect(),Tr=(It.clientX-lr.left)/lr.width,Lr=(lr.bottom-It.clientY)/lr.height,ti;function Br(Vr,dt,Ge){if(Vr.fixedrange)return;var Je=k0.simpleMap(Vr.range,Vr.r2l),je=Je[0]+(Je[1]-Je[0])*dt;function tt(xt){return Vr.l2r(je+(xt-je)*Ge)}Vr.range=Je.map(tt)}if(P){for(s||(Tr=.5),ti=0;ti<_.length;ti++)Br(_[ti],Tr,er);sr("x"),Zt[2]*=er,Zt[0]+=Zt[2]*Tr*(1/er-1)}if(T){for(o||(Lr=.5),ti=0;ti<b.length;ti++)Br(b[ti],Lr,er);sr("y"),Zt[3]*=er,Zt[1]+=Zt[3]*(1-Lr)*(1/er-1)}St(Zt),wr(),e.emit("plotly_relayouting",G),st=setTimeout(function(){e._fullLayout&&(Zt=[0,0,E,S],$e())},lt),It.preventDefault()}o.length*s.length!==1&&The(te,Nt);function Jt(It,mt){if(It=It*Z,mt=mt*j,e._transitioningWithDuration)return;if(e._fullLayout._replotting=!0,C==="ew"||M==="ns"){var er=C?-It:0,lr=M?-mt:0;if(x.isSubplotConstrained){if(C&&M){var Tr=(It/E-mt/S)/2;It=Tr*E,mt=-Tr*S,er=-It,lr=-mt}M?er=-lr*E/S:lr=-er*S/E}C&&(fhe(_,It),sr("x")),M&&(fhe(b,mt),sr("y")),St([er,lr,E,S]),wr(),e.emit("plotly_relayouting",G);return}function Lr(tt,xt,Ie){for(var xe=1-xt,ke,vt,ir=0;ir<tt.length;ir++){var ar=tt[ir];if(!ar.fixedrange){ke=ar,vt=ar._rl[xe]+(ar._rl[xt]-ar._rl[xe])/odt(Ie/ar._length);var vr=ar.l2r(vt);vr!==!1&&vr!==void 0&&(ar.range[xt]=vr)}}return ke._length*(ke._rl[xt]-vt)/(ke._rl[xt]-ke._rl[xe])}var ti=C==="w"==(M==="n")?1:-1;if(C&&M&&(L.isSubplotConstrained||x.isSubplotConstrained)){var Br=(It/E+ti*mt/S)/2;It=Br*E,mt=ti*Br*S}var Vr,dt;if(C==="w"?It=Lr(_,0,It):C==="e"?It=Lr(_,1,-It):C||(It=0),M==="n"?mt=Lr(b,1,mt):M==="s"?mt=Lr(b,0,-mt):M||(mt=0),Vr=C==="w"?It:0,dt=M==="n"?mt:0,L.isSubplotConstrained&&!x.isSubplotConstrained||x.isSubplotConstrained&&C&&M&&ti>0){var Ge;if(x.isSubplotConstrained||!C&&M.length===1){for(Ge=0;Ge<_.length;Ge++)_[Ge].range=_[Ge]._r.slice(),DN(_[Ge],1-mt/S);It=mt*E/S,Vr=It/2}if(x.isSubplotConstrained||!M&&C.length===1){for(Ge=0;Ge<b.length;Ge++)b[Ge].range=b[Ge]._r.slice(),DN(b[Ge],1-It/E);mt=It*S/E,dt=mt/2}}(!x.isSubplotConstrained||!M)&&sr("x"),(!x.isSubplotConstrained||!C)&&sr("y");var Je=E-It,je=S-mt;x.isSubplotConstrained&&!(C&&M)&&(C?(dt=Vr?0:It*S/E,je=Je*S/E):(Vr=dt?0:mt*E/S,Je=je*E/S)),St([Vr,dt,Je,je]),wr(),e.emit("plotly_relayouting",G)}function sr(It,mt){for(var er=x.isSubplotConstrained?{x:b,y:_}[It]:x[It+"axes"],lr=x.isSubplotConstrained?{x:_,y:b}[It]:[],Tr=0;Tr<er.length;Tr++){var Lr=er[Tr],ti=Lr._id,Br=x.xLinks[ti]||x.yLinks[ti],Vr=lr[0]||d[Br]||v[Br];Vr&&(mt?(mt[Lr._name+".range[0]"]=mt[Vr._name+".range[0]"],mt[Lr._name+".range[1]"]=mt[Vr._name+".range[1]"]):Lr.range=Vr.range.slice())}}function wr(){var It=[],mt;function er(ti){for(mt=0;mt<ti.length;mt++)ti[mt].fixedrange||It.push(ti[mt]._id)}function lr(ti,Br){for(mt=0;mt<ti.length;mt++){var Vr=ti[mt],dt=Vr[Br];!Vr.fixedrange&&dt.tickmode==="sync"&&It.push(dt._id)}}for(P&&(er(_),er(L.xaxes),er(x.xaxes),lr(t.overlays,"xaxis")),T&&(er(b),er(L.yaxes),er(x.yaxes),lr(t.overlays,"yaxis")),G={},mt=0;mt<It.length;mt++){var Tr=It[mt],Lr=FN(e,Tr);she.drawOne(e,Lr,{skipTitle:!0}),G[Lr._name+".range[0]"]=Lr.range[0],G[Lr._name+".range[1]"]=Lr.range[1]}she.redrawComponents(e,It)}function cr(){if(!e._transitioningWithDuration){var It=e._context.doubleClick,mt=[];C&&(mt=mt.concat(_)),M&&(mt=mt.concat(b)),x.xaxes&&(mt=mt.concat(x.xaxes)),x.yaxes&&(mt=mt.concat(x.yaxes));var er={},lr,Tr;if(It==="reset+autosize")for(It="autosize",Tr=0;Tr<mt.length;Tr++){lr=mt[Tr];var Lr=lr._rangeInitial0,ti=lr._rangeInitial1,Br=Lr!==void 0||ti!==void 0;if(Br&&(Lr!==void 0&&Lr!==lr.range[0]||ti!==void 0&&ti!==lr.range[1])||!Br&&lr.autorange!==!0){It="reset";break}}if(It==="autosize")for(Tr=0;Tr<mt.length;Tr++)lr=mt[Tr],lr.fixedrange||(er[lr._name+".autorange"]=!0);else if(It==="reset"){for((C||L.isSubplotConstrained)&&(mt=mt.concat(L.xaxes)),M&&!L.isSubplotConstrained&&(mt=mt.concat(L.yaxes)),L.isSubplotConstrained&&(C?M||(mt=mt.concat(b)):mt=mt.concat(_)),Tr=0;Tr<mt.length;Tr++)if(lr=mt[Tr],!lr.fixedrange){var Vr=lr._name,dt=lr._autorangeInitial;lr._rangeInitial0===void 0&&lr._rangeInitial1===void 0?er[Vr+".autorange"]=!0:lr._rangeInitial0===void 0?(er[Vr+".autorange"]=dt,er[Vr+".range"]=[null,lr._rangeInitial1]):lr._rangeInitial1===void 0?(er[Vr+".range"]=[lr._rangeInitial0,null],er[Vr+".autorange"]=dt):er[Vr+".range"]=[lr._rangeInitial0,lr._rangeInitial1]}}e.emit("plotly_doubleclick",null),EP.call("_guiRelayout",e,er)}}function $e(){St([0,0,E,S]),k0.syncOrAsync([edt.previousPromises,function(){e._fullLayout._replotting=!1,EP.call("_guiRelayout",e,G)}],e)}function St(It){var mt=e._fullLayout,er=mt._plots,lr=mt._subplots.cartesian,Tr,Lr,ti,Br;if(O&&EP.subplotsRegistry.splom.drag(e),z){for(Tr=0;Tr<lr.length;Tr++)if(Lr=er[lr[Tr]],ti=Lr.xaxis,Br=Lr.yaxis,Lr._scene){ti.limitRange&&ti.limitRange(),Br.limitRange&&Br.limitRange();var Vr=k0.simpleMap(ti.range,ti.r2l),dt=k0.simpleMap(Br.range,Br.r2l);Lr._scene.update({range:[Vr[0],dt[0],Vr[1],dt[1]]})}}if((O||z)&&($ht(e),Qht(e)),V){var Ge=It[2]/f._length,Je=It[3]/h._length;for(Tr=0;Tr<lr.length;Tr++){Lr=er[lr[Tr]],ti=Lr.xaxis,Br=Lr.yaxis;var je=(P||x.isSubplotConstrained)&&!ti.fixedrange&&d[ti._id],tt=(T||x.isSubplotConstrained)&&!Br.fixedrange&&v[Br._id],xt,Ie,xe,ke;if(je?(xt=Ge,xe=s||x.isSubplotConstrained?It[0]:_t(ti,xt)):x.xaHash[ti._id]?(xt=Ge,xe=It[0]*ti._length/f._length):x.yaHash[ti._id]?(xt=Je,xe=M==="ns"?-It[1]*ti._length/h._length:_t(ti,xt,{n:"top",s:"bottom"}[M])):(xt=Qt(ti,Ge,Je),xe=Vt(ti,xt)),xt>1&&(ti.maxallowed!==void 0&&P===(ti.range[0]<ti.range[1]?"e":"w")||ti.minallowed!==void 0&&P===(ti.range[0]<ti.range[1]?"w":"e"))&&(xt=1,xe=0),tt?(Ie=Je,ke=o||x.isSubplotConstrained?It[1]:_t(Br,Ie)):x.yaHash[Br._id]?(Ie=Je,ke=It[1]*Br._length/h._length):x.xaHash[Br._id]?(Ie=Ge,ke=C==="ew"?-It[0]*Br._length/f._length:_t(Br,Ie,{e:"right",w:"left"}[C])):(Ie=Qt(Br,Ge,Je),ke=Vt(Br,Ie)),Ie>1&&(Br.maxallowed!==void 0&&T===(Br.range[0]<Br.range[1]?"n":"s")||Br.minallowed!==void 0&&T===(Br.range[0]<Br.range[1]?"s":"n"))&&(Ie=1,ke=0),!(!xt&&!Ie)){xt||(xt=1),Ie||(Ie=1);var vt=ti._offset-xe/xt,ir=Br._offset-ke/Ie;Lr.clipRect.call(d_.setTranslate,xe,ke).call(d_.setScale,xt,Ie),Lr.plot.call(d_.setTranslate,vt,ir).call(d_.setScale,1/xt,1/Ie),(xt!==Lr.xScaleFactor||Ie!==Lr.yScaleFactor)&&(d_.setPointGroupScale(Lr.zoomScalePts,xt,Ie),d_.setTextPointsScale(Lr.zoomScaleTxt,xt,Ie)),d_.hideOutsideRangePoints(Lr.clipOnAxisFalseTraces,Lr),Lr.xScaleFactor=xt,Lr.yScaleFactor=Ie}}}}function Qt(It,mt,er){return It.fixedrange?0:P&&L.xaHash[It._id]?mt:T&&(L.isSubplotConstrained?L.xaHash:L.yaHash)[It._id]?er:0}function Vt(It,mt){return mt?(It.range=It._r.slice(),DN(It,mt),_t(It,mt)):0}function _t(It,mt,er){return It._length*(1-mt)*Jht[er||It.constraintoward||"middle"]}return te}function ghe(e,t,r,n){var i=k0.ensureSingle(e.draglayer,t,r,function(a){a.classed("drag",!0).style({fill:"transparent","stroke-width":0}).attr("data-subplot",e.id)});return i.call(Zht,n),i.node()}function mhe(e,t,r,n,i,a,o){var s=ghe(e,"rect",t,r);return qN.select(s).call(d_.setRect,n,i,a,o),s}function che(e,t){for(var r=0;r<e.length;r++)if(!e[r].fixedrange)return t;return""}function adt(e,t){var r=e.range[t],n=Math.abs(r-e.range[1-t]),i;return e.type==="date"?r:e.type==="log"?(i=Math.ceil(Math.max(0,-Math.log(n)/Math.LN10))+3,ahe("."+i+"g")(Math.pow(10,r))):(i=Math.floor(Math.log(Math.abs(r))/Math.LN10)-Math.floor(Math.log(n)/Math.LN10)+4,ahe("."+String(i)+"g")(r))}function zN(e,t,r,n,i){for(var a=0;a<e.length;a++){var o=e[a];if(!o.fixedrange)if(o.rangebreaks){var s=o._id.charAt(0)==="y",l=s?1-t:t,u=s?1-r:r;n[o._name+".range[0]"]=o.l2r(o.p2l(l*o._length)),n[o._name+".range[1]"]=o.l2r(o.p2l(u*o._length))}else{var c=o._rl[0],f=o._rl[1]-c;n[o._name+".range[0]"]=o.l2r(c+f*t),n[o._name+".range[1]"]=o.l2r(c+f*r)}}if(i&&i.length){var h=(t+(1-r))/2;zN(i,h,1-h,n,[])}}function fhe(e,t){for(var r=0;r<e.length;r++){var n=e[r];if(!n.fixedrange){if(n.rangebreaks){var i=0,a=n._length,o=n.p2l(i+t)-n.p2l(i),s=n.p2l(a+t)-n.p2l(a),l=(o+s)/2;n.range=[n.l2r(n._rl[0]-l),n.l2r(n._rl[1]-l)]}else n.range=[n.l2r(n._rl[0]-t/n._m),n.l2r(n._rl[1]-t/n._m)];n.limitRange&&n.limitRange()}}}function odt(e){return 1-(e>=0?Math.min(e,.9):1/(1/Math.max(e,-.3)+3.222))}function sdt(e,t,r){return e?e==="nsew"?r?"":t==="pan"?"move":"crosshair":e.toLowerCase()+"-resize":"pointer"}function yhe(e,t,r,n,i){return e.append("path").attr("class","zoombox").style({fill:t>.2?"rgba(0,0,0,0)":"rgba(255,255,255,0)","stroke-width":0}).attr("transform",vhe(r,n)).attr("d",i+"Z")}function _he(e,t,r){return e.append("path").attr("class","zoombox-corners").style({fill:ohe.background,stroke:ohe.defaultLine,"stroke-width":1,opacity:0}).attr("transform",vhe(t,r)).attr("d","M0,0Z")}function xhe(e,t,r,n,i,a){e.attr("d",n+"M"+r.l+","+r.t+"v"+r.h+"h"+r.w+"v-"+r.h+"h-"+r.w+"Z"),bhe(e,t,i,a)}function bhe(e,t,r,n){r||(e.transition().style("fill",n>.2?"rgba(0,0,0,0.4)":"rgba(255,255,255,0.3)").duration(200),t.transition().style("opacity",1).duration(200))}function ON(e){qN.select(e).selectAll(".zoombox,.js-zoombox-backdrop,.js-zoombox-menu,.zoombox-corners").remove()}function whe(e){uhe&&e.data&&e._context.showTips&&(k0.notifier(k0._(e,"Double-click to zoom back out"),"long"),uhe=!1)}function ldt(e,t){return"M"+(e.l-.5)+","+(t-np-.5)+"h-3v"+(2*np+1)+"h3ZM"+(e.r+.5)+","+(t-np-.5)+"h3v"+(2*np+1)+"h-3Z"}function udt(e,t){return"M"+(t-np-.5)+","+(e.t-.5)+"v-3h"+(2*np+1)+"v3ZM"+(t-np-.5)+","+(e.b+.5)+"v3h"+(2*np+1)+"v-3Z"}function CP(e){var t=Math.floor(Math.min(e.b-e.t,e.r-e.l,np)/2);return"M"+(e.l-3.5)+","+(e.t-.5+t)+"h3v"+-t+"h"+t+"v-3h-"+(t+3)+"ZM"+(e.r+3.5)+","+(e.t-.5+t)+"h-3v"+-t+"h"+-t+"v-3h"+(t+3)+"ZM"+(e.r+3.5)+","+(e.b+.5-t)+"h-3v"+t+"h"+-t+"v3h"+(t+3)+"ZM"+(e.l-3.5)+","+(e.b+.5-t)+"h3v"+t+"h"+t+"v3h-"+(t+3)+"Z"}function hhe(e,t,r,n,i){for(var a=!1,o={},s={},l,u,c,f,h=(i||{}).xaHash,d=(i||{}).yaHash,v=0;v<t.length;v++){var _=t[v];for(l in r)if(_[l]){for(c in _)!(i&&(h[c]||d[c]))&&!(c.charAt(0)==="x"?r:n)[c]&&(o[c]=l);for(u in n)!(i&&(h[u]||d[u]))&&_[u]&&(a=!0)}for(u in n)if(_[u])for(f in _)!(i&&(h[f]||d[f]))&&!(f.charAt(0)==="x"?r:n)[f]&&(s[f]=u)}a&&(k0.extendFlat(o,s),s={});var b={},p=[];for(c in o){var k=FN(e,c);p.push(k),b[k._id]=k}var E={},S=[];for(f in s){var L=FN(e,f);S.push(L),E[L._id]=L}return{xaHash:b,yaHash:E,xaxes:p,yaxes:S,xLinks:o,yLinks:s,isSubplotConstrained:a}}function The(e,t){if(!jht)e.onwheel!==void 0?e.onwheel=t:e.onmousewheel!==void 0?e.onmousewheel=t:e.isAddedWheelEvent||(e.isAddedWheelEvent=!0,e.addEventListener("wheel",t,{passive:!1}));else{var r=e.onwheel!==void 0?"wheel":"mousewheel";e._onwheel&&e.removeEventListener(r,e._onwheel),e._onwheel=t,e.addEventListener(r,t,{passive:!1})}}function dhe(e){var t=[];for(var r in e)t.push(e[r]);return t}Ahe.exports={makeDragBox:ndt,makeDragger:ghe,makeRectDragger:mhe,makeZoombox:yhe,makeCorners:_he,updateZoombox:xhe,xyCorners:CP,transitionZoombox:bhe,removeZoombox:ON,showDoubleClickNotifier:whe,attachWheelEventHandler:The}});var UN=ye(PP=>{"use strict";var cdt=Oa(),LP=vf(),fdt=yv(),hdt=Sg(),Lg=NN().makeDragBox,gd=hd().DRAGGERSIZE;PP.initInteractions=function(t){var r=t._fullLayout;if(t._context.staticPlot){cdt.select(t).selectAll(".drag").remove();return}if(!(!r._has("cartesian")&&!r._has("splom"))){var n=Object.keys(r._plots||{}).sort(function(a,o){if((r._plots[a].mainplot&&!0)===(r._plots[o].mainplot&&!0)){var s=a.split("y"),l=o.split("y");return s[0]===l[0]?Number(s[1]||1)-Number(l[1]||1):Number(s[0]||1)-Number(l[0]||1)}return r._plots[a].mainplot?1:-1});n.forEach(function(a){var o=r._plots[a],s=o.xaxis,l=o.yaxis;if(!o.mainplot){var u=Lg(t,o,s._offset,l._offset,s._length,l._length,"ns","ew");u.onmousemove=function(h){t._fullLayout._rehover=function(){t._fullLayout._hoversubplot===a&&t._fullLayout._plots[a]&&LP.hover(t,h,a)},LP.hover(t,h,a),t._fullLayout._lasthover=u,t._fullLayout._hoversubplot=a},u.onmouseout=function(h){t._dragging||(t._fullLayout._hoversubplot=null,fdt.unhover(t,h))},t._context.showAxisDragHandles&&(Lg(t,o,s._offset-gd,l._offset-gd,gd,gd,"n","w"),Lg(t,o,s._offset+s._length,l._offset-gd,gd,gd,"n","e"),Lg(t,o,s._offset-gd,l._offset+l._length,gd,gd,"s","w"),Lg(t,o,s._offset+s._length,l._offset+l._length,gd,gd,"s","e"))}if(t._context.showAxisDragHandles){if(a===s._mainSubplot){var c=s._mainLinePosition;s.side==="top"&&(c-=gd),Lg(t,o,s._offset+s._length*.1,c,s._length*.8,gd,"","ew"),Lg(t,o,s._offset,c,s._length*.1,gd,"","w"),Lg(t,o,s._offset+s._length*.9,c,s._length*.1,gd,"","e")}if(a===l._mainSubplot){var f=l._mainLinePosition;l.side!=="right"&&(f-=gd),Lg(t,o,f,l._offset+l._length*.1,gd,l._length*.8,"ns",""),Lg(t,o,f,l._offset+l._length*.9,gd,l._length*.1,"s",""),Lg(t,o,f,l._offset,gd,l._length*.1,"n","")}}});var i=r._hoverlayer.node();i.onmousemove=function(a){a.target=t._fullLayout._lasthover,LP.hover(t,a,r._hoversubplot)},i.onclick=function(a){a.target=t._fullLayout._lasthover,LP.click(t,a)},i.onmousedown=function(a){t._fullLayout._lasthover.onmousedown(a)},PP.updateFx(t)}};PP.updateFx=function(e){var t=e._fullLayout,r=t.dragmode==="pan"?"move":"crosshair";hdt(t._draggers,r)}});var Ehe=ye((Tor,Mhe)=>{"use strict";var She=qa();Mhe.exports=function(t){for(var r=She.layoutArrayContainers,n=She.layoutArrayRegexes,i=t.split("[")[0],a,o,s=0;s<n.length;s++)if(o=t.match(n[s]),o&&o.index===0){a=o[0];break}if(a||(a=r[r.indexOf(i)]),!a)return!1;var l=t.slice(a.length);return l?(o=l.match(/^\[(0|[1-9][0-9]*)\](\.(.+))?$/),o?{array:a,index:Number(o[1]),property:o[3]||""}:!1):{array:a,index:"",property:""}}});var Che=ye(UM=>{"use strict";var ddt=my(),VN=w6(),NM=G1(),vdt=R6().sorterAsc,GN=qa();UM.containerArrayMatch=Ehe();var pdt=UM.isAddVal=function(t){return t==="add"||ddt(t)},khe=UM.isRemoveVal=function(t){return t===null||t==="remove"};UM.applyContainerArrayChanges=function(t,r,n,i,a){var o=r.astr,s=GN.getComponentMethod(o,"supplyLayoutDefaults"),l=GN.getComponentMethod(o,"draw"),u=GN.getComponentMethod(o,"drawOne"),c=i.replot||i.recalc||s===VN||l===VN,f=t.layout,h=t._fullLayout;if(n[""]){Object.keys(n).length>1&&NM.warn("Full array edits are incompatible with other edits",o);var d=n[""][""];if(khe(d))r.set(null);else if(Array.isArray(d))r.set(d);else return NM.warn("Unrecognized full array edit value",o,d),!0;return c?!1:(s(f,h),l(t),!0)}var v=Object.keys(n).map(Number).sort(vdt),_=r.get(),b=_||[],p=a(h,o).get(),k=[],E=-1,S=b.length,L,x,C,M,g,P,T,z;for(L=0;L<v.length;L++){if(C=v[L],M=n[C],g=Object.keys(M),P=M[""],T=pdt(P),C<0||C>b.length-(T?0:1)){NM.warn("index out of range",o,C);continue}if(P!==void 0)g.length>1&&NM.warn("Insertion & removal are incompatible with edits to the same index.",o,C),khe(P)?k.push(C):T?(P==="add"&&(P={}),b.splice(C,0,P),p&&p.splice(C,0,{})):NM.warn("Unrecognized full object edit value",o,C,P),E===-1&&(E=C);else for(x=0;x<g.length;x++)z=o+"["+C+"].",a(b[C],g[x],z).set(M[g[x]])}for(L=k.length-1;L>=0;L--)b.splice(k[L],1),p&&p.splice(k[L],1);if(b.length?_||r.set(b):r.set(null),c)return!1;if(s(f,h),u!==VN){var O;if(E===-1)O=v;else{for(S=Math.max(b.length,S),O=[],L=0;L<v.length&&(C=v[L],!(C>=E));L++)O.push(C);for(L=E;L<S;L++)O.push(L)}for(L=0;L<O.length;L++)u(t,O[L])}else l(t);return!0}});var Ohe=ye(C0=>{"use strict";var Rhe=Eo(),Dhe=qa(),Uv=Dr(),VM=Mc(),Fhe=hf(),zhe=ka(),GM=Fhe.cleanId,gdt=Fhe.getFromTrace,HN=Dhe.traceIs,mdt=["x","y","z"];C0.clearPromiseQueue=function(e){Array.isArray(e._promises)&&e._promises.length>0&&Uv.log("Clearing previous rejected promises from queue."),e._promises=[]};C0.cleanLayout=function(e){var t,r;e||(e={}),e.xaxis1&&(e.xaxis||(e.xaxis=e.xaxis1),delete e.xaxis1),e.yaxis1&&(e.yaxis||(e.yaxis=e.yaxis1),delete e.yaxis1),e.scene1&&(e.scene||(e.scene=e.scene1),delete e.scene1);var n=(VM.subplotsRegistry.cartesian||{}).attrRegex,i=(VM.subplotsRegistry.polar||{}).attrRegex,a=(VM.subplotsRegistry.ternary||{}).attrRegex,o=(VM.subplotsRegistry.gl3d||{}).attrRegex,s=Object.keys(e);for(t=0;t<s.length;t++){var l=s[t];if(n&&n.test(l)){var u=e[l];u.anchor&&u.anchor!=="free"&&(u.anchor=GM(u.anchor)),u.overlaying&&(u.overlaying=GM(u.overlaying)),u.type||(u.isdate?u.type="date":u.islog?u.type="log":u.isdate===!1&&u.islog===!1&&(u.type="linear")),(u.autorange==="withzero"||u.autorange==="tozero")&&(u.autorange=!0,u.rangemode="tozero"),u.insiderange&&delete u.range,delete u.islog,delete u.isdate,delete u.categories,IP(u,"domain")&&delete u.domain}}var c=Array.isArray(e.annotations)?e.annotations.length:0;for(t=0;t<c;t++){var f=e.annotations[t];Uv.isPlainObject(f)&&(Q3(f,"xref"),Q3(f,"yref"))}var h=Array.isArray(e.shapes)?e.shapes.length:0;for(t=0;t<h;t++){var d=e.shapes[t];Uv.isPlainObject(d)&&(Q3(d,"xref"),Q3(d,"yref"))}var v=Array.isArray(e.images)?e.images.length:0;for(t=0;t<v;t++){var _=e.images[t];Uv.isPlainObject(_)&&(Q3(_,"xref"),Q3(_,"yref"))}var b=e.legend;return b&&(b.x>3?(b.x=1.02,b.xanchor="left"):b.x<-2&&(b.x=-.02,b.xanchor="right"),b.y>3?(b.y=1.02,b.yanchor="bottom"):b.y<-2&&(b.y=-.02,b.yanchor="top")),e.dragmode==="rotate"&&(e.dragmode="orbit"),zhe.clean(e),e.template&&e.template.layout&&C0.cleanLayout(e.template.layout),e};function Q3(e,t){var r=e[t],n=t.charAt(0);r&&r!=="paper"&&(e[t]=GM(r,n,!0))}C0.cleanData=function(e){for(var t=0;t<e.length;t++){var r=e[t],n;if(r.type==="histogramy"&&"xbins"in r&&!("ybins"in r)&&(r.ybins=r.xbins,delete r.xbins),r.type==="histogramy"&&C0.swapXYData(r),(r.type==="histogramx"||r.type==="histogramy")&&(r.type="histogram"),"scl"in r&&!("colorscale"in r)&&(r.colorscale=r.scl,delete r.scl),"reversescl"in r&&!("reversescale"in r)&&(r.reversescale=r.reversescl,delete r.reversescl),r.xaxis&&(r.xaxis=GM(r.xaxis,"x")),r.yaxis&&(r.yaxis=GM(r.yaxis,"y")),HN(r,"gl3d")&&r.scene&&(r.scene=VM.subplotsRegistry.gl3d.cleanId(r.scene)),!HN(r,"pie-like")&&!HN(r,"bar-like"))if(Array.isArray(r.textposition))for(n=0;n<r.textposition.length;n++)r.textposition[n]=Phe(r.textposition[n]);else r.textposition&&(r.textposition=Phe(r.textposition));var i=Dhe.getModule(r);if(i&&i.colorbar){var a=i.colorbar.container,o=a?r[a]:r;o&&o.colorscale&&(o.colorscale==="YIGnBu"&&(o.colorscale="YlGnBu"),o.colorscale==="YIOrRd"&&(o.colorscale="YlOrRd"))}if(r.type==="surface"&&Uv.isPlainObject(r.contours)){var s=["x","y","z"];for(n=0;n<s.length;n++){var l=r.contours[s[n]];Uv.isPlainObject(l)&&(l.highlightColor&&(l.highlightcolor=l.highlightColor,delete l.highlightColor),l.highlightWidth&&(l.highlightwidth=l.highlightWidth,delete l.highlightWidth))}}if(r.type==="candlestick"||r.type==="ohlc"){var u=(r.increasing||{}).showlegend!==!1,c=(r.decreasing||{}).showlegend!==!1,f=Lhe(r.increasing),h=Lhe(r.decreasing);if(f!==!1&&h!==!1){var d=ydt(f,h,u,c);d&&(r.name=d)}else(f||h)&&!r.name&&(r.name=f||h)}IP(r,"line")&&delete r.line,"marker"in r&&(IP(r.marker,"line")&&delete r.marker.line,IP(r,"marker")&&delete r.marker),zhe.clean(r),r.autobinx&&(delete r.autobinx,delete r.xbins),r.autobiny&&(delete r.autobiny,delete r.ybins)}};function Lhe(e){if(!Uv.isPlainObject(e))return!1;var t=e.name;return delete e.name,delete e.showlegend,(typeof t=="string"||typeof t=="number")&&String(t)}function ydt(e,t,r,n){if(r&&!n)return e;if(n&&!r||!e.trim())return t;if(!t.trim())return e;var i=Math.min(e.length,t.length),a;for(a=0;a<i&&e.charAt(a)===t.charAt(a);a++);var o=e.slice(0,a);return o.trim()}function Phe(e){var t="middle",r="center";return typeof e=="string"&&(e.indexOf("top")!==-1?t="top":e.indexOf("bottom")!==-1&&(t="bottom"),e.indexOf("left")!==-1?r="left":e.indexOf("right")!==-1&&(r="right")),t+" "+r}function IP(e,t){return t in e&&typeof e[t]=="object"&&Object.keys(e[t]).length===0}C0.swapXYData=function(e){var t;if(Uv.swapAttrs(e,["?","?0","d?","?bins","nbins?","autobin?","?src","error_?"]),Array.isArray(e.z)&&Array.isArray(e.z[0])&&(e.transpose?delete e.transpose:e.transpose=!0),e.error_x&&e.error_y){var r=e.error_y,n="copy_ystyle"in r?r.copy_ystyle:!(r.color||r.thickness||r.width);Uv.swapAttrs(e,["error_?.copy_ystyle"]),n&&Uv.swapAttrs(e,["error_?.color","error_?.thickness","error_?.width"])}if(typeof e.hoverinfo=="string"){var i=e.hoverinfo.split("+");for(t=0;t<i.length;t++)i[t]==="x"?i[t]="y":i[t]==="y"&&(i[t]="x");e.hoverinfo=i.join("+")}};C0.coerceTraceIndices=function(e,t){if(Rhe(t))return[t];if(!Array.isArray(t)||!t.length)return e.data.map(function(i,a){return a});if(Array.isArray(t)){for(var r=[],n=0;n<t.length;n++)Uv.isIndex(t[n],e.data.length)?r.push(t[n]):Uv.warn("trace index (",t[n],") is not a number or is out of bounds");return r}return t};C0.manageArrayContainers=function(e,t,r){var n=e.obj,i=e.parts,a=i.length,o=i[a-1],s=Rhe(o);if(s&&t===null){var l=i.slice(0,a-1).join("."),u=Uv.nestedProperty(n,l).get();u.splice(o,1)}else s&&e.get()===void 0&&e.get()===void 0&&(r[e.astr]=null),e.set(t)};var _dt=/(\.[^\[\]\.]+|\[[^\[\]\.]+\])$/;function Ihe(e){var t=e.search(_dt);if(t>0)return e.slice(0,t)}C0.hasParent=function(e,t){for(var r=Ihe(t);r;){if(r in e)return!0;r=Ihe(r)}return!1};C0.clearAxisTypes=function(e,t,r){for(var n=0;n<t.length;n++)for(var i=e._fullData[n],a=0;a<3;a++){var o=gdt(e,i,mdt[a]);if(o&&o.type!=="log"){var s=o._name,l=o._id.slice(1);if(l.slice(0,5)==="scene"){if(r[l]!==void 0)continue;s=l+"."+s}var u=s+".type";r[s]===void 0&&r[u]===void 0&&Uv.nestedProperty(e.layout,u).set(null)}}};var jN=(e,t)=>{let r=(...n)=>n.every(i=>Uv.isPlainObject(i))||n.every(i=>Array.isArray(i));if([e,t].every(n=>Array.isArray(n))){if(e.length!==t.length)return!1;for(let n=0;n<e.length;n++){let i=e[n],a=t[n];if(i!==a&&!(r(i,a)?jN(i,a):!1))return!1}return!0}else if([e,t].every(n=>Uv.isPlainObject(n))){if(Object.keys(e).length!==Object.keys(t).length)return!1;for(let n in e){if(n.startsWith("_"))continue;let i=e[n],a=t[n];if(i!==a&&!(r(i,a)?jN(i,a):!1))return!1}return!0}return!1};C0.collectionsAreEqual=jN});var HP=ye(_l=>{"use strict";var FP=Oa(),xdt=Eo(),bdt=nq(),Sa=Dr(),Ec=Sa.nestedProperty,ZN=_3(),ap=yne(),L0=qa(),VP=w3(),Qo=Mc(),Vv=ho(),wdt=xB(),Tdt=Rd(),WN=So(),Adt=ka(),Sdt=UN().initInteractions,Mdt=Wp(),Edt=Of().clearOutline,Vhe=cb().dfltConfig,RP=Che(),vh=Ohe(),Au=wM(),v_=mc(),kdt=hd().AX_NAME_PATTERN,XN=0,qhe=5;function Cdt(e,t,r,n){var i;if(e=Sa.getGraphDiv(e),ZN.init(e),Sa.isPlainObject(t)){var a=t;t=a.data,r=a.layout,n=a.config,i=a.frames}var o=ZN.triggerHandler(e,"plotly_beforeplot",[t,r,n]);if(o===!1)return Promise.reject();!t&&!r&&!Sa.isPlotDiv(e)&&Sa.warn("Calling _doPlot as if redrawing but this container doesn't yet have a plot.",e);function s(){if(i)return _l.addFrames(e,i)}Hhe(e,n),r||(r={}),FP.select(e).classed("js-plotly-plot",!0),WN.makeTester(),Array.isArray(e._promises)||(e._promises=[]);var l=(e.data||[]).length===0&&Array.isArray(t);Array.isArray(t)&&(vh.cleanData(t),l?e.data=t:e.data.push.apply(e.data,t),e.empty=!1),(!e.layout||l)&&(e.layout=vh.cleanLayout(r)),Qo.supplyDefaults(e);var u=e._fullLayout,c=u._has("cartesian");u._replotting=!0,(l||u._shouldCreateBgLayer)&&($dt(e),u._shouldCreateBgLayer&&delete u._shouldCreateBgLayer),WN.initGradients(e),WN.initPatterns(e),l&&Vv.saveShowSpikeInitial(e);var f=!e.calcdata||e.calcdata.length!==(e._fullData||[]).length;f&&Qo.doCalcdata(e);for(var h=0;h<e.calcdata.length;h++)e.calcdata[h][0].trace=e._fullData[h];e._context.responsive?e._responsiveChartHandler||(e._responsiveChartHandler=function(){Sa.isHidden(e)||Qo.resize(e)},window.addEventListener("resize",e._responsiveChartHandler)):Sa.clearResponsive(e);var d=Sa.extendFlat({},u._size),v=0;function _(){for(var C=u._basePlotModules,M=0;M<C.length;M++)C[M].drawFramework&&C[M].drawFramework(e);!u._glcanvas&&u._has("gl")&&(u._glcanvas=u._glcontainer.selectAll(".gl-canvas").data([{key:"contextLayer",context:!0,pick:!1},{key:"focusLayer",context:!1,pick:!1},{key:"pickLayer",context:!1,pick:!0}],function(z){return z.key}),u._glcanvas.enter().append("canvas").attr("class",function(z){return"gl-canvas gl-canvas-"+z.key.replace("Layer","")}).style({position:"absolute",top:0,left:0,overflow:"visible","pointer-events":"none"}));var g=e._context.plotGlPixelRatio;if(u._glcanvas){u._glcanvas.attr("width",u.width*g).attr("height",u.height*g).style("width",u.width+"px").style("height",u.height+"px");var P=u._glcanvas.data()[0].regl;if(P&&(Math.floor(u.width*g)!==P._gl.drawingBufferWidth||Math.floor(u.height*g)!==P._gl.drawingBufferHeight)){var T="WebGL context buffer and canvas dimensions do not match due to browser/WebGL bug.";if(v)Sa.error(T);else return Sa.log(T+" Clearing graph and plotting again."),Qo.cleanPlot([],{},e._fullData,u),Qo.supplyDefaults(e),u=e._fullLayout,Qo.doCalcdata(e),v++,_()}}return u.modebar.orientation==="h"?u._modebardiv.style("height",null).style("width","100%"):u._modebardiv.style("width",null).style("height",u.height+"px"),Qo.previousPromises(e)}function b(){if(Qo.clearAutoMarginIds(e),Au.drawMarginPushers(e),Vv.allowAutoMargin(e),e._fullLayout.title.text&&e._fullLayout.title.automargin&&Qo.allowAutoMargin(e,"title.automargin"),u._has("pie"))for(var C=e._fullData,M=0;M<C.length;M++){var g=C[M];g.type==="pie"&&g.automargin&&Qo.allowAutoMargin(e,"pie."+g.uid+".automargin")}return Qo.doAutoMargin(e),Qo.previousPromises(e)}function p(){if(Qo.didMarginChange(d,u._size))return Sa.syncOrAsync([b,Au.layoutStyles],e)}function k(){if(!f){E();return}return Sa.syncOrAsync([L0.getComponentMethod("shapes","calcAutorange"),L0.getComponentMethod("annotations","calcAutorange"),E],e)}function E(){e._transitioning||(Au.doAutoRangeAndConstraints(e),l&&Vv.saveRangeInitial(e),L0.getComponentMethod("rangeslider","calcAutorange")(e))}function S(){return Vv.draw(e,l?"":"redraw")}var L=[Qo.previousPromises,s,_,b,p];c&&L.push(k),L.push(Au.layoutStyles),c&&L.push(S,function(M){var g=M._fullLayout._insideTickLabelsUpdaterange;if(g)return M._fullLayout._insideTickLabelsUpdaterange=void 0,HM(M,g).then(function(){Vv.saveRangeInitial(M,!0)})}),L.push(Au.drawData,Au.finalDraw,Sdt,Qo.addLinks,Qo.rehover,Qo.redrag,Qo.reselect,Qo.doAutoMargin,Qo.previousPromises);var x=Sa.syncOrAsync(L,e);return(!x||!x.then)&&(x=Promise.resolve()),x.then(function(){return jM(e),e})}function jM(e){var t=e._fullLayout;t._redrawFromAutoMarginCount?t._redrawFromAutoMarginCount--:e.emit("plotly_afterplot")}function Ldt(e){return Sa.extendFlat(Vhe,e)}function Ghe(e,t){try{e._fullLayout._paper.style("background",t)}catch(r){Sa.error(r)}}function Pdt(e,t){var r=Adt.combine(t,"white");Ghe(e,r)}function Hhe(e,t){if(!e._context){e._context=Sa.extendDeep({},Vhe);var r=FP.select("base");e._context._baseUrl=r.size()&&r.attr("href")?window.location.href.split("#")[0]:""}var n=e._context,i,a,o;if(t){for(a=Object.keys(t),i=0;i<a.length;i++)o=a[i],!(o==="editable"||o==="edits")&&o in n&&(o==="setBackground"&&t[o]==="opaque"?n[o]=Pdt:n[o]=t[o]);var s=t.editable;if(s!==void 0)for(n.editable=s,a=Object.keys(n.edits),i=0;i<a.length;i++)n.edits[a[i]]=s;if(t.edits)for(a=Object.keys(t.edits),i=0;i<a.length;i++)o=a[i],o in n.edits&&(n.edits[o]=t.edits[o]);n._exportedPlot=t._exportedPlot}n.staticPlot&&(n.editable=!1,n.edits={},n.autosizable=!1,n.scrollZoom=!1,n.doubleClick=!1,n.showTips=!1,n.showLink=!1,n.displayModeBar=!1),n.displayModeBar==="hover"&&!bdt&&(n.displayModeBar=!0),(n.setBackground==="transparent"||typeof n.setBackground!="function")&&(n.setBackground=Ghe),n._hasZeroHeight=n._hasZeroHeight||e.clientHeight===0,n._hasZeroWidth=n._hasZeroWidth||e.clientWidth===0;var l=n.scrollZoom,u=n._scrollZoom={};if(l===!0)u.cartesian=1,u.gl3d=1,u.geo=1,u.mapbox=1,u.map=1;else if(typeof l=="string"){var c=l.split("+");for(i=0;i<c.length;i++)u[c[i]]=1}else l!==!1&&(u.gl3d=1,u.geo=1,u.mapbox=1,u.map=1)}function Idt(e){if(e=Sa.getGraphDiv(e),!Sa.isPlotDiv(e))throw new Error("This element is not a Plotly plot: "+e);return vh.cleanData(e.data),vh.cleanLayout(e.layout),e.calcdata=void 0,_l._doPlot(e).then(function(){return e.emit("plotly_redraw"),e})}function Rdt(e,t,r,n){return e=Sa.getGraphDiv(e),Qo.cleanPlot([],{},e._fullData||[],e._fullLayout||{}),Qo.purge(e),_l._doPlot(e,t,r,n)}function zP(e,t){var r=t+1,n=[],i,a;for(i=0;i<e.length;i++)a=e[i],a<0?n.push(r+a):n.push(a);return n}function OP(e,t,r){var n,i;for(n=0;n<t.length;n++){if(i=t[n],i!==parseInt(i,10))throw new Error("all values in "+r+" must be integers");if(i>=e.data.length||i<-e.data.length)throw new Error(r+" must be valid indices for gd.data.");if(t.indexOf(i,n+1)>-1||i>=0&&t.indexOf(-e.data.length+i)>-1||i<0&&t.indexOf(e.data.length+i)>-1)throw new Error("each index in "+r+" must be unique.")}}function jhe(e,t,r){if(!Array.isArray(e.data))throw new Error("gd.data must be an array.");if(typeof t=="undefined")throw new Error("currentIndices is a required argument.");if(Array.isArray(t)||(t=[t]),OP(e,t,"currentIndices"),typeof r!="undefined"&&!Array.isArray(r)&&(r=[r]),typeof r!="undefined"&&OP(e,r,"newIndices"),typeof r!="undefined"&&t.length!==r.length)throw new Error("current and new indices must be of equal length.")}function Ddt(e,t,r){var n,i;if(!Array.isArray(e.data))throw new Error("gd.data must be an array.");if(typeof t=="undefined")throw new Error("traces must be defined.");for(Array.isArray(t)||(t=[t]),n=0;n<t.length;n++)if(i=t[n],typeof i!="object"||Array.isArray(i)||i===null)throw new Error("all values in traces array must be non-array objects");if(typeof r!="undefined"&&!Array.isArray(r)&&(r=[r]),typeof r!="undefined"&&r.length!==t.length)throw new Error("if indices is specified, traces.length must equal indices.length")}function Fdt(e,t,r,n){var i=Sa.isPlainObject(n);if(!Array.isArray(e.data))throw new Error("gd.data must be an array");if(!Sa.isPlainObject(t))throw new Error("update must be a key:value object");if(typeof r=="undefined")throw new Error("indices must be an integer or array of integers");OP(e,r,"indices");for(var a in t){if(!Array.isArray(t[a])||t[a].length!==r.length)throw new Error("attribute "+a+" must be an array of length equal to indices array length");if(i&&(!(a in n)||!Array.isArray(n[a])||n[a].length!==t[a].length))throw new Error("when maxPoints is set as a key:value object it must contain a 1:1 correspondence with the keys and number of traces in the update object")}}function zdt(e,t,r,n){var i=Sa.isPlainObject(n),a=[],o,s,l,u,c;Array.isArray(r)||(r=[r]),r=zP(r,e.data.length-1);for(var f in t)for(var h=0;h<r.length;h++){if(o=e.data[r[h]],l=Ec(o,f),s=l.get(),u=t[f][h],!Sa.isArrayOrTypedArray(u))throw new Error("attribute: "+f+" index: "+h+" must be an array");if(!Sa.isArrayOrTypedArray(s))throw new Error("cannot extend missing or non-array attribute: "+f);if(s.constructor!==u.constructor)throw new Error("cannot extend array with an array of a different type: "+f);c=i?n[f][h]:n,xdt(c)||(c=-1),a.push({prop:l,target:s,insert:u,maxp:Math.floor(c)})}return a}function Whe(e,t,r,n,i){Fdt(e,t,r,n);for(var a=zdt(e,t,r,n),o={},s={},l=0;l<a.length;l++){var u=a[l].prop,c=a[l].maxp,f=i(a[l].target,a[l].insert,c);u.set(f[0]),Array.isArray(o[u.astr])||(o[u.astr]=[]),o[u.astr].push(f[1]),Array.isArray(s[u.astr])||(s[u.astr]=[]),s[u.astr].push(a[l].target.length)}return{update:o,maxPoints:s}}function Xhe(e,t){var r=new e.constructor(e.length+t.length);return r.set(e),r.set(t,e.length),r}function Zhe(e,t,r,n){e=Sa.getGraphDiv(e);function i(l,u,c){var f,h;if(Sa.isTypedArray(l))if(c<0){var d=new l.constructor(0),v=Xhe(l,u);c<0?(f=v,h=d):(f=d,h=v)}else if(f=new l.constructor(c),h=new l.constructor(l.length+u.length-c),c===u.length)f.set(u),h.set(l);else if(c<u.length){var _=u.length-c;f.set(u.subarray(_)),h.set(l),h.set(u.subarray(0,_),l.length)}else{var b=c-u.length,p=l.length-b;f.set(l.subarray(p)),f.set(u,b),h.set(l.subarray(0,p))}else f=l.concat(u),h=c>=0&&c<f.length?f.splice(0,f.length-c):[];return[f,h]}var a=Whe(e,t,r,n,i),o=_l.redraw(e),s=[e,a.update,r,a.maxPoints];return ap.add(e,_l.prependTraces,s,Zhe,arguments),o}function Yhe(e,t,r,n){e=Sa.getGraphDiv(e);function i(l,u,c){var f,h;if(Sa.isTypedArray(l))if(c<=0){var d=new l.constructor(0),v=Xhe(u,l);c<0?(f=v,h=d):(f=d,h=v)}else if(f=new l.constructor(c),h=new l.constructor(l.length+u.length-c),c===u.length)f.set(u),h.set(l);else if(c<u.length){var _=u.length-c;f.set(u.subarray(0,_)),h.set(u.subarray(_)),h.set(l,_)}else{var b=c-u.length;f.set(u),f.set(l.subarray(0,b),u.length),h.set(l.subarray(b))}else f=u.concat(l),h=c>=0&&c<f.length?f.splice(c,f.length):[];return[f,h]}var a=Whe(e,t,r,n,i),o=_l.redraw(e),s=[e,a.update,r,a.maxPoints];return ap.add(e,_l.extendTraces,s,Yhe,arguments),o}function Khe(e,t,r){e=Sa.getGraphDiv(e);var n=[],i=_l.deleteTraces,a=Khe,o=[e,n],s=[e,t],l,u;for(Ddt(e,t,r),Array.isArray(t)||(t=[t]),t=t.map(function(c){return Sa.extendFlat({},c)}),vh.cleanData(t),l=0;l<t.length;l++)e.data.push(t[l]);for(l=0;l<t.length;l++)n.push(-t.length+l);if(typeof r=="undefined")return u=_l.redraw(e),ap.add(e,i,o,a,s),u;Array.isArray(r)||(r=[r]);try{jhe(e,n,r)}catch(c){throw e.data.splice(e.data.length-t.length,t.length),c}return ap.startSequence(e),ap.add(e,i,o,a,s),u=_l.moveTraces(e,n,r),ap.stopSequence(e),u}function Jhe(e,t){e=Sa.getGraphDiv(e);var r=[],n=_l.addTraces,i=Jhe,a=[e,r,t],o=[e,t],s,l;if(typeof t=="undefined")throw new Error("indices must be an integer or array of integers.");for(Array.isArray(t)||(t=[t]),OP(e,t,"indices"),t=zP(t,e.data.length-1),t.sort(Sa.sorterDes),s=0;s<t.length;s+=1)l=e.data.splice(t[s],1)[0],r.push(l);var u=_l.redraw(e);return ap.add(e,n,a,i,o),u}function YN(e,t,r){e=Sa.getGraphDiv(e);var n=[],i=[],a=YN,o=YN,s=[e,r,t],l=[e,t,r],u;if(jhe(e,t,r),t=Array.isArray(t)?t:[t],typeof r=="undefined")for(r=[],u=0;u<t.length;u++)r.push(-t.length+u);for(r=Array.isArray(r)?r:[r],t=zP(t,e.data.length-1),r=zP(r,e.data.length-1),u=0;u<e.data.length;u++)t.indexOf(u)===-1&&n.push(e.data[u]);for(u=0;u<t.length;u++)i.push({newIndex:r[u],trace:e.data[t[u]]});for(i.sort(function(f,h){return f.newIndex-h.newIndex}),u=0;u<i.length;u+=1)n.splice(i[u].newIndex,0,i[u].trace);e.data=n;var c=_l.redraw(e);return ap.add(e,a,s,o,l),c}function qP(e,t,r,n){e=Sa.getGraphDiv(e),vh.clearPromiseQueue(e);var i={};if(typeof t=="string")i[t]=r;else if(Sa.isPlainObject(t))i=Sa.extendFlat({},t),n===void 0&&(n=r);else return Sa.warn("Restyle fail.",t,r,n),Promise.reject();Object.keys(i).length&&(e.changed=!0);var a=vh.coerceTraceIndices(e,n),o=$he(e,i,a),s=o.flags;s.calc&&(e.calcdata=void 0),s.clearAxisTypes&&vh.clearAxisTypes(e,a,{});var l=[];s.fullReplot?l.push(_l._doPlot):(l.push(Qo.previousPromises),Qo.supplyDefaults(e),s.markerSize&&(Qo.doCalcdata(e),GP(l)),s.style&&l.push(Au.doTraceStyle),s.colorbars&&l.push(Au.doColorBars),l.push(jM)),l.push(Qo.rehover,Qo.redrag,Qo.reselect),ap.add(e,qP,[e,o.undoit,o.traces],qP,[e,o.redoit,o.traces]);var u=Sa.syncOrAsync(l,e);return(!u||!u.then)&&(u=Promise.resolve()),u.then(function(){return e.emit("plotly_restyle",o.eventData),e})}function Ly(e){return e===void 0?null:e}function DP(e,t){return t?function(r,n,i){var a=Ec(r,n),o=a.set;return a.set=function(s){var l=(i||"")+n;BP(l,a.get(),s,e),o(s)},a}:Ec}function BP(e,t,r,n){if(Array.isArray(t)||Array.isArray(r))for(var i=Array.isArray(t)?t:[],a=Array.isArray(r)?r:[],o=Math.max(i.length,a.length),s=0;s<o;s++)BP(e+"["+s+"]",i[s],a[s],n);else if(Sa.isPlainObject(t)||Sa.isPlainObject(r)){var l=Sa.isPlainObject(t)?t:{},u=Sa.isPlainObject(r)?r:{},c=Sa.extendFlat({},l,u);for(var f in c)BP(e+"."+f,l[f],u[f],n)}else n[e]===void 0&&(n[e]=Ly(t))}function Odt(e,t,r){for(var n in r){var i=Ec(e,n);BP(n,i.get(),r[n],t)}}function $he(e,t,r){var n=e._fullLayout,i=e._fullData,a=e.data,o=n._guiEditing,s=DP(n._preGUI,o),l=Sa.extendDeepAll({},t),u,c=v_.traceFlags(),f={},h={},d;function v(){return r.map(function(){})}function _(ge){var Re=Vv.id2name(ge);d.indexOf(Re)===-1&&d.push(Re)}function b(ge){return"LAYOUT"+ge+".autorange"}function p(ge){return"LAYOUT"+ge+".range"}function k(ge){for(var Re=ge;Re<i.length;Re++)if(i[Re]._input===a[ge])return i[Re]}function E(ge,Re,ce){if(Array.isArray(ge)){ge.forEach(function(Zt){E(Zt,Re,ce)});return}if(!(ge in t||vh.hasParent(t,ge))){var Ze;if(ge.slice(0,6)==="LAYOUT")Ze=s(e.layout,ge.replace("LAYOUT",""));else{var ut=r[ce],pt=n._tracePreGUI[k(ut)._fullInput.uid];Ze=DP(pt,o)(a[ut],ge)}ge in h||(h[ge]=v()),h[ge][ce]===void 0&&(h[ge][ce]=Ly(Ze.get())),Re!==void 0&&Ze.set(Re)}}function S(ge){return function(Re){return i[Re][ge]}}function L(ge){return function(Re,ce){return Re===!1?i[r[ce]][ge]:null}}for(var x in t){if(vh.hasParent(t,x))throw new Error("cannot set "+x+" and a parent attribute simultaneously");var C=t[x],M,g,P,T,z,O;if((x==="autobinx"||x==="autobiny")&&(x=x.charAt(x.length-1)+"bins",Array.isArray(C)?C=C.map(L(x)):C===!1?C=r.map(S(x)):C=null),f[x]=C,x.slice(0,6)==="LAYOUT"){P=s(e.layout,x.replace("LAYOUT","")),h[x]=[Ly(P.get())],P.set(Array.isArray(C)?C[0]:C),c.calc=!0;continue}for(h[x]=v(),u=0;u<r.length;u++){M=a[r[u]],g=k(r[u]);var V=n._tracePreGUI[g._fullInput.uid];if(P=DP(V,o)(M,x),T=P.get(),z=Array.isArray(C)?C[u%C.length]:C,z!==void 0){var G=P.parts[P.parts.length-1],Z=x.slice(0,x.length-G.length-1),j=Z?Z+".":"",N=Z?Ec(g,Z).get():g;if(O=VP.getTraceValObject(g,P.parts),O&&O.impliedEdits&&z!==null)for(var H in O.impliedEdits)E(Sa.relativeAttr(x,H),O.impliedEdits[H],u);else if((G==="thicknessmode"||G==="lenmode")&&T!==z&&(z==="fraction"||z==="pixels")&&N){var te=n._size,oe=N.orient,_e=oe==="top"||oe==="bottom";if(G==="thicknessmode"){var Ee=_e?te.h:te.w;E(j+"thickness",N.thickness*(z==="fraction"?1/Ee:Ee),u)}else{var Ce=_e?te.w:te.h;E(j+"len",N.len*(z==="fraction"?1/Ce:Ce),u)}}else if(x==="type"&&(z==="pie"!=(T==="pie")||z==="funnelarea"!=(T==="funnelarea"))){var me="x",ie="y";(z==="bar"||T==="bar")&&M.orientation==="h"&&(me="y",ie="x"),Sa.swapAttrs(M,["?","?src"],"labels",me),Sa.swapAttrs(M,["d?","?0"],"label",me),Sa.swapAttrs(M,["?","?src"],"values",ie),T==="pie"||T==="funnelarea"?(Ec(M,"marker.color").set(Ec(M,"marker.colors").get()),n._pielayer.selectAll("g.trace").remove()):L0.traceIs(M,"cartesian")&&Ec(M,"marker.colors").set(Ec(M,"marker.color").get())}h[x][u]=Ly(T);var Se=["swapxy","swapxyaxes","orientation","orientationaxes"];if(Se.indexOf(x)!==-1){if(x==="orientation"){P.set(z);var Le=M.x&&!M.y?"h":"v";if((P.get()||Le)===g.orientation)continue}else x==="orientationaxes"&&(M.orientation={v:"h",h:"v"}[g.orientation]);vh.swapXYData(M),c.calc=c.clearAxisTypes=!0}else Qo.dataArrayContainers.indexOf(P.parts[0])!==-1?(vh.manageArrayContainers(P,z,h),c.calc=!0):(O?O.arrayOk&&!L0.traceIs(g,"regl")&&(Sa.isArrayOrTypedArray(z)||Sa.isArrayOrTypedArray(T))?c.calc=!0:v_.update(c,O):c.calc=!0,P.set(z))}}if(["swapxyaxes","orientationaxes"].indexOf(x)!==-1&&Vv.swap(e,r),x==="orientationaxes"){var Ae=Ec(e.layout,"hovermode"),Fe=Ae.get();Fe==="x"?Ae.set("y"):Fe==="y"?Ae.set("x"):Fe==="x unified"?Ae.set("y unified"):Fe==="y unified"&&Ae.set("x unified")}if(["orientation","type"].indexOf(x)!==-1){for(d=[],u=0;u<r.length;u++){var Pe=a[r[u]];L0.traceIs(Pe,"cartesian")&&(_(Pe.xaxis||"x"),_(Pe.yaxis||"y"))}E(d.map(b),!0,0),E(d.map(p),[0,1],0)}}return(c.calc||c.plot)&&(c.fullReplot=!0),{flags:c,undoit:h,redoit:f,traces:r,eventData:Sa.extendDeepNoArrays([],[l,r])}}function HM(e,t,r){e=Sa.getGraphDiv(e),vh.clearPromiseQueue(e);var n={};if(typeof t=="string")n[t]=r;else if(Sa.isPlainObject(t))n=Sa.extendFlat({},t);else return Sa.warn("Relayout fail.",t,r),Promise.reject();Object.keys(n).length&&(e.changed=!0);var i=rde(e,n),a=i.flags;a.calc&&(e.calcdata=void 0);var o=[Qo.previousPromises];a.layoutReplot?o.push(Au.layoutReplot):Object.keys(n).length&&(Qhe(e,a,i)||Qo.supplyDefaults(e),a.legend&&o.push(Au.doLegend),a.layoutstyle&&o.push(Au.layoutStyles),a.axrange&&GP(o,i.rangesAltered),a.ticks&&o.push(Au.doTicksRelayout),a.modebar&&o.push(Au.doModeBar),a.camera&&o.push(Au.doCamera),a.colorbars&&o.push(Au.doColorBars),o.push(jM)),o.push(Qo.rehover,Qo.redrag,Qo.reselect),ap.add(e,HM,[e,i.undoit],HM,[e,i.redoit]);var s=Sa.syncOrAsync(o,e);return(!s||!s.then)&&(s=Promise.resolve(e)),s.then(function(){return e.emit("plotly_relayout",i.eventData),e})}function Qhe(e,t,r){var n=e._fullLayout;if(!t.axrange)return!1;for(var i in t)if(i!=="axrange"&&t[i])return!1;var a,o,s=function(d,v){return Sa.coerce(a,o,Tdt,d,v)},l={};for(var u in r.rangesAltered){var c=Vv.id2name(u);if(a=e.layout[c],o=n[c],wdt(a,o,s,l),o._matchGroup){for(var f in o._matchGroup)if(f!==u){var h=n[Vv.id2name(f)];h.autorange=o.autorange,h.range=o.range.slice(),h._input.range=o.range.slice()}}}return!0}function GP(e,t){var r=t?function(n){var i=[],a=!0;for(var o in t){var s=Vv.getFromId(n,o);if(i.push(o),(s.ticklabelposition||"").indexOf("inside")!==-1&&s._anchorAxis&&i.push(s._anchorAxis._id),s._matchGroup)for(var l in s._matchGroup)t[l]||i.push(l)}return Vv.draw(n,i,{skipTitle:a})}:function(n){return Vv.draw(n,"redraw")};e.push(Edt,Au.doAutoRangeAndConstraints,r,Au.drawData,Au.finalDraw)}var ede=/^[xyz]axis[0-9]*\.range(\[[0|1]\])?$/,tde=/^[xyz]axis[0-9]*\.autorange$/,qdt=/^[xyz]axis[0-9]*\.domain(\[[0|1]\])?$/;function rde(e,t){var r=e.layout,n=e._fullLayout,i=n._guiEditing,a=DP(n._preGUI,i),o=Object.keys(t),s=Vv.list(e),l=Sa.extendDeepAll({},t),u={},c,f,h;for(o=Object.keys(t),f=0;f<o.length;f++)if(o[f].indexOf("allaxes")===0){for(h=0;h<s.length;h++){var d=s[h]._id.slice(1),v=d.indexOf("scene")!==-1?d+".":"",_=o[f].replace("allaxes",v+s[h]._name);t[_]||(t[_]=t[o[f]])}delete t[o[f]]}var b=v_.layoutFlags(),p={},k={};function E(pt,Zt){if(Array.isArray(pt)){pt.forEach(function(lt){E(lt,Zt)});return}if(!(pt in t||vh.hasParent(t,pt))){var st=a(r,pt);pt in k||(k[pt]=Ly(st.get())),Zt!==void 0&&st.set(Zt)}}var S={},L;function x(pt){var Zt=Vv.name2id(pt.split(".")[0]);return S[Zt]=1,Zt}for(var C in t){if(vh.hasParent(t,C))throw new Error("cannot set "+C+" and a parent attribute simultaneously");for(var M=a(r,C),g=t[C],P=M.parts.length,T=P-1;T>0&&typeof M.parts[T]!="string";)T--;var z=M.parts[T],O=M.parts[T-1]+"."+z,V=M.parts.slice(0,T).join("."),G=Ec(e.layout,V).get(),Z=Ec(n,V).get(),j=M.get();if(g!==void 0){p[C]=g,k[C]=z==="reverse"?g:Ly(j);var N=VP.getLayoutValObject(n,M.parts);if(N&&N.impliedEdits&&g!==null)for(var H in N.impliedEdits)E(Sa.relativeAttr(C,H),N.impliedEdits[H]);if(["width","height"].indexOf(C)!==-1)if(g){E("autosize",null);var te=C==="height"?"width":"height";E(te,n[te])}else n[C]=e._initialAutoSize[C];else if(C==="autosize")E("width",g?null:n.width),E("height",g?null:n.height);else if(O.match(ede))x(O),Ec(n,V+"._inputRange").set(null);else if(O.match(tde)){x(O),Ec(n,V+"._inputRange").set(null);var oe=Ec(n,V).get();oe._inputDomain&&(oe._input.domain=oe._inputDomain.slice())}else O.match(qdt)&&Ec(n,V+"._inputDomain").set(null);if(z==="type"){L=G;var _e=Z.type==="linear"&&g==="log",Ee=Z.type==="log"&&g==="linear";if(_e||Ee){if(!L||!L.range)E(V+".autorange",!0);else if(Z.autorange)_e&&(L.range=L.range[1]>L.range[0]?[1,2]:[2,1]);else{var Ce=L.range[0],me=L.range[1];_e?(Ce<=0&&me<=0&&E(V+".autorange",!0),Ce<=0?Ce=me/1e6:me<=0&&(me=Ce/1e6),E(V+".range[0]",Math.log(Ce)/Math.LN10),E(V+".range[1]",Math.log(me)/Math.LN10)):(E(V+".range[0]",Math.pow(10,Ce)),E(V+".range[1]",Math.pow(10,me)))}Array.isArray(n._subplots.polar)&&n._subplots.polar.length&&n[M.parts[0]]&&M.parts[1]==="radialaxis"&&delete n[M.parts[0]]._subplot.viewInitial["radialaxis.range"],L0.getComponentMethod("annotations","convertCoords")(e,Z,g,E),L0.getComponentMethod("images","convertCoords")(e,Z,g,E)}else E(V+".autorange",!0),E(V+".range",null);Ec(n,V+"._inputRange").set(null)}else if(z.match(kdt)){var ie=Ec(n,C).get(),Se=(g||{}).type;(!Se||Se==="-")&&(Se="linear"),L0.getComponentMethod("annotations","convertCoords")(e,ie,Se,E),L0.getComponentMethod("images","convertCoords")(e,ie,Se,E)}var Le=RP.containerArrayMatch(C);if(Le){c=Le.array,f=Le.index;var Ae=Le.property,Fe=N||{editType:"calc"};f!==""&&Ae===""&&(RP.isAddVal(g)?k[C]=null:RP.isRemoveVal(g)?k[C]=(Ec(r,c).get()||[])[f]:Sa.warn("unrecognized full object value",t)),v_.update(b,Fe),u[c]||(u[c]={});var Pe=u[c][f];Pe||(Pe=u[c][f]={}),Pe[Ae]=g,delete t[C]}else z==="reverse"?(G.range?G.range.reverse():(E(V+".autorange",!0),G.range=[1,0]),Z.autorange?b.calc=!0:b.plot=!0):(C==="dragmode"&&(g===!1&&j!==!1||g!==!1&&j===!1)||n._has("scatter-like")&&n._has("regl")&&C==="dragmode"&&(g==="lasso"||g==="select")&&!(j==="lasso"||j==="select")?b.plot=!0:N?v_.update(b,N):b.calc=!0,M.set(g))}}for(c in u){var ge=RP.applyContainerArrayChanges(e,a(r,c),u[c],b,a);ge||(b.plot=!0)}for(var Re in S){L=Vv.getFromId(e,Re);var ce=L&&L._constraintGroup;if(ce){b.calc=!0;for(var Ze in ce)S[Ze]||(Vv.getFromId(e,Ze)._constraintShrinkable=!0)}}(ide(e)||t.height||t.width)&&(b.plot=!0);var ut=n.shapes;for(f=0;f<ut.length;f++)if(ut[f].showlegend){b.calc=!0;break}return(b.plot||b.calc)&&(b.layoutReplot=!0),{flags:b,rangesAltered:S,undoit:k,redoit:p,eventData:l}}function ide(e){var t=e._fullLayout,r=t.width,n=t.height;return e.layout.autosize&&Qo.plotAutoSize(e,e.layout,t),t.width!==r||t.height!==n}function NP(e,t,r,n){e=Sa.getGraphDiv(e),vh.clearPromiseQueue(e),Sa.isPlainObject(t)||(t={}),Sa.isPlainObject(r)||(r={}),Object.keys(t).length&&(e.changed=!0),Object.keys(r).length&&(e.changed=!0);var i=vh.coerceTraceIndices(e,n),a=$he(e,Sa.extendFlat({},t),i),o=a.flags,s=rde(e,Sa.extendFlat({},r)),l=s.flags;(o.calc||l.calc)&&(e.calcdata=void 0),o.clearAxisTypes&&vh.clearAxisTypes(e,i,r);var u=[];l.layoutReplot?u.push(Au.layoutReplot):o.fullReplot?u.push(_l._doPlot):(u.push(Qo.previousPromises),Qhe(e,l,s)||Qo.supplyDefaults(e),o.style&&u.push(Au.doTraceStyle),(o.colorbars||l.colorbars)&&u.push(Au.doColorBars),l.legend&&u.push(Au.doLegend),l.layoutstyle&&u.push(Au.layoutStyles),l.axrange&&GP(u,s.rangesAltered),l.ticks&&u.push(Au.doTicksRelayout),l.modebar&&u.push(Au.doModeBar),l.camera&&u.push(Au.doCamera),u.push(jM)),u.push(Qo.rehover,Qo.redrag,Qo.reselect),ap.add(e,NP,[e,a.undoit,s.undoit,a.traces],NP,[e,a.redoit,s.redoit,a.traces]);var c=Sa.syncOrAsync(u,e);return(!c||!c.then)&&(c=Promise.resolve(e)),c.then(function(){return e.emit("plotly_update",{data:a.eventData,layout:s.eventData}),e})}function KN(e){return function(r){r._fullLayout._guiEditing=!0;var n=e.apply(null,arguments);return r._fullLayout._guiEditing=!1,n}}var Bdt=[{pattern:/^hiddenlabels/,attr:"legend.uirevision"},{pattern:/^((x|y)axis\d*)\.((auto)?range|title\.text)/},{pattern:/axis\d*\.showspikes$/,attr:"modebar.uirevision"},{pattern:/(hover|drag)mode$/,attr:"modebar.uirevision"},{pattern:/^(scene\d*)\.camera/},{pattern:/^(geo\d*)\.(projection|center|fitbounds)/},{pattern:/^(ternary\d*\.[abc]axis)\.(min|title\.text)$/},{pattern:/^(polar\d*\.radialaxis)\.((auto)?range|angle|title\.text)/},{pattern:/^(polar\d*\.angularaxis)\.rotation/},{pattern:/^(mapbox\d*)\.(center|zoom|bearing|pitch)/},{pattern:/^(map\d*)\.(center|zoom|bearing|pitch)/},{pattern:/^legend\.(x|y)$/,attr:"editrevision"},{pattern:/^(shapes|annotations)/,attr:"editrevision"},{pattern:/^title\.text$/,attr:"editrevision"}],Ndt=[{pattern:/^selectedpoints$/,attr:"selectionrevision"},{pattern:/(^|value\.)visible$/,attr:"legend.uirevision"},{pattern:/^dimensions\[\d+\]\.constraintrange/},{pattern:/^node\.(x|y|groups)/},{pattern:/^level$/},{pattern:/(^|value\.)name$/},{pattern:/colorbar\.title\.text$/},{pattern:/colorbar\.(x|y)$/,attr:"editrevision"}];function Bhe(e,t){for(var r=0;r<t.length;r++){var n=t[r],i=e.match(n.pattern);if(i){var a=i[1]||"";return{head:a,tail:e.slice(a.length+1),attr:n.attr}}}}function Nhe(e,t){var r=Ec(t,e).get();if(r!==void 0)return r;var n=e.split(".");for(n.pop();n.length>1;)if(n.pop(),r=Ec(t,n.join(".")+".uirevision").get(),r!==void 0)return r;return t.uirevision}function Udt(e,t){for(var r=0;r<t.length;r++)if(t[r]._fullInput.uid===e)return r;return-1}function Vdt(e,t,r){for(var n=0;n<t.length;n++)if(t[n].uid===e)return n;return!t[r]||t[r].uid?-1:r}function Uhe(e,t){var r=Sa.isPlainObject(e),n=Array.isArray(e);return r||n?(r&&Sa.isPlainObject(t)||n&&Array.isArray(t))&&JSON.stringify(e)===JSON.stringify(t):e===t}function Gdt(e,t,r,n){var i=n._preGUI,a,o,s,l,u,c,f,h,d,v,_=[],b={},p={};for(a in i){if(u=Bhe(a,Bdt),u){if(d=u.head,v=u.tail,o=u.attr||d+".uirevision",s=Ec(n,o).get(),l=s&&Nhe(o,t),l&&l===s){if(c=i[a],c===null&&(c=void 0),f=Ec(t,a),h=f.get(),Uhe(h,c)){h===void 0&&v==="autorange"&&_.push(d),f.set(Ly(Ec(n,a).get()));continue}else if(v==="autorange"||v.slice(0,6)==="range["){var k=i[d+".range[0]"],E=i[d+".range[1]"],S=i[d+".autorange"];if(S||S===null&&k===null&&E===null){if(!(d in b)){var L=Ec(t,d).get();b[d]=L&&(L.autorange||L.autorange!==!1&&(!L.range||L.range.length!==2))}if(b[d]){f.set(Ly(Ec(n,a).get()));continue}}}}}else Sa.warn("unrecognized GUI edit: "+a);delete i[a],u&&u.tail.slice(0,6)==="range["&&(p[u.head]=1)}for(var x=0;x<_.length;x++){var C=_[x];if(p[C]){var M=Ec(t,C).get();M&&delete M.autorange}}var g=n._tracePreGUI;for(var P in g){var T=g[P],z=null,O;for(a in T){if(!z){var V=Udt(P,r);if(V<0){delete g[P];break}var G=r[V];O=G._fullInput;var Z=Vdt(P,e,O.index);if(Z<0){delete g[P];break}z=e[Z]}if(u=Bhe(a,Ndt),u){if(u.attr?(s=Ec(n,u.attr).get(),l=s&&Nhe(u.attr,t)):(s=O.uirevision,l=z.uirevision,l===void 0&&(l=t.uirevision)),l&&l===s&&(c=T[a],c===null&&(c=void 0),f=Ec(z,a),h=f.get(),Uhe(h,c))){f.set(Ly(Ec(O,a).get()));continue}}else Sa.warn("unrecognized GUI edit: "+a+" in trace uid "+P);delete T[a]}}}function Hdt(e,t,r,n){var i,a;function o(){return _l.addFrames(e,i)}e=Sa.getGraphDiv(e),vh.clearPromiseQueue(e);var s=e._fullData,l=e._fullLayout;if(!Sa.isPlotDiv(e)||!s||!l)a=_l.newPlot(e,t,r,n);else{if(Sa.isPlainObject(t)){var u=t;t=u.data,r=u.layout,n=u.config,i=u.frames}var c=!1;if(n){let O=Sa.extendDeepAll({},e._context);e._context=void 0,Hhe(e,n),c=!vh.collectionsAreEqual(O,e._context)}if(c){let O=e._ev.eventNames().map(V=>[V,e._ev.listeners(V)]);a=_l.newPlot(e,t,r,n).then(()=>{for(let[V,G]of O)G.forEach(Z=>e.on(V,Z));return _l.react(e,t,r,n)})}else{e.data=t||[],vh.cleanData(e.data),e.layout=r||{},vh.cleanLayout(e.layout),Gdt(e.data,e.layout,s,l),Qo.supplyDefaults(e,{skipUpdateCalc:!0});var f=e._fullData,h=e._fullLayout,d=h.datarevision===void 0,v=h.transition,_=Wdt(e,l,h,d,v),b=_.newDataRevision,p=jdt(e,s,f,d,v,b);if(ide(e)&&(_.layoutReplot=!0),p.calc||_.calc){e.calcdata=void 0;for(var k=Object.getOwnPropertyNames(h),E=0;E<k.length;E++){var S=k[E],L=S.substring(0,5);if(L==="xaxis"||L==="yaxis"){var x=h[S]._emptyCategories;x&&x()}}}else Qo.supplyDefaultsUpdateCalc(e.calcdata,f);var C=[];if(i&&(e._transitionData={},Qo.createTransitionData(e),C.push(o)),h.transition&&(p.anim||_.anim))_.ticks&&C.push(Au.doTicksRelayout),Qo.doCalcdata(e),Au.doAutoRangeAndConstraints(e),C.push(function(){return Qo.transitionFromReact(e,p,_,l)});else if(p.fullReplot||_.layoutReplot)e._fullLayout._skipDefaults=!0,C.push(_l._doPlot);else{for(var M in _.arrays){var g=_.arrays[M];if(g.length){var P=L0.getComponentMethod(M,"drawOne");if(P!==Sa.noop)for(var T=0;T<g.length;T++)P(e,g[T]);else{var z=L0.getComponentMethod(M,"draw");if(z===Sa.noop)throw new Error("cannot draw components: "+M);z(e)}}}C.push(Qo.previousPromises),p.style&&C.push(Au.doTraceStyle),(p.colorbars||_.colorbars)&&C.push(Au.doColorBars),_.legend&&C.push(Au.doLegend),_.layoutstyle&&C.push(Au.layoutStyles),_.axrange&&GP(C),_.ticks&&C.push(Au.doTicksRelayout),_.modebar&&C.push(Au.doModeBar),_.camera&&C.push(Au.doCamera),C.push(jM)}C.push(Qo.rehover,Qo.redrag,Qo.reselect),a=Sa.syncOrAsync(C,e),(!a||!a.then)&&(a=Promise.resolve(e))}}return a.then(()=>(c||e.emit("plotly_react",{config:n,data:t,layout:r}),e))}function jdt(e,t,r,n,i,a){var o=t.length===r.length;if(!i&&!o)return{fullReplot:!0,calc:!0};var s=v_.traceFlags();s.arrays={},s.nChanges=0,s.nChangesAnim=0;var l,u;function c(d){var v=VP.getTraceValObject(u,d);return!u._module.animatable&&v.anim&&(v.anim=!1),v}var f={getValObject:c,flags:s,immutable:n,transition:i,newDataRevision:a,gd:e},h={};for(l=0;l<t.length;l++)if(r[l]){if(u=r[l]._fullInput,h[u.uid])continue;h[u.uid]=1,UP(t[l]._fullInput,u,[],f)}return(s.calc||s.plot)&&(s.fullReplot=!0),i&&s.nChanges&&s.nChangesAnim&&(s.anim=s.nChanges===s.nChangesAnim&&o?"all":"some"),s}function Wdt(e,t,r,n,i){var a=v_.layoutFlags();a.arrays={},a.rangesAltered={},a.nChanges=0,a.nChangesAnim=0;function o(h){return VP.getLayoutValObject(r,h)}for(var s in r)if(!(!s.startsWith("xaxis")&&!s.startsWith("yaxis"))&&t[s]){var l=r[s].domain,u=t[s].domain,c=t[s]._inputDomain;t[s]._inputDomain&&(l[0]===c[0]&&l[1]===c[1]?r[s].domain=t[s].domain:(l[0]!==u[0]||l[1]!==u[1])&&(r[s]._inputDomain=null))}var f={getValObject:o,flags:a,immutable:n,transition:i,gd:e};return UP(t,r,[],f),(a.plot||a.calc)&&(a.layoutReplot=!0),i&&a.nChanges&&a.nChangesAnim&&(a.anim=a.nChanges===a.nChangesAnim?"all":"some"),a}function UP(e,t,r,n){var i,a,o,s=n.getValObject,l=n.flags,u=n.immutable,c=n.inArray,f=n.arrayIndex;function h(){var V=i.editType;if(c&&V.indexOf("arraydraw")!==-1){Sa.pushUnique(l.arrays[c],f);return}v_.update(l,i),V!=="none"&&l.nChanges++,n.transition&&i.anim&&l.nChangesAnim++,(ede.test(o)||tde.test(o))&&(l.rangesAltered[r[0]]=1),a==="datarevision"&&(l.newDataRevision=1)}function d(V){return V.valType==="data_array"||V.arrayOk}for(a in e){if(l.calc&&!n.transition)return;var v=e[a],_=t[a],b=r.concat(a);if(o=b.join("."),!(a.charAt(0)==="_"||typeof v=="function"||v===_)){if((a==="tick0"||a==="dtick")&&r[0]!=="geo"){var p=t.tickmode;if(p==="auto"||p==="array"||!p)continue}if(!(a==="range"&&t.autorange)&&!((a==="zmin"||a==="zmax")&&t.type==="contourcarpet")&&(i=s(b),!!i&&!(i._compareAsJSON&&JSON.stringify(v)===JSON.stringify(_)))){var k=i.valType,E,S=d(i),L=Array.isArray(v),x=Array.isArray(_);if(L&&x){var C="_input_"+a,M=e[C],g=t[C];if(Array.isArray(M)&&M===g)continue}if(_===void 0)S&&L?l.calc=!0:h();else if(i._isLinkedToArray){var P=[],T=!1;c||(l.arrays[a]=P);var z=Math.min(v.length,_.length),O=Math.max(v.length,_.length);if(z!==O)if(i.editType==="arraydraw")T=!0;else{h();continue}for(E=0;E<z;E++)UP(v[E],_[E],b.concat(E),Sa.extendFlat({inArray:a,arrayIndex:E},n));if(T)for(E=z;E<O;E++)P.push(E)}else!k&&Sa.isPlainObject(v)?UP(v,_,b,n):S?L&&x?(u&&(l.calc=!0),(u||n.newDataRevision)&&h()):L!==x?l.calc=!0:h():L&&x?(v.length!==_.length||String(v)!==String(_))&&h():h()}}}for(a in t)if(!(a in e||a.charAt(0)==="_"||typeof t[a]=="function"))if(i=s(r.concat(a)),d(i)&&Array.isArray(t[a])){l.calc=!0;return}else h()}function Xdt(e,t,r){if(e=Sa.getGraphDiv(e),!Sa.isPlotDiv(e))throw new Error("This element is not a Plotly plot: "+e+". It's likely that you've failed to create a plot before animating it. For more details, see https://plotly.com/javascript/animations/");var n=e._transitionData;n._frameQueue||(n._frameQueue=[]),r=Qo.supplyAnimationDefaults(r);var i=r.transition,a=r.frame;n._frameWaitingCnt===void 0&&(n._frameWaitingCnt=0);function o(u){return Array.isArray(i)?u>=i.length?i[0]:i[u]:i}function s(u){return Array.isArray(a)?u>=a.length?a[0]:a[u]:a}function l(u,c){var f=0;return function(){if(u&&++f===c)return u()}}return new Promise(function(u,c){function f(){if(n._frameQueue.length!==0){for(;n._frameQueue.length;){var z=n._frameQueue.pop();z.onInterrupt&&z.onInterrupt()}e.emit("plotly_animationinterrupted",[])}}function h(z){if(z.length!==0){for(var O=0;O<z.length;O++){var V;z[O].type==="byname"?V=Qo.computeFrame(e,z[O].name):V=z[O].data;var G=s(O),Z=o(O);Z.duration=Math.min(Z.duration,G.duration);var j={frame:V,name:z[O].name,frameOpts:G,transitionOpts:Z};O===z.length-1&&(j.onComplete=l(u,2),j.onInterrupt=c),n._frameQueue.push(j)}r.mode==="immediate"&&(n._lastFrameAt=-1/0),n._animationRaf||_()}}function d(){e.emit("plotly_animated"),window.cancelAnimationFrame(n._animationRaf),n._animationRaf=null}function v(){n._currentFrame&&n._currentFrame.onComplete&&n._currentFrame.onComplete();var z=n._currentFrame=n._frameQueue.shift();if(z){var O=z.name?z.name.toString():null;e._fullLayout._currentFrame=O,n._lastFrameAt=Date.now(),n._timeToNext=z.frameOpts.duration,Qo.transition(e,z.frame.data,z.frame.layout,vh.coerceTraceIndices(e,z.frame.traces),z.frameOpts,z.transitionOpts).then(function(){z.onComplete&&z.onComplete()}),e.emit("plotly_animatingframe",{name:O,frame:z.frame,animation:{frame:z.frameOpts,transition:z.transitionOpts}})}else d()}function _(){e.emit("plotly_animating"),n._lastFrameAt=-1/0,n._timeToNext=0,n._runningTransitions=0,n._currentFrame=null;var z=function(){n._animationRaf=window.requestAnimationFrame(z),Date.now()-n._lastFrameAt>n._timeToNext&&v()};z()}var b=0;function p(z){return Array.isArray(i)?b>=i.length?z.transitionOpts=i[b]:z.transitionOpts=i[0]:z.transitionOpts=i,b++,z}var k,E,S=[],L=t==null,x=Array.isArray(t),C=!L&&!x&&Sa.isPlainObject(t);if(C)S.push({type:"object",data:p(Sa.extendFlat({},t))});else if(L||["string","number"].indexOf(typeof t)!==-1)for(k=0;k<n._frames.length;k++)E=n._frames[k],E&&(L||String(E.group)===String(t))&&S.push({type:"byname",name:String(E.name),data:p({name:E.name})});else if(x)for(k=0;k<t.length;k++){var M=t[k];["number","string"].indexOf(typeof M)!==-1?(M=String(M),S.push({type:"byname",name:M,data:p({name:M})})):Sa.isPlainObject(M)&&S.push({type:"object",data:p(Sa.extendFlat({},M))})}for(k=0;k<S.length;k++)if(E=S[k],E.type==="byname"&&!n._frameHash[E.data.name]){Sa.warn('animate failure: frame not found: "'+E.data.name+'"'),c();return}["next","immediate"].indexOf(r.mode)!==-1&&f(),r.direction==="reverse"&&S.reverse();var g=e._fullLayout._currentFrame;if(g&&r.fromcurrent){var P=-1;for(k=0;k<S.length;k++)if(E=S[k],E.type==="byname"&&E.name===g){P=k;break}if(P>0&&P<S.length-1){var T=[];for(k=0;k<S.length;k++)E=S[k],(S[k].type!=="byname"||k>P)&&T.push(E);S=T}}S.length>0?h(S):(e.emit("plotly_animated"),u())})}function Zdt(e,t,r){if(e=Sa.getGraphDiv(e),t==null)return Promise.resolve();if(!Sa.isPlotDiv(e))throw new Error("This element is not a Plotly plot: "+e+". It's likely that you've failed to create a plot before adding frames. For more details, see https://plotly.com/javascript/animations/");var n,i,a,o,s=e._transitionData._frames,l=e._transitionData._frameHash;if(!Array.isArray(t))throw new Error("addFrames failure: frameList must be an Array of frame definitions"+t);var u=s.length+t.length*2,c=[],f={};for(n=t.length-1;n>=0;n--)if(Sa.isPlainObject(t[n])){var h=t[n].name,d=(l[h]||f[h]||{}).name,v=t[n].name,_=l[d]||f[d];d&&v&&typeof v=="number"&&_&&XN<qhe&&(XN++,Sa.warn('addFrames: overwriting frame "'+(l[d]||f[d]).name+'" with a frame whose name of type "number" also equates to "'+d+'". This is valid but may potentially lead to unexpected behavior since all plotly.js frame names are stored internally as strings.'),XN===qhe&&Sa.warn("addFrames: This API call has yielded too many of these warnings. For the rest of this call, further warnings about numeric frame names will be suppressed.")),f[h]={name:h},c.push({frame:Qo.supplyFrameDefaults(t[n]),index:r&&r[n]!==void 0&&r[n]!==null?r[n]:u+n})}c.sort(function(C,M){return C.index>M.index?-1:C.index<M.index?1:0});var b=[],p=[],k=s.length;for(n=c.length-1;n>=0;n--){if(i=c[n].frame,typeof i.name=="number"&&Sa.warn("Warning: addFrames accepts frames with numeric names, but the numbers areimplicitly cast to strings"),!i.name)for(;l[i.name="frame "+e._transitionData._counter++];);if(l[i.name]){for(a=0;a<s.length&&(s[a]||{}).name!==i.name;a++);b.push({type:"replace",index:a,value:i}),p.unshift({type:"replace",index:a,value:s[a]})}else o=Math.max(0,Math.min(c[n].index,k)),b.push({type:"insert",index:o,value:i}),p.unshift({type:"delete",index:o}),k++}var E=Qo.modifyFrames,S=Qo.modifyFrames,L=[e,p],x=[e,b];return ap&&ap.add(e,E,L,S,x),Qo.modifyFrames(e,b)}function Ydt(e,t){if(e=Sa.getGraphDiv(e),!Sa.isPlotDiv(e))throw new Error("This element is not a Plotly plot: "+e);var r,n,i=e._transitionData._frames,a=[],o=[];if(!t)for(t=[],r=0;r<i.length;r++)t.push(r);for(t=t.slice(),t.sort(),r=t.length-1;r>=0;r--)n=t[r],a.push({type:"delete",index:n}),o.unshift({type:"insert",index:n,value:i[n]});var s=Qo.modifyFrames,l=Qo.modifyFrames,u=[e,o],c=[e,a];return ap&&ap.add(e,s,u,l,c),Qo.modifyFrames(e,a)}function Kdt(e){e=Sa.getGraphDiv(e);var t=e._fullLayout||{},r=e._fullData||[];return Qo.cleanPlot([],{},r,t),Qo.purge(e),ZN.purge(e),t._container&&t._container.remove(),delete e._context,e}function Jdt(e){var t=e._fullLayout,r=e.getBoundingClientRect();if(!Sa.equalDomRects(r,t._lastBBox)){var n=t._invTransform=Sa.inverseTransformMatrix(Sa.getFullTransformMatrix(e));t._invScaleX=Math.sqrt(n[0][0]*n[0][0]+n[0][1]*n[0][1]+n[0][2]*n[0][2]),t._invScaleY=Math.sqrt(n[1][0]*n[1][0]+n[1][1]*n[1][1]+n[1][2]*n[1][2]),t._lastBBox=r}}function $dt(e){var t=FP.select(e),r=e._fullLayout;if(r._calcInverseTransform=Jdt,r._calcInverseTransform(e),r._container=t.selectAll(".plot-container").data([0]),r._container.enter().insert("div",":first-child").classed("plot-container",!0).classed("plotly",!0).style({width:"100%",height:"100%"}),r._paperdiv=r._container.selectAll(".svg-container").data([0]),r._paperdiv.enter().append("div").classed("user-select-none",!0).classed("svg-container",!0).style("position","relative"),r._glcontainer=r._paperdiv.selectAll(".gl-container").data([{}]),r._glcontainer.enter().append("div").classed("gl-container",!0),r._paperdiv.selectAll(".main-svg").remove(),r._paperdiv.select(".modebar-container").remove(),r._paper=r._paperdiv.insert("svg",":first-child").classed("main-svg",!0),r._toppaper=r._paperdiv.append("svg").classed("main-svg",!0),r._modebardiv=r._paperdiv.append("div"),delete r._modeBar,r._hoverpaper=r._paperdiv.append("svg").classed("main-svg",!0),!r._uid){var n={};FP.selectAll("defs").each(function(){this.id&&(n[this.id.split("-")[1]]=1)}),r._uid=Sa.randstr(n)}r._paperdiv.selectAll(".main-svg").attr(Mdt.svgAttrs),r._defs=r._paper.append("defs").attr("id","defs-"+r._uid),r._clips=r._defs.append("g").classed("clips",!0),r._topdefs=r._toppaper.append("defs").attr("id","topdefs-"+r._uid),r._topclips=r._topdefs.append("g").classed("clips",!0),r._bgLayer=r._paper.append("g").classed("bglayer",!0),r._draggers=r._paper.append("g").classed("draglayer",!0);var i=r._paper.append("g").classed("layer-below",!0);r._imageLowerLayer=i.append("g").classed("imagelayer",!0),r._shapeLowerLayer=i.append("g").classed("shapelayer",!0),r._cartesianlayer=r._paper.append("g").classed("cartesianlayer",!0),r._polarlayer=r._paper.append("g").classed("polarlayer",!0),r._smithlayer=r._paper.append("g").classed("smithlayer",!0),r._ternarylayer=r._paper.append("g").classed("ternarylayer",!0),r._geolayer=r._paper.append("g").classed("geolayer",!0),r._funnelarealayer=r._paper.append("g").classed("funnelarealayer",!0),r._pielayer=r._paper.append("g").classed("pielayer",!0),r._iciclelayer=r._paper.append("g").classed("iciclelayer",!0),r._treemaplayer=r._paper.append("g").classed("treemaplayer",!0),r._sunburstlayer=r._paper.append("g").classed("sunburstlayer",!0),r._indicatorlayer=r._toppaper.append("g").classed("indicatorlayer",!0),r._glimages=r._paper.append("g").classed("glimages",!0);var a=r._toppaper.append("g").classed("layer-above",!0);r._imageUpperLayer=a.append("g").classed("imagelayer",!0),r._shapeUpperLayer=a.append("g").classed("shapelayer",!0),r._selectionLayer=r._toppaper.append("g").classed("selectionlayer",!0),r._infolayer=r._toppaper.append("g").classed("infolayer",!0),r._menulayer=r._toppaper.append("g").classed("menulayer",!0),r._zoomlayer=r._toppaper.append("g").classed("zoomlayer",!0),r._hoverlayer=r._hoverpaper.append("g").classed("hoverlayer",!0),r._modebardiv.classed("modebar-container",!0).style("position","absolute").style("top","0px").style("right","0px"),e.emit("plotly_framework")}_l.animate=Xdt;_l.addFrames=Zdt;_l.deleteFrames=Ydt;_l.addTraces=Khe;_l.deleteTraces=Jhe;_l.extendTraces=Zhe;_l.moveTraces=YN;_l.prependTraces=Yhe;_l.newPlot=Rdt;_l._doPlot=Cdt;_l.purge=Kdt;_l.react=Hdt;_l.redraw=Idt;_l.relayout=HM;_l.restyle=qP;_l.setPlotConfig=Ldt;_l.update=NP;_l._guiRelayout=KN(HM);_l._guiRestyle=KN(qP);_l._guiUpdate=KN(NP);_l._storeDirectGUIEdit=Odt});var Py=ye(Mm=>{"use strict";var Qdt=qa();Mm.getDelay=function(e){return e._has&&(e._has("gl3d")||e._has("mapbox")||e._has("map"))?500:0};Mm.getRedrawFunc=function(e){return function(){Qdt.getComponentMethod("colorbar","draw")(e)}};Mm.encodeSVG=function(e){return"data:image/svg+xml,"+encodeURIComponent(e)};Mm.encodeJSON=function(e){return"data:application/json,"+encodeURIComponent(e)};var nde=window.URL||window.webkitURL;Mm.createObjectURL=function(e){return nde.createObjectURL(e)};Mm.revokeObjectURL=function(e){return nde.revokeObjectURL(e)};Mm.createBlob=function(e,t){if(t==="svg")return new window.Blob([e],{type:"image/svg+xml;charset=utf-8"});if(t==="full-json")return new window.Blob([e],{type:"application/json;charset=utf-8"});var r=evt(window.atob(e));return new window.Blob([r],{type:"image/"+t})};Mm.octetStream=function(e){document.location.href="data:application/octet-stream"+e};function evt(e){for(var t=e.length,r=new ArrayBuffer(t),n=new Uint8Array(r),i=0;i<t;i++)n[i]=e.charCodeAt(i);return r}Mm.IMAGE_URL_PREFIX=/^data:image\/\w+;base64,/});var jP=ye((Lor,ade)=>{"use strict";var $N=Oa(),kor=Dr(),tvt=So(),rvt=ka(),Cor=Wp(),JN=/"/g,WM="TOBESTRIPPED",ivt=new RegExp('("'+WM+")|("+WM+'")',"g");function nvt(e){var t=$N.select("body").append("div").style({display:"none"}).html(""),r=e.replace(/(&[^;]*;)/gi,function(n){return n==="&lt;"?"&#60;":n==="&rt;"?"&#62;":n.indexOf("<")!==-1||n.indexOf(">")!==-1?"":t.html(n).text()});return t.remove(),r}function avt(e){return e.replace(/&(?!\w+;|\#[0-9]+;| \#x[0-9A-F]+;)/g,"&amp;")}ade.exports=function(t,r,n){var i=t._fullLayout,a=i._paper,o=i._toppaper,s=i.width,l=i.height,u;a.insert("rect",":first-child").call(tvt.setRect,0,0,s,l).call(rvt.fill,i.paper_bgcolor);var c=i._basePlotModules||[];for(u=0;u<c.length;u++){var f=c[u];f.toSVG&&f.toSVG(t)}if(o){var h=o.node().childNodes,d=Array.prototype.slice.call(h);for(u=0;u<d.length;u++){var v=d[u];v.childNodes.length&&a.node().appendChild(v)}}i._draggers&&i._draggers.remove(),a.node().style.background="",a.selectAll("text").attr({"data-unformatted":null,"data-math":null}).each(function(){var b=$N.select(this);if(this.style.visibility==="hidden"||this.style.display==="none"){b.remove();return}else b.style({visibility:null,display:null});var p=this.style.fontFamily;p&&p.indexOf('"')!==-1&&b.style("font-family",p.replace(JN,WM));var k=this.style.fontWeight;k&&(k==="normal"||k==="400")&&b.style("font-weight",void 0);var E=this.style.fontStyle;E&&E==="normal"&&b.style("font-style",void 0);var S=this.style.fontVariant;S&&S==="normal"&&b.style("font-variant",void 0)}),a.selectAll(".gradient_filled,.pattern_filled").each(function(){var b=$N.select(this),p=this.style.fill;p&&p.indexOf("url(")!==-1&&b.style("fill",p.replace(JN,WM));var k=this.style.stroke;k&&k.indexOf("url(")!==-1&&b.style("stroke",k.replace(JN,WM))}),(r==="pdf"||r==="eps")&&a.selectAll("#MathJax_SVG_glyphs path").attr("stroke-width",0),r==="svg"&&n&&(a.attr("width",n*s),a.attr("height",n*l),a.attr("viewBox","0 0 "+s+" "+l));var _=new window.XMLSerializer().serializeToString(a.node());return _=nvt(_),_=avt(_),_=_.replace(ivt,"'"),_}});var WP=ye((Por,ode)=>{"use strict";var ovt=Dr(),svt=pb().EventEmitter,XM=Py();function lvt(e){var t=e.emitter||new svt,r=new Promise(function(n,i){var a=window.Image,o=e.svg,s=e.format||"png",l=e.canvas,u=e.scale||1,c=e.width||300,f=e.height||150,h=u*c,d=u*f,v=l.getContext("2d",{willReadFrequently:!0}),_=new a,b,p;s==="svg"||ovt.isSafari()?p=XM.encodeSVG(o):(b=XM.createBlob(o,"svg"),p=XM.createObjectURL(b)),l.width=h,l.height=d,_.onload=function(){var k;switch(b=null,XM.revokeObjectURL(p),s!=="svg"&&v.drawImage(_,0,0,h,d),s){case"jpeg":k=l.toDataURL("image/jpeg");break;case"png":k=l.toDataURL("image/png");break;case"webp":k=l.toDataURL("image/webp");break;case"svg":k=p;break;default:var E="Image format is not jpeg, png, svg or webp.";if(i(new Error(E)),!e.promise)return t.emit("error",E)}n(k),e.promise||t.emit("success",k)},_.onerror=function(k){if(b=null,XM.revokeObjectURL(p),i(k),!e.promise)return t.emit("error",k)},_.src=p});return e.promise?r:t}ode.exports=lvt});var eU=ye((Ior,ude)=>{"use strict";var sde=Eo(),lde=HP(),uvt=Mc(),Em=Dr(),ZM=Py(),cvt=jP(),fvt=WP(),hvt=l6().version,QN={format:{valType:"enumerated",values:["png","jpeg","webp","svg","full-json"],dflt:"png"},width:{valType:"number",min:1},height:{valType:"number",min:1},scale:{valType:"number",min:0,dflt:1},setBackground:{valType:"any",dflt:!1},imageDataOnly:{valType:"boolean",dflt:!1}};function dvt(e,t){t=t||{};var r,n,i,a;Em.isPlainObject(e)?(r=e.data||[],n=e.layout||{},i=e.config||{},a={}):(e=Em.getGraphDiv(e),r=Em.extendDeep([],e.data),n=Em.extendDeep({},e.layout),i=e._context,a=e._fullLayout||{});function o(x){return!(x in t)||Em.validate(t[x],QN[x])}if(!o("width")&&t.width!==null||!o("height")&&t.height!==null)throw new Error("Height and width should be pixel values.");if(!o("format"))throw new Error("Export format is not "+Em.join2(QN.format.values,", "," or ")+".");var s={};function l(x,C){return Em.coerce(t,s,QN,x,C)}var u=l("format"),c=l("width"),f=l("height"),h=l("scale"),d=l("setBackground"),v=l("imageDataOnly"),_=document.createElement("div");_.style.position="absolute",_.style.left="-5000px",document.body.appendChild(_);var b=Em.extendFlat({},n);c?b.width=c:t.width===null&&sde(a.width)&&(b.width=a.width),f?b.height=f:t.height===null&&sde(a.height)&&(b.height=a.height);var p=Em.extendFlat({},i,{_exportedPlot:!0,staticPlot:!0,setBackground:d}),k=ZM.getRedrawFunc(_);function E(){return new Promise(function(x){setTimeout(x,ZM.getDelay(_._fullLayout))})}function S(){return new Promise(function(x,C){var M=cvt(_,u,h),g=_._fullLayout.width,P=_._fullLayout.height;function T(){lde.purge(_),document.body.removeChild(_)}if(u==="full-json"){var z=uvt.graphJson(_,!1,"keepdata","object",!0,!0);return z.version=hvt,z=JSON.stringify(z),T(),x(v?z:ZM.encodeJSON(z))}if(T(),u==="svg")return x(v?M:ZM.encodeSVG(M));var O=document.createElement("canvas");O.id=Em.randstr(),fvt({format:u,width:g,height:P,scale:h,canvas:O,svg:M,promise:!0}).then(x).catch(C)})}function L(x){return v?x.replace(ZM.IMAGE_URL_PREFIX,""):x}return new Promise(function(x,C){lde.newPlot(_,r,b,p).then(k).then(E).then(S).then(function(M){x(L(M))}).catch(function(M){C(M)})})}ude.exports=dvt});var hde=ye((Ror,fde)=>{"use strict";var P0=Dr(),vvt=Mc(),pvt=w3(),gvt=cb().dfltConfig,Pg=P0.isPlainObject,Gb=Array.isArray,tU=P0.isArrayOrTypedArray;fde.exports=function(t,r){t===void 0&&(t=[]),r===void 0&&(r={});var n=pvt.get(),i=[],a={_context:P0.extendFlat({},gvt)},o,s;Gb(t)?(a.data=P0.extendDeep([],t),o=t):(a.data=[],o=[],i.push(md("array","data"))),Pg(r)?(a.layout=P0.extendDeep({},r),s=r):(a.layout={},s={},arguments.length>1&&i.push(md("object","layout"))),vvt.supplyDefaults(a);for(var l=a._fullData,u=o.length,c=0;c<u;c++){var f=o[c],h=["data",c];if(!Pg(f)){i.push(md("object",h));continue}var d=l[c],v=d.type,_=n.traces[v].attributes;_.type={valType:"enumerated",values:[v]},d.visible===!1&&f.visible!==!1&&i.push(md("invisible",h)),XP(f,d,_,i,h)}var b=a._fullLayout,p=mvt(n,l);return XP(s,b,p,i,"layout"),i.length===0?void 0:i};function XP(e,t,r,n,i,a){a=a||[];for(var o=Object.keys(e),s=0;s<o.length;s++){var l=o[s],u=a.slice();u.push(l);var c=e[l],f=t[l],h=xvt(r,l),d=(h||{}).valType,v=d==="info_array",_=d==="colorscale",b=(h||{}).items;if(!_vt(r,l))n.push(md("schema",i,u));else if(Pg(c)&&Pg(f)&&d!=="any")XP(c,f,h,n,i,u);else if(v&&Gb(c)){c.length>f.length&&n.push(md("unused",i,u.concat(f.length)));var p=f.length,k=Array.isArray(b);k&&(p=Math.min(p,b.length));var E,S,L,x,C;if(h.dimensions===2)for(S=0;S<p;S++)if(Gb(c[S])){c[S].length>f[S].length&&n.push(md("unused",i,u.concat(S,f[S].length)));var M=f[S].length;for(E=0;E<(k?Math.min(M,b[S].length):M);E++)L=k?b[S][E]:b,x=c[S][E],C=f[S][E],P0.validate(x,L)?C!==x&&C!==+x&&n.push(md("dynamic",i,u.concat(S,E),x,C)):n.push(md("value",i,u.concat(S,E),x))}else n.push(md("array",i,u.concat(S),c[S]));else for(S=0;S<p;S++)L=k?b[S]:b,x=c[S],C=f[S],P0.validate(x,L)?C!==x&&C!==+x&&n.push(md("dynamic",i,u.concat(S),x,C)):n.push(md("value",i,u.concat(S),x))}else if(h.items&&!v&&Gb(c)){var g=b[Object.keys(b)[0]],P=[],T,z;for(T=0;T<f.length;T++){var O=f[T]._index||T;if(z=u.slice(),z.push(O),Pg(c[O])&&Pg(f[T])){P.push(O);var V=c[O],G=f[T];Pg(V)&&V.visible!==!1&&G.visible===!1?n.push(md("invisible",i,z)):XP(V,G,g,n,i,z)}}for(T=0;T<c.length;T++)z=u.slice(),z.push(T),Pg(c[T])?P.indexOf(T)===-1&&n.push(md("unused",i,z)):n.push(md("object",i,z,c[T]))}else!Pg(c)&&Pg(f)?n.push(md("object",i,u,c)):!tU(c)&&tU(f)&&!v&&!_?n.push(md("array",i,u,c)):l in t?P0.validate(c,h)?h.valType==="enumerated"&&(h.coerceNumber&&c!==+f||!tU(c)&&c!==f||String(c)!==String(f))&&n.push(md("dynamic",i,u,c,f)):n.push(md("value",i,u,c)):n.push(md("unused",i,u,c))}return n}function mvt(e,t){for(var r=e.layout.layoutAttributes,n=0;n<t.length;n++){var i=t[n],a=e.traces[i.type],o=a.layoutAttributes;o&&(i.subplot?P0.extendFlat(r[a.attributes.subplot.dflt],o):P0.extendFlat(r,o))}return r}var yvt={object:function(e,t){var r;return e==="layout"&&t===""?r="The layout argument":e[0]==="data"&&t===""?r="Trace "+e[1]+" in the data argument":r=Vb(e)+"key "+t,r+" must be linked to an object container"},array:function(e,t){var r;return e==="data"?r="The data argument":r=Vb(e)+"key "+t,r+" must be linked to an array container"},schema:function(e,t){return Vb(e)+"key "+t+" is not part of the schema"},unused:function(e,t,r){var n=Pg(r)?"container":"key";return Vb(e)+n+" "+t+" did not get coerced"},dynamic:function(e,t,r,n){return[Vb(e)+"key",t,"(set to '"+r+"')","got reset to","'"+n+"'","during defaults."].join(" ")},invisible:function(e,t){return(t?Vb(e)+"item "+t:"Trace "+e[1])+" got defaulted to be not visible"},value:function(e,t,r){return[Vb(e)+"key "+t,"is set to an invalid value ("+r+")"].join(" ")}};function Vb(e){return Gb(e)?"In data trace "+e[1]+", ":"In "+e+", "}function md(e,t,r,n,i){r=r||"";var a,o;Gb(t)?(a=t[0],o=t[1]):(a=t,o=null);var s=wvt(r),l=yvt[e](t,s,n,i);return P0.log(l),{code:e,container:a,trace:o,path:r,astr:s,msg:l}}function _vt(e,t){var r=cde(t),n=r.keyMinusId,i=r.id;return n in e&&e[n]._isSubplotObj&&i?!0:t in e}function xvt(e,t){if(t in e)return e[t];var r=cde(t);return e[r.keyMinusId]}var bvt=P0.counterRegex("([a-z]+)");function cde(e){var t=e.match(bvt);return{keyMinusId:t&&t[1],id:t&&t[2]}}function wvt(e){if(!Gb(e))return String(e);for(var t="",r=0;r<e.length;r++){var n=e[r];typeof n=="number"?t=t.slice(0,-1)+"["+n+"]":t+=n,r<e.length-1&&(t+=".")}return t}});var vde=ye((Dor,dde)=>{"use strict";var Tvt=Dr(),ZP=Py();function Avt(e,t,r){var n=document.createElement("a"),i="download"in n,a=new Promise(function(o,s){var l,u;if(i)return l=ZP.createBlob(e,r),u=ZP.createObjectURL(l),n.href=u,n.download=t,document.body.appendChild(n),n.click(),document.body.removeChild(n),ZP.revokeObjectURL(u),l=null,o(t);if(Tvt.isSafari()){var c=r==="svg"?",":";base64,";return ZP.octetStream(c+encodeURIComponent(e)),o(t)}s(new Error("download error"))});return a}dde.exports=Avt});var rU=ye((zor,gde)=>{"use strict";var pde=Dr(),Svt=eU(),Mvt=vde(),For=Py();function Evt(e,t){var r;return pde.isPlainObject(e)||(r=pde.getGraphDiv(e)),t=t||{},t.format=t.format||"png",t.width=t.width||null,t.height=t.height||null,t.imageDataOnly=!0,new Promise(function(n,i){r&&r._snapshotInProgress&&i(new Error("Snapshotting already in progress.")),r&&(r._snapshotInProgress=!0);var a=Svt(e,t),o=t.filename||e.fn||"newplot";o+="."+t.format.replace("-","."),a.then(function(s){return r&&(r._snapshotInProgress=!1),Mvt(s,o,t.format)}).then(function(s){n(s)}).catch(function(s){r&&(r._snapshotInProgress=!1),i(s)})})}gde.exports=Evt});var bde=ye(iU=>{"use strict";var Cp=Dr(),Lp=Cp.isPlainObject,mde=w3(),yde=Mc(),kvt=Gl(),_de=vl(),xde=cb().dfltConfig;iU.makeTemplate=function(e){e=Cp.isPlainObject(e)?e:Cp.getGraphDiv(e),e=Cp.extendDeep({_context:xde},{data:e.data,layout:e.layout}),yde.supplyDefaults(e);var t=e.data||[],r=e.layout||{};r._basePlotModules=e._fullLayout._basePlotModules,r._modules=e._fullLayout._modules;var n={data:{},layout:{}};t.forEach(function(d){var v={};YM(d,v,Lvt.bind(null,d));var _=Cp.coerce(d,{},kvt,"type"),b=n.data[_];b||(b=n.data[_]=[]),b.push(v)}),YM(r,n.layout,Cvt.bind(null,r)),delete n.layout.template;var i=r.template;if(Lp(i)){var a=i.layout,o,s,l,u,c,f;Lp(a)&&YP(a,n.layout);var h=i.data;if(Lp(h)){for(s in n.data)if(l=h[s],Array.isArray(l)){for(c=n.data[s],f=c.length,u=l.length,o=0;o<f;o++)YP(l[o%u],c[o]);for(o=f;o<u;o++)c.push(Cp.extendDeep({},l[o]))}for(s in h)s in n.data||(n.data[s]=Cp.extendDeep([],h[s]))}}return n};function YP(e,t){e=Cp.extendDeep({},e);var r=Object.keys(e).sort(),n,i;function a(c,f,h){if(Lp(f)&&Lp(c))YP(c,f);else if(Array.isArray(f)&&Array.isArray(c)){var d=_de.arrayTemplater({_template:e},h);for(i=0;i<f.length;i++){var v=f[i],_=d.newItem(v)._template;_&&YP(_,v)}var b=d.defaultItems();for(i=0;i<b.length;i++)f.push(b[i]._template);for(i=0;i<f.length;i++)delete f[i].templateitemname}}for(n=0;n<r.length;n++){var o=r[n],s=e[o];if(o in t?a(s,t[o],o):t[o]=s,KP(o)===o)for(var l in t){var u=KP(l);l!==u&&u===o&&!(l in e)&&a(s,t[l],o)}}}function KP(e){return e.replace(/[0-9]+$/,"")}function YM(e,t,r,n,i){var a=i&&r(i);for(var o in e){var s=e[o],l=I0(e,o,n),u=I0(e,o,i),c=r(u);if(!c){var f=KP(o);f!==o&&(u=I0(e,f,i),c=r(u))}if(!(a&&a===c)&&!(!c||c._noTemplating||c.valType==="data_array"||c.arrayOk&&Array.isArray(s)))if(!c.valType&&Lp(s))YM(s,t,r,l,u);else if(c._isLinkedToArray&&Array.isArray(s))for(var h=!1,d=0,v={},_=0;_<s.length;_++){var b=s[_];if(Lp(b)){var p=b.name;if(p)v[p]||(YM(b,t,r,I0(s,d,l),I0(s,d,u)),d++,v[p]=1);else if(!h){var k=_de.arrayDefaultKey(o),E=I0(e,k,n),S=I0(s,d,l);YM(b,t,r,S,I0(s,d,u));var L=Cp.nestedProperty(t,S),x=Cp.nestedProperty(t,E);x.set(L.get()),L.set(null),h=!0}}}else{var C=Cp.nestedProperty(t,l);C.set(s)}}}function Cvt(e,t){return mde.getLayoutValObject(e,Cp.nestedProperty({},t).parts)}function Lvt(e,t){return mde.getTraceValObject(e,Cp.nestedProperty({},t).parts)}function I0(e,t,r){var n;return r?Array.isArray(e)?n=r+"["+t+"]":n=r+"."+t:n=t,n}iU.validateTemplate=function(e,t){var r=Cp.extendDeep({},{_context:xde,data:e.data,layout:e.layout}),n=r.layout||{};Lp(t)||(t=n.template||{});var i=t.layout,a=t.data,o=[];r.layout=n,r.layout.template=t,yde.supplyDefaults(r);var s=r._fullLayout,l=r._fullData,u={};function c(E,S){for(var L in E)if(L.charAt(0)!=="_"&&Lp(E[L])){var x=KP(L),C=[],M;for(M=0;M<S.length;M++)C.push(I0(E,L,S[M])),x!==L&&C.push(I0(E,x,S[M]));for(M=0;M<C.length;M++)u[C[M]]=1;c(E[L],C)}}function f(E,S){for(var L in E)if(L.indexOf("defaults")===-1&&Lp(E[L])){var x=I0(E,L,S);u[x]?f(E[L],x):o.push({code:"unused",path:x})}}if(Lp(i)?(c(s,["layout"]),f(i,"layout")):o.push({code:"layout"}),!Lp(a))o.push({code:"data"});else{for(var h={},d,v=0;v<l.length;v++){var _=l[v];d=_.type,h[d]=(h[d]||0)+1,_._fullInput._template||o.push({code:"missing",index:_.index,traceType:d})}for(d in a){var b=a[d].length,p=h[d]||0;b>p?o.push({code:"unused",traceType:d,templateCount:b,dataCount:p}):p>b&&o.push({code:"reused",traceType:d,templateCount:b,dataCount:p})}}function k(E,S){for(var L in E)if(L.charAt(0)!=="_"){var x=E[L],C=I0(E,L,S);Lp(x)?(Array.isArray(E)&&x._template===!1&&x.templateitemname&&o.push({code:"missing",path:C,templateitemname:x.templateitemname}),k(x,C)):Array.isArray(x)&&Pvt(x)&&k(x,C)}}if(k({data:l,layout:s},""),o.length)return o.map(Ivt)};function Pvt(e){for(var t=0;t<e.length;t++)if(Lp(e[t]))return!0}function Ivt(e){var t;switch(e.code){case"data":t="The template has no key data.";break;case"layout":t="The template has no key layout.";break;case"missing":e.path?t="There are no templates for item "+e.path+" with name "+e.templateitemname:t="There are no templates for trace "+e.index+", of type "+e.traceType+".";break;case"unused":e.path?t="The template item at "+e.path+" was not used in constructing the plot.":e.dataCount?t="Some of the templates of type "+e.traceType+" were not used. The template has "+e.templateCount+" traces, the data only has "+e.dataCount+" of this type.":t="The template has "+e.templateCount+" traces of type "+e.traceType+" but there are none in the data.";break;case"reused":t="Some of the templates of type "+e.traceType+" were used more than once. The template has "+e.templateCount+" traces, the data has "+e.dataCount+" of this type.";break}return e.msg=t,e}});var Tde=ye(ef=>{"use strict";var td=HP();ef._doPlot=td._doPlot;ef.newPlot=td.newPlot;ef.restyle=td.restyle;ef.relayout=td.relayout;ef.redraw=td.redraw;ef.update=td.update;ef._guiRestyle=td._guiRestyle;ef._guiRelayout=td._guiRelayout;ef._guiUpdate=td._guiUpdate;ef._storeDirectGUIEdit=td._storeDirectGUIEdit;ef.react=td.react;ef.extendTraces=td.extendTraces;ef.prependTraces=td.prependTraces;ef.addTraces=td.addTraces;ef.deleteTraces=td.deleteTraces;ef.moveTraces=td.moveTraces;ef.purge=td.purge;ef.addFrames=td.addFrames;ef.deleteFrames=td.deleteFrames;ef.animate=td.animate;ef.setPlotConfig=td.setPlotConfig;var Rvt=NS().getGraphDiv,Dvt=lP().eraseActiveShape;ef.deleteActiveShape=function(e){return Dvt(Rvt(e))};ef.toImage=eU();ef.validate=hde();ef.downloadImage=rU();var wde=bde();ef.makeTemplate=wde.makeTemplate;ef.validateTemplate=wde.validateTemplate});var eT=ye((Bor,Ade)=>{"use strict";var nU=Dr(),Fvt=qa();Ade.exports=function(t,r,n,i){var a=i("x"),o=i("y"),s,l=Fvt.getComponentMethod("calendars","handleTraceDefaults");if(l(t,r,["x","y"],n),a){var u=nU.minRowLength(a);o?s=Math.min(u,nU.minRowLength(o)):(s=u,i("y0"),i("dy"))}else{if(!o)return 0;s=nU.minRowLength(o),i("x0"),i("dx")}return r._length=s,s}});var Ig=ye((Nor,Ede)=>{"use strict";var Sde=Dr().dateTick0,zvt=fs(),Ovt=zvt.ONEWEEK;function Mde(e,t){return e%Ovt===0?Sde(t,1):Sde(t,0)}Ede.exports=function(t,r,n,i,a){if(a||(a={x:!0,y:!0}),a.x){var o=i("xperiod");o&&(i("xperiod0",Mde(o,r.xcalendar)),i("xperiodalignment"))}if(a.y){var s=i("yperiod");s&&(i("yperiod0",Mde(s,r.ycalendar)),i("yperiodalignment"))}}});var Lde=ye((Uor,Cde)=>{"use strict";var kde=["orientation","groupnorm","stackgaps"];Cde.exports=function(t,r,n,i){var a=n._scatterStackOpts,o=i("stackgroup");if(o){var s=r.xaxis+r.yaxis,l=a[s];l||(l=a[s]={});var u=l[o],c=!1;u?u.traces.push(r):(u=l[o]={traceIndices:[],traces:[r]},c=!0);for(var f={orientation:r.x&&!r.y?"h":"v"},h=0;h<kde.length;h++){var d=kde[h],v=d+"Found";if(!u[v]){var _=t[d]!==void 0,b=d==="orientation";if((_||c)&&(u[d]=i(d,f[d]),b&&(u.fillDflt=u[d]==="h"?"tonextx":"tonexty"),_&&(u[v]=!0,!c&&(delete u.traces[0][d],b))))for(var p=0;p<u.traces.length-1;p++){var k=u.traces[p];k._input.fill!==k.fill&&(k.fill=u.fillDflt)}}}return u}}});var $p=ye((Vor,Dde)=>{"use strict";var Pde=ka(),Ide=pv().hasColorscale,Rde=Qh(),qvt=Ru();Dde.exports=function(t,r,n,i,a,o){var s=qvt.isBubble(t),l=(t.line||{}).color,u;if(o=o||{},l&&(n=l),a("marker.symbol"),a("marker.opacity",s?.7:1),a("marker.size"),o.noAngle||(a("marker.angle"),o.noAngleRef||a("marker.angleref"),o.noStandOff||a("marker.standoff")),a("marker.color",n),Ide(t,"marker")&&Rde(t,r,i,a,{prefix:"marker.",cLetter:"c"}),o.noSelect||(a("selected.marker.color"),a("unselected.marker.color"),a("selected.marker.size"),a("unselected.marker.size")),o.noLine||(l&&!Array.isArray(l)&&r.marker.color!==l?u=l:s?u=Pde.background:u=Pde.defaultLine,a("marker.line.color",u),Ide(t,"marker.line")&&Rde(t,r,i,a,{prefix:"marker.line.",cLetter:"c"}),a("marker.line.width",s?1:0)),s&&(a("marker.sizeref"),a("marker.sizemin"),a("marker.sizemode")),o.gradient){var c=a("marker.gradient.type");c!=="none"&&a("marker.gradient.color")}}});var R0=ye((Gor,Fde)=>{"use strict";var Bvt=Dr().isArrayOrTypedArray,Nvt=pv().hasColorscale,Uvt=Qh();Fde.exports=function(t,r,n,i,a,o){o||(o={});var s=(t.marker||{}).color;if(s&&s._inputArray&&(s=s._inputArray),a("line.color",n),Nvt(t,"line"))Uvt(t,r,i,a,{prefix:"line.",cLetter:"c"});else{var l=(Bvt(s)?!1:s)||n;a("line.color",l)}a("line.width"),o.noDash||a("line.dash"),o.backoff&&a("line.backoff")}});var tT=ye((Hor,zde)=>{"use strict";zde.exports=function(t,r,n){var i=n("line.shape");i==="spline"&&n("line.smoothing")}});var D0=ye((jor,Ode)=>{"use strict";var Vvt=Dr();Ode.exports=function(e,t,r,n,i){i=i||{},n("textposition"),Vvt.coerceFont(n,"textfont",i.font||r.font,i),i.noSelect||(n("selected.textfont.color"),n("unselected.textfont.color"))}});var Rg=ye((Wor,Bde)=>{"use strict";var JP=ka(),qde=Dr().isArrayOrTypedArray;function Gvt(e){for(var t=JP.interpolate(e[0][1],e[1][1],.5),r=2;r<e.length;r++){var n=JP.interpolate(e[r-1][1],e[r][1],.5);t=JP.interpolate(t,n,e[r-1][0]/e[r][0])}return t}Bde.exports=function(t,r,n,i,a){a||(a={});var o=!1;if(r.marker){var s=r.marker.color,l=(r.marker.line||{}).color;s&&!qde(s)?o=s:l&&!qde(l)&&(o=l)}var u;if(a.moduleHasFillgradient){var c=i("fillgradient.type");if(c!=="none"){i("fillgradient.start"),i("fillgradient.stop");var f=i("fillgradient.colorscale");f&&(u=Gvt(f))}}i("fillcolor",JP.addOpacity((r.line||{}).color||o||u||n,.5))}});var Gde=ye((Xor,Vde)=>{"use strict";var Nde=Dr(),Hvt=qa(),jvt=pf(),Wvt=Sm(),rT=Ru(),Xvt=eT(),Zvt=Ig(),Yvt=Lde(),Kvt=$p(),Jvt=R0(),Ude=tT(),$vt=D0(),Qvt=Rg(),ept=Dr().coercePattern;Vde.exports=function(t,r,n,i){function a(d,v){return Nde.coerce(t,r,jvt,d,v)}var o=Xvt(t,r,i,a);if(o||(r.visible=!1),!!r.visible){Zvt(t,r,i,a),a("xhoverformat"),a("yhoverformat"),a("zorder");var s=Yvt(t,r,i,a);i.scattermode==="group"&&r.orientation===void 0&&a("orientation","v");var l=!s&&o<Wvt.PTS_LINESONLY?"lines+markers":"lines";a("text"),a("hovertext"),a("mode",l),rT.hasMarkers(r)&&Kvt(t,r,n,i,a,{gradient:!0}),rT.hasLines(r)&&(Jvt(t,r,n,i,a,{backoff:!0}),Ude(t,r,a),a("connectgaps"),a("line.simplify")),rT.hasText(r)&&(a("texttemplate"),a("texttemplatefallback"),$vt(t,r,i,a));var u=[];(rT.hasMarkers(r)||rT.hasText(r))&&(a("cliponaxis"),a("marker.maxdisplayed"),u.push("points")),a("fill",s?s.fillDflt:"none"),r.fill!=="none"&&(Qvt(t,r,n,a,{moduleHasFillgradient:!0}),rT.hasLines(r)||Ude(t,r,a),ept(a,"fillpattern",r.fillcolor,!1));var c=(r.line||{}).color,f=(r.marker||{}).color;(r.fill==="tonext"||r.fill==="toself")&&u.push("fills"),a("hoveron",u.join("+")||"points"),r.hoveron!=="fills"&&(a("hovertemplate"),a("hovertemplatefallback"));var h=Hvt.getComponentMethod("errorbars","supplyDefaults");h(t,r,c||f||n,{axis:"y"}),h(t,r,c||f||n,{axis:"x",inherit:"y"}),Nde.coerceSelectionMarkerOpacity(r,a)}}});var Hb=ye((Zor,Hde)=>{"use strict";var tpt=Nb().getAxisGroup;Hde.exports=function(t,r,n,i,a){var o=r.orientation,s=r[{v:"x",h:"y"}[o]+"axis"],l=tpt(n,s)+o,u=n._alignmentOpts||{},c=i("alignmentgroup"),f=u[l];f||(f=u[l]={});var h=f[c];h?h.traces.push(r):h=f[c]={traces:[r],alignmentIndex:Object.keys(f).length,offsetGroups:{}};var d=i("offsetgroup")||"",v=h.offsetGroups,_=v[d];r._offsetIndex=0,(a!=="group"||d)&&(_||(_=v[d]={offsetIndex:Object.keys(v).length}),r._offsetIndex=_.offsetIndex)}});var aU=ye((Yor,jde)=>{"use strict";var rpt=Dr(),ipt=Hb(),npt=pf();jde.exports=function(t,r){var n,i,a,o=r.scattermode;function s(h){return rpt.coerce(i._input,i,npt,h)}if(r.scattermode==="group")for(a=0;a<t.length;a++)i=t[a],i.type==="scatter"&&(n=i._input,ipt(n,i,r,s,o));for(a=0;a<t.length;a++){var l=t[a];if(l.type==="scatter"){var u=l.fill;if(!(u==="none"||u==="toself")&&(l.opacity=void 0,u==="tonexty"||u==="tonextx"))for(var c=a-1;c>=0;c--){var f=t[c];if(f.type==="scatter"&&f.xaxis===l.xaxis&&f.yaxis===l.yaxis){f.opacity=void 0;break}}}}}});var Xde=ye((Kor,Wde)=>{"use strict";var apt=Dr(),opt=X6();Wde.exports=function(e,t){function r(i,a){return apt.coerce(e,t,opt,i,a)}var n=t.barmode==="group";t.scattermode==="group"&&r("scattergap",n?t.bargap:.2)}});var Dg=ye((Jor,Yde)=>{"use strict";var spt=Eo(),Zde=Dr(),lpt=Zde.dateTime2ms,$P=Zde.incrementMonth,upt=fs(),cpt=upt.ONEAVGMONTH;Yde.exports=function(t,r,n,i){if(r.type!=="date")return{vals:i};var a=t[n+"periodalignment"];if(!a)return{vals:i};var o=t[n+"period"],s;if(spt(o)){if(o=+o,o<=0)return{vals:i}}else if(typeof o=="string"&&o.charAt(0)==="M"){var l=+o.substring(1);if(l>0&&Math.round(l)===l)s=l;else return{vals:i}}for(var u=r.calendar,c=a==="start",f=a==="end",h=t[n+"period0"],d=lpt(h,u)||0,v=[],_=[],b=[],p=i.length,k=0;k<p;k++){var E=i[k],S,L,x;if(s){for(S=Math.round((E-d)/(s*cpt)),x=$P(d,s*S,u);x>E;)x=$P(x,-s,u);for(;x<=E;)x=$P(x,s,u);L=$P(x,-s,u)}else{for(S=Math.round((E-d)/o),x=d+S*o;x>E;)x-=o;for(;x<=E;)x+=o;L=x-o}v[k]=c?L:f?x:(L+x)/2,_[k]=L,b[k]=x}return{vals:v,starts:_,ends:b}}});var F0=ye(($or,Jde)=>{"use strict";var oU=pv().hasColorscale,sU=gv(),Kde=Ru();Jde.exports=function(t,r){Kde.hasLines(r)&&oU(r,"line")&&sU(t,r,{vals:r.line.color,containerStr:"line",cLetter:"c"}),Kde.hasMarkers(r)&&(oU(r,"marker")&&sU(t,r,{vals:r.marker.color,containerStr:"marker",cLetter:"c"}),oU(r,"marker.line")&&sU(t,r,{vals:r.marker.line.color,containerStr:"marker.line",cLetter:"c"}))}});var km=ye((Qor,$de)=>{"use strict";var Yf=Dr();$de.exports=function(t,r){for(var n=0;n<t.length;n++)t[n].i=n;Yf.mergeArray(r.text,t,"tx"),Yf.mergeArray(r.texttemplate,t,"txt"),Yf.mergeArray(r.hovertext,t,"htx"),Yf.mergeArray(r.customdata,t,"data"),Yf.mergeArray(r.textposition,t,"tp"),r.textfont&&(Yf.mergeArrayCastPositive(r.textfont.size,t,"ts"),Yf.mergeArray(r.textfont.color,t,"tc"),Yf.mergeArray(r.textfont.family,t,"tf"),Yf.mergeArray(r.textfont.weight,t,"tw"),Yf.mergeArray(r.textfont.style,t,"ty"),Yf.mergeArray(r.textfont.variant,t,"tv"),Yf.mergeArray(r.textfont.textcase,t,"tC"),Yf.mergeArray(r.textfont.lineposition,t,"tE"),Yf.mergeArray(r.textfont.shadow,t,"tS"));var i=r.marker;if(i){Yf.mergeArrayCastPositive(i.size,t,"ms"),Yf.mergeArrayCastPositive(i.opacity,t,"mo"),Yf.mergeArray(i.symbol,t,"mx"),Yf.mergeArray(i.angle,t,"ma"),Yf.mergeArray(i.standoff,t,"mf"),Yf.mergeArray(i.color,t,"mc");var a=i.line;i.line&&(Yf.mergeArray(a.color,t,"mlc"),Yf.mergeArrayCastPositive(a.width,t,"mlw"));var o=i.gradient;o&&o.type!=="none"&&(Yf.mergeArray(o.type,t,"mgt"),Yf.mergeArray(o.color,t,"mgc"))}}});var z0=ye((esr,eve)=>{"use strict";var Qde=Dr();eve.exports=function(t,r){Qde.isArrayOrTypedArray(r.selectedpoints)&&Qde.tagSelected(t,r)}});var O0=ye((tsr,sve)=>{"use strict";var tve=Eo(),uU=Dr(),KM=ho(),rve=Dg(),lU=fs().BADNUM,cU=Ru(),fpt=F0(),hpt=km(),dpt=z0();function vpt(e,t){var r=e._fullLayout,n=t._xA=KM.getFromId(e,t.xaxis||"x","x"),i=t._yA=KM.getFromId(e,t.yaxis||"y","y"),a=n.makeCalcdata(t,"x"),o=i.makeCalcdata(t,"y"),s=rve(t,n,"x",a),l=rve(t,i,"y",o),u=s.vals,c=l.vals,f=t._length,h=new Array(f),d=t.ids,v=fU(t,r,n,i),_=!1,b,p,k,E,S,L;ave(r,t);var x="x",C="y",M;if(v)uU.pushUnique(v.traceIndices,t.index),b=v.orientation==="v",b?(C="s",M="x"):(x="s",M="y"),S=v.stackgaps==="interpolate";else{var g=nve(t,f);ive(e,t,n,i,u,c,g)}var P=!!t.xperiodalignment,T=!!t.yperiodalignment;for(p=0;p<f;p++){var z=h[p]={},O=tve(u[p]),V=tve(c[p]);O&&V?(z[x]=u[p],z[C]=c[p],P&&(z.orig_x=a[p],z.xEnd=s.ends[p],z.xStart=s.starts[p]),T&&(z.orig_y=o[p],z.yEnd=l.ends[p],z.yStart=l.starts[p])):v&&(b?O:V)?(z[M]=b?u[p]:c[p],z.gap=!0,S?(z.s=lU,_=!0):z.s=0):z[x]=z[C]=lU,d&&(z.id=String(d[p]))}if(hpt(h,t),fpt(e,t),dpt(h,t),v){for(p=0;p<h.length;)h[p][M]===lU?h.splice(p,1):p++;if(uU.sort(h,function(N,H){return N[M]-H[M]||N.i-H.i}),_){for(p=0;p<h.length-1&&h[p].gap;)p++;for(L=h[p].s,L||(L=h[p].s=0),k=0;k<p;k++)h[k].s=L;for(E=h.length-1;E>p&&h[E].gap;)E--;for(L=h[E].s,k=h.length-1;k>E;k--)h[k].s=L;for(;p<E;)if(p++,h[p].gap){for(k=p+1;h[k].gap;)k++;for(var G=h[p-1][M],Z=h[p-1].s,j=(h[k].s-Z)/(h[k][M]-G);p<k;)h[p].s=Z+(h[p][M]-G)*j,p++}}}return h}function ive(e,t,r,n,i,a,o){var s=t._length,l=e._fullLayout,u=r._id,c=n._id,f=l._firstScatter[ove(t)]===t.uid,h=(fU(t,l,r,n)||{}).orientation,d=t.fill;r._minDtick=0,n._minDtick=0;var v={padded:!0},_={padded:!0};o&&(v.ppad=_.ppad=o);var b=s<2||i[0]!==i[s-1]||a[0]!==a[s-1];b&&(d==="tozerox"||d==="tonextx"&&(f||h==="h"))?v.tozero=!0:!(t.error_y||{}).visible&&(d==="tonexty"||d==="tozeroy"||!cU.hasMarkers(t)&&!cU.hasText(t))&&(v.padded=!1,v.ppad=0),b&&(d==="tozeroy"||d==="tonexty"&&(f||h==="v"))?_.tozero=!0:(d==="tonextx"||d==="tozerox")&&(_.padded=!1),u&&(t._extremes[u]=KM.findExtremes(r,i,v)),c&&(t._extremes[c]=KM.findExtremes(n,a,_))}function nve(e,t){if(cU.hasMarkers(e)){var r=e.marker,n=1.6*(e.marker.sizeref||1),i;if(e.marker.sizemode==="area"?i=function(u){return Math.max(Math.sqrt((u||0)/n),3)}:i=function(u){return Math.max((u||0)/n,3)},uU.isArrayOrTypedArray(r.size)){var a={type:"linear"};KM.setConvert(a);for(var o=a.makeCalcdata(e.marker,"size"),s=new Array(t),l=0;l<t;l++)s[l]=i(o[l]);return s}else return i(r.size)}}function ave(e,t){var r=ove(t),n=e._firstScatter;n[r]||(n[r]=t.uid)}function ove(e){var t=e.stackgroup;return e.xaxis+e.yaxis+e.type+(t?"-"+t:"")}function fU(e,t,r,n){var i=e.stackgroup;if(i){var a=t._scatterStackOpts[r._id+n._id][i],o=a.orientation==="v"?n:r;if(o.type==="linear"||o.type==="log")return a}}sve.exports={calc:vpt,calcMarkerSize:nve,calcAxisExpansion:ive,setFirstScatter:ave,getStackOpts:fU}});var uve=ye((rsr,lve)=>{"use strict";lve.exports=QP;var ppt=Dr().distinctVals;function QP(e,t){this.traces=e,this.sepNegVal=t.sepNegVal,this.overlapNoMerge=t.overlapNoMerge;for(var r=1/0,n=t.posAxis._id.charAt(0),i=[],a=0;a<e.length;a++){for(var o=e[a],s=0;s<o.length;s++){var l=o[s],u=l.p;u===void 0&&(u=l[n]),u!==void 0&&i.push(u)}o[0]&&o[0].width1&&(r=Math.min(o[0].width1,r))}this.positions=i;var c=ppt(i);this.distinctPositions=c.vals,c.vals.length===1&&r!==1/0?this.minDiff=r:this.minDiff=Math.min(c.minDiff,r);var f=(t.posAxis||{}).type;(f==="category"||f==="multicategory")&&(this.minDiff=1),this.binWidth=this.minDiff,this.bins={}}QP.prototype.put=function(t,r,n){var i=this.getLabel(t,r,n),a=this.bins[i]||0;return this.bins[i]=a+n,a};QP.prototype.get=function(t,r,n){var i=this.getLabel(t,r,n);return this.bins[i]||0};QP.prototype.getLabel=function(t,r,n){var i=n<0&&this.sepNegVal?"v":"^",a=this.overlapNoMerge?t:Math.round(t/this.binWidth);return i+a+"g"+r}});var jb=ye((isr,dve)=>{"use strict";var q0=Eo(),p_=Dr().isArrayOrTypedArray,iT=fs().BADNUM,gpt=qa(),JM=ho(),mpt=Nb().getAxisGroup,eI=uve();function ypt(e,t){for(var r=t.xaxis,n=t.yaxis,i=e._fullLayout,a=e._fullData,o=e.calcdata,s=[],l=[],u=0;u<a.length;u++){var c=a[u];if(c.visible===!0&&gpt.traceIs(c,"bar")&&c.xaxis===r._id&&c.yaxis===n._id&&(c.orientation==="h"?s.push(o[u]):l.push(o[u]),c._computePh))for(var f=e.calcdata[u],h=0;h<f.length;h++)typeof f[h].ph0=="function"&&(f[h].ph0=f[h].ph0()),typeof f[h].ph1=="function"&&(f[h].ph1=f[h].ph1())}var d={xCat:r.type==="category"||r.type==="multicategory",yCat:n.type==="category"||n.type==="multicategory",mode:i.barmode,norm:i.barnorm,gap:i.bargap,groupgap:i.bargroupgap};dU(e,r,n,l,d),dU(e,n,r,s,d)}function dU(e,t,r,n,i){if(n.length){var a,o,s,l,u;switch(bpt(r,n),i.mode){case"overlay":hU(e,t,r,n,i);break;case"group":for(a=[],o=[],s=0;s<n.length;s++)l=n[s],u=l[0].trace,u.offset===void 0?o.push(l):a.push(l);o.length&&wpt(e,t,r,o,i),a.length&&hU(e,t,r,a,i);break;case"stack":case"relative":for(a=[],o=[],s=0;s<n.length;s++)l=n[s],u=l[0].trace,u.base===void 0?o.push(l):a.push(l);xpt(o),o.length&&Tpt(e,t,r,o,i),a.length&&hU(e,t,r,a,i);break}_pt(n),Cpt(n,t)}}function _pt(e){var t,r,n,i,a,o,s;for(t=0;t<e.length;t++)r=e[t],n=r[0].trace,i=r[0].t,i.cornerradiusvalue===void 0&&(a=n.marker?n.marker.cornerradius:void 0,a!==void 0&&(o=q0(a)?+a:+a.slice(0,-1),s=q0(a)?"px":"%",i.cornerradiusvalue=o,i.cornerradiusform=s))}function xpt(e){if(!(e.length<2)){var t,r,n,i,a,o,s;for(t=0;t<e.length&&(r=e[t],n=r[0].trace,a=n.marker?n.marker.cornerradius:void 0,a===void 0);t++);if(a!==void 0)for(o=q0(a)?+a:+a.slice(0,-1),s=q0(a)?"px":"%",t=0;t<e.length;t++)r=e[t],i=r[0].t,i.cornerradiusvalue=o,i.cornerradiusform=s}}function bpt(e,t){var r,n;for(r=0;r<t.length;r++){var i=t[r],a=i[0].trace,o=a.type==="funnel"?a._base:a.base,s,l=a.orientation==="h"?a.xcalendar:a.ycalendar,u=e.type==="category"||e.type==="multicategory"?function(){return null}:e.d2c;if(p_(o)){for(n=0;n<Math.min(o.length,i.length);n++)s=u(o[n],0,l),q0(s)?(i[n].b=+s,i[n].hasB=1):i[n].b=0;for(;n<i.length;n++)i[n].b=0}else{s=u(o,0,l);var c=q0(s);for(s=c?s:0,n=0;n<i.length;n++)i[n].b=s,c&&(i[n].hasB=1)}}}function hU(e,t,r,n,i){for(var a=0;a<n.length;a++){var o=n[a],s=new eI([o],{posAxis:t,sepNegVal:!1,overlapNoMerge:!i.norm});vU(e,t,s,i),i.norm?(hve(s),pU(r,s,i)):fve(r,s)}}function wpt(e,t,r,n,i){var a=new eI(n,{posAxis:t,sepNegVal:!1,overlapNoMerge:!i.norm});vU(e,t,a,i),Ept(a,t),i.norm?(hve(a),pU(r,a,i)):fve(r,a)}function Tpt(e,t,r,n,i){var a=new eI(n,{posAxis:t,sepNegVal:i.mode==="relative",overlapNoMerge:!(i.norm||i.mode==="stack"||i.mode==="relative")});vU(e,t,a,i),Mpt(r,a,i);for(var o=0;o<n.length;o++)for(var s=n[o],l=s[0].t.offsetindex,u=0;u<s.length;u++){var c=s[u];if(c.s!==iT){var f=c.b+c.s===a.get(c.p,l,c.s);f&&(c._outmost=!0)}}i.norm&&pU(r,a,i)}function vU(e,t,r,n){var i=e._fullLayout,a=r.positions,o=r.distinctPositions,s=r.minDiff,l=r.traces,u=l.length,c=a.length!==o.length,f=s*(1-n.gap),h,d,v,_;if(t._id==="angularaxis")h=f,d=h*(1-(n.groupgap||0)),v=-d/2;else{var b=mpt(i,t._id)+l[0][0].trace.orientation;_=i._alignmentOpts[b]||{}}for(var p=0;p<u;p++){var k=l[p],E=k[0].trace;if(t._id!=="angularaxis"){var S=_[E.alignmentgroup]||{},L=Object.keys(S.offsetGroups||{}).length;L?h=f/L:h=c?f/u:f,d=h*(1-(n.groupgap||0)),L?v=((2*E._offsetIndex+1-L)*h-d)/2:v=c?((2*p+1-u)*h-d)/2:-d/2}var x=k[0].t;x.barwidth=d,x.offsetindex=E._offsetIndex||0,x.poffset=v,x.bargroupwidth=f,x.bardelta=s}r.binWidth=l[0][0].t.barwidth/100,Apt(r),Spt(t,r),t._id==="angularaxis"?cve(t,r):cve(t,r,c)}function Apt(e){var t=e.traces,r,n;for(r=0;r<t.length;r++){var i=t[r],a=i[0],o=a.trace,s=a.t,l=o._offset||o.offset,u=s.poffset,c;if(p_(l)){for(c=Array.prototype.slice.call(l,0,i.length),n=0;n<c.length;n++)q0(c[n])||(c[n]=u);for(n=c.length;n<i.length;n++)c.push(u);s.poffset=c}else l!==void 0&&(s.poffset=l);var f=o._width||o.width,h=s.barwidth;if(p_(f)){var d=Array.prototype.slice.call(f,0,i.length);for(n=0;n<d.length;n++)q0(d[n])||(d[n]=h);for(n=d.length;n<i.length;n++)d.push(h);if(s.barwidth=d,l===void 0){for(c=[],n=0;n<i.length;n++)c.push(u+(h-d[n])/2);s.poffset=c}}else f!==void 0&&(s.barwidth=f,l===void 0&&(s.poffset=u+(h-f)/2))}}function Spt(e,t){for(var r=t.traces,n=nT(e),i=0;i<r.length;i++)for(var a=r[i],o=a[0].t,s=o.poffset,l=p_(s),u=o.barwidth,c=p_(u),f=0;f<a.length;f++){var h=a[f],d=h.w=c?u[f]:u;h.p===void 0&&(h.p=h[n],h["orig_"+n]=h[n]);var v=(l?s[f]:s)+d/2;h[n]=h.p+v}}function cve(e,t,r){var n=t.traces,i=t.minDiff,a=i/2;JM.minDtick(e,t.minDiff,t.distinctPositions[0],r);for(var o=0;o<n.length;o++){var s=n[o],l=s[0],u=l.trace,c=[],f,h,d,v;for(v=0;v<s.length;v++)f=s[v],h=f.p-a,d=f.p+a,c.push(h,d);if(u.width||u.offset){var _=l.t,b=_.poffset,p=_.barwidth,k=p_(b),E=p_(p);for(v=0;v<s.length;v++){f=s[v];var S=k?b[v]:b,L=E?p[v]:p;h=f.p+S,d=h+L,c.push(h,d)}}u._extremes[e._id]=JM.findExtremes(e,c,{padded:!1})}}function fve(e,t){for(var r=t.traces,n=nT(e),i=0;i<r.length;i++){for(var a=r[i],o=a[0].trace,s=o.type==="scatter",l=o.orientation==="v",u=[],c=!1,f=0;f<a.length;f++){var h=a[f],d=s?0:h.b,v=s?l?h.y:h.x:d+h.s;h[n]=v,u.push(v),h.hasB&&u.push(d),(!h.hasB||!h.b)&&(c=!0)}o._extremes[e._id]=JM.findExtremes(e,u,{tozero:c,padded:!0})}}function Mpt(e,t,r){var n=nT(e),i=t.traces,a,o,s,l,u,c,f;for(l=0;l<i.length;l++)if(a=i[l],o=a[0].trace,o.type==="funnel")for(f=a[0].t.offsetindex,u=0;u<a.length;u++)c=a[u],c.s!==iT&&t.put(c.p,f,-.5*c.s);for(l=0;l<i.length;l++){a=i[l],o=a[0].trace,s=o.type==="funnel",f=o.type==="barpolar"?0:a[0].t.offsetindex;var h=[];for(u=0;u<a.length;u++)if(c=a[u],c.s!==iT){var d;s?d=c.s:d=c.s+c.b;var v=t.put(c.p,f,d),_=v+d;c.b=v,c[n]=_,r.norm||(h.push(_),c.hasB&&h.push(v))}r.norm||(o._extremes[e._id]=JM.findExtremes(e,h,{tozero:!0,padded:!0}))}}function hve(e){for(var t=e.traces,r=0;r<t.length;r++)for(var n=t[r],i=n[0].t.offsetindex,a=0;a<n.length;a++){var o=n[a];o.s!==iT&&e.put(o.p,i,o.b+o.s)}}function Ept(e,t){for(var r=e.traces,n=0;n<r.length;n++){var i=r[n],a=i[0].trace,o=i[0].t.offsetindex;if(a.base===void 0)for(var s=new eI([i],{posAxis:t,sepNegVal:!0,overlapNoMerge:!0}),l=0;l<i.length;l++){var u=i[l];if(u.p!==iT){var c=s.put(u.p,o,u.b+u.s);c&&(u.b=c)}}}}function pU(e,t,r){var n=t.traces,i=nT(e),a=r.norm==="fraction"?1:100,o=a/1e9,s=e.l2c(e.c2l(0)),l=r.mode==="stack"?a:s;function u(x){return q0(e.c2l(x))&&(x<s-o||x>l+o||!q0(s))}for(var c=0;c<n.length;c++){for(var f=n[c],h=f[0].t.offsetindex,d=f[0].trace,v=[],_=!1,b=!1,p=0;p<f.length;p++){var k=f[p];if(k.s!==iT){var E=Math.abs(a/t.get(k.p,h,k.s));k.b*=E,k.s*=E;var S=k.b,L=S+k.s;k[i]=L,v.push(L),b=b||u(L),k.hasB&&(v.push(S),b=b||u(S)),(!k.hasB||!k.b)&&(_=!0)}}d._extremes[e._id]=JM.findExtremes(e,v,{tozero:_,padded:b})}}function kpt(e,t,r,n){for(var i=nT(n),a=0;a<e.length;a++)for(var o=e[a],s=0;s<o.length;s++){var l=o[s],u=l[i];l._sMin=t[u],l._sMax=r[u]}}function Cpt(e,t){var r=nT(t),n={},i,a,o,s=1/0,l=-1/0;for(i=0;i<e.length;i++)for(o=e[i],a=0;a<o.length;a++){var u=o[a].p;q0(u)&&(s=Math.min(s,u),l=Math.max(l,u))}var c=1e4/(l-s),f=n.round=function(M){return String(Math.round(c*(M-s)))},h={},d={},v=e.some(function(M){var g=M[0].trace;return"marker"in g&&g.marker.cornerradius});for(i=0;i<e.length;i++){o=e[i],o[0].t.extents=n;var _=o[0].t.poffset,b=p_(_);for(a=0;a<o.length;a++){var p=o[a],k=p[r]-p.w/2;if(q0(k)){var E=p[r]+p.w/2,S=f(p.p);n[S]?n[S]=[Math.min(k,n[S][0]),Math.max(E,n[S][1])]:n[S]=[k,E]}if(p.p0=p.p+(b?_[a]:_),p.p1=p.p0+p.w,p.s0=p.b,p.s1=p.s0+p.s,v){var L=Math.min(p.s0,p.s1)||0,x=Math.max(p.s0,p.s1)||0,C=p[r];h[C]=C in h?Math.min(h[C],L):L,d[C]=C in d?Math.max(d[C],x):x}}}v&&kpt(e,h,d,t)}function nT(e){return e._id.charAt(0)}dve.exports={crossTraceCalc:ypt,setGroupPositions:dU}});var mve=ye((nsr,gve)=>{"use strict";var vve=O0(),pve=jb().setGroupPositions;function Lpt(e,t){for(var r=t.xaxis,n=t.yaxis,i=e._fullLayout,a=e._fullData,o=e.calcdata,s=[],l=[],u=0;u<a.length;u++){var c=a[u];c.visible===!0&&c.type==="scatter"&&c.xaxis===r._id&&c.yaxis===n._id&&(c.orientation==="h"?s.push(o[u]):c.orientation==="v"&&l.push(o[u]))}var f={mode:i.scattermode,gap:i.scattergap};pve(e,r,n,l,f),pve(e,n,r,s,f)}gve.exports=function(t,r){t._fullLayout.scattermode==="group"&&Lpt(t,r);var n=r.xaxis,i=r.yaxis,a=n._id+i._id,o=t._fullLayout._scatterStackOpts[a];if(o){var s=t.calcdata,l,u,c,f,h,d,v,_,b,p,k,E,S,L,x;for(var C in o){p=o[C];var M=p.traceIndices;if(M.length){for(k=p.stackgaps==="interpolate",E=p.groupnorm,p.orientation==="v"?(S="x",L="y"):(S="y",L="x"),x=new Array(M.length),l=0;l<x.length;l++)x[l]=!1;d=s[M[0]];var g=new Array(d.length);for(l=0;l<d.length;l++)g[l]=d[l][S];for(l=1;l<M.length;l++){for(h=s[M[l]],u=c=0;u<h.length;u++){for(v=h[u][S];v>g[c]&&c<g.length;c++)gU(h,u,g[c],l,x,k,S),u++;if(v!==g[c]){for(f=0;f<l;f++)gU(s[M[f]],c,v,f,x,k,S);g.splice(c,0,v)}c++}for(;c<g.length;c++)gU(h,u,g[c],l,x,k,S),u++}var P=g.length;for(u=0;u<d.length;u++){for(_=d[u][L]=d[u].s,l=1;l<M.length;l++)h=s[M[l]],h[0].trace._rawLength=h[0].trace._length,h[0].trace._length=P,_+=h[u].s,h[u][L]=_;if(E)for(b=(E==="fraction"?_:_/100)||1,l=0;l<M.length;l++){var T=s[M[l]][u];T[L]/=b,T.sNorm=T.s/b}}for(l=0;l<M.length;l++){h=s[M[l]];var z=h[0].trace,O=vve.calcMarkerSize(z,z._rawLength),V=Array.isArray(O);if(O&&x[l]||V){var G=O;for(O=new Array(P),u=0;u<P;u++)O[u]=h[u].gap?0:V?G[h[u].i]:G}var Z=new Array(P),j=new Array(P);for(u=0;u<P;u++)Z[u]=h[u].x,j[u]=h[u].y;vve.calcAxisExpansion(t,z,n,i,Z,j,O),h[0].t.orientation=p.orientation}}}}};function gU(e,t,r,n,i,a,o){i[n]=!0;var s={i:null,gap:!0,s:0};if(s[o]=r,e.splice(t,0,s),t&&r===e[t-1][o]){var l=e[t-1];s.s=l.s,s.i=l.i,s.gap=l.gap}else a&&(s.s=Ppt(e,t,r,o));t||(e[0].t=e[1].t,e[0].trace=e[1].trace,delete e[1].t,delete e[1].trace)}function Ppt(e,t,r,n){var i=e[t-1],a=e[t+1];return a?i?i.s+(a.s-i.s)*(r-i[n])/(a[n]-i[n]):a.s:i.s}});var yU=ye((asr,Tve)=>{"use strict";var Ipt=So(),bve=fs(),$M=bve.BADNUM,wve=bve.LOG_CLIP,yve=wve+.5,_ve=wve-.5,tI=Dr(),Rpt=tI.segmentsIntersect,xve=tI.constrain,mU=Sm();Tve.exports=function(t,r){var n=r.trace||{},i=r.xaxis,a=r.yaxis,o=i.type==="log",s=a.type==="log",l=i._length,u=a._length,c=r.backoff,f=n.marker,h=r.connectGaps,d=r.baseTolerance,v=r.shape,_=v==="linear",b=n.fill&&n.fill!=="none",p=[],k=mU.minTolerance,E=t.length,S=new Array(E),L=0,x,C,M,g,P,T,z,O,V,G,Z,j,N,H,te,oe;function _e(dt){var Ge=t[dt];if(!Ge)return!1;var Je=r.linearized?i.l2p(Ge.x):i.c2p(Ge.x),je=r.linearized?a.l2p(Ge.y):a.c2p(Ge.y);if(Je===$M){if(o&&(Je=i.c2p(Ge.x,!0)),Je===$M)return!1;s&&je===$M&&(Je*=Math.abs(i._m*u*(i._m>0?yve:_ve)/(a._m*l*(a._m>0?yve:_ve)))),Je*=1e3}if(je===$M){if(s&&(je=a.c2p(Ge.y,!0)),je===$M)return!1;je*=1e3}return[Je,je]}function Ee(dt,Ge,Je,je){var tt=Je-dt,xt=je-Ge,Ie=.5-dt,xe=.5-Ge,ke=tt*tt+xt*xt,vt=tt*Ie+xt*xe;if(vt>0&&vt<ke){var ir=Ie*xt-xe*tt;if(ir*ir<ke)return!0}}var Ce,me;function ie(dt,Ge){var Je=dt[0]/l,je=dt[1]/u,tt=Math.max(0,-Je,Je-1,-je,je-1);return tt&&Ce!==void 0&&Ee(Je,je,Ce,me)&&(tt=0),tt&&Ge&&Ee(Je,je,Ge[0]/l,Ge[1]/u)&&(tt=0),(1+mU.toleranceGrowth*tt)*d}function Se(dt,Ge){var Je=dt[0]-Ge[0],je=dt[1]-Ge[1];return Math.sqrt(Je*Je+je*je)}var Le=mU.maxScreensAway,Ae=-l*Le,Fe=l*(1+Le),Pe=-u*Le,ge=u*(1+Le),Re=[[Ae,Pe,Fe,Pe],[Fe,Pe,Fe,ge],[Fe,ge,Ae,ge],[Ae,ge,Ae,Pe]],ce,Ze,ut,pt,Zt,st;function lt(dt,Ge){for(var Je=[],je=0,tt=0;tt<4;tt++){var xt=Re[tt],Ie=Rpt(dt[0],dt[1],Ge[0],Ge[1],xt[0],xt[1],xt[2],xt[3]);Ie&&(!je||Math.abs(Ie.x-Je[0][0])>1||Math.abs(Ie.y-Je[0][1])>1)&&(Ie=[Ie.x,Ie.y],je&&Se(Ie,dt)<Se(Je[0],dt)?Je.unshift(Ie):Je.push(Ie),je++)}return Je}function Gt(dt){if(dt[0]<Ae||dt[0]>Fe||dt[1]<Pe||dt[1]>ge)return[xve(dt[0],Ae,Fe),xve(dt[1],Pe,ge)]}function Nt(dt,Ge){if(dt[0]===Ge[0]&&(dt[0]===Ae||dt[0]===Fe)||dt[1]===Ge[1]&&(dt[1]===Pe||dt[1]===ge))return!0}function Jt(dt,Ge){var Je=[],je=Gt(dt),tt=Gt(Ge);return je&&tt&&Nt(je,tt)||(je&&Je.push(je),tt&&Je.push(tt)),Je}function sr(dt,Ge,Je){return function(je,tt){var xt=Gt(je),Ie=Gt(tt),xe=[];if(xt&&Ie&&Nt(xt,Ie))return xe;xt&&xe.push(xt),Ie&&xe.push(Ie);var ke=2*tI.constrain((je[dt]+tt[dt])/2,Ge,Je)-((xt||je)[dt]+(Ie||tt)[dt]);if(ke){var vt;xt&&Ie?vt=ke>0==xt[dt]>Ie[dt]?xt:Ie:vt=xt||Ie,vt[dt]+=ke}return xe}}var wr;v==="linear"||v==="spline"?wr=lt:v==="hv"||v==="vh"?wr=Jt:v==="hvh"?wr=sr(0,Ae,Fe):v==="vhv"&&(wr=sr(1,Pe,ge));function cr(dt,Ge){var Je=Ge[0]-dt[0],je=(Ge[1]-dt[1])/Je,tt=(dt[1]*Ge[0]-Ge[1]*dt[0])/Je;return tt>0?[je>0?Ae:Fe,ge]:[je>0?Fe:Ae,Pe]}function $e(dt){var Ge=dt[0],Je=dt[1],je=Ge===S[L-1][0],tt=Je===S[L-1][1];if(!(je&&tt))if(L>1){var xt=Ge===S[L-2][0],Ie=Je===S[L-2][1];je&&(Ge===Ae||Ge===Fe)&&xt?Ie?L--:S[L-1]=dt:tt&&(Je===Pe||Je===ge)&&Ie?xt?L--:S[L-1]=dt:S[L++]=dt}else S[L++]=dt}function St(dt){S[L-1][0]!==dt[0]&&S[L-1][1]!==dt[1]&&$e([ut,pt]),$e(dt),Zt=null,ut=pt=0}var Qt=tI.isArrayOrTypedArray(f);function Vt(dt){if(dt&&c&&(dt.i=x,dt.d=t,dt.trace=n,dt.marker=Qt?f[dt.i]:f,dt.backoff=c),Ce=dt[0]/l,me=dt[1]/u,ce=dt[0]<Ae?Ae:dt[0]>Fe?Fe:0,Ze=dt[1]<Pe?Pe:dt[1]>ge?ge:0,ce||Ze){if(!L)S[L++]=[ce||dt[0],Ze||dt[1]];else if(Zt){var Ge=wr(Zt,dt);Ge.length>1&&(St(Ge[0]),S[L++]=Ge[1])}else st=wr(S[L-1],dt)[0],S[L++]=st;var Je=S[L-1];ce&&Ze&&(Je[0]!==ce||Je[1]!==Ze)?(Zt&&(ut!==ce&&pt!==Ze?$e(ut&&pt?cr(Zt,dt):[ut||ce,pt||Ze]):ut&&pt&&$e([ut,pt])),$e([ce,Ze])):ut-ce&&pt-Ze&&$e([ce||ut,Ze||pt]),Zt=dt,ut=ce,pt=Ze}else Zt&&St(wr(Zt,dt)[0]),S[L++]=dt}for(x=0;x<E;x++)if(C=_e(x),!!C){for(L=0,Zt=null,Vt(C),x++;x<E;x++){if(g=_e(x),!g){if(h)continue;break}if(!_||!r.simplify){Vt(g);continue}var _t=_e(x+1);if(G=Se(g,C),!(!(b&&(L===0||L===E-1))&&G<ie(g,_t)*k)){for(O=[(g[0]-C[0])/G,(g[1]-C[1])/G],P=C,Z=G,j=H=te=0,z=!1,M=g,x++;x<t.length;x++){if(T=_t,_t=_e(x+1),!T){if(h)continue;break}if(V=[T[0]-C[0],T[1]-C[1]],oe=V[0]*O[1]-V[1]*O[0],H=Math.min(H,oe),te=Math.max(te,oe),te-H>ie(T,_t))break;M=T,N=V[0]*O[0]+V[1]*O[1],N>Z?(Z=N,g=T,z=!1):N<j&&(j=N,P=T,z=!0)}if(z?(Vt(g),M!==P&&Vt(P)):(P!==C&&Vt(P),M!==g&&Vt(g)),Vt(M),x>=t.length||!T)break;Vt(T),C=T}}Zt&&$e([ut||Zt[0],pt||Zt[1]]),p.push(S.slice(0,L))}var It=v.slice(v.length-1);if(c&&It!=="h"&&It!=="v"){for(var mt=!1,er=-1,lr=[],Tr=0;Tr<p.length;Tr++)for(var Lr=0;Lr<p[Tr].length-1;Lr++){var ti=p[Tr][Lr],Br=p[Tr][Lr+1],Vr=Ipt.applyBackoff(Br,ti);(Vr[0]!==Br[0]||Vr[1]!==Br[1])&&(mt=!0),lr[er+1]||(er++,lr[er]=[ti,[Vr[0],Vr[1]]])}return mt?lr:p}return p}});var _U=ye((osr,Sve)=>{"use strict";var Ave={tonextx:1,tonexty:1,tonext:1};Sve.exports=function(t,r,n){var i,a,o,s,l,u={},c=!1,f=-1,h=0,d=-1;for(a=0;a<n.length;a++)i=n[a][0].trace,o=i.stackgroup||"",o?o in u?l=u[o]:(l=u[o]=h,h++):i.fill in Ave&&d>=0?l=d:(l=d=h,h++),l<f&&(c=!0),i._groupIndex=f=l;var v=n.slice();c&&v.sort(function(b,p){var k=b[0].trace,E=p[0].trace;return k._groupIndex-E._groupIndex||k.index-E.index});var _={};for(a=0;a<v.length;a++)i=v[a][0].trace,o=i.stackgroup||"",i.visible===!0?(i._nexttrace=null,i.fill in Ave&&(s=_[o],i._prevtrace=s||null,s&&(s._nexttrace=i)),i._ownfill=i.fill&&(i.fill.slice(0,6)==="tozero"||i.fill==="toself"||i.fill.slice(0,2)==="to"&&!i._prevtrace),_[o]=i):i._prevtrace=i._nexttrace=i._ownfill=null;return v}});var sT=ye((ssr,kve)=>{"use strict";var Fg=Oa(),Dpt=qa(),QM=Dr(),aT=QM.ensureSingle,Eve=QM.identity,Kf=So(),oT=Ru(),Fpt=yU(),zpt=_U(),rI=kM().tester;kve.exports=function(t,r,n,i,a,o){var s,l,u=!a,c=!!a&&a.duration>0,f=zpt(t,r,n);if(s=i.selectAll("g.trace").data(f,function(d){return d[0].trace.uid}),s.enter().append("g").attr("class",function(d){return"trace scatter trace"+d[0].trace.uid}).style("stroke-miterlimit",2),s.order(),Opt(t,s,r),c){o&&(l=o());var h=Fg.transition().duration(a.duration).ease(a.easing).each("end",function(){l&&l()}).each("interrupt",function(){l&&l()});h.each(function(){i.selectAll("g.trace").each(function(d,v){Mve(t,v,r,d,f,this,a)})})}else s.each(function(d,v){Mve(t,v,r,d,f,this,a)});u&&s.exit().remove(),i.selectAll("path:not([d])").remove()};function Opt(e,t,r){t.each(function(n){var i=aT(Fg.select(this),"g","fills");Kf.setClipUrl(i,r.layerClipId,e);var a=n[0].trace,o=[];a._ownfill&&o.push("_ownFill"),a._nexttrace&&o.push("_nextFill");var s=i.selectAll("g").data(o,Eve);s.enter().append("g"),s.exit().each(function(l){a[l]=null}).remove(),s.order().each(function(l){a[l]=aT(Fg.select(this),"path","js-fill")})})}function Mve(e,t,r,n,i,a,o){var s=e._context.staticPlot,l;qpt(e,t,r,n,i);var u=!!o&&o.duration>0;function c(sr){return u?sr.transition():sr}var f=r.xaxis,h=r.yaxis,d=n[0].trace,v=d.line,_=Fg.select(a),b=aT(_,"g","errorbars"),p=aT(_,"g","lines"),k=aT(_,"g","points"),E=aT(_,"g","text");if(Dpt.getComponentMethod("errorbars","plot")(e,b,r,o),d.visible!==!0)return;c(_).style("opacity",d.opacity);var S,L,x=d.fill.charAt(d.fill.length-1);x!=="x"&&x!=="y"&&(x="");var C,M;x==="y"?(C=1,M=h.c2p(0,!0)):x==="x"&&(C=0,M=f.c2p(0,!0)),n[0][r.isRangePlot?"nodeRangePlot3":"node3"]=_;var g="",P=[],T=d._prevtrace,z=null,O=null;T&&(g=T._prevRevpath||"",L=T._nextFill,P=T._ownPolygons,z=T._fillsegments,O=T._fillElement);var V,G,Z="",j="",N,H,te,oe,_e,Ee,Ce=[];d._polygons=[];var me=[],ie=[],Se=QM.noop;if(S=d._ownFill,oT.hasLines(d)||d.fill!=="none"){L&&L.datum(n),["hv","vh","hvh","vhv"].indexOf(v.shape)!==-1?(N=Kf.steps(v.shape),H=Kf.steps(v.shape.split("").reverse().join(""))):v.shape==="spline"?N=H=function(sr){var wr=sr[sr.length-1];return sr.length>1&&sr[0][0]===wr[0]&&sr[0][1]===wr[1]?Kf.smoothclosed(sr.slice(1),v.smoothing):Kf.smoothopen(sr,v.smoothing)}:N=H=function(sr){return"M"+sr.join("L")},te=function(sr){return H(sr.reverse())},ie=Fpt(n,{xaxis:f,yaxis:h,trace:d,connectGaps:d.connectgaps,baseTolerance:Math.max(v.width||1,3)/4,shape:v.shape,backoff:v.backoff,simplify:v.simplify,fill:d.fill}),me=new Array(ie.length);var Le=0;for(l=0;l<ie.length;l++){var Ae,Fe=ie[l];!Ae||!x?(Ae=Fe.slice(),me[Le]=Ae,Le++):Ae.push.apply(Ae,Fe)}d._fillElement=null,d._fillExclusionElement=O,d._fillsegments=me.slice(0,Le),me=d._fillsegments,ie.length&&(oe=ie[0][0].slice(),_e=ie[ie.length-1],Ee=_e[_e.length-1].slice()),Se=function(sr){return function(wr){if(V=N(wr),G=te(wr),Z?x?(Z+="L"+V.slice(1),j=G+("L"+j.slice(1))):(Z+="Z"+V,j=G+"Z"+j):(Z=V,j=G),oT.hasLines(d)){var cr=Fg.select(this);if(cr.datum(n),sr)c(cr.style("opacity",0).attr("d",V).call(Kf.lineGroupStyle)).style("opacity",1);else{var $e=c(cr);$e.attr("d",V),Kf.singleLineStyle(n,$e)}}}}}var Pe=p.selectAll(".js-line").data(ie);c(Pe.exit()).style("opacity",0).remove(),Pe.each(Se(!1)),Pe.enter().append("path").classed("js-line",!0).style("vector-effect",s?"none":"non-scaling-stroke").call(Kf.lineGroupStyle).each(Se(!0)),Kf.setClipUrl(Pe,r.layerClipId,e);function ge(sr){c(sr).attr("d","M0,0Z")}var Re=function(){var sr=new Array(me.length);for(l=0;l<me.length;l++)sr[l]=rI(me[l]);return sr},ce=function(sr){var wr,cr;if(!sr||sr.length===0)for(wr=new Array(me.length),cr=0;cr<me.length;cr++){var $e=me[cr][0].slice(),St=me[cr][me[cr].length-1].slice();$e[C]=St[C]=M;var Qt=[St,$e],Vt=Qt.concat(me[cr]);wr[cr]=rI(Vt)}else{for(wr=new Array(sr.length-1+me.length),cr=0;cr<sr.length-1;cr++)wr[cr]=rI(sr[cr]);var _t=sr[sr.length-1].slice();for(_t.reverse(),cr=0;cr<me.length;cr++)wr[sr.length-1+cr]=rI(me[cr].concat(_t))}return wr};ie.length?(S?(S.datum(n),oe&&Ee&&(x?(oe[C]=Ee[C]=M,c(S).attr("d","M"+Ee+"L"+oe+"L"+Z.slice(1)).call(Kf.singleFillStyle,e),Ce=ce(null)):(c(S).attr("d",Z+"Z").call(Kf.singleFillStyle,e),Ce=Re())),d._polygons=Ce,d._fillElement=S):L&&(d.fill.slice(0,6)==="tonext"&&Z&&g?(d.fill==="tonext"?(c(L).attr("d",Z+"Z"+g+"Z").call(Kf.singleFillStyle,e),Ce=Re(),d._polygons=Ce.concat(P)):(c(L).attr("d",Z+"L"+g.slice(1)+"Z").call(Kf.singleFillStyle,e),Ce=ce(z),d._polygons=Ce),d._fillElement=L):ge(L)),d._prevRevpath=j):(S?ge(S):L&&ge(L),d._prevRevpath=null),d._ownPolygons=Ce;function Ze(sr){return sr.filter(function(wr){return!wr.gap&&wr.vis})}function ut(sr){return sr.filter(function(wr){return wr.vis})}function pt(sr){return sr.filter(function(wr){return!wr.gap})}function Zt(sr){return sr.id}function st(sr){if(sr.ids)return Zt}function lt(){return!1}function Gt(sr,wr,cr){var $e,St,Qt,Vt=cr[0].trace,_t=oT.hasMarkers(Vt),It=oT.hasText(Vt),mt=st(Vt),er=lt,lr=lt;if(_t||It){var Tr=Eve,Lr=Vt.stackgroup,ti=Lr&&e._fullLayout._scatterStackOpts[f._id+h._id][Lr].stackgaps==="infer zero";Vt.marker.maxdisplayed||Vt._needsCull?Tr=ti?ut:Ze:Lr&&!ti&&(Tr=pt),_t&&(er=Tr),It&&(lr=Tr)}St=sr.selectAll("path.point"),$e=St.data(er,mt);var Br=$e.enter().append("path").classed("point",!0);u&&Br.call(Kf.pointStyle,Vt,e).call(Kf.translatePoints,f,h).style("opacity",0).transition().style("opacity",1),$e.order();var Vr;_t&&(Vr=Kf.makePointStyleFns(Vt)),$e.each(function(dt){var Ge=Fg.select(this),Je=c(Ge);Qt=Kf.translatePoint(dt,Je,f,h),Qt?(Kf.singlePointStyle(dt,Je,Vt,Vr,e),r.layerClipId&&Kf.hideOutsideRangePoint(dt,Je,f,h,Vt.xcalendar,Vt.ycalendar),Vt.customdata&&Ge.classed("plotly-customdata",dt.data!==null&&dt.data!==void 0)):Je.remove()}),u?$e.exit().transition().style("opacity",0).remove():$e.exit().remove(),St=wr.selectAll("g"),$e=St.data(lr,mt),$e.enter().append("g").classed("textpoint",!0).append("text"),$e.order(),$e.each(function(dt){var Ge=Fg.select(this),Je=c(Ge.select("text"));Qt=Kf.translatePoint(dt,Je,f,h),Qt?r.layerClipId&&Kf.hideOutsideRangePoint(dt,Ge,f,h,Vt.xcalendar,Vt.ycalendar):Ge.remove()}),$e.selectAll("text").call(Kf.textPointStyle,Vt,e).each(function(dt){var Ge=f.c2p(dt.x),Je=h.c2p(dt.y);Fg.select(this).selectAll("tspan.line").each(function(){c(Fg.select(this)).attr({x:Ge,y:Je})})}),$e.exit().remove()}k.datum(n),E.datum(n),Gt(k,E,n);var Nt=d.cliponaxis===!1,Jt=Nt?null:r.layerClipId;Kf.setClipUrl(k,Jt,e),Kf.setClipUrl(E,Jt,e)}function qpt(e,t,r,n,i){var a=r.xaxis,o=r.yaxis,s=Fg.extent(QM.simpleMap(a.range,a.r2c)),l=Fg.extent(QM.simpleMap(o.range,o.r2c)),u=n[0].trace;if(oT.hasMarkers(u)){var c=u.marker.maxdisplayed;if(c!==0){var f=n.filter(function(_){return _.x>=s[0]&&_.x<=s[1]&&_.y>=l[0]&&_.y<=l[1]}),h=Math.ceil(f.length/c),d=0;i.forEach(function(_,b){var p=_[0].trace;oT.hasMarkers(p)&&p.marker.maxdisplayed>0&&b<t&&d++});var v=Math.round(d*h/3+Math.floor(d/3)*h/7.1);n.forEach(function(_){delete _.vis}),f.forEach(function(_,b){Math.round((b+v)%h)===0&&(_.vis=!0)})}}}});var $d=ye((lsr,Cve)=>{"use strict";Cve.exports={container:"marker",min:"cmin",max:"cmax"}});var nI=ye((usr,Lve)=>{"use strict";var iI=ho();Lve.exports=function(t,r,n){var i={},a={_fullLayout:n},o=iI.getFromTrace(a,r,"x"),s=iI.getFromTrace(a,r,"y"),l=t.orig_x;l===void 0&&(l=t.x);var u=t.orig_y;return u===void 0&&(u=t.y),i.xLabel=iI.tickText(o,o.c2l(l),!0).text,i.yLabel=iI.tickText(s,s.c2l(u),!0).text,i}});var op=ye((csr,Pve)=>{"use strict";var xU=Oa(),lT=So(),Bpt=qa();function Npt(e){var t=xU.select(e).selectAll("g.trace.scatter");t.style("opacity",function(r){return r[0].trace.opacity}),t.selectAll("g.points").each(function(r){var n=xU.select(this),i=r.trace||r[0].trace;bU(n,i,e)}),t.selectAll("g.text").each(function(r){var n=xU.select(this),i=r.trace||r[0].trace;wU(n,i,e)}),t.selectAll("g.trace path.js-line").call(lT.lineGroupStyle),t.selectAll("g.trace path.js-fill").call(lT.fillGroupStyle,e,!1),Bpt.getComponentMethod("errorbars","style")(t)}function bU(e,t,r){lT.pointStyle(e.selectAll("path.point"),t,r)}function wU(e,t,r){lT.textPointStyle(e.selectAll("text"),t,r)}function Upt(e,t,r){var n=t[0].trace;n.selectedpoints?(lT.selectedPointStyle(r.selectAll("path.point"),n),lT.selectedTextStyle(r.selectAll("text"),n)):(bU(r,n,e),wU(r,n,e))}Pve.exports={style:Npt,stylePoints:bU,styleText:wU,styleOnSelect:Upt}});var cT=ye((fsr,Ive)=>{"use strict";var uT=ka(),Vpt=Ru();Ive.exports=function(t,r){var n,i;if(t.mode==="lines")return n=t.line.color,n&&uT.opacity(n)?n:t.fillcolor;if(t.mode==="none")return t.fill?t.fillcolor:"";var a=r.mcc||(t.marker||{}).color,o=r.mlcc||((t.marker||{}).line||{}).color;return i=a&&uT.opacity(a)?a:o&&uT.opacity(o)&&(r.mlw||((t.marker||{}).line||{}).width)?o:"",i?uT.opacity(i)<.3?uT.addOpacity(i,.3):i:(n=(t.line||{}).color,n&&uT.opacity(n)&&Vpt.hasLines(t)&&t.line.width?n:t.fillcolor)}});var fT=ye((hsr,Dve)=>{"use strict";var aI=Dr(),Rve=vf(),Gpt=qa(),Hpt=cT(),TU=ka(),jpt=aI.fillText;Dve.exports=function(t,r,n,i){var a=t.cd,o=a[0].trace,s=t.xa,l=t.ya,u=s.c2p(r),c=l.c2p(n),f=[u,c],h=o.hoveron||"",d=o.mode.indexOf("markers")!==-1?3:.5,v=!!o.xperiodalignment,_=!!o.yperiodalignment;if(h.indexOf("points")!==-1){var b=function(j){if(v){var N=s.c2p(j.xStart),H=s.c2p(j.xEnd);return u>=Math.min(N,H)&&u<=Math.max(N,H)?0:1/0}var te=Math.max(3,j.mrc||0),oe=1-1/te,_e=Math.abs(s.c2p(j.x)-u);return _e<te?oe*_e/te:_e-te+oe},p=function(j){if(_){var N=l.c2p(j.yStart),H=l.c2p(j.yEnd);return c>=Math.min(N,H)&&c<=Math.max(N,H)?0:1/0}var te=Math.max(3,j.mrc||0),oe=1-1/te,_e=Math.abs(l.c2p(j.y)-c);return _e<te?oe*_e/te:_e-te+oe},k=function(j){var N=Math.max(d,j.mrc||0),H=s.c2p(j.x)-u,te=l.c2p(j.y)-c;return Math.max(Math.sqrt(H*H+te*te)-N,1-d/N)},E=Rve.getDistanceFunction(i,b,p,k);if(Rve.getClosest(a,E,t),t.index!==!1){var S=a[t.index],L=s.c2p(S.x,!0),x=l.c2p(S.y,!0),C=S.mrc||1;t.index=S.i;var M=a[0].t.orientation,g=M&&(S.sNorm||S.s),P=M==="h"?g:S.orig_x!==void 0?S.orig_x:S.x,T=M==="v"?g:S.orig_y!==void 0?S.orig_y:S.y;return aI.extendFlat(t,{color:Hpt(o,S),x0:L-C,x1:L+C,xLabelVal:P,y0:x-C,y1:x+C,yLabelVal:T,spikeDistance:k(S),hovertemplate:o.hovertemplate}),jpt(S,o,t),Gpt.getComponentMethod("errorbars","hoverInfo")(S,o,t),[t]}}function z(j){if(!j)return!1;var N=j.node();try{var H=new DOMPoint(f[0],f[1]);return N.isPointInFill(H)}catch(oe){var te=N.ownerSVGElement.createSVGPoint();return te.x=f[0],te.y=f[1],N.isPointInFill(te)}}function O(j){var N,H=[],te=1/0,oe=-1/0,_e=1/0,Ee=-1/0,Ce;for(N=0;N<j.length;N++){var me=j[N];me.contains(f)&&(H.push(me),_e=Math.min(_e,me.ymin),Ee=Math.max(Ee,me.ymax))}if(H.length===0)return null;_e=Math.max(_e,0),Ee=Math.min(Ee,l._length),Ce=(_e+Ee)/2;var ie,Se,Le,Ae,Fe,Pe,ge;for(N=0;N<H.length;N++)for(Se=H[N].pts,ie=1;ie<Se.length;ie++)Pe=Se[ie-1][1],ge=Se[ie][1],Pe>Ce!=ge>=Ce&&(Ae=Se[ie-1][0],Fe=Se[ie][0],ge-Pe&&(Le=Ae+(Fe-Ae)*(Ce-Pe)/(ge-Pe),te=Math.min(te,Le),oe=Math.max(oe,Le)));return te=Math.max(te,0),oe=Math.min(oe,s._length),{x0:te,x1:oe,y0:Ce,y1:Ce}}if(h.indexOf("fills")!==-1&&o._fillElement){var V=z(o._fillElement)&&!z(o._fillExclusionElement);if(V){var G=O(o._polygons);G===null&&(G={x0:f[0],x1:f[0],y0:f[1],y1:f[1]});var Z=TU.defaultLine;return TU.opacity(o.fillcolor)?Z=o.fillcolor:TU.opacity((o.line||{}).color)&&(Z=o.line.color),aI.extendFlat(t,{distance:t.maxHoverDistance,x0:G.x0,x1:G.x1,y0:G.y0,y1:G.y1,color:Z,hovertemplate:!1}),delete t.index,o.text&&!aI.isArrayOrTypedArray(o.text)?t.text=String(o.text):t.text=o.name,[t]}}}});var hT=ye((dsr,zve)=>{"use strict";var Fve=Ru();zve.exports=function(t,r){var n=t.cd,i=t.xaxis,a=t.yaxis,o=[],s=n[0].trace,l,u,c,f,h=!Fve.hasMarkers(s)&&!Fve.hasText(s);if(h)return[];if(r===!1)for(l=0;l<n.length;l++)n[l].selected=0;else for(l=0;l<n.length;l++)u=n[l],c=i.c2p(u.x),f=a.c2p(u.y),u.i!==null&&r.contains([c,f],!1,l,t)?(o.push({pointNumber:u.i,x:i.c2d(u.x),y:a.c2d(u.y)}),u.selected=1):u.selected=0;return o}});var qve=ye((vsr,Ove)=>{"use strict";Ove.exports={xaxis:{valType:"subplotid",dflt:"x",editType:"calc+clearAxisTypes"},yaxis:{valType:"subplotid",dflt:"y",editType:"calc+clearAxisTypes"}}});var SU=ye((psr,Uve)=>{"use strict";var e4=qa().traceIs,AU=R3();Uve.exports=function(t,r,n,i){n("autotypenumbers",i.autotypenumbersDflt);var a=n("type",(i.splomStash||{}).type);a==="-"&&(Wpt(r,i.data),r.type==="-"?r.type="linear":t.type=r.type)};function Wpt(e,t){if(e.type==="-"){var r=e._id,n=r.charAt(0),i;r.indexOf("scene")!==-1&&(r=n);var a=Xpt(t,r,n);if(a){if(a.type==="histogram"&&n==={v:"y",h:"x"}[a.orientation||"v"]){e.type="linear";return}var o=n+"calendar",s=a[o],l={noMultiCategory:!e4(a,"cartesian")||e4(a,"noMultiCategory")};if(a.type==="box"&&a._hasPreCompStats&&n==={h:"x",v:"y"}[a.orientation||"v"]&&(l.noMultiCategory=!0),l.autotypenumbers=e.autotypenumbers,Nve(a,n)){var u=Bve(a),c=[];for(i=0;i<t.length;i++){var f=t[i];!e4(f,"box-violin")||(f[n+"axis"]||n)!==r||(f[u]!==void 0?c.push(f[u][0]):f.name!==void 0?c.push(f.name):c.push("text"),f[o]!==s&&(s=void 0))}e.type=AU(c,s,l)}else if(a.type==="splom"){var h=a.dimensions,d=h[a._axesDim[r]];d.visible&&(e.type=AU(d.values,s,l))}else e.type=AU(a[n]||[a[n+"0"]],s,l)}}}function Xpt(e,t,r){for(var n=0;n<e.length;n++){var i=e[n];if(i.type==="splom"&&i._length>0&&(i["_"+r+"axes"]||{})[t])return i;if((i[r+"axis"]||r)===t){if(Nve(i,r))return i;if((i[r]||[]).length||i[r+"0"])return i}}}function Bve(e){return{v:"x",h:"y"}[e.orientation||"v"]}function Nve(e,t){var r=Bve(e),n=e4(e,"box-violin"),i=e4(e._fullInput||{},"candlestick");return n&&!i&&t===r&&e[r]===void 0&&e[r+"0"]===void 0}});var oI=ye((gsr,Vve)=>{"use strict";var Zpt=vv().isTypedArraySpec;function Ypt(e,t){var r=t.dataAttr||e._id.charAt(0),n={},i,a,o;if(t.axData)i=t.axData;else for(i=[],a=0;a<t.data.length;a++){var s=t.data[a];s[r+"axis"]===e._id&&i.push(s)}for(a=0;a<i.length;a++){var l=i[a][r];for(o=0;o<l.length;o++){var u=l[o];u!=null&&(n[u]=1)}}return Object.keys(n)}Vve.exports=function(t,r,n,i){if(r.type==="category"){var a=t.categoryarray,o=Array.isArray(a)&&a.length>0||Zpt(a),s;o&&(s="array");var l=n("categoryorder",s),u;l==="array"&&(u=n("categoryarray")),!o&&l==="array"&&(l=r.categoryorder="trace"),l==="trace"?r._initialCategories=[]:l==="array"?r._initialCategories=u.slice():(u=Ypt(r,i).sort(),l==="category ascending"?r._initialCategories=u:l==="category descending"&&(r._initialCategories=u.reverse()))}}});var t4=ye((msr,Hve)=>{"use strict";var Gve=cd().mix,Kpt=Lh(),Jpt=Dr();Hve.exports=function(t,r,n,i){i=i||{};var a=i.dfltColor;function o(M,g){return Jpt.coerce2(t,r,i.attributes,M,g)}var s=o("linecolor",a),l=o("linewidth"),u=n("showline",i.showLine||!!s||!!l);u||(delete r.linecolor,delete r.linewidth);var c=Gve(a,i.bgColor,i.blend||Kpt.lightFraction).toRgbString(),f=o("gridcolor",c),h=o("gridwidth"),d=o("griddash"),v=n("showgrid",i.showGrid||!!f||!!h||!!d);if(v||(delete r.gridcolor,delete r.gridwidth,delete r.griddash),i.hasMinor){var _=Gve(r.gridcolor,i.bgColor,67).toRgbString(),b=o("minor.gridcolor",_),p=o("minor.gridwidth",r.gridwidth||1),k=o("minor.griddash",r.griddash||"solid"),E=n("minor.showgrid",!!b||!!p||!!k);E||(delete r.minor.gridcolor,delete r.minor.gridwidth,delete r.minor.griddash)}if(!i.noZeroLine){var S=o("zerolinelayer"),L=o("zerolinecolor",a),x=o("zerolinewidth"),C=n("zeroline",i.showGrid||!!L||!!x);C||(delete r.zerolinelayer,delete r.zerolinecolor,delete r.zerolinewidth)}}});var i4=ye((ysr,Kve)=>{"use strict";var jve=Eo(),$pt=qa(),r4=Dr(),Qpt=vl(),e0t=Yd(),MU=Rd(),Wve=bb(),Xve=M3(),t0t=e_(),r0t=t_(),i0t=oI(),n0t=t4(),a0t=xB(),Zve=ym(),sI=hd().WEEKDAY_PATTERN,o0t=hd().HOUR_PATTERN;Kve.exports=function(t,r,n,i,a){var o=i.letter,s=i.font||{},l=i.splomStash||{},u=n("visible",!i.visibleDflt),c=r._template||{},f=r.type||c.type||"-",h;if(f==="date"){var d=$pt.getComponentMethod("calendars","handleDefaults");d(t,r,"calendar",i.calendar),i.noTicklabelmode||(h=n("ticklabelmode"))}!i.noTicklabelindex&&(f==="date"||f==="linear")&&n("ticklabelindex");var v="";(!i.noTicklabelposition||f==="multicategory")&&(v=r4.coerce(t,r,{ticklabelposition:{valType:"enumerated",dflt:"outside",values:h==="period"?["outside","inside"]:o==="x"?["outside","inside","outside left","inside left","outside right","inside right"]:["outside","inside","outside top","inside top","outside bottom","inside bottom"]}},"ticklabelposition")),i.noTicklabeloverflow||n("ticklabeloverflow",v.indexOf("inside")!==-1?"hide past domain":f==="category"||f==="multicategory"?"allow":"hide past div"),Zve(r,a),a0t(t,r,n,i),i0t(t,r,n,i),i.noHover||(f!=="category"&&n("hoverformat"),i.noUnifiedhovertitle||n("unifiedhovertitle.text"));var _=n("color"),b=_!==MU.color.dflt?_:s.color,p=l.label||a._dfltTitle[o];if(r0t(t,r,n,f,i),!u)return r;n("title.text",p),r4.coerceFont(n,"title.font",s,{overrideDflt:{size:r4.bigFont(s.size),color:b}}),Wve(t,r,n,f);var k=i.hasMinor;if(k&&(Qpt.newContainer(r,"minor"),Wve(t,r,n,f,{isMinor:!0})),t0t(t,r,n,f,i),Xve(t,r,n,i),k){var E=i.isMinor;i.isMinor=!0,Xve(t,r,n,i),i.isMinor=E}n0t(t,r,n,{dfltColor:_,bgColor:i.bgColor,showGrid:i.showGrid,hasMinor:k,attributes:MU}),k&&!r.minor.ticks&&!r.minor.showgrid&&delete r.minor,(r.showline||r.ticks)&&n("mirror");var S=f==="multicategory";if(!i.noTickson&&(f==="category"||S)&&(r.ticks||r.showgrid)&&(S?(n("tickson","boundaries"),delete r.ticklabelposition):n("tickson")),S){var L=n("showdividers");L&&(n("dividercolor"),n("dividerwidth"))}if(f==="date")if(e0t(t,r,{name:"rangebreaks",inclusionAttr:"enabled",handleItemDefaults:s0t}),!r.rangebreaks.length)delete r.rangebreaks;else{for(var x=0;x<r.rangebreaks.length;x++)if(r.rangebreaks[x].pattern===sI){r._hasDayOfWeekBreaks=!0;break}if(Zve(r,a),a._has("scattergl")||a._has("splom"))for(var C=0;C<i.data.length;C++){var M=i.data[C];(M.type==="scattergl"||M.type==="splom")&&(M.visible=!1,r4.warn(M.type+" traces do not work on axes with rangebreaks. Setting trace "+M.index+" to `visible: false`."))}}return r};function s0t(e,t,r){function n(h,d){return r4.coerce(e,t,MU.rangebreaks,h,d)}var i=n("enabled");if(i){var a=n("bounds");if(a&&a.length>=2){var o="",s,l;if(a.length===2){for(s=0;s<2;s++)if(l=Yve(a[s]),l){o=sI;break}}var u=n("pattern",o);if(u===sI)for(s=0;s<2;s++)l=Yve(a[s]),l&&(t.bounds[s]=a[s]=l-1);if(u)for(s=0;s<2;s++)switch(l=a[s],u){case sI:if(!jve(l)){t.enabled=!1;return}if(l=+l,l!==Math.floor(l)||l<0||l>=7){t.enabled=!1;return}t.bounds[s]=a[s]=l;break;case o0t:if(!jve(l)){t.enabled=!1;return}if(l=+l,l<0||l>24){t.enabled=!1;return}t.bounds[s]=a[s]=l;break}if(r.autorange===!1){var c=r.range;if(c[0]<c[1]){if(a[0]<c[0]&&a[1]>c[1]){t.enabled=!1;return}}else if(a[0]>c[0]&&a[1]<c[1]){t.enabled=!1;return}}}else{var f=n("values");if(f&&f.length)n("dvalue");else{t.enabled=!1;return}}}}var l0t={sun:1,mon:2,tue:3,wed:4,thu:5,fri:6,sat:7};function Yve(e){if(typeof e=="string")return l0t[e.slice(0,3).toLowerCase()]}});var uI=ye((_sr,Jve)=>{"use strict";var u0t=Eo(),lI=Dr();Jve.exports=function(t,r,n,i){var a=i.counterAxes||[],o=i.overlayableAxes||[],s=i.letter,l=i.grid,u=i.overlayingDomain,c,f,h,d,v,_;l&&(f=l._domains[s][l._axisMap[r._id]],c=l._anchors[r._id],f&&(h=l[s+"side"].split(" ")[0],d=l.domain[s][h==="right"||h==="top"?1:0])),f=f||[0,1],c=c||(u0t(t.position)?"free":a[0]||"free"),h=h||(s==="x"?"bottom":"left"),d=d||0,v=0,_=!1;var b=lI.coerce(t,r,{anchor:{valType:"enumerated",values:["free"].concat(a),dflt:c}},"anchor"),p=lI.coerce(t,r,{side:{valType:"enumerated",values:s==="x"?["bottom","top"]:["left","right"],dflt:h}},"side");if(b==="free"){if(s==="y"){var k=n("autoshift");k&&(d=p==="left"?u[0]:u[1],_=r.automargin?r.automargin:!0,v=p==="left"?-3:3),n("shift",v)}n("position",d)}n("automargin",_);var E=!1;if(o.length&&(E=lI.coerce(t,r,{overlaying:{valType:"enumerated",values:[!1].concat(o),dflt:!1}},"overlaying")),!E){var S=n("domain",f);S[0]>S[1]-1/4096&&(r.domain=f),lI.noneOrAll(t.domain,r.domain,f),r.tickmode==="sync"&&(r.tickmode="auto")}return n("layer"),r}});var ope=ye((xsr,ape)=>{"use strict";var Wb=Dr(),$ve=ka(),c0t=ip().isUnifiedHover,f0t=HB(),Qve=vl(),h0t=c3(),epe=Rd(),d0t=SU(),tpe=i4(),v0t=Nb(),rpe=uI(),kU=hf(),Cm=kU.id2name,ipe=kU.name2id,p0t=hd().AX_ID_PATTERN,npe=qa(),cI=npe.traceIs,EU=npe.getComponentMethod;function fI(e,t,r){Array.isArray(e[t])?e[t].push(r):e[t]=[r]}ape.exports=function(t,r,n){var i=r.autotypenumbers,a={},o={},s={},l={},u={},c={},f={},h={},d={},v={},_,b;for(_=0;_<n.length;_++){var p=n[_];if(cI(p,"cartesian")){var k;if(p.xaxis)k=Cm(p.xaxis),fI(a,k,p);else if(p.xaxes)for(b=0;b<p.xaxes.length;b++)fI(a,Cm(p.xaxes[b]),p);var E;if(p.yaxis)E=Cm(p.yaxis),fI(a,E,p);else if(p.yaxes)for(b=0;b<p.yaxes.length;b++)fI(a,Cm(p.yaxes[b]),p);if(p.type==="funnel"?p.orientation==="h"?(k&&(o[k]=!0),E&&(f[E]=!0)):E&&(s[E]=!0):p.type==="image"?(E&&(h[E]=!0),k&&(h[k]=!0)):(E&&(u[E]=!0,c[E]=!0),(!cI(p,"carpet")||p.type==="carpet"&&!p._cheater)&&k&&(l[k]=!0)),p.type==="carpet"&&p._cheater&&k&&(o[k]=!0),cI(p,"2dMap")&&(d[k]=!0,d[E]=!0),cI(p,"oriented")){var S=p.orientation==="h"?E:k;v[S]=!0}}}var L=r._subplots,x=L.xaxis,C=L.yaxis,M=Wb.simpleMap(x,Cm),g=Wb.simpleMap(C,Cm),P=M.concat(g),T=$ve.background;x.length&&C.length&&(T=Wb.coerce(t,r,h0t,"plot_bgcolor"));var z=$ve.combine(T,r.paper_bgcolor),O,V,G,Z,j;function N(){var St=a[O]||[];j._traceIndices=St.map(function(Qt){return Qt.index}),j._annIndices=[],j._shapeIndices=[],j._selectionIndices=[],j._imgIndices=[],j._subplotsWith=[],j._counterAxes=[],j._name=j._attr=O,j._id=V}function H(St,Qt){return Wb.coerce(Z,j,epe,St,Qt)}function te(St,Qt){return Wb.coerce2(Z,j,epe,St,Qt)}function oe(St){return St==="x"?C:x}function _e(St,Qt){for(var Vt=St==="x"?M:g,_t=[],It=0;It<Vt.length;It++){var mt=Vt[It];mt!==Qt&&!(t[mt]||{}).overlaying&&_t.push(ipe(mt))}return _t}var Ee={x:oe("x"),y:oe("y")},Ce=Ee.x.concat(Ee.y),me={},ie=[];function Se(){var St=Z.matches;p0t.test(St)&&Ce.indexOf(St)===-1&&(me[St]=Z.type,ie=Object.keys(me))}var Le=f0t(t,r),Ae=c0t(Le);for(_=0;_<P.length;_++){O=P[_],V=ipe(O),G=O.charAt(0),Wb.isPlainObject(t[O])||(t[O]={}),Z=t[O],j=Qve.newContainer(r,O,G+"axis"),N();var Fe=G==="x"&&!l[O]&&o[O]||G==="y"&&!u[O]&&s[O],Pe=G==="y"&&(!c[O]&&f[O]||h[O]),ge={hasMinor:!0,letter:G,font:r.font,outerTicks:d[O],showGrid:!v[O],data:a[O]||[],bgColor:z,calendar:r.calendar,automargin:!0,visibleDflt:Fe,reverseDflt:Pe,autotypenumbersDflt:i,splomStash:((r._splomAxes||{})[G]||{})[V],noAutotickangles:G==="y"};H("uirevision",r.uirevision),d0t(Z,j,H,ge),tpe(Z,j,H,ge,r);var Re=Ae&&G===Le.charAt(0),ce=te("spikecolor",Ae?j.color:void 0),Ze=te("spikethickness",Ae?1.5:void 0),ut=te("spikedash",Ae?"dot":void 0),pt=te("spikemode",Ae?"across":void 0),Zt=te("spikesnap"),st=H("showspikes",!!Re||!!ce||!!Ze||!!ut||!!pt||!!Zt);st||(delete j.spikecolor,delete j.spikethickness,delete j.spikedash,delete j.spikemode,delete j.spikesnap);var lt=Cm(Z.overlaying),Gt=[0,1];if(r[lt]!==void 0){var Nt=Cm(r[lt].anchor);r[Nt]!==void 0&&(Gt=r[Nt].domain)}rpe(Z,j,H,{letter:G,counterAxes:Ee[G],overlayableAxes:_e(G,O),grid:r.grid,overlayingDomain:Gt}),H("title.standoff"),Se(),j._input=Z}for(_=0;_<ie.length;){V=ie[_++],O=Cm(V),G=O.charAt(0),Wb.isPlainObject(t[O])||(t[O]={}),Z=t[O],j=Qve.newContainer(r,O,G+"axis"),N();var Jt={letter:G,font:r.font,outerTicks:d[O],showGrid:!v[O],data:[],bgColor:z,calendar:r.calendar,automargin:!0,visibleDflt:!1,reverseDflt:!1,autotypenumbersDflt:i,splomStash:((r._splomAxes||{})[G]||{})[V]};H("uirevision",r.uirevision),j.type=me[V]||"linear",tpe(Z,j,H,Jt,r),rpe(Z,j,H,{letter:G,counterAxes:Ee[G],overlayableAxes:_e(G,O),grid:r.grid}),H("fixedrange"),H("modebardisable"),Se(),j._input=Z}var sr=EU("rangeslider","handleDefaults"),wr=EU("rangeselector","handleDefaults");for(_=0;_<M.length;_++)O=M[_],Z=t[O],j=r[O],sr(t,r,O),j.type==="date"&&wr(Z,j,r,g,j.calendar),H("fixedrange"),H("modebardisable");for(_=0;_<g.length;_++){O=g[_],Z=t[O],j=r[O];var cr=r[Cm(j.anchor)],$e=EU("rangeslider","isVisible")(cr);H("fixedrange",$e),H("modebardisable")}v0t.handleDefaults(t,r,{axIds:Ce.concat(ie).sort(kU.idSort),axHasImage:h})}});var upe=ye((bsr,lpe)=>{"use strict";var g0t=Oa(),spe=qa(),hI=Dr(),Qp=So(),dI=ho();lpe.exports=function(t,r,n,i){var a=t._fullLayout;if(r.length===0){dI.redrawComponents(t);return}function o(b){var p=b.xaxis,k=b.yaxis;a._defs.select("#"+b.clipId+"> rect").call(Qp.setTranslate,0,0).call(Qp.setScale,1,1),b.plot.call(Qp.setTranslate,p._offset,k._offset).call(Qp.setScale,1,1);var E=b.plot.selectAll(".scatterlayer .trace");E.selectAll(".point").call(Qp.setPointGroupScale,1,1),E.selectAll(".textpoint").call(Qp.setTextPointsScale,1,1),E.call(Qp.hideOutsideRangePoints,b)}function s(b,p){var k=b.plotinfo,E=k.xaxis,S=k.yaxis,L=E._length,x=S._length,C=!!b.xr1,M=!!b.yr1,g=[];if(C){var P=hI.simpleMap(b.xr0,E.r2l),T=hI.simpleMap(b.xr1,E.r2l),z=P[1]-P[0],O=T[1]-T[0];g[0]=(P[0]*(1-p)+p*T[0]-P[0])/(P[1]-P[0])*L,g[2]=L*(1-p+p*O/z),E.range[0]=E.l2r(P[0]*(1-p)+p*T[0]),E.range[1]=E.l2r(P[1]*(1-p)+p*T[1])}else g[0]=0,g[2]=L;if(M){var V=hI.simpleMap(b.yr0,S.r2l),G=hI.simpleMap(b.yr1,S.r2l),Z=V[1]-V[0],j=G[1]-G[0];g[1]=(V[1]*(1-p)+p*G[1]-V[1])/(V[0]-V[1])*x,g[3]=x*(1-p+p*j/Z),S.range[0]=E.l2r(V[0]*(1-p)+p*G[0]),S.range[1]=S.l2r(V[1]*(1-p)+p*G[1])}else g[1]=0,g[3]=x;dI.drawOne(t,E,{skipTitle:!0}),dI.drawOne(t,S,{skipTitle:!0}),dI.redrawComponents(t,[E._id,S._id]);var N=C?L/g[2]:1,H=M?x/g[3]:1,te=C?g[0]:0,oe=M?g[1]:0,_e=C?g[0]/g[2]*L:0,Ee=M?g[1]/g[3]*x:0,Ce=E._offset-_e,me=S._offset-Ee;k.clipRect.call(Qp.setTranslate,te,oe).call(Qp.setScale,1/N,1/H),k.plot.call(Qp.setTranslate,Ce,me).call(Qp.setScale,N,H),Qp.setPointGroupScale(k.zoomScalePts,1/N,1/H),Qp.setTextPointsScale(k.zoomScaleTxt,1/N,1/H)}var l;i&&(l=i());function u(){for(var b={},p=0;p<r.length;p++){var k=r[p],E=k.plotinfo.xaxis,S=k.plotinfo.yaxis;k.xr1&&(b[E._name+".range"]=k.xr1.slice()),k.yr1&&(b[S._name+".range"]=k.yr1.slice())}return l&&l(),spe.call("relayout",t,b).then(function(){for(var L=0;L<r.length;L++)o(r[L].plotinfo)})}function c(){for(var b={},p=0;p<r.length;p++){var k=r[p],E=k.plotinfo.xaxis,S=k.plotinfo.yaxis;k.xr0&&(b[E._name+".range"]=k.xr0.slice()),k.yr0&&(b[S._name+".range"]=k.yr0.slice())}return spe.call("relayout",t,b).then(function(){for(var L=0;L<r.length;L++)o(r[L].plotinfo)})}var f,h,d,v=g0t.ease(n.easing);t._transitionData._interruptCallbacks.push(function(){return window.cancelAnimationFrame(d),d=null,c()});function _(){h=Date.now();for(var b=Math.min(1,(h-f)/n.duration),p=v(b),k=0;k<r.length;k++)s(r[k],p);h-f>n.duration?(u(),d=window.cancelAnimationFrame(_)):d=window.requestAnimationFrame(_)}return f=Date.now(),d=window.requestAnimationFrame(_),Promise.resolve()}});var ph=ye(xv=>{"use strict";var pI=Oa(),cpe=qa(),Xb=Dr(),m0t=Mc(),y0t=So(),fpe=Id().getModuleCalcData,g_=hf(),zg=hd(),_0t=Wp(),iu=Xb.ensureSingle;function vI(e,t,r){return Xb.ensureSingle(e,t,r,function(n){n.datum(r)})}var Zb=zg.zindexSeparator;xv.name="cartesian";xv.attr=["xaxis","yaxis"];xv.idRoot=["x","y"];xv.idRegex=zg.idRegex;xv.attrRegex=zg.attrRegex;xv.attributes=qve();xv.layoutAttributes=Rd();xv.supplyLayoutDefaults=ope();xv.transitionAxes=upe();xv.finalizeSubplots=function(e,t){var r=t._subplots,n=r.xaxis,i=r.yaxis,a=r.cartesian,o=a,s={},l={},u,c,f;for(u=0;u<o.length;u++){var h=o[u].split("y");s[h[0]]=1,l["y"+h[1]]=1}for(u=0;u<n.length;u++)c=n[u],s[c]||(f=(e[g_.id2name(c)]||{}).anchor,zg.idRegex.y.test(f)||(f="y"),a.push(c+f),o.push(c+f),l[f]||(l[f]=1,Xb.pushUnique(i,f)));for(u=0;u<i.length;u++)f=i[u],l[f]||(c=(e[g_.id2name(f)]||{}).anchor,zg.idRegex.x.test(c)||(c="x"),a.push(c+f),o.push(c+f),s[c]||(s[c]=1,Xb.pushUnique(n,c)));if(!o.length){c="",f="";for(var d in e)if(zg.attrRegex.test(d)){var v=d.charAt(0);v==="x"?(!c||+d.slice(5)<+c.slice(5))&&(c=d):(!f||+d.slice(5)<+f.slice(5))&&(f=d)}c=c?g_.name2id(c):"x",f=f?g_.name2id(f):"y",n.push(c),i.push(f),a.push(c+f)}};xv.plot=function(e,t,r,n){var i=e._fullLayout,a=i._subplots.cartesian,o=e.calcdata,s;if(!Array.isArray(t))for(t=[],s=0;s<o.length;s++)t.push(s);for(var l=i._zindices,u=0;u<l.length;u++){var c=l[u];for(s=0;s<a.length;s++){var f=a[s],h=i._plots[f];if(u>0){var d=h.id;if(d.indexOf(Zb)!==-1)continue;d+=Zb+(u+1),h=Xb.extendFlat({},h,{id:d,plot:i._cartesianlayer.selectAll(".subplot").select("."+d)})}for(var v=[],_,b=0;b<o.length;b++){var p=o[b],k=p[0].trace;c===(k.zorder||0)&&k.xaxis+k.yaxis===f&&((t.indexOf(k.index)!==-1||k.carpet)&&(_&&_[0].trace.xaxis+_[0].trace.yaxis===f&&["tonextx","tonexty","tonext"].indexOf(k.fill)!==-1&&v.indexOf(_)===-1&&v.push(_),v.push(p)),_=p)}hpe(e,h,v,r,n)}}};function hpe(e,t,r,n,i){for(var a=zg.traceLayerClasses,o=e._fullLayout,s=o._zindices,l=o._modules,u,c,f,h=[],d=[],v=0;v<s.length;v++)for(var _=s[v],b=0;b<l.length;b++){u=l[b];var p=u.name,k=cpe.modules[p].categories;if(k.svg){var E=u.layerName||p+"layer",S=E+(v?Number(v)+1:""),L=u.plot;c=fpe(r,L,_),f=c[0],r=c[1],f.length&&h.push({i:a.indexOf(E),zindex:v,className:S,plotMethod:L,cdModule:f}),k.zoomScale&&d.push("."+S)}}h.sort(function(M,g){return(M.zindex||0)-(g.zindex||0)||M.i-g.i});var x=t.plot.selectAll("g.mlayer").data(h,function(M){return M.className});if(x.enter().append("g").attr("class",function(M){return M.className}).classed("mlayer",!0).classed("rangeplot",t.isRangePlot),x.exit().remove(),x.order(),x.each(function(M){var g=pI.select(this),P=M.className;M.plotMethod(e,t,M.cdModule,g,n,i),zg.clipOnAxisFalseQuery.indexOf("."+P)===-1&&y0t.setClipUrl(g,t.layerClipId,e)}),o._has("scattergl")&&(u=cpe.getModule("scattergl"),f=fpe(r,u)[0],u.plot(e,t,f)),!e._context.staticPlot&&(t._hasClipOnAxisFalse&&(t.clipOnAxisFalseTraces=t.plot.selectAll(zg.clipOnAxisFalseQuery.join(",")).selectAll(".trace")),d.length)){var C=t.plot.selectAll(d.join(",")).selectAll(".trace");t.zoomScalePts=C.selectAll("path.point"),t.zoomScaleTxt=C.selectAll(".textpoint")}}xv.clean=function(e,t,r,n){var i=n._plots||{},a=t._plots||{},o=n._subplots||{},s,l,u;if(n._hasOnlyLargeSploms&&!t._hasOnlyLargeSploms)for(u in i)s=i[u],s.plotgroup&&s.plotgroup.remove();var c=n._has&&n._has("gl"),f=t._has&&t._has("gl");if(c&&!f)for(u in i)s=i[u],s._scene&&s._scene.destroy();if(o.xaxis&&o.yaxis){var h=g_.listIds({_fullLayout:n});for(l=0;l<h.length;l++){var d=h[l];t[g_.id2name(d)]||n._infolayer.selectAll(".g-"+d+"title").remove()}}var v=n._has&&n._has("cartesian"),_=t._has&&t._has("cartesian");if(v&&!_)vpe(n._cartesianlayer.selectAll(".subplot"),n),n._defs.selectAll(".axesclip").remove(),delete n._axisConstraintGroups,delete n._axisMatchGroups;else if(o.cartesian)for(l=0;l<o.cartesian.length;l++){var b=o.cartesian[l];if(b.indexOf(Zb)===-1&&!a[b]){var p="."+b+",."+b+"-x,."+b+"-y";n._cartesianlayer.selectAll(p).remove(),ppe(b,n)}}};xv.drawFramework=function(e){var t=e._fullLayout,r=e.calcdata,n,i={};for(n=0;n<r.length;n++){var a=r[n][0],o=a.trace,s=o.zorder||0;i[s]||(i[s]=[]),i[s].push(a)}var l=Object.keys(i).map(Number).sort(Xb.sorterAsc);l.length||(l=[0]),t._zindices=l;var u=x0t(e),c=u.length,f=[];for(n=0;n<c;n++)f[n]=u[n].slice();for(var h=1;h<l.length;h++){var d=[];for(n=0;n<c;n++)d[n]=u[n].slice(),d[n][0]+=Zb+(h+1);f=f.concat(d)}var v=t._cartesianlayer.selectAll(".subplot").data(f,String);v.enter().append("g").attr("class",function(_){return"subplot "+_[0]}),v.order(),v.exit().call(vpe,t),v.each(function(_){var b=_[0],p=b.indexOf(Zb),k=p!==-1,E=k?b.slice(0,p):b,S=t._plots[b];S||(S=Xb.extendFlat({},t._plots[E]),S&&(S.id=b,t._plots[b]=S,t._subplots.cartesian.push(b))),S&&(S.plotgroup=pI.select(this),dpe(e,S),k||(S.draglayer=iu(t._draggers,"g",b)))})};xv.rangePlot=function(e,t,r){dpe(e,t),hpe(e,t,r),m0t.style(e)};function x0t(e){var t=e._fullLayout,r=t._zindices.length,n=t._subplots.cartesian,i=n.length,a,o,s,l,u,c,f=[],h=[];for(a=0;a<i;a++){s=n[a],l=t._plots[s],u=l.xaxis,c=l.yaxis;var d=u._mainAxis,v=c._mainAxis,_=d._id+v._id,b=t._plots[_];l.overlays=[],_!==s&&b?(l.mainplot=_,l.mainplotinfo=b,h.push(s)):(l.mainplot=void 0,l.mainplotinfo=void 0,f.push(s))}for(a=0;a<h.length;a++)s=h[a],l=t._plots[s],l.mainplotinfo.overlays.push(l);var p=f.concat(h),k=[];for(a=0;a<i;a++){s=p[a],l=t._plots[s],u=l.xaxis,c=l.yaxis;for(var E=[],S=1;S<=r;S++){var L="";for(S>1&&(L+=Zb+S),E.push(s+L),o=0;o<l.overlays.length;o++)E.push(l.overlays[o].id+L)}E=E.concat([u.layer,c.layer,u.overlaying||"",c.overlaying||""]),k.push(E)}return k}function dpe(e,t){var r=e._fullLayout,n=t.plotgroup,i=t.id,a=i.indexOf(Zb),o=a!==-1,s=zg.layerValue2layerClass[t.xaxis.layer],l=zg.layerValue2layerClass[t.yaxis.layer],u=r._hasOnlyLargeSploms,c=r._zindices.length>1,f=t.mainplotinfo;if(!t.mainplot||c)if(u)t.xlines=iu(n,"path","xlines-above"),t.ylines=iu(n,"path","ylines-above"),t.xaxislayer=iu(n,"g","xaxislayer-above"),t.yaxislayer=iu(n,"g","yaxislayer-above");else{if(!o){var h=iu(n,"g","layer-subplot");t.shapelayer=iu(h,"g","shapelayer"),t.imagelayer=iu(h,"g","imagelayer"),f&&c?(t.minorGridlayer=f.minorGridlayer,t.gridlayer=f.gridlayer,t.zerolinelayer=f.zerolinelayer):(t.minorGridlayer=iu(n,"g","minor-gridlayer"),t.gridlayer=iu(n,"g","gridlayer"),t.zerolinelayer=iu(n,"g","zerolinelayer"));var d=iu(n,"g","layer-between");t.shapelayerBetween=iu(d,"g","shapelayer"),t.imagelayerBetween=iu(d,"g","imagelayer"),iu(n,"path","xlines-below"),iu(n,"path","ylines-below"),t.overlinesBelow=iu(n,"g","overlines-below"),iu(n,"g","xaxislayer-below"),iu(n,"g","yaxislayer-below"),t.overaxesBelow=iu(n,"g","overaxes-below")}t.overplot=iu(n,"g","overplot"),t.plot=iu(t.overplot,"g",i),f&&c?t.zerolinelayerAbove=f.zerolinelayerAbove:t.zerolinelayerAbove=iu(n,"g","zerolinelayer-above"),o||(t.xlines=iu(n,"path","xlines-above"),t.ylines=iu(n,"path","ylines-above"),t.overlinesAbove=iu(n,"g","overlines-above"),iu(n,"g","xaxislayer-above"),iu(n,"g","yaxislayer-above"),t.overaxesAbove=iu(n,"g","overaxes-above"),t.xlines=n.select(".xlines-"+s),t.ylines=n.select(".ylines-"+l),t.xaxislayer=n.select(".xaxislayer-"+s),t.yaxislayer=n.select(".yaxislayer-"+l))}else{var v=f.plotgroup,_=i+"-x",b=i+"-y";t.minorGridlayer=f.minorGridlayer,t.gridlayer=f.gridlayer,t.zerolinelayer=f.zerolinelayer,t.zerolinelayerAbove=f.zerolinelayerAbove,iu(f.overlinesBelow,"path",_),iu(f.overlinesBelow,"path",b),iu(f.overaxesBelow,"g",_),iu(f.overaxesBelow,"g",b),t.plot=iu(f.overplot,"g",i),iu(f.overlinesAbove,"path",_),iu(f.overlinesAbove,"path",b),iu(f.overaxesAbove,"g",_),iu(f.overaxesAbove,"g",b),t.xlines=v.select(".overlines-"+s).select("."+_),t.ylines=v.select(".overlines-"+l).select("."+b),t.xaxislayer=v.select(".overaxes-"+s).select("."+_),t.yaxislayer=v.select(".overaxes-"+l).select("."+b)}o||(u||(vI(t.minorGridlayer,"g",t.xaxis._id),vI(t.minorGridlayer,"g",t.yaxis._id),t.minorGridlayer.selectAll("g").map(function(p){return p[0]}).sort(g_.idSort),vI(t.gridlayer,"g",t.xaxis._id),vI(t.gridlayer,"g",t.yaxis._id),t.gridlayer.selectAll("g").map(function(p){return p[0]}).sort(g_.idSort)),t.xlines.style("fill","none").classed("crisp",!0),t.ylines.style("fill","none").classed("crisp",!0))}function vpe(e,t){if(e){var r={};e.each(function(l){var u=l[0],c=pI.select(this);c.remove(),ppe(u,t),r[u]=!0});for(var n in t._plots)for(var i=t._plots[n],a=i.overlays||[],o=0;o<a.length;o++){var s=a[o];r[s.id]&&s.plot.selectAll(".trace").remove()}}}function ppe(e,t){t._draggers.selectAll("g."+e).remove(),t._defs.select("#clip"+t._uid+e+"plot").remove()}xv.toSVG=function(e){var t=e._fullLayout._glimages,r=pI.select(e).selectAll(".svg-container"),n=r.filter(function(a,o){return o===r.size()-1}).selectAll(".gl-canvas-context, .gl-canvas-focus");function i(){var a=this,o=a.toDataURL("image/png"),s=t.append("svg:image");s.attr({xmlns:_0t.svg,"xlink:href":o,preserveAspectRatio:"none",x:0,y:0,width:a.style.width,height:a.style.height})}n.each(i)};xv.updateFx=UN().updateFx});var mpe=ye((Tsr,gpe)=>{"use strict";var gI=Ru();gpe.exports={hasLines:gI.hasLines,hasMarkers:gI.hasMarkers,hasText:gI.hasText,isBubble:gI.isBubble,attributes:pf(),layoutAttributes:X6(),supplyDefaults:Gde(),crossTraceDefaults:aU(),supplyLayoutDefaults:Xde(),calc:O0().calc,crossTraceCalc:mve(),arraysToCalcdata:km(),plot:sT(),colorbar:$d(),formatLabels:nI(),style:op().style,styleOnSelect:op().styleOnSelect,hoverPoints:fT(),selectPoints:hT(),animatable:!0,moduleType:"trace",name:"scatter",basePlotModule:ph(),categories:["cartesian","svg","symbols","errorBarsOK","showLegend","scatter-like","zoomScale"],meta:{}}});var xpe=ye((Asr,_pe)=>{"use strict";var b0t=Oa(),w0t=ka(),ype=IN(),CU=Dr(),T0t=CU.strScale,A0t=CU.strRotate,S0t=CU.strTranslate;_pe.exports=function(t,r,n){var i=t.node(),a=ype[n.arrowhead||0],o=ype[n.startarrowhead||0],s=(n.arrowwidth||1)*(n.arrowsize||1),l=(n.arrowwidth||1)*(n.startarrowsize||1),u=r.indexOf("start")>=0,c=r.indexOf("end")>=0,f=a.backoff*s+n.standoff,h=o.backoff*l+n.startstandoff,d,v,_,b;if(i.nodeName==="line"){d={x:+t.attr("x1"),y:+t.attr("y1")},v={x:+t.attr("x2"),y:+t.attr("y2")};var p=d.x-v.x,k=d.y-v.y;if(_=Math.atan2(k,p),b=_+Math.PI,f&&h&&f+h>Math.sqrt(p*p+k*k)){V();return}if(f){if(f*f>p*p+k*k){V();return}var E=f*Math.cos(_),S=f*Math.sin(_);v.x+=E,v.y+=S,t.attr({x2:v.x,y2:v.y})}if(h){if(h*h>p*p+k*k){V();return}var L=h*Math.cos(_),x=h*Math.sin(_);d.x-=L,d.y-=x,t.attr({x1:d.x,y1:d.y})}}else if(i.nodeName==="path"){var C=i.getTotalLength(),M="";if(C<f+h){V();return}var g=i.getPointAtLength(0),P=i.getPointAtLength(.1);_=Math.atan2(g.y-P.y,g.x-P.x),d=i.getPointAtLength(Math.min(h,C)),M="0px,"+h+"px,";var T=i.getPointAtLength(C),z=i.getPointAtLength(C-.1);b=Math.atan2(T.y-z.y,T.x-z.x),v=i.getPointAtLength(Math.max(0,C-f));var O=M?h+f:f;M+=C-O+"px,"+C+"px",t.style("stroke-dasharray",M)}function V(){t.style("stroke-dasharray","0px,100px")}function G(Z,j,N,H){Z.path&&(Z.noRotate&&(N=0),b0t.select(i.parentNode).append("path").attr({class:t.attr("class"),d:Z.path,transform:S0t(j.x,j.y)+A0t(N*180/Math.PI)+T0t(H)}).style({fill:w0t.rgb(n.arrowcolor),"stroke-width":0}))}u&&G(o,d,_,l),c&&G(a,v,b,s)}});var mI=ye((Ssr,Spe)=>{"use strict";var bpe=Oa(),LU=qa(),M0t=Mc(),y_=Dr(),PU=y_.strTranslate,a4=ho(),Yb=ka(),Iy=So(),wpe=vf(),IU=ru(),RU=Sg(),n4=yv(),E0t=vl().arrayEditor,k0t=xpe();Spe.exports={draw:C0t,drawOne:Tpe,drawRaw:Ape};function C0t(e){var t=e._fullLayout;t._infolayer.selectAll(".annotation").remove();for(var r=0;r<t.annotations.length;r++)t.annotations[r].visible&&Tpe(e,r);return M0t.previousPromises(e)}function Tpe(e,t){var r=e._fullLayout,n=r.annotations[t]||{},i=a4.getFromId(e,n.xref),a=a4.getFromId(e,n.yref);i&&i.setScale(),a&&a.setScale(),Ape(e,n,t,!1,i,a)}function m_(e,t,r,n,i){var a=i[r],o=i[r+"ref"],s=r.indexOf("y")!==-1,l=a4.getRefType(o)==="domain",u=s?n.h:n.w;return e?l?a+(s?-t:t)/e._length:e.p2r(e.r2p(a)+t):a+(s?-t:t)/u}function Ape(e,t,r,n,i,a){var o=e._fullLayout,s=e._fullLayout._size,l=e._context.edits,u,c;n?(u="annotation-"+n,c=n+".annotations"):(u="annotation",c="annotations");var f=E0t(e.layout,c,t),h=f.modifyBase,d=f.modifyItem,v=f.getUpdateObj;o._infolayer.selectAll("."+u+'[data-index="'+r+'"]').remove();var _="clip"+o._uid+"_ann"+r;if(!t._input||t.visible===!1){bpe.selectAll("#"+_).remove();return}var b={x:{},y:{}},p=+t.textangle||0,k=o._infolayer.append("g").classed(u,!0).attr("data-index",String(r)).style("opacity",t.opacity),E=k.append("g").classed("annotation-text-g",!0),S=l[t.showarrow?"annotationTail":"annotationPosition"],L=t.captureevents||l.annotationText||S;function x(H){var te={index:r,annotation:t._input,fullAnnotation:t,event:H};return n&&(te.subplotId=n),te}var C=E.append("g").style("pointer-events",L?"all":null).call(RU,"pointer").on("click",function(){e._dragging=!1,e.emit("plotly_clickannotation",x(bpe.event))});t.hovertext&&C.on("mouseover",function(){var H=t.hoverlabel,te=H.font,oe=this.getBoundingClientRect(),_e=e.getBoundingClientRect();wpe.loneHover({x0:oe.left-_e.left,x1:oe.right-_e.left,y:(oe.top+oe.bottom)/2-_e.top,text:t.hovertext,color:H.bgcolor,borderColor:H.bordercolor,fontFamily:te.family,fontSize:te.size,fontColor:te.color,fontWeight:te.weight,fontStyle:te.style,fontVariant:te.variant,fontShadow:te.fontShadow,fontLineposition:te.fontLineposition,fontTextcase:te.fontTextcase},{container:o._hoverlayer.node(),outerContainer:o._paper.node(),gd:e})}).on("mouseout",function(){wpe.loneUnhover(o._hoverlayer.node())});var M=t.borderwidth,g=t.borderpad,P=M+g,T=C.append("rect").attr("class","bg").style("stroke-width",M+"px").call(Yb.stroke,t.bordercolor).call(Yb.fill,t.bgcolor),z=t.width||t.height,O=o._topclips.selectAll("#"+_).data(z?[0]:[]);O.enter().append("clipPath").classed("annclip",!0).attr("id",_).append("rect"),O.exit().remove();var V=t.font,G=o._meta?y_.templateString(t.text,o._meta):t.text,Z=C.append("text").classed("annotation-text",!0).text(G);function j(H){return H.call(Iy.font,V).attr({"text-anchor":{left:"start",right:"end"}[t.align]||"middle"}),IU.convertToTspans(H,e,N),H}function N(){var H=Z.selectAll("a");if(H.size()===1&&H.text()===Z.text()){var te=C.insert("a",":first-child").attr({"xlink:xlink:href":H.attr("xlink:href"),"xlink:xlink:show":H.attr("xlink:show")}).style({cursor:"pointer"});te.node().appendChild(T.node())}var oe=C.select(".annotation-text-math-group"),_e=!oe.empty(),Ee=Iy.bBox((_e?oe:Z).node()),Ce=Ee.width,me=Ee.height,ie=t.width||Ce,Se=t.height||me,Le=Math.round(ie+2*P),Ae=Math.round(Se+2*P);function Fe(je,tt){return tt==="auto"&&(je<1/3?tt="left":je>2/3?tt="right":tt="center"),{center:0,middle:0,left:.5,bottom:-.5,right:-.5,top:.5}[tt]}for(var Pe=!1,ge=["x","y"],Re=0;Re<ge.length;Re++){var ce=ge[Re],Ze=t[ce+"ref"]||ce,ut=t["a"+ce+"ref"],pt={x:i,y:a}[ce],Zt=(p+(ce==="x"?0:-90))*Math.PI/180,st=Le*Math.cos(Zt),lt=Ae*Math.sin(Zt),Gt=Math.abs(st)+Math.abs(lt),Nt=t[ce+"anchor"],Jt=t[ce+"shift"]*(ce==="x"?1:-1),sr=b[ce],wr,cr,$e,St,Qt,Vt=a4.getRefType(Ze);if(pt&&Vt!=="domain"){var _t=pt.r2fraction(t[ce]);(_t<0||_t>1)&&(ut===Ze?(_t=pt.r2fraction(t["a"+ce]),(_t<0||_t>1)&&(Pe=!0)):Pe=!0),wr=pt._offset+pt.r2p(t[ce]),St=.5}else{var It=Vt==="domain";ce==="x"?($e=t[ce],wr=It?pt._offset+pt._length*$e:wr=s.l+s.w*$e):($e=1-t[ce],wr=It?pt._offset+pt._length*$e:wr=s.t+s.h*$e),St=t.showarrow?.5:$e}if(t.showarrow){sr.head=wr;var mt=t["a"+ce];if(Qt=st*Fe(.5,t.xanchor)-lt*Fe(.5,t.yanchor),ut===Ze){var er=a4.getRefType(ut);er==="domain"?(ce==="y"&&(mt=1-mt),sr.tail=pt._offset+pt._length*mt):er==="paper"?ce==="y"?(mt=1-mt,sr.tail=s.t+s.h*mt):sr.tail=s.l+s.w*mt:sr.tail=pt._offset+pt.r2p(mt),cr=Qt}else sr.tail=wr+mt,cr=Qt+mt;sr.text=sr.tail+Qt;var lr=o[ce==="x"?"width":"height"];if(Ze==="paper"&&(sr.head=y_.constrain(sr.head,1,lr-1)),ut==="pixel"){var Tr=-Math.max(sr.tail-3,sr.text),Lr=Math.min(sr.tail+3,sr.text)-lr;Tr>0?(sr.tail+=Tr,sr.text+=Tr):Lr>0&&(sr.tail-=Lr,sr.text-=Lr)}sr.tail+=Jt,sr.head+=Jt}else Qt=Gt*Fe(St,Nt),cr=Qt,sr.text=wr+Qt;sr.text+=Jt,Qt+=Jt,cr+=Jt,t["_"+ce+"padplus"]=Gt/2+cr,t["_"+ce+"padminus"]=Gt/2-cr,t["_"+ce+"size"]=Gt,t["_"+ce+"shift"]=Qt}if(Pe){C.remove();return}var ti=0,Br=0;if(t.align!=="left"&&(ti=(ie-Ce)*(t.align==="center"?.5:1)),t.valign!=="top"&&(Br=(Se-me)*(t.valign==="middle"?.5:1)),_e)oe.select("svg").attr({x:P+ti-1,y:P+Br}).call(Iy.setClipUrl,z?_:null,e);else{var Vr=P+Br-Ee.top,dt=P+ti-Ee.left;Z.call(IU.positionText,dt,Vr).call(Iy.setClipUrl,z?_:null,e)}O.select("rect").call(Iy.setRect,P,P,ie,Se),T.call(Iy.setRect,M/2,M/2,Le-M,Ae-M),C.call(Iy.setTranslate,Math.round(b.x.text-Le/2),Math.round(b.y.text-Ae/2)),E.attr({transform:"rotate("+p+","+b.x.text+","+b.y.text+")"});var Ge=function(je,tt){k.selectAll(".annotation-arrow-g").remove();var xt=b.x.head,Ie=b.y.head,xe=b.x.tail+je,ke=b.y.tail+tt,vt=b.x.text+je,ir=b.y.text+tt,ar=y_.rotationXYMatrix(p,vt,ir),vr=y_.apply2DTransform(ar),ii=y_.apply2DTransform2(ar),pi=+T.attr("width"),$r=+T.attr("height"),di=vt-.5*pi,ji=di+pi,In=ir-.5*$r,wi=In+$r,On=[[di,In,di,wi],[di,wi,ji,wi],[ji,wi,ji,In],[ji,In,di,In]].map(ii);if(!On.reduce(function(Qr,Oi){return Qr^!!y_.segmentsIntersect(xt,Ie,xt+1e6,Ie+1e6,Oi[0],Oi[1],Oi[2],Oi[3])},!1)){On.forEach(function(Qr){var Oi=y_.segmentsIntersect(xe,ke,xt,Ie,Qr[0],Qr[1],Qr[2],Qr[3]);Oi&&(xe=Oi.x,ke=Oi.y)});var qn=t.arrowwidth,Fn=t.arrowcolor,ra=t.arrowside,la=k.append("g").style({opacity:Yb.opacity(Fn)}).classed("annotation-arrow-g",!0),Ut=la.append("path").attr("d","M"+xe+","+ke+"L"+xt+","+Ie).style("stroke-width",qn+"px").call(Yb.stroke,Yb.rgb(Fn));if(k0t(Ut,ra,t),l.annotationPosition&&Ut.node().parentNode&&!n){var wt=xt,rr=Ie;if(t.standoff){var nr=Math.sqrt(Math.pow(xt-xe,2)+Math.pow(Ie-ke,2));wt+=t.standoff*(xe-xt)/nr,rr+=t.standoff*(ke-Ie)/nr}var Er=la.append("path").classed("annotation-arrow",!0).classed("anndrag",!0).classed("cursor-move",!0).attr({d:"M3,3H-3V-3H3ZM0,0L"+(xe-wt)+","+(ke-rr),transform:PU(wt,rr)}).style("stroke-width",qn+6+"px").call(Yb.stroke,"rgba(0,0,0,0)").call(Yb.fill,"rgba(0,0,0,0)"),Xr,ri;n4.init({element:Er.node(),gd:e,prepFn:function(){var Qr=Iy.getTranslate(C);Xr=Qr.x,ri=Qr.y,i&&i.autorange&&h(i._name+".autorange",!0),a&&a.autorange&&h(a._name+".autorange",!0)},moveFn:function(Qr,Oi){var $i=vr(Xr,ri),tn=$i[0]+Qr,fn=$i[1]+Oi;C.call(Iy.setTranslate,tn,fn),d("x",m_(i,Qr,"x",s,t)),d("y",m_(a,Oi,"y",s,t)),t.axref===t.xref&&d("ax",m_(i,Qr,"ax",s,t)),t.ayref===t.yref&&d("ay",m_(a,Oi,"ay",s,t)),la.attr("transform",PU(Qr,Oi)),E.attr({transform:"rotate("+p+","+tn+","+fn+")"})},doneFn:function(){LU.call("_guiRelayout",e,v());var Qr=document.querySelector(".js-notes-box-panel");Qr&&Qr.redraw(Qr.selectedObj)}})}}};if(t.showarrow&&Ge(0,0),S){var Je;n4.init({element:C.node(),gd:e,prepFn:function(){Je=E.attr("transform")},moveFn:function(je,tt){var xt="pointer";if(t.showarrow)t.axref===t.xref?d("ax",m_(i,je,"ax",s,t)):d("ax",t.ax+je),t.ayref===t.yref?d("ay",m_(a,tt,"ay",s.w,t)):d("ay",t.ay+tt),Ge(je,tt);else{if(n)return;var Ie,xe;if(i)Ie=m_(i,je,"x",s,t);else{var ke=t._xsize/s.w,vt=t.x+(t._xshift-t.xshift)/s.w-ke/2;Ie=n4.align(vt+je/s.w,ke,0,1,t.xanchor)}if(a)xe=m_(a,tt,"y",s,t);else{var ir=t._ysize/s.h,ar=t.y-(t._yshift+t.yshift)/s.h-ir/2;xe=n4.align(ar-tt/s.h,ir,0,1,t.yanchor)}d("x",Ie),d("y",xe),(!i||!a)&&(xt=n4.getCursor(i?.5:Ie,a?.5:xe,t.xanchor,t.yanchor))}E.attr({transform:PU(je,tt)+Je}),RU(C,xt)},clickFn:function(je,tt){t.captureevents&&e.emit("plotly_clickannotation",x(tt))},doneFn:function(){RU(C),LU.call("_guiRelayout",e,v());var je=document.querySelector(".js-notes-box-panel");je&&je.redraw(je.selectedObj)}})}}l.annotationText?Z.call(IU.makeEditable,{delegate:C,gd:e}).call(j).on("edit",function(H){t.text=H,this.call(j),d("text",H),i&&i.autorange&&h(i._name+".autorange",!0),a&&a.autorange&&h(a._name+".autorange",!0),LU.call("_guiRelayout",e,v())}):Z.call(j)}});var Ppe=ye((Msr,Lpe)=>{"use strict";var Mpe=Dr(),L0t=qa(),Epe=vl().arrayEditor;Lpe.exports={hasClickToShow:P0t,onClick:I0t};function P0t(e,t){var r=Cpe(e,t);return r.on.length>0||r.explicitOff.length>0}function I0t(e,t){var r=Cpe(e,t),n=r.on,i=r.off.concat(r.explicitOff),a={},o=e._fullLayout.annotations,s,l;if(n.length||i.length){for(s=0;s<n.length;s++)l=Epe(e.layout,"annotations",o[n[s]]),l.modifyItem("visible",!0),Mpe.extendFlat(a,l.getUpdateObj());for(s=0;s<i.length;s++)l=Epe(e.layout,"annotations",o[i[s]]),l.modifyItem("visible",!1),Mpe.extendFlat(a,l.getUpdateObj());return L0t.call("update",e,{},a)}}function Cpe(e,t){var r=e._fullLayout.annotations,n=[],i=[],a=[],o=(t||[]).length,s,l,u,c,f,h,d,v;for(s=0;s<r.length;s++)if(u=r[s],c=u.clicktoshow,c){for(l=0;l<o;l++)if(f=t[l],h=f.xaxis,d=f.yaxis,h._id===u.xref&&d._id===u.yref&&h.d2r(f.x)===kpe(u._xclick,h)&&d.d2r(f.y)===kpe(u._yclick,d)){u.visible?c==="onout"?v=i:v=a:v=n,v.push(s);break}l===o&&u.visible&&c==="onout"&&i.push(s)}return{on:n,off:i,explicitOff:a}}function kpe(e,t){return t.type==="log"?t.l2r(e):t.d2r(e)}});var FU=ye((Esr,Ipe)=>{"use strict";var DU=Dr(),dT=ka();Ipe.exports=function(t,r,n,i){i("opacity");var a=i("bgcolor"),o=i("bordercolor"),s=dT.opacity(o);i("borderpad");var l=i("borderwidth"),u=i("showarrow");i("text",u?" ":n._dfltTitle.annotation),i("textangle"),DU.coerceFont(i,"font",n.font),i("width"),i("align");var c=i("height");if(c&&i("valign"),u){var f=i("arrowside"),h,d;f.indexOf("end")!==-1&&(h=i("arrowhead"),d=i("arrowsize")),f.indexOf("start")!==-1&&(i("startarrowhead",h),i("startarrowsize",d)),i("arrowcolor",s?r.bordercolor:dT.defaultLine),i("arrowwidth",(s&&l||1)*2),i("standoff"),i("startstandoff")}var v=i("hovertext"),_=n.hoverlabel||{};if(v){var b=i("hoverlabel.bgcolor",_.bgcolor||(dT.opacity(a)?dT.rgb(a):dT.defaultLine)),p=i("hoverlabel.bordercolor",_.bordercolor||dT.contrast(b)),k=DU.extendFlat({},_.font);k.color||(k.color=p),DU.coerceFont(i,"hoverlabel.font",k)}i("captureevents",!!v)}});var Dpe=ye((ksr,Rpe)=>{"use strict";var zU=Dr(),Kb=ho(),R0t=Yd(),D0t=FU(),F0t=Ub();Rpe.exports=function(t,r){R0t(t,r,{name:"annotations",handleItemDefaults:z0t})};function z0t(e,t,r){function n(E,S){return zU.coerce(e,t,F0t,E,S)}var i=n("visible"),a=n("clicktoshow");if(i||a){D0t(e,t,r,n);for(var o=t.showarrow,s=["x","y"],l=[-10,-30],u={_fullLayout:r},c=0;c<2;c++){var f=s[c],h=Kb.coerceRef(e,t,u,f,"","paper");if(h!=="paper"){var d=Kb.getFromId(u,h);d._annIndices.push(t._index)}if(Kb.coercePosition(t,u,n,h,f,.5),o){var v="a"+f,_=Kb.coerceRef(e,t,u,v,"pixel",["pixel","paper"]);_!=="pixel"&&_!==h&&(_=t[v]="pixel");var b=_==="pixel"?l[c]:.4;Kb.coercePosition(t,u,n,_,v,b)}n(f+"anchor"),n(f+"shift")}if(zU.noneOrAll(e,t,["x","y"]),o&&zU.noneOrAll(e,t,["ax","ay"]),a){var p=n("xclick"),k=n("yclick");t._xclick=p===void 0?t.x:Kb.cleanPosition(p,u,t.xref),t._yclick=k===void 0?t.y:Kb.cleanPosition(k,u,t.yref)}}}});var Ope=ye((Csr,zpe)=>{"use strict";var OU=Dr(),Jb=ho(),O0t=mI().draw;zpe.exports=function(t){var r=t._fullLayout,n=OU.filterVisible(r.annotations);if(n.length&&t._fullData.length)return OU.syncOrAsync([O0t,q0t],t)};function q0t(e){var t=e._fullLayout;OU.filterVisible(t.annotations).forEach(function(r){var n=Jb.getFromId(e,r.xref),i=Jb.getFromId(e,r.yref),a=Jb.getRefType(r.xref),o=Jb.getRefType(r.yref);r._extremes={},a==="range"&&Fpe(r,n),o==="range"&&Fpe(r,i)})}function Fpe(e,t){var r=t._id,n=r.charAt(0),i=e[n],a=e["a"+n],o=e[n+"ref"],s=e["a"+n+"ref"],l=e["_"+n+"padplus"],u=e["_"+n+"padminus"],c={x:1,y:-1}[n]*e[n+"shift"],f=3*e.arrowsize*e.arrowwidth||0,h=f+c,d=f-c,v=3*e.startarrowsize*e.arrowwidth||0,_=v+c,b=v-c,p;if(s===o){var k=Jb.findExtremes(t,[t.r2c(i)],{ppadplus:h,ppadminus:d}),E=Jb.findExtremes(t,[t.r2c(a)],{ppadplus:Math.max(l,_),ppadminus:Math.max(u,b)});p={min:[k.min[0],E.min[0]],max:[k.max[0],E.max[0]]}}else _=a?_+a:_,b=a?b-a:b,p=Jb.findExtremes(t,[t.r2c(i)],{ppadplus:Math.max(l,h,_),ppadminus:Math.max(u,d,b)});e._extremes[r]=p}});var Bpe=ye((Lsr,qpe)=>{"use strict";var B0t=Eo(),N0t=m6();qpe.exports=function(t,r,n,i){r=r||{};var a=n==="log"&&r.type==="linear",o=n==="linear"&&r.type==="log";if(!(a||o))return;var s=t._fullLayout.annotations,l=r._id.charAt(0),u,c;function f(d){var v=u[d],_=null;a?_=N0t(v,r.range):_=Math.pow(10,v),B0t(_)||(_=null),i(c+d,_)}for(var h=0;h<s.length;h++)u=s[h],c="annotations["+h+"].",u[l+"ref"]===r._id&&f(l),u["a"+l+"ref"]===r._id&&f("a"+l)}});var Vpe=ye((Psr,Upe)=>{"use strict";var qU=mI(),Npe=Ppe();Upe.exports={moduleType:"component",name:"annotations",layoutAttributes:Ub(),supplyLayoutDefaults:Dpe(),includeBasePlot:qM()("annotations"),calcAutorange:Ope(),draw:qU.draw,drawOne:qU.drawOne,drawRaw:qU.drawRaw,hasClickToShow:Npe.hasClickToShow,onClick:Npe.onClick,convertCoords:Bpe()}});var yI=ye((Isr,Gpe)=>{"use strict";var kc=Ub(),U0t=mc().overrideAll,V0t=vl().templatedArray;Gpe.exports=U0t(V0t("annotation",{visible:kc.visible,x:{valType:"any"},y:{valType:"any"},z:{valType:"any"},ax:{valType:"number"},ay:{valType:"number"},xanchor:kc.xanchor,xshift:kc.xshift,yanchor:kc.yanchor,yshift:kc.yshift,text:kc.text,textangle:kc.textangle,font:kc.font,width:kc.width,height:kc.height,opacity:kc.opacity,align:kc.align,valign:kc.valign,bgcolor:kc.bgcolor,bordercolor:kc.bordercolor,borderpad:kc.borderpad,borderwidth:kc.borderwidth,showarrow:kc.showarrow,arrowcolor:kc.arrowcolor,arrowhead:kc.arrowhead,startarrowhead:kc.startarrowhead,arrowside:kc.arrowside,arrowsize:kc.arrowsize,startarrowsize:kc.startarrowsize,arrowwidth:kc.arrowwidth,standoff:kc.standoff,startstandoff:kc.startstandoff,hovertext:kc.hovertext,hoverlabel:kc.hoverlabel,captureevents:kc.captureevents}),"calc","from-root")});var jpe=ye((Rsr,Hpe)=>{"use strict";var BU=Dr(),G0t=ho(),H0t=Yd(),j0t=FU(),W0t=yI();Hpe.exports=function(t,r,n){H0t(t,r,{name:"annotations",handleItemDefaults:X0t,fullLayout:n.fullLayout})};function X0t(e,t,r,n){function i(s,l){return BU.coerce(e,t,W0t,s,l)}function a(s){var l=s+"axis",u={_fullLayout:{}};return u._fullLayout[l]=r[l],G0t.coercePosition(t,u,i,s,s,.5)}var o=i("visible");o&&(j0t(e,t,n.fullLayout,i),a("x"),a("y"),a("z"),BU.noneOrAll(e,t,["x","y","z"]),t.xref="x",t.yref="y",t.zref="z",i("xanchor"),i("yanchor"),i("xshift"),i("yshift"),t.showarrow&&(t.axref="pixel",t.ayref="pixel",i("ax",-10),i("ay",-30),BU.noneOrAll(e,t,["ax","ay"])))}});var Ype=ye((Dsr,Zpe)=>{"use strict";var Wpe=Dr(),Xpe=ho();Zpe.exports=function(t){for(var r=t.fullSceneLayout,n=r.annotations,i=0;i<n.length;i++)Z0t(n[i],t);t.fullLayout._infolayer.selectAll(".annotation-"+t.id).remove()};function Z0t(e,t){var r=t.fullSceneLayout,n=r.domain,i=t.fullLayout._size,a={pdata:null,type:"linear",autorange:!1,range:[-1/0,1/0]};e._xa={},Wpe.extendFlat(e._xa,a),Xpe.setConvert(e._xa),e._xa._offset=i.l+n.x[0]*i.w,e._xa.l2p=function(){return .5*(1+e._pdata[0]/e._pdata[3])*i.w*(n.x[1]-n.x[0])},e._ya={},Wpe.extendFlat(e._ya,a),Xpe.setConvert(e._ya),e._ya._offset=i.t+(1-n.y[1])*i.h,e._ya.l2p=function(){return .5*(1-e._pdata[1]/e._pdata[3])*i.h*(n.y[1]-n.y[0])}}});var UU=ye((Fsr,Kpe)=>{"use strict";function NU(e,t){var r=[0,0,0,0],n,i;for(n=0;n<4;++n)for(i=0;i<4;++i)r[i]+=e[4*n+i]*t[n];return r}function Y0t(e,t){var r=NU(e.projection,NU(e.view,NU(e.model,[t[0],t[1],t[2],1])));return r}Kpe.exports=Y0t});var $pe=ye((zsr,Jpe)=>{"use strict";var K0t=mI().drawRaw,J0t=UU(),$0t=["x","y","z"];Jpe.exports=function(t){for(var r=t.fullSceneLayout,n=t.dataScale,i=r.annotations,a=0;a<i.length;a++){for(var o=i[a],s=!1,l=0;l<3;l++){var u=$0t[l],c=o[u],f=r[u+"axis"],h=f.r2fraction(c);if(h<0||h>1){s=!0;break}}s?t.fullLayout._infolayer.select(".annotation-"+t.id+'[data-index="'+a+'"]').remove():(o._pdata=J0t(t.glplot.cameraParams,[r.xaxis.r2l(o.x)*n[0],r.yaxis.r2l(o.y)*n[1],r.zaxis.r2l(o.z)*n[2]]),K0t(t.graphDiv,o,a,t.id,o._xa,o._ya))}}});var t0e=ye((Osr,e0e)=>{"use strict";var Q0t=qa(),Qpe=Dr();e0e.exports={moduleType:"component",name:"annotations3d",schema:{subplots:{scene:{annotations:yI()}}},layoutAttributes:yI(),handleDefaults:jpe(),includeBasePlot:egt,convert:Ype(),draw:$pe()};function egt(e,t){var r=Q0t.subplotsRegistry.gl3d;if(r)for(var n=r.attrRegex,i=Object.keys(e),a=0;a<i.length;a++){var o=i[a];n.test(o)&&(e[o].annotations||[]).length&&(Qpe.pushUnique(t._basePlotModules,r),Qpe.pushUnique(t._subplots.gl3d,o))}}});var VU=ye((Bsr,a0e)=>{"use strict";var r0e=Ub(),i0e=ec(),n0e=pf().line,tgt=Pd().dash,Og=Ao().extendFlat,rgt=vl().templatedArray,qsr=OM(),vT=Gl(),{shapeTexttemplateAttrs:igt,templatefallbackAttrs:ngt}=Ll(),agt=M6();a0e.exports=rgt("shape",{visible:Og({},vT.visible,{editType:"calc+arraydraw"}),showlegend:{valType:"boolean",dflt:!1,editType:"calc+arraydraw"},legend:Og({},vT.legend,{editType:"calc+arraydraw"}),legendgroup:Og({},vT.legendgroup,{editType:"calc+arraydraw"}),legendgrouptitle:{text:Og({},vT.legendgrouptitle.text,{editType:"calc+arraydraw"}),font:i0e({editType:"calc+arraydraw"}),editType:"calc+arraydraw"},legendrank:Og({},vT.legendrank,{editType:"calc+arraydraw"}),legendwidth:Og({},vT.legendwidth,{editType:"calc+arraydraw"}),type:{valType:"enumerated",values:["circle","rect","path","line"],editType:"calc+arraydraw"},layer:{valType:"enumerated",values:["below","above","between"],dflt:"above",editType:"arraydraw"},xref:Og({},r0e.xref,{}),xsizemode:{valType:"enumerated",values:["scaled","pixel"],dflt:"scaled",editType:"calc+arraydraw"},xanchor:{valType:"any",editType:"calc+arraydraw"},x0:{valType:"any",editType:"calc+arraydraw"},x1:{valType:"any",editType:"calc+arraydraw"},x0shift:{valType:"number",dflt:0,min:-1,max:1,editType:"calc"},x1shift:{valType:"number",dflt:0,min:-1,max:1,editType:"calc"},yref:Og({},r0e.yref,{}),ysizemode:{valType:"enumerated",values:["scaled","pixel"],dflt:"scaled",editType:"calc+arraydraw"},yanchor:{valType:"any",editType:"calc+arraydraw"},y0:{valType:"any",editType:"calc+arraydraw"},y1:{valType:"any",editType:"calc+arraydraw"},y0shift:{valType:"number",dflt:0,min:-1,max:1,editType:"calc"},y1shift:{valType:"number",dflt:0,min:-1,max:1,editType:"calc"},path:{valType:"string",editType:"calc+arraydraw"},opacity:{valType:"number",min:0,max:1,dflt:1,editType:"arraydraw"},line:{color:Og({},n0e.color,{editType:"arraydraw"}),width:Og({},n0e.width,{editType:"calc+arraydraw"}),dash:Og({},tgt,{editType:"arraydraw"}),editType:"calc+arraydraw"},fillcolor:{valType:"color",dflt:"rgba(0,0,0,0)",editType:"arraydraw"},fillrule:{valType:"enumerated",values:["evenodd","nonzero"],dflt:"evenodd",editType:"arraydraw"},editable:{valType:"boolean",dflt:!1,editType:"calc+arraydraw"},label:{text:{valType:"string",dflt:"",editType:"arraydraw"},texttemplate:igt({},{keys:Object.keys(agt)}),texttemplatefallback:ngt({editType:"arraydraw"}),font:i0e({editType:"calc+arraydraw",colorEditType:"arraydraw"}),textposition:{valType:"enumerated",values:["top left","top center","top right","middle left","middle center","middle right","bottom left","bottom center","bottom right","start","middle","end"],editType:"arraydraw"},textangle:{valType:"angle",dflt:"auto",editType:"calc+arraydraw"},xanchor:{valType:"enumerated",values:["auto","left","center","right"],dflt:"auto",editType:"calc+arraydraw"},yanchor:{valType:"enumerated",values:["top","middle","bottom"],editType:"calc+arraydraw"},padding:{valType:"number",dflt:3,min:0,editType:"arraydraw"},editType:"arraydraw"},editType:"arraydraw"})});var l0e=ye((Nsr,s0e)=>{"use strict";var o4=Dr(),pT=ho(),ogt=Yd(),sgt=VU(),o0e=f_();s0e.exports=function(t,r){ogt(t,r,{name:"shapes",handleItemDefaults:ugt})};function lgt(e,t){return e?"bottom":t.indexOf("top")!==-1?"top":t.indexOf("bottom")!==-1?"bottom":"middle"}function ugt(e,t,r){function n(H,te){return o4.coerce(e,t,sgt,H,te)}t._isShape=!0;var i=n("visible");if(i){var a=n("showlegend");a&&(n("legend"),n("legendwidth"),n("legendgroup"),n("legendgrouptitle.text"),o4.coerceFont(n,"legendgrouptitle.font"),n("legendrank"));var o=n("path"),s=o?"path":"rect",l=n("type",s),u=l!=="path";u&&delete t.path,n("editable"),n("layer"),n("opacity"),n("fillcolor"),n("fillrule");var c=n("line.width");c&&(n("line.color"),n("line.dash"));for(var f=n("xsizemode"),h=n("ysizemode"),d=["x","y"],v=0;v<2;v++){var _=d[v],b=_+"anchor",p=_==="x"?f:h,k={_fullLayout:r},E,S,L,x=pT.coerceRef(e,t,k,_,void 0,"paper"),C=pT.getRefType(x);if(C==="range"?(E=pT.getFromId(k,x),E._shapeIndices.push(t._index),L=o0e.rangeToShapePosition(E),S=o0e.shapePositionToRange(E),(E.type==="category"||E.type==="multicategory")&&(n(_+"0shift"),n(_+"1shift"))):S=L=o4.identity,u){var M=.25,g=.75,P=_+"0",T=_+"1",z=e[P],O=e[T];e[P]=S(e[P],!0),e[T]=S(e[T],!0),p==="pixel"?(n(P,0),n(T,10)):(pT.coercePosition(t,k,n,x,P,M),pT.coercePosition(t,k,n,x,T,g)),t[P]=L(t[P]),t[T]=L(t[T]),e[P]=z,e[T]=O}if(p==="pixel"){var V=e[b];e[b]=S(e[b],!0),pT.coercePosition(t,k,n,x,b,.25),t[b]=L(t[b]),e[b]=V}}u&&o4.noneOrAll(e,t,["x0","x1","y0","y1"]);var G=l==="line",Z,j;if(u&&(Z=n("label.texttemplate"),n("label.texttemplatefallback")),Z||(j=n("label.text")),j||Z){n("label.textangle");var N=n("label.textposition",G?"middle":"middle center");n("label.xanchor"),n("label.yanchor",lgt(G,N)),n("label.padding"),o4.coerceFont(n,"label.font",r.font)}}}});var f0e=ye((Usr,c0e)=>{"use strict";var cgt=ka(),u0e=Dr();function fgt(e,t){return e?"bottom":t.indexOf("top")!==-1?"top":t.indexOf("bottom")!==-1?"bottom":"middle"}c0e.exports=function(t,r,n){n("newshape.visible"),n("newshape.name"),n("newshape.showlegend"),n("newshape.legend"),n("newshape.legendwidth"),n("newshape.legendgroup"),n("newshape.legendgrouptitle.text"),u0e.coerceFont(n,"newshape.legendgrouptitle.font"),n("newshape.legendrank"),n("newshape.drawdirection"),n("newshape.layer"),n("newshape.fillcolor"),n("newshape.fillrule"),n("newshape.opacity");var i=n("newshape.line.width");if(i){var a=(t||{}).plot_bgcolor||"#FFF";n("newshape.line.color",cgt.contrast(a)),n("newshape.line.dash")}var o=t.dragmode==="drawline",s=n("newshape.label.text"),l=n("newshape.label.texttemplate");if(n("newshape.label.texttemplatefallback"),s||l){n("newshape.label.textangle");var u=n("newshape.label.textposition",o?"middle":"middle center");n("newshape.label.xanchor"),n("newshape.label.yanchor",fgt(o,u)),n("newshape.label.padding"),u0e.coerceFont(n,"newshape.label.font",r.font)}n("activeshape.fillcolor"),n("activeshape.opacity")}});var g0e=ye((Vsr,p0e)=>{"use strict";var GU=Dr(),gT=ho(),mT=gM(),d0e=f_();p0e.exports=function(t){var r=t._fullLayout,n=GU.filterVisible(r.shapes);if(!(!n.length||!t._fullData.length))for(var i=0;i<n.length;i++){var a=n[i];a._extremes={};var o,s,l=gT.getRefType(a.xref),u=gT.getRefType(a.yref);a.xref!=="paper"&&l!=="domain"&&(o=gT.getFromId(t,a.xref),s=h0e(o,a,mT.paramIsX),s&&(a._extremes[o._id]=gT.findExtremes(o,s,hgt(a)))),a.yref!=="paper"&&u!=="domain"&&(o=gT.getFromId(t,a.yref),s=h0e(o,a,mT.paramIsY),s&&(a._extremes[o._id]=gT.findExtremes(o,s,dgt(a))))}};function hgt(e){return v0e(e.line.width,e.xsizemode,e.x0,e.x1,e.path,!1)}function dgt(e){return v0e(e.line.width,e.ysizemode,e.y0,e.y1,e.path,!0)}function v0e(e,t,r,n,i,a){var o=e/2,s=a;if(t==="pixel"){var l=i?d0e.extractPathCoords(i,a?mT.paramIsY:mT.paramIsX):[r,n],u=GU.aggNums(Math.max,null,l),c=GU.aggNums(Math.min,null,l),f=c<0?Math.abs(c)+o:o,h=u>0?u+o:o;return{ppad:o,ppadplus:s?f:h,ppadminus:s?h:f}}else return{ppad:o}}function h0e(e,t,r){var n=e._id.charAt(0)==="x"?"x":"y",i=e.type==="category"||e.type==="multicategory",a,o,s=0,l=0,u=i?e.r2c:e.d2c,c=t[n+"sizemode"]==="scaled";if(c?(a=t[n+"0"],o=t[n+"1"],i&&(s=t[n+"0shift"],l=t[n+"1shift"])):(a=t[n+"anchor"],o=t[n+"anchor"]),a!==void 0)return[u(a)+s,u(o)+l];if(t.path){var f=1/0,h=-1/0,d=t.path.match(mT.segmentRE),v,_,b,p,k;for(e.type==="date"&&(u=d0e.decodeDate(u)),v=0;v<d.length;v++)_=d[v],b=r[_.charAt(0)].drawn,b!==void 0&&(p=d[v].slice(1).match(mT.paramRE),!(!p||p.length<b)&&(k=u(p[b]),k<f&&(f=k),k>h&&(h=k)));if(h>=f)return[f,h]}}});var _0e=ye((Gsr,y0e)=>{"use strict";var m0e=lP();y0e.exports={moduleType:"component",name:"shapes",layoutAttributes:VU(),supplyLayoutDefaults:l0e(),supplyDrawNewShapeDefaults:f0e(),includeBasePlot:qM()("shapes"),calcAutorange:g0e(),draw:m0e.draw,drawOne:m0e.drawOne}});var HU=ye((jsr,b0e)=>{"use strict";var x0e=hd(),vgt=vl().templatedArray,Hsr=OM();b0e.exports=vgt("image",{visible:{valType:"boolean",dflt:!0,editType:"arraydraw"},source:{valType:"string",editType:"arraydraw"},layer:{valType:"enumerated",values:["below","above"],dflt:"above",editType:"arraydraw"},sizex:{valType:"number",dflt:0,editType:"arraydraw"},sizey:{valType:"number",dflt:0,editType:"arraydraw"},sizing:{valType:"enumerated",values:["fill","contain","stretch"],dflt:"contain",editType:"arraydraw"},opacity:{valType:"number",min:0,max:1,dflt:1,editType:"arraydraw"},x:{valType:"any",dflt:0,editType:"arraydraw"},y:{valType:"any",dflt:0,editType:"arraydraw"},xanchor:{valType:"enumerated",values:["left","center","right"],dflt:"left",editType:"arraydraw"},yanchor:{valType:"enumerated",values:["top","middle","bottom"],dflt:"top",editType:"arraydraw"},xref:{valType:"enumerated",values:["paper",x0e.idRegex.x.toString()],dflt:"paper",editType:"arraydraw"},yref:{valType:"enumerated",values:["paper",x0e.idRegex.y.toString()],dflt:"paper",editType:"arraydraw"},editType:"arraydraw"})});var T0e=ye((Wsr,w0e)=>{"use strict";var pgt=Dr(),jU=ho(),ggt=Yd(),mgt=HU(),ygt="images";w0e.exports=function(t,r){var n={name:ygt,handleItemDefaults:_gt};ggt(t,r,n)};function _gt(e,t,r){function n(h,d){return pgt.coerce(e,t,mgt,h,d)}var i=n("source"),a=n("visible",!!i);if(!a)return t;n("layer"),n("xanchor"),n("yanchor"),n("sizex"),n("sizey"),n("sizing"),n("opacity");for(var o={_fullLayout:r},s=["x","y"],l=0;l<2;l++){var u=s[l],c=jU.coerceRef(e,t,o,u,"paper",void 0);if(c!=="paper"){var f=jU.getFromId(o,c);f._imgIndices.push(t._index)}jU.coercePosition(t,o,n,c,u,0)}return t}});var E0e=ye((Xsr,M0e)=>{"use strict";var A0e=Oa(),xgt=So(),yT=ho(),S0e=hf(),bgt=Wp();M0e.exports=function(t){var r=t._fullLayout,n=[],i={},a=[],o,s;for(s=0;s<r.images.length;s++){var l=r.images[s];if(l.visible)if(l.layer==="below"&&l.xref!=="paper"&&l.yref!=="paper"){o=S0e.ref2id(l.xref)+S0e.ref2id(l.yref);var u=r._plots[o];if(!u){a.push(l);continue}u.mainplot&&(o=u.mainplot.id),i[o]||(i[o]=[]),i[o].push(l)}else l.layer==="above"?n.push(l):a.push(l)}var c={x:{left:{sizing:"xMin",offset:0},center:{sizing:"xMid",offset:-1/2},right:{sizing:"xMax",offset:-1}},y:{top:{sizing:"YMin",offset:0},middle:{sizing:"YMid",offset:-1/2},bottom:{sizing:"YMax",offset:-1}}};function f(S){var L=A0e.select(this);if(this._imgSrc!==S.source)if(L.attr("xmlns",bgt.svg),!t._context.staticPlot||S.source&&S.source.slice(0,5)==="data:")L.attr("xlink:href",S.source),this._imgSrc=S.source;else{var x=new Promise(function(C){var M=new Image;this.img=M,M.setAttribute("crossOrigin","anonymous"),M.onerror=g,M.onload=function(){var P=document.createElement("canvas");P.width=this.width,P.height=this.height;var T=P.getContext("2d",{willReadFrequently:!0});T.drawImage(this,0,0);var z=P.toDataURL("image/png");L.attr("xlink:href",z),C()},L.on("error",g),M.src=S.source,this._imgSrc=S.source;function g(){L.remove(),C()}}.bind(this));t._promises.push(x)}}function h(S){var L=A0e.select(this),x=yT.getFromId(t,S.xref),C=yT.getFromId(t,S.yref),M=yT.getRefType(S.xref)==="domain",g=yT.getRefType(S.yref)==="domain",P=r._size,T,z;x!==void 0?T=typeof S.xref=="string"&&M?x._length*S.sizex:Math.abs(x.l2p(S.sizex)-x.l2p(0)):T=S.sizex*P.w,C!==void 0?z=typeof S.yref=="string"&&g?C._length*S.sizey:Math.abs(C.l2p(S.sizey)-C.l2p(0)):z=S.sizey*P.h;var O=T*c.x[S.xanchor].offset,V=z*c.y[S.yanchor].offset,G=c.x[S.xanchor].sizing+c.y[S.yanchor].sizing,Z,j;switch(x!==void 0?Z=typeof S.xref=="string"&&M?x._length*S.x+x._offset:x.r2p(S.x)+x._offset:Z=S.x*P.w+P.l,Z+=O,C!==void 0?j=typeof S.yref=="string"&&g?C._length*(1-S.y)+C._offset:C.r2p(S.y)+C._offset:j=P.h-S.y*P.h+P.t,j+=V,S.sizing){case"fill":G+=" slice";break;case"stretch":G="none";break}L.attr({x:Z,y:j,width:T,height:z,preserveAspectRatio:G,opacity:S.opacity});var N=x&&yT.getRefType(S.xref)!=="domain"?x._id:"",H=C&&yT.getRefType(S.yref)!=="domain"?C._id:"",te=N+H;xgt.setClipUrl(L,te?"clip"+r._uid+te:null,t)}function d(S){return[S.xref,S.x,S.sizex,S.yref,S.y,S.sizey].join("_")}function v(S,L){return S._index-L._index}var _=r._imageLowerLayer.selectAll("image").data(a,d),b=r._imageUpperLayer.selectAll("image").data(n,d);_.enter().append("image"),b.enter().append("image"),_.exit().remove(),b.exit().remove(),_.each(function(S){f.bind(this)(S),h.bind(this)(S)}),b.each(function(S){f.bind(this)(S),h.bind(this)(S)}),_.sort(v),b.sort(v);var p=Object.keys(r._plots);for(s=0;s<p.length;s++){o=p[s];var k=r._plots[o];if(k.imagelayer){var E=k.imagelayer.selectAll("image").data(i[o]||[],d);E.enter().append("image"),E.exit().remove(),E.each(function(S){f.bind(this)(S),h.bind(this)(S)}),E.sort(v)}}}});var L0e=ye((Zsr,C0e)=>{"use strict";var k0e=Eo(),wgt=m6();C0e.exports=function(t,r,n,i){r=r||{};var a=n==="log"&&r.type==="linear",o=n==="linear"&&r.type==="log";if(a||o){for(var s=t._fullLayout.images,l=r._id.charAt(0),u,c,f=0;f<s.length;f++)if(u=s[f],c="images["+f+"].",u[l+"ref"]===r._id){var h=u[l],d=u["size"+l],v=null,_=null;if(a){v=wgt(h,r.range);var b=d/Math.pow(10,v)/2;_=2*Math.log(b+Math.sqrt(1+b*b))/Math.LN10}else v=Math.pow(10,h),_=v*(Math.pow(10,d/2)-Math.pow(10,-d/2));k0e(v)?k0e(_)||(_=null):(v=null,_=null),i(c+l,v),i(c+"size"+l,_)}}}});var I0e=ye((Ysr,P0e)=>{"use strict";P0e.exports={moduleType:"component",name:"images",layoutAttributes:HU(),supplyLayoutDefaults:T0e(),includeBasePlot:qM()("images"),draw:E0e(),convertCoords:L0e()}});var _I=ye((Ksr,R0e)=>{"use strict";R0e.exports={name:"updatemenus",containerClassName:"updatemenu-container",headerGroupClassName:"updatemenu-header-group",headerClassName:"updatemenu-header",headerArrowClassName:"updatemenu-header-arrow",dropdownButtonGroupClassName:"updatemenu-dropdown-button-group",dropdownButtonClassName:"updatemenu-dropdown-button",buttonClassName:"updatemenu-button",itemRectClassName:"updatemenu-item-rect",itemTextClassName:"updatemenu-item-text",menuIndexAttrName:"updatemenu-active-index",autoMarginIdRoot:"updatemenu-",blankHeaderOpts:{label:"  "},minWidth:30,minHeight:30,textPadX:24,arrowPadX:16,rx:2,ry:2,textOffsetX:12,textOffsetY:3,arrowOffsetX:4,gapButtonHeader:5,gapButton:2,activeColor:"#F4FAFF",hoverColor:"#F4FAFF",arrowSymbol:{left:"\u25C4",right:"\u25BA",up:"\u25B2",down:"\u25BC"}}});var WU=ye((Jsr,F0e)=>{"use strict";var Tgt=ec(),Agt=Lh(),Sgt=Ao().extendFlat,Mgt=mc().overrideAll,Egt=E6(),D0e=vl().templatedArray,kgt=D0e("button",{visible:{valType:"boolean"},method:{valType:"enumerated",values:["restyle","relayout","animate","update","skip"],dflt:"restyle"},args:{valType:"info_array",freeLength:!0,items:[{valType:"any"},{valType:"any"},{valType:"any"}]},args2:{valType:"info_array",freeLength:!0,items:[{valType:"any"},{valType:"any"},{valType:"any"}]},label:{valType:"string",dflt:""},execute:{valType:"boolean",dflt:!0}});F0e.exports=Mgt(D0e("updatemenu",{_arrayAttrRegexps:[/^updatemenus\[(0|[1-9][0-9]+)\]\.buttons/],visible:{valType:"boolean"},type:{valType:"enumerated",values:["dropdown","buttons"],dflt:"dropdown"},direction:{valType:"enumerated",values:["left","right","up","down"],dflt:"down"},active:{valType:"integer",min:-1,dflt:0},showactive:{valType:"boolean",dflt:!0},buttons:kgt,x:{valType:"number",min:-2,max:3,dflt:-.05},xanchor:{valType:"enumerated",values:["auto","left","center","right"],dflt:"right"},y:{valType:"number",min:-2,max:3,dflt:1},yanchor:{valType:"enumerated",values:["auto","top","middle","bottom"],dflt:"top"},pad:Sgt(Egt({editType:"arraydraw"}),{}),font:Tgt({}),bgcolor:{valType:"color"},bordercolor:{valType:"color",dflt:Agt.borderLine},borderwidth:{valType:"number",min:0,dflt:1,editType:"arraydraw"}}),"arraydraw","from-root")});var B0e=ye(($sr,q0e)=>{"use strict";var xI=Dr(),z0e=Yd(),O0e=WU(),Cgt=_I(),Lgt=Cgt.name,Pgt=O0e.buttons;q0e.exports=function(t,r){var n={name:Lgt,handleItemDefaults:Igt};z0e(t,r,n)};function Igt(e,t,r){function n(o,s){return xI.coerce(e,t,O0e,o,s)}var i=z0e(e,t,{name:"buttons",handleItemDefaults:Rgt}),a=n("visible",i.length>0);a&&(n("active"),n("direction"),n("type"),n("showactive"),n("x"),n("y"),xI.noneOrAll(e,t,["x","y"]),n("xanchor"),n("yanchor"),n("pad.t"),n("pad.r"),n("pad.b"),n("pad.l"),xI.coerceFont(n,"font",r.font),n("bgcolor",r.paper_bgcolor),n("bordercolor"),n("borderwidth"))}function Rgt(e,t){function r(i,a){return xI.coerce(e,t,Pgt,i,a)}var n=r("visible",e.method==="skip"||Array.isArray(e.args));n&&(r("method"),r("args"),r("args2"),r("label"),r("execute"))}});var V0e=ye((Qsr,U0e)=>{"use strict";U0e.exports=Mf;var qg=Oa(),N0e=ka(),_T=So(),bI=Dr();function Mf(e,t,r){this.gd=e,this.container=t,this.id=r,this.position=null,this.translateX=null,this.translateY=null,this.hbar=null,this.vbar=null,this.bg=this.container.selectAll("rect.scrollbox-bg").data([0]),this.bg.exit().on(".drag",null).on("wheel",null).remove(),this.bg.enter().append("rect").classed("scrollbox-bg",!0).style("pointer-events","all").attr({opacity:0,x:0,y:0,width:0,height:0})}Mf.barWidth=2;Mf.barLength=20;Mf.barRadius=2;Mf.barPad=1;Mf.barColor="#808BA4";Mf.prototype.enable=function(t,r,n){var i=this.gd._fullLayout,a=i.width,o=i.height;this.position=t;var s=this.position.l,l=this.position.w,u=this.position.t,c=this.position.h,f=this.position.direction,h=f==="down",d=f==="left",v=f==="right",_=f==="up",b=l,p=c,k,E,S,L;!h&&!d&&!v&&!_&&(this.position.direction="down",h=!0);var x=h||_;x?(k=s,E=k+b,h?(S=u,L=Math.min(S+p,o),p=L-S):(L=u+p,S=Math.max(L-p,0),p=L-S)):(S=u,L=S+p,d?(E=s+b,k=Math.max(E-b,0),b=E-k):(k=s,E=Math.min(k+b,a),b=E-k)),this._box={l:k,t:S,w:b,h:p};var C=l>b,M=Mf.barLength+2*Mf.barPad,g=Mf.barWidth+2*Mf.barPad,P=s,T=u+c;T+g>o&&(T=o-g);var z=this.container.selectAll("rect.scrollbar-horizontal").data(C?[0]:[]);z.exit().on(".drag",null).remove(),z.enter().append("rect").classed("scrollbar-horizontal",!0).call(N0e.fill,Mf.barColor),C?(this.hbar=z.attr({rx:Mf.barRadius,ry:Mf.barRadius,x:P,y:T,width:M,height:g}),this._hbarXMin=P+M/2,this._hbarTranslateMax=b-M):(delete this.hbar,delete this._hbarXMin,delete this._hbarTranslateMax);var O=c>p,V=Mf.barWidth+2*Mf.barPad,G=Mf.barLength+2*Mf.barPad,Z=s+l,j=u;Z+V>a&&(Z=a-V);var N=this.container.selectAll("rect.scrollbar-vertical").data(O?[0]:[]);N.exit().on(".drag",null).remove(),N.enter().append("rect").classed("scrollbar-vertical",!0).call(N0e.fill,Mf.barColor),O?(this.vbar=N.attr({rx:Mf.barRadius,ry:Mf.barRadius,x:Z,y:j,width:V,height:G}),this._vbarYMin=j+G/2,this._vbarTranslateMax=p-G):(delete this.vbar,delete this._vbarYMin,delete this._vbarTranslateMax);var H=this.id,te=k-.5,oe=O?E+V+.5:E+.5,_e=S-.5,Ee=C?L+g+.5:L+.5,Ce=i._topdefs.selectAll("#"+H).data(C||O?[0]:[]);if(Ce.exit().remove(),Ce.enter().append("clipPath").attr("id",H).append("rect"),C||O?(this._clipRect=Ce.select("rect").attr({x:Math.floor(te),y:Math.floor(_e),width:Math.ceil(oe)-Math.floor(te),height:Math.ceil(Ee)-Math.floor(_e)}),this.container.call(_T.setClipUrl,H,this.gd),this.bg.attr({x:s,y:u,width:l,height:c})):(this.bg.attr({width:0,height:0}),this.container.on("wheel",null).on(".drag",null).call(_T.setClipUrl,null),delete this._clipRect),C||O){var me=qg.behavior.drag().on("dragstart",function(){qg.event.sourceEvent.preventDefault()}).on("drag",this._onBoxDrag.bind(this));this.container.on("wheel",null).on("wheel",this._onBoxWheel.bind(this)).on(".drag",null).call(me);var ie=qg.behavior.drag().on("dragstart",function(){qg.event.sourceEvent.preventDefault(),qg.event.sourceEvent.stopPropagation()}).on("drag",this._onBarDrag.bind(this));C&&this.hbar.on(".drag",null).call(ie),O&&this.vbar.on(".drag",null).call(ie)}this.setTranslate(r,n)};Mf.prototype.disable=function(){(this.hbar||this.vbar)&&(this.bg.attr({width:0,height:0}),this.container.on("wheel",null).on(".drag",null).call(_T.setClipUrl,null),delete this._clipRect),this.hbar&&(this.hbar.on(".drag",null),this.hbar.remove(),delete this.hbar,delete this._hbarXMin,delete this._hbarTranslateMax),this.vbar&&(this.vbar.on(".drag",null),this.vbar.remove(),delete this.vbar,delete this._vbarYMin,delete this._vbarTranslateMax)};Mf.prototype._onBoxDrag=function(){var t=this.translateX,r=this.translateY;this.hbar&&(t-=qg.event.dx),this.vbar&&(r-=qg.event.dy),this.setTranslate(t,r)};Mf.prototype._onBoxWheel=function(){var t=this.translateX,r=this.translateY;this.hbar&&(t+=qg.event.deltaY),this.vbar&&(r+=qg.event.deltaY),this.setTranslate(t,r)};Mf.prototype._onBarDrag=function(){var t=this.translateX,r=this.translateY;if(this.hbar){var n=t+this._hbarXMin,i=n+this._hbarTranslateMax,a=bI.constrain(qg.event.x,n,i),o=(a-n)/(i-n),s=this.position.w-this._box.w;t=o*s}if(this.vbar){var l=r+this._vbarYMin,u=l+this._vbarTranslateMax,c=bI.constrain(qg.event.y,l,u),f=(c-l)/(u-l),h=this.position.h-this._box.h;r=f*h}this.setTranslate(t,r)};Mf.prototype.setTranslate=function(t,r){var n=this.position.w-this._box.w,i=this.position.h-this._box.h;if(t=bI.constrain(t||0,0,n),r=bI.constrain(r||0,0,i),this.translateX=t,this.translateY=r,this.container.call(_T.setTranslate,this._box.l-this.position.l-t,this._box.t-this.position.t-r),this._clipRect&&this._clipRect.attr({x:Math.floor(this.position.l+t-.5),y:Math.floor(this.position.t+r-.5)}),this.hbar){var a=t/n;this.hbar.call(_T.setTranslate,t+a*this._hbarTranslateMax,r)}if(this.vbar){var o=r/i;this.vbar.call(_T.setTranslate,t,r+o*this._vbarTranslateMax)}}});var $0e=ye((elr,J0e)=>{"use strict";var xT=Oa(),s4=Mc(),l4=ka(),bT=So(),e0=Dr(),wI=ru(),Dgt=vl().arrayEditor,H0e=$h().LINE_SPACING,es=_I(),Fgt=V0e();J0e.exports=function(t){var r=t._fullLayout,n=e0.filterVisible(r[es.name]);function i(h){s4.autoMargin(t,Y0e(h))}var a=r._menulayer.selectAll("g."+es.containerClassName).data(n.length>0?[0]:[]);if(a.enter().append("g").classed(es.containerClassName,!0).style("cursor","pointer"),a.exit().each(function(){xT.select(this).selectAll("g."+es.headerGroupClassName).each(i)}).remove(),n.length!==0){var o=a.selectAll("g."+es.headerGroupClassName).data(n,zgt);o.enter().append("g").classed(es.headerGroupClassName,!0);for(var s=e0.ensureSingle(a,"g",es.dropdownButtonGroupClassName,function(h){h.style("pointer-events","all")}),l=0;l<n.length;l++){var u=n[l];Vgt(t,u)}var c="updatemenus"+r._uid,f=new Fgt(t,s,c);o.enter().size()&&(s.node().parentNode.appendChild(s.node()),s.call(ZU)),o.exit().each(function(h){s.call(ZU),i(h)}).remove(),o.each(function(h){var d=xT.select(this),v=h.type==="dropdown"?s:null;s4.manageCommandObserver(t,h,h.buttons,function(_){XU(t,h,h.buttons[_.index],d,v,f,_.index,!0)}),h.type==="dropdown"?(W0e(t,d,s,f,h),j0e(s,h)&&u4(t,d,s,f,h)):u4(t,d,null,null,h)})}};function zgt(e){return e._index}function Ogt(e){return+e.attr(es.menuIndexAttrName)==-1}function j0e(e,t){return+e.attr(es.menuIndexAttrName)===t._index}function XU(e,t,r,n,i,a,o,s){t.active=o,Dgt(e.layout,es.name,t).applyUpdate("active",o),t.type==="buttons"?u4(e,n,null,null,t):t.type==="dropdown"&&(i.attr(es.menuIndexAttrName,"-1"),W0e(e,n,i,a,t),s||u4(e,n,i,a,t))}function W0e(e,t,r,n,i){var a=e0.ensureSingle(t,"g",es.headerClassName,function(h){h.style("pointer-events","all")}),o=i._dims,s=i.active,l=i.buttons[s]||es.blankHeaderOpts,u={y:i.pad.t,yPad:0,x:i.pad.l,xPad:0,index:0},c={width:o.headerWidth,height:o.headerHeight};a.call(YU,i,l,e).call(K0e,i,u,c);var f=e0.ensureSingle(t,"text",es.headerArrowClassName,function(h){h.attr("text-anchor","end").call(bT.font,i.font).text(es.arrowSymbol[i.direction])});f.attr({x:o.headerWidth-es.arrowOffsetX+i.pad.l,y:o.headerHeight/2+es.textOffsetY+i.pad.t}),a.on("click",function(){r.call(ZU,String(j0e(r,i)?-1:i._index)),u4(e,t,r,n,i)}),a.on("mouseover",function(){a.call(X0e)}),a.on("mouseout",function(){a.call(Z0e,i)}),bT.setTranslate(t,o.lx,o.ly)}function u4(e,t,r,n,i){r||(r=t,r.attr("pointer-events","all"));var a=!Ogt(r)||i.type==="buttons"?i.buttons:[],o=i.type==="dropdown"?es.dropdownButtonClassName:es.buttonClassName,s=r.selectAll("g."+o).data(e0.filterVisible(a)),l=s.enter().append("g").classed(o,!0),u=s.exit();i.type==="dropdown"?(l.attr("opacity","0").transition().attr("opacity","1"),u.transition().attr("opacity","0").remove()):u.remove();var c=0,f=0,h=i._dims,d=["up","down"].indexOf(i.direction)!==-1;i.type==="dropdown"&&(d?f=h.headerHeight+es.gapButtonHeader:c=h.headerWidth+es.gapButtonHeader),i.type==="dropdown"&&i.direction==="up"&&(f=-es.gapButtonHeader+es.gapButton-h.openHeight),i.type==="dropdown"&&i.direction==="left"&&(c=-es.gapButtonHeader+es.gapButton-h.openWidth);var v={x:h.lx+c+i.pad.l,y:h.ly+f+i.pad.t,yPad:es.gapButton,xPad:es.gapButton,index:0},_={l:v.x+i.borderwidth,t:v.y+i.borderwidth};s.each(function(b,p){var k=xT.select(this);k.call(YU,i,b,e).call(K0e,i,v),k.on("click",function(){xT.event.defaultPrevented||(b.execute&&(b.args2&&i.active===p?(XU(e,i,b,t,r,n,-1),s4.executeAPICommand(e,b.method,b.args2)):(XU(e,i,b,t,r,n,p),s4.executeAPICommand(e,b.method,b.args))),e.emit("plotly_buttonclicked",{menu:i,button:b,active:i.active}))}),k.on("mouseover",function(){k.call(X0e)}),k.on("mouseout",function(){k.call(Z0e,i),s.call(G0e,i)})}),s.call(G0e,i),d?(_.w=Math.max(h.openWidth,h.headerWidth),_.h=v.y-_.t):(_.w=v.x-_.l,_.h=Math.max(h.openHeight,h.headerHeight)),_.direction=i.direction,n&&(s.size()?qgt(e,t,r,n,i,_):Bgt(n))}function qgt(e,t,r,n,i,a){var o=i.direction,s=o==="up"||o==="down",l=i._dims,u=i.active,c,f,h;if(s)for(f=0,h=0;h<u;h++)f+=l.heights[h]+es.gapButton;else for(c=0,h=0;h<u;h++)c+=l.widths[h]+es.gapButton;n.enable(a,c,f),n.hbar&&n.hbar.attr("opacity","0").transition().attr("opacity","1"),n.vbar&&n.vbar.attr("opacity","0").transition().attr("opacity","1")}function Bgt(e){var t=!!e.hbar,r=!!e.vbar;t&&e.hbar.transition().attr("opacity","0").each("end",function(){t=!1,r||e.disable()}),r&&e.vbar.transition().attr("opacity","0").each("end",function(){r=!1,t||e.disable()})}function YU(e,t,r,n){e.call(Ngt,t).call(Ugt,t,r,n)}function Ngt(e,t){var r=e0.ensureSingle(e,"rect",es.itemRectClassName,function(n){n.attr({rx:es.rx,ry:es.ry,"shape-rendering":"crispEdges"})});r.call(l4.stroke,t.bordercolor).call(l4.fill,t.bgcolor).style("stroke-width",t.borderwidth+"px")}function Ugt(e,t,r,n){var i=e0.ensureSingle(e,"text",es.itemTextClassName,function(s){s.attr({"text-anchor":"start","data-notex":1})}),a=r.label,o=n._fullLayout._meta;o&&(a=e0.templateString(a,o)),i.call(bT.font,t.font).text(a).call(wI.convertToTspans,n)}function G0e(e,t){var r=t.active;e.each(function(n,i){var a=xT.select(this);i===r&&t.showactive&&a.select("rect."+es.itemRectClassName).call(l4.fill,es.activeColor)})}function X0e(e){e.select("rect."+es.itemRectClassName).call(l4.fill,es.hoverColor)}function Z0e(e,t){e.select("rect."+es.itemRectClassName).call(l4.fill,t.bgcolor)}function Vgt(e,t){var r=t._dims={width1:0,height1:0,heights:[],widths:[],totalWidth:0,totalHeight:0,openWidth:0,openHeight:0,lx:0,ly:0},n=bT.tester.selectAll("g."+es.dropdownButtonClassName).data(e0.filterVisible(t.buttons));n.enter().append("g").classed(es.dropdownButtonClassName,!0);var i=["up","down"].indexOf(t.direction)!==-1;n.each(function(c,f){var h=xT.select(this);h.call(YU,t,c,e);var d=h.select("."+es.itemTextClassName),v=d.node()&&bT.bBox(d.node()).width,_=Math.max(v+es.textPadX,es.minWidth),b=t.font.size*H0e,p=wI.lineCount(d),k=Math.max(b*p,es.minHeight)+es.textOffsetY;k=Math.ceil(k),_=Math.ceil(_),r.widths[f]=_,r.heights[f]=k,r.height1=Math.max(r.height1,k),r.width1=Math.max(r.width1,_),i?(r.totalWidth=Math.max(r.totalWidth,_),r.openWidth=r.totalWidth,r.totalHeight+=k+es.gapButton,r.openHeight+=k+es.gapButton):(r.totalWidth+=_+es.gapButton,r.openWidth+=_+es.gapButton,r.totalHeight=Math.max(r.totalHeight,k),r.openHeight=r.totalHeight)}),i?r.totalHeight-=es.gapButton:r.totalWidth-=es.gapButton,r.headerWidth=r.width1+es.arrowPadX,r.headerHeight=r.height1,t.type==="dropdown"&&(i?(r.width1+=es.arrowPadX,r.totalHeight=r.height1):r.totalWidth=r.width1,r.totalWidth+=es.arrowPadX),n.remove();var a=r.totalWidth+t.pad.l+t.pad.r,o=r.totalHeight+t.pad.t+t.pad.b,s=e._fullLayout._size;r.lx=s.l+s.w*t.x,r.ly=s.t+s.h*(1-t.y);var l="left";e0.isRightAnchor(t)&&(r.lx-=a,l="right"),e0.isCenterAnchor(t)&&(r.lx-=a/2,l="center");var u="top";e0.isBottomAnchor(t)&&(r.ly-=o,u="bottom"),e0.isMiddleAnchor(t)&&(r.ly-=o/2,u="middle"),r.totalWidth=Math.ceil(r.totalWidth),r.totalHeight=Math.ceil(r.totalHeight),r.lx=Math.round(r.lx),r.ly=Math.round(r.ly),s4.autoMargin(e,Y0e(t),{x:t.x,y:t.y,l:a*({right:1,center:.5}[l]||0),r:a*({left:1,center:.5}[l]||0),b:o*({top:1,middle:.5}[u]||0),t:o*({bottom:1,middle:.5}[u]||0)})}function Y0e(e){return es.autoMarginIdRoot+e._index}function K0e(e,t,r,n){n=n||{};var i=e.select("."+es.itemRectClassName),a=e.select("."+es.itemTextClassName),o=t.borderwidth,s=r.index,l=t._dims;bT.setTranslate(e,o+r.x,o+r.y);var u=["up","down"].indexOf(t.direction)!==-1,c=n.height||(u?l.heights[s]:l.height1);i.attr({x:0,y:0,width:n.width||(u?l.width1:l.widths[s]),height:c});var f=t.font.size*H0e,h=wI.lineCount(a),d=(h-1)*f/2;wI.positionText(a,es.textOffsetX,c/2-d+es.textOffsetY),u?r.y+=l.heights[s]+r.yPad:r.x+=l.widths[s]+r.xPad,r.index++}function ZU(e,t){e.attr(es.menuIndexAttrName,t||"-1").selectAll("g."+es.dropdownButtonClassName).remove()}});var ege=ye((tlr,Q0e)=>{"use strict";var Ggt=_I();Q0e.exports={moduleType:"component",name:Ggt.name,layoutAttributes:WU(),supplyLayoutDefaults:B0e(),draw:$0e()}});var c4=ye((rlr,tge)=>{"use strict";tge.exports={name:"sliders",containerClassName:"slider-container",groupClassName:"slider-group",inputAreaClass:"slider-input-area",railRectClass:"slider-rail-rect",railTouchRectClass:"slider-rail-touch-rect",gripRectClass:"slider-grip-rect",tickRectClass:"slider-tick-rect",inputProxyClass:"slider-input-proxy",labelsClass:"slider-labels",labelGroupClass:"slider-label-group",labelClass:"slider-label",currentValueClass:"slider-current-value",railHeight:5,menuIndexAttrName:"slider-active-index",autoMarginIdRoot:"slider-",minWidth:30,minHeight:30,textPadX:40,arrowOffsetX:4,railRadius:2,railWidth:5,railBorder:4,railBorderWidth:1,railBorderColor:"#bec8d9",railBgColor:"#f8fafc",railInset:8,stepInset:10,gripRadius:10,gripWidth:20,gripHeight:20,gripBorder:20,gripBorderWidth:1,gripBorderColor:"#bec8d9",gripBgColor:"#f6f8fa",gripBgActiveColor:"#dbdde0",labelPadding:8,labelOffset:0,tickWidth:1,tickColor:"#333",tickOffset:25,tickLength:7,minorTickOffset:25,minorTickColor:"#333",minorTickLength:4,currentValuePadding:8,currentValueInset:0}});var KU=ye((ilr,nge)=>{"use strict";var rge=ec(),Hgt=E6(),jgt=Ao().extendDeepAll,Wgt=mc().overrideAll,Xgt=US(),ige=vl().templatedArray,$b=c4(),Zgt=ige("step",{visible:{valType:"boolean",dflt:!0},method:{valType:"enumerated",values:["restyle","relayout","animate","update","skip"],dflt:"restyle"},args:{valType:"info_array",freeLength:!0,items:[{valType:"any"},{valType:"any"},{valType:"any"}]},label:{valType:"string"},value:{valType:"string"},execute:{valType:"boolean",dflt:!0}});nge.exports=Wgt(ige("slider",{visible:{valType:"boolean",dflt:!0},active:{valType:"number",min:0,dflt:0},steps:Zgt,lenmode:{valType:"enumerated",values:["fraction","pixels"],dflt:"fraction"},len:{valType:"number",min:0,dflt:1},x:{valType:"number",min:-2,max:3,dflt:0},pad:jgt(Hgt({editType:"arraydraw"}),{},{t:{dflt:20}}),xanchor:{valType:"enumerated",values:["auto","left","center","right"],dflt:"left"},y:{valType:"number",min:-2,max:3,dflt:0},yanchor:{valType:"enumerated",values:["auto","top","middle","bottom"],dflt:"top"},transition:{duration:{valType:"number",min:0,dflt:150},easing:{valType:"enumerated",values:Xgt.transition.easing.values,dflt:"cubic-in-out"}},currentvalue:{visible:{valType:"boolean",dflt:!0},xanchor:{valType:"enumerated",values:["left","center","right"],dflt:"left"},offset:{valType:"number",dflt:10},prefix:{valType:"string"},suffix:{valType:"string"},font:rge({})},font:rge({}),activebgcolor:{valType:"color",dflt:$b.gripBgActiveColor},bgcolor:{valType:"color",dflt:$b.railBgColor},bordercolor:{valType:"color",dflt:$b.railBorderColor},borderwidth:{valType:"number",min:0,dflt:$b.railBorderWidth},ticklen:{valType:"number",min:0,dflt:$b.tickLength},tickcolor:{valType:"color",dflt:$b.tickColor},tickwidth:{valType:"number",min:0,dflt:1},minorticklen:{valType:"number",min:0,dflt:$b.minorTickLength}}),"arraydraw","from-root")});var lge=ye((nlr,sge)=>{"use strict";var wT=Dr(),age=Yd(),oge=KU(),Ygt=c4(),Kgt=Ygt.name,Jgt=oge.steps;sge.exports=function(t,r){age(t,r,{name:Kgt,handleItemDefaults:$gt})};function $gt(e,t,r){function n(f,h){return wT.coerce(e,t,oge,f,h)}for(var i=age(e,t,{name:"steps",handleItemDefaults:Qgt}),a=0,o=0;o<i.length;o++)i[o].visible&&a++;var s;if(a<2?s=t.visible=!1:s=n("visible"),!!s){t._stepCount=a;var l=t._visibleSteps=wT.filterVisible(i),u=n("active");(i[u]||{}).visible||(t.active=l[0]._index),n("x"),n("y"),wT.noneOrAll(e,t,["x","y"]),n("xanchor"),n("yanchor"),n("len"),n("lenmode"),n("pad.t"),n("pad.r"),n("pad.b"),n("pad.l"),wT.coerceFont(n,"font",r.font);var c=n("currentvalue.visible");c&&(n("currentvalue.xanchor"),n("currentvalue.prefix"),n("currentvalue.suffix"),n("currentvalue.offset"),wT.coerceFont(n,"currentvalue.font",t.font)),n("transition.duration"),n("transition.easing"),n("bgcolor"),n("activebgcolor"),n("bordercolor"),n("borderwidth"),n("ticklen"),n("tickwidth"),n("tickcolor"),n("minorticklen")}}function Qgt(e,t){function r(a,o){return wT.coerce(e,t,Jgt,a,o)}var n;if(e.method!=="skip"&&!Array.isArray(e.args)?n=t.visible=!1:n=r("visible"),n){r("method"),r("args");var i=r("label","step-"+t._index);r("value",i),r("execute")}}});var yge=ye((alr,mge)=>{"use strict";var Bg=Oa(),TI=Mc(),__=ka(),Ng=So(),t0=Dr(),emt=t0.strTranslate,f4=ru(),tmt=vl().arrayEditor,Rs=c4(),QU=$h(),fge=QU.LINE_SPACING,JU=QU.FROM_TL,$U=QU.FROM_BR;mge.exports=function(t){var r=t._context.staticPlot,n=t._fullLayout,i=rmt(n,t),a=n._infolayer.selectAll("g."+Rs.containerClassName).data(i.length>0?[0]:[]);a.enter().append("g").classed(Rs.containerClassName,!0).style("cursor",r?null:"ew-resize");function o(c){c._commandObserver&&(c._commandObserver.remove(),delete c._commandObserver),TI.autoMargin(t,hge(c))}if(a.exit().each(function(){Bg.select(this).selectAll("g."+Rs.groupClassName).each(o)}).remove(),i.length!==0){var s=a.selectAll("g."+Rs.groupClassName).data(i,imt);s.enter().append("g").classed(Rs.groupClassName,!0),s.exit().each(o).remove();for(var l=0;l<i.length;l++){var u=i[l];nmt(t,u)}s.each(function(c){var f=Bg.select(this);umt(c),TI.manageCommandObserver(t,c,c._visibleSteps,function(h){var d=f.data()[0];d.active!==h.index&&(d._dragging||vge(t,f,d,h.index,!1,!0))}),amt(t,Bg.select(this),c)})}};function hge(e){return Rs.autoMarginIdRoot+e._index}function rmt(e,t){for(var r=e[Rs.name],n=[],i=0;i<r.length;i++){var a=r[i];a.visible&&(a._gd=t,n.push(a))}return n}function imt(e){return e._index}function nmt(e,t){var r=Ng.tester.selectAll("g."+Rs.labelGroupClass).data(t._visibleSteps);r.enter().append("g").classed(Rs.labelGroupClass,!0);var n=0,i=0;r.each(function(v){var _=Bg.select(this),b=dge(_,{step:v},t),p=b.node();if(p){var k=Ng.bBox(p);i=Math.max(i,k.height),n=Math.max(n,k.width)}}),r.remove();var a=t._dims={};a.inputAreaWidth=Math.max(Rs.railWidth,Rs.gripHeight);var o=e._fullLayout._size;a.lx=o.l+o.w*t.x,a.ly=o.t+o.h*(1-t.y),t.lenmode==="fraction"?a.outerLength=Math.round(o.w*t.len):a.outerLength=t.len,a.inputAreaStart=0,a.inputAreaLength=Math.round(a.outerLength-t.pad.l-t.pad.r);var s=a.inputAreaLength-2*Rs.stepInset,l=s/(t._stepCount-1),u=n+Rs.labelPadding;if(a.labelStride=Math.max(1,Math.ceil(u/l)),a.labelHeight=i,a.currentValueMaxWidth=0,a.currentValueHeight=0,a.currentValueTotalHeight=0,a.currentValueMaxLines=1,t.currentvalue.visible){var c=Ng.tester.append("g");r.each(function(v){var _=AI(c,t,v.label),b=_.node()&&Ng.bBox(_.node())||{width:0,height:0},p=f4.lineCount(_);a.currentValueMaxWidth=Math.max(a.currentValueMaxWidth,Math.ceil(b.width)),a.currentValueHeight=Math.max(a.currentValueHeight,Math.ceil(b.height)),a.currentValueMaxLines=Math.max(a.currentValueMaxLines,p)}),a.currentValueTotalHeight=a.currentValueHeight+t.currentvalue.offset,c.remove()}a.height=a.currentValueTotalHeight+Rs.tickOffset+t.ticklen+Rs.labelOffset+a.labelHeight+t.pad.t+t.pad.b;var f="left";t0.isRightAnchor(t)&&(a.lx-=a.outerLength,f="right"),t0.isCenterAnchor(t)&&(a.lx-=a.outerLength/2,f="center");var h="top";t0.isBottomAnchor(t)&&(a.ly-=a.height,h="bottom"),t0.isMiddleAnchor(t)&&(a.ly-=a.height/2,h="middle"),a.outerLength=Math.ceil(a.outerLength),a.height=Math.ceil(a.height),a.lx=Math.round(a.lx),a.ly=Math.round(a.ly);var d={y:t.y,b:a.height*$U[h],t:a.height*JU[h]};t.lenmode==="fraction"?(d.l=0,d.xl=t.x-t.len*JU[f],d.r=0,d.xr=t.x+t.len*$U[f]):(d.x=t.x,d.l=a.outerLength*JU[f],d.r=a.outerLength*$U[f]),TI.autoMargin(e,hge(t),d)}function amt(e,t,r){(r.steps[r.active]||{}).visible||(r.active=r._visibleSteps[0]._index),t.call(AI,r).call(fmt,r).call(smt,r).call(lmt,r).call(cmt,e,r).call(omt,e,r);var n=r._dims;Ng.setTranslate(t,n.lx+r.pad.l,n.ly+r.pad.t),t.call(gge,r,!1),t.call(AI,r)}function AI(e,t,r){if(t.currentvalue.visible){var n=t._dims,i,a;switch(t.currentvalue.xanchor){case"right":i=n.inputAreaLength-Rs.currentValueInset-n.currentValueMaxWidth,a="left";break;case"center":i=n.inputAreaLength*.5,a="middle";break;default:i=Rs.currentValueInset,a="left"}var o=t0.ensureSingle(e,"text",Rs.labelClass,function(h){h.attr({"text-anchor":a,"data-notex":1})}),s=t.currentvalue.prefix?t.currentvalue.prefix:"";if(typeof r=="string")s+=r;else{var l=t.steps[t.active].label,u=t._gd._fullLayout._meta;u&&(l=t0.templateString(l,u)),s+=l}t.currentvalue.suffix&&(s+=t.currentvalue.suffix),o.call(Ng.font,t.currentvalue.font).text(s).call(f4.convertToTspans,t._gd);var c=f4.lineCount(o),f=(n.currentValueMaxLines+1-c)*t.currentvalue.font.size*fge;return f4.positionText(o,i,f),o}}function omt(e,t,r){var n=t0.ensureSingle(e,"rect",Rs.gripRectClass,function(i){i.call(pge,t,e,r).style("pointer-events","all")});n.attr({width:Rs.gripWidth,height:Rs.gripHeight,rx:Rs.gripRadius,ry:Rs.gripRadius}).call(__.stroke,r.bordercolor).call(__.fill,r.bgcolor).style("stroke-width",r.borderwidth+"px")}function dge(e,t,r){var n=t0.ensureSingle(e,"text",Rs.labelClass,function(o){o.attr({"text-anchor":"middle","data-notex":1})}),i=t.step.label,a=r._gd._fullLayout._meta;return a&&(i=t0.templateString(i,a)),n.call(Ng.font,r.font).text(i).call(f4.convertToTspans,r._gd),n}function smt(e,t){var r=t0.ensureSingle(e,"g",Rs.labelsClass),n=t._dims,i=r.selectAll("g."+Rs.labelGroupClass).data(n.labelSteps);i.enter().append("g").classed(Rs.labelGroupClass,!0),i.exit().remove(),i.each(function(a){var o=Bg.select(this);o.call(dge,a,t),Ng.setTranslate(o,eV(t,a.fraction),Rs.tickOffset+t.ticklen+t.font.size*fge+Rs.labelOffset+n.currentValueTotalHeight)})}function uge(e,t,r,n,i){var a=Math.round(n*(r._stepCount-1)),o=r._visibleSteps[a]._index;o!==r.active&&vge(e,t,r,o,!0,i)}function vge(e,t,r,n,i,a){var o=r.active;r.active=n,tmt(e.layout,Rs.name,r).applyUpdate("active",n);var s=r.steps[r.active];t.call(gge,r,a),t.call(AI,r),e.emit("plotly_sliderchange",{slider:r,step:r.steps[r.active],interaction:i,previousActive:o}),s&&s.method&&i&&(t._nextMethod?(t._nextMethod.step=s,t._nextMethod.doCallback=i,t._nextMethod.doTransition=a):(t._nextMethod={step:s,doCallback:i,doTransition:a},t._nextMethodRaf=window.requestAnimationFrame(function(){var l=t._nextMethod.step;l.method&&(l.execute&&TI.executeAPICommand(e,l.method,l.args),t._nextMethod=null,t._nextMethodRaf=null)})))}function pge(e,t,r){if(t._context.staticPlot)return;var n=r.node(),i=Bg.select(t);function a(){return r.data()[0]}function o(){var s=a();t.emit("plotly_sliderstart",{slider:s});var l=r.select("."+Rs.gripRectClass);Bg.event.stopPropagation(),Bg.event.preventDefault(),l.call(__.fill,s.activebgcolor);var u=cge(s,Bg.mouse(n)[0]);uge(t,r,s,u,!0),s._dragging=!0;function c(){var h=a(),d=cge(h,Bg.mouse(n)[0]);uge(t,r,h,d,!1)}i.on("mousemove",c),i.on("touchmove",c);function f(){var h=a();h._dragging=!1,l.call(__.fill,h.bgcolor),i.on("mouseup",null),i.on("mousemove",null),i.on("touchend",null),i.on("touchmove",null),t.emit("plotly_sliderend",{slider:h,step:h.steps[h.active]})}i.on("mouseup",f),i.on("touchend",f)}e.on("mousedown",o),e.on("touchstart",o)}function lmt(e,t){var r=e.selectAll("rect."+Rs.tickRectClass).data(t._visibleSteps),n=t._dims;r.enter().append("rect").classed(Rs.tickRectClass,!0),r.exit().remove(),r.attr({width:t.tickwidth+"px","shape-rendering":"crispEdges"}),r.each(function(i,a){var o=a%n.labelStride===0,s=Bg.select(this);s.attr({height:o?t.ticklen:t.minorticklen}).call(__.fill,t.tickcolor),Ng.setTranslate(s,eV(t,a/(t._stepCount-1))-.5*t.tickwidth,(o?Rs.tickOffset:Rs.minorTickOffset)+n.currentValueTotalHeight)})}function umt(e){var t=e._dims;t.labelSteps=[];for(var r=e._stepCount,n=0;n<r;n+=t.labelStride)t.labelSteps.push({fraction:n/(r-1),step:e._visibleSteps[n]})}function gge(e,t,r){for(var n=e.select("rect."+Rs.gripRectClass),i=0,a=0;a<t._stepCount;a++)if(t._visibleSteps[a]._index===t.active){i=a;break}var o=eV(t,i/(t._stepCount-1));if(!t._invokingCommand){var s=n;r&&t.transition.duration>0&&(s=s.transition().duration(t.transition.duration).ease(t.transition.easing)),s.attr("transform",emt(o-Rs.gripWidth*.5,t._dims.currentValueTotalHeight))}}function eV(e,t){var r=e._dims;return r.inputAreaStart+Rs.stepInset+(r.inputAreaLength-2*Rs.stepInset)*Math.min(1,Math.max(0,t))}function cge(e,t){var r=e._dims;return Math.min(1,Math.max(0,(t-Rs.stepInset-r.inputAreaStart)/(r.inputAreaLength-2*Rs.stepInset-2*r.inputAreaStart)))}function cmt(e,t,r){var n=r._dims,i=t0.ensureSingle(e,"rect",Rs.railTouchRectClass,function(a){a.call(pge,t,e,r).style("pointer-events","all")});i.attr({width:n.inputAreaLength,height:Math.max(n.inputAreaWidth,Rs.tickOffset+r.ticklen+n.labelHeight)}).call(__.fill,r.bgcolor).attr("opacity",0),Ng.setTranslate(i,0,n.currentValueTotalHeight)}function fmt(e,t){var r=t._dims,n=r.inputAreaLength-Rs.railInset*2,i=t0.ensureSingle(e,"rect",Rs.railRectClass);i.attr({width:n,height:Rs.railWidth,rx:Rs.railRadius,ry:Rs.railRadius,"shape-rendering":"crispEdges"}).call(__.stroke,t.bordercolor).call(__.fill,t.bgcolor).style("stroke-width",t.borderwidth+"px"),Ng.setTranslate(i,Rs.railInset,(r.inputAreaWidth-Rs.railWidth)*.5+r.currentValueTotalHeight)}});var xge=ye((olr,_ge)=>{"use strict";var hmt=c4();_ge.exports={moduleType:"component",name:hmt.name,layoutAttributes:KU(),supplyLayoutDefaults:lge(),draw:yge()}});var SI=ye((slr,wge)=>{"use strict";var bge=Lh();wge.exports={bgcolor:{valType:"color",dflt:bge.background,editType:"plot"},bordercolor:{valType:"color",dflt:bge.defaultLine,editType:"plot"},borderwidth:{valType:"integer",dflt:0,min:0,editType:"plot"},autorange:{valType:"boolean",dflt:!0,editType:"calc",impliedEdits:{"range[0]":void 0,"range[1]":void 0}},range:{valType:"info_array",items:[{valType:"any",editType:"calc",impliedEdits:{"^autorange":!1}},{valType:"any",editType:"calc",impliedEdits:{"^autorange":!1}}],editType:"calc",impliedEdits:{autorange:!1}},thickness:{valType:"number",dflt:.15,min:0,max:1,editType:"plot"},visible:{valType:"boolean",dflt:!0,editType:"calc"},editType:"calc"}});var tV=ye((llr,Tge)=>{"use strict";Tge.exports={_isSubplotObj:!0,rangemode:{valType:"enumerated",values:["auto","fixed","match"],dflt:"match",editType:"calc"},range:{valType:"info_array",items:[{valType:"any",editType:"plot"},{valType:"any",editType:"plot"}],editType:"plot"},editType:"calc"}});var MI=ye((ulr,Age)=>{"use strict";Age.exports={name:"rangeslider",containerClassName:"rangeslider-container",bgClassName:"rangeslider-bg",rangePlotClassName:"rangeslider-rangeplot",maskMinClassName:"rangeslider-mask-min",maskMaxClassName:"rangeslider-mask-max",slideBoxClassName:"rangeslider-slidebox",grabberMinClassName:"rangeslider-grabber-min",grabAreaMinClassName:"rangeslider-grabarea-min",handleMinClassName:"rangeslider-handle-min",grabberMaxClassName:"rangeslider-grabber-max",grabAreaMaxClassName:"rangeslider-grabarea-max",handleMaxClassName:"rangeslider-handle-max",maskMinOppAxisClassName:"rangeslider-mask-min-opp-axis",maskMaxOppAxisClassName:"rangeslider-mask-max-opp-axis",maskColor:"rgba(0,0,0,0.4)",maskOppAxisColor:"rgba(0,0,0,0.2)",slideBoxFill:"transparent",slideBoxCursor:"ew-resize",grabAreaFill:"transparent",grabAreaCursor:"col-resize",grabAreaWidth:10,handleWidth:4,handleRadius:1,handleStrokeWidth:1,extraPad:15}});var Ege=ye(kI=>{"use strict";var dmt=hf(),vmt=ru(),Sge=MI(),pmt=$h().LINE_SPACING,EI=Sge.name;function Mge(e){var t=e&&e[EI];return t&&t.visible}kI.isVisible=Mge;kI.makeData=function(e){for(var t=dmt.list({_fullLayout:e},"x",!0),r=e.margin,n=[],i=0;i<t.length;i++){var a=t[i];if(Mge(a)){n.push(a);var o=a[EI];o._id=EI+a._id,o._height=(e.height-r.b-r.t)*o.thickness,o._offsetShift=Math.floor(o.borderwidth/2)}}e._rangeSliderData=n};kI.autoMarginOpts=function(e,t){var r=e._fullLayout,n=t[EI],i=t._id.charAt(0),a=0,o=0;if(t.side==="bottom"&&(a=t._depth,t.title.text!==r._dfltTitle[i])){o=1.5*t.title.font.size+10+n._offsetShift;var s=(t.title.text.match(vmt.BR_TAG_ALL)||[]).length;o+=s*t.title.font.size*pmt}return{x:0,y:t._counterDomainMin,l:0,r:0,t:0,b:n._height+a+Math.max(r.margin.b,o),pad:Sge.extraPad+n._offsetShift*2}}});var Pge=ye((flr,Lge)=>{"use strict";var CI=Dr(),kge=vl(),Cge=hf(),gmt=SI(),mmt=tV();Lge.exports=function(t,r,n){var i=t[n],a=r[n];if(!(i.rangeslider||r._requestRangeslider[a._id]))return;CI.isPlainObject(i.rangeslider)||(i.rangeslider={});var o=i.rangeslider,s=kge.newContainer(a,"rangeslider");function l(L,x){return CI.coerce(o,s,gmt,L,x)}var u,c;function f(L,x){return CI.coerce(u,c,mmt,L,x)}var h=l("visible");if(h){l("bgcolor",r.plot_bgcolor),l("bordercolor"),l("borderwidth"),l("thickness"),l("autorange",!a.isValidRange(o.range)),l("range");var d=r._subplots;if(d)for(var v=d.cartesian.filter(function(L){return L.slice(0,Math.max(0,L.indexOf("y")))===Cge.name2id(n)}).map(function(L){return L.slice(L.indexOf("y"),L.length)}),_=CI.simpleMap(v,Cge.id2name),b=0;b<_.length;b++){var p=_[b];u=o[p]||{},c=kge.newContainer(s,p,"yaxis");var k=r[p],E;u.range&&k.isValidRange(u.range)&&(E="fixed");var S=f("rangemode",E);S!=="match"&&f("range",k.range.slice())}s._input=o}}});var Rge=ye((hlr,Ige)=>{"use strict";var ymt=hf().list,_mt=Ag().getAutoRange,xmt=MI();Ige.exports=function(t){for(var r=ymt(t,"x",!0),n=0;n<r.length;n++){var i=r[n],a=i[xmt.name];a&&a.visible&&a.autorange&&(a._input.autorange=!0,a._input.range=a.range=_mt(t,i))}}});var Oge=ye((dlr,zge)=>{"use strict";var LI=Oa(),bmt=qa(),wmt=Mc(),Jf=Dr(),PI=Jf.strTranslate,Fge=So(),x_=ka(),Tmt=Eb(),Amt=ph(),rV=hf(),Smt=yv(),Mmt=Sg(),rl=MI();zge.exports=function(e){for(var t=e._fullLayout,r=t._rangeSliderData,n=0;n<r.length;n++){var i=r[n][rl.name];i._clipId=i._id+"-"+t._uid}function a(s){return s._name}var o=t._infolayer.selectAll("g."+rl.containerClassName).data(r,a);o.exit().each(function(s){var l=s[rl.name];t._topdefs.select("#"+l._clipId).remove()}).remove(),r.length!==0&&(o.enter().append("g").classed(rl.containerClassName,!0).attr("pointer-events","all"),o.each(function(s){var l=LI.select(this),u=s[rl.name],c=t[rV.id2name(s.anchor)],f=u[rV.id2name(s.anchor)];if(u.range){var h=Jf.simpleMap(u.range,s.r2l),d=Jf.simpleMap(s.range,s.r2l),v;d[0]<d[1]?v=[Math.min(h[0],d[0]),Math.max(h[1],d[1])]:v=[Math.max(h[0],d[0]),Math.min(h[1],d[1])],u.range=u._input.range=Jf.simpleMap(v,s.l2r)}s.cleanRange("rangeslider.range");var _=t._size,b=s.domain;u._width=_.w*(b[1]-b[0]);var p=Math.round(_.l+_.w*b[0]),k=Math.round(_.t+_.h*(1-s._counterDomainMin)+(s.side==="bottom"?s._depth:0)+u._offsetShift+rl.extraPad);l.attr("transform",PI(p,k)),u._rl=Jf.simpleMap(u.range,s.r2l);var E=u._rl[0],S=u._rl[1],L=S-E;if(u.p2d=function(G){return G/u._width*L+E},u.d2p=function(G){return(G-E)/L*u._width},s.rangebreaks){var x=s.locateBreaks(E,S);if(x.length){var C,M,g=0;for(C=0;C<x.length;C++)M=x[C],g+=M.max-M.min;var P=u._width/(S-E-g),T=[-P*E];for(C=0;C<x.length;C++)M=x[C],T.push(T[T.length-1]-P*(M.max-M.min));for(u.d2p=function(G){for(var Z=T[0],j=0;j<x.length;j++){var N=x[j];if(G>=N.max)Z=T[j+1];else if(G<N.min)break}return Z+P*G},C=0;C<x.length;C++)M=x[C],M.pmin=u.d2p(M.min),M.pmax=u.d2p(M.max);u.p2d=function(G){for(var Z=T[0],j=0;j<x.length;j++){var N=x[j];if(G>=N.pmax)Z=T[j+1];else if(G<N.pmin)break}return(G-Z)/P}}}if(f.rangemode!=="match"){var z=c.r2l(f.range[0]),O=c.r2l(f.range[1]),V=O-z;u.d2pOppAxis=function(G){return(G-z)/V*u._height}}l.call(Lmt,e,s,u).call(Pmt,e,s,u).call(Imt,e,s,u).call(Dmt,e,s,u,f).call(Fmt,e,s,u).call(zmt,e,s,u),Emt(l,e,s,u),Cmt(l,e,s,u,c,f),s.side==="bottom"&&Tmt.draw(e,s._id+"title",{propContainer:s,propName:s._name+".title.text",placeholder:t._dfltTitle.x,attributes:{x:s._offset+s._length/2,y:k+u._height+u._offsetShift+10+1.5*s.title.font.size,"text-anchor":"middle"}})}))};function Dge(e){return typeof e.clientX=="number"?e.clientX:e.touches&&e.touches.length>0?e.touches[0].clientX:0}function Emt(e,t,r,n){if(t._context.staticPlot)return;var i=e.select("rect."+rl.slideBoxClassName).node(),a=e.select("rect."+rl.grabAreaMinClassName).node(),o=e.select("rect."+rl.grabAreaMaxClassName).node();function s(){var l=LI.event,u=l.target,c=Dge(l),f=c-e.node().getBoundingClientRect().left,h=n.d2p(r._rl[0]),d=n.d2p(r._rl[1]),v=Smt.coverSlip();this.addEventListener("touchmove",_),this.addEventListener("touchend",b),v.addEventListener("mousemove",_),v.addEventListener("mouseup",b);function _(p){var k=Dge(p),E=+k-c,S,L,x;switch(u){case i:if(x="ew-resize",h+E>r._length||d+E<0)return;S=h+E,L=d+E;break;case a:if(x="col-resize",h+E>r._length)return;S=h+E,L=d;break;case o:if(x="col-resize",d+E<0)return;S=h,L=d+E;break;default:x="ew-resize",S=f,L=f+E;break}if(L<S){var C=L;L=S,S=C}n._pixelMin=S,n._pixelMax=L,Mmt(LI.select(v),x),kmt(e,t,r,n)}function b(){v.removeEventListener("mousemove",_),v.removeEventListener("mouseup",b),this.removeEventListener("touchmove",_),this.removeEventListener("touchend",b),Jf.removeElement(v)}}e.on("mousedown",s),e.on("touchstart",s)}function kmt(e,t,r,n){function i(s){return r.l2r(Jf.constrain(s,n._rl[0],n._rl[1]))}var a=i(n.p2d(n._pixelMin)),o=i(n.p2d(n._pixelMax));window.requestAnimationFrame(function(){bmt.call("_guiRelayout",t,r._name+".range",[a,o])})}function Cmt(e,t,r,n,i,a){var o=rl.handleWidth/2;function s(p){return Jf.constrain(p,0,n._width)}function l(p){return Jf.constrain(p,0,n._height)}function u(p){return Jf.constrain(p,-o,n._width+o)}var c=s(n.d2p(r._rl[0])),f=s(n.d2p(r._rl[1]));if(e.select("rect."+rl.slideBoxClassName).attr("x",c).attr("width",f-c),e.select("rect."+rl.maskMinClassName).attr("width",c),e.select("rect."+rl.maskMaxClassName).attr("x",f).attr("width",n._width-f),a.rangemode!=="match"){var h=n._height-l(n.d2pOppAxis(i._rl[1])),d=n._height-l(n.d2pOppAxis(i._rl[0]));e.select("rect."+rl.maskMinOppAxisClassName).attr("x",c).attr("height",h).attr("width",f-c),e.select("rect."+rl.maskMaxOppAxisClassName).attr("x",c).attr("y",d).attr("height",n._height-d).attr("width",f-c),e.select("rect."+rl.slideBoxClassName).attr("y",h).attr("height",d-h)}var v=.5,_=Math.round(u(c-o))-v,b=Math.round(u(f-o))+v;e.select("g."+rl.grabberMinClassName).attr("transform",PI(_,v)),e.select("g."+rl.grabberMaxClassName).attr("transform",PI(b,v))}function Lmt(e,t,r,n){var i=Jf.ensureSingle(e,"rect",rl.bgClassName,function(l){l.attr({x:0,y:0,"shape-rendering":"crispEdges"})}),a=n.borderwidth%2===0?n.borderwidth:n.borderwidth-1,o=-n._offsetShift,s=Fge.crispRound(t,n.borderwidth);i.attr({width:n._width+a,height:n._height+a,transform:PI(o,o),"stroke-width":s}).call(x_.stroke,n.bordercolor).call(x_.fill,n.bgcolor)}function Pmt(e,t,r,n){var i=t._fullLayout,a=Jf.ensureSingleById(i._topdefs,"clipPath",n._clipId,function(o){o.append("rect").attr({x:0,y:0})});a.select("rect").attr({width:n._width,height:n._height})}function Imt(e,t,r,n){var i=t.calcdata,a=e.selectAll("g."+rl.rangePlotClassName).data(r._subplotsWith,Jf.identity);a.enter().append("g").attr("class",function(s){return rl.rangePlotClassName+" "+s}).call(Fge.setClipUrl,n._clipId,t),a.order(),a.exit().remove();var o;a.each(function(s,l){var u=LI.select(this),c=l===0,f=rV.getFromId(t,s,"y"),h=f._name,d=n[h],v={data:[],layout:{xaxis:{type:r.type,domain:[0,1],range:n.range.slice(),calendar:r.calendar},width:n._width,height:n._height,margin:{t:0,b:0,l:0,r:0}},_context:t._context};r.rangebreaks&&(v.layout.xaxis.rangebreaks=r.rangebreaks),v.layout[h]={type:f.type,domain:[0,1],range:d.rangemode!=="match"?d.range.slice():f.range.slice(),calendar:f.calendar},f.rangebreaks&&(v.layout[h].rangebreaks=f.rangebreaks),wmt.supplyDefaults(v);var _=v._fullLayout.xaxis,b=v._fullLayout[h];_.clearCalc(),_.setScale(),b.clearCalc(),b.setScale();var p={id:s,plotgroup:u,xaxis:_,yaxis:b,isRangePlot:!0};c?o=p:(p.mainplot="xy",p.mainplotinfo=o),Amt.rangePlot(t,p,Rmt(i,s))})}function Rmt(e,t){for(var r=[],n=0;n<e.length;n++){var i=e[n],a=i[0].trace;a.xaxis+a.yaxis===t&&r.push(i)}return r}function Dmt(e,t,r,n,i){var a=Jf.ensureSingle(e,"rect",rl.maskMinClassName,function(u){u.attr({x:0,y:0,"shape-rendering":"crispEdges"})});a.attr("height",n._height).call(x_.fill,rl.maskColor);var o=Jf.ensureSingle(e,"rect",rl.maskMaxClassName,function(u){u.attr({y:0,"shape-rendering":"crispEdges"})});if(o.attr("height",n._height).call(x_.fill,rl.maskColor),i.rangemode!=="match"){var s=Jf.ensureSingle(e,"rect",rl.maskMinOppAxisClassName,function(u){u.attr({y:0,"shape-rendering":"crispEdges"})});s.attr("width",n._width).call(x_.fill,rl.maskOppAxisColor);var l=Jf.ensureSingle(e,"rect",rl.maskMaxOppAxisClassName,function(u){u.attr({y:0,"shape-rendering":"crispEdges"})});l.attr("width",n._width).style("border-top",rl.maskOppBorder).call(x_.fill,rl.maskOppAxisColor)}}function Fmt(e,t,r,n){if(!t._context.staticPlot){var i=Jf.ensureSingle(e,"rect",rl.slideBoxClassName,function(a){a.attr({y:0,cursor:rl.slideBoxCursor,"shape-rendering":"crispEdges"})});i.attr({height:n._height,fill:rl.slideBoxFill})}}function zmt(e,t,r,n){var i=Jf.ensureSingle(e,"g",rl.grabberMinClassName),a=Jf.ensureSingle(e,"g",rl.grabberMaxClassName),o={x:0,width:rl.handleWidth,rx:rl.handleRadius,fill:x_.background,stroke:x_.defaultLine,"stroke-width":rl.handleStrokeWidth,"shape-rendering":"crispEdges"},s={y:Math.round(n._height/4),height:Math.round(n._height/2)},l=Jf.ensureSingle(i,"rect",rl.handleMinClassName,function(d){d.attr(o)});l.attr(s);var u=Jf.ensureSingle(a,"rect",rl.handleMaxClassName,function(d){d.attr(o)});u.attr(s);var c={width:rl.grabAreaWidth,x:0,y:0,fill:rl.grabAreaFill,cursor:t._context.staticPlot?void 0:rl.grabAreaCursor},f=Jf.ensureSingle(i,"rect",rl.grabAreaMinClassName,function(d){d.attr(c)});f.attr("height",n._height);var h=Jf.ensureSingle(a,"rect",rl.grabAreaMaxClassName,function(d){d.attr(c)});h.attr("height",n._height)}});var Bge=ye((vlr,qge)=>{"use strict";var Omt=Dr(),qmt=SI(),Bmt=tV(),iV=Ege();qge.exports={moduleType:"component",name:"rangeslider",schema:{subplots:{xaxis:{rangeslider:Omt.extendFlat({},qmt,{yaxis:Bmt})}}},layoutAttributes:SI(),handleDefaults:Pge(),calcAutorange:Rge(),draw:Oge(),isVisible:iV.isVisible,makeData:iV.makeData,autoMarginOpts:iV.autoMarginOpts}});var II=ye((plr,Uge)=>{"use strict";var Nmt=ec(),Nge=Lh(),Umt=vl().templatedArray,Vmt=Umt("button",{visible:{valType:"boolean",dflt:!0,editType:"plot"},step:{valType:"enumerated",values:["month","year","day","hour","minute","second","all"],dflt:"month",editType:"plot"},stepmode:{valType:"enumerated",values:["backward","todate"],dflt:"backward",editType:"plot"},count:{valType:"number",min:0,dflt:1,editType:"plot"},label:{valType:"string",editType:"plot"},editType:"plot"});Uge.exports={visible:{valType:"boolean",editType:"plot"},buttons:Vmt,x:{valType:"number",min:-2,max:3,editType:"plot"},xanchor:{valType:"enumerated",values:["auto","left","center","right"],dflt:"left",editType:"plot"},y:{valType:"number",min:-2,max:3,editType:"plot"},yanchor:{valType:"enumerated",values:["auto","top","middle","bottom"],dflt:"bottom",editType:"plot"},font:Nmt({editType:"plot"}),bgcolor:{valType:"color",dflt:Nge.lightLine,editType:"plot"},activecolor:{valType:"color",editType:"plot"},bordercolor:{valType:"color",dflt:Nge.defaultLine,editType:"plot"},borderwidth:{valType:"number",min:0,dflt:0,editType:"plot"},editType:"plot"}});var nV=ye((glr,Vge)=>{"use strict";Vge.exports={yPad:.02,minButtonWidth:30,rx:3,ry:3,lightAmount:25,darkAmount:10}});var jge=ye((mlr,Hge)=>{"use strict";var RI=Dr(),Gmt=ka(),Hmt=vl(),jmt=Yd(),Gge=II(),aV=nV();Hge.exports=function(t,r,n,i,a){var o=t.rangeselector||{},s=Hmt.newContainer(r,"rangeselector");function l(d,v){return RI.coerce(o,s,Gge,d,v)}var u=jmt(o,s,{name:"buttons",handleItemDefaults:Wmt,calendar:a}),c=l("visible",u.length>0);if(c){var f=Xmt(r,n,i);l("x",f[0]),l("y",f[1]),RI.noneOrAll(t,r,["x","y"]),l("xanchor"),l("yanchor"),RI.coerceFont(l,"font",n.font);var h=l("bgcolor");l("activecolor",Gmt.contrast(h,aV.lightAmount,aV.darkAmount)),l("bordercolor"),l("borderwidth")}};function Wmt(e,t,r,n){var i=n.calendar;function a(l,u){return RI.coerce(e,t,Gge.buttons,l,u)}var o=a("visible");if(o){var s=a("step");s!=="all"&&(i&&i!=="gregorian"&&(s==="month"||s==="year")?t.stepmode="backward":a("stepmode"),a("count")),a("label")}}function Xmt(e,t,r){for(var n=r.filter(function(s){return t[s].anchor===e._id}),i=0,a=0;a<n.length;a++){var o=t[n[a]].domain;o&&(i=Math.max(o[1],i))}return[e.domain[0],i+aV.yPad]}});var Xge=ye((ylr,Wge)=>{"use strict";var Zmt=bO(),Ymt=Dr().titleCase;Wge.exports=function(t,r){var n=t._name,i={};if(r.step==="all")i[n+".autorange"]=!0;else{var a=Kmt(t,r);i[n+".range[0]"]=a[0],i[n+".range[1]"]=a[1]}return i};function Kmt(e,t){var r=e.range,n=new Date(e.r2l(r[1])),i=t.step,a=Zmt["utc"+Ymt(i)],o=t.count,s;switch(t.stepmode){case"backward":s=e.l2r(+a.offset(n,-o));break;case"todate":var l=a.offset(n,-o);s=e.l2r(+a.ceil(l));break}var u=r[1];return[s,u]}});var tme=ye((_lr,eme)=>{"use strict";var FI=Oa(),Jmt=qa(),$mt=Mc(),Zge=ka(),Qge=So(),Ry=Dr(),Yge=Ry.strTranslate,DI=ru(),Qmt=hf(),lV=$h(),Kge=lV.LINE_SPACING,Jge=lV.FROM_TL,$ge=lV.FROM_BR,sV=nV(),eyt=Xge();eme.exports=function(t){var r=t._fullLayout,n=r._infolayer.selectAll(".rangeselector").data(tyt(t),ryt);n.enter().append("g").classed("rangeselector",!0),n.exit().remove(),n.style({cursor:"pointer","pointer-events":"all"}),n.each(function(i){var a=FI.select(this),o=i,s=o.rangeselector,l=a.selectAll("g.button").data(Ry.filterVisible(s.buttons));l.enter().append("g").classed("button",!0),l.exit().remove(),l.each(function(u){var c=FI.select(this),f=eyt(o,u);u._isActive=iyt(o,u,f),c.call(oV,s,u),c.call(ayt,s,u,t),c.on("click",function(){t._dragged||Jmt.call("_guiRelayout",t,f)}),c.on("mouseover",function(){u._isHovered=!0,c.call(oV,s,u)}),c.on("mouseout",function(){u._isHovered=!1,c.call(oV,s,u)})}),syt(t,l,s,o._name,a)})};function tyt(e){for(var t=Qmt.list(e,"x",!0),r=[],n=0;n<t.length;n++){var i=t[n];i.rangeselector&&i.rangeselector.visible&&r.push(i)}return r}function ryt(e){return e._id}function iyt(e,t,r){if(t.step==="all")return e.autorange===!0;var n=Object.keys(r);return e.range[0]===r[n[0]]&&e.range[1]===r[n[1]]}function oV(e,t,r){var n=Ry.ensureSingle(e,"rect","selector-rect",function(i){i.attr("shape-rendering","crispEdges")});n.attr({rx:sV.rx,ry:sV.ry}),n.call(Zge.stroke,t.bordercolor).call(Zge.fill,nyt(t,r)).style("stroke-width",t.borderwidth+"px")}function nyt(e,t){return t._isActive||t._isHovered?e.activecolor:e.bgcolor}function ayt(e,t,r,n){function i(o){DI.convertToTspans(o,n)}var a=Ry.ensureSingle(e,"text","selector-text",function(o){o.attr("text-anchor","middle")});a.call(Qge.font,t.font).text(oyt(r,n._fullLayout._meta)).call(i)}function oyt(e,t){return e.label?t?Ry.templateString(e.label,t):e.label:e.step==="all"?"all":e.count+e.step.charAt(0)}function syt(e,t,r,n,i){var a=0,o=0,s=r.borderwidth;t.each(function(){var d=FI.select(this),v=d.select(".selector-text"),_=r.font.size*Kge,b=Math.max(_*DI.lineCount(v),16)+3;o=Math.max(o,b)}),t.each(function(){var d=FI.select(this),v=d.select(".selector-rect"),_=d.select(".selector-text"),b=_.node()&&Qge.bBox(_.node()).width,p=r.font.size*Kge,k=DI.lineCount(_),E=Math.max(b+10,sV.minButtonWidth);d.attr("transform",Yge(s+a,s)),v.attr({x:0,y:0,width:E,height:o}),DI.positionText(_,E/2,o/2-(k-1)*p/2+3),a+=E+5});var l=e._fullLayout._size,u=l.l+l.w*r.x,c=l.t+l.h*(1-r.y),f="left";Ry.isRightAnchor(r)&&(u-=a,f="right"),Ry.isCenterAnchor(r)&&(u-=a/2,f="center");var h="top";Ry.isBottomAnchor(r)&&(c-=o,h="bottom"),Ry.isMiddleAnchor(r)&&(c-=o/2,h="middle"),a=Math.ceil(a),o=Math.ceil(o),u=Math.round(u),c=Math.round(c),$mt.autoMargin(e,n+"-range-selector",{x:r.x,y:r.y,l:a*Jge[f],r:a*$ge[f],b:o*$ge[h],t:o*Jge[h]}),i.attr("transform",Yge(u,c))}});var ime=ye((xlr,rme)=>{"use strict";rme.exports={moduleType:"component",name:"rangeselector",schema:{subplots:{xaxis:{rangeselector:II()}}},layoutAttributes:II(),handleDefaults:jge(),draw:tme()}});var Cc=ye(uV=>{"use strict";var nme=Ao().extendFlat;uV.attributes=function(e,t){e=e||{},t=t||{};var r={valType:"info_array",editType:e.editType,items:[{valType:"number",min:0,max:1,editType:e.editType},{valType:"number",min:0,max:1,editType:e.editType}],dflt:[0,1]},n=e.name?e.name+" ":"",i=e.trace?"trace ":"subplot ",a=t.description?" "+t.description:"",o={x:nme({},r,{}),y:nme({},r,{}),editType:e.editType};return e.noGridCell||(o.row={valType:"integer",min:0,dflt:0,editType:e.editType},o.column={valType:"integer",min:0,dflt:0,editType:e.editType}),o};uV.defaults=function(e,t,r,n){var i=n&&n.x||[0,1],a=n&&n.y||[0,1],o=t.grid;if(o){var s=r("domain.column");s!==void 0&&(s<o.columns?i=o._domains.x[s]:delete e.domain.column);var l=r("domain.row");l!==void 0&&(l<o.rows?a=o._domains.y[l]:delete e.domain.row)}var u=r("domain.x",i),c=r("domain.y",a);u[0]<u[1]||(e.domain.x=i.slice()),c[0]<c[1]||(e.domain.y=a.slice())}});var fV=ye((wlr,lme)=>{"use strict";var lyt=Dr(),uyt=o3().counter,cyt=Cc().attributes,ame=hd().idRegex,fyt=vl(),cV={rows:{valType:"integer",min:1,editType:"plot"},roworder:{valType:"enumerated",values:["top to bottom","bottom to top"],dflt:"top to bottom",editType:"plot"},columns:{valType:"integer",min:1,editType:"plot"},subplots:{valType:"info_array",freeLength:!0,dimensions:2,items:{valType:"enumerated",values:[uyt("xy").toString(),""],editType:"plot"},editType:"plot"},xaxes:{valType:"info_array",freeLength:!0,items:{valType:"enumerated",values:[ame.x.toString(),""],editType:"plot"},editType:"plot"},yaxes:{valType:"info_array",freeLength:!0,items:{valType:"enumerated",values:[ame.y.toString(),""],editType:"plot"},editType:"plot"},pattern:{valType:"enumerated",values:["independent","coupled"],dflt:"coupled",editType:"plot"},xgap:{valType:"number",min:0,max:1,editType:"plot"},ygap:{valType:"number",min:0,max:1,editType:"plot"},domain:cyt({name:"grid",editType:"plot",noGridCell:!0},{}),xside:{valType:"enumerated",values:["bottom","bottom plot","top plot","top"],dflt:"bottom plot",editType:"plot"},yside:{valType:"enumerated",values:["left","left plot","right plot","right"],dflt:"left plot",editType:"plot"},editType:"plot"};function zI(e,t,r){var n=t[r+"axes"],i=Object.keys((e._splomAxes||{})[r]||{});if(Array.isArray(n))return n;if(i.length)return i}function hyt(e,t){var r=e.grid||{},n=zI(t,r,"x"),i=zI(t,r,"y");if(!e.grid&&!n&&!i)return;var a=Array.isArray(r.subplots)&&Array.isArray(r.subplots[0]),o=Array.isArray(n),s=Array.isArray(i),l=o&&n!==r.xaxes&&s&&i!==r.yaxes,u,c;a?(u=r.subplots.length,c=r.subplots[0].length):(s&&(u=i.length),o&&(c=n.length));var f=fyt.newContainer(t,"grid");function h(x,C){return lyt.coerce(r,f,cV,x,C)}var d=h("rows",u),v=h("columns",c);if(!(d*v>1)){delete t.grid;return}if(!a&&!o&&!s){var _=h("pattern")==="independent";_&&(a=!0)}f._hasSubplotGrid=a;var b=h("roworder"),p=b==="top to bottom",k=a?.2:.1,E=a?.3:.1,S,L;l&&t._splomGridDflt&&(S=t._splomGridDflt.xside,L=t._splomGridDflt.yside),f._domains={x:ome("x",h,k,S,v),y:ome("y",h,E,L,d,p)}}function ome(e,t,r,n,i,a){var o=t(e+"gap",r),s=t("domain."+e);t(e+"side",n);for(var l=new Array(i),u=s[0],c=(s[1]-u)/(i-o),f=c*(1-o),h=0;h<i;h++){var d=u+c*h;l[a?i-1-h:h]=[d,d+f]}return l}function dyt(e,t){var r=t.grid;if(!(!r||!r._domains)){var n=e.grid||{},i=t._subplots,a=r._hasSubplotGrid,o=r.rows,s=r.columns,l=r.pattern==="independent",u,c,f,h,d,v,_,b=r._axisMap={};if(a){var p=n.subplots||[];v=r.subplots=new Array(o);var k=1;for(u=0;u<o;u++){var E=v[u]=new Array(s),S=p[u]||[];for(c=0;c<s;c++)if(l?(d=k===1?"xy":"x"+k+"y"+k,k++):d=S[c],E[c]="",i.cartesian.indexOf(d)!==-1){if(_=d.indexOf("y"),f=d.slice(0,_),h=d.slice(_),b[f]!==void 0&&b[f]!==c||b[h]!==void 0&&b[h]!==u)continue;E[c]=d,b[f]=c,b[h]=u}}}else{var L=zI(t,n,"x"),x=zI(t,n,"y");r.xaxes=sme(L,i.xaxis,s,b,"x"),r.yaxes=sme(x,i.yaxis,o,b,"y")}var C=r._anchors={},M=r.roworder==="top to bottom";for(var g in b){var P=g.charAt(0),T=r[P+"side"],z,O,V;if(T.length<8)C[g]="free";else if(P==="x"){if(T.charAt(0)==="t"===M?(z=0,O=1,V=o):(z=o-1,O=-1,V=-1),a){var G=b[g];for(u=z;u!==V;u+=O)if(d=v[u][G],!!d&&(_=d.indexOf("y"),d.slice(0,_)===g)){C[g]=d.slice(_);break}}else for(u=z;u!==V;u+=O)if(h=r.yaxes[u],i.cartesian.indexOf(g+h)!==-1){C[g]=h;break}}else if(T.charAt(0)==="l"?(z=0,O=1,V=s):(z=s-1,O=-1,V=-1),a){var Z=b[g];for(u=z;u!==V;u+=O)if(d=v[Z][u],!!d&&(_=d.indexOf("y"),d.slice(_)===g)){C[g]=d.slice(0,_);break}}else for(u=z;u!==V;u+=O)if(f=r.xaxes[u],i.cartesian.indexOf(f+g)!==-1){C[g]=f;break}}}}function sme(e,t,r,n,i){var a=new Array(r),o;function s(l,u){t.indexOf(u)!==-1&&n[u]===void 0?(a[l]=u,n[u]=l):a[l]=""}if(Array.isArray(e))for(o=0;o<r;o++)s(o,e[o]);else for(s(0,i),o=1;o<r;o++)s(o,i+(o+1));return a}lme.exports={moduleType:"component",name:"grid",schema:{layout:{grid:cV}},layoutAttributes:cV,sizeDefaults:hyt,contentDefaults:dyt}});var hV=ye((Tlr,ume)=>{"use strict";ume.exports={visible:{valType:"boolean",editType:"calc"},type:{valType:"enumerated",values:["percent","constant","sqrt","data"],editType:"calc"},symmetric:{valType:"boolean",editType:"calc"},array:{valType:"data_array",editType:"calc"},arrayminus:{valType:"data_array",editType:"calc"},value:{valType:"number",min:0,dflt:10,editType:"calc"},valueminus:{valType:"number",min:0,dflt:10,editType:"calc"},traceref:{valType:"integer",min:0,dflt:0,editType:"style"},tracerefminus:{valType:"integer",min:0,dflt:0,editType:"style"},copy_ystyle:{valType:"boolean",editType:"plot"},copy_zstyle:{valType:"boolean",editType:"style"},color:{valType:"color",editType:"style"},thickness:{valType:"number",min:0,dflt:2,editType:"style"},width:{valType:"number",min:0,editType:"plot"},editType:"calc"}});var hme=ye((Alr,fme)=>{"use strict";var cme=Eo(),vyt=qa(),pyt=Dr(),gyt=vl(),myt=hV();fme.exports=function(e,t,r,n){var i="error_"+n.axis,a=gyt.newContainer(t,i),o=e[i]||{};function s(v,_){return pyt.coerce(o,a,myt,v,_)}var l=o.array!==void 0||o.value!==void 0||o.type==="sqrt",u=s("visible",l);if(u!==!1){var c=s("type","array"in o?"data":"percent"),f=!0;c!=="sqrt"&&(f=s("symmetric",!((c==="data"?"arrayminus":"valueminus")in o))),c==="data"?(s("array"),s("traceref"),f||(s("arrayminus"),s("tracerefminus"))):(c==="percent"||c==="constant")&&(s("value"),f||s("valueminus"));var h="copy_"+n.inherit+"style";if(n.inherit){var d=t["error_"+n.inherit];(d||{}).visible&&s(h,!(o.color||cme(o.thickness)||cme(o.width)))}(!n.inherit||!a[h])&&(s("color",r),s("thickness"),s("width",vyt.traceIs(t,"gl3d")?0:4))}}});var dV=ye((Slr,vme)=>{"use strict";vme.exports=function(t){var r=t.type,n=t.symmetric;if(r==="data"){var i=t.array||[];if(n)return function(u,c){var f=+i[c];return[f,f]};var a=t.arrayminus||[];return function(u,c){var f=+i[c],h=+a[c];return!isNaN(f)||!isNaN(h)?[h||0,f||0]:[NaN,NaN]}}else{var o=dme(r,t.value),s=dme(r,t.valueminus);return n||t.valueminus===void 0?function(u){var c=o(u);return[c,c]}:function(u){return[s(u),o(u)]}}};function dme(e,t){if(e==="percent")return function(r){return Math.abs(r*t/100)};if(e==="constant")return function(){return Math.abs(t)};if(e==="sqrt")return function(r){return Math.sqrt(Math.abs(r))}}});var mme=ye((Mlr,gme)=>{"use strict";var vV=Eo(),yyt=qa(),pV=ho(),_yt=Dr(),xyt=dV();gme.exports=function(t){for(var r=t.calcdata,n=0;n<r.length;n++){var i=r[n],a=i[0].trace;if(a.visible===!0&&yyt.traceIs(a,"errorBarsOK")){var o=pV.getFromId(t,a.xaxis),s=pV.getFromId(t,a.yaxis);pme(i,a,o,"x"),pme(i,a,s,"y")}}};function pme(e,t,r,n){var i=t["error_"+n]||{},a=i.visible&&["linear","log"].indexOf(r.type)!==-1,o=[];if(a){for(var s=xyt(i),l=0;l<e.length;l++){var u=e[l],c=u.i;if(c===void 0)c=l;else if(c===null)continue;var f=u[n];if(vV(r.c2l(f))){var h=s(f,c);if(vV(h[0])&&vV(h[1])){var d=u[n+"s"]=f-h[0],v=u[n+"h"]=f+h[1];o.push(d,v)}}}var _=r._id,b=t._extremes[_],p=pV.findExtremes(r,o,_yt.extendFlat({tozero:b.opts.tozero},{padded:!0}));b.min=b.min.concat(p.min),b.max=b.max.concat(p.max)}}});var xme=ye((Elr,_me)=>{"use strict";var yme=Oa(),b_=Eo(),byt=So(),wyt=Ru();_me.exports=function(t,r,n,i){var a,o=n.xaxis,s=n.yaxis,l=i&&i.duration>0,u=t._context.staticPlot;r.each(function(c){var f=c[0].trace,h=f.error_x||{},d=f.error_y||{},v;f.ids&&(v=function(k){return k.id});var _=wyt.hasMarkers(f)&&f.marker.maxdisplayed>0;!d.visible&&!h.visible&&(c=[]);var b=yme.select(this).selectAll("g.errorbar").data(c,v);if(b.exit().remove(),!!c.length){h.visible||b.selectAll("path.xerror").remove(),d.visible||b.selectAll("path.yerror").remove(),b.style("opacity",1);var p=b.enter().append("g").classed("errorbar",!0);l&&p.style("opacity",0).transition().duration(i.duration).style("opacity",1),byt.setClipUrl(b,n.layerClipId,t),b.each(function(k){var E=yme.select(this),S=Tyt(k,o,s);if(!(_&&!k.vis)){var L,x=E.select("path.yerror");if(d.visible&&b_(S.x)&&b_(S.yh)&&b_(S.ys)){var C=d.width;L="M"+(S.x-C)+","+S.yh+"h"+2*C+"m-"+C+",0V"+S.ys,S.noYS||(L+="m-"+C+",0h"+2*C),a=!x.size(),a?x=E.append("path").style("vector-effect",u?"none":"non-scaling-stroke").classed("yerror",!0):l&&(x=x.transition().duration(i.duration).ease(i.easing)),x.attr("d",L)}else x.remove();var M=E.select("path.xerror");if(h.visible&&b_(S.y)&&b_(S.xh)&&b_(S.xs)){var g=(h.copy_ystyle?d:h).width;L="M"+S.xh+","+(S.y-g)+"v"+2*g+"m0,-"+g+"H"+S.xs,S.noXS||(L+="m0,-"+g+"v"+2*g),a=!M.size(),a?M=E.append("path").style("vector-effect",u?"none":"non-scaling-stroke").classed("xerror",!0):l&&(M=M.transition().duration(i.duration).ease(i.easing)),M.attr("d",L)}else M.remove()}})}})};function Tyt(e,t,r){var n={x:t.c2p(e.x),y:r.c2p(e.y)};return e.yh!==void 0&&(n.yh=r.c2p(e.yh),n.ys=r.c2p(e.ys),b_(n.ys)||(n.noYS=!0,n.ys=r.c2p(e.ys,!0))),e.xh!==void 0&&(n.xh=t.c2p(e.xh),n.xs=t.c2p(e.xs),b_(n.xs)||(n.noXS=!0,n.xs=t.c2p(e.xs,!0))),n}});var Tme=ye((klr,wme)=>{"use strict";var Ayt=Oa(),bme=ka();wme.exports=function(t){t.each(function(r){var n=r[0].trace,i=n.error_y||{},a=n.error_x||{},o=Ayt.select(this);o.selectAll("path.yerror").style("stroke-width",i.thickness+"px").call(bme.stroke,i.color),a.copy_ystyle&&(a=i),o.selectAll("path.xerror").style("stroke-width",a.thickness+"px").call(bme.stroke,a.color)})}});var Mme=ye((Clr,Sme)=>{"use strict";var h4=Dr(),Ame=mc().overrideAll,d4=hV(),Qb={error_x:h4.extendFlat({},d4),error_y:h4.extendFlat({},d4)};delete Qb.error_x.copy_zstyle;delete Qb.error_y.copy_zstyle;delete Qb.error_y.copy_ystyle;var v4={error_x:h4.extendFlat({},d4),error_y:h4.extendFlat({},d4),error_z:h4.extendFlat({},d4)};delete v4.error_x.copy_ystyle;delete v4.error_y.copy_ystyle;delete v4.error_z.copy_ystyle;delete v4.error_z.copy_zstyle;Sme.exports={moduleType:"component",name:"errorbars",schema:{traces:{scatter:Qb,bar:Qb,histogram:Qb,scatter3d:Ame(v4,"calc","nested"),scattergl:Ame(Qb,"calc","nested")}},supplyDefaults:hme(),calc:mme(),makeComputeError:dV(),plot:xme(),style:Tme(),hoverInfo:Syt};function Syt(e,t,r){(t.error_y||{}).visible&&(r.yerr=e.yh-e.y,t.error_y.symmetric||(r.yerrneg=e.y-e.ys)),(t.error_x||{}).visible&&(r.xerr=e.xh-e.x,t.error_x.symmetric||(r.xerrneg=e.x-e.xs))}});var kme=ye((Llr,Eme)=>{"use strict";Eme.exports={cn:{colorbar:"colorbar",cbbg:"cbbg",cbfill:"cbfill",cbfills:"cbfills",cbline:"cbline",cblines:"cblines",cbaxis:"cbaxis",cbtitleunshift:"cbtitleunshift",cbtitle:"cbtitle",cboutline:"cboutline",crisp:"crisp",jsPlaceholder:"js-placeholder"}}});var Fme=ye((Plr,Dme)=>{"use strict";var w_=Oa(),gV=cd(),qI=Mc(),Cme=qa(),Dy=ho(),OI=yv(),B0=Dr(),Vg=B0.strTranslate,Rme=Ao().extendFlat,mV=Sg(),Ug=So(),yV=ka(),Myt=Eb(),Eyt=ru(),kyt=pv().flipScale,Cyt=i4(),Lyt=uI(),Pyt=Rd(),_V=$h(),Lme=_V.LINE_SPACING,Pme=_V.FROM_TL,Ime=_V.FROM_BR,gf=kme().cn;function Iyt(e){var t=e._fullLayout,r=t._infolayer.selectAll("g."+gf.colorbar).data(Ryt(e),function(n){return n._id});r.enter().append("g").attr("class",function(n){return n._id}).classed(gf.colorbar,!0),r.each(function(n){var i=w_.select(this);B0.ensureSingle(i,"rect",gf.cbbg),B0.ensureSingle(i,"g",gf.cbfills),B0.ensureSingle(i,"g",gf.cblines),B0.ensureSingle(i,"g",gf.cbaxis,function(o){o.classed(gf.crisp,!0)}),B0.ensureSingle(i,"g",gf.cbtitleunshift,function(o){o.append("g").classed(gf.cbtitle,!0)}),B0.ensureSingle(i,"rect",gf.cboutline);var a=Dyt(i,n,e);a&&a.then&&(e._promises||[]).push(a),e._context.edits.colorbarPosition&&Fyt(i,n,e)}),r.exit().each(function(n){qI.autoMargin(e,n._id)}).remove(),r.order()}function Ryt(e){var t=e._fullLayout,r=e.calcdata,n=[],i,a,o,s;function l(E){return Rme(E,{_fillcolor:null,_line:{color:null,width:null,dash:null},_levels:{start:null,end:null,size:null},_filllevels:null,_fillgradient:null,_zrange:null})}function u(){typeof s.calc=="function"?s.calc(e,o,i):(i._fillgradient=a.reversescale?kyt(a.colorscale):a.colorscale,i._zrange=[a[s.min],a[s.max]])}for(var c=0;c<r.length;c++){var f=r[c];if(o=f[0].trace,!!o._module){var h=o._module.colorbar;if(o.visible===!0&&h)for(var d=Array.isArray(h),v=d?h:[h],_=0;_<v.length;_++){s=v[_];var b=s.container;a=b?o[b]:o,a&&a.showscale&&(i=l(a.colorbar),i._id="cb"+o.uid+(d&&b?"-"+b:""),i._traceIndex=o.index,i._propPrefix=(b?b+".":"")+"colorbar.",i._meta=o._meta,u(),n.push(i))}}}for(var p in t._colorAxes)if(a=t[p],a.showscale){var k=t._colorAxes[p];i=l(a.colorbar),i._id="cb"+p,i._propPrefix=p+".colorbar.",i._meta=t._meta,s={min:"cmin",max:"cmax"},k[0]!=="heatmap"&&(o=k[1],s.calc=o._module.colorbar.calc),u(),n.push(i)}return n}function Dyt(e,t,r){var n=t.orientation==="v",i=t.len,a=t.lenmode,o=t.thickness,s=t.thicknessmode,l=t.outlinewidth,u=t.borderwidth,c=t.bgcolor,f=t.xanchor,h=t.yanchor,d=t.xpad,v=t.ypad,_=t.x,b=n?t.y:1-t.y,p=t.yref==="paper",k=t.xref==="paper",E=r._fullLayout,S=E._size,L=t._fillcolor,x=t._line,C=t.title,M=C.side,g=t._zrange||w_.extent((typeof L=="function"?L:x.color).domain()),P=typeof x.color=="function"?x.color:function(){return x.color},T=typeof L=="function"?L:function(){return L},z=t._levels,O=zyt(r,t,g),V=O.fill,G=O.line,Z=Math.round(o*(s==="fraction"?n?S.w:S.h:1)),j=Z/(n?S.w:S.h),N=Math.round(i*(a==="fraction"?n?S.h:S.w:1)),H=N/(n?S.h:S.w),te=k?S.w:r._fullLayout.width,oe=p?S.h:r._fullLayout.height,_e=Math.round(n?_*te+d:b*oe+v),Ee={center:.5,right:1}[f]||0,Ce={top:1,middle:.5}[h]||0,me=n?_-Ee*j:b-Ce*j,ie=n?b-Ce*H:_-Ee*H,Se=Math.round(n?oe*(1-ie):te*ie);t._lenFrac=H,t._thickFrac=j,t._uFrac=me,t._vFrac=ie;var Le=t._axis=Oyt(r,t,g);Le.position=j+(n?_+d/S.w:b+v/S.h);var Ae=["top","bottom"].indexOf(M)!==-1;if(n&&Ae&&(Le.title.side=M,Le.titlex=_+d/S.w,Le.titley=ie+(C.side==="top"?H-v/S.h:v/S.h)),!n&&!Ae&&(Le.title.side=M,Le.titley=b+v/S.h,Le.titlex=ie+d/S.w),x.color&&t.tickmode==="auto"){Le.tickmode="linear",Le.tick0=z.start;var Fe=z.size,Pe=B0.constrain(N/50,4,15)+1,ge=(g[1]-g[0])/((t.nticks||Pe)*Fe);if(ge>1){var Re=Math.pow(10,Math.floor(Math.log(ge)/Math.LN10));Fe*=Re*B0.roundUp(ge/Re,[2,5,10]),(Math.abs(z.start)/z.size+1e-6)%1<2e-6&&(Le.tick0=0)}Le.dtick=Fe}Le.domain=n?[ie+v/S.h,ie+H-v/S.h]:[ie+d/S.w,ie+H-d/S.w],Le.setScale(),e.attr("transform",Vg(Math.round(S.l),Math.round(S.t)));var ce=e.select("."+gf.cbtitleunshift).attr("transform",Vg(-Math.round(S.l),-Math.round(S.t))),Ze=Le.ticklabelposition,ut=Le.title.font.size,pt=e.select("."+gf.cbaxis),Zt,st=0,lt=0;function Gt(cr,$e){var St={propContainer:Le,propName:t._propPrefix+"title.text",traceIndex:t._traceIndex,_meta:t._meta,placeholder:E._dfltTitle.colorbar,containerGroup:e.select("."+gf.cbtitle)},Qt=cr.charAt(0)==="h"?cr.slice(1):"h"+cr;e.selectAll("."+Qt+",."+Qt+"-math-group").remove(),Myt.draw(r,cr,Rme(St,$e||{}))}function Nt(){if(n&&Ae||!n&&!Ae){var cr,$e;M==="top"&&(cr=d+S.l+te*_,$e=v+S.t+oe*(1-ie-H)+3+ut*.75),M==="bottom"&&(cr=d+S.l+te*_,$e=v+S.t+oe*(1-ie)-3-ut*.25),M==="right"&&($e=v+S.t+oe*b+3+ut*.75,cr=d+S.l+te*ie),Gt(Le._id+"title",{attributes:{x:cr,y:$e,"text-anchor":n?"start":"middle"}})}}function Jt(){if(n&&!Ae||!n&&Ae){var cr=Le.position||0,$e=Le._offset+Le._length/2,St,Qt;if(M==="right")Qt=$e,St=S.l+te*cr+10+ut*(Le.showticklabels?1:.5);else if(St=$e,M==="bottom"&&(Qt=S.t+oe*cr+10+(Ze.indexOf("inside")===-1?Le.tickfont.size:0)+(Le.ticks!=="inside"&&t.ticklen||0)),M==="top"){var Vt=C.text.split("<br>").length;Qt=S.t+oe*cr+10-Z-Lme*ut*Vt}Gt((n?"h":"v")+Le._id+"title",{avoid:{selection:w_.select(r).selectAll("g."+Le._id+"tick"),side:M,offsetTop:n?0:S.t,offsetLeft:n?S.l:0,maxShift:n?E.width:E.height},attributes:{x:St,y:Qt,"text-anchor":"middle"},transform:{rotate:n?-90:0,offset:0}})}}function sr(){if(!n&&!Ae||n&&Ae){var cr=e.select("."+gf.cbtitle),$e=cr.select("text"),St=[-l/2,l/2],Qt=cr.select(".h"+Le._id+"title-math-group").node(),Vt=15.6;$e.node()&&(Vt=parseInt($e.node().style.fontSize,10)*Lme);var _t;if(Qt?(_t=Ug.bBox(Qt),lt=_t.width,st=_t.height,st>Vt&&(St[1]-=(st-Vt)/2)):$e.node()&&!$e.classed(gf.jsPlaceholder)&&(_t=Ug.bBox($e.node()),lt=_t.width,st=_t.height),n){if(st){if(st+=5,M==="top")Le.domain[1]-=st/S.h,St[1]*=-1;else{Le.domain[0]+=st/S.h;var It=Eyt.lineCount($e);St[1]+=(1-It)*Vt}cr.attr("transform",Vg(St[0],St[1])),Le.setScale()}}else lt&&(M==="right"&&(Le.domain[0]+=(lt+ut/2)/S.w),cr.attr("transform",Vg(St[0],St[1])),Le.setScale())}e.selectAll("."+gf.cbfills+",."+gf.cblines).attr("transform",n?Vg(0,Math.round(S.h*(1-Le.domain[1]))):Vg(Math.round(S.w*Le.domain[0]),0)),pt.attr("transform",n?Vg(0,Math.round(-S.t)):Vg(Math.round(-S.l),0));var mt=e.select("."+gf.cbfills).selectAll("rect."+gf.cbfill).attr("style","").data(V);mt.enter().append("rect").classed(gf.cbfill,!0).attr("style",""),mt.exit().remove();var er=g.map(Le.c2p).map(Math.round).sort(function(Br,Vr){return Br-Vr});mt.each(function(Br,Vr){var dt=[Vr===0?g[0]:(V[Vr]+V[Vr-1])/2,Vr===V.length-1?g[1]:(V[Vr]+V[Vr+1])/2].map(Le.c2p).map(Math.round);n&&(dt[1]=B0.constrain(dt[1]+(dt[1]>dt[0])?1:-1,er[0],er[1]));var Ge=w_.select(this).attr(n?"x":"y",_e).attr(n?"y":"x",w_.min(dt)).attr(n?"width":"height",Math.max(Z,2)).attr(n?"height":"width",Math.max(w_.max(dt)-w_.min(dt),2));if(t._fillgradient)Ug.gradient(Ge,r,t._id,n?"vertical":"horizontalreversed",t._fillgradient,"fill");else{var Je=T(Br).replace("e-","");Ge.attr("fill",gV(Je).toHexString())}});var lr=e.select("."+gf.cblines).selectAll("path."+gf.cbline).data(x.color&&x.width?G:[]);lr.enter().append("path").classed(gf.cbline,!0),lr.exit().remove(),lr.each(function(Br){var Vr=_e,dt=Math.round(Le.c2p(Br))+x.width/2%1;w_.select(this).attr("d","M"+(n?Vr+","+dt:dt+","+Vr)+(n?"h":"v")+Z).call(Ug.lineGroupStyle,x.width,P(Br),x.dash)}),pt.selectAll("g."+Le._id+"tick,path").remove();var Tr=_e+Z+(l||0)/2-(t.ticks==="outside"?1:0),Lr=Dy.calcTicks(Le),ti=Dy.getTickSigns(Le)[2];return Dy.drawTicks(r,Le,{vals:Le.ticks==="inside"?Dy.clipEnds(Le,Lr):Lr,layer:pt,path:Dy.makeTickPath(Le,Tr,ti),transFn:Dy.makeTransTickFn(Le)}),Dy.drawLabels(r,Le,{vals:Lr,layer:pt,transFn:Dy.makeTransTickLabelFn(Le),labelFns:Dy.makeLabelFns(Le,Tr)})}function wr(){var cr,$e=Z+l/2;Ze.indexOf("inside")===-1&&(cr=Ug.bBox(pt.node()),$e+=n?cr.width:cr.height),Zt=ce.select("text");var St=0,Qt=n&&M==="top",Vt=!n&&M==="right",_t=0;if(Zt.node()&&!Zt.classed(gf.jsPlaceholder)){var It,mt=ce.select(".h"+Le._id+"title-math-group").node();mt&&(n&&Ae||!n&&!Ae)?(cr=Ug.bBox(mt),St=cr.width,It=cr.height):(cr=Ug.bBox(ce.node()),St=cr.right-S.l-(n?_e:Se),It=cr.bottom-S.t-(n?Se:_e),!n&&M==="top"&&($e+=cr.height,_t=cr.height)),Vt&&(Zt.attr("transform",Vg(St/2+ut/2,0)),St*=2),$e=Math.max($e,n?St:It)}var er=(n?d:v)*2+$e+u+l/2,lr=0;!n&&C.text&&h==="bottom"&&b<=0&&(lr=er/2,er+=lr,_t+=lr),E._hColorbarMoveTitle=lr,E._hColorbarMoveCBTitle=_t;var Tr=u+l,Lr=(n?_e:Se)-Tr/2-(n?d:0),ti=(n?Se:_e)-(n?N:v+_t-lr);e.select("."+gf.cbbg).attr("x",Lr).attr("y",ti).attr(n?"width":"height",Math.max(er-lr,2)).attr(n?"height":"width",Math.max(N+Tr,2)).call(yV.fill,c).call(yV.stroke,t.bordercolor).style("stroke-width",u);var Br=Vt?Math.max(St-10,0):0;e.selectAll("."+gf.cboutline).attr("x",(n?_e:Se+d)+Br).attr("y",(n?Se+v-N:_e)+(Qt?st:0)).attr(n?"width":"height",Math.max(Z,2)).attr(n?"height":"width",Math.max(N-(n?2*v+st:2*d+Br),2)).call(yV.stroke,t.outlinecolor).style({fill:"none","stroke-width":l});var Vr=n?Ee*er:0,dt=n?0:(1-Ce)*er-_t;if(Vr=k?S.l-Vr:-Vr,dt=p?S.t-dt:-dt,e.attr("transform",Vg(Vr,dt)),!n&&(u||gV(c).getAlpha()&&!gV.equals(E.paper_bgcolor,c))){var Ge=pt.selectAll("text"),Je=Ge[0].length,je=e.select("."+gf.cbbg).node(),tt=Ug.bBox(je),xt=Ug.getTranslate(e),Ie=2;Ge.each(function(di,ji){var In=0,wi=Je-1;if(ji===In||ji===wi){var On=Ug.bBox(this),qn=Ug.getTranslate(this),Fn;if(ji===wi){var ra=On.right+qn.x,la=tt.right+xt.x+Se-u-Ie+_;Fn=la-ra,Fn>0&&(Fn=0)}else if(ji===In){var Ut=On.left+qn.x,wt=tt.left+xt.x+Se+u+Ie;Fn=wt-Ut,Fn<0&&(Fn=0)}Fn&&(Je<3?this.setAttribute("transform","translate("+Fn+",0) "+this.getAttribute("transform")):this.setAttribute("visibility","hidden"))}})}var xe={},ke=Pme[f],vt=Ime[f],ir=Pme[h],ar=Ime[h],vr=er-Z;n?(a==="pixels"?(xe.y=b,xe.t=N*ir,xe.b=N*ar):(xe.t=xe.b=0,xe.yt=b+i*ir,xe.yb=b-i*ar),s==="pixels"?(xe.x=_,xe.l=er*ke,xe.r=er*vt):(xe.l=vr*ke,xe.r=vr*vt,xe.xl=_-o*ke,xe.xr=_+o*vt)):(a==="pixels"?(xe.x=_,xe.l=N*ke,xe.r=N*vt):(xe.l=xe.r=0,xe.xl=_+i*ke,xe.xr=_-i*vt),s==="pixels"?(xe.y=1-b,xe.t=er*ir,xe.b=er*ar):(xe.t=vr*ir,xe.b=vr*ar,xe.yt=b-o*ir,xe.yb=b+o*ar));var ii=t.y<.5?"b":"t",pi=t.x<.5?"l":"r";r._fullLayout._reservedMargin[t._id]={};var $r={r:E.width-Lr-Vr,l:Lr+xe.r,b:E.height-ti-dt,t:ti+xe.b};k&&p?qI.autoMargin(r,t._id,xe):k?r._fullLayout._reservedMargin[t._id][ii]=$r[ii]:p||n?r._fullLayout._reservedMargin[t._id][pi]=$r[pi]:r._fullLayout._reservedMargin[t._id][ii]=$r[ii]}return B0.syncOrAsync([qI.previousPromises,Nt,sr,Jt,qI.previousPromises,wr],r)}function Fyt(e,t,r){var n=t.orientation==="v",i=r._fullLayout,a=i._size,o,s,l;OI.init({element:e.node(),gd:r,prepFn:function(){o=e.attr("transform"),mV(e)},moveFn:function(u,c){e.attr("transform",o+Vg(u,c)),s=OI.align((n?t._uFrac:t._vFrac)+u/a.w,n?t._thickFrac:t._lenFrac,0,1,t.xanchor),l=OI.align((n?t._vFrac:1-t._uFrac)-c/a.h,n?t._lenFrac:t._thickFrac,0,1,t.yanchor);var f=OI.getCursor(s,l,t.xanchor,t.yanchor);mV(e,f)},doneFn:function(){if(mV(e),s!==void 0&&l!==void 0){var u={};u[t._propPrefix+"x"]=s,u[t._propPrefix+"y"]=l,t._traceIndex!==void 0?Cme.call("_guiRestyle",r,u,t._traceIndex):Cme.call("_guiRelayout",r,u)}}})}function zyt(e,t,r){var n=t._levels,i=[],a=[],o,s,l=n.end+n.size/100,u=n.size,c=1.001*r[0]-.001*r[1],f=1.001*r[1]-.001*r[0];for(s=0;s<1e5&&(o=n.start+s*u,!(u>0?o>=l:o<=l));s++)o>c&&o<f&&i.push(o);if(t._fillgradient)a=[0];else if(typeof t._fillcolor=="function"){var h=t._filllevels;if(h)for(l=h.end+h.size/100,u=h.size,s=0;s<1e5&&(o=h.start+s*u,!(u>0?o>=l:o<=l));s++)o>r[0]&&o<r[1]&&a.push(o);else a=i.map(function(d){return d-n.size/2}),a.push(a[a.length-1]+n.size)}else t._fillcolor&&typeof t._fillcolor=="string"&&(a=[0]);return n.size<0&&(i.reverse(),a.reverse()),{line:i,fill:a}}function Oyt(e,t,r){var n=e._fullLayout,i=t.orientation==="v",a={type:"linear",range:r,tickmode:t.tickmode,nticks:t.nticks,tick0:t.tick0,dtick:t.dtick,tickvals:t.tickvals,ticktext:t.ticktext,ticks:t.ticks,ticklen:t.ticklen,tickwidth:t.tickwidth,tickcolor:t.tickcolor,showticklabels:t.showticklabels,labelalias:t.labelalias,ticklabelposition:t.ticklabelposition,ticklabeloverflow:t.ticklabeloverflow,ticklabelstep:t.ticklabelstep,tickfont:t.tickfont,tickangle:t.tickangle,tickformat:t.tickformat,exponentformat:t.exponentformat,minexponent:t.minexponent,separatethousands:t.separatethousands,showexponent:t.showexponent,showtickprefix:t.showtickprefix,tickprefix:t.tickprefix,showticksuffix:t.showticksuffix,ticksuffix:t.ticksuffix,title:t.title,showline:!0,anchor:"free",side:i?"right":"bottom",position:1},o=i?"y":"x",s={type:"linear",_id:o+t._id},l={letter:o,font:n.font,noAutotickangles:o==="y",noHover:!0,noTickson:!0,noTicklabelmode:!0,noInsideRange:!0,calendar:n.calendar};function u(c,f){return B0.coerce(a,s,Pyt,c,f)}return Cyt(a,s,u,l,n),Lyt(a,s,u,l),s}Dme.exports={draw:Iyt}});var Ome=ye((Ilr,zme)=>{"use strict";zme.exports={moduleType:"component",name:"colorbar",attributes:tL(),supplyDefaults:Pq(),draw:Fme().draw,hasColorbar:Aq()}});var Bme=ye((Rlr,qme)=>{"use strict";qme.exports={moduleType:"component",name:"legend",layoutAttributes:SB(),supplyLayoutDefaults:EB(),draw:BB(),style:FB()}});var Ume=ye((Dlr,Nme)=>{"use strict";Nme.exports={moduleType:"locale",name:"en",dictionary:{"Click to enter Colorscale title":"Click to enter Colourscale title"},format:{days:["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],shortDays:["Sun","Mon","Tue","Wed","Thu","Fri","Sat"],months:["January","February","March","April","May","June","July","August","September","October","November","December"],shortMonths:["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],periods:["AM","PM"],dateTime:"%a %b %e %X %Y",date:"%d/%m/%Y",time:"%H:%M:%S",decimal:".",thousands:",",grouping:[3],currency:["$",""],year:"%Y",month:"%b %Y",dayMonth:"%b %-d",dayMonthYear:"%b %-d, %Y"}}});var Gme=ye((Flr,Vme)=>{"use strict";Vme.exports={moduleType:"locale",name:"en-US",dictionary:{"Click to enter Colorscale title":"Click to enter Colorscale title"},format:{date:"%m/%d/%Y"}}});var bV=ye((zlr,Xme)=>{"use strict";var qyt=qa(),Wme=Dr(),xV=Wme.extendFlat,Hme=Wme.extendDeep;function jme(e){var t;switch(e){case"themes__thumb":t={autosize:!0,width:150,height:150,title:{text:""},showlegend:!1,margin:{l:5,r:5,t:5,b:5,pad:0},annotations:[]};break;case"thumbnail":t={title:{text:""},hidesources:!0,showlegend:!1,borderwidth:0,bordercolor:"",margin:{l:1,r:1,t:1,b:1,pad:0},annotations:[]};break;default:t={}}return t}function Byt(e){var t=["xaxis","yaxis","zaxis"];return t.indexOf(e.slice(0,5))>-1}Xme.exports=function(t,r){var n,i=t.data,a=t.layout,o=Hme([],i),s=Hme({},a,jme(r.tileClass)),l=t._context||{};if(r.width&&(s.width=r.width),r.height&&(s.height=r.height),r.tileClass==="thumbnail"||r.tileClass==="themes__thumb"){s.annotations=[];var u=Object.keys(s);for(n=0;n<u.length;n++)Byt(u[n])&&(s[u[n]].title={text:""});for(n=0;n<o.length;n++){var c=o[n];c.showscale=!1,c.marker&&(c.marker.showscale=!1),qyt.traceIs(c,"pie-like")&&(c.textposition="none")}}if(Array.isArray(r.annotations))for(n=0;n<r.annotations.length;n++)s.annotations.push(r.annotations[n]);var f=Object.keys(s).filter(function(b){return b.match(/^scene\d*$/)});if(f.length){var h={};for(r.tileClass==="thumbnail"&&(h={title:{text:""},showaxeslabels:!1,showticklabels:!1,linetickenable:!1}),n=0;n<f.length;n++){var d=s[f[n]];d.xaxis||(d.xaxis={}),d.yaxis||(d.yaxis={}),d.zaxis||(d.zaxis={}),xV(d.xaxis,h),xV(d.yaxis,h),xV(d.zaxis,h),d._scene=null}}var v=document.createElement("div");r.tileClass&&(v.className=r.tileClass);var _={gd:v,td:v,layout:s,data:o,config:{staticPlot:r.staticPlot===void 0?!0:r.staticPlot,plotGlPixelRatio:r.plotGlPixelRatio===void 0?2:r.plotGlPixelRatio,displaylogo:r.displaylogo||!1,showLink:r.showLink||!1,showTips:r.showTips||!1,mapboxAccessToken:l.mapboxAccessToken}};return r.setBackground!=="transparent"&&(_.config.setBackground=r.setBackground||"opaque"),_.gd.defaultLayout=jme(r.tileClass),_}});var Kme=ye((Olr,Yme)=>{"use strict";var Nyt=pb().EventEmitter,Uyt=qa(),Vyt=Dr(),Zme=Py(),Gyt=bV(),Hyt=jP(),jyt=WP();function Wyt(e,t){var r=new Nyt,n=Gyt(e,{format:"png"}),i=n.gd;i.style.position="absolute",i.style.left="-5000px",document.body.appendChild(i);function a(){var s=Zme.getDelay(i._fullLayout);setTimeout(function(){var l=Hyt(i),u=document.createElement("canvas");u.id=Vyt.randstr(),r=jyt({format:t.format,width:i._fullLayout.width,height:i._fullLayout.height,canvas:u,emitter:r,svg:l}),r.clean=function(){i&&document.body.removeChild(i)}},s)}var o=Zme.getRedrawFunc(i);return Uyt.call("_doPlot",i,n.data,n.layout,n.config).then(o).then(a).catch(function(s){r.emit("error",s)}),r}Yme.exports=Wyt});var Qme=ye((qlr,$me)=>{"use strict";var Jme=Py(),Xyt={getDelay:Jme.getDelay,getRedrawFunc:Jme.getRedrawFunc,clone:bV(),toSVG:jP(),svgToImg:WP(),toImage:Kme(),downloadImage:rU()};$me.exports=Xyt});var tye=ye(Fy=>{"use strict";Fy.version=l6().version;vee();rne();var Zyt=qa(),p4=Fy.register=Zyt.register,TV=Tde(),eye=Object.keys(TV);for(BI=0;BI<eye.length;BI++)TT=eye[BI],TT.charAt(0)!=="_"&&(Fy[TT]=TV[TT]),p4({moduleType:"apiMethod",name:TT,fn:TV[TT]});var TT,BI;p4(mpe());p4([Vpe(),t0e(),Of(),_0e(),I0e(),ege(),xge(),Bge(),ime(),fV(),Mme(),tc(),Ome(),Bme(),vf(),cN()]);p4([Ume(),Gme()]);window.PlotlyLocales&&Array.isArray(window.PlotlyLocales)&&(p4(window.PlotlyLocales),delete window.PlotlyLocales);Fy.Icons=XL();var NI=vf(),wV=Mc();Fy.Plots={resize:wV.resize,graphJson:wV.graphJson,sendDataToCloud:wV.sendDataToCloud};Fy.Fx={hover:NI.hover,unhover:NI.unhover,loneHover:NI.loneHover,loneUnhover:NI.loneUnhover};Fy.Snapshot=Qme();Fy.PlotSchema=w3()});var iye=ye((Nlr,rye)=>{"use strict";rye.exports=tye()});var e2=ye((Ulr,nye)=>{"use strict";nye.exports={TEXTPAD:3,eventDataKeys:["value","label"]}});var Lm=ye((Vlr,uye)=>{"use strict";var qf=pf(),aye=df().axisHoverFormat,{hovertemplateAttrs:Yyt,texttemplateAttrs:Kyt,templatefallbackAttrs:oye}=Ll(),lye=Tu(),Jyt=ec(),sye=e2(),$yt=Pd().pattern,t2=Ao().extendFlat,AV=Jyt({editType:"calc",arrayOk:!0,colorEditType:"style"}),Qyt=qf.marker,e1t=Qyt.line,t1t=t2({},e1t.width,{dflt:0}),r1t=t2({width:t1t,editType:"calc"},lye("marker.line")),i1t=t2({line:r1t,editType:"calc"},lye("marker"),{opacity:{valType:"number",arrayOk:!0,dflt:1,min:0,max:1,editType:"style"},pattern:$yt,cornerradius:{valType:"any",editType:"calc"}});uye.exports={x:qf.x,x0:qf.x0,dx:qf.dx,y:qf.y,y0:qf.y0,dy:qf.dy,xperiod:qf.xperiod,yperiod:qf.yperiod,xperiod0:qf.xperiod0,yperiod0:qf.yperiod0,xperiodalignment:qf.xperiodalignment,yperiodalignment:qf.yperiodalignment,xhoverformat:aye("x"),yhoverformat:aye("y"),text:qf.text,texttemplate:Kyt({editType:"plot"},{keys:sye.eventDataKeys}),texttemplatefallback:oye({editType:"plot"}),hovertext:qf.hovertext,hovertemplate:Yyt({},{keys:sye.eventDataKeys}),hovertemplatefallback:oye(),textposition:{valType:"enumerated",values:["inside","outside","auto","none"],dflt:"auto",arrayOk:!0,editType:"calc"},insidetextanchor:{valType:"enumerated",values:["end","middle","start"],dflt:"end",editType:"plot"},textangle:{valType:"angle",dflt:"auto",editType:"plot"},textfont:t2({},AV,{}),insidetextfont:t2({},AV,{}),outsidetextfont:t2({},AV,{}),constraintext:{valType:"enumerated",values:["inside","outside","both","none"],dflt:"both",editType:"calc"},cliponaxis:t2({},qf.cliponaxis,{}),orientation:{valType:"enumerated",values:["v","h"],editType:"calc+clearAxisTypes"},base:{valType:"any",dflt:null,arrayOk:!0,editType:"calc"},offset:{valType:"number",dflt:null,arrayOk:!0,editType:"calc"},width:{valType:"number",dflt:null,min:0,arrayOk:!0,editType:"calc"},marker:i1t,offsetgroup:qf.offsetgroup,alignmentgroup:qf.alignmentgroup,selected:{marker:{opacity:qf.selected.marker.opacity,color:qf.selected.marker.color,editType:"style"},textfont:qf.selected.textfont,editType:"style"},unselected:{marker:{opacity:qf.unselected.marker.opacity,color:qf.unselected.marker.color,editType:"style"},textfont:qf.unselected.textfont,editType:"style"},zorder:qf.zorder}});var UI=ye((Glr,cye)=>{"use strict";cye.exports={barmode:{valType:"enumerated",values:["stack","group","overlay","relative"],dflt:"group",editType:"calc"},barnorm:{valType:"enumerated",values:["","fraction","percent"],dflt:"",editType:"calc"},bargap:{valType:"number",min:0,max:1,editType:"calc"},bargroupgap:{valType:"number",min:0,max:1,dflt:0,editType:"calc"},barcornerradius:{valType:"any",editType:"calc"}}});var VI=ye((Hlr,dye)=>{"use strict";var n1t=ka(),fye=pv().hasColorscale,hye=Qh(),a1t=Dr().coercePattern;dye.exports=function(t,r,n,i,a){var o=n("marker.color",i),s=fye(t,"marker");s&&hye(t,r,a,n,{prefix:"marker.",cLetter:"c"}),n("marker.line.color",n1t.defaultLine),fye(t,"marker.line")&&hye(t,r,a,n,{prefix:"marker.line.",cLetter:"c"}),n("marker.line.width"),n("marker.opacity"),a1t(n,"marker.pattern",o,s),n("selected.marker.color"),n("unselected.marker.color")}});var r0=ye((jlr,_ye)=>{"use strict";var vye=Eo(),AT=Dr(),pye=ka(),o1t=qa(),s1t=eT(),l1t=Ig(),u1t=VI(),c1t=Hb(),gye=Lm(),GI=AT.coerceFont;function f1t(e,t,r,n){function i(u,c){return AT.coerce(e,t,gye,u,c)}var a=s1t(e,t,n,i);if(!a){t.visible=!1;return}l1t(e,t,n,i),i("xhoverformat"),i("yhoverformat"),i("zorder"),i("orientation",t.x&&!t.y?"h":"v"),i("base"),i("offset"),i("width"),i("text"),i("hovertext"),i("hovertemplate"),i("hovertemplatefallback");var o=i("textposition");yye(e,t,n,i,o,{moduleHasSelected:!0,moduleHasUnselected:!0,moduleHasConstrain:!0,moduleHasCliponaxis:!0,moduleHasTextangle:!0,moduleHasInsideanchor:!0}),u1t(e,t,i,r,n);var s=(t.marker.line||{}).color,l=o1t.getComponentMethod("errorbars","supplyDefaults");l(e,t,s||pye.defaultLine,{axis:"y"}),l(e,t,s||pye.defaultLine,{axis:"x",inherit:"y"}),AT.coerceSelectionMarkerOpacity(t,i)}function h1t(e,t){var r,n;function i(s,l){return AT.coerce(n._input,n,gye,s,l)}for(var a=0;a<e.length;a++)if(n=e[a],n.type==="bar"){r=n._input;var o=i("marker.cornerradius",t.barcornerradius);n.marker&&(n.marker.cornerradius=mye(o)),c1t(r,n,t,i,t.barmode)}}function mye(e){if(vye(e)){if(e=+e,e>=0)return e}else if(typeof e=="string"&&(e=e.trim(),e.slice(-1)==="%"&&vye(e.slice(0,-1))&&(e=+e.slice(0,-1),e>=0)))return e+"%"}function yye(e,t,r,n,i,a){a=a||{};var o=a.moduleHasSelected!==!1,s=a.moduleHasUnselected!==!1,l=a.moduleHasConstrain!==!1,u=a.moduleHasCliponaxis!==!1,c=a.moduleHasTextangle!==!1,f=a.moduleHasInsideanchor!==!1,h=!!a.hasPathbar,d=Array.isArray(i)||i==="auto",v=d||i==="inside",_=d||i==="outside";if(v||_){var b=GI(n,"textfont",r.font),p=AT.extendFlat({},b),k=e.textfont&&e.textfont.color,E=!k;if(E&&delete p.color,GI(n,"insidetextfont",p),h){var S=AT.extendFlat({},b);E&&delete S.color,GI(n,"pathbar.textfont",S)}_&&GI(n,"outsidetextfont",b),o&&n("selected.textfont.color"),s&&n("unselected.textfont.color"),l&&n("constraintext"),u&&n("cliponaxis"),c&&n("textangle"),n("texttemplate"),n("texttemplatefallback")}v&&f&&n("insidetextanchor")}_ye.exports={supplyDefaults:f1t,crossTraceDefaults:h1t,handleText:yye,validateCornerradius:mye}});var SV=ye((Wlr,xye)=>{"use strict";var d1t=qa(),v1t=ho(),p1t=Dr(),g1t=UI(),m1t=r0().validateCornerradius;xye.exports=function(e,t,r){function n(_,b){return p1t.coerce(e,t,g1t,_,b)}for(var i=!1,a=!1,o=!1,s={},l=n("barmode"),u=l==="group",c=0;c<r.length;c++){var f=r[c];if(d1t.traceIs(f,"bar")&&f.visible)i=!0;else continue;var h=f.xaxis+f.yaxis;if(u?(s[h]&&(o=!0),s[h]=!0):(h+=f._input.offsetgroup,s.length>0&&!s[h]&&(o=!0),s[h]=!0),f.visible&&f.type==="histogram"){var d=v1t.getFromId({_fullLayout:t},f[f.orientation==="v"?"xaxis":"yaxis"]);d.type!=="category"&&(a=!0)}}if(!i){delete t.barmode;return}l!=="overlay"&&n("barnorm"),n("bargap",a&&!o?0:.2),n("bargroupgap");var v=n("barcornerradius");t.barcornerradius=m1t(v)}});var g4=ye((Xlr,bye)=>{"use strict";var ST=Dr();bye.exports=function(t,r){for(var n=0;n<t.length;n++)t[n].i=n;ST.mergeArray(r.text,t,"tx"),ST.mergeArray(r.hovertext,t,"htx");var i=r.marker;if(i){ST.mergeArray(i.opacity,t,"mo",!0),ST.mergeArray(i.color,t,"mc");var a=i.line;a&&(ST.mergeArray(a.color,t,"mlc"),ST.mergeArrayCastPositive(a.width,t,"mlw"))}}});var Eye=ye((Zlr,Mye)=>{"use strict";var wye=ho(),Tye=Dg(),Aye=pv().hasColorscale,Sye=gv(),y1t=g4(),_1t=z0();Mye.exports=function(t,r){var n=wye.getFromId(t,r.xaxis||"x"),i=wye.getFromId(t,r.yaxis||"y"),a,o,s,l,u,c,f={msUTC:!!(r.base||r.base===0)};r.orientation==="h"?(a=n.makeCalcdata(r,"x",f),s=i.makeCalcdata(r,"y"),l=Tye(r,i,"y",s),u=!!r.yperiodalignment,c="y"):(a=i.makeCalcdata(r,"y",f),s=n.makeCalcdata(r,"x"),l=Tye(r,n,"x",s),u=!!r.xperiodalignment,c="x"),o=l.vals;for(var h=Math.min(o.length,a.length),d=new Array(h),v=0;v<h;v++)d[v]={p:o[v],s:a[v]},u&&(d[v].orig_p=s[v],d[v][c+"End"]=l.ends[v],d[v][c+"Start"]=l.starts[v]),r.ids&&(d[v].id=String(r.ids[v]));return Aye(r,"marker")&&Sye(t,r,{vals:r.marker.color,containerStr:"marker",cLetter:"c"}),Aye(r,"marker.line")&&Sye(t,r,{vals:r.marker.line.color,containerStr:"marker.line",cLetter:"c"}),y1t(d,r),_1t(d,r),d}});var bv=ye((Ylr,Cye)=>{"use strict";var x1t=Oa(),b1t=Dr();function w1t(e,t,r){var n=e._fullLayout,i=n["_"+r+"Text_minsize"];if(i){var a=n.uniformtext.mode==="hide",o;switch(r){case"funnelarea":case"pie":case"sunburst":o="g.slice";break;case"treemap":case"icicle":o="g.slice, g.pathbar";break;default:o="g.points > g.point"}t.selectAll(o).each(function(s){var l=s.transform;if(l){l.scale=a&&l.hide?0:i/l.fontSize;var u=x1t.select(this).select("text");b1t.setTransormAndDisplay(u,l)}})}}function T1t(e,t,r){if(r.uniformtext.mode){var n=kye(e),i=r.uniformtext.minsize,a=t.scale*t.fontSize;t.hide=a<i,r[n]=r[n]||1/0,t.hide||(r[n]=Math.min(r[n],Math.max(a,i)))}}function A1t(e,t){var r=kye(e);t[r]=void 0}function kye(e){return"_"+e+"Text_minsize"}Cye.exports={recordMinTextSize:T1t,clearMinTextSize:A1t,resizeText:w1t}});var HI=ye(r2=>{"use strict";var S1t=Eo(),M1t=cd(),Lye=Dr().isArrayOrTypedArray;r2.coerceString=function(e,t,r){if(typeof t=="string"){if(t||!e.noBlank)return t}else if((typeof t=="number"||t===!0)&&!e.strict)return String(t);return r!==void 0?r:e.dflt};r2.coerceNumber=function(e,t,r){if(S1t(t)){t=+t;var n=e.min,i=e.max,a=n!==void 0&&t<n||i!==void 0&&t>i;if(!a)return t}return r!==void 0?r:e.dflt};r2.coerceColor=function(e,t,r){return M1t(t).isValid()?t:r!==void 0?r:e.dflt};r2.coerceEnumerated=function(e,t,r){return e.coerceNumber&&(t=+t),e.values.indexOf(t)!==-1?t:r!==void 0?r:e.dflt};r2.getValue=function(e,t){var r;return Lye(e)?t<e.length&&(r=e[t]):r=e,r};r2.getLineWidth=function(e,t){var r=0<t.mlw?t.mlw:Lye(e.marker.line.width)?0:e.marker.line.width;return r}});var N0=ye((Jlr,Uye)=>{"use strict";var m4=Oa(),E1t=ka(),y4=So(),Pye=Dr(),Iye=qa(),Rye=bv().resizeText,MV=Lm(),k1t=MV.textfont,C1t=MV.insidetextfont,L1t=MV.outsidetextfont,Qd=HI();function P1t(e){var t=m4.select(e).selectAll('g[class^="barlayer"]').selectAll("g.trace");Rye(e,t,"bar");var r=t.size(),n=e._fullLayout;t.style("opacity",function(i){return i[0].trace.opacity}).each(function(i){(n.barmode==="stack"&&r>1||n.bargap===0&&n.bargroupgap===0&&!i[0].trace.marker.line.width)&&m4.select(this).attr("shape-rendering","crispEdges")}),t.selectAll("g.points").each(function(i){var a=m4.select(this),o=i[0].trace;Dye(a,o,e)}),Iye.getComponentMethod("errorbars","style")(t)}function Dye(e,t,r){y4.pointStyle(e.selectAll("path"),t,r),Fye(e,t,r)}function Fye(e,t,r){e.selectAll("text").each(function(n){var i=m4.select(this),a=Pye.ensureUniformFontSize(r,zye(i,n,t,r));y4.font(i,a)})}function I1t(e,t,r){var n=t[0].trace;n.selectedpoints?R1t(r,n,e):(Dye(r,n,e),Iye.getComponentMethod("errorbars","style")(r))}function R1t(e,t,r){y4.selectedPointStyle(e.selectAll("path"),t),D1t(e.selectAll("text"),t,r)}function D1t(e,t,r){e.each(function(n){var i=m4.select(this),a;if(n.selected){a=Pye.ensureUniformFontSize(r,zye(i,n,t,r));var o=t.selected.textfont&&t.selected.textfont.color;o&&(a.color=o),y4.font(i,a)}else y4.selectedTextStyle(i,t)})}function zye(e,t,r,n){var i=n._fullLayout.font,a=r.textfont;if(e.classed("bartext-inside")){var o=Nye(t,r);a=qye(r,t.i,i,o)}else e.classed("bartext-outside")&&(a=Bye(r,t.i,i));return a}function Oye(e,t,r){return EV(k1t,e.textfont,t,r)}function qye(e,t,r,n){var i=Oye(e,t,r),a=e._input.textfont===void 0||e._input.textfont.color===void 0||Array.isArray(e.textfont.color)&&e.textfont.color[t]===void 0;return a&&(i={color:E1t.contrast(n),family:i.family,size:i.size,weight:i.weight,style:i.style,variant:i.variant,textcase:i.textcase,lineposition:i.lineposition,shadow:i.shadow}),EV(C1t,e.insidetextfont,t,i)}function Bye(e,t,r){var n=Oye(e,t,r);return EV(L1t,e.outsidetextfont,t,n)}function EV(e,t,r,n){t=t||{};var i=Qd.getValue(t.family,r),a=Qd.getValue(t.size,r),o=Qd.getValue(t.color,r),s=Qd.getValue(t.weight,r),l=Qd.getValue(t.style,r),u=Qd.getValue(t.variant,r),c=Qd.getValue(t.textcase,r),f=Qd.getValue(t.lineposition,r),h=Qd.getValue(t.shadow,r);return{family:Qd.coerceString(e.family,i,n.family),size:Qd.coerceNumber(e.size,a,n.size),color:Qd.coerceColor(e.color,o,n.color),weight:Qd.coerceString(e.weight,s,n.weight),style:Qd.coerceString(e.style,l,n.style),variant:Qd.coerceString(e.variant,u,n.variant),textcase:Qd.coerceString(e.variant,c,n.textcase),lineposition:Qd.coerceString(e.variant,f,n.lineposition),shadow:Qd.coerceString(e.variant,h,n.shadow)}}function Nye(e,t){return t.type==="waterfall"?t[e.dir].marker.color:e.mcc||e.mc||t.marker.color}Uye.exports={style:P1t,styleTextPoints:Fye,styleOnSelect:I1t,getInsideTextFont:qye,getOutsideTextFont:Bye,getBarColor:Nye,resizeText:Rye}});var n2=ye(($lr,Yye)=>{"use strict";var jI=Oa(),WI=Eo(),Fd=Dr(),F1t=ru(),z1t=ka(),T_=So(),O1t=qa(),XI=ho().tickText,Vye=bv(),q1t=Vye.recordMinTextSize,B1t=Vye.clearMinTextSize,kV=N0(),MT=HI(),N1t=e2(),Gye=Lm(),U1t=Gye.text,V1t=Gye.textposition,G1t=ip().appendArrayPointValue,Gv=N1t.TEXTPAD;function H1t(e){return e.id}function j1t(e){if(e.ids)return H1t}function CV(e){return(e>0)-(e<0)}function Pm(e,t){return e<t?1:-1}function W1t(e,t,r,n){var i=[],a=[],o=n?t:r,s=n?r:t;return i[0]=o.c2p(e.s0,!0),a[0]=s.c2p(e.p0,!0),i[1]=o.c2p(e.s1,!0),a[1]=s.c2p(e.p1,!0),n?[i,a]:[a,i]}function Hye(e,t,r,n){if(!t.uniformtext.mode&&jye(r)){var i;return n&&(i=n()),e.transition().duration(r.duration).ease(r.easing).each("end",function(){i&&i()}).each("interrupt",function(){i&&i()})}else return e}function jye(e){return e&&e.duration>0}function X1t(e,t,r,n,i,a){var o=t.xaxis,s=t.yaxis,l=e._fullLayout,u=e._context.staticPlot;i||(i={mode:l.barmode,norm:l.barmode,gap:l.bargap,groupgap:l.bargroupgap},B1t("bar",l));var c=Fd.makeTraceGroups(n,r,"trace bars").each(function(f){var h=jI.select(this),d=f[0].trace,v=f[0].t,_=d.type==="waterfall",b=d.type==="funnel",p=d.type==="histogram",k=d.type==="bar",E=k||b,S=0;_&&d.connector.visible&&d.connector.mode==="between"&&(S=d.connector.line.width/2);var L=d.orientation==="h",x=jye(i),C=Fd.ensureSingle(h,"g","points"),M=j1t(d),g=C.selectAll("g.point").data(Fd.identity,M);g.enter().append("g").classed("point",!0),g.exit().remove(),g.each(function(T,z){var O=jI.select(this),V=W1t(T,o,s,L),G=V[0][0],Z=V[0][1],j=V[1][0],N=V[1][1],H=(L?Z-G:N-j)===0;H&&E&&MT.getLineWidth(d,T)&&(H=!1),H||(H=!WI(G)||!WI(Z)||!WI(j)||!WI(N)),T.isBlank=H,H&&(L?Z=G:N=j),S&&!H&&(L?(G-=Pm(G,Z)*S,Z+=Pm(G,Z)*S):(j-=Pm(j,N)*S,N+=Pm(j,N)*S));var te,oe;if(d.type==="waterfall"){if(!H){var _e=d[T.dir].marker;te=_e.line.width,oe=_e.color}}else te=MT.getLineWidth(d,T),oe=T.mc||d.marker.color;function Ee($e){var St=jI.round(te/2%1,2);return i.gap===0&&i.groupgap===0?jI.round(Math.round($e)-St,2):$e}function Ce($e,St,Qt){return Qt&&$e===St?$e:Math.abs($e-St)>=2?Ee($e):$e>St?Math.ceil($e):Math.floor($e)}var me=z1t.opacity(oe),ie=me<1||te>.01?Ee:Ce;e._context.staticPlot||(G=ie(G,Z,L),Z=ie(Z,G,L),j=ie(j,N,!L),N=ie(N,j,!L));var Se=L?o.c2p:s.c2p,Le;T.s0>0?Le=T._sMax:T.s0<0?Le=T._sMin:Le=T.s1>0?T._sMax:T._sMin;function Ae($e,St){if(!$e)return 0;var Qt=Math.abs(L?N-j:Z-G),Vt=Math.abs(L?Z-G:N-j),_t=ie(Math.abs(Se(Le,!0)-Se(0,!0))),It=T.hasB?Math.min(Qt/2,Vt/2):Math.min(Qt/2,_t),mt;if(St==="%"){var er=Math.min(50,$e);mt=Qt*(er/100)}else mt=$e;return ie(Math.max(Math.min(mt,It),0))}var Fe=k||p?Ae(v.cornerradiusvalue,v.cornerradiusform):0,Pe,ge,Re="M"+G+","+j+"V"+N+"H"+Z+"V"+j+"Z",ce=0;if(Fe&&T.s){var Ze=CV(T.s0)===0||CV(T.s)===CV(T.s0)?T.s1:T.s0;if(ce=ie(T.hasB?0:Math.abs(Se(Le,!0)-Se(Ze,!0))),ce<Fe){var ut=Pm(G,Z),pt=Pm(j,N),Zt=ut===-pt?1:0;if(L)if(T.hasB)Pe="M"+(G+Fe*ut)+","+j+"A "+Fe+","+Fe+" 0 0 "+Zt+" "+G+","+(j+Fe*pt)+"V"+(N-Fe*pt)+"A "+Fe+","+Fe+" 0 0 "+Zt+" "+(G+Fe*ut)+","+N+"H"+(Z-Fe*ut)+"A "+Fe+","+Fe+" 0 0 "+Zt+" "+Z+","+(N-Fe*pt)+"V"+(j+Fe*pt)+"A "+Fe+","+Fe+" 0 0 "+Zt+" "+(Z-Fe*ut)+","+j+"Z";else{ge=Math.abs(Z-G)+ce;var st=ge<Fe?Fe-Math.sqrt(ge*(2*Fe-ge)):0,lt=ce>0?Math.sqrt(ce*(2*Fe-ce)):0,Gt=ut>0?Math.max:Math.min;Pe="M"+G+","+j+"V"+(N-st*pt)+"H"+Gt(Z-(Fe-ce)*ut,G)+"A "+Fe+","+Fe+" 0 0 "+Zt+" "+Z+","+(N-Fe*pt-lt)+"V"+(j+Fe*pt+lt)+"A "+Fe+","+Fe+" 0 0 "+Zt+" "+Gt(Z-(Fe-ce)*ut,G)+","+(j+st*pt)+"Z"}else if(T.hasB)Pe="M"+(G+Fe*ut)+","+j+"A "+Fe+","+Fe+" 0 0 "+Zt+" "+G+","+(j+Fe*pt)+"V"+(N-Fe*pt)+"A "+Fe+","+Fe+" 0 0 "+Zt+" "+(G+Fe*ut)+","+N+"H"+(Z-Fe*ut)+"A "+Fe+","+Fe+" 0 0 "+Zt+" "+Z+","+(N-Fe*pt)+"V"+(j+Fe*pt)+"A "+Fe+","+Fe+" 0 0 "+Zt+" "+(Z-Fe*ut)+","+j+"Z";else{ge=Math.abs(N-j)+ce;var Nt=ge<Fe?Fe-Math.sqrt(ge*(2*Fe-ge)):0,Jt=ce>0?Math.sqrt(ce*(2*Fe-ce)):0,sr=pt>0?Math.max:Math.min;Pe="M"+(G+Nt*ut)+","+j+"V"+sr(N-(Fe-ce)*pt,j)+"A "+Fe+","+Fe+" 0 0 "+Zt+" "+(G+Fe*ut-Jt)+","+N+"H"+(Z-Fe*ut+Jt)+"A "+Fe+","+Fe+" 0 0 "+Zt+" "+(Z-Nt*ut)+","+sr(N-(Fe-ce)*pt,j)+"V"+j+"Z"}}else Pe=Re}else Pe=Re;var wr=Hye(Fd.ensureSingle(O,"path"),l,i,a);if(wr.style("vector-effect",u?"none":"non-scaling-stroke").attr("d",isNaN((Z-G)*(N-j))||H&&e._context.staticPlot?"M0,0Z":Pe).call(T_.setClipUrl,t.layerClipId,e),!l.uniformtext.mode&&x){var cr=T_.makePointStyleFns(d);T_.singlePointStyle(T,wr,d,cr,e)}Z1t(e,t,O,f,z,G,Z,j,N,Fe,ce,i,a),t.layerClipId&&T_.hideOutsideRangePoint(T,O.select("text"),o,s,d.xcalendar,d.ycalendar)});var P=d.cliponaxis===!1;T_.setClipUrl(h,P?null:t.layerClipId,e)});O1t.getComponentMethod("errorbars","plot")(e,c,t,i)}function Z1t(e,t,r,n,i,a,o,s,l,u,c,f,h){var d=t.xaxis,v=t.yaxis,_=e._fullLayout,b;function p(ge,Re,ce){var Ze=Fd.ensureSingle(ge,"text").text(Re).attr({class:"bartext bartext-"+b,"text-anchor":"middle","data-notex":1}).call(T_.font,ce).call(F1t.convertToTspans,e);return Ze}var k=n[0].trace,E=k.orientation==="h",S=J1t(_,n,i,d,v);b=$1t(k,i);var L=f.mode==="stack"||f.mode==="relative",x=n[i],C=!L||x._outmost,M=x.hasB,g=u&&u-c>Gv;if(!S||b==="none"||(x.isBlank||a===o||s===l)&&(b==="auto"||b==="inside")){r.select("text").remove();return}var P=_.font,T=kV.getBarColor(n[i],k),z=kV.getInsideTextFont(k,i,P,T),O=kV.getOutsideTextFont(k,i,P),V=k.insidetextanchor||"end",G=r.datum();E?d.type==="log"&&G.s0<=0&&(d.range[0]<d.range[1]?a=0:a=d._length):v.type==="log"&&G.s0<=0&&(v.range[0]<v.range[1]?s=v._length:s=0);var Z=Math.abs(o-a),j=Math.abs(l-s),N=Z-2*Gv,H=j-2*Gv,te,oe,_e,Ee,Ce;if(b==="outside"&&!C&&!x.hasB&&(b="inside"),b==="auto")if(C){b="inside",Ce=Fd.ensureUniformFontSize(e,z),te=p(r,S,Ce),oe=T_.bBox(te.node()),_e=oe.width,Ee=oe.height;var me=_e>0&&Ee>0,ie;g?M?ie=i2(N-2*u,H,_e,Ee,E)||i2(N,H-2*u,_e,Ee,E):E?ie=i2(N-(u-c),H,_e,Ee,E)||i2(N,H-2*(u-c),_e,Ee,E):ie=i2(N,H-(u-c),_e,Ee,E)||i2(N-2*(u-c),H,_e,Ee,E):ie=i2(N,H,_e,Ee,E),me&&ie?b="inside":(b="outside",te.remove(),te=null)}else b="inside";if(!te){Ce=Fd.ensureUniformFontSize(e,b==="outside"?O:z),te=p(r,S,Ce);var Se=te.attr("transform");if(te.attr("transform",""),oe=T_.bBox(te.node()),_e=oe.width,Ee=oe.height,te.attr("transform",Se),_e<=0||Ee<=0){te.remove();return}}var Le=k.textangle,Ae,Fe;b==="outside"?(Fe=k.constraintext==="both"||k.constraintext==="outside",Ae=K1t(a,o,s,l,oe,{isHorizontal:E,constrained:Fe,angle:Le})):(Fe=k.constraintext==="both"||k.constraintext==="inside",Ae=Zye(a,o,s,l,oe,{isHorizontal:E,constrained:Fe,angle:Le,anchor:V,hasB:M,r:u,overhead:c})),Ae.fontSize=Ce.size,q1t(k.type==="histogram"?"bar":k.type,Ae,_),x.transform=Ae;var Pe=Hye(te,_,f,h);Fd.setTransormAndDisplay(Pe,Ae)}function i2(e,t,r,n,i){if(e<0||t<0)return!1;var a=r<=e&&n<=t,o=r<=t&&n<=e,s=i?e>=r*(t/n):t>=n*(e/r);return a||o||s}function Wye(e){return e==="auto"?0:e}function Xye(e,t){var r=Math.PI/180*t,n=Math.abs(Math.sin(r)),i=Math.abs(Math.cos(r));return{x:e.width*i+e.height*n,y:e.width*n+e.height*i}}function Zye(e,t,r,n,i,a){var o=!!a.isHorizontal,s=!!a.constrained,l=a.angle||0,u=a.anchor,c=u==="end",f=u==="start",h=a.leftToRight||0,d=(h+1)/2,v=1-d,_=a.hasB,b=a.r,p=a.overhead,k=i.width,E=i.height,S=Math.abs(t-e),L=Math.abs(n-r),x=S>2*Gv&&L>2*Gv?Gv:0;S-=2*x,L-=2*x;var C=Wye(l);l==="auto"&&!(k<=S&&E<=L)&&(k>S||E>L)&&(!(k>L||E>S)||k<E!=S<L)&&(C+=90);var M=Xye(i,C),g,P;if(b&&b-p>Gv){var T=Y1t(e,t,r,n,M,b,p,o,_);g=T.scale,P=T.pad}else g=1,s&&(g=Math.min(1,S/M.x,L/M.y)),P=0;var z=i.left*v+i.right*d,O=(i.top+i.bottom)/2,V=(e+Gv)*v+(t-Gv)*d,G=(r+n)/2,Z=0,j=0;if(f||c){var N=(o?M.x:M.y)/2;b&&(c||_)&&(x+=P);var H=o?Pm(e,t):Pm(r,n);o?f?(V=e+H*x,Z=-H*N):(V=t-H*x,Z=H*N):f?(G=r+H*x,j=-H*N):(G=n-H*x,j=H*N)}return{textX:z,textY:O,targetX:V,targetY:G,anchorX:Z,anchorY:j,scale:g,rotate:C}}function Y1t(e,t,r,n,i,a,o,s,l){var u=Math.max(0,Math.abs(t-e)-2*Gv),c=Math.max(0,Math.abs(n-r)-2*Gv),f=a-Gv,h=o?f-Math.sqrt(f*f-(f-o)*(f-o)):f,d=l?f*2:s?f-o:2*h,v=l?f*2:s?2*h:f-o,_,b,p,k,E;return i.y/i.x>=c/(u-d)?k=c/i.y:i.y/i.x<=(c-v)/u?k=u/i.x:!l&&s?(_=i.x*i.x+i.y*i.y/4,b=-2*i.x*(u-f)-i.y*(c/2-f),p=(u-f)*(u-f)+(c/2-f)*(c/2-f)-f*f,k=(-b+Math.sqrt(b*b-4*_*p))/(2*_)):l?(_=(i.x*i.x+i.y*i.y)/4,b=-i.x*(u/2-f)-i.y*(c/2-f),p=(u/2-f)*(u/2-f)+(c/2-f)*(c/2-f)-f*f,k=(-b+Math.sqrt(b*b-4*_*p))/(2*_)):(_=i.x*i.x/4+i.y*i.y,b=-i.x*(u/2-f)-2*i.y*(c-f),p=(u/2-f)*(u/2-f)+(c-f)*(c-f)-f*f,k=(-b+Math.sqrt(b*b-4*_*p))/(2*_)),k=Math.min(1,k),s?E=Math.max(0,f-Math.sqrt(Math.max(0,f*f-(f-(c-i.y*k)/2)*(f-(c-i.y*k)/2)))-o):E=Math.max(0,f-Math.sqrt(Math.max(0,f*f-(f-(u-i.x*k)/2)*(f-(u-i.x*k)/2)))-o),{scale:k,pad:E}}function K1t(e,t,r,n,i,a){var o=!!a.isHorizontal,s=!!a.constrained,l=a.angle||0,u=i.width,c=i.height,f=Math.abs(t-e),h=Math.abs(n-r),d;o?d=h>2*Gv?Gv:0:d=f>2*Gv?Gv:0;var v=1;s&&(v=o?Math.min(1,h/c):Math.min(1,f/u));var _=Wye(l),b=Xye(i,_),p=(o?b.x:b.y)/2,k=(i.left+i.right)/2,E=(i.top+i.bottom)/2,S=(e+t)/2,L=(r+n)/2,x=0,C=0,M=o?Pm(t,e):Pm(r,n);return o?(S=t-M*d,x=M*p):(L=n+M*d,C=-M*p),{textX:k,textY:E,targetX:S,targetY:L,anchorX:x,anchorY:C,scale:v,rotate:_}}function J1t(e,t,r,n,i){var a=t[0].trace,o=a.texttemplate,s;return o?s=Q1t(e,t,r,n,i):a.textinfo?s=e_t(t,r,n,i):s=MT.getValue(a.text,r),MT.coerceString(U1t,s)}function $1t(e,t){var r=MT.getValue(e.textposition,t);return MT.coerceEnumerated(V1t,r)}function Q1t(e,t,r,n,i){var a=t[0].trace,o=Fd.castOption(a,r,"texttemplate");if(!o)return"";var s=a.type==="histogram",l=a.type==="waterfall",u=a.type==="funnel",c=a.orientation==="h",f,h,d,v;c?(f="y",h=i,d="x",v=n):(f="x",h=n,d="y",v=i);function _(x){return XI(h,h.c2l(x),!0).text}function b(x){return XI(v,v.c2l(x),!0).text}var p=t[r],k={};k.label=p.p,k.labelLabel=k[f+"Label"]=_(p.p);var E=Fd.castOption(a,p.i,"text");(E===0||E)&&(k.text=E),k.value=p.s,k.valueLabel=k[d+"Label"]=b(p.s);var S={};G1t(S,a,p.i),(s||S.x===void 0)&&(S.x=c?k.value:k.label),(s||S.y===void 0)&&(S.y=c?k.label:k.value),(s||S.xLabel===void 0)&&(S.xLabel=c?k.valueLabel:k.labelLabel),(s||S.yLabel===void 0)&&(S.yLabel=c?k.labelLabel:k.valueLabel),l&&(k.delta=+p.rawS||p.s,k.deltaLabel=b(k.delta),k.final=p.v,k.finalLabel=b(k.final),k.initial=k.final-k.delta,k.initialLabel=b(k.initial)),u&&(k.value=p.s,k.valueLabel=b(k.value),k.percentInitial=p.begR,k.percentInitialLabel=Fd.formatPercent(p.begR),k.percentPrevious=p.difR,k.percentPreviousLabel=Fd.formatPercent(p.difR),k.percentTotal=p.sumR,k.percenTotalLabel=Fd.formatPercent(p.sumR));var L=Fd.castOption(a,p.i,"customdata");return L&&(k.customdata=L),Fd.texttemplateString({data:[S,k,a._meta],fallback:a.texttemplatefallback,labels:k,locale:e._d3locale,template:o})}function e_t(e,t,r,n){var i=e[0].trace,a=i.orientation==="h",o=i.type==="waterfall",s=i.type==="funnel";function l(L){var x=a?n:r;return XI(x,L,!0).text}function u(L){var x=a?r:n;return XI(x,+L,!0).text}var c=i.textinfo,f=e[t],h=c.split("+"),d=[],v,_=function(L){return h.indexOf(L)!==-1};if(_("label")&&d.push(l(e[t].p)),_("text")&&(v=Fd.castOption(i,f.i,"text"),(v===0||v)&&d.push(v)),o){var b=+f.rawS||f.s,p=f.v,k=p-b;_("initial")&&d.push(u(k)),_("delta")&&d.push(u(b)),_("final")&&d.push(u(p))}if(s){_("value")&&d.push(u(f.s));var E=0;_("percent initial")&&E++,_("percent previous")&&E++,_("percent total")&&E++;var S=E>1;_("percent initial")&&(v=Fd.formatPercent(f.begR),S&&(v+=" of initial"),d.push(v)),_("percent previous")&&(v=Fd.formatPercent(f.difR),S&&(v+=" of previous"),d.push(v)),_("percent total")&&(v=Fd.formatPercent(f.sumR),S&&(v+=" of total"),d.push(v))}return d.join("<br>")}Yye.exports={plot:X1t,toMoveInsideBar:Zye}});var ET=ye((Qlr,Qye)=>{"use strict";var _4=vf(),t_t=qa(),Kye=ka(),r_t=Dr().fillText,i_t=HI().getLineWidth,LV=ho().hoverLabelText,n_t=fs().BADNUM;function a_t(e,t,r,n,i){var a=Jye(e,t,r,n,i);if(a){var o=a.cd,s=o[0].trace,l=o[a.index];return a.color=$ye(s,l),t_t.getComponentMethod("errorbars","hoverInfo")(l,s,a),[a]}}function Jye(e,t,r,n,i){var a=e.cd,o=a[0].trace,s=a[0].t,l=n==="closest",u=o.type==="waterfall",c=e.maxHoverDistance,f=e.maxSpikeDistance,h,d,v,_,b,p,k;o.orientation==="h"?(h=r,d=t,v="y",_="x",b=G,p=z):(h=t,d=r,v="x",_="y",p=G,b=z);var E=o[v+"period"],S=l||E;function L(ie){return C(ie,-1)}function x(ie){return C(ie,1)}function C(ie,Se){var Le=ie.w;return ie[v]+Se*Le/2}function M(ie){return ie[v+"End"]-ie[v+"Start"]}var g=l?L:E?function(ie){return ie.p-M(ie)/2}:function(ie){return Math.min(L(ie),ie.p-s.bardelta/2)},P=l?x:E?function(ie){return ie.p+M(ie)/2}:function(ie){return Math.max(x(ie),ie.p+s.bardelta/2)};function T(ie,Se,Le){return i.finiteRange&&(Le=0),_4.inbox(ie-h,Se-h,Le+Math.min(1,Math.abs(Se-ie)/k)-1)}function z(ie){return T(g(ie),P(ie),c)}function O(ie){return T(L(ie),x(ie),f)}function V(ie){var Se=ie[_];if(u){var Le=Math.abs(ie.rawS)||0;d>0?Se+=Le:d<0&&(Se-=Le)}return Se}function G(ie){var Se=d,Le=ie.b,Ae=V(ie);return _4.inbox(Le-Se,Ae-Se,c+(Ae-Se)/(Ae-Le)-1)}function Z(ie){var Se=d,Le=ie.b,Ae=V(ie);return _4.inbox(Le-Se,Ae-Se,f+(Ae-Se)/(Ae-Le)-1)}var j=e[v+"a"],N=e[_+"a"];k=Math.abs(j.r2c(j.range[1])-j.r2c(j.range[0]));function H(ie){return(b(ie)+p(ie))/2}var te=_4.getDistanceFunction(n,b,p,H);if(_4.getClosest(a,te,e),e.index!==!1&&a[e.index].p!==n_t){S||(g=function(ie){return Math.min(L(ie),ie.p-s.bargroupwidth/2)},P=function(ie){return Math.max(x(ie),ie.p+s.bargroupwidth/2)});var oe=e.index,_e=a[oe],Ee=o.base?_e.b+_e.s:_e.s;e[_+"0"]=e[_+"1"]=N.c2p(_e[_],!0),e[_+"LabelVal"]=Ee;var Ce=s.extents[s.extents.round(_e.p)];e[v+"0"]=j.c2p(l?g(_e):Ce[0],!0),e[v+"1"]=j.c2p(l?P(_e):Ce[1],!0);var me=_e.orig_p!==void 0;return e[v+"LabelVal"]=me?_e.orig_p:_e.p,e.labelLabel=LV(j,e[v+"LabelVal"],o[v+"hoverformat"]),e.valueLabel=LV(N,e[_+"LabelVal"],o[_+"hoverformat"]),e.baseLabel=LV(N,_e.b,o[_+"hoverformat"]),e.spikeDistance=(Z(_e)+O(_e))/2,e[v+"Spike"]=j.c2p(_e.p,!0),r_t(_e,o,e),e.hovertemplate=o.hovertemplate,e}}function $ye(e,t){var r=t.mcc||e.marker.color,n=t.mlcc||e.marker.line.color,i=i_t(e,t);if(Kye.opacity(r))return r;if(Kye.opacity(n)&&i)return n}Qye.exports={hoverPoints:a_t,hoverOnBars:Jye,getTraceColor:$ye}});var t1e=ye((eur,e1e)=>{"use strict";e1e.exports=function(t,r,n){return t.x="xVal"in r?r.xVal:r.x,t.y="yVal"in r?r.yVal:r.y,r.xa&&(t.xaxis=r.xa),r.ya&&(t.yaxis=r.ya),n.orientation==="h"?(t.label=t.y,t.value=t.x):(t.label=t.x,t.value=t.y),t}});var kT=ye((tur,r1e)=>{"use strict";r1e.exports=function(t,r){var n=t.cd,i=t.xaxis,a=t.yaxis,o=n[0].trace,s=o.type==="funnel",l=o.orientation==="h",u=[],c;if(r===!1)for(c=0;c<n.length;c++)n[c].selected=0;else for(c=0;c<n.length;c++){var f=n[c],h="ct"in f?f.ct:o_t(f,i,a,l,s);r.contains(h,!1,c,t)?(u.push({pointNumber:c,x:i.c2d(f.x),y:a.c2d(f.y)}),f.selected=1):f.selected=0}return u};function o_t(e,t,r,n,i){var a=t.c2p(n?e.s0:e.p0,!0),o=t.c2p(n?e.s1:e.p1,!0),s=r.c2p(n?e.p0:e.s0,!0),l=r.c2p(n?e.p1:e.s1,!0);return i?[(a+o)/2,(s+l)/2]:n?[o,(s+l)/2]:[(a+o)/2,l]}});var n1e=ye((rur,i1e)=>{"use strict";i1e.exports={attributes:Lm(),layoutAttributes:UI(),supplyDefaults:r0().supplyDefaults,crossTraceDefaults:r0().crossTraceDefaults,supplyLayoutDefaults:SV(),calc:Eye(),crossTraceCalc:jb().crossTraceCalc,colorbar:$d(),arraysToCalcdata:g4(),plot:n2().plot,style:N0().style,styleOnSelect:N0().styleOnSelect,hoverPoints:ET().hoverPoints,eventData:t1e(),selectPoints:kT(),moduleType:"trace",name:"bar",basePlotModule:ph(),categories:["bar-like","cartesian","svg","bar","oriented","errorBarsOK","showLegend","zoomScale"],animatable:!0,meta:{}}});var o1e=ye((iur,a1e)=>{"use strict";a1e.exports=n1e()});var x4=ye((nur,c1e)=>{"use strict";var s_t=Cg(),U0=pf(),s1e=Lm(),l_t=Lh(),l1e=df().axisHoverFormat,{hovertemplateAttrs:u_t,templatefallbackAttrs:c_t}=Ll(),zy=Ao().extendFlat,CT=U0.marker,u1e=CT.line;c1e.exports={y:{valType:"data_array",editType:"calc+clearAxisTypes"},x:{valType:"data_array",editType:"calc+clearAxisTypes"},x0:{valType:"any",editType:"calc+clearAxisTypes"},y0:{valType:"any",editType:"calc+clearAxisTypes"},dx:{valType:"number",editType:"calc"},dy:{valType:"number",editType:"calc"},xperiod:U0.xperiod,yperiod:U0.yperiod,xperiod0:U0.xperiod0,yperiod0:U0.yperiod0,xperiodalignment:U0.xperiodalignment,yperiodalignment:U0.yperiodalignment,xhoverformat:l1e("x"),yhoverformat:l1e("y"),name:{valType:"string",editType:"calc+clearAxisTypes"},q1:{valType:"data_array",editType:"calc+clearAxisTypes"},median:{valType:"data_array",editType:"calc+clearAxisTypes"},q3:{valType:"data_array",editType:"calc+clearAxisTypes"},lowerfence:{valType:"data_array",editType:"calc"},upperfence:{valType:"data_array",editType:"calc"},notched:{valType:"boolean",editType:"calc"},notchwidth:{valType:"number",min:0,max:.5,dflt:.25,editType:"calc"},notchspan:{valType:"data_array",editType:"calc"},boxpoints:{valType:"enumerated",values:["all","outliers","suspectedoutliers",!1],editType:"calc"},jitter:{valType:"number",min:0,max:1,editType:"calc"},pointpos:{valType:"number",min:-2,max:2,editType:"calc"},sdmultiple:{valType:"number",min:0,editType:"calc",dflt:1},sizemode:{valType:"enumerated",values:["quartiles","sd"],editType:"calc",dflt:"quartiles"},boxmean:{valType:"enumerated",values:[!0,"sd",!1],editType:"calc"},mean:{valType:"data_array",editType:"calc"},sd:{valType:"data_array",editType:"calc"},orientation:{valType:"enumerated",values:["v","h"],editType:"calc+clearAxisTypes"},quartilemethod:{valType:"enumerated",values:["linear","exclusive","inclusive"],dflt:"linear",editType:"calc"},width:{valType:"number",min:0,dflt:0,editType:"calc"},marker:{outliercolor:{valType:"color",dflt:"rgba(0, 0, 0, 0)",editType:"style"},symbol:zy({},CT.symbol,{arrayOk:!1,editType:"plot"}),opacity:zy({},CT.opacity,{arrayOk:!1,dflt:1,editType:"style"}),angle:zy({},CT.angle,{arrayOk:!1,editType:"calc"}),size:zy({},CT.size,{arrayOk:!1,editType:"calc"}),color:zy({},CT.color,{arrayOk:!1,editType:"style"}),line:{color:zy({},u1e.color,{arrayOk:!1,dflt:l_t.defaultLine,editType:"style"}),width:zy({},u1e.width,{arrayOk:!1,dflt:0,editType:"style"}),outliercolor:{valType:"color",editType:"style"},outlierwidth:{valType:"number",min:0,dflt:1,editType:"style"},editType:"style"},editType:"plot"},line:{color:{valType:"color",editType:"style"},width:{valType:"number",min:0,dflt:2,editType:"style"},editType:"plot"},fillcolor:s_t(),whiskerwidth:{valType:"number",min:0,max:1,dflt:.5,editType:"calc"},showwhiskers:{valType:"boolean",editType:"calc"},offsetgroup:s1e.offsetgroup,alignmentgroup:s1e.alignmentgroup,selected:{marker:U0.selected.marker,editType:"style"},unselected:{marker:U0.unselected.marker,editType:"style"},text:zy({},U0.text,{}),hovertext:zy({},U0.hovertext,{}),hovertemplate:u_t({}),hovertemplatefallback:c_t(),hoveron:{valType:"flaglist",flags:["boxes","points"],dflt:"boxes+points",editType:"style"},zorder:U0.zorder}});var b4=ye((aur,f1e)=>{"use strict";f1e.exports={boxmode:{valType:"enumerated",values:["group","overlay"],dflt:"overlay",editType:"calc"},boxgap:{valType:"number",min:0,max:1,dflt:.3,editType:"calc"},boxgroupgap:{valType:"number",min:0,max:1,dflt:.3,editType:"calc"}}});var T4=ye((our,p1e)=>{"use strict";var V0=Dr(),f_t=qa(),h_t=ka(),d_t=Ig(),v_t=Hb(),h1e=R3(),w4=x4();function p_t(e,t,r,n){function i(v,_){return V0.coerce(e,t,w4,v,_)}if(d1e(e,t,i,n),t.visible!==!1){d_t(e,t,n,i),i("xhoverformat"),i("yhoverformat");var a=t._hasPreCompStats;a&&(i("lowerfence"),i("upperfence")),i("line.color",(e.marker||{}).color||r),i("line.width"),i("fillcolor",h_t.addOpacity(t.line.color,.5));var o=!1;if(a){var s=i("mean"),l=i("sd");s&&s.length&&(o=!0,l&&l.length&&(o="sd"))}i("whiskerwidth");var u=i("sizemode"),c;u==="quartiles"&&(c=i("boxmean",o)),i("showwhiskers",u==="quartiles"),(u==="sd"||c==="sd")&&i("sdmultiple"),i("width"),i("quartilemethod");var f=!1;if(a){var h=i("notchspan");h&&h.length&&(f=!0)}else V0.validate(e.notchwidth,w4.notchwidth)&&(f=!0);var d=i("notched",f);d&&i("notchwidth"),v1e(e,t,i,{prefix:"box"}),i("zorder")}}function d1e(e,t,r,n){function i(P){var T=0;return P&&P.length&&(T+=1,V0.isArrayOrTypedArray(P[0])&&P[0].length&&(T+=1)),T}function a(P){return V0.validate(e[P],w4[P])}var o=r("y"),s=r("x"),l;if(t.type==="box"){var u=r("q1"),c=r("median"),f=r("q3");t._hasPreCompStats=u&&u.length&&c&&c.length&&f&&f.length,l=Math.min(V0.minRowLength(u),V0.minRowLength(c),V0.minRowLength(f))}var h=i(o),d=i(s),v=h&&V0.minRowLength(o),_=d&&V0.minRowLength(s),b=n.calendar,p={autotypenumbers:n.autotypenumbers},k,E;if(t._hasPreCompStats)switch(String(d)+String(h)){case"00":var S=a("x0")||a("dx"),L=a("y0")||a("dy");L&&!S?k="h":k="v",E=l;break;case"10":k="v",E=Math.min(l,_);break;case"20":k="h",E=Math.min(l,s.length);break;case"01":k="h",E=Math.min(l,v);break;case"02":k="v",E=Math.min(l,o.length);break;case"12":k="v",E=Math.min(l,_,o.length);break;case"21":k="h",E=Math.min(l,s.length,v);break;case"11":E=0;break;case"22":var x=!1,C;for(C=0;C<s.length;C++)if(h1e(s[C],b,p)==="category"){x=!0;break}if(x)k="v",E=Math.min(l,_,o.length);else{for(C=0;C<o.length;C++)if(h1e(o[C],b,p)==="category"){x=!0;break}x?(k="h",E=Math.min(l,s.length,v)):(k="v",E=Math.min(l,_,o.length))}break}else h>0?(k="v",d>0?E=Math.min(_,v):E=Math.min(v)):d>0?(k="h",E=Math.min(_)):E=0;if(!E){t.visible=!1;return}t._length=E;var M=r("orientation",k);t._hasPreCompStats?M==="v"&&d===0?(r("x0",0),r("dx",1)):M==="h"&&h===0&&(r("y0",0),r("dy",1)):M==="v"&&d===0?r("x0"):M==="h"&&h===0&&r("y0");var g=f_t.getComponentMethod("calendars","handleTraceDefaults");g(e,t,["x","y"],n)}function v1e(e,t,r,n){var i=n.prefix,a=V0.coerce2(e,t,w4,"marker.outliercolor"),o=r("marker.line.outliercolor"),s="outliers";t._hasPreCompStats?s="all":(a||o)&&(s="suspectedoutliers");var l=r(i+"points",s);l?(r("jitter",l==="all"?.3:0),r("pointpos",l==="all"?-1.5:0),r("marker.symbol"),r("marker.opacity"),r("marker.size"),r("marker.angle"),r("marker.color",t.line.color),r("marker.line.color"),r("marker.line.width"),l==="suspectedoutliers"&&(r("marker.line.outliercolor",t.marker.color),r("marker.line.outlierwidth")),r("selected.marker.color"),r("unselected.marker.color"),r("selected.marker.size"),r("unselected.marker.size"),r("text"),r("hovertext")):delete t.marker;var u=r("hoveron");(u==="all"||u.indexOf("points")!==-1)&&(r("hovertemplate"),r("hovertemplatefallback")),V0.coerceSelectionMarkerOpacity(t,r)}function g_t(e,t){var r,n;function i(l){return V0.coerce(n._input,n,w4,l)}for(var a=0;a<e.length;a++){n=e[a];var o=n.type;if(o==="box"||o==="violin"){r=n._input;var s=t[o+"mode"];s==="group"&&v_t(r,n,t,i,s)}}}p1e.exports={supplyDefaults:p_t,crossTraceDefaults:g_t,handleSampleDefaults:d1e,handlePointsDefaults:v1e}});var ZI=ye((sur,m1e)=>{"use strict";var m_t=qa(),y_t=Dr(),__t=b4();function g1e(e,t,r,n,i){for(var a=i+"Layout",o=!1,s=0;s<r.length;s++){var l=r[s];if(m_t.traceIs(l,a)){o=!0;break}}o&&(n(i+"mode"),n(i+"gap"),n(i+"groupgap"))}function x_t(e,t,r){function n(i,a){return y_t.coerce(e,t,__t,i,a)}g1e(e,t,r,n,"box")}m1e.exports={supplyLayoutDefaults:x_t,_supply:g1e}});var RV=ye((lur,M1e)=>{"use strict";var IV=Eo(),YI=ho(),b_t=Dg(),gh=Dr(),i0=fs().BADNUM,Oy=gh._;M1e.exports=function(t,r){var n=t._fullLayout,i=YI.getFromId(t,r.xaxis||"x"),a=YI.getFromId(t,r.yaxis||"y"),o=[],s=r.type==="violin"?"_numViolins":"_numBoxes",l,u,c,f,h,d,v;r.orientation==="h"?(c=i,f="x",h=a,d="y",v=!!r.yperiodalignment):(c=a,f="y",h=i,d="x",v=!!r.xperiodalignment);var _=w_t(r,d,h,n[s]),b=_[0],p=_[1],k=gh.distinctVals(b,h),E=k.vals,S=k.minDiff/2,L,x,C,M,g,P,T=(r.boxpoints||r.points)==="all"?gh.identity:function(Zt){return Zt.v<L.lf||Zt.v>L.uf};if(r._hasPreCompStats){var z=r[f],O=function(Zt){return c.d2c((r[Zt]||[])[l])},V=1/0,G=-1/0;for(l=0;l<r._length;l++){var Z=b[l];if(IV(Z)){if(L={},L.pos=L[d]=Z,v&&p&&(L.orig_p=p[l]),L.q1=O("q1"),L.med=O("median"),L.q3=O("q3"),x=[],z&&gh.isArrayOrTypedArray(z[l]))for(u=0;u<z[l].length;u++)P=c.d2c(z[l][u]),P!==i0&&(g={v:P,i:[l,u]},y1e(g,r,[l,u]),x.push(g));if(L.pts=x.sort(_1e),C=L[f]=x.map(x1e),M=C.length,L.med!==i0&&L.q1!==i0&&L.q3!==i0&&L.med>=L.q1&&L.q3>=L.med){var j=O("lowerfence");L.lf=j!==i0&&j<=L.q1?j:b1e(L,C,M);var N=O("upperfence");L.uf=N!==i0&&N>=L.q3?N:w1e(L,C,M);var H=O("mean");L.mean=H!==i0?H:M?gh.mean(C,M):(L.q1+L.q3)/2;var te=O("sd");L.sd=H!==i0&&te>=0?te:M?gh.stdev(C,M,L.mean):L.q3-L.q1,L.lo=T1e(L),L.uo=A1e(L);var oe=O("notchspan");oe=oe!==i0&&oe>0?oe:S1e(L,M),L.ln=L.med-oe,L.un=L.med+oe;var _e=L.lf,Ee=L.uf;r.boxpoints&&C.length&&(_e=Math.min(_e,C[0]),Ee=Math.max(Ee,C[M-1])),r.notched&&(_e=Math.min(_e,L.ln),Ee=Math.max(Ee,L.un)),L.min=_e,L.max=Ee}else{gh.warn(["Invalid input - make sure that q1 <= median <= q3","q1 = "+L.q1,"median = "+L.med,"q3 = "+L.q3].join(`
+`));var Ce;L.med!==i0?Ce=L.med:L.q1!==i0?L.q3!==i0?Ce=(L.q1+L.q3)/2:Ce=L.q1:L.q3!==i0?Ce=L.q3:Ce=0,L.med=Ce,L.q1=L.q3=Ce,L.lf=L.uf=Ce,L.mean=L.sd=Ce,L.ln=L.un=Ce,L.min=L.max=Ce}V=Math.min(V,L.min),G=Math.max(G,L.max),L.pts2=x.filter(T),o.push(L)}}r._extremes[c._id]=YI.findExtremes(c,[V,G],{padded:!0})}else{var me=c.makeCalcdata(r,f),ie=T_t(E,S),Se=E.length,Le=A_t(Se);for(l=0;l<r._length;l++)if(P=me[l],!!IV(P)){var Ae=gh.findBin(b[l],ie);Ae>=0&&Ae<Se&&(g={v:P,i:l},y1e(g,r,l),Le[Ae].push(g))}var Fe=1/0,Pe=-1/0,ge=r.quartilemethod,Re=ge==="exclusive",ce=ge==="inclusive";for(l=0;l<Se;l++)if(Le[l].length>0){if(L={},L.pos=L[d]=E[l],x=L.pts=Le[l].sort(_1e),C=L[f]=x.map(x1e),M=C.length,L.min=C[0],L.max=C[M-1],L.mean=gh.mean(C,M),L.sd=gh.stdev(C,M,L.mean)*r.sdmultiple,L.med=gh.interp(C,.5),M%2&&(Re||ce)){var Ze,ut;Re?(Ze=C.slice(0,M/2),ut=C.slice(M/2+1)):ce&&(Ze=C.slice(0,M/2+1),ut=C.slice(M/2)),L.q1=gh.interp(Ze,.5),L.q3=gh.interp(ut,.5)}else L.q1=gh.interp(C,.25),L.q3=gh.interp(C,.75);L.lf=b1e(L,C,M),L.uf=w1e(L,C,M),L.lo=T1e(L),L.uo=A1e(L);var pt=S1e(L,M);L.ln=L.med-pt,L.un=L.med+pt,Fe=Math.min(Fe,L.ln),Pe=Math.max(Pe,L.un),L.pts2=x.filter(T),o.push(L)}r.notched&&gh.isTypedArray(me)&&(me=Array.from(me)),r._extremes[c._id]=YI.findExtremes(c,r.notched?me.concat([Fe,Pe]):me,{padded:!0})}return S_t(o,r),o.length>0?(o[0].t={num:n[s],dPos:S,posLetter:d,valLetter:f,labels:{med:Oy(t,"median:"),min:Oy(t,"min:"),q1:Oy(t,"q1:"),q3:Oy(t,"q3:"),max:Oy(t,"max:"),mean:r.boxmean==="sd"||r.sizemode==="sd"?Oy(t,"mean \xB1 \u03C3:").replace("\u03C3",r.sdmultiple===1?"\u03C3":r.sdmultiple+"\u03C3"):Oy(t,"mean:"),lf:Oy(t,"lower fence:"),uf:Oy(t,"upper fence:")}},n[s]++,o):[{t:{empty:!0}}]};function w_t(e,t,r,n){var i=t in e,a=t+"0"in e,o="d"+t in e;if(i||a&&o){var s=r.makeCalcdata(e,t),l=b_t(e,r,t,s).vals;return[l,s]}var u;a?u=e[t+"0"]:"name"in e&&(r.type==="category"||IV(e.name)&&["linear","log"].indexOf(r.type)!==-1||gh.isDateTime(e.name)&&r.type==="date")?u=e.name:u=n;for(var c=r.type==="multicategory"?r.r2c_just_indices(u):r.d2c(u,0,e[t+"calendar"]),f=e._length,h=new Array(f),d=0;d<f;d++)h[d]=c;return[h]}function T_t(e,t){for(var r=e.length,n=new Array(r+1),i=0;i<r;i++)n[i]=e[i]-t;return n[r]=e[r-1]+t,n}function A_t(e){for(var t=new Array(e),r=0;r<e;r++)t[r]=[];return t}var PV={text:"tx",hovertext:"htx"};function y1e(e,t,r){for(var n in PV)gh.isArrayOrTypedArray(t[n])&&(Array.isArray(r)?gh.isArrayOrTypedArray(t[n][r[0]])&&(e[PV[n]]=t[n][r[0]][r[1]]):e[PV[n]]=t[n][r])}function S_t(e,t){if(gh.isArrayOrTypedArray(t.selectedpoints))for(var r=0;r<e.length;r++){for(var n=e[r].pts||[],i={},a=0;a<n.length;a++)i[n[a].i]=a;gh.tagSelected(n,t,i)}}function _1e(e,t){return e.v-t.v}function x1e(e){return e.v}function b1e(e,t,r){return r===0?e.q1:Math.min(e.q1,t[Math.min(gh.findBin(2.5*e.q1-1.5*e.q3,t,!0)+1,r-1)])}function w1e(e,t,r){return r===0?e.q3:Math.max(e.q3,t[Math.max(gh.findBin(2.5*e.q3-1.5*e.q1,t),0)])}function T1e(e){return 4*e.q1-3*e.q3}function A1e(e){return 4*e.q3-3*e.q1}function S1e(e,t){return t===0?0:1.57*(e.q3-e.q1)/Math.sqrt(t)}});var KI=ye((uur,L1e)=>{"use strict";var E1e=ho(),M_t=Dr(),E_t=Nb().getAxisGroup,k1e=["v","h"];function k_t(e,t){for(var r=e.calcdata,n=t.xaxis,i=t.yaxis,a=0;a<k1e.length;a++){for(var o=k1e[a],s=o==="h"?i:n,l=[],u=0;u<r.length;u++){var c=r[u],f=c[0].t,h=c[0].trace;h.visible===!0&&(h.type==="box"||h.type==="candlestick")&&!f.empty&&(h.orientation||"v")===o&&h.xaxis===n._id&&h.yaxis===i._id&&l.push(u)}C1e("box",e,l,s)}}function C1e(e,t,r,n){var i=t.calcdata,a=t._fullLayout,o=n._id,s=o.charAt(0),l,u,c,f=[],h=0;for(l=0;l<r.length;l++)for(c=i[r[l]],u=0;u<c.length;u++)f.push(n.c2l(c[u].pos,!0)),h+=(c[u].pts2||[]).length;if(f.length){var d=M_t.distinctVals(f);(n.type==="category"||n.type==="multicategory")&&(d.minDiff=1);var v=d.minDiff/2;E1e.minDtick(n,d.minDiff,d.vals[0],!0);var _=e==="violin"?"_numViolins":"_numBoxes",b=a[_],p=a[e+"mode"]==="group"&&b>1,k=1-a[e+"gap"],E=1-a[e+"groupgap"];for(l=0;l<r.length;l++){c=i[r[l]];var S=c[0].trace,L=c[0].t,x=S.width,C=S.side,M,g,P,T;if(x)M=g=T=x/2,P=0;else if(M=v,p){var z=E_t(a,n._id)+S.orientation,O=a._alignmentOpts[z]||{},V=O[S.alignmentgroup]||{},G=Object.keys(V.offsetGroups||{}).length,Z=G||b,j=G?S._offsetIndex:L.num;g=M*k*E/Z,P=2*M*(-.5+(j+.5)/Z)*k,T=M*k/Z}else g=M*k*E,P=0,T=M;L.dPos=M,L.bPos=P,L.bdPos=g,L.wHover=T;var N,H,te=P+g,oe,_e,Ee,Ce,me,ie,Se=!!x,Le=(S.boxpoints||S.points)&&h>0;if(C==="positive"?(N=M*(x?1:.5),oe=te,H=oe=P):C==="negative"?(N=oe=P,H=M*(x?1:.5),_e=te):(N=H=M,oe=_e=te),Le){var Ae=S.pointpos,Fe=S.jitter,Pe=S.marker.size/2,ge=0;Ae+Fe>=0&&(ge=te*(Ae+Fe),ge>N?(Se=!0,me=Pe,Ee=ge):ge>oe&&(me=Pe,Ee=N)),ge<=N&&(Ee=N);var Re=0;Ae-Fe<=0&&(Re=-te*(Ae-Fe),Re>H?(Se=!0,ie=Pe,Ce=Re):Re>_e&&(ie=Pe,Ce=H)),Re<=H&&(Ce=H)}else Ee=N,Ce=H;var ce=new Array(c.length);for(u=0;u<c.length;u++)ce[u]=c[u].pos;S._extremes[o]=E1e.findExtremes(n,ce,{padded:Se,vpadminus:Ce,vpadplus:Ee,vpadLinearized:!0,ppadminus:{x:ie,y:me}[s],ppadplus:{x:me,y:ie}[s]})}}}L1e.exports={crossTraceCalc:k_t,setPositionOffset:C1e}});var JI=ye((cur,F1e)=>{"use strict";var LT=Oa(),a2=Dr(),C_t=So(),P1e=5,L_t=.01;function P_t(e,t,r,n){var i=e._context.staticPlot,a=t.xaxis,o=t.yaxis;a2.makeTraceGroups(n,r,"trace boxes").each(function(s){var l=LT.select(this),u=s[0],c=u.t,f=u.trace;if(c.wdPos=c.bdPos*f.whiskerwidth,f.visible!==!0||c.empty){l.remove();return}var h,d;f.orientation==="h"?(h=o,d=a):(h=a,d=o),I1e(l,{pos:h,val:d},f,c,i),R1e(l,{x:a,y:o},f,c),D1e(l,{pos:h,val:d},f,c)})}function I1e(e,t,r,n,i){var a=r.orientation==="h",o=t.val,s=t.pos,l=!!s.rangebreaks,u=n.bPos,c=n.wdPos||0,f=n.bPosPxOffset||0,h=r.whiskerwidth||0,d=r.showwhiskers!==!1,v=r.notched||!1,_=v?1-2*r.notchwidth:1,b,p;Array.isArray(n.bdPos)?(b=n.bdPos[0],p=n.bdPos[1]):(b=n.bdPos,p=n.bdPos);var k=e.selectAll("path.box").data(r.type!=="violin"||r.box.visible?a2.identity:[]);k.enter().append("path").style("vector-effect",i?"none":"non-scaling-stroke").attr("class","box"),k.exit().remove(),k.each(function(E){if(E.empty)return LT.select(this).attr("d","M0,0Z");var S=s.c2l(E.pos+u,!0),L=s.l2p(S-b)+f,x=s.l2p(S+p)+f,C=l?(L+x)/2:s.l2p(S)+f,M=r.whiskerwidth,g=l?L*M+(1-M)*C:s.l2p(S-c)+f,P=l?x*M+(1-M)*C:s.l2p(S+c)+f,T=s.l2p(S-b*_)+f,z=s.l2p(S+p*_)+f,O=r.sizemode==="sd",V=o.c2p(O?E.mean-E.sd:E.q1,!0),G=O?o.c2p(E.mean+E.sd,!0):o.c2p(E.q3,!0),Z=a2.constrain(O?o.c2p(E.mean,!0):o.c2p(E.med,!0),Math.min(V,G)+1,Math.max(V,G)-1),j=E.lf===void 0||r.boxpoints===!1||O,N=o.c2p(j?E.min:E.lf,!0),H=o.c2p(j?E.max:E.uf,!0),te=o.c2p(E.ln,!0),oe=o.c2p(E.un,!0);a?LT.select(this).attr("d","M"+Z+","+T+"V"+z+"M"+V+","+L+"V"+x+(v?"H"+te+"L"+Z+","+z+"L"+oe+","+x:"")+"H"+G+"V"+L+(v?"H"+oe+"L"+Z+","+T+"L"+te+","+L:"")+"Z"+(d?"M"+V+","+C+"H"+N+"M"+G+","+C+"H"+H+(h===0?"":"M"+N+","+g+"V"+P+"M"+H+","+g+"V"+P):"")):LT.select(this).attr("d","M"+T+","+Z+"H"+z+"M"+L+","+V+"H"+x+(v?"V"+te+"L"+z+","+Z+"L"+x+","+oe:"")+"V"+G+"H"+L+(v?"V"+oe+"L"+T+","+Z+"L"+L+","+te:"")+"Z"+(d?"M"+C+","+V+"V"+N+"M"+C+","+G+"V"+H+(h===0?"":"M"+g+","+N+"H"+P+"M"+g+","+H+"H"+P):""))})}function R1e(e,t,r,n){var i=t.x,a=t.y,o=n.bdPos,s=n.bPos,l=r.boxpoints||r.points;a2.seedPseudoRandom();var u=function(h){return h.forEach(function(d){d.t=n,d.trace=r}),h},c=e.selectAll("g.points").data(l?u:[]);c.enter().append("g").attr("class","points"),c.exit().remove();var f=c.selectAll("path").data(function(h){var d,v=h.pts2,_=Math.max((h.max-h.min)/10,h.q3-h.q1),b=_*1e-9,p=_*L_t,k=[],E=0,S;if(r.jitter){if(_===0)for(E=1,k=new Array(v.length),d=0;d<v.length;d++)k[d]=1;else for(d=0;d<v.length;d++){var L=Math.max(0,d-P1e),x=v[L].v,C=Math.min(v.length-1,d+P1e),M=v[C].v;l!=="all"&&(v[d].v<h.lf?M=Math.min(M,h.lf):x=Math.max(x,h.uf));var g=Math.sqrt(p*(C-L)/(M-x+b))||0;g=a2.constrain(Math.abs(g),0,1),k.push(g),E=Math.max(g,E)}S=r.jitter*2/(E||1)}for(d=0;d<v.length;d++){var P=v[d],T=P.v,z=r.jitter?S*k[d]*(a2.pseudoRandom()-.5):0,O=h.pos+s+o*(r.pointpos+z);r.orientation==="h"?(P.y=O,P.x=T):(P.x=O,P.y=T),l==="suspectedoutliers"&&T<h.uo&&T>h.lo&&(P.so=!0)}return v});f.enter().append("path").classed("point",!0),f.exit().remove(),f.call(C_t.translatePoints,i,a)}function D1e(e,t,r,n){var i=t.val,a=t.pos,o=!!a.rangebreaks,s=n.bPos,l=n.bPosPxOffset||0,u=r.boxmean||(r.meanline||{}).visible,c,f;Array.isArray(n.bdPos)?(c=n.bdPos[0],f=n.bdPos[1]):(c=n.bdPos,f=n.bdPos);var h=e.selectAll("path.mean").data(r.type==="box"&&r.boxmean||r.type==="violin"&&r.box.visible&&r.meanline.visible?a2.identity:[]);h.enter().append("path").attr("class","mean").style({fill:"none","vector-effect":"non-scaling-stroke"}),h.exit().remove(),h.each(function(d){var v=a.c2l(d.pos+s,!0),_=a.l2p(v-c)+l,b=a.l2p(v+f)+l,p=o?(_+b)/2:a.l2p(v)+l,k=i.c2p(d.mean,!0),E=i.c2p(d.mean-d.sd,!0),S=i.c2p(d.mean+d.sd,!0);r.orientation==="h"?LT.select(this).attr("d","M"+k+","+_+"V"+b+(u==="sd"?"m0,0L"+E+","+p+"L"+k+","+_+"L"+S+","+p+"Z":"")):LT.select(this).attr("d","M"+_+","+k+"H"+b+(u==="sd"?"m0,0L"+p+","+E+"L"+_+","+k+"L"+p+","+S+"Z":""))})}F1e.exports={plot:P_t,plotBoxAndWhiskers:I1e,plotPoints:R1e,plotBoxMean:D1e}});var $I=ye((fur,z1e)=>{"use strict";var DV=Oa(),FV=ka(),zV=So();function I_t(e,t,r){var n=r||DV.select(e).selectAll("g.trace.boxes");n.style("opacity",function(i){return i[0].trace.opacity}),n.each(function(i){var a=DV.select(this),o=i[0].trace,s=o.line.width;function l(f,h,d,v){f.style("stroke-width",h+"px").call(FV.stroke,d).call(FV.fill,v)}var u=a.selectAll("path.box");if(o.type==="candlestick")u.each(function(f){if(!f.empty){var h=DV.select(this),d=o[f.dir];l(h,d.line.width,d.line.color,d.fillcolor),h.style("opacity",o.selectedpoints&&!f.selected?.3:1)}});else{l(u,s,o.line.color,o.fillcolor),a.selectAll("path.mean").style({"stroke-width":s,"stroke-dasharray":2*s+"px,"+s+"px"}).call(FV.stroke,o.line.color);var c=a.selectAll("path.point");zV.pointStyle(c,o,e)}})}function R_t(e,t,r){var n=t[0].trace,i=r.selectAll("path.point");n.selectedpoints?zV.selectedPointStyle(i,n):zV.pointStyle(i,n,e)}z1e.exports={style:I_t,styleOnSelect:R_t}});var qV=ye((hur,N1e)=>{"use strict";var D_t=ho(),OV=Dr(),A_=vf(),O1e=ka(),F_t=OV.fillText;function z_t(e,t,r,n){var i=e.cd,a=i[0].trace,o=a.hoveron,s=[],l;return o.indexOf("boxes")!==-1&&(s=s.concat(q1e(e,t,r,n))),o.indexOf("points")!==-1&&(l=B1e(e,t,r)),n==="closest"?l?[l]:s:(l&&s.push(l),s)}function q1e(e,t,r,n){var i=e.cd,a=e.xa,o=e.ya,s=i[0].trace,l=i[0].t,u=s.type==="violin",c,f,h,d,v,_,b,p,k,E,S,L=l.bdPos,x,C,M=l.wHover,g=function(Pe){return h.c2l(Pe.pos)+l.bPos-h.c2l(_)};u&&s.side!=="both"?(s.side==="positive"&&(k=function(Pe){var ge=g(Pe);return A_.inbox(ge,ge+M,E)},x=L,C=0),s.side==="negative"&&(k=function(Pe){var ge=g(Pe);return A_.inbox(ge-M,ge,E)},x=0,C=L)):(k=function(Pe){var ge=g(Pe);return A_.inbox(ge-M,ge+M,E)},x=C=L);var P;u?P=function(Pe){return A_.inbox(Pe.span[0]-v,Pe.span[1]-v,E)}:P=function(Pe){return A_.inbox(Pe.min-v,Pe.max-v,E)},s.orientation==="h"?(v=t,_=r,b=P,p=k,c="y",h=o,f="x",d=a):(v=r,_=t,b=k,p=P,c="x",h=a,f="y",d=o);var T=Math.min(1,L/Math.abs(h.r2c(h.range[1])-h.r2c(h.range[0])));E=e.maxHoverDistance-T,S=e.maxSpikeDistance-T;function z(Pe){return(b(Pe)+p(Pe))/2}var O=A_.getDistanceFunction(n,b,p,z);if(A_.getClosest(i,O,e),e.index===!1)return[];var V=i[e.index],G=s.line.color,Z=(s.marker||{}).color;O1e.opacity(G)&&s.line.width?e.color=G:O1e.opacity(Z)&&s.boxpoints?e.color=Z:e.color=s.fillcolor,e[c+"0"]=h.c2p(V.pos+l.bPos-C,!0),e[c+"1"]=h.c2p(V.pos+l.bPos+x,!0),e[c+"LabelVal"]=V.orig_p!==void 0?V.orig_p:V.pos;var j=c+"Spike";e.spikeDistance=z(V)*S/E,e[j]=h.c2p(V.pos,!0);var N=s.boxmean||s.sizemode==="sd"||(s.meanline||{}).visible,H=s.boxpoints||s.points,te=H&&N?["max","uf","q3","med","mean","q1","lf","min"]:H&&!N?["max","uf","q3","med","q1","lf","min"]:!H&&N?["max","q3","med","mean","q1","min"]:["max","q3","med","q1","min"],oe=d.range[1]<d.range[0];s.orientation===(oe?"v":"h")&&te.reverse();for(var _e=e.spikeDistance,Ee=e[j],Ce=[],me=0;me<te.length;me++){var ie=te[me];if(ie in V){var Se=V[ie],Le=d.c2p(Se,!0),Ae=OV.extendFlat({},e);Ae.attr=ie,Ae[f+"0"]=Ae[f+"1"]=Le,Ae[f+"LabelVal"]=Se,Ae[f+"Label"]=(l.labels?l.labels[ie]+" ":"")+D_t.hoverLabelText(d,Se,s[f+"hoverformat"]),Ae.hoverOnBox=!0,ie==="mean"&&"sd"in V&&(s.boxmean==="sd"||s.sizemode==="sd")&&(Ae[f+"err"]=V.sd),Ae.hovertemplate=!1,Ce.push(Ae)}}e.name="",e.spikeDistance=void 0,e[j]=void 0;for(var Fe=0;Fe<Ce.length;Fe++)Ce[Fe].attr!=="med"?(Ce[Fe].name="",Ce[Fe].spikeDistance=void 0,Ce[Fe][j]=void 0):(Ce[Fe].spikeDistance=_e,Ce[Fe][j]=Ee);return Ce}function B1e(e,t,r){for(var n=e.cd,i=e.xa,a=e.ya,o=n[0].trace,s=i.c2p(t),l=a.c2p(r),u,c=function(P){var T=Math.max(3,P.mrc||0);return Math.max(Math.abs(i.c2p(P.x)-s)-T,1-3/T)},f=function(P){var T=Math.max(3,P.mrc||0);return Math.max(Math.abs(a.c2p(P.y)-l)-T,1-3/T)},h=A_.quadrature(c,f),d=!1,v,_,b=0;b<n.length;b++){v=n[b];for(var p=0;p<(v.pts||[]).length;p++){_=v.pts[p];var k=h(_);k<=e.distance&&(e.distance=k,d=[b,p])}}if(!d)return!1;v=n[d[0]],_=v.pts[d[1]];var E=i.c2p(_.x,!0),S=a.c2p(_.y,!0),L=_.mrc||1;u=OV.extendFlat({},e,{index:_.i,color:(o.marker||{}).color,name:o.name,x0:E-L,x1:E+L,y0:S-L,y1:S+L,spikeDistance:e.distance,hovertemplate:o.hovertemplate});var x=v.orig_p,C=x!==void 0?x:v.pos,M;o.orientation==="h"?(M=a,u.xLabelVal=_.x,u.yLabelVal=C):(M=i,u.xLabelVal=C,u.yLabelVal=_.y);var g=M._id.charAt(0);return u[g+"Spike"]=M.c2p(v.pos,!0),F_t(_,o,u),u}N1e.exports={hoverPoints:z_t,hoverOnBoxes:q1e,hoverOnPoints:B1e}});var V1e=ye((dur,U1e)=>{"use strict";U1e.exports=function(t,r){return r.hoverOnBox&&(t.hoverOnBox=r.hoverOnBox),"xVal"in r&&(t.x=r.xVal),"yVal"in r&&(t.y=r.yVal),r.xa&&(t.xaxis=r.xa),r.ya&&(t.yaxis=r.ya),t}});var BV=ye((vur,G1e)=>{"use strict";G1e.exports=function(t,r){var n=t.cd,i=t.xaxis,a=t.yaxis,o=[],s,l;if(r===!1)for(s=0;s<n.length;s++)for(l=0;l<(n[s].pts||[]).length;l++)n[s].pts[l].selected=0;else for(s=0;s<n.length;s++)for(l=0;l<(n[s].pts||[]).length;l++){var u=n[s].pts[l],c=i.c2p(u.x),f=a.c2p(u.y);r.contains([c,f],null,u.i,t)?(o.push({pointNumber:u.i,x:i.c2d(u.x),y:a.c2d(u.y)}),u.selected=1):u.selected=0}return o}});var j1e=ye((pur,H1e)=>{"use strict";H1e.exports={attributes:x4(),layoutAttributes:b4(),supplyDefaults:T4().supplyDefaults,crossTraceDefaults:T4().crossTraceDefaults,supplyLayoutDefaults:ZI().supplyLayoutDefaults,calc:RV(),crossTraceCalc:KI().crossTraceCalc,plot:JI().plot,style:$I().style,styleOnSelect:$I().styleOnSelect,hoverPoints:qV().hoverPoints,eventData:V1e(),selectPoints:BV(),moduleType:"trace",name:"box",basePlotModule:ph(),categories:["cartesian","svg","symbols","oriented","box-violin","showLegend","boxLayout","zoomScale"],meta:{}}});var X1e=ye((gur,W1e)=>{"use strict";W1e.exports=j1e()});var PT=ye((mur,Y1e)=>{"use strict";var O_t=Tu(),{extendFlat:Pp}=Ao(),q_t=Gl(),{axisHoverFormat:NV}=df(),B_t=ec(),{hovertemplateAttrs:N_t,templatefallbackAttrs:Z1e,texttemplateAttrs:U_t}=Ll(),n0=pf();Y1e.exports=Pp({z:{valType:"data_array",editType:"calc"},x:Pp({},n0.x,{impliedEdits:{xtype:"array"}}),x0:Pp({},n0.x0,{impliedEdits:{xtype:"scaled"}}),dx:Pp({},n0.dx,{impliedEdits:{xtype:"scaled"}}),y:Pp({},n0.y,{impliedEdits:{ytype:"array"}}),y0:Pp({},n0.y0,{impliedEdits:{ytype:"scaled"}}),dy:Pp({},n0.dy,{impliedEdits:{ytype:"scaled"}}),xperiod:Pp({},n0.xperiod,{impliedEdits:{xtype:"scaled"}}),yperiod:Pp({},n0.yperiod,{impliedEdits:{ytype:"scaled"}}),xperiod0:Pp({},n0.xperiod0,{impliedEdits:{xtype:"scaled"}}),yperiod0:Pp({},n0.yperiod0,{impliedEdits:{ytype:"scaled"}}),xperiodalignment:Pp({},n0.xperiodalignment,{impliedEdits:{xtype:"scaled"}}),yperiodalignment:Pp({},n0.yperiodalignment,{impliedEdits:{ytype:"scaled"}}),text:{valType:"data_array",editType:"calc"},hovertext:{valType:"data_array",editType:"calc"},transpose:{valType:"boolean",dflt:!1,editType:"calc"},xtype:{valType:"enumerated",values:["array","scaled"],editType:"calc+clearAxisTypes"},ytype:{valType:"enumerated",values:["array","scaled"],editType:"calc+clearAxisTypes"},zsmooth:{valType:"enumerated",values:["fast","best",!1],dflt:!1,editType:"calc"},hoverongaps:{valType:"boolean",dflt:!0,editType:"none"},connectgaps:{valType:"boolean",editType:"calc"},xgap:{valType:"number",dflt:0,min:0,editType:"plot"},ygap:{valType:"number",dflt:0,min:0,editType:"plot"},xhoverformat:NV("x"),yhoverformat:NV("y"),zhoverformat:NV("z",1),hovertemplate:N_t(),hovertemplatefallback:Z1e(),texttemplate:U_t({arrayOk:!1,editType:"plot"},{keys:["x","y","z","text"]}),texttemplatefallback:Z1e({editType:"plot"}),textfont:B_t({editType:"plot",autoSize:!0,autoColor:!0,colorEditType:"style"}),showlegend:Pp({},q_t.showlegend,{dflt:!1}),zorder:n0.zorder},O_t("",{cLetter:"z",autoColorDflt:!1}))});var e8=ye((yur,J1e)=>{"use strict";var V_t=Eo(),QI=Dr(),G_t=qa();J1e.exports=function(t,r,n,i,a,o){var s=n("z");a=a||"x",o=o||"y";var l,u;if(s===void 0||!s.length)return 0;if(QI.isArray1D(s)){l=n(a),u=n(o);var c=QI.minRowLength(l),f=QI.minRowLength(u);if(c===0||f===0)return 0;r._length=Math.min(c,f,s.length)}else{if(l=K1e(a,n),u=K1e(o,n),!H_t(s))return 0;n("transpose"),r._length=null}var h=G_t.getComponentMethod("calendars","handleTraceDefaults");return h(t,r,[a,o],i),!0};function K1e(e,t){var r=t(e),n=r?t(e+"type","array"):"scaled";return n==="scaled"&&(t(e+"0"),t("d"+e)),r}function H_t(e){for(var t=!0,r=!1,n=!1,i,a=0;a<e.length;a++){if(i=e[a],!QI.isArrayOrTypedArray(i)){t=!1;break}i.length>0&&(r=!0);for(var o=0;o<i.length;o++)if(V_t(i[o])){n=!0;break}}return t&&r&&n}});var A4=ye((_ur,Q1e)=>{"use strict";var $1e=Dr();Q1e.exports=function(t,r){t("texttemplate"),t("texttemplatefallback");var n=$1e.extendFlat({},r.font,{color:"auto",size:"auto"});$1e.coerceFont(t,"textfont",n)}});var UV=ye((xur,e_e)=>{"use strict";e_e.exports=function(t,r,n){var i=n("zsmooth");i===!1&&(n("xgap"),n("ygap")),n("zhoverformat")}});var i_e=ye((bur,r_e)=>{"use strict";var t_e=Dr(),j_t=e8(),W_t=A4(),X_t=Ig(),Z_t=UV(),Y_t=Qh(),K_t=PT();r_e.exports=function(t,r,n,i){function a(s,l){return t_e.coerce(t,r,K_t,s,l)}var o=j_t(t,r,a,i);if(!o){r.visible=!1;return}X_t(t,r,i,a),a("xhoverformat"),a("yhoverformat"),a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),W_t(a,i),Z_t(t,r,a,i),a("hoverongaps"),a("connectgaps",t_e.isArray1D(r.z)&&r.zsmooth!==!1),Y_t(t,r,i,a,{prefix:"",cLetter:"z"}),a("zorder")}});var VV=ye((wur,n_e)=>{"use strict";var IT=Eo();n_e.exports={count:function(e,t,r){return r[e]++,1},sum:function(e,t,r,n){var i=n[t];return IT(i)?(i=Number(i),r[e]+=i,i):0},avg:function(e,t,r,n,i){var a=n[t];return IT(a)&&(a=Number(a),r[e]+=a,i[e]++),0},min:function(e,t,r,n){var i=n[t];if(IT(i))if(i=Number(i),IT(r[e])){if(r[e]>i){var a=i-r[e];return r[e]=i,a}}else return r[e]=i,i;return 0},max:function(e,t,r,n){var i=n[t];if(IT(i))if(i=Number(i),IT(r[e])){if(r[e]<i){var a=i-r[e];return r[e]=i,a}}else return r[e]=i,i;return 0}}});var GV=ye((Tur,a_e)=>{"use strict";a_e.exports={percent:function(e,t){for(var r=e.length,n=100/t,i=0;i<r;i++)e[i]*=n},probability:function(e,t){for(var r=e.length,n=0;n<r;n++)e[n]/=t},density:function(e,t,r,n){var i=e.length;n=n||1;for(var a=0;a<i;a++)e[a]*=r[a]*n},"probability density":function(e,t,r,n){var i=e.length;n&&(t/=n);for(var a=0;a<i;a++)e[a]*=r[a]/t}}});var HV=ye((Aur,o_e)=>{"use strict";o_e.exports=function(t,r){for(var n=t.length,i=0,a=0;a<n;a++)r[a]?(t[a]/=r[a],i+=t[a]):t[a]=null;return i}});var jV=ye((Sur,v_e)=>{"use strict";var RT=fs(),o2=RT.ONEAVGYEAR,s_e=RT.ONEAVGMONTH,r8=RT.ONEDAY,l_e=RT.ONEHOUR,u_e=RT.ONEMIN,c_e=RT.ONESEC,f_e=ho().tickIncrement;v_e.exports=function(t,r,n,i,a){var o=-1.1*r,s=-.1*r,l=t-s,u=n[0],c=n[1],f=Math.min(t8(u+s,u+l,i,a),t8(c+s,c+l,i,a)),h=Math.min(t8(u+o,u+s,i,a),t8(c+o,c+s,i,a)),d,v;if(f>h&&h<Math.abs(c-u)/4e3?(d=f,v=!1):(d=Math.min(f,h),v=!0),i.type==="date"&&d>r8){var _=d===o2?1:6,b=d===o2?"M12":"M1";return function(p,k){var E=i.c2d(p,o2,a),S=E.indexOf("-",_);S>0&&(E=E.slice(0,S));var L=i.d2c(E,0,a);if(L<p){var x=f_e(L,b,!1,a);(L+x)/2<p+t&&(L=x)}return k&&v?f_e(L,b,!0,a):L}}return function(p,k){var E=d*Math.round(p/d);return E+d/10<p&&E+d*.9<p+t&&(E+=d),k&&v&&(E-=d),E}};function t8(e,t,r,n){if(e*t<=0)return 1/0;for(var i=Math.abs(t-e),a=r.type==="date",o=h_e(i,a),s=0;s<10;s++){var l=h_e(o*80,a);if(o===l)break;if(J_t(l,e,t,a,r,n))o=l;else break}return o}function h_e(e,t){return t&&e>c_e?e>r8?e>o2*1.1?o2:e>s_e*1.1?s_e:r8:e>l_e?l_e:e>u_e?u_e:c_e:Math.pow(10,Math.floor(Math.log(e)/Math.LN10))}function J_t(e,t,r,n,i,a){if(n&&e>r8){var o=d_e(t,i,a),s=d_e(r,i,a),l=e===o2?0:1;return o[l]!==s[l]}return Math.floor(r/e)-Math.floor(t/e)>.1}function d_e(e,t,r){var n=t.c2d(e,o2,r).split("-");return n[0]===""&&(n.unshift(),n[0]="-"+n[0]),n}});var ZV=ye((Mur,__e)=>{"use strict";var WV=Eo(),Hv=Dr(),p_e=qa(),G0=ho(),{hasColorscale:g_e}=pv(),m_e=gv(),$_t=g4(),y_e=VV(),Q_t=GV(),ext=HV(),txt=jV();function rxt(e,t){var r=[],n=[],i=t.orientation==="h",a=G0.getFromId(e,i?t.yaxis:t.xaxis),o=i?"y":"x",s={x:"y",y:"x"}[o],l=t[o+"calendar"],u=t.cumulative,c,f=XV(e,t,a,o),h=f[0],d=f[1],v=typeof h.size=="string",_=[],b=v?_:h,p=[],k=[],E=[],S=0,L=t.histnorm,x=t.histfunc,C=L.indexOf("density")!==-1,M,g,P;u.enabled&&C&&(L=L.replace(/ ?density$/,""),C=!1);var T=x==="max"||x==="min",z=T?null:0,O=y_e.count,V=Q_t[L],G=!1,Z=function(ge){return a.r2c(ge,0,l)},j;for(Hv.isArrayOrTypedArray(t[s])&&x!=="count"&&(j=t[s],G=x==="avg",O=y_e[x]),c=Z(h.start),g=Z(h.end)+(c-G0.tickIncrement(c,h.size,!1,l))/1e6;c<g&&r.length<1e6&&(M=G0.tickIncrement(c,h.size,!1,l),r.push((c+M)/2),n.push(z),E.push([]),_.push(c),C&&p.push(1/(M-c)),G&&k.push(0),!(M<=c));)c=M;_.push(c),!v&&a.type==="date"&&(b={start:Z(b.start),end:Z(b.end),size:b.size}),e._fullLayout._roundFnOpts||(e._fullLayout._roundFnOpts={});var N=t["_"+o+"bingroup"],H={leftGap:1/0,rightGap:1/0};N&&(e._fullLayout._roundFnOpts[N]||(e._fullLayout._roundFnOpts[N]=H),H=e._fullLayout._roundFnOpts[N]);var te=n.length,oe=!0,_e=H.leftGap,Ee=H.rightGap,Ce={};for(c=0;c<d.length;c++){var me=d[c];P=Hv.findBin(me,b),P>=0&&P<te&&(S+=O(P,c,n,j,k),oe&&E[P].length&&me!==d[E[P][0]]&&(oe=!1),E[P].push(c),Ce[c]=P,_e=Math.min(_e,me-_[P]),Ee=Math.min(Ee,_[P+1]-me))}H.leftGap=_e,H.rightGap=Ee;var ie;oe||(ie=function(ge,Re){return function(){var ce=e._fullLayout._roundFnOpts[N];return txt(ce.leftGap,ce.rightGap,_,a,l)(ge,Re)}}),G&&(S=ext(n,k)),V&&V(n,S,p),u.enabled&&axt(n,u.direction,u.currentbin);var Se=Math.min(r.length,n.length),Le=[],Ae=0,Fe=Se-1;for(c=0;c<Se;c++)if(n[c]){Ae=c;break}for(c=Se-1;c>=Ae;c--)if(n[c]){Fe=c;break}for(c=Ae;c<=Fe;c++)if(WV(r[c])&&WV(n[c])){var Pe={p:r[c],s:n[c],b:0};u.enabled||(Pe.pts=E[c],oe?Pe.ph0=Pe.ph1=E[c].length?d[E[c][0]]:r[c]:(t._computePh=!0,Pe.ph0=ie(_[c]),Pe.ph1=ie(_[c+1],!0))),Le.push(Pe)}return Le.length===1&&(Le[0].width1=G0.tickIncrement(Le[0].p,h.size,!1,l)-Le[0].p),g_e(t,"marker")&&m_e(e,t,{vals:t.marker.color,containerStr:"marker",cLetter:"c"}),g_e(t,"marker.line")&&m_e(e,t,{vals:t.marker.line.color,containerStr:"marker.line",cLetter:"c"}),$_t(Le,t),Hv.isArrayOrTypedArray(t.selectedpoints)&&Hv.tagSelected(Le,t,Ce),Le}function XV(e,t,r,n,i){var a=n+"bins",o=e._fullLayout,s=t["_"+n+"bingroup"],l=o._histogramBinOpts[s],u=o.barmode==="overlay",c,f,h,d,v,_,b,p=function(me){return r.r2c(me,0,d)},k=function(me){return r.c2r(me,0,d)},E=r.type==="date"?function(me){return me||me===0?Hv.cleanDate(me,null,d):null}:function(me){return WV(me)?Number(me):null};function S(me,ie,Se){ie[me+"Found"]?(ie[me]=E(ie[me]),ie[me]===null&&(ie[me]=Se[me])):(_[me]=ie[me]=Se[me],Hv.nestedProperty(f[0],a+"."+me).set(Se[me]))}if(t["_"+n+"autoBinFinished"])delete t["_"+n+"autoBinFinished"];else{f=l.traces;var L=[],x=!0,C=!1,M=!1;for(c=0;c<f.length;c++)if(h=f[c],h.visible){var g=l.dirs[c];v=h["_"+g+"pos0"]=r.makeCalcdata(h,g),L=Hv.concat(L,v),delete h["_"+n+"autoBinFinished"],t.visible===!0&&(x?x=!1:(delete h._autoBin,h["_"+n+"autoBinFinished"]=1),p_e.traceIs(h,"2dMap")&&(C=!0),h.type==="histogram2dcontour"&&(M=!0))}d=f[0][n+"calendar"];var P=G0.autoBin(L,r,l.nbins,C,d,l.sizeFound&&l.size),T=f[0]._autoBin={};if(_=T[l.dirs[0]]={},M&&(l.size||(P.start=k(G0.tickIncrement(p(P.start),P.size,!0,d))),l.end===void 0&&(P.end=k(G0.tickIncrement(p(P.end),P.size,!1,d)))),u&&!p_e.traceIs(t,"2dMap")&&P._dataSpan===0&&r.type!=="category"&&r.type!=="multicategory"&&t.bingroup===""&&typeof t.xbins=="undefined"){if(i)return[P,v,!0];P=ixt(e,t,r,n,a)}b=h.cumulative||{},b.enabled&&b.currentbin!=="include"&&(b.direction==="decreasing"?P.start=k(G0.tickIncrement(p(P.start),P.size,!0,d)):P.end=k(G0.tickIncrement(p(P.end),P.size,!1,d))),l.size=P.size,l.sizeFound||(_.size=P.size,Hv.nestedProperty(f[0],a+".size").set(P.size)),S("start",l,P),S("end",l,P)}v=t["_"+n+"pos0"],delete t["_"+n+"pos0"];var z=t._input[a]||{},O=Hv.extendFlat({},l),V=l.start,G=r.r2l(z.start),Z=G!==void 0;if((l.startFound||Z)&&G!==r.r2l(V)){var j=Z?G:Hv.aggNums(Math.min,null,v),N={type:r.type==="category"||r.type==="multicategory"?"linear":r.type,r2l:r.r2l,dtick:l.size,tick0:V,calendar:d,range:[j,G0.tickIncrement(j,l.size,!1,d)].map(r.l2r)},H=G0.tickFirst(N);H>r.r2l(j)&&(H=G0.tickIncrement(H,l.size,!0,d)),O.start=r.l2r(H),Z||Hv.nestedProperty(t,a+".start").set(O.start)}var te=l.end,oe=r.r2l(z.end),_e=oe!==void 0;if((l.endFound||_e)&&oe!==r.r2l(te)){var Ee=_e?oe:Hv.aggNums(Math.max,null,v);O.end=r.l2r(Ee),_e||Hv.nestedProperty(t,a+".start").set(O.end)}var Ce="autobin"+n;return t._input[Ce]===!1&&(t._input[a]=Hv.extendFlat({},t[a]||{}),delete t._input[Ce],delete t[Ce]),[O,v]}function ixt(e,t,r,n,i){var a=e._fullLayout,o=nxt(e,t),s=!1,l=1/0,u=[t],c,f,h;for(c=0;c<o.length;c++)if(f=o[c],f===t)s=!0;else if(!s)h=a._histogramBinOpts[f["_"+n+"bingroup"]],l=Math.min(l,h.size||f[i].size);else{var d=XV(e,f,r,n,!0),v=d[0],_=d[2];f["_"+n+"autoBinFinished"]=1,f["_"+n+"pos0"]=d[1],_?u.push(f):l=Math.min(l,v.size)}var b=new Array(u.length);for(c=0;c<u.length;c++)for(var p=u[c]["_"+n+"pos0"],k=0;k<p.length;k++)if(p[k]!==void 0){b[c]=p[k];break}for(isFinite(l)||(l=Hv.distinctVals(b).minDiff),c=0;c<u.length;c++){f=u[c];var E=f[n+"calendar"],S={start:r.c2r(b[c]-l/2,0,E),end:r.c2r(b[c]+l/2,0,E),size:l};f._input[i]=f[i]=S,h=a._histogramBinOpts[f["_"+n+"bingroup"]],h&&Hv.extendFlat(h,S)}return t[i]}function nxt(e,t){for(var r=t.xaxis,n=t.yaxis,i=t.orientation,a=[],o=e._fullData,s=0;s<o.length;s++){var l=o[s];l.type==="histogram"&&l.visible===!0&&l.orientation===i&&l.xaxis===r&&l.yaxis===n&&a.push(l)}return a}function axt(e,t,r){var n,i,a;function o(l){a=e[l],e[l]/=2}function s(l){i=e[l],e[l]=a+i/2,a+=i}if(r==="half")if(t==="increasing")for(o(0),n=1;n<e.length;n++)s(n);else for(o(e.length-1),n=e.length-2;n>=0;n--)s(n);else if(t==="increasing"){for(n=1;n<e.length;n++)e[n]+=e[n-1];r==="exclude"&&(e.unshift(0),e.pop())}else{for(n=e.length-2;n>=0;n--)e[n]+=e[n+1];r==="exclude"&&(e.push(0),e.shift())}}__e.exports={calc:rxt,calcAllAutoBins:XV}});var E_e=ye((Eur,M_e)=>{"use strict";var x_e=Dr(),DT=ho(),b_e=VV(),oxt=GV(),sxt=HV(),lxt=jV(),w_e=ZV().calcAllAutoBins;M_e.exports=function(t,r){var n=DT.getFromId(t,r.xaxis),i=DT.getFromId(t,r.yaxis),a=r.xcalendar,o=r.ycalendar,s=function(It){return n.r2c(It,0,a)},l=function(It){return i.r2c(It,0,o)},u=function(It){return n.c2r(It,0,a)},c=function(It){return i.c2r(It,0,o)},f,h,d,v,_=w_e(t,r,n,"x"),b=_[0],p=_[1],k=w_e(t,r,i,"y"),E=k[0],S=k[1],L=r._length;p.length>L&&p.splice(L,p.length-L),S.length>L&&S.splice(L,S.length-L);var x=[],C=[],M=[],g=typeof b.size=="string",P=typeof E.size=="string",T=[],z=[],O=g?T:b,V=P?z:E,G=0,Z=[],j=[],N=r.histnorm,H=r.histfunc,te=N.indexOf("density")!==-1,oe=H==="max"||H==="min",_e=oe?null:0,Ee=b_e.count,Ce=oxt[N],me=!1,ie=[],Se=[],Le="z"in r?r.z:"marker"in r&&Array.isArray(r.marker.color)?r.marker.color:"";Le&&H!=="count"&&(me=H==="avg",Ee=b_e[H]);var Ae=b.size,Fe=s(b.start),Pe=s(b.end)+(Fe-DT.tickIncrement(Fe,Ae,!1,a))/1e6;for(f=Fe;f<Pe;f=DT.tickIncrement(f,Ae,!1,a))C.push(_e),T.push(f),me&&M.push(0);T.push(f);var ge=C.length,Re=(f-Fe)/ge,ce=u(Fe+Re/2),Ze=E.size,ut=l(E.start),pt=l(E.end)+(ut-DT.tickIncrement(ut,Ze,!1,o))/1e6;for(f=ut;f<pt;f=DT.tickIncrement(f,Ze,!1,o)){x.push(C.slice()),z.push(f);var Zt=new Array(ge);for(h=0;h<ge;h++)Zt[h]=[];j.push(Zt),me&&Z.push(M.slice())}z.push(f);var st=x.length,lt=(f-ut)/st,Gt=c(ut+lt/2);te&&(ie=T_e(C.length,O,Re,g),Se=T_e(x.length,V,lt,P)),!g&&n.type==="date"&&(O=A_e(s,O)),!P&&i.type==="date"&&(V=A_e(l,V));var Nt=!0,Jt=!0,sr=new Array(ge),wr=new Array(st),cr=1/0,$e=1/0,St=1/0,Qt=1/0;for(f=0;f<L;f++){var Vt=p[f],_t=S[f];d=x_e.findBin(Vt,O),v=x_e.findBin(_t,V),d>=0&&d<ge&&v>=0&&v<st&&(G+=Ee(d,f,x[v],Le,Z[v]),j[v][d].push(f),Nt&&(sr[d]===void 0?sr[d]=Vt:sr[d]!==Vt&&(Nt=!1)),Jt&&(wr[v]===void 0?wr[v]=_t:wr[v]!==_t&&(Jt=!1)),cr=Math.min(cr,Vt-T[d]),$e=Math.min($e,T[d+1]-Vt),St=Math.min(St,_t-z[v]),Qt=Math.min(Qt,z[v+1]-_t))}if(me)for(v=0;v<st;v++)G+=sxt(x[v],Z[v]);if(Ce)for(v=0;v<st;v++)Ce(x[v],G,ie,Se[v]);return{x:p,xRanges:S_e(T,Nt&&sr,cr,$e,n,a),x0:ce,dx:Re,y:S,yRanges:S_e(z,Jt&&wr,St,Qt,i,o),y0:Gt,dy:lt,z:x,pts:j}};function T_e(e,t,r,n){var i=new Array(e),a;if(n)for(a=0;a<e;a++)i[a]=1/(t[a+1]-t[a]);else{var o=1/r;for(a=0;a<e;a++)i[a]=o}return i}function A_e(e,t){return{start:e(t.start),end:e(t.end),size:t.size}}function S_e(e,t,r,n,i,a){var o,s=e.length-1,l=new Array(s),u=lxt(r,n,e,i,a);for(o=0;o<s;o++){var c=(t||[])[o];l[o]=c===void 0?[u(e[o]),u(e[o+1],!0)]:[c,c]}return l}});var i8=ye((kur,L_e)=>{"use strict";var Im=Dr(),k_e=fs().BADNUM,C_e=Dg();L_e.exports=function(t,r,n,i,a,o){var s=t._length,l=r.makeCalcdata(t,i),u=n.makeCalcdata(t,a);l=C_e(t,r,i,l).vals,u=C_e(t,n,a,u).vals;var c=t.text,f=c!==void 0&&Im.isArray1D(c),h=t.hovertext,d=h!==void 0&&Im.isArray1D(h),v,_,b=Im.distinctVals(l),p=b.vals,k=Im.distinctVals(u),E=k.vals,S=[],L,x,C=E.length,M=p.length;for(v=0;v<o.length;v++)S[v]=Im.init2dArray(C,M);f&&(L=Im.init2dArray(C,M)),d&&(x=Im.init2dArray(C,M));var g=Im.init2dArray(C,M);for(v=0;v<s;v++)if(l[v]!==k_e&&u[v]!==k_e){var P=Im.findBin(l[v]+b.minDiff/2,p),T=Im.findBin(u[v]+k.minDiff/2,E);for(_=0;_<o.length;_++){var z=o[_],O=t[z],V=S[_];V[T][P]=O[v],g[T][P]=v}f&&(L[T][P]=c[v]),d&&(x[T][P]=h[v])}for(t["_"+i]=p,t["_"+a]=E,_=0;_<o.length;_++)t["_"+o[_]]=S[_];f&&(t._text=L),d&&(t._hovertext=x),r&&r.type==="category"&&(t["_"+i+"CategoryMap"]=p.map(function(G){return r._categories[G]})),n&&n.type==="category"&&(t["_"+a+"CategoryMap"]=E.map(function(G){return n._categories[G]})),t._after2before=g}});var a8=ye((Cur,P_e)=>{"use strict";var uxt=Eo(),cxt=Dr(),n8=fs().BADNUM;P_e.exports=function(t,r,n,i){var a,o,s,l,u,c;function f(p){if(uxt(p))return+p}if(r&&r.transpose){for(a=0,u=0;u<t.length;u++)a=Math.max(a,t[u].length);if(a===0)return!1;s=function(p){return p.length},l=function(p,k,E){return(p[E]||[])[k]}}else a=t.length,s=function(p,k){return p[k].length},l=function(p,k,E){return(p[k]||[])[E]};var h=function(p,k,E){return k===n8||E===n8?n8:l(p,k,E)};function d(p){if(r&&r.type!=="carpet"&&r.type!=="contourcarpet"&&p&&p.type==="category"&&r["_"+p._id.charAt(0)].length){var k=p._id.charAt(0),E={},S=r["_"+k+"CategoryMap"]||r[k];for(u=0;u<S.length;u++)E[S[u]]=u;return function(L){var x=E[p._categories[L]];return x+1?x:n8}}else return cxt.identity}var v=d(n),_=d(i);i&&i.type==="category"&&(a=i._categories.length);var b=new Array(a);for(u=0;u<a;u++)for(n&&n.type==="category"?o=n._categories.length:o=s(t,u),b[u]=new Array(o),c=0;c<o;c++)b[u][c]=f(h(t,_(u),v(c)));return b}});var o8=ye((Lur,D_e)=>{"use strict";var fxt=Dr(),I_e=.01,hxt=[[-1,0],[1,0],[0,-1],[0,1]];function dxt(e){return .5-.25*Math.min(1,e*.5)}D_e.exports=function(t,r){var n=1,i;for(R_e(t,r),i=0;i<r.length&&!(r[i][2]<4);i++);for(r=r.slice(i),i=0;i<100&&n>I_e;i++)n=R_e(t,r,dxt(n));return n>I_e&&fxt.log("interp2d didn't converge quickly",n),t};function R_e(e,t,r){var n=0,i,a,o,s,l,u,c,f,h,d,v,_,b;for(s=0;s<t.length;s++){for(i=t[s],a=i[0],o=i[1],v=e[a][o],d=0,h=0,l=0;l<4;l++)u=hxt[l],c=e[a+u[0]],c&&(f=c[o+u[1]],f!==void 0&&(d===0?_=b=f:(_=Math.min(_,f),b=Math.max(b,f)),h++,d+=f));if(h===0)throw"iterateInterp2d order is wrong: no defined neighbors";e[a][o]=d/h,v===void 0?h<4&&(n=1):(e[a][o]=(1+r)*e[a][o]-r*v,b>_&&(n=Math.max(n,Math.abs(e[a][o]-v)/(b-_))))}return n}});var s8=ye((Pur,F_e)=>{"use strict";var vxt=Dr().maxRowLength;F_e.exports=function(t){var r=[],n={},i=[],a=t[0],o=[],s=[0,0,0],l=vxt(t),u,c,f,h,d,v,_,b;for(c=0;c<t.length;c++)for(u=o,o=a,a=t[c+1]||[],f=0;f<l;f++)o[f]===void 0&&(v=(o[f-1]!==void 0?1:0)+(o[f+1]!==void 0?1:0)+(u[f]!==void 0?1:0)+(a[f]!==void 0?1:0),v?(c===0&&v++,f===0&&v++,c===t.length-1&&v++,f===o.length-1&&v++,v<4&&(n[[c,f]]=[c,f,v]),r.push([c,f,v])):i.push([c,f]));for(;i.length;){for(_={},b=!1,d=i.length-1;d>=0;d--)h=i[d],c=h[0],f=h[1],v=((n[[c-1,f]]||s)[2]+(n[[c+1,f]]||s)[2]+(n[[c,f-1]]||s)[2]+(n[[c,f+1]]||s)[2])/20,v&&(_[h]=[c,f,v],i.splice(d,1),b=!0);if(!b)throw"findEmpties iterated with no new neighbors";for(h in _)n[h]=_[h],r.push(_[h])}return r.sort(function(p,k){return k[2]-p[2]})}});var YV=ye((Iur,q_e)=>{"use strict";var z_e=qa(),O_e=Dr().isArrayOrTypedArray;q_e.exports=function(t,r,n,i,a,o){var s=[],l=z_e.traceIs(t,"contour"),u=z_e.traceIs(t,"histogram"),c,f,h,d=O_e(r)&&r.length>1;if(d&&!u&&o.type!=="category"){var v=r.length;if(v<=a){if(l)s=Array.from(r).slice(0,a);else if(a===1)o.type==="log"?s=[.5*r[0],2*r[0]]:s=[r[0]-.5,r[0]+.5];else if(o.type==="log"){for(s=[Math.pow(r[0],1.5)/Math.pow(r[1],.5)],h=1;h<v;h++)s.push(Math.sqrt(r[h-1]*r[h]));s.push(Math.pow(r[v-1],1.5)/Math.pow(r[v-2],.5))}else{for(s=[1.5*r[0]-.5*r[1]],h=1;h<v;h++)s.push((r[h-1]+r[h])*.5);s.push(1.5*r[v-1]-.5*r[v-2])}if(v<a){var _=s[s.length-1],b;if(o.type==="log")for(b=_/s[s.length-2],h=v;h<a;h++)_*=b,s.push(_);else for(b=_-s[s.length-2],h=v;h<a;h++)_+=b,s.push(_)}}else return l?r.slice(0,a):r.slice(0,a+1)}else{var p=t[o._id.charAt(0)+"calendar"];if(u)c=o.r2c(n,0,p);else if(O_e(r)&&r.length===1)c=r[0];else if(n===void 0)c=0;else{var k=o.type==="log"?o.d2c:o.r2c;c=k(n,0,p)}for(f=i||1,h=l?0:-.5;h<a;h++)s.push(c+f*h)}return s}});var c8=ye((Rur,V_e)=>{"use strict";var B_e=qa(),KV=Dr(),l8=ho(),N_e=Dg(),pxt=E_e(),gxt=gv(),mxt=i8(),yxt=a8(),_xt=o8(),xxt=s8(),u8=YV(),JV=fs().BADNUM;V_e.exports=function(t,r){var n=l8.getFromId(t,r.xaxis||"x"),i=l8.getFromId(t,r.yaxis||"y"),a=B_e.traceIs(r,"contour"),o=B_e.traceIs(r,"histogram"),s=a?"best":r.zsmooth,l,u,c,f,h,d,v,_,b,p,k;if(n._minDtick=0,i._minDtick=0,o)k=pxt(t,r),f=k.orig_x,l=k.x,u=k.x0,c=k.dx,_=k.orig_y,h=k.y,d=k.y0,v=k.dy,b=k.z;else{var E=r.z;KV.isArray1D(E)?(mxt(r,n,i,"x","y",["z"]),l=r._x,h=r._y,E=r._z):(f=r.x?n.makeCalcdata(r,"x"):[],_=r.y?i.makeCalcdata(r,"y"):[],l=N_e(r,n,"x",f).vals,h=N_e(r,i,"y",_).vals,r._x=l,r._y=h),u=r.x0,c=r.dx,d=r.y0,v=r.dy,b=yxt(E,r,n,i)}(n.rangebreaks||i.rangebreaks)&&(b=bxt(l,h,b),o||(l=U_e(l),h=U_e(h),r._x=l,r._y=h)),!o&&(a||r.connectgaps)&&(r._emptypoints=xxt(b),_xt(b,r._emptypoints));function S(O){s=r._input.zsmooth=r.zsmooth=!1,KV.warn('cannot use zsmooth: "fast": '+O)}function L(O){if(O.length>1){var V=(O[O.length-1]-O[0])/(O.length-1),G=Math.abs(V/100);for(p=0;p<O.length-1;p++)if(Math.abs(O[p+1]-O[p]-V)>G)return!1}return!0}r._islinear=!1,n.type==="log"||i.type==="log"?s==="fast"&&S("log axis found"):L(l)?L(h)?r._islinear=!0:s==="fast"&&S("y scale is not linear"):s==="fast"&&S("x scale is not linear");var x=KV.maxRowLength(b),C=r.xtype==="scaled"?"":l,M=u8(r,C,u,c,x,n),g=r.ytype==="scaled"?"":h,P=u8(r,g,d,v,b.length,i);r._extremes[n._id]=l8.findExtremes(n,M),r._extremes[i._id]=l8.findExtremes(i,P);var T={x:M,y:P,z:b,text:r._text||r.text,hovertext:r._hovertext||r.hovertext};if(r.xperiodalignment&&f&&(T.orig_x=f),r.yperiodalignment&&_&&(T.orig_y=_),C&&C.length===M.length-1&&(T.xCenter=C),g&&g.length===P.length-1&&(T.yCenter=g),o&&(T.xRanges=k.xRanges,T.yRanges=k.yRanges,T.pts=k.pts),a||gxt(t,r,{vals:b,cLetter:"z"}),a&&r.contours&&r.contours.coloring==="heatmap"){var z={type:r.type==="contour"?"heatmap":"histogram2d",xcalendar:r.xcalendar,ycalendar:r.ycalendar};T.xfill=u8(z,C,u,c,x,n),T.yfill=u8(z,g,d,v,b.length,i)}return[T]};function U_e(e){for(var t=[],r=e.length,n=0;n<r;n++){var i=e[n];i!==JV&&t.push(i)}return t}function bxt(e,t,r){for(var n=[],i=-1,a=0;a<r.length;a++)if(t[a]!==JV){i++,n[i]=[];for(var o=0;o<r[a].length;o++)e[o]!==JV&&n[i].push(r[a][o])}return n}});var h8=ye(f8=>{"use strict";f8.CSS_DECLARATIONS=[["image-rendering","optimizeSpeed"],["image-rendering","-moz-crisp-edges"],["image-rendering","-o-crisp-edges"],["image-rendering","-webkit-optimize-contrast"],["image-rendering","optimize-contrast"],["image-rendering","crisp-edges"],["image-rendering","pixelated"]];f8.STYLE=f8.CSS_DECLARATIONS.map(function(e){return e.join(": ")+"; "}).join("")});var QV=ye((Fur,H_e)=>{"use strict";var G_e=h8(),wxt=So(),$V=Dr(),FT=null;function Txt(){if(FT!==null)return FT;FT=!1;var e=$V.isSafari()||$V.isMacWKWebView()||$V.isIOS();if(window.navigator.userAgent&&!e){var t=Array.from(G_e.CSS_DECLARATIONS).reverse(),r=window.CSS&&window.CSS.supports||window.supportsCSS;if(typeof r=="function")FT=t.some(function(o){return r.apply(null,o)});else{var n=wxt.tester.append("image").attr("style",G_e.STYLE),i=window.getComputedStyle(n.node()),a=i.imageRendering;FT=t.some(function(o){var s=o[1];return a===s||a===s.toLowerCase()}),n.remove()}}return FT}H_e.exports=Txt});var d8=ye((zur,Q_e)=>{"use strict";var j_e=Oa(),Axt=cd(),Sxt=qa(),Mxt=So(),Ext=ho(),H0=Dr(),W_e=ru(),kxt=nI(),Cxt=ka(),Lxt=tc().extractOpts,Pxt=tc().makeColorScaleFuncFromTrace,Ixt=Wp(),Rxt=$h(),eG=Rxt.LINE_SPACING,Dxt=QV(),Fxt=h8().STYLE,J_e="heatmap-label";function $_e(e){return e.selectAll("g."+J_e)}function X_e(e){$_e(e).remove()}Q_e.exports=function(e,t,r,n){var i=t.xaxis,a=t.yaxis;H0.makeTraceGroups(n,r,"hm").each(function(o){var s=j_e.select(this),l=o[0],u=l.trace,c=u.xgap||0,f=u.ygap||0,h=l.z,d=l.x,v=l.y,_=l.xCenter,b=l.yCenter,p=Sxt.traceIs(u,"contour"),k=p?"best":u.zsmooth,E=h.length,S=H0.maxRowLength(h),L=!1,x=!1,C,M,g,P,T,z,O,V;for(z=0;C===void 0&&z<d.length-1;)C=i.c2p(d[z]),z++;for(z=d.length-1;M===void 0&&z>0;)M=i.c2p(d[z]),z--;for(M<C&&(g=M,M=C,C=g,L=!0),z=0;P===void 0&&z<v.length-1;)P=a.c2p(v[z]),z++;for(z=v.length-1;T===void 0&&z>0;)T=a.c2p(v[z]),z--;T<P&&(g=P,P=T,T=g,x=!0),p&&(_=d,b=v,d=l.xfill,v=l.yfill);var G="default";if(k?G=k==="best"?"smooth":"fast":u._islinear&&c===0&&f===0&&Dxt()&&(G="fast"),G!=="fast"){var Z=k==="best"?0:.5;C=Math.max(-Z*i._length,C),M=Math.min((1+Z)*i._length,M),P=Math.max(-Z*a._length,P),T=Math.min((1+Z)*a._length,T)}var j=Math.round(M-C),N=Math.round(T-P),H=C>=i._length||M<=0||P>=a._length||T<=0;if(H){var te=s.selectAll("image").data([]);te.exit().remove(),X_e(s);return}var oe,_e;G==="fast"?(oe=S,_e=E):(oe=j,_e=N);var Ee=document.createElement("canvas");Ee.width=oe,Ee.height=_e;var Ce=Ee.getContext("2d",{willReadFrequently:!0}),me=Pxt(u,{noNumericCheck:!0,returnArray:!0}),ie,Se;G==="fast"?(ie=L?function(fn){return S-1-fn}:H0.identity,Se=x?function(fn){return E-1-fn}:H0.identity):(ie=function(fn){return H0.constrain(Math.round(i.c2p(d[fn])-C),0,j)},Se=function(fn){return H0.constrain(Math.round(a.c2p(v[fn])-P),0,N)});var Le=Se(0),Ae=[Le,Le],Fe=L?0:1,Pe=x?0:1,ge=0,Re=0,ce=0,Ze=0,ut,pt,Zt,st,lt;function Gt(fn,yn){if(fn!==void 0){var Sn=me(fn);return Sn[0]=Math.round(Sn[0]),Sn[1]=Math.round(Sn[1]),Sn[2]=Math.round(Sn[2]),ge+=yn,Re+=Sn[0]*yn,ce+=Sn[1]*yn,Ze+=Sn[2]*yn,Sn}return[0,0,0,0]}function Nt(fn,yn,Sn,Ba){var ua=fn[Sn.bin0];if(ua===void 0)return Gt(void 0,1);var ma=fn[Sn.bin1],Wa=yn[Sn.bin0],Fa=yn[Sn.bin1],Wo=ma-ua||0,da=Wa-ua||0,Wn;return ma===void 0?Fa===void 0?Wn=0:Wa===void 0?Wn=2*(Fa-ua):Wn=(2*Fa-Wa-ua)*2/3:Fa===void 0?Wa===void 0?Wn=0:Wn=(2*ua-ma-Wa)*2/3:Wa===void 0?Wn=(2*Fa-ma-ua)*2/3:Wn=Fa+ua-ma-Wa,Gt(ua+Sn.frac*Wo+Ba.frac*(da+Sn.frac*Wn))}if(G!=="default"){var Jt=0,sr;try{sr=new Uint8Array(oe*_e*4)}catch(fn){sr=new Array(oe*_e*4)}if(G==="smooth"){var wr=_||d,cr=b||v,$e=new Array(wr.length),St=new Array(cr.length),Qt=new Array(j),Vt=_?Y_e:Z_e,_t=b?Y_e:Z_e,It,mt,er;for(z=0;z<wr.length;z++)$e[z]=Math.round(i.c2p(wr[z])-C);for(z=0;z<cr.length;z++)St[z]=Math.round(a.c2p(cr[z])-P);for(z=0;z<j;z++)Qt[z]=Vt(z,$e);for(O=0;O<N;O++)for(It=_t(O,St),mt=h[It.bin0],er=h[It.bin1],z=0;z<j;z++,Jt+=4)lt=Nt(mt,er,Qt[z],It),K_e(sr,Jt,lt)}else for(O=0;O<E;O++)for(st=h[O],Ae=Se(O),z=0;z<S;z++)lt=Gt(st[z],1),Jt=(Ae*S+ie(z))*4,K_e(sr,Jt,lt);var lr=Ce.createImageData(oe,_e);try{lr.data.set(sr)}catch(fn){var Tr=lr.data,Lr=Tr.length;for(O=0;O<Lr;O++)Tr[O]=sr[O]}Ce.putImageData(lr,0,0)}else{var ti=Math.floor(c/2),Br=Math.floor(f/2);for(O=0;O<E;O++)if(st=h[O],Ae.reverse(),Ae[Pe]=Se(O+1),!(Ae[0]===Ae[1]||Ae[0]===void 0||Ae[1]===void 0))for(pt=ie(0),ut=[pt,pt],z=0;z<S;z++)ut.reverse(),ut[Fe]=ie(z+1),!(ut[0]===ut[1]||ut[0]===void 0||ut[1]===void 0)&&(Zt=st[z],lt=Gt(Zt,(ut[1]-ut[0])*(Ae[1]-Ae[0])),Ce.fillStyle="rgba("+lt.join(",")+")",Ce.fillRect(ut[0]+ti,Ae[0]+Br,ut[1]-ut[0]-c,Ae[1]-Ae[0]-f))}Re=Math.round(Re/ge),ce=Math.round(ce/ge),Ze=Math.round(Ze/ge);var Vr=Axt("rgb("+Re+","+ce+","+Ze+")");e._hmpixcount=(e._hmpixcount||0)+ge,e._hmlumcount=(e._hmlumcount||0)+ge*Vr.getLuminance();var dt=s.selectAll("image").data(o);dt.enter().append("svg:image").attr({xmlns:Ixt.svg,preserveAspectRatio:"none"}),dt.attr({height:N,width:j,x:C,y:P,"xlink:href":Ee.toDataURL("image/png")}),G==="fast"&&!k&&dt.attr("style",Fxt),X_e(s);var Ge=u.texttemplate;if(Ge){var Je=Lxt(u),je={type:"linear",range:[Je.min,Je.max],_separators:i._separators,_numFormat:i._numFormat},tt=u.type==="histogram2dcontour",xt=u.type==="contour",Ie=xt?1:0,xe=xt?E-1:E,ke=xt?1:0,vt=xt?S-1:S,ir=[];for(z=Ie;z<xe;z++){var ar;if(xt)ar=l.y[z];else if(tt){if(z===0||z===E-1)continue;ar=l.y[z]}else if(l.yCenter)ar=l.yCenter[z];else{if(z+1===E&&l.y[z+1]===void 0)continue;ar=(l.y[z]+l.y[z+1])/2}var vr=Math.round(a.c2p(ar));if(!(0>vr||vr>a._length))for(O=ke;O<vt;O++){var ii;if(xt)ii=l.x[O];else if(tt){if(O===0||O===S-1)continue;ii=l.x[O]}else if(l.xCenter)ii=l.xCenter[O];else{if(O+1===S&&l.x[O+1]===void 0)continue;ii=(l.x[O]+l.x[O+1])/2}var pi=Math.round(i.c2p(ii));if(!(0>pi||pi>i._length)){var $r=kxt({x:ii,y:ar},u,e._fullLayout);$r.x=ii,$r.y=ar;var di=l.z[z][O];di===void 0?($r.z="",$r.zLabel=""):($r.z=di,$r.zLabel=Ext.tickText(je,di,"hover").text);var ji=l.text&&l.text[z]&&l.text[z][O];(ji===void 0||ji===!1)&&(ji=""),$r.text=ji;var In=H0.texttemplateString({data:[$r,u._meta],fallback:u.texttemplatefallback,labels:$r,locale:e._fullLayout._d3locale,template:Ge});if(In){var wi=In.split("<br>"),On=wi.length,qn=0;for(V=0;V<On;V++)qn=Math.max(qn,wi[V].length);ir.push({l:On,c:qn,t:In,x:pi,y:vr,z:di})}}}}var Fn=u.textfont,ra=Fn.size,la=e._fullLayout.font.size;if(!ra||ra==="auto"){var Ut=1/0,wt=1/0,rr=0,nr=0;for(V=0;V<ir.length;V++){var Er=ir[V];if(rr=Math.max(rr,Er.l),nr=Math.max(nr,Er.c),V<ir.length-1){var Xr=ir[V+1],ri=Math.abs(Xr.x-Er.x),Qr=Math.abs(Xr.y-Er.y);ri&&(Ut=Math.min(Ut,ri)),Qr&&(wt=Math.min(wt,Qr))}}!isFinite(Ut)||!isFinite(wt)?ra=la:(Ut-=c,wt-=f,Ut/=nr,wt/=rr,Ut/=eG/2,wt/=eG,ra=Math.min(Math.floor(Ut),Math.floor(wt),la))}if(ra<=0||!isFinite(ra))return;var Oi=function(fn){return fn.x},$i=function(fn){return fn.y-ra*(fn.l*eG/2-1)},tn=$_e(s).data(ir);tn.enter().append("g").classed(J_e,1).append("text").attr("text-anchor","middle").each(function(fn){var yn=j_e.select(this),Sn=Fn.color;(!Sn||Sn==="auto")&&(Sn=Cxt.contrast(fn.z===void 0?e._fullLayout.plot_bgcolor:"rgba("+me(fn.z).join()+")")),yn.attr("data-notex",1).call(W_e.positionText,Oi(fn),$i(fn)).call(Mxt.font,{family:Fn.family,size:ra,color:Sn,weight:Fn.weight,style:Fn.style,variant:Fn.variant,textcase:Fn.textcase,lineposition:Fn.lineposition,shadow:Fn.shadow}).text(fn.t).call(W_e.convertToTspans,e)})}})};function Z_e(e,t){var r=t.length-2,n=H0.constrain(H0.findBin(e,t),0,r),i=t[n],a=t[n+1],o=H0.constrain(n+(e-i)/(a-i)-.5,0,r),s=Math.round(o),l=Math.abs(o-s);return!o||o===r||!l?{bin0:s,bin1:s,frac:0}:{bin0:s,frac:l,bin1:Math.round(s+l/(o-s))}}function Y_e(e,t){var r=t.length-1,n=H0.constrain(H0.findBin(e,t),0,r),i=t[n],a=t[n+1],o=(e-i)/(a-i)||0;return o<=0?{bin0:n,bin1:n,frac:0}:o<.5?{bin0:n,bin1:n+1,frac:o}:{bin0:n+1,bin1:n,frac:1-o}}function K_e(e,t,r){e[t]=r[0],e[t+1]=r[1],e[t+2]=r[2],e[t+3]=Math.round(r[3]*255)}});var S_=ye((Our,exe)=>{"use strict";exe.exports={min:"zmin",max:"zmax"}});var v8=ye((qur,txe)=>{"use strict";var zxt=Oa();txe.exports=function(t){zxt.select(t).selectAll(".hm image").style("opacity",function(r){return r.trace.opacity})}});var g8=ye((Bur,ixe)=>{"use strict";var rxe=vf(),S4=Dr(),p8=S4.isArrayOrTypedArray,Oxt=ho(),qxt=tc().extractOpts;ixe.exports=function(t,r,n,i,a){a||(a={});var o=a.isContour,s=t.cd[0],l=s.trace,u=t.xa,c=t.ya,f=s.x,h=s.y,d=s.z,v=s.xCenter,_=s.yCenter,b=s.zmask,p=l.zhoverformat,k=f,E=h,S,L,x,C;if(t.index!==!1){try{x=Math.round(t.index[1]),C=Math.round(t.index[0])}catch(te){S4.error("Error hovering on heatmap, pointNumber must be [row,col], found:",t.index);return}if(x<0||x>=d[0].length||C<0||C>d.length)return}else{if(rxe.inbox(r-f[0],r-f[f.length-1],0)>0||rxe.inbox(n-h[0],n-h[h.length-1],0)>0)return;if(o){var M;for(k=[2*f[0]-f[1]],M=1;M<f.length;M++)k.push((f[M]+f[M-1])/2);for(k.push([2*f[f.length-1]-f[f.length-2]]),E=[2*h[0]-h[1]],M=1;M<h.length;M++)E.push((h[M]+h[M-1])/2);E.push([2*h[h.length-1]-h[h.length-2]])}x=Math.max(0,Math.min(k.length-2,S4.findBin(r,k))),C=Math.max(0,Math.min(E.length-2,S4.findBin(n,E)))}var g=u.c2p(f[x]),P=u.c2p(f[x+1]),T=c.c2p(h[C]),z=c.c2p(h[C+1]),O,V;o?(O=s.orig_x||f,V=s.orig_y||h,P=g,S=O[x],z=T,L=V[C]):(O=s.orig_x||v||f,V=s.orig_y||_||h,S=v?O[x]:(O[x]+O[x+1])/2,L=_?V[C]:(V[C]+V[C+1])/2,u&&u.type==="category"&&(S=f[x]),c&&c.type==="category"&&(L=h[C]),l.zsmooth&&(g=P=u.c2p(S),T=z=c.c2p(L)));var G=d[C][x];if(b&&!b[C][x]&&(G=void 0),!(G===void 0&&!l.hoverongaps)){var Z;p8(s.hovertext)&&p8(s.hovertext[C])?Z=s.hovertext[C][x]:p8(s.text)&&p8(s.text[C])&&(Z=s.text[C][x]);var j=qxt(l),N={type:"linear",range:[j.min,j.max],hoverformat:p,_separators:u._separators,_numFormat:u._numFormat},H=Oxt.tickText(N,G,"hover").text;return[S4.extendFlat(t,{index:l._after2before?l._after2before[C][x]:[C,x],distance:t.maxHoverDistance,spikeDistance:t.maxSpikeDistance,x0:g,x1:P,y0:T,y1:z,xLabelVal:S,yLabelVal:L,zLabelVal:G,zLabel:H,text:Z})]}}});var axe=ye((Nur,nxe)=>{"use strict";nxe.exports={attributes:PT(),supplyDefaults:i_e(),calc:c8(),plot:d8(),colorbar:S_(),style:v8(),hoverPoints:g8(),moduleType:"trace",name:"heatmap",basePlotModule:ph(),categories:["cartesian","svg","2dMap","showLegend"],meta:{}}});var sxe=ye((Uur,oxe)=>{"use strict";oxe.exports=axe()});var tG=ye((Vur,lxe)=>{"use strict";lxe.exports=function(t,r){return{start:{valType:"any",editType:"calc"},end:{valType:"any",editType:"calc"},size:{valType:"any",editType:"calc"},editType:"calc"}}});var cxe=ye((Gur,uxe)=>{"use strict";uxe.exports={eventDataKeys:["binNumber"]}});var m8=ye((Hur,vxe)=>{"use strict";var Ip=Lm(),fxe=df().axisHoverFormat,{hovertemplateAttrs:Bxt,texttemplateAttrs:Nxt,templatefallbackAttrs:hxe}=Ll(),rG=ec(),dxe=tG(),Uxt=cxe(),iG=Ao().extendFlat;vxe.exports={x:{valType:"data_array",editType:"calc+clearAxisTypes"},y:{valType:"data_array",editType:"calc+clearAxisTypes"},xhoverformat:fxe("x"),yhoverformat:fxe("y"),text:iG({},Ip.text,{}),hovertext:iG({},Ip.hovertext,{}),orientation:Ip.orientation,histfunc:{valType:"enumerated",values:["count","sum","avg","min","max"],dflt:"count",editType:"calc"},histnorm:{valType:"enumerated",values:["","percent","probability","density","probability density"],dflt:"",editType:"calc"},cumulative:{enabled:{valType:"boolean",dflt:!1,editType:"calc"},direction:{valType:"enumerated",values:["increasing","decreasing"],dflt:"increasing",editType:"calc"},currentbin:{valType:"enumerated",values:["include","exclude","half"],dflt:"include",editType:"calc"},editType:"calc"},nbinsx:{valType:"integer",min:0,dflt:0,editType:"calc"},xbins:dxe("x",!0),nbinsy:{valType:"integer",min:0,dflt:0,editType:"calc"},ybins:dxe("y",!0),autobinx:{valType:"boolean",dflt:null,editType:"calc"},autobiny:{valType:"boolean",dflt:null,editType:"calc"},bingroup:{valType:"string",dflt:"",editType:"calc"},hovertemplate:Bxt({},{keys:Uxt.eventDataKeys}),hovertemplatefallback:hxe(),texttemplate:Nxt({arrayOk:!1,editType:"plot"},{keys:["label","value"]}),texttemplatefallback:hxe({editType:"plot"}),textposition:iG({},Ip.textposition,{arrayOk:!1}),textfont:rG({arrayOk:!1,editType:"plot",colorEditType:"style"}),outsidetextfont:rG({arrayOk:!1,editType:"plot",colorEditType:"style"}),insidetextfont:rG({arrayOk:!1,editType:"plot",colorEditType:"style"}),insidetextanchor:Ip.insidetextanchor,textangle:Ip.textangle,cliponaxis:Ip.cliponaxis,constraintext:Ip.constraintext,marker:Ip.marker,offsetgroup:Ip.offsetgroup,alignmentgroup:Ip.alignmentgroup,selected:Ip.selected,unselected:Ip.unselected,zorder:Ip.zorder}});var yxe=ye((jur,mxe)=>{"use strict";var pxe=qa(),M4=Dr(),gxe=ka(),Vxt=r0().handleText,Gxt=VI(),Hxt=m8();mxe.exports=function(t,r,n,i){function a(k,E){return M4.coerce(t,r,Hxt,k,E)}var o=a("x"),s=a("y"),l=a("cumulative.enabled");l&&(a("cumulative.direction"),a("cumulative.currentbin")),a("text");var u=a("textposition");Vxt(t,r,i,a,u,{moduleHasSelected:!0,moduleHasUnselected:!0,moduleHasConstrain:!0,moduleHasCliponaxis:!0,moduleHasTextangle:!0,moduleHasInsideanchor:!0}),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),a("xhoverformat"),a("yhoverformat");var c=a("orientation",s&&!o?"h":"v"),f=c==="v"?"x":"y",h=c==="v"?"y":"x",d=o&&s?Math.min(M4.minRowLength(o)&&M4.minRowLength(s)):M4.minRowLength(r[f]||[]);if(!d){r.visible=!1;return}r._length=d;var v=pxe.getComponentMethod("calendars","handleTraceDefaults");v(t,r,["x","y"],i);var _=r[h];_&&a("histfunc"),a("histnorm"),a("autobin"+f),Gxt(t,r,a,n,i),M4.coerceSelectionMarkerOpacity(r,a);var b=(r.marker.line||{}).color,p=pxe.getComponentMethod("errorbars","supplyDefaults");p(t,r,b||gxe.defaultLine,{axis:"y"}),p(t,r,b||gxe.defaultLine,{axis:"x",inherit:"y"}),a("zorder")}});var _8=ye((Wur,bxe)=>{"use strict";var E4=Dr(),jxt=hf(),y8=qa().traceIs,Wxt=Hb(),Xxt=r0().validateCornerradius,_xe=E4.nestedProperty,nG=Nb().getAxisGroup,xxe=[{aStr:{x:"xbins.start",y:"ybins.start"},name:"start"},{aStr:{x:"xbins.end",y:"ybins.end"},name:"end"},{aStr:{x:"xbins.size",y:"ybins.size"},name:"size"},{aStr:{x:"nbinsx",y:"nbinsy"},name:"nbins"}],Zxt=["x","y"];bxe.exports=function(t,r){var n=r._histogramBinOpts={},i=[],a={},o=[],s,l,u,c,f,h,d;function v(G,Z){return E4.coerce(s._input,s,s._module.attributes,G,Z)}function _(G){return G.orientation==="v"?"x":"y"}function b(G,Z){var j=jxt.getFromTrace({_fullLayout:r},G,Z);return j.type}function p(G,Z,j){var N=G.uid+"__"+j;Z||(Z=N);var H=b(G,j),te=G[j+"calendar"]||"",oe=n[Z],_e=!0;oe&&(H===oe.axType&&te===oe.calendar?(_e=!1,oe.traces.push(G),oe.dirs.push(j)):(Z=N,H!==oe.axType&&E4.warn(["Attempted to group the bins of trace",G.index,"set on a","type:"+H,"axis","with bins on","type:"+oe.axType,"axis."].join(" ")),te!==oe.calendar&&E4.warn(["Attempted to group the bins of trace",G.index,"set with a",te,"calendar","with bins",oe.calendar?"on a "+oe.calendar+" calendar":"w/o a set calendar"].join(" ")))),_e&&(n[Z]={traces:[G],dirs:[j],axType:H,calendar:G[j+"calendar"]||""}),G["_"+j+"bingroup"]=Z}for(f=0;f<t.length;f++)if(s=t[f],y8(s,"histogram")){if(i.push(s),delete s._xautoBinFinished,delete s._yautoBinFinished,s.type==="histogram"){var k=v("marker.cornerradius",r.barcornerradius);s.marker&&(s.marker.cornerradius=Xxt(k))}y8(s,"2dMap")||Wxt(s._input,s,r,v,r.barmode)}var E=r._alignmentOpts||{};for(f=0;f<i.length;f++){if(s=i[f],u="",!y8(s,"2dMap")){if(c=_(s),r.barmode==="group"&&s.alignmentgroup){var S=s[c+"axis"],L=nG(r,S)+s.orientation;(E[L]||{})[s.alignmentgroup]&&(u=L)}!u&&r.barmode!=="overlay"&&(u=nG(r,s.xaxis)+nG(r,s.yaxis)+_(s))}u?(a[u]||(a[u]=[]),a[u].push(s)):o.push(s)}for(u in a){if(l=a[u],l.length===1){o.push(l[0]);continue}var x=!1;for(l.length&&(s=l[0],x=v("bingroup")),u=x||u,f=0;f<l.length;f++){s=l[f];var C=s._input.bingroup;C&&C!==u&&E4.warn(["Trace",s.index,"must match","within bingroup",u+".","Ignoring its bingroup:",C,"setting."].join(" ")),s.bingroup=u,p(s,u,_(s))}}for(f=0;f<o.length;f++){s=o[f];var M=v("bingroup");if(y8(s,"2dMap"))for(d=0;d<2;d++){c=Zxt[d];var g=v(c+"bingroup",M?M+"__"+c:null);p(s,g,c)}else p(s,M,_(s))}for(u in n){var P=n[u];for(l=P.traces,h=0;h<xxe.length;h++){var T=xxe[h],z=T.name,O,V;if(!(z==="nbins"&&P.sizeFound)){for(f=0;f<l.length;f++){if(s=l[f],c=P.dirs[f],O=T.aStr[c],_xe(s._input,O).get()!==void 0){P[z]=v(O),P[z+"Found"]=!0;break}V=(s._autoBin||{})[c]||{},V[z]&&_xe(s,O).set(V[z])}if(z==="start"||z==="end")for(;f<l.length;f++)s=l[f],s["_"+c+"bingroup"]&&(V=(s._autoBin||{})[c]||{},v(O,V[z]));z==="nbins"&&!P.sizeFound&&!P.nbinsFound&&(s=l[0],P[z]=v(O))}}}}});var Txe=ye((Xur,wxe)=>{"use strict";var Yxt=ET().hoverPoints,Kxt=ho().hoverLabelText;wxe.exports=function(t,r,n,i,a){var o=Yxt(t,r,n,i,a);if(o){t=o[0];var s=t.cd[t.index],l=t.cd[0].trace;if(!l.cumulative.enabled){var u=l.orientation==="h"?"y":"x";t[u+"Label"]=Kxt(t[u+"a"],[s.ph0,s.ph1],l[u+"hoverformat"])}return o}}});var aG=ye((Zur,Axe)=>{"use strict";Axe.exports=function(t,r,n,i,a){if(t.x="xVal"in r?r.xVal:r.x,t.y="yVal"in r?r.yVal:r.y,"zLabelVal"in r&&(t.z=r.zLabelVal),r.xa&&(t.xaxis=r.xa),r.ya&&(t.yaxis=r.ya),!(n.cumulative||{}).enabled){var o=Array.isArray(a)?i[0].pts[a[0]][a[1]]:i[a].pts;t.pointNumbers=o,t.binNumber=t.pointNumber,delete t.pointNumber,delete t.pointIndex;var s;if(n._indexToPoints){s=[];for(var l=0;l<o.length;l++)s=s.concat(n._indexToPoints[o[l]])}else s=o;t.pointIndices=s}return t}});var Mxe=ye((Yur,Sxe)=>{"use strict";Sxe.exports={attributes:m8(),layoutAttributes:UI(),supplyDefaults:yxe(),crossTraceDefaults:_8(),supplyLayoutDefaults:SV(),calc:ZV().calc,crossTraceCalc:jb().crossTraceCalc,plot:n2().plot,layerName:"barlayer",style:N0().style,styleOnSelect:N0().styleOnSelect,colorbar:$d(),hoverPoints:Txe(),selectPoints:kT(),eventData:aG(),moduleType:"trace",name:"histogram",basePlotModule:ph(),categories:["bar-like","cartesian","svg","bar","histogram","oriented","errorBarsOK","showLegend"],meta:{}}});var kxe=ye((Kur,Exe)=>{"use strict";Exe.exports=Mxe()});var b8=ye((Jur,Pxe)=>{"use strict";var Gg=m8(),Cxe=tG(),x8=PT(),Jxt=Gl(),oG=df().axisHoverFormat,{hovertemplateAttrs:$xt,texttemplateAttrs:Qxt,templatefallbackAttrs:Lxe}=Ll(),ebt=Tu(),k4=Ao().extendFlat;Pxe.exports=k4({x:Gg.x,y:Gg.y,z:{valType:"data_array",editType:"calc"},marker:{color:{valType:"data_array",editType:"calc"},editType:"calc"},histnorm:Gg.histnorm,histfunc:Gg.histfunc,nbinsx:Gg.nbinsx,xbins:Cxe("x"),nbinsy:Gg.nbinsy,ybins:Cxe("y"),autobinx:Gg.autobinx,autobiny:Gg.autobiny,bingroup:k4({},Gg.bingroup,{}),xbingroup:k4({},Gg.bingroup,{}),ybingroup:k4({},Gg.bingroup,{}),xgap:x8.xgap,ygap:x8.ygap,zsmooth:x8.zsmooth,xhoverformat:oG("x"),yhoverformat:oG("y"),zhoverformat:oG("z",1),hovertemplate:$xt({},{keys:["z"]}),hovertemplatefallback:Lxe(),texttemplate:Qxt({arrayOk:!1,editType:"plot"},{keys:["z"]}),texttemplatefallback:Lxe({editType:"plot"}),textfont:x8.textfont,showlegend:k4({},Jxt.showlegend,{dflt:!1})},ebt("",{cLetter:"z",autoColorDflt:!1}))});var sG=ye(($ur,Rxe)=>{"use strict";var tbt=qa(),Ixe=Dr();Rxe.exports=function(t,r,n,i){var a=n("x"),o=n("y"),s=Ixe.minRowLength(a),l=Ixe.minRowLength(o);if(!s||!l){r.visible=!1;return}r._length=Math.min(s,l);var u=tbt.getComponentMethod("calendars","handleTraceDefaults");u(t,r,["x","y"],i);var c=n("z")||n("marker.color");c&&n("histfunc"),n("histnorm"),n("autobinx"),n("autobiny")}});var Fxe=ye((Qur,Dxe)=>{"use strict";var rbt=Dr(),ibt=sG(),nbt=UV(),abt=Qh(),obt=A4(),sbt=b8();Dxe.exports=function(t,r,n,i){function a(o,s){return rbt.coerce(t,r,sbt,o,s)}ibt(t,r,a,i),r.visible!==!1&&(nbt(t,r,a,i),abt(t,r,i,a,{prefix:"",cLetter:"z"}),a("hovertemplate"),a("hovertemplatefallback"),obt(a,i),a("xhoverformat"),a("yhoverformat"))}});var qxe=ye((ecr,Oxe)=>{"use strict";var lbt=g8(),zxe=ho().hoverLabelText;Oxe.exports=function(t,r,n,i,a){var o=lbt(t,r,n,i,a);if(o){t=o[0];var s=t.index,l=s[0],u=s[1],c=t.cd[0],f=c.trace,h=c.xRanges[u],d=c.yRanges[l];return t.xLabel=zxe(t.xa,[h[0],h[1]],f.xhoverformat),t.yLabel=zxe(t.ya,[d[0],d[1]],f.yhoverformat),o}}});var Nxe=ye((tcr,Bxe)=>{"use strict";Bxe.exports={attributes:b8(),supplyDefaults:Fxe(),crossTraceDefaults:_8(),calc:c8(),plot:d8(),layerName:"heatmaplayer",colorbar:S_(),style:v8(),hoverPoints:qxe(),eventData:aG(),moduleType:"trace",name:"histogram2d",basePlotModule:ph(),categories:["cartesian","svg","2dMap","histogram","showLegend"],meta:{}}});var Vxe=ye((rcr,Uxe)=>{"use strict";Uxe.exports=Nxe()});var w8=ye((icr,Gxe)=>{"use strict";Gxe.exports={COMPARISON_OPS:["=","!=","<",">=",">","<="],COMPARISON_OPS2:["=","<",">=",">","<="],INTERVAL_OPS:["[]","()","[)","(]","][",")(","](",")["],SET_OPS:["{}","}{"],CONSTRAINT_REDUCTION:{"=":"=","<":"<","<=":"<",">":">",">=":">","[]":"[]","()":"[]","[)":"[]","(]":"[]","][":"][",")(":"][","](":"][",")[":"]["}}});var C4=ye((ncr,Xxe)=>{"use strict";var mh=PT(),T8=pf(),jxe=df(),lG=jxe.axisHoverFormat,ubt=jxe.descriptionOnlyNumbers,cbt=Tu(),fbt=Pd().dash,hbt=ec(),zT=Ao().extendFlat,Wxe=w8(),dbt=Wxe.COMPARISON_OPS2,vbt=Wxe.INTERVAL_OPS,Hxe=T8.line;Xxe.exports=zT({z:mh.z,x:mh.x,x0:mh.x0,dx:mh.dx,y:mh.y,y0:mh.y0,dy:mh.dy,xperiod:mh.xperiod,yperiod:mh.yperiod,xperiod0:T8.xperiod0,yperiod0:T8.yperiod0,xperiodalignment:mh.xperiodalignment,yperiodalignment:mh.yperiodalignment,text:mh.text,hovertext:mh.hovertext,transpose:mh.transpose,xtype:mh.xtype,ytype:mh.ytype,xhoverformat:lG("x"),yhoverformat:lG("y"),zhoverformat:lG("z",1),hovertemplate:mh.hovertemplate,hovertemplatefallback:mh.hovertemplatefallback,texttemplate:zT({},mh.texttemplate,{}),texttemplatefallback:mh.texttemplatefallback,textfont:zT({},mh.textfont,{}),hoverongaps:mh.hoverongaps,connectgaps:zT({},mh.connectgaps,{}),fillcolor:{valType:"color",editType:"calc"},autocontour:{valType:"boolean",dflt:!0,editType:"calc",impliedEdits:{"contours.start":void 0,"contours.end":void 0,"contours.size":void 0}},ncontours:{valType:"integer",dflt:15,min:1,editType:"calc"},contours:{type:{valType:"enumerated",values:["levels","constraint"],dflt:"levels",editType:"calc"},start:{valType:"number",dflt:null,editType:"plot",impliedEdits:{"^autocontour":!1}},end:{valType:"number",dflt:null,editType:"plot",impliedEdits:{"^autocontour":!1}},size:{valType:"number",dflt:null,min:0,editType:"plot",impliedEdits:{"^autocontour":!1}},coloring:{valType:"enumerated",values:["fill","heatmap","lines","none"],dflt:"fill",editType:"calc"},showlines:{valType:"boolean",dflt:!0,editType:"plot"},showlabels:{valType:"boolean",dflt:!1,editType:"plot"},labelfont:hbt({editType:"plot",colorEditType:"style"}),labelformat:{valType:"string",dflt:"",editType:"plot",description:ubt("contour label")},operation:{valType:"enumerated",values:[].concat(dbt).concat(vbt),dflt:"=",editType:"calc"},value:{valType:"any",dflt:0,editType:"calc"},editType:"calc",impliedEdits:{autocontour:!1}},line:{color:zT({},Hxe.color,{editType:"style+colorbars"}),width:{valType:"number",min:0,editType:"style+colorbars"},dash:fbt,smoothing:zT({},Hxe.smoothing,{}),editType:"plot"},zorder:T8.zorder},cbt("",{cLetter:"z",autoColorDflt:!1,editTypeOverride:"calc"}))});var cG=ye((acr,Yxe)=>{"use strict";var wv=b8(),Rm=C4(),pbt=Tu(),uG=df().axisHoverFormat,Zxe=Ao().extendFlat;Yxe.exports=Zxe({x:wv.x,y:wv.y,z:wv.z,marker:wv.marker,histnorm:wv.histnorm,histfunc:wv.histfunc,nbinsx:wv.nbinsx,xbins:wv.xbins,nbinsy:wv.nbinsy,ybins:wv.ybins,autobinx:wv.autobinx,autobiny:wv.autobiny,bingroup:wv.bingroup,xbingroup:wv.xbingroup,ybingroup:wv.ybingroup,autocontour:Rm.autocontour,ncontours:Rm.ncontours,contours:Rm.contours,line:{color:Rm.line.color,width:Zxe({},Rm.line.width,{dflt:.5}),dash:Rm.line.dash,smoothing:Rm.line.smoothing,editType:"plot"},xhoverformat:uG("x"),yhoverformat:uG("y"),zhoverformat:uG("z",1),hovertemplate:wv.hovertemplate,hovertemplatefallback:wv.hovertemplatefallback,texttemplate:Rm.texttemplate,texttemplatefallback:Rm.texttemplatefallback,textfont:Rm.textfont},pbt("",{cLetter:"z",editTypeOverride:"calc"}))});var A8=ye((ocr,Kxe)=>{"use strict";Kxe.exports=function(t,r,n,i){var a=i("contours.start"),o=i("contours.end"),s=a===!1||o===!1,l=n("contours.size"),u;s?u=r.autocontour=!0:u=n("autocontour",!1),(u||!l)&&n("ncontours")}});var fG=ye((scr,Jxe)=>{"use strict";var gbt=Dr();Jxe.exports=function(t,r,n,i){i||(i={});var a=t("contours.showlabels");if(a){var o=r.font;gbt.coerceFont(t,"contours.labelfont",o,{overrideDflt:{color:n}}),t("contours.labelformat")}i.hasHover!==!1&&t("zhoverformat")}});var S8=ye((lcr,$xe)=>{"use strict";var mbt=Qh(),ybt=fG();$xe.exports=function(t,r,n,i,a){var o=n("contours.coloring"),s,l="";o==="fill"&&(s=n("contours.showlines")),s!==!1&&(o!=="lines"&&(l=n("line.color","#000")),n("line.width",.5),n("line.dash")),o!=="none"&&(t.showlegend!==!0&&(r.showlegend=!1),r._dfltShowLegend=!1,mbt(t,r,i,n,{prefix:"",cLetter:"z"})),n("line.smoothing"),ybt(n,i,l,a)}});var rbe=ye((ucr,tbe)=>{"use strict";var Qxe=Dr(),_bt=sG(),xbt=A8(),bbt=S8(),wbt=A4(),ebe=cG();tbe.exports=function(t,r,n,i){function a(s,l){return Qxe.coerce(t,r,ebe,s,l)}function o(s){return Qxe.coerce2(t,r,ebe,s)}_bt(t,r,a,i),r.visible!==!1&&(xbt(t,r,a,o),bbt(t,r,a,i),a("xhoverformat"),a("yhoverformat"),a("hovertemplate"),a("hovertemplatefallback"),r.contours&&r.contours.coloring==="heatmap"&&wbt(a,i))}});var vG=ye((ccr,nbe)=>{"use strict";var dG=ho(),hG=Dr();nbe.exports=function(t,r){var n=t.contours;if(t.autocontour){var i=t.zmin,a=t.zmax;(t.zauto||i===void 0)&&(i=hG.aggNums(Math.min,null,r)),(t.zauto||a===void 0)&&(a=hG.aggNums(Math.max,null,r));var o=ibe(i,a,t.ncontours);n.size=o.dtick,n.start=dG.tickFirst(o),o.range.reverse(),n.end=dG.tickFirst(o),n.start===i&&(n.start+=n.size),n.end===a&&(n.end-=n.size),n.start>n.end&&(n.start=n.end=(n.start+n.end)/2),t._input.contours||(t._input.contours={}),hG.extendFlat(t._input.contours,{start:n.start,end:n.end,size:n.size}),t._input.autocontour=!0}else if(n.type!=="constraint"){var s=n.start,l=n.end,u=t._input.contours;if(s>l&&(n.start=u.start=l,l=n.end=u.end=s,s=n.start),!(n.size>0)){var c;s===l?c=1:c=ibe(s,l,t.ncontours).dtick,u.size=n.size=c}}};function ibe(e,t,r){var n={type:"linear",range:[e,t]};return dG.autoTicks(n,(t-e)/(r||15)),n}});var L4=ye((fcr,abe)=>{"use strict";abe.exports=function(t){return t.end+t.size/1e6}});var pG=ye((hcr,sbe)=>{"use strict";var obe=tc(),Tbt=c8(),Abt=vG(),Sbt=L4();sbe.exports=function(t,r){var n=Tbt(t,r),i=n[0].z;Abt(r,i);var a=r.contours,o=obe.extractOpts(r),s;if(a.coloring==="heatmap"&&o.auto&&r.autocontour===!1){var l=a.start,u=Sbt(a),c=a.size||1,f=Math.floor((u-l)/c)+1;isFinite(c)||(c=1,f=1);var h=l-c/2,d=h+f*c;s=[h,d]}else s=i;return obe.calc(t,r,{vals:s,cLetter:"z"}),n}});var P4=ye((dcr,lbe)=>{"use strict";lbe.exports={BOTTOMSTART:[1,9,13,104,713],TOPSTART:[4,6,7,104,713],LEFTSTART:[8,12,14,208,1114],RIGHTSTART:[2,3,11,208,1114],NEWDELTA:[null,[-1,0],[0,-1],[-1,0],[1,0],null,[0,-1],[-1,0],[0,1],[0,1],null,[0,1],[1,0],[1,0],[0,-1]],CHOOSESADDLE:{104:[4,1],208:[2,8],713:[7,13],1114:[11,14]},SADDLEREMAINDER:{1:4,2:8,4:1,7:13,8:2,11:14,13:7,14:11},LABELDISTANCE:2,LABELINCREASE:10,LABELMIN:3,LABELMAX:10,LABELOPTIMIZER:{EDGECOST:1,ANGLECOST:1,NEIGHBORCOST:5,SAMELEVELFACTOR:10,SAMELEVELDISTANCE:5,MAXCOST:100,INITIALSEARCHPOINTS:10,ITERATIONS:5}}});var gG=ye((vcr,ube)=>{"use strict";var M8=P4();ube.exports=function(t){var r=t[0].z,n=r.length,i=r[0].length,a=n===2||i===2,o,s,l,u,c,f,h,d,v;for(s=0;s<n-1;s++)for(u=[],s===0&&(u=u.concat(M8.BOTTOMSTART)),s===n-2&&(u=u.concat(M8.TOPSTART)),o=0;o<i-1;o++)for(l=u.slice(),o===0&&(l=l.concat(M8.LEFTSTART)),o===i-2&&(l=l.concat(M8.RIGHTSTART)),c=o+","+s,f=[[r[s][o],r[s][o+1]],[r[s+1][o],r[s+1][o+1]]],v=0;v<t.length;v++)d=t[v],h=Mbt(d.level,f),h&&(d.crossings[c]=h,l.indexOf(h)!==-1&&(d.starts.push([o,s]),a&&l.indexOf(h,l.indexOf(h)+1)!==-1&&d.starts.push([o,s])))};function Mbt(e,t){var r=(t[0][0]>e?0:1)+(t[0][1]>e?0:2)+(t[1][1]>e?0:4)+(t[1][0]>e?0:8);if(r===5||r===10){var n=(t[0][0]+t[0][1]+t[1][0]+t[1][1])/4;return e>n?r===5?713:1114:r===5?104:208}return r===15?0:r}});var mG=ye((pcr,hbe)=>{"use strict";var E8=Dr(),OT=P4();hbe.exports=function(t,r,n){var i,a,o,s,l;for(r=r||.01,n=n||.01,o=0;o<t.length;o++){for(s=t[o],l=0;l<s.starts.length;l++)a=s.starts[l],cbe(s,a,"edge",r,n);for(i=0;Object.keys(s.crossings).length&&i<1e4;)i++,a=Object.keys(s.crossings)[0].split(",").map(Number),cbe(s,a,void 0,r,n);i===1e4&&E8.log("Infinite loop in contour?")}};function I4(e,t,r,n){return Math.abs(e[0]-t[0])<r&&Math.abs(e[1]-t[1])<n}function Ebt(e,t){var r=e[2]-t[2],n=e[3]-t[3];return Math.sqrt(r*r+n*n)}function cbe(e,t,r,n,i){var a=t.join(","),o=e.crossings[a],s=kbt(o,r,t),l=[fbe(e,t,[-s[0],-s[1]])],u=e.z.length,c=e.z[0].length,f=t.slice(),h=s.slice(),d;for(d=0;d<1e4;d++){if(o>20?(o=OT.CHOOSESADDLE[o][(s[0]||s[1])<0?0:1],e.crossings[a]=OT.SADDLEREMAINDER[o]):delete e.crossings[a],s=OT.NEWDELTA[o],!s){E8.log("Found bad marching index:",o,t,e.level);break}l.push(fbe(e,t,s)),t[0]+=s[0],t[1]+=s[1],a=t.join(","),I4(l[l.length-1],l[l.length-2],n,i)&&l.pop();var v=s[0]&&(t[0]<0||t[0]>c-2)||s[1]&&(t[1]<0||t[1]>u-2),_=t[0]===f[0]&&t[1]===f[1]&&s[0]===h[0]&&s[1]===h[1];if(_||r&&v)break;o=e.crossings[a]}d===1e4&&E8.log("Infinite loop in contour?");var b=I4(l[0],l[l.length-1],n,i),p=0,k=.2*e.smoothing,E=[],S=0,L,x,C,M,g,P,T,z,O,V,G;for(d=1;d<l.length;d++)T=Ebt(l[d],l[d-1]),p+=T,E.push(T);var Z=p/E.length*k;function j(te){return l[te%l.length]}for(d=l.length-2;d>=S;d--)if(L=E[d],L<Z){for(C=0,x=d-1;x>=S&&L+E[x]<Z;x--)L+=E[x];if(b&&d===l.length-2)for(C=0;C<x&&L+E[C]<Z;C++)L+=E[C];g=d-x+C+1,P=Math.floor((d+x+C+2)/2),!b&&d===l.length-2?M=l[l.length-1]:!b&&x===-1?M=l[0]:g%2?M=j(P):M=[(j(P)[0]+j(P+1)[0])/2,(j(P)[1]+j(P+1)[1])/2],l.splice(x+1,d-x+1,M),d=x+1,C&&(S=C),b&&(d===l.length-2?l[C]=l[l.length-1]:d===0&&(l[l.length-1]=l[0]))}for(l.splice(0,S),d=0;d<l.length;d++)l[d].length=2;if(!(l.length<2))if(b)l.pop(),e.paths.push(l);else{r||E8.log("Unclosed interior contour?",e.level,f.join(","),l.join("L"));var N=!1;for(z=0;z<e.edgepaths.length;z++)if(V=e.edgepaths[z],!N&&I4(V[0],l[l.length-1],n,i)){l.pop(),N=!0;var H=!1;for(O=0;O<e.edgepaths.length;O++)if(G=e.edgepaths[O],I4(G[G.length-1],l[0],n,i)){H=!0,l.shift(),e.edgepaths.splice(z,1),O===z?e.paths.push(l.concat(G)):(O>z&&O--,e.edgepaths[O]=G.concat(l,V));break}H||(e.edgepaths[z]=l.concat(V))}for(z=0;z<e.edgepaths.length&&!N;z++)V=e.edgepaths[z],I4(V[V.length-1],l[0],n,i)&&(l.shift(),e.edgepaths[z]=V.concat(l),N=!0);N||e.edgepaths.push(l)}}function kbt(e,t,r){var n=0,i=0;return e>20&&t?e===208||e===1114?n=r[0]===0?1:-1:i=r[1]===0?1:-1:OT.BOTTOMSTART.indexOf(e)!==-1?i=1:OT.LEFTSTART.indexOf(e)!==-1?n=1:OT.TOPSTART.indexOf(e)!==-1?i=-1:n=-1,[n,i]}function fbe(e,t,r){var n=t[0]+Math.max(r[0],0),i=t[1]+Math.max(r[1],0),a=e.z[i][n],o=e.xaxis,s=e.yaxis;if(r[1]){var l=(e.level-a)/(e.z[i][n+1]-a),u=(l!==1?(1-l)*o.c2l(e.x[n]):0)+(l!==0?l*o.c2l(e.x[n+1]):0);return[o.c2p(o.l2c(u),!0),s.c2p(e.y[i],!0),n+l,i]}else{var c=(e.level-a)/(e.z[i+1][n]-a),f=(c!==1?(1-c)*s.c2l(e.y[i]):0)+(c!==0?c*s.c2l(e.y[i+1]):0);return[o.c2p(e.x[n],!0),s.c2p(s.l2c(f),!0),n,i+c]}}});var gbe=ye((gcr,pbe)=>{"use strict";var yG=w8(),Cbt=Eo();pbe.exports={"[]":dbe("[]"),"][":dbe("]["),">":_G(">"),"<":_G("<"),"=":_G("=")};function vbe(e,t){var r=Array.isArray(t),n;function i(a){return Cbt(a)?+a:null}return yG.COMPARISON_OPS2.indexOf(e)!==-1?n=i(r?t[0]:t):yG.INTERVAL_OPS.indexOf(e)!==-1?n=r?[i(t[0]),i(t[1])]:[i(t),i(t)]:yG.SET_OPS.indexOf(e)!==-1&&(n=r?t.map(i):[i(t)]),n}function dbe(e){return function(t){t=vbe(e,t);var r=Math.min(t[0],t[1]),n=Math.max(t[0],t[1]);return{start:r,end:n,size:n-r}}}function _G(e){return function(t){return t=vbe(e,t),{start:t,end:1/0,size:1/0}}}});var xG=ye((mcr,ybe)=>{"use strict";var mbe=Dr(),Lbt=gbe(),Pbt=L4();ybe.exports=function(t,r,n){for(var i=t.type==="constraint"?Lbt[t._operation](t.value):t,a=i.size,o=[],s=Pbt(i),l=n.trace._carpetTrace,u=l?{xaxis:l.aaxis,yaxis:l.baxis,x:n.a,y:n.b}:{xaxis:r.xaxis,yaxis:r.yaxis,x:n.x,y:n.y},c=i.start;c<s;c+=a)if(o.push(mbe.extendFlat({level:c,crossings:{},starts:[],edgepaths:[],paths:[],z:n.z,smoothing:n.trace.line.smoothing},u)),o.length>1e3){mbe.warn("Too many contours, clipping at 1000",t);break}return o}});var bG=ye((ycr,xbe)=>{"use strict";var qT=Dr();xbe.exports=function(e,t){var r,n,i,a=function(l){return l.reverse()},o=function(l){return l};switch(t){case"=":case"<":return e;case">":for(e.length!==1&&qT.warn("Contour data invalid for the specified inequality operation."),n=e[0],r=0;r<n.edgepaths.length;r++)n.edgepaths[r]=a(n.edgepaths[r]);for(r=0;r<n.paths.length;r++)n.paths[r]=a(n.paths[r]);for(r=0;r<n.starts.length;r++)n.starts[r]=a(n.starts[r]);return e;case"][":var s=a;a=o,o=s;case"[]":for(e.length!==2&&qT.warn("Contour data invalid for the specified inequality range operation."),n=_be(e[0]),i=_be(e[1]),r=0;r<n.edgepaths.length;r++)n.edgepaths[r]=a(n.edgepaths[r]);for(r=0;r<n.paths.length;r++)n.paths[r]=a(n.paths[r]);for(r=0;r<n.starts.length;r++)n.starts[r]=a(n.starts[r]);for(;i.edgepaths.length;)n.edgepaths.push(o(i.edgepaths.shift()));for(;i.paths.length;)n.paths.push(o(i.paths.shift()));for(;i.starts.length;)n.starts.push(o(i.starts.shift()));return[n]}};function _be(e){return qT.extendFlat({},e,{edgepaths:qT.extendDeep([],e.edgepaths),paths:qT.extendDeep([],e.paths),starts:qT.extendDeep([],e.starts)})}});var wG=ye((_cr,bbe)=>{"use strict";bbe.exports=function(e,t){var r=e[0],n=r.z,i;switch(t.type){case"levels":var a=Math.min(n[0][0],n[0][1]);for(i=0;i<e.length;i++){var o=e[i];o.prefixBoundary=!o.edgepaths.length&&(a>o.level||o.starts.length&&a===o.level)}break;case"constraint":if(r.prefixBoundary=!1,r.edgepaths.length)return;var s=r.x.length,l=r.y.length,u=-1/0,c=1/0;for(i=0;i<l;i++)c=Math.min(c,n[i][0]),c=Math.min(c,n[i][s-1]),u=Math.max(u,n[i][0]),u=Math.max(u,n[i][s-1]);for(i=1;i<s-1;i++)c=Math.min(c,n[0][i]),c=Math.min(c,n[l-1][i]),u=Math.max(u,n[0][i]),u=Math.max(u,n[l-1][i]);var f=t.value,h,d;switch(t._operation){case">":f>u&&(r.prefixBoundary=!0);break;case"<":(f<c||r.starts.length&&f===c)&&(r.prefixBoundary=!0);break;case"[]":h=Math.min(f[0],f[1]),d=Math.max(f[0],f[1]),(d<c||h>u||r.starts.length&&d===c)&&(r.prefixBoundary=!0);break;case"][":h=Math.min(f[0],f[1]),d=Math.max(f[0],f[1]),h<c&&d>u&&(r.prefixBoundary=!0);break}break}}});var k8=ye(jv=>{"use strict";var D4=Oa(),zd=Dr(),qy=So(),Ibt=tc(),Abe=ru(),wbe=ho(),Tbe=ym(),Rbt=d8(),Sbe=gG(),Mbe=mG(),Dbt=xG(),Fbt=bG(),Ebe=wG(),R4=P4(),Dm=R4.LABELOPTIMIZER;jv.plot=function(t,r,n,i){var a=r.xaxis,o=r.yaxis;zd.makeTraceGroups(i,n,"contour").each(function(s){var l=D4.select(this),u=s[0],c=u.trace,f=u.x,h=u.y,d=c.contours,v=Dbt(d,r,u),_=zd.ensureSingle(l,"g","heatmapcoloring"),b=[];d.coloring==="heatmap"&&(b=[s]),Rbt(t,r,b,_),Sbe(v),Mbe(v);var p=a.c2p(f[0],!0),k=a.c2p(f[f.length-1],!0),E=o.c2p(h[0],!0),S=o.c2p(h[h.length-1],!0),L=[[p,S],[k,S],[k,E],[p,E]],x=v;d.type==="constraint"&&(x=Fbt(v,d._operation)),zbt(l,L,d),Obt(l,x,L,d),qbt(l,v,t,u,d),Nbt(l,r,t,u,L)})};function zbt(e,t,r){var n=zd.ensureSingle(e,"g","contourbg"),i=n.selectAll("path").data(r.coloring==="fill"?[0]:[]);i.enter().append("path"),i.exit().remove(),i.attr("d","M"+t.join("L")+"Z").style("stroke","none")}function Obt(e,t,r,n){var i=n.coloring==="fill"||n.type==="constraint"&&n._operation!=="=",a="M"+r.join("L")+"Z";i&&Ebe(t,n);var o=zd.ensureSingle(e,"g","contourfill"),s=o.selectAll("path").data(i?t:[]);s.enter().append("path"),s.exit().remove(),s.each(function(l){var u=(l.prefixBoundary?a:"")+kbe(l,r);u?D4.select(this).attr("d",u).style("stroke","none"):D4.select(this).remove()})}function kbe(e,t){var r="",n=0,i=e.edgepaths.map(function(p,k){return k}),a=!0,o,s,l,u,c,f;function h(p){return Math.abs(p[1]-t[0][1])<.01}function d(p){return Math.abs(p[1]-t[2][1])<.01}function v(p){return Math.abs(p[0]-t[0][0])<.01}function _(p){return Math.abs(p[0]-t[2][0])<.01}for(;i.length;){for(f=qy.smoothopen(e.edgepaths[n],e.smoothing),r+=a?f:f.replace(/^M/,"L"),i.splice(i.indexOf(n),1),o=e.edgepaths[n][e.edgepaths[n].length-1],u=-1,l=0;l<4;l++){if(!o){zd.log("Missing end?",n,e);break}for(h(o)&&!_(o)?s=t[1]:v(o)?s=t[0]:d(o)?s=t[3]:_(o)&&(s=t[2]),c=0;c<e.edgepaths.length;c++){var b=e.edgepaths[c][0];Math.abs(o[0]-s[0])<.01?Math.abs(o[0]-b[0])<.01&&(b[1]-o[1])*(s[1]-b[1])>=0&&(s=b,u=c):Math.abs(o[1]-s[1])<.01?Math.abs(o[1]-b[1])<.01&&(b[0]-o[0])*(s[0]-b[0])>=0&&(s=b,u=c):zd.log("endpt to newendpt is not vert. or horz.",o,s,b)}if(o=s,u>=0)break;r+="L"+s}if(u===e.edgepaths.length){zd.log("unclosed perimeter path");break}n=u,a=i.indexOf(n)===-1,a&&(n=i[0],r+="Z")}for(n=0;n<e.paths.length;n++)r+=qy.smoothclosed(e.paths[n],e.smoothing);return r}function qbt(e,t,r,n,i){var a=r._context.staticPlot,o=zd.ensureSingle(e,"g","contourlines"),s=i.showlines!==!1,l=i.showlabels,u=s&&l,c=jv.createLines(o,s||l,t,a),f=jv.createLineClip(o,u,r,n.trace.uid),h=e.selectAll("g.contourlabels").data(l?[0]:[]);if(h.exit().remove(),h.enter().append("g").classed("contourlabels",!0),l){var d=[],v=[];zd.clearLocationCache();var _=jv.labelFormatter(r,n),b=qy.tester.append("text").attr("data-notex",1).call(qy.font,i.labelfont),p=t[0].xaxis,k=t[0].yaxis,E=p._length,S=k._length,L=p.range,x=k.range,C=zd.aggNums(Math.min,null,n.x),M=zd.aggNums(Math.max,null,n.x),g=zd.aggNums(Math.min,null,n.y),P=zd.aggNums(Math.max,null,n.y),T=Math.max(p.c2p(C,!0),0),z=Math.min(p.c2p(M,!0),E),O=Math.max(k.c2p(P,!0),0),V=Math.min(k.c2p(g,!0),S),G={};L[0]<L[1]?(G.left=T,G.right=z):(G.left=z,G.right=T),x[0]<x[1]?(G.top=O,G.bottom=V):(G.top=V,G.bottom=O),G.middle=(G.top+G.bottom)/2,G.center=(G.left+G.right)/2,d.push([[G.left,G.top],[G.right,G.top],[G.right,G.bottom],[G.left,G.bottom]]);var Z=Math.sqrt(E*E+S*S),j=R4.LABELDISTANCE*Z/Math.max(1,t.length/R4.LABELINCREASE);c.each(function(N){var H=jv.calcTextOpts(N.level,_,b,r);D4.select(this).selectAll("path").each(function(){var te=this,oe=zd.getVisibleSegment(te,G,H.height/2);if(oe&&!(oe.len<(H.width+H.height)*R4.LABELMIN))for(var _e=Math.min(Math.ceil(oe.len/j),R4.LABELMAX),Ee=0;Ee<_e;Ee++){var Ce=jv.findBestTextLocation(te,oe,H,v,G);if(!Ce)break;jv.addLabelData(Ce,H,v,d)}})}),b.remove(),jv.drawLabels(h,v,r,f,u?d:null)}l&&!s&&c.remove()}jv.createLines=function(e,t,r,n){var i=r[0].smoothing,a=e.selectAll("g.contourlevel").data(t?r:[]);if(a.exit().remove(),a.enter().append("g").classed("contourlevel",!0),t){var o=a.selectAll("path.openline").data(function(l){return l.pedgepaths||l.edgepaths});o.exit().remove(),o.enter().append("path").classed("openline",!0),o.attr("d",function(l){return qy.smoothopen(l,i)}).style("stroke-miterlimit",1).style("vector-effect",n?"none":"non-scaling-stroke");var s=a.selectAll("path.closedline").data(function(l){return l.ppaths||l.paths});s.exit().remove(),s.enter().append("path").classed("closedline",!0),s.attr("d",function(l){return qy.smoothclosed(l,i)}).style("stroke-miterlimit",1).style("vector-effect",n?"none":"non-scaling-stroke")}return a};jv.createLineClip=function(e,t,r,n){var i=r._fullLayout._clips,a=t?"clipline"+n:null,o=i.selectAll("#"+a).data(t?[0]:[]);return o.exit().remove(),o.enter().append("clipPath").classed("contourlineclip",!0).attr("id",a),qy.setClipUrl(e,a,r),o};jv.labelFormatter=function(e,t){var r=e._fullLayout,n=t.trace,i=n.contours,a={type:"linear",_id:"ycontour",showexponent:"all",exponentformat:"B"};if(i.labelformat)a.tickformat=i.labelformat,Tbe(a,r);else{var o=Ibt.extractOpts(n);if(o&&o.colorbar&&o.colorbar._axis)a=o.colorbar._axis;else{if(i.type==="constraint"){var s=i.value;zd.isArrayOrTypedArray(s)?a.range=[s[0],s[s.length-1]]:a.range=[s,s]}else a.range=[i.start,i.end],a.nticks=(i.end-i.start)/i.size;a.range[0]===a.range[1]&&(a.range[1]+=a.range[0]||1),a.nticks||(a.nticks=1e3),Tbe(a,r),wbe.prepTicks(a),a._tmin=null,a._tmax=null}}return function(l){return wbe.tickText(a,l).text}};jv.calcTextOpts=function(e,t,r,n){var i=t(e);r.text(i).call(Abe.convertToTspans,n);var a=r.node(),o=qy.bBox(a,!0);return{text:i,width:o.width,height:o.height,fontSize:+a.style["font-size"].replace("px",""),level:e,dy:(o.top+o.bottom)/2}};jv.findBestTextLocation=function(e,t,r,n,i){var a=r.width,o,s,l,u,c;t.isClosed?(s=t.len/Dm.INITIALSEARCHPOINTS,o=t.min+s/2,l=t.max):(s=(t.len-a)/(Dm.INITIALSEARCHPOINTS+1),o=t.min+s+a/2,l=t.max-(s+a)/2);for(var f=1/0,h=0;h<Dm.ITERATIONS;h++){for(var d=o;d<l;d+=s){var v=zd.getTextLocation(e,t.total,d,a),_=Bbt(v,r,n,i);_<f&&(f=_,c=v,u=d)}if(f>Dm.MAXCOST*2)break;h&&(s/=2),o=u-s/2,l=o+s*1.5}if(f<=Dm.MAXCOST)return c};function Bbt(e,t,r,n){var i=t.width/2,a=t.height/2,o=e.x,s=e.y,l=e.theta,u=Math.cos(l)*i,c=Math.sin(l)*i,f=(o>n.center?n.right-o:o-n.left)/(u+Math.abs(Math.sin(l)*a)),h=(s>n.middle?n.bottom-s:s-n.top)/(Math.abs(c)+Math.cos(l)*a);if(f<1||h<1)return 1/0;var d=Dm.EDGECOST*(1/(f-1)+1/(h-1));d+=Dm.ANGLECOST*l*l;for(var v=o-u,_=s-c,b=o+u,p=s+c,k=0;k<r.length;k++){var E=r[k],S=Math.cos(E.theta)*E.width/2,L=Math.sin(E.theta)*E.width/2,x=zd.segmentDistance(v,_,b,p,E.x-S,E.y-L,E.x+S,E.y+L)*2/(t.height+E.height),C=E.level===t.level,M=C?Dm.SAMELEVELDISTANCE:1;if(x<=M)return 1/0;var g=Dm.NEIGHBORCOST*(C?Dm.SAMELEVELFACTOR:1);d+=g/(x-M)}return d}jv.addLabelData=function(e,t,r,n){var i=t.fontSize,a=t.width+i/3,o=Math.max(0,t.height-i/3),s=e.x,l=e.y,u=e.theta,c=Math.sin(u),f=Math.cos(u),h=function(v,_){return[s+v*f-_*c,l+v*c+_*f]},d=[h(-a/2,-o/2),h(-a/2,o/2),h(a/2,o/2),h(a/2,-o/2)];r.push({text:t.text,x:s,y:l,dy:t.dy,theta:u,level:t.level,width:a,height:o}),n.push(d)};jv.drawLabels=function(e,t,r,n,i){var a=e.selectAll("text").data(t,function(u){return u.text+","+u.x+","+u.y+","+u.theta});if(a.exit().remove(),a.enter().append("text").attr({"data-notex":1,"text-anchor":"middle"}).each(function(u){var c=u.x+Math.sin(u.theta)*u.dy,f=u.y-Math.cos(u.theta)*u.dy;D4.select(this).text(u.text).attr({x:c,y:f,transform:"rotate("+180*u.theta/Math.PI+" "+c+" "+f+")"}).call(Abe.convertToTspans,r)}),i){for(var o="",s=0;s<i.length;s++)o+="M"+i[s].join("L")+"Z";var l=zd.ensureSingle(n,"path","");l.attr("d",o)}};function Nbt(e,t,r,n,i){var a=n.trace,o=r._fullLayout._clips,s="clip"+a.uid,l=o.selectAll("#"+s).data(a.connectgaps?[]:[0]);if(l.enter().append("clipPath").classed("contourclip",!0).attr("id",s),l.exit().remove(),a.connectgaps===!1){var u={level:.9,crossings:{},starts:[],edgepaths:[],paths:[],xaxis:t.xaxis,yaxis:t.yaxis,x:n.x,y:n.y,z:Ubt(n),smoothing:0};Sbe([u]),Mbe([u]),Ebe([u],{type:"levels"});var c=zd.ensureSingle(l,"path","");c.attr("d",(u.prefixBoundary?"M"+i.join("L")+"Z":"")+kbe(u,i))}else s=null;qy.setClipUrl(e,s,r)}function Ubt(e){var t=e.trace._emptypoints,r=[],n=e.z.length,i=e.z[0].length,a,o=[],s;for(a=0;a<i;a++)o.push(1);for(a=0;a<n;a++)r.push(o.slice());for(a=0;a<t.length;a++)s=t[a],r[s[0]][s[1]]=0;return e.zmask=r,r}});var AG=ye((bcr,Cbe)=>{"use strict";var Vbt=Oa(),TG=tc(),Gbt=L4();Cbe.exports=function(t){var r=t.contours,n=r.start,i=Gbt(r),a=r.size||1,o=Math.floor((i-n)/a)+1,s=r.coloring==="lines"?0:1,l=TG.extractOpts(t);isFinite(a)||(a=1,o=1);var u=l.reversescale?TG.flipScale(l.colorscale):l.colorscale,c=u.length,f=new Array(c),h=new Array(c),d,v,_=l.min,b=l.max;if(r.coloring==="heatmap"){for(v=0;v<c;v++)d=u[v],f[v]=d[0]*(b-_)+_,h[v]=d[1];var p=Vbt.extent([_,b,r.start,r.start+a*(o-1)]),k=p[_<b?0:1],E=p[_<b?1:0];k!==_&&(f.splice(0,0,k),h.splice(0,0,h[0])),E!==b&&(f.push(E),h.push(h[h.length-1]))}else{var S=t._input&&typeof t._input.zmin=="number"&&typeof t._input.zmax=="number";for(S&&(n<=_||i>=b)&&(n<=_&&(n=_),i>=b&&(i=b),o=Math.floor((i-n)/a)+1,s=0),v=0;v<c;v++)d=u[v],f[v]=(d[0]*(o+s-1)-s/2)*a+n,h[v]=d[1];(S||t.autocontour)&&(f[0]>_&&(f.unshift(_),h.unshift(h[0])),f[f.length-1]<b&&(f.push(b),h.push(h[h.length-1])))}return TG.makeColorScaleFunc({domain:f,range:h},{noNumericCheck:!0})}});var L8=ye((wcr,Pbe)=>{"use strict";var C8=Oa(),Lbe=So(),Hbt=v8(),jbt=AG();Pbe.exports=function(t){var r=C8.select(t).selectAll("g.contour");r.style("opacity",function(n){return n[0].trace.opacity}),r.each(function(n){var i=C8.select(this),a=n[0].trace,o=a.contours,s=a.line,l=o.size||1,u=o.start,c=o.type==="constraint",f=!c&&o.coloring==="lines",h=!c&&o.coloring==="fill",d=f||h?jbt(a):null;i.selectAll("g.contourlevel").each(function(b){C8.select(this).selectAll("path").call(Lbe.lineGroupStyle,s.width,f?d(b.level):s.color,s.dash)});var v=o.labelfont;if(i.selectAll("g.contourlabels text").each(function(b){Lbe.font(C8.select(this),{weight:v.weight,style:v.style,variant:v.variant,textcase:v.textcase,lineposition:v.lineposition,shadow:v.shadow,family:v.family,size:v.size,color:v.color||(f?d(b.level):s.color)})}),c)i.selectAll("g.contourfill path").style("fill",a.fillcolor);else if(h){var _;i.selectAll("g.contourfill path").style("fill",function(b){return _===void 0&&(_=b.level),d(b.level+.5*l)}),_===void 0&&(_=u),i.selectAll("g.contourbg path").style("fill",d(_-.5*l))}}),Hbt(t)}});var P8=ye((Tcr,Rbe)=>{"use strict";var Ibe=tc(),Wbt=AG(),Xbt=L4();function Zbt(e,t,r){var n=t.contours,i=t.line,a=n.size||1,o=n.coloring,s=Wbt(t,{isColorbar:!0});if(o==="heatmap"){var l=Ibe.extractOpts(t);r._fillgradient=l.reversescale?Ibe.flipScale(l.colorscale):l.colorscale,r._zrange=[l.min,l.max]}else o==="fill"&&(r._fillcolor=s);r._line={color:o==="lines"?s:i.color,width:n.showlines!==!1?i.width:0,dash:i.dash},r._levels={start:n.start,end:Xbt(n),size:a}}Rbe.exports={min:"zmin",max:"zmax",calc:Zbt}});var SG=ye((Acr,Dbe)=>{"use strict";var I8=ka(),Ybt=g8();Dbe.exports=function(t,r,n,i,a){a||(a={}),a.isContour=!0;var o=Ybt(t,r,n,i,a);return o&&o.forEach(function(s){var l=s.trace;l.contours.type==="constraint"&&(l.fillcolor&&I8.opacity(l.fillcolor)?s.color=I8.addOpacity(l.fillcolor,1):l.contours.showlines&&I8.opacity(l.line.color)&&(s.color=I8.addOpacity(l.line.color,1)))}),o}});var zbe=ye((Scr,Fbe)=>{"use strict";Fbe.exports={attributes:cG(),supplyDefaults:rbe(),crossTraceDefaults:_8(),calc:pG(),plot:k8().plot,layerName:"contourlayer",style:L8(),colorbar:P8(),hoverPoints:SG(),moduleType:"trace",name:"histogram2dcontour",basePlotModule:ph(),categories:["cartesian","svg","2dMap","contour","histogram","showLegend"],meta:{}}});var qbe=ye((Mcr,Obe)=>{"use strict";Obe.exports=zbe()});var MG=ye((Ecr,Hbe)=>{"use strict";var Bbe=Eo(),Kbt=fG(),Vbe=ka(),Nbe=Vbe.addOpacity,Jbt=Vbe.opacity,Gbe=w8(),Ube=Dr().isArrayOrTypedArray,$bt=Gbe.CONSTRAINT_REDUCTION,Qbt=Gbe.COMPARISON_OPS2;Hbe.exports=function(t,r,n,i,a,o){var s=r.contours,l,u,c,f=n("contours.operation");if(s._operation=$bt[f],e2t(n,s),f==="="?l=s.showlines=!0:(l=n("contours.showlines"),c=n("fillcolor",Nbe((t.line||{}).color||a,.5))),l){var h=c&&Jbt(c)?Nbe(r.fillcolor,1):a;u=n("line.color",h),n("line.width",2),n("line.dash")}n("line.smoothing"),Kbt(n,i,u,o)};function e2t(e,t){var r;Qbt.indexOf(t.operation)===-1?(e("contours.value",[0,1]),Ube(t.value)?t.value.length>2?t.value=t.value.slice(2):t.length===0?t.value=[0,1]:t.length<2?(r=parseFloat(t.value[0]),t.value=[r,r+1]):t.value=[parseFloat(t.value[0]),parseFloat(t.value[1])]:Bbe(t.value)&&(r=parseFloat(t.value),t.value=[r,r+1])):(e("contours.value",0),Bbe(t.value)||(Ube(t.value)?t.value=parseFloat(t.value[0]):t.value=0))}});var Xbe=ye((kcr,Wbe)=>{"use strict";var EG=Dr(),t2t=e8(),r2t=Ig(),i2t=MG(),n2t=A8(),a2t=S8(),o2t=A4(),jbe=C4();Wbe.exports=function(t,r,n,i){function a(u,c){return EG.coerce(t,r,jbe,u,c)}function o(u){return EG.coerce2(t,r,jbe,u)}var s=t2t(t,r,a,i);if(!s){r.visible=!1;return}r2t(t,r,i,a),a("xhoverformat"),a("yhoverformat"),a("text"),a("hovertext"),a("hoverongaps"),a("hovertemplate"),a("hovertemplatefallback");var l=a("contours.type")==="constraint";a("connectgaps",EG.isArray1D(r.z)),l?i2t(t,r,a,i,n):(n2t(t,r,a,o),a2t(t,r,a,i)),r.contours&&r.contours.coloring==="heatmap"&&o2t(a,i),a("zorder")}});var Ybe=ye((Ccr,Zbe)=>{"use strict";Zbe.exports={attributes:C4(),supplyDefaults:Xbe(),calc:pG(),plot:k8().plot,style:L8(),colorbar:P8(),hoverPoints:SG(),moduleType:"trace",name:"contour",basePlotModule:ph(),categories:["cartesian","svg","2dMap","contour","showLegend"],meta:{}}});var Jbe=ye((Lcr,Kbe)=>{"use strict";Kbe.exports=Ybe()});var kG=ye((Pcr,e2e)=>{"use strict";var{hovertemplateAttrs:s2t,texttemplateAttrs:l2t,templatefallbackAttrs:$be}=Ll(),u2t=Cg(),a0=pf(),c2t=Gl(),Qbe=Tu(),f2t=Pd().dash,M_=Ao().extendFlat,j0=a0.marker,F4=a0.line,h2t=j0.line;e2e.exports={a:{valType:"data_array",editType:"calc"},b:{valType:"data_array",editType:"calc"},c:{valType:"data_array",editType:"calc"},sum:{valType:"number",dflt:0,min:0,editType:"calc"},mode:M_({},a0.mode,{dflt:"markers"}),text:M_({},a0.text,{}),texttemplate:l2t({editType:"plot"},{keys:["a","b","c","text"]}),texttemplatefallback:$be({editType:"plot"}),hovertext:M_({},a0.hovertext,{}),line:{color:F4.color,width:F4.width,dash:f2t,backoff:F4.backoff,shape:M_({},F4.shape,{values:["linear","spline"]}),smoothing:F4.smoothing,editType:"calc"},connectgaps:a0.connectgaps,cliponaxis:a0.cliponaxis,fill:M_({},a0.fill,{values:["none","toself","tonext"],dflt:"none"}),fillcolor:u2t(),marker:M_({symbol:j0.symbol,opacity:j0.opacity,angle:j0.angle,angleref:j0.angleref,standoff:j0.standoff,maxdisplayed:j0.maxdisplayed,size:j0.size,sizeref:j0.sizeref,sizemin:j0.sizemin,sizemode:j0.sizemode,line:M_({width:h2t.width,editType:"calc"},Qbe("marker.line")),gradient:j0.gradient,editType:"calc"},Qbe("marker")),textfont:a0.textfont,textposition:a0.textposition,selected:a0.selected,unselected:a0.unselected,hoverinfo:M_({},c2t.hoverinfo,{flags:["a","b","c","text","name"]}),hoveron:a0.hoveron,hovertemplate:s2t(),hovertemplatefallback:$be()}});var n2e=ye((Icr,i2e)=>{"use strict";var t2e=Dr(),d2t=Sm(),BT=Ru(),v2t=$p(),p2t=R0(),r2e=tT(),g2t=D0(),m2t=Rg(),y2t=kG();i2e.exports=function(t,r,n,i){function a(h,d){return t2e.coerce(t,r,y2t,h,d)}var o=a("a"),s=a("b"),l=a("c"),u;if(o?(u=o.length,s?(u=Math.min(u,s.length),l&&(u=Math.min(u,l.length))):l?u=Math.min(u,l.length):u=0):s&&l&&(u=Math.min(s.length,l.length)),!u){r.visible=!1;return}r._length=u,a("sum"),a("text"),a("hovertext"),r.hoveron!=="fills"&&(a("hovertemplate"),a("hovertemplatefallback"));var c=u<d2t.PTS_LINESONLY?"lines+markers":"lines";a("mode",c),BT.hasMarkers(r)&&v2t(t,r,n,i,a,{gradient:!0}),BT.hasLines(r)&&(p2t(t,r,n,i,a,{backoff:!0}),r2e(t,r,a),a("connectgaps")),BT.hasText(r)&&(a("texttemplate"),a("texttemplatefallback"),g2t(t,r,i,a));var f=[];(BT.hasMarkers(r)||BT.hasText(r))&&(a("cliponaxis"),a("marker.maxdisplayed"),f.push("points")),a("fill"),r.fill!=="none"&&(m2t(t,r,n,a),BT.hasLines(r)||r2e(t,r,a)),(r.fill==="tonext"||r.fill==="toself")&&f.push("fills"),a("hoveron",f.join("+")||"points"),t2e.coerceSelectionMarkerOpacity(r,a)}});var o2e=ye((Rcr,a2e)=>{"use strict";var CG=ho();a2e.exports=function(t,r,n){var i={},a=n[r.subplot]._subplot;return i.aLabel=CG.tickText(a.aaxis,t.a,!0).text,i.bLabel=CG.tickText(a.baxis,t.b,!0).text,i.cLabel=CG.tickText(a.caxis,t.c,!0).text,i}});var c2e=ye((Dcr,u2e)=>{"use strict";var LG=Eo(),_2t=F0(),x2t=km(),b2t=z0(),w2t=O0().calcMarkerSize,s2e=["a","b","c"],l2e={a:["b","c"],b:["a","c"],c:["a","b"]};u2e.exports=function(t,r){var n=t._fullLayout[r.subplot],i=n.sum,a=r.sum||i,o={a:r.a,b:r.b,c:r.c},s=r.ids,l,u,c,f,h,d;for(l=0;l<s2e.length;l++)if(c=s2e[l],!o[c]){for(h=o[l2e[c][0]],d=o[l2e[c][1]],f=new Array(h.length),u=0;u<h.length;u++)f[u]=a-h[u]-d[u];o[c]=f}var v=r._length,_=new Array(v),b,p,k,E,S,L;for(l=0;l<v;l++)b=o.a[l],p=o.b[l],k=o.c[l],LG(b)&&LG(p)&&LG(k)?(b=+b,p=+p,k=+k,E=i/(b+p+k),E!==1&&(b*=E,p*=E,k*=E),L=b,S=k-p,_[l]={x:S,y:L,a:b,b:p,c:k},s&&(_[l].id=s[l])):_[l]={x:!1,y:!1};return w2t(r,v),_2t(t,r),x2t(_,r),b2t(_,r),_}});var h2e=ye((Fcr,f2e)=>{"use strict";var T2t=sT();f2e.exports=function(t,r,n){var i=r.plotContainer;i.select(".scatterlayer").selectAll("*").remove();for(var a=r.xaxis,o=r.yaxis,s={xaxis:a,yaxis:o,plot:i,layerClipId:r._hasClipOnAxisFalse?r.clipIdRelative:null},l=r.layers.frontplot.select("g.scatterlayer"),u=0;u<n.length;u++){var c=n[u];c.length&&(c[0].trace._xA=a,c[0].trace._yA=o)}T2t(t,s,n,l)}});var v2e=ye((zcr,d2e)=>{"use strict";var A2t=fT();d2e.exports=function(t,r,n,i){var a=A2t(t,r,n,i);if(!a||a[0].index===!1)return;var o=a[0];if(o.index===void 0){var s=1-o.y0/t.ya._length,l=t.xa._length,u=l*s/2,c=l-u;return o.x0=Math.max(Math.min(o.x0,c),u),o.x1=Math.max(Math.min(o.x1,c),u),a}var f=o.cd[o.index],h=o.trace,d=o.subplot;o.a=f.a,o.b=f.b,o.c=f.c,o.xLabelVal=void 0,o.yLabelVal=void 0;var v={};v[h.subplot]={_subplot:d};var _=h._module.formatLabels(f,h,v);o.aLabel=_.aLabel,o.bLabel=_.bLabel,o.cLabel=_.cLabel;var b=f.hi||h.hoverinfo,p=[];function k(S,L){p.push(S._hovertitle+": "+L)}if(!h.hovertemplate){var E=b.split("+");E.indexOf("all")!==-1&&(E=["a","b","c"]),E.indexOf("a")!==-1&&k(d.aaxis,o.aLabel),E.indexOf("b")!==-1&&k(d.baxis,o.bLabel),E.indexOf("c")!==-1&&k(d.caxis,o.cLabel)}return o.extraText=p.join("<br>"),o.hovertemplate=h.hovertemplate,a}});var g2e=ye((Ocr,p2e)=>{"use strict";p2e.exports=function(t,r,n,i,a){if(r.xa&&(t.xaxis=r.xa),r.ya&&(t.yaxis=r.ya),i[a]){var o=i[a];t.a=o.a,t.b=o.b,t.c=o.c}else t.a=r.a,t.b=r.b,t.c=r.c;return t}});var E2e=ye((qcr,M2e)=>{"use strict";var w2e=Oa(),S2t=cd(),PG=qa(),By=Dr(),Fm=By.strTranslate,R8=By._,UT=ka(),D8=So(),z4=ym(),IG=Ao().extendFlat,M2t=Mc(),E_=ho(),m2e=yv(),y2e=vf(),T2e=Eg(),_2e=T2e.freeMode,E2t=T2e.rectMode,RG=Eb(),k2t=Of().prepSelect,C2t=Of().selectOnClick,L2t=Of().clearOutline,P2t=Of().clearSelectionsCache,A2e=hd();function S2e(e,t){this.id=e.id,this.graphDiv=e.graphDiv,this.init(t),this.makeFramework(t),this.updateFx(t),this.aTickLayout=null,this.bTickLayout=null,this.cTickLayout=null}M2e.exports=S2e;var zm=S2e.prototype;zm.init=function(e){this.container=e._ternarylayer,this.defs=e._defs,this.layoutId=e._uid,this.traceHash={},this.layers={}};zm.plot=function(e,t){var r=this,n=t[r.id],i=t._size;r._hasClipOnAxisFalse=!1;for(var a=0;a<e.length;a++){var o=e[a][0].trace;if(o.cliponaxis===!1){r._hasClipOnAxisFalse=!0;break}}r.updateLayers(n),r.adjustLayout(n,i),M2t.generalUpdatePerTraceModule(r.graphDiv,r,e,n),r.layers.plotbg.select("path").call(UT.fill,n.bgcolor)};zm.makeFramework=function(e){var t=this,r=t.graphDiv,n=e[t.id],i=t.clipId="clip"+t.layoutId+t.id,a=t.clipIdRelative="clip-relative"+t.layoutId+t.id;t.clipDef=By.ensureSingleById(e._clips,"clipPath",i,function(o){o.append("path").attr("d","M0,0Z")}),t.clipDefRelative=By.ensureSingleById(e._clips,"clipPath",a,function(o){o.append("path").attr("d","M0,0Z")}),t.plotContainer=By.ensureSingle(t.container,"g",t.id),t.updateLayers(n),D8.setClipUrl(t.layers.backplot,i,r),D8.setClipUrl(t.layers.grids,i,r)};zm.updateFx=function(e){e._ternarylayer.selectAll("g.toplevel").style("cursor",e.dragmode==="pan"?"move":"crosshair")};zm.updateLayers=function(e){var t=this,r=t.layers,n=["draglayer","plotbg","backplot","grids"];e.aaxis.layer==="below traces"&&n.push("aaxis","aline"),e.baxis.layer==="below traces"&&n.push("baxis","bline"),e.caxis.layer==="below traces"&&n.push("caxis","cline"),n.push("frontplot"),e.aaxis.layer==="above traces"&&n.push("aaxis","aline"),e.baxis.layer==="above traces"&&n.push("baxis","bline"),e.caxis.layer==="above traces"&&n.push("caxis","cline");var i=t.plotContainer.selectAll("g.toplevel").data(n,String),a=["agrid","bgrid","cgrid"];i.enter().append("g").attr("class",function(o){return"toplevel "+o}).each(function(o){var s=w2e.select(this);r[o]=s,o==="frontplot"?s.append("g").classed("scatterlayer",!0):o==="backplot"?s.append("g").classed("maplayer",!0):o==="plotbg"?s.append("path").attr("d","M0,0Z"):o==="aline"||o==="bline"||o==="cline"?s.append("path"):o==="grids"&&a.forEach(function(l){r[l]=s.append("g").classed("grid "+l,!0)})}),i.order()};var NT=Math.sqrt(4/3);zm.adjustLayout=function(e,t){var r=this,n=e.domain,i=(n.x[0]+n.x[1])/2,a=(n.y[0]+n.y[1])/2,o=n.x[1]-n.x[0],s=n.y[1]-n.y[0],l=o*t.w,u=s*t.h,c=e.sum,f=e.aaxis.min,h=e.baxis.min,d=e.caxis.min,v,_,b,p,k,E;l>NT*u?(p=u,b=p*NT):(b=l,p=b/NT),k=o*b/l,E=s*p/u,v=t.l+t.w*i-b/2,_=t.t+t.h*(1-a)-p/2,r.x0=v,r.y0=_,r.w=b,r.h=p,r.sum=c,r.xaxis={type:"linear",range:[f+2*d-c,c-f-2*h],domain:[i-k/2,i+k/2],_id:"x"},z4(r.xaxis,r.graphDiv._fullLayout),r.xaxis.setScale(),r.xaxis.isPtWithinRange=function(V){return V.a>=r.aaxis.range[0]&&V.a<=r.aaxis.range[1]&&V.b>=r.baxis.range[1]&&V.b<=r.baxis.range[0]&&V.c>=r.caxis.range[1]&&V.c<=r.caxis.range[0]},r.yaxis={type:"linear",range:[f,c-h-d],domain:[a-E/2,a+E/2],_id:"y"},z4(r.yaxis,r.graphDiv._fullLayout),r.yaxis.setScale(),r.yaxis.isPtWithinRange=function(){return!0};var S=r.yaxis.domain[0],L=r.aaxis=IG({},e.aaxis,{range:[f,c-h-d],side:"left",tickangle:(+e.aaxis.tickangle||0)-30,domain:[S,S+E*NT],anchor:"free",position:0,_id:"y",_length:b});z4(L,r.graphDiv._fullLayout),L.setScale();var x=r.baxis=IG({},e.baxis,{range:[c-f-d,h],side:"bottom",domain:r.xaxis.domain,anchor:"free",position:0,_id:"x",_length:b});z4(x,r.graphDiv._fullLayout),x.setScale();var C=r.caxis=IG({},e.caxis,{range:[c-f-h,d],side:"right",tickangle:(+e.caxis.tickangle||0)+30,domain:[S,S+E*NT],anchor:"free",position:0,_id:"y",_length:b});z4(C,r.graphDiv._fullLayout),C.setScale();var M="M"+v+","+(_+p)+"h"+b+"l-"+b/2+",-"+p+"Z";r.clipDef.select("path").attr("d",M),r.layers.plotbg.select("path").attr("d",M);var g="M0,"+p+"h"+b+"l-"+b/2+",-"+p+"Z";r.clipDefRelative.select("path").attr("d",g);var P=Fm(v,_);r.plotContainer.selectAll(".scatterlayer,.maplayer").attr("transform",P),r.clipDefRelative.select("path").attr("transform",null);var T=Fm(v-x._offset,_+p);r.layers.baxis.attr("transform",T),r.layers.bgrid.attr("transform",T);var z=Fm(v+b/2,_)+"rotate(30)"+Fm(0,-L._offset);r.layers.aaxis.attr("transform",z),r.layers.agrid.attr("transform",z);var O=Fm(v+b/2,_)+"rotate(-30)"+Fm(0,-C._offset);r.layers.caxis.attr("transform",O),r.layers.cgrid.attr("transform",O),r.drawAxes(!0),r.layers.aline.select("path").attr("d",L.showline?"M"+v+","+(_+p)+"l"+b/2+",-"+p:"M0,0").call(UT.stroke,L.linecolor||"#000").style("stroke-width",(L.linewidth||0)+"px"),r.layers.bline.select("path").attr("d",x.showline?"M"+v+","+(_+p)+"h"+b:"M0,0").call(UT.stroke,x.linecolor||"#000").style("stroke-width",(x.linewidth||0)+"px"),r.layers.cline.select("path").attr("d",C.showline?"M"+(v+b/2)+","+_+"l"+b/2+","+p:"M0,0").call(UT.stroke,C.linecolor||"#000").style("stroke-width",(C.linewidth||0)+"px"),r.graphDiv._context.staticPlot||r.initInteractions(),D8.setClipUrl(r.layers.frontplot,r._hasClipOnAxisFalse?null:r.clipId,r.graphDiv)};zm.drawAxes=function(e){var t=this,r=t.graphDiv,n=t.id.slice(7)+"title",i=t.layers,a=t.aaxis,o=t.baxis,s=t.caxis;if(t.drawAx(a),t.drawAx(o),t.drawAx(s),e){var l=Math.max(a.showticklabels?a.tickfont.size/2:0,(s.showticklabels?s.tickfont.size*.75:0)+(s.ticks==="outside"?s.ticklen*.87:0)),u=(o.showticklabels?o.tickfont.size:0)+(o.ticks==="outside"?o.ticklen:0)+3;i["a-title"]=RG.draw(r,"a"+n,{propContainer:a,propName:t.id+".aaxis.title.text",placeholder:R8(r,"Click to enter Component A title"),attributes:{x:t.x0+t.w/2,y:t.y0-a.title.font.size/3-l,"text-anchor":"middle"}}),i["b-title"]=RG.draw(r,"b"+n,{propContainer:o,propName:t.id+".baxis.title.text",placeholder:R8(r,"Click to enter Component B title"),attributes:{x:t.x0-u,y:t.y0+t.h+o.title.font.size*.83+u,"text-anchor":"middle"}}),i["c-title"]=RG.draw(r,"c"+n,{propContainer:s,propName:t.id+".caxis.title.text",placeholder:R8(r,"Click to enter Component C title"),attributes:{x:t.x0+t.w+u,y:t.y0+t.h+s.title.font.size*.83+u,"text-anchor":"middle"}})}};zm.drawAx=function(e){var t=this,r=t.graphDiv,n=e._name,i=n.charAt(0),a=e._id,o=t.layers[n],s=30,l=i+"tickLayout",u=I2t(e);t[l]!==u&&(o.selectAll("."+a+"tick").remove(),t[l]=u),e.setScale();var c=E_.calcTicks(e),f=E_.clipEnds(e,c),h=E_.makeTransTickFn(e),d=E_.getTickSigns(e)[2],v=By.deg2rad(s),_=d*(e.linewidth||1)/2,b=d*e.ticklen,p=t.w,k=t.h,E=i==="b"?"M0,"+_+"l"+Math.sin(v)*b+","+Math.cos(v)*b:"M"+_+",0l"+Math.cos(v)*b+","+-Math.sin(v)*b,S={a:"M0,0l"+k+",-"+p/2,b:"M0,0l-"+p/2+",-"+k,c:"M0,0l-"+k+","+p/2}[i];E_.drawTicks(r,e,{vals:e.ticks==="inside"?f:c,layer:o,path:E,transFn:h,crisp:!1}),E_.drawGrid(r,e,{vals:f,layer:t.layers[i+"grid"],path:S,transFn:h,crisp:!1}),E_.drawLabels(r,e,{vals:c,layer:o,transFn:h,labelFns:E_.makeLabelFns(e,0,s)})};function I2t(e){return e.ticks+String(e.ticklen)+String(e.showticklabels)}var yd=A2e.MINZOOM/2+.87,R2t="m-0.87,.5h"+yd+"v3h-"+(yd+5.2)+"l"+(yd/2+2.6)+",-"+(yd*.87+4.5)+"l2.6,1.5l-"+yd/2+","+yd*.87+"Z",D2t="m0.87,.5h-"+yd+"v3h"+(yd+5.2)+"l-"+(yd/2+2.6)+",-"+(yd*.87+4.5)+"l-2.6,1.5l"+yd/2+","+yd*.87+"Z",F2t="m0,1l"+yd/2+","+yd*.87+"l2.6,-1.5l-"+(yd/2+2.6)+",-"+(yd*.87+4.5)+"l-"+(yd/2+2.6)+","+(yd*.87+4.5)+"l2.6,1.5l"+yd/2+",-"+yd*.87+"Z",z2t="m0.5,0.5h5v-2h-5v-5h-2v5h-5v2h5v5h2Z",x2e=!0;zm.clearOutline=function(){P2t(this.dragOptions),L2t(this.dragOptions.gd)};zm.initInteractions=function(){var e=this,t=e.layers.plotbg.select("path").node(),r=e.graphDiv,n=r._fullLayout._zoomlayer,i,a;this.dragOptions={element:t,gd:r,plotinfo:{id:e.id,domain:r._fullLayout[e.id].domain,xaxis:e.xaxis,yaxis:e.yaxis},subplot:e.id,prepFn:function(T,z,O){e.dragOptions.xaxes=[e.xaxis],e.dragOptions.yaxes=[e.yaxis],i=r._fullLayout._invScaleX,a=r._fullLayout._invScaleY;var V=e.dragOptions.dragmode=r._fullLayout.dragmode;_2e(V)?e.dragOptions.minDrag=1:e.dragOptions.minDrag=void 0,V==="zoom"?(e.dragOptions.moveFn=x,e.dragOptions.clickFn=p,e.dragOptions.doneFn=C,k(T,z,O)):V==="pan"?(e.dragOptions.moveFn=g,e.dragOptions.clickFn=p,e.dragOptions.doneFn=P,M(),e.clearOutline(r)):(E2t(V)||_2e(V))&&k2t(T,z,O,e.dragOptions,V)}};var o,s,l,u,c,f,h,d,v,_;function b(T){var z={};return z[e.id+".aaxis.min"]=T.a,z[e.id+".baxis.min"]=T.b,z[e.id+".caxis.min"]=T.c,z}function p(T,z){var O=r._fullLayout.clickmode;b2e(r),T===2&&(r.emit("plotly_doubleclick",null),PG.call("_guiRelayout",r,b({a:0,b:0,c:0}))),O.indexOf("select")>-1&&T===1&&C2t(z,r,[e.xaxis],[e.yaxis],e.id,e.dragOptions),O.indexOf("event")>-1&&y2e.click(r,z,e.id)}function k(T,z,O){var V=t.getBoundingClientRect();o=z-V.left,s=O-V.top,r._fullLayout._calcInverseTransform(r);var G=r._fullLayout._invTransform,Z=By.apply3DTransform(G)(o,s);o=Z[0],s=Z[1],l={a:e.aaxis.range[0],b:e.baxis.range[1],c:e.caxis.range[1]},c=l,u=e.aaxis.range[1]-l.a,f=S2t(e.graphDiv._fullLayout[e.id].bgcolor).getLuminance(),h="M0,"+e.h+"L"+e.w/2+", 0L"+e.w+","+e.h+"Z",d=!1,v=n.append("path").attr("class","zoombox").attr("transform",Fm(e.x0,e.y0)).style({fill:f>.2?"rgba(0,0,0,0)":"rgba(255,255,255,0)","stroke-width":0}).attr("d",h),_=n.append("path").attr("class","zoombox-corners").attr("transform",Fm(e.x0,e.y0)).style({fill:UT.background,stroke:UT.defaultLine,"stroke-width":1,opacity:0}).attr("d","M0,0Z"),e.clearOutline(r)}function E(T,z){return 1-z/e.h}function S(T,z){return 1-(T+(e.h-z)/Math.sqrt(3))/e.w}function L(T,z){return(T-(e.h-z)/Math.sqrt(3))/e.w}function x(T,z){var O=o+T*i,V=s+z*a,G=Math.max(0,Math.min(1,E(o,s),E(O,V))),Z=Math.max(0,Math.min(1,S(o,s),S(O,V))),j=Math.max(0,Math.min(1,L(o,s),L(O,V))),N=(G/2+j)*e.w,H=(1-G/2-Z)*e.w,te=(N+H)/2,oe=H-N,_e=(1-G)*e.h,Ee=_e-oe/NT;oe<A2e.MINZOOM?(c=l,v.attr("d",h),_.attr("d","M0,0Z")):(c={a:l.a+G*u,b:l.b+Z*u,c:l.c+j*u},v.attr("d",h+"M"+N+","+_e+"H"+H+"L"+te+","+Ee+"L"+N+","+_e+"Z"),_.attr("d","M"+o+","+s+z2t+"M"+N+","+_e+R2t+"M"+H+","+_e+D2t+"M"+te+","+Ee+F2t)),d||(v.transition().style("fill",f>.2?"rgba(0,0,0,0.4)":"rgba(255,255,255,0.3)").duration(200),_.transition().style("opacity",1).duration(200),d=!0),r.emit("plotly_relayouting",b(c))}function C(){b2e(r),c!==l&&(PG.call("_guiRelayout",r,b(c)),x2e&&r.data&&r._context.showTips&&(By.notifier(R8(r,"Double-click to zoom back out"),"long"),x2e=!1))}function M(){l={a:e.aaxis.range[0],b:e.baxis.range[1],c:e.caxis.range[1]},c=l}function g(T,z){var O=T/e.xaxis._m,V=z/e.yaxis._m;c={a:l.a-V,b:l.b+(O+V)/2,c:l.c-(O-V)/2};var G=[c.a,c.b,c.c].sort(By.sorterAsc),Z={a:G.indexOf(c.a),b:G.indexOf(c.b),c:G.indexOf(c.c)};G[0]<0&&(G[1]+G[0]/2<0?(G[2]+=G[0]+G[1],G[0]=G[1]=0):(G[2]+=G[0]/2,G[1]+=G[0]/2,G[0]=0),c={a:G[Z.a],b:G[Z.b],c:G[Z.c]},z=(l.a-c.a)*e.yaxis._m,T=(l.c-c.c-l.b+c.b)*e.xaxis._m);var j=Fm(e.x0+T,e.y0+z);e.plotContainer.selectAll(".scatterlayer,.maplayer").attr("transform",j);var N=Fm(-T,-z);e.clipDefRelative.select("path").attr("transform",N),e.aaxis.range=[c.a,e.sum-c.b-c.c],e.baxis.range=[e.sum-c.a-c.c,c.b],e.caxis.range=[e.sum-c.a-c.b,c.c],e.drawAxes(!1),e._hasClipOnAxisFalse&&e.plotContainer.select(".scatterlayer").selectAll(".trace").call(D8.hideOutsideRangePoints,e),r.emit("plotly_relayouting",b(c))}function P(){PG.call("_guiRelayout",r,b(c))}t.onmousemove=function(T){y2e.hover(r,T,e.id),r._fullLayout._lasthover=t,r._fullLayout._hoversubplot=e.id},t.onmouseout=function(T){r._dragging||m2e.unhover(r,T)},m2e.init(this.dragOptions)};function b2e(e){w2e.select(e).selectAll(".zoombox,.js-zoombox-backdrop,.js-zoombox-menu,.zoombox-corners").remove()}});var zG=ye((Bcr,k2e)=>{"use strict";var O2t=Lh(),q2t=Cc().attributes,su=Rd(),B2t=mc().overrideAll,DG=Ao().extendFlat,FG={title:{text:su.title.text,font:su.title.font},color:su.color,tickmode:su.minor.tickmode,nticks:DG({},su.nticks,{dflt:6,min:1}),tick0:su.tick0,dtick:su.dtick,tickvals:su.tickvals,ticktext:su.ticktext,ticks:su.ticks,ticklen:su.ticklen,tickwidth:su.tickwidth,tickcolor:su.tickcolor,ticklabelstep:su.ticklabelstep,showticklabels:su.showticklabels,labelalias:su.labelalias,showtickprefix:su.showtickprefix,tickprefix:su.tickprefix,showticksuffix:su.showticksuffix,ticksuffix:su.ticksuffix,showexponent:su.showexponent,exponentformat:su.exponentformat,minexponent:su.minexponent,separatethousands:su.separatethousands,tickfont:su.tickfont,tickangle:su.tickangle,tickformat:su.tickformat,tickformatstops:su.tickformatstops,hoverformat:su.hoverformat,showline:DG({},su.showline,{dflt:!0}),linecolor:su.linecolor,linewidth:su.linewidth,showgrid:DG({},su.showgrid,{dflt:!0}),gridcolor:su.gridcolor,gridwidth:su.gridwidth,griddash:su.griddash,layer:su.layer,min:{valType:"number",dflt:0,min:0}},F8=k2e.exports=B2t({domain:q2t({name:"ternary"}),bgcolor:{valType:"color",dflt:O2t.background},sum:{valType:"number",dflt:1,min:0},aaxis:FG,baxis:FG,caxis:FG},"plot","from-root");F8.uirevision={valType:"any",editType:"none"};F8.aaxis.uirevision=F8.baxis.uirevision=F8.caxis.uirevision={valType:"any",editType:"none"}});var k_=ye((Ncr,C2e)=>{"use strict";var N2t=Dr(),U2t=vl(),V2t=Cc().defaults;C2e.exports=function(t,r,n,i){var a=i.type,o=i.attributes,s=i.handleDefaults,l=i.partition||"x",u=r._subplots[a],c=u.length,f=c&&u[0].replace(/\d+$/,""),h,d;function v(k,E){return N2t.coerce(h,d,o,k,E)}for(var _=0;_<c;_++){var b=u[_];t[b]?h=t[b]:h=t[b]={},d=U2t.newContainer(r,b,f),i.noUirevision||v("uirevision",r.uirevision);var p={};p[l]=[_/c,(_+1)/c],V2t(d,r,v,p),i.id=b,s(h,d,v,i)}}});var R2e=ye((Ucr,I2e)=>{"use strict";var G2t=ka(),H2t=vl(),z8=Dr(),j2t=k_(),W2t=e_(),X2t=t_(),Z2t=M3(),Y2t=bb(),K2t=t4(),P2e=zG(),L2e=["aaxis","baxis","caxis"];I2e.exports=function(t,r,n){j2t(t,r,n,{type:"ternary",attributes:P2e,handleDefaults:J2t,font:r.font,paper_bgcolor:r.paper_bgcolor})};function J2t(e,t,r,n){var i=r("bgcolor"),a=r("sum");n.bgColor=G2t.combine(i,n.paper_bgcolor);for(var o,s,l,u=0;u<L2e.length;u++)o=L2e[u],s=e[o]||{},l=H2t.newContainer(t,o),l._name=o,$2t(s,l,n,t);var c=t.aaxis,f=t.baxis,h=t.caxis;c.min+f.min+h.min>=a&&(c.min=0,f.min=0,h.min=0,e.aaxis&&delete e.aaxis.min,e.baxis&&delete e.baxis.min,e.caxis&&delete e.caxis.min)}function $2t(e,t,r,n){var i=P2e[t._name];function a(d,v){return z8.coerce(e,t,i,d,v)}a("uirevision",n.uirevision),t.type="linear";var o=a("color"),s=o!==i.color.dflt?o:r.font.color,l=t._name,u=l.charAt(0).toUpperCase(),c="Component "+u,f=a("title.text",c);t._hovertitle=f===c?f:u,z8.coerceFont(a,"title.font",r.font,{overrideDflt:{size:z8.bigFont(r.font.size),color:s}}),a("min"),Y2t(e,t,a,"linear"),X2t(e,t,a,"linear"),W2t(e,t,a,"linear",{noAutotickangles:!0,noTicklabelshift:!0,noTicklabelstandoff:!0}),Z2t(e,t,a,{outerTicks:!0});var h=a("showticklabels");h&&(z8.coerceFont(a,"tickfont",r.font,{overrideDflt:{color:s}}),a("tickangle"),a("tickformat")),K2t(e,t,a,{dfltColor:o,bgColor:r.bgColor,blend:60,showLine:!0,showGrid:!0,noZeroLine:!0,attributes:i}),a("hoverformat"),a("layer")}});var D2e=ye(W0=>{"use strict";var Q2t=E2e(),ewt=Id().getSubplotCalcData,twt=Dr().counterRegex,VT="ternary";W0.name=VT;var rwt=W0.attr="subplot";W0.idRoot=VT;W0.idRegex=W0.attrRegex=twt(VT);var iwt=W0.attributes={};iwt[rwt]={valType:"subplotid",dflt:"ternary",editType:"calc"};W0.layoutAttributes=zG();W0.supplyLayoutDefaults=R2e();W0.plot=function(t){for(var r=t._fullLayout,n=t.calcdata,i=r._subplots[VT],a=0;a<i.length;a++){var o=i[a],s=ewt(n,VT,o),l=r[o]._subplot;l||(l=new Q2t({id:o,graphDiv:t,container:r._ternarylayer.node()},r),r[o]._subplot=l),l.plot(s,r,t._promises)}};W0.clean=function(e,t,r,n){for(var i=n._subplots[VT]||[],a=0;a<i.length;a++){var o=i[a],s=n[o]._subplot;!t[o]&&s&&(s.plotContainer.remove(),s.clipDef.remove(),s.clipDefRelative.remove(),s.layers["a-title"].remove(),s.layers["b-title"].remove(),s.layers["c-title"].remove())}};W0.updateFx=function(e){var t=e._fullLayout;t._ternarylayer.selectAll("g.toplevel").style("cursor",t.dragmode==="pan"?"move":"crosshair")}});var z2e=ye((Gcr,F2e)=>{"use strict";F2e.exports={attributes:kG(),supplyDefaults:n2e(),colorbar:$d(),formatLabels:o2e(),calc:c2e(),plot:h2e(),style:op().style,styleOnSelect:op().styleOnSelect,hoverPoints:v2e(),selectPoints:hT(),eventData:g2e(),moduleType:"trace",name:"scatterternary",basePlotModule:D2e(),categories:["ternary","symbols","showLegend","scatter-like"],meta:{}}});var q2e=ye((Hcr,O2e)=>{"use strict";O2e.exports=z2e()});var OG=ye((jcr,N2e)=>{"use strict";var Fh=x4(),GT=Ao().extendFlat,B2e=df().axisHoverFormat;N2e.exports={y:Fh.y,x:Fh.x,x0:Fh.x0,y0:Fh.y0,xhoverformat:B2e("x"),yhoverformat:B2e("y"),name:GT({},Fh.name,{}),orientation:GT({},Fh.orientation,{}),bandwidth:{valType:"number",min:0,editType:"calc"},scalegroup:{valType:"string",dflt:"",editType:"calc"},scalemode:{valType:"enumerated",values:["width","count"],dflt:"width",editType:"calc"},spanmode:{valType:"enumerated",values:["soft","hard","manual"],dflt:"soft",editType:"calc"},span:{valType:"info_array",items:[{valType:"any",editType:"calc"},{valType:"any",editType:"calc"}],editType:"calc"},line:{color:{valType:"color",editType:"style"},width:{valType:"number",min:0,dflt:2,editType:"style"},editType:"plot"},fillcolor:Fh.fillcolor,points:GT({},Fh.boxpoints,{}),jitter:GT({},Fh.jitter,{}),pointpos:GT({},Fh.pointpos,{}),width:GT({},Fh.width,{}),marker:Fh.marker,text:Fh.text,hovertext:Fh.hovertext,hovertemplate:Fh.hovertemplate,hovertemplatefallback:Fh.hovertemplatefallback,quartilemethod:Fh.quartilemethod,box:{visible:{valType:"boolean",dflt:!1,editType:"plot"},width:{valType:"number",min:0,max:1,dflt:.25,editType:"plot"},fillcolor:{valType:"color",editType:"style"},line:{color:{valType:"color",editType:"style"},width:{valType:"number",min:0,editType:"style"},editType:"style"},editType:"plot"},meanline:{visible:{valType:"boolean",dflt:!1,editType:"plot"},color:{valType:"color",editType:"style"},width:{valType:"number",min:0,editType:"style"},editType:"plot"},side:{valType:"enumerated",values:["both","positive","negative"],dflt:"both",editType:"calc"},offsetgroup:Fh.offsetgroup,alignmentgroup:Fh.alignmentgroup,selected:Fh.selected,unselected:Fh.unselected,hoveron:{valType:"flaglist",flags:["violins","points","kde"],dflt:"violins+points+kde",extras:["all"],editType:"style"},zorder:Fh.zorder}});var NG=ye((Wcr,U2e)=>{"use strict";var qG=b4(),BG=Dr().extendFlat;U2e.exports={violinmode:BG({},qG.boxmode,{}),violingap:BG({},qG.boxgap,{}),violingroupgap:BG({},qG.boxgroupgap,{})}});var W2e=ye((Xcr,j2e)=>{"use strict";var V2e=Dr(),nwt=ka(),G2e=T4(),H2e=OG();j2e.exports=function(t,r,n,i){function a(L,x){return V2e.coerce(t,r,H2e,L,x)}function o(L,x){return V2e.coerce2(t,r,H2e,L,x)}if(G2e.handleSampleDefaults(t,r,a,i),r.visible!==!1){a("bandwidth"),a("side");var s=a("width");s||(a("scalegroup",r.name),a("scalemode"));var l=a("span"),u;Array.isArray(l)&&(u="manual"),a("spanmode",u);var c=a("line.color",(t.marker||{}).color||n),f=a("line.width"),h=a("fillcolor",nwt.addOpacity(r.line.color,.5));G2e.handlePointsDefaults(t,r,a,{prefix:""});var d=o("box.width"),v=o("box.fillcolor",h),_=o("box.line.color",c),b=o("box.line.width",f),p=a("box.visible",!!(d||v||_||b));p||(r.box={visible:!1});var k=o("meanline.color",c),E=o("meanline.width",f),S=a("meanline.visible",!!(k||E));S||(r.meanline={visible:!1}),a("quartilemethod"),a("zorder")}}});var Z2e=ye((Zcr,X2e)=>{"use strict";var awt=Dr(),owt=NG(),swt=ZI();X2e.exports=function(t,r,n){function i(a,o){return awt.coerce(t,r,owt,a,o)}swt._supply(t,r,n,i,"violin")}});var O8=ye(s2=>{"use strict";var lwt=Dr(),uwt={gaussian:function(e){return 1/Math.sqrt(2*Math.PI)*Math.exp(-.5*e*e)}};s2.makeKDE=function(e,t,r){var n=r.length,i=uwt.gaussian,a=e.bandwidth,o=1/(n*a);return function(s){for(var l=0,u=0;u<n;u++)l+=i((s-r[u])/a);return o*l}};s2.getPositionOnKdePath=function(e,t,r){var n,i;t.orientation==="h"?(n="y",i="x"):(n="x",i="y");var a=lwt.findPointOnPath(e.path,r,i,{pathLength:e.pathLength}),o=e.posCenterPx,s=a[n],l=t.side==="both"?2*o-s:o;return[s,l]};s2.getKdeValue=function(e,t,r){var n=e.pts.map(s2.extractVal),i=s2.makeKDE(e,t,n);return i(r)/e.posDensityScale};s2.extractVal=function(e){return e.v}});var J2e=ye((Kcr,K2e)=>{"use strict";var UG=Dr(),VG=ho(),cwt=RV(),Y2e=O8(),fwt=fs().BADNUM;K2e.exports=function(t,r){var n=cwt(t,r);if(n[0].t.empty)return n;for(var i=t._fullLayout,a=VG.getFromId(t,r[r.orientation==="h"?"xaxis":"yaxis"]),o=1/0,s=-1/0,l=0,u=0,c=0;c<n.length;c++){var f=n[c],h=f.pts.map(Y2e.extractVal),d=f.bandwidth=dwt(r,f,h),v=f.span=vwt(r,f,a,d);if(f.min===f.max&&d===0)v=f.span=[f.min,f.max],f.density=[{v:1,t:v[0]}],f.bandwidth=d,l=Math.max(l,1);else{var _=v[1]-v[0],b=Math.ceil(_/(d/3)),p=_/b;if(!isFinite(p)||!isFinite(b))return UG.error("Something went wrong with computing the violin span"),n[0].t.empty=!0,n;var k=Y2e.makeKDE(f,r,h);f.density=new Array(b+1);for(var E=0;E<f.density.length;E++){var S=v[0]+E*p,L=k(S);f.density[E]={v:L,t:S},l=Math.max(l,L)}}u=Math.max(u,h.length),o=Math.min(o,v[0]),s=Math.max(s,v[1])}var x=VG.findExtremes(a,[o,s],{padded:!0});if(r._extremes[a._id]=x,r.width)n[0].t.maxKDE=l;else{var C=i._violinScaleGroupStats,M=r.scalegroup,g=C[M];g?(g.maxKDE=Math.max(g.maxKDE,l),g.maxCount=Math.max(g.maxCount,u)):C[M]={maxKDE:l,maxCount:u}}return n[0].t.labels.kde=UG._(t,"kde:"),n};function hwt(e,t,r){var n=Math.min(t,r/1.349);return 1.059*n*Math.pow(e,-.2)}function dwt(e,t,r){var n=t.max-t.min;if(!n)return e.bandwidth?e.bandwidth:0;if(e.bandwidth)return Math.max(e.bandwidth,n/1e4);var i=r.length,a=UG.stdev(r,i-1,t.mean);return Math.max(hwt(i,a,t.q3-t.q1),n/100)}function vwt(e,t,r,n){var i=e.spanmode,a=e.span||[],o=[t.min,t.max],s=[t.min-2*n,t.max+2*n],l;function u(f){var h=a[f],d=r.type==="multicategory"?r.r2c(h):r.d2c(h,0,e[t.valLetter+"calendar"]);return d===fwt?s[f]:d}i==="soft"?l=s:i==="hard"?l=o:l=[u(0),u(1)];var c={type:"linear",range:l};return VG.setConvert(c),c.cleanRange(),l}});var ewe=ye((Jcr,Q2e)=>{"use strict";var pwt=KI().setPositionOffset,$2e=["v","h"];Q2e.exports=function(t,r){for(var n=t.calcdata,i=r.xaxis,a=r.yaxis,o=0;o<$2e.length;o++){for(var s=$2e[o],l=s==="h"?a:i,u=[],c=0;c<n.length;c++){var f=n[c],h=f[0].t,d=f[0].trace;d.visible===!0&&d.type==="violin"&&!h.empty&&d.orientation===s&&d.xaxis===i._id&&d.yaxis===a._id&&u.push(c)}pwt("violin",t,u,l)}}});var rwe=ye(($cr,twe)=>{"use strict";var GG=Oa(),HG=Dr(),gwt=So(),jG=JI(),mwt=yU(),ywt=O8();twe.exports=function(t,r,n,i){var a=t._context.staticPlot,o=t._fullLayout,s=r.xaxis,l=r.yaxis;function u(c,f){var h=mwt(c,{xaxis:s,yaxis:l,trace:f,connectGaps:!0,baseTolerance:.75,shape:"spline",simplify:!0,linearized:!0});return gwt.smoothopen(h[0],1)}HG.makeTraceGroups(i,n,"trace violins").each(function(c){var f=GG.select(this),h=c[0],d=h.t,v=h.trace;if(v.visible!==!0||d.empty){f.remove();return}var _=d.bPos,b=d.bdPos,p=r[d.valLetter+"axis"],k=r[d.posLetter+"axis"],E=v.side==="both",S=E||v.side==="positive",L=E||v.side==="negative",x=f.selectAll("path.violin").data(HG.identity);x.enter().append("path").style("vector-effect",a?"none":"non-scaling-stroke").attr("class","violin"),x.exit().remove(),x.each(function(V){var G=GG.select(this),Z=V.density,j=Z.length,N=k.c2l(V.pos+_,!0),H=k.l2p(N),te;if(v.width)te=d.maxKDE/b;else{var oe=o._violinScaleGroupStats[v.scalegroup];te=v.scalemode==="count"?oe.maxKDE/b*(oe.maxCount/V.pts.length):oe.maxKDE/b}var _e,Ee,Ce,me,ie,Se,Le;if(S){for(Se=new Array(j),me=0;me<j;me++)Le=Se[me]={},Le[d.posLetter]=N+Z[me].v/te,Le[d.valLetter]=p.c2l(Z[me].t,!0);_e=u(Se,v)}if(L){for(Se=new Array(j),ie=0,me=j-1;ie<j;ie++,me--)Le=Se[ie]={},Le[d.posLetter]=N-Z[me].v/te,Le[d.valLetter]=p.c2l(Z[me].t,!0);Ee=u(Se,v)}if(E)Ce=_e+"L"+Ee.slice(1)+"Z";else{var Ae=[H,p.c2p(Z[0].t)],Fe=[H,p.c2p(Z[j-1].t)];v.orientation==="h"&&(Ae.reverse(),Fe.reverse()),S?Ce="M"+Ae+"L"+_e.slice(1)+"L"+Fe:Ce="M"+Fe+"L"+Ee.slice(1)+"L"+Ae}G.attr("d",Ce),V.posCenterPx=H,V.posDensityScale=te*b,V.path=G.node(),V.pathLength=V.path.getTotalLength()/(E?2:1)});var C=v.box,M=C.width,g=(C.line||{}).width,P,T;E?(P=b*M,T=0):S?(P=[0,b*M/2],T=g*{x:1,y:-1}[d.posLetter]):(P=[b*M/2,0],T=g*{x:-1,y:1}[d.posLetter]),jG.plotBoxAndWhiskers(f,{pos:k,val:p},v,{bPos:_,bdPos:P,bPosPxOffset:T}),jG.plotBoxMean(f,{pos:k,val:p},v,{bPos:_,bdPos:P,bPosPxOffset:T});var z;!v.box.visible&&v.meanline.visible&&(z=HG.identity);var O=f.selectAll("path.meanline").data(z||[]);O.enter().append("path").attr("class","meanline").style("fill","none").style("vector-effect",a?"none":"non-scaling-stroke"),O.exit().remove(),O.each(function(V){var G=p.c2p(V.mean,!0),Z=ywt.getPositionOnKdePath(V,v,G);GG.select(this).attr("d",v.orientation==="h"?"M"+G+","+Z[0]+"V"+Z[1]:"M"+Z[0]+","+G+"H"+Z[1])}),jG.plotPoints(f,{x:s,y:l},v,d)})}});var awe=ye((Qcr,nwe)=>{"use strict";var iwe=Oa(),HT=ka(),_wt=op().stylePoints;nwe.exports=function(t){var r=iwe.select(t).selectAll("g.trace.violins");r.style("opacity",function(n){return n[0].trace.opacity}),r.each(function(n){var i=n[0].trace,a=iwe.select(this),o=i.box||{},s=o.line||{},l=i.meanline||{},u=l.width;a.selectAll("path.violin").style("stroke-width",i.line.width+"px").call(HT.stroke,i.line.color).call(HT.fill,i.fillcolor),a.selectAll("path.box").style("stroke-width",s.width+"px").call(HT.stroke,s.color).call(HT.fill,o.fillcolor);var c={"stroke-width":u+"px","stroke-dasharray":2*u+"px,"+u+"px"};a.selectAll("path.mean").style(c).call(HT.stroke,l.color),a.selectAll("path.meanline").style(c).call(HT.stroke,l.color),_wt(a,i,t)})}});var uwe=ye((efr,lwe)=>{"use strict";var xwt=ka(),WG=Dr(),bwt=ho(),owe=qV(),swe=O8();lwe.exports=function(t,r,n,i,a){a||(a={});var o=a.hoverLayer,s=t.cd,l=s[0].trace,u=l.hoveron,c=u.indexOf("violins")!==-1,f=u.indexOf("kde")!==-1,h=[],d,v;if(c||f){var _=owe.hoverOnBoxes(t,r,n,i);if(f&&_.length>0){var b=t.xa,p=t.ya,k,E,S,L,x;l.orientation==="h"?(x=r,k="y",S=p,E="x",L=b):(x=n,k="x",S=b,E="y",L=p);var C=s[t.index];if(x>=C.span[0]&&x<=C.span[1]){var M=WG.extendFlat({},t),g=L.c2p(x,!0),P=swe.getKdeValue(C,l,x),T=swe.getPositionOnKdePath(C,l,g),z=S._offset,O=S._length;M[k+"0"]=T[0],M[k+"1"]=T[1],M[E+"0"]=M[E+"1"]=g,M[E+"Label"]=E+": "+bwt.hoverLabelText(L,x,l[E+"hoverformat"])+", "+s[0].t.labels.kde+" "+P.toFixed(3);for(var V=0,G=0;G<_.length;G++)if(_[G].attr==="med"){V=G;break}M.spikeDistance=_[V].spikeDistance;var Z=k+"Spike";M[Z]=_[V][Z],_[V].spikeDistance=void 0,_[V][Z]=void 0,M.hovertemplate=!1,h.push(M),v={},v[k+"1"]=WG.constrain(z+T[0],z,z+O),v[k+"2"]=WG.constrain(z+T[1],z,z+O),v[E+"1"]=v[E+"2"]=L._offset+g}}c&&(h=h.concat(_))}u.indexOf("points")!==-1&&(d=owe.hoverOnPoints(t,r,n));var j=o.selectAll(".violinline-"+l.uid).data(v?[0]:[]);return j.enter().append("line").classed("violinline-"+l.uid,!0).attr("stroke-width",1.5),j.exit().remove(),j.attr(v).call(xwt.stroke,t.color),i==="closest"?d?[d]:h:(d&&h.push(d),h)}});var fwe=ye((tfr,cwe)=>{"use strict";cwe.exports={attributes:OG(),layoutAttributes:NG(),supplyDefaults:W2e(),crossTraceDefaults:T4().crossTraceDefaults,supplyLayoutDefaults:Z2e(),calc:J2e(),crossTraceCalc:ewe(),plot:rwe(),style:awe(),styleOnSelect:op().styleOnSelect,hoverPoints:uwe(),selectPoints:BV(),moduleType:"trace",name:"violin",basePlotModule:ph(),categories:["cartesian","svg","symbols","oriented","box-violin","showLegend","violinLayout","zoomScale"],meta:{}}});var dwe=ye((rfr,hwe)=>{"use strict";hwe.exports=fwe()});var pwe=ye((ifr,vwe)=>{"use strict";vwe.exports={eventDataKeys:["percentInitial","percentPrevious","percentTotal"]}});var ZG=ye((nfr,_we)=>{"use strict";var jc=Lm(),XG=pf().line,wwt=Gl(),gwe=df().axisHoverFormat,{hovertemplateAttrs:Twt,texttemplateAttrs:Awt,templatefallbackAttrs:mwe}=Ll(),ywe=pwe(),Ny=Ao().extendFlat,Swt=ka();_we.exports={x:jc.x,x0:jc.x0,dx:jc.dx,y:jc.y,y0:jc.y0,dy:jc.dy,xperiod:jc.xperiod,yperiod:jc.yperiod,xperiod0:jc.xperiod0,yperiod0:jc.yperiod0,xperiodalignment:jc.xperiodalignment,yperiodalignment:jc.yperiodalignment,xhoverformat:gwe("x"),yhoverformat:gwe("y"),hovertext:jc.hovertext,hovertemplate:Twt({},{keys:ywe.eventDataKeys}),hovertemplatefallback:mwe(),hoverinfo:Ny({},wwt.hoverinfo,{flags:["name","x","y","text","percent initial","percent previous","percent total"]}),textinfo:{valType:"flaglist",flags:["label","text","percent initial","percent previous","percent total","value"],extras:["none"],editType:"plot",arrayOk:!1},texttemplate:Awt({editType:"plot"},{keys:ywe.eventDataKeys.concat(["label","value"])}),texttemplatefallback:mwe({editType:"plot"}),text:jc.text,textposition:jc.textposition,insidetextanchor:Ny({},jc.insidetextanchor,{dflt:"middle"}),textangle:Ny({},jc.textangle,{dflt:0}),textfont:jc.textfont,insidetextfont:jc.insidetextfont,outsidetextfont:jc.outsidetextfont,constraintext:jc.constraintext,cliponaxis:jc.cliponaxis,orientation:Ny({},jc.orientation,{}),offset:Ny({},jc.offset,{arrayOk:!1}),width:Ny({},jc.width,{arrayOk:!1}),marker:Mwt(),connector:{fillcolor:{valType:"color",editType:"style"},line:{color:Ny({},XG.color,{dflt:Swt.defaultLine}),width:Ny({},XG.width,{dflt:0,editType:"plot"}),dash:XG.dash,editType:"style"},visible:{valType:"boolean",dflt:!0,editType:"plot"},editType:"plot"},offsetgroup:jc.offsetgroup,alignmentgroup:jc.alignmentgroup,zorder:jc.zorder};function Mwt(){var e=Ny({},jc.marker);return delete e.pattern,delete e.cornerradius,e}});var YG=ye((afr,xwe)=>{"use strict";xwe.exports={funnelmode:{valType:"enumerated",values:["stack","group","overlay"],dflt:"stack",editType:"calc"},funnelgap:{valType:"number",min:0,max:1,editType:"calc"},funnelgroupgap:{valType:"number",min:0,max:1,dflt:0,editType:"calc"}}});var JG=ye((ofr,wwe)=>{"use strict";var q8=Dr(),Ewt=Hb(),kwt=r0().handleText,Cwt=eT(),Lwt=Ig(),bwe=ZG(),KG=ka();function Pwt(e,t,r,n){function i(f,h){return q8.coerce(e,t,bwe,f,h)}var a=Cwt(e,t,n,i);if(!a){t.visible=!1;return}Lwt(e,t,n,i),i("xhoverformat"),i("yhoverformat"),i("orientation",t.y&&!t.x?"v":"h"),i("offset"),i("width");var o=i("text");i("hovertext"),i("hovertemplate"),i("hovertemplatefallback");var s=i("textposition");kwt(e,t,n,i,s,{moduleHasSelected:!1,moduleHasUnselected:!1,moduleHasConstrain:!0,moduleHasCliponaxis:!0,moduleHasTextangle:!0,moduleHasInsideanchor:!0}),t.textposition!=="none"&&!t.texttemplate&&i("textinfo",q8.isArrayOrTypedArray(o)?"text+value":"value");var l=i("marker.color",r);i("marker.line.color",KG.defaultLine),i("marker.line.width");var u=i("connector.visible");if(u){i("connector.fillcolor",Iwt(l));var c=i("connector.line.width");c&&(i("connector.line.color"),i("connector.line.dash"))}i("zorder")}function Iwt(e){var t=q8.isArrayOrTypedArray(e)?"#000":e;return KG.addOpacity(t,.5*KG.opacity(t))}function Rwt(e,t){var r,n;function i(o){return q8.coerce(n._input,n,bwe,o)}for(var a=0;a<e.length;a++)n=e[a],n.type==="funnel"&&(r=n._input,Ewt(r,n,t,i,t.funnelmode))}wwe.exports={supplyDefaults:Pwt,crossTraceDefaults:Rwt}});var Awe=ye((sfr,Twe)=>{"use strict";var Dwt=Dr(),Fwt=YG();Twe.exports=function(e,t,r){var n=!1;function i(s,l){return Dwt.coerce(e,t,Fwt,s,l)}for(var a=0;a<r.length;a++){var o=r[a];if(o.visible&&o.type==="funnel"){n=!0;break}}n&&(i("funnelmode"),i("funnelgap",.2),i("funnelgroupgap"))}});var Mwe=ye((lfr,Swe)=>{"use strict";var jT=Dr();Swe.exports=function(t,r){for(var n=0;n<t.length;n++)t[n].i=n;jT.mergeArray(r.text,t,"tx"),jT.mergeArray(r.hovertext,t,"htx");var i=r.marker;if(i){jT.mergeArray(i.opacity,t,"mo"),jT.mergeArray(i.color,t,"mc");var a=i.line;a&&(jT.mergeArray(a.color,t,"mlc"),jT.mergeArrayCastPositive(a.width,t,"mlw"))}}});var Lwe=ye((ufr,Cwe)=>{"use strict";var Ewe=ho(),kwe=Dg(),zwt=Mwe(),Owt=z0(),O4=fs().BADNUM;Cwe.exports=function(t,r){var n=Ewe.getFromId(t,r.xaxis||"x"),i=Ewe.getFromId(t,r.yaxis||"y"),a,o,s,l,u,c,f,h;r.orientation==="h"?(a=n.makeCalcdata(r,"x"),s=i.makeCalcdata(r,"y"),l=kwe(r,i,"y",s),u=!!r.yperiodalignment,c="y"):(a=i.makeCalcdata(r,"y"),s=n.makeCalcdata(r,"x"),l=kwe(r,n,"x",s),u=!!r.xperiodalignment,c="x"),o=l.vals;var d=Math.min(o.length,a.length),v=new Array(d);for(r._base=[],f=0;f<d;f++){a[f]<0&&(a[f]=O4);var _=!1;a[f]!==O4&&f+1<d&&a[f+1]!==O4&&(_=!0),h=v[f]={p:o[f],s:a[f],cNext:_},r._base[f]=-.5*h.s,u&&(v[f].orig_p=s[f],v[f][c+"End"]=l.ends[f],v[f][c+"Start"]=l.starts[f]),r.ids&&(h.id=String(r.ids[f])),f===0&&(v[0].vTotal=0),v[0].vTotal+=$G(h.s),h.begR=$G(h.s)/$G(v[0].s)}var b;for(f=0;f<d;f++)h=v[f],h.s!==O4&&(h.sumR=h.s/v[0].vTotal,h.difR=b!==void 0?h.s/b:1,b=h.s);return zwt(v,r),Owt(v,r),v};function $G(e){return e===O4?0:e}});var Rwe=ye((cfr,Iwe)=>{"use strict";var Pwe=jb().setGroupPositions;Iwe.exports=function(t,r){var n=t._fullLayout,i=t._fullData,a=t.calcdata,o=r.xaxis,s=r.yaxis,l=[],u=[],c=[],f,h;for(h=0;h<i.length;h++){var d=i[h],v=d.orientation==="h";d.visible===!0&&d.xaxis===o._id&&d.yaxis===s._id&&d.type==="funnel"&&(f=a[h],v?c.push(f):u.push(f),l.push(f))}var _={mode:n.funnelmode,norm:n.funnelnorm,gap:n.funnelgap,groupgap:n.funnelgroupgap};for(Pwe(t,o,s,u,_),Pwe(t,s,o,c,_),h=0;h<l.length;h++){f=l[h];for(var b=0;b<f.length;b++)b+1<f.length&&(f[b].nextP0=f[b+1].p0,f[b].nextS0=f[b+1].s0,f[b].nextP1=f[b+1].p1,f[b].nextS1=f[b+1].s1)}}});var Owe=ye((ffr,zwe)=>{"use strict";var B8=Oa(),L_=Dr(),Dwe=So(),C_=fs().BADNUM,qwt=n2(),Bwt=bv().clearMinTextSize;zwe.exports=function(t,r,n,i){var a=t._fullLayout;Bwt("funnel",a),Nwt(t,r,n,i),Uwt(t,r,n,i),qwt.plot(t,r,n,i,{mode:a.funnelmode,norm:a.funnelmode,gap:a.funnelgap,groupgap:a.funnelgroupgap})};function Nwt(e,t,r,n){var i=t.xaxis,a=t.yaxis;L_.makeTraceGroups(n,r,"trace bars").each(function(o){var s=B8.select(this),l=o[0].trace,u=L_.ensureSingle(s,"g","regions");if(!l.connector||!l.connector.visible){u.remove();return}var c=l.orientation==="h",f=u.selectAll("g.region").data(L_.identity);f.enter().append("g").classed("region",!0),f.exit().remove();var h=f.size();f.each(function(d,v){if(!(v!==h-1&&!d.cNext)){var _=Fwe(d,i,a,c),b=_[0],p=_[1],k="";b[0]!==C_&&p[0]!==C_&&b[1]!==C_&&p[1]!==C_&&b[2]!==C_&&p[2]!==C_&&b[3]!==C_&&p[3]!==C_&&(c?k+="M"+b[0]+","+p[1]+"L"+b[2]+","+p[2]+"H"+b[3]+"L"+b[1]+","+p[1]+"Z":k+="M"+b[1]+","+p[1]+"L"+b[2]+","+p[3]+"V"+p[2]+"L"+b[1]+","+p[0]+"Z"),k===""&&(k="M0,0Z"),L_.ensureSingle(B8.select(this),"path").attr("d",k).call(Dwe.setClipUrl,t.layerClipId,e)}})})}function Uwt(e,t,r,n){var i=t.xaxis,a=t.yaxis;L_.makeTraceGroups(n,r,"trace bars").each(function(o){var s=B8.select(this),l=o[0].trace,u=L_.ensureSingle(s,"g","lines");if(!l.connector||!l.connector.visible||!l.connector.line.width){u.remove();return}var c=l.orientation==="h",f=u.selectAll("g.line").data(L_.identity);f.enter().append("g").classed("line",!0),f.exit().remove();var h=f.size();f.each(function(d,v){if(!(v!==h-1&&!d.cNext)){var _=Fwe(d,i,a,c),b=_[0],p=_[1],k="";b[3]!==void 0&&p[3]!==void 0&&(c?(k+="M"+b[0]+","+p[1]+"L"+b[2]+","+p[2],k+="M"+b[1]+","+p[1]+"L"+b[3]+","+p[2]):(k+="M"+b[1]+","+p[1]+"L"+b[2]+","+p[3],k+="M"+b[1]+","+p[0]+"L"+b[2]+","+p[2])),k===""&&(k="M0,0Z"),L_.ensureSingle(B8.select(this),"path").attr("d",k).call(Dwe.setClipUrl,t.layerClipId,e)}})})}function Fwe(e,t,r,n){var i=[],a=[],o=n?t:r,s=n?r:t;return i[0]=o.c2p(e.s0,!0),a[0]=s.c2p(e.p0,!0),i[1]=o.c2p(e.s1,!0),a[1]=s.c2p(e.p1,!0),i[2]=o.c2p(e.nextS0,!0),a[2]=s.c2p(e.nextP0,!0),i[3]=o.c2p(e.nextS1,!0),a[3]=s.c2p(e.nextP1,!0),n?[i,a]:[a,i]}});var Nwe=ye((hfr,Bwe)=>{"use strict";var q4=Oa(),qwe=So(),QG=ka(),Vwt=N1().DESELECTDIM,Gwt=N0(),Hwt=bv().resizeText,jwt=Gwt.styleTextPoints;function Wwt(e,t,r){var n=r||q4.select(e).selectAll('g[class^="funnellayer"]').selectAll("g.trace");Hwt(e,n,"funnel"),n.style("opacity",function(i){return i[0].trace.opacity}),n.each(function(i){var a=q4.select(this),o=i[0].trace;a.selectAll(".point > path").each(function(s){if(!s.isBlank){var l=o.marker;q4.select(this).call(QG.fill,s.mc||l.color).call(QG.stroke,s.mlc||l.line.color).call(qwe.dashLine,l.line.dash,s.mlw||l.line.width).style("opacity",o.selectedpoints&&!s.selected?Vwt:1)}}),jwt(a,o,e),a.selectAll(".regions").each(function(){q4.select(this).selectAll("path").style("stroke-width",0).call(QG.fill,o.connector.fillcolor)}),a.selectAll(".lines").each(function(){var s=o.connector.line;qwe.lineGroupStyle(q4.select(this).selectAll("path"),s.width,s.color,s.dash)})})}Bwe.exports={style:Wwt}});var Gwe=ye((dfr,Vwe)=>{"use strict";var Uwe=ka().opacity,Xwt=ET().hoverOnBars,eH=Dr().formatPercent;Vwe.exports=function(t,r,n,i,a){var o=Xwt(t,r,n,i,a);if(o){var s=o.cd,l=s[0].trace,u=l.orientation==="h",c=o.index,f=s[c],h=u?"x":"y";o[h+"LabelVal"]=f.s,o.percentInitial=f.begR,o.percentInitialLabel=eH(f.begR,1),o.percentPrevious=f.difR,o.percentPreviousLabel=eH(f.difR,1),o.percentTotal=f.sumR,o.percentTotalLabel=eH(f.sumR,1);var d=f.hi||l.hoverinfo,v=[];if(d&&d!=="none"&&d!=="skip"){var _=d==="all",b=d.split("+"),p=function(k){return _||b.indexOf(k)!==-1};p("percent initial")&&v.push(o.percentInitialLabel+" of initial"),p("percent previous")&&v.push(o.percentPreviousLabel+" of previous"),p("percent total")&&v.push(o.percentTotalLabel+" of total")}return o.extraText=v.join("<br>"),o.color=Zwt(l,f),[o]}};function Zwt(e,t){var r=e.marker,n=t.mc||r.color,i=t.mlc||r.line.color,a=t.mlw||r.line.width;if(Uwe(n))return n;if(Uwe(i)&&a)return i}});var jwe=ye((vfr,Hwe)=>{"use strict";Hwe.exports=function(t,r){return t.x="xVal"in r?r.xVal:r.x,t.y="yVal"in r?r.yVal:r.y,"percentInitial"in r&&(t.percentInitial=r.percentInitial),"percentPrevious"in r&&(t.percentPrevious=r.percentPrevious),"percentTotal"in r&&(t.percentTotal=r.percentTotal),r.xa&&(t.xaxis=r.xa),r.ya&&(t.yaxis=r.ya),t}});var Xwe=ye((pfr,Wwe)=>{"use strict";Wwe.exports={attributes:ZG(),layoutAttributes:YG(),supplyDefaults:JG().supplyDefaults,crossTraceDefaults:JG().crossTraceDefaults,supplyLayoutDefaults:Awe(),calc:Lwe(),crossTraceCalc:Rwe(),plot:Owe(),style:Nwe().style,hoverPoints:Gwe(),eventData:jwe(),selectPoints:kT(),moduleType:"trace",name:"funnel",basePlotModule:ph(),categories:["bar-like","cartesian","svg","oriented","showLegend","zoomScale"],meta:{}}});var Ywe=ye((gfr,Zwe)=>{"use strict";Zwe.exports=Xwe()});var Jwe=ye((mfr,Kwe)=>{"use strict";Kwe.exports={eventDataKeys:["initial","delta","final"]}});var iH=ye((yfr,t3e)=>{"use strict";var _c=Lm(),tH=pf().line,Ywt=Gl(),$we=df().axisHoverFormat,{hovertemplateAttrs:Kwt,texttemplateAttrs:Jwt,templatefallbackAttrs:Qwe}=Ll(),e3e=Jwe(),WT=Ao().extendFlat,$wt=ka();function rH(e){return{marker:{color:WT({},_c.marker.color,{arrayOk:!1,editType:"style"}),line:{color:WT({},_c.marker.line.color,{arrayOk:!1,editType:"style"}),width:WT({},_c.marker.line.width,{arrayOk:!1,editType:"style"}),editType:"style"},editType:"style"},editType:"style"}}t3e.exports={measure:{valType:"data_array",dflt:[],editType:"calc"},base:{valType:"number",dflt:null,arrayOk:!1,editType:"calc"},x:_c.x,x0:_c.x0,dx:_c.dx,y:_c.y,y0:_c.y0,dy:_c.dy,xperiod:_c.xperiod,yperiod:_c.yperiod,xperiod0:_c.xperiod0,yperiod0:_c.yperiod0,xperiodalignment:_c.xperiodalignment,yperiodalignment:_c.yperiodalignment,xhoverformat:$we("x"),yhoverformat:$we("y"),hovertext:_c.hovertext,hovertemplate:Kwt({},{keys:e3e.eventDataKeys}),hovertemplatefallback:Qwe(),hoverinfo:WT({},Ywt.hoverinfo,{flags:["name","x","y","text","initial","delta","final"]}),textinfo:{valType:"flaglist",flags:["label","text","initial","delta","final"],extras:["none"],editType:"plot",arrayOk:!1},texttemplate:Jwt({editType:"plot"},{keys:e3e.eventDataKeys.concat(["label"])}),texttemplatefallback:Qwe({editType:"plot"}),text:_c.text,textposition:_c.textposition,insidetextanchor:_c.insidetextanchor,textangle:_c.textangle,textfont:_c.textfont,insidetextfont:_c.insidetextfont,outsidetextfont:_c.outsidetextfont,constraintext:_c.constraintext,cliponaxis:_c.cliponaxis,orientation:_c.orientation,offset:_c.offset,width:_c.width,increasing:rH("increasing"),decreasing:rH("decreasing"),totals:rH("intermediate sums and total"),connector:{line:{color:WT({},tH.color,{dflt:$wt.defaultLine}),width:WT({},tH.width,{editType:"plot"}),dash:tH.dash,editType:"plot"},mode:{valType:"enumerated",values:["spanning","between"],dflt:"between",editType:"plot"},visible:{valType:"boolean",dflt:!0,editType:"plot"},editType:"plot"},offsetgroup:_c.offsetgroup,alignmentgroup:_c.alignmentgroup,zorder:_c.zorder}});var nH=ye((_fr,r3e)=>{"use strict";r3e.exports={waterfallmode:{valType:"enumerated",values:["group","overlay"],dflt:"group",editType:"calc"},waterfallgap:{valType:"number",min:0,max:1,editType:"calc"},waterfallgroupgap:{valType:"number",min:0,max:1,dflt:0,editType:"calc"}}});var XT=ye((xfr,i3e)=>{"use strict";i3e.exports={INCREASING:{COLOR:"#3D9970",SYMBOL:"\u25B2"},DECREASING:{COLOR:"#FF4136",SYMBOL:"\u25BC"}}});var oH=ye((bfr,s3e)=>{"use strict";var n3e=Dr(),Qwt=Hb(),e3t=r0().handleText,t3t=eT(),r3t=Ig(),a3e=iH(),i3t=ka(),o3e=XT(),n3t=o3e.INCREASING.COLOR,a3t=o3e.DECREASING.COLOR,o3t="#4499FF";function aH(e,t,r){e(t+".marker.color",r),e(t+".marker.line.color",i3t.defaultLine),e(t+".marker.line.width")}function s3t(e,t,r,n){function i(u,c){return n3e.coerce(e,t,a3e,u,c)}var a=t3t(e,t,n,i);if(!a){t.visible=!1;return}r3t(e,t,n,i),i("xhoverformat"),i("yhoverformat"),i("measure"),i("orientation",t.x&&!t.y?"h":"v"),i("base"),i("offset"),i("width"),i("text"),i("hovertext"),i("hovertemplate"),i("hovertemplatefallback");var o=i("textposition");e3t(e,t,n,i,o,{moduleHasSelected:!1,moduleHasUnselected:!1,moduleHasConstrain:!0,moduleHasCliponaxis:!0,moduleHasTextangle:!0,moduleHasInsideanchor:!0}),t.textposition!=="none"&&(i("texttemplate"),i("texttemplatefallback"),t.texttemplate||i("textinfo")),aH(i,"increasing",n3t),aH(i,"decreasing",a3t),aH(i,"totals",o3t);var s=i("connector.visible");if(s){i("connector.mode");var l=i("connector.line.width");l&&(i("connector.line.color"),i("connector.line.dash"))}i("zorder")}function l3t(e,t){var r,n;function i(o){return n3e.coerce(n._input,n,a3e,o)}if(t.waterfallmode==="group")for(var a=0;a<e.length;a++)n=e[a],r=n._input,Qwt(r,n,t,i,t.waterfallmode)}s3e.exports={supplyDefaults:s3t,crossTraceDefaults:l3t}});var u3e=ye((wfr,l3e)=>{"use strict";var u3t=Dr(),c3t=nH();l3e.exports=function(e,t,r){var n=!1;function i(s,l){return u3t.coerce(e,t,c3t,s,l)}for(var a=0;a<r.length;a++){var o=r[a];if(o.visible&&o.type==="waterfall"){n=!0;break}}n&&(i("waterfallmode"),i("waterfallgap",.2),i("waterfallgroupgap"))}});var p3e=ye((Tfr,v3e)=>{"use strict";var c3e=ho(),f3e=Dg(),h3e=Dr().mergeArray,f3t=z0(),d3e=fs().BADNUM;function sH(e){return e==="a"||e==="absolute"}function lH(e){return e==="t"||e==="total"}v3e.exports=function(t,r){var n=c3e.getFromId(t,r.xaxis||"x"),i=c3e.getFromId(t,r.yaxis||"y"),a,o,s,l,u,c;r.orientation==="h"?(a=n.makeCalcdata(r,"x"),s=i.makeCalcdata(r,"y"),l=f3e(r,i,"y",s),u=!!r.yperiodalignment,c="y"):(a=i.makeCalcdata(r,"y"),s=n.makeCalcdata(r,"x"),l=f3e(r,n,"x",s),u=!!r.xperiodalignment,c="x"),o=l.vals;for(var f=Math.min(o.length,a.length),h=new Array(f),d=0,v,_=!1,b=0;b<f;b++){var p=a[b]||0,k=!1;(a[b]!==d3e||lH(r.measure[b])||sH(r.measure[b]))&&b+1<f&&(a[b+1]!==d3e||lH(r.measure[b+1])||sH(r.measure[b+1]))&&(k=!0);var E=h[b]={i:b,p:o[b],s:p,rawS:p,cNext:k};sH(r.measure[b])?(d=E.s,E.isSum=!0,E.dir="totals",E.s=d):lH(r.measure[b])?(E.isSum=!0,E.dir="totals",E.s=d):(E.isSum=!1,E.dir=E.rawS<0?"decreasing":"increasing",v=E.s,E.s=d+v,d+=v),E.dir==="totals"&&(_=!0),u&&(h[b].orig_p=s[b],h[b][c+"End"]=l.ends[b],h[b][c+"Start"]=l.starts[b]),r.ids&&(E.id=String(r.ids[b])),E.v=(r.base||0)+d}return h.length&&(h[0].hasTotals=_),h3e(r.text,h,"tx"),h3e(r.hovertext,h,"htx"),f3t(h,r),h}});var y3e=ye((Afr,m3e)=>{"use strict";var g3e=jb().setGroupPositions;m3e.exports=function(t,r){var n=t._fullLayout,i=t._fullData,a=t.calcdata,o=r.xaxis,s=r.yaxis,l=[],u=[],c=[],f,h;for(h=0;h<i.length;h++){var d=i[h];d.visible===!0&&d.xaxis===o._id&&d.yaxis===s._id&&d.type==="waterfall"&&(f=a[h],d.orientation==="h"?c.push(f):u.push(f),l.push(f))}var v={mode:n.waterfallmode,norm:n.waterfallnorm,gap:n.waterfallgap,groupgap:n.waterfallgroupgap};for(g3e(t,o,s,u,v),g3e(t,s,o,c,v),h=0;h<l.length;h++){f=l[h];for(var _=0;_<f.length;_++){var b=f[_];b.isSum===!1&&(b.s0+=_===0?0:f[_-1].s),_+1<f.length&&(f[_].nextP0=f[_+1].p0,f[_].nextS0=f[_+1].s0)}}}});var b3e=ye((Sfr,x3e)=>{"use strict";var _3e=Oa(),N8=Dr(),h3t=So(),ZT=fs().BADNUM,d3t=n2(),v3t=bv().clearMinTextSize;x3e.exports=function(t,r,n,i){var a=t._fullLayout;v3t("waterfall",a),d3t.plot(t,r,n,i,{mode:a.waterfallmode,norm:a.waterfallmode,gap:a.waterfallgap,groupgap:a.waterfallgroupgap}),p3t(t,r,n,i)};function p3t(e,t,r,n){var i=t.xaxis,a=t.yaxis;N8.makeTraceGroups(n,r,"trace bars").each(function(o){var s=_3e.select(this),l=o[0].trace,u=N8.ensureSingle(s,"g","lines");if(!l.connector||!l.connector.visible){u.remove();return}var c=l.orientation==="h",f=l.connector.mode,h=u.selectAll("g.line").data(N8.identity);h.enter().append("g").classed("line",!0),h.exit().remove();var d=h.size();h.each(function(v,_){if(!(_!==d-1&&!v.cNext)){var b=g3t(v,i,a,c),p=b[0],k=b[1],E="";p[0]!==ZT&&k[0]!==ZT&&p[1]!==ZT&&k[1]!==ZT&&(f==="spanning"&&!v.isSum&&_>0&&(c?E+="M"+p[0]+","+k[1]+"V"+k[0]:E+="M"+p[1]+","+k[0]+"H"+p[0]),f!=="between"&&(v.isSum||_<d-1)&&(c?E+="M"+p[1]+","+k[0]+"V"+k[1]:E+="M"+p[0]+","+k[1]+"H"+p[1]),p[2]!==ZT&&k[2]!==ZT&&(c?E+="M"+p[1]+","+k[1]+"V"+k[2]:E+="M"+p[1]+","+k[1]+"H"+p[2])),E===""&&(E="M0,0Z"),N8.ensureSingle(_3e.select(this),"path").attr("d",E).call(h3t.setClipUrl,t.layerClipId,e)}})})}function g3t(e,t,r,n){var i=[],a=[],o=n?t:r,s=n?r:t;return i[0]=o.c2p(e.s0,!0),a[0]=s.c2p(e.p0,!0),i[1]=o.c2p(e.s1,!0),a[1]=s.c2p(e.p1,!0),i[2]=o.c2p(e.nextS0,!0),a[2]=s.c2p(e.nextP0,!0),n?[i,a]:[a,i]}});var S3e=ye((Mfr,A3e)=>{"use strict";var U8=Oa(),w3e=So(),T3e=ka(),m3t=N1().DESELECTDIM,y3t=N0(),_3t=bv().resizeText,x3t=y3t.styleTextPoints;function b3t(e,t,r){var n=r||U8.select(e).selectAll('g[class^="waterfalllayer"]').selectAll("g.trace");_3t(e,n,"waterfall"),n.style("opacity",function(i){return i[0].trace.opacity}),n.each(function(i){var a=U8.select(this),o=i[0].trace;a.selectAll(".point > path").each(function(s){if(!s.isBlank){var l=o[s.dir].marker;U8.select(this).call(T3e.fill,l.color).call(T3e.stroke,l.line.color).call(w3e.dashLine,l.line.dash,l.line.width).style("opacity",o.selectedpoints&&!s.selected?m3t:1)}}),x3t(a,o,e),a.selectAll(".lines").each(function(){var s=o.connector.line;w3e.lineGroupStyle(U8.select(this).selectAll("path"),s.width,s.color,s.dash)})})}A3e.exports={style:b3t}});var L3e=ye((Efr,C3e)=>{"use strict";var w3t=ho().hoverLabelText,M3e=ka().opacity,T3t=ET().hoverOnBars,E3e=XT(),k3e={increasing:E3e.INCREASING.SYMBOL,decreasing:E3e.DECREASING.SYMBOL};C3e.exports=function(t,r,n,i,a){var o=T3t(t,r,n,i,a);if(!o)return;var s=o.cd,l=s[0].trace,u=l.orientation==="h",c=u?"x":"y",f=u?t.xa:t.ya;function h(x){return w3t(f,x,l[c+"hoverformat"])}var d=o.index,v=s[d],_=v.isSum?v.b+v.s:v.rawS;o.initial=v.b+v.s-_,o.delta=_,o.final=o.initial+o.delta;var b=h(Math.abs(o.delta));o.deltaLabel=_<0?"("+b+")":b,o.finalLabel=h(o.final),o.initialLabel=h(o.initial);var p=v.hi||l.hoverinfo,k=[];if(p&&p!=="none"&&p!=="skip"){var E=p==="all",S=p.split("+"),L=function(x){return E||S.indexOf(x)!==-1};v.isSum||(L("final")&&(u?!L("x"):!L("y"))&&k.push(o.finalLabel),L("delta")&&(_<0?k.push(o.deltaLabel+" "+k3e.decreasing):k.push(o.deltaLabel+" "+k3e.increasing)),L("initial")&&k.push("Initial: "+o.initialLabel))}return k.length&&(o.extraText=k.join("<br>")),o.color=A3t(l,v),[o]};function A3t(e,t){var r=e[t.dir].marker,n=r.color,i=r.line.color,a=r.line.width;if(M3e(n))return n;if(M3e(i)&&a)return i}});var I3e=ye((kfr,P3e)=>{"use strict";P3e.exports=function(t,r){return t.x="xVal"in r?r.xVal:r.x,t.y="yVal"in r?r.yVal:r.y,"initial"in r&&(t.initial=r.initial),"delta"in r&&(t.delta=r.delta),"final"in r&&(t.final=r.final),r.xa&&(t.xaxis=r.xa),r.ya&&(t.yaxis=r.ya),t}});var D3e=ye((Cfr,R3e)=>{"use strict";R3e.exports={attributes:iH(),layoutAttributes:nH(),supplyDefaults:oH().supplyDefaults,crossTraceDefaults:oH().crossTraceDefaults,supplyLayoutDefaults:u3e(),calc:p3e(),crossTraceCalc:y3e(),plot:b3e(),style:S3e().style,hoverPoints:L3e(),eventData:I3e(),selectPoints:kT(),moduleType:"trace",name:"waterfall",basePlotModule:ph(),categories:["bar-like","cartesian","svg","oriented","showLegend","zoomScale"],meta:{}}});var z3e=ye((Lfr,F3e)=>{"use strict";F3e.exports=D3e()});var YT=ye((Pfr,O3e)=>{"use strict";O3e.exports={colormodel:{rgb:{min:[0,0,0],max:[255,255,255],fmt:function(e){return e.slice(0,3)},suffix:["","",""]},rgba:{min:[0,0,0,0],max:[255,255,255,1],fmt:function(e){return e.slice(0,4)},suffix:["","","",""]},rgba256:{colormodel:"rgba",zminDflt:[0,0,0,0],zmaxDflt:[255,255,255,255],min:[0,0,0,0],max:[255,255,255,1],fmt:function(e){return e.slice(0,4)},suffix:["","","",""]},hsl:{min:[0,0,0],max:[360,100,100],fmt:function(e){var t=e.slice(0,3);return t[1]=t[1]+"%",t[2]=t[2]+"%",t},suffix:["\xB0","%","%"]},hsla:{min:[0,0,0,0],max:[360,100,100,1],fmt:function(e){var t=e.slice(0,4);return t[1]=t[1]+"%",t[2]=t[2]+"%",t},suffix:["\xB0","%","%",""]}}}});var uH=ye((Ifr,B3e)=>{"use strict";var S3t=Gl(),M3t=pf().zorder,{hovertemplateAttrs:E3t,templatefallbackAttrs:k3t}=Ll(),q3e=Ao().extendFlat,C3t=YT().colormodel,N4=["rgb","rgba","rgba256","hsl","hsla"],L3t=[],P3t=[];for(KT=0;KT<N4.length;KT++)B4=C3t[N4[KT]],L3t.push("For the `"+N4[KT]+"` colormodel, it is ["+(B4.zminDflt||B4.min).join(", ")+"]."),P3t.push("For the `"+N4[KT]+"` colormodel, it is ["+(B4.zmaxDflt||B4.max).join(", ")+"].");var B4,KT;B3e.exports=q3e({source:{valType:"string",editType:"calc"},z:{valType:"data_array",editType:"calc"},colormodel:{valType:"enumerated",values:N4,editType:"calc"},zsmooth:{valType:"enumerated",values:["fast",!1],dflt:!1,editType:"plot"},zmin:{valType:"info_array",items:[{valType:"number",editType:"calc"},{valType:"number",editType:"calc"},{valType:"number",editType:"calc"},{valType:"number",editType:"calc"}],editType:"calc"},zmax:{valType:"info_array",items:[{valType:"number",editType:"calc"},{valType:"number",editType:"calc"},{valType:"number",editType:"calc"},{valType:"number",editType:"calc"}],editType:"calc"},x0:{valType:"any",dflt:0,editType:"calc+clearAxisTypes"},y0:{valType:"any",dflt:0,editType:"calc+clearAxisTypes"},dx:{valType:"number",dflt:1,editType:"calc",description:"Set the pixel's horizontal size."},dy:{valType:"number",dflt:1,editType:"calc",description:"Set the pixel's vertical size"},text:{valType:"data_array",editType:"plot"},hovertext:{valType:"data_array",editType:"plot"},hoverinfo:q3e({},S3t.hoverinfo,{flags:["x","y","z","color","name","text"],dflt:"x+y+z+text+name"}),hovertemplate:E3t({},{keys:["z","color","colormodel"]}),hovertemplatefallback:k3t(),zorder:M3t})});var V3e=ye((Rfr,U3e)=>{"use strict";var I3t=Dr(),R3t=uH(),N3e=YT(),D3t=Py().IMAGE_URL_PREFIX;U3e.exports=function(t,r){function n(o,s){return I3t.coerce(t,r,R3t,o,s)}n("source"),r.source&&!r.source.match(D3t)&&delete r.source,r._hasSource=!!r.source;var i=n("z");if(r._hasZ=!(i===void 0||!i.length||!i[0]||!i[0].length),!r._hasZ&&!r._hasSource){r.visible=!1;return}n("x0"),n("y0"),n("dx"),n("dy");var a;r._hasZ?(n("colormodel","rgb"),a=N3e.colormodel[r.colormodel],n("zmin",a.zminDflt||a.min),n("zmax",a.zmaxDflt||a.max)):r._hasSource&&(r.colormodel="rgba256",a=N3e.colormodel[r.colormodel],r.zmin=a.zminDflt,r.zmax=a.zmaxDflt),n("zsmooth"),n("text"),n("hovertext"),n("hovertemplate"),n("hovertemplatefallback"),r._length=null,n("zorder")}});var Uy=ye((Dfr,cH)=>{typeof Object.create=="function"?cH.exports=function(t,r){r&&(t.super_=r,t.prototype=Object.create(r.prototype,{constructor:{value:t,enumerable:!1,writable:!0,configurable:!0}}))}:cH.exports=function(t,r){if(r){t.super_=r;var n=function(){};n.prototype=r.prototype,t.prototype=new n,t.prototype.constructor=t}}});var fH=ye((Ffr,G3e)=>{G3e.exports=pb().EventEmitter});var W3e=ye(V8=>{"use strict";V8.byteLength=z3t;V8.toByteArray=q3t;V8.fromByteArray=U3t;var Om=[],X0=[],F3t=typeof Uint8Array!="undefined"?Uint8Array:Array,hH="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";for(l2=0,H3e=hH.length;l2<H3e;++l2)Om[l2]=hH[l2],X0[hH.charCodeAt(l2)]=l2;var l2,H3e;X0[45]=62;X0[95]=63;function j3e(e){var t=e.length;if(t%4>0)throw new Error("Invalid string. Length must be a multiple of 4");var r=e.indexOf("=");r===-1&&(r=t);var n=r===t?0:4-r%4;return[r,n]}function z3t(e){var t=j3e(e),r=t[0],n=t[1];return(r+n)*3/4-n}function O3t(e,t,r){return(t+r)*3/4-r}function q3t(e){var t,r=j3e(e),n=r[0],i=r[1],a=new F3t(O3t(e,n,i)),o=0,s=i>0?n-4:n,l;for(l=0;l<s;l+=4)t=X0[e.charCodeAt(l)]<<18|X0[e.charCodeAt(l+1)]<<12|X0[e.charCodeAt(l+2)]<<6|X0[e.charCodeAt(l+3)],a[o++]=t>>16&255,a[o++]=t>>8&255,a[o++]=t&255;return i===2&&(t=X0[e.charCodeAt(l)]<<2|X0[e.charCodeAt(l+1)]>>4,a[o++]=t&255),i===1&&(t=X0[e.charCodeAt(l)]<<10|X0[e.charCodeAt(l+1)]<<4|X0[e.charCodeAt(l+2)]>>2,a[o++]=t>>8&255,a[o++]=t&255),a}function B3t(e){return Om[e>>18&63]+Om[e>>12&63]+Om[e>>6&63]+Om[e&63]}function N3t(e,t,r){for(var n,i=[],a=t;a<r;a+=3)n=(e[a]<<16&16711680)+(e[a+1]<<8&65280)+(e[a+2]&255),i.push(B3t(n));return i.join("")}function U3t(e){for(var t,r=e.length,n=r%3,i=[],a=16383,o=0,s=r-n;o<s;o+=a)i.push(N3t(e,o,o+a>s?s:o+a));return n===1?(t=e[r-1],i.push(Om[t>>2]+Om[t<<4&63]+"==")):n===2&&(t=(e[r-2]<<8)+e[r-1],i.push(Om[t>>10]+Om[t>>4&63]+Om[t<<2&63]+"=")),i.join("")}});var X3e=ye(dH=>{dH.read=function(e,t,r,n,i){var a,o,s=i*8-n-1,l=(1<<s)-1,u=l>>1,c=-7,f=r?i-1:0,h=r?-1:1,d=e[t+f];for(f+=h,a=d&(1<<-c)-1,d>>=-c,c+=s;c>0;a=a*256+e[t+f],f+=h,c-=8);for(o=a&(1<<-c)-1,a>>=-c,c+=n;c>0;o=o*256+e[t+f],f+=h,c-=8);if(a===0)a=1-u;else{if(a===l)return o?NaN:(d?-1:1)*(1/0);o=o+Math.pow(2,n),a=a-u}return(d?-1:1)*o*Math.pow(2,a-n)};dH.write=function(e,t,r,n,i,a){var o,s,l,u=a*8-i-1,c=(1<<u)-1,f=c>>1,h=i===23?Math.pow(2,-24)-Math.pow(2,-77):0,d=n?0:a-1,v=n?1:-1,_=t<0||t===0&&1/t<0?1:0;for(t=Math.abs(t),isNaN(t)||t===1/0?(s=isNaN(t)?1:0,o=c):(o=Math.floor(Math.log(t)/Math.LN2),t*(l=Math.pow(2,-o))<1&&(o--,l*=2),o+f>=1?t+=h/l:t+=h*Math.pow(2,1-f),t*l>=2&&(o++,l/=2),o+f>=c?(s=0,o=c):o+f>=1?(s=(t*l-1)*Math.pow(2,i),o=o+f):(s=t*Math.pow(2,f-1)*Math.pow(2,i),o=0));i>=8;e[r+d]=s&255,d+=v,s/=256,i-=8);for(o=o<<i|s,u+=i;u>0;e[r+d]=o&255,d+=v,o/=256,u-=8);e[r+d-v]|=_*128}});var c2=ye(eA=>{"use strict";var vH=W3e(),$T=X3e(),Z3e=typeof Symbol=="function"&&typeof Symbol.for=="function"?Symbol.for("nodejs.util.inspect.custom"):null;eA.Buffer=ea;eA.SlowBuffer=X3t;eA.INSPECT_MAX_BYTES=50;var G8=2147483647;eA.kMaxLength=G8;ea.TYPED_ARRAY_SUPPORT=V3t();!ea.TYPED_ARRAY_SUPPORT&&typeof console!="undefined"&&typeof console.error=="function"&&console.error("This browser lacks typed array (Uint8Array) support which is required by `buffer` v5.x. Use `buffer` v4.x if you require old browser support.");function V3t(){try{let e=new Uint8Array(1),t={foo:function(){return 42}};return Object.setPrototypeOf(t,Uint8Array.prototype),Object.setPrototypeOf(e,t),e.foo()===42}catch(e){return!1}}Object.defineProperty(ea.prototype,"parent",{enumerable:!0,get:function(){if(ea.isBuffer(this))return this.buffer}});Object.defineProperty(ea.prototype,"offset",{enumerable:!0,get:function(){if(ea.isBuffer(this))return this.byteOffset}});function Vy(e){if(e>G8)throw new RangeError('The value "'+e+'" is invalid for option "size"');let t=new Uint8Array(e);return Object.setPrototypeOf(t,ea.prototype),t}function ea(e,t,r){if(typeof e=="number"){if(typeof t=="string")throw new TypeError('The "string" argument must be of type string. Received type number');return yH(e)}return $3e(e,t,r)}ea.poolSize=8192;function $3e(e,t,r){if(typeof e=="string")return H3t(e,t);if(ArrayBuffer.isView(e))return j3t(e);if(e==null)throw new TypeError("The first argument must be one of type string, Buffer, ArrayBuffer, Array, or Array-like Object. Received type "+typeof e);if(qm(e,ArrayBuffer)||e&&qm(e.buffer,ArrayBuffer)||typeof SharedArrayBuffer!="undefined"&&(qm(e,SharedArrayBuffer)||e&&qm(e.buffer,SharedArrayBuffer)))return gH(e,t,r);if(typeof e=="number")throw new TypeError('The "value" argument must not be of type number. Received type number');let n=e.valueOf&&e.valueOf();if(n!=null&&n!==e)return ea.from(n,t,r);let i=W3t(e);if(i)return i;if(typeof Symbol!="undefined"&&Symbol.toPrimitive!=null&&typeof e[Symbol.toPrimitive]=="function")return ea.from(e[Symbol.toPrimitive]("string"),t,r);throw new TypeError("The first argument must be one of type string, Buffer, ArrayBuffer, Array, or Array-like Object. Received type "+typeof e)}ea.from=function(e,t,r){return $3e(e,t,r)};Object.setPrototypeOf(ea.prototype,Uint8Array.prototype);Object.setPrototypeOf(ea,Uint8Array);function Q3e(e){if(typeof e!="number")throw new TypeError('"size" argument must be of type number');if(e<0)throw new RangeError('The value "'+e+'" is invalid for option "size"')}function G3t(e,t,r){return Q3e(e),e<=0?Vy(e):t!==void 0?typeof r=="string"?Vy(e).fill(t,r):Vy(e).fill(t):Vy(e)}ea.alloc=function(e,t,r){return G3t(e,t,r)};function yH(e){return Q3e(e),Vy(e<0?0:_H(e)|0)}ea.allocUnsafe=function(e){return yH(e)};ea.allocUnsafeSlow=function(e){return yH(e)};function H3t(e,t){if((typeof t!="string"||t==="")&&(t="utf8"),!ea.isEncoding(t))throw new TypeError("Unknown encoding: "+t);let r=eTe(e,t)|0,n=Vy(r),i=n.write(e,t);return i!==r&&(n=n.slice(0,i)),n}function pH(e){let t=e.length<0?0:_H(e.length)|0,r=Vy(t);for(let n=0;n<t;n+=1)r[n]=e[n]&255;return r}function j3t(e){if(qm(e,Uint8Array)){let t=new Uint8Array(e);return gH(t.buffer,t.byteOffset,t.byteLength)}return pH(e)}function gH(e,t,r){if(t<0||e.byteLength<t)throw new RangeError('"offset" is outside of buffer bounds');if(e.byteLength<t+(r||0))throw new RangeError('"length" is outside of buffer bounds');let n;return t===void 0&&r===void 0?n=new Uint8Array(e):r===void 0?n=new Uint8Array(e,t):n=new Uint8Array(e,t,r),Object.setPrototypeOf(n,ea.prototype),n}function W3t(e){if(ea.isBuffer(e)){let t=_H(e.length)|0,r=Vy(t);return r.length===0||e.copy(r,0,0,t),r}if(e.length!==void 0)return typeof e.length!="number"||bH(e.length)?Vy(0):pH(e);if(e.type==="Buffer"&&Array.isArray(e.data))return pH(e.data)}function _H(e){if(e>=G8)throw new RangeError("Attempt to allocate Buffer larger than maximum size: 0x"+G8.toString(16)+" bytes");return e|0}function X3t(e){return+e!=e&&(e=0),ea.alloc(+e)}ea.isBuffer=function(t){return t!=null&&t._isBuffer===!0&&t!==ea.prototype};ea.compare=function(t,r){if(qm(t,Uint8Array)&&(t=ea.from(t,t.offset,t.byteLength)),qm(r,Uint8Array)&&(r=ea.from(r,r.offset,r.byteLength)),!ea.isBuffer(t)||!ea.isBuffer(r))throw new TypeError('The "buf1", "buf2" arguments must be one of type Buffer or Uint8Array');if(t===r)return 0;let n=t.length,i=r.length;for(let a=0,o=Math.min(n,i);a<o;++a)if(t[a]!==r[a]){n=t[a],i=r[a];break}return n<i?-1:i<n?1:0};ea.isEncoding=function(t){switch(String(t).toLowerCase()){case"hex":case"utf8":case"utf-8":case"ascii":case"latin1":case"binary":case"base64":case"ucs2":case"ucs-2":case"utf16le":case"utf-16le":return!0;default:return!1}};ea.concat=function(t,r){if(!Array.isArray(t))throw new TypeError('"list" argument must be an Array of Buffers');if(t.length===0)return ea.alloc(0);let n;if(r===void 0)for(r=0,n=0;n<t.length;++n)r+=t[n].length;let i=ea.allocUnsafe(r),a=0;for(n=0;n<t.length;++n){let o=t[n];if(qm(o,Uint8Array))a+o.length>i.length?(ea.isBuffer(o)||(o=ea.from(o)),o.copy(i,a)):Uint8Array.prototype.set.call(i,o,a);else if(ea.isBuffer(o))o.copy(i,a);else throw new TypeError('"list" argument must be an Array of Buffers');a+=o.length}return i};function eTe(e,t){if(ea.isBuffer(e))return e.length;if(ArrayBuffer.isView(e)||qm(e,ArrayBuffer))return e.byteLength;if(typeof e!="string")throw new TypeError('The "string" argument must be one of type string, Buffer, or ArrayBuffer. Received type '+typeof e);let r=e.length,n=arguments.length>2&&arguments[2]===!0;if(!n&&r===0)return 0;let i=!1;for(;;)switch(t){case"ascii":case"latin1":case"binary":return r;case"utf8":case"utf-8":return mH(e).length;case"ucs2":case"ucs-2":case"utf16le":case"utf-16le":return r*2;case"hex":return r>>>1;case"base64":return uTe(e).length;default:if(i)return n?-1:mH(e).length;t=(""+t).toLowerCase(),i=!0}}ea.byteLength=eTe;function Z3t(e,t,r){let n=!1;if((t===void 0||t<0)&&(t=0),t>this.length||((r===void 0||r>this.length)&&(r=this.length),r<=0)||(r>>>=0,t>>>=0,r<=t))return"";for(e||(e="utf8");;)switch(e){case"hex":return nTt(this,t,r);case"utf8":case"utf-8":return rTe(this,t,r);case"ascii":return rTt(this,t,r);case"latin1":case"binary":return iTt(this,t,r);case"base64":return eTt(this,t,r);case"ucs2":case"ucs-2":case"utf16le":case"utf-16le":return aTt(this,t,r);default:if(n)throw new TypeError("Unknown encoding: "+e);e=(e+"").toLowerCase(),n=!0}}ea.prototype._isBuffer=!0;function u2(e,t,r){let n=e[t];e[t]=e[r],e[r]=n}ea.prototype.swap16=function(){let t=this.length;if(t%2!==0)throw new RangeError("Buffer size must be a multiple of 16-bits");for(let r=0;r<t;r+=2)u2(this,r,r+1);return this};ea.prototype.swap32=function(){let t=this.length;if(t%4!==0)throw new RangeError("Buffer size must be a multiple of 32-bits");for(let r=0;r<t;r+=4)u2(this,r,r+3),u2(this,r+1,r+2);return this};ea.prototype.swap64=function(){let t=this.length;if(t%8!==0)throw new RangeError("Buffer size must be a multiple of 64-bits");for(let r=0;r<t;r+=8)u2(this,r,r+7),u2(this,r+1,r+6),u2(this,r+2,r+5),u2(this,r+3,r+4);return this};ea.prototype.toString=function(){let t=this.length;return t===0?"":arguments.length===0?rTe(this,0,t):Z3t.apply(this,arguments)};ea.prototype.toLocaleString=ea.prototype.toString;ea.prototype.equals=function(t){if(!ea.isBuffer(t))throw new TypeError("Argument must be a Buffer");return this===t?!0:ea.compare(this,t)===0};ea.prototype.inspect=function(){let t="",r=eA.INSPECT_MAX_BYTES;return t=this.toString("hex",0,r).replace(/(.{2})/g,"$1 ").trim(),this.length>r&&(t+=" ... "),"<Buffer "+t+">"};Z3e&&(ea.prototype[Z3e]=ea.prototype.inspect);ea.prototype.compare=function(t,r,n,i,a){if(qm(t,Uint8Array)&&(t=ea.from(t,t.offset,t.byteLength)),!ea.isBuffer(t))throw new TypeError('The "target" argument must be one of type Buffer or Uint8Array. Received type '+typeof t);if(r===void 0&&(r=0),n===void 0&&(n=t?t.length:0),i===void 0&&(i=0),a===void 0&&(a=this.length),r<0||n>t.length||i<0||a>this.length)throw new RangeError("out of range index");if(i>=a&&r>=n)return 0;if(i>=a)return-1;if(r>=n)return 1;if(r>>>=0,n>>>=0,i>>>=0,a>>>=0,this===t)return 0;let o=a-i,s=n-r,l=Math.min(o,s),u=this.slice(i,a),c=t.slice(r,n);for(let f=0;f<l;++f)if(u[f]!==c[f]){o=u[f],s=c[f];break}return o<s?-1:s<o?1:0};function tTe(e,t,r,n,i){if(e.length===0)return-1;if(typeof r=="string"?(n=r,r=0):r>2147483647?r=2147483647:r<-2147483648&&(r=-2147483648),r=+r,bH(r)&&(r=i?0:e.length-1),r<0&&(r=e.length+r),r>=e.length){if(i)return-1;r=e.length-1}else if(r<0)if(i)r=0;else return-1;if(typeof t=="string"&&(t=ea.from(t,n)),ea.isBuffer(t))return t.length===0?-1:Y3e(e,t,r,n,i);if(typeof t=="number")return t=t&255,typeof Uint8Array.prototype.indexOf=="function"?i?Uint8Array.prototype.indexOf.call(e,t,r):Uint8Array.prototype.lastIndexOf.call(e,t,r):Y3e(e,[t],r,n,i);throw new TypeError("val must be string, number or Buffer")}function Y3e(e,t,r,n,i){let a=1,o=e.length,s=t.length;if(n!==void 0&&(n=String(n).toLowerCase(),n==="ucs2"||n==="ucs-2"||n==="utf16le"||n==="utf-16le")){if(e.length<2||t.length<2)return-1;a=2,o/=2,s/=2,r/=2}function l(c,f){return a===1?c[f]:c.readUInt16BE(f*a)}let u;if(i){let c=-1;for(u=r;u<o;u++)if(l(e,u)===l(t,c===-1?0:u-c)){if(c===-1&&(c=u),u-c+1===s)return c*a}else c!==-1&&(u-=u-c),c=-1}else for(r+s>o&&(r=o-s),u=r;u>=0;u--){let c=!0;for(let f=0;f<s;f++)if(l(e,u+f)!==l(t,f)){c=!1;break}if(c)return u}return-1}ea.prototype.includes=function(t,r,n){return this.indexOf(t,r,n)!==-1};ea.prototype.indexOf=function(t,r,n){return tTe(this,t,r,n,!0)};ea.prototype.lastIndexOf=function(t,r,n){return tTe(this,t,r,n,!1)};function Y3t(e,t,r,n){r=Number(r)||0;let i=e.length-r;n?(n=Number(n),n>i&&(n=i)):n=i;let a=t.length;n>a/2&&(n=a/2);let o;for(o=0;o<n;++o){let s=parseInt(t.substr(o*2,2),16);if(bH(s))return o;e[r+o]=s}return o}function K3t(e,t,r,n){return H8(mH(t,e.length-r),e,r,n)}function J3t(e,t,r,n){return H8(uTt(t),e,r,n)}function $3t(e,t,r,n){return H8(uTe(t),e,r,n)}function Q3t(e,t,r,n){return H8(cTt(t,e.length-r),e,r,n)}ea.prototype.write=function(t,r,n,i){if(r===void 0)i="utf8",n=this.length,r=0;else if(n===void 0&&typeof r=="string")i=r,n=this.length,r=0;else if(isFinite(r))r=r>>>0,isFinite(n)?(n=n>>>0,i===void 0&&(i="utf8")):(i=n,n=void 0);else throw new Error("Buffer.write(string, encoding, offset[, length]) is no longer supported");let a=this.length-r;if((n===void 0||n>a)&&(n=a),t.length>0&&(n<0||r<0)||r>this.length)throw new RangeError("Attempt to write outside buffer bounds");i||(i="utf8");let o=!1;for(;;)switch(i){case"hex":return Y3t(this,t,r,n);case"utf8":case"utf-8":return K3t(this,t,r,n);case"ascii":case"latin1":case"binary":return J3t(this,t,r,n);case"base64":return $3t(this,t,r,n);case"ucs2":case"ucs-2":case"utf16le":case"utf-16le":return Q3t(this,t,r,n);default:if(o)throw new TypeError("Unknown encoding: "+i);i=(""+i).toLowerCase(),o=!0}};ea.prototype.toJSON=function(){return{type:"Buffer",data:Array.prototype.slice.call(this._arr||this,0)}};function eTt(e,t,r){return t===0&&r===e.length?vH.fromByteArray(e):vH.fromByteArray(e.slice(t,r))}function rTe(e,t,r){r=Math.min(e.length,r);let n=[],i=t;for(;i<r;){let a=e[i],o=null,s=a>239?4:a>223?3:a>191?2:1;if(i+s<=r){let l,u,c,f;switch(s){case 1:a<128&&(o=a);break;case 2:l=e[i+1],(l&192)===128&&(f=(a&31)<<6|l&63,f>127&&(o=f));break;case 3:l=e[i+1],u=e[i+2],(l&192)===128&&(u&192)===128&&(f=(a&15)<<12|(l&63)<<6|u&63,f>2047&&(f<55296||f>57343)&&(o=f));break;case 4:l=e[i+1],u=e[i+2],c=e[i+3],(l&192)===128&&(u&192)===128&&(c&192)===128&&(f=(a&15)<<18|(l&63)<<12|(u&63)<<6|c&63,f>65535&&f<1114112&&(o=f))}}o===null?(o=65533,s=1):o>65535&&(o-=65536,n.push(o>>>10&1023|55296),o=56320|o&1023),n.push(o),i+=s}return tTt(n)}var K3e=4096;function tTt(e){let t=e.length;if(t<=K3e)return String.fromCharCode.apply(String,e);let r="",n=0;for(;n<t;)r+=String.fromCharCode.apply(String,e.slice(n,n+=K3e));return r}function rTt(e,t,r){let n="";r=Math.min(e.length,r);for(let i=t;i<r;++i)n+=String.fromCharCode(e[i]&127);return n}function iTt(e,t,r){let n="";r=Math.min(e.length,r);for(let i=t;i<r;++i)n+=String.fromCharCode(e[i]);return n}function nTt(e,t,r){let n=e.length;(!t||t<0)&&(t=0),(!r||r<0||r>n)&&(r=n);let i="";for(let a=t;a<r;++a)i+=fTt[e[a]];return i}function aTt(e,t,r){let n=e.slice(t,r),i="";for(let a=0;a<n.length-1;a+=2)i+=String.fromCharCode(n[a]+n[a+1]*256);return i}ea.prototype.slice=function(t,r){let n=this.length;t=~~t,r=r===void 0?n:~~r,t<0?(t+=n,t<0&&(t=0)):t>n&&(t=n),r<0?(r+=n,r<0&&(r=0)):r>n&&(r=n),r<t&&(r=t);let i=this.subarray(t,r);return Object.setPrototypeOf(i,ea.prototype),i};function ev(e,t,r){if(e%1!==0||e<0)throw new RangeError("offset is not uint");if(e+t>r)throw new RangeError("Trying to access beyond buffer length")}ea.prototype.readUintLE=ea.prototype.readUIntLE=function(t,r,n){t=t>>>0,r=r>>>0,n||ev(t,r,this.length);let i=this[t],a=1,o=0;for(;++o<r&&(a*=256);)i+=this[t+o]*a;return i};ea.prototype.readUintBE=ea.prototype.readUIntBE=function(t,r,n){t=t>>>0,r=r>>>0,n||ev(t,r,this.length);let i=this[t+--r],a=1;for(;r>0&&(a*=256);)i+=this[t+--r]*a;return i};ea.prototype.readUint8=ea.prototype.readUInt8=function(t,r){return t=t>>>0,r||ev(t,1,this.length),this[t]};ea.prototype.readUint16LE=ea.prototype.readUInt16LE=function(t,r){return t=t>>>0,r||ev(t,2,this.length),this[t]|this[t+1]<<8};ea.prototype.readUint16BE=ea.prototype.readUInt16BE=function(t,r){return t=t>>>0,r||ev(t,2,this.length),this[t]<<8|this[t+1]};ea.prototype.readUint32LE=ea.prototype.readUInt32LE=function(t,r){return t=t>>>0,r||ev(t,4,this.length),(this[t]|this[t+1]<<8|this[t+2]<<16)+this[t+3]*16777216};ea.prototype.readUint32BE=ea.prototype.readUInt32BE=function(t,r){return t=t>>>0,r||ev(t,4,this.length),this[t]*16777216+(this[t+1]<<16|this[t+2]<<8|this[t+3])};ea.prototype.readBigUInt64LE=P_(function(t){t=t>>>0,QT(t,"offset");let r=this[t],n=this[t+7];(r===void 0||n===void 0)&&U4(t,this.length-8);let i=r+this[++t]*2**8+this[++t]*2**16+this[++t]*2**24,a=this[++t]+this[++t]*2**8+this[++t]*2**16+n*2**24;return BigInt(i)+(BigInt(a)<<BigInt(32))});ea.prototype.readBigUInt64BE=P_(function(t){t=t>>>0,QT(t,"offset");let r=this[t],n=this[t+7];(r===void 0||n===void 0)&&U4(t,this.length-8);let i=r*2**24+this[++t]*2**16+this[++t]*2**8+this[++t],a=this[++t]*2**24+this[++t]*2**16+this[++t]*2**8+n;return(BigInt(i)<<BigInt(32))+BigInt(a)});ea.prototype.readIntLE=function(t,r,n){t=t>>>0,r=r>>>0,n||ev(t,r,this.length);let i=this[t],a=1,o=0;for(;++o<r&&(a*=256);)i+=this[t+o]*a;return a*=128,i>=a&&(i-=Math.pow(2,8*r)),i};ea.prototype.readIntBE=function(t,r,n){t=t>>>0,r=r>>>0,n||ev(t,r,this.length);let i=r,a=1,o=this[t+--i];for(;i>0&&(a*=256);)o+=this[t+--i]*a;return a*=128,o>=a&&(o-=Math.pow(2,8*r)),o};ea.prototype.readInt8=function(t,r){return t=t>>>0,r||ev(t,1,this.length),this[t]&128?(255-this[t]+1)*-1:this[t]};ea.prototype.readInt16LE=function(t,r){t=t>>>0,r||ev(t,2,this.length);let n=this[t]|this[t+1]<<8;return n&32768?n|4294901760:n};ea.prototype.readInt16BE=function(t,r){t=t>>>0,r||ev(t,2,this.length);let n=this[t+1]|this[t]<<8;return n&32768?n|4294901760:n};ea.prototype.readInt32LE=function(t,r){return t=t>>>0,r||ev(t,4,this.length),this[t]|this[t+1]<<8|this[t+2]<<16|this[t+3]<<24};ea.prototype.readInt32BE=function(t,r){return t=t>>>0,r||ev(t,4,this.length),this[t]<<24|this[t+1]<<16|this[t+2]<<8|this[t+3]};ea.prototype.readBigInt64LE=P_(function(t){t=t>>>0,QT(t,"offset");let r=this[t],n=this[t+7];(r===void 0||n===void 0)&&U4(t,this.length-8);let i=this[t+4]+this[t+5]*2**8+this[t+6]*2**16+(n<<24);return(BigInt(i)<<BigInt(32))+BigInt(r+this[++t]*2**8+this[++t]*2**16+this[++t]*2**24)});ea.prototype.readBigInt64BE=P_(function(t){t=t>>>0,QT(t,"offset");let r=this[t],n=this[t+7];(r===void 0||n===void 0)&&U4(t,this.length-8);let i=(r<<24)+this[++t]*2**16+this[++t]*2**8+this[++t];return(BigInt(i)<<BigInt(32))+BigInt(this[++t]*2**24+this[++t]*2**16+this[++t]*2**8+n)});ea.prototype.readFloatLE=function(t,r){return t=t>>>0,r||ev(t,4,this.length),$T.read(this,t,!0,23,4)};ea.prototype.readFloatBE=function(t,r){return t=t>>>0,r||ev(t,4,this.length),$T.read(this,t,!1,23,4)};ea.prototype.readDoubleLE=function(t,r){return t=t>>>0,r||ev(t,8,this.length),$T.read(this,t,!0,52,8)};ea.prototype.readDoubleBE=function(t,r){return t=t>>>0,r||ev(t,8,this.length),$T.read(this,t,!1,52,8)};function Rp(e,t,r,n,i,a){if(!ea.isBuffer(e))throw new TypeError('"buffer" argument must be a Buffer instance');if(t>i||t<a)throw new RangeError('"value" argument is out of bounds');if(r+n>e.length)throw new RangeError("Index out of range")}ea.prototype.writeUintLE=ea.prototype.writeUIntLE=function(t,r,n,i){if(t=+t,r=r>>>0,n=n>>>0,!i){let s=Math.pow(2,8*n)-1;Rp(this,t,r,n,s,0)}let a=1,o=0;for(this[r]=t&255;++o<n&&(a*=256);)this[r+o]=t/a&255;return r+n};ea.prototype.writeUintBE=ea.prototype.writeUIntBE=function(t,r,n,i){if(t=+t,r=r>>>0,n=n>>>0,!i){let s=Math.pow(2,8*n)-1;Rp(this,t,r,n,s,0)}let a=n-1,o=1;for(this[r+a]=t&255;--a>=0&&(o*=256);)this[r+a]=t/o&255;return r+n};ea.prototype.writeUint8=ea.prototype.writeUInt8=function(t,r,n){return t=+t,r=r>>>0,n||Rp(this,t,r,1,255,0),this[r]=t&255,r+1};ea.prototype.writeUint16LE=ea.prototype.writeUInt16LE=function(t,r,n){return t=+t,r=r>>>0,n||Rp(this,t,r,2,65535,0),this[r]=t&255,this[r+1]=t>>>8,r+2};ea.prototype.writeUint16BE=ea.prototype.writeUInt16BE=function(t,r,n){return t=+t,r=r>>>0,n||Rp(this,t,r,2,65535,0),this[r]=t>>>8,this[r+1]=t&255,r+2};ea.prototype.writeUint32LE=ea.prototype.writeUInt32LE=function(t,r,n){return t=+t,r=r>>>0,n||Rp(this,t,r,4,4294967295,0),this[r+3]=t>>>24,this[r+2]=t>>>16,this[r+1]=t>>>8,this[r]=t&255,r+4};ea.prototype.writeUint32BE=ea.prototype.writeUInt32BE=function(t,r,n){return t=+t,r=r>>>0,n||Rp(this,t,r,4,4294967295,0),this[r]=t>>>24,this[r+1]=t>>>16,this[r+2]=t>>>8,this[r+3]=t&255,r+4};function iTe(e,t,r,n,i){lTe(t,n,i,e,r,7);let a=Number(t&BigInt(4294967295));e[r++]=a,a=a>>8,e[r++]=a,a=a>>8,e[r++]=a,a=a>>8,e[r++]=a;let o=Number(t>>BigInt(32)&BigInt(4294967295));return e[r++]=o,o=o>>8,e[r++]=o,o=o>>8,e[r++]=o,o=o>>8,e[r++]=o,r}function nTe(e,t,r,n,i){lTe(t,n,i,e,r,7);let a=Number(t&BigInt(4294967295));e[r+7]=a,a=a>>8,e[r+6]=a,a=a>>8,e[r+5]=a,a=a>>8,e[r+4]=a;let o=Number(t>>BigInt(32)&BigInt(4294967295));return e[r+3]=o,o=o>>8,e[r+2]=o,o=o>>8,e[r+1]=o,o=o>>8,e[r]=o,r+8}ea.prototype.writeBigUInt64LE=P_(function(t,r=0){return iTe(this,t,r,BigInt(0),BigInt("0xffffffffffffffff"))});ea.prototype.writeBigUInt64BE=P_(function(t,r=0){return nTe(this,t,r,BigInt(0),BigInt("0xffffffffffffffff"))});ea.prototype.writeIntLE=function(t,r,n,i){if(t=+t,r=r>>>0,!i){let l=Math.pow(2,8*n-1);Rp(this,t,r,n,l-1,-l)}let a=0,o=1,s=0;for(this[r]=t&255;++a<n&&(o*=256);)t<0&&s===0&&this[r+a-1]!==0&&(s=1),this[r+a]=(t/o>>0)-s&255;return r+n};ea.prototype.writeIntBE=function(t,r,n,i){if(t=+t,r=r>>>0,!i){let l=Math.pow(2,8*n-1);Rp(this,t,r,n,l-1,-l)}let a=n-1,o=1,s=0;for(this[r+a]=t&255;--a>=0&&(o*=256);)t<0&&s===0&&this[r+a+1]!==0&&(s=1),this[r+a]=(t/o>>0)-s&255;return r+n};ea.prototype.writeInt8=function(t,r,n){return t=+t,r=r>>>0,n||Rp(this,t,r,1,127,-128),t<0&&(t=255+t+1),this[r]=t&255,r+1};ea.prototype.writeInt16LE=function(t,r,n){return t=+t,r=r>>>0,n||Rp(this,t,r,2,32767,-32768),this[r]=t&255,this[r+1]=t>>>8,r+2};ea.prototype.writeInt16BE=function(t,r,n){return t=+t,r=r>>>0,n||Rp(this,t,r,2,32767,-32768),this[r]=t>>>8,this[r+1]=t&255,r+2};ea.prototype.writeInt32LE=function(t,r,n){return t=+t,r=r>>>0,n||Rp(this,t,r,4,2147483647,-2147483648),this[r]=t&255,this[r+1]=t>>>8,this[r+2]=t>>>16,this[r+3]=t>>>24,r+4};ea.prototype.writeInt32BE=function(t,r,n){return t=+t,r=r>>>0,n||Rp(this,t,r,4,2147483647,-2147483648),t<0&&(t=4294967295+t+1),this[r]=t>>>24,this[r+1]=t>>>16,this[r+2]=t>>>8,this[r+3]=t&255,r+4};ea.prototype.writeBigInt64LE=P_(function(t,r=0){return iTe(this,t,r,-BigInt("0x8000000000000000"),BigInt("0x7fffffffffffffff"))});ea.prototype.writeBigInt64BE=P_(function(t,r=0){return nTe(this,t,r,-BigInt("0x8000000000000000"),BigInt("0x7fffffffffffffff"))});function aTe(e,t,r,n,i,a){if(r+n>e.length)throw new RangeError("Index out of range");if(r<0)throw new RangeError("Index out of range")}function oTe(e,t,r,n,i){return t=+t,r=r>>>0,i||aTe(e,t,r,4,34028234663852886e22,-34028234663852886e22),$T.write(e,t,r,n,23,4),r+4}ea.prototype.writeFloatLE=function(t,r,n){return oTe(this,t,r,!0,n)};ea.prototype.writeFloatBE=function(t,r,n){return oTe(this,t,r,!1,n)};function sTe(e,t,r,n,i){return t=+t,r=r>>>0,i||aTe(e,t,r,8,17976931348623157e292,-17976931348623157e292),$T.write(e,t,r,n,52,8),r+8}ea.prototype.writeDoubleLE=function(t,r,n){return sTe(this,t,r,!0,n)};ea.prototype.writeDoubleBE=function(t,r,n){return sTe(this,t,r,!1,n)};ea.prototype.copy=function(t,r,n,i){if(!ea.isBuffer(t))throw new TypeError("argument should be a Buffer");if(n||(n=0),!i&&i!==0&&(i=this.length),r>=t.length&&(r=t.length),r||(r=0),i>0&&i<n&&(i=n),i===n||t.length===0||this.length===0)return 0;if(r<0)throw new RangeError("targetStart out of bounds");if(n<0||n>=this.length)throw new RangeError("Index out of range");if(i<0)throw new RangeError("sourceEnd out of bounds");i>this.length&&(i=this.length),t.length-r<i-n&&(i=t.length-r+n);let a=i-n;return this===t&&typeof Uint8Array.prototype.copyWithin=="function"?this.copyWithin(r,n,i):Uint8Array.prototype.set.call(t,this.subarray(n,i),r),a};ea.prototype.fill=function(t,r,n,i){if(typeof t=="string"){if(typeof r=="string"?(i=r,r=0,n=this.length):typeof n=="string"&&(i=n,n=this.length),i!==void 0&&typeof i!="string")throw new TypeError("encoding must be a string");if(typeof i=="string"&&!ea.isEncoding(i))throw new TypeError("Unknown encoding: "+i);if(t.length===1){let o=t.charCodeAt(0);(i==="utf8"&&o<128||i==="latin1")&&(t=o)}}else typeof t=="number"?t=t&255:typeof t=="boolean"&&(t=Number(t));if(r<0||this.length<r||this.length<n)throw new RangeError("Out of range index");if(n<=r)return this;r=r>>>0,n=n===void 0?this.length:n>>>0,t||(t=0);let a;if(typeof t=="number")for(a=r;a<n;++a)this[a]=t;else{let o=ea.isBuffer(t)?t:ea.from(t,i),s=o.length;if(s===0)throw new TypeError('The value "'+t+'" is invalid for argument "value"');for(a=0;a<n-r;++a)this[a+r]=o[a%s]}return this};var JT={};function xH(e,t,r){JT[e]=class extends r{constructor(){super(),Object.defineProperty(this,"message",{value:t.apply(this,arguments),writable:!0,configurable:!0}),this.name=`${this.name} [${e}]`,this.stack,delete this.name}get code(){return e}set code(i){Object.defineProperty(this,"code",{configurable:!0,enumerable:!0,value:i,writable:!0})}toString(){return`${this.name} [${e}]: ${this.message}`}}}xH("ERR_BUFFER_OUT_OF_BOUNDS",function(e){return e?`${e} is outside of buffer bounds`:"Attempt to access memory outside buffer bounds"},RangeError);xH("ERR_INVALID_ARG_TYPE",function(e,t){return`The "${e}" argument must be of type number. Received type ${typeof t}`},TypeError);xH("ERR_OUT_OF_RANGE",function(e,t,r){let n=`The value of "${e}" is out of range.`,i=r;return Number.isInteger(r)&&Math.abs(r)>2**32?i=J3e(String(r)):typeof r=="bigint"&&(i=String(r),(r>BigInt(2)**BigInt(32)||r<-(BigInt(2)**BigInt(32)))&&(i=J3e(i)),i+="n"),n+=` It must be ${t}. Received ${i}`,n},RangeError);function J3e(e){let t="",r=e.length,n=e[0]==="-"?1:0;for(;r>=n+4;r-=3)t=`_${e.slice(r-3,r)}${t}`;return`${e.slice(0,r)}${t}`}function oTt(e,t,r){QT(t,"offset"),(e[t]===void 0||e[t+r]===void 0)&&U4(t,e.length-(r+1))}function lTe(e,t,r,n,i,a){if(e>r||e<t){let o=typeof t=="bigint"?"n":"",s;throw a>3?t===0||t===BigInt(0)?s=`>= 0${o} and < 2${o} ** ${(a+1)*8}${o}`:s=`>= -(2${o} ** ${(a+1)*8-1}${o}) and < 2 ** ${(a+1)*8-1}${o}`:s=`>= ${t}${o} and <= ${r}${o}`,new JT.ERR_OUT_OF_RANGE("value",s,e)}oTt(n,i,a)}function QT(e,t){if(typeof e!="number")throw new JT.ERR_INVALID_ARG_TYPE(t,"number",e)}function U4(e,t,r){throw Math.floor(e)!==e?(QT(e,r),new JT.ERR_OUT_OF_RANGE(r||"offset","an integer",e)):t<0?new JT.ERR_BUFFER_OUT_OF_BOUNDS:new JT.ERR_OUT_OF_RANGE(r||"offset",`>= ${r?1:0} and <= ${t}`,e)}var sTt=/[^+/0-9A-Za-z-_]/g;function lTt(e){if(e=e.split("=")[0],e=e.trim().replace(sTt,""),e.length<2)return"";for(;e.length%4!==0;)e=e+"=";return e}function mH(e,t){t=t||1/0;let r,n=e.length,i=null,a=[];for(let o=0;o<n;++o){if(r=e.charCodeAt(o),r>55295&&r<57344){if(!i){if(r>56319){(t-=3)>-1&&a.push(239,191,189);continue}else if(o+1===n){(t-=3)>-1&&a.push(239,191,189);continue}i=r;continue}if(r<56320){(t-=3)>-1&&a.push(239,191,189),i=r;continue}r=(i-55296<<10|r-56320)+65536}else i&&(t-=3)>-1&&a.push(239,191,189);if(i=null,r<128){if((t-=1)<0)break;a.push(r)}else if(r<2048){if((t-=2)<0)break;a.push(r>>6|192,r&63|128)}else if(r<65536){if((t-=3)<0)break;a.push(r>>12|224,r>>6&63|128,r&63|128)}else if(r<1114112){if((t-=4)<0)break;a.push(r>>18|240,r>>12&63|128,r>>6&63|128,r&63|128)}else throw new Error("Invalid code point")}return a}function uTt(e){let t=[];for(let r=0;r<e.length;++r)t.push(e.charCodeAt(r)&255);return t}function cTt(e,t){let r,n,i,a=[];for(let o=0;o<e.length&&!((t-=2)<0);++o)r=e.charCodeAt(o),n=r>>8,i=r%256,a.push(i),a.push(n);return a}function uTe(e){return vH.toByteArray(lTt(e))}function H8(e,t,r,n){let i;for(i=0;i<n&&!(i+r>=t.length||i>=e.length);++i)t[i+r]=e[i];return i}function qm(e,t){return e instanceof t||e!=null&&e.constructor!=null&&e.constructor.name!=null&&e.constructor.name===t.name}function bH(e){return e!==e}var fTt=function(){let e="0123456789abcdef",t=new Array(256);for(let r=0;r<16;++r){let n=r*16;for(let i=0;i<16;++i)t[n+i]=e[r]+e[i]}return t}();function P_(e){return typeof BigInt=="undefined"?hTt:e}function hTt(){throw new Error("BigInt not supported")}});var j8=ye((Nfr,cTe)=>{"use strict";cTe.exports=function(){if(typeof Symbol!="function"||typeof Object.getOwnPropertySymbols!="function")return!1;if(typeof Symbol.iterator=="symbol")return!0;var t={},r=Symbol("test"),n=Object(r);if(typeof r=="string"||Object.prototype.toString.call(r)!=="[object Symbol]"||Object.prototype.toString.call(n)!=="[object Symbol]")return!1;var i=42;t[r]=i;for(var a in t)return!1;if(typeof Object.keys=="function"&&Object.keys(t).length!==0||typeof Object.getOwnPropertyNames=="function"&&Object.getOwnPropertyNames(t).length!==0)return!1;var o=Object.getOwnPropertySymbols(t);if(o.length!==1||o[0]!==r||!Object.prototype.propertyIsEnumerable.call(t,r))return!1;if(typeof Object.getOwnPropertyDescriptor=="function"){var s=Object.getOwnPropertyDescriptor(t,r);if(s.value!==i||s.enumerable!==!0)return!1}return!0}});var V4=ye((Ufr,fTe)=>{"use strict";var dTt=j8();fTe.exports=function(){return dTt()&&!!Symbol.toStringTag}});var wH=ye((Vfr,hTe)=>{"use strict";hTe.exports=Object});var vTe=ye((Gfr,dTe)=>{"use strict";dTe.exports=Error});var gTe=ye((Hfr,pTe)=>{"use strict";pTe.exports=EvalError});var yTe=ye((jfr,mTe)=>{"use strict";mTe.exports=RangeError});var xTe=ye((Wfr,_Te)=>{"use strict";_Te.exports=ReferenceError});var TH=ye((Xfr,bTe)=>{"use strict";bTe.exports=SyntaxError});var tA=ye((Zfr,wTe)=>{"use strict";wTe.exports=TypeError});var ATe=ye((Yfr,TTe)=>{"use strict";TTe.exports=URIError});var MTe=ye((Kfr,STe)=>{"use strict";STe.exports=Math.abs});var kTe=ye((Jfr,ETe)=>{"use strict";ETe.exports=Math.floor});var LTe=ye(($fr,CTe)=>{"use strict";CTe.exports=Math.max});var ITe=ye((Qfr,PTe)=>{"use strict";PTe.exports=Math.min});var DTe=ye((ehr,RTe)=>{"use strict";RTe.exports=Math.pow});var zTe=ye((thr,FTe)=>{"use strict";FTe.exports=Math.round});var qTe=ye((rhr,OTe)=>{"use strict";OTe.exports=Number.isNaN||function(t){return t!==t}});var NTe=ye((ihr,BTe)=>{"use strict";var vTt=qTe();BTe.exports=function(t){return vTt(t)||t===0?t:t<0?-1:1}});var VTe=ye((nhr,UTe)=>{"use strict";UTe.exports=Object.getOwnPropertyDescriptor});var f2=ye((ahr,GTe)=>{"use strict";var W8=VTe();if(W8)try{W8([],"length")}catch(e){W8=null}GTe.exports=W8});var G4=ye((ohr,HTe)=>{"use strict";var X8=Object.defineProperty||!1;if(X8)try{X8({},"a",{value:1})}catch(e){X8=!1}HTe.exports=X8});var XTe=ye((shr,WTe)=>{"use strict";var jTe=typeof Symbol!="undefined"&&Symbol,pTt=j8();WTe.exports=function(){return typeof jTe!="function"||typeof Symbol!="function"||typeof jTe("foo")!="symbol"||typeof Symbol("bar")!="symbol"?!1:pTt()}});var AH=ye((lhr,ZTe)=>{"use strict";ZTe.exports=typeof Reflect!="undefined"&&Reflect.getPrototypeOf||null});var SH=ye((uhr,YTe)=>{"use strict";var gTt=wH();YTe.exports=gTt.getPrototypeOf||null});var $Te=ye((chr,JTe)=>{"use strict";var mTt="Function.prototype.bind called on incompatible ",yTt=Object.prototype.toString,_Tt=Math.max,xTt="[object Function]",KTe=function(t,r){for(var n=[],i=0;i<t.length;i+=1)n[i]=t[i];for(var a=0;a<r.length;a+=1)n[a+t.length]=r[a];return n},bTt=function(t,r){for(var n=[],i=r||0,a=0;i<t.length;i+=1,a+=1)n[a]=t[i];return n},wTt=function(e,t){for(var r="",n=0;n<e.length;n+=1)r+=e[n],n+1<e.length&&(r+=t);return r};JTe.exports=function(t){var r=this;if(typeof r!="function"||yTt.apply(r)!==xTt)throw new TypeError(mTt+r);for(var n=bTt(arguments,1),i,a=function(){if(this instanceof i){var c=r.apply(this,KTe(n,arguments));return Object(c)===c?c:this}return r.apply(t,KTe(n,arguments))},o=_Tt(0,r.length-n.length),s=[],l=0;l<o;l++)s[l]="$"+l;if(i=Function("binder","return function ("+wTt(s,",")+"){ return binder.apply(this,arguments); }")(a),r.prototype){var u=function(){};u.prototype=r.prototype,i.prototype=new u,u.prototype=null}return i}});var rA=ye((fhr,QTe)=>{"use strict";var TTt=$Te();QTe.exports=Function.prototype.bind||TTt});var Z8=ye((hhr,eAe)=>{"use strict";eAe.exports=Function.prototype.call});var MH=ye((dhr,tAe)=>{"use strict";tAe.exports=Function.prototype.apply});var iAe=ye((vhr,rAe)=>{"use strict";rAe.exports=typeof Reflect!="undefined"&&Reflect&&Reflect.apply});var aAe=ye((phr,nAe)=>{"use strict";var ATt=rA(),STt=MH(),MTt=Z8(),ETt=iAe();nAe.exports=ETt||ATt.call(MTt,STt)});var sAe=ye((ghr,oAe)=>{"use strict";var kTt=rA(),CTt=tA(),LTt=Z8(),PTt=aAe();oAe.exports=function(t){if(t.length<1||typeof t[0]!="function")throw new CTt("a function is required");return PTt(kTt,LTt,t)}});var dAe=ye((mhr,hAe)=>{"use strict";var ITt=sAe(),lAe=f2(),cAe;try{cAe=[].__proto__===Array.prototype}catch(e){if(!e||typeof e!="object"||!("code"in e)||e.code!=="ERR_PROTO_ACCESS")throw e}var EH=!!cAe&&lAe&&lAe(Object.prototype,"__proto__"),fAe=Object,uAe=fAe.getPrototypeOf;hAe.exports=EH&&typeof EH.get=="function"?ITt([EH.get]):typeof uAe=="function"?function(t){return uAe(t==null?t:fAe(t))}:!1});var yAe=ye((yhr,mAe)=>{"use strict";var vAe=AH(),pAe=SH(),gAe=dAe();mAe.exports=vAe?function(t){return vAe(t)}:pAe?function(t){if(!t||typeof t!="object"&&typeof t!="function")throw new TypeError("getProto: not an object");return pAe(t)}:gAe?function(t){return gAe(t)}:null});var xAe=ye((_hr,_Ae)=>{"use strict";var RTt=Function.prototype.call,DTt=Object.prototype.hasOwnProperty,FTt=rA();_Ae.exports=FTt.call(RTt,DTt)});var J8=ye((xhr,MAe)=>{"use strict";var lu,zTt=wH(),OTt=vTe(),qTt=gTe(),BTt=yTe(),NTt=xTe(),oA=TH(),aA=tA(),UTt=ATe(),VTt=MTe(),GTt=kTe(),HTt=LTe(),jTt=ITe(),WTt=DTe(),XTt=zTe(),ZTt=NTe(),AAe=Function,kH=function(e){try{return AAe('"use strict"; return ('+e+").constructor;")()}catch(t){}},H4=f2(),YTt=G4(),CH=function(){throw new aA},KTt=H4?function(){try{return arguments.callee,CH}catch(e){try{return H4(arguments,"callee").get}catch(t){return CH}}}():CH,iA=XTe()(),tv=yAe(),JTt=SH(),$Tt=AH(),SAe=MH(),j4=Z8(),nA={},QTt=typeof Uint8Array=="undefined"||!tv?lu:tv(Uint8Array),h2={__proto__:null,"%AggregateError%":typeof AggregateError=="undefined"?lu:AggregateError,"%Array%":Array,"%ArrayBuffer%":typeof ArrayBuffer=="undefined"?lu:ArrayBuffer,"%ArrayIteratorPrototype%":iA&&tv?tv([][Symbol.iterator]()):lu,"%AsyncFromSyncIteratorPrototype%":lu,"%AsyncFunction%":nA,"%AsyncGenerator%":nA,"%AsyncGeneratorFunction%":nA,"%AsyncIteratorPrototype%":nA,"%Atomics%":typeof Atomics=="undefined"?lu:Atomics,"%BigInt%":typeof BigInt=="undefined"?lu:BigInt,"%BigInt64Array%":typeof BigInt64Array=="undefined"?lu:BigInt64Array,"%BigUint64Array%":typeof BigUint64Array=="undefined"?lu:BigUint64Array,"%Boolean%":Boolean,"%DataView%":typeof DataView=="undefined"?lu:DataView,"%Date%":Date,"%decodeURI%":decodeURI,"%decodeURIComponent%":decodeURIComponent,"%encodeURI%":encodeURI,"%encodeURIComponent%":encodeURIComponent,"%Error%":OTt,"%eval%":eval,"%EvalError%":qTt,"%Float16Array%":typeof Float16Array=="undefined"?lu:Float16Array,"%Float32Array%":typeof Float32Array=="undefined"?lu:Float32Array,"%Float64Array%":typeof Float64Array=="undefined"?lu:Float64Array,"%FinalizationRegistry%":typeof FinalizationRegistry=="undefined"?lu:FinalizationRegistry,"%Function%":AAe,"%GeneratorFunction%":nA,"%Int8Array%":typeof Int8Array=="undefined"?lu:Int8Array,"%Int16Array%":typeof Int16Array=="undefined"?lu:Int16Array,"%Int32Array%":typeof Int32Array=="undefined"?lu:Int32Array,"%isFinite%":isFinite,"%isNaN%":isNaN,"%IteratorPrototype%":iA&&tv?tv(tv([][Symbol.iterator]())):lu,"%JSON%":typeof JSON=="object"?JSON:lu,"%Map%":typeof Map=="undefined"?lu:Map,"%MapIteratorPrototype%":typeof Map=="undefined"||!iA||!tv?lu:tv(new Map()[Symbol.iterator]()),"%Math%":Math,"%Number%":Number,"%Object%":zTt,"%Object.getOwnPropertyDescriptor%":H4,"%parseFloat%":parseFloat,"%parseInt%":parseInt,"%Promise%":typeof Promise=="undefined"?lu:Promise,"%Proxy%":typeof Proxy=="undefined"?lu:Proxy,"%RangeError%":BTt,"%ReferenceError%":NTt,"%Reflect%":typeof Reflect=="undefined"?lu:Reflect,"%RegExp%":RegExp,"%Set%":typeof Set=="undefined"?lu:Set,"%SetIteratorPrototype%":typeof Set=="undefined"||!iA||!tv?lu:tv(new Set()[Symbol.iterator]()),"%SharedArrayBuffer%":typeof SharedArrayBuffer=="undefined"?lu:SharedArrayBuffer,"%String%":String,"%StringIteratorPrototype%":iA&&tv?tv(""[Symbol.iterator]()):lu,"%Symbol%":iA?Symbol:lu,"%SyntaxError%":oA,"%ThrowTypeError%":KTt,"%TypedArray%":QTt,"%TypeError%":aA,"%Uint8Array%":typeof Uint8Array=="undefined"?lu:Uint8Array,"%Uint8ClampedArray%":typeof Uint8ClampedArray=="undefined"?lu:Uint8ClampedArray,"%Uint16Array%":typeof Uint16Array=="undefined"?lu:Uint16Array,"%Uint32Array%":typeof Uint32Array=="undefined"?lu:Uint32Array,"%URIError%":UTt,"%WeakMap%":typeof WeakMap=="undefined"?lu:WeakMap,"%WeakRef%":typeof WeakRef=="undefined"?lu:WeakRef,"%WeakSet%":typeof WeakSet=="undefined"?lu:WeakSet,"%Function.prototype.call%":j4,"%Function.prototype.apply%":SAe,"%Object.defineProperty%":YTt,"%Object.getPrototypeOf%":JTt,"%Math.abs%":VTt,"%Math.floor%":GTt,"%Math.max%":HTt,"%Math.min%":jTt,"%Math.pow%":WTt,"%Math.round%":XTt,"%Math.sign%":ZTt,"%Reflect.getPrototypeOf%":$Tt};if(tv)try{null.error}catch(e){bAe=tv(tv(e)),h2["%Error.prototype%"]=bAe}var bAe,eAt=function e(t){var r;if(t==="%AsyncFunction%")r=kH("async function () {}");else if(t==="%GeneratorFunction%")r=kH("function* () {}");else if(t==="%AsyncGeneratorFunction%")r=kH("async function* () {}");else if(t==="%AsyncGenerator%"){var n=e("%AsyncGeneratorFunction%");n&&(r=n.prototype)}else if(t==="%AsyncIteratorPrototype%"){var i=e("%AsyncGenerator%");i&&tv&&(r=tv(i.prototype))}return h2[t]=r,r},wAe={__proto__:null,"%ArrayBufferPrototype%":["ArrayBuffer","prototype"],"%ArrayPrototype%":["Array","prototype"],"%ArrayProto_entries%":["Array","prototype","entries"],"%ArrayProto_forEach%":["Array","prototype","forEach"],"%ArrayProto_keys%":["Array","prototype","keys"],"%ArrayProto_values%":["Array","prototype","values"],"%AsyncFunctionPrototype%":["AsyncFunction","prototype"],"%AsyncGenerator%":["AsyncGeneratorFunction","prototype"],"%AsyncGeneratorPrototype%":["AsyncGeneratorFunction","prototype","prototype"],"%BooleanPrototype%":["Boolean","prototype"],"%DataViewPrototype%":["DataView","prototype"],"%DatePrototype%":["Date","prototype"],"%ErrorPrototype%":["Error","prototype"],"%EvalErrorPrototype%":["EvalError","prototype"],"%Float32ArrayPrototype%":["Float32Array","prototype"],"%Float64ArrayPrototype%":["Float64Array","prototype"],"%FunctionPrototype%":["Function","prototype"],"%Generator%":["GeneratorFunction","prototype"],"%GeneratorPrototype%":["GeneratorFunction","prototype","prototype"],"%Int8ArrayPrototype%":["Int8Array","prototype"],"%Int16ArrayPrototype%":["Int16Array","prototype"],"%Int32ArrayPrototype%":["Int32Array","prototype"],"%JSONParse%":["JSON","parse"],"%JSONStringify%":["JSON","stringify"],"%MapPrototype%":["Map","prototype"],"%NumberPrototype%":["Number","prototype"],"%ObjectPrototype%":["Object","prototype"],"%ObjProto_toString%":["Object","prototype","toString"],"%ObjProto_valueOf%":["Object","prototype","valueOf"],"%PromisePrototype%":["Promise","prototype"],"%PromiseProto_then%":["Promise","prototype","then"],"%Promise_all%":["Promise","all"],"%Promise_reject%":["Promise","reject"],"%Promise_resolve%":["Promise","resolve"],"%RangeErrorPrototype%":["RangeError","prototype"],"%ReferenceErrorPrototype%":["ReferenceError","prototype"],"%RegExpPrototype%":["RegExp","prototype"],"%SetPrototype%":["Set","prototype"],"%SharedArrayBufferPrototype%":["SharedArrayBuffer","prototype"],"%StringPrototype%":["String","prototype"],"%SymbolPrototype%":["Symbol","prototype"],"%SyntaxErrorPrototype%":["SyntaxError","prototype"],"%TypedArrayPrototype%":["TypedArray","prototype"],"%TypeErrorPrototype%":["TypeError","prototype"],"%Uint8ArrayPrototype%":["Uint8Array","prototype"],"%Uint8ClampedArrayPrototype%":["Uint8ClampedArray","prototype"],"%Uint16ArrayPrototype%":["Uint16Array","prototype"],"%Uint32ArrayPrototype%":["Uint32Array","prototype"],"%URIErrorPrototype%":["URIError","prototype"],"%WeakMapPrototype%":["WeakMap","prototype"],"%WeakSetPrototype%":["WeakSet","prototype"]},W4=rA(),Y8=xAe(),tAt=W4.call(j4,Array.prototype.concat),rAt=W4.call(SAe,Array.prototype.splice),TAe=W4.call(j4,String.prototype.replace),K8=W4.call(j4,String.prototype.slice),iAt=W4.call(j4,RegExp.prototype.exec),nAt=/[^%.[\]]+|\[(?:(-?\d+(?:\.\d+)?)|(["'])((?:(?!\2)[^\\]|\\.)*?)\2)\]|(?=(?:\.|\[\])(?:\.|\[\]|%$))/g,aAt=/\\(\\)?/g,oAt=function(t){var r=K8(t,0,1),n=K8(t,-1);if(r==="%"&&n!=="%")throw new oA("invalid intrinsic syntax, expected closing `%`");if(n==="%"&&r!=="%")throw new oA("invalid intrinsic syntax, expected opening `%`");var i=[];return TAe(t,nAt,function(a,o,s,l){i[i.length]=s?TAe(l,aAt,"$1"):o||a}),i},sAt=function(t,r){var n=t,i;if(Y8(wAe,n)&&(i=wAe[n],n="%"+i[0]+"%"),Y8(h2,n)){var a=h2[n];if(a===nA&&(a=eAt(n)),typeof a=="undefined"&&!r)throw new aA("intrinsic "+t+" exists, but is not available. Please file an issue!");return{alias:i,name:n,value:a}}throw new oA("intrinsic "+t+" does not exist!")};MAe.exports=function(t,r){if(typeof t!="string"||t.length===0)throw new aA("intrinsic name must be a non-empty string");if(arguments.length>1&&typeof r!="boolean")throw new aA('"allowMissing" argument must be a boolean');if(iAt(/^%?[^%]*%?$/,t)===null)throw new oA("`%` may not be present anywhere but at the beginning and end of the intrinsic name");var n=oAt(t),i=n.length>0?n[0]:"",a=sAt("%"+i+"%",r),o=a.name,s=a.value,l=!1,u=a.alias;u&&(i=u[0],rAt(n,tAt([0,1],u)));for(var c=1,f=!0;c<n.length;c+=1){var h=n[c],d=K8(h,0,1),v=K8(h,-1);if((d==='"'||d==="'"||d==="`"||v==='"'||v==="'"||v==="`")&&d!==v)throw new oA("property names with quotes must have matching quotes");if((h==="constructor"||!f)&&(l=!0),i+="."+h,o="%"+i+"%",Y8(h2,o))s=h2[o];else if(s!=null){if(!(h in s)){if(!r)throw new aA("base intrinsic for "+t+" exists, but the property is not available.");return}if(H4&&c+1>=n.length){var _=H4(s,h);f=!!_,f&&"get"in _&&!("originalValue"in _.get)?s=_.get:s=s[h]}else f=Y8(s,h),s=s[h];f&&!l&&(h2[o]=s)}}return s}});var LAe=ye((bhr,CAe)=>{"use strict";var EAe=G4(),lAt=TH(),sA=tA(),kAe=f2();CAe.exports=function(t,r,n){if(!t||typeof t!="object"&&typeof t!="function")throw new sA("`obj` must be an object or a function`");if(typeof r!="string"&&typeof r!="symbol")throw new sA("`property` must be a string or a symbol`");if(arguments.length>3&&typeof arguments[3]!="boolean"&&arguments[3]!==null)throw new sA("`nonEnumerable`, if provided, must be a boolean or null");if(arguments.length>4&&typeof arguments[4]!="boolean"&&arguments[4]!==null)throw new sA("`nonWritable`, if provided, must be a boolean or null");if(arguments.length>5&&typeof arguments[5]!="boolean"&&arguments[5]!==null)throw new sA("`nonConfigurable`, if provided, must be a boolean or null");if(arguments.length>6&&typeof arguments[6]!="boolean")throw new sA("`loose`, if provided, must be a boolean");var i=arguments.length>3?arguments[3]:null,a=arguments.length>4?arguments[4]:null,o=arguments.length>5?arguments[5]:null,s=arguments.length>6?arguments[6]:!1,l=!!kAe&&kAe(t,r);if(EAe)EAe(t,r,{configurable:o===null&&l?l.configurable:!o,enumerable:i===null&&l?l.enumerable:!i,value:n,writable:a===null&&l?l.writable:!a});else if(s||!i&&!a&&!o)t[r]=n;else throw new lAt("This environment does not support defining a property as non-configurable, non-writable, or non-enumerable.")}});var PH=ye((whr,IAe)=>{"use strict";var LH=G4(),PAe=function(){return!!LH};PAe.hasArrayLengthDefineBug=function(){if(!LH)return null;try{return LH([],"length",{value:1}).length!==1}catch(t){return!0}};IAe.exports=PAe});var OAe=ye((Thr,zAe)=>{"use strict";var uAt=J8(),RAe=LAe(),cAt=PH()(),DAe=f2(),FAe=tA(),fAt=uAt("%Math.floor%");zAe.exports=function(t,r){if(typeof t!="function")throw new FAe("`fn` is not a function");if(typeof r!="number"||r<0||r>4294967295||fAt(r)!==r)throw new FAe("`length` must be a positive 32-bit integer");var n=arguments.length>2&&!!arguments[2],i=!0,a=!0;if("length"in t&&DAe){var o=DAe(t,"length");o&&!o.configurable&&(i=!1),o&&!o.writable&&(a=!1)}return(i||a||!n)&&(cAt?RAe(t,"length",r,!0,!0):RAe(t,"length",r)),t}});var X4=ye((Ahr,$8)=>{"use strict";var IH=rA(),Q8=J8(),hAt=OAe(),dAt=tA(),NAe=Q8("%Function.prototype.apply%"),UAe=Q8("%Function.prototype.call%"),VAe=Q8("%Reflect.apply%",!0)||IH.call(UAe,NAe),qAe=G4(),vAt=Q8("%Math.max%");$8.exports=function(t){if(typeof t!="function")throw new dAt("a function is required");var r=VAe(IH,UAe,arguments);return hAt(r,1+vAt(0,t.length-(arguments.length-1)),!0)};var BAe=function(){return VAe(IH,NAe,arguments)};qAe?qAe($8.exports,"apply",{value:BAe}):$8.exports.apply=BAe});var lA=ye((Shr,jAe)=>{"use strict";var GAe=J8(),HAe=X4(),pAt=HAe(GAe("String.prototype.indexOf"));jAe.exports=function(t,r){var n=GAe(t,!!r);return typeof n=="function"&&pAt(t,".prototype.")>-1?HAe(n):n}});var ZAe=ye((Mhr,XAe)=>{"use strict";var gAt=V4()(),mAt=lA(),RH=mAt("Object.prototype.toString"),eR=function(t){return gAt&&t&&typeof t=="object"&&Symbol.toStringTag in t?!1:RH(t)==="[object Arguments]"},WAe=function(t){return eR(t)?!0:t!==null&&typeof t=="object"&&typeof t.length=="number"&&t.length>=0&&RH(t)!=="[object Array]"&&RH(t.callee)==="[object Function]"},yAt=function(){return eR(arguments)}();eR.isLegacyArguments=WAe;XAe.exports=yAt?eR:WAe});var JAe=ye((Ehr,KAe)=>{"use strict";var _At=Object.prototype.toString,xAt=Function.prototype.toString,bAt=/^\s*(?:function)?\*/,YAe=V4()(),DH=Object.getPrototypeOf,wAt=function(){if(!YAe)return!1;try{return Function("return function*() {}")()}catch(e){}},FH;KAe.exports=function(t){if(typeof t!="function")return!1;if(bAt.test(xAt.call(t)))return!0;if(!YAe){var r=_At.call(t);return r==="[object GeneratorFunction]"}if(!DH)return!1;if(typeof FH=="undefined"){var n=wAt();FH=n?DH(n):!1}return DH(t)===FH}});var t5e=ye((khr,e5e)=>{"use strict";var QAe=Function.prototype.toString,uA=typeof Reflect=="object"&&Reflect!==null&&Reflect.apply,OH,tR;if(typeof uA=="function"&&typeof Object.defineProperty=="function")try{OH=Object.defineProperty({},"length",{get:function(){throw tR}}),tR={},uA(function(){throw 42},null,OH)}catch(e){e!==tR&&(uA=null)}else uA=null;var TAt=/^\s*class\b/,qH=function(t){try{var r=QAe.call(t);return TAt.test(r)}catch(n){return!1}},zH=function(t){try{return qH(t)?!1:(QAe.call(t),!0)}catch(r){return!1}},rR=Object.prototype.toString,AAt="[object Object]",SAt="[object Function]",MAt="[object GeneratorFunction]",EAt="[object HTMLAllCollection]",kAt="[object HTML document.all class]",CAt="[object HTMLCollection]",LAt=typeof Symbol=="function"&&!!Symbol.toStringTag,PAt=!(0 in[,]),BH=function(){return!1};typeof document=="object"&&($Ae=document.all,rR.call($Ae)===rR.call(document.all)&&(BH=function(t){if((PAt||!t)&&(typeof t=="undefined"||typeof t=="object"))try{var r=rR.call(t);return(r===EAt||r===kAt||r===CAt||r===AAt)&&t("")==null}catch(n){}return!1}));var $Ae;e5e.exports=uA?function(t){if(BH(t))return!0;if(!t||typeof t!="function"&&typeof t!="object")return!1;try{uA(t,null,OH)}catch(r){if(r!==tR)return!1}return!qH(t)&&zH(t)}:function(t){if(BH(t))return!0;if(!t||typeof t!="function"&&typeof t!="object")return!1;if(LAt)return zH(t);if(qH(t))return!1;var r=rR.call(t);return r!==SAt&&r!==MAt&&!/^\[object HTML/.test(r)?!1:zH(t)}});var NH=ye((Chr,i5e)=>{"use strict";var IAt=t5e(),RAt=Object.prototype.toString,r5e=Object.prototype.hasOwnProperty,DAt=function(t,r,n){for(var i=0,a=t.length;i<a;i++)r5e.call(t,i)&&(n==null?r(t[i],i,t):r.call(n,t[i],i,t))},FAt=function(t,r,n){for(var i=0,a=t.length;i<a;i++)n==null?r(t.charAt(i),i,t):r.call(n,t.charAt(i),i,t)},zAt=function(t,r,n){for(var i in t)r5e.call(t,i)&&(n==null?r(t[i],i,t):r.call(n,t[i],i,t))},OAt=function(t,r,n){if(!IAt(r))throw new TypeError("iterator must be a function");var i;arguments.length>=3&&(i=n),RAt.call(t)==="[object Array]"?DAt(t,r,i):typeof t=="string"?FAt(t,r,i):zAt(t,r,i)};i5e.exports=OAt});var VH=ye((Lhr,n5e)=>{"use strict";var UH=["BigInt64Array","BigUint64Array","Float32Array","Float64Array","Int16Array","Int32Array","Int8Array","Uint16Array","Uint32Array","Uint8Array","Uint8ClampedArray"],qAt=typeof globalThis=="undefined"?window:globalThis;n5e.exports=function(){for(var t=[],r=0;r<UH.length;r++)typeof qAt[UH[r]]=="function"&&(t[t.length]=UH[r]);return t}});var u5e=ye((Phr,l5e)=>{"use strict";var nR=NH(),BAt=VH(),a5e=X4(),jH=lA(),iR=f2(),NAt=jH("Object.prototype.toString"),s5e=V4()(),o5e=typeof globalThis=="undefined"?window:globalThis,HH=BAt(),WH=jH("String.prototype.slice"),GH=Object.getPrototypeOf,UAt=jH("Array.prototype.indexOf",!0)||function(t,r){for(var n=0;n<t.length;n+=1)if(t[n]===r)return n;return-1},aR={__proto__:null};s5e&&iR&&GH?nR(HH,function(e){var t=new o5e[e];if(Symbol.toStringTag in t){var r=GH(t),n=iR(r,Symbol.toStringTag);if(!n){var i=GH(r);n=iR(i,Symbol.toStringTag)}aR["$"+e]=a5e(n.get)}}):nR(HH,function(e){var t=new o5e[e],r=t.slice||t.set;r&&(aR["$"+e]=a5e(r))});var VAt=function(t){var r=!1;return nR(aR,function(n,i){if(!r)try{"$"+n(t)===i&&(r=WH(i,1))}catch(a){}}),r},GAt=function(t){var r=!1;return nR(aR,function(n,i){if(!r)try{n(t),r=WH(i,1)}catch(a){}}),r};l5e.exports=function(t){if(!t||typeof t!="object")return!1;if(!s5e){var r=WH(NAt(t),8,-1);return UAt(HH,r)>-1?r:r!=="Object"?!1:GAt(t)}return iR?VAt(t):null}});var p5e=ye((Ihr,v5e)=>{"use strict";var c5e=NH(),HAt=VH(),ZH=lA(),jAt=ZH("Object.prototype.toString"),f5e=V4()(),oR=f2(),WAt=typeof globalThis=="undefined"?window:globalThis,h5e=HAt(),XAt=ZH("Array.prototype.indexOf",!0)||function(t,r){for(var n=0;n<t.length;n+=1)if(t[n]===r)return n;return-1},ZAt=ZH("String.prototype.slice"),d5e={},XH=Object.getPrototypeOf;f5e&&oR&&XH&&c5e(h5e,function(e){var t=new WAt[e];if(Symbol.toStringTag in t){var r=XH(t),n=oR(r,Symbol.toStringTag);if(!n){var i=XH(r);n=oR(i,Symbol.toStringTag)}d5e[e]=n.get}});var YAt=function(t){var r=!1;return c5e(d5e,function(n,i){if(!r)try{r=n.call(t)===i}catch(a){}}),r};v5e.exports=function(t){if(!t||typeof t!="object")return!1;if(!f5e||!(Symbol.toStringTag in t)){var r=ZAt(jAt(t),8,-1);return XAt(h5e,r)>-1}return oR?YAt(t):!1}});var JH=ye(uu=>{"use strict";var KAt=ZAe(),JAt=JAe(),Hg=u5e(),g5e=p5e();function cA(e){return e.call.bind(e)}var m5e=typeof BigInt!="undefined",y5e=typeof Symbol!="undefined",Z0=cA(Object.prototype.toString),$At=cA(Number.prototype.valueOf),QAt=cA(String.prototype.valueOf),e5t=cA(Boolean.prototype.valueOf);m5e&&(_5e=cA(BigInt.prototype.valueOf));var _5e;y5e&&(x5e=cA(Symbol.prototype.valueOf));var x5e;function Y4(e,t){if(typeof e!="object")return!1;try{return t(e),!0}catch(r){return!1}}uu.isArgumentsObject=KAt;uu.isGeneratorFunction=JAt;uu.isTypedArray=g5e;function t5t(e){return typeof Promise!="undefined"&&e instanceof Promise||e!==null&&typeof e=="object"&&typeof e.then=="function"&&typeof e.catch=="function"}uu.isPromise=t5t;function r5t(e){return typeof ArrayBuffer!="undefined"&&ArrayBuffer.isView?ArrayBuffer.isView(e):g5e(e)||w5e(e)}uu.isArrayBufferView=r5t;function i5t(e){return Hg(e)==="Uint8Array"}uu.isUint8Array=i5t;function n5t(e){return Hg(e)==="Uint8ClampedArray"}uu.isUint8ClampedArray=n5t;function a5t(e){return Hg(e)==="Uint16Array"}uu.isUint16Array=a5t;function o5t(e){return Hg(e)==="Uint32Array"}uu.isUint32Array=o5t;function s5t(e){return Hg(e)==="Int8Array"}uu.isInt8Array=s5t;function l5t(e){return Hg(e)==="Int16Array"}uu.isInt16Array=l5t;function u5t(e){return Hg(e)==="Int32Array"}uu.isInt32Array=u5t;function c5t(e){return Hg(e)==="Float32Array"}uu.isFloat32Array=c5t;function f5t(e){return Hg(e)==="Float64Array"}uu.isFloat64Array=f5t;function h5t(e){return Hg(e)==="BigInt64Array"}uu.isBigInt64Array=h5t;function d5t(e){return Hg(e)==="BigUint64Array"}uu.isBigUint64Array=d5t;function sR(e){return Z0(e)==="[object Map]"}sR.working=typeof Map!="undefined"&&sR(new Map);function v5t(e){return typeof Map=="undefined"?!1:sR.working?sR(e):e instanceof Map}uu.isMap=v5t;function lR(e){return Z0(e)==="[object Set]"}lR.working=typeof Set!="undefined"&&lR(new Set);function p5t(e){return typeof Set=="undefined"?!1:lR.working?lR(e):e instanceof Set}uu.isSet=p5t;function uR(e){return Z0(e)==="[object WeakMap]"}uR.working=typeof WeakMap!="undefined"&&uR(new WeakMap);function g5t(e){return typeof WeakMap=="undefined"?!1:uR.working?uR(e):e instanceof WeakMap}uu.isWeakMap=g5t;function KH(e){return Z0(e)==="[object WeakSet]"}KH.working=typeof WeakSet!="undefined"&&KH(new WeakSet);function m5t(e){return KH(e)}uu.isWeakSet=m5t;function cR(e){return Z0(e)==="[object ArrayBuffer]"}cR.working=typeof ArrayBuffer!="undefined"&&cR(new ArrayBuffer);function b5e(e){return typeof ArrayBuffer=="undefined"?!1:cR.working?cR(e):e instanceof ArrayBuffer}uu.isArrayBuffer=b5e;function fR(e){return Z0(e)==="[object DataView]"}fR.working=typeof ArrayBuffer!="undefined"&&typeof DataView!="undefined"&&fR(new DataView(new ArrayBuffer(1),0,1));function w5e(e){return typeof DataView=="undefined"?!1:fR.working?fR(e):e instanceof DataView}uu.isDataView=w5e;var YH=typeof SharedArrayBuffer!="undefined"?SharedArrayBuffer:void 0;function Z4(e){return Z0(e)==="[object SharedArrayBuffer]"}function T5e(e){return typeof YH=="undefined"?!1:(typeof Z4.working=="undefined"&&(Z4.working=Z4(new YH)),Z4.working?Z4(e):e instanceof YH)}uu.isSharedArrayBuffer=T5e;function y5t(e){return Z0(e)==="[object AsyncFunction]"}uu.isAsyncFunction=y5t;function _5t(e){return Z0(e)==="[object Map Iterator]"}uu.isMapIterator=_5t;function x5t(e){return Z0(e)==="[object Set Iterator]"}uu.isSetIterator=x5t;function b5t(e){return Z0(e)==="[object Generator]"}uu.isGeneratorObject=b5t;function w5t(e){return Z0(e)==="[object WebAssembly.Module]"}uu.isWebAssemblyCompiledModule=w5t;function A5e(e){return Y4(e,$At)}uu.isNumberObject=A5e;function S5e(e){return Y4(e,QAt)}uu.isStringObject=S5e;function M5e(e){return Y4(e,e5t)}uu.isBooleanObject=M5e;function E5e(e){return m5e&&Y4(e,_5e)}uu.isBigIntObject=E5e;function k5e(e){return y5e&&Y4(e,x5e)}uu.isSymbolObject=k5e;function T5t(e){return A5e(e)||S5e(e)||M5e(e)||E5e(e)||k5e(e)}uu.isBoxedPrimitive=T5t;function A5t(e){return typeof Uint8Array!="undefined"&&(b5e(e)||T5e(e))}uu.isAnyArrayBuffer=A5t;["isProxy","isExternal","isModuleNamespaceObject"].forEach(function(e){Object.defineProperty(uu,e,{enumerable:!1,value:function(){throw new Error(e+" is not supported in userland")}})})});var $H=ye((Dhr,C5e)=>{C5e.exports=function(t){return t&&typeof t=="object"&&typeof t.copy=="function"&&typeof t.fill=="function"&&typeof t.readUInt8=="function"}});var nj=ye(cu=>{var L5e=Object.getOwnPropertyDescriptors||function(t){for(var r=Object.keys(t),n={},i=0;i<r.length;i++)n[r[i]]=Object.getOwnPropertyDescriptor(t,r[i]);return n},S5t=/%[sdj%]/g;cu.format=function(e){if(!yR(e)){for(var t=[],r=0;r<arguments.length;r++)t.push(I_(arguments[r]));return t.join(" ")}for(var r=1,n=arguments,i=n.length,a=String(e).replace(S5t,function(s){if(s==="%%")return"%";if(r>=i)return s;switch(s){case"%s":return String(n[r++]);case"%d":return Number(n[r++]);case"%j":try{return JSON.stringify(n[r++])}catch(l){return"[Circular]"}default:return s}}),o=n[r];r<i;o=n[++r])mR(o)||!fA(o)?a+=" "+o:a+=" "+I_(o);return a};cu.deprecate=function(e,t){if(typeof process!="undefined"&&process.noDeprecation===!0)return e;if(typeof process=="undefined")return function(){return cu.deprecate(e,t).apply(this,arguments)};var r=!1;function n(){if(!r){if(process.throwDeprecation)throw new Error(t);process.traceDeprecation?console.trace(t):console.error(t),r=!0}return e.apply(this,arguments)}return n};var hR={},P5e=/^$/;dR="false",dR=dR.replace(/[|\\{}()[\]^$+?.]/g,"\\$&").replace(/\*/g,".*").replace(/,/g,"$|^").toUpperCase(),P5e=new RegExp("^"+dR+"$","i");var dR;cu.debuglog=function(e){if(e=e.toUpperCase(),!hR[e])if(P5e.test(e)){var t=process.pid;hR[e]=function(){var r=cu.format.apply(cu,arguments);console.error("%s %d: %s",e,t,r)}}else hR[e]=function(){};return hR[e]};function I_(e,t){var r={seen:[],stylize:E5t};return arguments.length>=3&&(r.depth=arguments[2]),arguments.length>=4&&(r.colors=arguments[3]),rj(t)?r.showHidden=t:t&&cu._extend(r,t),v2(r.showHidden)&&(r.showHidden=!1),v2(r.depth)&&(r.depth=2),v2(r.colors)&&(r.colors=!1),v2(r.customInspect)&&(r.customInspect=!0),r.colors&&(r.stylize=M5t),pR(r,e,r.depth)}cu.inspect=I_;I_.colors={bold:[1,22],italic:[3,23],underline:[4,24],inverse:[7,27],white:[37,39],grey:[90,39],black:[30,39],blue:[34,39],cyan:[36,39],green:[32,39],magenta:[35,39],red:[31,39],yellow:[33,39]};I_.styles={special:"cyan",number:"yellow",boolean:"yellow",undefined:"grey",null:"bold",string:"green",date:"magenta",regexp:"red"};function M5t(e,t){var r=I_.styles[t];return r?"\x1B["+I_.colors[r][0]+"m"+e+"\x1B["+I_.colors[r][1]+"m":e}function E5t(e,t){return e}function k5t(e){var t={};return e.forEach(function(r,n){t[r]=!0}),t}function pR(e,t,r){if(e.customInspect&&t&&vR(t.inspect)&&t.inspect!==cu.inspect&&!(t.constructor&&t.constructor.prototype===t)){var n=t.inspect(r,e);return yR(n)||(n=pR(e,n,r)),n}var i=C5t(e,t);if(i)return i;var a=Object.keys(t),o=k5t(a);if(e.showHidden&&(a=Object.getOwnPropertyNames(t)),J4(t)&&(a.indexOf("message")>=0||a.indexOf("description")>=0))return QH(t);if(a.length===0){if(vR(t)){var s=t.name?": "+t.name:"";return e.stylize("[Function"+s+"]","special")}if(K4(t))return e.stylize(RegExp.prototype.toString.call(t),"regexp");if(gR(t))return e.stylize(Date.prototype.toString.call(t),"date");if(J4(t))return QH(t)}var l="",u=!1,c=["{","}"];if(I5e(t)&&(u=!0,c=["[","]"]),vR(t)){var f=t.name?": "+t.name:"";l=" [Function"+f+"]"}if(K4(t)&&(l=" "+RegExp.prototype.toString.call(t)),gR(t)&&(l=" "+Date.prototype.toUTCString.call(t)),J4(t)&&(l=" "+QH(t)),a.length===0&&(!u||t.length==0))return c[0]+l+c[1];if(r<0)return K4(t)?e.stylize(RegExp.prototype.toString.call(t),"regexp"):e.stylize("[Object]","special");e.seen.push(t);var h;return u?h=L5t(e,t,r,o,a):h=a.map(function(d){return tj(e,t,r,o,d,u)}),e.seen.pop(),P5t(h,l,c)}function C5t(e,t){if(v2(t))return e.stylize("undefined","undefined");if(yR(t)){var r="'"+JSON.stringify(t).replace(/^"|"$/g,"").replace(/'/g,"\\'").replace(/\\"/g,'"')+"'";return e.stylize(r,"string")}if(R5e(t))return e.stylize(""+t,"number");if(rj(t))return e.stylize(""+t,"boolean");if(mR(t))return e.stylize("null","null")}function QH(e){return"["+Error.prototype.toString.call(e)+"]"}function L5t(e,t,r,n,i){for(var a=[],o=0,s=t.length;o<s;++o)D5e(t,String(o))?a.push(tj(e,t,r,n,String(o),!0)):a.push("");return i.forEach(function(l){l.match(/^\d+$/)||a.push(tj(e,t,r,n,l,!0))}),a}function tj(e,t,r,n,i,a){var o,s,l;if(l=Object.getOwnPropertyDescriptor(t,i)||{value:t[i]},l.get?l.set?s=e.stylize("[Getter/Setter]","special"):s=e.stylize("[Getter]","special"):l.set&&(s=e.stylize("[Setter]","special")),D5e(n,i)||(o="["+i+"]"),s||(e.seen.indexOf(l.value)<0?(mR(r)?s=pR(e,l.value,null):s=pR(e,l.value,r-1),s.indexOf(`
+`)>-1&&(a?s=s.split(`
+`).map(function(u){return"  "+u}).join(`
+`).slice(2):s=`
+`+s.split(`
+`).map(function(u){return"   "+u}).join(`
+`))):s=e.stylize("[Circular]","special")),v2(o)){if(a&&i.match(/^\d+$/))return s;o=JSON.stringify(""+i),o.match(/^"([a-zA-Z_][a-zA-Z_0-9]*)"$/)?(o=o.slice(1,-1),o=e.stylize(o,"name")):(o=o.replace(/'/g,"\\'").replace(/\\"/g,'"').replace(/(^"|"$)/g,"'"),o=e.stylize(o,"string"))}return o+": "+s}function P5t(e,t,r){var n=0,i=e.reduce(function(a,o){return n++,o.indexOf(`
+`)>=0&&n++,a+o.replace(/\u001b\[\d\d?m/g,"").length+1},0);return i>60?r[0]+(t===""?"":t+`
+ `)+" "+e.join(`,
+  `)+" "+r[1]:r[0]+t+" "+e.join(", ")+" "+r[1]}cu.types=JH();function I5e(e){return Array.isArray(e)}cu.isArray=I5e;function rj(e){return typeof e=="boolean"}cu.isBoolean=rj;function mR(e){return e===null}cu.isNull=mR;function I5t(e){return e==null}cu.isNullOrUndefined=I5t;function R5e(e){return typeof e=="number"}cu.isNumber=R5e;function yR(e){return typeof e=="string"}cu.isString=yR;function R5t(e){return typeof e=="symbol"}cu.isSymbol=R5t;function v2(e){return e===void 0}cu.isUndefined=v2;function K4(e){return fA(e)&&ij(e)==="[object RegExp]"}cu.isRegExp=K4;cu.types.isRegExp=K4;function fA(e){return typeof e=="object"&&e!==null}cu.isObject=fA;function gR(e){return fA(e)&&ij(e)==="[object Date]"}cu.isDate=gR;cu.types.isDate=gR;function J4(e){return fA(e)&&(ij(e)==="[object Error]"||e instanceof Error)}cu.isError=J4;cu.types.isNativeError=J4;function vR(e){return typeof e=="function"}cu.isFunction=vR;function D5t(e){return e===null||typeof e=="boolean"||typeof e=="number"||typeof e=="string"||typeof e=="symbol"||typeof e=="undefined"}cu.isPrimitive=D5t;cu.isBuffer=$H();function ij(e){return Object.prototype.toString.call(e)}function ej(e){return e<10?"0"+e.toString(10):e.toString(10)}var F5t=["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"];function z5t(){var e=new Date,t=[ej(e.getHours()),ej(e.getMinutes()),ej(e.getSeconds())].join(":");return[e.getDate(),F5t[e.getMonth()],t].join(" ")}cu.log=function(){console.log("%s - %s",z5t(),cu.format.apply(cu,arguments))};cu.inherits=Uy();cu._extend=function(e,t){if(!t||!fA(t))return e;for(var r=Object.keys(t),n=r.length;n--;)e[r[n]]=t[r[n]];return e};function D5e(e,t){return Object.prototype.hasOwnProperty.call(e,t)}var d2=typeof Symbol!="undefined"?Symbol("util.promisify.custom"):void 0;cu.promisify=function(t){if(typeof t!="function")throw new TypeError('The "original" argument must be of type Function');if(d2&&t[d2]){var r=t[d2];if(typeof r!="function")throw new TypeError('The "util.promisify.custom" argument must be of type Function');return Object.defineProperty(r,d2,{value:r,enumerable:!1,writable:!1,configurable:!0}),r}function r(){for(var n,i,a=new Promise(function(l,u){n=l,i=u}),o=[],s=0;s<arguments.length;s++)o.push(arguments[s]);o.push(function(l,u){l?i(l):n(u)});try{t.apply(this,o)}catch(l){i(l)}return a}return Object.setPrototypeOf(r,Object.getPrototypeOf(t)),d2&&Object.defineProperty(r,d2,{value:r,enumerable:!1,writable:!1,configurable:!0}),Object.defineProperties(r,L5e(t))};cu.promisify.custom=d2;function O5t(e,t){if(!e){var r=new Error("Promise was rejected with a falsy value");r.reason=e,e=r}return t(e)}function q5t(e){if(typeof e!="function")throw new TypeError('The "original" argument must be of type Function');function t(){for(var r=[],n=0;n<arguments.length;n++)r.push(arguments[n]);var i=r.pop();if(typeof i!="function")throw new TypeError("The last argument must be of type Function");var a=this,o=function(){return i.apply(a,arguments)};e.apply(this,r).then(function(s){process.nextTick(o.bind(null,null,s))},function(s){process.nextTick(O5t.bind(null,s,o))})}return Object.setPrototypeOf(t,Object.getPrototypeOf(e)),Object.defineProperties(t,L5e(e)),t}cu.callbackify=q5t});var q5e=ye((zhr,O5e)=>{"use strict";function F5e(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter(function(i){return Object.getOwnPropertyDescriptor(e,i).enumerable})),r.push.apply(r,n)}return r}function B5t(e){for(var t=1;t<arguments.length;t++){var r=arguments[t]!=null?arguments[t]:{};t%2?F5e(Object(r),!0).forEach(function(n){N5t(e,n,r[n])}):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(r)):F5e(Object(r)).forEach(function(n){Object.defineProperty(e,n,Object.getOwnPropertyDescriptor(r,n))})}return e}function N5t(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function U5t(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}function z5e(e,t){for(var r=0;r<t.length;r++){var n=t[r];n.enumerable=n.enumerable||!1,n.configurable=!0,"value"in n&&(n.writable=!0),Object.defineProperty(e,n.key,n)}}function V5t(e,t,r){return t&&z5e(e.prototype,t),r&&z5e(e,r),e}var G5t=c2(),_R=G5t.Buffer,H5t=nj(),aj=H5t.inspect,j5t=aj&&aj.custom||"inspect";function W5t(e,t,r){_R.prototype.copy.call(e,t,r)}O5e.exports=function(){function e(){U5t(this,e),this.head=null,this.tail=null,this.length=0}return V5t(e,[{key:"push",value:function(r){var n={data:r,next:null};this.length>0?this.tail.next=n:this.head=n,this.tail=n,++this.length}},{key:"unshift",value:function(r){var n={data:r,next:this.head};this.length===0&&(this.tail=n),this.head=n,++this.length}},{key:"shift",value:function(){if(this.length!==0){var r=this.head.data;return this.length===1?this.head=this.tail=null:this.head=this.head.next,--this.length,r}}},{key:"clear",value:function(){this.head=this.tail=null,this.length=0}},{key:"join",value:function(r){if(this.length===0)return"";for(var n=this.head,i=""+n.data;n=n.next;)i+=r+n.data;return i}},{key:"concat",value:function(r){if(this.length===0)return _R.alloc(0);for(var n=_R.allocUnsafe(r>>>0),i=this.head,a=0;i;)W5t(i.data,n,a),a+=i.data.length,i=i.next;return n}},{key:"consume",value:function(r,n){var i;return r<this.head.data.length?(i=this.head.data.slice(0,r),this.head.data=this.head.data.slice(r)):r===this.head.data.length?i=this.shift():i=n?this._getString(r):this._getBuffer(r),i}},{key:"first",value:function(){return this.head.data}},{key:"_getString",value:function(r){var n=this.head,i=1,a=n.data;for(r-=a.length;n=n.next;){var o=n.data,s=r>o.length?o.length:r;if(s===o.length?a+=o:a+=o.slice(0,r),r-=s,r===0){s===o.length?(++i,n.next?this.head=n.next:this.head=this.tail=null):(this.head=n,n.data=o.slice(s));break}++i}return this.length-=i,a}},{key:"_getBuffer",value:function(r){var n=_R.allocUnsafe(r),i=this.head,a=1;for(i.data.copy(n),r-=i.data.length;i=i.next;){var o=i.data,s=r>o.length?o.length:r;if(o.copy(n,n.length-r,0,s),r-=s,r===0){s===o.length?(++a,i.next?this.head=i.next:this.head=this.tail=null):(this.head=i,i.data=o.slice(s));break}++a}return this.length-=a,n}},{key:j5t,value:function(r,n){return aj(this,B5t({},n,{depth:0,customInspect:!1}))}}]),e}()});var sj=ye((Ohr,N5e)=>{"use strict";function X5t(e,t){var r=this,n=this._readableState&&this._readableState.destroyed,i=this._writableState&&this._writableState.destroyed;return n||i?(t?t(e):e&&(this._writableState?this._writableState.errorEmitted||(this._writableState.errorEmitted=!0,process.nextTick(oj,this,e)):process.nextTick(oj,this,e)),this):(this._readableState&&(this._readableState.destroyed=!0),this._writableState&&(this._writableState.destroyed=!0),this._destroy(e||null,function(a){!t&&a?r._writableState?r._writableState.errorEmitted?process.nextTick(xR,r):(r._writableState.errorEmitted=!0,process.nextTick(B5e,r,a)):process.nextTick(B5e,r,a):t?(process.nextTick(xR,r),t(a)):process.nextTick(xR,r)}),this)}function B5e(e,t){oj(e,t),xR(e)}function xR(e){e._writableState&&!e._writableState.emitClose||e._readableState&&!e._readableState.emitClose||e.emit("close")}function Z5t(){this._readableState&&(this._readableState.destroyed=!1,this._readableState.reading=!1,this._readableState.ended=!1,this._readableState.endEmitted=!1),this._writableState&&(this._writableState.destroyed=!1,this._writableState.ended=!1,this._writableState.ending=!1,this._writableState.finalCalled=!1,this._writableState.prefinished=!1,this._writableState.finished=!1,this._writableState.errorEmitted=!1)}function oj(e,t){e.emit("error",t)}function Y5t(e,t){var r=e._readableState,n=e._writableState;r&&r.autoDestroy||n&&n.autoDestroy?e.destroy(t):e.emit("error",t)}N5e.exports={destroy:X5t,undestroy:Z5t,errorOrDestroy:Y5t}});var p2=ye((qhr,G5e)=>{"use strict";function K5t(e,t){e.prototype=Object.create(t.prototype),e.prototype.constructor=e,e.__proto__=t}var V5e={};function Y0(e,t,r){r||(r=Error);function n(a,o,s){return typeof t=="string"?t:t(a,o,s)}var i=function(a){K5t(o,a);function o(s,l,u){return a.call(this,n(s,l,u))||this}return o}(r);i.prototype.name=r.name,i.prototype.code=e,V5e[e]=i}function U5e(e,t){if(Array.isArray(e)){var r=e.length;return e=e.map(function(n){return String(n)}),r>2?"one of ".concat(t," ").concat(e.slice(0,r-1).join(", "),", or ")+e[r-1]:r===2?"one of ".concat(t," ").concat(e[0]," or ").concat(e[1]):"of ".concat(t," ").concat(e[0])}else return"of ".concat(t," ").concat(String(e))}function J5t(e,t,r){return e.substr(!r||r<0?0:+r,t.length)===t}function $5t(e,t,r){return(r===void 0||r>e.length)&&(r=e.length),e.substring(r-t.length,r)===t}function Q5t(e,t,r){return typeof r!="number"&&(r=0),r+t.length>e.length?!1:e.indexOf(t,r)!==-1}Y0("ERR_INVALID_OPT_VALUE",function(e,t){return'The value "'+t+'" is invalid for option "'+e+'"'},TypeError);Y0("ERR_INVALID_ARG_TYPE",function(e,t,r){var n;typeof t=="string"&&J5t(t,"not ")?(n="must not be",t=t.replace(/^not /,"")):n="must be";var i;if($5t(e," argument"))i="The ".concat(e," ").concat(n," ").concat(U5e(t,"type"));else{var a=Q5t(e,".")?"property":"argument";i='The "'.concat(e,'" ').concat(a," ").concat(n," ").concat(U5e(t,"type"))}return i+=". Received type ".concat(typeof r),i},TypeError);Y0("ERR_STREAM_PUSH_AFTER_EOF","stream.push() after EOF");Y0("ERR_METHOD_NOT_IMPLEMENTED",function(e){return"The "+e+" method is not implemented"});Y0("ERR_STREAM_PREMATURE_CLOSE","Premature close");Y0("ERR_STREAM_DESTROYED",function(e){return"Cannot call "+e+" after a stream was destroyed"});Y0("ERR_MULTIPLE_CALLBACK","Callback called multiple times");Y0("ERR_STREAM_CANNOT_PIPE","Cannot pipe, not readable");Y0("ERR_STREAM_WRITE_AFTER_END","write after end");Y0("ERR_STREAM_NULL_VALUES","May not write null values to stream",TypeError);Y0("ERR_UNKNOWN_ENCODING",function(e){return"Unknown encoding: "+e},TypeError);Y0("ERR_STREAM_UNSHIFT_AFTER_END_EVENT","stream.unshift() after end event");G5e.exports.codes=V5e});var lj=ye((Bhr,H5e)=>{"use strict";var eSt=p2().codes.ERR_INVALID_OPT_VALUE;function tSt(e,t,r){return e.highWaterMark!=null?e.highWaterMark:t?e[r]:null}function rSt(e,t,r,n){var i=tSt(t,n,r);if(i!=null){if(!(isFinite(i)&&Math.floor(i)===i)||i<0){var a=n?r:"highWaterMark";throw new eSt(a,i)}return Math.floor(i)}return e.objectMode?16:16*1024}H5e.exports={getHighWaterMark:rSt}});var W5e=ye((Nhr,j5e)=>{j5e.exports=iSt;function iSt(e,t){if(uj("noDeprecation"))return e;var r=!1;function n(){if(!r){if(uj("throwDeprecation"))throw new Error(t);uj("traceDeprecation")?console.trace(t):console.warn(t),r=!0}return e.apply(this,arguments)}return n}function uj(e){try{if(!window.localStorage)return!1}catch(r){return!1}var t=window.localStorage[e];return t==null?!1:String(t).toLowerCase()==="true"}});var hj=ye((Uhr,$5e)=>{"use strict";$5e.exports=zh;function Z5e(e){var t=this;this.next=null,this.entry=null,this.finish=function(){LSt(t,e)}}var hA;zh.WritableState=Q4;var nSt={deprecate:W5e()},Y5e=fH(),wR=c2().Buffer,aSt=window.Uint8Array||function(){};function oSt(e){return wR.from(e)}function sSt(e){return wR.isBuffer(e)||e instanceof aSt}var fj=sj(),lSt=lj(),uSt=lSt.getHighWaterMark,R_=p2().codes,cSt=R_.ERR_INVALID_ARG_TYPE,fSt=R_.ERR_METHOD_NOT_IMPLEMENTED,hSt=R_.ERR_MULTIPLE_CALLBACK,dSt=R_.ERR_STREAM_CANNOT_PIPE,vSt=R_.ERR_STREAM_DESTROYED,pSt=R_.ERR_STREAM_NULL_VALUES,gSt=R_.ERR_STREAM_WRITE_AFTER_END,mSt=R_.ERR_UNKNOWN_ENCODING,dA=fj.errorOrDestroy;Uy()(zh,Y5e);function ySt(){}function Q4(e,t,r){hA=hA||g2(),e=e||{},typeof r!="boolean"&&(r=t instanceof hA),this.objectMode=!!e.objectMode,r&&(this.objectMode=this.objectMode||!!e.writableObjectMode),this.highWaterMark=uSt(this,e,"writableHighWaterMark",r),this.finalCalled=!1,this.needDrain=!1,this.ending=!1,this.ended=!1,this.finished=!1,this.destroyed=!1;var n=e.decodeStrings===!1;this.decodeStrings=!n,this.defaultEncoding=e.defaultEncoding||"utf8",this.length=0,this.writing=!1,this.corked=0,this.sync=!0,this.bufferProcessing=!1,this.onwrite=function(i){SSt(t,i)},this.writecb=null,this.writelen=0,this.bufferedRequest=null,this.lastBufferedRequest=null,this.pendingcb=0,this.prefinished=!1,this.errorEmitted=!1,this.emitClose=e.emitClose!==!1,this.autoDestroy=!!e.autoDestroy,this.bufferedRequestCount=0,this.corkedRequestsFree=new Z5e(this)}Q4.prototype.getBuffer=function(){for(var t=this.bufferedRequest,r=[];t;)r.push(t),t=t.next;return r};(function(){try{Object.defineProperty(Q4.prototype,"buffer",{get:nSt.deprecate(function(){return this.getBuffer()},"_writableState.buffer is deprecated. Use _writableState.getBuffer instead.","DEP0003")})}catch(e){}})();var bR;typeof Symbol=="function"&&Symbol.hasInstance&&typeof Function.prototype[Symbol.hasInstance]=="function"?(bR=Function.prototype[Symbol.hasInstance],Object.defineProperty(zh,Symbol.hasInstance,{value:function(t){return bR.call(this,t)?!0:this!==zh?!1:t&&t._writableState instanceof Q4}})):bR=function(t){return t instanceof this};function zh(e){hA=hA||g2();var t=this instanceof hA;if(!t&&!bR.call(zh,this))return new zh(e);this._writableState=new Q4(e,this,t),this.writable=!0,e&&(typeof e.write=="function"&&(this._write=e.write),typeof e.writev=="function"&&(this._writev=e.writev),typeof e.destroy=="function"&&(this._destroy=e.destroy),typeof e.final=="function"&&(this._final=e.final)),Y5e.call(this)}zh.prototype.pipe=function(){dA(this,new dSt)};function _St(e,t){var r=new gSt;dA(e,r),process.nextTick(t,r)}function xSt(e,t,r,n){var i;return r===null?i=new pSt:typeof r!="string"&&!t.objectMode&&(i=new cSt("chunk",["string","Buffer"],r)),i?(dA(e,i),process.nextTick(n,i),!1):!0}zh.prototype.write=function(e,t,r){var n=this._writableState,i=!1,a=!n.objectMode&&sSt(e);return a&&!wR.isBuffer(e)&&(e=oSt(e)),typeof t=="function"&&(r=t,t=null),a?t="buffer":t||(t=n.defaultEncoding),typeof r!="function"&&(r=ySt),n.ending?_St(this,r):(a||xSt(this,n,e,r))&&(n.pendingcb++,i=wSt(this,n,a,e,t,r)),i};zh.prototype.cork=function(){this._writableState.corked++};zh.prototype.uncork=function(){var e=this._writableState;e.corked&&(e.corked--,!e.writing&&!e.corked&&!e.bufferProcessing&&e.bufferedRequest&&K5e(this,e))};zh.prototype.setDefaultEncoding=function(t){if(typeof t=="string"&&(t=t.toLowerCase()),!(["hex","utf8","utf-8","ascii","binary","base64","ucs2","ucs-2","utf16le","utf-16le","raw"].indexOf((t+"").toLowerCase())>-1))throw new mSt(t);return this._writableState.defaultEncoding=t,this};Object.defineProperty(zh.prototype,"writableBuffer",{enumerable:!1,get:function(){return this._writableState&&this._writableState.getBuffer()}});function bSt(e,t,r){return!e.objectMode&&e.decodeStrings!==!1&&typeof t=="string"&&(t=wR.from(t,r)),t}Object.defineProperty(zh.prototype,"writableHighWaterMark",{enumerable:!1,get:function(){return this._writableState.highWaterMark}});function wSt(e,t,r,n,i,a){if(!r){var o=bSt(t,n,i);n!==o&&(r=!0,i="buffer",n=o)}var s=t.objectMode?1:n.length;t.length+=s;var l=t.length<t.highWaterMark;if(l||(t.needDrain=!0),t.writing||t.corked){var u=t.lastBufferedRequest;t.lastBufferedRequest={chunk:n,encoding:i,isBuf:r,callback:a,next:null},u?u.next=t.lastBufferedRequest:t.bufferedRequest=t.lastBufferedRequest,t.bufferedRequestCount+=1}else cj(e,t,!1,s,n,i,a);return l}function cj(e,t,r,n,i,a,o){t.writelen=n,t.writecb=o,t.writing=!0,t.sync=!0,t.destroyed?t.onwrite(new vSt("write")):r?e._writev(i,t.onwrite):e._write(i,a,t.onwrite),t.sync=!1}function TSt(e,t,r,n,i){--t.pendingcb,r?(process.nextTick(i,n),process.nextTick($4,e,t),e._writableState.errorEmitted=!0,dA(e,n)):(i(n),e._writableState.errorEmitted=!0,dA(e,n),$4(e,t))}function ASt(e){e.writing=!1,e.writecb=null,e.length-=e.writelen,e.writelen=0}function SSt(e,t){var r=e._writableState,n=r.sync,i=r.writecb;if(typeof i!="function")throw new hSt;if(ASt(r),t)TSt(e,r,n,t,i);else{var a=J5e(r)||e.destroyed;!a&&!r.corked&&!r.bufferProcessing&&r.bufferedRequest&&K5e(e,r),n?process.nextTick(X5e,e,r,a,i):X5e(e,r,a,i)}}function X5e(e,t,r,n){r||MSt(e,t),t.pendingcb--,n(),$4(e,t)}function MSt(e,t){t.length===0&&t.needDrain&&(t.needDrain=!1,e.emit("drain"))}function K5e(e,t){t.bufferProcessing=!0;var r=t.bufferedRequest;if(e._writev&&r&&r.next){var n=t.bufferedRequestCount,i=new Array(n),a=t.corkedRequestsFree;a.entry=r;for(var o=0,s=!0;r;)i[o]=r,r.isBuf||(s=!1),r=r.next,o+=1;i.allBuffers=s,cj(e,t,!0,t.length,i,"",a.finish),t.pendingcb++,t.lastBufferedRequest=null,a.next?(t.corkedRequestsFree=a.next,a.next=null):t.corkedRequestsFree=new Z5e(t),t.bufferedRequestCount=0}else{for(;r;){var l=r.chunk,u=r.encoding,c=r.callback,f=t.objectMode?1:l.length;if(cj(e,t,!1,f,l,u,c),r=r.next,t.bufferedRequestCount--,t.writing)break}r===null&&(t.lastBufferedRequest=null)}t.bufferedRequest=r,t.bufferProcessing=!1}zh.prototype._write=function(e,t,r){r(new fSt("_write()"))};zh.prototype._writev=null;zh.prototype.end=function(e,t,r){var n=this._writableState;return typeof e=="function"?(r=e,e=null,t=null):typeof t=="function"&&(r=t,t=null),e!=null&&this.write(e,t),n.corked&&(n.corked=1,this.uncork()),n.ending||CSt(this,n,r),this};Object.defineProperty(zh.prototype,"writableLength",{enumerable:!1,get:function(){return this._writableState.length}});function J5e(e){return e.ending&&e.length===0&&e.bufferedRequest===null&&!e.finished&&!e.writing}function ESt(e,t){e._final(function(r){t.pendingcb--,r&&dA(e,r),t.prefinished=!0,e.emit("prefinish"),$4(e,t)})}function kSt(e,t){!t.prefinished&&!t.finalCalled&&(typeof e._final=="function"&&!t.destroyed?(t.pendingcb++,t.finalCalled=!0,process.nextTick(ESt,e,t)):(t.prefinished=!0,e.emit("prefinish")))}function $4(e,t){var r=J5e(t);if(r&&(kSt(e,t),t.pendingcb===0&&(t.finished=!0,e.emit("finish"),t.autoDestroy))){var n=e._readableState;(!n||n.autoDestroy&&n.endEmitted)&&e.destroy()}return r}function CSt(e,t,r){t.ending=!0,$4(e,t),r&&(t.finished?process.nextTick(r):e.once("finish",r)),t.ended=!0,e.writable=!1}function LSt(e,t,r){var n=e.entry;for(e.entry=null;n;){var i=n.callback;t.pendingcb--,i(r),n=n.next}t.corkedRequestsFree.next=e}Object.defineProperty(zh.prototype,"destroyed",{enumerable:!1,get:function(){return this._writableState===void 0?!1:this._writableState.destroyed},set:function(t){this._writableState&&(this._writableState.destroyed=t)}});zh.prototype.destroy=fj.destroy;zh.prototype._undestroy=fj.undestroy;zh.prototype._destroy=function(e,t){t(e)}});var g2=ye((Vhr,eSe)=>{"use strict";var PSt=Object.keys||function(e){var t=[];for(var r in e)t.push(r);return t};eSe.exports=Bm;var Q5e=pj(),vj=hj();Uy()(Bm,Q5e);for(dj=PSt(vj.prototype),TR=0;TR<dj.length;TR++)AR=dj[TR],Bm.prototype[AR]||(Bm.prototype[AR]=vj.prototype[AR]);var dj,AR,TR;function Bm(e){if(!(this instanceof Bm))return new Bm(e);Q5e.call(this,e),vj.call(this,e),this.allowHalfOpen=!0,e&&(e.readable===!1&&(this.readable=!1),e.writable===!1&&(this.writable=!1),e.allowHalfOpen===!1&&(this.allowHalfOpen=!1,this.once("end",ISt)))}Object.defineProperty(Bm.prototype,"writableHighWaterMark",{enumerable:!1,get:function(){return this._writableState.highWaterMark}});Object.defineProperty(Bm.prototype,"writableBuffer",{enumerable:!1,get:function(){return this._writableState&&this._writableState.getBuffer()}});Object.defineProperty(Bm.prototype,"writableLength",{enumerable:!1,get:function(){return this._writableState.length}});function ISt(){this._writableState.ended||process.nextTick(RSt,this)}function RSt(e){e.end()}Object.defineProperty(Bm.prototype,"destroyed",{enumerable:!1,get:function(){return this._readableState===void 0||this._writableState===void 0?!1:this._readableState.destroyed&&this._writableState.destroyed},set:function(t){this._readableState===void 0||this._writableState===void 0||(this._readableState.destroyed=t,this._writableState.destroyed=t)}})});var iSe=ye((gj,rSe)=>{var SR=c2(),Nm=SR.Buffer;function tSe(e,t){for(var r in e)t[r]=e[r]}Nm.from&&Nm.alloc&&Nm.allocUnsafe&&Nm.allocUnsafeSlow?rSe.exports=SR:(tSe(SR,gj),gj.Buffer=m2);function m2(e,t,r){return Nm(e,t,r)}m2.prototype=Object.create(Nm.prototype);tSe(Nm,m2);m2.from=function(e,t,r){if(typeof e=="number")throw new TypeError("Argument must not be a number");return Nm(e,t,r)};m2.alloc=function(e,t,r){if(typeof e!="number")throw new TypeError("Argument must be a number");var n=Nm(e);return t!==void 0?typeof r=="string"?n.fill(t,r):n.fill(t):n.fill(0),n};m2.allocUnsafe=function(e){if(typeof e!="number")throw new TypeError("Argument must be a number");return Nm(e)};m2.allocUnsafeSlow=function(e){if(typeof e!="number")throw new TypeError("Argument must be a number");return SR.SlowBuffer(e)}});var _j=ye(aSe=>{"use strict";var yj=iSe().Buffer,nSe=yj.isEncoding||function(e){switch(e=""+e,e&&e.toLowerCase()){case"hex":case"utf8":case"utf-8":case"ascii":case"binary":case"base64":case"ucs2":case"ucs-2":case"utf16le":case"utf-16le":case"raw":return!0;default:return!1}};function DSt(e){if(!e)return"utf8";for(var t;;)switch(e){case"utf8":case"utf-8":return"utf8";case"ucs2":case"ucs-2":case"utf16le":case"utf-16le":return"utf16le";case"latin1":case"binary":return"latin1";case"base64":case"ascii":case"hex":return e;default:if(t)return;e=(""+e).toLowerCase(),t=!0}}function FSt(e){var t=DSt(e);if(typeof t!="string"&&(yj.isEncoding===nSe||!nSe(e)))throw new Error("Unknown encoding: "+e);return t||e}aSe.StringDecoder=eE;function eE(e){this.encoding=FSt(e);var t;switch(this.encoding){case"utf16le":this.text=USt,this.end=VSt,t=4;break;case"utf8":this.fillLast=qSt,t=4;break;case"base64":this.text=GSt,this.end=HSt,t=3;break;default:this.write=jSt,this.end=WSt;return}this.lastNeed=0,this.lastTotal=0,this.lastChar=yj.allocUnsafe(t)}eE.prototype.write=function(e){if(e.length===0)return"";var t,r;if(this.lastNeed){if(t=this.fillLast(e),t===void 0)return"";r=this.lastNeed,this.lastNeed=0}else r=0;return r<e.length?t?t+this.text(e,r):this.text(e,r):t||""};eE.prototype.end=NSt;eE.prototype.text=BSt;eE.prototype.fillLast=function(e){if(this.lastNeed<=e.length)return e.copy(this.lastChar,this.lastTotal-this.lastNeed,0,this.lastNeed),this.lastChar.toString(this.encoding,0,this.lastTotal);e.copy(this.lastChar,this.lastTotal-this.lastNeed,0,e.length),this.lastNeed-=e.length};function mj(e){return e<=127?0:e>>5===6?2:e>>4===14?3:e>>3===30?4:e>>6===2?-1:-2}function zSt(e,t,r){var n=t.length-1;if(n<r)return 0;var i=mj(t[n]);return i>=0?(i>0&&(e.lastNeed=i-1),i):--n<r||i===-2?0:(i=mj(t[n]),i>=0?(i>0&&(e.lastNeed=i-2),i):--n<r||i===-2?0:(i=mj(t[n]),i>=0?(i>0&&(i===2?i=0:e.lastNeed=i-3),i):0))}function OSt(e,t,r){if((t[0]&192)!==128)return e.lastNeed=0,"\uFFFD";if(e.lastNeed>1&&t.length>1){if((t[1]&192)!==128)return e.lastNeed=1,"\uFFFD";if(e.lastNeed>2&&t.length>2&&(t[2]&192)!==128)return e.lastNeed=2,"\uFFFD"}}function qSt(e){var t=this.lastTotal-this.lastNeed,r=OSt(this,e,t);if(r!==void 0)return r;if(this.lastNeed<=e.length)return e.copy(this.lastChar,t,0,this.lastNeed),this.lastChar.toString(this.encoding,0,this.lastTotal);e.copy(this.lastChar,t,0,e.length),this.lastNeed-=e.length}function BSt(e,t){var r=zSt(this,e,t);if(!this.lastNeed)return e.toString("utf8",t);this.lastTotal=r;var n=e.length-(r-this.lastNeed);return e.copy(this.lastChar,0,n),e.toString("utf8",t,n)}function NSt(e){var t=e&&e.length?this.write(e):"";return this.lastNeed?t+"\uFFFD":t}function USt(e,t){if((e.length-t)%2===0){var r=e.toString("utf16le",t);if(r){var n=r.charCodeAt(r.length-1);if(n>=55296&&n<=56319)return this.lastNeed=2,this.lastTotal=4,this.lastChar[0]=e[e.length-2],this.lastChar[1]=e[e.length-1],r.slice(0,-1)}return r}return this.lastNeed=1,this.lastTotal=2,this.lastChar[0]=e[e.length-1],e.toString("utf16le",t,e.length-1)}function VSt(e){var t=e&&e.length?this.write(e):"";if(this.lastNeed){var r=this.lastTotal-this.lastNeed;return t+this.lastChar.toString("utf16le",0,r)}return t}function GSt(e,t){var r=(e.length-t)%3;return r===0?e.toString("base64",t):(this.lastNeed=3-r,this.lastTotal=3,r===1?this.lastChar[0]=e[e.length-1]:(this.lastChar[0]=e[e.length-2],this.lastChar[1]=e[e.length-1]),e.toString("base64",t,e.length-r))}function HSt(e){var t=e&&e.length?this.write(e):"";return this.lastNeed?t+this.lastChar.toString("base64",0,3-this.lastNeed):t}function jSt(e){return e.toString(this.encoding)}function WSt(e){return e&&e.length?this.write(e):""}});var MR=ye((Hhr,lSe)=>{"use strict";var oSe=p2().codes.ERR_STREAM_PREMATURE_CLOSE;function XSt(e){var t=!1;return function(){if(!t){t=!0;for(var r=arguments.length,n=new Array(r),i=0;i<r;i++)n[i]=arguments[i];e.apply(this,n)}}}function ZSt(){}function YSt(e){return e.setHeader&&typeof e.abort=="function"}function sSe(e,t,r){if(typeof t=="function")return sSe(e,null,t);t||(t={}),r=XSt(r||ZSt);var n=t.readable||t.readable!==!1&&e.readable,i=t.writable||t.writable!==!1&&e.writable,a=function(){e.writable||s()},o=e._writableState&&e._writableState.finished,s=function(){i=!1,o=!0,n||r.call(e)},l=e._readableState&&e._readableState.endEmitted,u=function(){n=!1,l=!0,i||r.call(e)},c=function(v){r.call(e,v)},f=function(){var v;if(n&&!l)return(!e._readableState||!e._readableState.ended)&&(v=new oSe),r.call(e,v);if(i&&!o)return(!e._writableState||!e._writableState.ended)&&(v=new oSe),r.call(e,v)},h=function(){e.req.on("finish",s)};return YSt(e)?(e.on("complete",s),e.on("abort",f),e.req?h():e.on("request",h)):i&&!e._writableState&&(e.on("end",a),e.on("close",a)),e.on("end",u),e.on("finish",s),t.error!==!1&&e.on("error",c),e.on("close",f),function(){e.removeListener("complete",s),e.removeListener("abort",f),e.removeListener("request",h),e.req&&e.req.removeListener("finish",s),e.removeListener("end",a),e.removeListener("close",a),e.removeListener("finish",s),e.removeListener("end",u),e.removeListener("error",c),e.removeListener("close",f)}}lSe.exports=sSe});var cSe=ye((jhr,uSe)=>{"use strict";var ER;function D_(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}var KSt=MR(),F_=Symbol("lastResolve"),y2=Symbol("lastReject"),tE=Symbol("error"),kR=Symbol("ended"),_2=Symbol("lastPromise"),xj=Symbol("handlePromise"),x2=Symbol("stream");function z_(e,t){return{value:e,done:t}}function JSt(e){var t=e[F_];if(t!==null){var r=e[x2].read();r!==null&&(e[_2]=null,e[F_]=null,e[y2]=null,t(z_(r,!1)))}}function $St(e){process.nextTick(JSt,e)}function QSt(e,t){return function(r,n){e.then(function(){if(t[kR]){r(z_(void 0,!0));return}t[xj](r,n)},n)}}var eMt=Object.getPrototypeOf(function(){}),tMt=Object.setPrototypeOf((ER={get stream(){return this[x2]},next:function(){var t=this,r=this[tE];if(r!==null)return Promise.reject(r);if(this[kR])return Promise.resolve(z_(void 0,!0));if(this[x2].destroyed)return new Promise(function(o,s){process.nextTick(function(){t[tE]?s(t[tE]):o(z_(void 0,!0))})});var n=this[_2],i;if(n)i=new Promise(QSt(n,this));else{var a=this[x2].read();if(a!==null)return Promise.resolve(z_(a,!1));i=new Promise(this[xj])}return this[_2]=i,i}},D_(ER,Symbol.asyncIterator,function(){return this}),D_(ER,"return",function(){var t=this;return new Promise(function(r,n){t[x2].destroy(null,function(i){if(i){n(i);return}r(z_(void 0,!0))})})}),ER),eMt),rMt=function(t){var r,n=Object.create(tMt,(r={},D_(r,x2,{value:t,writable:!0}),D_(r,F_,{value:null,writable:!0}),D_(r,y2,{value:null,writable:!0}),D_(r,tE,{value:null,writable:!0}),D_(r,kR,{value:t._readableState.endEmitted,writable:!0}),D_(r,xj,{value:function(a,o){var s=n[x2].read();s?(n[_2]=null,n[F_]=null,n[y2]=null,a(z_(s,!1))):(n[F_]=a,n[y2]=o)},writable:!0}),r));return n[_2]=null,KSt(t,function(i){if(i&&i.code!=="ERR_STREAM_PREMATURE_CLOSE"){var a=n[y2];a!==null&&(n[_2]=null,n[F_]=null,n[y2]=null,a(i)),n[tE]=i;return}var o=n[F_];o!==null&&(n[_2]=null,n[F_]=null,n[y2]=null,o(z_(void 0,!0))),n[kR]=!0}),t.on("readable",$St.bind(null,n)),n};uSe.exports=rMt});var hSe=ye((Whr,fSe)=>{fSe.exports=function(){throw new Error("Readable.from is not available in the browser")}});var pj=ye((Zhr,wSe)=>{"use strict";wSe.exports=Bu;var vA;Bu.ReadableState=gSe;var Xhr=pb().EventEmitter,pSe=function(t,r){return t.listeners(r).length},iE=fH(),CR=c2().Buffer,iMt=window.Uint8Array||function(){};function nMt(e){return CR.from(e)}function aMt(e){return CR.isBuffer(e)||e instanceof iMt}var bj=nj(),nu;bj&&bj.debuglog?nu=bj.debuglog("stream"):nu=function(){};var oMt=q5e(),kj=sj(),sMt=lj(),lMt=sMt.getHighWaterMark,LR=p2().codes,uMt=LR.ERR_INVALID_ARG_TYPE,cMt=LR.ERR_STREAM_PUSH_AFTER_EOF,fMt=LR.ERR_METHOD_NOT_IMPLEMENTED,hMt=LR.ERR_STREAM_UNSHIFT_AFTER_END_EVENT,pA,wj,Tj;Uy()(Bu,iE);var rE=kj.errorOrDestroy,Aj=["error","close","destroy","pause","resume"];function dMt(e,t,r){if(typeof e.prependListener=="function")return e.prependListener(t,r);!e._events||!e._events[t]?e.on(t,r):Array.isArray(e._events[t])?e._events[t].unshift(r):e._events[t]=[r,e._events[t]]}function gSe(e,t,r){vA=vA||g2(),e=e||{},typeof r!="boolean"&&(r=t instanceof vA),this.objectMode=!!e.objectMode,r&&(this.objectMode=this.objectMode||!!e.readableObjectMode),this.highWaterMark=lMt(this,e,"readableHighWaterMark",r),this.buffer=new oMt,this.length=0,this.pipes=null,this.pipesCount=0,this.flowing=null,this.ended=!1,this.endEmitted=!1,this.reading=!1,this.sync=!0,this.needReadable=!1,this.emittedReadable=!1,this.readableListening=!1,this.resumeScheduled=!1,this.paused=!0,this.emitClose=e.emitClose!==!1,this.autoDestroy=!!e.autoDestroy,this.destroyed=!1,this.defaultEncoding=e.defaultEncoding||"utf8",this.awaitDrain=0,this.readingMore=!1,this.decoder=null,this.encoding=null,e.encoding&&(pA||(pA=_j().StringDecoder),this.decoder=new pA(e.encoding),this.encoding=e.encoding)}function Bu(e){if(vA=vA||g2(),!(this instanceof Bu))return new Bu(e);var t=this instanceof vA;this._readableState=new gSe(e,this,t),this.readable=!0,e&&(typeof e.read=="function"&&(this._read=e.read),typeof e.destroy=="function"&&(this._destroy=e.destroy)),iE.call(this)}Object.defineProperty(Bu.prototype,"destroyed",{enumerable:!1,get:function(){return this._readableState===void 0?!1:this._readableState.destroyed},set:function(t){this._readableState&&(this._readableState.destroyed=t)}});Bu.prototype.destroy=kj.destroy;Bu.prototype._undestroy=kj.undestroy;Bu.prototype._destroy=function(e,t){t(e)};Bu.prototype.push=function(e,t){var r=this._readableState,n;return r.objectMode?n=!0:typeof e=="string"&&(t=t||r.defaultEncoding,t!==r.encoding&&(e=CR.from(e,t),t=""),n=!0),mSe(this,e,t,!1,n)};Bu.prototype.unshift=function(e){return mSe(this,e,null,!0,!1)};function mSe(e,t,r,n,i){nu("readableAddChunk",t);var a=e._readableState;if(t===null)a.reading=!1,gMt(e,a);else{var o;if(i||(o=vMt(a,t)),o)rE(e,o);else if(a.objectMode||t&&t.length>0)if(typeof t!="string"&&!a.objectMode&&Object.getPrototypeOf(t)!==CR.prototype&&(t=nMt(t)),n)a.endEmitted?rE(e,new hMt):Sj(e,a,t,!0);else if(a.ended)rE(e,new cMt);else{if(a.destroyed)return!1;a.reading=!1,a.decoder&&!r?(t=a.decoder.write(t),a.objectMode||t.length!==0?Sj(e,a,t,!1):Ej(e,a)):Sj(e,a,t,!1)}else n||(a.reading=!1,Ej(e,a))}return!a.ended&&(a.length<a.highWaterMark||a.length===0)}function Sj(e,t,r,n){t.flowing&&t.length===0&&!t.sync?(t.awaitDrain=0,e.emit("data",r)):(t.length+=t.objectMode?1:r.length,n?t.buffer.unshift(r):t.buffer.push(r),t.needReadable&&PR(e)),Ej(e,t)}function vMt(e,t){var r;return!aMt(t)&&typeof t!="string"&&t!==void 0&&!e.objectMode&&(r=new uMt("chunk",["string","Buffer","Uint8Array"],t)),r}Bu.prototype.isPaused=function(){return this._readableState.flowing===!1};Bu.prototype.setEncoding=function(e){pA||(pA=_j().StringDecoder);var t=new pA(e);this._readableState.decoder=t,this._readableState.encoding=this._readableState.decoder.encoding;for(var r=this._readableState.buffer.head,n="";r!==null;)n+=t.write(r.data),r=r.next;return this._readableState.buffer.clear(),n!==""&&this._readableState.buffer.push(n),this._readableState.length=n.length,this};var dSe=1073741824;function pMt(e){return e>=dSe?e=dSe:(e--,e|=e>>>1,e|=e>>>2,e|=e>>>4,e|=e>>>8,e|=e>>>16,e++),e}function vSe(e,t){return e<=0||t.length===0&&t.ended?0:t.objectMode?1:e!==e?t.flowing&&t.length?t.buffer.head.data.length:t.length:(e>t.highWaterMark&&(t.highWaterMark=pMt(e)),e<=t.length?e:t.ended?t.length:(t.needReadable=!0,0))}Bu.prototype.read=function(e){nu("read",e),e=parseInt(e,10);var t=this._readableState,r=e;if(e!==0&&(t.emittedReadable=!1),e===0&&t.needReadable&&((t.highWaterMark!==0?t.length>=t.highWaterMark:t.length>0)||t.ended))return nu("read: emitReadable",t.length,t.ended),t.length===0&&t.ended?Mj(this):PR(this),null;if(e=vSe(e,t),e===0&&t.ended)return t.length===0&&Mj(this),null;var n=t.needReadable;nu("need readable",n),(t.length===0||t.length-e<t.highWaterMark)&&(n=!0,nu("length less than watermark",n)),t.ended||t.reading?(n=!1,nu("reading or ended",n)):n&&(nu("do read"),t.reading=!0,t.sync=!0,t.length===0&&(t.needReadable=!0),this._read(t.highWaterMark),t.sync=!1,t.reading||(e=vSe(r,t)));var i;return e>0?i=xSe(e,t):i=null,i===null?(t.needReadable=t.length<=t.highWaterMark,e=0):(t.length-=e,t.awaitDrain=0),t.length===0&&(t.ended||(t.needReadable=!0),r!==e&&t.ended&&Mj(this)),i!==null&&this.emit("data",i),i};function gMt(e,t){if(nu("onEofChunk"),!t.ended){if(t.decoder){var r=t.decoder.end();r&&r.length&&(t.buffer.push(r),t.length+=t.objectMode?1:r.length)}t.ended=!0,t.sync?PR(e):(t.needReadable=!1,t.emittedReadable||(t.emittedReadable=!0,ySe(e)))}}function PR(e){var t=e._readableState;nu("emitReadable",t.needReadable,t.emittedReadable),t.needReadable=!1,t.emittedReadable||(nu("emitReadable",t.flowing),t.emittedReadable=!0,process.nextTick(ySe,e))}function ySe(e){var t=e._readableState;nu("emitReadable_",t.destroyed,t.length,t.ended),!t.destroyed&&(t.length||t.ended)&&(e.emit("readable"),t.emittedReadable=!1),t.needReadable=!t.flowing&&!t.ended&&t.length<=t.highWaterMark,Cj(e)}function Ej(e,t){t.readingMore||(t.readingMore=!0,process.nextTick(mMt,e,t))}function mMt(e,t){for(;!t.reading&&!t.ended&&(t.length<t.highWaterMark||t.flowing&&t.length===0);){var r=t.length;if(nu("maybeReadMore read 0"),e.read(0),r===t.length)break}t.readingMore=!1}Bu.prototype._read=function(e){rE(this,new fMt("_read()"))};Bu.prototype.pipe=function(e,t){var r=this,n=this._readableState;switch(n.pipesCount){case 0:n.pipes=e;break;case 1:n.pipes=[n.pipes,e];break;default:n.pipes.push(e);break}n.pipesCount+=1,nu("pipe count=%d opts=%j",n.pipesCount,t);var i=(!t||t.end!==!1)&&e!==process.stdout&&e!==process.stderr,a=i?s:_;n.endEmitted?process.nextTick(a):r.once("end",a),e.on("unpipe",o);function o(b,p){nu("onunpipe"),b===r&&p&&p.hasUnpiped===!1&&(p.hasUnpiped=!0,c())}function s(){nu("onend"),e.end()}var l=yMt(r);e.on("drain",l);var u=!1;function c(){nu("cleanup"),e.removeListener("close",d),e.removeListener("finish",v),e.removeListener("drain",l),e.removeListener("error",h),e.removeListener("unpipe",o),r.removeListener("end",s),r.removeListener("end",_),r.removeListener("data",f),u=!0,n.awaitDrain&&(!e._writableState||e._writableState.needDrain)&&l()}r.on("data",f);function f(b){nu("ondata");var p=e.write(b);nu("dest.write",p),p===!1&&((n.pipesCount===1&&n.pipes===e||n.pipesCount>1&&bSe(n.pipes,e)!==-1)&&!u&&(nu("false write response, pause",n.awaitDrain),n.awaitDrain++),r.pause())}function h(b){nu("onerror",b),_(),e.removeListener("error",h),pSe(e,"error")===0&&rE(e,b)}dMt(e,"error",h);function d(){e.removeListener("finish",v),_()}e.once("close",d);function v(){nu("onfinish"),e.removeListener("close",d),_()}e.once("finish",v);function _(){nu("unpipe"),r.unpipe(e)}return e.emit("pipe",r),n.flowing||(nu("pipe resume"),r.resume()),e};function yMt(e){return function(){var r=e._readableState;nu("pipeOnDrain",r.awaitDrain),r.awaitDrain&&r.awaitDrain--,r.awaitDrain===0&&pSe(e,"data")&&(r.flowing=!0,Cj(e))}}Bu.prototype.unpipe=function(e){var t=this._readableState,r={hasUnpiped:!1};if(t.pipesCount===0)return this;if(t.pipesCount===1)return e&&e!==t.pipes?this:(e||(e=t.pipes),t.pipes=null,t.pipesCount=0,t.flowing=!1,e&&e.emit("unpipe",this,r),this);if(!e){var n=t.pipes,i=t.pipesCount;t.pipes=null,t.pipesCount=0,t.flowing=!1;for(var a=0;a<i;a++)n[a].emit("unpipe",this,{hasUnpiped:!1});return this}var o=bSe(t.pipes,e);return o===-1?this:(t.pipes.splice(o,1),t.pipesCount-=1,t.pipesCount===1&&(t.pipes=t.pipes[0]),e.emit("unpipe",this,r),this)};Bu.prototype.on=function(e,t){var r=iE.prototype.on.call(this,e,t),n=this._readableState;return e==="data"?(n.readableListening=this.listenerCount("readable")>0,n.flowing!==!1&&this.resume()):e==="readable"&&!n.endEmitted&&!n.readableListening&&(n.readableListening=n.needReadable=!0,n.flowing=!1,n.emittedReadable=!1,nu("on readable",n.length,n.reading),n.length?PR(this):n.reading||process.nextTick(_Mt,this)),r};Bu.prototype.addListener=Bu.prototype.on;Bu.prototype.removeListener=function(e,t){var r=iE.prototype.removeListener.call(this,e,t);return e==="readable"&&process.nextTick(_Se,this),r};Bu.prototype.removeAllListeners=function(e){var t=iE.prototype.removeAllListeners.apply(this,arguments);return(e==="readable"||e===void 0)&&process.nextTick(_Se,this),t};function _Se(e){var t=e._readableState;t.readableListening=e.listenerCount("readable")>0,t.resumeScheduled&&!t.paused?t.flowing=!0:e.listenerCount("data")>0&&e.resume()}function _Mt(e){nu("readable nexttick read 0"),e.read(0)}Bu.prototype.resume=function(){var e=this._readableState;return e.flowing||(nu("resume"),e.flowing=!e.readableListening,xMt(this,e)),e.paused=!1,this};function xMt(e,t){t.resumeScheduled||(t.resumeScheduled=!0,process.nextTick(bMt,e,t))}function bMt(e,t){nu("resume",t.reading),t.reading||e.read(0),t.resumeScheduled=!1,e.emit("resume"),Cj(e),t.flowing&&!t.reading&&e.read(0)}Bu.prototype.pause=function(){return nu("call pause flowing=%j",this._readableState.flowing),this._readableState.flowing!==!1&&(nu("pause"),this._readableState.flowing=!1,this.emit("pause")),this._readableState.paused=!0,this};function Cj(e){var t=e._readableState;for(nu("flow",t.flowing);t.flowing&&e.read()!==null;);}Bu.prototype.wrap=function(e){var t=this,r=this._readableState,n=!1;e.on("end",function(){if(nu("wrapped end"),r.decoder&&!r.ended){var o=r.decoder.end();o&&o.length&&t.push(o)}t.push(null)}),e.on("data",function(o){if(nu("wrapped data"),r.decoder&&(o=r.decoder.write(o)),!(r.objectMode&&o==null)&&!(!r.objectMode&&(!o||!o.length))){var s=t.push(o);s||(n=!0,e.pause())}});for(var i in e)this[i]===void 0&&typeof e[i]=="function"&&(this[i]=function(s){return function(){return e[s].apply(e,arguments)}}(i));for(var a=0;a<Aj.length;a++)e.on(Aj[a],this.emit.bind(this,Aj[a]));return this._read=function(o){nu("wrapped _read",o),n&&(n=!1,e.resume())},this};typeof Symbol=="function"&&(Bu.prototype[Symbol.asyncIterator]=function(){return wj===void 0&&(wj=cSe()),wj(this)});Object.defineProperty(Bu.prototype,"readableHighWaterMark",{enumerable:!1,get:function(){return this._readableState.highWaterMark}});Object.defineProperty(Bu.prototype,"readableBuffer",{enumerable:!1,get:function(){return this._readableState&&this._readableState.buffer}});Object.defineProperty(Bu.prototype,"readableFlowing",{enumerable:!1,get:function(){return this._readableState.flowing},set:function(t){this._readableState&&(this._readableState.flowing=t)}});Bu._fromList=xSe;Object.defineProperty(Bu.prototype,"readableLength",{enumerable:!1,get:function(){return this._readableState.length}});function xSe(e,t){if(t.length===0)return null;var r;return t.objectMode?r=t.buffer.shift():!e||e>=t.length?(t.decoder?r=t.buffer.join(""):t.buffer.length===1?r=t.buffer.first():r=t.buffer.concat(t.length),t.buffer.clear()):r=t.buffer.consume(e,t.decoder),r}function Mj(e){var t=e._readableState;nu("endReadable",t.endEmitted),t.endEmitted||(t.ended=!0,process.nextTick(wMt,t,e))}function wMt(e,t){if(nu("endReadableNT",e.endEmitted,e.length),!e.endEmitted&&e.length===0&&(e.endEmitted=!0,t.readable=!1,t.emit("end"),e.autoDestroy)){var r=t._writableState;(!r||r.autoDestroy&&r.finished)&&t.destroy()}}typeof Symbol=="function"&&(Bu.from=function(e,t){return Tj===void 0&&(Tj=hSe()),Tj(Bu,e,t)});function bSe(e,t){for(var r=0,n=e.length;r<n;r++)if(e[r]===t)return r;return-1}});var Lj=ye((Yhr,ASe)=>{"use strict";ASe.exports=Gy;var IR=p2().codes,TMt=IR.ERR_METHOD_NOT_IMPLEMENTED,AMt=IR.ERR_MULTIPLE_CALLBACK,SMt=IR.ERR_TRANSFORM_ALREADY_TRANSFORMING,MMt=IR.ERR_TRANSFORM_WITH_LENGTH_0,RR=g2();Uy()(Gy,RR);function EMt(e,t){var r=this._transformState;r.transforming=!1;var n=r.writecb;if(n===null)return this.emit("error",new AMt);r.writechunk=null,r.writecb=null,t!=null&&this.push(t),n(e);var i=this._readableState;i.reading=!1,(i.needReadable||i.length<i.highWaterMark)&&this._read(i.highWaterMark)}function Gy(e){if(!(this instanceof Gy))return new Gy(e);RR.call(this,e),this._transformState={afterTransform:EMt.bind(this),needTransform:!1,transforming:!1,writecb:null,writechunk:null,writeencoding:null},this._readableState.needReadable=!0,this._readableState.sync=!1,e&&(typeof e.transform=="function"&&(this._transform=e.transform),typeof e.flush=="function"&&(this._flush=e.flush)),this.on("prefinish",kMt)}function kMt(){var e=this;typeof this._flush=="function"&&!this._readableState.destroyed?this._flush(function(t,r){TSe(e,t,r)}):TSe(this,null,null)}Gy.prototype.push=function(e,t){return this._transformState.needTransform=!1,RR.prototype.push.call(this,e,t)};Gy.prototype._transform=function(e,t,r){r(new TMt("_transform()"))};Gy.prototype._write=function(e,t,r){var n=this._transformState;if(n.writecb=r,n.writechunk=e,n.writeencoding=t,!n.transforming){var i=this._readableState;(n.needTransform||i.needReadable||i.length<i.highWaterMark)&&this._read(i.highWaterMark)}};Gy.prototype._read=function(e){var t=this._transformState;t.writechunk!==null&&!t.transforming?(t.transforming=!0,this._transform(t.writechunk,t.writeencoding,t.afterTransform)):t.needTransform=!0};Gy.prototype._destroy=function(e,t){RR.prototype._destroy.call(this,e,function(r){t(r)})};function TSe(e,t,r){if(t)return e.emit("error",t);if(r!=null&&e.push(r),e._writableState.length)throw new MMt;if(e._transformState.transforming)throw new SMt;return e.push(null)}});var ESe=ye((Khr,MSe)=>{"use strict";MSe.exports=nE;var SSe=Lj();Uy()(nE,SSe);function nE(e){if(!(this instanceof nE))return new nE(e);SSe.call(this,e)}nE.prototype._transform=function(e,t,r){r(null,e)}});var ISe=ye((Jhr,PSe)=>{"use strict";var Pj;function CMt(e){var t=!1;return function(){t||(t=!0,e.apply(void 0,arguments))}}var LSe=p2().codes,LMt=LSe.ERR_MISSING_ARGS,PMt=LSe.ERR_STREAM_DESTROYED;function kSe(e){if(e)throw e}function IMt(e){return e.setHeader&&typeof e.abort=="function"}function RMt(e,t,r,n){n=CMt(n);var i=!1;e.on("close",function(){i=!0}),Pj===void 0&&(Pj=MR()),Pj(e,{readable:t,writable:r},function(o){if(o)return n(o);i=!0,n()});var a=!1;return function(o){if(!i&&!a){if(a=!0,IMt(e))return e.abort();if(typeof e.destroy=="function")return e.destroy();n(o||new PMt("pipe"))}}}function CSe(e){e()}function DMt(e,t){return e.pipe(t)}function FMt(e){return!e.length||typeof e[e.length-1]!="function"?kSe:e.pop()}function zMt(){for(var e=arguments.length,t=new Array(e),r=0;r<e;r++)t[r]=arguments[r];var n=FMt(t);if(Array.isArray(t[0])&&(t=t[0]),t.length<2)throw new LMt("streams");var i,a=t.map(function(o,s){var l=s<t.length-1,u=s>0;return RMt(o,l,u,function(c){i||(i=c),c&&a.forEach(CSe),!l&&(a.forEach(CSe),n(i))})});return t.reduce(DMt)}PSe.exports=zMt});var DSe=ye(($hr,RSe)=>{RSe.exports=K0;var Ij=pb().EventEmitter,OMt=Uy();OMt(K0,Ij);K0.Readable=pj();K0.Writable=hj();K0.Duplex=g2();K0.Transform=Lj();K0.PassThrough=ESe();K0.finished=MR();K0.pipeline=ISe();K0.Stream=K0;function K0(){Ij.call(this)}K0.prototype.pipe=function(e,t){var r=this;function n(c){e.writable&&e.write(c)===!1&&r.pause&&r.pause()}r.on("data",n);function i(){r.readable&&r.resume&&r.resume()}e.on("drain",i),!e._isStdio&&(!t||t.end!==!1)&&(r.on("end",o),r.on("close",s));var a=!1;function o(){a||(a=!0,e.end())}function s(){a||(a=!0,typeof e.destroy=="function"&&e.destroy())}function l(c){if(u(),Ij.listenerCount(this,"error")===0)throw c}r.on("error",l),e.on("error",l);function u(){r.removeListener("data",n),e.removeListener("drain",i),r.removeListener("end",o),r.removeListener("close",s),r.removeListener("error",l),e.removeListener("error",l),r.removeListener("end",u),r.removeListener("close",u),e.removeListener("close",u)}return r.on("end",u),r.on("close",u),e.on("close",u),e.emit("pipe",r),e}});var mA=ye(fu=>{var FSe=Object.getOwnPropertyDescriptors||function(t){for(var r=Object.keys(t),n={},i=0;i<r.length;i++)n[r[i]]=Object.getOwnPropertyDescriptor(t,r[i]);return n},qMt=/%[sdj%]/g;fu.format=function(e){if(!NR(e)){for(var t=[],r=0;r<arguments.length;r++)t.push(O_(arguments[r]));return t.join(" ")}for(var r=1,n=arguments,i=n.length,a=String(e).replace(qMt,function(s){if(s==="%%")return"%";if(r>=i)return s;switch(s){case"%s":return String(n[r++]);case"%d":return Number(n[r++]);case"%j":try{return JSON.stringify(n[r++])}catch(l){return"[Circular]"}default:return s}}),o=n[r];r<i;o=n[++r])BR(o)||!gA(o)?a+=" "+o:a+=" "+O_(o);return a};fu.deprecate=function(e,t){if(typeof process!="undefined"&&process.noDeprecation===!0)return e;if(typeof process=="undefined")return function(){return fu.deprecate(e,t).apply(this,arguments)};var r=!1;function n(){if(!r){if(process.throwDeprecation)throw new Error(t);process.traceDeprecation?console.trace(t):console.error(t),r=!0}return e.apply(this,arguments)}return n};var DR={},zSe=/^$/;FR="false",FR=FR.replace(/[|\\{}()[\]^$+?.]/g,"\\$&").replace(/\*/g,".*").replace(/,/g,"$|^").toUpperCase(),zSe=new RegExp("^"+FR+"$","i");var FR;fu.debuglog=function(e){if(e=e.toUpperCase(),!DR[e])if(zSe.test(e)){var t=process.pid;DR[e]=function(){var r=fu.format.apply(fu,arguments);console.error("%s %d: %s",e,t,r)}}else DR[e]=function(){};return DR[e]};function O_(e,t){var r={seen:[],stylize:NMt};return arguments.length>=3&&(r.depth=arguments[2]),arguments.length>=4&&(r.colors=arguments[3]),zj(t)?r.showHidden=t:t&&fu._extend(r,t),w2(r.showHidden)&&(r.showHidden=!1),w2(r.depth)&&(r.depth=2),w2(r.colors)&&(r.colors=!1),w2(r.customInspect)&&(r.customInspect=!0),r.colors&&(r.stylize=BMt),OR(r,e,r.depth)}fu.inspect=O_;O_.colors={bold:[1,22],italic:[3,23],underline:[4,24],inverse:[7,27],white:[37,39],grey:[90,39],black:[30,39],blue:[34,39],cyan:[36,39],green:[32,39],magenta:[35,39],red:[31,39],yellow:[33,39]};O_.styles={special:"cyan",number:"yellow",boolean:"yellow",undefined:"grey",null:"bold",string:"green",date:"magenta",regexp:"red"};function BMt(e,t){var r=O_.styles[t];return r?"\x1B["+O_.colors[r][0]+"m"+e+"\x1B["+O_.colors[r][1]+"m":e}function NMt(e,t){return e}function UMt(e){var t={};return e.forEach(function(r,n){t[r]=!0}),t}function OR(e,t,r){if(e.customInspect&&t&&zR(t.inspect)&&t.inspect!==fu.inspect&&!(t.constructor&&t.constructor.prototype===t)){var n=t.inspect(r,e);return NR(n)||(n=OR(e,n,r)),n}var i=VMt(e,t);if(i)return i;var a=Object.keys(t),o=UMt(a);if(e.showHidden&&(a=Object.getOwnPropertyNames(t)),oE(t)&&(a.indexOf("message")>=0||a.indexOf("description")>=0))return Rj(t);if(a.length===0){if(zR(t)){var s=t.name?": "+t.name:"";return e.stylize("[Function"+s+"]","special")}if(aE(t))return e.stylize(RegExp.prototype.toString.call(t),"regexp");if(qR(t))return e.stylize(Date.prototype.toString.call(t),"date");if(oE(t))return Rj(t)}var l="",u=!1,c=["{","}"];if(OSe(t)&&(u=!0,c=["[","]"]),zR(t)){var f=t.name?": "+t.name:"";l=" [Function"+f+"]"}if(aE(t)&&(l=" "+RegExp.prototype.toString.call(t)),qR(t)&&(l=" "+Date.prototype.toUTCString.call(t)),oE(t)&&(l=" "+Rj(t)),a.length===0&&(!u||t.length==0))return c[0]+l+c[1];if(r<0)return aE(t)?e.stylize(RegExp.prototype.toString.call(t),"regexp"):e.stylize("[Object]","special");e.seen.push(t);var h;return u?h=GMt(e,t,r,o,a):h=a.map(function(d){return Fj(e,t,r,o,d,u)}),e.seen.pop(),HMt(h,l,c)}function VMt(e,t){if(w2(t))return e.stylize("undefined","undefined");if(NR(t)){var r="'"+JSON.stringify(t).replace(/^"|"$/g,"").replace(/'/g,"\\'").replace(/\\"/g,'"')+"'";return e.stylize(r,"string")}if(qSe(t))return e.stylize(""+t,"number");if(zj(t))return e.stylize(""+t,"boolean");if(BR(t))return e.stylize("null","null")}function Rj(e){return"["+Error.prototype.toString.call(e)+"]"}function GMt(e,t,r,n,i){for(var a=[],o=0,s=t.length;o<s;++o)BSe(t,String(o))?a.push(Fj(e,t,r,n,String(o),!0)):a.push("");return i.forEach(function(l){l.match(/^\d+$/)||a.push(Fj(e,t,r,n,l,!0))}),a}function Fj(e,t,r,n,i,a){var o,s,l;if(l=Object.getOwnPropertyDescriptor(t,i)||{value:t[i]},l.get?l.set?s=e.stylize("[Getter/Setter]","special"):s=e.stylize("[Getter]","special"):l.set&&(s=e.stylize("[Setter]","special")),BSe(n,i)||(o="["+i+"]"),s||(e.seen.indexOf(l.value)<0?(BR(r)?s=OR(e,l.value,null):s=OR(e,l.value,r-1),s.indexOf(`
+`)>-1&&(a?s=s.split(`
+`).map(function(u){return"  "+u}).join(`
+`).slice(2):s=`
+`+s.split(`
+`).map(function(u){return"   "+u}).join(`
+`))):s=e.stylize("[Circular]","special")),w2(o)){if(a&&i.match(/^\d+$/))return s;o=JSON.stringify(""+i),o.match(/^"([a-zA-Z_][a-zA-Z_0-9]*)"$/)?(o=o.slice(1,-1),o=e.stylize(o,"name")):(o=o.replace(/'/g,"\\'").replace(/\\"/g,'"').replace(/(^"|"$)/g,"'"),o=e.stylize(o,"string"))}return o+": "+s}function HMt(e,t,r){var n=0,i=e.reduce(function(a,o){return n++,o.indexOf(`
+`)>=0&&n++,a+o.replace(/\u001b\[\d\d?m/g,"").length+1},0);return i>60?r[0]+(t===""?"":t+`
+ `)+" "+e.join(`,
+  `)+" "+r[1]:r[0]+t+" "+e.join(", ")+" "+r[1]}fu.types=JH();function OSe(e){return Array.isArray(e)}fu.isArray=OSe;function zj(e){return typeof e=="boolean"}fu.isBoolean=zj;function BR(e){return e===null}fu.isNull=BR;function jMt(e){return e==null}fu.isNullOrUndefined=jMt;function qSe(e){return typeof e=="number"}fu.isNumber=qSe;function NR(e){return typeof e=="string"}fu.isString=NR;function WMt(e){return typeof e=="symbol"}fu.isSymbol=WMt;function w2(e){return e===void 0}fu.isUndefined=w2;function aE(e){return gA(e)&&Oj(e)==="[object RegExp]"}fu.isRegExp=aE;fu.types.isRegExp=aE;function gA(e){return typeof e=="object"&&e!==null}fu.isObject=gA;function qR(e){return gA(e)&&Oj(e)==="[object Date]"}fu.isDate=qR;fu.types.isDate=qR;function oE(e){return gA(e)&&(Oj(e)==="[object Error]"||e instanceof Error)}fu.isError=oE;fu.types.isNativeError=oE;function zR(e){return typeof e=="function"}fu.isFunction=zR;function XMt(e){return e===null||typeof e=="boolean"||typeof e=="number"||typeof e=="string"||typeof e=="symbol"||typeof e=="undefined"}fu.isPrimitive=XMt;fu.isBuffer=$H();function Oj(e){return Object.prototype.toString.call(e)}function Dj(e){return e<10?"0"+e.toString(10):e.toString(10)}var ZMt=["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"];function YMt(){var e=new Date,t=[Dj(e.getHours()),Dj(e.getMinutes()),Dj(e.getSeconds())].join(":");return[e.getDate(),ZMt[e.getMonth()],t].join(" ")}fu.log=function(){console.log("%s - %s",YMt(),fu.format.apply(fu,arguments))};fu.inherits=Uy();fu._extend=function(e,t){if(!t||!gA(t))return e;for(var r=Object.keys(t),n=r.length;n--;)e[r[n]]=t[r[n]];return e};function BSe(e,t){return Object.prototype.hasOwnProperty.call(e,t)}var b2=typeof Symbol!="undefined"?Symbol("util.promisify.custom"):void 0;fu.promisify=function(t){if(typeof t!="function")throw new TypeError('The "original" argument must be of type Function');if(b2&&t[b2]){var r=t[b2];if(typeof r!="function")throw new TypeError('The "util.promisify.custom" argument must be of type Function');return Object.defineProperty(r,b2,{value:r,enumerable:!1,writable:!1,configurable:!0}),r}function r(){for(var n,i,a=new Promise(function(l,u){n=l,i=u}),o=[],s=0;s<arguments.length;s++)o.push(arguments[s]);o.push(function(l,u){l?i(l):n(u)});try{t.apply(this,o)}catch(l){i(l)}return a}return Object.setPrototypeOf(r,Object.getPrototypeOf(t)),b2&&Object.defineProperty(r,b2,{value:r,enumerable:!1,writable:!1,configurable:!0}),Object.defineProperties(r,FSe(t))};fu.promisify.custom=b2;function KMt(e,t){if(!e){var r=new Error("Promise was rejected with a falsy value");r.reason=e,e=r}return t(e)}function JMt(e){if(typeof e!="function")throw new TypeError('The "original" argument must be of type Function');function t(){for(var r=[],n=0;n<arguments.length;n++)r.push(arguments[n]);var i=r.pop();if(typeof i!="function")throw new TypeError("The last argument must be of type Function");var a=this,o=function(){return i.apply(a,arguments)};e.apply(this,r).then(function(s){process.nextTick(o.bind(null,null,s))},function(s){process.nextTick(KMt.bind(null,s,o))})}return Object.setPrototypeOf(t,Object.getPrototypeOf(e)),Object.defineProperties(t,FSe(e)),t}fu.callbackify=JMt});var Nj=ye((edr,GSe)=>{"use strict";function q_(e){"@babel/helpers - typeof";return q_=typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?function(t){return typeof t}:function(t){return t&&typeof Symbol=="function"&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t},q_(e)}function NSe(e,t){for(var r=0;r<t.length;r++){var n=t[r];n.enumerable=n.enumerable||!1,n.configurable=!0,"value"in n&&(n.writable=!0),Object.defineProperty(e,QMt(n.key),n)}}function $Mt(e,t,r){return t&&NSe(e.prototype,t),r&&NSe(e,r),Object.defineProperty(e,"prototype",{writable:!1}),e}function QMt(e){var t=e4t(e,"string");return q_(t)==="symbol"?t:String(t)}function e4t(e,t){if(q_(e)!=="object"||e===null)return e;var r=e[Symbol.toPrimitive];if(r!==void 0){var n=r.call(e,t||"default");if(q_(n)!=="object")return n;throw new TypeError("@@toPrimitive must return a primitive value.")}return(t==="string"?String:Number)(e)}function t4t(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}function r4t(e,t){if(typeof t!="function"&&t!==null)throw new TypeError("Super expression must either be null or a function");e.prototype=Object.create(t&&t.prototype,{constructor:{value:e,writable:!0,configurable:!0}}),Object.defineProperty(e,"prototype",{writable:!1}),t&&Bj(e,t)}function Bj(e,t){return Bj=Object.setPrototypeOf?Object.setPrototypeOf.bind():function(n,i){return n.__proto__=i,n},Bj(e,t)}function i4t(e){var t=o4t();return function(){var n=UR(e),i;if(t){var a=UR(this).constructor;i=Reflect.construct(n,arguments,a)}else i=n.apply(this,arguments);return n4t(this,i)}}function n4t(e,t){if(t&&(q_(t)==="object"||typeof t=="function"))return t;if(t!==void 0)throw new TypeError("Derived constructors may only return object or undefined");return a4t(e)}function a4t(e){if(e===void 0)throw new ReferenceError("this hasn't been initialised - super() hasn't been called");return e}function o4t(){if(typeof Reflect=="undefined"||!Reflect.construct||Reflect.construct.sham)return!1;if(typeof Proxy=="function")return!0;try{return Boolean.prototype.valueOf.call(Reflect.construct(Boolean,[],function(){})),!0}catch(e){return!1}}function UR(e){return UR=Object.setPrototypeOf?Object.getPrototypeOf.bind():function(r){return r.__proto__||Object.getPrototypeOf(r)},UR(e)}var VSe={},yA,qj;function sE(e,t,r){r||(r=Error);function n(a,o,s){return typeof t=="string"?t:t(a,o,s)}var i=function(a){r4t(s,a);var o=i4t(s);function s(l,u,c){var f;return t4t(this,s),f=o.call(this,n(l,u,c)),f.code=e,f}return $Mt(s)}(r);VSe[e]=i}function USe(e,t){if(Array.isArray(e)){var r=e.length;return e=e.map(function(n){return String(n)}),r>2?"one of ".concat(t," ").concat(e.slice(0,r-1).join(", "),", or ")+e[r-1]:r===2?"one of ".concat(t," ").concat(e[0]," or ").concat(e[1]):"of ".concat(t," ").concat(e[0])}else return"of ".concat(t," ").concat(String(e))}function s4t(e,t,r){return e.substr(!r||r<0?0:+r,t.length)===t}function l4t(e,t,r){return(r===void 0||r>e.length)&&(r=e.length),e.substring(r-t.length,r)===t}function u4t(e,t,r){return typeof r!="number"&&(r=0),r+t.length>e.length?!1:e.indexOf(t,r)!==-1}sE("ERR_AMBIGUOUS_ARGUMENT",'The "%s" argument is ambiguous. %s',TypeError);sE("ERR_INVALID_ARG_TYPE",function(e,t,r){yA===void 0&&(yA=lE()),yA(typeof e=="string","'name' must be a string");var n;typeof t=="string"&&s4t(t,"not ")?(n="must not be",t=t.replace(/^not /,"")):n="must be";var i;if(l4t(e," argument"))i="The ".concat(e," ").concat(n," ").concat(USe(t,"type"));else{var a=u4t(e,".")?"property":"argument";i='The "'.concat(e,'" ').concat(a," ").concat(n," ").concat(USe(t,"type"))}return i+=". Received type ".concat(q_(r)),i},TypeError);sE("ERR_INVALID_ARG_VALUE",function(e,t){var r=arguments.length>2&&arguments[2]!==void 0?arguments[2]:"is invalid";qj===void 0&&(qj=mA());var n=qj.inspect(t);return n.length>128&&(n="".concat(n.slice(0,128),"...")),"The argument '".concat(e,"' ").concat(r,". Received ").concat(n)},TypeError,RangeError);sE("ERR_INVALID_RETURN_VALUE",function(e,t,r){var n;return r&&r.constructor&&r.constructor.name?n="instance of ".concat(r.constructor.name):n="type ".concat(q_(r)),"Expected ".concat(e,' to be returned from the "').concat(t,'"')+" function but got ".concat(n,".")},TypeError);sE("ERR_MISSING_ARGS",function(){for(var e=arguments.length,t=new Array(e),r=0;r<e;r++)t[r]=arguments[r];yA===void 0&&(yA=lE()),yA(t.length>0,"At least one arg needs to be specified");var n="The ",i=t.length;switch(t=t.map(function(a){return'"'.concat(a,'"')}),i){case 1:n+="".concat(t[0]," argument");break;case 2:n+="".concat(t[0]," and ").concat(t[1]," arguments");break;default:n+=t.slice(0,i-1).join(", "),n+=", and ".concat(t[i-1]," arguments");break}return"".concat(n," must be specified")},TypeError);GSe.exports.codes=VSe});var QSe=ye((tdr,$Se)=>{"use strict";function HSe(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter(function(i){return Object.getOwnPropertyDescriptor(e,i).enumerable})),r.push.apply(r,n)}return r}function jSe(e){for(var t=1;t<arguments.length;t++){var r=arguments[t]!=null?arguments[t]:{};t%2?HSe(Object(r),!0).forEach(function(n){c4t(e,n,r[n])}):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(r)):HSe(Object(r)).forEach(function(n){Object.defineProperty(e,n,Object.getOwnPropertyDescriptor(r,n))})}return e}function c4t(e,t,r){return t=YSe(t),t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function f4t(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}function WSe(e,t){for(var r=0;r<t.length;r++){var n=t[r];n.enumerable=n.enumerable||!1,n.configurable=!0,"value"in n&&(n.writable=!0),Object.defineProperty(e,YSe(n.key),n)}}function h4t(e,t,r){return t&&WSe(e.prototype,t),r&&WSe(e,r),Object.defineProperty(e,"prototype",{writable:!1}),e}function YSe(e){var t=d4t(e,"string");return Dp(t)==="symbol"?t:String(t)}function d4t(e,t){if(Dp(e)!=="object"||e===null)return e;var r=e[Symbol.toPrimitive];if(r!==void 0){var n=r.call(e,t||"default");if(Dp(n)!=="object")return n;throw new TypeError("@@toPrimitive must return a primitive value.")}return(t==="string"?String:Number)(e)}function v4t(e,t){if(typeof t!="function"&&t!==null)throw new TypeError("Super expression must either be null or a function");e.prototype=Object.create(t&&t.prototype,{constructor:{value:e,writable:!0,configurable:!0}}),Object.defineProperty(e,"prototype",{writable:!1}),t&&hE(e,t)}function p4t(e){var t=JSe();return function(){var n=dE(e),i;if(t){var a=dE(this).constructor;i=Reflect.construct(n,arguments,a)}else i=n.apply(this,arguments);return KSe(this,i)}}function KSe(e,t){if(t&&(Dp(t)==="object"||typeof t=="function"))return t;if(t!==void 0)throw new TypeError("Derived constructors may only return object or undefined");return Uj(e)}function Uj(e){if(e===void 0)throw new ReferenceError("this hasn't been initialised - super() hasn't been called");return e}function Vj(e){var t=typeof Map=="function"?new Map:void 0;return Vj=function(n){if(n===null||!g4t(n))return n;if(typeof n!="function")throw new TypeError("Super expression must either be null or a function");if(typeof t!="undefined"){if(t.has(n))return t.get(n);t.set(n,i)}function i(){return VR(n,arguments,dE(this).constructor)}return i.prototype=Object.create(n.prototype,{constructor:{value:i,enumerable:!1,writable:!0,configurable:!0}}),hE(i,n)},Vj(e)}function VR(e,t,r){return JSe()?VR=Reflect.construct.bind():VR=function(i,a,o){var s=[null];s.push.apply(s,a);var l=Function.bind.apply(i,s),u=new l;return o&&hE(u,o.prototype),u},VR.apply(null,arguments)}function JSe(){if(typeof Reflect=="undefined"||!Reflect.construct||Reflect.construct.sham)return!1;if(typeof Proxy=="function")return!0;try{return Boolean.prototype.valueOf.call(Reflect.construct(Boolean,[],function(){})),!0}catch(e){return!1}}function g4t(e){return Function.toString.call(e).indexOf("[native code]")!==-1}function hE(e,t){return hE=Object.setPrototypeOf?Object.setPrototypeOf.bind():function(n,i){return n.__proto__=i,n},hE(e,t)}function dE(e){return dE=Object.setPrototypeOf?Object.getPrototypeOf.bind():function(r){return r.__proto__||Object.getPrototypeOf(r)},dE(e)}function Dp(e){"@babel/helpers - typeof";return Dp=typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?function(t){return typeof t}:function(t){return t&&typeof Symbol=="function"&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t},Dp(e)}var m4t=mA(),Gj=m4t.inspect,y4t=Nj(),_4t=y4t.codes.ERR_INVALID_ARG_TYPE;function XSe(e,t,r){return(r===void 0||r>e.length)&&(r=e.length),e.substring(r-t.length,r)===t}function x4t(e,t){if(t=Math.floor(t),e.length==0||t==0)return"";var r=e.length*t;for(t=Math.floor(Math.log(t)/Math.log(2));t;)e+=e,t--;return e+=e.substring(0,r-e.length),e}var jg="",uE="",cE="",Tv="",T2={deepStrictEqual:"Expected values to be strictly deep-equal:",strictEqual:"Expected values to be strictly equal:",strictEqualObject:'Expected "actual" to be reference-equal to "expected":',deepEqual:"Expected values to be loosely deep-equal:",equal:"Expected values to be loosely equal:",notDeepStrictEqual:'Expected "actual" not to be strictly deep-equal to:',notStrictEqual:'Expected "actual" to be strictly unequal to:',notStrictEqualObject:'Expected "actual" not to be reference-equal to "expected":',notDeepEqual:'Expected "actual" not to be loosely deep-equal to:',notEqual:'Expected "actual" to be loosely unequal to:',notIdentical:"Values identical but not reference-equal:"},b4t=10;function ZSe(e){var t=Object.keys(e),r=Object.create(Object.getPrototypeOf(e));return t.forEach(function(n){r[n]=e[n]}),Object.defineProperty(r,"message",{value:e.message}),r}function fE(e){return Gj(e,{compact:!1,customInspect:!1,depth:1e3,maxArrayLength:1/0,showHidden:!1,breakLength:1/0,showProxy:!1,sorted:!0,getters:!0})}function w4t(e,t,r){var n="",i="",a=0,o="",s=!1,l=fE(e),u=l.split(`
+`),c=fE(t).split(`
+`),f=0,h="";if(r==="strictEqual"&&Dp(e)==="object"&&Dp(t)==="object"&&e!==null&&t!==null&&(r="strictEqualObject"),u.length===1&&c.length===1&&u[0]!==c[0]){var d=u[0].length+c[0].length;if(d<=b4t){if((Dp(e)!=="object"||e===null)&&(Dp(t)!=="object"||t===null)&&(e!==0||t!==0))return"".concat(T2[r],`
+
+`)+"".concat(u[0]," !== ").concat(c[0],`
+`)}else if(r!=="strictEqualObject"){var v=process.stderr&&process.stderr.isTTY?process.stderr.columns:80;if(d<v){for(;u[0][f]===c[0][f];)f++;f>2&&(h=`
+  `.concat(x4t(" ",f),"^"),f=0)}}}for(var _=u[u.length-1],b=c[c.length-1];_===b&&(f++<2?o=`
+  `.concat(_).concat(o):n=_,u.pop(),c.pop(),!(u.length===0||c.length===0));)_=u[u.length-1],b=c[c.length-1];var p=Math.max(u.length,c.length);if(p===0){var k=l.split(`
+`);if(k.length>30)for(k[26]="".concat(jg,"...").concat(Tv);k.length>27;)k.pop();return"".concat(T2.notIdentical,`
+
+`).concat(k.join(`
+`),`
+`)}f>3&&(o=`
+`.concat(jg,"...").concat(Tv).concat(o),s=!0),n!==""&&(o=`
+  `.concat(n).concat(o),n="");var E=0,S=T2[r]+`
+`.concat(uE,"+ actual").concat(Tv," ").concat(cE,"- expected").concat(Tv),L=" ".concat(jg,"...").concat(Tv," Lines skipped");for(f=0;f<p;f++){var x=f-a;if(u.length<f+1)x>1&&f>2&&(x>4?(i+=`
+`.concat(jg,"...").concat(Tv),s=!0):x>3&&(i+=`
+  `.concat(c[f-2]),E++),i+=`
+  `.concat(c[f-1]),E++),a=f,n+=`
+`.concat(cE,"-").concat(Tv," ").concat(c[f]),E++;else if(c.length<f+1)x>1&&f>2&&(x>4?(i+=`
+`.concat(jg,"...").concat(Tv),s=!0):x>3&&(i+=`
+  `.concat(u[f-2]),E++),i+=`
+  `.concat(u[f-1]),E++),a=f,i+=`
+`.concat(uE,"+").concat(Tv," ").concat(u[f]),E++;else{var C=c[f],M=u[f],g=M!==C&&(!XSe(M,",")||M.slice(0,-1)!==C);g&&XSe(C,",")&&C.slice(0,-1)===M&&(g=!1,M+=","),g?(x>1&&f>2&&(x>4?(i+=`
+`.concat(jg,"...").concat(Tv),s=!0):x>3&&(i+=`
+  `.concat(u[f-2]),E++),i+=`
+  `.concat(u[f-1]),E++),a=f,i+=`
+`.concat(uE,"+").concat(Tv," ").concat(M),n+=`
+`.concat(cE,"-").concat(Tv," ").concat(C),E+=2):(i+=n,n="",(x===1||f===0)&&(i+=`
+  `.concat(M),E++))}if(E>20&&f<p-2)return"".concat(S).concat(L,`
+`).concat(i,`
+`).concat(jg,"...").concat(Tv).concat(n,`
+`)+"".concat(jg,"...").concat(Tv)}return"".concat(S).concat(s?L:"",`
+`).concat(i).concat(n).concat(o).concat(h)}var T4t=function(e,t){v4t(n,e);var r=p4t(n);function n(i){var a;if(f4t(this,n),Dp(i)!=="object"||i===null)throw new _4t("options","Object",i);var o=i.message,s=i.operator,l=i.stackStartFn,u=i.actual,c=i.expected,f=Error.stackTraceLimit;if(Error.stackTraceLimit=0,o!=null)a=r.call(this,String(o));else if(process.stderr&&process.stderr.isTTY&&(process.stderr&&process.stderr.getColorDepth&&process.stderr.getColorDepth()!==1?(jg="\x1B[34m",uE="\x1B[32m",Tv="\x1B[39m",cE="\x1B[31m"):(jg="",uE="",Tv="",cE="")),Dp(u)==="object"&&u!==null&&Dp(c)==="object"&&c!==null&&"stack"in u&&u instanceof Error&&"stack"in c&&c instanceof Error&&(u=ZSe(u),c=ZSe(c)),s==="deepStrictEqual"||s==="strictEqual")a=r.call(this,w4t(u,c,s));else if(s==="notDeepStrictEqual"||s==="notStrictEqual"){var h=T2[s],d=fE(u).split(`
+`);if(s==="notStrictEqual"&&Dp(u)==="object"&&u!==null&&(h=T2.notStrictEqualObject),d.length>30)for(d[26]="".concat(jg,"...").concat(Tv);d.length>27;)d.pop();d.length===1?a=r.call(this,"".concat(h," ").concat(d[0])):a=r.call(this,"".concat(h,`
+
+`).concat(d.join(`
+`),`
+`))}else{var v=fE(u),_="",b=T2[s];s==="notDeepEqual"||s==="notEqual"?(v="".concat(T2[s],`
+
+`).concat(v),v.length>1024&&(v="".concat(v.slice(0,1021),"..."))):(_="".concat(fE(c)),v.length>512&&(v="".concat(v.slice(0,509),"...")),_.length>512&&(_="".concat(_.slice(0,509),"...")),s==="deepEqual"||s==="equal"?v="".concat(b,`
+
+`).concat(v,`
+
+should equal
+
+`):_=" ".concat(s," ").concat(_)),a=r.call(this,"".concat(v).concat(_))}return Error.stackTraceLimit=f,a.generatedMessage=!o,Object.defineProperty(Uj(a),"name",{value:"AssertionError [ERR_ASSERTION]",enumerable:!1,writable:!0,configurable:!0}),a.code="ERR_ASSERTION",a.actual=u,a.expected=c,a.operator=s,Error.captureStackTrace&&Error.captureStackTrace(Uj(a),l),a.stack,a.name="AssertionError",KSe(a)}return h4t(n,[{key:"toString",value:function(){return"".concat(this.name," [").concat(this.code,"]: ").concat(this.message)}},{key:t,value:function(a,o){return Gj(this,jSe(jSe({},o),{},{customInspect:!1,depth:0}))}}]),n}(Vj(Error),Gj.custom);$Se.exports=T4t});var Hj=ye((rdr,tMe)=>{"use strict";var eMe=Object.prototype.toString;tMe.exports=function(t){var r=eMe.call(t),n=r==="[object Arguments]";return n||(n=r!=="[object Array]"&&t!==null&&typeof t=="object"&&typeof t.length=="number"&&t.length>=0&&eMe.call(t.callee)==="[object Function]"),n}});var cMe=ye((idr,uMe)=>{"use strict";var lMe;Object.keys||(vE=Object.prototype.hasOwnProperty,jj=Object.prototype.toString,rMe=Hj(),Wj=Object.prototype.propertyIsEnumerable,iMe=!Wj.call({toString:null},"toString"),nMe=Wj.call(function(){},"prototype"),pE=["toString","toLocaleString","valueOf","hasOwnProperty","isPrototypeOf","propertyIsEnumerable","constructor"],GR=function(e){var t=e.constructor;return t&&t.prototype===e},aMe={$applicationCache:!0,$console:!0,$external:!0,$frame:!0,$frameElement:!0,$frames:!0,$innerHeight:!0,$innerWidth:!0,$onmozfullscreenchange:!0,$onmozfullscreenerror:!0,$outerHeight:!0,$outerWidth:!0,$pageXOffset:!0,$pageYOffset:!0,$parent:!0,$scrollLeft:!0,$scrollTop:!0,$scrollX:!0,$scrollY:!0,$self:!0,$webkitIndexedDB:!0,$webkitStorageInfo:!0,$window:!0},oMe=function(){if(typeof window=="undefined")return!1;for(var e in window)try{if(!aMe["$"+e]&&vE.call(window,e)&&window[e]!==null&&typeof window[e]=="object")try{GR(window[e])}catch(t){return!0}}catch(t){return!0}return!1}(),sMe=function(e){if(typeof window=="undefined"||!oMe)return GR(e);try{return GR(e)}catch(t){return!1}},lMe=function(t){var r=t!==null&&typeof t=="object",n=jj.call(t)==="[object Function]",i=rMe(t),a=r&&jj.call(t)==="[object String]",o=[];if(!r&&!n&&!i)throw new TypeError("Object.keys called on a non-object");var s=nMe&&n;if(a&&t.length>0&&!vE.call(t,0))for(var l=0;l<t.length;++l)o.push(String(l));if(i&&t.length>0)for(var u=0;u<t.length;++u)o.push(String(u));else for(var c in t)!(s&&c==="prototype")&&vE.call(t,c)&&o.push(String(c));if(iMe)for(var f=sMe(t),h=0;h<pE.length;++h)!(f&&pE[h]==="constructor")&&vE.call(t,pE[h])&&o.push(pE[h]);return o});var vE,jj,rMe,Wj,iMe,nMe,pE,GR,aMe,oMe,sMe;uMe.exports=lMe});var Xj=ye((ndr,dMe)=>{"use strict";var A4t=Array.prototype.slice,S4t=Hj(),fMe=Object.keys,HR=fMe?function(t){return fMe(t)}:cMe(),hMe=Object.keys;HR.shim=function(){if(Object.keys){var t=function(){var r=Object.keys(arguments);return r&&r.length===arguments.length}(1,2);t||(Object.keys=function(n){return S4t(n)?hMe(A4t.call(n)):hMe(n)})}else Object.keys=HR;return Object.keys||HR};dMe.exports=HR});var _Me=ye((adr,yMe)=>{"use strict";var M4t=Xj(),gMe=j8()(),mMe=lA(),vMe=Object,E4t=mMe("Array.prototype.push"),pMe=mMe("Object.prototype.propertyIsEnumerable"),k4t=gMe?Object.getOwnPropertySymbols:null;yMe.exports=function(t,r){if(t==null)throw new TypeError("target must be an object");var n=vMe(t);if(arguments.length===1)return n;for(var i=1;i<arguments.length;++i){var a=vMe(arguments[i]),o=M4t(a),s=gMe&&(Object.getOwnPropertySymbols||k4t);if(s)for(var l=s(a),u=0;u<l.length;++u){var c=l[u];pMe(a,c)&&E4t(o,c)}for(var f=0;f<o.length;++f){var h=o[f];if(pMe(a,h)){var d=a[h];n[h]=d}}}return n}});var bMe=ye((odr,xMe)=>{"use strict";var Zj=_Me(),C4t=function(){if(!Object.assign)return!1;for(var e="abcdefghijklmnopqrst",t=e.split(""),r={},n=0;n<t.length;++n)r[t[n]]=t[n];var i=Object.assign({},r),a="";for(var o in i)a+=o;return e!==a},L4t=function(){if(!Object.assign||!Object.preventExtensions)return!1;var e=Object.preventExtensions({1:2});try{Object.assign(e,"xy")}catch(t){return e[1]==="y"}return!1};xMe.exports=function(){return!Object.assign||C4t()||L4t()?Zj:Object.assign}});var Yj=ye((sdr,TMe)=>{"use strict";var wMe=function(e){return e!==e};TMe.exports=function(t,r){return t===0&&r===0?1/t===1/r:!!(t===r||wMe(t)&&wMe(r))}});var jR=ye((ldr,AMe)=>{"use strict";var P4t=Yj();AMe.exports=function(){return typeof Object.is=="function"?Object.is:P4t}});var gE=ye((udr,kMe)=>{"use strict";var I4t=Xj(),R4t=typeof Symbol=="function"&&typeof Symbol("foo")=="symbol",D4t=Object.prototype.toString,F4t=Array.prototype.concat,SMe=Object.defineProperty,z4t=function(e){return typeof e=="function"&&D4t.call(e)==="[object Function]"},O4t=PH()(),MMe=SMe&&O4t,q4t=function(e,t,r,n){if(t in e){if(n===!0){if(e[t]===r)return}else if(!z4t(n)||!n())return}MMe?SMe(e,t,{configurable:!0,enumerable:!1,value:r,writable:!0}):e[t]=r},EMe=function(e,t){var r=arguments.length>2?arguments[2]:{},n=I4t(t);R4t&&(n=F4t.call(n,Object.getOwnPropertySymbols(t)));for(var i=0;i<n.length;i+=1)q4t(e,n[i],t[n[i]],r[n[i]])};EMe.supportsDescriptors=!!MMe;kMe.exports=EMe});var LMe=ye((cdr,CMe)=>{"use strict";var B4t=jR(),N4t=gE();CMe.exports=function(){var t=B4t();return N4t(Object,{is:t},{is:function(){return Object.is!==t}}),t}});var DMe=ye((fdr,RMe)=>{"use strict";var U4t=gE(),V4t=X4(),G4t=Yj(),PMe=jR(),H4t=LMe(),IMe=V4t(PMe(),Object);U4t(IMe,{getPolyfill:PMe,implementation:G4t,shim:H4t});RMe.exports=IMe});var Kj=ye((hdr,FMe)=>{"use strict";FMe.exports=function(t){return t!==t}});var Jj=ye((ddr,zMe)=>{"use strict";var j4t=Kj();zMe.exports=function(){return Number.isNaN&&Number.isNaN(NaN)&&!Number.isNaN("a")?Number.isNaN:j4t}});var qMe=ye((vdr,OMe)=>{"use strict";var W4t=gE(),X4t=Jj();OMe.exports=function(){var t=X4t();return W4t(Number,{isNaN:t},{isNaN:function(){return Number.isNaN!==t}}),t}});var VMe=ye((pdr,UMe)=>{"use strict";var Z4t=X4(),Y4t=gE(),K4t=Kj(),BMe=Jj(),J4t=qMe(),NMe=Z4t(BMe(),Number);Y4t(NMe,{getPolyfill:BMe,implementation:K4t,shim:J4t});UMe.exports=NMe});var l4e=ye((gdr,s4e)=>{"use strict";function GMe(e,t){return tEt(e)||eEt(e,t)||Q4t(e,t)||$4t()}function $4t(){throw new TypeError(`Invalid attempt to destructure non-iterable instance.
+In order to be iterable, non-array objects must have a [Symbol.iterator]() method.`)}function Q4t(e,t){if(e){if(typeof e=="string")return HMe(e,t);var r=Object.prototype.toString.call(e).slice(8,-1);if(r==="Object"&&e.constructor&&(r=e.constructor.name),r==="Map"||r==="Set")return Array.from(e);if(r==="Arguments"||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(r))return HMe(e,t)}}function HMe(e,t){(t==null||t>e.length)&&(t=e.length);for(var r=0,n=new Array(t);r<t;r++)n[r]=e[r];return n}function eEt(e,t){var r=e==null?null:typeof Symbol!="undefined"&&e[Symbol.iterator]||e["@@iterator"];if(r!=null){var n,i,a,o,s=[],l=!0,u=!1;try{if(a=(r=r.call(e)).next,t===0){if(Object(r)!==r)return;l=!1}else for(;!(l=(n=a.call(r)).done)&&(s.push(n.value),s.length!==t);l=!0);}catch(c){u=!0,i=c}finally{try{if(!l&&r.return!=null&&(o=r.return(),Object(o)!==o))return}finally{if(u)throw i}}return s}}function tEt(e){if(Array.isArray(e))return e}function J0(e){"@babel/helpers - typeof";return J0=typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?function(t){return typeof t}:function(t){return t&&typeof Symbol=="function"&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t},J0(e)}var rEt=/a/g.flags!==void 0,$R=function(t){var r=[];return t.forEach(function(n){return r.push(n)}),r},jMe=function(t){var r=[];return t.forEach(function(n,i){return r.push([i,n])}),r},r4e=Object.is?Object.is:DMe(),KR=Object.getOwnPropertySymbols?Object.getOwnPropertySymbols:function(){return[]},$j=Number.isNaN?Number.isNaN:VMe();function eW(e){return e.call.bind(e)}var yE=eW(Object.prototype.hasOwnProperty),JR=eW(Object.prototype.propertyIsEnumerable),WMe=eW(Object.prototype.toString),sp=mA().types,iEt=sp.isAnyArrayBuffer,nEt=sp.isArrayBufferView,XMe=sp.isDate,WR=sp.isMap,ZMe=sp.isRegExp,XR=sp.isSet,aEt=sp.isNativeError,oEt=sp.isBoxedPrimitive,YMe=sp.isNumberObject,KMe=sp.isStringObject,JMe=sp.isBooleanObject,$Me=sp.isBigIntObject,sEt=sp.isSymbolObject,lEt=sp.isFloat32Array,uEt=sp.isFloat64Array;function cEt(e){if(e.length===0||e.length>10)return!0;for(var t=0;t<e.length;t++){var r=e.charCodeAt(t);if(r<48||r>57)return!0}return e.length===10&&e>=Math.pow(2,32)}function ZR(e){return Object.keys(e).filter(cEt).concat(KR(e).filter(Object.prototype.propertyIsEnumerable.bind(e)))}function i4e(e,t){if(e===t)return 0;for(var r=e.length,n=t.length,i=0,a=Math.min(r,n);i<a;++i)if(e[i]!==t[i]){r=e[i],n=t[i];break}return r<n?-1:n<r?1:0}var YR=void 0,fEt=!0,hEt=!1,Qj=0,tW=1,n4e=2,a4e=3;function dEt(e,t){return rEt?e.source===t.source&&e.flags===t.flags:RegExp.prototype.toString.call(e)===RegExp.prototype.toString.call(t)}function vEt(e,t){if(e.byteLength!==t.byteLength)return!1;for(var r=0;r<e.byteLength;r++)if(e[r]!==t[r])return!1;return!0}function pEt(e,t){return e.byteLength!==t.byteLength?!1:i4e(new Uint8Array(e.buffer,e.byteOffset,e.byteLength),new Uint8Array(t.buffer,t.byteOffset,t.byteLength))===0}function gEt(e,t){return e.byteLength===t.byteLength&&i4e(new Uint8Array(e),new Uint8Array(t))===0}function mEt(e,t){return YMe(e)?YMe(t)&&r4e(Number.prototype.valueOf.call(e),Number.prototype.valueOf.call(t)):KMe(e)?KMe(t)&&String.prototype.valueOf.call(e)===String.prototype.valueOf.call(t):JMe(e)?JMe(t)&&Boolean.prototype.valueOf.call(e)===Boolean.prototype.valueOf.call(t):$Me(e)?$Me(t)&&BigInt.prototype.valueOf.call(e)===BigInt.prototype.valueOf.call(t):sEt(t)&&Symbol.prototype.valueOf.call(e)===Symbol.prototype.valueOf.call(t)}function $0(e,t,r,n){if(e===t)return e!==0?!0:r?r4e(e,t):!0;if(r){if(J0(e)!=="object")return typeof e=="number"&&$j(e)&&$j(t);if(J0(t)!=="object"||e===null||t===null||Object.getPrototypeOf(e)!==Object.getPrototypeOf(t))return!1}else{if(e===null||J0(e)!=="object")return t===null||J0(t)!=="object"?e==t:!1;if(t===null||J0(t)!=="object")return!1}var i=WMe(e),a=WMe(t);if(i!==a)return!1;if(Array.isArray(e)){if(e.length!==t.length)return!1;var o=ZR(e,YR),s=ZR(t,YR);return o.length!==s.length?!1:mE(e,t,r,n,tW,o)}if(i==="[object Object]"&&(!WR(e)&&WR(t)||!XR(e)&&XR(t)))return!1;if(XMe(e)){if(!XMe(t)||Date.prototype.getTime.call(e)!==Date.prototype.getTime.call(t))return!1}else if(ZMe(e)){if(!ZMe(t)||!dEt(e,t))return!1}else if(aEt(e)||e instanceof Error){if(e.message!==t.message||e.name!==t.name)return!1}else if(nEt(e)){if(!r&&(lEt(e)||uEt(e))){if(!vEt(e,t))return!1}else if(!pEt(e,t))return!1;var l=ZR(e,YR),u=ZR(t,YR);return l.length!==u.length?!1:mE(e,t,r,n,Qj,l)}else{if(XR(e))return!XR(t)||e.size!==t.size?!1:mE(e,t,r,n,n4e);if(WR(e))return!WR(t)||e.size!==t.size?!1:mE(e,t,r,n,a4e);if(iEt(e)){if(!gEt(e,t))return!1}else if(oEt(e)&&!mEt(e,t))return!1}return mE(e,t,r,n,Qj)}function QMe(e,t){return t.filter(function(r){return JR(e,r)})}function mE(e,t,r,n,i,a){if(arguments.length===5){a=Object.keys(e);var o=Object.keys(t);if(a.length!==o.length)return!1}for(var s=0;s<a.length;s++)if(!yE(t,a[s]))return!1;if(r&&arguments.length===5){var l=KR(e);if(l.length!==0){var u=0;for(s=0;s<l.length;s++){var c=l[s];if(JR(e,c)){if(!JR(t,c))return!1;a.push(c),u++}else if(JR(t,c))return!1}var f=KR(t);if(l.length!==f.length&&QMe(t,f).length!==u)return!1}else{var h=KR(t);if(h.length!==0&&QMe(t,h).length!==0)return!1}}if(a.length===0&&(i===Qj||i===tW&&e.length===0||e.size===0))return!0;if(n===void 0)n={val1:new Map,val2:new Map,position:0};else{var d=n.val1.get(e);if(d!==void 0){var v=n.val2.get(t);if(v!==void 0)return d===v}n.position++}n.val1.set(e,n.position),n.val2.set(t,n.position);var _=wEt(e,t,r,a,n,i);return n.val1.delete(e),n.val2.delete(t),_}function e4e(e,t,r,n){for(var i=$R(e),a=0;a<i.length;a++){var o=i[a];if($0(t,o,r,n))return e.delete(o),!0}return!1}function o4e(e){switch(J0(e)){case"undefined":return null;case"object":return;case"symbol":return!1;case"string":e=+e;case"number":if($j(e))return!1}return!0}function yEt(e,t,r){var n=o4e(r);return n!=null?n:t.has(n)&&!e.has(n)}function _Et(e,t,r,n,i){var a=o4e(r);if(a!=null)return a;var o=t.get(a);return o===void 0&&!t.has(a)||!$0(n,o,!1,i)?!1:!e.has(a)&&$0(n,o,!1,i)}function xEt(e,t,r,n){for(var i=null,a=$R(e),o=0;o<a.length;o++){var s=a[o];if(J0(s)==="object"&&s!==null)i===null&&(i=new Set),i.add(s);else if(!t.has(s)){if(r||!yEt(e,t,s))return!1;i===null&&(i=new Set),i.add(s)}}if(i!==null){for(var l=$R(t),u=0;u<l.length;u++){var c=l[u];if(J0(c)==="object"&&c!==null){if(!e4e(i,c,r,n))return!1}else if(!r&&!e.has(c)&&!e4e(i,c,r,n))return!1}return i.size===0}return!0}function t4e(e,t,r,n,i,a){for(var o=$R(e),s=0;s<o.length;s++){var l=o[s];if($0(r,l,i,a)&&$0(n,t.get(l),i,a))return e.delete(l),!0}return!1}function bEt(e,t,r,n){for(var i=null,a=jMe(e),o=0;o<a.length;o++){var s=GMe(a[o],2),l=s[0],u=s[1];if(J0(l)==="object"&&l!==null)i===null&&(i=new Set),i.add(l);else{var c=t.get(l);if(c===void 0&&!t.has(l)||!$0(u,c,r,n)){if(r||!_Et(e,t,l,u,n))return!1;i===null&&(i=new Set),i.add(l)}}}if(i!==null){for(var f=jMe(t),h=0;h<f.length;h++){var d=GMe(f[h],2),v=d[0],_=d[1];if(J0(v)==="object"&&v!==null){if(!t4e(i,e,v,_,r,n))return!1}else if(!r&&(!e.has(v)||!$0(e.get(v),_,!1,n))&&!t4e(i,e,v,_,!1,n))return!1}return i.size===0}return!0}function wEt(e,t,r,n,i,a){var o=0;if(a===n4e){if(!xEt(e,t,r,i))return!1}else if(a===a4e){if(!bEt(e,t,r,i))return!1}else if(a===tW)for(;o<e.length;o++)if(yE(e,o)){if(!yE(t,o)||!$0(e[o],t[o],r,i))return!1}else{if(yE(t,o))return!1;for(var s=Object.keys(e);o<s.length;o++){var l=s[o];if(!yE(t,l)||!$0(e[l],t[l],r,i))return!1}return s.length===Object.keys(t).length}for(o=0;o<n.length;o++){var u=n[o];if(!$0(e[u],t[u],r,i))return!1}return!0}function TEt(e,t){return $0(e,t,hEt)}function AEt(e,t){return $0(e,t,fEt)}s4e.exports={isDeepEqual:TEt,isDeepStrictEqual:AEt}});var lE=ye((mdr,M4e)=>{"use strict";function Wg(e){"@babel/helpers - typeof";return Wg=typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?function(t){return typeof t}:function(t){return t&&typeof Symbol=="function"&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t},Wg(e)}function u4e(e,t){for(var r=0;r<t.length;r++){var n=t[r];n.enumerable=n.enumerable||!1,n.configurable=!0,"value"in n&&(n.writable=!0),Object.defineProperty(e,MEt(n.key),n)}}function SEt(e,t,r){return t&&u4e(e.prototype,t),r&&u4e(e,r),Object.defineProperty(e,"prototype",{writable:!1}),e}function MEt(e){var t=EEt(e,"string");return Wg(t)==="symbol"?t:String(t)}function EEt(e,t){if(Wg(e)!=="object"||e===null)return e;var r=e[Symbol.toPrimitive];if(r!==void 0){var n=r.call(e,t||"default");if(Wg(n)!=="object")return n;throw new TypeError("@@toPrimitive must return a primitive value.")}return(t==="string"?String:Number)(e)}function kEt(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}var CEt=Nj(),_E=CEt.codes,c4e=_E.ERR_AMBIGUOUS_ARGUMENT,_A=_E.ERR_INVALID_ARG_TYPE,LEt=_E.ERR_INVALID_ARG_VALUE,PEt=_E.ERR_INVALID_RETURN_VALUE,N_=_E.ERR_MISSING_ARGS,U_=QSe(),IEt=mA(),QR=IEt.inspect,v4e=mA().types,REt=v4e.isPromise,eD=v4e.isRegExp,DEt=bMe()(),p4e=jR()(),tD=lA()("RegExp.prototype.test"),B_,rD;function xE(){var e=l4e();B_=e.isDeepEqual,rD=e.isDeepStrictEqual}var f4e=!1,Ef=M4e.exports=rW,iD={};function Xg(e){throw e.message instanceof Error?e.message:new U_(e)}function g4e(e,t,r,n,i){var a=arguments.length,o;if(a===0)o="Failed";else if(a===1)r=e,e=void 0;else{if(f4e===!1){f4e=!0;var s=process.emitWarning?process.emitWarning:console.warn.bind(console);s("assert.fail() with more than one argument is deprecated. Please use assert.strictEqual() instead or only pass a message.","DeprecationWarning","DEP0094")}a===2&&(n="!=")}if(r instanceof Error)throw r;var l={actual:e,expected:t,operator:n===void 0?"fail":n,stackStartFn:i||g4e};r!==void 0&&(l.message=r);var u=new U_(l);throw o&&(u.message=o,u.generatedMessage=!0),u}Ef.fail=g4e;Ef.AssertionError=U_;function m4e(e,t,r,n){if(!r){var i=!1;if(t===0)i=!0,n="No value argument passed to `assert.ok()`";else if(n instanceof Error)throw n;var a=new U_({actual:r,expected:!0,message:n,operator:"==",stackStartFn:e});throw a.generatedMessage=i,a}}function rW(){for(var e=arguments.length,t=new Array(e),r=0;r<e;r++)t[r]=arguments[r];m4e.apply(void 0,[rW,t.length].concat(t))}Ef.ok=rW;Ef.equal=function e(t,r,n){if(arguments.length<2)throw new N_("actual","expected");t!=r&&Xg({actual:t,expected:r,message:n,operator:"==",stackStartFn:e})};Ef.notEqual=function e(t,r,n){if(arguments.length<2)throw new N_("actual","expected");t==r&&Xg({actual:t,expected:r,message:n,operator:"!=",stackStartFn:e})};Ef.deepEqual=function e(t,r,n){if(arguments.length<2)throw new N_("actual","expected");B_===void 0&&xE(),B_(t,r)||Xg({actual:t,expected:r,message:n,operator:"deepEqual",stackStartFn:e})};Ef.notDeepEqual=function e(t,r,n){if(arguments.length<2)throw new N_("actual","expected");B_===void 0&&xE(),B_(t,r)&&Xg({actual:t,expected:r,message:n,operator:"notDeepEqual",stackStartFn:e})};Ef.deepStrictEqual=function e(t,r,n){if(arguments.length<2)throw new N_("actual","expected");B_===void 0&&xE(),rD(t,r)||Xg({actual:t,expected:r,message:n,operator:"deepStrictEqual",stackStartFn:e})};Ef.notDeepStrictEqual=y4e;function y4e(e,t,r){if(arguments.length<2)throw new N_("actual","expected");B_===void 0&&xE(),rD(e,t)&&Xg({actual:e,expected:t,message:r,operator:"notDeepStrictEqual",stackStartFn:y4e})}Ef.strictEqual=function e(t,r,n){if(arguments.length<2)throw new N_("actual","expected");p4e(t,r)||Xg({actual:t,expected:r,message:n,operator:"strictEqual",stackStartFn:e})};Ef.notStrictEqual=function e(t,r,n){if(arguments.length<2)throw new N_("actual","expected");p4e(t,r)&&Xg({actual:t,expected:r,message:n,operator:"notStrictEqual",stackStartFn:e})};var h4e=SEt(function e(t,r,n){var i=this;kEt(this,e),r.forEach(function(a){a in t&&(n!==void 0&&typeof n[a]=="string"&&eD(t[a])&&tD(t[a],n[a])?i[a]=n[a]:i[a]=t[a])})});function FEt(e,t,r,n,i,a){if(!(r in e)||!rD(e[r],t[r])){if(!n){var o=new h4e(e,i),s=new h4e(t,i,e),l=new U_({actual:o,expected:s,operator:"deepStrictEqual",stackStartFn:a});throw l.actual=e,l.expected=t,l.operator=a.name,l}Xg({actual:e,expected:t,message:n,operator:a.name,stackStartFn:a})}}function _4e(e,t,r,n){if(typeof t!="function"){if(eD(t))return tD(t,e);if(arguments.length===2)throw new _A("expected",["Function","RegExp"],t);if(Wg(e)!=="object"||e===null){var i=new U_({actual:e,expected:t,message:r,operator:"deepStrictEqual",stackStartFn:n});throw i.operator=n.name,i}var a=Object.keys(t);if(t instanceof Error)a.push("name","message");else if(a.length===0)throw new LEt("error",t,"may not be an empty object");return B_===void 0&&xE(),a.forEach(function(o){typeof e[o]=="string"&&eD(t[o])&&tD(t[o],e[o])||FEt(e,t,o,r,a,n)}),!0}return t.prototype!==void 0&&e instanceof t?!0:Error.isPrototypeOf(t)?!1:t.call({},e)===!0}function x4e(e){if(typeof e!="function")throw new _A("fn","Function",e);try{e()}catch(t){return t}return iD}function d4e(e){return REt(e)||e!==null&&Wg(e)==="object"&&typeof e.then=="function"&&typeof e.catch=="function"}function b4e(e){return Promise.resolve().then(function(){var t;if(typeof e=="function"){if(t=e(),!d4e(t))throw new PEt("instance of Promise","promiseFn",t)}else if(d4e(e))t=e;else throw new _A("promiseFn",["Function","Promise"],e);return Promise.resolve().then(function(){return t}).then(function(){return iD}).catch(function(r){return r})})}function w4e(e,t,r,n){if(typeof r=="string"){if(arguments.length===4)throw new _A("error",["Object","Error","Function","RegExp"],r);if(Wg(t)==="object"&&t!==null){if(t.message===r)throw new c4e("error/message",'The error message "'.concat(t.message,'" is identical to the message.'))}else if(t===r)throw new c4e("error/message",'The error "'.concat(t,'" is identical to the message.'));n=r,r=void 0}else if(r!=null&&Wg(r)!=="object"&&typeof r!="function")throw new _A("error",["Object","Error","Function","RegExp"],r);if(t===iD){var i="";r&&r.name&&(i+=" (".concat(r.name,")")),i+=n?": ".concat(n):".";var a=e.name==="rejects"?"rejection":"exception";Xg({actual:void 0,expected:r,operator:e.name,message:"Missing expected ".concat(a).concat(i),stackStartFn:e})}if(r&&!_4e(t,r,n,e))throw t}function T4e(e,t,r,n){if(t!==iD){if(typeof r=="string"&&(n=r,r=void 0),!r||_4e(t,r)){var i=n?": ".concat(n):".",a=e.name==="doesNotReject"?"rejection":"exception";Xg({actual:t,expected:r,operator:e.name,message:"Got unwanted ".concat(a).concat(i,`
+`)+'Actual message: "'.concat(t&&t.message,'"'),stackStartFn:e})}throw t}}Ef.throws=function e(t){for(var r=arguments.length,n=new Array(r>1?r-1:0),i=1;i<r;i++)n[i-1]=arguments[i];w4e.apply(void 0,[e,x4e(t)].concat(n))};Ef.rejects=function e(t){for(var r=arguments.length,n=new Array(r>1?r-1:0),i=1;i<r;i++)n[i-1]=arguments[i];return b4e(t).then(function(a){return w4e.apply(void 0,[e,a].concat(n))})};Ef.doesNotThrow=function e(t){for(var r=arguments.length,n=new Array(r>1?r-1:0),i=1;i<r;i++)n[i-1]=arguments[i];T4e.apply(void 0,[e,x4e(t)].concat(n))};Ef.doesNotReject=function e(t){for(var r=arguments.length,n=new Array(r>1?r-1:0),i=1;i<r;i++)n[i-1]=arguments[i];return b4e(t).then(function(a){return T4e.apply(void 0,[e,a].concat(n))})};Ef.ifError=function e(t){if(t!=null){var r="ifError got unwanted exception: ";Wg(t)==="object"&&typeof t.message=="string"?t.message.length===0&&t.constructor?r+=t.constructor.name:r+=t.message:r+=QR(t);var n=new U_({actual:t,expected:null,operator:"ifError",message:r,stackStartFn:e}),i=t.stack;if(typeof i=="string"){var a=i.split(`
+`);a.shift();for(var o=n.stack.split(`
+`),s=0;s<a.length;s++){var l=o.indexOf(a[s]);if(l!==-1){o=o.slice(0,l);break}}n.stack="".concat(o.join(`
+`),`
+`).concat(a.join(`
+`))}throw n}};function A4e(e,t,r,n,i){if(!eD(t))throw new _A("regexp","RegExp",t);var a=i==="match";if(typeof e!="string"||tD(t,e)!==a){if(r instanceof Error)throw r;var o=!r;r=r||(typeof e!="string"?'The "string" argument must be of type string. Received type '+"".concat(Wg(e)," (").concat(QR(e),")"):(a?"The input did not match the regular expression ":"The input was expected to not match the regular expression ")+"".concat(QR(t),`. Input:
+
+`).concat(QR(e),`
+`));var s=new U_({actual:e,expected:t,message:r,operator:i,stackStartFn:n});throw s.generatedMessage=o,s}}Ef.match=function e(t,r,n){A4e(t,r,n,e,"match")};Ef.doesNotMatch=function e(t,r,n){A4e(t,r,n,e,"doesNotMatch")};function S4e(){for(var e=arguments.length,t=new Array(e),r=0;r<e;r++)t[r]=arguments[r];m4e.apply(void 0,[S4e,t.length].concat(t))}Ef.strict=DEt(S4e,Ef,{equal:Ef.strictEqual,deepEqual:Ef.deepStrictEqual,notEqual:Ef.notStrictEqual,notDeepEqual:Ef.notDeepStrictEqual});Ef.strict.strict=Ef.strict});var k4e=ye((ydr,E4e)=>{var bE=1e3,wE=bE*60,TE=wE*60,AE=TE*24,zEt=AE*365.25;E4e.exports=function(e,t){t=t||{};var r=typeof e;if(r==="string"&&e.length>0)return OEt(e);if(r==="number"&&isNaN(e)===!1)return t.long?BEt(e):qEt(e);throw new Error("val is not a non-empty string or a valid number. val="+JSON.stringify(e))};function OEt(e){if(e=String(e),!(e.length>100)){var t=/^((?:\d+)?\.?\d+) *(milliseconds?|msecs?|ms|seconds?|secs?|s|minutes?|mins?|m|hours?|hrs?|h|days?|d|years?|yrs?|y)?$/i.exec(e);if(t){var r=parseFloat(t[1]),n=(t[2]||"ms").toLowerCase();switch(n){case"years":case"year":case"yrs":case"yr":case"y":return r*zEt;case"days":case"day":case"d":return r*AE;case"hours":case"hour":case"hrs":case"hr":case"h":return r*TE;case"minutes":case"minute":case"mins":case"min":case"m":return r*wE;case"seconds":case"second":case"secs":case"sec":case"s":return r*bE;case"milliseconds":case"millisecond":case"msecs":case"msec":case"ms":return r;default:return}}}}function qEt(e){return e>=AE?Math.round(e/AE)+"d":e>=TE?Math.round(e/TE)+"h":e>=wE?Math.round(e/wE)+"m":e>=bE?Math.round(e/bE)+"s":e+"ms"}function BEt(e){return nD(e,AE,"day")||nD(e,TE,"hour")||nD(e,wE,"minute")||nD(e,bE,"second")||e+" ms"}function nD(e,t,r){if(!(e<t))return e<t*1.5?Math.floor(e/t)+" "+r:Math.ceil(e/t)+" "+r+"s"}});var L4e=ye((Lc,C4e)=>{Lc=C4e.exports=nW.debug=nW.default=nW;Lc.coerce=HEt;Lc.disable=VEt;Lc.enable=UEt;Lc.enabled=GEt;Lc.humanize=k4e();Lc.names=[];Lc.skips=[];Lc.formatters={};var iW;function NEt(e){var t=0,r;for(r in e)t=(t<<5)-t+e.charCodeAt(r),t|=0;return Lc.colors[Math.abs(t)%Lc.colors.length]}function nW(e){function t(){if(t.enabled){var r=t,n=+new Date,i=n-(iW||n);r.diff=i,r.prev=iW,r.curr=n,iW=n;for(var a=new Array(arguments.length),o=0;o<a.length;o++)a[o]=arguments[o];a[0]=Lc.coerce(a[0]),typeof a[0]!="string"&&a.unshift("%O");var s=0;a[0]=a[0].replace(/%([a-zA-Z%])/g,function(u,c){if(u==="%%")return u;s++;var f=Lc.formatters[c];if(typeof f=="function"){var h=a[s];u=f.call(r,h),a.splice(s,1),s--}return u}),Lc.formatArgs.call(r,a);var l=t.log||Lc.log||console.log.bind(console);l.apply(r,a)}}return t.namespace=e,t.enabled=Lc.enabled(e),t.useColors=Lc.useColors(),t.color=NEt(e),typeof Lc.init=="function"&&Lc.init(t),t}function UEt(e){Lc.save(e),Lc.names=[],Lc.skips=[];for(var t=(typeof e=="string"?e:"").split(/[\s,]+/),r=t.length,n=0;n<r;n++)t[n]&&(e=t[n].replace(/\*/g,".*?"),e[0]==="-"?Lc.skips.push(new RegExp("^"+e.substr(1)+"$")):Lc.names.push(new RegExp("^"+e+"$")))}function VEt(){Lc.enable("")}function GEt(e){var t,r;for(t=0,r=Lc.skips.length;t<r;t++)if(Lc.skips[t].test(e))return!1;for(t=0,r=Lc.names.length;t<r;t++)if(Lc.names[t].test(e))return!0;return!1}function HEt(e){return e instanceof Error?e.stack||e.message:e}});var R4e=ye((lp,I4e)=>{lp=I4e.exports=L4e();lp.log=XEt;lp.formatArgs=WEt;lp.save=ZEt;lp.load=P4e;lp.useColors=jEt;lp.storage=typeof chrome!="undefined"&&typeof chrome.storage!="undefined"?chrome.storage.local:YEt();lp.colors=["lightseagreen","forestgreen","goldenrod","dodgerblue","darkorchid","crimson"];function jEt(){return typeof window!="undefined"&&window.process&&window.process.type==="renderer"?!0:typeof document!="undefined"&&document.documentElement&&document.documentElement.style&&document.documentElement.style.WebkitAppearance||typeof window!="undefined"&&window.console&&(window.console.firebug||window.console.exception&&window.console.table)||typeof navigator!="undefined"&&navigator.userAgent&&navigator.userAgent.toLowerCase().match(/firefox\/(\d+)/)&&parseInt(RegExp.$1,10)>=31||typeof navigator!="undefined"&&navigator.userAgent&&navigator.userAgent.toLowerCase().match(/applewebkit\/(\d+)/)}lp.formatters.j=function(e){try{return JSON.stringify(e)}catch(t){return"[UnexpectedJSONParseError]: "+t.message}};function WEt(e){var t=this.useColors;if(e[0]=(t?"%c":"")+this.namespace+(t?" %c":" ")+e[0]+(t?"%c ":" ")+"+"+lp.humanize(this.diff),!!t){var r="color: "+this.color;e.splice(1,0,r,"color: inherit");var n=0,i=0;e[0].replace(/%[a-zA-Z%]/g,function(a){a!=="%%"&&(n++,a==="%c"&&(i=n))}),e.splice(i,0,r)}}function XEt(){return typeof console=="object"&&console.log&&Function.prototype.apply.call(console.log,console,arguments)}function ZEt(e){try{e==null?lp.storage.removeItem("debug"):lp.storage.debug=e}catch(t){}}function P4e(){var e;try{e=lp.storage.debug}catch(t){}return!e&&typeof process!="undefined"&&"env"in process&&(e=process.env.DEBUG),e}lp.enable(P4e());function YEt(){try{return window.localStorage}catch(e){}}});var U4e=ye((_dr,N4e)=>{var xA=lE(),V_=R4e()("stream-parser");N4e.exports=JEt;var F4e=-1,aD=0,KEt=1,z4e=2;function JEt(e){var t=e&&typeof e._transform=="function",r=e&&typeof e._write=="function";if(!t&&!r)throw new Error("must pass a Writable or Transform stream in");V_("extending Parser into stream"),e._bytes=$Et,e._skipBytes=QEt,t&&(e._passthrough=ekt),t?e._transform=rkt:e._write=tkt}function SE(e){V_("initializing parser stream"),e._parserBytesLeft=0,e._parserBuffers=[],e._parserBuffered=0,e._parserState=F4e,e._parserCallback=null,typeof e.push=="function"&&(e._parserOutput=e.push.bind(e)),e._parserInit=!0}function $Et(e,t){xA(!this._parserCallback,'there is already a "callback" set!'),xA(isFinite(e)&&e>0,'can only buffer a finite number of bytes > 0, got "'+e+'"'),this._parserInit||SE(this),V_("buffering %o bytes",e),this._parserBytesLeft=e,this._parserCallback=t,this._parserState=aD}function QEt(e,t){xA(!this._parserCallback,'there is already a "callback" set!'),xA(e>0,'can only skip > 0 bytes, got "'+e+'"'),this._parserInit||SE(this),V_("skipping %o bytes",e),this._parserBytesLeft=e,this._parserCallback=t,this._parserState=KEt}function ekt(e,t){xA(!this._parserCallback,'There is already a "callback" set!'),xA(e>0,'can only pass through > 0 bytes, got "'+e+'"'),this._parserInit||SE(this),V_("passing through %o bytes",e),this._parserBytesLeft=e,this._parserCallback=t,this._parserState=z4e}function tkt(e,t,r){this._parserInit||SE(this),V_("write(%o bytes)",e.length),typeof t=="function"&&(r=t),q4e(this,e,null,r)}function rkt(e,t,r){this._parserInit||SE(this),V_("transform(%o bytes)",e.length),typeof t!="function"&&(t=this._parserOutput),q4e(this,e,t,r)}function O4e(e,t,r,n){return e._parserBytesLeft<=0?n(new Error("got data but not currently parsing anything")):t.length<=e._parserBytesLeft?function(){return D4e(e,t,r,n)}:function(){var i=t.slice(0,e._parserBytesLeft);return D4e(e,i,r,function(a){if(a)return n(a);if(t.length>i.length)return function(){return O4e(e,t.slice(i.length),r,n)}})}}function D4e(e,t,r,n){if(e._parserBytesLeft-=t.length,V_("%o bytes left for stream piece",e._parserBytesLeft),e._parserState===aD?(e._parserBuffers.push(t),e._parserBuffered+=t.length):e._parserState===z4e&&r(t),e._parserBytesLeft===0){var i=e._parserCallback;if(i&&e._parserState===aD&&e._parserBuffers.length>1&&(t=Buffer.concat(e._parserBuffers,e._parserBuffered)),e._parserState!==aD&&(t=null),e._parserCallback=null,e._parserBuffered=0,e._parserState=F4e,e._parserBuffers.splice(0),i){var a=[];t&&a.push(t),r&&a.push(r);var o=i.length>a.length;o&&a.push(B4e(n));var s=i.apply(e,a);if(!o||n===s)return n}}else return n}var q4e=B4e(O4e);function B4e(e){return function(){for(var t=e.apply(this,arguments);typeof t=="function";)t=t();return t}}});var rc=ye(Hy=>{"use strict";var V4e=DSe().Transform,ikt=U4e();function ME(){V4e.call(this,{readableObjectMode:!0})}ME.prototype=Object.create(V4e.prototype);ME.prototype.constructor=ME;ikt(ME.prototype);Hy.ParserStream=ME;Hy.sliceEq=function(e,t,r){for(var n=t,i=0;i<r.length;)if(e[n++]!==r[i++])return!1;return!0};Hy.str2arr=function(e,t){var r=[],n=0;if(t&&t==="hex")for(;n<e.length;)r.push(parseInt(e.slice(n,n+2),16)),n+=2;else for(;n<e.length;n++)r.push(e.charCodeAt(n)&255);return r};Hy.readUInt16LE=function(e,t){return e[t]|e[t+1]<<8};Hy.readUInt16BE=function(e,t){return e[t+1]|e[t]<<8};Hy.readUInt32LE=function(e,t){return e[t]|e[t+1]<<8|e[t+2]<<16|e[t+3]*16777216};Hy.readUInt32BE=function(e,t){return e[t+3]|e[t+2]<<8|e[t+1]<<16|e[t]*16777216};function oD(e,t,r){Error.call(this),Error.captureStackTrace?Error.captureStackTrace(this,this.constructor):this.stack=new Error().stack||"",this.name=this.constructor.name,this.message=e,t&&(this.code=t),r&&(this.statusCode=r)}oD.prototype=Object.create(Error.prototype);oD.prototype.constructor=oD;Hy.ProbeError=oD});var G4e=ye((bdr,sD)=>{"use strict";var bA=rc().readUInt16BE,oW=rc().readUInt32BE;function EE(e,t){if(e.length<4+t)return null;var r=oW(e,t);return e.length<r+t||r<8?null:{boxtype:String.fromCharCode.apply(null,e.slice(t+4,t+8)),data:e.slice(t+8,t+r),end:t+r}}sD.exports.unbox=EE;function nkt(e,t){for(var r=0;;){var n=EE(e,r);if(!n)break;switch(n.boxtype){case"ispe":t.sizes.push({width:oW(n.data,4),height:oW(n.data,8)});break;case"irot":t.transforms.push({type:"irot",value:n.data[0]&3});break;case"imir":t.transforms.push({type:"imir",value:n.data[0]&1});break}r=n.end}}function aW(e,t,r){for(var n=0,i=0;i<r;i++)n=n*256+(e[t+i]||0);return n}function akt(e,t){for(var r=e[4]>>4&15,n=e[4]&15,i=e[5]>>4&15,a=bA(e,6),o=8,s=0;s<a;s++){var l=bA(e,o);o+=2;var u=bA(e,o);o+=2;var c=aW(e,o,i);o+=i;var f=bA(e,o);if(o+=2,u===0&&f===1){var h=aW(e,o,r),d=aW(e,o+r,n);t.item_loc[l]={length:d,offset:h+c}}o+=f*(r+n)}}function okt(e,t){for(var r=bA(e,4),n=6,i=0;i<r;i++){var a=EE(e,n);if(!a)break;if(a.boxtype==="infe"){for(var o=bA(a.data,4),s="",l=8;l<a.data.length&&a.data[l];l++)s+=String.fromCharCode(a.data[l]);t.item_inf[s]=o}n=a.end}}function skt(e,t){for(var r=0;;){var n=EE(e,r);if(!n)break;n.boxtype==="ipco"&&nkt(n.data,t),r=n.end}}function lkt(e,t){for(var r=4;;){var n=EE(e,r);if(!n)break;n.boxtype==="iprp"&&skt(n.data,t),n.boxtype==="iloc"&&akt(n.data,t),n.boxtype==="iinf"&&okt(n.data,t),r=n.end}}function ukt(e){var t=e.reduce(function(i,a){return i.width>a.width||i.width===a.width&&i.height>a.height?i:a}),r=e.reduce(function(i,a){return i.height>a.height||i.height===a.height&&i.width>a.width?i:a}),n;return t.width>r.height||t.width===r.height&&t.height>r.width?n=t:n=r,n}sD.exports.readSizeFromMeta=function(e){var t={sizes:[],transforms:[],item_inf:{},item_loc:{}};if(lkt(e,t),!!t.sizes.length){var r=ukt(t.sizes),n=1;t.transforms.forEach(function(a){var o={1:6,2:5,3:8,4:7,5:4,6:3,7:2,8:1},s={1:4,2:3,3:2,4:1,5:6,6:5,7:8,8:7};if(a.type==="imir"&&(a.value===0?n=s[n]:(n=s[n],n=o[n],n=o[n])),a.type==="irot")for(var l=0;l<a.value;l++)n=o[n]});var i=null;return t.item_inf.Exif&&(i=t.item_loc[t.item_inf.Exif]),{width:r.width,height:r.height,orientation:t.transforms.length?n:null,variants:t.sizes,exif_location:i}}};sD.exports.getMimeType=function(e){var t=String.fromCharCode.apply(null,e.slice(0,4)),r={};r[t]=!0;for(var n=8;n<e.length;n+=4)r[String.fromCharCode.apply(null,e.slice(n,n+4))]=!0;if(!(!r.mif1&&!r.msf1&&!r.miaf))return t==="avif"||t==="avis"||t==="avio"?{type:"avif",mime:"image/avif"}:t==="heic"||t==="heix"?{type:"heic",mime:"image/heic"}:t==="hevc"||t==="hevx"?{type:"heic",mime:"image/heic-sequence"}:r.avif||r.avis?{type:"avif",mime:"image/avif"}:r.heic||r.heix||r.hevc||r.hevx||r.heis?r.msf1?{type:"heif",mime:"image/heif-sequence"}:{type:"heif",mime:"image/heif"}:{type:"avif",mime:"image/avif"}}});var uD=ye((wdr,sW)=>{"use strict";function lD(e,t){var r=new Error(e);return r.code=t,r}function ckt(e){try{return decodeURIComponent(escape(e))}catch(t){return e}}function jy(e,t,r){this.input=e.subarray(t,r),this.start=t;var n=String.fromCharCode.apply(null,this.input.subarray(0,4));if(n!=="II*\0"&&n!=="MM\0*")throw lD("invalid TIFF signature","EBADDATA");this.big_endian=n[0]==="M"}jy.prototype.each=function(e){this.aborted=!1;var t=this.read_uint32(4);for(this.ifds_to_read=[{id:0,offset:t}];this.ifds_to_read.length>0&&!this.aborted;){var r=this.ifds_to_read.shift();r.offset&&this.scan_ifd(r.id,r.offset,e)}};jy.prototype.read_uint16=function(e){var t=this.input;if(e+2>t.length)throw lD("unexpected EOF","EBADDATA");return this.big_endian?t[e]*256+t[e+1]:t[e]+t[e+1]*256};jy.prototype.read_uint32=function(e){var t=this.input;if(e+4>t.length)throw lD("unexpected EOF","EBADDATA");return this.big_endian?t[e]*16777216+t[e+1]*65536+t[e+2]*256+t[e+3]:t[e]+t[e+1]*256+t[e+2]*65536+t[e+3]*16777216};jy.prototype.is_subifd_link=function(e,t){return e===0&&t===34665||e===0&&t===34853||e===34665&&t===40965};jy.prototype.exif_format_length=function(e){switch(e){case 1:case 2:case 6:case 7:return 1;case 3:case 8:return 2;case 4:case 9:case 11:return 4;case 5:case 10:case 12:return 8;default:return 0}};jy.prototype.exif_format_read=function(e,t){var r;switch(e){case 1:case 2:return r=this.input[t],r;case 6:return r=this.input[t],r|(r&128)*33554430;case 3:return r=this.read_uint16(t),r;case 8:return r=this.read_uint16(t),r|(r&32768)*131070;case 4:return r=this.read_uint32(t),r;case 9:return r=this.read_uint32(t),r|0;case 5:case 10:case 11:case 12:return null;case 7:return null;default:return null}};jy.prototype.scan_ifd=function(e,t,r){var n=this.read_uint16(t);t+=2;for(var i=0;i<n;i++){var a=this.read_uint16(t),o=this.read_uint16(t+2),s=this.read_uint32(t+4),l=this.exif_format_length(o),u=s*l,c=u<=4?t+8:this.read_uint32(t+8),f=!1;if(c+u>this.input.length)throw lD("unexpected EOF","EBADDATA");for(var h=[],d=c,v=0;v<s;v++,d+=l){var _=this.exif_format_read(o,d);if(_===null){h=null;break}h.push(_)}Array.isArray(h)&&o===2&&(h=ckt(String.fromCharCode.apply(null,h)),h&&h[h.length-1]==="\0"&&(h=h.slice(0,-1))),this.is_subifd_link(e,a)&&Array.isArray(h)&&Number.isInteger(h[0])&&h[0]>0&&(this.ifds_to_read.push({id:a,offset:h[0]}),f=!0);var b={is_big_endian:this.big_endian,ifd:e,tag:a,format:o,count:s,entry_offset:t+this.start,data_length:u,data_offset:c+this.start,value:h,is_subifd_link:f};if(r(b)===!1){this.aborted=!0;return}t+=12}e===0&&this.ifds_to_read.push({id:1,offset:this.read_uint32(t)})};sW.exports.ExifParser=jy;sW.exports.get_orientation=function(e){var t=0;try{return new jy(e,0,e.length).each(function(r){if(r.ifd===0&&r.tag===274&&Array.isArray(r.value))return t=r.value[0],!1}),t}catch(r){return-1}}});var j4e=ye((Tdr,H4e)=>{"use strict";var fkt=rc().str2arr,hkt=rc().sliceEq,dkt=rc().readUInt32BE,cD=G4e(),vkt=uD(),pkt=fkt("ftyp");H4e.exports=function(e){if(hkt(e,4,pkt)){var t=cD.unbox(e,0);if(t){var r=cD.getMimeType(t.data);if(r){for(var n,i=t.end;;){var a=cD.unbox(e,i);if(!a)break;if(i=a.end,a.boxtype==="mdat")return;if(a.boxtype==="meta"){n=a.data;break}}if(n){var o=cD.readSizeFromMeta(n);if(o){var s={width:o.width,height:o.height,type:r.type,mime:r.mime,wUnits:"px",hUnits:"px"};if(o.variants.length>1&&(s.variants=o.variants),o.orientation&&(s.orientation=o.orientation),o.exif_location&&o.exif_location.offset+o.exif_location.length<=e.length){var l=dkt(e,o.exif_location.offset),u=e.slice(o.exif_location.offset+l+4,o.exif_location.offset+o.exif_location.length),c=vkt.get_orientation(u);c>0&&(s.orientation=c)}return s}}}}}}});var Z4e=ye((Adr,X4e)=>{"use strict";var gkt=rc().str2arr,mkt=rc().sliceEq,W4e=rc().readUInt16LE,ykt=gkt("BM");X4e.exports=function(e){if(!(e.length<26)&&mkt(e,0,ykt))return{width:W4e(e,18),height:W4e(e,22),type:"bmp",mime:"image/bmp",wUnits:"px",hUnits:"px"}}});var Q4e=ye((Sdr,$4e)=>{"use strict";var J4e=rc().str2arr,Y4e=rc().sliceEq,K4e=rc().readUInt16LE,_kt=J4e("GIF87a"),xkt=J4e("GIF89a");$4e.exports=function(e){if(!(e.length<10)&&!(!Y4e(e,0,_kt)&&!Y4e(e,0,xkt)))return{width:K4e(e,6),height:K4e(e,8),type:"gif",mime:"image/gif",wUnits:"px",hUnits:"px"}}});var rEe=ye((Mdr,tEe)=>{"use strict";var lW=rc().readUInt16LE,bkt=0,wkt=1,eEe=16;tEe.exports=function(e){var t=lW(e,0),r=lW(e,2),n=lW(e,4);if(!(t!==bkt||r!==wkt||!n)){for(var i=[],a={width:0,height:0},o=0;o<n;o++){var s=e[6+eEe*o]||256,l=e[6+eEe*o+1]||256,u={width:s,height:l};i.push(u),(s>a.width||l>a.height)&&(a=u)}return{width:a.width,height:a.height,variants:i,type:"ico",mime:"image/x-icon",wUnits:"px",hUnits:"px"}}}});var nEe=ye((Edr,iEe)=>{"use strict";var uW=rc().readUInt16BE,Tkt=rc().str2arr,Akt=rc().sliceEq,Skt=uD(),Mkt=Tkt("Exif\0\0");iEe.exports=function(e){if(!(e.length<2)&&!(e[0]!==255||e[1]!==216||e[2]!==255))for(var t=2;;){for(;;){if(e.length-t<2)return;if(e[t++]===255)break}for(var r=e[t++],n;r===255;)r=e[t++];if(208<=r&&r<=217||r===1)n=0;else if(192<=r&&r<=254){if(e.length-t<2)return;n=uW(e,t)-2,t+=2}else return;if(r===217||r===218)return;var i;if(r===225&&n>=10&&Akt(e,t,Mkt)&&(i=Skt.get_orientation(e.slice(t+6,t+n))),n>=5&&192<=r&&r<=207&&r!==196&&r!==200&&r!==204){if(e.length-t<n)return;var a={width:uW(e,t+3),height:uW(e,t+1),type:"jpg",mime:"image/jpeg",wUnits:"px",hUnits:"px"};return i>0&&(a.orientation=i),a}t+=n}}});var uEe=ye((kdr,lEe)=>{"use strict";var sEe=rc().str2arr,aEe=rc().sliceEq,oEe=rc().readUInt32BE,Ekt=sEe(`\x89PNG\r
+
+`),kkt=sEe("IHDR");lEe.exports=function(e){if(!(e.length<24)&&aEe(e,0,Ekt)&&aEe(e,12,kkt))return{width:oEe(e,16),height:oEe(e,20),type:"png",mime:"image/png",wUnits:"px",hUnits:"px"}}});var hEe=ye((Cdr,fEe)=>{"use strict";var Ckt=rc().str2arr,Lkt=rc().sliceEq,cEe=rc().readUInt32BE,Pkt=Ckt("8BPS\0");fEe.exports=function(e){if(!(e.length<22)&&Lkt(e,0,Pkt))return{width:cEe(e,18),height:cEe(e,14),type:"psd",mime:"image/vnd.adobe.photoshop",wUnits:"px",hUnits:"px"}}});var pEe=ye((Ldr,vEe)=>{"use strict";function Ikt(e){return e===32||e===9||e===13||e===10}function wA(e){return typeof e=="number"&&isFinite(e)&&e>0}function Rkt(e){var t=0,r=e.length;for(e[0]===239&&e[1]===187&&e[2]===191&&(t=3);t<r&&Ikt(e[t]);)t++;return t===r?!1:e[t]===60}var Dkt=/<[-_.:a-zA-Z0-9][^>]*>/,Fkt=/^<([-_.:a-zA-Z0-9]+:)?svg\s/,zkt=/[^-]\bwidth="([^%]+?)"|[^-]\bwidth='([^%]+?)'/,Okt=/\bheight="([^%]+?)"|\bheight='([^%]+?)'/,qkt=/\bview[bB]ox="(.+?)"|\bview[bB]ox='(.+?)'/,dEe=/in$|mm$|cm$|pt$|pc$|px$|em$|ex$/;function Bkt(e){var t=e.match(zkt),r=e.match(Okt),n=e.match(qkt);return{width:t&&(t[1]||t[2]),height:r&&(r[1]||r[2]),viewbox:n&&(n[1]||n[2])}}function Um(e){return dEe.test(e)?e.match(dEe)[0]:"px"}vEe.exports=function(e){if(Rkt(e)){for(var t="",r=0;r<e.length;r++)t+=String.fromCharCode(e[r]);var n=(t.match(Dkt)||[""])[0];if(Fkt.test(n)){var i=Bkt(n),a=parseFloat(i.width),o=parseFloat(i.height);if(i.width&&i.height)return!wA(a)||!wA(o)?void 0:{width:a,height:o,type:"svg",mime:"image/svg+xml",wUnits:Um(i.width),hUnits:Um(i.height)};var s=(i.viewbox||"").split(" "),l={width:s[2],height:s[3]},u=parseFloat(l.width),c=parseFloat(l.height);if(!(!wA(u)||!wA(c))&&Um(l.width)===Um(l.height)){var f=u/c;return i.width?wA(a)?{width:a,height:a/f,type:"svg",mime:"image/svg+xml",wUnits:Um(i.width),hUnits:Um(i.width)}:void 0:i.height?wA(o)?{width:o*f,height:o,type:"svg",mime:"image/svg+xml",wUnits:Um(i.height),hUnits:Um(i.height)}:void 0:{width:u,height:c,type:"svg",mime:"image/svg+xml",wUnits:Um(l.width),hUnits:Um(l.height)}}}}}});var xEe=ye((Pdr,_Ee)=>{"use strict";var yEe=rc().str2arr,gEe=rc().sliceEq,Nkt=rc().readUInt16LE,Ukt=rc().readUInt16BE,Vkt=rc().readUInt32LE,Gkt=rc().readUInt32BE,Hkt=yEe("II*\0"),jkt=yEe("MM\0*");function fD(e,t,r){return r?Ukt(e,t):Nkt(e,t)}function cW(e,t,r){return r?Gkt(e,t):Vkt(e,t)}function mEe(e,t,r){var n=fD(e,t+2,r),i=cW(e,t+4,r);return i!==1||n!==3&&n!==4?null:n===3?fD(e,t+8,r):cW(e,t+8,r)}_Ee.exports=function(e){if(!(e.length<8)&&!(!gEe(e,0,Hkt)&&!gEe(e,0,jkt))){var t=e[0]===77,r=cW(e,4,t)-8;if(!(r<0)){var n=r+8;if(!(e.length-n<2)){var i=fD(e,n+0,t)*12;if(!(i<=0)&&(n+=2,!(e.length-n<i))){var a,o,s,l;for(a=0;a<i;a+=12)l=fD(e,n+a,t),l===256?o=mEe(e,n+a,t):l===257&&(s=mEe(e,n+a,t));if(o&&s)return{width:o,height:s,type:"tiff",mime:"image/tiff",wUnits:"px",hUnits:"px"}}}}}}});var SEe=ye((Idr,AEe)=>{"use strict";var TEe=rc().str2arr,bEe=rc().sliceEq,wEe=rc().readUInt16LE,fW=rc().readUInt32LE,Wkt=uD(),Xkt=TEe("RIFF"),Zkt=TEe("WEBP");function Ykt(e,t){if(!(e[t+3]!==157||e[t+4]!==1||e[t+5]!==42))return{width:wEe(e,t+6)&16383,height:wEe(e,t+8)&16383,type:"webp",mime:"image/webp",wUnits:"px",hUnits:"px"}}function Kkt(e,t){if(e[t]===47){var r=fW(e,t+1);return{width:(r&16383)+1,height:(r>>14&16383)+1,type:"webp",mime:"image/webp",wUnits:"px",hUnits:"px"}}}function Jkt(e,t){return{width:(e[t+6]<<16|e[t+5]<<8|e[t+4])+1,height:(e[t+9]<<t|e[t+8]<<8|e[t+7])+1,type:"webp",mime:"image/webp",wUnits:"px",hUnits:"px"}}AEe.exports=function(e){if(!(e.length<16)&&!(!bEe(e,0,Xkt)&&!bEe(e,8,Zkt))){var t=12,r=null,n=0,i=fW(e,4)+8;if(!(i>e.length)){for(;t+8<i;){if(e[t]===0){t++;continue}var a=String.fromCharCode.apply(null,e.slice(t,t+4)),o=fW(e,t+4);a==="VP8 "&&o>=10?r=r||Ykt(e,t+8):a==="VP8L"&&o>=9?r=r||Kkt(e,t+8):a==="VP8X"&&o>=10?r=r||Jkt(e,t+8):a==="EXIF"&&(n=Wkt.get_orientation(e.slice(t+8,t+8+o)),t=1/0),t+=8+o}if(r)return n>0&&(r.orientation=n),r}}}});var EEe=ye((Rdr,MEe)=>{"use strict";MEe.exports={avif:j4e(),bmp:Z4e(),gif:Q4e(),ico:rEe(),jpeg:nEe(),png:uEe(),psd:hEe(),svg:pEe(),tiff:xEe(),webp:SEe()}});var kEe=ye((Ddr,dW)=>{"use strict";var hW=EEe();function $kt(e){for(var t=Object.keys(hW),r=0;r<t.length;r++){var n=hW[t[r]](e);if(n)return n}return null}dW.exports=function(t){return $kt(t)};dW.exports.parsers=hW});var LEe=ye(CEe=>{"use strict";var Qkt=kEe(),eCt=Py().IMAGE_URL_PREFIX,tCt=c2().Buffer;CEe.getImageSize=function(e){var t=e.replace(eCt,""),r=new tCt(t,"base64");return Qkt(r)}});var REe=ye((zdr,IEe)=>{"use strict";var PEe=Dr(),rCt=YT(),iCt=Eo(),hD=ho(),nCt=Dr().maxRowLength,aCt=LEe().getImageSize;IEe.exports=function(t,r){var n,i;if(r._hasZ)n=r.z.length,i=nCt(r.z);else if(r._hasSource){var a=aCt(r.source);n=a.height,i=a.width}var o=hD.getFromId(t,r.xaxis||"x"),s=hD.getFromId(t,r.yaxis||"y"),l=o.d2c(r.x0)-r.dx/2,u=s.d2c(r.y0)-r.dy/2,c,f=[l,l+i*r.dx],h=[u,u+n*r.dy];if(o&&o.type==="log")for(c=0;c<i;c++)f.push(l+c*r.dx);if(s&&s.type==="log")for(c=0;c<n;c++)h.push(u+c*r.dy);r._extremes[o._id]=hD.findExtremes(o,f),r._extremes[s._id]=hD.findExtremes(s,h),r._scaler=lCt(r);var d={x0:l,y0:u,z:r.z,w:i,h:n};return[d]};function oCt(e,t,r,n){return function(i){return PEe.constrain((i-e)*t,r,n)}}function sCt(e,t){return function(r){return PEe.constrain(r,e,t)}}function lCt(e){var t=rCt.colormodel[e.colormodel],r=t.colormodel||e.colormodel,n=r.length;e._sArray=[];for(var i=0;i<n;i++)t.min[i]!==e.zmin[i]||t.max[i]!==e.zmax[i]?e._sArray.push(oCt(e.zmin[i],(t.max[i]-t.min[i])/(e.zmax[i]-e.zmin[i]),t.min[i],t.max[i])):e._sArray.push(sCt(t.min[i],t.max[i]));return function(a){for(var o=a.slice(0,n),s=0;s<n;s++){var l=o[s];if(!iCt(l))return!1;o[s]=e._sArray[s](l)}return o}}});var zEe=ye((Odr,FEe)=>{"use strict";var uCt=Oa(),A2=Dr(),DEe=A2.strTranslate,cCt=Wp(),fCt=YT(),hCt=QV(),dCt=h8().STYLE;FEe.exports=function(t,r,n,i){var a=r.xaxis,o=r.yaxis,s=!t._context._exportedPlot&&hCt();A2.makeTraceGroups(i,n,"im").each(function(l){var u=uCt.select(this),c=l[0],f=c.trace,h=(f.zsmooth==="fast"||f.zsmooth===!1&&s)&&!f._hasZ&&f._hasSource&&a.type==="linear"&&o.type==="linear";f._realImage=h;var d=c.z,v=c.x0,_=c.y0,b=c.w,p=c.h,k=f.dx,E=f.dy,S,L,x,C,M,g;for(g=0;S===void 0&&g<b;)S=a.c2p(v+g*k),g++;for(g=b;L===void 0&&g>0;)L=a.c2p(v+g*k),g--;for(g=0;C===void 0&&g<p;)C=o.c2p(_+g*E),g++;for(g=p;M===void 0&&g>0;)M=o.c2p(_+g*E),g--;if(L<S&&(x=L,L=S,S=x),M<C&&(x=C,C=M,M=x),!h){var P=.5;S=Math.max(-P*a._length,S),L=Math.min((1+P)*a._length,L),C=Math.max(-P*o._length,C),M=Math.min((1+P)*o._length,M)}var T=Math.round(L-S),z=Math.round(M-C),O=T<=0||z<=0;if(O){var V=u.selectAll("image").data([]);V.exit().remove();return}function G(me){var ie=document.createElement("canvas");ie.width=T,ie.height=z;var Se=ie.getContext("2d",{willReadFrequently:!0}),Le=function(st){return A2.constrain(Math.round(a.c2p(v+st*k)-S),0,T)},Ae=function(st){return A2.constrain(Math.round(o.c2p(_+st*E)-C),0,z)},Fe=fCt.colormodel[f.colormodel],Pe=Fe.colormodel||f.colormodel,ge=Fe.fmt,Re;for(g=0;g<c.w;g++){var ce=Le(g),Ze=Le(g+1);if(!(Ze===ce||isNaN(Ze)||isNaN(ce)))for(var ut=0;ut<c.h;ut++){var pt=Ae(ut),Zt=Ae(ut+1);Zt===pt||isNaN(Zt)||isNaN(pt)||!me(g,ut)||(Re=f._scaler(me(g,ut)),Re?Se.fillStyle=Pe+"("+ge(Re).join(",")+")":Se.fillStyle="rgba(0,0,0,0)",Se.fillRect(ce,pt,Ze-ce,Zt-pt))}}return ie}var Z=u.selectAll("image").data([l]);Z.enter().append("svg:image").attr({xmlns:cCt.svg,preserveAspectRatio:"none"}),Z.exit().remove();var j=f.zsmooth===!1?dCt:"";if(h){var N=A2.simpleMap(a.range,a.r2l),H=A2.simpleMap(o.range,o.r2l),te=N[1]<N[0],oe=H[1]>H[0];if(te||oe){var _e=S+T/2,Ee=C+z/2;j+="transform:"+DEe(_e+"px",Ee+"px")+"scale("+(te?-1:1)+","+(oe?-1:1)+")"+DEe(-_e+"px",-Ee+"px")+";"}}Z.attr("style",j);var Ce=new Promise(function(me){if(f._hasZ)me();else if(f._hasSource)if(f._canvas&&f._canvas.el.width===b&&f._canvas.el.height===p&&f._canvas.source===f.source)me();else{var ie=document.createElement("canvas");ie.width=b,ie.height=p;var Se=ie.getContext("2d",{willReadFrequently:!0});f._image=f._image||new Image;var Le=f._image;Le.onload=function(){Se.drawImage(Le,0,0),f._canvas={el:ie,source:f.source},me()},Le.setAttribute("src",f.source)}}).then(function(){var me,ie;if(f._hasZ)ie=G(function(Ae,Fe){var Pe=d[Fe][Ae];return A2.isTypedArray(Pe)&&(Pe=Array.from(Pe)),Pe}),me=ie.toDataURL("image/png");else if(f._hasSource)if(h)me=f.source;else{var Se=f._canvas.el.getContext("2d",{willReadFrequently:!0}),Le=Se.getImageData(0,0,b,p).data;ie=G(function(Ae,Fe){var Pe=4*(Fe*b+Ae);return[Le[Pe],Le[Pe+1],Le[Pe+2],Le[Pe+3]]}),me=ie.toDataURL("image/png")}Z.attr({"xlink:href":me,height:z,width:T,x:S,y:C})});t._promises.push(Ce)})}});var qEe=ye((qdr,OEe)=>{"use strict";var vCt=Oa();OEe.exports=function(t){vCt.select(t).selectAll(".im image").style("opacity",function(r){return r[0].trace.opacity})}});var VEe=ye((Bdr,UEe)=>{"use strict";var BEe=vf(),NEe=Dr(),dD=NEe.isArrayOrTypedArray,pCt=YT();UEe.exports=function(t,r,n){var i=t.cd[0],a=i.trace,o=t.xa,s=t.ya;if(!(BEe.inbox(r-i.x0,r-(i.x0+i.w*a.dx),0)>0||BEe.inbox(n-i.y0,n-(i.y0+i.h*a.dy),0)>0)){var l=Math.floor((r-i.x0)/a.dx),u=Math.floor(Math.abs(n-i.y0)/a.dy),c;if(a._hasZ?c=i.z[u][l]:a._hasSource&&(c=a._canvas.el.getContext("2d",{willReadFrequently:!0}).getImageData(l,u,1,1).data),!!c){var f=i.hi||a.hoverinfo,h;if(f){var d=f.split("+");d.indexOf("all")!==-1&&(d=["color"]),d.indexOf("color")!==-1&&(h=!0)}var v=pCt.colormodel[a.colormodel],_=v.colormodel||a.colormodel,b=_.length,p=a._scaler(c),k=v.suffix,E=[];(a.hovertemplate||h)&&(E.push("["+[p[0]+k[0],p[1]+k[1],p[2]+k[2]].join(", ")),b===4&&E.push(", "+p[3]+k[3]),E.push("]"),E=E.join(""),t.extraText=_.toUpperCase()+": "+E);var S;dD(a.hovertext)&&dD(a.hovertext[u])?S=a.hovertext[u][l]:dD(a.text)&&dD(a.text[u])&&(S=a.text[u][l]);var L=s.c2p(i.y0+(u+.5)*a.dy),x=i.x0+(l+.5)*a.dx,C=i.y0+(u+.5)*a.dy,M="["+c.slice(0,a.colormodel.length).join(", ")+"]";return[NEe.extendFlat(t,{index:[u,l],x0:o.c2p(i.x0+l*a.dx),x1:o.c2p(i.x0+(l+1)*a.dx),y0:L,y1:L,color:p,xVal:x,xLabelVal:x,yVal:C,yLabelVal:C,zLabelVal:M,text:S,hovertemplateLabels:{zLabel:M,colorLabel:E,"color[0]Label":p[0]+k[0],"color[1]Label":p[1]+k[1],"color[2]Label":p[2]+k[2],"color[3]Label":p[3]+k[3]}})]}}}});var HEe=ye((Ndr,GEe)=>{"use strict";GEe.exports=function(t,r){return"xVal"in r&&(t.x=r.xVal),"yVal"in r&&(t.y=r.yVal),r.xa&&(t.xaxis=r.xa),r.ya&&(t.yaxis=r.ya),t.color=r.color,t.colormodel=r.trace.colormodel,t.z||(t.z=r.color),t}});var WEe=ye((Udr,jEe)=>{"use strict";jEe.exports={attributes:uH(),supplyDefaults:V3e(),calc:REe(),plot:zEe(),style:qEe(),hoverPoints:VEe(),eventData:HEe(),moduleType:"trace",name:"image",basePlotModule:ph(),categories:["cartesian","svg","2dMap","noSortingByValue"],animatable:!1,meta:{}}});var ZEe=ye((Vdr,XEe)=>{"use strict";XEe.exports=WEe()});var M2=ye((Gdr,KEe)=>{"use strict";var vW=Gl(),gCt=Cc().attributes,mCt=ec(),yCt=Lh(),{hovertemplateAttrs:_Ct,texttemplateAttrs:xCt,templatefallbackAttrs:YEe}=Ll(),S2=Ao().extendFlat,bCt=Pd().pattern,vD=mCt({editType:"plot",arrayOk:!0,colorEditType:"plot"});KEe.exports={labels:{valType:"data_array",editType:"calc"},label0:{valType:"number",dflt:0,editType:"calc"},dlabel:{valType:"number",dflt:1,editType:"calc"},values:{valType:"data_array",editType:"calc"},marker:{colors:{valType:"data_array",editType:"calc"},line:{color:{valType:"color",dflt:yCt.defaultLine,arrayOk:!0,editType:"style"},width:{valType:"number",min:0,dflt:0,arrayOk:!0,editType:"style"},editType:"calc"},pattern:bCt,editType:"calc"},text:{valType:"data_array",editType:"plot"},hovertext:{valType:"string",dflt:"",arrayOk:!0,editType:"style"},scalegroup:{valType:"string",dflt:"",editType:"calc"},textinfo:{valType:"flaglist",flags:["label","text","value","percent"],extras:["none"],editType:"calc"},hoverinfo:S2({},vW.hoverinfo,{flags:["label","text","value","percent","name"]}),hovertemplate:_Ct({},{keys:["label","color","value","percent","text"]}),hovertemplatefallback:YEe(),texttemplate:xCt({editType:"plot"},{keys:["label","color","value","percent","text"]}),texttemplatefallback:YEe({editType:"plot"}),textposition:{valType:"enumerated",values:["inside","outside","auto","none"],dflt:"auto",arrayOk:!0,editType:"plot"},textfont:S2({},vD,{}),insidetextorientation:{valType:"enumerated",values:["horizontal","radial","tangential","auto"],dflt:"auto",editType:"plot"},insidetextfont:S2({},vD,{}),outsidetextfont:S2({},vD,{}),automargin:{valType:"boolean",dflt:!1,editType:"plot"},showlegend:S2({},vW.showlegend,{arrayOk:!0}),legend:S2({},vW.legend,{arrayOk:!0}),title:{text:{valType:"string",dflt:"",editType:"plot"},font:S2({},vD,{}),position:{valType:"enumerated",values:["top left","top center","top right","middle center","bottom left","bottom center","bottom right"],editType:"plot"},editType:"plot"},domain:gCt({name:"pie",trace:!0,editType:"calc"}),hole:{valType:"number",min:0,max:1,dflt:0,editType:"calc"},sort:{valType:"boolean",dflt:!0,editType:"calc"},direction:{valType:"enumerated",values:["clockwise","counterclockwise"],dflt:"counterclockwise",editType:"calc"},rotation:{valType:"angle",dflt:0,editType:"calc"},pull:{valType:"number",min:0,max:1,dflt:0,arrayOk:!0,editType:"calc"}}});var E2=ye((Hdr,QEe)=>{"use strict";var wCt=Eo(),kE=Dr(),TCt=M2(),ACt=Cc().defaults,SCt=r0().handleText,MCt=Dr().coercePattern;function JEe(e,t){var r=kE.isArrayOrTypedArray(e),n=kE.isArrayOrTypedArray(t),i=Math.min(r?e.length:1/0,n?t.length:1/0);if(isFinite(i)||(i=0),i&&n){for(var a,o=0;o<i;o++){var s=t[o];if(wCt(s)&&s>0){a=!0;break}}a||(i=0)}return{hasLabels:r,hasValues:n,len:i}}function $Ee(e,t,r,n,i){var a=n("marker.line.width");a&&n("marker.line.color",i?void 0:r.paper_bgcolor);var o=n("marker.colors");MCt(n,"marker.pattern",o),e.marker&&!t.marker.pattern.fgcolor&&(t.marker.pattern.fgcolor=e.marker.colors),t.marker.pattern.bgcolor||(t.marker.pattern.bgcolor=r.paper_bgcolor)}function ECt(e,t,r,n){function i(k,E){return kE.coerce(e,t,TCt,k,E)}var a=i("labels"),o=i("values"),s=JEe(a,o),l=s.len;if(t._hasLabels=s.hasLabels,t._hasValues=s.hasValues,!t._hasLabels&&t._hasValues&&(i("label0"),i("dlabel")),!l){t.visible=!1;return}t._length=l,$Ee(e,t,n,i,!0),i("scalegroup");var u=i("text"),c=i("texttemplate");i("texttemplatefallback");var f;if(c||(f=i("textinfo",kE.isArrayOrTypedArray(u)?"text+percent":"percent")),i("hovertext"),i("hovertemplate"),i("hovertemplatefallback"),c||f&&f!=="none"){var h=i("textposition");SCt(e,t,n,i,h,{moduleHasSelected:!1,moduleHasUnselected:!1,moduleHasConstrain:!1,moduleHasCliponaxis:!1,moduleHasTextangle:!1,moduleHasInsideanchor:!1});var d=Array.isArray(h)||h==="auto",v=d||h==="outside";v&&i("automargin"),(h==="inside"||h==="auto"||Array.isArray(h))&&i("insidetextorientation")}else f==="none"&&i("textposition","none");ACt(t,n,i);var _=i("hole"),b=i("title.text");if(b){var p=i("title.position",_?"middle center":"top center");!_&&p==="middle center"&&(t.title.position="top center"),kE.coerceFont(i,"title.font",n.font)}i("sort"),i("direction"),i("rotation"),i("pull")}QEe.exports={handleLabelsAndValues:JEe,handleMarkerDefaults:$Ee,supplyDefaults:ECt}});var pD=ye((jdr,eke)=>{"use strict";eke.exports={hiddenlabels:{valType:"data_array",editType:"calc"},piecolorway:{valType:"colorlist",editType:"calc"},extendpiecolors:{valType:"boolean",dflt:!0,editType:"calc"}}});var rke=ye((Wdr,tke)=>{"use strict";var kCt=Dr(),CCt=pD();tke.exports=function(t,r){function n(i,a){return kCt.coerce(t,r,CCt,i,a)}n("hiddenlabels"),n("piecolorway",r.colorway),n("extendpiecolors")}});var TA=ye((Xdr,ake)=>{"use strict";var LCt=Eo(),pW=cd(),PCt=ka(),ICt={};function RCt(e,t){var r=[],n=e._fullLayout,i=n.hiddenlabels||[],a=t.labels,o=t.marker.colors||[],s=t.values,l=t._length,u=t._hasValues&&l,c,f;if(t.dlabel)for(a=new Array(l),c=0;c<l;c++)a[c]=String(t.label0+c*t.dlabel);var h={},d=ike(n["_"+t.type+"colormap"]),v=0,_=!1;for(c=0;c<l;c++){var b,p,k;if(u){if(b=s[c],!LCt(b))continue;b=+b}else b=1;p=a[c],(p===void 0||p==="")&&(p=c),p=String(p);var E=h[p];E===void 0?(h[p]=r.length,k=i.indexOf(p)!==-1,k||(v+=b),r.push({v:b,label:p,color:d(o[c],p),i:c,pts:[c],hidden:k})):(_=!0,f=r[E],f.v+=b,f.pts.push(c),f.hidden||(v+=b),f.color===!1&&o[c]&&(f.color=d(o[c],p)))}r=r.filter(function(L){return L.v>=0});var S=t.type==="funnelarea"?_:t.sort;return S&&r.sort(function(L,x){return x.v-L.v}),r[0]&&(r[0].vTotal=v),r}function ike(e){return function(r,n){return!r||(r=pW(r),!r.isValid())?!1:(r=PCt.addOpacity(r,r.getAlpha()),e[n]||(e[n]=r),r)}}function DCt(e,t){var r=(t||{}).type;r||(r="pie");var n=e._fullLayout,i=e.calcdata,a=n[r+"colorway"],o=n["_"+r+"colormap"];n["extend"+r+"colors"]&&(a=nke(a,ICt));for(var s=0,l=0;l<i.length;l++){var u=i[l],c=u[0].trace.type;if(c===r)for(var f=0;f<u.length;f++){var h=u[f];h.color===!1&&(o[h.label]?h.color=o[h.label]:(o[h.label]=h.color=a[s%a.length],s++))}}}function nke(e,t){var r,n=JSON.stringify(e),i=t[n];if(!i){for(i=e.slice(),r=0;r<e.length;r++)i.push(pW(e[r]).lighten(20).toHexString());for(r=0;r<e.length;r++)i.push(pW(e[r]).darken(20).toHexString());t[n]=i}return i}ake.exports={calc:RCt,crossTraceCalc:DCt,makePullColorFn:ike,generateExtendedColors:nke}});var ske=ye((Zdr,oke)=>{"use strict";var FCt=ip().appendArrayMultiPointValues;oke.exports=function(t,r){var n={curveNumber:r.index,pointNumbers:t.pts,data:r._input,fullData:r,label:t.label,color:t.color,value:t.v,percent:t.percent,text:t.text,bbox:t.bbox,v:t.v};return t.pts.length===1&&(n.pointNumber=n.i=t.pts[0]),FCt(n,r,t.pts),r.type==="funnelarea"&&(delete n.v,delete n.i),n}});var _D=ye((Ydr,kke)=>{"use strict";var Fp=Oa(),zCt=Mc(),gD=vf(),dke=ka(),Wy=So(),rv=Dr(),OCt=rv.strScale,lke=rv.strTranslate,gW=ru(),vke=bv(),qCt=vke.recordMinTextSize,BCt=vke.clearMinTextSize,pke=e2().TEXTPAD,ns=l_(),mD=ske(),uke=Dr().isValidTextValue;function NCt(e,t){var r=e._context.staticPlot,n=e._fullLayout,i=n._size;BCt("pie",n),yke(t,e),Ske(t,i);var a=rv.makeTraceGroups(n._pielayer,t,"trace").each(function(o){var s=Fp.select(this),l=o[0],u=l.trace;YCt(o),s.attr("stroke-linejoin","round"),s.each(function(){var c=Fp.select(this).selectAll("g.slice").data(o);c.enter().append("g").classed("slice",!0),c.exit().remove();var f=[[[],[]],[[],[]]],h=!1;c.each(function(S,L){if(S.hidden){Fp.select(this).selectAll("path,g").remove();return}S.pointNumber=S.i,S.curveNumber=u.index,f[S.pxmid[1]<0?0:1][S.pxmid[0]<0?0:1].push(S);var x=l.cx,C=l.cy,M=Fp.select(this),g=M.selectAll("path.surface").data([S]);if(g.enter().append("path").classed("surface",!0).style({"pointer-events":r?"none":"all"}),M.call(gke,e,o),u.pull){var P=+ns.castOption(u.pull,S.pts)||0;P>0&&(x+=P*S.pxmid[0],C+=P*S.pxmid[1])}S.cxFinal=x,S.cyFinal=C;function T(N,H,te,oe){var _e=oe*(H[0]-N[0]),Ee=oe*(H[1]-N[1]);return"a"+oe*l.r+","+oe*l.r+" 0 "+S.largeArc+(te?" 1 ":" 0 ")+_e+","+Ee}var z=u.hole;if(S.v===l.vTotal){var O="M"+(x+S.px0[0])+","+(C+S.px0[1])+T(S.px0,S.pxmid,!0,1)+T(S.pxmid,S.px0,!0,1)+"Z";z?g.attr("d","M"+(x+z*S.px0[0])+","+(C+z*S.px0[1])+T(S.px0,S.pxmid,!1,z)+T(S.pxmid,S.px0,!1,z)+"Z"+O):g.attr("d",O)}else{var V=T(S.px0,S.px1,!0,1);if(z){var G=1-z;g.attr("d","M"+(x+z*S.px1[0])+","+(C+z*S.px1[1])+T(S.px1,S.px0,!1,z)+"l"+G*S.px0[0]+","+G*S.px0[1]+V+"Z")}else g.attr("d","M"+x+","+C+"l"+S.px0[0]+","+S.px0[1]+V+"Z")}Mke(e,S,l);var Z=ns.castOption(u.textposition,S.pts),j=M.selectAll("g.slicetext").data(S.text&&Z!=="none"?[0]:[]);j.enter().append("g").classed("slicetext",!0),j.exit().remove(),j.each(function(){var N=rv.ensureSingle(Fp.select(this),"text","",function(ie){ie.attr("data-notex",1)}),H=rv.ensureUniformFontSize(e,Z==="outside"?VCt(u,S,n.font):mke(u,S,n.font));N.text(S.text).attr({class:"slicetext",transform:"","text-anchor":"middle"}).call(Wy.font,H).call(gW.convertToTspans,e);var te=Wy.bBox(N.node()),oe;if(Z==="outside")oe=hke(te,S);else if(oe=_ke(te,S,l),Z==="auto"&&oe.scale<1){var _e=rv.ensureUniformFontSize(e,u.outsidetextfont);N.call(Wy.font,_e),te=Wy.bBox(N.node()),oe=hke(te,S)}var Ee=oe.textPosAngle,Ce=Ee===void 0?S.pxmid:yD(l.r,Ee);if(oe.targetX=x+Ce[0]*oe.rCenter+(oe.x||0),oe.targetY=C+Ce[1]*oe.rCenter+(oe.y||0),Eke(oe,te),oe.outside){var me=oe.targetY;S.yLabelMin=me-te.height/2,S.yLabelMid=me,S.yLabelMax=me+te.height/2,S.labelExtraX=0,S.labelExtraY=0,h=!0}oe.fontSize=H.size,qCt(u.type,oe,n),o[L].transform=oe,rv.setTransormAndDisplay(N,oe)})});var d=Fp.select(this).selectAll("g.titletext").data(u.title.text?[0]:[]);if(d.enter().append("g").classed("titletext",!0),d.exit().remove(),d.each(function(){var S=rv.ensureSingle(Fp.select(this),"text","",function(C){C.attr("data-notex",1)}),L=u.title.text;u._meta&&(L=rv.templateString(L,u._meta)),S.text(L).attr({class:"titletext",transform:"","text-anchor":"middle"}).call(Wy.font,u.title.font).call(gW.convertToTspans,e);var x;u.title.position==="middle center"?x=jCt(l):x=Tke(l,i),S.attr("transform",lke(x.x,x.y)+OCt(Math.min(1,x.scale))+lke(x.tx,x.ty))}),h&&XCt(f,u),UCt(c,u),h&&u.automargin){var v=Wy.bBox(s.node()),_=u.domain,b=i.w*(_.x[1]-_.x[0]),p=i.h*(_.y[1]-_.y[0]),k=(.5*b-l.r)/i.w,E=(.5*p-l.r)/i.h;zCt.autoMargin(e,"pie."+u.uid+".automargin",{xl:_.x[0]-k,xr:_.x[1]+k,yb:_.y[0]-E,yt:_.y[1]+E,l:Math.max(l.cx-l.r-v.left,0),r:Math.max(v.right-(l.cx+l.r),0),b:Math.max(v.bottom-(l.cy+l.r),0),t:Math.max(l.cy-l.r-v.top,0),pad:5})}})});setTimeout(function(){a.selectAll("tspan").each(function(){var o=Fp.select(this);o.attr("dy")&&o.attr("dy",o.attr("dy"))})},0)}function UCt(e,t){e.each(function(r){var n=Fp.select(this);if(!r.labelExtraX&&!r.labelExtraY){n.select("path.textline").remove();return}var i=n.select("g.slicetext text");r.transform.targetX+=r.labelExtraX,r.transform.targetY+=r.labelExtraY,rv.setTransormAndDisplay(i,r.transform);var a=r.cxFinal+r.pxmid[0],o=r.cyFinal+r.pxmid[1],s="M"+a+","+o,l=(r.yLabelMax-r.yLabelMin)*(r.pxmid[0]<0?-1:1)/4;if(r.labelExtraX){var u=r.labelExtraX*r.pxmid[1]/r.pxmid[0],c=r.yLabelMid+r.labelExtraY-(r.cyFinal+r.pxmid[1]);Math.abs(u)>Math.abs(c)?s+="l"+c*r.pxmid[0]/r.pxmid[1]+","+c+"H"+(a+r.labelExtraX+l):s+="l"+r.labelExtraX+","+u+"v"+(c-u)+"h"+l}else s+="V"+(r.yLabelMid+r.labelExtraY)+"h"+l;rv.ensureSingle(n,"path","textline").call(dke.stroke,t.outsidetextfont.color).attr({"stroke-width":Math.min(2,t.outsidetextfont.size/8),d:s,fill:"none"})})}function gke(e,t,r){var n=r[0],i=n.cx,a=n.cy,o=n.trace,s=o.type==="funnelarea";"_hasHoverLabel"in o||(o._hasHoverLabel=!1),"_hasHoverEvent"in o||(o._hasHoverEvent=!1),e.on("mouseover",function(l){var u=t._fullLayout,c=t._fullData[o.index];if(!(t._dragging||u.hovermode===!1)){var f=c.hoverinfo;if(Array.isArray(f)&&(f=gD.castHoverinfo({hoverinfo:[ns.castOption(f,l.pts)],_module:o._module},u,0)),f==="all"&&(f="label+text+value+percent+name"),c.hovertemplate||f!=="none"&&f!=="skip"&&f){var h=l.rInscribed||0,d=i+l.pxmid[0]*(1-h),v=a+l.pxmid[1]*(1-h),_=u.separators,b=[];if(f&&f.indexOf("label")!==-1&&b.push(l.label),l.text=ns.castOption(c.hovertext||c.text,l.pts),f&&f.indexOf("text")!==-1){var p=l.text;rv.isValidTextValue(p)&&b.push(p)}l.value=l.v,l.valueLabel=ns.formatPieValue(l.v,_),f&&f.indexOf("value")!==-1&&b.push(l.valueLabel),l.percent=l.v/n.vTotal,l.percentLabel=ns.formatPiePercent(l.percent,_),f&&f.indexOf("percent")!==-1&&b.push(l.percentLabel);var k=c.hoverlabel,E=k.font,S=[];gD.loneHover({trace:o,x0:d-h*n.r,x1:d+h*n.r,y:v,_x0:s?i+l.TL[0]:d-h*n.r,_x1:s?i+l.TR[0]:d+h*n.r,_y0:s?a+l.TL[1]:v-h*n.r,_y1:s?a+l.BL[1]:v+h*n.r,text:b.join("<br>"),name:c.hovertemplate||f.indexOf("name")!==-1?c.name:void 0,idealAlign:l.pxmid[0]<0?"left":"right",color:ns.castOption(k.bgcolor,l.pts)||l.color,borderColor:ns.castOption(k.bordercolor,l.pts),fontFamily:ns.castOption(E.family,l.pts),fontSize:ns.castOption(E.size,l.pts),fontColor:ns.castOption(E.color,l.pts),nameLength:ns.castOption(k.namelength,l.pts),textAlign:ns.castOption(k.align,l.pts),hovertemplate:ns.castOption(c.hovertemplate,l.pts),hovertemplateLabels:l,eventData:[mD(l,c)]},{container:u._hoverlayer.node(),outerContainer:u._paper.node(),gd:t,inOut_bbox:S}),l.bbox=S[0],o._hasHoverLabel=!0}o._hasHoverEvent=!0,t.emit("plotly_hover",{points:[mD(l,c)],event:Fp.event})}}),e.on("mouseout",function(l){var u=t._fullLayout,c=t._fullData[o.index],f=Fp.select(this).datum();o._hasHoverEvent&&(l.originalEvent=Fp.event,t.emit("plotly_unhover",{points:[mD(f,c)],event:Fp.event}),o._hasHoverEvent=!1),o._hasHoverLabel&&(gD.loneUnhover(u._hoverlayer.node()),o._hasHoverLabel=!1)}),e.on("click",function(l){var u=t._fullLayout,c=t._fullData[o.index];t._dragging||u.hovermode===!1||(t._hoverdata=[mD(l,c)],gD.click(t,Fp.event))})}function VCt(e,t,r){var n=ns.castOption(e.outsidetextfont.color,t.pts)||ns.castOption(e.textfont.color,t.pts)||r.color,i=ns.castOption(e.outsidetextfont.family,t.pts)||ns.castOption(e.textfont.family,t.pts)||r.family,a=ns.castOption(e.outsidetextfont.size,t.pts)||ns.castOption(e.textfont.size,t.pts)||r.size,o=ns.castOption(e.outsidetextfont.weight,t.pts)||ns.castOption(e.textfont.weight,t.pts)||r.weight,s=ns.castOption(e.outsidetextfont.style,t.pts)||ns.castOption(e.textfont.style,t.pts)||r.style,l=ns.castOption(e.outsidetextfont.variant,t.pts)||ns.castOption(e.textfont.variant,t.pts)||r.variant,u=ns.castOption(e.outsidetextfont.textcase,t.pts)||ns.castOption(e.textfont.textcase,t.pts)||r.textcase,c=ns.castOption(e.outsidetextfont.lineposition,t.pts)||ns.castOption(e.textfont.lineposition,t.pts)||r.lineposition,f=ns.castOption(e.outsidetextfont.shadow,t.pts)||ns.castOption(e.textfont.shadow,t.pts)||r.shadow;return{color:n,family:i,size:a,weight:o,style:s,variant:l,textcase:u,lineposition:c,shadow:f}}function mke(e,t,r){var n=ns.castOption(e.insidetextfont.color,t.pts);!n&&e._input.textfont&&(n=ns.castOption(e._input.textfont.color,t.pts));var i=ns.castOption(e.insidetextfont.family,t.pts)||ns.castOption(e.textfont.family,t.pts)||r.family,a=ns.castOption(e.insidetextfont.size,t.pts)||ns.castOption(e.textfont.size,t.pts)||r.size,o=ns.castOption(e.insidetextfont.weight,t.pts)||ns.castOption(e.textfont.weight,t.pts)||r.weight,s=ns.castOption(e.insidetextfont.style,t.pts)||ns.castOption(e.textfont.style,t.pts)||r.style,l=ns.castOption(e.insidetextfont.variant,t.pts)||ns.castOption(e.textfont.variant,t.pts)||r.variant,u=ns.castOption(e.insidetextfont.textcase,t.pts)||ns.castOption(e.textfont.textcase,t.pts)||r.textcase,c=ns.castOption(e.insidetextfont.lineposition,t.pts)||ns.castOption(e.textfont.lineposition,t.pts)||r.lineposition,f=ns.castOption(e.insidetextfont.shadow,t.pts)||ns.castOption(e.textfont.shadow,t.pts)||r.shadow;return{color:n||dke.contrast(t.color),family:i,size:a,weight:o,style:s,variant:l,textcase:u,lineposition:c,shadow:f}}function yke(e,t){for(var r,n,i=0;i<e.length;i++)if(r=e[i][0],n=r.trace,n.title.text){var a=n.title.text;n._meta&&(a=rv.templateString(a,n._meta));var o=Wy.tester.append("text").attr("data-notex",1).text(a).call(Wy.font,n.title.font).call(gW.convertToTspans,t),s=Wy.bBox(o.node(),!0);r.titleBox={width:s.width,height:s.height},o.remove()}}function _ke(e,t,r){var n=r.r||t.rpx1,i=t.rInscribed,a=t.startangle===t.stopangle;if(a)return{rCenter:1-i,scale:0,rotate:0,textPosAngle:0};var o=t.ring,s=o===1&&Math.abs(t.startangle-t.stopangle)===Math.PI*2,l=t.halfangle,u=t.midangle,c=r.trace.insidetextorientation,f=c==="horizontal",h=c==="tangential",d=c==="radial",v=c==="auto",_=[],b;if(!v){var p=function(M,g){if(GCt(t,M)){var P=Math.abs(M-t.startangle),T=Math.abs(M-t.stopangle),z=P<T?P:T;g==="tan"?b=fke(e,n,o,z,0):b=cke(e,n,o,z,Math.PI/2),b.textPosAngle=M,_.push(b)}},k;if(f||h){for(k=4;k>=-4;k-=2)p(Math.PI*k,"tan");for(k=4;k>=-4;k-=2)p(Math.PI*(k+1),"tan")}if(f||d){for(k=4;k>=-4;k-=2)p(Math.PI*(k+1.5),"rad");for(k=4;k>=-4;k-=2)p(Math.PI*(k+.5),"rad")}}if(s||v||f){var E=Math.sqrt(e.width*e.width+e.height*e.height);if(b={scale:i*n*2/E,rCenter:1-i,rotate:0},b.textPosAngle=(t.startangle+t.stopangle)/2,b.scale>=1)return b;_.push(b)}(v||d)&&(b=cke(e,n,o,l,u),b.textPosAngle=(t.startangle+t.stopangle)/2,_.push(b)),(v||h)&&(b=fke(e,n,o,l,u),b.textPosAngle=(t.startangle+t.stopangle)/2,_.push(b));for(var S=0,L=0,x=0;x<_.length;x++){var C=_[x].scale;if(L<C&&(L=C,S=x),!v&&L>=1)break}return _[S]}function GCt(e,t){var r=e.startangle,n=e.stopangle;return r>t&&t>n||r<t&&t<n}function cke(e,t,r,n,i){t=Math.max(0,t-2*pke);var a=e.width/e.height,o=wke(a,n,t,r);return{scale:o*2/e.height,rCenter:xke(a,o/t),rotate:bke(i)}}function fke(e,t,r,n,i){t=Math.max(0,t-2*pke);var a=e.height/e.width,o=wke(a,n,t,r);return{scale:o*2/e.width,rCenter:xke(a,o/t),rotate:bke(i+Math.PI/2)}}function xke(e,t){return Math.cos(t)-e*t}function bke(e){return(180/Math.PI*e+720)%180-90}function wke(e,t,r,n){var i=e+1/(2*Math.tan(t));return r*Math.min(1/(Math.sqrt(i*i+.5)+i),n/(Math.sqrt(e*e+n/2)+e))}function HCt(e,t){return e.v===t.vTotal&&!t.trace.hole?1:Math.min(1/(1+1/Math.sin(e.halfangle)),e.ring/2)}function hke(e,t){var r=t.pxmid[0],n=t.pxmid[1],i=e.width/2,a=e.height/2;return r<0&&(i*=-1),n<0&&(a*=-1),{scale:1,rCenter:1,rotate:0,x:i+Math.abs(a)*(i>0?1:-1)/2,y:a/(1+r*r/(n*n)),outside:!0}}function jCt(e){var t=Math.sqrt(e.titleBox.width*e.titleBox.width+e.titleBox.height*e.titleBox.height);return{x:e.cx,y:e.cy,scale:e.trace.hole*e.r*2/t,tx:0,ty:-e.titleBox.height/2+e.trace.title.font.size}}function Tke(e,t){var r=1,n=1,i,a=e.trace,o={x:e.cx,y:e.cy},s={tx:0,ty:0};s.ty+=a.title.font.size,i=Ake(a),a.title.position.indexOf("top")!==-1?(o.y-=(1+i)*e.r,s.ty-=e.titleBox.height):a.title.position.indexOf("bottom")!==-1&&(o.y+=(1+i)*e.r);var l=WCt(e.r,e.trace.aspectratio),u=t.w*(a.domain.x[1]-a.domain.x[0])/2;return a.title.position.indexOf("left")!==-1?(u=u+l,o.x-=(1+i)*l,s.tx+=e.titleBox.width/2):a.title.position.indexOf("center")!==-1?u*=2:a.title.position.indexOf("right")!==-1&&(u=u+l,o.x+=(1+i)*l,s.tx-=e.titleBox.width/2),r=u/e.titleBox.width,n=mW(e,t)/e.titleBox.height,{x:o.x,y:o.y,scale:Math.min(r,n),tx:s.tx,ty:s.ty}}function WCt(e,t){return e/(t===void 0?1:t)}function mW(e,t){var r=e.trace,n=t.h*(r.domain.y[1]-r.domain.y[0]);return Math.min(e.titleBox.height,n/2)}function Ake(e){var t=e.pull;if(!t)return 0;var r;if(rv.isArrayOrTypedArray(t))for(t=0,r=0;r<e.pull.length;r++)e.pull[r]>t&&(t=e.pull[r]);return t}function XCt(e,t){var r,n,i,a,o,s,l,u,c,f,h,d,v;function _(E,S){return E.pxmid[1]-S.pxmid[1]}function b(E,S){return S.pxmid[1]-E.pxmid[1]}function p(E,S){S||(S={});var L=S.labelExtraY+(n?S.yLabelMax:S.yLabelMin),x=n?E.yLabelMin:E.yLabelMax,C=n?E.yLabelMax:E.yLabelMin,M=E.cyFinal+o(E.px0[1],E.px1[1]),g=L-x,P,T,z,O,V,G;if(g*l>0&&(E.labelExtraY=g),!!rv.isArrayOrTypedArray(t.pull))for(T=0;T<f.length;T++)z=f[T],!(z===E||(ns.castOption(t.pull,E.pts)||0)>=(ns.castOption(t.pull,z.pts)||0))&&((E.pxmid[1]-z.pxmid[1])*l>0?(O=z.cyFinal+o(z.px0[1],z.px1[1]),g=O-x-E.labelExtraY,g*l>0&&(E.labelExtraY+=g)):(C+E.labelExtraY-M)*l>0&&(P=3*s*Math.abs(T-f.indexOf(E)),V=z.cxFinal+a(z.px0[0],z.px1[0]),G=V+P-(E.cxFinal+E.pxmid[0])-E.labelExtraX,G*s>0&&(E.labelExtraX+=G)))}for(n=0;n<2;n++)for(i=n?_:b,o=n?Math.max:Math.min,l=n?1:-1,r=0;r<2;r++){for(a=r?Math.max:Math.min,s=r?1:-1,u=e[n][r],u.sort(i),c=e[1-n][r],f=c.concat(u),d=[],h=0;h<u.length;h++)u[h].yLabelMid!==void 0&&d.push(u[h]);for(v=!1,h=0;n&&h<c.length;h++)if(c[h].yLabelMid!==void 0){v=c[h];break}for(h=0;h<d.length;h++){var k=h&&d[h-1];v&&!h&&(k=v),p(d[h],k)}}}function Ske(e,t){for(var r=[],n=0;n<e.length;n++){var i=e[n][0],a=i.trace,o=a.domain,s=t.w*(o.x[1]-o.x[0]),l=t.h*(o.y[1]-o.y[0]);a.title.text&&a.title.position!=="middle center"&&(l-=mW(i,t));var u=s/2,c=l/2;a.type==="funnelarea"&&!a.scalegroup&&(c/=a.aspectratio),i.r=Math.min(u,c)/(1+Ake(a)),i.cx=t.l+t.w*(a.domain.x[1]+a.domain.x[0])/2,i.cy=t.t+t.h*(1-a.domain.y[0])-l/2,a.title.text&&a.title.position.indexOf("bottom")!==-1&&(i.cy-=mW(i,t)),a.scalegroup&&r.indexOf(a.scalegroup)===-1&&r.push(a.scalegroup)}ZCt(e,r)}function ZCt(e,t){for(var r,n,i,a=0;a<t.length;a++){var o=1/0,s=t[a];for(n=0;n<e.length;n++)if(r=e[n][0],i=r.trace,i.scalegroup===s){var l;if(i.type==="pie")l=r.r*r.r;else if(i.type==="funnelarea"){var u,c;i.aspectratio>1?(u=r.r,c=u/i.aspectratio):(c=r.r,u=c*i.aspectratio),u*=(1+i.baseratio)/2,l=u*c}o=Math.min(o,l/r.vTotal)}for(n=0;n<e.length;n++)if(r=e[n][0],i=r.trace,i.scalegroup===s){var f=o*r.vTotal;i.type==="funnelarea"&&(f/=(1+i.baseratio)/2,f/=i.aspectratio),r.r=Math.sqrt(f)}}}function YCt(e){var t=e[0],r=t.r,n=t.trace,i=ns.getRotationAngle(n.rotation),a=2*Math.PI/t.vTotal,o="px0",s="px1",l,u,c;if(n.direction==="counterclockwise"){for(l=0;l<e.length&&e[l].hidden;l++);if(l===e.length)return;i+=a*e[l].v,a*=-1,o="px1",s="px0"}for(c=yD(r,i),l=0;l<e.length;l++)u=e[l],!u.hidden&&(u[o]=c,u.startangle=i,i+=a*u.v/2,u.pxmid=yD(r,i),u.midangle=i,i+=a*u.v/2,c=yD(r,i),u.stopangle=i,u[s]=c,u.largeArc=u.v>t.vTotal/2?1:0,u.halfangle=Math.PI*Math.min(u.v/t.vTotal,.5),u.ring=1-n.hole,u.rInscribed=HCt(u,t))}function yD(e,t){return[e*Math.sin(t),-e*Math.cos(t)]}function Mke(e,t,r){var n=e._fullLayout,i=r.trace,a=i.texttemplate,o=i.textinfo;if(!a&&o&&o!=="none"){var s=o.split("+"),l=function(S){return s.indexOf(S)!==-1},u=l("label"),c=l("text"),f=l("value"),h=l("percent"),d=n.separators,v;if(v=u?[t.label]:[],c){var _=ns.getFirstFilled(i.text,t.pts);uke(_)&&v.push(_)}f&&v.push(ns.formatPieValue(t.v,d)),h&&v.push(ns.formatPiePercent(t.v/r.vTotal,d)),t.text=v.join("<br>")}function b(S){return{label:S.label,value:S.v,valueLabel:ns.formatPieValue(S.v,n.separators),percent:S.v/r.vTotal,percentLabel:ns.formatPiePercent(S.v/r.vTotal,n.separators),color:S.color,text:S.text,customdata:rv.castOption(i,S.i,"customdata")}}if(a){var p=rv.castOption(i,t.i,"texttemplate");if(!p)t.text="";else{var k=b(t),E=ns.getFirstFilled(i.text,t.pts);(uke(E)||E==="")&&(k.text=E),t.text=rv.texttemplateString({data:[k,i._meta],fallback:i.texttemplatefallback,labels:k,locale:e._fullLayout._d3locale,template:p})}}}function Eke(e,t){var r=e.rotate*Math.PI/180,n=Math.cos(r),i=Math.sin(r),a=(t.left+t.right)/2,o=(t.top+t.bottom)/2;e.textX=a*n-o*i,e.textY=a*i+o*n,e.noCenter=!0}kke.exports={plot:NCt,formatSliceLabel:Mke,transformInsideText:_ke,determineInsideTextFont:mke,positionTitleOutside:Tke,prerenderTitles:yke,layoutAreas:Ske,attachFxHandlers:gke,computeTransform:Eke}});var Pke=ye((Kdr,Lke)=>{"use strict";var Cke=Oa(),KCt=B3(),JCt=bv().resizeText;Lke.exports=function(t){var r=t._fullLayout._pielayer.selectAll(".trace");JCt(t,r,"pie"),r.each(function(n){var i=n[0],a=i.trace,o=Cke.select(this);o.style({opacity:a.opacity}),o.selectAll("path.surface").each(function(s){Cke.select(this).call(KCt,s,a,t)})})}});var Rke=ye(AA=>{"use strict";var Ike=Mc();AA.name="pie";AA.plot=function(e,t,r,n){Ike.plotBasePlot(AA.name,e,t,r,n)};AA.clean=function(e,t,r,n){Ike.cleanBasePlot(AA.name,e,t,r,n)}});var Fke=ye(($dr,Dke)=>{"use strict";Dke.exports={attributes:M2(),supplyDefaults:E2().supplyDefaults,supplyLayoutDefaults:rke(),layoutAttributes:pD(),calc:TA().calc,crossTraceCalc:TA().crossTraceCalc,plot:_D().plot,style:Pke(),styleOne:B3(),moduleType:"trace",name:"pie",basePlotModule:Rke(),categories:["pie-like","pie","showLegend"],meta:{}}});var Oke=ye((Qdr,zke)=>{"use strict";zke.exports=Fke()});var Bke=ye(SA=>{"use strict";var qke=Mc();SA.name="sunburst";SA.plot=function(e,t,r,n){qke.plotBasePlot(SA.name,e,t,r,n)};SA.clean=function(e,t,r,n){qke.cleanBasePlot(SA.name,e,t,r,n)}});var yW=ye((tvr,Nke)=>{"use strict";Nke.exports={CLICK_TRANSITION_TIME:750,CLICK_TRANSITION_EASING:"linear",eventDataKeys:["currentPath","root","entry","percentRoot","percentEntry","percentParent"]}});var LE=ye((rvr,Gke)=>{"use strict";var $Ct=Gl(),{hovertemplateAttrs:QCt,texttemplateAttrs:e6t,templatefallbackAttrs:Uke}=Ll(),t6t=Tu(),r6t=Cc().attributes,Xy=M2(),Vke=yW(),CE=Ao().extendFlat,i6t=Pd().pattern;Gke.exports={labels:{valType:"data_array",editType:"calc"},parents:{valType:"data_array",editType:"calc"},values:{valType:"data_array",editType:"calc"},branchvalues:{valType:"enumerated",values:["remainder","total"],dflt:"remainder",editType:"calc"},count:{valType:"flaglist",flags:["branches","leaves"],dflt:"leaves",editType:"calc"},level:{valType:"any",editType:"plot",anim:!0},maxdepth:{valType:"integer",editType:"plot",dflt:-1},marker:CE({colors:{valType:"data_array",editType:"calc"},line:{color:CE({},Xy.marker.line.color,{dflt:null}),width:CE({},Xy.marker.line.width,{dflt:1}),editType:"calc"},pattern:i6t,editType:"calc"},t6t("marker",{colorAttr:"colors",anim:!1})),leaf:{opacity:{valType:"number",editType:"style",min:0,max:1},editType:"plot"},text:Xy.text,textinfo:{valType:"flaglist",flags:["label","text","value","current path","percent root","percent entry","percent parent"],extras:["none"],editType:"plot"},texttemplate:e6t({editType:"plot"},{keys:Vke.eventDataKeys.concat(["label","value"])}),texttemplatefallback:Uke({editType:"plot"}),hovertext:Xy.hovertext,hoverinfo:CE({},$Ct.hoverinfo,{flags:["label","text","value","name","current path","percent root","percent entry","percent parent"],dflt:"label+text+value+name"}),hovertemplate:QCt({},{keys:Vke.eventDataKeys}),hovertemplatefallback:Uke(),textfont:Xy.textfont,insidetextorientation:Xy.insidetextorientation,insidetextfont:Xy.insidetextfont,outsidetextfont:CE({},Xy.outsidetextfont,{}),rotation:{valType:"angle",dflt:0,editType:"plot"},sort:Xy.sort,root:{color:{valType:"color",editType:"calc",dflt:"rgba(0,0,0,0)"},editType:"calc"},domain:r6t({name:"sunburst",trace:!0,editType:"calc"})}});var _W=ye((ivr,Hke)=>{"use strict";Hke.exports={sunburstcolorway:{valType:"colorlist",editType:"calc"},extendsunburstcolors:{valType:"boolean",dflt:!0,editType:"calc"}}});var Zke=ye((nvr,Xke)=>{"use strict";var jke=Dr(),n6t=LE(),a6t=Cc().defaults,o6t=r0().handleText,s6t=E2().handleMarkerDefaults,Wke=tc(),l6t=Wke.hasColorscale,u6t=Wke.handleDefaults;Xke.exports=function(t,r,n,i){function a(h,d){return jke.coerce(t,r,n6t,h,d)}var o=a("labels"),s=a("parents");if(!o||!o.length||!s||!s.length){r.visible=!1;return}var l=a("values");l&&l.length?a("branchvalues"):a("count"),a("level"),a("maxdepth"),s6t(t,r,i,a);var u=r._hasColorscale=l6t(t,"marker","colors")||(t.marker||{}).coloraxis;u&&u6t(t,r,i,a,{prefix:"marker.",cLetter:"c"}),a("leaf.opacity",u?1:.7);var c=a("text");a("texttemplate"),a("texttemplatefallback"),r.texttemplate||a("textinfo",jke.isArrayOrTypedArray(c)?"text+label":"label"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback");var f="auto";o6t(t,r,i,a,f,{moduleHasSelected:!1,moduleHasUnselected:!1,moduleHasConstrain:!1,moduleHasCliponaxis:!1,moduleHasTextangle:!1,moduleHasInsideanchor:!1}),a("insidetextorientation"),a("sort"),a("rotation"),a("root.color"),a6t(r,i,a),r._length=null}});var Kke=ye((avr,Yke)=>{"use strict";var c6t=Dr(),f6t=_W();Yke.exports=function(t,r){function n(i,a){return c6t.coerce(t,r,f6t,i,a)}n("sunburstcolorway",r.colorway),n("extendsunburstcolors")}});var PE=ye((xD,Jke)=>{(function(e,t){typeof xD=="object"&&typeof Jke!="undefined"?t(xD):(e=e||self,t(e.d3=e.d3||{}))})(xD,function(e){"use strict";function t(je,tt){return je.parent===tt.parent?1:2}function r(je){return je.reduce(n,0)/je.length}function n(je,tt){return je+tt.x}function i(je){return 1+je.reduce(a,0)}function a(je,tt){return Math.max(je,tt.y)}function o(je){for(var tt;tt=je.children;)je=tt[0];return je}function s(je){for(var tt;tt=je.children;)je=tt[tt.length-1];return je}function l(){var je=t,tt=1,xt=1,Ie=!1;function xe(ke){var vt,ir=0;ke.eachAfter(function($r){var di=$r.children;di?($r.x=r(di),$r.y=i(di)):($r.x=vt?ir+=je($r,vt):0,$r.y=0,vt=$r)});var ar=o(ke),vr=s(ke),ii=ar.x-je(ar,vr)/2,pi=vr.x+je(vr,ar)/2;return ke.eachAfter(Ie?function($r){$r.x=($r.x-ke.x)*tt,$r.y=(ke.y-$r.y)*xt}:function($r){$r.x=($r.x-ii)/(pi-ii)*tt,$r.y=(1-(ke.y?$r.y/ke.y:1))*xt})}return xe.separation=function(ke){return arguments.length?(je=ke,xe):je},xe.size=function(ke){return arguments.length?(Ie=!1,tt=+ke[0],xt=+ke[1],xe):Ie?null:[tt,xt]},xe.nodeSize=function(ke){return arguments.length?(Ie=!0,tt=+ke[0],xt=+ke[1],xe):Ie?[tt,xt]:null},xe}function u(je){var tt=0,xt=je.children,Ie=xt&&xt.length;if(!Ie)tt=1;else for(;--Ie>=0;)tt+=xt[Ie].value;je.value=tt}function c(){return this.eachAfter(u)}function f(je){var tt=this,xt,Ie=[tt],xe,ke,vt;do for(xt=Ie.reverse(),Ie=[];tt=xt.pop();)if(je(tt),xe=tt.children,xe)for(ke=0,vt=xe.length;ke<vt;++ke)Ie.push(xe[ke]);while(Ie.length);return this}function h(je){for(var tt=this,xt=[tt],Ie,xe;tt=xt.pop();)if(je(tt),Ie=tt.children,Ie)for(xe=Ie.length-1;xe>=0;--xe)xt.push(Ie[xe]);return this}function d(je){for(var tt=this,xt=[tt],Ie=[],xe,ke,vt;tt=xt.pop();)if(Ie.push(tt),xe=tt.children,xe)for(ke=0,vt=xe.length;ke<vt;++ke)xt.push(xe[ke]);for(;tt=Ie.pop();)je(tt);return this}function v(je){return this.eachAfter(function(tt){for(var xt=+je(tt.data)||0,Ie=tt.children,xe=Ie&&Ie.length;--xe>=0;)xt+=Ie[xe].value;tt.value=xt})}function _(je){return this.eachBefore(function(tt){tt.children&&tt.children.sort(je)})}function b(je){for(var tt=this,xt=p(tt,je),Ie=[tt];tt!==xt;)tt=tt.parent,Ie.push(tt);for(var xe=Ie.length;je!==xt;)Ie.splice(xe,0,je),je=je.parent;return Ie}function p(je,tt){if(je===tt)return je;var xt=je.ancestors(),Ie=tt.ancestors(),xe=null;for(je=xt.pop(),tt=Ie.pop();je===tt;)xe=je,je=xt.pop(),tt=Ie.pop();return xe}function k(){for(var je=this,tt=[je];je=je.parent;)tt.push(je);return tt}function E(){var je=[];return this.each(function(tt){je.push(tt)}),je}function S(){var je=[];return this.eachBefore(function(tt){tt.children||je.push(tt)}),je}function L(){var je=this,tt=[];return je.each(function(xt){xt!==je&&tt.push({source:xt.parent,target:xt})}),tt}function x(je,tt){var xt=new T(je),Ie=+je.value&&(xt.value=je.value),xe,ke=[xt],vt,ir,ar,vr;for(tt==null&&(tt=M);xe=ke.pop();)if(Ie&&(xe.value=+xe.data.value),(ir=tt(xe.data))&&(vr=ir.length))for(xe.children=new Array(vr),ar=vr-1;ar>=0;--ar)ke.push(vt=xe.children[ar]=new T(ir[ar])),vt.parent=xe,vt.depth=xe.depth+1;return xt.eachBefore(P)}function C(){return x(this).eachBefore(g)}function M(je){return je.children}function g(je){je.data=je.data.data}function P(je){var tt=0;do je.height=tt;while((je=je.parent)&&je.height<++tt)}function T(je){this.data=je,this.depth=this.height=0,this.parent=null}T.prototype=x.prototype={constructor:T,count:c,each:f,eachAfter:d,eachBefore:h,sum:v,sort:_,path:b,ancestors:k,descendants:E,leaves:S,links:L,copy:C};var z=Array.prototype.slice;function O(je){for(var tt=je.length,xt,Ie;tt;)Ie=Math.random()*tt--|0,xt=je[tt],je[tt]=je[Ie],je[Ie]=xt;return je}function V(je){for(var tt=0,xt=(je=O(z.call(je))).length,Ie=[],xe,ke;tt<xt;)xe=je[tt],ke&&j(ke,xe)?++tt:(ke=H(Ie=G(Ie,xe)),tt=0);return ke}function G(je,tt){var xt,Ie;if(N(tt,je))return[tt];for(xt=0;xt<je.length;++xt)if(Z(tt,je[xt])&&N(oe(je[xt],tt),je))return[je[xt],tt];for(xt=0;xt<je.length-1;++xt)for(Ie=xt+1;Ie<je.length;++Ie)if(Z(oe(je[xt],je[Ie]),tt)&&Z(oe(je[xt],tt),je[Ie])&&Z(oe(je[Ie],tt),je[xt])&&N(_e(je[xt],je[Ie],tt),je))return[je[xt],je[Ie],tt];throw new Error}function Z(je,tt){var xt=je.r-tt.r,Ie=tt.x-je.x,xe=tt.y-je.y;return xt<0||xt*xt<Ie*Ie+xe*xe}function j(je,tt){var xt=je.r-tt.r+1e-6,Ie=tt.x-je.x,xe=tt.y-je.y;return xt>0&&xt*xt>Ie*Ie+xe*xe}function N(je,tt){for(var xt=0;xt<tt.length;++xt)if(!j(je,tt[xt]))return!1;return!0}function H(je){switch(je.length){case 1:return te(je[0]);case 2:return oe(je[0],je[1]);case 3:return _e(je[0],je[1],je[2])}}function te(je){return{x:je.x,y:je.y,r:je.r}}function oe(je,tt){var xt=je.x,Ie=je.y,xe=je.r,ke=tt.x,vt=tt.y,ir=tt.r,ar=ke-xt,vr=vt-Ie,ii=ir-xe,pi=Math.sqrt(ar*ar+vr*vr);return{x:(xt+ke+ar/pi*ii)/2,y:(Ie+vt+vr/pi*ii)/2,r:(pi+xe+ir)/2}}function _e(je,tt,xt){var Ie=je.x,xe=je.y,ke=je.r,vt=tt.x,ir=tt.y,ar=tt.r,vr=xt.x,ii=xt.y,pi=xt.r,$r=Ie-vt,di=Ie-vr,ji=xe-ir,In=xe-ii,wi=ar-ke,On=pi-ke,qn=Ie*Ie+xe*xe-ke*ke,Fn=qn-vt*vt-ir*ir+ar*ar,ra=qn-vr*vr-ii*ii+pi*pi,la=di*ji-$r*In,Ut=(ji*ra-In*Fn)/(la*2)-Ie,wt=(In*wi-ji*On)/la,rr=(di*Fn-$r*ra)/(la*2)-xe,nr=($r*On-di*wi)/la,Er=wt*wt+nr*nr-1,Xr=2*(ke+Ut*wt+rr*nr),ri=Ut*Ut+rr*rr-ke*ke,Qr=-(Er?(Xr+Math.sqrt(Xr*Xr-4*Er*ri))/(2*Er):ri/Xr);return{x:Ie+Ut+wt*Qr,y:xe+rr+nr*Qr,r:Qr}}function Ee(je,tt,xt){var Ie=je.x-tt.x,xe,ke,vt=je.y-tt.y,ir,ar,vr=Ie*Ie+vt*vt;vr?(ke=tt.r+xt.r,ke*=ke,ar=je.r+xt.r,ar*=ar,ke>ar?(xe=(vr+ar-ke)/(2*vr),ir=Math.sqrt(Math.max(0,ar/vr-xe*xe)),xt.x=je.x-xe*Ie-ir*vt,xt.y=je.y-xe*vt+ir*Ie):(xe=(vr+ke-ar)/(2*vr),ir=Math.sqrt(Math.max(0,ke/vr-xe*xe)),xt.x=tt.x+xe*Ie-ir*vt,xt.y=tt.y+xe*vt+ir*Ie)):(xt.x=tt.x+xt.r,xt.y=tt.y)}function Ce(je,tt){var xt=je.r+tt.r-1e-6,Ie=tt.x-je.x,xe=tt.y-je.y;return xt>0&&xt*xt>Ie*Ie+xe*xe}function me(je){var tt=je._,xt=je.next._,Ie=tt.r+xt.r,xe=(tt.x*xt.r+xt.x*tt.r)/Ie,ke=(tt.y*xt.r+xt.y*tt.r)/Ie;return xe*xe+ke*ke}function ie(je){this._=je,this.next=null,this.previous=null}function Se(je){if(!(xe=je.length))return 0;var tt,xt,Ie,xe,ke,vt,ir,ar,vr,ii,pi;if(tt=je[0],tt.x=0,tt.y=0,!(xe>1))return tt.r;if(xt=je[1],tt.x=-xt.r,xt.x=tt.r,xt.y=0,!(xe>2))return tt.r+xt.r;Ee(xt,tt,Ie=je[2]),tt=new ie(tt),xt=new ie(xt),Ie=new ie(Ie),tt.next=Ie.previous=xt,xt.next=tt.previous=Ie,Ie.next=xt.previous=tt;e:for(ir=3;ir<xe;++ir){Ee(tt._,xt._,Ie=je[ir]),Ie=new ie(Ie),ar=xt.next,vr=tt.previous,ii=xt._.r,pi=tt._.r;do if(ii<=pi){if(Ce(ar._,Ie._)){xt=ar,tt.next=xt,xt.previous=tt,--ir;continue e}ii+=ar._.r,ar=ar.next}else{if(Ce(vr._,Ie._)){tt=vr,tt.next=xt,xt.previous=tt,--ir;continue e}pi+=vr._.r,vr=vr.previous}while(ar!==vr.next);for(Ie.previous=tt,Ie.next=xt,tt.next=xt.previous=xt=Ie,ke=me(tt);(Ie=Ie.next)!==xt;)(vt=me(Ie))<ke&&(tt=Ie,ke=vt);xt=tt.next}for(tt=[xt._],Ie=xt;(Ie=Ie.next)!==xt;)tt.push(Ie._);for(Ie=V(tt),ir=0;ir<xe;++ir)tt=je[ir],tt.x-=Ie.x,tt.y-=Ie.y;return Ie.r}function Le(je){return Se(je),je}function Ae(je){return je==null?null:Fe(je)}function Fe(je){if(typeof je!="function")throw new Error;return je}function Pe(){return 0}function ge(je){return function(){return je}}function Re(je){return Math.sqrt(je.value)}function ce(){var je=null,tt=1,xt=1,Ie=Pe;function xe(ke){return ke.x=tt/2,ke.y=xt/2,je?ke.eachBefore(Ze(je)).eachAfter(ut(Ie,.5)).eachBefore(pt(1)):ke.eachBefore(Ze(Re)).eachAfter(ut(Pe,1)).eachAfter(ut(Ie,ke.r/Math.min(tt,xt))).eachBefore(pt(Math.min(tt,xt)/(2*ke.r))),ke}return xe.radius=function(ke){return arguments.length?(je=Ae(ke),xe):je},xe.size=function(ke){return arguments.length?(tt=+ke[0],xt=+ke[1],xe):[tt,xt]},xe.padding=function(ke){return arguments.length?(Ie=typeof ke=="function"?ke:ge(+ke),xe):Ie},xe}function Ze(je){return function(tt){tt.children||(tt.r=Math.max(0,+je(tt)||0))}}function ut(je,tt){return function(xt){if(Ie=xt.children){var Ie,xe,ke=Ie.length,vt=je(xt)*tt||0,ir;if(vt)for(xe=0;xe<ke;++xe)Ie[xe].r+=vt;if(ir=Se(Ie),vt)for(xe=0;xe<ke;++xe)Ie[xe].r-=vt;xt.r=ir+vt}}}function pt(je){return function(tt){var xt=tt.parent;tt.r*=je,xt&&(tt.x=xt.x+je*tt.x,tt.y=xt.y+je*tt.y)}}function Zt(je){je.x0=Math.round(je.x0),je.y0=Math.round(je.y0),je.x1=Math.round(je.x1),je.y1=Math.round(je.y1)}function st(je,tt,xt,Ie,xe){for(var ke=je.children,vt,ir=-1,ar=ke.length,vr=je.value&&(Ie-tt)/je.value;++ir<ar;)vt=ke[ir],vt.y0=xt,vt.y1=xe,vt.x0=tt,vt.x1=tt+=vt.value*vr}function lt(){var je=1,tt=1,xt=0,Ie=!1;function xe(vt){var ir=vt.height+1;return vt.x0=vt.y0=xt,vt.x1=je,vt.y1=tt/ir,vt.eachBefore(ke(tt,ir)),Ie&&vt.eachBefore(Zt),vt}function ke(vt,ir){return function(ar){ar.children&&st(ar,ar.x0,vt*(ar.depth+1)/ir,ar.x1,vt*(ar.depth+2)/ir);var vr=ar.x0,ii=ar.y0,pi=ar.x1-xt,$r=ar.y1-xt;pi<vr&&(vr=pi=(vr+pi)/2),$r<ii&&(ii=$r=(ii+$r)/2),ar.x0=vr,ar.y0=ii,ar.x1=pi,ar.y1=$r}}return xe.round=function(vt){return arguments.length?(Ie=!!vt,xe):Ie},xe.size=function(vt){return arguments.length?(je=+vt[0],tt=+vt[1],xe):[je,tt]},xe.padding=function(vt){return arguments.length?(xt=+vt,xe):xt},xe}var Gt="$",Nt={depth:-1},Jt={};function sr(je){return je.id}function wr(je){return je.parentId}function cr(){var je=sr,tt=wr;function xt(Ie){var xe,ke,vt=Ie.length,ir,ar,vr,ii=new Array(vt),pi,$r,di={};for(ke=0;ke<vt;++ke)xe=Ie[ke],vr=ii[ke]=new T(xe),(pi=je(xe,ke,Ie))!=null&&(pi+="")&&($r=Gt+(vr.id=pi),di[$r]=$r in di?Jt:vr);for(ke=0;ke<vt;++ke)if(vr=ii[ke],pi=tt(Ie[ke],ke,Ie),pi==null||!(pi+="")){if(ir)throw new Error("multiple roots");ir=vr}else{if(ar=di[Gt+pi],!ar)throw new Error("missing: "+pi);if(ar===Jt)throw new Error("ambiguous: "+pi);ar.children?ar.children.push(vr):ar.children=[vr],vr.parent=ar}if(!ir)throw new Error("no root");if(ir.parent=Nt,ir.eachBefore(function(ji){ji.depth=ji.parent.depth+1,--vt}).eachBefore(P),ir.parent=null,vt>0)throw new Error("cycle");return ir}return xt.id=function(Ie){return arguments.length?(je=Fe(Ie),xt):je},xt.parentId=function(Ie){return arguments.length?(tt=Fe(Ie),xt):tt},xt}function $e(je,tt){return je.parent===tt.parent?1:2}function St(je){var tt=je.children;return tt?tt[0]:je.t}function Qt(je){var tt=je.children;return tt?tt[tt.length-1]:je.t}function Vt(je,tt,xt){var Ie=xt/(tt.i-je.i);tt.c-=Ie,tt.s+=xt,je.c+=Ie,tt.z+=xt,tt.m+=xt}function _t(je){for(var tt=0,xt=0,Ie=je.children,xe=Ie.length,ke;--xe>=0;)ke=Ie[xe],ke.z+=tt,ke.m+=tt,tt+=ke.s+(xt+=ke.c)}function It(je,tt,xt){return je.a.parent===tt.parent?je.a:xt}function mt(je,tt){this._=je,this.parent=null,this.children=null,this.A=null,this.a=this,this.z=0,this.m=0,this.c=0,this.s=0,this.t=null,this.i=tt}mt.prototype=Object.create(T.prototype);function er(je){for(var tt=new mt(je,0),xt,Ie=[tt],xe,ke,vt,ir;xt=Ie.pop();)if(ke=xt._.children)for(xt.children=new Array(ir=ke.length),vt=ir-1;vt>=0;--vt)Ie.push(xe=xt.children[vt]=new mt(ke[vt],vt)),xe.parent=xt;return(tt.parent=new mt(null,0)).children=[tt],tt}function lr(){var je=$e,tt=1,xt=1,Ie=null;function xe(vr){var ii=er(vr);if(ii.eachAfter(ke),ii.parent.m=-ii.z,ii.eachBefore(vt),Ie)vr.eachBefore(ar);else{var pi=vr,$r=vr,di=vr;vr.eachBefore(function(qn){qn.x<pi.x&&(pi=qn),qn.x>$r.x&&($r=qn),qn.depth>di.depth&&(di=qn)});var ji=pi===$r?1:je(pi,$r)/2,In=ji-pi.x,wi=tt/($r.x+ji+In),On=xt/(di.depth||1);vr.eachBefore(function(qn){qn.x=(qn.x+In)*wi,qn.y=qn.depth*On})}return vr}function ke(vr){var ii=vr.children,pi=vr.parent.children,$r=vr.i?pi[vr.i-1]:null;if(ii){_t(vr);var di=(ii[0].z+ii[ii.length-1].z)/2;$r?(vr.z=$r.z+je(vr._,$r._),vr.m=vr.z-di):vr.z=di}else $r&&(vr.z=$r.z+je(vr._,$r._));vr.parent.A=ir(vr,$r,vr.parent.A||pi[0])}function vt(vr){vr._.x=vr.z+vr.parent.m,vr.m+=vr.parent.m}function ir(vr,ii,pi){if(ii){for(var $r=vr,di=vr,ji=ii,In=$r.parent.children[0],wi=$r.m,On=di.m,qn=ji.m,Fn=In.m,ra;ji=Qt(ji),$r=St($r),ji&&$r;)In=St(In),di=Qt(di),di.a=vr,ra=ji.z+qn-$r.z-wi+je(ji._,$r._),ra>0&&(Vt(It(ji,vr,pi),vr,ra),wi+=ra,On+=ra),qn+=ji.m,wi+=$r.m,Fn+=In.m,On+=di.m;ji&&!Qt(di)&&(di.t=ji,di.m+=qn-On),$r&&!St(In)&&(In.t=$r,In.m+=wi-Fn,pi=vr)}return pi}function ar(vr){vr.x*=tt,vr.y=vr.depth*xt}return xe.separation=function(vr){return arguments.length?(je=vr,xe):je},xe.size=function(vr){return arguments.length?(Ie=!1,tt=+vr[0],xt=+vr[1],xe):Ie?null:[tt,xt]},xe.nodeSize=function(vr){return arguments.length?(Ie=!0,tt=+vr[0],xt=+vr[1],xe):Ie?[tt,xt]:null},xe}function Tr(je,tt,xt,Ie,xe){for(var ke=je.children,vt,ir=-1,ar=ke.length,vr=je.value&&(xe-xt)/je.value;++ir<ar;)vt=ke[ir],vt.x0=tt,vt.x1=Ie,vt.y0=xt,vt.y1=xt+=vt.value*vr}var Lr=(1+Math.sqrt(5))/2;function ti(je,tt,xt,Ie,xe,ke){for(var vt=[],ir=tt.children,ar,vr,ii=0,pi=0,$r=ir.length,di,ji,In=tt.value,wi,On,qn,Fn,ra,la,Ut;ii<$r;){di=xe-xt,ji=ke-Ie;do wi=ir[pi++].value;while(!wi&&pi<$r);for(On=qn=wi,la=Math.max(ji/di,di/ji)/(In*je),Ut=wi*wi*la,ra=Math.max(qn/Ut,Ut/On);pi<$r;++pi){if(wi+=vr=ir[pi].value,vr<On&&(On=vr),vr>qn&&(qn=vr),Ut=wi*wi*la,Fn=Math.max(qn/Ut,Ut/On),Fn>ra){wi-=vr;break}ra=Fn}vt.push(ar={value:wi,dice:di<ji,children:ir.slice(ii,pi)}),ar.dice?st(ar,xt,Ie,xe,In?Ie+=ji*wi/In:ke):Tr(ar,xt,Ie,In?xt+=di*wi/In:xe,ke),In-=wi,ii=pi}return vt}var Br=function je(tt){function xt(Ie,xe,ke,vt,ir){ti(tt,Ie,xe,ke,vt,ir)}return xt.ratio=function(Ie){return je((Ie=+Ie)>1?Ie:1)},xt}(Lr);function Vr(){var je=Br,tt=!1,xt=1,Ie=1,xe=[0],ke=Pe,vt=Pe,ir=Pe,ar=Pe,vr=Pe;function ii($r){return $r.x0=$r.y0=0,$r.x1=xt,$r.y1=Ie,$r.eachBefore(pi),xe=[0],tt&&$r.eachBefore(Zt),$r}function pi($r){var di=xe[$r.depth],ji=$r.x0+di,In=$r.y0+di,wi=$r.x1-di,On=$r.y1-di;wi<ji&&(ji=wi=(ji+wi)/2),On<In&&(In=On=(In+On)/2),$r.x0=ji,$r.y0=In,$r.x1=wi,$r.y1=On,$r.children&&(di=xe[$r.depth+1]=ke($r)/2,ji+=vr($r)-di,In+=vt($r)-di,wi-=ir($r)-di,On-=ar($r)-di,wi<ji&&(ji=wi=(ji+wi)/2),On<In&&(In=On=(In+On)/2),je($r,ji,In,wi,On))}return ii.round=function($r){return arguments.length?(tt=!!$r,ii):tt},ii.size=function($r){return arguments.length?(xt=+$r[0],Ie=+$r[1],ii):[xt,Ie]},ii.tile=function($r){return arguments.length?(je=Fe($r),ii):je},ii.padding=function($r){return arguments.length?ii.paddingInner($r).paddingOuter($r):ii.paddingInner()},ii.paddingInner=function($r){return arguments.length?(ke=typeof $r=="function"?$r:ge(+$r),ii):ke},ii.paddingOuter=function($r){return arguments.length?ii.paddingTop($r).paddingRight($r).paddingBottom($r).paddingLeft($r):ii.paddingTop()},ii.paddingTop=function($r){return arguments.length?(vt=typeof $r=="function"?$r:ge(+$r),ii):vt},ii.paddingRight=function($r){return arguments.length?(ir=typeof $r=="function"?$r:ge(+$r),ii):ir},ii.paddingBottom=function($r){return arguments.length?(ar=typeof $r=="function"?$r:ge(+$r),ii):ar},ii.paddingLeft=function($r){return arguments.length?(vr=typeof $r=="function"?$r:ge(+$r),ii):vr},ii}function dt(je,tt,xt,Ie,xe){var ke=je.children,vt,ir=ke.length,ar,vr=new Array(ir+1);for(vr[0]=ar=vt=0;vt<ir;++vt)vr[vt+1]=ar+=ke[vt].value;ii(0,ir,je.value,tt,xt,Ie,xe);function ii(pi,$r,di,ji,In,wi,On){if(pi>=$r-1){var qn=ke[pi];qn.x0=ji,qn.y0=In,qn.x1=wi,qn.y1=On;return}for(var Fn=vr[pi],ra=di/2+Fn,la=pi+1,Ut=$r-1;la<Ut;){var wt=la+Ut>>>1;vr[wt]<ra?la=wt+1:Ut=wt}ra-vr[la-1]<vr[la]-ra&&pi+1<la&&--la;var rr=vr[la]-Fn,nr=di-rr;if(wi-ji>On-In){var Er=(ji*nr+wi*rr)/di;ii(pi,la,rr,ji,In,Er,On),ii(la,$r,nr,Er,In,wi,On)}else{var Xr=(In*nr+On*rr)/di;ii(pi,la,rr,ji,In,wi,Xr),ii(la,$r,nr,ji,Xr,wi,On)}}}function Ge(je,tt,xt,Ie,xe){(je.depth&1?Tr:st)(je,tt,xt,Ie,xe)}var Je=function je(tt){function xt(Ie,xe,ke,vt,ir){if((ar=Ie._squarify)&&ar.ratio===tt)for(var ar,vr,ii,pi,$r=-1,di,ji=ar.length,In=Ie.value;++$r<ji;){for(vr=ar[$r],ii=vr.children,pi=vr.value=0,di=ii.length;pi<di;++pi)vr.value+=ii[pi].value;vr.dice?st(vr,xe,ke,vt,ke+=(ir-ke)*vr.value/In):Tr(vr,xe,ke,xe+=(vt-xe)*vr.value/In,ir),In-=vr.value}else Ie._squarify=ar=ti(tt,Ie,xe,ke,vt,ir),ar.ratio=tt}return xt.ratio=function(Ie){return je((Ie=+Ie)>1?Ie:1)},xt}(Lr);e.cluster=l,e.hierarchy=x,e.pack=ce,e.packEnclose=V,e.packSiblings=Le,e.partition=lt,e.stratify=cr,e.tree=lr,e.treemap=Vr,e.treemapBinary=dt,e.treemapDice=st,e.treemapResquarify=Je,e.treemapSlice=Tr,e.treemapSliceDice=Ge,e.treemapSquarify=Br,Object.defineProperty(e,"__esModule",{value:!0})})});var RE=ye(IE=>{"use strict";var $ke=PE(),h6t=Eo(),MA=Dr(),d6t=tc().makeColorScaleFuncFromTrace,v6t=TA().makePullColorFn,p6t=TA().generateExtendedColors,g6t=tc().calc,m6t=fs().ALMOST_EQUAL,y6t={},_6t={},x6t={};IE.calc=function(e,t){var r=e._fullLayout,n=t.ids,i=MA.isArrayOrTypedArray(n),a=t.labels,o=t.parents,s=t.values,l=MA.isArrayOrTypedArray(s),u=[],c={},f={},h=function(j,N){c[j]?c[j].push(N):c[j]=[N],f[N]=1},d=function(j){return j||typeof j=="number"},v=function(j){return!l||h6t(s[j])&&s[j]>=0},_,b,p;i?(_=Math.min(n.length,o.length),b=function(j){return d(n[j])&&v(j)},p=function(j){return String(n[j])}):(_=Math.min(a.length,o.length),b=function(j){return d(a[j])&&v(j)},p=function(j){return String(a[j])}),l&&(_=Math.min(_,s.length));for(var k=0;k<_;k++)if(b(k)){var E=p(k),S=d(o[k])?String(o[k]):"",L={i:k,id:E,pid:S,label:d(a[k])?String(a[k]):""};l&&(L.v=+s[k]),u.push(L),h(S,E)}if(c[""]){if(c[""].length>1){for(var M=MA.randstr(),g=0;g<u.length;g++)u[g].pid===""&&(u[g].pid=M);u.unshift({hasMultipleRoots:!0,id:M,pid:"",label:""})}}else{var x=[],C;for(C in c)f[C]||x.push(C);if(x.length===1)C=x[0],u.unshift({hasImpliedRoot:!0,id:C,pid:"",label:C});else return MA.warn(["Multiple implied roots, cannot build",t.type,"hierarchy of",t.name+".","These roots include:",x.join(", ")].join(" "))}var P;try{P=$ke.stratify().id(function(j){return j.id}).parentId(function(j){return j.pid})(u)}catch(j){return MA.warn(["Failed to build",t.type,"hierarchy of",t.name+".","Error:",j.message].join(" "))}var T=$ke.hierarchy(P),z=!1;if(l)switch(t.branchvalues){case"remainder":T.sum(function(j){return j.data.v});break;case"total":T.each(function(j){var N=j.data.data,H=N.v;if(j.children){var te=j.children.reduce(function(oe,_e){return oe+_e.data.data.v},0);if((N.hasImpliedRoot||N.hasMultipleRoots)&&(H=te),H<te*m6t)return z=!0,MA.warn(["Total value for node",j.data.data.id,"of",t.name,"is smaller than the sum of its children.",`
+parent value =`,H,`
+children sum =`,te].join(" "))}j.value=H});break}else Qke(T,t,{branches:t.count.indexOf("branches")!==-1,leaves:t.count.indexOf("leaves")!==-1});if(!z){t.sort&&T.sort(function(j,N){return N.value-j.value});var O,V,G=t.marker.colors||[],Z=!!G.length;return t._hasColorscale?(Z||(G=l?t.values:t._values),g6t(e,t,{vals:G,containerStr:"marker",cLetter:"c"}),V=d6t(t.marker)):O=v6t(r["_"+t.type+"colormap"]),T.each(function(j){var N=j.data.data;N.color=t._hasColorscale?V(G[N.i]):O(G[N.i],N.id)}),u[0].hierarchy=T,u}};IE._runCrossTraceCalc=function(e,t){var r=t._fullLayout,n=t.calcdata,i=r[e+"colorway"],a=r["_"+e+"colormap"];r["extend"+e+"colors"]&&(i=p6t(i,e==="icicle"?x6t:e==="treemap"?_6t:y6t));var o=0,s;function l(h){var d=h.data.data,v=d.id;d.color===!1&&(a[v]?d.color=a[v]:h.parent?h.parent.parent?d.color=h.parent.data.data.color:(a[v]=d.color=i[o%i.length],o++):d.color=s)}for(var u=0;u<n.length;u++){var c=n[u],f=c[0];f.trace.type===e&&f.hierarchy&&(s=f.trace.root.color,f.hierarchy.each(l))}};IE.crossTraceCalc=function(e){return IE._runCrossTraceCalc("sunburst",e)};function Qke(e,t,r){var n=0,i=e.children;if(i){for(var a=i.length,o=0;o<a;o++)n+=Qke(i[o],t,r);r.branches&&n++}else r.leaves&&n++;return e.value=e.data.data.value=n,t._values||(t._values=[]),t._values[e.data.data.i]=n,n}});function Zy(e,t,r){e.prototype=t.prototype=r,r.constructor=e}function G_(e,t){var r=Object.create(e.prototype);for(var n in t)r[n]=t[n];return r}var bD=gu(()=>{});function Gm(){}function tCe(){return this.rgb().formatHex()}function k6t(){return this.rgb().formatHex8()}function C6t(){return lCe(this).formatHsl()}function rCe(){return this.rgb().formatRgb()}function j_(e){var t,r;return e=(e+"").trim().toLowerCase(),(t=b6t.exec(e))?(r=t[1].length,t=parseInt(t[1],16),r===6?iCe(t):r===3?new _d(t>>8&15|t>>4&240,t>>4&15|t&240,(t&15)<<4|t&15,1):r===8?wD(t>>24&255,t>>16&255,t>>8&255,(t&255)/255):r===4?wD(t>>12&15|t>>8&240,t>>8&15|t>>4&240,t>>4&15|t&240,((t&15)<<4|t&15)/255):null):(t=w6t.exec(e))?new _d(t[1],t[2],t[3],1):(t=T6t.exec(e))?new _d(t[1]*255/100,t[2]*255/100,t[3]*255/100,1):(t=A6t.exec(e))?wD(t[1],t[2],t[3],t[4]):(t=S6t.exec(e))?wD(t[1]*255/100,t[2]*255/100,t[3]*255/100,t[4]):(t=M6t.exec(e))?oCe(t[1],t[2]/100,t[3]/100,1):(t=E6t.exec(e))?oCe(t[1],t[2]/100,t[3]/100,t[4]):eCe.hasOwnProperty(e)?iCe(eCe[e]):e==="transparent"?new _d(NaN,NaN,NaN,0):null}function iCe(e){return new _d(e>>16&255,e>>8&255,e&255,1)}function wD(e,t,r,n){return n<=0&&(e=t=r=NaN),new _d(e,t,r,n)}function FE(e){return e instanceof Gm||(e=j_(e)),e?(e=e.rgb(),new _d(e.r,e.g,e.b,e.opacity)):new _d}function kA(e,t,r,n){return arguments.length===1?FE(e):new _d(e,t,r,n==null?1:n)}function _d(e,t,r,n){this.r=+e,this.g=+t,this.b=+r,this.opacity=+n}function nCe(){return`#${k2(this.r)}${k2(this.g)}${k2(this.b)}`}function L6t(){return`#${k2(this.r)}${k2(this.g)}${k2(this.b)}${k2((isNaN(this.opacity)?1:this.opacity)*255)}`}function aCe(){let e=AD(this.opacity);return`${e===1?"rgb(":"rgba("}${C2(this.r)}, ${C2(this.g)}, ${C2(this.b)}${e===1?")":`, ${e})`}`}function AD(e){return isNaN(e)?1:Math.max(0,Math.min(1,e))}function C2(e){return Math.max(0,Math.min(255,Math.round(e)||0))}function k2(e){return e=C2(e),(e<16?"0":"")+e.toString(16)}function oCe(e,t,r,n){return n<=0?e=t=r=NaN:r<=0||r>=1?e=t=NaN:t<=0&&(e=NaN),new Zg(e,t,r,n)}function lCe(e){if(e instanceof Zg)return new Zg(e.h,e.s,e.l,e.opacity);if(e instanceof Gm||(e=j_(e)),!e)return new Zg;if(e instanceof Zg)return e;e=e.rgb();var t=e.r/255,r=e.g/255,n=e.b/255,i=Math.min(t,r,n),a=Math.max(t,r,n),o=NaN,s=a-i,l=(a+i)/2;return s?(t===a?o=(r-n)/s+(r<n)*6:r===a?o=(n-t)/s+2:o=(t-r)/s+4,s/=l<.5?a+i:2-a-i,o*=60):s=l>0&&l<1?0:o,new Zg(o,s,l,e.opacity)}function zE(e,t,r,n){return arguments.length===1?lCe(e):new Zg(e,t,r,n==null?1:n)}function Zg(e,t,r,n){this.h=+e,this.s=+t,this.l=+r,this.opacity=+n}function sCe(e){return e=(e||0)%360,e<0?e+360:e}function TD(e){return Math.max(0,Math.min(1,e||0))}function xW(e,t,r){return(e<60?t+(r-t)*e/60:e<180?r:e<240?t+(r-t)*(240-e)/60:t)*255}var H_,L2,EA,DE,Vm,b6t,w6t,T6t,A6t,S6t,M6t,E6t,eCe,SD=gu(()=>{bD();H_=.7,L2=1/H_,EA="\\s*([+-]?\\d+)\\s*",DE="\\s*([+-]?(?:\\d*\\.)?\\d+(?:[eE][+-]?\\d+)?)\\s*",Vm="\\s*([+-]?(?:\\d*\\.)?\\d+(?:[eE][+-]?\\d+)?)%\\s*",b6t=/^#([0-9a-f]{3,8})$/,w6t=new RegExp(`^rgb\\(${EA},${EA},${EA}\\)$`),T6t=new RegExp(`^rgb\\(${Vm},${Vm},${Vm}\\)$`),A6t=new RegExp(`^rgba\\(${EA},${EA},${EA},${DE}\\)$`),S6t=new RegExp(`^rgba\\(${Vm},${Vm},${Vm},${DE}\\)$`),M6t=new RegExp(`^hsl\\(${DE},${Vm},${Vm}\\)$`),E6t=new RegExp(`^hsla\\(${DE},${Vm},${Vm},${DE}\\)$`),eCe={aliceblue:15792383,antiquewhite:16444375,aqua:65535,aquamarine:8388564,azure:15794175,beige:16119260,bisque:16770244,black:0,blanchedalmond:16772045,blue:255,blueviolet:9055202,brown:10824234,burlywood:14596231,cadetblue:6266528,chartreuse:8388352,chocolate:13789470,coral:16744272,cornflowerblue:6591981,cornsilk:16775388,crimson:14423100,cyan:65535,darkblue:139,darkcyan:35723,darkgoldenrod:12092939,darkgray:11119017,darkgreen:25600,darkgrey:11119017,darkkhaki:12433259,darkmagenta:9109643,darkolivegreen:5597999,darkorange:16747520,darkorchid:10040012,darkred:9109504,darksalmon:15308410,darkseagreen:9419919,darkslateblue:4734347,darkslategray:3100495,darkslategrey:3100495,darkturquoise:52945,darkviolet:9699539,deeppink:16716947,deepskyblue:49151,dimgray:6908265,dimgrey:6908265,dodgerblue:2003199,firebrick:11674146,floralwhite:16775920,forestgreen:2263842,fuchsia:16711935,gainsboro:14474460,ghostwhite:16316671,gold:16766720,goldenrod:14329120,gray:8421504,green:32768,greenyellow:11403055,grey:8421504,honeydew:15794160,hotpink:16738740,indianred:13458524,indigo:4915330,ivory:16777200,khaki:15787660,lavender:15132410,lavenderblush:16773365,lawngreen:8190976,lemonchiffon:16775885,lightblue:11393254,lightcoral:15761536,lightcyan:14745599,lightgoldenrodyellow:16448210,lightgray:13882323,lightgreen:9498256,lightgrey:13882323,lightpink:16758465,lightsalmon:16752762,lightseagreen:2142890,lightskyblue:8900346,lightslategray:7833753,lightslategrey:7833753,lightsteelblue:11584734,lightyellow:16777184,lime:65280,limegreen:3329330,linen:16445670,magenta:16711935,maroon:8388608,mediumaquamarine:6737322,mediumblue:205,mediumorchid:12211667,mediumpurple:9662683,mediumseagreen:3978097,mediumslateblue:8087790,mediumspringgreen:64154,mediumturquoise:4772300,mediumvioletred:13047173,midnightblue:1644912,mintcream:16121850,mistyrose:16770273,moccasin:16770229,navajowhite:16768685,navy:128,oldlace:16643558,olive:8421376,olivedrab:7048739,orange:16753920,orangered:16729344,orchid:14315734,palegoldenrod:15657130,palegreen:10025880,paleturquoise:11529966,palevioletred:14381203,papayawhip:16773077,peachpuff:16767673,peru:13468991,pink:16761035,plum:14524637,powderblue:11591910,purple:8388736,rebeccapurple:6697881,red:16711680,rosybrown:12357519,royalblue:4286945,saddlebrown:9127187,salmon:16416882,sandybrown:16032864,seagreen:3050327,seashell:16774638,sienna:10506797,silver:12632256,skyblue:8900331,slateblue:6970061,slategray:7372944,slategrey:7372944,snow:16775930,springgreen:65407,steelblue:4620980,tan:13808780,teal:32896,thistle:14204888,tomato:16737095,turquoise:4251856,violet:15631086,wheat:16113331,white:16777215,whitesmoke:16119285,yellow:16776960,yellowgreen:10145074};Zy(Gm,j_,{copy(e){return Object.assign(new this.constructor,this,e)},displayable(){return this.rgb().displayable()},hex:tCe,formatHex:tCe,formatHex8:k6t,formatHsl:C6t,formatRgb:rCe,toString:rCe});Zy(_d,kA,G_(Gm,{brighter(e){return e=e==null?L2:Math.pow(L2,e),new _d(this.r*e,this.g*e,this.b*e,this.opacity)},darker(e){return e=e==null?H_:Math.pow(H_,e),new _d(this.r*e,this.g*e,this.b*e,this.opacity)},rgb(){return this},clamp(){return new _d(C2(this.r),C2(this.g),C2(this.b),AD(this.opacity))},displayable(){return-.5<=this.r&&this.r<255.5&&-.5<=this.g&&this.g<255.5&&-.5<=this.b&&this.b<255.5&&0<=this.opacity&&this.opacity<=1},hex:nCe,formatHex:nCe,formatHex8:L6t,formatRgb:aCe,toString:aCe}));Zy(Zg,zE,G_(Gm,{brighter(e){return e=e==null?L2:Math.pow(L2,e),new Zg(this.h,this.s,this.l*e,this.opacity)},darker(e){return e=e==null?H_:Math.pow(H_,e),new Zg(this.h,this.s,this.l*e,this.opacity)},rgb(){var e=this.h%360+(this.h<0)*360,t=isNaN(e)||isNaN(this.s)?0:this.s,r=this.l,n=r+(r<.5?r:1-r)*t,i=2*r-n;return new _d(xW(e>=240?e-240:e+120,i,n),xW(e,i,n),xW(e<120?e+240:e-120,i,n),this.opacity)},clamp(){return new Zg(sCe(this.h),TD(this.s),TD(this.l),AD(this.opacity))},displayable(){return(0<=this.s&&this.s<=1||isNaN(this.s))&&0<=this.l&&this.l<=1&&0<=this.opacity&&this.opacity<=1},formatHsl(){let e=AD(this.opacity);return`${e===1?"hsl(":"hsla("}${sCe(this.h)}, ${TD(this.s)*100}%, ${TD(this.l)*100}%${e===1?")":`, ${e})`}`}}))});var MD,ED,bW=gu(()=>{MD=Math.PI/180,ED=180/Math.PI});function vCe(e){if(e instanceof Hm)return new Hm(e.l,e.a,e.b,e.opacity);if(e instanceof Yy)return pCe(e);e instanceof _d||(e=FE(e));var t=SW(e.r),r=SW(e.g),n=SW(e.b),i=wW((.2225045*t+.7168786*r+.0606169*n)/cCe),a,o;return t===r&&r===n?a=o=i:(a=wW((.4360747*t+.3850649*r+.1430804*n)/uCe),o=wW((.0139322*t+.0971045*r+.7141733*n)/fCe)),new Hm(116*i-16,500*(a-i),200*(i-o),e.opacity)}function LA(e,t,r,n){return arguments.length===1?vCe(e):new Hm(e,t,r,n==null?1:n)}function Hm(e,t,r,n){this.l=+e,this.a=+t,this.b=+r,this.opacity=+n}function wW(e){return e>P6t?Math.pow(e,1/3):e/dCe+hCe}function TW(e){return e>CA?e*e*e:dCe*(e-hCe)}function AW(e){return 255*(e<=.0031308?12.92*e:1.055*Math.pow(e,1/2.4)-.055)}function SW(e){return(e/=255)<=.04045?e/12.92:Math.pow((e+.055)/1.055,2.4)}function I6t(e){if(e instanceof Yy)return new Yy(e.h,e.c,e.l,e.opacity);if(e instanceof Hm||(e=vCe(e)),e.a===0&&e.b===0)return new Yy(NaN,0<e.l&&e.l<100?0:NaN,e.l,e.opacity);var t=Math.atan2(e.b,e.a)*ED;return new Yy(t<0?t+360:t,Math.sqrt(e.a*e.a+e.b*e.b),e.l,e.opacity)}function OE(e,t,r,n){return arguments.length===1?I6t(e):new Yy(e,t,r,n==null?1:n)}function Yy(e,t,r,n){this.h=+e,this.c=+t,this.l=+r,this.opacity=+n}function pCe(e){if(isNaN(e.h))return new Hm(e.l,0,0,e.opacity);var t=e.h*MD;return new Hm(e.l,Math.cos(t)*e.c,Math.sin(t)*e.c,e.opacity)}var kD,uCe,cCe,fCe,hCe,CA,dCe,P6t,gCe=gu(()=>{bD();SD();bW();kD=18,uCe=.96422,cCe=1,fCe=.82521,hCe=4/29,CA=6/29,dCe=3*CA*CA,P6t=CA*CA*CA;Zy(Hm,LA,G_(Gm,{brighter(e){return new Hm(this.l+kD*(e==null?1:e),this.a,this.b,this.opacity)},darker(e){return new Hm(this.l-kD*(e==null?1:e),this.a,this.b,this.opacity)},rgb(){var e=(this.l+16)/116,t=isNaN(this.a)?e:e+this.a/500,r=isNaN(this.b)?e:e-this.b/200;return t=uCe*TW(t),e=cCe*TW(e),r=fCe*TW(r),new _d(AW(3.1338561*t-1.6168667*e-.4906146*r),AW(-.9787684*t+1.9161415*e+.033454*r),AW(.0719453*t-.2289914*e+1.4052427*r),this.opacity)}}));Zy(Yy,OE,G_(Gm,{brighter(e){return new Yy(this.h,this.c,this.l+kD*(e==null?1:e),this.opacity)},darker(e){return new Yy(this.h,this.c,this.l-kD*(e==null?1:e),this.opacity)},rgb(){return pCe(this).rgb()}}))});function R6t(e){if(e instanceof P2)return new P2(e.h,e.s,e.l,e.opacity);e instanceof _d||(e=FE(e));var t=e.r/255,r=e.g/255,n=e.b/255,i=(_Ce*n+mCe*t-yCe*r)/(_Ce+mCe-yCe),a=n-i,o=(qE*(r-i)-EW*a)/CD,s=Math.sqrt(o*o+a*a)/(qE*i*(1-i)),l=s?Math.atan2(o,a)*ED-120:NaN;return new P2(l<0?l+360:l,s,i,e.opacity)}function PA(e,t,r,n){return arguments.length===1?R6t(e):new P2(e,t,r,n==null?1:n)}function P2(e,t,r,n){this.h=+e,this.s=+t,this.l=+r,this.opacity=+n}var xCe,MW,EW,CD,qE,mCe,yCe,_Ce,bCe=gu(()=>{bD();SD();bW();xCe=-.14861,MW=1.78277,EW=-.29227,CD=-.90649,qE=1.97294,mCe=qE*CD,yCe=qE*MW,_Ce=MW*EW-CD*xCe;Zy(P2,PA,G_(Gm,{brighter(e){return e=e==null?L2:Math.pow(L2,e),new P2(this.h,this.s,this.l*e,this.opacity)},darker(e){return e=e==null?H_:Math.pow(H_,e),new P2(this.h,this.s,this.l*e,this.opacity)},rgb(){var e=isNaN(this.h)?0:(this.h+120)*MD,t=+this.l,r=isNaN(this.s)?0:this.s*t*(1-t),n=Math.cos(e),i=Math.sin(e);return new _d(255*(t+r*(xCe*n+MW*i)),255*(t+r*(EW*n+CD*i)),255*(t+r*(qE*n)),this.opacity)}}))});var I2=gu(()=>{SD();gCe();bCe()});function kW(e,t,r,n,i){var a=e*e,o=a*e;return((1-3*e+3*a-o)*t+(4-6*a+3*o)*r+(1+3*e+3*a-3*o)*n+o*i)/6}function LD(e){var t=e.length-1;return function(r){var n=r<=0?r=0:r>=1?(r=1,t-1):Math.floor(r*t),i=e[n],a=e[n+1],o=n>0?e[n-1]:2*i-a,s=n<t-1?e[n+2]:2*a-i;return kW((r-n/t)*t,o,i,a,s)}}var PD=gu(()=>{});function ID(e){var t=e.length;return function(r){var n=Math.floor(((r%=1)<0?++r:r)*t),i=e[(n+t-1)%t],a=e[n%t],o=e[(n+1)%t],s=e[(n+2)%t];return kW((r-n/t)*t,i,a,o,s)}}var CW=gu(()=>{PD()});var IA,LW=gu(()=>{IA=e=>()=>e});function wCe(e,t){return function(r){return e+r*t}}function D6t(e,t,r){return e=Math.pow(e,r),t=Math.pow(t,r)-e,r=1/r,function(n){return Math.pow(e+n*t,r)}}function W_(e,t){var r=t-e;return r?wCe(e,r>180||r<-180?r-360*Math.round(r/360):r):IA(isNaN(e)?t:e)}function TCe(e){return(e=+e)==1?$f:function(t,r){return r-t?D6t(t,r,e):IA(isNaN(t)?r:t)}}function $f(e,t){var r=t-e;return r?wCe(e,r):IA(isNaN(e)?t:e)}var R2=gu(()=>{LW()});function ACe(e){return function(t){var r=t.length,n=new Array(r),i=new Array(r),a=new Array(r),o,s;for(o=0;o<r;++o)s=kA(t[o]),n[o]=s.r||0,i[o]=s.g||0,a[o]=s.b||0;return n=e(n),i=e(i),a=e(a),s.opacity=1,function(l){return s.r=n(l),s.g=i(l),s.b=a(l),s+""}}}var BE,SCe,MCe,PW=gu(()=>{I2();PD();CW();R2();BE=function e(t){var r=TCe(t);function n(i,a){var o=r((i=kA(i)).r,(a=kA(a)).r),s=r(i.g,a.g),l=r(i.b,a.b),u=$f(i.opacity,a.opacity);return function(c){return i.r=o(c),i.g=s(c),i.b=l(c),i.opacity=u(c),i+""}}return n.gamma=e,n}(1);SCe=ACe(LD),MCe=ACe(ID)});function RA(e,t){t||(t=[]);var r=e?Math.min(t.length,e.length):0,n=t.slice(),i;return function(a){for(i=0;i<r;++i)n[i]=e[i]*(1-a)+t[i]*a;return n}}function RD(e){return ArrayBuffer.isView(e)&&!(e instanceof DataView)}var DD=gu(()=>{});function ECe(e,t){return(RD(t)?RA:IW)(e,t)}function IW(e,t){var r=t?t.length:0,n=e?Math.min(r,e.length):0,i=new Array(n),a=new Array(r),o;for(o=0;o<n;++o)i[o]=X_(e[o],t[o]);for(;o<r;++o)a[o]=t[o];return function(s){for(o=0;o<n;++o)a[o]=i[o](s);return a}}var RW=gu(()=>{NE();DD()});function FD(e,t){var r=new Date;return e=+e,t=+t,function(n){return r.setTime(e*(1-n)+t*n),r}}var DW=gu(()=>{});function zp(e,t){return e=+e,t=+t,function(r){return e*(1-r)+t*r}}var UE=gu(()=>{});function zD(e,t){var r={},n={},i;(e===null||typeof e!="object")&&(e={}),(t===null||typeof t!="object")&&(t={});for(i in t)i in e?r[i]=X_(e[i],t[i]):n[i]=t[i];return function(a){for(i in r)n[i]=r[i](a);return n}}var FW=gu(()=>{NE()});function F6t(e){return function(){return e}}function z6t(e){return function(t){return e(t)+""}}function OD(e,t){var r=OW.lastIndex=zW.lastIndex=0,n,i,a,o=-1,s=[],l=[];for(e=e+"",t=t+"";(n=OW.exec(e))&&(i=zW.exec(t));)(a=i.index)>r&&(a=t.slice(r,a),s[o]?s[o]+=a:s[++o]=a),(n=n[0])===(i=i[0])?s[o]?s[o]+=i:s[++o]=i:(s[++o]=null,l.push({i:o,x:zp(n,i)})),r=zW.lastIndex;return r<t.length&&(a=t.slice(r),s[o]?s[o]+=a:s[++o]=a),s.length<2?l[0]?z6t(l[0].x):F6t(t):(t=l.length,function(u){for(var c=0,f;c<t;++c)s[(f=l[c]).i]=f.x(u);return s.join("")})}var OW,zW,qW=gu(()=>{UE();OW=/[-+]?(?:\d+\.?\d*|\.?\d+)(?:[eE][-+]?\d+)?/g,zW=new RegExp(OW.source,"g")});function X_(e,t){var r=typeof t,n;return t==null||r==="boolean"?IA(t):(r==="number"?zp:r==="string"?(n=j_(t))?(t=n,BE):OD:t instanceof j_?BE:t instanceof Date?FD:RD(t)?RA:Array.isArray(t)?IW:typeof t.valueOf!="function"&&typeof t.toString!="function"||isNaN(t)?zD:zp)(e,t)}var NE=gu(()=>{I2();PW();RW();DW();UE();FW();qW();LW();DD()});function kCe(e){var t=e.length;return function(r){return e[Math.max(0,Math.min(t-1,Math.floor(r*t)))]}}var CCe=gu(()=>{});function LCe(e,t){var r=W_(+e,+t);return function(n){var i=r(n);return i-360*Math.floor(i/360)}}var PCe=gu(()=>{R2()});function ICe(e,t){return e=+e,t=+t,function(r){return Math.round(e*(1-r)+t*r)}}var RCe=gu(()=>{});function BW(e,t,r,n,i,a){var o,s,l;return(o=Math.sqrt(e*e+t*t))&&(e/=o,t/=o),(l=e*r+t*n)&&(r-=e*l,n-=t*l),(s=Math.sqrt(r*r+n*n))&&(r/=s,n/=s,l/=s),e*n<t*r&&(e=-e,t=-t,l=-l,o=-o),{translateX:i,translateY:a,rotate:Math.atan2(t,e)*DCe,skewX:Math.atan(l)*DCe,scaleX:o,scaleY:s}}var DCe,qD,FCe=gu(()=>{DCe=180/Math.PI,qD={translateX:0,translateY:0,rotate:0,skewX:0,scaleX:1,scaleY:1}});function zCe(e){let t=new(typeof DOMMatrix=="function"?DOMMatrix:WebKitCSSMatrix)(e+"");return t.isIdentity?qD:BW(t.a,t.b,t.c,t.d,t.e,t.f)}function OCe(e){return e==null?qD:(BD||(BD=document.createElementNS("http://www.w3.org/2000/svg","g")),BD.setAttribute("transform",e),(e=BD.transform.baseVal.consolidate())?(e=e.matrix,BW(e.a,e.b,e.c,e.d,e.e,e.f)):qD)}var BD,qCe=gu(()=>{FCe()});function BCe(e,t,r,n){function i(u){return u.length?u.pop()+" ":""}function a(u,c,f,h,d,v){if(u!==f||c!==h){var _=d.push("translate(",null,t,null,r);v.push({i:_-4,x:zp(u,f)},{i:_-2,x:zp(c,h)})}else(f||h)&&d.push("translate("+f+t+h+r)}function o(u,c,f,h){u!==c?(u-c>180?c+=360:c-u>180&&(u+=360),h.push({i:f.push(i(f)+"rotate(",null,n)-2,x:zp(u,c)})):c&&f.push(i(f)+"rotate("+c+n)}function s(u,c,f,h){u!==c?h.push({i:f.push(i(f)+"skewX(",null,n)-2,x:zp(u,c)}):c&&f.push(i(f)+"skewX("+c+n)}function l(u,c,f,h,d,v){if(u!==f||c!==h){var _=d.push(i(d)+"scale(",null,",",null,")");v.push({i:_-4,x:zp(u,f)},{i:_-2,x:zp(c,h)})}else(f!==1||h!==1)&&d.push(i(d)+"scale("+f+","+h+")")}return function(u,c){var f=[],h=[];return u=e(u),c=e(c),a(u.translateX,u.translateY,c.translateX,c.translateY,f,h),o(u.rotate,c.rotate,f,h),s(u.skewX,c.skewX,f,h),l(u.scaleX,u.scaleY,c.scaleX,c.scaleY,f,h),u=c=null,function(d){for(var v=-1,_=h.length,b;++v<_;)f[(b=h[v]).i]=b.x(d);return f.join("")}}}var NCe,UCe,VCe=gu(()=>{UE();qCe();NCe=BCe(zCe,"px, ","px)","deg)"),UCe=BCe(OCe,", ",")",")")});function GCe(e){return((e=Math.exp(e))+1/e)/2}function q6t(e){return((e=Math.exp(e))-1/e)/2}function B6t(e){return((e=Math.exp(2*e))-1)/(e+1)}var O6t,HCe,jCe=gu(()=>{O6t=1e-12;HCe=function e(t,r,n){function i(a,o){var s=a[0],l=a[1],u=a[2],c=o[0],f=o[1],h=o[2],d=c-s,v=f-l,_=d*d+v*v,b,p;if(_<O6t)p=Math.log(h/u)/t,b=function(C){return[s+C*d,l+C*v,u*Math.exp(t*C*p)]};else{var k=Math.sqrt(_),E=(h*h-u*u+n*_)/(2*u*r*k),S=(h*h-u*u-n*_)/(2*h*r*k),L=Math.log(Math.sqrt(E*E+1)-E),x=Math.log(Math.sqrt(S*S+1)-S);p=(x-L)/t,b=function(C){var M=C*p,g=GCe(L),P=u/(r*k)*(g*B6t(t*M+L)-q6t(L));return[s+P*d,l+P*v,u*g/GCe(t*M+L)]}}return b.duration=p*1e3*t/Math.SQRT2,b}return i.rho=function(a){var o=Math.max(.001,+a),s=o*o,l=s*s;return e(o,s,l)},i}(Math.SQRT2,2,4)});function WCe(e){return function(t,r){var n=e((t=zE(t)).h,(r=zE(r)).h),i=$f(t.s,r.s),a=$f(t.l,r.l),o=$f(t.opacity,r.opacity);return function(s){return t.h=n(s),t.s=i(s),t.l=a(s),t.opacity=o(s),t+""}}}var XCe,ZCe,YCe=gu(()=>{I2();R2();XCe=WCe(W_),ZCe=WCe($f)});function NW(e,t){var r=$f((e=LA(e)).l,(t=LA(t)).l),n=$f(e.a,t.a),i=$f(e.b,t.b),a=$f(e.opacity,t.opacity);return function(o){return e.l=r(o),e.a=n(o),e.b=i(o),e.opacity=a(o),e+""}}var KCe=gu(()=>{I2();R2()});function JCe(e){return function(t,r){var n=e((t=OE(t)).h,(r=OE(r)).h),i=$f(t.c,r.c),a=$f(t.l,r.l),o=$f(t.opacity,r.opacity);return function(s){return t.h=n(s),t.c=i(s),t.l=a(s),t.opacity=o(s),t+""}}}var $Ce,QCe,e6e=gu(()=>{I2();R2();$Ce=JCe(W_),QCe=JCe($f)});function t6e(e){return function t(r){r=+r;function n(i,a){var o=e((i=PA(i)).h,(a=PA(a)).h),s=$f(i.s,a.s),l=$f(i.l,a.l),u=$f(i.opacity,a.opacity);return function(c){return i.h=o(c),i.s=s(c),i.l=l(Math.pow(c,r)),i.opacity=u(c),i+""}}return n.gamma=t,n}(1)}var r6e,i6e,n6e=gu(()=>{I2();R2();r6e=t6e(W_),i6e=t6e($f)});function UW(e,t){t===void 0&&(t=e,e=X_);for(var r=0,n=t.length-1,i=t[0],a=new Array(n<0?0:n);r<n;)a[r]=e(i,i=t[++r]);return function(o){var s=Math.max(0,Math.min(n-1,Math.floor(o*=n)));return a[s](o-s)}}var a6e=gu(()=>{NE()});function o6e(e,t){for(var r=new Array(t),n=0;n<t;++n)r[n]=e(n/(t-1));return r}var s6e=gu(()=>{});var D2={};cee(D2,{interpolate:()=>X_,interpolateArray:()=>ECe,interpolateBasis:()=>LD,interpolateBasisClosed:()=>ID,interpolateCubehelix:()=>r6e,interpolateCubehelixLong:()=>i6e,interpolateDate:()=>FD,interpolateDiscrete:()=>kCe,interpolateHcl:()=>$Ce,interpolateHclLong:()=>QCe,interpolateHsl:()=>XCe,interpolateHslLong:()=>ZCe,interpolateHue:()=>LCe,interpolateLab:()=>NW,interpolateNumber:()=>zp,interpolateNumberArray:()=>RA,interpolateObject:()=>zD,interpolateRgb:()=>BE,interpolateRgbBasis:()=>SCe,interpolateRgbBasisClosed:()=>MCe,interpolateRound:()=>ICe,interpolateString:()=>OD,interpolateTransformCss:()=>NCe,interpolateTransformSvg:()=>UCe,interpolateZoom:()=>HCe,piecewise:()=>UW,quantize:()=>o6e});var F2=gu(()=>{NE();RW();PD();CW();DW();CCe();PCe();UE();DD();FW();RCe();qW();VCe();jCe();PW();YCe();KCe();e6e();n6e();a6e();s6e()});var ND=ye((Ypr,l6e)=>{"use strict";var N6t=So(),U6t=ka();l6e.exports=function(t,r,n,i,a){var o=r.data.data,s=o.i,l=a||o.color;if(s>=0){r.i=o.i;var u=n.marker;u.pattern?(!u.colors||!u.pattern.shape)&&(u.color=l,r.color=l):(u.color=l,r.color=l),N6t.pointStyle(t,n,i,r)}else U6t.fill(t,l)}});var VW=ye((Kpr,d6e)=>{"use strict";var u6e=Oa(),c6e=ka(),f6e=Dr(),V6t=bv().resizeText,G6t=ND();function H6t(e){var t=e._fullLayout._sunburstlayer.selectAll(".trace");V6t(e,t,"sunburst"),t.each(function(r){var n=u6e.select(this),i=r[0],a=i.trace;n.style("opacity",a.opacity),n.selectAll("path.surface").each(function(o){u6e.select(this).call(h6e,o,a,e)})})}function h6e(e,t,r,n){var i=t.data.data,a=!t.children,o=i.i,s=f6e.castOption(r,o,"marker.line.color")||c6e.defaultLine,l=f6e.castOption(r,o,"marker.line.width")||0;e.call(G6t,t,r,n).style("stroke-width",l).call(c6e.stroke,s).style("opacity",a?r.leaf.opacity:null)}d6e.exports={style:H6t,styleOne:h6e}});var Ky=ye(Bs=>{"use strict";var z2=Dr(),j6t=ka(),W6t=Sg(),v6e=l_();Bs.findEntryWithLevel=function(e,t){var r;return t&&e.eachAfter(function(n){if(Bs.getPtId(n)===t)return r=n.copy()}),r||e};Bs.findEntryWithChild=function(e,t){var r;return e.eachAfter(function(n){for(var i=n.children||[],a=0;a<i.length;a++){var o=i[a];if(Bs.getPtId(o)===t)return r=n.copy()}}),r||e};Bs.isEntry=function(e){return!e.parent};Bs.isLeaf=function(e){return!e.children};Bs.getPtId=function(e){return e.data.data.id};Bs.getPtLabel=function(e){return e.data.data.label};Bs.getValue=function(e){return e.value};Bs.isHierarchyRoot=function(e){return p6e(e)===""};Bs.setSliceCursor=function(e,t,r){var n=r.isTransitioning;if(!n){var i=e.datum();n=r.hideOnRoot&&Bs.isHierarchyRoot(i)||r.hideOnLeaves&&Bs.isLeaf(i)}W6t(e,n?null:"pointer")};function X6t(e,t,r){return{color:Bs.getOutsideTextFontKey("color",e,t,r),family:Bs.getOutsideTextFontKey("family",e,t,r),size:Bs.getOutsideTextFontKey("size",e,t,r),weight:Bs.getOutsideTextFontKey("weight",e,t,r),style:Bs.getOutsideTextFontKey("style",e,t,r),variant:Bs.getOutsideTextFontKey("variant",e,t,r),textcase:Bs.getOutsideTextFontKey("textcase",e,t,r),lineposition:Bs.getOutsideTextFontKey("lineposition",e,t,r),shadow:Bs.getOutsideTextFontKey("shadow",e,t,r)}}function Z6t(e,t,r,n){var i=(n||{}).onPathbar,a=t.data.data,o=a.i,s=z2.castOption(e,o,(i?"pathbar.textfont":"insidetextfont")+".color");return!s&&e._input.textfont&&(s=z2.castOption(e._input,o,"textfont.color")),{color:s||j6t.contrast(a.color),family:Bs.getInsideTextFontKey("family",e,t,r,n),size:Bs.getInsideTextFontKey("size",e,t,r,n),weight:Bs.getInsideTextFontKey("weight",e,t,r,n),style:Bs.getInsideTextFontKey("style",e,t,r,n),variant:Bs.getInsideTextFontKey("variant",e,t,r,n),textcase:Bs.getInsideTextFontKey("textcase",e,t,r,n),lineposition:Bs.getInsideTextFontKey("lineposition",e,t,r,n),shadow:Bs.getInsideTextFontKey("shadow",e,t,r,n)}}Bs.getInsideTextFontKey=function(e,t,r,n,i){var a=(i||{}).onPathbar,o=a?"pathbar.textfont":"insidetextfont",s=r.data.data.i;return z2.castOption(t,s,o+"."+e)||z2.castOption(t,s,"textfont."+e)||n.size};Bs.getOutsideTextFontKey=function(e,t,r,n){var i=r.data.data.i;return z2.castOption(t,i,"outsidetextfont."+e)||z2.castOption(t,i,"textfont."+e)||n.size};Bs.isOutsideText=function(e,t){return!e._hasColorscale&&Bs.isHierarchyRoot(t)};Bs.determineTextFont=function(e,t,r,n){return Bs.isOutsideText(e,t)?X6t(e,t,r):Z6t(e,t,r,n)};Bs.hasTransition=function(e){return!!(e&&e.duration>0)};Bs.getMaxDepth=function(e){return e.maxdepth>=0?e.maxdepth:1/0};Bs.isHeader=function(e,t){return!(Bs.isLeaf(e)||e.depth===t._maxDepth-1)};function p6e(e){return e.data.data.pid}Bs.getParent=function(e,t){return Bs.findEntryWithLevel(e,p6e(t))};Bs.listPath=function(e,t){var r=e.parent;if(!r)return[];var n=t?[r.data[t]]:[r];return Bs.listPath(r,t).concat(n)};Bs.getPath=function(e){return Bs.listPath(e,"label").join("/")+"/"};Bs.formatValue=v6e.formatPieValue;Bs.formatPercent=function(e,t){var r=z2.formatPercent(e,0);return r==="0%"&&(r=v6e.formatPiePercent(e,t)),r}});var HE=ye(($pr,y6e)=>{"use strict";var DA=Oa(),g6e=qa(),Y6t=ip().appendArrayPointValue,VE=vf(),m6e=Dr(),K6t=_3(),rd=Ky(),J6t=l_(),$6t=J6t.formatPieValue;y6e.exports=function(t,r,n,i,a){var o=i[0],s=o.trace,l=o.hierarchy,u=s.type==="sunburst",c=s.type==="treemap"||s.type==="icicle";"_hasHoverLabel"in s||(s._hasHoverLabel=!1),"_hasHoverEvent"in s||(s._hasHoverEvent=!1);var f=function(v){var _=n._fullLayout;if(!(n._dragging||_.hovermode===!1)){var b=n._fullData[s.index],p=v.data.data,k=p.i,E=rd.isHierarchyRoot(v),S=rd.getParent(l,v),L=rd.getValue(v),x=function(Ee){return m6e.castOption(b,k,Ee)},C=x("hovertemplate"),M=VE.castHoverinfo(b,_,k),g=_.separators,P;if(C||M&&M!=="none"&&M!=="skip"){var T,z;u&&(T=o.cx+v.pxmid[0]*(1-v.rInscribed),z=o.cy+v.pxmid[1]*(1-v.rInscribed)),c&&(T=v._hoverX,z=v._hoverY);var O={},V=[],G=[],Z=function(Ee){return V.indexOf(Ee)!==-1};M&&(V=M==="all"?b._module.attributes.hoverinfo.flags:M.split("+")),O.label=p.label,Z("label")&&O.label&&G.push(O.label),p.hasOwnProperty("v")&&(O.value=p.v,O.valueLabel=$6t(O.value,g),Z("value")&&G.push(O.valueLabel)),O.currentPath=v.currentPath=rd.getPath(v.data),Z("current path")&&!E&&G.push(O.currentPath);var j,N=[],H=function(){N.indexOf(j)===-1&&(G.push(j),N.push(j))};O.percentParent=v.percentParent=L/rd.getValue(S),O.parent=v.parentString=rd.getPtLabel(S),Z("percent parent")&&(j=rd.formatPercent(O.percentParent,g)+" of "+O.parent,H()),O.percentEntry=v.percentEntry=L/rd.getValue(r),O.entry=v.entry=rd.getPtLabel(r),Z("percent entry")&&!E&&!v.onPathbar&&(j=rd.formatPercent(O.percentEntry,g)+" of "+O.entry,H()),O.percentRoot=v.percentRoot=L/rd.getValue(l),O.root=v.root=rd.getPtLabel(l),Z("percent root")&&!E&&(j=rd.formatPercent(O.percentRoot,g)+" of "+O.root,H()),O.text=x("hovertext")||x("text"),Z("text")&&(j=O.text,m6e.isValidTextValue(j)&&G.push(j)),P=[GE(v,b,a.eventDataKeys)];var te={trace:b,y:z,_x0:v._x0,_x1:v._x1,_y0:v._y0,_y1:v._y1,text:G.join("<br>"),name:C||Z("name")?b.name:void 0,color:x("hoverlabel.bgcolor")||p.color,borderColor:x("hoverlabel.bordercolor"),fontFamily:x("hoverlabel.font.family"),fontSize:x("hoverlabel.font.size"),fontColor:x("hoverlabel.font.color"),fontWeight:x("hoverlabel.font.weight"),fontStyle:x("hoverlabel.font.style"),fontVariant:x("hoverlabel.font.variant"),nameLength:x("hoverlabel.namelength"),textAlign:x("hoverlabel.align"),hovertemplate:C,hovertemplateLabels:O,eventData:P};u&&(te.x0=T-v.rInscribed*v.rpx1,te.x1=T+v.rInscribed*v.rpx1,te.idealAlign=v.pxmid[0]<0?"left":"right"),c&&(te.x=T,te.idealAlign=T<0?"left":"right");var oe=[];VE.loneHover(te,{container:_._hoverlayer.node(),outerContainer:_._paper.node(),gd:n,inOut_bbox:oe}),P[0].bbox=oe[0],s._hasHoverLabel=!0}if(c){var _e=t.select("path.surface");a.styleOne(_e,v,b,n,{hovered:!0})}s._hasHoverEvent=!0,n.emit("plotly_hover",{points:P||[GE(v,b,a.eventDataKeys)],event:DA.event})}},h=function(v){var _=n._fullLayout,b=n._fullData[s.index],p=DA.select(this).datum();if(s._hasHoverEvent&&(v.originalEvent=DA.event,n.emit("plotly_unhover",{points:[GE(p,b,a.eventDataKeys)],event:DA.event}),s._hasHoverEvent=!1),s._hasHoverLabel&&(VE.loneUnhover(_._hoverlayer.node()),s._hasHoverLabel=!1),c){var k=t.select("path.surface");a.styleOne(k,p,b,n,{hovered:!1})}},d=function(v){var _=n._fullLayout,b=n._fullData[s.index],p=u&&(rd.isHierarchyRoot(v)||rd.isLeaf(v)),k=rd.getPtId(v),E=rd.isEntry(v)?rd.findEntryWithChild(l,k):rd.findEntryWithLevel(l,k),S=rd.getPtId(E),L={points:[GE(v,b,a.eventDataKeys)],event:DA.event};p||(L.nextLevel=S);var x=K6t.triggerHandler(n,"plotly_"+s.type+"click",L);if(x!==!1&&_.hovermode&&(n._hoverdata=[GE(v,b,a.eventDataKeys)],VE.click(n,DA.event)),!p&&x!==!1&&!n._dragging&&!n._transitioning){g6e.call("_storeDirectGUIEdit",b,_._tracePreGUI[b.uid],{level:b.level});var C={data:[{level:S}],traces:[s.index]},M={frame:{redraw:!1,duration:a.transitionTime},transition:{duration:a.transitionTime,easing:a.transitionEasing},mode:"immediate",fromcurrent:!0};VE.loneUnhover(_._hoverlayer.node()),g6e.call("animate",n,C,M)}};t.on("mouseover",f),t.on("mouseout",h),t.on("click",d)};function GE(e,t,r){for(var n=e.data.data,i={curveNumber:t.index,pointNumber:n.i,data:t._input,fullData:t},a=0;a<r.length;a++){var o=r[a];o in e&&(i[o]=e[o])}return"parentString"in e&&!rd.isHierarchyRoot(e)&&(i.parent=e.parentString),Y6t(i,t,n.i),i}});var VD=ye(UD=>{"use strict";var jE=Oa(),Q6t=PE(),Yg=(F2(),ob(D2)).interpolate,_6e=So(),Av=Dr(),eLt=ru(),T6e=bv(),x6e=T6e.recordMinTextSize,tLt=T6e.clearMinTextSize,A6e=_D(),rLt=l_().getRotationAngle,iLt=A6e.computeTransform,nLt=A6e.transformInsideText,aLt=VW().styleOne,oLt=N0().resizeText,sLt=HE(),GW=yW(),Rl=Ky();UD.plot=function(e,t,r,n){var i=e._fullLayout,a=i._sunburstlayer,o,s,l=!r,u=!i.uniformtext.mode&&Rl.hasTransition(r);if(tLt("sunburst",i),o=a.selectAll("g.trace.sunburst").data(t,function(f){return f[0].trace.uid}),o.enter().append("g").classed("trace",!0).classed("sunburst",!0).attr("stroke-linejoin","round"),o.order(),u){n&&(s=n());var c=jE.transition().duration(r.duration).ease(r.easing).each("end",function(){s&&s()}).each("interrupt",function(){s&&s()});c.each(function(){a.selectAll("g.trace").each(function(f){b6e(e,f,this,r)})})}else o.each(function(f){b6e(e,f,this,r)}),i.uniformtext.mode&&oLt(e,i._sunburstlayer.selectAll(".trace"),"sunburst");l&&o.exit().remove()};function b6e(e,t,r,n){var i=e._context.staticPlot,a=e._fullLayout,o=!a.uniformtext.mode&&Rl.hasTransition(n),s=jE.select(r),l=s.selectAll("g.slice"),u=t[0],c=u.trace,f=u.hierarchy,h=Rl.findEntryWithLevel(f,c.level),d=Rl.getMaxDepth(c),v=a._size,_=c.domain,b=v.w*(_.x[1]-_.x[0]),p=v.h*(_.y[1]-_.y[0]),k=.5*Math.min(b,p),E=u.cx=v.l+v.w*(_.x[1]+_.x[0])/2,S=u.cy=v.t+v.h*(1-_.y[0])-p/2;if(!h)return l.remove();var L=null,x={};o&&l.each(function(me){x[Rl.getPtId(me)]={rpx0:me.rpx0,rpx1:me.rpx1,x0:me.x0,x1:me.x1,transform:me.transform},!L&&Rl.isEntry(me)&&(L=me)});var C=lLt(h).descendants(),M=h.height+1,g=0,P=d;u.hasMultipleRoots&&Rl.isHierarchyRoot(h)&&(C=C.slice(1),M-=1,g=1,P+=1),C=C.filter(function(me){return me.y1<=P});var T=rLt(c.rotation);T&&C.forEach(function(me){me.x0+=T,me.x1+=T});var z=Math.min(M,d),O=function(me){return(me-g)/z*k},V=function(me,ie){return[me*Math.cos(ie),-me*Math.sin(ie)]},G=function(me){return Av.pathAnnulus(me.rpx0,me.rpx1,me.x0,me.x1,E,S)},Z=function(me){return E+w6e(me)[0]*(me.transform.rCenter||0)+(me.transform.x||0)},j=function(me){return S+w6e(me)[1]*(me.transform.rCenter||0)+(me.transform.y||0)};l=l.data(C,Rl.getPtId),l.enter().append("g").classed("slice",!0),o?l.exit().transition().each(function(){var me=jE.select(this),ie=me.select("path.surface");ie.transition().attrTween("d",function(Le){var Ae=oe(Le);return function(Fe){return G(Ae(Fe))}});var Se=me.select("g.slicetext");Se.attr("opacity",0)}).remove():l.exit().remove(),l.order();var N=null;if(o&&L){var H=Rl.getPtId(L);l.each(function(me){N===null&&Rl.getPtId(me)===H&&(N=me.x1)})}var te=l;o&&(te=te.transition().each("end",function(){var me=jE.select(this);Rl.setSliceCursor(me,e,{hideOnRoot:!0,hideOnLeaves:!0,isTransitioning:!1})})),te.each(function(me){var ie=jE.select(this),Se=Av.ensureSingle(ie,"path","surface",function(Re){Re.style("pointer-events",i?"none":"all")});me.rpx0=O(me.y0),me.rpx1=O(me.y1),me.xmid=(me.x0+me.x1)/2,me.pxmid=V(me.rpx1,me.xmid),me.midangle=-(me.xmid-Math.PI/2),me.startangle=-(me.x0-Math.PI/2),me.stopangle=-(me.x1-Math.PI/2),me.halfangle=.5*Math.min(Av.angleDelta(me.x0,me.x1)||Math.PI,Math.PI),me.ring=1-me.rpx0/me.rpx1,me.rInscribed=uLt(me,c),o?Se.transition().attrTween("d",function(Re){var ce=_e(Re);return function(Ze){return G(ce(Ze))}}):Se.attr("d",G),ie.call(sLt,h,e,t,{eventDataKeys:GW.eventDataKeys,transitionTime:GW.CLICK_TRANSITION_TIME,transitionEasing:GW.CLICK_TRANSITION_EASING}).call(Rl.setSliceCursor,e,{hideOnRoot:!0,hideOnLeaves:!0,isTransitioning:e._transitioning}),Se.call(aLt,me,c,e);var Le=Av.ensureSingle(ie,"g","slicetext"),Ae=Av.ensureSingle(Le,"text","",function(Re){Re.attr("data-notex",1)}),Fe=Av.ensureUniformFontSize(e,Rl.determineTextFont(c,me,a.font));Ae.text(UD.formatSliceLabel(me,h,c,t,a)).classed("slicetext",!0).attr("text-anchor","middle").call(_6e.font,Fe).call(eLt.convertToTspans,e);var Pe=_6e.bBox(Ae.node());me.transform=nLt(Pe,me,u),me.transform.targetX=Z(me),me.transform.targetY=j(me);var ge=function(Re,ce){var Ze=Re.transform;return iLt(Ze,ce),Ze.fontSize=Fe.size,x6e(c.type,Ze,a),Av.getTextTransform(Ze)};o?Ae.transition().attrTween("transform",function(Re){var ce=Ee(Re);return function(Ze){return ge(ce(Ze),Pe)}}):Ae.attr("transform",ge(me,Pe))});function oe(me){var ie=Rl.getPtId(me),Se=x[ie],Le=x[Rl.getPtId(h)],Ae;if(Le){var Fe=(me.x1>Le.x1?2*Math.PI:0)+T;Ae=me.rpx1<Le.rpx1?{x0:me.x0,x1:me.x1,rpx0:0,rpx1:0}:{x0:Fe,x1:Fe,rpx0:me.rpx0,rpx1:me.rpx1}}else{var Pe,ge=Rl.getPtId(me.parent);l.each(function(pt){if(Rl.getPtId(pt)===ge)return Pe=pt});var Re=Pe.children,ce;Re.forEach(function(pt,Zt){if(Rl.getPtId(pt)===ie)return ce=Zt});var Ze=Re.length,ut=Yg(Pe.x0,Pe.x1);Ae={rpx0:k,rpx1:k,x0:ut(ce/Ze),x1:ut((ce+1)/Ze)}}return Yg(Se,Ae)}function _e(me){var ie=x[Rl.getPtId(me)],Se,Le={x0:me.x0,x1:me.x1,rpx0:me.rpx0,rpx1:me.rpx1};if(ie)Se=ie;else if(L)if(me.parent)if(N){var Ae=(me.x1>N?2*Math.PI:0)+T;Se={x0:Ae,x1:Ae}}else Se={rpx0:k,rpx1:k},Av.extendFlat(Se,Ce(me));else Se={rpx0:0,rpx1:0};else Se={x0:T,x1:T};return Yg(Se,Le)}function Ee(me){var ie=x[Rl.getPtId(me)],Se,Le=me.transform;if(ie)Se=ie;else if(Se={rpx1:me.rpx1,transform:{textPosAngle:Le.textPosAngle,scale:0,rotate:Le.rotate,rCenter:Le.rCenter,x:Le.x,y:Le.y}},L)if(me.parent)if(N){var Ae=me.x1>N?2*Math.PI:0;Se.x0=Se.x1=Ae}else Av.extendFlat(Se,Ce(me));else Se.x0=Se.x1=T;else Se.x0=Se.x1=T;var Fe=Yg(Se.transform.textPosAngle,me.transform.textPosAngle),Pe=Yg(Se.rpx1,me.rpx1),ge=Yg(Se.x0,me.x0),Re=Yg(Se.x1,me.x1),ce=Yg(Se.transform.scale,Le.scale),Ze=Yg(Se.transform.rotate,Le.rotate),ut=Le.rCenter===0?3:Se.transform.rCenter===0?1/3:1,pt=Yg(Se.transform.rCenter,Le.rCenter),Zt=function(st){return pt(Math.pow(st,ut))};return function(st){var lt=Pe(st),Gt=ge(st),Nt=Re(st),Jt=Zt(st),sr=V(lt,(Gt+Nt)/2),wr=Fe(st),cr={pxmid:sr,rpx1:lt,transform:{textPosAngle:wr,rCenter:Jt,x:Le.x,y:Le.y}};return x6e(c.type,Le,a),{transform:{targetX:Z(cr),targetY:j(cr),scale:ce(st),rotate:Ze(st),rCenter:Jt}}}}function Ce(me){var ie=me.parent,Se=x[Rl.getPtId(ie)],Le={};if(Se){var Ae=ie.children,Fe=Ae.indexOf(me),Pe=Ae.length,ge=Yg(Se.x0,Se.x1);Le.x0=ge(Fe/Pe),Le.x1=ge(Fe/Pe)}else Le.x0=Le.x1=0;return Le}}function lLt(e){return Q6t.partition().size([2*Math.PI,e.height+1])(e)}UD.formatSliceLabel=function(e,t,r,n,i){var a=r.texttemplate,o=r.textinfo;if(!a&&(!o||o==="none"))return"";var s=i.separators,l=n[0],u=e.data.data,c=l.hierarchy,f=Rl.isHierarchyRoot(e),h=Rl.getParent(c,e),d=Rl.getValue(e);if(!a){var v=o.split("+"),_=function(g){return v.indexOf(g)!==-1},b=[],p;if(_("label")&&u.label&&b.push(u.label),u.hasOwnProperty("v")&&_("value")&&b.push(Rl.formatValue(u.v,s)),!f){_("current path")&&b.push(Rl.getPath(e.data));var k=0;_("percent parent")&&k++,_("percent entry")&&k++,_("percent root")&&k++;var E=k>1;if(k){var S,L=function(g){p=Rl.formatPercent(S,s),E&&(p+=" of "+g),b.push(p)};_("percent parent")&&!f&&(S=d/Rl.getValue(h),L("parent")),_("percent entry")&&(S=d/Rl.getValue(t),L("entry")),_("percent root")&&(S=d/Rl.getValue(c),L("root"))}}return _("text")&&(p=Av.castOption(r,u.i,"text"),Av.isValidTextValue(p)&&b.push(p)),b.join("<br>")}var x=Av.castOption(r,u.i,"texttemplate");if(!x)return"";var C={};u.label&&(C.label=u.label),u.hasOwnProperty("v")&&(C.value=u.v,C.valueLabel=Rl.formatValue(u.v,s)),C.currentPath=Rl.getPath(e.data),f||(C.percentParent=d/Rl.getValue(h),C.percentParentLabel=Rl.formatPercent(C.percentParent,s),C.parent=Rl.getPtLabel(h)),C.percentEntry=d/Rl.getValue(t),C.percentEntryLabel=Rl.formatPercent(C.percentEntry,s),C.entry=Rl.getPtLabel(t),C.percentRoot=d/Rl.getValue(c),C.percentRootLabel=Rl.formatPercent(C.percentRoot,s),C.root=Rl.getPtLabel(c),u.hasOwnProperty("color")&&(C.color=u.color);var M=Av.castOption(r,u.i,"text");return(Av.isValidTextValue(M)||M==="")&&(C.text=M),C.customdata=Av.castOption(r,u.i,"customdata"),Av.texttemplateString({data:[C,r._meta],fallback:r.texttemplatefallback,labels:C,locale:i._d3locale,template:x})};function uLt(e){return e.rpx0===0&&Av.isFullCircle([e.x0,e.x1])?1:Math.max(0,Math.min(1/(1+1/Math.sin(e.halfangle)),e.ring/2))}function w6e(e){return cLt(e.rpx1,e.transform.textPosAngle)}function cLt(e,t){return[e*Math.sin(t),-e*Math.cos(t)]}});var M6e=ye((e0r,S6e)=>{"use strict";S6e.exports={moduleType:"trace",name:"sunburst",basePlotModule:Bke(),categories:[],animatable:!0,attributes:LE(),layoutAttributes:_W(),supplyDefaults:Zke(),supplyLayoutDefaults:Kke(),calc:RE().calc,crossTraceCalc:RE().crossTraceCalc,plot:VD().plot,style:VW().style,colorbar:$d(),meta:{}}});var k6e=ye((t0r,E6e)=>{"use strict";E6e.exports=M6e()});var L6e=ye(FA=>{"use strict";var C6e=Mc();FA.name="treemap";FA.plot=function(e,t,r,n){C6e.plotBasePlot(FA.name,e,t,r,n)};FA.clean=function(e,t,r,n){C6e.cleanBasePlot(FA.name,e,t,r,n)}});var O2=ye((i0r,P6e)=>{"use strict";P6e.exports={CLICK_TRANSITION_TIME:750,CLICK_TRANSITION_EASING:"poly",eventDataKeys:["currentPath","root","entry","percentRoot","percentEntry","percentParent"],gapWithPathbar:1}});var GD=ye((n0r,D6e)=>{"use strict";var{hovertemplateAttrs:fLt,texttemplateAttrs:hLt,templatefallbackAttrs:I6e}=Ll(),dLt=Tu(),vLt=Cc().attributes,q2=M2(),Q0=LE(),R6e=O2(),HW=Ao().extendFlat,pLt=Pd().pattern;D6e.exports={labels:Q0.labels,parents:Q0.parents,values:Q0.values,branchvalues:Q0.branchvalues,count:Q0.count,level:Q0.level,maxdepth:Q0.maxdepth,tiling:{packing:{valType:"enumerated",values:["squarify","binary","dice","slice","slice-dice","dice-slice"],dflt:"squarify",editType:"plot"},squarifyratio:{valType:"number",min:1,dflt:1,editType:"plot"},flip:{valType:"flaglist",flags:["x","y"],dflt:"",editType:"plot"},pad:{valType:"number",min:0,dflt:3,editType:"plot"},editType:"calc"},marker:HW({pad:{t:{valType:"number",min:0,editType:"plot"},l:{valType:"number",min:0,editType:"plot"},r:{valType:"number",min:0,editType:"plot"},b:{valType:"number",min:0,editType:"plot"},editType:"calc"},colors:Q0.marker.colors,pattern:pLt,depthfade:{valType:"enumerated",values:[!0,!1,"reversed"],editType:"style"},line:Q0.marker.line,cornerradius:{valType:"number",min:0,dflt:0,editType:"plot"},editType:"calc"},dLt("marker",{colorAttr:"colors",anim:!1})),pathbar:{visible:{valType:"boolean",dflt:!0,editType:"plot"},side:{valType:"enumerated",values:["top","bottom"],dflt:"top",editType:"plot"},edgeshape:{valType:"enumerated",values:[">","<","|","/","\\"],dflt:">",editType:"plot"},thickness:{valType:"number",min:12,editType:"plot"},textfont:HW({},q2.textfont,{}),editType:"calc"},text:q2.text,textinfo:Q0.textinfo,texttemplate:hLt({editType:"plot"},{keys:R6e.eventDataKeys.concat(["label","value"])}),texttemplatefallback:I6e({editType:"plot"}),hovertext:q2.hovertext,hoverinfo:Q0.hoverinfo,hovertemplate:fLt({},{keys:R6e.eventDataKeys}),hovertemplatefallback:I6e(),textfont:q2.textfont,insidetextfont:q2.insidetextfont,outsidetextfont:HW({},q2.outsidetextfont,{}),textposition:{valType:"enumerated",values:["top left","top center","top right","middle left","middle center","middle right","bottom left","bottom center","bottom right"],dflt:"top left",editType:"plot"},sort:q2.sort,root:Q0.root,domain:vLt({name:"treemap",trace:!0,editType:"calc"})}});var jW=ye((a0r,F6e)=>{"use strict";F6e.exports={treemapcolorway:{valType:"colorlist",editType:"calc"},extendtreemapcolors:{valType:"boolean",dflt:!0,editType:"calc"}}});var B6e=ye((o0r,q6e)=>{"use strict";var z6e=Dr(),gLt=GD(),mLt=ka(),yLt=Cc().defaults,_Lt=r0().handleText,xLt=e2().TEXTPAD,bLt=E2().handleMarkerDefaults,O6e=tc(),wLt=O6e.hasColorscale,TLt=O6e.handleDefaults;q6e.exports=function(t,r,n,i){function a(b,p){return z6e.coerce(t,r,gLt,b,p)}var o=a("labels"),s=a("parents");if(!o||!o.length||!s||!s.length){r.visible=!1;return}var l=a("values");l&&l.length?a("branchvalues"):a("count"),a("level"),a("maxdepth");var u=a("tiling.packing");u==="squarify"&&a("tiling.squarifyratio"),a("tiling.flip"),a("tiling.pad");var c=a("text");a("texttemplate"),a("texttemplatefallback"),r.texttemplate||a("textinfo",z6e.isArrayOrTypedArray(c)?"text+label":"label"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback");var f=a("pathbar.visible"),h="auto";_Lt(t,r,i,a,h,{hasPathbar:f,moduleHasSelected:!1,moduleHasUnselected:!1,moduleHasConstrain:!1,moduleHasCliponaxis:!1,moduleHasTextangle:!1,moduleHasInsideanchor:!1}),a("textposition");var d=r.textposition.indexOf("bottom")!==-1;bLt(t,r,i,a);var v=r._hasColorscale=wLt(t,"marker","colors")||(t.marker||{}).coloraxis;v?TLt(t,r,i,a,{prefix:"marker.",cLetter:"c"}):a("marker.depthfade",!(r.marker.colors||[]).length);var _=r.textfont.size*2;a("marker.pad.t",d?_/4:_),a("marker.pad.l",_/4),a("marker.pad.r",_/4),a("marker.pad.b",d?_:_/4),a("marker.cornerradius"),r._hovered={marker:{line:{width:2,color:mLt.contrast(i.paper_bgcolor)}}},f&&(a("pathbar.thickness",r.pathbar.textfont.size+2*xLt),a("pathbar.side"),a("pathbar.edgeshape")),a("sort"),a("root.color"),yLt(r,i,a),r._length=null}});var U6e=ye((s0r,N6e)=>{"use strict";var ALt=Dr(),SLt=jW();N6e.exports=function(t,r){function n(i,a){return ALt.coerce(t,r,SLt,i,a)}n("treemapcolorway",r.colorway),n("extendtreemapcolors")}});var XW=ye(WW=>{"use strict";var V6e=RE();WW.calc=function(e,t){return V6e.calc(e,t)};WW.crossTraceCalc=function(e){return V6e._runCrossTraceCalc("treemap",e)}});var ZW=ye((u0r,G6e)=>{"use strict";G6e.exports=function e(t,r,n){var i;n.swapXY&&(i=t.x0,t.x0=t.y0,t.y0=i,i=t.x1,t.x1=t.y1,t.y1=i),n.flipX&&(i=t.x0,t.x0=r[0]-t.x1,t.x1=r[0]-i),n.flipY&&(i=t.y0,t.y0=r[1]-t.y1,t.y1=r[1]-i);var a=t.children;if(a)for(var o=0;o<a.length;o++)e(a[o],r,n)}});var YW=ye((c0r,H6e)=>{"use strict";var zA=PE(),MLt=ZW();H6e.exports=function(t,r,n){var i=n.flipX,a=n.flipY,o=n.packing==="dice-slice",s=n.pad[a?"bottom":"top"],l=n.pad[i?"right":"left"],u=n.pad[i?"left":"right"],c=n.pad[a?"top":"bottom"],f;o&&(f=l,l=s,s=f,f=u,u=c,c=f);var h=zA.treemap().tile(ELt(n.packing,n.squarifyratio)).paddingInner(n.pad.inner).paddingLeft(l).paddingRight(u).paddingTop(s).paddingBottom(c).size(o?[r[1],r[0]]:r)(t);return(o||i||a)&&MLt(h,r,{swapXY:o,flipX:i,flipY:a}),h};function ELt(e,t){switch(e){case"squarify":return zA.treemapSquarify.ratio(t);case"binary":return zA.treemapBinary;case"dice":return zA.treemapDice;case"slice":return zA.treemapSlice;default:return zA.treemapSliceDice}}});var HD=ye((f0r,Z6e)=>{"use strict";var j6e=Oa(),OA=ka(),W6e=Dr(),KW=Ky(),kLt=bv().resizeText,CLt=ND();function LLt(e){var t=e._fullLayout._treemaplayer.selectAll(".trace");kLt(e,t,"treemap"),t.each(function(r){var n=j6e.select(this),i=r[0],a=i.trace;n.style("opacity",a.opacity),n.selectAll("path.surface").each(function(o){j6e.select(this).call(X6e,o,a,e,{hovered:!1})})})}function X6e(e,t,r,n,i){var a=(i||{}).hovered,o=t.data.data,s=o.i,l,u,c=o.color,f=KW.isHierarchyRoot(t),h=1;if(a)l=r._hovered.marker.line.color,u=r._hovered.marker.line.width;else if(f&&c===r.root.color)h=100,l="rgba(0,0,0,0)",u=0;else if(l=W6e.castOption(r,s,"marker.line.color")||OA.defaultLine,u=W6e.castOption(r,s,"marker.line.width")||0,!r._hasColorscale&&!t.onPathbar){var d=r.marker.depthfade;if(d){var v=OA.combine(OA.addOpacity(r._backgroundColor,.75),c),_;if(d===!0){var b=KW.getMaxDepth(r);isFinite(b)?KW.isLeaf(t)?_=0:_=r._maxVisibleLayers-(t.data.depth-r._entryDepth):_=t.data.height+1}else _=t.data.depth-r._entryDepth,r._atRootLevel||_++;if(_>0)for(var p=0;p<_;p++){var k=.5*p/_;c=OA.combine(OA.addOpacity(v,k),c)}}}e.call(CLt,t,r,n,c).style("stroke-width",u).call(OA.stroke,l).style("opacity",h)}Z6e.exports={style:LLt,styleOne:X6e}});var Q6e=ye((h0r,$6e)=>{"use strict";var Y6e=Oa(),jD=Dr(),K6e=So(),PLt=ru(),ILt=YW(),J6e=HD().styleOne,JW=O2(),qA=Ky(),RLt=HE(),$W=!0;$6e.exports=function(t,r,n,i,a){var o=a.barDifY,s=a.width,l=a.height,u=a.viewX,c=a.viewY,f=a.pathSlice,h=a.toMoveInsideSlice,d=a.strTransform,v=a.hasTransition,_=a.handleSlicesExit,b=a.makeUpdateSliceInterpolator,p=a.makeUpdateTextInterpolator,k={},E=t._context.staticPlot,S=t._fullLayout,L=r[0],x=L.trace,C=L.hierarchy,M=s/x._entryDepth,g=qA.listPath(n.data,"id"),P=ILt(C.copy(),[s,l],{packing:"dice",pad:{inner:0,top:0,left:0,right:0,bottom:0}}).descendants();P=P.filter(function(z){var O=g.indexOf(z.data.id);return O===-1?!1:(z.x0=M*O,z.x1=M*(O+1),z.y0=o,z.y1=o+l,z.onPathbar=!0,!0)}),P.reverse(),i=i.data(P,qA.getPtId),i.enter().append("g").classed("pathbar",!0),_(i,$W,k,[s,l],f),i.order();var T=i;v&&(T=T.transition().each("end",function(){var z=Y6e.select(this);qA.setSliceCursor(z,t,{hideOnRoot:!1,hideOnLeaves:!1,isTransitioning:!1})})),T.each(function(z){z._x0=u(z.x0),z._x1=u(z.x1),z._y0=c(z.y0),z._y1=c(z.y1),z._hoverX=u(z.x1-Math.min(s,l)/2),z._hoverY=c(z.y1-l/2);var O=Y6e.select(this),V=jD.ensureSingle(O,"path","surface",function(N){N.style("pointer-events",E?"none":"all")});v?V.transition().attrTween("d",function(N){var H=b(N,$W,k,[s,l]);return function(te){return f(H(te))}}):V.attr("d",f),O.call(RLt,n,t,r,{styleOne:J6e,eventDataKeys:JW.eventDataKeys,transitionTime:JW.CLICK_TRANSITION_TIME,transitionEasing:JW.CLICK_TRANSITION_EASING}).call(qA.setSliceCursor,t,{hideOnRoot:!1,hideOnLeaves:!1,isTransitioning:t._transitioning}),V.call(J6e,z,x,t,{hovered:!1}),z._text=(qA.getPtLabel(z)||"").split("<br>").join(" ")||"";var G=jD.ensureSingle(O,"g","slicetext"),Z=jD.ensureSingle(G,"text","",function(N){N.attr("data-notex",1)}),j=jD.ensureUniformFontSize(t,qA.determineTextFont(x,z,S.font,{onPathbar:!0}));Z.text(z._text||" ").classed("slicetext",!0).attr("text-anchor","start").call(K6e.font,j).call(PLt.convertToTspans,t),z.textBB=K6e.bBox(Z.node()),z.transform=h(z,{fontSize:j.size,onPathbar:!0}),z.transform.fontSize=j.size,v?Z.transition().attrTween("transform",function(N){var H=p(N,$W,k,[s,l]);return function(te){return d(H(te))}}):Z.attr("transform",d(z))})}});var iLe=ye((d0r,rLe)=>{"use strict";var eLe=Oa(),QW=(F2(),ob(D2)).interpolate,Z_=Ky(),WE=Dr(),tLe=e2().TEXTPAD,DLt=n2(),FLt=DLt.toMoveInsideBar,zLt=bv(),eX=zLt.recordMinTextSize,OLt=O2(),qLt=Q6e();function B2(e){return Z_.isHierarchyRoot(e)?"":Z_.getPtId(e)}rLe.exports=function(t,r,n,i,a){var o=t._fullLayout,s=r[0],l=s.trace,u=l.type,c=u==="icicle",f=s.hierarchy,h=Z_.findEntryWithLevel(f,l.level),d=eLe.select(n),v=d.selectAll("g.pathbar"),_=d.selectAll("g.slice");if(!h){v.remove(),_.remove();return}var b=Z_.isHierarchyRoot(h),p=!o.uniformtext.mode&&Z_.hasTransition(i),k=Z_.getMaxDepth(l),E=function($e){return $e.data.depth-h.data.depth<k},S=o._size,L=l.domain,x=S.w*(L.x[1]-L.x[0]),C=S.h*(L.y[1]-L.y[0]),M=x,g=l.pathbar.thickness,P=l.marker.line.width+OLt.gapWithPathbar,T=l.pathbar.visible?l.pathbar.side.indexOf("bottom")>-1?C+P:-(g+P):0,z={x0:M,x1:M,y0:T,y1:T+g},O=function($e,St,Qt){var Vt=l.tiling.pad,_t=function(lr){return lr-Vt<=St.x0},It=function(lr){return lr+Vt>=St.x1},mt=function(lr){return lr-Vt<=St.y0},er=function(lr){return lr+Vt>=St.y1};return $e.x0===St.x0&&$e.x1===St.x1&&$e.y0===St.y0&&$e.y1===St.y1?{x0:$e.x0,x1:$e.x1,y0:$e.y0,y1:$e.y1}:{x0:_t($e.x0-Vt)?0:It($e.x0-Vt)?Qt[0]:$e.x0,x1:_t($e.x1+Vt)?0:It($e.x1+Vt)?Qt[0]:$e.x1,y0:mt($e.y0-Vt)?0:er($e.y0-Vt)?Qt[1]:$e.y0,y1:mt($e.y1+Vt)?0:er($e.y1+Vt)?Qt[1]:$e.y1}},V=null,G={},Z={},j=null,N=function($e,St){return St?G[B2($e)]:Z[B2($e)]},H=function($e,St,Qt,Vt){if(St)return G[B2(f)]||z;var _t=Z[l.level]||Qt;return E($e)?O($e,_t,Vt):{}};s.hasMultipleRoots&&b&&k++,l._maxDepth=k,l._backgroundColor=o.paper_bgcolor,l._entryDepth=h.data.depth,l._atRootLevel=b;var te=-x/2+S.l+S.w*(L.x[1]+L.x[0])/2,oe=-C/2+S.t+S.h*(1-(L.y[1]+L.y[0])/2),_e=function($e){return te+$e},Ee=function($e){return oe+$e},Ce=Ee(0),me=_e(0),ie=function($e){return me+$e},Se=function($e){return Ce+$e};function Le($e,St){return $e+","+St}var Ae=ie(0),Fe=function($e){$e.x=Math.max(Ae,$e.x)},Pe=l.pathbar.edgeshape,ge=function($e){var St=ie(Math.max(Math.min($e.x0,$e.x0),0)),Qt=ie(Math.min(Math.max($e.x1,$e.x1),M)),Vt=Se($e.y0),_t=Se($e.y1),It=g/2,mt={},er={};mt.x=St,er.x=Qt,mt.y=er.y=(Vt+_t)/2;var lr={x:St,y:Vt},Tr={x:Qt,y:Vt},Lr={x:Qt,y:_t},ti={x:St,y:_t};return Pe===">"?(lr.x-=It,Tr.x-=It,Lr.x-=It,ti.x-=It):Pe==="/"?(Lr.x-=It,ti.x-=It,mt.x-=It/2,er.x-=It/2):Pe==="\\"?(lr.x-=It,Tr.x-=It,mt.x-=It/2,er.x-=It/2):Pe==="<"&&(mt.x-=It,er.x-=It),Fe(lr),Fe(ti),Fe(mt),Fe(Tr),Fe(Lr),Fe(er),"M"+Le(lr.x,lr.y)+"L"+Le(Tr.x,Tr.y)+"L"+Le(er.x,er.y)+"L"+Le(Lr.x,Lr.y)+"L"+Le(ti.x,ti.y)+"L"+Le(mt.x,mt.y)+"Z"},Re=l[c?"tiling":"marker"].pad,ce=function($e){return l.textposition.indexOf($e)!==-1},Ze=ce("top"),ut=ce("left"),pt=ce("right"),Zt=ce("bottom"),st=function($e){var St=_e($e.x0),Qt=_e($e.x1),Vt=Ee($e.y0),_t=Ee($e.y1),It=Qt-St,mt=_t-Vt;if(!It||!mt)return"";var er=l.marker.cornerradius||0,lr=Math.min(er,It/2,mt/2);lr&&$e.data&&$e.data.data&&$e.data.data.label&&(Ze&&(lr=Math.min(lr,Re.t)),ut&&(lr=Math.min(lr,Re.l)),pt&&(lr=Math.min(lr,Re.r)),Zt&&(lr=Math.min(lr,Re.b)));var Tr=function(Lr,ti){return lr?"a"+Le(lr,lr)+" 0 0 1 "+Le(Lr,ti):""};return"M"+Le(St,Vt+lr)+Tr(lr,-lr)+"L"+Le(Qt-lr,Vt)+Tr(lr,lr)+"L"+Le(Qt,_t-lr)+Tr(-lr,lr)+"L"+Le(St+lr,_t)+Tr(-lr,-lr)+"Z"},lt=function($e,St){var Qt=$e.x0,Vt=$e.x1,_t=$e.y0,It=$e.y1,mt=$e.textBB,er=Ze||St.isHeader&&!Zt,lr=er?"start":Zt?"end":"middle",Tr=ce("right"),Lr=ce("left")||St.onPathbar,ti=Lr?-1:Tr?1:0;if(St.isHeader){if(Qt+=(c?Re:Re.l)-tLe,Vt-=(c?Re:Re.r)-tLe,Qt>=Vt){var Br=(Qt+Vt)/2;Qt=Br,Vt=Br}var Vr;Zt?(Vr=It-(c?Re:Re.b),_t<Vr&&Vr<It&&(_t=Vr)):(Vr=_t+(c?Re:Re.t),_t<Vr&&Vr<It&&(It=Vr))}var dt=FLt(Qt,Vt,_t,It,mt,{isHorizontal:!1,constrained:!0,angle:0,anchor:lr,leftToRight:ti});return dt.fontSize=St.fontSize,dt.targetX=_e(dt.targetX),dt.targetY=Ee(dt.targetY),isNaN(dt.targetX)||isNaN(dt.targetY)?{}:(Qt!==Vt&&_t!==It&&eX(l.type,dt,o),{scale:dt.scale,rotate:dt.rotate,textX:dt.textX,textY:dt.textY,anchorX:dt.anchorX,anchorY:dt.anchorY,targetX:dt.targetX,targetY:dt.targetY})},Gt=function($e,St){for(var Qt,Vt=0,_t=$e;!Qt&&Vt<k;)Vt++,_t=_t.parent,_t?Qt=N(_t,St):Vt=k;return Qt||{}},Nt=function($e,St,Qt,Vt){var _t=N($e,St),It;if(St)It=z;else{var mt=N(h,St);mt?It=O($e,mt,Vt):It={}}return QW(_t,It)},Jt=function($e,St,Qt,Vt,_t){var It=N($e,St),mt;if(It)mt=It;else if(St)mt=z;else if(V)if($e.parent){var er=j||Qt;er&&!St?mt=O($e,er,Vt):(mt={},WE.extendFlat(mt,Gt($e,St)))}else mt=WE.extendFlat({},$e),c&&(_t.orientation==="h"?_t.flipX?mt.x0=$e.x1:mt.x1=0:_t.flipY?mt.y0=$e.y1:mt.y1=0);else mt={};return QW(mt,{x0:$e.x0,x1:$e.x1,y0:$e.y0,y1:$e.y1})},sr=function($e,St,Qt,Vt){var _t=N($e,St),It={},mt=H($e,St,Qt,Vt);WE.extendFlat(It,{transform:lt({x0:mt.x0,x1:mt.x1,y0:mt.y0,y1:mt.y1,textBB:$e.textBB,_text:$e._text},{isHeader:Z_.isHeader($e,l)})}),_t?It=_t:$e.parent&&WE.extendFlat(It,Gt($e,St));var er=$e.transform;return $e.x0!==$e.x1&&$e.y0!==$e.y1&&eX(l.type,er,o),QW(It,{transform:{scale:er.scale,rotate:er.rotate,textX:er.textX,textY:er.textY,anchorX:er.anchorX,anchorY:er.anchorY,targetX:er.targetX,targetY:er.targetY}})},wr=function($e,St,Qt,Vt,_t){var It=Vt[0],mt=Vt[1];p?$e.exit().transition().each(function(){var er=eLe.select(this),lr=er.select("path.surface");lr.transition().attrTween("d",function(Lr){var ti=Nt(Lr,St,Qt,[It,mt]);return function(Br){return _t(ti(Br))}});var Tr=er.select("g.slicetext");Tr.attr("opacity",0)}).remove():$e.exit().remove()},cr=function($e){var St=$e.transform;return $e.x0!==$e.x1&&$e.y0!==$e.y1&&eX(l.type,St,o),WE.getTextTransform({textX:St.textX,textY:St.textY,anchorX:St.anchorX,anchorY:St.anchorY,targetX:St.targetX,targetY:St.targetY,scale:St.scale,rotate:St.rotate})};p&&(v.each(function($e){G[B2($e)]={x0:$e.x0,x1:$e.x1,y0:$e.y0,y1:$e.y1},$e.transform&&(G[B2($e)].transform={textX:$e.transform.textX,textY:$e.transform.textY,anchorX:$e.transform.anchorX,anchorY:$e.transform.anchorY,targetX:$e.transform.targetX,targetY:$e.transform.targetY,scale:$e.transform.scale,rotate:$e.transform.rotate})}),_.each(function($e){Z[B2($e)]={x0:$e.x0,x1:$e.x1,y0:$e.y0,y1:$e.y1},$e.transform&&(Z[B2($e)].transform={textX:$e.transform.textX,textY:$e.transform.textY,anchorX:$e.transform.anchorX,anchorY:$e.transform.anchorY,targetX:$e.transform.targetX,targetY:$e.transform.targetY,scale:$e.transform.scale,rotate:$e.transform.rotate}),!V&&Z_.isEntry($e)&&(V=$e)})),j=a(t,r,h,_,{width:x,height:C,viewX:_e,viewY:Ee,pathSlice:st,toMoveInsideSlice:lt,prevEntry:V,makeUpdateSliceInterpolator:Jt,makeUpdateTextInterpolator:sr,handleSlicesExit:wr,hasTransition:p,strTransform:cr}),l.pathbar.visible?qLt(t,r,h,v,{barDifY:T,width:M,height:g,viewX:ie,viewY:Se,pathSlice:ge,toMoveInsideSlice:lt,makeUpdateSliceInterpolator:Jt,makeUpdateTextInterpolator:sr,handleSlicesExit:wr,hasTransition:p,strTransform:cr}):v.remove()}});var tX=ye((v0r,aLe)=>{"use strict";var BLt=Oa(),NLt=Ky(),ULt=bv(),VLt=ULt.clearMinTextSize,GLt=N0().resizeText,nLe=iLe();aLe.exports=function(t,r,n,i,a){var o=a.type,s=a.drawDescendants,l=t._fullLayout,u=l["_"+o+"layer"],c,f,h=!n;if(VLt(o,l),c=u.selectAll("g.trace."+o).data(r,function(v){return v[0].trace.uid}),c.enter().append("g").classed("trace",!0).classed(o,!0),c.order(),!l.uniformtext.mode&&NLt.hasTransition(n)){i&&(f=i());var d=BLt.transition().duration(n.duration).ease(n.easing).each("end",function(){f&&f()}).each("interrupt",function(){f&&f()});d.each(function(){u.selectAll("g.trace").each(function(v){nLe(t,v,this,n,s)})})}else c.each(function(v){nLe(t,v,this,n,s)}),l.uniformtext.mode&&GLt(t,u.selectAll(".trace"),o);h&&c.exit().remove()}});var cLe=ye((p0r,uLe)=>{"use strict";var oLe=Oa(),WD=Dr(),sLe=So(),HLt=ru(),jLt=YW(),lLe=HD().styleOne,rX=O2(),Y_=Ky(),WLt=HE(),XLt=VD().formatSliceLabel,iX=!1;uLe.exports=function(t,r,n,i,a){var o=a.width,s=a.height,l=a.viewX,u=a.viewY,c=a.pathSlice,f=a.toMoveInsideSlice,h=a.strTransform,d=a.hasTransition,v=a.handleSlicesExit,_=a.makeUpdateSliceInterpolator,b=a.makeUpdateTextInterpolator,p=a.prevEntry,k={},E=t._context.staticPlot,S=t._fullLayout,L=r[0],x=L.trace,C=x.textposition.indexOf("left")!==-1,M=x.textposition.indexOf("right")!==-1,g=x.textposition.indexOf("bottom")!==-1,P=!g&&!x.marker.pad.t||g&&!x.marker.pad.b,T=jLt(n,[o,s],{packing:x.tiling.packing,squarifyratio:x.tiling.squarifyratio,flipX:x.tiling.flip.indexOf("x")>-1,flipY:x.tiling.flip.indexOf("y")>-1,pad:{inner:x.tiling.pad,top:x.marker.pad.t,left:x.marker.pad.l,right:x.marker.pad.r,bottom:x.marker.pad.b}}),z=T.descendants(),O=1/0,V=-1/0;z.forEach(function(H){var te=H.depth;te>=x._maxDepth?(H.x0=H.x1=(H.x0+H.x1)/2,H.y0=H.y1=(H.y0+H.y1)/2):(O=Math.min(O,te),V=Math.max(V,te))}),i=i.data(z,Y_.getPtId),x._maxVisibleLayers=isFinite(V)?V-O+1:0,i.enter().append("g").classed("slice",!0),v(i,iX,k,[o,s],c),i.order();var G=null;if(d&&p){var Z=Y_.getPtId(p);i.each(function(H){G===null&&Y_.getPtId(H)===Z&&(G={x0:H.x0,x1:H.x1,y0:H.y0,y1:H.y1})})}var j=function(){return G||{x0:0,x1:o,y0:0,y1:s}},N=i;return d&&(N=N.transition().each("end",function(){var H=oLe.select(this);Y_.setSliceCursor(H,t,{hideOnRoot:!0,hideOnLeaves:!1,isTransitioning:!1})})),N.each(function(H){var te=Y_.isHeader(H,x);H._x0=l(H.x0),H._x1=l(H.x1),H._y0=u(H.y0),H._y1=u(H.y1),H._hoverX=l(H.x1-x.marker.pad.r),H._hoverY=u(g?H.y1-x.marker.pad.b/2:H.y0+x.marker.pad.t/2);var oe=oLe.select(this),_e=WD.ensureSingle(oe,"path","surface",function(Le){Le.style("pointer-events",E?"none":"all")});d?_e.transition().attrTween("d",function(Le){var Ae=_(Le,iX,j(),[o,s]);return function(Fe){return c(Ae(Fe))}}):_e.attr("d",c),oe.call(WLt,n,t,r,{styleOne:lLe,eventDataKeys:rX.eventDataKeys,transitionTime:rX.CLICK_TRANSITION_TIME,transitionEasing:rX.CLICK_TRANSITION_EASING}).call(Y_.setSliceCursor,t,{isTransitioning:t._transitioning}),_e.call(lLe,H,x,t,{hovered:!1}),H.x0===H.x1||H.y0===H.y1?H._text="":te?H._text=P?"":Y_.getPtLabel(H)||"":H._text=XLt(H,n,x,r,S)||"";var Ee=WD.ensureSingle(oe,"g","slicetext"),Ce=WD.ensureSingle(Ee,"text","",function(Le){Le.attr("data-notex",1)}),me=WD.ensureUniformFontSize(t,Y_.determineTextFont(x,H,S.font)),ie=H._text||" ",Se=te&&ie.indexOf("<br>")===-1;Ce.text(ie).classed("slicetext",!0).attr("text-anchor",M?"end":C||Se?"start":"middle").call(sLe.font,me).call(HLt.convertToTspans,t),H.textBB=sLe.bBox(Ce.node()),H.transform=f(H,{fontSize:me.size,isHeader:te}),H.transform.fontSize=me.size,d?Ce.transition().attrTween("transform",function(Le){var Ae=b(Le,iX,j(),[o,s]);return function(Fe){return h(Ae(Fe))}}):Ce.attr("transform",h(H))}),G}});var hLe=ye((g0r,fLe)=>{"use strict";var ZLt=tX(),YLt=cLe();fLe.exports=function(t,r,n,i){return ZLt(t,r,n,i,{type:"treemap",drawDescendants:YLt})}});var vLe=ye((m0r,dLe)=>{"use strict";dLe.exports={moduleType:"trace",name:"treemap",basePlotModule:L6e(),categories:[],animatable:!0,attributes:GD(),layoutAttributes:jW(),supplyDefaults:B6e(),supplyLayoutDefaults:U6e(),calc:XW().calc,crossTraceCalc:XW().crossTraceCalc,plot:hLe(),style:HD().style,colorbar:$d(),meta:{}}});var gLe=ye((y0r,pLe)=>{"use strict";pLe.exports=vLe()});var yLe=ye(BA=>{"use strict";var mLe=Mc();BA.name="icicle";BA.plot=function(e,t,r,n){mLe.plotBasePlot(BA.name,e,t,r,n)};BA.clean=function(e,t,r,n){mLe.cleanBasePlot(BA.name,e,t,r,n)}});var nX=ye((x0r,bLe)=>{"use strict";var{hovertemplateAttrs:KLt,texttemplateAttrs:JLt,templatefallbackAttrs:_Le}=Ll(),$Lt=Tu(),QLt=Cc().attributes,XE=M2(),o0=LE(),XD=GD(),xLe=O2(),ePt=Ao().extendFlat,tPt=Pd().pattern;bLe.exports={labels:o0.labels,parents:o0.parents,values:o0.values,branchvalues:o0.branchvalues,count:o0.count,level:o0.level,maxdepth:o0.maxdepth,tiling:{orientation:{valType:"enumerated",values:["v","h"],dflt:"h",editType:"plot"},flip:XD.tiling.flip,pad:{valType:"number",min:0,dflt:0,editType:"plot"},editType:"calc"},marker:ePt({colors:o0.marker.colors,line:o0.marker.line,pattern:tPt,editType:"calc"},$Lt("marker",{colorAttr:"colors",anim:!1})),leaf:o0.leaf,pathbar:XD.pathbar,text:XE.text,textinfo:o0.textinfo,texttemplate:JLt({editType:"plot"},{keys:xLe.eventDataKeys.concat(["label","value"])}),texttemplatefallback:_Le({editType:"plot"}),hovertext:XE.hovertext,hoverinfo:o0.hoverinfo,hovertemplate:KLt({},{keys:xLe.eventDataKeys}),hovertemplatefallback:_Le(),textfont:XE.textfont,insidetextfont:XE.insidetextfont,outsidetextfont:XD.outsidetextfont,textposition:XD.textposition,sort:XE.sort,root:o0.root,domain:QLt({name:"icicle",trace:!0,editType:"calc"})}});var aX=ye((b0r,wLe)=>{"use strict";wLe.exports={iciclecolorway:{valType:"colorlist",editType:"calc"},extendiciclecolors:{valType:"boolean",dflt:!0,editType:"calc"}}});var MLe=ye((w0r,SLe)=>{"use strict";var TLe=Dr(),rPt=nX(),iPt=ka(),nPt=Cc().defaults,aPt=r0().handleText,oPt=e2().TEXTPAD,sPt=E2().handleMarkerDefaults,ALe=tc(),lPt=ALe.hasColorscale,uPt=ALe.handleDefaults;SLe.exports=function(t,r,n,i){function a(d,v){return TLe.coerce(t,r,rPt,d,v)}var o=a("labels"),s=a("parents");if(!o||!o.length||!s||!s.length){r.visible=!1;return}var l=a("values");l&&l.length?a("branchvalues"):a("count"),a("level"),a("maxdepth"),a("tiling.orientation"),a("tiling.flip"),a("tiling.pad");var u=a("text");a("texttemplate"),a("texttemplatefallback"),r.texttemplate||a("textinfo",TLe.isArrayOrTypedArray(u)?"text+label":"label"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback");var c=a("pathbar.visible"),f="auto";aPt(t,r,i,a,f,{hasPathbar:c,moduleHasSelected:!1,moduleHasUnselected:!1,moduleHasConstrain:!1,moduleHasCliponaxis:!1,moduleHasTextangle:!1,moduleHasInsideanchor:!1}),a("textposition"),sPt(t,r,i,a);var h=r._hasColorscale=lPt(t,"marker","colors")||(t.marker||{}).coloraxis;h&&uPt(t,r,i,a,{prefix:"marker.",cLetter:"c"}),a("leaf.opacity",h?1:.7),r._hovered={marker:{line:{width:2,color:iPt.contrast(i.paper_bgcolor)}}},c&&(a("pathbar.thickness",r.pathbar.textfont.size+2*oPt),a("pathbar.side"),a("pathbar.edgeshape")),a("sort"),a("root.color"),nPt(r,i,a),r._length=null}});var kLe=ye((T0r,ELe)=>{"use strict";var cPt=Dr(),fPt=aX();ELe.exports=function(t,r){function n(i,a){return cPt.coerce(t,r,fPt,i,a)}n("iciclecolorway",r.colorway),n("extendiciclecolors")}});var sX=ye(oX=>{"use strict";var CLe=RE();oX.calc=function(e,t){return CLe.calc(e,t)};oX.crossTraceCalc=function(e){return CLe._runCrossTraceCalc("icicle",e)}});var PLe=ye((S0r,LLe)=>{"use strict";var hPt=PE(),dPt=ZW();LLe.exports=function(t,r,n){var i=n.flipX,a=n.flipY,o=n.orientation==="h",s=n.maxDepth,l=r[0],u=r[1];s&&(l=(t.height+1)*r[0]/Math.min(t.height+1,s),u=(t.height+1)*r[1]/Math.min(t.height+1,s));var c=hPt.partition().padding(n.pad.inner).size(o?[r[1],l]:[r[0],u])(t);return(o||i||a)&&dPt(c,r,{swapXY:o,flipX:i,flipY:a}),c}});var lX=ye((M0r,zLe)=>{"use strict";var ILe=Oa(),RLe=ka(),DLe=Dr(),vPt=bv().resizeText,pPt=ND();function gPt(e){var t=e._fullLayout._iciclelayer.selectAll(".trace");vPt(e,t,"icicle"),t.each(function(r){var n=ILe.select(this),i=r[0],a=i.trace;n.style("opacity",a.opacity),n.selectAll("path.surface").each(function(o){ILe.select(this).call(FLe,o,a,e)})})}function FLe(e,t,r,n){var i=t.data.data,a=!t.children,o=i.i,s=DLe.castOption(r,o,"marker.line.color")||RLe.defaultLine,l=DLe.castOption(r,o,"marker.line.width")||0;e.call(pPt,t,r,n).style("stroke-width",l).call(RLe.stroke,s).style("opacity",a?r.leaf.opacity:null)}zLe.exports={style:gPt,styleOne:FLe}});var ULe=ye((E0r,NLe)=>{"use strict";var OLe=Oa(),ZD=Dr(),qLe=So(),mPt=ru(),yPt=PLe(),BLe=lX().styleOne,uX=O2(),NA=Ky(),_Pt=HE(),xPt=VD().formatSliceLabel,cX=!1;NLe.exports=function(t,r,n,i,a){var o=a.width,s=a.height,l=a.viewX,u=a.viewY,c=a.pathSlice,f=a.toMoveInsideSlice,h=a.strTransform,d=a.hasTransition,v=a.handleSlicesExit,_=a.makeUpdateSliceInterpolator,b=a.makeUpdateTextInterpolator,p=a.prevEntry,k={},E=t._context.staticPlot,S=t._fullLayout,L=r[0],x=L.trace,C=x.textposition.indexOf("left")!==-1,M=x.textposition.indexOf("right")!==-1,g=x.textposition.indexOf("bottom")!==-1,P=yPt(n,[o,s],{flipX:x.tiling.flip.indexOf("x")>-1,flipY:x.tiling.flip.indexOf("y")>-1,orientation:x.tiling.orientation,pad:{inner:x.tiling.pad},maxDepth:x._maxDepth}),T=P.descendants(),z=1/0,O=-1/0;T.forEach(function(N){var H=N.depth;H>=x._maxDepth?(N.x0=N.x1=(N.x0+N.x1)/2,N.y0=N.y1=(N.y0+N.y1)/2):(z=Math.min(z,H),O=Math.max(O,H))}),i=i.data(T,NA.getPtId),x._maxVisibleLayers=isFinite(O)?O-z+1:0,i.enter().append("g").classed("slice",!0),v(i,cX,k,[o,s],c),i.order();var V=null;if(d&&p){var G=NA.getPtId(p);i.each(function(N){V===null&&NA.getPtId(N)===G&&(V={x0:N.x0,x1:N.x1,y0:N.y0,y1:N.y1})})}var Z=function(){return V||{x0:0,x1:o,y0:0,y1:s}},j=i;return d&&(j=j.transition().each("end",function(){var N=OLe.select(this);NA.setSliceCursor(N,t,{hideOnRoot:!0,hideOnLeaves:!1,isTransitioning:!1})})),j.each(function(N){N._x0=l(N.x0),N._x1=l(N.x1),N._y0=u(N.y0),N._y1=u(N.y1),N._hoverX=l(N.x1-x.tiling.pad),N._hoverY=u(g?N.y1-x.tiling.pad/2:N.y0+x.tiling.pad/2);var H=OLe.select(this),te=ZD.ensureSingle(H,"path","surface",function(Ce){Ce.style("pointer-events",E?"none":"all")});d?te.transition().attrTween("d",function(Ce){var me=_(Ce,cX,Z(),[o,s],{orientation:x.tiling.orientation,flipX:x.tiling.flip.indexOf("x")>-1,flipY:x.tiling.flip.indexOf("y")>-1});return function(ie){return c(me(ie))}}):te.attr("d",c),H.call(_Pt,n,t,r,{styleOne:BLe,eventDataKeys:uX.eventDataKeys,transitionTime:uX.CLICK_TRANSITION_TIME,transitionEasing:uX.CLICK_TRANSITION_EASING}).call(NA.setSliceCursor,t,{isTransitioning:t._transitioning}),te.call(BLe,N,x,t,{hovered:!1}),N.x0===N.x1||N.y0===N.y1?N._text="":N._text=xPt(N,n,x,r,S)||"";var oe=ZD.ensureSingle(H,"g","slicetext"),_e=ZD.ensureSingle(oe,"text","",function(Ce){Ce.attr("data-notex",1)}),Ee=ZD.ensureUniformFontSize(t,NA.determineTextFont(x,N,S.font));_e.text(N._text||" ").classed("slicetext",!0).attr("text-anchor",M?"end":C?"start":"middle").call(qLe.font,Ee).call(mPt.convertToTspans,t),N.textBB=qLe.bBox(_e.node()),N.transform=f(N,{fontSize:Ee.size}),N.transform.fontSize=Ee.size,d?_e.transition().attrTween("transform",function(Ce){var me=b(Ce,cX,Z(),[o,s]);return function(ie){return h(me(ie))}}):_e.attr("transform",h(N))}),V}});var GLe=ye((k0r,VLe)=>{"use strict";var bPt=tX(),wPt=ULe();VLe.exports=function(t,r,n,i){return bPt(t,r,n,i,{type:"icicle",drawDescendants:wPt})}});var jLe=ye((C0r,HLe)=>{"use strict";HLe.exports={moduleType:"trace",name:"icicle",basePlotModule:yLe(),categories:[],animatable:!0,attributes:nX(),layoutAttributes:aX(),supplyDefaults:MLe(),supplyLayoutDefaults:kLe(),calc:sX().calc,crossTraceCalc:sX().crossTraceCalc,plot:GLe(),style:lX().style,colorbar:$d(),meta:{}}});var XLe=ye((L0r,WLe)=>{"use strict";WLe.exports=jLe()});var YLe=ye(UA=>{"use strict";var ZLe=Mc();UA.name="funnelarea";UA.plot=function(e,t,r,n){ZLe.plotBasePlot(UA.name,e,t,r,n)};UA.clean=function(e,t,r,n){ZLe.cleanBasePlot(UA.name,e,t,r,n)}});var fX=ye((I0r,JLe)=>{"use strict";var iv=M2(),TPt=Gl(),APt=Cc().attributes,{hovertemplateAttrs:SPt,texttemplateAttrs:MPt,templatefallbackAttrs:KLe}=Ll(),N2=Ao().extendFlat;JLe.exports={labels:iv.labels,label0:iv.label0,dlabel:iv.dlabel,values:iv.values,marker:{colors:iv.marker.colors,line:{color:N2({},iv.marker.line.color,{dflt:null}),width:N2({},iv.marker.line.width,{dflt:1}),editType:"calc"},pattern:iv.marker.pattern,editType:"calc"},text:iv.text,hovertext:iv.hovertext,scalegroup:N2({},iv.scalegroup,{}),textinfo:N2({},iv.textinfo,{flags:["label","text","value","percent"]}),texttemplate:MPt({editType:"plot"},{keys:["label","color","value","text","percent"]}),texttemplatefallback:KLe({editType:"plot"}),hoverinfo:N2({},TPt.hoverinfo,{flags:["label","text","value","percent","name"]}),hovertemplate:SPt({},{keys:["label","color","value","text","percent"]}),hovertemplatefallback:KLe(),textposition:N2({},iv.textposition,{values:["inside","none"],dflt:"inside"}),textfont:iv.textfont,insidetextfont:iv.insidetextfont,title:{text:iv.title.text,font:iv.title.font,position:N2({},iv.title.position,{values:["top left","top center","top right"],dflt:"top center"}),editType:"plot"},domain:APt({name:"funnelarea",trace:!0,editType:"calc"}),aspectratio:{valType:"number",min:0,dflt:1,editType:"plot"},baseratio:{valType:"number",min:0,max:1,dflt:.333,editType:"plot"}}});var hX=ye((R0r,$Le)=>{"use strict";var EPt=pD().hiddenlabels;$Le.exports={hiddenlabels:EPt,funnelareacolorway:{valType:"colorlist",editType:"calc"},extendfunnelareacolors:{valType:"boolean",dflt:!0,editType:"calc"}}});var tPe=ye((D0r,ePe)=>{"use strict";var QLe=Dr(),kPt=fX(),CPt=Cc().defaults,LPt=r0().handleText,PPt=E2().handleLabelsAndValues,IPt=E2().handleMarkerDefaults;ePe.exports=function(t,r,n,i){function a(_,b){return QLe.coerce(t,r,kPt,_,b)}var o=a("labels"),s=a("values"),l=PPt(o,s),u=l.len;if(r._hasLabels=l.hasLabels,r._hasValues=l.hasValues,!r._hasLabels&&r._hasValues&&(a("label0"),a("dlabel")),!u){r.visible=!1;return}r._length=u,IPt(t,r,i,a),a("scalegroup");var c=a("text"),f=a("texttemplate");a("texttemplatefallback");var h;if(f||(h=a("textinfo",Array.isArray(c)?"text+percent":"percent")),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),f||h&&h!=="none"){var d=a("textposition");LPt(t,r,i,a,d,{moduleHasSelected:!1,moduleHasUnselected:!1,moduleHasConstrain:!1,moduleHasCliponaxis:!1,moduleHasTextangle:!1,moduleHasInsideanchor:!1})}else h==="none"&&a("textposition","none");CPt(r,i,a);var v=a("title.text");v&&(a("title.position"),QLe.coerceFont(a,"title.font",i.font)),a("aspectratio"),a("baseratio")}});var iPe=ye((F0r,rPe)=>{"use strict";var RPt=Dr(),DPt=hX();rPe.exports=function(t,r){function n(i,a){return RPt.coerce(t,r,DPt,i,a)}n("hiddenlabels"),n("funnelareacolorway",r.colorway),n("extendfunnelareacolors")}});var dX=ye((z0r,aPe)=>{"use strict";var nPe=TA();function FPt(e,t){return nPe.calc(e,t)}function zPt(e){nPe.crossTraceCalc(e,{type:"funnelarea"})}aPe.exports={calc:FPt,crossTraceCalc:zPt}});var cPe=ye((O0r,uPe)=>{"use strict";var U2=Oa(),vX=So(),K_=Dr(),OPt=K_.strScale,oPe=K_.strTranslate,sPe=ru(),qPt=n2(),BPt=qPt.toMoveInsideBar,lPe=bv(),NPt=lPe.recordMinTextSize,UPt=lPe.clearMinTextSize,VPt=l_(),VA=_D(),GPt=VA.attachFxHandlers,HPt=VA.determineInsideTextFont,jPt=VA.layoutAreas,WPt=VA.prerenderTitles,XPt=VA.positionTitleOutside,ZPt=VA.formatSliceLabel;uPe.exports=function(t,r){var n=t._context.staticPlot,i=t._fullLayout;UPt("funnelarea",i),WPt(r,t),jPt(r,i._size),K_.makeTraceGroups(i._funnelarealayer,r,"trace").each(function(a){var o=U2.select(this),s=a[0],l=s.trace;KPt(a),o.each(function(){var u=U2.select(this).selectAll("g.slice").data(a);u.enter().append("g").classed("slice",!0),u.exit().remove(),u.each(function(f,h){if(f.hidden){U2.select(this).selectAll("path,g").remove();return}f.pointNumber=f.i,f.curveNumber=l.index;var d=s.cx,v=s.cy,_=U2.select(this),b=_.selectAll("path.surface").data([f]);b.enter().append("path").classed("surface",!0).style({"pointer-events":n?"none":"all"}),_.call(GPt,t,a);var p="M"+(d+f.TR[0])+","+(v+f.TR[1])+pX(f.TR,f.BR)+pX(f.BR,f.BL)+pX(f.BL,f.TL)+"Z";b.attr("d",p),ZPt(t,f,s);var k=VPt.castOption(l.textposition,f.pts),E=_.selectAll("g.slicetext").data(f.text&&k!=="none"?[0]:[]);E.enter().append("g").classed("slicetext",!0),E.exit().remove(),E.each(function(){var S=K_.ensureSingle(U2.select(this),"text","",function(z){z.attr("data-notex",1)}),L=K_.ensureUniformFontSize(t,HPt(l,f,i.font));S.text(f.text).attr({class:"slicetext",transform:"","text-anchor":"middle"}).call(vX.font,L).call(sPe.convertToTspans,t);var x=vX.bBox(S.node()),C,M,g,P=Math.min(f.BL[1],f.BR[1])+v,T=Math.max(f.TL[1],f.TR[1])+v;M=Math.max(f.TL[0],f.BL[0])+d,g=Math.min(f.TR[0],f.BR[0])+d,C=BPt(M,g,P,T,x,{isHorizontal:!0,constrained:!0,angle:0,anchor:"middle"}),C.fontSize=L.size,NPt(l.type,C,i),a[h].transform=C,K_.setTransormAndDisplay(S,C)})});var c=U2.select(this).selectAll("g.titletext").data(l.title.text?[0]:[]);c.enter().append("g").classed("titletext",!0),c.exit().remove(),c.each(function(){var f=K_.ensureSingle(U2.select(this),"text","",function(v){v.attr("data-notex",1)}),h=l.title.text;l._meta&&(h=K_.templateString(h,l._meta)),f.text(h).attr({class:"titletext",transform:"","text-anchor":"middle"}).call(vX.font,l.title.font).call(sPe.convertToTspans,t);var d=XPt(s,i._size);f.attr("transform",oPe(d.x,d.y)+OPt(Math.min(1,d.scale))+oPe(d.tx,d.ty))})})})};function pX(e,t){var r=t[0]-e[0],n=t[1]-e[1];return"l"+r+","+n}function YPt(e,t){return[.5*(e[0]+t[0]),.5*(e[1]+t[1])]}function KPt(e){if(!e.length)return;var t=e[0],r=t.trace,n=r.aspectratio,i=r.baseratio;i>.999&&(i=.999);var a=Math.pow(i,2),o=t.vTotal,s=o*a/(1-a),l=o,u=s/o;function c(){var O=Math.sqrt(u);return{x:O,y:-O}}function f(){var O=c();return[O.x,O.y]}var h,d=[];d.push(f());var v,_;for(v=e.length-1;v>-1;v--)if(_=e[v],!_.hidden){var b=_.v/l;u+=b,d.push(f())}var p=1/0,k=-1/0;for(v=0;v<d.length;v++)h=d[v],p=Math.min(p,h[1]),k=Math.max(k,h[1]);for(v=0;v<d.length;v++)d[v][1]-=(k+p)/2;var E=d[d.length-1][0],S=t.r,L=(k-p)/2,x=S/E,C=S/L*n;for(t.r=C*L,v=0;v<d.length;v++)d[v][0]*=x,d[v][1]*=C;h=d[0];var M=[-h[0],h[1]],g=[h[0],h[1]],P=0;for(v=e.length-1;v>-1;v--)if(_=e[v],!_.hidden){P+=1;var T=d[P][0],z=d[P][1];_.TL=[-T,z],_.TR=[T,z],_.BL=M,_.BR=g,_.pxmid=YPt(_.TR,_.BR),M=_.TL,g=_.TR}}});var dPe=ye((q0r,hPe)=>{"use strict";var fPe=Oa(),JPt=B3(),$Pt=bv().resizeText;hPe.exports=function(t){var r=t._fullLayout._funnelarealayer.selectAll(".trace");$Pt(t,r,"funnelarea"),r.each(function(n){var i=n[0],a=i.trace,o=fPe.select(this);o.style({opacity:a.opacity}),o.selectAll("path.surface").each(function(s){fPe.select(this).call(JPt,s,a,t)})})}});var pPe=ye((B0r,vPe)=>{"use strict";vPe.exports={moduleType:"trace",name:"funnelarea",basePlotModule:YLe(),categories:["pie-like","funnelarea","showLegend"],attributes:fX(),layoutAttributes:hX(),supplyDefaults:tPe(),supplyLayoutDefaults:iPe(),calc:dX().calc,crossTraceCalc:dX().crossTraceCalc,plot:cPe(),style:dPe(),styleOne:B3(),meta:{}}});var mPe=ye((N0r,gPe)=>{"use strict";gPe.exports=pPe()});var Od=ye((U0r,yPe)=>{(function(){var e={24:function(i){var a={left:0,top:0};i.exports=o;function o(l,u,c){u=u||l.currentTarget||l.srcElement,Array.isArray(c)||(c=[0,0]);var f=l.clientX||0,h=l.clientY||0,d=s(u);return c[0]=f-d.left,c[1]=h-d.top,c}function s(l){return l===window||l===document||l===document.body?a:l.getBoundingClientRect()}},109:function(i){i.exports=a;function a(o,s,l,u){var c=l[0],f=l[2],h=s[0]-c,d=s[2]-f,v=Math.sin(u),_=Math.cos(u);return o[0]=c+d*v+h*_,o[1]=s[1],o[2]=f+d*_-h*v,o}},160:function(i){i.exports=a;function a(o,s,l){return o[0]=Math.max(s[0],l[0]),o[1]=Math.max(s[1],l[1]),o[2]=Math.max(s[2],l[2]),o[3]=Math.max(s[3],l[3]),o}},216:function(i){"use strict";i.exports=a;function a(o,s){for(var l={},u=0;u<o.length;++u)for(var c=o[u].name,f=c.split("."),h=l,d=0;d<f.length;++d){var v=f[d].split("[");if(v.length>1){v[0]in h||(h[v[0]]=[]),h=h[v[0]];for(var _=1;_<v.length;++_){var b=parseInt(v[_]);_<v.length-1||d<f.length-1?(b in h||(_<v.length-1?h[b]=[]:h[b]={}),h=h[b]):s?h[b]=u:h[b]=o[u].type}}else d<f.length-1?(v[0]in h||(h[v[0]]={}),h=h[v[0]]):s?h[v[0]]=u:h[v[0]]=o[u].type}return l}},236:function(i,a,o){var s=o(8284);i.exports=l;function l(){var u={};return function(c){if((typeof c!="object"||c===null)&&typeof c!="function")throw new Error("Weakmap-shim: Key must be object");var f=c.valueOf(u);return f&&f.identity===u?f:s(c,u)}}},244:function(i){i.exports=a;function a(o,s){return o[0]*s[0]+o[1]*s[1]+o[2]*s[2]}},264:function(i){i.exports=a;function a(o,s,l){var u=s[0],c=s[1],f=s[2],h=l[0],d=l[1],v=l[2],_=l[3],b=_*u+d*f-v*c,p=_*c+v*u-h*f,k=_*f+h*c-d*u,E=-h*u-d*c-v*f;return o[0]=b*_+E*-h+p*-v-k*-d,o[1]=p*_+E*-d+k*-h-b*-v,o[2]=k*_+E*-v+b*-d-p*-h,o}},332:function(i,a,o){"use strict";i.exports=z;var s=o(1755),l=o(6867),u=o(1125),c=o(7842),f=o(1318),h=o(946),d=o(5838),v=o(1278),_=o(3637);function b(O){var V=h(O);return[v(V,-1/0),v(V,1/0)]}function p(O,V){for(var G=new Array(V.length),Z=0;Z<V.length;++Z){var j=V[Z],N=O[j[0]],H=O[j[1]];G[Z]=[v(Math.min(N[0],H[0]),-1/0),v(Math.min(N[1],H[1]),-1/0),v(Math.max(N[0],H[0]),1/0),v(Math.max(N[1],H[1]),1/0)]}return G}function k(O){for(var V=new Array(O.length),G=0;G<O.length;++G){var Z=O[G];V[G]=[v(Z[0],-1/0),v(Z[1],-1/0),v(Z[0],1/0),v(Z[1],1/0)]}return V}function E(O,V,G){var Z=[];return l(G,function(j,N){var H=V[j],te=V[N];if(!(H[0]===te[0]||H[0]===te[1]||H[1]===te[0]||H[1]===te[1])){var oe=O[H[0]],_e=O[H[1]],Ee=O[te[0]],Ce=O[te[1]];u(oe,_e,Ee,Ce)&&Z.push([j,N])}}),Z}function S(O,V,G,Z){var j=[];return l(G,Z,function(N,H){var te=V[N];if(!(te[0]===H||te[1]===H)){var oe=O[H],_e=O[te[0]],Ee=O[te[1]];u(_e,Ee,oe,oe)&&j.push([N,H])}}),j}function L(O,V,G,Z,j){var N,H,te=O.map(function(pt){return[c(pt[0]),c(pt[1])]});for(N=0;N<G.length;++N){var oe=G[N];H=oe[0];var _e=oe[1],Ee=V[H],Ce=V[_e],me=_(d(O[Ee[0]]),d(O[Ee[1]]),d(O[Ce[0]]),d(O[Ce[1]]));if(me){var ie=O.length;O.push([h(me[0]),h(me[1])]),te.push(me),Z.push([H,ie],[_e,ie])}}for(Z.sort(function(pt,Zt){if(pt[0]!==Zt[0])return pt[0]-Zt[0];var st=te[pt[1]],lt=te[Zt[1]];return f(st[0],lt[0])||f(st[1],lt[1])}),N=Z.length-1;N>=0;--N){var Se=Z[N];H=Se[0];var Le=V[H],Ae=Le[0],Fe=Le[1],Pe=O[Ae],ge=O[Fe];if((Pe[0]-ge[0]||Pe[1]-ge[1])<0){var Re=Ae;Ae=Fe,Fe=Re}Le[0]=Ae;var ce=Le[1]=Se[1],Ze;for(j&&(Ze=Le[2]);N>0&&Z[N-1][0]===H;){var Se=Z[--N],ut=Se[1];j?V.push([ce,ut,Ze]):V.push([ce,ut]),ce=ut}j?V.push([ce,Fe,Ze]):V.push([ce,Fe])}return te}function x(O,V,G){for(var Z=V.length,j=new s(Z),N=[],H=0;H<V.length;++H){var te=V[H],oe=b(te[0]),_e=b(te[1]);N.push([v(oe[0],-1/0),v(_e[0],-1/0),v(oe[1],1/0),v(_e[1],1/0)])}l(N,function(Se,Le){j.link(Se,Le)});for(var Ee=!0,Ce=new Array(Z),H=0;H<Z;++H){var me=j.find(H);me!==H&&(Ee=!1,O[me]=[Math.min(O[H][0],O[me][0]),Math.min(O[H][1],O[me][1])])}if(Ee)return null;for(var ie=0,H=0;H<Z;++H){var me=j.find(H);me===H?(Ce[H]=ie,O[ie++]=O[H]):Ce[H]=-1}O.length=ie;for(var H=0;H<Z;++H)Ce[H]<0&&(Ce[H]=Ce[j.find(H)]);return Ce}function C(O,V){return O[0]-V[0]||O[1]-V[1]}function M(O,V){var G=O[0]-V[0]||O[1]-V[1];return G||(O[2]<V[2]?-1:O[2]>V[2]?1:0)}function g(O,V,G){if(O.length!==0){if(V)for(var Z=0;Z<O.length;++Z){var j=O[Z],N=V[j[0]],H=V[j[1]];j[0]=Math.min(N,H),j[1]=Math.max(N,H)}else for(var Z=0;Z<O.length;++Z){var j=O[Z],N=j[0],H=j[1];j[0]=Math.min(N,H),j[1]=Math.max(N,H)}G?O.sort(M):O.sort(C);for(var te=1,Z=1;Z<O.length;++Z){var oe=O[Z-1],_e=O[Z];_e[0]===oe[0]&&_e[1]===oe[1]&&(!G||_e[2]===oe[2])||(O[te++]=_e)}O.length=te}}function P(O,V,G){var Z=x(O,[],k(O));return g(V,Z,G),!!Z}function T(O,V,G){var Z=p(O,V),j=E(O,V,Z),N=k(O),H=S(O,V,Z,N),te=L(O,V,j,H,G),oe=x(O,te,N);return g(V,oe,G),oe?!0:j.length>0||H.length>0}function z(O,V,G){var Z;if(G){Z=V;for(var j=new Array(V.length),N=0;N<V.length;++N){var H=V[N];j[N]=[H[0],H[1],G[N]]}V=j}for(var te=P(O,V,!!G);T(O,V,!!G);)te=!0;if(G&&te){Z.length=0,G.length=0;for(var N=0;N<V.length;++N){var H=V[N];Z.push([H[0],H[1]]),G.push(H[2])}}return te}},351:function(i,a,o){"use strict";i.exports=l;var s=o(4687);function l(u,c){c||(c=u,u=window);var f=0,h=0,d=0,v={shift:!1,alt:!1,control:!1,meta:!1},_=!1;function b(T){var z=!1;return"altKey"in T&&(z=z||T.altKey!==v.alt,v.alt=!!T.altKey),"shiftKey"in T&&(z=z||T.shiftKey!==v.shift,v.shift=!!T.shiftKey),"ctrlKey"in T&&(z=z||T.ctrlKey!==v.control,v.control=!!T.ctrlKey),"metaKey"in T&&(z=z||T.metaKey!==v.meta,v.meta=!!T.metaKey),z}function p(T,z){var O=s.x(z),V=s.y(z);"buttons"in z&&(T=z.buttons|0),(T!==f||O!==h||V!==d||b(z))&&(f=T|0,h=O||0,d=V||0,c&&c(f,h,d,v))}function k(T){p(0,T)}function E(){(f||h||d||v.shift||v.alt||v.meta||v.control)&&(h=d=0,f=0,v.shift=v.alt=v.control=v.meta=!1,c&&c(0,0,0,v))}function S(T){b(T)&&c&&c(f,h,d,v)}function L(T){s.buttons(T)===0?p(0,T):p(f,T)}function x(T){p(f|s.buttons(T),T)}function C(T){p(f&~s.buttons(T),T)}function M(){_||(_=!0,u.addEventListener("mousemove",L),u.addEventListener("mousedown",x),u.addEventListener("mouseup",C),u.addEventListener("mouseleave",k),u.addEventListener("mouseenter",k),u.addEventListener("mouseout",k),u.addEventListener("mouseover",k),u.addEventListener("blur",E),u.addEventListener("keyup",S),u.addEventListener("keydown",S),u.addEventListener("keypress",S),u!==window&&(window.addEventListener("blur",E),window.addEventListener("keyup",S),window.addEventListener("keydown",S),window.addEventListener("keypress",S)))}function g(){_&&(_=!1,u.removeEventListener("mousemove",L),u.removeEventListener("mousedown",x),u.removeEventListener("mouseup",C),u.removeEventListener("mouseleave",k),u.removeEventListener("mouseenter",k),u.removeEventListener("mouseout",k),u.removeEventListener("mouseover",k),u.removeEventListener("blur",E),u.removeEventListener("keyup",S),u.removeEventListener("keydown",S),u.removeEventListener("keypress",S),u!==window&&(window.removeEventListener("blur",E),window.removeEventListener("keyup",S),window.removeEventListener("keydown",S),window.removeEventListener("keypress",S)))}M();var P={element:u};return Object.defineProperties(P,{enabled:{get:function(){return _},set:function(T){T?M():g()},enumerable:!0},buttons:{get:function(){return f},enumerable:!0},x:{get:function(){return h},enumerable:!0},y:{get:function(){return d},enumerable:!0},mods:{get:function(){return v},enumerable:!0}}),P}},395:function(i){function a(o,s,l){return o*(1-l)+s*l}i.exports=a},446:function(i,a,o){"use strict";var s=o(7640),l={};function u(c){var f=c.order,h=c.dtype,d=[f,h],v=d.join(":"),_=l[v];return _||(l[v]=_=s(f,h)),_(c),c}i.exports=u},483:function(i){i.exports=a;function a(o){var s=o[0],l=o[1],u=o[2],c=o[3];return s*s+l*l+u*u+c*c}},492:function(i){i.exports=a;function a(o,s,l){var u=s[0],c=s[1],f=s[2];return o[0]=u*l[0]+c*l[3]+f*l[6],o[1]=u*l[1]+c*l[4]+f*l[7],o[2]=u*l[2]+c*l[5]+f*l[8],o}},501:function(i,a,o){"use strict";i.exports=E;var s=o(2762),l=o(8116),u=o(1879).n,c=[0,0,0],f=[0,0,0],h=[0,0,0],d=[0,0,0],v=[1,1];function _(S){return S[0]=S[1]=S[2]=0,S}function b(S,L){return S[0]=L[0],S[1]=L[1],S[2]=L[2],S}function p(S,L,x,C,M,g,P,T){this.gl=S,this.vertBuffer=L,this.vao=x,this.shader=C,this.tickCount=M,this.tickOffset=g,this.gridCount=P,this.gridOffset=T}var k=p.prototype;k.bind=function(S,L,x){this.shader.bind(),this.shader.uniforms.model=S,this.shader.uniforms.view=L,this.shader.uniforms.projection=x,v[0]=this.gl.drawingBufferWidth,v[1]=this.gl.drawingBufferHeight,this.shader.uniforms.screenShape=v,this.vao.bind()},k.unbind=function(){this.vao.unbind()},k.drawAxisLine=function(S,L,x,C,M){var g=_(f);this.shader.uniforms.majorAxis=f,g[S]=L[1][S]-L[0][S],this.shader.uniforms.minorAxis=g;var P=b(d,x);P[S]+=L[0][S],this.shader.uniforms.offset=P,this.shader.uniforms.lineWidth=M,this.shader.uniforms.color=C;var T=_(h);T[(S+2)%3]=1,this.shader.uniforms.screenAxis=T,this.vao.draw(this.gl.TRIANGLES,6);var T=_(h);T[(S+1)%3]=1,this.shader.uniforms.screenAxis=T,this.vao.draw(this.gl.TRIANGLES,6)},k.drawAxisTicks=function(S,L,x,C,M){if(this.tickCount[S]){var g=_(c);g[S]=1,this.shader.uniforms.majorAxis=g,this.shader.uniforms.offset=L,this.shader.uniforms.minorAxis=x,this.shader.uniforms.color=C,this.shader.uniforms.lineWidth=M;var P=_(h);P[S]=1,this.shader.uniforms.screenAxis=P,this.vao.draw(this.gl.TRIANGLES,this.tickCount[S],this.tickOffset[S])}},k.drawGrid=function(S,L,x,C,M,g){if(this.gridCount[S]){var P=_(f);P[L]=x[1][L]-x[0][L],this.shader.uniforms.minorAxis=P;var T=b(d,C);T[L]+=x[0][L],this.shader.uniforms.offset=T;var z=_(c);z[S]=1,this.shader.uniforms.majorAxis=z;var O=_(h);O[S]=1,this.shader.uniforms.screenAxis=O,this.shader.uniforms.lineWidth=g,this.shader.uniforms.color=M,this.vao.draw(this.gl.TRIANGLES,this.gridCount[S],this.gridOffset[S])}},k.drawZero=function(S,L,x,C,M,g){var P=_(f);this.shader.uniforms.majorAxis=P,P[S]=x[1][S]-x[0][S],this.shader.uniforms.minorAxis=P;var T=b(d,C);T[S]+=x[0][S],this.shader.uniforms.offset=T;var z=_(h);z[L]=1,this.shader.uniforms.screenAxis=z,this.shader.uniforms.lineWidth=g,this.shader.uniforms.color=M,this.vao.draw(this.gl.TRIANGLES,6)},k.dispose=function(){this.vao.dispose(),this.vertBuffer.dispose(),this.shader.dispose()};function E(S,L,x){var C=[],M=[0,0,0],g=[0,0,0],P=[0,0,0],T=[0,0,0];C.push(0,0,1,0,1,1,0,0,-1,0,0,-1,0,1,1,0,1,-1);for(var z=0;z<3;++z){for(var G=C.length/3|0,O=0;O<x[z].length;++O){var V=+x[z][O].x;C.push(V,0,1,V,1,1,V,0,-1,V,0,-1,V,1,1,V,1,-1)}var j=C.length/3|0;M[z]=G,g[z]=j-G;for(var G=C.length/3|0,Z=0;Z<x[z].length;++Z){var V=+x[z][Z].x;C.push(V,0,1,V,1,1,V,0,-1,V,0,-1,V,1,1,V,1,-1)}var j=C.length/3|0;P[z]=G,T[z]=j-G}var N=s(S,new Float32Array(C)),H=l(S,[{buffer:N,type:S.FLOAT,size:3,stride:0,offset:0}]),te=u(S);return te.attributes.position.location=0,new p(S,N,H,te,g,M,T,P)}},544:function(i,a,o){"use strict";var s=o(5572);i.exports=l;function l(u,c){for(var f=u.length,h=new Array(f),d=0;d<f;++d)h[d]=s(u[d],c[d]);return h}},606:function(i,a,o){var s=o(236);i.exports=l;function l(){var u=s();return{get:function(c,f){var h=u(c);return h.hasOwnProperty("value")?h.value:f},set:function(c,f){return u(c).value=f,this},has:function(c){return"value"in u(c)},delete:function(c){return delete u(c).value}}}},614:function(i,a,o){var s=o(3236),l=s([`precision highp float;
+
+precision highp float;
+#define GLSLIFY 1
+
+vec3 getOrthogonalVector(vec3 v) {
+  // Return up-vector for only-z vector.
+  // Return ax + by + cz = 0, a point that lies on the plane that has v as a normal and that isn't (0,0,0).
+  // From the above if-statement we have ||a|| > 0  U  ||b|| > 0.
+  // Assign z = 0, x = -b, y = a:
+  // a*-b + b*a + c*0 = -ba + ba + 0 = 0
+  if (v.x*v.x > v.z*v.z || v.y*v.y > v.z*v.z) {
+    return normalize(vec3(-v.y, v.x, 0.0));
+  } else {
+    return normalize(vec3(0.0, v.z, -v.y));
+  }
+}
+
+// Calculate the cone vertex and normal at the given index.
+//
+// The returned vertex is for a cone with its top at origin and height of 1.0,
+// pointing in the direction of the vector attribute.
+//
+// Each cone is made up of a top vertex, a center base vertex and base perimeter vertices.
+// These vertices are used to make up the triangles of the cone by the following:
+//   segment + 0 top vertex
+//   segment + 1 perimeter vertex a+1
+//   segment + 2 perimeter vertex a
+//   segment + 3 center base vertex
+//   segment + 4 perimeter vertex a
+//   segment + 5 perimeter vertex a+1
+// Where segment is the number of the radial segment * 6 and a is the angle at that radial segment.
+// To go from index to segment, floor(index / 6)
+// To go from segment to angle, 2*pi * (segment/segmentCount)
+// To go from index to segment index, index - (segment*6)
+//
+vec3 getConePosition(vec3 d, float rawIndex, float coneOffset, out vec3 normal) {
+
+  const float segmentCount = 8.0;
+
+  float index = rawIndex - floor(rawIndex /
+    (segmentCount * 6.0)) *
+    (segmentCount * 6.0);
+
+  float segment = floor(0.001 + index/6.0);
+  float segmentIndex = index - (segment*6.0);
+
+  normal = -normalize(d);
+
+  if (segmentIndex > 2.99 && segmentIndex < 3.01) {
+    return mix(vec3(0.0), -d, coneOffset);
+  }
+
+  float nextAngle = (
+    (segmentIndex > 0.99 &&  segmentIndex < 1.01) ||
+    (segmentIndex > 4.99 &&  segmentIndex < 5.01)
+  ) ? 1.0 : 0.0;
+  float angle = 2.0 * 3.14159 * ((segment + nextAngle) / segmentCount);
+
+  vec3 v1 = mix(d, vec3(0.0), coneOffset);
+  vec3 v2 = v1 - d;
+
+  vec3 u = getOrthogonalVector(d);
+  vec3 v = normalize(cross(u, d));
+
+  vec3 x = u * cos(angle) * length(d)*0.25;
+  vec3 y = v * sin(angle) * length(d)*0.25;
+  vec3 v3 = v2 + x + y;
+  if (segmentIndex < 3.0) {
+    vec3 tx = u * sin(angle);
+    vec3 ty = v * -cos(angle);
+    vec3 tangent = tx + ty;
+    normal = normalize(cross(v3 - v1, tangent));
+  }
+
+  if (segmentIndex == 0.0) {
+    return mix(d, vec3(0.0), coneOffset);
+  }
+  return v3;
+}
+
+attribute vec3 vector;
+attribute vec4 color, position;
+attribute vec2 uv;
+
+uniform float vectorScale, coneScale, coneOffset;
+uniform mat4 model, view, projection, inverseModel;
+uniform vec3 eyePosition, lightPosition;
+
+varying vec3 f_normal, f_lightDirection, f_eyeDirection, f_data, f_position;
+varying vec4 f_color;
+varying vec2 f_uv;
+
+void main() {
+  // Scale the vector magnitude to stay constant with
+  // model & view changes.
+  vec3 normal;
+  vec3 XYZ = getConePosition(mat3(model) * ((vectorScale * coneScale) * vector), position.w, coneOffset, normal);
+  vec4 conePosition = model * vec4(position.xyz, 1.0) + vec4(XYZ, 0.0);
+
+  //Lighting geometry parameters
+  vec4 cameraCoordinate = view * conePosition;
+  cameraCoordinate.xyz /= cameraCoordinate.w;
+  f_lightDirection = lightPosition - cameraCoordinate.xyz;
+  f_eyeDirection   = eyePosition - cameraCoordinate.xyz;
+  f_normal = normalize((vec4(normal, 0.0) * inverseModel).xyz);
+
+  // vec4 m_position  = model * vec4(conePosition, 1.0);
+  vec4 t_position  = view * conePosition;
+  gl_Position      = projection * t_position;
+
+  f_color          = color;
+  f_data           = conePosition.xyz;
+  f_position       = position.xyz;
+  f_uv             = uv;
+}
+`]),u=s([`#extension GL_OES_standard_derivatives : enable
+
+precision highp float;
+#define GLSLIFY 1
+
+float beckmannDistribution(float x, float roughness) {
+  float NdotH = max(x, 0.0001);
+  float cos2Alpha = NdotH * NdotH;
+  float tan2Alpha = (cos2Alpha - 1.0) / cos2Alpha;
+  float roughness2 = roughness * roughness;
+  float denom = 3.141592653589793 * roughness2 * cos2Alpha * cos2Alpha;
+  return exp(tan2Alpha / roughness2) / denom;
+}
+
+float cookTorranceSpecular(
+  vec3 lightDirection,
+  vec3 viewDirection,
+  vec3 surfaceNormal,
+  float roughness,
+  float fresnel) {
+
+  float VdotN = max(dot(viewDirection, surfaceNormal), 0.0);
+  float LdotN = max(dot(lightDirection, surfaceNormal), 0.0);
+
+  //Half angle vector
+  vec3 H = normalize(lightDirection + viewDirection);
+
+  //Geometric term
+  float NdotH = max(dot(surfaceNormal, H), 0.0);
+  float VdotH = max(dot(viewDirection, H), 0.000001);
+  float LdotH = max(dot(lightDirection, H), 0.000001);
+  float G1 = (2.0 * NdotH * VdotN) / VdotH;
+  float G2 = (2.0 * NdotH * LdotN) / LdotH;
+  float G = min(1.0, min(G1, G2));
+  
+  //Distribution term
+  float D = beckmannDistribution(NdotH, roughness);
+
+  //Fresnel term
+  float F = pow(1.0 - VdotN, fresnel);
+
+  //Multiply terms and done
+  return  G * F * D / max(3.14159265 * VdotN, 0.000001);
+}
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+uniform vec3 clipBounds[2];
+uniform float roughness, fresnel, kambient, kdiffuse, kspecular, opacity;
+uniform sampler2D texture;
+
+varying vec3 f_normal, f_lightDirection, f_eyeDirection, f_data, f_position;
+varying vec4 f_color;
+varying vec2 f_uv;
+
+void main() {
+  if (outOfRange(clipBounds[0], clipBounds[1], f_position)) discard;
+  vec3 N = normalize(f_normal);
+  vec3 L = normalize(f_lightDirection);
+  vec3 V = normalize(f_eyeDirection);
+
+  if(gl_FrontFacing) {
+    N = -N;
+  }
+
+  float specular = min(1.0, max(0.0, cookTorranceSpecular(L, V, N, roughness, fresnel)));
+  float diffuse  = min(kambient + kdiffuse * max(dot(N, L), 0.0), 1.0);
+
+  vec4 surfaceColor = f_color * texture2D(texture, f_uv);
+  vec4 litColor = surfaceColor.a * vec4(diffuse * surfaceColor.rgb + kspecular * vec3(1,1,1) * specular,  1.0);
+
+  gl_FragColor = litColor * opacity;
+}
+`]),c=s([`precision highp float;
+
+precision highp float;
+#define GLSLIFY 1
+
+vec3 getOrthogonalVector(vec3 v) {
+  // Return up-vector for only-z vector.
+  // Return ax + by + cz = 0, a point that lies on the plane that has v as a normal and that isn't (0,0,0).
+  // From the above if-statement we have ||a|| > 0  U  ||b|| > 0.
+  // Assign z = 0, x = -b, y = a:
+  // a*-b + b*a + c*0 = -ba + ba + 0 = 0
+  if (v.x*v.x > v.z*v.z || v.y*v.y > v.z*v.z) {
+    return normalize(vec3(-v.y, v.x, 0.0));
+  } else {
+    return normalize(vec3(0.0, v.z, -v.y));
+  }
+}
+
+// Calculate the cone vertex and normal at the given index.
+//
+// The returned vertex is for a cone with its top at origin and height of 1.0,
+// pointing in the direction of the vector attribute.
+//
+// Each cone is made up of a top vertex, a center base vertex and base perimeter vertices.
+// These vertices are used to make up the triangles of the cone by the following:
+//   segment + 0 top vertex
+//   segment + 1 perimeter vertex a+1
+//   segment + 2 perimeter vertex a
+//   segment + 3 center base vertex
+//   segment + 4 perimeter vertex a
+//   segment + 5 perimeter vertex a+1
+// Where segment is the number of the radial segment * 6 and a is the angle at that radial segment.
+// To go from index to segment, floor(index / 6)
+// To go from segment to angle, 2*pi * (segment/segmentCount)
+// To go from index to segment index, index - (segment*6)
+//
+vec3 getConePosition(vec3 d, float rawIndex, float coneOffset, out vec3 normal) {
+
+  const float segmentCount = 8.0;
+
+  float index = rawIndex - floor(rawIndex /
+    (segmentCount * 6.0)) *
+    (segmentCount * 6.0);
+
+  float segment = floor(0.001 + index/6.0);
+  float segmentIndex = index - (segment*6.0);
+
+  normal = -normalize(d);
+
+  if (segmentIndex > 2.99 && segmentIndex < 3.01) {
+    return mix(vec3(0.0), -d, coneOffset);
+  }
+
+  float nextAngle = (
+    (segmentIndex > 0.99 &&  segmentIndex < 1.01) ||
+    (segmentIndex > 4.99 &&  segmentIndex < 5.01)
+  ) ? 1.0 : 0.0;
+  float angle = 2.0 * 3.14159 * ((segment + nextAngle) / segmentCount);
+
+  vec3 v1 = mix(d, vec3(0.0), coneOffset);
+  vec3 v2 = v1 - d;
+
+  vec3 u = getOrthogonalVector(d);
+  vec3 v = normalize(cross(u, d));
+
+  vec3 x = u * cos(angle) * length(d)*0.25;
+  vec3 y = v * sin(angle) * length(d)*0.25;
+  vec3 v3 = v2 + x + y;
+  if (segmentIndex < 3.0) {
+    vec3 tx = u * sin(angle);
+    vec3 ty = v * -cos(angle);
+    vec3 tangent = tx + ty;
+    normal = normalize(cross(v3 - v1, tangent));
+  }
+
+  if (segmentIndex == 0.0) {
+    return mix(d, vec3(0.0), coneOffset);
+  }
+  return v3;
+}
+
+attribute vec4 vector;
+attribute vec4 position;
+attribute vec4 id;
+
+uniform mat4 model, view, projection;
+uniform float vectorScale, coneScale, coneOffset;
+
+varying vec3 f_position;
+varying vec4 f_id;
+
+void main() {
+  vec3 normal;
+  vec3 XYZ = getConePosition(mat3(model) * ((vectorScale * coneScale) * vector.xyz), position.w, coneOffset, normal);
+  vec4 conePosition = model * vec4(position.xyz, 1.0) + vec4(XYZ, 0.0);
+  gl_Position = projection * (view * conePosition);
+  f_id        = id;
+  f_position  = position.xyz;
+}
+`]),f=s([`precision highp float;
+#define GLSLIFY 1
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+uniform vec3  clipBounds[2];
+uniform float pickId;
+
+varying vec3 f_position;
+varying vec4 f_id;
+
+void main() {
+  if (outOfRange(clipBounds[0], clipBounds[1], f_position)) discard;
+
+  gl_FragColor = vec4(pickId, f_id.xyz);
+}`]);a.meshShader={vertex:l,fragment:u,attributes:[{name:"position",type:"vec4"},{name:"color",type:"vec4"},{name:"uv",type:"vec2"},{name:"vector",type:"vec3"}]},a.pickShader={vertex:c,fragment:f,attributes:[{name:"position",type:"vec4"},{name:"id",type:"vec4"},{name:"vector",type:"vec3"}]}},620:function(i){i.exports=["precision","highp","mediump","lowp","attribute","const","uniform","varying","break","continue","do","for","while","if","else","in","out","inout","float","int","uint","void","bool","true","false","discard","return","mat2","mat3","mat4","vec2","vec3","vec4","ivec2","ivec3","ivec4","bvec2","bvec3","bvec4","sampler1D","sampler2D","sampler3D","samplerCube","sampler1DShadow","sampler2DShadow","struct","asm","class","union","enum","typedef","template","this","packed","goto","switch","default","inline","noinline","volatile","public","static","extern","external","interface","long","short","double","half","fixed","unsigned","input","output","hvec2","hvec3","hvec4","dvec2","dvec3","dvec4","fvec2","fvec3","fvec4","sampler2DRect","sampler3DRect","sampler2DRectShadow","sizeof","cast","namespace","using"]},665:function(i,a,o){"use strict";var s=o(3202);i.exports=f;var l=96;function u(h,d){var v=s(getComputedStyle(h).getPropertyValue(d));return v[0]*f(v[1],h)}function c(h,d){var v=document.createElement("div");v.style["font-size"]="128"+h,d.appendChild(v);var _=u(v,"font-size")/128;return d.removeChild(v),_}function f(h,d){switch(d=d||document.body,h=(h||"px").trim().toLowerCase(),(d===window||d===document)&&(d=document.body),h){case"%":return d.clientHeight/100;case"ch":case"ex":return c(h,d);case"em":return u(d,"font-size");case"rem":return u(document.body,"font-size");case"vw":return window.innerWidth/100;case"vh":return window.innerHeight/100;case"vmin":return Math.min(window.innerWidth,window.innerHeight)/100;case"vmax":return Math.max(window.innerWidth,window.innerHeight)/100;case"in":return l;case"cm":return l/2.54;case"mm":return l/25.4;case"pt":return l/72;case"pc":return l/6}return 1}},727:function(i,a,o){"use strict";var s=o(2962),l=6;function u(S){var L=S===2?h:S===3?d:S===4?v:S===5?_:b;return S<6?L(s[S]):L(s)}function c(){return[[0]]}function f(S,L){return[[L[0]],[S[0][0]]]}function h(S){return function(x,C){return[S([[+C[0],+x[0][1]],[+C[1],+x[1][1]]]),S([[+x[0][0],+C[0]],[+x[1][0],+C[1]]]),S(x)]}}function d(S){return function(x,C){return[S([[+C[0],+x[0][1],+x[0][2]],[+C[1],+x[1][1],+x[1][2]],[+C[2],+x[2][1],+x[2][2]]]),S([[+x[0][0],+C[0],+x[0][2]],[+x[1][0],+C[1],+x[1][2]],[+x[2][0],+C[2],+x[2][2]]]),S([[+x[0][0],+x[0][1],+C[0]],[+x[1][0],+x[1][1],+C[1]],[+x[2][0],+x[2][1],+C[2]]]),S(x)]}}function v(S){return function(x,C){return[S([[+C[0],+x[0][1],+x[0][2],+x[0][3]],[+C[1],+x[1][1],+x[1][2],+x[1][3]],[+C[2],+x[2][1],+x[2][2],+x[2][3]],[+C[3],+x[3][1],+x[3][2],+x[3][3]]]),S([[+x[0][0],+C[0],+x[0][2],+x[0][3]],[+x[1][0],+C[1],+x[1][2],+x[1][3]],[+x[2][0],+C[2],+x[2][2],+x[2][3]],[+x[3][0],+C[3],+x[3][2],+x[3][3]]]),S([[+x[0][0],+x[0][1],+C[0],+x[0][3]],[+x[1][0],+x[1][1],+C[1],+x[1][3]],[+x[2][0],+x[2][1],+C[2],+x[2][3]],[+x[3][0],+x[3][1],+C[3],+x[3][3]]]),S([[+x[0][0],+x[0][1],+x[0][2],+C[0]],[+x[1][0],+x[1][1],+x[1][2],+C[1]],[+x[2][0],+x[2][1],+x[2][2],+C[2]],[+x[3][0],+x[3][1],+x[3][2],+C[3]]]),S(x)]}}function _(S){return function(x,C){return[S([[+C[0],+x[0][1],+x[0][2],+x[0][3],+x[0][4]],[+C[1],+x[1][1],+x[1][2],+x[1][3],+x[1][4]],[+C[2],+x[2][1],+x[2][2],+x[2][3],+x[2][4]],[+C[3],+x[3][1],+x[3][2],+x[3][3],+x[3][4]],[+C[4],+x[4][1],+x[4][2],+x[4][3],+x[4][4]]]),S([[+x[0][0],+C[0],+x[0][2],+x[0][3],+x[0][4]],[+x[1][0],+C[1],+x[1][2],+x[1][3],+x[1][4]],[+x[2][0],+C[2],+x[2][2],+x[2][3],+x[2][4]],[+x[3][0],+C[3],+x[3][2],+x[3][3],+x[3][4]],[+x[4][0],+C[4],+x[4][2],+x[4][3],+x[4][4]]]),S([[+x[0][0],+x[0][1],+C[0],+x[0][3],+x[0][4]],[+x[1][0],+x[1][1],+C[1],+x[1][3],+x[1][4]],[+x[2][0],+x[2][1],+C[2],+x[2][3],+x[2][4]],[+x[3][0],+x[3][1],+C[3],+x[3][3],+x[3][4]],[+x[4][0],+x[4][1],+C[4],+x[4][3],+x[4][4]]]),S([[+x[0][0],+x[0][1],+x[0][2],+C[0],+x[0][4]],[+x[1][0],+x[1][1],+x[1][2],+C[1],+x[1][4]],[+x[2][0],+x[2][1],+x[2][2],+C[2],+x[2][4]],[+x[3][0],+x[3][1],+x[3][2],+C[3],+x[3][4]],[+x[4][0],+x[4][1],+x[4][2],+C[4],+x[4][4]]]),S([[+x[0][0],+x[0][1],+x[0][2],+x[0][3],+C[0]],[+x[1][0],+x[1][1],+x[1][2],+x[1][3],+C[1]],[+x[2][0],+x[2][1],+x[2][2],+x[2][3],+C[2]],[+x[3][0],+x[3][1],+x[3][2],+x[3][3],+C[3]],[+x[4][0],+x[4][1],+x[4][2],+x[4][3],+C[4]]]),S(x)]}}function b(S){return function(x,C){return[S([[+C[0],+x[0][1],+x[0][2],+x[0][3],+x[0][4],+x[0][5]],[+C[1],+x[1][1],+x[1][2],+x[1][3],+x[1][4],+x[1][5]],[+C[2],+x[2][1],+x[2][2],+x[2][3],+x[2][4],+x[2][5]],[+C[3],+x[3][1],+x[3][2],+x[3][3],+x[3][4],+x[3][5]],[+C[4],+x[4][1],+x[4][2],+x[4][3],+x[4][4],+x[4][5]],[+C[5],+x[5][1],+x[5][2],+x[5][3],+x[5][4],+x[5][5]]]),S([[+x[0][0],+C[0],+x[0][2],+x[0][3],+x[0][4],+x[0][5]],[+x[1][0],+C[1],+x[1][2],+x[1][3],+x[1][4],+x[1][5]],[+x[2][0],+C[2],+x[2][2],+x[2][3],+x[2][4],+x[2][5]],[+x[3][0],+C[3],+x[3][2],+x[3][3],+x[3][4],+x[3][5]],[+x[4][0],+C[4],+x[4][2],+x[4][3],+x[4][4],+x[4][5]],[+x[5][0],+C[5],+x[5][2],+x[5][3],+x[5][4],+x[5][5]]]),S([[+x[0][0],+x[0][1],+C[0],+x[0][3],+x[0][4],+x[0][5]],[+x[1][0],+x[1][1],+C[1],+x[1][3],+x[1][4],+x[1][5]],[+x[2][0],+x[2][1],+C[2],+x[2][3],+x[2][4],+x[2][5]],[+x[3][0],+x[3][1],+C[3],+x[3][3],+x[3][4],+x[3][5]],[+x[4][0],+x[4][1],+C[4],+x[4][3],+x[4][4],+x[4][5]],[+x[5][0],+x[5][1],+C[5],+x[5][3],+x[5][4],+x[5][5]]]),S([[+x[0][0],+x[0][1],+x[0][2],+C[0],+x[0][4],+x[0][5]],[+x[1][0],+x[1][1],+x[1][2],+C[1],+x[1][4],+x[1][5]],[+x[2][0],+x[2][1],+x[2][2],+C[2],+x[2][4],+x[2][5]],[+x[3][0],+x[3][1],+x[3][2],+C[3],+x[3][4],+x[3][5]],[+x[4][0],+x[4][1],+x[4][2],+C[4],+x[4][4],+x[4][5]],[+x[5][0],+x[5][1],+x[5][2],+C[5],+x[5][4],+x[5][5]]]),S([[+x[0][0],+x[0][1],+x[0][2],+x[0][3],+C[0],+x[0][5]],[+x[1][0],+x[1][1],+x[1][2],+x[1][3],+C[1],+x[1][5]],[+x[2][0],+x[2][1],+x[2][2],+x[2][3],+C[2],+x[2][5]],[+x[3][0],+x[3][1],+x[3][2],+x[3][3],+C[3],+x[3][5]],[+x[4][0],+x[4][1],+x[4][2],+x[4][3],+C[4],+x[4][5]],[+x[5][0],+x[5][1],+x[5][2],+x[5][3],+C[5],+x[5][5]]]),S([[+x[0][0],+x[0][1],+x[0][2],+x[0][3],+x[0][4],+C[0]],[+x[1][0],+x[1][1],+x[1][2],+x[1][3],+x[1][4],+C[1]],[+x[2][0],+x[2][1],+x[2][2],+x[2][3],+x[2][4],+C[2]],[+x[3][0],+x[3][1],+x[3][2],+x[3][3],+x[3][4],+C[3]],[+x[4][0],+x[4][1],+x[4][2],+x[4][3],+x[4][4],+C[4]],[+x[5][0],+x[5][1],+x[5][2],+x[5][3],+x[5][4],+C[5]]]),S(x)]}}var p=[c,f];function k(S,L,x,C,M,g,P,T){return function(O,V){switch(O.length){case 0:return S(O,V);case 1:return L(O,V);case 2:return x(O,V);case 3:return C(O,V);case 4:return M(O,V);case 5:return g(O,V)}var G=P[O.length];return G||(G=P[O.length]=T(O.length)),G(O,V)}}function E(){for(;p.length<l;)p.push(u(p.length));i.exports=k.apply(void 0,p.concat([p,u]));for(var S=0;S<l;++S)i.exports[S]=p[S]}E()},737:function(i){i.exports={0:"NONE",1:"ONE",2:"LINE_LOOP",3:"LINE_STRIP",4:"TRIANGLES",5:"TRIANGLE_STRIP",6:"TRIANGLE_FAN",256:"DEPTH_BUFFER_BIT",512:"NEVER",513:"LESS",514:"EQUAL",515:"LEQUAL",516:"GREATER",517:"NOTEQUAL",518:"GEQUAL",519:"ALWAYS",768:"SRC_COLOR",769:"ONE_MINUS_SRC_COLOR",770:"SRC_ALPHA",771:"ONE_MINUS_SRC_ALPHA",772:"DST_ALPHA",773:"ONE_MINUS_DST_ALPHA",774:"DST_COLOR",775:"ONE_MINUS_DST_COLOR",776:"SRC_ALPHA_SATURATE",1024:"STENCIL_BUFFER_BIT",1028:"FRONT",1029:"BACK",1032:"FRONT_AND_BACK",1280:"INVALID_ENUM",1281:"INVALID_VALUE",1282:"INVALID_OPERATION",1285:"OUT_OF_MEMORY",1286:"INVALID_FRAMEBUFFER_OPERATION",2304:"CW",2305:"CCW",2849:"LINE_WIDTH",2884:"CULL_FACE",2885:"CULL_FACE_MODE",2886:"FRONT_FACE",2928:"DEPTH_RANGE",2929:"DEPTH_TEST",2930:"DEPTH_WRITEMASK",2931:"DEPTH_CLEAR_VALUE",2932:"DEPTH_FUNC",2960:"STENCIL_TEST",2961:"STENCIL_CLEAR_VALUE",2962:"STENCIL_FUNC",2963:"STENCIL_VALUE_MASK",2964:"STENCIL_FAIL",2965:"STENCIL_PASS_DEPTH_FAIL",2966:"STENCIL_PASS_DEPTH_PASS",2967:"STENCIL_REF",2968:"STENCIL_WRITEMASK",2978:"VIEWPORT",3024:"DITHER",3042:"BLEND",3088:"SCISSOR_BOX",3089:"SCISSOR_TEST",3106:"COLOR_CLEAR_VALUE",3107:"COLOR_WRITEMASK",3317:"UNPACK_ALIGNMENT",3333:"PACK_ALIGNMENT",3379:"MAX_TEXTURE_SIZE",3386:"MAX_VIEWPORT_DIMS",3408:"SUBPIXEL_BITS",3410:"RED_BITS",3411:"GREEN_BITS",3412:"BLUE_BITS",3413:"ALPHA_BITS",3414:"DEPTH_BITS",3415:"STENCIL_BITS",3553:"TEXTURE_2D",4352:"DONT_CARE",4353:"FASTEST",4354:"NICEST",5120:"BYTE",5121:"UNSIGNED_BYTE",5122:"SHORT",5123:"UNSIGNED_SHORT",5124:"INT",5125:"UNSIGNED_INT",5126:"FLOAT",5386:"INVERT",5890:"TEXTURE",6401:"STENCIL_INDEX",6402:"DEPTH_COMPONENT",6406:"ALPHA",6407:"RGB",6408:"RGBA",6409:"LUMINANCE",6410:"LUMINANCE_ALPHA",7680:"KEEP",7681:"REPLACE",7682:"INCR",7683:"DECR",7936:"VENDOR",7937:"RENDERER",7938:"VERSION",9728:"NEAREST",9729:"LINEAR",9984:"NEAREST_MIPMAP_NEAREST",9985:"LINEAR_MIPMAP_NEAREST",9986:"NEAREST_MIPMAP_LINEAR",9987:"LINEAR_MIPMAP_LINEAR",10240:"TEXTURE_MAG_FILTER",10241:"TEXTURE_MIN_FILTER",10242:"TEXTURE_WRAP_S",10243:"TEXTURE_WRAP_T",10497:"REPEAT",10752:"POLYGON_OFFSET_UNITS",16384:"COLOR_BUFFER_BIT",32769:"CONSTANT_COLOR",32770:"ONE_MINUS_CONSTANT_COLOR",32771:"CONSTANT_ALPHA",32772:"ONE_MINUS_CONSTANT_ALPHA",32773:"BLEND_COLOR",32774:"FUNC_ADD",32777:"BLEND_EQUATION_RGB",32778:"FUNC_SUBTRACT",32779:"FUNC_REVERSE_SUBTRACT",32819:"UNSIGNED_SHORT_4_4_4_4",32820:"UNSIGNED_SHORT_5_5_5_1",32823:"POLYGON_OFFSET_FILL",32824:"POLYGON_OFFSET_FACTOR",32854:"RGBA4",32855:"RGB5_A1",32873:"TEXTURE_BINDING_2D",32926:"SAMPLE_ALPHA_TO_COVERAGE",32928:"SAMPLE_COVERAGE",32936:"SAMPLE_BUFFERS",32937:"SAMPLES",32938:"SAMPLE_COVERAGE_VALUE",32939:"SAMPLE_COVERAGE_INVERT",32968:"BLEND_DST_RGB",32969:"BLEND_SRC_RGB",32970:"BLEND_DST_ALPHA",32971:"BLEND_SRC_ALPHA",33071:"CLAMP_TO_EDGE",33170:"GENERATE_MIPMAP_HINT",33189:"DEPTH_COMPONENT16",33306:"DEPTH_STENCIL_ATTACHMENT",33635:"UNSIGNED_SHORT_5_6_5",33648:"MIRRORED_REPEAT",33901:"ALIASED_POINT_SIZE_RANGE",33902:"ALIASED_LINE_WIDTH_RANGE",33984:"TEXTURE0",33985:"TEXTURE1",33986:"TEXTURE2",33987:"TEXTURE3",33988:"TEXTURE4",33989:"TEXTURE5",33990:"TEXTURE6",33991:"TEXTURE7",33992:"TEXTURE8",33993:"TEXTURE9",33994:"TEXTURE10",33995:"TEXTURE11",33996:"TEXTURE12",33997:"TEXTURE13",33998:"TEXTURE14",33999:"TEXTURE15",34e3:"TEXTURE16",34001:"TEXTURE17",34002:"TEXTURE18",34003:"TEXTURE19",34004:"TEXTURE20",34005:"TEXTURE21",34006:"TEXTURE22",34007:"TEXTURE23",34008:"TEXTURE24",34009:"TEXTURE25",34010:"TEXTURE26",34011:"TEXTURE27",34012:"TEXTURE28",34013:"TEXTURE29",34014:"TEXTURE30",34015:"TEXTURE31",34016:"ACTIVE_TEXTURE",34024:"MAX_RENDERBUFFER_SIZE",34041:"DEPTH_STENCIL",34055:"INCR_WRAP",34056:"DECR_WRAP",34067:"TEXTURE_CUBE_MAP",34068:"TEXTURE_BINDING_CUBE_MAP",34069:"TEXTURE_CUBE_MAP_POSITIVE_X",34070:"TEXTURE_CUBE_MAP_NEGATIVE_X",34071:"TEXTURE_CUBE_MAP_POSITIVE_Y",34072:"TEXTURE_CUBE_MAP_NEGATIVE_Y",34073:"TEXTURE_CUBE_MAP_POSITIVE_Z",34074:"TEXTURE_CUBE_MAP_NEGATIVE_Z",34076:"MAX_CUBE_MAP_TEXTURE_SIZE",34338:"VERTEX_ATTRIB_ARRAY_ENABLED",34339:"VERTEX_ATTRIB_ARRAY_SIZE",34340:"VERTEX_ATTRIB_ARRAY_STRIDE",34341:"VERTEX_ATTRIB_ARRAY_TYPE",34342:"CURRENT_VERTEX_ATTRIB",34373:"VERTEX_ATTRIB_ARRAY_POINTER",34466:"NUM_COMPRESSED_TEXTURE_FORMATS",34467:"COMPRESSED_TEXTURE_FORMATS",34660:"BUFFER_SIZE",34661:"BUFFER_USAGE",34816:"STENCIL_BACK_FUNC",34817:"STENCIL_BACK_FAIL",34818:"STENCIL_BACK_PASS_DEPTH_FAIL",34819:"STENCIL_BACK_PASS_DEPTH_PASS",34877:"BLEND_EQUATION_ALPHA",34921:"MAX_VERTEX_ATTRIBS",34922:"VERTEX_ATTRIB_ARRAY_NORMALIZED",34930:"MAX_TEXTURE_IMAGE_UNITS",34962:"ARRAY_BUFFER",34963:"ELEMENT_ARRAY_BUFFER",34964:"ARRAY_BUFFER_BINDING",34965:"ELEMENT_ARRAY_BUFFER_BINDING",34975:"VERTEX_ATTRIB_ARRAY_BUFFER_BINDING",35040:"STREAM_DRAW",35044:"STATIC_DRAW",35048:"DYNAMIC_DRAW",35632:"FRAGMENT_SHADER",35633:"VERTEX_SHADER",35660:"MAX_VERTEX_TEXTURE_IMAGE_UNITS",35661:"MAX_COMBINED_TEXTURE_IMAGE_UNITS",35663:"SHADER_TYPE",35664:"FLOAT_VEC2",35665:"FLOAT_VEC3",35666:"FLOAT_VEC4",35667:"INT_VEC2",35668:"INT_VEC3",35669:"INT_VEC4",35670:"BOOL",35671:"BOOL_VEC2",35672:"BOOL_VEC3",35673:"BOOL_VEC4",35674:"FLOAT_MAT2",35675:"FLOAT_MAT3",35676:"FLOAT_MAT4",35678:"SAMPLER_2D",35680:"SAMPLER_CUBE",35712:"DELETE_STATUS",35713:"COMPILE_STATUS",35714:"LINK_STATUS",35715:"VALIDATE_STATUS",35716:"INFO_LOG_LENGTH",35717:"ATTACHED_SHADERS",35718:"ACTIVE_UNIFORMS",35719:"ACTIVE_UNIFORM_MAX_LENGTH",35720:"SHADER_SOURCE_LENGTH",35721:"ACTIVE_ATTRIBUTES",35722:"ACTIVE_ATTRIBUTE_MAX_LENGTH",35724:"SHADING_LANGUAGE_VERSION",35725:"CURRENT_PROGRAM",36003:"STENCIL_BACK_REF",36004:"STENCIL_BACK_VALUE_MASK",36005:"STENCIL_BACK_WRITEMASK",36006:"FRAMEBUFFER_BINDING",36007:"RENDERBUFFER_BINDING",36048:"FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE",36049:"FRAMEBUFFER_ATTACHMENT_OBJECT_NAME",36050:"FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL",36051:"FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE",36053:"FRAMEBUFFER_COMPLETE",36054:"FRAMEBUFFER_INCOMPLETE_ATTACHMENT",36055:"FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT",36057:"FRAMEBUFFER_INCOMPLETE_DIMENSIONS",36061:"FRAMEBUFFER_UNSUPPORTED",36064:"COLOR_ATTACHMENT0",36096:"DEPTH_ATTACHMENT",36128:"STENCIL_ATTACHMENT",36160:"FRAMEBUFFER",36161:"RENDERBUFFER",36162:"RENDERBUFFER_WIDTH",36163:"RENDERBUFFER_HEIGHT",36164:"RENDERBUFFER_INTERNAL_FORMAT",36168:"STENCIL_INDEX8",36176:"RENDERBUFFER_RED_SIZE",36177:"RENDERBUFFER_GREEN_SIZE",36178:"RENDERBUFFER_BLUE_SIZE",36179:"RENDERBUFFER_ALPHA_SIZE",36180:"RENDERBUFFER_DEPTH_SIZE",36181:"RENDERBUFFER_STENCIL_SIZE",36194:"RGB565",36336:"LOW_FLOAT",36337:"MEDIUM_FLOAT",36338:"HIGH_FLOAT",36339:"LOW_INT",36340:"MEDIUM_INT",36341:"HIGH_INT",36346:"SHADER_COMPILER",36347:"MAX_VERTEX_UNIFORM_VECTORS",36348:"MAX_VARYING_VECTORS",36349:"MAX_FRAGMENT_UNIFORM_VECTORS",37440:"UNPACK_FLIP_Y_WEBGL",37441:"UNPACK_PREMULTIPLY_ALPHA_WEBGL",37442:"CONTEXT_LOST_WEBGL",37443:"UNPACK_COLORSPACE_CONVERSION_WEBGL",37444:"BROWSER_DEFAULT_WEBGL"}},781:function(i,a,o){"use strict";i.exports=l;var s=o(3349);function l(u,c){var f=[];return c=+c||0,s(u.hi(u.shape[0]-1),f,c),f}},783:function(i){i.exports=a;function a(o,s,l,u){var c=s[0],f=s[1],h=s[2],d=s[3],v=l[0],_=l[1],b=l[2],p=l[3],k,E,S,L,x;return E=c*v+f*_+h*b+d*p,E<0&&(E=-E,v=-v,_=-_,b=-b,p=-p),1-E>1e-6?(k=Math.acos(E),S=Math.sin(k),L=Math.sin((1-u)*k)/S,x=Math.sin(u*k)/S):(L=1-u,x=u),o[0]=L*c+x*v,o[1]=L*f+x*_,o[2]=L*h+x*b,o[3]=L*d+x*p,o}},799:function(i,a,o){var s=o(3236),l=o(9405),u=s([`precision mediump float;
+#define GLSLIFY 1
+attribute vec2 position;
+varying vec2 uv;
+void main() {
+  uv = position;
+  gl_Position = vec4(position, 0, 1);
+}`]),c=s([`precision mediump float;
+#define GLSLIFY 1
+
+uniform sampler2D accumBuffer;
+varying vec2 uv;
+
+void main() {
+  vec4 accum = texture2D(accumBuffer, 0.5 * (uv + 1.0));
+  gl_FragColor = min(vec4(1,1,1,1), accum);
+}`]);i.exports=function(f){return l(f,u,c,null,[{name:"position",type:"vec2"}])}},811:function(i){i.exports=a;function a(o,s){return o[0]=1/s[0],o[1]=1/s[1],o[2]=1/s[2],o}},840:function(i,a,o){var s=o(3236),l=s([`precision highp float;
+#define GLSLIFY 1
+
+attribute vec3 position, normal;
+attribute vec4 color;
+attribute vec2 uv;
+
+uniform mat4 model
+           , view
+           , projection
+           , inverseModel;
+uniform vec3 eyePosition
+           , lightPosition;
+
+varying vec3 f_normal
+           , f_lightDirection
+           , f_eyeDirection
+           , f_data;
+varying vec4 f_color;
+varying vec2 f_uv;
+
+vec4 project(vec3 p) {
+  return projection * (view * (model * vec4(p, 1.0)));
+}
+
+void main() {
+  gl_Position      = project(position);
+
+  //Lighting geometry parameters
+  vec4 cameraCoordinate = view * vec4(position , 1.0);
+  cameraCoordinate.xyz /= cameraCoordinate.w;
+  f_lightDirection = lightPosition - cameraCoordinate.xyz;
+  f_eyeDirection   = eyePosition - cameraCoordinate.xyz;
+  f_normal  = normalize((vec4(normal, 0.0) * inverseModel).xyz);
+
+  f_color          = color;
+  f_data           = position;
+  f_uv             = uv;
+}
+`]),u=s([`#extension GL_OES_standard_derivatives : enable
+
+precision highp float;
+#define GLSLIFY 1
+
+float beckmannDistribution(float x, float roughness) {
+  float NdotH = max(x, 0.0001);
+  float cos2Alpha = NdotH * NdotH;
+  float tan2Alpha = (cos2Alpha - 1.0) / cos2Alpha;
+  float roughness2 = roughness * roughness;
+  float denom = 3.141592653589793 * roughness2 * cos2Alpha * cos2Alpha;
+  return exp(tan2Alpha / roughness2) / denom;
+}
+
+float cookTorranceSpecular(
+  vec3 lightDirection,
+  vec3 viewDirection,
+  vec3 surfaceNormal,
+  float roughness,
+  float fresnel) {
+
+  float VdotN = max(dot(viewDirection, surfaceNormal), 0.0);
+  float LdotN = max(dot(lightDirection, surfaceNormal), 0.0);
+
+  //Half angle vector
+  vec3 H = normalize(lightDirection + viewDirection);
+
+  //Geometric term
+  float NdotH = max(dot(surfaceNormal, H), 0.0);
+  float VdotH = max(dot(viewDirection, H), 0.000001);
+  float LdotH = max(dot(lightDirection, H), 0.000001);
+  float G1 = (2.0 * NdotH * VdotN) / VdotH;
+  float G2 = (2.0 * NdotH * LdotN) / LdotH;
+  float G = min(1.0, min(G1, G2));
+  
+  //Distribution term
+  float D = beckmannDistribution(NdotH, roughness);
+
+  //Fresnel term
+  float F = pow(1.0 - VdotN, fresnel);
+
+  //Multiply terms and done
+  return  G * F * D / max(3.14159265 * VdotN, 0.000001);
+}
+
+//#pragma glslify: beckmann = require(glsl-specular-beckmann) // used in gl-surface3d
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+uniform vec3 clipBounds[2];
+uniform float roughness
+            , fresnel
+            , kambient
+            , kdiffuse
+            , kspecular;
+uniform sampler2D texture;
+
+varying vec3 f_normal
+           , f_lightDirection
+           , f_eyeDirection
+           , f_data;
+varying vec4 f_color;
+varying vec2 f_uv;
+
+void main() {
+  if (f_color.a == 0.0 ||
+    outOfRange(clipBounds[0], clipBounds[1], f_data)
+  ) discard;
+
+  vec3 N = normalize(f_normal);
+  vec3 L = normalize(f_lightDirection);
+  vec3 V = normalize(f_eyeDirection);
+
+  if(gl_FrontFacing) {
+    N = -N;
+  }
+
+  float specular = min(1.0, max(0.0, cookTorranceSpecular(L, V, N, roughness, fresnel)));
+  //float specular = max(0.0, beckmann(L, V, N, roughness)); // used in gl-surface3d
+
+  float diffuse  = min(kambient + kdiffuse * max(dot(N, L), 0.0), 1.0);
+
+  vec4 surfaceColor = vec4(f_color.rgb, 1.0) * texture2D(texture, f_uv);
+  vec4 litColor = surfaceColor.a * vec4(diffuse * surfaceColor.rgb + kspecular * vec3(1,1,1) * specular,  1.0);
+
+  gl_FragColor = litColor * f_color.a;
+}
+`]),c=s([`precision highp float;
+#define GLSLIFY 1
+
+attribute vec3 position;
+attribute vec4 color;
+attribute vec2 uv;
+
+uniform mat4 model, view, projection;
+
+varying vec4 f_color;
+varying vec3 f_data;
+varying vec2 f_uv;
+
+void main() {
+  gl_Position = projection * (view * (model * vec4(position, 1.0)));
+  f_color = color;
+  f_data  = position;
+  f_uv    = uv;
+}`]),f=s([`precision highp float;
+#define GLSLIFY 1
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+uniform vec3 clipBounds[2];
+uniform sampler2D texture;
+uniform float opacity;
+
+varying vec4 f_color;
+varying vec3 f_data;
+varying vec2 f_uv;
+
+void main() {
+  if (outOfRange(clipBounds[0], clipBounds[1], f_data)) discard;
+
+  gl_FragColor = f_color * texture2D(texture, f_uv) * opacity;
+}`]),h=s([`precision highp float;
+#define GLSLIFY 1
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+attribute vec3 position;
+attribute vec4 color;
+attribute vec2 uv;
+attribute float pointSize;
+
+uniform mat4 model, view, projection;
+uniform vec3 clipBounds[2];
+
+varying vec4 f_color;
+varying vec2 f_uv;
+
+void main() {
+  if (outOfRange(clipBounds[0], clipBounds[1], position)) {
+
+    gl_Position = vec4(0.0, 0.0 ,0.0 ,0.0);
+  } else {
+    gl_Position = projection * (view * (model * vec4(position, 1.0)));
+  }
+  gl_PointSize = pointSize;
+  f_color = color;
+  f_uv = uv;
+}`]),d=s([`precision highp float;
+#define GLSLIFY 1
+
+uniform sampler2D texture;
+uniform float opacity;
+
+varying vec4 f_color;
+varying vec2 f_uv;
+
+void main() {
+  vec2 pointR = gl_PointCoord.xy - vec2(0.5, 0.5);
+  if(dot(pointR, pointR) > 0.25) {
+    discard;
+  }
+  gl_FragColor = f_color * texture2D(texture, f_uv) * opacity;
+}`]),v=s([`precision highp float;
+#define GLSLIFY 1
+
+attribute vec3 position;
+attribute vec4 id;
+
+uniform mat4 model, view, projection;
+
+varying vec3 f_position;
+varying vec4 f_id;
+
+void main() {
+  gl_Position = projection * (view * (model * vec4(position, 1.0)));
+  f_id        = id;
+  f_position  = position;
+}`]),_=s([`precision highp float;
+#define GLSLIFY 1
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+uniform vec3  clipBounds[2];
+uniform float pickId;
+
+varying vec3 f_position;
+varying vec4 f_id;
+
+void main() {
+  if (outOfRange(clipBounds[0], clipBounds[1], f_position)) discard;
+
+  gl_FragColor = vec4(pickId, f_id.xyz);
+}`]),b=s([`precision highp float;
+#define GLSLIFY 1
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+attribute vec3  position;
+attribute float pointSize;
+attribute vec4  id;
+
+uniform mat4 model, view, projection;
+uniform vec3 clipBounds[2];
+
+varying vec3 f_position;
+varying vec4 f_id;
+
+void main() {
+  if (outOfRange(clipBounds[0], clipBounds[1], position)) {
+
+    gl_Position = vec4(0.0, 0.0, 0.0, 0.0);
+  } else {
+    gl_Position  = projection * (view * (model * vec4(position, 1.0)));
+    gl_PointSize = pointSize;
+  }
+  f_id         = id;
+  f_position   = position;
+}`]),p=s([`precision highp float;
+#define GLSLIFY 1
+
+attribute vec3 position;
+
+uniform mat4 model, view, projection;
+
+void main() {
+  gl_Position = projection * (view * (model * vec4(position, 1.0)));
+}`]),k=s([`precision highp float;
+#define GLSLIFY 1
+
+uniform vec3 contourColor;
+
+void main() {
+  gl_FragColor = vec4(contourColor, 1.0);
+}
+`]);a.meshShader={vertex:l,fragment:u,attributes:[{name:"position",type:"vec3"},{name:"normal",type:"vec3"},{name:"color",type:"vec4"},{name:"uv",type:"vec2"}]},a.wireShader={vertex:c,fragment:f,attributes:[{name:"position",type:"vec3"},{name:"color",type:"vec4"},{name:"uv",type:"vec2"}]},a.pointShader={vertex:h,fragment:d,attributes:[{name:"position",type:"vec3"},{name:"color",type:"vec4"},{name:"uv",type:"vec2"},{name:"pointSize",type:"float"}]},a.pickShader={vertex:v,fragment:_,attributes:[{name:"position",type:"vec3"},{name:"id",type:"vec4"}]},a.pointPickShader={vertex:b,fragment:_,attributes:[{name:"position",type:"vec3"},{name:"pointSize",type:"float"},{name:"id",type:"vec4"}]},a.contourShader={vertex:p,fragment:k,attributes:[{name:"position",type:"vec3"}]}},855:function(i,a,o){"use strict";i.exports={init:E,sweepBipartite:x,sweepComplete:C,scanBipartite:M,scanComplete:g};var s=o(1888),l=o(8828),u=o(4192),c=1<<28,f=1024,h=s.mallocInt32(f),d=s.mallocInt32(f),v=s.mallocInt32(f),_=s.mallocInt32(f),b=s.mallocInt32(f),p=s.mallocInt32(f),k=s.mallocDouble(f*8);function E(P){var T=l.nextPow2(P);h.length<T&&(s.free(h),h=s.mallocInt32(T)),d.length<T&&(s.free(d),d=s.mallocInt32(T)),v.length<T&&(s.free(v),v=s.mallocInt32(T)),_.length<T&&(s.free(_),_=s.mallocInt32(T)),b.length<T&&(s.free(b),b=s.mallocInt32(T)),p.length<T&&(s.free(p),p=s.mallocInt32(T));var z=8*T;k.length<z&&(s.free(k),k=s.mallocDouble(z))}function S(P,T,z,O){var V=T[O],G=P[z-1];P[V]=G,T[G]=V}function L(P,T,z,O){P[z]=O,T[O]=z}function x(P,T,z,O,V,G,Z,j,N,H){for(var te=0,oe=2*P,_e=P-1,Ee=oe-1,Ce=z;Ce<O;++Ce){var me=G[Ce],ie=oe*Ce;k[te++]=V[ie+_e],k[te++]=-(me+1),k[te++]=V[ie+Ee],k[te++]=me}for(var Ce=Z;Ce<j;++Ce){var me=H[Ce]+c,Se=oe*Ce;k[te++]=N[Se+_e],k[te++]=-me,k[te++]=N[Se+Ee],k[te++]=me}var Le=te>>>1;u(k,Le);for(var Ae=0,Fe=0,Ce=0;Ce<Le;++Ce){var Pe=k[2*Ce+1]|0;if(Pe>=c)Pe=Pe-c|0,S(v,_,Fe--,Pe);else if(Pe>=0)S(h,d,Ae--,Pe);else if(Pe<=-c){Pe=-Pe-c|0;for(var ge=0;ge<Ae;++ge){var Re=T(h[ge],Pe);if(Re!==void 0)return Re}L(v,_,Fe++,Pe)}else{Pe=-Pe-1|0;for(var ge=0;ge<Fe;++ge){var Re=T(Pe,v[ge]);if(Re!==void 0)return Re}L(h,d,Ae++,Pe)}}}function C(P,T,z,O,V,G,Z,j,N,H){for(var te=0,oe=2*P,_e=P-1,Ee=oe-1,Ce=z;Ce<O;++Ce){var me=G[Ce]+1<<1,ie=oe*Ce;k[te++]=V[ie+_e],k[te++]=-me,k[te++]=V[ie+Ee],k[te++]=me}for(var Ce=Z;Ce<j;++Ce){var me=H[Ce]+1<<1,Se=oe*Ce;k[te++]=N[Se+_e],k[te++]=-me|1,k[te++]=N[Se+Ee],k[te++]=me|1}var Le=te>>>1;u(k,Le);for(var Ae=0,Fe=0,Pe=0,Ce=0;Ce<Le;++Ce){var ge=k[2*Ce+1]|0,Re=ge&1;if(Ce<Le-1&&ge>>1===k[2*Ce+3]>>1&&(Re=2,Ce+=1),ge<0){for(var ce=-(ge>>1)-1,Ze=0;Ze<Pe;++Ze){var ut=T(b[Ze],ce);if(ut!==void 0)return ut}if(Re!==0)for(var Ze=0;Ze<Ae;++Ze){var ut=T(h[Ze],ce);if(ut!==void 0)return ut}if(Re!==1)for(var Ze=0;Ze<Fe;++Ze){var ut=T(v[Ze],ce);if(ut!==void 0)return ut}Re===0?L(h,d,Ae++,ce):Re===1?L(v,_,Fe++,ce):Re===2&&L(b,p,Pe++,ce)}else{var ce=(ge>>1)-1;Re===0?S(h,d,Ae--,ce):Re===1?S(v,_,Fe--,ce):Re===2&&S(b,p,Pe--,ce)}}}function M(P,T,z,O,V,G,Z,j,N,H,te,oe){var _e=0,Ee=2*P,Ce=T,me=T+P,ie=1,Se=1;O?Se=c:ie=c;for(var Le=V;Le<G;++Le){var Ae=Le+ie,Fe=Ee*Le;k[_e++]=Z[Fe+Ce],k[_e++]=-Ae,k[_e++]=Z[Fe+me],k[_e++]=Ae}for(var Le=N;Le<H;++Le){var Ae=Le+Se,Pe=Ee*Le;k[_e++]=te[Pe+Ce],k[_e++]=-Ae}var ge=_e>>>1;u(k,ge);for(var Re=0,Le=0;Le<ge;++Le){var ce=k[2*Le+1]|0;if(ce<0){var Ae=-ce,Ze=!1;if(Ae>=c?(Ze=!O,Ae-=c):(Ze=!!O,Ae-=1),Ze)L(h,d,Re++,Ae);else{var ut=oe[Ae],pt=Ee*Ae,Zt=te[pt+T+1],st=te[pt+T+1+P];e:for(var lt=0;lt<Re;++lt){var Gt=h[lt],Nt=Ee*Gt;if(!(st<Z[Nt+T+1]||Z[Nt+T+1+P]<Zt)){for(var Jt=T+2;Jt<P;++Jt)if(te[pt+Jt+P]<Z[Nt+Jt]||Z[Nt+Jt+P]<te[pt+Jt])continue e;var sr=j[Gt],wr;if(O?wr=z(ut,sr):wr=z(sr,ut),wr!==void 0)return wr}}}}else S(h,d,Re--,ce-ie)}}function g(P,T,z,O,V,G,Z,j,N,H,te){for(var oe=0,_e=2*P,Ee=T,Ce=T+P,me=O;me<V;++me){var ie=me+c,Se=_e*me;k[oe++]=G[Se+Ee],k[oe++]=-ie,k[oe++]=G[Se+Ce],k[oe++]=ie}for(var me=j;me<N;++me){var ie=me+1,Le=_e*me;k[oe++]=H[Le+Ee],k[oe++]=-ie}var Ae=oe>>>1;u(k,Ae);for(var Fe=0,me=0;me<Ae;++me){var Pe=k[2*me+1]|0;if(Pe<0){var ie=-Pe;if(ie>=c)h[Fe++]=ie-c;else{ie-=1;var ge=te[ie],Re=_e*ie,ce=H[Re+T+1],Ze=H[Re+T+1+P];e:for(var ut=0;ut<Fe;++ut){var pt=h[ut],Zt=Z[pt];if(Zt===ge)break;var st=_e*pt;if(!(Ze<G[st+T+1]||G[st+T+1+P]<ce)){for(var lt=T+2;lt<P;++lt)if(H[Re+lt+P]<G[st+lt]||G[st+lt+P]<H[Re+lt])continue e;var Gt=z(Zt,ge);if(Gt!==void 0)return Gt}}}}else{for(var ie=Pe-c,ut=Fe-1;ut>=0;--ut)if(h[ut]===ie){for(var lt=ut+1;lt<Fe;++lt)h[lt-1]=h[lt];break}--Fe}}}},868:function(i,a,o){i.exports=o(1387)},869:function(i,a,o){"use strict";var s=o(2651),l=o(5716);i.exports=u;function u(c,f){var h=l(c),d=l(f);if(h===0)return[s(0),s(1)];if(d===0)return[s(0),s(0)];d<0&&(c=c.neg(),f=f.neg());var v=c.gcd(f);return v.cmpn(1)?[c.div(v),f.div(v)]:[c,f]}},870:function(i,a,o){"use strict";var s=o(1433);function l(c){this.gl=c,this._elements=null,this._attributes=null,this._elementsType=c.UNSIGNED_SHORT}l.prototype.bind=function(){s(this.gl,this._elements,this._attributes)},l.prototype.update=function(c,f,h){this._elements=f,this._attributes=c,this._elementsType=h||this.gl.UNSIGNED_SHORT},l.prototype.dispose=function(){},l.prototype.unbind=function(){},l.prototype.draw=function(c,f,h){h=h||0;var d=this.gl;this._elements?d.drawElements(c,f,this._elementsType,h):d.drawArrays(c,h,f)};function u(c){return new l(c)}i.exports=u},946:function(i,a,o){"use strict";var s=o(1369),l=o(4025);i.exports=u;function u(c){var f=c[0],h=c[1];if(f.cmpn(0)===0)return 0;var d=f.abs().divmod(h.abs()),v=d.div,_=s(v),b=d.mod,p=f.negative!==h.negative?-1:1;if(b.cmpn(0)===0)return p*_;if(_){var k=l(_)+4,E=s(b.ushln(k).divRound(h));return p*(_+E*Math.pow(2,-k))}else{var S=h.bitLength()-b.bitLength()+53,E=s(b.ushln(S).divRound(h));return S<1023?p*E*Math.pow(2,-S):(E*=Math.pow(2,-1023),p*E*Math.pow(2,1023-S))}}},990:function(i,a,o){var s=o(9405),l=o(3236),u=l([`precision highp float;
+#define GLSLIFY 1
+
+attribute vec4 uv;
+attribute vec3 f;
+attribute vec3 normal;
+
+uniform vec3 objectOffset;
+uniform mat4 model, view, projection, inverseModel;
+uniform vec3 lightPosition, eyePosition;
+uniform sampler2D colormap;
+
+varying float value, kill;
+varying vec3 worldCoordinate;
+varying vec2 planeCoordinate;
+varying vec3 lightDirection, eyeDirection, surfaceNormal;
+varying vec4 vColor;
+
+void main() {
+  vec3 localCoordinate = vec3(uv.zw, f.x);
+  worldCoordinate = objectOffset + localCoordinate;
+  mat4 objectOffsetTranslation = mat4(1.0) + mat4(vec4(0), vec4(0), vec4(0), vec4(objectOffset, 0));
+  vec4 worldPosition = (model * objectOffsetTranslation) * vec4(localCoordinate, 1.0);
+  vec4 clipPosition = projection * (view * worldPosition);
+  gl_Position = clipPosition;
+  kill = f.y;
+  value = f.z;
+  planeCoordinate = uv.xy;
+
+  vColor = texture2D(colormap, vec2(value, value));
+
+  //Lighting geometry parameters
+  vec4 cameraCoordinate = view * worldPosition;
+  cameraCoordinate.xyz /= cameraCoordinate.w;
+  lightDirection = lightPosition - cameraCoordinate.xyz;
+  eyeDirection   = eyePosition - cameraCoordinate.xyz;
+  surfaceNormal  = normalize((vec4(normal,0) * inverseModel).xyz);
+}
+`]),c=l([`precision highp float;
+#define GLSLIFY 1
+
+float beckmannDistribution(float x, float roughness) {
+  float NdotH = max(x, 0.0001);
+  float cos2Alpha = NdotH * NdotH;
+  float tan2Alpha = (cos2Alpha - 1.0) / cos2Alpha;
+  float roughness2 = roughness * roughness;
+  float denom = 3.141592653589793 * roughness2 * cos2Alpha * cos2Alpha;
+  return exp(tan2Alpha / roughness2) / denom;
+}
+
+float beckmannSpecular(
+  vec3 lightDirection,
+  vec3 viewDirection,
+  vec3 surfaceNormal,
+  float roughness) {
+  return beckmannDistribution(dot(surfaceNormal, normalize(lightDirection + viewDirection)), roughness);
+}
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+uniform vec3 lowerBound, upperBound;
+uniform float contourTint;
+uniform vec4 contourColor;
+uniform sampler2D colormap;
+uniform vec3 clipBounds[2];
+uniform float roughness, fresnel, kambient, kdiffuse, kspecular, opacity;
+uniform float vertexColor;
+
+varying float value, kill;
+varying vec3 worldCoordinate;
+varying vec3 lightDirection, eyeDirection, surfaceNormal;
+varying vec4 vColor;
+
+void main() {
+  if (
+    kill > 0.0 ||
+    vColor.a == 0.0 ||
+    outOfRange(clipBounds[0], clipBounds[1], worldCoordinate)
+  ) discard;
+
+  vec3 N = normalize(surfaceNormal);
+  vec3 V = normalize(eyeDirection);
+  vec3 L = normalize(lightDirection);
+
+  if(gl_FrontFacing) {
+    N = -N;
+  }
+
+  float specular = max(beckmannSpecular(L, V, N, roughness), 0.);
+  float diffuse  = min(kambient + kdiffuse * max(dot(N, L), 0.0), 1.0);
+
+  //decide how to interpolate color \u2014 in vertex or in fragment
+  vec4 surfaceColor =
+    step(vertexColor, .5) * texture2D(colormap, vec2(value, value)) +
+    step(.5, vertexColor) * vColor;
+
+  vec4 litColor = surfaceColor.a * vec4(diffuse * surfaceColor.rgb + kspecular * vec3(1,1,1) * specular,  1.0);
+
+  gl_FragColor = mix(litColor, contourColor, contourTint) * opacity;
+}
+`]),f=l([`precision highp float;
+#define GLSLIFY 1
+
+attribute vec4 uv;
+attribute float f;
+
+uniform vec3 objectOffset;
+uniform mat3 permutation;
+uniform mat4 model, view, projection;
+uniform float height, zOffset;
+uniform sampler2D colormap;
+
+varying float value, kill;
+varying vec3 worldCoordinate;
+varying vec2 planeCoordinate;
+varying vec3 lightDirection, eyeDirection, surfaceNormal;
+varying vec4 vColor;
+
+void main() {
+  vec3 dataCoordinate = permutation * vec3(uv.xy, height);
+  worldCoordinate = objectOffset + dataCoordinate;
+  mat4 objectOffsetTranslation = mat4(1.0) + mat4(vec4(0), vec4(0), vec4(0), vec4(objectOffset, 0));
+  vec4 worldPosition = (model * objectOffsetTranslation) * vec4(dataCoordinate, 1.0);
+
+  vec4 clipPosition = projection * (view * worldPosition);
+  clipPosition.z += zOffset;
+
+  gl_Position = clipPosition;
+  value = f + objectOffset.z;
+  kill = -1.0;
+  planeCoordinate = uv.zw;
+
+  vColor = texture2D(colormap, vec2(value, value));
+
+  //Don't do lighting for contours
+  surfaceNormal   = vec3(1,0,0);
+  eyeDirection    = vec3(0,1,0);
+  lightDirection  = vec3(0,0,1);
+}
+`]),h=l([`precision highp float;
+#define GLSLIFY 1
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+uniform vec2 shape;
+uniform vec3 clipBounds[2];
+uniform float pickId;
+
+varying float value, kill;
+varying vec3 worldCoordinate;
+varying vec2 planeCoordinate;
+varying vec3 surfaceNormal;
+
+vec2 splitFloat(float v) {
+  float vh = 255.0 * v;
+  float upper = floor(vh);
+  float lower = fract(vh);
+  return vec2(upper / 255.0, floor(lower * 16.0) / 16.0);
+}
+
+void main() {
+  if ((kill > 0.0) ||
+      (outOfRange(clipBounds[0], clipBounds[1], worldCoordinate))) discard;
+
+  vec2 ux = splitFloat(planeCoordinate.x / shape.x);
+  vec2 uy = splitFloat(planeCoordinate.y / shape.y);
+  gl_FragColor = vec4(pickId, ux.x, uy.x, ux.y + (uy.y/16.0));
+}
+`]);a.createShader=function(d){var v=s(d,u,c,null,[{name:"uv",type:"vec4"},{name:"f",type:"vec3"},{name:"normal",type:"vec3"}]);return v.attributes.uv.location=0,v.attributes.f.location=1,v.attributes.normal.location=2,v},a.createPickShader=function(d){var v=s(d,u,h,null,[{name:"uv",type:"vec4"},{name:"f",type:"vec3"},{name:"normal",type:"vec3"}]);return v.attributes.uv.location=0,v.attributes.f.location=1,v.attributes.normal.location=2,v},a.createContourShader=function(d){var v=s(d,f,c,null,[{name:"uv",type:"vec4"},{name:"f",type:"float"}]);return v.attributes.uv.location=0,v.attributes.f.location=1,v},a.createPickContourShader=function(d){var v=s(d,f,h,null,[{name:"uv",type:"vec4"},{name:"f",type:"float"}]);return v.attributes.uv.location=0,v.attributes.f.location=1,v}},1085:function(i,a,o){var s=o(1371);i.exports=l;function l(u,c,f){c=typeof c=="number"?c:1,f=f||": ";var h=u.split(/\r?\n/),d=String(h.length+c-1).length;return h.map(function(v,_){var b=_+c,p=String(b).length,k=s(b,d-p);return k+f+v}).join(`
+`)}},1091:function(i){i.exports=a;function a(){var o=new Float32Array(3);return o[0]=0,o[1]=0,o[2]=0,o}},1125:function(i,a,o){"use strict";i.exports=u;var s=o(3250)[3];function l(c,f,h,d){for(var v=0;v<2;++v){var _=c[v],b=f[v],p=Math.min(_,b),k=Math.max(_,b),E=h[v],S=d[v],L=Math.min(E,S),x=Math.max(E,S);if(x<p||k<L)return!1}return!0}function u(c,f,h,d){var v=s(c,h,d),_=s(f,h,d);if(v>0&&_>0||v<0&&_<0)return!1;var b=s(h,c,f),p=s(d,c,f);return b>0&&p>0||b<0&&p<0?!1:v===0&&_===0&&b===0&&p===0?l(c,f,h,d):!0}},1278:function(i,a,o){"use strict";var s=o(2361),l=Math.pow(2,-1074),u=-1>>>0;i.exports=c;function c(f,h){if(isNaN(f)||isNaN(h))return NaN;if(f===h)return f;if(f===0)return h<0?-l:l;var d=s.hi(f),v=s.lo(f);return h>f==f>0?v===u?(d+=1,v=0):v+=1:v===0?(v=u,d-=1):v-=1,s.pack(v,d)}},1283:function(i,a,o){var s=o(9405),l=o(3236),u=l([`precision highp float;
+#define GLSLIFY 1
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+attribute vec3 position;
+attribute vec4 color;
+attribute vec2 glyph;
+attribute vec4 id;
+
+uniform vec4 highlightId;
+uniform float highlightScale;
+uniform mat4 model, view, projection;
+uniform vec3 clipBounds[2];
+
+varying vec4 interpColor;
+varying vec4 pickId;
+varying vec3 dataCoordinate;
+
+void main() {
+  if (outOfRange(clipBounds[0], clipBounds[1], position)) {
+
+    gl_Position = vec4(0,0,0,0);
+  } else {
+    float scale = 1.0;
+    if(distance(highlightId, id) < 0.0001) {
+      scale = highlightScale;
+    }
+
+    vec4 worldPosition = model * vec4(position, 1);
+    vec4 viewPosition = view * worldPosition;
+    viewPosition = viewPosition / viewPosition.w;
+    vec4 clipPosition = projection * (viewPosition + scale * vec4(glyph.x, -glyph.y, 0, 0));
+
+    gl_Position = clipPosition;
+    interpColor = color;
+    pickId = id;
+    dataCoordinate = position;
+  }
+}`]),c=l([`precision highp float;
+#define GLSLIFY 1
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+attribute vec3 position;
+attribute vec4 color;
+attribute vec2 glyph;
+attribute vec4 id;
+
+uniform mat4 model, view, projection;
+uniform vec2 screenSize;
+uniform vec3 clipBounds[2];
+uniform float highlightScale, pixelRatio;
+uniform vec4 highlightId;
+
+varying vec4 interpColor;
+varying vec4 pickId;
+varying vec3 dataCoordinate;
+
+void main() {
+  if (outOfRange(clipBounds[0], clipBounds[1], position)) {
+
+    gl_Position = vec4(0,0,0,0);
+  } else {
+    float scale = pixelRatio;
+    if(distance(highlightId.bgr, id.bgr) < 0.001) {
+      scale *= highlightScale;
+    }
+
+    vec4 worldPosition = model * vec4(position, 1.0);
+    vec4 viewPosition = view * worldPosition;
+    vec4 clipPosition = projection * viewPosition;
+    clipPosition /= clipPosition.w;
+
+    gl_Position = clipPosition + vec4(screenSize * scale * vec2(glyph.x, -glyph.y), 0.0, 0.0);
+    interpColor = color;
+    pickId = id;
+    dataCoordinate = position;
+  }
+}`]),f=l([`precision highp float;
+#define GLSLIFY 1
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+attribute vec3 position;
+attribute vec4 color;
+attribute vec2 glyph;
+attribute vec4 id;
+
+uniform float highlightScale;
+uniform vec4 highlightId;
+uniform vec3 axes[2];
+uniform mat4 model, view, projection;
+uniform vec2 screenSize;
+uniform vec3 clipBounds[2];
+uniform float scale, pixelRatio;
+
+varying vec4 interpColor;
+varying vec4 pickId;
+varying vec3 dataCoordinate;
+
+void main() {
+  if (outOfRange(clipBounds[0], clipBounds[1], position)) {
+
+    gl_Position = vec4(0,0,0,0);
+  } else {
+    float lscale = pixelRatio * scale;
+    if(distance(highlightId, id) < 0.0001) {
+      lscale *= highlightScale;
+    }
+
+    vec4 clipCenter   = projection * (view * (model * vec4(position, 1)));
+    vec3 dataPosition = position + 0.5*lscale*(axes[0] * glyph.x + axes[1] * glyph.y) * clipCenter.w * screenSize.y;
+    vec4 clipPosition = projection * (view * (model * vec4(dataPosition, 1)));
+
+    gl_Position = clipPosition;
+    interpColor = color;
+    pickId = id;
+    dataCoordinate = dataPosition;
+  }
+}
+`]),h=l([`precision highp float;
+#define GLSLIFY 1
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+uniform vec3 fragClipBounds[2];
+uniform float opacity;
+
+varying vec4 interpColor;
+varying vec3 dataCoordinate;
+
+void main() {
+  if (
+    outOfRange(fragClipBounds[0], fragClipBounds[1], dataCoordinate) ||
+    interpColor.a * opacity == 0.
+  ) discard;
+  gl_FragColor = interpColor * opacity;
+}
+`]),d=l([`precision highp float;
+#define GLSLIFY 1
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+uniform vec3 fragClipBounds[2];
+uniform float pickGroup;
+
+varying vec4 pickId;
+varying vec3 dataCoordinate;
+
+void main() {
+  if (outOfRange(fragClipBounds[0], fragClipBounds[1], dataCoordinate)) discard;
+
+  gl_FragColor = vec4(pickGroup, pickId.bgr);
+}`]),v=[{name:"position",type:"vec3"},{name:"color",type:"vec4"},{name:"glyph",type:"vec2"},{name:"id",type:"vec4"}],_={vertex:u,fragment:h,attributes:v},b={vertex:c,fragment:h,attributes:v},p={vertex:f,fragment:h,attributes:v},k={vertex:u,fragment:d,attributes:v},E={vertex:c,fragment:d,attributes:v},S={vertex:f,fragment:d,attributes:v};function L(x,C){var M=s(x,C),g=M.attributes;return g.position.location=0,g.color.location=1,g.glyph.location=2,g.id.location=3,M}a.createPerspective=function(x){return L(x,_)},a.createOrtho=function(x){return L(x,b)},a.createProject=function(x){return L(x,p)},a.createPickPerspective=function(x){return L(x,k)},a.createPickOrtho=function(x){return L(x,E)},a.createPickProject=function(x){return L(x,S)}},1303:function(i,a,o){"use strict";i.exports=u;var s=o(3250);function l(c,f){var h,d;if(f[0][0]<f[1][0])h=f[0],d=f[1];else if(f[0][0]>f[1][0])h=f[1],d=f[0];else{var v=Math.min(c[0][1],c[1][1]),_=Math.max(c[0][1],c[1][1]),b=Math.min(f[0][1],f[1][1]),p=Math.max(f[0][1],f[1][1]);return _<b?_-b:v>p?v-p:_-p}var k,E;c[0][1]<c[1][1]?(k=c[0],E=c[1]):(k=c[1],E=c[0]);var S=s(d,h,k);return S||(S=s(d,h,E),S)?S:E-d}function u(c,f){var h,d;if(f[0][0]<f[1][0])h=f[0],d=f[1];else if(f[0][0]>f[1][0])h=f[1],d=f[0];else return l(f,c);var v,_;if(c[0][0]<c[1][0])v=c[0],_=c[1];else if(c[0][0]>c[1][0])v=c[1],_=c[0];else return-l(c,f);var b=s(h,d,_),p=s(h,d,v);if(b<0){if(p<=0)return b}else if(b>0){if(p>=0)return b}else if(p)return p;if(b=s(_,v,d),p=s(_,v,h),b<0){if(p<=0)return b}else if(b>0){if(p>=0)return b}else if(p)return p;return d[0]-_[0]}},1318:function(i){"use strict";i.exports=a;function a(o,s){return o[0].mul(s[1]).cmp(s[0].mul(o[1]))}},1338:function(i){"use strict";function a(l,u,c){var f=l[c]|0;if(f<=0)return[];var h=new Array(f),d;if(c===l.length-1)for(d=0;d<f;++d)h[d]=u;else for(d=0;d<f;++d)h[d]=a(l,u,c+1);return h}function o(l,u){var c,f;for(c=new Array(l),f=0;f<l;++f)c[f]=u;return c}function s(l,u){switch(typeof u=="undefined"&&(u=0),typeof l){case"number":if(l>0)return o(l|0,u);break;case"object":if(typeof l.length=="number")return a(l,u,0);break}return[]}i.exports=s},1369:function(i,a,o){"use strict";var s=o(5716);i.exports=l;function l(u){var c=u.length,f=u.words,h=0;if(c===1)h=f[0];else if(c===2)h=f[0]+f[1]*67108864;else for(var d=0;d<c;d++){var v=f[d];h+=v*Math.pow(67108864,d)}return s(u)*h}},1371:function(i,a,o){"use strict";var s=o(3233);i.exports=function(u,c,f){return f=typeof f!="undefined"?f+"":" ",s(f,c)+u}},1373:function(i){i.exports=a;function a(o,s,l){return o[0]=s[0]/l[0],o[1]=s[1]/l[1],o[2]=s[2]/l[2],o[3]=s[3]/l[3],o}},1387:function(i){i.exports=a;function a(o){var s=o[0],l=o[1],u=o[2];return Math.sqrt(s*s+l*l+u*u)}},1433:function(i){"use strict";function a(o,s,l){s?s.bind():o.bindBuffer(o.ELEMENT_ARRAY_BUFFER,null);var u=o.getParameter(o.MAX_VERTEX_ATTRIBS)|0;if(l){if(l.length>u)throw new Error("gl-vao: Too many vertex attributes");for(var c=0;c<l.length;++c){var f=l[c];if(f.buffer){var h=f.buffer,d=f.size||4,v=f.type||o.FLOAT,_=!!f.normalized,b=f.stride||0,p=f.offset||0;h.bind(),o.enableVertexAttribArray(c),o.vertexAttribPointer(c,d,v,_,b,p)}else{if(typeof f=="number")o.vertexAttrib1f(c,f);else if(f.length===1)o.vertexAttrib1f(c,f[0]);else if(f.length===2)o.vertexAttrib2f(c,f[0],f[1]);else if(f.length===3)o.vertexAttrib3f(c,f[0],f[1],f[2]);else if(f.length===4)o.vertexAttrib4f(c,f[0],f[1],f[2],f[3]);else throw new Error("gl-vao: Invalid vertex attribute");o.disableVertexAttribArray(c)}}for(;c<u;++c)o.disableVertexAttribArray(c)}else{o.bindBuffer(o.ARRAY_BUFFER,null);for(var c=0;c<u;++c)o.disableVertexAttribArray(c)}}i.exports=a},1463:function(i){i.exports=a;function a(o,s,l,u){return o[0]=s,o[1]=l,o[2]=u,o}},1493:function(i,a,o){"use strict";var s=o(3236),l=o(9405),u=s([`precision mediump float;
+#define GLSLIFY 1
+
+attribute vec3 position, color;
+attribute float weight;
+
+uniform mat4 model, view, projection;
+uniform vec3 coordinates[3];
+uniform vec4 colors[3];
+uniform vec2 screenShape;
+uniform float lineWidth;
+
+varying vec4 fragColor;
+
+void main() {
+  vec3 vertexPosition = mix(coordinates[0],
+    mix(coordinates[2], coordinates[1], 0.5 * (position + 1.0)), abs(position));
+
+  vec4 clipPos = projection * (view * (model * vec4(vertexPosition, 1.0)));
+  vec2 clipOffset = (projection * (view * (model * vec4(color, 0.0)))).xy;
+  vec2 delta = weight * clipOffset * screenShape;
+  vec2 lineOffset = normalize(vec2(delta.y, -delta.x)) / screenShape;
+
+  gl_Position   = vec4(clipPos.xy + clipPos.w * 0.5 * lineWidth * lineOffset, clipPos.z, clipPos.w);
+  fragColor     = color.x * colors[0] + color.y * colors[1] + color.z * colors[2];
+}
+`]),c=s([`precision mediump float;
+#define GLSLIFY 1
+
+varying vec4 fragColor;
+
+void main() {
+  gl_FragColor = fragColor;
+}`]);i.exports=function(f){return l(f,u,c,null,[{name:"position",type:"vec3"},{name:"color",type:"vec3"},{name:"weight",type:"float"}])}},1498:function(i){i.exports=a;function a(o,s){return o[0]=-s[0],o[1]=-s[1],o[2]=-s[2],o[3]=-s[3],o}},1533:function(i,a,o){"use strict";var s=o(6859);i.exports=l;function l(u){return u&&typeof u=="object"&&!!u.words}},1538:function(i){(function(){"use strict";if(typeof ses!="undefined"&&ses.ok&&!ses.ok())return;function o(T){T.permitHostObjects___&&T.permitHostObjects___(o)}typeof ses!="undefined"&&(ses.weakMapPermitHostObjects=o);var s=!1;if(typeof WeakMap=="function"){var l=WeakMap;if(!(typeof navigator!="undefined"&&/Firefox/.test(navigator.userAgent))){var u=new l,c=Object.freeze({});if(u.set(c,1),u.get(c)!==1)s=!0;else{i.exports=WeakMap;return}}}var f=Object.prototype.hasOwnProperty,h=Object.getOwnPropertyNames,d=Object.defineProperty,v=Object.isExtensible,_="weakmap:",b=_+"ident:"+Math.random()+"___";if(typeof crypto!="undefined"&&typeof crypto.getRandomValues=="function"&&typeof ArrayBuffer=="function"&&typeof Uint8Array=="function"){var p=new ArrayBuffer(25),k=new Uint8Array(p);crypto.getRandomValues(k),b=_+"rand:"+Array.prototype.map.call(k,function(T){return(T%36).toString(36)}).join("")+"___"}function E(T){return!(T.substr(0,_.length)==_&&T.substr(T.length-3)==="___")}if(d(Object,"getOwnPropertyNames",{value:function(z){return h(z).filter(E)}}),"getPropertyNames"in Object){var S=Object.getPropertyNames;d(Object,"getPropertyNames",{value:function(z){return S(z).filter(E)}})}function L(T){if(T!==Object(T))throw new TypeError("Not an object: "+T);var z=T[b];if(z&&z.key===T)return z;if(v(T)){z={key:T};try{return d(T,b,{value:z,writable:!1,enumerable:!1,configurable:!1}),z}catch(O){return}}}(function(){var T=Object.freeze;d(Object,"freeze",{value:function(G){return L(G),T(G)}});var z=Object.seal;d(Object,"seal",{value:function(G){return L(G),z(G)}});var O=Object.preventExtensions;d(Object,"preventExtensions",{value:function(G){return L(G),O(G)}})})();function x(T){return T.prototype=null,Object.freeze(T)}var C=!1;function M(){!C&&typeof console!="undefined"&&(C=!0,console.warn("WeakMap should be invoked as new WeakMap(), not WeakMap(). This will be an error in the future."))}var g=0,P=function(){this instanceof P||M();var T=[],z=[],O=g++;function V(N,H){var te,oe=L(N);return oe?O in oe?oe[O]:H:(te=T.indexOf(N),te>=0?z[te]:H)}function G(N){var H=L(N);return H?O in H:T.indexOf(N)>=0}function Z(N,H){var te,oe=L(N);return oe?oe[O]=H:(te=T.indexOf(N),te>=0?z[te]=H:(te=T.length,z[te]=H,T[te]=N)),this}function j(N){var H=L(N),te,oe;return H?O in H&&delete H[O]:(te=T.indexOf(N),te<0?!1:(oe=T.length-1,T[te]=void 0,z[te]=z[oe],T[te]=T[oe],T.length=oe,z.length=oe,!0))}return Object.create(P.prototype,{get___:{value:x(V)},has___:{value:x(G)},set___:{value:x(Z)},delete___:{value:x(j)}})};P.prototype=Object.create(Object.prototype,{get:{value:function(z,O){return this.get___(z,O)},writable:!0,configurable:!0},has:{value:function(z){return this.has___(z)},writable:!0,configurable:!0},set:{value:function(z,O){return this.set___(z,O)},writable:!0,configurable:!0},delete:{value:function(z){return this.delete___(z)},writable:!0,configurable:!0}}),typeof l=="function"?function(){s&&typeof Proxy!="undefined"&&(Proxy=void 0);function T(){this instanceof P||M();var z=new l,O=void 0,V=!1;function G(H,te){return O?z.has(H)?z.get(H):O.get___(H,te):z.get(H,te)}function Z(H){return z.has(H)||(O?O.has___(H):!1)}var j;s?j=function(H,te){return z.set(H,te),z.has(H)||(O||(O=new P),O.set(H,te)),this}:j=function(H,te){if(V)try{z.set(H,te)}catch(oe){O||(O=new P),O.set___(H,te)}else z.set(H,te);return this};function N(H){var te=!!z.delete(H);return O&&O.delete___(H)||te}return Object.create(P.prototype,{get___:{value:x(G)},has___:{value:x(Z)},set___:{value:x(j)},delete___:{value:x(N)},permitHostObjects___:{value:x(function(H){if(H===o)V=!0;else throw new Error("bogus call to permitHostObjects___")})}})}T.prototype=P.prototype,i.exports=T,Object.defineProperty(WeakMap.prototype,"constructor",{value:WeakMap,enumerable:!1,configurable:!0,writable:!0})}():(typeof Proxy!="undefined"&&(Proxy=void 0),i.exports=P)})()},1570:function(i){"use strict";i.exports=o;var a=[function(){function l(c,f,h,d){for(var v=Math.min(h,d)|0,_=Math.max(h,d)|0,b=c[2*v],p=c[2*v+1];b<p;){var k=b+p>>1,E=f[2*k+1];if(E===_)return k;_<E?p=k:b=k+1}return b}function u(c,f,h,d){for(var v=c.length,_=[],b=0;b<v;++b)var p=c[b],k=p.length;return _}return u},function(){function l(c,f,h,d){for(var v=Math.min(h,d)|0,_=Math.max(h,d)|0,b=c[2*v],p=c[2*v+1];b<p;){var k=b+p>>1,E=f[2*k+1];if(E===_)return k;_<E?p=k:b=k+1}return b}function u(c,f,h,d){for(var v=c.length,_=[],b=0;b<v;++b){var p=c[b],k=p.length;if(k===2){var E=(d[p[0]]<<0)+(d[p[1]]<<1);if(E===0||E===3)continue;switch(E){case 0:break;case 1:_.push([l(h,f,p[0],p[1])]);break;case 2:_.push([l(h,f,p[1],p[0])]);break;case 3:break}}}return _}return u},function(){function l(c,f,h,d){for(var v=Math.min(h,d)|0,_=Math.max(h,d)|0,b=c[2*v],p=c[2*v+1];b<p;){var k=b+p>>1,E=f[2*k+1];if(E===_)return k;_<E?p=k:b=k+1}return b}function u(c,f,h,d){for(var v=c.length,_=[],b=0;b<v;++b){var p=c[b],k=p.length;if(k===3){var E=(d[p[0]]<<0)+(d[p[1]]<<1)+(d[p[2]]<<2);if(E===0||E===7)continue;switch(E){case 0:break;case 1:_.push([l(h,f,p[0],p[2]),l(h,f,p[0],p[1])]);break;case 2:_.push([l(h,f,p[1],p[0]),l(h,f,p[1],p[2])]);break;case 3:_.push([l(h,f,p[0],p[2]),l(h,f,p[1],p[2])]);break;case 4:_.push([l(h,f,p[2],p[1]),l(h,f,p[2],p[0])]);break;case 5:_.push([l(h,f,p[2],p[1]),l(h,f,p[0],p[1])]);break;case 6:_.push([l(h,f,p[1],p[0]),l(h,f,p[2],p[0])]);break;case 7:break}}else if(k===2){var E=(d[p[0]]<<0)+(d[p[1]]<<1);if(E===0||E===3)continue;switch(E){case 0:break;case 1:_.push([l(h,f,p[0],p[1])]);break;case 2:_.push([l(h,f,p[1],p[0])]);break;case 3:break}}}return _}return u},function(){function l(c,f,h,d){for(var v=Math.min(h,d)|0,_=Math.max(h,d)|0,b=c[2*v],p=c[2*v+1];b<p;){var k=b+p>>1,E=f[2*k+1];if(E===_)return k;_<E?p=k:b=k+1}return b}function u(c,f,h,d){for(var v=c.length,_=[],b=0;b<v;++b){var p=c[b],k=p.length;if(k===4){var E=(d[p[0]]<<0)+(d[p[1]]<<1)+(d[p[2]]<<2)+(d[p[3]]<<3);if(E===0||E===15)continue;switch(E){case 0:break;case 1:_.push([l(h,f,p[0],p[1]),l(h,f,p[0],p[2]),l(h,f,p[0],p[3])]);break;case 2:_.push([l(h,f,p[1],p[2]),l(h,f,p[1],p[0]),l(h,f,p[1],p[3])]);break;case 3:_.push([l(h,f,p[1],p[2]),l(h,f,p[0],p[2]),l(h,f,p[0],p[3])],[l(h,f,p[1],p[3]),l(h,f,p[1],p[2]),l(h,f,p[0],p[3])]);break;case 4:_.push([l(h,f,p[2],p[0]),l(h,f,p[2],p[1]),l(h,f,p[2],p[3])]);break;case 5:_.push([l(h,f,p[0],p[1]),l(h,f,p[2],p[1]),l(h,f,p[0],p[3])],[l(h,f,p[2],p[1]),l(h,f,p[2],p[3]),l(h,f,p[0],p[3])]);break;case 6:_.push([l(h,f,p[2],p[0]),l(h,f,p[1],p[0]),l(h,f,p[1],p[3])],[l(h,f,p[2],p[3]),l(h,f,p[2],p[0]),l(h,f,p[1],p[3])]);break;case 7:_.push([l(h,f,p[0],p[3]),l(h,f,p[1],p[3]),l(h,f,p[2],p[3])]);break;case 8:_.push([l(h,f,p[3],p[1]),l(h,f,p[3],p[0]),l(h,f,p[3],p[2])]);break;case 9:_.push([l(h,f,p[3],p[1]),l(h,f,p[0],p[1]),l(h,f,p[0],p[2])],[l(h,f,p[3],p[2]),l(h,f,p[3],p[1]),l(h,f,p[0],p[2])]);break;case 10:_.push([l(h,f,p[1],p[0]),l(h,f,p[3],p[0]),l(h,f,p[1],p[2])],[l(h,f,p[3],p[0]),l(h,f,p[3],p[2]),l(h,f,p[1],p[2])]);break;case 11:_.push([l(h,f,p[1],p[2]),l(h,f,p[0],p[2]),l(h,f,p[3],p[2])]);break;case 12:_.push([l(h,f,p[3],p[0]),l(h,f,p[2],p[0]),l(h,f,p[2],p[1])],[l(h,f,p[3],p[1]),l(h,f,p[3],p[0]),l(h,f,p[2],p[1])]);break;case 13:_.push([l(h,f,p[0],p[1]),l(h,f,p[2],p[1]),l(h,f,p[3],p[1])]);break;case 14:_.push([l(h,f,p[2],p[0]),l(h,f,p[1],p[0]),l(h,f,p[3],p[0])]);break;case 15:break}}else if(k===3){var E=(d[p[0]]<<0)+(d[p[1]]<<1)+(d[p[2]]<<2);if(E===0||E===7)continue;switch(E){case 0:break;case 1:_.push([l(h,f,p[0],p[2]),l(h,f,p[0],p[1])]);break;case 2:_.push([l(h,f,p[1],p[0]),l(h,f,p[1],p[2])]);break;case 3:_.push([l(h,f,p[0],p[2]),l(h,f,p[1],p[2])]);break;case 4:_.push([l(h,f,p[2],p[1]),l(h,f,p[2],p[0])]);break;case 5:_.push([l(h,f,p[2],p[1]),l(h,f,p[0],p[1])]);break;case 6:_.push([l(h,f,p[1],p[0]),l(h,f,p[2],p[0])]);break;case 7:break}}else if(k===2){var E=(d[p[0]]<<0)+(d[p[1]]<<1);if(E===0||E===3)continue;switch(E){case 0:break;case 1:_.push([l(h,f,p[0],p[1])]);break;case 2:_.push([l(h,f,p[1],p[0])]);break;case 3:break}}}return _}return u}];function o(s){return a[s]()}},1682:function(i){"use strict";function a(l,u){for(var c=1,f=l.length,h=l[0],d=l[0],v=1;v<f;++v)if(d=h,h=l[v],u(h,d)){if(v===c){c++;continue}l[c++]=h}return l.length=c,l}function o(l){for(var u=1,c=l.length,f=l[0],h=l[0],d=1;d<c;++d,h=f)if(h=f,f=l[d],f!==h){if(d===u){u++;continue}l[u++]=f}return l.length=u,l}function s(l,u,c){return l.length===0?l:u?(c||l.sort(u),a(l,u)):(c||l.sort(),o(l))}i.exports=s},1755:function(i){"use strict";"use restrict";i.exports=a;function a(s){this.roots=new Array(s),this.ranks=new Array(s);for(var l=0;l<s;++l)this.roots[l]=l,this.ranks[l]=0}var o=a.prototype;Object.defineProperty(o,"length",{get:function(){return this.roots.length}}),o.makeSet=function(){var s=this.roots.length;return this.roots.push(s),this.ranks.push(0),s},o.find=function(s){for(var l=s,u=this.roots;u[s]!==s;)s=u[s];for(;u[l]!==s;){var c=u[l];u[l]=s,l=c}return s},o.link=function(s,l){var u=this.find(s),c=this.find(l);if(u!==c){var f=this.ranks,h=this.roots,d=f[u],v=f[c];d<v?h[u]=c:v<d?h[c]=u:(h[c]=u,++f[u])}}},1811:function(i,a,o){"use strict";var s=o(2478),l=o(7442),u=o(7608),c=o(5567),f=o(2408),h=o(7089),d=o(6582),v=o(7656),_=o(2504),b=o(3536),p=[0,0,0];i.exports=L;function k(x){this._components=x.slice(),this._time=[0],this.prevMatrix=x.slice(),this.nextMatrix=x.slice(),this.computedMatrix=x.slice(),this.computedInverse=x.slice(),this.computedEye=[0,0,0],this.computedUp=[0,0,0],this.computedCenter=[0,0,0],this.computedRadius=[0],this._limits=[-1/0,1/0]}var E=k.prototype;E.recalcMatrix=function(x){var C=this._time,M=s.le(C,x),g=this.computedMatrix;if(!(M<0)){var P=this._components;if(M===C.length-1)for(var T=16*M,z=0;z<16;++z)g[z]=P[T++];else{for(var O=C[M+1]-C[M],T=16*M,V=this.prevMatrix,G=!0,z=0;z<16;++z)V[z]=P[T++];for(var Z=this.nextMatrix,z=0;z<16;++z)Z[z]=P[T++],G=G&&V[z]===Z[z];if(O<1e-6||G)for(var z=0;z<16;++z)g[z]=V[z];else l(g,V,Z,(x-C[M])/O)}var j=this.computedUp;j[0]=g[1],j[1]=g[5],j[2]=g[9],b(j,j);var N=this.computedInverse;u(N,g);var H=this.computedEye,te=N[15];H[0]=N[12]/te,H[1]=N[13]/te,H[2]=N[14]/te;for(var oe=this.computedCenter,_e=Math.exp(this.computedRadius[0]),z=0;z<3;++z)oe[z]=H[z]-g[2+4*z]*_e}},E.idle=function(x){if(!(x<this.lastT())){for(var C=this._components,M=C.length-16,g=0;g<16;++g)C.push(C[M++]);this._time.push(x)}},E.flush=function(x){var C=s.gt(this._time,x)-2;C<0||(this._time.splice(0,C),this._components.splice(0,16*C))},E.lastT=function(){return this._time[this._time.length-1]},E.lookAt=function(x,C,M,g){this.recalcMatrix(x),C=C||this.computedEye,M=M||p,g=g||this.computedUp,this.setMatrix(x,d(this.computedMatrix,C,M,g));for(var P=0,T=0;T<3;++T)P+=Math.pow(M[T]-C[T],2);P=Math.log(Math.sqrt(P)),this.computedRadius[0]=P},E.rotate=function(x,C,M,g){this.recalcMatrix(x);var P=this.computedInverse;C&&f(P,P,C),M&&c(P,P,M),g&&h(P,P,g),this.setMatrix(x,u(this.computedMatrix,P))};var S=[0,0,0];E.pan=function(x,C,M,g){S[0]=-(C||0),S[1]=-(M||0),S[2]=-(g||0),this.recalcMatrix(x);var P=this.computedInverse;v(P,P,S),this.setMatrix(x,u(P,P))},E.translate=function(x,C,M,g){S[0]=C||0,S[1]=M||0,S[2]=g||0,this.recalcMatrix(x);var P=this.computedMatrix;v(P,P,S),this.setMatrix(x,P)},E.setMatrix=function(x,C){if(!(x<this.lastT())){this._time.push(x);for(var M=0;M<16;++M)this._components.push(C[M])}},E.setDistance=function(x,C){this.computedRadius[0]=C},E.setDistanceLimits=function(x,C){var M=this._limits;M[0]=x,M[1]=C},E.getDistanceLimits=function(x){var C=this._limits;return x?(x[0]=C[0],x[1]=C[1],x):C};function L(x){x=x||{};var C=x.matrix||[1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1];return new k(C)}},1848:function(i,a,o){var s=o(4905),l=o(6468);i.exports=u;function u(c){for(var f=Array.isArray(c)?c:s(c),h=0;h<f.length;h++){var d=f[h];if(d.type==="preprocessor"){var v=d.data.match(/\#define\s+SHADER_NAME(_B64)?\s+(.+)$/);if(v&&v[2]){var _=v[1],b=v[2];return(_?l(b):b).trim()}}}}},1879:function(i,a,o){"use strict";var s=o(3236),l=o(9405),u=s([`precision highp float;
+#define GLSLIFY 1
+
+attribute vec3 position;
+
+uniform mat4 model, view, projection;
+uniform vec3 offset, majorAxis, minorAxis, screenAxis;
+uniform float lineWidth;
+uniform vec2 screenShape;
+
+vec3 project(vec3 p) {
+  vec4 pp = projection * (view * (model * vec4(p, 1.0)));
+  return pp.xyz / max(pp.w, 0.0001);
+}
+
+void main() {
+  vec3 major = position.x * majorAxis;
+  vec3 minor = position.y * minorAxis;
+
+  vec3 vPosition = major + minor + offset;
+  vec3 pPosition = project(vPosition);
+  vec3 offset = project(vPosition + screenAxis * position.z);
+
+  vec2 screen = normalize((offset - pPosition).xy * screenShape) / screenShape;
+
+  gl_Position = vec4(pPosition + vec3(0.5 * screen * lineWidth, 0), 1.0);
+}
+`]),c=s([`precision highp float;
+#define GLSLIFY 1
+
+uniform vec4 color;
+void main() {
+  gl_FragColor = color;
+}`]);a.n=function(_){return l(_,u,c,null,[{name:"position",type:"vec3"}])};var f=s([`precision highp float;
+#define GLSLIFY 1
+
+attribute vec3 position;
+
+uniform mat4 model, view, projection;
+uniform vec3 offset, axis, alignDir, alignOpt;
+uniform float scale, angle, pixelScale;
+uniform vec2 resolution;
+
+vec3 project(vec3 p) {
+  vec4 pp = projection * (view * (model * vec4(p, 1.0)));
+  return pp.xyz / max(pp.w, 0.0001);
+}
+
+float computeViewAngle(vec3 a, vec3 b) {
+  vec3 A = project(a);
+  vec3 B = project(b);
+
+  return atan(
+    (B.y - A.y) * resolution.y,
+    (B.x - A.x) * resolution.x
+  );
+}
+
+const float PI = 3.141592;
+const float TWO_PI = 2.0 * PI;
+const float HALF_PI = 0.5 * PI;
+const float ONE_AND_HALF_PI = 1.5 * PI;
+
+int option = int(floor(alignOpt.x + 0.001));
+float hv_ratio =       alignOpt.y;
+bool enableAlign =    (alignOpt.z != 0.0);
+
+float mod_angle(float a) {
+  return mod(a, PI);
+}
+
+float positive_angle(float a) {
+  return mod_angle((a < 0.0) ?
+    a + TWO_PI :
+    a
+  );
+}
+
+float look_upwards(float a) {
+  float b = positive_angle(a);
+  return ((b > HALF_PI) && (b <= ONE_AND_HALF_PI)) ?
+    b - PI :
+    b;
+}
+
+float look_horizontal_or_vertical(float a, float ratio) {
+  // ratio controls the ratio between being horizontal to (vertical + horizontal)
+  // if ratio is set to 0.5 then it is 50%, 50%.
+  // when using a higher ratio e.g. 0.75 the result would
+  // likely be more horizontal than vertical.
+
+  float b = positive_angle(a);
+
+  return
+    (b < (      ratio) * HALF_PI) ? 0.0 :
+    (b < (2.0 - ratio) * HALF_PI) ? -HALF_PI :
+    (b < (2.0 + ratio) * HALF_PI) ? 0.0 :
+    (b < (4.0 - ratio) * HALF_PI) ? HALF_PI :
+                                    0.0;
+}
+
+float roundTo(float a, float b) {
+  return float(b * floor((a + 0.5 * b) / b));
+}
+
+float look_round_n_directions(float a, int n) {
+  float b = positive_angle(a);
+  float div = TWO_PI / float(n);
+  float c = roundTo(b, div);
+  return look_upwards(c);
+}
+
+float applyAlignOption(float rawAngle, float delta) {
+  return
+    (option >  2) ? look_round_n_directions(rawAngle + delta, option) :       // option 3-n: round to n directions
+    (option == 2) ? look_horizontal_or_vertical(rawAngle + delta, hv_ratio) : // horizontal or vertical
+    (option == 1) ? rawAngle + delta :       // use free angle, and flip to align with one direction of the axis
+    (option == 0) ? look_upwards(rawAngle) : // use free angle, and stay upwards
+    (option ==-1) ? 0.0 :                    // useful for backward compatibility, all texts remains horizontal
+                    rawAngle;                // otherwise return back raw input angle
+}
+
+bool isAxisTitle = (axis.x == 0.0) &&
+                   (axis.y == 0.0) &&
+                   (axis.z == 0.0);
+
+void main() {
+  //Compute world offset
+  float axisDistance = position.z;
+  vec3 dataPosition = axisDistance * axis + offset;
+
+  float beta = angle; // i.e. user defined attributes for each tick
+
+  float axisAngle;
+  float clipAngle;
+  float flip;
+
+  if (enableAlign) {
+    axisAngle = (isAxisTitle) ? HALF_PI :
+                      computeViewAngle(dataPosition, dataPosition + axis);
+    clipAngle = computeViewAngle(dataPosition, dataPosition + alignDir);
+
+    axisAngle += (sin(axisAngle) < 0.0) ? PI : 0.0;
+    clipAngle += (sin(clipAngle) < 0.0) ? PI : 0.0;
+
+    flip = (dot(vec2(cos(axisAngle), sin(axisAngle)),
+                vec2(sin(clipAngle),-cos(clipAngle))) > 0.0) ? 1.0 : 0.0;
+
+    beta += applyAlignOption(clipAngle, flip * PI);
+  }
+
+  //Compute plane offset
+  vec2 planeCoord = position.xy * pixelScale;
+
+  mat2 planeXform = scale * mat2(
+     cos(beta), sin(beta),
+    -sin(beta), cos(beta)
+  );
+
+  vec2 viewOffset = 2.0 * planeXform * planeCoord / resolution;
+
+  //Compute clip position
+  vec3 clipPosition = project(dataPosition);
+
+  //Apply text offset in clip coordinates
+  clipPosition += vec3(viewOffset, 0.0);
+
+  //Done
+  gl_Position = vec4(clipPosition, 1.0);
+}
+`]),h=s([`precision highp float;
+#define GLSLIFY 1
+
+uniform vec4 color;
+void main() {
+  gl_FragColor = color;
+}`]);a.Q=function(_){return l(_,f,h,null,[{name:"position",type:"vec3"}])};var d=s([`precision highp float;
+#define GLSLIFY 1
+
+attribute vec3 position;
+attribute vec3 normal;
+
+uniform mat4 model, view, projection;
+uniform vec3 enable;
+uniform vec3 bounds[2];
+
+varying vec3 colorChannel;
+
+void main() {
+
+  vec3 signAxis = sign(bounds[1] - bounds[0]);
+
+  vec3 realNormal = signAxis * normal;
+
+  if(dot(realNormal, enable) > 0.0) {
+    vec3 minRange = min(bounds[0], bounds[1]);
+    vec3 maxRange = max(bounds[0], bounds[1]);
+    vec3 nPosition = mix(minRange, maxRange, 0.5 * (position + 1.0));
+    gl_Position = projection * (view * (model * vec4(nPosition, 1.0)));
+  } else {
+    gl_Position = vec4(0,0,0,0);
+  }
+
+  colorChannel = abs(realNormal);
+}
+`]),v=s([`precision highp float;
+#define GLSLIFY 1
+
+uniform vec4 colors[3];
+
+varying vec3 colorChannel;
+
+void main() {
+  gl_FragColor = colorChannel.x * colors[0] +
+                 colorChannel.y * colors[1] +
+                 colorChannel.z * colors[2];
+}`]);a.bg=function(_){return l(_,d,v,null,[{name:"position",type:"vec3"},{name:"normal",type:"vec3"}])}},1888:function(i,a,o){"use strict";var s=o(8828),l=o(1338),u=o(4793).hp;o.g.__TYPEDARRAY_POOL||(o.g.__TYPEDARRAY_POOL={UINT8:l([32,0]),UINT16:l([32,0]),UINT32:l([32,0]),BIGUINT64:l([32,0]),INT8:l([32,0]),INT16:l([32,0]),INT32:l([32,0]),BIGINT64:l([32,0]),FLOAT:l([32,0]),DOUBLE:l([32,0]),DATA:l([32,0]),UINT8C:l([32,0]),BUFFER:l([32,0])});var c=typeof Uint8ClampedArray!="undefined",f=typeof BigUint64Array!="undefined",h=typeof BigInt64Array!="undefined",d=o.g.__TYPEDARRAY_POOL;d.UINT8C||(d.UINT8C=l([32,0])),d.BIGUINT64||(d.BIGUINT64=l([32,0])),d.BIGINT64||(d.BIGINT64=l([32,0])),d.BUFFER||(d.BUFFER=l([32,0]));var v=d.DATA,_=d.BUFFER;a.free=function(j){if(u.isBuffer(j))_[s.log2(j.length)].push(j);else{if(Object.prototype.toString.call(j)!=="[object ArrayBuffer]"&&(j=j.buffer),!j)return;var N=j.length||j.byteLength,H=s.log2(N)|0;v[H].push(j)}};function b(Z){if(Z){var j=Z.length||Z.byteLength,N=s.log2(j);v[N].push(Z)}}function p(Z){b(Z.buffer)}a.freeUint8=a.freeUint16=a.freeUint32=a.freeBigUint64=a.freeInt8=a.freeInt16=a.freeInt32=a.freeBigInt64=a.freeFloat32=a.freeFloat=a.freeFloat64=a.freeDouble=a.freeUint8Clamped=a.freeDataView=p,a.freeArrayBuffer=b,a.freeBuffer=function(j){_[s.log2(j.length)].push(j)},a.malloc=function(j,N){if(N===void 0||N==="arraybuffer")return k(j);switch(N){case"uint8":return E(j);case"uint16":return S(j);case"uint32":return L(j);case"int8":return x(j);case"int16":return C(j);case"int32":return M(j);case"float":case"float32":return g(j);case"double":case"float64":return P(j);case"uint8_clamped":return T(j);case"bigint64":return O(j);case"biguint64":return z(j);case"buffer":return G(j);case"data":case"dataview":return V(j);default:return null}return null};function k(j){var j=s.nextPow2(j),N=s.log2(j),H=v[N];return H.length>0?H.pop():new ArrayBuffer(j)}a.mallocArrayBuffer=k;function E(Z){return new Uint8Array(k(Z),0,Z)}a.mallocUint8=E;function S(Z){return new Uint16Array(k(2*Z),0,Z)}a.mallocUint16=S;function L(Z){return new Uint32Array(k(4*Z),0,Z)}a.mallocUint32=L;function x(Z){return new Int8Array(k(Z),0,Z)}a.mallocInt8=x;function C(Z){return new Int16Array(k(2*Z),0,Z)}a.mallocInt16=C;function M(Z){return new Int32Array(k(4*Z),0,Z)}a.mallocInt32=M;function g(Z){return new Float32Array(k(4*Z),0,Z)}a.mallocFloat32=a.mallocFloat=g;function P(Z){return new Float64Array(k(8*Z),0,Z)}a.mallocFloat64=a.mallocDouble=P;function T(Z){return c?new Uint8ClampedArray(k(Z),0,Z):E(Z)}a.mallocUint8Clamped=T;function z(Z){return f?new BigUint64Array(k(8*Z),0,Z):null}a.mallocBigUint64=z;function O(Z){return h?new BigInt64Array(k(8*Z),0,Z):null}a.mallocBigInt64=O;function V(Z){return new DataView(k(Z),0,Z)}a.mallocDataView=V;function G(Z){Z=s.nextPow2(Z);var j=s.log2(Z),N=_[j];return N.length>0?N.pop():new u(Z)}a.mallocBuffer=G,a.clearCache=function(){for(var j=0;j<32;++j)d.UINT8[j].length=0,d.UINT16[j].length=0,d.UINT32[j].length=0,d.INT8[j].length=0,d.INT16[j].length=0,d.INT32[j].length=0,d.FLOAT[j].length=0,d.DOUBLE[j].length=0,d.BIGUINT64[j].length=0,d.BIGINT64[j].length=0,d.UINT8C[j].length=0,v[j].length=0,_[j].length=0}},1903:function(i){i.exports=a;function a(o){var s=new Float32Array(16);return s[0]=o[0],s[1]=o[1],s[2]=o[2],s[3]=o[3],s[4]=o[4],s[5]=o[5],s[6]=o[6],s[7]=o[7],s[8]=o[8],s[9]=o[9],s[10]=o[10],s[11]=o[11],s[12]=o[12],s[13]=o[13],s[14]=o[14],s[15]=o[15],s}},1944:function(i,a,o){"use strict";var s=o(5250),l=o(8210);i.exports=u;function u(c,f){for(var h=s(c[0],f[0]),d=1;d<c.length;++d)h=l(h,s(c[d],f[d]));return h}},1964:function(i,a,o){i.exports={alpha_shape:o(3502),convex_hull:o(7352),delaunay_triangulate:o(7642),gl_cone3d:o(6405),gl_error3d:o(9165),gl_line3d:o(5714),gl_mesh3d:o(7201),gl_plot3d:o(4100),gl_scatter3d:o(8418),gl_streamtube3d:o(7815),gl_surface3d:o(9499),ndarray:o(9618),ndarray_linear_interpolate:o(4317)}},2014:function(i,a,o){"use strict";"use restrict";var s=o(3105),l=o(4623);function u(g){for(var P=0,T=Math.max,z=0,O=g.length;z<O;++z)P=T(P,g[z].length);return P-1}a.dimension=u;function c(g){for(var P=-1,T=Math.max,z=0,O=g.length;z<O;++z)for(var V=g[z],G=0,Z=V.length;G<Z;++G)P=T(P,V[G]);return P+1}a.countVertices=c;function f(g){for(var P=new Array(g.length),T=0,z=g.length;T<z;++T)P[T]=g[T].slice(0);return P}a.cloneCells=f;function h(g,P){var T=g.length,z=g.length-P.length,O=Math.min;if(z)return z;switch(T){case 0:return 0;case 1:return g[0]-P[0];case 2:var N=g[0]+g[1]-P[0]-P[1];return N||O(g[0],g[1])-O(P[0],P[1]);case 3:var V=g[0]+g[1],G=P[0]+P[1];if(N=V+g[2]-(G+P[2]),N)return N;var Z=O(g[0],g[1]),j=O(P[0],P[1]),N=O(Z,g[2])-O(j,P[2]);return N||O(Z+g[2],V)-O(j+P[2],G);default:var H=g.slice(0);H.sort();var te=P.slice(0);te.sort();for(var oe=0;oe<T;++oe)if(z=H[oe]-te[oe],z)return z;return 0}}a.compareCells=h;function d(g,P){return h(g[0],P[0])}function v(g,P){if(P){for(var T=g.length,z=new Array(T),O=0;O<T;++O)z[O]=[g[O],P[O]];z.sort(d);for(var O=0;O<T;++O)g[O]=z[O][0],P[O]=z[O][1];return g}else return g.sort(h),g}a.normalize=v;function _(g){if(g.length===0)return[];for(var P=1,T=g.length,z=1;z<T;++z){var O=g[z];if(h(O,g[z-1])){if(z===P){P++;continue}g[P++]=O}}return g.length=P,g}a.unique=_;function b(g,P){for(var T=0,z=g.length-1,O=-1;T<=z;){var V=T+z>>1,G=h(g[V],P);G<=0?(G===0&&(O=V),T=V+1):G>0&&(z=V-1)}return O}a.findCell=b;function p(g,P){for(var T=new Array(g.length),z=0,O=T.length;z<O;++z)T[z]=[];for(var V=[],z=0,G=P.length;z<G;++z)for(var Z=P[z],j=Z.length,N=1,H=1<<j;N<H;++N){V.length=s.popCount(N);for(var te=0,oe=0;oe<j;++oe)N&1<<oe&&(V[te++]=Z[oe]);var _e=b(g,V);if(!(_e<0))for(;T[_e++].push(z),!(_e>=g.length||h(g[_e],V)!==0););}return T}a.incidence=p;function k(g,P){if(!P)return p(_(S(g,0)),g,0);for(var T=new Array(P),z=0;z<P;++z)T[z]=[];for(var z=0,O=g.length;z<O;++z)for(var V=g[z],G=0,Z=V.length;G<Z;++G)T[V[G]].push(z);return T}a.dual=k;function E(g){for(var P=[],T=0,z=g.length;T<z;++T)for(var O=g[T],V=O.length|0,G=1,Z=1<<V;G<Z;++G){for(var j=[],N=0;N<V;++N)G>>>N&1&&j.push(O[N]);P.push(j)}return v(P)}a.explode=E;function S(g,P){if(P<0)return[];for(var T=[],z=(1<<P+1)-1,O=0;O<g.length;++O)for(var V=g[O],G=z;G<1<<V.length;G=s.nextCombination(G)){for(var Z=new Array(P+1),j=0,N=0;N<V.length;++N)G&1<<N&&(Z[j++]=V[N]);T.push(Z)}return v(T)}a.skeleton=S;function L(g){for(var P=[],T=0,z=g.length;T<z;++T)for(var O=g[T],V=0,G=O.length;V<G;++V){for(var Z=new Array(O.length-1),j=0,N=0;j<G;++j)j!==V&&(Z[N++]=O[j]);P.push(Z)}return v(P)}a.boundary=L;function x(g,P){for(var T=new l(P),z=0;z<g.length;++z)for(var O=g[z],V=0;V<O.length;++V)for(var G=V+1;G<O.length;++G)T.link(O[V],O[G]);for(var Z=[],j=T.ranks,z=0;z<j.length;++z)j[z]=-1;for(var z=0;z<g.length;++z){var N=T.find(g[z][0]);j[N]<0?(j[N]=Z.length,Z.push([g[z].slice(0)])):Z[j[N]].push(g[z].slice(0))}return Z}function C(g){for(var P=_(v(S(g,0))),T=new l(P.length),z=0;z<g.length;++z)for(var O=g[z],V=0;V<O.length;++V)for(var G=b(P,[O[V]]),Z=V+1;Z<O.length;++Z)T.link(G,b(P,[O[Z]]));for(var j=[],N=T.ranks,z=0;z<N.length;++z)N[z]=-1;for(var z=0;z<g.length;++z){var H=T.find(b(P,[g[z][0]]));N[H]<0?(N[H]=j.length,j.push([g[z].slice(0)])):j[N[H]].push(g[z].slice(0))}return j}function M(g,P){return P?x(g,P):C(g)}a.connectedComponents=M},2095:function(i,a,o){"use strict";i.exports=b;var s=o(3134),l=o(3088),u=o(5085),c=o(5250),f=o(8210),h=o(1682),d=o(5609);function v(p,k){for(var E=new Array(p),S=0;S<p;++S)E[S]=k;return E}function _(p){for(var k=new Array(p),E=0;E<p;++E)k[E]=[];return k}function b(p,k){var Re=d(p,k);p=Re[0],k=Re[1];for(var E=k.length,S=p.length,L=s(p,k.length),x=0;x<E;++x)if(L[x].length%2===1)throw new Error("planar-graph-to-polyline: graph must be manifold");var C=l(p,k);function M(lt){for(var Gt=lt.length,Nt=[0],Jt=0;Jt<Gt;++Jt){var sr=k[lt[Jt]],wr=k[lt[(Jt+1)%Gt]],cr=c(-sr[0],sr[1]),$e=c(-sr[0],wr[1]),St=c(wr[0],sr[1]),Qt=c(wr[0],wr[1]);Nt=f(Nt,f(f(cr,$e),f(St,Qt)))}return Nt[Nt.length-1]>0}C=C.filter(M);for(var g=C.length,P=new Array(g),T=new Array(g),x=0;x<g;++x){P[x]=x;var z=new Array(g),O=C[x].map(function(Gt){return k[Gt]}),V=u([O]),G=0;e:for(var Z=0;Z<g;++Z)if(z[Z]=0,x!==Z){for(var j=C[Z],N=j.length,H=0;H<N;++H){var te=V(k[j[H]]);if(te!==0){te<0&&(z[Z]=1,G+=1);continue e}}z[Z]=1,G+=1}T[x]=[G,x,z]}T.sort(function(lt,Gt){return Gt[0]-lt[0]});for(var x=0;x<g;++x)for(var z=T[x],oe=z[1],_e=z[2],Z=0;Z<g;++Z)_e[Z]&&(P[Z]=oe);for(var Ee=_(g),x=0;x<g;++x)Ee[x].push(P[x]),Ee[P[x]].push(x);for(var Ce={},me=v(E,!1),x=0;x<g;++x)for(var j=C[x],N=j.length,Z=0;Z<N;++Z){var ie=j[Z],Se=j[(Z+1)%N],Le=Math.min(ie,Se)+":"+Math.max(ie,Se);if(Le in Ce){var Ae=Ce[Le];Ee[Ae].push(x),Ee[x].push(Ae),me[ie]=me[Se]=!0}else Ce[Le]=x}function Fe(lt){for(var Gt=lt.length,Nt=0;Nt<Gt;++Nt)if(!me[lt[Nt]])return!1;return!0}for(var Pe=[],ge=v(g,-1),x=0;x<g;++x)P[x]===x&&!Fe(C[x])?(Pe.push(x),ge[x]=0):ge[x]=-1;for(var Re=[];Pe.length>0;){var ce=Pe.pop(),Ze=Ee[ce];h(Ze,function(lt,Gt){return lt-Gt});var ut=Ze.length,pt=ge[ce],Zt;if(pt===0){var j=C[ce];Zt=[j]}for(var x=0;x<ut;++x){var st=Ze[x];if(!(ge[st]>=0)&&(ge[st]=pt^1,Pe.push(st),pt===0)){var j=C[st];Fe(j)||(j.reverse(),Zt.push(j))}}pt===0&&Re.push(Zt)}return Re}},2145:function(i,a){"use strict";a.uniforms=u,a.attributes=c;var o={FLOAT:"float",FLOAT_VEC2:"vec2",FLOAT_VEC3:"vec3",FLOAT_VEC4:"vec4",INT:"int",INT_VEC2:"ivec2",INT_VEC3:"ivec3",INT_VEC4:"ivec4",BOOL:"bool",BOOL_VEC2:"bvec2",BOOL_VEC3:"bvec3",BOOL_VEC4:"bvec4",FLOAT_MAT2:"mat2",FLOAT_MAT3:"mat3",FLOAT_MAT4:"mat4",SAMPLER_2D:"sampler2D",SAMPLER_CUBE:"samplerCube"},s=null;function l(f,h){if(!s){var d=Object.keys(o);s={};for(var v=0;v<d.length;++v){var _=d[v];s[f[_]]=o[_]}}return s[h]}function u(f,h){for(var d=f.getProgramParameter(h,f.ACTIVE_UNIFORMS),v=[],_=0;_<d;++_){var b=f.getActiveUniform(h,_);if(b){var p=l(f,b.type);if(b.size>1)for(var k=0;k<b.size;++k)v.push({name:b.name.replace("[0]","["+k+"]"),type:p});else v.push({name:b.name,type:p})}}return v}function c(f,h){for(var d=f.getProgramParameter(h,f.ACTIVE_ATTRIBUTES),v=[],_=0;_<d;++_){var b=f.getActiveAttrib(h,_);b&&v.push({name:b.name,type:l(f,b.type)})}return v}},2229:function(i,a,o){i.exports=o(6843)},2260:function(i,a,o){"use strict";var s=o(7766);i.exports=C;var l=null,u,c,f,h;function d(M){var g=M.getParameter(M.FRAMEBUFFER_BINDING),P=M.getParameter(M.RENDERBUFFER_BINDING),T=M.getParameter(M.TEXTURE_BINDING_2D);return[g,P,T]}function v(M,g){M.bindFramebuffer(M.FRAMEBUFFER,g[0]),M.bindRenderbuffer(M.RENDERBUFFER,g[1]),M.bindTexture(M.TEXTURE_2D,g[2])}function _(M,g){var P=M.getParameter(g.MAX_COLOR_ATTACHMENTS_WEBGL);l=new Array(P+1);for(var T=0;T<=P;++T){for(var z=new Array(P),O=0;O<T;++O)z[O]=M.COLOR_ATTACHMENT0+O;for(var O=T;O<P;++O)z[O]=M.NONE;l[T]=z}}function b(M){switch(M){case u:throw new Error("gl-fbo: Framebuffer unsupported");case c:throw new Error("gl-fbo: Framebuffer incomplete attachment");case f:throw new Error("gl-fbo: Framebuffer incomplete dimensions");case h:throw new Error("gl-fbo: Framebuffer incomplete missing attachment");default:throw new Error("gl-fbo: Framebuffer failed for unspecified reason")}}function p(M,g,P,T,z,O){if(!T)return null;var V=s(M,g,P,z,T);return V.magFilter=M.NEAREST,V.minFilter=M.NEAREST,V.mipSamples=1,V.bind(),M.framebufferTexture2D(M.FRAMEBUFFER,O,M.TEXTURE_2D,V.handle,0),V}function k(M,g,P,T,z){var O=M.createRenderbuffer();return M.bindRenderbuffer(M.RENDERBUFFER,O),M.renderbufferStorage(M.RENDERBUFFER,T,g,P),M.framebufferRenderbuffer(M.FRAMEBUFFER,z,M.RENDERBUFFER,O),O}function E(M){var g=d(M.gl),P=M.gl,T=M.handle=P.createFramebuffer(),z=M._shape[0],O=M._shape[1],V=M.color.length,G=M._ext,Z=M._useStencil,j=M._useDepth,N=M._colorType;P.bindFramebuffer(P.FRAMEBUFFER,T);for(var H=0;H<V;++H)M.color[H]=p(P,z,O,N,P.RGBA,P.COLOR_ATTACHMENT0+H);V===0?(M._color_rb=k(P,z,O,P.RGBA4,P.COLOR_ATTACHMENT0),G&&G.drawBuffersWEBGL(l[0])):V>1&&G.drawBuffersWEBGL(l[V]);var te=P.getExtension("WEBGL_depth_texture");te?Z?M.depth=p(P,z,O,te.UNSIGNED_INT_24_8_WEBGL,P.DEPTH_STENCIL,P.DEPTH_STENCIL_ATTACHMENT):j&&(M.depth=p(P,z,O,P.UNSIGNED_SHORT,P.DEPTH_COMPONENT,P.DEPTH_ATTACHMENT)):j&&Z?M._depth_rb=k(P,z,O,P.DEPTH_STENCIL,P.DEPTH_STENCIL_ATTACHMENT):j?M._depth_rb=k(P,z,O,P.DEPTH_COMPONENT16,P.DEPTH_ATTACHMENT):Z&&(M._depth_rb=k(P,z,O,P.STENCIL_INDEX,P.STENCIL_ATTACHMENT));var oe=P.checkFramebufferStatus(P.FRAMEBUFFER);if(oe!==P.FRAMEBUFFER_COMPLETE){M._destroyed=!0,P.bindFramebuffer(P.FRAMEBUFFER,null),P.deleteFramebuffer(M.handle),M.handle=null,M.depth&&(M.depth.dispose(),M.depth=null),M._depth_rb&&(P.deleteRenderbuffer(M._depth_rb),M._depth_rb=null);for(var H=0;H<M.color.length;++H)M.color[H].dispose(),M.color[H]=null;M._color_rb&&(P.deleteRenderbuffer(M._color_rb),M._color_rb=null),v(P,g),b(oe)}v(P,g)}function S(M,g,P,T,z,O,V,G){this.gl=M,this._shape=[g|0,P|0],this._destroyed=!1,this._ext=G,this.color=new Array(z);for(var Z=0;Z<z;++Z)this.color[Z]=null;this._color_rb=null,this.depth=null,this._depth_rb=null,this._colorType=T,this._useDepth=O,this._useStencil=V;var j=this,N=[g|0,P|0];Object.defineProperties(N,{0:{get:function(){return j._shape[0]},set:function(H){return j.width=H}},1:{get:function(){return j._shape[1]},set:function(H){return j.height=H}}}),this._shapeVector=N,E(this)}var L=S.prototype;function x(M,g,P){if(M._destroyed)throw new Error("gl-fbo: Can't resize destroyed FBO");if(!(M._shape[0]===g&&M._shape[1]===P)){var T=M.gl,z=T.getParameter(T.MAX_RENDERBUFFER_SIZE);if(g<0||g>z||P<0||P>z)throw new Error("gl-fbo: Can't resize FBO, invalid dimensions");M._shape[0]=g,M._shape[1]=P;for(var O=d(T),V=0;V<M.color.length;++V)M.color[V].shape=M._shape;M._color_rb&&(T.bindRenderbuffer(T.RENDERBUFFER,M._color_rb),T.renderbufferStorage(T.RENDERBUFFER,T.RGBA4,M._shape[0],M._shape[1])),M.depth&&(M.depth.shape=M._shape),M._depth_rb&&(T.bindRenderbuffer(T.RENDERBUFFER,M._depth_rb),M._useDepth&&M._useStencil?T.renderbufferStorage(T.RENDERBUFFER,T.DEPTH_STENCIL,M._shape[0],M._shape[1]):M._useDepth?T.renderbufferStorage(T.RENDERBUFFER,T.DEPTH_COMPONENT16,M._shape[0],M._shape[1]):M._useStencil&&T.renderbufferStorage(T.RENDERBUFFER,T.STENCIL_INDEX,M._shape[0],M._shape[1])),T.bindFramebuffer(T.FRAMEBUFFER,M.handle);var G=T.checkFramebufferStatus(T.FRAMEBUFFER);G!==T.FRAMEBUFFER_COMPLETE&&(M.dispose(),v(T,O),b(G)),v(T,O)}}Object.defineProperties(L,{shape:{get:function(){return this._destroyed?[0,0]:this._shapeVector},set:function(M){if(Array.isArray(M)||(M=[M|0,M|0]),M.length!==2)throw new Error("gl-fbo: Shape vector must be length 2");var g=M[0]|0,P=M[1]|0;return x(this,g,P),[g,P]},enumerable:!1},width:{get:function(){return this._destroyed?0:this._shape[0]},set:function(M){return M=M|0,x(this,M,this._shape[1]),M},enumerable:!1},height:{get:function(){return this._destroyed?0:this._shape[1]},set:function(M){return M=M|0,x(this,this._shape[0],M),M},enumerable:!1}}),L.bind=function(){if(!this._destroyed){var M=this.gl;M.bindFramebuffer(M.FRAMEBUFFER,this.handle),M.viewport(0,0,this._shape[0],this._shape[1])}},L.dispose=function(){if(!this._destroyed){this._destroyed=!0;var M=this.gl;M.deleteFramebuffer(this.handle),this.handle=null,this.depth&&(this.depth.dispose(),this.depth=null),this._depth_rb&&(M.deleteRenderbuffer(this._depth_rb),this._depth_rb=null);for(var g=0;g<this.color.length;++g)this.color[g].dispose(),this.color[g]=null;this._color_rb&&(M.deleteRenderbuffer(this._color_rb),this._color_rb=null)}};function C(M,g,P,T){u||(u=M.FRAMEBUFFER_UNSUPPORTED,c=M.FRAMEBUFFER_INCOMPLETE_ATTACHMENT,f=M.FRAMEBUFFER_INCOMPLETE_DIMENSIONS,h=M.FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT);var z=M.getExtension("WEBGL_draw_buffers");if(!l&&z&&_(M,z),Array.isArray(g)&&(T=P,P=g[1]|0,g=g[0]|0),typeof g!="number")throw new Error("gl-fbo: Missing shape parameter");var O=M.getParameter(M.MAX_RENDERBUFFER_SIZE);if(g<0||g>O||P<0||P>O)throw new Error("gl-fbo: Parameters are too large for FBO");T=T||{};var V=1;if("color"in T){if(V=Math.max(T.color|0,0),V<0)throw new Error("gl-fbo: Must specify a nonnegative number of colors");if(V>1)if(z){if(V>M.getParameter(z.MAX_COLOR_ATTACHMENTS_WEBGL))throw new Error("gl-fbo: Context does not support "+V+" draw buffers")}else throw new Error("gl-fbo: Multiple draw buffer extension not supported")}var G=M.UNSIGNED_BYTE,Z=M.getExtension("OES_texture_float");if(T.float&&V>0){if(!Z)throw new Error("gl-fbo: Context does not support floating point textures");G=M.FLOAT}else T.preferFloat&&V>0&&Z&&(G=M.FLOAT);var j=!0;"depth"in T&&(j=!!T.depth);var N=!1;return"stencil"in T&&(N=!!T.stencil),new S(M,g,P,G,V,j,N,z)}},2272:function(i,a,o){"use strict";var s=o(2646)[4],l=o(2478);i.exports=c;function u(f,h,d,v,_,b){var p=h.opposite(v,_);if(!(p<0)){if(_<v){var k=v;v=_,_=k,k=b,b=p,p=k}h.isConstraint(v,_)||s(f[v],f[_],f[b],f[p])<0&&d.push(v,_)}}function c(f,h){for(var d=[],v=f.length,_=h.stars,b=0;b<v;++b)for(var p=_[b],k=1;k<p.length;k+=2){var E=p[k];if(!(E<b)&&!h.isConstraint(b,E)){for(var S=p[k-1],L=-1,x=1;x<p.length;x+=2)if(p[x-1]===E){L=p[x];break}L<0||s(f[b],f[E],f[S],f[L])<0&&d.push(b,E)}}for(;d.length>0;){for(var E=d.pop(),b=d.pop(),S=-1,L=-1,p=_[b],C=1;C<p.length;C+=2){var M=p[C-1],g=p[C];M===E?L=g:g===E&&(S=M)}S<0||L<0||s(f[b],f[E],f[S],f[L])>=0||(h.flip(b,E),u(f,h,d,S,b,L),u(f,h,d,b,L,S),u(f,h,d,L,E,S),u(f,h,d,E,S,L))}}},2334:function(i){i.exports=a;function a(o,s,l){return o[0]=Math.min(s[0],l[0]),o[1]=Math.min(s[1],l[1]),o[2]=Math.min(s[2],l[2]),o[3]=Math.min(s[3],l[3]),o}},2335:function(i){i.exports=a;function a(o){var s=new Float32Array(4);return s[0]=o[0],s[1]=o[1],s[2]=o[2],s[3]=o[3],s}},2361:function(i){var a=!1;if(typeof Float64Array!="undefined"){var o=new Float64Array(1),s=new Uint32Array(o.buffer);if(o[0]=1,a=!0,s[1]===1072693248){let k=function(L,x){return s[0]=L,s[1]=x,o[0]},E=function(L){return o[0]=L,s[0]},S=function(L){return o[0]=L,s[1]};var u=k,c=E,f=S;i.exports=function(x){return o[0]=x,[s[0],s[1]]},i.exports.pack=k,i.exports.lo=E,i.exports.hi=S}else if(s[0]===1072693248){let k=function(L,x){return s[1]=L,s[0]=x,o[0]},E=function(L){return o[0]=L,s[1]},S=function(L){return o[0]=L,s[0]};var h=k,d=E,v=S;i.exports=function(x){return o[0]=x,[s[1],s[0]]},i.exports.pack=k,i.exports.lo=E,i.exports.hi=S}else a=!1}if(!a){let k=function(L,x){return l.writeUInt32LE(L,0,!0),l.writeUInt32LE(x,4,!0),l.readDoubleLE(0,!0)},E=function(L){return l.writeDoubleLE(L,0,!0),l.readUInt32LE(0,!0)},S=function(L){return l.writeDoubleLE(L,0,!0),l.readUInt32LE(4,!0)};var _=k,b=E,p=S,l=new Buffer(8);i.exports=function(x){return l.writeDoubleLE(x,0,!0),[l.readUInt32LE(0,!0),l.readUInt32LE(4,!0)]},i.exports.pack=k,i.exports.lo=E,i.exports.hi=S}i.exports.sign=function(k){return i.exports.hi(k)>>>31},i.exports.exponent=function(k){var E=i.exports.hi(k);return(E<<1>>>21)-1023},i.exports.fraction=function(k){var E=i.exports.lo(k),S=i.exports.hi(k),L=S&(1<<20)-1;return S&2146435072&&(L+=1048576),[E,L]},i.exports.denormalized=function(k){var E=i.exports.hi(k);return!(E&2146435072)}},2408:function(i){i.exports=a;function a(o,s,l){var u=Math.sin(l),c=Math.cos(l),f=s[0],h=s[1],d=s[2],v=s[3],_=s[8],b=s[9],p=s[10],k=s[11];return s!==o&&(o[4]=s[4],o[5]=s[5],o[6]=s[6],o[7]=s[7],o[12]=s[12],o[13]=s[13],o[14]=s[14],o[15]=s[15]),o[0]=f*c-_*u,o[1]=h*c-b*u,o[2]=d*c-p*u,o[3]=v*c-k*u,o[8]=f*u+_*c,o[9]=h*u+b*c,o[10]=d*u+p*c,o[11]=v*u+k*c,o}},2419:function(i){"use strict";i.exports=a;function a(o){for(var s=1,l=1;l<o.length;++l)for(var u=0;u<l;++u)if(o[l]<o[u])s=-s;else if(o[u]===o[l])return 0;return s}},2447:function(i){i.exports=a;function a(o,s){return o[0]=Math.round(s[0]),o[1]=Math.round(s[1]),o[2]=Math.round(s[2]),o}},2455:function(i,a){"use strict";function o(){function u(h,d,v,_,b,p,k,E,S,L,x){for(var C=2*h,M=_,g=C*_;M<b;++M,g+=C){var P=p[d+g],T=p[d+g+h],z=k[M];e:for(var O=E,V=C*E;O<S;++O,V+=C){var G=L[d+V],Z=L[d+V+h],j=x[O];if(!(Z<P||T<G)){for(var N=d+1;N<h;++N){var H=p[N+g],te=p[N+h+g],oe=L[N+V],_e=L[N+h+V];if(te<oe||_e<H)continue e}var Ee=v(z,j);if(Ee!==void 0)return Ee}}}}function c(h,d,v,_,b,p,k,E,S,L,x){for(var C=2*h,M=E,g=C*E;M<S;++M,g+=C){var P=L[d+g],T=L[d+g+h],z=x[M];e:for(var O=_,V=C*_;O<b;++O,V+=C){var G=p[d+V],Z=p[d+V+h],j=k[O];if(!(T<G||Z<P)){for(var N=d+1;N<h;++N){var H=p[N+V],te=p[N+h+V],oe=L[N+g],_e=L[N+h+g];if(te<oe||_e<H)continue e}var Ee=v(j,z);if(Ee!==void 0)return Ee}}}}function f(h,d,v,_,b,p,k,E,S,L,x){return b-_>S-E?u(h,d,v,_,b,p,k,E,S,L,x):c(h,d,v,_,b,p,k,E,S,L,x)}return f}function s(){function u(v,_,b,p,k,E,S,L,x,C,M){for(var g=2*v,P=p,T=g*p;P<k;++P,T+=g){var z=E[_+T],O=E[_+T+v],V=S[P];e:for(var G=L,Z=g*L;G<x;++G,Z+=g){var j=C[_+Z],N=M[G];if(!(j<=z||O<j)){for(var H=_+1;H<v;++H){var te=E[H+T],oe=E[H+v+T],_e=C[H+Z],Ee=C[H+v+Z];if(oe<_e||Ee<te)continue e}var Ce=b(N,V);if(Ce!==void 0)return Ce}}}}function c(v,_,b,p,k,E,S,L,x,C,M){for(var g=2*v,P=p,T=g*p;P<k;++P,T+=g){var z=E[_+T],O=E[_+T+v],V=S[P];e:for(var G=L,Z=g*L;G<x;++G,Z+=g){var j=C[_+Z],N=M[G];if(!(j<z||O<j)){for(var H=_+1;H<v;++H){var te=E[H+T],oe=E[H+v+T],_e=C[H+Z],Ee=C[H+v+Z];if(oe<_e||Ee<te)continue e}var Ce=b(V,N);if(Ce!==void 0)return Ce}}}}function f(v,_,b,p,k,E,S,L,x,C,M){for(var g=2*v,P=L,T=g*L;P<x;++P,T+=g){var z=C[_+T],O=M[P];e:for(var V=p,G=g*p;V<k;++V,G+=g){var Z=E[_+G],j=E[_+G+v],N=S[V];if(!(z<=Z||j<z)){for(var H=_+1;H<v;++H){var te=E[H+G],oe=E[H+v+G],_e=C[H+T],Ee=C[H+v+T];if(oe<_e||Ee<te)continue e}var Ce=b(O,N);if(Ce!==void 0)return Ce}}}}function h(v,_,b,p,k,E,S,L,x,C,M){for(var g=2*v,P=L,T=g*L;P<x;++P,T+=g){var z=C[_+T],O=M[P];e:for(var V=p,G=g*p;V<k;++V,G+=g){var Z=E[_+G],j=E[_+G+v],N=S[V];if(!(z<Z||j<z)){for(var H=_+1;H<v;++H){var te=E[H+G],oe=E[H+v+G],_e=C[H+T],Ee=C[H+v+T];if(oe<_e||Ee<te)continue e}var Ce=b(N,O);if(Ce!==void 0)return Ce}}}}function d(v,_,b,p,k,E,S,L,x,C,M,g){return E-k>C-x?p?u(v,_,b,k,E,S,L,x,C,M,g):c(v,_,b,k,E,S,L,x,C,M,g):p?f(v,_,b,k,E,S,L,x,C,M,g):h(v,_,b,k,E,S,L,x,C,M,g)}return d}function l(u){return u?o():s()}a.partial=l(!1),a.full=l(!0)},2478:function(i){"use strict";function a(f,h,d,v,_){for(var b=_+1;v<=_;){var p=v+_>>>1,k=f[p],E=d!==void 0?d(k,h):k-h;E>=0?(b=p,_=p-1):v=p+1}return b}function o(f,h,d,v,_){for(var b=_+1;v<=_;){var p=v+_>>>1,k=f[p],E=d!==void 0?d(k,h):k-h;E>0?(b=p,_=p-1):v=p+1}return b}function s(f,h,d,v,_){for(var b=v-1;v<=_;){var p=v+_>>>1,k=f[p],E=d!==void 0?d(k,h):k-h;E<0?(b=p,v=p+1):_=p-1}return b}function l(f,h,d,v,_){for(var b=v-1;v<=_;){var p=v+_>>>1,k=f[p],E=d!==void 0?d(k,h):k-h;E<=0?(b=p,v=p+1):_=p-1}return b}function u(f,h,d,v,_){for(;v<=_;){var b=v+_>>>1,p=f[b],k=d!==void 0?d(p,h):p-h;if(k===0)return b;k<=0?v=b+1:_=b-1}return-1}function c(f,h,d,v,_,b){return typeof d=="function"?b(f,h,d,v===void 0?0:v|0,_===void 0?f.length-1:_|0):b(f,h,void 0,d===void 0?0:d|0,v===void 0?f.length-1:v|0)}i.exports={ge:function(f,h,d,v,_){return c(f,h,d,v,_,a)},gt:function(f,h,d,v,_){return c(f,h,d,v,_,o)},lt:function(f,h,d,v,_){return c(f,h,d,v,_,s)},le:function(f,h,d,v,_){return c(f,h,d,v,_,l)},eq:function(f,h,d,v,_){return c(f,h,d,v,_,u)}}},2504:function(i){i.exports=a;function a(o,s,l){var u=l[0],c=l[1],f=l[2];return o[0]=s[0]*u,o[1]=s[1]*u,o[2]=s[2]*u,o[3]=s[3]*u,o[4]=s[4]*c,o[5]=s[5]*c,o[6]=s[6]*c,o[7]=s[7]*c,o[8]=s[8]*f,o[9]=s[9]*f,o[10]=s[10]*f,o[11]=s[11]*f,o[12]=s[12],o[13]=s[13],o[14]=s[14],o[15]=s[15],o}},2538:function(i,a,o){"use strict";var s=o(8902),l=o(5542),u=o(2272),c=o(5023);i.exports=_;function f(b){return[Math.min(b[0],b[1]),Math.max(b[0],b[1])]}function h(b,p){return b[0]-p[0]||b[1]-p[1]}function d(b){return b.map(f).sort(h)}function v(b,p,k){return p in b?b[p]:k}function _(b,p,k){Array.isArray(p)?(k=k||{},p=p||[]):(k=p||{},p=[]);var E=!!v(k,"delaunay",!0),S=!!v(k,"interior",!0),L=!!v(k,"exterior",!0),x=!!v(k,"infinity",!1);if(!S&&!L||b.length===0)return[];var C=s(b,p);if(E||S!==L||x){for(var M=l(b.length,d(p)),g=0;g<C.length;++g){var P=C[g];M.addTriangle(P[0],P[1],P[2])}return E&&u(b,M),L?S?x?c(M,0,x):M.cells():c(M,1,x):c(M,-1)}else return C}},2573:function(i){i.exports=a;function a(o,s,l,u){var c=s[0],f=s[1],h=s[2],d=s[3];return o[0]=c+u*(l[0]-c),o[1]=f+u*(l[1]-f),o[2]=h+u*(l[2]-h),o[3]=d+u*(l[3]-d),o}},2613:function(i){i.exports=1e-6},2640:function(i,a,o){"use strict";var s=o(1888);i.exports=c;var l={"false,0,1":function(h,d,v,_,b){return function(k,E,S,L){var x=k.shape[0]|0,C=k.shape[1]|0,M=k.data,g=k.offset|0,P=k.stride[0]|0,T=k.stride[1]|0,z=g,O,V=-P|0,G=0,Z=-T|0,j=0,N=-P-T|0,H=0,te=P|0,oe=T-P*x|0,_e=0,Ee=0,Ce=0,me=2*x|0,ie=_(me),Se=_(me),Le=0,Ae=0,Fe=-1,Pe=-1,ge=0,Re=-x|0,ce=x|0,Ze=0,ut=-x-1|0,pt=x-1|0,Zt=0,st=0,lt=0;for(_e=0;_e<x;++_e)ie[Le++]=v(M[z],E,S,L),z+=te;if(z+=oe,C>0){if(Ee=1,ie[Le++]=v(M[z],E,S,L),z+=te,x>0)for(_e=1,O=M[z],Ae=ie[Le]=v(O,E,S,L),ge=ie[Le+Fe],Ze=ie[Le+Re],Zt=ie[Le+ut],(Ae!==ge||Ae!==Ze||Ae!==Zt)&&(G=M[z+V],j=M[z+Z],H=M[z+N],h(_e,Ee,O,G,j,H,Ae,ge,Ze,Zt,E,S,L),st=Se[Le]=Ce++),Le+=1,z+=te,_e=2;_e<x;++_e)O=M[z],Ae=ie[Le]=v(O,E,S,L),ge=ie[Le+Fe],Ze=ie[Le+Re],Zt=ie[Le+ut],(Ae!==ge||Ae!==Ze||Ae!==Zt)&&(G=M[z+V],j=M[z+Z],H=M[z+N],h(_e,Ee,O,G,j,H,Ae,ge,Ze,Zt,E,S,L),st=Se[Le]=Ce++,Zt!==ge&&d(Se[Le+Fe],st,H,G,Zt,ge,E,S,L)),Le+=1,z+=te;for(z+=oe,Le=0,lt=Fe,Fe=Pe,Pe=lt,lt=Re,Re=ce,ce=lt,lt=ut,ut=pt,pt=lt,Ee=2;Ee<C;++Ee){if(ie[Le++]=v(M[z],E,S,L),z+=te,x>0)for(_e=1,O=M[z],Ae=ie[Le]=v(O,E,S,L),ge=ie[Le+Fe],Ze=ie[Le+Re],Zt=ie[Le+ut],(Ae!==ge||Ae!==Ze||Ae!==Zt)&&(G=M[z+V],j=M[z+Z],H=M[z+N],h(_e,Ee,O,G,j,H,Ae,ge,Ze,Zt,E,S,L),st=Se[Le]=Ce++,Zt!==Ze&&d(Se[Le+Re],st,j,H,Ze,Zt,E,S,L)),Le+=1,z+=te,_e=2;_e<x;++_e)O=M[z],Ae=ie[Le]=v(O,E,S,L),ge=ie[Le+Fe],Ze=ie[Le+Re],Zt=ie[Le+ut],(Ae!==ge||Ae!==Ze||Ae!==Zt)&&(G=M[z+V],j=M[z+Z],H=M[z+N],h(_e,Ee,O,G,j,H,Ae,ge,Ze,Zt,E,S,L),st=Se[Le]=Ce++,Zt!==Ze&&d(Se[Le+Re],st,j,H,Ze,Zt,E,S,L),Zt!==ge&&d(Se[Le+Fe],st,H,G,Zt,ge,E,S,L)),Le+=1,z+=te;Ee&1&&(Le=0),lt=Fe,Fe=Pe,Pe=lt,lt=Re,Re=ce,ce=lt,lt=ut,ut=pt,pt=lt,z+=oe}}b(Se),b(ie)}},"false,1,0":function(h,d,v,_,b){return function(k,E,S,L){var x=k.shape[0]|0,C=k.shape[1]|0,M=k.data,g=k.offset|0,P=k.stride[0]|0,T=k.stride[1]|0,z=g,O,V=-P|0,G=0,Z=-T|0,j=0,N=-P-T|0,H=0,te=T|0,oe=P-T*C|0,_e=0,Ee=0,Ce=0,me=2*C|0,ie=_(me),Se=_(me),Le=0,Ae=0,Fe=-1,Pe=-1,ge=0,Re=-C|0,ce=C|0,Ze=0,ut=-C-1|0,pt=C-1|0,Zt=0,st=0,lt=0;for(Ee=0;Ee<C;++Ee)ie[Le++]=v(M[z],E,S,L),z+=te;if(z+=oe,x>0){if(_e=1,ie[Le++]=v(M[z],E,S,L),z+=te,C>0)for(Ee=1,O=M[z],Ae=ie[Le]=v(O,E,S,L),Ze=ie[Le+Re],ge=ie[Le+Fe],Zt=ie[Le+ut],(Ae!==Ze||Ae!==ge||Ae!==Zt)&&(G=M[z+V],j=M[z+Z],H=M[z+N],h(_e,Ee,O,G,j,H,Ae,Ze,ge,Zt,E,S,L),st=Se[Le]=Ce++),Le+=1,z+=te,Ee=2;Ee<C;++Ee)O=M[z],Ae=ie[Le]=v(O,E,S,L),Ze=ie[Le+Re],ge=ie[Le+Fe],Zt=ie[Le+ut],(Ae!==Ze||Ae!==ge||Ae!==Zt)&&(G=M[z+V],j=M[z+Z],H=M[z+N],h(_e,Ee,O,G,j,H,Ae,Ze,ge,Zt,E,S,L),st=Se[Le]=Ce++,Zt!==ge&&d(Se[Le+Fe],st,j,H,ge,Zt,E,S,L)),Le+=1,z+=te;for(z+=oe,Le=0,lt=Re,Re=ce,ce=lt,lt=Fe,Fe=Pe,Pe=lt,lt=ut,ut=pt,pt=lt,_e=2;_e<x;++_e){if(ie[Le++]=v(M[z],E,S,L),z+=te,C>0)for(Ee=1,O=M[z],Ae=ie[Le]=v(O,E,S,L),Ze=ie[Le+Re],ge=ie[Le+Fe],Zt=ie[Le+ut],(Ae!==Ze||Ae!==ge||Ae!==Zt)&&(G=M[z+V],j=M[z+Z],H=M[z+N],h(_e,Ee,O,G,j,H,Ae,Ze,ge,Zt,E,S,L),st=Se[Le]=Ce++,Zt!==Ze&&d(Se[Le+Re],st,H,G,Zt,Ze,E,S,L)),Le+=1,z+=te,Ee=2;Ee<C;++Ee)O=M[z],Ae=ie[Le]=v(O,E,S,L),Ze=ie[Le+Re],ge=ie[Le+Fe],Zt=ie[Le+ut],(Ae!==Ze||Ae!==ge||Ae!==Zt)&&(G=M[z+V],j=M[z+Z],H=M[z+N],h(_e,Ee,O,G,j,H,Ae,Ze,ge,Zt,E,S,L),st=Se[Le]=Ce++,Zt!==ge&&d(Se[Le+Fe],st,j,H,ge,Zt,E,S,L),Zt!==Ze&&d(Se[Le+Re],st,H,G,Zt,Ze,E,S,L)),Le+=1,z+=te;_e&1&&(Le=0),lt=Re,Re=ce,ce=lt,lt=Fe,Fe=Pe,Pe=lt,lt=ut,ut=pt,pt=lt,z+=oe}}b(Se),b(ie)}}};function u(f,h,d,v,_,b){var p=[b,_].join(","),k=l[p];return k(f,h,d,s.mallocUint32,s.freeUint32)}function c(f){function h(E){throw new Error("ndarray-extract-contour: "+E)}typeof f!="object"&&h("Must specify arguments");var d=f.order;Array.isArray(d)||h("Must specify order");var v=f.arrayArguments||1;v<1&&h("Must have at least one array argument");var _=f.scalarArguments||0;_<0&&h("Scalar arg count must be > 0"),typeof f.vertex!="function"&&h("Must specify vertex creation function"),typeof f.cell!="function"&&h("Must specify cell creation function"),typeof f.phase!="function"&&h("Must specify phase function");for(var b=f.getters||[],p=new Array(v),k=0;k<v;++k)b.indexOf(k)>=0?p[k]=!0:p[k]=!1;return u(f.vertex,f.cell,f.phase,_,d,p)}},2642:function(i,a,o){"use strict";i.exports=u;var s=o(727);function l(c){for(var f=0,h=0;h<c.length;++h)f+=c[h];return f}function u(c,f){for(var h=f.length,d=new Array(h+1),v=0;v<h;++v){for(var _=new Array(h+1),b=0;b<=h;++b)_[b]=c[b][v];d[v]=_}d[h]=new Array(h+1);for(var v=0;v<=h;++v)d[h][v]=1;for(var p=new Array(h+1),v=0;v<h;++v)p[v]=f[v];p[h]=1;var k=s(d,p),E=l(k[h+1]);E===0&&(E=1);for(var S=new Array(h+1),v=0;v<=h;++v)S[v]=l(k[v])/E;return S}},2646:function(i,a,o){"use strict";var s=o(5250),l=o(8210),u=o(8545),c=o(3012),f=6;function h(M){var g=M===3?b:M===4?p:M===5?k:E;return g(l,u,s,c)}function d(){return 0}function v(){return 0}function _(){return 0}function b(M,g,P,T){function z(O,V,G){var Z=P(O[0],O[0]),j=T(Z,V[0]),N=T(Z,G[0]),H=P(V[0],V[0]),te=T(H,O[0]),oe=T(H,G[0]),_e=P(G[0],G[0]),Ee=T(_e,O[0]),Ce=T(_e,V[0]),me=M(g(Ce,oe),g(te,j)),ie=g(Ee,N),Se=g(me,ie);return Se[Se.length-1]}return z}function p(M,g,P,T){function z(O,V,G,Z){var j=M(P(O[0],O[0]),P(O[1],O[1])),N=T(j,V[0]),H=T(j,G[0]),te=T(j,Z[0]),oe=M(P(V[0],V[0]),P(V[1],V[1])),_e=T(oe,O[0]),Ee=T(oe,G[0]),Ce=T(oe,Z[0]),me=M(P(G[0],G[0]),P(G[1],G[1])),ie=T(me,O[0]),Se=T(me,V[0]),Le=T(me,Z[0]),Ae=M(P(Z[0],Z[0]),P(Z[1],Z[1])),Fe=T(Ae,O[0]),Pe=T(Ae,V[0]),ge=T(Ae,G[0]),Re=M(M(T(g(ge,Le),V[1]),M(T(g(Pe,Ce),-G[1]),T(g(Se,Ee),Z[1]))),M(T(g(Pe,Ce),O[1]),M(T(g(Fe,te),-V[1]),T(g(_e,N),Z[1])))),ce=M(M(T(g(ge,Le),O[1]),M(T(g(Fe,te),-G[1]),T(g(ie,H),Z[1]))),M(T(g(Se,Ee),O[1]),M(T(g(ie,H),-V[1]),T(g(_e,N),G[1])))),Ze=g(Re,ce);return Ze[Ze.length-1]}return z}function k(M,g,P,T){function z(O,V,G,Z,j){var N=M(P(O[0],O[0]),M(P(O[1],O[1]),P(O[2],O[2]))),H=T(N,V[0]),te=T(N,G[0]),oe=T(N,Z[0]),_e=T(N,j[0]),Ee=M(P(V[0],V[0]),M(P(V[1],V[1]),P(V[2],V[2]))),Ce=T(Ee,O[0]),me=T(Ee,G[0]),ie=T(Ee,Z[0]),Se=T(Ee,j[0]),Le=M(P(G[0],G[0]),M(P(G[1],G[1]),P(G[2],G[2]))),Ae=T(Le,O[0]),Fe=T(Le,V[0]),Pe=T(Le,Z[0]),ge=T(Le,j[0]),Re=M(P(Z[0],Z[0]),M(P(Z[1],Z[1]),P(Z[2],Z[2]))),ce=T(Re,O[0]),Ze=T(Re,V[0]),ut=T(Re,G[0]),pt=T(Re,j[0]),Zt=M(P(j[0],j[0]),M(P(j[1],j[1]),P(j[2],j[2]))),st=T(Zt,O[0]),lt=T(Zt,V[0]),Gt=T(Zt,G[0]),Nt=T(Zt,Z[0]),Jt=M(M(M(T(M(T(g(Nt,pt),G[1]),M(T(g(Gt,ge),-Z[1]),T(g(ut,Pe),j[1]))),V[2]),M(T(M(T(g(Nt,pt),V[1]),M(T(g(lt,Se),-Z[1]),T(g(Ze,ie),j[1]))),-G[2]),T(M(T(g(Gt,ge),V[1]),M(T(g(lt,Se),-G[1]),T(g(Fe,me),j[1]))),Z[2]))),M(T(M(T(g(ut,Pe),V[1]),M(T(g(Ze,ie),-G[1]),T(g(Fe,me),Z[1]))),-j[2]),M(T(M(T(g(Nt,pt),V[1]),M(T(g(lt,Se),-Z[1]),T(g(Ze,ie),j[1]))),O[2]),T(M(T(g(Nt,pt),O[1]),M(T(g(st,_e),-Z[1]),T(g(ce,oe),j[1]))),-V[2])))),M(M(T(M(T(g(lt,Se),O[1]),M(T(g(st,_e),-V[1]),T(g(Ce,H),j[1]))),Z[2]),M(T(M(T(g(Ze,ie),O[1]),M(T(g(ce,oe),-V[1]),T(g(Ce,H),Z[1]))),-j[2]),T(M(T(g(ut,Pe),V[1]),M(T(g(Ze,ie),-G[1]),T(g(Fe,me),Z[1]))),O[2]))),M(T(M(T(g(ut,Pe),O[1]),M(T(g(ce,oe),-G[1]),T(g(Ae,te),Z[1]))),-V[2]),M(T(M(T(g(Ze,ie),O[1]),M(T(g(ce,oe),-V[1]),T(g(Ce,H),Z[1]))),G[2]),T(M(T(g(Fe,me),O[1]),M(T(g(Ae,te),-V[1]),T(g(Ce,H),G[1]))),-Z[2]))))),sr=M(M(M(T(M(T(g(Nt,pt),G[1]),M(T(g(Gt,ge),-Z[1]),T(g(ut,Pe),j[1]))),O[2]),T(M(T(g(Nt,pt),O[1]),M(T(g(st,_e),-Z[1]),T(g(ce,oe),j[1]))),-G[2])),M(T(M(T(g(Gt,ge),O[1]),M(T(g(st,_e),-G[1]),T(g(Ae,te),j[1]))),Z[2]),T(M(T(g(ut,Pe),O[1]),M(T(g(ce,oe),-G[1]),T(g(Ae,te),Z[1]))),-j[2]))),M(M(T(M(T(g(Gt,ge),V[1]),M(T(g(lt,Se),-G[1]),T(g(Fe,me),j[1]))),O[2]),T(M(T(g(Gt,ge),O[1]),M(T(g(st,_e),-G[1]),T(g(Ae,te),j[1]))),-V[2])),M(T(M(T(g(lt,Se),O[1]),M(T(g(st,_e),-V[1]),T(g(Ce,H),j[1]))),G[2]),T(M(T(g(Fe,me),O[1]),M(T(g(Ae,te),-V[1]),T(g(Ce,H),G[1]))),-j[2])))),wr=g(Jt,sr);return wr[wr.length-1]}return z}function E(M,g,P,T){function z(O,V,G,Z,j,N){var H=M(M(P(O[0],O[0]),P(O[1],O[1])),M(P(O[2],O[2]),P(O[3],O[3]))),te=T(H,V[0]),oe=T(H,G[0]),_e=T(H,Z[0]),Ee=T(H,j[0]),Ce=T(H,N[0]),me=M(M(P(V[0],V[0]),P(V[1],V[1])),M(P(V[2],V[2]),P(V[3],V[3]))),ie=T(me,O[0]),Se=T(me,G[0]),Le=T(me,Z[0]),Ae=T(me,j[0]),Fe=T(me,N[0]),Pe=M(M(P(G[0],G[0]),P(G[1],G[1])),M(P(G[2],G[2]),P(G[3],G[3]))),ge=T(Pe,O[0]),Re=T(Pe,V[0]),ce=T(Pe,Z[0]),Ze=T(Pe,j[0]),ut=T(Pe,N[0]),pt=M(M(P(Z[0],Z[0]),P(Z[1],Z[1])),M(P(Z[2],Z[2]),P(Z[3],Z[3]))),Zt=T(pt,O[0]),st=T(pt,V[0]),lt=T(pt,G[0]),Gt=T(pt,j[0]),Nt=T(pt,N[0]),Jt=M(M(P(j[0],j[0]),P(j[1],j[1])),M(P(j[2],j[2]),P(j[3],j[3]))),sr=T(Jt,O[0]),wr=T(Jt,V[0]),cr=T(Jt,G[0]),$e=T(Jt,Z[0]),St=T(Jt,N[0]),Qt=M(M(P(N[0],N[0]),P(N[1],N[1])),M(P(N[2],N[2]),P(N[3],N[3]))),Vt=T(Qt,O[0]),_t=T(Qt,V[0]),It=T(Qt,G[0]),mt=T(Qt,Z[0]),er=T(Qt,j[0]),lr=M(M(M(T(M(M(T(M(T(g(er,St),Z[1]),M(T(g(mt,Nt),-j[1]),T(g($e,Gt),N[1]))),G[2]),T(M(T(g(er,St),G[1]),M(T(g(It,ut),-j[1]),T(g(cr,Ze),N[1]))),-Z[2])),M(T(M(T(g(mt,Nt),G[1]),M(T(g(It,ut),-Z[1]),T(g(lt,ce),N[1]))),j[2]),T(M(T(g($e,Gt),G[1]),M(T(g(cr,Ze),-Z[1]),T(g(lt,ce),j[1]))),-N[2]))),V[3]),M(T(M(M(T(M(T(g(er,St),Z[1]),M(T(g(mt,Nt),-j[1]),T(g($e,Gt),N[1]))),V[2]),T(M(T(g(er,St),V[1]),M(T(g(_t,Fe),-j[1]),T(g(wr,Ae),N[1]))),-Z[2])),M(T(M(T(g(mt,Nt),V[1]),M(T(g(_t,Fe),-Z[1]),T(g(st,Le),N[1]))),j[2]),T(M(T(g($e,Gt),V[1]),M(T(g(wr,Ae),-Z[1]),T(g(st,Le),j[1]))),-N[2]))),-G[3]),T(M(M(T(M(T(g(er,St),G[1]),M(T(g(It,ut),-j[1]),T(g(cr,Ze),N[1]))),V[2]),T(M(T(g(er,St),V[1]),M(T(g(_t,Fe),-j[1]),T(g(wr,Ae),N[1]))),-G[2])),M(T(M(T(g(It,ut),V[1]),M(T(g(_t,Fe),-G[1]),T(g(Re,Se),N[1]))),j[2]),T(M(T(g(cr,Ze),V[1]),M(T(g(wr,Ae),-G[1]),T(g(Re,Se),j[1]))),-N[2]))),Z[3]))),M(M(T(M(M(T(M(T(g(mt,Nt),G[1]),M(T(g(It,ut),-Z[1]),T(g(lt,ce),N[1]))),V[2]),T(M(T(g(mt,Nt),V[1]),M(T(g(_t,Fe),-Z[1]),T(g(st,Le),N[1]))),-G[2])),M(T(M(T(g(It,ut),V[1]),M(T(g(_t,Fe),-G[1]),T(g(Re,Se),N[1]))),Z[2]),T(M(T(g(lt,ce),V[1]),M(T(g(st,Le),-G[1]),T(g(Re,Se),Z[1]))),-N[2]))),-j[3]),T(M(M(T(M(T(g($e,Gt),G[1]),M(T(g(cr,Ze),-Z[1]),T(g(lt,ce),j[1]))),V[2]),T(M(T(g($e,Gt),V[1]),M(T(g(wr,Ae),-Z[1]),T(g(st,Le),j[1]))),-G[2])),M(T(M(T(g(cr,Ze),V[1]),M(T(g(wr,Ae),-G[1]),T(g(Re,Se),j[1]))),Z[2]),T(M(T(g(lt,ce),V[1]),M(T(g(st,Le),-G[1]),T(g(Re,Se),Z[1]))),-j[2]))),N[3])),M(T(M(M(T(M(T(g(er,St),Z[1]),M(T(g(mt,Nt),-j[1]),T(g($e,Gt),N[1]))),V[2]),T(M(T(g(er,St),V[1]),M(T(g(_t,Fe),-j[1]),T(g(wr,Ae),N[1]))),-Z[2])),M(T(M(T(g(mt,Nt),V[1]),M(T(g(_t,Fe),-Z[1]),T(g(st,Le),N[1]))),j[2]),T(M(T(g($e,Gt),V[1]),M(T(g(wr,Ae),-Z[1]),T(g(st,Le),j[1]))),-N[2]))),O[3]),T(M(M(T(M(T(g(er,St),Z[1]),M(T(g(mt,Nt),-j[1]),T(g($e,Gt),N[1]))),O[2]),T(M(T(g(er,St),O[1]),M(T(g(Vt,Ce),-j[1]),T(g(sr,Ee),N[1]))),-Z[2])),M(T(M(T(g(mt,Nt),O[1]),M(T(g(Vt,Ce),-Z[1]),T(g(Zt,_e),N[1]))),j[2]),T(M(T(g($e,Gt),O[1]),M(T(g(sr,Ee),-Z[1]),T(g(Zt,_e),j[1]))),-N[2]))),-V[3])))),M(M(M(T(M(M(T(M(T(g(er,St),V[1]),M(T(g(_t,Fe),-j[1]),T(g(wr,Ae),N[1]))),O[2]),T(M(T(g(er,St),O[1]),M(T(g(Vt,Ce),-j[1]),T(g(sr,Ee),N[1]))),-V[2])),M(T(M(T(g(_t,Fe),O[1]),M(T(g(Vt,Ce),-V[1]),T(g(ie,te),N[1]))),j[2]),T(M(T(g(wr,Ae),O[1]),M(T(g(sr,Ee),-V[1]),T(g(ie,te),j[1]))),-N[2]))),Z[3]),T(M(M(T(M(T(g(mt,Nt),V[1]),M(T(g(_t,Fe),-Z[1]),T(g(st,Le),N[1]))),O[2]),T(M(T(g(mt,Nt),O[1]),M(T(g(Vt,Ce),-Z[1]),T(g(Zt,_e),N[1]))),-V[2])),M(T(M(T(g(_t,Fe),O[1]),M(T(g(Vt,Ce),-V[1]),T(g(ie,te),N[1]))),Z[2]),T(M(T(g(st,Le),O[1]),M(T(g(Zt,_e),-V[1]),T(g(ie,te),Z[1]))),-N[2]))),-j[3])),M(T(M(M(T(M(T(g($e,Gt),V[1]),M(T(g(wr,Ae),-Z[1]),T(g(st,Le),j[1]))),O[2]),T(M(T(g($e,Gt),O[1]),M(T(g(sr,Ee),-Z[1]),T(g(Zt,_e),j[1]))),-V[2])),M(T(M(T(g(wr,Ae),O[1]),M(T(g(sr,Ee),-V[1]),T(g(ie,te),j[1]))),Z[2]),T(M(T(g(st,Le),O[1]),M(T(g(Zt,_e),-V[1]),T(g(ie,te),Z[1]))),-j[2]))),N[3]),T(M(M(T(M(T(g(mt,Nt),G[1]),M(T(g(It,ut),-Z[1]),T(g(lt,ce),N[1]))),V[2]),T(M(T(g(mt,Nt),V[1]),M(T(g(_t,Fe),-Z[1]),T(g(st,Le),N[1]))),-G[2])),M(T(M(T(g(It,ut),V[1]),M(T(g(_t,Fe),-G[1]),T(g(Re,Se),N[1]))),Z[2]),T(M(T(g(lt,ce),V[1]),M(T(g(st,Le),-G[1]),T(g(Re,Se),Z[1]))),-N[2]))),O[3]))),M(M(T(M(M(T(M(T(g(mt,Nt),G[1]),M(T(g(It,ut),-Z[1]),T(g(lt,ce),N[1]))),O[2]),T(M(T(g(mt,Nt),O[1]),M(T(g(Vt,Ce),-Z[1]),T(g(Zt,_e),N[1]))),-G[2])),M(T(M(T(g(It,ut),O[1]),M(T(g(Vt,Ce),-G[1]),T(g(ge,oe),N[1]))),Z[2]),T(M(T(g(lt,ce),O[1]),M(T(g(Zt,_e),-G[1]),T(g(ge,oe),Z[1]))),-N[2]))),-V[3]),T(M(M(T(M(T(g(mt,Nt),V[1]),M(T(g(_t,Fe),-Z[1]),T(g(st,Le),N[1]))),O[2]),T(M(T(g(mt,Nt),O[1]),M(T(g(Vt,Ce),-Z[1]),T(g(Zt,_e),N[1]))),-V[2])),M(T(M(T(g(_t,Fe),O[1]),M(T(g(Vt,Ce),-V[1]),T(g(ie,te),N[1]))),Z[2]),T(M(T(g(st,Le),O[1]),M(T(g(Zt,_e),-V[1]),T(g(ie,te),Z[1]))),-N[2]))),G[3])),M(T(M(M(T(M(T(g(It,ut),V[1]),M(T(g(_t,Fe),-G[1]),T(g(Re,Se),N[1]))),O[2]),T(M(T(g(It,ut),O[1]),M(T(g(Vt,Ce),-G[1]),T(g(ge,oe),N[1]))),-V[2])),M(T(M(T(g(_t,Fe),O[1]),M(T(g(Vt,Ce),-V[1]),T(g(ie,te),N[1]))),G[2]),T(M(T(g(Re,Se),O[1]),M(T(g(ge,oe),-V[1]),T(g(ie,te),G[1]))),-N[2]))),-Z[3]),T(M(M(T(M(T(g(lt,ce),V[1]),M(T(g(st,Le),-G[1]),T(g(Re,Se),Z[1]))),O[2]),T(M(T(g(lt,ce),O[1]),M(T(g(Zt,_e),-G[1]),T(g(ge,oe),Z[1]))),-V[2])),M(T(M(T(g(st,Le),O[1]),M(T(g(Zt,_e),-V[1]),T(g(ie,te),Z[1]))),G[2]),T(M(T(g(Re,Se),O[1]),M(T(g(ge,oe),-V[1]),T(g(ie,te),G[1]))),-Z[2]))),N[3]))))),Tr=M(M(M(T(M(M(T(M(T(g(er,St),Z[1]),M(T(g(mt,Nt),-j[1]),T(g($e,Gt),N[1]))),G[2]),T(M(T(g(er,St),G[1]),M(T(g(It,ut),-j[1]),T(g(cr,Ze),N[1]))),-Z[2])),M(T(M(T(g(mt,Nt),G[1]),M(T(g(It,ut),-Z[1]),T(g(lt,ce),N[1]))),j[2]),T(M(T(g($e,Gt),G[1]),M(T(g(cr,Ze),-Z[1]),T(g(lt,ce),j[1]))),-N[2]))),O[3]),M(T(M(M(T(M(T(g(er,St),Z[1]),M(T(g(mt,Nt),-j[1]),T(g($e,Gt),N[1]))),O[2]),T(M(T(g(er,St),O[1]),M(T(g(Vt,Ce),-j[1]),T(g(sr,Ee),N[1]))),-Z[2])),M(T(M(T(g(mt,Nt),O[1]),M(T(g(Vt,Ce),-Z[1]),T(g(Zt,_e),N[1]))),j[2]),T(M(T(g($e,Gt),O[1]),M(T(g(sr,Ee),-Z[1]),T(g(Zt,_e),j[1]))),-N[2]))),-G[3]),T(M(M(T(M(T(g(er,St),G[1]),M(T(g(It,ut),-j[1]),T(g(cr,Ze),N[1]))),O[2]),T(M(T(g(er,St),O[1]),M(T(g(Vt,Ce),-j[1]),T(g(sr,Ee),N[1]))),-G[2])),M(T(M(T(g(It,ut),O[1]),M(T(g(Vt,Ce),-G[1]),T(g(ge,oe),N[1]))),j[2]),T(M(T(g(cr,Ze),O[1]),M(T(g(sr,Ee),-G[1]),T(g(ge,oe),j[1]))),-N[2]))),Z[3]))),M(M(T(M(M(T(M(T(g(mt,Nt),G[1]),M(T(g(It,ut),-Z[1]),T(g(lt,ce),N[1]))),O[2]),T(M(T(g(mt,Nt),O[1]),M(T(g(Vt,Ce),-Z[1]),T(g(Zt,_e),N[1]))),-G[2])),M(T(M(T(g(It,ut),O[1]),M(T(g(Vt,Ce),-G[1]),T(g(ge,oe),N[1]))),Z[2]),T(M(T(g(lt,ce),O[1]),M(T(g(Zt,_e),-G[1]),T(g(ge,oe),Z[1]))),-N[2]))),-j[3]),T(M(M(T(M(T(g($e,Gt),G[1]),M(T(g(cr,Ze),-Z[1]),T(g(lt,ce),j[1]))),O[2]),T(M(T(g($e,Gt),O[1]),M(T(g(sr,Ee),-Z[1]),T(g(Zt,_e),j[1]))),-G[2])),M(T(M(T(g(cr,Ze),O[1]),M(T(g(sr,Ee),-G[1]),T(g(ge,oe),j[1]))),Z[2]),T(M(T(g(lt,ce),O[1]),M(T(g(Zt,_e),-G[1]),T(g(ge,oe),Z[1]))),-j[2]))),N[3])),M(T(M(M(T(M(T(g(er,St),G[1]),M(T(g(It,ut),-j[1]),T(g(cr,Ze),N[1]))),V[2]),T(M(T(g(er,St),V[1]),M(T(g(_t,Fe),-j[1]),T(g(wr,Ae),N[1]))),-G[2])),M(T(M(T(g(It,ut),V[1]),M(T(g(_t,Fe),-G[1]),T(g(Re,Se),N[1]))),j[2]),T(M(T(g(cr,Ze),V[1]),M(T(g(wr,Ae),-G[1]),T(g(Re,Se),j[1]))),-N[2]))),O[3]),T(M(M(T(M(T(g(er,St),G[1]),M(T(g(It,ut),-j[1]),T(g(cr,Ze),N[1]))),O[2]),T(M(T(g(er,St),O[1]),M(T(g(Vt,Ce),-j[1]),T(g(sr,Ee),N[1]))),-G[2])),M(T(M(T(g(It,ut),O[1]),M(T(g(Vt,Ce),-G[1]),T(g(ge,oe),N[1]))),j[2]),T(M(T(g(cr,Ze),O[1]),M(T(g(sr,Ee),-G[1]),T(g(ge,oe),j[1]))),-N[2]))),-V[3])))),M(M(M(T(M(M(T(M(T(g(er,St),V[1]),M(T(g(_t,Fe),-j[1]),T(g(wr,Ae),N[1]))),O[2]),T(M(T(g(er,St),O[1]),M(T(g(Vt,Ce),-j[1]),T(g(sr,Ee),N[1]))),-V[2])),M(T(M(T(g(_t,Fe),O[1]),M(T(g(Vt,Ce),-V[1]),T(g(ie,te),N[1]))),j[2]),T(M(T(g(wr,Ae),O[1]),M(T(g(sr,Ee),-V[1]),T(g(ie,te),j[1]))),-N[2]))),G[3]),T(M(M(T(M(T(g(It,ut),V[1]),M(T(g(_t,Fe),-G[1]),T(g(Re,Se),N[1]))),O[2]),T(M(T(g(It,ut),O[1]),M(T(g(Vt,Ce),-G[1]),T(g(ge,oe),N[1]))),-V[2])),M(T(M(T(g(_t,Fe),O[1]),M(T(g(Vt,Ce),-V[1]),T(g(ie,te),N[1]))),G[2]),T(M(T(g(Re,Se),O[1]),M(T(g(ge,oe),-V[1]),T(g(ie,te),G[1]))),-N[2]))),-j[3])),M(T(M(M(T(M(T(g(cr,Ze),V[1]),M(T(g(wr,Ae),-G[1]),T(g(Re,Se),j[1]))),O[2]),T(M(T(g(cr,Ze),O[1]),M(T(g(sr,Ee),-G[1]),T(g(ge,oe),j[1]))),-V[2])),M(T(M(T(g(wr,Ae),O[1]),M(T(g(sr,Ee),-V[1]),T(g(ie,te),j[1]))),G[2]),T(M(T(g(Re,Se),O[1]),M(T(g(ge,oe),-V[1]),T(g(ie,te),G[1]))),-j[2]))),N[3]),T(M(M(T(M(T(g($e,Gt),G[1]),M(T(g(cr,Ze),-Z[1]),T(g(lt,ce),j[1]))),V[2]),T(M(T(g($e,Gt),V[1]),M(T(g(wr,Ae),-Z[1]),T(g(st,Le),j[1]))),-G[2])),M(T(M(T(g(cr,Ze),V[1]),M(T(g(wr,Ae),-G[1]),T(g(Re,Se),j[1]))),Z[2]),T(M(T(g(lt,ce),V[1]),M(T(g(st,Le),-G[1]),T(g(Re,Se),Z[1]))),-j[2]))),O[3]))),M(M(T(M(M(T(M(T(g($e,Gt),G[1]),M(T(g(cr,Ze),-Z[1]),T(g(lt,ce),j[1]))),O[2]),T(M(T(g($e,Gt),O[1]),M(T(g(sr,Ee),-Z[1]),T(g(Zt,_e),j[1]))),-G[2])),M(T(M(T(g(cr,Ze),O[1]),M(T(g(sr,Ee),-G[1]),T(g(ge,oe),j[1]))),Z[2]),T(M(T(g(lt,ce),O[1]),M(T(g(Zt,_e),-G[1]),T(g(ge,oe),Z[1]))),-j[2]))),-V[3]),T(M(M(T(M(T(g($e,Gt),V[1]),M(T(g(wr,Ae),-Z[1]),T(g(st,Le),j[1]))),O[2]),T(M(T(g($e,Gt),O[1]),M(T(g(sr,Ee),-Z[1]),T(g(Zt,_e),j[1]))),-V[2])),M(T(M(T(g(wr,Ae),O[1]),M(T(g(sr,Ee),-V[1]),T(g(ie,te),j[1]))),Z[2]),T(M(T(g(st,Le),O[1]),M(T(g(Zt,_e),-V[1]),T(g(ie,te),Z[1]))),-j[2]))),G[3])),M(T(M(M(T(M(T(g(cr,Ze),V[1]),M(T(g(wr,Ae),-G[1]),T(g(Re,Se),j[1]))),O[2]),T(M(T(g(cr,Ze),O[1]),M(T(g(sr,Ee),-G[1]),T(g(ge,oe),j[1]))),-V[2])),M(T(M(T(g(wr,Ae),O[1]),M(T(g(sr,Ee),-V[1]),T(g(ie,te),j[1]))),G[2]),T(M(T(g(Re,Se),O[1]),M(T(g(ge,oe),-V[1]),T(g(ie,te),G[1]))),-j[2]))),-Z[3]),T(M(M(T(M(T(g(lt,ce),V[1]),M(T(g(st,Le),-G[1]),T(g(Re,Se),Z[1]))),O[2]),T(M(T(g(lt,ce),O[1]),M(T(g(Zt,_e),-G[1]),T(g(ge,oe),Z[1]))),-V[2])),M(T(M(T(g(st,Le),O[1]),M(T(g(Zt,_e),-V[1]),T(g(ie,te),Z[1]))),G[2]),T(M(T(g(Re,Se),O[1]),M(T(g(ge,oe),-V[1]),T(g(ie,te),G[1]))),-Z[2]))),j[3]))))),Lr=g(lr,Tr);return Lr[Lr.length-1]}return z}var S=[d,v,_];function L(M){var g=S[M.length];return g||(g=S[M.length]=h(M.length)),g.apply(void 0,M)}function x(M,g,P,T,z,O,V,G){function Z(j,N,H,te,oe,_e){switch(arguments.length){case 0:case 1:return 0;case 2:return T(j,N);case 3:return z(j,N,H);case 4:return O(j,N,H,te);case 5:return V(j,N,H,te,oe);case 6:return G(j,N,H,te,oe,_e)}for(var Ee=new Array(arguments.length),Ce=0;Ce<arguments.length;++Ce)Ee[Ce]=arguments[Ce];return M(Ee)}return Z}function C(){for(;S.length<=f;)S.push(h(S.length));i.exports=x.apply(void 0,[L].concat(S));for(var M=0;M<=f;++M)i.exports[M]=S[M]}C()},2651:function(i,a,o){"use strict";var s=o(6859),l=o(2361);i.exports=u;function u(c){var f=l.exponent(c);return f<52?new s(c):new s(c*Math.pow(2,52-f)).ushln(f-52)}},2652:function(i,a,o){var s=o(4335),l=o(6864),u=o(1903),c=o(9921),f=o(7608),h=o(5665),d={length:o(1387),normalize:o(3536),dot:o(244),cross:o(5911)},v=l(),_=l(),b=[0,0,0,0],p=[[0,0,0],[0,0,0],[0,0,0]],k=[0,0,0];i.exports=function(C,M,g,P,T,z){if(M||(M=[0,0,0]),g||(g=[0,0,0]),P||(P=[0,0,0]),T||(T=[0,0,0,1]),z||(z=[0,0,0,1]),!s(v,C)||(u(_,v),_[3]=0,_[7]=0,_[11]=0,_[15]=1,Math.abs(c(_)<1e-8)))return!1;var O=v[3],V=v[7],G=v[11],Z=v[12],j=v[13],N=v[14],H=v[15];if(O!==0||V!==0||G!==0){b[0]=O,b[1]=V,b[2]=G,b[3]=H;var te=f(_,_);if(!te)return!1;h(_,_),E(T,b,_)}else T[0]=T[1]=T[2]=0,T[3]=1;if(M[0]=Z,M[1]=j,M[2]=N,S(p,v),g[0]=d.length(p[0]),d.normalize(p[0],p[0]),P[0]=d.dot(p[0],p[1]),L(p[1],p[1],p[0],1,-P[0]),g[1]=d.length(p[1]),d.normalize(p[1],p[1]),P[0]/=g[1],P[1]=d.dot(p[0],p[2]),L(p[2],p[2],p[0],1,-P[1]),P[2]=d.dot(p[1],p[2]),L(p[2],p[2],p[1],1,-P[2]),g[2]=d.length(p[2]),d.normalize(p[2],p[2]),P[1]/=g[2],P[2]/=g[2],d.cross(k,p[1],p[2]),d.dot(p[0],k)<0)for(var oe=0;oe<3;oe++)g[oe]*=-1,p[oe][0]*=-1,p[oe][1]*=-1,p[oe][2]*=-1;return z[0]=.5*Math.sqrt(Math.max(1+p[0][0]-p[1][1]-p[2][2],0)),z[1]=.5*Math.sqrt(Math.max(1-p[0][0]+p[1][1]-p[2][2],0)),z[2]=.5*Math.sqrt(Math.max(1-p[0][0]-p[1][1]+p[2][2],0)),z[3]=.5*Math.sqrt(Math.max(1+p[0][0]+p[1][1]+p[2][2],0)),p[2][1]>p[1][2]&&(z[0]=-z[0]),p[0][2]>p[2][0]&&(z[1]=-z[1]),p[1][0]>p[0][1]&&(z[2]=-z[2]),!0};function E(x,C,M){var g=C[0],P=C[1],T=C[2],z=C[3];return x[0]=M[0]*g+M[4]*P+M[8]*T+M[12]*z,x[1]=M[1]*g+M[5]*P+M[9]*T+M[13]*z,x[2]=M[2]*g+M[6]*P+M[10]*T+M[14]*z,x[3]=M[3]*g+M[7]*P+M[11]*T+M[15]*z,x}function S(x,C){x[0][0]=C[0],x[0][1]=C[1],x[0][2]=C[2],x[1][0]=C[4],x[1][1]=C[5],x[1][2]=C[6],x[2][0]=C[8],x[2][1]=C[9],x[2][2]=C[10]}function L(x,C,M,g,P){x[0]=C[0]*g+M[0]*P,x[1]=C[1]*g+M[1]*P,x[2]=C[2]*g+M[2]*P}},2653:function(i,a,o){"use strict";var s=o(3865);i.exports=l;function l(u,c){for(var f=u.length,h=new Array(f),d=0;d<f;++d)h[d]=s(u[d],c[d]);return h}},2681:function(i){i.exports=a;function a(o,s){return o[0]=Math.floor(s[0]),o[1]=Math.floor(s[1]),o[2]=Math.floor(s[2]),o}},2690:function(i,a,o){"use strict";i.exports=f;var s=o(8954),l=o(3952);function u(h,d){for(var v=h.length,_=new Array(v),b=0;b<d.length;++b)_[b]=h[d[b]];for(var p=d.length,b=0;b<v;++b)d.indexOf(b)<0&&(_[p++]=h[b]);return _}function c(h,d){for(var v=h.length,_=d.length,b=0;b<v;++b)for(var p=h[b],k=0;k<p.length;++k){var E=p[k];if(E<_)p[k]=d[E];else{E=E-_;for(var S=0;S<_;++S)E>=d[S]&&(E+=1);p[k]=E}}return h}function f(h,d){try{return s(h,!0)}catch(p){var v=l(h);if(v.length<=d)return[];var _=u(h,v),b=s(_,!0);return c(b,v)}}},2762:function(i,a,o){"use strict";var s=o(1888),l=o(5298),u=o(9618),c=["uint8","uint8_clamped","uint16","uint32","int8","int16","int32","float32"];function f(p,k,E,S,L){this.gl=p,this.type=k,this.handle=E,this.length=S,this.usage=L}var h=f.prototype;h.bind=function(){this.gl.bindBuffer(this.type,this.handle)},h.unbind=function(){this.gl.bindBuffer(this.type,null)},h.dispose=function(){this.gl.deleteBuffer(this.handle)};function d(p,k,E,S,L,x){var C=L.length*L.BYTES_PER_ELEMENT;if(x<0)return p.bufferData(k,L,S),C;if(C+x>E)throw new Error("gl-buffer: If resizing buffer, must not specify offset");return p.bufferSubData(k,x,L),E}function v(p,k){for(var E=s.malloc(p.length,k),S=p.length,L=0;L<S;++L)E[L]=p[L];return E}function _(p,k){for(var E=1,S=k.length-1;S>=0;--S){if(k[S]!==E)return!1;E*=p[S]}return!0}h.update=function(p,k){if(typeof k!="number"&&(k=-1),this.bind(),typeof p=="object"&&typeof p.shape!="undefined"){var E=p.dtype;if(c.indexOf(E)<0&&(E="float32"),this.type===this.gl.ELEMENT_ARRAY_BUFFER){var S=gl.getExtension("OES_element_index_uint");S&&E!=="uint16"?E="uint32":E="uint16"}if(E===p.dtype&&_(p.shape,p.stride))p.offset===0&&p.data.length===p.shape[0]?this.length=d(this.gl,this.type,this.length,this.usage,p.data,k):this.length=d(this.gl,this.type,this.length,this.usage,p.data.subarray(p.offset,p.shape[0]),k);else{var L=s.malloc(p.size,E),x=u(L,p.shape);l.assign(x,p),k<0?this.length=d(this.gl,this.type,this.length,this.usage,L,k):this.length=d(this.gl,this.type,this.length,this.usage,L.subarray(0,p.size),k),s.free(L)}}else if(Array.isArray(p)){var C;this.type===this.gl.ELEMENT_ARRAY_BUFFER?C=v(p,"uint16"):C=v(p,"float32"),k<0?this.length=d(this.gl,this.type,this.length,this.usage,C,k):this.length=d(this.gl,this.type,this.length,this.usage,C.subarray(0,p.length),k),s.free(C)}else if(typeof p=="object"&&typeof p.length=="number")this.length=d(this.gl,this.type,this.length,this.usage,p,k);else if(typeof p=="number"||p===void 0){if(k>=0)throw new Error("gl-buffer: Cannot specify offset when resizing buffer");p=p|0,p<=0&&(p=1),this.gl.bufferData(this.type,p|0,this.usage),this.length=p}else throw new Error("gl-buffer: Invalid data type")};function b(p,k,E,S){if(E=E||p.ARRAY_BUFFER,S=S||p.DYNAMIC_DRAW,E!==p.ARRAY_BUFFER&&E!==p.ELEMENT_ARRAY_BUFFER)throw new Error("gl-buffer: Invalid type for webgl buffer, must be either gl.ARRAY_BUFFER or gl.ELEMENT_ARRAY_BUFFER");if(S!==p.DYNAMIC_DRAW&&S!==p.STATIC_DRAW&&S!==p.STREAM_DRAW)throw new Error("gl-buffer: Invalid usage for buffer, must be either gl.DYNAMIC_DRAW, gl.STATIC_DRAW or gl.STREAM_DRAW");var L=p.createBuffer(),x=new f(p,E,L,0,S);return x.update(k),x}i.exports=b},2825:function(i){i.exports=a;function a(o,s,l){var u=new Float32Array(3);return u[0]=o,u[1]=s,u[2]=l,u}},2931:function(i,a,o){i.exports={EPSILON:o(2613),create:o(1091),clone:o(3126),angle:o(8192),fromValues:o(2825),copy:o(3990),set:o(1463),equals:o(9922),exactEquals:o(9265),add:o(5632),subtract:o(6843),sub:o(2229),multiply:o(5847),mul:o(4505),divide:o(6690),div:o(4008),min:o(8107),max:o(7417),floor:o(2681),ceil:o(9226),round:o(2447),scale:o(6621),scaleAndAdd:o(8489),distance:o(7056),dist:o(5455),squaredDistance:o(2953),sqrDist:o(6141),length:o(1387),len:o(868),squaredLength:o(3066),sqrLen:o(5486),negate:o(5093),inverse:o(811),normalize:o(3536),dot:o(244),cross:o(5911),lerp:o(6658),random:o(7636),transformMat4:o(5673),transformMat3:o(492),transformQuat:o(264),rotateX:o(6894),rotateY:o(109),rotateZ:o(8692),forEach:o(5137)}},2933:function(i){i.exports=a;function a(o,s){return o[0]=s[0],o[1]=s[1],o[2]=s[2],o[3]=s[3],o}},2953:function(i){i.exports=a;function a(o,s){var l=s[0]-o[0],u=s[1]-o[1],c=s[2]-o[2];return l*l+u*u+c*c}},2962:function(i,a,o){"use strict";var s=o(5250),l=o(8210),u=o(3012),c=o(7004),f=6;function h(S,L,x,C){return function(g){return C(S(x(g[0][0],g[1][1]),x(-g[0][1],g[1][0])))}}function d(S,L,x,C){return function(g){return C(S(L(S(x(g[1][1],g[2][2]),x(-g[1][2],g[2][1])),g[0][0]),S(L(S(x(g[1][0],g[2][2]),x(-g[1][2],g[2][0])),-g[0][1]),L(S(x(g[1][0],g[2][1]),x(-g[1][1],g[2][0])),g[0][2]))))}}function v(S,L,x,C){return function(g){return C(S(S(L(S(L(S(x(g[2][2],g[3][3]),x(-g[2][3],g[3][2])),g[1][1]),S(L(S(x(g[2][1],g[3][3]),x(-g[2][3],g[3][1])),-g[1][2]),L(S(x(g[2][1],g[3][2]),x(-g[2][2],g[3][1])),g[1][3]))),g[0][0]),L(S(L(S(x(g[2][2],g[3][3]),x(-g[2][3],g[3][2])),g[1][0]),S(L(S(x(g[2][0],g[3][3]),x(-g[2][3],g[3][0])),-g[1][2]),L(S(x(g[2][0],g[3][2]),x(-g[2][2],g[3][0])),g[1][3]))),-g[0][1])),S(L(S(L(S(x(g[2][1],g[3][3]),x(-g[2][3],g[3][1])),g[1][0]),S(L(S(x(g[2][0],g[3][3]),x(-g[2][3],g[3][0])),-g[1][1]),L(S(x(g[2][0],g[3][1]),x(-g[2][1],g[3][0])),g[1][3]))),g[0][2]),L(S(L(S(x(g[2][1],g[3][2]),x(-g[2][2],g[3][1])),g[1][0]),S(L(S(x(g[2][0],g[3][2]),x(-g[2][2],g[3][0])),-g[1][1]),L(S(x(g[2][0],g[3][1]),x(-g[2][1],g[3][0])),g[1][2]))),-g[0][3]))))}}function _(S,L,x,C){return function(g){return C(S(S(L(S(S(L(S(L(S(x(g[3][3],g[4][4]),x(-g[3][4],g[4][3])),g[2][2]),S(L(S(x(g[3][2],g[4][4]),x(-g[3][4],g[4][2])),-g[2][3]),L(S(x(g[3][2],g[4][3]),x(-g[3][3],g[4][2])),g[2][4]))),g[1][1]),L(S(L(S(x(g[3][3],g[4][4]),x(-g[3][4],g[4][3])),g[2][1]),S(L(S(x(g[3][1],g[4][4]),x(-g[3][4],g[4][1])),-g[2][3]),L(S(x(g[3][1],g[4][3]),x(-g[3][3],g[4][1])),g[2][4]))),-g[1][2])),S(L(S(L(S(x(g[3][2],g[4][4]),x(-g[3][4],g[4][2])),g[2][1]),S(L(S(x(g[3][1],g[4][4]),x(-g[3][4],g[4][1])),-g[2][2]),L(S(x(g[3][1],g[4][2]),x(-g[3][2],g[4][1])),g[2][4]))),g[1][3]),L(S(L(S(x(g[3][2],g[4][3]),x(-g[3][3],g[4][2])),g[2][1]),S(L(S(x(g[3][1],g[4][3]),x(-g[3][3],g[4][1])),-g[2][2]),L(S(x(g[3][1],g[4][2]),x(-g[3][2],g[4][1])),g[2][3]))),-g[1][4]))),g[0][0]),L(S(S(L(S(L(S(x(g[3][3],g[4][4]),x(-g[3][4],g[4][3])),g[2][2]),S(L(S(x(g[3][2],g[4][4]),x(-g[3][4],g[4][2])),-g[2][3]),L(S(x(g[3][2],g[4][3]),x(-g[3][3],g[4][2])),g[2][4]))),g[1][0]),L(S(L(S(x(g[3][3],g[4][4]),x(-g[3][4],g[4][3])),g[2][0]),S(L(S(x(g[3][0],g[4][4]),x(-g[3][4],g[4][0])),-g[2][3]),L(S(x(g[3][0],g[4][3]),x(-g[3][3],g[4][0])),g[2][4]))),-g[1][2])),S(L(S(L(S(x(g[3][2],g[4][4]),x(-g[3][4],g[4][2])),g[2][0]),S(L(S(x(g[3][0],g[4][4]),x(-g[3][4],g[4][0])),-g[2][2]),L(S(x(g[3][0],g[4][2]),x(-g[3][2],g[4][0])),g[2][4]))),g[1][3]),L(S(L(S(x(g[3][2],g[4][3]),x(-g[3][3],g[4][2])),g[2][0]),S(L(S(x(g[3][0],g[4][3]),x(-g[3][3],g[4][0])),-g[2][2]),L(S(x(g[3][0],g[4][2]),x(-g[3][2],g[4][0])),g[2][3]))),-g[1][4]))),-g[0][1])),S(L(S(S(L(S(L(S(x(g[3][3],g[4][4]),x(-g[3][4],g[4][3])),g[2][1]),S(L(S(x(g[3][1],g[4][4]),x(-g[3][4],g[4][1])),-g[2][3]),L(S(x(g[3][1],g[4][3]),x(-g[3][3],g[4][1])),g[2][4]))),g[1][0]),L(S(L(S(x(g[3][3],g[4][4]),x(-g[3][4],g[4][3])),g[2][0]),S(L(S(x(g[3][0],g[4][4]),x(-g[3][4],g[4][0])),-g[2][3]),L(S(x(g[3][0],g[4][3]),x(-g[3][3],g[4][0])),g[2][4]))),-g[1][1])),S(L(S(L(S(x(g[3][1],g[4][4]),x(-g[3][4],g[4][1])),g[2][0]),S(L(S(x(g[3][0],g[4][4]),x(-g[3][4],g[4][0])),-g[2][1]),L(S(x(g[3][0],g[4][1]),x(-g[3][1],g[4][0])),g[2][4]))),g[1][3]),L(S(L(S(x(g[3][1],g[4][3]),x(-g[3][3],g[4][1])),g[2][0]),S(L(S(x(g[3][0],g[4][3]),x(-g[3][3],g[4][0])),-g[2][1]),L(S(x(g[3][0],g[4][1]),x(-g[3][1],g[4][0])),g[2][3]))),-g[1][4]))),g[0][2]),S(L(S(S(L(S(L(S(x(g[3][2],g[4][4]),x(-g[3][4],g[4][2])),g[2][1]),S(L(S(x(g[3][1],g[4][4]),x(-g[3][4],g[4][1])),-g[2][2]),L(S(x(g[3][1],g[4][2]),x(-g[3][2],g[4][1])),g[2][4]))),g[1][0]),L(S(L(S(x(g[3][2],g[4][4]),x(-g[3][4],g[4][2])),g[2][0]),S(L(S(x(g[3][0],g[4][4]),x(-g[3][4],g[4][0])),-g[2][2]),L(S(x(g[3][0],g[4][2]),x(-g[3][2],g[4][0])),g[2][4]))),-g[1][1])),S(L(S(L(S(x(g[3][1],g[4][4]),x(-g[3][4],g[4][1])),g[2][0]),S(L(S(x(g[3][0],g[4][4]),x(-g[3][4],g[4][0])),-g[2][1]),L(S(x(g[3][0],g[4][1]),x(-g[3][1],g[4][0])),g[2][4]))),g[1][2]),L(S(L(S(x(g[3][1],g[4][2]),x(-g[3][2],g[4][1])),g[2][0]),S(L(S(x(g[3][0],g[4][2]),x(-g[3][2],g[4][0])),-g[2][1]),L(S(x(g[3][0],g[4][1]),x(-g[3][1],g[4][0])),g[2][2]))),-g[1][4]))),-g[0][3]),L(S(S(L(S(L(S(x(g[3][2],g[4][3]),x(-g[3][3],g[4][2])),g[2][1]),S(L(S(x(g[3][1],g[4][3]),x(-g[3][3],g[4][1])),-g[2][2]),L(S(x(g[3][1],g[4][2]),x(-g[3][2],g[4][1])),g[2][3]))),g[1][0]),L(S(L(S(x(g[3][2],g[4][3]),x(-g[3][3],g[4][2])),g[2][0]),S(L(S(x(g[3][0],g[4][3]),x(-g[3][3],g[4][0])),-g[2][2]),L(S(x(g[3][0],g[4][2]),x(-g[3][2],g[4][0])),g[2][3]))),-g[1][1])),S(L(S(L(S(x(g[3][1],g[4][3]),x(-g[3][3],g[4][1])),g[2][0]),S(L(S(x(g[3][0],g[4][3]),x(-g[3][3],g[4][0])),-g[2][1]),L(S(x(g[3][0],g[4][1]),x(-g[3][1],g[4][0])),g[2][3]))),g[1][2]),L(S(L(S(x(g[3][1],g[4][2]),x(-g[3][2],g[4][1])),g[2][0]),S(L(S(x(g[3][0],g[4][2]),x(-g[3][2],g[4][0])),-g[2][1]),L(S(x(g[3][0],g[4][1]),x(-g[3][1],g[4][0])),g[2][2]))),-g[1][3]))),g[0][4])))))}}function b(S){var L=S===2?h:S===3?d:S===4?v:S===5?_:void 0;return L(l,u,s,c)}var p=[function(){return[0]},function(L){return[L[0][0]]}];function k(S,L,x,C,M,g,P,T){return function(O){switch(O.length){case 0:return S(O);case 1:return L(O);case 2:return x(O);case 3:return C(O);case 4:return M(O);case 5:return g(O)}var V=P[O.length];return V||(V=P[O.length]=T(O.length)),V(O)}}function E(){for(;p.length<f;)p.push(b(p.length));i.exports=k.apply(void 0,p.concat([p,b]));for(var S=0;S<p.length;++S)i.exports[S]=p[S]}E()},2992:function(i,a,o){var s=o(3387).sprintf,l=o(5171),u=o(1848),c=o(1085);i.exports=f;function f(h,d,v){"use strict";var _=u(d)||"of unknown name (see npm glsl-shader-name)",b="unknown type";v!==void 0&&(b=v===l.FRAGMENT_SHADER?"fragment":"vertex");for(var p=s(`Error compiling %s shader %s:
+`,b,_),k=s("%s%s",p,h),E=h.split(`
+`),S={},L=0;L<E.length;L++){var x=E[L];if(!(x===""||x==="\0")){var C=parseInt(x.split(":")[2]);if(isNaN(C))throw new Error(s("Could not parse error: %s",x));S[C]=x}}for(var M=c(d).split(`
+`),L=0;L<M.length;L++)if(!(!S[L+3]&&!S[L+2]&&!S[L+1])){var g=M[L];if(p+=g+`
+`,S[L+1]){var P=S[L+1];P=P.substr(P.split(":",3).join(":").length+1).trim(),p+=s(`^^^ %s
+
+`,P)}}return{long:p.trim(),short:k.trim()}}},3012:function(i,a,o){"use strict";var s=o(5250),l=o(9362);i.exports=u;function u(c,f){var h=c.length;if(h===1){var d=s(c[0],f);return d[0]?d:[d[1]]}var v=new Array(2*h),_=[.1,.1],b=[.1,.1],p=0;s(c[0],f,_),_[0]&&(v[p++]=_[0]);for(var k=1;k<h;++k){s(c[k],f,b);var E=_[1];l(E,b[0],_),_[0]&&(v[p++]=_[0]);var S=b[1],L=_[1],x=S+L,C=x-S,M=L-C;_[1]=x,M&&(v[p++]=M)}return _[1]&&(v[p++]=_[1]),p===0&&(v[p++]=0),v.length=p,v}},3025:function(i,a,o){i.exports=o.g.performance&&o.g.performance.now?function(){return performance.now()}:Date.now||function(){return+new Date}},3066:function(i){i.exports=a;function a(o){var s=o[0],l=o[1],u=o[2];return s*s+l*l+u*u}},3088:function(i,a,o){"use strict";i.exports=l;var s=o(3140);function l(u,c){for(var f=c.length|0,h=u.length,d=[new Array(f),new Array(f)],v=0;v<f;++v)d[0][v]=[],d[1][v]=[];for(var v=0;v<h;++v){var _=u[v];d[0][_[0]].push(_),d[1][_[1]].push(_)}for(var b=[],v=0;v<f;++v)d[0][v].length+d[1][v].length===0&&b.push([v]);function p(g,P){var T=d[P][g[P]];T.splice(T.indexOf(g),1)}function k(g,P,T){for(var z,O,V,G=0;G<2;++G)if(d[G][P].length>0){z=d[G][P][0],V=G;break}O=z[V^1];for(var Z=0;Z<2;++Z)for(var j=d[Z][P],N=0;N<j.length;++N){var H=j[N],te=H[Z^1],oe=s(c[g],c[P],c[O],c[te]);oe>0&&(z=H,O=te,V=Z)}return T||z&&p(z,V),O}function E(g,P){var T=d[P][g][0],z=[g];p(T,P);for(var O=T[P^1],V=P;;){for(;O!==g;)z.push(O),O=k(z[z.length-2],O,!1);if(d[0][g].length+d[1][g].length===0)break;var G=z[z.length-1],Z=g,j=z[1],N=k(G,Z,!0);if(s(c[G],c[Z],c[j],c[N])<0)break;z.push(g),O=k(G,Z)}return z}function S(g,P){return P[1]===P[P.length-1]}for(var v=0;v<f;++v)for(var L=0;L<2;++L){for(var x=[];d[L][v].length>0;){var C=d[0][v].length,M=E(v,L);S(x,M)?x.push.apply(x,M):(x.length>0&&b.push(x),x=M)}x.length>0&&b.push(x)}return b}},3090:function(i,a,o){"use strict";i.exports=l;var s=o(3250)[3];function l(u){var c=u.length;if(c<3){for(var k=new Array(c),f=0;f<c;++f)k[f]=f;return c===2&&u[0][0]===u[1][0]&&u[0][1]===u[1][1]?[0]:k}for(var h=new Array(c),f=0;f<c;++f)h[f]=f;h.sort(function(x,C){var M=u[x][0]-u[C][0];return M||u[x][1]-u[C][1]});for(var d=[h[0],h[1]],v=[h[0],h[1]],f=2;f<c;++f){for(var _=h[f],b=u[_],p=d.length;p>1&&s(u[d[p-2]],u[d[p-1]],b)<=0;)p-=1,d.pop();for(d.push(_),p=v.length;p>1&&s(u[v[p-2]],u[v[p-1]],b)>=0;)p-=1,v.pop();v.push(_)}for(var k=new Array(v.length+d.length-2),E=0,f=0,S=d.length;f<S;++f)k[E++]=d[f];for(var L=v.length-2;L>0;--L)k[E++]=v[L];return k}},3105:function(i,a){"use strict";"use restrict";var o=32;a.INT_BITS=o,a.INT_MAX=2147483647,a.INT_MIN=-1<<o-1,a.sign=function(u){return(u>0)-(u<0)},a.abs=function(u){var c=u>>o-1;return(u^c)-c},a.min=function(u,c){return c^(u^c)&-(u<c)},a.max=function(u,c){return u^(u^c)&-(u<c)},a.isPow2=function(u){return!(u&u-1)&&!!u},a.log2=function(u){var c,f;return c=(u>65535)<<4,u>>>=c,f=(u>255)<<3,u>>>=f,c|=f,f=(u>15)<<2,u>>>=f,c|=f,f=(u>3)<<1,u>>>=f,c|=f,c|u>>1},a.log10=function(u){return u>=1e9?9:u>=1e8?8:u>=1e7?7:u>=1e6?6:u>=1e5?5:u>=1e4?4:u>=1e3?3:u>=100?2:u>=10?1:0},a.popCount=function(u){return u=u-(u>>>1&1431655765),u=(u&858993459)+(u>>>2&858993459),(u+(u>>>4)&252645135)*16843009>>>24};function s(u){var c=32;return u&=-u,u&&c--,u&65535&&(c-=16),u&16711935&&(c-=8),u&252645135&&(c-=4),u&858993459&&(c-=2),u&1431655765&&(c-=1),c}a.countTrailingZeros=s,a.nextPow2=function(u){return u+=u===0,--u,u|=u>>>1,u|=u>>>2,u|=u>>>4,u|=u>>>8,u|=u>>>16,u+1},a.prevPow2=function(u){return u|=u>>>1,u|=u>>>2,u|=u>>>4,u|=u>>>8,u|=u>>>16,u-(u>>>1)},a.parity=function(u){return u^=u>>>16,u^=u>>>8,u^=u>>>4,u&=15,27030>>>u&1};var l=new Array(256);(function(u){for(var c=0;c<256;++c){var f=c,h=c,d=7;for(f>>>=1;f;f>>>=1)h<<=1,h|=f&1,--d;u[c]=h<<d&255}})(l),a.reverse=function(u){return l[u&255]<<24|l[u>>>8&255]<<16|l[u>>>16&255]<<8|l[u>>>24&255]},a.interleave2=function(u,c){return u&=65535,u=(u|u<<8)&16711935,u=(u|u<<4)&252645135,u=(u|u<<2)&858993459,u=(u|u<<1)&1431655765,c&=65535,c=(c|c<<8)&16711935,c=(c|c<<4)&252645135,c=(c|c<<2)&858993459,c=(c|c<<1)&1431655765,u|c<<1},a.deinterleave2=function(u,c){return u=u>>>c&1431655765,u=(u|u>>>1)&858993459,u=(u|u>>>2)&252645135,u=(u|u>>>4)&16711935,u=(u|u>>>16)&65535,u<<16>>16},a.interleave3=function(u,c,f){return u&=1023,u=(u|u<<16)&4278190335,u=(u|u<<8)&251719695,u=(u|u<<4)&3272356035,u=(u|u<<2)&1227133513,c&=1023,c=(c|c<<16)&4278190335,c=(c|c<<8)&251719695,c=(c|c<<4)&3272356035,c=(c|c<<2)&1227133513,u|=c<<1,f&=1023,f=(f|f<<16)&4278190335,f=(f|f<<8)&251719695,f=(f|f<<4)&3272356035,f=(f|f<<2)&1227133513,u|f<<2},a.deinterleave3=function(u,c){return u=u>>>c&1227133513,u=(u|u>>>2)&3272356035,u=(u|u>>>4)&251719695,u=(u|u>>>8)&4278190335,u=(u|u>>>16)&1023,u<<22>>22},a.nextCombination=function(u){var c=u|u-1;return c+1|(~c&-~c)-1>>>s(u)+1}},3126:function(i){i.exports=a;function a(o){var s=new Float32Array(3);return s[0]=o[0],s[1]=o[1],s[2]=o[2],s}},3134:function(i,a,o){"use strict";i.exports=l;var s=o(1682);function l(u,c){var f=u.length;if(typeof c!="number"){c=0;for(var h=0;h<f;++h){var d=u[h];c=Math.max(c,d[0],d[1])}c=(c|0)+1}c=c|0;for(var v=new Array(c),h=0;h<c;++h)v[h]=[];for(var h=0;h<f;++h){var d=u[h];v[d[0]].push(d[1]),v[d[1]].push(d[0])}for(var _=0;_<c;++_)s(v[_],function(b,p){return b-p});return v}},3140:function(i,a,o){"use strict";i.exports=d;var s=o(3250),l=o(8572),u=o(9362),c=o(5382),f=o(8210);function h(v,_,b){var p=u(v[0],-_[0]),k=u(v[1],-_[1]),E=u(b[0],-_[0]),S=u(b[1],-_[1]),L=f(c(p,E),c(k,S));return L[L.length-1]>=0}function d(v,_,b,p){var k=s(_,b,p);if(k===0){var E=l(s(v,_,b)),S=l(s(v,_,p));if(E===S){if(E===0){var L=h(v,_,b),x=h(v,_,p);return L===x?0:L?1:-1}return 0}else{if(S===0)return E>0||h(v,_,p)?-1:1;if(E===0)return S>0||h(v,_,b)?1:-1}return l(S-E)}var C=s(v,_,b);if(C>0)return k>0&&s(v,_,p)>0?1:-1;if(C<0)return k>0||s(v,_,p)>0?1:-1;var M=s(v,_,p);return M>0||h(v,_,b)?1:-1}},3202:function(i){i.exports=function(o,s){s||(s=[0,""]),o=String(o);var l=parseFloat(o,10);return s[0]=l,s[1]=o.match(/[\d.\-\+]*\s*(.*)/)[1]||"",s}},3233:function(i){"use strict";var a="",o;i.exports=s;function s(l,u){if(typeof l!="string")throw new TypeError("expected a string");if(u===1)return l;if(u===2)return l+l;var c=l.length*u;if(o!==l||typeof o=="undefined")o=l,a="";else if(a.length>=c)return a.substr(0,c);for(;c>a.length&&u>1;)u&1&&(a+=l),u>>=1,l+=l;return a+=l,a=a.substr(0,c),a}},3236:function(i){i.exports=function(a){typeof a=="string"&&(a=[a]);for(var o=[].slice.call(arguments,1),s=[],l=0;l<a.length-1;l++)s.push(a[l],o[l]||"");return s.push(a[l]),s.join("")}},3250:function(i,a,o){"use strict";var s=o(5250),l=o(8210),u=o(3012),c=o(8545),f=5,h=11102230246251565e-32,d=(3+16*h)*h,v=(7+56*h)*h;function _(g,P,T,z){return function(V,G,Z){var j=g(g(P(G[1],Z[0]),P(-Z[1],G[0])),g(P(V[1],G[0]),P(-G[1],V[0]))),N=g(P(V[1],Z[0]),P(-Z[1],V[0])),H=z(j,N);return H[H.length-1]}}function b(g,P,T,z){return function(V,G,Z,j){var N=g(g(T(g(P(Z[1],j[0]),P(-j[1],Z[0])),G[2]),g(T(g(P(G[1],j[0]),P(-j[1],G[0])),-Z[2]),T(g(P(G[1],Z[0]),P(-Z[1],G[0])),j[2]))),g(T(g(P(G[1],j[0]),P(-j[1],G[0])),V[2]),g(T(g(P(V[1],j[0]),P(-j[1],V[0])),-G[2]),T(g(P(V[1],G[0]),P(-G[1],V[0])),j[2])))),H=g(g(T(g(P(Z[1],j[0]),P(-j[1],Z[0])),V[2]),g(T(g(P(V[1],j[0]),P(-j[1],V[0])),-Z[2]),T(g(P(V[1],Z[0]),P(-Z[1],V[0])),j[2]))),g(T(g(P(G[1],Z[0]),P(-Z[1],G[0])),V[2]),g(T(g(P(V[1],Z[0]),P(-Z[1],V[0])),-G[2]),T(g(P(V[1],G[0]),P(-G[1],V[0])),Z[2])))),te=z(N,H);return te[te.length-1]}}function p(g,P,T,z){return function(V,G,Z,j,N){var H=g(g(g(T(g(T(g(P(j[1],N[0]),P(-N[1],j[0])),Z[2]),g(T(g(P(Z[1],N[0]),P(-N[1],Z[0])),-j[2]),T(g(P(Z[1],j[0]),P(-j[1],Z[0])),N[2]))),G[3]),g(T(g(T(g(P(j[1],N[0]),P(-N[1],j[0])),G[2]),g(T(g(P(G[1],N[0]),P(-N[1],G[0])),-j[2]),T(g(P(G[1],j[0]),P(-j[1],G[0])),N[2]))),-Z[3]),T(g(T(g(P(Z[1],N[0]),P(-N[1],Z[0])),G[2]),g(T(g(P(G[1],N[0]),P(-N[1],G[0])),-Z[2]),T(g(P(G[1],Z[0]),P(-Z[1],G[0])),N[2]))),j[3]))),g(T(g(T(g(P(Z[1],j[0]),P(-j[1],Z[0])),G[2]),g(T(g(P(G[1],j[0]),P(-j[1],G[0])),-Z[2]),T(g(P(G[1],Z[0]),P(-Z[1],G[0])),j[2]))),-N[3]),g(T(g(T(g(P(j[1],N[0]),P(-N[1],j[0])),G[2]),g(T(g(P(G[1],N[0]),P(-N[1],G[0])),-j[2]),T(g(P(G[1],j[0]),P(-j[1],G[0])),N[2]))),V[3]),T(g(T(g(P(j[1],N[0]),P(-N[1],j[0])),V[2]),g(T(g(P(V[1],N[0]),P(-N[1],V[0])),-j[2]),T(g(P(V[1],j[0]),P(-j[1],V[0])),N[2]))),-G[3])))),g(g(T(g(T(g(P(G[1],N[0]),P(-N[1],G[0])),V[2]),g(T(g(P(V[1],N[0]),P(-N[1],V[0])),-G[2]),T(g(P(V[1],G[0]),P(-G[1],V[0])),N[2]))),j[3]),g(T(g(T(g(P(G[1],j[0]),P(-j[1],G[0])),V[2]),g(T(g(P(V[1],j[0]),P(-j[1],V[0])),-G[2]),T(g(P(V[1],G[0]),P(-G[1],V[0])),j[2]))),-N[3]),T(g(T(g(P(Z[1],j[0]),P(-j[1],Z[0])),G[2]),g(T(g(P(G[1],j[0]),P(-j[1],G[0])),-Z[2]),T(g(P(G[1],Z[0]),P(-Z[1],G[0])),j[2]))),V[3]))),g(T(g(T(g(P(Z[1],j[0]),P(-j[1],Z[0])),V[2]),g(T(g(P(V[1],j[0]),P(-j[1],V[0])),-Z[2]),T(g(P(V[1],Z[0]),P(-Z[1],V[0])),j[2]))),-G[3]),g(T(g(T(g(P(G[1],j[0]),P(-j[1],G[0])),V[2]),g(T(g(P(V[1],j[0]),P(-j[1],V[0])),-G[2]),T(g(P(V[1],G[0]),P(-G[1],V[0])),j[2]))),Z[3]),T(g(T(g(P(G[1],Z[0]),P(-Z[1],G[0])),V[2]),g(T(g(P(V[1],Z[0]),P(-Z[1],V[0])),-G[2]),T(g(P(V[1],G[0]),P(-G[1],V[0])),Z[2]))),-j[3]))))),te=g(g(g(T(g(T(g(P(j[1],N[0]),P(-N[1],j[0])),Z[2]),g(T(g(P(Z[1],N[0]),P(-N[1],Z[0])),-j[2]),T(g(P(Z[1],j[0]),P(-j[1],Z[0])),N[2]))),V[3]),T(g(T(g(P(j[1],N[0]),P(-N[1],j[0])),V[2]),g(T(g(P(V[1],N[0]),P(-N[1],V[0])),-j[2]),T(g(P(V[1],j[0]),P(-j[1],V[0])),N[2]))),-Z[3])),g(T(g(T(g(P(Z[1],N[0]),P(-N[1],Z[0])),V[2]),g(T(g(P(V[1],N[0]),P(-N[1],V[0])),-Z[2]),T(g(P(V[1],Z[0]),P(-Z[1],V[0])),N[2]))),j[3]),T(g(T(g(P(Z[1],j[0]),P(-j[1],Z[0])),V[2]),g(T(g(P(V[1],j[0]),P(-j[1],V[0])),-Z[2]),T(g(P(V[1],Z[0]),P(-Z[1],V[0])),j[2]))),-N[3]))),g(g(T(g(T(g(P(Z[1],N[0]),P(-N[1],Z[0])),G[2]),g(T(g(P(G[1],N[0]),P(-N[1],G[0])),-Z[2]),T(g(P(G[1],Z[0]),P(-Z[1],G[0])),N[2]))),V[3]),T(g(T(g(P(Z[1],N[0]),P(-N[1],Z[0])),V[2]),g(T(g(P(V[1],N[0]),P(-N[1],V[0])),-Z[2]),T(g(P(V[1],Z[0]),P(-Z[1],V[0])),N[2]))),-G[3])),g(T(g(T(g(P(G[1],N[0]),P(-N[1],G[0])),V[2]),g(T(g(P(V[1],N[0]),P(-N[1],V[0])),-G[2]),T(g(P(V[1],G[0]),P(-G[1],V[0])),N[2]))),Z[3]),T(g(T(g(P(G[1],Z[0]),P(-Z[1],G[0])),V[2]),g(T(g(P(V[1],Z[0]),P(-Z[1],V[0])),-G[2]),T(g(P(V[1],G[0]),P(-G[1],V[0])),Z[2]))),-N[3])))),oe=z(H,te);return oe[oe.length-1]}}function k(g){var P=g===3?_:g===4?b:p;return P(l,s,u,c)}var E=k(3),S=k(4),L=[function(){return 0},function(){return 0},function(P,T){return T[0]-P[0]},function(P,T,z){var O=(P[1]-z[1])*(T[0]-z[0]),V=(P[0]-z[0])*(T[1]-z[1]),G=O-V,Z;if(O>0){if(V<=0)return G;Z=O+V}else if(O<0){if(V>=0)return G;Z=-(O+V)}else return G;var j=d*Z;return G>=j||G<=-j?G:E(P,T,z)},function(P,T,z,O){var V=P[0]-O[0],G=T[0]-O[0],Z=z[0]-O[0],j=P[1]-O[1],N=T[1]-O[1],H=z[1]-O[1],te=P[2]-O[2],oe=T[2]-O[2],_e=z[2]-O[2],Ee=G*H,Ce=Z*N,me=Z*j,ie=V*H,Se=V*N,Le=G*j,Ae=te*(Ee-Ce)+oe*(me-ie)+_e*(Se-Le),Fe=(Math.abs(Ee)+Math.abs(Ce))*Math.abs(te)+(Math.abs(me)+Math.abs(ie))*Math.abs(oe)+(Math.abs(Se)+Math.abs(Le))*Math.abs(_e),Pe=v*Fe;return Ae>Pe||-Ae>Pe?Ae:S(P,T,z,O)}];function x(g){var P=L[g.length];return P||(P=L[g.length]=k(g.length)),P.apply(void 0,g)}function C(g,P,T,z,O,V,G){return function(j,N,H,te,oe){switch(arguments.length){case 0:case 1:return 0;case 2:return z(j,N);case 3:return O(j,N,H);case 4:return V(j,N,H,te);case 5:return G(j,N,H,te,oe)}for(var _e=new Array(arguments.length),Ee=0;Ee<arguments.length;++Ee)_e[Ee]=arguments[Ee];return g(_e)}}function M(){for(;L.length<=f;)L.push(k(L.length));i.exports=C.apply(void 0,[x].concat(L));for(var g=0;g<=f;++g)i.exports[g]=L[g]}M()},3327:function(i,a,o){"use strict";var s=o(216),l=o(8866);i.exports=f;function u(h){return function(){return h}}function c(h,d){for(var v=new Array(h),_=0;_<h;++_)v[_]=d;return v}function f(h,d,v,_){function b(C){return function(M,g,P){return M.getUniform(g.program,P[C])}}function p(C){return function(g){for(var P=k("",C),T=0;T<P.length;++T){var z=P[T],O=z[0],V=z[1];if(_[V]){var G=g;if(typeof O=="string"&&(O.indexOf(".")===0||O.indexOf("[")===0)){var Z=O;if(O.indexOf(".")===0&&(Z=O.slice(1)),Z.indexOf("]")===Z.length-1){var j=Z.indexOf("["),N=Z.slice(0,j),H=Z.slice(j+1,Z.length-1);G=N?g[N][H]:g[H]}else G=g[Z]}var te=v[V].type,oe;switch(te){case"bool":case"int":case"sampler2D":case"samplerCube":h.uniform1i(_[V],G);break;case"float":h.uniform1f(_[V],G);break;default:var _e=te.indexOf("vec");if(0<=_e&&_e<=1&&te.length===4+_e){if(oe=te.charCodeAt(te.length-1)-48,oe<2||oe>4)throw new l("","Invalid data type");switch(te.charAt(0)){case"b":case"i":h["uniform"+oe+"iv"](_[V],G);break;case"v":h["uniform"+oe+"fv"](_[V],G);break;default:throw new l("","Unrecognized data type for vector "+name+": "+te)}}else if(te.indexOf("mat")===0&&te.length===4){if(oe=te.charCodeAt(te.length-1)-48,oe<2||oe>4)throw new l("","Invalid uniform dimension type for matrix "+name+": "+te);h["uniformMatrix"+oe+"fv"](_[V],!1,G);break}else throw new l("","Unknown uniform data type for "+name+": "+te)}}}}}function k(C,M){if(typeof M!="object")return[[C,M]];var g=[];for(var P in M){var T=M[P],z=C;parseInt(P)+""===P?z+="["+P+"]":z+="."+P,typeof T=="object"?g.push.apply(g,k(z,T)):g.push([z,T])}return g}function E(C){switch(C){case"bool":return!1;case"int":case"sampler2D":case"samplerCube":return 0;case"float":return 0;default:var M=C.indexOf("vec");if(0<=M&&M<=1&&C.length===4+M){var g=C.charCodeAt(C.length-1)-48;if(g<2||g>4)throw new l("","Invalid data type");return C.charAt(0)==="b"?c(g,!1):c(g,0)}else if(C.indexOf("mat")===0&&C.length===4){var g=C.charCodeAt(C.length-1)-48;if(g<2||g>4)throw new l("","Invalid uniform dimension type for matrix "+name+": "+C);return c(g*g,0)}else throw new l("","Unknown uniform data type for "+name+": "+C)}}function S(C,M,g){if(typeof g=="object"){var P=L(g);Object.defineProperty(C,M,{get:u(P),set:p(g),enumerable:!0,configurable:!1})}else _[g]?Object.defineProperty(C,M,{get:b(g),set:p(g),enumerable:!0,configurable:!1}):C[M]=E(v[g].type)}function L(C){var M;if(Array.isArray(C)){M=new Array(C.length);for(var g=0;g<C.length;++g)S(M,g,C[g])}else{M={};for(var P in C)S(M,P,C[P])}return M}var x=s(v,!0);return{get:u(L(x)),set:p(x),enumerable:!0,configurable:!0}}},3349:function(i){"use strict";function a(){return function(f,h,d,v,_,b){var p=f[0],k=d[0],E=[0],S=k;v|=0;var L=0,x=k;for(L=0;L<p;++L){{var C=h[v]-b,M=h[v+S]-b;C>=0!=M>=0&&_.push(E[0]+.5+.5*(C+M)/(C-M))}v+=x,++E[0]}}}function o(){return a()}var s=o;function l(f){var h={};return function(v,_,b){var p=v.dtype,k=v.order,E=[p,k.join()].join(),S=h[E];return S||(h[E]=S=f([p,k])),S(v.shape.slice(0),v.data,v.stride,v.offset|0,_,b)}}function u(f){return l(s.bind(void 0,f))}function c(f){return u({funcName:f.funcName})}i.exports=c({funcName:"zeroCrossings"})},3352:function(i,a,o){"use strict";var s=o(2478),l=0,u=1,c=2;i.exports=P;function f(T,z,O,V,G){this.mid=T,this.left=z,this.right=O,this.leftPoints=V,this.rightPoints=G,this.count=(z?z.count:0)+(O?O.count:0)+V.length}var h=f.prototype;function d(T,z){T.mid=z.mid,T.left=z.left,T.right=z.right,T.leftPoints=z.leftPoints,T.rightPoints=z.rightPoints,T.count=z.count}function v(T,z){var O=C(z);T.mid=O.mid,T.left=O.left,T.right=O.right,T.leftPoints=O.leftPoints,T.rightPoints=O.rightPoints,T.count=O.count}function _(T,z){var O=T.intervals([]);O.push(z),v(T,O)}function b(T,z){var O=T.intervals([]),V=O.indexOf(z);return V<0?l:(O.splice(V,1),v(T,O),u)}h.intervals=function(T){return T.push.apply(T,this.leftPoints),this.left&&this.left.intervals(T),this.right&&this.right.intervals(T),T},h.insert=function(T){var z=this.count-this.leftPoints.length;if(this.count+=1,T[1]<this.mid)this.left?4*(this.left.count+1)>3*(z+1)?_(this,T):this.left.insert(T):this.left=C([T]);else if(T[0]>this.mid)this.right?4*(this.right.count+1)>3*(z+1)?_(this,T):this.right.insert(T):this.right=C([T]);else{var O=s.ge(this.leftPoints,T,L),V=s.ge(this.rightPoints,T,x);this.leftPoints.splice(O,0,T),this.rightPoints.splice(V,0,T)}},h.remove=function(T){var z=this.count-this.leftPoints;if(T[1]<this.mid){if(!this.left)return l;var O=this.right?this.right.count:0;if(4*O>3*(z-1))return b(this,T);var V=this.left.remove(T);return V===c?(this.left=null,this.count-=1,u):(V===u&&(this.count-=1),V)}else if(T[0]>this.mid){if(!this.right)return l;var G=this.left?this.left.count:0;if(4*G>3*(z-1))return b(this,T);var V=this.right.remove(T);return V===c?(this.right=null,this.count-=1,u):(V===u&&(this.count-=1),V)}else{if(this.count===1)return this.leftPoints[0]===T?c:l;if(this.leftPoints.length===1&&this.leftPoints[0]===T){if(this.left&&this.right){for(var Z=this,j=this.left;j.right;)Z=j,j=j.right;if(Z===this)j.right=this.right;else{var N=this.left,V=this.right;Z.count-=j.count,Z.right=j.left,j.left=N,j.right=V}d(this,j),this.count=(this.left?this.left.count:0)+(this.right?this.right.count:0)+this.leftPoints.length}else this.left?d(this,this.left):d(this,this.right);return u}for(var N=s.ge(this.leftPoints,T,L);N<this.leftPoints.length&&this.leftPoints[N][0]===T[0];++N)if(this.leftPoints[N]===T){this.count-=1,this.leftPoints.splice(N,1);for(var V=s.ge(this.rightPoints,T,x);V<this.rightPoints.length&&this.rightPoints[V][1]===T[1];++V)if(this.rightPoints[V]===T)return this.rightPoints.splice(V,1),u}return l}};function p(T,z,O){for(var V=0;V<T.length&&T[V][0]<=z;++V){var G=O(T[V]);if(G)return G}}function k(T,z,O){for(var V=T.length-1;V>=0&&T[V][1]>=z;--V){var G=O(T[V]);if(G)return G}}function E(T,z){for(var O=0;O<T.length;++O){var V=z(T[O]);if(V)return V}}h.queryPoint=function(T,z){if(T<this.mid){if(this.left){var O=this.left.queryPoint(T,z);if(O)return O}return p(this.leftPoints,T,z)}else if(T>this.mid){if(this.right){var O=this.right.queryPoint(T,z);if(O)return O}return k(this.rightPoints,T,z)}else return E(this.leftPoints,z)},h.queryInterval=function(T,z,O){if(T<this.mid&&this.left){var V=this.left.queryInterval(T,z,O);if(V)return V}if(z>this.mid&&this.right){var V=this.right.queryInterval(T,z,O);if(V)return V}return z<this.mid?p(this.leftPoints,z,O):T>this.mid?k(this.rightPoints,T,O):E(this.leftPoints,O)};function S(T,z){return T-z}function L(T,z){var O=T[0]-z[0];return O||T[1]-z[1]}function x(T,z){var O=T[1]-z[1];return O||T[0]-z[0]}function C(T){if(T.length===0)return null;for(var z=[],O=0;O<T.length;++O)z.push(T[O][0],T[O][1]);z.sort(S);for(var V=z[z.length>>1],G=[],Z=[],j=[],O=0;O<T.length;++O){var N=T[O];N[1]<V?G.push(N):V<N[0]?Z.push(N):j.push(N)}var H=j,te=j.slice();return H.sort(L),te.sort(x),new f(V,C(G),C(Z),H,te)}function M(T){this.root=T}var g=M.prototype;g.insert=function(T){this.root?this.root.insert(T):this.root=new f(T[0],null,null,[T],[T])},g.remove=function(T){if(this.root){var z=this.root.remove(T);return z===c&&(this.root=null),z!==l}return!1},g.queryPoint=function(T,z){if(this.root)return this.root.queryPoint(T,z)},g.queryInterval=function(T,z,O){if(T<=z&&this.root)return this.root.queryInterval(T,z,O)},Object.defineProperty(g,"count",{get:function(){return this.root?this.root.count:0}}),Object.defineProperty(g,"intervals",{get:function(){return this.root?this.root.intervals([]):[]}});function P(T){return!T||T.length===0?new M(null):new M(C(T))}},3387:function(i,a,o){var s;(function(){"use strict";var l={not_string:/[^s]/,not_bool:/[^t]/,not_type:/[^T]/,not_primitive:/[^v]/,number:/[diefg]/,numeric_arg:/[bcdiefguxX]/,json:/[j]/,not_json:/[^j]/,text:/^[^\x25]+/,modulo:/^\x25{2}/,placeholder:/^\x25(?:([1-9]\d*)\$|\(([^)]+)\))?(\+)?(0|'[^$])?(-)?(\d+)?(?:\.(\d+))?([b-gijostTuvxX])/,key:/^([a-z_][a-z_\d]*)/i,key_access:/^\.([a-z_][a-z_\d]*)/i,index_access:/^\[(\d+)\]/,sign:/^[+-]/};function u(v){return f(d(v),arguments)}function c(v,_){return u.apply(null,[v].concat(_||[]))}function f(v,_){var b=1,p=v.length,k,E="",S,L,x,C,M,g,P,T;for(S=0;S<p;S++)if(typeof v[S]=="string")E+=v[S];else if(typeof v[S]=="object"){if(x=v[S],x.keys)for(k=_[b],L=0;L<x.keys.length;L++){if(k==null)throw new Error(u('[sprintf] Cannot access property "%s" of undefined value "%s"',x.keys[L],x.keys[L-1]));k=k[x.keys[L]]}else x.param_no?k=_[x.param_no]:k=_[b++];if(l.not_type.test(x.type)&&l.not_primitive.test(x.type)&&k instanceof Function&&(k=k()),l.numeric_arg.test(x.type)&&typeof k!="number"&&isNaN(k))throw new TypeError(u("[sprintf] expecting number but found %T",k));switch(l.number.test(x.type)&&(P=k>=0),x.type){case"b":k=parseInt(k,10).toString(2);break;case"c":k=String.fromCharCode(parseInt(k,10));break;case"d":case"i":k=parseInt(k,10);break;case"j":k=JSON.stringify(k,null,x.width?parseInt(x.width):0);break;case"e":k=x.precision?parseFloat(k).toExponential(x.precision):parseFloat(k).toExponential();break;case"f":k=x.precision?parseFloat(k).toFixed(x.precision):parseFloat(k);break;case"g":k=x.precision?String(Number(k.toPrecision(x.precision))):parseFloat(k);break;case"o":k=(parseInt(k,10)>>>0).toString(8);break;case"s":k=String(k),k=x.precision?k.substring(0,x.precision):k;break;case"t":k=String(!!k),k=x.precision?k.substring(0,x.precision):k;break;case"T":k=Object.prototype.toString.call(k).slice(8,-1).toLowerCase(),k=x.precision?k.substring(0,x.precision):k;break;case"u":k=parseInt(k,10)>>>0;break;case"v":k=k.valueOf(),k=x.precision?k.substring(0,x.precision):k;break;case"x":k=(parseInt(k,10)>>>0).toString(16);break;case"X":k=(parseInt(k,10)>>>0).toString(16).toUpperCase();break}l.json.test(x.type)?E+=k:(l.number.test(x.type)&&(!P||x.sign)?(T=P?"+":"-",k=k.toString().replace(l.sign,"")):T="",M=x.pad_char?x.pad_char==="0"?"0":x.pad_char.charAt(1):" ",g=x.width-(T+k).length,C=x.width&&g>0?M.repeat(g):"",E+=x.align?T+k+C:M==="0"?T+C+k:C+T+k)}return E}var h=Object.create(null);function d(v){if(h[v])return h[v];for(var _=v,b,p=[],k=0;_;){if((b=l.text.exec(_))!==null)p.push(b[0]);else if((b=l.modulo.exec(_))!==null)p.push("%");else if((b=l.placeholder.exec(_))!==null){if(b[2]){k|=1;var E=[],S=b[2],L=[];if((L=l.key.exec(S))!==null)for(E.push(L[1]);(S=S.substring(L[0].length))!=="";)if((L=l.key_access.exec(S))!==null)E.push(L[1]);else if((L=l.index_access.exec(S))!==null)E.push(L[1]);else throw new SyntaxError("[sprintf] failed to parse named argument key");else throw new SyntaxError("[sprintf] failed to parse named argument key");b[2]=E}else k|=2;if(k===3)throw new Error("[sprintf] mixing positional and named placeholders is not (yet) supported");p.push({placeholder:b[0],param_no:b[1],keys:b[2],sign:b[3],pad_char:b[4],align:b[5],width:b[6],precision:b[7],type:b[8]})}else throw new SyntaxError("[sprintf] unexpected placeholder");_=_.substring(b[0].length)}return h[v]=p}a.sprintf=u,a.vsprintf=c,typeof window!="undefined"&&(window.sprintf=u,window.vsprintf=c,s=function(){return{sprintf:u,vsprintf:c}}.call(a,o,a,i),s!==void 0&&(i.exports=s))})()},3390:function(i){i.exports=a;function a(o,s,l,u){var c=new Float32Array(4);return c[0]=o,c[1]=s,c[2]=l,c[3]=u,c}},3436:function(i,a,o){"use strict";var s=o(3236),l=o(9405),u=s([`precision highp float;
+#define GLSLIFY 1
+
+attribute vec3 position, offset;
+attribute vec4 color;
+uniform mat4 model, view, projection;
+uniform float capSize;
+varying vec4 fragColor;
+varying vec3 fragPosition;
+
+void main() {
+  vec4 worldPosition  = model * vec4(position, 1.0);
+  worldPosition       = (worldPosition / worldPosition.w) + vec4(capSize * offset, 0.0);
+  gl_Position         = projection * (view * worldPosition);
+  fragColor           = color;
+  fragPosition        = position;
+}`]),c=s([`precision highp float;
+#define GLSLIFY 1
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+uniform vec3 clipBounds[2];
+uniform float opacity;
+varying vec3 fragPosition;
+varying vec4 fragColor;
+
+void main() {
+  if (
+    outOfRange(clipBounds[0], clipBounds[1], fragPosition) ||
+    fragColor.a * opacity == 0.
+  ) discard;
+
+  gl_FragColor = opacity * fragColor;
+}`]);i.exports=function(f){return l(f,u,c,null,[{name:"position",type:"vec3"},{name:"color",type:"vec4"},{name:"offset",type:"vec3"}])}},3502:function(i,a,o){i.exports=u;var s=o(5995),l=o(9127);function u(c,f){return l(s(c,f))}},3508:function(i,a,o){var s=o(6852);s=s.slice().filter(function(l){return!/^(gl\_|texture)/.test(l)}),i.exports=s.concat(["gl_VertexID","gl_InstanceID","gl_Position","gl_PointSize","gl_FragCoord","gl_FrontFacing","gl_FragDepth","gl_PointCoord","gl_MaxVertexAttribs","gl_MaxVertexUniformVectors","gl_MaxVertexOutputVectors","gl_MaxFragmentInputVectors","gl_MaxVertexTextureImageUnits","gl_MaxCombinedTextureImageUnits","gl_MaxTextureImageUnits","gl_MaxFragmentUniformVectors","gl_MaxDrawBuffers","gl_MinProgramTexelOffset","gl_MaxProgramTexelOffset","gl_DepthRangeParameters","gl_DepthRange","trunc","round","roundEven","isnan","isinf","floatBitsToInt","floatBitsToUint","intBitsToFloat","uintBitsToFloat","packSnorm2x16","unpackSnorm2x16","packUnorm2x16","unpackUnorm2x16","packHalf2x16","unpackHalf2x16","outerProduct","transpose","determinant","inverse","texture","textureSize","textureProj","textureLod","textureOffset","texelFetch","texelFetchOffset","textureProjOffset","textureLodOffset","textureProjLod","textureProjLodOffset","textureGrad","textureGradOffset","textureProjGrad","textureProjGradOffset"])},3536:function(i){i.exports=a;function a(o,s){var l=s[0],u=s[1],c=s[2],f=l*l+u*u+c*c;return f>0&&(f=1/Math.sqrt(f),o[0]=s[0]*f,o[1]=s[1]*f,o[2]=s[2]*f),o}},3545:function(i,a,o){"use strict";i.exports=f;var s=o(8105),l=s("lo<p0"),u=8;function c(h,d,v,_,b,p){for(var k=2*h,E=k*(v+1)+d,S=v+1;S<_;++S,E+=k)for(var L=b[E],x=S,C=k*(S-1);x>v&&b[C+d]>L;--x,C-=k){for(var M=C,g=C+k,P=0;P<k;++P,++M,++g){var T=b[M];b[M]=b[g],b[g]=T}var z=p[x];p[x]=p[x-1],p[x-1]=z}}function f(h,d,v,_,b,p){if(_<=v+1)return v;for(var k=v,E=_,S=_+v>>>1,L=2*h,x=S,C=b[L*S+d];k<E;){if(E-k<u){c(h,d,k,E,b,p),C=b[L*S+d];break}var M=E-k,g=Math.random()*M+k|0,P=b[L*g+d],T=Math.random()*M+k|0,z=b[L*T+d],O=Math.random()*M+k|0,V=b[L*O+d];P<=z?V>=z?(x=T,C=z):P>=V?(x=g,C=P):(x=O,C=V):z>=V?(x=T,C=z):V>=P?(x=g,C=P):(x=O,C=V);for(var j=L*(E-1),N=L*x,G=0;G<L;++G,++j,++N){var Z=b[j];b[j]=b[N],b[N]=Z}var H=p[E-1];p[E-1]=p[x],p[x]=H,x=l(h,d,k,E-1,b,p,C);for(var j=L*(E-1),N=L*x,G=0;G<L;++G,++j,++N){var Z=b[j];b[j]=b[N],b[N]=Z}var H=p[E-1];if(p[E-1]=p[x],p[x]=H,S<x){for(E=x-1;k<E&&b[L*(E-1)+d]===C;)E-=1;E+=1}else if(x<S)for(k=x+1;k<E&&b[L*k+d]===C;)k+=1;else break}return l(h,d,v,S,b,p,b[L*S+d])}},3576:function(i){i.exports=a;function a(o,s,l){return o[0]=s[0]*l[0],o[1]=s[1]*l[1],o[2]=s[2]*l[2],o[3]=s[3]*l[3],o}},3589:function(i,a,o){"use strict";i.exports=_;var s=o(2260),l=o(1888),u=o(9618),c=o(8828).nextPow2,f=function(b,p,k){for(var E=1e8,S=-1,L=-1,x=b.shape[0],C=b.shape[1],M=0;M<x;M++)for(var g=0;g<C;g++){var P=b.get(M,g,0),T=b.get(M,g,1),z=b.get(M,g,2),O=b.get(M,g,3);if(P<255||T<255||z<255||O<255){var V=p-M,G=k-g,Z=V*V+G*G;Z<E&&(E=Z,S=M,L=g)}}return[S,L,E]};function h(b,p,k,E,S){this.coord=[b,p],this.id=k,this.value=E,this.distance=S}function d(b,p,k){this.gl=b,this.fbo=p,this.buffer=k,this._readTimeout=null;var E=this;this._readCallback=function(){E.gl&&(p.bind(),b.readPixels(0,0,p.shape[0],p.shape[1],b.RGBA,b.UNSIGNED_BYTE,E.buffer),E._readTimeout=null)}}var v=d.prototype;Object.defineProperty(v,"shape",{get:function(){return this.gl?this.fbo.shape.slice():[0,0]},set:function(b){if(this.gl){this.fbo.shape=b;var p=this.fbo.shape[0],k=this.fbo.shape[1];if(k*p*4>this.buffer.length){l.free(this.buffer);for(var E=this.buffer=l.mallocUint8(c(k*p*4)),S=0;S<k*p*4;++S)E[S]=255}return b}}}),v.begin=function(){var b=this.gl,p=this.shape;b&&(this.fbo.bind(),b.clearColor(1,1,1,1),b.clear(b.COLOR_BUFFER_BIT|b.DEPTH_BUFFER_BIT))},v.end=function(){var b=this.gl;b&&(b.bindFramebuffer(b.FRAMEBUFFER,null),this._readTimeout||clearTimeout(this._readTimeout),this._readTimeout=setTimeout(this._readCallback,1))},v.query=function(b,p,k){if(!this.gl)return null;var E=this.fbo.shape.slice();b=b|0,p=p|0,typeof k!="number"&&(k=1);var S=Math.min(Math.max(b-k,0),E[0])|0,L=Math.min(Math.max(b+k,0),E[0])|0,x=Math.min(Math.max(p-k,0),E[1])|0,C=Math.min(Math.max(p+k,0),E[1])|0;if(L<=S||C<=x)return null;var M=[L-S,C-x],g=u(this.buffer,[M[0],M[1],4],[4,E[0]*4,1],4*(S+E[0]*x)),P=f(g.hi(M[0],M[1],1),k,k),T=P[0],z=P[1];if(T<0||Math.pow(this.radius,2)<P[2])return null;var O=g.get(T,z,0),V=g.get(T,z,1),G=g.get(T,z,2),Z=g.get(T,z,3);return new h(T+S|0,z+x|0,O,[V,G,Z],Math.sqrt(P[2]))},v.dispose=function(){this.gl&&(this.fbo.dispose(),l.free(this.buffer),this.gl=null,this._readTimeout&&clearTimeout(this._readTimeout))};function _(b,p){var k=p[0],E=p[1],S={},L=s(b,k,E,S),x=l.mallocUint8(k*E*4);return new d(b,L,x)}},3628:function(i,a,o){"use strict";var s=o(1338),l=o(727);function u(h,d){for(var v=0,_=h.length,b=0;b<_;++b)v+=h[b]*d[b];return v}function c(h){var d=h.length;if(d===0)return[];var v=h[0].length,_=s([h.length+1,h.length+1],1),b=s([h.length+1],1);_[d][d]=0;for(var p=0;p<d;++p){for(var k=0;k<=p;++k)_[k][p]=_[p][k]=2*u(h[p],h[k]);b[p]=u(h[p],h[p])}for(var E=l(_,b),S=0,L=E[d+1],p=0;p<L.length;++p)S+=L[p];for(var x=new Array(d),p=0;p<d;++p){for(var L=E[p],C=0,k=0;k<L.length;++k)C+=L[k];x[p]=C/S}return x}function f(h){if(h.length===0)return[];for(var d=h[0].length,v=s([d]),_=c(h),b=0;b<h.length;++b)for(var p=0;p<d;++p)v[p]+=h[b][p]*_[b];return v}f.barycenetric=c,i.exports=f},3637:function(i,a,o){"use strict";i.exports=_;var s=o(6504),l=o(8697),u=o(5572),c=o(7721),f=o(544),h=o(2653),d=o(8987);function v(b,p){return u(s(b[0],p[1]),s(b[1],p[0]))}function _(b,p,k,E){var S=f(p,b),L=f(E,k),x=v(S,L);if(c(x)===0)return null;var C=f(b,k),M=v(L,C),g=l(M,x),P=d(S,g),T=h(b,P);return T}},3642:function(i){i.exports={jet:[{index:0,rgb:[0,0,131]},{index:.125,rgb:[0,60,170]},{index:.375,rgb:[5,255,255]},{index:.625,rgb:[255,255,0]},{index:.875,rgb:[250,0,0]},{index:1,rgb:[128,0,0]}],hsv:[{index:0,rgb:[255,0,0]},{index:.169,rgb:[253,255,2]},{index:.173,rgb:[247,255,2]},{index:.337,rgb:[0,252,4]},{index:.341,rgb:[0,252,10]},{index:.506,rgb:[1,249,255]},{index:.671,rgb:[2,0,253]},{index:.675,rgb:[8,0,253]},{index:.839,rgb:[255,0,251]},{index:.843,rgb:[255,0,245]},{index:1,rgb:[255,0,6]}],hot:[{index:0,rgb:[0,0,0]},{index:.3,rgb:[230,0,0]},{index:.6,rgb:[255,210,0]},{index:1,rgb:[255,255,255]}],spring:[{index:0,rgb:[255,0,255]},{index:1,rgb:[255,255,0]}],summer:[{index:0,rgb:[0,128,102]},{index:1,rgb:[255,255,102]}],autumn:[{index:0,rgb:[255,0,0]},{index:1,rgb:[255,255,0]}],winter:[{index:0,rgb:[0,0,255]},{index:1,rgb:[0,255,128]}],bone:[{index:0,rgb:[0,0,0]},{index:.376,rgb:[84,84,116]},{index:.753,rgb:[169,200,200]},{index:1,rgb:[255,255,255]}],copper:[{index:0,rgb:[0,0,0]},{index:.804,rgb:[255,160,102]},{index:1,rgb:[255,199,127]}],greys:[{index:0,rgb:[0,0,0]},{index:1,rgb:[255,255,255]}],yignbu:[{index:0,rgb:[8,29,88]},{index:.125,rgb:[37,52,148]},{index:.25,rgb:[34,94,168]},{index:.375,rgb:[29,145,192]},{index:.5,rgb:[65,182,196]},{index:.625,rgb:[127,205,187]},{index:.75,rgb:[199,233,180]},{index:.875,rgb:[237,248,217]},{index:1,rgb:[255,255,217]}],greens:[{index:0,rgb:[0,68,27]},{index:.125,rgb:[0,109,44]},{index:.25,rgb:[35,139,69]},{index:.375,rgb:[65,171,93]},{index:.5,rgb:[116,196,118]},{index:.625,rgb:[161,217,155]},{index:.75,rgb:[199,233,192]},{index:.875,rgb:[229,245,224]},{index:1,rgb:[247,252,245]}],yiorrd:[{index:0,rgb:[128,0,38]},{index:.125,rgb:[189,0,38]},{index:.25,rgb:[227,26,28]},{index:.375,rgb:[252,78,42]},{index:.5,rgb:[253,141,60]},{index:.625,rgb:[254,178,76]},{index:.75,rgb:[254,217,118]},{index:.875,rgb:[255,237,160]},{index:1,rgb:[255,255,204]}],bluered:[{index:0,rgb:[0,0,255]},{index:1,rgb:[255,0,0]}],rdbu:[{index:0,rgb:[5,10,172]},{index:.35,rgb:[106,137,247]},{index:.5,rgb:[190,190,190]},{index:.6,rgb:[220,170,132]},{index:.7,rgb:[230,145,90]},{index:1,rgb:[178,10,28]}],picnic:[{index:0,rgb:[0,0,255]},{index:.1,rgb:[51,153,255]},{index:.2,rgb:[102,204,255]},{index:.3,rgb:[153,204,255]},{index:.4,rgb:[204,204,255]},{index:.5,rgb:[255,255,255]},{index:.6,rgb:[255,204,255]},{index:.7,rgb:[255,153,255]},{index:.8,rgb:[255,102,204]},{index:.9,rgb:[255,102,102]},{index:1,rgb:[255,0,0]}],rainbow:[{index:0,rgb:[150,0,90]},{index:.125,rgb:[0,0,200]},{index:.25,rgb:[0,25,255]},{index:.375,rgb:[0,152,255]},{index:.5,rgb:[44,255,150]},{index:.625,rgb:[151,255,0]},{index:.75,rgb:[255,234,0]},{index:.875,rgb:[255,111,0]},{index:1,rgb:[255,0,0]}],portland:[{index:0,rgb:[12,51,131]},{index:.25,rgb:[10,136,186]},{index:.5,rgb:[242,211,56]},{index:.75,rgb:[242,143,56]},{index:1,rgb:[217,30,30]}],blackbody:[{index:0,rgb:[0,0,0]},{index:.2,rgb:[230,0,0]},{index:.4,rgb:[230,210,0]},{index:.7,rgb:[255,255,255]},{index:1,rgb:[160,200,255]}],earth:[{index:0,rgb:[0,0,130]},{index:.1,rgb:[0,180,180]},{index:.2,rgb:[40,210,40]},{index:.4,rgb:[230,230,50]},{index:.6,rgb:[120,70,20]},{index:1,rgb:[255,255,255]}],electric:[{index:0,rgb:[0,0,0]},{index:.15,rgb:[30,0,100]},{index:.4,rgb:[120,0,100]},{index:.6,rgb:[160,90,0]},{index:.8,rgb:[230,200,0]},{index:1,rgb:[255,250,220]}],alpha:[{index:0,rgb:[255,255,255,0]},{index:1,rgb:[255,255,255,1]}],viridis:[{index:0,rgb:[68,1,84]},{index:.13,rgb:[71,44,122]},{index:.25,rgb:[59,81,139]},{index:.38,rgb:[44,113,142]},{index:.5,rgb:[33,144,141]},{index:.63,rgb:[39,173,129]},{index:.75,rgb:[92,200,99]},{index:.88,rgb:[170,220,50]},{index:1,rgb:[253,231,37]}],inferno:[{index:0,rgb:[0,0,4]},{index:.13,rgb:[31,12,72]},{index:.25,rgb:[85,15,109]},{index:.38,rgb:[136,34,106]},{index:.5,rgb:[186,54,85]},{index:.63,rgb:[227,89,51]},{index:.75,rgb:[249,140,10]},{index:.88,rgb:[249,201,50]},{index:1,rgb:[252,255,164]}],magma:[{index:0,rgb:[0,0,4]},{index:.13,rgb:[28,16,68]},{index:.25,rgb:[79,18,123]},{index:.38,rgb:[129,37,129]},{index:.5,rgb:[181,54,122]},{index:.63,rgb:[229,80,100]},{index:.75,rgb:[251,135,97]},{index:.88,rgb:[254,194,135]},{index:1,rgb:[252,253,191]}],plasma:[{index:0,rgb:[13,8,135]},{index:.13,rgb:[75,3,161]},{index:.25,rgb:[125,3,168]},{index:.38,rgb:[168,34,150]},{index:.5,rgb:[203,70,121]},{index:.63,rgb:[229,107,93]},{index:.75,rgb:[248,148,65]},{index:.88,rgb:[253,195,40]},{index:1,rgb:[240,249,33]}],warm:[{index:0,rgb:[125,0,179]},{index:.13,rgb:[172,0,187]},{index:.25,rgb:[219,0,170]},{index:.38,rgb:[255,0,130]},{index:.5,rgb:[255,63,74]},{index:.63,rgb:[255,123,0]},{index:.75,rgb:[234,176,0]},{index:.88,rgb:[190,228,0]},{index:1,rgb:[147,255,0]}],cool:[{index:0,rgb:[125,0,179]},{index:.13,rgb:[116,0,218]},{index:.25,rgb:[98,74,237]},{index:.38,rgb:[68,146,231]},{index:.5,rgb:[0,204,197]},{index:.63,rgb:[0,247,146]},{index:.75,rgb:[0,255,88]},{index:.88,rgb:[40,255,8]},{index:1,rgb:[147,255,0]}],"rainbow-soft":[{index:0,rgb:[125,0,179]},{index:.1,rgb:[199,0,180]},{index:.2,rgb:[255,0,121]},{index:.3,rgb:[255,108,0]},{index:.4,rgb:[222,194,0]},{index:.5,rgb:[150,255,0]},{index:.6,rgb:[0,255,55]},{index:.7,rgb:[0,246,150]},{index:.8,rgb:[50,167,222]},{index:.9,rgb:[103,51,235]},{index:1,rgb:[124,0,186]}],bathymetry:[{index:0,rgb:[40,26,44]},{index:.13,rgb:[59,49,90]},{index:.25,rgb:[64,76,139]},{index:.38,rgb:[63,110,151]},{index:.5,rgb:[72,142,158]},{index:.63,rgb:[85,174,163]},{index:.75,rgb:[120,206,163]},{index:.88,rgb:[187,230,172]},{index:1,rgb:[253,254,204]}],cdom:[{index:0,rgb:[47,15,62]},{index:.13,rgb:[87,23,86]},{index:.25,rgb:[130,28,99]},{index:.38,rgb:[171,41,96]},{index:.5,rgb:[206,67,86]},{index:.63,rgb:[230,106,84]},{index:.75,rgb:[242,149,103]},{index:.88,rgb:[249,193,135]},{index:1,rgb:[254,237,176]}],chlorophyll:[{index:0,rgb:[18,36,20]},{index:.13,rgb:[25,63,41]},{index:.25,rgb:[24,91,59]},{index:.38,rgb:[13,119,72]},{index:.5,rgb:[18,148,80]},{index:.63,rgb:[80,173,89]},{index:.75,rgb:[132,196,122]},{index:.88,rgb:[175,221,162]},{index:1,rgb:[215,249,208]}],density:[{index:0,rgb:[54,14,36]},{index:.13,rgb:[89,23,80]},{index:.25,rgb:[110,45,132]},{index:.38,rgb:[120,77,178]},{index:.5,rgb:[120,113,213]},{index:.63,rgb:[115,151,228]},{index:.75,rgb:[134,185,227]},{index:.88,rgb:[177,214,227]},{index:1,rgb:[230,241,241]}],"freesurface-blue":[{index:0,rgb:[30,4,110]},{index:.13,rgb:[47,14,176]},{index:.25,rgb:[41,45,236]},{index:.38,rgb:[25,99,212]},{index:.5,rgb:[68,131,200]},{index:.63,rgb:[114,156,197]},{index:.75,rgb:[157,181,203]},{index:.88,rgb:[200,208,216]},{index:1,rgb:[241,237,236]}],"freesurface-red":[{index:0,rgb:[60,9,18]},{index:.13,rgb:[100,17,27]},{index:.25,rgb:[142,20,29]},{index:.38,rgb:[177,43,27]},{index:.5,rgb:[192,87,63]},{index:.63,rgb:[205,125,105]},{index:.75,rgb:[216,162,148]},{index:.88,rgb:[227,199,193]},{index:1,rgb:[241,237,236]}],oxygen:[{index:0,rgb:[64,5,5]},{index:.13,rgb:[106,6,15]},{index:.25,rgb:[144,26,7]},{index:.38,rgb:[168,64,3]},{index:.5,rgb:[188,100,4]},{index:.63,rgb:[206,136,11]},{index:.75,rgb:[220,174,25]},{index:.88,rgb:[231,215,44]},{index:1,rgb:[248,254,105]}],par:[{index:0,rgb:[51,20,24]},{index:.13,rgb:[90,32,35]},{index:.25,rgb:[129,44,34]},{index:.38,rgb:[159,68,25]},{index:.5,rgb:[182,99,19]},{index:.63,rgb:[199,134,22]},{index:.75,rgb:[212,171,35]},{index:.88,rgb:[221,210,54]},{index:1,rgb:[225,253,75]}],phase:[{index:0,rgb:[145,105,18]},{index:.13,rgb:[184,71,38]},{index:.25,rgb:[186,58,115]},{index:.38,rgb:[160,71,185]},{index:.5,rgb:[110,97,218]},{index:.63,rgb:[50,123,164]},{index:.75,rgb:[31,131,110]},{index:.88,rgb:[77,129,34]},{index:1,rgb:[145,105,18]}],salinity:[{index:0,rgb:[42,24,108]},{index:.13,rgb:[33,50,162]},{index:.25,rgb:[15,90,145]},{index:.38,rgb:[40,118,137]},{index:.5,rgb:[59,146,135]},{index:.63,rgb:[79,175,126]},{index:.75,rgb:[120,203,104]},{index:.88,rgb:[193,221,100]},{index:1,rgb:[253,239,154]}],temperature:[{index:0,rgb:[4,35,51]},{index:.13,rgb:[23,51,122]},{index:.25,rgb:[85,59,157]},{index:.38,rgb:[129,79,143]},{index:.5,rgb:[175,95,130]},{index:.63,rgb:[222,112,101]},{index:.75,rgb:[249,146,66]},{index:.88,rgb:[249,196,65]},{index:1,rgb:[232,250,91]}],turbidity:[{index:0,rgb:[34,31,27]},{index:.13,rgb:[65,50,41]},{index:.25,rgb:[98,69,52]},{index:.38,rgb:[131,89,57]},{index:.5,rgb:[161,112,59]},{index:.63,rgb:[185,140,66]},{index:.75,rgb:[202,174,88]},{index:.88,rgb:[216,209,126]},{index:1,rgb:[233,246,171]}],"velocity-blue":[{index:0,rgb:[17,32,64]},{index:.13,rgb:[35,52,116]},{index:.25,rgb:[29,81,156]},{index:.38,rgb:[31,113,162]},{index:.5,rgb:[50,144,169]},{index:.63,rgb:[87,173,176]},{index:.75,rgb:[149,196,189]},{index:.88,rgb:[203,221,211]},{index:1,rgb:[254,251,230]}],"velocity-green":[{index:0,rgb:[23,35,19]},{index:.13,rgb:[24,64,38]},{index:.25,rgb:[11,95,45]},{index:.38,rgb:[39,123,35]},{index:.5,rgb:[95,146,12]},{index:.63,rgb:[152,165,18]},{index:.75,rgb:[201,186,69]},{index:.88,rgb:[233,216,137]},{index:1,rgb:[255,253,205]}],cubehelix:[{index:0,rgb:[0,0,0]},{index:.07,rgb:[22,5,59]},{index:.13,rgb:[60,4,105]},{index:.2,rgb:[109,1,135]},{index:.27,rgb:[161,0,147]},{index:.33,rgb:[210,2,142]},{index:.4,rgb:[251,11,123]},{index:.47,rgb:[255,29,97]},{index:.53,rgb:[255,54,69]},{index:.6,rgb:[255,85,46]},{index:.67,rgb:[255,120,34]},{index:.73,rgb:[255,157,37]},{index:.8,rgb:[241,191,57]},{index:.87,rgb:[224,220,93]},{index:.93,rgb:[218,241,142]},{index:1,rgb:[227,253,198]}]}},3711:function(i,a,o){"use strict";i.exports=d;var s=o(2640),l=o(781),u={"2d":function(v,_,b){var p=v({order:_,scalarArguments:3,getters:b==="generic"?[0]:void 0,phase:function(E,S,L,x){return E>x|0},vertex:function(E,S,L,x,C,M,g,P,T,z,O,V,G){var Z=(g<<0)+(P<<1)+(T<<2)+(z<<3)|0;if(!(Z===0||Z===15))switch(Z){case 0:O.push([E-.5,S-.5]);break;case 1:O.push([E-.25-.25*(x+L-2*G)/(L-x),S-.25-.25*(C+L-2*G)/(L-C)]);break;case 2:O.push([E-.75-.25*(-x-L+2*G)/(x-L),S-.25-.25*(M+x-2*G)/(x-M)]);break;case 3:O.push([E-.5,S-.5-.5*(C+L+M+x-4*G)/(L-C+x-M)]);break;case 4:O.push([E-.25-.25*(M+C-2*G)/(C-M),S-.75-.25*(-C-L+2*G)/(C-L)]);break;case 5:O.push([E-.5-.5*(x+L+M+C-4*G)/(L-x+C-M),S-.5]);break;case 6:O.push([E-.5-.25*(-x-L+M+C)/(x-L+C-M),S-.5-.25*(-C-L+M+x)/(C-L+x-M)]);break;case 7:O.push([E-.75-.25*(M+C-2*G)/(C-M),S-.75-.25*(M+x-2*G)/(x-M)]);break;case 8:O.push([E-.75-.25*(-M-C+2*G)/(M-C),S-.75-.25*(-M-x+2*G)/(M-x)]);break;case 9:O.push([E-.5-.25*(x+L+-M-C)/(L-x+M-C),S-.5-.25*(C+L+-M-x)/(L-C+M-x)]);break;case 10:O.push([E-.5-.5*(-x-L+-M-C+4*G)/(x-L+M-C),S-.5]);break;case 11:O.push([E-.25-.25*(-M-C+2*G)/(M-C),S-.75-.25*(C+L-2*G)/(L-C)]);break;case 12:O.push([E-.5,S-.5-.5*(-C-L+-M-x+4*G)/(C-L+M-x)]);break;case 13:O.push([E-.75-.25*(x+L-2*G)/(L-x),S-.25-.25*(-M-x+2*G)/(M-x)]);break;case 14:O.push([E-.25-.25*(-x-L+2*G)/(x-L),S-.25-.25*(-C-L+2*G)/(C-L)]);break;case 15:O.push([E-.5,S-.5]);break}},cell:function(E,S,L,x,C,M,g,P,T){C?P.push([E,S]):P.push([S,E])}});return function(k,E){var S=[],L=[];return p(k,S,L,E),{positions:S,cells:L}}}};function c(v,_){var b=v.length+"d",p=u[b];if(p)return p(s,v,_)}function f(v,_){for(var b=l(v,_),p=b.length,k=new Array(p),E=new Array(p),S=0;S<p;++S)k[S]=[b[S]],E[S]=[S];return{positions:k,cells:E}}var h={};function d(v,k){if(v.dimension<=0)return{positions:[],cells:[]};if(v.dimension===1)return f(v,k);var b=v.order.join()+"-"+v.dtype,p=h[b],k=+k||0;return p||(p=h[b]=c(v.order,v.dtype)),p(v,k)}},3750:function(i){i.exports=a;function a(o,s){return o[0]*s[0]+o[1]*s[1]+o[2]*s[2]+o[3]*s[3]}},3778:function(i,a){a.read=function(o,s,l,u,c){var f,h,d=c*8-u-1,v=(1<<d)-1,_=v>>1,b=-7,p=l?c-1:0,k=l?-1:1,E=o[s+p];for(p+=k,f=E&(1<<-b)-1,E>>=-b,b+=d;b>0;f=f*256+o[s+p],p+=k,b-=8);for(h=f&(1<<-b)-1,f>>=-b,b+=u;b>0;h=h*256+o[s+p],p+=k,b-=8);if(f===0)f=1-_;else{if(f===v)return h?NaN:(E?-1:1)*(1/0);h=h+Math.pow(2,u),f=f-_}return(E?-1:1)*h*Math.pow(2,f-u)},a.write=function(o,s,l,u,c,f){var h,d,v,_=f*8-c-1,b=(1<<_)-1,p=b>>1,k=c===23?Math.pow(2,-24)-Math.pow(2,-77):0,E=u?0:f-1,S=u?1:-1,L=s<0||s===0&&1/s<0?1:0;for(s=Math.abs(s),isNaN(s)||s===1/0?(d=isNaN(s)?1:0,h=b):(h=Math.floor(Math.log(s)/Math.LN2),s*(v=Math.pow(2,-h))<1&&(h--,v*=2),h+p>=1?s+=k/v:s+=k*Math.pow(2,1-p),s*v>=2&&(h++,v/=2),h+p>=b?(d=0,h=b):h+p>=1?(d=(s*v-1)*Math.pow(2,c),h=h+p):(d=s*Math.pow(2,p-1)*Math.pow(2,c),h=0));c>=8;o[l+E]=d&255,E+=S,d/=256,c-=8);for(h=h<<c|d,_+=c;_>0;o[l+E]=h&255,E+=S,h/=256,_-=8);o[l+E-S]|=L*128}},3788:function(i,a,o){"use strict";var s=o(8507),l=o(2419);i.exports=u;function u(c,f){return s(c,f)||l(c)-l(f)}},3837:function(i,a,o){"use strict";i.exports=O;var s=o(4935),l=o(501),u=o(5304),c=o(6429),f=o(6444),h=new Float32Array([1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1]),d=ArrayBuffer,v=DataView;function _(V){return d.isView(V)&&!(V instanceof v)}function b(V){return Array.isArray(V)||_(V)}function p(V,G){return V[0]=G[0],V[1]=G[1],V[2]=G[2],V}function k(V){this.gl=V,this.pixelRatio=1,this.bounds=[[-10,-10,-10],[10,10,10]],this.ticks=[[],[],[]],this.autoTicks=!0,this.tickSpacing=[1,1,1],this.tickEnable=[!0,!0,!0],this.tickFont=["sans-serif","sans-serif","sans-serif"],this.tickFontStyle=["normal","normal","normal"],this.tickFontWeight=["normal","normal","normal"],this.tickFontVariant=["normal","normal","normal"],this.tickSize=[12,12,12],this.tickAngle=[0,0,0],this.tickAlign=["auto","auto","auto"],this.tickColor=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.tickPad=[10,10,10],this.lastCubeProps={cubeEdges:[0,0,0],axis:[0,0,0]},this.labels=["x","y","z"],this.labelEnable=[!0,!0,!0],this.labelFont=["sans-serif","sans-serif","sans-serif"],this.labelFontStyle=["normal","normal","normal"],this.labelFontWeight=["normal","normal","normal"],this.labelFontVariant=["normal","normal","normal"],this.labelSize=[20,20,20],this.labelAngle=[0,0,0],this.labelAlign=["auto","auto","auto"],this.labelColor=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.labelPad=[10,10,10],this.lineEnable=[!0,!0,!0],this.lineMirror=[!1,!1,!1],this.lineWidth=[1,1,1],this.lineColor=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.lineTickEnable=[!0,!0,!0],this.lineTickMirror=[!1,!1,!1],this.lineTickLength=[0,0,0],this.lineTickWidth=[1,1,1],this.lineTickColor=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.gridEnable=[!0,!0,!0],this.gridWidth=[1,1,1],this.gridColor=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.zeroEnable=[!0,!0,!0],this.zeroLineColor=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.zeroLineWidth=[2,2,2],this.backgroundEnable=[!1,!1,!1],this.backgroundColor=[[.8,.8,.8,.5],[.8,.8,.8,.5],[.8,.8,.8,.5]],this._firstInit=!0,this._text=null,this._lines=null,this._background=u(V)}var E=k.prototype;E.update=function(V){V=V||{};function G(Ae,Fe,Pe){if(Pe in V){var ge=V[Pe],Re=this[Pe],ce;(Ae?b(ge)&&b(ge[0]):b(ge))?this[Pe]=ce=[Fe(ge[0]),Fe(ge[1]),Fe(ge[2])]:this[Pe]=ce=[Fe(ge),Fe(ge),Fe(ge)];for(var Ze=0;Ze<3;++Ze)if(ce[Ze]!==Re[Ze])return!0}return!1}var Z=G.bind(this,!1,Number),j=G.bind(this,!1,Boolean),N=G.bind(this,!1,String),H=G.bind(this,!0,function(Ae){if(b(Ae)){if(Ae.length===3)return[+Ae[0],+Ae[1],+Ae[2],1];if(Ae.length===4)return[+Ae[0],+Ae[1],+Ae[2],+Ae[3]]}return[0,0,0,1]}),te,oe=!1,_e=!1;if("bounds"in V)for(var Ee=V.bounds,Ce=0;Ce<2;++Ce)for(var me=0;me<3;++me)Ee[Ce][me]!==this.bounds[Ce][me]&&(_e=!0),this.bounds[Ce][me]=Ee[Ce][me];if("ticks"in V){te=V.ticks,oe=!0,this.autoTicks=!1;for(var Ce=0;Ce<3;++Ce)this.tickSpacing[Ce]=0}else Z("tickSpacing")&&(this.autoTicks=!0,_e=!0);if(this._firstInit&&("ticks"in V||"tickSpacing"in V||(this.autoTicks=!0),_e=!0,oe=!0,this._firstInit=!1),_e&&this.autoTicks&&(te=f.create(this.bounds,this.tickSpacing),oe=!0),oe){for(var Ce=0;Ce<3;++Ce)te[Ce].sort(function(Fe,Pe){return Fe.x-Pe.x});f.equal(te,this.ticks)?oe=!1:this.ticks=te}j("tickEnable"),N("tickFont")&&(oe=!0),N("tickFontStyle")&&(oe=!0),N("tickFontWeight")&&(oe=!0),N("tickFontVariant")&&(oe=!0),Z("tickSize"),Z("tickAngle"),Z("tickPad"),H("tickColor");var ie=N("labels");N("labelFont")&&(ie=!0),N("labelFontStyle")&&(ie=!0),N("labelFontWeight")&&(ie=!0),N("labelFontVariant")&&(ie=!0),j("labelEnable"),Z("labelSize"),Z("labelPad"),H("labelColor"),j("lineEnable"),j("lineMirror"),Z("lineWidth"),H("lineColor"),j("lineTickEnable"),j("lineTickMirror"),Z("lineTickLength"),Z("lineTickWidth"),H("lineTickColor"),j("gridEnable"),Z("gridWidth"),H("gridColor"),j("zeroEnable"),H("zeroLineColor"),Z("zeroLineWidth"),j("backgroundEnable"),H("backgroundColor");var Se=[{family:this.labelFont[0],style:this.labelFontStyle[0],weight:this.labelFontWeight[0],variant:this.labelFontVariant[0]},{family:this.labelFont[1],style:this.labelFontStyle[1],weight:this.labelFontWeight[1],variant:this.labelFontVariant[1]},{family:this.labelFont[2],style:this.labelFontStyle[2],weight:this.labelFontWeight[2],variant:this.labelFontVariant[2]}],Le=[{family:this.tickFont[0],style:this.tickFontStyle[0],weight:this.tickFontWeight[0],variant:this.tickFontVariant[0]},{family:this.tickFont[1],style:this.tickFontStyle[1],weight:this.tickFontWeight[1],variant:this.tickFontVariant[1]},{family:this.tickFont[2],style:this.tickFontStyle[2],weight:this.tickFontWeight[2],variant:this.tickFontVariant[2]}];this._text?this._text&&(ie||oe)&&this._text.update(this.bounds,this.labels,Se,this.ticks,Le):this._text=s(this.gl,this.bounds,this.labels,Se,this.ticks,Le),this._lines&&oe&&(this._lines.dispose(),this._lines=null),this._lines||(this._lines=l(this.gl,this.bounds,this.ticks))};function S(){this.primalOffset=[0,0,0],this.primalMinor=[0,0,0],this.mirrorOffset=[0,0,0],this.mirrorMinor=[0,0,0]}var L=[new S,new S,new S];function x(V,G,Z,j,N){for(var H=V.primalOffset,te=V.primalMinor,oe=V.mirrorOffset,_e=V.mirrorMinor,Ee=j[G],Ce=0;Ce<3;++Ce)if(G!==Ce){var me=H,ie=oe,Se=te,Le=_e;Ee&1<<Ce&&(me=oe,ie=H,Se=_e,Le=te),me[Ce]=Z[0][Ce],ie[Ce]=Z[1][Ce],N[Ce]>0?(Se[Ce]=-1,Le[Ce]=0):(Se[Ce]=0,Le[Ce]=1)}}var C=[0,0,0],M={model:h,view:h,projection:h,_ortho:!1};E.isOpaque=function(){return!0},E.isTransparent=function(){return!1},E.drawTransparent=function(V){};var g=0,P=[0,0,0],T=[0,0,0],z=[0,0,0];E.draw=function(V){V=V||M;for(var Pe=this.gl,G=V.model||h,Z=V.view||h,j=V.projection||h,N=this.bounds,H=V._ortho||!1,te=c(G,Z,j,N,H),oe=te.cubeEdges,_e=te.axis,Ee=Z[12],Ce=Z[13],me=Z[14],ie=Z[15],Se=H?2:1,Le=Se*this.pixelRatio*(j[3]*Ee+j[7]*Ce+j[11]*me+j[15]*ie)/Pe.drawingBufferHeight,Ae=0;Ae<3;++Ae)this.lastCubeProps.cubeEdges[Ae]=oe[Ae],this.lastCubeProps.axis[Ae]=_e[Ae];for(var Fe=L,Ae=0;Ae<3;++Ae)x(L[Ae],Ae,this.bounds,oe,_e);for(var Pe=this.gl,ge=C,Ae=0;Ae<3;++Ae)this.backgroundEnable[Ae]?ge[Ae]=_e[Ae]:ge[Ae]=0;this._background.draw(G,Z,j,N,ge,this.backgroundColor),this._lines.bind(G,Z,j,this);for(var Ae=0;Ae<3;++Ae){var Re=[0,0,0];_e[Ae]>0?Re[Ae]=N[1][Ae]:Re[Ae]=N[0][Ae];for(var ce=0;ce<2;++ce){var Ze=(Ae+1+ce)%3,ut=(Ae+1+(ce^1))%3;this.gridEnable[Ze]&&this._lines.drawGrid(Ze,ut,this.bounds,Re,this.gridColor[Ze],this.gridWidth[Ze]*this.pixelRatio)}for(var ce=0;ce<2;++ce){var Ze=(Ae+1+ce)%3,ut=(Ae+1+(ce^1))%3;this.zeroEnable[ut]&&Math.min(N[0][ut],N[1][ut])<=0&&Math.max(N[0][ut],N[1][ut])>=0&&this._lines.drawZero(Ze,ut,this.bounds,Re,this.zeroLineColor[ut],this.zeroLineWidth[ut]*this.pixelRatio)}}for(var Ae=0;Ae<3;++Ae){this.lineEnable[Ae]&&this._lines.drawAxisLine(Ae,this.bounds,Fe[Ae].primalOffset,this.lineColor[Ae],this.lineWidth[Ae]*this.pixelRatio),this.lineMirror[Ae]&&this._lines.drawAxisLine(Ae,this.bounds,Fe[Ae].mirrorOffset,this.lineColor[Ae],this.lineWidth[Ae]*this.pixelRatio);for(var pt=p(P,Fe[Ae].primalMinor),Zt=p(T,Fe[Ae].mirrorMinor),st=this.lineTickLength,ce=0;ce<3;++ce){var lt=Le/G[5*ce];pt[ce]*=st[ce]*lt,Zt[ce]*=st[ce]*lt}this.lineTickEnable[Ae]&&this._lines.drawAxisTicks(Ae,Fe[Ae].primalOffset,pt,this.lineTickColor[Ae],this.lineTickWidth[Ae]*this.pixelRatio),this.lineTickMirror[Ae]&&this._lines.drawAxisTicks(Ae,Fe[Ae].mirrorOffset,Zt,this.lineTickColor[Ae],this.lineTickWidth[Ae]*this.pixelRatio)}this._lines.unbind(),this._text.bind(G,Z,j,this.pixelRatio);var Gt,Nt=.5,Jt,sr;function wr(_t){sr=[0,0,0],sr[_t]=1}function cr(_t,It,mt){var er=(_t+1)%3,lr=(_t+2)%3,Tr=It[er],Lr=It[lr],ti=mt[er],Br=mt[lr];if(Tr>0&&Br>0){wr(er);return}else if(Tr>0&&Br<0){wr(er);return}else if(Tr<0&&Br>0){wr(er);return}else if(Tr<0&&Br<0){wr(er);return}else if(Lr>0&&ti>0){wr(lr);return}else if(Lr>0&&ti<0){wr(lr);return}else if(Lr<0&&ti>0){wr(lr);return}else if(Lr<0&&ti<0){wr(lr);return}}for(var Ae=0;Ae<3;++Ae){for(var $e=Fe[Ae].primalMinor,St=Fe[Ae].mirrorMinor,Qt=p(z,Fe[Ae].primalOffset),ce=0;ce<3;++ce)this.lineTickEnable[Ae]&&(Qt[ce]+=Le*$e[ce]*Math.max(this.lineTickLength[ce],0)/G[5*ce]);var Vt=[0,0,0];if(Vt[Ae]=1,this.tickEnable[Ae]){this.tickAngle[Ae]===-3600?(this.tickAngle[Ae]=0,this.tickAlign[Ae]="auto"):this.tickAlign[Ae]=-1,Jt=1,Gt=[this.tickAlign[Ae],Nt,Jt],Gt[0]==="auto"?Gt[0]=g:Gt[0]=parseInt(""+Gt[0]),sr=[0,0,0],cr(Ae,$e,St);for(var ce=0;ce<3;++ce)Qt[ce]+=Le*$e[ce]*this.tickPad[ce]/G[5*ce];this._text.drawTicks(Ae,this.tickSize[Ae],this.tickAngle[Ae],Qt,this.tickColor[Ae],Vt,sr,Gt)}if(this.labelEnable[Ae]){Jt=0,sr=[0,0,0],this.labels[Ae].length>4&&(wr(Ae),Jt=1),Gt=[this.labelAlign[Ae],Nt,Jt],Gt[0]==="auto"?Gt[0]=g:Gt[0]=parseInt(""+Gt[0]);for(var ce=0;ce<3;++ce)Qt[ce]+=Le*$e[ce]*this.labelPad[ce]/G[5*ce];Qt[Ae]+=.5*(N[0][Ae]+N[1][Ae]),this._text.drawLabel(Ae,this.labelSize[Ae],this.labelAngle[Ae],Qt,this.labelColor[Ae],[0,0,0],sr,Gt)}}this._text.unbind()},E.dispose=function(){this._text.dispose(),this._lines.dispose(),this._background.dispose(),this._lines=null,this._text=null,this._background=null,this.gl=null};function O(V,G){var Z=new k(V);return Z.update(G),Z}},3840:function(i){"use strict";i.exports=L;var a=0,o=1;function s(x,C,M,g,P,T){this._color=x,this.key=C,this.value=M,this.left=g,this.right=P,this._count=T}function l(x){return new s(x._color,x.key,x.value,x.left,x.right,x._count)}function u(x,C){return new s(x,C.key,C.value,C.left,C.right,C._count)}function c(x){x._count=1+(x.left?x.left._count:0)+(x.right?x.right._count:0)}function f(x,C){this._compare=x,this.root=C}var h=f.prototype;Object.defineProperty(h,"keys",{get:function(){var x=[];return this.forEach(function(C,M){x.push(C)}),x}}),Object.defineProperty(h,"values",{get:function(){var x=[];return this.forEach(function(C,M){x.push(M)}),x}}),Object.defineProperty(h,"length",{get:function(){return this.root?this.root._count:0}}),h.insert=function(x,C){for(var M=this._compare,g=this.root,P=[],T=[];g;){var z=M(x,g.key);P.push(g),T.push(z),z<=0?g=g.left:g=g.right}P.push(new s(a,x,C,null,null,1));for(var O=P.length-2;O>=0;--O){var g=P[O];T[O]<=0?P[O]=new s(g._color,g.key,g.value,P[O+1],g.right,g._count+1):P[O]=new s(g._color,g.key,g.value,g.left,P[O+1],g._count+1)}for(var O=P.length-1;O>1;--O){var V=P[O-1],g=P[O];if(V._color===o||g._color===o)break;var G=P[O-2];if(G.left===V)if(V.left===g){var Z=G.right;if(Z&&Z._color===a)V._color=o,G.right=u(o,Z),G._color=a,O-=1;else{if(G._color=a,G.left=V.right,V._color=o,V.right=G,P[O-2]=V,P[O-1]=g,c(G),c(V),O>=3){var j=P[O-3];j.left===G?j.left=V:j.right=V}break}}else{var Z=G.right;if(Z&&Z._color===a)V._color=o,G.right=u(o,Z),G._color=a,O-=1;else{if(V.right=g.left,G._color=a,G.left=g.right,g._color=o,g.left=V,g.right=G,P[O-2]=g,P[O-1]=V,c(G),c(V),c(g),O>=3){var j=P[O-3];j.left===G?j.left=g:j.right=g}break}}else if(V.right===g){var Z=G.left;if(Z&&Z._color===a)V._color=o,G.left=u(o,Z),G._color=a,O-=1;else{if(G._color=a,G.right=V.left,V._color=o,V.left=G,P[O-2]=V,P[O-1]=g,c(G),c(V),O>=3){var j=P[O-3];j.right===G?j.right=V:j.left=V}break}}else{var Z=G.left;if(Z&&Z._color===a)V._color=o,G.left=u(o,Z),G._color=a,O-=1;else{if(V.left=g.right,G._color=a,G.right=g.left,g._color=o,g.right=V,g.left=G,P[O-2]=g,P[O-1]=V,c(G),c(V),c(g),O>=3){var j=P[O-3];j.right===G?j.right=g:j.left=g}break}}}return P[0]._color=o,new f(M,P[0])};function d(x,C){if(C.left){var M=d(x,C.left);if(M)return M}var M=x(C.key,C.value);if(M)return M;if(C.right)return d(x,C.right)}function v(x,C,M,g){var P=C(x,g.key);if(P<=0){if(g.left){var T=v(x,C,M,g.left);if(T)return T}var T=M(g.key,g.value);if(T)return T}if(g.right)return v(x,C,M,g.right)}function _(x,C,M,g,P){var T=M(x,P.key),z=M(C,P.key),O;if(T<=0&&(P.left&&(O=_(x,C,M,g,P.left),O)||z>0&&(O=g(P.key,P.value),O)))return O;if(z>0&&P.right)return _(x,C,M,g,P.right)}h.forEach=function(C,M,g){if(this.root)switch(arguments.length){case 1:return d(C,this.root);case 2:return v(M,this._compare,C,this.root);case 3:return this._compare(M,g)>=0?void 0:_(M,g,this._compare,C,this.root)}},Object.defineProperty(h,"begin",{get:function(){for(var x=[],C=this.root;C;)x.push(C),C=C.left;return new b(this,x)}}),Object.defineProperty(h,"end",{get:function(){for(var x=[],C=this.root;C;)x.push(C),C=C.right;return new b(this,x)}}),h.at=function(x){if(x<0)return new b(this,[]);for(var C=this.root,M=[];;){if(M.push(C),C.left){if(x<C.left._count){C=C.left;continue}x-=C.left._count}if(!x)return new b(this,M);if(x-=1,C.right){if(x>=C.right._count)break;C=C.right}else break}return new b(this,[])},h.ge=function(x){for(var C=this._compare,M=this.root,g=[],P=0;M;){var T=C(x,M.key);g.push(M),T<=0&&(P=g.length),T<=0?M=M.left:M=M.right}return g.length=P,new b(this,g)},h.gt=function(x){for(var C=this._compare,M=this.root,g=[],P=0;M;){var T=C(x,M.key);g.push(M),T<0&&(P=g.length),T<0?M=M.left:M=M.right}return g.length=P,new b(this,g)},h.lt=function(x){for(var C=this._compare,M=this.root,g=[],P=0;M;){var T=C(x,M.key);g.push(M),T>0&&(P=g.length),T<=0?M=M.left:M=M.right}return g.length=P,new b(this,g)},h.le=function(x){for(var C=this._compare,M=this.root,g=[],P=0;M;){var T=C(x,M.key);g.push(M),T>=0&&(P=g.length),T<0?M=M.left:M=M.right}return g.length=P,new b(this,g)},h.find=function(x){for(var C=this._compare,M=this.root,g=[];M;){var P=C(x,M.key);if(g.push(M),P===0)return new b(this,g);P<=0?M=M.left:M=M.right}return new b(this,[])},h.remove=function(x){var C=this.find(x);return C?C.remove():this},h.get=function(x){for(var C=this._compare,M=this.root;M;){var g=C(x,M.key);if(g===0)return M.value;g<=0?M=M.left:M=M.right}};function b(x,C){this.tree=x,this._stack=C}var p=b.prototype;Object.defineProperty(p,"valid",{get:function(){return this._stack.length>0}}),Object.defineProperty(p,"node",{get:function(){return this._stack.length>0?this._stack[this._stack.length-1]:null},enumerable:!0}),p.clone=function(){return new b(this.tree,this._stack.slice())};function k(x,C){x.key=C.key,x.value=C.value,x.left=C.left,x.right=C.right,x._color=C._color,x._count=C._count}function E(x){for(var C,M,g,P,T=x.length-1;T>=0;--T){if(C=x[T],T===0){C._color=o;return}if(M=x[T-1],M.left===C){if(g=M.right,g.right&&g.right._color===a){if(g=M.right=l(g),P=g.right=l(g.right),M.right=g.left,g.left=M,g.right=P,g._color=M._color,C._color=o,M._color=o,P._color=o,c(M),c(g),T>1){var z=x[T-2];z.left===M?z.left=g:z.right=g}x[T-1]=g;return}else if(g.left&&g.left._color===a){if(g=M.right=l(g),P=g.left=l(g.left),M.right=P.left,g.left=P.right,P.left=M,P.right=g,P._color=M._color,M._color=o,g._color=o,C._color=o,c(M),c(g),c(P),T>1){var z=x[T-2];z.left===M?z.left=P:z.right=P}x[T-1]=P;return}if(g._color===o)if(M._color===a){M._color=o,M.right=u(a,g);return}else{M.right=u(a,g);continue}else{if(g=l(g),M.right=g.left,g.left=M,g._color=M._color,M._color=a,c(M),c(g),T>1){var z=x[T-2];z.left===M?z.left=g:z.right=g}x[T-1]=g,x[T]=M,T+1<x.length?x[T+1]=C:x.push(C),T=T+2}}else{if(g=M.left,g.left&&g.left._color===a){if(g=M.left=l(g),P=g.left=l(g.left),M.left=g.right,g.right=M,g.left=P,g._color=M._color,C._color=o,M._color=o,P._color=o,c(M),c(g),T>1){var z=x[T-2];z.right===M?z.right=g:z.left=g}x[T-1]=g;return}else if(g.right&&g.right._color===a){if(g=M.left=l(g),P=g.right=l(g.right),M.left=P.right,g.right=P.left,P.right=M,P.left=g,P._color=M._color,M._color=o,g._color=o,C._color=o,c(M),c(g),c(P),T>1){var z=x[T-2];z.right===M?z.right=P:z.left=P}x[T-1]=P;return}if(g._color===o)if(M._color===a){M._color=o,M.left=u(a,g);return}else{M.left=u(a,g);continue}else{if(g=l(g),M.left=g.right,g.right=M,g._color=M._color,M._color=a,c(M),c(g),T>1){var z=x[T-2];z.right===M?z.right=g:z.left=g}x[T-1]=g,x[T]=M,T+1<x.length?x[T+1]=C:x.push(C),T=T+2}}}}p.remove=function(){var x=this._stack;if(x.length===0)return this.tree;var C=new Array(x.length),M=x[x.length-1];C[C.length-1]=new s(M._color,M.key,M.value,M.left,M.right,M._count);for(var g=x.length-2;g>=0;--g){var M=x[g];M.left===x[g+1]?C[g]=new s(M._color,M.key,M.value,C[g+1],M.right,M._count):C[g]=new s(M._color,M.key,M.value,M.left,C[g+1],M._count)}if(M=C[C.length-1],M.left&&M.right){var P=C.length;for(M=M.left;M.right;)C.push(M),M=M.right;var T=C[P-1];C.push(new s(M._color,T.key,T.value,M.left,M.right,M._count)),C[P-1].key=M.key,C[P-1].value=M.value;for(var g=C.length-2;g>=P;--g)M=C[g],C[g]=new s(M._color,M.key,M.value,M.left,C[g+1],M._count);C[P-1].left=C[P]}if(M=C[C.length-1],M._color===a){var z=C[C.length-2];z.left===M?z.left=null:z.right===M&&(z.right=null),C.pop();for(var g=0;g<C.length;++g)C[g]._count--;return new f(this.tree._compare,C[0])}else if(M.left||M.right){M.left?k(M,M.left):M.right&&k(M,M.right),M._color=o;for(var g=0;g<C.length-1;++g)C[g]._count--;return new f(this.tree._compare,C[0])}else{if(C.length===1)return new f(this.tree._compare,null);for(var g=0;g<C.length;++g)C[g]._count--;var O=C[C.length-2];E(C),O.left===M?O.left=null:O.right=null}return new f(this.tree._compare,C[0])},Object.defineProperty(p,"key",{get:function(){if(this._stack.length>0)return this._stack[this._stack.length-1].key},enumerable:!0}),Object.defineProperty(p,"value",{get:function(){if(this._stack.length>0)return this._stack[this._stack.length-1].value},enumerable:!0}),Object.defineProperty(p,"index",{get:function(){var x=0,C=this._stack;if(C.length===0){var M=this.tree.root;return M?M._count:0}else C[C.length-1].left&&(x=C[C.length-1].left._count);for(var g=C.length-2;g>=0;--g)C[g+1]===C[g].right&&(++x,C[g].left&&(x+=C[g].left._count));return x},enumerable:!0}),p.next=function(){var x=this._stack;if(x.length!==0){var C=x[x.length-1];if(C.right)for(C=C.right;C;)x.push(C),C=C.left;else for(x.pop();x.length>0&&x[x.length-1].right===C;)C=x[x.length-1],x.pop()}},Object.defineProperty(p,"hasNext",{get:function(){var x=this._stack;if(x.length===0)return!1;if(x[x.length-1].right)return!0;for(var C=x.length-1;C>0;--C)if(x[C-1].left===x[C])return!0;return!1}}),p.update=function(x){var C=this._stack;if(C.length===0)throw new Error("Can't update empty node!");var M=new Array(C.length),g=C[C.length-1];M[M.length-1]=new s(g._color,g.key,x,g.left,g.right,g._count);for(var P=C.length-2;P>=0;--P)g=C[P],g.left===C[P+1]?M[P]=new s(g._color,g.key,g.value,M[P+1],g.right,g._count):M[P]=new s(g._color,g.key,g.value,g.left,M[P+1],g._count);return new f(this.tree._compare,M[0])},p.prev=function(){var x=this._stack;if(x.length!==0){var C=x[x.length-1];if(C.left)for(C=C.left;C;)x.push(C),C=C.right;else for(x.pop();x.length>0&&x[x.length-1].left===C;)C=x[x.length-1],x.pop()}},Object.defineProperty(p,"hasPrev",{get:function(){var x=this._stack;if(x.length===0)return!1;if(x[x.length-1].left)return!0;for(var C=x.length-1;C>0;--C)if(x[C-1].right===x[C])return!0;return!1}});function S(x,C){return x<C?-1:x>C?1:0}function L(x){return new f(x||S,null)}},3865:function(i,a,o){"use strict";var s=o(869);i.exports=l;function l(u,c){return s(u[0].mul(c[1]).add(c[0].mul(u[1])),u[1].mul(c[1]))}},3952:function(i,a,o){"use strict";i.exports=u;var s=o(3250);function l(c,f){for(var h=new Array(f+1),d=0;d<c.length;++d)h[d]=c[d];for(var d=0;d<=c.length;++d){for(var v=c.length;v<=f;++v){for(var _=new Array(f),b=0;b<f;++b)_[b]=Math.pow(v+1-d,b);h[v]=_}var p=s.apply(void 0,h);if(p)return!0}return!1}function u(c){var f=c.length;if(f===0)return[];if(f===1)return[0];for(var h=c[0].length,d=[c[0]],v=[0],_=1;_<f;++_){if(d.push(c[_]),!l(d,h)){d.pop();continue}if(v.push(_),v.length===h+1)return v}return v}},3990:function(i){i.exports=a;function a(o,s){return o[0]=s[0],o[1]=s[1],o[2]=s[2],o}},4008:function(i,a,o){i.exports=o(6690)},4025:function(i,a,o){"use strict";var s=o(2361),l=o(8828).countTrailingZeros;i.exports=u;function u(c){var f=l(s.lo(c));if(f<32)return f;var h=l(s.hi(c));return h>20?52:h+32}},4040:function(i){i.exports=a;function a(o,s,l,u,c,f,h){var d=1/(s-l),v=1/(u-c),_=1/(f-h);return o[0]=-2*d,o[1]=0,o[2]=0,o[3]=0,o[4]=0,o[5]=-2*v,o[6]=0,o[7]=0,o[8]=0,o[9]=0,o[10]=2*_,o[11]=0,o[12]=(s+l)*d,o[13]=(c+u)*v,o[14]=(h+f)*_,o[15]=1,o}},4041:function(i){i.exports=a;function a(o,s,l){var u=s[0],c=s[1],f=s[2],h=l[0],d=l[1],v=l[2],_=l[3],b=_*u+d*f-v*c,p=_*c+v*u-h*f,k=_*f+h*c-d*u,E=-h*u-d*c-v*f;return o[0]=b*_+E*-h+p*-v-k*-d,o[1]=p*_+E*-d+k*-h-b*-v,o[2]=k*_+E*-v+b*-d-p*-h,o[3]=s[3],o}},4081:function(i){"use strict";i.exports=a;function a(o,s,l,u,c,f,h,d,v,_){var b=s+f+_;if(p>0){var p=Math.sqrt(b+1);o[0]=.5*(h-v)/p,o[1]=.5*(d-u)/p,o[2]=.5*(l-f)/p,o[3]=.5*p}else{var k=Math.max(s,f,_),p=Math.sqrt(2*k-b+1);s>=k?(o[0]=.5*p,o[1]=.5*(c+l)/p,o[2]=.5*(d+u)/p,o[3]=.5*(h-v)/p):f>=k?(o[0]=.5*(l+c)/p,o[1]=.5*p,o[2]=.5*(v+h)/p,o[3]=.5*(d-u)/p):(o[0]=.5*(u+d)/p,o[1]=.5*(h+v)/p,o[2]=.5*p,o[3]=.5*(l-c)/p)}return o}},4100:function(i,a,o){"use strict";var s=o(4437),l=o(3837),u=o(5445),c=o(4449),f=o(3589),h=o(2260),d=o(7169),v=o(351),_=o(4772),b=o(4040),p=o(799),k=o(9216)({tablet:!0,featureDetect:!0});i.exports={createScene:C,createCamera:s};function E(){this.mouse=[-1,-1],this.screen=null,this.distance=1/0,this.index=null,this.dataCoordinate=null,this.dataPosition=null,this.object=null,this.data=null}function S(g,P){var T=null;try{T=g.getContext("webgl",P),T||(T=g.getContext("experimental-webgl",P))}catch(z){return null}return T}function L(g){var P=Math.round(Math.log(Math.abs(g))/Math.log(10));if(P<0){var T=Math.round(Math.pow(10,-P));return Math.ceil(g*T)/T}else if(P>0){var T=Math.round(Math.pow(10,P));return Math.ceil(g/T)*T}return Math.ceil(g)}function x(g){return typeof g=="boolean"?g:!0}function C(g){g=g||{},g.camera=g.camera||{};var P=g.canvas;if(!P)if(P=document.createElement("canvas"),g.container){var T=g.container;T.appendChild(P)}else document.body.appendChild(P);var z=g.gl;if(z||(g.glOptions&&(k=!!g.glOptions.preserveDrawingBuffer),z=S(P,g.glOptions||{premultipliedAlpha:!0,antialias:!0,preserveDrawingBuffer:k})),!z)throw new Error("webgl not supported");var O=g.bounds||[[-10,-10,-10],[10,10,10]],V=new E,G=h(z,z.drawingBufferWidth,z.drawingBufferHeight,{preferFloat:!k}),Z=p(z),j=g.cameraObject&&g.cameraObject._ortho===!0||g.camera.projection&&g.camera.projection.type==="orthographic"||!1,N={eye:g.camera.eye||[2,0,0],center:g.camera.center||[0,0,0],up:g.camera.up||[0,1,0],zoomMin:g.camera.zoomMax||.1,zoomMax:g.camera.zoomMin||100,mode:g.camera.mode||"turntable",_ortho:j},H=g.axes||{},te=l(z,H);te.enable=!H.disable;var oe=g.spikes||{},_e=c(z,oe),Ee=[],Ce=[],me=[],ie=[],Se=!0,Pe=!0,Le=new Array(16),Ae=new Array(16),Fe={view:null,projection:Le,model:Ae,_ortho:!1},Pe=!0,ge=[z.drawingBufferWidth,z.drawingBufferHeight],Re=g.cameraObject||s(P,N),ce={gl:z,contextLost:!1,pixelRatio:g.pixelRatio||1,canvas:P,selection:V,camera:Re,axes:te,axesPixels:null,spikes:_e,bounds:O,objects:Ee,shape:ge,aspect:g.aspectRatio||[1,1,1],pickRadius:g.pickRadius||10,zNear:g.zNear||.01,zFar:g.zFar||1e3,fovy:g.fovy||Math.PI/4,clearColor:g.clearColor||[0,0,0,0],autoResize:x(g.autoResize),autoBounds:x(g.autoBounds),autoScale:!!g.autoScale,autoCenter:x(g.autoCenter),clipToBounds:x(g.clipToBounds),snapToData:!!g.snapToData,onselect:g.onselect||null,onrender:g.onrender||null,onclick:g.onclick||null,cameraParams:Fe,oncontextloss:null,mouseListener:null,_stopped:!1,getAspectratio:function(){return{x:this.aspect[0],y:this.aspect[1],z:this.aspect[2]}},setAspectratio:function(sr){this.aspect[0]=sr.x,this.aspect[1]=sr.y,this.aspect[2]=sr.z,Pe=!0},setBounds:function(sr,wr){this.bounds[0][sr]=wr.min,this.bounds[1][sr]=wr.max},setClearColor:function(sr){this.clearColor=sr},clearRGBA:function(){this.gl.clearColor(this.clearColor[0],this.clearColor[1],this.clearColor[2],this.clearColor[3]),this.gl.clear(this.gl.COLOR_BUFFER_BIT|this.gl.DEPTH_BUFFER_BIT)}},Ze=[z.drawingBufferWidth/ce.pixelRatio|0,z.drawingBufferHeight/ce.pixelRatio|0];function ut(){if(!ce._stopped&&ce.autoResize){var sr=P.parentNode,wr=1,cr=1;sr&&sr!==document.body?(wr=sr.clientWidth,cr=sr.clientHeight):(wr=window.innerWidth,cr=window.innerHeight);var $e=Math.ceil(wr*ce.pixelRatio)|0,St=Math.ceil(cr*ce.pixelRatio)|0;if($e!==P.width||St!==P.height){P.width=$e,P.height=St;var Qt=P.style;Qt.position=Qt.position||"absolute",Qt.left="0px",Qt.top="0px",Qt.width=wr+"px",Qt.height=cr+"px",Se=!0}}}ce.autoResize&&ut(),window.addEventListener("resize",ut);function pt(){for(var sr=Ee.length,wr=ie.length,cr=0;cr<wr;++cr)me[cr]=0;e:for(var cr=0;cr<sr;++cr){var $e=Ee[cr],St=$e.pickSlots;if(!St){Ce[cr]=-1;continue}for(var Qt=0;Qt<wr;++Qt)if(me[Qt]+St<255){Ce[cr]=Qt,$e.setPickBase(me[Qt]+1),me[Qt]+=St;continue e}var Vt=f(z,ge);Ce[cr]=wr,ie.push(Vt),me.push(St),$e.setPickBase(1),wr+=1}for(;wr>0&&me[wr-1]===0;)me.pop(),ie.pop().dispose()}ce.update=function(sr){ce._stopped||(sr=sr||{},Se=!0,Pe=!0)},ce.add=function(sr){ce._stopped||(sr.axes=te,Ee.push(sr),Ce.push(-1),Se=!0,Pe=!0,pt())},ce.remove=function(sr){if(!ce._stopped){var wr=Ee.indexOf(sr);wr<0||(Ee.splice(wr,1),Ce.pop(),Se=!0,Pe=!0,pt())}},ce.dispose=function(){if(!ce._stopped&&(ce._stopped=!0,window.removeEventListener("resize",ut),P.removeEventListener("webglcontextlost",Zt),ce.mouseListener.enabled=!1,!ce.contextLost)){te.dispose(),_e.dispose();for(var sr=0;sr<Ee.length;++sr)Ee[sr].dispose();G.dispose();for(var sr=0;sr<ie.length;++sr)ie[sr].dispose();Z.dispose(),z=null,te=null,_e=null,Ee=[]}},ce._mouseRotating=!1,ce._prevButtons=0,ce.enableMouseListeners=function(){ce.mouseListener=v(P,function(sr,wr,cr){if(!ce._stopped){var $e=ie.length,St=Ee.length,Qt=V.object;V.distance=1/0,V.mouse[0]=wr,V.mouse[1]=cr,V.object=null,V.screen=null,V.dataCoordinate=V.dataPosition=null;var Vt=!1;if(sr&&ce._prevButtons)ce._mouseRotating=!0;else{ce._mouseRotating&&(Pe=!0),ce._mouseRotating=!1;for(var _t=0;_t<$e;++_t){var It=ie[_t].query(wr,Ze[1]-cr-1,ce.pickRadius);if(It){if(It.distance>V.distance)continue;for(var mt=0;mt<St;++mt){var er=Ee[mt];if(Ce[mt]===_t){var lr=er.pick(It);lr&&(V.buttons=sr,V.screen=It.coord,V.distance=It.distance,V.object=er,V.index=lr.distance,V.dataPosition=lr.position,V.dataCoordinate=lr.dataCoordinate,V.data=lr,Vt=!0)}}}}}Qt&&Qt!==V.object&&(Qt.highlight&&Qt.highlight(null),Se=!0),V.object&&(V.object.highlight&&V.object.highlight(V.data),Se=!0),Vt=Vt||V.object!==Qt,Vt&&ce.onselect&&ce.onselect(V),sr&1&&!(ce._prevButtons&1)&&ce.onclick&&ce.onclick(V),ce._prevButtons=sr}})};function Zt(){if(ce.contextLost)return!0;z.isContextLost()&&(ce.contextLost=!0,ce.mouseListener.enabled=!1,ce.selection.object=null,ce.oncontextloss&&ce.oncontextloss())}P.addEventListener("webglcontextlost",Zt);function st(){if(!Zt()){z.colorMask(!0,!0,!0,!0),z.depthMask(!0),z.disable(z.BLEND),z.enable(z.DEPTH_TEST),z.depthFunc(z.LEQUAL);for(var sr=Ee.length,wr=ie.length,cr=0;cr<wr;++cr){var $e=ie[cr];$e.shape=Ze,$e.begin();for(var St=0;St<sr;++St)if(Ce[St]===cr){var Qt=Ee[St];Qt.drawPick&&(Qt.pixelRatio=1,Qt.drawPick(Fe))}$e.end()}}}var lt=[[1/0,1/0,1/0],[-1/0,-1/0,-1/0]],Gt=[lt[0].slice(),lt[1].slice()];function Nt(){if(!Zt()){ut();var sr=ce.camera.tick();Fe.view=ce.camera.matrix,Se=Se||sr,Pe=Pe||sr,te.pixelRatio=ce.pixelRatio,_e.pixelRatio=ce.pixelRatio;var wr=Ee.length,cr=lt[0],$e=lt[1];cr[0]=cr[1]=cr[2]=1/0,$e[0]=$e[1]=$e[2]=-1/0;for(var St=0;St<wr;++St){var Qt=Ee[St];Qt.pixelRatio=ce.pixelRatio,Qt.axes=ce.axes,Se=Se||!!Qt.dirty,Pe=Pe||!!Qt.dirty;var Vt=Qt.bounds;if(Vt)for(var _t=Vt[0],It=Vt[1],mt=0;mt<3;++mt)cr[mt]=Math.min(cr[mt],_t[mt]),$e[mt]=Math.max($e[mt],It[mt])}var er=ce.bounds;if(ce.autoBounds)for(var mt=0;mt<3;++mt){if($e[mt]<cr[mt])cr[mt]=-1,$e[mt]=1;else{cr[mt]===$e[mt]&&(cr[mt]-=1,$e[mt]+=1);var lr=.05*($e[mt]-cr[mt]);cr[mt]=cr[mt]-lr,$e[mt]=$e[mt]+lr}er[0][mt]=cr[mt],er[1][mt]=$e[mt]}for(var Tr=!1,mt=0;mt<3;++mt)Tr=Tr||Gt[0][mt]!==er[0][mt]||Gt[1][mt]!==er[1][mt],Gt[0][mt]=er[0][mt],Gt[1][mt]=er[1][mt];if(Pe=Pe||Tr,Se=Se||Tr,!!Se){if(Tr){for(var Lr=[0,0,0],St=0;St<3;++St)Lr[St]=L((er[1][St]-er[0][St])/10);te.autoTicks?te.update({bounds:er,tickSpacing:Lr}):te.update({bounds:er})}var ti=z.drawingBufferWidth,Br=z.drawingBufferHeight;ge[0]=ti,ge[1]=Br,Ze[0]=Math.max(ti/ce.pixelRatio,1)|0,Ze[1]=Math.max(Br/ce.pixelRatio,1)|0,M(ce,j);for(var St=0;St<wr;++St){var Qt=Ee[St];Qt.axesBounds=er,ce.clipToBounds&&(Qt.clipBounds=er)}V.object&&(ce.snapToData?_e.position=V.dataCoordinate:_e.position=V.dataPosition,_e.bounds=er),Pe&&(Pe=!1,st()),ce.axesPixels=u(ce.axes,Fe,ti,Br),ce.onrender&&ce.onrender(),z.bindFramebuffer(z.FRAMEBUFFER,null),z.viewport(0,0,ti,Br),ce.clearRGBA(),z.depthMask(!0),z.colorMask(!0,!0,!0,!0),z.enable(z.DEPTH_TEST),z.depthFunc(z.LEQUAL),z.disable(z.BLEND),z.disable(z.CULL_FACE);var Vr=!1;te.enable&&(Vr=Vr||te.isTransparent(),te.draw(Fe)),_e.axes=te,V.object&&_e.draw(Fe),z.disable(z.CULL_FACE);for(var St=0;St<wr;++St){var Qt=Ee[St];Qt.axes=te,Qt.pixelRatio=ce.pixelRatio,Qt.isOpaque&&Qt.isOpaque()&&Qt.draw(Fe),Qt.isTransparent&&Qt.isTransparent()&&(Vr=!0)}if(Vr){G.shape=ge,G.bind(),z.clear(z.DEPTH_BUFFER_BIT),z.colorMask(!1,!1,!1,!1),z.depthMask(!0),z.depthFunc(z.LESS),te.enable&&te.isTransparent()&&te.drawTransparent(Fe);for(var St=0;St<wr;++St){var Qt=Ee[St];Qt.isOpaque&&Qt.isOpaque()&&Qt.draw(Fe)}z.enable(z.BLEND),z.blendEquation(z.FUNC_ADD),z.blendFunc(z.ONE,z.ONE_MINUS_SRC_ALPHA),z.colorMask(!0,!0,!0,!0),z.depthMask(!1),z.clearColor(0,0,0,0),z.clear(z.COLOR_BUFFER_BIT),te.isTransparent()&&te.drawTransparent(Fe);for(var St=0;St<wr;++St){var Qt=Ee[St];Qt.isTransparent&&Qt.isTransparent()&&Qt.drawTransparent(Fe)}z.bindFramebuffer(z.FRAMEBUFFER,null),z.blendFunc(z.ONE,z.ONE_MINUS_SRC_ALPHA),z.disable(z.DEPTH_TEST),Z.bind(),G.color[0].bind(0),Z.uniforms.accumBuffer=0,d(z),z.disable(z.BLEND)}Se=!1;for(var St=0;St<wr;++St)Ee[St].dirty=!1}}}function Jt(){ce._stopped||ce.contextLost||(Nt(),requestAnimationFrame(Jt))}return ce.enableMouseListeners(),Jt(),ce.redraw=function(){ce._stopped||(Se=!0,Nt())},ce}function M(g,P){var T=g.bounds,z=g.cameraParams,O=z.projection,V=z.model,G=g.gl.drawingBufferWidth,Z=g.gl.drawingBufferHeight,j=g.zNear,N=g.zFar,H=g.fovy,te=G/Z;P?(b(O,-te,te,-1,1,j,N),z._ortho=!0):(_(O,H,te,j,N),z._ortho=!1);for(var oe=0;oe<16;++oe)V[oe]=0;V[15]=1;for(var _e=0,oe=0;oe<3;++oe)_e=Math.max(_e,T[1][oe]-T[0][oe]);for(var oe=0;oe<3;++oe)g.autoScale?V[5*oe]=g.aspect[oe]/(T[1][oe]-T[0][oe]):V[5*oe]=1/_e,g.autoCenter&&(V[12+oe]=-V[5*oe]*.5*(T[0][oe]+T[1][oe]))}},4192:function(i){"use strict";i.exports=o;var a=32;function o(_,b){b<=4*a?s(0,b-1,_):v(0,b-1,_)}function s(_,b,p){for(var k=2*(_+1),E=_+1;E<=b;++E){for(var S=p[k++],L=p[k++],x=E,C=k-2;x-- >_;){var M=p[C-2],g=p[C-1];if(M<S)break;if(M===S&&g<L)break;p[C]=M,p[C+1]=g,C-=2}p[C]=S,p[C+1]=L}}function l(_,b,p){_*=2,b*=2;var k=p[_],E=p[_+1];p[_]=p[b],p[_+1]=p[b+1],p[b]=k,p[b+1]=E}function u(_,b,p){_*=2,b*=2,p[_]=p[b],p[_+1]=p[b+1]}function c(_,b,p,k){_*=2,b*=2,p*=2;var E=k[_],S=k[_+1];k[_]=k[b],k[_+1]=k[b+1],k[b]=k[p],k[b+1]=k[p+1],k[p]=E,k[p+1]=S}function f(_,b,p,k,E){_*=2,b*=2,E[_]=E[b],E[b]=p,E[_+1]=E[b+1],E[b+1]=k}function h(_,b,p){_*=2,b*=2;var k=p[_],E=p[b];return k<E?!1:k===E?p[_+1]>p[b+1]:!0}function d(_,b,p,k){_*=2;var E=k[_];return E<b?!0:E===b?k[_+1]<p:!1}function v(_,b,p){var k=(b-_+1)/6|0,E=_+k,S=b-k,L=_+b>>1,x=L-k,C=L+k,M=E,g=x,P=L,T=C,z=S,O=_+1,V=b-1,G=0;h(M,g,p)&&(G=M,M=g,g=G),h(T,z,p)&&(G=T,T=z,z=G),h(M,P,p)&&(G=M,M=P,P=G),h(g,P,p)&&(G=g,g=P,P=G),h(M,T,p)&&(G=M,M=T,T=G),h(P,T,p)&&(G=P,P=T,T=G),h(g,z,p)&&(G=g,g=z,z=G),h(g,P,p)&&(G=g,g=P,P=G),h(T,z,p)&&(G=T,T=z,z=G);for(var Z=p[2*g],j=p[2*g+1],N=p[2*T],H=p[2*T+1],te=2*M,oe=2*P,_e=2*z,Ee=2*E,Ce=2*L,me=2*S,ie=0;ie<2;++ie){var Se=p[te+ie],Le=p[oe+ie],Ae=p[_e+ie];p[Ee+ie]=Se,p[Ce+ie]=Le,p[me+ie]=Ae}u(x,_,p),u(C,b,p);for(var Fe=O;Fe<=V;++Fe)if(d(Fe,Z,j,p))Fe!==O&&l(Fe,O,p),++O;else if(!d(Fe,N,H,p))for(;;)if(d(V,N,H,p)){d(V,Z,j,p)?(c(Fe,O,V,p),++O,--V):(l(Fe,V,p),--V);break}else{if(--V<Fe)break;continue}f(_,O-1,Z,j,p),f(b,V+1,N,H,p),O-2-_<=a?s(_,O-2,p):v(_,O-2,p),b-(V+2)<=a?s(V+2,b,p):v(V+2,b,p),V-O<=a?s(O,V,p):v(O,V,p)}},4209:function(i,a,o){"use strict";i.exports=p;var s=o(2478),l=o(3840),u=o(3250),c=o(1303);function f(k,E,S){this.slabs=k,this.coordinates=E,this.horizontal=S}var h=f.prototype;function d(k,E){return k.y-E}function v(k,E){for(var S=null;k;){var L=k.key,x,C;L[0][0]<L[1][0]?(x=L[0],C=L[1]):(x=L[1],C=L[0]);var M=u(x,C,E);if(M<0)k=k.left;else if(M>0)if(E[0]!==L[1][0])S=k,k=k.right;else{var g=v(k.right,E);if(g)return g;k=k.left}else{if(E[0]!==L[1][0])return k;var g=v(k.right,E);if(g)return g;k=k.left}}return S}h.castUp=function(k){var E=s.le(this.coordinates,k[0]);if(E<0)return-1;var S=this.slabs[E],L=v(this.slabs[E],k),x=-1;if(L&&(x=L.value),this.coordinates[E]===k[0]){var C=null;if(L&&(C=L.key),E>0){var M=v(this.slabs[E-1],k);M&&(C?c(M.key,C)>0&&(C=M.key,x=M.value):(x=M.value,C=M.key))}var g=this.horizontal[E];if(g.length>0){var P=s.ge(g,k[1],d);if(P<g.length){var T=g[P];if(k[1]===T.y){if(T.closed)return T.index;for(;P<g.length-1&&g[P+1].y===k[1];)if(P=P+1,T=g[P],T.closed)return T.index;if(T.y===k[1]&&!T.start){if(P=P+1,P>=g.length)return x;T=g[P]}}if(T.start)if(C){var z=u(C[0],C[1],[k[0],T.y]);C[0][0]>C[1][0]&&(z=-z),z>0&&(x=T.index)}else x=T.index;else T.y!==k[1]&&(x=T.index)}}}return x};function _(k,E,S,L){this.y=k,this.index=E,this.start=S,this.closed=L}function b(k,E,S,L){this.x=k,this.segment=E,this.create=S,this.index=L}function p(k){for(var E=k.length,S=2*E,L=new Array(S),x=0;x<E;++x){var C=k[x],M=C[0][0]<C[1][0];L[2*x]=new b(C[0][0],C,M,x),L[2*x+1]=new b(C[1][0],C,!M,x)}L.sort(function(j,N){var H=j.x-N.x;return H||(H=j.create-N.create,H)?H:Math.min(j.segment[0][1],j.segment[1][1])-Math.min(N.segment[0][1],N.segment[1][1])});for(var g=l(c),P=[],T=[],z=[],O=-1/0,x=0;x<S;){for(var V=L[x].x,G=[];x<S;){var Z=L[x];if(Z.x!==V)break;x+=1,Z.segment[0][0]===Z.x&&Z.segment[1][0]===Z.x?Z.create&&(Z.segment[0][1]<Z.segment[1][1]?(G.push(new _(Z.segment[0][1],Z.index,!0,!0)),G.push(new _(Z.segment[1][1],Z.index,!1,!1))):(G.push(new _(Z.segment[1][1],Z.index,!0,!1)),G.push(new _(Z.segment[0][1],Z.index,!1,!0)))):Z.create?g=g.insert(Z.segment,Z.index):g=g.remove(Z.segment)}P.push(g.root),T.push(V),z.push(G)}return new f(P,T,z)}},4317:function(i){"use strict";function a(c,f){var h=Math.floor(f),d=f-h,v=0<=h&&h<c.shape[0],_=0<=h+1&&h+1<c.shape[0],b=v?+c.get(h):0,p=_?+c.get(h+1):0;return(1-d)*b+d*p}function o(c,f,h){var d=Math.floor(f),v=f-d,_=0<=d&&d<c.shape[0],b=0<=d+1&&d+1<c.shape[0],p=Math.floor(h),k=h-p,E=0<=p&&p<c.shape[1],S=0<=p+1&&p+1<c.shape[1],L=_&&E?c.get(d,p):0,x=_&&S?c.get(d,p+1):0,C=b&&E?c.get(d+1,p):0,M=b&&S?c.get(d+1,p+1):0;return(1-k)*((1-v)*L+v*C)+k*((1-v)*x+v*M)}function s(c,f,h,d){var v=Math.floor(f),_=f-v,b=0<=v&&v<c.shape[0],p=0<=v+1&&v+1<c.shape[0],k=Math.floor(h),E=h-k,S=0<=k&&k<c.shape[1],L=0<=k+1&&k+1<c.shape[1],x=Math.floor(d),C=d-x,M=0<=x&&x<c.shape[2],g=0<=x+1&&x+1<c.shape[2],P=b&&S&&M?c.get(v,k,x):0,T=b&&L&&M?c.get(v,k+1,x):0,z=p&&S&&M?c.get(v+1,k,x):0,O=p&&L&&M?c.get(v+1,k+1,x):0,V=b&&S&&g?c.get(v,k,x+1):0,G=b&&L&&g?c.get(v,k+1,x+1):0,Z=p&&S&&g?c.get(v+1,k,x+1):0,j=p&&L&&g?c.get(v+1,k+1,x+1):0;return(1-C)*((1-E)*((1-_)*P+_*z)+E*((1-_)*T+_*O))+C*((1-E)*((1-_)*V+_*Z)+E*((1-_)*G+_*j))}function l(c){var f=c.shape.length|0,h=new Array(f),d=new Array(f),v=new Array(f),_=new Array(f),b,p;for(b=0;b<f;++b)p=+arguments[b+1],h[b]=Math.floor(p),d[b]=p-h[b],v[b]=0<=h[b]&&h[b]<c.shape[b],_[b]=0<=h[b]+1&&h[b]+1<c.shape[b];var k=0,E,S,L;e:for(b=0;b<1<<f;++b){for(S=1,L=c.offset,E=0;E<f;++E)if(b&1<<E){if(!_[E])continue e;S*=d[E],L+=c.stride[E]*(h[E]+1)}else{if(!v[E])continue e;S*=1-d[E],L+=c.stride[E]*h[E]}k+=S*c.data[L]}return k}function u(c,f,h,d){switch(c.shape.length){case 0:return 0;case 1:return a(c,f);case 2:return o(c,f,h);case 3:return s(c,f,h,d);default:return l.apply(void 0,arguments)}}i.exports=u,i.exports.d1=a,i.exports.d2=o,i.exports.d3=s},4335:function(i){i.exports=function(o,s){var l=s[15];if(l===0)return!1;for(var u=1/l,c=0;c<16;c++)o[c]=s[c]*u;return!0}},4359:function(i,a,o){"use strict";i.exports=c;var s=o(7718),l=null,u=null;typeof document!="undefined"&&(l=document.createElement("canvas"),l.width=8192,l.height=1024,u=l.getContext("2d"));function c(f,h){return(typeof h!="object"||h===null)&&(h={}),s(f,h.canvas||l,h.context||u,h)}},4361:function(i){i.exports=a;function a(o,s,l){return o[0]=s[0]+l[0],o[1]=s[1]+l[1],o[2]=s[2]+l[2],o[3]=s[3]+l[3],o}},4437:function(i,a,o){"use strict";i.exports=d;var s=o(3025),l=o(6296),u=o(351),c=o(8512),f=o(24),h=o(7520);function d(v,_){v=v||document.body,_=_||{};var b=[.01,1/0];"distanceLimits"in _&&(b[0]=_.distanceLimits[0],b[1]=_.distanceLimits[1]),"zoomMin"in _&&(b[0]=_.zoomMin),"zoomMax"in _&&(b[1]=_.zoomMax);var p=l({center:_.center||[0,0,0],up:_.up||[0,1,0],eye:_.eye||[0,0,10],mode:_.mode||"orbit",distanceLimits:b}),k=[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],E=0,S=v.clientWidth,L=v.clientHeight,x={keyBindingMode:"rotate",enableWheel:!0,view:p,element:v,delay:_.delay||16,rotateSpeed:_.rotateSpeed||1,zoomSpeed:_.zoomSpeed||1,translateSpeed:_.translateSpeed||1,flipX:!!_.flipX,flipY:!!_.flipY,modes:p.modes,_ortho:_._ortho||_.projection&&_.projection.type==="orthographic"||!1,tick:function(){var C=s(),M=this.delay,g=C-2*M;p.idle(C-M),p.recalcMatrix(g),p.flush(C-(100+M*2));for(var P=!0,T=p.computedMatrix,z=0;z<16;++z)P=P&&k[z]===T[z],k[z]=T[z];var O=v.clientWidth===S&&v.clientHeight===L;return S=v.clientWidth,L=v.clientHeight,P?!O:(E=Math.exp(p.computedRadius[0]),!0)},lookAt:function(C,M,g){p.lookAt(p.lastT(),C,M,g)},rotate:function(C,M,g){p.rotate(p.lastT(),C,M,g)},pan:function(C,M,g){p.pan(p.lastT(),C,M,g)},translate:function(C,M,g){p.translate(p.lastT(),C,M,g)}};return Object.defineProperties(x,{matrix:{get:function(){return p.computedMatrix},set:function(C){return p.setMatrix(p.lastT(),C),p.computedMatrix},enumerable:!0},mode:{get:function(){return p.getMode()},set:function(C){var M=p.computedUp.slice(),g=p.computedEye.slice(),P=p.computedCenter.slice();if(p.setMode(C),C==="turntable"){var T=s();p._active.lookAt(T,g,P,M),p._active.lookAt(T+500,g,P,[0,0,1]),p._active.flush(T)}return p.getMode()},enumerable:!0},center:{get:function(){return p.computedCenter},set:function(C){return p.lookAt(p.lastT(),null,C),p.computedCenter},enumerable:!0},eye:{get:function(){return p.computedEye},set:function(C){return p.lookAt(p.lastT(),C),p.computedEye},enumerable:!0},up:{get:function(){return p.computedUp},set:function(C){return p.lookAt(p.lastT(),null,null,C),p.computedUp},enumerable:!0},distance:{get:function(){return E},set:function(C){return p.setDistance(p.lastT(),C),C},enumerable:!0},distanceLimits:{get:function(){return p.getDistanceLimits(b)},set:function(C){return p.setDistanceLimits(C),C},enumerable:!0}}),v.addEventListener("contextmenu",function(C){return C.preventDefault(),!1}),x._lastX=-1,x._lastY=-1,x._lastMods={shift:!1,control:!1,alt:!1,meta:!1},x.enableMouseListeners=function(){x.mouseListener=u(v,C),v.addEventListener("touchstart",function(M){var g=f(M.changedTouches[0],v);C(0,g[0],g[1],x._lastMods),C(1,g[0],g[1],x._lastMods)},h?{passive:!0}:!1),v.addEventListener("touchmove",function(M){var g=f(M.changedTouches[0],v);C(1,g[0],g[1],x._lastMods),M.preventDefault()},h?{passive:!1}:!1),v.addEventListener("touchend",function(M){C(0,x._lastX,x._lastY,x._lastMods)},h?{passive:!0}:!1);function C(M,g,P,T){var z=x.keyBindingMode;if(z!==!1){var O=z==="rotate",V=z==="pan",G=z==="zoom",Z=!!T.control,j=!!T.alt,N=!!T.shift,H=!!(M&1),te=!!(M&2),oe=!!(M&4),_e=1/v.clientHeight,Ee=_e*(g-x._lastX),Ce=_e*(P-x._lastY),me=x.flipX?1:-1,ie=x.flipY?1:-1,Se=Math.PI*x.rotateSpeed,Le=s();if(x._lastX!==-1&&x._lastY!==-1&&((O&&H&&!Z&&!j&&!N||H&&!Z&&!j&&N)&&p.rotate(Le,me*Se*Ee,-ie*Se*Ce,0),(V&&H&&!Z&&!j&&!N||te||H&&Z&&!j&&!N)&&p.pan(Le,-x.translateSpeed*Ee*E,x.translateSpeed*Ce*E,0),G&&H&&!Z&&!j&&!N||oe||H&&!Z&&j&&!N)){var Ae=-x.zoomSpeed*Ce/window.innerHeight*(Le-p.lastT())*100;p.pan(Le,0,0,E*(Math.exp(Ae)-1))}return x._lastX=g,x._lastY=P,x._lastMods=T,!0}}x.wheelListener=c(v,function(M,g){if(x.keyBindingMode!==!1&&x.enableWheel){var P=x.flipX?1:-1,T=x.flipY?1:-1,z=s();if(Math.abs(M)>Math.abs(g))p.rotate(z,0,0,-M*P*Math.PI*x.rotateSpeed/window.innerWidth);else if(!x._ortho){var O=-x.zoomSpeed*T*g/window.innerHeight*(z-p.lastT())/20;p.pan(z,0,0,E*(Math.exp(O)-1))}}},!0)},x.enableMouseListeners(),x}},4449:function(i,a,o){"use strict";var s=o(2762),l=o(8116),u=o(1493);i.exports=b;var c=[1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1];function f(p,k,E,S){this.gl=p,this.buffer=k,this.vao=E,this.shader=S,this.pixelRatio=1,this.bounds=[[-1e3,-1e3,-1e3],[1e3,1e3,1e3]],this.position=[0,0,0],this.lineWidth=[2,2,2],this.colors=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.enabled=[!0,!0,!0],this.drawSides=[!0,!0,!0],this.axes=null}var h=f.prototype,d=[0,0,0],v=[0,0,0],_=[0,0];h.isTransparent=function(){return!1},h.drawTransparent=function(p){},h.draw=function(p){var k=this.gl,E=this.vao,S=this.shader;E.bind(),S.bind();var L=p.model||c,x=p.view||c,C=p.projection||c,M;this.axes&&(M=this.axes.lastCubeProps.axis);for(var g=d,P=v,T=0;T<3;++T)M&&M[T]<0?(g[T]=this.bounds[0][T],P[T]=this.bounds[1][T]):(g[T]=this.bounds[1][T],P[T]=this.bounds[0][T]);_[0]=k.drawingBufferWidth,_[1]=k.drawingBufferHeight,S.uniforms.model=L,S.uniforms.view=x,S.uniforms.projection=C,S.uniforms.coordinates=[this.position,g,P],S.uniforms.colors=this.colors,S.uniforms.screenShape=_;for(var T=0;T<3;++T)S.uniforms.lineWidth=this.lineWidth[T]*this.pixelRatio,this.enabled[T]&&(E.draw(k.TRIANGLES,6,6*T),this.drawSides[T]&&E.draw(k.TRIANGLES,12,18+12*T));E.unbind()},h.update=function(p){p&&("bounds"in p&&(this.bounds=p.bounds),"position"in p&&(this.position=p.position),"lineWidth"in p&&(this.lineWidth=p.lineWidth),"colors"in p&&(this.colors=p.colors),"enabled"in p&&(this.enabled=p.enabled),"drawSides"in p&&(this.drawSides=p.drawSides))},h.dispose=function(){this.vao.dispose(),this.buffer.dispose(),this.shader.dispose()};function b(p,k){var E=[];function S(g,P,T,z,O,V){var G=[g,P,T,0,0,0,1];G[z+3]=1,G[z]=O,E.push.apply(E,G),G[6]=-1,E.push.apply(E,G),G[z]=V,E.push.apply(E,G),E.push.apply(E,G),G[6]=1,E.push.apply(E,G),G[z]=O,E.push.apply(E,G)}S(0,0,0,0,0,1),S(0,0,0,1,0,1),S(0,0,0,2,0,1),S(1,0,0,1,-1,1),S(1,0,0,2,-1,1),S(0,1,0,0,-1,1),S(0,1,0,2,-1,1),S(0,0,1,0,-1,1),S(0,0,1,1,-1,1);var L=s(p,E),x=l(p,[{type:p.FLOAT,buffer:L,size:3,offset:0,stride:28},{type:p.FLOAT,buffer:L,size:3,offset:12,stride:28},{type:p.FLOAT,buffer:L,size:1,offset:24,stride:28}]),C=u(p);C.attributes.position.location=0,C.attributes.color.location=1,C.attributes.weight.location=2;var M=new f(p,L,x,C);return M.update(k),M}},4494:function(i){i.exports=a;function a(o,s){return o[0]=1/s[0],o[1]=1/s[1],o[2]=1/s[2],o[3]=1/s[3],o}},4505:function(i,a,o){i.exports=o(5847)},4578:function(i){i.exports=a;function a(o,s,l,u,c){return o[0]=s,o[1]=l,o[2]=u,o[3]=c,o}},4623:function(i){"use strict";"use restrict";i.exports=a;function a(o){this.roots=new Array(o),this.ranks=new Array(o);for(var s=0;s<o;++s)this.roots[s]=s,this.ranks[s]=0}a.prototype.length=function(){return this.roots.length},a.prototype.makeSet=function(){var o=this.roots.length;return this.roots.push(o),this.ranks.push(0),o},a.prototype.find=function(o){for(var s=this.roots;s[o]!==o;){var l=s[o];s[o]=s[l],o=l}return o},a.prototype.link=function(o,s){var l=this.find(o),u=this.find(s);if(l!==u){var c=this.ranks,f=this.roots,h=c[l],d=c[u];h<d?f[l]=u:d<h?f[u]=l:(f[u]=l,++c[l])}}},4687:function(i,a){"use strict";function o(c){if(typeof c=="object"){if("buttons"in c)return c.buttons;if("which"in c){var f=c.which;if(f===2)return 4;if(f===3)return 2;if(f>0)return 1<<f-1}else if("button"in c){var f=c.button;if(f===1)return 4;if(f===2)return 2;if(f>=0)return 1<<f}}return 0}a.buttons=o;function s(c){return c.target||c.srcElement||window}a.element=s;function l(c){if(typeof c=="object"){if("offsetX"in c)return c.offsetX;var f=s(c),h=f.getBoundingClientRect();return c.clientX-h.left}return 0}a.x=l;function u(c){if(typeof c=="object"){if("offsetY"in c)return c.offsetY;var f=s(c),h=f.getBoundingClientRect();return c.clientY-h.top}return 0}a.y=u},4691:function(i){i.exports=a;function a(o,s){var l=s[0]-o[0],u=s[1]-o[1],c=s[2]-o[2],f=s[3]-o[3];return Math.sqrt(l*l+u*u+c*c+f*f)}},4750:function(i,a,o){"use strict";i.exports=l;var s=o(3090);function l(u){var c=s(u),f=c.length;if(f<=2)return[];for(var h=new Array(f),d=c[f-1],v=0;v<f;++v){var _=c[v];h[v]=[d,_],d=_}return h}},4769:function(i){"use strict";function a(s,l,u,c,f,h){var d=6*f*f-6*f,v=3*f*f-4*f+1,_=-6*f*f+6*f,b=3*f*f-2*f;if(s.length){h||(h=new Array(s.length));for(var p=s.length-1;p>=0;--p)h[p]=d*s[p]+v*l[p]+_*u[p]+b*c[p];return h}return d*s+v*l+_*u[p]+b*c}function o(s,l,u,c,f,h){var d=f-1,v=f*f,_=d*d,b=(1+2*f)*_,p=f*_,k=v*(3-2*f),E=v*d;if(s.length){h||(h=new Array(s.length));for(var S=s.length-1;S>=0;--S)h[S]=b*s[S]+p*l[S]+k*u[S]+E*c[S];return h}return b*s+p*l+k*u+E*c}i.exports=o,i.exports.derivative=a},4772:function(i){i.exports=a;function a(o,s,l,u,c){var f=1/Math.tan(s/2),h=1/(u-c);return o[0]=f/l,o[1]=0,o[2]=0,o[3]=0,o[4]=0,o[5]=f,o[6]=0,o[7]=0,o[8]=0,o[9]=0,o[10]=(c+u)*h,o[11]=-1,o[12]=0,o[13]=0,o[14]=2*c*u*h,o[15]=0,o}},4793:function(i,a,o){"use strict";var s;function l(Ie,xe){if(!(Ie instanceof xe))throw new TypeError("Cannot call a class as a function")}function u(Ie,xe){for(var ke=0;ke<xe.length;ke++){var vt=xe[ke];vt.enumerable=vt.enumerable||!1,vt.configurable=!0,"value"in vt&&(vt.writable=!0),Object.defineProperty(Ie,f(vt.key),vt)}}function c(Ie,xe,ke){return xe&&u(Ie.prototype,xe),ke&&u(Ie,ke),Object.defineProperty(Ie,"prototype",{writable:!1}),Ie}function f(Ie){var xe=h(Ie,"string");return S(xe)=="symbol"?xe:xe+""}function h(Ie,xe){if(S(Ie)!="object"||!Ie)return Ie;var ke=Ie[Symbol.toPrimitive];if(ke!==void 0){var vt=ke.call(Ie,xe||"default");if(S(vt)!="object")return vt;throw new TypeError("@@toPrimitive must return a primitive value.")}return(xe==="string"?String:Number)(Ie)}function d(Ie,xe,ke){return xe=p(xe),v(Ie,b()?Reflect.construct(xe,ke||[],p(Ie).constructor):xe.apply(Ie,ke))}function v(Ie,xe){if(xe&&(S(xe)=="object"||typeof xe=="function"))return xe;if(xe!==void 0)throw new TypeError("Derived constructors may only return object or undefined");return _(Ie)}function _(Ie){if(Ie===void 0)throw new ReferenceError("this hasn't been initialised - super() hasn't been called");return Ie}function b(){try{var Ie=!Boolean.prototype.valueOf.call(Reflect.construct(Boolean,[],function(){}))}catch(xe){}return(b=function(){return!!Ie})()}function p(Ie){return p=Object.setPrototypeOf?Object.getPrototypeOf.bind():function(xe){return xe.__proto__||Object.getPrototypeOf(xe)},p(Ie)}function k(Ie,xe){if(typeof xe!="function"&&xe!==null)throw new TypeError("Super expression must either be null or a function");Ie.prototype=Object.create(xe&&xe.prototype,{constructor:{value:Ie,writable:!0,configurable:!0}}),Object.defineProperty(Ie,"prototype",{writable:!1}),xe&&E(Ie,xe)}function E(Ie,xe){return E=Object.setPrototypeOf?Object.setPrototypeOf.bind():function(ke,vt){return ke.__proto__=vt,ke},E(Ie,xe)}function S(Ie){"@babel/helpers - typeof";return S=typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?function(xe){return typeof xe}:function(xe){return xe&&typeof Symbol=="function"&&xe.constructor===Symbol&&xe!==Symbol.prototype?"symbol":typeof xe},S(Ie)}var L=o(7507),x=o(3778),C=typeof Symbol=="function"&&typeof Symbol.for=="function"?Symbol.for("nodejs.util.inspect.custom"):null;a.hp=T,s=_e,a.IS=50;var M=2147483647;s=M,T.TYPED_ARRAY_SUPPORT=g(),!T.TYPED_ARRAY_SUPPORT&&typeof console!="undefined"&&typeof console.error=="function"&&console.error("This browser lacks typed array (Uint8Array) support which is required by `buffer` v5.x. Use `buffer` v4.x if you require old browser support.");function g(){try{var Ie=new Uint8Array(1),xe={foo:function(){return 42}};return Object.setPrototypeOf(xe,Uint8Array.prototype),Object.setPrototypeOf(Ie,xe),Ie.foo()===42}catch(ke){return!1}}Object.defineProperty(T.prototype,"parent",{enumerable:!0,get:function(){if(T.isBuffer(this))return this.buffer}}),Object.defineProperty(T.prototype,"offset",{enumerable:!0,get:function(){if(T.isBuffer(this))return this.byteOffset}});function P(Ie){if(Ie>M)throw new RangeError('The value "'+Ie+'" is invalid for option "size"');var xe=new Uint8Array(Ie);return Object.setPrototypeOf(xe,T.prototype),xe}function T(Ie,xe,ke){if(typeof Ie=="number"){if(typeof xe=="string")throw new TypeError('The "string" argument must be of type string. Received type number');return G(Ie)}return z(Ie,xe,ke)}T.poolSize=8192;function z(Ie,xe,ke){if(typeof Ie=="string")return Z(Ie,xe);if(ArrayBuffer.isView(Ie))return N(Ie);if(Ie==null)throw new TypeError("The first argument must be one of type string, Buffer, ArrayBuffer, Array, or Array-like Object. Received type "+S(Ie));if(Ge(Ie,ArrayBuffer)||Ie&&Ge(Ie.buffer,ArrayBuffer)||typeof SharedArrayBuffer!="undefined"&&(Ge(Ie,SharedArrayBuffer)||Ie&&Ge(Ie.buffer,SharedArrayBuffer)))return H(Ie,xe,ke);if(typeof Ie=="number")throw new TypeError('The "value" argument must not be of type number. Received type number');var vt=Ie.valueOf&&Ie.valueOf();if(vt!=null&&vt!==Ie)return T.from(vt,xe,ke);var ir=te(Ie);if(ir)return ir;if(typeof Symbol!="undefined"&&Symbol.toPrimitive!=null&&typeof Ie[Symbol.toPrimitive]=="function")return T.from(Ie[Symbol.toPrimitive]("string"),xe,ke);throw new TypeError("The first argument must be one of type string, Buffer, ArrayBuffer, Array, or Array-like Object. Received type "+S(Ie))}T.from=function(Ie,xe,ke){return z(Ie,xe,ke)},Object.setPrototypeOf(T.prototype,Uint8Array.prototype),Object.setPrototypeOf(T,Uint8Array);function O(Ie){if(typeof Ie!="number")throw new TypeError('"size" argument must be of type number');if(Ie<0)throw new RangeError('The value "'+Ie+'" is invalid for option "size"')}function V(Ie,xe,ke){return O(Ie),Ie<=0?P(Ie):xe!==void 0?typeof ke=="string"?P(Ie).fill(xe,ke):P(Ie).fill(xe):P(Ie)}T.alloc=function(Ie,xe,ke){return V(Ie,xe,ke)};function G(Ie){return O(Ie),P(Ie<0?0:oe(Ie)|0)}T.allocUnsafe=function(Ie){return G(Ie)},T.allocUnsafeSlow=function(Ie){return G(Ie)};function Z(Ie,xe){if((typeof xe!="string"||xe==="")&&(xe="utf8"),!T.isEncoding(xe))throw new TypeError("Unknown encoding: "+xe);var ke=Ee(Ie,xe)|0,vt=P(ke),ir=vt.write(Ie,xe);return ir!==ke&&(vt=vt.slice(0,ir)),vt}function j(Ie){for(var xe=Ie.length<0?0:oe(Ie.length)|0,ke=P(xe),vt=0;vt<xe;vt+=1)ke[vt]=Ie[vt]&255;return ke}function N(Ie){if(Ge(Ie,Uint8Array)){var xe=new Uint8Array(Ie);return H(xe.buffer,xe.byteOffset,xe.byteLength)}return j(Ie)}function H(Ie,xe,ke){if(xe<0||Ie.byteLength<xe)throw new RangeError('"offset" is outside of buffer bounds');if(Ie.byteLength<xe+(ke||0))throw new RangeError('"length" is outside of buffer bounds');var vt;return xe===void 0&&ke===void 0?vt=new Uint8Array(Ie):ke===void 0?vt=new Uint8Array(Ie,xe):vt=new Uint8Array(Ie,xe,ke),Object.setPrototypeOf(vt,T.prototype),vt}function te(Ie){if(T.isBuffer(Ie)){var xe=oe(Ie.length)|0,ke=P(xe);return ke.length===0||Ie.copy(ke,0,0,xe),ke}if(Ie.length!==void 0)return typeof Ie.length!="number"||Je(Ie.length)?P(0):j(Ie);if(Ie.type==="Buffer"&&Array.isArray(Ie.data))return j(Ie.data)}function oe(Ie){if(Ie>=M)throw new RangeError("Attempt to allocate Buffer larger than maximum size: 0x"+M.toString(16)+" bytes");return Ie|0}function _e(Ie){return+Ie!=Ie&&(Ie=0),T.alloc(+Ie)}T.isBuffer=function(xe){return xe!=null&&xe._isBuffer===!0&&xe!==T.prototype},T.compare=function(xe,ke){if(Ge(xe,Uint8Array)&&(xe=T.from(xe,xe.offset,xe.byteLength)),Ge(ke,Uint8Array)&&(ke=T.from(ke,ke.offset,ke.byteLength)),!T.isBuffer(xe)||!T.isBuffer(ke))throw new TypeError('The "buf1", "buf2" arguments must be one of type Buffer or Uint8Array');if(xe===ke)return 0;for(var vt=xe.length,ir=ke.length,ar=0,vr=Math.min(vt,ir);ar<vr;++ar)if(xe[ar]!==ke[ar]){vt=xe[ar],ir=ke[ar];break}return vt<ir?-1:ir<vt?1:0},T.isEncoding=function(xe){switch(String(xe).toLowerCase()){case"hex":case"utf8":case"utf-8":case"ascii":case"latin1":case"binary":case"base64":case"ucs2":case"ucs-2":case"utf16le":case"utf-16le":return!0;default:return!1}},T.concat=function(xe,ke){if(!Array.isArray(xe))throw new TypeError('"list" argument must be an Array of Buffers');if(xe.length===0)return T.alloc(0);var vt;if(ke===void 0)for(ke=0,vt=0;vt<xe.length;++vt)ke+=xe[vt].length;var ir=T.allocUnsafe(ke),ar=0;for(vt=0;vt<xe.length;++vt){var vr=xe[vt];if(Ge(vr,Uint8Array))ar+vr.length>ir.length?(T.isBuffer(vr)||(vr=T.from(vr)),vr.copy(ir,ar)):Uint8Array.prototype.set.call(ir,vr,ar);else if(T.isBuffer(vr))vr.copy(ir,ar);else throw new TypeError('"list" argument must be an Array of Buffers');ar+=vr.length}return ir};function Ee(Ie,xe){if(T.isBuffer(Ie))return Ie.length;if(ArrayBuffer.isView(Ie)||Ge(Ie,ArrayBuffer))return Ie.byteLength;if(typeof Ie!="string")throw new TypeError('The "string" argument must be one of type string, Buffer, or ArrayBuffer. Received type '+S(Ie));var ke=Ie.length,vt=arguments.length>2&&arguments[2]===!0;if(!vt&&ke===0)return 0;for(var ir=!1;;)switch(xe){case"ascii":case"latin1":case"binary":return ke;case"utf8":case"utf-8":return Lr(Ie).length;case"ucs2":case"ucs-2":case"utf16le":case"utf-16le":return ke*2;case"hex":return ke>>>1;case"base64":return Vr(Ie).length;default:if(ir)return vt?-1:Lr(Ie).length;xe=(""+xe).toLowerCase(),ir=!0}}T.byteLength=Ee;function Ce(Ie,xe,ke){var vt=!1;if((xe===void 0||xe<0)&&(xe=0),xe>this.length||((ke===void 0||ke>this.length)&&(ke=this.length),ke<=0)||(ke>>>=0,xe>>>=0,ke<=xe))return"";for(Ie||(Ie="utf8");;)switch(Ie){case"hex":return st(this,xe,ke);case"utf8":case"utf-8":return ce(this,xe,ke);case"ascii":return pt(this,xe,ke);case"latin1":case"binary":return Zt(this,xe,ke);case"base64":return Re(this,xe,ke);case"ucs2":case"ucs-2":case"utf16le":case"utf-16le":return lt(this,xe,ke);default:if(vt)throw new TypeError("Unknown encoding: "+Ie);Ie=(Ie+"").toLowerCase(),vt=!0}}T.prototype._isBuffer=!0;function me(Ie,xe,ke){var vt=Ie[xe];Ie[xe]=Ie[ke],Ie[ke]=vt}T.prototype.swap16=function(){var xe=this.length;if(xe%2!==0)throw new RangeError("Buffer size must be a multiple of 16-bits");for(var ke=0;ke<xe;ke+=2)me(this,ke,ke+1);return this},T.prototype.swap32=function(){var xe=this.length;if(xe%4!==0)throw new RangeError("Buffer size must be a multiple of 32-bits");for(var ke=0;ke<xe;ke+=4)me(this,ke,ke+3),me(this,ke+1,ke+2);return this},T.prototype.swap64=function(){var xe=this.length;if(xe%8!==0)throw new RangeError("Buffer size must be a multiple of 64-bits");for(var ke=0;ke<xe;ke+=8)me(this,ke,ke+7),me(this,ke+1,ke+6),me(this,ke+2,ke+5),me(this,ke+3,ke+4);return this},T.prototype.toString=function(){var xe=this.length;return xe===0?"":arguments.length===0?ce(this,0,xe):Ce.apply(this,arguments)},T.prototype.toLocaleString=T.prototype.toString,T.prototype.equals=function(xe){if(!T.isBuffer(xe))throw new TypeError("Argument must be a Buffer");return this===xe?!0:T.compare(this,xe)===0},T.prototype.inspect=function(){var xe="",ke=a.IS;return xe=this.toString("hex",0,ke).replace(/(.{2})/g,"$1 ").trim(),this.length>ke&&(xe+=" ... "),"<Buffer "+xe+">"},C&&(T.prototype[C]=T.prototype.inspect),T.prototype.compare=function(xe,ke,vt,ir,ar){if(Ge(xe,Uint8Array)&&(xe=T.from(xe,xe.offset,xe.byteLength)),!T.isBuffer(xe))throw new TypeError('The "target" argument must be one of type Buffer or Uint8Array. Received type '+S(xe));if(ke===void 0&&(ke=0),vt===void 0&&(vt=xe?xe.length:0),ir===void 0&&(ir=0),ar===void 0&&(ar=this.length),ke<0||vt>xe.length||ir<0||ar>this.length)throw new RangeError("out of range index");if(ir>=ar&&ke>=vt)return 0;if(ir>=ar)return-1;if(ke>=vt)return 1;if(ke>>>=0,vt>>>=0,ir>>>=0,ar>>>=0,this===xe)return 0;for(var vr=ar-ir,ii=vt-ke,pi=Math.min(vr,ii),$r=this.slice(ir,ar),di=xe.slice(ke,vt),ji=0;ji<pi;++ji)if($r[ji]!==di[ji]){vr=$r[ji],ii=di[ji];break}return vr<ii?-1:ii<vr?1:0};function ie(Ie,xe,ke,vt,ir){if(Ie.length===0)return-1;if(typeof ke=="string"?(vt=ke,ke=0):ke>2147483647?ke=2147483647:ke<-2147483648&&(ke=-2147483648),ke=+ke,Je(ke)&&(ke=ir?0:Ie.length-1),ke<0&&(ke=Ie.length+ke),ke>=Ie.length){if(ir)return-1;ke=Ie.length-1}else if(ke<0)if(ir)ke=0;else return-1;if(typeof xe=="string"&&(xe=T.from(xe,vt)),T.isBuffer(xe))return xe.length===0?-1:Se(Ie,xe,ke,vt,ir);if(typeof xe=="number")return xe=xe&255,typeof Uint8Array.prototype.indexOf=="function"?ir?Uint8Array.prototype.indexOf.call(Ie,xe,ke):Uint8Array.prototype.lastIndexOf.call(Ie,xe,ke):Se(Ie,[xe],ke,vt,ir);throw new TypeError("val must be string, number or Buffer")}function Se(Ie,xe,ke,vt,ir){var ar=1,vr=Ie.length,ii=xe.length;if(vt!==void 0&&(vt=String(vt).toLowerCase(),vt==="ucs2"||vt==="ucs-2"||vt==="utf16le"||vt==="utf-16le")){if(Ie.length<2||xe.length<2)return-1;ar=2,vr/=2,ii/=2,ke/=2}function pi(wi,On){return ar===1?wi[On]:wi.readUInt16BE(On*ar)}var $r;if(ir){var di=-1;for($r=ke;$r<vr;$r++)if(pi(Ie,$r)===pi(xe,di===-1?0:$r-di)){if(di===-1&&(di=$r),$r-di+1===ii)return di*ar}else di!==-1&&($r-=$r-di),di=-1}else for(ke+ii>vr&&(ke=vr-ii),$r=ke;$r>=0;$r--){for(var ji=!0,In=0;In<ii;In++)if(pi(Ie,$r+In)!==pi(xe,In)){ji=!1;break}if(ji)return $r}return-1}T.prototype.includes=function(xe,ke,vt){return this.indexOf(xe,ke,vt)!==-1},T.prototype.indexOf=function(xe,ke,vt){return ie(this,xe,ke,vt,!0)},T.prototype.lastIndexOf=function(xe,ke,vt){return ie(this,xe,ke,vt,!1)};function Le(Ie,xe,ke,vt){ke=Number(ke)||0;var ir=Ie.length-ke;vt?(vt=Number(vt),vt>ir&&(vt=ir)):vt=ir;var ar=xe.length;vt>ar/2&&(vt=ar/2);var vr;for(vr=0;vr<vt;++vr){var ii=parseInt(xe.substr(vr*2,2),16);if(Je(ii))return vr;Ie[ke+vr]=ii}return vr}function Ae(Ie,xe,ke,vt){return dt(Lr(xe,Ie.length-ke),Ie,ke,vt)}function Fe(Ie,xe,ke,vt){return dt(ti(xe),Ie,ke,vt)}function Pe(Ie,xe,ke,vt){return dt(Vr(xe),Ie,ke,vt)}function ge(Ie,xe,ke,vt){return dt(Br(xe,Ie.length-ke),Ie,ke,vt)}T.prototype.write=function(xe,ke,vt,ir){if(ke===void 0)ir="utf8",vt=this.length,ke=0;else if(vt===void 0&&typeof ke=="string")ir=ke,vt=this.length,ke=0;else if(isFinite(ke))ke=ke>>>0,isFinite(vt)?(vt=vt>>>0,ir===void 0&&(ir="utf8")):(ir=vt,vt=void 0);else throw new Error("Buffer.write(string, encoding, offset[, length]) is no longer supported");var ar=this.length-ke;if((vt===void 0||vt>ar)&&(vt=ar),xe.length>0&&(vt<0||ke<0)||ke>this.length)throw new RangeError("Attempt to write outside buffer bounds");ir||(ir="utf8");for(var vr=!1;;)switch(ir){case"hex":return Le(this,xe,ke,vt);case"utf8":case"utf-8":return Ae(this,xe,ke,vt);case"ascii":case"latin1":case"binary":return Fe(this,xe,ke,vt);case"base64":return Pe(this,xe,ke,vt);case"ucs2":case"ucs-2":case"utf16le":case"utf-16le":return ge(this,xe,ke,vt);default:if(vr)throw new TypeError("Unknown encoding: "+ir);ir=(""+ir).toLowerCase(),vr=!0}},T.prototype.toJSON=function(){return{type:"Buffer",data:Array.prototype.slice.call(this._arr||this,0)}};function Re(Ie,xe,ke){return xe===0&&ke===Ie.length?L.fromByteArray(Ie):L.fromByteArray(Ie.slice(xe,ke))}function ce(Ie,xe,ke){ke=Math.min(Ie.length,ke);for(var vt=[],ir=xe;ir<ke;){var ar=Ie[ir],vr=null,ii=ar>239?4:ar>223?3:ar>191?2:1;if(ir+ii<=ke){var pi=void 0,$r=void 0,di=void 0,ji=void 0;switch(ii){case 1:ar<128&&(vr=ar);break;case 2:pi=Ie[ir+1],(pi&192)===128&&(ji=(ar&31)<<6|pi&63,ji>127&&(vr=ji));break;case 3:pi=Ie[ir+1],$r=Ie[ir+2],(pi&192)===128&&($r&192)===128&&(ji=(ar&15)<<12|(pi&63)<<6|$r&63,ji>2047&&(ji<55296||ji>57343)&&(vr=ji));break;case 4:pi=Ie[ir+1],$r=Ie[ir+2],di=Ie[ir+3],(pi&192)===128&&($r&192)===128&&(di&192)===128&&(ji=(ar&15)<<18|(pi&63)<<12|($r&63)<<6|di&63,ji>65535&&ji<1114112&&(vr=ji))}}vr===null?(vr=65533,ii=1):vr>65535&&(vr-=65536,vt.push(vr>>>10&1023|55296),vr=56320|vr&1023),vt.push(vr),ir+=ii}return ut(vt)}var Ze=4096;function ut(Ie){var xe=Ie.length;if(xe<=Ze)return String.fromCharCode.apply(String,Ie);for(var ke="",vt=0;vt<xe;)ke+=String.fromCharCode.apply(String,Ie.slice(vt,vt+=Ze));return ke}function pt(Ie,xe,ke){var vt="";ke=Math.min(Ie.length,ke);for(var ir=xe;ir<ke;++ir)vt+=String.fromCharCode(Ie[ir]&127);return vt}function Zt(Ie,xe,ke){var vt="";ke=Math.min(Ie.length,ke);for(var ir=xe;ir<ke;++ir)vt+=String.fromCharCode(Ie[ir]);return vt}function st(Ie,xe,ke){var vt=Ie.length;(!xe||xe<0)&&(xe=0),(!ke||ke<0||ke>vt)&&(ke=vt);for(var ir="",ar=xe;ar<ke;++ar)ir+=je[Ie[ar]];return ir}function lt(Ie,xe,ke){for(var vt=Ie.slice(xe,ke),ir="",ar=0;ar<vt.length-1;ar+=2)ir+=String.fromCharCode(vt[ar]+vt[ar+1]*256);return ir}T.prototype.slice=function(xe,ke){var vt=this.length;xe=~~xe,ke=ke===void 0?vt:~~ke,xe<0?(xe+=vt,xe<0&&(xe=0)):xe>vt&&(xe=vt),ke<0?(ke+=vt,ke<0&&(ke=0)):ke>vt&&(ke=vt),ke<xe&&(ke=xe);var ir=this.subarray(xe,ke);return Object.setPrototypeOf(ir,T.prototype),ir};function Gt(Ie,xe,ke){if(Ie%1!==0||Ie<0)throw new RangeError("offset is not uint");if(Ie+xe>ke)throw new RangeError("Trying to access beyond buffer length")}T.prototype.readUintLE=T.prototype.readUIntLE=function(xe,ke,vt){xe=xe>>>0,ke=ke>>>0,vt||Gt(xe,ke,this.length);for(var ir=this[xe],ar=1,vr=0;++vr<ke&&(ar*=256);)ir+=this[xe+vr]*ar;return ir},T.prototype.readUintBE=T.prototype.readUIntBE=function(xe,ke,vt){xe=xe>>>0,ke=ke>>>0,vt||Gt(xe,ke,this.length);for(var ir=this[xe+--ke],ar=1;ke>0&&(ar*=256);)ir+=this[xe+--ke]*ar;return ir},T.prototype.readUint8=T.prototype.readUInt8=function(xe,ke){return xe=xe>>>0,ke||Gt(xe,1,this.length),this[xe]},T.prototype.readUint16LE=T.prototype.readUInt16LE=function(xe,ke){return xe=xe>>>0,ke||Gt(xe,2,this.length),this[xe]|this[xe+1]<<8},T.prototype.readUint16BE=T.prototype.readUInt16BE=function(xe,ke){return xe=xe>>>0,ke||Gt(xe,2,this.length),this[xe]<<8|this[xe+1]},T.prototype.readUint32LE=T.prototype.readUInt32LE=function(xe,ke){return xe=xe>>>0,ke||Gt(xe,4,this.length),(this[xe]|this[xe+1]<<8|this[xe+2]<<16)+this[xe+3]*16777216},T.prototype.readUint32BE=T.prototype.readUInt32BE=function(xe,ke){return xe=xe>>>0,ke||Gt(xe,4,this.length),this[xe]*16777216+(this[xe+1]<<16|this[xe+2]<<8|this[xe+3])},T.prototype.readBigUInt64LE=tt(function(xe){xe=xe>>>0,mt(xe,"offset");var ke=this[xe],vt=this[xe+7];(ke===void 0||vt===void 0)&&er(xe,this.length-8);var ir=ke+this[++xe]*Math.pow(2,8)+this[++xe]*Math.pow(2,16)+this[++xe]*Math.pow(2,24),ar=this[++xe]+this[++xe]*Math.pow(2,8)+this[++xe]*Math.pow(2,16)+vt*Math.pow(2,24);return BigInt(ir)+(BigInt(ar)<<BigInt(32))}),T.prototype.readBigUInt64BE=tt(function(xe){xe=xe>>>0,mt(xe,"offset");var ke=this[xe],vt=this[xe+7];(ke===void 0||vt===void 0)&&er(xe,this.length-8);var ir=ke*Math.pow(2,24)+this[++xe]*Math.pow(2,16)+this[++xe]*Math.pow(2,8)+this[++xe],ar=this[++xe]*Math.pow(2,24)+this[++xe]*Math.pow(2,16)+this[++xe]*Math.pow(2,8)+vt;return(BigInt(ir)<<BigInt(32))+BigInt(ar)}),T.prototype.readIntLE=function(xe,ke,vt){xe=xe>>>0,ke=ke>>>0,vt||Gt(xe,ke,this.length);for(var ir=this[xe],ar=1,vr=0;++vr<ke&&(ar*=256);)ir+=this[xe+vr]*ar;return ar*=128,ir>=ar&&(ir-=Math.pow(2,8*ke)),ir},T.prototype.readIntBE=function(xe,ke,vt){xe=xe>>>0,ke=ke>>>0,vt||Gt(xe,ke,this.length);for(var ir=ke,ar=1,vr=this[xe+--ir];ir>0&&(ar*=256);)vr+=this[xe+--ir]*ar;return ar*=128,vr>=ar&&(vr-=Math.pow(2,8*ke)),vr},T.prototype.readInt8=function(xe,ke){return xe=xe>>>0,ke||Gt(xe,1,this.length),this[xe]&128?(255-this[xe]+1)*-1:this[xe]},T.prototype.readInt16LE=function(xe,ke){xe=xe>>>0,ke||Gt(xe,2,this.length);var vt=this[xe]|this[xe+1]<<8;return vt&32768?vt|4294901760:vt},T.prototype.readInt16BE=function(xe,ke){xe=xe>>>0,ke||Gt(xe,2,this.length);var vt=this[xe+1]|this[xe]<<8;return vt&32768?vt|4294901760:vt},T.prototype.readInt32LE=function(xe,ke){return xe=xe>>>0,ke||Gt(xe,4,this.length),this[xe]|this[xe+1]<<8|this[xe+2]<<16|this[xe+3]<<24},T.prototype.readInt32BE=function(xe,ke){return xe=xe>>>0,ke||Gt(xe,4,this.length),this[xe]<<24|this[xe+1]<<16|this[xe+2]<<8|this[xe+3]},T.prototype.readBigInt64LE=tt(function(xe){xe=xe>>>0,mt(xe,"offset");var ke=this[xe],vt=this[xe+7];(ke===void 0||vt===void 0)&&er(xe,this.length-8);var ir=this[xe+4]+this[xe+5]*Math.pow(2,8)+this[xe+6]*Math.pow(2,16)+(vt<<24);return(BigInt(ir)<<BigInt(32))+BigInt(ke+this[++xe]*Math.pow(2,8)+this[++xe]*Math.pow(2,16)+this[++xe]*Math.pow(2,24))}),T.prototype.readBigInt64BE=tt(function(xe){xe=xe>>>0,mt(xe,"offset");var ke=this[xe],vt=this[xe+7];(ke===void 0||vt===void 0)&&er(xe,this.length-8);var ir=(ke<<24)+this[++xe]*Math.pow(2,16)+this[++xe]*Math.pow(2,8)+this[++xe];return(BigInt(ir)<<BigInt(32))+BigInt(this[++xe]*Math.pow(2,24)+this[++xe]*Math.pow(2,16)+this[++xe]*Math.pow(2,8)+vt)}),T.prototype.readFloatLE=function(xe,ke){return xe=xe>>>0,ke||Gt(xe,4,this.length),x.read(this,xe,!0,23,4)},T.prototype.readFloatBE=function(xe,ke){return xe=xe>>>0,ke||Gt(xe,4,this.length),x.read(this,xe,!1,23,4)},T.prototype.readDoubleLE=function(xe,ke){return xe=xe>>>0,ke||Gt(xe,8,this.length),x.read(this,xe,!0,52,8)},T.prototype.readDoubleBE=function(xe,ke){return xe=xe>>>0,ke||Gt(xe,8,this.length),x.read(this,xe,!1,52,8)};function Nt(Ie,xe,ke,vt,ir,ar){if(!T.isBuffer(Ie))throw new TypeError('"buffer" argument must be a Buffer instance');if(xe>ir||xe<ar)throw new RangeError('"value" argument is out of bounds');if(ke+vt>Ie.length)throw new RangeError("Index out of range")}T.prototype.writeUintLE=T.prototype.writeUIntLE=function(xe,ke,vt,ir){if(xe=+xe,ke=ke>>>0,vt=vt>>>0,!ir){var ar=Math.pow(2,8*vt)-1;Nt(this,xe,ke,vt,ar,0)}var vr=1,ii=0;for(this[ke]=xe&255;++ii<vt&&(vr*=256);)this[ke+ii]=xe/vr&255;return ke+vt},T.prototype.writeUintBE=T.prototype.writeUIntBE=function(xe,ke,vt,ir){if(xe=+xe,ke=ke>>>0,vt=vt>>>0,!ir){var ar=Math.pow(2,8*vt)-1;Nt(this,xe,ke,vt,ar,0)}var vr=vt-1,ii=1;for(this[ke+vr]=xe&255;--vr>=0&&(ii*=256);)this[ke+vr]=xe/ii&255;return ke+vt},T.prototype.writeUint8=T.prototype.writeUInt8=function(xe,ke,vt){return xe=+xe,ke=ke>>>0,vt||Nt(this,xe,ke,1,255,0),this[ke]=xe&255,ke+1},T.prototype.writeUint16LE=T.prototype.writeUInt16LE=function(xe,ke,vt){return xe=+xe,ke=ke>>>0,vt||Nt(this,xe,ke,2,65535,0),this[ke]=xe&255,this[ke+1]=xe>>>8,ke+2},T.prototype.writeUint16BE=T.prototype.writeUInt16BE=function(xe,ke,vt){return xe=+xe,ke=ke>>>0,vt||Nt(this,xe,ke,2,65535,0),this[ke]=xe>>>8,this[ke+1]=xe&255,ke+2},T.prototype.writeUint32LE=T.prototype.writeUInt32LE=function(xe,ke,vt){return xe=+xe,ke=ke>>>0,vt||Nt(this,xe,ke,4,4294967295,0),this[ke+3]=xe>>>24,this[ke+2]=xe>>>16,this[ke+1]=xe>>>8,this[ke]=xe&255,ke+4},T.prototype.writeUint32BE=T.prototype.writeUInt32BE=function(xe,ke,vt){return xe=+xe,ke=ke>>>0,vt||Nt(this,xe,ke,4,4294967295,0),this[ke]=xe>>>24,this[ke+1]=xe>>>16,this[ke+2]=xe>>>8,this[ke+3]=xe&255,ke+4};function Jt(Ie,xe,ke,vt,ir){It(xe,vt,ir,Ie,ke,7);var ar=Number(xe&BigInt(4294967295));Ie[ke++]=ar,ar=ar>>8,Ie[ke++]=ar,ar=ar>>8,Ie[ke++]=ar,ar=ar>>8,Ie[ke++]=ar;var vr=Number(xe>>BigInt(32)&BigInt(4294967295));return Ie[ke++]=vr,vr=vr>>8,Ie[ke++]=vr,vr=vr>>8,Ie[ke++]=vr,vr=vr>>8,Ie[ke++]=vr,ke}function sr(Ie,xe,ke,vt,ir){It(xe,vt,ir,Ie,ke,7);var ar=Number(xe&BigInt(4294967295));Ie[ke+7]=ar,ar=ar>>8,Ie[ke+6]=ar,ar=ar>>8,Ie[ke+5]=ar,ar=ar>>8,Ie[ke+4]=ar;var vr=Number(xe>>BigInt(32)&BigInt(4294967295));return Ie[ke+3]=vr,vr=vr>>8,Ie[ke+2]=vr,vr=vr>>8,Ie[ke+1]=vr,vr=vr>>8,Ie[ke]=vr,ke+8}T.prototype.writeBigUInt64LE=tt(function(xe){var ke=arguments.length>1&&arguments[1]!==void 0?arguments[1]:0;return Jt(this,xe,ke,BigInt(0),BigInt("0xffffffffffffffff"))}),T.prototype.writeBigUInt64BE=tt(function(xe){var ke=arguments.length>1&&arguments[1]!==void 0?arguments[1]:0;return sr(this,xe,ke,BigInt(0),BigInt("0xffffffffffffffff"))}),T.prototype.writeIntLE=function(xe,ke,vt,ir){if(xe=+xe,ke=ke>>>0,!ir){var ar=Math.pow(2,8*vt-1);Nt(this,xe,ke,vt,ar-1,-ar)}var vr=0,ii=1,pi=0;for(this[ke]=xe&255;++vr<vt&&(ii*=256);)xe<0&&pi===0&&this[ke+vr-1]!==0&&(pi=1),this[ke+vr]=(xe/ii>>0)-pi&255;return ke+vt},T.prototype.writeIntBE=function(xe,ke,vt,ir){if(xe=+xe,ke=ke>>>0,!ir){var ar=Math.pow(2,8*vt-1);Nt(this,xe,ke,vt,ar-1,-ar)}var vr=vt-1,ii=1,pi=0;for(this[ke+vr]=xe&255;--vr>=0&&(ii*=256);)xe<0&&pi===0&&this[ke+vr+1]!==0&&(pi=1),this[ke+vr]=(xe/ii>>0)-pi&255;return ke+vt},T.prototype.writeInt8=function(xe,ke,vt){return xe=+xe,ke=ke>>>0,vt||Nt(this,xe,ke,1,127,-128),xe<0&&(xe=255+xe+1),this[ke]=xe&255,ke+1},T.prototype.writeInt16LE=function(xe,ke,vt){return xe=+xe,ke=ke>>>0,vt||Nt(this,xe,ke,2,32767,-32768),this[ke]=xe&255,this[ke+1]=xe>>>8,ke+2},T.prototype.writeInt16BE=function(xe,ke,vt){return xe=+xe,ke=ke>>>0,vt||Nt(this,xe,ke,2,32767,-32768),this[ke]=xe>>>8,this[ke+1]=xe&255,ke+2},T.prototype.writeInt32LE=function(xe,ke,vt){return xe=+xe,ke=ke>>>0,vt||Nt(this,xe,ke,4,2147483647,-2147483648),this[ke]=xe&255,this[ke+1]=xe>>>8,this[ke+2]=xe>>>16,this[ke+3]=xe>>>24,ke+4},T.prototype.writeInt32BE=function(xe,ke,vt){return xe=+xe,ke=ke>>>0,vt||Nt(this,xe,ke,4,2147483647,-2147483648),xe<0&&(xe=4294967295+xe+1),this[ke]=xe>>>24,this[ke+1]=xe>>>16,this[ke+2]=xe>>>8,this[ke+3]=xe&255,ke+4},T.prototype.writeBigInt64LE=tt(function(xe){var ke=arguments.length>1&&arguments[1]!==void 0?arguments[1]:0;return Jt(this,xe,ke,-BigInt("0x8000000000000000"),BigInt("0x7fffffffffffffff"))}),T.prototype.writeBigInt64BE=tt(function(xe){var ke=arguments.length>1&&arguments[1]!==void 0?arguments[1]:0;return sr(this,xe,ke,-BigInt("0x8000000000000000"),BigInt("0x7fffffffffffffff"))});function wr(Ie,xe,ke,vt,ir,ar){if(ke+vt>Ie.length)throw new RangeError("Index out of range");if(ke<0)throw new RangeError("Index out of range")}function cr(Ie,xe,ke,vt,ir){return xe=+xe,ke=ke>>>0,ir||wr(Ie,xe,ke,4,34028234663852886e22,-34028234663852886e22),x.write(Ie,xe,ke,vt,23,4),ke+4}T.prototype.writeFloatLE=function(xe,ke,vt){return cr(this,xe,ke,!0,vt)},T.prototype.writeFloatBE=function(xe,ke,vt){return cr(this,xe,ke,!1,vt)};function $e(Ie,xe,ke,vt,ir){return xe=+xe,ke=ke>>>0,ir||wr(Ie,xe,ke,8,17976931348623157e292,-17976931348623157e292),x.write(Ie,xe,ke,vt,52,8),ke+8}T.prototype.writeDoubleLE=function(xe,ke,vt){return $e(this,xe,ke,!0,vt)},T.prototype.writeDoubleBE=function(xe,ke,vt){return $e(this,xe,ke,!1,vt)},T.prototype.copy=function(xe,ke,vt,ir){if(!T.isBuffer(xe))throw new TypeError("argument should be a Buffer");if(vt||(vt=0),!ir&&ir!==0&&(ir=this.length),ke>=xe.length&&(ke=xe.length),ke||(ke=0),ir>0&&ir<vt&&(ir=vt),ir===vt||xe.length===0||this.length===0)return 0;if(ke<0)throw new RangeError("targetStart out of bounds");if(vt<0||vt>=this.length)throw new RangeError("Index out of range");if(ir<0)throw new RangeError("sourceEnd out of bounds");ir>this.length&&(ir=this.length),xe.length-ke<ir-vt&&(ir=xe.length-ke+vt);var ar=ir-vt;return this===xe&&typeof Uint8Array.prototype.copyWithin=="function"?this.copyWithin(ke,vt,ir):Uint8Array.prototype.set.call(xe,this.subarray(vt,ir),ke),ar},T.prototype.fill=function(xe,ke,vt,ir){if(typeof xe=="string"){if(typeof ke=="string"?(ir=ke,ke=0,vt=this.length):typeof vt=="string"&&(ir=vt,vt=this.length),ir!==void 0&&typeof ir!="string")throw new TypeError("encoding must be a string");if(typeof ir=="string"&&!T.isEncoding(ir))throw new TypeError("Unknown encoding: "+ir);if(xe.length===1){var ar=xe.charCodeAt(0);(ir==="utf8"&&ar<128||ir==="latin1")&&(xe=ar)}}else typeof xe=="number"?xe=xe&255:typeof xe=="boolean"&&(xe=Number(xe));if(ke<0||this.length<ke||this.length<vt)throw new RangeError("Out of range index");if(vt<=ke)return this;ke=ke>>>0,vt=vt===void 0?this.length:vt>>>0,xe||(xe=0);var vr;if(typeof xe=="number")for(vr=ke;vr<vt;++vr)this[vr]=xe;else{var ii=T.isBuffer(xe)?xe:T.from(xe,ir),pi=ii.length;if(pi===0)throw new TypeError('The value "'+xe+'" is invalid for argument "value"');for(vr=0;vr<vt-ke;++vr)this[vr+ke]=ii[vr%pi]}return this};var St={};function Qt(Ie,xe,ke){St[Ie]=function(vt){function ir(){var ar;return l(this,ir),ar=d(this,ir),Object.defineProperty(ar,"message",{value:xe.apply(ar,arguments),writable:!0,configurable:!0}),ar.name="".concat(ar.name," [").concat(Ie,"]"),ar.stack,delete ar.name,ar}return k(ir,vt),c(ir,[{key:"code",get:function(){return Ie},set:function(vr){Object.defineProperty(this,"code",{configurable:!0,enumerable:!0,value:vr,writable:!0})}},{key:"toString",value:function(){return"".concat(this.name," [").concat(Ie,"]: ").concat(this.message)}}])}(ke)}Qt("ERR_BUFFER_OUT_OF_BOUNDS",function(Ie){return Ie?"".concat(Ie," is outside of buffer bounds"):"Attempt to access memory outside buffer bounds"},RangeError),Qt("ERR_INVALID_ARG_TYPE",function(Ie,xe){return'The "'.concat(Ie,'" argument must be of type number. Received type ').concat(S(xe))},TypeError),Qt("ERR_OUT_OF_RANGE",function(Ie,xe,ke){var vt='The value of "'.concat(Ie,'" is out of range.'),ir=ke;return Number.isInteger(ke)&&Math.abs(ke)>Math.pow(2,32)?ir=Vt(String(ke)):typeof ke=="bigint"&&(ir=String(ke),(ke>Math.pow(BigInt(2),BigInt(32))||ke<-Math.pow(BigInt(2),BigInt(32)))&&(ir=Vt(ir)),ir+="n"),vt+=" It must be ".concat(xe,". Received ").concat(ir),vt},RangeError);function Vt(Ie){for(var xe="",ke=Ie.length,vt=Ie[0]==="-"?1:0;ke>=vt+4;ke-=3)xe="_".concat(Ie.slice(ke-3,ke)).concat(xe);return"".concat(Ie.slice(0,ke)).concat(xe)}function _t(Ie,xe,ke){mt(xe,"offset"),(Ie[xe]===void 0||Ie[xe+ke]===void 0)&&er(xe,Ie.length-(ke+1))}function It(Ie,xe,ke,vt,ir,ar){if(Ie>ke||Ie<xe){var vr=typeof xe=="bigint"?"n":"",ii;throw ar>3?xe===0||xe===BigInt(0)?ii=">= 0".concat(vr," and < 2").concat(vr," ** ").concat((ar+1)*8).concat(vr):ii=">= -(2".concat(vr," ** ").concat((ar+1)*8-1).concat(vr,") and < 2 ** ")+"".concat((ar+1)*8-1).concat(vr):ii=">= ".concat(xe).concat(vr," and <= ").concat(ke).concat(vr),new St.ERR_OUT_OF_RANGE("value",ii,Ie)}_t(vt,ir,ar)}function mt(Ie,xe){if(typeof Ie!="number")throw new St.ERR_INVALID_ARG_TYPE(xe,"number",Ie)}function er(Ie,xe,ke){throw Math.floor(Ie)!==Ie?(mt(Ie,ke),new St.ERR_OUT_OF_RANGE(ke||"offset","an integer",Ie)):xe<0?new St.ERR_BUFFER_OUT_OF_BOUNDS:new St.ERR_OUT_OF_RANGE(ke||"offset",">= ".concat(ke?1:0," and <= ").concat(xe),Ie)}var lr=/[^+/0-9A-Za-z-_]/g;function Tr(Ie){if(Ie=Ie.split("=")[0],Ie=Ie.trim().replace(lr,""),Ie.length<2)return"";for(;Ie.length%4!==0;)Ie=Ie+"=";return Ie}function Lr(Ie,xe){xe=xe||1/0;for(var ke,vt=Ie.length,ir=null,ar=[],vr=0;vr<vt;++vr){if(ke=Ie.charCodeAt(vr),ke>55295&&ke<57344){if(!ir){if(ke>56319){(xe-=3)>-1&&ar.push(239,191,189);continue}else if(vr+1===vt){(xe-=3)>-1&&ar.push(239,191,189);continue}ir=ke;continue}if(ke<56320){(xe-=3)>-1&&ar.push(239,191,189),ir=ke;continue}ke=(ir-55296<<10|ke-56320)+65536}else ir&&(xe-=3)>-1&&ar.push(239,191,189);if(ir=null,ke<128){if((xe-=1)<0)break;ar.push(ke)}else if(ke<2048){if((xe-=2)<0)break;ar.push(ke>>6|192,ke&63|128)}else if(ke<65536){if((xe-=3)<0)break;ar.push(ke>>12|224,ke>>6&63|128,ke&63|128)}else if(ke<1114112){if((xe-=4)<0)break;ar.push(ke>>18|240,ke>>12&63|128,ke>>6&63|128,ke&63|128)}else throw new Error("Invalid code point")}return ar}function ti(Ie){for(var xe=[],ke=0;ke<Ie.length;++ke)xe.push(Ie.charCodeAt(ke)&255);return xe}function Br(Ie,xe){for(var ke,vt,ir,ar=[],vr=0;vr<Ie.length&&!((xe-=2)<0);++vr)ke=Ie.charCodeAt(vr),vt=ke>>8,ir=ke%256,ar.push(ir),ar.push(vt);return ar}function Vr(Ie){return L.toByteArray(Tr(Ie))}function dt(Ie,xe,ke,vt){var ir;for(ir=0;ir<vt&&!(ir+ke>=xe.length||ir>=Ie.length);++ir)xe[ir+ke]=Ie[ir];return ir}function Ge(Ie,xe){return Ie instanceof xe||Ie!=null&&Ie.constructor!=null&&Ie.constructor.name!=null&&Ie.constructor.name===xe.name}function Je(Ie){return Ie!==Ie}var je=function(){for(var Ie="0123456789abcdef",xe=new Array(256),ke=0;ke<16;++ke)for(var vt=ke*16,ir=0;ir<16;++ir)xe[vt+ir]=Ie[ke]+Ie[ir];return xe}();function tt(Ie){return typeof BigInt=="undefined"?xt:Ie}function xt(){throw new Error("BigInt not supported")}},4844:function(i){i.exports=a;function a(o,s,l,u){return o[0]=s[0]+l[0]*u,o[1]=s[1]+l[1]*u,o[2]=s[2]+l[2]*u,o[3]=s[3]+l[3]*u,o}},4905:function(i,a,o){var s=o(5874);i.exports=l;function l(u,c){var f=s(c),h=[];return h=h.concat(f(u)),h=h.concat(f(null)),h}},4935:function(i,a,o){"use strict";i.exports=k;var s=o(2762),l=o(8116),u=o(4359),c=o(1879).Q,f=window||process.global||{},h=f.__TEXT_CACHE||{};f.__TEXT_CACHE={};var d=3;function v(E,S,L,x){this.gl=E,this.shader=S,this.buffer=L,this.vao=x,this.tickOffset=this.tickCount=this.labelOffset=this.labelCount=null}var _=v.prototype,b=[0,0];_.bind=function(E,S,L,x){this.vao.bind(),this.shader.bind();var C=this.shader.uniforms;C.model=E,C.view=S,C.projection=L,C.pixelScale=x,b[0]=this.gl.drawingBufferWidth,b[1]=this.gl.drawingBufferHeight,this.shader.uniforms.resolution=b},_.unbind=function(){this.vao.unbind()},_.update=function(E,S,L,x,C){var M=[];function g(H,te,oe,_e,Ee,Ce){var me=[oe.style,oe.weight,oe.variant,oe.family].join("_"),ie=h[me];ie||(ie=h[me]={});var Se=ie[te];Se||(Se=ie[te]=p(te,{triangles:!0,font:oe.family,fontStyle:oe.style,fontWeight:oe.weight,fontVariant:oe.variant,textAlign:"center",textBaseline:"middle",lineSpacing:Ee,styletags:Ce}));for(var Le=(_e||12)/12,Ae=Se.positions,Fe=Se.cells,Pe=0,ge=Fe.length;Pe<ge;++Pe)for(var Re=Fe[Pe],ce=2;ce>=0;--ce){var Ze=Ae[Re[ce]];M.push(Le*Ze[0],-Le*Ze[1],H)}}for(var P=[0,0,0],T=[0,0,0],z=[0,0,0],O=[0,0,0],V=1.25,G={breaklines:!0,bolds:!0,italics:!0,subscripts:!0,superscripts:!0},Z=0;Z<3;++Z){z[Z]=M.length/d|0,g(.5*(E[0][Z]+E[1][Z]),S[Z],L[Z],12,V,G),O[Z]=(M.length/d|0)-z[Z],P[Z]=M.length/d|0;for(var j=0;j<x[Z].length;++j)if(x[Z][j].text){var N={family:x[Z][j].font||C[Z].family,style:C[Z].fontStyle||C[Z].style,weight:C[Z].fontWeight||C[Z].weight,variant:C[Z].fontVariant||C[Z].variant};g(x[Z][j].x,x[Z][j].text,N,x[Z][j].fontSize||12,V,G)}T[Z]=(M.length/d|0)-P[Z]}this.buffer.update(M),this.tickOffset=P,this.tickCount=T,this.labelOffset=z,this.labelCount=O},_.drawTicks=function(E,S,L,x,C,M,g,P){this.tickCount[E]&&(this.shader.uniforms.axis=M,this.shader.uniforms.color=C,this.shader.uniforms.angle=L,this.shader.uniforms.scale=S,this.shader.uniforms.offset=x,this.shader.uniforms.alignDir=g,this.shader.uniforms.alignOpt=P,this.vao.draw(this.gl.TRIANGLES,this.tickCount[E],this.tickOffset[E]))},_.drawLabel=function(E,S,L,x,C,M,g,P){this.labelCount[E]&&(this.shader.uniforms.axis=M,this.shader.uniforms.color=C,this.shader.uniforms.angle=L,this.shader.uniforms.scale=S,this.shader.uniforms.offset=x,this.shader.uniforms.alignDir=g,this.shader.uniforms.alignOpt=P,this.vao.draw(this.gl.TRIANGLES,this.labelCount[E],this.labelOffset[E]))},_.dispose=function(){this.shader.dispose(),this.vao.dispose(),this.buffer.dispose()};function p(E,S){try{return u(E,S)}catch(L){return console.warn('error vectorizing text:"'+E+'" error:',L),{cells:[],positions:[]}}}function k(E,S,L,x,C,M){var g=s(E),P=l(E,[{buffer:g,size:3}]),T=c(E);T.attributes.position.location=0;var z=new v(E,T,g,P);return z.update(S,L,x,C,M),z}},5023:function(i,a,o){"use strict";var s=o(2478);i.exports=d;function l(v,_,b,p,k,E,S){this.cells=v,this.neighbor=_,this.flags=p,this.constraint=b,this.active=k,this.next=E,this.boundary=S}var u=l.prototype;function c(v,_){return v[0]-_[0]||v[1]-_[1]||v[2]-_[2]}u.locate=function(){var v=[0,0,0];return function(_,b,p){var k=_,E=b,S=p;return b<p?b<_&&(k=b,E=p,S=_):p<_&&(k=p,E=_,S=b),k<0?-1:(v[0]=k,v[1]=E,v[2]=S,s.eq(this.cells,v,c))}}();function f(v,_){for(var b=v.cells(),p=b.length,k=0;k<p;++k){var E=b[k],S=E[0],L=E[1],x=E[2];L<x?L<S&&(E[0]=L,E[1]=x,E[2]=S):x<S&&(E[0]=x,E[1]=S,E[2]=L)}b.sort(c);for(var C=new Array(p),k=0;k<C.length;++k)C[k]=0;var M=[],g=[],P=new Array(3*p),T=new Array(3*p),z=null;_&&(z=[]);for(var O=new l(b,P,T,C,M,g,z),k=0;k<p;++k)for(var E=b[k],V=0;V<3;++V){var S=E[V],L=E[(V+1)%3],G=P[3*k+V]=O.locate(L,S,v.opposite(L,S)),Z=T[3*k+V]=v.isConstraint(S,L);G<0&&(Z?g.push(k):(M.push(k),C[k]=1),_&&z.push([L,S,-1]))}return O}function h(v,_,b){for(var p=0,k=0;k<v.length;++k)_[k]===b&&(v[p++]=v[k]);return v.length=p,v}function d(v,_,b){var p=f(v,b);if(_===0)return b?p.cells.concat(p.boundary):p.cells;for(var k=1,E=p.active,S=p.next,L=p.flags,x=p.cells,C=p.constraint,M=p.neighbor;E.length>0||S.length>0;){for(;E.length>0;){var g=E.pop();if(L[g]!==-k){L[g]=k;for(var P=x[g],T=0;T<3;++T){var z=M[3*g+T];z>=0&&L[z]===0&&(C[3*g+T]?S.push(z):(E.push(z),L[z]=k))}}}var O=S;S=E,E=O,S.length=0,k=-k}var V=h(x,L,_);return b?V.concat(p.boundary):V}},5033:function(i){"use strict";i.exports=a;function a(o,s,l){var u=s||0,c=l||1;return[[o[12]+o[0],o[13]+o[1],o[14]+o[2],o[15]+o[3]],[o[12]-o[0],o[13]-o[1],o[14]-o[2],o[15]-o[3]],[o[12]+o[4],o[13]+o[5],o[14]+o[6],o[15]+o[7]],[o[12]-o[4],o[13]-o[5],o[14]-o[6],o[15]-o[7]],[u*o[12]+o[8],u*o[13]+o[9],u*o[14]+o[10],u*o[15]+o[11]],[c*o[12]-o[8],c*o[13]-o[9],c*o[14]-o[10],c*o[15]-o[11]]]}},5085:function(i,a,o){i.exports=k;var s=o(3250)[3],l=o(4209),u=o(3352),c=o(2478);function f(){return!0}function h(E){return function(S,L){var x=E[S];return x?!!x.queryPoint(L,f):!1}}function d(E){for(var S={},L=0;L<E.length;++L){var x=E[L],C=x[0][0],M=x[0][1],g=x[1][1],P=[Math.min(M,g),Math.max(M,g)];C in S?S[C].push(P):S[C]=[P]}for(var T={},z=Object.keys(S),L=0;L<z.length;++L){var O=S[z[L]];T[z[L]]=u(O)}return h(T)}function v(E,S){return function(L){var x=c.le(S,L[0]);if(x<0)return 1;var C=E[x];if(!C)if(x>0&&S[x]===L[0])C=E[x-1];else return 1;for(var M=1;C;){var g=C.key,P=s(L,g[0],g[1]);if(g[0][0]<g[1][0])if(P<0)C=C.left;else if(P>0)M=-1,C=C.right;else return 0;else if(P>0)C=C.left;else if(P<0)M=1,C=C.right;else return 0}return M}}function _(E){return 1}function b(E){return function(L){return E(L[0],L[1])?0:1}}function p(E,S){return function(x){return E(x[0],x[1])?0:S(x)}}function k(E){for(var S=E.length,L=[],x=[],C=0,M=0;M<S;++M)for(var g=E[M],P=g.length,T=P-1,z=0;z<P;T=z++){var O=g[T],V=g[z];O[0]===V[0]?x.push([O,V]):L.push([O,V])}if(L.length===0)return x.length===0?_:b(d(x));var G=l(L),Z=v(G.slabs,G.coordinates);return x.length===0?Z:p(d(x),Z)}},5091:function(i,a,o){"use strict";a.shader=k,a.program=E;var s=o(8866),l=o(2992),u=typeof WeakMap=="undefined"?o(606):WeakMap,c=new u,f=0;function h(S,L,x,C,M,g,P){this.id=S,this.src=L,this.type=x,this.shader=C,this.count=g,this.programs=[],this.cache=P}h.prototype.dispose=function(){if(--this.count===0){for(var S=this.cache,L=S.gl,x=this.programs,C=0,M=x.length;C<M;++C){var g=S.programs[x[C]];g&&(delete S.programs[C],L.deleteProgram(g))}L.deleteShader(this.shader),delete S.shaders[this.type===L.FRAGMENT_SHADER|0][this.src]}};function d(S){this.gl=S,this.shaders=[{},{}],this.programs={}}var v=d.prototype;function _(S,L,x){var C=S.createShader(L);if(S.shaderSource(C,x),S.compileShader(C),!S.getShaderParameter(C,S.COMPILE_STATUS)){var M=S.getShaderInfoLog(C);try{var g=l(M,x,L)}catch(P){throw console.warn("Failed to format compiler error: "+P),new s(M,`Error compiling shader:
+`+M)}throw new s(M,g.short,g.long)}return C}v.getShaderReference=function(S,L){var x=this.gl,C=this.shaders[S===x.FRAGMENT_SHADER|0],M=C[L];if(!M||!x.isShader(M.shader)){var g=_(x,S,L);M=C[L]=new h(f++,L,S,g,[],1,this)}else M.count+=1;return M};function b(S,L,x,C,M){var g=S.createProgram();S.attachShader(g,L),S.attachShader(g,x);for(var P=0;P<C.length;++P)S.bindAttribLocation(g,M[P],C[P]);if(S.linkProgram(g),!S.getProgramParameter(g,S.LINK_STATUS)){var T=S.getProgramInfoLog(g);throw new s(T,"Error linking program: "+T)}return g}v.getProgram=function(S,L,x,C){var M=[S.id,L.id,x.join(":"),C.join(":")].join("@"),g=this.programs[M];return(!g||!this.gl.isProgram(g))&&(this.programs[M]=g=b(this.gl,S.shader,L.shader,x,C),S.programs.push(M),L.programs.push(M)),g};function p(S){var L=c.get(S);return L||(L=new d(S),c.set(S,L)),L}function k(S,L,x){return p(S).getShaderReference(L,x)}function E(S,L,x,C,M){return p(S).getProgram(L,x,C,M)}},5093:function(i){i.exports=a;function a(o,s){return o[0]=-s[0],o[1]=-s[1],o[2]=-s[2],o}},5137:function(i,a,o){i.exports=l;var s=o(1091)();function l(u,c,f,h,d,v){var _,b;for(c||(c=3),f||(f=0),h?b=Math.min(h*c+f,u.length):b=u.length,_=f;_<b;_+=c)s[0]=u[_],s[1]=u[_+1],s[2]=u[_+2],d(s,s,v),u[_]=s[0],u[_+1]=s[1],u[_+2]=s[2];return u}},5171:function(i,a,o){var s=o(737);i.exports=function(u){return s[u]}},5177:function(i){i.exports=a;function a(o,s){var l=s[0],u=s[1],c=s[2],f=s[3],h=l*l+u*u+c*c+f*f;return h>0&&(h=1/Math.sqrt(h),o[0]=l*h,o[1]=u*h,o[2]=c*h,o[3]=f*h),o}},5202:function(i,a,o){"use strict";var s=o(1944),l=o(8210);i.exports=f,i.exports.positive=h,i.exports.negative=d;function u(v,_){var b=l(s(v,_),[_[_.length-1]]);return b[b.length-1]}function c(v,_,b,p){var k=p-_,E=-_/k;E<0?E=0:E>1&&(E=1);for(var S=1-E,L=v.length,x=new Array(L),C=0;C<L;++C)x[C]=E*v[C]+S*b[C];return x}function f(v,_){for(var b=[],p=[],k=u(v[v.length-1],_),E=v[v.length-1],S=v[0],L=0;L<v.length;++L,E=S){S=v[L];var x=u(S,_);if(k<0&&x>0||k>0&&x<0){var C=c(E,x,S,k);b.push(C),p.push(C.slice())}x<0?p.push(S.slice()):x>0?b.push(S.slice()):(b.push(S.slice()),p.push(S.slice())),k=x}return{positive:b,negative:p}}function h(v,_){for(var b=[],p=u(v[v.length-1],_),k=v[v.length-1],E=v[0],S=0;S<v.length;++S,k=E){E=v[S];var L=u(E,_);(p<0&&L>0||p>0&&L<0)&&b.push(c(k,L,E,p)),L>=0&&b.push(E.slice()),p=L}return b}function d(v,_){for(var b=[],p=u(v[v.length-1],_),k=v[v.length-1],E=v[0],S=0;S<v.length;++S,k=E){E=v[S];var L=u(E,_);(p<0&&L>0||p>0&&L<0)&&b.push(c(k,L,E,p)),L<=0&&b.push(E.slice()),p=L}return b}},5219:function(i){"use strict";i.exports=function(a){for(var o=a.length,s,l=0;l<o;l++)if(s=a.charCodeAt(l),(s<9||s>13)&&s!==32&&s!==133&&s!==160&&s!==5760&&s!==6158&&(s<8192||s>8205)&&s!==8232&&s!==8233&&s!==8239&&s!==8287&&s!==8288&&s!==12288&&s!==65279)return!1;return!0}},5250:function(i){"use strict";i.exports=o;var a=+(Math.pow(2,27)+1);function o(s,l,u){var c=s*l,f=a*s,h=f-s,d=f-h,v=s-d,_=a*l,b=_-l,p=_-b,k=l-p,E=c-d*p,S=E-v*p,L=S-d*k,x=v*k-L;return u?(u[0]=x,u[1]=c,u):[x,c]}},5298:function(i,a){"use strict";var o={"float64,2,1,0":function(){return function(v,_,b,p,k){var E=v[0],S=v[1],L=v[2],x=b[0],C=b[1],M=b[2];p|=0;var g=0,P=0,T=0,z=M,O=C-L*M,V=x-S*C;for(T=0;T<E;++T){for(P=0;P<S;++P){for(g=0;g<L;++g)_[p]/=k,p+=z;p+=O}p+=V}}},"uint8,2,0,1,float64,2,1,0":function(){return function(v,_,b,p,k,E,S,L){var x=v[0],C=v[1],M=v[2],g=b[0],P=b[1],T=b[2],z=E[0],O=E[1],V=E[2];p|=0,S|=0;for(var G=p,Z=S,j=v[0]|0;j>0;){j<64?(x=j,j=0):(x=64,j-=64);for(var N=v[1]|0;N>0;){N<64?(C=N,N=0):(C=64,N-=64),p=G+j*g+N*P,S=Z+j*z+N*O;var H=0,te=0,oe=0,_e=T,Ee=g-M*T,Ce=P-x*g,me=V,ie=z-M*V,Se=O-x*z;for(oe=0;oe<C;++oe){for(te=0;te<x;++te){for(H=0;H<M;++H)_[p]=k[S]*L,p+=_e,S+=me;p+=Ee,S+=ie}p+=Ce,S+=Se}}}}},"float32,1,0,float32,1,0":function(){return function(v,_,b,p,k,E,S){var L=v[0],x=v[1],C=b[0],M=b[1],g=E[0],P=E[1];p|=0,S|=0;var T=0,z=0,O=M,V=C-x*M,G=P,Z=g-x*P;for(z=0;z<L;++z){for(T=0;T<x;++T)_[p]=k[S],p+=O,S+=G;p+=V,S+=Z}}},"float32,1,0,float32,0,1":function(){return function(v,_,b,p,k,E,S){var L=v[0],x=v[1],C=b[0],M=b[1],g=E[0],P=E[1];p|=0,S|=0;for(var T=p,z=S,O=v[1]|0;O>0;){O<64?(x=O,O=0):(x=64,O-=64);for(var V=v[0]|0;V>0;){V<64?(L=V,V=0):(L=64,V-=64),p=T+O*M+V*C,S=z+O*P+V*g;var G=0,Z=0,j=M,N=C-x*M,H=P,te=g-x*P;for(Z=0;Z<L;++Z){for(G=0;G<x;++G)_[p]=k[S],p+=j,S+=H;p+=N,S+=te}}}}},"uint8,2,0,1,uint8,1,2,0":function(){return function(v,_,b,p,k,E,S){var L=v[0],x=v[1],C=v[2],M=b[0],g=b[1],P=b[2],T=E[0],z=E[1],O=E[2];p|=0,S|=0;for(var V=p,G=S,Z=v[2]|0;Z>0;){Z<64?(C=Z,Z=0):(C=64,Z-=64);for(var j=v[0]|0;j>0;){j<64?(L=j,j=0):(L=64,j-=64);for(var N=v[1]|0;N>0;){N<64?(x=N,N=0):(x=64,N-=64),p=V+Z*P+j*M+N*g,S=G+Z*O+j*T+N*z;var H=0,te=0,oe=0,_e=P,Ee=M-C*P,Ce=g-L*M,me=O,ie=T-C*O,Se=z-L*T;for(oe=0;oe<x;++oe){for(te=0;te<L;++te){for(H=0;H<C;++H)_[p]=k[S],p+=_e,S+=me;p+=Ee,S+=ie}p+=Ce,S+=Se}}}}}},"uint8,2,0,1,array,2,0,1":function(){return function(v,_,b,p,k,E,S){var L=v[0],x=v[1],C=v[2],M=b[0],g=b[1],P=b[2],T=E[0],z=E[1],O=E[2];p|=0,S|=0;var V=0,G=0,Z=0,j=P,N=M-C*P,H=g-L*M,te=O,oe=T-C*O,_e=z-L*T;for(Z=0;Z<x;++Z){for(G=0;G<L;++G){for(V=0;V<C;++V)_[p]=k[S],p+=j,S+=te;p+=N,S+=oe}p+=H,S+=_e}}}};function s(d,v){var _=v.join(","),b=o[_];return b()}var l=s,u={mul:function(d){var v={};return function(b,p,k){var E=b.dtype,S=b.order,L=p.dtype,x=p.order,C=k.dtype,M=k.order,g=[E,S.join(),L,x.join(),C,M.join()].join(),P=v[g];return P||(v[g]=P=d([E,S,L,x,C,M])),P(b.shape.slice(0),b.data,b.stride,b.offset|0,p.data,p.stride,p.offset|0,k.data,k.stride,k.offset|0)}},muls:function(d){var v={};return function(b,p,k){var E=b.dtype,S=b.order,L=p.dtype,x=p.order,C=[E,S.join(),L,x.join()].join(),M=v[C];return M||(v[C]=M=d([E,S,L,x])),M(b.shape.slice(0),b.data,b.stride,b.offset|0,p.data,p.stride,p.offset|0,k)}},mulseq:function(d){var v={};return function(b,p){var k=b.dtype,E=b.order,S=[k,E.join()].join(),L=v[S];return L||(v[S]=L=d([k,E])),L(b.shape.slice(0),b.data,b.stride,b.offset|0,p)}},div:function(d){var v={};return function(b,p,k){var E=b.dtype,S=b.order,L=p.dtype,x=p.order,C=k.dtype,M=k.order,g=[E,S.join(),L,x.join(),C,M.join()].join(),P=v[g];return P||(v[g]=P=d([E,S,L,x,C,M])),P(b.shape.slice(0),b.data,b.stride,b.offset|0,p.data,p.stride,p.offset|0,k.data,k.stride,k.offset|0)}},divs:function(d){var v={};return function(b,p,k){var E=b.dtype,S=b.order,L=p.dtype,x=p.order,C=[E,S.join(),L,x.join()].join(),M=v[C];return M||(v[C]=M=d([E,S,L,x])),M(b.shape.slice(0),b.data,b.stride,b.offset|0,p.data,p.stride,p.offset|0,k)}},divseq:function(d){var v={};return function(b,p){var k=b.dtype,E=b.order,S=[k,E.join()].join(),L=v[S];return L||(v[S]=L=d([k,E])),L(b.shape.slice(0),b.data,b.stride,b.offset|0,p)}},assign:function(d){var v={};return function(b,p){var k=b.dtype,E=b.order,S=p.dtype,L=p.order,x=[k,E.join(),S,L.join()].join(),C=v[x];return C||(v[x]=C=d([k,E,S,L])),C(b.shape.slice(0),b.data,b.stride,b.offset|0,p.data,p.stride,p.offset|0)}}};function c(d){var v=u[d.funcName];return v(l.bind(void 0,d))}function f(d){return c({funcName:d.funcName})}var h={mul:"*",div:"/"};(function(){for(var d in h)a[d]=f({funcName:d}),a[d+"s"]=f({funcName:d+"s"}),a[d+"seq"]=f({funcName:d+"seq"})})(),a.assign=f({funcName:"assign"})},5304:function(i,a,o){"use strict";i.exports=h;var s=o(2762),l=o(8116),u=o(1879).bg;function c(d,v,_,b){this.gl=d,this.buffer=v,this.vao=_,this.shader=b}var f=c.prototype;f.draw=function(d,v,_,b,p,k){for(var E=!1,S=0;S<3;++S)E=E||p[S];if(E){var L=this.gl;L.enable(L.POLYGON_OFFSET_FILL),L.polygonOffset(1,2),this.shader.bind(),this.shader.uniforms={model:d,view:v,projection:_,bounds:b,enable:p,colors:k},this.vao.bind(),this.vao.draw(this.gl.TRIANGLES,36),this.vao.unbind(),L.disable(L.POLYGON_OFFSET_FILL)}},f.dispose=function(){this.vao.dispose(),this.buffer.dispose(),this.shader.dispose()};function h(d){for(var v=[],_=[],b=0,p=0;p<3;++p)for(var k=(p+1)%3,E=(p+2)%3,S=[0,0,0],L=[0,0,0],x=-1;x<=1;x+=2){_.push(b,b+2,b+1,b+1,b+2,b+3),S[p]=x,L[p]=x;for(var C=-1;C<=1;C+=2){S[k]=C;for(var M=-1;M<=1;M+=2)S[E]=M,v.push(S[0],S[1],S[2],L[0],L[1],L[2]),b+=1}var g=k;k=E,E=g}var P=s(d,new Float32Array(v)),T=s(d,new Uint16Array(_),d.ELEMENT_ARRAY_BUFFER),z=l(d,[{buffer:P,type:d.FLOAT,size:3,offset:0,stride:24},{buffer:P,type:d.FLOAT,size:3,offset:12,stride:24}],T),O=u(d);return O.attributes.position.location=0,O.attributes.normal.location=1,new c(d,P,z,O)}},5352:function(i){i.exports=a;function a(o,s,l){var u=s[0],c=s[1],f=s[2],h=s[3];return o[0]=l[0]*u+l[4]*c+l[8]*f+l[12]*h,o[1]=l[1]*u+l[5]*c+l[9]*f+l[13]*h,o[2]=l[2]*u+l[6]*c+l[10]*f+l[14]*h,o[3]=l[3]*u+l[7]*c+l[11]*f+l[15]*h,o}},5382:function(i,a,o){"use strict";var s=o(8210),l=o(3012);i.exports=u;function u(c,f){if(c.length===1)return l(f,c[0]);if(f.length===1)return l(c,f[0]);if(c.length===0||f.length===0)return[0];var h=[0];if(c.length<f.length)for(var d=0;d<c.length;++d)h=s(h,l(f,c[d]));else for(var d=0;d<f.length;++d)h=s(h,l(c,f[d]));return h}},5445:function(i,a,o){"use strict";i.exports=L;var s=o(5033),l=o(5202),u=o(6429),c=o(6760),f=o(5665),h=o(5352),d=new Float32Array([1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1]),v=new Float32Array(16);function _(x,C,M){this.lo=x,this.hi=C,this.pixelsPerDataUnit=M}var b=[0,0,0,1],p=[0,0,0,1];function k(x,C,M,g,P){for(var T=0;T<3;++T){for(var z=b,O=p,V=0;V<3;++V)O[V]=z[V]=M[V];O[3]=z[3]=1,O[T]+=1,h(O,O,C),O[3]<0&&(x[T]=1/0),z[T]-=1,h(z,z,C),z[3]<0&&(x[T]=1/0);var G=(z[0]/z[3]-O[0]/O[3])*g,Z=(z[1]/z[3]-O[1]/O[3])*P;x[T]=.25*Math.sqrt(G*G+Z*Z)}return x}var E=[new _(1/0,-1/0,1/0),new _(1/0,-1/0,1/0),new _(1/0,-1/0,1/0)],S=[0,0,0];function L(x,C,M,g,Z){var T=C.model||d,z=C.view||d,O=C.projection||d,V=C._ortho||!1,G=x.bounds,Z=Z||u(T,z,O,G,V),j=Z.axis;c(v,z,T),c(v,O,v);for(var N=E,H=0;H<3;++H)N[H].lo=1/0,N[H].hi=-1/0,N[H].pixelsPerDataUnit=1/0;var te=s(f(v,v));f(v,v);for(var oe=0;oe<3;++oe){var _e=(oe+1)%3,Ee=(oe+2)%3,Ce=S;e:for(var H=0;H<2;++H){var me=[];if(j[oe]<0!=!!H){Ce[oe]=G[H][oe];for(var ie=0;ie<2;++ie){Ce[_e]=G[ie^H][_e];for(var Se=0;Se<2;++Se)Ce[Ee]=G[Se^ie^H][Ee],me.push(Ce.slice())}for(var Le=V?5:4,ie=Le;ie===Le;++ie){if(me.length===0)continue e;me=l.positive(me,te[ie])}for(var ie=0;ie<me.length;++ie)for(var Ee=me[ie],Ae=k(S,v,Ee,M,g),Se=0;Se<3;++Se)N[Se].lo=Math.min(N[Se].lo,Ee[Se]),N[Se].hi=Math.max(N[Se].hi,Ee[Se]),Se!==oe&&(N[Se].pixelsPerDataUnit=Math.min(N[Se].pixelsPerDataUnit,Math.abs(Ae[Se])))}}}return N}},5455:function(i,a,o){i.exports=o(7056)},5486:function(i,a,o){i.exports=o(3066)},5542:function(i,a,o){"use strict";var s=o(2478);i.exports=f;function l(h,d){this.stars=h,this.edges=d}var u=l.prototype;function c(h,d,v){for(var _=1,b=h.length;_<b;_+=2)if(h[_-1]===d&&h[_]===v){h[_-1]=h[b-2],h[_]=h[b-1],h.length=b-2;return}}u.isConstraint=function(){var h=[0,0];function d(v,_){return v[0]-_[0]||v[1]-_[1]}return function(v,_){return h[0]=Math.min(v,_),h[1]=Math.max(v,_),s.eq(this.edges,h,d)>=0}}(),u.removeTriangle=function(h,d,v){var _=this.stars;c(_[h],d,v),c(_[d],v,h),c(_[v],h,d)},u.addTriangle=function(h,d,v){var _=this.stars;_[h].push(d,v),_[d].push(v,h),_[v].push(h,d)},u.opposite=function(h,d){for(var v=this.stars[d],_=1,b=v.length;_<b;_+=2)if(v[_]===h)return v[_-1];return-1},u.flip=function(h,d){var v=this.opposite(h,d),_=this.opposite(d,h);this.removeTriangle(h,d,v),this.removeTriangle(d,h,_),this.addTriangle(h,_,v),this.addTriangle(d,v,_)},u.edges=function(){for(var h=this.stars,d=[],v=0,_=h.length;v<_;++v)for(var b=h[v],p=0,k=b.length;p<k;p+=2)d.push([b[p],b[p+1]]);return d},u.cells=function(){for(var h=this.stars,d=[],v=0,_=h.length;v<_;++v)for(var b=h[v],p=0,k=b.length;p<k;p+=2){var E=b[p],S=b[p+1];v<Math.min(E,S)&&d.push([v,E,S])}return d};function f(h,d){for(var v=new Array(h),_=0;_<h;++_)v[_]=[];return new l(v,d)}},5567:function(i){i.exports=a;function a(o,s,l){var u=Math.sin(l),c=Math.cos(l),f=s[4],h=s[5],d=s[6],v=s[7],_=s[8],b=s[9],p=s[10],k=s[11];return s!==o&&(o[0]=s[0],o[1]=s[1],o[2]=s[2],o[3]=s[3],o[12]=s[12],o[13]=s[13],o[14]=s[14],o[15]=s[15]),o[4]=f*c+_*u,o[5]=h*c+b*u,o[6]=d*c+p*u,o[7]=v*c+k*u,o[8]=_*c-f*u,o[9]=b*c-h*u,o[10]=p*c-d*u,o[11]=k*c-v*u,o}},5572:function(i,a,o){"use strict";var s=o(869);i.exports=l;function l(u,c){return s(u[0].mul(c[1]).sub(u[1].mul(c[0])),u[1].mul(c[1]))}},5609:function(i,a,o){"use strict";i.exports=l;var s=o(3134);function l(u,c){for(var f=s(u,c.length),h=new Array(c.length),d=new Array(c.length),v=[],_=0;_<c.length;++_){var b=f[_].length;d[_]=b,h[_]=!0,b<=1&&v.push(_)}for(;v.length>0;){var p=v.pop();h[p]=!1;for(var k=f[p],_=0;_<k.length;++_){var E=k[_];--d[E]===0&&v.push(E)}}for(var S=new Array(c.length),L=[],_=0;_<c.length;++_)if(h[_]){var p=L.length;S[_]=p,L.push(c[_])}else S[_]=-1;for(var x=[],_=0;_<u.length;++_){var C=u[_];h[C[0]]&&h[C[1]]&&x.push([S[C[0]],S[C[1]]])}return[x,L]}},5632:function(i){i.exports=a;function a(o,s,l){return o[0]=s[0]+l[0],o[1]=s[1]+l[1],o[2]=s[2]+l[2],o}},5665:function(i){i.exports=a;function a(o,s){if(o===s){var l=s[1],u=s[2],c=s[3],f=s[6],h=s[7],d=s[11];o[1]=s[4],o[2]=s[8],o[3]=s[12],o[4]=l,o[6]=s[9],o[7]=s[13],o[8]=u,o[9]=f,o[11]=s[14],o[12]=c,o[13]=h,o[14]=d}else o[0]=s[0],o[1]=s[4],o[2]=s[8],o[3]=s[12],o[4]=s[1],o[5]=s[5],o[6]=s[9],o[7]=s[13],o[8]=s[2],o[9]=s[6],o[10]=s[10],o[11]=s[14],o[12]=s[3],o[13]=s[7],o[14]=s[11],o[15]=s[15];return o}},5673:function(i){i.exports=a;function a(o,s,l){var u=s[0],c=s[1],f=s[2],h=l[3]*u+l[7]*c+l[11]*f+l[15];return h=h||1,o[0]=(l[0]*u+l[4]*c+l[8]*f+l[12])/h,o[1]=(l[1]*u+l[5]*c+l[9]*f+l[13])/h,o[2]=(l[2]*u+l[6]*c+l[10]*f+l[14])/h,o}},5714:function(i,a,o){"use strict";i.exports=M;var s=o(2762),l=o(8116),u=o(7766),c=new Uint8Array(4),f=new Float32Array(c.buffer);function h(g,P,T,z){return c[0]=z,c[1]=T,c[2]=P,c[3]=g,f[0]}var d=o(2478),v=o(9618),_=o(7319),b=_.createShader,p=_.createPickShader,k=[1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1];function E(g,P){for(var T=0,z=0;z<3;++z){var O=g[z]-P[z];T+=O*O}return Math.sqrt(T)}function S(g){for(var P=[[-1e6,-1e6,-1e6],[1e6,1e6,1e6]],T=0;T<3;++T)P[0][T]=Math.max(g[0][T],P[0][T]),P[1][T]=Math.min(g[1][T],P[1][T]);return P}function L(g,P,T,z){this.arcLength=g,this.position=P,this.index=T,this.dataCoordinate=z}function x(g,P,T,z,O,V){this.gl=g,this.shader=P,this.pickShader=T,this.buffer=z,this.vao=O,this.clipBounds=[[-1/0,-1/0,-1/0],[1/0,1/0,1/0]],this.points=[],this.arcLength=[],this.vertexCount=0,this.bounds=[[0,0,0],[0,0,0]],this.pickId=0,this.lineWidth=1,this.texture=V,this.dashScale=1,this.opacity=1,this.hasAlpha=!1,this.dirty=!0,this.pixelRatio=1}var C=x.prototype;C.isTransparent=function(){return this.hasAlpha},C.isOpaque=function(){return!this.hasAlpha},C.pickSlots=1,C.setPickBase=function(g){this.pickId=g},C.drawTransparent=C.draw=function(g){if(this.vertexCount){var P=this.gl,T=this.shader,z=this.vao;T.bind(),T.uniforms={model:g.model||k,view:g.view||k,projection:g.projection||k,clipBounds:S(this.clipBounds),dashTexture:this.texture.bind(),dashScale:this.dashScale/this.arcLength[this.arcLength.length-1],opacity:this.opacity,screenShape:[P.drawingBufferWidth,P.drawingBufferHeight],pixelRatio:this.pixelRatio},z.bind(),z.draw(P.TRIANGLE_STRIP,this.vertexCount),z.unbind()}},C.drawPick=function(g){if(this.vertexCount){var P=this.gl,T=this.pickShader,z=this.vao;T.bind(),T.uniforms={model:g.model||k,view:g.view||k,projection:g.projection||k,pickId:this.pickId,clipBounds:S(this.clipBounds),screenShape:[P.drawingBufferWidth,P.drawingBufferHeight],pixelRatio:this.pixelRatio},z.bind(),z.draw(P.TRIANGLE_STRIP,this.vertexCount),z.unbind()}},C.update=function(g){var P,T;this.dirty=!0;var z=!!g.connectGaps;"dashScale"in g&&(this.dashScale=g.dashScale),this.hasAlpha=!1,"opacity"in g&&(this.opacity=+g.opacity,this.opacity<1&&(this.hasAlpha=!0));var O=[],V=[],G=[],Z=0,j=0,N=[[1/0,1/0,1/0],[-1/0,-1/0,-1/0]],H=g.position||g.positions;if(H){var te=g.color||g.colors||[0,0,0,1],oe=g.lineWidth||1,_e=!1;e:for(P=1;P<H.length;++P){var Ee=H[P-1],Ce=H[P];for(V.push(Z),G.push(Ee.slice()),T=0;T<3;++T){if(isNaN(Ee[T])||isNaN(Ce[T])||!isFinite(Ee[T])||!isFinite(Ce[T])){if(!z&&O.length>0){for(var me=0;me<24;++me)O.push(O[O.length-12]);j+=2,_e=!0}continue e}N[0][T]=Math.min(N[0][T],Ee[T],Ce[T]),N[1][T]=Math.max(N[1][T],Ee[T],Ce[T])}var ie,Se;Array.isArray(te[0])?(ie=te.length>P-1?te[P-1]:te.length>0?te[te.length-1]:[0,0,0,1],Se=te.length>P?te[P]:te.length>0?te[te.length-1]:[0,0,0,1]):ie=Se=te,ie.length===3&&(ie=[ie[0],ie[1],ie[2],1]),Se.length===3&&(Se=[Se[0],Se[1],Se[2],1]),!this.hasAlpha&&ie[3]<1&&(this.hasAlpha=!0);var Le;Array.isArray(oe)?Le=oe.length>P-1?oe[P-1]:oe.length>0?oe[oe.length-1]:[0,0,0,1]:Le=oe;var Ae=Z;if(Z+=E(Ee,Ce),_e){for(T=0;T<2;++T)O.push(Ee[0],Ee[1],Ee[2],Ce[0],Ce[1],Ce[2],Ae,Le,ie[0],ie[1],ie[2],ie[3]);j+=2,_e=!1}O.push(Ee[0],Ee[1],Ee[2],Ce[0],Ce[1],Ce[2],Ae,Le,ie[0],ie[1],ie[2],ie[3],Ee[0],Ee[1],Ee[2],Ce[0],Ce[1],Ce[2],Ae,-Le,ie[0],ie[1],ie[2],ie[3],Ce[0],Ce[1],Ce[2],Ee[0],Ee[1],Ee[2],Z,-Le,Se[0],Se[1],Se[2],Se[3],Ce[0],Ce[1],Ce[2],Ee[0],Ee[1],Ee[2],Z,Le,Se[0],Se[1],Se[2],Se[3]),j+=4}}if(this.buffer.update(O),V.push(Z),G.push(H[H.length-1].slice()),this.bounds=N,this.vertexCount=j,this.points=G,this.arcLength=V,"dashes"in g){var Fe=g.dashes,Pe=Fe.slice();for(Pe.unshift(0),P=1;P<Pe.length;++P)Pe[P]=Pe[P-1]+Pe[P];var ge=v(new Array(256*4),[256,1,4]);for(P=0;P<256;++P){for(T=0;T<4;++T)ge.set(P,0,T,0);d.le(Pe,Pe[Pe.length-1]*P/255)&1?ge.set(P,0,0,0):ge.set(P,0,0,255)}this.texture.setPixels(ge)}},C.dispose=function(){this.shader.dispose(),this.vao.dispose(),this.buffer.dispose()},C.pick=function(g){if(!g||g.id!==this.pickId)return null;var P=h(g.value[0],g.value[1],g.value[2],0),T=d.le(this.arcLength,P);if(T<0)return null;if(T===this.arcLength.length-1)return new L(this.arcLength[this.arcLength.length-1],this.points[this.points.length-1].slice(),T);for(var z=this.points[T],O=this.points[Math.min(T+1,this.points.length-1)],V=(P-this.arcLength[T])/(this.arcLength[T+1]-this.arcLength[T]),G=1-V,Z=[0,0,0],j=0;j<3;++j)Z[j]=G*z[j]+V*O[j];var N=Math.min(V<.5?T:T+1,this.points.length-1);return new L(P,Z,N,this.points[N])};function M(g){var P=g.gl||g.scene&&g.scene.gl,T=b(P);T.attributes.position.location=0,T.attributes.nextPosition.location=1,T.attributes.arcLength.location=2,T.attributes.lineWidth.location=3,T.attributes.color.location=4;var z=p(P);z.attributes.position.location=0,z.attributes.nextPosition.location=1,z.attributes.arcLength.location=2,z.attributes.lineWidth.location=3,z.attributes.color.location=4;for(var O=s(P),V=l(P,[{buffer:O,size:3,offset:0,stride:48},{buffer:O,size:3,offset:12,stride:48},{buffer:O,size:1,offset:24,stride:48},{buffer:O,size:1,offset:28,stride:48},{buffer:O,size:4,offset:32,stride:48}]),G=v(new Array(256*4),[256,1,4]),Z=0;Z<1024;++Z)G.data[Z]=255;var j=u(P,G);j.wrap=P.REPEAT;var N=new x(P,T,z,O,V,j);return N.update(g),N}},5716:function(i,a,o){"use strict";var s=o(6859);i.exports=l;function l(u){return u.cmp(new s(0))}},5721:function(i){"use strict";i.exports=a;function a(o){for(var s=0,l=0,u=1;u<o.length;++u)o[u][0]<o[s][0]&&(s=u),o[u][0]>o[l][0]&&(l=u);return s<l?[[s],[l]]:s>l?[[l],[s]]:[[s]]}},5771:function(i,a,o){"use strict";var s=o(8507),l=o(3788),u=o(2419);i.exports=c;function c(f){f.sort(l);for(var h=f.length,d=0,v=0;v<h;++v){var _=f[v],b=u(_);if(b!==0){if(d>0){var p=f[d-1];if(s(_,p)===0&&u(p)!==b){d-=1;continue}}f[d++]=_}}return f.length=d,f}},5838:function(i,a,o){"use strict";i.exports=l;var s=o(7842);function l(u){for(var c=new Array(u.length),f=0;f<u.length;++f)c[f]=s(u[f]);return c}},5847:function(i){i.exports=a;function a(o,s,l){return o[0]=s[0]*l[0],o[1]=s[1]*l[1],o[2]=s[2]*l[2],o}},5874:function(i,a,o){i.exports=T;var s=o(620),l=o(7827),u=o(6852),c=o(7932),f=o(3508),h=999,d=9999,v=0,_=1,b=2,p=3,k=4,E=5,S=6,L=7,x=8,C=9,M=10,g=11,P=["block-comment","line-comment","preprocessor","operator","integer","float","ident","builtin","keyword","whitespace","eof","integer"];function T(z){var O=0,V=0,G=h,Z,j,N=[],H=[],te=0,oe=0,_e=1,Ee=0,Ce=0,me=!1,ie=!1,Se="",Le;z=z||{};var Ae=u,Fe=s;z.version==="300 es"&&(Ae=f,Fe=c);for(var Pe={},ge={},O=0;O<Ae.length;O++)Pe[Ae[O]]=!0;for(var O=0;O<Fe.length;O++)ge[Fe[O]]=!0;return function($e){return H=[],$e!==null?ce($e):Ze()};function Re($e){$e.length&&H.push({type:P[G],data:$e,position:Ce,line:_e,column:Ee})}function ce($e){O=0,$e.toString&&($e=$e.toString()),Se+=$e.replace(/\r\n/g,`
+`),Le=Se.length;for(var St;Z=Se[O],O<Le;){switch(St=O,G){case v:O=lt();break;case _:O=st();break;case b:O=Zt();break;case p:O=Gt();break;case k:O=sr();break;case g:O=Jt();break;case E:O=wr();break;case d:O=cr();break;case C:O=pt();break;case h:O=ut();break}if(St!==O)switch(Se[St]){case`
+`:Ee=0,++_e;break;default:++Ee;break}}return V+=O,Se=Se.slice(O),H}function Ze($e){return N.length&&Re(N.join("")),G=M,Re("(eof)"),H}function ut(){return N=N.length?[]:N,j==="/"&&Z==="*"?(Ce=V+O-1,G=v,j=Z,O+1):j==="/"&&Z==="/"?(Ce=V+O-1,G=_,j=Z,O+1):Z==="#"?(G=b,Ce=V+O,O):/\s/.test(Z)?(G=C,Ce=V+O,O):(me=/\d/.test(Z),ie=/[^\w_]/.test(Z),Ce=V+O,G=me?k:ie?p:d,O)}function pt(){return/[^\s]/g.test(Z)?(Re(N.join("")),G=h,O):(N.push(Z),j=Z,O+1)}function Zt(){return(Z==="\r"||Z===`
+`)&&j!=="\\"?(Re(N.join("")),G=h,O):(N.push(Z),j=Z,O+1)}function st(){return Zt()}function lt(){return Z==="/"&&j==="*"?(N.push(Z),Re(N.join("")),G=h,O+1):(N.push(Z),j=Z,O+1)}function Gt(){if(j==="."&&/\d/.test(Z))return G=E,O;if(j==="/"&&Z==="*")return G=v,O;if(j==="/"&&Z==="/")return G=_,O;if(Z==="."&&N.length){for(;Nt(N););return G=E,O}if(Z===";"||Z===")"||Z==="("){if(N.length)for(;Nt(N););return Re(Z),G=h,O+1}var $e=N.length===2&&Z!=="=";if(/[\w_\d\s]/.test(Z)||$e){for(;Nt(N););return G=h,O}return N.push(Z),j=Z,O+1}function Nt($e){var St=0,Qt,Vt;do{if(Qt=l.indexOf($e.slice(0,$e.length+St).join("")),Vt=l[Qt],Qt===-1){if(St--+$e.length>0)continue;Vt=$e.slice(0,1).join("")}return Re(Vt),Ce+=Vt.length,N=N.slice(Vt.length),N.length}while(!0)}function Jt(){return/[^a-fA-F0-9]/.test(Z)?(Re(N.join("")),G=h,O):(N.push(Z),j=Z,O+1)}function sr(){return Z==="."||/[eE]/.test(Z)?(N.push(Z),G=E,j=Z,O+1):Z==="x"&&N.length===1&&N[0]==="0"?(G=g,N.push(Z),j=Z,O+1):/[^\d]/.test(Z)?(Re(N.join("")),G=h,O):(N.push(Z),j=Z,O+1)}function wr(){return Z==="f"&&(N.push(Z),j=Z,O+=1),/[eE]/.test(Z)||(Z==="-"||Z==="+")&&/[eE]/.test(j)?(N.push(Z),j=Z,O+1):/[^\d]/.test(Z)?(Re(N.join("")),G=h,O):(N.push(Z),j=Z,O+1)}function cr(){if(/[^\d\w_]/.test(Z)){var $e=N.join("");return ge[$e]?G=x:Pe[$e]?G=L:G=S,Re(N.join("")),G=h,O}return N.push(Z),j=Z,O+1}}},5878:function(i,a,o){"use strict";i.exports=c;var s=o(3250),l=o(2014);function u(f,h,d){var v=Math.abs(s(f,h,d)),_=Math.sqrt(Math.pow(h[0]-d[0],2)+Math.pow(h[1]-d[1],2));return v/_}function c(f,h,d){for(var v=h.length,_=f.length,b=new Array(v),p=new Array(v),k=new Array(v),E=new Array(v),S=0;S<v;++S)b[S]=p[S]=-1,k[S]=1/0,E[S]=!1;for(var S=0;S<_;++S){var L=f[S];if(L.length!==2)throw new Error("Input must be a graph");var x=L[1],C=L[0];p[C]!==-1?p[C]=-2:p[C]=x,b[x]!==-1?b[x]=-2:b[x]=C}function M(ie){if(E[ie])return 1/0;var Se=b[ie],Le=p[ie];return Se<0||Le<0?1/0:u(h[ie],h[Se],h[Le])}function g(ie,Se){var Le=j[ie],Ae=j[Se];j[ie]=Ae,j[Se]=Le,N[Le]=Se,N[Ae]=ie}function P(ie){return k[j[ie]]}function T(ie){return ie&1?ie-1>>1:(ie>>1)-1}function z(ie){for(var Se=P(ie);;){var Le=Se,Ae=2*ie+1,Fe=2*(ie+1),Pe=ie;if(Ae<te){var ge=P(Ae);ge<Le&&(Pe=Ae,Le=ge)}if(Fe<te){var Re=P(Fe);Re<Le&&(Pe=Fe)}if(Pe===ie)return ie;g(ie,Pe),ie=Pe}}function O(ie){for(var Se=P(ie);ie>0;){var Le=T(ie);if(Le>=0){var Ae=P(Le);if(Se<Ae){g(ie,Le),ie=Le;continue}}return ie}}function V(){if(te>0){var ie=j[0];return g(0,te-1),te-=1,z(0),ie}return-1}function G(ie,Se){var Le=j[ie];return k[Le]===Se?ie:(k[Le]=-1/0,O(ie),V(),k[Le]=Se,te+=1,O(te-1))}function Z(ie){if(!E[ie]){E[ie]=!0;var Se=b[ie],Le=p[ie];b[Le]>=0&&(b[Le]=Se),p[Se]>=0&&(p[Se]=Le),N[Se]>=0&&G(N[Se],M(Se)),N[Le]>=0&&G(N[Le],M(Le))}}for(var j=[],N=new Array(v),S=0;S<v;++S){var H=k[S]=M(S);H<1/0?(N[S]=j.length,j.push(S)):N[S]=-1}for(var te=j.length,S=te>>1;S>=0;--S)z(S);for(;;){var oe=V();if(oe<0||k[oe]>d)break;Z(oe)}for(var _e=[],S=0;S<v;++S)E[S]||(N[S]=_e.length,_e.push(h[S].slice()));var Ee=_e.length;function Ce(ie,Se){if(ie[Se]<0)return Se;var Le=Se,Ae=Se;do{var Fe=ie[Ae];if(!E[Ae]||Fe<0||Fe===Ae||(Ae=Fe,Fe=ie[Ae],!E[Ae]||Fe<0||Fe===Ae))break;Ae=Fe,Le=ie[Le]}while(Le!==Ae);for(var Pe=Se;Pe!==Ae;Pe=ie[Pe])ie[Pe]=Ae;return Ae}var me=[];return f.forEach(function(ie){var Se=Ce(b,ie[0]),Le=Ce(p,ie[1]);if(Se>=0&&Le>=0&&Se!==Le){var Ae=N[Se],Fe=N[Le];Ae!==Fe&&me.push([Ae,Fe])}}),l.unique(l.normalize(me)),{positions:_e,edges:me}}},5911:function(i){i.exports=a;function a(o,s,l){var u=s[0],c=s[1],f=s[2],h=l[0],d=l[1],v=l[2];return o[0]=c*v-f*d,o[1]=f*h-u*v,o[2]=u*d-c*h,o}},5964:function(i){"use strict";i.exports=function(a){return!a&&a!==0?"":a.toString()}},5995:function(i,a,o){"use strict";i.exports=u;var s=o(7642),l=o(6037);function u(c,f){return s(f).filter(function(h){for(var d=new Array(h.length),v=0;v<h.length;++v)d[v]=f[h[v]];return l(d)*c<1})}},6037:function(i,a,o){i.exports=l;var s=o(3628);function l(u){for(var c=s(u),f=0,h=0;h<u.length;++h)for(var d=u[h],v=0;v<c.length;++v)f+=Math.pow(d[v]-c[v],2);return Math.sqrt(f/u.length)}},6079:function(i){i.exports=a;function a(o,s,l,u){var c=u[0],f=u[1],h=u[2],d=Math.sqrt(c*c+f*f+h*h),v,_,b,p,k,E,S,L,x,C,M,g,P,T,z,O,V,G,Z,j,N,H,te,oe;return Math.abs(d)<1e-6?null:(d=1/d,c*=d,f*=d,h*=d,v=Math.sin(l),_=Math.cos(l),b=1-_,p=s[0],k=s[1],E=s[2],S=s[3],L=s[4],x=s[5],C=s[6],M=s[7],g=s[8],P=s[9],T=s[10],z=s[11],O=c*c*b+_,V=f*c*b+h*v,G=h*c*b-f*v,Z=c*f*b-h*v,j=f*f*b+_,N=h*f*b+c*v,H=c*h*b+f*v,te=f*h*b-c*v,oe=h*h*b+_,o[0]=p*O+L*V+g*G,o[1]=k*O+x*V+P*G,o[2]=E*O+C*V+T*G,o[3]=S*O+M*V+z*G,o[4]=p*Z+L*j+g*N,o[5]=k*Z+x*j+P*N,o[6]=E*Z+C*j+T*N,o[7]=S*Z+M*j+z*N,o[8]=p*H+L*te+g*oe,o[9]=k*H+x*te+P*oe,o[10]=E*H+C*te+T*oe,o[11]=S*H+M*te+z*oe,s!==o&&(o[12]=s[12],o[13]=s[13],o[14]=s[14],o[15]=s[15]),o)}},6141:function(i,a,o){i.exports=o(2953)},6199:function(i,a,o){"use strict";var s=o(1338),l={zero:function(L,x,C,M){var g=L[0],P=C[0];M|=0;var T=0,z=P;for(T=0;T<g;++T)x[M]=0,M+=z},fdTemplate1:function(L,x,C,M,g,P,T){var z=L[0],O=C[0],V=P[0],G=-1*O,Z=O;M|=0,T|=0;var j=0,N=O,H=V;for(j=0;j<z;++j)g[T]=.5*(x[M+G]-x[M+Z]),M+=N,T+=H},fdTemplate2:function(L,x,C,M,g,P,T,z,O,V){var G=L[0],Z=L[1],j=C[0],N=C[1],H=P[0],te=P[1],oe=O[0],_e=O[1],Ee=-1*j,Ce=j,me=-1*N,ie=N;M|=0,T|=0,V|=0;var Se=0,Le=0,Ae=N,Fe=j-Z*N,Pe=te,ge=H-Z*te,Re=_e,ce=oe-Z*_e;for(Le=0;Le<G;++Le){for(Se=0;Se<Z;++Se)g[T]=.5*(x[M+Ee]-x[M+Ce]),z[V]=.5*(x[M+me]-x[M+ie]),M+=Ae,T+=Pe,V+=Re;M+=Fe,T+=ge,V+=ce}}},u={cdiff:function(L){var x={};return function(M,g,P){var T=M.dtype,z=M.order,O=g.dtype,V=g.order,G=P.dtype,Z=P.order,j=[T,z.join(),O,V.join(),G,Z.join()].join(),N=x[j];return N||(x[j]=N=L([T,z,O,V,G,Z])),N(M.shape.slice(0),M.data,M.stride,M.offset|0,g.data,g.stride,g.offset|0,P.data,P.stride,P.offset|0)}},zero:function(L){var x={};return function(M){var g=M.dtype,P=M.order,T=[g,P.join()].join(),z=x[T];return z||(x[T]=z=L([g,P])),z(M.shape.slice(0),M.data,M.stride,M.offset|0)}},fdTemplate1:function(L){var x={};return function(M,g){var P=M.dtype,T=M.order,z=g.dtype,O=g.order,V=[P,T.join(),z,O.join()].join(),G=x[V];return G||(x[V]=G=L([P,T,z,O])),G(M.shape.slice(0),M.data,M.stride,M.offset|0,g.data,g.stride,g.offset|0)}},fdTemplate2:function(L){var x={};return function(M,g,P){var T=M.dtype,z=M.order,O=g.dtype,V=g.order,G=P.dtype,Z=P.order,j=[T,z.join(),O,V.join(),G,Z.join()].join(),N=x[j];return N||(x[j]=N=L([T,z,O,V,G,Z])),N(M.shape.slice(0),M.data,M.stride,M.offset|0,g.data,g.stride,g.offset|0,P.data,P.stride,P.offset|0)}}};function c(L){var x=u[L.funcName];return x(f.bind(void 0,L))}function f(L){return l[L.funcName]}function h(L){return c({funcName:L.funcName})}var d={},v={},_={body:"",args:[],thisVars:[],localVars:[]},b=h({funcName:"cdiff"}),p=h({funcName:"zero"});function k(L){return L in d?d[L]:d[L]=h({funcName:"fdTemplate"+L})}function E(L,x,C,M){return function(g,P){var T=P.shape.slice();return T[0]>2&&T[1]>2&&M(P.pick(-1,-1).lo(1,1).hi(T[0]-2,T[1]-2),g.pick(-1,-1,0).lo(1,1).hi(T[0]-2,T[1]-2),g.pick(-1,-1,1).lo(1,1).hi(T[0]-2,T[1]-2)),T[1]>2&&(C(P.pick(0,-1).lo(1).hi(T[1]-2),g.pick(0,-1,1).lo(1).hi(T[1]-2)),x(g.pick(0,-1,0).lo(1).hi(T[1]-2))),T[1]>2&&(C(P.pick(T[0]-1,-1).lo(1).hi(T[1]-2),g.pick(T[0]-1,-1,1).lo(1).hi(T[1]-2)),x(g.pick(T[0]-1,-1,0).lo(1).hi(T[1]-2))),T[0]>2&&(C(P.pick(-1,0).lo(1).hi(T[0]-2),g.pick(-1,0,0).lo(1).hi(T[0]-2)),x(g.pick(-1,0,1).lo(1).hi(T[0]-2))),T[0]>2&&(C(P.pick(-1,T[1]-1).lo(1).hi(T[0]-2),g.pick(-1,T[1]-1,0).lo(1).hi(T[0]-2)),x(g.pick(-1,T[1]-1,1).lo(1).hi(T[0]-2))),g.set(0,0,0,0),g.set(0,0,1,0),g.set(T[0]-1,0,0,0),g.set(T[0]-1,0,1,0),g.set(0,T[1]-1,0,0),g.set(0,T[1]-1,1,0),g.set(T[0]-1,T[1]-1,0,0),g.set(T[0]-1,T[1]-1,1,0),g}}function S(L){var x=L.join(),T=v[x];if(T)return T;for(var C=L.length,M=[b,p],g=1;g<=C;++g)M.push(k(g));var P=E,T=P.apply(void 0,M);return v[x]=T,T}i.exports=function(x,C,M){if(Array.isArray(M)||(typeof M=="string"?M=s(C.dimension,M):M=s(C.dimension,"clamp")),C.size===0)return x;if(C.dimension===0)return x.set(0),x;var g=S(M);return g(x,C)}},6204:function(i){"use strict";i.exports=a;function a(o){var s,l,u,c=o.length,f=0;for(s=0;s<c;++s)f+=o[s].length;var h=new Array(f),d=0;for(s=0;s<c;++s){var v=o[s],_=v.length;for(l=0;l<_;++l){var b=h[d++]=new Array(_-1),p=0;for(u=0;u<_;++u)u!==l&&(b[p++]=v[u]);if(l&1){var k=b[1];b[1]=b[0],b[0]=k}}}return h}},6296:function(i,a,o){"use strict";i.exports=h;var s=o(7261),l=o(9977),u=o(1811);function c(d,v){this._controllerNames=Object.keys(d),this._controllerList=this._controllerNames.map(function(_){return d[_]}),this._mode=v,this._active=d[v],this._active||(this._mode="turntable",this._active=d.turntable),this.modes=this._controllerNames,this.computedMatrix=this._active.computedMatrix,this.computedEye=this._active.computedEye,this.computedUp=this._active.computedUp,this.computedCenter=this._active.computedCenter,this.computedRadius=this._active.computedRadius}var f=c.prototype;f.flush=function(d){for(var v=this._controllerList,_=0;_<v.length;++_)v[_].flush(d)},f.idle=function(d){for(var v=this._controllerList,_=0;_<v.length;++_)v[_].idle(d)},f.lookAt=function(d,v,_,b){for(var p=this._controllerList,k=0;k<p.length;++k)p[k].lookAt(d,v,_,b)},f.rotate=function(d,v,_,b){for(var p=this._controllerList,k=0;k<p.length;++k)p[k].rotate(d,v,_,b)},f.pan=function(d,v,_,b){for(var p=this._controllerList,k=0;k<p.length;++k)p[k].pan(d,v,_,b)},f.translate=function(d,v,_,b){for(var p=this._controllerList,k=0;k<p.length;++k)p[k].translate(d,v,_,b)},f.setMatrix=function(d,v){for(var _=this._controllerList,b=0;b<_.length;++b)_[b].setMatrix(d,v)},f.setDistanceLimits=function(d,v){for(var _=this._controllerList,b=0;b<_.length;++b)_[b].setDistanceLimits(d,v)},f.setDistance=function(d,v){for(var _=this._controllerList,b=0;b<_.length;++b)_[b].setDistance(d,v)},f.recalcMatrix=function(d){this._active.recalcMatrix(d)},f.getDistance=function(d){return this._active.getDistance(d)},f.getDistanceLimits=function(d){return this._active.getDistanceLimits(d)},f.lastT=function(){return this._active.lastT()},f.setMode=function(d){if(d!==this._mode){var v=this._controllerNames.indexOf(d);if(!(v<0)){var _=this._active,b=this._controllerList[v],p=Math.max(_.lastT(),b.lastT());_.recalcMatrix(p),b.setMatrix(p,_.computedMatrix),this._active=b,this._mode=d,this.computedMatrix=this._active.computedMatrix,this.computedEye=this._active.computedEye,this.computedUp=this._active.computedUp,this.computedCenter=this._active.computedCenter,this.computedRadius=this._active.computedRadius}}},f.getMode=function(){return this._mode};function h(d){d=d||{};var v=d.eye||[0,0,1],_=d.center||[0,0,0],b=d.up||[0,1,0],p=d.distanceLimits||[0,1/0],k=d.mode||"turntable",E=s(),S=l(),L=u();return E.setDistanceLimits(p[0],p[1]),E.lookAt(0,v,_,b),S.setDistanceLimits(p[0],p[1]),S.lookAt(0,v,_,b),L.setDistanceLimits(p[0],p[1]),L.lookAt(0,v,_,b),new c({turntable:E,orbit:S,matrix:L},k)}},6330:function(i,a,o){"use strict";var s=o(1533);i.exports=l;function l(u){return Array.isArray(u)&&u.length===2&&s(u[0])&&s(u[1])}},6405:function(i,a,o){"use strict";var s=o(2931);i.exports=function(u,c){var f=u.positions,h=u.vectors,d={positions:[],vertexIntensity:[],vertexIntensityBounds:u.vertexIntensityBounds,vectors:[],cells:[],coneOffset:u.coneOffset,colormap:u.colormap};if(u.positions.length===0)return c&&(c[0]=[0,0,0],c[1]=[0,0,0]),d;for(var v=0,_=1/0,b=-1/0,p=1/0,k=-1/0,E=1/0,S=-1/0,L=null,x=null,C=[],M=1/0,g=!1,P=u.coneSizemode==="raw",T=0;T<f.length;T++){var z=f[T];_=Math.min(z[0],_),b=Math.max(z[0],b),p=Math.min(z[1],p),k=Math.max(z[1],k),E=Math.min(z[2],E),S=Math.max(z[2],S);var O=h[T];if(s.length(O)>v&&(v=s.length(O)),T&&!P){var V=2*s.distance(L,z)/(s.length(x)+s.length(O));V?(M=Math.min(M,V),g=!1):g=!0}g||(L=z,x=O),C.push(O)}var G=[_,p,E],Z=[b,k,S];c&&(c[0]=G,c[1]=Z),v===0&&(v=1);var j=1/v;isFinite(M)||(M=1),d.vectorScale=M;var N=u.coneSize||(P?1:.5);u.absoluteConeSize&&(N=u.absoluteConeSize*j),d.coneScale=N;for(var T=0,H=0;T<f.length;T++)for(var z=f[T],te=z[0],oe=z[1],_e=z[2],Ee=C[T],Ce=s.length(Ee)*j,me=0,ie=8;me<ie;me++){d.positions.push([te,oe,_e,H++]),d.positions.push([te,oe,_e,H++]),d.positions.push([te,oe,_e,H++]),d.positions.push([te,oe,_e,H++]),d.positions.push([te,oe,_e,H++]),d.positions.push([te,oe,_e,H++]),d.vectors.push(Ee),d.vectors.push(Ee),d.vectors.push(Ee),d.vectors.push(Ee),d.vectors.push(Ee),d.vectors.push(Ee),d.vertexIntensity.push(Ce,Ce,Ce),d.vertexIntensity.push(Ce,Ce,Ce);var Se=d.positions.length;d.cells.push([Se-6,Se-5,Se-4],[Se-3,Se-2,Se-1])}return d};var l=o(614);i.exports.createMesh=o(9060),i.exports.createConeMesh=function(u,c){return i.exports.createMesh(u,c,{shaders:l,traceType:"cone"})}},6429:function(i,a,o){"use strict";i.exports=x;var s=o(8828),l=o(6760),u=o(5202),c=o(3250),f=new Array(16),h=new Array(8),d=new Array(8),v=new Array(3),_=[0,0,0];(function(){for(var C=0;C<8;++C)h[C]=[1,1,1,1],d[C]=[1,1,1]})();function b(C,M,g){for(var P=0;P<4;++P){C[P]=g[12+P];for(var T=0;T<3;++T)C[P]+=M[T]*g[4*T+P]}}var p=[[0,0,1,0,0],[0,0,-1,1,0],[0,-1,0,1,0],[0,1,0,1,0],[-1,0,0,1,0],[1,0,0,1,0]];function k(C){for(var M=0;M<p.length;++M)if(C=u.positive(C,p[M]),C.length<3)return 0;for(var g=C[0],P=g[0]/g[3],T=g[1]/g[3],z=0,M=1;M+1<C.length;++M){var O=C[M],V=C[M+1],G=O[0]/O[3],Z=O[1]/O[3],j=V[0]/V[3],N=V[1]/V[3],H=G-P,te=Z-T,oe=j-P,_e=N-T;z+=Math.abs(H*_e-te*oe)}return z}var E=[1,1,1],S=[0,0,0],L={cubeEdges:E,axis:S};function x(C,M,g,P,T){l(f,M,C),l(f,g,f);for(var z=0,O=0;O<2;++O){v[2]=P[O][2];for(var V=0;V<2;++V){v[1]=P[V][1];for(var G=0;G<2;++G)v[0]=P[G][0],b(h[z],v,f),z+=1}}for(var Z=-1,O=0;O<8;++O){for(var j=h[O][3],N=0;N<3;++N)d[O][N]=h[O][N]/j;T&&(d[O][2]*=-1),j<0&&(Z<0||d[O][2]<d[Z][2])&&(Z=O)}if(Z<0){Z=0;for(var H=0;H<3;++H){for(var te=(H+2)%3,oe=(H+1)%3,_e=-1,Ee=-1,Ce=0;Ce<2;++Ce){var me=Ce<<H,ie=me+(Ce<<te)+(1-Ce<<oe),Se=me+(1-Ce<<te)+(Ce<<oe);c(d[me],d[ie],d[Se],_)<0||(Ce?_e=1:Ee=1)}if(_e<0||Ee<0){Ee>_e&&(Z|=1<<H);continue}for(var Ce=0;Ce<2;++Ce){var me=Ce<<H,ie=me+(Ce<<te)+(1-Ce<<oe),Se=me+(1-Ce<<te)+(Ce<<oe),Le=k([h[me],h[ie],h[Se],h[me+(1<<te)+(1<<oe)]]);Ce?_e=Le:Ee=Le}if(Ee>_e){Z|=1<<H;continue}}}for(var Ae=7^Z,Fe=-1,O=0;O<8;++O)O===Z||O===Ae||(Fe<0||d[Fe][1]>d[O][1])&&(Fe=O);for(var Pe=-1,O=0;O<3;++O){var ge=Fe^1<<O;if(!(ge===Z||ge===Ae)){Pe<0&&(Pe=ge);var oe=d[ge];oe[0]<d[Pe][0]&&(Pe=ge)}}for(var Re=-1,O=0;O<3;++O){var ge=Fe^1<<O;if(!(ge===Z||ge===Ae||ge===Pe)){Re<0&&(Re=ge);var oe=d[ge];oe[0]>d[Re][0]&&(Re=ge)}}var ce=E;ce[0]=ce[1]=ce[2]=0,ce[s.log2(Pe^Fe)]=Fe&Pe,ce[s.log2(Fe^Re)]=Fe&Re;var Ze=Re^7;Ze===Z||Ze===Ae?(Ze=Pe^7,ce[s.log2(Re^Ze)]=Ze&Re):ce[s.log2(Pe^Ze)]=Ze&Pe;for(var ut=S,pt=Z,H=0;H<3;++H)pt&1<<H?ut[H]=-1:ut[H]=1;return L}},6444:function(i,a){"use strict";a.create=s,a.equal=l;function o(u,c){var f=u+"",h=f.indexOf("."),d=0;h>=0&&(d=f.length-h-1);var v=Math.pow(10,d),_=Math.round(u*c*v),b=_+"";if(b.indexOf("e")>=0)return b;var p=_/v,k=_%v;_<0?(p=-Math.ceil(p)|0,k=-k|0):(p=Math.floor(p)|0,k=k|0);var E=""+p;if(_<0&&(E="-"+E),d){for(var S=""+k;S.length<d;)S="0"+S;return E+"."+S}else return E}function s(u,c){for(var f=[],h=0;h<3;++h){for(var d=[],v=.5*(u[0][h]+u[1][h]),_=0;_*c[h]<=u[1][h];++_)d.push({x:_*c[h],text:o(c[h],_)});for(var _=-1;_*c[h]>=u[0][h];--_)d.push({x:_*c[h],text:o(c[h],_)});f.push(d)}return f}function l(u,c){for(var f=0;f<3;++f){if(u[f].length!==c[f].length)return!1;for(var h=0;h<u[f].length;++h){var d=u[f][h],v=c[f][h];if(d.x!==v.x||d.text!==v.text||d.font!==v.font||d.fontColor!==v.fontColor||d.fontSize!==v.fontSize||d.dx!==v.dx||d.dy!==v.dy)return!1}}return!0}},6468:function(i){i.exports=function(o){return atob(o)}},6504:function(i,a,o){"use strict";var s=o(869);i.exports=l;function l(u,c){return s(u[0].mul(c[0]),u[1].mul(c[1]))}},6582:function(i,a,o){var s=o(7894);i.exports=l;function l(u,c,f,h){var d,v,_,b,p,k,E,S,L,x,C=c[0],M=c[1],g=c[2],P=h[0],T=h[1],z=h[2],O=f[0],V=f[1],G=f[2];return Math.abs(C-O)<1e-6&&Math.abs(M-V)<1e-6&&Math.abs(g-G)<1e-6?s(u):(E=C-O,S=M-V,L=g-G,x=1/Math.sqrt(E*E+S*S+L*L),E*=x,S*=x,L*=x,d=T*L-z*S,v=z*E-P*L,_=P*S-T*E,x=Math.sqrt(d*d+v*v+_*_),x?(x=1/x,d*=x,v*=x,_*=x):(d=0,v=0,_=0),b=S*_-L*v,p=L*d-E*_,k=E*v-S*d,x=Math.sqrt(b*b+p*p+k*k),x?(x=1/x,b*=x,p*=x,k*=x):(b=0,p=0,k=0),u[0]=d,u[1]=b,u[2]=E,u[3]=0,u[4]=v,u[5]=p,u[6]=S,u[7]=0,u[8]=_,u[9]=k,u[10]=L,u[11]=0,u[12]=-(d*C+v*M+_*g),u[13]=-(b*C+p*M+k*g),u[14]=-(E*C+S*M+L*g),u[15]=1,u)}},6621:function(i){i.exports=a;function a(o,s,l){return o[0]=s[0]*l,o[1]=s[1]*l,o[2]=s[2]*l,o}},6658:function(i){i.exports=a;function a(o,s,l,u){var c=s[0],f=s[1],h=s[2];return o[0]=c+u*(l[0]-c),o[1]=f+u*(l[1]-f),o[2]=h+u*(l[2]-h),o}},6690:function(i){i.exports=a;function a(o,s,l){return o[0]=s[0]/l[0],o[1]=s[1]/l[1],o[2]=s[2]/l[2],o}},6729:function(i,a,o){"use strict";var s=o(3642),l=o(395);i.exports=u;function u(d){var v,_,b,p,k,E,S,L,g,x,C;if(d||(d={}),L=(d.nshades||72)-1,S=d.format||"hex",E=d.colormap,E||(E="jet"),typeof E=="string"){if(E=E.toLowerCase(),!s[E])throw Error(E+" not a supported colorscale");k=s[E]}else if(Array.isArray(E))k=E.slice();else throw Error("unsupported colormap option",E);if(k.length>L+1)throw new Error(E+" map requires nshades to be at least size "+k.length);Array.isArray(d.alpha)?d.alpha.length!==2?x=[1,1]:x=d.alpha.slice():typeof d.alpha=="number"?x=[d.alpha,d.alpha]:x=[1,1],v=k.map(function(z){return Math.round(z.index*L)}),x[0]=Math.min(Math.max(x[0],0),1),x[1]=Math.min(Math.max(x[1],0),1);var M=k.map(function(z,O){var V=k[O].index,G=k[O].rgb.slice();return G.length===4&&G[3]>=0&&G[3]<=1||(G[3]=x[0]+(x[1]-x[0])*V),G}),g=[];for(C=0;C<v.length-1;++C){p=v[C+1]-v[C],_=M[C],b=M[C+1];for(var P=0;P<p;P++){var T=P/p;g.push([Math.round(l(_[0],b[0],T)),Math.round(l(_[1],b[1],T)),Math.round(l(_[2],b[2],T)),l(_[3],b[3],T)])}}return g.push(k[k.length-1].rgb.concat(x[1])),S==="hex"?g=g.map(f):S==="rgbaString"?g=g.map(h):S==="float"&&(g=g.map(c)),g}function c(d){return[d[0]/255,d[1]/255,d[2]/255,d[3]]}function f(d){for(var v,_="#",b=0;b<3;++b)v=d[b],v=v.toString(16),_+=("00"+v).substr(v.length);return _}function h(d){return"rgba("+d.join(",")+")"}},6740:function(i,a,o){var s=o(3236),l=s([`precision highp float;
+
+precision highp float;
+#define GLSLIFY 1
+
+vec3 getOrthogonalVector(vec3 v) {
+  // Return up-vector for only-z vector.
+  // Return ax + by + cz = 0, a point that lies on the plane that has v as a normal and that isn't (0,0,0).
+  // From the above if-statement we have ||a|| > 0  U  ||b|| > 0.
+  // Assign z = 0, x = -b, y = a:
+  // a*-b + b*a + c*0 = -ba + ba + 0 = 0
+  if (v.x*v.x > v.z*v.z || v.y*v.y > v.z*v.z) {
+    return normalize(vec3(-v.y, v.x, 0.0));
+  } else {
+    return normalize(vec3(0.0, v.z, -v.y));
+  }
+}
+
+// Calculate the tube vertex and normal at the given index.
+//
+// The returned vertex is for a tube ring with its center at origin, radius of length(d), pointing in the direction of d.
+//
+// Each tube segment is made up of a ring of vertices.
+// These vertices are used to make up the triangles of the tube by connecting them together in the vertex array.
+// The indexes of tube segments run from 0 to 8.
+//
+vec3 getTubePosition(vec3 d, float index, out vec3 normal) {
+  float segmentCount = 8.0;
+
+  float angle = 2.0 * 3.14159 * (index / segmentCount);
+
+  vec3 u = getOrthogonalVector(d);
+  vec3 v = normalize(cross(u, d));
+
+  vec3 x = u * cos(angle) * length(d);
+  vec3 y = v * sin(angle) * length(d);
+  vec3 v3 = x + y;
+
+  normal = normalize(v3);
+
+  return v3;
+}
+
+attribute vec4 vector;
+attribute vec4 color, position;
+attribute vec2 uv;
+
+uniform float vectorScale, tubeScale;
+uniform mat4 model, view, projection, inverseModel;
+uniform vec3 eyePosition, lightPosition;
+
+varying vec3 f_normal, f_lightDirection, f_eyeDirection, f_data, f_position;
+varying vec4 f_color;
+varying vec2 f_uv;
+
+void main() {
+  // Scale the vector magnitude to stay constant with
+  // model & view changes.
+  vec3 normal;
+  vec3 XYZ = getTubePosition(mat3(model) * (tubeScale * vector.w * normalize(vector.xyz)), position.w, normal);
+  vec4 tubePosition = model * vec4(position.xyz, 1.0) + vec4(XYZ, 0.0);
+
+  //Lighting geometry parameters
+  vec4 cameraCoordinate = view * tubePosition;
+  cameraCoordinate.xyz /= cameraCoordinate.w;
+  f_lightDirection = lightPosition - cameraCoordinate.xyz;
+  f_eyeDirection   = eyePosition - cameraCoordinate.xyz;
+  f_normal = normalize((vec4(normal, 0.0) * inverseModel).xyz);
+
+  // vec4 m_position  = model * vec4(tubePosition, 1.0);
+  vec4 t_position  = view * tubePosition;
+  gl_Position      = projection * t_position;
+
+  f_color          = color;
+  f_data           = tubePosition.xyz;
+  f_position       = position.xyz;
+  f_uv             = uv;
+}
+`]),u=s([`#extension GL_OES_standard_derivatives : enable
+
+precision highp float;
+#define GLSLIFY 1
+
+float beckmannDistribution(float x, float roughness) {
+  float NdotH = max(x, 0.0001);
+  float cos2Alpha = NdotH * NdotH;
+  float tan2Alpha = (cos2Alpha - 1.0) / cos2Alpha;
+  float roughness2 = roughness * roughness;
+  float denom = 3.141592653589793 * roughness2 * cos2Alpha * cos2Alpha;
+  return exp(tan2Alpha / roughness2) / denom;
+}
+
+float cookTorranceSpecular(
+  vec3 lightDirection,
+  vec3 viewDirection,
+  vec3 surfaceNormal,
+  float roughness,
+  float fresnel) {
+
+  float VdotN = max(dot(viewDirection, surfaceNormal), 0.0);
+  float LdotN = max(dot(lightDirection, surfaceNormal), 0.0);
+
+  //Half angle vector
+  vec3 H = normalize(lightDirection + viewDirection);
+
+  //Geometric term
+  float NdotH = max(dot(surfaceNormal, H), 0.0);
+  float VdotH = max(dot(viewDirection, H), 0.000001);
+  float LdotH = max(dot(lightDirection, H), 0.000001);
+  float G1 = (2.0 * NdotH * VdotN) / VdotH;
+  float G2 = (2.0 * NdotH * LdotN) / LdotH;
+  float G = min(1.0, min(G1, G2));
+  
+  //Distribution term
+  float D = beckmannDistribution(NdotH, roughness);
+
+  //Fresnel term
+  float F = pow(1.0 - VdotN, fresnel);
+
+  //Multiply terms and done
+  return  G * F * D / max(3.14159265 * VdotN, 0.000001);
+}
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+uniform vec3 clipBounds[2];
+uniform float roughness, fresnel, kambient, kdiffuse, kspecular, opacity;
+uniform sampler2D texture;
+
+varying vec3 f_normal, f_lightDirection, f_eyeDirection, f_data, f_position;
+varying vec4 f_color;
+varying vec2 f_uv;
+
+void main() {
+  if (outOfRange(clipBounds[0], clipBounds[1], f_position)) discard;
+  vec3 N = normalize(f_normal);
+  vec3 L = normalize(f_lightDirection);
+  vec3 V = normalize(f_eyeDirection);
+
+  if(gl_FrontFacing) {
+    N = -N;
+  }
+
+  float specular = min(1.0, max(0.0, cookTorranceSpecular(L, V, N, roughness, fresnel)));
+  float diffuse  = min(kambient + kdiffuse * max(dot(N, L), 0.0), 1.0);
+
+  vec4 surfaceColor = f_color * texture2D(texture, f_uv);
+  vec4 litColor = surfaceColor.a * vec4(diffuse * surfaceColor.rgb + kspecular * vec3(1,1,1) * specular,  1.0);
+
+  gl_FragColor = litColor * opacity;
+}
+`]),c=s([`precision highp float;
+
+precision highp float;
+#define GLSLIFY 1
+
+vec3 getOrthogonalVector(vec3 v) {
+  // Return up-vector for only-z vector.
+  // Return ax + by + cz = 0, a point that lies on the plane that has v as a normal and that isn't (0,0,0).
+  // From the above if-statement we have ||a|| > 0  U  ||b|| > 0.
+  // Assign z = 0, x = -b, y = a:
+  // a*-b + b*a + c*0 = -ba + ba + 0 = 0
+  if (v.x*v.x > v.z*v.z || v.y*v.y > v.z*v.z) {
+    return normalize(vec3(-v.y, v.x, 0.0));
+  } else {
+    return normalize(vec3(0.0, v.z, -v.y));
+  }
+}
+
+// Calculate the tube vertex and normal at the given index.
+//
+// The returned vertex is for a tube ring with its center at origin, radius of length(d), pointing in the direction of d.
+//
+// Each tube segment is made up of a ring of vertices.
+// These vertices are used to make up the triangles of the tube by connecting them together in the vertex array.
+// The indexes of tube segments run from 0 to 8.
+//
+vec3 getTubePosition(vec3 d, float index, out vec3 normal) {
+  float segmentCount = 8.0;
+
+  float angle = 2.0 * 3.14159 * (index / segmentCount);
+
+  vec3 u = getOrthogonalVector(d);
+  vec3 v = normalize(cross(u, d));
+
+  vec3 x = u * cos(angle) * length(d);
+  vec3 y = v * sin(angle) * length(d);
+  vec3 v3 = x + y;
+
+  normal = normalize(v3);
+
+  return v3;
+}
+
+attribute vec4 vector;
+attribute vec4 position;
+attribute vec4 id;
+
+uniform mat4 model, view, projection;
+uniform float tubeScale;
+
+varying vec3 f_position;
+varying vec4 f_id;
+
+void main() {
+  vec3 normal;
+  vec3 XYZ = getTubePosition(mat3(model) * (tubeScale * vector.w * normalize(vector.xyz)), position.w, normal);
+  vec4 tubePosition = model * vec4(position.xyz, 1.0) + vec4(XYZ, 0.0);
+
+  gl_Position = projection * (view * tubePosition);
+  f_id        = id;
+  f_position  = position.xyz;
+}
+`]),f=s([`precision highp float;
+#define GLSLIFY 1
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+uniform vec3  clipBounds[2];
+uniform float pickId;
+
+varying vec3 f_position;
+varying vec4 f_id;
+
+void main() {
+  if (outOfRange(clipBounds[0], clipBounds[1], f_position)) discard;
+
+  gl_FragColor = vec4(pickId, f_id.xyz);
+}`]);a.meshShader={vertex:l,fragment:u,attributes:[{name:"position",type:"vec4"},{name:"color",type:"vec4"},{name:"uv",type:"vec2"},{name:"vector",type:"vec4"}]},a.pickShader={vertex:c,fragment:f,attributes:[{name:"position",type:"vec4"},{name:"id",type:"vec4"},{name:"vector",type:"vec4"}]}},6743:function(i){i.exports=a;function a(o,s,l){var u=s[0],c=s[1],f=s[2],h=s[3],d=u+u,v=c+c,_=f+f,b=u*d,p=u*v,k=u*_,E=c*v,S=c*_,L=f*_,x=h*d,C=h*v,M=h*_;return o[0]=1-(E+L),o[1]=p+M,o[2]=k-C,o[3]=0,o[4]=p-M,o[5]=1-(b+L),o[6]=S+x,o[7]=0,o[8]=k+C,o[9]=S-x,o[10]=1-(b+E),o[11]=0,o[12]=l[0],o[13]=l[1],o[14]=l[2],o[15]=1,o}},6760:function(i){i.exports=a;function a(o,s,l){var u=s[0],c=s[1],f=s[2],h=s[3],d=s[4],v=s[5],_=s[6],b=s[7],p=s[8],k=s[9],E=s[10],S=s[11],L=s[12],x=s[13],C=s[14],M=s[15],g=l[0],P=l[1],T=l[2],z=l[3];return o[0]=g*u+P*d+T*p+z*L,o[1]=g*c+P*v+T*k+z*x,o[2]=g*f+P*_+T*E+z*C,o[3]=g*h+P*b+T*S+z*M,g=l[4],P=l[5],T=l[6],z=l[7],o[4]=g*u+P*d+T*p+z*L,o[5]=g*c+P*v+T*k+z*x,o[6]=g*f+P*_+T*E+z*C,o[7]=g*h+P*b+T*S+z*M,g=l[8],P=l[9],T=l[10],z=l[11],o[8]=g*u+P*d+T*p+z*L,o[9]=g*c+P*v+T*k+z*x,o[10]=g*f+P*_+T*E+z*C,o[11]=g*h+P*b+T*S+z*M,g=l[12],P=l[13],T=l[14],z=l[15],o[12]=g*u+P*d+T*p+z*L,o[13]=g*c+P*v+T*k+z*x,o[14]=g*f+P*_+T*E+z*C,o[15]=g*h+P*b+T*S+z*M,o}},6768:function(i,a,o){"use strict";var s=o(6859);i.exports=l;function l(u){return new s(u)}},6803:function(i,a,o){"use strict";var s,l=o(8828),u=o(1755);function c(P){for(var T=0,z=Math.max,O=0,V=P.length;O<V;++O)T=z(T,P[O].length);return T-1}s=c;function f(P){for(var T=-1,z=Math.max,O=0,V=P.length;O<V;++O)for(var G=P[O],Z=0,j=G.length;Z<j;++Z)T=z(T,G[Z]);return T+1}s=f;function h(P){for(var T=new Array(P.length),z=0,O=P.length;z<O;++z)T[z]=P[z].slice(0);return T}s=h;function d(P,T){var z=P.length,O=P.length-T.length,V=Math.min;if(O)return O;switch(z){case 0:return 0;case 1:return P[0]-T[0];case 2:var H=P[0]+P[1]-T[0]-T[1];return H||V(P[0],P[1])-V(T[0],T[1]);case 3:var G=P[0]+P[1],Z=T[0]+T[1];if(H=G+P[2]-(Z+T[2]),H)return H;var j=V(P[0],P[1]),N=V(T[0],T[1]),H=V(j,P[2])-V(N,T[2]);return H||V(j+P[2],G)-V(N+T[2],Z);default:var te=P.slice(0);te.sort();var oe=T.slice(0);oe.sort();for(var _e=0;_e<z;++_e)if(O=te[_e]-oe[_e],O)return O;return 0}}a.Fw=d;function v(P,T){return d(P[0],T[0])}function _(P,T){if(T){for(var z=P.length,O=new Array(z),V=0;V<z;++V)O[V]=[P[V],T[V]];O.sort(v);for(var V=0;V<z;++V)P[V]=O[V][0],T[V]=O[V][1];return P}else return P.sort(d),P}s=_;function b(P){if(P.length===0)return[];for(var T=1,z=P.length,O=1;O<z;++O){var V=P[O];if(d(V,P[O-1])){if(O===T){T++;continue}P[T++]=V}}return P.length=T,P}s=b;function p(P,T){for(var z=0,O=P.length-1,V=-1;z<=O;){var G=z+O>>1,Z=d(P[G],T);Z<=0?(Z===0&&(V=G),z=G+1):Z>0&&(O=G-1)}return V}s=p;function k(P,T){for(var z=new Array(P.length),O=0,V=z.length;O<V;++O)z[O]=[];for(var G=[],O=0,Z=T.length;O<Z;++O)for(var j=T[O],N=j.length,H=1,te=1<<N;H<te;++H){G.length=l.popCount(H);for(var oe=0,_e=0;_e<N;++_e)H&1<<_e&&(G[oe++]=j[_e]);var Ee=p(P,G);if(!(Ee<0))for(;z[Ee++].push(O),!(Ee>=P.length||d(P[Ee],G)!==0););}return z}s=k;function E(P,T){if(!T)return k(b(L(P,0)),P,0);for(var z=new Array(T),O=0;O<T;++O)z[O]=[];for(var O=0,V=P.length;O<V;++O)for(var G=P[O],Z=0,j=G.length;Z<j;++Z)z[G[Z]].push(O);return z}s=E;function S(P){for(var T=[],z=0,O=P.length;z<O;++z)for(var V=P[z],G=V.length|0,Z=1,j=1<<G;Z<j;++Z){for(var N=[],H=0;H<G;++H)Z>>>H&1&&N.push(V[H]);T.push(N)}return _(T)}s=S;function L(P,T){if(T<0)return[];for(var z=[],O=(1<<T+1)-1,V=0;V<P.length;++V)for(var G=P[V],Z=O;Z<1<<G.length;Z=l.nextCombination(Z)){for(var j=new Array(T+1),N=0,H=0;H<G.length;++H)Z&1<<H&&(j[N++]=G[H]);z.push(j)}return _(z)}s=L;function x(P){for(var T=[],z=0,O=P.length;z<O;++z)for(var V=P[z],G=0,Z=V.length;G<Z;++G){for(var j=new Array(V.length-1),N=0,H=0;N<Z;++N)N!==G&&(j[H++]=V[N]);T.push(j)}return _(T)}s=x;function C(P,T){for(var z=new u(T),O=0;O<P.length;++O)for(var V=P[O],G=0;G<V.length;++G)for(var Z=G+1;Z<V.length;++Z)z.link(V[G],V[Z]);for(var j=[],N=z.ranks,O=0;O<N.length;++O)N[O]=-1;for(var O=0;O<P.length;++O){var H=z.find(P[O][0]);N[H]<0?(N[H]=j.length,j.push([P[O].slice(0)])):j[N[H]].push(P[O].slice(0))}return j}function M(P){for(var T=b(_(L(P,0))),z=new u(T.length),O=0;O<P.length;++O)for(var V=P[O],G=0;G<V.length;++G)for(var Z=p(T,[V[G]]),j=G+1;j<V.length;++j)z.link(Z,p(T,[V[j]]));for(var N=[],H=z.ranks,O=0;O<H.length;++O)H[O]=-1;for(var O=0;O<P.length;++O){var te=z.find(p(T,[P[O][0]]));H[te]<0?(H[te]=N.length,N.push([P[O].slice(0)])):N[H[te]].push(P[O].slice(0))}return N}function g(P,T){return T?C(P,T):M(P)}s=g},6808:function(i){i.exports=a;function a(o){var s=o[0],l=o[1],u=o[2],c=o[3];return Math.sqrt(s*s+l*l+u*u+c*c)}},6843:function(i){i.exports=a;function a(o,s,l){return o[0]=s[0]-l[0],o[1]=s[1]-l[1],o[2]=s[2]-l[2],o}},6852:function(i){i.exports=["abs","acos","all","any","asin","atan","ceil","clamp","cos","cross","dFdx","dFdy","degrees","distance","dot","equal","exp","exp2","faceforward","floor","fract","gl_BackColor","gl_BackLightModelProduct","gl_BackLightProduct","gl_BackMaterial","gl_BackSecondaryColor","gl_ClipPlane","gl_ClipVertex","gl_Color","gl_DepthRange","gl_DepthRangeParameters","gl_EyePlaneQ","gl_EyePlaneR","gl_EyePlaneS","gl_EyePlaneT","gl_Fog","gl_FogCoord","gl_FogFragCoord","gl_FogParameters","gl_FragColor","gl_FragCoord","gl_FragData","gl_FragDepth","gl_FragDepthEXT","gl_FrontColor","gl_FrontFacing","gl_FrontLightModelProduct","gl_FrontLightProduct","gl_FrontMaterial","gl_FrontSecondaryColor","gl_LightModel","gl_LightModelParameters","gl_LightModelProducts","gl_LightProducts","gl_LightSource","gl_LightSourceParameters","gl_MaterialParameters","gl_MaxClipPlanes","gl_MaxCombinedTextureImageUnits","gl_MaxDrawBuffers","gl_MaxFragmentUniformComponents","gl_MaxLights","gl_MaxTextureCoords","gl_MaxTextureImageUnits","gl_MaxTextureUnits","gl_MaxVaryingFloats","gl_MaxVertexAttribs","gl_MaxVertexTextureImageUnits","gl_MaxVertexUniformComponents","gl_ModelViewMatrix","gl_ModelViewMatrixInverse","gl_ModelViewMatrixInverseTranspose","gl_ModelViewMatrixTranspose","gl_ModelViewProjectionMatrix","gl_ModelViewProjectionMatrixInverse","gl_ModelViewProjectionMatrixInverseTranspose","gl_ModelViewProjectionMatrixTranspose","gl_MultiTexCoord0","gl_MultiTexCoord1","gl_MultiTexCoord2","gl_MultiTexCoord3","gl_MultiTexCoord4","gl_MultiTexCoord5","gl_MultiTexCoord6","gl_MultiTexCoord7","gl_Normal","gl_NormalMatrix","gl_NormalScale","gl_ObjectPlaneQ","gl_ObjectPlaneR","gl_ObjectPlaneS","gl_ObjectPlaneT","gl_Point","gl_PointCoord","gl_PointParameters","gl_PointSize","gl_Position","gl_ProjectionMatrix","gl_ProjectionMatrixInverse","gl_ProjectionMatrixInverseTranspose","gl_ProjectionMatrixTranspose","gl_SecondaryColor","gl_TexCoord","gl_TextureEnvColor","gl_TextureMatrix","gl_TextureMatrixInverse","gl_TextureMatrixInverseTranspose","gl_TextureMatrixTranspose","gl_Vertex","greaterThan","greaterThanEqual","inversesqrt","length","lessThan","lessThanEqual","log","log2","matrixCompMult","max","min","mix","mod","normalize","not","notEqual","pow","radians","reflect","refract","sign","sin","smoothstep","sqrt","step","tan","texture2D","texture2DLod","texture2DProj","texture2DProjLod","textureCube","textureCubeLod","texture2DLodEXT","texture2DProjLodEXT","textureCubeLodEXT","texture2DGradEXT","texture2DProjGradEXT","textureCubeGradEXT"]},6859:function(i,a,o){i=o.nmd(i),function(s,l){"use strict";function u(j,N){if(!j)throw new Error(N||"Assertion failed")}function c(j,N){j.super_=N;var H=function(){};H.prototype=N.prototype,j.prototype=new H,j.prototype.constructor=j}function f(j,N,H){if(f.isBN(j))return j;this.negative=0,this.words=null,this.length=0,this.red=null,j!==null&&((N==="le"||N==="be")&&(H=N,N=10),this._init(j||0,N||10,H||"be"))}typeof s=="object"?s.exports=f:l.BN=f,f.BN=f,f.wordSize=26;var h;try{typeof window!="undefined"&&typeof window.Buffer!="undefined"?h=window.Buffer:h=o(7790).Buffer}catch(j){}f.isBN=function(N){return N instanceof f?!0:N!==null&&typeof N=="object"&&N.constructor.wordSize===f.wordSize&&Array.isArray(N.words)},f.max=function(N,H){return N.cmp(H)>0?N:H},f.min=function(N,H){return N.cmp(H)<0?N:H},f.prototype._init=function(N,H,te){if(typeof N=="number")return this._initNumber(N,H,te);if(typeof N=="object")return this._initArray(N,H,te);H==="hex"&&(H=16),u(H===(H|0)&&H>=2&&H<=36),N=N.toString().replace(/\s+/g,"");var oe=0;N[0]==="-"&&(oe++,this.negative=1),oe<N.length&&(H===16?this._parseHex(N,oe,te):(this._parseBase(N,H,oe),te==="le"&&this._initArray(this.toArray(),H,te)))},f.prototype._initNumber=function(N,H,te){N<0&&(this.negative=1,N=-N),N<67108864?(this.words=[N&67108863],this.length=1):N<4503599627370496?(this.words=[N&67108863,N/67108864&67108863],this.length=2):(u(N<9007199254740992),this.words=[N&67108863,N/67108864&67108863,1],this.length=3),te==="le"&&this._initArray(this.toArray(),H,te)},f.prototype._initArray=function(N,H,te){if(u(typeof N.length=="number"),N.length<=0)return this.words=[0],this.length=1,this;this.length=Math.ceil(N.length/3),this.words=new Array(this.length);for(var oe=0;oe<this.length;oe++)this.words[oe]=0;var _e,Ee,Ce=0;if(te==="be")for(oe=N.length-1,_e=0;oe>=0;oe-=3)Ee=N[oe]|N[oe-1]<<8|N[oe-2]<<16,this.words[_e]|=Ee<<Ce&67108863,this.words[_e+1]=Ee>>>26-Ce&67108863,Ce+=24,Ce>=26&&(Ce-=26,_e++);else if(te==="le")for(oe=0,_e=0;oe<N.length;oe+=3)Ee=N[oe]|N[oe+1]<<8|N[oe+2]<<16,this.words[_e]|=Ee<<Ce&67108863,this.words[_e+1]=Ee>>>26-Ce&67108863,Ce+=24,Ce>=26&&(Ce-=26,_e++);return this.strip()};function d(j,N){var H=j.charCodeAt(N);return H>=65&&H<=70?H-55:H>=97&&H<=102?H-87:H-48&15}function v(j,N,H){var te=d(j,H);return H-1>=N&&(te|=d(j,H-1)<<4),te}f.prototype._parseHex=function(N,H,te){this.length=Math.ceil((N.length-H)/6),this.words=new Array(this.length);for(var oe=0;oe<this.length;oe++)this.words[oe]=0;var _e=0,Ee=0,Ce;if(te==="be")for(oe=N.length-1;oe>=H;oe-=2)Ce=v(N,H,oe)<<_e,this.words[Ee]|=Ce&67108863,_e>=18?(_e-=18,Ee+=1,this.words[Ee]|=Ce>>>26):_e+=8;else{var me=N.length-H;for(oe=me%2===0?H+1:H;oe<N.length;oe+=2)Ce=v(N,H,oe)<<_e,this.words[Ee]|=Ce&67108863,_e>=18?(_e-=18,Ee+=1,this.words[Ee]|=Ce>>>26):_e+=8}this.strip()};function _(j,N,H,te){for(var oe=0,_e=Math.min(j.length,H),Ee=N;Ee<_e;Ee++){var Ce=j.charCodeAt(Ee)-48;oe*=te,Ce>=49?oe+=Ce-49+10:Ce>=17?oe+=Ce-17+10:oe+=Ce}return oe}f.prototype._parseBase=function(N,H,te){this.words=[0],this.length=1;for(var oe=0,_e=1;_e<=67108863;_e*=H)oe++;oe--,_e=_e/H|0;for(var Ee=N.length-te,Ce=Ee%oe,me=Math.min(Ee,Ee-Ce)+te,ie=0,Se=te;Se<me;Se+=oe)ie=_(N,Se,Se+oe,H),this.imuln(_e),this.words[0]+ie<67108864?this.words[0]+=ie:this._iaddn(ie);if(Ce!==0){var Le=1;for(ie=_(N,Se,N.length,H),Se=0;Se<Ce;Se++)Le*=H;this.imuln(Le),this.words[0]+ie<67108864?this.words[0]+=ie:this._iaddn(ie)}this.strip()},f.prototype.copy=function(N){N.words=new Array(this.length);for(var H=0;H<this.length;H++)N.words[H]=this.words[H];N.length=this.length,N.negative=this.negative,N.red=this.red},f.prototype.clone=function(){var N=new f(null);return this.copy(N),N},f.prototype._expand=function(N){for(;this.length<N;)this.words[this.length++]=0;return this},f.prototype.strip=function(){for(;this.length>1&&this.words[this.length-1]===0;)this.length--;return this._normSign()},f.prototype._normSign=function(){return this.length===1&&this.words[0]===0&&(this.negative=0),this},f.prototype.inspect=function(){return(this.red?"<BN-R: ":"<BN: ")+this.toString(16)+">"};var b=["","0","00","000","0000","00000","000000","0000000","00000000","000000000","0000000000","00000000000","000000000000","0000000000000","00000000000000","000000000000000","0000000000000000","00000000000000000","000000000000000000","0000000000000000000","00000000000000000000","000000000000000000000","0000000000000000000000","00000000000000000000000","000000000000000000000000","0000000000000000000000000"],p=[0,0,25,16,12,11,10,9,8,8,7,7,7,7,6,6,6,6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5],k=[0,0,33554432,43046721,16777216,48828125,60466176,40353607,16777216,43046721,1e7,19487171,35831808,62748517,7529536,11390625,16777216,24137569,34012224,47045881,64e6,4084101,5153632,6436343,7962624,9765625,11881376,14348907,17210368,20511149,243e5,28629151,33554432,39135393,45435424,52521875,60466176];f.prototype.toString=function(N,H){N=N||10,H=H|0||1;var te;if(N===16||N==="hex"){te="";for(var oe=0,_e=0,Ee=0;Ee<this.length;Ee++){var Ce=this.words[Ee],me=((Ce<<oe|_e)&16777215).toString(16);_e=Ce>>>24-oe&16777215,_e!==0||Ee!==this.length-1?te=b[6-me.length]+me+te:te=me+te,oe+=2,oe>=26&&(oe-=26,Ee--)}for(_e!==0&&(te=_e.toString(16)+te);te.length%H!==0;)te="0"+te;return this.negative!==0&&(te="-"+te),te}if(N===(N|0)&&N>=2&&N<=36){var ie=p[N],Se=k[N];te="";var Le=this.clone();for(Le.negative=0;!Le.isZero();){var Ae=Le.modn(Se).toString(N);Le=Le.idivn(Se),Le.isZero()?te=Ae+te:te=b[ie-Ae.length]+Ae+te}for(this.isZero()&&(te="0"+te);te.length%H!==0;)te="0"+te;return this.negative!==0&&(te="-"+te),te}u(!1,"Base should be between 2 and 36")},f.prototype.toNumber=function(){var N=this.words[0];return this.length===2?N+=this.words[1]*67108864:this.length===3&&this.words[2]===1?N+=4503599627370496+this.words[1]*67108864:this.length>2&&u(!1,"Number can only safely store up to 53 bits"),this.negative!==0?-N:N},f.prototype.toJSON=function(){return this.toString(16)},f.prototype.toBuffer=function(N,H){return u(typeof h!="undefined"),this.toArrayLike(h,N,H)},f.prototype.toArray=function(N,H){return this.toArrayLike(Array,N,H)},f.prototype.toArrayLike=function(N,H,te){var oe=this.byteLength(),_e=te||Math.max(1,oe);u(oe<=_e,"byte array longer than desired length"),u(_e>0,"Requested array length <= 0"),this.strip();var Ee=H==="le",Ce=new N(_e),me,ie,Se=this.clone();if(Ee){for(ie=0;!Se.isZero();ie++)me=Se.andln(255),Se.iushrn(8),Ce[ie]=me;for(;ie<_e;ie++)Ce[ie]=0}else{for(ie=0;ie<_e-oe;ie++)Ce[ie]=0;for(ie=0;!Se.isZero();ie++)me=Se.andln(255),Se.iushrn(8),Ce[_e-ie-1]=me}return Ce},Math.clz32?f.prototype._countBits=function(N){return 32-Math.clz32(N)}:f.prototype._countBits=function(N){var H=N,te=0;return H>=4096&&(te+=13,H>>>=13),H>=64&&(te+=7,H>>>=7),H>=8&&(te+=4,H>>>=4),H>=2&&(te+=2,H>>>=2),te+H},f.prototype._zeroBits=function(N){if(N===0)return 26;var H=N,te=0;return(H&8191)===0&&(te+=13,H>>>=13),(H&127)===0&&(te+=7,H>>>=7),(H&15)===0&&(te+=4,H>>>=4),(H&3)===0&&(te+=2,H>>>=2),(H&1)===0&&te++,te},f.prototype.bitLength=function(){var N=this.words[this.length-1],H=this._countBits(N);return(this.length-1)*26+H};function E(j){for(var N=new Array(j.bitLength()),H=0;H<N.length;H++){var te=H/26|0,oe=H%26;N[H]=(j.words[te]&1<<oe)>>>oe}return N}f.prototype.zeroBits=function(){if(this.isZero())return 0;for(var N=0,H=0;H<this.length;H++){var te=this._zeroBits(this.words[H]);if(N+=te,te!==26)break}return N},f.prototype.byteLength=function(){return Math.ceil(this.bitLength()/8)},f.prototype.toTwos=function(N){return this.negative!==0?this.abs().inotn(N).iaddn(1):this.clone()},f.prototype.fromTwos=function(N){return this.testn(N-1)?this.notn(N).iaddn(1).ineg():this.clone()},f.prototype.isNeg=function(){return this.negative!==0},f.prototype.neg=function(){return this.clone().ineg()},f.prototype.ineg=function(){return this.isZero()||(this.negative^=1),this},f.prototype.iuor=function(N){for(;this.length<N.length;)this.words[this.length++]=0;for(var H=0;H<N.length;H++)this.words[H]=this.words[H]|N.words[H];return this.strip()},f.prototype.ior=function(N){return u((this.negative|N.negative)===0),this.iuor(N)},f.prototype.or=function(N){return this.length>N.length?this.clone().ior(N):N.clone().ior(this)},f.prototype.uor=function(N){return this.length>N.length?this.clone().iuor(N):N.clone().iuor(this)},f.prototype.iuand=function(N){var H;this.length>N.length?H=N:H=this;for(var te=0;te<H.length;te++)this.words[te]=this.words[te]&N.words[te];return this.length=H.length,this.strip()},f.prototype.iand=function(N){return u((this.negative|N.negative)===0),this.iuand(N)},f.prototype.and=function(N){return this.length>N.length?this.clone().iand(N):N.clone().iand(this)},f.prototype.uand=function(N){return this.length>N.length?this.clone().iuand(N):N.clone().iuand(this)},f.prototype.iuxor=function(N){var H,te;this.length>N.length?(H=this,te=N):(H=N,te=this);for(var oe=0;oe<te.length;oe++)this.words[oe]=H.words[oe]^te.words[oe];if(this!==H)for(;oe<H.length;oe++)this.words[oe]=H.words[oe];return this.length=H.length,this.strip()},f.prototype.ixor=function(N){return u((this.negative|N.negative)===0),this.iuxor(N)},f.prototype.xor=function(N){return this.length>N.length?this.clone().ixor(N):N.clone().ixor(this)},f.prototype.uxor=function(N){return this.length>N.length?this.clone().iuxor(N):N.clone().iuxor(this)},f.prototype.inotn=function(N){u(typeof N=="number"&&N>=0);var H=Math.ceil(N/26)|0,te=N%26;this._expand(H),te>0&&H--;for(var oe=0;oe<H;oe++)this.words[oe]=~this.words[oe]&67108863;return te>0&&(this.words[oe]=~this.words[oe]&67108863>>26-te),this.strip()},f.prototype.notn=function(N){return this.clone().inotn(N)},f.prototype.setn=function(N,H){u(typeof N=="number"&&N>=0);var te=N/26|0,oe=N%26;return this._expand(te+1),H?this.words[te]=this.words[te]|1<<oe:this.words[te]=this.words[te]&~(1<<oe),this.strip()},f.prototype.iadd=function(N){var H;if(this.negative!==0&&N.negative===0)return this.negative=0,H=this.isub(N),this.negative^=1,this._normSign();if(this.negative===0&&N.negative!==0)return N.negative=0,H=this.isub(N),N.negative=1,H._normSign();var te,oe;this.length>N.length?(te=this,oe=N):(te=N,oe=this);for(var _e=0,Ee=0;Ee<oe.length;Ee++)H=(te.words[Ee]|0)+(oe.words[Ee]|0)+_e,this.words[Ee]=H&67108863,_e=H>>>26;for(;_e!==0&&Ee<te.length;Ee++)H=(te.words[Ee]|0)+_e,this.words[Ee]=H&67108863,_e=H>>>26;if(this.length=te.length,_e!==0)this.words[this.length]=_e,this.length++;else if(te!==this)for(;Ee<te.length;Ee++)this.words[Ee]=te.words[Ee];return this},f.prototype.add=function(N){var H;return N.negative!==0&&this.negative===0?(N.negative=0,H=this.sub(N),N.negative^=1,H):N.negative===0&&this.negative!==0?(this.negative=0,H=N.sub(this),this.negative=1,H):this.length>N.length?this.clone().iadd(N):N.clone().iadd(this)},f.prototype.isub=function(N){if(N.negative!==0){N.negative=0;var H=this.iadd(N);return N.negative=1,H._normSign()}else if(this.negative!==0)return this.negative=0,this.iadd(N),this.negative=1,this._normSign();var te=this.cmp(N);if(te===0)return this.negative=0,this.length=1,this.words[0]=0,this;var oe,_e;te>0?(oe=this,_e=N):(oe=N,_e=this);for(var Ee=0,Ce=0;Ce<_e.length;Ce++)H=(oe.words[Ce]|0)-(_e.words[Ce]|0)+Ee,Ee=H>>26,this.words[Ce]=H&67108863;for(;Ee!==0&&Ce<oe.length;Ce++)H=(oe.words[Ce]|0)+Ee,Ee=H>>26,this.words[Ce]=H&67108863;if(Ee===0&&Ce<oe.length&&oe!==this)for(;Ce<oe.length;Ce++)this.words[Ce]=oe.words[Ce];return this.length=Math.max(this.length,Ce),oe!==this&&(this.negative=1),this.strip()},f.prototype.sub=function(N){return this.clone().isub(N)};function S(j,N,H){H.negative=N.negative^j.negative;var te=j.length+N.length|0;H.length=te,te=te-1|0;var oe=j.words[0]|0,_e=N.words[0]|0,Ee=oe*_e,Ce=Ee&67108863,me=Ee/67108864|0;H.words[0]=Ce;for(var ie=1;ie<te;ie++){for(var Se=me>>>26,Le=me&67108863,Ae=Math.min(ie,N.length-1),Fe=Math.max(0,ie-j.length+1);Fe<=Ae;Fe++){var Pe=ie-Fe|0;oe=j.words[Pe]|0,_e=N.words[Fe]|0,Ee=oe*_e+Le,Se+=Ee/67108864|0,Le=Ee&67108863}H.words[ie]=Le|0,me=Se|0}return me!==0?H.words[ie]=me|0:H.length--,H.strip()}var L=function(N,H,te){var oe=N.words,_e=H.words,Ee=te.words,Ce=0,me,ie,Se,Le=oe[0]|0,Ae=Le&8191,Fe=Le>>>13,Pe=oe[1]|0,ge=Pe&8191,Re=Pe>>>13,ce=oe[2]|0,Ze=ce&8191,ut=ce>>>13,pt=oe[3]|0,Zt=pt&8191,st=pt>>>13,lt=oe[4]|0,Gt=lt&8191,Nt=lt>>>13,Jt=oe[5]|0,sr=Jt&8191,wr=Jt>>>13,cr=oe[6]|0,$e=cr&8191,St=cr>>>13,Qt=oe[7]|0,Vt=Qt&8191,_t=Qt>>>13,It=oe[8]|0,mt=It&8191,er=It>>>13,lr=oe[9]|0,Tr=lr&8191,Lr=lr>>>13,ti=_e[0]|0,Br=ti&8191,Vr=ti>>>13,dt=_e[1]|0,Ge=dt&8191,Je=dt>>>13,je=_e[2]|0,tt=je&8191,xt=je>>>13,Ie=_e[3]|0,xe=Ie&8191,ke=Ie>>>13,vt=_e[4]|0,ir=vt&8191,ar=vt>>>13,vr=_e[5]|0,ii=vr&8191,pi=vr>>>13,$r=_e[6]|0,di=$r&8191,ji=$r>>>13,In=_e[7]|0,wi=In&8191,On=In>>>13,qn=_e[8]|0,Fn=qn&8191,ra=qn>>>13,la=_e[9]|0,Ut=la&8191,wt=la>>>13;te.negative=N.negative^H.negative,te.length=19,me=Math.imul(Ae,Br),ie=Math.imul(Ae,Vr),ie=ie+Math.imul(Fe,Br)|0,Se=Math.imul(Fe,Vr);var rr=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(rr>>>26)|0,rr&=67108863,me=Math.imul(ge,Br),ie=Math.imul(ge,Vr),ie=ie+Math.imul(Re,Br)|0,Se=Math.imul(Re,Vr),me=me+Math.imul(Ae,Ge)|0,ie=ie+Math.imul(Ae,Je)|0,ie=ie+Math.imul(Fe,Ge)|0,Se=Se+Math.imul(Fe,Je)|0;var nr=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(nr>>>26)|0,nr&=67108863,me=Math.imul(Ze,Br),ie=Math.imul(Ze,Vr),ie=ie+Math.imul(ut,Br)|0,Se=Math.imul(ut,Vr),me=me+Math.imul(ge,Ge)|0,ie=ie+Math.imul(ge,Je)|0,ie=ie+Math.imul(Re,Ge)|0,Se=Se+Math.imul(Re,Je)|0,me=me+Math.imul(Ae,tt)|0,ie=ie+Math.imul(Ae,xt)|0,ie=ie+Math.imul(Fe,tt)|0,Se=Se+Math.imul(Fe,xt)|0;var Er=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(Er>>>26)|0,Er&=67108863,me=Math.imul(Zt,Br),ie=Math.imul(Zt,Vr),ie=ie+Math.imul(st,Br)|0,Se=Math.imul(st,Vr),me=me+Math.imul(Ze,Ge)|0,ie=ie+Math.imul(Ze,Je)|0,ie=ie+Math.imul(ut,Ge)|0,Se=Se+Math.imul(ut,Je)|0,me=me+Math.imul(ge,tt)|0,ie=ie+Math.imul(ge,xt)|0,ie=ie+Math.imul(Re,tt)|0,Se=Se+Math.imul(Re,xt)|0,me=me+Math.imul(Ae,xe)|0,ie=ie+Math.imul(Ae,ke)|0,ie=ie+Math.imul(Fe,xe)|0,Se=Se+Math.imul(Fe,ke)|0;var Xr=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(Xr>>>26)|0,Xr&=67108863,me=Math.imul(Gt,Br),ie=Math.imul(Gt,Vr),ie=ie+Math.imul(Nt,Br)|0,Se=Math.imul(Nt,Vr),me=me+Math.imul(Zt,Ge)|0,ie=ie+Math.imul(Zt,Je)|0,ie=ie+Math.imul(st,Ge)|0,Se=Se+Math.imul(st,Je)|0,me=me+Math.imul(Ze,tt)|0,ie=ie+Math.imul(Ze,xt)|0,ie=ie+Math.imul(ut,tt)|0,Se=Se+Math.imul(ut,xt)|0,me=me+Math.imul(ge,xe)|0,ie=ie+Math.imul(ge,ke)|0,ie=ie+Math.imul(Re,xe)|0,Se=Se+Math.imul(Re,ke)|0,me=me+Math.imul(Ae,ir)|0,ie=ie+Math.imul(Ae,ar)|0,ie=ie+Math.imul(Fe,ir)|0,Se=Se+Math.imul(Fe,ar)|0;var ri=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(ri>>>26)|0,ri&=67108863,me=Math.imul(sr,Br),ie=Math.imul(sr,Vr),ie=ie+Math.imul(wr,Br)|0,Se=Math.imul(wr,Vr),me=me+Math.imul(Gt,Ge)|0,ie=ie+Math.imul(Gt,Je)|0,ie=ie+Math.imul(Nt,Ge)|0,Se=Se+Math.imul(Nt,Je)|0,me=me+Math.imul(Zt,tt)|0,ie=ie+Math.imul(Zt,xt)|0,ie=ie+Math.imul(st,tt)|0,Se=Se+Math.imul(st,xt)|0,me=me+Math.imul(Ze,xe)|0,ie=ie+Math.imul(Ze,ke)|0,ie=ie+Math.imul(ut,xe)|0,Se=Se+Math.imul(ut,ke)|0,me=me+Math.imul(ge,ir)|0,ie=ie+Math.imul(ge,ar)|0,ie=ie+Math.imul(Re,ir)|0,Se=Se+Math.imul(Re,ar)|0,me=me+Math.imul(Ae,ii)|0,ie=ie+Math.imul(Ae,pi)|0,ie=ie+Math.imul(Fe,ii)|0,Se=Se+Math.imul(Fe,pi)|0;var Qr=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(Qr>>>26)|0,Qr&=67108863,me=Math.imul($e,Br),ie=Math.imul($e,Vr),ie=ie+Math.imul(St,Br)|0,Se=Math.imul(St,Vr),me=me+Math.imul(sr,Ge)|0,ie=ie+Math.imul(sr,Je)|0,ie=ie+Math.imul(wr,Ge)|0,Se=Se+Math.imul(wr,Je)|0,me=me+Math.imul(Gt,tt)|0,ie=ie+Math.imul(Gt,xt)|0,ie=ie+Math.imul(Nt,tt)|0,Se=Se+Math.imul(Nt,xt)|0,me=me+Math.imul(Zt,xe)|0,ie=ie+Math.imul(Zt,ke)|0,ie=ie+Math.imul(st,xe)|0,Se=Se+Math.imul(st,ke)|0,me=me+Math.imul(Ze,ir)|0,ie=ie+Math.imul(Ze,ar)|0,ie=ie+Math.imul(ut,ir)|0,Se=Se+Math.imul(ut,ar)|0,me=me+Math.imul(ge,ii)|0,ie=ie+Math.imul(ge,pi)|0,ie=ie+Math.imul(Re,ii)|0,Se=Se+Math.imul(Re,pi)|0,me=me+Math.imul(Ae,di)|0,ie=ie+Math.imul(Ae,ji)|0,ie=ie+Math.imul(Fe,di)|0,Se=Se+Math.imul(Fe,ji)|0;var Oi=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(Oi>>>26)|0,Oi&=67108863,me=Math.imul(Vt,Br),ie=Math.imul(Vt,Vr),ie=ie+Math.imul(_t,Br)|0,Se=Math.imul(_t,Vr),me=me+Math.imul($e,Ge)|0,ie=ie+Math.imul($e,Je)|0,ie=ie+Math.imul(St,Ge)|0,Se=Se+Math.imul(St,Je)|0,me=me+Math.imul(sr,tt)|0,ie=ie+Math.imul(sr,xt)|0,ie=ie+Math.imul(wr,tt)|0,Se=Se+Math.imul(wr,xt)|0,me=me+Math.imul(Gt,xe)|0,ie=ie+Math.imul(Gt,ke)|0,ie=ie+Math.imul(Nt,xe)|0,Se=Se+Math.imul(Nt,ke)|0,me=me+Math.imul(Zt,ir)|0,ie=ie+Math.imul(Zt,ar)|0,ie=ie+Math.imul(st,ir)|0,Se=Se+Math.imul(st,ar)|0,me=me+Math.imul(Ze,ii)|0,ie=ie+Math.imul(Ze,pi)|0,ie=ie+Math.imul(ut,ii)|0,Se=Se+Math.imul(ut,pi)|0,me=me+Math.imul(ge,di)|0,ie=ie+Math.imul(ge,ji)|0,ie=ie+Math.imul(Re,di)|0,Se=Se+Math.imul(Re,ji)|0,me=me+Math.imul(Ae,wi)|0,ie=ie+Math.imul(Ae,On)|0,ie=ie+Math.imul(Fe,wi)|0,Se=Se+Math.imul(Fe,On)|0;var $i=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+($i>>>26)|0,$i&=67108863,me=Math.imul(mt,Br),ie=Math.imul(mt,Vr),ie=ie+Math.imul(er,Br)|0,Se=Math.imul(er,Vr),me=me+Math.imul(Vt,Ge)|0,ie=ie+Math.imul(Vt,Je)|0,ie=ie+Math.imul(_t,Ge)|0,Se=Se+Math.imul(_t,Je)|0,me=me+Math.imul($e,tt)|0,ie=ie+Math.imul($e,xt)|0,ie=ie+Math.imul(St,tt)|0,Se=Se+Math.imul(St,xt)|0,me=me+Math.imul(sr,xe)|0,ie=ie+Math.imul(sr,ke)|0,ie=ie+Math.imul(wr,xe)|0,Se=Se+Math.imul(wr,ke)|0,me=me+Math.imul(Gt,ir)|0,ie=ie+Math.imul(Gt,ar)|0,ie=ie+Math.imul(Nt,ir)|0,Se=Se+Math.imul(Nt,ar)|0,me=me+Math.imul(Zt,ii)|0,ie=ie+Math.imul(Zt,pi)|0,ie=ie+Math.imul(st,ii)|0,Se=Se+Math.imul(st,pi)|0,me=me+Math.imul(Ze,di)|0,ie=ie+Math.imul(Ze,ji)|0,ie=ie+Math.imul(ut,di)|0,Se=Se+Math.imul(ut,ji)|0,me=me+Math.imul(ge,wi)|0,ie=ie+Math.imul(ge,On)|0,ie=ie+Math.imul(Re,wi)|0,Se=Se+Math.imul(Re,On)|0,me=me+Math.imul(Ae,Fn)|0,ie=ie+Math.imul(Ae,ra)|0,ie=ie+Math.imul(Fe,Fn)|0,Se=Se+Math.imul(Fe,ra)|0;var tn=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(tn>>>26)|0,tn&=67108863,me=Math.imul(Tr,Br),ie=Math.imul(Tr,Vr),ie=ie+Math.imul(Lr,Br)|0,Se=Math.imul(Lr,Vr),me=me+Math.imul(mt,Ge)|0,ie=ie+Math.imul(mt,Je)|0,ie=ie+Math.imul(er,Ge)|0,Se=Se+Math.imul(er,Je)|0,me=me+Math.imul(Vt,tt)|0,ie=ie+Math.imul(Vt,xt)|0,ie=ie+Math.imul(_t,tt)|0,Se=Se+Math.imul(_t,xt)|0,me=me+Math.imul($e,xe)|0,ie=ie+Math.imul($e,ke)|0,ie=ie+Math.imul(St,xe)|0,Se=Se+Math.imul(St,ke)|0,me=me+Math.imul(sr,ir)|0,ie=ie+Math.imul(sr,ar)|0,ie=ie+Math.imul(wr,ir)|0,Se=Se+Math.imul(wr,ar)|0,me=me+Math.imul(Gt,ii)|0,ie=ie+Math.imul(Gt,pi)|0,ie=ie+Math.imul(Nt,ii)|0,Se=Se+Math.imul(Nt,pi)|0,me=me+Math.imul(Zt,di)|0,ie=ie+Math.imul(Zt,ji)|0,ie=ie+Math.imul(st,di)|0,Se=Se+Math.imul(st,ji)|0,me=me+Math.imul(Ze,wi)|0,ie=ie+Math.imul(Ze,On)|0,ie=ie+Math.imul(ut,wi)|0,Se=Se+Math.imul(ut,On)|0,me=me+Math.imul(ge,Fn)|0,ie=ie+Math.imul(ge,ra)|0,ie=ie+Math.imul(Re,Fn)|0,Se=Se+Math.imul(Re,ra)|0,me=me+Math.imul(Ae,Ut)|0,ie=ie+Math.imul(Ae,wt)|0,ie=ie+Math.imul(Fe,Ut)|0,Se=Se+Math.imul(Fe,wt)|0;var fn=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(fn>>>26)|0,fn&=67108863,me=Math.imul(Tr,Ge),ie=Math.imul(Tr,Je),ie=ie+Math.imul(Lr,Ge)|0,Se=Math.imul(Lr,Je),me=me+Math.imul(mt,tt)|0,ie=ie+Math.imul(mt,xt)|0,ie=ie+Math.imul(er,tt)|0,Se=Se+Math.imul(er,xt)|0,me=me+Math.imul(Vt,xe)|0,ie=ie+Math.imul(Vt,ke)|0,ie=ie+Math.imul(_t,xe)|0,Se=Se+Math.imul(_t,ke)|0,me=me+Math.imul($e,ir)|0,ie=ie+Math.imul($e,ar)|0,ie=ie+Math.imul(St,ir)|0,Se=Se+Math.imul(St,ar)|0,me=me+Math.imul(sr,ii)|0,ie=ie+Math.imul(sr,pi)|0,ie=ie+Math.imul(wr,ii)|0,Se=Se+Math.imul(wr,pi)|0,me=me+Math.imul(Gt,di)|0,ie=ie+Math.imul(Gt,ji)|0,ie=ie+Math.imul(Nt,di)|0,Se=Se+Math.imul(Nt,ji)|0,me=me+Math.imul(Zt,wi)|0,ie=ie+Math.imul(Zt,On)|0,ie=ie+Math.imul(st,wi)|0,Se=Se+Math.imul(st,On)|0,me=me+Math.imul(Ze,Fn)|0,ie=ie+Math.imul(Ze,ra)|0,ie=ie+Math.imul(ut,Fn)|0,Se=Se+Math.imul(ut,ra)|0,me=me+Math.imul(ge,Ut)|0,ie=ie+Math.imul(ge,wt)|0,ie=ie+Math.imul(Re,Ut)|0,Se=Se+Math.imul(Re,wt)|0;var yn=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(yn>>>26)|0,yn&=67108863,me=Math.imul(Tr,tt),ie=Math.imul(Tr,xt),ie=ie+Math.imul(Lr,tt)|0,Se=Math.imul(Lr,xt),me=me+Math.imul(mt,xe)|0,ie=ie+Math.imul(mt,ke)|0,ie=ie+Math.imul(er,xe)|0,Se=Se+Math.imul(er,ke)|0,me=me+Math.imul(Vt,ir)|0,ie=ie+Math.imul(Vt,ar)|0,ie=ie+Math.imul(_t,ir)|0,Se=Se+Math.imul(_t,ar)|0,me=me+Math.imul($e,ii)|0,ie=ie+Math.imul($e,pi)|0,ie=ie+Math.imul(St,ii)|0,Se=Se+Math.imul(St,pi)|0,me=me+Math.imul(sr,di)|0,ie=ie+Math.imul(sr,ji)|0,ie=ie+Math.imul(wr,di)|0,Se=Se+Math.imul(wr,ji)|0,me=me+Math.imul(Gt,wi)|0,ie=ie+Math.imul(Gt,On)|0,ie=ie+Math.imul(Nt,wi)|0,Se=Se+Math.imul(Nt,On)|0,me=me+Math.imul(Zt,Fn)|0,ie=ie+Math.imul(Zt,ra)|0,ie=ie+Math.imul(st,Fn)|0,Se=Se+Math.imul(st,ra)|0,me=me+Math.imul(Ze,Ut)|0,ie=ie+Math.imul(Ze,wt)|0,ie=ie+Math.imul(ut,Ut)|0,Se=Se+Math.imul(ut,wt)|0;var Sn=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(Sn>>>26)|0,Sn&=67108863,me=Math.imul(Tr,xe),ie=Math.imul(Tr,ke),ie=ie+Math.imul(Lr,xe)|0,Se=Math.imul(Lr,ke),me=me+Math.imul(mt,ir)|0,ie=ie+Math.imul(mt,ar)|0,ie=ie+Math.imul(er,ir)|0,Se=Se+Math.imul(er,ar)|0,me=me+Math.imul(Vt,ii)|0,ie=ie+Math.imul(Vt,pi)|0,ie=ie+Math.imul(_t,ii)|0,Se=Se+Math.imul(_t,pi)|0,me=me+Math.imul($e,di)|0,ie=ie+Math.imul($e,ji)|0,ie=ie+Math.imul(St,di)|0,Se=Se+Math.imul(St,ji)|0,me=me+Math.imul(sr,wi)|0,ie=ie+Math.imul(sr,On)|0,ie=ie+Math.imul(wr,wi)|0,Se=Se+Math.imul(wr,On)|0,me=me+Math.imul(Gt,Fn)|0,ie=ie+Math.imul(Gt,ra)|0,ie=ie+Math.imul(Nt,Fn)|0,Se=Se+Math.imul(Nt,ra)|0,me=me+Math.imul(Zt,Ut)|0,ie=ie+Math.imul(Zt,wt)|0,ie=ie+Math.imul(st,Ut)|0,Se=Se+Math.imul(st,wt)|0;var Ba=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(Ba>>>26)|0,Ba&=67108863,me=Math.imul(Tr,ir),ie=Math.imul(Tr,ar),ie=ie+Math.imul(Lr,ir)|0,Se=Math.imul(Lr,ar),me=me+Math.imul(mt,ii)|0,ie=ie+Math.imul(mt,pi)|0,ie=ie+Math.imul(er,ii)|0,Se=Se+Math.imul(er,pi)|0,me=me+Math.imul(Vt,di)|0,ie=ie+Math.imul(Vt,ji)|0,ie=ie+Math.imul(_t,di)|0,Se=Se+Math.imul(_t,ji)|0,me=me+Math.imul($e,wi)|0,ie=ie+Math.imul($e,On)|0,ie=ie+Math.imul(St,wi)|0,Se=Se+Math.imul(St,On)|0,me=me+Math.imul(sr,Fn)|0,ie=ie+Math.imul(sr,ra)|0,ie=ie+Math.imul(wr,Fn)|0,Se=Se+Math.imul(wr,ra)|0,me=me+Math.imul(Gt,Ut)|0,ie=ie+Math.imul(Gt,wt)|0,ie=ie+Math.imul(Nt,Ut)|0,Se=Se+Math.imul(Nt,wt)|0;var ua=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(ua>>>26)|0,ua&=67108863,me=Math.imul(Tr,ii),ie=Math.imul(Tr,pi),ie=ie+Math.imul(Lr,ii)|0,Se=Math.imul(Lr,pi),me=me+Math.imul(mt,di)|0,ie=ie+Math.imul(mt,ji)|0,ie=ie+Math.imul(er,di)|0,Se=Se+Math.imul(er,ji)|0,me=me+Math.imul(Vt,wi)|0,ie=ie+Math.imul(Vt,On)|0,ie=ie+Math.imul(_t,wi)|0,Se=Se+Math.imul(_t,On)|0,me=me+Math.imul($e,Fn)|0,ie=ie+Math.imul($e,ra)|0,ie=ie+Math.imul(St,Fn)|0,Se=Se+Math.imul(St,ra)|0,me=me+Math.imul(sr,Ut)|0,ie=ie+Math.imul(sr,wt)|0,ie=ie+Math.imul(wr,Ut)|0,Se=Se+Math.imul(wr,wt)|0;var ma=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(ma>>>26)|0,ma&=67108863,me=Math.imul(Tr,di),ie=Math.imul(Tr,ji),ie=ie+Math.imul(Lr,di)|0,Se=Math.imul(Lr,ji),me=me+Math.imul(mt,wi)|0,ie=ie+Math.imul(mt,On)|0,ie=ie+Math.imul(er,wi)|0,Se=Se+Math.imul(er,On)|0,me=me+Math.imul(Vt,Fn)|0,ie=ie+Math.imul(Vt,ra)|0,ie=ie+Math.imul(_t,Fn)|0,Se=Se+Math.imul(_t,ra)|0,me=me+Math.imul($e,Ut)|0,ie=ie+Math.imul($e,wt)|0,ie=ie+Math.imul(St,Ut)|0,Se=Se+Math.imul(St,wt)|0;var Wa=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(Wa>>>26)|0,Wa&=67108863,me=Math.imul(Tr,wi),ie=Math.imul(Tr,On),ie=ie+Math.imul(Lr,wi)|0,Se=Math.imul(Lr,On),me=me+Math.imul(mt,Fn)|0,ie=ie+Math.imul(mt,ra)|0,ie=ie+Math.imul(er,Fn)|0,Se=Se+Math.imul(er,ra)|0,me=me+Math.imul(Vt,Ut)|0,ie=ie+Math.imul(Vt,wt)|0,ie=ie+Math.imul(_t,Ut)|0,Se=Se+Math.imul(_t,wt)|0;var Fa=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(Fa>>>26)|0,Fa&=67108863,me=Math.imul(Tr,Fn),ie=Math.imul(Tr,ra),ie=ie+Math.imul(Lr,Fn)|0,Se=Math.imul(Lr,ra),me=me+Math.imul(mt,Ut)|0,ie=ie+Math.imul(mt,wt)|0,ie=ie+Math.imul(er,Ut)|0,Se=Se+Math.imul(er,wt)|0;var Wo=(Ce+me|0)+((ie&8191)<<13)|0;Ce=(Se+(ie>>>13)|0)+(Wo>>>26)|0,Wo&=67108863,me=Math.imul(Tr,Ut),ie=Math.imul(Tr,wt),ie=ie+Math.imul(Lr,Ut)|0,Se=Math.imul(Lr,wt);var da=(Ce+me|0)+((ie&8191)<<13)|0;return Ce=(Se+(ie>>>13)|0)+(da>>>26)|0,da&=67108863,Ee[0]=rr,Ee[1]=nr,Ee[2]=Er,Ee[3]=Xr,Ee[4]=ri,Ee[5]=Qr,Ee[6]=Oi,Ee[7]=$i,Ee[8]=tn,Ee[9]=fn,Ee[10]=yn,Ee[11]=Sn,Ee[12]=Ba,Ee[13]=ua,Ee[14]=ma,Ee[15]=Wa,Ee[16]=Fa,Ee[17]=Wo,Ee[18]=da,Ce!==0&&(Ee[19]=Ce,te.length++),te};Math.imul||(L=S);function x(j,N,H){H.negative=N.negative^j.negative,H.length=j.length+N.length;for(var te=0,oe=0,_e=0;_e<H.length-1;_e++){var Ee=oe;oe=0;for(var Ce=te&67108863,me=Math.min(_e,N.length-1),ie=Math.max(0,_e-j.length+1);ie<=me;ie++){var Se=_e-ie,Le=j.words[Se]|0,Ae=N.words[ie]|0,Fe=Le*Ae,Pe=Fe&67108863;Ee=Ee+(Fe/67108864|0)|0,Pe=Pe+Ce|0,Ce=Pe&67108863,Ee=Ee+(Pe>>>26)|0,oe+=Ee>>>26,Ee&=67108863}H.words[_e]=Ce,te=Ee,Ee=oe}return te!==0?H.words[_e]=te:H.length--,H.strip()}function C(j,N,H){var te=new M;return te.mulp(j,N,H)}f.prototype.mulTo=function(N,H){var te,oe=this.length+N.length;return this.length===10&&N.length===10?te=L(this,N,H):oe<63?te=S(this,N,H):oe<1024?te=x(this,N,H):te=C(this,N,H),te};function M(j,N){this.x=j,this.y=N}M.prototype.makeRBT=function(N){for(var H=new Array(N),te=f.prototype._countBits(N)-1,oe=0;oe<N;oe++)H[oe]=this.revBin(oe,te,N);return H},M.prototype.revBin=function(N,H,te){if(N===0||N===te-1)return N;for(var oe=0,_e=0;_e<H;_e++)oe|=(N&1)<<H-_e-1,N>>=1;return oe},M.prototype.permute=function(N,H,te,oe,_e,Ee){for(var Ce=0;Ce<Ee;Ce++)oe[Ce]=H[N[Ce]],_e[Ce]=te[N[Ce]]},M.prototype.transform=function(N,H,te,oe,_e,Ee){this.permute(Ee,N,H,te,oe,_e);for(var Ce=1;Ce<_e;Ce<<=1)for(var me=Ce<<1,ie=Math.cos(2*Math.PI/me),Se=Math.sin(2*Math.PI/me),Le=0;Le<_e;Le+=me)for(var Ae=ie,Fe=Se,Pe=0;Pe<Ce;Pe++){var ge=te[Le+Pe],Re=oe[Le+Pe],ce=te[Le+Pe+Ce],Ze=oe[Le+Pe+Ce],ut=Ae*ce-Fe*Ze;Ze=Ae*Ze+Fe*ce,ce=ut,te[Le+Pe]=ge+ce,oe[Le+Pe]=Re+Ze,te[Le+Pe+Ce]=ge-ce,oe[Le+Pe+Ce]=Re-Ze,Pe!==me&&(ut=ie*Ae-Se*Fe,Fe=ie*Fe+Se*Ae,Ae=ut)}},M.prototype.guessLen13b=function(N,H){var te=Math.max(H,N)|1,oe=te&1,_e=0;for(te=te/2|0;te;te=te>>>1)_e++;return 1<<_e+1+oe},M.prototype.conjugate=function(N,H,te){if(!(te<=1))for(var oe=0;oe<te/2;oe++){var _e=N[oe];N[oe]=N[te-oe-1],N[te-oe-1]=_e,_e=H[oe],H[oe]=-H[te-oe-1],H[te-oe-1]=-_e}},M.prototype.normalize13b=function(N,H){for(var te=0,oe=0;oe<H/2;oe++){var _e=Math.round(N[2*oe+1]/H)*8192+Math.round(N[2*oe]/H)+te;N[oe]=_e&67108863,_e<67108864?te=0:te=_e/67108864|0}return N},M.prototype.convert13b=function(N,H,te,oe){for(var _e=0,Ee=0;Ee<H;Ee++)_e=_e+(N[Ee]|0),te[2*Ee]=_e&8191,_e=_e>>>13,te[2*Ee+1]=_e&8191,_e=_e>>>13;for(Ee=2*H;Ee<oe;++Ee)te[Ee]=0;u(_e===0),u((_e&-8192)===0)},M.prototype.stub=function(N){for(var H=new Array(N),te=0;te<N;te++)H[te]=0;return H},M.prototype.mulp=function(N,H,te){var oe=2*this.guessLen13b(N.length,H.length),_e=this.makeRBT(oe),Ee=this.stub(oe),Ce=new Array(oe),me=new Array(oe),ie=new Array(oe),Se=new Array(oe),Le=new Array(oe),Ae=new Array(oe),Fe=te.words;Fe.length=oe,this.convert13b(N.words,N.length,Ce,oe),this.convert13b(H.words,H.length,Se,oe),this.transform(Ce,Ee,me,ie,oe,_e),this.transform(Se,Ee,Le,Ae,oe,_e);for(var Pe=0;Pe<oe;Pe++){var ge=me[Pe]*Le[Pe]-ie[Pe]*Ae[Pe];ie[Pe]=me[Pe]*Ae[Pe]+ie[Pe]*Le[Pe],me[Pe]=ge}return this.conjugate(me,ie,oe),this.transform(me,ie,Fe,Ee,oe,_e),this.conjugate(Fe,Ee,oe),this.normalize13b(Fe,oe),te.negative=N.negative^H.negative,te.length=N.length+H.length,te.strip()},f.prototype.mul=function(N){var H=new f(null);return H.words=new Array(this.length+N.length),this.mulTo(N,H)},f.prototype.mulf=function(N){var H=new f(null);return H.words=new Array(this.length+N.length),C(this,N,H)},f.prototype.imul=function(N){return this.clone().mulTo(N,this)},f.prototype.imuln=function(N){u(typeof N=="number"),u(N<67108864);for(var H=0,te=0;te<this.length;te++){var oe=(this.words[te]|0)*N,_e=(oe&67108863)+(H&67108863);H>>=26,H+=oe/67108864|0,H+=_e>>>26,this.words[te]=_e&67108863}return H!==0&&(this.words[te]=H,this.length++),this},f.prototype.muln=function(N){return this.clone().imuln(N)},f.prototype.sqr=function(){return this.mul(this)},f.prototype.isqr=function(){return this.imul(this.clone())},f.prototype.pow=function(N){var H=E(N);if(H.length===0)return new f(1);for(var te=this,oe=0;oe<H.length&&H[oe]===0;oe++,te=te.sqr());if(++oe<H.length)for(var _e=te.sqr();oe<H.length;oe++,_e=_e.sqr())H[oe]!==0&&(te=te.mul(_e));return te},f.prototype.iushln=function(N){u(typeof N=="number"&&N>=0);var H=N%26,te=(N-H)/26,oe=67108863>>>26-H<<26-H,_e;if(H!==0){var Ee=0;for(_e=0;_e<this.length;_e++){var Ce=this.words[_e]&oe,me=(this.words[_e]|0)-Ce<<H;this.words[_e]=me|Ee,Ee=Ce>>>26-H}Ee&&(this.words[_e]=Ee,this.length++)}if(te!==0){for(_e=this.length-1;_e>=0;_e--)this.words[_e+te]=this.words[_e];for(_e=0;_e<te;_e++)this.words[_e]=0;this.length+=te}return this.strip()},f.prototype.ishln=function(N){return u(this.negative===0),this.iushln(N)},f.prototype.iushrn=function(N,H,te){u(typeof N=="number"&&N>=0);var oe;H?oe=(H-H%26)/26:oe=0;var _e=N%26,Ee=Math.min((N-_e)/26,this.length),Ce=67108863^67108863>>>_e<<_e,me=te;if(oe-=Ee,oe=Math.max(0,oe),me){for(var ie=0;ie<Ee;ie++)me.words[ie]=this.words[ie];me.length=Ee}if(Ee!==0)if(this.length>Ee)for(this.length-=Ee,ie=0;ie<this.length;ie++)this.words[ie]=this.words[ie+Ee];else this.words[0]=0,this.length=1;var Se=0;for(ie=this.length-1;ie>=0&&(Se!==0||ie>=oe);ie--){var Le=this.words[ie]|0;this.words[ie]=Se<<26-_e|Le>>>_e,Se=Le&Ce}return me&&Se!==0&&(me.words[me.length++]=Se),this.length===0&&(this.words[0]=0,this.length=1),this.strip()},f.prototype.ishrn=function(N,H,te){return u(this.negative===0),this.iushrn(N,H,te)},f.prototype.shln=function(N){return this.clone().ishln(N)},f.prototype.ushln=function(N){return this.clone().iushln(N)},f.prototype.shrn=function(N){return this.clone().ishrn(N)},f.prototype.ushrn=function(N){return this.clone().iushrn(N)},f.prototype.testn=function(N){u(typeof N=="number"&&N>=0);var H=N%26,te=(N-H)/26,oe=1<<H;if(this.length<=te)return!1;var _e=this.words[te];return!!(_e&oe)},f.prototype.imaskn=function(N){u(typeof N=="number"&&N>=0);var H=N%26,te=(N-H)/26;if(u(this.negative===0,"imaskn works only with positive numbers"),this.length<=te)return this;if(H!==0&&te++,this.length=Math.min(te,this.length),H!==0){var oe=67108863^67108863>>>H<<H;this.words[this.length-1]&=oe}return this.strip()},f.prototype.maskn=function(N){return this.clone().imaskn(N)},f.prototype.iaddn=function(N){return u(typeof N=="number"),u(N<67108864),N<0?this.isubn(-N):this.negative!==0?this.length===1&&(this.words[0]|0)<N?(this.words[0]=N-(this.words[0]|0),this.negative=0,this):(this.negative=0,this.isubn(N),this.negative=1,this):this._iaddn(N)},f.prototype._iaddn=function(N){this.words[0]+=N;for(var H=0;H<this.length&&this.words[H]>=67108864;H++)this.words[H]-=67108864,H===this.length-1?this.words[H+1]=1:this.words[H+1]++;return this.length=Math.max(this.length,H+1),this},f.prototype.isubn=function(N){if(u(typeof N=="number"),u(N<67108864),N<0)return this.iaddn(-N);if(this.negative!==0)return this.negative=0,this.iaddn(N),this.negative=1,this;if(this.words[0]-=N,this.length===1&&this.words[0]<0)this.words[0]=-this.words[0],this.negative=1;else for(var H=0;H<this.length&&this.words[H]<0;H++)this.words[H]+=67108864,this.words[H+1]-=1;return this.strip()},f.prototype.addn=function(N){return this.clone().iaddn(N)},f.prototype.subn=function(N){return this.clone().isubn(N)},f.prototype.iabs=function(){return this.negative=0,this},f.prototype.abs=function(){return this.clone().iabs()},f.prototype._ishlnsubmul=function(N,H,te){var oe=N.length+te,_e;this._expand(oe);var Ee,Ce=0;for(_e=0;_e<N.length;_e++){Ee=(this.words[_e+te]|0)+Ce;var me=(N.words[_e]|0)*H;Ee-=me&67108863,Ce=(Ee>>26)-(me/67108864|0),this.words[_e+te]=Ee&67108863}for(;_e<this.length-te;_e++)Ee=(this.words[_e+te]|0)+Ce,Ce=Ee>>26,this.words[_e+te]=Ee&67108863;if(Ce===0)return this.strip();for(u(Ce===-1),Ce=0,_e=0;_e<this.length;_e++)Ee=-(this.words[_e]|0)+Ce,Ce=Ee>>26,this.words[_e]=Ee&67108863;return this.negative=1,this.strip()},f.prototype._wordDiv=function(N,H){var te=this.length-N.length,oe=this.clone(),_e=N,Ee=_e.words[_e.length-1]|0,Ce=this._countBits(Ee);te=26-Ce,te!==0&&(_e=_e.ushln(te),oe.iushln(te),Ee=_e.words[_e.length-1]|0);var me=oe.length-_e.length,ie;if(H!=="mod"){ie=new f(null),ie.length=me+1,ie.words=new Array(ie.length);for(var Se=0;Se<ie.length;Se++)ie.words[Se]=0}var Le=oe.clone()._ishlnsubmul(_e,1,me);Le.negative===0&&(oe=Le,ie&&(ie.words[me]=1));for(var Ae=me-1;Ae>=0;Ae--){var Fe=(oe.words[_e.length+Ae]|0)*67108864+(oe.words[_e.length+Ae-1]|0);for(Fe=Math.min(Fe/Ee|0,67108863),oe._ishlnsubmul(_e,Fe,Ae);oe.negative!==0;)Fe--,oe.negative=0,oe._ishlnsubmul(_e,1,Ae),oe.isZero()||(oe.negative^=1);ie&&(ie.words[Ae]=Fe)}return ie&&ie.strip(),oe.strip(),H!=="div"&&te!==0&&oe.iushrn(te),{div:ie||null,mod:oe}},f.prototype.divmod=function(N,H,te){if(u(!N.isZero()),this.isZero())return{div:new f(0),mod:new f(0)};var oe,_e,Ee;return this.negative!==0&&N.negative===0?(Ee=this.neg().divmod(N,H),H!=="mod"&&(oe=Ee.div.neg()),H!=="div"&&(_e=Ee.mod.neg(),te&&_e.negative!==0&&_e.iadd(N)),{div:oe,mod:_e}):this.negative===0&&N.negative!==0?(Ee=this.divmod(N.neg(),H),H!=="mod"&&(oe=Ee.div.neg()),{div:oe,mod:Ee.mod}):(this.negative&N.negative)!==0?(Ee=this.neg().divmod(N.neg(),H),H!=="div"&&(_e=Ee.mod.neg(),te&&_e.negative!==0&&_e.isub(N)),{div:Ee.div,mod:_e}):N.length>this.length||this.cmp(N)<0?{div:new f(0),mod:this}:N.length===1?H==="div"?{div:this.divn(N.words[0]),mod:null}:H==="mod"?{div:null,mod:new f(this.modn(N.words[0]))}:{div:this.divn(N.words[0]),mod:new f(this.modn(N.words[0]))}:this._wordDiv(N,H)},f.prototype.div=function(N){return this.divmod(N,"div",!1).div},f.prototype.mod=function(N){return this.divmod(N,"mod",!1).mod},f.prototype.umod=function(N){return this.divmod(N,"mod",!0).mod},f.prototype.divRound=function(N){var H=this.divmod(N);if(H.mod.isZero())return H.div;var te=H.div.negative!==0?H.mod.isub(N):H.mod,oe=N.ushrn(1),_e=N.andln(1),Ee=te.cmp(oe);return Ee<0||_e===1&&Ee===0?H.div:H.div.negative!==0?H.div.isubn(1):H.div.iaddn(1)},f.prototype.modn=function(N){u(N<=67108863);for(var H=(1<<26)%N,te=0,oe=this.length-1;oe>=0;oe--)te=(H*te+(this.words[oe]|0))%N;return te},f.prototype.idivn=function(N){u(N<=67108863);for(var H=0,te=this.length-1;te>=0;te--){var oe=(this.words[te]|0)+H*67108864;this.words[te]=oe/N|0,H=oe%N}return this.strip()},f.prototype.divn=function(N){return this.clone().idivn(N)},f.prototype.egcd=function(N){u(N.negative===0),u(!N.isZero());var H=this,te=N.clone();H.negative!==0?H=H.umod(N):H=H.clone();for(var oe=new f(1),_e=new f(0),Ee=new f(0),Ce=new f(1),me=0;H.isEven()&&te.isEven();)H.iushrn(1),te.iushrn(1),++me;for(var ie=te.clone(),Se=H.clone();!H.isZero();){for(var Le=0,Ae=1;(H.words[0]&Ae)===0&&Le<26;++Le,Ae<<=1);if(Le>0)for(H.iushrn(Le);Le-- >0;)(oe.isOdd()||_e.isOdd())&&(oe.iadd(ie),_e.isub(Se)),oe.iushrn(1),_e.iushrn(1);for(var Fe=0,Pe=1;(te.words[0]&Pe)===0&&Fe<26;++Fe,Pe<<=1);if(Fe>0)for(te.iushrn(Fe);Fe-- >0;)(Ee.isOdd()||Ce.isOdd())&&(Ee.iadd(ie),Ce.isub(Se)),Ee.iushrn(1),Ce.iushrn(1);H.cmp(te)>=0?(H.isub(te),oe.isub(Ee),_e.isub(Ce)):(te.isub(H),Ee.isub(oe),Ce.isub(_e))}return{a:Ee,b:Ce,gcd:te.iushln(me)}},f.prototype._invmp=function(N){u(N.negative===0),u(!N.isZero());var H=this,te=N.clone();H.negative!==0?H=H.umod(N):H=H.clone();for(var oe=new f(1),_e=new f(0),Ee=te.clone();H.cmpn(1)>0&&te.cmpn(1)>0;){for(var Ce=0,me=1;(H.words[0]&me)===0&&Ce<26;++Ce,me<<=1);if(Ce>0)for(H.iushrn(Ce);Ce-- >0;)oe.isOdd()&&oe.iadd(Ee),oe.iushrn(1);for(var ie=0,Se=1;(te.words[0]&Se)===0&&ie<26;++ie,Se<<=1);if(ie>0)for(te.iushrn(ie);ie-- >0;)_e.isOdd()&&_e.iadd(Ee),_e.iushrn(1);H.cmp(te)>=0?(H.isub(te),oe.isub(_e)):(te.isub(H),_e.isub(oe))}var Le;return H.cmpn(1)===0?Le=oe:Le=_e,Le.cmpn(0)<0&&Le.iadd(N),Le},f.prototype.gcd=function(N){if(this.isZero())return N.abs();if(N.isZero())return this.abs();var H=this.clone(),te=N.clone();H.negative=0,te.negative=0;for(var oe=0;H.isEven()&&te.isEven();oe++)H.iushrn(1),te.iushrn(1);do{for(;H.isEven();)H.iushrn(1);for(;te.isEven();)te.iushrn(1);var _e=H.cmp(te);if(_e<0){var Ee=H;H=te,te=Ee}else if(_e===0||te.cmpn(1)===0)break;H.isub(te)}while(!0);return te.iushln(oe)},f.prototype.invm=function(N){return this.egcd(N).a.umod(N)},f.prototype.isEven=function(){return(this.words[0]&1)===0},f.prototype.isOdd=function(){return(this.words[0]&1)===1},f.prototype.andln=function(N){return this.words[0]&N},f.prototype.bincn=function(N){u(typeof N=="number");var H=N%26,te=(N-H)/26,oe=1<<H;if(this.length<=te)return this._expand(te+1),this.words[te]|=oe,this;for(var _e=oe,Ee=te;_e!==0&&Ee<this.length;Ee++){var Ce=this.words[Ee]|0;Ce+=_e,_e=Ce>>>26,Ce&=67108863,this.words[Ee]=Ce}return _e!==0&&(this.words[Ee]=_e,this.length++),this},f.prototype.isZero=function(){return this.length===1&&this.words[0]===0},f.prototype.cmpn=function(N){var H=N<0;if(this.negative!==0&&!H)return-1;if(this.negative===0&&H)return 1;this.strip();var te;if(this.length>1)te=1;else{H&&(N=-N),u(N<=67108863,"Number is too big");var oe=this.words[0]|0;te=oe===N?0:oe<N?-1:1}return this.negative!==0?-te|0:te},f.prototype.cmp=function(N){if(this.negative!==0&&N.negative===0)return-1;if(this.negative===0&&N.negative!==0)return 1;var H=this.ucmp(N);return this.negative!==0?-H|0:H},f.prototype.ucmp=function(N){if(this.length>N.length)return 1;if(this.length<N.length)return-1;for(var H=0,te=this.length-1;te>=0;te--){var oe=this.words[te]|0,_e=N.words[te]|0;if(oe!==_e){oe<_e?H=-1:oe>_e&&(H=1);break}}return H},f.prototype.gtn=function(N){return this.cmpn(N)===1},f.prototype.gt=function(N){return this.cmp(N)===1},f.prototype.gten=function(N){return this.cmpn(N)>=0},f.prototype.gte=function(N){return this.cmp(N)>=0},f.prototype.ltn=function(N){return this.cmpn(N)===-1},f.prototype.lt=function(N){return this.cmp(N)===-1},f.prototype.lten=function(N){return this.cmpn(N)<=0},f.prototype.lte=function(N){return this.cmp(N)<=0},f.prototype.eqn=function(N){return this.cmpn(N)===0},f.prototype.eq=function(N){return this.cmp(N)===0},f.red=function(N){return new G(N)},f.prototype.toRed=function(N){return u(!this.red,"Already a number in reduction context"),u(this.negative===0,"red works only with positives"),N.convertTo(this)._forceRed(N)},f.prototype.fromRed=function(){return u(this.red,"fromRed works only with numbers in reduction context"),this.red.convertFrom(this)},f.prototype._forceRed=function(N){return this.red=N,this},f.prototype.forceRed=function(N){return u(!this.red,"Already a number in reduction context"),this._forceRed(N)},f.prototype.redAdd=function(N){return u(this.red,"redAdd works only with red numbers"),this.red.add(this,N)},f.prototype.redIAdd=function(N){return u(this.red,"redIAdd works only with red numbers"),this.red.iadd(this,N)},f.prototype.redSub=function(N){return u(this.red,"redSub works only with red numbers"),this.red.sub(this,N)},f.prototype.redISub=function(N){return u(this.red,"redISub works only with red numbers"),this.red.isub(this,N)},f.prototype.redShl=function(N){return u(this.red,"redShl works only with red numbers"),this.red.shl(this,N)},f.prototype.redMul=function(N){return u(this.red,"redMul works only with red numbers"),this.red._verify2(this,N),this.red.mul(this,N)},f.prototype.redIMul=function(N){return u(this.red,"redMul works only with red numbers"),this.red._verify2(this,N),this.red.imul(this,N)},f.prototype.redSqr=function(){return u(this.red,"redSqr works only with red numbers"),this.red._verify1(this),this.red.sqr(this)},f.prototype.redISqr=function(){return u(this.red,"redISqr works only with red numbers"),this.red._verify1(this),this.red.isqr(this)},f.prototype.redSqrt=function(){return u(this.red,"redSqrt works only with red numbers"),this.red._verify1(this),this.red.sqrt(this)},f.prototype.redInvm=function(){return u(this.red,"redInvm works only with red numbers"),this.red._verify1(this),this.red.invm(this)},f.prototype.redNeg=function(){return u(this.red,"redNeg works only with red numbers"),this.red._verify1(this),this.red.neg(this)},f.prototype.redPow=function(N){return u(this.red&&!N.red,"redPow(normalNum)"),this.red._verify1(this),this.red.pow(this,N)};var g={k256:null,p224:null,p192:null,p25519:null};function P(j,N){this.name=j,this.p=new f(N,16),this.n=this.p.bitLength(),this.k=new f(1).iushln(this.n).isub(this.p),this.tmp=this._tmp()}P.prototype._tmp=function(){var N=new f(null);return N.words=new Array(Math.ceil(this.n/13)),N},P.prototype.ireduce=function(N){var H=N,te;do this.split(H,this.tmp),H=this.imulK(H),H=H.iadd(this.tmp),te=H.bitLength();while(te>this.n);var oe=te<this.n?-1:H.ucmp(this.p);return oe===0?(H.words[0]=0,H.length=1):oe>0?H.isub(this.p):H.strip!==void 0?H.strip():H._strip(),H},P.prototype.split=function(N,H){N.iushrn(this.n,0,H)},P.prototype.imulK=function(N){return N.imul(this.k)};function T(){P.call(this,"k256","ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff fffffffe fffffc2f")}c(T,P),T.prototype.split=function(N,H){for(var te=4194303,oe=Math.min(N.length,9),_e=0;_e<oe;_e++)H.words[_e]=N.words[_e];if(H.length=oe,N.length<=9){N.words[0]=0,N.length=1;return}var Ee=N.words[9];for(H.words[H.length++]=Ee&te,_e=10;_e<N.length;_e++){var Ce=N.words[_e]|0;N.words[_e-10]=(Ce&te)<<4|Ee>>>22,Ee=Ce}Ee>>>=22,N.words[_e-10]=Ee,Ee===0&&N.length>10?N.length-=10:N.length-=9},T.prototype.imulK=function(N){N.words[N.length]=0,N.words[N.length+1]=0,N.length+=2;for(var H=0,te=0;te<N.length;te++){var oe=N.words[te]|0;H+=oe*977,N.words[te]=H&67108863,H=oe*64+(H/67108864|0)}return N.words[N.length-1]===0&&(N.length--,N.words[N.length-1]===0&&N.length--),N};function z(){P.call(this,"p224","ffffffff ffffffff ffffffff ffffffff 00000000 00000000 00000001")}c(z,P);function O(){P.call(this,"p192","ffffffff ffffffff ffffffff fffffffe ffffffff ffffffff")}c(O,P);function V(){P.call(this,"25519","7fffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffed")}c(V,P),V.prototype.imulK=function(N){for(var H=0,te=0;te<N.length;te++){var oe=(N.words[te]|0)*19+H,_e=oe&67108863;oe>>>=26,N.words[te]=_e,H=oe}return H!==0&&(N.words[N.length++]=H),N},f._prime=function(N){if(g[N])return g[N];var H;if(N==="k256")H=new T;else if(N==="p224")H=new z;else if(N==="p192")H=new O;else if(N==="p25519")H=new V;else throw new Error("Unknown prime "+N);return g[N]=H,H};function G(j){if(typeof j=="string"){var N=f._prime(j);this.m=N.p,this.prime=N}else u(j.gtn(1),"modulus must be greater than 1"),this.m=j,this.prime=null}G.prototype._verify1=function(N){u(N.negative===0,"red works only with positives"),u(N.red,"red works only with red numbers")},G.prototype._verify2=function(N,H){u((N.negative|H.negative)===0,"red works only with positives"),u(N.red&&N.red===H.red,"red works only with red numbers")},G.prototype.imod=function(N){return this.prime?this.prime.ireduce(N)._forceRed(this):N.umod(this.m)._forceRed(this)},G.prototype.neg=function(N){return N.isZero()?N.clone():this.m.sub(N)._forceRed(this)},G.prototype.add=function(N,H){this._verify2(N,H);var te=N.add(H);return te.cmp(this.m)>=0&&te.isub(this.m),te._forceRed(this)},G.prototype.iadd=function(N,H){this._verify2(N,H);var te=N.iadd(H);return te.cmp(this.m)>=0&&te.isub(this.m),te},G.prototype.sub=function(N,H){this._verify2(N,H);var te=N.sub(H);return te.cmpn(0)<0&&te.iadd(this.m),te._forceRed(this)},G.prototype.isub=function(N,H){this._verify2(N,H);var te=N.isub(H);return te.cmpn(0)<0&&te.iadd(this.m),te},G.prototype.shl=function(N,H){return this._verify1(N),this.imod(N.ushln(H))},G.prototype.imul=function(N,H){return this._verify2(N,H),this.imod(N.imul(H))},G.prototype.mul=function(N,H){return this._verify2(N,H),this.imod(N.mul(H))},G.prototype.isqr=function(N){return this.imul(N,N.clone())},G.prototype.sqr=function(N){return this.mul(N,N)},G.prototype.sqrt=function(N){if(N.isZero())return N.clone();var H=this.m.andln(3);if(u(H%2===1),H===3){var te=this.m.add(new f(1)).iushrn(2);return this.pow(N,te)}for(var oe=this.m.subn(1),_e=0;!oe.isZero()&&oe.andln(1)===0;)_e++,oe.iushrn(1);u(!oe.isZero());var Ee=new f(1).toRed(this),Ce=Ee.redNeg(),me=this.m.subn(1).iushrn(1),ie=this.m.bitLength();for(ie=new f(2*ie*ie).toRed(this);this.pow(ie,me).cmp(Ce)!==0;)ie.redIAdd(Ce);for(var Se=this.pow(ie,oe),Le=this.pow(N,oe.addn(1).iushrn(1)),Ae=this.pow(N,oe),Fe=_e;Ae.cmp(Ee)!==0;){for(var Pe=Ae,ge=0;Pe.cmp(Ee)!==0;ge++)Pe=Pe.redSqr();u(ge<Fe);var Re=this.pow(Se,new f(1).iushln(Fe-ge-1));Le=Le.redMul(Re),Se=Re.redSqr(),Ae=Ae.redMul(Se),Fe=ge}return Le},G.prototype.invm=function(N){var H=N._invmp(this.m);return H.negative!==0?(H.negative=0,this.imod(H).redNeg()):this.imod(H)},G.prototype.pow=function(N,H){if(H.isZero())return new f(1).toRed(this);if(H.cmpn(1)===0)return N.clone();var te=4,oe=new Array(1<<te);oe[0]=new f(1).toRed(this),oe[1]=N;for(var _e=2;_e<oe.length;_e++)oe[_e]=this.mul(oe[_e-1],N);var Ee=oe[0],Ce=0,me=0,ie=H.bitLength()%26;for(ie===0&&(ie=26),_e=H.length-1;_e>=0;_e--){for(var Se=H.words[_e],Le=ie-1;Le>=0;Le--){var Ae=Se>>Le&1;if(Ee!==oe[0]&&(Ee=this.sqr(Ee)),Ae===0&&Ce===0){me=0;continue}Ce<<=1,Ce|=Ae,me++,!(me!==te&&(_e!==0||Le!==0))&&(Ee=this.mul(Ee,oe[Ce]),me=0,Ce=0)}ie=26}return Ee},G.prototype.convertTo=function(N){var H=N.umod(this.m);return H===N?H.clone():H},G.prototype.convertFrom=function(N){var H=N.clone();return H.red=null,H},f.mont=function(N){return new Z(N)};function Z(j){G.call(this,j),this.shift=this.m.bitLength(),this.shift%26!==0&&(this.shift+=26-this.shift%26),this.r=new f(1).iushln(this.shift),this.r2=this.imod(this.r.sqr()),this.rinv=this.r._invmp(this.m),this.minv=this.rinv.mul(this.r).isubn(1).div(this.m),this.minv=this.minv.umod(this.r),this.minv=this.r.sub(this.minv)}c(Z,G),Z.prototype.convertTo=function(N){return this.imod(N.ushln(this.shift))},Z.prototype.convertFrom=function(N){var H=this.imod(N.mul(this.rinv));return H.red=null,H},Z.prototype.imul=function(N,H){if(N.isZero()||H.isZero())return N.words[0]=0,N.length=1,N;var te=N.imul(H),oe=te.maskn(this.shift).mul(this.minv).imaskn(this.shift).mul(this.m),_e=te.isub(oe).iushrn(this.shift),Ee=_e;return _e.cmp(this.m)>=0?Ee=_e.isub(this.m):_e.cmpn(0)<0&&(Ee=_e.iadd(this.m)),Ee._forceRed(this)},Z.prototype.mul=function(N,H){if(N.isZero()||H.isZero())return new f(0)._forceRed(this);var te=N.mul(H),oe=te.maskn(this.shift).mul(this.minv).imaskn(this.shift).mul(this.m),_e=te.isub(oe).iushrn(this.shift),Ee=_e;return _e.cmp(this.m)>=0?Ee=_e.isub(this.m):_e.cmpn(0)<0&&(Ee=_e.iadd(this.m)),Ee._forceRed(this)},Z.prototype.invm=function(N){var H=this.imod(N._invmp(this.m).mul(this.r2));return H._forceRed(this)}}(i,this)},6860:function(i){i.exports=a;function a(o,s,l){return o[0]=s[0]-l[0],o[1]=s[1]-l[1],o[2]=s[2]-l[2],o[3]=s[3]-l[3],o}},6864:function(i){i.exports=a;function a(){var o=new Float32Array(16);return o[0]=1,o[1]=0,o[2]=0,o[3]=0,o[4]=0,o[5]=1,o[6]=0,o[7]=0,o[8]=0,o[9]=0,o[10]=1,o[11]=0,o[12]=0,o[13]=0,o[14]=0,o[15]=1,o}},6867:function(i,a,o){"use strict";i.exports=p;var s=o(1888),l=o(855),u=o(7150);function c(k,E){for(var S=0;S<k;++S)if(!(E[S]<=E[S+k]))return!0;return!1}function f(k,E,S,L){for(var x=0,C=0,M=0,g=k.length;M<g;++M){var P=k[M];if(!c(E,P)){for(var T=0;T<2*E;++T)S[x++]=P[T];L[C++]=M}}return C}function h(k,E,S,L){var x=k.length,C=E.length;if(!(x<=0||C<=0)){var M=k[0].length>>>1;if(!(M<=0)){var g,P=s.mallocDouble(2*M*x),T=s.mallocInt32(x);if(x=f(k,M,P,T),x>0){if(M===1&&L)l.init(x),g=l.sweepComplete(M,S,0,x,P,T,0,x,P,T);else{var z=s.mallocDouble(2*M*C),O=s.mallocInt32(C);C=f(E,M,z,O),C>0&&(l.init(x+C),M===1?g=l.sweepBipartite(M,S,0,x,P,T,0,C,z,O):g=u(M,S,L,x,P,T,C,z,O),s.free(z),s.free(O))}s.free(P),s.free(T)}return g}}}var d;function v(k,E){d.push([k,E])}function _(k){return d=[],h(k,k,v,!0),d}function b(k,E){return d=[],h(k,E,v,!1),d}function p(k,E,S){switch(arguments.length){case 1:return _(k);case 2:return typeof E=="function"?h(k,k,E,!0):b(k,E);case 3:return h(k,E,S,!1);default:throw new Error("box-intersect: Invalid arguments")}}},6894:function(i){i.exports=a;function a(o,s,l,u){var c=l[1],f=l[2],h=s[1]-c,d=s[2]-f,v=Math.sin(u),_=Math.cos(u);return o[0]=s[0],o[1]=c+h*_-d*v,o[2]=f+h*v+d*_,o}},7004:function(i){"use strict";i.exports=a;function a(o){for(var s=o.length,l=o[o.length-1],u=s,c=s-2;c>=0;--c){var f=l,h=o[c];l=f+h;var d=l-f,v=h-d;v&&(o[--u]=l,l=v)}for(var _=0,c=u;c<s;++c){var f=o[c],h=l;l=f+h;var d=l-f,v=h-d;v&&(o[_++]=v)}return o[_++]=l,o.length=_,o}},7056:function(i){i.exports=a;function a(o,s){var l=s[0]-o[0],u=s[1]-o[1],c=s[2]-o[2];return Math.sqrt(l*l+u*u+c*c)}},7089:function(i){i.exports=a;function a(o,s,l){var u=Math.sin(l),c=Math.cos(l),f=s[0],h=s[1],d=s[2],v=s[3],_=s[4],b=s[5],p=s[6],k=s[7];return s!==o&&(o[8]=s[8],o[9]=s[9],o[10]=s[10],o[11]=s[11],o[12]=s[12],o[13]=s[13],o[14]=s[14],o[15]=s[15]),o[0]=f*c+_*u,o[1]=h*c+b*u,o[2]=d*c+p*u,o[3]=v*c+k*u,o[4]=_*c-f*u,o[5]=b*c-h*u,o[6]=p*c-d*u,o[7]=k*c-v*u,o}},7150:function(i,a,o){"use strict";i.exports=j;var s=o(1888),l=o(8828),u=o(2455),c=u.partial,f=u.full,h=o(855),d=o(3545),v=o(8105),_=128,b=1<<22,p=1<<22,k=v("!(lo>=p0)&&!(p1>=hi)"),E=v("lo===p0"),S=v("lo<p0"),L=v("hi<=p0"),x=v("lo<=p0&&p0<=hi"),C=v("lo<p0&&p0<=hi"),M=6,g=2,P=1024,T=s.mallocInt32(P),z=s.mallocDouble(P);function O(N,H){var te=8*l.log2(H+1)*(N+1)|0,oe=l.nextPow2(M*te);T.length<oe&&(s.free(T),T=s.mallocInt32(oe));var _e=l.nextPow2(g*te);z.length<_e&&(s.free(z),z=s.mallocDouble(_e))}function V(N,H,te,oe,_e,Ee,Ce,me,ie){var Se=M*N;T[Se]=H,T[Se+1]=te,T[Se+2]=oe,T[Se+3]=_e,T[Se+4]=Ee,T[Se+5]=Ce;var Le=g*N;z[Le]=me,z[Le+1]=ie}function G(N,H,te,oe,_e,Ee,Ce,me,ie,Se,Le){var Ae=2*N,Fe=ie*Ae,Pe=Se[Fe+H];e:for(var ge=_e,Re=_e*Ae;ge<Ee;++ge,Re+=Ae){var ce=Ce[Re+H],Ze=Ce[Re+H+N];if(!(Pe<ce||Ze<Pe)&&!(oe&&Pe===ce)){for(var ut=me[ge],pt=H+1;pt<N;++pt){var ce=Ce[Re+pt],Ze=Ce[Re+pt+N],Zt=Se[Fe+pt],st=Se[Fe+pt+N];if(Ze<Zt||st<ce)continue e}var lt;if(oe?lt=te(Le,ut):lt=te(ut,Le),lt!==void 0)return lt}}}function Z(N,H,te,oe,_e,Ee,Ce,me,ie,Se){var Le=2*N,Ae=me*Le,Fe=ie[Ae+H];e:for(var Pe=oe,ge=oe*Le;Pe<_e;++Pe,ge+=Le){var Re=Ce[Pe];if(Re!==Se){var ce=Ee[ge+H],Ze=Ee[ge+H+N];if(!(Fe<ce||Ze<Fe)){for(var ut=H+1;ut<N;++ut){var ce=Ee[ge+ut],Ze=Ee[ge+ut+N],pt=ie[Ae+ut],Zt=ie[Ae+ut+N];if(Ze<pt||Zt<ce)continue e}var st=te(Re,Se);if(st!==void 0)return st}}}}function j(N,H,te,oe,_e,Ee,Ce,me,ie){O(N,oe+Ce);var Se=0,Le=2*N,Ae;for(V(Se++,0,0,oe,0,Ce,te?16:0,-1/0,1/0),te||V(Se++,0,0,Ce,0,oe,1,-1/0,1/0);Se>0;){Se-=1;var Fe=Se*M,Pe=T[Fe],ge=T[Fe+1],Re=T[Fe+2],ce=T[Fe+3],Ze=T[Fe+4],ut=T[Fe+5],pt=Se*g,Zt=z[pt],st=z[pt+1],lt=ut&1,Gt=!!(ut&16),Nt=_e,Jt=Ee,sr=me,wr=ie;if(lt&&(Nt=me,Jt=ie,sr=_e,wr=Ee),!(ut&2&&(Re=S(N,Pe,ge,Re,Nt,Jt,st),ge>=Re))&&!(ut&4&&(ge=L(N,Pe,ge,Re,Nt,Jt,Zt),ge>=Re))){var cr=Re-ge,$e=Ze-ce;if(Gt){if(N*cr*(cr+$e)<p){if(Ae=h.scanComplete(N,Pe,H,ge,Re,Nt,Jt,ce,Ze,sr,wr),Ae!==void 0)return Ae;continue}}else if(N*Math.min(cr,$e)<_){if(Ae=c(N,Pe,H,lt,ge,Re,Nt,Jt,ce,Ze,sr,wr),Ae!==void 0)return Ae;continue}else if(N*cr*$e<b){if(Ae=h.scanBipartite(N,Pe,H,lt,ge,Re,Nt,Jt,ce,Ze,sr,wr),Ae!==void 0)return Ae;continue}var St=k(N,Pe,ge,Re,Nt,Jt,Zt,st);if(ge<St)if(N*(St-ge)<_){if(Ae=f(N,Pe+1,H,ge,St,Nt,Jt,ce,Ze,sr,wr),Ae!==void 0)return Ae}else if(Pe===N-2){if(lt?Ae=h.sweepBipartite(N,H,ce,Ze,sr,wr,ge,St,Nt,Jt):Ae=h.sweepBipartite(N,H,ge,St,Nt,Jt,ce,Ze,sr,wr),Ae!==void 0)return Ae}else V(Se++,Pe+1,ge,St,ce,Ze,lt,-1/0,1/0),V(Se++,Pe+1,ce,Ze,ge,St,lt^1,-1/0,1/0);if(St<Re){var Qt=d(N,Pe,ce,Ze,sr,wr),Vt=sr[Le*Qt+Pe],_t=E(N,Pe,Qt,Ze,sr,wr,Vt);if(_t<Ze&&V(Se++,Pe,St,Re,_t,Ze,(lt|4)+(Gt?16:0),Vt,st),ce<Qt&&V(Se++,Pe,St,Re,ce,Qt,(lt|2)+(Gt?16:0),Zt,Vt),Qt+1===_t){if(Gt?Ae=Z(N,Pe,H,St,Re,Nt,Jt,Qt,sr,wr[Qt]):Ae=G(N,Pe,H,lt,St,Re,Nt,Jt,Qt,sr,wr[Qt]),Ae!==void 0)return Ae}else if(Qt<_t){var It;if(Gt){if(It=x(N,Pe,St,Re,Nt,Jt,Vt),St<It){var mt=E(N,Pe,St,It,Nt,Jt,Vt);if(Pe===N-2){if(St<mt&&(Ae=h.sweepComplete(N,H,St,mt,Nt,Jt,Qt,_t,sr,wr),Ae!==void 0)||mt<It&&(Ae=h.sweepBipartite(N,H,mt,It,Nt,Jt,Qt,_t,sr,wr),Ae!==void 0))return Ae}else St<mt&&V(Se++,Pe+1,St,mt,Qt,_t,16,-1/0,1/0),mt<It&&(V(Se++,Pe+1,mt,It,Qt,_t,0,-1/0,1/0),V(Se++,Pe+1,Qt,_t,mt,It,1,-1/0,1/0))}}else lt?It=C(N,Pe,St,Re,Nt,Jt,Vt):It=x(N,Pe,St,Re,Nt,Jt,Vt),St<It&&(Pe===N-2?lt?Ae=h.sweepBipartite(N,H,Qt,_t,sr,wr,St,It,Nt,Jt):Ae=h.sweepBipartite(N,H,St,It,Nt,Jt,Qt,_t,sr,wr):(V(Se++,Pe+1,St,It,Qt,_t,lt,-1/0,1/0),V(Se++,Pe+1,Qt,_t,St,It,lt^1,-1/0,1/0)))}}}}}},7163:function(i){i.exports=function(s){return s!=null&&(a(s)||o(s)||!!s._isBuffer)};function a(s){return!!s.constructor&&typeof s.constructor.isBuffer=="function"&&s.constructor.isBuffer(s)}function o(s){return typeof s.readFloatLE=="function"&&typeof s.slice=="function"&&a(s.slice(0,0))}},7169:function(i,a,o){"use strict";var s=typeof WeakMap=="undefined"?o(1538):WeakMap,l=o(2762),u=o(8116),c=new s;function f(h){var d=c.get(h),v=d&&(d._triangleBuffer.handle||d._triangleBuffer.buffer);if(!v||!h.isBuffer(v)){var _=l(h,new Float32Array([-1,-1,-1,4,4,-1]));d=u(h,[{buffer:_,type:h.FLOAT,size:2}]),d._triangleBuffer=_,c.set(h,d)}d.bind(),h.drawArrays(h.TRIANGLES,0,3),d.unbind()}i.exports=f},7182:function(i,a,o){var s={identity:o(7894),translate:o(7656),multiply:o(6760),create:o(6864),scale:o(2504),fromRotationTranslation:o(6743)},l=s.create(),u=s.create();i.exports=function(f,h,d,v,_,b){return s.identity(f),s.fromRotationTranslation(f,b,h),f[3]=_[0],f[7]=_[1],f[11]=_[2],f[15]=_[3],s.identity(u),v[2]!==0&&(u[9]=v[2],s.multiply(f,f,u)),v[1]!==0&&(u[9]=0,u[8]=v[1],s.multiply(f,f,u)),v[0]!==0&&(u[8]=0,u[4]=v[0],s.multiply(f,f,u)),s.scale(f,f,d),f}},7201:function(i,a,o){"use strict";var s=1e-6,l=1e-6,u=o(9405),c=o(2762),f=o(8116),h=o(7766),d=o(8406),v=o(6760),_=o(7608),b=o(9618),p=o(6729),k=o(7765),E=o(1888),S=o(840),L=o(7626),x=S.meshShader,C=S.wireShader,M=S.pointShader,g=S.pickShader,P=S.pointPickShader,T=S.contourShader,z=[1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1];function O(me,ie,Se,Le,Ae,Fe,Pe,ge,Re,ce,Ze,ut,pt,Zt,st,lt,Gt,Nt,Jt,sr,wr,cr,$e,St,Qt,Vt,_t){this.gl=me,this.pixelRatio=1,this.cells=[],this.positions=[],this.intensity=[],this.texture=ie,this.dirty=!0,this.triShader=Se,this.lineShader=Le,this.pointShader=Ae,this.pickShader=Fe,this.pointPickShader=Pe,this.contourShader=ge,this.trianglePositions=Re,this.triangleColors=Ze,this.triangleNormals=pt,this.triangleUVs=ut,this.triangleIds=ce,this.triangleVAO=Zt,this.triangleCount=0,this.lineWidth=1,this.edgePositions=st,this.edgeColors=Gt,this.edgeUVs=Nt,this.edgeIds=lt,this.edgeVAO=Jt,this.edgeCount=0,this.pointPositions=sr,this.pointColors=cr,this.pointUVs=$e,this.pointSizes=St,this.pointIds=wr,this.pointVAO=Qt,this.pointCount=0,this.contourLineWidth=1,this.contourPositions=Vt,this.contourVAO=_t,this.contourCount=0,this.contourColor=[0,0,0],this.contourEnable=!0,this.pickVertex=!0,this.pickId=1,this.bounds=[[1/0,1/0,1/0],[-1/0,-1/0,-1/0]],this.clipBounds=[[-1/0,-1/0,-1/0],[1/0,1/0,1/0]],this.lightPosition=[1e5,1e5,0],this.ambientLight=.8,this.diffuseLight=.8,this.specularLight=2,this.roughness=.5,this.fresnel=1.5,this.opacity=1,this.hasAlpha=!1,this.opacityscale=!1,this._model=z,this._view=z,this._projection=z,this._resolution=[1,1]}var V=O.prototype;V.isOpaque=function(){return!this.hasAlpha},V.isTransparent=function(){return this.hasAlpha},V.pickSlots=1,V.setPickBase=function(me){this.pickId=me};function G(me,ie){if(!ie||!ie.length)return 1;for(var Se=0;Se<ie.length;++Se){if(ie.length<2)return 1;if(ie[Se][0]===me)return ie[Se][1];if(ie[Se][0]>me&&Se>0){var Le=(ie[Se][0]-me)/(ie[Se][0]-ie[Se-1][0]);return ie[Se][1]*(1-Le)+Le*ie[Se-1][1]}}return 1}function Z(me,ie){for(var Se=p({colormap:me,nshades:256,format:"rgba"}),Le=new Uint8Array(256*4),Ae=0;Ae<256;++Ae){for(var Fe=Se[Ae],Pe=0;Pe<3;++Pe)Le[4*Ae+Pe]=Fe[Pe];ie?Le[4*Ae+3]=255*G(Ae/255,ie):Le[4*Ae+3]=255*Fe[3]}return b(Le,[256,256,4],[4,0,1])}function j(me){for(var ie=me.length,Se=new Array(ie),Le=0;Le<ie;++Le)Se[Le]=me[Le][2];return Se}V.highlight=function(me){if(!me||!this.contourEnable){this.contourCount=0;return}for(var ie=k(this.cells,this.intensity,me.intensity),Se=ie.cells,Le=ie.vertexIds,Ae=ie.vertexWeights,Fe=Se.length,Pe=E.mallocFloat32(2*3*Fe),ge=0,Re=0;Re<Fe;++Re)for(var ce=Se[Re],Ze=0;Ze<2;++Ze){var ut=ce[0];ce.length===2&&(ut=ce[Ze]);for(var pt=Le[ut][0],Zt=Le[ut][1],st=Ae[ut],lt=1-st,Gt=this.positions[pt],Nt=this.positions[Zt],Jt=0;Jt<3;++Jt)Pe[ge++]=st*Gt[Jt]+lt*Nt[Jt]}this.contourCount=ge/3|0,this.contourPositions.update(Pe.subarray(0,ge)),E.free(Pe)},V.update=function(me){me=me||{};var ie=this.gl;this.dirty=!0,"contourEnable"in me&&(this.contourEnable=me.contourEnable),"contourColor"in me&&(this.contourColor=me.contourColor),"lineWidth"in me&&(this.lineWidth=me.lineWidth),"lightPosition"in me&&(this.lightPosition=me.lightPosition),this.hasAlpha=!1,"opacity"in me&&(this.opacity=me.opacity,this.opacity<1&&(this.hasAlpha=!0)),"opacityscale"in me&&(this.opacityscale=me.opacityscale,this.hasAlpha=!0),"ambient"in me&&(this.ambientLight=me.ambient),"diffuse"in me&&(this.diffuseLight=me.diffuse),"specular"in me&&(this.specularLight=me.specular),"roughness"in me&&(this.roughness=me.roughness),"fresnel"in me&&(this.fresnel=me.fresnel),me.texture?(this.texture.dispose(),this.texture=h(ie,me.texture)):me.colormap&&(this.texture.shape=[256,256],this.texture.minFilter=ie.LINEAR_MIPMAP_LINEAR,this.texture.magFilter=ie.LINEAR,this.texture.setPixels(Z(me.colormap,this.opacityscale)),this.texture.generateMipmap());var Se=me.cells,Le=me.positions;if(!(!Le||!Se)){var Ae=[],Fe=[],Pe=[],ge=[],Re=[],ce=[],Ze=[],ut=[],pt=[],Zt=[],st=[],lt=[],Gt=[],Nt=[];this.cells=Se,this.positions=Le;var Jt=me.vertexNormals,sr=me.cellNormals,wr=me.vertexNormalsEpsilon===void 0?s:me.vertexNormalsEpsilon,cr=me.faceNormalsEpsilon===void 0?l:me.faceNormalsEpsilon;me.useFacetNormals&&!sr&&(sr=d.faceNormals(Se,Le,cr)),!sr&&!Jt&&(Jt=d.vertexNormals(Se,Le,wr));var $e=me.vertexColors,St=me.cellColors,Qt=me.meshColor||[1,1,1,1],Vt=me.vertexUVs,_t=me.vertexIntensity,It=me.cellUVs,mt=me.cellIntensity,er=1/0,lr=-1/0;if(!Vt&&!It)if(_t)if(me.vertexIntensityBounds)er=+me.vertexIntensityBounds[0],lr=+me.vertexIntensityBounds[1];else for(var Tr=0;Tr<_t.length;++Tr){var Lr=_t[Tr];er=Math.min(er,Lr),lr=Math.max(lr,Lr)}else if(mt)if(me.cellIntensityBounds)er=+me.cellIntensityBounds[0],lr=+me.cellIntensityBounds[1];else for(var Tr=0;Tr<mt.length;++Tr){var Lr=mt[Tr];er=Math.min(er,Lr),lr=Math.max(lr,Lr)}else for(var Tr=0;Tr<Le.length;++Tr){var Lr=Le[Tr][2];er=Math.min(er,Lr),lr=Math.max(lr,Lr)}_t?this.intensity=_t:mt?this.intensity=mt:this.intensity=j(Le),this.pickVertex=!(mt||St);var ti=me.pointSizes,Br=me.pointSize||1;this.bounds=[[1/0,1/0,1/0],[-1/0,-1/0,-1/0]];for(var Tr=0;Tr<Le.length;++Tr)for(var Vr=Le[Tr],dt=0;dt<3;++dt)isNaN(Vr[dt])||!isFinite(Vr[dt])||(this.bounds[0][dt]=Math.min(this.bounds[0][dt],Vr[dt]),this.bounds[1][dt]=Math.max(this.bounds[1][dt],Vr[dt]));var Ge=0,Je=0,je=0;e:for(var Tr=0;Tr<Se.length;++Tr){var tt=Se[Tr];switch(tt.length){case 1:for(var xt=tt[0],Vr=Le[xt],dt=0;dt<3;++dt)if(isNaN(Vr[dt])||!isFinite(Vr[dt]))continue e;Zt.push(Vr[0],Vr[1],Vr[2]);var Ie;$e?Ie=$e[xt]:St?Ie=St[Tr]:Ie=Qt,this.opacityscale&&_t?Fe.push(Ie[0],Ie[1],Ie[2],this.opacity*G((_t[xt]-er)/(lr-er),this.opacityscale)):Ie.length===3?st.push(Ie[0],Ie[1],Ie[2],this.opacity):(st.push(Ie[0],Ie[1],Ie[2],Ie[3]*this.opacity),Ie[3]<1&&(this.hasAlpha=!0));var xe;Vt?xe=Vt[xt]:_t?xe=[(_t[xt]-er)/(lr-er),0]:It?xe=It[Tr]:mt?xe=[(mt[Tr]-er)/(lr-er),0]:xe=[(Vr[2]-er)/(lr-er),0],lt.push(xe[0],xe[1]),ti?Gt.push(ti[xt]):Gt.push(Br),Nt.push(Tr),je+=1;break;case 2:for(var dt=0;dt<2;++dt)for(var xt=tt[dt],Vr=Le[xt],ke=0;ke<3;++ke)if(isNaN(Vr[ke])||!isFinite(Vr[ke]))continue e;for(var dt=0;dt<2;++dt){var xt=tt[dt],Vr=Le[xt];ce.push(Vr[0],Vr[1],Vr[2]);var Ie;$e?Ie=$e[xt]:St?Ie=St[Tr]:Ie=Qt,this.opacityscale&&_t?Fe.push(Ie[0],Ie[1],Ie[2],this.opacity*G((_t[xt]-er)/(lr-er),this.opacityscale)):Ie.length===3?Ze.push(Ie[0],Ie[1],Ie[2],this.opacity):(Ze.push(Ie[0],Ie[1],Ie[2],Ie[3]*this.opacity),Ie[3]<1&&(this.hasAlpha=!0));var xe;Vt?xe=Vt[xt]:_t?xe=[(_t[xt]-er)/(lr-er),0]:It?xe=It[Tr]:mt?xe=[(mt[Tr]-er)/(lr-er),0]:xe=[(Vr[2]-er)/(lr-er),0],ut.push(xe[0],xe[1]),pt.push(Tr)}Je+=1;break;case 3:for(var dt=0;dt<3;++dt)for(var xt=tt[dt],Vr=Le[xt],ke=0;ke<3;++ke)if(isNaN(Vr[ke])||!isFinite(Vr[ke]))continue e;for(var dt=0;dt<3;++dt){var xt=tt[2-dt],Vr=Le[xt];Ae.push(Vr[0],Vr[1],Vr[2]);var Ie;$e?Ie=$e[xt]:St?Ie=St[Tr]:Ie=Qt,Ie?this.opacityscale&&_t?Fe.push(Ie[0],Ie[1],Ie[2],this.opacity*G((_t[xt]-er)/(lr-er),this.opacityscale)):Ie.length===3?Fe.push(Ie[0],Ie[1],Ie[2],this.opacity):(Fe.push(Ie[0],Ie[1],Ie[2],Ie[3]*this.opacity),Ie[3]<1&&(this.hasAlpha=!0)):Fe.push(.5,.5,.5,1);var xe;Vt?xe=Vt[xt]:_t?xe=[(_t[xt]-er)/(lr-er),0]:It?xe=It[Tr]:mt?xe=[(mt[Tr]-er)/(lr-er),0]:xe=[(Vr[2]-er)/(lr-er),0],ge.push(xe[0],xe[1]);var vt;Jt?vt=Jt[xt]:vt=sr[Tr],Pe.push(vt[0],vt[1],vt[2]),Re.push(Tr)}Ge+=1;break;default:break}}this.pointCount=je,this.edgeCount=Je,this.triangleCount=Ge,this.pointPositions.update(Zt),this.pointColors.update(st),this.pointUVs.update(lt),this.pointSizes.update(Gt),this.pointIds.update(new Uint32Array(Nt)),this.edgePositions.update(ce),this.edgeColors.update(Ze),this.edgeUVs.update(ut),this.edgeIds.update(new Uint32Array(pt)),this.trianglePositions.update(Ae),this.triangleColors.update(Fe),this.triangleUVs.update(ge),this.triangleNormals.update(Pe),this.triangleIds.update(new Uint32Array(Re))}},V.drawTransparent=V.draw=function(me){me=me||{};for(var ie=this.gl,Se=me.model||z,Le=me.view||z,Ae=me.projection||z,Fe=[[-1e6,-1e6,-1e6],[1e6,1e6,1e6]],Pe=0;Pe<3;++Pe)Fe[0][Pe]=Math.max(Fe[0][Pe],this.clipBounds[0][Pe]),Fe[1][Pe]=Math.min(Fe[1][Pe],this.clipBounds[1][Pe]);var ge={model:Se,view:Le,projection:Ae,inverseModel:z.slice(),clipBounds:Fe,kambient:this.ambientLight,kdiffuse:this.diffuseLight,kspecular:this.specularLight,roughness:this.roughness,fresnel:this.fresnel,eyePosition:[0,0,0],lightPosition:[0,0,0],contourColor:this.contourColor,texture:0};ge.inverseModel=_(ge.inverseModel,ge.model),ie.disable(ie.CULL_FACE),this.texture.bind(0);var Re=new Array(16);v(Re,ge.view,ge.model),v(Re,ge.projection,Re),_(Re,Re);for(var Pe=0;Pe<3;++Pe)ge.eyePosition[Pe]=Re[12+Pe]/Re[15];for(var ce=Re[15],Pe=0;Pe<3;++Pe)ce+=this.lightPosition[Pe]*Re[4*Pe+3];for(var Pe=0;Pe<3;++Pe){for(var Ze=Re[12+Pe],ut=0;ut<3;++ut)Ze+=Re[4*ut+Pe]*this.lightPosition[ut];ge.lightPosition[Pe]=Ze/ce}if(this.triangleCount>0){var pt=this.triShader;pt.bind(),pt.uniforms=ge,this.triangleVAO.bind(),ie.drawArrays(ie.TRIANGLES,0,this.triangleCount*3),this.triangleVAO.unbind()}if(this.edgeCount>0&&this.lineWidth>0){var pt=this.lineShader;pt.bind(),pt.uniforms=ge,this.edgeVAO.bind(),ie.lineWidth(this.lineWidth*this.pixelRatio),ie.drawArrays(ie.LINES,0,this.edgeCount*2),this.edgeVAO.unbind()}if(this.pointCount>0){var pt=this.pointShader;pt.bind(),pt.uniforms=ge,this.pointVAO.bind(),ie.drawArrays(ie.POINTS,0,this.pointCount),this.pointVAO.unbind()}if(this.contourEnable&&this.contourCount>0&&this.contourLineWidth>0){var pt=this.contourShader;pt.bind(),pt.uniforms=ge,this.contourVAO.bind(),ie.drawArrays(ie.LINES,0,this.contourCount),this.contourVAO.unbind()}},V.drawPick=function(me){me=me||{};for(var ie=this.gl,Se=me.model||z,Le=me.view||z,Ae=me.projection||z,Fe=[[-1e6,-1e6,-1e6],[1e6,1e6,1e6]],Pe=0;Pe<3;++Pe)Fe[0][Pe]=Math.max(Fe[0][Pe],this.clipBounds[0][Pe]),Fe[1][Pe]=Math.min(Fe[1][Pe],this.clipBounds[1][Pe]);this._model=[].slice.call(Se),this._view=[].slice.call(Le),this._projection=[].slice.call(Ae),this._resolution=[ie.drawingBufferWidth,ie.drawingBufferHeight];var ge={model:Se,view:Le,projection:Ae,clipBounds:Fe,pickId:this.pickId/255},Re=this.pickShader;if(Re.bind(),Re.uniforms=ge,this.triangleCount>0&&(this.triangleVAO.bind(),ie.drawArrays(ie.TRIANGLES,0,this.triangleCount*3),this.triangleVAO.unbind()),this.edgeCount>0&&(this.edgeVAO.bind(),ie.lineWidth(this.lineWidth*this.pixelRatio),ie.drawArrays(ie.LINES,0,this.edgeCount*2),this.edgeVAO.unbind()),this.pointCount>0){var Re=this.pointPickShader;Re.bind(),Re.uniforms=ge,this.pointVAO.bind(),ie.drawArrays(ie.POINTS,0,this.pointCount),this.pointVAO.unbind()}},V.pick=function(me){if(!me||me.id!==this.pickId)return null;for(var ie=me.value[0]+256*me.value[1]+65536*me.value[2],Se=this.cells[ie],Le=this.positions,Ae=new Array(Se.length),Fe=0;Fe<Se.length;++Fe)Ae[Fe]=Le[Se[Fe]];var Pe=me.coord[0],ge=me.coord[1];if(!this.pickVertex){var Re=this.positions[Se[0]],ce=this.positions[Se[1]],Ze=this.positions[Se[2]],ut=[(Re[0]+ce[0]+Ze[0])/3,(Re[1]+ce[1]+Ze[1])/3,(Re[2]+ce[2]+Ze[2])/3];return{_cellCenter:!0,position:[Pe,ge],index:ie,cell:Se,cellId:ie,intensity:this.intensity[ie],dataCoordinate:ut}}var pt=L(Ae,[Pe*this.pixelRatio,this._resolution[1]-ge*this.pixelRatio],this._model,this._view,this._projection,this._resolution);if(!pt)return null;for(var Zt=pt[2],st=0,Fe=0;Fe<Se.length;++Fe)st+=Zt[Fe]*this.intensity[Se[Fe]];return{position:pt[1],index:Se[pt[0]],cell:Se,cellId:ie,intensity:st,dataCoordinate:this.positions[Se[pt[0]]]}},V.dispose=function(){this.texture.dispose(),this.triShader.dispose(),this.lineShader.dispose(),this.pointShader.dispose(),this.pickShader.dispose(),this.pointPickShader.dispose(),this.triangleVAO.dispose(),this.trianglePositions.dispose(),this.triangleColors.dispose(),this.triangleUVs.dispose(),this.triangleNormals.dispose(),this.triangleIds.dispose(),this.edgeVAO.dispose(),this.edgePositions.dispose(),this.edgeColors.dispose(),this.edgeUVs.dispose(),this.edgeIds.dispose(),this.pointVAO.dispose(),this.pointPositions.dispose(),this.pointColors.dispose(),this.pointUVs.dispose(),this.pointSizes.dispose(),this.pointIds.dispose(),this.contourVAO.dispose(),this.contourPositions.dispose(),this.contourShader.dispose()};function N(me){var ie=u(me,x.vertex,x.fragment);return ie.attributes.position.location=0,ie.attributes.color.location=2,ie.attributes.uv.location=3,ie.attributes.normal.location=4,ie}function H(me){var ie=u(me,C.vertex,C.fragment);return ie.attributes.position.location=0,ie.attributes.color.location=2,ie.attributes.uv.location=3,ie}function te(me){var ie=u(me,M.vertex,M.fragment);return ie.attributes.position.location=0,ie.attributes.color.location=2,ie.attributes.uv.location=3,ie.attributes.pointSize.location=4,ie}function oe(me){var ie=u(me,g.vertex,g.fragment);return ie.attributes.position.location=0,ie.attributes.id.location=1,ie}function _e(me){var ie=u(me,P.vertex,P.fragment);return ie.attributes.position.location=0,ie.attributes.id.location=1,ie.attributes.pointSize.location=4,ie}function Ee(me){var ie=u(me,T.vertex,T.fragment);return ie.attributes.position.location=0,ie}function Ce(me,ie){arguments.length===1&&(ie=me,me=ie.gl);var Se=me.getExtension("OES_standard_derivatives")||me.getExtension("MOZ_OES_standard_derivatives")||me.getExtension("WEBKIT_OES_standard_derivatives");if(!Se)throw new Error("derivatives not supported");var Le=N(me),Ae=H(me),Fe=te(me),Pe=oe(me),ge=_e(me),Re=Ee(me),ce=h(me,b(new Uint8Array([255,255,255,255]),[1,1,4]));ce.generateMipmap(),ce.minFilter=me.LINEAR_MIPMAP_LINEAR,ce.magFilter=me.LINEAR;var Ze=c(me),ut=c(me),pt=c(me),Zt=c(me),st=c(me),lt=f(me,[{buffer:Ze,type:me.FLOAT,size:3},{buffer:st,type:me.UNSIGNED_BYTE,size:4,normalized:!0},{buffer:ut,type:me.FLOAT,size:4},{buffer:pt,type:me.FLOAT,size:2},{buffer:Zt,type:me.FLOAT,size:3}]),Gt=c(me),Nt=c(me),Jt=c(me),sr=c(me),wr=f(me,[{buffer:Gt,type:me.FLOAT,size:3},{buffer:sr,type:me.UNSIGNED_BYTE,size:4,normalized:!0},{buffer:Nt,type:me.FLOAT,size:4},{buffer:Jt,type:me.FLOAT,size:2}]),cr=c(me),$e=c(me),St=c(me),Qt=c(me),Vt=c(me),_t=f(me,[{buffer:cr,type:me.FLOAT,size:3},{buffer:Vt,type:me.UNSIGNED_BYTE,size:4,normalized:!0},{buffer:$e,type:me.FLOAT,size:4},{buffer:St,type:me.FLOAT,size:2},{buffer:Qt,type:me.FLOAT,size:1}]),It=c(me),mt=f(me,[{buffer:It,type:me.FLOAT,size:3}]),er=new O(me,ce,Le,Ae,Fe,Pe,ge,Re,Ze,st,ut,pt,Zt,lt,Gt,sr,Nt,Jt,wr,cr,Vt,$e,St,Qt,_t,It,mt);return er.update(ie),er}i.exports=Ce},7261:function(i,a,o){"use strict";i.exports=E;var s=o(9215),l=o(7608),u=o(6079),c=o(5911),f=o(3536),h=o(244);function d(S,L,x){return Math.sqrt(Math.pow(S,2)+Math.pow(L,2)+Math.pow(x,2))}function v(S){return Math.min(1,Math.max(-1,S))}function _(S){var L=Math.abs(S[0]),x=Math.abs(S[1]),C=Math.abs(S[2]),M=[0,0,0];L>Math.max(x,C)?M[2]=1:x>Math.max(L,C)?M[0]=1:M[1]=1;for(var g=0,P=0,T=0;T<3;++T)g+=S[T]*S[T],P+=M[T]*S[T];for(var T=0;T<3;++T)M[T]-=P/g*S[T];return f(M,M),M}function b(S,L,x,C,M,g,P,T){this.center=s(x),this.up=s(C),this.right=s(M),this.radius=s([g]),this.angle=s([P,T]),this.angle.bounds=[[-1/0,-Math.PI/2],[1/0,Math.PI/2]],this.setDistanceLimits(S,L),this.computedCenter=this.center.curve(0),this.computedUp=this.up.curve(0),this.computedRight=this.right.curve(0),this.computedRadius=this.radius.curve(0),this.computedAngle=this.angle.curve(0),this.computedToward=[0,0,0],this.computedEye=[0,0,0],this.computedMatrix=new Array(16);for(var z=0;z<16;++z)this.computedMatrix[z]=.5;this.recalcMatrix(0)}var p=b.prototype;p.setDistanceLimits=function(S,L){S>0?S=Math.log(S):S=-1/0,L>0?L=Math.log(L):L=1/0,L=Math.max(L,S),this.radius.bounds[0][0]=S,this.radius.bounds[1][0]=L},p.getDistanceLimits=function(S){var L=this.radius.bounds[0];return S?(S[0]=Math.exp(L[0][0]),S[1]=Math.exp(L[1][0]),S):[Math.exp(L[0][0]),Math.exp(L[1][0])]},p.recalcMatrix=function(S){this.center.curve(S),this.up.curve(S),this.right.curve(S),this.radius.curve(S),this.angle.curve(S);for(var L=this.computedUp,x=this.computedRight,C=0,M=0,g=0;g<3;++g)M+=L[g]*x[g],C+=L[g]*L[g];for(var P=Math.sqrt(C),T=0,g=0;g<3;++g)x[g]-=L[g]*M/C,T+=x[g]*x[g],L[g]/=P;for(var z=Math.sqrt(T),g=0;g<3;++g)x[g]/=z;var O=this.computedToward;c(O,L,x),f(O,O);for(var V=Math.exp(this.computedRadius[0]),G=this.computedAngle[0],Z=this.computedAngle[1],j=Math.cos(G),N=Math.sin(G),H=Math.cos(Z),te=Math.sin(Z),oe=this.computedCenter,_e=j*H,Ee=N*H,Ce=te,me=-j*te,ie=-N*te,Se=H,Le=this.computedEye,Ae=this.computedMatrix,g=0;g<3;++g){var Fe=_e*x[g]+Ee*O[g]+Ce*L[g];Ae[4*g+1]=me*x[g]+ie*O[g]+Se*L[g],Ae[4*g+2]=Fe,Ae[4*g+3]=0}var Pe=Ae[1],ge=Ae[5],Re=Ae[9],ce=Ae[2],Ze=Ae[6],ut=Ae[10],pt=ge*ut-Re*Ze,Zt=Re*ce-Pe*ut,st=Pe*Ze-ge*ce,lt=d(pt,Zt,st);pt/=lt,Zt/=lt,st/=lt,Ae[0]=pt,Ae[4]=Zt,Ae[8]=st;for(var g=0;g<3;++g)Le[g]=oe[g]+Ae[2+4*g]*V;for(var g=0;g<3;++g){for(var T=0,Gt=0;Gt<3;++Gt)T+=Ae[g+4*Gt]*Le[Gt];Ae[12+g]=-T}Ae[15]=1},p.getMatrix=function(S,L){this.recalcMatrix(S);var x=this.computedMatrix;if(L){for(var C=0;C<16;++C)L[C]=x[C];return L}return x};var k=[0,0,0];p.rotate=function(S,L,x,C){if(this.angle.move(S,L,x),C){this.recalcMatrix(S);var M=this.computedMatrix;k[0]=M[2],k[1]=M[6],k[2]=M[10];for(var g=this.computedUp,P=this.computedRight,T=this.computedToward,z=0;z<3;++z)M[4*z]=g[z],M[4*z+1]=P[z],M[4*z+2]=T[z];u(M,M,C,k);for(var z=0;z<3;++z)g[z]=M[4*z],P[z]=M[4*z+1];this.up.set(S,g[0],g[1],g[2]),this.right.set(S,P[0],P[1],P[2])}},p.pan=function(S,L,x,C){L=L||0,x=x||0,C=C||0,this.recalcMatrix(S);var M=this.computedMatrix,g=Math.exp(this.computedRadius[0]),P=M[1],T=M[5],z=M[9],O=d(P,T,z);P/=O,T/=O,z/=O;var V=M[0],G=M[4],Z=M[8],j=V*P+G*T+Z*z;V-=P*j,G-=T*j,Z-=z*j;var N=d(V,G,Z);V/=N,G/=N,Z/=N;var H=V*L+P*x,te=G*L+T*x,oe=Z*L+z*x;this.center.move(S,H,te,oe);var _e=Math.exp(this.computedRadius[0]);_e=Math.max(1e-4,_e+C),this.radius.set(S,Math.log(_e))},p.translate=function(S,L,x,C){this.center.move(S,L||0,x||0,C||0)},p.setMatrix=function(S,L,x,C){var M=1;typeof x=="number"&&(M=x|0),(M<0||M>3)&&(M=1);var g=(M+2)%3,P=(M+1)%3;L||(this.recalcMatrix(S),L=this.computedMatrix);var T=L[M],z=L[M+4],O=L[M+8];if(C){var G=Math.abs(T),Z=Math.abs(z),j=Math.abs(O),N=Math.max(G,Z,j);G===N?(T=T<0?-1:1,z=O=0):j===N?(O=O<0?-1:1,T=z=0):(z=z<0?-1:1,T=O=0)}else{var V=d(T,z,O);T/=V,z/=V,O/=V}var H=L[g],te=L[g+4],oe=L[g+8],_e=H*T+te*z+oe*O;H-=T*_e,te-=z*_e,oe-=O*_e;var Ee=d(H,te,oe);H/=Ee,te/=Ee,oe/=Ee;var Ce=z*oe-O*te,me=O*H-T*oe,ie=T*te-z*H,Se=d(Ce,me,ie);Ce/=Se,me/=Se,ie/=Se,this.center.jump(S,cr,$e,St),this.radius.idle(S),this.up.jump(S,T,z,O),this.right.jump(S,H,te,oe);var Le,Ae;if(M===2){var Fe=L[1],Pe=L[5],ge=L[9],Re=Fe*H+Pe*te+ge*oe,ce=Fe*Ce+Pe*me+ge*ie;Zt<0?Le=-Math.PI/2:Le=Math.PI/2,Ae=Math.atan2(ce,Re)}else{var Ze=L[2],ut=L[6],pt=L[10],Zt=Ze*T+ut*z+pt*O,st=Ze*H+ut*te+pt*oe,lt=Ze*Ce+ut*me+pt*ie;Le=Math.asin(v(Zt)),Ae=Math.atan2(lt,st)}this.angle.jump(S,Ae,Le),this.recalcMatrix(S);var Gt=L[2],Nt=L[6],Jt=L[10],sr=this.computedMatrix;l(sr,L);var wr=sr[15],cr=sr[12]/wr,$e=sr[13]/wr,St=sr[14]/wr,Qt=Math.exp(this.computedRadius[0]);this.center.jump(S,cr-Gt*Qt,$e-Nt*Qt,St-Jt*Qt)},p.lastT=function(){return Math.max(this.center.lastT(),this.up.lastT(),this.right.lastT(),this.radius.lastT(),this.angle.lastT())},p.idle=function(S){this.center.idle(S),this.up.idle(S),this.right.idle(S),this.radius.idle(S),this.angle.idle(S)},p.flush=function(S){this.center.flush(S),this.up.flush(S),this.right.flush(S),this.radius.flush(S),this.angle.flush(S)},p.setDistance=function(S,L){L>0&&this.radius.set(S,Math.log(L))},p.lookAt=function(S,L,x,C){this.recalcMatrix(S),L=L||this.computedEye,x=x||this.computedCenter,C=C||this.computedUp;var M=C[0],g=C[1],P=C[2],T=d(M,g,P);if(!(T<1e-6)){M/=T,g/=T,P/=T;var z=L[0]-x[0],O=L[1]-x[1],V=L[2]-x[2],G=d(z,O,V);if(!(G<1e-6)){z/=G,O/=G,V/=G;var Z=this.computedRight,j=Z[0],N=Z[1],H=Z[2],te=M*j+g*N+P*H;j-=te*M,N-=te*g,H-=te*P;var oe=d(j,N,H);if(!(oe<.01&&(j=g*V-P*O,N=P*z-M*V,H=M*O-g*z,oe=d(j,N,H),oe<1e-6))){j/=oe,N/=oe,H/=oe,this.up.set(S,M,g,P),this.right.set(S,j,N,H),this.center.set(S,x[0],x[1],x[2]),this.radius.set(S,Math.log(G));var _e=g*H-P*N,Ee=P*j-M*H,Ce=M*N-g*j,me=d(_e,Ee,Ce);_e/=me,Ee/=me,Ce/=me;var ie=M*z+g*O+P*V,Se=j*z+N*O+H*V,Le=_e*z+Ee*O+Ce*V,Ae=Math.asin(v(ie)),Fe=Math.atan2(Le,Se),Pe=this.angle._state,ge=Pe[Pe.length-1],Re=Pe[Pe.length-2];ge=ge%(2*Math.PI);var ce=Math.abs(ge+2*Math.PI-Fe),Ze=Math.abs(ge-Fe),ut=Math.abs(ge-2*Math.PI-Fe);ce<Ze&&(ge+=2*Math.PI),ut<Ze&&(ge-=2*Math.PI),this.angle.jump(this.angle.lastT(),ge,Re),this.angle.set(S,Fe,Ae)}}}};function E(S){S=S||{};var L=S.center||[0,0,0],x=S.up||[0,1,0],C=S.right||_(x),M=S.radius||1,g=S.theta||0,P=S.phi||0;if(L=[].slice.call(L,0,3),x=[].slice.call(x,0,3),f(x,x),C=[].slice.call(C,0,3),f(C,C),"eye"in S){var T=S.eye,z=[T[0]-L[0],T[1]-L[1],T[2]-L[2]];c(C,z,x),d(C[0],C[1],C[2])<1e-6?C=_(x):f(C,C),M=d(z[0],z[1],z[2]);var O=h(x,z)/M,V=h(C,z)/M;P=Math.acos(O),g=Math.acos(V)}return M=Math.log(M),new b(S.zoomMin,S.zoomMax,L,x,C,M,g,P)}},7319:function(i,a,o){var s=o(3236),l=o(9405),u=s([`precision highp float;
+#define GLSLIFY 1
+
+attribute vec3 position, nextPosition;
+attribute float arcLength, lineWidth;
+attribute vec4 color;
+
+uniform vec2 screenShape;
+uniform float pixelRatio;
+uniform mat4 model, view, projection;
+
+varying vec4 fragColor;
+varying vec3 worldPosition;
+varying float pixelArcLength;
+
+vec4 project(vec3 p) {
+  return projection * (view * (model * vec4(p, 1.0)));
+}
+
+void main() {
+  vec4 startPoint = project(position);
+  vec4 endPoint   = project(nextPosition);
+
+  vec2 A = startPoint.xy / startPoint.w;
+  vec2 B =   endPoint.xy /   endPoint.w;
+
+  float clipAngle = atan(
+    (B.y - A.y) * screenShape.y,
+    (B.x - A.x) * screenShape.x
+  );
+
+  vec2 offset = 0.5 * pixelRatio * lineWidth * vec2(
+    sin(clipAngle),
+    -cos(clipAngle)
+  ) / screenShape;
+
+  gl_Position = vec4(startPoint.xy + startPoint.w * offset, startPoint.zw);
+
+  worldPosition = position;
+  pixelArcLength = arcLength;
+  fragColor = color;
+}
+`]),c=s([`precision highp float;
+#define GLSLIFY 1
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+uniform vec3      clipBounds[2];
+uniform sampler2D dashTexture;
+uniform float     dashScale;
+uniform float     opacity;
+
+varying vec3    worldPosition;
+varying float   pixelArcLength;
+varying vec4    fragColor;
+
+void main() {
+  if (
+    outOfRange(clipBounds[0], clipBounds[1], worldPosition) ||
+    fragColor.a * opacity == 0.
+  ) discard;
+
+  float dashWeight = texture2D(dashTexture, vec2(dashScale * pixelArcLength, 0)).r;
+  if(dashWeight < 0.5) {
+    discard;
+  }
+  gl_FragColor = fragColor * opacity;
+}
+`]),f=s([`precision highp float;
+#define GLSLIFY 1
+
+#define FLOAT_MAX  1.70141184e38
+#define FLOAT_MIN  1.17549435e-38
+
+// https://github.com/mikolalysenko/glsl-read-float/blob/master/index.glsl
+vec4 packFloat(float v) {
+  float av = abs(v);
+
+  //Handle special cases
+  if(av < FLOAT_MIN) {
+    return vec4(0.0, 0.0, 0.0, 0.0);
+  } else if(v > FLOAT_MAX) {
+    return vec4(127.0, 128.0, 0.0, 0.0) / 255.0;
+  } else if(v < -FLOAT_MAX) {
+    return vec4(255.0, 128.0, 0.0, 0.0) / 255.0;
+  }
+
+  vec4 c = vec4(0,0,0,0);
+
+  //Compute exponent and mantissa
+  float e = floor(log2(av));
+  float m = av * pow(2.0, -e) - 1.0;
+
+  //Unpack mantissa
+  c[1] = floor(128.0 * m);
+  m -= c[1] / 128.0;
+  c[2] = floor(32768.0 * m);
+  m -= c[2] / 32768.0;
+  c[3] = floor(8388608.0 * m);
+
+  //Unpack exponent
+  float ebias = e + 127.0;
+  c[0] = floor(ebias / 2.0);
+  ebias -= c[0] * 2.0;
+  c[1] += floor(ebias) * 128.0;
+
+  //Unpack sign bit
+  c[0] += 128.0 * step(0.0, -v);
+
+  //Scale back to range
+  return c / 255.0;
+}
+
+bool outOfRange(float a, float b, float p) {
+  return ((p > max(a, b)) || 
+          (p < min(a, b)));
+}
+
+bool outOfRange(vec2 a, vec2 b, vec2 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y));
+}
+
+bool outOfRange(vec3 a, vec3 b, vec3 p) {
+  return (outOfRange(a.x, b.x, p.x) ||
+          outOfRange(a.y, b.y, p.y) ||
+          outOfRange(a.z, b.z, p.z));
+}
+
+bool outOfRange(vec4 a, vec4 b, vec4 p) {
+  return outOfRange(a.xyz, b.xyz, p.xyz);
+}
+
+uniform float pickId;
+uniform vec3 clipBounds[2];
+
+varying vec3 worldPosition;
+varying float pixelArcLength;
+varying vec4 fragColor;
+
+void main() {
+  if (outOfRange(clipBounds[0], clipBounds[1], worldPosition)) discard;
+
+  gl_FragColor = vec4(pickId/255.0, packFloat(pixelArcLength).xyz);
+}`]),h=[{name:"position",type:"vec3"},{name:"nextPosition",type:"vec3"},{name:"arcLength",type:"float"},{name:"lineWidth",type:"float"},{name:"color",type:"vec4"}];a.createShader=function(d){return l(d,u,c,null,h)},a.createPickShader=function(d){return l(d,u,f,null,h)}},7352:function(i,a,o){"use strict";var s=o(5721),l=o(4750),u=o(2690);i.exports=c;function c(f){var h=f.length;if(h===0)return[];if(h===1)return[[0]];var d=f[0].length;return d===0?[]:d===1?s(f):d===2?l(f):u(f,d)}},7399:function(i){i.exports=a;function a(o,s){var l=s[0],u=s[1],c=s[2],f=s[3],h=l+l,d=u+u,v=c+c,_=l*h,b=u*h,p=u*d,k=c*h,E=c*d,S=c*v,L=f*h,x=f*d,C=f*v;return o[0]=1-p-S,o[1]=b+C,o[2]=k-x,o[3]=0,o[4]=b-C,o[5]=1-_-S,o[6]=E+L,o[7]=0,o[8]=k+x,o[9]=E-L,o[10]=1-_-p,o[11]=0,o[12]=0,o[13]=0,o[14]=0,o[15]=1,o}},7417:function(i){i.exports=a;function a(o,s,l){return o[0]=Math.max(s[0],l[0]),o[1]=Math.max(s[1],l[1]),o[2]=Math.max(s[2],l[2]),o}},7442:function(i,a,o){var s=o(6658),l=o(7182),u=o(2652),c=o(9921),f=o(8648),h=b(),d=b(),v=b();i.exports=_;function _(E,S,L,x){if(c(S)===0||c(L)===0)return!1;var C=u(S,h.translate,h.scale,h.skew,h.perspective,h.quaternion),M=u(L,d.translate,d.scale,d.skew,d.perspective,d.quaternion);return!C||!M?!1:(s(v.translate,h.translate,d.translate,x),s(v.skew,h.skew,d.skew,x),s(v.scale,h.scale,d.scale,x),s(v.perspective,h.perspective,d.perspective,x),f(v.quaternion,h.quaternion,d.quaternion,x),l(E,v.translate,v.scale,v.skew,v.perspective,v.quaternion),!0)}function b(){return{translate:p(),scale:p(1),skew:p(),perspective:k(),quaternion:k()}}function p(E){return[E||0,E||0,E||0]}function k(){return[0,0,0,1]}},7507:function(i,a){"use strict";a.byteLength=d,a.toByteArray=_,a.fromByteArray=k;for(var o=[],s=[],l=typeof Uint8Array!="undefined"?Uint8Array:Array,u="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",c=0,f=u.length;c<f;++c)o[c]=u[c],s[u.charCodeAt(c)]=c;s[45]=62,s[95]=63;function h(E){var S=E.length;if(S%4>0)throw new Error("Invalid string. Length must be a multiple of 4");var L=E.indexOf("=");L===-1&&(L=S);var x=L===S?0:4-L%4;return[L,x]}function d(E){var S=h(E),L=S[0],x=S[1];return(L+x)*3/4-x}function v(E,S,L){return(S+L)*3/4-L}function _(E){var S,L=h(E),x=L[0],C=L[1],M=new l(v(E,x,C)),g=0,P=C>0?x-4:x,T;for(T=0;T<P;T+=4)S=s[E.charCodeAt(T)]<<18|s[E.charCodeAt(T+1)]<<12|s[E.charCodeAt(T+2)]<<6|s[E.charCodeAt(T+3)],M[g++]=S>>16&255,M[g++]=S>>8&255,M[g++]=S&255;return C===2&&(S=s[E.charCodeAt(T)]<<2|s[E.charCodeAt(T+1)]>>4,M[g++]=S&255),C===1&&(S=s[E.charCodeAt(T)]<<10|s[E.charCodeAt(T+1)]<<4|s[E.charCodeAt(T+2)]>>2,M[g++]=S>>8&255,M[g++]=S&255),M}function b(E){return o[E>>18&63]+o[E>>12&63]+o[E>>6&63]+o[E&63]}function p(E,S,L){for(var x,C=[],M=S;M<L;M+=3)x=(E[M]<<16&16711680)+(E[M+1]<<8&65280)+(E[M+2]&255),C.push(b(x));return C.join("")}function k(E){for(var S,L=E.length,x=L%3,C=[],M=16383,g=0,P=L-x;g<P;g+=M)C.push(p(E,g,g+M>P?P:g+M));return x===1?(S=E[L-1],C.push(o[S>>2]+o[S<<4&63]+"==")):x===2&&(S=(E[L-2]<<8)+E[L-1],C.push(o[S>>10]+o[S>>4&63]+o[S<<2&63]+"=")),C.join("")}},7518:function(i,a,o){"use strict";var s=o(1433);function l(f,h,d,v,_,b){this.location=f,this.dimension=h,this.a=d,this.b=v,this.c=_,this.d=b}l.prototype.bind=function(f){switch(this.dimension){case 1:f.vertexAttrib1f(this.location,this.a);break;case 2:f.vertexAttrib2f(this.location,this.a,this.b);break;case 3:f.vertexAttrib3f(this.location,this.a,this.b,this.c);break;case 4:f.vertexAttrib4f(this.location,this.a,this.b,this.c,this.d);break}};function u(f,h,d){this.gl=f,this._ext=h,this.handle=d,this._attribs=[],this._useElements=!1,this._elementsType=f.UNSIGNED_SHORT}u.prototype.bind=function(){this._ext.bindVertexArrayOES(this.handle);for(var f=0;f<this._attribs.length;++f)this._attribs[f].bind(this.gl)},u.prototype.unbind=function(){this._ext.bindVertexArrayOES(null)},u.prototype.dispose=function(){this._ext.deleteVertexArrayOES(this.handle)},u.prototype.update=function(f,h,d){if(this.bind(),s(this.gl,h,f),this.unbind(),this._attribs.length=0,f)for(var v=0;v<f.length;++v){var _=f[v];typeof _=="number"?this._attribs.push(new l(v,1,_)):Array.isArray(_)&&this._attribs.push(new l(v,_.length,_[0],_[1],_[2],_[3]))}this._useElements=!!h,this._elementsType=d||this.gl.UNSIGNED_SHORT},u.prototype.draw=function(f,h,d){d=d||0;var v=this.gl;this._useElements?v.drawElements(f,h,this._elementsType,d):v.drawArrays(f,d,h)};function c(f,h){return new u(f,h,h.createVertexArrayOES())}i.exports=c},7520:function(i,a,o){"use strict";var s=o(9507);function l(){var u=!1;try{var c=Object.defineProperty({},"passive",{get:function(){u=!0}});window.addEventListener("test",null,c),window.removeEventListener("test",null,c)}catch(f){u=!1}return u}i.exports=s&&l()},7536:function(i){i.exports=a;function a(){var o=new Float32Array(4);return o[0]=0,o[1]=0,o[2]=0,o[3]=0,o}},7608:function(i){i.exports=a;function a(o,s){var l=s[0],u=s[1],c=s[2],f=s[3],h=s[4],d=s[5],v=s[6],_=s[7],b=s[8],p=s[9],k=s[10],E=s[11],S=s[12],L=s[13],x=s[14],C=s[15],M=l*d-u*h,g=l*v-c*h,P=l*_-f*h,T=u*v-c*d,z=u*_-f*d,O=c*_-f*v,V=b*L-p*S,G=b*x-k*S,Z=b*C-E*S,j=p*x-k*L,N=p*C-E*L,H=k*C-E*x,te=M*H-g*N+P*j+T*Z-z*G+O*V;return te?(te=1/te,o[0]=(d*H-v*N+_*j)*te,o[1]=(c*N-u*H-f*j)*te,o[2]=(L*O-x*z+C*T)*te,o[3]=(k*z-p*O-E*T)*te,o[4]=(v*Z-h*H-_*G)*te,o[5]=(l*H-c*Z+f*G)*te,o[6]=(x*P-S*O-C*g)*te,o[7]=(b*O-k*P+E*g)*te,o[8]=(h*N-d*Z+_*V)*te,o[9]=(u*Z-l*N-f*V)*te,o[10]=(S*z-L*P+C*M)*te,o[11]=(p*P-b*z-E*M)*te,o[12]=(d*G-h*j-v*V)*te,o[13]=(l*j-u*G+c*V)*te,o[14]=(L*g-S*T-x*M)*te,o[15]=(b*T-p*g+k*M)*te,o):null}},7626:function(i,a,o){"use strict";var s=o(2642),l=o(9346);i.exports=d;function u(v,_){for(var b=[0,0,0,0],p=0;p<4;++p)for(var k=0;k<4;++k)b[k]+=v[4*p+k]*_[p];return b}function c(v,_,b,p,k){for(var E=u(p,u(b,u(_,[v[0],v[1],v[2],1]))),S=0;S<3;++S)E[S]/=E[3];return[.5*k[0]*(1+E[0]),.5*k[1]*(1-E[1])]}function f(v,_){if(v.length===2){for(var b=0,p=0,k=0;k<2;++k)b+=Math.pow(_[k]-v[0][k],2),p+=Math.pow(_[k]-v[1][k],2);return b=Math.sqrt(b),p=Math.sqrt(p),b+p<1e-6?[1,0]:[p/(b+p),b/(p+b)]}else if(v.length===3){var E=[0,0];return l(v[0],v[1],v[2],_,E),s(v,E)}return[]}function h(v,_){for(var b=[0,0,0],p=0;p<v.length;++p)for(var k=v[p],E=_[p],S=0;S<3;++S)b[S]+=E*k[S];return b}function d(v,_,b,p,k,E){if(v.length===1)return[0,v[0].slice()];for(var S=new Array(v.length),L=0;L<v.length;++L)S[L]=c(v[L],b,p,k,E);for(var x=0,C=1/0,L=0;L<S.length;++L){for(var M=0,g=0;g<2;++g)M+=Math.pow(S[L][g]-_[g],2);M<C&&(C=M,x=L)}for(var P=f(S,_),T=0,L=0;L<3;++L){if(P[L]<-.001||P[L]>1.0001)return null;T+=P[L]}return Math.abs(T-1)>.001?null:[x,h(v,P),P]}},7636:function(i){i.exports=a;function a(o,s){s=s||1;var l=Math.random()*2*Math.PI,u=Math.random()*2-1,c=Math.sqrt(1-u*u)*s;return o[0]=Math.cos(l)*c,o[1]=Math.sin(l)*c,o[2]=u*s,o}},7640:function(i,a,o){"use strict";var s=o(1888);function l(_){switch(_){case"uint32":return[s.mallocUint32,s.freeUint32];default:return null}}var u={"uint32,1,0":function(_,b){return function(k,E,S,L,x,C,M,g,P,T,z){var O,V,G,Z=k*x+L,j,N=_(g),H,te,oe,_e;for(O=k+1;O<=E;++O){for(V=O,Z+=x,G=Z,H=0,te=Z,j=0;j<g;++j)N[H++]=S[te],te+=P;e:for(;V-- >k;){H=0,te=G-x;t:for(j=0;j<g;++j){if(oe=S[te],_e=N[H],oe<_e)break e;if(oe>_e)break t;te+=T,H+=z}for(H=G,te=G-x,j=0;j<g;++j)S[H]=S[te],H+=P,te+=P;G-=x}for(H=G,te=0,j=0;j<g;++j)S[H]=N[te++],H+=P}b(N)}}};function c(_,b){var p=l(b),k=[b,_].join(","),E=u[k];return p?E(p[0],p[1]):E()}var f={"uint32,1,0":function(_,b,p){return function k(E,S,L,x,C,M,g,P,T,z,O){var V=(S-E+1)/6|0,G=E+V,Z=S-V,j=E+S>>1,N=j-V,H=j+V,te=G,oe=N,_e=j,Ee=H,Ce=Z,me=E+1,ie=S-1,Se=!0,Le,Ae,Fe,Pe,ge,Re,ce,Ze,ut,pt=0,Zt=0,st=0,lt,Gt,Nt,Jt,sr,wr,cr,$e,St,Qt,Vt,_t,It,mt,er,lr,Tr=P,Lr=b(Tr),ti=b(Tr);Gt=C*te,Nt=C*oe,lr=x;e:for(lt=0;lt<P;++lt){if(ce=Gt+lr,Ze=Nt+lr,st=L[ce]-L[Ze],st>0){Ae=te,te=oe,oe=Ae;break e}if(st<0)break e;lr+=z}Gt=C*Ee,Nt=C*Ce,lr=x;e:for(lt=0;lt<P;++lt){if(ce=Gt+lr,Ze=Nt+lr,st=L[ce]-L[Ze],st>0){Ae=Ee,Ee=Ce,Ce=Ae;break e}if(st<0)break e;lr+=z}Gt=C*te,Nt=C*_e,lr=x;e:for(lt=0;lt<P;++lt){if(ce=Gt+lr,Ze=Nt+lr,st=L[ce]-L[Ze],st>0){Ae=te,te=_e,_e=Ae;break e}if(st<0)break e;lr+=z}Gt=C*oe,Nt=C*_e,lr=x;e:for(lt=0;lt<P;++lt){if(ce=Gt+lr,Ze=Nt+lr,st=L[ce]-L[Ze],st>0){Ae=oe,oe=_e,_e=Ae;break e}if(st<0)break e;lr+=z}Gt=C*te,Nt=C*Ee,lr=x;e:for(lt=0;lt<P;++lt){if(ce=Gt+lr,Ze=Nt+lr,st=L[ce]-L[Ze],st>0){Ae=te,te=Ee,Ee=Ae;break e}if(st<0)break e;lr+=z}Gt=C*_e,Nt=C*Ee,lr=x;e:for(lt=0;lt<P;++lt){if(ce=Gt+lr,Ze=Nt+lr,st=L[ce]-L[Ze],st>0){Ae=_e,_e=Ee,Ee=Ae;break e}if(st<0)break e;lr+=z}Gt=C*oe,Nt=C*Ce,lr=x;e:for(lt=0;lt<P;++lt){if(ce=Gt+lr,Ze=Nt+lr,st=L[ce]-L[Ze],st>0){Ae=oe,oe=Ce,Ce=Ae;break e}if(st<0)break e;lr+=z}Gt=C*oe,Nt=C*_e,lr=x;e:for(lt=0;lt<P;++lt){if(ce=Gt+lr,Ze=Nt+lr,st=L[ce]-L[Ze],st>0){Ae=oe,oe=_e,_e=Ae;break e}if(st<0)break e;lr+=z}Gt=C*Ee,Nt=C*Ce,lr=x;e:for(lt=0;lt<P;++lt){if(ce=Gt+lr,Ze=Nt+lr,st=L[ce]-L[Ze],st>0){Ae=Ee,Ee=Ce,Ce=Ae;break e}if(st<0)break e;lr+=z}for(Gt=C*te,Nt=C*oe,Jt=C*_e,sr=C*Ee,wr=C*Ce,cr=C*G,$e=C*j,St=C*Z,er=0,lr=x,lt=0;lt<P;++lt)ce=Gt+lr,Ze=Nt+lr,ut=Jt+lr,Qt=sr+lr,Vt=wr+lr,_t=cr+lr,It=$e+lr,mt=St+lr,Lr[er]=L[Ze],ti[er]=L[Qt],Se=Se&&Lr[er]===ti[er],Fe=L[ce],Pe=L[ut],ge=L[Vt],L[_t]=Fe,L[It]=Pe,L[mt]=ge,++er,lr+=T;for(Gt=C*N,Nt=C*E,lr=x,lt=0;lt<P;++lt)ce=Gt+lr,Ze=Nt+lr,L[ce]=L[Ze],lr+=T;for(Gt=C*H,Nt=C*S,lr=x,lt=0;lt<P;++lt)ce=Gt+lr,Ze=Nt+lr,L[ce]=L[Ze],lr+=T;if(Se)for(Re=me;Re<=ie;++Re){ce=x+Re*C,er=0;e:for(lt=0;lt<P;++lt){if(st=L[ce]-Lr[er],st!==0)break e;er+=O,ce+=z}if(st!==0)if(st<0){if(Re!==me)for(Gt=C*Re,Nt=C*me,lr=x,lt=0;lt<P;++lt)ce=Gt+lr,Ze=Nt+lr,Le=L[ce],L[ce]=L[Ze],L[Ze]=Le,lr+=T;++me}else for(;;){ce=x+ie*C,er=0;e:for(lt=0;lt<P;++lt){if(st=L[ce]-Lr[er],st!==0)break e;er+=O,ce+=z}if(st>0)ie--;else if(st<0){for(Gt=C*Re,Nt=C*me,Jt=C*ie,lr=x,lt=0;lt<P;++lt)ce=Gt+lr,Ze=Nt+lr,ut=Jt+lr,Le=L[ce],L[ce]=L[Ze],L[Ze]=L[ut],L[ut]=Le,lr+=T;++me,--ie;break}else{for(Gt=C*Re,Nt=C*ie,lr=x,lt=0;lt<P;++lt)ce=Gt+lr,Ze=Nt+lr,Le=L[ce],L[ce]=L[Ze],L[Ze]=Le,lr+=T;--ie;break}}}else for(Re=me;Re<=ie;++Re){ce=x+Re*C,er=0;e:for(lt=0;lt<P;++lt){if(pt=L[ce]-Lr[er],pt!==0)break e;er+=O,ce+=z}if(pt<0){if(Re!==me)for(Gt=C*Re,Nt=C*me,lr=x,lt=0;lt<P;++lt)ce=Gt+lr,Ze=Nt+lr,Le=L[ce],L[ce]=L[Ze],L[Ze]=Le,lr+=T;++me}else{ce=x+Re*C,er=0;e:for(lt=0;lt<P;++lt){if(Zt=L[ce]-ti[er],Zt!==0)break e;er+=O,ce+=z}if(Zt>0)for(;;){ce=x+ie*C,er=0;e:for(lt=0;lt<P;++lt){if(st=L[ce]-ti[er],st!==0)break e;er+=O,ce+=z}if(st>0){if(--ie<Re)break;continue}else{ce=x+ie*C,er=0;e:for(lt=0;lt<P;++lt){if(st=L[ce]-Lr[er],st!==0)break e;er+=O,ce+=z}if(st<0){for(Gt=C*Re,Nt=C*me,Jt=C*ie,lr=x,lt=0;lt<P;++lt)ce=Gt+lr,Ze=Nt+lr,ut=Jt+lr,Le=L[ce],L[ce]=L[Ze],L[Ze]=L[ut],L[ut]=Le,lr+=T;++me,--ie}else{for(Gt=C*Re,Nt=C*ie,lr=x,lt=0;lt<P;++lt)ce=Gt+lr,Ze=Nt+lr,Le=L[ce],L[ce]=L[Ze],L[Ze]=Le,lr+=T;--ie}break}}}}for(Gt=C*E,Nt=C*(me-1),er=0,lr=x,lt=0;lt<P;++lt)ce=Gt+lr,Ze=Nt+lr,L[ce]=L[Ze],L[Ze]=Lr[er],++er,lr+=T;for(Gt=C*S,Nt=C*(ie+1),er=0,lr=x,lt=0;lt<P;++lt)ce=Gt+lr,Ze=Nt+lr,L[ce]=L[Ze],L[Ze]=ti[er],++er,lr+=T;if(me-2-E<=32?_(E,me-2,L,x,C,M,g,P,T,z,O):k(E,me-2,L,x,C,M,g,P,T,z,O),S-(ie+2)<=32?_(ie+2,S,L,x,C,M,g,P,T,z,O):k(ie+2,S,L,x,C,M,g,P,T,z,O),Se){p(Lr),p(ti);return}if(me<G&&ie>Z){e:for(;;){for(ce=x+me*C,er=0,lr=x,lt=0;lt<P;++lt){if(L[ce]!==Lr[er])break e;++er,ce+=T}++me}e:for(;;){for(ce=x+ie*C,er=0,lr=x,lt=0;lt<P;++lt){if(L[ce]!==ti[er])break e;++er,ce+=T}--ie}for(Re=me;Re<=ie;++Re){ce=x+Re*C,er=0;e:for(lt=0;lt<P;++lt){if(pt=L[ce]-Lr[er],pt!==0)break e;er+=O,ce+=z}if(pt===0){if(Re!==me)for(Gt=C*Re,Nt=C*me,lr=x,lt=0;lt<P;++lt)ce=Gt+lr,Ze=Nt+lr,Le=L[ce],L[ce]=L[Ze],L[Ze]=Le,lr+=T;++me}else{ce=x+Re*C,er=0;e:for(lt=0;lt<P;++lt){if(Zt=L[ce]-ti[er],Zt!==0)break e;er+=O,ce+=z}if(Zt===0)for(;;){ce=x+ie*C,er=0;e:for(lt=0;lt<P;++lt){if(st=L[ce]-ti[er],st!==0)break e;er+=O,ce+=z}if(st===0){if(--ie<Re)break;continue}else{ce=x+ie*C,er=0;e:for(lt=0;lt<P;++lt){if(st=L[ce]-Lr[er],st!==0)break e;er+=O,ce+=z}if(st<0){for(Gt=C*Re,Nt=C*me,Jt=C*ie,lr=x,lt=0;lt<P;++lt)ce=Gt+lr,Ze=Nt+lr,ut=Jt+lr,Le=L[ce],L[ce]=L[Ze],L[Ze]=L[ut],L[ut]=Le,lr+=T;++me,--ie}else{for(Gt=C*Re,Nt=C*ie,lr=x,lt=0;lt<P;++lt)ce=Gt+lr,Ze=Nt+lr,Le=L[ce],L[ce]=L[Ze],L[Ze]=Le,lr+=T;--ie}break}}}}}p(Lr),p(ti),ie-me<=32?_(me,ie,L,x,C,M,g,P,T,z,O):k(me,ie,L,x,C,M,g,P,T,z,O)}}};function h(_,b,p){var k=l(b),E=[b,_].join(","),S=f[E];return _.length>1&&k?S(p,k[0],k[1]):S(p)}var d={"uint32,1,0":function(_,b){return function(p){var k=p.data,E=p.offset|0,S=p.shape,L=p.stride,x=L[0]|0,C=S[0]|0,M=L[1]|0,g=S[1]|0,P=M,T=M,z=1;C<=32?_(0,C-1,k,E,x,M,C,g,P,T,z):b(0,C-1,k,E,x,M,C,g,P,T,z)}}};function v(_,b){var p=[b,_].join(","),k=d[p],E=c(_,b),S=h(_,b,E);return k(E,S)}i.exports=v},7642:function(i,a,o){"use strict";var s=o(8954),l=o(1682);i.exports=h;function u(d,v){this.point=d,this.index=v}function c(d,v){for(var _=d.point,b=v.point,p=_.length,k=0;k<p;++k){var E=b[k]-_[k];if(E)return E}return 0}function f(d,v,_){if(d===1)return _?[[-1,0]]:[];var b=v.map(function(L,x){return[L[0],x]});b.sort(function(L,x){return L[0]-x[0]});for(var p=new Array(d-1),k=1;k<d;++k){var E=b[k-1],S=b[k];p[k-1]=[E[1],S[1]]}return _&&p.push([-1,p[0][1]],[p[d-1][1],-1]),p}function h(d,v){var _=d.length;if(_===0)return[];var b=d[0].length;if(b<1)return[];if(b===1)return f(_,d,v);for(var p=new Array(_),k=1,E=0;E<_;++E){for(var S=d[E],L=new Array(b+1),x=0,C=0;C<b;++C){var M=S[C];L[C]=M,x+=M*M}L[b]=x,p[E]=new u(L,E),k=Math.max(x,k)}l(p,c),_=p.length;for(var g=new Array(_+b+1),P=new Array(_+b+1),T=(b+1)*(b+1)*k,z=new Array(b+1),E=0;E<=b;++E)z[E]=0;z[b]=T,g[0]=z.slice(),P[0]=-1;for(var E=0;E<=b;++E){var L=z.slice();L[E]=1,g[E+1]=L,P[E+1]=-1}for(var E=0;E<_;++E){var O=p[E];g[E+b+1]=O.point,P[E+b+1]=O.index}var V=s(g,!1);if(v?V=V.filter(function(G){for(var Z=0,j=0;j<=b;++j){var N=P[G[j]];if(N<0&&++Z>=2)return!1;G[j]=N}return!0}):V=V.filter(function(G){for(var Z=0;Z<=b;++Z){var j=P[G[Z]];if(j<0)return!1;G[Z]=j}return!0}),b&1)for(var E=0;E<V.length;++E){var O=V[E],L=O[0];O[0]=O[1],O[1]=L}return V}},7656:function(i){i.exports=a;function a(o,s,l){var u=l[0],c=l[1],f=l[2],h,d,v,_,b,p,k,E,S,L,x,C;return s===o?(o[12]=s[0]*u+s[4]*c+s[8]*f+s[12],o[13]=s[1]*u+s[5]*c+s[9]*f+s[13],o[14]=s[2]*u+s[6]*c+s[10]*f+s[14],o[15]=s[3]*u+s[7]*c+s[11]*f+s[15]):(h=s[0],d=s[1],v=s[2],_=s[3],b=s[4],p=s[5],k=s[6],E=s[7],S=s[8],L=s[9],x=s[10],C=s[11],o[0]=h,o[1]=d,o[2]=v,o[3]=_,o[4]=b,o[5]=p,o[6]=k,o[7]=E,o[8]=S,o[9]=L,o[10]=x,o[11]=C,o[12]=h*u+b*c+S*f+s[12],o[13]=d*u+p*c+L*f+s[13],o[14]=v*u+k*c+x*f+s[14],o[15]=_*u+E*c+C*f+s[15]),o}},7718:function(i,a,o){i.exports=O,i.exports.processPixels=z;var s=o(3711),l=o(9618),u=o(5878),c=o(332),f=o(2538),h=o(2095),d="b",v="b|",_="i",b="i|",p="sup",k="+",E="+1",S="sub",L="-",x="-1";function C(V,G,Z,j){for(var N="<"+V+">",H="</"+V+">",te=N.length,oe=H.length,_e=G[0]===k||G[0]===L,Ee=0,Ce=-oe;Ee>-1&&(Ee=Z.indexOf(N,Ee),!(Ee===-1||(Ce=Z.indexOf(H,Ee+te),Ce===-1)||Ce<=Ee));){for(var me=Ee;me<Ce+oe;++me)if(me<Ee+te||me>=Ce)j[me]=null,Z=Z.substr(0,me)+" "+Z.substr(me+1);else if(j[me]!==null){var ie=j[me].indexOf(G[0]);ie===-1?j[me]+=G:_e&&(j[me]=j[me].substr(0,ie+1)+(1+parseInt(j[me][ie+1]))+j[me].substr(ie+2))}var Se=Ee+te,Le=Z.substr(Se,Ce-Se),Ae=Le.indexOf(N);Ae!==-1?Ee=Ae:Ee=Ce+oe}return j}function M(V,G,Z){for(var j=G.textAlign||"start",N=G.textBaseline||"alphabetic",H=[1<<30,1<<30],te=[0,0],oe=V.length,_e=0;_e<oe;++_e)for(var Ee=V[_e],Ce=0;Ce<2;++Ce)H[Ce]=Math.min(H[Ce],Ee[Ce])|0,te[Ce]=Math.max(te[Ce],Ee[Ce])|0;var me=0;switch(j){case"center":me=-.5*(H[0]+te[0]);break;case"right":case"end":me=-te[0];break;case"left":case"start":me=-H[0];break;default:throw new Error("vectorize-text: Unrecognized textAlign: '"+j+"'")}var ie=0;switch(N){case"hanging":case"top":ie=-H[1];break;case"middle":ie=-.5*(H[1]+te[1]);break;case"alphabetic":case"ideographic":ie=-3*Z;break;case"bottom":ie=-te[1];break;default:throw new Error("vectorize-text: Unrecoginized textBaseline: '"+N+"'")}var Se=1/Z;return"lineHeight"in G?Se*=+G.lineHeight:"width"in G?Se=G.width/(te[0]-H[0]):"height"in G&&(Se=G.height/(te[1]-H[1])),V.map(function(Le){return[Se*(Le[0]+me),Se*(Le[1]+ie)]})}function g(V,G,Z,j,N,H){Z=Z.replace(/\n/g,""),H.breaklines===!0?Z=Z.replace(/\<br\>/g,`
+`):Z=Z.replace(/\<br\>/g," ");var te="",oe=[];for(ge=0;ge<Z.length;++ge)oe[ge]=te;H.bolds===!0&&(oe=C(d,v,Z,oe)),H.italics===!0&&(oe=C(_,b,Z,oe)),H.superscripts===!0&&(oe=C(p,E,Z,oe)),H.subscripts===!0&&(oe=C(S,x,Z,oe));var _e=[],Ee="";for(ge=0;ge<Z.length;++ge)oe[ge]!==null&&(Ee+=Z[ge],_e.push(oe[ge]));var Ce=Ee.split(`
+`),me=Ce.length,ie=Math.round(N*j),Se=j,Le=j*2,Ae=0,Fe=me*ie+Le;V.height<Fe&&(V.height=Fe),G.fillStyle="#000",G.fillRect(0,0,V.width,V.height),G.fillStyle="#fff";var Pe,ge,Re,ce,Ze,ut=0,pt="";function Zt(){if(pt!==""){var $e=G.measureText(pt).width;G.fillText(pt,Se+Re,Le+ce),Re+=$e}}function st(){return""+Math.round(Ze)+"px "}function lt($e,St){var Qt=""+G.font;if(H.subscripts===!0){var Vt=$e.indexOf(L),_t=St.indexOf(L),It=Vt>-1?parseInt($e[1+Vt]):0,mt=_t>-1?parseInt(St[1+_t]):0;It!==mt&&(Qt=Qt.replace(st(),"?px "),Ze*=Math.pow(.75,mt-It),Qt=Qt.replace("?px ",st())),ce+=.25*ie*(mt-It)}if(H.superscripts===!0){var er=$e.indexOf(k),lr=St.indexOf(k),Tr=er>-1?parseInt($e[1+er]):0,Lr=lr>-1?parseInt(St[1+lr]):0;Tr!==Lr&&(Qt=Qt.replace(st(),"?px "),Ze*=Math.pow(.75,Lr-Tr),Qt=Qt.replace("?px ",st())),ce-=.25*ie*(Lr-Tr)}if(H.bolds===!0){var ti=$e.indexOf(v)>-1,Br=St.indexOf(v)>-1;!ti&&Br&&(Vr?Qt=Qt.replace("italic ","italic bold "):Qt="bold "+Qt),ti&&!Br&&(Qt=Qt.replace("bold ",""))}if(H.italics===!0){var Vr=$e.indexOf(b)>-1,dt=St.indexOf(b)>-1;!Vr&&dt&&(Qt="italic "+Qt),Vr&&!dt&&(Qt=Qt.replace("italic ",""))}G.font=Qt}for(Pe=0;Pe<me;++Pe){var Gt=Ce[Pe]+`
+`;for(Re=0,ce=Pe*ie,Ze=j,pt="",ge=0;ge<Gt.length;++ge){var Nt=ge+ut<_e.length?_e[ge+ut]:_e[_e.length-1];te===Nt?pt+=Gt[ge]:(Zt(),pt=Gt[ge],Nt!==void 0&&(lt(te,Nt),te=Nt))}Zt(),ut+=Gt.length;var Jt=Math.round(Re+2*Se)|0;Ae<Jt&&(Ae=Jt)}var sr=Ae,wr=Le+ie*me,cr=l(G.getImageData(0,0,sr,wr).data,[wr,sr,4]);return cr.pick(-1,-1,0).transpose(1,0)}function P(V,G){var Z=s(V,128);return G?u(Z.cells,Z.positions,.25):{edges:Z.cells,positions:Z.positions}}function T(V,G,Z,j){var N=P(V,j),H=M(N.positions,G,Z),te=N.edges,oe=G.orientation==="ccw";if(c(H,te),G.polygons||G.polygon||G.polyline){for(var _e=h(te,H),Ee=new Array(_e.length),Ce=0;Ce<_e.length;++Ce){for(var me=_e[Ce],ie=new Array(me.length),Se=0;Se<me.length;++Se){for(var Le=me[Se],Ae=new Array(Le.length),Fe=0;Fe<Le.length;++Fe)Ae[Fe]=H[Le[Fe]].slice();oe&&Ae.reverse(),ie[Se]=Ae}Ee[Ce]=ie}return Ee}else return G.triangles||G.triangulate||G.triangle?{cells:f(H,te,{delaunay:!1,exterior:!1,interior:!0}),positions:H}:{edges:te,positions:H}}function z(V,G,Z){try{return T(V,G,Z,!0)}catch(j){}try{return T(V,G,Z,!1)}catch(j){}return G.polygons||G.polyline||G.polygon?[]:G.triangles||G.triangulate||G.triangle?{cells:[],positions:[]}:{edges:[],positions:[]}}function O(V,G,Z,j){var N=64,H=1.25,te={breaklines:!1,bolds:!1,italics:!1,subscripts:!1,superscripts:!1};j&&(j.size&&j.size>0&&(N=j.size),j.lineSpacing&&j.lineSpacing>0&&(H=j.lineSpacing),j.styletags&&j.styletags.breaklines&&(te.breaklines=!!j.styletags.breaklines),j.styletags&&j.styletags.bolds&&(te.bolds=!!j.styletags.bolds),j.styletags&&j.styletags.italics&&(te.italics=!!j.styletags.italics),j.styletags&&j.styletags.subscripts&&(te.subscripts=!!j.styletags.subscripts),j.styletags&&j.styletags.superscripts&&(te.superscripts=!!j.styletags.superscripts)),Z.font=[j.fontStyle,j.fontVariant,j.fontWeight,N+"px",j.font].filter(function(_e){return _e}).join(" "),Z.textAlign="start",Z.textBaseline="alphabetic",Z.direction="ltr";var oe=g(G,Z,V,N,H,te);return z(oe,j,N)}},7721:function(i,a,o){"use strict";var s=o(5716);i.exports=l;function l(u){return s(u[0])*s(u[1])}},7765:function(i,a,o){"use strict";i.exports=p;var s=o(9618),l=o(1888),u=o(446),c=o(1570);function f(k){for(var E=k.length,S=0,L=0;L<E;++L)S=Math.max(S,k[L].length)|0;return S-1}function h(k,E){for(var S=k.length,L=l.mallocUint8(S),x=0;x<S;++x)L[x]=k[x]<E|0;return L}function d(k,E){for(var S=k.length,L=E*(E+1)/2*S|0,x=l.mallocUint32(L*2),C=0,M=0;M<S;++M)for(var g=k[M],E=g.length,P=0;P<E;++P)for(var T=0;T<P;++T){var z=g[T],O=g[P];x[C++]=Math.min(z,O)|0,x[C++]=Math.max(z,O)|0}var V=C/2|0;u(s(x,[V,2]));for(var G=2,M=2;M<C;M+=2)x[M-2]===x[M]&&x[M-1]===x[M+1]||(x[G++]=x[M],x[G++]=x[M+1]);return s(x,[G/2|0,2])}function v(k,E,S,L){for(var x=k.data,C=k.shape[0],M=l.mallocDouble(C),g=0,P=0;P<C;++P){var T=x[2*P],z=x[2*P+1];if(S[T]!==S[z]){var O=E[T],V=E[z];x[2*g]=T,x[2*g+1]=z,M[g++]=(V-L)/(V-O)}}return k.shape[0]=g,s(M,[g])}function _(k,E){var S=l.mallocInt32(E*2),L=k.shape[0],x=k.data;S[0]=0;for(var C=0,M=0;M<L;++M){var g=x[2*M];if(g!==C){for(S[2*C+1]=M;++C<g;)S[2*C]=M,S[2*C+1]=M;S[2*C]=M}}for(S[2*C+1]=L;++C<E;)S[2*C]=S[2*C+1]=L;return S}function b(k){for(var E=k.shape[0]|0,S=k.data,L=new Array(E),x=0;x<E;++x)L[x]=[S[2*x],S[2*x+1]];return L}function p(k,E,S,L){S=S||0,typeof L=="undefined"&&(L=f(k));var x=k.length;if(x===0||L<1)return{cells:[],vertexIds:[],vertexWeights:[]};var C=h(E,+S),M=d(k,L),g=v(M,E,C,+S),P=_(M,E.length|0),T=c(L)(k,M.data,P,C),z=b(M),O=[].slice.call(g.data,0,g.shape[0]);return l.free(C),l.free(M.data),l.free(g.data),l.free(P),{cells:T,vertexIds:z,vertexWeights:O}}},7766:function(i,a,o){"use strict";var s=o(9618),l=o(5298),u=o(1888);i.exports=g;var c=null,f=null,h=null;function d(P){c=[P.LINEAR,P.NEAREST_MIPMAP_LINEAR,P.LINEAR_MIPMAP_NEAREST,P.LINEAR_MIPMAP_NEAREST],f=[P.NEAREST,P.LINEAR,P.NEAREST_MIPMAP_NEAREST,P.NEAREST_MIPMAP_LINEAR,P.LINEAR_MIPMAP_NEAREST,P.LINEAR_MIPMAP_LINEAR],h=[P.REPEAT,P.CLAMP_TO_EDGE,P.MIRRORED_REPEAT]}function v(P){return typeof HTMLCanvasElement!="undefined"&&P instanceof HTMLCanvasElement||typeof HTMLImageElement!="undefined"&&P instanceof HTMLImageElement||typeof HTMLVideoElement!="undefined"&&P instanceof HTMLVideoElement||typeof ImageData!="undefined"&&P instanceof ImageData}var _=function(P,T){l.muls(P,T,255)};function b(P,T,z){var O=P.gl,V=O.getParameter(O.MAX_TEXTURE_SIZE);if(T<0||T>V||z<0||z>V)throw new Error("gl-texture2d: Invalid texture size");return P._shape=[T,z],P.bind(),O.texImage2D(O.TEXTURE_2D,0,P.format,T,z,0,P.format,P.type,null),P._mipLevels=[0],P}function p(P,T,z,O,V,G){this.gl=P,this.handle=T,this.format=V,this.type=G,this._shape=[z,O],this._mipLevels=[0],this._magFilter=P.NEAREST,this._minFilter=P.NEAREST,this._wrapS=P.CLAMP_TO_EDGE,this._wrapT=P.CLAMP_TO_EDGE,this._anisoSamples=1;var Z=this,j=[this._wrapS,this._wrapT];Object.defineProperties(j,[{get:function(){return Z._wrapS},set:function(H){return Z.wrapS=H}},{get:function(){return Z._wrapT},set:function(H){return Z.wrapT=H}}]),this._wrapVector=j;var N=[this._shape[0],this._shape[1]];Object.defineProperties(N,[{get:function(){return Z._shape[0]},set:function(H){return Z.width=H}},{get:function(){return Z._shape[1]},set:function(H){return Z.height=H}}]),this._shapeVector=N}var k=p.prototype;Object.defineProperties(k,{minFilter:{get:function(){return this._minFilter},set:function(P){this.bind();var T=this.gl;if(this.type===T.FLOAT&&c.indexOf(P)>=0&&(T.getExtension("OES_texture_float_linear")||(P=T.NEAREST)),f.indexOf(P)<0)throw new Error("gl-texture2d: Unknown filter mode "+P);return T.texParameteri(T.TEXTURE_2D,T.TEXTURE_MIN_FILTER,P),this._minFilter=P}},magFilter:{get:function(){return this._magFilter},set:function(P){this.bind();var T=this.gl;if(this.type===T.FLOAT&&c.indexOf(P)>=0&&(T.getExtension("OES_texture_float_linear")||(P=T.NEAREST)),f.indexOf(P)<0)throw new Error("gl-texture2d: Unknown filter mode "+P);return T.texParameteri(T.TEXTURE_2D,T.TEXTURE_MAG_FILTER,P),this._magFilter=P}},mipSamples:{get:function(){return this._anisoSamples},set:function(P){var T=this._anisoSamples;if(this._anisoSamples=Math.max(P,1)|0,T!==this._anisoSamples){var z=this.gl.getExtension("EXT_texture_filter_anisotropic");z&&this.gl.texParameterf(this.gl.TEXTURE_2D,z.TEXTURE_MAX_ANISOTROPY_EXT,this._anisoSamples)}return this._anisoSamples}},wrapS:{get:function(){return this._wrapS},set:function(P){if(this.bind(),h.indexOf(P)<0)throw new Error("gl-texture2d: Unknown wrap mode "+P);return this.gl.texParameteri(this.gl.TEXTURE_2D,this.gl.TEXTURE_WRAP_S,P),this._wrapS=P}},wrapT:{get:function(){return this._wrapT},set:function(P){if(this.bind(),h.indexOf(P)<0)throw new Error("gl-texture2d: Unknown wrap mode "+P);return this.gl.texParameteri(this.gl.TEXTURE_2D,this.gl.TEXTURE_WRAP_T,P),this._wrapT=P}},wrap:{get:function(){return this._wrapVector},set:function(P){if(Array.isArray(P)||(P=[P,P]),P.length!==2)throw new Error("gl-texture2d: Must specify wrap mode for rows and columns");for(var T=0;T<2;++T)if(h.indexOf(P[T])<0)throw new Error("gl-texture2d: Unknown wrap mode "+P);this._wrapS=P[0],this._wrapT=P[1];var z=this.gl;return this.bind(),z.texParameteri(z.TEXTURE_2D,z.TEXTURE_WRAP_S,this._wrapS),z.texParameteri(z.TEXTURE_2D,z.TEXTURE_WRAP_T,this._wrapT),P}},shape:{get:function(){return this._shapeVector},set:function(P){if(!Array.isArray(P))P=[P|0,P|0];else if(P.length!==2)throw new Error("gl-texture2d: Invalid texture shape");return b(this,P[0]|0,P[1]|0),[P[0]|0,P[1]|0]}},width:{get:function(){return this._shape[0]},set:function(P){return P=P|0,b(this,P,this._shape[1]),P}},height:{get:function(){return this._shape[1]},set:function(P){return P=P|0,b(this,this._shape[0],P),P}}}),k.bind=function(P){var T=this.gl;return P!==void 0&&T.activeTexture(T.TEXTURE0+(P|0)),T.bindTexture(T.TEXTURE_2D,this.handle),P!==void 0?P|0:T.getParameter(T.ACTIVE_TEXTURE)-T.TEXTURE0},k.dispose=function(){this.gl.deleteTexture(this.handle)},k.generateMipmap=function(){this.bind(),this.gl.generateMipmap(this.gl.TEXTURE_2D);for(var P=Math.min(this._shape[0],this._shape[1]),T=0;P>0;++T,P>>>=1)this._mipLevels.indexOf(T)<0&&this._mipLevels.push(T)},k.setPixels=function(P,T,z,O){var V=this.gl;this.bind(),Array.isArray(T)?(O=z,z=T[1]|0,T=T[0]|0):(T=T||0,z=z||0),O=O||0;var G=v(P)?P:P.raw;if(G){var Z=this._mipLevels.indexOf(O)<0;Z?(V.texImage2D(V.TEXTURE_2D,0,this.format,this.format,this.type,G),this._mipLevels.push(O)):V.texSubImage2D(V.TEXTURE_2D,O,T,z,this.format,this.type,G)}else if(P.shape&&P.stride&&P.data){if(P.shape.length<2||T+P.shape[1]>this._shape[1]>>>O||z+P.shape[0]>this._shape[0]>>>O||T<0||z<0)throw new Error("gl-texture2d: Texture dimensions are out of bounds");S(V,T,z,O,this.format,this.type,this._mipLevels,P)}else throw new Error("gl-texture2d: Unsupported data type")};function E(P,T){return P.length===3?T[2]===1&&T[1]===P[0]*P[2]&&T[0]===P[2]:T[0]===1&&T[1]===P[0]}function S(P,T,z,O,V,G,Z,j){var N=j.dtype,H=j.shape.slice();if(H.length<2||H.length>3)throw new Error("gl-texture2d: Invalid ndarray, must be 2d or 3d");var te=0,oe=0,_e=E(H,j.stride.slice());N==="float32"?te=P.FLOAT:N==="float64"?(te=P.FLOAT,_e=!1,N="float32"):N==="uint8"?te=P.UNSIGNED_BYTE:(te=P.UNSIGNED_BYTE,_e=!1,N="uint8");var Ee=1;if(H.length===2)oe=P.LUMINANCE,H=[H[0],H[1],1],j=s(j.data,H,[j.stride[0],j.stride[1],1],j.offset);else if(H.length===3){if(H[2]===1)oe=P.ALPHA;else if(H[2]===2)oe=P.LUMINANCE_ALPHA;else if(H[2]===3)oe=P.RGB;else if(H[2]===4)oe=P.RGBA;else throw new Error("gl-texture2d: Invalid shape for pixel coords");Ee=H[2]}else throw new Error("gl-texture2d: Invalid shape for texture");if((oe===P.LUMINANCE||oe===P.ALPHA)&&(V===P.LUMINANCE||V===P.ALPHA)&&(oe=V),oe!==V)throw new Error("gl-texture2d: Incompatible texture format for setPixels");var Ce=j.size,me=Z.indexOf(O)<0;if(me&&Z.push(O),te===G&&_e)j.offset===0&&j.data.length===Ce?me?P.texImage2D(P.TEXTURE_2D,O,V,H[0],H[1],0,V,G,j.data):P.texSubImage2D(P.TEXTURE_2D,O,T,z,H[0],H[1],V,G,j.data):me?P.texImage2D(P.TEXTURE_2D,O,V,H[0],H[1],0,V,G,j.data.subarray(j.offset,j.offset+Ce)):P.texSubImage2D(P.TEXTURE_2D,O,T,z,H[0],H[1],V,G,j.data.subarray(j.offset,j.offset+Ce));else{var ie;G===P.FLOAT?ie=u.mallocFloat32(Ce):ie=u.mallocUint8(Ce);var Se=s(ie,H,[H[2],H[2]*H[0],1]);te===P.FLOAT&&G===P.UNSIGNED_BYTE?_(Se,j):l.assign(Se,j),me?P.texImage2D(P.TEXTURE_2D,O,V,H[0],H[1],0,V,G,ie.subarray(0,Ce)):P.texSubImage2D(P.TEXTURE_2D,O,T,z,H[0],H[1],V,G,ie.subarray(0,Ce)),G===P.FLOAT?u.freeFloat32(ie):u.freeUint8(ie)}}function L(P){var T=P.createTexture();return P.bindTexture(P.TEXTURE_2D,T),P.texParameteri(P.TEXTURE_2D,P.TEXTURE_MIN_FILTER,P.NEAREST),P.texParameteri(P.TEXTURE_2D,P.TEXTURE_MAG_FILTER,P.NEAREST),P.texParameteri(P.TEXTURE_2D,P.TEXTURE_WRAP_S,P.CLAMP_TO_EDGE),P.texParameteri(P.TEXTURE_2D,P.TEXTURE_WRAP_T,P.CLAMP_TO_EDGE),T}function x(P,T,z,O,V){var G=P.getParameter(P.MAX_TEXTURE_SIZE);if(T<0||T>G||z<0||z>G)throw new Error("gl-texture2d: Invalid texture shape");if(V===P.FLOAT&&!P.getExtension("OES_texture_float"))throw new Error("gl-texture2d: Floating point textures not supported on this platform");var Z=L(P);return P.texImage2D(P.TEXTURE_2D,0,O,T,z,0,O,V,null),new p(P,Z,T,z,O,V)}function C(P,T,z,O,V,G){var Z=L(P);return P.texImage2D(P.TEXTURE_2D,0,V,V,G,T),new p(P,Z,z,O,V,G)}function M(P,T){var z=T.dtype,O=T.shape.slice(),V=P.getParameter(P.MAX_TEXTURE_SIZE);if(O[0]<0||O[0]>V||O[1]<0||O[1]>V)throw new Error("gl-texture2d: Invalid texture size");var G=E(O,T.stride.slice()),Z=0;z==="float32"?Z=P.FLOAT:z==="float64"?(Z=P.FLOAT,G=!1,z="float32"):z==="uint8"?Z=P.UNSIGNED_BYTE:(Z=P.UNSIGNED_BYTE,G=!1,z="uint8");var j=0;if(O.length===2)j=P.LUMINANCE,O=[O[0],O[1],1],T=s(T.data,O,[T.stride[0],T.stride[1],1],T.offset);else if(O.length===3)if(O[2]===1)j=P.ALPHA;else if(O[2]===2)j=P.LUMINANCE_ALPHA;else if(O[2]===3)j=P.RGB;else if(O[2]===4)j=P.RGBA;else throw new Error("gl-texture2d: Invalid shape for pixel coords");else throw new Error("gl-texture2d: Invalid shape for texture");Z===P.FLOAT&&!P.getExtension("OES_texture_float")&&(Z=P.UNSIGNED_BYTE,G=!1);var N,H,te=T.size;if(G)T.offset===0&&T.data.length===te?N=T.data:N=T.data.subarray(T.offset,T.offset+te);else{var oe=[O[2],O[2]*O[0],1];H=u.malloc(te,z);var _e=s(H,O,oe,0);(z==="float32"||z==="float64")&&Z===P.UNSIGNED_BYTE?_(_e,T):l.assign(_e,T),N=H.subarray(0,te)}var Ee=L(P);return P.texImage2D(P.TEXTURE_2D,0,j,O[0],O[1],0,j,Z,N),G||u.free(H),new p(P,Ee,O[0],O[1],j,Z)}function g(P){if(arguments.length<=1)throw new Error("gl-texture2d: Missing arguments for texture2d constructor");if(c||d(P),typeof arguments[1]=="number")return x(P,arguments[1],arguments[2],arguments[3]||P.RGBA,arguments[4]||P.UNSIGNED_BYTE);if(Array.isArray(arguments[1]))return x(P,arguments[1][0]|0,arguments[1][1]|0,arguments[2]||P.RGBA,arguments[3]||P.UNSIGNED_BYTE);if(typeof arguments[1]=="object"){var T=arguments[1],z=v(T)?T:T.raw;if(z)return C(P,z,T.width|0,T.height|0,arguments[2]||P.RGBA,arguments[3]||P.UNSIGNED_BYTE);if(T.shape&&T.data&&T.stride)return M(P,T)}throw new Error("gl-texture2d: Invalid arguments for texture2d constructor")}},7790:function(){},7815:function(i,a,o){"use strict";var s=o(2931),l=o(9970),u=["xyz","xzy","yxz","yzx","zxy","zyx"],c=function(S,L,x,C){for(var M=S.points,g=S.velocities,P=S.divergences,T=[],z=[],O=[],V=[],G=[],Z=[],j=0,N=0,H=l.create(),te=l.create(),oe=8,_e=0;_e<M.length;_e++){var Ee=M[_e],Ce=g[_e],me=P[_e];L===0&&(me=x*.05),N=s.length(Ce)/C,H=l.create(),s.copy(H,Ce),H[3]=me;for(var ie=0;ie<oe;ie++)G[ie]=[Ee[0],Ee[1],Ee[2],ie];if(V.length>0)for(var ie=0;ie<oe;ie++){var Se=(ie+1)%oe;T.push(V[ie],G[ie],G[Se],G[Se],V[Se],V[ie]),O.push(te,H,H,H,te,te),Z.push(j,N,N,N,j,j);var Le=T.length;z.push([Le-6,Le-5,Le-4],[Le-3,Le-2,Le-1])}var Ae=V;V=G,G=Ae;var Fe=te;te=H,H=Fe;var Pe=j;j=N,N=Pe}return{positions:T,cells:z,vectors:O,vertexIntensity:Z}},f=function(S,L,x,C){for(var M=0,g=0;g<S.length;g++)for(var P=S[g].velocities,T=0;T<P.length;T++)M=Math.max(M,s.length(P[T]));for(var z=S.map(function(_e){return c(_e,x,C,M)}),O=[],V=[],G=[],Z=[],g=0;g<z.length;g++){var j=z[g],N=O.length;O=O.concat(j.positions),G=G.concat(j.vectors),Z=Z.concat(j.vertexIntensity);for(var T=0;T<j.cells.length;T++){var H=j.cells[T],te=[];V.push(te);for(var oe=0;oe<H.length;oe++)te.push(H[oe]+N)}}return{positions:O,cells:V,vectors:G,vertexIntensity:Z,colormap:L}},h=function(S,L){var x=S.length,C;for(C=0;C<x;C++){var M=S[C];if(M===L)return C;if(M>L)return C-1}return C},d=function(S,L,x){return S<L?L:S>x?x:S},v=function(S,L,x){var C=L.vectors,M=L.meshgrid,g=S[0],P=S[1],T=S[2],z=M[0].length,O=M[1].length,V=M[2].length,G=h(M[0],g),Z=h(M[1],P),j=h(M[2],T),N=G+1,H=Z+1,te=j+1;if(G=d(G,0,z-1),N=d(N,0,z-1),Z=d(Z,0,O-1),H=d(H,0,O-1),j=d(j,0,V-1),te=d(te,0,V-1),G<0||Z<0||j<0||N>z-1||H>O-1||te>V-1)return s.create();var oe=M[0][G],_e=M[0][N],Ee=M[1][Z],Ce=M[1][H],me=M[2][j],ie=M[2][te],Se=(g-oe)/(_e-oe),Le=(P-Ee)/(Ce-Ee),Ae=(T-me)/(ie-me);isFinite(Se)||(Se=.5),isFinite(Le)||(Le=.5),isFinite(Ae)||(Ae=.5);var Fe,Pe,ge,Re,ce,Ze;switch(x.reversedX&&(G=z-1-G,N=z-1-N),x.reversedY&&(Z=O-1-Z,H=O-1-H),x.reversedZ&&(j=V-1-j,te=V-1-te),x.filled){case 5:ce=j,Ze=te,ge=Z*V,Re=H*V,Fe=G*V*O,Pe=N*V*O;break;case 4:ce=j,Ze=te,Fe=G*V,Pe=N*V,ge=Z*V*z,Re=H*V*z;break;case 3:ge=Z,Re=H,ce=j*O,Ze=te*O,Fe=G*O*V,Pe=N*O*V;break;case 2:ge=Z,Re=H,Fe=G*O,Pe=N*O,ce=j*O*z,Ze=te*O*z;break;case 1:Fe=G,Pe=N,ce=j*z,Ze=te*z,ge=Z*z*V,Re=H*z*V;break;default:Fe=G,Pe=N,ge=Z*z,Re=H*z,ce=j*z*O,Ze=te*z*O;break}var ut=C[Fe+ge+ce],pt=C[Fe+ge+Ze],Zt=C[Fe+Re+ce],st=C[Fe+Re+Ze],lt=C[Pe+ge+ce],Gt=C[Pe+ge+Ze],Nt=C[Pe+Re+ce],Jt=C[Pe+Re+Ze],sr=s.create(),wr=s.create(),cr=s.create(),$e=s.create();s.lerp(sr,ut,lt,Se),s.lerp(wr,pt,Gt,Se),s.lerp(cr,Zt,Nt,Se),s.lerp($e,st,Jt,Se);var St=s.create(),Qt=s.create();s.lerp(St,sr,cr,Le),s.lerp(Qt,wr,$e,Le);var Vt=s.create();return s.lerp(Vt,St,Qt,Ae),Vt},_=function(S,L){var x=L[0],C=L[1],M=L[2];return S[0]=x<0?-x:x,S[1]=C<0?-C:C,S[2]=M<0?-M:M,S},b=function(S){var L=1/0;S.sort(function(g,P){return g-P});for(var x=S.length,C=1;C<x;C++){var M=Math.abs(S[C]-S[C-1]);M<L&&(L=M)}return L},p=function(S){for(var L=[],x=[],C=[],M={},g={},P={},T=S.length,z=0;z<T;z++){var O=S[z],V=O[0],G=O[1],Z=O[2];M[V]||(L.push(V),M[V]=!0),g[G]||(x.push(G),g[G]=!0),P[Z]||(C.push(Z),P[Z]=!0)}var j=b(L),N=b(x),H=b(C),te=Math.min(j,N,H);return isFinite(te)?te:1};i.exports=function(S,L){var x=S.startingPositions,C=S.maxLength||1e3,M=S.tubeSize||1,g=S.absoluteTubeSize,P=S.gridFill||"+x+y+z",T={};P.indexOf("-x")!==-1&&(T.reversedX=!0),P.indexOf("-y")!==-1&&(T.reversedY=!0),P.indexOf("-z")!==-1&&(T.reversedZ=!0),T.filled=u.indexOf(P.replace(/-/g,"").replace(/\+/g,""));var z=S.getVelocity||function(Gt){return v(Gt,S,T)},O=S.getDivergence||function(Gt,Nt){var Jt=s.create(),sr=1e-4;s.add(Jt,Gt,[sr,0,0]);var wr=z(Jt);s.subtract(wr,wr,Nt),s.scale(wr,wr,1/sr),s.add(Jt,Gt,[0,sr,0]);var cr=z(Jt);s.subtract(cr,cr,Nt),s.scale(cr,cr,1/sr),s.add(Jt,Gt,[0,0,sr]);var $e=z(Jt);return s.subtract($e,$e,Nt),s.scale($e,$e,1/sr),s.add(Jt,wr,cr),s.add(Jt,Jt,$e),Jt},V=[],G=L[0][0],Z=L[0][1],j=L[0][2],N=L[1][0],H=L[1][1],te=L[1][2],oe=function(Gt){var Nt=Gt[0],Jt=Gt[1],sr=Gt[2];return!(Nt<G||Nt>N||Jt<Z||Jt>H||sr<j||sr>te)},_e=s.distance(L[0],L[1]),Ee=10*_e/C,Ce=Ee*Ee,me=1,ie=0,Se=x.length;Se>1&&(me=p(x));for(var Le=0;Le<Se;Le++){var Ae=s.create();s.copy(Ae,x[Le]);var Fe=[Ae],Pe=[],ge=z(Ae),Re=Ae;Pe.push(ge);var ce=[],Ze=O(Ae,ge),ut=s.length(Ze);isFinite(ut)&&ut>ie&&(ie=ut),ce.push(ut),V.push({points:Fe,velocities:Pe,divergences:ce});for(var pt=0;pt<C*100&&Fe.length<C&&oe(Ae);){pt++;var Zt=s.clone(ge),st=s.squaredLength(Zt);if(st===0)break;if(st>Ce&&s.scale(Zt,Zt,Ee/Math.sqrt(st)),s.add(Zt,Zt,Ae),ge=z(Zt),s.squaredDistance(Re,Zt)-Ce>-1e-4*Ce){Fe.push(Zt),Re=Zt,Pe.push(ge);var Ze=O(Zt,ge),ut=s.length(Ze);isFinite(ut)&&ut>ie&&(ie=ut),ce.push(ut)}Ae=Zt}}var lt=f(V,S.colormap,ie,me);return g?lt.tubeScale=g:(ie===0&&(ie=1),lt.tubeScale=M*.5*me/ie),lt};var k=o(6740),E=o(6405).createMesh;i.exports.createTubeMesh=function(S,L){return E(S,L,{shaders:k,traceType:"streamtube"})}},7827:function(i){i.exports=["<<=",">>=","++","--","<<",">>","<=",">=","==","!=","&&","||","+=","-=","*=","/=","%=","&=","^^","^=","|=","(",")","[","]",".","!","~","*","/","%","+","-","<",">","&","^","|","?",":","=",",",";","{","}"]},7842:function(i,a,o){"use strict";var s=o(6330),l=o(1533),u=o(2651),c=o(6768),f=o(869),h=o(8697);i.exports=d;function d(v,_){if(s(v))return _?h(v,d(_)):[v[0].clone(),v[1].clone()];var b=0,p,k;if(l(v))p=v.clone();else if(typeof v=="string")p=c(v);else{if(v===0)return[u(0),u(1)];if(v===Math.floor(v))p=u(v);else{for(;v!==Math.floor(v);)v=v*Math.pow(2,256),b-=256;p=u(v)}}if(s(_))p.mul(_[1]),k=_[0].clone();else if(l(_))k=_.clone();else if(typeof _=="string")k=c(_);else if(!_)k=u(1);else if(_===Math.floor(_))k=u(_);else{for(;_!==Math.floor(_);)_=_*Math.pow(2,256),b+=256;k=u(_)}return b>0?p=p.ushln(b):b<0&&(k=k.ushln(-b)),f(p,k)}},7894:function(i){i.exports=a;function a(o){return o[0]=1,o[1]=0,o[2]=0,o[3]=0,o[4]=0,o[5]=1,o[6]=0,o[7]=0,o[8]=0,o[9]=0,o[10]=1,o[11]=0,o[12]=0,o[13]=0,o[14]=0,o[15]=1,o}},7932:function(i,a,o){var s=o(620);i.exports=s.slice().concat(["layout","centroid","smooth","case","mat2x2","mat2x3","mat2x4","mat3x2","mat3x3","mat3x4","mat4x2","mat4x3","mat4x4","uvec2","uvec3","uvec4","samplerCubeShadow","sampler2DArray","sampler2DArrayShadow","isampler2D","isampler3D","isamplerCube","isampler2DArray","usampler2D","usampler3D","usamplerCube","usampler2DArray","coherent","restrict","readonly","writeonly","resource","atomic_uint","noperspective","patch","sample","subroutine","common","partition","active","filter","image1D","image2D","image3D","imageCube","iimage1D","iimage2D","iimage3D","iimageCube","uimage1D","uimage2D","uimage3D","uimageCube","image1DArray","image2DArray","iimage1DArray","iimage2DArray","uimage1DArray","uimage2DArray","image1DShadow","image2DShadow","image1DArrayShadow","image2DArrayShadow","imageBuffer","iimageBuffer","uimageBuffer","sampler1DArray","sampler1DArrayShadow","isampler1D","isampler1DArray","usampler1D","usampler1DArray","isampler2DRect","usampler2DRect","samplerBuffer","isamplerBuffer","usamplerBuffer","sampler2DMS","isampler2DMS","usampler2DMS","sampler2DMSArray","isampler2DMSArray","usampler2DMSArray"])},7960:function(i){i.exports=a;function a(o,s){var l=s[0]-o[0],u=s[1]-o[1],c=s[2]-o[2],f=s[3]-o[3];return l*l+u*u+c*c+f*f}},8105:function(i){"use strict";i.exports=o;var a={"lo===p0":s,"lo<p0":l,"lo<=p0":u,"hi<=p0":c,"lo<p0&&p0<=hi":h,"lo<=p0&&p0<=hi":f,"!(lo>=p0)&&!(p1>=hi)":d};function o(v){return a[v]}function s(v,_,b,p,k,E,S){for(var L=2*v,x=L*b,C=x,M=b,g=_,P=v+_,T=b;p>T;++T,x+=L){var z=k[x+g];if(z===S)if(M===T)M+=1,C+=L;else{for(var O=0;L>O;++O){var V=k[x+O];k[x+O]=k[C],k[C++]=V}var G=E[T];E[T]=E[M],E[M++]=G}}return M}function l(v,_,b,p,k,E,S){for(var L=2*v,x=L*b,C=x,M=b,g=_,P=v+_,T=b;p>T;++T,x+=L){var z=k[x+g];if(z<S)if(M===T)M+=1,C+=L;else{for(var O=0;L>O;++O){var V=k[x+O];k[x+O]=k[C],k[C++]=V}var G=E[T];E[T]=E[M],E[M++]=G}}return M}function u(v,_,b,p,k,E,S){for(var L=2*v,x=L*b,C=x,M=b,g=_,P=v+_,T=b;p>T;++T,x+=L){var z=k[x+P];if(z<=S)if(M===T)M+=1,C+=L;else{for(var O=0;L>O;++O){var V=k[x+O];k[x+O]=k[C],k[C++]=V}var G=E[T];E[T]=E[M],E[M++]=G}}return M}function c(v,_,b,p,k,E,S){for(var L=2*v,x=L*b,C=x,M=b,g=_,P=v+_,T=b;p>T;++T,x+=L){var z=k[x+P];if(z<=S)if(M===T)M+=1,C+=L;else{for(var O=0;L>O;++O){var V=k[x+O];k[x+O]=k[C],k[C++]=V}var G=E[T];E[T]=E[M],E[M++]=G}}return M}function f(v,_,b,p,k,E,S){for(var L=2*v,x=L*b,C=x,M=b,g=_,P=v+_,T=b;p>T;++T,x+=L){var z=k[x+g],O=k[x+P];if(z<=S&&S<=O)if(M===T)M+=1,C+=L;else{for(var V=0;L>V;++V){var G=k[x+V];k[x+V]=k[C],k[C++]=G}var Z=E[T];E[T]=E[M],E[M++]=Z}}return M}function h(v,_,b,p,k,E,S){for(var L=2*v,x=L*b,C=x,M=b,g=_,P=v+_,T=b;p>T;++T,x+=L){var z=k[x+g],O=k[x+P];if(z<S&&S<=O)if(M===T)M+=1,C+=L;else{for(var V=0;L>V;++V){var G=k[x+V];k[x+V]=k[C],k[C++]=G}var Z=E[T];E[T]=E[M],E[M++]=Z}}return M}function d(v,_,b,p,k,E,S,L){for(var x=2*v,C=x*b,M=C,g=b,P=_,T=v+_,z=b;p>z;++z,C+=x){var O=k[C+P],V=k[C+T];if(!(O>=S)&&!(L>=V))if(g===z)g+=1,M+=x;else{for(var G=0;x>G;++G){var Z=k[C+G];k[C+G]=k[M],k[M++]=Z}var j=E[z];E[z]=E[g],E[g++]=j}}return g}},8107:function(i){i.exports=a;function a(o,s,l){return o[0]=Math.min(s[0],l[0]),o[1]=Math.min(s[1],l[1]),o[2]=Math.min(s[2],l[2]),o}},8116:function(i,a,o){"use strict";var s=o(7518),l=o(870);function u(f){this.bindVertexArrayOES=f.bindVertexArray.bind(f),this.createVertexArrayOES=f.createVertexArray.bind(f),this.deleteVertexArrayOES=f.deleteVertexArray.bind(f)}function c(f,h,d,v){var _=f.createVertexArray?new u(f):f.getExtension("OES_vertex_array_object"),b;return _?b=s(f,_):b=l(f),b.update(h,d,v),b}i.exports=c},8192:function(i,a,o){i.exports=c;var s=o(2825),l=o(3536),u=o(244);function c(f,h){var d=s(f[0],f[1],f[2]),v=s(h[0],h[1],h[2]);l(d,d),l(v,v);var _=u(d,v);return _>1?0:Math.acos(_)}},8210:function(i){"use strict";i.exports=o;function a(s,l){var u=s+l,c=u-s,f=u-c,h=l-c,d=s-f,v=d+h;return v?[v,u]:[u]}function o(s,l){var u=s.length|0,c=l.length|0;if(u===1&&c===1)return a(s[0],l[0]);var f=u+c,h=new Array(f),d=0,v=0,_=0,b=Math.abs,p=s[v],k=b(p),E=l[_],S=b(E),L,x;k<S?(x=p,v+=1,v<u&&(p=s[v],k=b(p))):(x=E,_+=1,_<c&&(E=l[_],S=b(E))),v<u&&k<S||_>=c?(L=p,v+=1,v<u&&(p=s[v],k=b(p))):(L=E,_+=1,_<c&&(E=l[_],S=b(E)));for(var C=L+x,M=C-L,g=x-M,P=g,T=C,z,O,V,G,Z;v<u&&_<c;)k<S?(L=p,v+=1,v<u&&(p=s[v],k=b(p))):(L=E,_+=1,_<c&&(E=l[_],S=b(E))),x=P,C=L+x,M=C-L,g=x-M,g&&(h[d++]=g),z=T+C,O=z-T,V=z-O,G=C-O,Z=T-V,P=Z+G,T=z;for(;v<u;)L=p,x=P,C=L+x,M=C-L,g=x-M,g&&(h[d++]=g),z=T+C,O=z-T,V=z-O,G=C-O,Z=T-V,P=Z+G,T=z,v+=1,v<u&&(p=s[v]);for(;_<c;)L=E,x=P,C=L+x,M=C-L,g=x-M,g&&(h[d++]=g),z=T+C,O=z-T,V=z-O,G=C-O,Z=T-V,P=Z+G,T=z,_+=1,_<c&&(E=l[_]);return P&&(h[d++]=P),T&&(h[d++]=T),d||(h[d++]=0),h.length=d,h}},8277:function(i){"use strict";function a(){return function(f,h,d,v,_){var b=f[0],p=f[1],k=f[2],E=d[0],S=d[1],L=d[2],x=[0,0,0];v|=0;var C=0,M=0,g=0,P=L,T=S-k*L,z=E-p*S;for(g=0;g<b;++g){for(M=0;M<p;++M){for(C=0;C<k;++C){{var O=_,V;for(V=0;V<x.length-1;++V)O=O[x[V]];h[v]=O[x[x.length-1]]}v+=P,++x[2]}v+=T,x[2]-=k,++x[1]}v+=z,x[1]-=p,++x[0]}}}function o(){return a()}var s=o;function l(f){var h={};return function(v,_){var b=v.dtype,p=v.order,k=[b,p.join()].join(),E=h[k];return E||(h[k]=E=f([b,p])),E(v.shape.slice(0),v.data,v.stride,v.offset|0,_)}}function u(f){return l(s.bind(void 0,f))}function c(f){return u({funcName:f.funcName})}i.exports=c({funcName:"convert"})},8284:function(i){i.exports=a;function a(o,s){var l={identity:s},u=o.valueOf;return Object.defineProperty(o,"valueOf",{value:function(c){return c!==s?u.apply(this,arguments):l},writable:!0}),l}},8406:function(i,a){var o=1e-6,s=1e-6;a.vertexNormals=function(l,u,c){for(var f=u.length,h=new Array(f),d=c===void 0?o:c,v=0;v<f;++v)h[v]=[0,0,0];for(var v=0;v<l.length;++v)for(var _=l[v],b=0,p=_[_.length-1],k=_[0],E=0;E<_.length;++E){b=p,p=k,k=_[(E+1)%_.length];for(var S=u[b],L=u[p],x=u[k],C=new Array(3),M=0,g=new Array(3),P=0,T=0;T<3;++T)C[T]=S[T]-L[T],M+=C[T]*C[T],g[T]=x[T]-L[T],P+=g[T]*g[T];if(M*P>d)for(var z=h[p],O=1/Math.sqrt(M*P),T=0;T<3;++T){var V=(T+1)%3,G=(T+2)%3;z[T]+=O*(g[V]*C[G]-g[G]*C[V])}}for(var v=0;v<f;++v){for(var z=h[v],Z=0,T=0;T<3;++T)Z+=z[T]*z[T];if(Z>d)for(var O=1/Math.sqrt(Z),T=0;T<3;++T)z[T]*=O;else for(var T=0;T<3;++T)z[T]=0}return h},a.faceNormals=function(l,u,c){for(var f=l.length,h=new Array(f),d=c===void 0?s:c,v=0;v<f;++v){for(var _=l[v],b=new Array(3),p=0;p<3;++p)b[p]=u[_[p]];for(var k=new Array(3),E=new Array(3),p=0;p<3;++p)k[p]=b[1][p]-b[0][p],E[p]=b[2][p]-b[0][p];for(var S=new Array(3),L=0,p=0;p<3;++p){var x=(p+1)%3,C=(p+2)%3;S[p]=k[x]*E[C]-k[C]*E[x],L+=S[p]*S[p]}L>d?L=1/Math.sqrt(L):L=0;for(var p=0;p<3;++p)S[p]*=L;h[v]=S}return h}},8418:function(i,a,o){"use strict";var s=o(5219),l=o(2762),u=o(8116),c=o(1888),f=o(6760),h=o(1283),d=o(9366),v=o(5964),_=[1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1],b=ArrayBuffer,p=DataView;function k(Ae){return b.isView(Ae)&&!(Ae instanceof p)}function E(Ae){return Array.isArray(Ae)||k(Ae)}i.exports=Le;function S(Ae,Fe){var Pe=Ae[0],ge=Ae[1],Re=Ae[2],ce=Ae[3];return Ae[0]=Fe[0]*Pe+Fe[4]*ge+Fe[8]*Re+Fe[12]*ce,Ae[1]=Fe[1]*Pe+Fe[5]*ge+Fe[9]*Re+Fe[13]*ce,Ae[2]=Fe[2]*Pe+Fe[6]*ge+Fe[10]*Re+Fe[14]*ce,Ae[3]=Fe[3]*Pe+Fe[7]*ge+Fe[11]*Re+Fe[15]*ce,Ae}function L(Ae,Fe,Pe,ge){return S(ge,ge,Pe),S(ge,ge,Fe),S(ge,ge,Ae)}function x(Ae,Fe){this.index=Ae,this.dataCoordinate=this.position=Fe}function C(Ae){return Ae===!0||Ae>1?1:Ae}function M(Ae,Fe,Pe,ge,Re,ce,Ze,ut,pt,Zt,st,lt){this.gl=Ae,this.pixelRatio=1,this.shader=Fe,this.orthoShader=Pe,this.projectShader=ge,this.pointBuffer=Re,this.colorBuffer=ce,this.glyphBuffer=Ze,this.idBuffer=ut,this.vao=pt,this.vertexCount=0,this.lineVertexCount=0,this.opacity=1,this.hasAlpha=!1,this.lineWidth=0,this.projectScale=[.6666666666666666,.6666666666666666,.6666666666666666],this.projectOpacity=[1,1,1],this.projectHasAlpha=!1,this.pickId=0,this.pickPerspectiveShader=Zt,this.pickOrthoShader=st,this.pickProjectShader=lt,this.points=[],this._selectResult=new x(0,[0,0,0]),this.useOrtho=!0,this.bounds=[[1/0,1/0,1/0],[-1/0,-1/0,-1/0]],this.axesProject=[!0,!0,!0],this.axesBounds=[[-1/0,-1/0,-1/0],[1/0,1/0,1/0]],this.highlightId=[1,1,1,1],this.highlightScale=2,this.clipBounds=[[-1/0,-1/0,-1/0],[1/0,1/0,1/0]],this.dirty=!0}var g=M.prototype;g.pickSlots=1,g.setPickBase=function(Ae){this.pickId=Ae},g.isTransparent=function(){if(this.hasAlpha)return!0;for(var Ae=0;Ae<3;++Ae)if(this.axesProject[Ae]&&this.projectHasAlpha)return!0;return!1},g.isOpaque=function(){if(!this.hasAlpha)return!0;for(var Ae=0;Ae<3;++Ae)if(this.axesProject[Ae]&&!this.projectHasAlpha)return!0;return!1};var P=[0,0],T=[0,0,0],z=[0,0,0],O=[0,0,0,1],V=[0,0,0,1],G=_.slice(),Z=[0,0,0],j=[[0,0,0],[0,0,0]];function N(Ae){return Ae[0]=Ae[1]=Ae[2]=0,Ae}function H(Ae,Fe){return Ae[0]=Fe[0],Ae[1]=Fe[1],Ae[2]=Fe[2],Ae[3]=1,Ae}function te(Ae,Fe,Pe,ge){return Ae[0]=Fe[0],Ae[1]=Fe[1],Ae[2]=Fe[2],Ae[Pe]=ge,Ae}function oe(Ae){for(var Fe=j,Pe=0;Pe<2;++Pe)for(var ge=0;ge<3;++ge)Fe[Pe][ge]=Math.max(Math.min(Ae[Pe][ge],1e8),-1e8);return Fe}function _e(Ae,Fe,Pe,ge){var Re=Fe.axesProject,ce=Fe.gl,Ze=Ae.uniforms,ut=Pe.model||_,pt=Pe.view||_,Zt=Pe.projection||_,st=Fe.axesBounds,lt=oe(Fe.clipBounds),Gt;Fe.axes&&Fe.axes.lastCubeProps?Gt=Fe.axes.lastCubeProps.axis:Gt=[1,1,1],P[0]=2/ce.drawingBufferWidth,P[1]=2/ce.drawingBufferHeight,Ae.bind(),Ze.view=pt,Ze.projection=Zt,Ze.screenSize=P,Ze.highlightId=Fe.highlightId,Ze.highlightScale=Fe.highlightScale,Ze.clipBounds=lt,Ze.pickGroup=Fe.pickId/255,Ze.pixelRatio=ge;for(var Nt=0;Nt<3;++Nt)if(Re[Nt]){Ze.scale=Fe.projectScale[Nt],Ze.opacity=Fe.projectOpacity[Nt];for(var Jt=G,sr=0;sr<16;++sr)Jt[sr]=0;for(var sr=0;sr<4;++sr)Jt[5*sr]=1;Jt[5*Nt]=0,Gt[Nt]<0?Jt[12+Nt]=st[0][Nt]:Jt[12+Nt]=st[1][Nt],f(Jt,ut,Jt),Ze.model=Jt;var wr=(Nt+1)%3,cr=(Nt+2)%3,$e=N(T),St=N(z);$e[wr]=1,St[cr]=1;var Qt=L(Zt,pt,ut,H(O,$e)),Vt=L(Zt,pt,ut,H(V,St));if(Math.abs(Qt[1])>Math.abs(Vt[1])){var _t=Qt;Qt=Vt,Vt=_t,_t=$e,$e=St,St=_t;var It=wr;wr=cr,cr=It}Qt[0]<0&&($e[wr]=-1),Vt[1]>0&&(St[cr]=-1);for(var mt=0,er=0,sr=0;sr<4;++sr)mt+=Math.pow(ut[4*wr+sr],2),er+=Math.pow(ut[4*cr+sr],2);$e[wr]/=Math.sqrt(mt),St[cr]/=Math.sqrt(er),Ze.axes[0]=$e,Ze.axes[1]=St,Ze.fragClipBounds[0]=te(Z,lt[0],Nt,-1e8),Ze.fragClipBounds[1]=te(Z,lt[1],Nt,1e8),Fe.vao.bind(),Fe.vao.draw(ce.TRIANGLES,Fe.vertexCount),Fe.lineWidth>0&&(ce.lineWidth(Fe.lineWidth*ge),Fe.vao.draw(ce.LINES,Fe.lineVertexCount,Fe.vertexCount)),Fe.vao.unbind()}}var Ee=[-1e8,-1e8,-1e8],Ce=[1e8,1e8,1e8],me=[Ee,Ce];function ie(Ae,Fe,Pe,ge,Re,ce,Ze){var ut=Pe.gl;if((ce===Pe.projectHasAlpha||Ze)&&_e(Fe,Pe,ge,Re),ce===Pe.hasAlpha||Ze){Ae.bind();var pt=Ae.uniforms;pt.model=ge.model||_,pt.view=ge.view||_,pt.projection=ge.projection||_,P[0]=2/ut.drawingBufferWidth,P[1]=2/ut.drawingBufferHeight,pt.screenSize=P,pt.highlightId=Pe.highlightId,pt.highlightScale=Pe.highlightScale,pt.fragClipBounds=me,pt.clipBounds=Pe.axes.bounds,pt.opacity=Pe.opacity,pt.pickGroup=Pe.pickId/255,pt.pixelRatio=Re,Pe.vao.bind(),Pe.vao.draw(ut.TRIANGLES,Pe.vertexCount),Pe.lineWidth>0&&(ut.lineWidth(Pe.lineWidth*Re),Pe.vao.draw(ut.LINES,Pe.lineVertexCount,Pe.vertexCount)),Pe.vao.unbind()}}g.draw=function(Ae){var Fe=this.useOrtho?this.orthoShader:this.shader;ie(Fe,this.projectShader,this,Ae,this.pixelRatio,!1,!1)},g.drawTransparent=function(Ae){var Fe=this.useOrtho?this.orthoShader:this.shader;ie(Fe,this.projectShader,this,Ae,this.pixelRatio,!0,!1)},g.drawPick=function(Ae){var Fe=this.useOrtho?this.pickOrthoShader:this.pickPerspectiveShader;ie(Fe,this.pickProjectShader,this,Ae,1,!0,!0)},g.pick=function(Ae){if(!Ae||Ae.id!==this.pickId)return null;var Fe=Ae.value[2]+(Ae.value[1]<<8)+(Ae.value[0]<<16);if(Fe>=this.pointCount||Fe<0)return null;var Pe=this.points[Fe],ge=this._selectResult;ge.index=Fe;for(var Re=0;Re<3;++Re)ge.position[Re]=ge.dataCoordinate[Re]=Pe[Re];return ge},g.highlight=function(Ae){if(!Ae)this.highlightId=[1,1,1,1];else{var Fe=Ae.index,Pe=Fe&255,ge=Fe>>8&255,Re=Fe>>16&255;this.highlightId=[Pe/255,ge/255,Re/255,0]}};function Se(Ae,Fe,Pe,ge){var Re;E(Ae)?Fe<Ae.length?Re=Ae[Fe]:Re=void 0:Re=Ae,Re=v(Re);var ce=!0;s(Re)&&(Re="\u25BC",ce=!1),Pe||(Pe={});var Ze=Pe.family;E(Ze)&&(Ze=Ze[Fe]),Ze||(Ze="normal");var ut=Pe.weight;E(ut)&&(ut=ut[Fe]),ut||(ut="normal");var pt=Pe.style;E(pt)&&(pt=pt[Fe]),pt||(pt="normal");var Zt=Pe.variant;E(Zt)&&(Zt=Zt[Fe]),Zt||(Zt="normal");var st=d(Re,{family:Ze,weight:ut,style:pt,variant:Zt},ge),st=d(Re,Pe,ge);return{mesh:st[0],lines:st[1],bounds:st[2],visible:ce}}g.update=function(Ae){if(Ae=Ae||{},"perspective"in Ae&&(this.useOrtho=!Ae.perspective),"orthographic"in Ae&&(this.useOrtho=!!Ae.orthographic),"lineWidth"in Ae&&(this.lineWidth=Ae.lineWidth),"project"in Ae)if(E(Ae.project))this.axesProject=Ae.project;else{var Fe=!!Ae.project;this.axesProject=[Fe,Fe,Fe]}if("projectScale"in Ae)if(E(Ae.projectScale))this.projectScale=Ae.projectScale.slice();else{var Pe=+Ae.projectScale;this.projectScale=[Pe,Pe,Pe]}if(this.projectHasAlpha=!1,"projectOpacity"in Ae){if(E(Ae.projectOpacity))this.projectOpacity=Ae.projectOpacity.slice();else{var Pe=+Ae.projectOpacity;this.projectOpacity=[Pe,Pe,Pe]}for(var ge=0;ge<3;++ge)this.projectOpacity[ge]=C(this.projectOpacity[ge]),this.projectOpacity[ge]<1&&(this.projectHasAlpha=!0)}this.hasAlpha=!1,"opacity"in Ae&&(this.opacity=C(Ae.opacity),this.opacity<1&&(this.hasAlpha=!0)),this.dirty=!0;var Re=Ae.position,ce={family:Ae.font||"normal",style:Ae.fontStyle||"normal",weight:Ae.fontWeight||"normal",variant:Ae.fontVariant||"normal"},Ze=Ae.alignment||[0,0],ut,pt;if(Ze.length===2)ut=Ze[0],pt=Ze[1];else{ut=[],pt=[];for(var ge=0;ge<Ze.length;++ge)ut[ge]=Ze[ge][0],pt[ge]=Ze[ge][1]}var Zt=[1/0,1/0,1/0],st=[-1/0,-1/0,-1/0],lt=Ae.glyph,Gt=Ae.color,Nt=Ae.size,Jt=Ae.angle,sr=Ae.lineColor,wr=-1,cr=0,$e=0,St=0;if(Re.length){St=Re.length;e:for(var ge=0;ge<St;++ge){for(var Qt=Re[ge],Vt=0;Vt<3;++Vt)if(isNaN(Qt[Vt])||!isFinite(Qt[Vt]))continue e;var _t=Se(lt,ge,ce,this.pixelRatio),It=_t.mesh,mt=_t.lines,er=_t.bounds;cr+=It.cells.length*3,$e+=mt.edges.length*2}}var lr=cr+$e,Tr=c.mallocFloat(3*lr),Lr=c.mallocFloat(4*lr),ti=c.mallocFloat(2*lr),Br=c.mallocUint32(lr);if(lr>0){var Vr=0,dt=cr,Ge=[0,0,0,1],Je=[0,0,0,1],je=E(Gt)&&E(Gt[0]),tt=E(sr)&&E(sr[0]);e:for(var ge=0;ge<St;++ge){wr+=1;for(var Qt=Re[ge],Vt=0;Vt<3;++Vt){if(isNaN(Qt[Vt])||!isFinite(Qt[Vt]))continue e;st[Vt]=Math.max(st[Vt],Qt[Vt]),Zt[Vt]=Math.min(Zt[Vt],Qt[Vt])}var _t=Se(lt,ge,ce,this.pixelRatio),It=_t.mesh,mt=_t.lines,er=_t.bounds,xt=_t.visible;if(!xt)Ge=[1,1,1,0];else if(E(Gt)){var Ie;if(je?ge<Gt.length?Ie=Gt[ge]:Ie=[0,0,0,0]:Ie=Gt,Ie.length===3){for(var Vt=0;Vt<3;++Vt)Ge[Vt]=Ie[Vt];Ge[3]=1}else if(Ie.length===4){for(var Vt=0;Vt<4;++Vt)Ge[Vt]=Ie[Vt];!this.hasAlpha&&Ie[3]<1&&(this.hasAlpha=!0)}}else Ge[0]=Ge[1]=Ge[2]=0,Ge[3]=1;if(!xt)Je=[1,1,1,0];else if(E(sr)){var Ie;if(tt?ge<sr.length?Ie=sr[ge]:Ie=[0,0,0,0]:Ie=sr,Ie.length===3){for(var Vt=0;Vt<3;++Vt)Je[Vt]=Ie[Vt];Je[Vt]=1}else if(Ie.length===4){for(var Vt=0;Vt<4;++Vt)Je[Vt]=Ie[Vt];!this.hasAlpha&&Ie[3]<1&&(this.hasAlpha=!0)}}else Je[0]=Je[1]=Je[2]=0,Je[3]=1;var xe=.5;xt?E(Nt)?ge<Nt.length?xe=+Nt[ge]:xe=12:Nt?xe=+Nt:this.useOrtho&&(xe=12):xe=0;var ke=0;E(Jt)?ge<Jt.length?ke=+Jt[ge]:ke=0:Jt&&(ke=+Jt);for(var vt=Math.cos(ke),ir=Math.sin(ke),Qt=Re[ge],Vt=0;Vt<3;++Vt)st[Vt]=Math.max(st[Vt],Qt[Vt]),Zt[Vt]=Math.min(Zt[Vt],Qt[Vt]);var ar=ut,vr=pt,ar=0;E(ut)?ge<ut.length?ar=ut[ge]:ar=0:ut&&(ar=ut);var vr=0;E(pt)?ge<pt.length?vr=pt[ge]:vr=0:pt&&(vr=pt),ar*=ar>0?1-er[0][0]:ar<0?1+er[1][0]:1,vr*=vr>0?1-er[0][1]:vr<0?1+er[1][1]:1;for(var ii=[ar,vr],In=It.cells||[],wi=It.positions||[],Vt=0;Vt<In.length;++Vt)for(var pi=In[Vt],$r=0;$r<3;++$r){for(var di=0;di<3;++di)Tr[3*Vr+di]=Qt[di];for(var di=0;di<4;++di)Lr[4*Vr+di]=Ge[di];Br[Vr]=wr;var ji=wi[pi[$r]];ti[2*Vr]=xe*(vt*ji[0]-ir*ji[1]+ii[0]),ti[2*Vr+1]=xe*(ir*ji[0]+vt*ji[1]+ii[1]),Vr+=1}for(var In=mt.edges,wi=mt.positions,Vt=0;Vt<In.length;++Vt)for(var pi=In[Vt],$r=0;$r<2;++$r){for(var di=0;di<3;++di)Tr[3*dt+di]=Qt[di];for(var di=0;di<4;++di)Lr[4*dt+di]=Je[di];Br[dt]=wr;var ji=wi[pi[$r]];ti[2*dt]=xe*(vt*ji[0]-ir*ji[1]+ii[0]),ti[2*dt+1]=xe*(ir*ji[0]+vt*ji[1]+ii[1]),dt+=1}}}this.bounds=[Zt,st],this.points=Re,this.pointCount=Re.length,this.vertexCount=cr,this.lineVertexCount=$e,this.pointBuffer.update(Tr),this.colorBuffer.update(Lr),this.glyphBuffer.update(ti),this.idBuffer.update(Br),c.free(Tr),c.free(Lr),c.free(ti),c.free(Br)},g.dispose=function(){this.shader.dispose(),this.orthoShader.dispose(),this.pickPerspectiveShader.dispose(),this.pickOrthoShader.dispose(),this.vao.dispose(),this.pointBuffer.dispose(),this.colorBuffer.dispose(),this.glyphBuffer.dispose(),this.idBuffer.dispose()};function Le(Ae){var Fe=Ae.gl,Pe=h.createPerspective(Fe),ge=h.createOrtho(Fe),Re=h.createProject(Fe),ce=h.createPickPerspective(Fe),Ze=h.createPickOrtho(Fe),ut=h.createPickProject(Fe),pt=l(Fe),Zt=l(Fe),st=l(Fe),lt=l(Fe),Gt=u(Fe,[{buffer:pt,size:3,type:Fe.FLOAT},{buffer:Zt,size:4,type:Fe.FLOAT},{buffer:st,size:2,type:Fe.FLOAT},{buffer:lt,size:4,type:Fe.UNSIGNED_BYTE,normalized:!0}]),Nt=new M(Fe,Pe,ge,Re,pt,Zt,st,lt,Gt,ce,Ze,ut);return Nt.update(Ae),Nt}},8489:function(i){i.exports=a;function a(o,s,l,u){return o[0]=s[0]+l[0]*u,o[1]=s[1]+l[1]*u,o[2]=s[2]+l[2]*u,o}},8507:function(i){i.exports=s;var a=Math.min;function o(l,u){return l-u}function s(l,u){var c=l.length,f=l.length-u.length;if(f)return f;switch(c){case 0:return 0;case 1:return l[0]-u[0];case 2:return l[0]+l[1]-u[0]-u[1]||a(l[0],l[1])-a(u[0],u[1]);case 3:var h=l[0]+l[1],d=u[0]+u[1];if(f=h+l[2]-(d+u[2]),f)return f;var v=a(l[0],l[1]),_=a(u[0],u[1]);return a(v,l[2])-a(_,u[2])||a(v+l[2],h)-a(_+u[2],d);case 4:var b=l[0],p=l[1],k=l[2],E=l[3],S=u[0],L=u[1],x=u[2],C=u[3];return b+p+k+E-(S+L+x+C)||a(b,p,k,E)-a(S,L,x,C,S)||a(b+p,b+k,b+E,p+k,p+E,k+E)-a(S+L,S+x,S+C,L+x,L+C,x+C)||a(b+p+k,b+p+E,b+k+E,p+k+E)-a(S+L+x,S+L+C,S+x+C,L+x+C);default:for(var M=l.slice().sort(o),g=u.slice().sort(o),P=0;P<c;++P)if(f=M[P]-g[P],f)return f;return 0}}},8512:function(i,a,o){"use strict";var s=o(665);i.exports=l;function l(u,c,f){typeof u=="function"&&(f=!!c,c=u,u=window);var h=s("ex",u),d=function(v){f&&v.preventDefault();var _=v.deltaX||0,b=v.deltaY||0,p=v.deltaZ||0,k=v.deltaMode,E=1;switch(k){case 1:E=h;break;case 2:E=window.innerHeight;break}if(_*=E,b*=E,p*=E,_||b||p)return c(_,b,p,v)};return u.addEventListener("wheel",d),d}},8545:function(i){"use strict";i.exports=o;function a(s,l){var u=s+l,c=u-s,f=u-c,h=l-c,d=s-f,v=d+h;return v?[v,u]:[u]}function o(s,l){var u=s.length|0,c=l.length|0;if(u===1&&c===1)return a(s[0],-l[0]);var f=u+c,h=new Array(f),d=0,v=0,_=0,b=Math.abs,p=s[v],k=b(p),E=-l[_],S=b(E),L,x;k<S?(x=p,v+=1,v<u&&(p=s[v],k=b(p))):(x=E,_+=1,_<c&&(E=-l[_],S=b(E))),v<u&&k<S||_>=c?(L=p,v+=1,v<u&&(p=s[v],k=b(p))):(L=E,_+=1,_<c&&(E=-l[_],S=b(E)));for(var C=L+x,M=C-L,g=x-M,P=g,T=C,z,O,V,G,Z;v<u&&_<c;)k<S?(L=p,v+=1,v<u&&(p=s[v],k=b(p))):(L=E,_+=1,_<c&&(E=-l[_],S=b(E))),x=P,C=L+x,M=C-L,g=x-M,g&&(h[d++]=g),z=T+C,O=z-T,V=z-O,G=C-O,Z=T-V,P=Z+G,T=z;for(;v<u;)L=p,x=P,C=L+x,M=C-L,g=x-M,g&&(h[d++]=g),z=T+C,O=z-T,V=z-O,G=C-O,Z=T-V,P=Z+G,T=z,v+=1,v<u&&(p=s[v]);for(;_<c;)L=E,x=P,C=L+x,M=C-L,g=x-M,g&&(h[d++]=g),z=T+C,O=z-T,V=z-O,G=C-O,Z=T-V,P=Z+G,T=z,_+=1,_<c&&(E=-l[_]);return P&&(h[d++]=P),T&&(h[d++]=T),d||(h[d++]=0),h.length=d,h}},8572:function(i){"use strict";i.exports=function(o){return o<0?-1:o>0?1:0}},8648:function(i,a,o){i.exports=o(783)},8692:function(i){i.exports=a;function a(o,s,l,u){var c=l[0],f=l[1],h=s[0]-c,d=s[1]-f,v=Math.sin(u),_=Math.cos(u);return o[0]=c+h*_-d*v,o[1]=f+h*v+d*_,o[2]=s[2],o}},8697:function(i,a,o){"use strict";var s=o(869);i.exports=l;function l(u,c){return s(u[0].mul(c[1]),u[1].mul(c[0]))}},8731:function(i,a,o){"use strict";i.exports=d;var s=o(8866);function l(v,_,b,p,k,E){this._gl=v,this._wrapper=_,this._index=b,this._locations=p,this._dimension=k,this._constFunc=E}var u=l.prototype;u.pointer=function(_,b,p,k){var E=this,S=E._gl,L=E._locations[E._index];S.vertexAttribPointer(L,E._dimension,_||S.FLOAT,!!b,p||0,k||0),S.enableVertexAttribArray(L)},u.set=function(v,_,b,p){return this._constFunc(this._locations[this._index],v,_,b,p)},Object.defineProperty(u,"location",{get:function(){return this._locations[this._index]},set:function(v){return v!==this._locations[this._index]&&(this._locations[this._index]=v|0,this._wrapper.program=null),v|0}});var c=[function(v,_,b){return b.length===void 0?v.vertexAttrib1f(_,b):v.vertexAttrib1fv(_,b)},function(v,_,b,p){return b.length===void 0?v.vertexAttrib2f(_,b,p):v.vertexAttrib2fv(_,b)},function(v,_,b,p,k){return b.length===void 0?v.vertexAttrib3f(_,b,p,k):v.vertexAttrib3fv(_,b)},function(v,_,b,p,k,E){return b.length===void 0?v.vertexAttrib4f(_,b,p,k,E):v.vertexAttrib4fv(_,b)}];function f(v,_,b,p,k,E,S){var L=c[k],x=new l(v,_,b,p,k,L);Object.defineProperty(E,S,{set:function(C){return v.disableVertexAttribArray(p[b]),L(v,p[b],C),C},get:function(){return x},enumerable:!0})}function h(v,_,b,p,k,E,S){for(var L=new Array(k),x=new Array(k),C=0;C<k;++C)f(v,_,b[C],p,k,L,C),x[C]=L[C];Object.defineProperty(L,"location",{set:function(P){if(Array.isArray(P))for(var T=0;T<k;++T)x[T].location=P[T];else for(var T=0;T<k;++T)x[T].location=P+T;return P},get:function(){for(var P=new Array(k),T=0;T<k;++T)P[T]=p[b[T]];return P},enumerable:!0}),L.pointer=function(P,T,z,O){P=P||v.FLOAT,T=!!T,z=z||k*k,O=O||0;for(var V=0;V<k;++V){var G=p[b[V]];v.vertexAttribPointer(G,k,P,T,z,O+V*k),v.enableVertexAttribArray(G)}};var M=new Array(k),g=v["vertexAttrib"+k+"fv"];Object.defineProperty(E,S,{set:function(P){for(var T=0;T<k;++T){var z=p[b[T]];if(v.disableVertexAttribArray(z),Array.isArray(P[0]))g.call(v,z,P[T]);else{for(var O=0;O<k;++O)M[O]=P[k*T+O];g.call(v,z,M)}}return P},get:function(){return L},enumerable:!0})}function d(v,_,b,p){for(var k={},E=0,S=b.length;E<S;++E){var L=b[E],x=L.name,C=L.type,M=L.locations;switch(C){case"bool":case"int":case"float":f(v,_,M[0],p,1,k,x);break;default:if(C.indexOf("vec")>=0){var g=C.charCodeAt(C.length-1)-48;if(g<2||g>4)throw new s("","Invalid data type for attribute "+x+": "+C);f(v,_,M[0],p,g,k,x)}else if(C.indexOf("mat")>=0){var g=C.charCodeAt(C.length-1)-48;if(g<2||g>4)throw new s("","Invalid data type for attribute "+x+": "+C);h(v,_,M,p,g,k,x)}else throw new s("","Unknown data type for attribute "+x+": "+C);break}}return k}},8828:function(i,a){"use strict";"use restrict";var o=32;a.INT_BITS=o,a.INT_MAX=2147483647,a.INT_MIN=-1<<o-1,a.sign=function(u){return(u>0)-(u<0)},a.abs=function(u){var c=u>>o-1;return(u^c)-c},a.min=function(u,c){return c^(u^c)&-(u<c)},a.max=function(u,c){return u^(u^c)&-(u<c)},a.isPow2=function(u){return!(u&u-1)&&!!u},a.log2=function(u){var c,f;return c=(u>65535)<<4,u>>>=c,f=(u>255)<<3,u>>>=f,c|=f,f=(u>15)<<2,u>>>=f,c|=f,f=(u>3)<<1,u>>>=f,c|=f,c|u>>1},a.log10=function(u){return u>=1e9?9:u>=1e8?8:u>=1e7?7:u>=1e6?6:u>=1e5?5:u>=1e4?4:u>=1e3?3:u>=100?2:u>=10?1:0},a.popCount=function(u){return u=u-(u>>>1&1431655765),u=(u&858993459)+(u>>>2&858993459),(u+(u>>>4)&252645135)*16843009>>>24};function s(u){var c=32;return u&=-u,u&&c--,u&65535&&(c-=16),u&16711935&&(c-=8),u&252645135&&(c-=4),u&858993459&&(c-=2),u&1431655765&&(c-=1),c}a.countTrailingZeros=s,a.nextPow2=function(u){return u+=u===0,--u,u|=u>>>1,u|=u>>>2,u|=u>>>4,u|=u>>>8,u|=u>>>16,u+1},a.prevPow2=function(u){return u|=u>>>1,u|=u>>>2,u|=u>>>4,u|=u>>>8,u|=u>>>16,u-(u>>>1)},a.parity=function(u){return u^=u>>>16,u^=u>>>8,u^=u>>>4,u&=15,27030>>>u&1};var l=new Array(256);(function(u){for(var c=0;c<256;++c){var f=c,h=c,d=7;for(f>>>=1;f;f>>>=1)h<<=1,h|=f&1,--d;u[c]=h<<d&255}})(l),a.reverse=function(u){return l[u&255]<<24|l[u>>>8&255]<<16|l[u>>>16&255]<<8|l[u>>>24&255]},a.interleave2=function(u,c){return u&=65535,u=(u|u<<8)&16711935,u=(u|u<<4)&252645135,u=(u|u<<2)&858993459,u=(u|u<<1)&1431655765,c&=65535,c=(c|c<<8)&16711935,c=(c|c<<4)&252645135,c=(c|c<<2)&858993459,c=(c|c<<1)&1431655765,u|c<<1},a.deinterleave2=function(u,c){return u=u>>>c&1431655765,u=(u|u>>>1)&858993459,u=(u|u>>>2)&252645135,u=(u|u>>>4)&16711935,u=(u|u>>>16)&65535,u<<16>>16},a.interleave3=function(u,c,f){return u&=1023,u=(u|u<<16)&4278190335,u=(u|u<<8)&251719695,u=(u|u<<4)&3272356035,u=(u|u<<2)&1227133513,c&=1023,c=(c|c<<16)&4278190335,c=(c|c<<8)&251719695,c=(c|c<<4)&3272356035,c=(c|c<<2)&1227133513,u|=c<<1,f&=1023,f=(f|f<<16)&4278190335,f=(f|f<<8)&251719695,f=(f|f<<4)&3272356035,f=(f|f<<2)&1227133513,u|f<<2},a.deinterleave3=function(u,c){return u=u>>>c&1227133513,u=(u|u>>>2)&3272356035,u=(u|u>>>4)&251719695,u=(u|u>>>8)&4278190335,u=(u|u>>>16)&1023,u<<22>>22},a.nextCombination=function(u){var c=u|u-1;return c+1|(~c&-~c)-1>>>s(u)+1}},8866:function(i){function a(o,s,l){this.shortMessage=s||"",this.longMessage=l||"",this.rawError=o||"",this.message="gl-shader: "+(s||o||"")+(l?`
+`+l:""),this.stack=new Error().stack}a.prototype=new Error,a.prototype.name="GLError",a.prototype.constructor=a,i.exports=a},8902:function(i,a,o){"use strict";var s=o(2478),l=o(3250)[3],u=0,c=1,f=2;i.exports=S;function h(L,x,C,M,g){this.a=L,this.b=x,this.idx=C,this.lowerIds=M,this.upperIds=g}function d(L,x,C,M){this.a=L,this.b=x,this.type=C,this.idx=M}function v(L,x){var C=L.a[0]-x.a[0]||L.a[1]-x.a[1]||L.type-x.type;return C||L.type!==u&&(C=l(L.a,L.b,x.b),C)?C:L.idx-x.idx}function _(L,x){return l(L.a,L.b,x)}function b(L,x,C,M,g){for(var P=s.lt(x,M,_),T=s.gt(x,M,_),z=P;z<T;++z){for(var O=x[z],V=O.lowerIds,Z=V.length;Z>1&&l(C[V[Z-2]],C[V[Z-1]],M)>0;)L.push([V[Z-1],V[Z-2],g]),Z-=1;V.length=Z,V.push(g);for(var G=O.upperIds,Z=G.length;Z>1&&l(C[G[Z-2]],C[G[Z-1]],M)<0;)L.push([G[Z-2],G[Z-1],g]),Z-=1;G.length=Z,G.push(g)}}function p(L,x){var C;return L.a[0]<x.a[0]?C=l(L.a,L.b,x.a):C=l(x.b,x.a,L.a),C||(x.b[0]<L.b[0]?C=l(L.a,L.b,x.b):C=l(x.b,x.a,L.b),C||L.idx-x.idx)}function k(L,x,C){var M=s.le(L,C,p),g=L[M],P=g.upperIds,T=P[P.length-1];g.upperIds=[T],L.splice(M+1,0,new h(C.a,C.b,C.idx,[T],P))}function E(L,x,C){var M=C.a;C.a=C.b,C.b=M;var g=s.eq(L,C,p),P=L[g],T=L[g-1];T.upperIds=P.upperIds,L.splice(g,1)}function S(L,x){for(var C=L.length,M=x.length,g=[],P=0;P<C;++P)g.push(new d(L[P],null,u,P));for(var P=0;P<M;++P){var T=x[P],z=L[T[0]],O=L[T[1]];z[0]<O[0]?g.push(new d(z,O,f,P),new d(O,z,c,P)):z[0]>O[0]&&g.push(new d(O,z,f,P),new d(z,O,c,P))}g.sort(v);for(var V=g[0].a[0]-(1+Math.abs(g[0].a[0]))*Math.pow(2,-52),G=[new h([V,1],[V,0],-1,[],[],[],[])],Z=[],P=0,j=g.length;P<j;++P){var N=g[P],H=N.type;H===u?b(Z,G,L,N.a,N.idx):H===f?k(G,L,N):E(G,L,N)}return Z}},8954:function(i,a,o){"use strict";i.exports=p;var s=o(3250),l=o(6803).Fw;function u(k,E,S){this.vertices=k,this.adjacent=E,this.boundary=S,this.lastVisited=-1}u.prototype.flip=function(){var k=this.vertices[0];this.vertices[0]=this.vertices[1],this.vertices[1]=k;var E=this.adjacent[0];this.adjacent[0]=this.adjacent[1],this.adjacent[1]=E};function c(k,E,S){this.vertices=k,this.cell=E,this.index=S}function f(k,E){return l(k.vertices,E.vertices)}function h(k){return function(){var E=this.tuple;return k.apply(this,E)}}function d(k){var E=s[k+1];return E||(E=s),h(E)}var v=[];function _(k,E,S){this.dimension=k,this.vertices=E,this.simplices=S,this.interior=S.filter(function(C){return!C.boundary}),this.tuple=new Array(k+1);for(var L=0;L<=k;++L)this.tuple[L]=this.vertices[L];var x=v[k];x||(x=v[k]=d(k)),this.orient=x}var b=_.prototype;b.handleBoundaryDegeneracy=function(k,E){var S=this.dimension,L=this.vertices.length-1,x=this.tuple,C=this.vertices,M=[k];for(k.lastVisited=-L;M.length>0;){k=M.pop();for(var g=k.adjacent,P=0;P<=S;++P){var T=g[P];if(!(!T.boundary||T.lastVisited<=-L)){for(var z=T.vertices,O=0;O<=S;++O){var V=z[O];V<0?x[O]=E:x[O]=C[V]}var G=this.orient();if(G>0)return T;T.lastVisited=-L,G===0&&M.push(T)}}}return null},b.walk=function(k,E){var S=this.vertices.length-1,L=this.dimension,x=this.vertices,C=this.tuple,M=E?this.interior.length*Math.random()|0:this.interior.length-1,g=this.interior[M];e:for(;!g.boundary;){for(var P=g.vertices,T=g.adjacent,z=0;z<=L;++z)C[z]=x[P[z]];g.lastVisited=S;for(var z=0;z<=L;++z){var O=T[z];if(!(O.lastVisited>=S)){var V=C[z];C[z]=k;var G=this.orient();if(C[z]=V,G<0){g=O;continue e}else O.boundary?O.lastVisited=-S:O.lastVisited=S}}return}return g},b.addPeaks=function(k,E){var S=this.vertices.length-1,L=this.dimension,x=this.vertices,C=this.tuple,M=this.interior,g=this.simplices,P=[E];E.lastVisited=S,E.vertices[E.vertices.indexOf(-1)]=S,E.boundary=!1,M.push(E);for(var T=[];P.length>0;){var E=P.pop(),z=E.vertices,O=E.adjacent,V=z.indexOf(S);if(!(V<0)){for(var G=0;G<=L;++G)if(G!==V){var Z=O[G];if(!(!Z.boundary||Z.lastVisited>=S)){var j=Z.vertices;if(Z.lastVisited!==-S){for(var N=0,H=0;H<=L;++H)j[H]<0?(N=H,C[H]=k):C[H]=x[j[H]];var te=this.orient();if(te>0){j[N]=S,Z.boundary=!1,M.push(Z),P.push(Z),Z.lastVisited=S;continue}else Z.lastVisited=-S}var oe=Z.adjacent,_e=z.slice(),Ee=O.slice(),Ce=new u(_e,Ee,!0);g.push(Ce);var me=oe.indexOf(E);if(!(me<0)){oe[me]=Ce,Ee[V]=Z,_e[G]=-1,Ee[G]=E,O[G]=Ce,Ce.flip();for(var H=0;H<=L;++H){var ie=_e[H];if(!(ie<0||ie===S)){for(var Se=new Array(L-1),Le=0,Ae=0;Ae<=L;++Ae){var Fe=_e[Ae];Fe<0||Ae===H||(Se[Le++]=Fe)}T.push(new c(Se,Ce,H))}}}}}}}T.sort(f);for(var G=0;G+1<T.length;G+=2){var Pe=T[G],ge=T[G+1],Re=Pe.index,ce=ge.index;Re<0||ce<0||(Pe.cell.adjacent[Pe.index]=ge.cell,ge.cell.adjacent[ge.index]=Pe.cell)}},b.insert=function(k,E){var S=this.vertices;S.push(k);var L=this.walk(k,E);if(L){for(var x=this.dimension,C=this.tuple,M=0;M<=x;++M){var g=L.vertices[M];g<0?C[M]=k:C[M]=S[g]}var P=this.orient(C);P<0||P===0&&(L=this.handleBoundaryDegeneracy(L,k),!L)||this.addPeaks(k,L)}},b.boundary=function(){for(var k=this.dimension,E=[],S=this.simplices,L=S.length,x=0;x<L;++x){var C=S[x];if(C.boundary){for(var M=new Array(k),g=C.vertices,P=0,T=0,z=0;z<=k;++z)g[z]>=0?M[P++]=g[z]:T=z&1;if(T===(k&1)){var O=M[0];M[0]=M[1],M[1]=O}E.push(M)}}return E};function p(k,E){var S=k.length;if(S===0)throw new Error("Must have at least d+1 points");var L=k[0].length;if(S<=L)throw new Error("Must input at least d+1 points");var x=k.slice(0,L+1),C=s.apply(void 0,x);if(C===0)throw new Error("Input not in general position");for(var M=new Array(L+1),g=0;g<=L;++g)M[g]=g;C<0&&(M[0]=1,M[1]=0);for(var P=new u(M,new Array(L+1),!1),T=P.adjacent,z=new Array(L+2),g=0;g<=L;++g){for(var O=M.slice(),V=0;V<=L;++V)V===g&&(O[V]=-1);var G=O[0];O[0]=O[1],O[1]=G;var Z=new u(O,new Array(L+1),!0);T[g]=Z,z[g]=Z}z[L+1]=P;for(var g=0;g<=L;++g)for(var O=T[g].vertices,j=T[g].adjacent,V=0;V<=L;++V){var N=O[V];if(N<0){j[V]=P;continue}for(var H=0;H<=L;++H)T[H].vertices.indexOf(N)<0&&(j[V]=T[H])}for(var te=new _(L,x,z),oe=!!E,g=L+1;g<S;++g)te.insert(k[g],oe);return te.boundary()}},8987:function(i,a,o){"use strict";var s=o(7842),l=o(6504);i.exports=u;function u(c,f){for(var h=s(f),d=c.length,v=new Array(d),_=0;_<d;++_)v[_]=l(c[_],h);return v}},9060:function(i,a,o){"use strict";var s=o(9405),l=o(2762),u=o(8116),c=o(7766),f=o(6760),h=o(7608),d=o(9618),v=o(6729),_=[1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1];function b(C,M,g,P,T,z,O,V,G,Z,j){this.gl=C,this.pixelRatio=1,this.cells=[],this.positions=[],this.intensity=[],this.texture=M,this.dirty=!0,this.triShader=g,this.pickShader=P,this.trianglePositions=T,this.triangleVectors=z,this.triangleColors=V,this.triangleUVs=G,this.triangleIds=O,this.triangleVAO=Z,this.triangleCount=0,this.pickId=1,this.bounds=[[1/0,1/0,1/0],[-1/0,-1/0,-1/0]],this.clipBounds=[[-1/0,-1/0,-1/0],[1/0,1/0,1/0]],this.lightPosition=[1e5,1e5,0],this.ambientLight=.8,this.diffuseLight=.8,this.specularLight=2,this.roughness=.5,this.fresnel=1.5,this.opacity=1,this.traceType=j,this.tubeScale=1,this.coneScale=2,this.vectorScale=1,this.coneOffset=.25,this._model=_,this._view=_,this._projection=_,this._resolution=[1,1]}var p=b.prototype;p.isOpaque=function(){return this.opacity>=1},p.isTransparent=function(){return this.opacity<1},p.pickSlots=1,p.setPickBase=function(C){this.pickId=C};function k(C){for(var M=v({colormap:C,nshades:256,format:"rgba"}),g=new Uint8Array(256*4),P=0;P<256;++P){for(var T=M[P],z=0;z<3;++z)g[4*P+z]=T[z];g[4*P+3]=T[3]*255}return d(g,[256,256,4],[4,0,1])}function E(C){for(var M=C.length,g=new Array(M),P=0;P<M;++P)g[P]=C[P][2];return g}p.update=function(C){C=C||{};var M=this.gl;this.dirty=!0,"lightPosition"in C&&(this.lightPosition=C.lightPosition),"opacity"in C&&(this.opacity=C.opacity),"ambient"in C&&(this.ambientLight=C.ambient),"diffuse"in C&&(this.diffuseLight=C.diffuse),"specular"in C&&(this.specularLight=C.specular),"roughness"in C&&(this.roughness=C.roughness),"fresnel"in C&&(this.fresnel=C.fresnel),C.tubeScale!==void 0&&(this.tubeScale=C.tubeScale),C.vectorScale!==void 0&&(this.vectorScale=C.vectorScale),C.coneScale!==void 0&&(this.coneScale=C.coneScale),C.coneOffset!==void 0&&(this.coneOffset=C.coneOffset),C.colormap&&(this.texture.shape=[256,256],this.texture.minFilter=M.LINEAR_MIPMAP_LINEAR,this.texture.magFilter=M.LINEAR,this.texture.setPixels(k(C.colormap)),this.texture.generateMipmap());var g=C.cells,P=C.positions,T=C.vectors;if(!(!P||!g||!T)){var z=[],O=[],V=[],G=[],Z=[];this.cells=g,this.positions=P,this.vectors=T;var j=C.meshColor||[1,1,1,1],N=C.vertexIntensity,H=1/0,te=-1/0;if(N)if(C.vertexIntensityBounds)H=+C.vertexIntensityBounds[0],te=+C.vertexIntensityBounds[1];else for(var oe=0;oe<N.length;++oe){var _e=N[oe];H=Math.min(H,_e),te=Math.max(te,_e)}else for(var oe=0;oe<P.length;++oe){var _e=P[oe][2];H=Math.min(H,_e),te=Math.max(te,_e)}N?this.intensity=N:this.intensity=E(P),this.bounds=[[1/0,1/0,1/0],[-1/0,-1/0,-1/0]];for(var oe=0;oe<P.length;++oe)for(var Ee=P[oe],Ce=0;Ce<3;++Ce)isNaN(Ee[Ce])||!isFinite(Ee[Ce])||(this.bounds[0][Ce]=Math.min(this.bounds[0][Ce],Ee[Ce]),this.bounds[1][Ce]=Math.max(this.bounds[1][Ce],Ee[Ce]));var me=0;e:for(var oe=0;oe<g.length;++oe){var ie=g[oe];switch(ie.length){case 3:for(var Ce=0;Ce<3;++Ce)for(var Se=ie[Ce],Ee=P[Se],Le=0;Le<3;++Le)if(isNaN(Ee[Le])||!isFinite(Ee[Le]))continue e;for(var Ce=0;Ce<3;++Ce){var Se=ie[2-Ce],Ee=P[Se];z.push(Ee[0],Ee[1],Ee[2],Ee[3]);var Ae=T[Se];O.push(Ae[0],Ae[1],Ae[2],Ae[3]||0);var Fe=j;Fe.length===3?V.push(Fe[0],Fe[1],Fe[2],1):V.push(Fe[0],Fe[1],Fe[2],Fe[3]);var Pe;N?Pe=[(N[Se]-H)/(te-H),0]:Pe=[(Ee[2]-H)/(te-H),0],G.push(Pe[0],Pe[1]),Z.push(oe)}me+=1;break;default:break}}this.triangleCount=me,this.trianglePositions.update(z),this.triangleVectors.update(O),this.triangleColors.update(V),this.triangleUVs.update(G),this.triangleIds.update(new Uint32Array(Z))}},p.drawTransparent=p.draw=function(C){C=C||{};for(var M=this.gl,g=C.model||_,P=C.view||_,T=C.projection||_,z=[[-1e6,-1e6,-1e6],[1e6,1e6,1e6]],O=0;O<3;++O)z[0][O]=Math.max(z[0][O],this.clipBounds[0][O]),z[1][O]=Math.min(z[1][O],this.clipBounds[1][O]);var V={model:g,view:P,projection:T,inverseModel:_.slice(),clipBounds:z,kambient:this.ambientLight,kdiffuse:this.diffuseLight,kspecular:this.specularLight,roughness:this.roughness,fresnel:this.fresnel,eyePosition:[0,0,0],lightPosition:[0,0,0],opacity:this.opacity,tubeScale:this.tubeScale,vectorScale:this.vectorScale,coneScale:this.coneScale,coneOffset:this.coneOffset,texture:0};V.inverseModel=h(V.inverseModel,V.model),M.disable(M.CULL_FACE),this.texture.bind(0);var G=new Array(16);f(G,V.view,V.model),f(G,V.projection,G),h(G,G);for(var O=0;O<3;++O)V.eyePosition[O]=G[12+O]/G[15];for(var Z=G[15],O=0;O<3;++O)Z+=this.lightPosition[O]*G[4*O+3];for(var O=0;O<3;++O){for(var j=G[12+O],N=0;N<3;++N)j+=G[4*N+O]*this.lightPosition[N];V.lightPosition[O]=j/Z}if(this.triangleCount>0){var H=this.triShader;H.bind(),H.uniforms=V,this.triangleVAO.bind(),M.drawArrays(M.TRIANGLES,0,this.triangleCount*3),this.triangleVAO.unbind()}},p.drawPick=function(C){C=C||{};for(var M=this.gl,g=C.model||_,P=C.view||_,T=C.projection||_,z=[[-1e6,-1e6,-1e6],[1e6,1e6,1e6]],O=0;O<3;++O)z[0][O]=Math.max(z[0][O],this.clipBounds[0][O]),z[1][O]=Math.min(z[1][O],this.clipBounds[1][O]);this._model=[].slice.call(g),this._view=[].slice.call(P),this._projection=[].slice.call(T),this._resolution=[M.drawingBufferWidth,M.drawingBufferHeight];var V={model:g,view:P,projection:T,clipBounds:z,tubeScale:this.tubeScale,vectorScale:this.vectorScale,coneScale:this.coneScale,coneOffset:this.coneOffset,pickId:this.pickId/255},G=this.pickShader;G.bind(),G.uniforms=V,this.triangleCount>0&&(this.triangleVAO.bind(),M.drawArrays(M.TRIANGLES,0,this.triangleCount*3),this.triangleVAO.unbind())},p.pick=function(C){if(!C||C.id!==this.pickId)return null;var M=C.value[0]+256*C.value[1]+65536*C.value[2],g=this.cells[M],P=this.positions[g[1]].slice(0,3),T={position:P,dataCoordinate:P,index:Math.floor(g[1]/48)};return this.traceType==="cone"?T.index=Math.floor(g[1]/48):this.traceType==="streamtube"&&(T.intensity=this.intensity[g[1]],T.velocity=this.vectors[g[1]].slice(0,3),T.divergence=this.vectors[g[1]][3],T.index=M),T},p.dispose=function(){this.texture.dispose(),this.triShader.dispose(),this.pickShader.dispose(),this.triangleVAO.dispose(),this.trianglePositions.dispose(),this.triangleVectors.dispose(),this.triangleColors.dispose(),this.triangleUVs.dispose(),this.triangleIds.dispose()};function S(C,M){var g=s(C,M.meshShader.vertex,M.meshShader.fragment,null,M.meshShader.attributes);return g.attributes.position.location=0,g.attributes.color.location=2,g.attributes.uv.location=3,g.attributes.vector.location=4,g}function L(C,M){var g=s(C,M.pickShader.vertex,M.pickShader.fragment,null,M.pickShader.attributes);return g.attributes.position.location=0,g.attributes.id.location=1,g.attributes.vector.location=4,g}function x(C,M,g){var P=g.shaders;arguments.length===1&&(M=C,C=M.gl);var T=S(C,P),z=L(C,P),O=c(C,d(new Uint8Array([255,255,255,255]),[1,1,4]));O.generateMipmap(),O.minFilter=C.LINEAR_MIPMAP_LINEAR,O.magFilter=C.LINEAR;var V=l(C),G=l(C),Z=l(C),j=l(C),N=l(C),H=u(C,[{buffer:V,type:C.FLOAT,size:4},{buffer:N,type:C.UNSIGNED_BYTE,size:4,normalized:!0},{buffer:Z,type:C.FLOAT,size:4},{buffer:j,type:C.FLOAT,size:2},{buffer:G,type:C.FLOAT,size:4}]),te=new b(C,O,T,z,V,G,N,Z,j,H,g.traceType||"cone");return te.update(M),te}i.exports=x},9127:function(i,a,o){"use strict";i.exports=u;var s=o(6204),l=o(5771);function u(c){return l(s(c))}},9131:function(i,a,o){var s=o(5177),l=o(9288);i.exports=u;function u(c,f){return f=f||1,c[0]=Math.random(),c[1]=Math.random(),c[2]=Math.random(),c[3]=Math.random(),s(c,c),l(c,c,f),c}},9165:function(i,a,o){"use strict";i.exports=b;var s=o(2762),l=o(8116),u=o(3436),c=[1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1];function f(p,k,E,S){this.gl=p,this.shader=S,this.buffer=k,this.vao=E,this.pixelRatio=1,this.bounds=[[1/0,1/0,1/0],[-1/0,-1/0,-1/0]],this.clipBounds=[[-1/0,-1/0,-1/0],[1/0,1/0,1/0]],this.lineWidth=[1,1,1],this.capSize=[10,10,10],this.lineCount=[0,0,0],this.lineOffset=[0,0,0],this.opacity=1,this.hasAlpha=!1}var h=f.prototype;h.isOpaque=function(){return!this.hasAlpha},h.isTransparent=function(){return this.hasAlpha},h.drawTransparent=h.draw=function(p){var k=this.gl,E=this.shader.uniforms;this.shader.bind();var S=E.view=p.view||c,L=E.projection=p.projection||c;E.model=p.model||c,E.clipBounds=this.clipBounds,E.opacity=this.opacity;var x=S[12],C=S[13],M=S[14],g=S[15],P=p._ortho||!1,T=P?2:1,z=T*this.pixelRatio*(L[3]*x+L[7]*C+L[11]*M+L[15]*g)/k.drawingBufferHeight;this.vao.bind();for(var O=0;O<3;++O)k.lineWidth(this.lineWidth[O]*this.pixelRatio),E.capSize=this.capSize[O]*z,this.lineCount[O]&&k.drawArrays(k.LINES,this.lineOffset[O],this.lineCount[O]);this.vao.unbind()};function d(p,k){for(var E=0;E<3;++E)p[0][E]=Math.min(p[0][E],k[E]),p[1][E]=Math.max(p[1][E],k[E])}var v=function(){for(var p=new Array(3),k=0;k<3;++k){for(var E=[],S=1;S<=2;++S)for(var L=-1;L<=1;L+=2){var x=(S+k)%3,C=[0,0,0];C[x]=L,E.push(C)}p[k]=E}return p}();function _(p,k,E,S){for(var L=v[S],x=0;x<L.length;++x){var C=L[x];p.push(k[0],k[1],k[2],E[0],E[1],E[2],E[3],C[0],C[1],C[2])}return L.length}h.update=function(p){p=p||{},"lineWidth"in p&&(this.lineWidth=p.lineWidth,Array.isArray(this.lineWidth)||(this.lineWidth=[this.lineWidth,this.lineWidth,this.lineWidth])),"capSize"in p&&(this.capSize=p.capSize,Array.isArray(this.capSize)||(this.capSize=[this.capSize,this.capSize,this.capSize])),this.hasAlpha=!1,"opacity"in p&&(this.opacity=+p.opacity,this.opacity<1&&(this.hasAlpha=!0));var k=p.color||[[0,0,0],[0,0,0],[0,0,0]],E=p.position,S=p.error;if(Array.isArray(k[0])||(k=[k,k,k]),E&&S){var L=[],x=E.length,C=0;this.bounds=[[1/0,1/0,1/0],[-1/0,-1/0,-1/0]],this.lineCount=[0,0,0];for(var M=0;M<3;++M){this.lineOffset[M]=C;e:for(var g=0;g<x;++g){for(var P=E[g],T=0;T<3;++T)if(isNaN(P[T])||!isFinite(P[T]))continue e;var z=S[g],O=k[M];if(Array.isArray(O[0])&&(O=k[g]),O.length===3?O=[O[0],O[1],O[2],1]:O.length===4&&(O=[O[0],O[1],O[2],O[3]],!this.hasAlpha&&O[3]<1&&(this.hasAlpha=!0)),!(isNaN(z[0][M])||isNaN(z[1][M]))){if(z[0][M]<0){var V=P.slice();V[M]+=z[0][M],L.push(P[0],P[1],P[2],O[0],O[1],O[2],O[3],0,0,0,V[0],V[1],V[2],O[0],O[1],O[2],O[3],0,0,0),d(this.bounds,V),C+=2+_(L,V,O,M)}if(z[1][M]>0){var V=P.slice();V[M]+=z[1][M],L.push(P[0],P[1],P[2],O[0],O[1],O[2],O[3],0,0,0,V[0],V[1],V[2],O[0],O[1],O[2],O[3],0,0,0),d(this.bounds,V),C+=2+_(L,V,O,M)}}}this.lineCount[M]=C-this.lineOffset[M]}this.buffer.update(L)}},h.dispose=function(){this.shader.dispose(),this.buffer.dispose(),this.vao.dispose()};function b(p){var k=p.gl,E=s(k),S=l(k,[{buffer:E,type:k.FLOAT,size:3,offset:0,stride:40},{buffer:E,type:k.FLOAT,size:4,offset:12,stride:40},{buffer:E,type:k.FLOAT,size:3,offset:28,stride:40}]),L=u(k);L.attributes.position.location=0,L.attributes.color.location=1,L.attributes.offset.location=2;var x=new f(k,E,S,L);return x.update(p),x}},9215:function(i,a,o){"use strict";i.exports=d;var s=o(4769),l=o(2478);function u(v,_,b){return Math.min(_,Math.max(v,b))}function c(v,_,b){this.dimension=v.length,this.bounds=[new Array(this.dimension),new Array(this.dimension)];for(var p=0;p<this.dimension;++p)this.bounds[0][p]=-1/0,this.bounds[1][p]=1/0;this._state=v.slice().reverse(),this._velocity=_.slice().reverse(),this._time=[b],this._scratch=[v.slice(),v.slice(),v.slice(),v.slice(),v.slice()]}var f=c.prototype;f.flush=function(v){var _=l.gt(this._time,v)-1;_<=0||(this._time.splice(0,_),this._state.splice(0,_*this.dimension),this._velocity.splice(0,_*this.dimension))},f.curve=function(v){var _=this._time,b=_.length,p=l.le(_,v),k=this._scratch[0],E=this._state,S=this._velocity,L=this.dimension,x=this.bounds;if(p<0)for(var C=L-1,M=0;M<L;++M,--C)k[M]=E[C];else if(p>=b-1)for(var C=E.length-1,g=v-_[b-1],M=0;M<L;++M,--C)k[M]=E[C]+g*S[C];else{for(var C=L*(p+1)-1,P=_[p],T=_[p+1],z=T-P||1,O=this._scratch[1],V=this._scratch[2],G=this._scratch[3],Z=this._scratch[4],j=!0,M=0;M<L;++M,--C)O[M]=E[C],G[M]=S[C]*z,V[M]=E[C+L],Z[M]=S[C+L]*z,j=j&&O[M]===V[M]&&G[M]===Z[M]&&G[M]===0;if(j)for(var M=0;M<L;++M)k[M]=O[M];else s(O,G,V,Z,(v-P)/z,k)}for(var N=x[0],H=x[1],M=0;M<L;++M)k[M]=u(N[M],H[M],k[M]);return k},f.dcurve=function(v){var _=this._time,b=_.length,p=l.le(_,v),k=this._scratch[0],E=this._state,S=this._velocity,L=this.dimension;if(p>=b-1)for(var x=E.length-1,C=v-_[b-1],M=0;M<L;++M,--x)k[M]=S[x];else{for(var x=L*(p+1)-1,g=_[p],P=_[p+1],T=P-g||1,z=this._scratch[1],O=this._scratch[2],V=this._scratch[3],G=this._scratch[4],Z=!0,M=0;M<L;++M,--x)z[M]=E[x],V[M]=S[x]*T,O[M]=E[x+L],G[M]=S[x+L]*T,Z=Z&&z[M]===O[M]&&V[M]===G[M]&&V[M]===0;if(Z)for(var M=0;M<L;++M)k[M]=0;else{s.derivative(z,V,O,G,(v-g)/T,k);for(var M=0;M<L;++M)k[M]/=T}}return k},f.lastT=function(){var v=this._time;return v[v.length-1]},f.stable=function(){for(var v=this._velocity,_=v.length,b=this.dimension-1;b>=0;--b)if(v[--_])return!1;return!0},f.jump=function(v){var _=this.lastT(),b=this.dimension;if(!(v<_||arguments.length!==b+1)){var p=this._state,k=this._velocity,E=p.length-this.dimension,S=this.bounds,L=S[0],x=S[1];this._time.push(_,v);for(var C=0;C<2;++C)for(var M=0;M<b;++M)p.push(p[E++]),k.push(0);this._time.push(v);for(var M=b;M>0;--M)p.push(u(L[M-1],x[M-1],arguments[M])),k.push(0)}},f.push=function(v){var _=this.lastT(),b=this.dimension;if(!(v<_||arguments.length!==b+1)){var p=this._state,k=this._velocity,E=p.length-this.dimension,S=v-_,L=this.bounds,x=L[0],C=L[1],M=S>1e-6?1/S:0;this._time.push(v);for(var g=b;g>0;--g){var P=u(x[g-1],C[g-1],arguments[g]);p.push(P),k.push((P-p[E++])*M)}}},f.set=function(v){var _=this.dimension;if(!(v<this.lastT()||arguments.length!==_+1)){var b=this._state,p=this._velocity,k=this.bounds,E=k[0],S=k[1];this._time.push(v);for(var L=_;L>0;--L)b.push(u(E[L-1],S[L-1],arguments[L])),p.push(0)}},f.move=function(v){var _=this.lastT(),b=this.dimension;if(!(v<=_||arguments.length!==b+1)){var p=this._state,k=this._velocity,E=p.length-this.dimension,S=this.bounds,L=S[0],x=S[1],C=v-_,M=C>1e-6?1/C:0;this._time.push(v);for(var g=b;g>0;--g){var P=arguments[g];p.push(u(L[g-1],x[g-1],p[E++]+P)),k.push(P*M)}}},f.idle=function(v){var _=this.lastT();if(!(v<_)){var b=this.dimension,p=this._state,k=this._velocity,E=p.length-b,S=this.bounds,L=S[0],x=S[1],C=v-_;this._time.push(v);for(var M=b-1;M>=0;--M)p.push(u(L[M],x[M],p[E]+C*k[E])),k.push(0),E+=1}};function h(v){for(var _=new Array(v),b=0;b<v;++b)_[b]=0;return _}function d(v,_,b){switch(arguments.length){case 0:return new c([0],[0],0);case 1:if(typeof v=="number"){var p=h(v);return new c(p,p,0)}else return new c(v,h(v.length),0);case 2:if(typeof _=="number"){var p=h(v.length);return new c(v,p,+_)}else b=0;case 3:if(v.length!==_.length)throw new Error("state and velocity lengths must match");return new c(v,_,b)}}},9216:function(i){"use strict";i.exports=l,i.exports.isMobile=l,i.exports.default=l;var a=/(android|bb\d+|meego).+mobile|armv7l|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series[46]0|samsungbrowser.*mobile|symbian|treo|up\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino/i,o=/CrOS/,s=/android|ipad|playbook|silk/i;function l(u){u||(u={});var c=u.ua;if(!c&&typeof navigator!="undefined"&&(c=navigator.userAgent),c&&c.headers&&typeof c.headers["user-agent"]=="string"&&(c=c.headers["user-agent"]),typeof c!="string")return!1;var f=a.test(c)&&!o.test(c)||!!u.tablet&&s.test(c);return!f&&u.tablet&&u.featureDetect&&navigator&&navigator.maxTouchPoints>1&&c.indexOf("Macintosh")!==-1&&c.indexOf("Safari")!==-1&&(f=!0),f}},9226:function(i){i.exports=a;function a(o,s){return o[0]=Math.ceil(s[0]),o[1]=Math.ceil(s[1]),o[2]=Math.ceil(s[2]),o}},9265:function(i){i.exports=a;function a(o,s){return o[0]===s[0]&&o[1]===s[1]&&o[2]===s[2]}},9288:function(i){i.exports=a;function a(o,s,l){return o[0]=s[0]*l,o[1]=s[1]*l,o[2]=s[2]*l,o[3]=s[3]*l,o}},9346:function(i){"use strict";var a=new Float64Array(4),o=new Float64Array(4),s=new Float64Array(4);function l(u,c,f,h,d){a.length<h.length&&(a=new Float64Array(h.length),o=new Float64Array(h.length),s=new Float64Array(h.length));for(var v=0;v<h.length;++v)a[v]=u[v]-h[v],o[v]=c[v]-u[v],s[v]=f[v]-u[v];for(var _=0,b=0,p=0,k=0,E=0,S=0,v=0;v<h.length;++v){var L=o[v],x=s[v],C=a[v];_+=L*L,b+=L*x,p+=x*x,k+=C*L,E+=C*x,S+=C*C}var M=Math.abs(_*p-b*b),g=b*E-p*k,P=b*k-_*E,T;if(g+P<=M)if(g<0)P<0&&k<0?(P=0,-k>=_?(g=1,T=_+2*k+S):(g=-k/_,T=k*g+S)):(g=0,E>=0?(P=0,T=S):-E>=p?(P=1,T=p+2*E+S):(P=-E/p,T=E*P+S));else if(P<0)P=0,k>=0?(g=0,T=S):-k>=_?(g=1,T=_+2*k+S):(g=-k/_,T=k*g+S);else{var z=1/M;g*=z,P*=z,T=g*(_*g+b*P+2*k)+P*(b*g+p*P+2*E)+S}else{var O,V,G,Z;g<0?(O=b+k,V=p+E,V>O?(G=V-O,Z=_-2*b+p,G>=Z?(g=1,P=0,T=_+2*k+S):(g=G/Z,P=1-g,T=g*(_*g+b*P+2*k)+P*(b*g+p*P+2*E)+S)):(g=0,V<=0?(P=1,T=p+2*E+S):E>=0?(P=0,T=S):(P=-E/p,T=E*P+S))):P<0?(O=b+E,V=_+k,V>O?(G=V-O,Z=_-2*b+p,G>=Z?(P=1,g=0,T=p+2*E+S):(P=G/Z,g=1-P,T=g*(_*g+b*P+2*k)+P*(b*g+p*P+2*E)+S)):(P=0,V<=0?(g=1,T=_+2*k+S):k>=0?(g=0,T=S):(g=-k/_,T=k*g+S))):(G=p+E-b-k,G<=0?(g=0,P=1,T=p+2*E+S):(Z=_-2*b+p,G>=Z?(g=1,P=0,T=_+2*k+S):(g=G/Z,P=1-g,T=g*(_*g+b*P+2*k)+P*(b*g+p*P+2*E)+S)))}for(var j=1-g-P,v=0;v<h.length;++v)d[v]=j*u[v]+g*c[v]+P*f[v];return T<0?0:T}i.exports=l},9362:function(i){"use strict";i.exports=a;function a(o,s,l){var u=o+s,c=u-o,f=u-c,h=s-c,d=o-f;return l?(l[0]=d+h,l[1]=u,l):[d+h,u]}},9366:function(i,a,o){"use strict";var s=o(4359);i.exports=u;var l={};function u(c,f,h){var d=[f.style,f.weight,f.variant,f.family].join("_"),v=l[d];if(v||(v=l[d]={}),c in v)return v[c];var _={textAlign:"center",textBaseline:"middle",lineHeight:1,font:f.family,fontStyle:f.style,fontWeight:f.weight,fontVariant:f.variant,lineSpacing:1.25,styletags:{breaklines:!0,bolds:!0,italics:!0,subscripts:!0,superscripts:!0}};_.triangles=!0;var b=s(c,_);_.triangles=!1;var p=s(c,_),k,E;if(h&&h!==1){for(k=0;k<b.positions.length;++k)for(E=0;E<b.positions[k].length;++E)b.positions[k][E]/=h;for(k=0;k<p.positions.length;++k)for(E=0;E<p.positions[k].length;++E)p.positions[k][E]/=h}var S=[[1/0,1/0],[-1/0,-1/0]],L=p.positions.length;for(k=0;k<L;++k){var x=p.positions[k];for(E=0;E<2;++E)S[0][E]=Math.min(S[0][E],x[E]),S[1][E]=Math.max(S[1][E],x[E])}return v[c]=[b,p,S]}},9405:function(i,a,o){"use strict";var s=o(3327),l=o(8731),u=o(216),c=o(5091),f=o(2145),h=o(8866);function d(p){this.gl=p,this.gl.lastAttribCount=0,this._vref=this._fref=this._relink=this.vertShader=this.fragShader=this.program=this.attributes=this.uniforms=this.types=null}var v=d.prototype;v.bind=function(){this.program||this._relink();var p,k=this.gl.getProgramParameter(this.program,this.gl.ACTIVE_ATTRIBUTES),E=this.gl.lastAttribCount;if(k>E)for(p=E;p<k;p++)this.gl.enableVertexAttribArray(p);else if(E>k)for(p=k;p<E;p++)this.gl.disableVertexAttribArray(p);this.gl.lastAttribCount=k,this.gl.useProgram(this.program)},v.dispose=function(){for(var p=this.gl.lastAttribCount,k=0;k<p;k++)this.gl.disableVertexAttribArray(k);this.gl.lastAttribCount=0,this._fref&&this._fref.dispose(),this._vref&&this._vref.dispose(),this.attributes=this.types=this.vertShader=this.fragShader=this.program=this._relink=this._fref=this._vref=null};function _(p,k){return p.name<k.name?-1:1}v.update=function(p,k,E,S){if(!k||arguments.length===1){var L=p;p=L.vertex,k=L.fragment,E=L.uniforms,S=L.attributes}var x=this,C=x.gl,M=x._vref;x._vref=c.shader(C,C.VERTEX_SHADER,p),M&&M.dispose(),x.vertShader=x._vref.shader;var g=this._fref;if(x._fref=c.shader(C,C.FRAGMENT_SHADER,k),g&&g.dispose(),x.fragShader=x._fref.shader,!E||!S){var P=C.createProgram();if(C.attachShader(P,x.fragShader),C.attachShader(P,x.vertShader),C.linkProgram(P),!C.getProgramParameter(P,C.LINK_STATUS)){var T=C.getProgramInfoLog(P);throw new h(T,"Error linking program:"+T)}E=E||f.uniforms(C,P),S=S||f.attributes(C,P),C.deleteProgram(P)}S=S.slice(),S.sort(_);var z=[],O=[],V=[],G;for(G=0;G<S.length;++G){var Z=S[G];if(Z.type.indexOf("mat")>=0){for(var j=Z.type.charAt(Z.type.length-1)|0,N=new Array(j),H=0;H<j;++H)N[H]=V.length,O.push(Z.name+"["+H+"]"),typeof Z.location=="number"?V.push(Z.location+H):Array.isArray(Z.location)&&Z.location.length===j&&typeof Z.location[H]=="number"?V.push(Z.location[H]|0):V.push(-1);z.push({name:Z.name,type:Z.type,locations:N})}else z.push({name:Z.name,type:Z.type,locations:[V.length]}),O.push(Z.name),typeof Z.location=="number"?V.push(Z.location|0):V.push(-1)}var te=0;for(G=0;G<V.length;++G)if(V[G]<0){for(;V.indexOf(te)>=0;)te+=1;V[G]=te}var oe=new Array(E.length);function _e(){x.program=c.program(C,x._vref,x._fref,O,V);for(var Ee=0;Ee<E.length;++Ee)oe[Ee]=C.getUniformLocation(x.program,E[Ee].name)}_e(),x._relink=_e,x.types={uniforms:u(E),attributes:u(S)},x.attributes=l(C,x,z,V),Object.defineProperty(x,"uniforms",s(C,x,E,oe))};function b(p,k,E,S,L){var x=new d(p);return x.update(k,E,S,L),x}i.exports=b},9499:function(i,a,o){"use strict";i.exports=Fe;var s=o(8828),l=o(2762),u=o(8116),c=o(7766),f=o(1888),h=o(6729),d=o(5298),v=o(9994),_=o(9618),b=o(3711),p=o(6760),k=o(7608),E=o(2478),S=o(6199),L=o(990),x=L.createShader,C=L.createContourShader,M=L.createPickShader,g=L.createPickContourShader,P=4*10,T=[1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1],z=[[0,0],[0,1],[1,0],[1,1],[1,0],[0,1]],O=[[0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0]];(function(){for(var Pe=0;Pe<3;++Pe){var ge=O[Pe],Re=(Pe+1)%3,ce=(Pe+2)%3;ge[Re+0]=1,ge[ce+3]=1,ge[Pe+6]=1}})();function V(Pe,ge,Re,ce,Ze){this.position=Pe,this.index=ge,this.uv=Re,this.level=ce,this.dataCoordinate=Ze}var G=256;function Z(Pe,ge,Re,ce,Ze,ut,pt,Zt,st,lt,Gt,Nt,Jt,sr,wr){this.gl=Pe,this.shape=ge,this.bounds=Re,this.objectOffset=wr,this.intensityBounds=[],this._shader=ce,this._pickShader=Ze,this._coordinateBuffer=ut,this._vao=pt,this._colorMap=Zt,this._contourShader=st,this._contourPickShader=lt,this._contourBuffer=Gt,this._contourVAO=Nt,this._contourOffsets=[[],[],[]],this._contourCounts=[[],[],[]],this._vertexCount=0,this._pickResult=new V([0,0,0],[0,0],[0,0],[0,0,0],[0,0,0]),this._dynamicBuffer=Jt,this._dynamicVAO=sr,this._dynamicOffsets=[0,0,0],this._dynamicCounts=[0,0,0],this.contourWidth=[1,1,1],this.contourLevels=[[1],[1],[1]],this.contourTint=[0,0,0],this.contourColor=[[.5,.5,.5,1],[.5,.5,.5,1],[.5,.5,.5,1]],this.showContour=!0,this.showSurface=!0,this.enableHighlight=[!0,!0,!0],this.highlightColor=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.highlightTint=[1,1,1],this.highlightLevel=[-1,-1,-1],this.enableDynamic=[!0,!0,!0],this.dynamicLevel=[NaN,NaN,NaN],this.dynamicColor=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.dynamicTint=[1,1,1],this.dynamicWidth=[1,1,1],this.axesBounds=[[1/0,1/0,1/0],[-1/0,-1/0,-1/0]],this.surfaceProject=[!1,!1,!1],this.contourProject=[[!1,!1,!1],[!1,!1,!1],[!1,!1,!1]],this.colorBounds=[!1,!1],this._field=[_(f.mallocFloat(1024),[0,0]),_(f.mallocFloat(1024),[0,0]),_(f.mallocFloat(1024),[0,0])],this.pickId=1,this.clipBounds=[[-1/0,-1/0,-1/0],[1/0,1/0,1/0]],this.snapToData=!1,this.pixelRatio=1,this.opacity=1,this.lightPosition=[10,1e4,0],this.ambientLight=.8,this.diffuseLight=.8,this.specularLight=2,this.roughness=.5,this.fresnel=1.5,this.vertexColor=0,this.dirty=!0}var j=Z.prototype;j.genColormap=function(Pe,ge){var Re=!1,ce=v([h({colormap:Pe,nshades:G,format:"rgba"}).map(function(Ze,ut){var pt=ge?N(ut/255,ge):Ze[3];return pt<1&&(Re=!0),[Ze[0],Ze[1],Ze[2],255*pt]})]);return d.divseq(ce,255),this.hasAlphaScale=Re,ce},j.isTransparent=function(){return this.opacity<1||this.hasAlphaScale},j.isOpaque=function(){return!this.isTransparent()},j.pickSlots=1,j.setPickBase=function(Pe){this.pickId=Pe};function N(Pe,ge){if(!ge||!ge.length)return 1;for(var Re=0;Re<ge.length;++Re){if(ge.length<2)return 1;if(ge[Re][0]===Pe)return ge[Re][1];if(ge[Re][0]>Pe&&Re>0){var ce=(ge[Re][0]-Pe)/(ge[Re][0]-ge[Re-1][0]);return ge[Re][1]*(1-ce)+ce*ge[Re-1][1]}}return 1}var H=[0,0,0],te={showSurface:!1,showContour:!1,projections:[T.slice(),T.slice(),T.slice()],clipBounds:[[[0,0,0],[0,0,0]],[[0,0,0],[0,0,0]],[[0,0,0],[0,0,0]]]};function oe(Pe,ge){var Re,ce,Ze,ut=ge.axes&&ge.axes.lastCubeProps.axis||H,pt=ge.showSurface,Zt=ge.showContour;for(Re=0;Re<3;++Re)for(pt=pt||ge.surfaceProject[Re],ce=0;ce<3;++ce)Zt=Zt||ge.contourProject[Re][ce];for(Re=0;Re<3;++Re){var st=te.projections[Re];for(ce=0;ce<16;++ce)st[ce]=0;for(ce=0;ce<4;++ce)st[5*ce]=1;st[5*Re]=0,st[12+Re]=ge.axesBounds[+(ut[Re]>0)][Re],p(st,Pe.model,st);var lt=te.clipBounds[Re];for(Ze=0;Ze<2;++Ze)for(ce=0;ce<3;++ce)lt[Ze][ce]=Pe.clipBounds[Ze][ce];lt[0][Re]=-1e8,lt[1][Re]=1e8}return te.showSurface=pt,te.showContour=Zt,te}var _e={model:T,view:T,projection:T,inverseModel:T.slice(),lowerBound:[0,0,0],upperBound:[0,0,0],colorMap:0,clipBounds:[[0,0,0],[0,0,0]],height:0,contourTint:0,contourColor:[0,0,0,1],permutation:[1,0,0,0,1,0,0,0,1],zOffset:-1e-4,objectOffset:[0,0,0],kambient:1,kdiffuse:1,kspecular:1,lightPosition:[1e3,1e3,1e3],eyePosition:[0,0,0],roughness:1,fresnel:1,opacity:1,vertexColor:0},Ee=T.slice(),Ce=[1,0,0,0,1,0,0,0,1];function me(Pe,ge){Pe=Pe||{};var Re=this.gl;Re.disable(Re.CULL_FACE),this._colorMap.bind(0);var ce=_e;ce.model=Pe.model||T,ce.view=Pe.view||T,ce.projection=Pe.projection||T,ce.lowerBound=[this.bounds[0][0],this.bounds[0][1],this.colorBounds[0]||this.bounds[0][2]],ce.upperBound=[this.bounds[1][0],this.bounds[1][1],this.colorBounds[1]||this.bounds[1][2]],ce.objectOffset=this.objectOffset,ce.contourColor=this.contourColor[0],ce.inverseModel=k(ce.inverseModel,ce.model);for(var Ze=0;Ze<2;++Ze)for(var ut=ce.clipBounds[Ze],pt=0;pt<3;++pt)ut[pt]=Math.min(Math.max(this.clipBounds[Ze][pt],-1e8),1e8);ce.kambient=this.ambientLight,ce.kdiffuse=this.diffuseLight,ce.kspecular=this.specularLight,ce.roughness=this.roughness,ce.fresnel=this.fresnel,ce.opacity=this.opacity,ce.height=0,ce.permutation=Ce,ce.vertexColor=this.vertexColor;var Zt=Ee;for(p(Zt,ce.view,ce.model),p(Zt,ce.projection,Zt),k(Zt,Zt),Ze=0;Ze<3;++Ze)ce.eyePosition[Ze]=Zt[12+Ze]/Zt[15];var st=Zt[15];for(Ze=0;Ze<3;++Ze)st+=this.lightPosition[Ze]*Zt[4*Ze+3];for(Ze=0;Ze<3;++Ze){var lt=Zt[12+Ze];for(pt=0;pt<3;++pt)lt+=Zt[4*pt+Ze]*this.lightPosition[pt];ce.lightPosition[Ze]=lt/st}var Gt=oe(ce,this);if(Gt.showSurface){for(this._shader.bind(),this._shader.uniforms=ce,this._vao.bind(),this.showSurface&&this._vertexCount&&this._vao.draw(Re.TRIANGLES,this._vertexCount),Ze=0;Ze<3;++Ze)!this.surfaceProject[Ze]||!this.vertexCount||(this._shader.uniforms.model=Gt.projections[Ze],this._shader.uniforms.clipBounds=Gt.clipBounds[Ze],this._vao.draw(Re.TRIANGLES,this._vertexCount));this._vao.unbind()}if(Gt.showContour){var Nt=this._contourShader;ce.kambient=1,ce.kdiffuse=0,ce.kspecular=0,ce.opacity=1,Nt.bind(),Nt.uniforms=ce;var Jt=this._contourVAO;for(Jt.bind(),Ze=0;Ze<3;++Ze)for(Nt.uniforms.permutation=O[Ze],Re.lineWidth(this.contourWidth[Ze]*this.pixelRatio),pt=0;pt<this.contourLevels[Ze].length;++pt)pt===this.highlightLevel[Ze]?(Nt.uniforms.contourColor=this.highlightColor[Ze],Nt.uniforms.contourTint=this.highlightTint[Ze]):(pt===0||pt-1===this.highlightLevel[Ze])&&(Nt.uniforms.contourColor=this.contourColor[Ze],Nt.uniforms.contourTint=this.contourTint[Ze]),this._contourCounts[Ze][pt]&&(Nt.uniforms.height=this.contourLevels[Ze][pt],Jt.draw(Re.LINES,this._contourCounts[Ze][pt],this._contourOffsets[Ze][pt]));for(Ze=0;Ze<3;++Ze)for(Nt.uniforms.model=Gt.projections[Ze],Nt.uniforms.clipBounds=Gt.clipBounds[Ze],pt=0;pt<3;++pt)if(this.contourProject[Ze][pt]){Nt.uniforms.permutation=O[pt],Re.lineWidth(this.contourWidth[pt]*this.pixelRatio);for(var sr=0;sr<this.contourLevels[pt].length;++sr)sr===this.highlightLevel[pt]?(Nt.uniforms.contourColor=this.highlightColor[pt],Nt.uniforms.contourTint=this.highlightTint[pt]):(sr===0||sr-1===this.highlightLevel[pt])&&(Nt.uniforms.contourColor=this.contourColor[pt],Nt.uniforms.contourTint=this.contourTint[pt]),this._contourCounts[pt][sr]&&(Nt.uniforms.height=this.contourLevels[pt][sr],Jt.draw(Re.LINES,this._contourCounts[pt][sr],this._contourOffsets[pt][sr]))}for(Jt.unbind(),Jt=this._dynamicVAO,Jt.bind(),Ze=0;Ze<3;++Ze)if(this._dynamicCounts[Ze]!==0)for(Nt.uniforms.model=ce.model,Nt.uniforms.clipBounds=ce.clipBounds,Nt.uniforms.permutation=O[Ze],Re.lineWidth(this.dynamicWidth[Ze]*this.pixelRatio),Nt.uniforms.contourColor=this.dynamicColor[Ze],Nt.uniforms.contourTint=this.dynamicTint[Ze],Nt.uniforms.height=this.dynamicLevel[Ze],Jt.draw(Re.LINES,this._dynamicCounts[Ze],this._dynamicOffsets[Ze]),pt=0;pt<3;++pt)this.contourProject[pt][Ze]&&(Nt.uniforms.model=Gt.projections[pt],Nt.uniforms.clipBounds=Gt.clipBounds[pt],Jt.draw(Re.LINES,this._dynamicCounts[Ze],this._dynamicOffsets[Ze]));Jt.unbind()}}j.draw=function(Pe){return me.call(this,Pe,!1)},j.drawTransparent=function(Pe){return me.call(this,Pe,!0)};var ie={model:T,view:T,projection:T,inverseModel:T,clipBounds:[[0,0,0],[0,0,0]],height:0,shape:[0,0],pickId:0,lowerBound:[0,0,0],upperBound:[0,0,0],zOffset:0,objectOffset:[0,0,0],permutation:[1,0,0,0,1,0,0,0,1],lightPosition:[0,0,0],eyePosition:[0,0,0]};j.drawPick=function(Pe){Pe=Pe||{};var ge=this.gl;ge.disable(ge.CULL_FACE);var Re=ie;Re.model=Pe.model||T,Re.view=Pe.view||T,Re.projection=Pe.projection||T,Re.shape=this._field[2].shape,Re.pickId=this.pickId/255,Re.lowerBound=this.bounds[0],Re.upperBound=this.bounds[1],Re.objectOffset=this.objectOffset,Re.permutation=Ce;for(var ce=0;ce<2;++ce)for(var Ze=Re.clipBounds[ce],ut=0;ut<3;++ut)Ze[ut]=Math.min(Math.max(this.clipBounds[ce][ut],-1e8),1e8);var pt=oe(Re,this);if(pt.showSurface){for(this._pickShader.bind(),this._pickShader.uniforms=Re,this._vao.bind(),this._vao.draw(ge.TRIANGLES,this._vertexCount),ce=0;ce<3;++ce)this.surfaceProject[ce]&&(this._pickShader.uniforms.model=pt.projections[ce],this._pickShader.uniforms.clipBounds=pt.clipBounds[ce],this._vao.draw(ge.TRIANGLES,this._vertexCount));this._vao.unbind()}if(pt.showContour){var Zt=this._contourPickShader;Zt.bind(),Zt.uniforms=Re;var st=this._contourVAO;for(st.bind(),ut=0;ut<3;++ut)for(ge.lineWidth(this.contourWidth[ut]*this.pixelRatio),Zt.uniforms.permutation=O[ut],ce=0;ce<this.contourLevels[ut].length;++ce)this._contourCounts[ut][ce]&&(Zt.uniforms.height=this.contourLevels[ut][ce],st.draw(ge.LINES,this._contourCounts[ut][ce],this._contourOffsets[ut][ce]));for(ce=0;ce<3;++ce)for(Zt.uniforms.model=pt.projections[ce],Zt.uniforms.clipBounds=pt.clipBounds[ce],ut=0;ut<3;++ut)if(this.contourProject[ce][ut]){Zt.uniforms.permutation=O[ut],ge.lineWidth(this.contourWidth[ut]*this.pixelRatio);for(var lt=0;lt<this.contourLevels[ut].length;++lt)this._contourCounts[ut][lt]&&(Zt.uniforms.height=this.contourLevels[ut][lt],st.draw(ge.LINES,this._contourCounts[ut][lt],this._contourOffsets[ut][lt]))}st.unbind()}},j.pick=function(Pe){if(!Pe||Pe.id!==this.pickId)return null;var ge=this._field[2].shape,Re=this._pickResult,ce=ge[0]*(Pe.value[0]+(Pe.value[2]>>4)/16)/255,Ze=Math.floor(ce),ut=ce-Ze,pt=ge[1]*(Pe.value[1]+(Pe.value[2]&15)/16)/255,Zt=Math.floor(pt),st=pt-Zt;Ze+=1,Zt+=1;var lt=Re.position;lt[0]=lt[1]=lt[2]=0;for(var Gt=0;Gt<2;++Gt)for(var Nt=Gt?ut:1-ut,Jt=0;Jt<2;++Jt)for(var sr=Jt?st:1-st,wr=Ze+Gt,cr=Zt+Jt,$e=Nt*sr,St=0;St<3;++St)lt[St]+=this._field[St].get(wr,cr)*$e;for(var Qt=this._pickResult.level,Vt=0;Vt<3;++Vt)if(Qt[Vt]=E.le(this.contourLevels[Vt],lt[Vt]),Qt[Vt]<0)this.contourLevels[Vt].length>0&&(Qt[Vt]=0);else if(Qt[Vt]<this.contourLevels[Vt].length-1){var _t=this.contourLevels[Vt][Qt[Vt]],It=this.contourLevels[Vt][Qt[Vt]+1];Math.abs(_t-lt[Vt])>Math.abs(It-lt[Vt])&&(Qt[Vt]+=1)}for(Re.index[0]=ut<.5?Ze:Ze+1,Re.index[1]=st<.5?Zt:Zt+1,Re.uv[0]=ce/ge[0],Re.uv[1]=pt/ge[1],St=0;St<3;++St)Re.dataCoordinate[St]=this._field[St].get(Re.index[0],Re.index[1]);return Re},j.padField=function(Pe,ge){var Re=ge.shape.slice(),ce=Pe.shape.slice();d.assign(Pe.lo(1,1).hi(Re[0],Re[1]),ge),d.assign(Pe.lo(1).hi(Re[0],1),ge.hi(Re[0],1)),d.assign(Pe.lo(1,ce[1]-1).hi(Re[0],1),ge.lo(0,Re[1]-1).hi(Re[0],1)),d.assign(Pe.lo(0,1).hi(1,Re[1]),ge.hi(1)),d.assign(Pe.lo(ce[0]-1,1).hi(1,Re[1]),ge.lo(Re[0]-1)),Pe.set(0,0,ge.get(0,0)),Pe.set(0,ce[1]-1,ge.get(0,Re[1]-1)),Pe.set(ce[0]-1,0,ge.get(Re[0]-1,0)),Pe.set(ce[0]-1,ce[1]-1,ge.get(Re[0]-1,Re[1]-1))};function Se(Pe,ge){return Array.isArray(Pe)?[ge(Pe[0]),ge(Pe[1]),ge(Pe[2])]:[ge(Pe),ge(Pe),ge(Pe)]}function Le(Pe){return Array.isArray(Pe)?Pe.length===3?[Pe[0],Pe[1],Pe[2],1]:[Pe[0],Pe[1],Pe[2],Pe[3]]:[0,0,0,1]}function Ae(Pe){if(Array.isArray(Pe)){if(Array.isArray(Pe))return[Le(Pe[0]),Le(Pe[1]),Le(Pe[2])];var ge=Le(Pe);return[ge.slice(),ge.slice(),ge.slice()]}}j.update=function(Pe){Pe=Pe||{},this.objectOffset=Pe.objectOffset||this.objectOffset,this.dirty=!0,"contourWidth"in Pe&&(this.contourWidth=Se(Pe.contourWidth,Number)),"showContour"in Pe&&(this.showContour=Se(Pe.showContour,Boolean)),"showSurface"in Pe&&(this.showSurface=!!Pe.showSurface),"contourTint"in Pe&&(this.contourTint=Se(Pe.contourTint,Boolean)),"contourColor"in Pe&&(this.contourColor=Ae(Pe.contourColor)),"contourProject"in Pe&&(this.contourProject=Se(Pe.contourProject,function(yn){return Se(yn,Boolean)})),"surfaceProject"in Pe&&(this.surfaceProject=Pe.surfaceProject),"dynamicColor"in Pe&&(this.dynamicColor=Ae(Pe.dynamicColor)),"dynamicTint"in Pe&&(this.dynamicTint=Se(Pe.dynamicTint,Number)),"dynamicWidth"in Pe&&(this.dynamicWidth=Se(Pe.dynamicWidth,Number)),"opacity"in Pe&&(this.opacity=Pe.opacity),"opacityscale"in Pe&&(this.opacityscale=Pe.opacityscale),"colorBounds"in Pe&&(this.colorBounds=Pe.colorBounds),"vertexColor"in Pe&&(this.vertexColor=Pe.vertexColor?1:0),"colormap"in Pe&&this._colorMap.setPixels(this.genColormap(Pe.colormap,this.opacityscale));var ge=Pe.field||Pe.coords&&Pe.coords[2]||null,Re=!1;if(ge||(this._field[2].shape[0]||this._field[2].shape[2]?ge=this._field[2].lo(1,1).hi(this._field[2].shape[0]-2,this._field[2].shape[1]-2):ge=this._field[2].hi(0,0)),"field"in Pe||"coords"in Pe){var ce=(ge.shape[0]+2)*(ge.shape[1]+2);ce>this._field[2].data.length&&(f.freeFloat(this._field[2].data),this._field[2].data=f.mallocFloat(s.nextPow2(ce))),this._field[2]=_(this._field[2].data,[ge.shape[0]+2,ge.shape[1]+2]),this.padField(this._field[2],ge),this.shape=ge.shape.slice();for(var Ze=this.shape,ut=0;ut<2;++ut)this._field[2].size>this._field[ut].data.length&&(f.freeFloat(this._field[ut].data),this._field[ut].data=f.mallocFloat(this._field[2].size)),this._field[ut]=_(this._field[ut].data,[Ze[0]+2,Ze[1]+2]);if(Pe.coords){var pt=Pe.coords;if(!Array.isArray(pt)||pt.length!==3)throw new Error("gl-surface: invalid coordinates for x/y");for(ut=0;ut<2;++ut){var Zt=pt[ut];for(Jt=0;Jt<2;++Jt)if(Zt.shape[Jt]!==Ze[Jt])throw new Error("gl-surface: coords have incorrect shape");this.padField(this._field[ut],Zt)}}else if(Pe.ticks){var st=Pe.ticks;if(!Array.isArray(st)||st.length!==2)throw new Error("gl-surface: invalid ticks");for(ut=0;ut<2;++ut){var lt=st[ut];if((Array.isArray(lt)||lt.length)&&(lt=_(lt)),lt.shape[0]!==Ze[ut])throw new Error("gl-surface: invalid tick length");var Gt=_(lt.data,Ze);Gt.stride[ut]=lt.stride[0],Gt.stride[ut^1]=0,this.padField(this._field[ut],Gt)}}else{for(ut=0;ut<2;++ut){var Nt=[0,0];Nt[ut]=1,this._field[ut]=_(this._field[ut].data,[Ze[0]+2,Ze[1]+2],Nt,0)}this._field[0].set(0,0,0);for(var Jt=0;Jt<Ze[0];++Jt)this._field[0].set(Jt+1,0,Jt);for(this._field[0].set(Ze[0]+1,0,Ze[0]-1),this._field[1].set(0,0,0),Jt=0;Jt<Ze[1];++Jt)this._field[1].set(0,Jt+1,Jt);this._field[1].set(0,Ze[1]+1,Ze[1]-1)}var sr=this._field,wr=_(f.mallocFloat(sr[2].size*3*2),[3,Ze[0]+2,Ze[1]+2,2]);for(ut=0;ut<3;++ut)S(wr.pick(ut),sr[ut],"mirror");var cr=_(f.mallocFloat(sr[2].size*3),[Ze[0]+2,Ze[1]+2,3]);for(ut=0;ut<Ze[0]+2;++ut)for(Jt=0;Jt<Ze[1]+2;++Jt){var $e=wr.get(0,ut,Jt,0),St=wr.get(0,ut,Jt,1),Qt=wr.get(1,ut,Jt,0),Vt=wr.get(1,ut,Jt,1),_t=wr.get(2,ut,Jt,0),It=wr.get(2,ut,Jt,1),mt=Qt*It-Vt*_t,er=_t*St-It*$e,lr=$e*Vt-St*Qt,Tr=Math.sqrt(mt*mt+er*er+lr*lr);Tr<1e-8?(Tr=Math.max(Math.abs(mt),Math.abs(er),Math.abs(lr)),Tr<1e-8?(lr=1,er=mt=0,Tr=1):Tr=1/Tr):Tr=1/Math.sqrt(Tr),cr.set(ut,Jt,0,mt*Tr),cr.set(ut,Jt,1,er*Tr),cr.set(ut,Jt,2,lr*Tr)}f.free(wr.data);var Lr=[1/0,1/0,1/0],ti=[-1/0,-1/0,-1/0],Br=1/0,Vr=-1/0,dt=(Ze[0]-1)*(Ze[1]-1)*6,Ge=f.mallocFloat(s.nextPow2(10*dt)),Je=0,je=0;for(ut=0;ut<Ze[0]-1;++ut)e:for(Jt=0;Jt<Ze[1]-1;++Jt){for(var tt=0;tt<2;++tt)for(var xt=0;xt<2;++xt)for(var Ie=0;Ie<3;++Ie){var xe=this._field[Ie].get(1+ut+tt,1+Jt+xt);if(isNaN(xe)||!isFinite(xe))continue e}for(Ie=0;Ie<6;++Ie){var ke=ut+z[Ie][0],vt=Jt+z[Ie][1],ir=this._field[0].get(ke+1,vt+1),ar=this._field[1].get(ke+1,vt+1);xe=this._field[2].get(ke+1,vt+1),mt=cr.get(ke+1,vt+1,0),er=cr.get(ke+1,vt+1,1),lr=cr.get(ke+1,vt+1,2),Pe.intensity&&(vr=Pe.intensity.get(ke,vt));var vr=Pe.intensity?Pe.intensity.get(ke,vt):xe+this.objectOffset[2];Ge[Je++]=ke,Ge[Je++]=vt,Ge[Je++]=ir,Ge[Je++]=ar,Ge[Je++]=xe,Ge[Je++]=0,Ge[Je++]=vr,Ge[Je++]=mt,Ge[Je++]=er,Ge[Je++]=lr,Lr[0]=Math.min(Lr[0],ir+this.objectOffset[0]),Lr[1]=Math.min(Lr[1],ar+this.objectOffset[1]),Lr[2]=Math.min(Lr[2],xe+this.objectOffset[2]),Br=Math.min(Br,vr),ti[0]=Math.max(ti[0],ir+this.objectOffset[0]),ti[1]=Math.max(ti[1],ar+this.objectOffset[1]),ti[2]=Math.max(ti[2],xe+this.objectOffset[2]),Vr=Math.max(Vr,vr),je+=1}}for(Pe.intensityBounds&&(Br=+Pe.intensityBounds[0],Vr=+Pe.intensityBounds[1]),ut=6;ut<Je;ut+=10)Ge[ut]=(Ge[ut]-Br)/(Vr-Br);this._vertexCount=je,this._coordinateBuffer.update(Ge.subarray(0,Je)),f.freeFloat(Ge),f.free(cr.data),this.bounds=[Lr,ti],this.intensity=Pe.intensity||this._field[2],(this.intensityBounds[0]!==Br||this.intensityBounds[1]!==Vr)&&(Re=!0),this.intensityBounds=[Br,Vr]}if("levels"in Pe){var ii=Pe.levels;for(Array.isArray(ii[0])?ii=ii.slice():ii=[[],[],ii],ut=0;ut<3;++ut)ii[ut]=ii[ut].slice(),ii[ut].sort(function(yn,Sn){return yn-Sn});for(ut=0;ut<3;++ut)for(Jt=0;Jt<ii[ut].length;++Jt)ii[ut][Jt]-=this.objectOffset[ut];e:for(ut=0;ut<3;++ut){if(ii[ut].length!==this.contourLevels[ut].length){Re=!0;break}for(Jt=0;Jt<ii[ut].length;++Jt)if(ii[ut][Jt]!==this.contourLevels[ut][Jt]){Re=!0;break e}}this.contourLevels=ii}if(Re){sr=this._field,Ze=this.shape;for(var pi=[],$r=0;$r<3;++$r){var di=this.contourLevels[$r],ji=[],In=[],wi=[0,0,0];for(ut=0;ut<di.length;++ut){var On=b(this._field[$r],di[ut]);ji.push(pi.length/5|0),je=0;e:for(Jt=0;Jt<On.cells.length;++Jt){var qn=On.cells[Jt];for(Ie=0;Ie<2;++Ie){var Fn=On.positions[qn[Ie]],ra=Fn[0],la=Math.floor(ra)|0,Ut=ra-la,wt=Fn[1],rr=Math.floor(wt)|0,nr=wt-rr,Er=!1;t:for(var Xr=0;Xr<3;++Xr){wi[Xr]=0;var ri=($r+Xr+1)%3;for(tt=0;tt<2;++tt){var Qr=tt?Ut:1-Ut;for(ke=Math.min(Math.max(la+tt,0),Ze[0])|0,xt=0;xt<2;++xt){var Oi=xt?nr:1-nr;if(vt=Math.min(Math.max(rr+xt,0),Ze[1])|0,Xr<2?xe=this._field[ri].get(ke,vt):xe=(this.intensity.get(ke,vt)-this.intensityBounds[0])/(this.intensityBounds[1]-this.intensityBounds[0]),!isFinite(xe)||isNaN(xe)){Er=!0;break t}var $i=Qr*Oi;wi[Xr]+=$i*xe}}}if(!Er)pi.push(wi[0],wi[1],Fn[0],Fn[1],wi[2]),je+=1;else{if(Ie>0){for(var tn=0;tn<5;++tn)pi.pop();je-=1}continue e}}}In.push(je)}this._contourOffsets[$r]=ji,this._contourCounts[$r]=In}var fn=f.mallocFloat(pi.length);for(ut=0;ut<pi.length;++ut)fn[ut]=pi[ut];this._contourBuffer.update(fn),f.freeFloat(fn)}},j.dispose=function(){this._shader.dispose(),this._vao.dispose(),this._coordinateBuffer.dispose(),this._colorMap.dispose(),this._contourBuffer.dispose(),this._contourVAO.dispose(),this._contourShader.dispose(),this._contourPickShader.dispose(),this._dynamicBuffer.dispose(),this._dynamicVAO.dispose();for(var Pe=0;Pe<3;++Pe)f.freeFloat(this._field[Pe].data)},j.highlight=function(Pe){var ge;if(!Pe){this._dynamicCounts=[0,0,0],this.dyanamicLevel=[NaN,NaN,NaN],this.highlightLevel=[-1,-1,-1];return}for(ge=0;ge<3;++ge)this.enableHighlight[ge]?this.highlightLevel[ge]=Pe.level[ge]:this.highlightLevel[ge]=-1;var Re;for(this.snapToData?Re=Pe.dataCoordinate:Re=Pe.position,ge=0;ge<3;++ge)Re[ge]-=this.objectOffset[ge];if(!((!this.enableDynamic[0]||Re[0]===this.dynamicLevel[0])&&(!this.enableDynamic[1]||Re[1]===this.dynamicLevel[1])&&(!this.enableDynamic[2]||Re[2]===this.dynamicLevel[2]))){for(var ce=0,Ze=this.shape,ut=f.mallocFloat(12*Ze[0]*Ze[1]),pt=0;pt<3;++pt){if(!this.enableDynamic[pt]){this.dynamicLevel[pt]=NaN,this._dynamicCounts[pt]=0;continue}this.dynamicLevel[pt]=Re[pt];var Zt=(pt+1)%3,st=(pt+2)%3,lt=this._field[pt],Gt=this._field[Zt],Nt=this._field[st],Jt=b(lt,Re[pt]),sr=Jt.cells,wr=Jt.positions;for(this._dynamicOffsets[pt]=ce,ge=0;ge<sr.length;++ge)for(var cr=sr[ge],$e=0;$e<2;++$e){var St=wr[cr[$e]],Qt=+St[0],Vt=Qt|0,_t=Math.min(Vt+1,Ze[0])|0,It=Qt-Vt,mt=1-It,er=+St[1],lr=er|0,Tr=Math.min(lr+1,Ze[1])|0,Lr=er-lr,ti=1-Lr,Br=mt*ti,Vr=mt*Lr,dt=It*ti,Ge=It*Lr,Je=Br*Gt.get(Vt,lr)+Vr*Gt.get(Vt,Tr)+dt*Gt.get(_t,lr)+Ge*Gt.get(_t,Tr),je=Br*Nt.get(Vt,lr)+Vr*Nt.get(Vt,Tr)+dt*Nt.get(_t,lr)+Ge*Nt.get(_t,Tr);if(isNaN(Je)||isNaN(je)){$e&&(ce-=1);break}ut[2*ce+0]=Je,ut[2*ce+1]=je,ce+=1}this._dynamicCounts[pt]=ce-this._dynamicOffsets[pt]}this._dynamicBuffer.update(ut.subarray(0,2*ce)),f.freeFloat(ut)}};function Fe(Pe){var ge=Pe.gl,Re=x(ge),ce=M(ge),Ze=C(ge),ut=g(ge),pt=l(ge),Zt=u(ge,[{buffer:pt,size:4,stride:P,offset:0},{buffer:pt,size:3,stride:P,offset:16},{buffer:pt,size:3,stride:P,offset:28}]),st=l(ge),lt=u(ge,[{buffer:st,size:4,stride:20,offset:0},{buffer:st,size:1,stride:20,offset:16}]),Gt=l(ge),Nt=u(ge,[{buffer:Gt,size:2,type:ge.FLOAT}]),Jt=c(ge,1,G,ge.RGBA,ge.UNSIGNED_BYTE);Jt.minFilter=ge.LINEAR,Jt.magFilter=ge.LINEAR;var sr=new Z(ge,[0,0],[[0,0,0],[0,0,0]],Re,ce,pt,Zt,Jt,Ze,ut,st,lt,Gt,Nt,[0,0,0]),wr={levels:[[],[],[]]};for(var cr in Pe)wr[cr]=Pe[cr];return wr.colormap=wr.colormap||"jet",sr.update(wr),sr}},9507:function(i){i.exports=!0},9618:function(i,a,o){var s=o(7163),l=typeof Float64Array!="undefined";function u(b,p){return b[0]-p[0]}function c(){var b=this.stride,p=new Array(b.length),k;for(k=0;k<p.length;++k)p[k]=[Math.abs(b[k]),k];p.sort(u);var E=new Array(p.length);for(k=0;k<E.length;++k)E[k]=p[k][1];return E}var f={T:function(b){function p(E){this.data=E}var k=p.prototype;return k.dtype=b,k.index=function(){return-1},k.size=0,k.dimension=-1,k.shape=k.stride=k.order=[],k.lo=k.hi=k.transpose=k.step=function(){return new p(this.data)},k.get=k.set=function(){},k.pick=function(){return null},function(S){return new p(S)}},0:function(b,p){function k(S,L){this.data=S,this.offset=L}var E=k.prototype;return E.dtype=b,E.index=function(){return this.offset},E.dimension=0,E.size=1,E.shape=E.stride=E.order=[],E.lo=E.hi=E.transpose=E.step=function(){return new k(this.data,this.offset)},E.pick=function(){return p(this.data)},E.valueOf=E.get=function(){return b==="generic"?this.data.get(this.offset):this.data[this.offset]},E.set=function(L){return b==="generic"?this.data.set(this.offset,L):this.data[this.offset]=L},function(L,x,C,M){return new k(L,M)}},1:function(b,p,k){function E(L,x,C,M){this.data=L,this.shape=[x],this.stride=[C],this.offset=M|0}var S=E.prototype;return S.dtype=b,S.dimension=1,Object.defineProperty(S,"size",{get:function(){return this.shape[0]}}),S.order=[0],S.set=function(x,C){return b==="generic"?this.data.set(this.offset+this.stride[0]*x,C):this.data[this.offset+this.stride[0]*x]=C},S.get=function(x){return b==="generic"?this.data.get(this.offset+this.stride[0]*x):this.data[this.offset+this.stride[0]*x]},S.index=function(x){return this.offset+this.stride[0]*x},S.hi=function(x){return new E(this.data,typeof x!="number"||x<0?this.shape[0]:x|0,this.stride[0],this.offset)},S.lo=function(x){var C=this.offset,M=0,g=this.shape[0],P=this.stride[0];return typeof x=="number"&&x>=0&&(M=x|0,C+=P*M,g-=M),new E(this.data,g,P,C)},S.step=function(x){var C=this.shape[0],M=this.stride[0],g=this.offset,P=0,T=Math.ceil;return typeof x=="number"&&(P=x|0,P<0?(g+=M*(C-1),C=T(-C/P)):C=T(C/P),M*=P),new E(this.data,C,M,g)},S.transpose=function(x){x=x===void 0?0:x|0;var C=this.shape,M=this.stride;return new E(this.data,C[x],M[x],this.offset)},S.pick=function(x){var C=[],M=[],g=this.offset;typeof x=="number"&&x>=0?g=g+this.stride[0]*x|0:(C.push(this.shape[0]),M.push(this.stride[0]));var P=p[C.length+1];return P(this.data,C,M,g)},function(x,C,M,g){return new E(x,C[0],M[0],g)}},2:function(b,p,k){function E(L,x,C,M,g,P){this.data=L,this.shape=[x,C],this.stride=[M,g],this.offset=P|0}var S=E.prototype;return S.dtype=b,S.dimension=2,Object.defineProperty(S,"size",{get:function(){return this.shape[0]*this.shape[1]}}),Object.defineProperty(S,"order",{get:function(){return Math.abs(this.stride[0])>Math.abs(this.stride[1])?[1,0]:[0,1]}}),S.set=function(x,C,M){return b==="generic"?this.data.set(this.offset+this.stride[0]*x+this.stride[1]*C,M):this.data[this.offset+this.stride[0]*x+this.stride[1]*C]=M},S.get=function(x,C){return b==="generic"?this.data.get(this.offset+this.stride[0]*x+this.stride[1]*C):this.data[this.offset+this.stride[0]*x+this.stride[1]*C]},S.index=function(x,C){return this.offset+this.stride[0]*x+this.stride[1]*C},S.hi=function(x,C){return new E(this.data,typeof x!="number"||x<0?this.shape[0]:x|0,typeof C!="number"||C<0?this.shape[1]:C|0,this.stride[0],this.stride[1],this.offset)},S.lo=function(x,C){var M=this.offset,g=0,P=this.shape[0],T=this.shape[1],z=this.stride[0],O=this.stride[1];return typeof x=="number"&&x>=0&&(g=x|0,M+=z*g,P-=g),typeof C=="number"&&C>=0&&(g=C|0,M+=O*g,T-=g),new E(this.data,P,T,z,O,M)},S.step=function(x,C){var M=this.shape[0],g=this.shape[1],P=this.stride[0],T=this.stride[1],z=this.offset,O=0,V=Math.ceil;return typeof x=="number"&&(O=x|0,O<0?(z+=P*(M-1),M=V(-M/O)):M=V(M/O),P*=O),typeof C=="number"&&(O=C|0,O<0?(z+=T*(g-1),g=V(-g/O)):g=V(g/O),T*=O),new E(this.data,M,g,P,T,z)},S.transpose=function(x,C){x=x===void 0?0:x|0,C=C===void 0?1:C|0;var M=this.shape,g=this.stride;return new E(this.data,M[x],M[C],g[x],g[C],this.offset)},S.pick=function(x,C){var M=[],g=[],P=this.offset;typeof x=="number"&&x>=0?P=P+this.stride[0]*x|0:(M.push(this.shape[0]),g.push(this.stride[0])),typeof C=="number"&&C>=0?P=P+this.stride[1]*C|0:(M.push(this.shape[1]),g.push(this.stride[1]));var T=p[M.length+1];return T(this.data,M,g,P)},function(x,C,M,g){return new E(x,C[0],C[1],M[0],M[1],g)}},3:function(b,p,k){function E(L,x,C,M,g,P,T,z){this.data=L,this.shape=[x,C,M],this.stride=[g,P,T],this.offset=z|0}var S=E.prototype;return S.dtype=b,S.dimension=3,Object.defineProperty(S,"size",{get:function(){return this.shape[0]*this.shape[1]*this.shape[2]}}),Object.defineProperty(S,"order",{get:function(){var x=Math.abs(this.stride[0]),C=Math.abs(this.stride[1]),M=Math.abs(this.stride[2]);return x>C?C>M?[2,1,0]:x>M?[1,2,0]:[1,0,2]:x>M?[2,0,1]:M>C?[0,1,2]:[0,2,1]}}),S.set=function(x,C,M,g){return b==="generic"?this.data.set(this.offset+this.stride[0]*x+this.stride[1]*C+this.stride[2]*M,g):this.data[this.offset+this.stride[0]*x+this.stride[1]*C+this.stride[2]*M]=g},S.get=function(x,C,M){return b==="generic"?this.data.get(this.offset+this.stride[0]*x+this.stride[1]*C+this.stride[2]*M):this.data[this.offset+this.stride[0]*x+this.stride[1]*C+this.stride[2]*M]},S.index=function(x,C,M){return this.offset+this.stride[0]*x+this.stride[1]*C+this.stride[2]*M},S.hi=function(x,C,M){return new E(this.data,typeof x!="number"||x<0?this.shape[0]:x|0,typeof C!="number"||C<0?this.shape[1]:C|0,typeof M!="number"||M<0?this.shape[2]:M|0,this.stride[0],this.stride[1],this.stride[2],this.offset)},S.lo=function(x,C,M){var g=this.offset,P=0,T=this.shape[0],z=this.shape[1],O=this.shape[2],V=this.stride[0],G=this.stride[1],Z=this.stride[2];return typeof x=="number"&&x>=0&&(P=x|0,g+=V*P,T-=P),typeof C=="number"&&C>=0&&(P=C|0,g+=G*P,z-=P),typeof M=="number"&&M>=0&&(P=M|0,g+=Z*P,O-=P),new E(this.data,T,z,O,V,G,Z,g)},S.step=function(x,C,M){var g=this.shape[0],P=this.shape[1],T=this.shape[2],z=this.stride[0],O=this.stride[1],V=this.stride[2],G=this.offset,Z=0,j=Math.ceil;return typeof x=="number"&&(Z=x|0,Z<0?(G+=z*(g-1),g=j(-g/Z)):g=j(g/Z),z*=Z),typeof C=="number"&&(Z=C|0,Z<0?(G+=O*(P-1),P=j(-P/Z)):P=j(P/Z),O*=Z),typeof M=="number"&&(Z=M|0,Z<0?(G+=V*(T-1),T=j(-T/Z)):T=j(T/Z),V*=Z),new E(this.data,g,P,T,z,O,V,G)},S.transpose=function(x,C,M){x=x===void 0?0:x|0,C=C===void 0?1:C|0,M=M===void 0?2:M|0;var g=this.shape,P=this.stride;return new E(this.data,g[x],g[C],g[M],P[x],P[C],P[M],this.offset)},S.pick=function(x,C,M){var g=[],P=[],T=this.offset;typeof x=="number"&&x>=0?T=T+this.stride[0]*x|0:(g.push(this.shape[0]),P.push(this.stride[0])),typeof C=="number"&&C>=0?T=T+this.stride[1]*C|0:(g.push(this.shape[1]),P.push(this.stride[1])),typeof M=="number"&&M>=0?T=T+this.stride[2]*M|0:(g.push(this.shape[2]),P.push(this.stride[2]));var z=p[g.length+1];return z(this.data,g,P,T)},function(x,C,M,g){return new E(x,C[0],C[1],C[2],M[0],M[1],M[2],g)}},4:function(b,p,k){function E(L,x,C,M,g,P,T,z,O,V){this.data=L,this.shape=[x,C,M,g],this.stride=[P,T,z,O],this.offset=V|0}var S=E.prototype;return S.dtype=b,S.dimension=4,Object.defineProperty(S,"size",{get:function(){return this.shape[0]*this.shape[1]*this.shape[2]*this.shape[3]}}),Object.defineProperty(S,"order",{get:k}),S.set=function(x,C,M,g,P){return b==="generic"?this.data.set(this.offset+this.stride[0]*x+this.stride[1]*C+this.stride[2]*M+this.stride[3]*g,P):this.data[this.offset+this.stride[0]*x+this.stride[1]*C+this.stride[2]*M+this.stride[3]*g]=P},S.get=function(x,C,M,g){return b==="generic"?this.data.get(this.offset+this.stride[0]*x+this.stride[1]*C+this.stride[2]*M+this.stride[3]*g):this.data[this.offset+this.stride[0]*x+this.stride[1]*C+this.stride[2]*M+this.stride[3]*g]},S.index=function(x,C,M,g){return this.offset+this.stride[0]*x+this.stride[1]*C+this.stride[2]*M+this.stride[3]*g},S.hi=function(x,C,M,g){return new E(this.data,typeof x!="number"||x<0?this.shape[0]:x|0,typeof C!="number"||C<0?this.shape[1]:C|0,typeof M!="number"||M<0?this.shape[2]:M|0,typeof g!="number"||g<0?this.shape[3]:g|0,this.stride[0],this.stride[1],this.stride[2],this.stride[3],this.offset)},S.lo=function(x,C,M,g){var P=this.offset,T=0,z=this.shape[0],O=this.shape[1],V=this.shape[2],G=this.shape[3],Z=this.stride[0],j=this.stride[1],N=this.stride[2],H=this.stride[3];return typeof x=="number"&&x>=0&&(T=x|0,P+=Z*T,z-=T),typeof C=="number"&&C>=0&&(T=C|0,P+=j*T,O-=T),typeof M=="number"&&M>=0&&(T=M|0,P+=N*T,V-=T),typeof g=="number"&&g>=0&&(T=g|0,P+=H*T,G-=T),new E(this.data,z,O,V,G,Z,j,N,H,P)},S.step=function(x,C,M,g){var P=this.shape[0],T=this.shape[1],z=this.shape[2],O=this.shape[3],V=this.stride[0],G=this.stride[1],Z=this.stride[2],j=this.stride[3],N=this.offset,H=0,te=Math.ceil;return typeof x=="number"&&(H=x|0,H<0?(N+=V*(P-1),P=te(-P/H)):P=te(P/H),V*=H),typeof C=="number"&&(H=C|0,H<0?(N+=G*(T-1),T=te(-T/H)):T=te(T/H),G*=H),typeof M=="number"&&(H=M|0,H<0?(N+=Z*(z-1),z=te(-z/H)):z=te(z/H),Z*=H),typeof g=="number"&&(H=g|0,H<0?(N+=j*(O-1),O=te(-O/H)):O=te(O/H),j*=H),new E(this.data,P,T,z,O,V,G,Z,j,N)},S.transpose=function(x,C,M,g){x=x===void 0?0:x|0,C=C===void 0?1:C|0,M=M===void 0?2:M|0,g=g===void 0?3:g|0;var P=this.shape,T=this.stride;return new E(this.data,P[x],P[C],P[M],P[g],T[x],T[C],T[M],T[g],this.offset)},S.pick=function(x,C,M,g){var P=[],T=[],z=this.offset;typeof x=="number"&&x>=0?z=z+this.stride[0]*x|0:(P.push(this.shape[0]),T.push(this.stride[0])),typeof C=="number"&&C>=0?z=z+this.stride[1]*C|0:(P.push(this.shape[1]),T.push(this.stride[1])),typeof M=="number"&&M>=0?z=z+this.stride[2]*M|0:(P.push(this.shape[2]),T.push(this.stride[2])),typeof g=="number"&&g>=0?z=z+this.stride[3]*g|0:(P.push(this.shape[3]),T.push(this.stride[3]));var O=p[P.length+1];return O(this.data,P,T,z)},function(x,C,M,g){return new E(x,C[0],C[1],C[2],C[3],M[0],M[1],M[2],M[3],g)}},5:function(p,k,E){function S(x,C,M,g,P,T,z,O,V,G,Z,j){this.data=x,this.shape=[C,M,g,P,T],this.stride=[z,O,V,G,Z],this.offset=j|0}var L=S.prototype;return L.dtype=p,L.dimension=5,Object.defineProperty(L,"size",{get:function(){return this.shape[0]*this.shape[1]*this.shape[2]*this.shape[3]*this.shape[4]}}),Object.defineProperty(L,"order",{get:E}),L.set=function(C,M,g,P,T,z){return p==="generic"?this.data.set(this.offset+this.stride[0]*C+this.stride[1]*M+this.stride[2]*g+this.stride[3]*P+this.stride[4]*T,z):this.data[this.offset+this.stride[0]*C+this.stride[1]*M+this.stride[2]*g+this.stride[3]*P+this.stride[4]*T]=z},L.get=function(C,M,g,P,T){return p==="generic"?this.data.get(this.offset+this.stride[0]*C+this.stride[1]*M+this.stride[2]*g+this.stride[3]*P+this.stride[4]*T):this.data[this.offset+this.stride[0]*C+this.stride[1]*M+this.stride[2]*g+this.stride[3]*P+this.stride[4]*T]},L.index=function(C,M,g,P,T){return this.offset+this.stride[0]*C+this.stride[1]*M+this.stride[2]*g+this.stride[3]*P+this.stride[4]*T},L.hi=function(C,M,g,P,T){return new S(this.data,typeof C!="number"||C<0?this.shape[0]:C|0,typeof M!="number"||M<0?this.shape[1]:M|0,typeof g!="number"||g<0?this.shape[2]:g|0,typeof P!="number"||P<0?this.shape[3]:P|0,typeof T!="number"||T<0?this.shape[4]:T|0,this.stride[0],this.stride[1],this.stride[2],this.stride[3],this.stride[4],this.offset)},L.lo=function(C,M,g,P,T){var z=this.offset,O=0,V=this.shape[0],G=this.shape[1],Z=this.shape[2],j=this.shape[3],N=this.shape[4],H=this.stride[0],te=this.stride[1],oe=this.stride[2],_e=this.stride[3],Ee=this.stride[4];return typeof C=="number"&&C>=0&&(O=C|0,z+=H*O,V-=O),typeof M=="number"&&M>=0&&(O=M|0,z+=te*O,G-=O),typeof g=="number"&&g>=0&&(O=g|0,z+=oe*O,Z-=O),typeof P=="number"&&P>=0&&(O=P|0,z+=_e*O,j-=O),typeof T=="number"&&T>=0&&(O=T|0,z+=Ee*O,N-=O),new S(this.data,V,G,Z,j,N,H,te,oe,_e,Ee,z)},L.step=function(C,M,g,P,T){var z=this.shape[0],O=this.shape[1],V=this.shape[2],G=this.shape[3],Z=this.shape[4],j=this.stride[0],N=this.stride[1],H=this.stride[2],te=this.stride[3],oe=this.stride[4],_e=this.offset,Ee=0,Ce=Math.ceil;return typeof C=="number"&&(Ee=C|0,Ee<0?(_e+=j*(z-1),z=Ce(-z/Ee)):z=Ce(z/Ee),j*=Ee),typeof M=="number"&&(Ee=M|0,Ee<0?(_e+=N*(O-1),O=Ce(-O/Ee)):O=Ce(O/Ee),N*=Ee),typeof g=="number"&&(Ee=g|0,Ee<0?(_e+=H*(V-1),V=Ce(-V/Ee)):V=Ce(V/Ee),H*=Ee),typeof P=="number"&&(Ee=P|0,Ee<0?(_e+=te*(G-1),G=Ce(-G/Ee)):G=Ce(G/Ee),te*=Ee),typeof T=="number"&&(Ee=T|0,Ee<0?(_e+=oe*(Z-1),Z=Ce(-Z/Ee)):Z=Ce(Z/Ee),oe*=Ee),new S(this.data,z,O,V,G,Z,j,N,H,te,oe,_e)},L.transpose=function(C,M,g,P,T){C=C===void 0?0:C|0,M=M===void 0?1:M|0,g=g===void 0?2:g|0,P=P===void 0?3:P|0,T=T===void 0?4:T|0;var z=this.shape,O=this.stride;return new S(this.data,z[C],z[M],z[g],z[P],z[T],O[C],O[M],O[g],O[P],O[T],this.offset)},L.pick=function(C,M,g,P,T){var z=[],O=[],V=this.offset;typeof C=="number"&&C>=0?V=V+this.stride[0]*C|0:(z.push(this.shape[0]),O.push(this.stride[0])),typeof M=="number"&&M>=0?V=V+this.stride[1]*M|0:(z.push(this.shape[1]),O.push(this.stride[1])),typeof g=="number"&&g>=0?V=V+this.stride[2]*g|0:(z.push(this.shape[2]),O.push(this.stride[2])),typeof P=="number"&&P>=0?V=V+this.stride[3]*P|0:(z.push(this.shape[3]),O.push(this.stride[3])),typeof T=="number"&&T>=0?V=V+this.stride[4]*T|0:(z.push(this.shape[4]),O.push(this.stride[4]));var G=k[z.length+1];return G(this.data,z,O,V)},function(C,M,g,P){return new S(C,M[0],M[1],M[2],M[3],M[4],g[0],g[1],g[2],g[3],g[4],P)}}};function h(b,p){var k=p===-1?"T":String(p),E=f[k];return p===-1?E(b):p===0?E(b,v[b][0]):E(b,v[b],c)}function d(b){if(s(b))return"buffer";if(l)switch(Object.prototype.toString.call(b)){case"[object Float64Array]":return"float64";case"[object Float32Array]":return"float32";case"[object Int8Array]":return"int8";case"[object Int16Array]":return"int16";case"[object Int32Array]":return"int32";case"[object Uint8ClampedArray]":return"uint8_clamped";case"[object Uint8Array]":return"uint8";case"[object Uint16Array]":return"uint16";case"[object Uint32Array]":return"uint32";case"[object BigInt64Array]":return"bigint64";case"[object BigUint64Array]":return"biguint64"}return Array.isArray(b)?"array":"generic"}var v={generic:[],buffer:[],array:[],float32:[],float64:[],int8:[],int16:[],int32:[],uint8_clamped:[],uint8:[],uint16:[],uint32:[],bigint64:[],biguint64:[]};function _(b,p,k,E){if(b===void 0){var g=v.array[0];return g([])}else typeof b=="number"&&(b=[b]);p===void 0&&(p=[b.length]);var S=p.length;if(k===void 0){k=new Array(S);for(var L=S-1,x=1;L>=0;--L)k[L]=x,x*=p[L]}if(E===void 0){E=0;for(var L=0;L<S;++L)k[L]<0&&(E-=(p[L]-1)*k[L])}for(var C=d(b),M=v[C];M.length<=S+1;)M.push(h(C,M.length-1));var g=M[S+1];return g(b,p,k,E)}i.exports=_},9921:function(i){i.exports=a;function a(o){var s=o[0],l=o[1],u=o[2],c=o[3],f=o[4],h=o[5],d=o[6],v=o[7],_=o[8],b=o[9],p=o[10],k=o[11],E=o[12],S=o[13],L=o[14],x=o[15],C=s*h-l*f,M=s*d-u*f,g=s*v-c*f,P=l*d-u*h,T=l*v-c*h,z=u*v-c*d,O=_*S-b*E,V=_*L-p*E,G=_*x-k*E,Z=b*L-p*S,j=b*x-k*S,N=p*x-k*L;return C*N-M*j+g*Z+P*G-T*V+z*O}},9922:function(i,a,o){i.exports=l;var s=o(2613);function l(u,c){var f=u[0],h=u[1],d=u[2],v=c[0],_=c[1],b=c[2];return Math.abs(f-v)<=s*Math.max(1,Math.abs(f),Math.abs(v))&&Math.abs(h-_)<=s*Math.max(1,Math.abs(h),Math.abs(_))&&Math.abs(d-b)<=s*Math.max(1,Math.abs(d),Math.abs(b))}},9970:function(i,a,o){i.exports={create:o(7536),clone:o(2335),fromValues:o(3390),copy:o(2933),set:o(4578),add:o(4361),subtract:o(6860),multiply:o(3576),divide:o(1373),min:o(2334),max:o(160),scale:o(9288),scaleAndAdd:o(4844),distance:o(4691),squaredDistance:o(7960),length:o(6808),squaredLength:o(483),negate:o(1498),inverse:o(4494),normalize:o(5177),dot:o(3750),lerp:o(2573),random:o(9131),transformMat4:o(5352),transformQuat:o(4041)}},9977:function(i,a,o){"use strict";i.exports=p;var s=o(9215),l=o(6582),u=o(7399),c=o(7608),f=o(4081);function h(k,E,S){return Math.sqrt(Math.pow(k,2)+Math.pow(E,2)+Math.pow(S,2))}function d(k,E,S,L){return Math.sqrt(Math.pow(k,2)+Math.pow(E,2)+Math.pow(S,2)+Math.pow(L,2))}function v(k,E){var S=E[0],L=E[1],x=E[2],C=E[3],M=d(S,L,x,C);M>1e-6?(k[0]=S/M,k[1]=L/M,k[2]=x/M,k[3]=C/M):(k[0]=k[1]=k[2]=0,k[3]=1)}function _(k,E,S){this.radius=s([S]),this.center=s(E),this.rotation=s(k),this.computedRadius=this.radius.curve(0),this.computedCenter=this.center.curve(0),this.computedRotation=this.rotation.curve(0),this.computedUp=[.1,0,0],this.computedEye=[.1,0,0],this.computedMatrix=[.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],this.recalcMatrix(0)}var b=_.prototype;b.lastT=function(){return Math.max(this.radius.lastT(),this.center.lastT(),this.rotation.lastT())},b.recalcMatrix=function(k){this.radius.curve(k),this.center.curve(k),this.rotation.curve(k);var E=this.computedRotation;v(E,E);var S=this.computedMatrix;u(S,E);var L=this.computedCenter,x=this.computedEye,C=this.computedUp,M=Math.exp(this.computedRadius[0]);x[0]=L[0]+M*S[2],x[1]=L[1]+M*S[6],x[2]=L[2]+M*S[10],C[0]=S[1],C[1]=S[5],C[2]=S[9];for(var g=0;g<3;++g){for(var P=0,T=0;T<3;++T)P+=S[g+4*T]*x[T];S[12+g]=-P}},b.getMatrix=function(k,E){this.recalcMatrix(k);var S=this.computedMatrix;if(E){for(var L=0;L<16;++L)E[L]=S[L];return E}return S},b.idle=function(k){this.center.idle(k),this.radius.idle(k),this.rotation.idle(k)},b.flush=function(k){this.center.flush(k),this.radius.flush(k),this.rotation.flush(k)},b.pan=function(k,E,S,L){E=E||0,S=S||0,L=L||0,this.recalcMatrix(k);var x=this.computedMatrix,C=x[1],M=x[5],g=x[9],P=h(C,M,g);C/=P,M/=P,g/=P;var T=x[0],z=x[4],O=x[8],V=T*C+z*M+O*g;T-=C*V,z-=M*V,O-=g*V;var G=h(T,z,O);T/=G,z/=G,O/=G;var Z=x[2],j=x[6],N=x[10],H=Z*C+j*M+N*g,te=Z*T+j*z+N*O;Z-=H*C+te*T,j-=H*M+te*z,N-=H*g+te*O;var oe=h(Z,j,N);Z/=oe,j/=oe,N/=oe;var _e=T*E+C*S,Ee=z*E+M*S,Ce=O*E+g*S;this.center.move(k,_e,Ee,Ce);var me=Math.exp(this.computedRadius[0]);me=Math.max(1e-4,me+L),this.radius.set(k,Math.log(me))},b.rotate=function(k,E,S,L){this.recalcMatrix(k),E=E||0,S=S||0;var x=this.computedMatrix,C=x[0],M=x[4],g=x[8],P=x[1],T=x[5],z=x[9],O=x[2],V=x[6],G=x[10],Z=E*C+S*P,j=E*M+S*T,N=E*g+S*z,H=-(V*N-G*j),te=-(G*Z-O*N),oe=-(O*j-V*Z),_e=Math.sqrt(Math.max(0,1-Math.pow(H,2)-Math.pow(te,2)-Math.pow(oe,2))),Ee=d(H,te,oe,_e);Ee>1e-6?(H/=Ee,te/=Ee,oe/=Ee,_e/=Ee):(H=te=oe=0,_e=1);var Ce=this.computedRotation,me=Ce[0],ie=Ce[1],Se=Ce[2],Le=Ce[3],Ae=me*_e+Le*H+ie*oe-Se*te,Fe=ie*_e+Le*te+Se*H-me*oe,Pe=Se*_e+Le*oe+me*te-ie*H,ge=Le*_e-me*H-ie*te-Se*oe;if(L){H=O,te=V,oe=G;var Re=Math.sin(L)/h(H,te,oe);H*=Re,te*=Re,oe*=Re,_e=Math.cos(E),Ae=Ae*_e+ge*H+Fe*oe-Pe*te,Fe=Fe*_e+ge*te+Pe*H-Ae*oe,Pe=Pe*_e+ge*oe+Ae*te-Fe*H,ge=ge*_e-Ae*H-Fe*te-Pe*oe}var ce=d(Ae,Fe,Pe,ge);ce>1e-6?(Ae/=ce,Fe/=ce,Pe/=ce,ge/=ce):(Ae=Fe=Pe=0,ge=1),this.rotation.set(k,Ae,Fe,Pe,ge)},b.lookAt=function(k,E,S,L){this.recalcMatrix(k),S=S||this.computedCenter,E=E||this.computedEye,L=L||this.computedUp;var x=this.computedMatrix;l(x,E,S,L);var C=this.computedRotation;f(C,x[0],x[1],x[2],x[4],x[5],x[6],x[8],x[9],x[10]),v(C,C),this.rotation.set(k,C[0],C[1],C[2],C[3]);for(var M=0,g=0;g<3;++g)M+=Math.pow(S[g]-E[g],2);this.radius.set(k,.5*Math.log(Math.max(M,1e-6))),this.center.set(k,S[0],S[1],S[2])},b.translate=function(k,E,S,L){this.center.move(k,E||0,S||0,L||0)},b.setMatrix=function(k,E){var S=this.computedRotation;f(S,E[0],E[1],E[2],E[4],E[5],E[6],E[8],E[9],E[10]),v(S,S),this.rotation.set(k,S[0],S[1],S[2],S[3]);var L=this.computedMatrix;c(L,E);var x=L[15];if(Math.abs(x)>1e-6){var C=L[12]/x,M=L[13]/x,g=L[14]/x;this.recalcMatrix(k);var P=Math.exp(this.computedRadius[0]);this.center.set(k,C-L[2]*P,M-L[6]*P,g-L[10]*P),this.radius.idle(k)}else this.center.idle(k),this.radius.idle(k)},b.setDistance=function(k,E){E>0&&this.radius.set(k,Math.log(E))},b.setDistanceLimits=function(k,E){k>0?k=Math.log(k):k=-1/0,E>0?E=Math.log(E):E=1/0,E=Math.max(E,k),this.radius.bounds[0][0]=k,this.radius.bounds[1][0]=E},b.getDistanceLimits=function(k){var E=this.radius.bounds;return k?(k[0]=Math.exp(E[0][0]),k[1]=Math.exp(E[1][0]),k):[Math.exp(E[0][0]),Math.exp(E[1][0])]},b.toJSON=function(){return this.recalcMatrix(this.lastT()),{center:this.computedCenter.slice(),rotation:this.computedRotation.slice(),distance:Math.log(this.computedRadius[0]),zoomMin:this.radius.bounds[0][0],zoomMax:this.radius.bounds[1][0]}},b.fromJSON=function(k){var E=this.lastT(),S=k.center;S&&this.center.set(E,S[0],S[1],S[2]);var L=k.rotation;L&&this.rotation.set(E,L[0],L[1],L[2],L[3]);var x=k.distance;x&&x>0&&this.radius.set(E,Math.log(x)),this.setDistanceLimits(k.zoomMin,k.zoomMax)};function p(k){k=k||{};var E=k.center||[0,0,0],S=k.rotation||[0,0,0,1],L=k.radius||1;E=[].slice.call(E,0,3),S=[].slice.call(S,0,4),v(S,S);var x=new _(S,E,Math.log(L));return x.setDistanceLimits(k.zoomMin,k.zoomMax),("eye"in k||"up"in k)&&x.lookAt(0,k.eye,k.center,k.up),x}},9994:function(i,a,o){"use strict";var s=o(9618),l=o(8277);i.exports=function(c,f){for(var h=[],d=c,v=1;Array.isArray(d);)h.push(d.length),v*=d.length,d=d[0];return h.length===0?s():(f||(f=s(new Float64Array(v),h)),l(f,c),f)}}},t={};function r(i){var a=t[i];if(a!==void 0)return a.exports;var o=t[i]={id:i,loaded:!1,exports:{}};return e[i].call(o.exports,o,o.exports,r),o.loaded=!0,o.exports}(function(){r.g=function(){if(typeof globalThis=="object")return globalThis;try{return this||new Function("return this")()}catch(i){if(typeof window=="object")return window}}()})(),function(){r.nmd=function(i){return i.paths=[],i.children||(i.children=[]),i}}();var n=r(1964);yPe.exports=n})()});var gX=ye((V0r,_Pe)=>{"use strict";_Pe.exports={aliceblue:[240,248,255],antiquewhite:[250,235,215],aqua:[0,255,255],aquamarine:[127,255,212],azure:[240,255,255],beige:[245,245,220],bisque:[255,228,196],black:[0,0,0],blanchedalmond:[255,235,205],blue:[0,0,255],blueviolet:[138,43,226],brown:[165,42,42],burlywood:[222,184,135],cadetblue:[95,158,160],chartreuse:[127,255,0],chocolate:[210,105,30],coral:[255,127,80],cornflowerblue:[100,149,237],cornsilk:[255,248,220],crimson:[220,20,60],cyan:[0,255,255],darkblue:[0,0,139],darkcyan:[0,139,139],darkgoldenrod:[184,134,11],darkgray:[169,169,169],darkgreen:[0,100,0],darkgrey:[169,169,169],darkkhaki:[189,183,107],darkmagenta:[139,0,139],darkolivegreen:[85,107,47],darkorange:[255,140,0],darkorchid:[153,50,204],darkred:[139,0,0],darksalmon:[233,150,122],darkseagreen:[143,188,143],darkslateblue:[72,61,139],darkslategray:[47,79,79],darkslategrey:[47,79,79],darkturquoise:[0,206,209],darkviolet:[148,0,211],deeppink:[255,20,147],deepskyblue:[0,191,255],dimgray:[105,105,105],dimgrey:[105,105,105],dodgerblue:[30,144,255],firebrick:[178,34,34],floralwhite:[255,250,240],forestgreen:[34,139,34],fuchsia:[255,0,255],gainsboro:[220,220,220],ghostwhite:[248,248,255],gold:[255,215,0],goldenrod:[218,165,32],gray:[128,128,128],green:[0,128,0],greenyellow:[173,255,47],grey:[128,128,128],honeydew:[240,255,240],hotpink:[255,105,180],indianred:[205,92,92],indigo:[75,0,130],ivory:[255,255,240],khaki:[240,230,140],lavender:[230,230,250],lavenderblush:[255,240,245],lawngreen:[124,252,0],lemonchiffon:[255,250,205],lightblue:[173,216,230],lightcoral:[240,128,128],lightcyan:[224,255,255],lightgoldenrodyellow:[250,250,210],lightgray:[211,211,211],lightgreen:[144,238,144],lightgrey:[211,211,211],lightpink:[255,182,193],lightsalmon:[255,160,122],lightseagreen:[32,178,170],lightskyblue:[135,206,250],lightslategray:[119,136,153],lightslategrey:[119,136,153],lightsteelblue:[176,196,222],lightyellow:[255,255,224],lime:[0,255,0],limegreen:[50,205,50],linen:[250,240,230],magenta:[255,0,255],maroon:[128,0,0],mediumaquamarine:[102,205,170],mediumblue:[0,0,205],mediumorchid:[186,85,211],mediumpurple:[147,112,219],mediumseagreen:[60,179,113],mediumslateblue:[123,104,238],mediumspringgreen:[0,250,154],mediumturquoise:[72,209,204],mediumvioletred:[199,21,133],midnightblue:[25,25,112],mintcream:[245,255,250],mistyrose:[255,228,225],moccasin:[255,228,181],navajowhite:[255,222,173],navy:[0,0,128],oldlace:[253,245,230],olive:[128,128,0],olivedrab:[107,142,35],orange:[255,165,0],orangered:[255,69,0],orchid:[218,112,214],palegoldenrod:[238,232,170],palegreen:[152,251,152],paleturquoise:[175,238,238],palevioletred:[219,112,147],papayawhip:[255,239,213],peachpuff:[255,218,185],peru:[205,133,63],pink:[255,192,203],plum:[221,160,221],powderblue:[176,224,230],purple:[128,0,128],rebeccapurple:[102,51,153],red:[255,0,0],rosybrown:[188,143,143],royalblue:[65,105,225],saddlebrown:[139,69,19],salmon:[250,128,114],sandybrown:[244,164,96],seagreen:[46,139,87],seashell:[255,245,238],sienna:[160,82,45],silver:[192,192,192],skyblue:[135,206,235],slateblue:[106,90,205],slategray:[112,128,144],slategrey:[112,128,144],snow:[255,250,250],springgreen:[0,255,127],steelblue:[70,130,180],tan:[210,180,140],teal:[0,128,128],thistle:[216,191,216],tomato:[255,99,71],turquoise:[64,224,208],violet:[238,130,238],wheat:[245,222,179],white:[255,255,255],whitesmoke:[245,245,245],yellow:[255,255,0],yellowgreen:[154,205,50]}});var TPe=ye((G0r,wPe)=>{"use strict";var xPe=gX();wPe.exports=QPt;var bPe={red:0,orange:60,yellow:120,green:180,blue:240,purple:300};function QPt(e){var t,r=[],n=1,i;if(typeof e=="string")if(e=e.toLowerCase(),xPe[e])r=xPe[e].slice(),i="rgb";else if(e==="transparent")n=0,i="rgb",r=[0,0,0];else if(/^#[A-Fa-f0-9]+$/.test(e)){var a=e.slice(1),o=a.length,s=o<=4;n=1,s?(r=[parseInt(a[0]+a[0],16),parseInt(a[1]+a[1],16),parseInt(a[2]+a[2],16)],o===4&&(n=parseInt(a[3]+a[3],16)/255)):(r=[parseInt(a[0]+a[1],16),parseInt(a[2]+a[3],16),parseInt(a[4]+a[5],16)],o===8&&(n=parseInt(a[6]+a[7],16)/255)),r[0]||(r[0]=0),r[1]||(r[1]=0),r[2]||(r[2]=0),i="rgb"}else if(t=/^((?:rgb|hs[lvb]|hwb|cmyk?|xy[zy]|gray|lab|lchu?v?|[ly]uv|lms)a?)\s*\(([^\)]*)\)/.exec(e)){var l=t[1],u=l==="rgb",a=l.replace(/a$/,"");i=a;var o=a==="cmyk"?4:a==="gray"?1:3;r=t[2].trim().split(/\s*[,\/]\s*|\s+/).map(function(h,d){if(/%$/.test(h))return d===o?parseFloat(h)/100:a==="rgb"?parseFloat(h)*255/100:parseFloat(h);if(a[d]==="h"){if(/deg$/.test(h))return parseFloat(h);if(bPe[h]!==void 0)return bPe[h]}return parseFloat(h)}),l===a&&r.push(1),n=u||r[o]===void 0?1:r[o],r=r.slice(0,o)}else e.length>10&&/[0-9](?:\s|\/)/.test(e)&&(r=e.match(/([0-9]+)/g).map(function(c){return parseFloat(c)}),i=e.match(/([a-z])/ig).join("").toLowerCase());else isNaN(e)?Array.isArray(e)||e.length?(r=[e[0],e[1],e[2]],i="rgb",n=e.length===4?e[3]:1):e instanceof Object&&(e.r!=null||e.red!=null||e.R!=null?(i="rgb",r=[e.r||e.red||e.R||0,e.g||e.green||e.G||0,e.b||e.blue||e.B||0]):(i="hsl",r=[e.h||e.hue||e.H||0,e.s||e.saturation||e.S||0,e.l||e.lightness||e.L||e.b||e.brightness]),n=e.a||e.alpha||e.opacity||1,e.opacity!=null&&(n/=100)):(i="rgb",r=[e>>>16,(e&65280)>>>8,e&255]);return{space:i,values:r,alpha:n}}});var SPe=ye((H0r,APe)=>{"use strict";var eIt=TPe();APe.exports=function(t){Array.isArray(t)&&t.raw&&(t=String.raw.apply(null,arguments));var r,n,i,a=eIt(t);if(!a.space)return[];var o=[0,0,0],s=a.space[0]==="h"?[360,100,100]:[255,255,255];return r=Array(3),r[0]=Math.min(Math.max(a.values[0],o[0]),s[0]),r[1]=Math.min(Math.max(a.values[1],o[1]),s[1]),r[2]=Math.min(Math.max(a.values[2],o[2]),s[2]),a.space[0]==="h"&&(r=tIt(r)),r.push(Math.min(Math.max(a.alpha,0),1)),r};function tIt(e){var t=e[0]/360,r=e[1]/100,n=e[2]/100,i,a,o,s,l,u=0;if(r===0)return l=n*255,[l,l,l];for(a=n<.5?n*(1+r):n+r-n*r,i=2*n-a,s=[0,0,0];u<3;)o=t+1/3*-(u-1),o<0?o++:o>1&&o--,l=6*o<1?i+(a-i)*6*o:2*o<1?a:3*o<2?i+(a-i)*(2/3-o)*6:i,s[u++]=l*255;return s}});var ZE=ye((j0r,MPe)=>{MPe.exports=rIt;function rIt(e,t,r){return t<r?e<t?t:e>r?r:e:e<r?r:e>t?t:e}});var YD=ye((W0r,EPe)=>{EPe.exports=function(e){switch(e){case"int8":return Int8Array;case"int16":return Int16Array;case"int32":return Int32Array;case"uint8":return Uint8Array;case"uint16":return Uint16Array;case"uint32":return Uint32Array;case"float32":return Float32Array;case"float64":return Float64Array;case"array":return Array;case"uint8_clamped":return Uint8ClampedArray}}});var J_=ye((X0r,kPe)=>{"use strict";var iIt=SPe(),KD=ZE(),nIt=YD();kPe.exports=function(t,r){(r==="float"||!r)&&(r="array"),r==="uint"&&(r="uint8"),r==="uint_clamped"&&(r="uint8_clamped");var n=nIt(r),i=new n(4),a=r!=="uint8"&&r!=="uint8_clamped";return(!t.length||typeof t=="string")&&(t=iIt(t),t[0]/=255,t[1]/=255,t[2]/=255),aIt(t)?(i[0]=t[0],i[1]=t[1],i[2]=t[2],i[3]=t[3]!=null?t[3]:255,a&&(i[0]/=255,i[1]/=255,i[2]/=255,i[3]/=255),i):(a?(i[0]=t[0],i[1]=t[1],i[2]=t[2],i[3]=t[3]!=null?t[3]:1):(i[0]=KD(Math.floor(t[0]*255),0,255),i[1]=KD(Math.floor(t[1]*255),0,255),i[2]=KD(Math.floor(t[2]*255),0,255),i[3]=t[3]==null?255:KD(Math.floor(t[3]*255),0,255)),i)};function aIt(e){return!!(e instanceof Uint8Array||e instanceof Uint8ClampedArray||Array.isArray(e)&&(e[0]>1||e[0]===0)&&(e[1]>1||e[1]===0)&&(e[2]>1||e[2]===0)&&(!e[3]||e[3]>1))}});var Jy=ye((Z0r,CPe)=>{"use strict";var oIt=J_();function sIt(e){return e?oIt(e):[0,0,0,1]}CPe.exports=sIt});var $y=ye((Y0r,zPe)=>{"use strict";var DPe=Eo(),lIt=cd(),JD=J_(),$D=tc(),uIt=Lh().defaultLine,LPe=vv().isArrayOrTypedArray,mX=JD(uIt),FPe=1;function PPe(e,t){var r=e;return r[3]*=t,r}function IPe(e){if(DPe(e))return mX;var t=JD(e);return t.length?t:mX}function RPe(e){return DPe(e)?e:FPe}function cIt(e,t,r){var n=e.color;n&&n._inputArray&&(n=n._inputArray);var i=LPe(n),a=LPe(t),o=$D.extractOpts(e),s=[],l,u,c,f,h;if(o.colorscale!==void 0?l=$D.makeColorScaleFuncFromTrace(e):l=IPe,i?u=function(v,_){return v[_]===void 0?mX:JD(l(v[_]))}:u=IPe,a?c=function(v,_){return v[_]===void 0?FPe:RPe(v[_])}:c=RPe,i||a)for(var d=0;d<r;d++)f=u(n,d),h=c(t,d),s[d]=PPe(f,h);else s=PPe(JD(n),t);return s}function fIt(e){var t=$D.extractOpts(e),r=t.colorscale;return t.reversescale&&(r=$D.flipScale(t.colorscale)),r.map(function(n){var i=n[0],a=lIt(n[1]),o=a.toRgb();return{index:i,rgb:[o.r,o.g,o.b,o.a]}})}zPe.exports={formatColor:cIt,parseColorScale:fIt}});var yX=ye((K0r,OPe)=>{"use strict";OPe.exports={solid:[[],0],dot:[[.5,1],200],dash:[[.5,1],50],longdash:[[.5,1],10],dashdot:[[.5,.625,.875,1],50],longdashdot:[[.5,.7,.8,1],10]}});var QD=ye((J0r,qPe)=>{"use strict";qPe.exports={circle:"\u25CF","circle-open":"\u25CB",square:"\u25A0","square-open":"\u25A1",diamond:"\u25C6","diamond-open":"\u25C7",cross:"+",x:"\u274C"}});var NPe=ye(($0r,BPe)=>{"use strict";var hIt=qa();function _X(e,t,r,n){if(!t||!t.visible)return null;for(var i=hIt.getComponentMethod("errorbars","makeComputeError")(t),a=new Array(e.length),o=0;o<e.length;o++){var s=i(+e[o],o);if(n.type==="log"){var l=n.c2l(e[o]),u=e[o]-s[0],c=e[o]+s[1];if(a[o]=[(n.c2l(u,!0)-l)*r,(n.c2l(c,!0)-l)*r],u>0){var f=n.c2l(u);n._lowerLogErrorBound||(n._lowerLogErrorBound=f),n._lowerErrorBound=Math.min(n._lowerLogErrorBound,f)}}else a[o]=[-s[0]*r,s[1]*r]}return a}function dIt(e){for(var t=0;t<e.length;t++)if(e[t])return e[t].length;return 0}function vIt(e,t,r){var n=[_X(e.x,e.error_x,t[0],r.xaxis),_X(e.y,e.error_y,t[1],r.yaxis),_X(e.z,e.error_z,t[2],r.zaxis)],i=dIt(n);if(i===0)return null;for(var a=new Array(i),o=0;o<i;o++){for(var s=[[0,0,0],[0,0,0]],l=0;l<3;l++)if(n[l])for(var u=0;u<2;u++)s[u][l]=n[l][o][u];a[o]=s}return a}BPe.exports=vIt});var YPe=ye((Q0r,ZPe)=>{"use strict";var pIt=Od().gl_line3d,UPe=Od().gl_scatter3d,gIt=Od().gl_error3d,mIt=Od().gl_mesh3d,yIt=Od().delaunay_triangulate,Qy=Dr(),WPe=Jy(),eF=$y().formatColor,_It=k3(),xX=yX(),xIt=QD(),bIt=ho(),wIt=ip().appendArrayPointValue,TIt=NPe();function XPe(e,t){this.scene=e,this.uid=t,this.linePlot=null,this.scatterPlot=null,this.errorBars=null,this.textMarkers=null,this.delaunayMesh=null,this.color=null,this.mode="",this.dataPoints=[],this.axesBounds=[[-1/0,-1/0,-1/0],[1/0,1/0,1/0]],this.textLabels=null,this.data=null}var wX=XPe.prototype;wX.handlePick=function(e){if(e.object&&(e.object===this.linePlot||e.object===this.delaunayMesh||e.object===this.textMarkers||e.object===this.scatterPlot)){var t=e.index=e.data.index;return e.object.highlight&&e.object.highlight(null),this.scatterPlot&&(e.object=this.scatterPlot,this.scatterPlot.highlight(e.data)),e.textLabel="",this.textLabels&&(Qy.isArrayOrTypedArray(this.textLabels)?(this.textLabels[t]||this.textLabels[t]===0)&&(e.textLabel=this.textLabels[t]):e.textLabel=this.textLabels),e.traceCoordinate=[this.data.x[t],this.data.y[t],this.data.z[t]],!0}};function AIt(e,t,r){var n=(r+1)%3,i=(r+2)%3,a=[],o=[],s;for(s=0;s<e.length;++s){var l=e[s];isNaN(l[n])||!isFinite(l[n])||isNaN(l[i])||!isFinite(l[i])||(a.push([l[n],l[i]]),o.push(s))}var u=yIt(a);for(s=0;s<u.length;++s)for(var c=u[s],f=0;f<c.length;++f)c[f]=o[c[f]];return{positions:e,cells:u,meshColor:t}}function SIt(e){for(var t=[0,0,0],r=[[0,0,0],[0,0,0],[0,0,0]],n=[1,1,1],i=0;i<3;i++){var a=e[i];a&&a.copy_zstyle!==!1&&e[2].visible!==!1&&(a=e[2]),!(!a||!a.visible)&&(t[i]=a.width/2,r[i]=WPe(a.color),n[i]=a.thickness)}return{capSize:t,color:r,lineWidth:n}}function VPe(e){return e==null?0:e.indexOf("left")>-1?-1:e.indexOf("right")>-1?1:0}function GPe(e){return e==null?0:e.indexOf("top")>-1?-1:e.indexOf("bottom")>-1?1:0}function MIt(e){var t=0,r=0,n=[t,r];if(Array.isArray(e))for(var i=0;i<e.length;i++)n[i]=[t,r],e[i]&&(n[i][0]=VPe(e[i]),n[i][1]=GPe(e[i]));else n[0]=VPe(e),n[1]=GPe(e);return n}function EIt(e,t){return t(e*4)}function kIt(e){return xIt[e]}function bX(e,t,r,n,i){var a=null;if(Qy.isArrayOrTypedArray(e)){a=[];for(var o=0;o<t;o++)e[o]===void 0?a[o]=n:a[o]=r(e[o],i)}else a=r(e,Qy.identity);return a}function CIt(e,t){var r=[],n=e.fullSceneLayout,i=e.dataScale,a=n.xaxis,o=n.yaxis,s=n.zaxis,l=t.marker,u=t.line,c=t.x||[],f=t.y||[],h=t.z||[],d=c.length,v=t.xcalendar,_=t.ycalendar,b=t.zcalendar,p,k,E,S,L,x;for(L=0;L<d;L++)p=a.d2l(c[L],0,v)*i[0],k=o.d2l(f[L],0,_)*i[1],E=s.d2l(h[L],0,b)*i[2],r[L]=[p,k,E];if(Array.isArray(t.text))x=t.text;else if(Qy.isTypedArray(t.text))x=Array.from(t.text);else if(t.text!==void 0)for(x=new Array(d),L=0;L<d;L++)x[L]=t.text;function C(oe,_e){var Ee=n[oe];return bIt.tickText(Ee,Ee.d2l(_e),!0).text}var M=t.texttemplate;if(M){var g=e.fullLayout,P=g._d3locale,T=Array.isArray(M),z=T?Math.min(M.length,d):d,O=T?function(oe){return M[oe]}:function(){return M};for(x=new Array(z),L=0;L<z;L++){var V={x:c[L],y:f[L],z:h[L]},G={xLabel:C("xaxis",c[L]),yLabel:C("yaxis",f[L]),zLabel:C("zaxis",h[L])},Z={};wIt(Z,t,L),x[L]=Qy.texttemplateString({data:[Z,V,t._meta],fallback:t.texttemplatefallback,labels:G,locale:P,template:O(L)})}}if(S={position:r,mode:t.mode,text:x},"line"in t&&(S.lineColor=eF(u,1,d),S.lineWidth=u.width,S.lineDashes=u.dash),"marker"in t){var j=_It(t);S.scatterColor=eF(l,1,d),S.scatterSize=bX(l.size,d,EIt,20,j),S.scatterMarker=bX(l.symbol,d,kIt,"\u25CF"),S.scatterLineWidth=l.line.width,S.scatterLineColor=eF(l.line,1,d),S.scatterAngle=0}"textposition"in t&&(S.textOffset=MIt(t.textposition),S.textColor=eF(t.textfont,1,d),S.textSize=bX(t.textfont.size,d,Qy.identity,12),S.textFontFamily=t.textfont.family,S.textFontWeight=t.textfont.weight,S.textFontStyle=t.textfont.style,S.textFontVariant=t.textfont.variant,S.textAngle=0);var N=["x","y","z"];for(S.project=[!1,!1,!1],S.projectScale=[1,1,1],S.projectOpacity=[1,1,1],L=0;L<3;++L){var H=t.projection[N[L]];(S.project[L]=H.show)&&(S.projectOpacity[L]=H.opacity,S.projectScale[L]=H.scale)}S.errorBounds=TIt(t,i,n);var te=SIt([t.error_x,t.error_y,t.error_z]);return S.errorColor=te.color,S.errorLineWidth=te.lineWidth,S.errorCapSize=te.capSize,S.delaunayAxis=t.surfaceaxis,S.delaunayColor=WPe(t.surfacecolor),S}function HPe(e){if(Qy.isArrayOrTypedArray(e)){var t=e[0];return Qy.isArrayOrTypedArray(t)&&(e=t),"rgb("+e.slice(0,3).map(function(r){return Math.round(r*255)})+")"}return null}function jPe(e){return Qy.isArrayOrTypedArray(e)?e.length===4&&typeof e[0]=="number"?HPe(e):e.map(HPe):null}wX.update=function(e){var t=this.scene.glplot.gl,r,n,i,a,o=xX.solid;this.data=e;var s=CIt(this.scene,e);"mode"in s&&(this.mode=s.mode),"lineDashes"in s&&s.lineDashes in xX&&(o=xX[s.lineDashes]),this.color=jPe(s.scatterColor)||jPe(s.lineColor),this.dataPoints=s.position,r={gl:this.scene.glplot.gl,position:s.position,color:s.lineColor,lineWidth:s.lineWidth||1,dashes:o[0],dashScale:o[1],opacity:e.opacity,connectGaps:e.connectgaps},this.mode.indexOf("lines")!==-1?this.linePlot?this.linePlot.update(r):(this.linePlot=pIt(r),this.linePlot._trace=this,this.scene.glplot.add(this.linePlot)):this.linePlot&&(this.scene.glplot.remove(this.linePlot),this.linePlot.dispose(),this.linePlot=null);var l=e.opacity;if(e.marker&&e.marker.opacity!==void 0&&(l*=e.marker.opacity),n={gl:this.scene.glplot.gl,position:s.position,color:s.scatterColor,size:s.scatterSize,glyph:s.scatterMarker,opacity:l,orthographic:!0,lineWidth:s.scatterLineWidth,lineColor:s.scatterLineColor,project:s.project,projectScale:s.projectScale,projectOpacity:s.projectOpacity},this.mode.indexOf("markers")!==-1?this.scatterPlot?this.scatterPlot.update(n):(this.scatterPlot=UPe(n),this.scatterPlot._trace=this,this.scatterPlot.highlightScale=1,this.scene.glplot.add(this.scatterPlot)):this.scatterPlot&&(this.scene.glplot.remove(this.scatterPlot),this.scatterPlot.dispose(),this.scatterPlot=null),a={gl:this.scene.glplot.gl,position:s.position,glyph:s.text,color:s.textColor,size:s.textSize,angle:s.textAngle,alignment:s.textOffset,font:s.textFontFamily,fontWeight:s.textFontWeight,fontStyle:s.textFontStyle,fontVariant:s.textFontVariant,orthographic:!0,lineWidth:0,project:!1,opacity:e.opacity},this.textLabels=e.hovertext||e.text,this.mode.indexOf("text")!==-1?this.textMarkers?this.textMarkers.update(a):(this.textMarkers=UPe(a),this.textMarkers._trace=this,this.textMarkers.highlightScale=1,this.scene.glplot.add(this.textMarkers)):this.textMarkers&&(this.scene.glplot.remove(this.textMarkers),this.textMarkers.dispose(),this.textMarkers=null),i={gl:this.scene.glplot.gl,position:s.position,color:s.errorColor,error:s.errorBounds,lineWidth:s.errorLineWidth,capSize:s.errorCapSize,opacity:e.opacity},this.errorBars?s.errorBounds?this.errorBars.update(i):(this.scene.glplot.remove(this.errorBars),this.errorBars.dispose(),this.errorBars=null):s.errorBounds&&(this.errorBars=gIt(i),this.errorBars._trace=this,this.scene.glplot.add(this.errorBars)),s.delaunayAxis>=0){var u=AIt(s.position,s.delaunayColor,s.delaunayAxis);u.opacity=e.opacity,this.delaunayMesh?this.delaunayMesh.update(u):(u.gl=t,this.delaunayMesh=mIt(u),this.delaunayMesh._trace=this,this.scene.glplot.add(this.delaunayMesh))}else this.delaunayMesh&&(this.scene.glplot.remove(this.delaunayMesh),this.delaunayMesh.dispose(),this.delaunayMesh=null)};wX.dispose=function(){this.linePlot&&(this.scene.glplot.remove(this.linePlot),this.linePlot.dispose()),this.scatterPlot&&(this.scene.glplot.remove(this.scatterPlot),this.scatterPlot.dispose()),this.errorBars&&(this.scene.glplot.remove(this.errorBars),this.errorBars.dispose()),this.textMarkers&&(this.scene.glplot.remove(this.textMarkers),this.textMarkers.dispose()),this.delaunayMesh&&(this.scene.glplot.remove(this.delaunayMesh),this.delaunayMesh.dispose())};function LIt(e,t){var r=new XPe(e,t.uid);return r.update(t),r}ZPe.exports=LIt});var EX=ye((egr,QPe)=>{"use strict";var e1=pf(),PIt=ec(),MX=Tu(),TX=df().axisHoverFormat,{hovertemplateAttrs:IIt,texttemplateAttrs:RIt,templatefallbackAttrs:KPe}=Ll(),JPe=Gl(),DIt=yX(),FIt=QD(),Kg=Ao().extendFlat,zIt=mc().overrideAll,$Pe=Z1(),OIt=e1.line,V2=e1.marker,qIt=V2.line,BIt=Kg({width:OIt.width,dash:{valType:"enumerated",values:$Pe(DIt),dflt:"solid"}},MX("line"));function AX(e){return{show:{valType:"boolean",dflt:!1},opacity:{valType:"number",min:0,max:1,dflt:1},scale:{valType:"number",min:0,max:10,dflt:2/3}}}var SX=QPe.exports=zIt({x:e1.x,y:e1.y,z:{valType:"data_array"},text:Kg({},e1.text,{}),texttemplate:RIt(),texttemplatefallback:KPe({editType:"calc"}),hovertext:Kg({},e1.hovertext,{}),hovertemplate:IIt(),hovertemplatefallback:KPe(),xhoverformat:TX("x"),yhoverformat:TX("y"),zhoverformat:TX("z"),mode:Kg({},e1.mode,{dflt:"lines+markers"}),surfaceaxis:{valType:"enumerated",values:[-1,0,1,2],dflt:-1},surfacecolor:{valType:"color"},projection:{x:AX("x"),y:AX("y"),z:AX("z")},connectgaps:e1.connectgaps,line:BIt,marker:Kg({symbol:{valType:"enumerated",values:$Pe(FIt),dflt:"circle",arrayOk:!0},size:Kg({},V2.size,{dflt:8}),sizeref:V2.sizeref,sizemin:V2.sizemin,sizemode:V2.sizemode,opacity:Kg({},V2.opacity,{arrayOk:!1}),colorbar:V2.colorbar,line:Kg({width:Kg({},qIt.width,{arrayOk:!1})},MX("marker.line"))},MX("marker")),textposition:Kg({},e1.textposition,{dflt:"top center"}),textfont:PIt({noFontShadow:!0,noFontLineposition:!0,noFontTextcase:!0,editType:"calc",colorEditType:"style",arrayOk:!0,variantValues:["normal","small-caps"]}),opacity:JPe.opacity,hoverinfo:Kg({},JPe.hoverinfo)},"calc","nested");SX.x.editType=SX.y.editType=SX.z.editType="calc+clearAxisTypes"});var rIe=ye((tgr,tIe)=>{"use strict";var eIe=qa(),NIt=Dr(),kX=Ru(),UIt=$p(),VIt=R0(),GIt=D0(),HIt=EX();tIe.exports=function(t,r,n,i){function a(d,v){return NIt.coerce(t,r,HIt,d,v)}var o=jIt(t,r,a,i);if(!o){r.visible=!1;return}a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),a("xhoverformat"),a("yhoverformat"),a("zhoverformat"),a("mode"),kX.hasMarkers(r)&&UIt(t,r,n,i,a,{noSelect:!0,noAngle:!0}),kX.hasLines(r)&&(a("connectgaps"),VIt(t,r,n,i,a)),kX.hasText(r)&&(a("texttemplate"),a("texttemplatefallback"),GIt(t,r,i,a,{noSelect:!0,noFontShadow:!0,noFontLineposition:!0,noFontTextcase:!0}));var s=(r.line||{}).color,l=(r.marker||{}).color;a("surfaceaxis")>=0&&a("surfacecolor",s||l);for(var u=["x","y","z"],c=0;c<3;++c){var f="projection."+u[c];a(f+".show")&&(a(f+".opacity"),a(f+".scale"))}var h=eIe.getComponentMethod("errorbars","supplyDefaults");h(t,r,s||l||n,{axis:"z"}),h(t,r,s||l||n,{axis:"y",inherit:"z"}),h(t,r,s||l||n,{axis:"x",inherit:"z"})};function jIt(e,t,r,n){var i=0,a=r("x"),o=r("y"),s=r("z"),l=eIe.getComponentMethod("calendars","handleTraceDefaults");return l(e,t,["x","y","z"],n),a&&o&&s&&(i=Math.min(a.length,o.length,s.length),t._length=t._xlength=t._ylength=t._zlength=i),i}});var nIe=ye((rgr,iIe)=>{"use strict";var WIt=km(),XIt=F0();iIe.exports=function(t,r){var n=[{x:!1,y:!1,trace:r,t:{}}];return WIt(n,r),XIt(t,r),n}});var oIe=ye((igr,aIe)=>{aIe.exports=ZIt;function ZIt(e,t){if(typeof e!="string")throw new TypeError("must specify type string");if(t=t||{},typeof document=="undefined"&&!t.canvas)return null;var r=t.canvas||document.createElement("canvas");typeof t.width=="number"&&(r.width=t.width),typeof t.height=="number"&&(r.height=t.height);var n=t,i;try{var a=[e];e.indexOf("webgl")===0&&a.push("experimental-"+e);for(var o=0;o<a.length;o++)if(i=r.getContext(a[o],n),i)return i}catch(s){i=null}return i||null}});var lIe=ye((ngr,sIe)=>{var YIt=oIe();sIe.exports=function(t){return YIt("webgl",t)}});var CX=ye((agr,cIe)=>{"use strict";var uIe=ka(),KIt=function(){};cIe.exports=function(t){for(var r in t)typeof t[r]=="function"&&(t[r]=KIt);t.destroy=function(){t.container.parentNode.removeChild(t.container)};var n=document.createElement("div");n.className="no-webgl",n.style.cursor="pointer",n.style.fontSize="24px",n.style.color=uIe.defaults[0],n.style.position="absolute",n.style.left=n.style.top="0px",n.style.width=n.style.height="100%",n.style["background-color"]=uIe.lightLine,n.style["z-index"]=30;var i=document.createElement("p");return i.textContent="WebGL is not supported by your browser - visit https://get.webgl.org for more info",i.style.position="relative",i.style.top="50%",i.style.left="50%",i.style.height="30%",i.style.width="50%",i.style.margin="-15% 0 0 -25%",n.appendChild(i),t.container.appendChild(n),t.container.style.background="#FFFFFF",t.container.onclick=function(){window.open("https://get.webgl.org")},!1}});var dIe=ye((ogr,hIe)=>{"use strict";var G2=Jy(),JIt=Dr(),$It=["xaxis","yaxis","zaxis"];function fIe(){this.bounds=[[-10,-10,-10],[10,10,10]],this.ticks=[[],[],[]],this.tickEnable=[!0,!0,!0],this.tickFont=["sans-serif","sans-serif","sans-serif"],this.tickSize=[12,12,12],this.tickFontWeight=["normal","normal","normal","normal"],this.tickFontStyle=["normal","normal","normal","normal"],this.tickFontVariant=["normal","normal","normal","normal"],this.tickAngle=[0,0,0],this.tickColor=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.tickPad=[18,18,18],this.labels=["x","y","z"],this.labelEnable=[!0,!0,!0],this.labelFont=["Open Sans","Open Sans","Open Sans"],this.labelSize=[20,20,20],this.labelFontWeight=["normal","normal","normal","normal"],this.labelFontStyle=["normal","normal","normal","normal"],this.labelFontVariant=["normal","normal","normal","normal"],this.labelColor=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.labelPad=[30,30,30],this.lineEnable=[!0,!0,!0],this.lineMirror=[!1,!1,!1],this.lineWidth=[1,1,1],this.lineColor=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.lineTickEnable=[!0,!0,!0],this.lineTickMirror=[!1,!1,!1],this.lineTickLength=[10,10,10],this.lineTickWidth=[1,1,1],this.lineTickColor=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.gridEnable=[!0,!0,!0],this.gridWidth=[1,1,1],this.gridColor=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.zeroEnable=[!0,!0,!0],this.zeroLineColor=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.zeroLineWidth=[2,2,2],this.backgroundEnable=[!0,!0,!0],this.backgroundColor=[[.8,.8,.8,.5],[.8,.8,.8,.5],[.8,.8,.8,.5]],this._defaultTickPad=this.tickPad.slice(),this._defaultLabelPad=this.labelPad.slice(),this._defaultLineTickLength=this.lineTickLength.slice()}var QIt=fIe.prototype;QIt.merge=function(e,t){for(var r=this,n=0;n<3;++n){var i=t[$It[n]];if(!i.visible){r.tickEnable[n]=!1,r.labelEnable[n]=!1,r.lineEnable[n]=!1,r.lineTickEnable[n]=!1,r.gridEnable[n]=!1,r.zeroEnable[n]=!1,r.backgroundEnable[n]=!1;continue}r.labels[n]=e._meta?JIt.templateString(i.title.text,e._meta):i.title.text,"font"in i.title&&(i.title.font.color&&(r.labelColor[n]=G2(i.title.font.color)),i.title.font.family&&(r.labelFont[n]=i.title.font.family),i.title.font.size&&(r.labelSize[n]=i.title.font.size),i.title.font.weight&&(r.labelFontWeight[n]=i.title.font.weight),i.title.font.style&&(r.labelFontStyle[n]=i.title.font.style),i.title.font.variant&&(r.labelFontVariant[n]=i.title.font.variant)),"showline"in i&&(r.lineEnable[n]=i.showline),"linecolor"in i&&(r.lineColor[n]=G2(i.linecolor)),"linewidth"in i&&(r.lineWidth[n]=i.linewidth),"showgrid"in i&&(r.gridEnable[n]=i.showgrid),"gridcolor"in i&&(r.gridColor[n]=G2(i.gridcolor)),"gridwidth"in i&&(r.gridWidth[n]=i.gridwidth),i.type==="log"?r.zeroEnable[n]=!1:"zeroline"in i&&(r.zeroEnable[n]=i.zeroline),"zerolinecolor"in i&&(r.zeroLineColor[n]=G2(i.zerolinecolor)),"zerolinewidth"in i&&(r.zeroLineWidth[n]=i.zerolinewidth),"ticks"in i&&i.ticks?r.lineTickEnable[n]=!0:r.lineTickEnable[n]=!1,"ticklen"in i&&(r.lineTickLength[n]=r._defaultLineTickLength[n]=i.ticklen),"tickcolor"in i&&(r.lineTickColor[n]=G2(i.tickcolor)),"tickwidth"in i&&(r.lineTickWidth[n]=i.tickwidth),"tickangle"in i&&(r.tickAngle[n]=i.tickangle==="auto"?-3600:Math.PI*-i.tickangle/180),"showticklabels"in i&&(r.tickEnable[n]=i.showticklabels),"tickfont"in i&&(i.tickfont.color&&(r.tickColor[n]=G2(i.tickfont.color)),i.tickfont.family&&(r.tickFont[n]=i.tickfont.family),i.tickfont.size&&(r.tickSize[n]=i.tickfont.size),i.tickfont.weight&&(r.tickFontWeight[n]=i.tickfont.weight),i.tickfont.style&&(r.tickFontStyle[n]=i.tickfont.style),i.tickfont.variant&&(r.tickFontVariant[n]=i.tickfont.variant)),"mirror"in i?["ticks","all","allticks"].indexOf(i.mirror)!==-1?(r.lineTickMirror[n]=!0,r.lineMirror[n]=!0):i.mirror===!0?(r.lineTickMirror[n]=!1,r.lineMirror[n]=!0):(r.lineTickMirror[n]=!1,r.lineMirror[n]=!1):r.lineMirror[n]=!1,"showbackground"in i&&i.showbackground!==!1?(r.backgroundEnable[n]=!0,r.backgroundColor[n]=G2(i.backgroundcolor)):r.backgroundEnable[n]=!1}};function e8t(e,t){var r=new fIe;return r.merge(e,t),r}hIe.exports=e8t});var gIe=ye((sgr,pIe)=>{"use strict";var t8t=Jy(),r8t=["xaxis","yaxis","zaxis"];function vIe(){this.enabled=[!0,!0,!0],this.colors=[[0,0,0,1],[0,0,0,1],[0,0,0,1]],this.drawSides=[!0,!0,!0],this.lineWidth=[1,1,1]}var i8t=vIe.prototype;i8t.merge=function(e){for(var t=0;t<3;++t){var r=e[r8t[t]];if(!r.visible){this.enabled[t]=!1,this.drawSides[t]=!1;continue}this.enabled[t]=r.showspikes,this.colors[t]=t8t(r.spikecolor),this.drawSides[t]=r.spikesides,this.lineWidth[t]=r.spikethickness}};function n8t(e){var t=new vIe;return t.merge(e),t}pIe.exports=n8t});var _Ie=ye((lgr,yIe)=>{"use strict";yIe.exports=u8t;var mIe=ho(),a8t=Dr(),o8t=["xaxis","yaxis","zaxis"],s8t=[0,0,0];function l8t(e){for(var t=new Array(3),r=0;r<3;++r){for(var n=e[r],i=new Array(n.length),a=0;a<n.length;++a)i[a]=n[a].x;t[r]=i}return t}function u8t(e){for(var t=e.axesOptions,r=e.glplot.axesPixels,n=e.fullSceneLayout,i=[[],[],[]],a=0;a<3;++a){var o=n[o8t[a]];if(o._length=(r[a].hi-r[a].lo)*r[a].pixelsPerDataUnit/e.dataScale[a],Math.abs(o._length)===1/0||isNaN(o._length))i[a]=[];else{o._input_range=o.range.slice(),o.range[0]=r[a].lo/e.dataScale[a],o.range[1]=r[a].hi/e.dataScale[a],o._m=1/(e.dataScale[a]*r[a].pixelsPerDataUnit),o.range[0]===o.range[1]&&(o.range[0]-=1,o.range[1]+=1);var s=o.tickmode;if(o.tickmode==="auto"){o.tickmode="linear";var l=o.nticks||a8t.constrain(o._length/40,4,9);mIe.autoTicks(o,Math.abs(o.range[1]-o.range[0])/l)}for(var u=mIe.calcTicks(o,{msUTC:!0}),c=0;c<u.length;++c)u[c].x=u[c].x*e.dataScale[a],o.type==="date"&&(u[c].text=u[c].text.replace(/\<br\>/g," "));i[a]=u,o.tickmode=s}}t.ticks=i;for(var a=0;a<3;++a){s8t[a]=.5*(e.glplot.bounds[0][a]+e.glplot.bounds[1][a]);for(var c=0;c<2;++c)t.bounds[c][a]=e.glplot.bounds[c][a]}e.contourLevels=l8t(i)}});var MIe=ye((ugr,SIe)=>{"use strict";var wIe=Od().gl_plot3d,c8t=wIe.createCamera,xIe=wIe.createScene,f8t=lIe(),h8t=RL(),iF=qa(),up=Dr(),rF=up.preserveDrawingBuffer(),nF=ho(),Jg=vf(),d8t=Jy(),v8t=CX(),p8t=UU(),g8t=dIe(),m8t=gIe(),y8t=_Ie(),_8t=Ag().applyAutorangeOptions,YE,tF,TIe=!1;function AIe(e,t){var r=document.createElement("div"),n=e.container;this.graphDiv=e.graphDiv;var i=document.createElementNS("http://www.w3.org/2000/svg","svg");i.style.position="absolute",i.style.top=i.style.left="0px",i.style.width=i.style.height="100%",i.style["z-index"]=20,i.style["pointer-events"]="none",r.appendChild(i),this.svgContainer=i,r.id=e.id,r.style.position="absolute",r.style.top=r.style.left="0px",r.style.width=r.style.height="100%",n.appendChild(r),this.fullLayout=t,this.id=e.id||"scene",this.fullSceneLayout=t[this.id],this.plotArgs=[[],{},{}],this.axesOptions=g8t(t,t[this.id]),this.spikeOptions=m8t(t[this.id]),this.container=r,this.staticMode=!!e.staticPlot,this.pixelRatio=this.pixelRatio||e.plotGlPixelRatio||2,this.dataScale=[1,1,1],this.contourLevels=[[],[],[]],this.convertAnnotations=iF.getComponentMethod("annotations3d","convert"),this.drawAnnotations=iF.getComponentMethod("annotations3d","draw"),this.initializeGLPlot()}var Sv=AIe.prototype;Sv.prepareOptions=function(){var e=this,t={canvas:e.canvas,gl:e.gl,glOptions:{preserveDrawingBuffer:rF,premultipliedAlpha:!0,antialias:!0},container:e.container,axes:e.axesOptions,spikes:e.spikeOptions,pickRadius:10,snapToData:!0,autoScale:!0,autoBounds:!1,cameraObject:e.camera,pixelRatio:e.pixelRatio};if(e.staticMode){if(!tF&&(YE=document.createElement("canvas"),tF=f8t({canvas:YE,preserveDrawingBuffer:!0,premultipliedAlpha:!0,antialias:!0}),!tF))throw new Error("error creating static canvas/context for image server");t.gl=tF,t.canvas=YE}return t};var bIe=!0;Sv.tryCreatePlot=function(){var e=this,t=e.prepareOptions(),r=!0;try{e.glplot=xIe(t)}catch(n){if(e.staticMode||!bIe||rF)r=!1;else{up.warn(["webgl setup failed possibly due to","false preserveDrawingBuffer config.","The mobile/tablet device may not be detected by is-mobile module.","Enabling preserveDrawingBuffer in second attempt to create webgl scene..."].join(" "));try{rF=t.glOptions.preserveDrawingBuffer=!0,e.glplot=xIe(t)}catch(i){rF=t.glOptions.preserveDrawingBuffer=!1,r=!1}}}return bIe=!1,r};Sv.initializeGLCamera=function(){var e=this,t=e.fullSceneLayout.camera,r=t.projection.type==="orthographic";e.camera=c8t(e.container,{center:[t.center.x,t.center.y,t.center.z],eye:[t.eye.x,t.eye.y,t.eye.z],up:[t.up.x,t.up.y,t.up.z],_ortho:r,zoomMin:.01,zoomMax:100,mode:"orbit"})};Sv.initializeGLPlot=function(){var e=this;e.initializeGLCamera();var t=e.tryCreatePlot();if(!t)return v8t(e);e.traces={},e.make4thDimension();var r=e.graphDiv,n=r.layout,i=function(){var o={};return e.isCameraChanged(n)&&(o[e.id+".camera"]=e.getCamera()),e.isAspectChanged(n)&&(o[e.id+".aspectratio"]=e.glplot.getAspectratio(),n[e.id].aspectmode!=="manual"&&(e.fullSceneLayout.aspectmode=n[e.id].aspectmode=o[e.id+".aspectmode"]="manual")),o},a=function(o){if(o.fullSceneLayout.dragmode!==!1){var s=i();o.saveLayout(n),o.graphDiv.emit("plotly_relayout",s)}};return e.glplot.canvas&&(e.glplot.canvas.addEventListener("mouseup",function(){a(e)}),e.glplot.canvas.addEventListener("touchstart",function(){TIe=!0}),e.glplot.canvas.addEventListener("wheel",function(o){if(r._context._scrollZoom.gl3d){if(e.camera._ortho){var s=o.deltaX>o.deltaY?1.1:.9090909090909091,l=e.glplot.getAspectratio();e.glplot.setAspectratio({x:s*l.x,y:s*l.y,z:s*l.z})}a(e)}},h8t?{passive:!1}:!1),e.glplot.canvas.addEventListener("mousemove",function(){if(e.fullSceneLayout.dragmode!==!1&&e.camera.mouseListener.buttons!==0){var o=i();e.graphDiv.emit("plotly_relayouting",o)}}),e.staticMode||e.glplot.canvas.addEventListener("webglcontextlost",function(o){r&&r.emit&&r.emit("plotly_webglcontextlost",{event:o,layer:e.id})},!1)),e.glplot.oncontextloss=function(){e.recoverContext()},e.glplot.onrender=function(){e.render()},!0};Sv.render=function(){var e=this,t=e.graphDiv,r,n=e.svgContainer,i=e.container.getBoundingClientRect();t._fullLayout._calcInverseTransform(t);var a=t._fullLayout._invScaleX,o=t._fullLayout._invScaleY,s=i.width*a,l=i.height*o;n.setAttributeNS(null,"viewBox","0 0 "+s+" "+l),n.setAttributeNS(null,"width",s),n.setAttributeNS(null,"height",l),y8t(e),e.glplot.axes.update(e.axesOptions);for(var u=Object.keys(e.traces),c=null,f=e.glplot.selection,h=0;h<u.length;++h)r=e.traces[u[h]],r.data.hoverinfo!=="skip"&&r.handlePick(f)&&(c=r),r.setContourLevels&&r.setContourLevels();function d(P,T,z){var O=e.fullSceneLayout[P+"axis"];return O.type!=="log"&&(T=O.d2l(T)),nF.hoverLabelText(O,T,z)}if(c!==null){var v=p8t(e.glplot.cameraParams,f.dataCoordinate);r=c.data;var _=t._fullData[r.index],b=f.index,p={xLabel:d("x",f.traceCoordinate[0],r.xhoverformat),yLabel:d("y",f.traceCoordinate[1],r.yhoverformat),zLabel:d("z",f.traceCoordinate[2],r.zhoverformat)},k=Jg.castHoverinfo(_,e.fullLayout,b),E=(k||"").split("+"),S=k&&k==="all";!_.hovertemplate&&!S&&(E.indexOf("x")===-1&&(p.xLabel=void 0),E.indexOf("y")===-1&&(p.yLabel=void 0),E.indexOf("z")===-1&&(p.zLabel=void 0),E.indexOf("text")===-1&&(f.textLabel=void 0),E.indexOf("name")===-1&&(c.name=void 0));var L,x=[];r.type==="cone"||r.type==="streamtube"?(p.uLabel=d("x",f.traceCoordinate[3],r.uhoverformat),(S||E.indexOf("u")!==-1)&&x.push("u: "+p.uLabel),p.vLabel=d("y",f.traceCoordinate[4],r.vhoverformat),(S||E.indexOf("v")!==-1)&&x.push("v: "+p.vLabel),p.wLabel=d("z",f.traceCoordinate[5],r.whoverformat),(S||E.indexOf("w")!==-1)&&x.push("w: "+p.wLabel),p.normLabel=f.traceCoordinate[6].toPrecision(3),(S||E.indexOf("norm")!==-1)&&x.push("norm: "+p.normLabel),r.type==="streamtube"&&(p.divergenceLabel=f.traceCoordinate[7].toPrecision(3),(S||E.indexOf("divergence")!==-1)&&x.push("divergence: "+p.divergenceLabel)),f.textLabel&&x.push(f.textLabel),L=x.join("<br>")):r.type==="isosurface"||r.type==="volume"?(p.valueLabel=nF.hoverLabelText(e._mockAxis,e._mockAxis.d2l(f.traceCoordinate[3]),r.valuehoverformat),x.push("value: "+p.valueLabel),f.textLabel&&x.push(f.textLabel),L=x.join("<br>")):L=f.textLabel;var C={x:f.traceCoordinate[0],y:f.traceCoordinate[1],z:f.traceCoordinate[2],data:_._input,fullData:_,curveNumber:_.index,pointNumber:b};Jg.appendArrayPointValue(C,_,b),r._module.eventData&&(C=_._module.eventData(C,f,_,{},b));var M={points:[C]};if(e.fullSceneLayout.hovermode){var g=[];Jg.loneHover({trace:_,x:(.5+.5*v[0]/v[3])*s,y:(.5-.5*v[1]/v[3])*l,xLabel:p.xLabel,yLabel:p.yLabel,zLabel:p.zLabel,text:L,name:c.name,color:Jg.castHoverOption(_,b,"bgcolor")||c.color,borderColor:Jg.castHoverOption(_,b,"bordercolor"),fontFamily:Jg.castHoverOption(_,b,"font.family"),fontSize:Jg.castHoverOption(_,b,"font.size"),fontColor:Jg.castHoverOption(_,b,"font.color"),nameLength:Jg.castHoverOption(_,b,"namelength"),textAlign:Jg.castHoverOption(_,b,"align"),hovertemplate:up.castOption(_,b,"hovertemplate"),hovertemplateLabels:up.extendFlat({},C,p),eventData:[C]},{container:n,gd:t,inOut_bbox:g}),C.bbox=g[0]}f.distance<5&&(f.buttons||TIe)?t.emit("plotly_click",M):t.emit("plotly_hover",M),this.oldEventData=M}else Jg.loneUnhover(n),this.oldEventData&&t.emit("plotly_unhover",this.oldEventData),this.oldEventData=void 0;e.drawAnnotations(e)};Sv.recoverContext=function(){var e=this;e.glplot.dispose();var t=function(){if(e.glplot.gl.isContextLost()){requestAnimationFrame(t);return}if(!e.initializeGLPlot()){up.error("Catastrophic and unrecoverable WebGL error. Context lost.");return}e.plot.apply(e,e.plotArgs)};requestAnimationFrame(t)};var KE=["xaxis","yaxis","zaxis"];function x8t(e,t,r){for(var n=e.fullSceneLayout,i=0;i<3;i++){var a=KE[i],o=a.charAt(0),s=n[a],l=t[o],u=t[o+"calendar"],c=t["_"+o+"length"];if(!up.isArrayOrTypedArray(l))r[0][i]=Math.min(r[0][i],0),r[1][i]=Math.max(r[1][i],c-1);else for(var f,h=0;h<(c||l.length);h++)if(up.isArrayOrTypedArray(l[h]))for(var d=0;d<l[h].length;++d)f=s.d2l(l[h][d],0,u),!isNaN(f)&&isFinite(f)&&(r[0][i]=Math.min(r[0][i],f),r[1][i]=Math.max(r[1][i],f));else f=s.d2l(l[h],0,u),!isNaN(f)&&isFinite(f)&&(r[0][i]=Math.min(r[0][i],f),r[1][i]=Math.max(r[1][i],f))}}function b8t(e,t){for(var r=e.fullSceneLayout,n=r.annotations||[],i=0;i<3;i++)for(var a=KE[i],o=a.charAt(0),s=r[a],l=0;l<n.length;l++){var u=n[l];if(u.visible){var c=s.r2l(u[o]);!isNaN(c)&&isFinite(c)&&(t[0][i]=Math.min(t[0][i],c),t[1][i]=Math.max(t[1][i],c))}}}Sv.plot=function(e,t,r){var n=this;if(n.plotArgs=[e,t,r],!n.glplot.contextLost){var i,a,o,s,l,u,c=t[n.id],f=r[n.id];n.fullLayout=t,n.fullSceneLayout=c,n.axesOptions.merge(t,c),n.spikeOptions.merge(c),n.setViewport(c),n.updateFx(c.dragmode,c.hovermode),n.camera.enableWheel=n.graphDiv._context._scrollZoom.gl3d,n.glplot.setClearColor(d8t(c.bgcolor)),n.setConvert(l),e?Array.isArray(e)||(e=[e]):e=[];var h=[[1/0,1/0,1/0],[-1/0,-1/0,-1/0]];for(o=0;o<e.length;++o)i=e[o],!(i.visible!==!0||i._length===0)&&x8t(this,i,h);b8t(this,h);var d=[1,1,1];for(s=0;s<3;++s)h[1][s]===h[0][s]?d[s]=1:d[s]=1/(h[1][s]-h[0][s]);for(n.dataScale=d,n.convertAnnotations(this),o=0;o<e.length;++o)i=e[o],!(i.visible!==!0||i._length===0)&&(a=n.traces[i.uid],a?a.data.type===i.type?a.update(i):(a.dispose(),a=i._module.plot(this,i),n.traces[i.uid]=a):(a=i._module.plot(this,i),n.traces[i.uid]=a),a.name=i.name);var v=Object.keys(n.traces);e:for(o=0;o<v.length;++o){for(s=0;s<e.length;++s)if(e[s].uid===v[o]&&e[s].visible===!0&&e[s]._length!==0)continue e;a=n.traces[v[o]],a.dispose(),delete n.traces[v[o]]}n.glplot.objects.sort(function(oe,_e){return oe._trace.data.index-_e._trace.data.index});var _=[[0,0,0],[0,0,0]],b=[],p={};for(o=0;o<3;++o){l=c[KE[o]],u=l.type,u in p?(p[u].acc*=d[o],p[u].count+=1):p[u]={acc:d[o],count:1};var k;if(l.autorange){_[0][o]=1/0,_[1][o]=-1/0;var E=n.glplot.objects,S=n.fullSceneLayout.annotations||[],L=l._name.charAt(0);for(s=0;s<E.length;s++){var x=E[s],C=x.bounds,M=x._trace.data._pad||0;x.constructor.name==="ErrorBars"&&l._lowerLogErrorBound?_[0][o]=Math.min(_[0][o],l._lowerLogErrorBound):_[0][o]=Math.min(_[0][o],C[0][o]/d[o]-M),_[1][o]=Math.max(_[1][o],C[1][o]/d[o]+M)}for(s=0;s<S.length;s++){var g=S[s];if(g.visible){var P=l.r2l(g[L]);_[0][o]=Math.min(_[0][o],P),_[1][o]=Math.max(_[1][o],P)}}if("rangemode"in l&&l.rangemode==="tozero"&&(_[0][o]=Math.min(_[0][o],0),_[1][o]=Math.max(_[1][o],0)),_[0][o]>_[1][o])_[0][o]=-1,_[1][o]=1;else{var T=_[1][o]-_[0][o];_[0][o]-=T/32,_[1][o]+=T/32}if(k=[_[0][o],_[1][o]],k=_8t(k,l),_[0][o]=k[0],_[1][o]=k[1],l.isReversed()){var z=_[0][o];_[0][o]=_[1][o],_[1][o]=z}}else k=l.range,_[0][o]=l.r2l(k[0]),_[1][o]=l.r2l(k[1]);_[0][o]===_[1][o]&&(_[0][o]-=1,_[1][o]+=1),b[o]=_[1][o]-_[0][o],l.range=[_[0][o],_[1][o]],l.limitRange(),n.glplot.setBounds(o,{min:l.range[0]*d[o],max:l.range[1]*d[o]})}var O,V=c.aspectmode;if(V==="cube")O=[1,1,1];else if(V==="manual"){var G=c.aspectratio;O=[G.x,G.y,G.z]}else if(V==="auto"||V==="data"){var Z=[1,1,1];for(o=0;o<3;++o){l=c[KE[o]],u=l.type;var j=p[u];Z[o]=Math.pow(j.acc,1/j.count)/d[o]}V==="data"||Math.max.apply(null,Z)/Math.min.apply(null,Z)<=4?O=Z:O=[1,1,1]}else throw new Error("scene.js aspectRatio was not one of the enumerated types");c.aspectratio.x=f.aspectratio.x=O[0],c.aspectratio.y=f.aspectratio.y=O[1],c.aspectratio.z=f.aspectratio.z=O[2],n.glplot.setAspectratio(c.aspectratio),n.viewInitial.aspectratio||(n.viewInitial.aspectratio={x:c.aspectratio.x,y:c.aspectratio.y,z:c.aspectratio.z}),n.viewInitial.aspectmode||(n.viewInitial.aspectmode=c.aspectmode);var N=c.domain||null,H=t._size||null;if(N&&H){var te=n.container.style;te.position="absolute",te.left=H.l+N.x[0]*H.w+"px",te.top=H.t+(1-N.y[1])*H.h+"px",te.width=H.w*(N.x[1]-N.x[0])+"px",te.height=H.h*(N.y[1]-N.y[0])+"px"}n.glplot.redraw()}};Sv.destroy=function(){var e=this;e.glplot&&(e.camera.mouseListener.enabled=!1,e.container.removeEventListener("wheel",e.camera.wheelListener),e.camera=null,e.glplot.dispose(),e.container.parentNode.removeChild(e.container),e.glplot=null)};function w8t(e){return[[e.eye.x,e.eye.y,e.eye.z],[e.center.x,e.center.y,e.center.z],[e.up.x,e.up.y,e.up.z]]}function T8t(e){return{up:{x:e.up[0],y:e.up[1],z:e.up[2]},center:{x:e.center[0],y:e.center[1],z:e.center[2]},eye:{x:e.eye[0],y:e.eye[1],z:e.eye[2]},projection:{type:e._ortho===!0?"orthographic":"perspective"}}}Sv.getCamera=function(){var e=this;return e.camera.view.recalcMatrix(e.camera.view.lastT()),T8t(e.camera)};Sv.setViewport=function(e){var t=this,r=e.camera;t.camera.lookAt.apply(this,w8t(r)),t.glplot.setAspectratio(e.aspectratio);var n=r.projection.type==="orthographic",i=t.camera._ortho;n!==i&&(t.glplot.redraw(),t.glplot.clearRGBA(),t.glplot.dispose(),t.initializeGLPlot())};Sv.isCameraChanged=function(e){var t=this,r=t.getCamera(),n=up.nestedProperty(e,t.id+".camera"),i=n.get();function a(u,c,f,h){var d=["up","center","eye"],v=["x","y","z"];return c[d[f]]&&u[d[f]][v[h]]===c[d[f]][v[h]]}var o=!1;if(i===void 0)o=!0;else{for(var s=0;s<3;s++)for(var l=0;l<3;l++)if(!a(r,i,s,l)){o=!0;break}(!i.projection||r.projection&&r.projection.type!==i.projection.type)&&(o=!0)}return o};Sv.isAspectChanged=function(e){var t=this,r=t.glplot.getAspectratio(),n=up.nestedProperty(e,t.id+".aspectratio"),i=n.get();return i===void 0||i.x!==r.x||i.y!==r.y||i.z!==r.z};Sv.saveLayout=function(e){var t=this,r=t.fullLayout,n,i,a,o,s,l,u=t.isCameraChanged(e),c=t.isAspectChanged(e),f=u||c;if(f){var h={};if(u&&(n=t.getCamera(),i=up.nestedProperty(e,t.id+".camera"),a=i.get(),h[t.id+".camera"]=a),c&&(o=t.glplot.getAspectratio(),s=up.nestedProperty(e,t.id+".aspectratio"),l=s.get(),h[t.id+".aspectratio"]=l),iF.call("_storeDirectGUIEdit",e,r._preGUI,h),u){i.set(n);var d=up.nestedProperty(r,t.id+".camera");d.set(n)}if(c){s.set(o);var v=up.nestedProperty(r,t.id+".aspectratio");v.set(o),t.glplot.redraw()}}return f};Sv.updateFx=function(e,t){var r=this,n=r.camera;if(n)if(e==="orbit")n.mode="orbit",n.keyBindingMode="rotate";else if(e==="turntable"){n.up=[0,0,1],n.mode="turntable",n.keyBindingMode="rotate";var i=r.graphDiv,a=i._fullLayout,o=r.fullSceneLayout.camera,s=o.up.x,l=o.up.y,u=o.up.z;if(u/Math.sqrt(s*s+l*l+u*u)<.999){var c=r.id+".camera.up",f={x:0,y:0,z:1},h={};h[c]=f;var d=i.layout;iF.call("_storeDirectGUIEdit",d,a._preGUI,h),o.up=f,up.nestedProperty(d,c).set(f)}}else n.keyBindingMode=e;r.fullSceneLayout.hovermode=t};function A8t(e,t,r){for(var n=0,i=r-1;n<i;++n,--i)for(var a=0;a<t;++a)for(var o=0;o<4;++o){var s=4*(t*n+a)+o,l=4*(t*i+a)+o,u=e[s];e[s]=e[l],e[l]=u}}function S8t(e,t,r){for(var n=0;n<r;++n)for(var i=0;i<t;++i){var a=4*(t*n+i),o=e[a+3];if(o>0)for(var s=255/o,l=0;l<3;++l)e[a+l]=Math.min(s*e[a+l],255)}}Sv.toImage=function(e){var t=this;e||(e="png"),t.staticMode&&t.container.appendChild(YE),t.glplot.redraw();var r=t.glplot.gl,n=r.drawingBufferWidth,i=r.drawingBufferHeight;r.bindFramebuffer(r.FRAMEBUFFER,null);var a=new Uint8Array(n*i*4);r.readPixels(0,0,n,i,r.RGBA,r.UNSIGNED_BYTE,a),A8t(a,n,i),S8t(a,n,i);var o=document.createElement("canvas");o.width=n,o.height=i;var s=o.getContext("2d",{willReadFrequently:!0}),l=s.createImageData(n,i);l.data.set(a),s.putImageData(l,0,0);var u;switch(e){case"jpeg":u=o.toDataURL("image/jpeg");break;case"webp":u=o.toDataURL("image/webp");break;default:u=o.toDataURL("image/png")}return t.staticMode&&t.container.removeChild(YE),u};Sv.setConvert=function(){for(var e=this,t=0;t<3;t++){var r=e.fullSceneLayout[KE[t]];nF.setConvert(r,e.fullLayout),r.setScale=up.noop}};Sv.make4thDimension=function(){var e=this,t=e.graphDiv,r=t._fullLayout;e._mockAxis={type:"linear",showexponent:"all",exponentformat:"B"},nF.setConvert(e._mockAxis,r)};SIe.exports=AIe});var kIe=ye((cgr,EIe)=>{"use strict";EIe.exports={scene:{valType:"subplotid",dflt:"scene",editType:"calc+clearAxisTypes"}}});var PX=ye((fgr,CIe)=>{"use strict";var M8t=ka(),xs=Rd(),LX=Ao().extendFlat,E8t=mc().overrideAll;CIe.exports=E8t({visible:xs.visible,showspikes:{valType:"boolean",dflt:!0},spikesides:{valType:"boolean",dflt:!0},spikethickness:{valType:"number",min:0,dflt:2},spikecolor:{valType:"color",dflt:M8t.defaultLine},showbackground:{valType:"boolean",dflt:!1},backgroundcolor:{valType:"color",dflt:"rgba(204, 204, 204, 0.5)"},showaxeslabels:{valType:"boolean",dflt:!0},color:xs.color,categoryorder:xs.categoryorder,categoryarray:xs.categoryarray,title:{text:xs.title.text,font:xs.title.font},type:LX({},xs.type,{values:["-","linear","log","date","category"]}),autotypenumbers:xs.autotypenumbers,autorange:xs.autorange,autorangeoptions:{minallowed:xs.autorangeoptions.minallowed,maxallowed:xs.autorangeoptions.maxallowed,clipmin:xs.autorangeoptions.clipmin,clipmax:xs.autorangeoptions.clipmax,include:xs.autorangeoptions.include,editType:"plot"},rangemode:xs.rangemode,minallowed:xs.minallowed,maxallowed:xs.maxallowed,range:LX({},xs.range,{items:[{valType:"any",editType:"plot",impliedEdits:{"^autorange":!1}},{valType:"any",editType:"plot",impliedEdits:{"^autorange":!1}}],anim:!1}),tickmode:xs.minor.tickmode,nticks:xs.nticks,tick0:xs.tick0,dtick:xs.dtick,tickvals:xs.tickvals,ticktext:xs.ticktext,ticks:xs.ticks,mirror:xs.mirror,ticklen:xs.ticklen,tickwidth:xs.tickwidth,tickcolor:xs.tickcolor,showticklabels:xs.showticklabels,labelalias:xs.labelalias,tickfont:xs.tickfont,tickangle:xs.tickangle,tickprefix:xs.tickprefix,showtickprefix:xs.showtickprefix,ticksuffix:xs.ticksuffix,showticksuffix:xs.showticksuffix,showexponent:xs.showexponent,exponentformat:xs.exponentformat,minexponent:xs.minexponent,separatethousands:xs.separatethousands,tickformat:xs.tickformat,tickformatstops:xs.tickformatstops,hoverformat:xs.hoverformat,showline:xs.showline,linecolor:xs.linecolor,linewidth:xs.linewidth,showgrid:xs.showgrid,gridcolor:LX({},xs.gridcolor,{dflt:"rgb(204, 204, 204)"}),gridwidth:xs.gridwidth,zeroline:xs.zeroline,zerolinecolor:xs.zerolinecolor,zerolinewidth:xs.zerolinewidth},"plot","from-root")});var FX=ye((hgr,LIe)=>{"use strict";var IX=PX(),k8t=Cc().attributes,RX=Ao().extendFlat,C8t=Dr().counterRegex;function DX(e,t,r){return{x:{valType:"number",dflt:e,editType:"camera"},y:{valType:"number",dflt:t,editType:"camera"},z:{valType:"number",dflt:r,editType:"camera"},editType:"camera"}}LIe.exports={_arrayAttrRegexps:[C8t("scene",".annotations",!0)],bgcolor:{valType:"color",dflt:"rgba(0,0,0,0)",editType:"plot"},camera:{up:RX(DX(0,0,1),{}),center:RX(DX(0,0,0),{}),eye:RX(DX(1.25,1.25,1.25),{}),projection:{type:{valType:"enumerated",values:["perspective","orthographic"],dflt:"perspective",editType:"calc"},editType:"calc"},editType:"camera"},domain:k8t({name:"scene",editType:"plot"}),aspectmode:{valType:"enumerated",values:["auto","cube","data","manual"],dflt:"auto",editType:"plot",impliedEdits:{"aspectratio.x":void 0,"aspectratio.y":void 0,"aspectratio.z":void 0}},aspectratio:{x:{valType:"number",min:0,editType:"plot",impliedEdits:{"^aspectmode":"manual"}},y:{valType:"number",min:0,editType:"plot",impliedEdits:{"^aspectmode":"manual"}},z:{valType:"number",min:0,editType:"plot",impliedEdits:{"^aspectmode":"manual"}},editType:"plot",impliedEdits:{aspectmode:"manual"}},xaxis:IX,yaxis:IX,zaxis:IX,dragmode:{valType:"enumerated",values:["orbit","turntable","zoom","pan",!1],editType:"plot"},hovermode:{valType:"enumerated",values:["closest",!1],dflt:"closest",editType:"modebar"},uirevision:{valType:"any",editType:"none"},editType:"plot"}});var DIe=ye((dgr,RIe)=>{"use strict";var L8t=cd().mix,PIe=Dr(),P8t=vl(),I8t=PX(),R8t=SU(),D8t=i4(),IIe=["xaxis","yaxis","zaxis"],F8t=100*136/187;RIe.exports=function(t,r,n){var i,a;function o(u,c){return PIe.coerce(i,a,I8t,u,c)}for(var s=0;s<IIe.length;s++){var l=IIe[s];i=t[l]||{},a=P8t.newContainer(r,l),a._id=l[0]+n.scene,a._name=l,R8t(i,a,o,n),D8t(i,a,o,{font:n.font,letter:l[0],data:n.data,showGrid:!0,noAutotickangles:!0,noMinorloglabels:!0,noTicklabelindex:!0,noTickson:!0,noTicklabelmode:!0,noTicklabelshift:!0,noTicklabelstandoff:!0,noTicklabelstep:!0,noTicklabelposition:!0,noTicklabeloverflow:!0,noInsiderange:!0,noUnifiedhovertitle:!0,bgColor:n.bgColor,calendar:n.calendar},n.fullLayout),o("gridcolor",L8t(a.color,n.bgColor,F8t).toRgbString()),o("title.text",l[0]),a.setScale=PIe.noop,o("showspikes")&&(o("spikesides"),o("spikethickness"),o("spikecolor",a.color)),o("showaxeslabels"),o("showbackground")&&o("backgroundcolor")}}});var qIe=ye((vgr,OIe)=>{"use strict";var z8t=Dr(),O8t=ka(),q8t=qa(),B8t=k_(),N8t=DIe(),FIe=FX(),U8t=Id().getSubplotData,zIe="gl3d";OIe.exports=function(t,r,n){var i=r._basePlotModules.length>1;function a(o){if(!i){var s=z8t.validate(t[o],FIe[o]);if(s)return t[o]}}B8t(t,r,n,{type:zIe,attributes:FIe,handleDefaults:V8t,fullLayout:r,font:r.font,fullData:n,getDfltFromLayout:a,autotypenumbersDflt:r.autotypenumbers,paper_bgcolor:r.paper_bgcolor,calendar:r.calendar})};function V8t(e,t,r,n){for(var i=r("bgcolor"),a=O8t.combine(i,n.paper_bgcolor),o=["up","center","eye"],s=0;s<o.length;s++)r("camera."+o[s]+".x"),r("camera."+o[s]+".y"),r("camera."+o[s]+".z");r("camera.projection.type");var l=!!r("aspectratio.x")&&!!r("aspectratio.y")&&!!r("aspectratio.z"),u=l?"manual":"auto",c=r("aspectmode",u);l||(e.aspectratio=t.aspectratio={x:1,y:1,z:1},c==="manual"&&(t.aspectmode="auto"),e.aspectmode=t.aspectmode);var f=U8t(n.fullData,zIe,n.id);N8t(e,t,{font:n.font,scene:n.id,data:f,bgColor:a,calendar:n.calendar,autotypenumbersDflt:n.autotypenumbersDflt,fullLayout:n.fullLayout}),q8t.getComponentMethod("annotations3d","handleDefaults")(e,t,n);var h=n.getDfltFromLayout("dragmode");if(h!==!1&&!h)if(h="orbit",e.camera&&e.camera.up){var d=e.camera.up.x,v=e.camera.up.y,_=e.camera.up.z;_!==0&&(!d||!v||!_||_/Math.sqrt(d*d+v*v+_*_)>.999)&&(h="turntable")}else h="turntable";r("dragmode",h),r("hovermode",n.getDfltFromLayout("hovermode"))}});var $_=ye(cp=>{"use strict";var G8t=mc().overrideAll,H8t=B1(),j8t=MIe(),W8t=Id().getSubplotData,X8t=Dr(),Z8t=Wp(),GA="gl3d",zX="scene";cp.name=GA;cp.attr=zX;cp.idRoot=zX;cp.idRegex=cp.attrRegex=X8t.counterRegex("scene");cp.attributes=kIe();cp.layoutAttributes=FX();cp.baseLayoutAttrOverrides=G8t({hoverlabel:H8t.hoverlabel},"plot","nested");cp.supplyLayoutDefaults=qIe();cp.plot=function(t){for(var r=t._fullLayout,n=t._fullData,i=r._subplots[GA],a=0;a<i.length;a++){var o=i[a],s=W8t(n,GA,o),l=r[o],u=l.camera,c=l._scene;c||(c=new j8t({id:o,graphDiv:t,container:t.querySelector(".gl-container"),staticPlot:t._context.staticPlot,plotGlPixelRatio:t._context.plotGlPixelRatio,camera:u},r),l._scene=c),c.viewInitial||(c.viewInitial={up:{x:u.up.x,y:u.up.y,z:u.up.z},eye:{x:u.eye.x,y:u.eye.y,z:u.eye.z},center:{x:u.center.x,y:u.center.y,z:u.center.z}}),c.plot(s,r,t.layout)}};cp.clean=function(e,t,r,n){for(var i=n._subplots[GA]||[],a=0;a<i.length;a++){var o=i[a];!t[o]&&n[o]._scene&&(n[o]._scene.destroy(),n._infolayer&&n._infolayer.selectAll(".annotation-"+o).remove())}};cp.toSVG=function(e){for(var t=e._fullLayout,r=t._subplots[GA],n=t._size,i=0;i<r.length;i++){var a=t[r[i]],o=a.domain,s=a._scene,l=s.toImage("png"),u=t._glimages.append("svg:image");u.attr({xmlns:Z8t.svg,"xlink:href":l,x:n.l+n.w*o.x[0],y:n.t+n.h*(1-o.y[1]),width:n.w*(o.x[1]-o.x[0]),height:n.h*(o.y[1]-o.y[0]),preserveAspectRatio:"none"}),s.destroy()}};cp.cleanId=function(t){if(t.match(/^scene[0-9]*$/)){var r=t.slice(5);return r==="1"&&(r=""),zX+r}};cp.updateFx=function(e){for(var t=e._fullLayout,r=t._subplots[GA],n=0;n<r.length;n++){var i=t[r[n]]._scene;i.updateFx(t.dragmode,t.hovermode)}}});var NIe=ye((ggr,BIe)=>{"use strict";BIe.exports={plot:YPe(),attributes:EX(),markerSymbols:QD(),supplyDefaults:rIe(),colorbar:[{container:"marker",min:"cmin",max:"cmax"},{container:"line",min:"cmin",max:"cmax"}],calc:nIe(),moduleType:"trace",name:"scatter3d",basePlotModule:$_(),categories:["gl3d","symbols","showLegend","scatter-like"],meta:{}}});var VIe=ye((mgr,UIe)=>{"use strict";UIe.exports=NIe()});var JE=ye((ygr,jIe)=>{"use strict";var GIe=ka(),Y8t=Tu(),OX=df().axisHoverFormat,{hovertemplateAttrs:K8t,templatefallbackAttrs:J8t}=Ll(),HIe=Gl(),qX=Ao().extendFlat,$8t=mc().overrideAll;function BX(e){return{valType:"boolean",dflt:!1}}function NX(e){return{show:{valType:"boolean",dflt:!1},start:{valType:"number",dflt:null,editType:"plot"},end:{valType:"number",dflt:null,editType:"plot"},size:{valType:"number",dflt:null,min:0,editType:"plot"},project:{x:BX("x"),y:BX("y"),z:BX("z")},color:{valType:"color",dflt:GIe.defaultLine},usecolormap:{valType:"boolean",dflt:!1},width:{valType:"number",min:1,max:16,dflt:2},highlight:{valType:"boolean",dflt:!0},highlightcolor:{valType:"color",dflt:GIe.defaultLine},highlightwidth:{valType:"number",min:1,max:16,dflt:2}}}var UX=jIe.exports=$8t(qX({z:{valType:"data_array"},x:{valType:"data_array"},y:{valType:"data_array"},text:{valType:"string",dflt:"",arrayOk:!0},hovertext:{valType:"string",dflt:"",arrayOk:!0},hovertemplate:K8t(),hovertemplatefallback:J8t(),xhoverformat:OX("x"),yhoverformat:OX("y"),zhoverformat:OX("z"),connectgaps:{valType:"boolean",dflt:!1,editType:"calc"},surfacecolor:{valType:"data_array"}},Y8t("",{colorAttr:"z or surfacecolor",showScaleDflt:!0,autoColorDflt:!1,editTypeOverride:"calc"}),{contours:{x:NX("x"),y:NX("y"),z:NX("z")},hidesurface:{valType:"boolean",dflt:!1},lightposition:{x:{valType:"number",min:-1e5,max:1e5,dflt:10},y:{valType:"number",min:-1e5,max:1e5,dflt:1e4},z:{valType:"number",min:-1e5,max:1e5,dflt:0}},lighting:{ambient:{valType:"number",min:0,max:1,dflt:.8},diffuse:{valType:"number",min:0,max:1,dflt:.8},specular:{valType:"number",min:0,max:2,dflt:.05,description:"Represents the level that incident rays are reflected in a single direction, causing shine."},roughness:{valType:"number",min:0,max:1,dflt:.5,description:"Alters specular reflection; the rougher the surface, the wider and less contrasty the shine."},fresnel:{valType:"number",min:0,max:5,dflt:.2}},opacity:{valType:"number",min:0,max:1,dflt:1},opacityscale:{valType:"any",editType:"calc"},hoverinfo:qX({},HIe.hoverinfo),showlegend:qX({},HIe.showlegend,{dflt:!1})}),"calc","nested");UX.x.editType=UX.y.editType=UX.z.editType="calc+clearAxisTypes"});var GX=ye((_gr,ZIe)=>{"use strict";var Q8t=qa(),WIe=Dr(),eRt=Qh(),tRt=JE(),VX=.1;function rRt(e,t){for(var r=[],n=32,i=0;i<n;i++){var a=i/(n-1),o=t+(1-t)*(1-Math.pow(Math.sin(e*a*Math.PI),2));r.push([a,Math.max(0,Math.min(1,o))])}return r}function iRt(e){var t=0;if(!Array.isArray(e)||e.length<2||!e[0]||!e[e.length-1]||+e[0][0]!=0||+e[e.length-1][0]!=1)return!1;for(var r=0;r<e.length;r++){var n=e[r];if(n.length!==2||+n[0]<t)return!1;t=+n[0]}return!0}function nRt(e,t,r,n){var i,a;function o(b,p){return WIe.coerce(e,t,tRt,b,p)}var s=o("x"),l=o("y"),u=o("z");if(!u||!u.length||s&&s.length<1||l&&l.length<1){t.visible=!1;return}t._xlength=Array.isArray(s)&&WIe.isArrayOrTypedArray(s[0])?u.length:u[0].length,t._ylength=u.length;var c=Q8t.getComponentMethod("calendars","handleTraceDefaults");c(e,t,["x","y","z"],n),o("text"),o("hovertext"),o("hovertemplate"),o("hovertemplatefallback"),o("xhoverformat"),o("yhoverformat"),o("zhoverformat"),["lighting.ambient","lighting.diffuse","lighting.specular","lighting.roughness","lighting.fresnel","lightposition.x","lightposition.y","lightposition.z","hidesurface","connectgaps","opacity"].forEach(function(b){o(b)});var f=o("surfacecolor"),h=["x","y","z"];for(i=0;i<3;++i){var d="contours."+h[i],v=o(d+".show"),_=o(d+".highlight");if(v||_)for(a=0;a<3;++a)o(d+".project."+h[a]);v&&(o(d+".color"),o(d+".width"),o(d+".usecolormap")),_&&(o(d+".highlightcolor"),o(d+".highlightwidth")),o(d+".start"),o(d+".end"),o(d+".size")}eRt(e,t,n,o,{prefix:"",cLetter:"c"}),XIe(e,t,n,o),t._length=null}function XIe(e,t,r,n){var i=n("opacityscale");i==="max"?t.opacityscale=[[0,VX],[1,1]]:i==="min"?t.opacityscale=[[0,1],[1,VX]]:i==="extremes"?t.opacityscale=rRt(1,VX):iRt(i)||(t.opacityscale=void 0)}ZIe.exports={supplyDefaults:nRt,opacityscaleDefaults:XIe}});var JIe=ye((xgr,KIe)=>{"use strict";var YIe=gv();KIe.exports=function(t,r){r.surfacecolor?YIe(t,r,{vals:r.surfacecolor,containerStr:"",cLetter:"c"}):YIe(t,r,{vals:r.z,containerStr:"",cLetter:"c"})}});var i8e=ye((bgr,r8e)=>{"use strict";var aRt=Od().gl_surface3d,HA=Od().ndarray,oRt=Od().ndarray_linear_interpolate.d2,sRt=o8(),lRt=s8(),$E=Dr().isArrayOrTypedArray,uRt=$y().parseColorScale,$Ie=Jy(),cRt=tc().extractOpts;function e8e(e,t,r){this.scene=e,this.uid=r,this.surface=t,this.data=null,this.showContour=[!1,!1,!1],this.contourStart=[null,null,null],this.contourEnd=[null,null,null],this.contourSize=[0,0,0],this.minValues=[1/0,1/0,1/0],this.maxValues=[-1/0,-1/0,-1/0],this.dataScaleX=1,this.dataScaleY=1,this.refineData=!0,this.objectOffset=[0,0,0]}var $g=e8e.prototype;$g.getXat=function(e,t,r,n){var i=$E(this.data.x)?$E(this.data.x[0])?this.data.x[t][e]:this.data.x[e]:e;return r===void 0?i:n.d2l(i,0,r)};$g.getYat=function(e,t,r,n){var i=$E(this.data.y)?$E(this.data.y[0])?this.data.y[t][e]:this.data.y[t]:t;return r===void 0?i:n.d2l(i,0,r)};$g.getZat=function(e,t,r,n){var i=this.data.z[t][e];return i===null&&this.data.connectgaps&&this.data._interpolatedZ&&(i=this.data._interpolatedZ[t][e]),r===void 0?i:n.d2l(i,0,r)};$g.handlePick=function(e){if(e.object===this.surface){var t=(e.data.index[0]-1)/this.dataScaleX-1,r=(e.data.index[1]-1)/this.dataScaleY-1,n=Math.max(Math.min(Math.round(t),this.data.z[0].length-1),0),i=Math.max(Math.min(Math.round(r),this.data._ylength-1),0);e.index=[n,i],e.traceCoordinate=[this.getXat(n,i),this.getYat(n,i),this.getZat(n,i)],e.dataCoordinate=[this.getXat(n,i,this.data.xcalendar,this.scene.fullSceneLayout.xaxis),this.getYat(n,i,this.data.ycalendar,this.scene.fullSceneLayout.yaxis),this.getZat(n,i,this.data.zcalendar,this.scene.fullSceneLayout.zaxis)];for(var a=0;a<3;a++){var o=e.dataCoordinate[a];o!=null&&(e.dataCoordinate[a]*=this.scene.dataScale[a])}var s=this.data.hovertext||this.data.text;return $E(s)&&s[i]&&s[i][n]!==void 0?e.textLabel=s[i][n]:s?e.textLabel=s:e.textLabel="",e.data.dataCoordinate=e.dataCoordinate.slice(),this.surface.highlight(e.data),this.scene.glplot.spikes.position=e.dataCoordinate,!0}};function fRt(e){var t=e[0].rgb,r=e[e.length-1].rgb;return t[0]===r[0]&&t[1]===r[1]&&t[2]===r[2]&&t[3]===r[3]}var jA=[2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,281,283,293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,409,419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,509,521,523,541,547,557,563,569,571,577,587,593,599,601,607,613,617,619,631,641,643,647,653,659,661,673,677,683,691,701,709,719,727,733,739,743,751,757,761,769,773,787,797,809,811,821,823,827,829,839,853,857,859,863,877,881,883,887,907,911,919,929,937,941,947,953,967,971,977,983,991,997,1009,1013,1019,1021,1031,1033,1039,1049,1051,1061,1063,1069,1087,1091,1093,1097,1103,1109,1117,1123,1129,1151,1153,1163,1171,1181,1187,1193,1201,1213,1217,1223,1229,1231,1237,1249,1259,1277,1279,1283,1289,1291,1297,1301,1303,1307,1319,1321,1327,1361,1367,1373,1381,1399,1409,1423,1427,1429,1433,1439,1447,1451,1453,1459,1471,1481,1483,1487,1489,1493,1499,1511,1523,1531,1543,1549,1553,1559,1567,1571,1579,1583,1597,1601,1607,1609,1613,1619,1621,1627,1637,1657,1663,1667,1669,1693,1697,1699,1709,1721,1723,1733,1741,1747,1753,1759,1777,1783,1787,1789,1801,1811,1823,1831,1847,1861,1867,1871,1873,1877,1879,1889,1901,1907,1913,1931,1933,1949,1951,1973,1979,1987,1993,1997,1999,2003,2011,2017,2027,2029,2039,2053,2063,2069,2081,2083,2087,2089,2099,2111,2113,2129,2131,2137,2141,2143,2153,2161,2179,2203,2207,2213,2221,2237,2239,2243,2251,2267,2269,2273,2281,2287,2293,2297,2309,2311,2333,2339,2341,2347,2351,2357,2371,2377,2381,2383,2389,2393,2399,2411,2417,2423,2437,2441,2447,2459,2467,2473,2477,2503,2521,2531,2539,2543,2549,2551,2557,2579,2591,2593,2609,2617,2621,2633,2647,2657,2659,2663,2671,2677,2683,2687,2689,2693,2699,2707,2711,2713,2719,2729,2731,2741,2749,2753,2767,2777,2789,2791,2797,2801,2803,2819,2833,2837,2843,2851,2857,2861,2879,2887,2897,2903,2909,2917,2927,2939,2953,2957,2963,2969,2971,2999];function hRt(e,t){if(e<t)return 0;for(var r=0;Math.floor(e%t)===0;)e/=t,r++;return r}function HX(e){for(var t=[],r=0;r<jA.length;r++){var n=jA[r];t.push(hRt(e,n))}return t}function dRt(e){for(var t=HX(e),r=e,n=0;n<jA.length;n++)if(t[n]>0){r=jA[n];break}return r}function vRt(e,t){if(!(e<1||t<1)){for(var r=HX(e),n=HX(t),i=1,a=0;a<jA.length;a++)i*=Math.pow(jA[a],Math.max(r[a],n[a]));return i}}function pRt(e){if(e.length!==0){for(var t=1,r=0;r<e.length;r++)t=vRt(t,e[r]);return t}}$g.calcXnums=function(e){var t,r=[];for(t=1;t<e;t++){var n=this.getXat(t-1,0),i=this.getXat(t,0);i!==n&&n!==void 0&&n!==null&&i!==void 0&&i!==null?r[t-1]=Math.abs(i-n):r[t-1]=0}var a=0;for(t=1;t<e;t++)a+=r[t-1];for(t=1;t<e;t++)r[t-1]===0?r[t-1]=1:r[t-1]=Math.round(a/r[t-1]);return r};$g.calcYnums=function(e){var t,r=[];for(t=1;t<e;t++){var n=this.getYat(0,t-1),i=this.getYat(0,t);i!==n&&n!==void 0&&n!==null&&i!==void 0&&i!==null?r[t-1]=Math.abs(i-n):r[t-1]=0}var a=0;for(t=1;t<e;t++)a+=r[t-1];for(t=1;t<e;t++)r[t-1]===0?r[t-1]=1:r[t-1]=Math.round(a/r[t-1]);return r};var t8e=[1,2,4,6,12,24,36,48,60,120,180,240,360,720,840,1260],QIe=t8e[9],aF=t8e[13];$g.estimateScale=function(e,t){for(var r=t===0?this.calcXnums(e):this.calcYnums(e),n=1+pRt(r);n<QIe;)n*=2;for(;n>aF;)n--,n/=dRt(n),n++,n<QIe&&(n=aF);var i=Math.round(n/e);return i>1?i:1};function gRt(e,t,r){var n=r[8]+r[2]*t[0]+r[5]*t[1];return e[0]=(r[6]+r[0]*t[0]+r[3]*t[1])/n,e[1]=(r[7]+r[1]*t[0]+r[4]*t[1])/n,e}function mRt(e,t,r){return yRt(e,t,gRt,r),e}function yRt(e,t,r,n){for(var i=[0,0],a=e.shape[0],o=e.shape[1],s=0;s<a;s++)for(var l=0;l<o;l++)r(i,[s,l],n),e.set(s,l,oRt(t,i[0],i[1]));return e}$g.refineCoords=function(e){for(var t=this.dataScaleX,r=this.dataScaleY,n=e[0].shape[0],i=e[0].shape[1],a=Math.floor(e[0].shape[0]*t+1)|0,o=Math.floor(e[0].shape[1]*r+1)|0,s=1+n+1,l=1+i+1,u=HA(new Float32Array(s*l),[s,l]),c=[1/t,0,0,0,1/r,0,0,0,1],f=0;f<e.length;++f){this.surface.padField(u,e[f]);var h=HA(new Float32Array(a*o),[a,o]);mRt(h,u,c),e[f]=h}};function _Rt(e,t){for(var r=!1,n=0;n<e.length;n++)if(t===e[n]){r=!0;break}r===!1&&e.push(t)}$g.setContourLevels=function(){var e=[[],[],[]],t=[!1,!1,!1],r=!1,n,i,a;for(n=0;n<3;++n)if(this.showContour[n]&&(r=!0,this.contourSize[n]>0&&this.contourStart[n]!==null&&this.contourEnd[n]!==null&&this.contourEnd[n]>this.contourStart[n]))for(t[n]=!0,i=this.contourStart[n];i<this.contourEnd[n];i+=this.contourSize[n])a=i*this.scene.dataScale[n],_Rt(e[n],a);if(r){var o=[[],[],[]];for(n=0;n<3;++n)this.showContour[n]&&(o[n]=t[n]?e[n]:this.scene.contourLevels[n]);this.surface.update({levels:o})}};$g.update=function(e){var t=this.scene,r=t.fullSceneLayout,n=this.surface,i=uRt(e),a=t.dataScale,o=e.z[0].length,s=e._ylength,l=t.contourLevels;this.data=e;var u,c,f,h,d=[];for(u=0;u<3;u++)for(d[u]=[],c=0;c<o;c++)d[u][c]=[];for(c=0;c<o;c++)for(f=0;f<s;f++)d[0][c][f]=this.getXat(c,f,e.xcalendar,r.xaxis),d[1][c][f]=this.getYat(c,f,e.ycalendar,r.yaxis),d[2][c][f]=this.getZat(c,f,e.zcalendar,r.zaxis);if(e.connectgaps)for(e._emptypoints=lRt(d[2]),sRt(d[2],e._emptypoints),e._interpolatedZ=[],c=0;c<o;c++)for(e._interpolatedZ[c]=[],f=0;f<s;f++)e._interpolatedZ[c][f]=d[2][c][f];for(u=0;u<3;u++)for(c=0;c<o;c++)for(f=0;f<s;f++)h=d[u][c][f],h==null?d[u][c][f]=NaN:h=d[u][c][f]*=a[u];for(u=0;u<3;u++)for(c=0;c<o;c++)for(f=0;f<s;f++)h=d[u][c][f],h!=null&&(this.minValues[u]>h&&(this.minValues[u]=h),this.maxValues[u]<h&&(this.maxValues[u]=h));for(u=0;u<3;u++)this.objectOffset[u]=.5*(this.minValues[u]+this.maxValues[u]);for(u=0;u<3;u++)for(c=0;c<o;c++)for(f=0;f<s;f++)h=d[u][c][f],h!=null&&(d[u][c][f]-=this.objectOffset[u]);var v=[HA(new Float32Array(o*s),[o,s]),HA(new Float32Array(o*s),[o,s]),HA(new Float32Array(o*s),[o,s])];for(u=0;u<3;u++)for(c=0;c<o;c++)for(f=0;f<s;f++)v[u].set(c,f,d[u][c][f]);d=[];var _={colormap:i,levels:[[],[],[]],showContour:[!0,!0,!0],showSurface:!e.hidesurface,contourProject:[[!1,!1,!1],[!1,!1,!1],[!1,!1,!1]],contourWidth:[1,1,1],contourColor:[[1,1,1,1],[1,1,1,1],[1,1,1,1]],contourTint:[1,1,1],dynamicColor:[[1,1,1,1],[1,1,1,1],[1,1,1,1]],dynamicWidth:[1,1,1],dynamicTint:[1,1,1],opacityscale:e.opacityscale,opacity:e.opacity},b=cRt(e);if(_.intensityBounds=[b.min,b.max],e.surfacecolor){var p=HA(new Float32Array(o*s),[o,s]);for(c=0;c<o;c++)for(f=0;f<s;f++)p.set(c,f,e.surfacecolor[f][c]);v.push(p)}else _.intensityBounds[0]*=a[2],_.intensityBounds[1]*=a[2];(aF<v[0].shape[0]||aF<v[0].shape[1])&&(this.refineData=!1),this.refineData===!0&&(this.dataScaleX=this.estimateScale(v[0].shape[0],0),this.dataScaleY=this.estimateScale(v[0].shape[1],1),(this.dataScaleX!==1||this.dataScaleY!==1)&&this.refineCoords(v)),e.surfacecolor&&(_.intensity=v.pop());var k=[!0,!0,!0],E=["x","y","z"];for(u=0;u<3;++u){var S=e.contours[E[u]];k[u]=S.highlight,_.showContour[u]=S.show||S.highlight,_.showContour[u]&&(_.contourProject[u]=[S.project.x,S.project.y,S.project.z],S.show?(this.showContour[u]=!0,_.levels[u]=l[u],n.highlightColor[u]=_.contourColor[u]=$Ie(S.color),S.usecolormap?n.highlightTint[u]=_.contourTint[u]=0:n.highlightTint[u]=_.contourTint[u]=1,_.contourWidth[u]=S.width,this.contourStart[u]=S.start,this.contourEnd[u]=S.end,this.contourSize[u]=S.size):(this.showContour[u]=!1,this.contourStart[u]=null,this.contourEnd[u]=null,this.contourSize[u]=0),S.highlight&&(_.dynamicColor[u]=$Ie(S.highlightcolor),_.dynamicWidth[u]=S.highlightwidth))}fRt(i)&&(_.vertexColor=!0),_.objectOffset=this.objectOffset,_.coords=v,n.update(_),n.visible=e.visible,n.enableDynamic=k,n.enableHighlight=k,n.snapToData=!0,"lighting"in e&&(n.ambientLight=e.lighting.ambient,n.diffuseLight=e.lighting.diffuse,n.specularLight=e.lighting.specular,n.roughness=e.lighting.roughness,n.fresnel=e.lighting.fresnel),"lightposition"in e&&(n.lightPosition=[e.lightposition.x,e.lightposition.y,e.lightposition.z])};$g.dispose=function(){this.scene.glplot.remove(this.surface),this.surface.dispose()};function xRt(e,t){var r=e.glplot.gl,n=aRt({gl:r}),i=new e8e(e,n,t.uid);return n._trace=i,i.update(t),e.glplot.add(n),i}r8e.exports=xRt});var a8e=ye((wgr,n8e)=>{"use strict";n8e.exports={attributes:JE(),supplyDefaults:GX().supplyDefaults,colorbar:{min:"cmin",max:"cmax"},calc:JIe(),plot:i8e(),moduleType:"trace",name:"surface",basePlotModule:$_(),categories:["gl3d","2dMap","showLegend"],meta:{}}});var s8e=ye((Tgr,o8e)=>{"use strict";o8e.exports=a8e()});var WA=ye((Agr,u8e)=>{"use strict";var bRt=Tu(),jX=df().axisHoverFormat,{hovertemplateAttrs:wRt,templatefallbackAttrs:TRt}=Ll(),Q_=JE(),l8e=Gl(),ex=Ao().extendFlat;u8e.exports=ex({x:{valType:"data_array",editType:"calc+clearAxisTypes"},y:{valType:"data_array",editType:"calc+clearAxisTypes"},z:{valType:"data_array",editType:"calc+clearAxisTypes"},i:{valType:"data_array",editType:"calc"},j:{valType:"data_array",editType:"calc"},k:{valType:"data_array",editType:"calc"},text:{valType:"string",dflt:"",arrayOk:!0,editType:"calc"},hovertext:{valType:"string",dflt:"",arrayOk:!0,editType:"calc"},hovertemplate:wRt({editType:"calc"}),hovertemplatefallback:TRt({editType:"calc"}),xhoverformat:jX("x"),yhoverformat:jX("y"),zhoverformat:jX("z"),delaunayaxis:{valType:"enumerated",values:["x","y","z"],dflt:"z",editType:"calc"},alphahull:{valType:"number",dflt:-1,editType:"calc"},intensity:{valType:"data_array",editType:"calc"},intensitymode:{valType:"enumerated",values:["vertex","cell"],dflt:"vertex",editType:"calc"},color:{valType:"color",editType:"calc"},vertexcolor:{valType:"data_array",editType:"calc"},facecolor:{valType:"data_array",editType:"calc"}},bRt("",{colorAttr:"`intensity`",showScaleDflt:!0,editTypeOverride:"calc"}),{opacity:Q_.opacity,flatshading:{valType:"boolean",dflt:!1,editType:"calc"},contour:{show:ex({},Q_.contours.x.show,{}),color:Q_.contours.x.color,width:Q_.contours.x.width,editType:"calc"},lightposition:{x:ex({},Q_.lightposition.x,{dflt:1e5}),y:ex({},Q_.lightposition.y,{dflt:1e5}),z:ex({},Q_.lightposition.z,{dflt:0}),editType:"calc"},lighting:ex({vertexnormalsepsilon:{valType:"number",min:0,max:1,dflt:1e-12,editType:"calc",description:"Epsilon for vertex normals calculation avoids math issues arising from degenerate geometry."},facenormalsepsilon:{valType:"number",min:0,max:1,dflt:1e-6,editType:"calc",description:"Epsilon for face normals calculation avoids math issues arising from degenerate geometry."},editType:"calc"},Q_.lighting),hoverinfo:ex({},l8e.hoverinfo,{editType:"calc"}),showlegend:ex({},l8e.showlegend,{dflt:!1})})});var sF=ye((Sgr,f8e)=>{"use strict";var ARt=Tu(),oF=df().axisHoverFormat,{hovertemplateAttrs:SRt,templatefallbackAttrs:MRt}=Ll(),QE=WA(),c8e=Gl(),WX=Ao().extendFlat,ERt=mc().overrideAll;function XX(e){return{show:{valType:"boolean",dflt:!1},locations:{valType:"data_array",dflt:[]},fill:{valType:"number",min:0,max:1,dflt:1}}}function ZX(e){return{show:{valType:"boolean",dflt:!0},fill:{valType:"number",min:0,max:1,dflt:1}}}var XA=f8e.exports=ERt(WX({x:{valType:"data_array"},y:{valType:"data_array"},z:{valType:"data_array"},value:{valType:"data_array"},isomin:{valType:"number"},isomax:{valType:"number"},surface:{show:{valType:"boolean",dflt:!0},count:{valType:"integer",dflt:2,min:1},fill:{valType:"number",min:0,max:1,dflt:1},pattern:{valType:"flaglist",flags:["A","B","C","D","E"],extras:["all","odd","even"],dflt:"all"}},spaceframe:{show:{valType:"boolean",dflt:!1},fill:{valType:"number",min:0,max:1,dflt:.15}},slices:{x:XX("x"),y:XX("y"),z:XX("z")},caps:{x:ZX("x"),y:ZX("y"),z:ZX("z")},text:{valType:"string",dflt:"",arrayOk:!0},hovertext:{valType:"string",dflt:"",arrayOk:!0},hovertemplate:SRt(),hovertemplatefallback:MRt(),xhoverformat:oF("x"),yhoverformat:oF("y"),zhoverformat:oF("z"),valuehoverformat:oF("value",1),showlegend:WX({},c8e.showlegend,{dflt:!1})},ARt("",{colorAttr:"`value`",showScaleDflt:!0,editTypeOverride:"calc"}),{opacity:QE.opacity,lightposition:QE.lightposition,lighting:QE.lighting,flatshading:QE.flatshading,contour:QE.contour,hoverinfo:WX({},c8e.hoverinfo)}),"calc","nested");XA.flatshading.dflt=!0;XA.lighting.facenormalsepsilon.dflt=0;XA.x.editType=XA.y.editType=XA.z.editType=XA.value.editType="calc+clearAxisTypes"});var YX=ye((Mgr,d8e)=>{"use strict";var kRt=Dr(),CRt=qa(),LRt=sF(),PRt=Qh();function IRt(e,t,r,n){function i(a,o){return kRt.coerce(e,t,LRt,a,o)}h8e(e,t,r,n,i)}function h8e(e,t,r,n,i){var a=i("isomin"),o=i("isomax");o!=null&&a!==void 0&&a!==null&&a>o&&(t.isomin=null,t.isomax=null);var s=i("x"),l=i("y"),u=i("z"),c=i("value");if(!s||!s.length||!l||!l.length||!u||!u.length||!c||!c.length){t.visible=!1;return}var f=CRt.getComponentMethod("calendars","handleTraceDefaults");f(e,t,["x","y","z"],n),i("valuehoverformat"),["x","y","z"].forEach(function(_){i(_+"hoverformat");var b="caps."+_,p=i(b+".show");p&&i(b+".fill");var k="slices."+_,E=i(k+".show");E&&(i(k+".fill"),i(k+".locations"))});var h=i("spaceframe.show");h&&i("spaceframe.fill");var d=i("surface.show");d&&(i("surface.count"),i("surface.fill"),i("surface.pattern"));var v=i("contour.show");v&&(i("contour.color"),i("contour.width")),["text","hovertext","hovertemplate","lighting.ambient","lighting.diffuse","lighting.specular","lighting.roughness","lighting.fresnel","lighting.vertexnormalsepsilon","lighting.facenormalsepsilon","lightposition.x","lightposition.y","lightposition.z","flatshading","opacity"].forEach(function(_){i(_)}),PRt(e,t,n,i,{prefix:"",cLetter:"c"}),t._length=null}d8e.exports={supplyDefaults:IRt,supplyIsoDefaults:h8e}});var lF=ye((Egr,p8e)=>{"use strict";var JX=Dr(),RRt=gv();function DRt(e,t){t._len=Math.min(t.u.length,t.v.length,t.w.length,t.x.length,t.y.length,t.z.length),t._u=jm(t.u,t._len),t._v=jm(t.v,t._len),t._w=jm(t.w,t._len),t._x=jm(t.x,t._len),t._y=jm(t.y,t._len),t._z=jm(t.z,t._len);var r=v8e(t);t._gridFill=r.fill,t._Xs=r.Xs,t._Ys=r.Ys,t._Zs=r.Zs,t._len=r.len;var n=0,i,a,o;t.starts&&(i=jm(t.starts.x||[]),a=jm(t.starts.y||[]),o=jm(t.starts.z||[]),n=Math.min(i.length,a.length,o.length)),t._startsX=i||[],t._startsY=a||[],t._startsZ=o||[];var s=0,l=1/0,u;for(u=0;u<t._len;u++){var c=t._u[u],f=t._v[u],h=t._w[u],d=Math.sqrt(c*c+f*f+h*h);s=Math.max(s,d),l=Math.min(l,d)}for(RRt(e,t,{vals:[l,s],containerStr:"",cLetter:"c"}),u=0;u<n;u++){var v=i[u];r.xMax=Math.max(r.xMax,v),r.xMin=Math.min(r.xMin,v);var _=a[u];r.yMax=Math.max(r.yMax,_),r.yMin=Math.min(r.yMin,_);var b=o[u];r.zMax=Math.max(r.zMax,b),r.zMin=Math.min(r.zMin,b)}t._slen=n,t._normMax=s,t._xbnds=[r.xMin,r.xMax],t._ybnds=[r.yMin,r.yMax],t._zbnds=[r.zMin,r.zMax]}function v8e(e){var t=e._x,r=e._y,n=e._z,i=e._len,a,o,s,l=-1/0,u=1/0,c=-1/0,f=1/0,h=-1/0,d=1/0,v="",_,b,p,k,E,S,L,x,C;for(i&&(k=t[0],S=r[0],x=n[0]),i>1&&(E=t[i-1],L=r[i-1],C=n[i-1]),a=0;a<i;a++)l=Math.max(l,t[a]),u=Math.min(u,t[a]),c=Math.max(c,r[a]),f=Math.min(f,r[a]),h=Math.max(h,n[a]),d=Math.min(d,n[a]),!_&&t[a]!==k&&(_=!0,v+="x"),!b&&r[a]!==S&&(b=!0,v+="y"),!p&&n[a]!==x&&(p=!0,v+="z");_||(v+="x"),b||(v+="y"),p||(v+="z");var M=KX(e._x),g=KX(e._y),P=KX(e._z);v=v.replace("x",(k>E?"-":"+")+"x"),v=v.replace("y",(S>L?"-":"+")+"y"),v=v.replace("z",(x>C?"-":"+")+"z");var T=function(){i=0,M=[],g=[],P=[]};(!i||i<M.length*g.length*P.length)&&T();var z=function(Fe){return Fe==="x"?t:Fe==="y"?r:n},O=function(Fe){return Fe==="x"?M:Fe==="y"?g:P},V=function(Fe){return Fe[i-1]<Fe[0]?-1:1},G=z(v[1]),Z=z(v[3]),j=z(v[5]),N=O(v[1]).length,H=O(v[3]).length,te=O(v[5]).length,oe=!1,_e=function(Fe,Pe,ge){return N*(H*Fe+Pe)+ge},Ee=V(z(v[1])),Ce=V(z(v[3])),me=V(z(v[5]));for(a=0;a<te-1;a++){for(o=0;o<H-1;o++){for(s=0;s<N-1;s++){var ie=_e(a,o,s),Se=_e(a,o,s+1),Le=_e(a,o+1,s),Ae=_e(a+1,o,s);if((!(G[ie]*Ee<G[Se]*Ee)||!(Z[ie]*Ce<Z[Le]*Ce)||!(j[ie]*me<j[Ae]*me))&&(oe=!0),oe)break}if(oe)break}if(oe)break}return oe&&(JX.warn("Encountered arbitrary coordinates! Unable to input data grid."),T()),{xMin:u,yMin:f,zMin:d,xMax:l,yMax:c,zMax:h,Xs:M,Ys:g,Zs:P,len:i,fill:v}}function KX(e){return JX.distinctVals(e).vals}function jm(e,t){if(t===void 0&&(t=e.length),JX.isTypedArray(e))return e.subarray(0,t);for(var r=[],n=0;n<t;n++)r[n]=+e[n];return r}p8e.exports={calc:DRt,filter:jm,processGrid:v8e}});var $X=ye((kgr,g8e)=>{"use strict";var FRt=gv(),zRt=lF().processGrid,uF=lF().filter;g8e.exports=function(t,r){r._len=Math.min(r.x.length,r.y.length,r.z.length,r.value.length),r._x=uF(r.x,r._len),r._y=uF(r.y,r._len),r._z=uF(r.z,r._len),r._value=uF(r.value,r._len);var n=zRt(r);r._gridFill=n.fill,r._Xs=n.Xs,r._Ys=n.Ys,r._Zs=n.Zs,r._len=n.len;for(var i=1/0,a=-1/0,o=0;o<r._len;o++){var s=r._value[o];i=Math.min(i,s),a=Math.max(a,s)}r._minValues=i,r._maxValues=a,r._vMin=r.isomin===void 0||r.isomin===null?i:r.isomin,r._vMax=r.isomax===void 0||r.isomax===null?a:r.isomax,FRt(t,r,{vals:[r._vMin,r._vMax],containerStr:"",cLetter:"c"})}});var ZA=ye((Cgr,m8e)=>{"use strict";m8e.exports=function(t,r,n,i){i=i||t.length;for(var a=new Array(i),o=0;o<i;o++)a[o]=[t[o],r[o],n[o]];return a}});var cF=ye((Lgr,b8e)=>{"use strict";var ORt=Od().gl_mesh3d,qRt=$y().parseColorScale,BRt=Dr().isArrayOrTypedArray,NRt=Jy(),URt=tc().extractOpts,y8e=ZA(),ek=function(e,t){for(var r=t.length-1;r>0;r--){var n=Math.min(t[r],t[r-1]),i=Math.max(t[r],t[r-1]);if(i>n&&n<e&&e<=i)return{id:r,distRatio:(i-e)/(i-n)}}return{id:0,distRatio:0}};function _8e(e,t,r){this.scene=e,this.uid=r,this.mesh=t,this.name="",this.data=null,this.showContour=!1}var QX=_8e.prototype;QX.handlePick=function(e){if(e.object===this.mesh){var t=e.data.index,r=this.data._meshX[t],n=this.data._meshY[t],i=this.data._meshZ[t],a=this.data._Ys.length,o=this.data._Zs.length,s=ek(r,this.data._Xs).id,l=ek(n,this.data._Ys).id,u=ek(i,this.data._Zs).id,c=e.index=u+o*l+o*a*s;e.traceCoordinate=[this.data._meshX[c],this.data._meshY[c],this.data._meshZ[c],this.data._value[c]];var f=this.data.hovertext||this.data.text;return BRt(f)&&f[c]!==void 0?e.textLabel=f[c]:f&&(e.textLabel=f),!0}};QX.update=function(e){var t=this.scene,r=t.fullSceneLayout;this.data=x8e(e);function n(l,u,c,f){return u.map(function(h){return l.d2l(h,0,f)*c})}var i=y8e(n(r.xaxis,e._meshX,t.dataScale[0],e.xcalendar),n(r.yaxis,e._meshY,t.dataScale[1],e.ycalendar),n(r.zaxis,e._meshZ,t.dataScale[2],e.zcalendar)),a=y8e(e._meshI,e._meshJ,e._meshK),o={positions:i,cells:a,lightPosition:[e.lightposition.x,e.lightposition.y,e.lightposition.z],ambient:e.lighting.ambient,diffuse:e.lighting.diffuse,specular:e.lighting.specular,roughness:e.lighting.roughness,fresnel:e.lighting.fresnel,vertexNormalsEpsilon:e.lighting.vertexnormalsepsilon,faceNormalsEpsilon:e.lighting.facenormalsepsilon,opacity:e.opacity,contourEnable:e.contour.show,contourColor:NRt(e.contour.color).slice(0,3),contourWidth:e.contour.width,useFacetNormals:e.flatshading},s=URt(e);o.vertexIntensity=e._meshIntensity,o.vertexIntensityBounds=[s.min,s.max],o.colormap=qRt(e),this.mesh.update(o)};QX.dispose=function(){this.scene.glplot.remove(this.mesh),this.mesh.dispose()};var VRt=["xyz","xzy","yxz","yzx","zxy","zyx"];function x8e(e){e._meshI=[],e._meshJ=[],e._meshK=[];var t=e.surface.show,r=e.spaceframe.show,n=e.surface.fill,i=e.spaceframe.fill,a=!1,o=!1,s=0,l,u,c=e._Xs,f=e._Ys,h=e._Zs,d=c.length,v=f.length,_=h.length,b=VRt.indexOf(e._gridFill.replace(/-/g,"").replace(/\+/g,"")),p=function(Qt,Vt,_t){switch(b){case 5:return _t+_*Vt+_*v*Qt;case 4:return _t+_*Qt+_*d*Vt;case 3:return Vt+v*_t+v*_*Qt;case 2:return Vt+v*Qt+v*d*_t;case 1:return Qt+d*_t+d*_*Vt;default:return Qt+d*Vt+d*v*_t}},k=e._minValues,E=e._maxValues,S=e._vMin,L=e._vMax,x,C,M,g;function P(Qt,Vt,_t){for(var It=g.length,mt=u;mt<It;mt++)if(Qt===x[mt]&&Vt===C[mt]&&_t===M[mt])return mt;return-1}function T(){u=l}function z(){x=[],C=[],M=[],g=[],l=0,T()}function O(Qt,Vt,_t,It){return x.push(Qt),C.push(Vt),M.push(_t),g.push(It),l++,l-1}function V(Qt,Vt,_t){return e._meshI.push(Qt),e._meshJ.push(Vt),e._meshK.push(_t),s++,s-1}function G(Qt,Vt,_t){for(var It=[],mt=0;mt<Qt.length;mt++)It[mt]=(Qt[mt]+Vt[mt]+_t[mt])/3;return It}function Z(Qt,Vt,_t){for(var It=[],mt=0;mt<Qt.length;mt++)It[mt]=Qt[mt]*(1-_t)+_t*Vt[mt];return It}var j;function N(Qt){j=Qt}function H(Qt,Vt){var _t=Qt[0],It=Qt[1],mt=Qt[2],er=G(_t,It,mt),lr=Math.sqrt(1-j),Tr=Z(er,_t,lr),Lr=Z(er,It,lr),ti=Z(er,mt,lr),Br=Vt[0],Vr=Vt[1],dt=Vt[2];return{xyzv:[[_t,It,Lr],[Lr,Tr,_t],[It,mt,ti],[ti,Lr,It],[mt,_t,Tr],[Tr,ti,mt]],abc:[[Br,Vr,-1],[-1,-1,Br],[Vr,dt,-1],[-1,-1,Vr],[dt,Br,-1],[-1,-1,dt]]}}function te(Qt,Vt){return Qt==="all"||Qt===null?!0:Qt.indexOf(Vt)>-1}function oe(Qt,Vt){return Qt===null?Vt:Qt}function _e(Qt,Vt,_t){T();var It=[Vt],mt=[_t];if(j>=1)It=[Vt],mt=[_t];else if(j>0){var er=H(Vt,_t);It=er.xyzv,mt=er.abc}for(var lr=0;lr<It.length;lr++){Vt=It[lr],_t=mt[lr];for(var Tr=[],Lr=0;Lr<3;Lr++){var ti=Vt[Lr][0],Br=Vt[Lr][1],Vr=Vt[Lr][2],dt=Vt[Lr][3],Ge=_t[Lr]>-1?_t[Lr]:P(ti,Br,Vr);Ge>-1?Tr[Lr]=Ge:Tr[Lr]=O(ti,Br,Vr,oe(Qt,dt))}V(Tr[0],Tr[1],Tr[2])}}function Ee(Qt,Vt,_t){var It=function(mt,er,lr){_e(Qt,[Vt[mt],Vt[er],Vt[lr]],[_t[mt],_t[er],_t[lr]])};It(0,1,2),It(2,3,0)}function Ce(Qt,Vt,_t){var It=function(mt,er,lr){_e(Qt,[Vt[mt],Vt[er],Vt[lr]],[_t[mt],_t[er],_t[lr]])};It(0,1,2),It(3,0,1),It(2,3,0),It(1,2,3)}function me(Qt,Vt,_t,It){var mt=Qt[3];mt<_t&&(mt=_t),mt>It&&(mt=It);for(var er=(Qt[3]-mt)/(Qt[3]-Vt[3]+1e-9),lr=[],Tr=0;Tr<4;Tr++)lr[Tr]=(1-er)*Qt[Tr]+er*Vt[Tr];return lr}function ie(Qt,Vt,_t){return Qt>=Vt&&Qt<=_t}function Se(Qt){var Vt=.001*(L-S);return Qt>=S-Vt&&Qt<=L+Vt}function Le(Qt){for(var Vt=[],_t=0;_t<4;_t++){var It=Qt[_t];Vt.push([e._x[It],e._y[It],e._z[It],e._value[It]])}return Vt}var Ae=3;function Fe(Qt,Vt,_t,It,mt,er){er||(er=1),_t=[-1,-1,-1];var lr=!1,Tr=[ie(Vt[0][3],It,mt),ie(Vt[1][3],It,mt),ie(Vt[2][3],It,mt)];if(!Tr[0]&&!Tr[1]&&!Tr[2])return!1;var Lr=function(Br,Vr,dt){return Se(Vr[0][3])&&Se(Vr[1][3])&&Se(Vr[2][3])?(_e(Br,Vr,dt),!0):er<Ae?Fe(Br,Vr,dt,S,L,++er):!1};if(Tr[0]&&Tr[1]&&Tr[2])return Lr(Qt,Vt,_t)||lr;var ti=!1;return[[0,1,2],[2,0,1],[1,2,0]].forEach(function(Br){if(Tr[Br[0]]&&Tr[Br[1]]&&!Tr[Br[2]]){var Vr=Vt[Br[0]],dt=Vt[Br[1]],Ge=Vt[Br[2]],Je=me(Ge,Vr,It,mt),je=me(Ge,dt,It,mt);lr=Lr(Qt,[je,Je,Vr],[-1,-1,_t[Br[0]]])||lr,lr=Lr(Qt,[Vr,dt,je],[_t[Br[0]],_t[Br[1]],-1])||lr,ti=!0}}),ti||[[0,1,2],[1,2,0],[2,0,1]].forEach(function(Br){if(Tr[Br[0]]&&!Tr[Br[1]]&&!Tr[Br[2]]){var Vr=Vt[Br[0]],dt=Vt[Br[1]],Ge=Vt[Br[2]],Je=me(dt,Vr,It,mt),je=me(Ge,Vr,It,mt);lr=Lr(Qt,[je,Je,Vr],[-1,-1,_t[Br[0]]])||lr,ti=!0}}),lr}function Pe(Qt,Vt,_t,It){var mt=!1,er=Le(Vt),lr=[ie(er[0][3],_t,It),ie(er[1][3],_t,It),ie(er[2][3],_t,It),ie(er[3][3],_t,It)];if(!lr[0]&&!lr[1]&&!lr[2]&&!lr[3])return mt;if(lr[0]&&lr[1]&&lr[2]&&lr[3])return o&&(mt=Ce(Qt,er,Vt)||mt),mt;var Tr=!1;return[[0,1,2,3],[3,0,1,2],[2,3,0,1],[1,2,3,0]].forEach(function(Lr){if(lr[Lr[0]]&&lr[Lr[1]]&&lr[Lr[2]]&&!lr[Lr[3]]){var ti=er[Lr[0]],Br=er[Lr[1]],Vr=er[Lr[2]],dt=er[Lr[3]];if(o)mt=_e(Qt,[ti,Br,Vr],[Vt[Lr[0]],Vt[Lr[1]],Vt[Lr[2]]])||mt;else{var Ge=me(dt,ti,_t,It),Je=me(dt,Br,_t,It),je=me(dt,Vr,_t,It);mt=_e(null,[Ge,Je,je],[-1,-1,-1])||mt}Tr=!0}}),Tr||([[0,1,2,3],[1,2,3,0],[2,3,0,1],[3,0,1,2],[0,2,3,1],[1,3,2,0]].forEach(function(Lr){if(lr[Lr[0]]&&lr[Lr[1]]&&!lr[Lr[2]]&&!lr[Lr[3]]){var ti=er[Lr[0]],Br=er[Lr[1]],Vr=er[Lr[2]],dt=er[Lr[3]],Ge=me(Vr,ti,_t,It),Je=me(Vr,Br,_t,It),je=me(dt,Br,_t,It),tt=me(dt,ti,_t,It);o?(mt=_e(Qt,[ti,tt,Ge],[Vt[Lr[0]],-1,-1])||mt,mt=_e(Qt,[Br,Je,je],[Vt[Lr[1]],-1,-1])||mt):mt=Ee(null,[Ge,Je,je,tt],[-1,-1,-1,-1])||mt,Tr=!0}}),Tr)||[[0,1,2,3],[1,2,3,0],[2,3,0,1],[3,0,1,2]].forEach(function(Lr){if(lr[Lr[0]]&&!lr[Lr[1]]&&!lr[Lr[2]]&&!lr[Lr[3]]){var ti=er[Lr[0]],Br=er[Lr[1]],Vr=er[Lr[2]],dt=er[Lr[3]],Ge=me(Br,ti,_t,It),Je=me(Vr,ti,_t,It),je=me(dt,ti,_t,It);o?(mt=_e(Qt,[ti,Ge,Je],[Vt[Lr[0]],-1,-1])||mt,mt=_e(Qt,[ti,Je,je],[Vt[Lr[0]],-1,-1])||mt,mt=_e(Qt,[ti,je,Ge],[Vt[Lr[0]],-1,-1])||mt):mt=_e(null,[Ge,Je,je],[-1,-1,-1])||mt,Tr=!0}}),mt}function ge(Qt,Vt,_t,It,mt,er,lr,Tr,Lr,ti,Br){var Vr=!1;return a&&(te(Qt,"A")&&(Vr=Pe(null,[Vt,_t,It,er],ti,Br)||Vr),te(Qt,"B")&&(Vr=Pe(null,[_t,It,mt,Lr],ti,Br)||Vr),te(Qt,"C")&&(Vr=Pe(null,[_t,er,lr,Lr],ti,Br)||Vr),te(Qt,"D")&&(Vr=Pe(null,[It,er,Tr,Lr],ti,Br)||Vr),te(Qt,"E")&&(Vr=Pe(null,[_t,It,er,Lr],ti,Br)||Vr)),o&&(Vr=Pe(Qt,[_t,It,er,Lr],ti,Br)||Vr),Vr}function Re(Qt,Vt,_t,It,mt,er,lr,Tr){return[Tr[0]===!0?!0:Fe(Qt,Le([Vt,_t,It]),[Vt,_t,It],er,lr),Tr[1]===!0?!0:Fe(Qt,Le([It,mt,Vt]),[It,mt,Vt],er,lr)]}function ce(Qt,Vt,_t,It,mt,er,lr,Tr,Lr){return Tr?Re(Qt,Vt,_t,mt,It,er,lr,Lr):Re(Qt,_t,mt,It,Vt,er,lr,Lr)}function Ze(Qt,Vt,_t,It,mt,er,lr){var Tr=!1,Lr,ti,Br,Vr,dt=function(){Tr=Fe(Qt,[Lr,ti,Br],[-1,-1,-1],mt,er)||Tr,Tr=Fe(Qt,[Br,Vr,Lr],[-1,-1,-1],mt,er)||Tr},Ge=lr[0],Je=lr[1],je=lr[2];return Ge&&(Lr=Z(Le([p(Vt,_t-0,It-0)])[0],Le([p(Vt-1,_t-0,It-0)])[0],Ge),ti=Z(Le([p(Vt,_t-0,It-1)])[0],Le([p(Vt-1,_t-0,It-1)])[0],Ge),Br=Z(Le([p(Vt,_t-1,It-1)])[0],Le([p(Vt-1,_t-1,It-1)])[0],Ge),Vr=Z(Le([p(Vt,_t-1,It-0)])[0],Le([p(Vt-1,_t-1,It-0)])[0],Ge),dt()),Je&&(Lr=Z(Le([p(Vt-0,_t,It-0)])[0],Le([p(Vt-0,_t-1,It-0)])[0],Je),ti=Z(Le([p(Vt-0,_t,It-1)])[0],Le([p(Vt-0,_t-1,It-1)])[0],Je),Br=Z(Le([p(Vt-1,_t,It-1)])[0],Le([p(Vt-1,_t-1,It-1)])[0],Je),Vr=Z(Le([p(Vt-1,_t,It-0)])[0],Le([p(Vt-1,_t-1,It-0)])[0],Je),dt()),je&&(Lr=Z(Le([p(Vt-0,_t-0,It)])[0],Le([p(Vt-0,_t-0,It-1)])[0],je),ti=Z(Le([p(Vt-0,_t-1,It)])[0],Le([p(Vt-0,_t-1,It-1)])[0],je),Br=Z(Le([p(Vt-1,_t-1,It)])[0],Le([p(Vt-1,_t-1,It-1)])[0],je),Vr=Z(Le([p(Vt-1,_t-0,It)])[0],Le([p(Vt-1,_t-0,It-1)])[0],je),dt()),Tr}function ut(Qt,Vt,_t,It,mt,er,lr,Tr,Lr,ti,Br,Vr){var dt=Qt;return Vr?(a&&Qt==="even"&&(dt=null),ge(dt,Vt,_t,It,mt,er,lr,Tr,Lr,ti,Br)):(a&&Qt==="odd"&&(dt=null),ge(dt,Lr,Tr,lr,er,mt,It,_t,Vt,ti,Br))}function pt(Qt,Vt,_t,It,mt){for(var er=[],lr=0,Tr=0;Tr<Vt.length;Tr++)for(var Lr=Vt[Tr],ti=1;ti<_;ti++)for(var Br=1;Br<v;Br++)er.push(ce(Qt,p(Lr,Br-1,ti-1),p(Lr,Br-1,ti),p(Lr,Br,ti-1),p(Lr,Br,ti),_t,It,(Lr+Br+ti)%2,mt&&mt[lr]?mt[lr]:[])),lr++;return er}function Zt(Qt,Vt,_t,It,mt){for(var er=[],lr=0,Tr=0;Tr<Vt.length;Tr++)for(var Lr=Vt[Tr],ti=1;ti<d;ti++)for(var Br=1;Br<_;Br++)er.push(ce(Qt,p(ti-1,Lr,Br-1),p(ti,Lr,Br-1),p(ti-1,Lr,Br),p(ti,Lr,Br),_t,It,(ti+Lr+Br)%2,mt&&mt[lr]?mt[lr]:[])),lr++;return er}function st(Qt,Vt,_t,It,mt){for(var er=[],lr=0,Tr=0;Tr<Vt.length;Tr++)for(var Lr=Vt[Tr],ti=1;ti<v;ti++)for(var Br=1;Br<d;Br++)er.push(ce(Qt,p(Br-1,ti-1,Lr),p(Br-1,ti,Lr),p(Br,ti-1,Lr),p(Br,ti,Lr),_t,It,(Br+ti+Lr)%2,mt&&mt[lr]?mt[lr]:[])),lr++;return er}function lt(Qt,Vt,_t){for(var It=1;It<_;It++)for(var mt=1;mt<v;mt++)for(var er=1;er<d;er++)ut(Qt,p(er-1,mt-1,It-1),p(er-1,mt-1,It),p(er-1,mt,It-1),p(er-1,mt,It),p(er,mt-1,It-1),p(er,mt-1,It),p(er,mt,It-1),p(er,mt,It),Vt,_t,(er+mt+It)%2)}function Gt(Qt,Vt,_t){o=!0,lt(Qt,Vt,_t),o=!1}function Nt(Qt,Vt,_t){a=!0,lt(Qt,Vt,_t),a=!1}function Jt(Qt,Vt,_t,It,mt,er){for(var lr=[],Tr=0,Lr=0;Lr<Vt.length;Lr++)for(var ti=Vt[Lr],Br=1;Br<_;Br++)for(var Vr=1;Vr<v;Vr++)lr.push(Ze(Qt,ti,Vr,Br,_t,It,mt[Lr],er&&er[Tr]?er[Tr]:[])),Tr++;return lr}function sr(Qt,Vt,_t,It,mt,er){for(var lr=[],Tr=0,Lr=0;Lr<Vt.length;Lr++)for(var ti=Vt[Lr],Br=1;Br<d;Br++)for(var Vr=1;Vr<_;Vr++)lr.push(Ze(Qt,Br,ti,Vr,_t,It,mt[Lr],er&&er[Tr]?er[Tr]:[])),Tr++;return lr}function wr(Qt,Vt,_t,It,mt,er){for(var lr=[],Tr=0,Lr=0;Lr<Vt.length;Lr++)for(var ti=Vt[Lr],Br=1;Br<v;Br++)for(var Vr=1;Vr<d;Vr++)lr.push(Ze(Qt,Vr,Br,ti,_t,It,mt[Lr],er&&er[Tr]?er[Tr]:[])),Tr++;return lr}function cr(Qt,Vt){for(var _t=[],It=Qt;It<Vt;It++)_t.push(It);return _t}function $e(){for(var Qt=0;Qt<d;Qt++)for(var Vt=0;Vt<v;Vt++)for(var _t=0;_t<_;_t++){var It=p(Qt,Vt,_t);O(e._x[It],e._y[It],e._z[It],e._value[It])}}function St(){z(),$e();var Qt=null;if(r&&i&&(N(i),Gt(Qt,S,L)),t&&n){N(n);for(var Vt=e.surface.pattern,_t=e.surface.count,It=0;It<_t;It++){var mt=_t===1?.5:It/(_t-1),er=(1-mt)*S+mt*L,lr=Math.abs(er-k),Tr=Math.abs(er-E),Lr=lr>Tr?[k,er]:[er,E];Nt(Vt,Lr[0],Lr[1])}}var ti=[[Math.min(S,E),Math.max(S,E)],[Math.min(k,L),Math.max(k,L)]];["x","y","z"].forEach(function(Br){for(var Vr=[],dt=0;dt<ti.length;dt++){var Ge=0,Je=ti[dt][0],je=ti[dt][1],tt=e.slices[Br];if(tt.show&&tt.fill){N(tt.fill);var xt=[],Ie=[],xe=[];if(tt.locations.length)for(var ke=0;ke<tt.locations.length;ke++){var vt=ek(tt.locations[ke],Br==="x"?c:Br==="y"?f:h);vt.distRatio===0?xt.push(vt.id):vt.id>0&&(Ie.push(vt.id),Br==="x"?xe.push([vt.distRatio,0,0]):Br==="y"?xe.push([0,vt.distRatio,0]):xe.push([0,0,vt.distRatio]))}else Br==="x"?xt=cr(1,d-1):Br==="y"?xt=cr(1,v-1):xt=cr(1,_-1);Ie.length>0&&(Br==="x"?Vr[Ge]=Jt(Qt,Ie,Je,je,xe,Vr[Ge]):Br==="y"?Vr[Ge]=sr(Qt,Ie,Je,je,xe,Vr[Ge]):Vr[Ge]=wr(Qt,Ie,Je,je,xe,Vr[Ge]),Ge++),xt.length>0&&(Br==="x"?Vr[Ge]=pt(Qt,xt,Je,je,Vr[Ge]):Br==="y"?Vr[Ge]=Zt(Qt,xt,Je,je,Vr[Ge]):Vr[Ge]=st(Qt,xt,Je,je,Vr[Ge]),Ge++)}var ir=e.caps[Br];ir.show&&ir.fill&&(N(ir.fill),Br==="x"?Vr[Ge]=pt(Qt,[0,d-1],Je,je,Vr[Ge]):Br==="y"?Vr[Ge]=Zt(Qt,[0,v-1],Je,je,Vr[Ge]):Vr[Ge]=st(Qt,[0,_-1],Je,je,Vr[Ge]),Ge++)}}),s===0&&z(),e._meshX=x,e._meshY=C,e._meshZ=M,e._meshIntensity=g,e._Xs=c,e._Ys=f,e._Zs=h}return St(),e}function GRt(e,t){var r=e.glplot.gl,n=ORt({gl:r}),i=new _8e(e,n,t.uid);return n._trace=i,i.update(t),e.glplot.add(n),i}b8e.exports={findNearestOnAxis:ek,generateIsoMeshes:x8e,createIsosurfaceTrace:GRt}});var T8e=ye((Pgr,w8e)=>{"use strict";w8e.exports={attributes:sF(),supplyDefaults:YX().supplyDefaults,calc:$X(),colorbar:{min:"cmin",max:"cmax"},plot:cF().createIsosurfaceTrace,moduleType:"trace",name:"isosurface",basePlotModule:$_(),categories:["gl3d","showLegend"],meta:{}}});var S8e=ye((Igr,A8e)=>{"use strict";A8e.exports=T8e()});var tZ=ye((Rgr,E8e)=>{"use strict";var HRt=Tu(),yh=sF(),jRt=JE(),M8e=Gl(),eZ=Ao().extendFlat,WRt=mc().overrideAll,fF=E8e.exports=WRt(eZ({x:yh.x,y:yh.y,z:yh.z,value:yh.value,isomin:yh.isomin,isomax:yh.isomax,surface:yh.surface,spaceframe:{show:{valType:"boolean",dflt:!1},fill:{valType:"number",min:0,max:1,dflt:1}},slices:yh.slices,caps:yh.caps,text:yh.text,hovertext:yh.hovertext,xhoverformat:yh.xhoverformat,yhoverformat:yh.yhoverformat,zhoverformat:yh.zhoverformat,valuehoverformat:yh.valuehoverformat,hovertemplate:yh.hovertemplate,hovertemplatefallback:yh.hovertemplatefallback},HRt("",{colorAttr:"`value`",showScaleDflt:!0,editTypeOverride:"calc"}),{colorbar:yh.colorbar,opacity:yh.opacity,opacityscale:jRt.opacityscale,lightposition:yh.lightposition,lighting:yh.lighting,flatshading:yh.flatshading,contour:yh.contour,hoverinfo:eZ({},M8e.hoverinfo),showlegend:eZ({},M8e.showlegend,{dflt:!1})}),"calc","nested");fF.x.editType=fF.y.editType=fF.z.editType=fF.value.editType="calc+clearAxisTypes"});var C8e=ye((Dgr,k8e)=>{"use strict";var XRt=Dr(),ZRt=tZ(),YRt=YX().supplyIsoDefaults,KRt=GX().opacityscaleDefaults;k8e.exports=function(t,r,n,i){function a(o,s){return XRt.coerce(t,r,ZRt,o,s)}YRt(t,r,n,i,a),KRt(t,r,i,a)}});var R8e=ye((Fgr,I8e)=>{"use strict";var JRt=Od().gl_mesh3d,$Rt=$y().parseColorScale,QRt=Dr().isArrayOrTypedArray,eDt=Jy(),tDt=tc().extractOpts,L8e=ZA(),rZ=cF().findNearestOnAxis,rDt=cF().generateIsoMeshes;function P8e(e,t,r){this.scene=e,this.uid=r,this.mesh=t,this.name="",this.data=null,this.showContour=!1}var iZ=P8e.prototype;iZ.handlePick=function(e){if(e.object===this.mesh){var t=e.data.index,r=this.data._meshX[t],n=this.data._meshY[t],i=this.data._meshZ[t],a=this.data._Ys.length,o=this.data._Zs.length,s=rZ(r,this.data._Xs).id,l=rZ(n,this.data._Ys).id,u=rZ(i,this.data._Zs).id,c=e.index=u+o*l+o*a*s;e.traceCoordinate=[this.data._meshX[c],this.data._meshY[c],this.data._meshZ[c],this.data._value[c]];var f=this.data.hovertext||this.data.text;return QRt(f)&&f[c]!==void 0?e.textLabel=f[c]:f&&(e.textLabel=f),!0}};iZ.update=function(e){var t=this.scene,r=t.fullSceneLayout;this.data=rDt(e);function n(l,u,c,f){return u.map(function(h){return l.d2l(h,0,f)*c})}var i=L8e(n(r.xaxis,e._meshX,t.dataScale[0],e.xcalendar),n(r.yaxis,e._meshY,t.dataScale[1],e.ycalendar),n(r.zaxis,e._meshZ,t.dataScale[2],e.zcalendar)),a=L8e(e._meshI,e._meshJ,e._meshK),o={positions:i,cells:a,lightPosition:[e.lightposition.x,e.lightposition.y,e.lightposition.z],ambient:e.lighting.ambient,diffuse:e.lighting.diffuse,specular:e.lighting.specular,roughness:e.lighting.roughness,fresnel:e.lighting.fresnel,vertexNormalsEpsilon:e.lighting.vertexnormalsepsilon,faceNormalsEpsilon:e.lighting.facenormalsepsilon,opacity:e.opacity,opacityscale:e.opacityscale,contourEnable:e.contour.show,contourColor:eDt(e.contour.color).slice(0,3),contourWidth:e.contour.width,useFacetNormals:e.flatshading},s=tDt(e);o.vertexIntensity=e._meshIntensity,o.vertexIntensityBounds=[s.min,s.max],o.colormap=$Rt(e),this.mesh.update(o)};iZ.dispose=function(){this.scene.glplot.remove(this.mesh),this.mesh.dispose()};function iDt(e,t){var r=e.glplot.gl,n=JRt({gl:r}),i=new P8e(e,n,t.uid);return n._trace=i,i.update(t),e.glplot.add(n),i}I8e.exports=iDt});var F8e=ye((zgr,D8e)=>{"use strict";D8e.exports={attributes:tZ(),supplyDefaults:C8e(),calc:$X(),colorbar:{min:"cmin",max:"cmax"},plot:R8e(),moduleType:"trace",name:"volume",basePlotModule:$_(),categories:["gl3d","showLegend"],meta:{}}});var O8e=ye((Ogr,z8e)=>{"use strict";z8e.exports=F8e()});var N8e=ye((qgr,B8e)=>{"use strict";var nDt=qa(),q8e=Dr(),aDt=Qh(),oDt=WA();B8e.exports=function(t,r,n,i){function a(c,f){return q8e.coerce(t,r,oDt,c,f)}function o(c){var f=c.map(function(h){var d=a(h);return d&&q8e.isArrayOrTypedArray(d)?d:null});return f.every(function(h){return h&&h.length===f[0].length})&&f}var s=o(["x","y","z"]);if(!s){r.visible=!1;return}if(o(["i","j","k"]),r.i&&(!r.j||!r.k)||r.j&&(!r.k||!r.i)||r.k&&(!r.i||!r.j)){r.visible=!1;return}var l=nDt.getComponentMethod("calendars","handleTraceDefaults");l(t,r,["x","y","z"],i),["lighting.ambient","lighting.diffuse","lighting.specular","lighting.roughness","lighting.fresnel","lighting.vertexnormalsepsilon","lighting.facenormalsepsilon","lightposition.x","lightposition.y","lightposition.z","flatshading","alphahull","delaunayaxis","opacity"].forEach(function(c){a(c)});var u=a("contour.show");u&&(a("contour.color"),a("contour.width")),"intensity"in t?(a("intensity"),a("intensitymode"),aDt(t,r,i,a,{prefix:"",cLetter:"c"})):(r.showscale=!1,"facecolor"in t?a("facecolor"):"vertexcolor"in t?a("vertexcolor"):a("color",n)),a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),a("xhoverformat"),a("yhoverformat"),a("zhoverformat"),r._length=null}});var V8e=ye((Bgr,U8e)=>{"use strict";var sDt=gv();U8e.exports=function(t,r){r.intensity&&sDt(t,r,{vals:r.intensity,containerStr:"",cLetter:"c"})}});var X8e=ye((Ngr,W8e)=>{"use strict";var lDt=Od().gl_mesh3d,uDt=Od().delaunay_triangulate,cDt=Od().alpha_shape,fDt=Od().convex_hull,hDt=$y().parseColorScale,dDt=Dr().isArrayOrTypedArray,sZ=Jy(),vDt=tc().extractOpts,G8e=ZA();function j8e(e,t,r){this.scene=e,this.uid=r,this.mesh=t,this.name="",this.color="#fff",this.data=null,this.showContour=!1}var lZ=j8e.prototype;lZ.handlePick=function(e){if(e.object===this.mesh){var t=e.index=e.data.index;e.data._cellCenter?e.traceCoordinate=e.data.dataCoordinate:e.traceCoordinate=[this.data.x[t],this.data.y[t],this.data.z[t]];var r=this.data.hovertext||this.data.text;return dDt(r)&&r[t]!==void 0?e.textLabel=r[t]:r&&(e.textLabel=r),!0}};function H8e(e){for(var t=[],r=e.length,n=0;n<r;n++)t[n]=sZ(e[n]);return t}function nZ(e,t,r,n){for(var i=[],a=t.length,o=0;o<a;o++)i[o]=e.d2l(t[o],0,n)*r;return i}function aZ(e){for(var t=[],r=e.length,n=0;n<r;n++)t[n]=Math.round(e[n]);return t}function pDt(e,t){for(var r=["x","y","z"].indexOf(e),n=[],i=t.length,a=0;a<i;a++)n[a]=[t[a][(r+1)%3],t[a][(r+2)%3]];return uDt(n)}function oZ(e,t){for(var r=e.length,n=0;n<r;n++)if(e[n]<=-.5||e[n]>=t-.5)return!1;return!0}lZ.update=function(e){var t=this.scene,r=t.fullSceneLayout;this.data=e;var n=e.x.length,i=G8e(nZ(r.xaxis,e.x,t.dataScale[0],e.xcalendar),nZ(r.yaxis,e.y,t.dataScale[1],e.ycalendar),nZ(r.zaxis,e.z,t.dataScale[2],e.zcalendar)),a;if(e.i&&e.j&&e.k){if(e.i.length!==e.j.length||e.j.length!==e.k.length||!oZ(e.i,n)||!oZ(e.j,n)||!oZ(e.k,n))return;a=G8e(aZ(e.i),aZ(e.j),aZ(e.k))}else e.alphahull===0?a=fDt(i):e.alphahull>0?a=cDt(e.alphahull,i):a=pDt(e.delaunayaxis,i);var o={positions:i,cells:a,lightPosition:[e.lightposition.x,e.lightposition.y,e.lightposition.z],ambient:e.lighting.ambient,diffuse:e.lighting.diffuse,specular:e.lighting.specular,roughness:e.lighting.roughness,fresnel:e.lighting.fresnel,vertexNormalsEpsilon:e.lighting.vertexnormalsepsilon,faceNormalsEpsilon:e.lighting.facenormalsepsilon,opacity:e.opacity,contourEnable:e.contour.show,contourColor:sZ(e.contour.color).slice(0,3),contourWidth:e.contour.width,useFacetNormals:e.flatshading};if(e.intensity){var s=vDt(e);this.color="#fff";var l=e.intensitymode;o[l+"Intensity"]=e.intensity,o[l+"IntensityBounds"]=[s.min,s.max],o.colormap=hDt(e)}else e.vertexcolor?(this.color=e.vertexcolor[0],o.vertexColors=H8e(e.vertexcolor)):e.facecolor?(this.color=e.facecolor[0],o.cellColors=H8e(e.facecolor)):(this.color=e.color,o.meshColor=sZ(e.color));this.mesh.update(o)};lZ.dispose=function(){this.scene.glplot.remove(this.mesh),this.mesh.dispose()};function gDt(e,t){var r=e.glplot.gl,n=lDt({gl:r}),i=new j8e(e,n,t.uid);return n._trace=i,i.update(t),e.glplot.add(n),i}W8e.exports=gDt});var Y8e=ye((Ugr,Z8e)=>{"use strict";Z8e.exports={attributes:WA(),supplyDefaults:N8e(),calc:V8e(),colorbar:{min:"cmin",max:"cmax"},plot:X8e(),moduleType:"trace",name:"mesh3d",basePlotModule:$_(),categories:["gl3d","showLegend"],meta:{}}});var J8e=ye((Vgr,K8e)=>{"use strict";K8e.exports=Y8e()});var cZ=ye((Ggr,Q8e)=>{"use strict";var mDt=Tu(),YA=df().axisHoverFormat,{hovertemplateAttrs:yDt,templatefallbackAttrs:_Dt}=Ll(),xDt=WA(),$8e=Gl(),uZ=Ao().extendFlat,hF={x:{valType:"data_array",editType:"calc+clearAxisTypes"},y:{valType:"data_array",editType:"calc+clearAxisTypes"},z:{valType:"data_array",editType:"calc+clearAxisTypes"},u:{valType:"data_array",editType:"calc"},v:{valType:"data_array",editType:"calc"},w:{valType:"data_array",editType:"calc"},sizemode:{valType:"enumerated",values:["scaled","absolute","raw"],editType:"calc",dflt:"scaled"},sizeref:{valType:"number",editType:"calc",min:0},anchor:{valType:"enumerated",editType:"calc",values:["tip","tail","cm","center"],dflt:"cm"},text:{valType:"string",dflt:"",arrayOk:!0,editType:"calc"},hovertext:{valType:"string",dflt:"",arrayOk:!0,editType:"calc"},hovertemplate:yDt({editType:"calc"},{keys:["norm"]}),hovertemplatefallback:_Dt({editType:"calc"}),uhoverformat:YA("u",1),vhoverformat:YA("v",1),whoverformat:YA("w",1),xhoverformat:YA("x"),yhoverformat:YA("y"),zhoverformat:YA("z"),showlegend:uZ({},$8e.showlegend,{dflt:!1})};uZ(hF,mDt("",{colorAttr:"u/v/w norm",showScaleDflt:!0,editTypeOverride:"calc"}));var bDt=["opacity","lightposition","lighting"];bDt.forEach(function(e){hF[e]=xDt[e]});hF.hoverinfo=uZ({},$8e.hoverinfo,{editType:"calc",flags:["x","y","z","u","v","w","norm","text","name"],dflt:"x+y+z+norm+text+name"});Q8e.exports=hF});var tRe=ye((Hgr,eRe)=>{"use strict";var wDt=Dr(),TDt=Qh(),ADt=cZ();eRe.exports=function(t,r,n,i){function a(d,v){return wDt.coerce(t,r,ADt,d,v)}var o=a("u"),s=a("v"),l=a("w"),u=a("x"),c=a("y"),f=a("z");if(!o||!o.length||!s||!s.length||!l||!l.length||!u||!u.length||!c||!c.length||!f||!f.length){r.visible=!1;return}var h=a("sizemode");a("sizeref",h==="raw"?1:.5),a("anchor"),a("lighting.ambient"),a("lighting.diffuse"),a("lighting.specular"),a("lighting.roughness"),a("lighting.fresnel"),a("lightposition.x"),a("lightposition.y"),a("lightposition.z"),TDt(t,r,i,a,{prefix:"",cLetter:"c"}),a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),a("uhoverformat"),a("vhoverformat"),a("whoverformat"),a("xhoverformat"),a("yhoverformat"),a("zhoverformat"),r._length=null}});var iRe=ye((jgr,rRe)=>{"use strict";var SDt=gv();rRe.exports=function(t,r){for(var n=r.u,i=r.v,a=r.w,o=Math.min(r.x.length,r.y.length,r.z.length,n.length,i.length,a.length),s=-1/0,l=1/0,u=0;u<o;u++){var c=n[u],f=i[u],h=a[u],d=Math.sqrt(c*c+f*f+h*h);s=Math.max(s,d),l=Math.min(l,d)}r._len=o,r._normMax=s,SDt(t,r,{vals:[l,s],containerStr:"",cLetter:"c"})}});var lRe=ye((Wgr,sRe)=>{"use strict";var MDt=Od().gl_cone3d,EDt=Od().gl_cone3d.createConeMesh,kDt=Dr().simpleMap,CDt=$y().parseColorScale,LDt=tc().extractOpts,PDt=Dr().isArrayOrTypedArray,nRe=ZA();function aRe(e,t){this.scene=e,this.uid=t,this.mesh=null,this.data=null}var fZ=aRe.prototype;fZ.handlePick=function(e){if(e.object===this.mesh){var t=e.index=e.data.index,r=this.data.x[t],n=this.data.y[t],i=this.data.z[t],a=this.data.u[t],o=this.data.v[t],s=this.data.w[t];e.traceCoordinate=[r,n,i,a,o,s,Math.sqrt(a*a+o*o+s*s)];var l=this.data.hovertext||this.data.text;return PDt(l)&&l[t]!==void 0?e.textLabel=l[t]:l&&(e.textLabel=l),!0}};var IDt={xaxis:0,yaxis:1,zaxis:2},RDt={tip:1,tail:0,cm:.25,center:.5},DDt={tip:1,tail:1,cm:.75,center:.5};function oRe(e,t){var r=e.fullSceneLayout,n=e.dataScale,i={};function a(c,f){var h=r[f],d=n[IDt[f]];return kDt(c,function(v){return h.d2l(v)*d})}i.vectors=nRe(a(t.u,"xaxis"),a(t.v,"yaxis"),a(t.w,"zaxis"),t._len),i.positions=nRe(a(t.x,"xaxis"),a(t.y,"yaxis"),a(t.z,"zaxis"),t._len);var o=LDt(t);i.colormap=CDt(t),i.vertexIntensityBounds=[o.min/t._normMax,o.max/t._normMax],i.coneOffset=RDt[t.anchor];var s=t.sizemode;s==="scaled"?i.coneSize=t.sizeref||.5:s==="absolute"?i.coneSize=t.sizeref&&t._normMax?t.sizeref/t._normMax:.5:s==="raw"&&(i.coneSize=t.sizeref),i.coneSizemode=s;var l=MDt(i),u=t.lightposition;return l.lightPosition=[u.x,u.y,u.z],l.ambient=t.lighting.ambient,l.diffuse=t.lighting.diffuse,l.specular=t.lighting.specular,l.roughness=t.lighting.roughness,l.fresnel=t.lighting.fresnel,l.opacity=t.opacity,t._pad=DDt[t.anchor]*l.vectorScale*l.coneScale*t._normMax,l}fZ.update=function(e){this.data=e;var t=oRe(this.scene,e);this.mesh.update(t)};fZ.dispose=function(){this.scene.glplot.remove(this.mesh),this.mesh.dispose()};function FDt(e,t){var r=e.glplot.gl,n=oRe(e,t),i=EDt(r,n),a=new aRe(e,t.uid);return a.mesh=i,a.data=t,i._trace=a,e.glplot.add(i),a}sRe.exports=FDt});var cRe=ye((Xgr,uRe)=>{"use strict";uRe.exports={moduleType:"trace",name:"cone",basePlotModule:$_(),categories:["gl3d","showLegend"],attributes:cZ(),supplyDefaults:tRe(),colorbar:{min:"cmin",max:"cmax"},calc:iRe(),plot:lRe(),eventData:function(e,t){return e.norm=t.traceCoordinate[6],e},meta:{}}});var hRe=ye((Zgr,fRe)=>{"use strict";fRe.exports=cRe()});var dZ=ye((Ygr,vRe)=>{"use strict";var zDt=Tu(),KA=df().axisHoverFormat,{hovertemplateAttrs:ODt,templatefallbackAttrs:qDt}=Ll(),BDt=WA(),dRe=Gl(),hZ=Ao().extendFlat,dF={x:{valType:"data_array",editType:"calc+clearAxisTypes"},y:{valType:"data_array",editType:"calc+clearAxisTypes"},z:{valType:"data_array",editType:"calc+clearAxisTypes"},u:{valType:"data_array",editType:"calc"},v:{valType:"data_array",editType:"calc"},w:{valType:"data_array",editType:"calc"},starts:{x:{valType:"data_array",editType:"calc"},y:{valType:"data_array",editType:"calc"},z:{valType:"data_array",editType:"calc"},editType:"calc"},maxdisplayed:{valType:"integer",min:0,dflt:1e3,editType:"calc"},sizeref:{valType:"number",editType:"calc",min:0,dflt:1},text:{valType:"string",dflt:"",editType:"calc"},hovertext:{valType:"string",dflt:"",editType:"calc"},hovertemplate:ODt({editType:"calc"},{keys:["tubex","tubey","tubez","tubeu","tubev","tubew","norm","divergence"]}),hovertemplatefallback:qDt({editType:"calc"}),uhoverformat:KA("u",1),vhoverformat:KA("v",1),whoverformat:KA("w",1),xhoverformat:KA("x"),yhoverformat:KA("y"),zhoverformat:KA("z"),showlegend:hZ({},dRe.showlegend,{dflt:!1})};hZ(dF,zDt("",{colorAttr:"u/v/w norm",showScaleDflt:!0,editTypeOverride:"calc"}));var NDt=["opacity","lightposition","lighting"];NDt.forEach(function(e){dF[e]=BDt[e]});dF.hoverinfo=hZ({},dRe.hoverinfo,{editType:"calc",flags:["x","y","z","u","v","w","norm","divergence","text","name"],dflt:"x+y+z+norm+text+name"});vRe.exports=dF});var gRe=ye((Kgr,pRe)=>{"use strict";var UDt=Dr(),VDt=Qh(),GDt=dZ();pRe.exports=function(t,r,n,i){function a(h,d){return UDt.coerce(t,r,GDt,h,d)}var o=a("u"),s=a("v"),l=a("w"),u=a("x"),c=a("y"),f=a("z");if(!o||!o.length||!s||!s.length||!l||!l.length||!u||!u.length||!c||!c.length||!f||!f.length){r.visible=!1;return}a("starts.x"),a("starts.y"),a("starts.z"),a("maxdisplayed"),a("sizeref"),a("lighting.ambient"),a("lighting.diffuse"),a("lighting.specular"),a("lighting.roughness"),a("lighting.fresnel"),a("lightposition.x"),a("lightposition.y"),a("lightposition.z"),VDt(t,r,i,a,{prefix:"",cLetter:"c"}),a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),a("uhoverformat"),a("vhoverformat"),a("whoverformat"),a("xhoverformat"),a("yhoverformat"),a("zhoverformat"),r._length=null}});var ARe=ye((Jgr,TRe)=>{"use strict";var _Re=Od().gl_streamtube3d,HDt=_Re.createTubeMesh,jDt=Dr(),WDt=$y().parseColorScale,XDt=tc().extractOpts,mRe=ZA(),xRe={xaxis:0,yaxis:1,zaxis:2};function bRe(e,t){this.scene=e,this.uid=t,this.mesh=null,this.data=null}var pZ=bRe.prototype;pZ.handlePick=function(e){var t=this.scene.fullSceneLayout,r=this.scene.dataScale;function n(o,s){var l=t[s],u=r[xRe[s]];return l.l2c(o)/u}if(e.object===this.mesh){var i=e.data.position,a=e.data.velocity;return e.traceCoordinate=[n(i[0],"xaxis"),n(i[1],"yaxis"),n(i[2],"zaxis"),n(a[0],"xaxis"),n(a[1],"yaxis"),n(a[2],"zaxis"),e.data.intensity*this.data._normMax,e.data.divergence],e.textLabel=this.data.hovertext||this.data.text,!0}};function yRe(e){var t=e.length,r;return t>2?r=e.slice(1,t-1):t===2?r=[(e[0]+e[1])/2]:r=e,r}function vZ(e){var t=e.length;return t===1?[.5,.5]:[e[1]-e[0],e[t-1]-e[t-2]]}function wRe(e,t){var r=e.fullSceneLayout,n=e.dataScale,i=t._len,a={};function o(z,O){var V=r[O],G=n[xRe[O]];return jDt.simpleMap(z,function(Z){return V.d2l(Z)*G})}if(a.vectors=mRe(o(t._u,"xaxis"),o(t._v,"yaxis"),o(t._w,"zaxis"),i),!i)return{positions:[],cells:[]};var s=o(t._Xs,"xaxis"),l=o(t._Ys,"yaxis"),u=o(t._Zs,"zaxis");a.meshgrid=[s,l,u],a.gridFill=t._gridFill;var c=t._slen;if(c)a.startingPositions=mRe(o(t._startsX,"xaxis"),o(t._startsY,"yaxis"),o(t._startsZ,"zaxis"));else{for(var f=l[0],h=yRe(s),d=yRe(u),v=new Array(h.length*d.length),_=0,b=0;b<h.length;b++)for(var p=0;p<d.length;p++)v[_++]=[h[b],f,d[p]];a.startingPositions=v}a.colormap=WDt(t),a.tubeSize=t.sizeref,a.maxLength=t.maxdisplayed;var k=o(t._xbnds,"xaxis"),E=o(t._ybnds,"yaxis"),S=o(t._zbnds,"zaxis"),L=vZ(s),x=vZ(l),C=vZ(u),M=[[k[0]-L[0],E[0]-x[0],S[0]-C[0]],[k[1]+L[1],E[1]+x[1],S[1]+C[1]]],g=_Re(a,M),P=XDt(t);g.vertexIntensityBounds=[P.min/t._normMax,P.max/t._normMax];var T=t.lightposition;return g.lightPosition=[T.x,T.y,T.z],g.ambient=t.lighting.ambient,g.diffuse=t.lighting.diffuse,g.specular=t.lighting.specular,g.roughness=t.lighting.roughness,g.fresnel=t.lighting.fresnel,g.opacity=t.opacity,t._pad=g.tubeScale*t.sizeref*2,g}pZ.update=function(e){this.data=e;var t=wRe(this.scene,e);this.mesh.update(t)};pZ.dispose=function(){this.scene.glplot.remove(this.mesh),this.mesh.dispose()};function ZDt(e,t){var r=e.glplot.gl,n=wRe(e,t),i=HDt(r,n),a=new bRe(e,t.uid);return a.mesh=i,a.data=t,i._trace=a,e.glplot.add(i),a}TRe.exports=ZDt});var MRe=ye(($gr,SRe)=>{"use strict";SRe.exports={moduleType:"trace",name:"streamtube",basePlotModule:$_(),categories:["gl3d","showLegend"],attributes:dZ(),supplyDefaults:gRe(),colorbar:{min:"cmin",max:"cmax"},calc:lF().calc,plot:ARe(),eventData:function(e,t){return e.tubex=e.x,e.tubey=e.y,e.tubez=e.z,e.tubeu=t.traceCoordinate[3],e.tubev=t.traceCoordinate[4],e.tubew=t.traceCoordinate[5],e.norm=t.traceCoordinate[6],e.divergence=t.traceCoordinate[7],delete e.x,delete e.y,delete e.z,e},meta:{}}});var kRe=ye((Qgr,ERe)=>{"use strict";ERe.exports=MRe()});var j2=ye((tmr,IRe)=>{"use strict";var{hovertemplateAttrs:YDt,texttemplateAttrs:KDt,templatefallbackAttrs:CRe}=Ll(),JDt=Cg(),Wm=pf(),$Dt=Gl(),LRe=Tu(),QDt=Pd().dash,H2=Ao().extendFlat,eFt=mc().overrideAll,eg=Wm.marker,PRe=Wm.line,tFt=eg.line,emr=["The library used by the *country names* `locationmode` option is changing in an upcoming version.","Country names in existing plots may not work in the new version."].join(" ");IRe.exports=eFt({lon:{valType:"data_array"},lat:{valType:"data_array"},locations:{valType:"data_array"},locationmode:{valType:"enumerated",values:["ISO-3","USA-states","country names","geojson-id"],dflt:"ISO-3"},geojson:{valType:"any",editType:"calc"},featureidkey:{valType:"string",editType:"calc",dflt:"id"},mode:H2({},Wm.mode,{dflt:"markers"}),text:H2({},Wm.text,{}),texttemplate:KDt({editType:"plot"},{keys:["lat","lon","location","text"]}),texttemplatefallback:CRe({editType:"plot"}),hovertext:H2({},Wm.hovertext,{}),textfont:Wm.textfont,textposition:Wm.textposition,line:{color:PRe.color,width:PRe.width,dash:QDt},connectgaps:Wm.connectgaps,marker:H2({symbol:eg.symbol,opacity:eg.opacity,angle:eg.angle,angleref:H2({},eg.angleref,{values:["previous","up","north"]}),standoff:eg.standoff,size:eg.size,sizeref:eg.sizeref,sizemin:eg.sizemin,sizemode:eg.sizemode,colorbar:eg.colorbar,line:H2({width:tFt.width},LRe("marker.line")),gradient:eg.gradient},LRe("marker")),fill:{valType:"enumerated",values:["none","toself"],dflt:"none"},fillcolor:JDt(),selected:Wm.selected,unselected:Wm.unselected,hoverinfo:H2({},$Dt.hoverinfo,{flags:["lon","lat","location","text","name"]}),hovertemplate:YDt(),hovertemplatefallback:CRe()},"calc","nested")});var DRe=ye((rmr,RRe)=>{"use strict";var vF=Dr(),gZ=Ru(),rFt=$p(),iFt=R0(),nFt=D0(),aFt=Rg(),oFt=j2(),sFt=["The library used by the *country names* `locationmode` option is changing in the next major version.","Some country names in existing plots may not work in the new version.","To ensure consistent behavior, consider setting `locationmode` to *ISO-3*."].join(" ");RRe.exports=function(t,r,n,i){function a(d,v){return vF.coerce(t,r,oFt,d,v)}var o=a("locations"),s;if(o&&o.length){var l=a("geojson"),u;(typeof l=="string"&&l!==""||vF.isPlainObject(l))&&(u="geojson-id");var c=a("locationmode",u);c==="country names"&&vF.warn(sFt),c==="geojson-id"&&a("featureidkey"),s=o.length}else{var f=a("lon")||[],h=a("lat")||[];s=Math.min(f.length,h.length)}if(!s){r.visible=!1;return}r._length=s,a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),a("mode"),gZ.hasMarkers(r)&&rFt(t,r,n,i,a,{gradient:!0}),gZ.hasLines(r)&&(iFt(t,r,n,i,a),a("connectgaps")),gZ.hasText(r)&&(a("texttemplate"),a("texttemplatefallback"),nFt(t,r,i,a)),a("fill"),r.fill!=="none"&&aFt(t,r,n,a),vF.coerceSelectionMarkerOpacity(r,a)}});var ORe=ye((imr,zRe)=>{"use strict";var FRe=ho();zRe.exports=function(t,r,n){var i={},a=n[r.geo]._subplot,o=a.mockAxis,s=t.lonlat;return i.lonLabel=FRe.tickText(o,o.c2l(s[0]),!0).text,i.latLabel=FRe.tickText(o,o.c2l(s[1]),!0).text,i}});var pF=ye((nmr,URe)=>{"use strict";var mZ=Eo(),qRe=fs().BADNUM,lFt=F0(),uFt=km(),cFt=z0(),fFt=Dr().isArrayOrTypedArray,BRe=Dr()._;function NRe(e){return e&&typeof e=="string"}URe.exports=function(t,r){var n=fFt(r.locations),i=n?r.locations.length:r._length,a=new Array(i),o;r.geojson?o=function(h){return NRe(h)||mZ(h)}:o=NRe;for(var s=0;s<i;s++){var l=a[s]={};if(n){var u=r.locations[s];l.loc=o(u)?u:null}else{var c=r.lon[s],f=r.lat[s];mZ(c)&&mZ(f)?l.lonlat=[+c,+f]:l.lonlat=[qRe,qRe]}}return uFt(a,r),lFt(t,r),cFt(a,r),i&&(a[0].t={labels:{lat:BRe(t,"lat:")+" ",lon:BRe(t,"lon:")+" "}}),a}});var tk=ye(Mv=>{"use strict";Mv.projNames={airy:"airy",aitoff:"aitoff","albers usa":"albersUsa",albers:"albers",august:"august","azimuthal equal area":"azimuthalEqualArea","azimuthal equidistant":"azimuthalEquidistant",baker:"baker",bertin1953:"bertin1953",boggs:"boggs",bonne:"bonne",bottomley:"bottomley",bromley:"bromley",collignon:"collignon","conic conformal":"conicConformal","conic equal area":"conicEqualArea","conic equidistant":"conicEquidistant",craig:"craig",craster:"craster","cylindrical equal area":"cylindricalEqualArea","cylindrical stereographic":"cylindricalStereographic",eckert1:"eckert1",eckert2:"eckert2",eckert3:"eckert3",eckert4:"eckert4",eckert5:"eckert5",eckert6:"eckert6",eisenlohr:"eisenlohr","equal earth":"equalEarth",equirectangular:"equirectangular",fahey:"fahey","foucaut sinusoidal":"foucautSinusoidal",foucaut:"foucaut",ginzburg4:"ginzburg4",ginzburg5:"ginzburg5",ginzburg6:"ginzburg6",ginzburg8:"ginzburg8",ginzburg9:"ginzburg9",gnomonic:"gnomonic","gringorten quincuncial":"gringortenQuincuncial",gringorten:"gringorten",guyou:"guyou",hammer:"hammer",hill:"hill",homolosine:"homolosine",hufnagel:"hufnagel",hyperelliptical:"hyperelliptical",kavrayskiy7:"kavrayskiy7",lagrange:"lagrange",larrivee:"larrivee",laskowski:"laskowski",loximuthal:"loximuthal",mercator:"mercator",miller:"miller",mollweide:"mollweide","mt flat polar parabolic":"mtFlatPolarParabolic","mt flat polar quartic":"mtFlatPolarQuartic","mt flat polar sinusoidal":"mtFlatPolarSinusoidal","natural earth":"naturalEarth","natural earth1":"naturalEarth1","natural earth2":"naturalEarth2","nell hammer":"nellHammer",nicolosi:"nicolosi",orthographic:"orthographic",patterson:"patterson","peirce quincuncial":"peirceQuincuncial",polyconic:"polyconic","rectangular polyconic":"rectangularPolyconic",robinson:"robinson",satellite:"satellite","sinu mollweide":"sinuMollweide",sinusoidal:"sinusoidal",stereographic:"stereographic",times:"times","transverse mercator":"transverseMercator","van der grinten":"vanDerGrinten","van der grinten2":"vanDerGrinten2","van der grinten3":"vanDerGrinten3","van der grinten4":"vanDerGrinten4",wagner4:"wagner4",wagner6:"wagner6",wiechel:"wiechel","winkel tripel":"winkel3",winkel3:"winkel3"};Mv.axesNames=["lonaxis","lataxis"];Mv.lonaxisSpan={orthographic:180,"azimuthal equal area":360,"azimuthal equidistant":360,"conic conformal":180,gnomonic:160,stereographic:180,"transverse mercator":180,"*":360};Mv.lataxisSpan={"conic conformal":150,stereographic:179.5,"*":180};Mv.scopeDefaults={world:{lonaxisRange:[-180,180],lataxisRange:[-90,90],projType:"equirectangular",projRotate:[0,0,0]},usa:{lonaxisRange:[-180,-50],lataxisRange:[15,80],projType:"albers usa"},europe:{lonaxisRange:[-30,60],lataxisRange:[30,85],projType:"conic conformal",projRotate:[15,0,0],projParallels:[0,60]},asia:{lonaxisRange:[22,160],lataxisRange:[-15,55],projType:"mercator",projRotate:[0,0,0]},africa:{lonaxisRange:[-30,60],lataxisRange:[-40,40],projType:"mercator",projRotate:[0,0,0]},"north america":{lonaxisRange:[-180,-45],lataxisRange:[5,85],projType:"conic conformal",projRotate:[-100,0,0],projParallels:[29.5,45.5]},"south america":{lonaxisRange:[-100,-30],lataxisRange:[-60,15],projType:"mercator",projRotate:[0,0,0]},antarctica:{lonaxisRange:[-180,180],lataxisRange:[-90,-60],projType:"equirectangular",projRotate:[0,0,0]},oceania:{lonaxisRange:[-180,180],lataxisRange:[-50,25],projType:"equirectangular",projRotate:[0,0,0]}};Mv.clipPad=.001;Mv.precision=.1;Mv.landColor="#F0DC82";Mv.waterColor="#3399FF";Mv.locationmodeToLayer={"ISO-3":"countries","USA-states":"subunits","country names":"countries"};Mv.sphereSVG={type:"Sphere"};Mv.fillLayers={ocean:1,land:1,lakes:1};Mv.lineLayers={subunits:1,countries:1,coastlines:1,rivers:1,frame:1};Mv.layers=["bg","ocean","land","lakes","subunits","countries","coastlines","rivers","lataxis","lonaxis","frame","backplot","frontplot"];Mv.layersForChoropleth=["bg","ocean","land","subunits","countries","coastlines","lataxis","lonaxis","frame","backplot","rivers","lakes","frontplot"];Mv.layerNameToAdjective={ocean:"ocean",land:"land",lakes:"lake",subunits:"subunit",countries:"country",coastlines:"coastline",rivers:"river",frame:"frame"}});var yZ=ye((gF,VRe)=>{(function(e,t){typeof gF=="object"&&typeof VRe!="undefined"?t(gF):(e=e||self,t(e.topojson=e.topojson||{}))})(gF,function(e){"use strict";function t(E){return E}function r(E){if(E==null)return t;var S,L,x=E.scale[0],C=E.scale[1],M=E.translate[0],g=E.translate[1];return function(P,T){T||(S=L=0);var z=2,O=P.length,V=new Array(O);for(V[0]=(S+=P[0])*x+M,V[1]=(L+=P[1])*C+g;z<O;)V[z]=P[z],++z;return V}}function n(E){var S=r(E.transform),L,x=1/0,C=x,M=-x,g=-x;function P(z){z=S(z),z[0]<x&&(x=z[0]),z[0]>M&&(M=z[0]),z[1]<C&&(C=z[1]),z[1]>g&&(g=z[1])}function T(z){switch(z.type){case"GeometryCollection":z.geometries.forEach(T);break;case"Point":P(z.coordinates);break;case"MultiPoint":z.coordinates.forEach(P);break}}E.arcs.forEach(function(z){for(var O=-1,V=z.length,G;++O<V;)G=S(z[O],O),G[0]<x&&(x=G[0]),G[0]>M&&(M=G[0]),G[1]<C&&(C=G[1]),G[1]>g&&(g=G[1])});for(L in E.objects)T(E.objects[L]);return[x,C,M,g]}function i(E,S){for(var L,x=E.length,C=x-S;C<--x;)L=E[C],E[C++]=E[x],E[x]=L}function a(E,S){return typeof S=="string"&&(S=E.objects[S]),S.type==="GeometryCollection"?{type:"FeatureCollection",features:S.geometries.map(function(L){return o(E,L)})}:o(E,S)}function o(E,S){var L=S.id,x=S.bbox,C=S.properties==null?{}:S.properties,M=s(E,S);return L==null&&x==null?{type:"Feature",properties:C,geometry:M}:x==null?{type:"Feature",id:L,properties:C,geometry:M}:{type:"Feature",id:L,bbox:x,properties:C,geometry:M}}function s(E,S){var L=r(E.transform),x=E.arcs;function C(O,V){V.length&&V.pop();for(var G=x[O<0?~O:O],Z=0,j=G.length;Z<j;++Z)V.push(L(G[Z],Z));O<0&&i(V,j)}function M(O){return L(O)}function g(O){for(var V=[],G=0,Z=O.length;G<Z;++G)C(O[G],V);return V.length<2&&V.push(V[0]),V}function P(O){for(var V=g(O);V.length<4;)V.push(V[0]);return V}function T(O){return O.map(P)}function z(O){var V=O.type,G;switch(V){case"GeometryCollection":return{type:V,geometries:O.geometries.map(z)};case"Point":G=M(O.coordinates);break;case"MultiPoint":G=O.coordinates.map(M);break;case"LineString":G=g(O.arcs);break;case"MultiLineString":G=O.arcs.map(g);break;case"Polygon":G=T(O.arcs);break;case"MultiPolygon":G=O.arcs.map(T);break;default:return null}return{type:V,coordinates:G}}return z(S)}function l(E,S){var L={},x={},C={},M=[],g=-1;S.forEach(function(z,O){var V=E.arcs[z<0?~z:z],G;V.length<3&&!V[1][0]&&!V[1][1]&&(G=S[++g],S[g]=z,S[O]=G)}),S.forEach(function(z){var O=P(z),V=O[0],G=O[1],Z,j;if(Z=C[V])if(delete C[Z.end],Z.push(z),Z.end=G,j=x[G]){delete x[j.start];var N=j===Z?Z:Z.concat(j);x[N.start=Z.start]=C[N.end=j.end]=N}else x[Z.start]=C[Z.end]=Z;else if(Z=x[G])if(delete x[Z.start],Z.unshift(z),Z.start=V,j=C[V]){delete C[j.end];var H=j===Z?Z:j.concat(Z);x[H.start=j.start]=C[H.end=Z.end]=H}else x[Z.start]=C[Z.end]=Z;else Z=[z],x[Z.start=V]=C[Z.end=G]=Z});function P(z){var O=E.arcs[z<0?~z:z],V=O[0],G;return E.transform?(G=[0,0],O.forEach(function(Z){G[0]+=Z[0],G[1]+=Z[1]})):G=O[O.length-1],z<0?[G,V]:[V,G]}function T(z,O){for(var V in z){var G=z[V];delete O[G.start],delete G.start,delete G.end,G.forEach(function(Z){L[Z<0?~Z:Z]=1}),M.push(G)}}return T(C,x),T(x,C),S.forEach(function(z){L[z<0?~z:z]||M.push([z])}),M}function u(E){return s(E,c.apply(this,arguments))}function c(E,S,L){var x,C,M;if(arguments.length>1)x=f(E,S,L);else for(C=0,x=new Array(M=E.arcs.length);C<M;++C)x[C]=C;return{type:"MultiLineString",arcs:l(E,x)}}function f(E,S,L){var x=[],C=[],M;function g(V){var G=V<0?~V:V;(C[G]||(C[G]=[])).push({i:V,g:M})}function P(V){V.forEach(g)}function T(V){V.forEach(P)}function z(V){V.forEach(T)}function O(V){switch(M=V,V.type){case"GeometryCollection":V.geometries.forEach(O);break;case"LineString":P(V.arcs);break;case"MultiLineString":case"Polygon":T(V.arcs);break;case"MultiPolygon":z(V.arcs);break}}return O(S),C.forEach(L==null?function(V){x.push(V[0].i)}:function(V){L(V[0].g,V[V.length-1].g)&&x.push(V[0].i)}),x}function h(E){for(var S=-1,L=E.length,x,C=E[L-1],M=0;++S<L;)x=C,C=E[S],M+=x[0]*C[1]-x[1]*C[0];return Math.abs(M)}function d(E){return s(E,v.apply(this,arguments))}function v(E,S){var L={},x=[],C=[];S.forEach(M);function M(T){switch(T.type){case"GeometryCollection":T.geometries.forEach(M);break;case"Polygon":g(T.arcs);break;case"MultiPolygon":T.arcs.forEach(g);break}}function g(T){T.forEach(function(z){z.forEach(function(O){(L[O=O<0?~O:O]||(L[O]=[])).push(T)})}),x.push(T)}function P(T){return h(s(E,{type:"Polygon",arcs:[T]}).coordinates[0])}return x.forEach(function(T){if(!T._){var z=[],O=[T];for(T._=1,C.push(z);T=O.pop();)z.push(T),T.forEach(function(V){V.forEach(function(G){L[G<0?~G:G].forEach(function(Z){Z._||(Z._=1,O.push(Z))})})})}}),x.forEach(function(T){delete T._}),{type:"MultiPolygon",arcs:C.map(function(T){var z=[],O;if(T.forEach(function(N){N.forEach(function(H){H.forEach(function(te){L[te<0?~te:te].length<2&&z.push(te)})})}),z=l(E,z),(O=z.length)>1)for(var V=1,G=P(z[0]),Z,j;V<O;++V)(Z=P(z[V]))>G&&(j=z[0],z[0]=z[V],z[V]=j,G=Z);return z}).filter(function(T){return T.length>0})}}function _(E,S){for(var L=0,x=E.length;L<x;){var C=L+x>>>1;E[C]<S?L=C+1:x=C}return L}function b(E){var S={},L=E.map(function(){return[]});function x(N,H){N.forEach(function(te){te<0&&(te=~te);var oe=S[te];oe?oe.push(H):S[te]=[H]})}function C(N,H){N.forEach(function(te){x(te,H)})}function M(N,H){N.type==="GeometryCollection"?N.geometries.forEach(function(te){M(te,H)}):N.type in g&&g[N.type](N.arcs,H)}var g={LineString:x,MultiLineString:C,Polygon:C,MultiPolygon:function(N,H){N.forEach(function(te){C(te,H)})}};E.forEach(M);for(var P in S)for(var T=S[P],z=T.length,O=0;O<z;++O)for(var V=O+1;V<z;++V){var G=T[O],Z=T[V],j;(j=L[G])[P=_(j,Z)]!==Z&&j.splice(P,0,Z),(j=L[Z])[P=_(j,G)]!==G&&j.splice(P,0,G)}return L}function p(E){if(E==null)return t;var S,L,x=E.scale[0],C=E.scale[1],M=E.translate[0],g=E.translate[1];return function(P,T){T||(S=L=0);var z=2,O=P.length,V=new Array(O),G=Math.round((P[0]-M)/x),Z=Math.round((P[1]-g)/C);for(V[0]=G-S,S=G,V[1]=Z-L,L=Z;z<O;)V[z]=P[z],++z;return V}}function k(E,S){if(E.transform)throw new Error("already quantized");if(!S||!S.scale){if(!((g=Math.floor(S))>=2))throw new Error("n must be \u22652");T=E.bbox||n(E);var L=T[0],x=T[1],C=T[2],M=T[3],g;S={scale:[C-L?(C-L)/(g-1):1,M-x?(M-x)/(g-1):1],translate:[L,x]}}else T=E.bbox;var P=p(S),T,z,O=E.objects,V={};function G(N){return P(N)}function Z(N){var H;switch(N.type){case"GeometryCollection":H={type:"GeometryCollection",geometries:N.geometries.map(Z)};break;case"Point":H={type:"Point",coordinates:G(N.coordinates)};break;case"MultiPoint":H={type:"MultiPoint",coordinates:N.coordinates.map(G)};break;default:return N}return N.id!=null&&(H.id=N.id),N.bbox!=null&&(H.bbox=N.bbox),N.properties!=null&&(H.properties=N.properties),H}function j(N){var H=0,te=1,oe=N.length,_e,Ee=new Array(oe);for(Ee[0]=P(N[0],0);++H<oe;)((_e=P(N[H],H))[0]||_e[1])&&(Ee[te++]=_e);return te===1&&(Ee[te++]=[0,0]),Ee.length=te,Ee}for(z in O)V[z]=Z(O[z]);return{type:"Topology",bbox:T,transform:S,objects:V,arcs:E.arcs.map(j)}}e.bbox=n,e.feature=a,e.merge=d,e.mergeArcs=v,e.mesh=u,e.meshArcs=c,e.neighbors=b,e.quantize=k,e.transform=r,e.untransform=p,Object.defineProperty(e,"__esModule",{value:!0})})});var mF=ye((omr,GRe)=>{"use strict";var _Z=GRe.exports={},hFt=tk().locationmodeToLayer,dFt=yZ().feature;_Z.getTopojsonName=function(e){return[e.scope.replace(/ /g,"-"),"_",e.resolution.toString(),"m"].join("")};_Z.getTopojsonPath=function(e,t){return e+=e.endsWith("/")?"":"/",`${e}${t}.json`};_Z.getTopojsonFeatures=function(e,t){var r=hFt[e.locationmode],n=t.objects[r];return dFt(t,n).features}});var tx=ye(rk=>{"use strict";var vFt=fs().BADNUM;rk.calcTraceToLineCoords=function(e){for(var t=e[0].trace,r=t.connectgaps,n=[],i=[],a=0;a<e.length;a++){var o=e[a],s=o.lonlat;s[0]!==vFt?i.push(s):!r&&i.length>0&&(n.push(i),i=[])}return i.length>0&&n.push(i),n};rk.makeLine=function(e){return e.length===1?{type:"LineString",coordinates:e[0]}:{type:"MultiLineString",coordinates:e}};rk.makePolygon=function(e){if(e.length===1)return{type:"Polygon",coordinates:e};for(var t=new Array(e.length),r=0;r<e.length;r++)t[r]=[e[r]];return{type:"MultiPolygon",coordinates:t}};rk.makeBlank=function(){return{type:"Point",coordinates:[]}}});var jRe=ye((lmr,HRe)=>{HRe.exports={AFG:"afghan",ALA:"\\b\\wland",ALB:"albania",DZA:"algeria",ASM:"^(?=.*americ).*samoa",AND:"andorra",AGO:"angola",AIA:"anguill?a",ATA:"antarctica",ATG:"antigua",ARG:"argentin",ARM:"armenia",ABW:"^(?!.*bonaire).*\\baruba",AUS:"australia",AUT:"^(?!.*hungary).*austria|\\baustri.*\\bemp",AZE:"azerbaijan",BHS:"bahamas",BHR:"bahrain",BGD:"bangladesh|^(?=.*east).*paki?stan",BRB:"barbados",BLR:"belarus|byelo",BEL:"^(?!.*luxem).*belgium",BLZ:"belize|^(?=.*british).*honduras",BEN:"benin|dahome",BMU:"bermuda",BTN:"bhutan",BOL:"bolivia",BES:"^(?=.*bonaire).*eustatius|^(?=.*carib).*netherlands|\\bbes.?islands",BIH:"herzegovina|bosnia",BWA:"botswana|bechuana",BVT:"bouvet",BRA:"brazil",IOT:"british.?indian.?ocean",BRN:"brunei",BGR:"bulgaria",BFA:"burkina|\\bfaso|upper.?volta",BDI:"burundi",CPV:"verde",KHM:"cambodia|kampuchea|khmer",CMR:"cameroon",CAN:"canada",CYM:"cayman",CAF:"\\bcentral.african.republic",TCD:"\\bchad",CHL:"\\bchile",CHN:"^(?!.*\\bmac)(?!.*\\bhong)(?!.*\\btai)(?!.*\\brep).*china|^(?=.*peo)(?=.*rep).*china",CXR:"christmas",CCK:"\\bcocos|keeling",COL:"colombia",COM:"comoro",COG:"^(?!.*\\bdem)(?!.*\\bd[\\.]?r)(?!.*kinshasa)(?!.*zaire)(?!.*belg)(?!.*l.opoldville)(?!.*free).*\\bcongo",COK:"\\bcook",CRI:"costa.?rica",CIV:"ivoire|ivory",HRV:"croatia",CUB:"\\bcuba",CUW:"^(?!.*bonaire).*\\bcura(c|\xE7)ao",CYP:"cyprus",CSK:"czechoslovakia",CZE:"^(?=.*rep).*czech|czechia|bohemia",COD:"\\bdem.*congo|congo.*\\bdem|congo.*\\bd[\\.]?r|\\bd[\\.]?r.*congo|belgian.?congo|congo.?free.?state|kinshasa|zaire|l.opoldville|drc|droc|rdc",DNK:"denmark",DJI:"djibouti",DMA:"dominica(?!n)",DOM:"dominican.rep",ECU:"ecuador",EGY:"egypt",SLV:"el.?salvador",GNQ:"guine.*eq|eq.*guine|^(?=.*span).*guinea",ERI:"eritrea",EST:"estonia",ETH:"ethiopia|abyssinia",FLK:"falkland|malvinas",FRO:"faroe|faeroe",FJI:"fiji",FIN:"finland",FRA:"^(?!.*\\bdep)(?!.*martinique).*france|french.?republic|\\bgaul",GUF:"^(?=.*french).*guiana",PYF:"french.?polynesia|tahiti",ATF:"french.?southern",GAB:"gabon",GMB:"gambia",GEO:"^(?!.*south).*georgia",DDR:"german.?democratic.?republic|democratic.?republic.*germany|east.germany",DEU:"^(?!.*east).*germany|^(?=.*\\bfed.*\\brep).*german",GHA:"ghana|gold.?coast",GIB:"gibraltar",GRC:"greece|hellenic|hellas",GRL:"greenland",GRD:"grenada",GLP:"guadeloupe",GUM:"\\bguam",GTM:"guatemala",GGY:"guernsey",GIN:"^(?!.*eq)(?!.*span)(?!.*bissau)(?!.*portu)(?!.*new).*guinea",GNB:"bissau|^(?=.*portu).*guinea",GUY:"guyana|british.?guiana",HTI:"haiti",HMD:"heard.*mcdonald",VAT:"holy.?see|vatican|papal.?st",HND:"^(?!.*brit).*honduras",HKG:"hong.?kong",HUN:"^(?!.*austr).*hungary",ISL:"iceland",IND:"india(?!.*ocea)",IDN:"indonesia",IRN:"\\biran|persia",IRQ:"\\biraq|mesopotamia",IRL:"(^ireland)|(^republic.*ireland)",IMN:"^(?=.*isle).*\\bman",ISR:"israel",ITA:"italy",JAM:"jamaica",JPN:"japan",JEY:"jersey",JOR:"jordan",KAZ:"kazak",KEN:"kenya|british.?east.?africa|east.?africa.?prot",KIR:"kiribati",PRK:"^(?=.*democrat|people|north|d.*p.*.r).*\\bkorea|dprk|korea.*(d.*p.*r)",KWT:"kuwait",KGZ:"kyrgyz|kirghiz",LAO:"\\blaos?\\b",LVA:"latvia",LBN:"lebanon",LSO:"lesotho|basuto",LBR:"liberia",LBY:"libya",LIE:"liechtenstein",LTU:"lithuania",LUX:"^(?!.*belg).*luxem",MAC:"maca(o|u)",MDG:"madagascar|malagasy",MWI:"malawi|nyasa",MYS:"malaysia",MDV:"maldive",MLI:"\\bmali\\b",MLT:"\\bmalta",MHL:"marshall",MTQ:"martinique",MRT:"mauritania",MUS:"mauritius",MYT:"\\bmayotte",MEX:"\\bmexic",FSM:"fed.*micronesia|micronesia.*fed",MCO:"monaco",MNG:"mongolia",MNE:"^(?!.*serbia).*montenegro",MSR:"montserrat",MAR:"morocco|\\bmaroc",MOZ:"mozambique",MMR:"myanmar|burma",NAM:"namibia",NRU:"nauru",NPL:"nepal",NLD:"^(?!.*\\bant)(?!.*\\bcarib).*netherlands",ANT:"^(?=.*\\bant).*(nether|dutch)",NCL:"new.?caledonia",NZL:"new.?zealand",NIC:"nicaragua",NER:"\\bniger(?!ia)",NGA:"nigeria",NIU:"niue",NFK:"norfolk",MNP:"mariana",NOR:"norway",OMN:"\\boman|trucial",PAK:"^(?!.*east).*paki?stan",PLW:"palau",PSE:"palestin|\\bgaza|west.?bank",PAN:"panama",PNG:"papua|new.?guinea",PRY:"paraguay",PER:"peru",PHL:"philippines",PCN:"pitcairn",POL:"poland",PRT:"portugal",PRI:"puerto.?rico",QAT:"qatar",KOR:"^(?!.*d.*p.*r)(?!.*democrat)(?!.*people)(?!.*north).*\\bkorea(?!.*d.*p.*r)",MDA:"moldov|b(a|e)ssarabia",REU:"r(e|\xE9)union",ROU:"r(o|u|ou)mania",RUS:"\\brussia|soviet.?union|u\\.?s\\.?s\\.?r|socialist.?republics",RWA:"rwanda",BLM:"barth(e|\xE9)lemy",SHN:"helena",KNA:"kitts|\\bnevis",LCA:"\\blucia",MAF:"^(?=.*collectivity).*martin|^(?=.*france).*martin(?!ique)|^(?=.*french).*martin(?!ique)",SPM:"miquelon",VCT:"vincent",WSM:"^(?!.*amer).*samoa",SMR:"san.?marino",STP:"\\bs(a|\xE3)o.?tom(e|\xE9)",SAU:"\\bsa\\w*.?arabia",SEN:"senegal",SRB:"^(?!.*monte).*serbia",SYC:"seychell",SLE:"sierra",SGP:"singapore",SXM:"^(?!.*martin)(?!.*saba).*maarten",SVK:"^(?!.*cze).*slovak",SVN:"slovenia",SLB:"solomon",SOM:"somali",ZAF:"south.africa|s\\\\..?africa",SGS:"south.?georgia|sandwich",SSD:"\\bs\\w*.?sudan",ESP:"spain",LKA:"sri.?lanka|ceylon",SDN:"^(?!.*\\bs(?!u)).*sudan",SUR:"surinam|dutch.?guiana",SJM:"svalbard",SWZ:"swaziland",SWE:"sweden",CHE:"switz|swiss",SYR:"syria",TWN:"taiwan|taipei|formosa|^(?!.*peo)(?=.*rep).*china",TJK:"tajik",THA:"thailand|\\bsiam",MKD:"macedonia|fyrom",TLS:"^(?=.*leste).*timor|^(?=.*east).*timor",TGO:"togo",TKL:"tokelau",TON:"tonga",TTO:"trinidad|tobago",TUN:"tunisia",TUR:"turkey",TKM:"turkmen",TCA:"turks",TUV:"tuvalu",UGA:"uganda",UKR:"ukrain",ARE:"emirates|^u\\.?a\\.?e\\.?$|united.?arab.?em",GBR:"united.?kingdom|britain|^u\\.?k\\.?$",TZA:"tanzania",USA:"united.?states\\b(?!.*islands)|\\bu\\.?s\\.?a\\.?\\b|^\\s*u\\.?s\\.?\\b(?!.*islands)",UMI:"minor.?outlying.?is",URY:"uruguay",UZB:"uzbek",VUT:"vanuatu|new.?hebrides",VEN:"venezuela",VNM:"^(?!.*republic).*viet.?nam|^(?=.*socialist).*viet.?nam",VGB:"^(?=.*\\bu\\.?\\s?k).*virgin|^(?=.*brit).*virgin|^(?=.*kingdom).*virgin",VIR:"^(?=.*\\bu\\.?\\s?s).*virgin|^(?=.*states).*virgin",WLF:"futuna|wallis",ESH:"western.sahara",YEM:"^(?!.*arab)(?!.*north)(?!.*sana)(?!.*peo)(?!.*dem)(?!.*south)(?!.*aden)(?!.*\\bp\\.?d\\.?r).*yemen",YMD:"^(?=.*peo).*yemen|^(?!.*rep)(?=.*dem).*yemen|^(?=.*south).*yemen|^(?=.*aden).*yemen|^(?=.*\\bp\\.?d\\.?r).*yemen",YUG:"yugoslavia",ZMB:"zambia|northern.?rhodesia",EAZ:"zanzibar",ZWE:"zimbabwe|^(?!.*northern).*rhodesia"}});var xF=ye(ic=>{"use strict";Object.defineProperty(ic,"__esModule",{value:!0});var Op=63710088e-1,bZ={centimeters:Op*100,centimetres:Op*100,degrees:360/(2*Math.PI),feet:Op*3.28084,inches:Op*39.37,kilometers:Op/1e3,kilometres:Op/1e3,meters:Op,metres:Op,miles:Op/1609.344,millimeters:Op*1e3,millimetres:Op*1e3,nauticalmiles:Op/1852,radians:1,yards:Op*1.0936},xZ={acres:247105e-9,centimeters:1e4,centimetres:1e4,feet:10.763910417,hectares:1e-4,inches:1550.003100006,kilometers:1e-6,kilometres:1e-6,meters:1,metres:1,miles:386e-9,nauticalmiles:29155334959812285e-23,millimeters:1e6,millimetres:1e6,yards:1.195990046};function rx(e,t,r={}){let n={type:"Feature"};return(r.id===0||r.id)&&(n.id=r.id),r.bbox&&(n.bbox=r.bbox),n.properties=t||{},n.geometry=e,n}function pFt(e,t,r={}){switch(e){case"Point":return wZ(t).geometry;case"LineString":return AZ(t).geometry;case"Polygon":return TZ(t).geometry;case"MultiPoint":return XRe(t).geometry;case"MultiLineString":return WRe(t).geometry;case"MultiPolygon":return ZRe(t).geometry;default:throw new Error(e+" is invalid")}}function wZ(e,t,r={}){if(!e)throw new Error("coordinates is required");if(!Array.isArray(e))throw new Error("coordinates must be an Array");if(e.length<2)throw new Error("coordinates must be at least 2 numbers long");if(!yF(e[0])||!yF(e[1]))throw new Error("coordinates must contain numbers");return rx({type:"Point",coordinates:e},t,r)}function gFt(e,t,r={}){return _F(e.map(n=>wZ(n,t)),r)}function TZ(e,t,r={}){for(let i of e){if(i.length<4)throw new Error("Each LinearRing of a Polygon must have 4 or more Positions.");if(i[i.length-1].length!==i[0].length)throw new Error("First and last Position are not equivalent.");for(let a=0;a<i[i.length-1].length;a++)if(i[i.length-1][a]!==i[0][a])throw new Error("First and last Position are not equivalent.")}return rx({type:"Polygon",coordinates:e},t,r)}function mFt(e,t,r={}){return _F(e.map(n=>TZ(n,t)),r)}function AZ(e,t,r={}){if(e.length<2)throw new Error("coordinates must be an array of two or more positions");return rx({type:"LineString",coordinates:e},t,r)}function yFt(e,t,r={}){return _F(e.map(n=>AZ(n,t)),r)}function _F(e,t={}){let r={type:"FeatureCollection"};return t.id&&(r.id=t.id),t.bbox&&(r.bbox=t.bbox),r.features=e,r}function WRe(e,t,r={}){return rx({type:"MultiLineString",coordinates:e},t,r)}function XRe(e,t,r={}){return rx({type:"MultiPoint",coordinates:e},t,r)}function ZRe(e,t,r={}){return rx({type:"MultiPolygon",coordinates:e},t,r)}function _Ft(e,t,r={}){return rx({type:"GeometryCollection",geometries:e},t,r)}function xFt(e,t=0){if(t&&!(t>=0))throw new Error("precision must be a positive number");let r=Math.pow(10,t||0);return Math.round(e*r)/r}function YRe(e,t="kilometers"){let r=bZ[t];if(!r)throw new Error(t+" units is invalid");return e*r}function SZ(e,t="kilometers"){let r=bZ[t];if(!r)throw new Error(t+" units is invalid");return e/r}function bFt(e,t){return KRe(SZ(e,t))}function wFt(e){let t=e%360;return t<0&&(t+=360),t}function TFt(e){return e=e%360,e>180?e-360:e<-180?e+360:e}function KRe(e){return e%(2*Math.PI)*180/Math.PI}function AFt(e){return e%360*Math.PI/180}function SFt(e,t="kilometers",r="kilometers"){if(!(e>=0))throw new Error("length must be a positive number");return YRe(SZ(e,t),r)}function MFt(e,t="meters",r="kilometers"){if(!(e>=0))throw new Error("area must be a positive number");let n=xZ[t];if(!n)throw new Error("invalid original units");let i=xZ[r];if(!i)throw new Error("invalid final units");return e/n*i}function yF(e){return!isNaN(e)&&e!==null&&!Array.isArray(e)}function EFt(e){return e!==null&&typeof e=="object"&&!Array.isArray(e)}function kFt(e){if(!e)throw new Error("bbox is required");if(!Array.isArray(e))throw new Error("bbox must be an Array");if(e.length!==4&&e.length!==6)throw new Error("bbox must be an Array of 4 or 6 numbers");e.forEach(t=>{if(!yF(t))throw new Error("bbox must only contain numbers")})}function CFt(e){if(!e)throw new Error("id is required");if(["string","number"].indexOf(typeof e)===-1)throw new Error("id must be a number or a string")}ic.areaFactors=xZ;ic.azimuthToBearing=TFt;ic.bearingToAzimuth=wFt;ic.convertArea=MFt;ic.convertLength=SFt;ic.degreesToRadians=AFt;ic.earthRadius=Op;ic.factors=bZ;ic.feature=rx;ic.featureCollection=_F;ic.geometry=pFt;ic.geometryCollection=_Ft;ic.isNumber=yF;ic.isObject=EFt;ic.lengthToDegrees=bFt;ic.lengthToRadians=SZ;ic.lineString=AZ;ic.lineStrings=yFt;ic.multiLineString=WRe;ic.multiPoint=XRe;ic.multiPolygon=ZRe;ic.point=wZ;ic.points=gFt;ic.polygon=TZ;ic.polygons=mFt;ic.radiansToDegrees=KRe;ic.radiansToLength=YRe;ic.round=xFt;ic.validateBBox=kFt;ic.validateId=CFt});var wF=ye(qd=>{"use strict";Object.defineProperty(qd,"__esModule",{value:!0});var Wv=xF();function ik(e,t,r){if(e!==null)for(var n,i,a,o,s,l,u,c=0,f=0,h,d=e.type,v=d==="FeatureCollection",_=d==="Feature",b=v?e.features.length:1,p=0;p<b;p++){u=v?e.features[p].geometry:_?e.geometry:e,h=u?u.type==="GeometryCollection":!1,s=h?u.geometries.length:1;for(var k=0;k<s;k++){var E=0,S=0;if(o=h?u.geometries[k]:u,o!==null){l=o.coordinates;var L=o.type;switch(c=r&&(L==="Polygon"||L==="MultiPolygon")?1:0,L){case null:break;case"Point":if(t(l,f,p,E,S)===!1)return!1;f++,E++;break;case"LineString":case"MultiPoint":for(n=0;n<l.length;n++){if(t(l[n],f,p,E,S)===!1)return!1;f++,L==="MultiPoint"&&E++}L==="LineString"&&E++;break;case"Polygon":case"MultiLineString":for(n=0;n<l.length;n++){for(i=0;i<l[n].length-c;i++){if(t(l[n][i],f,p,E,S)===!1)return!1;f++}L==="MultiLineString"&&E++,L==="Polygon"&&S++}L==="Polygon"&&E++;break;case"MultiPolygon":for(n=0;n<l.length;n++){for(S=0,i=0;i<l[n].length;i++){for(a=0;a<l[n][i].length-c;a++){if(t(l[n][i][a],f,p,E,S)===!1)return!1;f++}S++}E++}break;case"GeometryCollection":for(n=0;n<o.geometries.length;n++)if(ik(o.geometries[n],t,r)===!1)return!1;break;default:throw new Error("Unknown Geometry Type")}}}}}function LFt(e,t,r,n){var i=r;return ik(e,function(a,o,s,l,u){o===0&&r===void 0?i=a:i=t(i,a,o,s,l,u)},n),i}function JRe(e,t){var r;switch(e.type){case"FeatureCollection":for(r=0;r<e.features.length&&t(e.features[r].properties,r)!==!1;r++);break;case"Feature":t(e.properties,0);break}}function PFt(e,t,r){var n=r;return JRe(e,function(i,a){a===0&&r===void 0?n=i:n=t(n,i,a)}),n}function $Re(e,t){if(e.type==="Feature")t(e,0);else if(e.type==="FeatureCollection")for(var r=0;r<e.features.length&&t(e.features[r],r)!==!1;r++);}function IFt(e,t,r){var n=r;return $Re(e,function(i,a){a===0&&r===void 0?n=i:n=t(n,i,a)}),n}function RFt(e){var t=[];return ik(e,function(r){t.push(r)}),t}function MZ(e,t){var r,n,i,a,o,s,l,u,c,f,h=0,d=e.type==="FeatureCollection",v=e.type==="Feature",_=d?e.features.length:1;for(r=0;r<_;r++){for(s=d?e.features[r].geometry:v?e.geometry:e,u=d?e.features[r].properties:v?e.properties:{},c=d?e.features[r].bbox:v?e.bbox:void 0,f=d?e.features[r].id:v?e.id:void 0,l=s?s.type==="GeometryCollection":!1,o=l?s.geometries.length:1,i=0;i<o;i++){if(a=l?s.geometries[i]:s,a===null){if(t(null,h,u,c,f)===!1)return!1;continue}switch(a.type){case"Point":case"LineString":case"MultiPoint":case"Polygon":case"MultiLineString":case"MultiPolygon":{if(t(a,h,u,c,f)===!1)return!1;break}case"GeometryCollection":{for(n=0;n<a.geometries.length;n++)if(t(a.geometries[n],h,u,c,f)===!1)return!1;break}default:throw new Error("Unknown Geometry Type")}}h++}}function DFt(e,t,r){var n=r;return MZ(e,function(i,a,o,s,l){a===0&&r===void 0?n=i:n=t(n,i,a,o,s,l)}),n}function bF(e,t){MZ(e,function(r,n,i,a,o){var s=r===null?null:r.type;switch(s){case null:case"Point":case"LineString":case"Polygon":return t(Wv.feature.call(void 0,r,i,{bbox:a,id:o}),n,0)===!1?!1:void 0}var l;switch(s){case"MultiPoint":l="Point";break;case"MultiLineString":l="LineString";break;case"MultiPolygon":l="Polygon";break}for(var u=0;u<r.coordinates.length;u++){var c=r.coordinates[u],f={type:l,coordinates:c};if(t(Wv.feature.call(void 0,f,i),n,u)===!1)return!1}})}function FFt(e,t,r){var n=r;return bF(e,function(i,a,o){a===0&&o===0&&r===void 0?n=i:n=t(n,i,a,o)}),n}function QRe(e,t){bF(e,function(r,n,i){var a=0;if(r.geometry){var o=r.geometry.type;if(!(o==="Point"||o==="MultiPoint")){var s,l=0,u=0,c=0;if(ik(r,function(f,h,d,v,_){if(s===void 0||n>l||v>u||_>c){s=f,l=n,u=v,c=_,a=0;return}var b=Wv.lineString.call(void 0,[s,f],r.properties);if(t(b,n,i,_,a)===!1)return!1;a++,s=f})===!1)return!1}}})}function zFt(e,t,r){var n=r,i=!1;return QRe(e,function(a,o,s,l,u){i===!1&&r===void 0?n=a:n=t(n,a,o,s,l,u),i=!0}),n}function eDe(e,t){if(!e)throw new Error("geojson is required");bF(e,function(r,n,i){if(r.geometry!==null){var a=r.geometry.type,o=r.geometry.coordinates;switch(a){case"LineString":if(t(r,n,i,0,0)===!1)return!1;break;case"Polygon":for(var s=0;s<o.length;s++)if(t(Wv.lineString.call(void 0,o[s],r.properties),n,i,s)===!1)return!1;break}}})}function OFt(e,t,r){var n=r;return eDe(e,function(i,a,o,s){a===0&&r===void 0?n=i:n=t(n,i,a,o,s)}),n}function qFt(e,t){if(t=t||{},!Wv.isObject.call(void 0,t))throw new Error("options is invalid");var r=t.featureIndex||0,n=t.multiFeatureIndex||0,i=t.geometryIndex||0,a=t.segmentIndex||0,o=t.properties,s;switch(e.type){case"FeatureCollection":r<0&&(r=e.features.length+r),o=o||e.features[r].properties,s=e.features[r].geometry;break;case"Feature":o=o||e.properties,s=e.geometry;break;case"Point":case"MultiPoint":return null;case"LineString":case"Polygon":case"MultiLineString":case"MultiPolygon":s=e;break;default:throw new Error("geojson is invalid")}if(s===null)return null;var l=s.coordinates;switch(s.type){case"Point":case"MultiPoint":return null;case"LineString":return a<0&&(a=l.length+a-1),Wv.lineString.call(void 0,[l[a],l[a+1]],o,t);case"Polygon":return i<0&&(i=l.length+i),a<0&&(a=l[i].length+a-1),Wv.lineString.call(void 0,[l[i][a],l[i][a+1]],o,t);case"MultiLineString":return n<0&&(n=l.length+n),a<0&&(a=l[n].length+a-1),Wv.lineString.call(void 0,[l[n][a],l[n][a+1]],o,t);case"MultiPolygon":return n<0&&(n=l.length+n),i<0&&(i=l[n].length+i),a<0&&(a=l[n][i].length-a-1),Wv.lineString.call(void 0,[l[n][i][a],l[n][i][a+1]],o,t)}throw new Error("geojson is invalid")}function BFt(e,t){if(t=t||{},!Wv.isObject.call(void 0,t))throw new Error("options is invalid");var r=t.featureIndex||0,n=t.multiFeatureIndex||0,i=t.geometryIndex||0,a=t.coordIndex||0,o=t.properties,s;switch(e.type){case"FeatureCollection":r<0&&(r=e.features.length+r),o=o||e.features[r].properties,s=e.features[r].geometry;break;case"Feature":o=o||e.properties,s=e.geometry;break;case"Point":case"MultiPoint":return null;case"LineString":case"Polygon":case"MultiLineString":case"MultiPolygon":s=e;break;default:throw new Error("geojson is invalid")}if(s===null)return null;var l=s.coordinates;switch(s.type){case"Point":return Wv.point.call(void 0,l,o,t);case"MultiPoint":return n<0&&(n=l.length+n),Wv.point.call(void 0,l[n],o,t);case"LineString":return a<0&&(a=l.length+a),Wv.point.call(void 0,l[a],o,t);case"Polygon":return i<0&&(i=l.length+i),a<0&&(a=l[i].length+a),Wv.point.call(void 0,l[i][a],o,t);case"MultiLineString":return n<0&&(n=l.length+n),a<0&&(a=l[n].length+a),Wv.point.call(void 0,l[n][a],o,t);case"MultiPolygon":return n<0&&(n=l.length+n),i<0&&(i=l[n].length+i),a<0&&(a=l[n][i].length-a),Wv.point.call(void 0,l[n][i][a],o,t)}throw new Error("geojson is invalid")}qd.coordAll=RFt;qd.coordEach=ik;qd.coordReduce=LFt;qd.featureEach=$Re;qd.featureReduce=IFt;qd.findPoint=BFt;qd.findSegment=qFt;qd.flattenEach=bF;qd.flattenReduce=FFt;qd.geomEach=MZ;qd.geomReduce=DFt;qd.lineEach=eDe;qd.lineReduce=OFt;qd.propEach=JRe;qd.propReduce=PFt;qd.segmentEach=QRe;qd.segmentReduce=zFt});var aDe=ye(TF=>{"use strict";Object.defineProperty(TF,"__esModule",{value:!0});var tDe=xF(),NFt=wF();function nDe(e){return NFt.geomReduce.call(void 0,e,(t,r)=>t+UFt(r),0)}function UFt(e){let t=0,r;switch(e.type){case"Polygon":return rDe(e.coordinates);case"MultiPolygon":for(r=0;r<e.coordinates.length;r++)t+=rDe(e.coordinates[r]);return t;case"Point":case"MultiPoint":case"LineString":case"MultiLineString":return 0}return 0}function rDe(e){let t=0;if(e&&e.length>0){t+=Math.abs(iDe(e[0]));for(let r=1;r<e.length;r++)t-=Math.abs(iDe(e[r]))}return t}var VFt=tDe.earthRadius*tDe.earthRadius/2,EZ=Math.PI/180;function iDe(e){let t=e.length-1;if(t<=2)return 0;let r=0,n=0;for(;n<t;){let i=e[n],a=e[n+1===t?0:n+1],o=e[n+2>=t?(n+2)%t:n+2],s=i[0]*EZ,l=a[1]*EZ,u=o[0]*EZ;r+=(u-s)*Math.sin(l),n++}return r*VFt}var GFt=nDe;TF.area=nDe;TF.default=GFt});var sDe=ye(AF=>{"use strict";Object.defineProperty(AF,"__esModule",{value:!0});var HFt=xF(),jFt=wF();function oDe(e,t={}){let r=0,n=0,i=0;return jFt.coordEach.call(void 0,e,function(a){r+=a[0],n+=a[1],i++},!0),HFt.point.call(void 0,[r/i,n/i],t.properties)}var WFt=oDe;AF.centroid=oDe;AF.default=WFt});var uDe=ye(SF=>{"use strict";Object.defineProperty(SF,"__esModule",{value:!0});var XFt=wF();function lDe(e,t={}){if(e.bbox!=null&&t.recompute!==!0)return e.bbox;let r=[1/0,1/0,-1/0,-1/0];return XFt.coordEach.call(void 0,e,n=>{r[0]>n[0]&&(r[0]=n[0]),r[1]>n[1]&&(r[1]=n[1]),r[2]<n[0]&&(r[2]=n[0]),r[3]<n[1]&&(r[3]=n[1])}),r}var ZFt=lDe;SF.bbox=lDe;SF.default=ZFt});var ix=ye((vmr,vDe)=>{"use strict";var YFt=Oa(),hDe=jRe(),{area:KFt}=aDe(),{centroid:JFt}=sDe(),{bbox:$Ft}=uDe(),cDe=HS(),JA=G1(),QFt=my(),ezt=PS(),MF=kM(),fDe=Object.keys(hDe),tzt={"ISO-3":cDe,"USA-states":cDe,"country names":rzt};function rzt(e){for(var t=0;t<fDe.length;t++){var r=fDe[t],n=new RegExp(hDe[r]);if(n.test(e.trim().toLowerCase()))return r}return JA.log("Unrecognized country name: "+e+"."),!1}function izt(e,t,r){if(!t||typeof t!="string")return!1;var n=tzt[e](t),i,a,o;if(n){if(e==="USA-states")for(i=[],o=0;o<r.length;o++)a=r[o],a.properties&&a.properties.gu&&a.properties.gu==="USA"&&i.push(a);else i=r;for(o=0;o<i.length;o++)if(a=i[o],a.id===n)return a;JA.log(["Location with id",n,"does not have a matching topojson feature at this resolution."].join(" "))}return!1}function nzt(e){var t=e.geometry,r=t.coordinates,n=e.id,i=[],a,o,s,l;function u(c){for(var f=0;f<c.length-1;f++)if(c[f][0]>0&&c[f+1][0]<0)return f;return null}switch(n==="RUS"||n==="FJI"?a=function(c){var f;if(u(c)===null)f=c;else for(f=new Array(c.length),l=0;l<c.length;l++)f[l]=[c[l][0]<0?c[l][0]+360:c[l][0],c[l][1]];i.push(MF.tester(f))}:n==="ATA"?a=function(c){var f=u(c);if(f===null)return i.push(MF.tester(c));var h=new Array(c.length+1),d=0;for(l=0;l<c.length;l++)l>f?h[d++]=[c[l][0]+360,c[l][1]]:l===f?(h[d++]=c[l],h[d++]=[c[l][0],-90]):h[d++]=c[l];var v=MF.tester(h);v.pts.pop(),i.push(v)}:a=function(c){i.push(MF.tester(c))},t.type){case"MultiPolygon":for(o=0;o<r.length;o++)for(s=0;s<r[o].length;s++)a(r[o][s]);break;case"Polygon":for(o=0;o<r.length;o++)a(r[o]);break}return i}function dDe(e){var t=e.geojson,r=window.PlotlyGeoAssets||{},n=typeof t=="string"?r[t]:t;return QFt(n)?n:(JA.error("Oops ... something went wrong when fetching "+t),!1)}function azt(e){var t=e[0].trace,r=dDe(t);if(!r)return!1;var n={},i=[],a;for(a=0;a<t._length;a++){var o=e[a];(o.loc||o.loc===0)&&(n[o.loc]=o)}function s(c){var f=ezt(c,t.featureidkey||"id").get(),h=n[f];if(h){var d=c.geometry;if(d.type==="Polygon"||d.type==="MultiPolygon"){var v={type:"Feature",id:f,geometry:d,properties:{}};v.geometry.coordinates.length>0?v.properties.ct=ozt(v):v.properties.ct=[NaN,NaN],h.fIn=c,h.fOut=v,i.push(v)}else JA.log(["Location",h.loc,"does not have a valid GeoJSON geometry.","Traces with locationmode *geojson-id* only support","*Polygon* and *MultiPolygon* geometries."].join(" "))}delete n[f]}switch(r.type){case"FeatureCollection":var l=r.features;for(a=0;a<l.length;a++)s(l[a]);break;case"Feature":s(r);break;default:return JA.warn(["Invalid GeoJSON type",(r.type||"none")+".","Traces with locationmode *geojson-id* only support","*FeatureCollection* and *Feature* types."].join(" ")),!1}for(var u in n)JA.log(["Location *"+u+"*","does not have a matching feature with id-key","*"+t.featureidkey+"*."].join(" "));return i}function ozt(e){var t=e.geometry,r;if(t.type==="MultiPolygon")for(var n=t.coordinates,i=0,a=0;a<n.length;a++){var o={type:"Polygon",coordinates:n[a]},s=KFt(o);s>i&&(i=s,r=o)}else r=t;return JFt(r).geometry.coordinates}function szt(e){var t=window.PlotlyGeoAssets||{},r=[];function n(l){return new Promise(function(u,c){YFt.json(l,function(f,h){if(f){delete t[l];var d=f.status===404?'GeoJSON at URL "'+l+'" does not exist.':"Unexpected error while fetching from "+l;return c(new Error(d))}return t[l]=h,u(h)})})}function i(l){return new Promise(function(u,c){var f=0,h=setInterval(function(){if(t[l]&&t[l]!=="pending")return clearInterval(h),u(t[l]);if(f>100)return clearInterval(h),c("Unexpected error while fetching from "+l);f++},50)})}for(var a=0;a<e.length;a++){var o=e[a][0].trace,s=o.geojson;typeof s=="string"&&(t[s]?t[s]==="pending"&&r.push(i(s)):(t[s]="pending",r.push(n(s))))}return r}function lzt(e){return $Ft(e)}vDe.exports={locationToFeature:izt,feature2polygons:nzt,getTraceGeojson:dDe,extractTraceFeature:azt,fetchTraceGeoData:szt,computeBbox:lzt}});var kZ=ye((pmr,mDe)=>{"use strict";var uzt=Oa(),czt=So(),pDe=ka(),gDe=op(),fzt=gDe.stylePoints,hzt=gDe.styleText;mDe.exports=function(t,r){r&&dzt(t,r)};function dzt(e,t){var r=t[0].trace,n=t[0].node3;n.style("opacity",t[0].trace.opacity),fzt(n,r,e),hzt(n,r,e),n.selectAll("path.js-line").style("fill","none").each(function(i){var a=uzt.select(this),o=i.trace,s=o.line||{};a.call(pDe.stroke,s.color).call(czt.dashLine,s.dash||"",s.width||0),o.fill!=="none"&&a.call(pDe.fill,o.fillcolor)})}});var IZ=ye((gmr,xDe)=>{"use strict";var yDe=Oa(),kF=Dr(),vzt=mF().getTopojsonFeatures,CZ=tx(),EF=ix(),_De=Ag().findExtremes,PZ=fs().BADNUM,pzt=O0().calcMarkerSize,LZ=Ru(),gzt=kZ();function mzt(e,t,r){var n=t.layers.frontplot.select(".scatterlayer"),i=kF.makeTraceGroups(n,r,"trace scattergeo");function a(o,s){o.lonlat[0]===PZ&&yDe.select(s).remove()}i.selectAll("*").remove(),i.each(function(o){var s=yDe.select(this),l=o[0].trace;if(LZ.hasLines(l)||l.fill!=="none"){var u=CZ.calcTraceToLineCoords(o),c=l.fill!=="none"?CZ.makePolygon(u):CZ.makeLine(u);s.selectAll("path.js-line").data([{geojson:c,trace:l}]).enter().append("path").classed("js-line",!0).style("stroke-miterlimit",2)}LZ.hasMarkers(l)&&s.selectAll("path.point").data(kF.identity).enter().append("path").classed("point",!0).each(function(f){a(f,this)}),LZ.hasText(l)&&s.selectAll("g").data(kF.identity).enter().append("g").append("text").each(function(f){a(f,this)}),gzt(e,o)})}function yzt(e,t){var r=e[0].trace,n=t[r.geo],i=n._subplot,a=r._length,o,s;if(kF.isArrayOrTypedArray(r.locations)){var l=r.locationmode,u=l==="geojson-id"?EF.extractTraceFeature(e):vzt(r,i.topojson);for(o=0;o<a;o++){s=e[o];var c=l==="geojson-id"?s.fOut:EF.locationToFeature(l,s.loc,u);s.lonlat=c?c.properties.ct:[PZ,PZ]}}var f={padded:!0},h,d;if(n.fitbounds==="geojson"&&r.locationmode==="geojson-id"){var v=EF.computeBbox(EF.getTraceGeojson(r));h=[v[0],v[2]],d=[v[1],v[3]]}else{for(h=new Array(a),d=new Array(a),o=0;o<a;o++)s=e[o],h[o]=s.lonlat[0],d[o]=s.lonlat[1];f.ppad=pzt(r,a)}r._extremes.lon=_De(n.lonaxis._ax,h,f),r._extremes.lat=_De(n.lataxis._ax,d,f)}xDe.exports={calcGeoJSON:yzt,plot:mzt}});var wDe=ye((mmr,bDe)=>{"use strict";var _zt=vf(),xzt=fs().BADNUM,bzt=cT(),wzt=Dr().fillText,Tzt=j2();bDe.exports=function(t,r,n){var i=t.cd,a=i[0].trace,o=t.xa,s=t.ya,l=t.subplot,u=l.projection.isLonLatOverEdges,c=l.project;function f(k){var E=k.lonlat;if(E[0]===xzt||u(E))return 1/0;var S=c(E),L=c([r,n]),x=Math.abs(S[0]-L[0]),C=Math.abs(S[1]-L[1]),M=Math.max(3,k.mrc||0);return Math.max(Math.sqrt(x*x+C*C)-M,1-3/M)}if(_zt.getClosest(i,f,t),t.index!==!1){var h=i[t.index],d=h.lonlat,v=[o.c2p(d),s.c2p(d)],_=h.mrc||1;t.x0=v[0]-_,t.x1=v[0]+_,t.y0=v[1]-_,t.y1=v[1]+_,t.loc=h.loc,t.lon=d[0],t.lat=d[1];var b={};b[a.geo]={_subplot:l};var p=a._module.formatLabels(h,a,b);return t.lonLabel=p.lonLabel,t.latLabel=p.latLabel,t.color=bzt(a,h),t.extraText=Azt(a,h,t,i[0].t.labels),t.hovertemplate=a.hovertemplate,[t]}};function Azt(e,t,r,n){if(e.hovertemplate)return;var i=t.hi||e.hoverinfo,a=i==="all"?Tzt.hoverinfo.flags:i.split("+"),o=a.indexOf("location")!==-1&&Array.isArray(e.locations),s=a.indexOf("lon")!==-1,l=a.indexOf("lat")!==-1,u=a.indexOf("text")!==-1,c=[];function f(h){return h+"\xB0"}return o?c.push(t.loc):s&&l?c.push("("+f(r.latLabel)+", "+f(r.lonLabel)+")"):s?c.push(n.lon+f(r.lonLabel)):l&&c.push(n.lat+f(r.latLabel)),u&&wzt(t,e,c),c.join("<br>")}});var ADe=ye((ymr,TDe)=>{"use strict";TDe.exports=function(t,r,n,i,a){t.lon=r.lon,t.lat=r.lat,t.location=r.loc?r.loc:null;var o=i[a];return o.fIn&&o.fIn.properties&&(t.properties=o.fIn.properties),t}});var EDe=ye((_mr,MDe)=>{"use strict";var SDe=Ru(),Szt=fs().BADNUM;MDe.exports=function(t,r){var n=t.cd,i=t.xaxis,a=t.yaxis,o=[],s=n[0].trace,l,u,c,f,h,d=!SDe.hasMarkers(s)&&!SDe.hasText(s);if(d)return[];if(r===!1)for(h=0;h<n.length;h++)n[h].selected=0;else for(h=0;h<n.length;h++)l=n[h],u=l.lonlat,u[0]!==Szt&&(c=i.c2p(u),f=a.c2p(u),r.contains([c,f],null,h,t)?(o.push({pointNumber:h,lon:u[0],lat:u[1]}),l.selected=1):l.selected=0);return o}});var nk=ye((CF,kDe)=>{(function(e,t){t(typeof CF=="object"&&typeof kDe!="undefined"?CF:e.d3=e.d3||{})})(CF,function(e){"use strict";function t(Le,Ae){return Le<Ae?-1:Le>Ae?1:Le>=Ae?0:NaN}function r(Le){return Le.length===1&&(Le=n(Le)),{left:function(Ae,Fe,Pe,ge){for(Pe==null&&(Pe=0),ge==null&&(ge=Ae.length);Pe<ge;){var Re=Pe+ge>>>1;Le(Ae[Re],Fe)<0?Pe=Re+1:ge=Re}return Pe},right:function(Ae,Fe,Pe,ge){for(Pe==null&&(Pe=0),ge==null&&(ge=Ae.length);Pe<ge;){var Re=Pe+ge>>>1;Le(Ae[Re],Fe)>0?ge=Re:Pe=Re+1}return Pe}}}function n(Le){return function(Ae,Fe){return t(Le(Ae),Fe)}}var i=r(t),a=i.right,o=i.left;function s(Le,Ae){Ae==null&&(Ae=l);for(var Fe=0,Pe=Le.length-1,ge=Le[0],Re=new Array(Pe<0?0:Pe);Fe<Pe;)Re[Fe]=Ae(ge,ge=Le[++Fe]);return Re}function l(Le,Ae){return[Le,Ae]}function u(Le,Ae,Fe){var Pe=Le.length,ge=Ae.length,Re=new Array(Pe*ge),ce,Ze,ut,pt;for(Fe==null&&(Fe=l),ce=ut=0;ce<Pe;++ce)for(pt=Le[ce],Ze=0;Ze<ge;++Ze,++ut)Re[ut]=Fe(pt,Ae[Ze]);return Re}function c(Le,Ae){return Ae<Le?-1:Ae>Le?1:Ae>=Le?0:NaN}function f(Le){return Le===null?NaN:+Le}function h(Le,Ae){var Fe=Le.length,Pe=0,ge=-1,Re=0,ce,Ze,ut=0;if(Ae==null)for(;++ge<Fe;)isNaN(ce=f(Le[ge]))||(Ze=ce-Re,Re+=Ze/++Pe,ut+=Ze*(ce-Re));else for(;++ge<Fe;)isNaN(ce=f(Ae(Le[ge],ge,Le)))||(Ze=ce-Re,Re+=Ze/++Pe,ut+=Ze*(ce-Re));if(Pe>1)return ut/(Pe-1)}function d(Le,Ae){var Fe=h(Le,Ae);return Fe&&Math.sqrt(Fe)}function v(Le,Ae){var Fe=Le.length,Pe=-1,ge,Re,ce;if(Ae==null){for(;++Pe<Fe;)if((ge=Le[Pe])!=null&&ge>=ge)for(Re=ce=ge;++Pe<Fe;)(ge=Le[Pe])!=null&&(Re>ge&&(Re=ge),ce<ge&&(ce=ge))}else for(;++Pe<Fe;)if((ge=Ae(Le[Pe],Pe,Le))!=null&&ge>=ge)for(Re=ce=ge;++Pe<Fe;)(ge=Ae(Le[Pe],Pe,Le))!=null&&(Re>ge&&(Re=ge),ce<ge&&(ce=ge));return[Re,ce]}var _=Array.prototype,b=_.slice,p=_.map;function k(Le){return function(){return Le}}function E(Le){return Le}function S(Le,Ae,Fe){Le=+Le,Ae=+Ae,Fe=(ge=arguments.length)<2?(Ae=Le,Le=0,1):ge<3?1:+Fe;for(var Pe=-1,ge=Math.max(0,Math.ceil((Ae-Le)/Fe))|0,Re=new Array(ge);++Pe<ge;)Re[Pe]=Le+Pe*Fe;return Re}var L=Math.sqrt(50),x=Math.sqrt(10),C=Math.sqrt(2);function M(Le,Ae,Fe){var Pe,ge=-1,Re,ce,Ze;if(Ae=+Ae,Le=+Le,Fe=+Fe,Le===Ae&&Fe>0)return[Le];if((Pe=Ae<Le)&&(Re=Le,Le=Ae,Ae=Re),(Ze=g(Le,Ae,Fe))===0||!isFinite(Ze))return[];if(Ze>0)for(Le=Math.ceil(Le/Ze),Ae=Math.floor(Ae/Ze),ce=new Array(Re=Math.ceil(Ae-Le+1));++ge<Re;)ce[ge]=(Le+ge)*Ze;else for(Le=Math.floor(Le*Ze),Ae=Math.ceil(Ae*Ze),ce=new Array(Re=Math.ceil(Le-Ae+1));++ge<Re;)ce[ge]=(Le-ge)/Ze;return Pe&&ce.reverse(),ce}function g(Le,Ae,Fe){var Pe=(Ae-Le)/Math.max(0,Fe),ge=Math.floor(Math.log(Pe)/Math.LN10),Re=Pe/Math.pow(10,ge);return ge>=0?(Re>=L?10:Re>=x?5:Re>=C?2:1)*Math.pow(10,ge):-Math.pow(10,-ge)/(Re>=L?10:Re>=x?5:Re>=C?2:1)}function P(Le,Ae,Fe){var Pe=Math.abs(Ae-Le)/Math.max(0,Fe),ge=Math.pow(10,Math.floor(Math.log(Pe)/Math.LN10)),Re=Pe/ge;return Re>=L?ge*=10:Re>=x?ge*=5:Re>=C&&(ge*=2),Ae<Le?-ge:ge}function T(Le){return Math.ceil(Math.log(Le.length)/Math.LN2)+1}function z(){var Le=E,Ae=v,Fe=T;function Pe(ge){var Re,ce=ge.length,Ze,ut=new Array(ce);for(Re=0;Re<ce;++Re)ut[Re]=Le(ge[Re],Re,ge);var pt=Ae(ut),Zt=pt[0],st=pt[1],lt=Fe(ut,Zt,st);Array.isArray(lt)||(lt=P(Zt,st,lt),lt=S(Math.ceil(Zt/lt)*lt,st,lt));for(var Gt=lt.length;lt[0]<=Zt;)lt.shift(),--Gt;for(;lt[Gt-1]>st;)lt.pop(),--Gt;var Nt=new Array(Gt+1),Jt;for(Re=0;Re<=Gt;++Re)Jt=Nt[Re]=[],Jt.x0=Re>0?lt[Re-1]:Zt,Jt.x1=Re<Gt?lt[Re]:st;for(Re=0;Re<ce;++Re)Ze=ut[Re],Zt<=Ze&&Ze<=st&&Nt[a(lt,Ze,0,Gt)].push(ge[Re]);return Nt}return Pe.value=function(ge){return arguments.length?(Le=typeof ge=="function"?ge:k(ge),Pe):Le},Pe.domain=function(ge){return arguments.length?(Ae=typeof ge=="function"?ge:k([ge[0],ge[1]]),Pe):Ae},Pe.thresholds=function(ge){return arguments.length?(Fe=typeof ge=="function"?ge:Array.isArray(ge)?k(b.call(ge)):k(ge),Pe):Fe},Pe}function O(Le,Ae,Fe){if(Fe==null&&(Fe=f),!!(Pe=Le.length)){if((Ae=+Ae)<=0||Pe<2)return+Fe(Le[0],0,Le);if(Ae>=1)return+Fe(Le[Pe-1],Pe-1,Le);var Pe,ge=(Pe-1)*Ae,Re=Math.floor(ge),ce=+Fe(Le[Re],Re,Le),Ze=+Fe(Le[Re+1],Re+1,Le);return ce+(Ze-ce)*(ge-Re)}}function V(Le,Ae,Fe){return Le=p.call(Le,f).sort(t),Math.ceil((Fe-Ae)/(2*(O(Le,.75)-O(Le,.25))*Math.pow(Le.length,-1/3)))}function G(Le,Ae,Fe){return Math.ceil((Fe-Ae)/(3.5*d(Le)*Math.pow(Le.length,-1/3)))}function Z(Le,Ae){var Fe=Le.length,Pe=-1,ge,Re;if(Ae==null){for(;++Pe<Fe;)if((ge=Le[Pe])!=null&&ge>=ge)for(Re=ge;++Pe<Fe;)(ge=Le[Pe])!=null&&ge>Re&&(Re=ge)}else for(;++Pe<Fe;)if((ge=Ae(Le[Pe],Pe,Le))!=null&&ge>=ge)for(Re=ge;++Pe<Fe;)(ge=Ae(Le[Pe],Pe,Le))!=null&&ge>Re&&(Re=ge);return Re}function j(Le,Ae){var Fe=Le.length,Pe=Fe,ge=-1,Re,ce=0;if(Ae==null)for(;++ge<Fe;)isNaN(Re=f(Le[ge]))?--Pe:ce+=Re;else for(;++ge<Fe;)isNaN(Re=f(Ae(Le[ge],ge,Le)))?--Pe:ce+=Re;if(Pe)return ce/Pe}function N(Le,Ae){var Fe=Le.length,Pe=-1,ge,Re=[];if(Ae==null)for(;++Pe<Fe;)isNaN(ge=f(Le[Pe]))||Re.push(ge);else for(;++Pe<Fe;)isNaN(ge=f(Ae(Le[Pe],Pe,Le)))||Re.push(ge);return O(Re.sort(t),.5)}function H(Le){for(var Ae=Le.length,Fe,Pe=-1,ge=0,Re,ce;++Pe<Ae;)ge+=Le[Pe].length;for(Re=new Array(ge);--Ae>=0;)for(ce=Le[Ae],Fe=ce.length;--Fe>=0;)Re[--ge]=ce[Fe];return Re}function te(Le,Ae){var Fe=Le.length,Pe=-1,ge,Re;if(Ae==null){for(;++Pe<Fe;)if((ge=Le[Pe])!=null&&ge>=ge)for(Re=ge;++Pe<Fe;)(ge=Le[Pe])!=null&&Re>ge&&(Re=ge)}else for(;++Pe<Fe;)if((ge=Ae(Le[Pe],Pe,Le))!=null&&ge>=ge)for(Re=ge;++Pe<Fe;)(ge=Ae(Le[Pe],Pe,Le))!=null&&Re>ge&&(Re=ge);return Re}function oe(Le,Ae){for(var Fe=Ae.length,Pe=new Array(Fe);Fe--;)Pe[Fe]=Le[Ae[Fe]];return Pe}function _e(Le,Ae){if(Fe=Le.length){var Fe,Pe=0,ge=0,Re,ce=Le[ge];for(Ae==null&&(Ae=t);++Pe<Fe;)(Ae(Re=Le[Pe],ce)<0||Ae(ce,ce)!==0)&&(ce=Re,ge=Pe);if(Ae(ce,ce)===0)return ge}}function Ee(Le,Ae,Fe){for(var Pe=(Fe==null?Le.length:Fe)-(Ae=Ae==null?0:+Ae),ge,Re;Pe;)Re=Math.random()*Pe--|0,ge=Le[Pe+Ae],Le[Pe+Ae]=Le[Re+Ae],Le[Re+Ae]=ge;return Le}function Ce(Le,Ae){var Fe=Le.length,Pe=-1,ge,Re=0;if(Ae==null)for(;++Pe<Fe;)(ge=+Le[Pe])&&(Re+=ge);else for(;++Pe<Fe;)(ge=+Ae(Le[Pe],Pe,Le))&&(Re+=ge);return Re}function me(Le){if(!(Re=Le.length))return[];for(var Ae=-1,Fe=te(Le,ie),Pe=new Array(Fe);++Ae<Fe;)for(var ge=-1,Re,ce=Pe[Ae]=new Array(Re);++ge<Re;)ce[ge]=Le[ge][Ae];return Pe}function ie(Le){return Le.length}function Se(){return me(arguments)}e.bisect=a,e.bisectRight=a,e.bisectLeft=o,e.ascending=t,e.bisector=r,e.cross=u,e.descending=c,e.deviation=d,e.extent=v,e.histogram=z,e.thresholdFreedmanDiaconis=V,e.thresholdScott=G,e.thresholdSturges=T,e.max=Z,e.mean=j,e.median=N,e.merge=H,e.min=te,e.pairs=s,e.permute=oe,e.quantile=O,e.range=S,e.scan=_e,e.shuffle=Ee,e.sum=Ce,e.ticks=M,e.tickIncrement=g,e.tickStep=P,e.transpose=me,e.variance=h,e.zip=Se,Object.defineProperty(e,"__esModule",{value:!0})})});var RZ=ye((LF,CDe)=>{(function(e,t){typeof LF=="object"&&typeof CDe!="undefined"?t(LF,nk()):(e=e||self,t(e.d3=e.d3||{},e.d3))})(LF,function(e,t){"use strict";function r(){return new n}function n(){this.reset()}n.prototype={constructor:n,reset:function(){this.s=this.t=0},add:function(At){a(i,At,this.t),a(this,i.s,this.s),this.s?this.t+=i.t:this.s=i.t},valueOf:function(){return this.s}};var i=new n;function a(At,Wt,Cr){var Ar=At.s=Wt+Cr,Kr=Ar-Wt,ki=Ar-Kr;At.t=Wt-ki+(Cr-Kr)}var o=1e-6,s=1e-12,l=Math.PI,u=l/2,c=l/4,f=l*2,h=180/l,d=l/180,v=Math.abs,_=Math.atan,b=Math.atan2,p=Math.cos,k=Math.ceil,E=Math.exp,S=Math.log,L=Math.pow,x=Math.sin,C=Math.sign||function(At){return At>0?1:At<0?-1:0},M=Math.sqrt,g=Math.tan;function P(At){return At>1?0:At<-1?l:Math.acos(At)}function T(At){return At>1?u:At<-1?-u:Math.asin(At)}function z(At){return(At=x(At/2))*At}function O(){}function V(At,Wt){At&&Z.hasOwnProperty(At.type)&&Z[At.type](At,Wt)}var G={Feature:function(At,Wt){V(At.geometry,Wt)},FeatureCollection:function(At,Wt){for(var Cr=At.features,Ar=-1,Kr=Cr.length;++Ar<Kr;)V(Cr[Ar].geometry,Wt)}},Z={Sphere:function(At,Wt){Wt.sphere()},Point:function(At,Wt){At=At.coordinates,Wt.point(At[0],At[1],At[2])},MultiPoint:function(At,Wt){for(var Cr=At.coordinates,Ar=-1,Kr=Cr.length;++Ar<Kr;)At=Cr[Ar],Wt.point(At[0],At[1],At[2])},LineString:function(At,Wt){j(At.coordinates,Wt,0)},MultiLineString:function(At,Wt){for(var Cr=At.coordinates,Ar=-1,Kr=Cr.length;++Ar<Kr;)j(Cr[Ar],Wt,0)},Polygon:function(At,Wt){N(At.coordinates,Wt)},MultiPolygon:function(At,Wt){for(var Cr=At.coordinates,Ar=-1,Kr=Cr.length;++Ar<Kr;)N(Cr[Ar],Wt)},GeometryCollection:function(At,Wt){for(var Cr=At.geometries,Ar=-1,Kr=Cr.length;++Ar<Kr;)V(Cr[Ar],Wt)}};function j(At,Wt,Cr){var Ar=-1,Kr=At.length-Cr,ki;for(Wt.lineStart();++Ar<Kr;)ki=At[Ar],Wt.point(ki[0],ki[1],ki[2]);Wt.lineEnd()}function N(At,Wt){var Cr=-1,Ar=At.length;for(Wt.polygonStart();++Cr<Ar;)j(At[Cr],Wt,1);Wt.polygonEnd()}function H(At,Wt){At&&G.hasOwnProperty(At.type)?G[At.type](At,Wt):V(At,Wt)}var te=r(),oe=r(),_e,Ee,Ce,me,ie,Se={point:O,lineStart:O,lineEnd:O,polygonStart:function(){te.reset(),Se.lineStart=Le,Se.lineEnd=Ae},polygonEnd:function(){var At=+te;oe.add(At<0?f+At:At),this.lineStart=this.lineEnd=this.point=O},sphere:function(){oe.add(f)}};function Le(){Se.point=Fe}function Ae(){Pe(_e,Ee)}function Fe(At,Wt){Se.point=Pe,_e=At,Ee=Wt,At*=d,Wt*=d,Ce=At,me=p(Wt=Wt/2+c),ie=x(Wt)}function Pe(At,Wt){At*=d,Wt*=d,Wt=Wt/2+c;var Cr=At-Ce,Ar=Cr>=0?1:-1,Kr=Ar*Cr,ki=p(Wt),Xi=x(Wt),dn=ie*Xi,wn=me*ki+dn*p(Kr),Nn=dn*Ar*x(Kr);te.add(b(Nn,wn)),Ce=At,me=ki,ie=Xi}function ge(At){return oe.reset(),H(At,Se),oe*2}function Re(At){return[b(At[1],At[0]),T(At[2])]}function ce(At){var Wt=At[0],Cr=At[1],Ar=p(Cr);return[Ar*p(Wt),Ar*x(Wt),x(Cr)]}function Ze(At,Wt){return At[0]*Wt[0]+At[1]*Wt[1]+At[2]*Wt[2]}function ut(At,Wt){return[At[1]*Wt[2]-At[2]*Wt[1],At[2]*Wt[0]-At[0]*Wt[2],At[0]*Wt[1]-At[1]*Wt[0]]}function pt(At,Wt){At[0]+=Wt[0],At[1]+=Wt[1],At[2]+=Wt[2]}function Zt(At,Wt){return[At[0]*Wt,At[1]*Wt,At[2]*Wt]}function st(At){var Wt=M(At[0]*At[0]+At[1]*At[1]+At[2]*At[2]);At[0]/=Wt,At[1]/=Wt,At[2]/=Wt}var lt,Gt,Nt,Jt,sr,wr,cr,$e,St=r(),Qt,Vt,_t={point:It,lineStart:er,lineEnd:lr,polygonStart:function(){_t.point=Tr,_t.lineStart=Lr,_t.lineEnd=ti,St.reset(),Se.polygonStart()},polygonEnd:function(){Se.polygonEnd(),_t.point=It,_t.lineStart=er,_t.lineEnd=lr,te<0?(lt=-(Nt=180),Gt=-(Jt=90)):St>o?Jt=90:St<-o&&(Gt=-90),Vt[0]=lt,Vt[1]=Nt},sphere:function(){lt=-(Nt=180),Gt=-(Jt=90)}};function It(At,Wt){Qt.push(Vt=[lt=At,Nt=At]),Wt<Gt&&(Gt=Wt),Wt>Jt&&(Jt=Wt)}function mt(At,Wt){var Cr=ce([At*d,Wt*d]);if($e){var Ar=ut($e,Cr),Kr=[Ar[1],-Ar[0],0],ki=ut(Kr,Ar);st(ki),ki=Re(ki);var Xi=At-sr,dn=Xi>0?1:-1,wn=ki[0]*h*dn,Nn,Yi=v(Xi)>180;Yi^(dn*sr<wn&&wn<dn*At)?(Nn=ki[1]*h,Nn>Jt&&(Jt=Nn)):(wn=(wn+360)%360-180,Yi^(dn*sr<wn&&wn<dn*At)?(Nn=-ki[1]*h,Nn<Gt&&(Gt=Nn)):(Wt<Gt&&(Gt=Wt),Wt>Jt&&(Jt=Wt))),Yi?At<sr?Br(lt,At)>Br(lt,Nt)&&(Nt=At):Br(At,Nt)>Br(lt,Nt)&&(lt=At):Nt>=lt?(At<lt&&(lt=At),At>Nt&&(Nt=At)):At>sr?Br(lt,At)>Br(lt,Nt)&&(Nt=At):Br(At,Nt)>Br(lt,Nt)&&(lt=At)}else Qt.push(Vt=[lt=At,Nt=At]);Wt<Gt&&(Gt=Wt),Wt>Jt&&(Jt=Wt),$e=Cr,sr=At}function er(){_t.point=mt}function lr(){Vt[0]=lt,Vt[1]=Nt,_t.point=It,$e=null}function Tr(At,Wt){if($e){var Cr=At-sr;St.add(v(Cr)>180?Cr+(Cr>0?360:-360):Cr)}else wr=At,cr=Wt;Se.point(At,Wt),mt(At,Wt)}function Lr(){Se.lineStart()}function ti(){Tr(wr,cr),Se.lineEnd(),v(St)>o&&(lt=-(Nt=180)),Vt[0]=lt,Vt[1]=Nt,$e=null}function Br(At,Wt){return(Wt-=At)<0?Wt+360:Wt}function Vr(At,Wt){return At[0]-Wt[0]}function dt(At,Wt){return At[0]<=At[1]?At[0]<=Wt&&Wt<=At[1]:Wt<At[0]||At[1]<Wt}function Ge(At){var Wt,Cr,Ar,Kr,ki,Xi,dn;if(Jt=Nt=-(lt=Gt=1/0),Qt=[],H(At,_t),Cr=Qt.length){for(Qt.sort(Vr),Wt=1,Ar=Qt[0],ki=[Ar];Wt<Cr;++Wt)Kr=Qt[Wt],dt(Ar,Kr[0])||dt(Ar,Kr[1])?(Br(Ar[0],Kr[1])>Br(Ar[0],Ar[1])&&(Ar[1]=Kr[1]),Br(Kr[0],Ar[1])>Br(Ar[0],Ar[1])&&(Ar[0]=Kr[0])):ki.push(Ar=Kr);for(Xi=-1/0,Cr=ki.length-1,Wt=0,Ar=ki[Cr];Wt<=Cr;Ar=Kr,++Wt)Kr=ki[Wt],(dn=Br(Ar[1],Kr[0]))>Xi&&(Xi=dn,lt=Kr[0],Nt=Ar[1])}return Qt=Vt=null,lt===1/0||Gt===1/0?[[NaN,NaN],[NaN,NaN]]:[[lt,Gt],[Nt,Jt]]}var Je,je,tt,xt,Ie,xe,ke,vt,ir,ar,vr,ii,pi,$r,di,ji,In={sphere:O,point:wi,lineStart:qn,lineEnd:la,polygonStart:function(){In.lineStart=Ut,In.lineEnd=wt},polygonEnd:function(){In.lineStart=qn,In.lineEnd=la}};function wi(At,Wt){At*=d,Wt*=d;var Cr=p(Wt);On(Cr*p(At),Cr*x(At),x(Wt))}function On(At,Wt,Cr){++Je,tt+=(At-tt)/Je,xt+=(Wt-xt)/Je,Ie+=(Cr-Ie)/Je}function qn(){In.point=Fn}function Fn(At,Wt){At*=d,Wt*=d;var Cr=p(Wt);$r=Cr*p(At),di=Cr*x(At),ji=x(Wt),In.point=ra,On($r,di,ji)}function ra(At,Wt){At*=d,Wt*=d;var Cr=p(Wt),Ar=Cr*p(At),Kr=Cr*x(At),ki=x(Wt),Xi=b(M((Xi=di*ki-ji*Kr)*Xi+(Xi=ji*Ar-$r*ki)*Xi+(Xi=$r*Kr-di*Ar)*Xi),$r*Ar+di*Kr+ji*ki);je+=Xi,xe+=Xi*($r+($r=Ar)),ke+=Xi*(di+(di=Kr)),vt+=Xi*(ji+(ji=ki)),On($r,di,ji)}function la(){In.point=wi}function Ut(){In.point=rr}function wt(){nr(ii,pi),In.point=wi}function rr(At,Wt){ii=At,pi=Wt,At*=d,Wt*=d,In.point=nr;var Cr=p(Wt);$r=Cr*p(At),di=Cr*x(At),ji=x(Wt),On($r,di,ji)}function nr(At,Wt){At*=d,Wt*=d;var Cr=p(Wt),Ar=Cr*p(At),Kr=Cr*x(At),ki=x(Wt),Xi=di*ki-ji*Kr,dn=ji*Ar-$r*ki,wn=$r*Kr-di*Ar,Nn=M(Xi*Xi+dn*dn+wn*wn),Yi=T(Nn),Qi=Nn&&-Yi/Nn;ir+=Qi*Xi,ar+=Qi*dn,vr+=Qi*wn,je+=Yi,xe+=Yi*($r+($r=Ar)),ke+=Yi*(di+(di=Kr)),vt+=Yi*(ji+(ji=ki)),On($r,di,ji)}function Er(At){Je=je=tt=xt=Ie=xe=ke=vt=ir=ar=vr=0,H(At,In);var Wt=ir,Cr=ar,Ar=vr,Kr=Wt*Wt+Cr*Cr+Ar*Ar;return Kr<s&&(Wt=xe,Cr=ke,Ar=vt,je<o&&(Wt=tt,Cr=xt,Ar=Ie),Kr=Wt*Wt+Cr*Cr+Ar*Ar,Kr<s)?[NaN,NaN]:[b(Cr,Wt)*h,T(Ar/M(Kr))*h]}function Xr(At){return function(){return At}}function ri(At,Wt){function Cr(Ar,Kr){return Ar=At(Ar,Kr),Wt(Ar[0],Ar[1])}return At.invert&&Wt.invert&&(Cr.invert=function(Ar,Kr){return Ar=Wt.invert(Ar,Kr),Ar&&At.invert(Ar[0],Ar[1])}),Cr}function Qr(At,Wt){return[v(At)>l?At+Math.round(-At/f)*f:At,Wt]}Qr.invert=Qr;function Oi(At,Wt,Cr){return(At%=f)?Wt||Cr?ri(tn(At),fn(Wt,Cr)):tn(At):Wt||Cr?fn(Wt,Cr):Qr}function $i(At){return function(Wt,Cr){return Wt+=At,[Wt>l?Wt-f:Wt<-l?Wt+f:Wt,Cr]}}function tn(At){var Wt=$i(At);return Wt.invert=$i(-At),Wt}function fn(At,Wt){var Cr=p(At),Ar=x(At),Kr=p(Wt),ki=x(Wt);function Xi(dn,wn){var Nn=p(wn),Yi=p(dn)*Nn,Qi=x(dn)*Nn,on=x(wn),Fi=on*Cr+Yi*Ar;return[b(Qi*Kr-Fi*ki,Yi*Cr-on*Ar),T(Fi*Kr+Qi*ki)]}return Xi.invert=function(dn,wn){var Nn=p(wn),Yi=p(dn)*Nn,Qi=x(dn)*Nn,on=x(wn),Fi=on*Kr-Qi*ki;return[b(Qi*Kr+on*ki,Yi*Cr+Fi*Ar),T(Fi*Cr-Yi*Ar)]},Xi}function yn(At){At=Oi(At[0]*d,At[1]*d,At.length>2?At[2]*d:0);function Wt(Cr){return Cr=At(Cr[0]*d,Cr[1]*d),Cr[0]*=h,Cr[1]*=h,Cr}return Wt.invert=function(Cr){return Cr=At.invert(Cr[0]*d,Cr[1]*d),Cr[0]*=h,Cr[1]*=h,Cr},Wt}function Sn(At,Wt,Cr,Ar,Kr,ki){if(Cr){var Xi=p(Wt),dn=x(Wt),wn=Ar*Cr;Kr==null?(Kr=Wt+Ar*f,ki=Wt-wn/2):(Kr=Ba(Xi,Kr),ki=Ba(Xi,ki),(Ar>0?Kr<ki:Kr>ki)&&(Kr+=Ar*f));for(var Nn,Yi=Kr;Ar>0?Yi>ki:Yi<ki;Yi-=wn)Nn=Re([Xi,-dn*p(Yi),-dn*x(Yi)]),At.point(Nn[0],Nn[1])}}function Ba(At,Wt){Wt=ce(Wt),Wt[0]-=At,st(Wt);var Cr=P(-Wt[1]);return((-Wt[2]<0?-Cr:Cr)+f-o)%f}function ua(){var At=Xr([0,0]),Wt=Xr(90),Cr=Xr(6),Ar,Kr,ki={point:Xi};function Xi(wn,Nn){Ar.push(wn=Kr(wn,Nn)),wn[0]*=h,wn[1]*=h}function dn(){var wn=At.apply(this,arguments),Nn=Wt.apply(this,arguments)*d,Yi=Cr.apply(this,arguments)*d;return Ar=[],Kr=Oi(-wn[0]*d,-wn[1]*d,0).invert,Sn(ki,Nn,Yi,1),wn={type:"Polygon",coordinates:[Ar]},Ar=Kr=null,wn}return dn.center=function(wn){return arguments.length?(At=typeof wn=="function"?wn:Xr([+wn[0],+wn[1]]),dn):At},dn.radius=function(wn){return arguments.length?(Wt=typeof wn=="function"?wn:Xr(+wn),dn):Wt},dn.precision=function(wn){return arguments.length?(Cr=typeof wn=="function"?wn:Xr(+wn),dn):Cr},dn}function ma(){var At=[],Wt;return{point:function(Cr,Ar,Kr){Wt.push([Cr,Ar,Kr])},lineStart:function(){At.push(Wt=[])},lineEnd:O,rejoin:function(){At.length>1&&At.push(At.pop().concat(At.shift()))},result:function(){var Cr=At;return At=[],Wt=null,Cr}}}function Wa(At,Wt){return v(At[0]-Wt[0])<o&&v(At[1]-Wt[1])<o}function Fa(At,Wt,Cr,Ar){this.x=At,this.z=Wt,this.o=Cr,this.e=Ar,this.v=!1,this.n=this.p=null}function Wo(At,Wt,Cr,Ar,Kr){var ki=[],Xi=[],dn,wn;if(At.forEach(function($n){if(!((Ca=$n.length-1)<=0)){var Ca,Ra=$n[0],La=$n[Ca],Na;if(Wa(Ra,La)){if(!Ra[2]&&!La[2]){for(Kr.lineStart(),dn=0;dn<Ca;++dn)Kr.point((Ra=$n[dn])[0],Ra[1]);Kr.lineEnd();return}La[0]+=2*o}ki.push(Na=new Fa(Ra,$n,null,!0)),Xi.push(Na.o=new Fa(Ra,null,Na,!1)),ki.push(Na=new Fa(La,$n,null,!1)),Xi.push(Na.o=new Fa(La,null,Na,!0))}}),!!ki.length){for(Xi.sort(Wt),da(ki),da(Xi),dn=0,wn=Xi.length;dn<wn;++dn)Xi[dn].e=Cr=!Cr;for(var Nn=ki[0],Yi,Qi;;){for(var on=Nn,Fi=!0;on.v;)if((on=on.n)===Nn)return;Yi=on.z,Kr.lineStart();do{if(on.v=on.o.v=!0,on.e){if(Fi)for(dn=0,wn=Yi.length;dn<wn;++dn)Kr.point((Qi=Yi[dn])[0],Qi[1]);else Ar(on.x,on.n.x,1,Kr);on=on.n}else{if(Fi)for(Yi=on.p.z,dn=Yi.length-1;dn>=0;--dn)Kr.point((Qi=Yi[dn])[0],Qi[1]);else Ar(on.x,on.p.x,-1,Kr);on=on.p}on=on.o,Yi=on.z,Fi=!Fi}while(!on.v);Kr.lineEnd()}}}function da(At){if(Wt=At.length){for(var Wt,Cr=0,Ar=At[0],Kr;++Cr<Wt;)Ar.n=Kr=At[Cr],Kr.p=Ar,Ar=Kr;Ar.n=Kr=At[0],Kr.p=Ar}}var Wn=r();function Ha(At){return v(At[0])<=l?At[0]:C(At[0])*((v(At[0])+l)%f-l)}function vo(At,Wt){var Cr=Ha(Wt),Ar=Wt[1],Kr=x(Ar),ki=[x(Cr),-p(Cr),0],Xi=0,dn=0;Wn.reset(),Kr===1?Ar=u+o:Kr===-1&&(Ar=-u-o);for(var wn=0,Nn=At.length;wn<Nn;++wn)if(Qi=(Yi=At[wn]).length)for(var Yi,Qi,on=Yi[Qi-1],Fi=Ha(on),$n=on[1]/2+c,Ca=x($n),Ra=p($n),La=0;La<Qi;++La,Fi=Yn,Ca=Ka,Ra=bo,on=Na){var Na=Yi[La],Yn=Ha(Na),Dn=Na[1]/2+c,Ka=x(Dn),bo=p(Dn),Xo=Yn-Fi,Ss=Xo>=0?1:-1,as=Ss*Xo,ws=as>l,Ho=Ca*Ka;if(Wn.add(b(Ho*Ss*x(as),Ra*bo+Ho*p(as))),Xi+=ws?Xo+Ss*f:Xo,ws^Fi>=Cr^Yn>=Cr){var ml=ut(ce(on),ce(Na));st(ml);var Ws=ut(ki,ml);st(Ws);var Ls=(ws^Xo>=0?-1:1)*T(Ws[2]);(Ar>Ls||Ar===Ls&&(ml[0]||ml[1]))&&(dn+=ws^Xo>=0?1:-1)}}return(Xi<-o||Xi<o&&Wn<-o)^dn&1}function jn(At,Wt,Cr,Ar){return function(Kr){var ki=Wt(Kr),Xi=ma(),dn=Wt(Xi),wn=!1,Nn,Yi,Qi,on={point:Fi,lineStart:Ca,lineEnd:Ra,polygonStart:function(){on.point=La,on.lineStart=Na,on.lineEnd=Yn,Yi=[],Nn=[]},polygonEnd:function(){on.point=Fi,on.lineStart=Ca,on.lineEnd=Ra,Yi=t.merge(Yi);var Dn=vo(Nn,Ar);Yi.length?(wn||(Kr.polygonStart(),wn=!0),Wo(Yi,kr,Dn,Cr,Kr)):Dn&&(wn||(Kr.polygonStart(),wn=!0),Kr.lineStart(),Cr(null,null,1,Kr),Kr.lineEnd()),wn&&(Kr.polygonEnd(),wn=!1),Yi=Nn=null},sphere:function(){Kr.polygonStart(),Kr.lineStart(),Cr(null,null,1,Kr),Kr.lineEnd(),Kr.polygonEnd()}};function Fi(Dn,Ka){At(Dn,Ka)&&Kr.point(Dn,Ka)}function $n(Dn,Ka){ki.point(Dn,Ka)}function Ca(){on.point=$n,ki.lineStart()}function Ra(){on.point=Fi,ki.lineEnd()}function La(Dn,Ka){Qi.push([Dn,Ka]),dn.point(Dn,Ka)}function Na(){dn.lineStart(),Qi=[]}function Yn(){La(Qi[0][0],Qi[0][1]),dn.lineEnd();var Dn=dn.clean(),Ka=Xi.result(),bo,Xo=Ka.length,Ss,as,ws;if(Qi.pop(),Nn.push(Qi),Qi=null,!!Xo){if(Dn&1){if(as=Ka[0],(Ss=as.length-1)>0){for(wn||(Kr.polygonStart(),wn=!0),Kr.lineStart(),bo=0;bo<Ss;++bo)Kr.point((ws=as[bo])[0],ws[1]);Kr.lineEnd()}return}Xo>1&&Dn&2&&Ka.push(Ka.pop().concat(Ka.shift())),Yi.push(Ka.filter(Mt))}}return on}}function Mt(At){return At.length>1}function kr(At,Wt){return((At=At.x)[0]<0?At[1]-u-o:u-At[1])-((Wt=Wt.x)[0]<0?Wt[1]-u-o:u-Wt[1])}var Jr=jn(function(){return!0},vi,An,[-l,-u]);function vi(At){var Wt=NaN,Cr=NaN,Ar=NaN,Kr;return{lineStart:function(){At.lineStart(),Kr=1},point:function(ki,Xi){var dn=ki>0?l:-l,wn=v(ki-Wt);v(wn-l)<o?(At.point(Wt,Cr=(Cr+Xi)/2>0?u:-u),At.point(Ar,Cr),At.lineEnd(),At.lineStart(),At.point(dn,Cr),At.point(ki,Cr),Kr=0):Ar!==dn&&wn>=l&&(v(Wt-Ar)<o&&(Wt-=Ar*o),v(ki-dn)<o&&(ki-=dn*o),Cr=hn(Wt,Cr,ki,Xi),At.point(Ar,Cr),At.lineEnd(),At.lineStart(),At.point(dn,Cr),Kr=0),At.point(Wt=ki,Cr=Xi),Ar=dn},lineEnd:function(){At.lineEnd(),Wt=Cr=NaN},clean:function(){return 2-Kr}}}function hn(At,Wt,Cr,Ar){var Kr,ki,Xi=x(At-Cr);return v(Xi)>o?_((x(Wt)*(ki=p(Ar))*x(Cr)-x(Ar)*(Kr=p(Wt))*x(At))/(Kr*ki*Xi)):(Wt+Ar)/2}function An(At,Wt,Cr,Ar){var Kr;if(At==null)Kr=Cr*u,Ar.point(-l,Kr),Ar.point(0,Kr),Ar.point(l,Kr),Ar.point(l,0),Ar.point(l,-Kr),Ar.point(0,-Kr),Ar.point(-l,-Kr),Ar.point(-l,0),Ar.point(-l,Kr);else if(v(At[0]-Wt[0])>o){var ki=At[0]<Wt[0]?l:-l;Kr=Cr*ki/2,Ar.point(-ki,Kr),Ar.point(0,Kr),Ar.point(ki,Kr)}else Ar.point(Wt[0],Wt[1])}function Mn(At){var Wt=p(At),Cr=6*d,Ar=Wt>0,Kr=v(Wt)>o;function ki(Yi,Qi,on,Fi){Sn(Fi,At,Cr,on,Yi,Qi)}function Xi(Yi,Qi){return p(Yi)*p(Qi)>Wt}function dn(Yi){var Qi,on,Fi,$n,Ca;return{lineStart:function(){$n=Fi=!1,Ca=1},point:function(Ra,La){var Na=[Ra,La],Yn,Dn=Xi(Ra,La),Ka=Ar?Dn?0:Nn(Ra,La):Dn?Nn(Ra+(Ra<0?l:-l),La):0;if(!Qi&&($n=Fi=Dn)&&Yi.lineStart(),Dn!==Fi&&(Yn=wn(Qi,Na),(!Yn||Wa(Qi,Yn)||Wa(Na,Yn))&&(Na[2]=1)),Dn!==Fi)Ca=0,Dn?(Yi.lineStart(),Yn=wn(Na,Qi),Yi.point(Yn[0],Yn[1])):(Yn=wn(Qi,Na),Yi.point(Yn[0],Yn[1],2),Yi.lineEnd()),Qi=Yn;else if(Kr&&Qi&&Ar^Dn){var bo;!(Ka&on)&&(bo=wn(Na,Qi,!0))&&(Ca=0,Ar?(Yi.lineStart(),Yi.point(bo[0][0],bo[0][1]),Yi.point(bo[1][0],bo[1][1]),Yi.lineEnd()):(Yi.point(bo[1][0],bo[1][1]),Yi.lineEnd(),Yi.lineStart(),Yi.point(bo[0][0],bo[0][1],3)))}Dn&&(!Qi||!Wa(Qi,Na))&&Yi.point(Na[0],Na[1]),Qi=Na,Fi=Dn,on=Ka},lineEnd:function(){Fi&&Yi.lineEnd(),Qi=null},clean:function(){return Ca|($n&&Fi)<<1}}}function wn(Yi,Qi,on){var Fi=ce(Yi),$n=ce(Qi),Ca=[1,0,0],Ra=ut(Fi,$n),La=Ze(Ra,Ra),Na=Ra[0],Yn=La-Na*Na;if(!Yn)return!on&&Yi;var Dn=Wt*La/Yn,Ka=-Wt*Na/Yn,bo=ut(Ca,Ra),Xo=Zt(Ca,Dn),Ss=Zt(Ra,Ka);pt(Xo,Ss);var as=bo,ws=Ze(Xo,as),Ho=Ze(as,as),ml=ws*ws-Ho*(Ze(Xo,Xo)-1);if(!(ml<0)){var Ws=M(ml),Ls=Zt(as,(-ws-Ws)/Ho);if(pt(Ls,Xo),Ls=Re(Ls),!on)return Ls;var va=Yi[0],no=Qi[0],ys=Yi[1],rs=Qi[1],$l;no<va&&($l=va,va=no,no=$l);var Cu=no-va,Yu=v(Cu-l)<o,Nc=Yu||Cu<o;if(!Yu&&rs<ys&&($l=ys,ys=rs,rs=$l),Nc?Yu?ys+rs>0^Ls[1]<(v(Ls[0]-va)<o?ys:rs):ys<=Ls[1]&&Ls[1]<=rs:Cu>l^(va<=Ls[0]&&Ls[0]<=no)){var pu=Zt(as,(-ws+Ws)/Ho);return pt(pu,Xo),[Ls,Re(pu)]}}}function Nn(Yi,Qi){var on=Ar?At:l-At,Fi=0;return Yi<-on?Fi|=1:Yi>on&&(Fi|=2),Qi<-on?Fi|=4:Qi>on&&(Fi|=8),Fi}return jn(Xi,dn,ki,Ar?[0,-At]:[-l,At-l])}function Li(At,Wt,Cr,Ar,Kr,ki){var Xi=At[0],dn=At[1],wn=Wt[0],Nn=Wt[1],Yi=0,Qi=1,on=wn-Xi,Fi=Nn-dn,$n;if($n=Cr-Xi,!(!on&&$n>0)){if($n/=on,on<0){if($n<Yi)return;$n<Qi&&(Qi=$n)}else if(on>0){if($n>Qi)return;$n>Yi&&(Yi=$n)}if($n=Kr-Xi,!(!on&&$n<0)){if($n/=on,on<0){if($n>Qi)return;$n>Yi&&(Yi=$n)}else if(on>0){if($n<Yi)return;$n<Qi&&(Qi=$n)}if($n=Ar-dn,!(!Fi&&$n>0)){if($n/=Fi,Fi<0){if($n<Yi)return;$n<Qi&&(Qi=$n)}else if(Fi>0){if($n>Qi)return;$n>Yi&&(Yi=$n)}if($n=ki-dn,!(!Fi&&$n<0)){if($n/=Fi,Fi<0){if($n>Qi)return;$n>Yi&&(Yi=$n)}else if(Fi>0){if($n<Yi)return;$n<Qi&&(Qi=$n)}return Yi>0&&(At[0]=Xi+Yi*on,At[1]=dn+Yi*Fi),Qi<1&&(Wt[0]=Xi+Qi*on,Wt[1]=dn+Qi*Fi),!0}}}}}var _n=1e9,ya=-_n;function Jn(At,Wt,Cr,Ar){function Kr(Nn,Yi){return At<=Nn&&Nn<=Cr&&Wt<=Yi&&Yi<=Ar}function ki(Nn,Yi,Qi,on){var Fi=0,$n=0;if(Nn==null||(Fi=Xi(Nn,Qi))!==($n=Xi(Yi,Qi))||wn(Nn,Yi)<0^Qi>0)do on.point(Fi===0||Fi===3?At:Cr,Fi>1?Ar:Wt);while((Fi=(Fi+Qi+4)%4)!==$n);else on.point(Yi[0],Yi[1])}function Xi(Nn,Yi){return v(Nn[0]-At)<o?Yi>0?0:3:v(Nn[0]-Cr)<o?Yi>0?2:1:v(Nn[1]-Wt)<o?Yi>0?1:0:Yi>0?3:2}function dn(Nn,Yi){return wn(Nn.x,Yi.x)}function wn(Nn,Yi){var Qi=Xi(Nn,1),on=Xi(Yi,1);return Qi!==on?Qi-on:Qi===0?Yi[1]-Nn[1]:Qi===1?Nn[0]-Yi[0]:Qi===2?Nn[1]-Yi[1]:Yi[0]-Nn[0]}return function(Nn){var Yi=Nn,Qi=ma(),on,Fi,$n,Ca,Ra,La,Na,Yn,Dn,Ka,bo,Xo={point:Ss,lineStart:ml,lineEnd:Ws,polygonStart:ws,polygonEnd:Ho};function Ss(va,no){Kr(va,no)&&Yi.point(va,no)}function as(){for(var va=0,no=0,ys=Fi.length;no<ys;++no)for(var rs=Fi[no],$l=1,Cu=rs.length,Yu=rs[0],Nc,pu,Uc=Yu[0],xu=Yu[1];$l<Cu;++$l)Nc=Uc,pu=xu,Yu=rs[$l],Uc=Yu[0],xu=Yu[1],pu<=Ar?xu>Ar&&(Uc-Nc)*(Ar-pu)>(xu-pu)*(At-Nc)&&++va:xu<=Ar&&(Uc-Nc)*(Ar-pu)<(xu-pu)*(At-Nc)&&--va;return va}function ws(){Yi=Qi,on=[],Fi=[],bo=!0}function Ho(){var va=as(),no=bo&&va,ys=(on=t.merge(on)).length;(no||ys)&&(Nn.polygonStart(),no&&(Nn.lineStart(),ki(null,null,1,Nn),Nn.lineEnd()),ys&&Wo(on,dn,va,ki,Nn),Nn.polygonEnd()),Yi=Nn,on=Fi=$n=null}function ml(){Xo.point=Ls,Fi&&Fi.push($n=[]),Ka=!0,Dn=!1,Na=Yn=NaN}function Ws(){on&&(Ls(Ca,Ra),La&&Dn&&Qi.rejoin(),on.push(Qi.result())),Xo.point=Ss,Dn&&Yi.lineEnd()}function Ls(va,no){var ys=Kr(va,no);if(Fi&&$n.push([va,no]),Ka)Ca=va,Ra=no,La=ys,Ka=!1,ys&&(Yi.lineStart(),Yi.point(va,no));else if(ys&&Dn)Yi.point(va,no);else{var rs=[Na=Math.max(ya,Math.min(_n,Na)),Yn=Math.max(ya,Math.min(_n,Yn))],$l=[va=Math.max(ya,Math.min(_n,va)),no=Math.max(ya,Math.min(_n,no))];Li(rs,$l,At,Wt,Cr,Ar)?(Dn||(Yi.lineStart(),Yi.point(rs[0],rs[1])),Yi.point($l[0],$l[1]),ys||Yi.lineEnd(),bo=!1):ys&&(Yi.lineStart(),Yi.point(va,no),bo=!1)}Na=va,Yn=no,Dn=ys}return Xo}}function Ma(){var At=0,Wt=0,Cr=960,Ar=500,Kr,ki,Xi;return Xi={stream:function(dn){return Kr&&ki===dn?Kr:Kr=Jn(At,Wt,Cr,Ar)(ki=dn)},extent:function(dn){return arguments.length?(At=+dn[0][0],Wt=+dn[0][1],Cr=+dn[1][0],Ar=+dn[1][1],Kr=ki=null,Xi):[[At,Wt],[Cr,Ar]]}}}var _o=r(),No,po,Lo,ko={sphere:O,point:O,lineStart:Ds,lineEnd:O,polygonStart:O,polygonEnd:O};function Ds(){ko.point=ll,ko.lineEnd=Fs}function Fs(){ko.point=ko.lineEnd=O}function ll(At,Wt){At*=d,Wt*=d,No=At,po=x(Wt),Lo=p(Wt),ko.point=ul}function ul(At,Wt){At*=d,Wt*=d;var Cr=x(Wt),Ar=p(Wt),Kr=v(At-No),ki=p(Kr),Xi=x(Kr),dn=Ar*Xi,wn=Lo*Cr-po*Ar*ki,Nn=po*Cr+Lo*Ar*ki;_o.add(b(M(dn*dn+wn*wn),Nn)),No=At,po=Cr,Lo=Ar}function zl(At){return _o.reset(),H(At,ko),+_o}var us=[null,null],il={type:"LineString",coordinates:us};function As(At,Wt){return us[0]=At,us[1]=Wt,zl(il)}var cl={Feature:function(At,Wt){return zs(At.geometry,Wt)},FeatureCollection:function(At,Wt){for(var Cr=At.features,Ar=-1,Kr=Cr.length;++Ar<Kr;)if(zs(Cr[Ar].geometry,Wt))return!0;return!1}},Ks={Sphere:function(){return!0},Point:function(At,Wt){return Io(At.coordinates,Wt)},MultiPoint:function(At,Wt){for(var Cr=At.coordinates,Ar=-1,Kr=Cr.length;++Ar<Kr;)if(Io(Cr[Ar],Wt))return!0;return!1},LineString:function(At,Wt){return ls(At.coordinates,Wt)},MultiLineString:function(At,Wt){for(var Cr=At.coordinates,Ar=-1,Kr=Cr.length;++Ar<Kr;)if(ls(Cr[Ar],Wt))return!0;return!1},Polygon:function(At,Wt){return Zl(At.coordinates,Wt)},MultiPolygon:function(At,Wt){for(var Cr=At.coordinates,Ar=-1,Kr=Cr.length;++Ar<Kr;)if(Zl(Cr[Ar],Wt))return!0;return!1},GeometryCollection:function(At,Wt){for(var Cr=At.geometries,Ar=-1,Kr=Cr.length;++Ar<Kr;)if(zs(Cr[Ar],Wt))return!0;return!1}};function zs(At,Wt){return At&&Ks.hasOwnProperty(At.type)?Ks[At.type](At,Wt):!1}function Io(At,Wt){return As(At,Wt)===0}function ls(At,Wt){for(var Cr,Ar,Kr,ki=0,Xi=At.length;ki<Xi;ki++){if(Ar=As(At[ki],Wt),Ar===0||ki>0&&(Kr=As(At[ki],At[ki-1]),Kr>0&&Cr<=Kr&&Ar<=Kr&&(Cr+Ar-Kr)*(1-Math.pow((Cr-Ar)/Kr,2))<s*Kr))return!0;Cr=Ar}return!1}function Zl(At,Wt){return!!vo(At.map(Su),nc(Wt))}function Su(At){return At=At.map(nc),At.pop(),At}function nc(At){return[At[0]*d,At[1]*d]}function bs(At,Wt){return(At&&cl.hasOwnProperty(At.type)?cl[At.type]:zs)(At,Wt)}function Rn(At,Wt,Cr){var Ar=t.range(At,Wt-o,Cr).concat(Wt);return function(Kr){return Ar.map(function(ki){return[Kr,ki]})}}function _a(At,Wt,Cr){var Ar=t.range(At,Wt-o,Cr).concat(Wt);return function(Kr){return Ar.map(function(ki){return[ki,Kr]})}}function Vu(){var At,Wt,Cr,Ar,Kr,ki,Xi,dn,wn=10,Nn=wn,Yi=90,Qi=360,on,Fi,$n,Ca,Ra=2.5;function La(){return{type:"MultiLineString",coordinates:Na()}}function Na(){return t.range(k(Ar/Yi)*Yi,Cr,Yi).map($n).concat(t.range(k(dn/Qi)*Qi,Xi,Qi).map(Ca)).concat(t.range(k(Wt/wn)*wn,At,wn).filter(function(Yn){return v(Yn%Yi)>o}).map(on)).concat(t.range(k(ki/Nn)*Nn,Kr,Nn).filter(function(Yn){return v(Yn%Qi)>o}).map(Fi))}return La.lines=function(){return Na().map(function(Yn){return{type:"LineString",coordinates:Yn}})},La.outline=function(){return{type:"Polygon",coordinates:[$n(Ar).concat(Ca(Xi).slice(1),$n(Cr).reverse().slice(1),Ca(dn).reverse().slice(1))]}},La.extent=function(Yn){return arguments.length?La.extentMajor(Yn).extentMinor(Yn):La.extentMinor()},La.extentMajor=function(Yn){return arguments.length?(Ar=+Yn[0][0],Cr=+Yn[1][0],dn=+Yn[0][1],Xi=+Yn[1][1],Ar>Cr&&(Yn=Ar,Ar=Cr,Cr=Yn),dn>Xi&&(Yn=dn,dn=Xi,Xi=Yn),La.precision(Ra)):[[Ar,dn],[Cr,Xi]]},La.extentMinor=function(Yn){return arguments.length?(Wt=+Yn[0][0],At=+Yn[1][0],ki=+Yn[0][1],Kr=+Yn[1][1],Wt>At&&(Yn=Wt,Wt=At,At=Yn),ki>Kr&&(Yn=ki,ki=Kr,Kr=Yn),La.precision(Ra)):[[Wt,ki],[At,Kr]]},La.step=function(Yn){return arguments.length?La.stepMajor(Yn).stepMinor(Yn):La.stepMinor()},La.stepMajor=function(Yn){return arguments.length?(Yi=+Yn[0],Qi=+Yn[1],La):[Yi,Qi]},La.stepMinor=function(Yn){return arguments.length?(wn=+Yn[0],Nn=+Yn[1],La):[wn,Nn]},La.precision=function(Yn){return arguments.length?(Ra=+Yn,on=Rn(ki,Kr,90),Fi=_a(Wt,At,Ra),$n=Rn(dn,Xi,90),Ca=_a(Ar,Cr,Ra),La):Ra},La.extentMajor([[-180,-90+o],[180,90-o]]).extentMinor([[-180,-80-o],[180,80+o]])}function Ol(){return Vu()()}function xo(At,Wt){var Cr=At[0]*d,Ar=At[1]*d,Kr=Wt[0]*d,ki=Wt[1]*d,Xi=p(Ar),dn=x(Ar),wn=p(ki),Nn=x(ki),Yi=Xi*p(Cr),Qi=Xi*x(Cr),on=wn*p(Kr),Fi=wn*x(Kr),$n=2*T(M(z(ki-Ar)+Xi*wn*z(Kr-Cr))),Ca=x($n),Ra=$n?function(La){var Na=x(La*=$n)/Ca,Yn=x($n-La)/Ca,Dn=Yn*Yi+Na*on,Ka=Yn*Qi+Na*Fi,bo=Yn*dn+Na*Nn;return[b(Ka,Dn)*h,b(bo,M(Dn*Dn+Ka*Ka))*h]}:function(){return[Cr*h,Ar*h]};return Ra.distance=$n,Ra}function Yl(At){return At}var Ns=r(),Hl=r(),ac,aa,Oo,qo,ql={point:O,lineStart:O,lineEnd:O,polygonStart:function(){ql.lineStart=Pc,ql.lineEnd=Uf},polygonEnd:function(){ql.lineStart=ql.lineEnd=ql.point=O,Ns.add(v(Hl)),Hl.reset()},result:function(){var At=Ns/2;return Ns.reset(),At}};function Pc(){ql.point=Do}function Do(At,Wt){ql.point=rf,ac=Oo=At,aa=qo=Wt}function rf(At,Wt){Hl.add(qo*At-Oo*Wt),Oo=At,qo=Wt}function Uf(){rf(ac,aa)}var pl=1/0,Zc=pl,Kl=-pl,Os=Kl,yu={point:oc,lineStart:O,lineEnd:O,polygonStart:O,polygonEnd:O,result:function(){var At=[[pl,Zc],[Kl,Os]];return Kl=Os=-(Zc=pl=1/0),At}};function oc(At,Wt){At<pl&&(pl=At),At>Kl&&(Kl=At),Wt<Zc&&(Zc=Wt),Wt>Os&&(Os=Wt)}var Cf=0,sc=0,Vh=0,Lf=0,cs=0,nf=0,Vf=0,Jl=0,fl=0,lc,Fu,Es,Hs,Go={point:ps,lineStart:uc,lineEnd:qs,polygonStart:function(){Go.lineStart=ad,Go.lineEnd=Po},polygonEnd:function(){Go.point=ps,Go.lineStart=uc,Go.lineEnd=qs},result:function(){var At=fl?[Vf/fl,Jl/fl]:nf?[Lf/nf,cs/nf]:Vh?[Cf/Vh,sc/Vh]:[NaN,NaN];return Cf=sc=Vh=Lf=cs=nf=Vf=Jl=fl=0,At}};function ps(At,Wt){Cf+=At,sc+=Wt,++Vh}function uc(){Go.point=xl}function xl(At,Wt){Go.point=Gu,ps(Es=At,Hs=Wt)}function Gu(At,Wt){var Cr=At-Es,Ar=Wt-Hs,Kr=M(Cr*Cr+Ar*Ar);Lf+=Kr*(Es+At)/2,cs+=Kr*(Hs+Wt)/2,nf+=Kr,ps(Es=At,Hs=Wt)}function qs(){Go.point=ps}function ad(){Go.point=od}function Po(){Yo(lc,Fu)}function od(At,Wt){Go.point=Yo,ps(lc=Es=At,Fu=Hs=Wt)}function Yo(At,Wt){var Cr=At-Es,Ar=Wt-Hs,Kr=M(Cr*Cr+Ar*Ar);Lf+=Kr*(Es+At)/2,cs+=Kr*(Hs+Wt)/2,nf+=Kr,Kr=Hs*At-Es*Wt,Vf+=Kr*(Es+At),Jl+=Kr*(Hs+Wt),fl+=Kr*3,ps(Es=At,Hs=Wt)}function Pa(At){this._context=At}Pa.prototype={_radius:4.5,pointRadius:function(At){return this._radius=At,this},polygonStart:function(){this._line=0},polygonEnd:function(){this._line=NaN},lineStart:function(){this._point=0},lineEnd:function(){this._line===0&&this._context.closePath(),this._point=NaN},point:function(At,Wt){switch(this._point){case 0:{this._context.moveTo(At,Wt),this._point=1;break}case 1:{this._context.lineTo(At,Wt);break}default:{this._context.moveTo(At+this._radius,Wt),this._context.arc(At,Wt,this._radius,0,f);break}}},result:O};var af=r(),Hu,bl,Gf,Ic,yf,Bl={point:O,lineStart:function(){Bl.point=wh},lineEnd:function(){Hu&&Qf(bl,Gf),Bl.point=O},polygonStart:function(){Hu=!0},polygonEnd:function(){Hu=null},result:function(){var At=+af;return af.reset(),At}};function wh(At,Wt){Bl.point=Qf,bl=Ic=At,Gf=yf=Wt}function Qf(At,Wt){Ic-=At,yf-=Wt,af.add(M(Ic*Ic+yf*yf)),Ic=At,yf=Wt}function _f(){this._string=[]}_f.prototype={_radius:4.5,_circle:Yc(4.5),pointRadius:function(At){return(At=+At)!==this._radius&&(this._radius=At,this._circle=null),this},polygonStart:function(){this._line=0},polygonEnd:function(){this._line=NaN},lineStart:function(){this._point=0},lineEnd:function(){this._line===0&&this._string.push("Z"),this._point=NaN},point:function(At,Wt){switch(this._point){case 0:{this._string.push("M",At,",",Wt),this._point=1;break}case 1:{this._string.push("L",At,",",Wt);break}default:{this._circle==null&&(this._circle=Yc(this._radius)),this._string.push("M",At,",",Wt,this._circle);break}}},result:function(){if(this._string.length){var At=this._string.join("");return this._string=[],At}else return null}};function Yc(At){return"m0,"+At+"a"+At+","+At+" 0 1,1 0,"+-2*At+"a"+At+","+At+" 0 1,1 0,"+2*At+"z"}function eh(At,Wt){var Cr=4.5,Ar,Kr;function ki(Xi){return Xi&&(typeof Cr=="function"&&Kr.pointRadius(+Cr.apply(this,arguments)),H(Xi,Ar(Kr))),Kr.result()}return ki.area=function(Xi){return H(Xi,Ar(ql)),ql.result()},ki.measure=function(Xi){return H(Xi,Ar(Bl)),Bl.result()},ki.bounds=function(Xi){return H(Xi,Ar(yu)),yu.result()},ki.centroid=function(Xi){return H(Xi,Ar(Go)),Go.result()},ki.projection=function(Xi){return arguments.length?(Ar=Xi==null?(At=null,Yl):(At=Xi).stream,ki):At},ki.context=function(Xi){return arguments.length?(Kr=Xi==null?(Wt=null,new _f):new Pa(Wt=Xi),typeof Cr!="function"&&Kr.pointRadius(Cr),ki):Wt},ki.pointRadius=function(Xi){return arguments.length?(Cr=typeof Xi=="function"?Xi:(Kr.pointRadius(+Xi),+Xi),ki):Cr},ki.projection(At).context(Wt)}function th(At){return{stream:ju(At)}}function ju(At){return function(Wt){var Cr=new Hf;for(var Ar in At)Cr[Ar]=At[Ar];return Cr.stream=Wt,Cr}}function Hf(){}Hf.prototype={constructor:Hf,point:function(At,Wt){this.stream.point(At,Wt)},sphere:function(){this.stream.sphere()},lineStart:function(){this.stream.lineStart()},lineEnd:function(){this.stream.lineEnd()},polygonStart:function(){this.stream.polygonStart()},polygonEnd:function(){this.stream.polygonEnd()}};function cc(At,Wt,Cr){var Ar=At.clipExtent&&At.clipExtent();return At.scale(150).translate([0,0]),Ar!=null&&At.clipExtent(null),H(Cr,At.stream(yu)),Wt(yu.result()),Ar!=null&&At.clipExtent(Ar),At}function of(At,Wt,Cr){return cc(At,function(Ar){var Kr=Wt[1][0]-Wt[0][0],ki=Wt[1][1]-Wt[0][1],Xi=Math.min(Kr/(Ar[1][0]-Ar[0][0]),ki/(Ar[1][1]-Ar[0][1])),dn=+Wt[0][0]+(Kr-Xi*(Ar[1][0]+Ar[0][0]))/2,wn=+Wt[0][1]+(ki-Xi*(Ar[1][1]+Ar[0][1]))/2;At.scale(150*Xi).translate([dn,wn])},Cr)}function Nl(At,Wt,Cr){return of(At,[[0,0],Wt],Cr)}function Kc(At,Wt,Cr){return cc(At,function(Ar){var Kr=+Wt,ki=Kr/(Ar[1][0]-Ar[0][0]),Xi=(Kr-ki*(Ar[1][0]+Ar[0][0]))/2,dn=-ki*Ar[0][1];At.scale(150*ki).translate([Xi,dn])},Cr)}function Rc(At,Wt,Cr){return cc(At,function(Ar){var Kr=+Wt,ki=Kr/(Ar[1][1]-Ar[0][1]),Xi=-ki*Ar[0][0],dn=(Kr-ki*(Ar[1][1]+Ar[0][1]))/2;At.scale(150*ki).translate([Xi,dn])},Cr)}var gs=16,jf=p(30*d);function Gh(At,Wt){return+Wt?sf(At,Wt):rh(At)}function rh(At){return ju({point:function(Wt,Cr){Wt=At(Wt,Cr),this.stream.point(Wt[0],Wt[1])}})}function sf(At,Wt){function Cr(Ar,Kr,ki,Xi,dn,wn,Nn,Yi,Qi,on,Fi,$n,Ca,Ra){var La=Nn-Ar,Na=Yi-Kr,Yn=La*La+Na*Na;if(Yn>4*Wt&&Ca--){var Dn=Xi+on,Ka=dn+Fi,bo=wn+$n,Xo=M(Dn*Dn+Ka*Ka+bo*bo),Ss=T(bo/=Xo),as=v(v(bo)-1)<o||v(ki-Qi)<o?(ki+Qi)/2:b(Ka,Dn),ws=At(as,Ss),Ho=ws[0],ml=ws[1],Ws=Ho-Ar,Ls=ml-Kr,va=Na*Ws-La*Ls;(va*va/Yn>Wt||v((La*Ws+Na*Ls)/Yn-.5)>.3||Xi*on+dn*Fi+wn*$n<jf)&&(Cr(Ar,Kr,ki,Xi,dn,wn,Ho,ml,as,Dn/=Xo,Ka/=Xo,bo,Ca,Ra),Ra.point(Ho,ml),Cr(Ho,ml,as,Dn,Ka,bo,Nn,Yi,Qi,on,Fi,$n,Ca,Ra))}}return function(Ar){var Kr,ki,Xi,dn,wn,Nn,Yi,Qi,on,Fi,$n,Ca,Ra={point:La,lineStart:Na,lineEnd:Dn,polygonStart:function(){Ar.polygonStart(),Ra.lineStart=Ka},polygonEnd:function(){Ar.polygonEnd(),Ra.lineStart=Na}};function La(Ss,as){Ss=At(Ss,as),Ar.point(Ss[0],Ss[1])}function Na(){Qi=NaN,Ra.point=Yn,Ar.lineStart()}function Yn(Ss,as){var ws=ce([Ss,as]),Ho=At(Ss,as);Cr(Qi,on,Yi,Fi,$n,Ca,Qi=Ho[0],on=Ho[1],Yi=Ss,Fi=ws[0],$n=ws[1],Ca=ws[2],gs,Ar),Ar.point(Qi,on)}function Dn(){Ra.point=La,Ar.lineEnd()}function Ka(){Na(),Ra.point=bo,Ra.lineEnd=Xo}function bo(Ss,as){Yn(Kr=Ss,as),ki=Qi,Xi=on,dn=Fi,wn=$n,Nn=Ca,Ra.point=Yn}function Xo(){Cr(Qi,on,Yi,Fi,$n,Ca,ki,Xi,Kr,dn,wn,Nn,gs,Ar),Ra.lineEnd=Dn,Dn()}return Ra}}var Th=ju({point:function(At,Wt){this.stream.point(At*d,Wt*d)}});function Mu(At){return ju({point:function(Wt,Cr){var Ar=At(Wt,Cr);return this.stream.point(Ar[0],Ar[1])}})}function ih(At,Wt,Cr,Ar,Kr){function ki(Xi,dn){return Xi*=Ar,dn*=Kr,[Wt+At*Xi,Cr-At*dn]}return ki.invert=function(Xi,dn){return[(Xi-Wt)/At*Ar,(Cr-dn)/At*Kr]},ki}function js(At,Wt,Cr,Ar,Kr,ki){var Xi=p(ki),dn=x(ki),wn=Xi*At,Nn=dn*At,Yi=Xi/At,Qi=dn/At,on=(dn*Cr-Xi*Wt)/At,Fi=(dn*Wt+Xi*Cr)/At;function $n(Ca,Ra){return Ca*=Ar,Ra*=Kr,[wn*Ca-Nn*Ra+Wt,Cr-Nn*Ca-wn*Ra]}return $n.invert=function(Ca,Ra){return[Ar*(Yi*Ca-Qi*Ra+on),Kr*(Fi-Qi*Ca-Yi*Ra)]},$n}function Eu(At){return Dc(function(){return At})()}function Dc(At){var Wt,Cr=150,Ar=480,Kr=250,ki=0,Xi=0,dn=0,wn=0,Nn=0,Yi,Qi=0,on=1,Fi=1,$n=null,Ca=Jr,Ra=null,La,Na,Yn,Dn=Yl,Ka=.5,bo,Xo,Ss,as,ws;function Ho(va){return Ss(va[0]*d,va[1]*d)}function ml(va){return va=Ss.invert(va[0],va[1]),va&&[va[0]*h,va[1]*h]}Ho.stream=function(va){return as&&ws===va?as:as=Th(Mu(Yi)(Ca(bo(Dn(ws=va)))))},Ho.preclip=function(va){return arguments.length?(Ca=va,$n=void 0,Ls()):Ca},Ho.postclip=function(va){return arguments.length?(Dn=va,Ra=La=Na=Yn=null,Ls()):Dn},Ho.clipAngle=function(va){return arguments.length?(Ca=+va?Mn($n=va*d):($n=null,Jr),Ls()):$n*h},Ho.clipExtent=function(va){return arguments.length?(Dn=va==null?(Ra=La=Na=Yn=null,Yl):Jn(Ra=+va[0][0],La=+va[0][1],Na=+va[1][0],Yn=+va[1][1]),Ls()):Ra==null?null:[[Ra,La],[Na,Yn]]},Ho.scale=function(va){return arguments.length?(Cr=+va,Ws()):Cr},Ho.translate=function(va){return arguments.length?(Ar=+va[0],Kr=+va[1],Ws()):[Ar,Kr]},Ho.center=function(va){return arguments.length?(ki=va[0]%360*d,Xi=va[1]%360*d,Ws()):[ki*h,Xi*h]},Ho.rotate=function(va){return arguments.length?(dn=va[0]%360*d,wn=va[1]%360*d,Nn=va.length>2?va[2]%360*d:0,Ws()):[dn*h,wn*h,Nn*h]},Ho.angle=function(va){return arguments.length?(Qi=va%360*d,Ws()):Qi*h},Ho.reflectX=function(va){return arguments.length?(on=va?-1:1,Ws()):on<0},Ho.reflectY=function(va){return arguments.length?(Fi=va?-1:1,Ws()):Fi<0},Ho.precision=function(va){return arguments.length?(bo=Gh(Xo,Ka=va*va),Ls()):M(Ka)},Ho.fitExtent=function(va,no){return of(Ho,va,no)},Ho.fitSize=function(va,no){return Nl(Ho,va,no)},Ho.fitWidth=function(va,no){return Kc(Ho,va,no)},Ho.fitHeight=function(va,no){return Rc(Ho,va,no)};function Ws(){var va=js(Cr,0,0,on,Fi,Qi).apply(null,Wt(ki,Xi)),no=(Qi?js:ih)(Cr,Ar-va[0],Kr-va[1],on,Fi,Qi);return Yi=Oi(dn,wn,Nn),Xo=ri(Wt,no),Ss=ri(Yi,Xo),bo=Gh(Xo,Ka),Ls()}function Ls(){return as=ws=null,Ho}return function(){return Wt=At.apply(this,arguments),Ho.invert=Wt.invert&&ml,Ws()}}function ks(At){var Wt=0,Cr=l/3,Ar=Dc(At),Kr=Ar(Wt,Cr);return Kr.parallels=function(ki){return arguments.length?Ar(Wt=ki[0]*d,Cr=ki[1]*d):[Wt*h,Cr*h]},Kr}function bc(At){var Wt=p(At);function Cr(Ar,Kr){return[Ar*Wt,x(Kr)/Wt]}return Cr.invert=function(Ar,Kr){return[Ar/Wt,T(Kr*Wt)]},Cr}function hu(At,Wt){var Cr=x(At),Ar=(Cr+x(Wt))/2;if(v(Ar)<o)return bc(At);var Kr=1+Cr*(2*Ar-Cr),ki=M(Kr)/Ar;function Xi(dn,wn){var Nn=M(Kr-2*Ar*x(wn))/Ar;return[Nn*x(dn*=Ar),ki-Nn*p(dn)]}return Xi.invert=function(dn,wn){var Nn=ki-wn,Yi=b(dn,v(Nn))*C(Nn);return Nn*Ar<0&&(Yi-=l*C(dn)*C(Nn)),[Yi/Ar,T((Kr-(dn*dn+Nn*Nn)*Ar*Ar)/(2*Ar))]},Xi}function _u(){return ks(hu).scale(155.424).center([0,33.6442])}function nl(){return _u().parallels([29.5,45.5]).scale(1070).translate([480,250]).rotate([96,0]).center([-.6,38.7])}function nh(At){var Wt=At.length;return{point:function(Cr,Ar){for(var Kr=-1;++Kr<Wt;)At[Kr].point(Cr,Ar)},sphere:function(){for(var Cr=-1;++Cr<Wt;)At[Cr].sphere()},lineStart:function(){for(var Cr=-1;++Cr<Wt;)At[Cr].lineStart()},lineEnd:function(){for(var Cr=-1;++Cr<Wt;)At[Cr].lineEnd()},polygonStart:function(){for(var Cr=-1;++Cr<Wt;)At[Cr].polygonStart()},polygonEnd:function(){for(var Cr=-1;++Cr<Wt;)At[Cr].polygonEnd()}}}function Ah(){var At,Wt,Cr=nl(),Ar,Kr=_u().rotate([154,0]).center([-2,58.5]).parallels([55,65]),ki,Xi=_u().rotate([157,0]).center([-3,19.9]).parallels([8,18]),dn,wn,Nn={point:function(on,Fi){wn=[on,Fi]}};function Yi(on){var Fi=on[0],$n=on[1];return wn=null,Ar.point(Fi,$n),wn||(ki.point(Fi,$n),wn)||(dn.point(Fi,$n),wn)}Yi.invert=function(on){var Fi=Cr.scale(),$n=Cr.translate(),Ca=(on[0]-$n[0])/Fi,Ra=(on[1]-$n[1])/Fi;return(Ra>=.12&&Ra<.234&&Ca>=-.425&&Ca<-.214?Kr:Ra>=.166&&Ra<.234&&Ca>=-.214&&Ca<-.115?Xi:Cr).invert(on)},Yi.stream=function(on){return At&&Wt===on?At:At=nh([Cr.stream(Wt=on),Kr.stream(on),Xi.stream(on)])},Yi.precision=function(on){return arguments.length?(Cr.precision(on),Kr.precision(on),Xi.precision(on),Qi()):Cr.precision()},Yi.scale=function(on){return arguments.length?(Cr.scale(on),Kr.scale(on*.35),Xi.scale(on),Yi.translate(Cr.translate())):Cr.scale()},Yi.translate=function(on){if(!arguments.length)return Cr.translate();var Fi=Cr.scale(),$n=+on[0],Ca=+on[1];return Ar=Cr.translate(on).clipExtent([[$n-.455*Fi,Ca-.238*Fi],[$n+.455*Fi,Ca+.238*Fi]]).stream(Nn),ki=Kr.translate([$n-.307*Fi,Ca+.201*Fi]).clipExtent([[$n-.425*Fi+o,Ca+.12*Fi+o],[$n-.214*Fi-o,Ca+.234*Fi-o]]).stream(Nn),dn=Xi.translate([$n-.205*Fi,Ca+.212*Fi]).clipExtent([[$n-.214*Fi+o,Ca+.166*Fi+o],[$n-.115*Fi-o,Ca+.234*Fi-o]]).stream(Nn),Qi()},Yi.fitExtent=function(on,Fi){return of(Yi,on,Fi)},Yi.fitSize=function(on,Fi){return Nl(Yi,on,Fi)},Yi.fitWidth=function(on,Fi){return Kc(Yi,on,Fi)},Yi.fitHeight=function(on,Fi){return Rc(Yi,on,Fi)};function Qi(){return At=Wt=null,Yi}return Yi.scale(1070)}function zu(At){return function(Wt,Cr){var Ar=p(Wt),Kr=p(Cr),ki=At(Ar*Kr);return[ki*Kr*x(Wt),ki*x(Cr)]}}function Fc(At){return function(Wt,Cr){var Ar=M(Wt*Wt+Cr*Cr),Kr=At(Ar),ki=x(Kr),Xi=p(Kr);return[b(Wt*ki,Ar*Xi),T(Ar&&Cr*ki/Ar)]}}var wc=zu(function(At){return M(2/(1+At))});wc.invert=Fc(function(At){return 2*T(At/2)});function bd(){return Eu(wc).scale(124.75).clipAngle(180-.001)}var xf=zu(function(At){return(At=P(At))&&At/x(At)});xf.invert=Fc(function(At){return At});function Pf(){return Eu(xf).scale(79.4188).clipAngle(180-.001)}function Ou(At,Wt){return[At,S(g((u+Wt)/2))]}Ou.invert=function(At,Wt){return[At,2*_(E(Wt))-u]};function bf(){return jl(Ou).scale(961/f)}function jl(At){var Wt=Eu(At),Cr=Wt.center,Ar=Wt.scale,Kr=Wt.translate,ki=Wt.clipExtent,Xi=null,dn,wn,Nn;Wt.scale=function(Qi){return arguments.length?(Ar(Qi),Yi()):Ar()},Wt.translate=function(Qi){return arguments.length?(Kr(Qi),Yi()):Kr()},Wt.center=function(Qi){return arguments.length?(Cr(Qi),Yi()):Cr()},Wt.clipExtent=function(Qi){return arguments.length?(Qi==null?Xi=dn=wn=Nn=null:(Xi=+Qi[0][0],dn=+Qi[0][1],wn=+Qi[1][0],Nn=+Qi[1][1]),Yi()):Xi==null?null:[[Xi,dn],[wn,Nn]]};function Yi(){var Qi=l*Ar(),on=Wt(yn(Wt.rotate()).invert([0,0]));return ki(Xi==null?[[on[0]-Qi,on[1]-Qi],[on[0]+Qi,on[1]+Qi]]:At===Ou?[[Math.max(on[0]-Qi,Xi),dn],[Math.min(on[0]+Qi,wn),Nn]]:[[Xi,Math.max(on[1]-Qi,dn)],[wn,Math.min(on[1]+Qi,Nn)]])}return Yi()}function lf(At){return g((u+At)/2)}function Hh(At,Wt){var Cr=p(At),Ar=At===Wt?x(At):S(Cr/p(Wt))/S(lf(Wt)/lf(At)),Kr=Cr*L(lf(At),Ar)/Ar;if(!Ar)return Ou;function ki(Xi,dn){Kr>0?dn<-u+o&&(dn=-u+o):dn>u-o&&(dn=u-o);var wn=Kr/L(lf(dn),Ar);return[wn*x(Ar*Xi),Kr-wn*p(Ar*Xi)]}return ki.invert=function(Xi,dn){var wn=Kr-dn,Nn=C(Ar)*M(Xi*Xi+wn*wn),Yi=b(Xi,v(wn))*C(wn);return wn*Ar<0&&(Yi-=l*C(Xi)*C(wn)),[Yi/Ar,2*_(L(Kr/Nn,1/Ar))-u]},ki}function If(){return ks(Hh).scale(109.5).parallels([30,30])}function Cs(At,Wt){return[At,Wt]}Cs.invert=Cs;function du(){return Eu(Cs).scale(152.63)}function ku(At,Wt){var Cr=p(At),Ar=At===Wt?x(At):(Cr-p(Wt))/(Wt-At),Kr=Cr/Ar+At;if(v(Ar)<o)return Cs;function ki(Xi,dn){var wn=Kr-dn,Nn=Ar*Xi;return[wn*x(Nn),Kr-wn*p(Nn)]}return ki.invert=function(Xi,dn){var wn=Kr-dn,Nn=b(Xi,v(wn))*C(wn);return wn*Ar<0&&(Nn-=l*C(Xi)*C(wn)),[Nn/Ar,Kr-C(Ar)*M(Xi*Xi+wn*wn)]},ki}function Wf(){return ks(ku).scale(131.154).center([0,13.9389])}var Us=1.340264,wf=-.081106,zc=893e-6,Wu=.003796,Rf=M(3)/2,Xu=12;function uf(At,Wt){var Cr=T(Rf*x(Wt)),Ar=Cr*Cr,Kr=Ar*Ar*Ar;return[At*p(Cr)/(Rf*(Us+3*wf*Ar+Kr*(7*zc+9*Wu*Ar))),Cr*(Us+wf*Ar+Kr*(zc+Wu*Ar))]}uf.invert=function(At,Wt){for(var Cr=Wt,Ar=Cr*Cr,Kr=Ar*Ar*Ar,ki=0,Xi,dn,wn;ki<Xu&&(dn=Cr*(Us+wf*Ar+Kr*(zc+Wu*Ar))-Wt,wn=Us+3*wf*Ar+Kr*(7*zc+9*Wu*Ar),Cr-=Xi=dn/wn,Ar=Cr*Cr,Kr=Ar*Ar*Ar,!(v(Xi)<s));++ki);return[Rf*At*(Us+3*wf*Ar+Kr*(7*zc+9*Wu*Ar))/p(Cr),T(x(Cr)/Rf)]};function Xf(){return Eu(uf).scale(177.158)}function Wl(At,Wt){var Cr=p(Wt),Ar=p(At)*Cr;return[Cr*x(At)/Ar,x(Wt)/Ar]}Wl.invert=Fc(_);function ah(){return Eu(Wl).scale(144.049).clipAngle(60)}function Zu(){var At=1,Wt=0,Cr=0,Ar=1,Kr=1,ki=0,Xi,dn,wn=null,Nn,Yi,Qi,on=1,Fi=1,$n=ju({point:function(Dn,Ka){var bo=Yn([Dn,Ka]);this.stream.point(bo[0],bo[1])}}),Ca=Yl,Ra,La;function Na(){return on=At*Ar,Fi=At*Kr,Ra=La=null,Yn}function Yn(Dn){var Ka=Dn[0]*on,bo=Dn[1]*Fi;if(ki){var Xo=bo*Xi-Ka*dn;Ka=Ka*Xi+bo*dn,bo=Xo}return[Ka+Wt,bo+Cr]}return Yn.invert=function(Dn){var Ka=Dn[0]-Wt,bo=Dn[1]-Cr;if(ki){var Xo=bo*Xi+Ka*dn;Ka=Ka*Xi-bo*dn,bo=Xo}return[Ka/on,bo/Fi]},Yn.stream=function(Dn){return Ra&&La===Dn?Ra:Ra=$n(Ca(La=Dn))},Yn.postclip=function(Dn){return arguments.length?(Ca=Dn,wn=Nn=Yi=Qi=null,Na()):Ca},Yn.clipExtent=function(Dn){return arguments.length?(Ca=Dn==null?(wn=Nn=Yi=Qi=null,Yl):Jn(wn=+Dn[0][0],Nn=+Dn[0][1],Yi=+Dn[1][0],Qi=+Dn[1][1]),Na()):wn==null?null:[[wn,Nn],[Yi,Qi]]},Yn.scale=function(Dn){return arguments.length?(At=+Dn,Na()):At},Yn.translate=function(Dn){return arguments.length?(Wt=+Dn[0],Cr=+Dn[1],Na()):[Wt,Cr]},Yn.angle=function(Dn){return arguments.length?(ki=Dn%360*d,dn=x(ki),Xi=p(ki),Na()):ki*h},Yn.reflectX=function(Dn){return arguments.length?(Ar=Dn?-1:1,Na()):Ar<0},Yn.reflectY=function(Dn){return arguments.length?(Kr=Dn?-1:1,Na()):Kr<0},Yn.fitExtent=function(Dn,Ka){return of(Yn,Dn,Ka)},Yn.fitSize=function(Dn,Ka){return Nl(Yn,Dn,Ka)},Yn.fitWidth=function(Dn,Ka){return Kc(Yn,Dn,Ka)},Yn.fitHeight=function(Dn,Ka){return Rc(Yn,Dn,Ka)},Yn}function Oc(At,Wt){var Cr=Wt*Wt,Ar=Cr*Cr;return[At*(.8707-.131979*Cr+Ar*(-.013791+Ar*(.003971*Cr-.001529*Ar))),Wt*(1.007226+Cr*(.015085+Ar*(-.044475+.028874*Cr-.005916*Ar)))]}Oc.invert=function(At,Wt){var Cr=Wt,Ar=25,Kr;do{var ki=Cr*Cr,Xi=ki*ki;Cr-=Kr=(Cr*(1.007226+ki*(.015085+Xi*(-.044475+.028874*ki-.005916*Xi)))-Wt)/(1.007226+ki*(.015085*3+Xi*(-.044475*7+.028874*9*ki-.005916*11*Xi)))}while(v(Kr)>o&&--Ar>0);return[At/(.8707+(ki=Cr*Cr)*(-.131979+ki*(-.013791+ki*ki*ki*(.003971-.001529*ki)))),Cr]};function Tc(){return Eu(Oc).scale(175.295)}function wl(At,Wt){return[p(Wt)*x(At),x(Wt)]}wl.invert=Fc(T);function vu(){return Eu(wl).scale(249.5).clipAngle(90+o)}function qc(At,Wt){var Cr=p(Wt),Ar=1+p(At)*Cr;return[Cr*x(At)/Ar,x(Wt)/Ar]}qc.invert=Fc(function(At){return 2*_(At)});function cf(){return Eu(qc).scale(250).clipAngle(142)}function fc(At,Wt){return[S(g((u+Wt)/2)),-At]}fc.invert=function(At,Wt){return[-Wt,2*_(E(At))-u]};function Bc(){var At=jl(fc),Wt=At.center,Cr=At.rotate;return At.center=function(Ar){return arguments.length?Wt([-Ar[1],Ar[0]]):(Ar=Wt(),[Ar[1],-Ar[0]])},At.rotate=function(Ar){return arguments.length?Cr([Ar[0],Ar[1],Ar.length>2?Ar[2]+90:90]):(Ar=Cr(),[Ar[0],Ar[1],Ar[2]-90])},Cr([0,0,90]).scale(159.155)}e.geoAlbers=nl,e.geoAlbersUsa=Ah,e.geoArea=ge,e.geoAzimuthalEqualArea=bd,e.geoAzimuthalEqualAreaRaw=wc,e.geoAzimuthalEquidistant=Pf,e.geoAzimuthalEquidistantRaw=xf,e.geoBounds=Ge,e.geoCentroid=Er,e.geoCircle=ua,e.geoClipAntimeridian=Jr,e.geoClipCircle=Mn,e.geoClipExtent=Ma,e.geoClipRectangle=Jn,e.geoConicConformal=If,e.geoConicConformalRaw=Hh,e.geoConicEqualArea=_u,e.geoConicEqualAreaRaw=hu,e.geoConicEquidistant=Wf,e.geoConicEquidistantRaw=ku,e.geoContains=bs,e.geoDistance=As,e.geoEqualEarth=Xf,e.geoEqualEarthRaw=uf,e.geoEquirectangular=du,e.geoEquirectangularRaw=Cs,e.geoGnomonic=ah,e.geoGnomonicRaw=Wl,e.geoGraticule=Vu,e.geoGraticule10=Ol,e.geoIdentity=Zu,e.geoInterpolate=xo,e.geoLength=zl,e.geoMercator=bf,e.geoMercatorRaw=Ou,e.geoNaturalEarth1=Tc,e.geoNaturalEarth1Raw=Oc,e.geoOrthographic=vu,e.geoOrthographicRaw=wl,e.geoPath=eh,e.geoProjection=Eu,e.geoProjectionMutator=Dc,e.geoRotation=yn,e.geoStereographic=cf,e.geoStereographicRaw=qc,e.geoStream=H,e.geoTransform=th,e.geoTransverseMercator=Bc,e.geoTransverseMercatorRaw=fc,Object.defineProperty(e,"__esModule",{value:!0})})});var PDe=ye((PF,LDe)=>{(function(e,t){typeof PF=="object"&&typeof LDe!="undefined"?t(PF,RZ(),nk()):t(e.d3=e.d3||{},e.d3,e.d3)})(PF,function(e,t,r){"use strict";var n=Math.abs,i=Math.atan,a=Math.atan2,o=Math.cos,s=Math.exp,l=Math.floor,u=Math.log,c=Math.max,f=Math.min,h=Math.pow,d=Math.round,v=Math.sign||function(ve){return ve>0?1:ve<0?-1:0},_=Math.sin,b=Math.tan,p=1e-6,k=1e-12,E=Math.PI,S=E/2,L=E/4,x=Math.SQRT1_2,C=G(2),M=G(E),g=E*2,P=180/E,T=E/180;function z(ve){return ve?ve/Math.sin(ve):1}function O(ve){return ve>1?S:ve<-1?-S:Math.asin(ve)}function V(ve){return ve>1?0:ve<-1?E:Math.acos(ve)}function G(ve){return ve>0?Math.sqrt(ve):0}function Z(ve){return ve=s(2*ve),(ve-1)/(ve+1)}function j(ve){return(s(ve)-s(-ve))/2}function N(ve){return(s(ve)+s(-ve))/2}function H(ve){return u(ve+G(ve*ve+1))}function te(ve){return u(ve+G(ve*ve-1))}function oe(ve){var be=b(ve/2),De=2*u(o(ve/2))/(be*be);function Be(et,We){var it=o(et),Ft=o(We),Ht=_(We),tr=Ft*it,dr=-((1-tr?u((1+tr)/2)/(1-tr):-.5)+De/(1+tr));return[dr*Ft*_(et),dr*Ht]}return Be.invert=function(et,We){var it=G(et*et+We*We),Ft=-ve/2,Ht=50,tr;if(!it)return[0,0];do{var dr=Ft/2,Sr=o(dr),Or=_(dr),Wr=Or/Sr,ni=-u(n(Sr));Ft-=tr=(2/Wr*ni-De*Wr-it)/(-ni/(Or*Or)+1-De/(2*Sr*Sr))*(Sr<0?.7:1)}while(n(tr)>p&&--Ht>0);var Pi=_(Ft);return[a(et*Pi,it*o(Ft)),O(We*Pi/it)]},Be}function _e(){var ve=S,be=t.geoProjectionMutator(oe),De=be(ve);return De.radius=function(Be){return arguments.length?be(ve=Be*T):ve*P},De.scale(179.976).clipAngle(147)}function Ee(ve,be){var De=o(be),Be=z(V(De*o(ve/=2)));return[2*De*_(ve)*Be,_(be)*Be]}Ee.invert=function(ve,be){if(!(ve*ve+4*be*be>E*E+p)){var De=ve,Be=be,et=25;do{var We=_(De),it=_(De/2),Ft=o(De/2),Ht=_(Be),tr=o(Be),dr=_(2*Be),Sr=Ht*Ht,Or=tr*tr,Wr=it*it,ni=1-Or*Ft*Ft,Pi=ni?V(tr*Ft)*G(cn=1/ni):cn=0,cn,ln=2*Pi*tr*it-ve,Cn=Pi*Ht-be,Kn=cn*(Or*Wr+Pi*tr*Ft*Sr),Ta=cn*(.5*We*dr-Pi*2*Ht*it),fa=cn*.25*(dr*it-Pi*Ht*Or*We),$a=cn*(Sr*Ft+Pi*Wr*tr),Co=Ta*fa-$a*Kn;if(!Co)break;var Qa=(Cn*Ta-ln*$a)/Co,mo=(ln*fa-Cn*Kn)/Co;De-=Qa,Be-=mo}while((n(Qa)>p||n(mo)>p)&&--et>0);return[De,Be]}};function Ce(){return t.geoProjection(Ee).scale(152.63)}function me(ve){var be=_(ve),De=o(ve),Be=ve>=0?1:-1,et=b(Be*ve),We=(1+be-De)/2;function it(Ft,Ht){var tr=o(Ht),dr=o(Ft/=2);return[(1+tr)*_(Ft),(Be*Ht>-a(dr,et)-.001?0:-Be*10)+We+_(Ht)*De-(1+tr)*be*dr]}return it.invert=function(Ft,Ht){var tr=0,dr=0,Sr=50;do{var Or=o(tr),Wr=_(tr),ni=o(dr),Pi=_(dr),cn=1+ni,ln=cn*Wr-Ft,Cn=We+Pi*De-cn*be*Or-Ht,Kn=cn*Or/2,Ta=-Wr*Pi,fa=be*cn*Wr/2,$a=De*ni+be*Or*Pi,Co=Ta*fa-$a*Kn,Qa=(Cn*Ta-ln*$a)/Co/2,mo=(ln*fa-Cn*Kn)/Co;n(mo)>2&&(mo/=2),tr-=Qa,dr-=mo}while((n(Qa)>p||n(mo)>p)&&--Sr>0);return Be*dr>-a(o(tr),et)-.001?[tr*2,dr]:null},it}function ie(){var ve=20*T,be=ve>=0?1:-1,De=b(be*ve),Be=t.geoProjectionMutator(me),et=Be(ve),We=et.stream;return et.parallel=function(it){return arguments.length?(De=b((be=(ve=it*T)>=0?1:-1)*ve),Be(ve)):ve*P},et.stream=function(it){var Ft=et.rotate(),Ht=We(it),tr=(et.rotate([0,0]),We(it)),dr=et.precision();return et.rotate(Ft),Ht.sphere=function(){tr.polygonStart(),tr.lineStart();for(var Sr=be*-180;be*Sr<180;Sr+=be*90)tr.point(Sr,be*90);if(ve)for(;be*(Sr-=3*be*dr)>=-180;)tr.point(Sr,be*-a(o(Sr*T/2),De)*P);tr.lineEnd(),tr.polygonEnd()},Ht},et.scale(218.695).center([0,28.0974])}function Se(ve,be){var De=b(be/2),Be=G(1-De*De),et=1+Be*o(ve/=2),We=_(ve)*Be/et,it=De/et,Ft=We*We,Ht=it*it;return[4/3*We*(3+Ft-3*Ht),4/3*it*(3+3*Ft-Ht)]}Se.invert=function(ve,be){if(ve*=3/8,be*=3/8,!ve&&n(be)>1)return null;var De=ve*ve,Be=be*be,et=1+De+Be,We=G((et-G(et*et-4*be*be))/2),it=O(We)/3,Ft=We?te(n(be/We))/3:H(n(ve))/3,Ht=o(it),tr=N(Ft),dr=tr*tr-Ht*Ht;return[v(ve)*2*a(j(Ft)*Ht,.25-dr),v(be)*2*a(tr*_(it),.25+dr)]};function Le(){return t.geoProjection(Se).scale(66.1603)}var Ae=G(8),Fe=u(1+C);function Pe(ve,be){var De=n(be);return De<L?[ve,u(b(L+be/2))]:[ve*o(De)*(2*C-1/_(De)),v(be)*(2*C*(De-L)-u(b(De/2)))]}Pe.invert=function(ve,be){if((We=n(be))<Fe)return[ve,2*i(s(be))-S];var De=L,Be=25,et,We;do{var it=o(De/2),Ft=b(De/2);De-=et=(Ae*(De-L)-u(Ft)-We)/(Ae-it*it/(2*Ft))}while(n(et)>k&&--Be>0);return[ve/(o(De)*(Ae-1/_(De))),v(be)*De]};function ge(){return t.geoProjection(Pe).scale(112.314)}function Re(ve){var be=2*E/ve;function De(Be,et){var We=t.geoAzimuthalEquidistantRaw(Be,et);if(n(Be)>S){var it=a(We[1],We[0]),Ft=G(We[0]*We[0]+We[1]*We[1]),Ht=be*d((it-S)/be)+S,tr=a(_(it-=Ht),2-o(it));it=Ht+O(E/Ft*_(tr))-tr,We[0]=Ft*o(it),We[1]=Ft*_(it)}return We}return De.invert=function(Be,et){var We=G(Be*Be+et*et);if(We>S){var it=a(et,Be),Ft=be*d((it-S)/be)+S,Ht=it>Ft?-1:1,tr=We*o(Ft-it),dr=1/b(Ht*V((tr-E)/G(E*(E-2*tr)+We*We)));it=Ft+2*i((dr+Ht*G(dr*dr-3))/3),Be=We*o(it),et=We*_(it)}return t.geoAzimuthalEquidistantRaw.invert(Be,et)},De}function ce(){var ve=5,be=t.geoProjectionMutator(Re),De=be(ve),Be=De.stream,et=.01,We=-o(et*T),it=_(et*T);return De.lobes=function(Ft){return arguments.length?be(ve=+Ft):ve},De.stream=function(Ft){var Ht=De.rotate(),tr=Be(Ft),dr=(De.rotate([0,0]),Be(Ft));return De.rotate(Ht),tr.sphere=function(){dr.polygonStart(),dr.lineStart();for(var Sr=0,Or=360/ve,Wr=2*E/ve,ni=90-180/ve,Pi=S;Sr<ve;++Sr,ni-=Or,Pi-=Wr)dr.point(a(it*o(Pi),We)*P,O(it*_(Pi))*P),ni<-90?(dr.point(-90,-180-ni-et),dr.point(-90,-180-ni+et)):(dr.point(90,ni+et),dr.point(90,ni-et));dr.lineEnd(),dr.polygonEnd()},tr},De.scale(87.8076).center([0,17.1875]).clipAngle(180-.001)}function Ze(ve,be){if(arguments.length<2&&(be=ve),be===1)return t.geoAzimuthalEqualAreaRaw;if(be===1/0)return ut;function De(Be,et){var We=t.geoAzimuthalEqualAreaRaw(Be/be,et);return We[0]*=ve,We}return De.invert=function(Be,et){var We=t.geoAzimuthalEqualAreaRaw.invert(Be/ve,et);return We[0]*=be,We},De}function ut(ve,be){return[ve*o(be)/o(be/=2),2*_(be)]}ut.invert=function(ve,be){var De=2*O(be/2);return[ve*o(De/2)/o(De),De]};function pt(){var ve=2,be=t.geoProjectionMutator(Ze),De=be(ve);return De.coefficient=function(Be){return arguments.length?be(ve=+Be):ve},De.scale(169.529)}function Zt(ve,be,De){var Be=100,et,We,it;De=De===void 0?0:+De,be=+be;do We=ve(De),it=ve(De+p),We===it&&(it=We+p),De-=et=-1*p*(We-be)/(We-it);while(Be-- >0&&n(et)>p);return Be<0?NaN:De}function st(ve,be,De){return be===void 0&&(be=40),De===void 0&&(De=k),function(Be,et,We,it){var Ft,Ht,tr;We=We===void 0?0:+We,it=it===void 0?0:+it;for(var dr=0;dr<be;dr++){var Sr=ve(We,it),Or=Sr[0]-Be,Wr=Sr[1]-et;if(n(Or)<De&&n(Wr)<De)break;var ni=Or*Or+Wr*Wr;if(ni>Ft){We-=Ht/=2,it-=tr/=2;continue}Ft=ni;var Pi=(We>0?-1:1)*De,cn=(it>0?-1:1)*De,ln=ve(We+Pi,it),Cn=ve(We,it+cn),Kn=(ln[0]-Sr[0])/Pi,Ta=(ln[1]-Sr[1])/Pi,fa=(Cn[0]-Sr[0])/cn,$a=(Cn[1]-Sr[1])/cn,Co=$a*Kn-Ta*fa,Qa=(n(Co)<.5?.5:1)/Co;if(Ht=(Wr*fa-Or*$a)*Qa,tr=(Or*Ta-Wr*Kn)*Qa,We+=Ht,it+=tr,n(Ht)<De&&n(tr)<De)break}return[We,it]}}function lt(){var ve=Ze(1.68,2),be=1.4,De=12;function Be(et,We){if(et+We<-be){var it=(et-We+1.6)*(et+We+be)/8;et+=it,We-=.8*it*_(We+E/2)}var Ft=ve(et,We),Ht=(1-o(et*We))/De;return Ft[1]<0&&(Ft[0]*=1+Ht),Ft[1]>0&&(Ft[1]*=1+Ht/1.5*Ft[0]*Ft[0]),Ft}return Be.invert=st(Be),Be}function Gt(){return t.geoProjection(lt()).rotate([-16.5,-42]).scale(176.57).center([7.93,.09])}function Nt(ve,be){var De=ve*_(be),Be=30,et;do be-=et=(be+_(be)-De)/(1+o(be));while(n(et)>p&&--Be>0);return be/2}function Jt(ve,be,De){function Be(et,We){return[ve*et*o(We=Nt(De,We)),be*_(We)]}return Be.invert=function(et,We){return We=O(We/be),[et/(ve*o(We)),O((2*We+_(2*We))/De)]},Be}var sr=Jt(C/S,C,E);function wr(){return t.geoProjection(sr).scale(169.529)}var cr=2.00276,$e=1.11072;function St(ve,be){var De=Nt(E,be);return[cr*ve/(1/o(be)+$e/o(De)),(be+C*_(De))/cr]}St.invert=function(ve,be){var De=cr*be,Be=be<0?-L:L,et=25,We,it;do it=De-C*_(Be),Be-=We=(_(2*Be)+2*Be-E*_(it))/(2*o(2*Be)+2+E*o(it)*C*o(Be));while(n(We)>p&&--et>0);return it=De-C*_(Be),[ve*(1/o(it)+$e/o(Be))/cr,it]};function Qt(){return t.geoProjection(St).scale(160.857)}function Vt(ve){var be=0,De=t.geoProjectionMutator(ve),Be=De(be);return Be.parallel=function(et){return arguments.length?De(be=et*T):be*P},Be}function _t(ve,be){return[ve*o(be),be]}_t.invert=function(ve,be){return[ve/o(be),be]};function It(){return t.geoProjection(_t).scale(152.63)}function mt(ve){if(!ve)return _t;var be=1/b(ve);function De(Be,et){var We=be+ve-et,it=We&&Be*o(et)/We;return[We*_(it),be-We*o(it)]}return De.invert=function(Be,et){var We=G(Be*Be+(et=be-et)*et),it=be+ve-We;return[We/o(it)*a(Be,et),it]},De}function er(){return Vt(mt).scale(123.082).center([0,26.1441]).parallel(45)}function lr(ve){function be(De,Be){var et=S-Be,We=et&&De*ve*_(et)/et;return[et*_(We)/ve,S-et*o(We)]}return be.invert=function(De,Be){var et=De*ve,We=S-Be,it=G(et*et+We*We),Ft=a(et,We);return[(it?it/_(it):1)*Ft/ve,S-it]},be}function Tr(){var ve=.5,be=t.geoProjectionMutator(lr),De=be(ve);return De.fraction=function(Be){return arguments.length?be(ve=+Be):ve},De.scale(158.837)}var Lr=Jt(1,4/E,E);function ti(){return t.geoProjection(Lr).scale(152.63)}function Br(ve,be,De,Be,et,We){var it=o(We),Ft;if(n(ve)>1||n(We)>1)Ft=V(De*et+be*Be*it);else{var Ht=_(ve/2),tr=_(We/2);Ft=2*O(G(Ht*Ht+be*Be*tr*tr))}return n(Ft)>p?[Ft,a(Be*_(We),be*et-De*Be*it)]:[0,0]}function Vr(ve,be,De){return V((ve*ve+be*be-De*De)/(2*ve*be))}function dt(ve){return ve-2*E*l((ve+E)/(2*E))}function Ge(ve,be,De){for(var Be=[[ve[0],ve[1],_(ve[1]),o(ve[1])],[be[0],be[1],_(be[1]),o(be[1])],[De[0],De[1],_(De[1]),o(De[1])]],et=Be[2],We,it=0;it<3;++it,et=We)We=Be[it],et.v=Br(We[1]-et[1],et[3],et[2],We[3],We[2],We[0]-et[0]),et.point=[0,0];var Ft=Vr(Be[0].v[0],Be[2].v[0],Be[1].v[0]),Ht=Vr(Be[0].v[0],Be[1].v[0],Be[2].v[0]),tr=E-Ft;Be[2].point[1]=0,Be[0].point[0]=-(Be[1].point[0]=Be[0].v[0]/2);var dr=[Be[2].point[0]=Be[0].point[0]+Be[2].v[0]*o(Ft),2*(Be[0].point[1]=Be[1].point[1]=Be[2].v[0]*_(Ft))];function Sr(Or,Wr){var ni=_(Wr),Pi=o(Wr),cn=new Array(3),ln;for(ln=0;ln<3;++ln){var Cn=Be[ln];if(cn[ln]=Br(Wr-Cn[1],Cn[3],Cn[2],Pi,ni,Or-Cn[0]),!cn[ln][0])return Cn.point;cn[ln][1]=dt(cn[ln][1]-Cn.v[1])}var Kn=dr.slice();for(ln=0;ln<3;++ln){var Ta=ln==2?0:ln+1,fa=Vr(Be[ln].v[0],cn[ln][0],cn[Ta][0]);cn[ln][1]<0&&(fa=-fa),ln?ln==1?(fa=Ht-fa,Kn[0]-=cn[ln][0]*o(fa),Kn[1]-=cn[ln][0]*_(fa)):(fa=tr-fa,Kn[0]+=cn[ln][0]*o(fa),Kn[1]+=cn[ln][0]*_(fa)):(Kn[0]+=cn[ln][0]*o(fa),Kn[1]-=cn[ln][0]*_(fa))}return Kn[0]/=3,Kn[1]/=3,Kn}return Sr}function Je(ve){return ve[0]*=T,ve[1]*=T,ve}function je(){return tt([0,22],[45,22],[22.5,-22]).scale(380).center([22.5,2])}function tt(ve,be,De){var Be=t.geoCentroid({type:"MultiPoint",coordinates:[ve,be,De]}),et=[-Be[0],-Be[1]],We=t.geoRotation(et),it=Ge(Je(We(ve)),Je(We(be)),Je(We(De)));it.invert=st(it);var Ft=t.geoProjection(it).rotate(et),Ht=Ft.center;return delete Ft.rotate,Ft.center=function(tr){return arguments.length?Ht(We(tr)):We.invert(Ht())},Ft.clipAngle(90)}function xt(ve,be){var De=G(1-_(be));return[2/M*ve*De,M*(1-De)]}xt.invert=function(ve,be){var De=(De=be/M-1)*De;return[De>0?ve*G(E/De)/2:0,O(1-De)]};function Ie(){return t.geoProjection(xt).scale(95.6464).center([0,30])}function xe(ve){var be=b(ve);function De(Be,et){return[Be,(Be?Be/_(Be):1)*(_(et)*o(Be)-be*o(et))]}return De.invert=be?function(Be,et){Be&&(et*=_(Be)/Be);var We=o(Be);return[Be,2*a(G(We*We+be*be-et*et)-We,be-et)]}:function(Be,et){return[Be,O(Be?et*b(Be)/Be:et)]},De}function ke(){return Vt(xe).scale(249.828).clipAngle(90)}var vt=G(3);function ir(ve,be){return[vt*ve*(2*o(2*be/3)-1)/M,vt*M*_(be/3)]}ir.invert=function(ve,be){var De=3*O(be/(vt*M));return[M*ve/(vt*(2*o(2*De/3)-1)),De]};function ar(){return t.geoProjection(ir).scale(156.19)}function vr(ve){var be=o(ve);function De(Be,et){return[Be*be,_(et)/be]}return De.invert=function(Be,et){return[Be/be,O(et*be)]},De}function ii(){return Vt(vr).parallel(38.58).scale(195.044)}function pi(ve){var be=o(ve);function De(Be,et){return[Be*be,(1+be)*b(et/2)]}return De.invert=function(Be,et){return[Be/be,i(et/(1+be))*2]},De}function $r(){return Vt(pi).scale(124.75)}function di(ve,be){var De=G(8/(3*E));return[De*ve*(1-n(be)/E),De*be]}di.invert=function(ve,be){var De=G(8/(3*E)),Be=be/De;return[ve/(De*(1-n(Be)/E)),Be]};function ji(){return t.geoProjection(di).scale(165.664)}function In(ve,be){var De=G(4-3*_(n(be)));return[2/G(6*E)*ve*De,v(be)*G(2*E/3)*(2-De)]}In.invert=function(ve,be){var De=2-n(be)/G(2*E/3);return[ve*G(6*E)/(2*De),v(be)*O((4-De*De)/3)]};function wi(){return t.geoProjection(In).scale(165.664)}function On(ve,be){var De=G(E*(4+E));return[2/De*ve*(1+G(1-4*be*be/(E*E))),4/De*be]}On.invert=function(ve,be){var De=G(E*(4+E))/2;return[ve*De/(1+G(1-be*be*(4+E)/(4*E))),be*De/2]};function qn(){return t.geoProjection(On).scale(180.739)}function Fn(ve,be){var De=(2+S)*_(be);be/=2;for(var Be=0,et=1/0;Be<10&&n(et)>p;Be++){var We=o(be);be-=et=(be+_(be)*(We+2)-De)/(2*We*(1+We))}return[2/G(E*(4+E))*ve*(1+o(be)),2*G(E/(4+E))*_(be)]}Fn.invert=function(ve,be){var De=be*G((4+E)/E)/2,Be=O(De),et=o(Be);return[ve/(2/G(E*(4+E))*(1+et)),O((Be+De*(et+2))/(2+S))]};function ra(){return t.geoProjection(Fn).scale(180.739)}function la(ve,be){return[ve*(1+o(be))/G(2+E),2*be/G(2+E)]}la.invert=function(ve,be){var De=G(2+E),Be=be*De/2;return[De*ve/(1+o(Be)),Be]};function Ut(){return t.geoProjection(la).scale(173.044)}function wt(ve,be){for(var De=(1+S)*_(be),Be=0,et=1/0;Be<10&&n(et)>p;Be++)be-=et=(be+_(be)-De)/(1+o(be));return De=G(2+E),[ve*(1+o(be))/De,2*be/De]}wt.invert=function(ve,be){var De=1+S,Be=G(De/2);return[ve*2*Be/(1+o(be*=Be)),O((be+_(be))/De)]};function rr(){return t.geoProjection(wt).scale(173.044)}var nr=3+2*C;function Er(ve,be){var De=_(ve/=2),Be=o(ve),et=G(o(be)),We=o(be/=2),it=_(be)/(We+C*Be*et),Ft=G(2/(1+it*it)),Ht=G((C*We+(Be+De)*et)/(C*We+(Be-De)*et));return[nr*(Ft*(Ht-1/Ht)-2*u(Ht)),nr*(Ft*it*(Ht+1/Ht)-2*i(it))]}Er.invert=function(ve,be){if(!(We=Se.invert(ve/1.2,be*1.065)))return null;var De=We[0],Be=We[1],et=20,We;ve/=nr,be/=nr;do{var it=De/2,Ft=Be/2,Ht=_(it),tr=o(it),dr=_(Ft),Sr=o(Ft),Or=o(Be),Wr=G(Or),ni=dr/(Sr+C*tr*Wr),Pi=ni*ni,cn=G(2/(1+Pi)),ln=C*Sr+(tr+Ht)*Wr,Cn=C*Sr+(tr-Ht)*Wr,Kn=ln/Cn,Ta=G(Kn),fa=Ta-1/Ta,$a=Ta+1/Ta,Co=cn*fa-2*u(Ta)-ve,Qa=cn*ni*$a-2*i(ni)-be,mo=dr&&x*Wr*Ht*Pi/dr,Bo=(C*tr*Sr+Wr)/(2*(Sr+C*tr*Wr)*(Sr+C*tr*Wr)*Wr),Ps=-.5*ni*cn*cn*cn,Ts=Ps*mo,wo=Ps*Bo,To=(To=2*Sr+C*Wr*(tr-Ht))*To*Ta,hl=(C*tr*Sr*Wr+Or)/To,Ul=-(C*Ht*dr)/(Wr*To),Lu=fa*Ts-2*hl/Ta+cn*(hl+hl/Kn),au=fa*wo-2*Ul/Ta+cn*(Ul+Ul/Kn),Js=ni*$a*Ts-2*mo/(1+Pi)+cn*$a*mo+cn*ni*(hl-hl/Kn),Ql=ni*$a*wo-2*Bo/(1+Pi)+cn*$a*Bo+cn*ni*(Ul-Ul/Kn),dc=au*Js-Ql*Lu;if(!dc)break;var Tl=(Qa*au-Co*Ql)/dc,Al=(Co*Js-Qa*Lu)/dc;De-=Tl,Be=c(-S,f(S,Be-Al))}while((n(Tl)>p||n(Al)>p)&&--et>0);return n(n(Be)-S)<p?[0,Be]:et&&[De,Be]};function Xr(){return t.geoProjection(Er).scale(62.5271)}var ri=o(35*T);function Qr(ve,be){var De=b(be/2);return[ve*ri*G(1-De*De),(1+ri)*De]}Qr.invert=function(ve,be){var De=be/(1+ri);return[ve&&ve/(ri*G(1-De*De)),2*i(De)]};function Oi(){return t.geoProjection(Qr).scale(137.152)}function $i(ve,be){var De=be/2,Be=o(De);return[2*ve/M*o(be)*Be*Be,M*b(De)]}$i.invert=function(ve,be){var De=i(be/M),Be=o(De),et=2*De;return[ve*M/2/(o(et)*Be*Be),et]};function tn(){return t.geoProjection($i).scale(135.264)}function fn(ve){var be=1-ve,De=We(E,0)[0]-We(-E,0)[0],Be=We(0,S)[1]-We(0,-S)[1],et=G(2*Be/De);function We(Ht,tr){var dr=o(tr),Sr=_(tr);return[dr/(be+ve*dr)*Ht,be*tr+ve*Sr]}function it(Ht,tr){var dr=We(Ht,tr);return[dr[0]*et,dr[1]/et]}function Ft(Ht){return it(0,Ht)[1]}return it.invert=function(Ht,tr){var dr=Zt(Ft,tr),Sr=Ht/et*(ve+be/o(dr));return[Sr,dr]},it}function yn(){var ve=.5,be=t.geoProjectionMutator(fn),De=be(ve);return De.alpha=function(Be){return arguments.length?be(ve=+Be):ve},De.scale(168.725)}function Sn(ve){return[ve[0]/2,O(b(ve[1]/2*T))*P]}function Ba(ve){return[ve[0]*2,2*i(_(ve[1]*T))*P]}function ua(ve){ve==null&&(ve=t.geoOrthographic);var be=ve(),De=t.geoEquirectangular().scale(P).precision(0).clipAngle(null).translate([0,0]);function Be(We){return be(Sn(We))}be.invert&&(Be.invert=function(We){return Ba(be.invert(We))}),Be.stream=function(We){var it=be.stream(We),Ft=De.stream({point:function(Ht,tr){it.point(Ht/2,O(b(-tr/2*T))*P)},lineStart:function(){it.lineStart()},lineEnd:function(){it.lineEnd()},polygonStart:function(){it.polygonStart()},polygonEnd:function(){it.polygonEnd()}});return Ft.sphere=it.sphere,Ft};function et(We){Be[We]=function(){return arguments.length?(be[We].apply(be,arguments),Be):be[We]()}}return Be.rotate=function(We){return arguments.length?(De.rotate(We),Be):De.rotate()},Be.center=function(We){return arguments.length?(be.center(Sn(We)),Be):Ba(be.center())},et("angle"),et("clipAngle"),et("clipExtent"),et("fitExtent"),et("fitHeight"),et("fitSize"),et("fitWidth"),et("scale"),et("translate"),et("precision"),Be.scale(249.5)}function ma(ve,be){var De=2*E/be,Be=ve*ve;function et(We,it){var Ft=t.geoAzimuthalEquidistantRaw(We,it),Ht=Ft[0],tr=Ft[1],dr=Ht*Ht+tr*tr;if(dr>Be){var Sr=G(dr),Or=a(tr,Ht),Wr=De*d(Or/De),ni=Or-Wr,Pi=ve*o(ni),cn=(ve*_(ni)-ni*_(Pi))/(S-Pi),ln=Wa(ni,cn),Cn=(E-ve)/Fa(ln,Pi,E);Ht=Sr;var Kn=50,Ta;do Ht-=Ta=(ve+Fa(ln,Pi,Ht)*Cn-Sr)/(ln(Ht)*Cn);while(n(Ta)>p&&--Kn>0);tr=ni*_(Ht),Ht<S&&(tr-=cn*(Ht-S));var fa=_(Wr),$a=o(Wr);Ft[0]=Ht*$a-tr*fa,Ft[1]=Ht*fa+tr*$a}return Ft}return et.invert=function(We,it){var Ft=We*We+it*it;if(Ft>Be){var Ht=G(Ft),tr=a(it,We),dr=De*d(tr/De),Sr=tr-dr;We=Ht*o(Sr),it=Ht*_(Sr);for(var Or=We-S,Wr=_(We),ni=it/Wr,Pi=We<S?1/0:0,cn=10;;){var ln=ve*_(ni),Cn=ve*o(ni),Kn=_(Cn),Ta=S-Cn,fa=(ln-ni*Kn)/Ta,$a=Wa(ni,fa);if(n(Pi)<k||!--cn)break;ni-=Pi=(ni*Wr-fa*Or-it)/(Wr-Or*2*(Ta*(Cn+ni*ln*o(Cn)-Kn)-ln*(ln-ni*Kn))/(Ta*Ta))}Ht=ve+Fa($a,Cn,We)*(E-ve)/Fa($a,Cn,E),tr=dr+ni,We=Ht*o(tr),it=Ht*_(tr)}return t.geoAzimuthalEquidistantRaw.invert(We,it)},et}function Wa(ve,be){return function(De){var Be=ve*o(De);return De<S&&(Be-=be),G(1+Be*Be)}}function Fa(ve,be,De){for(var Be=50,et=(De-be)/Be,We=ve(be)+ve(De),it=1,Ft=be;it<Be;++it)We+=2*ve(Ft+=et);return We*.5*et}function Wo(){var ve=6,be=30*T,De=o(be),Be=_(be),et=t.geoProjectionMutator(ma),We=et(be,ve),it=We.stream,Ft=.01,Ht=-o(Ft*T),tr=_(Ft*T);return We.radius=function(dr){return arguments.length?(De=o(be=dr*T),Be=_(be),et(be,ve)):be*P},We.lobes=function(dr){return arguments.length?et(be,ve=+dr):ve},We.stream=function(dr){var Sr=We.rotate(),Or=it(dr),Wr=(We.rotate([0,0]),it(dr));return We.rotate(Sr),Or.sphere=function(){Wr.polygonStart(),Wr.lineStart();for(var ni=0,Pi=2*E/ve,cn=0;ni<ve;++ni,cn-=Pi)Wr.point(a(tr*o(cn),Ht)*P,O(tr*_(cn))*P),Wr.point(a(Be*o(cn-Pi/2),De)*P,O(Be*_(cn-Pi/2))*P);Wr.lineEnd(),Wr.polygonEnd()},Or},We.rotate([90,-40]).scale(91.7095).clipAngle(180-.001)}function da(ve,be,De,Be,et,We,it,Ft){arguments.length<8&&(Ft=0);function Ht(tr,dr){if(!dr)return[ve*tr/E,0];var Sr=dr*dr,Or=ve+Sr*(be+Sr*(De+Sr*Be)),Wr=dr*(et-1+Sr*(We-Ft+Sr*it)),ni=(Or*Or+Wr*Wr)/(2*Wr),Pi=tr*O(Or/ni)/E;return[ni*_(Pi),dr*(1+Sr*Ft)+ni*(1-o(Pi))]}return Ht.invert=function(tr,dr){var Sr=E*tr/ve,Or=dr,Wr,ni,Pi=50;do{var cn=Or*Or,ln=ve+cn*(be+cn*(De+cn*Be)),Cn=Or*(et-1+cn*(We-Ft+cn*it)),Kn=ln*ln+Cn*Cn,Ta=2*Cn,fa=Kn/Ta,$a=fa*fa,Co=O(ln/fa)/E,Qa=Sr*Co,mo=ln*ln,Bo=(2*be+cn*(4*De+cn*6*Be))*Or,Ps=et+cn*(3*We+cn*5*it),Ts=2*(ln*Bo+Cn*(Ps-1)),wo=2*(Ps-1),To=(Ts*Ta-Kn*wo)/(Ta*Ta),hl=o(Qa),Ul=_(Qa),Lu=fa*hl,au=fa*Ul,Js=Sr/E*(1/G(1-mo/$a))*(Bo*fa-ln*To)/$a,Ql=au-tr,dc=Or*(1+cn*Ft)+fa-Lu-dr,Tl=To*Ul+Lu*Js,Al=Lu*Co,X=1+To-(To*hl-au*Js),se=au*Co,Te=Tl*se-X*Al;if(!Te)break;Sr-=Wr=(dc*Tl-Ql*X)/Te,Or-=ni=(Ql*se-dc*Al)/Te}while((n(Wr)>p||n(ni)>p)&&--Pi>0);return[Sr,Or]},Ht}var Wn=da(2.8284,-1.6988,.75432,-.18071,1.76003,-.38914,.042555);function Ha(){return t.geoProjection(Wn).scale(149.995)}var vo=da(2.583819,-.835827,.170354,-.038094,1.543313,-.411435,.082742);function jn(){return t.geoProjection(vo).scale(153.93)}var Mt=da(5/6*E,-.62636,-.0344,0,1.3493,-.05524,0,.045);function kr(){return t.geoProjection(Mt).scale(130.945)}function Jr(ve,be){var De=ve*ve,Be=be*be;return[ve*(1-.162388*Be)*(.87-952426e-9*De*De),be*(1+Be/12)]}Jr.invert=function(ve,be){var De=ve,Be=be,et=50,We;do{var it=Be*Be;Be-=We=(Be*(1+it/12)-be)/(1+it/4)}while(n(We)>p&&--et>0);et=50,ve/=1-.162388*it;do{var Ft=(Ft=De*De)*Ft;De-=We=(De*(.87-952426e-9*Ft)-ve)/(.87-.00476213*Ft)}while(n(We)>p&&--et>0);return[De,Be]};function vi(){return t.geoProjection(Jr).scale(131.747)}var hn=da(2.6516,-.76534,.19123,-.047094,1.36289,-.13965,.031762);function An(){return t.geoProjection(hn).scale(131.087)}function Mn(ve){var be=ve(S,0)[0]-ve(-S,0)[0];function De(Be,et){var We=Be>0?-.5:.5,it=ve(Be+We*E,et);return it[0]-=We*be,it}return ve.invert&&(De.invert=function(Be,et){var We=Be>0?-.5:.5,it=ve.invert(Be+We*be,et),Ft=it[0]-We*E;return Ft<-E?Ft+=2*E:Ft>E&&(Ft-=2*E),it[0]=Ft,it}),De}function Li(ve,be){var De=v(ve),Be=v(be),et=o(be),We=o(ve)*et,it=_(ve)*et,Ft=_(Be*be);ve=n(a(it,Ft)),be=O(We),n(ve-S)>p&&(ve%=S);var Ht=_n(ve>E/4?S-ve:ve,be);return ve>E/4&&(Ft=Ht[0],Ht[0]=-Ht[1],Ht[1]=-Ft),Ht[0]*=De,Ht[1]*=-Be,Ht}Li.invert=function(ve,be){n(ve)>1&&(ve=v(ve)*2-ve),n(be)>1&&(be=v(be)*2-be);var De=v(ve),Be=v(be),et=-De*ve,We=-Be*be,it=We/et<1,Ft=ya(it?We:et,it?et:We),Ht=Ft[0],tr=Ft[1],dr=o(tr);return it&&(Ht=-S-Ht),[De*(a(_(Ht)*dr,-_(tr))+E),Be*O(o(Ht)*dr)]};function _n(ve,be){if(be===S)return[0,0];var De=_(be),Be=De*De,et=Be*Be,We=1+et,it=1+3*et,Ft=1-et,Ht=O(1/G(We)),tr=Ft+Be*We*Ht,dr=(1-De)/tr,Sr=G(dr),Or=dr*We,Wr=G(Or),ni=Sr*Ft,Pi,cn;if(ve===0)return[0,-(ni+Be*Wr)];var ln=o(be),Cn=1/ln,Kn=2*De*ln,Ta=(-3*Be+Ht*it)*Kn,fa=(-tr*ln-(1-De)*Ta)/(tr*tr),$a=.5*fa/Sr,Co=Ft*$a-2*Be*Sr*Kn,Qa=Be*We*fa+dr*it*Kn,mo=-Cn*Kn,Bo=-Cn*Qa,Ps=-2*Cn*Co,Ts=4*ve/E,wo;if(ve>.222*E||be<E/4&&ve>.175*E){if(Pi=(ni+Be*G(Or*(1+et)-ni*ni))/(1+et),ve>E/4)return[Pi,Pi];var To=Pi,hl=.5*Pi;Pi=.5*(hl+To),cn=50;do{var Ul=G(Or-Pi*Pi),Lu=Pi*(Ps+mo*Ul)+Bo*O(Pi/Wr)-Ts;if(!Lu)break;Lu<0?hl=Pi:To=Pi,Pi=.5*(hl+To)}while(n(To-hl)>p&&--cn>0)}else{Pi=p,cn=25;do{var au=Pi*Pi,Js=G(Or-au),Ql=Ps+mo*Js,dc=Pi*Ql+Bo*O(Pi/Wr)-Ts,Tl=Ql+(Bo-mo*au)/Js;Pi-=wo=Js?dc/Tl:0}while(n(wo)>p&&--cn>0)}return[Pi,-ni-Be*G(Or-Pi*Pi)]}function ya(ve,be){for(var De=0,Be=1,et=.5,We=50;;){var it=et*et,Ft=G(et),Ht=O(1/G(1+it)),tr=1-it+et*(1+it)*Ht,dr=(1-Ft)/tr,Sr=G(dr),Or=dr*(1+it),Wr=Sr*(1-it),ni=Or-ve*ve,Pi=G(ni),cn=be+Wr+et*Pi;if(n(Be-De)<k||--We===0||cn===0)break;cn>0?De=et:Be=et,et=.5*(De+Be)}if(!We)return null;var ln=O(Ft),Cn=o(ln),Kn=1/Cn,Ta=2*Ft*Cn,fa=(-3*et+Ht*(1+3*it))*Ta,$a=(-tr*Cn-(1-Ft)*fa)/(tr*tr),Co=.5*$a/Sr,Qa=(1-it)*Co-2*et*Sr*Ta,mo=-2*Kn*Qa,Bo=-Kn*Ta,Ps=-Kn*(et*(1+it)*$a+dr*(1+3*it)*Ta);return[E/4*(ve*(mo+Bo*Pi)+Ps*O(ve/G(Or))),ln]}function Jn(){return t.geoProjection(Mn(Li)).scale(239.75)}function Ma(ve,be,De){var Be,et,We;return ve?(Be=_o(ve,De),be?(et=_o(be,1-De),We=et[1]*et[1]+De*Be[0]*Be[0]*et[0]*et[0],[[Be[0]*et[2]/We,Be[1]*Be[2]*et[0]*et[1]/We],[Be[1]*et[1]/We,-Be[0]*Be[2]*et[0]*et[2]/We],[Be[2]*et[1]*et[2]/We,-De*Be[0]*Be[1]*et[0]/We]]):[[Be[0],0],[Be[1],0],[Be[2],0]]):(et=_o(be,1-De),[[0,et[0]/et[1]],[1/et[1],0],[et[2]/et[1],0]])}function _o(ve,be){var De,Be,et,We,it;if(be<p)return We=_(ve),Be=o(ve),De=be*(ve-We*Be)/4,[We-De*Be,Be+De*We,1-be*We*We/2,ve-De];if(be>=1-p)return De=(1-be)/4,Be=N(ve),We=Z(ve),et=1/Be,it=Be*j(ve),[We+De*(it-ve)/(Be*Be),et-De*We*et*(it-ve),et+De*We*et*(it+ve),2*i(s(ve))-S+De*(it-ve)/Be];var Ft=[1,0,0,0,0,0,0,0,0],Ht=[G(be),0,0,0,0,0,0,0,0],tr=0;for(Be=G(1-be),it=1;n(Ht[tr]/Ft[tr])>p&&tr<8;)De=Ft[tr++],Ht[tr]=(De-Be)/2,Ft[tr]=(De+Be)/2,Be=G(De*Be),it*=2;et=it*Ft[tr]*ve;do We=Ht[tr]*_(Be=et)/Ft[tr],et=(O(We)+et)/2;while(--tr);return[_(et),We=o(et),We/o(et-Be),et]}function No(ve,be,De){var Be=n(ve),et=n(be),We=j(et);if(Be){var it=1/_(Be),Ft=1/(b(Be)*b(Be)),Ht=-(Ft+De*(We*We*it*it)-1+De),tr=(De-1)*Ft,dr=(-Ht+G(Ht*Ht-4*tr))/2;return[po(i(1/G(dr)),De)*v(ve),po(i(G((dr/Ft-1)/De)),1-De)*v(be)]}return[0,po(i(We),1-De)*v(be)]}function po(ve,be){if(!be)return ve;if(be===1)return u(b(ve/2+L));for(var De=1,Be=G(1-be),et=G(be),We=0;n(et)>p;We++){if(ve%E){var it=i(Be*b(ve)/De);it<0&&(it+=E),ve+=it+~~(ve/E)*E}else ve+=ve;et=(De+Be)/2,Be=G(De*Be),et=((De=et)-Be)/2}return ve/(h(2,We)*De)}function Lo(ve,be){var De=(C-1)/(C+1),Be=G(1-De*De),et=po(S,Be*Be),We=-1,it=u(b(E/4+n(be)/2)),Ft=s(We*it)/G(De),Ht=ko(Ft*o(We*ve),Ft*_(We*ve)),tr=No(Ht[0],Ht[1],Be*Be);return[-tr[1],(be>=0?1:-1)*(.5*et-tr[0])]}function ko(ve,be){var De=ve*ve,Be=be+1,et=1-De-be*be;return[.5*((ve>=0?S:-S)-a(et,2*ve)),-.25*u(et*et+4*De)+.5*u(Be*Be+De)]}function Ds(ve,be){var De=be[0]*be[0]+be[1]*be[1];return[(ve[0]*be[0]+ve[1]*be[1])/De,(ve[1]*be[0]-ve[0]*be[1])/De]}Lo.invert=function(ve,be){var De=(C-1)/(C+1),Be=G(1-De*De),et=po(S,Be*Be),We=-1,it=Ma(.5*et-be,-ve,Be*Be),Ft=Ds(it[0],it[1]),Ht=a(Ft[1],Ft[0])/We;return[Ht,2*i(s(.5/We*u(De*Ft[0]*Ft[0]+De*Ft[1]*Ft[1])))-S]};function Fs(){return t.geoProjection(Mn(Lo)).scale(151.496)}function ll(ve){var be=_(ve),De=o(ve),Be=ul(ve);Be.invert=ul(-ve);function et(We,it){var Ft=Be(We,it);We=Ft[0],it=Ft[1];var Ht=_(it),tr=o(it),dr=o(We),Sr=V(be*Ht+De*tr*dr),Or=_(Sr),Wr=n(Or)>p?Sr/Or:1;return[Wr*De*_(We),(n(We)>S?Wr:-Wr)*(be*tr-De*Ht*dr)]}return et.invert=function(We,it){var Ft=G(We*We+it*it),Ht=-_(Ft),tr=o(Ft),dr=Ft*tr,Sr=-it*Ht,Or=Ft*be,Wr=G(dr*dr+Sr*Sr-Or*Or),ni=a(dr*Or+Sr*Wr,Sr*Or-dr*Wr),Pi=(Ft>S?-1:1)*a(We*Ht,Ft*o(ni)*tr+it*_(ni)*Ht);return Be.invert(Pi,ni)},et}function ul(ve){var be=_(ve),De=o(ve);return function(Be,et){var We=o(et),it=o(Be)*We,Ft=_(Be)*We,Ht=_(et);return[a(Ft,it*De-Ht*be),O(Ht*De+it*be)]}}function zl(){var ve=0,be=t.geoProjectionMutator(ll),De=be(ve),Be=De.rotate,et=De.stream,We=t.geoCircle();return De.parallel=function(it){if(!arguments.length)return ve*P;var Ft=De.rotate();return be(ve=it*T).rotate(Ft)},De.rotate=function(it){return arguments.length?(Be.call(De,[it[0],it[1]-ve*P]),We.center([-it[0],-it[1]]),De):(it=Be.call(De),it[1]+=ve*P,it)},De.stream=function(it){return it=et(it),it.sphere=function(){it.polygonStart();var Ft=.01,Ht=We.radius(90-Ft)().coordinates[0],tr=Ht.length-1,dr=-1,Sr;for(it.lineStart();++dr<tr;)it.point((Sr=Ht[dr])[0],Sr[1]);for(it.lineEnd(),Ht=We.radius(90+Ft)().coordinates[0],tr=Ht.length-1,it.lineStart();--dr>=0;)it.point((Sr=Ht[dr])[0],Sr[1]);it.lineEnd(),it.polygonEnd()},it},De.scale(79.4187).parallel(45).clipAngle(180-.001)}var us=3,il=O(1-1/us)*P,As=vr(0);function cl(ve){var be=il*T,De=xt(E,be)[0]-xt(-E,be)[0],Be=As(0,be)[1],et=xt(0,be)[1],We=M-et,it=g/ve,Ft=4/g,Ht=Be+We*We*4/g;function tr(dr,Sr){var Or,Wr=n(Sr);if(Wr>be){var ni=f(ve-1,c(0,l((dr+E)/it)));dr+=E*(ve-1)/ve-ni*it,Or=xt(dr,Wr),Or[0]=Or[0]*g/De-g*(ve-1)/(2*ve)+ni*g/ve,Or[1]=Be+(Or[1]-et)*4*We/g,Sr<0&&(Or[1]=-Or[1])}else Or=As(dr,Sr);return Or[0]*=Ft,Or[1]/=Ht,Or}return tr.invert=function(dr,Sr){dr/=Ft,Sr*=Ht;var Or=n(Sr);if(Or>Be){var Wr=f(ve-1,c(0,l((dr+E)/it)));dr=(dr+E*(ve-1)/ve-Wr*it)*De/g;var ni=xt.invert(dr,.25*(Or-Be)*g/We+et);return ni[0]-=E*(ve-1)/ve-Wr*it,Sr<0&&(ni[1]=-ni[1]),ni}return As.invert(dr,Sr)},tr}function Ks(ve,be){return[ve,be&1?90-p:il]}function zs(ve,be){return[ve,be&1?-90+p:-il]}function Io(ve){return[ve[0]*(1-p),ve[1]]}function ls(ve){var be=[].concat(r.range(-180,180+ve/2,ve).map(Ks),r.range(180,-180-ve/2,-ve).map(zs));return{type:"Polygon",coordinates:[ve===180?be.map(Io):be]}}function Zl(){var ve=4,be=t.geoProjectionMutator(cl),De=be(ve),Be=De.stream;return De.lobes=function(et){return arguments.length?be(ve=+et):ve},De.stream=function(et){var We=De.rotate(),it=Be(et),Ft=(De.rotate([0,0]),Be(et));return De.rotate(We),it.sphere=function(){t.geoStream(ls(180/ve),Ft)},it},De.scale(239.75)}function Su(ve){var be=1+ve,De=_(1/be),Be=O(De),et=2*G(E/(We=E+4*Be*be)),We,it=.5*et*(be+G(ve*(2+ve))),Ft=ve*ve,Ht=be*be;function tr(dr,Sr){var Or=1-_(Sr),Wr,ni;if(Or&&Or<2){var Pi=S-Sr,cn=25,ln;do{var Cn=_(Pi),Kn=o(Pi),Ta=Be+a(Cn,be-Kn),fa=1+Ht-2*be*Kn;Pi-=ln=(Pi-Ft*Be-be*Cn+fa*Ta-.5*Or*We)/(2*be*Cn*Ta)}while(n(ln)>k&&--cn>0);Wr=et*G(fa),ni=dr*Ta/E}else Wr=et*(ve+Or),ni=dr*Be/E;return[Wr*_(ni),it-Wr*o(ni)]}return tr.invert=function(dr,Sr){var Or=dr*dr+(Sr-=it)*Sr,Wr=(1+Ht-Or/(et*et))/(2*be),ni=V(Wr),Pi=_(ni),cn=Be+a(Pi,be-Wr);return[O(dr/G(Or))*E/cn,O(1-2*(ni-Ft*Be-be*Pi+(1+Ht-2*be*Wr)*cn)/We)]},tr}function nc(){var ve=1,be=t.geoProjectionMutator(Su),De=be(ve);return De.ratio=function(Be){return arguments.length?be(ve=+Be):ve},De.scale(167.774).center([0,18.67])}var bs=.7109889596207567,Rn=.0528035274542;function _a(ve,be){return be>-bs?(ve=sr(ve,be),ve[1]+=Rn,ve):_t(ve,be)}_a.invert=function(ve,be){return be>-bs?sr.invert(ve,be-Rn):_t.invert(ve,be)};function Vu(){return t.geoProjection(_a).rotate([-20,-55]).scale(164.263).center([0,-5.4036])}function Ol(ve,be){return n(be)>bs?(ve=sr(ve,be),ve[1]-=be>0?Rn:-Rn,ve):_t(ve,be)}Ol.invert=function(ve,be){return n(be)>bs?sr.invert(ve,be+(be>0?Rn:-Rn)):_t.invert(ve,be)};function xo(){return t.geoProjection(Ol).scale(152.63)}function Yl(ve,be,De,Be){var et=G(4*E/(2*De+(1+ve-be/2)*_(2*De)+(ve+be)/2*_(4*De)+be/2*_(6*De))),We=G(Be*_(De)*G((1+ve*o(2*De)+be*o(4*De))/(1+ve+be))),it=De*Ht(1);function Ft(Sr){return G(1+ve*o(2*Sr)+be*o(4*Sr))}function Ht(Sr){var Or=Sr*De;return(2*Or+(1+ve-be/2)*_(2*Or)+(ve+be)/2*_(4*Or)+be/2*_(6*Or))/De}function tr(Sr){return Ft(Sr)*_(Sr)}var dr=function(Sr,Or){var Wr=De*Zt(Ht,it*_(Or)/De,Or/E);isNaN(Wr)&&(Wr=De*v(Or));var ni=et*Ft(Wr);return[ni*We*Sr/E*o(Wr),ni/We*_(Wr)]};return dr.invert=function(Sr,Or){var Wr=Zt(tr,Or*We/et);return[Sr*E/(o(Wr)*et*We*Ft(Wr)),O(De*Ht(Wr/De)/it)]},De===0&&(et=G(Be/E),dr=function(Sr,Or){return[Sr*et,_(Or)/et]},dr.invert=function(Sr,Or){return[Sr/et,O(Or*et)]}),dr}function Ns(){var ve=1,be=0,De=45*T,Be=2,et=t.geoProjectionMutator(Yl),We=et(ve,be,De,Be);return We.a=function(it){return arguments.length?et(ve=+it,be,De,Be):ve},We.b=function(it){return arguments.length?et(ve,be=+it,De,Be):be},We.psiMax=function(it){return arguments.length?et(ve,be,De=+it*T,Be):De*P},We.ratio=function(it){return arguments.length?et(ve,be,De,Be=+it):Be},We.scale(180.739)}function Hl(ve,be,De,Be,et,We,it,Ft,Ht,tr,dr){if(dr.nanEncountered)return NaN;var Sr,Or,Wr,ni,Pi,cn,ln,Cn,Kn,Ta;if(Sr=De-be,Or=ve(be+Sr*.25),Wr=ve(De-Sr*.25),isNaN(Or)){dr.nanEncountered=!0;return}if(isNaN(Wr)){dr.nanEncountered=!0;return}return ni=Sr*(Be+4*Or+et)/12,Pi=Sr*(et+4*Wr+We)/12,cn=ni+Pi,Ta=(cn-it)/15,tr>Ht?(dr.maxDepthCount++,cn+Ta):Math.abs(Ta)<Ft?cn+Ta:(ln=be+Sr*.5,Cn=Hl(ve,be,ln,Be,Or,et,ni,Ft*.5,Ht,tr+1,dr),isNaN(Cn)?(dr.nanEncountered=!0,NaN):(Kn=Hl(ve,ln,De,et,Wr,We,Pi,Ft*.5,Ht,tr+1,dr),isNaN(Kn)?(dr.nanEncountered=!0,NaN):Cn+Kn))}function ac(ve,be,De,Be,et){var We={maxDepthCount:0,nanEncountered:!1};Be===void 0&&(Be=1e-8),et===void 0&&(et=20);var it=ve(be),Ft=ve(.5*(be+De)),Ht=ve(De),tr=(it+4*Ft+Ht)*(De-be)/6,dr=Hl(ve,be,De,it,Ft,Ht,tr,Be,et,1,We);return dr}function aa(ve,be,De){function Be(Wr){return ve+(1-ve)*h(1-h(Wr,be),1/be)}function et(Wr){return ac(Be,0,Wr,1e-4)}for(var We=1/et(1),it=1e3,Ft=(1+1e-8)*We,Ht=[],tr=0;tr<=it;tr++)Ht.push(et(tr/it)*Ft);function dr(Wr){var ni=0,Pi=it,cn=it>>1;do Ht[cn]>Wr?Pi=cn:ni=cn,cn=ni+Pi>>1;while(cn>ni);var ln=Ht[cn+1]-Ht[cn];return ln&&(ln=(Wr-Ht[cn+1])/ln),(cn+1+ln)/it}var Sr=2*dr(1)/E*We/De,Or=function(Wr,ni){var Pi=dr(n(_(ni))),cn=Be(Pi)*Wr;return Pi/=Sr,[cn,ni>=0?Pi:-Pi]};return Or.invert=function(Wr,ni){var Pi;return ni*=Sr,n(ni)<1&&(Pi=v(ni)*O(et(n(ni))*We)),[Wr/Be(n(ni)),Pi]},Or}function Oo(){var ve=0,be=2.5,De=1.183136,Be=t.geoProjectionMutator(aa),et=Be(ve,be,De);return et.alpha=function(We){return arguments.length?Be(ve=+We,be,De):ve},et.k=function(We){return arguments.length?Be(ve,be=+We,De):be},et.gamma=function(We){return arguments.length?Be(ve,be,De=+We):De},et.scale(152.63)}function qo(ve,be){return n(ve[0]-be[0])<p&&n(ve[1]-be[1])<p}function ql(ve,be){for(var De=-1,Be=ve.length,et=ve[0],We,it,Ft,Ht=[];++De<Be;){We=ve[De],it=(We[0]-et[0])/be,Ft=(We[1]-et[1])/be;for(var tr=0;tr<be;++tr)Ht.push([et[0]+tr*it,et[1]+tr*Ft]);et=We}return Ht.push(We),Ht}function Pc(ve){var be=[],De,Be,et,We,it,Ft,Ht,tr=ve[0].length;for(Ht=0;Ht<tr;++Ht)De=ve[0][Ht],Be=De[0][0],et=De[0][1],We=De[1][1],it=De[2][0],Ft=De[2][1],be.push(ql([[Be+p,et+p],[Be+p,We-p],[it-p,We-p],[it-p,Ft+p]],30));for(Ht=ve[1].length-1;Ht>=0;--Ht)De=ve[1][Ht],Be=De[0][0],et=De[0][1],We=De[1][1],it=De[2][0],Ft=De[2][1],be.push(ql([[it-p,Ft-p],[it-p,We+p],[Be+p,We+p],[Be+p,et-p]],30));return{type:"Polygon",coordinates:[r.merge(be)]}}function Do(ve,be,De){var Be,et;function We(Ht,tr){for(var dr=tr<0?-1:1,Sr=be[+(tr<0)],Or=0,Wr=Sr.length-1;Or<Wr&&Ht>Sr[Or][2][0];++Or);var ni=ve(Ht-Sr[Or][1][0],tr);return ni[0]+=ve(Sr[Or][1][0],dr*tr>dr*Sr[Or][0][1]?Sr[Or][0][1]:tr)[0],ni}De?We.invert=De(We):ve.invert&&(We.invert=function(Ht,tr){for(var dr=et[+(tr<0)],Sr=be[+(tr<0)],Or=0,Wr=dr.length;Or<Wr;++Or){var ni=dr[Or];if(ni[0][0]<=Ht&&Ht<ni[1][0]&&ni[0][1]<=tr&&tr<ni[1][1]){var Pi=ve.invert(Ht-ve(Sr[Or][1][0],0)[0],tr);return Pi[0]+=Sr[Or][1][0],qo(We(Pi[0],Pi[1]),[Ht,tr])?Pi:null}}});var it=t.geoProjection(We),Ft=it.stream;return it.stream=function(Ht){var tr=it.rotate(),dr=Ft(Ht),Sr=(it.rotate([0,0]),Ft(Ht));return it.rotate(tr),dr.sphere=function(){t.geoStream(Be,Sr)},dr},it.lobes=function(Ht){return arguments.length?(Be=Pc(Ht),be=Ht.map(function(tr){return tr.map(function(dr){return[[dr[0][0]*T,dr[0][1]*T],[dr[1][0]*T,dr[1][1]*T],[dr[2][0]*T,dr[2][1]*T]]})}),et=be.map(function(tr){return tr.map(function(dr){var Sr=ve(dr[0][0],dr[0][1])[0],Or=ve(dr[2][0],dr[2][1])[0],Wr=ve(dr[1][0],dr[0][1])[1],ni=ve(dr[1][0],dr[1][1])[1],Pi;return Wr>ni&&(Pi=Wr,Wr=ni,ni=Pi),[[Sr,Wr],[Or,ni]]})}),it):be.map(function(tr){return tr.map(function(dr){return[[dr[0][0]*P,dr[0][1]*P],[dr[1][0]*P,dr[1][1]*P],[dr[2][0]*P,dr[2][1]*P]]})})},be!=null&&it.lobes(be),it}var rf=[[[[-180,0],[-100,90],[-40,0]],[[-40,0],[30,90],[180,0]]],[[[-180,0],[-160,-90],[-100,0]],[[-100,0],[-60,-90],[-20,0]],[[-20,0],[20,-90],[80,0]],[[80,0],[140,-90],[180,0]]]];function Uf(){return Do(St,rf).scale(160.857)}var pl=[[[[-180,0],[-100,90],[-40,0]],[[-40,0],[30,90],[180,0]]],[[[-180,0],[-160,-90],[-100,0]],[[-100,0],[-60,-90],[-20,0]],[[-20,0],[20,-90],[80,0]],[[80,0],[140,-90],[180,0]]]];function Zc(){return Do(Ol,pl).scale(152.63)}var Kl=[[[[-180,0],[-100,90],[-40,0]],[[-40,0],[30,90],[180,0]]],[[[-180,0],[-160,-90],[-100,0]],[[-100,0],[-60,-90],[-20,0]],[[-20,0],[20,-90],[80,0]],[[80,0],[140,-90],[180,0]]]];function Os(){return Do(sr,Kl).scale(169.529)}var yu=[[[[-180,0],[-90,90],[0,0]],[[0,0],[90,90],[180,0]]],[[[-180,0],[-90,-90],[0,0]],[[0,0],[90,-90],[180,0]]]];function oc(){return Do(sr,yu).scale(169.529).rotate([20,0])}var Cf=[[[[-180,35],[-30,90],[0,35]],[[0,35],[30,90],[180,35]]],[[[-180,-10],[-102,-90],[-65,-10]],[[-65,-10],[5,-90],[77,-10]],[[77,-10],[103,-90],[180,-10]]]];function sc(){return Do(_a,Cf,st).rotate([-20,-55]).scale(164.263).center([0,-5.4036])}var Vh=[[[[-180,0],[-110,90],[-40,0]],[[-40,0],[0,90],[40,0]],[[40,0],[110,90],[180,0]]],[[[-180,0],[-110,-90],[-40,0]],[[-40,0],[0,-90],[40,0]],[[40,0],[110,-90],[180,0]]]];function Lf(){return Do(_t,Vh).scale(152.63).rotate([-20,0])}function cs(ve,be){return[3/g*ve*G(E*E/3-be*be),be]}cs.invert=function(ve,be){return[g/3*ve/G(E*E/3-be*be),be]};function nf(){return t.geoProjection(cs).scale(158.837)}function Vf(ve){function be(De,Be){if(n(n(Be)-S)<p)return[0,Be<0?-2:2];var et=_(Be),We=h((1+et)/(1-et),ve/2),it=.5*(We+1/We)+o(De*=ve);return[2*_(De)/it,(We-1/We)/it]}return be.invert=function(De,Be){var et=n(Be);if(n(et-2)<p)return De?null:[0,v(Be)*S];if(et>2)return null;De/=2,Be/=2;var We=De*De,it=Be*Be,Ft=2*Be/(1+We+it);return Ft=h((1+Ft)/(1-Ft),1/ve),[a(2*De,1-We-it)/ve,O((Ft-1)/(Ft+1))]},be}function Jl(){var ve=.5,be=t.geoProjectionMutator(Vf),De=be(ve);return De.spacing=function(Be){return arguments.length?be(ve=+Be):ve},De.scale(124.75)}var fl=E/C;function lc(ve,be){return[ve*(1+G(o(be)))/2,be/(o(be/2)*o(ve/6))]}lc.invert=function(ve,be){var De=n(ve),Be=n(be),et=p,We=S;Be<fl?We*=Be/fl:et+=6*V(fl/Be);for(var it=0;it<25;it++){var Ft=_(We),Ht=G(o(We)),tr=_(We/2),dr=o(We/2),Sr=_(et/6),Or=o(et/6),Wr=.5*et*(1+Ht)-De,ni=We/(dr*Or)-Be,Pi=Ht?-.25*et*Ft/Ht:0,cn=.5*(1+Ht),ln=(1+.5*We*tr/dr)/(dr*Or),Cn=We/dr*(Sr/6)/(Or*Or),Kn=Pi*Cn-ln*cn,Ta=(Wr*Cn-ni*cn)/Kn,fa=(ni*Pi-Wr*ln)/Kn;if(We-=Ta,et-=fa,n(Ta)<p&&n(fa)<p)break}return[ve<0?-et:et,be<0?-We:We]};function Fu(){return t.geoProjection(lc).scale(97.2672)}function Es(ve,be){var De=ve*ve,Be=be*be;return[ve*(.975534+Be*(-.119161+De*-.0143059+Be*-.0547009)),be*(1.00384+De*(.0802894+Be*-.02855+De*199025e-9)+Be*(.0998909+Be*-.0491032))]}Es.invert=function(ve,be){var De=v(ve)*E,Be=be/2,et=50;do{var We=De*De,it=Be*Be,Ft=De*Be,Ht=De*(.975534+it*(-.119161+We*-.0143059+it*-.0547009))-ve,tr=Be*(1.00384+We*(.0802894+it*-.02855+We*199025e-9)+it*(.0998909+it*-.0491032))-be,dr=.975534-it*(.119161+3*We*.0143059+it*.0547009),Sr=-Ft*(2*.119161+4*.0547009*it+2*.0143059*We),Or=Ft*(2*.0802894+4*199025e-9*We+2*-.02855*it),Wr=1.00384+We*(.0802894+199025e-9*We)+it*(3*(.0998909-.02855*We)-5*.0491032*it),ni=Sr*Or-Wr*dr,Pi=(tr*Sr-Ht*Wr)/ni,cn=(Ht*Or-tr*dr)/ni;De-=Pi,Be-=cn}while((n(Pi)>p||n(cn)>p)&&--et>0);return et&&[De,Be]};function Hs(){return t.geoProjection(Es).scale(139.98)}function Go(ve,be){return[_(ve)/o(be),b(be)*o(ve)]}Go.invert=function(ve,be){var De=ve*ve,Be=be*be,et=Be+1,We=De+et,it=ve?x*G((We-G(We*We-4*De))/De):1/G(et);return[O(ve*it),v(be)*V(it)]};function ps(){return t.geoProjection(Go).scale(144.049).clipAngle(90-.001)}function uc(ve){var be=o(ve),De=b(L+ve/2);function Be(et,We){var it=We-ve,Ft=n(it)<p?et*be:n(Ft=L+We/2)<p||n(n(Ft)-S)<p?0:et*it/u(b(Ft)/De);return[Ft,it]}return Be.invert=function(et,We){var it,Ft=We+ve;return[n(We)<p?et/be:n(it=L+Ft/2)<p||n(n(it)-S)<p?0:et*u(b(it)/De)/We,Ft]},Be}function xl(){return Vt(uc).parallel(40).scale(158.837)}function Gu(ve,be){return[ve,1.25*u(b(L+.4*be))]}Gu.invert=function(ve,be){return[ve,2.5*i(s(.8*be))-.625*E]};function qs(){return t.geoProjection(Gu).scale(108.318)}function ad(ve){var be=ve.length-1;function De(Be,et){for(var We=o(et),it=2/(1+We*o(Be)),Ft=it*We*_(Be),Ht=it*_(et),tr=be,dr=ve[tr],Sr=dr[0],Or=dr[1],Wr;--tr>=0;)dr=ve[tr],Sr=dr[0]+Ft*(Wr=Sr)-Ht*Or,Or=dr[1]+Ft*Or+Ht*Wr;return Sr=Ft*(Wr=Sr)-Ht*Or,Or=Ft*Or+Ht*Wr,[Sr,Or]}return De.invert=function(Be,et){var We=20,it=Be,Ft=et;do{for(var Ht=be,tr=ve[Ht],dr=tr[0],Sr=tr[1],Or=0,Wr=0,ni;--Ht>=0;)tr=ve[Ht],Or=dr+it*(ni=Or)-Ft*Wr,Wr=Sr+it*Wr+Ft*ni,dr=tr[0]+it*(ni=dr)-Ft*Sr,Sr=tr[1]+it*Sr+Ft*ni;Or=dr+it*(ni=Or)-Ft*Wr,Wr=Sr+it*Wr+Ft*ni,dr=it*(ni=dr)-Ft*Sr-Be,Sr=it*Sr+Ft*ni-et;var Pi=Or*Or+Wr*Wr,cn,ln;it-=cn=(dr*Or+Sr*Wr)/Pi,Ft-=ln=(Sr*Or-dr*Wr)/Pi}while(n(cn)+n(ln)>p*p&&--We>0);if(We){var Cn=G(it*it+Ft*Ft),Kn=2*i(Cn*.5),Ta=_(Kn);return[a(it*Ta,Cn*o(Kn)),Cn?O(Ft*Ta/Cn):0]}},De}var Po=[[.9972523,0],[.0052513,-.0041175],[.0074606,.0048125],[-.0153783,-.1968253],[.0636871,-.1408027],[.3660976,-.2937382]],od=[[.98879,0],[0,0],[-.050909,0],[0,0],[.075528,0]],Yo=[[.984299,0],[.0211642,.0037608],[-.1036018,-.0575102],[-.0329095,-.0320119],[.0499471,.1223335],[.026046,.0899805],[7388e-7,-.1435792],[.0075848,-.1334108],[-.0216473,.0776645],[-.0225161,.0853673]],Pa=[[.9245,0],[0,0],[.01943,0]],af=[[.721316,0],[0,0],[-.00881625,-.00617325]];function Hu(){return Bl(Po,[152,-64]).scale(1400).center([-160.908,62.4864]).clipAngle(30).angle(7.8)}function bl(){return Bl(od,[95,-38]).scale(1e3).clipAngle(55).center([-96.5563,38.8675])}function Gf(){return Bl(Yo,[120,-45]).scale(359.513).clipAngle(55).center([-117.474,53.0628])}function Ic(){return Bl(Pa,[-20,-18]).scale(209.091).center([20,16.7214]).clipAngle(82)}function yf(){return Bl(af,[165,10]).scale(250).clipAngle(130).center([-165,-10])}function Bl(ve,be){var De=t.geoProjection(ad(ve)).rotate(be).clipAngle(90),Be=t.geoRotation(be),et=De.center;return delete De.rotate,De.center=function(We){return arguments.length?et(Be(We)):Be.invert(et())},De}var wh=G(6),Qf=G(7);function _f(ve,be){var De=O(7*_(be)/(3*wh));return[wh*ve*(2*o(2*De/3)-1)/Qf,9*_(De/3)/Qf]}_f.invert=function(ve,be){var De=3*O(be*Qf/9);return[ve*Qf/(wh*(2*o(2*De/3)-1)),O(_(De)*3*wh/7)]};function Yc(){return t.geoProjection(_f).scale(164.859)}function eh(ve,be){for(var De=(1+x)*_(be),Be=be,et=0,We;et<25&&(Be-=We=(_(Be/2)+_(Be)-De)/(.5*o(Be/2)+o(Be)),!(n(We)<p));et++);return[ve*(1+2*o(Be)/o(Be/2))/(3*C),2*G(3)*_(Be/2)/G(2+C)]}eh.invert=function(ve,be){var De=be*G(2+C)/(2*G(3)),Be=2*O(De);return[3*C*ve/(1+2*o(Be)/o(Be/2)),O((De+_(Be))/(1+x))]};function th(){return t.geoProjection(eh).scale(188.209)}function ju(ve,be){for(var De=G(6/(4+E)),Be=(1+E/4)*_(be),et=be/2,We=0,it;We<25&&(et-=it=(et/2+_(et)-Be)/(.5+o(et)),!(n(it)<p));We++);return[De*(.5+o(et))*ve/1.5,De*et]}ju.invert=function(ve,be){var De=G(6/(4+E)),Be=be/De;return n(n(Be)-S)<p&&(Be=Be<0?-S:S),[1.5*ve/(De*(.5+o(Be))),O((Be/2+_(Be))/(1+E/4))]};function Hf(){return t.geoProjection(ju).scale(166.518)}function cc(ve,be){var De=be*be,Be=De*De,et=De*Be;return[ve*(.84719-.13063*De+et*et*(-.04515+.05494*De-.02326*Be+.00331*et)),be*(1.01183+Be*Be*(-.02625+.01926*De-.00396*Be))]}cc.invert=function(ve,be){var De=be,Be=25,et,We,it,Ft;do We=De*De,it=We*We,De-=et=(De*(1.01183+it*it*(-.02625+.01926*We-.00396*it))-be)/(1.01183+it*it*(9*-.02625+11*.01926*We+13*-.00396*it));while(n(et)>k&&--Be>0);return We=De*De,it=We*We,Ft=We*it,[ve/(.84719-.13063*We+Ft*Ft*(-.04515+.05494*We-.02326*it+.00331*Ft)),De]};function of(){return t.geoProjection(cc).scale(175.295)}function Nl(ve,be){return[ve*(1+o(be))/2,2*(be-b(be/2))]}Nl.invert=function(ve,be){for(var De=be/2,Be=0,et=1/0;Be<10&&n(et)>p;++Be){var We=o(be/2);be-=et=(be-b(be/2)-De)/(1-.5/(We*We))}return[2*ve/(1+o(be)),be]};function Kc(){return t.geoProjection(Nl).scale(152.63)}var Rc=[[[[-180,0],[-90,90],[0,0]],[[0,0],[90,90],[180,0]]],[[[-180,0],[-90,-90],[0,0]],[[0,0],[90,-90],[180,0]]]];function gs(){return Do(Ze(1/0),Rc).rotate([20,0]).scale(152.63)}function jf(ve,be){var De=_(be),Be=o(be),et=v(ve);if(ve===0||n(be)===S)return[0,be];if(be===0)return[ve,0];if(n(ve)===S)return[ve*Be,S*De];var We=E/(2*ve)-2*ve/E,it=2*be/E,Ft=(1-it*it)/(De-it),Ht=We*We,tr=Ft*Ft,dr=1+Ht/tr,Sr=1+tr/Ht,Or=(We*De/Ft-We/2)/dr,Wr=(tr*De/Ht+Ft/2)/Sr,ni=Or*Or+Be*Be/dr,Pi=Wr*Wr-(tr*De*De/Ht+Ft*De-1)/Sr;return[S*(Or+G(ni)*et),S*(Wr+G(Pi<0?0:Pi)*v(-be*We)*et)]}jf.invert=function(ve,be){ve/=S,be/=S;var De=ve*ve,Be=be*be,et=De+Be,We=E*E;return[ve?(et-1+G((1-et)*(1-et)+4*De))/(2*ve)*S:0,Zt(function(it){return et*(E*_(it)-2*it)*E+4*it*it*(be-_(it))+2*E*it-We*be},0)]};function Gh(){return t.geoProjection(jf).scale(127.267)}var rh=1.0148,sf=.23185,Th=-.14499,Mu=.02406,ih=rh,js=5*sf,Eu=7*Th,Dc=9*Mu,ks=1.790857183;function bc(ve,be){var De=be*be;return[ve,be*(rh+De*De*(sf+De*(Th+Mu*De)))]}bc.invert=function(ve,be){be>ks?be=ks:be<-ks&&(be=-ks);var De=be,Be;do{var et=De*De;De-=Be=(De*(rh+et*et*(sf+et*(Th+Mu*et)))-be)/(ih+et*et*(js+et*(Eu+Dc*et)))}while(n(Be)>p);return[ve,De]};function hu(){return t.geoProjection(bc).scale(139.319)}function _u(ve,be){if(n(be)<p)return[ve,0];var De=b(be),Be=ve*_(be);return[_(Be)/De,be+(1-o(Be))/De]}_u.invert=function(ve,be){if(n(be)<p)return[ve,0];var De=ve*ve+be*be,Be=be*.5,et=10,We;do{var it=b(Be),Ft=1/o(Be),Ht=De-2*be*Be+Be*Be;Be-=We=(it*Ht+2*(Be-be))/(2+Ht*Ft*Ft+2*(Be-be)*it)}while(n(We)>p&&--et>0);return it=b(Be),[(n(be)<n(Be+1/it)?O(ve*it):v(be)*v(ve)*(V(n(ve*it))+S))/_(Be),Be]};function nl(){return t.geoProjection(_u).scale(103.74)}function nh(ve,be){var De=Fc(ve[1],ve[0]),Be=Fc(be[1],be[0]),et=bd(De,Be),We=wc(De)/wc(Be);return zu([1,0,ve[0][0],0,1,ve[0][1]],zu([We,0,0,0,We,0],zu([o(et),_(et),0,-_(et),o(et),0],[1,0,-be[0][0],0,1,-be[0][1]])))}function Ah(ve){var be=1/(ve[0]*ve[4]-ve[1]*ve[3]);return[be*ve[4],-be*ve[1],be*(ve[1]*ve[5]-ve[2]*ve[4]),-be*ve[3],be*ve[0],be*(ve[2]*ve[3]-ve[0]*ve[5])]}function zu(ve,be){return[ve[0]*be[0]+ve[1]*be[3],ve[0]*be[1]+ve[1]*be[4],ve[0]*be[2]+ve[1]*be[5]+ve[2],ve[3]*be[0]+ve[4]*be[3],ve[3]*be[1]+ve[4]*be[4],ve[3]*be[2]+ve[4]*be[5]+ve[5]]}function Fc(ve,be){return[ve[0]-be[0],ve[1]-be[1]]}function wc(ve){return G(ve[0]*ve[0]+ve[1]*ve[1])}function bd(ve,be){return a(ve[0]*be[1]-ve[1]*be[0],ve[0]*be[0]+ve[1]*be[1])}function xf(ve,be,De){Be(ve,{transform:null});function Be(tr,dr){if(tr.edges=jl(tr.face),dr.face){var Sr=tr.shared=bf(tr.face,dr.face),Or=nh(Sr.map(dr.project),Sr.map(tr.project));tr.transform=dr.transform?zu(dr.transform,Or):Or;for(var Wr=dr.edges,ni=0,Pi=Wr.length;ni<Pi;++ni)Ou(Sr[0],Wr[ni][1])&&Ou(Sr[1],Wr[ni][0])&&(Wr[ni]=tr),Ou(Sr[0],Wr[ni][0])&&Ou(Sr[1],Wr[ni][1])&&(Wr[ni]=tr);for(Wr=tr.edges,ni=0,Pi=Wr.length;ni<Pi;++ni)Ou(Sr[0],Wr[ni][0])&&Ou(Sr[1],Wr[ni][1])&&(Wr[ni]=dr),Ou(Sr[0],Wr[ni][1])&&Ou(Sr[1],Wr[ni][0])&&(Wr[ni]=dr)}else tr.transform=dr.transform;return tr.children&&tr.children.forEach(function(cn){Be(cn,tr)}),tr}function et(tr,dr){var Sr=be(tr,dr),Or=Sr.project([tr*P,dr*P]),Wr;return(Wr=Sr.transform)?[Wr[0]*Or[0]+Wr[1]*Or[1]+Wr[2],-(Wr[3]*Or[0]+Wr[4]*Or[1]+Wr[5])]:(Or[1]=-Or[1],Or)}lf(ve)&&(et.invert=function(tr,dr){var Sr=We(ve,[tr,-dr]);return Sr&&(Sr[0]*=T,Sr[1]*=T,Sr)});function We(tr,dr){var Sr=tr.project.invert,Or=tr.transform,Wr=dr;if(Or&&(Or=Ah(Or),Wr=[Or[0]*Wr[0]+Or[1]*Wr[1]+Or[2],Or[3]*Wr[0]+Or[4]*Wr[1]+Or[5]]),Sr&&tr===it(ni=Sr(Wr)))return ni;for(var ni,Pi=tr.children,cn=0,ln=Pi&&Pi.length;cn<ln;++cn)if(ni=We(Pi[cn],dr))return ni}function it(tr){return be(tr[0]*T,tr[1]*T)}var Ft=t.geoProjection(et),Ht=Ft.stream;return Ft.stream=function(tr){var dr=Ft.rotate(),Sr=Ht(tr),Or=(Ft.rotate([0,0]),Ht(tr));return Ft.rotate(dr),Sr.sphere=function(){Or.polygonStart(),Or.lineStart(),Pf(Or,ve),Or.lineEnd(),Or.polygonEnd()},Sr},Ft.angle(De==null?-30:De*P)}function Pf(ve,be,De){var Be,et=be.edges,We=et.length,it,Ft={type:"MultiPoint",coordinates:be.face},Ht=be.face.filter(function(Pi){return n(Pi[1])!==90}),tr=t.geoBounds({type:"MultiPoint",coordinates:Ht}),dr=!1,Sr=-1,Or=tr[1][0]-tr[0][0],Wr=Or===180||Or===360?[(tr[0][0]+tr[1][0])/2,(tr[0][1]+tr[1][1])/2]:t.geoCentroid(Ft);if(De)for(;++Sr<We&&et[Sr]!==De;);++Sr;for(var ni=0;ni<We;++ni)it=et[(ni+Sr)%We],Array.isArray(it)?(dr||(ve.point((Be=t.geoInterpolate(it[0],Wr)(p))[0],Be[1]),dr=!0),ve.point((Be=t.geoInterpolate(it[1],Wr)(p))[0],Be[1])):(dr=!1,it!==De&&Pf(ve,it,be))}function Ou(ve,be){return ve&&be&&ve[0]===be[0]&&ve[1]===be[1]}function bf(ve,be){for(var De,Be,et=ve.length,We=null,it=0;it<et;++it){De=ve[it];for(var Ft=be.length;--Ft>=0;)if(Be=be[Ft],De[0]===Be[0]&&De[1]===Be[1]){if(We)return[We,De];We=De}}}function jl(ve){for(var be=ve.length,De=[],Be=ve[be-1],et=0;et<be;++et)De.push([Be,Be=ve[et]]);return De}function lf(ve){return ve.project.invert||ve.children&&ve.children.some(lf)}var Hh=[[0,90],[-90,0],[0,0],[90,0],[180,0],[0,-90]],If=[[0,2,1],[0,3,2],[5,1,2],[5,2,3],[0,1,4],[0,4,3],[5,4,1],[5,3,4]].map(function(ve){return ve.map(function(be){return Hh[be]})});function Cs(ve){ve=ve||function(De){var Be=t.geoCentroid({type:"MultiPoint",coordinates:De});return t.geoGnomonic().scale(1).translate([0,0]).rotate([-Be[0],-Be[1]])};var be=If.map(function(De){return{face:De,project:ve(De)}});return[-1,0,0,1,0,1,4,5].forEach(function(De,Be){var et=be[De];et&&(et.children||(et.children=[])).push(be[Be])}),xf(be[0],function(De,Be){return be[De<-E/2?Be<0?6:4:De<0?Be<0?2:0:De<E/2?Be<0?3:1:Be<0?7:5]}).angle(-30).scale(101.858).center([0,45])}var du=2/G(3);function ku(ve,be){var De=xt(ve,be);return[De[0]*du,De[1]]}ku.invert=function(ve,be){return xt.invert(ve/du,be)};function Wf(ve){ve=ve||function(De){var Be=t.geoCentroid({type:"MultiPoint",coordinates:De});return t.geoProjection(ku).translate([0,0]).scale(1).rotate(Be[1]>0?[-Be[0],0]:[180-Be[0],180])};var be=If.map(function(De){return{face:De,project:ve(De)}});return[-1,0,0,1,0,1,4,5].forEach(function(De,Be){var et=be[De];et&&(et.children||(et.children=[])).push(be[Be])}),xf(be[0],function(De,Be){return be[De<-E/2?Be<0?6:4:De<0?Be<0?2:0:De<E/2?Be<0?3:1:Be<0?7:5]}).angle(-30).scale(121.906).center([0,48.5904])}function Us(ve){ve=ve||function(it){var Ft=it.length===6?t.geoCentroid({type:"MultiPoint",coordinates:it}):it[0];return t.geoGnomonic().scale(1).translate([0,0]).rotate([-Ft[0],-Ft[1]])};var be=If.map(function(it){for(var Ft=it.map(Rf),Ht=Ft.length,tr=Ft[Ht-1],dr,Sr=[],Or=0;Or<Ht;++Or)dr=Ft[Or],Sr.push(Wu([tr[0]*.9486832980505138+dr[0]*.31622776601683794,tr[1]*.9486832980505138+dr[1]*.31622776601683794,tr[2]*.9486832980505138+dr[2]*.31622776601683794]),Wu([dr[0]*.9486832980505138+tr[0]*.31622776601683794,dr[1]*.9486832980505138+tr[1]*.31622776601683794,dr[2]*.9486832980505138+tr[2]*.31622776601683794])),tr=dr;return Sr}),De=[],Be=[-1,0,0,1,0,1,4,5];be.forEach(function(it,Ft){for(var Ht=If[Ft],tr=Ht.length,dr=De[Ft]=[],Sr=0;Sr<tr;++Sr)be.push([Ht[Sr],it[(Sr*2+2)%(2*tr)],it[(Sr*2+1)%(2*tr)]]),Be.push(Ft),dr.push(zc(Rf(it[(Sr*2+2)%(2*tr)]),Rf(it[(Sr*2+1)%(2*tr)])))});var et=be.map(function(it){return{project:ve(it),face:it}});Be.forEach(function(it,Ft){var Ht=et[it];Ht&&(Ht.children||(Ht.children=[])).push(et[Ft])});function We(it,Ft){var Ht=o(Ft),tr=[Ht*o(it),Ht*_(it),_(Ft)],dr=it<-E/2?Ft<0?6:4:it<0?Ft<0?2:0:it<E/2?Ft<0?3:1:Ft<0?7:5,Sr=De[dr];return et[wf(Sr[0],tr)<0?8+3*dr:wf(Sr[1],tr)<0?8+3*dr+1:wf(Sr[2],tr)<0?8+3*dr+2:dr]}return xf(et[0],We).angle(-30).scale(110.625).center([0,45])}function wf(ve,be){for(var De=0,Be=ve.length,et=0;De<Be;++De)et+=ve[De]*be[De];return et}function zc(ve,be){return[ve[1]*be[2]-ve[2]*be[1],ve[2]*be[0]-ve[0]*be[2],ve[0]*be[1]-ve[1]*be[0]]}function Wu(ve){return[a(ve[1],ve[0])*P,O(c(-1,f(1,ve[2])))*P]}function Rf(ve){var be=ve[0]*T,De=ve[1]*T,Be=o(De);return[Be*o(be),Be*_(be),_(De)]}function Xu(){}function uf(ve){if((De=ve.length)<4)return!1;for(var be=0,De,Be=ve[De-1][1]*ve[0][0]-ve[De-1][0]*ve[0][1];++be<De;)Be+=ve[be-1][1]*ve[be][0]-ve[be-1][0]*ve[be][1];return Be<=0}function Xf(ve,be){for(var De=be[0],Be=be[1],et=!1,We=0,it=ve.length,Ft=it-1;We<it;Ft=We++){var Ht=ve[We],tr=Ht[0],dr=Ht[1],Sr=ve[Ft],Or=Sr[0],Wr=Sr[1];dr>Be^Wr>Be&&De<(Or-tr)*(Be-dr)/(Wr-dr)+tr&&(et=!et)}return et}function Wl(ve,be){var De=be.stream,Be;if(!De)throw new Error("invalid projection");switch(ve&&ve.type){case"Feature":Be=Zu;break;case"FeatureCollection":Be=ah;break;default:Be=Tc;break}return Be(ve,De)}function ah(ve,be){return{type:"FeatureCollection",features:ve.features.map(function(De){return Zu(De,be)})}}function Zu(ve,be){return{type:"Feature",id:ve.id,properties:ve.properties,geometry:Tc(ve.geometry,be)}}function Oc(ve,be){return{type:"GeometryCollection",geometries:ve.geometries.map(function(De){return Tc(De,be)})}}function Tc(ve,be){if(!ve)return null;if(ve.type==="GeometryCollection")return Oc(ve,be);var De;switch(ve.type){case"Point":De=qc;break;case"MultiPoint":De=qc;break;case"LineString":De=cf;break;case"MultiLineString":De=cf;break;case"Polygon":De=fc;break;case"MultiPolygon":De=fc;break;case"Sphere":De=fc;break;default:return null}return t.geoStream(ve,be(De)),De.result()}var wl=[],vu=[],qc={point:function(ve,be){wl.push([ve,be])},result:function(){var ve=wl.length?wl.length<2?{type:"Point",coordinates:wl[0]}:{type:"MultiPoint",coordinates:wl}:null;return wl=[],ve}},cf={lineStart:Xu,point:function(ve,be){wl.push([ve,be])},lineEnd:function(){wl.length&&(vu.push(wl),wl=[])},result:function(){var ve=vu.length?vu.length<2?{type:"LineString",coordinates:vu[0]}:{type:"MultiLineString",coordinates:vu}:null;return vu=[],ve}},fc={polygonStart:Xu,lineStart:Xu,point:function(ve,be){wl.push([ve,be])},lineEnd:function(){var ve=wl.length;if(ve){do wl.push(wl[0].slice());while(++ve<4);vu.push(wl),wl=[]}},polygonEnd:Xu,result:function(){if(!vu.length)return null;var ve=[],be=[];return vu.forEach(function(De){uf(De)?ve.push([De]):be.push(De)}),be.forEach(function(De){var Be=De[0];ve.some(function(et){if(Xf(et[0],Be))return et.push(De),!0})||ve.push([De])}),vu=[],ve.length?ve.length>1?{type:"MultiPolygon",coordinates:ve}:{type:"Polygon",coordinates:ve[0]}:null}};function Bc(ve){var be=ve(S,0)[0]-ve(-S,0)[0];function De(Be,et){var We=n(Be)<S,it=ve(We?Be:Be>0?Be-E:Be+E,et),Ft=(it[0]-it[1])*x,Ht=(it[0]+it[1])*x;if(We)return[Ft,Ht];var tr=be*x,dr=Ft>0^Ht>0?-1:1;return[dr*Ft-v(Ht)*tr,dr*Ht-v(Ft)*tr]}return ve.invert&&(De.invert=function(Be,et){var We=(Be+et)*x,it=(et-Be)*x,Ft=n(We)<.5*be&&n(it)<.5*be;if(!Ft){var Ht=be*x,tr=We>0^it>0?-1:1,dr=-tr*Be+(it>0?1:-1)*Ht,Sr=-tr*et+(We>0?1:-1)*Ht;We=(-dr-Sr)*x,it=(dr-Sr)*x}var Or=ve.invert(We,it);return Ft||(Or[0]+=We>0?E:-E),Or}),t.geoProjection(De).rotate([-90,-90,45]).clipAngle(180-.001)}function At(){return Bc(Li).scale(176.423)}function Wt(){return Bc(Lo).scale(111.48)}function Cr(ve,be){if(!(0<=(be=+be)&&be<=20))throw new Error("invalid digits");function De(tr){var dr=tr.length,Sr=2,Or=new Array(dr);for(Or[0]=+tr[0].toFixed(be),Or[1]=+tr[1].toFixed(be);Sr<dr;)Or[Sr]=tr[Sr],++Sr;return Or}function Be(tr){return tr.map(De)}function et(tr){for(var dr=De(tr[0]),Sr=[dr],Or=1;Or<tr.length;Or++){var Wr=De(tr[Or]);(Wr.length>2||Wr[0]!=dr[0]||Wr[1]!=dr[1])&&(Sr.push(Wr),dr=Wr)}return Sr.length===1&&tr.length>1&&Sr.push(De(tr[tr.length-1])),Sr}function We(tr){return tr.map(et)}function it(tr){if(tr==null)return tr;var dr;switch(tr.type){case"GeometryCollection":dr={type:"GeometryCollection",geometries:tr.geometries.map(it)};break;case"Point":dr={type:"Point",coordinates:De(tr.coordinates)};break;case"MultiPoint":dr={type:tr.type,coordinates:Be(tr.coordinates)};break;case"LineString":dr={type:tr.type,coordinates:et(tr.coordinates)};break;case"MultiLineString":case"Polygon":dr={type:tr.type,coordinates:We(tr.coordinates)};break;case"MultiPolygon":dr={type:"MultiPolygon",coordinates:tr.coordinates.map(We)};break;default:return tr}return tr.bbox!=null&&(dr.bbox=tr.bbox),dr}function Ft(tr){var dr={type:"Feature",properties:tr.properties,geometry:it(tr.geometry)};return tr.id!=null&&(dr.id=tr.id),tr.bbox!=null&&(dr.bbox=tr.bbox),dr}if(ve!=null)switch(ve.type){case"Feature":return Ft(ve);case"FeatureCollection":{var Ht={type:"FeatureCollection",features:ve.features.map(Ft)};return ve.bbox!=null&&(Ht.bbox=ve.bbox),Ht}default:return it(ve)}return ve}function Ar(ve){var be=_(ve);function De(Be,et){var We=be?b(Be*be/2)/be:Be/2;if(!et)return[2*We,-ve];var it=2*i(We*_(et)),Ft=1/b(et);return[_(it)*Ft,et+(1-o(it))*Ft-ve]}return De.invert=function(Be,et){if(n(et+=ve)<p)return[be?2*i(be*Be/2)/be:Be,0];var We=Be*Be+et*et,it=0,Ft=10,Ht;do{var tr=b(it),dr=1/o(it),Sr=We-2*et*it+it*it;it-=Ht=(tr*Sr+2*(it-et))/(2+Sr*dr*dr+2*(it-et)*tr)}while(n(Ht)>p&&--Ft>0);var Or=Be*(tr=b(it)),Wr=b(n(et)<n(it+1/tr)?O(Or)*.5:V(Or)*.5+E/4)/_(it);return[be?2*i(be*Wr)/be:2*Wr,it]},De}function Kr(){return Vt(Ar).scale(131.215)}var ki=[[.9986,-.062],[1,0],[.9986,.062],[.9954,.124],[.99,.186],[.9822,.248],[.973,.31],[.96,.372],[.9427,.434],[.9216,.4958],[.8962,.5571],[.8679,.6176],[.835,.6769],[.7986,.7346],[.7597,.7903],[.7186,.8435],[.6732,.8936],[.6213,.9394],[.5722,.9761],[.5322,1]];ki.forEach(function(ve){ve[1]*=1.0144});function Xi(ve,be){var De=f(18,n(be)*36/E),Be=l(De),et=De-Be,We=(Sr=ki[Be])[0],it=Sr[1],Ft=(Sr=ki[++Be])[0],Ht=Sr[1],tr=(Sr=ki[f(19,++Be)])[0],dr=Sr[1],Sr;return[ve*(Ft+et*(tr-We)/2+et*et*(tr-2*Ft+We)/2),(be>0?S:-S)*(Ht+et*(dr-it)/2+et*et*(dr-2*Ht+it)/2)]}Xi.invert=function(ve,be){var De=be/S,Be=De*90,et=f(18,n(Be/5)),We=c(0,l(et));do{var it=ki[We][1],Ft=ki[We+1][1],Ht=ki[f(19,We+2)][1],tr=Ht-it,dr=Ht-2*Ft+it,Sr=2*(n(De)-Ft)/tr,Or=dr/tr,Wr=Sr*(1-Or*Sr*(1-2*Or*Sr));if(Wr>=0||We===1){Be=(be>=0?5:-5)*(Wr+et);var ni=50,Pi;do et=f(18,n(Be)/5),We=l(et),Wr=et-We,it=ki[We][1],Ft=ki[We+1][1],Ht=ki[f(19,We+2)][1],Be-=(Pi=(be>=0?S:-S)*(Ft+Wr*(Ht-it)/2+Wr*Wr*(Ht-2*Ft+it)/2)-be)*P;while(n(Pi)>k&&--ni>0);break}}while(--We>=0);var cn=ki[We][0],ln=ki[We+1][0],Cn=ki[f(19,We+2)][0];return[ve/(ln+Wr*(Cn-cn)/2+Wr*Wr*(Cn-2*ln+cn)/2),Be*T]};function dn(){return t.geoProjection(Xi).scale(152.63)}function wn(ve){function be(De,Be){var et=o(Be),We=(ve-1)/(ve-et*o(De));return[We*et*_(De),We*_(Be)]}return be.invert=function(De,Be){var et=De*De+Be*Be,We=G(et),it=(ve-G(1-et*(ve+1)/(ve-1)))/((ve-1)/We+We/(ve-1));return[a(De*it,We*G(1-it*it)),We?O(Be*it/We):0]},be}function Nn(ve,be){var De=wn(ve);if(!be)return De;var Be=o(be),et=_(be);function We(it,Ft){var Ht=De(it,Ft),tr=Ht[1],dr=tr*et/(ve-1)+Be;return[Ht[0]*Be/dr,tr/dr]}return We.invert=function(it,Ft){var Ht=(ve-1)/(ve-1-Ft*et);return De.invert(Ht*it,Ht*Ft*Be)},We}function Yi(){var ve=2,be=0,De=t.geoProjectionMutator(Nn),Be=De(ve,be);return Be.distance=function(et){return arguments.length?De(ve=+et,be):ve},Be.tilt=function(et){return arguments.length?De(ve,be=et*T):be*P},Be.scale(432.147).clipAngle(V(1/ve)*P-1e-6)}var Qi=1e-4,on=1e4,Fi=-180,$n=Fi+Qi,Ca=180,Ra=Ca-Qi,La=-90,Na=La+Qi,Yn=90,Dn=Yn-Qi;function Ka(ve){return ve.length>0}function bo(ve){return Math.floor(ve*on)/on}function Xo(ve){return ve===La||ve===Yn?[0,ve]:[Fi,bo(ve)]}function Ss(ve){var be=ve[0],De=ve[1],Be=!1;return be<=$n?(be=Fi,Be=!0):be>=Ra&&(be=Ca,Be=!0),De<=Na?(De=La,Be=!0):De>=Dn&&(De=Yn,Be=!0),Be?[be,De]:ve}function as(ve){return ve.map(Ss)}function ws(ve,be,De){for(var Be=0,et=ve.length;Be<et;++Be){var We=ve[Be].slice();De.push({index:-1,polygon:be,ring:We});for(var it=0,Ft=We.length;it<Ft;++it){var Ht=We[it],tr=Ht[0],dr=Ht[1];if(tr<=$n||tr>=Ra||dr<=Na||dr>=Dn){We[it]=Ss(Ht);for(var Sr=it+1;Sr<Ft;++Sr){var Or=We[Sr],Wr=Or[0],ni=Or[1];if(Wr>$n&&Wr<Ra&&ni>Na&&ni<Dn)break}if(Sr===it+1)continue;if(it){var Pi={index:-1,polygon:be,ring:We.slice(0,it+1)};Pi.ring[Pi.ring.length-1]=Xo(dr),De[De.length-1]=Pi}else De.pop();if(Sr>=Ft)break;De.push({index:-1,polygon:be,ring:We=We.slice(Sr-1)}),We[0]=Xo(We[0][1]),it=-1,Ft=We.length}}}}function Ho(ve){var be,De=ve.length,Be={},et={},We,it,Ft,Ht,tr;for(be=0;be<De;++be){if(We=ve[be],it=We.ring[0],Ht=We.ring[We.ring.length-1],it[0]===Ht[0]&&it[1]===Ht[1]){We.polygon.push(We.ring),ve[be]=null;continue}We.index=be,Be[it]=et[Ht]=We}for(be=0;be<De;++be)if(We=ve[be],We){if(it=We.ring[0],Ht=We.ring[We.ring.length-1],Ft=et[it],tr=Be[Ht],delete Be[it],delete et[Ht],it[0]===Ht[0]&&it[1]===Ht[1]){We.polygon.push(We.ring);continue}Ft?(delete et[it],delete Be[Ft.ring[0]],Ft.ring.pop(),ve[Ft.index]=null,We={index:-1,polygon:Ft.polygon,ring:Ft.ring.concat(We.ring)},Ft===tr?We.polygon.push(We.ring):(We.index=De++,ve.push(Be[We.ring[0]]=et[We.ring[We.ring.length-1]]=We))):tr?(delete Be[Ht],delete et[tr.ring[tr.ring.length-1]],We.ring.pop(),We={index:De++,polygon:tr.polygon,ring:We.ring.concat(tr.ring)},ve[tr.index]=null,ve.push(Be[We.ring[0]]=et[We.ring[We.ring.length-1]]=We)):(We.ring.push(We.ring[0]),We.polygon.push(We.ring))}}function ml(ve){var be={type:"Feature",geometry:Ws(ve.geometry)};return ve.id!=null&&(be.id=ve.id),ve.bbox!=null&&(be.bbox=ve.bbox),ve.properties!=null&&(be.properties=ve.properties),be}function Ws(ve){if(ve==null)return ve;var be,De,Be,et;switch(ve.type){case"GeometryCollection":be={type:"GeometryCollection",geometries:ve.geometries.map(Ws)};break;case"Point":be={type:"Point",coordinates:Ss(ve.coordinates)};break;case"MultiPoint":case"LineString":be={type:ve.type,coordinates:as(ve.coordinates)};break;case"MultiLineString":be={type:"MultiLineString",coordinates:ve.coordinates.map(as)};break;case"Polygon":{var We=[];ws(ve.coordinates,We,De=[]),Ho(De),be={type:"Polygon",coordinates:We};break}case"MultiPolygon":{De=[],Be=-1,et=ve.coordinates.length;for(var it=new Array(et);++Be<et;)ws(ve.coordinates[Be],it[Be]=[],De);Ho(De),be={type:"MultiPolygon",coordinates:it.filter(Ka)};break}default:return ve}return ve.bbox!=null&&(be.bbox=ve.bbox),be}function Ls(ve){if(ve==null)return ve;switch(ve.type){case"Feature":return ml(ve);case"FeatureCollection":{var be={type:"FeatureCollection",features:ve.features.map(ml)};return ve.bbox!=null&&(be.bbox=ve.bbox),be}default:return Ws(ve)}}function va(ve,be){var De=b(be/2),Be=_(L*De);return[ve*(.74482-.34588*Be*Be),1.70711*De]}va.invert=function(ve,be){var De=be/1.70711,Be=_(L*De);return[ve/(.74482-.34588*Be*Be),2*i(De)]};function no(){return t.geoProjection(va).scale(146.153)}function ys(ve,be,De){var Be=t.geoInterpolate(be,De),et=Be(.5),We=t.geoRotation([-et[0],-et[1]])(be),it=Be.distance/2,Ft=-O(_(We[1]*T)/_(it)),Ht=[-et[0],-et[1],-(We[0]>0?E-Ft:Ft)*P],tr=t.geoProjection(ve(it)).rotate(Ht),dr=t.geoRotation(Ht),Sr=tr.center;return delete tr.rotate,tr.center=function(Or){return arguments.length?Sr(dr(Or)):dr.invert(Sr())},tr.clipAngle(90)}function rs(ve){var be=o(ve);function De(Be,et){var We=t.geoGnomonicRaw(Be,et);return We[0]*=be,We}return De.invert=function(Be,et){return t.geoGnomonicRaw.invert(Be/be,et)},De}function $l(){return Cu([-158,21.5],[-77,39]).clipAngle(60).scale(400)}function Cu(ve,be){return ys(rs,ve,be)}function Yu(ve){if(!(ve*=2))return t.geoAzimuthalEquidistantRaw;var be=-ve/2,De=-be,Be=ve*ve,et=b(De),We=.5/_(De);function it(Ft,Ht){var tr=V(o(Ht)*o(Ft-be)),dr=V(o(Ht)*o(Ft-De)),Sr=Ht<0?-1:1;return tr*=tr,dr*=dr,[(tr-dr)/(2*ve),Sr*G(4*Be*dr-(Be-tr+dr)*(Be-tr+dr))/(2*ve)]}return it.invert=function(Ft,Ht){var tr=Ht*Ht,dr=o(G(tr+(Or=Ft+be)*Or)),Sr=o(G(tr+(Or=Ft+De)*Or)),Or,Wr;return[a(Wr=dr-Sr,Or=(dr+Sr)*et),(Ht<0?-1:1)*V(G(Or*Or+Wr*Wr)*We)]},it}function Nc(){return pu([-158,21.5],[-77,39]).clipAngle(130).scale(122.571)}function pu(ve,be){return ys(Yu,ve,be)}function Uc(ve,be){if(n(be)<p)return[ve,0];var De=n(be/S),Be=O(De);if(n(ve)<p||n(n(be)-S)<p)return[0,v(be)*E*b(Be/2)];var et=o(Be),We=n(E/ve-ve/E)/2,it=We*We,Ft=et/(De+et-1),Ht=Ft*(2/De-1),tr=Ht*Ht,dr=tr+it,Sr=Ft-tr,Or=it+Ft;return[v(ve)*E*(We*Sr+G(it*Sr*Sr-dr*(Ft*Ft-tr)))/dr,v(be)*E*(Ht*Or-We*G((it+1)*dr-Or*Or))/dr]}Uc.invert=function(ve,be){if(n(be)<p)return[ve,0];if(n(ve)<p)return[0,S*_(2*i(be/E))];var De=(ve/=E)*ve,Be=(be/=E)*be,et=De+Be,We=et*et,it=-n(be)*(1+et),Ft=it-2*Be+De,Ht=-2*it+1+2*Be+We,tr=Be/Ht+(2*Ft*Ft*Ft/(Ht*Ht*Ht)-9*it*Ft/(Ht*Ht))/27,dr=(it-Ft*Ft/(3*Ht))/Ht,Sr=2*G(-dr/3),Or=V(3*tr/(dr*Sr))/3;return[E*(et-1+G(1+2*(De-Be)+We))/(2*ve),v(be)*E*(-Sr*o(Or+E/3)-Ft/(3*Ht))]};function xu(){return t.geoProjection(Uc).scale(79.4183)}function Ac(ve,be){if(n(be)<p)return[ve,0];var De=n(be/S),Be=O(De);if(n(ve)<p||n(n(be)-S)<p)return[0,v(be)*E*b(Be/2)];var et=o(Be),We=n(E/ve-ve/E)/2,it=We*We,Ft=et*(G(1+it)-We*et)/(1+it*De*De);return[v(ve)*E*Ft,v(be)*E*G(1-Ft*(2*We+Ft))]}Ac.invert=function(ve,be){if(!ve)return[0,S*_(2*i(be/E))];var De=n(ve/E),Be=(1-De*De-(be/=E)*be)/(2*De),et=Be*Be,We=G(et+1);return[v(ve)*E*(We-Be),v(be)*S*_(2*a(G((1-2*Be*De)*(Be+We)-De),G(We+Be+De)))]};function Ua(){return t.geoProjection(Ac).scale(79.4183)}function oo(ve,be){if(n(be)<p)return[ve,0];var De=be/S,Be=O(De);if(n(ve)<p||n(n(be)-S)<p)return[0,E*b(Be/2)];var et=(E/ve-ve/E)/2,We=De/(1+o(Be));return[E*(v(ve)*G(et*et+1-We*We)-et),E*We]}oo.invert=function(ve,be){if(!be)return[ve,0];var De=be/E,Be=(E*E*(1-De*De)-ve*ve)/(2*E*ve);return[ve?E*(v(ve)*G(Be*Be+1)-Be):0,S*_(2*i(De))]};function Vc(){return t.geoProjection(oo).scale(79.4183)}function hc(ve,be){if(!be)return[ve,0];var De=n(be);if(!ve||De===S)return[0,be];var Be=De/S,et=Be*Be,We=(8*Be-et*(et+2)-5)/(2*et*(Be-1)),it=We*We,Ft=Be*We,Ht=et+it+2*Ft,tr=Be+3*We,dr=ve/S,Sr=dr+1/dr,Or=v(n(ve)-S)*G(Sr*Sr-4),Wr=Or*Or,ni=Ht*(et+it*Wr-1)+(1-et)*(et*(tr*tr+4*it)+12*Ft*it+4*it*it),Pi=(Or*(Ht+it-1)+2*G(ni))/(4*Ht+Wr);return[v(ve)*S*Pi,v(be)*S*G(1+Or*n(Pi)-Pi*Pi)]}hc.invert=function(ve,be){var De;if(!ve||!be)return[ve,be];be/=E;var Be=v(ve)*ve/S,et=(Be*Be-1+4*be*be)/n(Be),We=et*et,it=2*be,Ft=50;do{var Ht=it*it,tr=(8*it-Ht*(Ht+2)-5)/(2*Ht*(it-1)),dr=(3*it-Ht*it-10)/(2*Ht*it),Sr=tr*tr,Or=it*tr,Wr=it+tr,ni=Wr*Wr,Pi=it+3*tr,cn=ni*(Ht+Sr*We-1)+(1-Ht)*(Ht*(Pi*Pi+4*Sr)+Sr*(12*Or+4*Sr)),ln=-2*Wr*(4*Or*Sr+(1-4*Ht+3*Ht*Ht)*(1+dr)+Sr*(-6+14*Ht-We+(-8+8*Ht-2*We)*dr)+Or*(-8+12*Ht+(-10+10*Ht-We)*dr)),Cn=G(cn),Kn=et*(ni+Sr-1)+2*Cn-Be*(4*ni+We),Ta=et*(2*tr*dr+2*Wr*(1+dr))+ln/Cn-8*Wr*(et*(-1+Sr+ni)+2*Cn)*(1+dr)/(We+4*ni);it-=De=Kn/Ta}while(De>p&&--Ft>0);return[v(ve)*(G(et*et+4)+et)*E/4,S*it]};function Ku(){return t.geoProjection(hc).scale(127.16)}function ue(ve,be,De,Be,et){function We(it,Ft){var Ht=De*_(Be*Ft),tr=G(1-Ht*Ht),dr=G(2/(1+tr*o(it*=et)));return[ve*tr*dr*_(it),be*Ht*dr]}return We.invert=function(it,Ft){var Ht=it/ve,tr=Ft/be,dr=G(Ht*Ht+tr*tr),Sr=2*O(dr/2);return[a(it*b(Sr),ve*dr)/et,dr&&O(Ft*_(Sr)/(be*De*dr))/Be]},We}function w(ve,be,De,Be){var et=E/3;ve=c(ve,p),be=c(be,p),ve=f(ve,S),be=f(be,E-p),De=c(De,0),De=f(De,100-p),Be=c(Be,p);var We=De/100+1,it=Be/100,Ft=V(We*o(et))/et,Ht=_(ve)/_(Ft*S),tr=be/E,dr=G(it*_(ve/2)/_(be/2)),Sr=dr/G(tr*Ht*Ft),Or=1/(dr*G(tr*Ht*Ft));return ue(Sr,Or,Ht,Ft,tr)}function B(){var ve=65*T,be=60*T,De=20,Be=200,et=t.geoProjectionMutator(w),We=et(ve,be,De,Be);return We.poleline=function(it){return arguments.length?et(ve=+it*T,be,De,Be):ve*P},We.parallels=function(it){return arguments.length?et(ve,be=+it*T,De,Be):be*P},We.inflation=function(it){return arguments.length?et(ve,be,De=+it,Be):De},We.ratio=function(it){return arguments.length?et(ve,be,De,Be=+it):Be},We.scale(163.775)}function Q(){return B().poleline(65).parallels(60).inflation(0).ratio(200).scale(172.633)}var ee=4*E+3*G(3),le=2*G(2*E*G(3)/ee),qe=Jt(le*G(3)/E,le,ee/6);function Xe(){return t.geoProjection(qe).scale(176.84)}function ot(ve,be){return[ve*G(1-3*be*be/(E*E)),be]}ot.invert=function(ve,be){return[ve/G(1-3*be*be/(E*E)),be]};function Tt(){return t.geoProjection(ot).scale(152.63)}function Yt(ve,be){var De=o(be),Be=o(ve)*De,et=1-Be,We=o(ve=a(_(ve)*De,-_(be))),it=_(ve);return De=G(1-Be*Be),[it*De-We*et,-We*De-it*et]}Yt.invert=function(ve,be){var De=(ve*ve+be*be)/-2,Be=G(-De*(2+De)),et=be*De+ve*Be,We=ve*De-be*Be,it=G(We*We+et*et);return[a(Be*et,it*(1+De)),it?-O(Be*We/it):0]};function Kt(){return t.geoProjection(Yt).rotate([0,-90,45]).scale(124.75).clipAngle(180-.001)}function xr(ve,be){var De=Ee(ve,be);return[(De[0]+ve/S)/2,(De[1]+be)/2]}xr.invert=function(ve,be){var De=ve,Be=be,et=25;do{var We=o(Be),it=_(Be),Ft=_(2*Be),Ht=it*it,tr=We*We,dr=_(De),Sr=o(De/2),Or=_(De/2),Wr=Or*Or,ni=1-tr*Sr*Sr,Pi=ni?V(We*Sr)*G(cn=1/ni):cn=0,cn,ln=.5*(2*Pi*We*Or+De/S)-ve,Cn=.5*(Pi*it+Be)-be,Kn=.5*cn*(tr*Wr+Pi*We*Sr*Ht)+.5/S,Ta=cn*(dr*Ft/4-Pi*it*Or),fa=.125*cn*(Ft*Or-Pi*it*tr*dr),$a=.5*cn*(Ht*Sr+Pi*Wr*We)+.5,Co=Ta*fa-$a*Kn,Qa=(Cn*Ta-ln*$a)/Co,mo=(ln*fa-Cn*Kn)/Co;De-=Qa,Be-=mo}while((n(Qa)>p||n(mo)>p)&&--et>0);return[De,Be]};function Ir(){return t.geoProjection(xr).scale(158.837)}e.geoNaturalEarth=t.geoNaturalEarth1,e.geoNaturalEarthRaw=t.geoNaturalEarth1Raw,e.geoAiry=_e,e.geoAiryRaw=oe,e.geoAitoff=Ce,e.geoAitoffRaw=Ee,e.geoArmadillo=ie,e.geoArmadilloRaw=me,e.geoAugust=Le,e.geoAugustRaw=Se,e.geoBaker=ge,e.geoBakerRaw=Pe,e.geoBerghaus=ce,e.geoBerghausRaw=Re,e.geoBertin1953=Gt,e.geoBertin1953Raw=lt,e.geoBoggs=Qt,e.geoBoggsRaw=St,e.geoBonne=er,e.geoBonneRaw=mt,e.geoBottomley=Tr,e.geoBottomleyRaw=lr,e.geoBromley=ti,e.geoBromleyRaw=Lr,e.geoChamberlin=tt,e.geoChamberlinRaw=Ge,e.geoChamberlinAfrica=je,e.geoCollignon=Ie,e.geoCollignonRaw=xt,e.geoCraig=ke,e.geoCraigRaw=xe,e.geoCraster=ar,e.geoCrasterRaw=ir,e.geoCylindricalEqualArea=ii,e.geoCylindricalEqualAreaRaw=vr,e.geoCylindricalStereographic=$r,e.geoCylindricalStereographicRaw=pi,e.geoEckert1=ji,e.geoEckert1Raw=di,e.geoEckert2=wi,e.geoEckert2Raw=In,e.geoEckert3=qn,e.geoEckert3Raw=On,e.geoEckert4=ra,e.geoEckert4Raw=Fn,e.geoEckert5=Ut,e.geoEckert5Raw=la,e.geoEckert6=rr,e.geoEckert6Raw=wt,e.geoEisenlohr=Xr,e.geoEisenlohrRaw=Er,e.geoFahey=Oi,e.geoFaheyRaw=Qr,e.geoFoucaut=tn,e.geoFoucautRaw=$i,e.geoFoucautSinusoidal=yn,e.geoFoucautSinusoidalRaw=fn,e.geoGilbert=ua,e.geoGingery=Wo,e.geoGingeryRaw=ma,e.geoGinzburg4=Ha,e.geoGinzburg4Raw=Wn,e.geoGinzburg5=jn,e.geoGinzburg5Raw=vo,e.geoGinzburg6=kr,e.geoGinzburg6Raw=Mt,e.geoGinzburg8=vi,e.geoGinzburg8Raw=Jr,e.geoGinzburg9=An,e.geoGinzburg9Raw=hn,e.geoGringorten=Jn,e.geoGringortenRaw=Li,e.geoGuyou=Fs,e.geoGuyouRaw=Lo,e.geoHammer=pt,e.geoHammerRaw=Ze,e.geoHammerRetroazimuthal=zl,e.geoHammerRetroazimuthalRaw=ll,e.geoHealpix=Zl,e.geoHealpixRaw=cl,e.geoHill=nc,e.geoHillRaw=Su,e.geoHomolosine=xo,e.geoHomolosineRaw=Ol,e.geoHufnagel=Ns,e.geoHufnagelRaw=Yl,e.geoHyperelliptical=Oo,e.geoHyperellipticalRaw=aa,e.geoInterrupt=Do,e.geoInterruptedBoggs=Uf,e.geoInterruptedHomolosine=Zc,e.geoInterruptedMollweide=Os,e.geoInterruptedMollweideHemispheres=oc,e.geoInterruptedSinuMollweide=sc,e.geoInterruptedSinusoidal=Lf,e.geoKavrayskiy7=nf,e.geoKavrayskiy7Raw=cs,e.geoLagrange=Jl,e.geoLagrangeRaw=Vf,e.geoLarrivee=Fu,e.geoLarriveeRaw=lc,e.geoLaskowski=Hs,e.geoLaskowskiRaw=Es,e.geoLittrow=ps,e.geoLittrowRaw=Go,e.geoLoximuthal=xl,e.geoLoximuthalRaw=uc,e.geoMiller=qs,e.geoMillerRaw=Gu,e.geoModifiedStereographic=Bl,e.geoModifiedStereographicRaw=ad,e.geoModifiedStereographicAlaska=Hu,e.geoModifiedStereographicGs48=bl,e.geoModifiedStereographicGs50=Gf,e.geoModifiedStereographicMiller=Ic,e.geoModifiedStereographicLee=yf,e.geoMollweide=wr,e.geoMollweideRaw=sr,e.geoMtFlatPolarParabolic=Yc,e.geoMtFlatPolarParabolicRaw=_f,e.geoMtFlatPolarQuartic=th,e.geoMtFlatPolarQuarticRaw=eh,e.geoMtFlatPolarSinusoidal=Hf,e.geoMtFlatPolarSinusoidalRaw=ju,e.geoNaturalEarth2=of,e.geoNaturalEarth2Raw=cc,e.geoNellHammer=Kc,e.geoNellHammerRaw=Nl,e.geoInterruptedQuarticAuthalic=gs,e.geoNicolosi=Gh,e.geoNicolosiRaw=jf,e.geoPatterson=hu,e.geoPattersonRaw=bc,e.geoPolyconic=nl,e.geoPolyconicRaw=_u,e.geoPolyhedral=xf,e.geoPolyhedralButterfly=Cs,e.geoPolyhedralCollignon=Wf,e.geoPolyhedralWaterman=Us,e.geoProject=Wl,e.geoGringortenQuincuncial=At,e.geoPeirceQuincuncial=Wt,e.geoPierceQuincuncial=Wt,e.geoQuantize=Cr,e.geoQuincuncial=Bc,e.geoRectangularPolyconic=Kr,e.geoRectangularPolyconicRaw=Ar,e.geoRobinson=dn,e.geoRobinsonRaw=Xi,e.geoSatellite=Yi,e.geoSatelliteRaw=Nn,e.geoSinuMollweide=Vu,e.geoSinuMollweideRaw=_a,e.geoSinusoidal=It,e.geoSinusoidalRaw=_t,e.geoStitch=Ls,e.geoTimes=no,e.geoTimesRaw=va,e.geoTwoPointAzimuthal=Cu,e.geoTwoPointAzimuthalRaw=rs,e.geoTwoPointAzimuthalUsa=$l,e.geoTwoPointEquidistant=pu,e.geoTwoPointEquidistantRaw=Yu,e.geoTwoPointEquidistantUsa=Nc,e.geoVanDerGrinten=xu,e.geoVanDerGrintenRaw=Uc,e.geoVanDerGrinten2=Ua,e.geoVanDerGrinten2Raw=Ac,e.geoVanDerGrinten3=Vc,e.geoVanDerGrinten3Raw=oo,e.geoVanDerGrinten4=Ku,e.geoVanDerGrinten4Raw=hc,e.geoWagner=B,e.geoWagner7=Q,e.geoWagnerRaw=w,e.geoWagner4=Xe,e.geoWagner4Raw=qe,e.geoWagner6=Tt,e.geoWagner6Raw=ot,e.geoWiechel=Kt,e.geoWiechelRaw=Yt,e.geoWinkel3=Ir,e.geoWinkel3Raw=xr,Object.defineProperty(e,"__esModule",{value:!0})})});var zDe=ye((xmr,FDe)=>{"use strict";var id=Oa(),DZ=Dr(),Mzt=qa(),$A=Math.PI/180,W2=180/Math.PI,zZ={cursor:"pointer"},OZ={cursor:"auto"};function Ezt(e,t){var r=e.projection,n;return t._isScoped?n=kzt:t._isClipped?n=Lzt:n=Czt,n(e,r)}FDe.exports=Ezt;function qZ(e,t){return id.behavior.zoom().translate(t.translate()).scale(t.scale())}function BZ(e,t,r){var n=e.id,i=e.graphDiv,a=i.layout,o=a[n],s=i._fullLayout,l=s[n],u={},c={};function f(h,d){u[n+"."+h]=DZ.nestedProperty(o,h).get(),Mzt.call("_storeDirectGUIEdit",a,s._preGUI,u);var v=DZ.nestedProperty(l,h);v.get()!==d&&(v.set(d),DZ.nestedProperty(o,h).set(d),c[n+"."+h]=d)}r(f),f("projection.scale",t.scale()/e.fitScale),f("fitbounds",!1),i.emit("plotly_relayout",c)}function kzt(e,t){var r=qZ(e,t);function n(){id.select(this).style(zZ)}function i(){t.scale(id.event.scale).translate(id.event.translate),e.render(!0);var s=t.invert(e.midPt);e.graphDiv.emit("plotly_relayouting",{"geo.projection.scale":t.scale()/e.fitScale,"geo.center.lon":s[0],"geo.center.lat":s[1]})}function a(s){var l=t.invert(e.midPt);s("center.lon",l[0]),s("center.lat",l[1])}function o(){id.select(this).style(OZ),BZ(e,t,a)}return r.on("zoomstart",n).on("zoom",i).on("zoomend",o),r}function Czt(e,t){var r=qZ(e,t),n=2,i,a,o,s,l,u,c,f,h;function d(E){return t.invert(E)}function v(E){var S=d(E);if(!S)return!0;var L=t(S);return Math.abs(L[0]-E[0])>n||Math.abs(L[1]-E[1])>n}function _(){id.select(this).style(zZ),i=id.mouse(this),a=t.rotate(),o=t.translate(),s=a,l=d(i)}function b(){if(u=id.mouse(this),v(i)){r.scale(t.scale()),r.translate(t.translate());return}t.scale(id.event.scale),t.translate([o[0],id.event.translate[1]]),l?d(u)&&(f=d(u),c=[s[0]+(f[0]-l[0]),a[1],a[2]],t.rotate(c),s=c):(i=u,l=d(i)),h=!0,e.render(!0);var E=t.rotate(),S=t.invert(e.midPt);e.graphDiv.emit("plotly_relayouting",{"geo.projection.scale":t.scale()/e.fitScale,"geo.center.lon":S[0],"geo.center.lat":S[1],"geo.projection.rotation.lon":-E[0]})}function p(){id.select(this).style(OZ),h&&BZ(e,t,k)}function k(E){var S=t.rotate(),L=t.invert(e.midPt);E("projection.rotation.lon",-S[0]),E("center.lon",L[0]),E("center.lat",L[1])}return r.on("zoomstart",_).on("zoom",b).on("zoomend",p),r}function Lzt(e,t){var r={r:t.rotate(),k:t.scale()},n=qZ(e,t),i=qzt(n,"zoomstart","zoom","zoomend"),a=0,o=n.on,s;n.on("zoomstart",function(){id.select(this).style(zZ);var h=id.mouse(this),d=t.rotate(),v=d,_=t.translate(),b=Pzt(d);s=IF(t,h),o.call(n,"zoom",function(){var p=id.mouse(this);if(t.scale(r.k=id.event.scale),!s)h=p,s=IF(t,h);else if(IF(t,p)){t.rotate(d).translate(_);var k=IF(t,p),E=Rzt(s,k),S=Fzt(Izt(b,E)),L=r.r=Dzt(S,s,v);(!isFinite(L[0])||!isFinite(L[1])||!isFinite(L[2]))&&(L=v),t.rotate(L),v=L}u(i.of(this,arguments))}),l(i.of(this,arguments))}).on("zoomend",function(){id.select(this).style(OZ),o.call(n,"zoom",null),c(i.of(this,arguments)),BZ(e,t,f)}).on("zoom.redraw",function(){e.render(!0);var h=t.rotate();e.graphDiv.emit("plotly_relayouting",{"geo.projection.scale":t.scale()/e.fitScale,"geo.projection.rotation.lon":-h[0],"geo.projection.rotation.lat":-h[1]})});function l(h){a++||h({type:"zoomstart"})}function u(h){h({type:"zoom"})}function c(h){--a||h({type:"zoomend"})}function f(h){var d=t.rotate();h("projection.rotation.lon",-d[0]),h("projection.rotation.lat",-d[1])}return id.rebind(n,i,"on")}function IF(e,t){var r=e.invert(t);return r&&isFinite(r[0])&&isFinite(r[1])&&zzt(r)}function Pzt(e){var t=.5*e[0]*$A,r=.5*e[1]*$A,n=.5*e[2]*$A,i=Math.sin(t),a=Math.cos(t),o=Math.sin(r),s=Math.cos(r),l=Math.sin(n),u=Math.cos(n);return[a*s*u+i*o*l,i*s*u-a*o*l,a*o*u+i*s*l,a*s*l-i*o*u]}function Izt(e,t){var r=e[0],n=e[1],i=e[2],a=e[3],o=t[0],s=t[1],l=t[2],u=t[3];return[r*o-n*s-i*l-a*u,r*s+n*o+i*u-a*l,r*l-n*u+i*o+a*s,r*u+n*l-i*s+a*o]}function Rzt(e,t){if(!(!e||!t)){var r=Ozt(e,t),n=Math.sqrt(DDe(r,r)),i=.5*Math.acos(Math.max(-1,Math.min(1,DDe(e,t)))),a=Math.sin(i)/n;return n&&[Math.cos(i),r[2]*a,-r[1]*a,r[0]*a]}}function Dzt(e,t,r){var n=FZ(t,2,e[0]);n=FZ(n,1,e[1]),n=FZ(n,0,e[2]-r[2]);var i=t[0],a=t[1],o=t[2],s=n[0],l=n[1],u=n[2],c=Math.atan2(a,i)*W2,f=Math.sqrt(i*i+a*a),h,d;Math.abs(l)>f?(d=(l>0?90:-90)-c,h=0):(d=Math.asin(l/f)*W2-c,h=Math.sqrt(f*f-l*l));var v=180-d-2*c,_=(Math.atan2(u,s)-Math.atan2(o,h))*W2,b=(Math.atan2(u,s)-Math.atan2(o,-h))*W2,p=IDe(r[0],r[1],d,_),k=IDe(r[0],r[1],v,b);return p<=k?[d,_,r[2]]:[v,b,r[2]]}function IDe(e,t,r,n){var i=RDe(r-e),a=RDe(n-t);return Math.sqrt(i*i+a*a)}function RDe(e){return(e%360+540)%360-180}function FZ(e,t,r){var n=r*$A,i=e.slice(),a=t===0?1:0,o=t===2?1:2,s=Math.cos(n),l=Math.sin(n);return i[a]=e[a]*s-e[o]*l,i[o]=e[o]*s+e[a]*l,i}function Fzt(e){return[Math.atan2(2*(e[0]*e[1]+e[2]*e[3]),1-2*(e[1]*e[1]+e[2]*e[2]))*W2,Math.asin(Math.max(-1,Math.min(1,2*(e[0]*e[2]-e[3]*e[1]))))*W2,Math.atan2(2*(e[0]*e[3]+e[1]*e[2]),1-2*(e[2]*e[2]+e[3]*e[3]))*W2]}function zzt(e){var t=e[0]*$A,r=e[1]*$A,n=Math.cos(r);return[n*Math.cos(t),n*Math.sin(t),Math.sin(r)]}function DDe(e,t){for(var r=0,n=0,i=e.length;n<i;++n)r+=e[n]*t[n];return r}function Ozt(e,t){return[e[1]*t[2]-e[2]*t[1],e[2]*t[0]-e[0]*t[2],e[0]*t[1]-e[1]*t[0]]}function qzt(e){for(var t=0,r=arguments.length,n=[];++t<r;)n.push(arguments[t]);var i=id.dispatch.apply(null,n);return i.of=function(a,o){return function(s){var l;try{l=s.sourceEvent=id.event,s.target=e,id.event=s,i[s.type].apply(a,o)}finally{id.event=l}}},i}});var GDe=ye((bmr,VDe)=>{"use strict";var t1=Oa(),VZ=RZ(),Bzt=VZ.geoPath,Nzt=VZ.geoDistance,Uzt=PDe(),Vzt=qa(),ok=Dr(),Gzt=ok.strTranslate,RF=ka(),ak=So(),ODe=vf(),Hzt=Mc(),UZ=ho(),qDe=Ag().getAutoRange,NZ=yv(),jzt=Of().prepSelect,Wzt=Of().clearOutline,Xzt=Of().selectOnClick,Zzt=zDe(),fp=tk(),Yzt=ix(),NDe=mF(),Kzt=yZ().feature;function UDe(e){this.id=e.id,this.graphDiv=e.graphDiv,this.container=e.container,this.topojsonURL=e.topojsonURL,this.isStatic=e.staticPlot,this.topojsonName=null,this.topojson=null,this.projection=null,this.scope=null,this.viewInitial=null,this.fitScale=null,this.bounds=null,this.midPt=null,this.hasChoropleth=!1,this.traceHash={},this.layers={},this.basePaths={},this.dataPaths={},this.dataPoints={},this.clipDef=null,this.clipRect=null,this.bgRect=null,this.makeFramework()}var Qg=UDe.prototype;VDe.exports=function(t){return new UDe(t)};Qg.plot=function(e,t,r,n){var i=this;if(n)return i.update(e,t,!0);i._geoCalcData=e,i._fullLayout=t;var a=t[this.id],o=[],s=!1;for(var l in fp.layerNameToAdjective)if(l!=="frame"&&a["show"+l]){s=!0;break}for(var u=!1,c=0;c<e.length;c++){var f=e[0][0].trace;f._geo=i,f.locationmode&&(s=!0);var h=f.marker;if(h){var d=h.angle,v=h.angleref;(d||v==="north"||v==="previous")&&(u=!0)}}if(this._hasMarkerAngles=u,s){var _=NDe.getTopojsonName(a);(i.topojson===null||_!==i.topojsonName)&&(i.topojsonName=_,PlotlyGeoAssets.topojson[i.topojsonName]===void 0&&o.push(i.fetchTopojson()))}o=o.concat(Yzt.fetchTraceGeoData(e)),r.push(new Promise(function(b,p){Promise.all(o).then(function(){i.topojson=PlotlyGeoAssets.topojson[i.topojsonName],i.update(e,t),b()}).catch(p)}))};Qg.fetchTopojson=function(){var e=this,t=NDe.getTopojsonPath(e.topojsonURL,e.topojsonName);return new Promise(function(r,n){t1.json(t,function(i,a){if(i)return i.status===404?n(new Error(["plotly.js could not find topojson file at",t+".","Make sure the *topojsonURL* plot config option","is set properly."].join(" "))):n(new Error(["unexpected error while fetching topojson file at",t].join(" ")));PlotlyGeoAssets.topojson[e.topojsonName]=a,r()})})};Qg.update=function(e,t,r){var n=t[this.id];this.hasChoropleth=!1;for(var i=0;i<e.length;i++){var a=e[i],o=a[0].trace;o.type==="choropleth"&&(this.hasChoropleth=!0),o.visible===!0&&o._length>0&&o._module.calcGeoJSON(a,t)}if(!r){var s=this.updateProjection(e,t);if(s)return;(!this.viewInitial||this.scope!==n.scope)&&this.saveViewInitial(n)}this.scope=n.scope,this.updateBaseLayers(t,n),this.updateDims(t,n),this.updateFx(t,n),Hzt.generalUpdatePerTraceModule(this.graphDiv,this,e,n);var l=this.layers.frontplot.select(".scatterlayer");this.dataPoints.point=l.selectAll(".point"),this.dataPoints.text=l.selectAll("text"),this.dataPaths.line=l.selectAll(".js-line");var u=this.layers.backplot.select(".choroplethlayer");this.dataPaths.choropleth=u.selectAll("path"),this._render()};Qg.updateProjection=function(e,t){var r=this.graphDiv,n=t[this.id],i=t._size,a=n.domain,o=n.projection,s=n.lonaxis,l=n.lataxis,u=s._ax,c=l._ax,f=this.projection=Jzt(n),h=[[i.l+i.w*a.x[0],i.t+i.h*(1-a.y[1])],[i.l+i.w*a.x[1],i.t+i.h*(1-a.y[0])]],d=n.center||{},v=o.rotation||{},_=s.range||[],b=l.range||[];if(n.fitbounds){u._length=h[1][0]-h[0][0],c._length=h[1][1]-h[0][1],u.range=qDe(r,u),c.range=qDe(r,c);var p=(u.range[0]+u.range[1])/2,k=(c.range[0]+c.range[1])/2;if(n._isScoped)d={lon:p,lat:k};else if(n._isClipped){d={lon:p,lat:k},v={lon:p,lat:k,roll:v.roll};var E=o.type,S=fp.lonaxisSpan[E]/2||180,L=fp.lataxisSpan[E]/2||90;_=[p-S,p+S],b=[k-L,k+L]}else d={lon:p,lat:k},v={lon:p,lat:v.lat,roll:v.roll}}f.center([d.lon-v.lon,d.lat-v.lat]).rotate([-v.lon,-v.lat,v.roll]).parallels(o.parallels);var x=BDe(_,b);f.fitExtent(h,x);var C=this.bounds=f.getBounds(x),M=this.fitScale=f.scale(),g=f.translate();if(n.fitbounds){var P=f.getBounds(BDe(u.range,c.range)),T=Math.min((C[1][0]-C[0][0])/(P[1][0]-P[0][0]),(C[1][1]-C[0][1])/(P[1][1]-P[0][1]));isFinite(T)?f.scale(T*M):ok.warn("Something went wrong during"+this.id+"fitbounds computations.")}else f.scale(o.scale*M);var z=this.midPt=[(C[0][0]+C[1][0])/2,(C[0][1]+C[1][1])/2];if(f.translate([g[0]+(z[0]-g[0]),g[1]+(z[1]-g[1])]).clipExtent(C),n._isAlbersUsa){var O=f([d.lon,d.lat]),V=f.translate();f.translate([V[0]-(O[0]-V[0]),V[1]-(O[1]-V[1])])}};Qg.updateBaseLayers=function(e,t){var r=this,n=r.topojson,i=r.layers,a=r.basePaths;function o(h){return h==="lonaxis"||h==="lataxis"}function s(h){return!!fp.lineLayers[h]}function l(h){return!!fp.fillLayers[h]}var u=this.hasChoropleth?fp.layersForChoropleth:fp.layers,c=u.filter(function(h){return s(h)||l(h)?t["show"+h]:o(h)?t[h].showgrid:!0}),f=r.framework.selectAll(".layer").data(c,String);f.exit().each(function(h){delete i[h],delete a[h],t1.select(this).remove()}),f.enter().append("g").attr("class",function(h){return"layer "+h}).each(function(h){var d=i[h]=t1.select(this);h==="bg"?r.bgRect=d.append("rect").style("pointer-events","all"):o(h)?a[h]=d.append("path").style("fill","none"):h==="backplot"?d.append("g").classed("choroplethlayer",!0):h==="frontplot"?d.append("g").classed("scatterlayer",!0):s(h)?a[h]=d.append("path").style("fill","none").style("stroke-miterlimit",2):l(h)&&(a[h]=d.append("path").style("stroke","none"))}),f.order(),f.each(function(h){var d=a[h],v=fp.layerNameToAdjective[h];h==="frame"?d.datum(fp.sphereSVG):s(h)||l(h)?d.datum(Kzt(n,n.objects[h])):o(h)&&d.datum($zt(h,t,e)).call(RF.stroke,t[h].gridcolor).call(ak.dashLine,t[h].griddash,t[h].gridwidth),s(h)?d.call(RF.stroke,t[v+"color"]).call(ak.dashLine,"",t[v+"width"]):l(h)&&d.call(RF.fill,t[v+"color"])})};Qg.updateDims=function(e,t){var r=this.bounds,n=(t.framewidth||0)/2,i=r[0][0]-n,a=r[0][1]-n,o=r[1][0]-i+n,s=r[1][1]-a+n;ak.setRect(this.clipRect,i,a,o,s),this.bgRect.call(ak.setRect,i,a,o,s).call(RF.fill,t.bgcolor),this.xaxis._offset=i,this.xaxis._length=o,this.yaxis._offset=a,this.yaxis._length=s};Qg.updateFx=function(e,t){var r=this,n=r.graphDiv,i=r.bgRect,a=e.dragmode,o=e.clickmode;if(r.isStatic)return;function s(){var f=r.viewInitial,h={};for(var d in f)h[r.id+"."+d]=f[d];Vzt.call("_guiRelayout",n,h),n.emit("plotly_doubleclick",null)}function l(f){return r.projection.invert([f[0]+r.xaxis._offset,f[1]+r.yaxis._offset])}var u=function(f,h){if(h.isRect){var d=f.range={};d[r.id]=[l([h.xmin,h.ymin]),l([h.xmax,h.ymax])]}else{var v=f.lassoPoints={};v[r.id]=h.map(l)}},c={element:r.bgRect.node(),gd:n,plotinfo:{id:r.id,xaxis:r.xaxis,yaxis:r.yaxis,fillRangeItems:u},xaxes:[r.xaxis],yaxes:[r.yaxis],subplot:r.id,clickFn:function(f){f===2&&Wzt(n)}};a==="pan"?(i.node().onmousedown=null,i.call(Zzt(r,t)),i.on("dblclick.zoom",s),n._context._scrollZoom.geo||i.on("wheel.zoom",null)):(a==="select"||a==="lasso")&&(i.on(".zoom",null),c.prepFn=function(f,h,d){jzt(f,h,d,c,a)},NZ.init(c)),i.on("mousemove",function(){var f=r.projection.invert(ok.getPositionFromD3Event());if(!f)return NZ.unhover(n,t1.event);r.xaxis.p2c=function(){return f[0]},r.yaxis.p2c=function(){return f[1]},ODe.hover(n,t1.event,r.id)}),i.on("mouseout",function(){n._dragging||NZ.unhover(n,t1.event)}),i.on("click",function(){a!=="select"&&a!=="lasso"&&(o.indexOf("select")>-1&&Xzt(t1.event,n,[r.xaxis],[r.yaxis],r.id,c),o.indexOf("event")>-1&&ODe.click(n,t1.event))})};Qg.makeFramework=function(){var e=this,t=e.graphDiv,r=t._fullLayout,n="clip"+r._uid+e.id;e.clipDef=r._clips.append("clipPath").attr("id",n),e.clipRect=e.clipDef.append("rect"),e.framework=t1.select(e.container).append("g").attr("class","geo "+e.id).call(ak.setClipUrl,n,t),e.project=function(i){var a=e.projection(i);return a?[a[0]-e.xaxis._offset,a[1]-e.yaxis._offset]:[null,null]},e.xaxis={_id:"x",c2p:function(i){return e.project(i)[0]}},e.yaxis={_id:"y",c2p:function(i){return e.project(i)[1]}},e.mockAxis={type:"linear",showexponent:"all",exponentformat:"B"},UZ.setConvert(e.mockAxis,r)};Qg.saveViewInitial=function(e){var t=e.center||{},r=e.projection,n=r.rotation||{};this.viewInitial={fitbounds:e.fitbounds,"projection.scale":r.scale};var i;e._isScoped?i={"center.lon":t.lon,"center.lat":t.lat}:e._isClipped?i={"projection.rotation.lon":n.lon,"projection.rotation.lat":n.lat}:i={"center.lon":t.lon,"center.lat":t.lat,"projection.rotation.lon":n.lon},ok.extendFlat(this.viewInitial,i)};Qg.render=function(e){this._hasMarkerAngles&&e?this.plot(this._geoCalcData,this._fullLayout,[],!0):this._render()};Qg._render=function(){var e=this.projection,t=e.getPath(),r;function n(a){var o=e(a.lonlat);return o?Gzt(o[0],o[1]):null}function i(a){return e.isLonLatOverEdges(a.lonlat)?"none":null}for(r in this.basePaths)this.basePaths[r].attr("d",t);for(r in this.dataPaths)this.dataPaths[r].attr("d",function(a){return t(a.geojson)});for(r in this.dataPoints)this.dataPoints[r].attr("display",i).attr("transform",n)};function Jzt(e){var t=e.projection,r=t.type,n=fp.projNames[r];n="geo"+ok.titleCase(n);for(var i=VZ[n]||Uzt[n],a=i(),o=e._isSatellite?Math.acos(1/t.distance)*180/Math.PI:e._isClipped?fp.lonaxisSpan[r]/2:null,s=["center","rotate","parallels","clipExtent"],l=function(f){return f?a:[]},u=0;u<s.length;u++){var c=s[u];typeof a[c]!="function"&&(a[c]=l)}return a.isLonLatOverEdges=function(f){if(a(f)===null)return!0;if(o){var h=a.rotate(),d=Nzt(f,[-h[0],-h[1]]),v=o*Math.PI/180;return d>v}else return!1},a.getPath=function(){return Bzt().projection(a)},a.getBounds=function(f){return a.getPath().bounds(f)},a.precision(fp.precision),e._isSatellite&&a.tilt(t.tilt).distance(t.distance),o&&a.clipAngle(o-fp.clipPad),a}function $zt(e,t,r){var n=1e-6,i=2.5,a=t[e],o=fp.scopeDefaults[t.scope],s,l,u;e==="lonaxis"?(s=o.lonaxisRange,l=o.lataxisRange,u=function(k,E){return[k,E]}):e==="lataxis"&&(s=o.lataxisRange,l=o.lonaxisRange,u=function(k,E){return[E,k]});var c={type:"linear",range:[s[0],s[1]-n],tick0:a.tick0,dtick:a.dtick};UZ.setConvert(c,r);var f=UZ.calcTicks(c);!t.isScoped&&e==="lonaxis"&&f.pop();for(var h=f.length,d=new Array(h),v=0;v<h;v++)for(var _=f[v].x,b=d[v]=[],p=l[0];p<l[1]+i;p+=i)b.push(u(_,p));return{type:"MultiLineString",coordinates:d}}function BDe(e,t){var r=fp.clipPad,n=e[0]+r,i=e[1]-r,a=t[0]+r,o=t[1]-r;n>0&&i<0&&(i+=360);var s=(i-n)/4;return{type:"Polygon",coordinates:[[[n,a],[n,o],[n+s,o],[n+2*s,o],[n+3*s,o],[i,o],[i,a],[i-s,a],[i-2*s,a],[i-3*s,a],[n,a]]]}}});var GZ=ye((wmr,WDe)=>{"use strict";var e5=Lh(),Qzt=Cc().attributes,e7t=Pd().dash,QA=tk(),t7t=mc().overrideAll,HDe=Z1(),jDe={range:{valType:"info_array",items:[{valType:"number"},{valType:"number"}]},showgrid:{valType:"boolean",dflt:!1},tick0:{valType:"number",dflt:0},dtick:{valType:"number"},gridcolor:{valType:"color",dflt:e5.lightLine},gridwidth:{valType:"number",min:0,dflt:1},griddash:e7t},r7t=WDe.exports=t7t({domain:Qzt({name:"geo"},{}),fitbounds:{valType:"enumerated",values:[!1,"locations","geojson"],dflt:!1,editType:"plot"},resolution:{valType:"enumerated",values:[110,50],dflt:110,coerceNumber:!0},scope:{valType:"enumerated",values:HDe(QA.scopeDefaults),dflt:"world"},projection:{type:{valType:"enumerated",values:HDe(QA.projNames)},rotation:{lon:{valType:"number"},lat:{valType:"number"},roll:{valType:"number"}},tilt:{valType:"number",dflt:0},distance:{valType:"number",min:1.001,dflt:2},parallels:{valType:"info_array",items:[{valType:"number"},{valType:"number"}]},scale:{valType:"number",min:0,dflt:1}},center:{lon:{valType:"number"},lat:{valType:"number"}},visible:{valType:"boolean",dflt:!0},showcoastlines:{valType:"boolean"},coastlinecolor:{valType:"color",dflt:e5.defaultLine},coastlinewidth:{valType:"number",min:0,dflt:1},showland:{valType:"boolean",dflt:!1},landcolor:{valType:"color",dflt:QA.landColor},showocean:{valType:"boolean",dflt:!1},oceancolor:{valType:"color",dflt:QA.waterColor},showlakes:{valType:"boolean",dflt:!1},lakecolor:{valType:"color",dflt:QA.waterColor},showrivers:{valType:"boolean",dflt:!1},rivercolor:{valType:"color",dflt:QA.waterColor},riverwidth:{valType:"number",min:0,dflt:1},showcountries:{valType:"boolean"},countrycolor:{valType:"color",dflt:e5.defaultLine},countrywidth:{valType:"number",min:0,dflt:1},showsubunits:{valType:"boolean"},subunitcolor:{valType:"color",dflt:e5.defaultLine},subunitwidth:{valType:"number",min:0,dflt:1},showframe:{valType:"boolean"},framecolor:{valType:"color",dflt:e5.defaultLine},framewidth:{valType:"number",min:0,dflt:1},bgcolor:{valType:"color",dflt:e5.background},lonaxis:jDe,lataxis:jDe},"plot","from-root");r7t.uirevision={valType:"any",editType:"none"}});var YDe=ye((Tmr,ZDe)=>{"use strict";var DF=Dr(),i7t=k_(),n7t=Id().getSubplotData,FF=tk(),a7t=GZ(),XDe=FF.axesNames;ZDe.exports=function(t,r,n){i7t(t,r,n,{type:"geo",attributes:a7t,handleDefaults:o7t,fullData:n,partition:"y"})};function o7t(e,t,r,n){var i=n7t(n.fullData,"geo",n.id),a=i.map(function(oe){return oe.index}),o=r("resolution"),s=r("scope"),l=FF.scopeDefaults[s],u=r("projection.type",l.projType),c=t._isAlbersUsa=u==="albers usa";c&&(s=t.scope="usa");var f=t._isScoped=s!=="world",h=t._isSatellite=u==="satellite",d=t._isConic=u.indexOf("conic")!==-1||u==="albers",v=t._isClipped=!!FF.lonaxisSpan[u];if(e.visible===!1){var _=DF.extendDeep({},t._template);_.showcoastlines=!1,_.showcountries=!1,_.showframe=!1,_.showlakes=!1,_.showland=!1,_.showocean=!1,_.showrivers=!1,_.showsubunits=!1,_.lonaxis&&(_.lonaxis.showgrid=!1),_.lataxis&&(_.lataxis.showgrid=!1),t._template=_}for(var b=r("visible"),p,k=0;k<XDe.length;k++){var E=XDe[k],S=[30,10][k],L;if(f)L=l[E+"Range"];else{var x=FF[E+"Span"],C=(x[u]||x["*"])/2,M=r("projection.rotation."+E.slice(0,3),l.projRotate[k]);L=[M-C,M+C]}var g=r(E+".range",L);r(E+".tick0"),r(E+".dtick",S),p=r(E+".showgrid",b?void 0:!1),p&&(r(E+".gridcolor"),r(E+".gridwidth"),r(E+".griddash")),t[E]._ax={type:"linear",_id:E.slice(0,3),_traceIndices:a,setScale:DF.identity,c2l:DF.identity,r2l:DF.identity,autorange:!0,range:g.slice(),_m:1,_input:{}}}var P=t.lonaxis.range,T=t.lataxis.range,z=P[0],O=P[1];z>0&&O<0&&(O+=360);var V=(z+O)/2,G;if(!c){var Z=f?l.projRotate:[V,0,0];G=r("projection.rotation.lon",Z[0]),r("projection.rotation.lat",Z[1]),r("projection.rotation.roll",Z[2]),p=r("showcoastlines",!f&&b),p&&(r("coastlinecolor"),r("coastlinewidth")),p=r("showocean",b?void 0:!1),p&&r("oceancolor")}var j,N;if(c?(j=-96.6,N=38.7):(j=f?V:G,N=(T[0]+T[1])/2),r("center.lon",j),r("center.lat",N),h&&(r("projection.tilt"),r("projection.distance")),d){var H=l.projParallels||[0,60];r("projection.parallels",H)}r("projection.scale"),p=r("showland",b?void 0:!1),p&&r("landcolor"),p=r("showlakes",b?void 0:!1),p&&r("lakecolor"),p=r("showrivers",b?void 0:!1),p&&(r("rivercolor"),r("riverwidth")),p=r("showcountries",f&&s!=="usa"&&b),p&&(r("countrycolor"),r("countrywidth")),(s==="usa"||s==="north america"&&o===50)&&(r("showsubunits",b),r("subunitcolor"),r("subunitwidth")),f||(p=r("showframe",b),p&&(r("framecolor"),r("framewidth"))),r("bgcolor");var te=r("fitbounds");te&&(delete t.projection.scale,f?(delete t.center.lon,delete t.center.lat):v?(delete t.center.lon,delete t.center.lat,delete t.projection.rotation.lon,delete t.projection.rotation.lat,delete t.lonaxis.range,delete t.lataxis.range):(delete t.center.lon,delete t.center.lat,delete t.projection.rotation.lon))}});var HZ=ye((Amr,$De)=>{"use strict";var s7t=Id().getSubplotCalcData,l7t=Dr().counterRegex,u7t=GDe(),Xm="geo",KDe=l7t(Xm),JDe={};JDe[Xm]={valType:"subplotid",dflt:Xm,editType:"calc"};function c7t(e){for(var t=e._fullLayout,r=e.calcdata,n=t._subplots[Xm],i=0;i<n.length;i++){var a=n[i],o=s7t(r,Xm,a),s=t[a],l=s._subplot;l||(l=u7t({id:a,graphDiv:e,container:t._geolayer.node(),topojsonURL:e._context.topojsonURL,staticPlot:e._context.staticPlot}),t[a]._subplot=l),l.plot(o,t,e._promises)}}function f7t(e,t,r,n){for(var i=n._subplots[Xm]||[],a=0;a<i.length;a++){var o=i[a],s=n[o]._subplot;!t[o]&&s&&(s.framework.remove(),s.clipDef.remove())}}function h7t(e){for(var t=e._fullLayout,r=t._subplots[Xm],n=0;n<r.length;n++){var i=t[r[n]],a=i._subplot;a.updateFx(t,i)}}$De.exports={attr:Xm,name:Xm,idRoot:Xm,idRegex:KDe,attrRegex:KDe,attributes:JDe,layoutAttributes:GZ(),supplyLayoutDefaults:YDe(),plot:c7t,updateFx:h7t,clean:f7t}});var eFe=ye((Smr,QDe)=>{"use strict";QDe.exports={attributes:j2(),supplyDefaults:DRe(),colorbar:$d(),formatLabels:ORe(),calc:pF(),calcGeoJSON:IZ().calcGeoJSON,plot:IZ().plot,style:kZ(),styleOnSelect:op().styleOnSelect,hoverPoints:wDe(),eventData:ADe(),selectPoints:EDe(),moduleType:"trace",name:"scattergeo",basePlotModule:HZ(),categories:["geo","symbols","showLegend","scatter-like"],meta:{}}});var rFe=ye((Mmr,tFe)=>{"use strict";tFe.exports=eFe()});var t5=ye((Emr,aFe)=>{"use strict";var{hovertemplateAttrs:d7t,templatefallbackAttrs:v7t}=Ll(),ax=j2(),p7t=Tu(),iFe=Gl(),g7t=Lh().defaultLine,nx=Ao().extendFlat,nFe=ax.marker.line;aFe.exports=nx({locations:{valType:"data_array",editType:"calc"},locationmode:ax.locationmode,z:{valType:"data_array",editType:"calc"},geojson:nx({},ax.geojson,{}),featureidkey:ax.featureidkey,text:nx({},ax.text,{}),hovertext:nx({},ax.hovertext,{}),marker:{line:{color:nx({},nFe.color,{dflt:g7t}),width:nx({},nFe.width,{dflt:1}),editType:"calc"},opacity:{valType:"number",arrayOk:!0,min:0,max:1,dflt:1,editType:"style"},editType:"calc"},selected:{marker:{opacity:ax.selected.marker.opacity,editType:"plot"},editType:"plot"},unselected:{marker:{opacity:ax.unselected.marker.opacity,editType:"plot"},editType:"plot"},hoverinfo:nx({},iFe.hoverinfo,{editType:"calc",flags:["location","z","text","name"]}),hovertemplate:d7t(),hovertemplatefallback:v7t(),showlegend:nx({},iFe.showlegend,{dflt:!1})},p7t("",{cLetter:"z",editTypeOverride:"calc"}))});var sFe=ye((kmr,oFe)=>{"use strict";var sk=Dr(),m7t=Qh(),y7t=t5(),_7t=["The library used by the *country names* `locationmode` option is changing in the next major version.","Some country names in existing plots may not work in the new version.","To ensure consistent behavior, consider setting `locationmode` to *ISO-3*."].join(" ");oFe.exports=function(t,r,n,i){function a(h,d){return sk.coerce(t,r,y7t,h,d)}var o=a("locations"),s=a("z");if(!(o&&o.length&&sk.isArrayOrTypedArray(s)&&s.length)){r.visible=!1;return}r._length=Math.min(o.length,s.length);var l=a("geojson"),u;(typeof l=="string"&&l!==""||sk.isPlainObject(l))&&(u="geojson-id");var c=a("locationmode",u);c==="country names"&&sk.warn(_7t),c==="geojson-id"&&a("featureidkey"),a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback");var f=a("marker.line.width");f&&a("marker.line.color"),a("marker.opacity"),m7t(t,r,i,a,{prefix:"",cLetter:"z"}),sk.coerceSelectionMarkerOpacity(r,a)}});var zF=ye((Cmr,cFe)=>{"use strict";var lFe=Eo(),x7t=fs().BADNUM,b7t=gv(),w7t=km(),T7t=z0();function uFe(e){return e&&typeof e=="string"}cFe.exports=function(t,r){var n=r._length,i=new Array(n),a;r.geojson?a=function(c){return uFe(c)||lFe(c)}:a=uFe;for(var o=0;o<n;o++){var s=i[o]={},l=r.locations[o],u=r.z[o];a(l)&&lFe(u)?(s.loc=l,s.z=u):(s.loc=null,s.z=x7t),s.index=o}return w7t(i,r),b7t(t,r,{vals:r.z,containerStr:"",cLetter:"z"}),T7t(i,r),i}});var OF=ye((Lmr,hFe)=>{"use strict";var A7t=Oa(),S7t=ka(),jZ=So(),M7t=tc();function E7t(e,t){t&&fFe(e,t)}function fFe(e,t){var r=t[0].trace,n=t[0].node3,i=n.selectAll(".choroplethlocation"),a=r.marker||{},o=a.line||{},s=M7t.makeColorScaleFuncFromTrace(r);i.each(function(l){A7t.select(this).attr("fill",s(l.z)).call(S7t.stroke,l.mlc||o.color).call(jZ.dashLine,"",l.mlw||o.width||0).style("opacity",a.opacity)}),jZ.selectedPointStyle(i,r)}function k7t(e,t){var r=t[0].node3,n=t[0].trace;n.selectedpoints?jZ.selectedPointStyle(r.selectAll(".choroplethlocation"),n):fFe(e,t)}hFe.exports={style:E7t,styleOnSelect:k7t}});var WZ=ye((Pmr,pFe)=>{"use strict";var C7t=Oa(),dFe=Dr(),r5=ix(),L7t=mF().getTopojsonFeatures,vFe=Ag().findExtremes,P7t=OF().style;function I7t(e,t,r){var n=t.layers.backplot.select(".choroplethlayer");dFe.makeTraceGroups(n,r,"trace choropleth").each(function(i){var a=C7t.select(this),o=a.selectAll("path.choroplethlocation").data(dFe.identity);o.enter().append("path").classed("choroplethlocation",!0),o.exit().remove(),P7t(e,i)})}function R7t(e,t){for(var r=e[0].trace,n=t[r.geo],i=n._subplot,a=r.locationmode,o=r._length,s=a==="geojson-id"?r5.extractTraceFeature(e):L7t(r,i.topojson),l=[],u=[],c=0;c<o;c++){var f=e[c],h=a==="geojson-id"?f.fOut:r5.locationToFeature(a,f.loc,s);if(h){f.geojson=h,f.ct=h.properties.ct,f._polygons=r5.feature2polygons(h);var d=r5.computeBbox(h);l.push(d[0],d[2]),u.push(d[1],d[3])}else f.geojson=null}if(n.fitbounds==="geojson"&&a==="geojson-id"){var v=r5.computeBbox(r5.getTraceGeojson(r));l=[v[0],v[2]],u=[v[1],v[3]]}var _={padded:!0};r._extremes.lon=vFe(n.lonaxis._ax,l,_),r._extremes.lat=vFe(n.lataxis._ax,u,_)}pFe.exports={calcGeoJSON:R7t,plot:I7t}});var qF=ye((Imr,gFe)=>{"use strict";var D7t=ho(),F7t=t5(),z7t=Dr().fillText;gFe.exports=function(t,r,n){var i=t.cd,a=i[0].trace,o=t.subplot,s,l,u,c,f=[r,n],h=[r+360,n];for(l=0;l<i.length;l++)if(s=i[l],c=!1,s._polygons){for(u=0;u<s._polygons.length;u++)s._polygons[u].contains(f)&&(c=!c),s._polygons[u].contains(h)&&(c=!c);if(c)break}if(!(!c||!s))return t.x0=t.x1=t.xa.c2p(s.ct),t.y0=t.y1=t.ya.c2p(s.ct),t.index=s.index,t.location=s.loc,t.z=s.z,t.zLabel=D7t.tickText(o.mockAxis,o.mockAxis.c2l(s.z),"hover").text,t.hovertemplate=s.hovertemplate,O7t(t,a,s),[t]};function O7t(e,t,r){if(!t.hovertemplate){var n=r.hi||t.hoverinfo,i=String(r.loc),a=n==="all"?F7t.hoverinfo.flags:n.split("+"),o=a.indexOf("name")!==-1,s=a.indexOf("location")!==-1,l=a.indexOf("z")!==-1,u=a.indexOf("text")!==-1,c=!o&&s,f=[];c?e.nameOverride=i:(o&&(e.nameOverride=t.name),s&&f.push(i)),l&&f.push(e.zLabel),u&&z7t(r,t,f),e.extraText=f.join("<br>")}}});var BF=ye((Rmr,mFe)=>{"use strict";mFe.exports=function(t,r,n,i,a){t.location=r.location,t.z=r.z;var o=i[a];return o.fIn&&o.fIn.properties&&(t.properties=o.fIn.properties),t.ct=o.ct,t}});var NF=ye((Dmr,yFe)=>{"use strict";yFe.exports=function(t,r){var n=t.cd,i=t.xaxis,a=t.yaxis,o=[],s,l,u,c,f;if(r===!1)for(s=0;s<n.length;s++)n[s].selected=0;else for(s=0;s<n.length;s++)l=n[s],u=l.ct,u&&(c=i.c2p(u),f=a.c2p(u),r.contains([c,f],null,s,t)?(o.push({pointNumber:s,lon:u[0],lat:u[1]}),l.selected=1):l.selected=0);return o}});var xFe=ye((Fmr,_Fe)=>{"use strict";_Fe.exports={attributes:t5(),supplyDefaults:sFe(),colorbar:S_(),calc:zF(),calcGeoJSON:WZ().calcGeoJSON,plot:WZ().plot,style:OF().style,styleOnSelect:OF().styleOnSelect,hoverPoints:qF(),eventData:BF(),selectPoints:NF(),moduleType:"trace",name:"choropleth",basePlotModule:HZ(),categories:["geo","noOpacity","showLegend"],meta:{}}});var wFe=ye((zmr,bFe)=>{"use strict";bFe.exports=xFe()});var UF=ye((Omr,AFe)=>{"use strict";var q7t=qa(),s0=Dr(),B7t=cT();function N7t(e,t,r,n){var i=e.cd,a=i[0].t,o=i[0].trace,s=e.xa,l=e.ya,u=a.x,c=a.y,f=s.c2p(t),h=l.c2p(r),d=e.distance,v;if(a.tree){var _=s.p2c(f-d),b=s.p2c(f+d),p=l.p2c(h-d),k=l.p2c(h+d);n==="x"?v=a.tree.range(Math.min(_,b),Math.min(l._rl[0],l._rl[1]),Math.max(_,b),Math.max(l._rl[0],l._rl[1])):v=a.tree.range(Math.min(_,b),Math.min(p,k),Math.max(_,b),Math.max(p,k))}else v=a.ids;var E,S,L,x,C,M,g,P,T,z=d;if(n==="x"){var O=!!o.xperiodalignment,V=!!o.yperiodalignment;for(C=0;C<v.length;C++){if(E=v[C],L=u[E],M=Math.abs(s.c2p(L)-f),O){var G=s.c2p(o._xStarts[E]),Z=s.c2p(o._xEnds[E]);M=f>=Math.min(G,Z)&&f<=Math.max(G,Z)?0:1/0}if(M<z){if(z=M,x=c[E],g=l.c2p(x)-h,V){var j=l.c2p(o._yStarts[E]),N=l.c2p(o._yEnds[E]);g=h>=Math.min(j,N)&&h<=Math.max(j,N)?0:1/0}T=Math.sqrt(M*M+g*g),S=v[C]}}}else for(C=v.length-1;C>-1;C--)E=v[C],L=u[E],x=c[E],M=s.c2p(L)-f,g=l.c2p(x)-h,P=Math.sqrt(M*M+g*g),P<z&&(z=T=P,S=E);return e.index=S,e.distance=z,e.dxy=T,S===void 0?[e]:[TFe(e,u,c,o)]}function TFe(e,t,r,n){var i=e.xa,a=e.ya,o=e.distance,s=e.dxy,l=e.index,u={pointNumber:l,x:t[l],y:r[l]};u.tx=s0.isArrayOrTypedArray(n.text)?n.text[l]:n.text,u.htx=Array.isArray(n.hovertext)?n.hovertext[l]:n.hovertext,u.data=Array.isArray(n.customdata)?n.customdata[l]:n.customdata,u.tp=Array.isArray(n.textposition)?n.textposition[l]:n.textposition;var c=n.textfont;c&&(u.ts=s0.isArrayOrTypedArray(c.size)?c.size[l]:c.size,u.tc=s0.isArrayOrTypedArray(c.color)?c.color[l]:c.color,u.tf=Array.isArray(c.family)?c.family[l]:c.family,u.tw=Array.isArray(c.weight)?c.weight[l]:c.weight,u.ty=Array.isArray(c.style)?c.style[l]:c.style,u.tv=Array.isArray(c.variant)?c.variant[l]:c.variant);var f=n.marker;f&&(u.ms=s0.isArrayOrTypedArray(f.size)?f.size[l]:f.size,u.mo=s0.isArrayOrTypedArray(f.opacity)?f.opacity[l]:f.opacity,u.mx=s0.isArrayOrTypedArray(f.symbol)?f.symbol[l]:f.symbol,u.ma=s0.isArrayOrTypedArray(f.angle)?f.angle[l]:f.angle,u.mc=s0.isArrayOrTypedArray(f.color)?f.color[l]:f.color);var h=f&&f.line;h&&(u.mlc=Array.isArray(h.color)?h.color[l]:h.color,u.mlw=s0.isArrayOrTypedArray(h.width)?h.width[l]:h.width);var d=f&&f.gradient;d&&d.type!=="none"&&(u.mgt=Array.isArray(d.type)?d.type[l]:d.type,u.mgc=Array.isArray(d.color)?d.color[l]:d.color);var v=i.c2p(u.x,!0),_=a.c2p(u.y,!0),b=u.mrc||1,p=n.hoverlabel;p&&(u.hbg=Array.isArray(p.bgcolor)?p.bgcolor[l]:p.bgcolor,u.hbc=Array.isArray(p.bordercolor)?p.bordercolor[l]:p.bordercolor,u.hts=s0.isArrayOrTypedArray(p.font.size)?p.font.size[l]:p.font.size,u.htc=Array.isArray(p.font.color)?p.font.color[l]:p.font.color,u.htf=Array.isArray(p.font.family)?p.font.family[l]:p.font.family,u.hnl=s0.isArrayOrTypedArray(p.namelength)?p.namelength[l]:p.namelength);var k=n.hoverinfo;k&&(u.hi=Array.isArray(k)?k[l]:k);var E=n.hovertemplate;E&&(u.ht=Array.isArray(E)?E[l]:E);var S={};S[e.index]=u;var L=n._origX,x=n._origY,C=s0.extendFlat({},e,{color:B7t(n,u),x0:v-b,x1:v+b,xLabelVal:L?L[l]:u.x,y0:_-b,y1:_+b,yLabelVal:x?x[l]:u.y,cd:S,distance:o,spikeDistance:s,hovertemplate:u.ht});return u.htx?C.text=u.htx:u.tx?C.text=u.tx:n.text&&(C.text=n.text),s0.fillText(u,n,C),q7t.getComponentMethod("errorbars","hoverInfo")(u,n,C),C}AFe.exports={hoverPoints:N7t,calcHover:TFe}});var ox=ye((qmr,MFe)=>{"use strict";var SFe=20;MFe.exports={TOO_MANY_POINTS:1e5,SYMBOL_SDF_SIZE:200,SYMBOL_SIZE:SFe,SYMBOL_STROKE:SFe/20,DOT_RE:/-dot/,OPEN_RE:/-open/,DASHES:{solid:[1],dot:[1,1],dash:[4,1],longdash:[8,1],dashdot:[4,1,1,1],longdashdot:[8,1,1,1]}}});var lk=ye((Bmr,LFe)=>{"use strict";var U7t=Gl(),V7t=ec(),G7t=Cg(),mf=pf(),EFe=df().axisHoverFormat,kFe=Tu(),H7t=Z1(),XZ=Ao().extendFlat,j7t=mc().overrideAll,W7t=ox().DASHES,CFe=mf.line,r1=mf.marker,X7t=r1.line,sx=LFe.exports=j7t({x:mf.x,x0:mf.x0,dx:mf.dx,y:mf.y,y0:mf.y0,dy:mf.dy,xperiod:mf.xperiod,yperiod:mf.yperiod,xperiod0:mf.xperiod0,yperiod0:mf.yperiod0,xperiodalignment:mf.xperiodalignment,yperiodalignment:mf.yperiodalignment,xhoverformat:EFe("x"),yhoverformat:EFe("y"),text:mf.text,hovertext:mf.hovertext,textposition:mf.textposition,textfont:V7t({noFontShadow:!0,noFontLineposition:!0,noFontTextcase:!0,editType:"calc",colorEditType:"style",arrayOk:!0,noNumericWeightValues:!0,variantValues:["normal","small-caps"]}),mode:{valType:"flaglist",flags:["lines","markers","text"],extras:["none"]},line:{color:CFe.color,width:CFe.width,shape:{valType:"enumerated",values:["linear","hv","vh","hvh","vhv"],dflt:"linear",editType:"plot"},dash:{valType:"enumerated",values:H7t(W7t),dflt:"solid"}},marker:XZ({},kFe("marker"),{symbol:r1.symbol,angle:r1.angle,size:r1.size,sizeref:r1.sizeref,sizemin:r1.sizemin,sizemode:r1.sizemode,opacity:r1.opacity,colorbar:r1.colorbar,line:XZ({},kFe("marker.line"),{width:X7t.width})}),connectgaps:mf.connectgaps,fill:XZ({},mf.fill,{dflt:"none"}),fillcolor:G7t(),selected:{marker:mf.selected.marker,textfont:mf.selected.textfont},unselected:{marker:mf.unselected.marker,textfont:mf.unselected.textfont},opacity:U7t.opacity},"calc","nested");sx.x.editType=sx.y.editType=sx.x0.editType=sx.y0.editType="calc+clearAxisTypes";sx.hovertemplate=mf.hovertemplate;sx.hovertemplatefallback=mf.hovertemplatefallback;sx.texttemplate=mf.texttemplate;sx.texttemplatefallback=mf.texttemplatefallback});var VF=ye(ZZ=>{"use strict";var PFe=ox();ZZ.isOpenSymbol=function(e){return typeof e=="string"?PFe.OPEN_RE.test(e):e%200>100};ZZ.isDotSymbol=function(e){return typeof e=="string"?PFe.DOT_RE.test(e):e>200}});var DFe=ye((Umr,RFe)=>{"use strict";var IFe=Dr(),Z7t=qa(),Y7t=VF(),K7t=lk(),J7t=Sm(),GF=Ru(),$7t=eT(),Q7t=Ig(),e9t=$p(),t9t=R0(),r9t=Rg(),i9t=D0();RFe.exports=function(t,r,n,i){function a(d,v){return IFe.coerce(t,r,K7t,d,v)}var o=t.marker?Y7t.isOpenSymbol(t.marker.symbol):!1,s=GF.isBubble(t),l=$7t(t,r,i,a);if(!l){r.visible=!1;return}Q7t(t,r,i,a),a("xhoverformat"),a("yhoverformat");var u=l<J7t.PTS_LINESONLY?"lines+markers":"lines";a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),a("mode",u),GF.hasMarkers(r)&&(e9t(t,r,n,i,a,{noAngleRef:!0,noStandOff:!0}),a("marker.line.width",o||s?1:0)),GF.hasLines(r)&&(a("connectgaps"),t9t(t,r,n,i,a),a("line.shape")),GF.hasText(r)&&(a("texttemplate"),a("texttemplatefallback"),i9t(t,r,i,a,{noFontShadow:!0,noFontLineposition:!0,noFontTextcase:!0}));var c=(r.line||{}).color,f=(r.marker||{}).color;a("fill"),r.fill!=="none"&&r9t(t,r,n,a);var h=Z7t.getComponentMethod("errorbars","supplyDefaults");h(t,r,c||f||n,{axis:"y"}),h(t,r,c||f||n,{axis:"x",inherit:"y"}),IFe.coerceSelectionMarkerOpacity(r,a)}});var zFe=ye((Vmr,FFe)=>{"use strict";var n9t=nI();FFe.exports=function(t,r,n){var i=t.i;return"x"in t||(t.x=r._x[i]),"y"in t||(t.y=r._y[i]),n9t(t,r,n)}});var qFe=ye((Gmr,OFe)=>{"use strict";function a9t(e,t,r,n,i){for(var a=i+1;n<=i;){var o=n+i>>>1,s=e[o],l=r!==void 0?r(s,t):s-t;l>=0?(a=o,i=o-1):n=o+1}return a}function o9t(e,t,r,n,i){for(var a=i+1;n<=i;){var o=n+i>>>1,s=e[o],l=r!==void 0?r(s,t):s-t;l>0?(a=o,i=o-1):n=o+1}return a}function s9t(e,t,r,n,i){for(var a=n-1;n<=i;){var o=n+i>>>1,s=e[o],l=r!==void 0?r(s,t):s-t;l<0?(a=o,n=o+1):i=o-1}return a}function l9t(e,t,r,n,i){for(var a=n-1;n<=i;){var o=n+i>>>1,s=e[o],l=r!==void 0?r(s,t):s-t;l<=0?(a=o,n=o+1):i=o-1}return a}function u9t(e,t,r,n,i){for(;n<=i;){var a=n+i>>>1,o=e[a],s=r!==void 0?r(o,t):o-t;if(s===0)return a;s<=0?n=a+1:i=a-1}return-1}function uk(e,t,r,n,i,a){return typeof r=="function"?a(e,t,r,n===void 0?0:n|0,i===void 0?e.length-1:i|0):a(e,t,void 0,r===void 0?0:r|0,n===void 0?e.length-1:n|0)}OFe.exports={ge:function(e,t,r,n,i){return uk(e,t,r,n,i,a9t)},gt:function(e,t,r,n,i){return uk(e,t,r,n,i,o9t)},lt:function(e,t,r,n,i){return uk(e,t,r,n,i,s9t)},le:function(e,t,r,n,i){return uk(e,t,r,n,i,l9t)},eq:function(e,t,r,n,i){return uk(e,t,r,n,i,u9t)}}});var Zm=ye((Hmr,NFe)=>{"use strict";NFe.exports=function(t,r,n){var i={},a,o;if(typeof r=="string"&&(r=BFe(r)),Array.isArray(r)){var s={};for(o=0;o<r.length;o++)s[r[o]]=!0;r=s}for(a in r)r[a]=BFe(r[a]);var l={};for(a in r){var u=r[a];if(Array.isArray(u))for(o=0;o<u.length;o++){var c=u[o];if(n&&(l[c]=!0),c in t){if(i[a]=t[c],n)for(var f=o;f<u.length;f++)l[u[f]]=!0;break}}else a in t&&(r[a]&&(i[a]=t[a]),n&&(l[a]=!0))}if(n)for(a in t)l[a]||(i[a]=t[a]);return i};var YZ={};function BFe(e){return YZ[e]?YZ[e]:(typeof e=="string"&&(e=YZ[e]=e.split(/\s*,\s*|\s+/)),e)}});var i5=ye((jmr,UFe)=>{"use strict";var c9t=Zm();UFe.exports=f9t;function f9t(e){var t;return arguments.length>1&&(e=arguments),typeof e=="string"?e=e.split(/\s/).map(parseFloat):typeof e=="number"&&(e=[e]),e.length&&typeof e[0]=="number"?e.length===1?t={width:e[0],height:e[0],x:0,y:0}:e.length===2?t={width:e[0],height:e[1],x:0,y:0}:t={x:e[0],y:e[1],width:e[2]-e[0]||0,height:e[3]-e[1]||0}:e&&(e=c9t(e,{left:"x l left Left",top:"y t top Top",width:"w width W Width",height:"h height W Width",bottom:"b bottom Bottom",right:"r right Right"}),t={x:e.left||0,y:e.top||0},e.width==null?e.right?t.width=e.right-t.x:t.width=0:t.width=e.width,e.height==null?e.bottom?t.height=e.bottom-t.y:t.height=0:t.height=e.height),t}});var X2=ye((Wmr,VFe)=>{"use strict";VFe.exports=h9t;function h9t(e,t){if(!e||e.length==null)throw Error("Argument should be an array");t==null?t=1:t=Math.floor(t);for(var r=Array(t*2),n=0;n<t;n++){for(var i=-1/0,a=1/0,o=n,s=e.length;o<s;o+=t)e[o]>i&&(i=e[o]),e[o]<a&&(a=e[o]);r[n]=a,r[t+n]=i}return r}});var HFe=ye((Xmr,GFe)=>{GFe.exports=function(){for(var e=0;e<arguments.length;e++)if(arguments[e]!==void 0)return arguments[e]}});var Z2=ye((Zmr,WFe)=>{var jFe=YD();WFe.exports=d9t;function d9t(e,t,r){if(!e)throw new TypeError("must specify data as first parameter");if(r=+(r||0)|0,Array.isArray(e)&&e[0]&&typeof e[0][0]=="number"){var n=e[0].length,i=e.length*n,a,o,s,l;(!t||typeof t=="string")&&(t=new(jFe(t||"float32"))(i+r));var u=t.length-r;if(i!==u)throw new Error("source length "+i+" ("+n+"x"+e.length+") does not match destination length "+u);for(a=0,s=r;a<e.length;a++)for(o=0;o<n;o++)t[s++]=e[a][o]===null?NaN:e[a][o]}else if(!t||typeof t=="string"){var c=jFe(t||"float32");if(Array.isArray(e)||t==="array")for(t=new c(e.length+r),a=0,s=r,l=t.length;s<l;s++,a++)t[s]=e[a]===null?NaN:e[a];else r===0?t=new c(e):(t=new c(e.length+r),t.set(e,r))}else t.set(e,r);return t}});var ZFe=ye((Ymr,XFe)=>{"use strict";XFe.exports=function(e){var t=typeof e;return e!==null&&(t==="object"||t==="function")}});var KFe=ye((Kmr,YFe)=>{"use strict";YFe.exports=Math.log2||function(e){return Math.log(e)*Math.LOG2E}});var ize=ye((Jmr,rze)=>{"use strict";var JFe=qFe(),$Fe=ZE(),v9t=i5(),p9t=X2(),QFe=Zm(),KZ=HFe(),g9t=Z2(),m9t=ZFe(),y9t=YD(),eze=KFe(),_9t=1073741824;rze.exports=function(t,r){r||(r={}),t=g9t(t,"float64"),r=QFe(r,{bounds:"range bounds dataBox databox",maxDepth:"depth maxDepth maxdepth level maxLevel maxlevel levels",dtype:"type dtype format out dst output destination"});let n=KZ(r.maxDepth,255),i=KZ(r.bounds,p9t(t,2));i[0]===i[2]&&i[2]++,i[1]===i[3]&&i[3]++;let a=tze(t,i),o=t.length>>>1,s;r.dtype||(r.dtype="array"),typeof r.dtype=="string"?s=new(y9t(r.dtype))(o):r.dtype&&(s=r.dtype,Array.isArray(s)&&(s.length=o));for(let p=0;p<o;++p)s[p]=p;let l=[],u=[],c=[],f=[];d(0,0,1,s,0,1);let h=0;for(let p=0;p<l.length;p++){let k=l[p];if(s.set)s.set(k,h);else for(let S=0,L=k.length;S<L;S++)s[S+h]=k[S];let E=h+l[p].length;f[p]=[h,E],h=E}return s.range=v,s;function d(p,k,E,S,L,x){if(!S.length)return null;let C=l[L]||(l[L]=[]),M=c[L]||(c[L]=[]),g=u[L]||(u[L]=[]),P=C.length;if(L++,L>n||x>_9t){for(let N=0;N<S.length;N++)C.push(S[N]),M.push(x),g.push(null,null,null,null);return P}if(C.push(S[0]),M.push(x),S.length<=1)return g.push(null,null,null,null),P;let T=E*.5,z=p+T,O=k+T,V=[],G=[],Z=[],j=[];for(let N=1,H=S.length;N<H;N++){let te=S[N],oe=a[te*2],_e=a[te*2+1];oe<z?_e<O?V.push(te):G.push(te):_e<O?Z.push(te):j.push(te)}return x<<=2,g.push(d(p,k,T,V,L,x),d(p,O,T,G,L,x+1),d(z,k,T,Z,L,x+2),d(z,O,T,j,L,x+3)),P}function v(...p){let k;if(m9t(p[p.length-1])){let Z=p.pop();!p.length&&(Z.x!=null||Z.l!=null||Z.left!=null)&&(p=[Z],k={}),k=QFe(Z,{level:"level maxLevel",d:"d diam diameter r radius px pxSize pixel pixelSize maxD size minSize",lod:"lod details ranges offsets"})}else k={};p.length||(p=i);let E=v9t(...p),[S,L,x,C]=[Math.min(E.x,E.x+E.width),Math.min(E.y,E.y+E.height),Math.max(E.x,E.x+E.width),Math.max(E.y,E.y+E.height)],[M,g,P,T]=tze([S,L,x,C],i),z=KZ(k.level,l.length);if(k.d!=null){let Z;typeof k.d=="number"?Z=[k.d,k.d]:k.d.length&&(Z=k.d),z=Math.min(Math.max(Math.ceil(-eze(Math.abs(Z[0])/(i[2]-i[0]))),Math.ceil(-eze(Math.abs(Z[1])/(i[3]-i[1])))),z)}if(z=Math.min(z,l.length),k.lod)return _(M,g,P,T,z);let O=[];V(0,0,1,0,0,1);function V(Z,j,N,H,te,oe){if(te===null||oe===null)return;let _e=Z+N,Ee=j+N;if(M>_e||g>Ee||P<Z||T<j||H>=z||te===oe)return;let Ce=l[H];oe===void 0&&(oe=Ce.length);for(let Re=te;Re<oe;Re++){let ce=Ce[Re],Ze=t[ce*2],ut=t[ce*2+1];Ze>=S&&Ze<=x&&ut>=L&&ut<=C&&O.push(ce)}let me=u[H],ie=me[te*4+0],Se=me[te*4+1],Le=me[te*4+2],Ae=me[te*4+3],Fe=G(me,te+1),Pe=N*.5,ge=H+1;V(Z,j,Pe,ge,ie,Se||Le||Ae||Fe),V(Z,j+Pe,Pe,ge,Se,Le||Ae||Fe),V(Z+Pe,j,Pe,ge,Le,Ae||Fe),V(Z+Pe,j+Pe,Pe,ge,Ae,Fe)}function G(Z,j){let N=null,H=0;for(;N===null;)if(N=Z[j*4+H],H++,H>Z.length)return null;return N}return O}function _(p,k,E,S,L){let x=[];for(let C=0;C<L;C++){let M=c[C],g=f[C][0],P=b(p,k,C),T=b(E,S,C),z=JFe.ge(M,P),O=JFe.gt(M,T,z,M.length-1);x[C]=[z+g,O+g]}return x}function b(p,k,E){let S=1,L=.5,x=.5,C=.5;for(let M=0;M<E;M++)S<<=2,S+=p<L?k<x?0:1:k<x?2:3,C*=.5,L+=p<L?-C:C,x+=k<x?-C:C;return S}};function tze(e,t){let[r,n,i,a]=t,o=1/(i-r),s=1/(a-n),l=new Array(e.length);for(let u=0,c=e.length/2;u<c;u++)l[2*u]=$Fe((e[2*u]-r)*o,0,1),l[2*u+1]=$Fe((e[2*u+1]-n)*s,0,1);return l}});var HF=ye(($mr,nze)=>{"use strict";nze.exports=ize()});var JZ=ye((Qmr,aze)=>{aze.exports=x9t;function x9t(e){var t=0,r=0,n=0,i=0;return e.map(function(a){a=a.slice();var o=a[0],s=o.toUpperCase();if(o!=s)switch(a[0]=s,o){case"a":a[6]+=n,a[7]+=i;break;case"v":a[1]+=i;break;case"h":a[1]+=n;break;default:for(var l=1;l<a.length;)a[l++]+=n,a[l++]+=i}switch(s){case"Z":n=t,i=r;break;case"H":n=a[1];break;case"V":i=a[1];break;case"M":n=t=a[1],i=r=a[2];break;default:n=a[a.length-2],i=a[a.length-1]}return a})}});var lze=ye((jF,sze)=>{"use strict";Object.defineProperty(jF,"__esModule",{value:!0});var b9t=function(){function e(t,r){var n=[],i=!0,a=!1,o=void 0;try{for(var s=t[Symbol.iterator](),l;!(i=(l=s.next()).done)&&(n.push(l.value),!(r&&n.length===r));i=!0);}catch(u){a=!0,o=u}finally{try{!i&&s.return&&s.return()}finally{if(a)throw o}}return n}return function(t,r){if(Array.isArray(t))return t;if(Symbol.iterator in Object(t))return e(t,r);throw new TypeError("Invalid attempt to destructure non-iterable instance")}}(),ck=Math.PI*2,$Z=function(t,r,n,i,a,o,s){var l=t.x,u=t.y;l*=r,u*=n;var c=i*l-a*u,f=a*l+i*u;return{x:c+o,y:f+s}},w9t=function(t,r){var n=r===1.5707963267948966?.551915024494:r===-1.5707963267948966?-.551915024494:1.3333333333333333*Math.tan(r/4),i=Math.cos(t),a=Math.sin(t),o=Math.cos(t+r),s=Math.sin(t+r);return[{x:i-a*n,y:a+i*n},{x:o+s*n,y:s-o*n},{x:o,y:s}]},oze=function(t,r,n,i){var a=t*i-r*n<0?-1:1,o=t*n+r*i;return o>1&&(o=1),o<-1&&(o=-1),a*Math.acos(o)},T9t=function(t,r,n,i,a,o,s,l,u,c,f,h){var d=Math.pow(a,2),v=Math.pow(o,2),_=Math.pow(f,2),b=Math.pow(h,2),p=d*v-d*b-v*_;p<0&&(p=0),p/=d*b+v*_,p=Math.sqrt(p)*(s===l?-1:1);var k=p*a/o*h,E=p*-o/a*f,S=c*k-u*E+(t+n)/2,L=u*k+c*E+(r+i)/2,x=(f-k)/a,C=(h-E)/o,M=(-f-k)/a,g=(-h-E)/o,P=oze(1,0,x,C),T=oze(x,C,M,g);return l===0&&T>0&&(T-=ck),l===1&&T<0&&(T+=ck),[S,L,P,T]},A9t=function(t){var r=t.px,n=t.py,i=t.cx,a=t.cy,o=t.rx,s=t.ry,l=t.xAxisRotation,u=l===void 0?0:l,c=t.largeArcFlag,f=c===void 0?0:c,h=t.sweepFlag,d=h===void 0?0:h,v=[];if(o===0||s===0)return[];var _=Math.sin(u*ck/360),b=Math.cos(u*ck/360),p=b*(r-i)/2+_*(n-a)/2,k=-_*(r-i)/2+b*(n-a)/2;if(p===0&&k===0)return[];o=Math.abs(o),s=Math.abs(s);var E=Math.pow(p,2)/Math.pow(o,2)+Math.pow(k,2)/Math.pow(s,2);E>1&&(o*=Math.sqrt(E),s*=Math.sqrt(E));var S=T9t(r,n,i,a,o,s,f,d,_,b,p,k),L=b9t(S,4),x=L[0],C=L[1],M=L[2],g=L[3],P=Math.abs(g)/(ck/4);Math.abs(1-P)<1e-7&&(P=1);var T=Math.max(Math.ceil(P),1);g/=T;for(var z=0;z<T;z++)v.push(w9t(M,g)),M+=g;return v.map(function(O){var V=$Z(O[0],o,s,b,_,x,C),G=V.x,Z=V.y,j=$Z(O[1],o,s,b,_,x,C),N=j.x,H=j.y,te=$Z(O[2],o,s,b,_,x,C),oe=te.x,_e=te.y;return{x1:G,y1:Z,x2:N,y2:H,x:oe,y:_e}})};jF.default=A9t;sze.exports=jF.default});var fze=ye((eyr,cze)=>{"use strict";cze.exports=M9t;var S9t=lze();function M9t(e){for(var t,r=[],n=0,i=0,a=0,o=0,s=null,l=null,u=0,c=0,f=0,h=e.length;f<h;f++){var d=e[f],v=d[0];switch(v){case"M":a=d[1],o=d[2];break;case"A":var _=S9t({px:u,py:c,cx:d[6],cy:d[7],rx:d[1],ry:d[2],xAxisRotation:d[3],largeArcFlag:d[4],sweepFlag:d[5]});if(!_.length)continue;for(var b=0,p;b<_.length;b++)p=_[b],d=["C",p.x1,p.y1,p.x2,p.y2,p.x,p.y],b<_.length-1&&r.push(d);break;case"S":var k=u,E=c;(t=="C"||t=="S")&&(k+=k-n,E+=E-i),d=["C",k,E,d[1],d[2],d[3],d[4]];break;case"T":t=="Q"||t=="T"?(s=u*2-s,l=c*2-l):(s=u,l=c),d=uze(u,c,s,l,d[1],d[2]);break;case"Q":s=d[1],l=d[2],d=uze(u,c,d[1],d[2],d[3],d[4]);break;case"L":d=WF(u,c,d[1],d[2]);break;case"H":d=WF(u,c,d[1],c);break;case"V":d=WF(u,c,u,d[1]);break;case"Z":d=WF(u,c,a,o);break}t=v,u=d[d.length-2],c=d[d.length-1],d.length>4?(n=d[d.length-4],i=d[d.length-3]):(n=u,i=c),r.push(d)}return r}function WF(e,t,r,n){return["C",e,t,r,n,r,n]}function uze(e,t,r,n,i,a){return["C",e/3+2/3*r,t/3+2/3*n,i/3+2/3*r,a/3+2/3*n,i,a]}});var QZ=ye((tyr,hze)=>{"use strict";hze.exports=function(t){return typeof t!="string"?!1:(t=t.trim(),!!(/^[mzlhvcsqta]\s*[-+.0-9][^mlhvzcsqta]+/i.test(t)&&/[\dz]$/i.test(t)&&t.length>4))}});var pze=ye((ryr,vze)=>{"use strict";var E9t=eM(),k9t=JZ(),C9t=fze(),L9t=QZ(),dze=lE();vze.exports=P9t;function P9t(e){if(Array.isArray(e)&&e.length===1&&typeof e[0]=="string"&&(e=e[0]),typeof e=="string"&&(dze(L9t(e),"String is not an SVG path."),e=E9t(e)),dze(Array.isArray(e),"Argument should be a string or an array of path segments."),e=k9t(e),e=C9t(e),!e.length)return[0,0,0,0];for(var t=[1/0,1/0,-1/0,-1/0],r=0,n=e.length;r<n;r++)for(var i=e[r].slice(1),a=0;a<i.length;a+=2)i[a+0]<t[0]&&(t[0]=i[a+0]),i[a+1]<t[1]&&(t[1]=i[a+1]),i[a+0]>t[2]&&(t[2]=i[a+0]),i[a+1]>t[3]&&(t[3]=i[a+1]);return t}});var bze=ye((iyr,xze)=>{var Y2=Math.PI,gze=_ze(120);xze.exports=I9t;function I9t(e){for(var t,r=[],n=0,i=0,a=0,o=0,s=null,l=null,u=0,c=0,f=0,h=e.length;f<h;f++){var d=e[f],v=d[0];switch(v){case"M":a=d[1],o=d[2];break;case"A":d=yze(u,c,d[1],d[2],_ze(d[3]),d[4],d[5],d[6],d[7]),d.unshift("C"),d.length>7&&(r.push(d.splice(0,7)),d.unshift("C"));break;case"S":var _=u,b=c;(t=="C"||t=="S")&&(_+=_-n,b+=b-i),d=["C",_,b,d[1],d[2],d[3],d[4]];break;case"T":t=="Q"||t=="T"?(s=u*2-s,l=c*2-l):(s=u,l=c),d=mze(u,c,s,l,d[1],d[2]);break;case"Q":s=d[1],l=d[2],d=mze(u,c,d[1],d[2],d[3],d[4]);break;case"L":d=XF(u,c,d[1],d[2]);break;case"H":d=XF(u,c,d[1],c);break;case"V":d=XF(u,c,u,d[1]);break;case"Z":d=XF(u,c,a,o);break}t=v,u=d[d.length-2],c=d[d.length-1],d.length>4?(n=d[d.length-4],i=d[d.length-3]):(n=u,i=c),r.push(d)}return r}function XF(e,t,r,n){return["C",e,t,r,n,r,n]}function mze(e,t,r,n,i,a){return["C",e/3+2/3*r,t/3+2/3*n,i/3+2/3*r,a/3+2/3*n,i,a]}function yze(e,t,r,n,i,a,o,s,l,u){if(u)E=u[0],S=u[1],p=u[2],k=u[3];else{var c=eY(e,t,-i);e=c.x,t=c.y,c=eY(s,l,-i),s=c.x,l=c.y;var f=(e-s)/2,h=(t-l)/2,d=f*f/(r*r)+h*h/(n*n);d>1&&(d=Math.sqrt(d),r=d*r,n=d*n);var v=r*r,_=n*n,b=(a==o?-1:1)*Math.sqrt(Math.abs((v*_-v*h*h-_*f*f)/(v*h*h+_*f*f)));b==1/0&&(b=1);var p=b*r*h/n+(e+s)/2,k=b*-n*f/r+(t+l)/2,E=Math.asin(((t-k)/n).toFixed(9)),S=Math.asin(((l-k)/n).toFixed(9));E=e<p?Y2-E:E,S=s<p?Y2-S:S,E<0&&(E=Y2*2+E),S<0&&(S=Y2*2+S),o&&E>S&&(E=E-Y2*2),!o&&S>E&&(S=S-Y2*2)}if(Math.abs(S-E)>gze){var L=S,x=s,C=l;S=E+gze*(o&&S>E?1:-1),s=p+r*Math.cos(S),l=k+n*Math.sin(S);var M=yze(s,l,r,n,i,0,o,x,C,[S,L,p,k])}var g=Math.tan((S-E)/4),P=4/3*r*g,T=4/3*n*g,z=[2*e-(e+P*Math.sin(E)),2*t-(t-T*Math.cos(E)),s+P*Math.sin(S),l-T*Math.cos(S),s,l];if(u)return z;M&&(z=z.concat(M));for(var O=0;O<z.length;){var V=eY(z[O],z[O+1],i);z[O++]=V.x,z[O++]=V.y}return z}function eY(e,t,r){return{x:e*Math.cos(r)-t*Math.sin(r),y:e*Math.sin(r)+t*Math.cos(r)}}function _ze(e){return e*(Y2/180)}});var Tze=ye((nyr,wze)=>{var R9t=JZ(),D9t=bze(),F9t={M:"moveTo",C:"bezierCurveTo"};wze.exports=function(e,t){e.beginPath(),D9t(R9t(t)).forEach(function(r){var n=r[0],i=r.slice(1);e[F9t[n]].apply(e,i)}),e.closePath()}});var Eze=ye((ayr,Mze)=>{"use strict";var z9t=ZE();Mze.exports=O9t;var fk=1e20;function O9t(e,t){t||(t={});var r=t.cutoff==null?.25:t.cutoff,n=t.radius==null?8:t.radius,i=t.channel||0,a,o,s,l,u,c,f,h,d,v,_;if(ArrayBuffer.isView(e)||Array.isArray(e)){if(!t.width||!t.height)throw Error("For raw data width and height should be provided by options");a=t.width,o=t.height,l=e,t.stride?c=t.stride:c=Math.floor(e.length/a/o)}else window.HTMLCanvasElement&&e instanceof window.HTMLCanvasElement?(h=e,f=h.getContext("2d"),a=h.width,o=h.height,d=f.getImageData(0,0,a,o),l=d.data,c=4):window.CanvasRenderingContext2D&&e instanceof window.CanvasRenderingContext2D?(h=e.canvas,f=e,a=h.width,o=h.height,d=f.getImageData(0,0,a,o),l=d.data,c=4):window.ImageData&&e instanceof window.ImageData&&(d=e,a=e.width,o=e.height,l=d.data,c=4);if(s=Math.max(a,o),window.Uint8ClampedArray&&l instanceof window.Uint8ClampedArray||window.Uint8Array&&l instanceof window.Uint8Array)for(u=l,l=Array(a*o),v=0,_=u.length;v<_;v++)l[v]=u[v*c+i]/255;else if(c!==1)throw Error("Raw data can have only 1 value per pixel");var b=Array(a*o),p=Array(a*o),k=Array(s),E=Array(s),S=Array(s+1),L=Array(s);for(v=0,_=a*o;v<_;v++){var x=l[v];b[v]=x===1?0:x===0?fk:Math.pow(Math.max(0,.5-x),2),p[v]=x===1?fk:x===0?0:Math.pow(Math.max(0,x-.5),2)}Aze(b,a,o,k,E,L,S),Aze(p,a,o,k,E,L,S);var C=window.Float32Array?new Float32Array(a*o):new Array(a*o);for(v=0,_=a*o;v<_;v++)C[v]=z9t(1-((b[v]-p[v])/n+r),0,1);return C}function Aze(e,t,r,n,i,a,o){for(var s=0;s<t;s++){for(var l=0;l<r;l++)n[l]=e[l*t+s];for(Sze(n,i,a,o,r),l=0;l<r;l++)e[l*t+s]=i[l]}for(l=0;l<r;l++){for(s=0;s<t;s++)n[s]=e[l*t+s];for(Sze(n,i,a,o,t),s=0;s<t;s++)e[l*t+s]=Math.sqrt(i[s])}}function Sze(e,t,r,n,i){r[0]=0,n[0]=-fk,n[1]=+fk;for(var a=1,o=0;a<i;a++){for(var s=(e[a]+a*a-(e[r[o]]+r[o]*r[o]))/(2*a-2*r[o]);s<=n[o];)o--,s=(e[a]+a*a-(e[r[o]]+r[o]*r[o]))/(2*a-2*r[o]);o++,r[o]=a,n[o]=s,n[o+1]=+fk}for(a=0,o=0;a<i;a++){for(;n[o+1]<a;)o++;t[a]=(a-r[o])*(a-r[o])+e[r[o]]}}});var Cze=ye((oyr,kze)=>{"use strict";var q9t=pze(),B9t=eM(),N9t=Tze(),U9t=QZ(),V9t=Eze(),tY=document.createElement("canvas"),hp=tY.getContext("2d");kze.exports=G9t;function G9t(e,t){if(!U9t(e))throw Error("Argument should be valid svg path string");t||(t={});var r,n;t.shape?(r=t.shape[0],n=t.shape[1]):(r=tY.width=t.w||t.width||200,n=tY.height=t.h||t.height||200);var i=Math.min(r,n),a=t.stroke||0,o=t.viewbox||t.viewBox||q9t(e),s=[r/(o[2]-o[0]),n/(o[3]-o[1])],l=Math.min(s[0]||0,s[1]||0)/2;if(hp.fillStyle="black",hp.fillRect(0,0,r,n),hp.fillStyle="white",a&&(typeof a!="number"&&(a=1),a>0?hp.strokeStyle="white":hp.strokeStyle="black",hp.lineWidth=Math.abs(a)),hp.translate(r*.5,n*.5),hp.scale(l,l),H9t()){var u=new Path2D(e);hp.fill(u),a&&hp.stroke(u)}else{var c=B9t(e);N9t(hp,c),hp.fill(),a&&hp.stroke()}hp.setTransform(1,0,0,1,0,0);var f=V9t(hp,{cutoff:t.cutoff!=null?t.cutoff:.5,radius:t.radius!=null?t.radius:i*.5});return f}var ZF;function H9t(){if(ZF!=null)return ZF;var e=document.createElement("canvas").getContext("2d");if(e.canvas.width=e.canvas.height=1,!window.Path2D)return ZF=!1;var t=new Path2D("M0,0h1v1h-1v-1Z");e.fillStyle="black",e.fill(t);var r=e.getImageData(0,0,1,1);return ZF=r&&r.data&&r.data[3]===255}});var J2=ye((syr,Nze)=>{"use strict";var KF=Eo(),j9t=Cze(),YF=J_(),W9t=qa(),o5=Dr(),_h=o5.isArrayOrTypedArray,n5=So(),Lze=hf(),Pze=$y().formatColor,a5=Ru(),X9t=k3(),iY=VF(),hk=ox(),Z9t=N1().DESELECTDIM,Ize={start:1,left:1,end:-1,right:-1,middle:0,center:0,bottom:1,top:-1},Y9t=ip().appendArrayPointValue;function K9t(e,t){var r,n={marker:void 0,markerSel:void 0,markerUnsel:void 0,line:void 0,fill:void 0,errorX:void 0,errorY:void 0,text:void 0,textSel:void 0,textUnsel:void 0},i=e._context.plotGlPixelRatio;if(t.visible!==!0)return n;if(a5.hasText(t)&&(n.text=Bze(e,t),n.textSel=Dze(e,t,t.selected),n.textUnsel=Dze(e,t,t.unselected)),a5.hasMarkers(t)&&(n.marker=aY(e,t),n.markerSel=nY(e,t,t.selected),n.markerUnsel=nY(e,t,t.unselected),!t.unselected&&_h(t.marker.opacity))){var a=t.marker.opacity;for(n.markerUnsel.opacity=new Array(a.length),r=0;r<a.length;r++)n.markerUnsel.opacity[r]=Z9t*a[r]}if(a5.hasLines(t)){n.line={overlay:!0,thickness:t.line.width*i,color:t.line.color,opacity:t.opacity};var o=(hk.DASHES[t.line.dash]||[1]).slice();for(r=0;r<o.length;++r)o[r]*=t.line.width*i;n.line.dashes=o}return t.error_x&&t.error_x.visible&&(n.errorX=Fze(t,t.error_x,i)),t.error_y&&t.error_y.visible&&(n.errorY=Fze(t,t.error_y,i)),t.fill&&t.fill!=="none"&&(n.fill={closed:!0,fill:t.fillcolor,thickness:0}),n}function Bze(e,t){var r=e._fullLayout,n=t._length,i=t.textfont,a=t.textposition,o=_h(a)?a:[a],s=i.color,l=i.size,u=i.family,c=i.weight,f=i.style,h=i.variant,d={},v,_=e._context.plotGlPixelRatio,b=t.texttemplate;if(b){d.text=[];var p=r._d3locale,k=Array.isArray(b),E=k?Math.min(b.length,n):n,S=k?function(P){return b[P]}:function(){return b};for(v=0;v<E;v++){var L={i:v},x=t._module.formatLabels(L,t,r),C={};Y9t(C,t,v),d.text.push(o5.texttemplateString({data:[C,L,t._meta],fallback:t.texttemplatefallback,labels:x,locale:p,template:S(v)}))}}else _h(t.text)&&t.text.length<n?d.text=t.text.slice():d.text=t.text;if(_h(d.text))for(v=d.text.length;v<n;v++)d.text[v]="";for(d.opacity=t.opacity,d.font={},d.align=[],d.baseline=[],v=0;v<o.length;v++){var M=o[v].split(/\s+/);switch(M[1]){case"left":d.align.push("right");break;case"right":d.align.push("left");break;default:d.align.push(M[1])}switch(M[0]){case"top":d.baseline.push("bottom");break;case"bottom":d.baseline.push("top");break;default:d.baseline.push(M[0])}}if(_h(s))for(d.color=new Array(n),v=0;v<n;v++)d.color[v]=s[v];else d.color=s;if(_h(l)||Array.isArray(u)||_h(c)||Array.isArray(f)||Array.isArray(h))for(d.font=new Array(n),v=0;v<n;v++){var g=d.font[v]={};g.size=(o5.isTypedArray(l)?l[v]:_h(l)?KF(l[v])?l[v]:0:l)*_,g.family=Array.isArray(u)?u[v]:u,g.weight=Rze(_h(c)?c[v]:c),g.style=Array.isArray(f)?f[v]:f,g.variant=Array.isArray(h)?h[v]:h}else d.font={size:l*_,family:u,weight:Rze(c),style:f,variant:h};return d}function Rze(e){return e<=1e3?e>500?"bold":"normal":e}function aY(e,t){var r=t._length,n=t.marker,i={},a,o=_h(n.symbol),s=_h(n.angle),l=_h(n.color),u=_h(n.line.color),c=_h(n.opacity),f=_h(n.size),h=_h(n.line.width),d;if(o||(d=iY.isOpenSymbol(n.symbol)),o||l||u||c||s){i.symbols=new Array(r),i.angles=new Array(r),i.colors=new Array(r),i.borderColors=new Array(r);var v=n.symbol,_=n.angle,b=Pze(n,n.opacity,r),p=Pze(n.line,n.opacity,r);if(!_h(p[0])){var k=p;for(p=Array(r),a=0;a<r;a++)p[a]=k}if(!_h(b[0])){var E=b;for(b=Array(r),a=0;a<r;a++)b[a]=E}if(!_h(v)){var S=v;for(v=Array(r),a=0;a<r;a++)v[a]=S}if(!_h(_)){var L=_;for(_=Array(r),a=0;a<r;a++)_[a]=L}for(i.symbols=v,i.angles=_,i.colors=b,i.borderColors=p,a=0;a<r;a++)o&&(d=iY.isOpenSymbol(n.symbol[a])),d&&(p[a]=b[a].slice(),b[a]=b[a].slice(),b[a][3]=0);for(i.opacity=t.opacity,i.markers=new Array(r),a=0;a<r;a++)i.markers[a]=qze({mx:i.symbols[a],ma:i.angles[a]},t)}else d?(i.color=YF(n.color,"uint8"),i.color[3]=0,i.borderColor=YF(n.color,"uint8")):(i.color=YF(n.color,"uint8"),i.borderColor=YF(n.line.color,"uint8")),i.opacity=t.opacity*n.opacity,i.marker=qze({mx:n.symbol,ma:n.angle},t);var x=1,C=X9t(t,x),M;if(f||h){var g=i.sizes=new Array(r),P=i.borderSizes=new Array(r),T=0,z;if(f){for(a=0;a<r;a++)g[a]=C(n.size[a]),T+=g[a];z=T/r}else for(M=C(n.size),a=0;a<r;a++)g[a]=M;if(h)for(a=0;a<r;a++)P[a]=n.line.width[a];else for(M=n.line.width,a=0;a<r;a++)P[a]=M;i.sizeAvg=z}else i.size=C(n&&n.size||10),i.borderSizes=C(n.line.width);return i}function nY(e,t,r){var n=t.marker,i={};return r&&(r.marker&&r.marker.symbol?i=aY(e,o5.extendFlat({},n,r.marker)):r.marker&&(r.marker.size&&(i.size=r.marker.size),r.marker.color&&(i.colors=r.marker.color),r.marker.opacity!==void 0&&(i.opacity=r.marker.opacity))),i}function Dze(e,t,r){var n={};if(!r)return n;if(r.textfont){var i={opacity:1,text:t.text,texttemplate:t.texttemplate,textposition:t.textposition,textfont:o5.extendFlat({},t.textfont)};r.textfont&&o5.extendFlat(i.textfont,r.textfont),n=Bze(e,i)}return n}function Fze(e,t,r){var n={capSize:t.width*2*r,lineWidth:t.thickness*r,color:t.color};return t.copy_ystyle&&(n=e.error_y),n}var zze=hk.SYMBOL_SDF_SIZE,K2=hk.SYMBOL_SIZE,Oze=hk.SYMBOL_STROKE,rY={},J9t=n5.symbolFuncs[0](K2*.05);function qze(e,t){var r=e.mx;if(r==="circle")return null;var n,i,a=n5.symbolNumber(r),o=n5.symbolFuncs[a%100],s=!!n5.symbolNoDot[a%100],l=!!n5.symbolNoFill[a%100],u=iY.isDotSymbol(r);if(e.ma&&(r+="_"+e.ma),rY[r])return rY[r];var c=n5.getMarkerAngle(e,t);return u&&!s?n=o(K2*1.1,c)+J9t:n=o(K2,c),i=j9t(n,{w:zze,h:zze,viewBox:[-K2,-K2,K2,K2],stroke:l?Oze:-Oze}),rY[r]=i,i||null}function $9t(e,t,r){var n=r.length,i=n/2,a,o;if(a5.hasLines(t)&&i)if(t.line.shape==="hv"){for(a=[],o=0;o<i-1;o++)isNaN(r[o*2])||isNaN(r[o*2+1])?a.push(NaN,NaN,NaN,NaN):(a.push(r[o*2],r[o*2+1]),!isNaN(r[o*2+2])&&!isNaN(r[o*2+3])?a.push(r[o*2+2],r[o*2+1]):a.push(NaN,NaN));a.push(r[n-2],r[n-1])}else if(t.line.shape==="hvh"){for(a=[],o=0;o<i-1;o++)if(isNaN(r[o*2])||isNaN(r[o*2+1])||isNaN(r[o*2+2])||isNaN(r[o*2+3]))!isNaN(r[o*2])&&!isNaN(r[o*2+1])?a.push(r[o*2],r[o*2+1]):a.push(NaN,NaN),a.push(NaN,NaN);else{var s=(r[o*2]+r[o*2+2])/2;a.push(r[o*2],r[o*2+1],s,r[o*2+1],s,r[o*2+3])}a.push(r[n-2],r[n-1])}else if(t.line.shape==="vhv"){for(a=[],o=0;o<i-1;o++)if(isNaN(r[o*2])||isNaN(r[o*2+1])||isNaN(r[o*2+2])||isNaN(r[o*2+3]))!isNaN(r[o*2])&&!isNaN(r[o*2+1])?a.push(r[o*2],r[o*2+1]):a.push(NaN,NaN),a.push(NaN,NaN);else{var l=(r[o*2+1]+r[o*2+3])/2;a.push(r[o*2],r[o*2+1],r[o*2],l,r[o*2+2],l)}a.push(r[n-2],r[n-1])}else if(t.line.shape==="vh"){for(a=[],o=0;o<i-1;o++)isNaN(r[o*2])||isNaN(r[o*2+1])?a.push(NaN,NaN,NaN,NaN):(a.push(r[o*2],r[o*2+1]),!isNaN(r[o*2+2])&&!isNaN(r[o*2+3])?a.push(r[o*2],r[o*2+3]):a.push(NaN,NaN));a.push(r[n-2],r[n-1])}else a=r;var u=!1;for(o=0;o<a.length;o++)if(isNaN(a[o])){u=!0;break}var c=u||a.length>hk.TOO_MANY_POINTS||a5.hasMarkers(t)?"rect":"round";if(u&&t.connectgaps){var f=a[0],h=a[1];for(o=0;o<a.length;o+=2)isNaN(a[o])||isNaN(a[o+1])?(a[o]=f,a[o+1]=h):(f=a[o],h=a[o+1])}return{join:c,positions:a}}function Q9t(e,t,r,n,i){var a=W9t.getComponentMethod("errorbars","makeComputeError"),o=Lze.getFromId(e,t.xaxis,"x"),s=Lze.getFromId(e,t.yaxis,"y"),l=r.length/2,u={};function c(f,h){var d=h._id.charAt(0),v=t["error_"+d];if(v&&v.visible&&(h.type==="linear"||h.type==="log")){for(var _=a(v),b={x:0,y:1}[d],p={x:[0,1,2,3],y:[2,3,0,1]}[d],k=new Float64Array(4*l),E=1/0,S=-1/0,L=0,x=0;L<l;L++,x+=4){var C=f[L];if(KF(C)){var M=r[L*2+b],g=_(C,L),P=g[0],T=g[1];if(KF(P)&&KF(T)){var z=C-P,O=C+T;k[x+p[0]]=M-h.c2l(z),k[x+p[1]]=h.c2l(O)-M,k[x+p[2]]=0,k[x+p[3]]=0,E=Math.min(E,C-P),S=Math.max(S,C+T)}}}u[d]={positions:r,errors:k,_bnds:[E,S]}}}return c(n,o),c(i,s),u}function eOt(e,t,r,n){var i=t._length,a={},o;if(a5.hasMarkers(t)){var s=r.font,l=r.align,u=r.baseline;for(a.offset=new Array(i),o=0;o<i;o++){var c=n.sizes?n.sizes[o]:n.size,f=_h(s)?s[o].size:s.size,h=_h(l)?l.length>1?l[o]:l[0]:l,d=_h(u)?u.length>1?u[o]:u[0]:u,v=Ize[h],_=Ize[d],b=c?c/.8+1:0,p=-_*b-_*.5;a.offset[o]=[v*b/f,p/f]}}return a}Nze.exports={style:K9t,markerStyle:aY,markerSelection:nY,linePositions:$9t,errorBarPositions:Q9t,textPosition:eOt}});var oY=ye((lyr,Uze)=>{"use strict";var JF=Dr();Uze.exports=function(t,r){var n=r._scene,i={count:0,dirty:!0,lineOptions:[],fillOptions:[],markerOptions:[],markerSelectedOptions:[],markerUnselectedOptions:[],errorXOptions:[],errorYOptions:[],textOptions:[],textSelectedOptions:[],textUnselectedOptions:[],selectBatch:[],unselectBatch:[]},a={fill2d:!1,scatter2d:!1,error2d:!1,line2d:!1,glText:!1,select2d:!1};return r._scene||(n=r._scene={},n.init=function(){JF.extendFlat(n,a,i)},n.init(),n.update=function(s){var l=JF.repeat(s,n.count);if(n.fill2d&&n.fill2d.update(l),n.scatter2d&&n.scatter2d.update(l),n.line2d&&n.line2d.update(l),n.error2d&&n.error2d.update(l.concat(l)),n.select2d&&n.select2d.update(l),n.glText)for(var u=0;u<n.count;u++)n.glText[u].update(s)},n.draw=function(){for(var s=n.count,l=n.fill2d,u=n.error2d,c=n.line2d,f=n.scatter2d,h=n.glText,d=n.select2d,v=n.selectBatch,_=n.unselectBatch,b=0;b<s;b++){if(l&&n.fillOrder[b]&&l.draw(n.fillOrder[b]),c&&n.lineOptions[b]&&c.draw(b),u&&(n.errorXOptions[b]&&u.draw(b),n.errorYOptions[b]&&u.draw(b+s)),f&&n.markerOptions[b])if(_[b].length){var p=JF.repeat([],n.count);p[b]=_[b],f.draw(p)}else v[b].length||f.draw(b);h[b]&&n.textOptions[b]&&h[b].render()}d&&d.draw(v),n.dirty=!1},n.destroy=function(){n.fill2d&&n.fill2d.destroy&&n.fill2d.destroy(),n.scatter2d&&n.scatter2d.destroy&&n.scatter2d.destroy(),n.error2d&&n.error2d.destroy&&n.error2d.destroy(),n.line2d&&n.line2d.destroy&&n.line2d.destroy(),n.select2d&&n.select2d.destroy&&n.select2d.destroy(),n.glText&&n.glText.forEach(function(s){s.destroy&&s.destroy()}),n.lineOptions=null,n.fillOptions=null,n.markerOptions=null,n.markerSelectedOptions=null,n.markerUnselectedOptions=null,n.errorXOptions=null,n.errorYOptions=null,n.textOptions=null,n.textSelectedOptions=null,n.textUnselectedOptions=null,n.selectBatch=null,n.unselectBatch=null,r._scene=null}),n.dirty||JF.extendFlat(n,i),n}});var Xze=ye((uyr,Wze)=>{"use strict";var tOt=HF(),s5=Dr(),Vze=hf(),rOt=Ag().findExtremes,Gze=Dg(),sY=O0(),iOt=sY.calcMarkerSize,nOt=sY.calcAxisExpansion,aOt=sY.setFirstScatter,oOt=F0(),l5=J2(),sOt=oY(),Hze=fs().BADNUM,lOt=ox().TOO_MANY_POINTS;Wze.exports=function(t,r){var n=t._fullLayout,i=r._xA=Vze.getFromId(t,r.xaxis,"x"),a=r._yA=Vze.getFromId(t,r.yaxis,"y"),o=n._plots[r.xaxis+r.yaxis],s=r._length,l=s>=lOt,u=s*2,c={},f,h=i.makeCalcdata(r,"x"),d=a.makeCalcdata(r,"y"),v=Gze(r,i,"x",h),_=Gze(r,a,"y",d),b=v.vals,p=_.vals;r._x=b,r._y=p,r.xperiodalignment&&(r._origX=h,r._xStarts=v.starts,r._xEnds=v.ends),r.yperiodalignment&&(r._origY=d,r._yStarts=_.starts,r._yEnds=_.ends);var k=new Array(u),E=new Array(s);for(f=0;f<s;f++)k[f*2]=b[f]===Hze?NaN:b[f],k[f*2+1]=p[f]===Hze?NaN:p[f],E[f]=f;if(i.type==="log")for(f=0;f<u;f+=2)k[f]=i.c2l(k[f]);if(a.type==="log")for(f=1;f<u;f+=2)k[f]=a.c2l(k[f]);l&&i.type!=="log"&&a.type!=="log"?c.tree=tOt(k):c.ids=E,oOt(t,r);var S=uOt(t,o,r,k,b,p),L=sOt(t,o);aOt(n,r);var x;return l?S.marker&&(x=S.marker.sizeAvg||Math.max(S.marker.size,3)):x=iOt(r,s),nOt(t,r,i,a,b,p,x),S.errorX&&jze(r,i,S.errorX),S.errorY&&jze(r,a,S.errorY),S.fill&&!L.fill2d&&(L.fill2d=!0),S.marker&&!L.scatter2d&&(L.scatter2d=!0),S.line&&!L.line2d&&(L.line2d=!0),(S.errorX||S.errorY)&&!L.error2d&&(L.error2d=!0),S.text&&!L.glText&&(L.glText=!0),S.marker&&(S.marker.snap=s),L.lineOptions.push(S.line),L.errorXOptions.push(S.errorX),L.errorYOptions.push(S.errorY),L.fillOptions.push(S.fill),L.markerOptions.push(S.marker),L.markerSelectedOptions.push(S.markerSel),L.markerUnselectedOptions.push(S.markerUnsel),L.textOptions.push(S.text),L.textSelectedOptions.push(S.textSel),L.textUnselectedOptions.push(S.textUnsel),L.selectBatch.push([]),L.unselectBatch.push([]),c._scene=L,c.index=L.count,c.x=b,c.y=p,c.positions=k,L.count++,[{x:!1,y:!1,t:c,trace:r}]};function jze(e,t,r){var n=e._extremes[t._id],i=rOt(t,r._bnds,{padded:!0});n.min=n.min.concat(i.min),n.max=n.max.concat(i.max)}function uOt(e,t,r,n,i,a){var o=l5.style(e,r);if(o.marker&&(o.marker.positions=n),o.line&&n.length>1&&s5.extendFlat(o.line,l5.linePositions(e,r,n)),o.errorX||o.errorY){var s=l5.errorBarPositions(e,r,n,i,a);o.errorX&&s5.extendFlat(o.errorX,s.x),o.errorY&&s5.extendFlat(o.errorY,s.y)}return o.text&&(s5.extendFlat(o.text,{positions:n},l5.textPosition(e,r,o.text,o.marker)),s5.extendFlat(o.textSel,{positions:n},l5.textPosition(e,r,o.text,o.markerSel)),s5.extendFlat(o.textUnsel,{positions:n},l5.textPosition(e,r,o.text,o.markerUnsel))),o}});var lY=ye((cyr,Yze)=>{"use strict";var Zze=Dr(),cOt=ka(),fOt=N1().DESELECTDIM;function hOt(e){var t=e[0],r=t.trace,n=t.t,i=n._scene,a=n.index,o=i.selectBatch[a],s=i.unselectBatch[a],l=i.textOptions[a],u=i.textSelectedOptions[a]||{},c=i.textUnselectedOptions[a]||{},f=Zze.extendFlat({},l),h,d;if(o.length||s.length){var v=u.color,_=c.color,b=l.color,p=Zze.isArrayOrTypedArray(b);for(f.color=new Array(r._length),h=0;h<o.length;h++)d=o[h],f.color[d]=v||(p?b[d]:b);for(h=0;h<s.length;h++){d=s[h];var k=p?b[d]:b;f.color[d]=_||(v?k:cOt.addOpacity(k,fOt))}}i.glText[a].update(f)}Yze.exports={styleTextSelection:hOt}});var uY=ye((fyr,Jze)=>{"use strict";var Kze=Ru(),dOt=lY().styleTextSelection;Jze.exports=function(t,r){var n=t.cd,i=t.xaxis,a=t.yaxis,o=[],s=n[0].trace,l=n[0].t,u=s._length,c=l.x,f=l.y,h=l._scene,d=l.index;if(!h)return o;var v=Kze.hasText(s),_=Kze.hasMarkers(s),b=!_&&!v;if(s.visible!==!0||b)return o;var p=[],k=[];if(r!==!1&&!r.degenerate)for(var E=0;E<u;E++)r.contains([l.xpx[E],l.ypx[E]],!1,E,t)?(p.push(E),o.push({pointNumber:E,x:i.c2d(c[E]),y:a.c2d(f[E])})):k.push(E);if(_){var S=h.scatter2d;if(!p.length&&!k.length){var L=new Array(h.count);L[d]=h.markerOptions[d],S.update.apply(S,L)}else if(!h.selectBatch[d].length&&!h.unselectBatch[d].length){var x=new Array(h.count);x[d]=h.markerUnselectedOptions[d],S.update.apply(S,x)}}return h.selectBatch[d]=p,h.unselectBatch[d]=k,v&&dOt(n),o}});var Qze=ye((hyr,$ze)=>{"use strict";var vOt=UF();$ze.exports={moduleType:"trace",name:"scattergl",basePlotModule:ph(),categories:["gl","regl","cartesian","symbols","errorBarsOK","showLegend","scatter-like"],attributes:lk(),supplyDefaults:DFe(),crossTraceDefaults:aU(),colorbar:$d(),formatLabels:zFe(),calc:Xze(),hoverPoints:vOt.hoverPoints,selectPoints:uY(),meta:{}}});var t7e=ye((dyr,QF)=>{"use strict";var $F=ZE();QF.exports=e7e;QF.exports.to=e7e;QF.exports.from=pOt;function e7e(e,t){t==null&&(t=!0);var r=e[0],n=e[1],i=e[2],a=e[3];a==null&&(a=t?1:255),t&&(r*=255,n*=255,i*=255,a*=255),r=$F(r,0,255)&255,n=$F(n,0,255)&255,i=$F(i,0,255)&255,a=$F(a,0,255)&255;var o=r*16777216+(n<<16)+(i<<8)+a;return o}function pOt(e,t){e=+e;var r=e>>>24,n=(e&16711680)>>>16,i=(e&65280)>>>8,a=e&255;return t===!1?[r,n,i,a]:[r/255,n/255,i/255,a/255]}});var Oh=ye((vyr,i7e)=>{"use strict";var r7e=Object.getOwnPropertySymbols,gOt=Object.prototype.hasOwnProperty,mOt=Object.prototype.propertyIsEnumerable;function yOt(e){if(e==null)throw new TypeError("Object.assign cannot be called with null or undefined");return Object(e)}function _Ot(){try{if(!Object.assign)return!1;var e=new String("abc");if(e[5]="de",Object.getOwnPropertyNames(e)[0]==="5")return!1;for(var t={},r=0;r<10;r++)t["_"+String.fromCharCode(r)]=r;var n=Object.getOwnPropertyNames(t).map(function(a){return t[a]});if(n.join("")!=="0123456789")return!1;var i={};return"abcdefghijklmnopqrst".split("").forEach(function(a){i[a]=a}),Object.keys(Object.assign({},i)).join("")==="abcdefghijklmnopqrst"}catch(a){return!1}}i7e.exports=_Ot()?Object.assign:function(e,t){for(var r,n=yOt(e),i,a=1;a<arguments.length;a++){r=Object(arguments[a]);for(var o in r)gOt.call(r,o)&&(n[o]=r[o]);if(r7e){i=r7e(r);for(var s=0;s<i.length;s++)mOt.call(r,i[s])&&(n[i[s]]=r[i[s]])}}return n}});var a7e=ye((pyr,n7e)=>{n7e.exports=function(e){typeof e=="string"&&(e=[e]);for(var t=[].slice.call(arguments,1),r=[],n=0;n<e.length-1;n++)r.push(e[n],t[n]||"");return r.push(e[n]),r.join("")}});var cY=ye((gyr,o7e)=>{"use strict";o7e.exports=function(t,r,n){Array.isArray(n)||(n=[].slice.call(arguments,2));for(var i=0,a=n.length;i<a;i++){var o=n[i];for(var s in o)if(!(r[s]!==void 0&&!Array.isArray(r[s])&&t[s]===r[s])&&s in r){var l;if(o[s]===!0)l=r[s];else{if(o[s]===!1)continue;if(typeof o[s]=="function"&&(l=o[s](r[s],t,r),l===void 0))continue}t[s]=l}}return t}});var l7e=ye((myr,s7e)=>{"use strict";s7e.exports=typeof navigator!="undefined"&&(/MSIE/.test(navigator.userAgent)||/Trident\//.test(navigator.appVersion))});var ez=ye((yyr,u5)=>{"use strict";u5.exports=dk;u5.exports.float32=u5.exports.float=dk;u5.exports.fract32=u5.exports.fract=xOt;var u7e=new Float32Array(1);function xOt(e,t){if(e.length){if(e instanceof Float32Array)return new Float32Array(e.length);t instanceof Float32Array||(t=dk(e));for(var r=0,n=t.length;r<n;r++)t[r]=e[r]-t[r];return t}return dk(e-dk(e))}function dk(e){return e.length?e instanceof Float32Array?e:new Float32Array(e):(u7e[0]=e,u7e[0])}});var hY=ye((_yr,h7e)=>{"use strict";function bOt(e,t){var r=e==null?null:typeof Symbol!="undefined"&&e[Symbol.iterator]||e["@@iterator"];if(r!=null){var n,i,a,o,s=[],l=!0,u=!1;try{if(a=(r=r.call(e)).next,t===0){if(Object(r)!==r)return;l=!1}else for(;!(l=(n=a.call(r)).done)&&(s.push(n.value),s.length!==t);l=!0);}catch(c){u=!0,i=c}finally{try{if(!l&&r.return!=null&&(o=r.return(),Object(o)!==o))return}finally{if(u)throw i}}return s}}function wOt(e,t){return SOt(e)||bOt(e,t)||f7e(e,t)||kOt()}function TOt(e){return AOt(e)||MOt(e)||f7e(e)||EOt()}function AOt(e){if(Array.isArray(e))return fY(e)}function SOt(e){if(Array.isArray(e))return e}function MOt(e){if(typeof Symbol!="undefined"&&e[Symbol.iterator]!=null||e["@@iterator"]!=null)return Array.from(e)}function f7e(e,t){if(e){if(typeof e=="string")return fY(e,t);var r=Object.prototype.toString.call(e).slice(8,-1);if(r==="Object"&&e.constructor&&(r=e.constructor.name),r==="Map"||r==="Set")return Array.from(e);if(r==="Arguments"||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(r))return fY(e,t)}}function fY(e,t){(t==null||t>e.length)&&(t=e.length);for(var r=0,n=new Array(t);r<t;r++)n[r]=e[r];return n}function EOt(){throw new TypeError(`Invalid attempt to spread non-iterable instance.
+In order to be iterable, non-array objects must have a [Symbol.iterator]() method.`)}function kOt(){throw new TypeError(`Invalid attempt to destructure non-iterable instance.
+In order to be iterable, non-array objects must have a [Symbol.iterator]() method.`)}var COt=J_(),LOt=X2(),POt=t7e(),IOt=HF(),$2=Oh(),tz=a7e(),ROt=Zm(),DOt=cY(),FOt=Z2(),c7e=l7e(),rz=ez(),zOt=i5(),OOt=nv;function nv(e,t){var r=this;if(!(this instanceof nv))return new nv(e,t);typeof e=="function"?(t||(t={}),t.regl=e):(t=e,e=null),t&&t.length&&(t.positions=t),e=t.regl;var n=e._gl,i,a=[],o={},s=[],l=[null],u=[null],c=255,f=100;this.tooManyColors=c7e,i=e.texture({data:new Uint8Array(c*4),width:c,height:1,type:"uint8",format:"rgba",wrapS:"clamp",wrapT:"clamp",mag:"nearest",min:"nearest"}),$2(this,{regl:e,gl:n,groups:s,markerCache:u,markerTextures:l,palette:a,paletteIds:o,paletteTexture:i,maxColors:c,maxSize:f,canvas:n.canvas}),this.update(t);var h={uniforms:{constPointSize:!!t.constPointSize,opacity:e.prop("opacity"),paletteSize:function(b,p){return[r.tooManyColors?0:c,i.height]},pixelRatio:e.context("pixelRatio"),scale:e.prop("scale"),scaleFract:e.prop("scaleFract"),translate:e.prop("translate"),translateFract:e.prop("translateFract"),markerTexture:e.prop("markerTexture"),paletteTexture:i},attributes:{x:function(b,p){return p.xAttr||{buffer:p.positionBuffer,stride:8,offset:0}},y:function(b,p){return p.yAttr||{buffer:p.positionBuffer,stride:8,offset:4}},xFract:function(b,p){return p.xAttr?{constant:[0,0]}:{buffer:p.positionFractBuffer,stride:8,offset:0}},yFract:function(b,p){return p.yAttr?{constant:[0,0]}:{buffer:p.positionFractBuffer,stride:8,offset:4}},size:function(b,p){return p.size.length?{buffer:p.sizeBuffer,stride:2,offset:0}:{constant:[Math.round(p.size*255/r.maxSize)]}},borderSize:function(b,p){return p.borderSize.length?{buffer:p.sizeBuffer,stride:2,offset:1}:{constant:[Math.round(p.borderSize*255/r.maxSize)]}},colorId:function(b,p){return p.color.length?{buffer:p.colorBuffer,stride:r.tooManyColors?8:4,offset:0}:{constant:r.tooManyColors?a.slice(p.color*4,p.color*4+4):[p.color]}},borderColorId:function(b,p){return p.borderColor.length?{buffer:p.colorBuffer,stride:r.tooManyColors?8:4,offset:r.tooManyColors?4:2}:{constant:r.tooManyColors?a.slice(p.borderColor*4,p.borderColor*4+4):[p.borderColor]}},isActive:function(b,p){return p.activation===!0?{constant:[1]}:p.activation?p.activation:{constant:[0]}}},blend:{enable:!0,color:[0,0,0,1],func:{srcRGB:"src alpha",dstRGB:"one minus src alpha",srcAlpha:"one minus dst alpha",dstAlpha:"one"}},scissor:{enable:!0,box:e.prop("viewport")},viewport:e.prop("viewport"),stencil:{enable:!1},depth:{enable:!1},elements:e.prop("elements"),count:e.prop("count"),offset:e.prop("offset"),primitive:"points"},d=$2({},h);d.frag=tz([`precision highp float;
+#define GLSLIFY 1
+
+uniform float opacity;
+uniform sampler2D markerTexture;
+
+varying vec4 fragColor, fragBorderColor;
+varying float fragWidth, fragBorderColorLevel, fragColorLevel;
+
+float smoothStep(float x, float y) {
+  return 1.0 / (1.0 + exp(50.0*(x - y)));
+}
+
+void main() {
+  float dist = texture2D(markerTexture, gl_PointCoord).r, delta = fragWidth;
+
+  // max-distance alpha
+  if (dist < 0.003) discard;
+
+  // null-border case
+  if (fragBorderColorLevel == fragColorLevel || fragBorderColor.a == 0.) {
+    float colorAmt = smoothstep(.5 - delta, .5 + delta, dist);
+    gl_FragColor = vec4(fragColor.rgb, colorAmt * fragColor.a * opacity);
+  }
+  else {
+    float borderColorAmt = smoothstep(fragBorderColorLevel - delta, fragBorderColorLevel + delta, dist);
+    float colorAmt = smoothstep(fragColorLevel - delta, fragColorLevel + delta, dist);
+
+    vec4 color = fragBorderColor;
+    color.a *= borderColorAmt;
+    color = mix(color, fragColor, colorAmt);
+    color.a *= opacity;
+
+    gl_FragColor = color;
+  }
+
+}
+`]),d.vert=tz([`precision highp float;
+#define GLSLIFY 1
+
+attribute float x, y, xFract, yFract;
+attribute float size, borderSize;
+attribute vec4 colorId, borderColorId;
+attribute float isActive;
+
+// \`invariant\` effectively turns off optimizations for the position.
+// We need this because -fast-math on M1 Macs is re-ordering
+// floating point operations in a way that causes floating point
+// precision limits to put points in the wrong locations.
+invariant gl_Position;
+
+uniform bool constPointSize;
+uniform float pixelRatio;
+uniform vec2 scale, scaleFract, translate, translateFract, paletteSize;
+uniform sampler2D paletteTexture;
+
+const float maxSize = 100.;
+const float borderLevel = .5;
+
+varying vec4 fragColor, fragBorderColor;
+varying float fragPointSize, fragBorderRadius, fragWidth, fragBorderColorLevel, fragColorLevel;
+
+float pointSizeScale = (constPointSize) ? 2. : pixelRatio;
+
+bool isDirect = (paletteSize.x < 1.);
+
+vec4 getColor(vec4 id) {
+  return isDirect ? id / 255. : texture2D(paletteTexture,
+    vec2(
+      (id.x + .5) / paletteSize.x,
+      (id.y + .5) / paletteSize.y
+    )
+  );
+}
+
+void main() {
+  // ignore inactive points
+  if (isActive == 0.) return;
+
+  vec2 position = vec2(x, y);
+  vec2 positionFract = vec2(xFract, yFract);
+
+  vec4 color = getColor(colorId);
+  vec4 borderColor = getColor(borderColorId);
+
+  float size = size * maxSize / 255.;
+  float borderSize = borderSize * maxSize / 255.;
+
+  gl_PointSize = 2. * size * pointSizeScale;
+  fragPointSize = size * pixelRatio;
+
+  vec2 pos = (position + translate) * scale
+      + (positionFract + translateFract) * scale
+      + (position + translate) * scaleFract
+      + (positionFract + translateFract) * scaleFract;
+
+  gl_Position = vec4(pos * 2. - 1., 0., 1.);
+
+  fragColor = color;
+  fragBorderColor = borderColor;
+  fragWidth = 1. / gl_PointSize;
+
+  fragBorderColorLevel = clamp(borderLevel - borderLevel * borderSize / size, 0., 1.);
+  fragColorLevel = clamp(borderLevel + (1. - borderLevel) * borderSize / size, 0., 1.);
+}
+`]),this.drawMarker=e(d);var v=$2({},h);v.frag=tz([`precision highp float;
+#define GLSLIFY 1
+
+varying vec4 fragColor, fragBorderColor;
+varying float fragBorderRadius, fragWidth;
+
+uniform float opacity;
+
+float smoothStep(float edge0, float edge1, float x) {
+	float t;
+	t = clamp((x - edge0) / (edge1 - edge0), 0.0, 1.0);
+	return t * t * (3.0 - 2.0 * t);
+}
+
+void main() {
+	float radius, alpha = 1.0, delta = fragWidth;
+
+	radius = length(2.0 * gl_PointCoord.xy - 1.0);
+
+	if (radius > 1.0 + delta) {
+		discard;
+	}
+
+	alpha -= smoothstep(1.0 - delta, 1.0 + delta, radius);
+
+	float borderRadius = fragBorderRadius;
+	float ratio = smoothstep(borderRadius - delta, borderRadius + delta, radius);
+	vec4 color = mix(fragColor, fragBorderColor, ratio);
+	color.a *= alpha * opacity;
+	gl_FragColor = color;
+}
+`]),v.vert=tz([`precision highp float;
+#define GLSLIFY 1
+
+attribute float x, y, xFract, yFract;
+attribute float size, borderSize;
+attribute vec4 colorId, borderColorId;
+attribute float isActive;
+
+// \`invariant\` effectively turns off optimizations for the position.
+// We need this because -fast-math on M1 Macs is re-ordering
+// floating point operations in a way that causes floating point
+// precision limits to put points in the wrong locations.
+invariant gl_Position;
+
+uniform bool constPointSize;
+uniform float pixelRatio;
+uniform vec2 paletteSize, scale, scaleFract, translate, translateFract;
+uniform sampler2D paletteTexture;
+
+const float maxSize = 100.;
+
+varying vec4 fragColor, fragBorderColor;
+varying float fragBorderRadius, fragWidth;
+
+float pointSizeScale = (constPointSize) ? 2. : pixelRatio;
+
+bool isDirect = (paletteSize.x < 1.);
+
+vec4 getColor(vec4 id) {
+  return isDirect ? id / 255. : texture2D(paletteTexture,
+    vec2(
+      (id.x + .5) / paletteSize.x,
+      (id.y + .5) / paletteSize.y
+    )
+  );
+}
+
+void main() {
+  // ignore inactive points
+  if (isActive == 0.) return;
+
+  vec2 position = vec2(x, y);
+  vec2 positionFract = vec2(xFract, yFract);
+
+  vec4 color = getColor(colorId);
+  vec4 borderColor = getColor(borderColorId);
+
+  float size = size * maxSize / 255.;
+  float borderSize = borderSize * maxSize / 255.;
+
+  gl_PointSize = (size + borderSize) * pointSizeScale;
+
+  vec2 pos = (position + translate) * scale
+      + (positionFract + translateFract) * scale
+      + (position + translate) * scaleFract
+      + (positionFract + translateFract) * scaleFract;
+
+  gl_Position = vec4(pos * 2. - 1., 0., 1.);
+
+  fragBorderRadius = 1. - 2. * borderSize / (size + borderSize);
+  fragColor = color;
+  fragBorderColor = borderColor.a == 0. || borderSize == 0. ? vec4(color.rgb, 0.) : borderColor;
+  fragWidth = 1. / gl_PointSize;
+}
+`]),c7e&&(v.frag=v.frag.replace("smoothstep","smoothStep"),d.frag=d.frag.replace("smoothstep","smoothStep")),this.drawCircle=e(v)}nv.defaults={color:"black",borderColor:"transparent",borderSize:0,size:12,opacity:1,marker:void 0,viewport:null,range:null,pixelSize:null,count:0,offset:0,bounds:null,positions:[],snap:1e4};nv.prototype.render=function(){return arguments.length&&this.update.apply(this,arguments),this.draw(),this};nv.prototype.draw=function(){for(var e=this,t=arguments.length,r=new Array(t),n=0;n<t;n++)r[n]=arguments[n];var i=this.groups;if(r.length===1&&Array.isArray(r[0])&&(r[0][0]===null||Array.isArray(r[0][0]))&&(r=r[0]),this.regl._refresh(),r.length)for(var a=0;a<r.length;a++)this.drawItem(a,r[a]);else i.forEach(function(o,s){e.drawItem(s)});return this};nv.prototype.drawItem=function(e,t){var r=this.groups,n=r[e];if(typeof t=="number"&&(e=t,n=r[t],t=null),!!(n&&n.count&&n.opacity)){n.activation[0]&&this.drawCircle(this.getMarkerDrawOptions(0,n,t));for(var i=[],a=1;a<n.activation.length;a++)!n.activation[a]||n.activation[a]!==!0&&!n.activation[a].data.length||i.push.apply(i,TOt(this.getMarkerDrawOptions(a,n,t)));i.length&&this.drawMarker(i)}};nv.prototype.getMarkerDrawOptions=function(e,t,r){var n=t.range,i=t.tree,a=t.viewport,o=t.activation,s=t.selectionBuffer,l=t.count,u=this.regl;if(!i)return r?[$2({},t,{markerTexture:this.markerTextures[e],activation:o[e],count:r.length,elements:r,offset:0})]:[$2({},t,{markerTexture:this.markerTextures[e],activation:o[e],offset:0})];var c=[],f=i.range(n,{lod:!0,px:[(n[2]-n[0])/a.width,(n[3]-n[1])/a.height]});if(r){for(var h=o[e],d=h.data,v=new Uint8Array(l),_=0;_<r.length;_++){var b=r[_];v[b]=d?d[b]:1}s.subdata(v)}for(var p=f.length;p--;){var k=wOt(f[p],2),E=k[0],S=k[1];c.push($2({},t,{markerTexture:this.markerTextures[e],activation:r?s:o[e],offset:E,count:S-E}))}return c};nv.prototype.update=function(){for(var e=this,t=arguments.length,r=new Array(t),n=0;n<t;n++)r[n]=arguments[n];if(r.length){r.length===1&&Array.isArray(r[0])&&(r=r[0]);var i=this.groups,a=this.gl,o=this.regl,s=this.maxSize,l=this.maxColors,u=this.palette;this.groups=i=r.map(function(c,f){var h=i[f];if(c===void 0)return h;c===null?c={positions:null}:typeof c=="function"?c={ondraw:c}:typeof c[0]=="number"&&(c={positions:c}),c=ROt(c,{positions:"positions data points",snap:"snap cluster lod tree",size:"sizes size radius",borderSize:"borderSizes borderSize border-size bordersize borderWidth borderWidths border-width borderwidth stroke-width strokeWidth strokewidth outline",color:"colors color fill fill-color fillColor",borderColor:"borderColors borderColor stroke stroke-color strokeColor",marker:"markers marker shape",range:"range dataBox databox",viewport:"viewport viewPort viewBox viewbox",opacity:"opacity alpha transparency",bounds:"bound bounds boundaries limits",tooManyColors:"tooManyColors palette paletteMode optimizePalette enablePalette"}),c.positions===null&&(c.positions=[]),c.tooManyColors!=null&&(e.tooManyColors=c.tooManyColors),h||(i[f]=h={id:f,scale:null,translate:null,scaleFract:null,translateFract:null,activation:[],selectionBuffer:o.buffer({data:new Uint8Array(0),usage:"stream",type:"uint8"}),sizeBuffer:o.buffer({data:new Uint8Array(0),usage:"dynamic",type:"uint8"}),colorBuffer:o.buffer({data:new Uint8Array(0),usage:"dynamic",type:"uint8"}),positionBuffer:o.buffer({data:new Uint8Array(0),usage:"dynamic",type:"float"}),positionFractBuffer:o.buffer({data:new Uint8Array(0),usage:"dynamic",type:"float"})},c=$2({},nv.defaults,c)),c.positions&&!("marker"in c)&&(c.marker=h.marker,delete h.marker),c.marker&&!("positions"in c)&&(c.positions=h.positions,delete h.positions);var d=0,v=0;if(DOt(h,c,[{snap:!0,size:function(j,N){return j==null&&(j=nv.defaults.size),d+=j&&j.length?1:0,j},borderSize:function(j,N){return j==null&&(j=nv.defaults.borderSize),d+=j&&j.length?1:0,j},opacity:parseFloat,color:function(j,N){return j==null&&(j=nv.defaults.color),j=e.updateColor(j),v++,j},borderColor:function(j,N){return j==null&&(j=nv.defaults.borderColor),j=e.updateColor(j),v++,j},bounds:function(j,N,H){return"range"in H||(H.range=null),j},positions:function(j,N,H){var te=N.snap,oe=N.positionBuffer,_e=N.positionFractBuffer,Ee=N.selectionBuffer;if(j.x||j.y)return j.x.length?N.xAttr={buffer:o.buffer(j.x),offset:0,stride:4,count:j.x.length}:N.xAttr={buffer:j.x.buffer,offset:j.x.offset*4||0,stride:(j.x.stride||1)*4,count:j.x.count},j.y.length?N.yAttr={buffer:o.buffer(j.y),offset:0,stride:4,count:j.y.length}:N.yAttr={buffer:j.y.buffer,offset:j.y.offset*4||0,stride:(j.y.stride||1)*4,count:j.y.count},N.count=Math.max(N.xAttr.count,N.yAttr.count),j;j=FOt(j,"float64");var Ce=N.count=Math.floor(j.length/2),me=N.bounds=Ce?LOt(j,2):null;if(!H.range&&!N.range&&(delete N.range,H.range=me),!H.marker&&!N.marker&&(delete N.marker,H.marker=null),te&&(te===!0||Ce>te)?N.tree=IOt(j,{bounds:me}):te&&te.length&&(N.tree=te),N.tree){var ie={primitive:"points",usage:"static",data:N.tree,type:"uint32"};N.elements?N.elements(ie):N.elements=o.elements(ie)}var Se=rz.float32(j);oe({data:Se,usage:"dynamic"});var Le=rz.fract32(j,Se);return _e({data:Le,usage:"dynamic"}),Ee({data:new Uint8Array(Ce),type:"uint8",usage:"stream"}),j}},{marker:function(j,N,H){var te=N.activation;if(te.forEach(function(Le){return Le&&Le.destroy&&Le.destroy()}),te.length=0,!j||typeof j[0]=="number"){var oe=e.addMarker(j);te[oe]=!0}else{for(var _e=[],Ee=0,Ce=Math.min(j.length,N.count);Ee<Ce;Ee++){var me=e.addMarker(j[Ee]);_e[me]||(_e[me]=new Uint8Array(N.count)),_e[me][Ee]=1}for(var ie=0;ie<_e.length;ie++)if(_e[ie]){var Se={data:_e[ie],type:"uint8",usage:"static"};te[ie]?te[ie](Se):te[ie]=o.buffer(Se),te[ie].data=_e[ie]}}return j},range:function(j,N,H){var te=N.bounds;if(te)return j||(j=te),N.scale=[1/(j[2]-j[0]),1/(j[3]-j[1])],N.translate=[-j[0],-j[1]],N.scaleFract=rz.fract(N.scale),N.translateFract=rz.fract(N.translate),j},viewport:function(j){var N=zOt(j||[a.drawingBufferWidth,a.drawingBufferHeight]);return N}}]),d){var _=h,b=_.count,p=_.size,k=_.borderSize,E=_.sizeBuffer,S=new Uint8Array(b*2);if(p.length||k.length)for(var L=0;L<b;L++)S[L*2]=Math.round((p[L]==null?p:p[L])*255/s),S[L*2+1]=Math.round((k[L]==null?k:k[L])*255/s);E({data:S,usage:"dynamic"})}if(v){var x=h,C=x.count,M=x.color,g=x.borderColor,P=x.colorBuffer,T;if(e.tooManyColors){if(M.length||g.length){T=new Uint8Array(C*8);for(var z=0;z<C;z++){var O=M[z];T[z*8]=u[O*4],T[z*8+1]=u[O*4+1],T[z*8+2]=u[O*4+2],T[z*8+3]=u[O*4+3];var V=g[z];T[z*8+4]=u[V*4],T[z*8+5]=u[V*4+1],T[z*8+6]=u[V*4+2],T[z*8+7]=u[V*4+3]}}}else if(M.length||g.length){T=new Uint8Array(C*4+2);for(var G=0;G<C;G++)M[G]!=null&&(T[G*4]=M[G]%l,T[G*4+1]=Math.floor(M[G]/l)),g[G]!=null&&(T[G*4+2]=g[G]%l,T[G*4+3]=Math.floor(g[G]/l))}P({data:T||new Uint8Array(0),type:"uint8",usage:"dynamic"})}return h})}};nv.prototype.addMarker=function(e){var t=this.markerTextures,r=this.regl,n=this.markerCache,i=e==null?0:n.indexOf(e);if(i>=0)return i;var a;if(e instanceof Uint8Array||e instanceof Uint8ClampedArray)a=e;else{a=new Uint8Array(e.length);for(var o=0,s=e.length;o<s;o++)a[o]=e[o]*255}var l=Math.floor(Math.sqrt(a.length));return i=t.length,n.push(e),t.push(r.texture({channels:1,data:a,radius:l,mag:"linear",min:"linear"})),i};nv.prototype.updateColor=function(e){var t=this.paletteIds,r=this.palette,n=this.maxColors;Array.isArray(e)||(e=[e]);var i=[];if(typeof e[0]=="number"){var a=[];if(Array.isArray(e))for(var o=0;o<e.length;o+=4)a.push(e.slice(o,o+4));else for(var s=0;s<e.length;s+=4)a.push(e.subarray(s,s+4));e=a}for(var l=0;l<e.length;l++){var u=e[l];u=COt(u,"uint8");var c=POt(u,!1);if(t[c]==null){var f=r.length;t[c]=Math.floor(f/4),r[f]=u[0],r[f+1]=u[1],r[f+2]=u[2],r[f+3]=u[3]}i[l]=t[c]}return!this.tooManyColors&&r.length>n*4&&(this.tooManyColors=!0),this.updatePalette(r),i.length===1?i[0]:i};nv.prototype.updatePalette=function(e){if(!this.tooManyColors){var t=this.maxColors,r=this.paletteTexture,n=Math.ceil(e.length*.25/t);if(n>1){e=e.slice();for(var i=e.length*.25%t;i<n*t;i++)e.push(0,0,0,0)}r.height<n&&r.resize(t,n),r.subimage({width:Math.min(e.length*.25,t),height:n,data:e},0,0)}};nv.prototype.destroy=function(){return this.groups.forEach(function(e){e.sizeBuffer.destroy(),e.positionBuffer.destroy(),e.positionFractBuffer.destroy(),e.colorBuffer.destroy(),e.activation.forEach(function(t){return t&&t.destroy&&t.destroy()}),e.selectionBuffer.destroy(),e.elements&&e.elements.destroy()}),this.groups.length=0,this.paletteTexture.destroy(),this.markerTextures.forEach(function(e){return e&&e.destroy&&e.destroy()}),this};var qOt=Oh(),BOt=function(t,r){var n=new OOt(t,r),i=n.render.bind(n);return qOt(i,{render:i,update:n.update.bind(n),draw:n.draw.bind(n),destroy:n.destroy.bind(n),regl:n.regl,gl:n.gl,canvas:n.gl.canvas,groups:n.groups,markers:n.markerCache,palette:n.palette}),i};h7e.exports=BOt});var m7e=ye((xyr,gY)=>{"use strict";gY.exports=az;gY.exports.default=az;function az(e,t,r){r=r||2;var n=t&&t.length,i=n?t[0]*r:e.length,a=v7e(e,0,i,r,!0),o=[];if(!a||a.next===a.prev)return o;var s,l,u,c,f,h,d;if(n&&(a=HOt(e,t,a,r)),e.length>80*r){s=u=e[0],l=c=e[1];for(var v=r;v<i;v+=r)f=e[v],h=e[v+1],f<s&&(s=f),h<l&&(l=h),f>u&&(u=f),h>c&&(c=h);d=Math.max(u-s,c-l),d=d!==0?32767/d:0}return vk(a,o,r,s,l,d,0),o}function v7e(e,t,r,n,i){var a,o;if(i===pY(e,t,r,n)>0)for(a=t;a<r;a+=n)o=d7e(a,e[a],e[a+1],o);else for(a=r-n;a>=t;a-=n)o=d7e(a,e[a],e[a+1],o);return o&&oz(o,o.next)&&(gk(o),o=o.next),o}function Q2(e,t){if(!e)return e;t||(t=e);var r=e,n;do if(n=!1,!r.steiner&&(oz(r,r.next)||xh(r.prev,r,r.next)===0)){if(gk(r),r=t=r.prev,r===r.next)break;n=!0}else r=r.next;while(n||r!==t);return t}function vk(e,t,r,n,i,a,o){if(e){!o&&a&&YOt(e,n,i,a);for(var s=e,l,u;e.prev!==e.next;){if(l=e.prev,u=e.next,a?UOt(e,n,i,a):NOt(e)){t.push(l.i/r|0),t.push(e.i/r|0),t.push(u.i/r|0),gk(e),e=u.next,s=u.next;continue}if(e=u,e===s){o?o===1?(e=VOt(Q2(e),t,r),vk(e,t,r,n,i,a,2)):o===2&&GOt(e,t,r,n,i,a):vk(Q2(e),t,r,n,i,a,1);break}}}}function NOt(e){var t=e.prev,r=e,n=e.next;if(xh(t,r,n)>=0)return!1;for(var i=t.x,a=r.x,o=n.x,s=t.y,l=r.y,u=n.y,c=i<a?i<o?i:o:a<o?a:o,f=s<l?s<u?s:u:l<u?l:u,h=i>a?i>o?i:o:a>o?a:o,d=s>l?s>u?s:u:l>u?l:u,v=n.next;v!==t;){if(v.x>=c&&v.x<=h&&v.y>=f&&v.y<=d&&c5(i,s,a,l,o,u,v.x,v.y)&&xh(v.prev,v,v.next)>=0)return!1;v=v.next}return!0}function UOt(e,t,r,n){var i=e.prev,a=e,o=e.next;if(xh(i,a,o)>=0)return!1;for(var s=i.x,l=a.x,u=o.x,c=i.y,f=a.y,h=o.y,d=s<l?s<u?s:u:l<u?l:u,v=c<f?c<h?c:h:f<h?f:h,_=s>l?s>u?s:u:l>u?l:u,b=c>f?c>h?c:h:f>h?f:h,p=dY(d,v,t,r,n),k=dY(_,b,t,r,n),E=e.prevZ,S=e.nextZ;E&&E.z>=p&&S&&S.z<=k;){if(E.x>=d&&E.x<=_&&E.y>=v&&E.y<=b&&E!==i&&E!==o&&c5(s,c,l,f,u,h,E.x,E.y)&&xh(E.prev,E,E.next)>=0||(E=E.prevZ,S.x>=d&&S.x<=_&&S.y>=v&&S.y<=b&&S!==i&&S!==o&&c5(s,c,l,f,u,h,S.x,S.y)&&xh(S.prev,S,S.next)>=0))return!1;S=S.nextZ}for(;E&&E.z>=p;){if(E.x>=d&&E.x<=_&&E.y>=v&&E.y<=b&&E!==i&&E!==o&&c5(s,c,l,f,u,h,E.x,E.y)&&xh(E.prev,E,E.next)>=0)return!1;E=E.prevZ}for(;S&&S.z<=k;){if(S.x>=d&&S.x<=_&&S.y>=v&&S.y<=b&&S!==i&&S!==o&&c5(s,c,l,f,u,h,S.x,S.y)&&xh(S.prev,S,S.next)>=0)return!1;S=S.nextZ}return!0}function VOt(e,t,r){var n=e;do{var i=n.prev,a=n.next.next;!oz(i,a)&&p7e(i,n,n.next,a)&&pk(i,a)&&pk(a,i)&&(t.push(i.i/r|0),t.push(n.i/r|0),t.push(a.i/r|0),gk(n),gk(n.next),n=e=a),n=n.next}while(n!==e);return Q2(n)}function GOt(e,t,r,n,i,a){var o=e;do{for(var s=o.next.next;s!==o.prev;){if(o.i!==s.i&&$Ot(o,s)){var l=g7e(o,s);o=Q2(o,o.next),l=Q2(l,l.next),vk(o,t,r,n,i,a,0),vk(l,t,r,n,i,a,0);return}s=s.next}o=o.next}while(o!==e)}function HOt(e,t,r,n){var i=[],a,o,s,l,u;for(a=0,o=t.length;a<o;a++)s=t[a]*n,l=a<o-1?t[a+1]*n:e.length,u=v7e(e,s,l,n,!1),u===u.next&&(u.steiner=!0),i.push(JOt(u));for(i.sort(jOt),a=0;a<i.length;a++)r=WOt(i[a],r);return r}function jOt(e,t){return e.x-t.x}function WOt(e,t){var r=XOt(e,t);if(!r)return t;var n=g7e(r,e);return Q2(n,n.next),Q2(r,r.next)}function XOt(e,t){var r=t,n=e.x,i=e.y,a=-1/0,o;do{if(i<=r.y&&i>=r.next.y&&r.next.y!==r.y){var s=r.x+(i-r.y)*(r.next.x-r.x)/(r.next.y-r.y);if(s<=n&&s>a&&(a=s,o=r.x<r.next.x?r:r.next,s===n))return o}r=r.next}while(r!==t);if(!o)return null;var l=o,u=o.x,c=o.y,f=1/0,h;r=o;do n>=r.x&&r.x>=u&&n!==r.x&&c5(i<c?n:a,i,u,c,i<c?a:n,i,r.x,r.y)&&(h=Math.abs(i-r.y)/(n-r.x),pk(r,e)&&(h<f||h===f&&(r.x>o.x||r.x===o.x&&ZOt(o,r)))&&(o=r,f=h)),r=r.next;while(r!==l);return o}function ZOt(e,t){return xh(e.prev,e,t.prev)<0&&xh(t.next,e,e.next)<0}function YOt(e,t,r,n){var i=e;do i.z===0&&(i.z=dY(i.x,i.y,t,r,n)),i.prevZ=i.prev,i.nextZ=i.next,i=i.next;while(i!==e);i.prevZ.nextZ=null,i.prevZ=null,KOt(i)}function KOt(e){var t,r,n,i,a,o,s,l,u=1;do{for(r=e,e=null,a=null,o=0;r;){for(o++,n=r,s=0,t=0;t<u&&(s++,n=n.nextZ,!!n);t++);for(l=u;s>0||l>0&&n;)s!==0&&(l===0||!n||r.z<=n.z)?(i=r,r=r.nextZ,s--):(i=n,n=n.nextZ,l--),a?a.nextZ=i:e=i,i.prevZ=a,a=i;r=n}a.nextZ=null,u*=2}while(o>1);return e}function dY(e,t,r,n,i){return e=(e-r)*i|0,t=(t-n)*i|0,e=(e|e<<8)&16711935,e=(e|e<<4)&252645135,e=(e|e<<2)&858993459,e=(e|e<<1)&1431655765,t=(t|t<<8)&16711935,t=(t|t<<4)&252645135,t=(t|t<<2)&858993459,t=(t|t<<1)&1431655765,e|t<<1}function JOt(e){var t=e,r=e;do(t.x<r.x||t.x===r.x&&t.y<r.y)&&(r=t),t=t.next;while(t!==e);return r}function c5(e,t,r,n,i,a,o,s){return(i-o)*(t-s)>=(e-o)*(a-s)&&(e-o)*(n-s)>=(r-o)*(t-s)&&(r-o)*(a-s)>=(i-o)*(n-s)}function $Ot(e,t){return e.next.i!==t.i&&e.prev.i!==t.i&&!QOt(e,t)&&(pk(e,t)&&pk(t,e)&&eqt(e,t)&&(xh(e.prev,e,t.prev)||xh(e,t.prev,t))||oz(e,t)&&xh(e.prev,e,e.next)>0&&xh(t.prev,t,t.next)>0)}function xh(e,t,r){return(t.y-e.y)*(r.x-t.x)-(t.x-e.x)*(r.y-t.y)}function oz(e,t){return e.x===t.x&&e.y===t.y}function p7e(e,t,r,n){var i=nz(xh(e,t,r)),a=nz(xh(e,t,n)),o=nz(xh(r,n,e)),s=nz(xh(r,n,t));return!!(i!==a&&o!==s||i===0&&iz(e,r,t)||a===0&&iz(e,n,t)||o===0&&iz(r,e,n)||s===0&&iz(r,t,n))}function iz(e,t,r){return t.x<=Math.max(e.x,r.x)&&t.x>=Math.min(e.x,r.x)&&t.y<=Math.max(e.y,r.y)&&t.y>=Math.min(e.y,r.y)}function nz(e){return e>0?1:e<0?-1:0}function QOt(e,t){var r=e;do{if(r.i!==e.i&&r.next.i!==e.i&&r.i!==t.i&&r.next.i!==t.i&&p7e(r,r.next,e,t))return!0;r=r.next}while(r!==e);return!1}function pk(e,t){return xh(e.prev,e,e.next)<0?xh(e,t,e.next)>=0&&xh(e,e.prev,t)>=0:xh(e,t,e.prev)<0||xh(e,e.next,t)<0}function eqt(e,t){var r=e,n=!1,i=(e.x+t.x)/2,a=(e.y+t.y)/2;do r.y>a!=r.next.y>a&&r.next.y!==r.y&&i<(r.next.x-r.x)*(a-r.y)/(r.next.y-r.y)+r.x&&(n=!n),r=r.next;while(r!==e);return n}function g7e(e,t){var r=new vY(e.i,e.x,e.y),n=new vY(t.i,t.x,t.y),i=e.next,a=t.prev;return e.next=t,t.prev=e,r.next=i,i.prev=r,n.next=r,r.prev=n,a.next=n,n.prev=a,n}function d7e(e,t,r,n){var i=new vY(e,t,r);return n?(i.next=n.next,i.prev=n,n.next.prev=i,n.next=i):(i.prev=i,i.next=i),i}function gk(e){e.next.prev=e.prev,e.prev.next=e.next,e.prevZ&&(e.prevZ.nextZ=e.nextZ),e.nextZ&&(e.nextZ.prevZ=e.prevZ)}function vY(e,t,r){this.i=e,this.x=t,this.y=r,this.prev=null,this.next=null,this.z=0,this.prevZ=null,this.nextZ=null,this.steiner=!1}az.deviation=function(e,t,r,n){var i=t&&t.length,a=i?t[0]*r:e.length,o=Math.abs(pY(e,0,a,r));if(i)for(var s=0,l=t.length;s<l;s++){var u=t[s]*r,c=s<l-1?t[s+1]*r:e.length;o-=Math.abs(pY(e,u,c,r))}var f=0;for(s=0;s<n.length;s+=3){var h=n[s]*r,d=n[s+1]*r,v=n[s+2]*r;f+=Math.abs((e[h]-e[v])*(e[d+1]-e[h+1])-(e[h]-e[d])*(e[v+1]-e[h+1]))}return o===0&&f===0?0:Math.abs((f-o)/o)};function pY(e,t,r,n){for(var i=0,a=t,o=r-n;a<r;a+=n)i+=(e[o]-e[a])*(e[a+1]+e[o+1]),o=a;return i}az.flatten=function(e){for(var t=e[0][0].length,r={vertices:[],holes:[],dimensions:t},n=0,i=0;i<e.length;i++){for(var a=0;a<e[i].length;a++)for(var o=0;o<t;o++)r.vertices.push(e[i][a][o]);i>0&&(n+=e[i-1].length,r.holes.push(n))}return r}});var _7e=ye((byr,y7e)=>{"use strict";var tqt=X2();y7e.exports=rqt;function rqt(e,t,r){if(!e||e.length==null)throw Error("Argument should be an array");t==null&&(t=1),r==null&&(r=tqt(e,t));for(var n=0;n<t;n++){var i=r[t+n],a=r[n],o=n,s=e.length;if(i===1/0&&a===-1/0)for(o=n;o<s;o+=t)e[o]=e[o]===i?1:e[o]===a?0:.5;else if(i===1/0)for(o=n;o<s;o+=t)e[o]=e[o]===i?1:0;else if(a===-1/0)for(o=n;o<s;o+=t)e[o]=e[o]===a?0:1;else{var l=i-a;for(o=n;o<s;o+=t)isNaN(e[o])||(e[o]=l===0?.5:(e[o]-a)/l)}}return e}});var b7e=ye((wyr,x7e)=>{"use strict";x7e.exports=function(){var e,t;if(typeof WeakMap!="function")return!1;try{e=new WeakMap([[t={},"one"],[{},"two"],[{},"three"]])}catch(r){return!1}return!(String(e)!=="[object WeakMap]"||typeof e.set!="function"||e.set({},1)!==e||typeof e.delete!="function"||typeof e.has!="function"||e.get(t)!=="one")}});var T7e=ye((Tyr,w7e)=>{"use strict";w7e.exports=function(){}});var lx=ye((Ayr,A7e)=>{"use strict";var iqt=T7e()();A7e.exports=function(e){return e!==iqt&&e!==null}});var mY=ye((Syr,M7e)=>{"use strict";var nqt=Object.create,aqt=Object.getPrototypeOf,S7e={};M7e.exports=function(){var e=Object.setPrototypeOf,t=arguments[0]||nqt;return typeof e!="function"?!1:aqt(e(t(null),S7e))===S7e}});var yY=ye((Myr,E7e)=>{"use strict";var oqt=lx(),sqt={function:!0,object:!0};E7e.exports=function(e){return oqt(e)&&sqt[typeof e]||!1}});var i1=ye((Eyr,k7e)=>{"use strict";var lqt=lx();k7e.exports=function(e){if(!lqt(e))throw new TypeError("Cannot use null or undefined");return e}});var L7e=ye((kyr,C7e)=>{"use strict";var _Y=Object.create,sz;mY()()||(sz=xY());C7e.exports=function(){var e,t,r;return!sz||sz.level!==1?_Y:(e={},t={},r={configurable:!1,enumerable:!1,writable:!0,value:void 0},Object.getOwnPropertyNames(Object.prototype).forEach(function(n){if(n==="__proto__"){t[n]={configurable:!0,enumerable:!1,writable:!0,value:void 0};return}t[n]=r}),Object.defineProperties(e,t),Object.defineProperty(sz,"nullPolyfill",{configurable:!1,enumerable:!1,writable:!1,value:e}),function(n,i){return _Y(n===null?e:n,i)})}()});var xY=ye((Cyr,P7e)=>{"use strict";var uqt=yY(),cqt=i1(),fqt=Object.prototype.isPrototypeOf,hqt=Object.defineProperty,dqt={configurable:!0,enumerable:!1,writable:!0,value:void 0},lz;lz=function(e,t){if(cqt(e),t===null||uqt(t))return e;throw new TypeError("Prototype must be null or an object")};P7e.exports=function(e){var t,r;return e?(e.level===2?e.set?(r=e.set,t=function(n,i){return r.call(lz(n,i),i),n}):t=function(n,i){return lz(n,i).__proto__=i,n}:t=function n(i,a){var o;return lz(i,a),o=fqt.call(n.nullPolyfill,i),o&&delete n.nullPolyfill.__proto__,a===null&&(a=n.nullPolyfill),i.__proto__=a,o&&hqt(n.nullPolyfill,"__proto__",dqt),i},Object.defineProperty(t,"level",{configurable:!1,enumerable:!1,writable:!1,value:e.level})):null}(function(){var e=Object.create(null),t={},r,n=Object.getOwnPropertyDescriptor(Object.prototype,"__proto__");if(n){try{r=n.set,r.call(e,t)}catch(i){}if(Object.getPrototypeOf(e)===t)return{set:r,level:2}}return e.__proto__=t,Object.getPrototypeOf(e)===t?{level:2}:(e={},e.__proto__=t,Object.getPrototypeOf(e)===t?{level:1}:!1)}());L7e()});var uz=ye((Lyr,I7e)=>{"use strict";I7e.exports=mY()()?Object.setPrototypeOf:xY()});var D7e=ye((Pyr,R7e)=>{"use strict";var vqt=yY();R7e.exports=function(e){if(!vqt(e))throw new TypeError(e+" is not an Object");return e}});var z7e=ye((Iyr,F7e)=>{"use strict";var pqt=Object.create(null),gqt=Math.random;F7e.exports=function(){var e;do e=gqt().toString(36).slice(2);while(pqt[e]);return e}});var ew=ye((Ryr,O7e)=>{"use strict";var mqt=void 0;O7e.exports=function(e){return e!==mqt&&e!==null}});var cz=ye((Dyr,q7e)=>{"use strict";var yqt=ew(),_qt={object:!0,function:!0,undefined:!0};q7e.exports=function(e){return yqt(e)?hasOwnProperty.call(_qt,typeof e):!1}});var N7e=ye((Fyr,B7e)=>{"use strict";var xqt=cz();B7e.exports=function(e){if(!xqt(e))return!1;try{return e.constructor?e.constructor.prototype===e:!1}catch(t){return!1}}});var V7e=ye((zyr,U7e)=>{"use strict";var bqt=N7e();U7e.exports=function(e){if(typeof e!="function"||!hasOwnProperty.call(e,"length"))return!1;try{if(typeof e.length!="number"||typeof e.call!="function"||typeof e.apply!="function")return!1}catch(t){return!1}return!bqt(e)}});var bY=ye((Oyr,G7e)=>{"use strict";var wqt=V7e(),Tqt=/^\s*class[\s{/}]/,Aqt=Function.prototype.toString;G7e.exports=function(e){return!(!wqt(e)||Tqt.test(Aqt.call(e)))}});var j7e=ye((qyr,H7e)=>{"use strict";H7e.exports=function(){var e=Object.assign,t;return typeof e!="function"?!1:(t={foo:"raz"},e(t,{bar:"dwa"},{trzy:"trzy"}),t.foo+t.bar+t.trzy==="razdwatrzy")}});var X7e=ye((Byr,W7e)=>{"use strict";W7e.exports=function(){try{return Object.keys("primitive"),!0}catch(e){return!1}}});var Y7e=ye((Nyr,Z7e)=>{"use strict";var Sqt=lx(),Mqt=Object.keys;Z7e.exports=function(e){return Mqt(Sqt(e)?Object(e):e)}});var J7e=ye((Uyr,K7e)=>{"use strict";K7e.exports=X7e()()?Object.keys:Y7e()});var Q7e=ye((Vyr,$7e)=>{"use strict";var Eqt=J7e(),kqt=i1(),Cqt=Math.max;$7e.exports=function(e,t){var r,n,i=Cqt(arguments.length,2),a;for(e=Object(kqt(e)),a=function(o){try{e[o]=t[o]}catch(s){r||(r=s)}},n=1;n<i;++n)t=arguments[n],Eqt(t).forEach(a);if(r!==void 0)throw r;return e}});var fz=ye((Gyr,e9e)=>{"use strict";e9e.exports=j7e()()?Object.assign:Q7e()});var wY=ye((Hyr,t9e)=>{"use strict";var Lqt=lx(),Pqt=Array.prototype.forEach,Iqt=Object.create,Rqt=function(e,t){var r;for(r in e)t[r]=e[r]};t9e.exports=function(e){var t=Iqt(null);return Pqt.call(arguments,function(r){Lqt(r)&&Rqt(Object(r),t)}),t}});var i9e=ye((jyr,r9e)=>{"use strict";var TY="razdwatrzy";r9e.exports=function(){return typeof TY.contains!="function"?!1:TY.contains("dwa")===!0&&TY.contains("foo")===!1}});var a9e=ye((Wyr,n9e)=>{"use strict";var Dqt=String.prototype.indexOf;n9e.exports=function(e){return Dqt.call(this,e,arguments[1])>-1}});var AY=ye((Xyr,o9e)=>{"use strict";o9e.exports=i9e()()?String.prototype.contains:a9e()});var n1=ye((Zyr,c9e)=>{"use strict";var hz=ew(),s9e=bY(),l9e=fz(),u9e=wY(),mk=AY(),Fqt=c9e.exports=function(e,t){var r,n,i,a,o;return arguments.length<2||typeof e!="string"?(a=t,t=e,e=null):a=arguments[2],hz(e)?(r=mk.call(e,"c"),n=mk.call(e,"e"),i=mk.call(e,"w")):(r=i=!0,n=!1),o={value:t,configurable:r,enumerable:n,writable:i},a?l9e(u9e(a),o):o};Fqt.gs=function(e,t,r){var n,i,a,o;return typeof e!="string"?(a=r,r=t,t=e,e=null):a=arguments[3],hz(t)?s9e(t)?hz(r)?s9e(r)||(a=r,r=void 0):r=void 0:(a=t,t=r=void 0):t=void 0,hz(e)?(n=mk.call(e,"c"),i=mk.call(e,"e")):(n=!0,i=!1),o={get:t,set:r,configurable:n,enumerable:i},a?l9e(u9e(a),o):o}});var yk=ye((Yyr,h9e)=>{"use strict";var f9e=Object.prototype.toString,zqt=f9e.call(function(){return arguments}());h9e.exports=function(e){return f9e.call(e)===zqt}});var _k=ye((Kyr,v9e)=>{"use strict";var d9e=Object.prototype.toString,Oqt=d9e.call("");v9e.exports=function(e){return typeof e=="string"||e&&typeof e=="object"&&(e instanceof String||d9e.call(e)===Oqt)||!1}});var g9e=ye((Jyr,p9e)=>{"use strict";p9e.exports=function(){return typeof globalThis!="object"||!globalThis?!1:globalThis.Array===Array}});var _9e=ye(($yr,y9e)=>{var m9e=function(){if(typeof self=="object"&&self)return self;if(typeof window=="object"&&window)return window;throw new Error("Unable to resolve global `this`")};y9e.exports=function(){if(this)return this;try{Object.defineProperty(Object.prototype,"__global__",{get:function(){return this},configurable:!0})}catch(e){return m9e()}try{return __global__||m9e()}finally{delete Object.prototype.__global__}}()});var xk=ye((Qyr,x9e)=>{"use strict";x9e.exports=g9e()()?globalThis:_9e()});var w9e=ye((e1r,b9e)=>{"use strict";var qqt=xk(),SY={object:!0,symbol:!0};b9e.exports=function(){var e=qqt.Symbol,t;if(typeof e!="function")return!1;t=e("test symbol");try{String(t)}catch(r){return!1}return!(!SY[typeof e.iterator]||!SY[typeof e.toPrimitive]||!SY[typeof e.toStringTag])}});var A9e=ye((t1r,T9e)=>{"use strict";T9e.exports=function(e){return e?typeof e=="symbol"?!0:!e.constructor||e.constructor.name!=="Symbol"?!1:e[e.constructor.toStringTag]==="Symbol":!1}});var MY=ye((r1r,S9e)=>{"use strict";var Bqt=A9e();S9e.exports=function(e){if(!Bqt(e))throw new TypeError(e+" is not a symbol");return e}});var L9e=ye((i1r,C9e)=>{"use strict";var M9e=n1(),Nqt=Object.create,E9e=Object.defineProperty,Uqt=Object.prototype,k9e=Nqt(null);C9e.exports=function(e){for(var t=0,r,n;k9e[e+(t||"")];)++t;return e+=t||"",k9e[e]=!0,r="@@"+e,E9e(Uqt,r,M9e.gs(null,function(i){n||(n=!0,E9e(this,r,M9e(i)),n=!1)})),r}});var I9e=ye((n1r,P9e)=>{"use strict";var em=n1(),qh=xk().Symbol;P9e.exports=function(e){return Object.defineProperties(e,{hasInstance:em("",qh&&qh.hasInstance||e("hasInstance")),isConcatSpreadable:em("",qh&&qh.isConcatSpreadable||e("isConcatSpreadable")),iterator:em("",qh&&qh.iterator||e("iterator")),match:em("",qh&&qh.match||e("match")),replace:em("",qh&&qh.replace||e("replace")),search:em("",qh&&qh.search||e("search")),species:em("",qh&&qh.species||e("species")),split:em("",qh&&qh.split||e("split")),toPrimitive:em("",qh&&qh.toPrimitive||e("toPrimitive")),toStringTag:em("",qh&&qh.toStringTag||e("toStringTag")),unscopables:em("",qh&&qh.unscopables||e("unscopables"))})}});var F9e=ye((a1r,D9e)=>{"use strict";var R9e=n1(),Vqt=MY(),bk=Object.create(null);D9e.exports=function(e){return Object.defineProperties(e,{for:R9e(function(t){return bk[t]?bk[t]:bk[t]=e(String(t))}),keyFor:R9e(function(t){var r;Vqt(t);for(r in bk)if(bk[r]===t)return r})})}});var q9e=ye((o1r,O9e)=>{"use strict";var Ym=n1(),EY=MY(),dz=xk().Symbol,Gqt=L9e(),Hqt=I9e(),jqt=F9e(),Wqt=Object.create,kY=Object.defineProperties,vz=Object.defineProperty,Xv,f5,z9e;if(typeof dz=="function")try{String(dz()),z9e=!0}catch(e){}else dz=null;f5=function(t){if(this instanceof f5)throw new TypeError("Symbol is not a constructor");return Xv(t)};O9e.exports=Xv=function e(t){var r;if(this instanceof e)throw new TypeError("Symbol is not a constructor");return z9e?dz(t):(r=Wqt(f5.prototype),t=t===void 0?"":String(t),kY(r,{__description__:Ym("",t),__name__:Ym("",Gqt(t))}))};Hqt(Xv);jqt(Xv);kY(f5.prototype,{constructor:Ym(Xv),toString:Ym("",function(){return this.__name__})});kY(Xv.prototype,{toString:Ym(function(){return"Symbol ("+EY(this).__description__+")"}),valueOf:Ym(function(){return EY(this)})});vz(Xv.prototype,Xv.toPrimitive,Ym("",function(){var e=EY(this);return typeof e=="symbol"?e:e.toString()}));vz(Xv.prototype,Xv.toStringTag,Ym("c","Symbol"));vz(f5.prototype,Xv.toStringTag,Ym("c",Xv.prototype[Xv.toStringTag]));vz(f5.prototype,Xv.toPrimitive,Ym("c",Xv.prototype[Xv.toPrimitive]))});var ux=ye((s1r,B9e)=>{"use strict";B9e.exports=w9e()()?xk().Symbol:q9e()});var U9e=ye((l1r,N9e)=>{"use strict";var Xqt=i1();N9e.exports=function(){return Xqt(this).length=0,this}});var h5=ye((u1r,V9e)=>{"use strict";V9e.exports=function(e){if(typeof e!="function")throw new TypeError(e+" is not a function");return e}});var H9e=ye((c1r,G9e)=>{"use strict";var Zqt=ew(),Yqt=cz(),Kqt=Object.prototype.toString;G9e.exports=function(e){if(!Zqt(e))return null;if(Yqt(e)){var t=e.toString;if(typeof t!="function"||t===Kqt)return null}try{return""+e}catch(r){return null}}});var W9e=ye((f1r,j9e)=>{"use strict";j9e.exports=function(e){try{return e.toString()}catch(t){try{return String(e)}catch(r){return null}}}});var Z9e=ye((h1r,X9e)=>{"use strict";var Jqt=W9e(),$qt=/[\n\r\u2028\u2029]/g;X9e.exports=function(e){var t=Jqt(e);return t===null?"<Non-coercible to string value>":(t.length>100&&(t=t.slice(0,99)+"\u2026"),t=t.replace($qt,function(r){switch(r){case`
+`:return"\\n";case"\r":return"\\r";case"\u2028":return"\\u2028";case"\u2029":return"\\u2029";default:throw new Error("Unexpected character")}}),t)}});var CY=ye((d1r,J9e)=>{"use strict";var Y9e=ew(),Qqt=cz(),eBt=H9e(),tBt=Z9e(),K9e=function(e,t){return e.replace("%v",tBt(t))};J9e.exports=function(e,t,r){if(!Qqt(r))throw new TypeError(K9e(t,e));if(!Y9e(e)){if("default"in r)return r.default;if(r.isOptional)return null}var n=eBt(r.errorMessage);throw Y9e(n)||(n=t),new TypeError(K9e(n,e))}});var Q9e=ye((v1r,$9e)=>{"use strict";var rBt=CY(),iBt=ew();$9e.exports=function(e){return iBt(e)?e:rBt(e,"Cannot use %v",arguments[1])}});var tOe=ye((p1r,eOe)=>{"use strict";var nBt=CY(),aBt=bY();eOe.exports=function(e){return aBt(e)?e:nBt(e,"%v is not a plain function",arguments[1])}});var iOe=ye((g1r,rOe)=>{"use strict";rOe.exports=function(){var e=Array.from,t,r;return typeof e!="function"?!1:(t=["raz","dwa"],r=e(t),!!(r&&r!==t&&r[1]==="dwa"))}});var aOe=ye((m1r,nOe)=>{"use strict";var oBt=Object.prototype.toString,sBt=RegExp.prototype.test.bind(/^[object [A-Za-z0-9]*Function]$/);nOe.exports=function(e){return typeof e=="function"&&sBt(oBt.call(e))}});var sOe=ye((y1r,oOe)=>{"use strict";oOe.exports=function(){var e=Math.sign;return typeof e!="function"?!1:e(10)===1&&e(-20)===-1}});var uOe=ye((_1r,lOe)=>{"use strict";lOe.exports=function(e){return e=Number(e),isNaN(e)||e===0?e:e>0?1:-1}});var fOe=ye((x1r,cOe)=>{"use strict";cOe.exports=sOe()()?Math.sign:uOe()});var dOe=ye((b1r,hOe)=>{"use strict";var lBt=fOe(),uBt=Math.abs,cBt=Math.floor;hOe.exports=function(e){return isNaN(e)?0:(e=Number(e),e===0||!isFinite(e)?e:lBt(e)*cBt(uBt(e)))}});var pOe=ye((w1r,vOe)=>{"use strict";var fBt=dOe(),hBt=Math.max;vOe.exports=function(e){return hBt(0,fBt(e))}});var _Oe=ye((T1r,yOe)=>{"use strict";var dBt=ux().iterator,vBt=yk(),pBt=aOe(),gBt=pOe(),gOe=h5(),mBt=i1(),yBt=lx(),_Bt=_k(),mOe=Array.isArray,LY=Function.prototype.call,tw={configurable:!0,enumerable:!0,writable:!0,value:null},PY=Object.defineProperty;yOe.exports=function(e){var t=arguments[1],r=arguments[2],n,i,a,o,s,l,u,c,f,h;if(e=Object(mBt(e)),yBt(t)&&gOe(t),!this||this===Array||!pBt(this)){if(!t){if(vBt(e))return s=e.length,s!==1?Array.apply(null,e):(o=new Array(1),o[0]=e[0],o);if(mOe(e)){for(o=new Array(s=e.length),i=0;i<s;++i)o[i]=e[i];return o}}o=[]}else n=this;if(!mOe(e)){if((f=e[dBt])!==void 0){for(u=gOe(f).call(e),n&&(o=new n),c=u.next(),i=0;!c.done;)h=t?LY.call(t,r,c.value,i):c.value,n?(tw.value=h,PY(o,i,tw)):o[i]=h,c=u.next(),++i;s=i}else if(_Bt(e)){for(s=e.length,n&&(o=new n),i=0,a=0;i<s;++i)h=e[i],i+1<s&&(l=h.charCodeAt(0),l>=55296&&l<=56319&&(h+=e[++i])),h=t?LY.call(t,r,h,a):h,n?(tw.value=h,PY(o,a,tw)):o[a]=h,++a;s=a}}if(s===void 0)for(s=gBt(e.length),n&&(o=new n(s)),i=0;i<s;++i)h=t?LY.call(t,r,e[i],i):e[i],n?(tw.value=h,PY(o,i,tw)):o[i]=h;return n&&(tw.value=null,o.length=s),o}});var bOe=ye((A1r,xOe)=>{"use strict";xOe.exports=iOe()()?Array.from:_Oe()});var TOe=ye((S1r,wOe)=>{"use strict";var xBt=bOe(),bBt=fz(),wBt=i1();wOe.exports=function(e){var t=Object(wBt(e)),r=arguments[1],n=Object(arguments[2]);if(t!==e&&!r)return t;var i={};return r?xBt(r,function(a){(n.ensure||a in e)&&(i[a]=e[a])}):bBt(i,e),i}});var MOe=ye((M1r,SOe)=>{"use strict";var TBt=h5(),ABt=i1(),SBt=Function.prototype.bind,AOe=Function.prototype.call,MBt=Object.keys,EBt=Object.prototype.propertyIsEnumerable;SOe.exports=function(e,t){return function(r,n){var i,a=arguments[2],o=arguments[3];return r=Object(ABt(r)),TBt(n),i=MBt(r),o&&i.sort(typeof o=="function"?SBt.call(o,r):void 0),typeof e!="function"&&(e=i[e]),AOe.call(e,i,function(s,l){return EBt.call(r,s)?AOe.call(n,a,r[s],s,r,l):t})}}});var kOe=ye((E1r,EOe)=>{"use strict";EOe.exports=MOe()("forEach")});var LOe=ye((k1r,COe)=>{"use strict";var kBt=h5(),CBt=kOe(),LBt=Function.prototype.call;COe.exports=function(e,t){var r={},n=arguments[2];return kBt(t),CBt(e,function(i,a,o,s){r[a]=LBt.call(t,n,i,a,o,s)}),r}});var DOe=ye((C1r,ROe)=>{"use strict";var PBt=ew(),IBt=Q9e(),POe=tOe(),RBt=TOe(),DBt=wY(),FBt=LOe(),zBt=Function.prototype.bind,OBt=Object.defineProperty,qBt=Object.prototype.hasOwnProperty,IOe;IOe=function(e,t,r){var n=IBt(t)&&POe(t.value),i;return i=RBt(t),delete i.writable,delete i.value,i.get=function(){return!r.overwriteDefinition&&qBt.call(this,e)?n:(t.value=zBt.call(n,r.resolveContext?r.resolveContext(this):this),OBt(this,e,t),this[e])},i};ROe.exports=function(e){var t=DBt(arguments[1]);return PBt(t.resolveContext)&&POe(t.resolveContext),FBt(e,function(r,n){return IOe(n,r,t)})}});var IY=ye((L1r,qOe)=>{"use strict";var BBt=U9e(),NBt=fz(),UBt=h5(),VBt=i1(),qp=n1(),GBt=DOe(),FOe=ux(),zOe=Object.defineProperty,OOe=Object.defineProperties,wk;qOe.exports=wk=function(e,t){if(!(this instanceof wk))throw new TypeError("Constructor requires 'new'");OOe(this,{__list__:qp("w",VBt(e)),__context__:qp("w",t),__nextIndex__:qp("w",0)}),t&&(UBt(t.on),t.on("_add",this._onAdd),t.on("_delete",this._onDelete),t.on("_clear",this._onClear))};delete wk.prototype.constructor;OOe(wk.prototype,NBt({_next:qp(function(){var e;if(this.__list__){if(this.__redo__&&(e=this.__redo__.shift(),e!==void 0))return e;if(this.__nextIndex__<this.__list__.length)return this.__nextIndex__++;this._unBind()}}),next:qp(function(){return this._createResult(this._next())}),_createResult:qp(function(e){return e===void 0?{done:!0,value:void 0}:{done:!1,value:this._resolve(e)}}),_resolve:qp(function(e){return this.__list__[e]}),_unBind:qp(function(){this.__list__=null,delete this.__redo__,this.__context__&&(this.__context__.off("_add",this._onAdd),this.__context__.off("_delete",this._onDelete),this.__context__.off("_clear",this._onClear),this.__context__=null)}),toString:qp(function(){return"[object "+(this[FOe.toStringTag]||"Object")+"]"})},GBt({_onAdd:qp(function(e){if(!(e>=this.__nextIndex__)){if(++this.__nextIndex__,!this.__redo__){zOe(this,"__redo__",qp("c",[e]));return}this.__redo__.forEach(function(t,r){t>=e&&(this.__redo__[r]=++t)},this),this.__redo__.push(e)}}),_onDelete:qp(function(e){var t;e>=this.__nextIndex__||(--this.__nextIndex__,this.__redo__&&(t=this.__redo__.indexOf(e),t!==-1&&this.__redo__.splice(t,1),this.__redo__.forEach(function(r,n){r>e&&(this.__redo__[n]=--r)},this)))}),_onClear:qp(function(){this.__redo__&&BBt.call(this.__redo__),this.__nextIndex__=0})})));zOe(wk.prototype,FOe.iterator,qp(function(){return this}))});var GOe=ye((P1r,VOe)=>{"use strict";var BOe=uz(),NOe=AY(),RY=n1(),HBt=ux(),DY=IY(),UOe=Object.defineProperty,d5;d5=VOe.exports=function(e,t){if(!(this instanceof d5))throw new TypeError("Constructor requires 'new'");DY.call(this,e),t?NOe.call(t,"key+value")?t="key+value":NOe.call(t,"key")?t="key":t="value":t="value",UOe(this,"__kind__",RY("",t))};BOe&&BOe(d5,DY);delete d5.prototype.constructor;d5.prototype=Object.create(DY.prototype,{_resolve:RY(function(e){return this.__kind__==="value"?this.__list__[e]:this.__kind__==="key+value"?[e,this.__list__[e]]:e})});UOe(d5.prototype,HBt.toStringTag,RY("c","Array Iterator"))});var XOe=ye((I1r,WOe)=>{"use strict";var HOe=uz(),pz=n1(),jBt=ux(),FY=IY(),jOe=Object.defineProperty,v5;v5=WOe.exports=function(e){if(!(this instanceof v5))throw new TypeError("Constructor requires 'new'");e=String(e),FY.call(this,e),jOe(this,"__length__",pz("",e.length))};HOe&&HOe(v5,FY);delete v5.prototype.constructor;v5.prototype=Object.create(FY.prototype,{_next:pz(function(){if(this.__list__){if(this.__nextIndex__<this.__length__)return this.__nextIndex__++;this._unBind()}}),_resolve:pz(function(e){var t=this.__list__[e],r;return this.__nextIndex__===this.__length__?t:(r=t.charCodeAt(0),r>=55296&&r<=56319?t+this.__list__[this.__nextIndex__++]:t)})});jOe(v5.prototype,jBt.toStringTag,pz("c","String Iterator"))});var YOe=ye((R1r,ZOe)=>{"use strict";var WBt=yk(),XBt=lx(),ZBt=_k(),YBt=ux().iterator,KBt=Array.isArray;ZOe.exports=function(e){return XBt(e)?KBt(e)||ZBt(e)||WBt(e)?!0:typeof e[YBt]=="function":!1}});var JOe=ye((D1r,KOe)=>{"use strict";var JBt=YOe();KOe.exports=function(e){if(!JBt(e))throw new TypeError(e+" is not iterable");return e}});var zY=ye((F1r,eqe)=>{"use strict";var $Bt=yk(),QBt=_k(),$Oe=GOe(),eNt=XOe(),tNt=JOe(),QOe=ux().iterator;eqe.exports=function(e){return typeof tNt(e)[QOe]=="function"?e[QOe]():$Bt(e)?new $Oe(e):QBt(e)?new eNt(e):new $Oe(e)}});var rqe=ye((z1r,tqe)=>{"use strict";var rNt=yk(),iNt=h5(),nNt=_k(),aNt=zY(),oNt=Array.isArray,OY=Function.prototype.call,sNt=Array.prototype.some;tqe.exports=function(e,t){var r,n=arguments[2],i,a,o,s,l,u,c;if(oNt(e)||rNt(e)?r="array":nNt(e)?r="string":e=aNt(e),iNt(t),a=function(){o=!0},r==="array"){sNt.call(e,function(f){return OY.call(t,n,f,a),o});return}if(r==="string"){for(l=e.length,s=0;s<l&&(u=e[s],s+1<l&&(c=u.charCodeAt(0),c>=55296&&c<=56319&&(u+=e[++s])),OY.call(t,n,u,a),!o);++s);return}for(i=e.next();!i.done;){if(OY.call(t,n,i.value,a),o)return;i=e.next()}}});var nqe=ye((O1r,iqe)=>{"use strict";iqe.exports=function(){return typeof WeakMap!="function"?!1:Object.prototype.toString.call(new WeakMap)==="[object WeakMap]"}()});var sqe=ye((q1r,oqe)=>{"use strict";var lNt=lx(),mz=uz(),gz=D7e(),uNt=i1(),cNt=z7e(),a1=n1(),fNt=zY(),hNt=rqe(),dNt=ux().toStringTag,aqe=nqe(),vNt=Array.isArray,BY=Object.defineProperty,qY=Object.prototype.hasOwnProperty,pNt=Object.getPrototypeOf,cx;oqe.exports=cx=function(){var e=arguments[0],t;if(!(this instanceof cx))throw new TypeError("Constructor requires 'new'");return t=aqe&&mz&&WeakMap!==cx?mz(new WeakMap,pNt(this)):this,lNt(e)&&(vNt(e)||(e=fNt(e))),BY(t,"__weakMapData__",a1("c","$weakMap$"+cNt())),e&&hNt(e,function(r){uNt(r),t.set(r[0],r[1])}),t};aqe&&(mz&&mz(cx,WeakMap),cx.prototype=Object.create(WeakMap.prototype,{constructor:a1(cx)}));Object.defineProperties(cx.prototype,{delete:a1(function(e){return qY.call(gz(e),this.__weakMapData__)?(delete e[this.__weakMapData__],!0):!1}),get:a1(function(e){if(qY.call(gz(e),this.__weakMapData__))return e[this.__weakMapData__]}),has:a1(function(e){return qY.call(gz(e),this.__weakMapData__)}),set:a1(function(e,t){return BY(gz(e),this.__weakMapData__,a1("c",t)),this}),toString:a1(function(){return"[object WeakMap]"})});BY(cx.prototype,dNt,a1("c","WeakMap"))});var NY=ye((B1r,lqe)=>{"use strict";lqe.exports=b7e()()?WeakMap:sqe()});var cqe=ye((N1r,uqe)=>{"use strict";uqe.exports=function(e,t,r){if(typeof Array.prototype.findIndex=="function")return e.findIndex(t,r);if(typeof t!="function")throw new TypeError("predicate must be a function");var n=Object(e),i=n.length;if(i===0)return-1;for(var a=0;a<i;a++)if(t.call(r,n[a],a,n))return a;return-1}});var GY=ye((U1r,dqe)=>{"use strict";var yz=J_(),gNt=X2(),VY=Oh(),mNt=Zm(),yNt=Z2(),fqe=m7e(),_Nt=_7e(),{float32:xNt,fract32:UY}=ez(),bNt=NY(),hqe=i5(),wNt=cqe(),TNt=`
+precision highp float;
+
+attribute vec2 aCoord, bCoord, aCoordFract, bCoordFract;
+attribute vec4 color;
+attribute float lineEnd, lineTop;
+
+uniform vec2 scale, scaleFract, translate, translateFract;
+uniform float thickness, pixelRatio, id, depth;
+uniform vec4 viewport;
+
+varying vec4 fragColor;
+varying vec2 tangent;
+
+vec2 project(vec2 position, vec2 positionFract, vec2 scale, vec2 scaleFract, vec2 translate, vec2 translateFract) {
+	// the order is important
+	return position * scale + translate
+       + positionFract * scale + translateFract
+       + position * scaleFract
+       + positionFract * scaleFract;
+}
+
+void main() {
+	float lineStart = 1. - lineEnd;
+	float lineOffset = lineTop * 2. - 1.;
+
+	vec2 diff = (bCoord + bCoordFract - aCoord - aCoordFract);
+	tangent = normalize(diff * scale * viewport.zw);
+	vec2 normal = vec2(-tangent.y, tangent.x);
+
+	vec2 position = project(aCoord, aCoordFract, scale, scaleFract, translate, translateFract) * lineStart
+		+ project(bCoord, bCoordFract, scale, scaleFract, translate, translateFract) * lineEnd
+
+		+ thickness * normal * .5 * lineOffset / viewport.zw;
+
+	gl_Position = vec4(position * 2.0 - 1.0, depth, 1);
+
+	fragColor = color / 255.;
+}
+`,ANt=`
+precision highp float;
+
+uniform float dashLength, pixelRatio, thickness, opacity, id;
+uniform sampler2D dashTexture;
+
+varying vec4 fragColor;
+varying vec2 tangent;
+
+void main() {
+	float alpha = 1.;
+
+	float t = fract(dot(tangent, gl_FragCoord.xy) / dashLength) * .5 + .25;
+	float dash = texture2D(dashTexture, vec2(t, .5)).r;
+
+	gl_FragColor = fragColor;
+	gl_FragColor.a *= alpha * opacity * dash;
+}
+`,SNt=`
+precision highp float;
+
+attribute vec2 position, positionFract;
+
+uniform vec4 color;
+uniform vec2 scale, scaleFract, translate, translateFract;
+uniform float pixelRatio, id;
+uniform vec4 viewport;
+uniform float opacity;
+
+varying vec4 fragColor;
+
+const float MAX_LINES = 256.;
+
+void main() {
+	float depth = (MAX_LINES - 4. - id) / (MAX_LINES);
+
+	vec2 position = position * scale + translate
+       + positionFract * scale + translateFract
+       + position * scaleFract
+       + positionFract * scaleFract;
+
+	gl_Position = vec4(position * 2.0 - 1.0, depth, 1);
+
+	fragColor = color / 255.;
+	fragColor.a *= opacity;
+}
+`,MNt=`
+precision highp float;
+varying vec4 fragColor;
+
+void main() {
+	gl_FragColor = fragColor;
+}
+`,ENt=`
+precision highp float;
+
+attribute vec2 aCoord, bCoord, nextCoord, prevCoord;
+attribute vec4 aColor, bColor;
+attribute float lineEnd, lineTop;
+
+uniform vec2 scale, translate;
+uniform float thickness, pixelRatio, id, depth;
+uniform vec4 viewport;
+uniform float miterLimit, miterMode;
+
+varying vec4 fragColor;
+varying vec4 startCutoff, endCutoff;
+varying vec2 tangent;
+varying vec2 startCoord, endCoord;
+varying float enableStartMiter, enableEndMiter;
+
+const float REVERSE_THRESHOLD = -.875;
+const float MIN_DIFF = 1e-6;
+
+// TODO: possible optimizations: avoid overcalculating all for vertices and calc just one instead
+// TODO: precalculate dot products, normalize things beforehead etc.
+// TODO: refactor to rectangular algorithm
+
+float distToLine(vec2 p, vec2 a, vec2 b) {
+	vec2 diff = b - a;
+	vec2 perp = normalize(vec2(-diff.y, diff.x));
+	return dot(p - a, perp);
+}
+
+bool isNaN( float val ){
+  return ( val < 0.0 || 0.0 < val || val == 0.0 ) ? false : true;
+}
+
+void main() {
+	vec2 aCoord = aCoord, bCoord = bCoord, prevCoord = prevCoord, nextCoord = nextCoord;
+
+  vec2 adjustedScale;
+  adjustedScale.x = (abs(scale.x) < MIN_DIFF) ? MIN_DIFF : scale.x;
+  adjustedScale.y = (abs(scale.y) < MIN_DIFF) ? MIN_DIFF : scale.y;
+
+  vec2 scaleRatio = adjustedScale * viewport.zw;
+	vec2 normalWidth = thickness / scaleRatio;
+
+	float lineStart = 1. - lineEnd;
+	float lineBot = 1. - lineTop;
+
+	fragColor = (lineStart * aColor + lineEnd * bColor) / 255.;
+
+	if (isNaN(aCoord.x) || isNaN(aCoord.y) || isNaN(bCoord.x) || isNaN(bCoord.y)) return;
+
+	if (aCoord == prevCoord) prevCoord = aCoord + normalize(bCoord - aCoord);
+	if (bCoord == nextCoord) nextCoord = bCoord - normalize(bCoord - aCoord);
+
+
+	vec2 prevDiff = aCoord - prevCoord;
+	vec2 currDiff = bCoord - aCoord;
+	vec2 nextDiff = nextCoord - bCoord;
+
+	vec2 prevTangent = normalize(prevDiff * scaleRatio);
+	vec2 currTangent = normalize(currDiff * scaleRatio);
+	vec2 nextTangent = normalize(nextDiff * scaleRatio);
+
+	vec2 prevNormal = vec2(-prevTangent.y, prevTangent.x);
+	vec2 currNormal = vec2(-currTangent.y, currTangent.x);
+	vec2 nextNormal = vec2(-nextTangent.y, nextTangent.x);
+
+	vec2 startJoinDirection = normalize(prevTangent - currTangent);
+	vec2 endJoinDirection = normalize(currTangent - nextTangent);
+
+	// collapsed/unidirectional segment cases
+	// FIXME: there should be more elegant solution
+	vec2 prevTanDiff = abs(prevTangent - currTangent);
+	vec2 nextTanDiff = abs(nextTangent - currTangent);
+	if (max(prevTanDiff.x, prevTanDiff.y) < MIN_DIFF) {
+		startJoinDirection = currNormal;
+	}
+	if (max(nextTanDiff.x, nextTanDiff.y) < MIN_DIFF) {
+		endJoinDirection = currNormal;
+	}
+	if (aCoord == bCoord) {
+		endJoinDirection = startJoinDirection;
+		currNormal = prevNormal;
+		currTangent = prevTangent;
+	}
+
+	tangent = currTangent;
+
+	//calculate join shifts relative to normals
+	float startJoinShift = dot(currNormal, startJoinDirection);
+	float endJoinShift = dot(currNormal, endJoinDirection);
+
+	float startMiterRatio = abs(1. / startJoinShift);
+	float endMiterRatio = abs(1. / endJoinShift);
+
+	vec2 startJoin = startJoinDirection * startMiterRatio;
+	vec2 endJoin = endJoinDirection * endMiterRatio;
+
+	vec2 startTopJoin, startBotJoin, endTopJoin, endBotJoin;
+	startTopJoin = sign(startJoinShift) * startJoin * .5;
+	startBotJoin = -startTopJoin;
+
+	endTopJoin = sign(endJoinShift) * endJoin * .5;
+	endBotJoin = -endTopJoin;
+
+	vec2 aTopCoord = aCoord + normalWidth * startTopJoin;
+	vec2 bTopCoord = bCoord + normalWidth * endTopJoin;
+	vec2 aBotCoord = aCoord + normalWidth * startBotJoin;
+	vec2 bBotCoord = bCoord + normalWidth * endBotJoin;
+
+	//miter anti-clipping
+	float baClipping = distToLine(bCoord, aCoord, aBotCoord) / dot(normalize(normalWidth * endBotJoin), normalize(normalWidth.yx * vec2(-startBotJoin.y, startBotJoin.x)));
+	float abClipping = distToLine(aCoord, bCoord, bTopCoord) / dot(normalize(normalWidth * startBotJoin), normalize(normalWidth.yx * vec2(-endBotJoin.y, endBotJoin.x)));
+
+	//prevent close to reverse direction switch
+	bool prevReverse = dot(currTangent, prevTangent) <= REVERSE_THRESHOLD && abs(dot(currTangent, prevNormal)) * min(length(prevDiff), length(currDiff)) <  length(normalWidth * currNormal);
+	bool nextReverse = dot(currTangent, nextTangent) <= REVERSE_THRESHOLD && abs(dot(currTangent, nextNormal)) * min(length(nextDiff), length(currDiff)) <  length(normalWidth * currNormal);
+
+	if (prevReverse) {
+		//make join rectangular
+		vec2 miterShift = normalWidth * startJoinDirection * miterLimit * .5;
+		float normalAdjust = 1. - min(miterLimit / startMiterRatio, 1.);
+		aBotCoord = aCoord + miterShift - normalAdjust * normalWidth * currNormal * .5;
+		aTopCoord = aCoord + miterShift + normalAdjust * normalWidth * currNormal * .5;
+	}
+	else if (!nextReverse && baClipping > 0. && baClipping < length(normalWidth * endBotJoin)) {
+		//handle miter clipping
+		bTopCoord -= normalWidth * endTopJoin;
+		bTopCoord += normalize(endTopJoin * normalWidth) * baClipping;
+	}
+
+	if (nextReverse) {
+		//make join rectangular
+		vec2 miterShift = normalWidth * endJoinDirection * miterLimit * .5;
+		float normalAdjust = 1. - min(miterLimit / endMiterRatio, 1.);
+		bBotCoord = bCoord + miterShift - normalAdjust * normalWidth * currNormal * .5;
+		bTopCoord = bCoord + miterShift + normalAdjust * normalWidth * currNormal * .5;
+	}
+	else if (!prevReverse && abClipping > 0. && abClipping < length(normalWidth * startBotJoin)) {
+		//handle miter clipping
+		aBotCoord -= normalWidth * startBotJoin;
+		aBotCoord += normalize(startBotJoin * normalWidth) * abClipping;
+	}
+
+	vec2 aTopPosition = (aTopCoord) * adjustedScale + translate;
+	vec2 aBotPosition = (aBotCoord) * adjustedScale + translate;
+
+	vec2 bTopPosition = (bTopCoord) * adjustedScale + translate;
+	vec2 bBotPosition = (bBotCoord) * adjustedScale + translate;
+
+	//position is normalized 0..1 coord on the screen
+	vec2 position = (aTopPosition * lineTop + aBotPosition * lineBot) * lineStart + (bTopPosition * lineTop + bBotPosition * lineBot) * lineEnd;
+
+	startCoord = aCoord * scaleRatio + translate * viewport.zw + viewport.xy;
+	endCoord = bCoord * scaleRatio + translate * viewport.zw + viewport.xy;
+
+	gl_Position = vec4(position  * 2.0 - 1.0, depth, 1);
+
+	enableStartMiter = step(dot(currTangent, prevTangent), .5);
+	enableEndMiter = step(dot(currTangent, nextTangent), .5);
+
+	//bevel miter cutoffs
+	if (miterMode == 1.) {
+		if (enableStartMiter == 1.) {
+			vec2 startMiterWidth = vec2(startJoinDirection) * thickness * miterLimit * .5;
+			startCutoff = vec4(aCoord, aCoord);
+			startCutoff.zw += vec2(-startJoinDirection.y, startJoinDirection.x) / scaleRatio;
+			startCutoff = startCutoff * scaleRatio.xyxy + translate.xyxy * viewport.zwzw;
+			startCutoff += viewport.xyxy;
+			startCutoff += startMiterWidth.xyxy;
+		}
+
+		if (enableEndMiter == 1.) {
+			vec2 endMiterWidth = vec2(endJoinDirection) * thickness * miterLimit * .5;
+			endCutoff = vec4(bCoord, bCoord);
+			endCutoff.zw += vec2(-endJoinDirection.y, endJoinDirection.x)  / scaleRatio;
+			endCutoff = endCutoff * scaleRatio.xyxy + translate.xyxy * viewport.zwzw;
+			endCutoff += viewport.xyxy;
+			endCutoff += endMiterWidth.xyxy;
+		}
+	}
+
+	//round miter cutoffs
+	else if (miterMode == 2.) {
+		if (enableStartMiter == 1.) {
+			vec2 startMiterWidth = vec2(startJoinDirection) * thickness * abs(dot(startJoinDirection, currNormal)) * .5;
+			startCutoff = vec4(aCoord, aCoord);
+			startCutoff.zw += vec2(-startJoinDirection.y, startJoinDirection.x) / scaleRatio;
+			startCutoff = startCutoff * scaleRatio.xyxy + translate.xyxy * viewport.zwzw;
+			startCutoff += viewport.xyxy;
+			startCutoff += startMiterWidth.xyxy;
+		}
+
+		if (enableEndMiter == 1.) {
+			vec2 endMiterWidth = vec2(endJoinDirection) * thickness * abs(dot(endJoinDirection, currNormal)) * .5;
+			endCutoff = vec4(bCoord, bCoord);
+			endCutoff.zw += vec2(-endJoinDirection.y, endJoinDirection.x)  / scaleRatio;
+			endCutoff = endCutoff * scaleRatio.xyxy + translate.xyxy * viewport.zwzw;
+			endCutoff += viewport.xyxy;
+			endCutoff += endMiterWidth.xyxy;
+		}
+	}
+}
+`,kNt=`
+precision highp float;
+
+uniform float dashLength, pixelRatio, thickness, opacity, id, miterMode;
+uniform sampler2D dashTexture;
+
+varying vec4 fragColor;
+varying vec2 tangent;
+varying vec4 startCutoff, endCutoff;
+varying vec2 startCoord, endCoord;
+varying float enableStartMiter, enableEndMiter;
+
+float distToLine(vec2 p, vec2 a, vec2 b) {
+	vec2 diff = b - a;
+	vec2 perp = normalize(vec2(-diff.y, diff.x));
+	return dot(p - a, perp);
+}
+
+void main() {
+	float alpha = 1., distToStart, distToEnd;
+	float cutoff = thickness * .5;
+
+	//bevel miter
+	if (miterMode == 1.) {
+		if (enableStartMiter == 1.) {
+			distToStart = distToLine(gl_FragCoord.xy, startCutoff.xy, startCutoff.zw);
+			if (distToStart < -1.) {
+				discard;
+				return;
+			}
+			alpha *= min(max(distToStart + 1., 0.), 1.);
+		}
+
+		if (enableEndMiter == 1.) {
+			distToEnd = distToLine(gl_FragCoord.xy, endCutoff.xy, endCutoff.zw);
+			if (distToEnd < -1.) {
+				discard;
+				return;
+			}
+			alpha *= min(max(distToEnd + 1., 0.), 1.);
+		}
+	}
+
+	// round miter
+	else if (miterMode == 2.) {
+		if (enableStartMiter == 1.) {
+			distToStart = distToLine(gl_FragCoord.xy, startCutoff.xy, startCutoff.zw);
+			if (distToStart < 0.) {
+				float radius = length(gl_FragCoord.xy - startCoord);
+
+				if(radius > cutoff + .5) {
+					discard;
+					return;
+				}
+
+				alpha -= smoothstep(cutoff - .5, cutoff + .5, radius);
+			}
+		}
+
+		if (enableEndMiter == 1.) {
+			distToEnd = distToLine(gl_FragCoord.xy, endCutoff.xy, endCutoff.zw);
+			if (distToEnd < 0.) {
+				float radius = length(gl_FragCoord.xy - endCoord);
+
+				if(radius > cutoff + .5) {
+					discard;
+					return;
+				}
+
+				alpha -= smoothstep(cutoff - .5, cutoff + .5, radius);
+			}
+		}
+	}
+
+	float t = fract(dot(tangent, gl_FragCoord.xy) / dashLength) * .5 + .25;
+	float dash = texture2D(dashTexture, vec2(t, .5)).r;
+
+	gl_FragColor = fragColor;
+	gl_FragColor.a *= alpha * opacity * dash;
+}
+`;dqe.exports=Wc;function Wc(e,t){if(!(this instanceof Wc))return new Wc(e,t);if(typeof e=="function"?(t||(t={}),t.regl=e):t=e,t.length&&(t.positions=t),e=t.regl,!e.hasExtension("ANGLE_instanced_arrays"))throw Error("regl-error2d: `ANGLE_instanced_arrays` extension should be enabled");this.gl=e._gl,this.regl=e,this.passes=[],this.shaders=Wc.shaders.has(e)?Wc.shaders.get(e):Wc.shaders.set(e,Wc.createShaders(e)).get(e),this.update(t)}Wc.dashMult=2;Wc.maxPatternLength=256;Wc.precisionThreshold=3e6;Wc.maxPoints=1e4;Wc.maxLines=2048;Wc.shaders=new bNt;Wc.createShaders=function(e){let t=e.buffer({usage:"static",type:"float",data:[0,1,0,0,1,1,1,0]}),r={primitive:"triangle strip",instances:e.prop("count"),count:4,offset:0,uniforms:{miterMode:(o,s)=>s.join==="round"?2:1,miterLimit:e.prop("miterLimit"),scale:e.prop("scale"),scaleFract:e.prop("scaleFract"),translateFract:e.prop("translateFract"),translate:e.prop("translate"),thickness:e.prop("thickness"),dashTexture:e.prop("dashTexture"),opacity:e.prop("opacity"),pixelRatio:e.context("pixelRatio"),id:e.prop("id"),dashLength:e.prop("dashLength"),viewport:(o,s)=>[s.viewport.x,s.viewport.y,o.viewportWidth,o.viewportHeight],depth:e.prop("depth")},blend:{enable:!0,color:[0,0,0,0],equation:{rgb:"add",alpha:"add"},func:{srcRGB:"src alpha",dstRGB:"one minus src alpha",srcAlpha:"one minus dst alpha",dstAlpha:"one"}},depth:{enable:(o,s)=>!s.overlay},stencil:{enable:!1},scissor:{enable:!0,box:e.prop("viewport")},viewport:e.prop("viewport")},n=e(VY({vert:TNt,frag:ANt,attributes:{lineEnd:{buffer:t,divisor:0,stride:8,offset:0},lineTop:{buffer:t,divisor:0,stride:8,offset:4},aCoord:{buffer:e.prop("positionBuffer"),stride:8,offset:8,divisor:1},bCoord:{buffer:e.prop("positionBuffer"),stride:8,offset:16,divisor:1},aCoordFract:{buffer:e.prop("positionFractBuffer"),stride:8,offset:8,divisor:1},bCoordFract:{buffer:e.prop("positionFractBuffer"),stride:8,offset:16,divisor:1},color:{buffer:e.prop("colorBuffer"),stride:4,offset:0,divisor:1}}},r)),i;try{i=e(VY({cull:{enable:!0,face:"back"},vert:ENt,frag:kNt,attributes:{lineEnd:{buffer:t,divisor:0,stride:8,offset:0},lineTop:{buffer:t,divisor:0,stride:8,offset:4},aColor:{buffer:e.prop("colorBuffer"),stride:4,offset:0,divisor:1},bColor:{buffer:e.prop("colorBuffer"),stride:4,offset:4,divisor:1},prevCoord:{buffer:e.prop("positionBuffer"),stride:8,offset:0,divisor:1},aCoord:{buffer:e.prop("positionBuffer"),stride:8,offset:8,divisor:1},bCoord:{buffer:e.prop("positionBuffer"),stride:8,offset:16,divisor:1},nextCoord:{buffer:e.prop("positionBuffer"),stride:8,offset:24,divisor:1}}},r))}catch(o){i=n}return{fill:e({primitive:"triangle",elements:(o,s)=>s.triangles,offset:0,vert:SNt,frag:MNt,uniforms:{scale:e.prop("scale"),color:e.prop("fill"),scaleFract:e.prop("scaleFract"),translateFract:e.prop("translateFract"),translate:e.prop("translate"),opacity:e.prop("opacity"),pixelRatio:e.context("pixelRatio"),id:e.prop("id"),viewport:(o,s)=>[s.viewport.x,s.viewport.y,o.viewportWidth,o.viewportHeight]},attributes:{position:{buffer:e.prop("positionBuffer"),stride:8,offset:8},positionFract:{buffer:e.prop("positionFractBuffer"),stride:8,offset:8}},blend:r.blend,depth:{enable:!1},scissor:r.scissor,stencil:r.stencil,viewport:r.viewport}),rect:n,miter:i}};Wc.defaults={dashes:null,join:"miter",miterLimit:1,thickness:10,cap:"square",color:"black",opacity:1,overlay:!1,viewport:null,range:null,close:!1,fill:null};Wc.prototype.render=function(...e){e.length&&this.update(...e),this.draw()};Wc.prototype.draw=function(...e){return(e.length?e:this.passes).forEach((t,r)=>{if(t&&Array.isArray(t))return this.draw(...t);typeof t=="number"&&(t=this.passes[t]),t&&t.count>1&&t.opacity&&(this.regl._refresh(),t.fill&&t.triangles&&t.triangles.length>2&&this.shaders.fill(t),t.thickness&&(t.scale[0]*t.viewport.width>Wc.precisionThreshold||t.scale[1]*t.viewport.height>Wc.precisionThreshold?this.shaders.rect(t):t.join==="rect"||!t.join&&(t.thickness<=2||t.count>=Wc.maxPoints)?this.shaders.rect(t):this.shaders.miter(t)))}),this};Wc.prototype.update=function(e){if(!e)return;e.length!=null?typeof e[0]=="number"&&(e=[{positions:e}]):Array.isArray(e)||(e=[e]);let{regl:t,gl:r}=this;if(e.forEach((i,a)=>{let o=this.passes[a];if(i!==void 0){if(i===null){this.passes[a]=null;return}if(typeof i[0]=="number"&&(i={positions:i}),i=mNt(i,{positions:"positions points data coords",thickness:"thickness lineWidth lineWidths line-width linewidth width stroke-width strokewidth strokeWidth",join:"lineJoin linejoin join type mode",miterLimit:"miterlimit miterLimit",dashes:"dash dashes dasharray dash-array dashArray",color:"color colour stroke colors colours stroke-color strokeColor",fill:"fill fill-color fillColor",opacity:"alpha opacity",overlay:"overlay crease overlap intersect",close:"closed close closed-path closePath",range:"range dataBox",viewport:"viewport viewBox",hole:"holes hole hollow",splitNull:"splitNull"}),o||(this.passes[a]=o={id:a,scale:null,scaleFract:null,translate:null,translateFract:null,count:0,hole:[],depth:0,dashLength:1,dashTexture:t.texture({channels:1,data:new Uint8Array([255]),width:1,height:1,mag:"linear",min:"linear"}),colorBuffer:t.buffer({usage:"dynamic",type:"uint8",data:new Uint8Array}),positionBuffer:t.buffer({usage:"dynamic",type:"float",data:new Uint8Array}),positionFractBuffer:t.buffer({usage:"dynamic",type:"float",data:new Uint8Array})},i=VY({},Wc.defaults,i)),i.thickness!=null&&(o.thickness=parseFloat(i.thickness)),i.opacity!=null&&(o.opacity=parseFloat(i.opacity)),i.miterLimit!=null&&(o.miterLimit=parseFloat(i.miterLimit)),i.overlay!=null&&(o.overlay=!!i.overlay,a<Wc.maxLines&&(o.depth=2*(Wc.maxLines-1-a%Wc.maxLines)/Wc.maxLines-1)),i.join!=null&&(o.join=i.join),i.hole!=null&&(o.hole=i.hole),i.fill!=null&&(o.fill=i.fill?yz(i.fill,"uint8"):null),i.viewport!=null&&(o.viewport=hqe(i.viewport)),o.viewport||(o.viewport=hqe([r.drawingBufferWidth,r.drawingBufferHeight])),i.close!=null&&(o.close=i.close),i.positions===null&&(i.positions=[]),i.positions){let u,c;if(i.positions.x&&i.positions.y){let v=i.positions.x,_=i.positions.y;c=o.count=Math.max(v.length,_.length),u=new Float64Array(c*2);for(let b=0;b<c;b++)u[b*2]=v[b],u[b*2+1]=_[b]}else u=yNt(i.positions,"float64"),c=o.count=Math.floor(u.length/2);let f=o.bounds=gNt(u,2);if(o.fill){let v=[],_={},b=0;for(let p=0,k=0,E=o.count;p<E;p++){let S=u[p*2],L=u[p*2+1];isNaN(S)||isNaN(L)||S==null||L==null?(S=u[b*2],L=u[b*2+1],_[p]=b):b=p,v[k++]=S,v[k++]=L}if(i.splitNull){o.count-1 in _||(_[o.count]=o.count-1);let p=Object.keys(_).map(Number).sort((L,x)=>L-x),k=[],E=0,S=o.hole!=null?o.hole[0]:null;if(S!=null){let L=wNt(p,x=>x>=S);p=p.slice(0,L),p.push(S)}for(let L=0;L<p.length;L++){let x=v.slice(E*2,p[L]*2).concat(S?v.slice(S*2):[]),C=(o.hole||[]).map(g=>g-S+(p[L]-E)),M=fqe(x,C);M=M.map(g=>g+E+(g+E<p[L]?0:S-p[L])),k.push(...M),E=p[L]+1}for(let L=0,x=k.length;L<x;L++)_[k[L]]!=null&&(k[L]=_[k[L]]);o.triangles=k}else{let p=fqe(v,o.hole||[]);for(let k=0,E=p.length;k<E;k++)_[p[k]]!=null&&(p[k]=_[p[k]]);o.triangles=p}}let h=new Float64Array(u);_Nt(h,2,f);let d=new Float64Array(c*2+6);o.close?u[0]===u[c*2-2]&&u[1]===u[c*2-1]?(d[0]=h[c*2-4],d[1]=h[c*2-3]):(d[0]=h[c*2-2],d[1]=h[c*2-1]):(d[0]=h[0],d[1]=h[1]),d.set(h,2),o.close?u[0]===u[c*2-2]&&u[1]===u[c*2-1]?(d[c*2+2]=h[2],d[c*2+3]=h[3],o.count-=1):(d[c*2+2]=h[0],d[c*2+3]=h[1],d[c*2+4]=h[2],d[c*2+5]=h[3]):(d[c*2+2]=h[c*2-2],d[c*2+3]=h[c*2-1],d[c*2+4]=h[c*2-2],d[c*2+5]=h[c*2-1]);var s=xNt(d);o.positionBuffer(s);var l=UY(d,s);o.positionFractBuffer(l)}if(i.range?o.range=i.range:o.range||(o.range=o.bounds),(i.range||i.positions)&&o.count){let u=o.bounds,c=u[2]-u[0],f=u[3]-u[1],h=o.range[2]-o.range[0],d=o.range[3]-o.range[1];o.scale=[c/h,f/d],o.translate=[-o.range[0]/h+u[0]/h||0,-o.range[1]/d+u[1]/d||0],o.scaleFract=UY(o.scale),o.translateFract=UY(o.translate)}if(i.dashes){let u=0,c;if(!i.dashes||i.dashes.length<2)u=1,c=new Uint8Array([255,255,255,255,255,255,255,255]);else{u=0;for(let d=0;d<i.dashes.length;++d)u+=i.dashes[d];c=new Uint8Array(u*Wc.dashMult);let f=0,h=255;for(let d=0;d<2;d++)for(let v=0;v<i.dashes.length;++v){for(let _=0,b=i.dashes[v]*Wc.dashMult*.5;_<b;++_)c[f++]=h;h^=255}}o.dashLength=u,o.dashTexture({channels:1,data:c,width:c.length,height:1,mag:"linear",min:"linear"},0,0)}if(i.color){let u=o.count,c=i.color;c||(c="transparent");let f=new Uint8Array(u*4+4);if(!Array.isArray(c)||typeof c[0]=="number"){let h=yz(c,"uint8");for(let d=0;d<u+1;d++)f.set(h,d*4)}else{for(let h=0;h<u;h++){let d=yz(c[h],"uint8");f.set(d,h*4)}f.set(yz(c[0],"uint8"),u*4)}o.colorBuffer({usage:"dynamic",type:"uint8",data:f})}}}),e.length<this.passes.length){for(let i=e.length;i<this.passes.length;i++){let a=this.passes[i];a&&(a.colorBuffer.destroy(),a.positionBuffer.destroy(),a.dashTexture.destroy())}this.passes.length=e.length}let n=[];for(let i=0;i<this.passes.length;i++)this.passes[i]!==null&&n.push(this.passes[i]);return this.passes=n,this};Wc.prototype.destroy=function(){return this.passes.forEach(e=>{e.colorBuffer.destroy(),e.positionBuffer.destroy(),e.dashTexture.destroy()}),this.passes.length=0,this}});var yqe=ye((V1r,mqe)=>{"use strict";var CNt=X2(),LNt=J_(),PNt=cY(),INt=Zm(),vqe=Oh(),pqe=Z2(),{float32:RNt,fract32:HY}=ez();mqe.exports=DNt;var gqe=[[1,0,0,1,0,0],[1,0,0,-1,0,0],[-1,0,0,-1,0,0],[-1,0,0,-1,0,0],[-1,0,0,1,0,0],[1,0,0,1,0,0],[1,0,-1,0,0,1],[1,0,-1,0,0,-1],[1,0,1,0,0,-1],[1,0,1,0,0,-1],[1,0,1,0,0,1],[1,0,-1,0,0,1],[-1,0,-1,0,0,1],[-1,0,-1,0,0,-1],[-1,0,1,0,0,-1],[-1,0,1,0,0,-1],[-1,0,1,0,0,1],[-1,0,-1,0,0,1],[0,1,1,0,0,0],[0,1,-1,0,0,0],[0,-1,-1,0,0,0],[0,-1,-1,0,0,0],[0,1,1,0,0,0],[0,-1,1,0,0,0],[0,1,0,-1,1,0],[0,1,0,-1,-1,0],[0,1,0,1,-1,0],[0,1,0,1,1,0],[0,1,0,-1,1,0],[0,1,0,1,-1,0],[0,-1,0,-1,1,0],[0,-1,0,-1,-1,0],[0,-1,0,1,-1,0],[0,-1,0,1,1,0],[0,-1,0,-1,1,0],[0,-1,0,1,-1,0]];function DNt(e,t){if(typeof e=="function"?(t||(t={}),t.regl=e):t=e,t.length&&(t.positions=t),e=t.regl,!e.hasExtension("ANGLE_instanced_arrays"))throw Error("regl-error2d: `ANGLE_instanced_arrays` extension should be enabled");let r=e._gl,n,i,a,o,s,l,u={color:"black",capSize:5,lineWidth:1,opacity:1,viewport:null,range:null,offset:0,count:0,bounds:null,positions:[],errors:[]},c=[];return o=e.buffer({usage:"dynamic",type:"uint8",data:new Uint8Array(0)}),i=e.buffer({usage:"dynamic",type:"float",data:new Uint8Array(0)}),a=e.buffer({usage:"dynamic",type:"float",data:new Uint8Array(0)}),s=e.buffer({usage:"dynamic",type:"float",data:new Uint8Array(0)}),l=e.buffer({usage:"static",type:"float",data:gqe}),v(t),n=e({vert:`
+		precision highp float;
+
+		attribute vec2 position, positionFract;
+		attribute vec4 error;
+		attribute vec4 color;
+
+		attribute vec2 direction, lineOffset, capOffset;
+
+		uniform vec4 viewport;
+		uniform float lineWidth, capSize;
+		uniform vec2 scale, scaleFract, translate, translateFract;
+
+		varying vec4 fragColor;
+
+		void main() {
+			fragColor = color / 255.;
+
+			vec2 pixelOffset = lineWidth * lineOffset + (capSize + lineWidth) * capOffset;
+
+			vec2 dxy = -step(.5, direction.xy) * error.xz + step(direction.xy, vec2(-.5)) * error.yw;
+
+			vec2 position = position + dxy;
+
+			vec2 pos = (position + translate) * scale
+				+ (positionFract + translateFract) * scale
+				+ (position + translate) * scaleFract
+				+ (positionFract + translateFract) * scaleFract;
+
+			pos += pixelOffset / viewport.zw;
+
+			gl_Position = vec4(pos * 2. - 1., 0, 1);
+		}
+		`,frag:`
+		precision highp float;
+
+		varying vec4 fragColor;
+
+		uniform float opacity;
+
+		void main() {
+			gl_FragColor = fragColor;
+			gl_FragColor.a *= opacity;
+		}
+		`,uniforms:{range:e.prop("range"),lineWidth:e.prop("lineWidth"),capSize:e.prop("capSize"),opacity:e.prop("opacity"),scale:e.prop("scale"),translate:e.prop("translate"),scaleFract:e.prop("scaleFract"),translateFract:e.prop("translateFract"),viewport:(b,p)=>[p.viewport.x,p.viewport.y,b.viewportWidth,b.viewportHeight]},attributes:{color:{buffer:o,offset:(b,p)=>p.offset*4,divisor:1},position:{buffer:i,offset:(b,p)=>p.offset*8,divisor:1},positionFract:{buffer:a,offset:(b,p)=>p.offset*8,divisor:1},error:{buffer:s,offset:(b,p)=>p.offset*16,divisor:1},direction:{buffer:l,stride:24,offset:0},lineOffset:{buffer:l,stride:24,offset:8},capOffset:{buffer:l,stride:24,offset:16}},primitive:"triangles",blend:{enable:!0,color:[0,0,0,0],equation:{rgb:"add",alpha:"add"},func:{srcRGB:"src alpha",dstRGB:"one minus src alpha",srcAlpha:"one minus dst alpha",dstAlpha:"one"}},depth:{enable:!1},scissor:{enable:!0,box:e.prop("viewport")},viewport:e.prop("viewport"),stencil:!1,instances:e.prop("count"),count:gqe.length}),vqe(f,{update:v,draw:h,destroy:_,regl:e,gl:r,canvas:r.canvas,groups:c}),f;function f(b){b?v(b):b===null&&_(),h()}function h(b){if(typeof b=="number")return d(b);b&&!Array.isArray(b)&&(b=[b]),e._refresh(),c.forEach((p,k)=>{if(p){if(b&&(b[k]?p.draw=!0:p.draw=!1),!p.draw){p.draw=!0;return}d(k)}})}function d(b){typeof b=="number"&&(b=c[b]),b!=null&&b&&b.count&&b.color&&b.opacity&&b.positions&&b.positions.length>1&&(b.scaleRatio=[b.scale[0]*b.viewport.width,b.scale[1]*b.viewport.height],n(b),b.after&&b.after(b))}function v(b){if(!b)return;b.length!=null?typeof b[0]=="number"&&(b=[{positions:b}]):Array.isArray(b)||(b=[b]);let p=0,k=0;if(f.groups=c=b.map((L,x)=>{let C=c[x];if(L)typeof L=="function"?L={after:L}:typeof L[0]=="number"&&(L={positions:L});else return C;return L=INt(L,{color:"color colors fill",capSize:"capSize cap capsize cap-size",lineWidth:"lineWidth line-width width line thickness",opacity:"opacity alpha",range:"range dataBox",viewport:"viewport viewBox",errors:"errors error",positions:"positions position data points"}),C||(c[x]=C={id:x,scale:null,translate:null,scaleFract:null,translateFract:null,draw:!0},L=vqe({},u,L)),PNt(C,L,[{lineWidth:M=>+M*.5,capSize:M=>+M*.5,opacity:parseFloat,errors:M=>(M=pqe(M),k+=M.length,M),positions:(M,g)=>(M=pqe(M,"float64"),g.count=Math.floor(M.length/2),g.bounds=CNt(M,2),g.offset=p,p+=g.count,M)},{color:(M,g)=>{let P=g.count;if(M||(M="transparent"),!Array.isArray(M)||typeof M[0]=="number"){let z=M;M=Array(P);for(let O=0;O<P;O++)M[O]=z}if(M.length<P)throw Error("Not enough colors");let T=new Uint8Array(P*4);for(let z=0;z<P;z++){let O=LNt(M[z],"uint8");T.set(O,z*4)}return T},range:(M,g,P)=>{let T=g.bounds;return M||(M=T),g.scale=[1/(M[2]-M[0]),1/(M[3]-M[1])],g.translate=[-M[0],-M[1]],g.scaleFract=HY(g.scale),g.translateFract=HY(g.translate),M},viewport:M=>{let g;return Array.isArray(M)?g={x:M[0],y:M[1],width:M[2]-M[0],height:M[3]-M[1]}:M?(g={x:M.x||M.left||0,y:M.y||M.top||0},M.right?g.width=M.right-g.x:g.width=M.w||M.width||0,M.bottom?g.height=M.bottom-g.y:g.height=M.h||M.height||0):g={x:0,y:0,width:r.drawingBufferWidth,height:r.drawingBufferHeight},g}}]),C}),p||k){let L=c.reduce((g,P,T)=>g+(P?P.count:0),0),x=new Float64Array(L*2),C=new Uint8Array(L*4),M=new Float32Array(L*4);c.forEach((g,P)=>{if(!g)return;let{positions:T,count:z,offset:O,color:V,errors:G}=g;z&&(C.set(V,O*4),M.set(G,O*4),x.set(T,O*2))});var E=RNt(x);i(E);var S=HY(x,E);a(S),o(C),s(M)}}function _(){i.destroy(),a.destroy(),o.destroy(),s.destroy(),l.destroy()}}});var bqe=ye((G1r,xqe)=>{var _qe=/[\'\"]/;xqe.exports=function(t){return t?(_qe.test(t.charAt(0))&&(t=t.substr(1)),_qe.test(t.charAt(t.length-1))&&(t=t.substr(0,t.length-1)),t):""}});var jY=ye((H1r,FNt)=>{FNt.exports=["inherit","initial","unset"]});var WY=ye((j1r,zNt)=>{zNt.exports=["caption","icon","menu","message-box","small-caption","status-bar"]});var XY=ye((W1r,ONt)=>{ONt.exports=["normal","bold","bolder","lighter","100","200","300","400","500","600","700","800","900"]});var ZY=ye((X1r,qNt)=>{qNt.exports=["normal","italic","oblique"]});var YY=ye((Z1r,BNt)=>{BNt.exports=["normal","condensed","semi-condensed","extra-condensed","ultra-condensed","expanded","semi-expanded","extra-expanded","ultra-expanded"]});var Sqe=ye((Y1r,Aqe)=>{"use strict";function wqe(e,t){if(typeof e!="string")return[e];var r=[e];typeof t=="string"||Array.isArray(t)?t={brackets:t}:t||(t={});var n=t.brackets?Array.isArray(t.brackets)?t.brackets:[t.brackets]:["{}","[]","()"],i=t.escape||"___",a=!!t.flat;n.forEach(function(l){var u=new RegExp(["\\",l[0],"[^\\",l[0],"\\",l[1],"]*\\",l[1]].join("")),c=[];function f(h,d,v){var _=r.push(h.slice(l[0].length,-l[1].length))-1;return c.push(_),i+_+i}r.forEach(function(h,d){for(var v,_=0;h!=v;)if(v=h,h=h.replace(u,f),_++>1e4)throw Error("References have circular dependency. Please, check them.");r[d]=h}),c=c.reverse(),r=r.map(function(h){return c.forEach(function(d){h=h.replace(new RegExp("(\\"+i+d+"\\"+i+")","g"),l[0]+"$1"+l[1])}),h})});var o=new RegExp("\\"+i+"([0-9]+)\\"+i);function s(l,u,c){for(var f=[],h,d=0;h=o.exec(l);){if(d++>1e4)throw Error("Circular references in parenthesis");f.push(l.slice(0,h.index)),f.push(s(u[h[1]],u)),l=l.slice(h.index+h[0].length)}return f.push(l),f}return a?r:s(r[0],r)}function Tqe(e,t){if(t&&t.flat){var r=t&&t.escape||"___",n=e[0],i;if(!n)return"";for(var a=new RegExp("\\"+r+"([0-9]+)\\"+r),o=0;n!=i;){if(o++>1e4)throw Error("Circular references in "+e);i=n,n=n.replace(a,s)}return n}return e.reduce(function l(u,c){return Array.isArray(c)&&(c=c.reduce(l,"")),u+c},"");function s(l,u){if(e[u]==null)throw Error("Reference "+u+"is undefined");return e[u]}}function KY(e,t){return Array.isArray(e)?Tqe(e,t):wqe(e,t)}KY.parse=wqe;KY.stringify=Tqe;Aqe.exports=KY});var kqe=ye((K1r,Eqe)=>{"use strict";var Mqe=Sqe();Eqe.exports=function(t,r,n){if(t==null)throw Error("First argument should be a string");if(r==null)throw Error("Separator should be a string or a RegExp");n?(typeof n=="string"||Array.isArray(n))&&(n={ignore:n}):n={},n.escape==null&&(n.escape=!0),n.ignore==null?n.ignore=["[]","()","{}","<>",'""',"''","``","\u201C\u201D","\xAB\xBB"]:(typeof n.ignore=="string"&&(n.ignore=[n.ignore]),n.ignore=n.ignore.map(function(f){return f.length===1&&(f=f+f),f}));var i=Mqe.parse(t,{flat:!0,brackets:n.ignore}),a=i[0],o=a.split(r);if(n.escape){for(var s=[],l=0;l<o.length;l++){var u=o[l],c=o[l+1];u[u.length-1]==="\\"&&u[u.length-2]!=="\\"?(s.push(u+r+c),l++):s.push(u)}o=s}for(var l=0;l<o.length;l++)i[0]=o[l],o[l]=Mqe.stringify(i,{flat:!0});return o}});var Cqe=ye((J1r,NNt)=>{NNt.exports=["xx-small","x-small","small","medium","large","x-large","xx-large","larger","smaller"]});var JY=ye(($1r,Lqe)=>{"use strict";var UNt=Cqe();Lqe.exports={isSize:function(t){return/^[\d\.]/.test(t)||t.indexOf("/")!==-1||UNt.indexOf(t)!==-1}}});var Dqe=ye((Q1r,Rqe)=>{"use strict";var VNt=bqe(),GNt=jY(),HNt=WY(),jNt=XY(),WNt=ZY(),XNt=YY(),$Y=kqe(),ZNt=JY().isSize;Rqe.exports=Iqe;var Tk=Iqe.cache={};function Iqe(e){if(typeof e!="string")throw new Error("Font argument must be a string.");if(Tk[e])return Tk[e];if(e==="")throw new Error("Cannot parse an empty string.");if(HNt.indexOf(e)!==-1)return Tk[e]={system:e};for(var t={style:"normal",variant:"normal",weight:"normal",stretch:"normal",lineHeight:"normal",size:"1rem",family:["serif"]},r=$Y(e,/\s+/),n;n=r.shift();){if(GNt.indexOf(n)!==-1)return["style","variant","weight","stretch"].forEach(function(a){t[a]=n}),Tk[e]=t;if(WNt.indexOf(n)!==-1){t.style=n;continue}if(n==="normal"||n==="small-caps"){t.variant=n;continue}if(XNt.indexOf(n)!==-1){t.stretch=n;continue}if(jNt.indexOf(n)!==-1){t.weight=n;continue}if(ZNt(n)){var i=$Y(n,"/");if(t.size=i[0],i[1]!=null?t.lineHeight=Pqe(i[1]):r[0]==="/"&&(r.shift(),t.lineHeight=Pqe(r.shift())),!r.length)throw new Error("Missing required font-family.");return t.family=$Y(r.join(" "),/\s*,\s*/).map(VNt),Tk[e]=t}throw new Error("Unknown or unsupported font token: "+n)}throw new Error("Missing required font-size.")}function Pqe(e){var t=parseFloat(e);return t.toString()===e?t:e}});var eK=ye((e_r,Fqe)=>{"use strict";var YNt=Zm(),KNt=JY().isSize,JNt=Sk(jY()),$Nt=Sk(WY()),QNt=Sk(XY()),eUt=Sk(ZY()),tUt=Sk(YY()),rUt={normal:1,"small-caps":1},iUt={serif:1,"sans-serif":1,monospace:1,cursive:1,fantasy:1,"system-ui":1},QY={style:"normal",variant:"normal",weight:"normal",stretch:"normal",size:"1rem",lineHeight:"normal",family:"serif"};Fqe.exports=function(t){if(t=YNt(t,{style:"style fontstyle fontStyle font-style slope distinction",variant:"variant font-variant fontVariant fontvariant var capitalization",weight:"weight w font-weight fontWeight fontweight",stretch:"stretch font-stretch fontStretch fontstretch width",size:"size s font-size fontSize fontsize height em emSize",lineHeight:"lh line-height lineHeight lineheight leading",family:"font family fontFamily font-family fontfamily type typeface face",system:"system reserved default global"}),t.system)return t.system&&Ak(t.system,$Nt),t.system;if(Ak(t.style,eUt),Ak(t.variant,rUt),Ak(t.weight,QNt),Ak(t.stretch,tUt),t.size==null&&(t.size=QY.size),typeof t.size=="number"&&(t.size+="px"),!KNt)throw Error("Bad size value `"+t.size+"`");t.family||(t.family=QY.family),Array.isArray(t.family)&&(t.family.length||(t.family=[QY.family]),t.family=t.family.map(function(n){return iUt[n]?n:'"'+n+'"'}).join(", "));var r=[];return r.push(t.style),t.variant!==t.style&&r.push(t.variant),t.weight!==t.variant&&t.weight!==t.style&&r.push(t.weight),t.stretch!==t.weight&&t.stretch!==t.variant&&t.stretch!==t.style&&r.push(t.stretch),r.push(t.size+(t.lineHeight==null||t.lineHeight==="normal"||t.lineHeight+""=="1"?"":"/"+t.lineHeight)),r.push(t.family),r.filter(Boolean).join(" ")};function Ak(e,t){if(e&&!t[e]&&!JNt[e])throw Error("Unknown keyword `"+e+"`");return e}function Sk(e){for(var t={},r=0;r<e.length;r++)t[e[r]]=1;return t}});var Oqe=ye((t_r,zqe)=>{"use strict";zqe.exports={parse:Dqe(),stringify:eK()}});var qqe=ye((tK,rK)=>{(function(e,t){typeof tK=="object"&&typeof rK!="undefined"?rK.exports=t():e.createREGL=t()})(tK,function(){"use strict";var e=function(Me,bt){for(var zt=Object.keys(bt),Rr=0;Rr<zt.length;++Rr)Me[zt[Rr]]=bt[zt[Rr]];return Me},t=0,r=0,n=5,i=6;function a(Me,bt){this.id=t++,this.type=Me,this.data=bt}function o(Me){return Me.replace(/\\/g,"\\\\").replace(/"/g,'\\"')}function s(Me){if(Me.length===0)return[];var bt=Me.charAt(0),zt=Me.charAt(Me.length-1);if(Me.length>1&&bt===zt&&(bt==='"'||bt==="'"))return['"'+o(Me.substr(1,Me.length-2))+'"'];var Rr=/\[(false|true|null|\d+|'[^']*'|"[^"]*")\]/.exec(Me);if(Rr)return s(Me.substr(0,Rr.index)).concat(s(Rr[1])).concat(s(Me.substr(Rr.index+Rr[0].length)));var jr=Me.split(".");if(jr.length===1)return['"'+o(Me)+'"'];for(var Nr=[],Gr=0;Gr<jr.length;++Gr)Nr=Nr.concat(s(jr[Gr]));return Nr}function l(Me){return"["+s(Me).join("][")+"]"}function u(Me,bt){return new a(Me,l(bt+""))}function c(Me){return typeof Me=="function"&&!Me._reglType||Me instanceof a}function f(Me,bt){if(typeof Me=="function")return new a(r,Me);if(typeof Me=="number"||typeof Me=="boolean")return new a(n,Me);if(Array.isArray(Me))return new a(i,Me.map(function(zt,Rr){return f(zt,bt+"["+Rr+"]")}));if(Me instanceof a)return Me}var h={DynamicVariable:a,define:u,isDynamic:c,unbox:f,accessor:l},d={next:typeof requestAnimationFrame=="function"?function(Me){return requestAnimationFrame(Me)}:function(Me){return setTimeout(Me,16)},cancel:typeof cancelAnimationFrame=="function"?function(Me){return cancelAnimationFrame(Me)}:clearTimeout},v=typeof performance!="undefined"&&performance.now?function(){return performance.now()}:function(){return+new Date};function _(){var Me={"":0},bt=[""];return{id:function(zt){var Rr=Me[zt];return Rr||(Rr=Me[zt]=bt.length,bt.push(zt),Rr)},str:function(zt){return bt[zt]}}}function b(Me,bt,zt){var Rr=document.createElement("canvas");e(Rr.style,{border:0,margin:0,padding:0,top:0,left:0,width:"100%",height:"100%"}),Me.appendChild(Rr),Me===document.body&&(Rr.style.position="absolute",e(Me.style,{margin:0,padding:0}));function jr(){var mi=window.innerWidth,Ui=window.innerHeight;if(Me!==document.body){var qi=Rr.getBoundingClientRect();mi=qi.right-qi.left,Ui=qi.bottom-qi.top}Rr.width=zt*mi,Rr.height=zt*Ui}var Nr;Me!==document.body&&typeof ResizeObserver=="function"?(Nr=new ResizeObserver(function(){setTimeout(jr)}),Nr.observe(Me)):window.addEventListener("resize",jr,!1);function Gr(){Nr?Nr.disconnect():window.removeEventListener("resize",jr),Me.removeChild(Rr)}return jr(),{canvas:Rr,onDestroy:Gr}}function p(Me,bt){function zt(Rr){try{return Me.getContext(Rr,bt)}catch(jr){return null}}return zt("webgl")||zt("experimental-webgl")||zt("webgl-experimental")}function k(Me){return typeof Me.nodeName=="string"&&typeof Me.appendChild=="function"&&typeof Me.getBoundingClientRect=="function"}function E(Me){return typeof Me.drawArrays=="function"||typeof Me.drawElements=="function"}function S(Me){return typeof Me=="string"?Me.split():Me}function L(Me){return typeof Me=="string"?document.querySelector(Me):Me}function x(Me){var bt=Me||{},zt,Rr,jr,Nr,Gr={},mi=[],Ui=[],qi=typeof window=="undefined"?1:window.devicePixelRatio,Ei=!1,Hn={},en=function(Mr){},Wi=function(){};if(typeof bt=="string"?zt=document.querySelector(bt):typeof bt=="object"&&(k(bt)?zt=bt:E(bt)?(Nr=bt,jr=Nr.canvas):("gl"in bt?Nr=bt.gl:"canvas"in bt?jr=L(bt.canvas):"container"in bt&&(Rr=L(bt.container)),"attributes"in bt&&(Gr=bt.attributes),"extensions"in bt&&(mi=S(bt.extensions)),"optionalExtensions"in bt&&(Ui=S(bt.optionalExtensions)),"onDone"in bt&&(en=bt.onDone),"profile"in bt&&(Ei=!!bt.profile),"pixelRatio"in bt&&(qi=+bt.pixelRatio),"cachedCode"in bt&&(Hn=bt.cachedCode))),zt&&(zt.nodeName.toLowerCase()==="canvas"?jr=zt:Rr=zt),!Nr){if(!jr){var si=b(Rr||document.body,en,qi);if(!si)return null;jr=si.canvas,Wi=si.onDestroy}Gr.premultipliedAlpha===void 0&&(Gr.premultipliedAlpha=!0),Nr=p(jr,Gr)}return Nr?{gl:Nr,canvas:jr,container:Rr,extensions:mi,optionalExtensions:Ui,pixelRatio:qi,profile:Ei,cachedCode:Hn,onDone:en,onDestroy:Wi}:(Wi(),en("webgl not supported, try upgrading your browser or graphics drivers http://get.webgl.org"),null)}function C(Me,bt){var zt={};function Rr(Gr){var mi=Gr.toLowerCase(),Ui;try{Ui=zt[mi]=Me.getExtension(mi)}catch(qi){}return!!Ui}for(var jr=0;jr<bt.extensions.length;++jr){var Nr=bt.extensions[jr];if(!Rr(Nr))return bt.onDestroy(),bt.onDone('"'+Nr+'" extension is not supported by the current WebGL context, try upgrading your system or a different browser'),null}return bt.optionalExtensions.forEach(Rr),{extensions:zt,restore:function(){Object.keys(zt).forEach(function(Gr){if(zt[Gr]&&!Rr(Gr))throw new Error("(regl): error restoring extension "+Gr)})}}}function M(Me,bt){for(var zt=Array(Me),Rr=0;Rr<Me;++Rr)zt[Rr]=bt(Rr);return zt}var g=5120,P=5121,T=5122,z=5123,O=5124,V=5125,G=5126;function Z(Me){for(var bt=16;bt<=1<<28;bt*=16)if(Me<=bt)return bt;return 0}function j(Me){var bt,zt;return bt=(Me>65535)<<4,Me>>>=bt,zt=(Me>255)<<3,Me>>>=zt,bt|=zt,zt=(Me>15)<<2,Me>>>=zt,bt|=zt,zt=(Me>3)<<1,Me>>>=zt,bt|=zt,bt|Me>>1}function N(){var Me=M(8,function(){return[]});function bt(Nr){var Gr=Z(Nr),mi=Me[j(Gr)>>2];return mi.length>0?mi.pop():new ArrayBuffer(Gr)}function zt(Nr){Me[j(Nr.byteLength)>>2].push(Nr)}function Rr(Nr,Gr){var mi=null;switch(Nr){case g:mi=new Int8Array(bt(Gr),0,Gr);break;case P:mi=new Uint8Array(bt(Gr),0,Gr);break;case T:mi=new Int16Array(bt(2*Gr),0,Gr);break;case z:mi=new Uint16Array(bt(2*Gr),0,Gr);break;case O:mi=new Int32Array(bt(4*Gr),0,Gr);break;case V:mi=new Uint32Array(bt(4*Gr),0,Gr);break;case G:mi=new Float32Array(bt(4*Gr),0,Gr);break;default:return null}return mi.length!==Gr?mi.subarray(0,Gr):mi}function jr(Nr){zt(Nr.buffer)}return{alloc:bt,free:zt,allocType:Rr,freeType:jr}}var H=N();H.zero=N();var te=3408,oe=3410,_e=3411,Ee=3412,Ce=3413,me=3414,ie=3415,Se=33901,Le=33902,Ae=3379,Fe=3386,Pe=34921,ge=36347,Re=36348,ce=35661,Ze=35660,ut=34930,pt=36349,Zt=34076,st=34024,lt=7936,Gt=7937,Nt=7938,Jt=35724,sr=34047,wr=36063,cr=34852,$e=3553,St=34067,Qt=34069,Vt=33984,_t=6408,It=5126,mt=5121,er=36160,lr=36053,Tr=36064,Lr=16384,ti=function(Me,bt){var zt=1;bt.ext_texture_filter_anisotropic&&(zt=Me.getParameter(sr));var Rr=1,jr=1;bt.webgl_draw_buffers&&(Rr=Me.getParameter(cr),jr=Me.getParameter(wr));var Nr=!!bt.oes_texture_float;if(Nr){var Gr=Me.createTexture();Me.bindTexture($e,Gr),Me.texImage2D($e,0,_t,1,1,0,_t,It,null);var mi=Me.createFramebuffer();if(Me.bindFramebuffer(er,mi),Me.framebufferTexture2D(er,Tr,$e,Gr,0),Me.bindTexture($e,null),Me.checkFramebufferStatus(er)!==lr)Nr=!1;else{Me.viewport(0,0,1,1),Me.clearColor(1,0,0,1),Me.clear(Lr);var Ui=H.allocType(It,4);Me.readPixels(0,0,1,1,_t,It,Ui),Me.getError()?Nr=!1:(Me.deleteFramebuffer(mi),Me.deleteTexture(Gr),Nr=Ui[0]===1),H.freeType(Ui)}}var qi=typeof navigator!="undefined"&&(/MSIE/.test(navigator.userAgent)||/Trident\//.test(navigator.appVersion)||/Edge/.test(navigator.userAgent)),Ei=!0;if(!qi){var Hn=Me.createTexture(),en=H.allocType(mt,36);Me.activeTexture(Vt),Me.bindTexture(St,Hn),Me.texImage2D(Qt,0,_t,3,3,0,_t,mt,en),H.freeType(en),Me.bindTexture(St,null),Me.deleteTexture(Hn),Ei=!Me.getError()}return{colorBits:[Me.getParameter(oe),Me.getParameter(_e),Me.getParameter(Ee),Me.getParameter(Ce)],depthBits:Me.getParameter(me),stencilBits:Me.getParameter(ie),subpixelBits:Me.getParameter(te),extensions:Object.keys(bt).filter(function(Wi){return!!bt[Wi]}),maxAnisotropic:zt,maxDrawbuffers:Rr,maxColorAttachments:jr,pointSizeDims:Me.getParameter(Se),lineWidthDims:Me.getParameter(Le),maxViewportDims:Me.getParameter(Fe),maxCombinedTextureUnits:Me.getParameter(ce),maxCubeMapSize:Me.getParameter(Zt),maxRenderbufferSize:Me.getParameter(st),maxTextureUnits:Me.getParameter(ut),maxTextureSize:Me.getParameter(Ae),maxAttributes:Me.getParameter(Pe),maxVertexUniforms:Me.getParameter(ge),maxVertexTextureUnits:Me.getParameter(Ze),maxVaryingVectors:Me.getParameter(Re),maxFragmentUniforms:Me.getParameter(pt),glsl:Me.getParameter(Jt),renderer:Me.getParameter(Gt),vendor:Me.getParameter(lt),version:Me.getParameter(Nt),readFloat:Nr,npotTextureCube:Ei}},Br=function(Me){return Me instanceof Uint8Array||Me instanceof Uint16Array||Me instanceof Uint32Array||Me instanceof Int8Array||Me instanceof Int16Array||Me instanceof Int32Array||Me instanceof Float32Array||Me instanceof Float64Array||Me instanceof Uint8ClampedArray};function Vr(Me){return!!Me&&typeof Me=="object"&&Array.isArray(Me.shape)&&Array.isArray(Me.stride)&&typeof Me.offset=="number"&&Me.shape.length===Me.stride.length&&(Array.isArray(Me.data)||Br(Me.data))}var dt=function(Me){return Object.keys(Me).map(function(bt){return Me[bt]})},Ge={shape:xe,flatten:Ie};function Je(Me,bt,zt){for(var Rr=0;Rr<bt;++Rr)zt[Rr]=Me[Rr]}function je(Me,bt,zt,Rr){for(var jr=0,Nr=0;Nr<bt;++Nr)for(var Gr=Me[Nr],mi=0;mi<zt;++mi)Rr[jr++]=Gr[mi]}function tt(Me,bt,zt,Rr,jr,Nr){for(var Gr=Nr,mi=0;mi<bt;++mi)for(var Ui=Me[mi],qi=0;qi<zt;++qi)for(var Ei=Ui[qi],Hn=0;Hn<Rr;++Hn)jr[Gr++]=Ei[Hn]}function xt(Me,bt,zt,Rr,jr){for(var Nr=1,Gr=zt+1;Gr<bt.length;++Gr)Nr*=bt[Gr];var mi=bt[zt];if(bt.length-zt===4){var Ui=bt[zt+1],qi=bt[zt+2],Ei=bt[zt+3];for(Gr=0;Gr<mi;++Gr)tt(Me[Gr],Ui,qi,Ei,Rr,jr),jr+=Nr}else for(Gr=0;Gr<mi;++Gr)xt(Me[Gr],bt,zt+1,Rr,jr),jr+=Nr}function Ie(Me,bt,zt,Rr){var jr=1;if(bt.length)for(var Nr=0;Nr<bt.length;++Nr)jr*=bt[Nr];else jr=0;var Gr=Rr||H.allocType(zt,jr);switch(bt.length){case 0:break;case 1:Je(Me,bt[0],Gr);break;case 2:je(Me,bt[0],bt[1],Gr);break;case 3:tt(Me,bt[0],bt[1],bt[2],Gr,0);break;default:xt(Me,bt,0,Gr,0)}return Gr}function xe(Me){for(var bt=[],zt=Me;zt.length;zt=zt[0])bt.push(zt.length);return bt}var ke={"[object Int8Array]":5120,"[object Int16Array]":5122,"[object Int32Array]":5124,"[object Uint8Array]":5121,"[object Uint8ClampedArray]":5121,"[object Uint16Array]":5123,"[object Uint32Array]":5125,"[object Float32Array]":5126,"[object Float64Array]":5121,"[object ArrayBuffer]":5121},vt=5120,ir=5122,ar=5124,vr=5121,ii=5123,pi=5125,$r=5126,di=5126,ji={int8:vt,int16:ir,int32:ar,uint8:vr,uint16:ii,uint32:pi,float:$r,float32:di},In=35048,wi=35040,On={dynamic:In,stream:wi,static:35044},qn=Ge.flatten,Fn=Ge.shape,ra=35044,la=35040,Ut=5121,wt=5126,rr=[];rr[5120]=1,rr[5122]=2,rr[5124]=4,rr[5121]=1,rr[5123]=2,rr[5125]=4,rr[5126]=4;function nr(Me){return ke[Object.prototype.toString.call(Me)]|0}function Er(Me,bt){for(var zt=0;zt<bt.length;++zt)Me[zt]=bt[zt]}function Xr(Me,bt,zt,Rr,jr,Nr,Gr){for(var mi=0,Ui=0;Ui<zt;++Ui)for(var qi=0;qi<Rr;++qi)Me[mi++]=bt[jr*Ui+Nr*qi+Gr]}function ri(Me,bt,zt,Rr){var jr=0,Nr={};function Gr(Mr){this.id=jr++,this.buffer=Me.createBuffer(),this.type=Mr,this.usage=ra,this.byteLength=0,this.dimension=1,this.dtype=Ut,this.persistentData=null,zt.profile&&(this.stats={size:0})}Gr.prototype.bind=function(){Me.bindBuffer(this.type,this.buffer)},Gr.prototype.destroy=function(){en(this)};var mi=[];function Ui(Mr,Yr){var xi=mi.pop();return xi||(xi=new Gr(Mr)),xi.bind(),Hn(xi,Yr,la,0,1,!1),xi}function qi(Mr){mi.push(Mr)}function Ei(Mr,Yr,xi){Mr.byteLength=Yr.byteLength,Me.bufferData(Mr.type,Yr,xi)}function Hn(Mr,Yr,xi,Ri,ci,an){var Zi;if(Mr.usage=xi,Array.isArray(Yr)){if(Mr.dtype=Ri||wt,Yr.length>0){var Bn;if(Array.isArray(Yr[0])){Zi=Fn(Yr);for(var hi=1,li=1;li<Zi.length;++li)hi*=Zi[li];Mr.dimension=hi,Bn=qn(Yr,Zi,Mr.dtype),Ei(Mr,Bn,xi),an?Mr.persistentData=Bn:H.freeType(Bn)}else if(typeof Yr[0]=="number"){Mr.dimension=ci;var mn=H.allocType(Mr.dtype,Yr.length);Er(mn,Yr),Ei(Mr,mn,xi),an?Mr.persistentData=mn:H.freeType(mn)}else Br(Yr[0])&&(Mr.dimension=Yr[0].length,Mr.dtype=Ri||nr(Yr[0])||wt,Bn=qn(Yr,[Yr.length,Yr[0].length],Mr.dtype),Ei(Mr,Bn,xi),an?Mr.persistentData=Bn:H.freeType(Bn))}}else if(Br(Yr))Mr.dtype=Ri||nr(Yr),Mr.dimension=ci,Ei(Mr,Yr,xi),an&&(Mr.persistentData=new Uint8Array(new Uint8Array(Yr.buffer)));else if(Vr(Yr)){Zi=Yr.shape;var Ji=Yr.stride,Vi=Yr.offset,Ni=0,pn=0,Vn=0,na=0;Zi.length===1?(Ni=Zi[0],pn=1,Vn=Ji[0],na=0):Zi.length===2&&(Ni=Zi[0],pn=Zi[1],Vn=Ji[0],na=Ji[1]),Mr.dtype=Ri||nr(Yr.data)||wt,Mr.dimension=pn;var Ki=H.allocType(Mr.dtype,Ni*pn);Xr(Ki,Yr.data,Ni,pn,Vn,na,Vi),Ei(Mr,Ki,xi),an?Mr.persistentData=Ki:H.freeType(Ki)}else Yr instanceof ArrayBuffer&&(Mr.dtype=Ut,Mr.dimension=ci,Ei(Mr,Yr,xi),an&&(Mr.persistentData=new Uint8Array(new Uint8Array(Yr))))}function en(Mr){bt.bufferCount--,Rr(Mr);var Yr=Mr.buffer;Me.deleteBuffer(Yr),Mr.buffer=null,delete Nr[Mr.id]}function Wi(Mr,Yr,xi,Ri){bt.bufferCount++;var ci=new Gr(Yr);Nr[ci.id]=ci;function an(hi){var li=ra,mn=null,Ji=0,Vi=0,Ni=1;return Array.isArray(hi)||Br(hi)||Vr(hi)||hi instanceof ArrayBuffer?mn=hi:typeof hi=="number"?Ji=hi|0:hi&&("data"in hi&&(mn=hi.data),"usage"in hi&&(li=On[hi.usage]),"type"in hi&&(Vi=ji[hi.type]),"dimension"in hi&&(Ni=hi.dimension|0),"length"in hi&&(Ji=hi.length|0)),ci.bind(),mn?Hn(ci,mn,li,Vi,Ni,Ri):(Ji&&Me.bufferData(ci.type,Ji,li),ci.dtype=Vi||Ut,ci.usage=li,ci.dimension=Ni,ci.byteLength=Ji),zt.profile&&(ci.stats.size=ci.byteLength*rr[ci.dtype]),an}function Zi(hi,li){Me.bufferSubData(ci.type,li,hi)}function Bn(hi,li){var mn=(li||0)|0,Ji;if(ci.bind(),Br(hi)||hi instanceof ArrayBuffer)Zi(hi,mn);else if(Array.isArray(hi)){if(hi.length>0){if(typeof hi[0]=="number"){var Vi=H.allocType(ci.dtype,hi.length);Er(Vi,hi),Zi(Vi,mn),H.freeType(Vi)}else if(Array.isArray(hi[0])||Br(hi[0])){Ji=Fn(hi);var Ni=qn(hi,Ji,ci.dtype);Zi(Ni,mn),H.freeType(Ni)}}}else if(Vr(hi)){Ji=hi.shape;var pn=hi.stride,Vn=0,na=0,Ki=0,kn=0;Ji.length===1?(Vn=Ji[0],na=1,Ki=pn[0],kn=0):Ji.length===2&&(Vn=Ji[0],na=Ji[1],Ki=pn[0],kn=pn[1]);var ta=Array.isArray(hi.data)?ci.dtype:nr(hi.data),oa=H.allocType(ta,Vn*na);Xr(oa,hi.data,Vn,na,Ki,kn,hi.offset),Zi(oa,mn),H.freeType(oa)}return an}return xi||an(Mr),an._reglType="buffer",an._buffer=ci,an.subdata=Bn,zt.profile&&(an.stats=ci.stats),an.destroy=function(){en(ci)},an}function si(){dt(Nr).forEach(function(Mr){Mr.buffer=Me.createBuffer(),Me.bindBuffer(Mr.type,Mr.buffer),Me.bufferData(Mr.type,Mr.persistentData||Mr.byteLength,Mr.usage)})}return zt.profile&&(bt.getTotalBufferSize=function(){var Mr=0;return Object.keys(Nr).forEach(function(Yr){Mr+=Nr[Yr].stats.size}),Mr}),{create:Wi,createStream:Ui,destroyStream:qi,clear:function(){dt(Nr).forEach(en),mi.forEach(en)},getBuffer:function(Mr){return Mr&&Mr._buffer instanceof Gr?Mr._buffer:null},restore:si,_initBuffer:Hn}}var Qr=0,Oi=0,$i=1,tn=1,fn=4,yn=4,Sn={points:Qr,point:Oi,lines:$i,line:tn,triangles:fn,triangle:yn,"line loop":2,"line strip":3,"triangle strip":5,"triangle fan":6},Ba=0,ua=1,ma=4,Wa=5120,Fa=5121,Wo=5122,da=5123,Wn=5124,Ha=5125,vo=34963,jn=35040,Mt=35044;function kr(Me,bt,zt,Rr){var jr={},Nr=0,Gr={uint8:Fa,uint16:da};bt.oes_element_index_uint&&(Gr.uint32=Ha);function mi(si){this.id=Nr++,jr[this.id]=this,this.buffer=si,this.primType=ma,this.vertCount=0,this.type=0}mi.prototype.bind=function(){this.buffer.bind()};var Ui=[];function qi(si){var Mr=Ui.pop();return Mr||(Mr=new mi(zt.create(null,vo,!0,!1)._buffer)),Hn(Mr,si,jn,-1,-1,0,0),Mr}function Ei(si){Ui.push(si)}function Hn(si,Mr,Yr,xi,Ri,ci,an){si.buffer.bind();var Zi;if(Mr){var Bn=an;!an&&(!Br(Mr)||Vr(Mr)&&!Br(Mr.data))&&(Bn=bt.oes_element_index_uint?Ha:da),zt._initBuffer(si.buffer,Mr,Yr,Bn,3)}else Me.bufferData(vo,ci,Yr),si.buffer.dtype=Zi||Fa,si.buffer.usage=Yr,si.buffer.dimension=3,si.buffer.byteLength=ci;if(Zi=an,!an){switch(si.buffer.dtype){case Fa:case Wa:Zi=Fa;break;case da:case Wo:Zi=da;break;case Ha:case Wn:Zi=Ha;break;default:}si.buffer.dtype=Zi}si.type=Zi;var hi=Ri;hi<0&&(hi=si.buffer.byteLength,Zi===da?hi>>=1:Zi===Ha&&(hi>>=2)),si.vertCount=hi;var li=xi;if(xi<0){li=ma;var mn=si.buffer.dimension;mn===1&&(li=Ba),mn===2&&(li=ua),mn===3&&(li=ma)}si.primType=li}function en(si){Rr.elementsCount--,delete jr[si.id],si.buffer.destroy(),si.buffer=null}function Wi(si,Mr){var Yr=zt.create(null,vo,!0),xi=new mi(Yr._buffer);Rr.elementsCount++;function Ri(ci){if(!ci)Yr(),xi.primType=ma,xi.vertCount=0,xi.type=Fa;else if(typeof ci=="number")Yr(ci),xi.primType=ma,xi.vertCount=ci|0,xi.type=Fa;else{var an=null,Zi=Mt,Bn=-1,hi=-1,li=0,mn=0;Array.isArray(ci)||Br(ci)||Vr(ci)?an=ci:("data"in ci&&(an=ci.data),"usage"in ci&&(Zi=On[ci.usage]),"primitive"in ci&&(Bn=Sn[ci.primitive]),"count"in ci&&(hi=ci.count|0),"type"in ci&&(mn=Gr[ci.type]),"length"in ci?li=ci.length|0:(li=hi,mn===da||mn===Wo?li*=2:(mn===Ha||mn===Wn)&&(li*=4))),Hn(xi,an,Zi,Bn,hi,li,mn)}return Ri}return Ri(si),Ri._reglType="elements",Ri._elements=xi,Ri.subdata=function(ci,an){return Yr.subdata(ci,an),Ri},Ri.destroy=function(){en(xi)},Ri}return{create:Wi,createStream:qi,destroyStream:Ei,getElements:function(si){return typeof si=="function"&&si._elements instanceof mi?si._elements:null},clear:function(){dt(jr).forEach(en)}}}var Jr=new Float32Array(1),vi=new Uint32Array(Jr.buffer),hn=5123;function An(Me){for(var bt=H.allocType(hn,Me.length),zt=0;zt<Me.length;++zt)if(isNaN(Me[zt]))bt[zt]=65535;else if(Me[zt]===1/0)bt[zt]=31744;else if(Me[zt]===-1/0)bt[zt]=64512;else{Jr[0]=Me[zt];var Rr=vi[0],jr=Rr>>>31<<15,Nr=(Rr<<1>>>24)-127,Gr=Rr>>13&1023;if(Nr<-24)bt[zt]=jr;else if(Nr<-14){var mi=-14-Nr;bt[zt]=jr+(Gr+1024>>mi)}else Nr>15?bt[zt]=jr+31744:bt[zt]=jr+(Nr+15<<10)+Gr}return bt}function Mn(Me){return Array.isArray(Me)||Br(Me)}var Li=34467,_n=3553,ya=34067,Jn=34069,Ma=6408,_o=6406,No=6407,po=6409,Lo=6410,ko=32854,Ds=32855,Fs=36194,ll=32819,ul=32820,zl=33635,us=34042,il=6402,As=34041,cl=35904,Ks=35906,zs=36193,Io=33776,ls=33777,Zl=33778,Su=33779,nc=35986,bs=35987,Rn=34798,_a=35840,Vu=35841,Ol=35842,xo=35843,Yl=36196,Ns=5121,Hl=5123,ac=5125,aa=5126,Oo=10242,qo=10243,ql=10497,Pc=33071,Do=33648,rf=10240,Uf=10241,pl=9728,Zc=9729,Kl=9984,Os=9985,yu=9986,oc=9987,Cf=33170,sc=4352,Vh=4353,Lf=4354,cs=34046,nf=3317,Vf=37440,Jl=37441,fl=37443,lc=37444,Fu=33984,Es=[Kl,yu,Os,oc],Hs=[0,po,Lo,No,Ma],Go={};Go[po]=Go[_o]=Go[il]=1,Go[As]=Go[Lo]=2,Go[No]=Go[cl]=3,Go[Ma]=Go[Ks]=4;function ps(Me){return"[object "+Me+"]"}var uc=ps("HTMLCanvasElement"),xl=ps("OffscreenCanvas"),Gu=ps("CanvasRenderingContext2D"),qs=ps("ImageBitmap"),ad=ps("HTMLImageElement"),Po=ps("HTMLVideoElement"),od=Object.keys(ke).concat([uc,xl,Gu,qs,ad,Po]),Yo=[];Yo[Ns]=1,Yo[aa]=4,Yo[zs]=2,Yo[Hl]=2,Yo[ac]=4;var Pa=[];Pa[ko]=2,Pa[Ds]=2,Pa[Fs]=2,Pa[As]=4,Pa[Io]=.5,Pa[ls]=.5,Pa[Zl]=1,Pa[Su]=1,Pa[nc]=.5,Pa[bs]=1,Pa[Rn]=1,Pa[_a]=.5,Pa[Vu]=.25,Pa[Ol]=.5,Pa[xo]=.25,Pa[Yl]=.5;function af(Me){return Array.isArray(Me)&&(Me.length===0||typeof Me[0]=="number")}function Hu(Me){if(!Array.isArray(Me))return!1;var bt=Me.length;return!(bt===0||!Mn(Me[0]))}function bl(Me){return Object.prototype.toString.call(Me)}function Gf(Me){return bl(Me)===uc}function Ic(Me){return bl(Me)===xl}function yf(Me){return bl(Me)===Gu}function Bl(Me){return bl(Me)===qs}function wh(Me){return bl(Me)===ad}function Qf(Me){return bl(Me)===Po}function _f(Me){if(!Me)return!1;var bt=bl(Me);return od.indexOf(bt)>=0?!0:af(Me)||Hu(Me)||Vr(Me)}function Yc(Me){return ke[Object.prototype.toString.call(Me)]|0}function eh(Me,bt){var zt=bt.length;switch(Me.type){case Ns:case Hl:case ac:case aa:var Rr=H.allocType(Me.type,zt);Rr.set(bt),Me.data=Rr;break;case zs:Me.data=An(bt);break;default:}}function th(Me,bt){return H.allocType(Me.type===zs?aa:Me.type,bt)}function ju(Me,bt){Me.type===zs?(Me.data=An(bt),H.freeType(bt)):Me.data=bt}function Hf(Me,bt,zt,Rr,jr,Nr){for(var Gr=Me.width,mi=Me.height,Ui=Me.channels,qi=Gr*mi*Ui,Ei=th(Me,qi),Hn=0,en=0;en<mi;++en)for(var Wi=0;Wi<Gr;++Wi)for(var si=0;si<Ui;++si)Ei[Hn++]=bt[zt*Wi+Rr*en+jr*si+Nr];ju(Me,Ei)}function cc(Me,bt,zt,Rr,jr,Nr){var Gr;if(typeof Pa[Me]!="undefined"?Gr=Pa[Me]:Gr=Go[Me]*Yo[bt],Nr&&(Gr*=6),jr){for(var mi=0,Ui=zt;Ui>=1;)mi+=Gr*Ui*Ui,Ui/=2;return mi}else return Gr*zt*Rr}function of(Me,bt,zt,Rr,jr,Nr,Gr){var mi={"don't care":sc,"dont care":sc,nice:Lf,fast:Vh},Ui={repeat:ql,clamp:Pc,mirror:Do},qi={nearest:pl,linear:Zc},Ei=e({mipmap:oc,"nearest mipmap nearest":Kl,"linear mipmap nearest":Os,"nearest mipmap linear":yu,"linear mipmap linear":oc},qi),Hn={none:0,browser:lc},en={uint8:Ns,rgba4:ll,rgb565:zl,"rgb5 a1":ul},Wi={alpha:_o,luminance:po,"luminance alpha":Lo,rgb:No,rgba:Ma,rgba4:ko,"rgb5 a1":Ds,rgb565:Fs},si={};bt.ext_srgb&&(Wi.srgb=cl,Wi.srgba=Ks),bt.oes_texture_float&&(en.float32=en.float=aa),bt.oes_texture_half_float&&(en.float16=en["half float"]=zs),bt.webgl_depth_texture&&(e(Wi,{depth:il,"depth stencil":As}),e(en,{uint16:Hl,uint32:ac,"depth stencil":us})),bt.webgl_compressed_texture_s3tc&&e(si,{"rgb s3tc dxt1":Io,"rgba s3tc dxt1":ls,"rgba s3tc dxt3":Zl,"rgba s3tc dxt5":Su}),bt.webgl_compressed_texture_atc&&e(si,{"rgb atc":nc,"rgba atc explicit alpha":bs,"rgba atc interpolated alpha":Rn}),bt.webgl_compressed_texture_pvrtc&&e(si,{"rgb pvrtc 4bppv1":_a,"rgb pvrtc 2bppv1":Vu,"rgba pvrtc 4bppv1":Ol,"rgba pvrtc 2bppv1":xo}),bt.webgl_compressed_texture_etc1&&(si["rgb etc1"]=Yl);var Mr=Array.prototype.slice.call(Me.getParameter(Li));Object.keys(si).forEach(function(ne){var we=si[ne];Mr.indexOf(we)>=0&&(Wi[ne]=we)});var Yr=Object.keys(Wi);zt.textureFormats=Yr;var xi=[];Object.keys(Wi).forEach(function(ne){var we=Wi[ne];xi[we]=ne});var Ri=[];Object.keys(en).forEach(function(ne){var we=en[ne];Ri[we]=ne});var ci=[];Object.keys(qi).forEach(function(ne){var we=qi[ne];ci[we]=ne});var an=[];Object.keys(Ei).forEach(function(ne){var we=Ei[ne];an[we]=ne});var Zi=[];Object.keys(Ui).forEach(function(ne){var we=Ui[ne];Zi[we]=ne});var Bn=Yr.reduce(function(ne,we){var Ue=Wi[we];return Ue===po||Ue===_o||Ue===po||Ue===Lo||Ue===il||Ue===As||bt.ext_srgb&&(Ue===cl||Ue===Ks)?ne[Ue]=Ue:Ue===Ds||we.indexOf("rgba")>=0?ne[Ue]=Ma:ne[Ue]=No,ne},{});function hi(){this.internalformat=Ma,this.format=Ma,this.type=Ns,this.compressed=!1,this.premultiplyAlpha=!1,this.flipY=!1,this.unpackAlignment=1,this.colorSpace=lc,this.width=0,this.height=0,this.channels=0}function li(ne,we){ne.internalformat=we.internalformat,ne.format=we.format,ne.type=we.type,ne.compressed=we.compressed,ne.premultiplyAlpha=we.premultiplyAlpha,ne.flipY=we.flipY,ne.unpackAlignment=we.unpackAlignment,ne.colorSpace=we.colorSpace,ne.width=we.width,ne.height=we.height,ne.channels=we.channels}function mn(ne,we){if(!(typeof we!="object"||!we)){if("premultiplyAlpha"in we&&(ne.premultiplyAlpha=we.premultiplyAlpha),"flipY"in we&&(ne.flipY=we.flipY),"alignment"in we&&(ne.unpackAlignment=we.alignment),"colorSpace"in we&&(ne.colorSpace=Hn[we.colorSpace]),"type"in we){var Ue=we.type;ne.type=en[Ue]}var ft=ne.width,Xt=ne.height,hr=ne.channels,qt=!1;"shape"in we?(ft=we.shape[0],Xt=we.shape[1],we.shape.length===3&&(hr=we.shape[2],qt=!0)):("radius"in we&&(ft=Xt=we.radius),"width"in we&&(ft=we.width),"height"in we&&(Xt=we.height),"channels"in we&&(hr=we.channels,qt=!0)),ne.width=ft|0,ne.height=Xt|0,ne.channels=hr|0;var Ve=!1;if("format"in we){var Qe=we.format,at=ne.internalformat=Wi[Qe];ne.format=Bn[at],Qe in en&&("type"in we||(ne.type=en[Qe])),Qe in si&&(ne.compressed=!0),Ve=!0}!qt&&Ve?ne.channels=Go[ne.format]:qt&&!Ve&&ne.channels!==Hs[ne.format]&&(ne.format=ne.internalformat=Hs[ne.channels])}}function Ji(ne){Me.pixelStorei(Vf,ne.flipY),Me.pixelStorei(Jl,ne.premultiplyAlpha),Me.pixelStorei(fl,ne.colorSpace),Me.pixelStorei(nf,ne.unpackAlignment)}function Vi(){hi.call(this),this.xOffset=0,this.yOffset=0,this.data=null,this.needsFree=!1,this.element=null,this.needsCopy=!1}function Ni(ne,we){var Ue=null;if(_f(we)?Ue=we:we&&(mn(ne,we),"x"in we&&(ne.xOffset=we.x|0),"y"in we&&(ne.yOffset=we.y|0),_f(we.data)&&(Ue=we.data)),we.copy){var ft=jr.viewportWidth,Xt=jr.viewportHeight;ne.width=ne.width||ft-ne.xOffset,ne.height=ne.height||Xt-ne.yOffset,ne.needsCopy=!0}else if(!Ue)ne.width=ne.width||1,ne.height=ne.height||1,ne.channels=ne.channels||4;else if(Br(Ue))ne.channels=ne.channels||4,ne.data=Ue,!("type"in we)&&ne.type===Ns&&(ne.type=Yc(Ue));else if(af(Ue))ne.channels=ne.channels||4,eh(ne,Ue),ne.alignment=1,ne.needsFree=!0;else if(Vr(Ue)){var hr=Ue.data;!Array.isArray(hr)&&ne.type===Ns&&(ne.type=Yc(hr));var qt=Ue.shape,Ve=Ue.stride,Qe,at,Ct,Ot,Rt,Bt;qt.length===3?(Ct=qt[2],Bt=Ve[2]):(Ct=1,Bt=1),Qe=qt[0],at=qt[1],Ot=Ve[0],Rt=Ve[1],ne.alignment=1,ne.width=Qe,ne.height=at,ne.channels=Ct,ne.format=ne.internalformat=Hs[Ct],ne.needsFree=!0,Hf(ne,hr,Ot,Rt,Bt,Ue.offset)}else if(Gf(Ue)||Ic(Ue)||yf(Ue))Gf(Ue)||Ic(Ue)?ne.element=Ue:ne.element=Ue.canvas,ne.width=ne.element.width,ne.height=ne.element.height,ne.channels=4;else if(Bl(Ue))ne.element=Ue,ne.width=Ue.width,ne.height=Ue.height,ne.channels=4;else if(wh(Ue))ne.element=Ue,ne.width=Ue.naturalWidth,ne.height=Ue.naturalHeight,ne.channels=4;else if(Qf(Ue))ne.element=Ue,ne.width=Ue.videoWidth,ne.height=Ue.videoHeight,ne.channels=4;else if(Hu(Ue)){var Dt=ne.width||Ue[0].length,yt=ne.height||Ue.length,Pt=ne.channels;Mn(Ue[0][0])?Pt=Pt||Ue[0][0].length:Pt=Pt||1;for(var ht=Ge.shape(Ue),ur=1,br=0;br<ht.length;++br)ur*=ht[br];var Ur=th(ne,ur);Ge.flatten(Ue,ht,"",Ur),ju(ne,Ur),ne.alignment=1,ne.width=Dt,ne.height=yt,ne.channels=Pt,ne.format=ne.internalformat=Hs[Pt],ne.needsFree=!0}ne.type===aa||ne.type}function pn(ne,we,Ue){var ft=ne.element,Xt=ne.data,hr=ne.internalformat,qt=ne.format,Ve=ne.type,Qe=ne.width,at=ne.height;Ji(ne),ft?Me.texImage2D(we,Ue,qt,qt,Ve,ft):ne.compressed?Me.compressedTexImage2D(we,Ue,hr,Qe,at,0,Xt):ne.needsCopy?(Rr(),Me.copyTexImage2D(we,Ue,qt,ne.xOffset,ne.yOffset,Qe,at,0)):Me.texImage2D(we,Ue,qt,Qe,at,0,qt,Ve,Xt||null)}function Vn(ne,we,Ue,ft,Xt){var hr=ne.element,qt=ne.data,Ve=ne.internalformat,Qe=ne.format,at=ne.type,Ct=ne.width,Ot=ne.height;Ji(ne),hr?Me.texSubImage2D(we,Xt,Ue,ft,Qe,at,hr):ne.compressed?Me.compressedTexSubImage2D(we,Xt,Ue,ft,Ve,Ct,Ot,qt):ne.needsCopy?(Rr(),Me.copyTexSubImage2D(we,Xt,Ue,ft,ne.xOffset,ne.yOffset,Ct,Ot)):Me.texSubImage2D(we,Xt,Ue,ft,Ct,Ot,Qe,at,qt)}var na=[];function Ki(){return na.pop()||new Vi}function kn(ne){ne.needsFree&&H.freeType(ne.data),Vi.call(ne),na.push(ne)}function ta(){hi.call(this),this.genMipmaps=!1,this.mipmapHint=sc,this.mipmask=0,this.images=Array(16)}function oa(ne,we,Ue){var ft=ne.images[0]=Ki();ne.mipmask=1,ft.width=ne.width=we,ft.height=ne.height=Ue,ft.channels=ne.channels=4}function ba(ne,we){var Ue=null;if(_f(we))Ue=ne.images[0]=Ki(),li(Ue,ne),Ni(Ue,we),ne.mipmask=1;else if(mn(ne,we),Array.isArray(we.mipmap))for(var ft=we.mipmap,Xt=0;Xt<ft.length;++Xt)Ue=ne.images[Xt]=Ki(),li(Ue,ne),Ue.width>>=Xt,Ue.height>>=Xt,Ni(Ue,ft[Xt]),ne.mipmask|=1<<Xt;else Ue=ne.images[0]=Ki(),li(Ue,ne),Ni(Ue,we),ne.mipmask=1;li(ne,ne.images[0]),ne.compressed&&(ne.internalformat===Io||ne.internalformat===ls||ne.internalformat===Zl||ne.internalformat)}function is(ne,we){for(var Ue=ne.images,ft=0;ft<Ue.length;++ft){if(!Ue[ft])return;pn(Ue[ft],we,ft)}}var Zs=[];function Va(){var ne=Zs.pop()||new ta;hi.call(ne),ne.mipmask=0;for(var we=0;we<16;++we)ne.images[we]=null;return ne}function Ml(ne){for(var we=ne.images,Ue=0;Ue<we.length;++Ue)we[Ue]&&kn(we[Ue]),we[Ue]=null;Zs.push(ne)}function zo(){this.minFilter=pl,this.magFilter=pl,this.wrapS=Pc,this.wrapT=Pc,this.anisotropic=1,this.genMipmaps=!1,this.mipmapHint=sc}function Qs(ne,we){if("min"in we){var Ue=we.min;ne.minFilter=Ei[Ue],Es.indexOf(ne.minFilter)>=0&&!("faces"in we)&&(ne.genMipmaps=!0)}if("mag"in we){var ft=we.mag;ne.magFilter=qi[ft]}var Xt=ne.wrapS,hr=ne.wrapT;if("wrap"in we){var qt=we.wrap;typeof qt=="string"?Xt=hr=Ui[qt]:Array.isArray(qt)&&(Xt=Ui[qt[0]],hr=Ui[qt[1]])}else{if("wrapS"in we){var Ve=we.wrapS;Xt=Ui[Ve]}if("wrapT"in we){var Qe=we.wrapT;hr=Ui[Qe]}}if(ne.wrapS=Xt,ne.wrapT=hr,"anisotropic"in we){var at=we.anisotropic;ne.anisotropic=we.anisotropic}if("mipmap"in we){var Ct=!1;switch(typeof we.mipmap){case"string":ne.mipmapHint=mi[we.mipmap],ne.genMipmaps=!0,Ct=!0;break;case"boolean":Ct=ne.genMipmaps=we.mipmap;break;case"object":ne.genMipmaps=!1,Ct=!0;break;default:}Ct&&!("min"in we)&&(ne.minFilter=Kl)}}function al(ne,we){Me.texParameteri(we,Uf,ne.minFilter),Me.texParameteri(we,rf,ne.magFilter),Me.texParameteri(we,Oo,ne.wrapS),Me.texParameteri(we,qo,ne.wrapT),bt.ext_texture_filter_anisotropic&&Me.texParameteri(we,cs,ne.anisotropic),ne.genMipmaps&&(Me.hint(Cf,ne.mipmapHint),Me.generateMipmap(we))}var Vl=0,ss={},Vs=zt.maxTextureUnits,Ys=Array(Vs).map(function(){return null});function wa(ne){hi.call(this),this.mipmask=0,this.internalformat=Ma,this.id=Vl++,this.refCount=1,this.target=ne,this.texture=Me.createTexture(),this.unit=-1,this.bindCount=0,this.texInfo=new zo,Gr.profile&&(this.stats={size:0})}function ol(ne){Me.activeTexture(Fu),Me.bindTexture(ne.target,ne.texture)}function io(){var ne=Ys[0];ne?Me.bindTexture(ne.target,ne.texture):Me.bindTexture(_n,null)}function Y(ne){var we=ne.texture,Ue=ne.unit,ft=ne.target;Ue>=0&&(Me.activeTexture(Fu+Ue),Me.bindTexture(ft,null),Ys[Ue]=null),Me.deleteTexture(we),ne.texture=null,ne.params=null,ne.pixels=null,ne.refCount=0,delete ss[ne.id],Nr.textureCount--}e(wa.prototype,{bind:function(){var ne=this;ne.bindCount+=1;var we=ne.unit;if(we<0){for(var Ue=0;Ue<Vs;++Ue){var ft=Ys[Ue];if(ft){if(ft.bindCount>0)continue;ft.unit=-1}Ys[Ue]=ne,we=Ue;break}we>=Vs,Gr.profile&&Nr.maxTextureUnits<we+1&&(Nr.maxTextureUnits=we+1),ne.unit=we,Me.activeTexture(Fu+we),Me.bindTexture(ne.target,ne.texture)}return we},unbind:function(){this.bindCount-=1},decRef:function(){--this.refCount<=0&&Y(this)}});function D(ne,we){var Ue=new wa(_n);ss[Ue.id]=Ue,Nr.textureCount++;function ft(qt,Ve){var Qe=Ue.texInfo;zo.call(Qe);var at=Va();return typeof qt=="number"?typeof Ve=="number"?oa(at,qt|0,Ve|0):oa(at,qt|0,qt|0):qt?(Qs(Qe,qt),ba(at,qt)):oa(at,1,1),Qe.genMipmaps&&(at.mipmask=(at.width<<1)-1),Ue.mipmask=at.mipmask,li(Ue,at),Ue.internalformat=at.internalformat,ft.width=at.width,ft.height=at.height,ol(Ue),is(at,_n),al(Qe,_n),io(),Ml(at),Gr.profile&&(Ue.stats.size=cc(Ue.internalformat,Ue.type,at.width,at.height,Qe.genMipmaps,!1)),ft.format=xi[Ue.internalformat],ft.type=Ri[Ue.type],ft.mag=ci[Qe.magFilter],ft.min=an[Qe.minFilter],ft.wrapS=Zi[Qe.wrapS],ft.wrapT=Zi[Qe.wrapT],ft}function Xt(qt,Ve,Qe,at){var Ct=Ve|0,Ot=Qe|0,Rt=at|0,Bt=Ki();return li(Bt,Ue),Bt.width=0,Bt.height=0,Ni(Bt,qt),Bt.width=Bt.width||(Ue.width>>Rt)-Ct,Bt.height=Bt.height||(Ue.height>>Rt)-Ot,ol(Ue),Vn(Bt,_n,Ct,Ot,Rt),io(),kn(Bt),ft}function hr(qt,Ve){var Qe=qt|0,at=Ve|0||Qe;if(Qe===Ue.width&&at===Ue.height)return ft;ft.width=Ue.width=Qe,ft.height=Ue.height=at,ol(Ue);for(var Ct=0;Ue.mipmask>>Ct;++Ct){var Ot=Qe>>Ct,Rt=at>>Ct;if(!Ot||!Rt)break;Me.texImage2D(_n,Ct,Ue.format,Ot,Rt,0,Ue.format,Ue.type,null)}return io(),Gr.profile&&(Ue.stats.size=cc(Ue.internalformat,Ue.type,Qe,at,!1,!1)),ft}return ft(ne,we),ft.subimage=Xt,ft.resize=hr,ft._reglType="texture2d",ft._texture=Ue,Gr.profile&&(ft.stats=Ue.stats),ft.destroy=function(){Ue.decRef()},ft}function J(ne,we,Ue,ft,Xt,hr){var qt=new wa(ya);ss[qt.id]=qt,Nr.cubeCount++;var Ve=new Array(6);function Qe(Ot,Rt,Bt,Dt,yt,Pt){var ht,ur=qt.texInfo;for(zo.call(ur),ht=0;ht<6;++ht)Ve[ht]=Va();if(typeof Ot=="number"||!Ot){var br=Ot|0||1;for(ht=0;ht<6;++ht)oa(Ve[ht],br,br)}else if(typeof Ot=="object")if(Rt)ba(Ve[0],Ot),ba(Ve[1],Rt),ba(Ve[2],Bt),ba(Ve[3],Dt),ba(Ve[4],yt),ba(Ve[5],Pt);else if(Qs(ur,Ot),mn(qt,Ot),"faces"in Ot){var Ur=Ot.faces;for(ht=0;ht<6;++ht)li(Ve[ht],qt),ba(Ve[ht],Ur[ht])}else for(ht=0;ht<6;++ht)ba(Ve[ht],Ot);for(li(qt,Ve[0]),ur.genMipmaps?qt.mipmask=(Ve[0].width<<1)-1:qt.mipmask=Ve[0].mipmask,qt.internalformat=Ve[0].internalformat,Qe.width=Ve[0].width,Qe.height=Ve[0].height,ol(qt),ht=0;ht<6;++ht)is(Ve[ht],Jn+ht);for(al(ur,ya),io(),Gr.profile&&(qt.stats.size=cc(qt.internalformat,qt.type,Qe.width,Qe.height,ur.genMipmaps,!0)),Qe.format=xi[qt.internalformat],Qe.type=Ri[qt.type],Qe.mag=ci[ur.magFilter],Qe.min=an[ur.minFilter],Qe.wrapS=Zi[ur.wrapS],Qe.wrapT=Zi[ur.wrapT],ht=0;ht<6;++ht)Ml(Ve[ht]);return Qe}function at(Ot,Rt,Bt,Dt,yt){var Pt=Bt|0,ht=Dt|0,ur=yt|0,br=Ki();return li(br,qt),br.width=0,br.height=0,Ni(br,Rt),br.width=br.width||(qt.width>>ur)-Pt,br.height=br.height||(qt.height>>ur)-ht,ol(qt),Vn(br,Jn+Ot,Pt,ht,ur),io(),kn(br),Qe}function Ct(Ot){var Rt=Ot|0;if(Rt!==qt.width){Qe.width=qt.width=Rt,Qe.height=qt.height=Rt,ol(qt);for(var Bt=0;Bt<6;++Bt)for(var Dt=0;qt.mipmask>>Dt;++Dt)Me.texImage2D(Jn+Bt,Dt,qt.format,Rt>>Dt,Rt>>Dt,0,qt.format,qt.type,null);return io(),Gr.profile&&(qt.stats.size=cc(qt.internalformat,qt.type,Qe.width,Qe.height,!1,!0)),Qe}}return Qe(ne,we,Ue,ft,Xt,hr),Qe.subimage=at,Qe.resize=Ct,Qe._reglType="textureCube",Qe._texture=qt,Gr.profile&&(Qe.stats=qt.stats),Qe.destroy=function(){qt.decRef()},Qe}function q(){for(var ne=0;ne<Vs;++ne)Me.activeTexture(Fu+ne),Me.bindTexture(_n,null),Ys[ne]=null;dt(ss).forEach(Y),Nr.cubeCount=0,Nr.textureCount=0}Gr.profile&&(Nr.getTotalTextureSize=function(){var ne=0;return Object.keys(ss).forEach(function(we){ne+=ss[we].stats.size}),ne});function K(){for(var ne=0;ne<Vs;++ne){var we=Ys[ne];we&&(we.bindCount=0,we.unit=-1,Ys[ne]=null)}dt(ss).forEach(function(Ue){Ue.texture=Me.createTexture(),Me.bindTexture(Ue.target,Ue.texture);for(var ft=0;ft<32;++ft)if((Ue.mipmask&1<<ft)!==0)if(Ue.target===_n)Me.texImage2D(_n,ft,Ue.internalformat,Ue.width>>ft,Ue.height>>ft,0,Ue.internalformat,Ue.type,null);else for(var Xt=0;Xt<6;++Xt)Me.texImage2D(Jn+Xt,ft,Ue.internalformat,Ue.width>>ft,Ue.height>>ft,0,Ue.internalformat,Ue.type,null);al(Ue.texInfo,Ue.target)})}function de(){for(var ne=0;ne<Vs;++ne){var we=Ys[ne];we&&(we.bindCount=0,we.unit=-1,Ys[ne]=null),Me.activeTexture(Fu+ne),Me.bindTexture(_n,null),Me.bindTexture(ya,null)}}return{create2D:D,createCube:J,clear:q,getTexture:function(ne){return null},restore:K,refresh:de}}var Nl=36161,Kc=32854,Rc=32855,gs=36194,jf=33189,Gh=36168,rh=34041,sf=35907,Th=34836,Mu=34842,ih=34843,js=[];js[Kc]=2,js[Rc]=2,js[gs]=2,js[jf]=2,js[Gh]=1,js[rh]=4,js[sf]=4,js[Th]=16,js[Mu]=8,js[ih]=6;function Eu(Me,bt,zt){return js[Me]*bt*zt}var Dc=function(Me,bt,zt,Rr,jr){var Nr={rgba4:Kc,rgb565:gs,"rgb5 a1":Rc,depth:jf,stencil:Gh,"depth stencil":rh};bt.ext_srgb&&(Nr.srgba=sf),bt.ext_color_buffer_half_float&&(Nr.rgba16f=Mu,Nr.rgb16f=ih),bt.webgl_color_buffer_float&&(Nr.rgba32f=Th);var Gr=[];Object.keys(Nr).forEach(function(Wi){var si=Nr[Wi];Gr[si]=Wi});var mi=0,Ui={};function qi(Wi){this.id=mi++,this.refCount=1,this.renderbuffer=Wi,this.format=Kc,this.width=0,this.height=0,jr.profile&&(this.stats={size:0})}qi.prototype.decRef=function(){--this.refCount<=0&&Ei(this)};function Ei(Wi){var si=Wi.renderbuffer;Me.bindRenderbuffer(Nl,null),Me.deleteRenderbuffer(si),Wi.renderbuffer=null,Wi.refCount=0,delete Ui[Wi.id],Rr.renderbufferCount--}function Hn(Wi,si){var Mr=new qi(Me.createRenderbuffer());Ui[Mr.id]=Mr,Rr.renderbufferCount++;function Yr(Ri,ci){var an=0,Zi=0,Bn=Kc;if(typeof Ri=="object"&&Ri){var hi=Ri;if("shape"in hi){var li=hi.shape;an=li[0]|0,Zi=li[1]|0}else"radius"in hi&&(an=Zi=hi.radius|0),"width"in hi&&(an=hi.width|0),"height"in hi&&(Zi=hi.height|0);"format"in hi&&(Bn=Nr[hi.format])}else typeof Ri=="number"?(an=Ri|0,typeof ci=="number"?Zi=ci|0:Zi=an):Ri||(an=Zi=1);if(!(an===Mr.width&&Zi===Mr.height&&Bn===Mr.format))return Yr.width=Mr.width=an,Yr.height=Mr.height=Zi,Mr.format=Bn,Me.bindRenderbuffer(Nl,Mr.renderbuffer),Me.renderbufferStorage(Nl,Bn,an,Zi),jr.profile&&(Mr.stats.size=Eu(Mr.format,Mr.width,Mr.height)),Yr.format=Gr[Mr.format],Yr}function xi(Ri,ci){var an=Ri|0,Zi=ci|0||an;return an===Mr.width&&Zi===Mr.height||(Yr.width=Mr.width=an,Yr.height=Mr.height=Zi,Me.bindRenderbuffer(Nl,Mr.renderbuffer),Me.renderbufferStorage(Nl,Mr.format,an,Zi),jr.profile&&(Mr.stats.size=Eu(Mr.format,Mr.width,Mr.height))),Yr}return Yr(Wi,si),Yr.resize=xi,Yr._reglType="renderbuffer",Yr._renderbuffer=Mr,jr.profile&&(Yr.stats=Mr.stats),Yr.destroy=function(){Mr.decRef()},Yr}jr.profile&&(Rr.getTotalRenderbufferSize=function(){var Wi=0;return Object.keys(Ui).forEach(function(si){Wi+=Ui[si].stats.size}),Wi});function en(){dt(Ui).forEach(function(Wi){Wi.renderbuffer=Me.createRenderbuffer(),Me.bindRenderbuffer(Nl,Wi.renderbuffer),Me.renderbufferStorage(Nl,Wi.format,Wi.width,Wi.height)}),Me.bindRenderbuffer(Nl,null)}return{create:Hn,clear:function(){dt(Ui).forEach(Ei)},restore:en}},ks=36160,bc=36161,hu=3553,_u=34069,nl=36064,nh=36096,Ah=36128,zu=33306,Fc=36053,wc=36193,bd=5121,xf=5126,Pf=6407,Ou=6408,bf=[];bf[Ou]=4,bf[Pf]=3;var jl=[];jl[bd]=1,jl[xf]=4,jl[wc]=2;function lf(Me,bt,zt,Rr,jr,Nr){var Gr={cur:null,next:null,dirty:!1,setFBO:null},mi=["rgba"],Ui=["rgba4","rgb565","rgb5 a1"];bt.ext_srgb&&Ui.push("srgba"),bt.ext_color_buffer_half_float&&Ui.push("rgba16f","rgb16f"),bt.webgl_color_buffer_float&&Ui.push("rgba32f");var qi=["uint8"];bt.oes_texture_half_float&&qi.push("half float","float16"),bt.oes_texture_float&&qi.push("float","float32");function Ei(Vi,Ni,pn){this.target=Vi,this.texture=Ni,this.renderbuffer=pn;var Vn=0,na=0;Ni?(Vn=Ni.width,na=Ni.height):pn&&(Vn=pn.width,na=pn.height),this.width=Vn,this.height=na}function Hn(Vi){Vi&&(Vi.texture&&Vi.texture._texture.decRef(),Vi.renderbuffer&&Vi.renderbuffer._renderbuffer.decRef())}function en(Vi,Ni,pn){if(Vi)if(Vi.texture){var Vn=Vi.texture._texture,na=Math.max(1,Vn.width),Ki=Math.max(1,Vn.height);Vn.refCount+=1}else{var kn=Vi.renderbuffer._renderbuffer;kn.refCount+=1}}function Wi(Vi,Ni){Ni&&(Ni.texture?Me.framebufferTexture2D(ks,Vi,Ni.target,Ni.texture._texture.texture,0):Me.framebufferRenderbuffer(ks,Vi,bc,Ni.renderbuffer._renderbuffer.renderbuffer))}function si(Vi){var Ni=hu,pn=null,Vn=null,na=Vi;typeof Vi=="object"&&(na=Vi.data,"target"in Vi&&(Ni=Vi.target|0));var Ki=na._reglType;return Ki==="texture2d"||Ki==="textureCube"?pn=na:Ki==="renderbuffer"&&(Vn=na,Ni=bc),new Ei(Ni,pn,Vn)}function Mr(Vi,Ni,pn,Vn,na){if(pn){var Ki=Rr.create2D({width:Vi,height:Ni,format:Vn,type:na});return Ki._texture.refCount=0,new Ei(hu,Ki,null)}else{var kn=jr.create({width:Vi,height:Ni,format:Vn});return kn._renderbuffer.refCount=0,new Ei(bc,null,kn)}}function Yr(Vi){return Vi&&(Vi.texture||Vi.renderbuffer)}function xi(Vi,Ni,pn){Vi&&(Vi.texture?Vi.texture.resize(Ni,pn):Vi.renderbuffer&&Vi.renderbuffer.resize(Ni,pn),Vi.width=Ni,Vi.height=pn)}var Ri=0,ci={};function an(){this.id=Ri++,ci[this.id]=this,this.framebuffer=Me.createFramebuffer(),this.width=0,this.height=0,this.colorAttachments=[],this.depthAttachment=null,this.stencilAttachment=null,this.depthStencilAttachment=null}function Zi(Vi){Vi.colorAttachments.forEach(Hn),Hn(Vi.depthAttachment),Hn(Vi.stencilAttachment),Hn(Vi.depthStencilAttachment)}function Bn(Vi){var Ni=Vi.framebuffer;Me.deleteFramebuffer(Ni),Vi.framebuffer=null,Nr.framebufferCount--,delete ci[Vi.id]}function hi(Vi){var Ni;Me.bindFramebuffer(ks,Vi.framebuffer);var pn=Vi.colorAttachments;for(Ni=0;Ni<pn.length;++Ni)Wi(nl+Ni,pn[Ni]);for(Ni=pn.length;Ni<zt.maxColorAttachments;++Ni)Me.framebufferTexture2D(ks,nl+Ni,hu,null,0);Me.framebufferTexture2D(ks,zu,hu,null,0),Me.framebufferTexture2D(ks,nh,hu,null,0),Me.framebufferTexture2D(ks,Ah,hu,null,0),Wi(nh,Vi.depthAttachment),Wi(Ah,Vi.stencilAttachment),Wi(zu,Vi.depthStencilAttachment);var Vn=Me.checkFramebufferStatus(ks);Me.isContextLost(),Me.bindFramebuffer(ks,Gr.next?Gr.next.framebuffer:null),Gr.cur=Gr.next,Me.getError()}function li(Vi,Ni){var pn=new an;Nr.framebufferCount++;function Vn(Ki,kn){var ta,oa=0,ba=0,is=!0,Zs=!0,Va=null,Ml=!0,zo="rgba",Qs="uint8",al=1,Vl=null,ss=null,Vs=null,Ys=!1;if(typeof Ki=="number")oa=Ki|0,ba=kn|0||oa;else if(!Ki)oa=ba=1;else{var wa=Ki;if("shape"in wa){var ol=wa.shape;oa=ol[0],ba=ol[1]}else"radius"in wa&&(oa=ba=wa.radius),"width"in wa&&(oa=wa.width),"height"in wa&&(ba=wa.height);("color"in wa||"colors"in wa)&&(Va=wa.color||wa.colors,Array.isArray(Va)),Va||("colorCount"in wa&&(al=wa.colorCount|0),"colorTexture"in wa&&(Ml=!!wa.colorTexture,zo="rgba4"),"colorType"in wa&&(Qs=wa.colorType,Ml||(Qs==="half float"||Qs==="float16"?zo="rgba16f":(Qs==="float"||Qs==="float32")&&(zo="rgba32f"))),"colorFormat"in wa&&(zo=wa.colorFormat,mi.indexOf(zo)>=0?Ml=!0:Ui.indexOf(zo)>=0&&(Ml=!1))),("depthTexture"in wa||"depthStencilTexture"in wa)&&(Ys=!!(wa.depthTexture||wa.depthStencilTexture)),"depth"in wa&&(typeof wa.depth=="boolean"?is=wa.depth:(Vl=wa.depth,Zs=!1)),"stencil"in wa&&(typeof wa.stencil=="boolean"?Zs=wa.stencil:(ss=wa.stencil,is=!1)),"depthStencil"in wa&&(typeof wa.depthStencil=="boolean"?is=Zs=wa.depthStencil:(Vs=wa.depthStencil,is=!1,Zs=!1))}var io=null,Y=null,D=null,J=null;if(Array.isArray(Va))io=Va.map(si);else if(Va)io=[si(Va)];else for(io=new Array(al),ta=0;ta<al;++ta)io[ta]=Mr(oa,ba,Ml,zo,Qs);oa=oa||io[0].width,ba=ba||io[0].height,Vl?Y=si(Vl):is&&!Zs&&(Y=Mr(oa,ba,Ys,"depth","uint32")),ss?D=si(ss):Zs&&!is&&(D=Mr(oa,ba,!1,"stencil","uint8")),Vs?J=si(Vs):!Vl&&!ss&&Zs&&is&&(J=Mr(oa,ba,Ys,"depth stencil","depth stencil"));var q=null;for(ta=0;ta<io.length;++ta)if(en(io[ta],oa,ba),io[ta]&&io[ta].texture){var K=bf[io[ta].texture._texture.format]*jl[io[ta].texture._texture.type];q===null&&(q=K)}return en(Y,oa,ba),en(D,oa,ba),en(J,oa,ba),Zi(pn),pn.width=oa,pn.height=ba,pn.colorAttachments=io,pn.depthAttachment=Y,pn.stencilAttachment=D,pn.depthStencilAttachment=J,Vn.color=io.map(Yr),Vn.depth=Yr(Y),Vn.stencil=Yr(D),Vn.depthStencil=Yr(J),Vn.width=pn.width,Vn.height=pn.height,hi(pn),Vn}function na(Ki,kn){var ta=Math.max(Ki|0,1),oa=Math.max(kn|0||ta,1);if(ta===pn.width&&oa===pn.height)return Vn;for(var ba=pn.colorAttachments,is=0;is<ba.length;++is)xi(ba[is],ta,oa);return xi(pn.depthAttachment,ta,oa),xi(pn.stencilAttachment,ta,oa),xi(pn.depthStencilAttachment,ta,oa),pn.width=Vn.width=ta,pn.height=Vn.height=oa,hi(pn),Vn}return Vn(Vi,Ni),e(Vn,{resize:na,_reglType:"framebuffer",_framebuffer:pn,destroy:function(){Bn(pn),Zi(pn)},use:function(Ki){Gr.setFBO({framebuffer:Vn},Ki)}})}function mn(Vi){var Ni=Array(6);function pn(na){var Ki,kn={color:null},ta=0,oa=null,ba="rgba",is="uint8",Zs=1;if(typeof na=="number")ta=na|0;else if(!na)ta=1;else{var Va=na;if("shape"in Va){var Ml=Va.shape;ta=Ml[0]}else"radius"in Va&&(ta=Va.radius|0),"width"in Va?(ta=Va.width|0,"height"in Va):"height"in Va&&(ta=Va.height|0);("color"in Va||"colors"in Va)&&(oa=Va.color||Va.colors,Array.isArray(oa)),oa||("colorCount"in Va&&(Zs=Va.colorCount|0),"colorType"in Va&&(is=Va.colorType),"colorFormat"in Va&&(ba=Va.colorFormat)),"depth"in Va&&(kn.depth=Va.depth),"stencil"in Va&&(kn.stencil=Va.stencil),"depthStencil"in Va&&(kn.depthStencil=Va.depthStencil)}var zo;if(oa)if(Array.isArray(oa))for(zo=[],Ki=0;Ki<oa.length;++Ki)zo[Ki]=oa[Ki];else zo=[oa];else{zo=Array(Zs);var Qs={radius:ta,format:ba,type:is};for(Ki=0;Ki<Zs;++Ki)zo[Ki]=Rr.createCube(Qs)}for(kn.color=Array(zo.length),Ki=0;Ki<zo.length;++Ki){var al=zo[Ki];ta=ta||al.width,kn.color[Ki]={target:_u,data:zo[Ki]}}for(Ki=0;Ki<6;++Ki){for(var Vl=0;Vl<zo.length;++Vl)kn.color[Vl].target=_u+Ki;Ki>0&&(kn.depth=Ni[0].depth,kn.stencil=Ni[0].stencil,kn.depthStencil=Ni[0].depthStencil),Ni[Ki]?Ni[Ki](kn):Ni[Ki]=li(kn)}return e(pn,{width:ta,height:ta,color:zo})}function Vn(na){var Ki,kn=na|0;if(kn===pn.width)return pn;var ta=pn.color;for(Ki=0;Ki<ta.length;++Ki)ta[Ki].resize(kn);for(Ki=0;Ki<6;++Ki)Ni[Ki].resize(kn);return pn.width=pn.height=kn,pn}return pn(Vi),e(pn,{faces:Ni,resize:Vn,_reglType:"framebufferCube",destroy:function(){Ni.forEach(function(na){na.destroy()})}})}function Ji(){Gr.cur=null,Gr.next=null,Gr.dirty=!0,dt(ci).forEach(function(Vi){Vi.framebuffer=Me.createFramebuffer(),hi(Vi)})}return e(Gr,{getFramebuffer:function(Vi){if(typeof Vi=="function"&&Vi._reglType==="framebuffer"){var Ni=Vi._framebuffer;if(Ni instanceof an)return Ni}return null},create:li,createCube:mn,clear:function(){dt(ci).forEach(Bn)},restore:Ji})}var Hh=5126,If=34962,Cs=34963;function du(){this.state=0,this.x=0,this.y=0,this.z=0,this.w=0,this.buffer=null,this.size=0,this.normalized=!1,this.type=Hh,this.offset=0,this.stride=0,this.divisor=0}function ku(Me,bt,zt,Rr,jr,Nr,Gr){for(var mi=zt.maxAttributes,Ui=new Array(mi),qi=0;qi<mi;++qi)Ui[qi]=new du;var Ei=0,Hn={},en={Record:du,scope:{},state:Ui,currentVAO:null,targetVAO:null,restore:si()?Zi:function(){},createVAO:Bn,getVAO:Yr,destroyBuffer:Wi,setVAO:si()?xi:Ri,clear:si()?ci:function(){}};function Wi(hi){for(var li=0;li<Ui.length;++li){var mn=Ui[li];mn.buffer===hi&&(Me.disableVertexAttribArray(li),mn.buffer=null)}}function si(){return bt.oes_vertex_array_object}function Mr(){return bt.angle_instanced_arrays}function Yr(hi){return typeof hi=="function"&&hi._vao?hi._vao:null}function xi(hi){if(hi!==en.currentVAO){var li=si();hi?li.bindVertexArrayOES(hi.vao):li.bindVertexArrayOES(null),en.currentVAO=hi}}function Ri(hi){if(hi!==en.currentVAO){if(hi)hi.bindAttrs();else{for(var li=Mr(),mn=0;mn<Ui.length;++mn){var Ji=Ui[mn];Ji.buffer?(Me.enableVertexAttribArray(mn),Ji.buffer.bind(),Me.vertexAttribPointer(mn,Ji.size,Ji.type,Ji.normalized,Ji.stride,Ji.offfset),li&&Ji.divisor&&li.vertexAttribDivisorANGLE(mn,Ji.divisor)):(Me.disableVertexAttribArray(mn),Me.vertexAttrib4f(mn,Ji.x,Ji.y,Ji.z,Ji.w))}Gr.elements?Me.bindBuffer(Cs,Gr.elements.buffer.buffer):Me.bindBuffer(Cs,null)}en.currentVAO=hi}}function ci(){dt(Hn).forEach(function(hi){hi.destroy()})}function an(){this.id=++Ei,this.attributes=[],this.elements=null,this.ownsElements=!1,this.count=0,this.offset=0,this.instances=-1,this.primitive=4;var hi=si();hi?this.vao=hi.createVertexArrayOES():this.vao=null,Hn[this.id]=this,this.buffers=[]}an.prototype.bindAttrs=function(){for(var hi=Mr(),li=this.attributes,mn=0;mn<li.length;++mn){var Ji=li[mn];Ji.buffer?(Me.enableVertexAttribArray(mn),Me.bindBuffer(If,Ji.buffer.buffer),Me.vertexAttribPointer(mn,Ji.size,Ji.type,Ji.normalized,Ji.stride,Ji.offset),hi&&Ji.divisor&&hi.vertexAttribDivisorANGLE(mn,Ji.divisor)):(Me.disableVertexAttribArray(mn),Me.vertexAttrib4f(mn,Ji.x,Ji.y,Ji.z,Ji.w))}for(var Vi=li.length;Vi<mi;++Vi)Me.disableVertexAttribArray(Vi);var Ni=Nr.getElements(this.elements);Ni?Me.bindBuffer(Cs,Ni.buffer.buffer):Me.bindBuffer(Cs,null)},an.prototype.refresh=function(){var hi=si();hi&&(hi.bindVertexArrayOES(this.vao),this.bindAttrs(),en.currentVAO=null,hi.bindVertexArrayOES(null))},an.prototype.destroy=function(){if(this.vao){var hi=si();this===en.currentVAO&&(en.currentVAO=null,hi.bindVertexArrayOES(null)),hi.deleteVertexArrayOES(this.vao),this.vao=null}this.ownsElements&&(this.elements.destroy(),this.elements=null,this.ownsElements=!1),Hn[this.id]&&(delete Hn[this.id],Rr.vaoCount-=1)};function Zi(){var hi=si();hi&&dt(Hn).forEach(function(li){li.refresh()})}function Bn(hi){var li=new an;Rr.vaoCount+=1;function mn(Ji){var Vi;if(Array.isArray(Ji))Vi=Ji,li.elements&&li.ownsElements&&li.elements.destroy(),li.elements=null,li.ownsElements=!1,li.offset=0,li.count=0,li.instances=-1,li.primitive=4;else{if(Ji.elements){var Ni=Ji.elements;li.ownsElements?typeof Ni=="function"&&Ni._reglType==="elements"?(li.elements.destroy(),li.ownsElements=!1):(li.elements(Ni),li.ownsElements=!1):Nr.getElements(Ji.elements)?(li.elements=Ji.elements,li.ownsElements=!1):(li.elements=Nr.create(Ji.elements),li.ownsElements=!0)}else li.elements=null,li.ownsElements=!1;Vi=Ji.attributes,li.offset=0,li.count=-1,li.instances=-1,li.primitive=4,li.elements&&(li.count=li.elements._elements.vertCount,li.primitive=li.elements._elements.primType),"offset"in Ji&&(li.offset=Ji.offset|0),"count"in Ji&&(li.count=Ji.count|0),"instances"in Ji&&(li.instances=Ji.instances|0),"primitive"in Ji&&(li.primitive=Sn[Ji.primitive])}var pn={},Vn=li.attributes;Vn.length=Vi.length;for(var na=0;na<Vi.length;++na){var Ki=Vi[na],kn=Vn[na]=new du,ta=Ki.data||Ki;if(Array.isArray(ta)||Br(ta)||Vr(ta)){var oa;li.buffers[na]&&(oa=li.buffers[na],Br(ta)&&oa._buffer.byteLength>=ta.byteLength?oa.subdata(ta):(oa.destroy(),li.buffers[na]=null)),li.buffers[na]||(oa=li.buffers[na]=jr.create(Ki,If,!1,!0)),kn.buffer=jr.getBuffer(oa),kn.size=kn.buffer.dimension|0,kn.normalized=!1,kn.type=kn.buffer.dtype,kn.offset=0,kn.stride=0,kn.divisor=0,kn.state=1,pn[na]=1}else jr.getBuffer(Ki)?(kn.buffer=jr.getBuffer(Ki),kn.size=kn.buffer.dimension|0,kn.normalized=!1,kn.type=kn.buffer.dtype,kn.offset=0,kn.stride=0,kn.divisor=0,kn.state=1):jr.getBuffer(Ki.buffer)?(kn.buffer=jr.getBuffer(Ki.buffer),kn.size=(+Ki.size||kn.buffer.dimension)|0,kn.normalized=!!Ki.normalized||!1,"type"in Ki?kn.type=ji[Ki.type]:kn.type=kn.buffer.dtype,kn.offset=(Ki.offset||0)|0,kn.stride=(Ki.stride||0)|0,kn.divisor=(Ki.divisor||0)|0,kn.state=1):"x"in Ki&&(kn.x=+Ki.x||0,kn.y=+Ki.y||0,kn.z=+Ki.z||0,kn.w=+Ki.w||0,kn.state=2)}for(var ba=0;ba<li.buffers.length;++ba)!pn[ba]&&li.buffers[ba]&&(li.buffers[ba].destroy(),li.buffers[ba]=null);return li.refresh(),mn}return mn.destroy=function(){for(var Ji=0;Ji<li.buffers.length;++Ji)li.buffers[Ji]&&li.buffers[Ji].destroy();li.buffers.length=0,li.ownsElements&&(li.elements.destroy(),li.elements=null,li.ownsElements=!1),li.destroy()},mn._vao=li,mn._reglType="vao",mn(hi)}return en}var Wf=35632,Us=35633,wf=35718,zc=35721;function Wu(Me,bt,zt,Rr){var jr={},Nr={};function Gr(Mr,Yr,xi,Ri){this.name=Mr,this.id=Yr,this.location=xi,this.info=Ri}function mi(Mr,Yr){for(var xi=0;xi<Mr.length;++xi)if(Mr[xi].id===Yr.id){Mr[xi].location=Yr.location;return}Mr.push(Yr)}function Ui(Mr,Yr,xi){var Ri=Mr===Wf?jr:Nr,ci=Ri[Yr];if(!ci){var an=bt.str(Yr);ci=Me.createShader(Mr),Me.shaderSource(ci,an),Me.compileShader(ci),Ri[Yr]=ci}return ci}var qi={},Ei=[],Hn=0;function en(Mr,Yr){this.id=Hn++,this.fragId=Mr,this.vertId=Yr,this.program=null,this.uniforms=[],this.attributes=[],this.refCount=1,Rr.profile&&(this.stats={uniformsCount:0,attributesCount:0})}function Wi(Mr,Yr,xi){var Ri,ci,an=Ui(Wf,Mr.fragId),Zi=Ui(Us,Mr.vertId),Bn=Mr.program=Me.createProgram();if(Me.attachShader(Bn,an),Me.attachShader(Bn,Zi),xi)for(Ri=0;Ri<xi.length;++Ri){var hi=xi[Ri];Me.bindAttribLocation(Bn,hi[0],hi[1])}Me.linkProgram(Bn);var li=Me.getProgramParameter(Bn,wf);Rr.profile&&(Mr.stats.uniformsCount=li);var mn=Mr.uniforms;for(Ri=0;Ri<li;++Ri)if(ci=Me.getActiveUniform(Bn,Ri),ci)if(ci.size>1)for(var Ji=0;Ji<ci.size;++Ji){var Vi=ci.name.replace("[0]","["+Ji+"]");mi(mn,new Gr(Vi,bt.id(Vi),Me.getUniformLocation(Bn,Vi),ci))}else mi(mn,new Gr(ci.name,bt.id(ci.name),Me.getUniformLocation(Bn,ci.name),ci));var Ni=Me.getProgramParameter(Bn,zc);Rr.profile&&(Mr.stats.attributesCount=Ni);var pn=Mr.attributes;for(Ri=0;Ri<Ni;++Ri)ci=Me.getActiveAttrib(Bn,Ri),ci&&mi(pn,new Gr(ci.name,bt.id(ci.name),Me.getAttribLocation(Bn,ci.name),ci))}Rr.profile&&(zt.getMaxUniformsCount=function(){var Mr=0;return Ei.forEach(function(Yr){Yr.stats.uniformsCount>Mr&&(Mr=Yr.stats.uniformsCount)}),Mr},zt.getMaxAttributesCount=function(){var Mr=0;return Ei.forEach(function(Yr){Yr.stats.attributesCount>Mr&&(Mr=Yr.stats.attributesCount)}),Mr});function si(){jr={},Nr={};for(var Mr=0;Mr<Ei.length;++Mr)Wi(Ei[Mr],null,Ei[Mr].attributes.map(function(Yr){return[Yr.location,Yr.name]}))}return{clear:function(){var Mr=Me.deleteShader.bind(Me);dt(jr).forEach(Mr),jr={},dt(Nr).forEach(Mr),Nr={},Ei.forEach(function(Yr){Me.deleteProgram(Yr.program)}),Ei.length=0,qi={},zt.shaderCount=0},program:function(Mr,Yr,xi,Ri){var ci=qi[Yr];ci||(ci=qi[Yr]={});var an=ci[Mr];if(an&&(an.refCount++,!Ri))return an;var Zi=new en(Yr,Mr);return zt.shaderCount++,Wi(Zi,xi,Ri),an||(ci[Mr]=Zi),Ei.push(Zi),e(Zi,{destroy:function(){if(Zi.refCount--,Zi.refCount<=0){Me.deleteProgram(Zi.program);var Bn=Ei.indexOf(Zi);Ei.splice(Bn,1),zt.shaderCount--}ci[Zi.vertId].refCount<=0&&(Me.deleteShader(Nr[Zi.vertId]),delete Nr[Zi.vertId],delete qi[Zi.fragId][Zi.vertId]),Object.keys(qi[Zi.fragId]).length||(Me.deleteShader(jr[Zi.fragId]),delete jr[Zi.fragId],delete qi[Zi.fragId])}})},restore:si,shader:Ui,frag:-1,vert:-1}}var Rf=6408,Xu=5121,uf=3333,Xf=5126;function Wl(Me,bt,zt,Rr,jr,Nr,Gr){function mi(Ei){var Hn;bt.next===null?Hn=Xu:Hn=bt.next.colorAttachments[0].texture._texture.type;var en=0,Wi=0,si=Rr.framebufferWidth,Mr=Rr.framebufferHeight,Yr=null;Br(Ei)?Yr=Ei:Ei&&(en=Ei.x|0,Wi=Ei.y|0,si=(Ei.width||Rr.framebufferWidth-en)|0,Mr=(Ei.height||Rr.framebufferHeight-Wi)|0,Yr=Ei.data||null),zt();var xi=si*Mr*4;return Yr||(Hn===Xu?Yr=new Uint8Array(xi):Hn===Xf&&(Yr=Yr||new Float32Array(xi))),Me.pixelStorei(uf,4),Me.readPixels(en,Wi,si,Mr,Rf,Hn,Yr),Yr}function Ui(Ei){var Hn;return bt.setFBO({framebuffer:Ei.framebuffer},function(){Hn=mi(Ei)}),Hn}function qi(Ei){return!Ei||!("framebuffer"in Ei)?mi(Ei):Ui(Ei)}return qi}var ah=0,Zu="";function Oc(Me){return vu(Tc(fc(Me)))}function Tc(Me){return At(Yi(Bc(Me),Me.length*8))}function wl(Me,bt){var zt=Bc(Me);zt.length>16&&(zt=Yi(zt,Me.length*8));for(var Rr=Array(16),jr=Array(16),Nr=0;Nr<16;Nr++)Rr[Nr]=zt[Nr]^909522486,jr[Nr]=zt[Nr]^1549556828;var Gr=Yi(Rr.concat(Bc(bt)),512+bt.length*8);return At(Yi(jr.concat(Gr),768))}function vu(Me){for(var bt=ah?"0123456789ABCDEF":"0123456789abcdef",zt="",Rr,jr=0;jr<Me.length;jr++)Rr=Me.charCodeAt(jr),zt+=bt.charAt(Rr>>>4&15)+bt.charAt(Rr&15);return zt}function qc(Me){for(var bt="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",zt="",Rr=Me.length,jr=0;jr<Rr;jr+=3)for(var Nr=Me.charCodeAt(jr)<<16|(jr+1<Rr?Me.charCodeAt(jr+1)<<8:0)|(jr+2<Rr?Me.charCodeAt(jr+2):0),Gr=0;Gr<4;Gr++)jr*8+Gr*6>Me.length*8?zt+=Zu:zt+=bt.charAt(Nr>>>6*(3-Gr)&63);return zt}function cf(Me,bt){var zt=bt.length,Rr=Array(),jr,Nr,Gr,mi,Ui=Array(Math.ceil(Me.length/2));for(jr=0;jr<Ui.length;jr++)Ui[jr]=Me.charCodeAt(jr*2)<<8|Me.charCodeAt(jr*2+1);for(;Ui.length>0;){for(mi=Array(),Gr=0,jr=0;jr<Ui.length;jr++)Gr=(Gr<<16)+Ui[jr],Nr=Math.floor(Gr/zt),Gr-=Nr*zt,(mi.length>0||Nr>0)&&(mi[mi.length]=Nr);Rr[Rr.length]=Gr,Ui=mi}var qi="";for(jr=Rr.length-1;jr>=0;jr--)qi+=bt.charAt(Rr[jr]);var Ei=Math.ceil(Me.length*8/(Math.log(bt.length)/Math.log(2)));for(jr=qi.length;jr<Ei;jr++)qi=bt[0]+qi;return qi}function fc(Me){for(var bt="",zt=-1,Rr,jr;++zt<Me.length;)Rr=Me.charCodeAt(zt),jr=zt+1<Me.length?Me.charCodeAt(zt+1):0,55296<=Rr&&Rr<=56319&&56320<=jr&&jr<=57343&&(Rr=65536+((Rr&1023)<<10)+(jr&1023),zt++),Rr<=127?bt+=String.fromCharCode(Rr):Rr<=2047?bt+=String.fromCharCode(192|Rr>>>6&31,128|Rr&63):Rr<=65535?bt+=String.fromCharCode(224|Rr>>>12&15,128|Rr>>>6&63,128|Rr&63):Rr<=2097151&&(bt+=String.fromCharCode(240|Rr>>>18&7,128|Rr>>>12&63,128|Rr>>>6&63,128|Rr&63));return bt}function Bc(Me){for(var bt=Array(Me.length>>2),zt=0;zt<bt.length;zt++)bt[zt]=0;for(var zt=0;zt<Me.length*8;zt+=8)bt[zt>>5]|=(Me.charCodeAt(zt/8)&255)<<24-zt%32;return bt}function At(Me){for(var bt="",zt=0;zt<Me.length*32;zt+=8)bt+=String.fromCharCode(Me[zt>>5]>>>24-zt%32&255);return bt}function Wt(Me,bt){return Me>>>bt|Me<<32-bt}function Cr(Me,bt){return Me>>>bt}function Ar(Me,bt,zt){return Me&bt^~Me&zt}function Kr(Me,bt,zt){return Me&bt^Me&zt^bt&zt}function ki(Me){return Wt(Me,2)^Wt(Me,13)^Wt(Me,22)}function Xi(Me){return Wt(Me,6)^Wt(Me,11)^Wt(Me,25)}function dn(Me){return Wt(Me,7)^Wt(Me,18)^Cr(Me,3)}function wn(Me){return Wt(Me,17)^Wt(Me,19)^Cr(Me,10)}var Nn=new Array(1116352408,1899447441,-1245643825,-373957723,961987163,1508970993,-1841331548,-1424204075,-670586216,310598401,607225278,1426881987,1925078388,-2132889090,-1680079193,-1046744716,-459576895,-272742522,264347078,604807628,770255983,1249150122,1555081692,1996064986,-1740746414,-1473132947,-1341970488,-1084653625,-958395405,-710438585,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,-2117940946,-1838011259,-1564481375,-1474664885,-1035236496,-949202525,-778901479,-694614492,-200395387,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,-2067236844,-1933114872,-1866530822,-1538233109,-1090935817,-965641998);function Yi(Me,bt){var zt=new Array(1779033703,-1150833019,1013904242,-1521486534,1359893119,-1694144372,528734635,1541459225),Rr=new Array(64),jr,Nr,Gr,mi,Ui,qi,Ei,Hn,en,Wi,si,Mr;for(Me[bt>>5]|=128<<24-bt%32,Me[(bt+64>>9<<4)+15]=bt,en=0;en<Me.length;en+=16){for(jr=zt[0],Nr=zt[1],Gr=zt[2],mi=zt[3],Ui=zt[4],qi=zt[5],Ei=zt[6],Hn=zt[7],Wi=0;Wi<64;Wi++)Wi<16?Rr[Wi]=Me[Wi+en]:Rr[Wi]=Qi(Qi(Qi(wn(Rr[Wi-2]),Rr[Wi-7]),dn(Rr[Wi-15])),Rr[Wi-16]),si=Qi(Qi(Qi(Qi(Hn,Xi(Ui)),Ar(Ui,qi,Ei)),Nn[Wi]),Rr[Wi]),Mr=Qi(ki(jr),Kr(jr,Nr,Gr)),Hn=Ei,Ei=qi,qi=Ui,Ui=Qi(mi,si),mi=Gr,Gr=Nr,Nr=jr,jr=Qi(si,Mr);zt[0]=Qi(jr,zt[0]),zt[1]=Qi(Nr,zt[1]),zt[2]=Qi(Gr,zt[2]),zt[3]=Qi(mi,zt[3]),zt[4]=Qi(Ui,zt[4]),zt[5]=Qi(qi,zt[5]),zt[6]=Qi(Ei,zt[6]),zt[7]=Qi(Hn,zt[7])}return zt}function Qi(Me,bt){var zt=(Me&65535)+(bt&65535),Rr=(Me>>16)+(bt>>16)+(zt>>16);return Rr<<16|zt&65535}function on(Me){return Array.prototype.slice.call(Me)}function Fi(Me){return on(Me).join("")}function $n(Me){var bt=Me&&Me.cache,zt=0,Rr=[],jr=[],Nr=[];function Gr(si,Mr){var Yr=Mr&&Mr.stable;if(!Yr){for(var xi=0;xi<jr.length;++xi)if(jr[xi]===si&&!Nr[xi])return Rr[xi]}var Ri="g"+zt++;return Rr.push(Ri),jr.push(si),Nr.push(Yr),Ri}function mi(){var si=[];function Mr(){si.push.apply(si,on(arguments))}var Yr=[];function xi(){var Ri="v"+zt++;return Yr.push(Ri),arguments.length>0&&(si.push(Ri,"="),si.push.apply(si,on(arguments)),si.push(";")),Ri}return e(Mr,{def:xi,toString:function(){return Fi([Yr.length>0?"var "+Yr.join(",")+";":"",Fi(si)])}})}function Ui(){var si=mi(),Mr=mi(),Yr=si.toString,xi=Mr.toString;function Ri(ci,an){Mr(ci,an,"=",si.def(ci,an),";")}return e(function(){si.apply(si,on(arguments))},{def:si.def,entry:si,exit:Mr,save:Ri,set:function(ci,an,Zi){Ri(ci,an),si(ci,an,"=",Zi,";")},toString:function(){return Yr()+xi()}})}function qi(){var si=Fi(arguments),Mr=Ui(),Yr=Ui(),xi=Mr.toString,Ri=Yr.toString;return e(Mr,{then:function(){return Mr.apply(Mr,on(arguments)),this},else:function(){return Yr.apply(Yr,on(arguments)),this},toString:function(){var ci=Ri();return ci&&(ci="else{"+ci+"}"),Fi(["if(",si,"){",xi(),"}",ci])}})}var Ei=mi(),Hn={};function en(si,Mr){var Yr=[];function xi(){var Bn="a"+Yr.length;return Yr.push(Bn),Bn}Mr=Mr||0;for(var Ri=0;Ri<Mr;++Ri)xi();var ci=Ui(),an=ci.toString,Zi=Hn[si]=e(ci,{arg:xi,toString:function(){return Fi(["function(",Yr.join(),"){",an(),"}"])}});return Zi}function Wi(){var si=['"use strict";',Ei,"return {"];Object.keys(Hn).forEach(function(Ri){si.push('"',Ri,'":',Hn[Ri].toString(),",")}),si.push("}");var Mr=Fi(si).replace(/;/g,`;
+`).replace(/}/g,`}
+`).replace(/{/g,`{
+`),Yr;if(bt&&(Yr=Oc(Mr),bt[Yr]))return bt[Yr].apply(null,jr);var xi=Function.apply(null,Rr.concat(Mr));return bt&&(bt[Yr]=xi),xi.apply(null,jr)}return{global:Ei,link:Gr,block:mi,proc:en,scope:Ui,cond:qi,compile:Wi}}var Ca="xyzw".split(""),Ra=5121,La=1,Na=2,Yn=0,Dn=1,Ka=2,bo=3,Xo=4,Ss=5,as=6,ws="dither",Ho="blend.enable",ml="blend.color",Ws="blend.equation",Ls="blend.func",va="depth.enable",no="depth.func",ys="depth.range",rs="depth.mask",$l="colorMask",Cu="cull.enable",Yu="cull.face",Nc="frontFace",pu="lineWidth",Uc="polygonOffset.enable",xu="polygonOffset.offset",Ac="sample.alpha",Ua="sample.enable",oo="sample.coverage",Vc="stencil.enable",hc="stencil.mask",Ku="stencil.func",ue="stencil.opFront",w="stencil.opBack",B="scissor.enable",Q="scissor.box",ee="viewport",le="profile",qe="framebuffer",Xe="vert",ot="frag",Tt="elements",Yt="primitive",Kt="count",xr="offset",Ir="instances",ve="vao",be="Width",De="Height",Be=qe+be,et=qe+De,We=ee+be,it=ee+De,Ft="drawingBuffer",Ht=Ft+be,tr=Ft+De,dr=[Ls,Ws,Ku,ue,w,oo,ee,Q,xu],Sr=34962,Or=34963,Wr=2884,ni=3042,Pi=3024,cn=2960,ln=2929,Cn=3089,Kn=32823,Ta=32926,fa=32928,$a=5126,Co=35664,Qa=35665,mo=35666,Bo=5124,Ps=35667,Ts=35668,wo=35669,To=35670,hl=35671,Ul=35672,Lu=35673,au=35674,Js=35675,Ql=35676,dc=35678,Tl=35680,Al=4,X=1028,se=1029,Te=2304,Ne=2305,He=32775,Ye=32776,kt=519,nt=7680,jt=0,gr=1,yr=32774,Hr=513,qr=36160,_i=36064,bi={0:0,1:1,zero:0,one:1,"src color":768,"one minus src color":769,"src alpha":770,"one minus src alpha":771,"dst color":774,"one minus dst color":775,"dst alpha":772,"one minus dst alpha":773,"constant color":32769,"one minus constant color":32770,"constant alpha":32771,"one minus constant alpha":32772,"src alpha saturate":776},Zr={never:512,less:513,"<":513,equal:514,"=":514,"==":514,"===":514,lequal:515,"<=":515,greater:516,">":516,notequal:517,"!=":517,"!==":517,gequal:518,">=":518,always:519},ai={0:0,zero:0,keep:7680,replace:7681,increment:7682,decrement:7683,"increment wrap":34055,"decrement wrap":34056,invert:5386},gi={cw:Te,ccw:Ne};function Ii(Me){return Array.isArray(Me)||Br(Me)||Vr(Me)}function Si(Me){return Me.sort(function(bt,zt){return bt===ee?-1:zt===ee?1:bt<zt?-1:1})}function ei(Me,bt,zt,Rr){this.thisDep=Me,this.contextDep=bt,this.propDep=zt,this.append=Rr}function Ln(Me){return Me&&!(Me.thisDep||Me.contextDep||Me.propDep)}function En(Me){return new ei(!1,!1,!1,Me)}function Un(Me,bt){var zt=Me.type;if(zt===Yn){var Rr=Me.data.length;return new ei(!0,Rr>=1,Rr>=2,bt)}else if(zt===Xo){var jr=Me.data;return new ei(jr.thisDep,jr.contextDep,jr.propDep,bt)}else{if(zt===Ss)return new ei(!1,!1,!1,bt);if(zt===as){for(var Nr=!1,Gr=!1,mi=!1,Ui=0;Ui<Me.data.length;++Ui){var qi=Me.data[Ui];if(qi.type===Dn)mi=!0;else if(qi.type===Ka)Gr=!0;else if(qi.type===bo)Nr=!0;else if(qi.type===Yn){Nr=!0;var Ei=qi.data;Ei>=1&&(Gr=!0),Ei>=2&&(mi=!0)}else qi.type===Xo&&(Nr=Nr||qi.data.thisDep,Gr=Gr||qi.data.contextDep,mi=mi||qi.data.propDep)}return new ei(Nr,Gr,mi,bt)}else return new ei(zt===bo,zt===Ka,zt===Dn,bt)}}var ia=new ei(!1,!1,!1,function(){});function Ea(Me,bt,zt,Rr,jr,Nr,Gr,mi,Ui,qi,Ei,Hn,en,Wi,si,Mr){var Yr=qi.Record,xi={add:32774,subtract:32778,"reverse subtract":32779};zt.ext_blend_minmax&&(xi.min=He,xi.max=Ye);var Ri=zt.angle_instanced_arrays,ci=zt.webgl_draw_buffers,an=zt.oes_vertex_array_object,Zi={dirty:!0,profile:Mr.profile},Bn={},hi=[],li={},mn={};function Ji(Ve){return Ve.replace(".","_")}function Vi(Ve,Qe,at){var Ct=Ji(Ve);hi.push(Ve),Bn[Ct]=Zi[Ct]=!!at,li[Ct]=Qe}function Ni(Ve,Qe,at){var Ct=Ji(Ve);hi.push(Ve),Array.isArray(at)?(Zi[Ct]=at.slice(),Bn[Ct]=at.slice()):Zi[Ct]=Bn[Ct]=at,mn[Ct]=Qe}function pn(Ve){return!!isNaN(Ve)}Vi(ws,Pi),Vi(Ho,ni),Ni(ml,"blendColor",[0,0,0,0]),Ni(Ws,"blendEquationSeparate",[yr,yr]),Ni(Ls,"blendFuncSeparate",[gr,jt,gr,jt]),Vi(va,ln,!0),Ni(no,"depthFunc",Hr),Ni(ys,"depthRange",[0,1]),Ni(rs,"depthMask",!0),Ni($l,$l,[!0,!0,!0,!0]),Vi(Cu,Wr),Ni(Yu,"cullFace",se),Ni(Nc,Nc,Ne),Ni(pu,pu,1),Vi(Uc,Kn),Ni(xu,"polygonOffset",[0,0]),Vi(Ac,Ta),Vi(Ua,fa),Ni(oo,"sampleCoverage",[1,!1]),Vi(Vc,cn),Ni(hc,"stencilMask",-1),Ni(Ku,"stencilFunc",[kt,0,-1]),Ni(ue,"stencilOpSeparate",[X,nt,nt,nt]),Ni(w,"stencilOpSeparate",[se,nt,nt,nt]),Vi(B,Cn),Ni(Q,"scissor",[0,0,Me.drawingBufferWidth,Me.drawingBufferHeight]),Ni(ee,ee,[0,0,Me.drawingBufferWidth,Me.drawingBufferHeight]);var Vn={gl:Me,context:en,strings:bt,next:Bn,current:Zi,draw:Hn,elements:Nr,buffer:jr,shader:Ei,attributes:qi.state,vao:qi,uniforms:Ui,framebuffer:mi,extensions:zt,timer:Wi,isBufferArgs:Ii},na={primTypes:Sn,compareFuncs:Zr,blendFuncs:bi,blendEquations:xi,stencilOps:ai,glTypes:ji,orientationType:gi};ci&&(na.backBuffer=[se],na.drawBuffer=M(Rr.maxDrawbuffers,function(Ve){return Ve===0?[0]:M(Ve,function(Qe){return _i+Qe})}));var Ki=0;function kn(){var Ve=$n({cache:si}),Qe=Ve.link,at=Ve.global;Ve.id=Ki++,Ve.batchId="0";var Ct=Qe(Vn),Ot=Ve.shared={props:"a0"};Object.keys(Vn).forEach(function(Pt){Ot[Pt]=at.def(Ct,".",Pt)});var Rt=Ve.next={},Bt=Ve.current={};Object.keys(mn).forEach(function(Pt){Array.isArray(Zi[Pt])&&(Rt[Pt]=at.def(Ot.next,".",Pt),Bt[Pt]=at.def(Ot.current,".",Pt))});var Dt=Ve.constants={};Object.keys(na).forEach(function(Pt){Dt[Pt]=at.def(JSON.stringify(na[Pt]))}),Ve.invoke=function(Pt,ht){switch(ht.type){case Yn:var ur=["this",Ot.context,Ot.props,Ve.batchId];return Pt.def(Qe(ht.data),".call(",ur.slice(0,Math.max(ht.data.length+1,4)),")");case Dn:return Pt.def(Ot.props,ht.data);case Ka:return Pt.def(Ot.context,ht.data);case bo:return Pt.def("this",ht.data);case Xo:return ht.data.append(Ve,Pt),ht.data.ref;case Ss:return ht.data.toString();case as:return ht.data.map(function(br){return Ve.invoke(Pt,br)})}},Ve.attribCache={};var yt={};return Ve.scopeAttrib=function(Pt){var ht=bt.id(Pt);if(ht in yt)return yt[ht];var ur=qi.scope[ht];ur||(ur=qi.scope[ht]=new Yr);var br=yt[ht]=Qe(ur);return br},Ve}function ta(Ve){var Qe=Ve.static,at=Ve.dynamic,Ct;if(le in Qe){var Ot=!!Qe[le];Ct=En(function(Bt,Dt){return Ot}),Ct.enable=Ot}else if(le in at){var Rt=at[le];Ct=Un(Rt,function(Bt,Dt){return Bt.invoke(Dt,Rt)})}return Ct}function oa(Ve,Qe){var at=Ve.static,Ct=Ve.dynamic;if(qe in at){var Ot=at[qe];return Ot?(Ot=mi.getFramebuffer(Ot),En(function(Bt,Dt){var yt=Bt.link(Ot),Pt=Bt.shared;Dt.set(Pt.framebuffer,".next",yt);var ht=Pt.context;return Dt.set(ht,"."+Be,yt+".width"),Dt.set(ht,"."+et,yt+".height"),yt})):En(function(Bt,Dt){var yt=Bt.shared;Dt.set(yt.framebuffer,".next","null");var Pt=yt.context;return Dt.set(Pt,"."+Be,Pt+"."+Ht),Dt.set(Pt,"."+et,Pt+"."+tr),"null"})}else if(qe in Ct){var Rt=Ct[qe];return Un(Rt,function(Bt,Dt){var yt=Bt.invoke(Dt,Rt),Pt=Bt.shared,ht=Pt.framebuffer,ur=Dt.def(ht,".getFramebuffer(",yt,")");Dt.set(ht,".next",ur);var br=Pt.context;return Dt.set(br,"."+Be,ur+"?"+ur+".width:"+br+"."+Ht),Dt.set(br,"."+et,ur+"?"+ur+".height:"+br+"."+tr),ur})}else return null}function ba(Ve,Qe,at){var Ct=Ve.static,Ot=Ve.dynamic;function Rt(yt){if(yt in Ct){var Pt=Ct[yt],ht=!0,ur=Pt.x|0,br=Pt.y|0,Ur,Di;return"width"in Pt?Ur=Pt.width|0:ht=!1,"height"in Pt?Di=Pt.height|0:ht=!1,new ei(!ht&&Qe&&Qe.thisDep,!ht&&Qe&&Qe.contextDep,!ht&&Qe&&Qe.propDep,function(gn,rn){var Ci=gn.shared.context,Bi=Ur;"width"in Pt||(Bi=rn.def(Ci,".",Be,"-",ur));var Gi=Di;return"height"in Pt||(Gi=rn.def(Ci,".",et,"-",br)),[ur,br,Bi,Gi]})}else if(yt in Ot){var fi=Ot[yt],Ti=Un(fi,function(gn,rn){var Ci=gn.invoke(rn,fi),Bi=gn.shared.context,Gi=rn.def(Ci,".x|0"),sn=rn.def(Ci,".y|0"),zn=rn.def('"width" in ',Ci,"?",Ci,".width|0:","(",Bi,".",Be,"-",Gi,")"),Ja=rn.def('"height" in ',Ci,"?",Ci,".height|0:","(",Bi,".",et,"-",sn,")");return[Gi,sn,zn,Ja]});return Qe&&(Ti.thisDep=Ti.thisDep||Qe.thisDep,Ti.contextDep=Ti.contextDep||Qe.contextDep,Ti.propDep=Ti.propDep||Qe.propDep),Ti}else return Qe?new ei(Qe.thisDep,Qe.contextDep,Qe.propDep,function(gn,rn){var Ci=gn.shared.context;return[0,0,rn.def(Ci,".",Be),rn.def(Ci,".",et)]}):null}var Bt=Rt(ee);if(Bt){var Dt=Bt;Bt=new ei(Bt.thisDep,Bt.contextDep,Bt.propDep,function(yt,Pt){var ht=Dt.append(yt,Pt),ur=yt.shared.context;return Pt.set(ur,"."+We,ht[2]),Pt.set(ur,"."+it,ht[3]),ht})}return{viewport:Bt,scissor_box:Rt(Q)}}function is(Ve,Qe){var at=Ve.static,Ct=typeof at[ot]=="string"&&typeof at[Xe]=="string";if(Ct){if(Object.keys(Qe.dynamic).length>0)return null;var Ot=Qe.static,Rt=Object.keys(Ot);if(Rt.length>0&&typeof Ot[Rt[0]]=="number"){for(var Bt=[],Dt=0;Dt<Rt.length;++Dt)Bt.push([Ot[Rt[Dt]]|0,Rt[Dt]]);return Bt}}return null}function Zs(Ve,Qe,at){var Ct=Ve.static,Ot=Ve.dynamic;function Rt(ht){if(ht in Ct){var ur=bt.id(Ct[ht]),br=En(function(){return ur});return br.id=ur,br}else if(ht in Ot){var Ur=Ot[ht];return Un(Ur,function(Di,fi){var Ti=Di.invoke(fi,Ur),gn=fi.def(Di.shared.strings,".id(",Ti,")");return gn})}return null}var Bt=Rt(ot),Dt=Rt(Xe),yt=null,Pt;return Ln(Bt)&&Ln(Dt)?(yt=Ei.program(Dt.id,Bt.id,null,at),Pt=En(function(ht,ur){return ht.link(yt)})):Pt=new ei(Bt&&Bt.thisDep||Dt&&Dt.thisDep,Bt&&Bt.contextDep||Dt&&Dt.contextDep,Bt&&Bt.propDep||Dt&&Dt.propDep,function(ht,ur){var br=ht.shared.shader,Ur;Bt?Ur=Bt.append(ht,ur):Ur=ur.def(br,".",ot);var Di;Dt?Di=Dt.append(ht,ur):Di=ur.def(br,".",Xe);var fi=br+".program("+Di+","+Ur;return ur.def(fi+")")}),{frag:Bt,vert:Dt,progVar:Pt,program:yt}}function Va(Ve,Qe){var at=Ve.static,Ct=Ve.dynamic,Ot={},Rt=!1;function Bt(){if(ve in at){var rn=at[ve];return rn!==null&&qi.getVAO(rn)===null&&(rn=qi.createVAO(rn)),Rt=!0,Ot.vao=rn,En(function(Bi){var Gi=qi.getVAO(rn);return Gi?Bi.link(Gi):"null"})}else if(ve in Ct){Rt=!0;var Ci=Ct[ve];return Un(Ci,function(Bi,Gi){var sn=Bi.invoke(Gi,Ci);return Gi.def(Bi.shared.vao+".getVAO("+sn+")")})}return null}var Dt=Bt(),yt=!1;function Pt(){if(Tt in at){var rn=at[Tt];if(Ot.elements=rn,Ii(rn)){var Ci=Ot.elements=Nr.create(rn,!0);rn=Nr.getElements(Ci),yt=!0}else rn&&(rn=Nr.getElements(rn),yt=!0);var Bi=En(function(sn,zn){if(rn){var Ja=sn.link(rn);return sn.ELEMENTS=Ja,Ja}return sn.ELEMENTS=null,null});return Bi.value=rn,Bi}else if(Tt in Ct){yt=!0;var Gi=Ct[Tt];return Un(Gi,function(sn,zn){var Ja=sn.shared,co=Ja.isBufferArgs,ts=Ja.elements,so=sn.invoke(zn,Gi),Zo=zn.def("null"),ms=zn.def(co,"(",so,")"),ou=sn.cond(ms).then(Zo,"=",ts,".createStream(",so,");").else(Zo,"=",ts,".getElements(",so,");");return zn.entry(ou),zn.exit(sn.cond(ms).then(ts,".destroyStream(",Zo,");")),sn.ELEMENTS=Zo,Zo})}else if(Rt)return new ei(Dt.thisDep,Dt.contextDep,Dt.propDep,function(sn,zn){return zn.def(sn.shared.vao+".currentVAO?"+sn.shared.elements+".getElements("+sn.shared.vao+".currentVAO.elements):null")});return null}var ht=Pt();function ur(){if(Yt in at){var rn=at[Yt];return Ot.primitive=rn,En(function(Bi,Gi){return Sn[rn]})}else if(Yt in Ct){var Ci=Ct[Yt];return Un(Ci,function(Bi,Gi){var sn=Bi.constants.primTypes,zn=Bi.invoke(Gi,Ci);return Gi.def(sn,"[",zn,"]")})}else{if(yt)return Ln(ht)?ht.value?En(function(Bi,Gi){return Gi.def(Bi.ELEMENTS,".primType")}):En(function(){return Al}):new ei(ht.thisDep,ht.contextDep,ht.propDep,function(Bi,Gi){var sn=Bi.ELEMENTS;return Gi.def(sn,"?",sn,".primType:",Al)});if(Rt)return new ei(Dt.thisDep,Dt.contextDep,Dt.propDep,function(Bi,Gi){return Gi.def(Bi.shared.vao+".currentVAO?"+Bi.shared.vao+".currentVAO.primitive:"+Al)})}return null}function br(rn,Ci){if(rn in at){var Bi=at[rn]|0;return Ci?Ot.offset=Bi:Ot.instances=Bi,En(function(sn,zn){return Ci&&(sn.OFFSET=Bi),Bi})}else if(rn in Ct){var Gi=Ct[rn];return Un(Gi,function(sn,zn){var Ja=sn.invoke(zn,Gi);return Ci&&(sn.OFFSET=Ja),Ja})}else if(Ci){if(yt)return En(function(sn,zn){return sn.OFFSET=0,0});if(Rt)return new ei(Dt.thisDep,Dt.contextDep,Dt.propDep,function(sn,zn){return zn.def(sn.shared.vao+".currentVAO?"+sn.shared.vao+".currentVAO.offset:0")})}else if(Rt)return new ei(Dt.thisDep,Dt.contextDep,Dt.propDep,function(sn,zn){return zn.def(sn.shared.vao+".currentVAO?"+sn.shared.vao+".currentVAO.instances:-1")});return null}var Ur=br(xr,!0);function Di(){if(Kt in at){var rn=at[Kt]|0;return Ot.count=rn,En(function(){return rn})}else if(Kt in Ct){var Ci=Ct[Kt];return Un(Ci,function(zn,Ja){var co=zn.invoke(Ja,Ci);return co})}else if(yt)if(Ln(ht)){if(ht)return Ur?new ei(Ur.thisDep,Ur.contextDep,Ur.propDep,function(zn,Ja){var co=Ja.def(zn.ELEMENTS,".vertCount-",zn.OFFSET);return co}):En(function(zn,Ja){return Ja.def(zn.ELEMENTS,".vertCount")});var Bi=En(function(){return-1});return Bi}else{var Gi=new ei(ht.thisDep||Ur.thisDep,ht.contextDep||Ur.contextDep,ht.propDep||Ur.propDep,function(zn,Ja){var co=zn.ELEMENTS;return zn.OFFSET?Ja.def(co,"?",co,".vertCount-",zn.OFFSET,":-1"):Ja.def(co,"?",co,".vertCount:-1")});return Gi}else if(Rt){var sn=new ei(Dt.thisDep,Dt.contextDep,Dt.propDep,function(zn,Ja){return Ja.def(zn.shared.vao,".currentVAO?",zn.shared.vao,".currentVAO.count:-1")});return sn}return null}var fi=ur(),Ti=Di(),gn=br(Ir,!1);return{elements:ht,primitive:fi,count:Ti,instances:gn,offset:Ur,vao:Dt,vaoActive:Rt,elementsActive:yt,static:Ot}}function Ml(Ve,Qe){var at=Ve.static,Ct=Ve.dynamic,Ot={};return hi.forEach(function(Rt){var Bt=Ji(Rt);function Dt(yt,Pt){if(Rt in at){var ht=yt(at[Rt]);Ot[Bt]=En(function(){return ht})}else if(Rt in Ct){var ur=Ct[Rt];Ot[Bt]=Un(ur,function(br,Ur){return Pt(br,Ur,br.invoke(Ur,ur))})}}switch(Rt){case Cu:case Ho:case ws:case Vc:case va:case B:case Uc:case Ac:case Ua:case rs:return Dt(function(yt){return yt},function(yt,Pt,ht){return ht});case no:return Dt(function(yt){return Zr[yt]},function(yt,Pt,ht){var ur=yt.constants.compareFuncs;return Pt.def(ur,"[",ht,"]")});case ys:return Dt(function(yt){return yt},function(yt,Pt,ht){var ur=Pt.def("+",ht,"[0]"),br=Pt.def("+",ht,"[1]");return[ur,br]});case Ls:return Dt(function(yt){var Pt="srcRGB"in yt?yt.srcRGB:yt.src,ht="srcAlpha"in yt?yt.srcAlpha:yt.src,ur="dstRGB"in yt?yt.dstRGB:yt.dst,br="dstAlpha"in yt?yt.dstAlpha:yt.dst;return[bi[Pt],bi[ur],bi[ht],bi[br]]},function(yt,Pt,ht){var ur=yt.constants.blendFuncs;function br(Ci,Bi){var Gi=Pt.def('"',Ci,Bi,'" in ',ht,"?",ht,".",Ci,Bi,":",ht,".",Ci);return Gi}var Ur=br("src","RGB"),Di=br("dst","RGB"),fi=Pt.def(ur,"[",Ur,"]"),Ti=Pt.def(ur,"[",br("src","Alpha"),"]"),gn=Pt.def(ur,"[",Di,"]"),rn=Pt.def(ur,"[",br("dst","Alpha"),"]");return[fi,gn,Ti,rn]});case Ws:return Dt(function(yt){if(typeof yt=="string")return[xi[yt],xi[yt]];if(typeof yt=="object")return[xi[yt.rgb],xi[yt.alpha]]},function(yt,Pt,ht){var ur=yt.constants.blendEquations,br=Pt.def(),Ur=Pt.def(),Di=yt.cond("typeof ",ht,'==="string"');return Di.then(br,"=",Ur,"=",ur,"[",ht,"];"),Di.else(br,"=",ur,"[",ht,".rgb];",Ur,"=",ur,"[",ht,".alpha];"),Pt(Di),[br,Ur]});case ml:return Dt(function(yt){return M(4,function(Pt){return+yt[Pt]})},function(yt,Pt,ht){return M(4,function(ur){return Pt.def("+",ht,"[",ur,"]")})});case hc:return Dt(function(yt){return yt|0},function(yt,Pt,ht){return Pt.def(ht,"|0")});case Ku:return Dt(function(yt){var Pt=yt.cmp||"keep",ht=yt.ref||0,ur="mask"in yt?yt.mask:-1;return[Zr[Pt],ht,ur]},function(yt,Pt,ht){var ur=yt.constants.compareFuncs,br=Pt.def('"cmp" in ',ht,"?",ur,"[",ht,".cmp]",":",nt),Ur=Pt.def(ht,".ref|0"),Di=Pt.def('"mask" in ',ht,"?",ht,".mask|0:-1");return[br,Ur,Di]});case ue:case w:return Dt(function(yt){var Pt=yt.fail||"keep",ht=yt.zfail||"keep",ur=yt.zpass||"keep";return[Rt===w?se:X,ai[Pt],ai[ht],ai[ur]]},function(yt,Pt,ht){var ur=yt.constants.stencilOps;function br(Ur){return Pt.def('"',Ur,'" in ',ht,"?",ur,"[",ht,".",Ur,"]:",nt)}return[Rt===w?se:X,br("fail"),br("zfail"),br("zpass")]});case xu:return Dt(function(yt){var Pt=yt.factor|0,ht=yt.units|0;return[Pt,ht]},function(yt,Pt,ht){var ur=Pt.def(ht,".factor|0"),br=Pt.def(ht,".units|0");return[ur,br]});case Yu:return Dt(function(yt){var Pt=0;return yt==="front"?Pt=X:yt==="back"&&(Pt=se),Pt},function(yt,Pt,ht){return Pt.def(ht,'==="front"?',X,":",se)});case pu:return Dt(function(yt){return yt},function(yt,Pt,ht){return ht});case Nc:return Dt(function(yt){return gi[yt]},function(yt,Pt,ht){return Pt.def(ht+'==="cw"?'+Te+":"+Ne)});case $l:return Dt(function(yt){return yt.map(function(Pt){return!!Pt})},function(yt,Pt,ht){return M(4,function(ur){return"!!"+ht+"["+ur+"]"})});case oo:return Dt(function(yt){var Pt="value"in yt?yt.value:1,ht=!!yt.invert;return[Pt,ht]},function(yt,Pt,ht){var ur=Pt.def('"value" in ',ht,"?+",ht,".value:1"),br=Pt.def("!!",ht,".invert");return[ur,br]})}}),Ot}function zo(Ve,Qe){var at=Ve.static,Ct=Ve.dynamic,Ot={};return Object.keys(at).forEach(function(Rt){var Bt=at[Rt],Dt;if(typeof Bt=="number"||typeof Bt=="boolean")Dt=En(function(){return Bt});else if(typeof Bt=="function"){var yt=Bt._reglType;yt==="texture2d"||yt==="textureCube"?Dt=En(function(Pt){return Pt.link(Bt)}):(yt==="framebuffer"||yt==="framebufferCube")&&(Dt=En(function(Pt){return Pt.link(Bt.color[0])}))}else Mn(Bt)&&(Dt=En(function(Pt){var ht=Pt.global.def("[",M(Bt.length,function(ur){return Bt[ur]}),"]");return ht}));Dt.value=Bt,Ot[Rt]=Dt}),Object.keys(Ct).forEach(function(Rt){var Bt=Ct[Rt];Ot[Rt]=Un(Bt,function(Dt,yt){return Dt.invoke(yt,Bt)})}),Ot}function Qs(Ve,Qe){var at=Ve.static,Ct=Ve.dynamic,Ot={};return Object.keys(at).forEach(function(Rt){var Bt=at[Rt],Dt=bt.id(Rt),yt=new Yr;if(Ii(Bt))yt.state=La,yt.buffer=jr.getBuffer(jr.create(Bt,Sr,!1,!0)),yt.type=0;else{var Pt=jr.getBuffer(Bt);if(Pt)yt.state=La,yt.buffer=Pt,yt.type=0;else if("constant"in Bt){var ht=Bt.constant;yt.buffer="null",yt.state=Na,typeof ht=="number"?yt.x=ht:Ca.forEach(function(gn,rn){rn<ht.length&&(yt[gn]=ht[rn])})}else{Ii(Bt.buffer)?Pt=jr.getBuffer(jr.create(Bt.buffer,Sr,!1,!0)):Pt=jr.getBuffer(Bt.buffer);var ur=Bt.offset|0,br=Bt.stride|0,Ur=Bt.size|0,Di=!!Bt.normalized,fi=0;"type"in Bt&&(fi=ji[Bt.type]);var Ti=Bt.divisor|0;yt.buffer=Pt,yt.state=La,yt.size=Ur,yt.normalized=Di,yt.type=fi||Pt.dtype,yt.offset=ur,yt.stride=br,yt.divisor=Ti}}Ot[Rt]=En(function(gn,rn){var Ci=gn.attribCache;if(Dt in Ci)return Ci[Dt];var Bi={isStream:!1};return Object.keys(yt).forEach(function(Gi){Bi[Gi]=yt[Gi]}),yt.buffer&&(Bi.buffer=gn.link(yt.buffer),Bi.type=Bi.type||Bi.buffer+".dtype"),Ci[Dt]=Bi,Bi})}),Object.keys(Ct).forEach(function(Rt){var Bt=Ct[Rt];function Dt(yt,Pt){var ht=yt.invoke(Pt,Bt),ur=yt.shared,br=yt.constants,Ur=ur.isBufferArgs,Di=ur.buffer,fi={isStream:Pt.def(!1)},Ti=new Yr;Ti.state=La,Object.keys(Ti).forEach(function(Bi){fi[Bi]=Pt.def(""+Ti[Bi])});var gn=fi.buffer,rn=fi.type;Pt("if(",Ur,"(",ht,")){",fi.isStream,"=true;",gn,"=",Di,".createStream(",Sr,",",ht,");",rn,"=",gn,".dtype;","}else{",gn,"=",Di,".getBuffer(",ht,");","if(",gn,"){",rn,"=",gn,".dtype;",'}else if("constant" in ',ht,"){",fi.state,"=",Na,";","if(typeof "+ht+'.constant === "number"){',fi[Ca[0]],"=",ht,".constant;",Ca.slice(1).map(function(Bi){return fi[Bi]}).join("="),"=0;","}else{",Ca.map(function(Bi,Gi){return fi[Bi]+"="+ht+".constant.length>"+Gi+"?"+ht+".constant["+Gi+"]:0;"}).join(""),"}}else{","if(",Ur,"(",ht,".buffer)){",gn,"=",Di,".createStream(",Sr,",",ht,".buffer);","}else{",gn,"=",Di,".getBuffer(",ht,".buffer);","}",rn,'="type" in ',ht,"?",br.glTypes,"[",ht,".type]:",gn,".dtype;",fi.normalized,"=!!",ht,".normalized;");function Ci(Bi){Pt(fi[Bi],"=",ht,".",Bi,"|0;")}return Ci("size"),Ci("offset"),Ci("stride"),Ci("divisor"),Pt("}}"),Pt.exit("if(",fi.isStream,"){",Di,".destroyStream(",gn,");","}"),fi}Ot[Rt]=Un(Bt,Dt)}),Ot}function al(Ve){var Qe=Ve.static,at=Ve.dynamic,Ct={};return Object.keys(Qe).forEach(function(Ot){var Rt=Qe[Ot];Ct[Ot]=En(function(Bt,Dt){return typeof Rt=="number"||typeof Rt=="boolean"?""+Rt:Bt.link(Rt)})}),Object.keys(at).forEach(function(Ot){var Rt=at[Ot];Ct[Ot]=Un(Rt,function(Bt,Dt){return Bt.invoke(Dt,Rt)})}),Ct}function Vl(Ve,Qe,at,Ct,Ot){var Rt=Ve.static,Bt=Ve.dynamic,Dt=is(Ve,Qe),yt=oa(Ve,Ot),Pt=ba(Ve,yt,Ot),ht=Va(Ve,Ot),ur=Ml(Ve,Ot),br=Zs(Ve,Ot,Dt);function Ur(Ci){var Bi=Pt[Ci];Bi&&(ur[Ci]=Bi)}Ur(ee),Ur(Ji(Q));var Di=Object.keys(ur).length>0,fi={framebuffer:yt,draw:ht,shader:br,state:ur,dirty:Di,scopeVAO:null,drawVAO:null,useVAO:!1,attributes:{}};if(fi.profile=ta(Ve,Ot),fi.uniforms=zo(at,Ot),fi.drawVAO=fi.scopeVAO=ht.vao,!fi.drawVAO&&br.program&&!Dt&&zt.angle_instanced_arrays&&ht.static.elements){var Ti=!0,gn=br.program.attributes.map(function(Ci){var Bi=Qe.static[Ci];return Ti=Ti&&!!Bi,Bi});if(Ti&&gn.length>0){var rn=qi.getVAO(qi.createVAO({attributes:gn,elements:ht.static.elements}));fi.drawVAO=new ei(null,null,null,function(Ci,Bi){return Ci.link(rn)}),fi.useVAO=!0}}return Dt?fi.useVAO=!0:fi.attributes=Qs(Qe,Ot),fi.context=al(Ct,Ot),fi}function ss(Ve,Qe,at){var Ct=Ve.shared,Ot=Ct.context,Rt=Ve.scope();Object.keys(at).forEach(function(Bt){Qe.save(Ot,"."+Bt);var Dt=at[Bt],yt=Dt.append(Ve,Qe);Array.isArray(yt)?Rt(Ot,".",Bt,"=[",yt.join(),"];"):Rt(Ot,".",Bt,"=",yt,";")}),Qe(Rt)}function Vs(Ve,Qe,at,Ct){var Ot=Ve.shared,Rt=Ot.gl,Bt=Ot.framebuffer,Dt;ci&&(Dt=Qe.def(Ot.extensions,".webgl_draw_buffers"));var yt=Ve.constants,Pt=yt.drawBuffer,ht=yt.backBuffer,ur;at?ur=at.append(Ve,Qe):ur=Qe.def(Bt,".next"),Ct||Qe("if(",ur,"!==",Bt,".cur){"),Qe("if(",ur,"){",Rt,".bindFramebuffer(",qr,",",ur,".framebuffer);"),ci&&Qe(Dt,".drawBuffersWEBGL(",Pt,"[",ur,".colorAttachments.length]);"),Qe("}else{",Rt,".bindFramebuffer(",qr,",null);"),ci&&Qe(Dt,".drawBuffersWEBGL(",ht,");"),Qe("}",Bt,".cur=",ur,";"),Ct||Qe("}")}function Ys(Ve,Qe,at){var Ct=Ve.shared,Ot=Ct.gl,Rt=Ve.current,Bt=Ve.next,Dt=Ct.current,yt=Ct.next,Pt=Ve.cond(Dt,".dirty");hi.forEach(function(ht){var ur=Ji(ht);if(!(ur in at.state)){var br,Ur;if(ur in Bt){br=Bt[ur],Ur=Rt[ur];var Di=M(Zi[ur].length,function(Ti){return Pt.def(br,"[",Ti,"]")});Pt(Ve.cond(Di.map(function(Ti,gn){return Ti+"!=="+Ur+"["+gn+"]"}).join("||")).then(Ot,".",mn[ur],"(",Di,");",Di.map(function(Ti,gn){return Ur+"["+gn+"]="+Ti}).join(";"),";"))}else{br=Pt.def(yt,".",ur);var fi=Ve.cond(br,"!==",Dt,".",ur);Pt(fi),ur in li?fi(Ve.cond(br).then(Ot,".enable(",li[ur],");").else(Ot,".disable(",li[ur],");"),Dt,".",ur,"=",br,";"):fi(Ot,".",mn[ur],"(",br,");",Dt,".",ur,"=",br,";")}}}),Object.keys(at.state).length===0&&Pt(Dt,".dirty=false;"),Qe(Pt)}function wa(Ve,Qe,at,Ct){var Ot=Ve.shared,Rt=Ve.current,Bt=Ot.current,Dt=Ot.gl,yt;Si(Object.keys(at)).forEach(function(Pt){var ht=at[Pt];if(!(Ct&&!Ct(ht))){var ur=ht.append(Ve,Qe);if(li[Pt]){var br=li[Pt];Ln(ht)?(yt=Ve.link(ur,{stable:!0}),Qe(Ve.cond(yt).then(Dt,".enable(",br,");").else(Dt,".disable(",br,");")),Qe(Bt,".",Pt,"=",yt,";")):(Qe(Ve.cond(ur).then(Dt,".enable(",br,");").else(Dt,".disable(",br,");")),Qe(Bt,".",Pt,"=",ur,";"))}else if(Mn(ur)){var Ur=Rt[Pt];Qe(Dt,".",mn[Pt],"(",ur,");",ur.map(function(Di,fi){return Ur+"["+fi+"]="+Di}).join(";"),";")}else Ln(ht)?(yt=Ve.link(ur,{stable:!0}),Qe(Dt,".",mn[Pt],"(",yt,");",Bt,".",Pt,"=",yt,";")):Qe(Dt,".",mn[Pt],"(",ur,");",Bt,".",Pt,"=",ur,";")}})}function ol(Ve,Qe){Ri&&(Ve.instancing=Qe.def(Ve.shared.extensions,".angle_instanced_arrays"))}function io(Ve,Qe,at,Ct,Ot){var Rt=Ve.shared,Bt=Ve.stats,Dt=Rt.current,yt=Rt.timer,Pt=at.profile;function ht(){return typeof performance=="undefined"?"Date.now()":"performance.now()"}var ur,br;function Ur(Ci){ur=Qe.def(),Ci(ur,"=",ht(),";"),typeof Ot=="string"?Ci(Bt,".count+=",Ot,";"):Ci(Bt,".count++;"),Wi&&(Ct?(br=Qe.def(),Ci(br,"=",yt,".getNumPendingQueries();")):Ci(yt,".beginQuery(",Bt,");"))}function Di(Ci){Ci(Bt,".cpuTime+=",ht(),"-",ur,";"),Wi&&(Ct?Ci(yt,".pushScopeStats(",br,",",yt,".getNumPendingQueries(),",Bt,");"):Ci(yt,".endQuery();"))}function fi(Ci){var Bi=Qe.def(Dt,".profile");Qe(Dt,".profile=",Ci,";"),Qe.exit(Dt,".profile=",Bi,";")}var Ti;if(Pt){if(Ln(Pt)){Pt.enable?(Ur(Qe),Di(Qe.exit),fi("true")):fi("false");return}Ti=Pt.append(Ve,Qe),fi(Ti)}else Ti=Qe.def(Dt,".profile");var gn=Ve.block();Ur(gn),Qe("if(",Ti,"){",gn,"}");var rn=Ve.block();Di(rn),Qe.exit("if(",Ti,"){",rn,"}")}function Y(Ve,Qe,at,Ct,Ot){var Rt=Ve.shared;function Bt(yt){switch(yt){case Co:case Ps:case hl:return 2;case Qa:case Ts:case Ul:return 3;case mo:case wo:case Lu:return 4;default:return 1}}function Dt(yt,Pt,ht){var ur=Rt.gl,br=Qe.def(yt,".location"),Ur=Qe.def(Rt.attributes,"[",br,"]"),Di=ht.state,fi=ht.buffer,Ti=[ht.x,ht.y,ht.z,ht.w],gn=["buffer","normalized","offset","stride"];function rn(){Qe("if(!",Ur,".buffer){",ur,".enableVertexAttribArray(",br,");}");var Bi=ht.type,Gi;if(ht.size?Gi=Qe.def(ht.size,"||",Pt):Gi=Pt,Qe("if(",Ur,".type!==",Bi,"||",Ur,".size!==",Gi,"||",gn.map(function(zn){return Ur+"."+zn+"!=="+ht[zn]}).join("||"),"){",ur,".bindBuffer(",Sr,",",fi,".buffer);",ur,".vertexAttribPointer(",[br,Gi,Bi,ht.normalized,ht.stride,ht.offset],");",Ur,".type=",Bi,";",Ur,".size=",Gi,";",gn.map(function(zn){return Ur+"."+zn+"="+ht[zn]+";"}).join(""),"}"),Ri){var sn=ht.divisor;Qe("if(",Ur,".divisor!==",sn,"){",Ve.instancing,".vertexAttribDivisorANGLE(",[br,sn],");",Ur,".divisor=",sn,";}")}}function Ci(){Qe("if(",Ur,".buffer){",ur,".disableVertexAttribArray(",br,");",Ur,".buffer=null;","}if(",Ca.map(function(Bi,Gi){return Ur+"."+Bi+"!=="+Ti[Gi]}).join("||"),"){",ur,".vertexAttrib4f(",br,",",Ti,");",Ca.map(function(Bi,Gi){return Ur+"."+Bi+"="+Ti[Gi]+";"}).join(""),"}")}Di===La?rn():Di===Na?Ci():(Qe("if(",Di,"===",La,"){"),rn(),Qe("}else{"),Ci(),Qe("}"))}Ct.forEach(function(yt){var Pt=yt.name,ht=at.attributes[Pt],ur;if(ht){if(!Ot(ht))return;ur=ht.append(Ve,Qe)}else{if(!Ot(ia))return;var br=Ve.scopeAttrib(Pt);ur={},Object.keys(new Yr).forEach(function(Ur){ur[Ur]=Qe.def(br,".",Ur)})}Dt(Ve.link(yt),Bt(yt.info.type),ur)})}function D(Ve,Qe,at,Ct,Ot,Rt){for(var Bt=Ve.shared,Dt=Bt.gl,yt,Pt=0;Pt<Ct.length;++Pt){var ht=Ct[Pt],ur=ht.name,br=ht.info.type,Ur=at.uniforms[ur],Di=Ve.link(ht),fi=Di+".location",Ti;if(Ur){if(!Ot(Ur))continue;if(Ln(Ur)){var gn=Ur.value;if(br===dc||br===Tl){var rn=Ve.link(gn._texture||gn.color[0]._texture);Qe(Dt,".uniform1i(",fi,",",rn+".bind());"),Qe.exit(rn,".unbind();")}else if(br===au||br===Js||br===Ql){var Ci=Ve.global.def("new Float32Array(["+Array.prototype.slice.call(gn)+"])"),Bi=2;br===Js?Bi=3:br===Ql&&(Bi=4),Qe(Dt,".uniformMatrix",Bi,"fv(",fi,",false,",Ci,");")}else{switch(br){case $a:yt="1f";break;case Co:yt="2f";break;case Qa:yt="3f";break;case mo:yt="4f";break;case To:yt="1i";break;case Bo:yt="1i";break;case hl:yt="2i";break;case Ps:yt="2i";break;case Ul:yt="3i";break;case Ts:yt="3i";break;case Lu:yt="4i";break;case wo:yt="4i";break}Qe(Dt,".uniform",yt,"(",fi,",",Mn(gn)?Array.prototype.slice.call(gn):gn,");")}continue}else Ti=Ur.append(Ve,Qe)}else{if(!Ot(ia))continue;Ti=Qe.def(Bt.uniforms,"[",bt.id(ur),"]")}br===dc?Qe("if(",Ti,"&&",Ti,'._reglType==="framebuffer"){',Ti,"=",Ti,".color[0];","}"):br===Tl&&Qe("if(",Ti,"&&",Ti,'._reglType==="framebufferCube"){',Ti,"=",Ti,".color[0];","}");var Gi=1;switch(br){case dc:case Tl:var sn=Qe.def(Ti,"._texture");Qe(Dt,".uniform1i(",fi,",",sn,".bind());"),Qe.exit(sn,".unbind();");continue;case Bo:case To:yt="1i";break;case Ps:case hl:yt="2i",Gi=2;break;case Ts:case Ul:yt="3i",Gi=3;break;case wo:case Lu:yt="4i",Gi=4;break;case $a:yt="1f";break;case Co:yt="2f",Gi=2;break;case Qa:yt="3f",Gi=3;break;case mo:yt="4f",Gi=4;break;case au:yt="Matrix2fv";break;case Js:yt="Matrix3fv";break;case Ql:yt="Matrix4fv";break}if(yt.charAt(0)==="M"){Qe(Dt,".uniform",yt,"(",fi,",");var zn=Math.pow(br-au+2,2),Ja=Ve.global.def("new Float32Array(",zn,")");Array.isArray(Ti)?Qe("false,(",M(zn,function(ms){return Ja+"["+ms+"]="+Ti[ms]}),",",Ja,")"):Qe("false,(Array.isArray(",Ti,")||",Ti," instanceof Float32Array)?",Ti,":(",M(zn,function(ms){return Ja+"["+ms+"]="+Ti+"["+ms+"]"}),",",Ja,")"),Qe(");")}else if(Gi>1){for(var co=[],ts=[],so=0;so<Gi;++so)Array.isArray(Ti)?ts.push(Ti[so]):ts.push(Qe.def(Ti+"["+so+"]")),Rt&&co.push(Qe.def());Rt&&Qe("if(!",Ve.batchId,"||",co.map(function(ms,ou){return ms+"!=="+ts[ou]}).join("||"),"){",co.map(function(ms,ou){return ms+"="+ts[ou]+";"}).join("")),Qe(Dt,".uniform",yt,"(",fi,",",ts.join(","),");"),Rt&&Qe("}")}else{if(Rt){var Zo=Qe.def();Qe("if(!",Ve.batchId,"||",Zo,"!==",Ti,"){",Zo,"=",Ti,";")}Qe(Dt,".uniform",yt,"(",fi,",",Ti,");"),Rt&&Qe("}")}}}function J(Ve,Qe,at,Ct){var Ot=Ve.shared,Rt=Ot.gl,Bt=Ot.draw,Dt=Ct.draw;function yt(){var Gi=Dt.elements,sn,zn=Qe;return Gi?((Gi.contextDep&&Ct.contextDynamic||Gi.propDep)&&(zn=at),sn=Gi.append(Ve,zn),Dt.elementsActive&&zn("if("+sn+")"+Rt+".bindBuffer("+Or+","+sn+".buffer.buffer);")):(sn=zn.def(),zn(sn,"=",Bt,".",Tt,";","if(",sn,"){",Rt,".bindBuffer(",Or,",",sn,".buffer.buffer);}","else if(",Ot.vao,".currentVAO){",sn,"=",Ve.shared.elements+".getElements("+Ot.vao,".currentVAO.elements);",an?"":"if("+sn+")"+Rt+".bindBuffer("+Or+","+sn+".buffer.buffer);","}")),sn}function Pt(){var Gi=Dt.count,sn,zn=Qe;return Gi?((Gi.contextDep&&Ct.contextDynamic||Gi.propDep)&&(zn=at),sn=Gi.append(Ve,zn)):sn=zn.def(Bt,".",Kt),sn}var ht=yt();function ur(Gi){var sn=Dt[Gi];return sn?sn.contextDep&&Ct.contextDynamic||sn.propDep?sn.append(Ve,at):sn.append(Ve,Qe):Qe.def(Bt,".",Gi)}var br=ur(Yt),Ur=ur(xr),Di=Pt();if(typeof Di=="number"){if(Di===0)return}else at("if(",Di,"){"),at.exit("}");var fi,Ti;Ri&&(fi=ur(Ir),Ti=Ve.instancing);var gn=ht+".type",rn=Dt.elements&&Ln(Dt.elements)&&!Dt.vaoActive;function Ci(){function Gi(){at(Ti,".drawElementsInstancedANGLE(",[br,Di,gn,Ur+"<<(("+gn+"-"+Ra+")>>1)",fi],");")}function sn(){at(Ti,".drawArraysInstancedANGLE(",[br,Ur,Di,fi],");")}ht&&ht!=="null"?rn?Gi():(at("if(",ht,"){"),Gi(),at("}else{"),sn(),at("}")):sn()}function Bi(){function Gi(){at(Rt+".drawElements("+[br,Di,gn,Ur+"<<(("+gn+"-"+Ra+")>>1)"]+");")}function sn(){at(Rt+".drawArrays("+[br,Ur,Di]+");")}ht&&ht!=="null"?rn?Gi():(at("if(",ht,"){"),Gi(),at("}else{"),sn(),at("}")):sn()}Ri&&(typeof fi!="number"||fi>=0)?typeof fi=="string"?(at("if(",fi,">0){"),Ci(),at("}else if(",fi,"<0){"),Bi(),at("}")):Ci():Bi()}function q(Ve,Qe,at,Ct,Ot){var Rt=kn(),Bt=Rt.proc("body",Ot);return Ri&&(Rt.instancing=Bt.def(Rt.shared.extensions,".angle_instanced_arrays")),Ve(Rt,Bt,at,Ct),Rt.compile().body}function K(Ve,Qe,at,Ct){ol(Ve,Qe),at.useVAO?at.drawVAO?Qe(Ve.shared.vao,".setVAO(",at.drawVAO.append(Ve,Qe),");"):Qe(Ve.shared.vao,".setVAO(",Ve.shared.vao,".targetVAO);"):(Qe(Ve.shared.vao,".setVAO(null);"),Y(Ve,Qe,at,Ct.attributes,function(){return!0})),D(Ve,Qe,at,Ct.uniforms,function(){return!0},!1),J(Ve,Qe,Qe,at)}function de(Ve,Qe){var at=Ve.proc("draw",1);ol(Ve,at),ss(Ve,at,Qe.context),Vs(Ve,at,Qe.framebuffer),Ys(Ve,at,Qe),wa(Ve,at,Qe.state),io(Ve,at,Qe,!1,!0);var Ct=Qe.shader.progVar.append(Ve,at);if(at(Ve.shared.gl,".useProgram(",Ct,".program);"),Qe.shader.program)K(Ve,at,Qe,Qe.shader.program);else{at(Ve.shared.vao,".setVAO(null);");var Ot=Ve.global.def("{}"),Rt=at.def(Ct,".id"),Bt=at.def(Ot,"[",Rt,"]");at(Ve.cond(Bt).then(Bt,".call(this,a0);").else(Bt,"=",Ot,"[",Rt,"]=",Ve.link(function(Dt){return q(K,Ve,Qe,Dt,1)}),"(",Ct,");",Bt,".call(this,a0);"))}Object.keys(Qe.state).length>0&&at(Ve.shared.current,".dirty=true;"),Ve.shared.vao&&at(Ve.shared.vao,".setVAO(null);")}function ne(Ve,Qe,at,Ct){Ve.batchId="a1",ol(Ve,Qe);function Ot(){return!0}Y(Ve,Qe,at,Ct.attributes,Ot),D(Ve,Qe,at,Ct.uniforms,Ot,!1),J(Ve,Qe,Qe,at)}function we(Ve,Qe,at,Ct){ol(Ve,Qe);var Ot=at.contextDep,Rt=Qe.def(),Bt="a0",Dt="a1",yt=Qe.def();Ve.shared.props=yt,Ve.batchId=Rt;var Pt=Ve.scope(),ht=Ve.scope();Qe(Pt.entry,"for(",Rt,"=0;",Rt,"<",Dt,";++",Rt,"){",yt,"=",Bt,"[",Rt,"];",ht,"}",Pt.exit);function ur(gn){return gn.contextDep&&Ot||gn.propDep}function br(gn){return!ur(gn)}if(at.needsContext&&ss(Ve,ht,at.context),at.needsFramebuffer&&Vs(Ve,ht,at.framebuffer),wa(Ve,ht,at.state,ur),at.profile&&ur(at.profile)&&io(Ve,ht,at,!1,!0),Ct)at.useVAO?at.drawVAO?ur(at.drawVAO)?ht(Ve.shared.vao,".setVAO(",at.drawVAO.append(Ve,ht),");"):Pt(Ve.shared.vao,".setVAO(",at.drawVAO.append(Ve,Pt),");"):Pt(Ve.shared.vao,".setVAO(",Ve.shared.vao,".targetVAO);"):(Pt(Ve.shared.vao,".setVAO(null);"),Y(Ve,Pt,at,Ct.attributes,br),Y(Ve,ht,at,Ct.attributes,ur)),D(Ve,Pt,at,Ct.uniforms,br,!1),D(Ve,ht,at,Ct.uniforms,ur,!0),J(Ve,Pt,ht,at);else{var Ur=Ve.global.def("{}"),Di=at.shader.progVar.append(Ve,ht),fi=ht.def(Di,".id"),Ti=ht.def(Ur,"[",fi,"]");ht(Ve.shared.gl,".useProgram(",Di,".program);","if(!",Ti,"){",Ti,"=",Ur,"[",fi,"]=",Ve.link(function(gn){return q(ne,Ve,at,gn,2)}),"(",Di,");}",Ti,".call(this,a0[",Rt,"],",Rt,");")}}function Ue(Ve,Qe){var at=Ve.proc("batch",2);Ve.batchId="0",ol(Ve,at);var Ct=!1,Ot=!0;Object.keys(Qe.context).forEach(function(Ur){Ct=Ct||Qe.context[Ur].propDep}),Ct||(ss(Ve,at,Qe.context),Ot=!1);var Rt=Qe.framebuffer,Bt=!1;Rt?(Rt.propDep?Ct=Bt=!0:Rt.contextDep&&Ct&&(Bt=!0),Bt||Vs(Ve,at,Rt)):Vs(Ve,at,null),Qe.state.viewport&&Qe.state.viewport.propDep&&(Ct=!0);function Dt(Ur){return Ur.contextDep&&Ct||Ur.propDep}Ys(Ve,at,Qe),wa(Ve,at,Qe.state,function(Ur){return!Dt(Ur)}),(!Qe.profile||!Dt(Qe.profile))&&io(Ve,at,Qe,!1,"a1"),Qe.contextDep=Ct,Qe.needsContext=Ot,Qe.needsFramebuffer=Bt;var yt=Qe.shader.progVar;if(yt.contextDep&&Ct||yt.propDep)we(Ve,at,Qe,null);else{var Pt=yt.append(Ve,at);if(at(Ve.shared.gl,".useProgram(",Pt,".program);"),Qe.shader.program)we(Ve,at,Qe,Qe.shader.program);else{at(Ve.shared.vao,".setVAO(null);");var ht=Ve.global.def("{}"),ur=at.def(Pt,".id"),br=at.def(ht,"[",ur,"]");at(Ve.cond(br).then(br,".call(this,a0,a1);").else(br,"=",ht,"[",ur,"]=",Ve.link(function(Ur){return q(we,Ve,Qe,Ur,2)}),"(",Pt,");",br,".call(this,a0,a1);"))}}Object.keys(Qe.state).length>0&&at(Ve.shared.current,".dirty=true;"),Ve.shared.vao&&at(Ve.shared.vao,".setVAO(null);")}function ft(Ve,Qe){var at=Ve.proc("scope",3);Ve.batchId="a2";var Ct=Ve.shared,Ot=Ct.current;if(ss(Ve,at,Qe.context),Qe.framebuffer&&Qe.framebuffer.append(Ve,at),Si(Object.keys(Qe.state)).forEach(function(Dt){var yt=Qe.state[Dt],Pt=yt.append(Ve,at);Mn(Pt)?Pt.forEach(function(ht,ur){pn(ht)?at.set(Ve.next[Dt],"["+ur+"]",ht):at.set(Ve.next[Dt],"["+ur+"]",Ve.link(ht,{stable:!0}))}):Ln(yt)?at.set(Ct.next,"."+Dt,Ve.link(Pt,{stable:!0})):at.set(Ct.next,"."+Dt,Pt)}),io(Ve,at,Qe,!0,!0),[Tt,xr,Kt,Ir,Yt].forEach(function(Dt){var yt=Qe.draw[Dt];if(yt){var Pt=yt.append(Ve,at);pn(Pt)?at.set(Ct.draw,"."+Dt,Pt):at.set(Ct.draw,"."+Dt,Ve.link(Pt),{stable:!0})}}),Object.keys(Qe.uniforms).forEach(function(Dt){var yt=Qe.uniforms[Dt].append(Ve,at);Array.isArray(yt)&&(yt="["+yt.map(function(Pt){return pn(Pt)?Pt:Ve.link(Pt,{stable:!0})})+"]"),at.set(Ct.uniforms,"["+Ve.link(bt.id(Dt),{stable:!0})+"]",yt)}),Object.keys(Qe.attributes).forEach(function(Dt){var yt=Qe.attributes[Dt].append(Ve,at),Pt=Ve.scopeAttrib(Dt);Object.keys(new Yr).forEach(function(ht){at.set(Pt,"."+ht,yt[ht])})}),Qe.scopeVAO){var Rt=Qe.scopeVAO.append(Ve,at);pn(Rt)?at.set(Ct.vao,".targetVAO",Rt):at.set(Ct.vao,".targetVAO",Ve.link(Rt,{stable:!0}))}function Bt(Dt){var yt=Qe.shader[Dt];if(yt){var Pt=yt.append(Ve,at);pn(Pt)?at.set(Ct.shader,"."+Dt,Pt):at.set(Ct.shader,"."+Dt,Ve.link(Pt,{stable:!0}))}}Bt(Xe),Bt(ot),Object.keys(Qe.state).length>0&&(at(Ot,".dirty=true;"),at.exit(Ot,".dirty=true;")),at("a1(",Ve.shared.context,",a0,",Ve.batchId,");")}function Xt(Ve){if(!(typeof Ve!="object"||Mn(Ve))){for(var Qe=Object.keys(Ve),at=0;at<Qe.length;++at)if(h.isDynamic(Ve[Qe[at]]))return!0;return!1}}function hr(Ve,Qe,at){var Ct=Qe.static[at];if(!Ct||!Xt(Ct))return;var Ot=Ve.global,Rt=Object.keys(Ct),Bt=!1,Dt=!1,yt=!1,Pt=Ve.global.def("{}");Rt.forEach(function(ur){var br=Ct[ur];if(h.isDynamic(br)){typeof br=="function"&&(br=Ct[ur]=h.unbox(br));var Ur=Un(br,null);Bt=Bt||Ur.thisDep,yt=yt||Ur.propDep,Dt=Dt||Ur.contextDep}else{switch(Ot(Pt,".",ur,"="),typeof br){case"number":Ot(br);break;case"string":Ot('"',br,'"');break;case"object":Array.isArray(br)&&Ot("[",br.join(),"]");break;default:Ot(Ve.link(br));break}Ot(";")}});function ht(ur,br){Rt.forEach(function(Ur){var Di=Ct[Ur];if(h.isDynamic(Di)){var fi=ur.invoke(br,Di);br(Pt,".",Ur,"=",fi,";")}})}Qe.dynamic[at]=new h.DynamicVariable(Xo,{thisDep:Bt,contextDep:Dt,propDep:yt,ref:Pt,append:ht}),delete Qe.static[at]}function qt(Ve,Qe,at,Ct,Ot){var Rt=kn();Rt.stats=Rt.link(Ot),Object.keys(Qe.static).forEach(function(Dt){hr(Rt,Qe,Dt)}),dr.forEach(function(Dt){hr(Rt,Ve,Dt)});var Bt=Vl(Ve,Qe,at,Ct,Rt);return Bt.shader.program&&(Bt.shader.program.attributes.sort(function(Dt,yt){return Dt.name<yt.name?-1:1}),Bt.shader.program.uniforms.sort(function(Dt,yt){return Dt.name<yt.name?-1:1})),de(Rt,Bt),ft(Rt,Bt),Ue(Rt,Bt),e(Rt.compile(),{destroy:function(){Bt.shader.program.destroy()}})}return{next:Bn,current:Zi,procs:function(){var Ve=kn(),Qe=Ve.proc("poll"),at=Ve.proc("refresh"),Ct=Ve.block();Qe(Ct),at(Ct);var Ot=Ve.shared,Rt=Ot.gl,Bt=Ot.next,Dt=Ot.current;Ct(Dt,".dirty=false;"),Vs(Ve,Qe),Vs(Ve,at,null,!0);var yt;Ri&&(yt=Ve.link(Ri)),zt.oes_vertex_array_object&&at(Ve.link(zt.oes_vertex_array_object),".bindVertexArrayOES(null);");var Pt=at.def(Ot.attributes),ht=at.def(0),ur=Ve.cond(ht,".buffer");ur.then(Rt,".enableVertexAttribArray(i);",Rt,".bindBuffer(",Sr,",",ht,".buffer.buffer);",Rt,".vertexAttribPointer(i,",ht,".size,",ht,".type,",ht,".normalized,",ht,".stride,",ht,".offset);").else(Rt,".disableVertexAttribArray(i);",Rt,".vertexAttrib4f(i,",ht,".x,",ht,".y,",ht,".z,",ht,".w);",ht,".buffer=null;");var br=Ve.link(Rr.maxAttributes,{stable:!0});return at("for(var i=0;i<",br,";++i){",ht,"=",Pt,"[i];",ur,"}"),Ri&&at("for(var i=0;i<",br,";++i){",yt,".vertexAttribDivisorANGLE(i,",Pt,"[i].divisor);","}"),at(Ve.shared.vao,".currentVAO=null;",Ve.shared.vao,".setVAO(",Ve.shared.vao,".targetVAO);"),Object.keys(li).forEach(function(Ur){var Di=li[Ur],fi=Ct.def(Bt,".",Ur),Ti=Ve.block();Ti("if(",fi,"){",Rt,".enable(",Di,")}else{",Rt,".disable(",Di,")}",Dt,".",Ur,"=",fi,";"),at(Ti),Qe("if(",fi,"!==",Dt,".",Ur,"){",Ti,"}")}),Object.keys(mn).forEach(function(Ur){var Di=mn[Ur],fi=Zi[Ur],Ti,gn,rn=Ve.block();if(rn(Rt,".",Di,"("),Mn(fi)){var Ci=fi.length;Ti=Ve.global.def(Bt,".",Ur),gn=Ve.global.def(Dt,".",Ur),rn(M(Ci,function(Bi){return Ti+"["+Bi+"]"}),");",M(Ci,function(Bi){return gn+"["+Bi+"]="+Ti+"["+Bi+"];"}).join("")),Qe("if(",M(Ci,function(Bi){return Ti+"["+Bi+"]!=="+gn+"["+Bi+"]"}).join("||"),"){",rn,"}")}else Ti=Ct.def(Bt,".",Ur),gn=Ct.def(Dt,".",Ur),rn(Ti,");",Dt,".",Ur,"=",Ti,";"),Qe("if(",Ti,"!==",gn,"){",rn,"}");at(rn)}),Ve.compile()}(),compile:qt}}function Ia(){return{vaoCount:0,bufferCount:0,elementsCount:0,framebufferCount:0,shaderCount:0,textureCount:0,cubeCount:0,renderbufferCount:0,maxTextureUnits:0}}var yo=34918,Da=34919,go=35007,Is=function(Me,bt){if(!bt.ext_disjoint_timer_query)return null;var zt=[];function Rr(){return zt.pop()||bt.ext_disjoint_timer_query.createQueryEXT()}function jr(xi){zt.push(xi)}var Nr=[];function Gr(xi){var Ri=Rr();bt.ext_disjoint_timer_query.beginQueryEXT(go,Ri),Nr.push(Ri),Wi(Nr.length-1,Nr.length,xi)}function mi(){bt.ext_disjoint_timer_query.endQueryEXT(go)}function Ui(){this.startQueryIndex=-1,this.endQueryIndex=-1,this.sum=0,this.stats=null}var qi=[];function Ei(){return qi.pop()||new Ui}function Hn(xi){qi.push(xi)}var en=[];function Wi(xi,Ri,ci){var an=Ei();an.startQueryIndex=xi,an.endQueryIndex=Ri,an.sum=0,an.stats=ci,en.push(an)}var si=[],Mr=[];function Yr(){var xi,Ri,ci=Nr.length;if(ci!==0){Mr.length=Math.max(Mr.length,ci+1),si.length=Math.max(si.length,ci+1),si[0]=0,Mr[0]=0;var an=0;for(xi=0,Ri=0;Ri<Nr.length;++Ri){var Zi=Nr[Ri];bt.ext_disjoint_timer_query.getQueryObjectEXT(Zi,Da)?(an+=bt.ext_disjoint_timer_query.getQueryObjectEXT(Zi,yo),jr(Zi)):Nr[xi++]=Zi,si[Ri+1]=an,Mr[Ri+1]=xi}for(Nr.length=xi,xi=0,Ri=0;Ri<en.length;++Ri){var Bn=en[Ri],hi=Bn.startQueryIndex,li=Bn.endQueryIndex;Bn.sum+=si[li]-si[hi];var mn=Mr[hi],Ji=Mr[li];Ji===mn?(Bn.stats.gpuTime+=Bn.sum/1e6,Hn(Bn)):(Bn.startQueryIndex=mn,Bn.endQueryIndex=Ji,en[xi++]=Bn)}en.length=xi}}return{beginQuery:Gr,endQuery:mi,pushScopeStats:Wi,update:Yr,getNumPendingQueries:function(){return Nr.length},clear:function(){zt.push.apply(zt,Nr);for(var xi=0;xi<zt.length;xi++)bt.ext_disjoint_timer_query.deleteQueryEXT(zt[xi]);Nr.length=0,zt.length=0},restore:function(){Nr.length=0,zt.length=0}}},Ms=16384,Xs=256,Gn=1024,ja=34962,Fo="webglcontextlost",Uo="webglcontextrestored",$s=1,Sl=2,bu=3;function dl(Me,bt){for(var zt=0;zt<Me.length;++zt)if(Me[zt]===bt)return zt;return-1}function Sc(Me){var bt=x(Me);if(!bt)return null;var zt=bt.gl,Rr=zt.getContextAttributes(),jr=zt.isContextLost(),Nr=C(zt,bt);if(!Nr)return null;var Gr=_(),mi=Ia(),Ui=bt.cachedCode||{},qi=Nr.extensions,Ei=Is(zt,qi),Hn=v(),en=zt.drawingBufferWidth,Wi=zt.drawingBufferHeight,si={tick:0,time:0,viewportWidth:en,viewportHeight:Wi,framebufferWidth:en,framebufferHeight:Wi,drawingBufferWidth:en,drawingBufferHeight:Wi,pixelRatio:bt.pixelRatio},Mr={},Yr={elements:null,primitive:4,count:-1,offset:0,instances:-1},xi=ti(zt,qi),Ri=ri(zt,mi,bt,Zi),ci=kr(zt,qi,Ri,mi),an=ku(zt,qi,xi,mi,Ri,ci,Yr);function Zi(q){return an.destroyBuffer(q)}var Bn=Wu(zt,Gr,mi,bt),hi=of(zt,qi,xi,function(){Ji.procs.poll()},si,mi,bt),li=Dc(zt,qi,xi,mi,bt),mn=lf(zt,qi,xi,hi,li,mi),Ji=Ea(zt,Gr,qi,xi,Ri,ci,hi,mn,Mr,an,Bn,Yr,si,Ei,Ui,bt),Vi=Wl(zt,mn,Ji.procs.poll,si,Rr,qi,xi),Ni=Ji.next,pn=zt.canvas,Vn=[],na=[],Ki=[],kn=[bt.onDestroy],ta=null;function oa(){if(Vn.length===0){Ei&&Ei.update(),ta=null;return}ta=d.next(oa),Ys();for(var q=Vn.length-1;q>=0;--q){var K=Vn[q];K&&K(si,null,0)}zt.flush(),Ei&&Ei.update()}function ba(){!ta&&Vn.length>0&&(ta=d.next(oa))}function is(){ta&&(d.cancel(oa),ta=null)}function Zs(q){q.preventDefault(),jr=!0,is(),na.forEach(function(K){K()})}function Va(q){zt.getError(),jr=!1,Nr.restore(),Bn.restore(),Ri.restore(),hi.restore(),li.restore(),mn.restore(),an.restore(),Ei&&Ei.restore(),Ji.procs.refresh(),ba(),Ki.forEach(function(K){K()})}pn&&(pn.addEventListener(Fo,Zs,!1),pn.addEventListener(Uo,Va,!1));function Ml(){Vn.length=0,is(),pn&&(pn.removeEventListener(Fo,Zs),pn.removeEventListener(Uo,Va)),Bn.clear(),mn.clear(),li.clear(),an.clear(),hi.clear(),ci.clear(),Ri.clear(),Ei&&Ei.clear(),kn.forEach(function(q){q()})}function zo(q){function K(Rt){var Bt=e({},Rt);delete Bt.uniforms,delete Bt.attributes,delete Bt.context,delete Bt.vao,"stencil"in Bt&&Bt.stencil.op&&(Bt.stencil.opBack=Bt.stencil.opFront=Bt.stencil.op,delete Bt.stencil.op);function Dt(yt){if(yt in Bt){var Pt=Bt[yt];delete Bt[yt],Object.keys(Pt).forEach(function(ht){Bt[yt+"."+ht]=Pt[ht]})}}return Dt("blend"),Dt("depth"),Dt("cull"),Dt("stencil"),Dt("polygonOffset"),Dt("scissor"),Dt("sample"),"vao"in Rt&&(Bt.vao=Rt.vao),Bt}function de(Rt,Bt){var Dt={},yt={};return Object.keys(Rt).forEach(function(Pt){var ht=Rt[Pt];if(h.isDynamic(ht)){yt[Pt]=h.unbox(ht,Pt);return}else if(Bt&&Array.isArray(ht)){for(var ur=0;ur<ht.length;++ur)if(h.isDynamic(ht[ur])){yt[Pt]=h.unbox(ht,Pt);return}}Dt[Pt]=ht}),{dynamic:yt,static:Dt}}var ne=de(q.context||{},!0),we=de(q.uniforms||{},!0),Ue=de(q.attributes||{},!1),ft=de(K(q),!1),Xt={gpuTime:0,cpuTime:0,count:0},hr=Ji.compile(ft,Ue,we,ne,Xt),qt=hr.draw,Ve=hr.batch,Qe=hr.scope,at=[];function Ct(Rt){for(;at.length<Rt;)at.push(null);return at}function Ot(Rt,Bt){var Dt;if(typeof Rt=="function")return Qe.call(this,null,Rt,0);if(typeof Bt=="function")if(typeof Rt=="number")for(Dt=0;Dt<Rt;++Dt)Qe.call(this,null,Bt,Dt);else if(Array.isArray(Rt))for(Dt=0;Dt<Rt.length;++Dt)Qe.call(this,Rt[Dt],Bt,Dt);else return Qe.call(this,Rt,Bt,0);else if(typeof Rt=="number"){if(Rt>0)return Ve.call(this,Ct(Rt|0),Rt|0)}else if(Array.isArray(Rt)){if(Rt.length)return Ve.call(this,Rt,Rt.length)}else return qt.call(this,Rt)}return e(Ot,{stats:Xt,destroy:function(){hr.destroy()}})}var Qs=mn.setFBO=zo({framebuffer:h.define.call(null,$s,"framebuffer")});function al(q,K){var de=0;Ji.procs.poll();var ne=K.color;ne&&(zt.clearColor(+ne[0]||0,+ne[1]||0,+ne[2]||0,+ne[3]||0),de|=Ms),"depth"in K&&(zt.clearDepth(+K.depth),de|=Xs),"stencil"in K&&(zt.clearStencil(K.stencil|0),de|=Gn),zt.clear(de)}function Vl(q){if("framebuffer"in q)if(q.framebuffer&&q.framebuffer_reglType==="framebufferCube")for(var K=0;K<6;++K)Qs(e({framebuffer:q.framebuffer.faces[K]},q),al);else Qs(q,al);else al(null,q)}function ss(q){Vn.push(q);function K(){var de=dl(Vn,q);function ne(){var we=dl(Vn,ne);Vn[we]=Vn[Vn.length-1],Vn.length-=1,Vn.length<=0&&is()}Vn[de]=ne}return ba(),{cancel:K}}function Vs(){var q=Ni.viewport,K=Ni.scissor_box;q[0]=q[1]=K[0]=K[1]=0,si.viewportWidth=si.framebufferWidth=si.drawingBufferWidth=q[2]=K[2]=zt.drawingBufferWidth,si.viewportHeight=si.framebufferHeight=si.drawingBufferHeight=q[3]=K[3]=zt.drawingBufferHeight}function Ys(){si.tick+=1,si.time=ol(),Vs(),Ji.procs.poll()}function wa(){hi.refresh(),Vs(),Ji.procs.refresh(),Ei&&Ei.update()}function ol(){return(v()-Hn)/1e3}wa();function io(q,K){var de;switch(q){case"frame":return ss(K);case"lost":de=na;break;case"restore":de=Ki;break;case"destroy":de=kn;break;default:}return de.push(K),{cancel:function(){for(var ne=0;ne<de.length;++ne)if(de[ne]===K){de[ne]=de[de.length-1],de.pop();return}}}}function Y(){return Ui}function D(q){Object.entries(q).forEach(function(K){Ui[K[0]]=K[1]})}var J=e(zo,{clear:Vl,prop:h.define.bind(null,$s),context:h.define.bind(null,Sl),this:h.define.bind(null,bu),draw:zo({}),buffer:function(q){return Ri.create(q,ja,!1,!1)},elements:function(q){return ci.create(q,!1)},texture:hi.create2D,cube:hi.createCube,renderbuffer:li.create,framebuffer:mn.create,framebufferCube:mn.createCube,vao:an.createVAO,attributes:Rr,frame:ss,on:io,limits:xi,hasExtension:function(q){return xi.extensions.indexOf(q.toLowerCase())>=0},read:Vi,destroy:Ml,_gl:zt,_refresh:wa,poll:function(){Ys(),Ei&&Ei.update()},now:ol,stats:mi,getCachedCode:Y,preloadCachedCode:D});return bt.onDone(null,J),J}return Sc})});var Gqe=ye((r_r,Vqe)=>{"use strict";var nUt=Zm();Vqe.exports=function(t){if(t?typeof t=="string"&&(t={container:t}):t={},Nqe(t)?t={container:t}:aUt(t)?t={container:t}:oUt(t)?t={gl:t}:t=nUt(t,{container:"container target element el canvas holder parent parentNode wrapper use ref root node",gl:"gl context webgl glContext",attrs:"attributes attrs contextAttributes",pixelRatio:"pixelRatio pxRatio px ratio pxratio pixelratio",width:"w width",height:"h height"},!0),t.pixelRatio||(t.pixelRatio=window.pixelRatio||1),t.gl)return t.gl;if(t.canvas&&(t.container=t.canvas.parentNode),t.container){if(typeof t.container=="string"){var r=document.querySelector(t.container);if(!r)throw Error("Element "+t.container+" is not found");t.container=r}Nqe(t.container)?(t.canvas=t.container,t.container=t.canvas.parentNode):t.canvas||(t.canvas=Uqe(),t.container.appendChild(t.canvas),Bqe(t))}else if(!t.canvas)if(typeof document!="undefined")t.container=document.body||document.documentElement,t.canvas=Uqe(),t.container.appendChild(t.canvas),Bqe(t);else throw Error("Not DOM environment. Use headless-gl.");return t.gl||["webgl","experimental-webgl","webgl-experimental"].some(function(n){try{t.gl=t.canvas.getContext(n,t.attrs)}catch(i){}return t.gl}),t.gl};function Bqe(e){if(e.container)if(e.container==document.body)document.body.style.width||(e.canvas.width=e.width||e.pixelRatio*window.innerWidth),document.body.style.height||(e.canvas.height=e.height||e.pixelRatio*window.innerHeight);else{var t=e.container.getBoundingClientRect();e.canvas.width=e.width||t.right-t.left,e.canvas.height=e.height||t.bottom-t.top}}function Nqe(e){return typeof e.getContext=="function"&&"width"in e&&"height"in e}function aUt(e){return typeof e.nodeName=="string"&&typeof e.appendChild=="function"&&typeof e.getBoundingClientRect=="function"}function oUt(e){return typeof e.drawArrays=="function"||typeof e.drawElements=="function"}function Uqe(){var e=document.createElement("canvas");return e.style.position="absolute",e.style.top=0,e.style.left=0,e}});var jqe=ye((i_r,Hqe)=>{"use strict";var sUt=eK(),lUt=[32,126];Hqe.exports=uUt;function uUt(e){e=e||{};var t=e.shape?e.shape:e.canvas?[e.canvas.width,e.canvas.height]:[512,512],r=e.canvas||document.createElement("canvas"),n=e.font,i=typeof e.step=="number"?[e.step,e.step]:e.step||[32,32],a=e.chars||lUt;if(n&&typeof n!="string"&&(n=sUt(n)),!Array.isArray(a))a=String(a).split("");else if(a.length===2&&typeof a[0]=="number"&&typeof a[1]=="number"){for(var o=[],s=a[0],l=0;s<=a[1];s++)o[l++]=String.fromCharCode(s);a=o}t=t.slice(),r.width=t[0],r.height=t[1];var u=r.getContext("2d");u.fillStyle="#000",u.fillRect(0,0,r.width,r.height),u.font=n,u.textAlign="center",u.textBaseline="middle",u.fillStyle="#fff";for(var c=i[0]/2,f=i[1]/2,s=0;s<a.length;s++)u.fillText(a[s],c,f),(c+=i[0])>t[0]-i[0]/2&&(c=i[0]/2,f+=i[1]);return r}});var nK=ye(Bh=>{"use strict";"use restrict";var iK=32;Bh.INT_BITS=iK;Bh.INT_MAX=2147483647;Bh.INT_MIN=-1<<iK-1;Bh.sign=function(e){return(e>0)-(e<0)};Bh.abs=function(e){var t=e>>iK-1;return(e^t)-t};Bh.min=function(e,t){return t^(e^t)&-(e<t)};Bh.max=function(e,t){return e^(e^t)&-(e<t)};Bh.isPow2=function(e){return!(e&e-1)&&!!e};Bh.log2=function(e){var t,r;return t=(e>65535)<<4,e>>>=t,r=(e>255)<<3,e>>>=r,t|=r,r=(e>15)<<2,e>>>=r,t|=r,r=(e>3)<<1,e>>>=r,t|=r,t|e>>1};Bh.log10=function(e){return e>=1e9?9:e>=1e8?8:e>=1e7?7:e>=1e6?6:e>=1e5?5:e>=1e4?4:e>=1e3?3:e>=100?2:e>=10?1:0};Bh.popCount=function(e){return e=e-(e>>>1&1431655765),e=(e&858993459)+(e>>>2&858993459),(e+(e>>>4)&252645135)*16843009>>>24};function Wqe(e){var t=32;return e&=-e,e&&t--,e&65535&&(t-=16),e&16711935&&(t-=8),e&252645135&&(t-=4),e&858993459&&(t-=2),e&1431655765&&(t-=1),t}Bh.countTrailingZeros=Wqe;Bh.nextPow2=function(e){return e+=e===0,--e,e|=e>>>1,e|=e>>>2,e|=e>>>4,e|=e>>>8,e|=e>>>16,e+1};Bh.prevPow2=function(e){return e|=e>>>1,e|=e>>>2,e|=e>>>4,e|=e>>>8,e|=e>>>16,e-(e>>>1)};Bh.parity=function(e){return e^=e>>>16,e^=e>>>8,e^=e>>>4,e&=15,27030>>>e&1};var Mk=new Array(256);(function(e){for(var t=0;t<256;++t){var r=t,n=t,i=7;for(r>>>=1;r;r>>>=1)n<<=1,n|=r&1,--i;e[t]=n<<i&255}})(Mk);Bh.reverse=function(e){return Mk[e&255]<<24|Mk[e>>>8&255]<<16|Mk[e>>>16&255]<<8|Mk[e>>>24&255]};Bh.interleave2=function(e,t){return e&=65535,e=(e|e<<8)&16711935,e=(e|e<<4)&252645135,e=(e|e<<2)&858993459,e=(e|e<<1)&1431655765,t&=65535,t=(t|t<<8)&16711935,t=(t|t<<4)&252645135,t=(t|t<<2)&858993459,t=(t|t<<1)&1431655765,e|t<<1};Bh.deinterleave2=function(e,t){return e=e>>>t&1431655765,e=(e|e>>>1)&858993459,e=(e|e>>>2)&252645135,e=(e|e>>>4)&16711935,e=(e|e>>>16)&65535,e<<16>>16};Bh.interleave3=function(e,t,r){return e&=1023,e=(e|e<<16)&4278190335,e=(e|e<<8)&251719695,e=(e|e<<4)&3272356035,e=(e|e<<2)&1227133513,t&=1023,t=(t|t<<16)&4278190335,t=(t|t<<8)&251719695,t=(t|t<<4)&3272356035,t=(t|t<<2)&1227133513,e|=t<<1,r&=1023,r=(r|r<<16)&4278190335,r=(r|r<<8)&251719695,r=(r|r<<4)&3272356035,r=(r|r<<2)&1227133513,e|r<<2};Bh.deinterleave3=function(e,t){return e=e>>>t&1227133513,e=(e|e>>>2)&3272356035,e=(e|e>>>4)&251719695,e=(e|e>>>8)&4278190335,e=(e|e>>>16)&1023,e<<22>>22};Bh.nextCombination=function(e){var t=e|e-1;return t+1|(~t&-~t)-1>>>Wqe(e)+1}});var Yqe=ye((a_r,Zqe)=>{"use strict";function Xqe(e,t,r){var n=e[r]|0;if(n<=0)return[];var i=new Array(n),a;if(r===e.length-1)for(a=0;a<n;++a)i[a]=t;else for(a=0;a<n;++a)i[a]=Xqe(e,t,r+1);return i}function cUt(e,t){var r,n;for(r=new Array(e),n=0;n<e;++n)r[n]=t;return r}function fUt(e,t){switch(typeof t=="undefined"&&(t=0),typeof e){case"number":if(e>0)return cUt(e|0,t);break;case"object":if(typeof e.length=="number")return Xqe(e,t,0);break}return[]}Zqe.exports=fUt});var cBe=ye(mu=>{"use strict";var fx=nK(),Ev=Yqe(),Kqe=c2().Buffer;window.__TYPEDARRAY_POOL||(window.__TYPEDARRAY_POOL={UINT8:Ev([32,0]),UINT16:Ev([32,0]),UINT32:Ev([32,0]),BIGUINT64:Ev([32,0]),INT8:Ev([32,0]),INT16:Ev([32,0]),INT32:Ev([32,0]),BIGINT64:Ev([32,0]),FLOAT:Ev([32,0]),DOUBLE:Ev([32,0]),DATA:Ev([32,0]),UINT8C:Ev([32,0]),BUFFER:Ev([32,0])});var hUt=typeof Uint8ClampedArray!="undefined",dUt=typeof BigUint64Array!="undefined",vUt=typeof BigInt64Array!="undefined",nd=window.__TYPEDARRAY_POOL;nd.UINT8C||(nd.UINT8C=Ev([32,0]));nd.BIGUINT64||(nd.BIGUINT64=Ev([32,0]));nd.BIGINT64||(nd.BIGINT64=Ev([32,0]));nd.BUFFER||(nd.BUFFER=Ev([32,0]));var _z=nd.DATA,xz=nd.BUFFER;mu.free=function(t){if(Kqe.isBuffer(t))xz[fx.log2(t.length)].push(t);else{if(Object.prototype.toString.call(t)!=="[object ArrayBuffer]"&&(t=t.buffer),!t)return;var r=t.length||t.byteLength,n=fx.log2(r)|0;_z[n].push(t)}};function Jqe(e){if(e){var t=e.length||e.byteLength,r=fx.log2(t);_z[r].push(e)}}function pUt(e){Jqe(e.buffer)}mu.freeUint8=mu.freeUint16=mu.freeUint32=mu.freeBigUint64=mu.freeInt8=mu.freeInt16=mu.freeInt32=mu.freeBigInt64=mu.freeFloat32=mu.freeFloat=mu.freeFloat64=mu.freeDouble=mu.freeUint8Clamped=mu.freeDataView=pUt;mu.freeArrayBuffer=Jqe;mu.freeBuffer=function(t){xz[fx.log2(t.length)].push(t)};mu.malloc=function(t,r){if(r===void 0||r==="arraybuffer")return Bp(t);switch(r){case"uint8":return aK(t);case"uint16":return $qe(t);case"uint32":return Qqe(t);case"int8":return eBe(t);case"int16":return tBe(t);case"int32":return rBe(t);case"float":case"float32":return iBe(t);case"double":case"float64":return nBe(t);case"uint8_clamped":return aBe(t);case"bigint64":return sBe(t);case"biguint64":return oBe(t);case"buffer":return uBe(t);case"data":case"dataview":return lBe(t);default:return null}return null};function Bp(t){var t=fx.nextPow2(t),r=fx.log2(t),n=_z[r];return n.length>0?n.pop():new ArrayBuffer(t)}mu.mallocArrayBuffer=Bp;function aK(e){return new Uint8Array(Bp(e),0,e)}mu.mallocUint8=aK;function $qe(e){return new Uint16Array(Bp(2*e),0,e)}mu.mallocUint16=$qe;function Qqe(e){return new Uint32Array(Bp(4*e),0,e)}mu.mallocUint32=Qqe;function eBe(e){return new Int8Array(Bp(e),0,e)}mu.mallocInt8=eBe;function tBe(e){return new Int16Array(Bp(2*e),0,e)}mu.mallocInt16=tBe;function rBe(e){return new Int32Array(Bp(4*e),0,e)}mu.mallocInt32=rBe;function iBe(e){return new Float32Array(Bp(4*e),0,e)}mu.mallocFloat32=mu.mallocFloat=iBe;function nBe(e){return new Float64Array(Bp(8*e),0,e)}mu.mallocFloat64=mu.mallocDouble=nBe;function aBe(e){return hUt?new Uint8ClampedArray(Bp(e),0,e):aK(e)}mu.mallocUint8Clamped=aBe;function oBe(e){return dUt?new BigUint64Array(Bp(8*e),0,e):null}mu.mallocBigUint64=oBe;function sBe(e){return vUt?new BigInt64Array(Bp(8*e),0,e):null}mu.mallocBigInt64=sBe;function lBe(e){return new DataView(Bp(e),0,e)}mu.mallocDataView=lBe;function uBe(e){e=fx.nextPow2(e);var t=fx.log2(e),r=xz[t];return r.length>0?r.pop():new Kqe(e)}mu.mallocBuffer=uBe;mu.clearCache=function(){for(var t=0;t<32;++t)nd.UINT8[t].length=0,nd.UINT16[t].length=0,nd.UINT32[t].length=0,nd.INT8[t].length=0,nd.INT16[t].length=0,nd.INT32[t].length=0,nd.FLOAT[t].length=0,nd.DOUBLE[t].length=0,nd.BIGUINT64[t].length=0,nd.BIGINT64[t].length=0,nd.UINT8C[t].length=0,_z[t].length=0,xz[t].length=0}});var hBe=ye((s_r,fBe)=>{"use strict";var gUt=Object.prototype.toString;fBe.exports=function(e){var t;return gUt.call(e)==="[object Object]"&&(t=Object.getPrototypeOf(e),t===null||t===Object.getPrototypeOf({}))}});var oK=ye((l_r,dBe)=>{dBe.exports=function(t,r){r||(r=[0,""]),t=String(t);var n=parseFloat(t,10);return r[0]=n,r[1]=t.match(/[\d.\-\+]*\s*(.*)/)[1]||"",r}});var gBe=ye((u_r,pBe)=>{"use strict";var mUt=oK();pBe.exports=vBe;var Ek=96;function sK(e,t){var r=mUt(getComputedStyle(e).getPropertyValue(t));return r[0]*vBe(r[1],e)}function yUt(e,t){var r=document.createElement("div");r.style["font-size"]="128"+e,t.appendChild(r);var n=sK(r,"font-size")/128;return t.removeChild(r),n}function vBe(e,t){switch(t=t||document.body,e=(e||"px").trim().toLowerCase(),(t===window||t===document)&&(t=document.body),e){case"%":return t.clientHeight/100;case"ch":case"ex":return yUt(e,t);case"em":return sK(t,"font-size");case"rem":return sK(document.body,"font-size");case"vw":return window.innerWidth/100;case"vh":return window.innerHeight/100;case"vmin":return Math.min(window.innerWidth,window.innerHeight)/100;case"vmax":return Math.max(window.innerWidth,window.innerHeight)/100;case"in":return Ek;case"cm":return Ek/2.54;case"mm":return Ek/25.4;case"pt":return Ek/72;case"pc":return Ek/6}return 1}});var _Be=ye((c_r,yBe)=>{"use strict";yBe.exports=Tz;var _Ut=Tz.canvas=document.createElement("canvas"),bz=_Ut.getContext("2d"),mBe=wz([32,126]);Tz.createPairs=wz;Tz.ascii=mBe;function Tz(e,t){Array.isArray(e)&&(e=e.join(", "));var r={},n,i=16,a=.05;t&&(t.length===2&&typeof t[0]=="number"?n=wz(t):Array.isArray(t)?n=t:(t.o?n=wz(t.o):t.pairs&&(n=t.pairs),t.fontSize&&(i=t.fontSize),t.threshold!=null&&(a=t.threshold))),n||(n=mBe),bz.font=i+"px "+e;for(var o=0;o<n.length;o++){var s=n[o],l=bz.measureText(s[0]).width+bz.measureText(s[1]).width,u=bz.measureText(s).width;if(Math.abs(l-u)>i*a){var c=(u-l)/i;r[s]=c*1e3}}return r}function wz(e){for(var t=[],r=e[0];r<=e[1];r++)for(var n=String.fromCharCode(r),i=e[0];i<e[1];i++){var a=String.fromCharCode(i),o=n+a;t.push(o)}return t}});var TBe=ye((f_r,wBe)=>{"use strict";wBe.exports=hx;hx.canvas=document.createElement("canvas");hx.cache={};function hx(o,t){t||(t={}),(typeof o=="string"||Array.isArray(o))&&(t.family=o);var r=Array.isArray(t.family)?t.family.join(", "):t.family;if(!r)throw Error("`family` must be defined");var n=t.size||t.fontSize||t.em||48,i=t.weight||t.fontWeight||"",a=t.style||t.fontStyle||"",o=[a,i,n].join(" ")+"px "+r,s=t.origin||"top";if(hx.cache[r]&&n<=hx.cache[r].em)return xBe(hx.cache[r],s);var l=t.canvas||hx.canvas,u=l.getContext("2d"),c={upper:t.upper!==void 0?t.upper:"H",lower:t.lower!==void 0?t.lower:"x",descent:t.descent!==void 0?t.descent:"p",ascent:t.ascent!==void 0?t.ascent:"h",tittle:t.tittle!==void 0?t.tittle:"i",overshoot:t.overshoot!==void 0?t.overshoot:"O"},f=Math.ceil(n*1.5);l.height=f,l.width=f*.5,u.font=o;var h="H",d={top:0};u.clearRect(0,0,f,f),u.textBaseline="top",u.fillStyle="black",u.fillText(h,0,0);var v=Km(u.getImageData(0,0,f,f));u.clearRect(0,0,f,f),u.textBaseline="bottom",u.fillText(h,0,f);var _=Km(u.getImageData(0,0,f,f));d.lineHeight=d.bottom=f-_+v,u.clearRect(0,0,f,f),u.textBaseline="alphabetic",u.fillText(h,0,f);var b=Km(u.getImageData(0,0,f,f)),p=f-b-1+v;d.baseline=d.alphabetic=p,u.clearRect(0,0,f,f),u.textBaseline="middle",u.fillText(h,0,f*.5);var k=Km(u.getImageData(0,0,f,f));d.median=d.middle=f-k-1+v-f*.5,u.clearRect(0,0,f,f),u.textBaseline="hanging",u.fillText(h,0,f*.5);var E=Km(u.getImageData(0,0,f,f));d.hanging=f-E-1+v-f*.5,u.clearRect(0,0,f,f),u.textBaseline="ideographic",u.fillText(h,0,f);var S=Km(u.getImageData(0,0,f,f));if(d.ideographic=f-S-1+v,c.upper&&(u.clearRect(0,0,f,f),u.textBaseline="top",u.fillText(c.upper,0,0),d.upper=Km(u.getImageData(0,0,f,f)),d.capHeight=d.baseline-d.upper),c.lower&&(u.clearRect(0,0,f,f),u.textBaseline="top",u.fillText(c.lower,0,0),d.lower=Km(u.getImageData(0,0,f,f)),d.xHeight=d.baseline-d.lower),c.tittle&&(u.clearRect(0,0,f,f),u.textBaseline="top",u.fillText(c.tittle,0,0),d.tittle=Km(u.getImageData(0,0,f,f))),c.ascent&&(u.clearRect(0,0,f,f),u.textBaseline="top",u.fillText(c.ascent,0,0),d.ascent=Km(u.getImageData(0,0,f,f))),c.descent&&(u.clearRect(0,0,f,f),u.textBaseline="top",u.fillText(c.descent,0,0),d.descent=bBe(u.getImageData(0,0,f,f))),c.overshoot){u.clearRect(0,0,f,f),u.textBaseline="top",u.fillText(c.overshoot,0,0);var L=bBe(u.getImageData(0,0,f,f));d.overshoot=L-p}for(var x in d)d[x]/=n;return d.em=n,hx.cache[r]=d,xBe(d,s)}function xBe(e,t){var r={};typeof t=="string"&&(t=e[t]);for(var n in e)n!=="em"&&(r[n]=e[n]-t);return r}function Km(e){for(var t=e.height,r=e.data,n=3;n<r.length;n+=4)if(r[n]!==0)return Math.floor((n-3)*.25/t)}function bBe(e){for(var t=e.height,r=e.data,n=r.length-1;n>0;n-=4)if(r[n]!==0)return Math.floor((n-3)*.25/t)}});var EBe=ye((h_r,MBe)=>{"use strict";var p5=Oqe(),xUt=Zm(),bUt=qqe(),wUt=Gqe(),TUt=NY(),lK=J_(),AUt=jqe(),dx=cBe(),SUt=i5(),MUt=hBe(),EUt=oK(),kUt=gBe(),CUt=_Be(),LUt=Oh(),PUt=TBe(),IUt=Z2(),RUt=nK(),ABe=RUt.nextPow2,SBe=new TUt,Sz=!1;document.body&&(Az=document.body.appendChild(document.createElement("div")),Az.style.font="italic small-caps bold condensed 16px/2 cursive",getComputedStyle(Az).fontStretch&&(Sz=!0),document.body.removeChild(Az));var Az,xc=function(t){DUt(t)?(t={regl:t},this.gl=t.regl._gl):this.gl=wUt(t),this.shader=SBe.get(this.gl),this.shader?this.regl=this.shader.regl:this.regl=t.regl||bUt({gl:this.gl}),this.charBuffer=this.regl.buffer({type:"uint8",usage:"stream"}),this.sizeBuffer=this.regl.buffer({type:"float",usage:"stream"}),this.shader||(this.shader=this.createShader(),SBe.set(this.gl,this.shader)),this.batch=[],this.fontSize=[],this.font=[],this.fontAtlas=[],this.draw=this.shader.draw.bind(this),this.render=function(){this.regl._refresh(),this.draw(this.batch)},this.canvas=this.gl.canvas,this.update(MUt(t)?t:{})};xc.prototype.createShader=function(){var t=this.regl,r=t({blend:{enable:!0,color:[0,0,0,1],func:{srcRGB:"src alpha",dstRGB:"one minus src alpha",srcAlpha:"one minus dst alpha",dstAlpha:"one"}},stencil:{enable:!1},depth:{enable:!1},count:t.prop("count"),offset:t.prop("offset"),attributes:{charOffset:{offset:4,stride:8,buffer:t.this("sizeBuffer")},width:{offset:0,stride:8,buffer:t.this("sizeBuffer")},char:t.this("charBuffer"),position:t.this("position")},uniforms:{atlasSize:function(i,a){return[a.atlas.width,a.atlas.height]},atlasDim:function(i,a){return[a.atlas.cols,a.atlas.rows]},atlas:function(i,a){return a.atlas.texture},charStep:function(i,a){return a.atlas.step},em:function(i,a){return a.atlas.em},color:t.prop("color"),opacity:t.prop("opacity"),viewport:t.this("viewportArray"),scale:t.this("scale"),align:t.prop("align"),baseline:t.prop("baseline"),translate:t.this("translate"),positionOffset:t.prop("positionOffset")},primitive:"points",viewport:t.this("viewport"),vert:`
+			precision highp float;
+			attribute float width, charOffset, char;
+			attribute vec2 position;
+			uniform float fontSize, charStep, em, align, baseline;
+			uniform vec4 viewport;
+			uniform vec4 color;
+			uniform vec2 atlasSize, atlasDim, scale, translate, positionOffset;
+			varying vec2 charCoord, charId;
+			varying float charWidth;
+			varying vec4 fontColor;
+			void main () {
+				vec2 offset = floor(em * (vec2(align + charOffset, baseline)
+					+ vec2(positionOffset.x, -positionOffset.y)))
+					/ (viewport.zw * scale.xy);
+
+				vec2 position = (position + translate) * scale;
+				position += offset * scale;
+
+				charCoord = position * viewport.zw + viewport.xy;
+
+				gl_Position = vec4(position * 2. - 1., 0, 1);
+
+				gl_PointSize = charStep;
+
+				charId.x = mod(char, atlasDim.x);
+				charId.y = floor(char / atlasDim.x);
+
+				charWidth = width * em;
+
+				fontColor = color / 255.;
+			}`,frag:`
+			precision highp float;
+			uniform float fontSize, charStep, opacity;
+			uniform vec2 atlasSize;
+			uniform vec4 viewport;
+			uniform sampler2D atlas;
+			varying vec4 fontColor;
+			varying vec2 charCoord, charId;
+			varying float charWidth;
+
+			float lightness(vec4 color) {
+				return color.r * 0.299 + color.g * 0.587 + color.b * 0.114;
+			}
+
+			void main () {
+				vec2 uv = gl_FragCoord.xy - charCoord + charStep * .5;
+				float halfCharStep = floor(charStep * .5 + .5);
+
+				// invert y and shift by 1px (FF expecially needs that)
+				uv.y = charStep - uv.y;
+
+				// ignore points outside of character bounding box
+				float halfCharWidth = ceil(charWidth * .5);
+				if (floor(uv.x) > halfCharStep + halfCharWidth ||
+					floor(uv.x) < halfCharStep - halfCharWidth) return;
+
+				uv += charId * charStep;
+				uv = uv / atlasSize;
+
+				vec4 color = fontColor;
+				vec4 mask = texture2D(atlas, uv);
+
+				float maskY = lightness(mask);
+				// float colorY = lightness(color);
+				color.a *= maskY;
+				color.a *= opacity;
+
+				// color.a += .1;
+
+				// antialiasing, see yiq color space y-channel formula
+				// color.rgb += (1. - color.rgb) * (1. - mask.rgb);
+
+				gl_FragColor = color;
+			}`}),n={};return{regl:t,draw:r,atlas:n}};xc.prototype.update=function(t){var r=this;if(typeof t=="string")t={text:t};else if(!t)return;t=xUt(t,{position:"position positions coord coords coordinates",font:"font fontFace fontface typeface cssFont css-font family fontFamily",fontSize:"fontSize fontsize size font-size",text:"text texts chars characters value values symbols",align:"align alignment textAlign textbaseline",baseline:"baseline textBaseline textbaseline",direction:"dir direction textDirection",color:"color colour fill fill-color fillColor textColor textcolor",kerning:"kerning kern",range:"range dataBox",viewport:"vp viewport viewBox viewbox viewPort",opacity:"opacity alpha transparency visible visibility opaque",offset:"offset positionOffset padding shift indent indentation"},!0),t.opacity!=null&&(Array.isArray(t.opacity)?this.opacity=t.opacity.map(function(Ce){return parseFloat(Ce)}):this.opacity=parseFloat(t.opacity)),t.viewport!=null&&(this.viewport=SUt(t.viewport),this.viewportArray=[this.viewport.x,this.viewport.y,this.viewport.width,this.viewport.height]),this.viewport==null&&(this.viewport={x:0,y:0,width:this.gl.drawingBufferWidth,height:this.gl.drawingBufferHeight},this.viewportArray=[this.viewport.x,this.viewport.y,this.viewport.width,this.viewport.height]),t.kerning!=null&&(this.kerning=t.kerning),t.offset!=null&&(typeof t.offset=="number"&&(t.offset=[t.offset,0]),this.positionOffset=IUt(t.offset)),t.direction&&(this.direction=t.direction),t.range&&(this.range=t.range,this.scale=[1/(t.range[2]-t.range[0]),1/(t.range[3]-t.range[1])],this.translate=[-t.range[0],-t.range[1]]),t.scale&&(this.scale=t.scale),t.translate&&(this.translate=t.translate),this.scale||(this.scale=[1/this.viewport.width,1/this.viewport.height]),this.translate||(this.translate=[0,0]),!this.font.length&&!t.font&&(t.font=xc.baseFontSize+"px sans-serif");var n=!1,i=!1;if(t.font&&(Array.isArray(t.font)?t.font:[t.font]).forEach(function(Ce,me){if(typeof Ce=="string")try{Ce=p5.parse(Ce)}catch(Ze){Ce=p5.parse(xc.baseFontSize+"px "+Ce)}else{var ie=Ce.style,Se=Ce.weight,Le=Ce.stretch,Ae=Ce.variant;Ce=p5.parse(p5.stringify(Ce)),ie&&(Ce.style=ie),Se&&(Ce.weight=Se),Le&&(Ce.stretch=Le),Ae&&(Ce.variant=Ae)}var Fe=p5.stringify({size:xc.baseFontSize,family:Ce.family,stretch:Sz?Ce.stretch:void 0,variant:Ce.variant,weight:Ce.weight,style:Ce.style}),Pe=EUt(Ce.size),ge=Math.round(Pe[0]*kUt(Pe[1]));if(ge!==r.fontSize[me]&&(i=!0,r.fontSize[me]=ge),(!r.font[me]||Fe!=r.font[me].baseString)&&(n=!0,r.font[me]=xc.fonts[Fe],!r.font[me])){var Re=Ce.family.join(", "),ce=[Ce.style];Ce.style!=Ce.variant&&ce.push(Ce.variant),Ce.variant!=Ce.weight&&ce.push(Ce.weight),Sz&&Ce.weight!=Ce.stretch&&ce.push(Ce.stretch),r.font[me]={baseString:Fe,family:Re,weight:Ce.weight,stretch:Ce.stretch,style:Ce.style,variant:Ce.variant,width:{},kerning:{},metrics:PUt(Re,{origin:"top",fontSize:xc.baseFontSize,fontStyle:ce.join(" ")})},xc.fonts[Fe]=r.font[me]}}),(n||i)&&this.font.forEach(function(Ce,me){var ie=p5.stringify({size:r.fontSize[me],family:Ce.family,stretch:Sz?Ce.stretch:void 0,variant:Ce.variant,weight:Ce.weight,style:Ce.style});if(r.fontAtlas[me]=r.shader.atlas[ie],!r.fontAtlas[me]){var Se=Ce.metrics;r.shader.atlas[ie]=r.fontAtlas[me]={fontString:ie,step:Math.ceil(r.fontSize[me]*Se.bottom*.5)*2,em:r.fontSize[me],cols:0,rows:0,height:0,width:0,chars:[],ids:{},texture:r.regl.texture()}}t.text==null&&(t.text=r.text)}),typeof t.text=="string"&&t.position&&t.position.length>2){for(var a=Array(t.position.length*.5),o=0;o<a.length;o++)a[o]=t.text;t.text=a}var s;if(t.text!=null||n){if(this.textOffsets=[0],Array.isArray(t.text)){this.count=t.text[0].length,this.counts=[this.count];for(var l=1;l<t.text.length;l++)this.textOffsets[l]=this.textOffsets[l-1]+t.text[l-1].length,this.count+=t.text[l].length,this.counts.push(t.text[l].length);this.text=t.text.join("")}else this.text=t.text,this.count=this.text.length,this.counts=[this.count];s=[],this.font.forEach(function(Ce,me){xc.atlasContext.font=Ce.baseString;for(var ie=r.fontAtlas[me],Se=0;Se<r.text.length;Se++){var Le=r.text.charAt(Se);if(ie.ids[Le]==null&&(ie.ids[Le]=ie.chars.length,ie.chars.push(Le),s.push(Le)),Ce.width[Le]==null&&(Ce.width[Le]=xc.atlasContext.measureText(Le).width/xc.baseFontSize,r.kerning)){var Ae=[];for(var Fe in Ce.width)Ae.push(Fe+Le,Le+Fe);LUt(Ce.kerning,CUt(Ce.family,{pairs:Ae}))}}})}if(t.position)if(t.position.length>2){for(var u=!t.position[0].length,c=dx.mallocFloat(this.count*2),f=0,h=0;f<this.counts.length;f++){var d=this.counts[f];if(u)for(var v=0;v<d;v++)c[h++]=t.position[f*2],c[h++]=t.position[f*2+1];else for(var _=0;_<d;_++)c[h++]=t.position[f][0],c[h++]=t.position[f][1]}this.position.call?this.position({type:"float",data:c}):this.position=this.regl.buffer({type:"float",data:c}),dx.freeFloat(c)}else this.position.destroy&&this.position.destroy(),this.position={constant:t.position};if(t.text||n){var b=dx.mallocUint8(this.count),p=dx.mallocFloat(this.count*2);this.textWidth=[];for(var k=0,E=0;k<this.counts.length;k++){for(var S=this.counts[k],L=this.font[k]||this.font[0],x=this.fontAtlas[k]||this.fontAtlas[0],C=0;C<S;C++){var M=this.text.charAt(E),g=this.text.charAt(E-1);if(b[E]=x.ids[M],p[E*2]=L.width[M],C){var P=p[E*2-2],T=p[E*2],z=p[E*2-1],O=z+P*.5+T*.5;if(this.kerning){var V=L.kerning[g+M];V&&(O+=V*.001)}p[E*2+1]=O}else p[E*2+1]=p[E*2]*.5;E++}this.textWidth.push(p.length?p[E*2-2]*.5+p[E*2-1]:0)}t.align||(t.align=this.align),this.charBuffer({data:b,type:"uint8",usage:"stream"}),this.sizeBuffer({data:p,type:"float",usage:"stream"}),dx.freeUint8(b),dx.freeFloat(p),s.length&&this.font.forEach(function(Ce,me){var ie=r.fontAtlas[me],Se=ie.step,Le=Math.floor(xc.maxAtlasSize/Se),Ae=Math.min(Le,ie.chars.length),Fe=Math.ceil(ie.chars.length/Ae),Pe=ABe(Ae*Se),ge=ABe(Fe*Se);ie.width=Pe,ie.height=ge,ie.rows=Fe,ie.cols=Ae,ie.em&&ie.texture({data:AUt({canvas:xc.atlasCanvas,font:ie.fontString,chars:ie.chars,shape:[Pe,ge],step:[Se,Se]})})})}if(t.align&&(this.align=t.align,this.alignOffset=this.textWidth.map(function(Ce,me){var ie=Array.isArray(r.align)?r.align.length>1?r.align[me]:r.align[0]:r.align;if(typeof ie=="number")return ie;switch(ie){case"right":case"end":return-Ce;case"center":case"centre":case"middle":return-Ce*.5}return 0})),this.baseline==null&&t.baseline==null&&(t.baseline=0),t.baseline!=null&&(this.baseline=t.baseline,Array.isArray(this.baseline)||(this.baseline=[this.baseline]),this.baselineOffset=this.baseline.map(function(Ce,me){var ie=(r.font[me]||r.font[0]).metrics,Se=0;return Se+=ie.bottom*.5,typeof Ce=="number"?Se+=Ce-ie.baseline:Se+=-ie[Ce],Se*=-1,Se})),t.color!=null)if(t.color||(t.color="transparent"),typeof t.color=="string"||!isNaN(t.color))this.color=lK(t.color,"uint8");else{var G;if(typeof t.color[0]=="number"&&t.color.length>this.counts.length){var Z=t.color.length;G=dx.mallocUint8(Z);for(var j=(t.color.subarray||t.color.slice).bind(t.color),N=0;N<Z;N+=4)G.set(lK(j(N,N+4),"uint8"),N)}else{var H=t.color.length;G=dx.mallocUint8(H*4);for(var te=0;te<H;te++)G.set(lK(t.color[te]||0,"uint8"),te*4)}this.color=G}if(t.position||t.text||t.color||t.baseline||t.align||t.font||t.offset||t.opacity){var oe=this.color.length>4||this.baselineOffset.length>1||this.align&&this.align.length>1||this.fontAtlas.length>1||this.positionOffset.length>2;if(oe){var _e=Math.max(this.position.length*.5||0,this.color.length*.25||0,this.baselineOffset.length||0,this.alignOffset.length||0,this.font.length||0,this.opacity.length||0,this.positionOffset.length*.5||0);this.batch=Array(_e);for(var Ee=0;Ee<this.batch.length;Ee++)this.batch[Ee]={count:this.counts.length>1?this.counts[Ee]:this.counts[0],offset:this.textOffsets.length>1?this.textOffsets[Ee]:this.textOffsets[0],color:this.color?this.color.length<=4?this.color:this.color.subarray(Ee*4,Ee*4+4):[0,0,0,255],opacity:Array.isArray(this.opacity)?this.opacity[Ee]:this.opacity,baseline:this.baselineOffset[Ee]!=null?this.baselineOffset[Ee]:this.baselineOffset[0],align:this.align?this.alignOffset[Ee]!=null?this.alignOffset[Ee]:this.alignOffset[0]:0,atlas:this.fontAtlas[Ee]||this.fontAtlas[0],positionOffset:this.positionOffset.length>2?this.positionOffset.subarray(Ee*2,Ee*2+2):this.positionOffset}}else this.count?this.batch=[{count:this.count,offset:0,color:this.color||[0,0,0,255],opacity:Array.isArray(this.opacity)?this.opacity[0]:this.opacity,baseline:this.baselineOffset[0],align:this.alignOffset?this.alignOffset[0]:0,atlas:this.fontAtlas[0],positionOffset:this.positionOffset}]:this.batch=[]}};xc.prototype.destroy=function(){};xc.prototype.kerning=!0;xc.prototype.position={constant:new Float32Array(2)};xc.prototype.translate=null;xc.prototype.scale=null;xc.prototype.font=null;xc.prototype.text="";xc.prototype.positionOffset=[0,0];xc.prototype.opacity=1;xc.prototype.color=new Uint8Array([0,0,0,255]);xc.prototype.alignOffset=[0,0];xc.maxAtlasSize=1024;xc.atlasCanvas=document.createElement("canvas");xc.atlasContext=xc.atlasCanvas.getContext("2d",{alpha:!1});xc.baseFontSize=64;xc.fonts={};function DUt(e){return typeof e=="function"&&e._gl&&e.prop&&e.texture&&e.buffer}MBe.exports=xc});var kBe=ye((uK,cK)=>{(function(e,t){typeof uK=="object"&&typeof cK!="undefined"?cK.exports=t():e.createREGL=t()})(uK,function(){"use strict";var e=function(Me,bt){for(var zt=Object.keys(bt),Rr=0;Rr<zt.length;++Rr)Me[zt[Rr]]=bt[zt[Rr]];return Me},t=0,r=0,n=5,i=6;function a(Me,bt){this.id=t++,this.type=Me,this.data=bt}function o(Me){return Me.replace(/\\/g,"\\\\").replace(/"/g,'\\"')}function s(Me){if(Me.length===0)return[];var bt=Me.charAt(0),zt=Me.charAt(Me.length-1);if(Me.length>1&&bt===zt&&(bt==='"'||bt==="'"))return['"'+o(Me.substr(1,Me.length-2))+'"'];var Rr=/\[(false|true|null|\d+|'[^']*'|"[^"]*")\]/.exec(Me);if(Rr)return s(Me.substr(0,Rr.index)).concat(s(Rr[1])).concat(s(Me.substr(Rr.index+Rr[0].length)));var jr=Me.split(".");if(jr.length===1)return['"'+o(Me)+'"'];for(var Nr=[],Gr=0;Gr<jr.length;++Gr)Nr=Nr.concat(s(jr[Gr]));return Nr}function l(Me){return"["+s(Me).join("][")+"]"}function u(Me,bt){return new a(Me,l(bt+""))}function c(Me){return typeof Me=="function"&&!Me._reglType||Me instanceof a}function f(Me,bt){if(typeof Me=="function")return new a(r,Me);if(typeof Me=="number"||typeof Me=="boolean")return new a(n,Me);if(Array.isArray(Me))return new a(i,Me.map(function(zt,Rr){return f(zt,bt+"["+Rr+"]")}));if(Me instanceof a)return Me}var h={DynamicVariable:a,define:u,isDynamic:c,unbox:f,accessor:l},d={next:typeof requestAnimationFrame=="function"?function(Me){return requestAnimationFrame(Me)}:function(Me){return setTimeout(Me,16)},cancel:typeof cancelAnimationFrame=="function"?function(Me){return cancelAnimationFrame(Me)}:clearTimeout},v=typeof performance!="undefined"&&performance.now?function(){return performance.now()}:function(){return+new Date};function _(){var Me={"":0},bt=[""];return{id:function(zt){var Rr=Me[zt];return Rr||(Rr=Me[zt]=bt.length,bt.push(zt),Rr)},str:function(zt){return bt[zt]}}}function b(Me,bt,zt){var Rr=document.createElement("canvas");e(Rr.style,{border:0,margin:0,padding:0,top:0,left:0,width:"100%",height:"100%"}),Me.appendChild(Rr),Me===document.body&&(Rr.style.position="absolute",e(Me.style,{margin:0,padding:0}));function jr(){var mi=window.innerWidth,Ui=window.innerHeight;if(Me!==document.body){var qi=Rr.getBoundingClientRect();mi=qi.right-qi.left,Ui=qi.bottom-qi.top}Rr.width=zt*mi,Rr.height=zt*Ui}var Nr;Me!==document.body&&typeof ResizeObserver=="function"?(Nr=new ResizeObserver(function(){setTimeout(jr)}),Nr.observe(Me)):window.addEventListener("resize",jr,!1);function Gr(){Nr?Nr.disconnect():window.removeEventListener("resize",jr),Me.removeChild(Rr)}return jr(),{canvas:Rr,onDestroy:Gr}}function p(Me,bt){function zt(Rr){try{return Me.getContext(Rr,bt)}catch(jr){return null}}return zt("webgl")||zt("experimental-webgl")||zt("webgl-experimental")}function k(Me){return typeof Me.nodeName=="string"&&typeof Me.appendChild=="function"&&typeof Me.getBoundingClientRect=="function"}function E(Me){return typeof Me.drawArrays=="function"||typeof Me.drawElements=="function"}function S(Me){return typeof Me=="string"?Me.split():Me}function L(Me){return typeof Me=="string"?document.querySelector(Me):Me}function x(Me){var bt=Me||{},zt,Rr,jr,Nr,Gr={},mi=[],Ui=[],qi=typeof window=="undefined"?1:window.devicePixelRatio,Ei=!1,Hn={},en=function(Mr){},Wi=function(){};if(typeof bt=="string"?zt=document.querySelector(bt):typeof bt=="object"&&(k(bt)?zt=bt:E(bt)?(Nr=bt,jr=Nr.canvas):("gl"in bt?Nr=bt.gl:"canvas"in bt?jr=L(bt.canvas):"container"in bt&&(Rr=L(bt.container)),"attributes"in bt&&(Gr=bt.attributes),"extensions"in bt&&(mi=S(bt.extensions)),"optionalExtensions"in bt&&(Ui=S(bt.optionalExtensions)),"onDone"in bt&&(en=bt.onDone),"profile"in bt&&(Ei=!!bt.profile),"pixelRatio"in bt&&(qi=+bt.pixelRatio),"cachedCode"in bt&&(Hn=bt.cachedCode))),zt&&(zt.nodeName.toLowerCase()==="canvas"?jr=zt:Rr=zt),!Nr){if(!jr){var si=b(Rr||document.body,en,qi);if(!si)return null;jr=si.canvas,Wi=si.onDestroy}Gr.premultipliedAlpha===void 0&&(Gr.premultipliedAlpha=!0),Nr=p(jr,Gr)}return Nr?{gl:Nr,canvas:jr,container:Rr,extensions:mi,optionalExtensions:Ui,pixelRatio:qi,profile:Ei,cachedCode:Hn,onDone:en,onDestroy:Wi}:(Wi(),en("webgl not supported, try upgrading your browser or graphics drivers http://get.webgl.org"),null)}function C(Me,bt){var zt={};function Rr(Gr){var mi=Gr.toLowerCase(),Ui;try{Ui=zt[mi]=Me.getExtension(mi)}catch(qi){}return!!Ui}for(var jr=0;jr<bt.extensions.length;++jr){var Nr=bt.extensions[jr];if(!Rr(Nr))return bt.onDestroy(),bt.onDone('"'+Nr+'" extension is not supported by the current WebGL context, try upgrading your system or a different browser'),null}return bt.optionalExtensions.forEach(Rr),{extensions:zt,restore:function(){Object.keys(zt).forEach(function(Gr){if(zt[Gr]&&!Rr(Gr))throw new Error("(regl): error restoring extension "+Gr)})}}}function M(Me,bt){for(var zt=Array(Me),Rr=0;Rr<Me;++Rr)zt[Rr]=bt(Rr);return zt}var g=5120,P=5121,T=5122,z=5123,O=5124,V=5125,G=5126;function Z(Me){for(var bt=16;bt<=1<<28;bt*=16)if(Me<=bt)return bt;return 0}function j(Me){var bt,zt;return bt=(Me>65535)<<4,Me>>>=bt,zt=(Me>255)<<3,Me>>>=zt,bt|=zt,zt=(Me>15)<<2,Me>>>=zt,bt|=zt,zt=(Me>3)<<1,Me>>>=zt,bt|=zt,bt|Me>>1}function N(){var Me=M(8,function(){return[]});function bt(Nr){var Gr=Z(Nr),mi=Me[j(Gr)>>2];return mi.length>0?mi.pop():new ArrayBuffer(Gr)}function zt(Nr){Me[j(Nr.byteLength)>>2].push(Nr)}function Rr(Nr,Gr){var mi=null;switch(Nr){case g:mi=new Int8Array(bt(Gr),0,Gr);break;case P:mi=new Uint8Array(bt(Gr),0,Gr);break;case T:mi=new Int16Array(bt(2*Gr),0,Gr);break;case z:mi=new Uint16Array(bt(2*Gr),0,Gr);break;case O:mi=new Int32Array(bt(4*Gr),0,Gr);break;case V:mi=new Uint32Array(bt(4*Gr),0,Gr);break;case G:mi=new Float32Array(bt(4*Gr),0,Gr);break;default:return null}return mi.length!==Gr?mi.subarray(0,Gr):mi}function jr(Nr){zt(Nr.buffer)}return{alloc:bt,free:zt,allocType:Rr,freeType:jr}}var H=N();H.zero=N();var te=3408,oe=3410,_e=3411,Ee=3412,Ce=3413,me=3414,ie=3415,Se=33901,Le=33902,Ae=3379,Fe=3386,Pe=34921,ge=36347,Re=36348,ce=35661,Ze=35660,ut=34930,pt=36349,Zt=34076,st=34024,lt=7936,Gt=7937,Nt=7938,Jt=35724,sr=34047,wr=36063,cr=34852,$e=3553,St=34067,Qt=34069,Vt=33984,_t=6408,It=5126,mt=5121,er=36160,lr=36053,Tr=36064,Lr=16384,ti=function(Me,bt){var zt=1;bt.ext_texture_filter_anisotropic&&(zt=Me.getParameter(sr));var Rr=1,jr=1;bt.webgl_draw_buffers&&(Rr=Me.getParameter(cr),jr=Me.getParameter(wr));var Nr=!!bt.oes_texture_float;if(Nr){var Gr=Me.createTexture();Me.bindTexture($e,Gr),Me.texImage2D($e,0,_t,1,1,0,_t,It,null);var mi=Me.createFramebuffer();if(Me.bindFramebuffer(er,mi),Me.framebufferTexture2D(er,Tr,$e,Gr,0),Me.bindTexture($e,null),Me.checkFramebufferStatus(er)!==lr)Nr=!1;else{Me.viewport(0,0,1,1),Me.clearColor(1,0,0,1),Me.clear(Lr);var Ui=H.allocType(It,4);Me.readPixels(0,0,1,1,_t,It,Ui),Me.getError()?Nr=!1:(Me.deleteFramebuffer(mi),Me.deleteTexture(Gr),Nr=Ui[0]===1),H.freeType(Ui)}}var qi=typeof navigator!="undefined"&&(/MSIE/.test(navigator.userAgent)||/Trident\//.test(navigator.appVersion)||/Edge/.test(navigator.userAgent)),Ei=!0;if(!qi){var Hn=Me.createTexture(),en=H.allocType(mt,36);Me.activeTexture(Vt),Me.bindTexture(St,Hn),Me.texImage2D(Qt,0,_t,3,3,0,_t,mt,en),H.freeType(en),Me.bindTexture(St,null),Me.deleteTexture(Hn),Ei=!Me.getError()}return{colorBits:[Me.getParameter(oe),Me.getParameter(_e),Me.getParameter(Ee),Me.getParameter(Ce)],depthBits:Me.getParameter(me),stencilBits:Me.getParameter(ie),subpixelBits:Me.getParameter(te),extensions:Object.keys(bt).filter(function(Wi){return!!bt[Wi]}),maxAnisotropic:zt,maxDrawbuffers:Rr,maxColorAttachments:jr,pointSizeDims:Me.getParameter(Se),lineWidthDims:Me.getParameter(Le),maxViewportDims:Me.getParameter(Fe),maxCombinedTextureUnits:Me.getParameter(ce),maxCubeMapSize:Me.getParameter(Zt),maxRenderbufferSize:Me.getParameter(st),maxTextureUnits:Me.getParameter(ut),maxTextureSize:Me.getParameter(Ae),maxAttributes:Me.getParameter(Pe),maxVertexUniforms:Me.getParameter(ge),maxVertexTextureUnits:Me.getParameter(Ze),maxVaryingVectors:Me.getParameter(Re),maxFragmentUniforms:Me.getParameter(pt),glsl:Me.getParameter(Jt),renderer:Me.getParameter(Gt),vendor:Me.getParameter(lt),version:Me.getParameter(Nt),readFloat:Nr,npotTextureCube:Ei}},Br=function(Me){return Me instanceof Uint8Array||Me instanceof Uint16Array||Me instanceof Uint32Array||Me instanceof Int8Array||Me instanceof Int16Array||Me instanceof Int32Array||Me instanceof Float32Array||Me instanceof Float64Array||Me instanceof Uint8ClampedArray};function Vr(Me){return!!Me&&typeof Me=="object"&&Array.isArray(Me.shape)&&Array.isArray(Me.stride)&&typeof Me.offset=="number"&&Me.shape.length===Me.stride.length&&(Array.isArray(Me.data)||Br(Me.data))}var dt=function(Me){return Object.keys(Me).map(function(bt){return Me[bt]})},Ge={shape:xe,flatten:Ie};function Je(Me,bt,zt){for(var Rr=0;Rr<bt;++Rr)zt[Rr]=Me[Rr]}function je(Me,bt,zt,Rr){for(var jr=0,Nr=0;Nr<bt;++Nr)for(var Gr=Me[Nr],mi=0;mi<zt;++mi)Rr[jr++]=Gr[mi]}function tt(Me,bt,zt,Rr,jr,Nr){for(var Gr=Nr,mi=0;mi<bt;++mi)for(var Ui=Me[mi],qi=0;qi<zt;++qi)for(var Ei=Ui[qi],Hn=0;Hn<Rr;++Hn)jr[Gr++]=Ei[Hn]}function xt(Me,bt,zt,Rr,jr){for(var Nr=1,Gr=zt+1;Gr<bt.length;++Gr)Nr*=bt[Gr];var mi=bt[zt];if(bt.length-zt===4){var Ui=bt[zt+1],qi=bt[zt+2],Ei=bt[zt+3];for(Gr=0;Gr<mi;++Gr)tt(Me[Gr],Ui,qi,Ei,Rr,jr),jr+=Nr}else for(Gr=0;Gr<mi;++Gr)xt(Me[Gr],bt,zt+1,Rr,jr),jr+=Nr}function Ie(Me,bt,zt,Rr){var jr=1;if(bt.length)for(var Nr=0;Nr<bt.length;++Nr)jr*=bt[Nr];else jr=0;var Gr=Rr||H.allocType(zt,jr);switch(bt.length){case 0:break;case 1:Je(Me,bt[0],Gr);break;case 2:je(Me,bt[0],bt[1],Gr);break;case 3:tt(Me,bt[0],bt[1],bt[2],Gr,0);break;default:xt(Me,bt,0,Gr,0)}return Gr}function xe(Me){for(var bt=[],zt=Me;zt.length;zt=zt[0])bt.push(zt.length);return bt}var ke={"[object Int8Array]":5120,"[object Int16Array]":5122,"[object Int32Array]":5124,"[object Uint8Array]":5121,"[object Uint8ClampedArray]":5121,"[object Uint16Array]":5123,"[object Uint32Array]":5125,"[object Float32Array]":5126,"[object Float64Array]":5121,"[object ArrayBuffer]":5121},vt=5120,ir=5122,ar=5124,vr=5121,ii=5123,pi=5125,$r=5126,di=5126,ji={int8:vt,int16:ir,int32:ar,uint8:vr,uint16:ii,uint32:pi,float:$r,float32:di},In=35048,wi=35040,On={dynamic:In,stream:wi,static:35044},qn=Ge.flatten,Fn=Ge.shape,ra=35044,la=35040,Ut=5121,wt=5126,rr=[];rr[5120]=1,rr[5122]=2,rr[5124]=4,rr[5121]=1,rr[5123]=2,rr[5125]=4,rr[5126]=4;function nr(Me){return ke[Object.prototype.toString.call(Me)]|0}function Er(Me,bt){for(var zt=0;zt<bt.length;++zt)Me[zt]=bt[zt]}function Xr(Me,bt,zt,Rr,jr,Nr,Gr){for(var mi=0,Ui=0;Ui<zt;++Ui)for(var qi=0;qi<Rr;++qi)Me[mi++]=bt[jr*Ui+Nr*qi+Gr]}function ri(Me,bt,zt,Rr){var jr=0,Nr={};function Gr(Mr){this.id=jr++,this.buffer=Me.createBuffer(),this.type=Mr,this.usage=ra,this.byteLength=0,this.dimension=1,this.dtype=Ut,this.persistentData=null,zt.profile&&(this.stats={size:0})}Gr.prototype.bind=function(){Me.bindBuffer(this.type,this.buffer)},Gr.prototype.destroy=function(){en(this)};var mi=[];function Ui(Mr,Yr){var xi=mi.pop();return xi||(xi=new Gr(Mr)),xi.bind(),Hn(xi,Yr,la,0,1,!1),xi}function qi(Mr){mi.push(Mr)}function Ei(Mr,Yr,xi){Mr.byteLength=Yr.byteLength,Me.bufferData(Mr.type,Yr,xi)}function Hn(Mr,Yr,xi,Ri,ci,an){var Zi;if(Mr.usage=xi,Array.isArray(Yr)){if(Mr.dtype=Ri||wt,Yr.length>0){var Bn;if(Array.isArray(Yr[0])){Zi=Fn(Yr);for(var hi=1,li=1;li<Zi.length;++li)hi*=Zi[li];Mr.dimension=hi,Bn=qn(Yr,Zi,Mr.dtype),Ei(Mr,Bn,xi),an?Mr.persistentData=Bn:H.freeType(Bn)}else if(typeof Yr[0]=="number"){Mr.dimension=ci;var mn=H.allocType(Mr.dtype,Yr.length);Er(mn,Yr),Ei(Mr,mn,xi),an?Mr.persistentData=mn:H.freeType(mn)}else Br(Yr[0])&&(Mr.dimension=Yr[0].length,Mr.dtype=Ri||nr(Yr[0])||wt,Bn=qn(Yr,[Yr.length,Yr[0].length],Mr.dtype),Ei(Mr,Bn,xi),an?Mr.persistentData=Bn:H.freeType(Bn))}}else if(Br(Yr))Mr.dtype=Ri||nr(Yr),Mr.dimension=ci,Ei(Mr,Yr,xi),an&&(Mr.persistentData=new Uint8Array(new Uint8Array(Yr.buffer)));else if(Vr(Yr)){Zi=Yr.shape;var Ji=Yr.stride,Vi=Yr.offset,Ni=0,pn=0,Vn=0,na=0;Zi.length===1?(Ni=Zi[0],pn=1,Vn=Ji[0],na=0):Zi.length===2&&(Ni=Zi[0],pn=Zi[1],Vn=Ji[0],na=Ji[1]),Mr.dtype=Ri||nr(Yr.data)||wt,Mr.dimension=pn;var Ki=H.allocType(Mr.dtype,Ni*pn);Xr(Ki,Yr.data,Ni,pn,Vn,na,Vi),Ei(Mr,Ki,xi),an?Mr.persistentData=Ki:H.freeType(Ki)}else Yr instanceof ArrayBuffer&&(Mr.dtype=Ut,Mr.dimension=ci,Ei(Mr,Yr,xi),an&&(Mr.persistentData=new Uint8Array(new Uint8Array(Yr))))}function en(Mr){bt.bufferCount--,Rr(Mr);var Yr=Mr.buffer;Me.deleteBuffer(Yr),Mr.buffer=null,delete Nr[Mr.id]}function Wi(Mr,Yr,xi,Ri){bt.bufferCount++;var ci=new Gr(Yr);Nr[ci.id]=ci;function an(hi){var li=ra,mn=null,Ji=0,Vi=0,Ni=1;return Array.isArray(hi)||Br(hi)||Vr(hi)||hi instanceof ArrayBuffer?mn=hi:typeof hi=="number"?Ji=hi|0:hi&&("data"in hi&&(mn=hi.data),"usage"in hi&&(li=On[hi.usage]),"type"in hi&&(Vi=ji[hi.type]),"dimension"in hi&&(Ni=hi.dimension|0),"length"in hi&&(Ji=hi.length|0)),ci.bind(),mn?Hn(ci,mn,li,Vi,Ni,Ri):(Ji&&Me.bufferData(ci.type,Ji,li),ci.dtype=Vi||Ut,ci.usage=li,ci.dimension=Ni,ci.byteLength=Ji),zt.profile&&(ci.stats.size=ci.byteLength*rr[ci.dtype]),an}function Zi(hi,li){Me.bufferSubData(ci.type,li,hi)}function Bn(hi,li){var mn=(li||0)|0,Ji;if(ci.bind(),Br(hi)||hi instanceof ArrayBuffer)Zi(hi,mn);else if(Array.isArray(hi)){if(hi.length>0){if(typeof hi[0]=="number"){var Vi=H.allocType(ci.dtype,hi.length);Er(Vi,hi),Zi(Vi,mn),H.freeType(Vi)}else if(Array.isArray(hi[0])||Br(hi[0])){Ji=Fn(hi);var Ni=qn(hi,Ji,ci.dtype);Zi(Ni,mn),H.freeType(Ni)}}}else if(Vr(hi)){Ji=hi.shape;var pn=hi.stride,Vn=0,na=0,Ki=0,kn=0;Ji.length===1?(Vn=Ji[0],na=1,Ki=pn[0],kn=0):Ji.length===2&&(Vn=Ji[0],na=Ji[1],Ki=pn[0],kn=pn[1]);var ta=Array.isArray(hi.data)?ci.dtype:nr(hi.data),oa=H.allocType(ta,Vn*na);Xr(oa,hi.data,Vn,na,Ki,kn,hi.offset),Zi(oa,mn),H.freeType(oa)}return an}return xi||an(Mr),an._reglType="buffer",an._buffer=ci,an.subdata=Bn,zt.profile&&(an.stats=ci.stats),an.destroy=function(){en(ci)},an}function si(){dt(Nr).forEach(function(Mr){Mr.buffer=Me.createBuffer(),Me.bindBuffer(Mr.type,Mr.buffer),Me.bufferData(Mr.type,Mr.persistentData||Mr.byteLength,Mr.usage)})}return zt.profile&&(bt.getTotalBufferSize=function(){var Mr=0;return Object.keys(Nr).forEach(function(Yr){Mr+=Nr[Yr].stats.size}),Mr}),{create:Wi,createStream:Ui,destroyStream:qi,clear:function(){dt(Nr).forEach(en),mi.forEach(en)},getBuffer:function(Mr){return Mr&&Mr._buffer instanceof Gr?Mr._buffer:null},restore:si,_initBuffer:Hn}}var Qr=0,Oi=0,$i=1,tn=1,fn=4,yn=4,Sn={points:Qr,point:Oi,lines:$i,line:tn,triangles:fn,triangle:yn,"line loop":2,"line strip":3,"triangle strip":5,"triangle fan":6},Ba=0,ua=1,ma=4,Wa=5120,Fa=5121,Wo=5122,da=5123,Wn=5124,Ha=5125,vo=34963,jn=35040,Mt=35044;function kr(Me,bt,zt,Rr){var jr={},Nr=0,Gr={uint8:Fa,uint16:da};bt.oes_element_index_uint&&(Gr.uint32=Ha);function mi(si){this.id=Nr++,jr[this.id]=this,this.buffer=si,this.primType=ma,this.vertCount=0,this.type=0}mi.prototype.bind=function(){this.buffer.bind()};var Ui=[];function qi(si){var Mr=Ui.pop();return Mr||(Mr=new mi(zt.create(null,vo,!0,!1)._buffer)),Hn(Mr,si,jn,-1,-1,0,0),Mr}function Ei(si){Ui.push(si)}function Hn(si,Mr,Yr,xi,Ri,ci,an){si.buffer.bind();var Zi;if(Mr){var Bn=an;!an&&(!Br(Mr)||Vr(Mr)&&!Br(Mr.data))&&(Bn=bt.oes_element_index_uint?Ha:da),zt._initBuffer(si.buffer,Mr,Yr,Bn,3)}else Me.bufferData(vo,ci,Yr),si.buffer.dtype=Zi||Fa,si.buffer.usage=Yr,si.buffer.dimension=3,si.buffer.byteLength=ci;if(Zi=an,!an){switch(si.buffer.dtype){case Fa:case Wa:Zi=Fa;break;case da:case Wo:Zi=da;break;case Ha:case Wn:Zi=Ha;break;default:}si.buffer.dtype=Zi}si.type=Zi;var hi=Ri;hi<0&&(hi=si.buffer.byteLength,Zi===da?hi>>=1:Zi===Ha&&(hi>>=2)),si.vertCount=hi;var li=xi;if(xi<0){li=ma;var mn=si.buffer.dimension;mn===1&&(li=Ba),mn===2&&(li=ua),mn===3&&(li=ma)}si.primType=li}function en(si){Rr.elementsCount--,delete jr[si.id],si.buffer.destroy(),si.buffer=null}function Wi(si,Mr){var Yr=zt.create(null,vo,!0),xi=new mi(Yr._buffer);Rr.elementsCount++;function Ri(ci){if(!ci)Yr(),xi.primType=ma,xi.vertCount=0,xi.type=Fa;else if(typeof ci=="number")Yr(ci),xi.primType=ma,xi.vertCount=ci|0,xi.type=Fa;else{var an=null,Zi=Mt,Bn=-1,hi=-1,li=0,mn=0;Array.isArray(ci)||Br(ci)||Vr(ci)?an=ci:("data"in ci&&(an=ci.data),"usage"in ci&&(Zi=On[ci.usage]),"primitive"in ci&&(Bn=Sn[ci.primitive]),"count"in ci&&(hi=ci.count|0),"type"in ci&&(mn=Gr[ci.type]),"length"in ci?li=ci.length|0:(li=hi,mn===da||mn===Wo?li*=2:(mn===Ha||mn===Wn)&&(li*=4))),Hn(xi,an,Zi,Bn,hi,li,mn)}return Ri}return Ri(si),Ri._reglType="elements",Ri._elements=xi,Ri.subdata=function(ci,an){return Yr.subdata(ci,an),Ri},Ri.destroy=function(){en(xi)},Ri}return{create:Wi,createStream:qi,destroyStream:Ei,getElements:function(si){return typeof si=="function"&&si._elements instanceof mi?si._elements:null},clear:function(){dt(jr).forEach(en)}}}var Jr=new Float32Array(1),vi=new Uint32Array(Jr.buffer),hn=5123;function An(Me){for(var bt=H.allocType(hn,Me.length),zt=0;zt<Me.length;++zt)if(isNaN(Me[zt]))bt[zt]=65535;else if(Me[zt]===1/0)bt[zt]=31744;else if(Me[zt]===-1/0)bt[zt]=64512;else{Jr[0]=Me[zt];var Rr=vi[0],jr=Rr>>>31<<15,Nr=(Rr<<1>>>24)-127,Gr=Rr>>13&1023;if(Nr<-24)bt[zt]=jr;else if(Nr<-14){var mi=-14-Nr;bt[zt]=jr+(Gr+1024>>mi)}else Nr>15?bt[zt]=jr+31744:bt[zt]=jr+(Nr+15<<10)+Gr}return bt}function Mn(Me){return Array.isArray(Me)||Br(Me)}var Li=34467,_n=3553,ya=34067,Jn=34069,Ma=6408,_o=6406,No=6407,po=6409,Lo=6410,ko=32854,Ds=32855,Fs=36194,ll=32819,ul=32820,zl=33635,us=34042,il=6402,As=34041,cl=35904,Ks=35906,zs=36193,Io=33776,ls=33777,Zl=33778,Su=33779,nc=35986,bs=35987,Rn=34798,_a=35840,Vu=35841,Ol=35842,xo=35843,Yl=36196,Ns=5121,Hl=5123,ac=5125,aa=5126,Oo=10242,qo=10243,ql=10497,Pc=33071,Do=33648,rf=10240,Uf=10241,pl=9728,Zc=9729,Kl=9984,Os=9985,yu=9986,oc=9987,Cf=33170,sc=4352,Vh=4353,Lf=4354,cs=34046,nf=3317,Vf=37440,Jl=37441,fl=37443,lc=37444,Fu=33984,Es=[Kl,yu,Os,oc],Hs=[0,po,Lo,No,Ma],Go={};Go[po]=Go[_o]=Go[il]=1,Go[As]=Go[Lo]=2,Go[No]=Go[cl]=3,Go[Ma]=Go[Ks]=4;function ps(Me){return"[object "+Me+"]"}var uc=ps("HTMLCanvasElement"),xl=ps("OffscreenCanvas"),Gu=ps("CanvasRenderingContext2D"),qs=ps("ImageBitmap"),ad=ps("HTMLImageElement"),Po=ps("HTMLVideoElement"),od=Object.keys(ke).concat([uc,xl,Gu,qs,ad,Po]),Yo=[];Yo[Ns]=1,Yo[aa]=4,Yo[zs]=2,Yo[Hl]=2,Yo[ac]=4;var Pa=[];Pa[ko]=2,Pa[Ds]=2,Pa[Fs]=2,Pa[As]=4,Pa[Io]=.5,Pa[ls]=.5,Pa[Zl]=1,Pa[Su]=1,Pa[nc]=.5,Pa[bs]=1,Pa[Rn]=1,Pa[_a]=.5,Pa[Vu]=.25,Pa[Ol]=.5,Pa[xo]=.25,Pa[Yl]=.5;function af(Me){return Array.isArray(Me)&&(Me.length===0||typeof Me[0]=="number")}function Hu(Me){if(!Array.isArray(Me))return!1;var bt=Me.length;return!(bt===0||!Mn(Me[0]))}function bl(Me){return Object.prototype.toString.call(Me)}function Gf(Me){return bl(Me)===uc}function Ic(Me){return bl(Me)===xl}function yf(Me){return bl(Me)===Gu}function Bl(Me){return bl(Me)===qs}function wh(Me){return bl(Me)===ad}function Qf(Me){return bl(Me)===Po}function _f(Me){if(!Me)return!1;var bt=bl(Me);return od.indexOf(bt)>=0?!0:af(Me)||Hu(Me)||Vr(Me)}function Yc(Me){return ke[Object.prototype.toString.call(Me)]|0}function eh(Me,bt){var zt=bt.length;switch(Me.type){case Ns:case Hl:case ac:case aa:var Rr=H.allocType(Me.type,zt);Rr.set(bt),Me.data=Rr;break;case zs:Me.data=An(bt);break;default:}}function th(Me,bt){return H.allocType(Me.type===zs?aa:Me.type,bt)}function ju(Me,bt){Me.type===zs?(Me.data=An(bt),H.freeType(bt)):Me.data=bt}function Hf(Me,bt,zt,Rr,jr,Nr){for(var Gr=Me.width,mi=Me.height,Ui=Me.channels,qi=Gr*mi*Ui,Ei=th(Me,qi),Hn=0,en=0;en<mi;++en)for(var Wi=0;Wi<Gr;++Wi)for(var si=0;si<Ui;++si)Ei[Hn++]=bt[zt*Wi+Rr*en+jr*si+Nr];ju(Me,Ei)}function cc(Me,bt,zt,Rr,jr,Nr){var Gr;if(typeof Pa[Me]!="undefined"?Gr=Pa[Me]:Gr=Go[Me]*Yo[bt],Nr&&(Gr*=6),jr){for(var mi=0,Ui=zt;Ui>=1;)mi+=Gr*Ui*Ui,Ui/=2;return mi}else return Gr*zt*Rr}function of(Me,bt,zt,Rr,jr,Nr,Gr){var mi={"don't care":sc,"dont care":sc,nice:Lf,fast:Vh},Ui={repeat:ql,clamp:Pc,mirror:Do},qi={nearest:pl,linear:Zc},Ei=e({mipmap:oc,"nearest mipmap nearest":Kl,"linear mipmap nearest":Os,"nearest mipmap linear":yu,"linear mipmap linear":oc},qi),Hn={none:0,browser:lc},en={uint8:Ns,rgba4:ll,rgb565:zl,"rgb5 a1":ul},Wi={alpha:_o,luminance:po,"luminance alpha":Lo,rgb:No,rgba:Ma,rgba4:ko,"rgb5 a1":Ds,rgb565:Fs},si={};bt.ext_srgb&&(Wi.srgb=cl,Wi.srgba=Ks),bt.oes_texture_float&&(en.float32=en.float=aa),bt.oes_texture_half_float&&(en.float16=en["half float"]=zs),bt.webgl_depth_texture&&(e(Wi,{depth:il,"depth stencil":As}),e(en,{uint16:Hl,uint32:ac,"depth stencil":us})),bt.webgl_compressed_texture_s3tc&&e(si,{"rgb s3tc dxt1":Io,"rgba s3tc dxt1":ls,"rgba s3tc dxt3":Zl,"rgba s3tc dxt5":Su}),bt.webgl_compressed_texture_atc&&e(si,{"rgb atc":nc,"rgba atc explicit alpha":bs,"rgba atc interpolated alpha":Rn}),bt.webgl_compressed_texture_pvrtc&&e(si,{"rgb pvrtc 4bppv1":_a,"rgb pvrtc 2bppv1":Vu,"rgba pvrtc 4bppv1":Ol,"rgba pvrtc 2bppv1":xo}),bt.webgl_compressed_texture_etc1&&(si["rgb etc1"]=Yl);var Mr=Array.prototype.slice.call(Me.getParameter(Li));Object.keys(si).forEach(function(ne){var we=si[ne];Mr.indexOf(we)>=0&&(Wi[ne]=we)});var Yr=Object.keys(Wi);zt.textureFormats=Yr;var xi=[];Object.keys(Wi).forEach(function(ne){var we=Wi[ne];xi[we]=ne});var Ri=[];Object.keys(en).forEach(function(ne){var we=en[ne];Ri[we]=ne});var ci=[];Object.keys(qi).forEach(function(ne){var we=qi[ne];ci[we]=ne});var an=[];Object.keys(Ei).forEach(function(ne){var we=Ei[ne];an[we]=ne});var Zi=[];Object.keys(Ui).forEach(function(ne){var we=Ui[ne];Zi[we]=ne});var Bn=Yr.reduce(function(ne,we){var Ue=Wi[we];return Ue===po||Ue===_o||Ue===po||Ue===Lo||Ue===il||Ue===As||bt.ext_srgb&&(Ue===cl||Ue===Ks)?ne[Ue]=Ue:Ue===Ds||we.indexOf("rgba")>=0?ne[Ue]=Ma:ne[Ue]=No,ne},{});function hi(){this.internalformat=Ma,this.format=Ma,this.type=Ns,this.compressed=!1,this.premultiplyAlpha=!1,this.flipY=!1,this.unpackAlignment=1,this.colorSpace=lc,this.width=0,this.height=0,this.channels=0}function li(ne,we){ne.internalformat=we.internalformat,ne.format=we.format,ne.type=we.type,ne.compressed=we.compressed,ne.premultiplyAlpha=we.premultiplyAlpha,ne.flipY=we.flipY,ne.unpackAlignment=we.unpackAlignment,ne.colorSpace=we.colorSpace,ne.width=we.width,ne.height=we.height,ne.channels=we.channels}function mn(ne,we){if(!(typeof we!="object"||!we)){if("premultiplyAlpha"in we&&(ne.premultiplyAlpha=we.premultiplyAlpha),"flipY"in we&&(ne.flipY=we.flipY),"alignment"in we&&(ne.unpackAlignment=we.alignment),"colorSpace"in we&&(ne.colorSpace=Hn[we.colorSpace]),"type"in we){var Ue=we.type;ne.type=en[Ue]}var ft=ne.width,Xt=ne.height,hr=ne.channels,qt=!1;"shape"in we?(ft=we.shape[0],Xt=we.shape[1],we.shape.length===3&&(hr=we.shape[2],qt=!0)):("radius"in we&&(ft=Xt=we.radius),"width"in we&&(ft=we.width),"height"in we&&(Xt=we.height),"channels"in we&&(hr=we.channels,qt=!0)),ne.width=ft|0,ne.height=Xt|0,ne.channels=hr|0;var Ve=!1;if("format"in we){var Qe=we.format,at=ne.internalformat=Wi[Qe];ne.format=Bn[at],Qe in en&&("type"in we||(ne.type=en[Qe])),Qe in si&&(ne.compressed=!0),Ve=!0}!qt&&Ve?ne.channels=Go[ne.format]:qt&&!Ve&&ne.channels!==Hs[ne.format]&&(ne.format=ne.internalformat=Hs[ne.channels])}}function Ji(ne){Me.pixelStorei(Vf,ne.flipY),Me.pixelStorei(Jl,ne.premultiplyAlpha),Me.pixelStorei(fl,ne.colorSpace),Me.pixelStorei(nf,ne.unpackAlignment)}function Vi(){hi.call(this),this.xOffset=0,this.yOffset=0,this.data=null,this.needsFree=!1,this.element=null,this.needsCopy=!1}function Ni(ne,we){var Ue=null;if(_f(we)?Ue=we:we&&(mn(ne,we),"x"in we&&(ne.xOffset=we.x|0),"y"in we&&(ne.yOffset=we.y|0),_f(we.data)&&(Ue=we.data)),we.copy){var ft=jr.viewportWidth,Xt=jr.viewportHeight;ne.width=ne.width||ft-ne.xOffset,ne.height=ne.height||Xt-ne.yOffset,ne.needsCopy=!0}else if(!Ue)ne.width=ne.width||1,ne.height=ne.height||1,ne.channels=ne.channels||4;else if(Br(Ue))ne.channels=ne.channels||4,ne.data=Ue,!("type"in we)&&ne.type===Ns&&(ne.type=Yc(Ue));else if(af(Ue))ne.channels=ne.channels||4,eh(ne,Ue),ne.alignment=1,ne.needsFree=!0;else if(Vr(Ue)){var hr=Ue.data;!Array.isArray(hr)&&ne.type===Ns&&(ne.type=Yc(hr));var qt=Ue.shape,Ve=Ue.stride,Qe,at,Ct,Ot,Rt,Bt;qt.length===3?(Ct=qt[2],Bt=Ve[2]):(Ct=1,Bt=1),Qe=qt[0],at=qt[1],Ot=Ve[0],Rt=Ve[1],ne.alignment=1,ne.width=Qe,ne.height=at,ne.channels=Ct,ne.format=ne.internalformat=Hs[Ct],ne.needsFree=!0,Hf(ne,hr,Ot,Rt,Bt,Ue.offset)}else if(Gf(Ue)||Ic(Ue)||yf(Ue))Gf(Ue)||Ic(Ue)?ne.element=Ue:ne.element=Ue.canvas,ne.width=ne.element.width,ne.height=ne.element.height,ne.channels=4;else if(Bl(Ue))ne.element=Ue,ne.width=Ue.width,ne.height=Ue.height,ne.channels=4;else if(wh(Ue))ne.element=Ue,ne.width=Ue.naturalWidth,ne.height=Ue.naturalHeight,ne.channels=4;else if(Qf(Ue))ne.element=Ue,ne.width=Ue.videoWidth,ne.height=Ue.videoHeight,ne.channels=4;else if(Hu(Ue)){var Dt=ne.width||Ue[0].length,yt=ne.height||Ue.length,Pt=ne.channels;Mn(Ue[0][0])?Pt=Pt||Ue[0][0].length:Pt=Pt||1;for(var ht=Ge.shape(Ue),ur=1,br=0;br<ht.length;++br)ur*=ht[br];var Ur=th(ne,ur);Ge.flatten(Ue,ht,"",Ur),ju(ne,Ur),ne.alignment=1,ne.width=Dt,ne.height=yt,ne.channels=Pt,ne.format=ne.internalformat=Hs[Pt],ne.needsFree=!0}ne.type===aa||ne.type}function pn(ne,we,Ue){var ft=ne.element,Xt=ne.data,hr=ne.internalformat,qt=ne.format,Ve=ne.type,Qe=ne.width,at=ne.height;Ji(ne),ft?Me.texImage2D(we,Ue,qt,qt,Ve,ft):ne.compressed?Me.compressedTexImage2D(we,Ue,hr,Qe,at,0,Xt):ne.needsCopy?(Rr(),Me.copyTexImage2D(we,Ue,qt,ne.xOffset,ne.yOffset,Qe,at,0)):Me.texImage2D(we,Ue,qt,Qe,at,0,qt,Ve,Xt||null)}function Vn(ne,we,Ue,ft,Xt){var hr=ne.element,qt=ne.data,Ve=ne.internalformat,Qe=ne.format,at=ne.type,Ct=ne.width,Ot=ne.height;Ji(ne),hr?Me.texSubImage2D(we,Xt,Ue,ft,Qe,at,hr):ne.compressed?Me.compressedTexSubImage2D(we,Xt,Ue,ft,Ve,Ct,Ot,qt):ne.needsCopy?(Rr(),Me.copyTexSubImage2D(we,Xt,Ue,ft,ne.xOffset,ne.yOffset,Ct,Ot)):Me.texSubImage2D(we,Xt,Ue,ft,Ct,Ot,Qe,at,qt)}var na=[];function Ki(){return na.pop()||new Vi}function kn(ne){ne.needsFree&&H.freeType(ne.data),Vi.call(ne),na.push(ne)}function ta(){hi.call(this),this.genMipmaps=!1,this.mipmapHint=sc,this.mipmask=0,this.images=Array(16)}function oa(ne,we,Ue){var ft=ne.images[0]=Ki();ne.mipmask=1,ft.width=ne.width=we,ft.height=ne.height=Ue,ft.channels=ne.channels=4}function ba(ne,we){var Ue=null;if(_f(we))Ue=ne.images[0]=Ki(),li(Ue,ne),Ni(Ue,we),ne.mipmask=1;else if(mn(ne,we),Array.isArray(we.mipmap))for(var ft=we.mipmap,Xt=0;Xt<ft.length;++Xt)Ue=ne.images[Xt]=Ki(),li(Ue,ne),Ue.width>>=Xt,Ue.height>>=Xt,Ni(Ue,ft[Xt]),ne.mipmask|=1<<Xt;else Ue=ne.images[0]=Ki(),li(Ue,ne),Ni(Ue,we),ne.mipmask=1;li(ne,ne.images[0]),ne.compressed&&(ne.internalformat===Io||ne.internalformat===ls||ne.internalformat===Zl||ne.internalformat)}function is(ne,we){for(var Ue=ne.images,ft=0;ft<Ue.length;++ft){if(!Ue[ft])return;pn(Ue[ft],we,ft)}}var Zs=[];function Va(){var ne=Zs.pop()||new ta;hi.call(ne),ne.mipmask=0;for(var we=0;we<16;++we)ne.images[we]=null;return ne}function Ml(ne){for(var we=ne.images,Ue=0;Ue<we.length;++Ue)we[Ue]&&kn(we[Ue]),we[Ue]=null;Zs.push(ne)}function zo(){this.minFilter=pl,this.magFilter=pl,this.wrapS=Pc,this.wrapT=Pc,this.anisotropic=1,this.genMipmaps=!1,this.mipmapHint=sc}function Qs(ne,we){if("min"in we){var Ue=we.min;ne.minFilter=Ei[Ue],Es.indexOf(ne.minFilter)>=0&&!("faces"in we)&&(ne.genMipmaps=!0)}if("mag"in we){var ft=we.mag;ne.magFilter=qi[ft]}var Xt=ne.wrapS,hr=ne.wrapT;if("wrap"in we){var qt=we.wrap;typeof qt=="string"?Xt=hr=Ui[qt]:Array.isArray(qt)&&(Xt=Ui[qt[0]],hr=Ui[qt[1]])}else{if("wrapS"in we){var Ve=we.wrapS;Xt=Ui[Ve]}if("wrapT"in we){var Qe=we.wrapT;hr=Ui[Qe]}}if(ne.wrapS=Xt,ne.wrapT=hr,"anisotropic"in we){var at=we.anisotropic;ne.anisotropic=we.anisotropic}if("mipmap"in we){var Ct=!1;switch(typeof we.mipmap){case"string":ne.mipmapHint=mi[we.mipmap],ne.genMipmaps=!0,Ct=!0;break;case"boolean":Ct=ne.genMipmaps=we.mipmap;break;case"object":ne.genMipmaps=!1,Ct=!0;break;default:}Ct&&!("min"in we)&&(ne.minFilter=Kl)}}function al(ne,we){Me.texParameteri(we,Uf,ne.minFilter),Me.texParameteri(we,rf,ne.magFilter),Me.texParameteri(we,Oo,ne.wrapS),Me.texParameteri(we,qo,ne.wrapT),bt.ext_texture_filter_anisotropic&&Me.texParameteri(we,cs,ne.anisotropic),ne.genMipmaps&&(Me.hint(Cf,ne.mipmapHint),Me.generateMipmap(we))}var Vl=0,ss={},Vs=zt.maxTextureUnits,Ys=Array(Vs).map(function(){return null});function wa(ne){hi.call(this),this.mipmask=0,this.internalformat=Ma,this.id=Vl++,this.refCount=1,this.target=ne,this.texture=Me.createTexture(),this.unit=-1,this.bindCount=0,this.texInfo=new zo,Gr.profile&&(this.stats={size:0})}function ol(ne){Me.activeTexture(Fu),Me.bindTexture(ne.target,ne.texture)}function io(){var ne=Ys[0];ne?Me.bindTexture(ne.target,ne.texture):Me.bindTexture(_n,null)}function Y(ne){var we=ne.texture,Ue=ne.unit,ft=ne.target;Ue>=0&&(Me.activeTexture(Fu+Ue),Me.bindTexture(ft,null),Ys[Ue]=null),Me.deleteTexture(we),ne.texture=null,ne.params=null,ne.pixels=null,ne.refCount=0,delete ss[ne.id],Nr.textureCount--}e(wa.prototype,{bind:function(){var ne=this;ne.bindCount+=1;var we=ne.unit;if(we<0){for(var Ue=0;Ue<Vs;++Ue){var ft=Ys[Ue];if(ft){if(ft.bindCount>0)continue;ft.unit=-1}Ys[Ue]=ne,we=Ue;break}we>=Vs,Gr.profile&&Nr.maxTextureUnits<we+1&&(Nr.maxTextureUnits=we+1),ne.unit=we,Me.activeTexture(Fu+we),Me.bindTexture(ne.target,ne.texture)}return we},unbind:function(){this.bindCount-=1},decRef:function(){--this.refCount<=0&&Y(this)}});function D(ne,we){var Ue=new wa(_n);ss[Ue.id]=Ue,Nr.textureCount++;function ft(qt,Ve){var Qe=Ue.texInfo;zo.call(Qe);var at=Va();return typeof qt=="number"?typeof Ve=="number"?oa(at,qt|0,Ve|0):oa(at,qt|0,qt|0):qt?(Qs(Qe,qt),ba(at,qt)):oa(at,1,1),Qe.genMipmaps&&(at.mipmask=(at.width<<1)-1),Ue.mipmask=at.mipmask,li(Ue,at),Ue.internalformat=at.internalformat,ft.width=at.width,ft.height=at.height,ol(Ue),is(at,_n),al(Qe,_n),io(),Ml(at),Gr.profile&&(Ue.stats.size=cc(Ue.internalformat,Ue.type,at.width,at.height,Qe.genMipmaps,!1)),ft.format=xi[Ue.internalformat],ft.type=Ri[Ue.type],ft.mag=ci[Qe.magFilter],ft.min=an[Qe.minFilter],ft.wrapS=Zi[Qe.wrapS],ft.wrapT=Zi[Qe.wrapT],ft}function Xt(qt,Ve,Qe,at){var Ct=Ve|0,Ot=Qe|0,Rt=at|0,Bt=Ki();return li(Bt,Ue),Bt.width=0,Bt.height=0,Ni(Bt,qt),Bt.width=Bt.width||(Ue.width>>Rt)-Ct,Bt.height=Bt.height||(Ue.height>>Rt)-Ot,ol(Ue),Vn(Bt,_n,Ct,Ot,Rt),io(),kn(Bt),ft}function hr(qt,Ve){var Qe=qt|0,at=Ve|0||Qe;if(Qe===Ue.width&&at===Ue.height)return ft;ft.width=Ue.width=Qe,ft.height=Ue.height=at,ol(Ue);for(var Ct=0;Ue.mipmask>>Ct;++Ct){var Ot=Qe>>Ct,Rt=at>>Ct;if(!Ot||!Rt)break;Me.texImage2D(_n,Ct,Ue.format,Ot,Rt,0,Ue.format,Ue.type,null)}return io(),Gr.profile&&(Ue.stats.size=cc(Ue.internalformat,Ue.type,Qe,at,!1,!1)),ft}return ft(ne,we),ft.subimage=Xt,ft.resize=hr,ft._reglType="texture2d",ft._texture=Ue,Gr.profile&&(ft.stats=Ue.stats),ft.destroy=function(){Ue.decRef()},ft}function J(ne,we,Ue,ft,Xt,hr){var qt=new wa(ya);ss[qt.id]=qt,Nr.cubeCount++;var Ve=new Array(6);function Qe(Ot,Rt,Bt,Dt,yt,Pt){var ht,ur=qt.texInfo;for(zo.call(ur),ht=0;ht<6;++ht)Ve[ht]=Va();if(typeof Ot=="number"||!Ot){var br=Ot|0||1;for(ht=0;ht<6;++ht)oa(Ve[ht],br,br)}else if(typeof Ot=="object")if(Rt)ba(Ve[0],Ot),ba(Ve[1],Rt),ba(Ve[2],Bt),ba(Ve[3],Dt),ba(Ve[4],yt),ba(Ve[5],Pt);else if(Qs(ur,Ot),mn(qt,Ot),"faces"in Ot){var Ur=Ot.faces;for(ht=0;ht<6;++ht)li(Ve[ht],qt),ba(Ve[ht],Ur[ht])}else for(ht=0;ht<6;++ht)ba(Ve[ht],Ot);for(li(qt,Ve[0]),ur.genMipmaps?qt.mipmask=(Ve[0].width<<1)-1:qt.mipmask=Ve[0].mipmask,qt.internalformat=Ve[0].internalformat,Qe.width=Ve[0].width,Qe.height=Ve[0].height,ol(qt),ht=0;ht<6;++ht)is(Ve[ht],Jn+ht);for(al(ur,ya),io(),Gr.profile&&(qt.stats.size=cc(qt.internalformat,qt.type,Qe.width,Qe.height,ur.genMipmaps,!0)),Qe.format=xi[qt.internalformat],Qe.type=Ri[qt.type],Qe.mag=ci[ur.magFilter],Qe.min=an[ur.minFilter],Qe.wrapS=Zi[ur.wrapS],Qe.wrapT=Zi[ur.wrapT],ht=0;ht<6;++ht)Ml(Ve[ht]);return Qe}function at(Ot,Rt,Bt,Dt,yt){var Pt=Bt|0,ht=Dt|0,ur=yt|0,br=Ki();return li(br,qt),br.width=0,br.height=0,Ni(br,Rt),br.width=br.width||(qt.width>>ur)-Pt,br.height=br.height||(qt.height>>ur)-ht,ol(qt),Vn(br,Jn+Ot,Pt,ht,ur),io(),kn(br),Qe}function Ct(Ot){var Rt=Ot|0;if(Rt!==qt.width){Qe.width=qt.width=Rt,Qe.height=qt.height=Rt,ol(qt);for(var Bt=0;Bt<6;++Bt)for(var Dt=0;qt.mipmask>>Dt;++Dt)Me.texImage2D(Jn+Bt,Dt,qt.format,Rt>>Dt,Rt>>Dt,0,qt.format,qt.type,null);return io(),Gr.profile&&(qt.stats.size=cc(qt.internalformat,qt.type,Qe.width,Qe.height,!1,!0)),Qe}}return Qe(ne,we,Ue,ft,Xt,hr),Qe.subimage=at,Qe.resize=Ct,Qe._reglType="textureCube",Qe._texture=qt,Gr.profile&&(Qe.stats=qt.stats),Qe.destroy=function(){qt.decRef()},Qe}function q(){for(var ne=0;ne<Vs;++ne)Me.activeTexture(Fu+ne),Me.bindTexture(_n,null),Ys[ne]=null;dt(ss).forEach(Y),Nr.cubeCount=0,Nr.textureCount=0}Gr.profile&&(Nr.getTotalTextureSize=function(){var ne=0;return Object.keys(ss).forEach(function(we){ne+=ss[we].stats.size}),ne});function K(){for(var ne=0;ne<Vs;++ne){var we=Ys[ne];we&&(we.bindCount=0,we.unit=-1,Ys[ne]=null)}dt(ss).forEach(function(Ue){Ue.texture=Me.createTexture(),Me.bindTexture(Ue.target,Ue.texture);for(var ft=0;ft<32;++ft)if((Ue.mipmask&1<<ft)!==0)if(Ue.target===_n)Me.texImage2D(_n,ft,Ue.internalformat,Ue.width>>ft,Ue.height>>ft,0,Ue.internalformat,Ue.type,null);else for(var Xt=0;Xt<6;++Xt)Me.texImage2D(Jn+Xt,ft,Ue.internalformat,Ue.width>>ft,Ue.height>>ft,0,Ue.internalformat,Ue.type,null);al(Ue.texInfo,Ue.target)})}function de(){for(var ne=0;ne<Vs;++ne){var we=Ys[ne];we&&(we.bindCount=0,we.unit=-1,Ys[ne]=null),Me.activeTexture(Fu+ne),Me.bindTexture(_n,null),Me.bindTexture(ya,null)}}return{create2D:D,createCube:J,clear:q,getTexture:function(ne){return null},restore:K,refresh:de}}var Nl=36161,Kc=32854,Rc=32855,gs=36194,jf=33189,Gh=36168,rh=34041,sf=35907,Th=34836,Mu=34842,ih=34843,js=[];js[Kc]=2,js[Rc]=2,js[gs]=2,js[jf]=2,js[Gh]=1,js[rh]=4,js[sf]=4,js[Th]=16,js[Mu]=8,js[ih]=6;function Eu(Me,bt,zt){return js[Me]*bt*zt}var Dc=function(Me,bt,zt,Rr,jr){var Nr={rgba4:Kc,rgb565:gs,"rgb5 a1":Rc,depth:jf,stencil:Gh,"depth stencil":rh};bt.ext_srgb&&(Nr.srgba=sf),bt.ext_color_buffer_half_float&&(Nr.rgba16f=Mu,Nr.rgb16f=ih),bt.webgl_color_buffer_float&&(Nr.rgba32f=Th);var Gr=[];Object.keys(Nr).forEach(function(Wi){var si=Nr[Wi];Gr[si]=Wi});var mi=0,Ui={};function qi(Wi){this.id=mi++,this.refCount=1,this.renderbuffer=Wi,this.format=Kc,this.width=0,this.height=0,jr.profile&&(this.stats={size:0})}qi.prototype.decRef=function(){--this.refCount<=0&&Ei(this)};function Ei(Wi){var si=Wi.renderbuffer;Me.bindRenderbuffer(Nl,null),Me.deleteRenderbuffer(si),Wi.renderbuffer=null,Wi.refCount=0,delete Ui[Wi.id],Rr.renderbufferCount--}function Hn(Wi,si){var Mr=new qi(Me.createRenderbuffer());Ui[Mr.id]=Mr,Rr.renderbufferCount++;function Yr(Ri,ci){var an=0,Zi=0,Bn=Kc;if(typeof Ri=="object"&&Ri){var hi=Ri;if("shape"in hi){var li=hi.shape;an=li[0]|0,Zi=li[1]|0}else"radius"in hi&&(an=Zi=hi.radius|0),"width"in hi&&(an=hi.width|0),"height"in hi&&(Zi=hi.height|0);"format"in hi&&(Bn=Nr[hi.format])}else typeof Ri=="number"?(an=Ri|0,typeof ci=="number"?Zi=ci|0:Zi=an):Ri||(an=Zi=1);if(!(an===Mr.width&&Zi===Mr.height&&Bn===Mr.format))return Yr.width=Mr.width=an,Yr.height=Mr.height=Zi,Mr.format=Bn,Me.bindRenderbuffer(Nl,Mr.renderbuffer),Me.renderbufferStorage(Nl,Bn,an,Zi),jr.profile&&(Mr.stats.size=Eu(Mr.format,Mr.width,Mr.height)),Yr.format=Gr[Mr.format],Yr}function xi(Ri,ci){var an=Ri|0,Zi=ci|0||an;return an===Mr.width&&Zi===Mr.height||(Yr.width=Mr.width=an,Yr.height=Mr.height=Zi,Me.bindRenderbuffer(Nl,Mr.renderbuffer),Me.renderbufferStorage(Nl,Mr.format,an,Zi),jr.profile&&(Mr.stats.size=Eu(Mr.format,Mr.width,Mr.height))),Yr}return Yr(Wi,si),Yr.resize=xi,Yr._reglType="renderbuffer",Yr._renderbuffer=Mr,jr.profile&&(Yr.stats=Mr.stats),Yr.destroy=function(){Mr.decRef()},Yr}jr.profile&&(Rr.getTotalRenderbufferSize=function(){var Wi=0;return Object.keys(Ui).forEach(function(si){Wi+=Ui[si].stats.size}),Wi});function en(){dt(Ui).forEach(function(Wi){Wi.renderbuffer=Me.createRenderbuffer(),Me.bindRenderbuffer(Nl,Wi.renderbuffer),Me.renderbufferStorage(Nl,Wi.format,Wi.width,Wi.height)}),Me.bindRenderbuffer(Nl,null)}return{create:Hn,clear:function(){dt(Ui).forEach(Ei)},restore:en}},ks=36160,bc=36161,hu=3553,_u=34069,nl=36064,nh=36096,Ah=36128,zu=33306,Fc=36053,wc=36193,bd=5121,xf=5126,Pf=6407,Ou=6408,bf=[];bf[Ou]=4,bf[Pf]=3;var jl=[];jl[bd]=1,jl[xf]=4,jl[wc]=2;function lf(Me,bt,zt,Rr,jr,Nr){var Gr={cur:null,next:null,dirty:!1,setFBO:null},mi=["rgba"],Ui=["rgba4","rgb565","rgb5 a1"];bt.ext_srgb&&Ui.push("srgba"),bt.ext_color_buffer_half_float&&Ui.push("rgba16f","rgb16f"),bt.webgl_color_buffer_float&&Ui.push("rgba32f");var qi=["uint8"];bt.oes_texture_half_float&&qi.push("half float","float16"),bt.oes_texture_float&&qi.push("float","float32");function Ei(Vi,Ni,pn){this.target=Vi,this.texture=Ni,this.renderbuffer=pn;var Vn=0,na=0;Ni?(Vn=Ni.width,na=Ni.height):pn&&(Vn=pn.width,na=pn.height),this.width=Vn,this.height=na}function Hn(Vi){Vi&&(Vi.texture&&Vi.texture._texture.decRef(),Vi.renderbuffer&&Vi.renderbuffer._renderbuffer.decRef())}function en(Vi,Ni,pn){if(Vi)if(Vi.texture){var Vn=Vi.texture._texture,na=Math.max(1,Vn.width),Ki=Math.max(1,Vn.height);Vn.refCount+=1}else{var kn=Vi.renderbuffer._renderbuffer;kn.refCount+=1}}function Wi(Vi,Ni){Ni&&(Ni.texture?Me.framebufferTexture2D(ks,Vi,Ni.target,Ni.texture._texture.texture,0):Me.framebufferRenderbuffer(ks,Vi,bc,Ni.renderbuffer._renderbuffer.renderbuffer))}function si(Vi){var Ni=hu,pn=null,Vn=null,na=Vi;typeof Vi=="object"&&(na=Vi.data,"target"in Vi&&(Ni=Vi.target|0));var Ki=na._reglType;return Ki==="texture2d"||Ki==="textureCube"?pn=na:Ki==="renderbuffer"&&(Vn=na,Ni=bc),new Ei(Ni,pn,Vn)}function Mr(Vi,Ni,pn,Vn,na){if(pn){var Ki=Rr.create2D({width:Vi,height:Ni,format:Vn,type:na});return Ki._texture.refCount=0,new Ei(hu,Ki,null)}else{var kn=jr.create({width:Vi,height:Ni,format:Vn});return kn._renderbuffer.refCount=0,new Ei(bc,null,kn)}}function Yr(Vi){return Vi&&(Vi.texture||Vi.renderbuffer)}function xi(Vi,Ni,pn){Vi&&(Vi.texture?Vi.texture.resize(Ni,pn):Vi.renderbuffer&&Vi.renderbuffer.resize(Ni,pn),Vi.width=Ni,Vi.height=pn)}var Ri=0,ci={};function an(){this.id=Ri++,ci[this.id]=this,this.framebuffer=Me.createFramebuffer(),this.width=0,this.height=0,this.colorAttachments=[],this.depthAttachment=null,this.stencilAttachment=null,this.depthStencilAttachment=null}function Zi(Vi){Vi.colorAttachments.forEach(Hn),Hn(Vi.depthAttachment),Hn(Vi.stencilAttachment),Hn(Vi.depthStencilAttachment)}function Bn(Vi){var Ni=Vi.framebuffer;Me.deleteFramebuffer(Ni),Vi.framebuffer=null,Nr.framebufferCount--,delete ci[Vi.id]}function hi(Vi){var Ni;Me.bindFramebuffer(ks,Vi.framebuffer);var pn=Vi.colorAttachments;for(Ni=0;Ni<pn.length;++Ni)Wi(nl+Ni,pn[Ni]);for(Ni=pn.length;Ni<zt.maxColorAttachments;++Ni)Me.framebufferTexture2D(ks,nl+Ni,hu,null,0);Me.framebufferTexture2D(ks,zu,hu,null,0),Me.framebufferTexture2D(ks,nh,hu,null,0),Me.framebufferTexture2D(ks,Ah,hu,null,0),Wi(nh,Vi.depthAttachment),Wi(Ah,Vi.stencilAttachment),Wi(zu,Vi.depthStencilAttachment);var Vn=Me.checkFramebufferStatus(ks);Me.isContextLost(),Me.bindFramebuffer(ks,Gr.next?Gr.next.framebuffer:null),Gr.cur=Gr.next,Me.getError()}function li(Vi,Ni){var pn=new an;Nr.framebufferCount++;function Vn(Ki,kn){var ta,oa=0,ba=0,is=!0,Zs=!0,Va=null,Ml=!0,zo="rgba",Qs="uint8",al=1,Vl=null,ss=null,Vs=null,Ys=!1;if(typeof Ki=="number")oa=Ki|0,ba=kn|0||oa;else if(!Ki)oa=ba=1;else{var wa=Ki;if("shape"in wa){var ol=wa.shape;oa=ol[0],ba=ol[1]}else"radius"in wa&&(oa=ba=wa.radius),"width"in wa&&(oa=wa.width),"height"in wa&&(ba=wa.height);("color"in wa||"colors"in wa)&&(Va=wa.color||wa.colors,Array.isArray(Va)),Va||("colorCount"in wa&&(al=wa.colorCount|0),"colorTexture"in wa&&(Ml=!!wa.colorTexture,zo="rgba4"),"colorType"in wa&&(Qs=wa.colorType,Ml||(Qs==="half float"||Qs==="float16"?zo="rgba16f":(Qs==="float"||Qs==="float32")&&(zo="rgba32f"))),"colorFormat"in wa&&(zo=wa.colorFormat,mi.indexOf(zo)>=0?Ml=!0:Ui.indexOf(zo)>=0&&(Ml=!1))),("depthTexture"in wa||"depthStencilTexture"in wa)&&(Ys=!!(wa.depthTexture||wa.depthStencilTexture)),"depth"in wa&&(typeof wa.depth=="boolean"?is=wa.depth:(Vl=wa.depth,Zs=!1)),"stencil"in wa&&(typeof wa.stencil=="boolean"?Zs=wa.stencil:(ss=wa.stencil,is=!1)),"depthStencil"in wa&&(typeof wa.depthStencil=="boolean"?is=Zs=wa.depthStencil:(Vs=wa.depthStencil,is=!1,Zs=!1))}var io=null,Y=null,D=null,J=null;if(Array.isArray(Va))io=Va.map(si);else if(Va)io=[si(Va)];else for(io=new Array(al),ta=0;ta<al;++ta)io[ta]=Mr(oa,ba,Ml,zo,Qs);oa=oa||io[0].width,ba=ba||io[0].height,Vl?Y=si(Vl):is&&!Zs&&(Y=Mr(oa,ba,Ys,"depth","uint32")),ss?D=si(ss):Zs&&!is&&(D=Mr(oa,ba,!1,"stencil","uint8")),Vs?J=si(Vs):!Vl&&!ss&&Zs&&is&&(J=Mr(oa,ba,Ys,"depth stencil","depth stencil"));var q=null;for(ta=0;ta<io.length;++ta)if(en(io[ta],oa,ba),io[ta]&&io[ta].texture){var K=bf[io[ta].texture._texture.format]*jl[io[ta].texture._texture.type];q===null&&(q=K)}return en(Y,oa,ba),en(D,oa,ba),en(J,oa,ba),Zi(pn),pn.width=oa,pn.height=ba,pn.colorAttachments=io,pn.depthAttachment=Y,pn.stencilAttachment=D,pn.depthStencilAttachment=J,Vn.color=io.map(Yr),Vn.depth=Yr(Y),Vn.stencil=Yr(D),Vn.depthStencil=Yr(J),Vn.width=pn.width,Vn.height=pn.height,hi(pn),Vn}function na(Ki,kn){var ta=Math.max(Ki|0,1),oa=Math.max(kn|0||ta,1);if(ta===pn.width&&oa===pn.height)return Vn;for(var ba=pn.colorAttachments,is=0;is<ba.length;++is)xi(ba[is],ta,oa);return xi(pn.depthAttachment,ta,oa),xi(pn.stencilAttachment,ta,oa),xi(pn.depthStencilAttachment,ta,oa),pn.width=Vn.width=ta,pn.height=Vn.height=oa,hi(pn),Vn}return Vn(Vi,Ni),e(Vn,{resize:na,_reglType:"framebuffer",_framebuffer:pn,destroy:function(){Bn(pn),Zi(pn)},use:function(Ki){Gr.setFBO({framebuffer:Vn},Ki)}})}function mn(Vi){var Ni=Array(6);function pn(na){var Ki,kn={color:null},ta=0,oa=null,ba="rgba",is="uint8",Zs=1;if(typeof na=="number")ta=na|0;else if(!na)ta=1;else{var Va=na;if("shape"in Va){var Ml=Va.shape;ta=Ml[0]}else"radius"in Va&&(ta=Va.radius|0),"width"in Va?(ta=Va.width|0,"height"in Va):"height"in Va&&(ta=Va.height|0);("color"in Va||"colors"in Va)&&(oa=Va.color||Va.colors,Array.isArray(oa)),oa||("colorCount"in Va&&(Zs=Va.colorCount|0),"colorType"in Va&&(is=Va.colorType),"colorFormat"in Va&&(ba=Va.colorFormat)),"depth"in Va&&(kn.depth=Va.depth),"stencil"in Va&&(kn.stencil=Va.stencil),"depthStencil"in Va&&(kn.depthStencil=Va.depthStencil)}var zo;if(oa)if(Array.isArray(oa))for(zo=[],Ki=0;Ki<oa.length;++Ki)zo[Ki]=oa[Ki];else zo=[oa];else{zo=Array(Zs);var Qs={radius:ta,format:ba,type:is};for(Ki=0;Ki<Zs;++Ki)zo[Ki]=Rr.createCube(Qs)}for(kn.color=Array(zo.length),Ki=0;Ki<zo.length;++Ki){var al=zo[Ki];ta=ta||al.width,kn.color[Ki]={target:_u,data:zo[Ki]}}for(Ki=0;Ki<6;++Ki){for(var Vl=0;Vl<zo.length;++Vl)kn.color[Vl].target=_u+Ki;Ki>0&&(kn.depth=Ni[0].depth,kn.stencil=Ni[0].stencil,kn.depthStencil=Ni[0].depthStencil),Ni[Ki]?Ni[Ki](kn):Ni[Ki]=li(kn)}return e(pn,{width:ta,height:ta,color:zo})}function Vn(na){var Ki,kn=na|0;if(kn===pn.width)return pn;var ta=pn.color;for(Ki=0;Ki<ta.length;++Ki)ta[Ki].resize(kn);for(Ki=0;Ki<6;++Ki)Ni[Ki].resize(kn);return pn.width=pn.height=kn,pn}return pn(Vi),e(pn,{faces:Ni,resize:Vn,_reglType:"framebufferCube",destroy:function(){Ni.forEach(function(na){na.destroy()})}})}function Ji(){Gr.cur=null,Gr.next=null,Gr.dirty=!0,dt(ci).forEach(function(Vi){Vi.framebuffer=Me.createFramebuffer(),hi(Vi)})}return e(Gr,{getFramebuffer:function(Vi){if(typeof Vi=="function"&&Vi._reglType==="framebuffer"){var Ni=Vi._framebuffer;if(Ni instanceof an)return Ni}return null},create:li,createCube:mn,clear:function(){dt(ci).forEach(Bn)},restore:Ji})}var Hh=5126,If=34962,Cs=34963;function du(){this.state=0,this.x=0,this.y=0,this.z=0,this.w=0,this.buffer=null,this.size=0,this.normalized=!1,this.type=Hh,this.offset=0,this.stride=0,this.divisor=0}function ku(Me,bt,zt,Rr,jr,Nr,Gr){for(var mi=zt.maxAttributes,Ui=new Array(mi),qi=0;qi<mi;++qi)Ui[qi]=new du;var Ei=0,Hn={},en={Record:du,scope:{},state:Ui,currentVAO:null,targetVAO:null,restore:si()?Zi:function(){},createVAO:Bn,getVAO:Yr,destroyBuffer:Wi,setVAO:si()?xi:Ri,clear:si()?ci:function(){}};function Wi(hi){for(var li=0;li<Ui.length;++li){var mn=Ui[li];mn.buffer===hi&&(Me.disableVertexAttribArray(li),mn.buffer=null)}}function si(){return bt.oes_vertex_array_object}function Mr(){return bt.angle_instanced_arrays}function Yr(hi){return typeof hi=="function"&&hi._vao?hi._vao:null}function xi(hi){if(hi!==en.currentVAO){var li=si();hi?li.bindVertexArrayOES(hi.vao):li.bindVertexArrayOES(null),en.currentVAO=hi}}function Ri(hi){if(hi!==en.currentVAO){if(hi)hi.bindAttrs();else{for(var li=Mr(),mn=0;mn<Ui.length;++mn){var Ji=Ui[mn];Ji.buffer?(Me.enableVertexAttribArray(mn),Ji.buffer.bind(),Me.vertexAttribPointer(mn,Ji.size,Ji.type,Ji.normalized,Ji.stride,Ji.offfset),li&&Ji.divisor&&li.vertexAttribDivisorANGLE(mn,Ji.divisor)):(Me.disableVertexAttribArray(mn),Me.vertexAttrib4f(mn,Ji.x,Ji.y,Ji.z,Ji.w))}Gr.elements?Me.bindBuffer(Cs,Gr.elements.buffer.buffer):Me.bindBuffer(Cs,null)}en.currentVAO=hi}}function ci(){dt(Hn).forEach(function(hi){hi.destroy()})}function an(){this.id=++Ei,this.attributes=[],this.elements=null,this.ownsElements=!1,this.count=0,this.offset=0,this.instances=-1,this.primitive=4;var hi=si();hi?this.vao=hi.createVertexArrayOES():this.vao=null,Hn[this.id]=this,this.buffers=[]}an.prototype.bindAttrs=function(){for(var hi=Mr(),li=this.attributes,mn=0;mn<li.length;++mn){var Ji=li[mn];Ji.buffer?(Me.enableVertexAttribArray(mn),Me.bindBuffer(If,Ji.buffer.buffer),Me.vertexAttribPointer(mn,Ji.size,Ji.type,Ji.normalized,Ji.stride,Ji.offset),hi&&Ji.divisor&&hi.vertexAttribDivisorANGLE(mn,Ji.divisor)):(Me.disableVertexAttribArray(mn),Me.vertexAttrib4f(mn,Ji.x,Ji.y,Ji.z,Ji.w))}for(var Vi=li.length;Vi<mi;++Vi)Me.disableVertexAttribArray(Vi);var Ni=Nr.getElements(this.elements);Ni?Me.bindBuffer(Cs,Ni.buffer.buffer):Me.bindBuffer(Cs,null)},an.prototype.refresh=function(){var hi=si();hi&&(hi.bindVertexArrayOES(this.vao),this.bindAttrs(),en.currentVAO=null,hi.bindVertexArrayOES(null))},an.prototype.destroy=function(){if(this.vao){var hi=si();this===en.currentVAO&&(en.currentVAO=null,hi.bindVertexArrayOES(null)),hi.deleteVertexArrayOES(this.vao),this.vao=null}this.ownsElements&&(this.elements.destroy(),this.elements=null,this.ownsElements=!1),Hn[this.id]&&(delete Hn[this.id],Rr.vaoCount-=1)};function Zi(){var hi=si();hi&&dt(Hn).forEach(function(li){li.refresh()})}function Bn(hi){var li=new an;Rr.vaoCount+=1;function mn(Ji){var Vi;if(Array.isArray(Ji))Vi=Ji,li.elements&&li.ownsElements&&li.elements.destroy(),li.elements=null,li.ownsElements=!1,li.offset=0,li.count=0,li.instances=-1,li.primitive=4;else{if(Ji.elements){var Ni=Ji.elements;li.ownsElements?typeof Ni=="function"&&Ni._reglType==="elements"?(li.elements.destroy(),li.ownsElements=!1):(li.elements(Ni),li.ownsElements=!1):Nr.getElements(Ji.elements)?(li.elements=Ji.elements,li.ownsElements=!1):(li.elements=Nr.create(Ji.elements),li.ownsElements=!0)}else li.elements=null,li.ownsElements=!1;Vi=Ji.attributes,li.offset=0,li.count=-1,li.instances=-1,li.primitive=4,li.elements&&(li.count=li.elements._elements.vertCount,li.primitive=li.elements._elements.primType),"offset"in Ji&&(li.offset=Ji.offset|0),"count"in Ji&&(li.count=Ji.count|0),"instances"in Ji&&(li.instances=Ji.instances|0),"primitive"in Ji&&(li.primitive=Sn[Ji.primitive])}var pn={},Vn=li.attributes;Vn.length=Vi.length;for(var na=0;na<Vi.length;++na){var Ki=Vi[na],kn=Vn[na]=new du,ta=Ki.data||Ki;if(Array.isArray(ta)||Br(ta)||Vr(ta)){var oa;li.buffers[na]&&(oa=li.buffers[na],Br(ta)&&oa._buffer.byteLength>=ta.byteLength?oa.subdata(ta):(oa.destroy(),li.buffers[na]=null)),li.buffers[na]||(oa=li.buffers[na]=jr.create(Ki,If,!1,!0)),kn.buffer=jr.getBuffer(oa),kn.size=kn.buffer.dimension|0,kn.normalized=!1,kn.type=kn.buffer.dtype,kn.offset=0,kn.stride=0,kn.divisor=0,kn.state=1,pn[na]=1}else jr.getBuffer(Ki)?(kn.buffer=jr.getBuffer(Ki),kn.size=kn.buffer.dimension|0,kn.normalized=!1,kn.type=kn.buffer.dtype,kn.offset=0,kn.stride=0,kn.divisor=0,kn.state=1):jr.getBuffer(Ki.buffer)?(kn.buffer=jr.getBuffer(Ki.buffer),kn.size=(+Ki.size||kn.buffer.dimension)|0,kn.normalized=!!Ki.normalized||!1,"type"in Ki?kn.type=ji[Ki.type]:kn.type=kn.buffer.dtype,kn.offset=(Ki.offset||0)|0,kn.stride=(Ki.stride||0)|0,kn.divisor=(Ki.divisor||0)|0,kn.state=1):"x"in Ki&&(kn.x=+Ki.x||0,kn.y=+Ki.y||0,kn.z=+Ki.z||0,kn.w=+Ki.w||0,kn.state=2)}for(var ba=0;ba<li.buffers.length;++ba)!pn[ba]&&li.buffers[ba]&&(li.buffers[ba].destroy(),li.buffers[ba]=null);return li.refresh(),mn}return mn.destroy=function(){for(var Ji=0;Ji<li.buffers.length;++Ji)li.buffers[Ji]&&li.buffers[Ji].destroy();li.buffers.length=0,li.ownsElements&&(li.elements.destroy(),li.elements=null,li.ownsElements=!1),li.destroy()},mn._vao=li,mn._reglType="vao",mn(hi)}return en}var Wf=35632,Us=35633,wf=35718,zc=35721;function Wu(Me,bt,zt,Rr){var jr={},Nr={};function Gr(Mr,Yr,xi,Ri){this.name=Mr,this.id=Yr,this.location=xi,this.info=Ri}function mi(Mr,Yr){for(var xi=0;xi<Mr.length;++xi)if(Mr[xi].id===Yr.id){Mr[xi].location=Yr.location;return}Mr.push(Yr)}function Ui(Mr,Yr,xi){var Ri=Mr===Wf?jr:Nr,ci=Ri[Yr];if(!ci){var an=bt.str(Yr);ci=Me.createShader(Mr),Me.shaderSource(ci,an),Me.compileShader(ci),Ri[Yr]=ci}return ci}var qi={},Ei=[],Hn=0;function en(Mr,Yr){this.id=Hn++,this.fragId=Mr,this.vertId=Yr,this.program=null,this.uniforms=[],this.attributes=[],this.refCount=1,Rr.profile&&(this.stats={uniformsCount:0,attributesCount:0})}function Wi(Mr,Yr,xi){var Ri,ci,an=Ui(Wf,Mr.fragId),Zi=Ui(Us,Mr.vertId),Bn=Mr.program=Me.createProgram();if(Me.attachShader(Bn,an),Me.attachShader(Bn,Zi),xi)for(Ri=0;Ri<xi.length;++Ri){var hi=xi[Ri];Me.bindAttribLocation(Bn,hi[0],hi[1])}Me.linkProgram(Bn);var li=Me.getProgramParameter(Bn,wf);Rr.profile&&(Mr.stats.uniformsCount=li);var mn=Mr.uniforms;for(Ri=0;Ri<li;++Ri)if(ci=Me.getActiveUniform(Bn,Ri),ci)if(ci.size>1)for(var Ji=0;Ji<ci.size;++Ji){var Vi=ci.name.replace("[0]","["+Ji+"]");mi(mn,new Gr(Vi,bt.id(Vi),Me.getUniformLocation(Bn,Vi),ci))}else mi(mn,new Gr(ci.name,bt.id(ci.name),Me.getUniformLocation(Bn,ci.name),ci));var Ni=Me.getProgramParameter(Bn,zc);Rr.profile&&(Mr.stats.attributesCount=Ni);var pn=Mr.attributes;for(Ri=0;Ri<Ni;++Ri)ci=Me.getActiveAttrib(Bn,Ri),ci&&mi(pn,new Gr(ci.name,bt.id(ci.name),Me.getAttribLocation(Bn,ci.name),ci))}Rr.profile&&(zt.getMaxUniformsCount=function(){var Mr=0;return Ei.forEach(function(Yr){Yr.stats.uniformsCount>Mr&&(Mr=Yr.stats.uniformsCount)}),Mr},zt.getMaxAttributesCount=function(){var Mr=0;return Ei.forEach(function(Yr){Yr.stats.attributesCount>Mr&&(Mr=Yr.stats.attributesCount)}),Mr});function si(){jr={},Nr={};for(var Mr=0;Mr<Ei.length;++Mr)Wi(Ei[Mr],null,Ei[Mr].attributes.map(function(Yr){return[Yr.location,Yr.name]}))}return{clear:function(){var Mr=Me.deleteShader.bind(Me);dt(jr).forEach(Mr),jr={},dt(Nr).forEach(Mr),Nr={},Ei.forEach(function(Yr){Me.deleteProgram(Yr.program)}),Ei.length=0,qi={},zt.shaderCount=0},program:function(Mr,Yr,xi,Ri){var ci=qi[Yr];ci||(ci=qi[Yr]={});var an=ci[Mr];if(an&&(an.refCount++,!Ri))return an;var Zi=new en(Yr,Mr);return zt.shaderCount++,Wi(Zi,xi,Ri),an||(ci[Mr]=Zi),Ei.push(Zi),e(Zi,{destroy:function(){if(Zi.refCount--,Zi.refCount<=0){Me.deleteProgram(Zi.program);var Bn=Ei.indexOf(Zi);Ei.splice(Bn,1),zt.shaderCount--}ci[Zi.vertId].refCount<=0&&(Me.deleteShader(Nr[Zi.vertId]),delete Nr[Zi.vertId],delete qi[Zi.fragId][Zi.vertId]),Object.keys(qi[Zi.fragId]).length||(Me.deleteShader(jr[Zi.fragId]),delete jr[Zi.fragId],delete qi[Zi.fragId])}})},restore:si,shader:Ui,frag:-1,vert:-1}}var Rf=6408,Xu=5121,uf=3333,Xf=5126;function Wl(Me,bt,zt,Rr,jr,Nr,Gr){function mi(Ei){var Hn;bt.next===null?Hn=Xu:Hn=bt.next.colorAttachments[0].texture._texture.type;var en=0,Wi=0,si=Rr.framebufferWidth,Mr=Rr.framebufferHeight,Yr=null;Br(Ei)?Yr=Ei:Ei&&(en=Ei.x|0,Wi=Ei.y|0,si=(Ei.width||Rr.framebufferWidth-en)|0,Mr=(Ei.height||Rr.framebufferHeight-Wi)|0,Yr=Ei.data||null),zt();var xi=si*Mr*4;return Yr||(Hn===Xu?Yr=new Uint8Array(xi):Hn===Xf&&(Yr=Yr||new Float32Array(xi))),Me.pixelStorei(uf,4),Me.readPixels(en,Wi,si,Mr,Rf,Hn,Yr),Yr}function Ui(Ei){var Hn;return bt.setFBO({framebuffer:Ei.framebuffer},function(){Hn=mi(Ei)}),Hn}function qi(Ei){return!Ei||!("framebuffer"in Ei)?mi(Ei):Ui(Ei)}return qi}var ah=0,Zu="";function Oc(Me){return vu(Tc(fc(Me)))}function Tc(Me){return At(Yi(Bc(Me),Me.length*8))}function wl(Me,bt){var zt=Bc(Me);zt.length>16&&(zt=Yi(zt,Me.length*8));for(var Rr=Array(16),jr=Array(16),Nr=0;Nr<16;Nr++)Rr[Nr]=zt[Nr]^909522486,jr[Nr]=zt[Nr]^1549556828;var Gr=Yi(Rr.concat(Bc(bt)),512+bt.length*8);return At(Yi(jr.concat(Gr),768))}function vu(Me){for(var bt=ah?"0123456789ABCDEF":"0123456789abcdef",zt="",Rr,jr=0;jr<Me.length;jr++)Rr=Me.charCodeAt(jr),zt+=bt.charAt(Rr>>>4&15)+bt.charAt(Rr&15);return zt}function qc(Me){for(var bt="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",zt="",Rr=Me.length,jr=0;jr<Rr;jr+=3)for(var Nr=Me.charCodeAt(jr)<<16|(jr+1<Rr?Me.charCodeAt(jr+1)<<8:0)|(jr+2<Rr?Me.charCodeAt(jr+2):0),Gr=0;Gr<4;Gr++)jr*8+Gr*6>Me.length*8?zt+=Zu:zt+=bt.charAt(Nr>>>6*(3-Gr)&63);return zt}function cf(Me,bt){var zt=bt.length,Rr=Array(),jr,Nr,Gr,mi,Ui=Array(Math.ceil(Me.length/2));for(jr=0;jr<Ui.length;jr++)Ui[jr]=Me.charCodeAt(jr*2)<<8|Me.charCodeAt(jr*2+1);for(;Ui.length>0;){for(mi=Array(),Gr=0,jr=0;jr<Ui.length;jr++)Gr=(Gr<<16)+Ui[jr],Nr=Math.floor(Gr/zt),Gr-=Nr*zt,(mi.length>0||Nr>0)&&(mi[mi.length]=Nr);Rr[Rr.length]=Gr,Ui=mi}var qi="";for(jr=Rr.length-1;jr>=0;jr--)qi+=bt.charAt(Rr[jr]);var Ei=Math.ceil(Me.length*8/(Math.log(bt.length)/Math.log(2)));for(jr=qi.length;jr<Ei;jr++)qi=bt[0]+qi;return qi}function fc(Me){for(var bt="",zt=-1,Rr,jr;++zt<Me.length;)Rr=Me.charCodeAt(zt),jr=zt+1<Me.length?Me.charCodeAt(zt+1):0,55296<=Rr&&Rr<=56319&&56320<=jr&&jr<=57343&&(Rr=65536+((Rr&1023)<<10)+(jr&1023),zt++),Rr<=127?bt+=String.fromCharCode(Rr):Rr<=2047?bt+=String.fromCharCode(192|Rr>>>6&31,128|Rr&63):Rr<=65535?bt+=String.fromCharCode(224|Rr>>>12&15,128|Rr>>>6&63,128|Rr&63):Rr<=2097151&&(bt+=String.fromCharCode(240|Rr>>>18&7,128|Rr>>>12&63,128|Rr>>>6&63,128|Rr&63));return bt}function Bc(Me){for(var bt=Array(Me.length>>2),zt=0;zt<bt.length;zt++)bt[zt]=0;for(var zt=0;zt<Me.length*8;zt+=8)bt[zt>>5]|=(Me.charCodeAt(zt/8)&255)<<24-zt%32;return bt}function At(Me){for(var bt="",zt=0;zt<Me.length*32;zt+=8)bt+=String.fromCharCode(Me[zt>>5]>>>24-zt%32&255);return bt}function Wt(Me,bt){return Me>>>bt|Me<<32-bt}function Cr(Me,bt){return Me>>>bt}function Ar(Me,bt,zt){return Me&bt^~Me&zt}function Kr(Me,bt,zt){return Me&bt^Me&zt^bt&zt}function ki(Me){return Wt(Me,2)^Wt(Me,13)^Wt(Me,22)}function Xi(Me){return Wt(Me,6)^Wt(Me,11)^Wt(Me,25)}function dn(Me){return Wt(Me,7)^Wt(Me,18)^Cr(Me,3)}function wn(Me){return Wt(Me,17)^Wt(Me,19)^Cr(Me,10)}var Nn=new Array(1116352408,1899447441,-1245643825,-373957723,961987163,1508970993,-1841331548,-1424204075,-670586216,310598401,607225278,1426881987,1925078388,-2132889090,-1680079193,-1046744716,-459576895,-272742522,264347078,604807628,770255983,1249150122,1555081692,1996064986,-1740746414,-1473132947,-1341970488,-1084653625,-958395405,-710438585,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,-2117940946,-1838011259,-1564481375,-1474664885,-1035236496,-949202525,-778901479,-694614492,-200395387,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,-2067236844,-1933114872,-1866530822,-1538233109,-1090935817,-965641998);function Yi(Me,bt){var zt=new Array(1779033703,-1150833019,1013904242,-1521486534,1359893119,-1694144372,528734635,1541459225),Rr=new Array(64),jr,Nr,Gr,mi,Ui,qi,Ei,Hn,en,Wi,si,Mr;for(Me[bt>>5]|=128<<24-bt%32,Me[(bt+64>>9<<4)+15]=bt,en=0;en<Me.length;en+=16){for(jr=zt[0],Nr=zt[1],Gr=zt[2],mi=zt[3],Ui=zt[4],qi=zt[5],Ei=zt[6],Hn=zt[7],Wi=0;Wi<64;Wi++)Wi<16?Rr[Wi]=Me[Wi+en]:Rr[Wi]=Qi(Qi(Qi(wn(Rr[Wi-2]),Rr[Wi-7]),dn(Rr[Wi-15])),Rr[Wi-16]),si=Qi(Qi(Qi(Qi(Hn,Xi(Ui)),Ar(Ui,qi,Ei)),Nn[Wi]),Rr[Wi]),Mr=Qi(ki(jr),Kr(jr,Nr,Gr)),Hn=Ei,Ei=qi,qi=Ui,Ui=Qi(mi,si),mi=Gr,Gr=Nr,Nr=jr,jr=Qi(si,Mr);zt[0]=Qi(jr,zt[0]),zt[1]=Qi(Nr,zt[1]),zt[2]=Qi(Gr,zt[2]),zt[3]=Qi(mi,zt[3]),zt[4]=Qi(Ui,zt[4]),zt[5]=Qi(qi,zt[5]),zt[6]=Qi(Ei,zt[6]),zt[7]=Qi(Hn,zt[7])}return zt}function Qi(Me,bt){var zt=(Me&65535)+(bt&65535),Rr=(Me>>16)+(bt>>16)+(zt>>16);return Rr<<16|zt&65535}function on(Me){return Array.prototype.slice.call(Me)}function Fi(Me){return on(Me).join("")}function $n(Me){var bt=Me&&Me.cache,zt=0,Rr=[],jr=[],Nr=[];function Gr(si,Mr){var Yr=Mr&&Mr.stable;if(!Yr){for(var xi=0;xi<jr.length;++xi)if(jr[xi]===si&&!Nr[xi])return Rr[xi]}var Ri="g"+zt++;return Rr.push(Ri),jr.push(si),Nr.push(Yr),Ri}function mi(){var si=[];function Mr(){si.push.apply(si,on(arguments))}var Yr=[];function xi(){var Ri="v"+zt++;return Yr.push(Ri),arguments.length>0&&(si.push(Ri,"="),si.push.apply(si,on(arguments)),si.push(";")),Ri}return e(Mr,{def:xi,toString:function(){return Fi([Yr.length>0?"var "+Yr.join(",")+";":"",Fi(si)])}})}function Ui(){var si=mi(),Mr=mi(),Yr=si.toString,xi=Mr.toString;function Ri(ci,an){Mr(ci,an,"=",si.def(ci,an),";")}return e(function(){si.apply(si,on(arguments))},{def:si.def,entry:si,exit:Mr,save:Ri,set:function(ci,an,Zi){Ri(ci,an),si(ci,an,"=",Zi,";")},toString:function(){return Yr()+xi()}})}function qi(){var si=Fi(arguments),Mr=Ui(),Yr=Ui(),xi=Mr.toString,Ri=Yr.toString;return e(Mr,{then:function(){return Mr.apply(Mr,on(arguments)),this},else:function(){return Yr.apply(Yr,on(arguments)),this},toString:function(){var ci=Ri();return ci&&(ci="else{"+ci+"}"),Fi(["if(",si,"){",xi(),"}",ci])}})}var Ei=mi(),Hn={};function en(si,Mr){var Yr=[];function xi(){var Bn="a"+Yr.length;return Yr.push(Bn),Bn}Mr=Mr||0;for(var Ri=0;Ri<Mr;++Ri)xi();var ci=Ui(),an=ci.toString,Zi=Hn[si]=e(ci,{arg:xi,toString:function(){return Fi(["function(",Yr.join(),"){",an(),"}"])}});return Zi}function Wi(){var si=['"use strict";',Ei,"return {"];Object.keys(Hn).forEach(function(Ri){si.push('"',Ri,'":',Hn[Ri].toString(),",")}),si.push("}");var Mr=Fi(si).replace(/;/g,`;
+`).replace(/}/g,`}
+`).replace(/{/g,`{
+`),Yr;if(bt&&(Yr=Oc(Mr),bt[Yr]))return bt[Yr].apply(null,jr);var xi=Function.apply(null,Rr.concat(Mr));return bt&&(bt[Yr]=xi),xi.apply(null,jr)}return{global:Ei,link:Gr,block:mi,proc:en,scope:Ui,cond:qi,compile:Wi}}var Ca="xyzw".split(""),Ra=5121,La=1,Na=2,Yn=0,Dn=1,Ka=2,bo=3,Xo=4,Ss=5,as=6,ws="dither",Ho="blend.enable",ml="blend.color",Ws="blend.equation",Ls="blend.func",va="depth.enable",no="depth.func",ys="depth.range",rs="depth.mask",$l="colorMask",Cu="cull.enable",Yu="cull.face",Nc="frontFace",pu="lineWidth",Uc="polygonOffset.enable",xu="polygonOffset.offset",Ac="sample.alpha",Ua="sample.enable",oo="sample.coverage",Vc="stencil.enable",hc="stencil.mask",Ku="stencil.func",ue="stencil.opFront",w="stencil.opBack",B="scissor.enable",Q="scissor.box",ee="viewport",le="profile",qe="framebuffer",Xe="vert",ot="frag",Tt="elements",Yt="primitive",Kt="count",xr="offset",Ir="instances",ve="vao",be="Width",De="Height",Be=qe+be,et=qe+De,We=ee+be,it=ee+De,Ft="drawingBuffer",Ht=Ft+be,tr=Ft+De,dr=[Ls,Ws,Ku,ue,w,oo,ee,Q,xu],Sr=34962,Or=34963,Wr=2884,ni=3042,Pi=3024,cn=2960,ln=2929,Cn=3089,Kn=32823,Ta=32926,fa=32928,$a=5126,Co=35664,Qa=35665,mo=35666,Bo=5124,Ps=35667,Ts=35668,wo=35669,To=35670,hl=35671,Ul=35672,Lu=35673,au=35674,Js=35675,Ql=35676,dc=35678,Tl=35680,Al=4,X=1028,se=1029,Te=2304,Ne=2305,He=32775,Ye=32776,kt=519,nt=7680,jt=0,gr=1,yr=32774,Hr=513,qr=36160,_i=36064,bi={0:0,1:1,zero:0,one:1,"src color":768,"one minus src color":769,"src alpha":770,"one minus src alpha":771,"dst color":774,"one minus dst color":775,"dst alpha":772,"one minus dst alpha":773,"constant color":32769,"one minus constant color":32770,"constant alpha":32771,"one minus constant alpha":32772,"src alpha saturate":776},Zr={never:512,less:513,"<":513,equal:514,"=":514,"==":514,"===":514,lequal:515,"<=":515,greater:516,">":516,notequal:517,"!=":517,"!==":517,gequal:518,">=":518,always:519},ai={0:0,zero:0,keep:7680,replace:7681,increment:7682,decrement:7683,"increment wrap":34055,"decrement wrap":34056,invert:5386},gi={cw:Te,ccw:Ne};function Ii(Me){return Array.isArray(Me)||Br(Me)||Vr(Me)}function Si(Me){return Me.sort(function(bt,zt){return bt===ee?-1:zt===ee?1:bt<zt?-1:1})}function ei(Me,bt,zt,Rr){this.thisDep=Me,this.contextDep=bt,this.propDep=zt,this.append=Rr}function Ln(Me){return Me&&!(Me.thisDep||Me.contextDep||Me.propDep)}function En(Me){return new ei(!1,!1,!1,Me)}function Un(Me,bt){var zt=Me.type;if(zt===Yn){var Rr=Me.data.length;return new ei(!0,Rr>=1,Rr>=2,bt)}else if(zt===Xo){var jr=Me.data;return new ei(jr.thisDep,jr.contextDep,jr.propDep,bt)}else{if(zt===Ss)return new ei(!1,!1,!1,bt);if(zt===as){for(var Nr=!1,Gr=!1,mi=!1,Ui=0;Ui<Me.data.length;++Ui){var qi=Me.data[Ui];if(qi.type===Dn)mi=!0;else if(qi.type===Ka)Gr=!0;else if(qi.type===bo)Nr=!0;else if(qi.type===Yn){Nr=!0;var Ei=qi.data;Ei>=1&&(Gr=!0),Ei>=2&&(mi=!0)}else qi.type===Xo&&(Nr=Nr||qi.data.thisDep,Gr=Gr||qi.data.contextDep,mi=mi||qi.data.propDep)}return new ei(Nr,Gr,mi,bt)}else return new ei(zt===bo,zt===Ka,zt===Dn,bt)}}var ia=new ei(!1,!1,!1,function(){});function Ea(Me,bt,zt,Rr,jr,Nr,Gr,mi,Ui,qi,Ei,Hn,en,Wi,si,Mr){var Yr=qi.Record,xi={add:32774,subtract:32778,"reverse subtract":32779};zt.ext_blend_minmax&&(xi.min=He,xi.max=Ye);var Ri=zt.angle_instanced_arrays,ci=zt.webgl_draw_buffers,an=zt.oes_vertex_array_object,Zi={dirty:!0,profile:Mr.profile},Bn={},hi=[],li={},mn={};function Ji(Ve){return Ve.replace(".","_")}function Vi(Ve,Qe,at){var Ct=Ji(Ve);hi.push(Ve),Bn[Ct]=Zi[Ct]=!!at,li[Ct]=Qe}function Ni(Ve,Qe,at){var Ct=Ji(Ve);hi.push(Ve),Array.isArray(at)?(Zi[Ct]=at.slice(),Bn[Ct]=at.slice()):Zi[Ct]=Bn[Ct]=at,mn[Ct]=Qe}function pn(Ve){return!!isNaN(Ve)}Vi(ws,Pi),Vi(Ho,ni),Ni(ml,"blendColor",[0,0,0,0]),Ni(Ws,"blendEquationSeparate",[yr,yr]),Ni(Ls,"blendFuncSeparate",[gr,jt,gr,jt]),Vi(va,ln,!0),Ni(no,"depthFunc",Hr),Ni(ys,"depthRange",[0,1]),Ni(rs,"depthMask",!0),Ni($l,$l,[!0,!0,!0,!0]),Vi(Cu,Wr),Ni(Yu,"cullFace",se),Ni(Nc,Nc,Ne),Ni(pu,pu,1),Vi(Uc,Kn),Ni(xu,"polygonOffset",[0,0]),Vi(Ac,Ta),Vi(Ua,fa),Ni(oo,"sampleCoverage",[1,!1]),Vi(Vc,cn),Ni(hc,"stencilMask",-1),Ni(Ku,"stencilFunc",[kt,0,-1]),Ni(ue,"stencilOpSeparate",[X,nt,nt,nt]),Ni(w,"stencilOpSeparate",[se,nt,nt,nt]),Vi(B,Cn),Ni(Q,"scissor",[0,0,Me.drawingBufferWidth,Me.drawingBufferHeight]),Ni(ee,ee,[0,0,Me.drawingBufferWidth,Me.drawingBufferHeight]);var Vn={gl:Me,context:en,strings:bt,next:Bn,current:Zi,draw:Hn,elements:Nr,buffer:jr,shader:Ei,attributes:qi.state,vao:qi,uniforms:Ui,framebuffer:mi,extensions:zt,timer:Wi,isBufferArgs:Ii},na={primTypes:Sn,compareFuncs:Zr,blendFuncs:bi,blendEquations:xi,stencilOps:ai,glTypes:ji,orientationType:gi};ci&&(na.backBuffer=[se],na.drawBuffer=M(Rr.maxDrawbuffers,function(Ve){return Ve===0?[0]:M(Ve,function(Qe){return _i+Qe})}));var Ki=0;function kn(){var Ve=$n({cache:si}),Qe=Ve.link,at=Ve.global;Ve.id=Ki++,Ve.batchId="0";var Ct=Qe(Vn),Ot=Ve.shared={props:"a0"};Object.keys(Vn).forEach(function(Pt){Ot[Pt]=at.def(Ct,".",Pt)});var Rt=Ve.next={},Bt=Ve.current={};Object.keys(mn).forEach(function(Pt){Array.isArray(Zi[Pt])&&(Rt[Pt]=at.def(Ot.next,".",Pt),Bt[Pt]=at.def(Ot.current,".",Pt))});var Dt=Ve.constants={};Object.keys(na).forEach(function(Pt){Dt[Pt]=at.def(JSON.stringify(na[Pt]))}),Ve.invoke=function(Pt,ht){switch(ht.type){case Yn:var ur=["this",Ot.context,Ot.props,Ve.batchId];return Pt.def(Qe(ht.data),".call(",ur.slice(0,Math.max(ht.data.length+1,4)),")");case Dn:return Pt.def(Ot.props,ht.data);case Ka:return Pt.def(Ot.context,ht.data);case bo:return Pt.def("this",ht.data);case Xo:return ht.data.append(Ve,Pt),ht.data.ref;case Ss:return ht.data.toString();case as:return ht.data.map(function(br){return Ve.invoke(Pt,br)})}},Ve.attribCache={};var yt={};return Ve.scopeAttrib=function(Pt){var ht=bt.id(Pt);if(ht in yt)return yt[ht];var ur=qi.scope[ht];ur||(ur=qi.scope[ht]=new Yr);var br=yt[ht]=Qe(ur);return br},Ve}function ta(Ve){var Qe=Ve.static,at=Ve.dynamic,Ct;if(le in Qe){var Ot=!!Qe[le];Ct=En(function(Bt,Dt){return Ot}),Ct.enable=Ot}else if(le in at){var Rt=at[le];Ct=Un(Rt,function(Bt,Dt){return Bt.invoke(Dt,Rt)})}return Ct}function oa(Ve,Qe){var at=Ve.static,Ct=Ve.dynamic;if(qe in at){var Ot=at[qe];return Ot?(Ot=mi.getFramebuffer(Ot),En(function(Bt,Dt){var yt=Bt.link(Ot),Pt=Bt.shared;Dt.set(Pt.framebuffer,".next",yt);var ht=Pt.context;return Dt.set(ht,"."+Be,yt+".width"),Dt.set(ht,"."+et,yt+".height"),yt})):En(function(Bt,Dt){var yt=Bt.shared;Dt.set(yt.framebuffer,".next","null");var Pt=yt.context;return Dt.set(Pt,"."+Be,Pt+"."+Ht),Dt.set(Pt,"."+et,Pt+"."+tr),"null"})}else if(qe in Ct){var Rt=Ct[qe];return Un(Rt,function(Bt,Dt){var yt=Bt.invoke(Dt,Rt),Pt=Bt.shared,ht=Pt.framebuffer,ur=Dt.def(ht,".getFramebuffer(",yt,")");Dt.set(ht,".next",ur);var br=Pt.context;return Dt.set(br,"."+Be,ur+"?"+ur+".width:"+br+"."+Ht),Dt.set(br,"."+et,ur+"?"+ur+".height:"+br+"."+tr),ur})}else return null}function ba(Ve,Qe,at){var Ct=Ve.static,Ot=Ve.dynamic;function Rt(yt){if(yt in Ct){var Pt=Ct[yt],ht=!0,ur=Pt.x|0,br=Pt.y|0,Ur,Di;return"width"in Pt?Ur=Pt.width|0:ht=!1,"height"in Pt?Di=Pt.height|0:ht=!1,new ei(!ht&&Qe&&Qe.thisDep,!ht&&Qe&&Qe.contextDep,!ht&&Qe&&Qe.propDep,function(gn,rn){var Ci=gn.shared.context,Bi=Ur;"width"in Pt||(Bi=rn.def(Ci,".",Be,"-",ur));var Gi=Di;return"height"in Pt||(Gi=rn.def(Ci,".",et,"-",br)),[ur,br,Bi,Gi]})}else if(yt in Ot){var fi=Ot[yt],Ti=Un(fi,function(gn,rn){var Ci=gn.invoke(rn,fi),Bi=gn.shared.context,Gi=rn.def(Ci,".x|0"),sn=rn.def(Ci,".y|0"),zn=rn.def('"width" in ',Ci,"?",Ci,".width|0:","(",Bi,".",Be,"-",Gi,")"),Ja=rn.def('"height" in ',Ci,"?",Ci,".height|0:","(",Bi,".",et,"-",sn,")");return[Gi,sn,zn,Ja]});return Qe&&(Ti.thisDep=Ti.thisDep||Qe.thisDep,Ti.contextDep=Ti.contextDep||Qe.contextDep,Ti.propDep=Ti.propDep||Qe.propDep),Ti}else return Qe?new ei(Qe.thisDep,Qe.contextDep,Qe.propDep,function(gn,rn){var Ci=gn.shared.context;return[0,0,rn.def(Ci,".",Be),rn.def(Ci,".",et)]}):null}var Bt=Rt(ee);if(Bt){var Dt=Bt;Bt=new ei(Bt.thisDep,Bt.contextDep,Bt.propDep,function(yt,Pt){var ht=Dt.append(yt,Pt),ur=yt.shared.context;return Pt.set(ur,"."+We,ht[2]),Pt.set(ur,"."+it,ht[3]),ht})}return{viewport:Bt,scissor_box:Rt(Q)}}function is(Ve,Qe){var at=Ve.static,Ct=typeof at[ot]=="string"&&typeof at[Xe]=="string";if(Ct){if(Object.keys(Qe.dynamic).length>0)return null;var Ot=Qe.static,Rt=Object.keys(Ot);if(Rt.length>0&&typeof Ot[Rt[0]]=="number"){for(var Bt=[],Dt=0;Dt<Rt.length;++Dt)Bt.push([Ot[Rt[Dt]]|0,Rt[Dt]]);return Bt}}return null}function Zs(Ve,Qe,at){var Ct=Ve.static,Ot=Ve.dynamic;function Rt(ht){if(ht in Ct){var ur=bt.id(Ct[ht]),br=En(function(){return ur});return br.id=ur,br}else if(ht in Ot){var Ur=Ot[ht];return Un(Ur,function(Di,fi){var Ti=Di.invoke(fi,Ur),gn=fi.def(Di.shared.strings,".id(",Ti,")");return gn})}return null}var Bt=Rt(ot),Dt=Rt(Xe),yt=null,Pt;return Ln(Bt)&&Ln(Dt)?(yt=Ei.program(Dt.id,Bt.id,null,at),Pt=En(function(ht,ur){return ht.link(yt)})):Pt=new ei(Bt&&Bt.thisDep||Dt&&Dt.thisDep,Bt&&Bt.contextDep||Dt&&Dt.contextDep,Bt&&Bt.propDep||Dt&&Dt.propDep,function(ht,ur){var br=ht.shared.shader,Ur;Bt?Ur=Bt.append(ht,ur):Ur=ur.def(br,".",ot);var Di;Dt?Di=Dt.append(ht,ur):Di=ur.def(br,".",Xe);var fi=br+".program("+Di+","+Ur;return ur.def(fi+")")}),{frag:Bt,vert:Dt,progVar:Pt,program:yt}}function Va(Ve,Qe){var at=Ve.static,Ct=Ve.dynamic,Ot={},Rt=!1;function Bt(){if(ve in at){var rn=at[ve];return rn!==null&&qi.getVAO(rn)===null&&(rn=qi.createVAO(rn)),Rt=!0,Ot.vao=rn,En(function(Bi){var Gi=qi.getVAO(rn);return Gi?Bi.link(Gi):"null"})}else if(ve in Ct){Rt=!0;var Ci=Ct[ve];return Un(Ci,function(Bi,Gi){var sn=Bi.invoke(Gi,Ci);return Gi.def(Bi.shared.vao+".getVAO("+sn+")")})}return null}var Dt=Bt(),yt=!1;function Pt(){if(Tt in at){var rn=at[Tt];if(Ot.elements=rn,Ii(rn)){var Ci=Ot.elements=Nr.create(rn,!0);rn=Nr.getElements(Ci),yt=!0}else rn&&(rn=Nr.getElements(rn),yt=!0);var Bi=En(function(sn,zn){if(rn){var Ja=sn.link(rn);return sn.ELEMENTS=Ja,Ja}return sn.ELEMENTS=null,null});return Bi.value=rn,Bi}else if(Tt in Ct){yt=!0;var Gi=Ct[Tt];return Un(Gi,function(sn,zn){var Ja=sn.shared,co=Ja.isBufferArgs,ts=Ja.elements,so=sn.invoke(zn,Gi),Zo=zn.def("null"),ms=zn.def(co,"(",so,")"),ou=sn.cond(ms).then(Zo,"=",ts,".createStream(",so,");").else(Zo,"=",ts,".getElements(",so,");");return zn.entry(ou),zn.exit(sn.cond(ms).then(ts,".destroyStream(",Zo,");")),sn.ELEMENTS=Zo,Zo})}else if(Rt)return new ei(Dt.thisDep,Dt.contextDep,Dt.propDep,function(sn,zn){return zn.def(sn.shared.vao+".currentVAO?"+sn.shared.elements+".getElements("+sn.shared.vao+".currentVAO.elements):null")});return null}var ht=Pt();function ur(){if(Yt in at){var rn=at[Yt];return Ot.primitive=rn,En(function(Bi,Gi){return Sn[rn]})}else if(Yt in Ct){var Ci=Ct[Yt];return Un(Ci,function(Bi,Gi){var sn=Bi.constants.primTypes,zn=Bi.invoke(Gi,Ci);return Gi.def(sn,"[",zn,"]")})}else{if(yt)return Ln(ht)?ht.value?En(function(Bi,Gi){return Gi.def(Bi.ELEMENTS,".primType")}):En(function(){return Al}):new ei(ht.thisDep,ht.contextDep,ht.propDep,function(Bi,Gi){var sn=Bi.ELEMENTS;return Gi.def(sn,"?",sn,".primType:",Al)});if(Rt)return new ei(Dt.thisDep,Dt.contextDep,Dt.propDep,function(Bi,Gi){return Gi.def(Bi.shared.vao+".currentVAO?"+Bi.shared.vao+".currentVAO.primitive:"+Al)})}return null}function br(rn,Ci){if(rn in at){var Bi=at[rn]|0;return Ci?Ot.offset=Bi:Ot.instances=Bi,En(function(sn,zn){return Ci&&(sn.OFFSET=Bi),Bi})}else if(rn in Ct){var Gi=Ct[rn];return Un(Gi,function(sn,zn){var Ja=sn.invoke(zn,Gi);return Ci&&(sn.OFFSET=Ja),Ja})}else if(Ci){if(yt)return En(function(sn,zn){return sn.OFFSET=0,0});if(Rt)return new ei(Dt.thisDep,Dt.contextDep,Dt.propDep,function(sn,zn){return zn.def(sn.shared.vao+".currentVAO?"+sn.shared.vao+".currentVAO.offset:0")})}else if(Rt)return new ei(Dt.thisDep,Dt.contextDep,Dt.propDep,function(sn,zn){return zn.def(sn.shared.vao+".currentVAO?"+sn.shared.vao+".currentVAO.instances:-1")});return null}var Ur=br(xr,!0);function Di(){if(Kt in at){var rn=at[Kt]|0;return Ot.count=rn,En(function(){return rn})}else if(Kt in Ct){var Ci=Ct[Kt];return Un(Ci,function(zn,Ja){var co=zn.invoke(Ja,Ci);return co})}else if(yt)if(Ln(ht)){if(ht)return Ur?new ei(Ur.thisDep,Ur.contextDep,Ur.propDep,function(zn,Ja){var co=Ja.def(zn.ELEMENTS,".vertCount-",zn.OFFSET);return co}):En(function(zn,Ja){return Ja.def(zn.ELEMENTS,".vertCount")});var Bi=En(function(){return-1});return Bi}else{var Gi=new ei(ht.thisDep||Ur.thisDep,ht.contextDep||Ur.contextDep,ht.propDep||Ur.propDep,function(zn,Ja){var co=zn.ELEMENTS;return zn.OFFSET?Ja.def(co,"?",co,".vertCount-",zn.OFFSET,":-1"):Ja.def(co,"?",co,".vertCount:-1")});return Gi}else if(Rt){var sn=new ei(Dt.thisDep,Dt.contextDep,Dt.propDep,function(zn,Ja){return Ja.def(zn.shared.vao,".currentVAO?",zn.shared.vao,".currentVAO.count:-1")});return sn}return null}var fi=ur(),Ti=Di(),gn=br(Ir,!1);return{elements:ht,primitive:fi,count:Ti,instances:gn,offset:Ur,vao:Dt,vaoActive:Rt,elementsActive:yt,static:Ot}}function Ml(Ve,Qe){var at=Ve.static,Ct=Ve.dynamic,Ot={};return hi.forEach(function(Rt){var Bt=Ji(Rt);function Dt(yt,Pt){if(Rt in at){var ht=yt(at[Rt]);Ot[Bt]=En(function(){return ht})}else if(Rt in Ct){var ur=Ct[Rt];Ot[Bt]=Un(ur,function(br,Ur){return Pt(br,Ur,br.invoke(Ur,ur))})}}switch(Rt){case Cu:case Ho:case ws:case Vc:case va:case B:case Uc:case Ac:case Ua:case rs:return Dt(function(yt){return yt},function(yt,Pt,ht){return ht});case no:return Dt(function(yt){return Zr[yt]},function(yt,Pt,ht){var ur=yt.constants.compareFuncs;return Pt.def(ur,"[",ht,"]")});case ys:return Dt(function(yt){return yt},function(yt,Pt,ht){var ur=Pt.def("+",ht,"[0]"),br=Pt.def("+",ht,"[1]");return[ur,br]});case Ls:return Dt(function(yt){var Pt="srcRGB"in yt?yt.srcRGB:yt.src,ht="srcAlpha"in yt?yt.srcAlpha:yt.src,ur="dstRGB"in yt?yt.dstRGB:yt.dst,br="dstAlpha"in yt?yt.dstAlpha:yt.dst;return[bi[Pt],bi[ur],bi[ht],bi[br]]},function(yt,Pt,ht){var ur=yt.constants.blendFuncs;function br(Ci,Bi){var Gi=Pt.def('"',Ci,Bi,'" in ',ht,"?",ht,".",Ci,Bi,":",ht,".",Ci);return Gi}var Ur=br("src","RGB"),Di=br("dst","RGB"),fi=Pt.def(ur,"[",Ur,"]"),Ti=Pt.def(ur,"[",br("src","Alpha"),"]"),gn=Pt.def(ur,"[",Di,"]"),rn=Pt.def(ur,"[",br("dst","Alpha"),"]");return[fi,gn,Ti,rn]});case Ws:return Dt(function(yt){if(typeof yt=="string")return[xi[yt],xi[yt]];if(typeof yt=="object")return[xi[yt.rgb],xi[yt.alpha]]},function(yt,Pt,ht){var ur=yt.constants.blendEquations,br=Pt.def(),Ur=Pt.def(),Di=yt.cond("typeof ",ht,'==="string"');return Di.then(br,"=",Ur,"=",ur,"[",ht,"];"),Di.else(br,"=",ur,"[",ht,".rgb];",Ur,"=",ur,"[",ht,".alpha];"),Pt(Di),[br,Ur]});case ml:return Dt(function(yt){return M(4,function(Pt){return+yt[Pt]})},function(yt,Pt,ht){return M(4,function(ur){return Pt.def("+",ht,"[",ur,"]")})});case hc:return Dt(function(yt){return yt|0},function(yt,Pt,ht){return Pt.def(ht,"|0")});case Ku:return Dt(function(yt){var Pt=yt.cmp||"keep",ht=yt.ref||0,ur="mask"in yt?yt.mask:-1;return[Zr[Pt],ht,ur]},function(yt,Pt,ht){var ur=yt.constants.compareFuncs,br=Pt.def('"cmp" in ',ht,"?",ur,"[",ht,".cmp]",":",nt),Ur=Pt.def(ht,".ref|0"),Di=Pt.def('"mask" in ',ht,"?",ht,".mask|0:-1");return[br,Ur,Di]});case ue:case w:return Dt(function(yt){var Pt=yt.fail||"keep",ht=yt.zfail||"keep",ur=yt.zpass||"keep";return[Rt===w?se:X,ai[Pt],ai[ht],ai[ur]]},function(yt,Pt,ht){var ur=yt.constants.stencilOps;function br(Ur){return Pt.def('"',Ur,'" in ',ht,"?",ur,"[",ht,".",Ur,"]:",nt)}return[Rt===w?se:X,br("fail"),br("zfail"),br("zpass")]});case xu:return Dt(function(yt){var Pt=yt.factor|0,ht=yt.units|0;return[Pt,ht]},function(yt,Pt,ht){var ur=Pt.def(ht,".factor|0"),br=Pt.def(ht,".units|0");return[ur,br]});case Yu:return Dt(function(yt){var Pt=0;return yt==="front"?Pt=X:yt==="back"&&(Pt=se),Pt},function(yt,Pt,ht){return Pt.def(ht,'==="front"?',X,":",se)});case pu:return Dt(function(yt){return yt},function(yt,Pt,ht){return ht});case Nc:return Dt(function(yt){return gi[yt]},function(yt,Pt,ht){return Pt.def(ht+'==="cw"?'+Te+":"+Ne)});case $l:return Dt(function(yt){return yt.map(function(Pt){return!!Pt})},function(yt,Pt,ht){return M(4,function(ur){return"!!"+ht+"["+ur+"]"})});case oo:return Dt(function(yt){var Pt="value"in yt?yt.value:1,ht=!!yt.invert;return[Pt,ht]},function(yt,Pt,ht){var ur=Pt.def('"value" in ',ht,"?+",ht,".value:1"),br=Pt.def("!!",ht,".invert");return[ur,br]})}}),Ot}function zo(Ve,Qe){var at=Ve.static,Ct=Ve.dynamic,Ot={};return Object.keys(at).forEach(function(Rt){var Bt=at[Rt],Dt;if(typeof Bt=="number"||typeof Bt=="boolean")Dt=En(function(){return Bt});else if(typeof Bt=="function"){var yt=Bt._reglType;yt==="texture2d"||yt==="textureCube"?Dt=En(function(Pt){return Pt.link(Bt)}):(yt==="framebuffer"||yt==="framebufferCube")&&(Dt=En(function(Pt){return Pt.link(Bt.color[0])}))}else Mn(Bt)&&(Dt=En(function(Pt){var ht=Pt.global.def("[",M(Bt.length,function(ur){return Bt[ur]}),"]");return ht}));Dt.value=Bt,Ot[Rt]=Dt}),Object.keys(Ct).forEach(function(Rt){var Bt=Ct[Rt];Ot[Rt]=Un(Bt,function(Dt,yt){return Dt.invoke(yt,Bt)})}),Ot}function Qs(Ve,Qe){var at=Ve.static,Ct=Ve.dynamic,Ot={};return Object.keys(at).forEach(function(Rt){var Bt=at[Rt],Dt=bt.id(Rt),yt=new Yr;if(Ii(Bt))yt.state=La,yt.buffer=jr.getBuffer(jr.create(Bt,Sr,!1,!0)),yt.type=0;else{var Pt=jr.getBuffer(Bt);if(Pt)yt.state=La,yt.buffer=Pt,yt.type=0;else if("constant"in Bt){var ht=Bt.constant;yt.buffer="null",yt.state=Na,typeof ht=="number"?yt.x=ht:Ca.forEach(function(gn,rn){rn<ht.length&&(yt[gn]=ht[rn])})}else{Ii(Bt.buffer)?Pt=jr.getBuffer(jr.create(Bt.buffer,Sr,!1,!0)):Pt=jr.getBuffer(Bt.buffer);var ur=Bt.offset|0,br=Bt.stride|0,Ur=Bt.size|0,Di=!!Bt.normalized,fi=0;"type"in Bt&&(fi=ji[Bt.type]);var Ti=Bt.divisor|0;yt.buffer=Pt,yt.state=La,yt.size=Ur,yt.normalized=Di,yt.type=fi||Pt.dtype,yt.offset=ur,yt.stride=br,yt.divisor=Ti}}Ot[Rt]=En(function(gn,rn){var Ci=gn.attribCache;if(Dt in Ci)return Ci[Dt];var Bi={isStream:!1};return Object.keys(yt).forEach(function(Gi){Bi[Gi]=yt[Gi]}),yt.buffer&&(Bi.buffer=gn.link(yt.buffer),Bi.type=Bi.type||Bi.buffer+".dtype"),Ci[Dt]=Bi,Bi})}),Object.keys(Ct).forEach(function(Rt){var Bt=Ct[Rt];function Dt(yt,Pt){var ht=yt.invoke(Pt,Bt),ur=yt.shared,br=yt.constants,Ur=ur.isBufferArgs,Di=ur.buffer,fi={isStream:Pt.def(!1)},Ti=new Yr;Ti.state=La,Object.keys(Ti).forEach(function(Bi){fi[Bi]=Pt.def(""+Ti[Bi])});var gn=fi.buffer,rn=fi.type;Pt("if(",Ur,"(",ht,")){",fi.isStream,"=true;",gn,"=",Di,".createStream(",Sr,",",ht,");",rn,"=",gn,".dtype;","}else{",gn,"=",Di,".getBuffer(",ht,");","if(",gn,"){",rn,"=",gn,".dtype;",'}else if("constant" in ',ht,"){",fi.state,"=",Na,";","if(typeof "+ht+'.constant === "number"){',fi[Ca[0]],"=",ht,".constant;",Ca.slice(1).map(function(Bi){return fi[Bi]}).join("="),"=0;","}else{",Ca.map(function(Bi,Gi){return fi[Bi]+"="+ht+".constant.length>"+Gi+"?"+ht+".constant["+Gi+"]:0;"}).join(""),"}}else{","if(",Ur,"(",ht,".buffer)){",gn,"=",Di,".createStream(",Sr,",",ht,".buffer);","}else{",gn,"=",Di,".getBuffer(",ht,".buffer);","}",rn,'="type" in ',ht,"?",br.glTypes,"[",ht,".type]:",gn,".dtype;",fi.normalized,"=!!",ht,".normalized;");function Ci(Bi){Pt(fi[Bi],"=",ht,".",Bi,"|0;")}return Ci("size"),Ci("offset"),Ci("stride"),Ci("divisor"),Pt("}}"),Pt.exit("if(",fi.isStream,"){",Di,".destroyStream(",gn,");","}"),fi}Ot[Rt]=Un(Bt,Dt)}),Ot}function al(Ve){var Qe=Ve.static,at=Ve.dynamic,Ct={};return Object.keys(Qe).forEach(function(Ot){var Rt=Qe[Ot];Ct[Ot]=En(function(Bt,Dt){return typeof Rt=="number"||typeof Rt=="boolean"?""+Rt:Bt.link(Rt)})}),Object.keys(at).forEach(function(Ot){var Rt=at[Ot];Ct[Ot]=Un(Rt,function(Bt,Dt){return Bt.invoke(Dt,Rt)})}),Ct}function Vl(Ve,Qe,at,Ct,Ot){var Rt=Ve.static,Bt=Ve.dynamic,Dt=is(Ve,Qe),yt=oa(Ve,Ot),Pt=ba(Ve,yt,Ot),ht=Va(Ve,Ot),ur=Ml(Ve,Ot),br=Zs(Ve,Ot,Dt);function Ur(Ci){var Bi=Pt[Ci];Bi&&(ur[Ci]=Bi)}Ur(ee),Ur(Ji(Q));var Di=Object.keys(ur).length>0,fi={framebuffer:yt,draw:ht,shader:br,state:ur,dirty:Di,scopeVAO:null,drawVAO:null,useVAO:!1,attributes:{}};if(fi.profile=ta(Ve,Ot),fi.uniforms=zo(at,Ot),fi.drawVAO=fi.scopeVAO=ht.vao,!fi.drawVAO&&br.program&&!Dt&&zt.angle_instanced_arrays&&ht.static.elements){var Ti=!0,gn=br.program.attributes.map(function(Ci){var Bi=Qe.static[Ci];return Ti=Ti&&!!Bi,Bi});if(Ti&&gn.length>0){var rn=qi.getVAO(qi.createVAO({attributes:gn,elements:ht.static.elements}));fi.drawVAO=new ei(null,null,null,function(Ci,Bi){return Ci.link(rn)}),fi.useVAO=!0}}return Dt?fi.useVAO=!0:fi.attributes=Qs(Qe,Ot),fi.context=al(Ct,Ot),fi}function ss(Ve,Qe,at){var Ct=Ve.shared,Ot=Ct.context,Rt=Ve.scope();Object.keys(at).forEach(function(Bt){Qe.save(Ot,"."+Bt);var Dt=at[Bt],yt=Dt.append(Ve,Qe);Array.isArray(yt)?Rt(Ot,".",Bt,"=[",yt.join(),"];"):Rt(Ot,".",Bt,"=",yt,";")}),Qe(Rt)}function Vs(Ve,Qe,at,Ct){var Ot=Ve.shared,Rt=Ot.gl,Bt=Ot.framebuffer,Dt;ci&&(Dt=Qe.def(Ot.extensions,".webgl_draw_buffers"));var yt=Ve.constants,Pt=yt.drawBuffer,ht=yt.backBuffer,ur;at?ur=at.append(Ve,Qe):ur=Qe.def(Bt,".next"),Ct||Qe("if(",ur,"!==",Bt,".cur){"),Qe("if(",ur,"){",Rt,".bindFramebuffer(",qr,",",ur,".framebuffer);"),ci&&Qe(Dt,".drawBuffersWEBGL(",Pt,"[",ur,".colorAttachments.length]);"),Qe("}else{",Rt,".bindFramebuffer(",qr,",null);"),ci&&Qe(Dt,".drawBuffersWEBGL(",ht,");"),Qe("}",Bt,".cur=",ur,";"),Ct||Qe("}")}function Ys(Ve,Qe,at){var Ct=Ve.shared,Ot=Ct.gl,Rt=Ve.current,Bt=Ve.next,Dt=Ct.current,yt=Ct.next,Pt=Ve.cond(Dt,".dirty");hi.forEach(function(ht){var ur=Ji(ht);if(!(ur in at.state)){var br,Ur;if(ur in Bt){br=Bt[ur],Ur=Rt[ur];var Di=M(Zi[ur].length,function(Ti){return Pt.def(br,"[",Ti,"]")});Pt(Ve.cond(Di.map(function(Ti,gn){return Ti+"!=="+Ur+"["+gn+"]"}).join("||")).then(Ot,".",mn[ur],"(",Di,");",Di.map(function(Ti,gn){return Ur+"["+gn+"]="+Ti}).join(";"),";"))}else{br=Pt.def(yt,".",ur);var fi=Ve.cond(br,"!==",Dt,".",ur);Pt(fi),ur in li?fi(Ve.cond(br).then(Ot,".enable(",li[ur],");").else(Ot,".disable(",li[ur],");"),Dt,".",ur,"=",br,";"):fi(Ot,".",mn[ur],"(",br,");",Dt,".",ur,"=",br,";")}}}),Object.keys(at.state).length===0&&Pt(Dt,".dirty=false;"),Qe(Pt)}function wa(Ve,Qe,at,Ct){var Ot=Ve.shared,Rt=Ve.current,Bt=Ot.current,Dt=Ot.gl,yt;Si(Object.keys(at)).forEach(function(Pt){var ht=at[Pt];if(!(Ct&&!Ct(ht))){var ur=ht.append(Ve,Qe);if(li[Pt]){var br=li[Pt];Ln(ht)?(yt=Ve.link(ur,{stable:!0}),Qe(Ve.cond(yt).then(Dt,".enable(",br,");").else(Dt,".disable(",br,");")),Qe(Bt,".",Pt,"=",yt,";")):(Qe(Ve.cond(ur).then(Dt,".enable(",br,");").else(Dt,".disable(",br,");")),Qe(Bt,".",Pt,"=",ur,";"))}else if(Mn(ur)){var Ur=Rt[Pt];Qe(Dt,".",mn[Pt],"(",ur,");",ur.map(function(Di,fi){return Ur+"["+fi+"]="+Di}).join(";"),";")}else Ln(ht)?(yt=Ve.link(ur,{stable:!0}),Qe(Dt,".",mn[Pt],"(",yt,");",Bt,".",Pt,"=",yt,";")):Qe(Dt,".",mn[Pt],"(",ur,");",Bt,".",Pt,"=",ur,";")}})}function ol(Ve,Qe){Ri&&(Ve.instancing=Qe.def(Ve.shared.extensions,".angle_instanced_arrays"))}function io(Ve,Qe,at,Ct,Ot){var Rt=Ve.shared,Bt=Ve.stats,Dt=Rt.current,yt=Rt.timer,Pt=at.profile;function ht(){return typeof performance=="undefined"?"Date.now()":"performance.now()"}var ur,br;function Ur(Ci){ur=Qe.def(),Ci(ur,"=",ht(),";"),typeof Ot=="string"?Ci(Bt,".count+=",Ot,";"):Ci(Bt,".count++;"),Wi&&(Ct?(br=Qe.def(),Ci(br,"=",yt,".getNumPendingQueries();")):Ci(yt,".beginQuery(",Bt,");"))}function Di(Ci){Ci(Bt,".cpuTime+=",ht(),"-",ur,";"),Wi&&(Ct?Ci(yt,".pushScopeStats(",br,",",yt,".getNumPendingQueries(),",Bt,");"):Ci(yt,".endQuery();"))}function fi(Ci){var Bi=Qe.def(Dt,".profile");Qe(Dt,".profile=",Ci,";"),Qe.exit(Dt,".profile=",Bi,";")}var Ti;if(Pt){if(Ln(Pt)){Pt.enable?(Ur(Qe),Di(Qe.exit),fi("true")):fi("false");return}Ti=Pt.append(Ve,Qe),fi(Ti)}else Ti=Qe.def(Dt,".profile");var gn=Ve.block();Ur(gn),Qe("if(",Ti,"){",gn,"}");var rn=Ve.block();Di(rn),Qe.exit("if(",Ti,"){",rn,"}")}function Y(Ve,Qe,at,Ct,Ot){var Rt=Ve.shared;function Bt(yt){switch(yt){case Co:case Ps:case hl:return 2;case Qa:case Ts:case Ul:return 3;case mo:case wo:case Lu:return 4;default:return 1}}function Dt(yt,Pt,ht){var ur=Rt.gl,br=Qe.def(yt,".location"),Ur=Qe.def(Rt.attributes,"[",br,"]"),Di=ht.state,fi=ht.buffer,Ti=[ht.x,ht.y,ht.z,ht.w],gn=["buffer","normalized","offset","stride"];function rn(){Qe("if(!",Ur,".buffer){",ur,".enableVertexAttribArray(",br,");}");var Bi=ht.type,Gi;if(ht.size?Gi=Qe.def(ht.size,"||",Pt):Gi=Pt,Qe("if(",Ur,".type!==",Bi,"||",Ur,".size!==",Gi,"||",gn.map(function(zn){return Ur+"."+zn+"!=="+ht[zn]}).join("||"),"){",ur,".bindBuffer(",Sr,",",fi,".buffer);",ur,".vertexAttribPointer(",[br,Gi,Bi,ht.normalized,ht.stride,ht.offset],");",Ur,".type=",Bi,";",Ur,".size=",Gi,";",gn.map(function(zn){return Ur+"."+zn+"="+ht[zn]+";"}).join(""),"}"),Ri){var sn=ht.divisor;Qe("if(",Ur,".divisor!==",sn,"){",Ve.instancing,".vertexAttribDivisorANGLE(",[br,sn],");",Ur,".divisor=",sn,";}")}}function Ci(){Qe("if(",Ur,".buffer){",ur,".disableVertexAttribArray(",br,");",Ur,".buffer=null;","}if(",Ca.map(function(Bi,Gi){return Ur+"."+Bi+"!=="+Ti[Gi]}).join("||"),"){",ur,".vertexAttrib4f(",br,",",Ti,");",Ca.map(function(Bi,Gi){return Ur+"."+Bi+"="+Ti[Gi]+";"}).join(""),"}")}Di===La?rn():Di===Na?Ci():(Qe("if(",Di,"===",La,"){"),rn(),Qe("}else{"),Ci(),Qe("}"))}Ct.forEach(function(yt){var Pt=yt.name,ht=at.attributes[Pt],ur;if(ht){if(!Ot(ht))return;ur=ht.append(Ve,Qe)}else{if(!Ot(ia))return;var br=Ve.scopeAttrib(Pt);ur={},Object.keys(new Yr).forEach(function(Ur){ur[Ur]=Qe.def(br,".",Ur)})}Dt(Ve.link(yt),Bt(yt.info.type),ur)})}function D(Ve,Qe,at,Ct,Ot,Rt){for(var Bt=Ve.shared,Dt=Bt.gl,yt,Pt=0;Pt<Ct.length;++Pt){var ht=Ct[Pt],ur=ht.name,br=ht.info.type,Ur=at.uniforms[ur],Di=Ve.link(ht),fi=Di+".location",Ti;if(Ur){if(!Ot(Ur))continue;if(Ln(Ur)){var gn=Ur.value;if(br===dc||br===Tl){var rn=Ve.link(gn._texture||gn.color[0]._texture);Qe(Dt,".uniform1i(",fi,",",rn+".bind());"),Qe.exit(rn,".unbind();")}else if(br===au||br===Js||br===Ql){var Ci=Ve.global.def("new Float32Array(["+Array.prototype.slice.call(gn)+"])"),Bi=2;br===Js?Bi=3:br===Ql&&(Bi=4),Qe(Dt,".uniformMatrix",Bi,"fv(",fi,",false,",Ci,");")}else{switch(br){case $a:yt="1f";break;case Co:yt="2f";break;case Qa:yt="3f";break;case mo:yt="4f";break;case To:yt="1i";break;case Bo:yt="1i";break;case hl:yt="2i";break;case Ps:yt="2i";break;case Ul:yt="3i";break;case Ts:yt="3i";break;case Lu:yt="4i";break;case wo:yt="4i";break}Qe(Dt,".uniform",yt,"(",fi,",",Mn(gn)?Array.prototype.slice.call(gn):gn,");")}continue}else Ti=Ur.append(Ve,Qe)}else{if(!Ot(ia))continue;Ti=Qe.def(Bt.uniforms,"[",bt.id(ur),"]")}br===dc?Qe("if(",Ti,"&&",Ti,'._reglType==="framebuffer"){',Ti,"=",Ti,".color[0];","}"):br===Tl&&Qe("if(",Ti,"&&",Ti,'._reglType==="framebufferCube"){',Ti,"=",Ti,".color[0];","}");var Gi=1;switch(br){case dc:case Tl:var sn=Qe.def(Ti,"._texture");Qe(Dt,".uniform1i(",fi,",",sn,".bind());"),Qe.exit(sn,".unbind();");continue;case Bo:case To:yt="1i";break;case Ps:case hl:yt="2i",Gi=2;break;case Ts:case Ul:yt="3i",Gi=3;break;case wo:case Lu:yt="4i",Gi=4;break;case $a:yt="1f";break;case Co:yt="2f",Gi=2;break;case Qa:yt="3f",Gi=3;break;case mo:yt="4f",Gi=4;break;case au:yt="Matrix2fv";break;case Js:yt="Matrix3fv";break;case Ql:yt="Matrix4fv";break}if(yt.charAt(0)==="M"){Qe(Dt,".uniform",yt,"(",fi,",");var zn=Math.pow(br-au+2,2),Ja=Ve.global.def("new Float32Array(",zn,")");Array.isArray(Ti)?Qe("false,(",M(zn,function(ms){return Ja+"["+ms+"]="+Ti[ms]}),",",Ja,")"):Qe("false,(Array.isArray(",Ti,")||",Ti," instanceof Float32Array)?",Ti,":(",M(zn,function(ms){return Ja+"["+ms+"]="+Ti+"["+ms+"]"}),",",Ja,")"),Qe(");")}else if(Gi>1){for(var co=[],ts=[],so=0;so<Gi;++so)Array.isArray(Ti)?ts.push(Ti[so]):ts.push(Qe.def(Ti+"["+so+"]")),Rt&&co.push(Qe.def());Rt&&Qe("if(!",Ve.batchId,"||",co.map(function(ms,ou){return ms+"!=="+ts[ou]}).join("||"),"){",co.map(function(ms,ou){return ms+"="+ts[ou]+";"}).join("")),Qe(Dt,".uniform",yt,"(",fi,",",ts.join(","),");"),Rt&&Qe("}")}else{if(Rt){var Zo=Qe.def();Qe("if(!",Ve.batchId,"||",Zo,"!==",Ti,"){",Zo,"=",Ti,";")}Qe(Dt,".uniform",yt,"(",fi,",",Ti,");"),Rt&&Qe("}")}}}function J(Ve,Qe,at,Ct){var Ot=Ve.shared,Rt=Ot.gl,Bt=Ot.draw,Dt=Ct.draw;function yt(){var Gi=Dt.elements,sn,zn=Qe;return Gi?((Gi.contextDep&&Ct.contextDynamic||Gi.propDep)&&(zn=at),sn=Gi.append(Ve,zn),Dt.elementsActive&&zn("if("+sn+")"+Rt+".bindBuffer("+Or+","+sn+".buffer.buffer);")):(sn=zn.def(),zn(sn,"=",Bt,".",Tt,";","if(",sn,"){",Rt,".bindBuffer(",Or,",",sn,".buffer.buffer);}","else if(",Ot.vao,".currentVAO){",sn,"=",Ve.shared.elements+".getElements("+Ot.vao,".currentVAO.elements);",an?"":"if("+sn+")"+Rt+".bindBuffer("+Or+","+sn+".buffer.buffer);","}")),sn}function Pt(){var Gi=Dt.count,sn,zn=Qe;return Gi?((Gi.contextDep&&Ct.contextDynamic||Gi.propDep)&&(zn=at),sn=Gi.append(Ve,zn)):sn=zn.def(Bt,".",Kt),sn}var ht=yt();function ur(Gi){var sn=Dt[Gi];return sn?sn.contextDep&&Ct.contextDynamic||sn.propDep?sn.append(Ve,at):sn.append(Ve,Qe):Qe.def(Bt,".",Gi)}var br=ur(Yt),Ur=ur(xr),Di=Pt();if(typeof Di=="number"){if(Di===0)return}else at("if(",Di,"){"),at.exit("}");var fi,Ti;Ri&&(fi=ur(Ir),Ti=Ve.instancing);var gn=ht+".type",rn=Dt.elements&&Ln(Dt.elements)&&!Dt.vaoActive;function Ci(){function Gi(){at(Ti,".drawElementsInstancedANGLE(",[br,Di,gn,Ur+"<<(("+gn+"-"+Ra+")>>1)",fi],");")}function sn(){at(Ti,".drawArraysInstancedANGLE(",[br,Ur,Di,fi],");")}ht&&ht!=="null"?rn?Gi():(at("if(",ht,"){"),Gi(),at("}else{"),sn(),at("}")):sn()}function Bi(){function Gi(){at(Rt+".drawElements("+[br,Di,gn,Ur+"<<(("+gn+"-"+Ra+")>>1)"]+");")}function sn(){at(Rt+".drawArrays("+[br,Ur,Di]+");")}ht&&ht!=="null"?rn?Gi():(at("if(",ht,"){"),Gi(),at("}else{"),sn(),at("}")):sn()}Ri&&(typeof fi!="number"||fi>=0)?typeof fi=="string"?(at("if(",fi,">0){"),Ci(),at("}else if(",fi,"<0){"),Bi(),at("}")):Ci():Bi()}function q(Ve,Qe,at,Ct,Ot){var Rt=kn(),Bt=Rt.proc("body",Ot);return Ri&&(Rt.instancing=Bt.def(Rt.shared.extensions,".angle_instanced_arrays")),Ve(Rt,Bt,at,Ct),Rt.compile().body}function K(Ve,Qe,at,Ct){ol(Ve,Qe),at.useVAO?at.drawVAO?Qe(Ve.shared.vao,".setVAO(",at.drawVAO.append(Ve,Qe),");"):Qe(Ve.shared.vao,".setVAO(",Ve.shared.vao,".targetVAO);"):(Qe(Ve.shared.vao,".setVAO(null);"),Y(Ve,Qe,at,Ct.attributes,function(){return!0})),D(Ve,Qe,at,Ct.uniforms,function(){return!0},!1),J(Ve,Qe,Qe,at)}function de(Ve,Qe){var at=Ve.proc("draw",1);ol(Ve,at),ss(Ve,at,Qe.context),Vs(Ve,at,Qe.framebuffer),Ys(Ve,at,Qe),wa(Ve,at,Qe.state),io(Ve,at,Qe,!1,!0);var Ct=Qe.shader.progVar.append(Ve,at);if(at(Ve.shared.gl,".useProgram(",Ct,".program);"),Qe.shader.program)K(Ve,at,Qe,Qe.shader.program);else{at(Ve.shared.vao,".setVAO(null);");var Ot=Ve.global.def("{}"),Rt=at.def(Ct,".id"),Bt=at.def(Ot,"[",Rt,"]");at(Ve.cond(Bt).then(Bt,".call(this,a0);").else(Bt,"=",Ot,"[",Rt,"]=",Ve.link(function(Dt){return q(K,Ve,Qe,Dt,1)}),"(",Ct,");",Bt,".call(this,a0);"))}Object.keys(Qe.state).length>0&&at(Ve.shared.current,".dirty=true;"),Ve.shared.vao&&at(Ve.shared.vao,".setVAO(null);")}function ne(Ve,Qe,at,Ct){Ve.batchId="a1",ol(Ve,Qe);function Ot(){return!0}Y(Ve,Qe,at,Ct.attributes,Ot),D(Ve,Qe,at,Ct.uniforms,Ot,!1),J(Ve,Qe,Qe,at)}function we(Ve,Qe,at,Ct){ol(Ve,Qe);var Ot=at.contextDep,Rt=Qe.def(),Bt="a0",Dt="a1",yt=Qe.def();Ve.shared.props=yt,Ve.batchId=Rt;var Pt=Ve.scope(),ht=Ve.scope();Qe(Pt.entry,"for(",Rt,"=0;",Rt,"<",Dt,";++",Rt,"){",yt,"=",Bt,"[",Rt,"];",ht,"}",Pt.exit);function ur(gn){return gn.contextDep&&Ot||gn.propDep}function br(gn){return!ur(gn)}if(at.needsContext&&ss(Ve,ht,at.context),at.needsFramebuffer&&Vs(Ve,ht,at.framebuffer),wa(Ve,ht,at.state,ur),at.profile&&ur(at.profile)&&io(Ve,ht,at,!1,!0),Ct)at.useVAO?at.drawVAO?ur(at.drawVAO)?ht(Ve.shared.vao,".setVAO(",at.drawVAO.append(Ve,ht),");"):Pt(Ve.shared.vao,".setVAO(",at.drawVAO.append(Ve,Pt),");"):Pt(Ve.shared.vao,".setVAO(",Ve.shared.vao,".targetVAO);"):(Pt(Ve.shared.vao,".setVAO(null);"),Y(Ve,Pt,at,Ct.attributes,br),Y(Ve,ht,at,Ct.attributes,ur)),D(Ve,Pt,at,Ct.uniforms,br,!1),D(Ve,ht,at,Ct.uniforms,ur,!0),J(Ve,Pt,ht,at);else{var Ur=Ve.global.def("{}"),Di=at.shader.progVar.append(Ve,ht),fi=ht.def(Di,".id"),Ti=ht.def(Ur,"[",fi,"]");ht(Ve.shared.gl,".useProgram(",Di,".program);","if(!",Ti,"){",Ti,"=",Ur,"[",fi,"]=",Ve.link(function(gn){return q(ne,Ve,at,gn,2)}),"(",Di,");}",Ti,".call(this,a0[",Rt,"],",Rt,");")}}function Ue(Ve,Qe){var at=Ve.proc("batch",2);Ve.batchId="0",ol(Ve,at);var Ct=!1,Ot=!0;Object.keys(Qe.context).forEach(function(Ur){Ct=Ct||Qe.context[Ur].propDep}),Ct||(ss(Ve,at,Qe.context),Ot=!1);var Rt=Qe.framebuffer,Bt=!1;Rt?(Rt.propDep?Ct=Bt=!0:Rt.contextDep&&Ct&&(Bt=!0),Bt||Vs(Ve,at,Rt)):Vs(Ve,at,null),Qe.state.viewport&&Qe.state.viewport.propDep&&(Ct=!0);function Dt(Ur){return Ur.contextDep&&Ct||Ur.propDep}Ys(Ve,at,Qe),wa(Ve,at,Qe.state,function(Ur){return!Dt(Ur)}),(!Qe.profile||!Dt(Qe.profile))&&io(Ve,at,Qe,!1,"a1"),Qe.contextDep=Ct,Qe.needsContext=Ot,Qe.needsFramebuffer=Bt;var yt=Qe.shader.progVar;if(yt.contextDep&&Ct||yt.propDep)we(Ve,at,Qe,null);else{var Pt=yt.append(Ve,at);if(at(Ve.shared.gl,".useProgram(",Pt,".program);"),Qe.shader.program)we(Ve,at,Qe,Qe.shader.program);else{at(Ve.shared.vao,".setVAO(null);");var ht=Ve.global.def("{}"),ur=at.def(Pt,".id"),br=at.def(ht,"[",ur,"]");at(Ve.cond(br).then(br,".call(this,a0,a1);").else(br,"=",ht,"[",ur,"]=",Ve.link(function(Ur){return q(we,Ve,Qe,Ur,2)}),"(",Pt,");",br,".call(this,a0,a1);"))}}Object.keys(Qe.state).length>0&&at(Ve.shared.current,".dirty=true;"),Ve.shared.vao&&at(Ve.shared.vao,".setVAO(null);")}function ft(Ve,Qe){var at=Ve.proc("scope",3);Ve.batchId="a2";var Ct=Ve.shared,Ot=Ct.current;if(ss(Ve,at,Qe.context),Qe.framebuffer&&Qe.framebuffer.append(Ve,at),Si(Object.keys(Qe.state)).forEach(function(Dt){var yt=Qe.state[Dt],Pt=yt.append(Ve,at);Mn(Pt)?Pt.forEach(function(ht,ur){pn(ht)?at.set(Ve.next[Dt],"["+ur+"]",ht):at.set(Ve.next[Dt],"["+ur+"]",Ve.link(ht,{stable:!0}))}):Ln(yt)?at.set(Ct.next,"."+Dt,Ve.link(Pt,{stable:!0})):at.set(Ct.next,"."+Dt,Pt)}),io(Ve,at,Qe,!0,!0),[Tt,xr,Kt,Ir,Yt].forEach(function(Dt){var yt=Qe.draw[Dt];if(yt){var Pt=yt.append(Ve,at);pn(Pt)?at.set(Ct.draw,"."+Dt,Pt):at.set(Ct.draw,"."+Dt,Ve.link(Pt),{stable:!0})}}),Object.keys(Qe.uniforms).forEach(function(Dt){var yt=Qe.uniforms[Dt].append(Ve,at);Array.isArray(yt)&&(yt="["+yt.map(function(Pt){return pn(Pt)?Pt:Ve.link(Pt,{stable:!0})})+"]"),at.set(Ct.uniforms,"["+Ve.link(bt.id(Dt),{stable:!0})+"]",yt)}),Object.keys(Qe.attributes).forEach(function(Dt){var yt=Qe.attributes[Dt].append(Ve,at),Pt=Ve.scopeAttrib(Dt);Object.keys(new Yr).forEach(function(ht){at.set(Pt,"."+ht,yt[ht])})}),Qe.scopeVAO){var Rt=Qe.scopeVAO.append(Ve,at);pn(Rt)?at.set(Ct.vao,".targetVAO",Rt):at.set(Ct.vao,".targetVAO",Ve.link(Rt,{stable:!0}))}function Bt(Dt){var yt=Qe.shader[Dt];if(yt){var Pt=yt.append(Ve,at);pn(Pt)?at.set(Ct.shader,"."+Dt,Pt):at.set(Ct.shader,"."+Dt,Ve.link(Pt,{stable:!0}))}}Bt(Xe),Bt(ot),Object.keys(Qe.state).length>0&&(at(Ot,".dirty=true;"),at.exit(Ot,".dirty=true;")),at("a1(",Ve.shared.context,",a0,",Ve.batchId,");")}function Xt(Ve){if(!(typeof Ve!="object"||Mn(Ve))){for(var Qe=Object.keys(Ve),at=0;at<Qe.length;++at)if(h.isDynamic(Ve[Qe[at]]))return!0;return!1}}function hr(Ve,Qe,at){var Ct=Qe.static[at];if(!Ct||!Xt(Ct))return;var Ot=Ve.global,Rt=Object.keys(Ct),Bt=!1,Dt=!1,yt=!1,Pt=Ve.global.def("{}");Rt.forEach(function(ur){var br=Ct[ur];if(h.isDynamic(br)){typeof br=="function"&&(br=Ct[ur]=h.unbox(br));var Ur=Un(br,null);Bt=Bt||Ur.thisDep,yt=yt||Ur.propDep,Dt=Dt||Ur.contextDep}else{switch(Ot(Pt,".",ur,"="),typeof br){case"number":Ot(br);break;case"string":Ot('"',br,'"');break;case"object":Array.isArray(br)&&Ot("[",br.join(),"]");break;default:Ot(Ve.link(br));break}Ot(";")}});function ht(ur,br){Rt.forEach(function(Ur){var Di=Ct[Ur];if(h.isDynamic(Di)){var fi=ur.invoke(br,Di);br(Pt,".",Ur,"=",fi,";")}})}Qe.dynamic[at]=new h.DynamicVariable(Xo,{thisDep:Bt,contextDep:Dt,propDep:yt,ref:Pt,append:ht}),delete Qe.static[at]}function qt(Ve,Qe,at,Ct,Ot){var Rt=kn();Rt.stats=Rt.link(Ot),Object.keys(Qe.static).forEach(function(Dt){hr(Rt,Qe,Dt)}),dr.forEach(function(Dt){hr(Rt,Ve,Dt)});var Bt=Vl(Ve,Qe,at,Ct,Rt);return Bt.shader.program&&(Bt.shader.program.attributes.sort(function(Dt,yt){return Dt.name<yt.name?-1:1}),Bt.shader.program.uniforms.sort(function(Dt,yt){return Dt.name<yt.name?-1:1})),de(Rt,Bt),ft(Rt,Bt),Ue(Rt,Bt),e(Rt.compile(),{destroy:function(){Bt.shader.program.destroy()}})}return{next:Bn,current:Zi,procs:function(){var Ve=kn(),Qe=Ve.proc("poll"),at=Ve.proc("refresh"),Ct=Ve.block();Qe(Ct),at(Ct);var Ot=Ve.shared,Rt=Ot.gl,Bt=Ot.next,Dt=Ot.current;Ct(Dt,".dirty=false;"),Vs(Ve,Qe),Vs(Ve,at,null,!0);var yt;Ri&&(yt=Ve.link(Ri)),zt.oes_vertex_array_object&&at(Ve.link(zt.oes_vertex_array_object),".bindVertexArrayOES(null);");var Pt=at.def(Ot.attributes),ht=at.def(0),ur=Ve.cond(ht,".buffer");ur.then(Rt,".enableVertexAttribArray(i);",Rt,".bindBuffer(",Sr,",",ht,".buffer.buffer);",Rt,".vertexAttribPointer(i,",ht,".size,",ht,".type,",ht,".normalized,",ht,".stride,",ht,".offset);").else(Rt,".disableVertexAttribArray(i);",Rt,".vertexAttrib4f(i,",ht,".x,",ht,".y,",ht,".z,",ht,".w);",ht,".buffer=null;");var br=Ve.link(Rr.maxAttributes,{stable:!0});return at("for(var i=0;i<",br,";++i){",ht,"=",Pt,"[i];",ur,"}"),Ri&&at("for(var i=0;i<",br,";++i){",yt,".vertexAttribDivisorANGLE(i,",Pt,"[i].divisor);","}"),at(Ve.shared.vao,".currentVAO=null;",Ve.shared.vao,".setVAO(",Ve.shared.vao,".targetVAO);"),Object.keys(li).forEach(function(Ur){var Di=li[Ur],fi=Ct.def(Bt,".",Ur),Ti=Ve.block();Ti("if(",fi,"){",Rt,".enable(",Di,")}else{",Rt,".disable(",Di,")}",Dt,".",Ur,"=",fi,";"),at(Ti),Qe("if(",fi,"!==",Dt,".",Ur,"){",Ti,"}")}),Object.keys(mn).forEach(function(Ur){var Di=mn[Ur],fi=Zi[Ur],Ti,gn,rn=Ve.block();if(rn(Rt,".",Di,"("),Mn(fi)){var Ci=fi.length;Ti=Ve.global.def(Bt,".",Ur),gn=Ve.global.def(Dt,".",Ur),rn(M(Ci,function(Bi){return Ti+"["+Bi+"]"}),");",M(Ci,function(Bi){return gn+"["+Bi+"]="+Ti+"["+Bi+"];"}).join("")),Qe("if(",M(Ci,function(Bi){return Ti+"["+Bi+"]!=="+gn+"["+Bi+"]"}).join("||"),"){",rn,"}")}else Ti=Ct.def(Bt,".",Ur),gn=Ct.def(Dt,".",Ur),rn(Ti,");",Dt,".",Ur,"=",Ti,";"),Qe("if(",Ti,"!==",gn,"){",rn,"}");at(rn)}),Ve.compile()}(),compile:qt}}function Ia(){return{vaoCount:0,bufferCount:0,elementsCount:0,framebufferCount:0,shaderCount:0,textureCount:0,cubeCount:0,renderbufferCount:0,maxTextureUnits:0}}var yo=34918,Da=34919,go=35007,Is=function(Me,bt){if(!bt.ext_disjoint_timer_query)return null;var zt=[];function Rr(){return zt.pop()||bt.ext_disjoint_timer_query.createQueryEXT()}function jr(xi){zt.push(xi)}var Nr=[];function Gr(xi){var Ri=Rr();bt.ext_disjoint_timer_query.beginQueryEXT(go,Ri),Nr.push(Ri),Wi(Nr.length-1,Nr.length,xi)}function mi(){bt.ext_disjoint_timer_query.endQueryEXT(go)}function Ui(){this.startQueryIndex=-1,this.endQueryIndex=-1,this.sum=0,this.stats=null}var qi=[];function Ei(){return qi.pop()||new Ui}function Hn(xi){qi.push(xi)}var en=[];function Wi(xi,Ri,ci){var an=Ei();an.startQueryIndex=xi,an.endQueryIndex=Ri,an.sum=0,an.stats=ci,en.push(an)}var si=[],Mr=[];function Yr(){var xi,Ri,ci=Nr.length;if(ci!==0){Mr.length=Math.max(Mr.length,ci+1),si.length=Math.max(si.length,ci+1),si[0]=0,Mr[0]=0;var an=0;for(xi=0,Ri=0;Ri<Nr.length;++Ri){var Zi=Nr[Ri];bt.ext_disjoint_timer_query.getQueryObjectEXT(Zi,Da)?(an+=bt.ext_disjoint_timer_query.getQueryObjectEXT(Zi,yo),jr(Zi)):Nr[xi++]=Zi,si[Ri+1]=an,Mr[Ri+1]=xi}for(Nr.length=xi,xi=0,Ri=0;Ri<en.length;++Ri){var Bn=en[Ri],hi=Bn.startQueryIndex,li=Bn.endQueryIndex;Bn.sum+=si[li]-si[hi];var mn=Mr[hi],Ji=Mr[li];Ji===mn?(Bn.stats.gpuTime+=Bn.sum/1e6,Hn(Bn)):(Bn.startQueryIndex=mn,Bn.endQueryIndex=Ji,en[xi++]=Bn)}en.length=xi}}return{beginQuery:Gr,endQuery:mi,pushScopeStats:Wi,update:Yr,getNumPendingQueries:function(){return Nr.length},clear:function(){zt.push.apply(zt,Nr);for(var xi=0;xi<zt.length;xi++)bt.ext_disjoint_timer_query.deleteQueryEXT(zt[xi]);Nr.length=0,zt.length=0},restore:function(){Nr.length=0,zt.length=0}}},Ms=16384,Xs=256,Gn=1024,ja=34962,Fo="webglcontextlost",Uo="webglcontextrestored",$s=1,Sl=2,bu=3;function dl(Me,bt){for(var zt=0;zt<Me.length;++zt)if(Me[zt]===bt)return zt;return-1}function Sc(Me){var bt=x(Me);if(!bt)return null;var zt=bt.gl,Rr=zt.getContextAttributes(),jr=zt.isContextLost(),Nr=C(zt,bt);if(!Nr)return null;var Gr=_(),mi=Ia(),Ui=bt.cachedCode||{},qi=Nr.extensions,Ei=Is(zt,qi),Hn=v(),en=zt.drawingBufferWidth,Wi=zt.drawingBufferHeight,si={tick:0,time:0,viewportWidth:en,viewportHeight:Wi,framebufferWidth:en,framebufferHeight:Wi,drawingBufferWidth:en,drawingBufferHeight:Wi,pixelRatio:bt.pixelRatio},Mr={},Yr={elements:null,primitive:4,count:-1,offset:0,instances:-1},xi=ti(zt,qi),Ri=ri(zt,mi,bt,Zi),ci=kr(zt,qi,Ri,mi),an=ku(zt,qi,xi,mi,Ri,ci,Yr);function Zi(q){return an.destroyBuffer(q)}var Bn=Wu(zt,Gr,mi,bt),hi=of(zt,qi,xi,function(){Ji.procs.poll()},si,mi,bt),li=Dc(zt,qi,xi,mi,bt),mn=lf(zt,qi,xi,hi,li,mi),Ji=Ea(zt,Gr,qi,xi,Ri,ci,hi,mn,Mr,an,Bn,Yr,si,Ei,Ui,bt),Vi=Wl(zt,mn,Ji.procs.poll,si,Rr,qi,xi),Ni=Ji.next,pn=zt.canvas,Vn=[],na=[],Ki=[],kn=[bt.onDestroy],ta=null;function oa(){if(Vn.length===0){Ei&&Ei.update(),ta=null;return}ta=d.next(oa),Ys();for(var q=Vn.length-1;q>=0;--q){var K=Vn[q];K&&K(si,null,0)}zt.flush(),Ei&&Ei.update()}function ba(){!ta&&Vn.length>0&&(ta=d.next(oa))}function is(){ta&&(d.cancel(oa),ta=null)}function Zs(q){q.preventDefault(),jr=!0,is(),na.forEach(function(K){K()})}function Va(q){zt.getError(),jr=!1,Nr.restore(),Bn.restore(),Ri.restore(),hi.restore(),li.restore(),mn.restore(),an.restore(),Ei&&Ei.restore(),Ji.procs.refresh(),ba(),Ki.forEach(function(K){K()})}pn&&(pn.addEventListener(Fo,Zs,!1),pn.addEventListener(Uo,Va,!1));function Ml(){Vn.length=0,is(),pn&&(pn.removeEventListener(Fo,Zs),pn.removeEventListener(Uo,Va)),Bn.clear(),mn.clear(),li.clear(),an.clear(),hi.clear(),ci.clear(),Ri.clear(),Ei&&Ei.clear(),kn.forEach(function(q){q()})}function zo(q){function K(Rt){var Bt=e({},Rt);delete Bt.uniforms,delete Bt.attributes,delete Bt.context,delete Bt.vao,"stencil"in Bt&&Bt.stencil.op&&(Bt.stencil.opBack=Bt.stencil.opFront=Bt.stencil.op,delete Bt.stencil.op);function Dt(yt){if(yt in Bt){var Pt=Bt[yt];delete Bt[yt],Object.keys(Pt).forEach(function(ht){Bt[yt+"."+ht]=Pt[ht]})}}return Dt("blend"),Dt("depth"),Dt("cull"),Dt("stencil"),Dt("polygonOffset"),Dt("scissor"),Dt("sample"),"vao"in Rt&&(Bt.vao=Rt.vao),Bt}function de(Rt,Bt){var Dt={},yt={};return Object.keys(Rt).forEach(function(Pt){var ht=Rt[Pt];if(h.isDynamic(ht)){yt[Pt]=h.unbox(ht,Pt);return}else if(Bt&&Array.isArray(ht)){for(var ur=0;ur<ht.length;++ur)if(h.isDynamic(ht[ur])){yt[Pt]=h.unbox(ht,Pt);return}}Dt[Pt]=ht}),{dynamic:yt,static:Dt}}var ne=de(q.context||{},!0),we=de(q.uniforms||{},!0),Ue=de(q.attributes||{},!1),ft=de(K(q),!1),Xt={gpuTime:0,cpuTime:0,count:0},hr=Ji.compile(ft,Ue,we,ne,Xt),qt=hr.draw,Ve=hr.batch,Qe=hr.scope,at=[];function Ct(Rt){for(;at.length<Rt;)at.push(null);return at}function Ot(Rt,Bt){var Dt;if(typeof Rt=="function")return Qe.call(this,null,Rt,0);if(typeof Bt=="function")if(typeof Rt=="number")for(Dt=0;Dt<Rt;++Dt)Qe.call(this,null,Bt,Dt);else if(Array.isArray(Rt))for(Dt=0;Dt<Rt.length;++Dt)Qe.call(this,Rt[Dt],Bt,Dt);else return Qe.call(this,Rt,Bt,0);else if(typeof Rt=="number"){if(Rt>0)return Ve.call(this,Ct(Rt|0),Rt|0)}else if(Array.isArray(Rt)){if(Rt.length)return Ve.call(this,Rt,Rt.length)}else return qt.call(this,Rt)}return e(Ot,{stats:Xt,destroy:function(){hr.destroy()}})}var Qs=mn.setFBO=zo({framebuffer:h.define.call(null,$s,"framebuffer")});function al(q,K){var de=0;Ji.procs.poll();var ne=K.color;ne&&(zt.clearColor(+ne[0]||0,+ne[1]||0,+ne[2]||0,+ne[3]||0),de|=Ms),"depth"in K&&(zt.clearDepth(+K.depth),de|=Xs),"stencil"in K&&(zt.clearStencil(K.stencil|0),de|=Gn),zt.clear(de)}function Vl(q){if("framebuffer"in q)if(q.framebuffer&&q.framebuffer_reglType==="framebufferCube")for(var K=0;K<6;++K)Qs(e({framebuffer:q.framebuffer.faces[K]},q),al);else Qs(q,al);else al(null,q)}function ss(q){Vn.push(q);function K(){var de=dl(Vn,q);function ne(){var we=dl(Vn,ne);Vn[we]=Vn[Vn.length-1],Vn.length-=1,Vn.length<=0&&is()}Vn[de]=ne}return ba(),{cancel:K}}function Vs(){var q=Ni.viewport,K=Ni.scissor_box;q[0]=q[1]=K[0]=K[1]=0,si.viewportWidth=si.framebufferWidth=si.drawingBufferWidth=q[2]=K[2]=zt.drawingBufferWidth,si.viewportHeight=si.framebufferHeight=si.drawingBufferHeight=q[3]=K[3]=zt.drawingBufferHeight}function Ys(){si.tick+=1,si.time=ol(),Vs(),Ji.procs.poll()}function wa(){hi.refresh(),Vs(),Ji.procs.refresh(),Ei&&Ei.update()}function ol(){return(v()-Hn)/1e3}wa();function io(q,K){var de;switch(q){case"frame":return ss(K);case"lost":de=na;break;case"restore":de=Ki;break;case"destroy":de=kn;break;default:}return de.push(K),{cancel:function(){for(var ne=0;ne<de.length;++ne)if(de[ne]===K){de[ne]=de[de.length-1],de.pop();return}}}}function Y(){return Ui}function D(q){Object.entries(q).forEach(function(K){Ui[K[0]]=K[1]})}var J=e(zo,{clear:Vl,prop:h.define.bind(null,$s),context:h.define.bind(null,Sl),this:h.define.bind(null,bu),draw:zo({}),buffer:function(q){return Ri.create(q,ja,!1,!1)},elements:function(q){return ci.create(q,!1)},texture:hi.create2D,cube:hi.createCube,renderbuffer:li.create,framebuffer:mn.create,framebufferCube:mn.createCube,vao:an.createVAO,attributes:Rr,frame:ss,on:io,limits:xi,hasExtension:function(q){return xi.extensions.indexOf(q.toLowerCase())>=0},read:Vi,destroy:Ml,_gl:zt,_refresh:wa,poll:function(){Ys(),Ei&&Ei.update()},now:ol,stats:mi,getCachedCode:Y,preloadCachedCode:D});return bt.onDone(null,J),J}return Sc})});var Mz=ye((d_r,CBe)=>{"use strict";var FUt=CX(),zUt=kBe();CBe.exports=function(t,r,n){var i=t._fullLayout,a=!0;return i._glcanvas.each(function(o){if(o.regl){o.regl.preloadCachedCode(n);return}if(!(o.pick&&!i._has("parcoords"))){try{o.regl=zUt({canvas:this,attributes:{antialias:!o.pick,preserveDrawingBuffer:!0},pixelRatio:t._context.plotGlPixelRatio||window.devicePixelRatio,extensions:r||[],cachedCode:n||{}})}catch(s){a=!1}o.regl||(a=!1),a&&this.addEventListener("webglcontextlost",function(s){t&&t.emit&&t.emit("plotly_webglcontextlost",{event:s,layer:o.key})},!1)}}),a||FUt({container:i._glcontainer.node()}),a}});var dK=ye((hK,DBe)=>{"use strict";var LBe=hY(),PBe=GY(),OUt=yqe(),IBe=EBe(),fK=Dr(),qUt=Eg().selectMode,BUt=Mz(),NUt=Ru(),UUt=_U(),VUt=lY().styleTextSelection,RBe={};function GUt(e,t,r,n){var i=e._size,a=e.width*n,o=e.height*n,s=i.l*n,l=i.b*n,u=i.r*n,c=i.t*n,f=i.w*n,h=i.h*n;return[s+t.domain[0]*f,l+r.domain[0]*h,a-u-(1-t.domain[1])*f,o-c-(1-r.domain[1])*h]}var hK=DBe.exports=function(t,r,n){if(n.length){var i=t._fullLayout,a=r._scene,o=r.xaxis,s=r.yaxis,l,u;if(a){var c=BUt(t,["ANGLE_instanced_arrays","OES_element_index_uint"],RBe);if(!c){a.init();return}var f=a.count,h=i._glcanvas.data()[0].regl;if(UUt(t,r,n),a.dirty){if((a.line2d||a.error2d)&&!(a.scatter2d||a.fill2d||a.glText)&&h.clear({color:!0,depth:!0}),a.error2d===!0&&(a.error2d=OUt(h)),a.line2d===!0&&(a.line2d=PBe(h)),a.scatter2d===!0&&(a.scatter2d=LBe(h)),a.fill2d===!0&&(a.fill2d=PBe(h)),a.glText===!0)for(a.glText=new Array(f),l=0;l<f;l++)a.glText[l]=new IBe(h);if(a.glText){if(f>a.glText.length){var d=f-a.glText.length;for(l=0;l<d;l++)a.glText.push(new IBe(h))}else if(f<a.glText.length){var v=a.glText.length-f,_=a.glText.splice(f,v);_.forEach(function(H){H.destroy()})}for(l=0;l<f;l++)a.glText[l].update(a.textOptions[l])}if(a.line2d&&(a.line2d.update(a.lineOptions),a.lineOptions=a.lineOptions.map(function(H){if(H&&H.positions){for(var te=H.positions,oe=0;oe<te.length&&(isNaN(te[oe])||isNaN(te[oe+1]));)oe+=2;for(var _e=te.length-2;_e>oe&&(isNaN(te[_e])||isNaN(te[_e+1]));)_e-=2;H.positions=te.slice(oe,_e+2)}return H}),a.line2d.update(a.lineOptions)),a.error2d){var b=(a.errorXOptions||[]).concat(a.errorYOptions||[]);a.error2d.update(b)}a.scatter2d&&a.scatter2d.update(a.markerOptions),a.fillOrder=fK.repeat(null,f),a.fill2d&&(a.fillOptions=a.fillOptions.map(function(H,te){var oe=n[te];if(!(!H||!oe||!oe[0]||!oe[0].trace)){var _e=oe[0],Ee=_e.trace,Ce=_e.t,me=a.lineOptions[te],ie,Se,Le=[];Ee._ownfill&&Le.push(te),Ee._nexttrace&&Le.push(te+1),Le.length&&(a.fillOrder[te]=Le);var Ae=[],Fe=me&&me.positions||Ce.positions,Pe,ge;if(Ee.fill==="tozeroy"){for(Pe=0;Pe<Fe.length&&isNaN(Fe[Pe+1]);)Pe+=2;for(ge=Fe.length-2;ge>Pe&&isNaN(Fe[ge+1]);)ge-=2;Fe[Pe+1]!==0&&(Ae=[Fe[Pe],0]),Ae=Ae.concat(Fe.slice(Pe,ge+2)),Fe[ge+1]!==0&&(Ae=Ae.concat([Fe[ge],0]))}else if(Ee.fill==="tozerox"){for(Pe=0;Pe<Fe.length&&isNaN(Fe[Pe]);)Pe+=2;for(ge=Fe.length-2;ge>Pe&&isNaN(Fe[ge]);)ge-=2;Fe[Pe]!==0&&(Ae=[0,Fe[Pe+1]]),Ae=Ae.concat(Fe.slice(Pe,ge+2)),Fe[ge]!==0&&(Ae=Ae.concat([0,Fe[ge+1]]))}else if(Ee.fill==="toself"||Ee.fill==="tonext"){for(Ae=[],ie=0,H.splitNull=!0,Se=0;Se<Fe.length;Se+=2)(isNaN(Fe[Se])||isNaN(Fe[Se+1]))&&(Ae=Ae.concat(Fe.slice(ie,Se)),Ae.push(Fe[ie],Fe[ie+1]),Ae.push(null,null),ie=Se+2);Ae=Ae.concat(Fe.slice(ie)),ie&&Ae.push(Fe[ie],Fe[ie+1])}else{var Re=Ee._nexttrace;if(Re){var ce=a.lineOptions[te+1];if(ce){var Ze=ce.positions;if(Ee.fill==="tonexty"){for(Ae=Fe.slice(),te=Math.floor(Ze.length/2);te--;){var ut=Ze[te*2],pt=Ze[te*2+1];isNaN(ut)||isNaN(pt)||Ae.push(ut,pt)}H.fill=Re.fillcolor}}}}if(Ee._prevtrace&&Ee._prevtrace.fill==="tonext"){var Zt=a.lineOptions[te-1].positions,st=Ae.length/2;ie=st;var lt=[ie];for(Se=0;Se<Zt.length;Se+=2)(isNaN(Zt[Se])||isNaN(Zt[Se+1]))&&(lt.push(Se/2+st+1),ie=Se+2);Ae=Ae.concat(Zt),H.hole=lt}return H.fillmode=Ee.fill,H.opacity=Ee.opacity,H.positions=Ae,H}}),a.fill2d.update(a.fillOptions))}var p=i.dragmode,k=qUt(p),E=i.clickmode.indexOf("select")>-1;for(l=0;l<f;l++){var S=n[l][0],L=S.trace,x=S.t,C=x.index,M=L._length,g=x.x,P=x.y;if(L.selectedpoints||k||E){if(k||(k=!0),L.selectedpoints){var T=a.selectBatch[C]=fK.selIndices2selPoints(L),z={};for(u=0;u<T.length;u++)z[T[u]]=1;var O=[];for(u=0;u<M;u++)z[u]||O.push(u);a.unselectBatch[C]=O}var V=x.xpx=new Array(M),G=x.ypx=new Array(M);for(u=0;u<M;u++)V[u]=o.c2p(g[u]),G[u]=s.c2p(P[u])}else x.xpx=x.ypx=null}if(k){if(a.select2d||(a.select2d=LBe(i._glcanvas.data()[1].regl)),a.scatter2d){var Z=new Array(f);for(l=0;l<f;l++)Z[l]=a.selectBatch[l].length||a.unselectBatch[l].length?a.markerUnselectedOptions[l]:{};a.scatter2d.update(Z)}a.select2d&&(a.select2d.update(a.markerOptions),a.select2d.update(a.markerSelectedOptions)),a.glText&&n.forEach(function(H){var te=((H||[])[0]||{}).trace||{};NUt.hasText(te)&&VUt(H)})}else a.scatter2d&&a.scatter2d.update(a.markerOptions);var j={viewport:GUt(i,o,s,t._context.plotGlPixelRatio),range:[(o._rl||o.range)[0],(s._rl||s.range)[0],(o._rl||o.range)[1],(s._rl||s.range)[1]]},N=fK.repeat(j,a.count);a.fill2d&&a.fill2d.update(N),a.line2d&&a.line2d.update(N),a.error2d&&a.error2d.update(N.concat(N)),a.scatter2d&&a.scatter2d.update(N),a.select2d&&a.select2d.update(N),a.glText&&a.glText.forEach(function(H){H.update(j)})}}};hK.reglPrecompiled=RBe});var OBe=ye((v_r,zBe)=>{"use strict";var FBe=Qze();FBe.plot=dK();zBe.exports=FBe});var BBe=ye((p_r,qBe)=>{"use strict";qBe.exports=OBe()});var vK=ye((g_r,GBe)=>{"use strict";var HUt=pf(),VBe=Tu(),NBe=df().axisHoverFormat,{hovertemplateAttrs:jUt,templatefallbackAttrs:WUt}=Ll(),kk=lk(),XUt=hd().idRegex,ZUt=vl().templatedArray,g5=Ao().extendFlat,o1=HUt.marker,YUt=o1.line,KUt=g5(VBe("marker.line",{editTypeOverride:"calc"}),{width:g5({},YUt.width,{editType:"calc"}),editType:"calc"}),Ez=g5(VBe("marker"),{symbol:o1.symbol,angle:o1.angle,size:g5({},o1.size,{editType:"markerSize"}),sizeref:o1.sizeref,sizemin:o1.sizemin,sizemode:o1.sizemode,opacity:o1.opacity,colorbar:o1.colorbar,line:KUt,editType:"calc"});Ez.color.editType=Ez.cmin.editType=Ez.cmax.editType="style";function UBe(e){return{valType:"info_array",freeLength:!0,editType:"calc",items:{valType:"subplotid",regex:XUt[e],editType:"plot"}}}GBe.exports={dimensions:ZUt("dimension",{visible:{valType:"boolean",dflt:!0,editType:"calc"},label:{valType:"string",editType:"calc"},values:{valType:"data_array",editType:"calc+clearAxisTypes"},axis:{type:{valType:"enumerated",values:["linear","log","date","category"],editType:"calc+clearAxisTypes"},matches:{valType:"boolean",dflt:!1,editType:"calc"},editType:"calc+clearAxisTypes"},editType:"calc+clearAxisTypes"}),text:g5({},kk.text,{}),hovertext:g5({},kk.hovertext,{}),hovertemplate:jUt(),hovertemplatefallback:WUt(),xhoverformat:NBe("x"),yhoverformat:NBe("y"),marker:Ez,xaxes:UBe("x"),yaxes:UBe("y"),diagonal:{visible:{valType:"boolean",dflt:!0,editType:"calc"},editType:"calc"},showupperhalf:{valType:"boolean",dflt:!0,editType:"calc"},showlowerhalf:{valType:"boolean",dflt:!0,editType:"calc"},selected:{marker:kk.selected.marker,editType:"calc"},unselected:{marker:kk.unselected.marker,editType:"calc"},opacity:kk.opacity}});var kz=ye((m_r,HBe)=>{"use strict";HBe.exports=function(e,t,r,n){n||(n=1/0);var i,a;for(i=0;i<t.length;i++)a=t[i],a.visible&&(n=Math.min(n,a[r].length));for(n===1/0&&(n=0),e._length=n,i=0;i<t.length;i++)a=t[i],a.visible&&(a._length=n);return n}});var XBe=ye((y_r,WBe)=>{"use strict";var pK=Dr(),JUt=Yd(),jBe=vK(),$Ut=Ru(),QUt=$p(),eVt=kz(),tVt=VF().isOpenSymbol;WBe.exports=function(t,r,n,i){function a(d,v){return pK.coerce(t,r,jBe,d,v)}var o=JUt(t,r,{name:"dimensions",handleItemDefaults:rVt}),s=a("diagonal.visible"),l=a("showupperhalf"),u=a("showlowerhalf"),c=eVt(r,o,"values");if(!c||!s&&!l&&!u){r.visible=!1;return}a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),a("xhoverformat"),a("yhoverformat"),QUt(t,r,n,i,a,{noAngleRef:!0,noStandOff:!0});var f=tVt(r.marker.symbol),h=$Ut.isBubble(r);a("marker.line.width",f||h?1:0),iVt(t,r,i,a),pK.coerceSelectionMarkerOpacity(r,a)};function rVt(e,t){function r(i,a){return pK.coerce(e,t,jBe.dimensions,i,a)}r("label");var n=r("values");n&&n.length?r("visible"):t.visible=!1,r("axis.type"),r("axis.matches")}function iVt(e,t,r,n){var i=t.dimensions,a=i.length,o=t.showupperhalf,s=t.showlowerhalf,l=t.diagonal.visible,u,c,f=new Array(a),h=new Array(a);for(u=0;u<a;u++){var d=u?u+1:"";f[u]="x"+d,h[u]="y"+d}var v=n("xaxes",f),_=n("yaxes",h),b=t._diag=new Array(a);t._xaxes={},t._yaxes={};var p=[],k=[];function E(z,O,V,G){if(z){var Z=z.charAt(0),j=r._splomAxes[Z];if(t["_"+Z+"axes"][z]=1,G.push(z),!(z in j)){var N=j[z]={};V&&(N.label=V.label||"",V.visible&&V.axis&&(V.axis.type&&(N.type=V.axis.type),V.axis.matches&&(N.matches=O)))}}}var S=!l&&!s,L=!l&&!o;for(t._axesDim={},u=0;u<a;u++){var x=i[u],C=u===0,M=u===a-1,g=C&&S||M&&L?void 0:v[u],P=C&&L||M&&S?void 0:_[u];E(g,P,x,p),E(P,g,x,k),b[u]=[g,P],t._axesDim[g]=u,t._axesDim[P]=u}for(u=0;u<p.length;u++)for(c=0;c<k.length;c++){var T=p[u]+k[c];(u>c&&o||u<c&&s||u===c&&(l||!s||!o))&&(r._splomSubplots[T]=1)}(!s||!l&&o&&s)&&(r._splomGridDflt.xside="bottom",r._splomGridDflt.yside="left")}});var KBe=ye((__r,YBe)=>{"use strict";var ZBe=Dr();YBe.exports=function(t,r){var n=t._fullLayout,i=r.uid,a=n._splomScenes;a||(a=n._splomScenes={});var o={dirty:!0,selectBatch:[],unselectBatch:[]},s={matrix:!1,selectBatch:[],unselectBatch:[]},l=a[r.uid];return l||(l=a[i]=ZBe.extendFlat({},o,s),l.draw=function(){l.matrix&&l.matrix.draw&&(l.selectBatch.length||l.unselectBatch.length?l.matrix.draw(l.unselectBatch,l.selectBatch):l.matrix.draw()),l.dirty=!1},l.destroy=function(){l.matrix&&l.matrix.destroy&&l.matrix.destroy(),l.matrixOptions=null,l.selectBatch=null,l.unselectBatch=null,l=null}),l.dirty||ZBe.extendFlat(l,o),l}});var QBe=ye((x_r,$Be)=>{"use strict";var gK=Dr(),Cz=hf(),nVt=O0().calcMarkerSize,aVt=O0().calcAxisExpansion,oVt=F0(),JBe=J2().markerSelection,sVt=J2().markerStyle,lVt=KBe(),uVt=fs().BADNUM,cVt=ox().TOO_MANY_POINTS;$Be.exports=function(t,r){var n=r.dimensions,i=r._length,a={},o=a.cdata=[],s=a.data=[],l=r._visibleDims=[],u,c,f,h,d;function v(E,S){for(var L=E.makeCalcdata({v:S.values,vcalendar:r.calendar},"v"),x=0;x<L.length;x++)L[x]=L[x]===uVt?NaN:L[x];o.push(L),s.push(E.type==="log"?gK.simpleMap(L,E.c2l):L)}for(u=0;u<n.length;u++)if(f=n[u],f.visible){if(h=Cz.getFromId(t,r._diag[u][0]),d=Cz.getFromId(t,r._diag[u][1]),h&&d&&h.type!==d.type){gK.log("Skipping splom dimension "+u+" with conflicting axis types");continue}h?(v(h,f),d&&d.type==="category"&&(d._categories=h._categories.slice())):v(d,f),l.push(u)}oVt(t,r),gK.extendFlat(a,sVt(t,r));var _=o.length,b=_*i>cVt,p;for(b?p=a.sizeAvg||Math.max(a.size,3):p=nVt(r,i),c=0;c<l.length;c++)u=l[c],f=n[u],h=Cz.getFromId(t,r._diag[u][0])||{},d=Cz.getFromId(t,r._diag[u][1])||{},aVt(t,r,h,d,o[c],o[c],p);var k=lVt(t,r);return k.matrix||(k.matrix=!0),k.matrixOptions=a,k.selectedOptions=JBe(t,r,r.selected),k.unselectedOptions=JBe(t,r,r.unselected),[{x:!1,y:!1,t:{},trace:r}]}});var tNe=ye((eNe,Ck)=>{(function(){var e,t,r,n,i,a;typeof performance!="undefined"&&performance!==null&&performance.now?Ck.exports=function(){return performance.now()}:typeof process!="undefined"&&process!==null&&process.hrtime?(Ck.exports=function(){return(e()-i)/1e6},t=process.hrtime,e=function(){var o;return o=t(),o[0]*1e9+o[1]},n=e(),a=process.uptime()*1e9,i=n-a):Date.now?(Ck.exports=function(){return Date.now()-r},r=Date.now()):(Ck.exports=function(){return new Date().getTime()-r},r=new Date().getTime())}).call(eNe)});var iNe=ye((b_r,Iz)=>{var fVt=tNe(),s1=window,Lz=["moz","webkit"],y5="AnimationFrame",_5=s1["request"+y5],Lk=s1["cancel"+y5]||s1["cancelRequest"+y5];for(m5=0;!_5&&m5<Lz.length;m5++)_5=s1[Lz[m5]+"Request"+y5],Lk=s1[Lz[m5]+"Cancel"+y5]||s1[Lz[m5]+"CancelRequest"+y5];var m5;(!_5||!Lk)&&(Pz=0,mK=0,vx=[],rNe=1e3/60,_5=function(e){if(vx.length===0){var t=fVt(),r=Math.max(0,rNe-(t-Pz));Pz=r+t,setTimeout(function(){var n=vx.slice(0);vx.length=0;for(var i=0;i<n.length;i++)if(!n[i].cancelled)try{n[i].callback(Pz)}catch(a){setTimeout(function(){throw a},0)}},Math.round(r))}return vx.push({handle:++mK,callback:e,cancelled:!1}),mK},Lk=function(e){for(var t=0;t<vx.length;t++)vx[t].handle===e&&(vx[t].cancelled=!0)});var Pz,mK,vx,rNe;Iz.exports=function(e){return _5.call(s1,e)};Iz.exports.cancel=function(){Lk.apply(s1,arguments)};Iz.exports.polyfill=function(e){e||(e=s1),e.requestAnimationFrame=_5,e.cancelAnimationFrame=Lk}});var aNe=ye((w_r,nNe)=>{nNe.exports=function(t,r){var n=typeof t=="number",i=typeof r=="number";n&&!i?(r=t,t=0):!n&&!i&&(t=0,r=0),t=t|0,r=r|0;var a=r-t;if(a<0)throw new Error("array length must be positive");for(var o=new Array(a),s=0,l=t;s<a;s++,l++)o[s]=l;return o}});var uNe=ye((T_r,lNe)=>{"use strict";var hVt=hY(),dVt=Zm(),vVt=X2(),oNe=iNe(),pVt=aNe(),yK=i5(),gVt=Z2();lNe.exports=px;function px(e,t){if(!(this instanceof px))return new px(e,t);this.traces=[],this.passes={},this.regl=e,this.scatter=hVt(e),this.canvas=this.scatter.canvas}px.prototype.render=function(...e){return e.length&&this.update(...e),this.regl.attributes.preserveDrawingBuffer?this.draw():(this.dirty?this.planned==null&&(this.planned=oNe(()=>{this.draw(),this.dirty=!0,this.planned=null})):(this.draw(),this.dirty=!0,oNe(()=>{this.dirty=!1})),this)};px.prototype.update=function(...e){if(!e.length)return;for(let n=0;n<e.length;n++)this.updateItem(n,e[n]);this.traces=this.traces.filter(Boolean);let t=[],r=0;for(let n=0;n<this.traces.length;n++){let i=this.traces[n],a=this.traces[n].passes;for(let o=0;o<a.length;o++)t.push(this.passes[a[o]]);i.passOffset=r,r+=i.passes.length}return this.scatter.update(...t),this};px.prototype.updateItem=function(e,t){let{regl:r}=this;if(t===null)return this.traces[e]=null,this;if(!t)return this;let n=dVt(t,{data:"data items columns rows values dimensions samples x",snap:"snap cluster",size:"sizes size radius",color:"colors color fill fill-color fillColor",opacity:"opacity alpha transparency opaque",borderSize:"borderSizes borderSize border-size bordersize borderWidth borderWidths border-width borderwidth stroke-width strokeWidth strokewidth outline",borderColor:"borderColors borderColor bordercolor stroke stroke-color strokeColor",marker:"markers marker shape",range:"range ranges databox dataBox",viewport:"viewport viewBox viewbox",domain:"domain domains area areas",padding:"pad padding paddings pads margin margins",transpose:"transpose transposed",diagonal:"diagonal diag showDiagonal",upper:"upper up top upperhalf upperHalf showupperhalf showUpper showUpperHalf",lower:"lower low bottom lowerhalf lowerHalf showlowerhalf showLowerHalf showLower"}),i=this.traces[e]||(this.traces[e]={id:e,buffer:r.buffer({usage:"dynamic",type:"float",data:new Uint8Array}),color:"black",marker:null,size:12,borderColor:"transparent",borderSize:1,viewport:yK([r._gl.drawingBufferWidth,r._gl.drawingBufferHeight]),padding:[0,0,0,0],opacity:1,diagonal:!0,upper:!0,lower:!0});if(n.color!=null&&(i.color=n.color),n.size!=null&&(i.size=n.size),n.marker!=null&&(i.marker=n.marker),n.borderColor!=null&&(i.borderColor=n.borderColor),n.borderSize!=null&&(i.borderSize=n.borderSize),n.opacity!=null&&(i.opacity=n.opacity),n.viewport&&(i.viewport=yK(n.viewport)),n.diagonal!=null&&(i.diagonal=n.diagonal),n.upper!=null&&(i.upper=n.upper),n.lower!=null&&(i.lower=n.lower),n.data){i.buffer(gVt(n.data)),i.columns=n.data.length,i.count=n.data[0].length,i.bounds=[];for(let _=0;_<i.columns;_++)i.bounds[_]=vVt(n.data[_],1)}let a;n.range&&(i.range=n.range,a=i.range&&typeof i.range[0]!="number"),n.domain&&(i.domain=n.domain);let o=!1;n.padding!=null&&(Array.isArray(n.padding)&&n.padding.length===i.columns&&typeof n.padding[n.padding.length-1]=="number"?(i.padding=n.padding.map(sNe),o=!0):i.padding=sNe(n.padding));let s=i.columns,l=i.count,u=i.viewport.width,c=i.viewport.height,f=i.viewport.x,h=i.viewport.y,d=u/s,v=c/s;i.passes=[];for(let _=0;_<s;_++)for(let b=0;b<s;b++){if(!i.diagonal&&b===_||!i.upper&&_>b||!i.lower&&_<b)continue;let p=mVt(i.id,_,b),k=this.passes[p]||(this.passes[p]={});if(n.data&&(n.transpose?k.positions={x:{buffer:i.buffer,offset:b,count:l,stride:s},y:{buffer:i.buffer,offset:_,count:l,stride:s}}:k.positions={x:{buffer:i.buffer,offset:b*l,count:l},y:{buffer:i.buffer,offset:_*l,count:l}},k.bounds=Rz(i.bounds,_,b)),n.domain||n.viewport||n.data){let E=o?Rz(i.padding,_,b):i.padding;if(i.domain){let[S,L,x,C]=Rz(i.domain,_,b);k.viewport=[f+S*u+E[0],h+L*c+E[1],f+x*u-E[2],h+C*c-E[3]]}else k.viewport=[f+b*d+d*E[0],h+_*v+v*E[1],f+(b+1)*d-d*E[2],h+(_+1)*v-v*E[3]]}n.color&&(k.color=i.color),n.size&&(k.size=i.size),n.marker&&(k.marker=i.marker),n.borderSize&&(k.borderSize=i.borderSize),n.borderColor&&(k.borderColor=i.borderColor),n.opacity&&(k.opacity=i.opacity),n.range&&(k.range=a?Rz(i.range,_,b):i.range||k.bounds),i.passes.push(p)}return this};px.prototype.draw=function(...e){if(!e.length)this.scatter.draw();else{let t=[];for(let r=0;r<e.length;r++)if(typeof e[r]=="number"){let{passes:n,passOffset:i}=this.traces[e[r]];t.push(...pVt(i,i+n.length))}else if(e[r].length){let n=e[r],{passes:i,passOffset:a}=this.traces[r];i=i.map((o,s)=>{t[a+s]=n})}this.scatter.draw(...t)}return this};px.prototype.destroy=function(){return this.traces.forEach(e=>{e.buffer&&e.buffer.destroy&&e.buffer.destroy()}),this.traces=null,this.passes=null,this.scatter.destroy(),this};function mVt(e,t,r){let n=e.id!=null?e.id:e,i=t,a=r;return n<<16|(i&255)<<8|a&255}function Rz(e,t,r){let n,i,a,o,s,l,u,c,f=e[t],h=e[r];return f.length>2?(n=f[0],a=f[2],i=f[1],o=f[3]):f.length?(n=i=f[0],a=o=f[1]):(n=f.x,i=f.y,a=f.x+f.width,o=f.y+f.height),h.length>2?(s=h[0],u=h[2],l=h[1],c=h[3]):h.length?(s=l=h[0],u=c=h[1]):(s=h.x,l=h.y,u=h.x+h.width,c=h.y+h.height),[s,i,u,o]}function sNe(e){if(typeof e=="number")return[e,e,e,e];if(e.length===2)return[e[0],e[1],e[0],e[1]];{let t=yK(e);return[t.x,t.y,t.x+t.width,t.y+t.height]}}});var fNe=ye((A_r,cNe)=>{"use strict";var yVt=uNe(),_K=Dr(),Dz=hf(),_Vt=Eg().selectMode;cNe.exports=function(t,r,n){if(n.length)for(var i=0;i<n.length;i++)xVt(t,n[i][0])};function xVt(e,t){var r=e._fullLayout,n=r._size,i=t.trace,a=t.t,o=r._splomScenes[i.uid],s=o.matrixOptions,l=s.cdata,u=r._glcanvas.data()[0].regl,c=r.dragmode,f,h,d,v,_;if(l.length!==0){s.lower=i.showupperhalf,s.upper=i.showlowerhalf,s.diagonal=i.diagonal.visible;var b=i._visibleDims,p=l.length,k=o.viewOpts={};for(k.ranges=new Array(p),k.domains=new Array(p),_=0;_<b.length;_++){d=b[_];var E=k.ranges[_]=new Array(4),S=k.domains[_]=new Array(4);f=Dz.getFromId(e,i._diag[d][0]),f&&(E[0]=f._rl[0],E[2]=f._rl[1],S[0]=f.domain[0],S[2]=f.domain[1]),h=Dz.getFromId(e,i._diag[d][1]),h&&(E[1]=h._rl[0],E[3]=h._rl[1],S[1]=h.domain[0],S[3]=h.domain[1])}var L=e._context.plotGlPixelRatio,x=n.l*L,C=n.b*L,M=n.w*L,g=n.h*L;k.viewport=[x,C,M+x,g+C],o.matrix===!0&&(o.matrix=yVt(u));var P=r.clickmode.indexOf("select")>-1,T=_Vt(c)||!!i.selectedpoints||P,z=!0;if(T){var O=i._length;if(i.selectedpoints){o.selectBatch=i.selectedpoints;var V=i.selectedpoints,G={};for(d=0;d<V.length;d++)G[V[d]]=!0;var Z=[];for(d=0;d<O;d++)G[d]||Z.push(d);o.unselectBatch=Z}var j=a.xpx=new Array(p),N=a.ypx=new Array(p);for(_=0;_<b.length;_++){if(d=b[_],f=Dz.getFromId(e,i._diag[d][0]),f)for(j[_]=new Array(O),v=0;v<O;v++)j[_][v]=f.c2p(l[_][v]);if(h=Dz.getFromId(e,i._diag[d][1]),h)for(N[_]=new Array(O),v=0;v<O;v++)N[_][v]=h.c2p(l[_][v])}if(o.selectBatch.length||o.unselectBatch.length){var H=_K.extendFlat({},s,o.unselectedOptions,k),te=_K.extendFlat({},s,o.selectedOptions,k);o.matrix.update(H,te),z=!1}}else a.xpx=a.ypx=null;if(z){var oe=_K.extendFlat({},s,k);o.matrix.update(oe,null)}}}});var xK=ye(hNe=>{"use strict";hNe.getDimIndex=function(t,r){for(var n=r._id,i=n.charAt(0),a={x:0,y:1}[i],o=t._visibleDims,s=0;s<o.length;s++){var l=o[s];if(t._diag[l][a]===n)return s}return!1}});var mNe=ye((M_r,gNe)=>{"use strict";var dNe=xK(),bVt=UF().calcHover,vNe=ho().getFromId,wVt=Ao().extendFlat;function TVt(e,t,r,n,i){i||(i={});var a=(n||"").charAt(0)==="x",o=(n||"").charAt(0)==="y",s=pNe(e,t,r);if((a||o)&&i.hoversubplots==="axis"&&s[0])for(var l=(a?e.xa:e.ya)._subplotsWith,u=i.gd,c=wVt({},e),f=0;f<l.length;f++){var h=l[f];if(h!==e.xa._id+e.ya._id){o?c.xa=vNe(u,h,"x"):c.ya=vNe(u,h,"y");var d=a||o,v=pNe(c,t,r,d);s=s.concat(v)}}return s}function pNe(e,t,r,n){var i=e.cd,a=i[0].trace,o=e.scene,s=o.matrixOptions.cdata,l=e.xa,u=e.ya,c=l.c2p(t),f=u.c2p(r),h=e.distance,d=dNe.getDimIndex(a,l),v=dNe.getDimIndex(a,u);if(d===!1||v===!1)return[e];for(var _=s[d],b=s[v],p,k,E=h,S=0;S<_.length;S++)if(!(n&&S!==e.index)){var L=_[S],x=b[S],C=l.c2p(L)-c,M=u.c2p(x)-f,g=Math.sqrt(C*C+M*M);(n||g<E)&&(E=k=g,p=S)}return e.index=p,e.distance=E,e.dxy=k,p===void 0?[e]:[bVt(e,_,b,a)]}gNe.exports={hoverPoints:TVt}});var TNe=ye((E_r,wNe)=>{"use strict";var bNe=Dr(),yNe=bNe.pushUnique,_Ne=Ru(),xNe=xK();wNe.exports=function(t,r){var n=t.cd,i=n[0].trace,a=n[0].t,o=t.scene,s=o.matrixOptions.cdata,l=t.xaxis,u=t.yaxis,c=[];if(!o)return c;var f=!_Ne.hasMarkers(i)&&!_Ne.hasText(i);if(i.visible!==!0||f)return c;var h=xNe.getDimIndex(i,l),d=xNe.getDimIndex(i,u);if(h===!1||d===!1)return c;var v=a.xpx[h],_=a.ypx[d],b=s[h],p=s[d],k=(t.scene.selectBatch||[]).slice(),E=[];if(r!==!1&&!r.degenerate)for(var S=0;S<b.length;S++)r.contains([v[S],_[S]],null,S,t)?(c.push({pointNumber:S,x:b[S],y:p[S]}),yNe(k,S)):k.indexOf(S)!==-1?yNe(k,S):E.push(S);var L=o.matrixOptions;return!k.length&&!E.length?o.matrix.update(L,null):!o.selectBatch.length&&!o.unselectBatch.length&&o.matrix.update(o.unselectedOptions,bNe.extendFlat({},L,o.selectedOptions,o.viewOpts)),o.selectBatch=k,o.unselectBatch=E,c}});var MNe=ye((k_r,SNe)=>{"use strict";var ANe=Dr(),AVt=F0(),SVt=J2().markerStyle;SNe.exports=function(t,r){var n=r.trace,i=t._fullLayout._splomScenes[n.uid];if(i){AVt(t,n),ANe.extendFlat(i.matrixOptions,SVt(t,n));var a=ANe.extendFlat({},i.matrixOptions,i.viewOpts);i.matrix.update(a,null)}}});var kNe=ye((C_r,ENe)=>{"use strict";var MVt=qa(),EVt=fV();ENe.exports={moduleType:"trace",name:"splom",categories:["gl","regl","cartesian","symbols","showLegend","scatter-like"],attributes:vK(),supplyDefaults:XBe(),colorbar:$d(),calc:QBe(),plot:fNe(),hoverPoints:mNe().hoverPoints,selectPoints:TNe(),editStyle:MNe(),meta:{}};MVt.register(EVt)});var DNe=ye((L_r,RNe)=>{"use strict";var kVt=GY(),CVt=qa(),LVt=Mz(),PVt=Id().getModuleCalcData,gx=ph(),CNe=hf().getFromId,LNe=ho().shouldShowZeroLine,PNe="splom",INe={};function IVt(e){var t=e._fullLayout,r=CVt.getModule(PNe),n=PVt(e.calcdata,r)[0],i=LVt(e,["ANGLE_instanced_arrays","OES_element_index_uint"],INe);i&&(t._hasOnlyLargeSploms&&bK(e),r.plot(e,{},n))}function RVt(e){var t=e.calcdata,r=e._fullLayout;r._hasOnlyLargeSploms&&bK(e);for(var n=0;n<t.length;n++){var i=t[n][0],a=i.trace,o=r._splomScenes[a.uid];a.type==="splom"&&o&&o.matrix&&DVt(e,a,o)}}function DVt(e,t,r){for(var n=r.matrixOptions.data.length,i=t._visibleDims,a=r.viewOpts.ranges=new Array(n),o=0;o<i.length;o++){var s=i[o],l=a[o]=new Array(4),u=CNe(e,t._diag[s][0]);u&&(l[0]=u.r2l(u.range[0]),l[2]=u.r2l(u.range[1]));var c=CNe(e,t._diag[s][1]);c&&(l[1]=c.r2l(c.range[0]),l[3]=c.r2l(c.range[1]))}r.selectBatch.length||r.unselectBatch.length?r.matrix.update({ranges:a},{ranges:a}):r.matrix.update({ranges:a})}function bK(e){var t=e._fullLayout,r=t._glcanvas.data()[0].regl,n=t._splomGrid;n||(n=t._splomGrid=kVt(r)),n.update(FVt(e))}function FVt(e){var t=e._context.plotGlPixelRatio,r=e._fullLayout,n=r._size,i=[0,0,r.width*t,r.height*t],a={},o;function s(x,C,M,g,P,T){M*=t,g*=t,P*=t,T*=t;var z=C[x+"color"],O=C[x+"width"],V=String(z+O);V in a?a[V].data.push(NaN,NaN,M,g,P,T):a[V]={data:[M,g,P,T],join:"rect",thickness:O*t,color:z,viewport:i,range:i,overlay:!1}}for(o in r._splomSubplots){var l=r._plots[o],u=l.xaxis,c=l.yaxis,f=u._gridVals,h=c._gridVals,d=u._offset,v=u._length,_=c._length,b=n.b+c.domain[0]*n.h,p=-c._m,k=-p*c.r2l(c.range[0],c.calendar),E,S;if(u.showgrid)for(o=0;o<f.length;o++)E=d+u.l2p(f[o].x),s("grid",u,E,b,E,b+_);if(c.showgrid)for(o=0;o<h.length;o++)S=b+k+p*h[o].x,s("grid",c,d,S,d+v,S);LNe(e,u,c)&&(E=d+u.l2p(0),s("zeroline",u,E,b,E,b+_)),LNe(e,c,u)&&(S=b+k+0,s("zeroline",c,d,S,d+v,S))}var L=[];for(o in a)L.push(a[o]);return L}function zVt(e,t,r,n){var i={},a;if(n._splomScenes){for(a=0;a<e.length;a++){var o=e[a];o.type==="splom"&&(i[o.uid]=1)}for(a=0;a<r.length;a++){var s=r[a];if(!i[s.uid]){var l=n._splomScenes[s.uid];l&&l.destroy&&l.destroy(),n._splomScenes[s.uid]=null,delete n._splomScenes[s.uid]}}}Object.keys(n._splomScenes||{}).length===0&&delete n._splomScenes,n._splomGrid&&!t._hasOnlyLargeSploms&&n._hasOnlyLargeSploms&&(n._splomGrid.destroy(),n._splomGrid=null,delete n._splomGrid),gx.clean(e,t,r,n)}RNe.exports={name:PNe,attr:gx.attr,attrRegex:gx.attrRegex,layoutAttributes:gx.layoutAttributes,supplyLayoutDefaults:gx.supplyLayoutDefaults,drawFramework:gx.drawFramework,plot:IVt,drag:RVt,updateGrid:bK,clean:zVt,updateFx:gx.updateFx,toSVG:gx.toSVG,reglPrecompiled:INe}});var ONe=ye((P_r,zNe)=>{"use strict";var FNe=kNe();FNe.basePlotModule=DNe(),zNe.exports=FNe});var BNe=ye((I_r,qNe)=>{"use strict";qNe.exports=ONe()});var AK=ye((R_r,NNe)=>{"use strict";var OVt=Tu(),wK=Rd(),TK=ec(),qVt=Cc().attributes,Fz=Ao().extendFlat,BVt=vl().templatedArray;NNe.exports={domain:qVt({name:"parcoords",trace:!0,editType:"plot"}),labelangle:{valType:"angle",dflt:0,editType:"plot"},labelside:{valType:"enumerated",values:["top","bottom"],dflt:"top",editType:"plot"},labelfont:TK({editType:"plot"}),tickfont:TK({autoShadowDflt:!0,editType:"plot"}),rangefont:TK({editType:"plot"}),dimensions:BVt("dimension",{label:{valType:"string",editType:"plot"},tickvals:Fz({},wK.tickvals,{editType:"plot"}),ticktext:Fz({},wK.ticktext,{editType:"plot"}),tickformat:Fz({},wK.tickformat,{editType:"plot"}),visible:{valType:"boolean",dflt:!0,editType:"plot"},range:{valType:"info_array",items:[{valType:"number",editType:"plot"},{valType:"number",editType:"plot"}],editType:"plot"},constraintrange:{valType:"info_array",freeLength:!0,dimensions:"1-2",items:[{valType:"any",editType:"plot"},{valType:"any",editType:"plot"}],editType:"plot"},multiselect:{valType:"boolean",dflt:!0,editType:"plot"},values:{valType:"data_array",editType:"calc"},editType:"calc"}),line:Fz({editType:"calc"},OVt("line",{colorscaleDflt:"Viridis",autoColorDflt:!1,editTypeOverride:"calc"})),unselected:{line:{color:{valType:"color",dflt:"#7f7f7f",editType:"plot"},opacity:{valType:"number",min:0,max:1,dflt:"auto",editType:"plot"},editType:"plot"},editType:"plot"}}});var Pk=ye((D_r,UNe)=>{"use strict";UNe.exports={maxDimensionCount:60,overdrag:45,verticalPadding:2,tickDistance:50,canvasPixelRatio:1,blockLineCount:5e3,layers:["contextLineLayer","focusLineLayer","pickLineLayer"],axisTitleOffset:28,axisExtentOffset:10,bar:{width:4,captureWidth:10,fillColor:"magenta",fillOpacity:1,snapDuration:150,snapRatio:.25,snapClose:.01,strokeOpacity:1,strokeWidth:1,handleHeight:8,handleOpacity:1,handleOverlap:0},cn:{axisExtentText:"axis-extent-text",parcoordsLineLayers:"parcoords-line-layers",parcoordsLineLayer:"parcoords-lines",parcoords:"parcoords",parcoordsControlView:"parcoords-control-view",yAxis:"y-axis",axisOverlays:"axis-overlays",axis:"axis",axisHeading:"axis-heading",axisTitle:"axis-title",axisExtent:"axis-extent",axisExtentTop:"axis-extent-top",axisExtentTopText:"axis-extent-top-text",axisExtentBottom:"axis-extent-bottom",axisExtentBottomText:"axis-extent-bottom-text",axisBrush:"axis-brush"},id:{filterBarPattern:"filter-bar-pattern"}}});var Jm=ye((F_r,GNe)=>{"use strict";var NVt=HS();function VNe(e){return[e]}GNe.exports={keyFun:function(e){return e.key},repeat:VNe,descend:NVt,wrap:VNe,unwrap:function(e){return e[0]}}});var EK=ye((z_r,eUe)=>{"use strict";var bh=Pk(),tm=Oa(),UVt=Jm().keyFun,zz=Jm().repeat,x5=Dr().sorterAsc,VVt=Dr().strTranslate,HNe=bh.bar.snapRatio;function jNe(e,t){return e*(1-HNe)+t*HNe}var WNe=bh.bar.snapClose;function GVt(e,t){return e*(1-WNe)+t*WNe}function qz(e,t,r,n){if(HVt(r,n))return r;var i=e?-1:1,a=0,o=t.length-1;if(i<0){var s=a;a=o,o=s}for(var l=t[a],u=l,c=a;i*c<i*o;c+=i){var f=c+i,h=t[f];if(i*r<i*GVt(l,h))return jNe(l,u);if(i*r<i*h||f===o)return jNe(h,l);u=l,l=h}}function HVt(e,t){for(var r=0;r<t.length;r++)if(e>=t[r][0]&&e<=t[r][1])return!0;return!1}function jVt(e){e.attr("x",-bh.bar.captureWidth/2).attr("width",bh.bar.captureWidth)}function WVt(e){e.attr("visibility","visible").style("visibility","visible").attr("fill","yellow").attr("opacity",0)}function XVt(e){if(!e.brush.filterSpecified)return"0,"+e.height;for(var t=XNe(e.brush.filter.getConsolidated(),e.height),r=[0],n,i,a,o=t.length?t[0][0]:null,s=0;s<t.length;s++)n=t[s],i=n[1]-n[0],r.push(o),r.push(i),a=s+1,a<t.length&&(o=t[a][0]-n[1]);return r.push(e.height),r}function XNe(e,t){return e.map(function(r){return r.map(function(n){return Math.max(0,n*t)}).sort(x5)})}function ZVt(e,t){var r=bh.bar.handleHeight;if(!(t>e[1]+r||t<e[0]-r))return t>=.9*e[1]+.1*e[0]?"n":t<=.9*e[0]+.1*e[1]?"s":"ns"}function ZNe(){tm.select(document.body).style("cursor",null)}function MK(e){e.attr("stroke-dasharray",XVt)}function Oz(e,t){var r=tm.select(e).selectAll(".highlight, .highlight-shadow"),n=t?r.transition().duration(bh.bar.snapDuration).each("end",t):r;MK(n)}function YNe(e,t){var r=e.brush,n=r.filterSpecified,i=NaN,a={},o;if(n){var s=e.height,l=r.filter.getConsolidated(),u=XNe(l,s),c=NaN,f=NaN,h=NaN;for(o=0;o<=u.length;o++){var d=u[o];if(d&&d[0]<=t&&t<=d[1]){c=o;break}else if(f=o?o-1:NaN,d&&d[0]>t){h=o;break}}if(i=c,isNaN(i)&&(isNaN(f)||isNaN(h)?i=isNaN(f)?h:f:i=t-u[f][1]<u[h][0]-t?f:h),!isNaN(i)){var v=u[i],_=ZVt(v,t);_&&(a.interval=l[i],a.intervalPix=v,a.region=_)}}if(e.ordinal&&!a.region){var b=e.unitTickvals,p=e.unitToPaddedPx.invert(t);for(o=0;o<b.length;o++){var k=[b[Math.max(o-1,0)]*.25+b[o]*.75,b[Math.min(o+1,b.length-1)]*.25+b[o]*.75];if(p>=k[0]&&p<=k[1]){a.clickableOrdinalRange=k;break}}}return a}function YVt(e,t){tm.event.sourceEvent.stopPropagation();var r=t.height-tm.mouse(e)[1]-2*bh.verticalPadding,n=t.unitToPaddedPx.invert(r),i=t.brush,a=YNe(t,r),o=a.interval,s=i.svgBrush;if(s.wasDragged=!1,s.grabbingBar=a.region==="ns",s.grabbingBar){var l=o.map(t.unitToPaddedPx);s.grabPoint=r-l[0]-bh.verticalPadding,s.barLength=l[1]-l[0]}s.clickableOrdinalRange=a.clickableOrdinalRange,s.stayingIntervals=t.multiselect&&i.filterSpecified?i.filter.getConsolidated():[],o&&(s.stayingIntervals=s.stayingIntervals.filter(function(u){return u[0]!==o[0]&&u[1]!==o[1]})),s.startExtent=a.region?o[a.region==="s"?1:0]:n,t.parent.inBrushDrag=!0,s.brushStartCallback()}function KNe(e,t){tm.event.sourceEvent.stopPropagation();var r=t.height-tm.mouse(e)[1]-2*bh.verticalPadding,n=t.brush.svgBrush;n.wasDragged=!0,n._dragging=!0,n.grabbingBar?n.newExtent=[r-n.grabPoint,r+n.barLength-n.grabPoint].map(t.unitToPaddedPx.invert):n.newExtent=[n.startExtent,t.unitToPaddedPx.invert(r)].sort(x5),t.brush.filterSpecified=!0,n.extent=n.stayingIntervals.concat([n.newExtent]),n.brushCallback(t),Oz(e.parentNode)}function KVt(e,t){var r=t.brush,n=r.filter,i=r.svgBrush;i._dragging||(JNe(e,t),KNe(e,t),t.brush.svgBrush.wasDragged=!1),i._dragging=!1;var a=tm.event;a.sourceEvent.stopPropagation();var o=i.grabbingBar;if(i.grabbingBar=!1,i.grabLocation=void 0,t.parent.inBrushDrag=!1,ZNe(),!i.wasDragged){i.wasDragged=void 0,i.clickableOrdinalRange?r.filterSpecified&&t.multiselect?i.extent.push(i.clickableOrdinalRange):(i.extent=[i.clickableOrdinalRange],r.filterSpecified=!0):o?(i.extent=i.stayingIntervals,i.extent.length===0&&SK(r)):SK(r),i.brushCallback(t),Oz(e.parentNode),i.brushEndCallback(r.filterSpecified?n.getConsolidated():[]);return}var s=function(){n.set(n.getConsolidated())};if(t.ordinal){var l=t.unitTickvals;l[l.length-1]<l[0]&&l.reverse(),i.newExtent=[qz(0,l,i.newExtent[0],i.stayingIntervals),qz(1,l,i.newExtent[1],i.stayingIntervals)];var u=i.newExtent[1]>i.newExtent[0];i.extent=i.stayingIntervals.concat(u?[i.newExtent]:[]),i.extent.length||SK(r),i.brushCallback(t),u?Oz(e.parentNode,s):(s(),Oz(e.parentNode))}else s();i.brushEndCallback(r.filterSpecified?n.getConsolidated():[])}function JNe(e,t){var r=t.height-tm.mouse(e)[1]-2*bh.verticalPadding,n=YNe(t,r),i="crosshair";n.clickableOrdinalRange?i="pointer":n.region&&(i=n.region+"-resize"),tm.select(document.body).style("cursor",i)}function JVt(e){e.on("mousemove",function(t){tm.event.preventDefault(),t.parent.inBrushDrag||JNe(this,t)}).on("mouseleave",function(t){t.parent.inBrushDrag||ZNe()}).call(tm.behavior.drag().on("dragstart",function(t){YVt(this,t)}).on("drag",function(t){KNe(this,t)}).on("dragend",function(t){KVt(this,t)}))}function $Ne(e,t){return e[0]-t[0]}function $Vt(e,t,r){var n=r._context.staticPlot,i=e.selectAll(".background").data(zz);i.enter().append("rect").classed("background",!0).call(jVt).call(WVt).style("pointer-events",n?"none":"auto").attr("transform",VVt(0,bh.verticalPadding)),i.call(JVt).attr("height",function(s){return s.height-bh.verticalPadding});var a=e.selectAll(".highlight-shadow").data(zz);a.enter().append("line").classed("highlight-shadow",!0).attr("x",-bh.bar.width/2).attr("stroke-width",bh.bar.width+bh.bar.strokeWidth).attr("stroke",t).attr("opacity",bh.bar.strokeOpacity).attr("stroke-linecap","butt"),a.attr("y1",function(s){return s.height}).call(MK);var o=e.selectAll(".highlight").data(zz);o.enter().append("line").classed("highlight",!0).attr("x",-bh.bar.width/2).attr("stroke-width",bh.bar.width-bh.bar.strokeWidth).attr("stroke",bh.bar.fillColor).attr("opacity",bh.bar.fillOpacity).attr("stroke-linecap","butt"),o.attr("y1",function(s){return s.height}).call(MK)}function QVt(e,t,r){var n=e.selectAll("."+bh.cn.axisBrush).data(zz,UVt);n.enter().append("g").classed(bh.cn.axisBrush,!0),$Vt(n,t,r)}function eGt(e){return e.svgBrush.extent.map(function(t){return t.slice()})}function SK(e){e.filterSpecified=!1,e.svgBrush.extent=[[-1/0,1/0]]}function tGt(e){return function(r){var n=r.brush,i=eGt(n),a=i.slice();n.filter.set(a),e()}}function QNe(e){for(var t=e.slice(),r=[],n,i=t.shift();i;){for(n=i.slice();(i=t.shift())&&i[0]<=n[1];)n[1]=Math.max(n[1],i[1]);r.push(n)}return r.length===1&&r[0][0]>r[0][1]&&(r=[]),r}function rGt(){var e=[],t,r;return{set:function(n){e=n.map(function(i){return i.slice().sort(x5)}).sort($Ne),e.length===1&&e[0][0]===-1/0&&e[0][1]===1/0&&(e=[[0,-1]]),t=QNe(e),r=e.reduce(function(i,a){return[Math.min(i[0],a[0]),Math.max(i[1],a[1])]},[1/0,-1/0])},get:function(){return e.slice()},getConsolidated:function(){return t},getBounds:function(){return r}}}function iGt(e,t,r,n,i,a){var o=rGt();return o.set(r),{filter:o,filterSpecified:t,svgBrush:{extent:[],brushStartCallback:n,brushCallback:tGt(i),brushEndCallback:a}}}function nGt(e,t){if(Array.isArray(e[0])?(e=e.map(function(n){return n.sort(x5)}),t.multiselect?e=QNe(e.sort($Ne)):e=[e[0]]):e=[e.sort(x5)],t.tickvals){var r=t.tickvals.slice().sort(x5);if(e=e.map(function(n){var i=[qz(0,r,n[0],[]),qz(1,r,n[1],[])];if(i[1]>i[0])return i}).filter(function(n){return n}),!e.length)return}return e.length>1?e:e[0]}eUe.exports={makeBrush:iGt,ensureAxisBrush:QVt,cleanRanges:nGt}});var iUe=ye((O_r,rUe)=>{"use strict";var mx=Dr(),aGt=pv().hasColorscale,oGt=Qh(),sGt=Cc().defaults,lGt=Yd(),uGt=ho(),tUe=AK(),cGt=EK(),kK=Pk().maxDimensionCount,fGt=kz();function hGt(e,t,r,n,i){var a=i("line.color",r);if(aGt(e,"line")&&mx.isArrayOrTypedArray(a)){if(a.length)return i("line.colorscale"),oGt(e,t,n,i,{prefix:"line.",cLetter:"c"}),a.length;t.line.color=r}return 1/0}function dGt(e,t,r,n){function i(u,c){return mx.coerce(e,t,tUe.dimensions,u,c)}var a=i("values"),o=i("visible");if(a&&a.length||(o=t.visible=!1),o){i("label"),i("tickvals"),i("ticktext"),i("tickformat");var s=i("range");t._ax={_id:"y",type:"linear",showexponent:"all",exponentformat:"B",range:s},uGt.setConvert(t._ax,n.layout),i("multiselect");var l=i("constraintrange");l&&(t.constraintrange=cGt.cleanRanges(l,t))}}rUe.exports=function(t,r,n,i){function a(c,f){return mx.coerce(t,r,tUe,c,f)}var o=t.dimensions;Array.isArray(o)&&o.length>kK&&(mx.log("parcoords traces support up to "+kK+" dimensions at the moment"),o.splice(kK));var s=lGt(t,r,{name:"dimensions",layout:i,handleItemDefaults:dGt}),l=hGt(t,r,n,i,a);sGt(r,i,a),(!Array.isArray(s)||!s.length)&&(r.visible=!1),fGt(r,s,"values",l);var u=mx.extendFlat({},i.font,{size:Math.round(i.font.size/1.2)});mx.coerceFont(a,"labelfont",u),mx.coerceFont(a,"tickfont",u,{autoShadowDflt:!0}),mx.coerceFont(a,"rangefont",u),a("labelangle"),a("labelside"),a("unselected.line.color"),a("unselected.line.opacity")}});var aUe=ye((q_r,nUe)=>{"use strict";var vGt=Dr().isArrayOrTypedArray,CK=tc(),pGt=Jm().wrap;nUe.exports=function(t,r){var n,i;return CK.hasColorscale(r,"line")&&vGt(r.line.color)?(n=r.line.color,i=CK.extractOpts(r.line).colorscale,CK.calc(t,r,{vals:n,containerStr:"line",cLetter:"c"})):(n=gGt(r._length),i=[[0,r.line.color],[1,r.line.color]]),pGt({lineColor:n,cscale:i})};function gGt(e){for(var t=new Array(e),r=0;r<e;r++)t[r]=.5;return t}});function mGt(e){var c,f;var t,r=[],n=1,i;if(typeof e=="number")return{space:"rgb",values:[e>>>16,(e&65280)>>>8,e&255],alpha:1};if(typeof e=="number")return{space:"rgb",values:[e>>>16,(e&65280)>>>8,e&255],alpha:1};if(e=String(e).toLowerCase(),LK.default[e])r=LK.default[e].slice(),i="rgb";else if(e==="transparent")n=0,i="rgb",r=[0,0,0];else if(e[0]==="#"){var a=e.slice(1),o=a.length,s=o<=4;n=1,s?(r=[parseInt(a[0]+a[0],16),parseInt(a[1]+a[1],16),parseInt(a[2]+a[2],16)],o===4&&(n=parseInt(a[3]+a[3],16)/255)):(r=[parseInt(a[0]+a[1],16),parseInt(a[2]+a[3],16),parseInt(a[4]+a[5],16)],o===8&&(n=parseInt(a[6]+a[7],16)/255)),r[0]||(r[0]=0),r[1]||(r[1]=0),r[2]||(r[2]=0),i="rgb"}else if(t=/^((?:rgba?|hs[lvb]a?|hwba?|cmyk?|xy[zy]|gray|lab|lchu?v?|[ly]uv|lms|oklch|oklab|color))\s*\(([^\)]*)\)/.exec(e)){var l=t[1];i=l.replace(/a$/,"");var u=i==="cmyk"?4:i==="gray"?1:3;r=t[2].trim().split(/\s*[,\/]\s*|\s+/),i==="color"&&(i=r.shift()),r=r.map(function(h,d){if(h[h.length-1]==="%")return h=parseFloat(h)/100,d===3?h:i==="rgb"?h*255:i[0]==="h"||i[0]==="l"&&!d?h*100:i==="lab"?h*125:i==="lch"?d<2?h*150:h*360:i[0]==="o"&&!d?h:i==="oklab"?h*.4:i==="oklch"?d<2?h*.4:h*360:h;if(i[d]==="h"||d===2&&i[i.length-1]==="h"){if(oUe[h]!==void 0)return oUe[h];if(h.endsWith("deg"))return parseFloat(h);if(h.endsWith("turn"))return parseFloat(h)*360;if(h.endsWith("grad"))return parseFloat(h)*360/400;if(h.endsWith("rad"))return parseFloat(h)*180/Math.PI}return h==="none"?0:parseFloat(h)}),n=r.length>u?r.pop():1}else/[0-9](?:\s|\/|,)/.test(e)&&(r=e.match(/([0-9]+)/g).map(function(h){return parseFloat(h)}),i=((f=(c=e.match(/([a-z])/ig))==null?void 0:c.join(""))==null?void 0:f.toLowerCase())||"rgb");return{space:i,values:r,alpha:n}}var LK,sUe,oUe,lUe=gu(()=>{LK=gtt(gX(),1),sUe=mGt,oUe={red:0,orange:60,yellow:120,green:180,blue:240,purple:300}});var Ik,PK=gu(()=>{Ik={name:"rgb",min:[0,0,0],max:[255,255,255],channel:["red","green","blue"],alias:["RGB"]}});var Bz,uUe=gu(()=>{PK();Bz={name:"hsl",min:[0,0,0],max:[360,100,100],channel:["hue","saturation","lightness"],alias:["HSL"],rgb:function(e){var t=e[0]/360,r=e[1]/100,n=e[2]/100,i,a,o,s,l,u=0;if(r===0)return l=n*255,[l,l,l];for(a=n<.5?n*(1+r):n+r-n*r,i=2*n-a,s=[0,0,0];u<3;)o=t+1/3*-(u-1),o<0?o++:o>1&&o--,l=6*o<1?i+(a-i)*6*o:2*o<1?a:3*o<2?i+(a-i)*(2/3-o)*6:i,s[u++]=l*255;return s}};Ik.hsl=function(e){var t=e[0]/255,r=e[1]/255,n=e[2]/255,i=Math.min(t,r,n),a=Math.max(t,r,n),o=a-i,s,l,u;return a===i?s=0:t===a?s=(r-n)/o:r===a?s=2+(n-t)/o:n===a&&(s=4+(t-r)/o),s=Math.min(s*60,360),s<0&&(s+=360),u=(i+a)/2,a===i?l=0:u<=.5?l=o/(a+i):l=o/(2-a-i),[s,l*100,u*100]}});var fUe={};cee(fUe,{default:()=>cUe});function cUe(e){Array.isArray(e)&&e.raw&&(e=String.raw(...arguments)),e instanceof Number&&(e=+e);var t,r,n,i=sUe(e);if(!i.space)return[];let a=i.space[0]==="h"?Bz.min:Ik.min,o=i.space[0]==="h"?Bz.max:Ik.max;return t=Array(3),t[0]=Math.min(Math.max(i.values[0],a[0]),o[0]),t[1]=Math.min(Math.max(i.values[1],a[1]),o[1]),t[2]=Math.min(Math.max(i.values[2],a[2]),o[2]),i.space[0]==="h"&&(t=Bz.rgb(t)),t.push(Math.min(Math.max(i.alpha,0),1)),t}var hUe=gu(()=>{lUe();PK();uUe()});var IK=ye(Nz=>{"use strict";var yGt=Dr().isTypedArray;Nz.convertTypedArray=function(e){return yGt(e)?Array.prototype.slice.call(e):e};Nz.isOrdinal=function(e){return!!e.tickvals};Nz.isVisible=function(e){return e.visible||!("visible"in e)}});var wUe=ye((X_r,bUe)=>{"use strict";var _Gt=["precision highp float;","","varying vec4 fragColor;","","attribute vec4 p01_04, p05_08, p09_12, p13_16,","               p17_20, p21_24, p25_28, p29_32,","               p33_36, p37_40, p41_44, p45_48,","               p49_52, p53_56, p57_60, colors;","","uniform mat4 dim0A, dim1A, dim0B, dim1B, dim0C, dim1C, dim0D, dim1D,","             loA, hiA, loB, hiB, loC, hiC, loD, hiD;","","uniform vec2 resolution, viewBoxPos, viewBoxSize;","uniform float maskHeight;","uniform float drwLayer; // 0: context, 1: focus, 2: pick","uniform vec4 contextColor;","uniform sampler2D maskTexture, palette;","","bool isPick    = (drwLayer > 1.5);","bool isContext = (drwLayer < 0.5);","","const vec4 ZEROS = vec4(0.0, 0.0, 0.0, 0.0);","const vec4 UNITS = vec4(1.0, 1.0, 1.0, 1.0);","","float val(mat4 p, mat4 v) {","    return dot(matrixCompMult(p, v) * UNITS, UNITS);","}","","float axisY(float ratio, mat4 A, mat4 B, mat4 C, mat4 D) {","    float y1 = val(A, dim0A) + val(B, dim0B) + val(C, dim0C) + val(D, dim0D);","    float y2 = val(A, dim1A) + val(B, dim1B) + val(C, dim1C) + val(D, dim1D);","    return y1 * (1.0 - ratio) + y2 * ratio;","}","","int iMod(int a, int b) {","    return a - b * (a / b);","}","","bool fOutside(float p, float lo, float hi) {","    return (lo < hi) && (lo > p || p > hi);","}","","bool vOutside(vec4 p, vec4 lo, vec4 hi) {","    return (","        fOutside(p[0], lo[0], hi[0]) ||","        fOutside(p[1], lo[1], hi[1]) ||","        fOutside(p[2], lo[2], hi[2]) ||","        fOutside(p[3], lo[3], hi[3])","    );","}","","bool mOutside(mat4 p, mat4 lo, mat4 hi) {","    return (","        vOutside(p[0], lo[0], hi[0]) ||","        vOutside(p[1], lo[1], hi[1]) ||","        vOutside(p[2], lo[2], hi[2]) ||","        vOutside(p[3], lo[3], hi[3])","    );","}","","bool outsideBoundingBox(mat4 A, mat4 B, mat4 C, mat4 D) {","    return mOutside(A, loA, hiA) ||","           mOutside(B, loB, hiB) ||","           mOutside(C, loC, hiC) ||","           mOutside(D, loD, hiD);","}","","bool outsideRasterMask(mat4 A, mat4 B, mat4 C, mat4 D) {","    mat4 pnts[4];","    pnts[0] = A;","    pnts[1] = B;","    pnts[2] = C;","    pnts[3] = D;","","    for(int i = 0; i < 4; ++i) {","        for(int j = 0; j < 4; ++j) {","            for(int k = 0; k < 4; ++k) {","                if(0 == iMod(","                    int(255.0 * texture2D(maskTexture,","                        vec2(","                            (float(i * 2 + j / 2) + 0.5) / 8.0,","                            (pnts[i][j][k] * (maskHeight - 1.0) + 1.0) / maskHeight","                        ))[3]","                    ) / int(pow(2.0, float(iMod(j * 4 + k, 8)))),","                    2","                )) return true;","            }","        }","    }","    return false;","}","","vec4 position(bool isContext, float v, mat4 A, mat4 B, mat4 C, mat4 D) {","    float x = 0.5 * sign(v) + 0.5;","    float y = axisY(x, A, B, C, D);","    float z = 1.0 - abs(v);","","    z += isContext ? 0.0 : 2.0 * float(","        outsideBoundingBox(A, B, C, D) ||","        outsideRasterMask(A, B, C, D)","    );","","    return vec4(","        2.0 * (vec2(x, y) * viewBoxSize + viewBoxPos) / resolution - 1.0,","        z,","        1.0","    );","}","","void main() {","    mat4 A = mat4(p01_04, p05_08, p09_12, p13_16);","    mat4 B = mat4(p17_20, p21_24, p25_28, p29_32);","    mat4 C = mat4(p33_36, p37_40, p41_44, p45_48);","    mat4 D = mat4(p49_52, p53_56, p57_60, ZEROS);","","    float v = colors[3];","","    gl_Position = position(isContext, v, A, B, C, D);","","    fragColor =","        isContext ? vec4(contextColor) :","        isPick ? vec4(colors.rgb, 1.0) : texture2D(palette, vec2(abs(v), 0.5));","}"].join(`
+`),xGt=["precision highp float;","","varying vec4 fragColor;","","void main() {","    gl_FragColor = fragColor;","}"].join(`
+`),Rk=Pk().maxDimensionCount,yUe=Dr(),dUe=1e-6,Uz=2048,bGt=new Uint8Array(4),vUe=new Uint8Array(4),pUe={shape:[256,1],format:"rgba",type:"uint8",mag:"nearest",min:"nearest"};function wGt(e){e.read({x:0,y:0,width:1,height:1,data:bGt})}function _Ue(e,t,r,n,i){var a=e._gl;a.enable(a.SCISSOR_TEST),a.scissor(t,r,n,i),e.clear({color:[0,0,0,0],depth:1})}function TGt(e,t,r,n,i,a){var o=a.key;function s(l){var u=Math.min(n,i-l*n);l===0&&(window.cancelAnimationFrame(r.currentRafs[o]),delete r.currentRafs[o],_Ue(e,a.scissorX,a.scissorY,a.scissorWidth,a.viewBoxSize[1])),!r.clearOnly&&(a.count=2*u,a.offset=2*l*n,t(a),l*n+u<i&&(r.currentRafs[o]=window.requestAnimationFrame(function(){s(l+1)})),r.drawCompleted=!1)}r.drawCompleted||(wGt(e),r.drawCompleted=!0),s(0)}function AGt(e){return Math.max(dUe,Math.min(1-dUe,e))}function SGt(e,t){for(var r=new Array(256),n=0;n<256;n++)r[n]=e(n/255).concat(t);return r}function RK(e,t){return(e>>>8*t)%256/255}function MGt(e,t,r){for(var n=new Array(e*(Rk+4)),i=0,a=0;a<e;a++){for(var o=0;o<Rk;o++)n[i++]=o<t.length?t[o].paddedUnitValues[a]:.5;n[i++]=RK(a,2),n[i++]=RK(a,1),n[i++]=RK(a,0),n[i++]=AGt(r[a])}return n}function EGt(e,t,r){for(var n=new Array(t*8),i=0,a=0;a<t;a++)for(var o=0;o<2;o++)for(var s=0;s<4;s++){var l=e*4+s,u=r[a*64+l];l===63&&o===0&&(u*=-1),n[i++]=u}return n}function gUe(e){var t="0"+e;return t.slice(-2)}function xUe(e){return e<Rk?"p"+gUe(e+1)+"_"+gUe(e+4):"colors"}function kGt(e,t,r){for(var n=0;n<=Rk;n+=4)e[xUe(n)](EGt(n/4,t,r))}function CGt(e){for(var t={},r=0;r<=Rk;r+=4)t[xUe(r)]=e.buffer({usage:"dynamic",type:"float",data:new Uint8Array(0)});return t}function LGt(e,t,r,n,i,a,o,s,l,u,c,f,h,d){for(var v=[[],[]],_=0;_<64;_++)v[0][_]=_===i?1:0,v[1][_]=_===a?1:0;o*=d,s*=d,l*=d,u*=d;var b=e.lines.canvasOverdrag*d,p=e.domain,k=e.canvasWidth*d,E=e.canvasHeight*d,S=e.pad.l*d,L=e.pad.b*d,x=e.layoutHeight*d,C=e.layoutWidth*d,M=e.deselectedLines.color,g=e.deselectedLines.opacity,P=yUe.extendFlat({key:c,resolution:[k,E],viewBoxPos:[o+b,s],viewBoxSize:[l,u],i0:i,i1:a,dim0A:v[0].slice(0,16),dim0B:v[0].slice(16,32),dim0C:v[0].slice(32,48),dim0D:v[0].slice(48,64),dim1A:v[1].slice(0,16),dim1B:v[1].slice(16,32),dim1C:v[1].slice(32,48),dim1D:v[1].slice(48,64),drwLayer:f,contextColor:[M[0]/255,M[1]/255,M[2]/255,g!=="auto"?M[3]*g:Math.max(1/255,Math.pow(1/e.lines.color.length,1/3))],scissorX:(n===t?0:o+b)+(S-b)+C*p.x[0],scissorWidth:(n===r?k-o+b:l+.5)+(n===t?o+b:0),scissorY:s+L+x*p.y[0],scissorHeight:u,viewportX:S-b+C*p.x[0],viewportY:L+x*p.y[0],viewportWidth:k,viewportHeight:E},h);return P}function mUe(e){var t=Uz-1,r=Math.max(0,Math.floor(e[0]*t),0),n=Math.min(t,Math.ceil(e[1]*t),t);return[Math.min(r,n),Math.max(r,n)]}bUe.exports=function(e,t){var r=t.context,n=t.pick,i=t.regl,a=i._gl,o=a.getParameter(a.ALIASED_LINE_WIDTH_RANGE),s=Math.max(o[0],Math.min(o[1],t.viewModel.plotGlPixelRatio)),l={currentRafs:{},drawCompleted:!0,clearOnly:!1},u,c,f,h,d=CGt(i),v,_=i.texture(pUe),b=[];k(t);var p=i({profile:!1,blend:{enable:r,func:{srcRGB:"src alpha",dstRGB:"one minus src alpha",srcAlpha:1,dstAlpha:1},equation:{rgb:"add",alpha:"add"},color:[0,0,0,0]},depth:{enable:!r,mask:!0,func:"less",range:[0,1]},cull:{enable:!0,face:"back"},scissor:{enable:!0,box:{x:i.prop("scissorX"),y:i.prop("scissorY"),width:i.prop("scissorWidth"),height:i.prop("scissorHeight")}},viewport:{x:i.prop("viewportX"),y:i.prop("viewportY"),width:i.prop("viewportWidth"),height:i.prop("viewportHeight")},dither:!1,vert:_Gt,frag:xGt,primitive:"lines",lineWidth:s,attributes:d,uniforms:{resolution:i.prop("resolution"),viewBoxPos:i.prop("viewBoxPos"),viewBoxSize:i.prop("viewBoxSize"),dim0A:i.prop("dim0A"),dim1A:i.prop("dim1A"),dim0B:i.prop("dim0B"),dim1B:i.prop("dim1B"),dim0C:i.prop("dim0C"),dim1C:i.prop("dim1C"),dim0D:i.prop("dim0D"),dim1D:i.prop("dim1D"),loA:i.prop("loA"),hiA:i.prop("hiA"),loB:i.prop("loB"),hiB:i.prop("hiB"),loC:i.prop("loC"),hiC:i.prop("hiC"),loD:i.prop("loD"),hiD:i.prop("hiD"),palette:_,contextColor:i.prop("contextColor"),maskTexture:i.prop("maskTexture"),drwLayer:i.prop("drwLayer"),maskHeight:i.prop("maskHeight")},offset:i.prop("offset"),count:i.prop("count")});function k(M){u=M.model,c=M.viewModel,f=c.dimensions.slice(),h=f[0]?f[0].values.length:0;var g=u.lines,P=n?g.color.map(function(z,O){return O/g.color.length}):g.color,T=MGt(h,f,P);kGt(d,h,T),!r&&!n&&(_=i.texture(yUe.extendFlat({data:SGt(u.unitToColor,255)},pUe)))}function E(M){var g,P,T,z=[[],[]];for(T=0;T<64;T++){var O=!M&&T<f.length?f[T].brush.filter.getBounds():[-1/0,1/0];z[0][T]=O[0],z[1][T]=O[1]}var V=Uz*8,G=new Array(V);for(g=0;g<V;g++)G[g]=255;if(!M)for(g=0;g<f.length;g++){var Z=g%8,j=(g-Z)/8,N=Math.pow(2,Z),H=f[g],te=H.brush.filter.get();if(!(te.length<2)){var oe=mUe(te[0])[1];for(P=1;P<te.length;P++){var _e=mUe(te[P]);for(T=oe+1;T<_e[0];T++)G[T*8+j]&=~N;oe=Math.max(oe,_e[1])}}}var Ee={shape:[8,Uz],format:"alpha",type:"uint8",mag:"nearest",min:"nearest",data:G};return v?v(Ee):v=i.texture(Ee),{maskTexture:v,maskHeight:Uz,loA:z[0].slice(0,16),loB:z[0].slice(16,32),loC:z[0].slice(32,48),loD:z[0].slice(48,64),hiA:z[1].slice(0,16),hiB:z[1].slice(16,32),hiC:z[1].slice(32,48),hiD:z[1].slice(48,64)}}function S(M,g,P){var T=M.length,z,O,V,G=1/0,Z=-1/0;for(z=0;z<T;z++)M[z].dim0.canvasX<G&&(G=M[z].dim0.canvasX,O=z),M[z].dim1.canvasX>Z&&(Z=M[z].dim1.canvasX,V=z);T===0&&_Ue(i,0,0,u.canvasWidth,u.canvasHeight);var j=E(r);for(z=0;z<T;z++){var N=M[z],H=N.dim0.crossfilterDimensionIndex,te=N.dim1.crossfilterDimensionIndex,oe=N.canvasX,_e=N.canvasY,Ee=oe+N.panelSizeX,Ce=N.plotGlPixelRatio;if(g||!b[H]||b[H][0]!==oe||b[H][1]!==Ee){b[H]=[oe,Ee];var me=LGt(u,O,V,z,H,te,oe,_e,N.panelSizeX,N.panelSizeY,N.dim0.crossfilterDimensionIndex,r?0:n?2:1,j,Ce);l.clearOnly=P;var ie=g?u.lines.blockLineCount:h;TGt(i,p,l,ie,h,me)}}}function L(M,g){return i.read({x:M,y:g,width:1,height:1,data:vUe}),vUe}function x(M,g,P,T){var z=new Uint8Array(4*P*T);return i.read({x:M,y:g,width:P,height:T,data:z}),z}function C(){e.style["pointer-events"]="none",_.destroy(),v&&v.destroy();for(var M in d)d[M].destroy()}return{render:S,readPixel:L,readPixels:x,destroy:C,update:k}}});var zUe=ye((Z_r,FUe)=>{"use strict";var Bd=Oa(),l1=Dr(),DK=l1.isArrayOrTypedArray,kUe=l1.numberFormat,CUe=(hUe(),ob(fUe)).default,LUe=ho(),PGt=l1.strRotate,$m=l1.strTranslate,IGt=ru(),Vz=So(),TUe=tc(),OK=Jm(),tg=OK.keyFun,Qm=OK.repeat,PUe=OK.unwrap,b5=IK(),Dl=Pk(),IUe=EK(),RGt=wUe();function AUe(e,t,r){return l1.aggNums(e,null,t,r)}function RUe(e,t){return qK(AUe(Math.min,e,t),AUe(Math.max,e,t))}function Gz(e){var t=e.range;return t?qK(t[0],t[1]):RUe(e.values,e._length)}function qK(e,t){return(isNaN(e)||!isFinite(e))&&(e=0),(isNaN(t)||!isFinite(t))&&(t=0),e===t&&(e===0?(e-=1,t+=1):(e*=.9,t*=1.1)),[e,t]}function DGt(e,t){return t?function(r,n){var i=t[n];return i==null?e(r):i}:e}function FGt(e,t,r,n,i){var a=Gz(r);return n?Bd.scale.ordinal().domain(n.map(DGt(kUe(r.tickformat),i))).range(n.map(function(o){var s=(o-a[0])/(a[1]-a[0]);return e-t+s*(2*t-e)})):Bd.scale.linear().domain(a).range([e-t,t])}function zGt(e,t){return Bd.scale.linear().range([t,e-t])}function OGt(e,t){return Bd.scale.linear().domain(Gz(e)).range([t,1-t])}function qGt(e){if(e.tickvals){var t=Gz(e);return Bd.scale.ordinal().domain(e.tickvals).range(e.tickvals.map(function(r){return(r-t[0])/(t[1]-t[0])}))}}function BGt(e){var t=e.map(function(a){return a[0]}),r=e.map(function(a){var o=CUe(a[1]);return Bd.rgb("rgb("+o[0]+","+o[1]+","+o[2]+")")}),n=function(a){return function(o){return o[a]}},i="rgb".split("").map(function(a){return Bd.scale.linear().clamp(!0).domain(t).range(r.map(n(a)))});return function(a){return i.map(function(o){return o(a)})}}function zK(e){return e.dimensions.some(function(t){return t.brush.filterSpecified})}function NGt(e,t,r){var n=PUe(t),i=n.trace,a=b5.convertTypedArray(n.lineColor),o=i.line,s={color:CUe(i.unselected.line.color),opacity:i.unselected.line.opacity},l=TUe.extractOpts(o),u=l.reversescale?TUe.flipScale(n.cscale):n.cscale,c=i.domain,f=i.dimensions,h=e.width,d=i.labelangle,v=i.labelside,_=i.labelfont,b=i.tickfont,p=i.rangefont,k=l1.extendDeepNoArrays({},o,{color:a.map(Bd.scale.linear().domain(Gz({values:a,range:[l.min,l.max],_length:i._length}))),blockLineCount:Dl.blockLineCount,canvasOverdrag:Dl.overdrag*Dl.canvasPixelRatio}),E=Math.floor(h*(c.x[1]-c.x[0])),S=Math.floor(e.height*(c.y[1]-c.y[0])),L=e.margin||{l:80,r:80,t:100,b:80},x=E,C=S;return{key:r,colCount:f.filter(b5.isVisible).length,dimensions:f,tickDistance:Dl.tickDistance,unitToColor:BGt(u),lines:k,deselectedLines:s,labelAngle:d,labelSide:v,labelFont:_,tickFont:b,rangeFont:p,layoutWidth:h,layoutHeight:e.height,domain:c,translateX:c.x[0]*h,translateY:e.height-c.y[1]*e.height,pad:L,canvasWidth:x*Dl.canvasPixelRatio+2*k.canvasOverdrag,canvasHeight:C*Dl.canvasPixelRatio,width:x,height:C,canvasPixelRatio:Dl.canvasPixelRatio}}function UGt(e,t,r){var n=r.width,i=r.height,a=r.dimensions,o=r.canvasPixelRatio,s=function(h){return n*h/Math.max(1,r.colCount-1)},l=Dl.verticalPadding/i,u=zGt(i,Dl.verticalPadding),c={key:r.key,xScale:s,model:r,inBrushDrag:!1},f={};return c.dimensions=a.filter(b5.isVisible).map(function(h,d){var v=OGt(h,l),_=f[h.label];f[h.label]=(_||0)+1;var b=h.label+(_?"__"+_:""),p=h.constraintrange,k=p&&p.length;k&&!DK(p[0])&&(p=[p]);var E=k?p.map(function(O){return O.map(v)}):[[-1/0,1/0]],S=function(){var O=c;O.focusLayer&&O.focusLayer.render(O.panels,!0);var V=zK(O);!e.contextShown()&&V?(O.contextLayer&&O.contextLayer.render(O.panels,!0),e.contextShown(!0)):e.contextShown()&&!V&&(O.contextLayer&&O.contextLayer.render(O.panels,!0,!0),e.contextShown(!1))},L=h.values;L.length>h._length&&(L=L.slice(0,h._length));var x=h.tickvals,C;function M(O,V){return{val:O,text:C[V]}}function g(O,V){return O.val-V.val}if(DK(x)&&x.length){l1.isTypedArray(x)&&(x=Array.from(x)),C=h.ticktext,!DK(C)||!C.length?C=x.map(kUe(h.tickformat)):C.length>x.length?C=C.slice(0,x.length):x.length>C.length&&(x=x.slice(0,C.length));for(var P=1;P<x.length;P++)if(x[P]<x[P-1]){for(var T=x.map(M).sort(g),z=0;z<x.length;z++)x[z]=T[z].val,C[z]=T[z].text;break}}else x=void 0;return L=b5.convertTypedArray(L),{key:b,label:h.label,tickFormat:h.tickformat,tickvals:x,ticktext:C,ordinal:b5.isOrdinal(h),multiselect:h.multiselect,xIndex:d,crossfilterDimensionIndex:d,visibleIndex:h._index,height:i,values:L,paddedUnitValues:L.map(v),unitTickvals:x&&x.map(v),xScale:s,x:s(d),canvasX:s(d)*o,unitToPaddedPx:u,domainScale:FGt(i,Dl.verticalPadding,h,x,C),ordinalScale:qGt(h),parent:c,model:r,brush:IUe.makeBrush(e,k,E,function(){e.linePickActive(!1)},S,function(O){if(c.focusLayer.render(c.panels,!0),c.pickLayer&&c.pickLayer.render(c.panels,!0),e.linePickActive(!0),t&&t.filterChanged){var V=v.invert,G=O.map(function(Z){return Z.map(V).sort(l1.sorterAsc)}).sort(function(Z,j){return Z[0]-j[0]});t.filterChanged(c.key,h._index,G)}})}}),c}function SUe(e){e.classed(Dl.cn.axisExtentText,!0).attr("text-anchor","middle").style("cursor","default")}function VGt(){var e=!0,t=!1;return{linePickActive:function(r){return arguments.length?e=!!r:e},contextShown:function(r){return arguments.length?t=!!r:t}}}function MUe(e,t){var r=t==="top"?1:-1,n=e*Math.PI/180,i=Math.sin(n),a=Math.cos(n);return{dir:r,dx:i,dy:a,degrees:e}}function FK(e,t,r){for(var n=t.panels||(t.panels=[]),i=e.data(),a=0;a<i.length-1;a++){var o=n[a]||(n[a]={}),s=i[a],l=i[a+1];o.dim0=s,o.dim1=l,o.canvasX=s.canvasX,o.panelSizeX=l.canvasX-s.canvasX,o.panelSizeY=t.model.canvasHeight,o.y=0,o.canvasY=0,o.plotGlPixelRatio=r}}function GGt(e){for(var t=0;t<e.length;t++)for(var r=0;r<e[t].length;r++)for(var n=e[t][r].trace,i=n.dimensions,a=0;a<i.length;a++){var o=i[a].values,s=i[a]._ax;s&&(s.range?s.range=qK(s.range[0],s.range[1]):s.range=RUe(o,n._length),s.dtick||(s.dtick=.01*(Math.abs(s.range[1]-s.range[0])||1)),s.tickformat=i[a].tickformat,LUe.calcTicks(s),s.cleanRange())}}function DUe(e,t){return LUe.tickText(e._ax,t,!1).text}function EUe(e,t){if(e.ordinal)return"";var r=e.domainScale.domain(),n=r[t?r.length-1:0];return DUe(e.model.dimensions[e.visibleIndex],n)}FUe.exports=function(t,r,n,i){var a=t._context.staticPlot,o=t._fullLayout,s=o._toppaper,l=o._glcontainer,u=t._context.plotGlPixelRatio,c=t._fullLayout.paper_bgcolor;GGt(r);var f=VGt(),h=r.filter(function(z){return PUe(z).trace.visible}).map(NGt.bind(0,n)).map(UGt.bind(0,f,i));l.each(function(z,O){return l1.extendFlat(z,h[O])});var d=l.selectAll(".gl-canvas").each(function(z){z.viewModel=h[0],z.viewModel.plotGlPixelRatio=u,z.viewModel.paperColor=c,z.model=z.viewModel?z.viewModel.model:null}),v=null,_=d.filter(function(z){return z.pick});_.style("pointer-events",a?"none":"auto").on("mousemove",function(z){if(f.linePickActive()&&z.lineLayer&&i&&i.hover){var O=Bd.event,V=this.width,G=this.height,Z=Bd.mouse(this),j=Z[0],N=Z[1];if(j<0||N<0||j>=V||N>=G)return;var H=z.lineLayer.readPixel(j,G-1-N),te=H[3]!==0,oe=te?H[2]+256*(H[1]+256*H[0]):null,_e={x:j,y:N,clientX:O.clientX,clientY:O.clientY,dataIndex:z.model.key,curveNumber:oe};oe!==v&&(te?i.hover(_e):i.unhover&&i.unhover(_e),v=oe)}}),d.style("opacity",function(z){return z.pick?0:1}),s.style("background","rgba(255, 255, 255, 0)");var b=s.selectAll("."+Dl.cn.parcoords).data(h,tg);b.exit().remove(),b.enter().append("g").classed(Dl.cn.parcoords,!0).style("shape-rendering","crispEdges").style("pointer-events","none"),b.attr("transform",function(z){return $m(z.model.translateX,z.model.translateY)});var p=b.selectAll("."+Dl.cn.parcoordsControlView).data(Qm,tg);p.enter().append("g").classed(Dl.cn.parcoordsControlView,!0),p.attr("transform",function(z){return $m(z.model.pad.l,z.model.pad.t)});var k=p.selectAll("."+Dl.cn.yAxis).data(function(z){return z.dimensions},tg);k.enter().append("g").classed(Dl.cn.yAxis,!0),p.each(function(z){FK(k,z,u)}),d.each(function(z){if(z.viewModel){!z.lineLayer||i?z.lineLayer=RGt(this,z):z.lineLayer.update(z),(z.key||z.key===0)&&(z.viewModel[z.key]=z.lineLayer);var O=!z.context||i;z.lineLayer.render(z.viewModel.panels,O)}}),k.attr("transform",function(z){return $m(z.xScale(z.xIndex),0)}),k.call(Bd.behavior.drag().origin(function(z){return z}).on("drag",function(z){var O=z.parent;f.linePickActive(!1),z.x=Math.max(-Dl.overdrag,Math.min(z.model.width+Dl.overdrag,Bd.event.x)),z.canvasX=z.x*z.model.canvasPixelRatio,k.sort(function(V,G){return V.x-G.x}).each(function(V,G){V.xIndex=G,V.x=z===V?V.x:V.xScale(V.xIndex),V.canvasX=V.x*V.model.canvasPixelRatio}),FK(k,O,u),k.filter(function(V){return Math.abs(z.xIndex-V.xIndex)!==0}).attr("transform",function(V){return $m(V.xScale(V.xIndex),0)}),Bd.select(this).attr("transform",$m(z.x,0)),k.each(function(V,G,Z){Z===z.parent.key&&(O.dimensions[G]=V)}),O.contextLayer&&O.contextLayer.render(O.panels,!1,!zK(O)),O.focusLayer.render&&O.focusLayer.render(O.panels)}).on("dragend",function(z){var O=z.parent;z.x=z.xScale(z.xIndex),z.canvasX=z.x*z.model.canvasPixelRatio,FK(k,O,u),Bd.select(this).attr("transform",function(V){return $m(V.x,0)}),O.contextLayer&&O.contextLayer.render(O.panels,!1,!zK(O)),O.focusLayer&&O.focusLayer.render(O.panels),O.pickLayer&&O.pickLayer.render(O.panels,!0),f.linePickActive(!0),i&&i.axesMoved&&i.axesMoved(O.key,O.dimensions.map(function(V){return V.crossfilterDimensionIndex}))})),k.exit().remove();var E=k.selectAll("."+Dl.cn.axisOverlays).data(Qm,tg);E.enter().append("g").classed(Dl.cn.axisOverlays,!0),E.selectAll("."+Dl.cn.axis).remove();var S=E.selectAll("."+Dl.cn.axis).data(Qm,tg);S.enter().append("g").classed(Dl.cn.axis,!0),S.each(function(z){var O=z.model.height/z.model.tickDistance,V=z.domainScale,G=V.domain();Bd.select(this).call(Bd.svg.axis().orient("left").tickSize(4).outerTickSize(2).ticks(O,z.tickFormat).tickValues(z.ordinal?G:null).tickFormat(function(Z){return b5.isOrdinal(z)?Z:DUe(z.model.dimensions[z.visibleIndex],Z)}).scale(V)),Vz.font(S.selectAll("text"),z.model.tickFont)}),S.selectAll(".domain, .tick>line").attr("fill","none").attr("stroke","black").attr("stroke-opacity",.25).attr("stroke-width","1px"),S.selectAll("text").style("cursor","default");var L=E.selectAll("."+Dl.cn.axisHeading).data(Qm,tg);L.enter().append("g").classed(Dl.cn.axisHeading,!0);var x=L.selectAll("."+Dl.cn.axisTitle).data(Qm,tg);x.enter().append("text").classed(Dl.cn.axisTitle,!0).attr("text-anchor","middle").style("cursor","ew-resize").style("pointer-events",a?"none":"auto"),x.text(function(z){return z.label}).each(function(z){var O=Bd.select(this);Vz.font(O,z.model.labelFont),IGt.convertToTspans(O,t)}).attr("transform",function(z){var O=MUe(z.model.labelAngle,z.model.labelSide),V=Dl.axisTitleOffset;return(O.dir>0?"":$m(0,2*V+z.model.height))+PGt(O.degrees)+$m(-V*O.dx,-V*O.dy)}).attr("text-anchor",function(z){var O=MUe(z.model.labelAngle,z.model.labelSide),V=Math.abs(O.dx),G=Math.abs(O.dy);return 2*V>G?O.dir*O.dx<0?"start":"end":"middle"});var C=E.selectAll("."+Dl.cn.axisExtent).data(Qm,tg);C.enter().append("g").classed(Dl.cn.axisExtent,!0);var M=C.selectAll("."+Dl.cn.axisExtentTop).data(Qm,tg);M.enter().append("g").classed(Dl.cn.axisExtentTop,!0),M.attr("transform",$m(0,-Dl.axisExtentOffset));var g=M.selectAll("."+Dl.cn.axisExtentTopText).data(Qm,tg);g.enter().append("text").classed(Dl.cn.axisExtentTopText,!0).call(SUe),g.text(function(z){return EUe(z,!0)}).each(function(z){Vz.font(Bd.select(this),z.model.rangeFont)});var P=C.selectAll("."+Dl.cn.axisExtentBottom).data(Qm,tg);P.enter().append("g").classed(Dl.cn.axisExtentBottom,!0),P.attr("transform",function(z){return $m(0,z.model.height+Dl.axisExtentOffset)});var T=P.selectAll("."+Dl.cn.axisExtentBottomText).data(Qm,tg);T.enter().append("text").classed(Dl.cn.axisExtentBottomText,!0).attr("dy","0.75em").call(SUe),T.text(function(z){return EUe(z,!1)}).each(function(z){Vz.font(Bd.select(this),z.model.rangeFont)}),IUe.ensureAxisBrush(E,c,t)}});var NK=ye((BK,NUe)=>{"use strict";var HGt=zUe(),jGt=Mz(),OUe=IK().isVisible,BUe={};function qUe(e,t,r){var n=t.indexOf(r),i=e.indexOf(n);return i===-1&&(i+=t.length),i}function WGt(e,t){return function(n,i){return qUe(e,t,n)-qUe(e,t,i)}}var BK=NUe.exports=function(t,r){var n=t._fullLayout,i=jGt(t,[],BUe);if(i){var a={},o={},s={},l={},u=n._size;r.forEach(function(v,_){var b=v[0].trace;s[_]=b.index;var p=l[_]=b.index;a[_]=t.data[p].dimensions,o[_]=t.data[p].dimensions.slice()});var c=function(v,_,b){var p=o[v][_],k=b.map(function(M){return M.slice()}),E="dimensions["+_+"].constraintrange",S=n._tracePreGUI[t._fullData[s[v]]._fullInput.uid];if(S[E]===void 0){var L=p.constraintrange;S[E]=L||null}var x=t._fullData[s[v]].dimensions[_];k.length?(k.length===1&&(k=k[0]),p.constraintrange=k,x.constraintrange=k.slice(),k=[k]):(delete p.constraintrange,delete x.constraintrange,k=null);var C={};C[E]=k,t.emit("plotly_restyle",[C,[l[v]]])},f=function(v){t.emit("plotly_hover",v)},h=function(v){t.emit("plotly_unhover",v)},d=function(v,_){var b=WGt(_,o[v].filter(OUe));a[v].sort(b),o[v].filter(function(p){return!OUe(p)}).sort(function(p){return o[v].indexOf(p)}).forEach(function(p){a[v].splice(a[v].indexOf(p),1),a[v].splice(o[v].indexOf(p),0,p)}),t.emit("plotly_restyle",[{dimensions:[a[v]]},[l[v]]])};HGt(t,r,{width:u.w,height:u.h,margin:{t:u.t,r:u.r,b:u.b,l:u.l}},{filterChanged:c,hover:f,unhover:h,axesMoved:d})}};BK.reglPrecompiled=BUe});var VUe=ye(Dk=>{"use strict";var UUe=Oa(),XGt=Id().getModuleCalcData,ZGt=NK(),YGt=Wp();Dk.name="parcoords";Dk.plot=function(e){var t=XGt(e.calcdata,"parcoords")[0];t.length&&ZGt(e,t)};Dk.clean=function(e,t,r,n){var i=n._has&&n._has("parcoords"),a=t._has&&t._has("parcoords");i&&!a&&(n._paperdiv.selectAll(".parcoords").remove(),n._glimages.selectAll("*").remove())};Dk.toSVG=function(e){var t=e._fullLayout._glimages,r=UUe.select(e).selectAll(".svg-container"),n=r.filter(function(a,o){return o===r.size()-1}).selectAll(".gl-canvas-context, .gl-canvas-focus");function i(){var a=this,o=a.toDataURL("image/png"),s=t.append("svg:image");s.attr({xmlns:YGt.svg,"xlink:href":o,preserveAspectRatio:"none",x:0,y:0,width:a.style.width,height:a.style.height})}n.each(i),window.setTimeout(function(){UUe.selectAll("#filterBarPattern").attr("id","filterBarPattern")},60)}});var HUe=ye((K_r,GUe)=>{"use strict";GUe.exports={attributes:AK(),supplyDefaults:iUe(),calc:aUe(),colorbar:{container:"line",min:"cmin",max:"cmax"},moduleType:"trace",name:"parcoords",basePlotModule:VUe(),categories:["gl","regl","noOpacity","noHover"],meta:{}}});var XUe=ye((J_r,WUe)=>{"use strict";var jUe=HUe();jUe.plot=NK();WUe.exports=jUe});var YUe=ye(($_r,ZUe)=>{"use strict";ZUe.exports=XUe()});var UK=ye((Q_r,eVe)=>{"use strict";var JUe=Ao().extendFlat,KGt=Gl(),KUe=ec(),JGt=Tu(),{hovertemplateAttrs:$Ue,templatefallbackAttrs:QUe}=Ll(),$Gt=Cc().attributes,QGt=JUe({editType:"calc"},JGt("line",{editTypeOverride:"calc"}),{shape:{valType:"enumerated",values:["linear","hspline"],dflt:"linear",editType:"plot"},hovertemplate:$Ue({editType:"plot",arrayOk:!1},{keys:["count","probability"]}),hovertemplatefallback:QUe({editType:"plot"})});eVe.exports={domain:$Gt({name:"parcats",trace:!0,editType:"calc"}),hoverinfo:JUe({},KGt.hoverinfo,{flags:["count","probability"],editType:"plot",arrayOk:!1}),hoveron:{valType:"enumerated",values:["category","color","dimension"],dflt:"category",editType:"plot"},hovertemplate:$Ue({editType:"plot",arrayOk:!1},{keys:["count","probability","category","categorycount","colorcount","bandcolorcount"]}),hovertemplatefallback:QUe({editType:"plot"}),arrangement:{valType:"enumerated",values:["perpendicular","freeform","fixed"],dflt:"perpendicular",editType:"plot"},bundlecolors:{valType:"boolean",dflt:!0,editType:"plot"},sortpaths:{valType:"enumerated",values:["forward","backward"],dflt:"forward",editType:"plot"},labelfont:KUe({editType:"calc"}),tickfont:KUe({autoShadowDflt:!0,editType:"calc"}),dimensions:{_isLinkedToArray:"dimension",label:{valType:"string",editType:"calc"},categoryorder:{valType:"enumerated",values:["trace","category ascending","category descending","array"],dflt:"trace",editType:"calc"},categoryarray:{valType:"data_array",editType:"calc"},ticktext:{valType:"data_array",editType:"calc"},values:{valType:"data_array",dflt:[],editType:"calc"},displayindex:{valType:"integer",editType:"calc"},editType:"calc",visible:{valType:"boolean",dflt:!0,editType:"calc"}},line:QGt,counts:{valType:"number",min:0,dflt:1,arrayOk:!0,editType:"calc"},customdata:void 0,hoverlabel:void 0,ids:void 0,legend:void 0,legendgroup:void 0,legendrank:void 0,opacity:void 0,selectedpoints:void 0,showlegend:void 0}});var iVe=ye((exr,rVe)=>{"use strict";var w5=Dr(),eHt=pv().hasColorscale,tHt=Qh(),rHt=Cc().defaults,iHt=Yd(),tVe=UK(),nHt=kz(),aHt=vv().isTypedArraySpec;function oHt(e,t,r,n,i){i("line.shape"),i("line.hovertemplate"),i("line.hovertemplatefallback");var a=i("line.color",n.colorway[0]);if(eHt(e,"line")&&w5.isArrayOrTypedArray(a)){if(a.length)return i("line.colorscale"),tHt(e,t,n,i,{prefix:"line.",cLetter:"c"}),a.length;t.line.color=r}return 1/0}function sHt(e,t){function r(u,c){return w5.coerce(e,t,tVe.dimensions,u,c)}var n=r("values"),i=r("visible");if(n&&n.length||(i=t.visible=!1),i){r("label"),r("displayindex",t._index);var a=e.categoryarray,o=w5.isArrayOrTypedArray(a)&&a.length>0||aHt(a),s;o&&(s="array");var l=r("categoryorder",s);l==="array"?(r("categoryarray"),r("ticktext")):(delete e.categoryarray,delete e.ticktext),!o&&l==="array"&&(t.categoryorder="trace")}}rVe.exports=function(t,r,n,i){function a(u,c){return w5.coerce(t,r,tVe,u,c)}var o=iHt(t,r,{name:"dimensions",handleItemDefaults:sHt}),s=oHt(t,r,n,i,a);rHt(r,i,a),(!Array.isArray(o)||!o.length)&&(r.visible=!1),nHt(r,o,"values",s),a("hoveron"),a("hovertemplate"),a("hovertemplatefallback"),a("arrangement"),a("bundlecolors"),a("sortpaths"),a("counts");var l=i.font;w5.coerceFont(a,"labelfont",l,{overrideDflt:{size:Math.round(l.size)}}),w5.coerceFont(a,"tickfont",l,{autoShadowDflt:!0,overrideDflt:{size:Math.round(l.size/1.2)}})}});var aVe=ye((txr,nVe)=>{"use strict";var lHt=Jm().wrap,uHt=pv().hasColorscale,cHt=gv(),fHt=JO(),hHt=So(),Fk=Dr(),dHt=Eo();nVe.exports=function(t,r){var n=Fk.filterVisible(r.dimensions);if(n.length===0)return[];var i=n.map(function(g){var P;if(g.categoryorder==="trace")P=null;else if(g.categoryorder==="array")P=g.categoryarray;else{P=fHt(g.values);for(var T=!0,z=0;z<P.length;z++)if(!dHt(P[z])){T=!1;break}P.sort(T?Fk.sorterAsc:void 0),g.categoryorder==="category descending"&&(P=P.reverse())}return xHt(g.values,P)}),a,o,s;Fk.isArrayOrTypedArray(r.counts)?a=r.counts:a=[r.counts],bHt(n),n.forEach(function(g,P){wHt(g,i[P])});var l=r.line,u;l?(uHt(r,"line")&&cHt(t,r,{vals:r.line.color,containerStr:"line",cLetter:"c"}),u=hHt.tryColorscale(l)):u=Fk.identity;function c(g){var P,T;return Fk.isArrayOrTypedArray(l.color)?(P=l.color[g%l.color.length],T=P):P=l.color,{color:u(P),rawColor:T}}var f=n[0].values.length,h={},d=i.map(function(g){return g.inds});s=0;var v,_;for(v=0;v<f;v++){var b=[];for(_=0;_<d.length;_++)b.push(d[_][v]);o=a[v%a.length],s+=o;var p=c(v),k=b+"-"+p.rawColor;h[k]===void 0&&(h[k]=yHt(b,p.color,p.rawColor)),_Ht(h[k],v,o)}var E=n.map(function(g,P){return pHt(P,g._index,g._displayindex,g.label,s)});for(v=0;v<f;v++)for(o=a[v%a.length],_=0;_<E.length;_++){var S=E[_].containerInd,L=i[_].inds[v],x=E[_].categories;if(x[L]===void 0){var C=r.dimensions[S]._categoryarray[L],M=r.dimensions[S]._ticktext[L];x[L]=gHt(_,L,C,M)}mHt(x[L],v,o)}return lHt(vHt(E,h,s))};function vHt(e,t,r){var n=e.map(function(i){return i.categories.length}).reduce(function(i,a){return Math.max(i,a)});return{dimensions:e,paths:t,trace:void 0,maxCats:n,count:r}}function pHt(e,t,r,n,i){return{dimensionInd:e,containerInd:t,displayInd:r,dimensionLabel:n,count:i,categories:[],dragX:null}}function gHt(e,t,r,n){return{dimensionInd:e,categoryInd:t,categoryValue:r,displayInd:t,categoryLabel:n,valueInds:[],count:0,dragY:null}}function mHt(e,t,r){e.valueInds.push(t),e.count+=r}function yHt(e,t,r){return{categoryInds:e,color:t,rawColor:r,valueInds:[],count:0}}function _Ht(e,t,r){e.valueInds.push(t),e.count+=r}function xHt(e,t){t==null?t=[]:t=t.map(function(u){return u});var r={},n={},i=[];t.forEach(function(u,c){r[u]=0,n[u]=c});for(var a=0;a<e.length;a++){var o=e[a],s;r[o]===void 0?(r[o]=1,s=t.push(o)-1,n[o]=s):(r[o]++,s=n[o]),i.push(s)}var l=t.map(function(u){return r[u]});return{uniqueValues:t,uniqueCounts:l,inds:i}}function bHt(e){var t=e.map(function(n){return n.displayindex}),r;if(THt(t))for(r=0;r<e.length;r++)e[r]._displayindex=e[r].displayindex;else for(r=0;r<e.length;r++)e[r]._displayindex=r}function wHt(e,t){e._categoryarray=t.uniqueValues,e.ticktext===null||e.ticktext===void 0?e._ticktext=[]:e._ticktext=e.ticktext.slice();for(var r=e._ticktext.length;r<t.uniqueValues.length;r++)e._ticktext.push(t.uniqueValues[r])}function THt(e){for(var t=new Array(e.length),r=0;r<e.length;r++){if(e[r]<0||e[r]>=e.length||t[e[r]]!==void 0)return!1;t[e[r]]=!0}return!0}});var vVe=ye((rxr,dVe)=>{"use strict";var Fl=Oa(),AHt=(F2(),ob(D2)).interpolateNumber,SHt=HP(),qk=vf(),yx=Dr(),zk=yx.strTranslate,oVe=So(),VK=cd(),MHt=ru();function EHt(e,t,r,n){var i=t._context.staticPlot,a=e.map(VHt.bind(0,t,r)),o=n.selectAll("g.parcatslayer").data([null]);o.enter().append("g").attr("class","parcatslayer").style("pointer-events",i?"none":"all");var s=o.selectAll("g.trace.parcats").data(a,u1),l=s.enter().append("g").attr("class","trace parcats");s.attr("transform",function(k){return zk(k.x,k.y)}),l.append("g").attr("class","paths");var u=s.select("g.paths"),c=u.selectAll("path.path").data(function(k){return k.paths},u1);c.attr("fill",function(k){return k.model.color});var f=c.enter().append("path").attr("class","path").attr("stroke-opacity",0).attr("fill",function(k){return k.model.color}).attr("fill-opacity",0);jK(f),c.attr("d",function(k){return k.svgD}),f.empty()||c.sort(GK),c.exit().remove(),c.on("mouseover",kHt).on("mouseout",CHt).on("click",LHt),l.append("g").attr("class","dimensions");var h=s.select("g.dimensions"),d=h.selectAll("g.dimension").data(function(k){return k.dimensions},u1);d.enter().append("g").attr("class","dimension"),d.attr("transform",function(k){return zk(k.x,0)}),d.exit().remove();var v=d.selectAll("g.category").data(function(k){return k.categories},u1),_=v.enter().append("g").attr("class","category");v.attr("transform",function(k){return zk(0,k.y)}),_.append("rect").attr("class","catrect").attr("pointer-events","none"),v.select("rect.catrect").attr("fill","none").attr("width",function(k){return k.width}).attr("height",function(k){return k.height}),lVe(_);var b=v.selectAll("rect.bandrect").data(function(k){return k.bands},u1);b.each(function(){yx.raiseToTop(this)}),b.attr("fill",function(k){return k.color});var p=b.enter().append("rect").attr("class","bandrect").attr("stroke-opacity",0).attr("fill",function(k){return k.color}).attr("fill-opacity",0);b.attr("fill",function(k){return k.color}).attr("width",function(k){return k.width}).attr("height",function(k){return k.height}).attr("y",function(k){return k.y}).attr("cursor",function(k){return k.parcatsViewModel.arrangement==="fixed"?"default":k.parcatsViewModel.arrangement==="perpendicular"?"ns-resize":"move"}),XK(p),b.exit().remove(),_.append("text").attr("class","catlabel").attr("pointer-events","none"),v.select("text.catlabel").attr("text-anchor",function(k){return Ok(k)?"start":"end"}).attr("alignment-baseline","middle").style("fill","rgb(0, 0, 0)").attr("x",function(k){return Ok(k)?k.width+5:-5}).attr("y",function(k){return k.height/2}).text(function(k){return k.model.categoryLabel}).each(function(k){oVe.font(Fl.select(this),k.parcatsViewModel.categorylabelfont),MHt.convertToTspans(Fl.select(this),t)}),_.append("text").attr("class","dimlabel"),v.select("text.dimlabel").attr("text-anchor","middle").attr("alignment-baseline","baseline").attr("cursor",function(k){return k.parcatsViewModel.arrangement==="fixed"?"default":"ew-resize"}).attr("x",function(k){return k.width/2}).attr("y",-5).text(function(k,E){return E===0?k.parcatsViewModel.model.dimensions[k.model.dimensionInd].dimensionLabel:null}).each(function(k){oVe.font(Fl.select(this),k.parcatsViewModel.labelfont)}),v.selectAll("rect.bandrect").on("mouseover",OHt).on("mouseout",qHt),v.exit().remove(),d.call(Fl.behavior.drag().origin(function(k){return{x:k.x,y:0}}).on("dragstart",BHt).on("drag",NHt).on("dragend",UHt)),s.each(function(k){k.traceSelection=Fl.select(this),k.pathSelection=Fl.select(this).selectAll("g.paths").selectAll("path.path"),k.dimensionSelection=Fl.select(this).selectAll("g.dimensions").selectAll("g.dimension")}),s.exit().remove()}dVe.exports=function(e,t,r,n){EHt(r,e,n,t)};function u1(e){return e.key}function Ok(e){var t=e.parcatsViewModel.dimensions.length,r=e.parcatsViewModel.dimensions[t-1].model.dimensionInd;return e.model.dimensionInd===r}function GK(e,t){return e.model.rawColor>t.model.rawColor?1:e.model.rawColor<t.model.rawColor?-1:0}function kHt(e){if(!e.parcatsViewModel.dragDimension&&e.parcatsViewModel.hoverinfoItems.indexOf("skip")===-1){yx.raiseToTop(this),WK(Fl.select(this));var t=Bk(e),r=HK(e);if(e.parcatsViewModel.graphDiv.emit("plotly_hover",{points:t,event:Fl.event,constraints:r}),e.parcatsViewModel.hoverinfoItems.indexOf("none")===-1){var n=Fl.mouse(this)[0],i=e.parcatsViewModel.graphDiv,a=e.parcatsViewModel.trace,o=i._fullLayout,s=o._paperdiv.node().getBoundingClientRect(),l=e.parcatsViewModel.graphDiv.getBoundingClientRect(),u,c,f;for(f=0;f<e.leftXs.length-1;f++)if(e.leftXs[f]+e.dimWidths[f]-2<=n&&n<=e.leftXs[f+1]+2){var h=e.parcatsViewModel.dimensions[f],d=e.parcatsViewModel.dimensions[f+1];u=(h.x+h.width+d.x)/2,c=(e.topYs[f]+e.topYs[f+1]+e.height)/2;break}var v=e.parcatsViewModel.x+u,_=e.parcatsViewModel.y+c,b=VK.mostReadable(e.model.color,["black","white"]),p=e.model.count,k=p/e.parcatsViewModel.model.count,E={countLabel:p,probabilityLabel:k.toFixed(3)},S=[];e.parcatsViewModel.hoverinfoItems.indexOf("count")!==-1&&S.push(["Count:",E.countLabel].join(" ")),e.parcatsViewModel.hoverinfoItems.indexOf("probability")!==-1&&S.push(["P:",E.probabilityLabel].join(" "));var L=S.join("<br>"),x=Fl.mouse(i)[0];qk.loneHover({trace:a,x:v-s.left+l.left,y:_-s.top+l.top,text:L,color:e.model.color,borderColor:"black",fontFamily:'Monaco, "Courier New", monospace',fontSize:10,fontColor:b,idealAlign:x<v?"right":"left",hovertemplate:(a.line||{}).hovertemplate,hovertemplateLabels:E,eventData:[{data:a._input,fullData:a,count:p,probability:k}]},{container:o._hoverlayer.node(),outerContainer:o._paper.node(),gd:i})}}}function CHt(e){if(!e.parcatsViewModel.dragDimension&&(jK(Fl.select(this)),qk.loneUnhover(e.parcatsViewModel.graphDiv._fullLayout._hoverlayer.node()),e.parcatsViewModel.pathSelection.sort(GK),e.parcatsViewModel.hoverinfoItems.indexOf("skip")===-1)){var t=Bk(e),r=HK(e);e.parcatsViewModel.graphDiv.emit("plotly_unhover",{points:t,event:Fl.event,constraints:r})}}function Bk(e){for(var t=[],r=cVe(e.parcatsViewModel),n=0;n<e.model.valueInds.length;n++){var i=e.model.valueInds[n];t.push({curveNumber:r,pointNumber:i})}return t}function HK(e){for(var t={},r=e.parcatsViewModel.model.dimensions,n=0;n<r.length;n++){var i=r[n],a=i.categories[e.model.categoryInds[n]];t[i.containerInd]=a.categoryValue}return e.model.rawColor!==void 0&&(t.color=e.model.rawColor),t}function LHt(e){if(e.parcatsViewModel.hoverinfoItems.indexOf("skip")===-1){var t=Bk(e),r=HK(e);e.parcatsViewModel.graphDiv.emit("plotly_click",{points:t,event:Fl.event,constraints:r})}}function jK(e){e.attr("fill",function(t){return t.model.color}).attr("fill-opacity",.6).attr("stroke","lightgray").attr("stroke-width",.2).attr("stroke-opacity",1)}function WK(e){e.attr("fill-opacity",.8).attr("stroke",function(t){return VK.mostReadable(t.model.color,["black","white"])}).attr("stroke-width",.3)}function PHt(e){e.select("rect.catrect").attr("stroke","black").attr("stroke-width",2.5)}function lVe(e){e.select("rect.catrect").attr("stroke","black").attr("stroke-width",1).attr("stroke-opacity",1)}function IHt(e){e.attr("stroke","black").attr("stroke-width",1.5)}function XK(e){e.attr("stroke","black").attr("stroke-width",.2).attr("stroke-opacity",1).attr("fill-opacity",1)}function Hz(e){var t=e.parcatsViewModel.pathSelection,r=e.categoryViewModel.model.dimensionInd,n=e.categoryViewModel.model.categoryInd;return t.filter(function(i){return i.model.categoryInds[r]===n&&i.model.color===e.color})}function RHt(e){var t=Fl.select(e.parentNode).selectAll("rect.bandrect");t.each(function(r){var n=Hz(r);WK(n),n.each(function(){yx.raiseToTop(this)})}),PHt(Fl.select(e.parentNode))}function DHt(e){var t=Fl.select(e).datum(),r=Hz(t);WK(r),r.each(function(){yx.raiseToTop(this)}),Fl.select(e.parentNode).selectAll("rect.bandrect").filter(function(n){return n.color===t.color}).each(function(){yx.raiseToTop(this),IHt(Fl.select(this))})}function ZK(e,t,r){var n=Fl.select(e).datum(),i=n.categoryViewModel.model,a=n.parcatsViewModel.graphDiv,o=Fl.select(e.parentNode).selectAll("rect.bandrect"),s=[];o.each(function(u){var c=Hz(u);c.each(function(f){Array.prototype.push.apply(s,Bk(f))})});var l={};l[i.dimensionInd]=i.categoryValue,a.emit(t,{points:s,event:r,constraints:l})}function YK(e,t,r){var n=Fl.select(e).datum(),i=n.categoryViewModel.model,a=n.parcatsViewModel.graphDiv,o=Hz(n),s=[];o.each(function(u){Array.prototype.push.apply(s,Bk(u))});var l={};l[i.dimensionInd]=i.categoryValue,n.rawColor!==void 0&&(l.color=n.rawColor),a.emit(t,{points:s,event:r,constraints:l})}function uVe(e,t,r){e._fullLayout._calcInverseTransform(e);var n=e._fullLayout._invScaleX,i=e._fullLayout._invScaleY,a=Fl.select(r.parentNode).select("rect.catrect"),o=a.node().getBoundingClientRect(),s=a.datum(),l=s.parcatsViewModel,u=l.model.dimensions[s.model.dimensionInd],c=l.trace,f=o.top+o.height/2,h,d;l.dimensions.length>1&&u.displayInd===l.dimensions.length-1?(h=o.left,d="left"):(h=o.left+o.width,d="right");var v=s.model.count,_=s.model.categoryLabel,b=v/s.parcatsViewModel.model.count,p={countLabel:v,categoryLabel:_,probabilityLabel:b.toFixed(3)},k=[];s.parcatsViewModel.hoverinfoItems.indexOf("count")!==-1&&k.push(["Count:",p.countLabel].join(" ")),s.parcatsViewModel.hoverinfoItems.indexOf("probability")!==-1&&k.push(["P("+p.categoryLabel+"):",p.probabilityLabel].join(" "));var E=k.join("<br>");return{trace:c,x:n*(h-t.left),y:i*(f-t.top),text:E,color:"lightgray",borderColor:"black",fontFamily:'Monaco, "Courier New", monospace',fontSize:12,fontColor:"black",idealAlign:d,hovertemplate:c.hovertemplate,hovertemplateLabels:p,eventData:[{data:c._input,fullData:c,count:v,category:_,probability:b}]}}function FHt(e,t,r){var n=[];return Fl.select(r.parentNode.parentNode).selectAll("g.category").select("rect.catrect").each(function(){var i=this;n.push(uVe(e,t,i))}),n}function zHt(e,t,r){e._fullLayout._calcInverseTransform(e);var n=e._fullLayout._invScaleX,i=e._fullLayout._invScaleY,a=r.getBoundingClientRect(),o=Fl.select(r).datum(),s=o.categoryViewModel,l=s.parcatsViewModel,u=l.model.dimensions[s.model.dimensionInd],c=l.trace,f=a.y+a.height/2,h,d;l.dimensions.length>1&&u.displayInd===l.dimensions.length-1?(h=a.left,d="left"):(h=a.left+a.width,d="right");var v=s.model.categoryLabel,_=o.parcatsViewModel.model.count,b=0;o.categoryViewModel.bands.forEach(function(P){P.color===o.color&&(b+=P.count)});var p=s.model.count,k=0;l.pathSelection.each(function(P){P.model.color===o.color&&(k+=P.model.count)});var E=b/_,S=b/k,L=b/p,x={countLabel:b,categoryLabel:v,probabilityLabel:E.toFixed(3)},C=[];s.parcatsViewModel.hoverinfoItems.indexOf("count")!==-1&&C.push(["Count:",x.countLabel].join(" ")),s.parcatsViewModel.hoverinfoItems.indexOf("probability")!==-1&&(C.push("P(color \u2229 "+v+"): "+x.probabilityLabel),C.push("P("+v+" | color): "+S.toFixed(3)),C.push("P(color | "+v+"): "+L.toFixed(3)));var M=C.join("<br>"),g=VK.mostReadable(o.color,["black","white"]);return{trace:c,x:n*(h-t.left),y:i*(f-t.top),text:M,color:o.color,borderColor:"black",fontFamily:'Monaco, "Courier New", monospace',fontColor:g,fontSize:10,idealAlign:d,hovertemplate:c.hovertemplate,hovertemplateLabels:x,eventData:[{data:c._input,fullData:c,category:v,count:_,probability:E,categorycount:p,colorcount:k,bandcolorcount:b}]}}function OHt(e){if(!e.parcatsViewModel.dragDimension&&e.parcatsViewModel.hoverinfoItems.indexOf("skip")===-1){var t=Fl.mouse(this)[1];if(t<-1)return;var r=e.parcatsViewModel.graphDiv,n=r._fullLayout,i=n._paperdiv.node().getBoundingClientRect(),a=e.parcatsViewModel.hoveron,o=this;if(a==="color"?(DHt(o),YK(o,"plotly_hover",Fl.event)):(RHt(o),ZK(o,"plotly_hover",Fl.event)),e.parcatsViewModel.hoverinfoItems.indexOf("none")===-1){var s;a==="category"?s=uVe(r,i,o):a==="color"?s=zHt(r,i,o):a==="dimension"&&(s=FHt(r,i,o)),s&&qk.loneHover(s,{container:n._hoverlayer.node(),outerContainer:n._paper.node(),gd:r})}}}function qHt(e){var t=e.parcatsViewModel;if(!t.dragDimension&&(jK(t.pathSelection),lVe(t.dimensionSelection.selectAll("g.category")),XK(t.dimensionSelection.selectAll("g.category").selectAll("rect.bandrect")),qk.loneUnhover(t.graphDiv._fullLayout._hoverlayer.node()),t.pathSelection.sort(GK),t.hoverinfoItems.indexOf("skip")===-1)){var r=e.parcatsViewModel.hoveron,n=this;r==="color"?YK(n,"plotly_unhover",Fl.event):ZK(n,"plotly_unhover",Fl.event)}}function BHt(e){e.parcatsViewModel.arrangement!=="fixed"&&(e.dragDimensionDisplayInd=e.model.displayInd,e.initialDragDimensionDisplayInds=e.parcatsViewModel.model.dimensions.map(function(t){return t.displayInd}),e.dragHasMoved=!1,e.dragCategoryDisplayInd=null,Fl.select(this).selectAll("g.category").select("rect.catrect").each(function(t){var r=Fl.mouse(this)[0],n=Fl.mouse(this)[1];-2<=r&&r<=t.width+2&&-2<=n&&n<=t.height+2&&(e.dragCategoryDisplayInd=t.model.displayInd,e.initialDragCategoryDisplayInds=e.model.categories.map(function(i){return i.displayInd}),t.model.dragY=t.y,yx.raiseToTop(this.parentNode),Fl.select(this.parentNode).selectAll("rect.bandrect").each(function(i){i.y<n&&n<=i.y+i.height&&(e.potentialClickBand=this)}))}),e.parcatsViewModel.dragDimension=e,qk.loneUnhover(e.parcatsViewModel.graphDiv._fullLayout._hoverlayer.node()))}function NHt(e){if(e.parcatsViewModel.arrangement!=="fixed"&&(e.dragHasMoved=!0,e.dragDimensionDisplayInd!==null)){var t=e.dragDimensionDisplayInd,r=t-1,n=t+1,i=e.parcatsViewModel.dimensions[t];if(e.dragCategoryDisplayInd!==null){var a=i.categories[e.dragCategoryDisplayInd];a.model.dragY+=Fl.event.dy;var o=a.model.dragY,s=a.model.displayInd,l=i.categories,u=l[s-1],c=l[s+1];u!==void 0&&o<u.y+u.height/2&&(a.model.displayInd=u.model.displayInd,u.model.displayInd=s),c!==void 0&&o+a.height>c.y+c.height/2&&(a.model.displayInd=c.model.displayInd,c.model.displayInd=s),e.dragCategoryDisplayInd=a.model.displayInd}if(e.dragCategoryDisplayInd===null||e.parcatsViewModel.arrangement==="freeform"){i.model.dragX=Fl.event.x;var f=e.parcatsViewModel.dimensions[r],h=e.parcatsViewModel.dimensions[n];f!==void 0&&i.model.dragX<f.x+f.width&&(i.model.displayInd=f.model.displayInd,f.model.displayInd=t),h!==void 0&&i.model.dragX+i.width>h.x&&(i.model.displayInd=h.model.displayInd,h.model.displayInd=e.dragDimensionDisplayInd),e.dragDimensionDisplayInd=i.model.displayInd}JK(e.parcatsViewModel),KK(e.parcatsViewModel),hVe(e.parcatsViewModel),fVe(e.parcatsViewModel)}}function UHt(e){if(e.parcatsViewModel.arrangement!=="fixed"&&e.dragDimensionDisplayInd!==null){Fl.select(this).selectAll("text").attr("font-weight","normal");var t={},r=cVe(e.parcatsViewModel),n=e.parcatsViewModel.model.dimensions.map(function(h){return h.displayInd}),i=e.initialDragDimensionDisplayInds.some(function(h,d){return h!==n[d]});i&&n.forEach(function(h,d){var v=e.parcatsViewModel.model.dimensions[d].containerInd;t["dimensions["+v+"].displayindex"]=h});var a=!1;if(e.dragCategoryDisplayInd!==null){var o=e.model.categories.map(function(h){return h.displayInd});if(a=e.initialDragCategoryDisplayInds.some(function(h,d){return h!==o[d]}),a){var s=e.model.categories.slice().sort(function(h,d){return h.displayInd-d.displayInd}),l=s.map(function(h){return h.categoryValue}),u=s.map(function(h){return h.categoryLabel});t["dimensions["+e.model.containerInd+"].categoryarray"]=[l],t["dimensions["+e.model.containerInd+"].ticktext"]=[u],t["dimensions["+e.model.containerInd+"].categoryorder"]="array"}}if(e.parcatsViewModel.hoverinfoItems.indexOf("skip")===-1&&!e.dragHasMoved&&e.potentialClickBand&&(e.parcatsViewModel.hoveron==="color"?YK(e.potentialClickBand,"plotly_click",Fl.event.sourceEvent):ZK(e.potentialClickBand,"plotly_click",Fl.event.sourceEvent)),e.model.dragX=null,e.dragCategoryDisplayInd!==null){var c=e.parcatsViewModel.dimensions[e.dragDimensionDisplayInd].categories[e.dragCategoryDisplayInd];c.model.dragY=null,e.dragCategoryDisplayInd=null}e.dragDimensionDisplayInd=null,e.parcatsViewModel.dragDimension=null,e.dragHasMoved=null,e.potentialClickBand=null,JK(e.parcatsViewModel),KK(e.parcatsViewModel);var f=Fl.transition().duration(300).ease("cubic-in-out");f.each(function(){hVe(e.parcatsViewModel,!0),fVe(e.parcatsViewModel,!0)}).each("end",function(){(i||a)&&SHt.restyle(e.parcatsViewModel.graphDiv,t,[r])})}}function cVe(e){for(var t,r=e.graphDiv._fullData,n=0;n<r.length;n++)if(e.key===r[n].uid){t=n;break}return t}function fVe(e,t){t===void 0&&(t=!1);function r(n){return t?n.transition():n}e.pathSelection.data(function(n){return n.paths},u1),r(e.pathSelection).attr("d",function(n){return n.svgD})}function hVe(e,t){t===void 0&&(t=!1);function r(l){return t?l.transition():l}e.dimensionSelection.data(function(l){return l.dimensions},u1);var n=e.dimensionSelection.selectAll("g.category").data(function(l){return l.categories},u1);r(e.dimensionSelection).attr("transform",function(l){return zk(l.x,0)}),r(n).attr("transform",function(l){return zk(0,l.y)});var i=n.select(".dimlabel");i.text(function(l,u){return u===0?l.parcatsViewModel.model.dimensions[l.model.dimensionInd].dimensionLabel:null});var a=n.select(".catlabel");a.attr("text-anchor",function(l){return Ok(l)?"start":"end"}).attr("x",function(l){return Ok(l)?l.width+5:-5}).each(function(l){var u,c;Ok(l)?(u=l.width+5,c="start"):(u=-5,c="end"),Fl.select(this).selectAll("tspan").attr("x",u).attr("text-anchor",c)});var o=n.selectAll("rect.bandrect").data(function(l){return l.bands},u1),s=o.enter().append("rect").attr("class","bandrect").attr("cursor","move").attr("stroke-opacity",0).attr("fill",function(l){return l.color}).attr("fill-opacity",0);o.attr("fill",function(l){return l.color}).attr("width",function(l){return l.width}).attr("height",function(l){return l.height}).attr("y",function(l){return l.y}),XK(s),o.each(function(){yx.raiseToTop(this)}),o.exit().remove()}function VHt(e,t,r){var n=r[0],i=t.margin||{l:80,r:80,t:100,b:80},a=n.trace,o=a.domain,s=t.width,l=t.height,u=Math.floor(s*(o.x[1]-o.x[0])),c=Math.floor(l*(o.y[1]-o.y[0])),f=o.x[0]*s+i.l,h=t.height-o.y[1]*t.height+i.t,d=a.line.shape,v;a.hoverinfo==="all"?v=["count","probability"]:v=(a.hoverinfo||"").split("+");var _={trace:a,key:a.uid,model:n,x:f,y:h,width:u,height:c,hoveron:a.hoveron,hoverinfoItems:v,arrangement:a.arrangement,bundlecolors:a.bundlecolors,sortpaths:a.sortpaths,labelfont:a.labelfont,categorylabelfont:a.tickfont,pathShape:d,dragDimension:null,margin:i,paths:[],dimensions:[],graphDiv:e,traceSelection:null,pathSelection:null,dimensionSelection:null};return n.dimensions&&(JK(_),KK(_)),_}function sVe(e,t,r,n,i){var a=[],o=[],s,l;for(l=0;l<r.length-1;l++)s=AHt(r[l]+e[l],e[l+1]),a.push(s(i)),o.push(s(1-i));var u="M "+e[0]+","+t[0];for(u+="l"+r[0]+",0 ",l=1;l<r.length;l++)u+="C"+a[l-1]+","+t[l-1]+" "+o[l-1]+","+t[l]+" "+e[l]+","+t[l],u+="l"+r[l]+",0 ";for(u+="l0,"+n+" ",u+="l -"+r[r.length-1]+",0 ",l=r.length-2;l>=0;l--)u+="C"+o[l]+","+(t[l+1]+n)+" "+a[l]+","+(t[l]+n)+" "+(e[l]+r[l])+","+(t[l]+n),u+="l-"+r[l]+",0 ";return u+="Z",u}function KK(e){var t=e.dimensions,r=e.model,n=t.map(function(O){return O.categories.map(function(V){return V.y})}),i=e.model.dimensions.map(function(O){return O.categories.map(function(V){return V.displayInd})}),a=e.model.dimensions.map(function(O){return O.displayInd}),o=e.dimensions.map(function(O){return O.model.dimensionInd}),s=t.map(function(O){return O.x}),l=t.map(function(O){return O.width}),u=[];for(var c in r.paths)r.paths.hasOwnProperty(c)&&u.push(r.paths[c]);function f(O){var V=O.categoryInds.map(function(Z,j){return i[j][Z]}),G=o.map(function(Z){return V[Z]});return G}u.sort(function(O,V){var G=f(O),Z=f(V);return e.sortpaths==="backward"&&(G.reverse(),Z.reverse()),G.push(O.valueInds[0]),Z.push(V.valueInds[0]),e.bundlecolors&&(G.unshift(O.rawColor),Z.unshift(V.rawColor)),G<Z?-1:G>Z?1:0});for(var h=new Array(u.length),d=t[0].model.count,v=t[0].categories.map(function(O){return O.height}).reduce(function(O,V){return O+V}),_=0;_<u.length;_++){var b=u[_],p;d>0?p=v*(b.count/d):p=0;for(var k=new Array(n.length),E=0;E<b.categoryInds.length;E++){var S=b.categoryInds[E],L=i[E][S],x=a[E];k[x]=n[x][L],n[x][L]+=p;var C=e.dimensions[x].categories[L],M=C.bands.length,g=C.bands[M-1];if(g===void 0||b.rawColor!==g.rawColor){var P=g===void 0?0:g.y+g.height;C.bands.push({key:P,color:b.color,rawColor:b.rawColor,height:p,width:C.width,count:b.count,y:P,categoryViewModel:C,parcatsViewModel:e})}else{var T=C.bands[M-1];T.height+=p,T.count+=b.count}}var z;e.pathShape==="hspline"?z=sVe(s,k,l,p,.5):z=sVe(s,k,l,p,0),h[_]={key:b.valueInds[0],model:b,height:p,leftXs:s,topYs:k,dimWidths:l,svgD:z,parcatsViewModel:e}}e.paths=h}function JK(e){var t=e.model.dimensions.map(function(o){return{displayInd:o.displayInd,dimensionInd:o.dimensionInd}});t.sort(function(o,s){return o.displayInd-s.displayInd});var r=[];for(var n in t){var i=t[n].dimensionInd,a=e.model.dimensions[i];r.push(GHt(e,a))}e.dimensions=r}function GHt(e,t){var r=40,n=16,i=e.model.dimensions.length,a=t.displayInd,o,s,l;i>1?o=(e.width-2*r-n)/(i-1):o=0,s=r,l=s+o*a;var u=[],c=e.model.maxCats,f=t.categories.length,h=8,d=t.count,v=e.height-h*(c-1),_,b,p,k,E,S=(c-f)*h/2,L=t.categories.map(function(x){return{displayInd:x.displayInd,categoryInd:x.categoryInd}});for(L.sort(function(x,C){return x.displayInd-C.displayInd}),E=0;E<f;E++)k=L[E].categoryInd,b=t.categories[k],d>0?_=b.count/d*v:_=0,p={key:b.valueInds[0],model:b,width:n,height:_,y:b.dragY!==null?b.dragY:S,bands:[],parcatsViewModel:e},S=S+_+h,u.push(p);return{key:t.dimensionInd,x:t.dragX!==null?t.dragX:l,y:0,width:n,model:t,categories:u,parcatsViewModel:e,dragCategoryDisplayInd:null,dragDimensionDisplayInd:null,initialDragDimensionDisplayInds:null,initialDragCategoryDisplayInds:null,dragHasMoved:null,potentialClickBand:null}}});var $K=ye((ixr,pVe)=>{"use strict";var HHt=vVe();pVe.exports=function(t,r,n,i){var a=t._fullLayout,o=a._paper,s=a._size;HHt(t,o,r,{width:s.w,height:s.h,margin:{t:s.t,r:s.r,b:s.b,l:s.l}},n,i)}});var mVe=ye(jz=>{"use strict";var jHt=Id().getModuleCalcData,WHt=$K(),gVe="parcats";jz.name=gVe;jz.plot=function(e,t,r,n){var i=jHt(e.calcdata,gVe);if(i.length){var a=i[0];WHt(e,a,r,n)}};jz.clean=function(e,t,r,n){var i=n._has&&n._has("parcats"),a=t._has&&t._has("parcats");i&&!a&&n._paperdiv.selectAll(".parcats").remove()}});var _Ve=ye((axr,yVe)=>{"use strict";yVe.exports={attributes:UK(),supplyDefaults:iVe(),calc:aVe(),plot:$K(),colorbar:{container:"line",min:"cmin",max:"cmax"},moduleType:"trace",name:"parcats",basePlotModule:mVe(),categories:["noOpacity"],meta:{}}});var bVe=ye((oxr,xVe)=>{"use strict";xVe.exports=_Ve()});var c1=ye((sxr,kVe)=>{"use strict";var XHt=Z1(),wVe="1.13.4",MVe='\xA9 <a target="_blank" href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors',TVe=['\xA9 <a target="_blank" href="https://carto.com/">Carto</a>',MVe].join(" "),AVe=['Map tiles by <a target="_blank" href="https://stamen.com">Stamen Design</a>','under <a target="_blank" href="https://creativecommons.org/licenses/by/3.0">CC BY 3.0</a>',"|",'Data by <a target="_blank" href="https://openstreetmap.org">OpenStreetMap</a> contributors','under <a target="_blank" href="https://www.openstreetmap.org/copyright">ODbL</a>'].join(" "),ZHt=['Map tiles by <a target="_blank" href="https://stamen.com">Stamen Design</a>','under <a target="_blank" href="https://creativecommons.org/licenses/by/3.0">CC BY 3.0</a>',"|",'Data by <a target="_blank" href="https://openstreetmap.org">OpenStreetMap</a> contributors','under <a target="_blank" href="https://creativecommons.org/licenses/by-sa/3.0">CC BY SA</a>'].join(" "),EVe={"open-street-map":{id:"osm",version:8,sources:{"plotly-osm-tiles":{type:"raster",attribution:MVe,tiles:["https://a.tile.openstreetmap.org/{z}/{x}/{y}.png","https://b.tile.openstreetmap.org/{z}/{x}/{y}.png"],tileSize:256}},layers:[{id:"plotly-osm-tiles",type:"raster",source:"plotly-osm-tiles",minzoom:0,maxzoom:22}],glyphs:"https://fonts.openmaptiles.org/{fontstack}/{range}.pbf"},"white-bg":{id:"white-bg",version:8,sources:{},layers:[{id:"white-bg",type:"background",paint:{"background-color":"#FFFFFF"},minzoom:0,maxzoom:22}],glyphs:"https://fonts.openmaptiles.org/{fontstack}/{range}.pbf"},"carto-positron":{id:"carto-positron",version:8,sources:{"plotly-carto-positron":{type:"raster",attribution:TVe,tiles:["https://cartodb-basemaps-c.global.ssl.fastly.net/light_all/{z}/{x}/{y}.png"],tileSize:256}},layers:[{id:"plotly-carto-positron",type:"raster",source:"plotly-carto-positron",minzoom:0,maxzoom:22}],glyphs:"https://fonts.openmaptiles.org/{fontstack}/{range}.pbf"},"carto-darkmatter":{id:"carto-darkmatter",version:8,sources:{"plotly-carto-darkmatter":{type:"raster",attribution:TVe,tiles:["https://cartodb-basemaps-c.global.ssl.fastly.net/dark_all/{z}/{x}/{y}.png"],tileSize:256}},layers:[{id:"plotly-carto-darkmatter",type:"raster",source:"plotly-carto-darkmatter",minzoom:0,maxzoom:22}],glyphs:"https://fonts.openmaptiles.org/{fontstack}/{range}.pbf"},"stamen-terrain":{id:"stamen-terrain",version:8,sources:{"plotly-stamen-terrain":{type:"raster",attribution:AVe,tiles:["https://tiles.stadiamaps.com/tiles/stamen_terrain/{z}/{x}/{y}.png?api_key="],tileSize:256}},layers:[{id:"plotly-stamen-terrain",type:"raster",source:"plotly-stamen-terrain",minzoom:0,maxzoom:22}],glyphs:"https://fonts.openmaptiles.org/{fontstack}/{range}.pbf"},"stamen-toner":{id:"stamen-toner",version:8,sources:{"plotly-stamen-toner":{type:"raster",attribution:AVe,tiles:["https://tiles.stadiamaps.com/tiles/stamen_toner/{z}/{x}/{y}.png?api_key="],tileSize:256}},layers:[{id:"plotly-stamen-toner",type:"raster",source:"plotly-stamen-toner",minzoom:0,maxzoom:22}],glyphs:"https://fonts.openmaptiles.org/{fontstack}/{range}.pbf"},"stamen-watercolor":{id:"stamen-watercolor",version:8,sources:{"plotly-stamen-watercolor":{type:"raster",attribution:ZHt,tiles:["https://tiles.stadiamaps.com/tiles/stamen_watercolor/{z}/{x}/{y}.jpg?api_key="],tileSize:256}},layers:[{id:"plotly-stamen-watercolor",type:"raster",source:"plotly-stamen-watercolor",minzoom:0,maxzoom:22}],glyphs:"https://fonts.openmaptiles.org/{fontstack}/{range}.pbf"}},SVe=XHt(EVe);kVe.exports={requiredVersion:wVe,styleUrlPrefix:"mapbox://styles/mapbox/",styleUrlSuffix:"v9",styleValuesMapbox:["basic","streets","outdoors","light","dark","satellite","satellite-streets"],styleValueDflt:"basic",stylesNonMapbox:EVe,styleValuesNonMapbox:SVe,traceLayerPrefix:"plotly-trace-layer-",layoutLayerPrefix:"plotly-layout-layer-",wrongVersionErrorMsg:["Your custom plotly.js bundle is not using the correct mapbox-gl version","Please install @plotly/mapbox-gl@"+wVe+"."].join(`
+`),noAccessTokenErrorMsg:["Missing Mapbox access token.","Mapbox trace type require a Mapbox access token to be registered.","For example:","  Plotly.newPlot(gd, data, layout, { mapboxAccessToken: 'my-access-token' });","More info here: https://www.mapbox.com/help/define-access-token/"].join(`
+`),missingStyleErrorMsg:["No valid mapbox style found, please set `mapbox.style` to one of:",SVe.join(", "),"or register a Mapbox access token to use a Mapbox-served style."].join(`
+`),multipleTokensErrorMsg:["Set multiple mapbox access token across different mapbox subplot,","using first token found as mapbox-gl does not allow multipleaccess tokens on the same page."].join(`
+`),mapOnErrorMsg:"Mapbox error.",mapboxLogo:{path0:"m 10.5,1.24 c -5.11,0 -9.25,4.15 -9.25,9.25 0,5.1 4.15,9.25 9.25,9.25 5.1,0 9.25,-4.15 9.25,-9.25 0,-5.11 -4.14,-9.25 -9.25,-9.25 z m 4.39,11.53 c -1.93,1.93 -4.78,2.31 -6.7,2.31 -0.7,0 -1.41,-0.05 -2.1,-0.16 0,0 -1.02,-5.64 2.14,-8.81 0.83,-0.83 1.95,-1.28 3.13,-1.28 1.27,0 2.49,0.51 3.39,1.42 1.84,1.84 1.89,4.75 0.14,6.52 z",path1:"M 10.5,-0.01 C 4.7,-0.01 0,4.7 0,10.49 c 0,5.79 4.7,10.5 10.5,10.5 5.8,0 10.5,-4.7 10.5,-10.5 C 20.99,4.7 16.3,-0.01 10.5,-0.01 Z m 0,19.75 c -5.11,0 -9.25,-4.15 -9.25,-9.25 0,-5.1 4.14,-9.26 9.25,-9.26 5.11,0 9.25,4.15 9.25,9.25 0,5.13 -4.14,9.26 -9.25,9.26 z",path2:"M 14.74,6.25 C 12.9,4.41 9.98,4.35 8.23,6.1 5.07,9.27 6.09,14.91 6.09,14.91 c 0,0 5.64,1.02 8.81,-2.14 C 16.64,11 16.59,8.09 14.74,6.25 Z m -2.27,4.09 -0.91,1.87 -0.9,-1.87 -1.86,-0.91 1.86,-0.9 0.9,-1.87 0.91,1.87 1.86,0.9 z",polygon:"11.56,12.21 10.66,10.34 8.8,9.43 10.66,8.53 11.56,6.66 12.47,8.53 14.33,9.43 12.47,10.34"},styleRules:{map:"overflow:hidden;position:relative;","missing-css":"display:none;",canary:"background-color:salmon;","ctrl-bottom-left":"position: absolute; pointer-events: none; z-index: 2; bottom: 0; left: 0;","ctrl-bottom-right":"position: absolute; pointer-events: none; z-index: 2; right: 0; bottom: 0;",ctrl:"clear: both; pointer-events: auto; transform: translate(0, 0);","ctrl-attrib.mapboxgl-compact .mapboxgl-ctrl-attrib-inner":"display: none;","ctrl-attrib.mapboxgl-compact:hover .mapboxgl-ctrl-attrib-inner":"display: block; margin-top:2px","ctrl-attrib.mapboxgl-compact:hover":"padding: 2px 24px 2px 4px; visibility: visible; margin-top: 6px;","ctrl-attrib.mapboxgl-compact::after":`content: ""; cursor: pointer; position: absolute; background-image: url('data:image/svg+xml;charset=utf-8,%3Csvg viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg"%3E %3Cpath fill="%23333333" fill-rule="evenodd" d="M4,10a6,6 0 1,0 12,0a6,6 0 1,0 -12,0 M9,7a1,1 0 1,0 2,0a1,1 0 1,0 -2,0 M9,10a1,1 0 1,1 2,0l0,3a1,1 0 1,1 -2,0"/%3E %3C/svg%3E'); background-color: rgba(255, 255, 255, 0.5); width: 24px; height: 24px; box-sizing: border-box; border-radius: 12px;`,"ctrl-attrib.mapboxgl-compact":"min-height: 20px; padding: 0; margin: 10px; position: relative; background-color: #fff; border-radius: 3px 12px 12px 3px;","ctrl-bottom-right > .mapboxgl-ctrl-attrib.mapboxgl-compact::after":"bottom: 0; right: 0","ctrl-bottom-left > .mapboxgl-ctrl-attrib.mapboxgl-compact::after":"bottom: 0; left: 0","ctrl-bottom-left .mapboxgl-ctrl":"margin: 0 0 10px 10px; float: left;","ctrl-bottom-right .mapboxgl-ctrl":"margin: 0 10px 10px 0; float: right;","ctrl-attrib":"color: rgba(0, 0, 0, 0.75); text-decoration: none; font-size: 12px","ctrl-attrib a":"color: rgba(0, 0, 0, 0.75); text-decoration: none; font-size: 12px","ctrl-attrib a:hover":"color: inherit; text-decoration: underline;","ctrl-attrib .mapbox-improve-map":"font-weight: bold; margin-left: 2px;","attrib-empty":"display: none;","ctrl-logo":`display:block; width: 21px; height: 21px; background-image: url('data:image/svg+xml;charset=utf-8,%3C?xml version="1.0" encoding="utf-8"?%3E %3Csvg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" viewBox="0 0 21 21" style="enable-background:new 0 0 21 21;" xml:space="preserve"%3E%3Cg transform="translate(0,0.01)"%3E%3Cpath d="m 10.5,1.24 c -5.11,0 -9.25,4.15 -9.25,9.25 0,5.1 4.15,9.25 9.25,9.25 5.1,0 9.25,-4.15 9.25,-9.25 0,-5.11 -4.14,-9.25 -9.25,-9.25 z m 4.39,11.53 c -1.93,1.93 -4.78,2.31 -6.7,2.31 -0.7,0 -1.41,-0.05 -2.1,-0.16 0,0 -1.02,-5.64 2.14,-8.81 0.83,-0.83 1.95,-1.28 3.13,-1.28 1.27,0 2.49,0.51 3.39,1.42 1.84,1.84 1.89,4.75 0.14,6.52 z" style="opacity:0.9;fill:%23ffffff;enable-background:new" class="st0"/%3E%3Cpath d="M 10.5,-0.01 C 4.7,-0.01 0,4.7 0,10.49 c 0,5.79 4.7,10.5 10.5,10.5 5.8,0 10.5,-4.7 10.5,-10.5 C 20.99,4.7 16.3,-0.01 10.5,-0.01 Z m 0,19.75 c -5.11,0 -9.25,-4.15 -9.25,-9.25 0,-5.1 4.14,-9.26 9.25,-9.26 5.11,0 9.25,4.15 9.25,9.25 0,5.13 -4.14,9.26 -9.25,9.26 z" style="opacity:0.35;enable-background:new" class="st1"/%3E%3Cpath d="M 14.74,6.25 C 12.9,4.41 9.98,4.35 8.23,6.1 5.07,9.27 6.09,14.91 6.09,14.91 c 0,0 5.64,1.02 8.81,-2.14 C 16.64,11 16.59,8.09 14.74,6.25 Z m -2.27,4.09 -0.91,1.87 -0.9,-1.87 -1.86,-0.91 1.86,-0.9 0.9,-1.87 0.91,1.87 1.86,0.9 z" style="opacity:0.35;enable-background:new" class="st1"/%3E%3Cpolygon points="11.56,12.21 10.66,10.34 8.8,9.43 10.66,8.53 11.56,6.66 12.47,8.53 14.33,9.43 12.47,10.34 " style="opacity:0.9;fill:%23ffffff;enable-background:new" class="st0"/%3E%3C/g%3E%3C/svg%3E')`}}});var Nk=ye((lxr,IVe)=>{"use strict";var CVe=Dr(),LVe=ka().defaultLine,YHt=Cc().attributes,KHt=ec(),JHt=pf().textposition,$Ht=mc().overrideAll,QHt=vl().templatedArray,QK=c1(),PVe=KHt({noFontVariant:!0,noFontShadow:!0,noFontLineposition:!0,noFontTextcase:!0});PVe.family.dflt="Open Sans Regular, Arial Unicode MS Regular";var ejt=IVe.exports=$Ht({_arrayAttrRegexps:[CVe.counterRegex("mapbox",".layers",!0)],domain:YHt({name:"mapbox"}),accesstoken:{valType:"string",noBlank:!0,strict:!0},style:{valType:"any",values:QK.styleValuesMapbox.concat(QK.styleValuesNonMapbox),dflt:QK.styleValueDflt},center:{lon:{valType:"number",dflt:0},lat:{valType:"number",dflt:0}},zoom:{valType:"number",dflt:1},bearing:{valType:"number",dflt:0},pitch:{valType:"number",dflt:0},bounds:{west:{valType:"number"},east:{valType:"number"},south:{valType:"number"},north:{valType:"number"}},layers:QHt("layer",{visible:{valType:"boolean",dflt:!0},sourcetype:{valType:"enumerated",values:["geojson","vector","raster","image"],dflt:"geojson"},source:{valType:"any"},sourcelayer:{valType:"string",dflt:""},sourceattribution:{valType:"string"},type:{valType:"enumerated",values:["circle","line","fill","symbol","raster"],dflt:"circle"},coordinates:{valType:"any"},below:{valType:"string"},color:{valType:"color",dflt:LVe},opacity:{valType:"number",min:0,max:1,dflt:1},minzoom:{valType:"number",min:0,max:24,dflt:0},maxzoom:{valType:"number",min:0,max:24,dflt:24},circle:{radius:{valType:"number",dflt:15}},line:{width:{valType:"number",dflt:2},dash:{valType:"data_array"}},fill:{outlinecolor:{valType:"color",dflt:LVe}},symbol:{icon:{valType:"string",dflt:"marker"},iconsize:{valType:"number",dflt:10},text:{valType:"string",dflt:""},placement:{valType:"enumerated",values:["point","line","line-center"],dflt:"point"},textfont:PVe,textposition:CVe.extendFlat({},JHt,{arrayOk:!1})}})},"plot","from-root");ejt.uirevision={valType:"any",editType:"none"}});var Wz=ye((uxr,zVe)=>{"use strict";var{hovertemplateAttrs:tjt,texttemplateAttrs:rjt,templatefallbackAttrs:RVe}=Ll(),ijt=Cg(),Uk=j2(),T5=pf(),DVe=Nk(),njt=Gl(),ajt=Tu(),rw=Ao().extendFlat,ojt=mc().overrideAll,sjt=Nk(),FVe=Uk.line,A5=Uk.marker;zVe.exports=ojt({lon:Uk.lon,lat:Uk.lat,cluster:{enabled:{valType:"boolean"},maxzoom:rw({},sjt.layers.maxzoom,{}),step:{valType:"number",arrayOk:!0,dflt:-1,min:-1},size:{valType:"number",arrayOk:!0,dflt:20,min:0},color:{valType:"color",arrayOk:!0},opacity:rw({},A5.opacity,{dflt:1})},mode:rw({},T5.mode,{dflt:"markers"}),text:rw({},T5.text,{}),texttemplate:rjt({editType:"plot"},{keys:["lat","lon","text"]}),texttemplatefallback:RVe({editType:"plot"}),hovertext:rw({},T5.hovertext,{}),line:{color:FVe.color,width:FVe.width},connectgaps:T5.connectgaps,marker:rw({symbol:{valType:"string",dflt:"circle",arrayOk:!0},angle:{valType:"number",dflt:"auto",arrayOk:!0},allowoverlap:{valType:"boolean",dflt:!1},opacity:A5.opacity,size:A5.size,sizeref:A5.sizeref,sizemin:A5.sizemin,sizemode:A5.sizemode},ajt("marker")),fill:Uk.fill,fillcolor:ijt(),textfont:DVe.layers.symbol.textfont,textposition:DVe.layers.symbol.textposition,below:{valType:"string"},selected:{marker:T5.selected.marker},unselected:{marker:T5.unselected.marker},hoverinfo:rw({},njt.hoverinfo,{flags:["lon","lat","text","name"]}),hovertemplate:tjt(),hovertemplatefallback:RVe()},"calc","nested")});var eJ=ye((cxr,OVe)=>{"use strict";var ljt=["Metropolis Black Italic","Metropolis Black","Metropolis Bold Italic","Metropolis Bold","Metropolis Extra Bold Italic","Metropolis Extra Bold","Metropolis Extra Light Italic","Metropolis Extra Light","Metropolis Light Italic","Metropolis Light","Metropolis Medium Italic","Metropolis Medium","Metropolis Regular Italic","Metropolis Regular","Metropolis Semi Bold Italic","Metropolis Semi Bold","Metropolis Thin Italic","Metropolis Thin","Open Sans Bold Italic","Open Sans Bold","Open Sans Extrabold Italic","Open Sans Extrabold","Open Sans Italic","Open Sans Light Italic","Open Sans Light","Open Sans Regular","Open Sans Semibold Italic","Open Sans Semibold","Klokantech Noto Sans Bold","Klokantech Noto Sans CJK Bold","Klokantech Noto Sans CJK Regular","Klokantech Noto Sans Italic","Klokantech Noto Sans Regular"];OVe.exports={isSupportedFont:function(e){return ljt.indexOf(e)!==-1}}});var NVe=ye((fxr,BVe)=>{"use strict";var Vk=Dr(),tJ=Ru(),ujt=$p(),cjt=R0(),fjt=D0(),hjt=Rg(),qVe=Wz(),djt=eJ().isSupportedFont;BVe.exports=function(t,r,n,i){function a(p,k){return Vk.coerce(t,r,qVe,p,k)}function o(p,k){return Vk.coerce2(t,r,qVe,p,k)}var s=vjt(t,r,a);if(!s){r.visible=!1;return}if(a("text"),a("texttemplate"),a("texttemplatefallback"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),a("mode"),a("below"),tJ.hasMarkers(r)){ujt(t,r,n,i,a,{noLine:!0,noAngle:!0}),a("marker.allowoverlap"),a("marker.angle");var l=r.marker;l.symbol!=="circle"&&(Vk.isArrayOrTypedArray(l.size)&&(l.size=l.size[0]),Vk.isArrayOrTypedArray(l.color)&&(l.color=l.color[0]))}tJ.hasLines(r)&&(cjt(t,r,n,i,a,{noDash:!0}),a("connectgaps"));var u=o("cluster.maxzoom"),c=o("cluster.step"),f=o("cluster.color",r.marker&&r.marker.color||n),h=o("cluster.size"),d=o("cluster.opacity"),v=u!==!1||c!==!1||f!==!1||h!==!1||d!==!1,_=a("cluster.enabled",v);if(_||tJ.hasText(r)){var b=i.font.family;fjt(t,r,i,a,{noSelect:!0,noFontVariant:!0,noFontShadow:!0,noFontLineposition:!0,noFontTextcase:!0,font:{family:djt(b)?b:"Open Sans Regular",weight:i.font.weight,style:i.font.style,size:i.font.size,color:i.font.color}})}a("fill"),r.fill!=="none"&&hjt(t,r,n,a),Vk.coerceSelectionMarkerOpacity(r,a)};function vjt(e,t,r){var n=r("lon")||[],i=r("lat")||[],a=Math.min(n.length,i.length);return t._length=a,a}});var rJ=ye((hxr,VVe)=>{"use strict";var UVe=ho();VVe.exports=function(t,r,n){var i={},a=n[r.subplot]._subplot,o=a.mockAxis,s=t.lonlat;return i.lonLabel=UVe.tickText(o,o.c2l(s[0]),!0).text,i.latLabel=UVe.tickText(o,o.c2l(s[1]),!0).text,i}});var iJ=ye((dxr,HVe)=>{"use strict";var GVe=Dr();HVe.exports=function(t,r){var n=t.split(" "),i=n[0],a=n[1],o=GVe.isArrayOrTypedArray(r)?GVe.mean(r):r,s=.5+o/100,l=1.5+o/100,u=["",""],c=[0,0];switch(i){case"top":u[0]="top",c[1]=-l;break;case"bottom":u[0]="bottom",c[1]=l;break}switch(a){case"left":u[1]="right",c[0]=-s;break;case"right":u[1]="left",c[0]=s;break}var f;return u[0]&&u[1]?f=u.join("-"):u[0]?f=u[0]:u[1]?f=u[1]:f="center",{anchor:f,offset:c}}});var KVe=ye((vxr,YVe)=>{"use strict";var XVe=Eo(),av=Dr(),pjt=fs().BADNUM,Zz=tx(),jVe=tc(),gjt=So(),mjt=k3(),Yz=Ru(),yjt=eJ().isSupportedFont,_jt=iJ(),xjt=ip().appendArrayPointValue,bjt=ru().NEWLINES,wjt=ru().BR_TAG_ALL;YVe.exports=function(t,r){var n=r[0].trace,i=n.visible===!0&&n._length!==0,a=n.fill!=="none",o=Yz.hasLines(n),s=Yz.hasMarkers(n),l=Yz.hasText(n),u=s&&n.marker.symbol==="circle",c=s&&n.marker.symbol!=="circle",f=n.cluster&&n.cluster.enabled,h=Xz("fill"),d=Xz("line"),v=Xz("circle"),_=Xz("symbol"),b={fill:h,line:d,circle:v,symbol:_};if(!i)return b;var p;if((a||o)&&(p=Zz.calcTraceToLineCoords(r)),a&&(h.geojson=Zz.makePolygon(p),h.layout.visibility="visible",av.extendFlat(h.paint,{"fill-color":n.fillcolor})),o&&(d.geojson=Zz.makeLine(p),d.layout.visibility="visible",av.extendFlat(d.paint,{"line-width":n.line.width,"line-color":n.line.color,"line-opacity":n.opacity})),u){var k=Tjt(r);v.geojson=k.geojson,v.layout.visibility="visible",f&&(v.filter=["!",["has","point_count"]],b.cluster={type:"circle",filter:["has","point_count"],layout:{visibility:"visible"},paint:{"circle-color":aJ(n.cluster.color,n.cluster.step),"circle-radius":aJ(n.cluster.size,n.cluster.step),"circle-opacity":aJ(n.cluster.opacity,n.cluster.step)}},b.clusterCount={type:"symbol",filter:["has","point_count"],paint:{},layout:{"text-field":"{point_count_abbreviated}","text-font":WVe(n),"text-size":12}}),av.extendFlat(v.paint,{"circle-color":k.mcc,"circle-radius":k.mrc,"circle-opacity":k.mo})}if(u&&f&&(v.filter=["!",["has","point_count"]]),(c||l)&&(_.geojson=Ajt(r,t),av.extendFlat(_.layout,{visibility:"visible","icon-image":"{symbol}-15","text-field":"{text}"}),c&&(av.extendFlat(_.layout,{"icon-size":n.marker.size/10}),"angle"in n.marker&&n.marker.angle!=="auto"&&av.extendFlat(_.layout,{"icon-rotate":{type:"identity",property:"angle"},"icon-rotation-alignment":"map"}),_.layout["icon-allow-overlap"]=n.marker.allowoverlap,av.extendFlat(_.paint,{"icon-opacity":n.opacity*n.marker.opacity,"icon-color":n.marker.color})),l)){var E=(n.marker||{}).size,S=_jt(n.textposition,E);av.extendFlat(_.layout,{"text-size":n.textfont.size,"text-anchor":S.anchor,"text-offset":S.offset,"text-font":WVe(n)}),av.extendFlat(_.paint,{"text-color":n.textfont.color,"text-opacity":n.opacity})}return b};function Xz(e){return{type:e,geojson:Zz.makeBlank(),layout:{visibility:"none"},filter:null,paint:{}}}function Tjt(e){var t=e[0].trace,r=t.marker,n=t.selectedpoints,i=av.isArrayOrTypedArray(r.color),a=av.isArrayOrTypedArray(r.size),o=av.isArrayOrTypedArray(r.opacity),s;function l(E){return t.opacity*E}function u(E){return E/2}var c;i&&(jVe.hasColorscale(t,"marker")?c=jVe.makeColorScaleFuncFromTrace(r):c=av.identity);var f;a&&(f=mjt(t));var h;o&&(h=function(E){var S=XVe(E)?+av.constrain(E,0,1):0;return l(S)});var d=[];for(s=0;s<e.length;s++){var v=e[s],_=v.lonlat;if(!ZVe(_)){var b={};c&&(b.mcc=v.mcc=c(v.mc)),f&&(b.mrc=v.mrc=f(v.ms)),h&&(b.mo=h(v.mo)),n&&(b.selected=v.selected||0),d.push({type:"Feature",id:s+1,geometry:{type:"Point",coordinates:_},properties:b})}}var p;if(n)for(p=gjt.makeSelectedPointStyleFns(t),s=0;s<d.length;s++){var k=d[s].properties;p.selectedOpacityFn&&(k.mo=l(p.selectedOpacityFn(k))),p.selectedColorFn&&(k.mcc=p.selectedColorFn(k)),p.selectedSizeFn&&(k.mrc=p.selectedSizeFn(k))}return{geojson:{type:"FeatureCollection",features:d},mcc:i||p&&p.selectedColorFn?{type:"identity",property:"mcc"}:r.color,mrc:a||p&&p.selectedSizeFn?{type:"identity",property:"mrc"}:u(r.size),mo:o||p&&p.selectedOpacityFn?{type:"identity",property:"mo"}:l(r.opacity)}}function Ajt(e,t){for(var r=t._fullLayout,n=e[0].trace,i=n.marker||{},a=i.symbol,o=i.angle,s=a!=="circle"?nJ(a):Kz,l=o!=="auto"?nJ(o,!0):Kz,u=Yz.hasText(n)?nJ(n.text):Kz,c=[],f=0;f<e.length;f++){var h=e[f];if(!ZVe(h.lonlat)){var d=n.texttemplate,v;if(d){var _=Array.isArray(d)?d[f]||"":d,b=n._module.formatLabels(h,n,r),p={};xjt(p,n,h.i),v=av.texttemplateString({data:[p,h,n._meta],fallback:n.texttemplatefallback,labels:b,locale:r._d3locale,template:_})}else v=u(f);v&&(v=v.replace(bjt,"").replace(wjt,`
+`)),c.push({type:"Feature",geometry:{type:"Point",coordinates:h.lonlat},properties:{symbol:s(f),angle:l(f),text:v}})}}return{type:"FeatureCollection",features:c}}function nJ(e,t){return av.isArrayOrTypedArray(e)?t?function(r){return XVe(e[r])?+e[r]:0}:function(r){return e[r]}:e?function(){return e}:Kz}function Kz(){return""}function ZVe(e){return e[0]===pjt}function aJ(e,t){var r;if(av.isArrayOrTypedArray(e)&&av.isArrayOrTypedArray(t)){r=["step",["get","point_count"],e[0]];for(var n=1;n<e.length;n++)r.push(t[n-1],e[n])}else r=e;return r}function WVe(e){var t=e.textfont,r=t.family,n=t.style,i=t.weight,a=r.split(" "),o=a[a.length-1]==="Italic";o&&a.pop(),o=o||n==="italic";var s=a.join(" ");i==="bold"&&a.indexOf("Bold")===-1?s+=" Bold":i<=1e3&&(a[0]==="Metropolis"?(s="Metropolis",i>850?s+=" Black":i>750?s+=" Extra Bold":i>650?s+=" Bold":i>550?s+=" Semi Bold":i>450?s+=" Medium":i>350?s+=" Regular":i>250?s+=" Light":i>150?s+=" Extra Light":s+=" Thin"):a.slice(0,2).join(" ")==="Open Sans"?(s="Open Sans",i>750?s+=" Extrabold":i>650?s+=" Bold":i>550?s+=" Semibold":i>350?s+=" Regular":s+=" Light"):a.slice(0,3).join(" ")==="Klokantech Noto Sans"&&(s="Klokantech Noto Sans",a[3]==="CJK"&&(s+=" CJK"),s+=i>500?" Bold":" Regular")),o&&(s+=" Italic"),s==="Open Sans Regular Italic"?s="Open Sans Italic":s==="Open Sans Regular Bold"?s="Open Sans Bold":s==="Open Sans Regular Bold Italic"?s="Open Sans Bold Italic":s==="Klokantech Noto Sans Regular Italic"&&(s="Klokantech Noto Sans Italic"),yjt(s)||(s=r);var l=s.split(", ");return l}});var eGe=ye((pxr,QVe)=>{"use strict";var Sjt=Dr(),JVe=KVe(),S5=c1().traceLayerPrefix,rg={cluster:["cluster","clusterCount","circle"],nonCluster:["fill","line","circle","symbol"]};function $Ve(e,t,r,n){this.type="scattermapbox",this.subplot=e,this.uid=t,this.clusterEnabled=r,this.isHidden=n,this.sourceIds={fill:"source-"+t+"-fill",line:"source-"+t+"-line",circle:"source-"+t+"-circle",symbol:"source-"+t+"-symbol",cluster:"source-"+t+"-circle",clusterCount:"source-"+t+"-circle"},this.layerIds={fill:S5+t+"-fill",line:S5+t+"-line",circle:S5+t+"-circle",symbol:S5+t+"-symbol",cluster:S5+t+"-cluster",clusterCount:S5+t+"-cluster-count"},this.below=null}var Gk=$Ve.prototype;Gk.addSource=function(e,t,r){var n={type:"geojson",data:t.geojson};r&&r.enabled&&Sjt.extendFlat(n,{cluster:!0,clusterMaxZoom:r.maxzoom});var i=this.subplot.map.getSource(this.sourceIds[e]);i?i.setData(t.geojson):this.subplot.map.addSource(this.sourceIds[e],n)};Gk.setSourceData=function(e,t){this.subplot.map.getSource(this.sourceIds[e]).setData(t.geojson)};Gk.addLayer=function(e,t,r){var n={type:t.type,id:this.layerIds[e],source:this.sourceIds[e],layout:t.layout,paint:t.paint};t.filter&&(n.filter=t.filter);for(var i=this.layerIds[e],a,o=this.subplot.getMapLayers(),s=0;s<o.length;s++)if(o[s].id===i){a=!0;break}a?(this.subplot.setOptions(i,"setLayoutProperty",n.layout),n.layout.visibility==="visible"&&this.subplot.setOptions(i,"setPaintProperty",n.paint)):this.subplot.addLayer(n,r)};Gk.update=function(t){var r=t[0].trace,n=this.subplot,i=n.map,a=JVe(n.gd,t),o=n.belowLookup["trace-"+this.uid],s=!!(r.cluster&&r.cluster.enabled),l=!!this.clusterEnabled,u=this;function c(E){E||u.addSource("circle",a.circle,r.cluster);for(var S=rg.cluster,L=0;L<S.length;L++){var x=S[L],C=a[x];u.addLayer(x,C,o)}}function f(E){for(var S=rg.cluster,L=S.length-1;L>=0;L--){var x=S[L];i.removeLayer(u.layerIds[x])}E||i.removeSource(u.sourceIds.circle)}function h(E){for(var S=rg.nonCluster,L=0;L<S.length;L++){var x=S[L],C=a[x];E||u.addSource(x,C),u.addLayer(x,C,o)}}function d(E){for(var S=rg.nonCluster,L=S.length-1;L>=0;L--){var x=S[L];i.removeLayer(u.layerIds[x]),E||i.removeSource(u.sourceIds[x])}}function v(E){l?f(E):d(E)}function _(E){s?c(E):h(E)}function b(){for(var E=s?rg.cluster:rg.nonCluster,S=0;S<E.length;S++){var L=E[S],x=a[L];x&&(n.setOptions(u.layerIds[L],"setLayoutProperty",x.layout),x.layout.visibility==="visible"&&(L!=="cluster"&&u.setSourceData(L,x),n.setOptions(u.layerIds[L],"setPaintProperty",x.paint)))}}var p=this.isHidden,k=r.visible!==!0;k?p||v():p?k||_():l!==s?(v(),_()):(this.below!==o&&(v(!0),_(!0)),b()),this.clusterEnabled=s,this.isHidden=k,this.below=o,t[0].trace._glTrace=this};Gk.dispose=function(){for(var t=this.subplot.map,r=this.clusterEnabled?rg.cluster:rg.nonCluster,n=r.length-1;n>=0;n--){var i=r[n];t.removeLayer(this.layerIds[i]),t.removeSource(this.sourceIds[i])}};QVe.exports=function(t,r){var n=r[0].trace,i=n.cluster&&n.cluster.enabled,a=n.visible!==!0,o=new $Ve(t,n.uid,i,a),s=JVe(t.gd,r),l=o.below=t.belowLookup["trace-"+n.uid],u,c,f;if(i)for(o.addSource("circle",s.circle,n.cluster),u=0;u<rg.cluster.length;u++)c=rg.cluster[u],f=s[c],o.addLayer(c,f,l);else for(u=0;u<rg.nonCluster.length;u++)c=rg.nonCluster[u],f=s[c],o.addSource(c,f,n.cluster),o.addLayer(c,f,l);return r[0].trace._glTrace=o,o}});var Jz=ye((gxr,rGe)=>{"use strict";var Mjt=vf(),oJ=Dr(),Ejt=cT(),kjt=oJ.fillText,Cjt=fs().BADNUM,Ljt=c1().traceLayerPrefix;function Pjt(e,t,r){var n=e.cd,i=n[0].trace,a=e.xa,o=e.ya,s=e.subplot,l=[],u=Ljt+i.uid+"-circle",c=i.cluster&&i.cluster.enabled;if(c){var f=s.map.queryRenderedFeatures(null,{layers:[u]});l=f.map(function(M){return M.id})}var h=t>=0?Math.floor((t+180)/360):Math.ceil((t-180)/360),d=h*360,v=t-d;function _(M){var g=M.lonlat;if(g[0]===Cjt||c&&l.indexOf(M.i+1)===-1)return 1/0;var P=oJ.modHalf(g[0],360),T=g[1],z=s.project([P,T]),O=z.x-a.c2p([v,T]),V=z.y-o.c2p([P,r]),G=Math.max(3,M.mrc||0);return Math.max(Math.sqrt(O*O+V*V)-G,1-3/G)}if(Mjt.getClosest(n,_,e),e.index!==!1){var b=n[e.index],p=b.lonlat,k=[oJ.modHalf(p[0],360)+d,p[1]],E=a.c2p(k),S=o.c2p(k),L=b.mrc||1;e.x0=E-L,e.x1=E+L,e.y0=S-L,e.y1=S+L;var x={};x[i.subplot]={_subplot:s};var C=i._module.formatLabels(b,i,x);return e.lonLabel=C.lonLabel,e.latLabel=C.latLabel,e.color=Ejt(i,b),e.extraText=tGe(i,b,n[0].t.labels),e.hovertemplate=i.hovertemplate,[e]}}function tGe(e,t,r){if(e.hovertemplate)return;var n=t.hi||e.hoverinfo,i=n.split("+"),a=i.indexOf("all")!==-1,o=i.indexOf("lon")!==-1,s=i.indexOf("lat")!==-1,l=t.lonlat,u=[];function c(f){return f+"\xB0"}return a||o&&s?u.push("("+c(l[1])+", "+c(l[0])+")"):o?u.push(r.lon+c(l[0])):s&&u.push(r.lat+c(l[1])),(a||i.indexOf("text")!==-1)&&kjt(t,e,u),u.join("<br>")}rGe.exports={hoverPoints:Pjt,getExtraText:tGe}});var nGe=ye((mxr,iGe)=>{"use strict";iGe.exports=function(t,r){return t.lon=r.lon,t.lat=r.lat,t}});var oGe=ye((yxr,aGe)=>{"use strict";var Ijt=Dr(),Rjt=Ru(),Djt=fs().BADNUM;aGe.exports=function(t,r){var n=t.cd,i=t.xaxis,a=t.yaxis,o=[],s=n[0].trace,l;if(!Rjt.hasMarkers(s))return[];if(r===!1)for(l=0;l<n.length;l++)n[l].selected=0;else for(l=0;l<n.length;l++){var u=n[l],c=u.lonlat;if(c[0]!==Djt){var f=[Ijt.modHalf(c[0],360),c[1]],h=[i.c2p(f),a.c2p(f)];r.contains(h,null,l,t)?(o.push({pointNumber:l,lon:c[0],lat:c[1]}),u.selected=1):u.selected=0}}return o}});var uJ=ye((sJ,lJ)=>{(function(e,t){typeof sJ=="object"&&typeof lJ!="undefined"?lJ.exports=t():(e=e||self,e.mapboxgl=t())})(sJ,function(){"use strict";var e,t,r;function n(i,a){if(!e)e=a;else if(!t)t=a;else{var o="var sharedChunk = {}; ("+e+")(sharedChunk); ("+t+")(sharedChunk);",s={};e(s),r=a(s),typeof window!="undefined"&&(r.workerUrl=window.URL.createObjectURL(new Blob([o],{type:"text/javascript"})))}}return n(["exports"],function(i){"use strict";function a(m,y){return y={exports:{}},m(y,y.exports),y.exports}var o="1.13.4",s=l;function l(m,y,I,U){this.cx=3*m,this.bx=3*(I-m)-this.cx,this.ax=1-this.cx-this.bx,this.cy=3*y,this.by=3*(U-y)-this.cy,this.ay=1-this.cy-this.by,this.p1x=m,this.p1y=U,this.p2x=I,this.p2y=U}l.prototype.sampleCurveX=function(m){return((this.ax*m+this.bx)*m+this.cx)*m},l.prototype.sampleCurveY=function(m){return((this.ay*m+this.by)*m+this.cy)*m},l.prototype.sampleCurveDerivativeX=function(m){return(3*this.ax*m+2*this.bx)*m+this.cx},l.prototype.solveCurveX=function(m,y){typeof y=="undefined"&&(y=1e-6);var I,U,$,ae,he;for($=m,he=0;he<8;he++){if(ae=this.sampleCurveX($)-m,Math.abs(ae)<y)return $;var Oe=this.sampleCurveDerivativeX($);if(Math.abs(Oe)<1e-6)break;$=$-ae/Oe}if(I=0,U=1,$=m,$<I)return I;if($>U)return U;for(;I<U;){if(ae=this.sampleCurveX($),Math.abs(ae-m)<y)return $;m>ae?I=$:U=$,$=(U-I)*.5+I}return $},l.prototype.solve=function(m,y){return this.sampleCurveY(this.solveCurveX(m,y))};var u=c;function c(m,y){this.x=m,this.y=y}c.prototype={clone:function(){return new c(this.x,this.y)},add:function(m){return this.clone()._add(m)},sub:function(m){return this.clone()._sub(m)},multByPoint:function(m){return this.clone()._multByPoint(m)},divByPoint:function(m){return this.clone()._divByPoint(m)},mult:function(m){return this.clone()._mult(m)},div:function(m){return this.clone()._div(m)},rotate:function(m){return this.clone()._rotate(m)},rotateAround:function(m,y){return this.clone()._rotateAround(m,y)},matMult:function(m){return this.clone()._matMult(m)},unit:function(){return this.clone()._unit()},perp:function(){return this.clone()._perp()},round:function(){return this.clone()._round()},mag:function(){return Math.sqrt(this.x*this.x+this.y*this.y)},equals:function(m){return this.x===m.x&&this.y===m.y},dist:function(m){return Math.sqrt(this.distSqr(m))},distSqr:function(m){var y=m.x-this.x,I=m.y-this.y;return y*y+I*I},angle:function(){return Math.atan2(this.y,this.x)},angleTo:function(m){return Math.atan2(this.y-m.y,this.x-m.x)},angleWith:function(m){return this.angleWithSep(m.x,m.y)},angleWithSep:function(m,y){return Math.atan2(this.x*y-this.y*m,this.x*m+this.y*y)},_matMult:function(m){var y=m[0]*this.x+m[1]*this.y,I=m[2]*this.x+m[3]*this.y;return this.x=y,this.y=I,this},_add:function(m){return this.x+=m.x,this.y+=m.y,this},_sub:function(m){return this.x-=m.x,this.y-=m.y,this},_mult:function(m){return this.x*=m,this.y*=m,this},_div:function(m){return this.x/=m,this.y/=m,this},_multByPoint:function(m){return this.x*=m.x,this.y*=m.y,this},_divByPoint:function(m){return this.x/=m.x,this.y/=m.y,this},_unit:function(){return this._div(this.mag()),this},_perp:function(){var m=this.y;return this.y=this.x,this.x=-m,this},_rotate:function(m){var y=Math.cos(m),I=Math.sin(m),U=y*this.x-I*this.y,$=I*this.x+y*this.y;return this.x=U,this.y=$,this},_rotateAround:function(m,y){var I=Math.cos(m),U=Math.sin(m),$=y.x+I*(this.x-y.x)-U*(this.y-y.y),ae=y.y+U*(this.x-y.x)+I*(this.y-y.y);return this.x=$,this.y=ae,this},_round:function(){return this.x=Math.round(this.x),this.y=Math.round(this.y),this}},c.convert=function(m){return m instanceof c?m:Array.isArray(m)?new c(m[0],m[1]):m};var f=typeof self!="undefined"?self:{};function h(m,y){if(Array.isArray(m)){if(!Array.isArray(y)||m.length!==y.length)return!1;for(var I=0;I<m.length;I++)if(!h(m[I],y[I]))return!1;return!0}if(typeof m=="object"&&m!==null&&y!==null){if(typeof y!="object")return!1;var U=Object.keys(m);if(U.length!==Object.keys(y).length)return!1;for(var $ in m)if(!h(m[$],y[$]))return!1;return!0}return m===y}var d=Math.pow(2,53)-1;function v(m){if(m<=0)return 0;if(m>=1)return 1;var y=m*m,I=y*m;return 4*(m<.5?I:3*(m-y)+I-.75)}function _(m,y,I,U){var $=new s(m,y,I,U);return function(ae){return $.solve(ae)}}var b=_(.25,.1,.25,1);function p(m,y,I){return Math.min(I,Math.max(y,m))}function k(m,y,I){var U=I-y,$=((m-y)%U+U)%U+y;return $===y?I:$}function E(m,y,I){if(!m.length)return I(null,[]);var U=m.length,$=new Array(m.length),ae=null;m.forEach(function(he,Oe){y(he,function(rt,gt){rt&&(ae=rt),$[Oe]=gt,--U===0&&I(ae,$)})})}function S(m){var y=[];for(var I in m)y.push(m[I]);return y}function L(m,y){var I=[];for(var U in m)U in y||I.push(U);return I}function x(m){for(var y=[],I=arguments.length-1;I-- >0;)y[I]=arguments[I+1];for(var U=0,$=y;U<$.length;U+=1){var ae=$[U];for(var he in ae)m[he]=ae[he]}return m}function C(m,y){for(var I={},U=0;U<y.length;U++){var $=y[U];$ in m&&(I[$]=m[$])}return I}var M=1;function g(){return M++}function P(){function m(y){return y?(y^Math.random()*16>>y/4).toString(16):([1e7]+-[1e3]+-4e3+-8e3+-1e11).replace(/[018]/g,m)}return m()}function T(m){return m<=1?1:Math.pow(2,Math.ceil(Math.log(m)/Math.LN2))}function z(m){return m?/^[0-9a-f]{8}-[0-9a-f]{4}-[4][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i.test(m):!1}function O(m,y){m.forEach(function(I){y[I]&&(y[I]=y[I].bind(y))})}function V(m,y){return m.indexOf(y,m.length-y.length)!==-1}function G(m,y,I){var U={};for(var $ in m)U[$]=y.call(I||this,m[$],$,m);return U}function Z(m,y,I){var U={};for(var $ in m)y.call(I||this,m[$],$,m)&&(U[$]=m[$]);return U}function j(m){return Array.isArray(m)?m.map(j):typeof m=="object"&&m?G(m,j):m}function N(m,y){for(var I=0;I<m.length;I++)if(y.indexOf(m[I])>=0)return!0;return!1}var H={};function te(m){H[m]||(typeof console!="undefined"&&console.warn(m),H[m]=!0)}function oe(m,y,I){return(I.y-m.y)*(y.x-m.x)>(y.y-m.y)*(I.x-m.x)}function _e(m){for(var y=0,I=0,U=m.length,$=U-1,ae=void 0,he=void 0;I<U;$=I++)ae=m[I],he=m[$],y+=(he.x-ae.x)*(ae.y+he.y);return y}function Ee(m){var y=m[0],I=m[1],U=m[2];return I+=90,I*=Math.PI/180,U*=Math.PI/180,{x:y*Math.cos(I)*Math.sin(U),y:y*Math.sin(I)*Math.sin(U),z:y*Math.cos(U)}}function Ce(){return typeof WorkerGlobalScope!="undefined"&&typeof self!="undefined"&&self instanceof WorkerGlobalScope}function me(m){var y=/(?:^|(?:\s*\,\s*))([^\x00-\x20\(\)<>@\,;\:\\"\/\[\]\?\=\{\}\x7F]+)(?:\=(?:([^\x00-\x20\(\)<>@\,;\:\\"\/\[\]\?\=\{\}\x7F]+)|(?:\"((?:[^"\\]|\\.)*)\")))?/g,I={};if(m.replace(y,function($,ae,he,Oe){var rt=he||Oe;return I[ae]=rt?rt.toLowerCase():!0,""}),I["max-age"]){var U=parseInt(I["max-age"],10);isNaN(U)?delete I["max-age"]:I["max-age"]=U}return I}var ie=null;function Se(m){if(ie==null){var y=m.navigator?m.navigator.userAgent:null;ie=!!m.safari||!!(y&&(/\b(iPad|iPhone|iPod)\b/.test(y)||y.match("Safari")&&!y.match("Chrome")))}return ie}function Le(m){try{var y=f[m];return y.setItem("_mapbox_test_",1),y.removeItem("_mapbox_test_"),!0}catch(I){return!1}}function Ae(m){return f.btoa(encodeURIComponent(m).replace(/%([0-9A-F]{2})/g,function(y,I){return String.fromCharCode(+("0x"+I))}))}function Fe(m){return decodeURIComponent(f.atob(m).split("").map(function(y){return"%"+("00"+y.charCodeAt(0).toString(16)).slice(-2)}).join(""))}var Pe=f.performance&&f.performance.now?f.performance.now.bind(f.performance):Date.now.bind(Date),ge=f.requestAnimationFrame||f.mozRequestAnimationFrame||f.webkitRequestAnimationFrame||f.msRequestAnimationFrame,Re=f.cancelAnimationFrame||f.mozCancelAnimationFrame||f.webkitCancelAnimationFrame||f.msCancelAnimationFrame,ce,Ze,ut={now:Pe,frame:function(y){var I=ge(y);return{cancel:function(){return Re(I)}}},getImageData:function(y,I){I===void 0&&(I=0);var U=f.document.createElement("canvas"),$=U.getContext("2d");if(!$)throw new Error("failed to create canvas 2d context");return U.width=y.width,U.height=y.height,$.drawImage(y,0,0,y.width,y.height),$.getImageData(-I,-I,y.width+2*I,y.height+2*I)},resolveURL:function(y){return ce||(ce=f.document.createElement("a")),ce.href=y,ce.href},hardwareConcurrency:f.navigator&&f.navigator.hardwareConcurrency||4,get devicePixelRatio(){return f.devicePixelRatio},get prefersReducedMotion(){return f.matchMedia?(Ze==null&&(Ze=f.matchMedia("(prefers-reduced-motion: reduce)")),Ze.matches):!1}},pt={API_URL:"https://api.mapbox.com",get EVENTS_URL(){return this.API_URL?this.API_URL.indexOf("https://api.mapbox.cn")===0?"https://events.mapbox.cn/events/v2":this.API_URL.indexOf("https://api.mapbox.com")===0?"https://events.mapbox.com/events/v2":null:null},FEEDBACK_URL:"https://apps.mapbox.com/feedback",REQUIRE_ACCESS_TOKEN:!0,ACCESS_TOKEN:null,MAX_PARALLEL_IMAGE_REQUESTS:16},Zt={supported:!1,testSupport:Jt},st,lt=!1,Gt,Nt=!1;f.document&&(Gt=f.document.createElement("img"),Gt.onload=function(){st&&sr(st),st=null,Nt=!0},Gt.onerror=function(){lt=!0,st=null},Gt.src="data:image/webp;base64,UklGRh4AAABXRUJQVlA4TBEAAAAvAQAAAAfQ//73v/+BiOh/AAA=");function Jt(m){lt||!Gt||(Nt?sr(m):st=m)}function sr(m){var y=m.createTexture();m.bindTexture(m.TEXTURE_2D,y);try{if(m.texImage2D(m.TEXTURE_2D,0,m.RGBA,m.RGBA,m.UNSIGNED_BYTE,Gt),m.isContextLost())return;Zt.supported=!0}catch(I){}m.deleteTexture(y),lt=!0}var wr="01";function cr(){for(var m="1",y="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",I="",U=0;U<10;U++)I+=y[Math.floor(Math.random()*62)];var $=12*60*60*1e3,ae=[m,wr,I].join(""),he=Date.now()+$;return{token:ae,tokenExpiresAt:he}}var $e=function(y,I){this._transformRequestFn=y,this._customAccessToken=I,this._createSkuToken()};$e.prototype._createSkuToken=function(){var y=cr();this._skuToken=y.token,this._skuTokenExpiresAt=y.tokenExpiresAt},$e.prototype._isSkuTokenExpired=function(){return Date.now()>this._skuTokenExpiresAt},$e.prototype.transformRequest=function(y,I){return this._transformRequestFn?this._transformRequestFn(y,I)||{url:y}:{url:y}},$e.prototype.normalizeStyleURL=function(y,I){if(!St(y))return y;var U=er(y);return U.path="/styles/v1"+U.path,this._makeAPIURL(U,this._customAccessToken||I)},$e.prototype.normalizeGlyphsURL=function(y,I){if(!St(y))return y;var U=er(y);return U.path="/fonts/v1"+U.path,this._makeAPIURL(U,this._customAccessToken||I)},$e.prototype.normalizeSourceURL=function(y,I){if(!St(y))return y;var U=er(y);return U.path="/v4/"+U.authority+".json",U.params.push("secure"),this._makeAPIURL(U,this._customAccessToken||I)},$e.prototype.normalizeSpriteURL=function(y,I,U,$){var ae=er(y);return St(y)?(ae.path="/styles/v1"+ae.path+"/sprite"+I+U,this._makeAPIURL(ae,this._customAccessToken||$)):(ae.path+=""+I+U,lr(ae))},$e.prototype.normalizeTileURL=function(y,I){if(this._isSkuTokenExpired()&&this._createSkuToken(),y&&!St(y))return y;var U=er(y),$=/(\.(png|jpg)\d*)(?=$)/,ae=/^.+\/v4\//,he=ut.devicePixelRatio>=2||I===512?"@2x":"",Oe=Zt.supported?".webp":"$1";U.path=U.path.replace($,""+he+Oe),U.path=U.path.replace(ae,"/"),U.path="/v4"+U.path;var rt=this._customAccessToken||It(U.params)||pt.ACCESS_TOKEN;return pt.REQUIRE_ACCESS_TOKEN&&rt&&this._skuToken&&U.params.push("sku="+this._skuToken),this._makeAPIURL(U,rt)},$e.prototype.canonicalizeTileURL=function(y,I){var U="/v4/",$=/\.[\w]+$/,ae=er(y);if(!ae.path.match(/(^\/v4\/)/)||!ae.path.match($))return y;var he="mapbox://tiles/";he+=ae.path.replace(U,"");var Oe=ae.params;return I&&(Oe=Oe.filter(function(rt){return!rt.match(/^access_token=/)})),Oe.length&&(he+="?"+Oe.join("&")),he},$e.prototype.canonicalizeTileset=function(y,I){for(var U=I?St(I):!1,$=[],ae=0,he=y.tiles||[];ae<he.length;ae+=1){var Oe=he[ae];Vt(Oe)?$.push(this.canonicalizeTileURL(Oe,U)):$.push(Oe)}return $},$e.prototype._makeAPIURL=function(y,I){var U="See https://www.mapbox.com/api-documentation/#access-tokens-and-token-scopes",$=er(pt.API_URL);if(y.protocol=$.protocol,y.authority=$.authority,y.protocol==="http"){var ae=y.params.indexOf("secure");ae>=0&&y.params.splice(ae,1)}if($.path!=="/"&&(y.path=""+$.path+y.path),!pt.REQUIRE_ACCESS_TOKEN)return lr(y);if(I=I||pt.ACCESS_TOKEN,!I)throw new Error("An API access token is required to use Mapbox GL. "+U);if(I[0]==="s")throw new Error("Use a public access token (pk.*) with Mapbox GL, not a secret access token (sk.*). "+U);return y.params=y.params.filter(function(he){return he.indexOf("access_token")===-1}),y.params.push("access_token="+I),lr(y)};function St(m){return m.indexOf("mapbox:")===0}var Qt=/^((https?:)?\/\/)?([^\/]+\.)?mapbox\.c(n|om)(\/|\?|$)/i;function Vt(m){return Qt.test(m)}function _t(m){return m.indexOf("sku=")>0&&Vt(m)}function It(m){for(var y=0,I=m;y<I.length;y+=1){var U=I[y],$=U.match(/^access_token=(.*)$/);if($)return $[1]}return null}var mt=/^(\w+):\/\/([^/?]*)(\/[^?]+)?\??(.+)?/;function er(m){var y=m.match(mt);if(!y)throw new Error("Unable to parse URL object");return{protocol:y[1],authority:y[2],path:y[3]||"/",params:y[4]?y[4].split("&"):[]}}function lr(m){var y=m.params.length?"?"+m.params.join("&"):"";return m.protocol+"://"+m.authority+m.path+y}var Tr="mapbox.eventData";function Lr(m){if(!m)return null;var y=m.split(".");if(!y||y.length!==3)return null;try{var I=JSON.parse(Fe(y[1]));return I}catch(U){return null}}var ti=function(y){this.type=y,this.anonId=null,this.eventData={},this.queue=[],this.pendingRequest=null};ti.prototype.getStorageKey=function(y){var I=Lr(pt.ACCESS_TOKEN),U="";return I&&I.u?U=Ae(I.u):U=pt.ACCESS_TOKEN||"",y?Tr+"."+y+":"+U:Tr+":"+U},ti.prototype.fetchEventData=function(){var y=Le("localStorage"),I=this.getStorageKey(),U=this.getStorageKey("uuid");if(y)try{var $=f.localStorage.getItem(I);$&&(this.eventData=JSON.parse($));var ae=f.localStorage.getItem(U);ae&&(this.anonId=ae)}catch(he){te("Unable to read from LocalStorage")}},ti.prototype.saveEventData=function(){var y=Le("localStorage"),I=this.getStorageKey(),U=this.getStorageKey("uuid");if(y)try{f.localStorage.setItem(U,this.anonId),Object.keys(this.eventData).length>=1&&f.localStorage.setItem(I,JSON.stringify(this.eventData))}catch($){te("Unable to write to LocalStorage")}},ti.prototype.processRequests=function(y){},ti.prototype.postEvent=function(y,I,U,$){var ae=this;if(pt.EVENTS_URL){var he=er(pt.EVENTS_URL);he.params.push("access_token="+($||pt.ACCESS_TOKEN||""));var Oe={event:this.type,created:new Date(y).toISOString(),sdkIdentifier:"mapbox-gl-js",sdkVersion:o,skuId:wr,userId:this.anonId},rt=I?x(Oe,I):Oe,gt={url:lr(he),headers:{"Content-Type":"text/plain"},body:JSON.stringify([rt])};this.pendingRequest=Qr(gt,function(Et){ae.pendingRequest=null,U(Et),ae.saveEventData(),ae.processRequests($)})}},ti.prototype.queueRequest=function(y,I){this.queue.push(y),this.processRequests(I)};var Br=function(m){function y(){m.call(this,"map.load"),this.success={},this.skuToken=""}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.postMapLoadEvent=function(U,$,ae,he){this.skuToken=ae,(pt.EVENTS_URL&&he||pt.ACCESS_TOKEN&&Array.isArray(U)&&U.some(function(Oe){return St(Oe)||Vt(Oe)}))&&this.queueRequest({id:$,timestamp:Date.now()},he)},y.prototype.processRequests=function(U){var $=this;if(!(this.pendingRequest||this.queue.length===0)){var ae=this.queue.shift(),he=ae.id,Oe=ae.timestamp;he&&this.success[he]||(this.anonId||this.fetchEventData(),z(this.anonId)||(this.anonId=P()),this.postEvent(Oe,{skuToken:this.skuToken},function(rt){rt||he&&($.success[he]=!0)},U))}},y}(ti),Vr=function(m){function y(I){m.call(this,"appUserTurnstile"),this._customAccessToken=I}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.postTurnstileEvent=function(U,$){pt.EVENTS_URL&&pt.ACCESS_TOKEN&&Array.isArray(U)&&U.some(function(ae){return St(ae)||Vt(ae)})&&this.queueRequest(Date.now(),$)},y.prototype.processRequests=function(U){var $=this;if(!(this.pendingRequest||this.queue.length===0)){(!this.anonId||!this.eventData.lastSuccess||!this.eventData.tokenU)&&this.fetchEventData();var ae=Lr(pt.ACCESS_TOKEN),he=ae?ae.u:pt.ACCESS_TOKEN,Oe=he!==this.eventData.tokenU;z(this.anonId)||(this.anonId=P(),Oe=!0);var rt=this.queue.shift();if(this.eventData.lastSuccess){var gt=new Date(this.eventData.lastSuccess),Et=new Date(rt),or=(rt-this.eventData.lastSuccess)/(24*60*60*1e3);Oe=Oe||or>=1||or<-1||gt.getDate()!==Et.getDate()}else Oe=!0;if(!Oe)return this.processRequests();this.postEvent(rt,{"enabled.telemetry":!1},function(_r){_r||($.eventData.lastSuccess=rt,$.eventData.tokenU=he)},U)}},y}(ti),dt=new Vr,Ge=dt.postTurnstileEvent.bind(dt),Je=new Br,je=Je.postMapLoadEvent.bind(Je),tt="mapbox-tiles",xt=500,Ie=50,xe=1e3*60*7,ke;function vt(){f.caches&&!ke&&(ke=f.caches.open(tt))}var ir;function ar(m,y){if(ir===void 0)try{new Response(new ReadableStream),ir=!0}catch(I){ir=!1}ir?y(m.body):m.blob().then(y)}function vr(m,y,I){if(vt(),!!ke){var U={status:y.status,statusText:y.statusText,headers:new f.Headers};y.headers.forEach(function(he,Oe){return U.headers.set(Oe,he)});var $=me(y.headers.get("Cache-Control")||"");if(!$["no-store"]){$["max-age"]&&U.headers.set("Expires",new Date(I+$["max-age"]*1e3).toUTCString());var ae=new Date(U.headers.get("Expires")).getTime()-I;ae<xe||ar(y,function(he){var Oe=new f.Response(he,U);vt(),ke&&ke.then(function(rt){return rt.put(ii(m.url),Oe)}).catch(function(rt){return te(rt.message)})})}}}function ii(m){var y=m.indexOf("?");return y<0?m:m.slice(0,y)}function pi(m,y){if(vt(),!ke)return y(null);var I=ii(m.url);ke.then(function(U){U.match(I).then(function($){var ae=$r($);U.delete(I),ae&&U.put(I,$.clone()),y(null,$,ae)}).catch(y)}).catch(y)}function $r(m){if(!m)return!1;var y=new Date(m.headers.get("Expires")||0),I=me(m.headers.get("Cache-Control")||"");return y>Date.now()&&!I["no-cache"]}var di=1/0;function ji(m){di++,di>Ie&&(m.getActor().send("enforceCacheSizeLimit",xt),di=0)}function In(m){vt(),ke&&ke.then(function(y){y.keys().then(function(I){for(var U=0;U<I.length-m;U++)y.delete(I[U])})})}function wi(m){var y=f.caches.delete(tt);m&&y.catch(m).then(function(){return m()})}function On(m,y){xt=m,Ie=y}var qn;function Fn(){return qn==null&&(qn=f.OffscreenCanvas&&new f.OffscreenCanvas(1,1).getContext("2d")&&typeof f.createImageBitmap=="function"),qn}var ra={Unknown:"Unknown",Style:"Style",Source:"Source",Tile:"Tile",Glyphs:"Glyphs",SpriteImage:"SpriteImage",SpriteJSON:"SpriteJSON",Image:"Image"};typeof Object.freeze=="function"&&Object.freeze(ra);var la=function(m){function y(I,U,$){U===401&&Vt($)&&(I+=": you may have provided an invalid Mapbox access token. See https://www.mapbox.com/api-documentation/#access-tokens-and-token-scopes"),m.call(this,I),this.status=U,this.url=$,this.name=this.constructor.name,this.message=I}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.toString=function(){return this.name+": "+this.message+" ("+this.status+"): "+this.url},y}(Error),Ut=Ce()?function(){return self.worker&&self.worker.referrer}:function(){return(f.location.protocol==="blob:"?f.parent:f).location.href},wt=function(m){return/^file:/.test(m)||/^file:/.test(Ut())&&!/^\w+:/.test(m)};function rr(m,y){var I=new f.AbortController,U=new f.Request(m.url,{method:m.method||"GET",body:m.body,credentials:m.credentials,headers:m.headers,referrer:Ut(),signal:I.signal}),$=!1,ae=!1,he=_t(U.url);m.type==="json"&&U.headers.set("Accept","application/json");var Oe=function(gt,Et,or){if(!ae){if(gt&&gt.message!=="SecurityError"&&te(gt),Et&&or)return rt(Et);var _r=Date.now();f.fetch(U).then(function(pr){if(pr.ok){var Fr=he?pr.clone():null;return rt(pr,Fr,_r)}else return y(new la(pr.statusText,pr.status,m.url))}).catch(function(pr){pr.code!==20&&y(new Error(pr.message))})}},rt=function(gt,Et,or){(m.type==="arrayBuffer"?gt.arrayBuffer():m.type==="json"?gt.json():gt.text()).then(function(_r){ae||(Et&&or&&vr(U,Et,or),$=!0,y(null,_r,gt.headers.get("Cache-Control"),gt.headers.get("Expires")))}).catch(function(_r){ae||y(new Error(_r.message))})};return he?pi(U,Oe):Oe(null,null),{cancel:function(){ae=!0,$||I.abort()}}}function nr(m,y){var I=new f.XMLHttpRequest;I.open(m.method||"GET",m.url,!0),m.type==="arrayBuffer"&&(I.responseType="arraybuffer");for(var U in m.headers)I.setRequestHeader(U,m.headers[U]);return m.type==="json"&&(I.responseType="text",I.setRequestHeader("Accept","application/json")),I.withCredentials=m.credentials==="include",I.onerror=function(){y(new Error(I.statusText))},I.onload=function(){if((I.status>=200&&I.status<300||I.status===0)&&I.response!==null){var $=I.response;if(m.type==="json")try{$=JSON.parse(I.response)}catch(ae){return y(ae)}y(null,$,I.getResponseHeader("Cache-Control"),I.getResponseHeader("Expires"))}else y(new la(I.statusText,I.status,m.url))},I.send(m.body),{cancel:function(){return I.abort()}}}var Er=function(m,y){if(!wt(m.url)){if(f.fetch&&f.Request&&f.AbortController&&f.Request.prototype.hasOwnProperty("signal"))return rr(m,y);if(Ce()&&self.worker&&self.worker.actor){var I=!0;return self.worker.actor.send("getResource",m,y,void 0,I)}}return nr(m,y)},Xr=function(m,y){return Er(x(m,{type:"json"}),y)},ri=function(m,y){return Er(x(m,{type:"arrayBuffer"}),y)},Qr=function(m,y){return Er(x(m,{method:"POST"}),y)};function Oi(m){var y=f.document.createElement("a");return y.href=m,y.protocol===f.document.location.protocol&&y.host===f.document.location.host}var $i="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAAC0lEQVQYV2NgAAIAAAUAAarVyFEAAAAASUVORK5CYII=";function tn(m,y,I,U){var $=new f.Image,ae=f.URL;$.onload=function(){y(null,$),ae.revokeObjectURL($.src),$.onload=null,f.requestAnimationFrame(function(){$.src=$i})},$.onerror=function(){return y(new Error("Could not load image. Please make sure to use a supported image type such as PNG or JPEG. Note that SVGs are not supported."))};var he=new f.Blob([new Uint8Array(m)],{type:"image/png"});$.cacheControl=I,$.expires=U,$.src=m.byteLength?ae.createObjectURL(he):$i}function fn(m,y){var I=new f.Blob([new Uint8Array(m)],{type:"image/png"});f.createImageBitmap(I).then(function(U){y(null,U)}).catch(function(U){y(new Error("Could not load image because of "+U.message+". Please make sure to use a supported image type such as PNG or JPEG. Note that SVGs are not supported."))})}var yn,Sn,Ba=function(){yn=[],Sn=0};Ba();var ua=function(m,y){if(Zt.supported&&(m.headers||(m.headers={}),m.headers.accept="image/webp,*/*"),Sn>=pt.MAX_PARALLEL_IMAGE_REQUESTS){var I={requestParameters:m,callback:y,cancelled:!1,cancel:function(){this.cancelled=!0}};return yn.push(I),I}Sn++;var U=!1,$=function(){if(!U)for(U=!0,Sn--;yn.length&&Sn<pt.MAX_PARALLEL_IMAGE_REQUESTS;){var he=yn.shift(),Oe=he.requestParameters,rt=he.callback,gt=he.cancelled;gt||(he.cancel=ua(Oe,rt).cancel)}},ae=ri(m,function(he,Oe,rt,gt){$(),he?y(he):Oe&&(Fn()?fn(Oe,y):tn(Oe,y,rt,gt))});return{cancel:function(){ae.cancel(),$()}}},ma=function(m,y){var I=f.document.createElement("video");I.muted=!0,I.onloadstart=function(){y(null,I)};for(var U=0;U<m.length;U++){var $=f.document.createElement("source");Oi(m[U])||(I.crossOrigin="Anonymous"),$.src=m[U],I.appendChild($)}return{cancel:function(){}}};function Wa(m,y,I){var U=I[m]&&I[m].indexOf(y)!==-1;U||(I[m]=I[m]||[],I[m].push(y))}function Fa(m,y,I){if(I&&I[m]){var U=I[m].indexOf(y);U!==-1&&I[m].splice(U,1)}}var Wo=function(y,I){I===void 0&&(I={}),x(this,I),this.type=y},da=function(m){function y(I,U){U===void 0&&(U={}),m.call(this,"error",x({error:I},U))}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y}(Wo),Wn=function(){};Wn.prototype.on=function(y,I){return this._listeners=this._listeners||{},Wa(y,I,this._listeners),this},Wn.prototype.off=function(y,I){return Fa(y,I,this._listeners),Fa(y,I,this._oneTimeListeners),this},Wn.prototype.once=function(y,I){return this._oneTimeListeners=this._oneTimeListeners||{},Wa(y,I,this._oneTimeListeners),this},Wn.prototype.fire=function(y,I){typeof y=="string"&&(y=new Wo(y,I||{}));var U=y.type;if(this.listens(U)){y.target=this;for(var $=this._listeners&&this._listeners[U]?this._listeners[U].slice():[],ae=0,he=$;ae<he.length;ae+=1){var Oe=he[ae];Oe.call(this,y)}for(var rt=this._oneTimeListeners&&this._oneTimeListeners[U]?this._oneTimeListeners[U].slice():[],gt=0,Et=rt;gt<Et.length;gt+=1){var or=Et[gt];Fa(U,or,this._oneTimeListeners),or.call(this,y)}var _r=this._eventedParent;_r&&(x(y,typeof this._eventedParentData=="function"?this._eventedParentData():this._eventedParentData),_r.fire(y))}else y instanceof da&&console.error(y.error);return this},Wn.prototype.listens=function(y){return this._listeners&&this._listeners[y]&&this._listeners[y].length>0||this._oneTimeListeners&&this._oneTimeListeners[y]&&this._oneTimeListeners[y].length>0||this._eventedParent&&this._eventedParent.listens(y)},Wn.prototype.setEventedParent=function(y,I){return this._eventedParent=y,this._eventedParentData=I,this};var Ha=8,vo={version:{required:!0,type:"enum",values:[8]},name:{type:"string"},metadata:{type:"*"},center:{type:"array",value:"number"},zoom:{type:"number"},bearing:{type:"number",default:0,period:360,units:"degrees"},pitch:{type:"number",default:0,units:"degrees"},light:{type:"light"},sources:{required:!0,type:"sources"},sprite:{type:"string"},glyphs:{type:"string"},transition:{type:"transition"},layers:{required:!0,type:"array",value:"layer"}},jn={"*":{type:"source"}},Mt=["source_vector","source_raster","source_raster_dem","source_geojson","source_video","source_image"],kr={type:{required:!0,type:"enum",values:{vector:{}}},url:{type:"string"},tiles:{type:"array",value:"string"},bounds:{type:"array",value:"number",length:4,default:[-180,-85.051129,180,85.051129]},scheme:{type:"enum",values:{xyz:{},tms:{}},default:"xyz"},minzoom:{type:"number",default:0},maxzoom:{type:"number",default:22},attribution:{type:"string"},promoteId:{type:"promoteId"},volatile:{type:"boolean",default:!1},"*":{type:"*"}},Jr={type:{required:!0,type:"enum",values:{raster:{}}},url:{type:"string"},tiles:{type:"array",value:"string"},bounds:{type:"array",value:"number",length:4,default:[-180,-85.051129,180,85.051129]},minzoom:{type:"number",default:0},maxzoom:{type:"number",default:22},tileSize:{type:"number",default:512,units:"pixels"},scheme:{type:"enum",values:{xyz:{},tms:{}},default:"xyz"},attribution:{type:"string"},volatile:{type:"boolean",default:!1},"*":{type:"*"}},vi={type:{required:!0,type:"enum",values:{"raster-dem":{}}},url:{type:"string"},tiles:{type:"array",value:"string"},bounds:{type:"array",value:"number",length:4,default:[-180,-85.051129,180,85.051129]},minzoom:{type:"number",default:0},maxzoom:{type:"number",default:22},tileSize:{type:"number",default:512,units:"pixels"},attribution:{type:"string"},encoding:{type:"enum",values:{terrarium:{},mapbox:{}},default:"mapbox"},volatile:{type:"boolean",default:!1},"*":{type:"*"}},hn={type:{required:!0,type:"enum",values:{geojson:{}}},data:{type:"*"},maxzoom:{type:"number",default:18},attribution:{type:"string"},buffer:{type:"number",default:128,maximum:512,minimum:0},filter:{type:"*"},tolerance:{type:"number",default:.375},cluster:{type:"boolean",default:!1},clusterRadius:{type:"number",default:50,minimum:0},clusterMaxZoom:{type:"number"},clusterMinPoints:{type:"number"},clusterProperties:{type:"*"},lineMetrics:{type:"boolean",default:!1},generateId:{type:"boolean",default:!1},promoteId:{type:"promoteId"}},An={type:{required:!0,type:"enum",values:{video:{}}},urls:{required:!0,type:"array",value:"string"},coordinates:{required:!0,type:"array",length:4,value:{type:"array",length:2,value:"number"}}},Mn={type:{required:!0,type:"enum",values:{image:{}}},url:{required:!0,type:"string"},coordinates:{required:!0,type:"array",length:4,value:{type:"array",length:2,value:"number"}}},Li={id:{type:"string",required:!0},type:{type:"enum",values:{fill:{},line:{},symbol:{},circle:{},heatmap:{},"fill-extrusion":{},raster:{},hillshade:{},background:{}},required:!0},metadata:{type:"*"},source:{type:"string"},"source-layer":{type:"string"},minzoom:{type:"number",minimum:0,maximum:24},maxzoom:{type:"number",minimum:0,maximum:24},filter:{type:"filter"},layout:{type:"layout"},paint:{type:"paint"}},_n=["layout_fill","layout_line","layout_circle","layout_heatmap","layout_fill-extrusion","layout_symbol","layout_raster","layout_hillshade","layout_background"],ya={visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},Jn={"fill-sort-key":{type:"number",expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},Ma={"circle-sort-key":{type:"number",expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},_o={visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},No={"line-cap":{type:"enum",values:{butt:{},round:{},square:{}},default:"butt",expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"line-join":{type:"enum",values:{bevel:{},round:{},miter:{}},default:"miter",expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"line-miter-limit":{type:"number",default:2,requires:[{"line-join":"miter"}],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"line-round-limit":{type:"number",default:1.05,requires:[{"line-join":"round"}],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"line-sort-key":{type:"number",expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},po={"symbol-placement":{type:"enum",values:{point:{},line:{},"line-center":{}},default:"point",expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"symbol-spacing":{type:"number",default:250,minimum:1,units:"pixels",requires:[{"symbol-placement":"line"}],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"symbol-avoid-edges":{type:"boolean",default:!1,expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"symbol-sort-key":{type:"number",expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"symbol-z-order":{type:"enum",values:{auto:{},"viewport-y":{},source:{}},default:"auto",expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-allow-overlap":{type:"boolean",default:!1,requires:["icon-image"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-ignore-placement":{type:"boolean",default:!1,requires:["icon-image"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-optional":{type:"boolean",default:!1,requires:["icon-image","text-field"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-rotation-alignment":{type:"enum",values:{map:{},viewport:{},auto:{}},default:"auto",requires:["icon-image"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-size":{type:"number",default:1,minimum:0,units:"factor of the original icon size",requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"icon-text-fit":{type:"enum",values:{none:{},width:{},height:{},both:{}},default:"none",requires:["icon-image","text-field"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-text-fit-padding":{type:"array",value:"number",length:4,default:[0,0,0,0],units:"pixels",requires:["icon-image","text-field",{"icon-text-fit":["both","width","height"]}],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"icon-image":{type:"resolvedImage",tokens:!0,expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"icon-rotate":{type:"number",default:0,period:360,units:"degrees",requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"icon-padding":{type:"number",default:2,minimum:0,units:"pixels",requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"icon-keep-upright":{type:"boolean",default:!1,requires:["icon-image",{"icon-rotation-alignment":"map"},{"symbol-placement":["line","line-center"]}],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-offset":{type:"array",value:"number",length:2,default:[0,0],requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"icon-anchor":{type:"enum",values:{center:{},left:{},right:{},top:{},bottom:{},"top-left":{},"top-right":{},"bottom-left":{},"bottom-right":{}},default:"center",requires:["icon-image"],expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"icon-pitch-alignment":{type:"enum",values:{map:{},viewport:{},auto:{}},default:"auto",requires:["icon-image"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-pitch-alignment":{type:"enum",values:{map:{},viewport:{},auto:{}},default:"auto",requires:["text-field"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-rotation-alignment":{type:"enum",values:{map:{},viewport:{},auto:{}},default:"auto",requires:["text-field"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-field":{type:"formatted",default:"",tokens:!0,expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-font":{type:"array",value:"string",default:["Open Sans Regular","Arial Unicode MS Regular"],requires:["text-field"],expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-size":{type:"number",default:16,minimum:0,units:"pixels",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-max-width":{type:"number",default:10,minimum:0,units:"ems",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-line-height":{type:"number",default:1.2,units:"ems",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"text-letter-spacing":{type:"number",default:0,units:"ems",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-justify":{type:"enum",values:{auto:{},left:{},center:{},right:{}},default:"center",requires:["text-field"],expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-radial-offset":{type:"number",units:"ems",default:0,requires:["text-field"],"property-type":"data-driven",expression:{interpolated:!0,parameters:["zoom","feature"]}},"text-variable-anchor":{type:"array",value:"enum",values:{center:{},left:{},right:{},top:{},bottom:{},"top-left":{},"top-right":{},"bottom-left":{},"bottom-right":{}},requires:["text-field",{"symbol-placement":["point"]}],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-anchor":{type:"enum",values:{center:{},left:{},right:{},top:{},bottom:{},"top-left":{},"top-right":{},"bottom-left":{},"bottom-right":{}},default:"center",requires:["text-field",{"!":"text-variable-anchor"}],expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-max-angle":{type:"number",default:45,units:"degrees",requires:["text-field",{"symbol-placement":["line","line-center"]}],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"text-writing-mode":{type:"array",value:"enum",values:{horizontal:{},vertical:{}},requires:["text-field",{"symbol-placement":["point"]}],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-rotate":{type:"number",default:0,period:360,units:"degrees",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-padding":{type:"number",default:2,minimum:0,units:"pixels",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"text-keep-upright":{type:"boolean",default:!0,requires:["text-field",{"text-rotation-alignment":"map"},{"symbol-placement":["line","line-center"]}],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-transform":{type:"enum",values:{none:{},uppercase:{},lowercase:{}},default:"none",requires:["text-field"],expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-offset":{type:"array",value:"number",units:"ems",length:2,default:[0,0],requires:["text-field",{"!":"text-radial-offset"}],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-allow-overlap":{type:"boolean",default:!1,requires:["text-field"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-ignore-placement":{type:"boolean",default:!1,requires:["text-field"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-optional":{type:"boolean",default:!1,requires:["text-field","icon-image"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},Lo={visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},ko={visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},Ds={type:"array",value:"*"},Fs={type:"enum",values:{"==":{},"!=":{},">":{},">=":{},"<":{},"<=":{},in:{},"!in":{},all:{},any:{},none:{},has:{},"!has":{},within:{}}},ll={type:"enum",values:{Point:{},LineString:{},Polygon:{}}},ul={type:"array",minimum:0,maximum:24,value:["number","color"],length:2},zl={type:"array",value:"*",minimum:1},us={anchor:{type:"enum",default:"viewport",values:{map:{},viewport:{}},"property-type":"data-constant",transition:!1,expression:{interpolated:!1,parameters:["zoom"]}},position:{type:"array",default:[1.15,210,30],length:3,value:"number","property-type":"data-constant",transition:!0,expression:{interpolated:!0,parameters:["zoom"]}},color:{type:"color","property-type":"data-constant",default:"#ffffff",expression:{interpolated:!0,parameters:["zoom"]},transition:!0},intensity:{type:"number","property-type":"data-constant",default:.5,minimum:0,maximum:1,expression:{interpolated:!0,parameters:["zoom"]},transition:!0}},il=["paint_fill","paint_line","paint_circle","paint_heatmap","paint_fill-extrusion","paint_symbol","paint_raster","paint_hillshade","paint_background"],As={"fill-antialias":{type:"boolean",default:!0,expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"fill-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"fill-color":{type:"color",default:"#000000",transition:!0,requires:[{"!":"fill-pattern"}],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"fill-outline-color":{type:"color",transition:!0,requires:[{"!":"fill-pattern"},{"fill-antialias":!0}],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"fill-translate":{type:"array",value:"number",length:2,default:[0,0],transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"fill-translate-anchor":{type:"enum",values:{map:{},viewport:{}},default:"map",requires:["fill-translate"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"fill-pattern":{type:"resolvedImage",transition:!0,expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"cross-faded-data-driven"}},cl={"line-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"line-color":{type:"color",default:"#000000",transition:!0,requires:[{"!":"line-pattern"}],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"line-translate":{type:"array",value:"number",length:2,default:[0,0],transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"line-translate-anchor":{type:"enum",values:{map:{},viewport:{}},default:"map",requires:["line-translate"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"line-width":{type:"number",default:1,minimum:0,transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"line-gap-width":{type:"number",default:0,minimum:0,transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"line-offset":{type:"number",default:0,transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"line-blur":{type:"number",default:0,minimum:0,transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"line-dasharray":{type:"array",value:"number",minimum:0,transition:!0,units:"line widths",requires:[{"!":"line-pattern"}],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"cross-faded"},"line-pattern":{type:"resolvedImage",transition:!0,expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"cross-faded-data-driven"},"line-gradient":{type:"color",transition:!1,requires:[{"!":"line-dasharray"},{"!":"line-pattern"},{source:"geojson",has:{lineMetrics:!0}}],expression:{interpolated:!0,parameters:["line-progress"]},"property-type":"color-ramp"}},Ks={"circle-radius":{type:"number",default:5,minimum:0,transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"circle-color":{type:"color",default:"#000000",transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"circle-blur":{type:"number",default:0,transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"circle-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"circle-translate":{type:"array",value:"number",length:2,default:[0,0],transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"circle-translate-anchor":{type:"enum",values:{map:{},viewport:{}},default:"map",requires:["circle-translate"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"circle-pitch-scale":{type:"enum",values:{map:{},viewport:{}},default:"map",expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"circle-pitch-alignment":{type:"enum",values:{map:{},viewport:{}},default:"viewport",expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"circle-stroke-width":{type:"number",default:0,minimum:0,transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"circle-stroke-color":{type:"color",default:"#000000",transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"circle-stroke-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"}},zs={"heatmap-radius":{type:"number",default:30,minimum:1,transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"heatmap-weight":{type:"number",default:1,minimum:0,transition:!1,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"heatmap-intensity":{type:"number",default:1,minimum:0,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"heatmap-color":{type:"color",default:["interpolate",["linear"],["heatmap-density"],0,"rgba(0, 0, 255, 0)",.1,"royalblue",.3,"cyan",.5,"lime",.7,"yellow",1,"red"],transition:!1,expression:{interpolated:!0,parameters:["heatmap-density"]},"property-type":"color-ramp"},"heatmap-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"}},Io={"icon-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"icon-color":{type:"color",default:"#000000",transition:!0,requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"icon-halo-color":{type:"color",default:"rgba(0, 0, 0, 0)",transition:!0,requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"icon-halo-width":{type:"number",default:0,minimum:0,transition:!0,units:"pixels",requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"icon-halo-blur":{type:"number",default:0,minimum:0,transition:!0,units:"pixels",requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"icon-translate":{type:"array",value:"number",length:2,default:[0,0],transition:!0,units:"pixels",requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"icon-translate-anchor":{type:"enum",values:{map:{},viewport:{}},default:"map",requires:["icon-image","icon-translate"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"text-color":{type:"color",default:"#000000",transition:!0,overridable:!0,requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"text-halo-color":{type:"color",default:"rgba(0, 0, 0, 0)",transition:!0,requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"text-halo-width":{type:"number",default:0,minimum:0,transition:!0,units:"pixels",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"text-halo-blur":{type:"number",default:0,minimum:0,transition:!0,units:"pixels",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"text-translate":{type:"array",value:"number",length:2,default:[0,0],transition:!0,units:"pixels",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"text-translate-anchor":{type:"enum",values:{map:{},viewport:{}},default:"map",requires:["text-field","text-translate"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"}},ls={"raster-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"raster-hue-rotate":{type:"number",default:0,period:360,transition:!0,units:"degrees",expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"raster-brightness-min":{type:"number",default:0,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"raster-brightness-max":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"raster-saturation":{type:"number",default:0,minimum:-1,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"raster-contrast":{type:"number",default:0,minimum:-1,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"raster-resampling":{type:"enum",values:{linear:{},nearest:{}},default:"linear",expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"raster-fade-duration":{type:"number",default:300,minimum:0,transition:!1,units:"milliseconds",expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"}},Zl={"hillshade-illumination-direction":{type:"number",default:335,minimum:0,maximum:359,transition:!1,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"hillshade-illumination-anchor":{type:"enum",values:{map:{},viewport:{}},default:"viewport",expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"hillshade-exaggeration":{type:"number",default:.5,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"hillshade-shadow-color":{type:"color",default:"#000000",transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"hillshade-highlight-color":{type:"color",default:"#FFFFFF",transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"hillshade-accent-color":{type:"color",default:"#000000",transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"}},Su={"background-color":{type:"color",default:"#000000",transition:!0,requires:[{"!":"background-pattern"}],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"background-pattern":{type:"resolvedImage",transition:!0,expression:{interpolated:!1,parameters:["zoom"]},"property-type":"cross-faded"},"background-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"}},nc={duration:{type:"number",default:300,minimum:0,units:"milliseconds"},delay:{type:"number",default:0,minimum:0,units:"milliseconds"}},bs={"*":{type:"string"}},Rn={$version:Ha,$root:vo,sources:jn,source:Mt,source_vector:kr,source_raster:Jr,source_raster_dem:vi,source_geojson:hn,source_video:An,source_image:Mn,layer:Li,layout:_n,layout_background:ya,layout_fill:Jn,layout_circle:Ma,layout_heatmap:_o,"layout_fill-extrusion":{visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},layout_line:No,layout_symbol:po,layout_raster:Lo,layout_hillshade:ko,filter:Ds,filter_operator:Fs,geometry_type:ll,function:{expression:{type:"expression"},stops:{type:"array",value:"function_stop"},base:{type:"number",default:1,minimum:0},property:{type:"string",default:"$zoom"},type:{type:"enum",values:{identity:{},exponential:{},interval:{},categorical:{}},default:"exponential"},colorSpace:{type:"enum",values:{rgb:{},lab:{},hcl:{}},default:"rgb"},default:{type:"*",required:!1}},function_stop:ul,expression:zl,light:us,paint:il,paint_fill:As,"paint_fill-extrusion":{"fill-extrusion-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"fill-extrusion-color":{type:"color",default:"#000000",transition:!0,requires:[{"!":"fill-extrusion-pattern"}],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"fill-extrusion-translate":{type:"array",value:"number",length:2,default:[0,0],transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"fill-extrusion-translate-anchor":{type:"enum",values:{map:{},viewport:{}},default:"map",requires:["fill-extrusion-translate"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"fill-extrusion-pattern":{type:"resolvedImage",transition:!0,expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"cross-faded-data-driven"},"fill-extrusion-height":{type:"number",default:0,minimum:0,units:"meters",transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"fill-extrusion-base":{type:"number",default:0,minimum:0,units:"meters",transition:!0,requires:["fill-extrusion-height"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"fill-extrusion-vertical-gradient":{type:"boolean",default:!0,transition:!1,expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"}},paint_line:cl,paint_circle:Ks,paint_heatmap:zs,paint_symbol:Io,paint_raster:ls,paint_hillshade:Zl,paint_background:Su,transition:nc,"property-type":{"data-driven":{type:"property-type"},"cross-faded":{type:"property-type"},"cross-faded-data-driven":{type:"property-type"},"color-ramp":{type:"property-type"},"data-constant":{type:"property-type"},constant:{type:"property-type"}},promoteId:bs},_a=function(y,I,U,$){this.message=(y?y+": ":"")+U,$&&(this.identifier=$),I!=null&&I.__line__&&(this.line=I.__line__)};function Vu(m){var y=m.key,I=m.value;return I?[new _a(y,I,"constants have been deprecated as of v8")]:[]}function Ol(m){for(var y=[],I=arguments.length-1;I-- >0;)y[I]=arguments[I+1];for(var U=0,$=y;U<$.length;U+=1){var ae=$[U];for(var he in ae)m[he]=ae[he]}return m}function xo(m){return m instanceof Number||m instanceof String||m instanceof Boolean?m.valueOf():m}function Yl(m){if(Array.isArray(m))return m.map(Yl);if(m instanceof Object&&!(m instanceof Number||m instanceof String||m instanceof Boolean)){var y={};for(var I in m)y[I]=Yl(m[I]);return y}return xo(m)}var Ns=function(m){function y(I,U){m.call(this,U),this.message=U,this.key=I}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y}(Error),Hl=function(y,I){I===void 0&&(I=[]),this.parent=y,this.bindings={};for(var U=0,$=I;U<$.length;U+=1){var ae=$[U],he=ae[0],Oe=ae[1];this.bindings[he]=Oe}};Hl.prototype.concat=function(y){return new Hl(this,y)},Hl.prototype.get=function(y){if(this.bindings[y])return this.bindings[y];if(this.parent)return this.parent.get(y);throw new Error(y+" not found in scope.")},Hl.prototype.has=function(y){return this.bindings[y]?!0:this.parent?this.parent.has(y):!1};var ac={kind:"null"},aa={kind:"number"},Oo={kind:"string"},qo={kind:"boolean"},ql={kind:"color"},Pc={kind:"object"},Do={kind:"value"},rf={kind:"error"},Uf={kind:"collator"},pl={kind:"formatted"},Zc={kind:"resolvedImage"};function Kl(m,y){return{kind:"array",itemType:m,N:y}}function Os(m){if(m.kind==="array"){var y=Os(m.itemType);return typeof m.N=="number"?"array<"+y+", "+m.N+">":m.itemType.kind==="value"?"array":"array<"+y+">"}else return m.kind}var yu=[ac,aa,Oo,qo,ql,pl,Pc,Kl(Do),Zc];function oc(m,y){if(y.kind==="error")return null;if(m.kind==="array"){if(y.kind==="array"&&(y.N===0&&y.itemType.kind==="value"||!oc(m.itemType,y.itemType))&&(typeof m.N!="number"||m.N===y.N))return null}else{if(m.kind===y.kind)return null;if(m.kind==="value")for(var I=0,U=yu;I<U.length;I+=1){var $=U[I];if(!oc($,y))return null}}return"Expected "+Os(m)+" but found "+Os(y)+" instead."}function Cf(m,y){return y.some(function(I){return I.kind===m.kind})}function sc(m,y){return y.some(function(I){return I==="null"?m===null:I==="array"?Array.isArray(m):I==="object"?m&&!Array.isArray(m)&&typeof m=="object":I===typeof m})}var Vh=a(function(m,y){var I={transparent:[0,0,0,0],aliceblue:[240,248,255,1],antiquewhite:[250,235,215,1],aqua:[0,255,255,1],aquamarine:[127,255,212,1],azure:[240,255,255,1],beige:[245,245,220,1],bisque:[255,228,196,1],black:[0,0,0,1],blanchedalmond:[255,235,205,1],blue:[0,0,255,1],blueviolet:[138,43,226,1],brown:[165,42,42,1],burlywood:[222,184,135,1],cadetblue:[95,158,160,1],chartreuse:[127,255,0,1],chocolate:[210,105,30,1],coral:[255,127,80,1],cornflowerblue:[100,149,237,1],cornsilk:[255,248,220,1],crimson:[220,20,60,1],cyan:[0,255,255,1],darkblue:[0,0,139,1],darkcyan:[0,139,139,1],darkgoldenrod:[184,134,11,1],darkgray:[169,169,169,1],darkgreen:[0,100,0,1],darkgrey:[169,169,169,1],darkkhaki:[189,183,107,1],darkmagenta:[139,0,139,1],darkolivegreen:[85,107,47,1],darkorange:[255,140,0,1],darkorchid:[153,50,204,1],darkred:[139,0,0,1],darksalmon:[233,150,122,1],darkseagreen:[143,188,143,1],darkslateblue:[72,61,139,1],darkslategray:[47,79,79,1],darkslategrey:[47,79,79,1],darkturquoise:[0,206,209,1],darkviolet:[148,0,211,1],deeppink:[255,20,147,1],deepskyblue:[0,191,255,1],dimgray:[105,105,105,1],dimgrey:[105,105,105,1],dodgerblue:[30,144,255,1],firebrick:[178,34,34,1],floralwhite:[255,250,240,1],forestgreen:[34,139,34,1],fuchsia:[255,0,255,1],gainsboro:[220,220,220,1],ghostwhite:[248,248,255,1],gold:[255,215,0,1],goldenrod:[218,165,32,1],gray:[128,128,128,1],green:[0,128,0,1],greenyellow:[173,255,47,1],grey:[128,128,128,1],honeydew:[240,255,240,1],hotpink:[255,105,180,1],indianred:[205,92,92,1],indigo:[75,0,130,1],ivory:[255,255,240,1],khaki:[240,230,140,1],lavender:[230,230,250,1],lavenderblush:[255,240,245,1],lawngreen:[124,252,0,1],lemonchiffon:[255,250,205,1],lightblue:[173,216,230,1],lightcoral:[240,128,128,1],lightcyan:[224,255,255,1],lightgoldenrodyellow:[250,250,210,1],lightgray:[211,211,211,1],lightgreen:[144,238,144,1],lightgrey:[211,211,211,1],lightpink:[255,182,193,1],lightsalmon:[255,160,122,1],lightseagreen:[32,178,170,1],lightskyblue:[135,206,250,1],lightslategray:[119,136,153,1],lightslategrey:[119,136,153,1],lightsteelblue:[176,196,222,1],lightyellow:[255,255,224,1],lime:[0,255,0,1],limegreen:[50,205,50,1],linen:[250,240,230,1],magenta:[255,0,255,1],maroon:[128,0,0,1],mediumaquamarine:[102,205,170,1],mediumblue:[0,0,205,1],mediumorchid:[186,85,211,1],mediumpurple:[147,112,219,1],mediumseagreen:[60,179,113,1],mediumslateblue:[123,104,238,1],mediumspringgreen:[0,250,154,1],mediumturquoise:[72,209,204,1],mediumvioletred:[199,21,133,1],midnightblue:[25,25,112,1],mintcream:[245,255,250,1],mistyrose:[255,228,225,1],moccasin:[255,228,181,1],navajowhite:[255,222,173,1],navy:[0,0,128,1],oldlace:[253,245,230,1],olive:[128,128,0,1],olivedrab:[107,142,35,1],orange:[255,165,0,1],orangered:[255,69,0,1],orchid:[218,112,214,1],palegoldenrod:[238,232,170,1],palegreen:[152,251,152,1],paleturquoise:[175,238,238,1],palevioletred:[219,112,147,1],papayawhip:[255,239,213,1],peachpuff:[255,218,185,1],peru:[205,133,63,1],pink:[255,192,203,1],plum:[221,160,221,1],powderblue:[176,224,230,1],purple:[128,0,128,1],rebeccapurple:[102,51,153,1],red:[255,0,0,1],rosybrown:[188,143,143,1],royalblue:[65,105,225,1],saddlebrown:[139,69,19,1],salmon:[250,128,114,1],sandybrown:[244,164,96,1],seagreen:[46,139,87,1],seashell:[255,245,238,1],sienna:[160,82,45,1],silver:[192,192,192,1],skyblue:[135,206,235,1],slateblue:[106,90,205,1],slategray:[112,128,144,1],slategrey:[112,128,144,1],snow:[255,250,250,1],springgreen:[0,255,127,1],steelblue:[70,130,180,1],tan:[210,180,140,1],teal:[0,128,128,1],thistle:[216,191,216,1],tomato:[255,99,71,1],turquoise:[64,224,208,1],violet:[238,130,238,1],wheat:[245,222,179,1],white:[255,255,255,1],whitesmoke:[245,245,245,1],yellow:[255,255,0,1],yellowgreen:[154,205,50,1]};function U(gt){return gt=Math.round(gt),gt<0?0:gt>255?255:gt}function $(gt){return gt<0?0:gt>1?1:gt}function ae(gt){return gt[gt.length-1]==="%"?U(parseFloat(gt)/100*255):U(parseInt(gt))}function he(gt){return gt[gt.length-1]==="%"?$(parseFloat(gt)/100):$(parseFloat(gt))}function Oe(gt,Et,or){return or<0?or+=1:or>1&&(or-=1),or*6<1?gt+(Et-gt)*or*6:or*2<1?Et:or*3<2?gt+(Et-gt)*(2/3-or)*6:gt}function rt(gt){var Et=gt.replace(/ /g,"").toLowerCase();if(Et in I)return I[Et].slice();if(Et[0]==="#"){if(Et.length===4){var or=parseInt(Et.substr(1),16);return or>=0&&or<=4095?[(or&3840)>>4|(or&3840)>>8,or&240|(or&240)>>4,or&15|(or&15)<<4,1]:null}else if(Et.length===7){var or=parseInt(Et.substr(1),16);return or>=0&&or<=16777215?[(or&16711680)>>16,(or&65280)>>8,or&255,1]:null}return null}var _r=Et.indexOf("("),pr=Et.indexOf(")");if(_r!==-1&&pr+1===Et.length){var Fr=Et.substr(0,_r),oi=Et.substr(_r+1,pr-(_r+1)).split(","),Hi=1;switch(Fr){case"rgba":if(oi.length!==4)return null;Hi=he(oi.pop());case"rgb":return oi.length!==3?null:[ae(oi[0]),ae(oi[1]),ae(oi[2]),Hi];case"hsla":if(oi.length!==4)return null;Hi=he(oi.pop());case"hsl":if(oi.length!==3)return null;var Ai=(parseFloat(oi[0])%360+360)%360/360,bn=he(oi[1]),nn=he(oi[2]),xn=nn<=.5?nn*(bn+1):nn+bn-nn*bn,Pn=nn*2-xn;return[U(Oe(Pn,xn,Ai+1/3)*255),U(Oe(Pn,xn,Ai)*255),U(Oe(Pn,xn,Ai-1/3)*255),Hi];default:return null}}return null}try{y.parseCSSColor=rt}catch(gt){}}),Lf=Vh.parseCSSColor,cs=function(y,I,U,$){$===void 0&&($=1),this.r=y,this.g=I,this.b=U,this.a=$};cs.parse=function(y){if(y){if(y instanceof cs)return y;if(typeof y=="string"){var I=Lf(y);if(I)return new cs(I[0]/255*I[3],I[1]/255*I[3],I[2]/255*I[3],I[3])}}},cs.prototype.toString=function(){var y=this.toArray(),I=y[0],U=y[1],$=y[2],ae=y[3];return"rgba("+Math.round(I)+","+Math.round(U)+","+Math.round($)+","+ae+")"},cs.prototype.toArray=function(){var y=this,I=y.r,U=y.g,$=y.b,ae=y.a;return ae===0?[0,0,0,0]:[I*255/ae,U*255/ae,$*255/ae,ae]},cs.black=new cs(0,0,0,1),cs.white=new cs(1,1,1,1),cs.transparent=new cs(0,0,0,0),cs.red=new cs(1,0,0,1);var nf=function(y,I,U){y?this.sensitivity=I?"variant":"case":this.sensitivity=I?"accent":"base",this.locale=U,this.collator=new Intl.Collator(this.locale?this.locale:[],{sensitivity:this.sensitivity,usage:"search"})};nf.prototype.compare=function(y,I){return this.collator.compare(y,I)},nf.prototype.resolvedLocale=function(){return new Intl.Collator(this.locale?this.locale:[]).resolvedOptions().locale};var Vf=function(y,I,U,$,ae){this.text=y,this.image=I,this.scale=U,this.fontStack=$,this.textColor=ae},Jl=function(y){this.sections=y};Jl.fromString=function(y){return new Jl([new Vf(y,null,null,null,null)])},Jl.prototype.isEmpty=function(){return this.sections.length===0?!0:!this.sections.some(function(y){return y.text.length!==0||y.image&&y.image.name.length!==0})},Jl.factory=function(y){return y instanceof Jl?y:Jl.fromString(y)},Jl.prototype.toString=function(){return this.sections.length===0?"":this.sections.map(function(y){return y.text}).join("")},Jl.prototype.serialize=function(){for(var y=["format"],I=0,U=this.sections;I<U.length;I+=1){var $=U[I];if($.image){y.push(["image",$.image.name]);continue}y.push($.text);var ae={};$.fontStack&&(ae["text-font"]=["literal",$.fontStack.split(",")]),$.scale&&(ae["font-scale"]=$.scale),$.textColor&&(ae["text-color"]=["rgba"].concat($.textColor.toArray())),y.push(ae)}return y};var fl=function(y){this.name=y.name,this.available=y.available};fl.prototype.toString=function(){return this.name},fl.fromString=function(y){return y?new fl({name:y,available:!1}):null},fl.prototype.serialize=function(){return["image",this.name]};function lc(m,y,I,U){if(!(typeof m=="number"&&m>=0&&m<=255&&typeof y=="number"&&y>=0&&y<=255&&typeof I=="number"&&I>=0&&I<=255)){var $=typeof U=="number"?[m,y,I,U]:[m,y,I];return"Invalid rgba value ["+$.join(", ")+"]: 'r', 'g', and 'b' must be between 0 and 255."}return typeof U=="undefined"||typeof U=="number"&&U>=0&&U<=1?null:"Invalid rgba value ["+[m,y,I,U].join(", ")+"]: 'a' must be between 0 and 1."}function Fu(m){if(m===null)return!0;if(typeof m=="string")return!0;if(typeof m=="boolean")return!0;if(typeof m=="number")return!0;if(m instanceof cs)return!0;if(m instanceof nf)return!0;if(m instanceof Jl)return!0;if(m instanceof fl)return!0;if(Array.isArray(m)){for(var y=0,I=m;y<I.length;y+=1){var U=I[y];if(!Fu(U))return!1}return!0}else if(typeof m=="object"){for(var $ in m)if(!Fu(m[$]))return!1;return!0}else return!1}function Es(m){if(m===null)return ac;if(typeof m=="string")return Oo;if(typeof m=="boolean")return qo;if(typeof m=="number")return aa;if(m instanceof cs)return ql;if(m instanceof nf)return Uf;if(m instanceof Jl)return pl;if(m instanceof fl)return Zc;if(Array.isArray(m)){for(var y=m.length,I,U=0,$=m;U<$.length;U+=1){var ae=$[U],he=Es(ae);if(!I)I=he;else{if(I===he)continue;I=Do;break}}return Kl(I||Do,y)}else return Pc}function Hs(m){var y=typeof m;return m===null?"":y==="string"||y==="number"||y==="boolean"?String(m):m instanceof cs||m instanceof Jl||m instanceof fl?m.toString():JSON.stringify(m)}var Go=function(y,I){this.type=y,this.value=I};Go.parse=function(y,I){if(y.length!==2)return I.error("'literal' expression requires exactly one argument, but found "+(y.length-1)+" instead.");if(!Fu(y[1]))return I.error("invalid value");var U=y[1],$=Es(U),ae=I.expectedType;return $.kind==="array"&&$.N===0&&ae&&ae.kind==="array"&&(typeof ae.N!="number"||ae.N===0)&&($=ae),new Go($,U)},Go.prototype.evaluate=function(){return this.value},Go.prototype.eachChild=function(){},Go.prototype.outputDefined=function(){return!0},Go.prototype.serialize=function(){return this.type.kind==="array"||this.type.kind==="object"?["literal",this.value]:this.value instanceof cs?["rgba"].concat(this.value.toArray()):this.value instanceof Jl?this.value.serialize():this.value};var ps=function(y){this.name="ExpressionEvaluationError",this.message=y};ps.prototype.toJSON=function(){return this.message};var uc={string:Oo,number:aa,boolean:qo,object:Pc},xl=function(y,I){this.type=y,this.args=I};xl.parse=function(y,I){if(y.length<2)return I.error("Expected at least one argument.");var U=1,$,ae=y[0];if(ae==="array"){var he;if(y.length>2){var Oe=y[1];if(typeof Oe!="string"||!(Oe in uc)||Oe==="object")return I.error('The item type argument of "array" must be one of string, number, boolean',1);he=uc[Oe],U++}else he=Do;var rt;if(y.length>3){if(y[2]!==null&&(typeof y[2]!="number"||y[2]<0||y[2]!==Math.floor(y[2])))return I.error('The length argument to "array" must be a positive integer literal',2);rt=y[2],U++}$=Kl(he,rt)}else $=uc[ae];for(var gt=[];U<y.length;U++){var Et=I.parse(y[U],U,Do);if(!Et)return null;gt.push(Et)}return new xl($,gt)},xl.prototype.evaluate=function(y){for(var I=0;I<this.args.length;I++){var U=this.args[I].evaluate(y),$=oc(this.type,Es(U));if($){if(I===this.args.length-1)throw new ps("Expected value to be of type "+Os(this.type)+", but found "+Os(Es(U))+" instead.")}else return U}return null},xl.prototype.eachChild=function(y){this.args.forEach(y)},xl.prototype.outputDefined=function(){return this.args.every(function(y){return y.outputDefined()})},xl.prototype.serialize=function(){var y=this.type,I=[y.kind];if(y.kind==="array"){var U=y.itemType;if(U.kind==="string"||U.kind==="number"||U.kind==="boolean"){I.push(U.kind);var $=y.N;(typeof $=="number"||this.args.length>1)&&I.push($)}}return I.concat(this.args.map(function(ae){return ae.serialize()}))};var Gu=function(y){this.type=pl,this.sections=y};Gu.parse=function(y,I){if(y.length<2)return I.error("Expected at least one argument.");var U=y[1];if(!Array.isArray(U)&&typeof U=="object")return I.error("First argument must be an image or text section.");for(var $=[],ae=!1,he=1;he<=y.length-1;++he){var Oe=y[he];if(ae&&typeof Oe=="object"&&!Array.isArray(Oe)){ae=!1;var rt=null;if(Oe["font-scale"]&&(rt=I.parse(Oe["font-scale"],1,aa),!rt))return null;var gt=null;if(Oe["text-font"]&&(gt=I.parse(Oe["text-font"],1,Kl(Oo)),!gt))return null;var Et=null;if(Oe["text-color"]&&(Et=I.parse(Oe["text-color"],1,ql),!Et))return null;var or=$[$.length-1];or.scale=rt,or.font=gt,or.textColor=Et}else{var _r=I.parse(y[he],1,Do);if(!_r)return null;var pr=_r.type.kind;if(pr!=="string"&&pr!=="value"&&pr!=="null"&&pr!=="resolvedImage")return I.error("Formatted text type must be 'string', 'value', 'image' or 'null'.");ae=!0,$.push({content:_r,scale:null,font:null,textColor:null})}}return new Gu($)},Gu.prototype.evaluate=function(y){var I=function(U){var $=U.content.evaluate(y);return Es($)===Zc?new Vf("",$,null,null,null):new Vf(Hs($),null,U.scale?U.scale.evaluate(y):null,U.font?U.font.evaluate(y).join(","):null,U.textColor?U.textColor.evaluate(y):null)};return new Jl(this.sections.map(I))},Gu.prototype.eachChild=function(y){for(var I=0,U=this.sections;I<U.length;I+=1){var $=U[I];y($.content),$.scale&&y($.scale),$.font&&y($.font),$.textColor&&y($.textColor)}},Gu.prototype.outputDefined=function(){return!1},Gu.prototype.serialize=function(){for(var y=["format"],I=0,U=this.sections;I<U.length;I+=1){var $=U[I];y.push($.content.serialize());var ae={};$.scale&&(ae["font-scale"]=$.scale.serialize()),$.font&&(ae["text-font"]=$.font.serialize()),$.textColor&&(ae["text-color"]=$.textColor.serialize()),y.push(ae)}return y};var qs=function(y){this.type=Zc,this.input=y};qs.parse=function(y,I){if(y.length!==2)return I.error("Expected two arguments.");var U=I.parse(y[1],1,Oo);return U?new qs(U):I.error("No image name provided.")},qs.prototype.evaluate=function(y){var I=this.input.evaluate(y),U=fl.fromString(I);return U&&y.availableImages&&(U.available=y.availableImages.indexOf(I)>-1),U},qs.prototype.eachChild=function(y){y(this.input)},qs.prototype.outputDefined=function(){return!1},qs.prototype.serialize=function(){return["image",this.input.serialize()]};var ad={"to-boolean":qo,"to-color":ql,"to-number":aa,"to-string":Oo},Po=function(y,I){this.type=y,this.args=I};Po.parse=function(y,I){if(y.length<2)return I.error("Expected at least one argument.");var U=y[0];if((U==="to-boolean"||U==="to-string")&&y.length!==2)return I.error("Expected one argument.");for(var $=ad[U],ae=[],he=1;he<y.length;he++){var Oe=I.parse(y[he],he,Do);if(!Oe)return null;ae.push(Oe)}return new Po($,ae)},Po.prototype.evaluate=function(y){if(this.type.kind==="boolean")return!!this.args[0].evaluate(y);if(this.type.kind==="color"){for(var I,U,$=0,ae=this.args;$<ae.length;$+=1){var he=ae[$];if(I=he.evaluate(y),U=null,I instanceof cs)return I;if(typeof I=="string"){var Oe=y.parseColor(I);if(Oe)return Oe}else if(Array.isArray(I)&&(I.length<3||I.length>4?U="Invalid rbga value "+JSON.stringify(I)+": expected an array containing either three or four numeric values.":U=lc(I[0],I[1],I[2],I[3]),!U))return new cs(I[0]/255,I[1]/255,I[2]/255,I[3])}throw new ps(U||"Could not parse color from value '"+(typeof I=="string"?I:String(JSON.stringify(I)))+"'")}else if(this.type.kind==="number"){for(var rt=null,gt=0,Et=this.args;gt<Et.length;gt+=1){var or=Et[gt];if(rt=or.evaluate(y),rt===null)return 0;var _r=Number(rt);if(!isNaN(_r))return _r}throw new ps("Could not convert "+JSON.stringify(rt)+" to number.")}else return this.type.kind==="formatted"?Jl.fromString(Hs(this.args[0].evaluate(y))):this.type.kind==="resolvedImage"?fl.fromString(Hs(this.args[0].evaluate(y))):Hs(this.args[0].evaluate(y))},Po.prototype.eachChild=function(y){this.args.forEach(y)},Po.prototype.outputDefined=function(){return this.args.every(function(y){return y.outputDefined()})},Po.prototype.serialize=function(){if(this.type.kind==="formatted")return new Gu([{content:this.args[0],scale:null,font:null,textColor:null}]).serialize();if(this.type.kind==="resolvedImage")return new qs(this.args[0]).serialize();var y=["to-"+this.type.kind];return this.eachChild(function(I){y.push(I.serialize())}),y};var od=["Unknown","Point","LineString","Polygon"],Yo=function(){this.globals=null,this.feature=null,this.featureState=null,this.formattedSection=null,this._parseColorCache={},this.availableImages=null,this.canonical=null};Yo.prototype.id=function(){return this.feature&&"id"in this.feature?this.feature.id:null},Yo.prototype.geometryType=function(){return this.feature?typeof this.feature.type=="number"?od[this.feature.type]:this.feature.type:null},Yo.prototype.geometry=function(){return this.feature&&"geometry"in this.feature?this.feature.geometry:null},Yo.prototype.canonicalID=function(){return this.canonical},Yo.prototype.properties=function(){return this.feature&&this.feature.properties||{}},Yo.prototype.parseColor=function(y){var I=this._parseColorCache[y];return I||(I=this._parseColorCache[y]=cs.parse(y)),I};var Pa=function(y,I,U,$){this.name=y,this.type=I,this._evaluate=U,this.args=$};Pa.prototype.evaluate=function(y){return this._evaluate(y,this.args)},Pa.prototype.eachChild=function(y){this.args.forEach(y)},Pa.prototype.outputDefined=function(){return!1},Pa.prototype.serialize=function(){return[this.name].concat(this.args.map(function(y){return y.serialize()}))},Pa.parse=function(y,I){var U,$=y[0],ae=Pa.definitions[$];if(!ae)return I.error('Unknown expression "'+$+'". If you wanted a literal array, use ["literal", [...]].',0);for(var he=Array.isArray(ae)?ae[0]:ae.type,Oe=Array.isArray(ae)?[[ae[1],ae[2]]]:ae.overloads,rt=Oe.filter(function(to){var ao=to[0];return!Array.isArray(ao)||ao.length===y.length-1}),gt=null,Et=0,or=rt;Et<or.length;Et+=1){var _r=or[Et],pr=_r[0],Fr=_r[1];gt=new ks(I.registry,I.path,null,I.scope);for(var oi=[],Hi=!1,Ai=1;Ai<y.length;Ai++){var bn=y[Ai],nn=Array.isArray(pr)?pr[Ai-1]:pr.type,xn=gt.parse(bn,1+oi.length,nn);if(!xn){Hi=!0;break}oi.push(xn)}if(!Hi){if(Array.isArray(pr)&&pr.length!==oi.length){gt.error("Expected "+pr.length+" arguments, but found "+oi.length+" instead.");continue}for(var Pn=0;Pn<oi.length;Pn++){var Zn=Array.isArray(pr)?pr[Pn]:pr.type,ga=oi[Pn];gt.concat(Pn+1).checkSubtype(Zn,ga.type)}if(gt.errors.length===0)return new Pa($,he,Fr,oi)}}if(rt.length===1)(U=I.errors).push.apply(U,gt.errors);else{for(var ha=rt.length?rt:Oe,eo=ha.map(function(to){var ao=to[0];return af(ao)}).join(" | "),za=[],Za=1;Za<y.length;Za++){var Ko=I.parse(y[Za],1+za.length);if(!Ko)return null;za.push(Os(Ko.type))}I.error("Expected arguments of type "+eo+", but found ("+za.join(", ")+") instead.")}return null},Pa.register=function(y,I){Pa.definitions=I;for(var U in I)y[U]=Pa};function af(m){return Array.isArray(m)?"("+m.map(Os).join(", ")+")":"("+Os(m.type)+"...)"}var Hu=function(y,I,U){this.type=Uf,this.locale=U,this.caseSensitive=y,this.diacriticSensitive=I};Hu.parse=function(y,I){if(y.length!==2)return I.error("Expected one argument.");var U=y[1];if(typeof U!="object"||Array.isArray(U))return I.error("Collator options argument must be an object.");var $=I.parse(U["case-sensitive"]===void 0?!1:U["case-sensitive"],1,qo);if(!$)return null;var ae=I.parse(U["diacritic-sensitive"]===void 0?!1:U["diacritic-sensitive"],1,qo);if(!ae)return null;var he=null;return U.locale&&(he=I.parse(U.locale,1,Oo),!he)?null:new Hu($,ae,he)},Hu.prototype.evaluate=function(y){return new nf(this.caseSensitive.evaluate(y),this.diacriticSensitive.evaluate(y),this.locale?this.locale.evaluate(y):null)},Hu.prototype.eachChild=function(y){y(this.caseSensitive),y(this.diacriticSensitive),this.locale&&y(this.locale)},Hu.prototype.outputDefined=function(){return!1},Hu.prototype.serialize=function(){var y={};return y["case-sensitive"]=this.caseSensitive.serialize(),y["diacritic-sensitive"]=this.diacriticSensitive.serialize(),this.locale&&(y.locale=this.locale.serialize()),["collator",y]};var bl=8192;function Gf(m,y){m[0]=Math.min(m[0],y[0]),m[1]=Math.min(m[1],y[1]),m[2]=Math.max(m[2],y[0]),m[3]=Math.max(m[3],y[1])}function Ic(m){return(180+m)/360}function yf(m){return(180-180/Math.PI*Math.log(Math.tan(Math.PI/4+m*Math.PI/360)))/360}function Bl(m,y){return!(m[0]<=y[0]||m[2]>=y[2]||m[1]<=y[1]||m[3]>=y[3])}function wh(m,y){var I=Ic(m[0]),U=yf(m[1]),$=Math.pow(2,y.z);return[Math.round(I*$*bl),Math.round(U*$*bl)]}function Qf(m,y,I){var U=m[0]-y[0],$=m[1]-y[1],ae=m[0]-I[0],he=m[1]-I[1];return U*he-ae*$===0&&U*ae<=0&&$*he<=0}function _f(m,y,I){return y[1]>m[1]!=I[1]>m[1]&&m[0]<(I[0]-y[0])*(m[1]-y[1])/(I[1]-y[1])+y[0]}function Yc(m,y){for(var I=!1,U=0,$=y.length;U<$;U++)for(var ae=y[U],he=0,Oe=ae.length;he<Oe-1;he++){if(Qf(m,ae[he],ae[he+1]))return!1;_f(m,ae[he],ae[he+1])&&(I=!I)}return I}function eh(m,y){for(var I=0;I<y.length;I++)if(Yc(m,y[I]))return!0;return!1}function th(m,y){return m[0]*y[1]-m[1]*y[0]}function ju(m,y,I,U){var $=m[0]-I[0],ae=m[1]-I[1],he=y[0]-I[0],Oe=y[1]-I[1],rt=U[0]-I[0],gt=U[1]-I[1],Et=$*gt-rt*ae,or=he*gt-rt*Oe;return Et>0&&or<0||Et<0&&or>0}function Hf(m,y,I,U){var $=[y[0]-m[0],y[1]-m[1]],ae=[U[0]-I[0],U[1]-I[1]];return th(ae,$)===0?!1:!!(ju(m,y,I,U)&&ju(I,U,m,y))}function cc(m,y,I){for(var U=0,$=I;U<$.length;U+=1)for(var ae=$[U],he=0;he<ae.length-1;++he)if(Hf(m,y,ae[he],ae[he+1]))return!0;return!1}function of(m,y){for(var I=0;I<m.length;++I)if(!Yc(m[I],y))return!1;for(var U=0;U<m.length-1;++U)if(cc(m[U],m[U+1],y))return!1;return!0}function Nl(m,y){for(var I=0;I<y.length;I++)if(of(m,y[I]))return!0;return!1}function Kc(m,y,I){for(var U=[],$=0;$<m.length;$++){for(var ae=[],he=0;he<m[$].length;he++){var Oe=wh(m[$][he],I);Gf(y,Oe),ae.push(Oe)}U.push(ae)}return U}function Rc(m,y,I){for(var U=[],$=0;$<m.length;$++){var ae=Kc(m[$],y,I);U.push(ae)}return U}function gs(m,y,I,U){if(m[0]<I[0]||m[0]>I[2]){var $=U*.5,ae=m[0]-I[0]>$?-U:I[0]-m[0]>$?U:0;ae===0&&(ae=m[0]-I[2]>$?-U:I[2]-m[0]>$?U:0),m[0]+=ae}Gf(y,m)}function jf(m){m[0]=m[1]=1/0,m[2]=m[3]=-1/0}function Gh(m,y,I,U){for(var $=Math.pow(2,U.z)*bl,ae=[U.x*bl,U.y*bl],he=[],Oe=0,rt=m;Oe<rt.length;Oe+=1)for(var gt=rt[Oe],Et=0,or=gt;Et<or.length;Et+=1){var _r=or[Et],pr=[_r.x+ae[0],_r.y+ae[1]];gs(pr,y,I,$),he.push(pr)}return he}function rh(m,y,I,U){for(var $=Math.pow(2,U.z)*bl,ae=[U.x*bl,U.y*bl],he=[],Oe=0,rt=m;Oe<rt.length;Oe+=1){for(var gt=rt[Oe],Et=[],or=0,_r=gt;or<_r.length;or+=1){var pr=_r[or],Fr=[pr.x+ae[0],pr.y+ae[1]];Gf(y,Fr),Et.push(Fr)}he.push(Et)}if(y[2]-y[0]<=$/2){jf(y);for(var oi=0,Hi=he;oi<Hi.length;oi+=1)for(var Ai=Hi[oi],bn=0,nn=Ai;bn<nn.length;bn+=1){var xn=nn[bn];gs(xn,y,I,$)}}return he}function sf(m,y){var I=[1/0,1/0,-1/0,-1/0],U=[1/0,1/0,-1/0,-1/0],$=m.canonicalID();if(y.type==="Polygon"){var ae=Kc(y.coordinates,U,$),he=Gh(m.geometry(),I,U,$);if(!Bl(I,U))return!1;for(var Oe=0,rt=he;Oe<rt.length;Oe+=1){var gt=rt[Oe];if(!Yc(gt,ae))return!1}}if(y.type==="MultiPolygon"){var Et=Rc(y.coordinates,U,$),or=Gh(m.geometry(),I,U,$);if(!Bl(I,U))return!1;for(var _r=0,pr=or;_r<pr.length;_r+=1){var Fr=pr[_r];if(!eh(Fr,Et))return!1}}return!0}function Th(m,y){var I=[1/0,1/0,-1/0,-1/0],U=[1/0,1/0,-1/0,-1/0],$=m.canonicalID();if(y.type==="Polygon"){var ae=Kc(y.coordinates,U,$),he=rh(m.geometry(),I,U,$);if(!Bl(I,U))return!1;for(var Oe=0,rt=he;Oe<rt.length;Oe+=1){var gt=rt[Oe];if(!of(gt,ae))return!1}}if(y.type==="MultiPolygon"){var Et=Rc(y.coordinates,U,$),or=rh(m.geometry(),I,U,$);if(!Bl(I,U))return!1;for(var _r=0,pr=or;_r<pr.length;_r+=1){var Fr=pr[_r];if(!Nl(Fr,Et))return!1}}return!0}var Mu=function(y,I){this.type=qo,this.geojson=y,this.geometries=I};Mu.parse=function(y,I){if(y.length!==2)return I.error("'within' expression requires exactly one argument, but found "+(y.length-1)+" instead.");if(Fu(y[1])){var U=y[1];if(U.type==="FeatureCollection")for(var $=0;$<U.features.length;++$){var ae=U.features[$].geometry.type;if(ae==="Polygon"||ae==="MultiPolygon")return new Mu(U,U.features[$].geometry)}else if(U.type==="Feature"){var he=U.geometry.type;if(he==="Polygon"||he==="MultiPolygon")return new Mu(U,U.geometry)}else if(U.type==="Polygon"||U.type==="MultiPolygon")return new Mu(U,U)}return I.error("'within' expression requires valid geojson object that contains polygon geometry type.")},Mu.prototype.evaluate=function(y){if(y.geometry()!=null&&y.canonicalID()!=null){if(y.geometryType()==="Point")return sf(y,this.geometries);if(y.geometryType()==="LineString")return Th(y,this.geometries)}return!1},Mu.prototype.eachChild=function(){},Mu.prototype.outputDefined=function(){return!0},Mu.prototype.serialize=function(){return["within",this.geojson]};function ih(m){if(m instanceof Pa){if(m.name==="get"&&m.args.length===1)return!1;if(m.name==="feature-state")return!1;if(m.name==="has"&&m.args.length===1)return!1;if(m.name==="properties"||m.name==="geometry-type"||m.name==="id")return!1;if(/^filter-/.test(m.name))return!1}if(m instanceof Mu)return!1;var y=!0;return m.eachChild(function(I){y&&!ih(I)&&(y=!1)}),y}function js(m){if(m instanceof Pa&&m.name==="feature-state")return!1;var y=!0;return m.eachChild(function(I){y&&!js(I)&&(y=!1)}),y}function Eu(m,y){if(m instanceof Pa&&y.indexOf(m.name)>=0)return!1;var I=!0;return m.eachChild(function(U){I&&!Eu(U,y)&&(I=!1)}),I}var Dc=function(y,I){this.type=I.type,this.name=y,this.boundExpression=I};Dc.parse=function(y,I){if(y.length!==2||typeof y[1]!="string")return I.error("'var' expression requires exactly one string literal argument.");var U=y[1];return I.scope.has(U)?new Dc(U,I.scope.get(U)):I.error('Unknown variable "'+U+'". Make sure "'+U+'" has been bound in an enclosing "let" expression before using it.',1)},Dc.prototype.evaluate=function(y){return this.boundExpression.evaluate(y)},Dc.prototype.eachChild=function(){},Dc.prototype.outputDefined=function(){return!1},Dc.prototype.serialize=function(){return["var",this.name]};var ks=function(y,I,U,$,ae){I===void 0&&(I=[]),$===void 0&&($=new Hl),ae===void 0&&(ae=[]),this.registry=y,this.path=I,this.key=I.map(function(he){return"["+he+"]"}).join(""),this.scope=$,this.errors=ae,this.expectedType=U};ks.prototype.parse=function(y,I,U,$,ae){return ae===void 0&&(ae={}),I?this.concat(I,U,$)._parse(y,ae):this._parse(y,ae)},ks.prototype._parse=function(y,I){(y===null||typeof y=="string"||typeof y=="boolean"||typeof y=="number")&&(y=["literal",y]);function U(Et,or,_r){return _r==="assert"?new xl(or,[Et]):_r==="coerce"?new Po(or,[Et]):Et}if(Array.isArray(y)){if(y.length===0)return this.error('Expected an array with at least one element. If you wanted a literal array, use ["literal", []].');var $=y[0];if(typeof $!="string")return this.error("Expression name must be a string, but found "+typeof $+' instead. If you wanted a literal array, use ["literal", [...]].',0),null;var ae=this.registry[$];if(ae){var he=ae.parse(y,this);if(!he)return null;if(this.expectedType){var Oe=this.expectedType,rt=he.type;if((Oe.kind==="string"||Oe.kind==="number"||Oe.kind==="boolean"||Oe.kind==="object"||Oe.kind==="array")&&rt.kind==="value")he=U(he,Oe,I.typeAnnotation||"assert");else if((Oe.kind==="color"||Oe.kind==="formatted"||Oe.kind==="resolvedImage")&&(rt.kind==="value"||rt.kind==="string"))he=U(he,Oe,I.typeAnnotation||"coerce");else if(this.checkSubtype(Oe,rt))return null}if(!(he instanceof Go)&&he.type.kind!=="resolvedImage"&&bc(he)){var gt=new Yo;try{he=new Go(he.type,he.evaluate(gt))}catch(Et){return this.error(Et.message),null}}return he}return this.error('Unknown expression "'+$+'". If you wanted a literal array, use ["literal", [...]].',0)}else return typeof y=="undefined"?this.error("'undefined' value invalid. Use null instead."):typeof y=="object"?this.error('Bare objects invalid. Use ["literal", {...}] instead.'):this.error("Expected an array, but found "+typeof y+" instead.")},ks.prototype.concat=function(y,I,U){var $=typeof y=="number"?this.path.concat(y):this.path,ae=U?this.scope.concat(U):this.scope;return new ks(this.registry,$,I||null,ae,this.errors)},ks.prototype.error=function(y){for(var I=[],U=arguments.length-1;U-- >0;)I[U]=arguments[U+1];var $=""+this.key+I.map(function(ae){return"["+ae+"]"}).join("");this.errors.push(new Ns($,y))},ks.prototype.checkSubtype=function(y,I){var U=oc(y,I);return U&&this.error(U),U};function bc(m){if(m instanceof Dc)return bc(m.boundExpression);if(m instanceof Pa&&m.name==="error")return!1;if(m instanceof Hu)return!1;if(m instanceof Mu)return!1;var y=m instanceof Po||m instanceof xl,I=!0;return m.eachChild(function(U){y?I=I&&bc(U):I=I&&U instanceof Go}),I?ih(m)&&Eu(m,["zoom","heatmap-density","line-progress","accumulated","is-supported-script"]):!1}function hu(m,y){for(var I=m.length-1,U=0,$=I,ae=0,he,Oe;U<=$;)if(ae=Math.floor((U+$)/2),he=m[ae],Oe=m[ae+1],he<=y){if(ae===I||y<Oe)return ae;U=ae+1}else if(he>y)$=ae-1;else throw new ps("Input is not a number.");return 0}var _u=function(y,I,U){this.type=y,this.input=I,this.labels=[],this.outputs=[];for(var $=0,ae=U;$<ae.length;$+=1){var he=ae[$],Oe=he[0],rt=he[1];this.labels.push(Oe),this.outputs.push(rt)}};_u.parse=function(y,I){if(y.length-1<4)return I.error("Expected at least 4 arguments, but found only "+(y.length-1)+".");if((y.length-1)%2!==0)return I.error("Expected an even number of arguments.");var U=I.parse(y[1],1,aa);if(!U)return null;var $=[],ae=null;I.expectedType&&I.expectedType.kind!=="value"&&(ae=I.expectedType);for(var he=1;he<y.length;he+=2){var Oe=he===1?-1/0:y[he],rt=y[he+1],gt=he,Et=he+1;if(typeof Oe!="number")return I.error('Input/output pairs for "step" expressions must be defined using literal numeric values (not computed expressions) for the input values.',gt);if($.length&&$[$.length-1][0]>=Oe)return I.error('Input/output pairs for "step" expressions must be arranged with input values in strictly ascending order.',gt);var or=I.parse(rt,Et,ae);if(!or)return null;ae=ae||or.type,$.push([Oe,or])}return new _u(ae,U,$)},_u.prototype.evaluate=function(y){var I=this.labels,U=this.outputs;if(I.length===1)return U[0].evaluate(y);var $=this.input.evaluate(y);if($<=I[0])return U[0].evaluate(y);var ae=I.length;if($>=I[ae-1])return U[ae-1].evaluate(y);var he=hu(I,$);return U[he].evaluate(y)},_u.prototype.eachChild=function(y){y(this.input);for(var I=0,U=this.outputs;I<U.length;I+=1){var $=U[I];y($)}},_u.prototype.outputDefined=function(){return this.outputs.every(function(y){return y.outputDefined()})},_u.prototype.serialize=function(){for(var y=["step",this.input.serialize()],I=0;I<this.labels.length;I++)I>0&&y.push(this.labels[I]),y.push(this.outputs[I].serialize());return y};function nl(m,y,I){return m*(1-I)+y*I}function nh(m,y,I){return new cs(nl(m.r,y.r,I),nl(m.g,y.g,I),nl(m.b,y.b,I),nl(m.a,y.a,I))}function Ah(m,y,I){return m.map(function(U,$){return nl(U,y[$],I)})}var zu=Object.freeze({__proto__:null,number:nl,color:nh,array:Ah}),Fc=.95047,wc=1,bd=1.08883,xf=4/29,Pf=6/29,Ou=3*Pf*Pf,bf=Pf*Pf*Pf,jl=Math.PI/180,lf=180/Math.PI;function Hh(m){return m>bf?Math.pow(m,1/3):m/Ou+xf}function If(m){return m>Pf?m*m*m:Ou*(m-xf)}function Cs(m){return 255*(m<=.0031308?12.92*m:1.055*Math.pow(m,1/2.4)-.055)}function du(m){return m/=255,m<=.04045?m/12.92:Math.pow((m+.055)/1.055,2.4)}function ku(m){var y=du(m.r),I=du(m.g),U=du(m.b),$=Hh((.4124564*y+.3575761*I+.1804375*U)/Fc),ae=Hh((.2126729*y+.7151522*I+.072175*U)/wc),he=Hh((.0193339*y+.119192*I+.9503041*U)/bd);return{l:116*ae-16,a:500*($-ae),b:200*(ae-he),alpha:m.a}}function Wf(m){var y=(m.l+16)/116,I=isNaN(m.a)?y:y+m.a/500,U=isNaN(m.b)?y:y-m.b/200;return y=wc*If(y),I=Fc*If(I),U=bd*If(U),new cs(Cs(3.2404542*I-1.5371385*y-.4985314*U),Cs(-.969266*I+1.8760108*y+.041556*U),Cs(.0556434*I-.2040259*y+1.0572252*U),m.alpha)}function Us(m,y,I){return{l:nl(m.l,y.l,I),a:nl(m.a,y.a,I),b:nl(m.b,y.b,I),alpha:nl(m.alpha,y.alpha,I)}}function wf(m){var y=ku(m),I=y.l,U=y.a,$=y.b,ae=Math.atan2($,U)*lf;return{h:ae<0?ae+360:ae,c:Math.sqrt(U*U+$*$),l:I,alpha:m.a}}function zc(m){var y=m.h*jl,I=m.c,U=m.l;return Wf({l:U,a:Math.cos(y)*I,b:Math.sin(y)*I,alpha:m.alpha})}function Wu(m,y,I){var U=y-m;return m+I*(U>180||U<-180?U-360*Math.round(U/360):U)}function Rf(m,y,I){return{h:Wu(m.h,y.h,I),c:nl(m.c,y.c,I),l:nl(m.l,y.l,I),alpha:nl(m.alpha,y.alpha,I)}}var Xu={forward:ku,reverse:Wf,interpolate:Us},uf={forward:wf,reverse:zc,interpolate:Rf},Xf=Object.freeze({__proto__:null,lab:Xu,hcl:uf}),Wl=function(y,I,U,$,ae){this.type=y,this.operator=I,this.interpolation=U,this.input=$,this.labels=[],this.outputs=[];for(var he=0,Oe=ae;he<Oe.length;he+=1){var rt=Oe[he],gt=rt[0],Et=rt[1];this.labels.push(gt),this.outputs.push(Et)}};Wl.interpolationFactor=function(y,I,U,$){var ae=0;if(y.name==="exponential")ae=ah(I,y.base,U,$);else if(y.name==="linear")ae=ah(I,1,U,$);else if(y.name==="cubic-bezier"){var he=y.controlPoints,Oe=new s(he[0],he[1],he[2],he[3]);ae=Oe.solve(ah(I,1,U,$))}return ae},Wl.parse=function(y,I){var U=y[0],$=y[1],ae=y[2],he=y.slice(3);if(!Array.isArray($)||$.length===0)return I.error("Expected an interpolation type expression.",1);if($[0]==="linear")$={name:"linear"};else if($[0]==="exponential"){var Oe=$[1];if(typeof Oe!="number")return I.error("Exponential interpolation requires a numeric base.",1,1);$={name:"exponential",base:Oe}}else if($[0]==="cubic-bezier"){var rt=$.slice(1);if(rt.length!==4||rt.some(function(Ai){return typeof Ai!="number"||Ai<0||Ai>1}))return I.error("Cubic bezier interpolation requires four numeric arguments with values between 0 and 1.",1);$={name:"cubic-bezier",controlPoints:rt}}else return I.error("Unknown interpolation type "+String($[0]),1,0);if(y.length-1<4)return I.error("Expected at least 4 arguments, but found only "+(y.length-1)+".");if((y.length-1)%2!==0)return I.error("Expected an even number of arguments.");if(ae=I.parse(ae,2,aa),!ae)return null;var gt=[],Et=null;U==="interpolate-hcl"||U==="interpolate-lab"?Et=ql:I.expectedType&&I.expectedType.kind!=="value"&&(Et=I.expectedType);for(var or=0;or<he.length;or+=2){var _r=he[or],pr=he[or+1],Fr=or+3,oi=or+4;if(typeof _r!="number")return I.error('Input/output pairs for "interpolate" expressions must be defined using literal numeric values (not computed expressions) for the input values.',Fr);if(gt.length&&gt[gt.length-1][0]>=_r)return I.error('Input/output pairs for "interpolate" expressions must be arranged with input values in strictly ascending order.',Fr);var Hi=I.parse(pr,oi,Et);if(!Hi)return null;Et=Et||Hi.type,gt.push([_r,Hi])}return Et.kind!=="number"&&Et.kind!=="color"&&!(Et.kind==="array"&&Et.itemType.kind==="number"&&typeof Et.N=="number")?I.error("Type "+Os(Et)+" is not interpolatable."):new Wl(Et,U,$,ae,gt)},Wl.prototype.evaluate=function(y){var I=this.labels,U=this.outputs;if(I.length===1)return U[0].evaluate(y);var $=this.input.evaluate(y);if($<=I[0])return U[0].evaluate(y);var ae=I.length;if($>=I[ae-1])return U[ae-1].evaluate(y);var he=hu(I,$),Oe=I[he],rt=I[he+1],gt=Wl.interpolationFactor(this.interpolation,$,Oe,rt),Et=U[he].evaluate(y),or=U[he+1].evaluate(y);return this.operator==="interpolate"?zu[this.type.kind.toLowerCase()](Et,or,gt):this.operator==="interpolate-hcl"?uf.reverse(uf.interpolate(uf.forward(Et),uf.forward(or),gt)):Xu.reverse(Xu.interpolate(Xu.forward(Et),Xu.forward(or),gt))},Wl.prototype.eachChild=function(y){y(this.input);for(var I=0,U=this.outputs;I<U.length;I+=1){var $=U[I];y($)}},Wl.prototype.outputDefined=function(){return this.outputs.every(function(y){return y.outputDefined()})},Wl.prototype.serialize=function(){var y;this.interpolation.name==="linear"?y=["linear"]:this.interpolation.name==="exponential"?this.interpolation.base===1?y=["linear"]:y=["exponential",this.interpolation.base]:y=["cubic-bezier"].concat(this.interpolation.controlPoints);for(var I=[this.operator,y,this.input.serialize()],U=0;U<this.labels.length;U++)I.push(this.labels[U],this.outputs[U].serialize());return I};function ah(m,y,I,U){var $=U-I,ae=m-I;return $===0?0:y===1?ae/$:(Math.pow(y,ae)-1)/(Math.pow(y,$)-1)}var Zu=function(y,I){this.type=y,this.args=I};Zu.parse=function(y,I){if(y.length<2)return I.error("Expectected at least one argument.");var U=null,$=I.expectedType;$&&$.kind!=="value"&&(U=$);for(var ae=[],he=0,Oe=y.slice(1);he<Oe.length;he+=1){var rt=Oe[he],gt=I.parse(rt,1+ae.length,U,void 0,{typeAnnotation:"omit"});if(!gt)return null;U=U||gt.type,ae.push(gt)}var Et=$&&ae.some(function(or){return oc($,or.type)});return Et?new Zu(Do,ae):new Zu(U,ae)},Zu.prototype.evaluate=function(y){for(var I=null,U=0,$,ae=0,he=this.args;ae<he.length;ae+=1){var Oe=he[ae];if(U++,I=Oe.evaluate(y),I&&I instanceof fl&&!I.available&&($||($=I.name),I=null,U===this.args.length&&(I=$)),I!==null)break}return I},Zu.prototype.eachChild=function(y){this.args.forEach(y)},Zu.prototype.outputDefined=function(){return this.args.every(function(y){return y.outputDefined()})},Zu.prototype.serialize=function(){var y=["coalesce"];return this.eachChild(function(I){y.push(I.serialize())}),y};var Oc=function(y,I){this.type=I.type,this.bindings=[].concat(y),this.result=I};Oc.prototype.evaluate=function(y){return this.result.evaluate(y)},Oc.prototype.eachChild=function(y){for(var I=0,U=this.bindings;I<U.length;I+=1){var $=U[I];y($[1])}y(this.result)},Oc.parse=function(y,I){if(y.length<4)return I.error("Expected at least 3 arguments, but found "+(y.length-1)+" instead.");for(var U=[],$=1;$<y.length-1;$+=2){var ae=y[$];if(typeof ae!="string")return I.error("Expected string, but found "+typeof ae+" instead.",$);if(/[^a-zA-Z0-9_]/.test(ae))return I.error("Variable names must contain only alphanumeric characters or '_'.",$);var he=I.parse(y[$+1],$+1);if(!he)return null;U.push([ae,he])}var Oe=I.parse(y[y.length-1],y.length-1,I.expectedType,U);return Oe?new Oc(U,Oe):null},Oc.prototype.outputDefined=function(){return this.result.outputDefined()},Oc.prototype.serialize=function(){for(var y=["let"],I=0,U=this.bindings;I<U.length;I+=1){var $=U[I],ae=$[0],he=$[1];y.push(ae,he.serialize())}return y.push(this.result.serialize()),y};var Tc=function(y,I,U){this.type=y,this.index=I,this.input=U};Tc.parse=function(y,I){if(y.length!==3)return I.error("Expected 2 arguments, but found "+(y.length-1)+" instead.");var U=I.parse(y[1],1,aa),$=I.parse(y[2],2,Kl(I.expectedType||Do));if(!U||!$)return null;var ae=$.type;return new Tc(ae.itemType,U,$)},Tc.prototype.evaluate=function(y){var I=this.index.evaluate(y),U=this.input.evaluate(y);if(I<0)throw new ps("Array index out of bounds: "+I+" < 0.");if(I>=U.length)throw new ps("Array index out of bounds: "+I+" > "+(U.length-1)+".");if(I!==Math.floor(I))throw new ps("Array index must be an integer, but found "+I+" instead.");return U[I]},Tc.prototype.eachChild=function(y){y(this.index),y(this.input)},Tc.prototype.outputDefined=function(){return!1},Tc.prototype.serialize=function(){return["at",this.index.serialize(),this.input.serialize()]};var wl=function(y,I){this.type=qo,this.needle=y,this.haystack=I};wl.parse=function(y,I){if(y.length!==3)return I.error("Expected 2 arguments, but found "+(y.length-1)+" instead.");var U=I.parse(y[1],1,Do),$=I.parse(y[2],2,Do);return!U||!$?null:Cf(U.type,[qo,Oo,aa,ac,Do])?new wl(U,$):I.error("Expected first argument to be of type boolean, string, number or null, but found "+Os(U.type)+" instead")},wl.prototype.evaluate=function(y){var I=this.needle.evaluate(y),U=this.haystack.evaluate(y);if(!U)return!1;if(!sc(I,["boolean","string","number","null"]))throw new ps("Expected first argument to be of type boolean, string, number or null, but found "+Os(Es(I))+" instead.");if(!sc(U,["string","array"]))throw new ps("Expected second argument to be of type array or string, but found "+Os(Es(U))+" instead.");return U.indexOf(I)>=0},wl.prototype.eachChild=function(y){y(this.needle),y(this.haystack)},wl.prototype.outputDefined=function(){return!0},wl.prototype.serialize=function(){return["in",this.needle.serialize(),this.haystack.serialize()]};var vu=function(y,I,U){this.type=aa,this.needle=y,this.haystack=I,this.fromIndex=U};vu.parse=function(y,I){if(y.length<=2||y.length>=5)return I.error("Expected 3 or 4 arguments, but found "+(y.length-1)+" instead.");var U=I.parse(y[1],1,Do),$=I.parse(y[2],2,Do);if(!U||!$)return null;if(!Cf(U.type,[qo,Oo,aa,ac,Do]))return I.error("Expected first argument to be of type boolean, string, number or null, but found "+Os(U.type)+" instead");if(y.length===4){var ae=I.parse(y[3],3,aa);return ae?new vu(U,$,ae):null}else return new vu(U,$)},vu.prototype.evaluate=function(y){var I=this.needle.evaluate(y),U=this.haystack.evaluate(y);if(!sc(I,["boolean","string","number","null"]))throw new ps("Expected first argument to be of type boolean, string, number or null, but found "+Os(Es(I))+" instead.");if(!sc(U,["string","array"]))throw new ps("Expected second argument to be of type array or string, but found "+Os(Es(U))+" instead.");if(this.fromIndex){var $=this.fromIndex.evaluate(y);return U.indexOf(I,$)}return U.indexOf(I)},vu.prototype.eachChild=function(y){y(this.needle),y(this.haystack),this.fromIndex&&y(this.fromIndex)},vu.prototype.outputDefined=function(){return!1},vu.prototype.serialize=function(){if(this.fromIndex!=null&&this.fromIndex!==void 0){var y=this.fromIndex.serialize();return["index-of",this.needle.serialize(),this.haystack.serialize(),y]}return["index-of",this.needle.serialize(),this.haystack.serialize()]};var qc=function(y,I,U,$,ae,he){this.inputType=y,this.type=I,this.input=U,this.cases=$,this.outputs=ae,this.otherwise=he};qc.parse=function(y,I){if(y.length<5)return I.error("Expected at least 4 arguments, but found only "+(y.length-1)+".");if(y.length%2!==1)return I.error("Expected an even number of arguments.");var U,$;I.expectedType&&I.expectedType.kind!=="value"&&($=I.expectedType);for(var ae={},he=[],Oe=2;Oe<y.length-1;Oe+=2){var rt=y[Oe],gt=y[Oe+1];Array.isArray(rt)||(rt=[rt]);var Et=I.concat(Oe);if(rt.length===0)return Et.error("Expected at least one branch label.");for(var or=0,_r=rt;or<_r.length;or+=1){var pr=_r[or];if(typeof pr!="number"&&typeof pr!="string")return Et.error("Branch labels must be numbers or strings.");if(typeof pr=="number"&&Math.abs(pr)>Number.MAX_SAFE_INTEGER)return Et.error("Branch labels must be integers no larger than "+Number.MAX_SAFE_INTEGER+".");if(typeof pr=="number"&&Math.floor(pr)!==pr)return Et.error("Numeric branch labels must be integer values.");if(!U)U=Es(pr);else if(Et.checkSubtype(U,Es(pr)))return null;if(typeof ae[String(pr)]!="undefined")return Et.error("Branch labels must be unique.");ae[String(pr)]=he.length}var Fr=I.parse(gt,Oe,$);if(!Fr)return null;$=$||Fr.type,he.push(Fr)}var oi=I.parse(y[1],1,Do);if(!oi)return null;var Hi=I.parse(y[y.length-1],y.length-1,$);return!Hi||oi.type.kind!=="value"&&I.concat(1).checkSubtype(U,oi.type)?null:new qc(U,$,oi,ae,he,Hi)},qc.prototype.evaluate=function(y){var I=this.input.evaluate(y),U=Es(I)===this.inputType&&this.outputs[this.cases[I]]||this.otherwise;return U.evaluate(y)},qc.prototype.eachChild=function(y){y(this.input),this.outputs.forEach(y),y(this.otherwise)},qc.prototype.outputDefined=function(){return this.outputs.every(function(y){return y.outputDefined()})&&this.otherwise.outputDefined()},qc.prototype.serialize=function(){for(var y=this,I=["match",this.input.serialize()],U=Object.keys(this.cases).sort(),$=[],ae={},he=0,Oe=U;he<Oe.length;he+=1){var rt=Oe[he],gt=ae[this.cases[rt]];gt===void 0?(ae[this.cases[rt]]=$.length,$.push([this.cases[rt],[rt]])):$[gt][1].push(rt)}for(var Et=function(oi){return y.inputType.kind==="number"?Number(oi):oi},or=0,_r=$;or<_r.length;or+=1){var pr=_r[or],gt=pr[0],Fr=pr[1];Fr.length===1?I.push(Et(Fr[0])):I.push(Fr.map(Et)),I.push(this.outputs[outputIndex$1].serialize())}return I.push(this.otherwise.serialize()),I};var cf=function(y,I,U){this.type=y,this.branches=I,this.otherwise=U};cf.parse=function(y,I){if(y.length<4)return I.error("Expected at least 3 arguments, but found only "+(y.length-1)+".");if(y.length%2!==0)return I.error("Expected an odd number of arguments.");var U;I.expectedType&&I.expectedType.kind!=="value"&&(U=I.expectedType);for(var $=[],ae=1;ae<y.length-1;ae+=2){var he=I.parse(y[ae],ae,qo);if(!he)return null;var Oe=I.parse(y[ae+1],ae+1,U);if(!Oe)return null;$.push([he,Oe]),U=U||Oe.type}var rt=I.parse(y[y.length-1],y.length-1,U);return rt?new cf(U,$,rt):null},cf.prototype.evaluate=function(y){for(var I=0,U=this.branches;I<U.length;I+=1){var $=U[I],ae=$[0],he=$[1];if(ae.evaluate(y))return he.evaluate(y)}return this.otherwise.evaluate(y)},cf.prototype.eachChild=function(y){for(var I=0,U=this.branches;I<U.length;I+=1){var $=U[I],ae=$[0],he=$[1];y(ae),y(he)}y(this.otherwise)},cf.prototype.outputDefined=function(){return this.branches.every(function(y){var I=y[0],U=y[1];return U.outputDefined()})&&this.otherwise.outputDefined()},cf.prototype.serialize=function(){var y=["case"];return this.eachChild(function(I){y.push(I.serialize())}),y};var fc=function(y,I,U,$){this.type=y,this.input=I,this.beginIndex=U,this.endIndex=$};fc.parse=function(y,I){if(y.length<=2||y.length>=5)return I.error("Expected 3 or 4 arguments, but found "+(y.length-1)+" instead.");var U=I.parse(y[1],1,Do),$=I.parse(y[2],2,aa);if(!U||!$)return null;if(!Cf(U.type,[Kl(Do),Oo,Do]))return I.error("Expected first argument to be of type array or string, but found "+Os(U.type)+" instead");if(y.length===4){var ae=I.parse(y[3],3,aa);return ae?new fc(U.type,U,$,ae):null}else return new fc(U.type,U,$)},fc.prototype.evaluate=function(y){var I=this.input.evaluate(y),U=this.beginIndex.evaluate(y);if(!sc(I,["string","array"]))throw new ps("Expected first argument to be of type array or string, but found "+Os(Es(I))+" instead.");if(this.endIndex){var $=this.endIndex.evaluate(y);return I.slice(U,$)}return I.slice(U)},fc.prototype.eachChild=function(y){y(this.input),y(this.beginIndex),this.endIndex&&y(this.endIndex)},fc.prototype.outputDefined=function(){return!1},fc.prototype.serialize=function(){if(this.endIndex!=null&&this.endIndex!==void 0){var y=this.endIndex.serialize();return["slice",this.input.serialize(),this.beginIndex.serialize(),y]}return["slice",this.input.serialize(),this.beginIndex.serialize()]};function Bc(m,y){return m==="=="||m==="!="?y.kind==="boolean"||y.kind==="string"||y.kind==="number"||y.kind==="null"||y.kind==="value":y.kind==="string"||y.kind==="number"||y.kind==="value"}function At(m,y,I){return y===I}function Wt(m,y,I){return y!==I}function Cr(m,y,I){return y<I}function Ar(m,y,I){return y>I}function Kr(m,y,I){return y<=I}function ki(m,y,I){return y>=I}function Xi(m,y,I,U){return U.compare(y,I)===0}function dn(m,y,I,U){return!Xi(m,y,I,U)}function wn(m,y,I,U){return U.compare(y,I)<0}function Nn(m,y,I,U){return U.compare(y,I)>0}function Yi(m,y,I,U){return U.compare(y,I)<=0}function Qi(m,y,I,U){return U.compare(y,I)>=0}function on(m,y,I){var U=m!=="=="&&m!=="!=";return function(){function $(ae,he,Oe){this.type=qo,this.lhs=ae,this.rhs=he,this.collator=Oe,this.hasUntypedArgument=ae.type.kind==="value"||he.type.kind==="value"}return $.parse=function(he,Oe){if(he.length!==3&&he.length!==4)return Oe.error("Expected two or three arguments.");var rt=he[0],gt=Oe.parse(he[1],1,Do);if(!gt)return null;if(!Bc(rt,gt.type))return Oe.concat(1).error('"'+rt+`" comparisons are not supported for type '`+Os(gt.type)+"'.");var Et=Oe.parse(he[2],2,Do);if(!Et)return null;if(!Bc(rt,Et.type))return Oe.concat(2).error('"'+rt+`" comparisons are not supported for type '`+Os(Et.type)+"'.");if(gt.type.kind!==Et.type.kind&&gt.type.kind!=="value"&&Et.type.kind!=="value")return Oe.error("Cannot compare types '"+Os(gt.type)+"' and '"+Os(Et.type)+"'.");U&&(gt.type.kind==="value"&&Et.type.kind!=="value"?gt=new xl(Et.type,[gt]):gt.type.kind!=="value"&&Et.type.kind==="value"&&(Et=new xl(gt.type,[Et])));var or=null;if(he.length===4){if(gt.type.kind!=="string"&&Et.type.kind!=="string"&&gt.type.kind!=="value"&&Et.type.kind!=="value")return Oe.error("Cannot use collator to compare non-string types.");if(or=Oe.parse(he[3],3,Uf),!or)return null}return new $(gt,Et,or)},$.prototype.evaluate=function(he){var Oe=this.lhs.evaluate(he),rt=this.rhs.evaluate(he);if(U&&this.hasUntypedArgument){var gt=Es(Oe),Et=Es(rt);if(gt.kind!==Et.kind||!(gt.kind==="string"||gt.kind==="number"))throw new ps('Expected arguments for "'+m+'" to be (string, string) or (number, number), but found ('+gt.kind+", "+Et.kind+") instead.")}if(this.collator&&!U&&this.hasUntypedArgument){var or=Es(Oe),_r=Es(rt);if(or.kind!=="string"||_r.kind!=="string")return y(he,Oe,rt)}return this.collator?I(he,Oe,rt,this.collator.evaluate(he)):y(he,Oe,rt)},$.prototype.eachChild=function(he){he(this.lhs),he(this.rhs),this.collator&&he(this.collator)},$.prototype.outputDefined=function(){return!0},$.prototype.serialize=function(){var he=[m];return this.eachChild(function(Oe){he.push(Oe.serialize())}),he},$}()}var Fi=on("==",At,Xi),$n=on("!=",Wt,dn),Ca=on("<",Cr,wn),Ra=on(">",Ar,Nn),La=on("<=",Kr,Yi),Na=on(">=",ki,Qi),Yn=function(y,I,U,$,ae){this.type=Oo,this.number=y,this.locale=I,this.currency=U,this.minFractionDigits=$,this.maxFractionDigits=ae};Yn.parse=function(y,I){if(y.length!==3)return I.error("Expected two arguments.");var U=I.parse(y[1],1,aa);if(!U)return null;var $=y[2];if(typeof $!="object"||Array.isArray($))return I.error("NumberFormat options argument must be an object.");var ae=null;if($.locale&&(ae=I.parse($.locale,1,Oo),!ae))return null;var he=null;if($.currency&&(he=I.parse($.currency,1,Oo),!he))return null;var Oe=null;if($["min-fraction-digits"]&&(Oe=I.parse($["min-fraction-digits"],1,aa),!Oe))return null;var rt=null;return $["max-fraction-digits"]&&(rt=I.parse($["max-fraction-digits"],1,aa),!rt)?null:new Yn(U,ae,he,Oe,rt)},Yn.prototype.evaluate=function(y){return new Intl.NumberFormat(this.locale?this.locale.evaluate(y):[],{style:this.currency?"currency":"decimal",currency:this.currency?this.currency.evaluate(y):void 0,minimumFractionDigits:this.minFractionDigits?this.minFractionDigits.evaluate(y):void 0,maximumFractionDigits:this.maxFractionDigits?this.maxFractionDigits.evaluate(y):void 0}).format(this.number.evaluate(y))},Yn.prototype.eachChild=function(y){y(this.number),this.locale&&y(this.locale),this.currency&&y(this.currency),this.minFractionDigits&&y(this.minFractionDigits),this.maxFractionDigits&&y(this.maxFractionDigits)},Yn.prototype.outputDefined=function(){return!1},Yn.prototype.serialize=function(){var y={};return this.locale&&(y.locale=this.locale.serialize()),this.currency&&(y.currency=this.currency.serialize()),this.minFractionDigits&&(y["min-fraction-digits"]=this.minFractionDigits.serialize()),this.maxFractionDigits&&(y["max-fraction-digits"]=this.maxFractionDigits.serialize()),["number-format",this.number.serialize(),y]};var Dn=function(y){this.type=aa,this.input=y};Dn.parse=function(y,I){if(y.length!==2)return I.error("Expected 1 argument, but found "+(y.length-1)+" instead.");var U=I.parse(y[1],1);return U?U.type.kind!=="array"&&U.type.kind!=="string"&&U.type.kind!=="value"?I.error("Expected argument of type string or array, but found "+Os(U.type)+" instead."):new Dn(U):null},Dn.prototype.evaluate=function(y){var I=this.input.evaluate(y);if(typeof I=="string")return I.length;if(Array.isArray(I))return I.length;throw new ps("Expected value to be of type string or array, but found "+Os(Es(I))+" instead.")},Dn.prototype.eachChild=function(y){y(this.input)},Dn.prototype.outputDefined=function(){return!1},Dn.prototype.serialize=function(){var y=["length"];return this.eachChild(function(I){y.push(I.serialize())}),y};var Ka={"==":Fi,"!=":$n,">":Ra,"<":Ca,">=":Na,"<=":La,array:xl,at:Tc,boolean:xl,case:cf,coalesce:Zu,collator:Hu,format:Gu,image:qs,in:wl,"index-of":vu,interpolate:Wl,"interpolate-hcl":Wl,"interpolate-lab":Wl,length:Dn,let:Oc,literal:Go,match:qc,number:xl,"number-format":Yn,object:xl,slice:fc,step:_u,string:xl,"to-boolean":Po,"to-color":Po,"to-number":Po,"to-string":Po,var:Dc,within:Mu};function bo(m,y){var I=y[0],U=y[1],$=y[2],ae=y[3];I=I.evaluate(m),U=U.evaluate(m),$=$.evaluate(m);var he=ae?ae.evaluate(m):1,Oe=lc(I,U,$,he);if(Oe)throw new ps(Oe);return new cs(I/255*he,U/255*he,$/255*he,he)}function Xo(m,y){return m in y}function Ss(m,y){var I=y[m];return typeof I=="undefined"?null:I}function as(m,y,I,U){for(;I<=U;){var $=I+U>>1;if(y[$]===m)return!0;y[$]>m?U=$-1:I=$+1}return!1}function ws(m){return{type:m}}Pa.register(Ka,{error:[rf,[Oo],function(m,y){var I=y[0];throw new ps(I.evaluate(m))}],typeof:[Oo,[Do],function(m,y){var I=y[0];return Os(Es(I.evaluate(m)))}],"to-rgba":[Kl(aa,4),[ql],function(m,y){var I=y[0];return I.evaluate(m).toArray()}],rgb:[ql,[aa,aa,aa],bo],rgba:[ql,[aa,aa,aa,aa],bo],has:{type:qo,overloads:[[[Oo],function(m,y){var I=y[0];return Xo(I.evaluate(m),m.properties())}],[[Oo,Pc],function(m,y){var I=y[0],U=y[1];return Xo(I.evaluate(m),U.evaluate(m))}]]},get:{type:Do,overloads:[[[Oo],function(m,y){var I=y[0];return Ss(I.evaluate(m),m.properties())}],[[Oo,Pc],function(m,y){var I=y[0],U=y[1];return Ss(I.evaluate(m),U.evaluate(m))}]]},"feature-state":[Do,[Oo],function(m,y){var I=y[0];return Ss(I.evaluate(m),m.featureState||{})}],properties:[Pc,[],function(m){return m.properties()}],"geometry-type":[Oo,[],function(m){return m.geometryType()}],id:[Do,[],function(m){return m.id()}],zoom:[aa,[],function(m){return m.globals.zoom}],"heatmap-density":[aa,[],function(m){return m.globals.heatmapDensity||0}],"line-progress":[aa,[],function(m){return m.globals.lineProgress||0}],accumulated:[Do,[],function(m){return m.globals.accumulated===void 0?null:m.globals.accumulated}],"+":[aa,ws(aa),function(m,y){for(var I=0,U=0,$=y;U<$.length;U+=1){var ae=$[U];I+=ae.evaluate(m)}return I}],"*":[aa,ws(aa),function(m,y){for(var I=1,U=0,$=y;U<$.length;U+=1){var ae=$[U];I*=ae.evaluate(m)}return I}],"-":{type:aa,overloads:[[[aa,aa],function(m,y){var I=y[0],U=y[1];return I.evaluate(m)-U.evaluate(m)}],[[aa],function(m,y){var I=y[0];return-I.evaluate(m)}]]},"/":[aa,[aa,aa],function(m,y){var I=y[0],U=y[1];return I.evaluate(m)/U.evaluate(m)}],"%":[aa,[aa,aa],function(m,y){var I=y[0],U=y[1];return I.evaluate(m)%U.evaluate(m)}],ln2:[aa,[],function(){return Math.LN2}],pi:[aa,[],function(){return Math.PI}],e:[aa,[],function(){return Math.E}],"^":[aa,[aa,aa],function(m,y){var I=y[0],U=y[1];return Math.pow(I.evaluate(m),U.evaluate(m))}],sqrt:[aa,[aa],function(m,y){var I=y[0];return Math.sqrt(I.evaluate(m))}],log10:[aa,[aa],function(m,y){var I=y[0];return Math.log(I.evaluate(m))/Math.LN10}],ln:[aa,[aa],function(m,y){var I=y[0];return Math.log(I.evaluate(m))}],log2:[aa,[aa],function(m,y){var I=y[0];return Math.log(I.evaluate(m))/Math.LN2}],sin:[aa,[aa],function(m,y){var I=y[0];return Math.sin(I.evaluate(m))}],cos:[aa,[aa],function(m,y){var I=y[0];return Math.cos(I.evaluate(m))}],tan:[aa,[aa],function(m,y){var I=y[0];return Math.tan(I.evaluate(m))}],asin:[aa,[aa],function(m,y){var I=y[0];return Math.asin(I.evaluate(m))}],acos:[aa,[aa],function(m,y){var I=y[0];return Math.acos(I.evaluate(m))}],atan:[aa,[aa],function(m,y){var I=y[0];return Math.atan(I.evaluate(m))}],min:[aa,ws(aa),function(m,y){return Math.min.apply(Math,y.map(function(I){return I.evaluate(m)}))}],max:[aa,ws(aa),function(m,y){return Math.max.apply(Math,y.map(function(I){return I.evaluate(m)}))}],abs:[aa,[aa],function(m,y){var I=y[0];return Math.abs(I.evaluate(m))}],round:[aa,[aa],function(m,y){var I=y[0],U=I.evaluate(m);return U<0?-Math.round(-U):Math.round(U)}],floor:[aa,[aa],function(m,y){var I=y[0];return Math.floor(I.evaluate(m))}],ceil:[aa,[aa],function(m,y){var I=y[0];return Math.ceil(I.evaluate(m))}],"filter-==":[qo,[Oo,Do],function(m,y){var I=y[0],U=y[1];return m.properties()[I.value]===U.value}],"filter-id-==":[qo,[Do],function(m,y){var I=y[0];return m.id()===I.value}],"filter-type-==":[qo,[Oo],function(m,y){var I=y[0];return m.geometryType()===I.value}],"filter-<":[qo,[Oo,Do],function(m,y){var I=y[0],U=y[1],$=m.properties()[I.value],ae=U.value;return typeof $==typeof ae&&$<ae}],"filter-id-<":[qo,[Do],function(m,y){var I=y[0],U=m.id(),$=I.value;return typeof U==typeof $&&U<$}],"filter->":[qo,[Oo,Do],function(m,y){var I=y[0],U=y[1],$=m.properties()[I.value],ae=U.value;return typeof $==typeof ae&&$>ae}],"filter-id->":[qo,[Do],function(m,y){var I=y[0],U=m.id(),$=I.value;return typeof U==typeof $&&U>$}],"filter-<=":[qo,[Oo,Do],function(m,y){var I=y[0],U=y[1],$=m.properties()[I.value],ae=U.value;return typeof $==typeof ae&&$<=ae}],"filter-id-<=":[qo,[Do],function(m,y){var I=y[0],U=m.id(),$=I.value;return typeof U==typeof $&&U<=$}],"filter->=":[qo,[Oo,Do],function(m,y){var I=y[0],U=y[1],$=m.properties()[I.value],ae=U.value;return typeof $==typeof ae&&$>=ae}],"filter-id->=":[qo,[Do],function(m,y){var I=y[0],U=m.id(),$=I.value;return typeof U==typeof $&&U>=$}],"filter-has":[qo,[Do],function(m,y){var I=y[0];return I.value in m.properties()}],"filter-has-id":[qo,[],function(m){return m.id()!==null&&m.id()!==void 0}],"filter-type-in":[qo,[Kl(Oo)],function(m,y){var I=y[0];return I.value.indexOf(m.geometryType())>=0}],"filter-id-in":[qo,[Kl(Do)],function(m,y){var I=y[0];return I.value.indexOf(m.id())>=0}],"filter-in-small":[qo,[Oo,Kl(Do)],function(m,y){var I=y[0],U=y[1];return U.value.indexOf(m.properties()[I.value])>=0}],"filter-in-large":[qo,[Oo,Kl(Do)],function(m,y){var I=y[0],U=y[1];return as(m.properties()[I.value],U.value,0,U.value.length-1)}],all:{type:qo,overloads:[[[qo,qo],function(m,y){var I=y[0],U=y[1];return I.evaluate(m)&&U.evaluate(m)}],[ws(qo),function(m,y){for(var I=0,U=y;I<U.length;I+=1){var $=U[I];if(!$.evaluate(m))return!1}return!0}]]},any:{type:qo,overloads:[[[qo,qo],function(m,y){var I=y[0],U=y[1];return I.evaluate(m)||U.evaluate(m)}],[ws(qo),function(m,y){for(var I=0,U=y;I<U.length;I+=1){var $=U[I];if($.evaluate(m))return!0}return!1}]]},"!":[qo,[qo],function(m,y){var I=y[0];return!I.evaluate(m)}],"is-supported-script":[qo,[Oo],function(m,y){var I=y[0],U=m.globals&&m.globals.isSupportedScript;return U?U(I.evaluate(m)):!0}],upcase:[Oo,[Oo],function(m,y){var I=y[0];return I.evaluate(m).toUpperCase()}],downcase:[Oo,[Oo],function(m,y){var I=y[0];return I.evaluate(m).toLowerCase()}],concat:[Oo,ws(Do),function(m,y){return y.map(function(I){return Hs(I.evaluate(m))}).join("")}],"resolved-locale":[Oo,[Uf],function(m,y){var I=y[0];return I.evaluate(m).resolvedLocale()}]});function Ho(m){return{result:"success",value:m}}function ml(m){return{result:"error",value:m}}function Ws(m){return m["property-type"]==="data-driven"||m["property-type"]==="cross-faded-data-driven"}function Ls(m){return!!m.expression&&m.expression.parameters.indexOf("zoom")>-1}function va(m){return!!m.expression&&m.expression.interpolated}function no(m){return m instanceof Number?"number":m instanceof String?"string":m instanceof Boolean?"boolean":Array.isArray(m)?"array":m===null?"null":typeof m}function ys(m){return typeof m=="object"&&m!==null&&!Array.isArray(m)}function rs(m){return m}function $l(m,y){var I=y.type==="color",U=m.stops&&typeof m.stops[0][0]=="object",$=U||m.property!==void 0,ae=U||!$,he=m.type||(va(y)?"exponential":"interval");if(I&&(m=Ol({},m),m.stops&&(m.stops=m.stops.map(function(ha){return[ha[0],cs.parse(ha[1])]})),m.default?m.default=cs.parse(m.default):m.default=cs.parse(y.default)),m.colorSpace&&m.colorSpace!=="rgb"&&!Xf[m.colorSpace])throw new Error("Unknown color space: "+m.colorSpace);var Oe,rt,gt;if(he==="exponential")Oe=pu;else if(he==="interval")Oe=Nc;else if(he==="categorical"){Oe=Yu,rt=Object.create(null);for(var Et=0,or=m.stops;Et<or.length;Et+=1){var _r=or[Et];rt[_r[0]]=_r[1]}gt=typeof m.stops[0][0]}else if(he==="identity")Oe=Uc;else throw new Error('Unknown function type "'+he+'"');if(U){for(var pr={},Fr=[],oi=0;oi<m.stops.length;oi++){var Hi=m.stops[oi],Ai=Hi[0].zoom;pr[Ai]===void 0&&(pr[Ai]={zoom:Ai,type:m.type,property:m.property,default:m.default,stops:[]},Fr.push(Ai)),pr[Ai].stops.push([Hi[0].value,Hi[1]])}for(var bn=[],nn=0,xn=Fr;nn<xn.length;nn+=1){var Pn=xn[nn];bn.push([pr[Pn].zoom,$l(pr[Pn],y)])}var Zn={name:"linear"};return{kind:"composite",interpolationType:Zn,interpolationFactor:Wl.interpolationFactor.bind(void 0,Zn),zoomStops:bn.map(function(ha){return ha[0]}),evaluate:function(eo,za){var Za=eo.zoom;return pu({stops:bn,base:m.base},y,Za).evaluate(Za,za)}}}else if(ae){var ga=he==="exponential"?{name:"exponential",base:m.base!==void 0?m.base:1}:null;return{kind:"camera",interpolationType:ga,interpolationFactor:Wl.interpolationFactor.bind(void 0,ga),zoomStops:m.stops.map(function(ha){return ha[0]}),evaluate:function(ha){var eo=ha.zoom;return Oe(m,y,eo,rt,gt)}}}else return{kind:"source",evaluate:function(eo,za){var Za=za&&za.properties?za.properties[m.property]:void 0;return Za===void 0?Cu(m.default,y.default):Oe(m,y,Za,rt,gt)}}}function Cu(m,y,I){if(m!==void 0)return m;if(y!==void 0)return y;if(I!==void 0)return I}function Yu(m,y,I,U,$){var ae=typeof I===$?U[I]:void 0;return Cu(ae,m.default,y.default)}function Nc(m,y,I){if(no(I)!=="number")return Cu(m.default,y.default);var U=m.stops.length;if(U===1||I<=m.stops[0][0])return m.stops[0][1];if(I>=m.stops[U-1][0])return m.stops[U-1][1];var $=hu(m.stops.map(function(ae){return ae[0]}),I);return m.stops[$][1]}function pu(m,y,I){var U=m.base!==void 0?m.base:1;if(no(I)!=="number")return Cu(m.default,y.default);var $=m.stops.length;if($===1||I<=m.stops[0][0])return m.stops[0][1];if(I>=m.stops[$-1][0])return m.stops[$-1][1];var ae=hu(m.stops.map(function(or){return or[0]}),I),he=xu(I,U,m.stops[ae][0],m.stops[ae+1][0]),Oe=m.stops[ae][1],rt=m.stops[ae+1][1],gt=zu[y.type]||rs;if(m.colorSpace&&m.colorSpace!=="rgb"){var Et=Xf[m.colorSpace];gt=function(or,_r){return Et.reverse(Et.interpolate(Et.forward(or),Et.forward(_r),he))}}return typeof Oe.evaluate=="function"?{evaluate:function(){for(var _r=[],pr=arguments.length;pr--;)_r[pr]=arguments[pr];var Fr=Oe.evaluate.apply(void 0,_r),oi=rt.evaluate.apply(void 0,_r);if(!(Fr===void 0||oi===void 0))return gt(Fr,oi,he)}}:gt(Oe,rt,he)}function Uc(m,y,I){return y.type==="color"?I=cs.parse(I):y.type==="formatted"?I=Jl.fromString(I.toString()):y.type==="resolvedImage"?I=fl.fromString(I.toString()):no(I)!==y.type&&(y.type!=="enum"||!y.values[I])&&(I=void 0),Cu(I,m.default,y.default)}function xu(m,y,I,U){var $=U-I,ae=m-I;return $===0?0:y===1?ae/$:(Math.pow(y,ae)-1)/(Math.pow(y,$)-1)}var Ac=function(y,I){this.expression=y,this._warningHistory={},this._evaluator=new Yo,this._defaultValue=I?ee(I):null,this._enumValues=I&&I.type==="enum"?I.values:null};Ac.prototype.evaluateWithoutErrorHandling=function(y,I,U,$,ae,he){return this._evaluator.globals=y,this._evaluator.feature=I,this._evaluator.featureState=U,this._evaluator.canonical=$,this._evaluator.availableImages=ae||null,this._evaluator.formattedSection=he,this.expression.evaluate(this._evaluator)},Ac.prototype.evaluate=function(y,I,U,$,ae,he){this._evaluator.globals=y,this._evaluator.feature=I||null,this._evaluator.featureState=U||null,this._evaluator.canonical=$,this._evaluator.availableImages=ae||null,this._evaluator.formattedSection=he||null;try{var Oe=this.expression.evaluate(this._evaluator);if(Oe==null||typeof Oe=="number"&&Oe!==Oe)return this._defaultValue;if(this._enumValues&&!(Oe in this._enumValues))throw new ps("Expected value to be one of "+Object.keys(this._enumValues).map(function(rt){return JSON.stringify(rt)}).join(", ")+", but found "+JSON.stringify(Oe)+" instead.");return Oe}catch(rt){return this._warningHistory[rt.message]||(this._warningHistory[rt.message]=!0,typeof console!="undefined"&&console.warn(rt.message)),this._defaultValue}};function Ua(m){return Array.isArray(m)&&m.length>0&&typeof m[0]=="string"&&m[0]in Ka}function oo(m,y){var I=new ks(Ka,[],y?Q(y):void 0),U=I.parse(m,void 0,void 0,void 0,y&&y.type==="string"?{typeAnnotation:"coerce"}:void 0);return U?Ho(new Ac(U,y)):ml(I.errors)}var Vc=function(y,I){this.kind=y,this._styleExpression=I,this.isStateDependent=y!=="constant"&&!js(I.expression)};Vc.prototype.evaluateWithoutErrorHandling=function(y,I,U,$,ae,he){return this._styleExpression.evaluateWithoutErrorHandling(y,I,U,$,ae,he)},Vc.prototype.evaluate=function(y,I,U,$,ae,he){return this._styleExpression.evaluate(y,I,U,$,ae,he)};var hc=function(y,I,U,$){this.kind=y,this.zoomStops=U,this._styleExpression=I,this.isStateDependent=y!=="camera"&&!js(I.expression),this.interpolationType=$};hc.prototype.evaluateWithoutErrorHandling=function(y,I,U,$,ae,he){return this._styleExpression.evaluateWithoutErrorHandling(y,I,U,$,ae,he)},hc.prototype.evaluate=function(y,I,U,$,ae,he){return this._styleExpression.evaluate(y,I,U,$,ae,he)},hc.prototype.interpolationFactor=function(y,I,U){return this.interpolationType?Wl.interpolationFactor(this.interpolationType,y,I,U):0};function Ku(m,y){if(m=oo(m,y),m.result==="error")return m;var I=m.value.expression,U=ih(I);if(!U&&!Ws(y))return ml([new Ns("","data expressions not supported")]);var $=Eu(I,["zoom"]);if(!$&&!Ls(y))return ml([new Ns("","zoom expressions not supported")]);var ae=B(I);if(!ae&&!$)return ml([new Ns("",'"zoom" expression may only be used as input to a top-level "step" or "interpolate" expression.')]);if(ae instanceof Ns)return ml([ae]);if(ae instanceof Wl&&!va(y))return ml([new Ns("",'"interpolate" expressions cannot be used with this property')]);if(!ae)return Ho(U?new Vc("constant",m.value):new Vc("source",m.value));var he=ae instanceof Wl?ae.interpolation:void 0;return Ho(U?new hc("camera",m.value,ae.labels,he):new hc("composite",m.value,ae.labels,he))}var ue=function(y,I){this._parameters=y,this._specification=I,Ol(this,$l(this._parameters,this._specification))};ue.deserialize=function(y){return new ue(y._parameters,y._specification)},ue.serialize=function(y){return{_parameters:y._parameters,_specification:y._specification}};function w(m,y){if(ys(m))return new ue(m,y);if(Ua(m)){var I=Ku(m,y);if(I.result==="error")throw new Error(I.value.map(function($){return $.key+": "+$.message}).join(", "));return I.value}else{var U=m;return typeof m=="string"&&y.type==="color"&&(U=cs.parse(m)),{kind:"constant",evaluate:function(){return U}}}}function B(m){var y=null;if(m instanceof Oc)y=B(m.result);else if(m instanceof Zu)for(var I=0,U=m.args;I<U.length;I+=1){var $=U[I];if(y=B($),y)break}else(m instanceof _u||m instanceof Wl)&&m.input instanceof Pa&&m.input.name==="zoom"&&(y=m);return y instanceof Ns||m.eachChild(function(ae){var he=B(ae);he instanceof Ns?y=he:!y&&he?y=new Ns("",'"zoom" expression may only be used as input to a top-level "step" or "interpolate" expression.'):y&&he&&y!==he&&(y=new Ns("",'Only one zoom-based "step" or "interpolate" subexpression may be used in an expression.'))}),y}function Q(m){var y={color:ql,string:Oo,number:aa,enum:Oo,boolean:qo,formatted:pl,resolvedImage:Zc};return m.type==="array"?Kl(y[m.value]||Do,m.length):y[m.type]}function ee(m){return m.type==="color"&&ys(m.default)?new cs(0,0,0,0):m.type==="color"?cs.parse(m.default)||null:m.default===void 0?null:m.default}function le(m){var y=m.key,I=m.value,U=m.valueSpec||{},$=m.objectElementValidators||{},ae=m.style,he=m.styleSpec,Oe=[],rt=no(I);if(rt!=="object")return[new _a(y,I,"object expected, "+rt+" found")];for(var gt in I){var Et=gt.split(".")[0],or=U[Et]||U["*"],_r=void 0;if($[Et])_r=$[Et];else if(U[Et])_r=Qa;else if($["*"])_r=$["*"];else if(U["*"])_r=Qa;else{Oe.push(new _a(y,I[gt],'unknown property "'+gt+'"'));continue}Oe=Oe.concat(_r({key:(y&&y+".")+gt,value:I[gt],valueSpec:or,style:ae,styleSpec:he,object:I,objectKey:gt},I))}for(var pr in U)$[pr]||U[pr].required&&U[pr].default===void 0&&I[pr]===void 0&&Oe.push(new _a(y,I,'missing required property "'+pr+'"'));return Oe}function qe(m){var y=m.value,I=m.valueSpec,U=m.style,$=m.styleSpec,ae=m.key,he=m.arrayElementValidator||Qa;if(no(y)!=="array")return[new _a(ae,y,"array expected, "+no(y)+" found")];if(I.length&&y.length!==I.length)return[new _a(ae,y,"array length "+I.length+" expected, length "+y.length+" found")];if(I["min-length"]&&y.length<I["min-length"])return[new _a(ae,y,"array length at least "+I["min-length"]+" expected, length "+y.length+" found")];var Oe={type:I.value,values:I.values};$.$version<7&&(Oe.function=I.function),no(I.value)==="object"&&(Oe=I.value);for(var rt=[],gt=0;gt<y.length;gt++)rt=rt.concat(he({array:y,arrayIndex:gt,value:y[gt],valueSpec:Oe,style:U,styleSpec:$,key:ae+"["+gt+"]"}));return rt}function Xe(m){var y=m.key,I=m.value,U=m.valueSpec,$=no(I);return $==="number"&&I!==I&&($="NaN"),$!=="number"?[new _a(y,I,"number expected, "+$+" found")]:"minimum"in U&&I<U.minimum?[new _a(y,I,I+" is less than the minimum value "+U.minimum)]:"maximum"in U&&I>U.maximum?[new _a(y,I,I+" is greater than the maximum value "+U.maximum)]:[]}function ot(m){var y=m.valueSpec,I=xo(m.value.type),U,$={},ae,he,Oe=I!=="categorical"&&m.value.property===void 0,rt=!Oe,gt=no(m.value.stops)==="array"&&no(m.value.stops[0])==="array"&&no(m.value.stops[0][0])==="object",Et=le({key:m.key,value:m.value,valueSpec:m.styleSpec.function,style:m.style,styleSpec:m.styleSpec,objectElementValidators:{stops:or,default:Fr}});return I==="identity"&&Oe&&Et.push(new _a(m.key,m.value,'missing required property "property"')),I!=="identity"&&!m.value.stops&&Et.push(new _a(m.key,m.value,'missing required property "stops"')),I==="exponential"&&m.valueSpec.expression&&!va(m.valueSpec)&&Et.push(new _a(m.key,m.value,"exponential functions not supported")),m.styleSpec.$version>=8&&(rt&&!Ws(m.valueSpec)?Et.push(new _a(m.key,m.value,"property functions not supported")):Oe&&!Ls(m.valueSpec)&&Et.push(new _a(m.key,m.value,"zoom functions not supported"))),(I==="categorical"||gt)&&m.value.property===void 0&&Et.push(new _a(m.key,m.value,'"property" property is required')),Et;function or(oi){if(I==="identity")return[new _a(oi.key,oi.value,'identity function may not have a "stops" property')];var Hi=[],Ai=oi.value;return Hi=Hi.concat(qe({key:oi.key,value:Ai,valueSpec:oi.valueSpec,style:oi.style,styleSpec:oi.styleSpec,arrayElementValidator:_r})),no(Ai)==="array"&&Ai.length===0&&Hi.push(new _a(oi.key,Ai,"array must have at least one stop")),Hi}function _r(oi){var Hi=[],Ai=oi.value,bn=oi.key;if(no(Ai)!=="array")return[new _a(bn,Ai,"array expected, "+no(Ai)+" found")];if(Ai.length!==2)return[new _a(bn,Ai,"array length 2 expected, length "+Ai.length+" found")];if(gt){if(no(Ai[0])!=="object")return[new _a(bn,Ai,"object expected, "+no(Ai[0])+" found")];if(Ai[0].zoom===void 0)return[new _a(bn,Ai,"object stop key must have zoom")];if(Ai[0].value===void 0)return[new _a(bn,Ai,"object stop key must have value")];if(he&&he>xo(Ai[0].zoom))return[new _a(bn,Ai[0].zoom,"stop zoom values must appear in ascending order")];xo(Ai[0].zoom)!==he&&(he=xo(Ai[0].zoom),ae=void 0,$={}),Hi=Hi.concat(le({key:bn+"[0]",value:Ai[0],valueSpec:{zoom:{}},style:oi.style,styleSpec:oi.styleSpec,objectElementValidators:{zoom:Xe,value:pr}}))}else Hi=Hi.concat(pr({key:bn+"[0]",value:Ai[0],valueSpec:{},style:oi.style,styleSpec:oi.styleSpec},Ai));return Ua(Yl(Ai[1]))?Hi.concat([new _a(bn+"[1]",Ai[1],"expressions are not allowed in function stops.")]):Hi.concat(Qa({key:bn+"[1]",value:Ai[1],valueSpec:y,style:oi.style,styleSpec:oi.styleSpec}))}function pr(oi,Hi){var Ai=no(oi.value),bn=xo(oi.value),nn=oi.value!==null?oi.value:Hi;if(!U)U=Ai;else if(Ai!==U)return[new _a(oi.key,nn,Ai+" stop domain type must match previous stop domain type "+U)];if(Ai!=="number"&&Ai!=="string"&&Ai!=="boolean")return[new _a(oi.key,nn,"stop domain value must be a number, string, or boolean")];if(Ai!=="number"&&I!=="categorical"){var xn="number expected, "+Ai+" found";return Ws(y)&&I===void 0&&(xn+='\nIf you intended to use a categorical function, specify `"type": "categorical"`.'),[new _a(oi.key,nn,xn)]}return I==="categorical"&&Ai==="number"&&(!isFinite(bn)||Math.floor(bn)!==bn)?[new _a(oi.key,nn,"integer expected, found "+bn)]:I!=="categorical"&&Ai==="number"&&ae!==void 0&&bn<ae?[new _a(oi.key,nn,"stop domain values must appear in ascending order")]:(ae=bn,I==="categorical"&&bn in $?[new _a(oi.key,nn,"stop domain values must be unique")]:($[bn]=!0,[]))}function Fr(oi){return Qa({key:oi.key,value:oi.value,valueSpec:y,style:oi.style,styleSpec:oi.styleSpec})}}function Tt(m){var y=(m.expressionContext==="property"?Ku:oo)(Yl(m.value),m.valueSpec);if(y.result==="error")return y.value.map(function(U){return new _a(""+m.key+U.key,m.value,U.message)});var I=y.value.expression||y.value._styleExpression.expression;if(m.expressionContext==="property"&&m.propertyKey==="text-font"&&!I.outputDefined())return[new _a(m.key,m.value,'Invalid data expression for "'+m.propertyKey+'". Output values must be contained as literals within the expression.')];if(m.expressionContext==="property"&&m.propertyType==="layout"&&!js(I))return[new _a(m.key,m.value,'"feature-state" data expressions are not supported with layout properties.')];if(m.expressionContext==="filter"&&!js(I))return[new _a(m.key,m.value,'"feature-state" data expressions are not supported with filters.')];if(m.expressionContext&&m.expressionContext.indexOf("cluster")===0){if(!Eu(I,["zoom","feature-state"]))return[new _a(m.key,m.value,'"zoom" and "feature-state" expressions are not supported with cluster properties.')];if(m.expressionContext==="cluster-initial"&&!ih(I))return[new _a(m.key,m.value,"Feature data expressions are not supported with initial expression part of cluster properties.")]}return[]}function Yt(m){var y=m.value,I=m.key,U=no(y);return U!=="boolean"?[new _a(I,y,"boolean expected, "+U+" found")]:[]}function Kt(m){var y=m.key,I=m.value,U=no(I);return U!=="string"?[new _a(y,I,"color expected, "+U+" found")]:Lf(I)===null?[new _a(y,I,'color expected, "'+I+'" found')]:[]}function xr(m){var y=m.key,I=m.value,U=m.valueSpec,$=[];return Array.isArray(U.values)?U.values.indexOf(xo(I))===-1&&$.push(new _a(y,I,"expected one of ["+U.values.join(", ")+"], "+JSON.stringify(I)+" found")):Object.keys(U.values).indexOf(xo(I))===-1&&$.push(new _a(y,I,"expected one of ["+Object.keys(U.values).join(", ")+"], "+JSON.stringify(I)+" found")),$}function Ir(m){if(m===!0||m===!1)return!0;if(!Array.isArray(m)||m.length===0)return!1;switch(m[0]){case"has":return m.length>=2&&m[1]!=="$id"&&m[1]!=="$type";case"in":return m.length>=3&&(typeof m[1]!="string"||Array.isArray(m[2]));case"!in":case"!has":case"none":return!1;case"==":case"!=":case">":case">=":case"<":case"<=":return m.length!==3||Array.isArray(m[1])||Array.isArray(m[2]);case"any":case"all":for(var y=0,I=m.slice(1);y<I.length;y+=1){var U=I[y];if(!Ir(U)&&typeof U!="boolean")return!1}return!0;default:return!0}}var ve={type:"boolean",default:!1,transition:!1,"property-type":"data-driven",expression:{interpolated:!1,parameters:["zoom","feature"]}};function be(m){if(m==null)return{filter:function(){return!0},needGeometry:!1};Ir(m)||(m=et(m));var y=oo(m,ve);if(y.result==="error")throw new Error(y.value.map(function(U){return U.key+": "+U.message}).join(", "));var I=Be(m);return{filter:function(U,$,ae){return y.value.evaluate(U,$,{},ae)},needGeometry:I}}function De(m,y){return m<y?-1:m>y?1:0}function Be(m){if(!Array.isArray(m))return!1;if(m[0]==="within")return!0;for(var y=1;y<m.length;y++)if(Be(m[y]))return!0;return!1}function et(m){if(!m)return!0;var y=m[0];if(m.length<=1)return y!=="any";var I=y==="=="?We(m[1],m[2],"=="):y==="!="?tr(We(m[1],m[2],"==")):y==="<"||y===">"||y==="<="||y===">="?We(m[1],m[2],y):y==="any"?it(m.slice(1)):y==="all"?["all"].concat(m.slice(1).map(et)):y==="none"?["all"].concat(m.slice(1).map(et).map(tr)):y==="in"?Ft(m[1],m.slice(2)):y==="!in"?tr(Ft(m[1],m.slice(2))):y==="has"?Ht(m[1]):y==="!has"?tr(Ht(m[1])):y==="within"?m:!0;return I}function We(m,y,I){switch(m){case"$type":return["filter-type-"+I,y];case"$id":return["filter-id-"+I,y];default:return["filter-"+I,m,y]}}function it(m){return["any"].concat(m.map(et))}function Ft(m,y){if(y.length===0)return!1;switch(m){case"$type":return["filter-type-in",["literal",y]];case"$id":return["filter-id-in",["literal",y]];default:return y.length>200&&!y.some(function(I){return typeof I!=typeof y[0]})?["filter-in-large",m,["literal",y.sort(De)]]:["filter-in-small",m,["literal",y]]}}function Ht(m){switch(m){case"$type":return!0;case"$id":return["filter-has-id"];default:return["filter-has",m]}}function tr(m){return["!",m]}function dr(m){return Ir(Yl(m.value))?Tt(Ol({},m,{expressionContext:"filter",valueSpec:{value:"boolean"}})):Sr(m)}function Sr(m){var y=m.value,I=m.key;if(no(y)!=="array")return[new _a(I,y,"array expected, "+no(y)+" found")];var U=m.styleSpec,$,ae=[];if(y.length<1)return[new _a(I,y,"filter array must have at least 1 element")];switch(ae=ae.concat(xr({key:I+"[0]",value:y[0],valueSpec:U.filter_operator,style:m.style,styleSpec:m.styleSpec})),xo(y[0])){case"<":case"<=":case">":case">=":y.length>=2&&xo(y[1])==="$type"&&ae.push(new _a(I,y,'"$type" cannot be use with operator "'+y[0]+'"'));case"==":case"!=":y.length!==3&&ae.push(new _a(I,y,'filter array for operator "'+y[0]+'" must have 3 elements'));case"in":case"!in":y.length>=2&&($=no(y[1]),$!=="string"&&ae.push(new _a(I+"[1]",y[1],"string expected, "+$+" found")));for(var he=2;he<y.length;he++)$=no(y[he]),xo(y[1])==="$type"?ae=ae.concat(xr({key:I+"["+he+"]",value:y[he],valueSpec:U.geometry_type,style:m.style,styleSpec:m.styleSpec})):$!=="string"&&$!=="number"&&$!=="boolean"&&ae.push(new _a(I+"["+he+"]",y[he],"string, number, or boolean expected, "+$+" found"));break;case"any":case"all":case"none":for(var Oe=1;Oe<y.length;Oe++)ae=ae.concat(Sr({key:I+"["+Oe+"]",value:y[Oe],style:m.style,styleSpec:m.styleSpec}));break;case"has":case"!has":$=no(y[1]),y.length!==2?ae.push(new _a(I,y,'filter array for "'+y[0]+'" operator must have 2 elements')):$!=="string"&&ae.push(new _a(I+"[1]",y[1],"string expected, "+$+" found"));break;case"within":$=no(y[1]),y.length!==2?ae.push(new _a(I,y,'filter array for "'+y[0]+'" operator must have 2 elements')):$!=="object"&&ae.push(new _a(I+"[1]",y[1],"object expected, "+$+" found"));break}return ae}function Or(m,y){var I=m.key,U=m.style,$=m.styleSpec,ae=m.value,he=m.objectKey,Oe=$[y+"_"+m.layerType];if(!Oe)return[];var rt=he.match(/^(.*)-transition$/);if(y==="paint"&&rt&&Oe[rt[1]]&&Oe[rt[1]].transition)return Qa({key:I,value:ae,valueSpec:$.transition,style:U,styleSpec:$});var gt=m.valueSpec||Oe[he];if(!gt)return[new _a(I,ae,'unknown property "'+he+'"')];var Et;if(no(ae)==="string"&&Ws(gt)&&!gt.tokens&&(Et=/^{([^}]+)}$/.exec(ae)))return[new _a(I,ae,'"'+he+'" does not support interpolation syntax\nUse an identity property function instead: `{ "type": "identity", "property": '+JSON.stringify(Et[1])+" }`.")];var or=[];return m.layerType==="symbol"&&(he==="text-field"&&U&&!U.glyphs&&or.push(new _a(I,ae,'use of "text-field" requires a style "glyphs" property')),he==="text-font"&&ys(Yl(ae))&&xo(ae.type)==="identity"&&or.push(new _a(I,ae,'"text-font" does not support identity functions'))),or.concat(Qa({key:m.key,value:ae,valueSpec:gt,style:U,styleSpec:$,expressionContext:"property",propertyType:y,propertyKey:he}))}function Wr(m){return Or(m,"paint")}function ni(m){return Or(m,"layout")}function Pi(m){var y=[],I=m.value,U=m.key,$=m.style,ae=m.styleSpec;!I.type&&!I.ref&&y.push(new _a(U,I,'either "type" or "ref" is required'));var he=xo(I.type),Oe=xo(I.ref);if(I.id)for(var rt=xo(I.id),gt=0;gt<m.arrayIndex;gt++){var Et=$.layers[gt];xo(Et.id)===rt&&y.push(new _a(U,I.id,'duplicate layer id "'+I.id+'", previously used at line '+Et.id.__line__))}if("ref"in I){["type","source","source-layer","filter","layout"].forEach(function(Fr){Fr in I&&y.push(new _a(U,I[Fr],'"'+Fr+'" is prohibited for ref layers'))});var or;$.layers.forEach(function(Fr){xo(Fr.id)===Oe&&(or=Fr)}),or?or.ref?y.push(new _a(U,I.ref,"ref cannot reference another ref layer")):he=xo(or.type):y.push(new _a(U,I.ref,'ref layer "'+Oe+'" not found'))}else if(he!=="background")if(!I.source)y.push(new _a(U,I,'missing required property "source"'));else{var _r=$.sources&&$.sources[I.source],pr=_r&&xo(_r.type);_r?pr==="vector"&&he==="raster"?y.push(new _a(U,I.source,'layer "'+I.id+'" requires a raster source')):pr==="raster"&&he!=="raster"?y.push(new _a(U,I.source,'layer "'+I.id+'" requires a vector source')):pr==="vector"&&!I["source-layer"]?y.push(new _a(U,I,'layer "'+I.id+'" must specify a "source-layer"')):pr==="raster-dem"&&he!=="hillshade"?y.push(new _a(U,I.source,"raster-dem source can only be used with layer type 'hillshade'.")):he==="line"&&I.paint&&I.paint["line-gradient"]&&(pr!=="geojson"||!_r.lineMetrics)&&y.push(new _a(U,I,'layer "'+I.id+'" specifies a line-gradient, which requires a GeoJSON source with `lineMetrics` enabled.')):y.push(new _a(U,I.source,'source "'+I.source+'" not found'))}return y=y.concat(le({key:U,value:I,valueSpec:ae.layer,style:m.style,styleSpec:m.styleSpec,objectElementValidators:{"*":function(){return[]},type:function(){return Qa({key:U+".type",value:I.type,valueSpec:ae.layer.type,style:m.style,styleSpec:m.styleSpec,object:I,objectKey:"type"})},filter:dr,layout:function(oi){return le({layer:I,key:oi.key,value:oi.value,style:oi.style,styleSpec:oi.styleSpec,objectElementValidators:{"*":function(Ai){return ni(Ol({layerType:he},Ai))}}})},paint:function(oi){return le({layer:I,key:oi.key,value:oi.value,style:oi.style,styleSpec:oi.styleSpec,objectElementValidators:{"*":function(Ai){return Wr(Ol({layerType:he},Ai))}}})}}})),y}function cn(m){var y=m.value,I=m.key,U=no(y);return U!=="string"?[new _a(I,y,"string expected, "+U+" found")]:[]}var ln={promoteId:Kn};function Cn(m){var y=m.value,I=m.key,U=m.styleSpec,$=m.style;if(!y.type)return[new _a(I,y,'"type" is required')];var ae=xo(y.type),he;switch(ae){case"vector":case"raster":case"raster-dem":return he=le({key:I,value:y,valueSpec:U["source_"+ae.replace("-","_")],style:m.style,styleSpec:U,objectElementValidators:ln}),he;case"geojson":if(he=le({key:I,value:y,valueSpec:U.source_geojson,style:$,styleSpec:U,objectElementValidators:ln}),y.cluster)for(var Oe in y.clusterProperties){var rt=y.clusterProperties[Oe],gt=rt[0],Et=rt[1],or=typeof gt=="string"?[gt,["accumulated"],["get",Oe]]:gt;he.push.apply(he,Tt({key:I+"."+Oe+".map",value:Et,expressionContext:"cluster-map"})),he.push.apply(he,Tt({key:I+"."+Oe+".reduce",value:or,expressionContext:"cluster-reduce"}))}return he;case"video":return le({key:I,value:y,valueSpec:U.source_video,style:$,styleSpec:U});case"image":return le({key:I,value:y,valueSpec:U.source_image,style:$,styleSpec:U});case"canvas":return[new _a(I,null,"Please use runtime APIs to add canvas sources, rather than including them in stylesheets.","source.canvas")];default:return xr({key:I+".type",value:y.type,valueSpec:{values:["vector","raster","raster-dem","geojson","video","image"]},style:$,styleSpec:U})}}function Kn(m){var y=m.key,I=m.value;if(no(I)==="string")return cn({key:y,value:I});var U=[];for(var $ in I)U.push.apply(U,cn({key:y+"."+$,value:I[$]}));return U}function Ta(m){var y=m.value,I=m.styleSpec,U=I.light,$=m.style,ae=[],he=no(y);if(y===void 0)return ae;if(he!=="object")return ae=ae.concat([new _a("light",y,"object expected, "+he+" found")]),ae;for(var Oe in y){var rt=Oe.match(/^(.*)-transition$/);rt&&U[rt[1]]&&U[rt[1]].transition?ae=ae.concat(Qa({key:Oe,value:y[Oe],valueSpec:I.transition,style:$,styleSpec:I})):U[Oe]?ae=ae.concat(Qa({key:Oe,value:y[Oe],valueSpec:U[Oe],style:$,styleSpec:I})):ae=ae.concat([new _a(Oe,y[Oe],'unknown property "'+Oe+'"')])}return ae}function fa(m){return cn(m).length===0?[]:Tt(m)}function $a(m){return cn(m).length===0?[]:Tt(m)}var Co={"*":function(){return[]},array:qe,boolean:Yt,number:Xe,color:Kt,constants:Vu,enum:xr,filter:dr,function:ot,layer:Pi,object:le,source:Cn,light:Ta,string:cn,formatted:fa,resolvedImage:$a};function Qa(m){var y=m.value,I=m.valueSpec,U=m.styleSpec;if(I.expression&&ys(xo(y)))return ot(m);if(I.expression&&Ua(Yl(y)))return Tt(m);if(I.type&&Co[I.type])return Co[I.type](m);var $=le(Ol({},m,{valueSpec:I.type?U[I.type]:I}));return $}function mo(m){var y=m.value,I=m.key,U=cn(m);return U.length||(y.indexOf("{fontstack}")===-1&&U.push(new _a(I,y,'"glyphs" url must include a "{fontstack}" token')),y.indexOf("{range}")===-1&&U.push(new _a(I,y,'"glyphs" url must include a "{range}" token'))),U}function Bo(m,y){y===void 0&&(y=Rn);var I=[];return I=I.concat(Qa({key:"",value:m,valueSpec:y.$root,styleSpec:y,style:m,objectElementValidators:{glyphs:mo,"*":function(){return[]}}})),m.constants&&(I=I.concat(Vu({key:"constants",value:m.constants,style:m,styleSpec:y}))),Ps(I)}Bo.source=Ts(Cn),Bo.light=Ts(Ta),Bo.layer=Ts(Pi),Bo.filter=Ts(dr),Bo.paintProperty=Ts(Wr),Bo.layoutProperty=Ts(ni);function Ps(m){return[].concat(m).sort(function(y,I){return y.line-I.line})}function Ts(m){return function(){for(var y=[],I=arguments.length;I--;)y[I]=arguments[I];return Ps(m.apply(this,y))}}var wo=Bo,To=wo.light,hl=wo.paintProperty,Ul=wo.layoutProperty;function Lu(m,y){var I=!1;if(y&&y.length)for(var U=0,$=y;U<$.length;U+=1){var ae=$[U];m.fire(new da(new Error(ae.message))),I=!0}return I}var au=Ql,Js=3;function Ql(m,y,I){var U=this.cells=[];if(m instanceof ArrayBuffer){this.arrayBuffer=m;var $=new Int32Array(this.arrayBuffer);m=$[0],y=$[1],I=$[2],this.d=y+2*I;for(var ae=0;ae<this.d*this.d;ae++){var he=$[Js+ae],Oe=$[Js+ae+1];U.push(he===Oe?null:$.subarray(he,Oe))}var rt=$[Js+U.length],gt=$[Js+U.length+1];this.keys=$.subarray(rt,gt),this.bboxes=$.subarray(gt),this.insert=this._insertReadonly}else{this.d=y+2*I;for(var Et=0;Et<this.d*this.d;Et++)U.push([]);this.keys=[],this.bboxes=[]}this.n=y,this.extent=m,this.padding=I,this.scale=y/m,this.uid=0;var or=I/y*m;this.min=-or,this.max=m+or}Ql.prototype.insert=function(m,y,I,U,$){this._forEachCell(y,I,U,$,this._insertCell,this.uid++),this.keys.push(m),this.bboxes.push(y),this.bboxes.push(I),this.bboxes.push(U),this.bboxes.push($)},Ql.prototype._insertReadonly=function(){throw"Cannot insert into a GridIndex created from an ArrayBuffer."},Ql.prototype._insertCell=function(m,y,I,U,$,ae){this.cells[$].push(ae)},Ql.prototype.query=function(m,y,I,U,$){var ae=this.min,he=this.max;if(m<=ae&&y<=ae&&he<=I&&he<=U&&!$)return Array.prototype.slice.call(this.keys);var Oe=[],rt={};return this._forEachCell(m,y,I,U,this._queryCell,Oe,rt,$),Oe},Ql.prototype._queryCell=function(m,y,I,U,$,ae,he,Oe){var rt=this.cells[$];if(rt!==null)for(var gt=this.keys,Et=this.bboxes,or=0;or<rt.length;or++){var _r=rt[or];if(he[_r]===void 0){var pr=_r*4;(Oe?Oe(Et[pr+0],Et[pr+1],Et[pr+2],Et[pr+3]):m<=Et[pr+2]&&y<=Et[pr+3]&&I>=Et[pr+0]&&U>=Et[pr+1])?(he[_r]=!0,ae.push(gt[_r])):he[_r]=!1}}},Ql.prototype._forEachCell=function(m,y,I,U,$,ae,he,Oe){for(var rt=this._convertToCellCoord(m),gt=this._convertToCellCoord(y),Et=this._convertToCellCoord(I),or=this._convertToCellCoord(U),_r=rt;_r<=Et;_r++)for(var pr=gt;pr<=or;pr++){var Fr=this.d*pr+_r;if(!(Oe&&!Oe(this._convertFromCellCoord(_r),this._convertFromCellCoord(pr),this._convertFromCellCoord(_r+1),this._convertFromCellCoord(pr+1)))&&$.call(this,m,y,I,U,Fr,ae,he,Oe))return}},Ql.prototype._convertFromCellCoord=function(m){return(m-this.padding)/this.scale},Ql.prototype._convertToCellCoord=function(m){return Math.max(0,Math.min(this.d-1,Math.floor(m*this.scale)+this.padding))},Ql.prototype.toArrayBuffer=function(){if(this.arrayBuffer)return this.arrayBuffer;for(var m=this.cells,y=Js+this.cells.length+1+1,I=0,U=0;U<this.cells.length;U++)I+=this.cells[U].length;var $=new Int32Array(y+I+this.keys.length+this.bboxes.length);$[0]=this.extent,$[1]=this.n,$[2]=this.padding;for(var ae=y,he=0;he<m.length;he++){var Oe=m[he];$[Js+he]=ae,$.set(Oe,ae),ae+=Oe.length}return $[Js+m.length]=ae,$.set(this.keys,ae),ae+=this.keys.length,$[Js+m.length+1]=ae,$.set(this.bboxes,ae),ae+=this.bboxes.length,$.buffer};var dc=f.ImageData,Tl=f.ImageBitmap,Al={};function X(m,y,I){I===void 0&&(I={}),Object.defineProperty(y,"_classRegistryKey",{value:m,writeable:!1}),Al[m]={klass:y,omit:I.omit||[],shallow:I.shallow||[]}}X("Object",Object),au.serialize=function(y,I){var U=y.toArrayBuffer();return I&&I.push(U),{buffer:U}},au.deserialize=function(y){return new au(y.buffer)},X("Grid",au),X("Color",cs),X("Error",Error),X("ResolvedImage",fl),X("StylePropertyFunction",ue),X("StyleExpression",Ac,{omit:["_evaluator"]}),X("ZoomDependentExpression",hc),X("ZoomConstantExpression",Vc),X("CompoundExpression",Pa,{omit:["_evaluate"]});for(var se in Ka)Ka[se]._classRegistryKey||X("Expression_"+se,Ka[se]);function Te(m){return m&&typeof ArrayBuffer!="undefined"&&(m instanceof ArrayBuffer||m.constructor&&m.constructor.name==="ArrayBuffer")}function Ne(m){return Tl&&m instanceof Tl}function He(m,y){if(m==null||typeof m=="boolean"||typeof m=="number"||typeof m=="string"||m instanceof Boolean||m instanceof Number||m instanceof String||m instanceof Date||m instanceof RegExp)return m;if(Te(m)||Ne(m))return y&&y.push(m),m;if(ArrayBuffer.isView(m)){var I=m;return y&&y.push(I.buffer),I}if(m instanceof dc)return y&&y.push(m.data.buffer),m;if(Array.isArray(m)){for(var U=[],$=0,ae=m;$<ae.length;$+=1){var he=ae[$];U.push(He(he,y))}return U}if(typeof m=="object"){var Oe=m.constructor,rt=Oe._classRegistryKey;if(!rt)throw new Error("can't serialize object of unregistered class");var gt=Oe.serialize?Oe.serialize(m,y):{};if(!Oe.serialize){for(var Et in m)if(m.hasOwnProperty(Et)&&!(Al[rt].omit.indexOf(Et)>=0)){var or=m[Et];gt[Et]=Al[rt].shallow.indexOf(Et)>=0?or:He(or,y)}m instanceof Error&&(gt.message=m.message)}if(gt.$name)throw new Error("$name property is reserved for worker serialization logic.");return rt!=="Object"&&(gt.$name=rt),gt}throw new Error("can't serialize object of type "+typeof m)}function Ye(m){if(m==null||typeof m=="boolean"||typeof m=="number"||typeof m=="string"||m instanceof Boolean||m instanceof Number||m instanceof String||m instanceof Date||m instanceof RegExp||Te(m)||Ne(m)||ArrayBuffer.isView(m)||m instanceof dc)return m;if(Array.isArray(m))return m.map(Ye);if(typeof m=="object"){var y=m.$name||"Object",I=Al[y],U=I.klass;if(!U)throw new Error("can't deserialize unregistered class "+y);if(U.deserialize)return U.deserialize(m);for(var $=Object.create(U.prototype),ae=0,he=Object.keys(m);ae<he.length;ae+=1){var Oe=he[ae];if(Oe!=="$name"){var rt=m[Oe];$[Oe]=Al[y].shallow.indexOf(Oe)>=0?rt:Ye(rt)}}return $}throw new Error("can't deserialize object of type "+typeof m)}var kt=function(){this.first=!0};kt.prototype.update=function(y,I){var U=Math.floor(y);return this.first?(this.first=!1,this.lastIntegerZoom=U,this.lastIntegerZoomTime=0,this.lastZoom=y,this.lastFloorZoom=U,!0):(this.lastFloorZoom>U?(this.lastIntegerZoom=U+1,this.lastIntegerZoomTime=I):this.lastFloorZoom<U&&(this.lastIntegerZoom=U,this.lastIntegerZoomTime=I),y!==this.lastZoom?(this.lastZoom=y,this.lastFloorZoom=U,!0):!1)};var nt={"Latin-1 Supplement":function(m){return m>=128&&m<=255},Arabic:function(m){return m>=1536&&m<=1791},"Arabic Supplement":function(m){return m>=1872&&m<=1919},"Arabic Extended-A":function(m){return m>=2208&&m<=2303},"Hangul Jamo":function(m){return m>=4352&&m<=4607},"Unified Canadian Aboriginal Syllabics":function(m){return m>=5120&&m<=5759},Khmer:function(m){return m>=6016&&m<=6143},"Unified Canadian Aboriginal Syllabics Extended":function(m){return m>=6320&&m<=6399},"General Punctuation":function(m){return m>=8192&&m<=8303},"Letterlike Symbols":function(m){return m>=8448&&m<=8527},"Number Forms":function(m){return m>=8528&&m<=8591},"Miscellaneous Technical":function(m){return m>=8960&&m<=9215},"Control Pictures":function(m){return m>=9216&&m<=9279},"Optical Character Recognition":function(m){return m>=9280&&m<=9311},"Enclosed Alphanumerics":function(m){return m>=9312&&m<=9471},"Geometric Shapes":function(m){return m>=9632&&m<=9727},"Miscellaneous Symbols":function(m){return m>=9728&&m<=9983},"Miscellaneous Symbols and Arrows":function(m){return m>=11008&&m<=11263},"CJK Radicals Supplement":function(m){return m>=11904&&m<=12031},"Kangxi Radicals":function(m){return m>=12032&&m<=12255},"Ideographic Description Characters":function(m){return m>=12272&&m<=12287},"CJK Symbols and Punctuation":function(m){return m>=12288&&m<=12351},Hiragana:function(m){return m>=12352&&m<=12447},Katakana:function(m){return m>=12448&&m<=12543},Bopomofo:function(m){return m>=12544&&m<=12591},"Hangul Compatibility Jamo":function(m){return m>=12592&&m<=12687},Kanbun:function(m){return m>=12688&&m<=12703},"Bopomofo Extended":function(m){return m>=12704&&m<=12735},"CJK Strokes":function(m){return m>=12736&&m<=12783},"Katakana Phonetic Extensions":function(m){return m>=12784&&m<=12799},"Enclosed CJK Letters and Months":function(m){return m>=12800&&m<=13055},"CJK Compatibility":function(m){return m>=13056&&m<=13311},"CJK Unified Ideographs Extension A":function(m){return m>=13312&&m<=19903},"Yijing Hexagram Symbols":function(m){return m>=19904&&m<=19967},"CJK Unified Ideographs":function(m){return m>=19968&&m<=40959},"Yi Syllables":function(m){return m>=40960&&m<=42127},"Yi Radicals":function(m){return m>=42128&&m<=42191},"Hangul Jamo Extended-A":function(m){return m>=43360&&m<=43391},"Hangul Syllables":function(m){return m>=44032&&m<=55215},"Hangul Jamo Extended-B":function(m){return m>=55216&&m<=55295},"Private Use Area":function(m){return m>=57344&&m<=63743},"CJK Compatibility Ideographs":function(m){return m>=63744&&m<=64255},"Arabic Presentation Forms-A":function(m){return m>=64336&&m<=65023},"Vertical Forms":function(m){return m>=65040&&m<=65055},"CJK Compatibility Forms":function(m){return m>=65072&&m<=65103},"Small Form Variants":function(m){return m>=65104&&m<=65135},"Arabic Presentation Forms-B":function(m){return m>=65136&&m<=65279},"Halfwidth and Fullwidth Forms":function(m){return m>=65280&&m<=65519}};function jt(m){for(var y=0,I=m;y<I.length;y+=1){var U=I[y];if(qr(U.charCodeAt(0)))return!0}return!1}function gr(m){for(var y=0,I=m;y<I.length;y+=1){var U=I[y];if(!yr(U.charCodeAt(0)))return!1}return!0}function yr(m){return!(nt.Arabic(m)||nt["Arabic Supplement"](m)||nt["Arabic Extended-A"](m)||nt["Arabic Presentation Forms-A"](m)||nt["Arabic Presentation Forms-B"](m))}function Hr(m){return m<11904?!1:!!(nt["Bopomofo Extended"](m)||nt.Bopomofo(m)||nt["CJK Compatibility Forms"](m)||nt["CJK Compatibility Ideographs"](m)||nt["CJK Compatibility"](m)||nt["CJK Radicals Supplement"](m)||nt["CJK Strokes"](m)||nt["CJK Symbols and Punctuation"](m)||nt["CJK Unified Ideographs Extension A"](m)||nt["CJK Unified Ideographs"](m)||nt["Enclosed CJK Letters and Months"](m)||nt["Halfwidth and Fullwidth Forms"](m)||nt.Hiragana(m)||nt["Ideographic Description Characters"](m)||nt["Kangxi Radicals"](m)||nt["Katakana Phonetic Extensions"](m)||nt.Katakana(m)||nt["Vertical Forms"](m)||nt["Yi Radicals"](m)||nt["Yi Syllables"](m))}function qr(m){return m===746||m===747?!0:m<4352?!1:!!(nt["Bopomofo Extended"](m)||nt.Bopomofo(m)||nt["CJK Compatibility Forms"](m)&&!(m>=65097&&m<=65103)||nt["CJK Compatibility Ideographs"](m)||nt["CJK Compatibility"](m)||nt["CJK Radicals Supplement"](m)||nt["CJK Strokes"](m)||nt["CJK Symbols and Punctuation"](m)&&!(m>=12296&&m<=12305)&&!(m>=12308&&m<=12319)&&m!==12336||nt["CJK Unified Ideographs Extension A"](m)||nt["CJK Unified Ideographs"](m)||nt["Enclosed CJK Letters and Months"](m)||nt["Hangul Compatibility Jamo"](m)||nt["Hangul Jamo Extended-A"](m)||nt["Hangul Jamo Extended-B"](m)||nt["Hangul Jamo"](m)||nt["Hangul Syllables"](m)||nt.Hiragana(m)||nt["Ideographic Description Characters"](m)||nt.Kanbun(m)||nt["Kangxi Radicals"](m)||nt["Katakana Phonetic Extensions"](m)||nt.Katakana(m)&&m!==12540||nt["Halfwidth and Fullwidth Forms"](m)&&m!==65288&&m!==65289&&m!==65293&&!(m>=65306&&m<=65310)&&m!==65339&&m!==65341&&m!==65343&&!(m>=65371&&m<=65503)&&m!==65507&&!(m>=65512&&m<=65519)||nt["Small Form Variants"](m)&&!(m>=65112&&m<=65118)&&!(m>=65123&&m<=65126)||nt["Unified Canadian Aboriginal Syllabics"](m)||nt["Unified Canadian Aboriginal Syllabics Extended"](m)||nt["Vertical Forms"](m)||nt["Yijing Hexagram Symbols"](m)||nt["Yi Syllables"](m)||nt["Yi Radicals"](m))}function _i(m){return!!(nt["Latin-1 Supplement"](m)&&(m===167||m===169||m===174||m===177||m===188||m===189||m===190||m===215||m===247)||nt["General Punctuation"](m)&&(m===8214||m===8224||m===8225||m===8240||m===8241||m===8251||m===8252||m===8258||m===8263||m===8264||m===8265||m===8273)||nt["Letterlike Symbols"](m)||nt["Number Forms"](m)||nt["Miscellaneous Technical"](m)&&(m>=8960&&m<=8967||m>=8972&&m<=8991||m>=8996&&m<=9e3||m===9003||m>=9085&&m<=9114||m>=9150&&m<=9165||m===9167||m>=9169&&m<=9179||m>=9186&&m<=9215)||nt["Control Pictures"](m)&&m!==9251||nt["Optical Character Recognition"](m)||nt["Enclosed Alphanumerics"](m)||nt["Geometric Shapes"](m)||nt["Miscellaneous Symbols"](m)&&!(m>=9754&&m<=9759)||nt["Miscellaneous Symbols and Arrows"](m)&&(m>=11026&&m<=11055||m>=11088&&m<=11097||m>=11192&&m<=11243)||nt["CJK Symbols and Punctuation"](m)||nt.Katakana(m)||nt["Private Use Area"](m)||nt["CJK Compatibility Forms"](m)||nt["Small Form Variants"](m)||nt["Halfwidth and Fullwidth Forms"](m)||m===8734||m===8756||m===8757||m>=9984&&m<=10087||m>=10102&&m<=10131||m===65532||m===65533)}function bi(m){return!(qr(m)||_i(m))}function Zr(m){return nt.Arabic(m)||nt["Arabic Supplement"](m)||nt["Arabic Extended-A"](m)||nt["Arabic Presentation Forms-A"](m)||nt["Arabic Presentation Forms-B"](m)}function ai(m){return m>=1424&&m<=2303||nt["Arabic Presentation Forms-A"](m)||nt["Arabic Presentation Forms-B"](m)}function gi(m,y){return!(!y&&ai(m)||m>=2304&&m<=3583||m>=3840&&m<=4255||nt.Khmer(m))}function Ii(m){for(var y=0,I=m;y<I.length;y+=1){var U=I[y];if(ai(U.charCodeAt(0)))return!0}return!1}function Si(m,y){for(var I=0,U=m;I<U.length;I+=1){var $=U[I];if(!gi($.charCodeAt(0),y))return!1}return!0}var ei={unavailable:"unavailable",deferred:"deferred",loading:"loading",loaded:"loaded",error:"error"},Ln=null,En=ei.unavailable,Un=null,ia=function(m){m&&typeof m=="string"&&m.indexOf("NetworkError")>-1&&(En=ei.error),Ln&&Ln(m)};function Ea(){Ia.fire(new Wo("pluginStateChange",{pluginStatus:En,pluginURL:Un}))}var Ia=new Wn,yo=function(){return En},Da=function(m){return m({pluginStatus:En,pluginURL:Un}),Ia.on("pluginStateChange",m),m},go=function(m,y,I){if(I===void 0&&(I=!1),En===ei.deferred||En===ei.loading||En===ei.loaded)throw new Error("setRTLTextPlugin cannot be called multiple times.");Un=ut.resolveURL(m),En=ei.deferred,Ln=y,Ea(),I||Is()},Is=function(){if(En!==ei.deferred||!Un)throw new Error("rtl-text-plugin cannot be downloaded unless a pluginURL is specified");En=ei.loading,Ea(),Un&&ri({url:Un},function(m){m?ia(m):(En=ei.loaded,Ea())})},Ms={applyArabicShaping:null,processBidirectionalText:null,processStyledBidirectionalText:null,isLoaded:function(){return En===ei.loaded||Ms.applyArabicShaping!=null},isLoading:function(){return En===ei.loading},setState:function(y){En=y.pluginStatus,Un=y.pluginURL},isParsed:function(){return Ms.applyArabicShaping!=null&&Ms.processBidirectionalText!=null&&Ms.processStyledBidirectionalText!=null},getPluginURL:function(){return Un}},Xs=function(){!Ms.isLoading()&&!Ms.isLoaded()&&yo()==="deferred"&&Is()},Gn=function(y,I){this.zoom=y,I?(this.now=I.now,this.fadeDuration=I.fadeDuration,this.zoomHistory=I.zoomHistory,this.transition=I.transition):(this.now=0,this.fadeDuration=0,this.zoomHistory=new kt,this.transition={})};Gn.prototype.isSupportedScript=function(y){return Si(y,Ms.isLoaded())},Gn.prototype.crossFadingFactor=function(){return this.fadeDuration===0?1:Math.min((this.now-this.zoomHistory.lastIntegerZoomTime)/this.fadeDuration,1)},Gn.prototype.getCrossfadeParameters=function(){var y=this.zoom,I=y-Math.floor(y),U=this.crossFadingFactor();return y>this.zoomHistory.lastIntegerZoom?{fromScale:2,toScale:1,t:I+(1-I)*U}:{fromScale:.5,toScale:1,t:1-(1-U)*I}};var ja=function(y,I){this.property=y,this.value=I,this.expression=w(I===void 0?y.specification.default:I,y.specification)};ja.prototype.isDataDriven=function(){return this.expression.kind==="source"||this.expression.kind==="composite"},ja.prototype.possiblyEvaluate=function(y,I,U){return this.property.possiblyEvaluate(this,y,I,U)};var Fo=function(y){this.property=y,this.value=new ja(y,void 0)};Fo.prototype.transitioned=function(y,I){return new $s(this.property,this.value,I,x({},y.transition,this.transition),y.now)},Fo.prototype.untransitioned=function(){return new $s(this.property,this.value,null,{},0)};var Uo=function(y){this._properties=y,this._values=Object.create(y.defaultTransitionablePropertyValues)};Uo.prototype.getValue=function(y){return j(this._values[y].value.value)},Uo.prototype.setValue=function(y,I){this._values.hasOwnProperty(y)||(this._values[y]=new Fo(this._values[y].property)),this._values[y].value=new ja(this._values[y].property,I===null?void 0:j(I))},Uo.prototype.getTransition=function(y){return j(this._values[y].transition)},Uo.prototype.setTransition=function(y,I){this._values.hasOwnProperty(y)||(this._values[y]=new Fo(this._values[y].property)),this._values[y].transition=j(I)||void 0},Uo.prototype.serialize=function(){for(var y={},I=0,U=Object.keys(this._values);I<U.length;I+=1){var $=U[I],ae=this.getValue($);ae!==void 0&&(y[$]=ae);var he=this.getTransition($);he!==void 0&&(y[$+"-transition"]=he)}return y},Uo.prototype.transitioned=function(y,I){for(var U=new Sl(this._properties),$=0,ae=Object.keys(this._values);$<ae.length;$+=1){var he=ae[$];U._values[he]=this._values[he].transitioned(y,I._values[he])}return U},Uo.prototype.untransitioned=function(){for(var y=new Sl(this._properties),I=0,U=Object.keys(this._values);I<U.length;I+=1){var $=U[I];y._values[$]=this._values[$].untransitioned()}return y};var $s=function(y,I,U,$,ae){this.property=y,this.value=I,this.begin=ae+$.delay||0,this.end=this.begin+$.duration||0,y.specification.transition&&($.delay||$.duration)&&(this.prior=U)};$s.prototype.possiblyEvaluate=function(y,I,U){var $=y.now||0,ae=this.value.possiblyEvaluate(y,I,U),he=this.prior;if(he){if($>this.end)return this.prior=null,ae;if(this.value.isDataDriven())return this.prior=null,ae;if($<this.begin)return he.possiblyEvaluate(y,I,U);var Oe=($-this.begin)/(this.end-this.begin);return this.property.interpolate(he.possiblyEvaluate(y,I,U),ae,v(Oe))}else return ae};var Sl=function(y){this._properties=y,this._values=Object.create(y.defaultTransitioningPropertyValues)};Sl.prototype.possiblyEvaluate=function(y,I,U){for(var $=new Sc(this._properties),ae=0,he=Object.keys(this._values);ae<he.length;ae+=1){var Oe=he[ae];$._values[Oe]=this._values[Oe].possiblyEvaluate(y,I,U)}return $},Sl.prototype.hasTransition=function(){for(var y=0,I=Object.keys(this._values);y<I.length;y+=1){var U=I[y];if(this._values[U].prior)return!0}return!1};var bu=function(y){this._properties=y,this._values=Object.create(y.defaultPropertyValues)};bu.prototype.getValue=function(y){return j(this._values[y].value)},bu.prototype.setValue=function(y,I){this._values[y]=new ja(this._values[y].property,I===null?void 0:j(I))},bu.prototype.serialize=function(){for(var y={},I=0,U=Object.keys(this._values);I<U.length;I+=1){var $=U[I],ae=this.getValue($);ae!==void 0&&(y[$]=ae)}return y},bu.prototype.possiblyEvaluate=function(y,I,U){for(var $=new Sc(this._properties),ae=0,he=Object.keys(this._values);ae<he.length;ae+=1){var Oe=he[ae];$._values[Oe]=this._values[Oe].possiblyEvaluate(y,I,U)}return $};var dl=function(y,I,U){this.property=y,this.value=I,this.parameters=U};dl.prototype.isConstant=function(){return this.value.kind==="constant"},dl.prototype.constantOr=function(y){return this.value.kind==="constant"?this.value.value:y},dl.prototype.evaluate=function(y,I,U,$){return this.property.evaluate(this.value,this.parameters,y,I,U,$)};var Sc=function(y){this._properties=y,this._values=Object.create(y.defaultPossiblyEvaluatedValues)};Sc.prototype.get=function(y){return this._values[y]};var Me=function(y){this.specification=y};Me.prototype.possiblyEvaluate=function(y,I){return y.expression.evaluate(I)},Me.prototype.interpolate=function(y,I,U){var $=zu[this.specification.type];return $?$(y,I,U):y};var bt=function(y,I){this.specification=y,this.overrides=I};bt.prototype.possiblyEvaluate=function(y,I,U,$){return y.expression.kind==="constant"||y.expression.kind==="camera"?new dl(this,{kind:"constant",value:y.expression.evaluate(I,null,{},U,$)},I):new dl(this,y.expression,I)},bt.prototype.interpolate=function(y,I,U){if(y.value.kind!=="constant"||I.value.kind!=="constant")return y;if(y.value.value===void 0||I.value.value===void 0)return new dl(this,{kind:"constant",value:void 0},y.parameters);var $=zu[this.specification.type];return $?new dl(this,{kind:"constant",value:$(y.value.value,I.value.value,U)},y.parameters):y},bt.prototype.evaluate=function(y,I,U,$,ae,he){return y.kind==="constant"?y.value:y.evaluate(I,U,$,ae,he)};var zt=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.possiblyEvaluate=function(U,$,ae,he){if(U.value===void 0)return new dl(this,{kind:"constant",value:void 0},$);if(U.expression.kind==="constant"){var Oe=U.expression.evaluate($,null,{},ae,he),rt=U.property.specification.type==="resolvedImage",gt=rt&&typeof Oe!="string"?Oe.name:Oe,Et=this._calculate(gt,gt,gt,$);return new dl(this,{kind:"constant",value:Et},$)}else if(U.expression.kind==="camera"){var or=this._calculate(U.expression.evaluate({zoom:$.zoom-1}),U.expression.evaluate({zoom:$.zoom}),U.expression.evaluate({zoom:$.zoom+1}),$);return new dl(this,{kind:"constant",value:or},$)}else return new dl(this,U.expression,$)},y.prototype.evaluate=function(U,$,ae,he,Oe,rt){if(U.kind==="source"){var gt=U.evaluate($,ae,he,Oe,rt);return this._calculate(gt,gt,gt,$)}else return U.kind==="composite"?this._calculate(U.evaluate({zoom:Math.floor($.zoom)-1},ae,he),U.evaluate({zoom:Math.floor($.zoom)},ae,he),U.evaluate({zoom:Math.floor($.zoom)+1},ae,he),$):U.value},y.prototype._calculate=function(U,$,ae,he){var Oe=he.zoom;return Oe>he.zoomHistory.lastIntegerZoom?{from:U,to:$}:{from:ae,to:$}},y.prototype.interpolate=function(U){return U},y}(bt),Rr=function(y){this.specification=y};Rr.prototype.possiblyEvaluate=function(y,I,U,$){if(y.value!==void 0)if(y.expression.kind==="constant"){var ae=y.expression.evaluate(I,null,{},U,$);return this._calculate(ae,ae,ae,I)}else return this._calculate(y.expression.evaluate(new Gn(Math.floor(I.zoom-1),I)),y.expression.evaluate(new Gn(Math.floor(I.zoom),I)),y.expression.evaluate(new Gn(Math.floor(I.zoom+1),I)),I)},Rr.prototype._calculate=function(y,I,U,$){var ae=$.zoom;return ae>$.zoomHistory.lastIntegerZoom?{from:y,to:I}:{from:U,to:I}},Rr.prototype.interpolate=function(y){return y};var jr=function(y){this.specification=y};jr.prototype.possiblyEvaluate=function(y,I,U,$){return!!y.expression.evaluate(I,null,{},U,$)},jr.prototype.interpolate=function(){return!1};var Nr=function(y){this.properties=y,this.defaultPropertyValues={},this.defaultTransitionablePropertyValues={},this.defaultTransitioningPropertyValues={},this.defaultPossiblyEvaluatedValues={},this.overridableProperties=[];for(var I in y){var U=y[I];U.specification.overridable&&this.overridableProperties.push(I);var $=this.defaultPropertyValues[I]=new ja(U,void 0),ae=this.defaultTransitionablePropertyValues[I]=new Fo(U);this.defaultTransitioningPropertyValues[I]=ae.untransitioned(),this.defaultPossiblyEvaluatedValues[I]=$.possiblyEvaluate({})}};X("DataDrivenProperty",bt),X("DataConstantProperty",Me),X("CrossFadedDataDrivenProperty",zt),X("CrossFadedProperty",Rr),X("ColorRampProperty",jr);var Gr="-transition",mi=function(m){function y(I,U){if(m.call(this),this.id=I.id,this.type=I.type,this._featureFilter={filter:function(){return!0},needGeometry:!1},I.type!=="custom"&&(I=I,this.metadata=I.metadata,this.minzoom=I.minzoom,this.maxzoom=I.maxzoom,I.type!=="background"&&(this.source=I.source,this.sourceLayer=I["source-layer"],this.filter=I.filter),U.layout&&(this._unevaluatedLayout=new bu(U.layout)),U.paint)){this._transitionablePaint=new Uo(U.paint);for(var $ in I.paint)this.setPaintProperty($,I.paint[$],{validate:!1});for(var ae in I.layout)this.setLayoutProperty(ae,I.layout[ae],{validate:!1});this._transitioningPaint=this._transitionablePaint.untransitioned(),this.paint=new Sc(U.paint)}}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.getCrossfadeParameters=function(){return this._crossfadeParameters},y.prototype.getLayoutProperty=function(U){return U==="visibility"?this.visibility:this._unevaluatedLayout.getValue(U)},y.prototype.setLayoutProperty=function(U,$,ae){if(ae===void 0&&(ae={}),$!=null){var he="layers."+this.id+".layout."+U;if(this._validate(Ul,he,U,$,ae))return}if(U==="visibility"){this.visibility=$;return}this._unevaluatedLayout.setValue(U,$)},y.prototype.getPaintProperty=function(U){return V(U,Gr)?this._transitionablePaint.getTransition(U.slice(0,-Gr.length)):this._transitionablePaint.getValue(U)},y.prototype.setPaintProperty=function(U,$,ae){if(ae===void 0&&(ae={}),$!=null){var he="layers."+this.id+".paint."+U;if(this._validate(hl,he,U,$,ae))return!1}if(V(U,Gr))return this._transitionablePaint.setTransition(U.slice(0,-Gr.length),$||void 0),!1;var Oe=this._transitionablePaint._values[U],rt=Oe.property.specification["property-type"]==="cross-faded-data-driven",gt=Oe.value.isDataDriven(),Et=Oe.value;this._transitionablePaint.setValue(U,$),this._handleSpecialPaintPropertyUpdate(U);var or=this._transitionablePaint._values[U].value,_r=or.isDataDriven();return _r||gt||rt||this._handleOverridablePaintPropertyUpdate(U,Et,or)},y.prototype._handleSpecialPaintPropertyUpdate=function(U){},y.prototype._handleOverridablePaintPropertyUpdate=function(U,$,ae){return!1},y.prototype.isHidden=function(U){return this.minzoom&&U<this.minzoom||this.maxzoom&&U>=this.maxzoom?!0:this.visibility==="none"},y.prototype.updateTransitions=function(U){this._transitioningPaint=this._transitionablePaint.transitioned(U,this._transitioningPaint)},y.prototype.hasTransition=function(){return this._transitioningPaint.hasTransition()},y.prototype.recalculate=function(U,$){U.getCrossfadeParameters&&(this._crossfadeParameters=U.getCrossfadeParameters()),this._unevaluatedLayout&&(this.layout=this._unevaluatedLayout.possiblyEvaluate(U,void 0,$)),this.paint=this._transitioningPaint.possiblyEvaluate(U,void 0,$)},y.prototype.serialize=function(){var U={id:this.id,type:this.type,source:this.source,"source-layer":this.sourceLayer,metadata:this.metadata,minzoom:this.minzoom,maxzoom:this.maxzoom,filter:this.filter,layout:this._unevaluatedLayout&&this._unevaluatedLayout.serialize(),paint:this._transitionablePaint&&this._transitionablePaint.serialize()};return this.visibility&&(U.layout=U.layout||{},U.layout.visibility=this.visibility),Z(U,function($,ae){return $!==void 0&&!(ae==="layout"&&!Object.keys($).length)&&!(ae==="paint"&&!Object.keys($).length)})},y.prototype._validate=function(U,$,ae,he,Oe){return Oe===void 0&&(Oe={}),Oe&&Oe.validate===!1?!1:Lu(this,U.call(wo,{key:$,layerType:this.type,objectKey:ae,value:he,styleSpec:Rn,style:{glyphs:!0,sprite:!0}}))},y.prototype.is3D=function(){return!1},y.prototype.isTileClipped=function(){return!1},y.prototype.hasOffscreenPass=function(){return!1},y.prototype.resize=function(){},y.prototype.isStateDependent=function(){for(var U in this.paint._values){var $=this.paint.get(U);if(!(!($ instanceof dl)||!Ws($.property.specification))&&($.value.kind==="source"||$.value.kind==="composite")&&$.value.isStateDependent)return!0}return!1},y}(Wn),Ui={Int8:Int8Array,Uint8:Uint8Array,Int16:Int16Array,Uint16:Uint16Array,Int32:Int32Array,Uint32:Uint32Array,Float32:Float32Array},qi=function(y,I){this._structArray=y,this._pos1=I*this.size,this._pos2=this._pos1/2,this._pos4=this._pos1/4,this._pos8=this._pos1/8},Ei=128,Hn=5,en=function(){this.isTransferred=!1,this.capacity=-1,this.resize(0)};en.serialize=function(y,I){return y._trim(),I&&(y.isTransferred=!0,I.push(y.arrayBuffer)),{length:y.length,arrayBuffer:y.arrayBuffer}},en.deserialize=function(y){var I=Object.create(this.prototype);return I.arrayBuffer=y.arrayBuffer,I.length=y.length,I.capacity=y.arrayBuffer.byteLength/I.bytesPerElement,I._refreshViews(),I},en.prototype._trim=function(){this.length!==this.capacity&&(this.capacity=this.length,this.arrayBuffer=this.arrayBuffer.slice(0,this.length*this.bytesPerElement),this._refreshViews())},en.prototype.clear=function(){this.length=0},en.prototype.resize=function(y){this.reserve(y),this.length=y},en.prototype.reserve=function(y){if(y>this.capacity){this.capacity=Math.max(y,Math.floor(this.capacity*Hn),Ei),this.arrayBuffer=new ArrayBuffer(this.capacity*this.bytesPerElement);var I=this.uint8;this._refreshViews(),I&&this.uint8.set(I)}},en.prototype._refreshViews=function(){throw new Error("_refreshViews() must be implemented by each concrete StructArray layout")};function Wi(m,y){y===void 0&&(y=1);var I=0,U=0,$=m.map(function(he){var Oe=si(he.type),rt=I=Mr(I,Math.max(y,Oe)),gt=he.components||1;return U=Math.max(U,Oe),I+=Oe*gt,{name:he.name,type:he.type,components:gt,offset:rt}}),ae=Mr(I,Math.max(U,y));return{members:$,size:ae,alignment:y}}function si(m){return Ui[m].BYTES_PER_ELEMENT}function Mr(m,y){return Math.ceil(m/y)*y}var Yr=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$){var ae=this.length;return this.resize(ae+1),this.emplace(ae,U,$)},y.prototype.emplace=function(U,$,ae){var he=U*2;return this.int16[he+0]=$,this.int16[he+1]=ae,U},y}(en);Yr.prototype.bytesPerElement=4,X("StructArrayLayout2i4",Yr);var xi=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae,he){var Oe=this.length;return this.resize(Oe+1),this.emplace(Oe,U,$,ae,he)},y.prototype.emplace=function(U,$,ae,he,Oe){var rt=U*4;return this.int16[rt+0]=$,this.int16[rt+1]=ae,this.int16[rt+2]=he,this.int16[rt+3]=Oe,U},y}(en);xi.prototype.bytesPerElement=8,X("StructArrayLayout4i8",xi);var Ri=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae,he,Oe,rt){var gt=this.length;return this.resize(gt+1),this.emplace(gt,U,$,ae,he,Oe,rt)},y.prototype.emplace=function(U,$,ae,he,Oe,rt,gt){var Et=U*6;return this.int16[Et+0]=$,this.int16[Et+1]=ae,this.int16[Et+2]=he,this.int16[Et+3]=Oe,this.int16[Et+4]=rt,this.int16[Et+5]=gt,U},y}(en);Ri.prototype.bytesPerElement=12,X("StructArrayLayout2i4i12",Ri);var ci=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae,he,Oe,rt){var gt=this.length;return this.resize(gt+1),this.emplace(gt,U,$,ae,he,Oe,rt)},y.prototype.emplace=function(U,$,ae,he,Oe,rt,gt){var Et=U*4,or=U*8;return this.int16[Et+0]=$,this.int16[Et+1]=ae,this.uint8[or+4]=he,this.uint8[or+5]=Oe,this.uint8[or+6]=rt,this.uint8[or+7]=gt,U},y}(en);ci.prototype.bytesPerElement=8,X("StructArrayLayout2i4ub8",ci);var an=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$){var ae=this.length;return this.resize(ae+1),this.emplace(ae,U,$)},y.prototype.emplace=function(U,$,ae){var he=U*2;return this.float32[he+0]=$,this.float32[he+1]=ae,U},y}(en);an.prototype.bytesPerElement=8,X("StructArrayLayout2f8",an);var Zi=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae,he,Oe,rt,gt,Et,or,_r){var pr=this.length;return this.resize(pr+1),this.emplace(pr,U,$,ae,he,Oe,rt,gt,Et,or,_r)},y.prototype.emplace=function(U,$,ae,he,Oe,rt,gt,Et,or,_r,pr){var Fr=U*10;return this.uint16[Fr+0]=$,this.uint16[Fr+1]=ae,this.uint16[Fr+2]=he,this.uint16[Fr+3]=Oe,this.uint16[Fr+4]=rt,this.uint16[Fr+5]=gt,this.uint16[Fr+6]=Et,this.uint16[Fr+7]=or,this.uint16[Fr+8]=_r,this.uint16[Fr+9]=pr,U},y}(en);Zi.prototype.bytesPerElement=20,X("StructArrayLayout10ui20",Zi);var Bn=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae,he,Oe,rt,gt,Et,or,_r,pr,Fr){var oi=this.length;return this.resize(oi+1),this.emplace(oi,U,$,ae,he,Oe,rt,gt,Et,or,_r,pr,Fr)},y.prototype.emplace=function(U,$,ae,he,Oe,rt,gt,Et,or,_r,pr,Fr,oi){var Hi=U*12;return this.int16[Hi+0]=$,this.int16[Hi+1]=ae,this.int16[Hi+2]=he,this.int16[Hi+3]=Oe,this.uint16[Hi+4]=rt,this.uint16[Hi+5]=gt,this.uint16[Hi+6]=Et,this.uint16[Hi+7]=or,this.int16[Hi+8]=_r,this.int16[Hi+9]=pr,this.int16[Hi+10]=Fr,this.int16[Hi+11]=oi,U},y}(en);Bn.prototype.bytesPerElement=24,X("StructArrayLayout4i4ui4i24",Bn);var hi=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae){var he=this.length;return this.resize(he+1),this.emplace(he,U,$,ae)},y.prototype.emplace=function(U,$,ae,he){var Oe=U*3;return this.float32[Oe+0]=$,this.float32[Oe+1]=ae,this.float32[Oe+2]=he,U},y}(en);hi.prototype.bytesPerElement=12,X("StructArrayLayout3f12",hi);var li=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.uint32=new Uint32Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U){var $=this.length;return this.resize($+1),this.emplace($,U)},y.prototype.emplace=function(U,$){var ae=U*1;return this.uint32[ae+0]=$,U},y}(en);li.prototype.bytesPerElement=4,X("StructArrayLayout1ul4",li);var mn=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer),this.uint32=new Uint32Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae,he,Oe,rt,gt,Et,or){var _r=this.length;return this.resize(_r+1),this.emplace(_r,U,$,ae,he,Oe,rt,gt,Et,or)},y.prototype.emplace=function(U,$,ae,he,Oe,rt,gt,Et,or,_r){var pr=U*10,Fr=U*5;return this.int16[pr+0]=$,this.int16[pr+1]=ae,this.int16[pr+2]=he,this.int16[pr+3]=Oe,this.int16[pr+4]=rt,this.int16[pr+5]=gt,this.uint32[Fr+3]=Et,this.uint16[pr+8]=or,this.uint16[pr+9]=_r,U},y}(en);mn.prototype.bytesPerElement=20,X("StructArrayLayout6i1ul2ui20",mn);var Ji=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae,he,Oe,rt){var gt=this.length;return this.resize(gt+1),this.emplace(gt,U,$,ae,he,Oe,rt)},y.prototype.emplace=function(U,$,ae,he,Oe,rt,gt){var Et=U*6;return this.int16[Et+0]=$,this.int16[Et+1]=ae,this.int16[Et+2]=he,this.int16[Et+3]=Oe,this.int16[Et+4]=rt,this.int16[Et+5]=gt,U},y}(en);Ji.prototype.bytesPerElement=12,X("StructArrayLayout2i2i2i12",Ji);var Vi=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae,he,Oe){var rt=this.length;return this.resize(rt+1),this.emplace(rt,U,$,ae,he,Oe)},y.prototype.emplace=function(U,$,ae,he,Oe,rt){var gt=U*4,Et=U*8;return this.float32[gt+0]=$,this.float32[gt+1]=ae,this.float32[gt+2]=he,this.int16[Et+6]=Oe,this.int16[Et+7]=rt,U},y}(en);Vi.prototype.bytesPerElement=16,X("StructArrayLayout2f1f2i16",Vi);var Ni=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae,he){var Oe=this.length;return this.resize(Oe+1),this.emplace(Oe,U,$,ae,he)},y.prototype.emplace=function(U,$,ae,he,Oe){var rt=U*12,gt=U*3;return this.uint8[rt+0]=$,this.uint8[rt+1]=ae,this.float32[gt+1]=he,this.float32[gt+2]=Oe,U},y}(en);Ni.prototype.bytesPerElement=12,X("StructArrayLayout2ub2f12",Ni);var pn=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae){var he=this.length;return this.resize(he+1),this.emplace(he,U,$,ae)},y.prototype.emplace=function(U,$,ae,he){var Oe=U*3;return this.uint16[Oe+0]=$,this.uint16[Oe+1]=ae,this.uint16[Oe+2]=he,U},y}(en);pn.prototype.bytesPerElement=6,X("StructArrayLayout3ui6",pn);var Vn=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer),this.uint32=new Uint32Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae,he,Oe,rt,gt,Et,or,_r,pr,Fr,oi,Hi,Ai,bn,nn){var xn=this.length;return this.resize(xn+1),this.emplace(xn,U,$,ae,he,Oe,rt,gt,Et,or,_r,pr,Fr,oi,Hi,Ai,bn,nn)},y.prototype.emplace=function(U,$,ae,he,Oe,rt,gt,Et,or,_r,pr,Fr,oi,Hi,Ai,bn,nn,xn){var Pn=U*24,Zn=U*12,ga=U*48;return this.int16[Pn+0]=$,this.int16[Pn+1]=ae,this.uint16[Pn+2]=he,this.uint16[Pn+3]=Oe,this.uint32[Zn+2]=rt,this.uint32[Zn+3]=gt,this.uint32[Zn+4]=Et,this.uint16[Pn+10]=or,this.uint16[Pn+11]=_r,this.uint16[Pn+12]=pr,this.float32[Zn+7]=Fr,this.float32[Zn+8]=oi,this.uint8[ga+36]=Hi,this.uint8[ga+37]=Ai,this.uint8[ga+38]=bn,this.uint32[Zn+10]=nn,this.int16[Pn+22]=xn,U},y}(en);Vn.prototype.bytesPerElement=48,X("StructArrayLayout2i2ui3ul3ui2f3ub1ul1i48",Vn);var na=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer),this.uint32=new Uint32Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae,he,Oe,rt,gt,Et,or,_r,pr,Fr,oi,Hi,Ai,bn,nn,xn,Pn,Zn,ga,ha,eo,za,Za,Ko,to,ao){var _s=this.length;return this.resize(_s+1),this.emplace(_s,U,$,ae,he,Oe,rt,gt,Et,or,_r,pr,Fr,oi,Hi,Ai,bn,nn,xn,Pn,Zn,ga,ha,eo,za,Za,Ko,to,ao)},y.prototype.emplace=function(U,$,ae,he,Oe,rt,gt,Et,or,_r,pr,Fr,oi,Hi,Ai,bn,nn,xn,Pn,Zn,ga,ha,eo,za,Za,Ko,to,ao,_s){var jo=U*34,El=U*17;return this.int16[jo+0]=$,this.int16[jo+1]=ae,this.int16[jo+2]=he,this.int16[jo+3]=Oe,this.int16[jo+4]=rt,this.int16[jo+5]=gt,this.int16[jo+6]=Et,this.int16[jo+7]=or,this.uint16[jo+8]=_r,this.uint16[jo+9]=pr,this.uint16[jo+10]=Fr,this.uint16[jo+11]=oi,this.uint16[jo+12]=Hi,this.uint16[jo+13]=Ai,this.uint16[jo+14]=bn,this.uint16[jo+15]=nn,this.uint16[jo+16]=xn,this.uint16[jo+17]=Pn,this.uint16[jo+18]=Zn,this.uint16[jo+19]=ga,this.uint16[jo+20]=ha,this.uint16[jo+21]=eo,this.uint16[jo+22]=za,this.uint32[El+12]=Za,this.float32[El+13]=Ko,this.float32[El+14]=to,this.float32[El+15]=ao,this.float32[El+16]=_s,U},y}(en);na.prototype.bytesPerElement=68,X("StructArrayLayout8i15ui1ul4f68",na);var Ki=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U){var $=this.length;return this.resize($+1),this.emplace($,U)},y.prototype.emplace=function(U,$){var ae=U*1;return this.float32[ae+0]=$,U},y}(en);Ki.prototype.bytesPerElement=4,X("StructArrayLayout1f4",Ki);var kn=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae){var he=this.length;return this.resize(he+1),this.emplace(he,U,$,ae)},y.prototype.emplace=function(U,$,ae,he){var Oe=U*3;return this.int16[Oe+0]=$,this.int16[Oe+1]=ae,this.int16[Oe+2]=he,U},y}(en);kn.prototype.bytesPerElement=6,X("StructArrayLayout3i6",kn);var ta=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.uint32=new Uint32Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae){var he=this.length;return this.resize(he+1),this.emplace(he,U,$,ae)},y.prototype.emplace=function(U,$,ae,he){var Oe=U*2,rt=U*4;return this.uint32[Oe+0]=$,this.uint16[rt+2]=ae,this.uint16[rt+3]=he,U},y}(en);ta.prototype.bytesPerElement=8,X("StructArrayLayout1ul2ui8",ta);var oa=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$){var ae=this.length;return this.resize(ae+1),this.emplace(ae,U,$)},y.prototype.emplace=function(U,$,ae){var he=U*2;return this.uint16[he+0]=$,this.uint16[he+1]=ae,U},y}(en);oa.prototype.bytesPerElement=4,X("StructArrayLayout2ui4",oa);var ba=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U){var $=this.length;return this.resize($+1),this.emplace($,U)},y.prototype.emplace=function(U,$){var ae=U*1;return this.uint16[ae+0]=$,U},y}(en);ba.prototype.bytesPerElement=2,X("StructArrayLayout1ui2",ba);var is=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._refreshViews=function(){this.uint8=new Uint8Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer)},y.prototype.emplaceBack=function(U,$,ae,he){var Oe=this.length;return this.resize(Oe+1),this.emplace(Oe,U,$,ae,he)},y.prototype.emplace=function(U,$,ae,he,Oe){var rt=U*4;return this.float32[rt+0]=$,this.float32[rt+1]=ae,this.float32[rt+2]=he,this.float32[rt+3]=Oe,U},y}(en);is.prototype.bytesPerElement=16,X("StructArrayLayout4f16",is);var Zs=function(m){function y(){m.apply(this,arguments)}m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y;var I={anchorPointX:{configurable:!0},anchorPointY:{configurable:!0},x1:{configurable:!0},y1:{configurable:!0},x2:{configurable:!0},y2:{configurable:!0},featureIndex:{configurable:!0},sourceLayerIndex:{configurable:!0},bucketIndex:{configurable:!0},anchorPoint:{configurable:!0}};return I.anchorPointX.get=function(){return this._structArray.int16[this._pos2+0]},I.anchorPointY.get=function(){return this._structArray.int16[this._pos2+1]},I.x1.get=function(){return this._structArray.int16[this._pos2+2]},I.y1.get=function(){return this._structArray.int16[this._pos2+3]},I.x2.get=function(){return this._structArray.int16[this._pos2+4]},I.y2.get=function(){return this._structArray.int16[this._pos2+5]},I.featureIndex.get=function(){return this._structArray.uint32[this._pos4+3]},I.sourceLayerIndex.get=function(){return this._structArray.uint16[this._pos2+8]},I.bucketIndex.get=function(){return this._structArray.uint16[this._pos2+9]},I.anchorPoint.get=function(){return new u(this.anchorPointX,this.anchorPointY)},Object.defineProperties(y.prototype,I),y}(qi);Zs.prototype.size=20;var Va=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.get=function(U){return new Zs(this,U)},y}(mn);X("CollisionBoxArray",Va);var Ml=function(m){function y(){m.apply(this,arguments)}m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y;var I={anchorX:{configurable:!0},anchorY:{configurable:!0},glyphStartIndex:{configurable:!0},numGlyphs:{configurable:!0},vertexStartIndex:{configurable:!0},lineStartIndex:{configurable:!0},lineLength:{configurable:!0},segment:{configurable:!0},lowerSize:{configurable:!0},upperSize:{configurable:!0},lineOffsetX:{configurable:!0},lineOffsetY:{configurable:!0},writingMode:{configurable:!0},placedOrientation:{configurable:!0},hidden:{configurable:!0},crossTileID:{configurable:!0},associatedIconIndex:{configurable:!0}};return I.anchorX.get=function(){return this._structArray.int16[this._pos2+0]},I.anchorY.get=function(){return this._structArray.int16[this._pos2+1]},I.glyphStartIndex.get=function(){return this._structArray.uint16[this._pos2+2]},I.numGlyphs.get=function(){return this._structArray.uint16[this._pos2+3]},I.vertexStartIndex.get=function(){return this._structArray.uint32[this._pos4+2]},I.lineStartIndex.get=function(){return this._structArray.uint32[this._pos4+3]},I.lineLength.get=function(){return this._structArray.uint32[this._pos4+4]},I.segment.get=function(){return this._structArray.uint16[this._pos2+10]},I.lowerSize.get=function(){return this._structArray.uint16[this._pos2+11]},I.upperSize.get=function(){return this._structArray.uint16[this._pos2+12]},I.lineOffsetX.get=function(){return this._structArray.float32[this._pos4+7]},I.lineOffsetY.get=function(){return this._structArray.float32[this._pos4+8]},I.writingMode.get=function(){return this._structArray.uint8[this._pos1+36]},I.placedOrientation.get=function(){return this._structArray.uint8[this._pos1+37]},I.placedOrientation.set=function(U){this._structArray.uint8[this._pos1+37]=U},I.hidden.get=function(){return this._structArray.uint8[this._pos1+38]},I.hidden.set=function(U){this._structArray.uint8[this._pos1+38]=U},I.crossTileID.get=function(){return this._structArray.uint32[this._pos4+10]},I.crossTileID.set=function(U){this._structArray.uint32[this._pos4+10]=U},I.associatedIconIndex.get=function(){return this._structArray.int16[this._pos2+22]},Object.defineProperties(y.prototype,I),y}(qi);Ml.prototype.size=48;var zo=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.get=function(U){return new Ml(this,U)},y}(Vn);X("PlacedSymbolArray",zo);var Qs=function(m){function y(){m.apply(this,arguments)}m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y;var I={anchorX:{configurable:!0},anchorY:{configurable:!0},rightJustifiedTextSymbolIndex:{configurable:!0},centerJustifiedTextSymbolIndex:{configurable:!0},leftJustifiedTextSymbolIndex:{configurable:!0},verticalPlacedTextSymbolIndex:{configurable:!0},placedIconSymbolIndex:{configurable:!0},verticalPlacedIconSymbolIndex:{configurable:!0},key:{configurable:!0},textBoxStartIndex:{configurable:!0},textBoxEndIndex:{configurable:!0},verticalTextBoxStartIndex:{configurable:!0},verticalTextBoxEndIndex:{configurable:!0},iconBoxStartIndex:{configurable:!0},iconBoxEndIndex:{configurable:!0},verticalIconBoxStartIndex:{configurable:!0},verticalIconBoxEndIndex:{configurable:!0},featureIndex:{configurable:!0},numHorizontalGlyphVertices:{configurable:!0},numVerticalGlyphVertices:{configurable:!0},numIconVertices:{configurable:!0},numVerticalIconVertices:{configurable:!0},useRuntimeCollisionCircles:{configurable:!0},crossTileID:{configurable:!0},textBoxScale:{configurable:!0},textOffset0:{configurable:!0},textOffset1:{configurable:!0},collisionCircleDiameter:{configurable:!0}};return I.anchorX.get=function(){return this._structArray.int16[this._pos2+0]},I.anchorY.get=function(){return this._structArray.int16[this._pos2+1]},I.rightJustifiedTextSymbolIndex.get=function(){return this._structArray.int16[this._pos2+2]},I.centerJustifiedTextSymbolIndex.get=function(){return this._structArray.int16[this._pos2+3]},I.leftJustifiedTextSymbolIndex.get=function(){return this._structArray.int16[this._pos2+4]},I.verticalPlacedTextSymbolIndex.get=function(){return this._structArray.int16[this._pos2+5]},I.placedIconSymbolIndex.get=function(){return this._structArray.int16[this._pos2+6]},I.verticalPlacedIconSymbolIndex.get=function(){return this._structArray.int16[this._pos2+7]},I.key.get=function(){return this._structArray.uint16[this._pos2+8]},I.textBoxStartIndex.get=function(){return this._structArray.uint16[this._pos2+9]},I.textBoxEndIndex.get=function(){return this._structArray.uint16[this._pos2+10]},I.verticalTextBoxStartIndex.get=function(){return this._structArray.uint16[this._pos2+11]},I.verticalTextBoxEndIndex.get=function(){return this._structArray.uint16[this._pos2+12]},I.iconBoxStartIndex.get=function(){return this._structArray.uint16[this._pos2+13]},I.iconBoxEndIndex.get=function(){return this._structArray.uint16[this._pos2+14]},I.verticalIconBoxStartIndex.get=function(){return this._structArray.uint16[this._pos2+15]},I.verticalIconBoxEndIndex.get=function(){return this._structArray.uint16[this._pos2+16]},I.featureIndex.get=function(){return this._structArray.uint16[this._pos2+17]},I.numHorizontalGlyphVertices.get=function(){return this._structArray.uint16[this._pos2+18]},I.numVerticalGlyphVertices.get=function(){return this._structArray.uint16[this._pos2+19]},I.numIconVertices.get=function(){return this._structArray.uint16[this._pos2+20]},I.numVerticalIconVertices.get=function(){return this._structArray.uint16[this._pos2+21]},I.useRuntimeCollisionCircles.get=function(){return this._structArray.uint16[this._pos2+22]},I.crossTileID.get=function(){return this._structArray.uint32[this._pos4+12]},I.crossTileID.set=function(U){this._structArray.uint32[this._pos4+12]=U},I.textBoxScale.get=function(){return this._structArray.float32[this._pos4+13]},I.textOffset0.get=function(){return this._structArray.float32[this._pos4+14]},I.textOffset1.get=function(){return this._structArray.float32[this._pos4+15]},I.collisionCircleDiameter.get=function(){return this._structArray.float32[this._pos4+16]},Object.defineProperties(y.prototype,I),y}(qi);Qs.prototype.size=68;var al=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.get=function(U){return new Qs(this,U)},y}(na);X("SymbolInstanceArray",al);var Vl=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.getoffsetX=function(U){return this.float32[U*1+0]},y}(Ki);X("GlyphOffsetArray",Vl);var ss=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.getx=function(U){return this.int16[U*3+0]},y.prototype.gety=function(U){return this.int16[U*3+1]},y.prototype.gettileUnitDistanceFromAnchor=function(U){return this.int16[U*3+2]},y}(kn);X("SymbolLineVertexArray",ss);var Vs=function(m){function y(){m.apply(this,arguments)}m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y;var I={featureIndex:{configurable:!0},sourceLayerIndex:{configurable:!0},bucketIndex:{configurable:!0}};return I.featureIndex.get=function(){return this._structArray.uint32[this._pos4+0]},I.sourceLayerIndex.get=function(){return this._structArray.uint16[this._pos2+2]},I.bucketIndex.get=function(){return this._structArray.uint16[this._pos2+3]},Object.defineProperties(y.prototype,I),y}(qi);Vs.prototype.size=8;var Ys=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.get=function(U){return new Vs(this,U)},y}(ta);X("FeatureIndexArray",Ys);var wa=Wi([{name:"a_pos",components:2,type:"Int16"}],4),ol=wa.members,io=function(y){y===void 0&&(y=[]),this.segments=y};io.prototype.prepareSegment=function(y,I,U,$){var ae=this.segments[this.segments.length-1];return y>io.MAX_VERTEX_ARRAY_LENGTH&&te("Max vertices per segment is "+io.MAX_VERTEX_ARRAY_LENGTH+": bucket requested "+y),(!ae||ae.vertexLength+y>io.MAX_VERTEX_ARRAY_LENGTH||ae.sortKey!==$)&&(ae={vertexOffset:I.length,primitiveOffset:U.length,vertexLength:0,primitiveLength:0},$!==void 0&&(ae.sortKey=$),this.segments.push(ae)),ae},io.prototype.get=function(){return this.segments},io.prototype.destroy=function(){for(var y=0,I=this.segments;y<I.length;y+=1){var U=I[y];for(var $ in U.vaos)U.vaos[$].destroy()}},io.simpleSegment=function(y,I,U,$){return new io([{vertexOffset:y,primitiveOffset:I,vertexLength:U,primitiveLength:$,vaos:{},sortKey:0}])},io.MAX_VERTEX_ARRAY_LENGTH=Math.pow(2,16)-1,X("SegmentVector",io);function Y(m,y){return m=p(Math.floor(m),0,255),y=p(Math.floor(y),0,255),256*m+y}var D=Wi([{name:"a_pattern_from",components:4,type:"Uint16"},{name:"a_pattern_to",components:4,type:"Uint16"},{name:"a_pixel_ratio_from",components:1,type:"Uint16"},{name:"a_pixel_ratio_to",components:1,type:"Uint16"}]),J=a(function(m){function y(I,U){var $,ae,he,Oe,rt,gt,Et,or;for($=I.length&3,ae=I.length-$,he=U,rt=3432918353,gt=461845907,or=0;or<ae;)Et=I.charCodeAt(or)&255|(I.charCodeAt(++or)&255)<<8|(I.charCodeAt(++or)&255)<<16|(I.charCodeAt(++or)&255)<<24,++or,Et=(Et&65535)*rt+(((Et>>>16)*rt&65535)<<16)&4294967295,Et=Et<<15|Et>>>17,Et=(Et&65535)*gt+(((Et>>>16)*gt&65535)<<16)&4294967295,he^=Et,he=he<<13|he>>>19,Oe=(he&65535)*5+(((he>>>16)*5&65535)<<16)&4294967295,he=(Oe&65535)+27492+(((Oe>>>16)+58964&65535)<<16);switch(Et=0,$){case 3:Et^=(I.charCodeAt(or+2)&255)<<16;case 2:Et^=(I.charCodeAt(or+1)&255)<<8;case 1:Et^=I.charCodeAt(or)&255,Et=(Et&65535)*rt+(((Et>>>16)*rt&65535)<<16)&4294967295,Et=Et<<15|Et>>>17,Et=(Et&65535)*gt+(((Et>>>16)*gt&65535)<<16)&4294967295,he^=Et}return he^=I.length,he^=he>>>16,he=(he&65535)*2246822507+(((he>>>16)*2246822507&65535)<<16)&4294967295,he^=he>>>13,he=(he&65535)*3266489909+(((he>>>16)*3266489909&65535)<<16)&4294967295,he^=he>>>16,he>>>0}m.exports=y}),q=a(function(m){function y(I,U){for(var $=I.length,ae=U^$,he=0,Oe;$>=4;)Oe=I.charCodeAt(he)&255|(I.charCodeAt(++he)&255)<<8|(I.charCodeAt(++he)&255)<<16|(I.charCodeAt(++he)&255)<<24,Oe=(Oe&65535)*1540483477+(((Oe>>>16)*1540483477&65535)<<16),Oe^=Oe>>>24,Oe=(Oe&65535)*1540483477+(((Oe>>>16)*1540483477&65535)<<16),ae=(ae&65535)*1540483477+(((ae>>>16)*1540483477&65535)<<16)^Oe,$-=4,++he;switch($){case 3:ae^=(I.charCodeAt(he+2)&255)<<16;case 2:ae^=(I.charCodeAt(he+1)&255)<<8;case 1:ae^=I.charCodeAt(he)&255,ae=(ae&65535)*1540483477+(((ae>>>16)*1540483477&65535)<<16)}return ae^=ae>>>13,ae=(ae&65535)*1540483477+(((ae>>>16)*1540483477&65535)<<16),ae^=ae>>>15,ae>>>0}m.exports=y}),K=J,de=J,ne=q;K.murmur3=de,K.murmur2=ne;var we=function(){this.ids=[],this.positions=[],this.indexed=!1};we.prototype.add=function(y,I,U,$){this.ids.push(ft(y)),this.positions.push(I,U,$)},we.prototype.getPositions=function(y){for(var I=ft(y),U=0,$=this.ids.length-1;U<$;){var ae=U+$>>1;this.ids[ae]>=I?$=ae:U=ae+1}for(var he=[];this.ids[U]===I;){var Oe=this.positions[3*U],rt=this.positions[3*U+1],gt=this.positions[3*U+2];he.push({index:Oe,start:rt,end:gt}),U++}return he},we.serialize=function(y,I){var U=new Float64Array(y.ids),$=new Uint32Array(y.positions);return Xt(U,$,0,U.length-1),I&&I.push(U.buffer,$.buffer),{ids:U,positions:$}},we.deserialize=function(y){var I=new we;return I.ids=y.ids,I.positions=y.positions,I.indexed=!0,I};var Ue=Math.pow(2,53)-1;function ft(m){var y=+m;return!isNaN(y)&&y<=Ue?y:K(String(m))}function Xt(m,y,I,U){for(;I<U;){for(var $=m[I+U>>1],ae=I-1,he=U+1;;){do ae++;while(m[ae]<$);do he--;while(m[he]>$);if(ae>=he)break;hr(m,ae,he),hr(y,3*ae,3*he),hr(y,3*ae+1,3*he+1),hr(y,3*ae+2,3*he+2)}he-I<U-he?(Xt(m,y,I,he),I=he+1):(Xt(m,y,he+1,U),U=he)}}function hr(m,y,I){var U=m[y];m[y]=m[I],m[I]=U}X("FeaturePositionMap",we);var qt=function(y,I){this.gl=y.gl,this.location=I},Ve=function(m){function y(I,U){m.call(this,I,U),this.current=0}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.set=function(U){this.current!==U&&(this.current=U,this.gl.uniform1i(this.location,U))},y}(qt),Qe=function(m){function y(I,U){m.call(this,I,U),this.current=0}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.set=function(U){this.current!==U&&(this.current=U,this.gl.uniform1f(this.location,U))},y}(qt),at=function(m){function y(I,U){m.call(this,I,U),this.current=[0,0]}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.set=function(U){(U[0]!==this.current[0]||U[1]!==this.current[1])&&(this.current=U,this.gl.uniform2f(this.location,U[0],U[1]))},y}(qt),Ct=function(m){function y(I,U){m.call(this,I,U),this.current=[0,0,0]}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.set=function(U){(U[0]!==this.current[0]||U[1]!==this.current[1]||U[2]!==this.current[2])&&(this.current=U,this.gl.uniform3f(this.location,U[0],U[1],U[2]))},y}(qt),Ot=function(m){function y(I,U){m.call(this,I,U),this.current=[0,0,0,0]}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.set=function(U){(U[0]!==this.current[0]||U[1]!==this.current[1]||U[2]!==this.current[2]||U[3]!==this.current[3])&&(this.current=U,this.gl.uniform4f(this.location,U[0],U[1],U[2],U[3]))},y}(qt),Rt=function(m){function y(I,U){m.call(this,I,U),this.current=cs.transparent}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.set=function(U){(U.r!==this.current.r||U.g!==this.current.g||U.b!==this.current.b||U.a!==this.current.a)&&(this.current=U,this.gl.uniform4f(this.location,U.r,U.g,U.b,U.a))},y}(qt),Bt=new Float32Array(16),Dt=function(m){function y(I,U){m.call(this,I,U),this.current=Bt}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.set=function(U){if(U[12]!==this.current[12]||U[0]!==this.current[0]){this.current=U,this.gl.uniformMatrix4fv(this.location,!1,U);return}for(var $=1;$<16;$++)if(U[$]!==this.current[$]){this.current=U,this.gl.uniformMatrix4fv(this.location,!1,U);break}},y}(qt);function yt(m){return[Y(255*m.r,255*m.g),Y(255*m.b,255*m.a)]}var Pt=function(y,I,U){this.value=y,this.uniformNames=I.map(function($){return"u_"+$}),this.type=U};Pt.prototype.setUniform=function(y,I,U){y.set(U.constantOr(this.value))},Pt.prototype.getBinding=function(y,I,U){return this.type==="color"?new Rt(y,I):new Qe(y,I)};var ht=function(y,I){this.uniformNames=I.map(function(U){return"u_"+U}),this.patternFrom=null,this.patternTo=null,this.pixelRatioFrom=1,this.pixelRatioTo=1};ht.prototype.setConstantPatternPositions=function(y,I){this.pixelRatioFrom=I.pixelRatio,this.pixelRatioTo=y.pixelRatio,this.patternFrom=I.tlbr,this.patternTo=y.tlbr},ht.prototype.setUniform=function(y,I,U,$){var ae=$==="u_pattern_to"?this.patternTo:$==="u_pattern_from"?this.patternFrom:$==="u_pixel_ratio_to"?this.pixelRatioTo:$==="u_pixel_ratio_from"?this.pixelRatioFrom:null;ae&&y.set(ae)},ht.prototype.getBinding=function(y,I,U){return U.substr(0,9)==="u_pattern"?new Ot(y,I):new Qe(y,I)};var ur=function(y,I,U,$){this.expression=y,this.type=U,this.maxValue=0,this.paintVertexAttributes=I.map(function(ae){return{name:"a_"+ae,type:"Float32",components:U==="color"?2:1,offset:0}}),this.paintVertexArray=new $};ur.prototype.populatePaintArray=function(y,I,U,$,ae){var he=this.paintVertexArray.length,Oe=this.expression.evaluate(new Gn(0),I,{},$,[],ae);this.paintVertexArray.resize(y),this._setPaintValue(he,y,Oe)},ur.prototype.updatePaintArray=function(y,I,U,$){var ae=this.expression.evaluate({zoom:0},U,$);this._setPaintValue(y,I,ae)},ur.prototype._setPaintValue=function(y,I,U){if(this.type==="color")for(var $=yt(U),ae=y;ae<I;ae++)this.paintVertexArray.emplace(ae,$[0],$[1]);else{for(var he=y;he<I;he++)this.paintVertexArray.emplace(he,U);this.maxValue=Math.max(this.maxValue,Math.abs(U))}},ur.prototype.upload=function(y){this.paintVertexArray&&this.paintVertexArray.arrayBuffer&&(this.paintVertexBuffer&&this.paintVertexBuffer.buffer?this.paintVertexBuffer.updateData(this.paintVertexArray):this.paintVertexBuffer=y.createVertexBuffer(this.paintVertexArray,this.paintVertexAttributes,this.expression.isStateDependent))},ur.prototype.destroy=function(){this.paintVertexBuffer&&this.paintVertexBuffer.destroy()};var br=function(y,I,U,$,ae,he){this.expression=y,this.uniformNames=I.map(function(Oe){return"u_"+Oe+"_t"}),this.type=U,this.useIntegerZoom=$,this.zoom=ae,this.maxValue=0,this.paintVertexAttributes=I.map(function(Oe){return{name:"a_"+Oe,type:"Float32",components:U==="color"?4:2,offset:0}}),this.paintVertexArray=new he};br.prototype.populatePaintArray=function(y,I,U,$,ae){var he=this.expression.evaluate(new Gn(this.zoom),I,{},$,[],ae),Oe=this.expression.evaluate(new Gn(this.zoom+1),I,{},$,[],ae),rt=this.paintVertexArray.length;this.paintVertexArray.resize(y),this._setPaintValue(rt,y,he,Oe)},br.prototype.updatePaintArray=function(y,I,U,$){var ae=this.expression.evaluate({zoom:this.zoom},U,$),he=this.expression.evaluate({zoom:this.zoom+1},U,$);this._setPaintValue(y,I,ae,he)},br.prototype._setPaintValue=function(y,I,U,$){if(this.type==="color")for(var ae=yt(U),he=yt($),Oe=y;Oe<I;Oe++)this.paintVertexArray.emplace(Oe,ae[0],ae[1],he[0],he[1]);else{for(var rt=y;rt<I;rt++)this.paintVertexArray.emplace(rt,U,$);this.maxValue=Math.max(this.maxValue,Math.abs(U),Math.abs($))}},br.prototype.upload=function(y){this.paintVertexArray&&this.paintVertexArray.arrayBuffer&&(this.paintVertexBuffer&&this.paintVertexBuffer.buffer?this.paintVertexBuffer.updateData(this.paintVertexArray):this.paintVertexBuffer=y.createVertexBuffer(this.paintVertexArray,this.paintVertexAttributes,this.expression.isStateDependent))},br.prototype.destroy=function(){this.paintVertexBuffer&&this.paintVertexBuffer.destroy()},br.prototype.setUniform=function(y,I){var U=this.useIntegerZoom?Math.floor(I.zoom):I.zoom,$=p(this.expression.interpolationFactor(U,this.zoom,this.zoom+1),0,1);y.set($)},br.prototype.getBinding=function(y,I,U){return new Qe(y,I)};var Ur=function(y,I,U,$,ae,he){this.expression=y,this.type=I,this.useIntegerZoom=U,this.zoom=$,this.layerId=he,this.zoomInPaintVertexArray=new ae,this.zoomOutPaintVertexArray=new ae};Ur.prototype.populatePaintArray=function(y,I,U){var $=this.zoomInPaintVertexArray.length;this.zoomInPaintVertexArray.resize(y),this.zoomOutPaintVertexArray.resize(y),this._setPaintValues($,y,I.patterns&&I.patterns[this.layerId],U)},Ur.prototype.updatePaintArray=function(y,I,U,$,ae){this._setPaintValues(y,I,U.patterns&&U.patterns[this.layerId],ae)},Ur.prototype._setPaintValues=function(y,I,U,$){if(!(!$||!U)){var ae=U.min,he=U.mid,Oe=U.max,rt=$[ae],gt=$[he],Et=$[Oe];if(!(!rt||!gt||!Et))for(var or=y;or<I;or++)this.zoomInPaintVertexArray.emplace(or,gt.tl[0],gt.tl[1],gt.br[0],gt.br[1],rt.tl[0],rt.tl[1],rt.br[0],rt.br[1],gt.pixelRatio,rt.pixelRatio),this.zoomOutPaintVertexArray.emplace(or,gt.tl[0],gt.tl[1],gt.br[0],gt.br[1],Et.tl[0],Et.tl[1],Et.br[0],Et.br[1],gt.pixelRatio,Et.pixelRatio)}},Ur.prototype.upload=function(y){this.zoomInPaintVertexArray&&this.zoomInPaintVertexArray.arrayBuffer&&this.zoomOutPaintVertexArray&&this.zoomOutPaintVertexArray.arrayBuffer&&(this.zoomInPaintVertexBuffer=y.createVertexBuffer(this.zoomInPaintVertexArray,D.members,this.expression.isStateDependent),this.zoomOutPaintVertexBuffer=y.createVertexBuffer(this.zoomOutPaintVertexArray,D.members,this.expression.isStateDependent))},Ur.prototype.destroy=function(){this.zoomOutPaintVertexBuffer&&this.zoomOutPaintVertexBuffer.destroy(),this.zoomInPaintVertexBuffer&&this.zoomInPaintVertexBuffer.destroy()};var Di=function(y,I,U){this.binders={},this._buffers=[];var $=[];for(var ae in y.paint._values)if(U(ae)){var he=y.paint.get(ae);if(!(!(he instanceof dl)||!Ws(he.property.specification))){var Oe=Ti(ae,y.type),rt=he.value,gt=he.property.specification.type,Et=he.property.useIntegerZoom,or=he.property.specification["property-type"],_r=or==="cross-faded"||or==="cross-faded-data-driven";if(rt.kind==="constant")this.binders[ae]=_r?new ht(rt.value,Oe):new Pt(rt.value,Oe,gt),$.push("/u_"+ae);else if(rt.kind==="source"||_r){var pr=rn(ae,gt,"source");this.binders[ae]=_r?new Ur(rt,gt,Et,I,pr,y.id):new ur(rt,Oe,gt,pr),$.push("/a_"+ae)}else{var Fr=rn(ae,gt,"composite");this.binders[ae]=new br(rt,Oe,gt,Et,I,Fr),$.push("/z_"+ae)}}}this.cacheKey=$.sort().join("")};Di.prototype.getMaxValue=function(y){var I=this.binders[y];return I instanceof ur||I instanceof br?I.maxValue:0},Di.prototype.populatePaintArrays=function(y,I,U,$,ae){for(var he in this.binders){var Oe=this.binders[he];(Oe instanceof ur||Oe instanceof br||Oe instanceof Ur)&&Oe.populatePaintArray(y,I,U,$,ae)}},Di.prototype.setConstantPatternPositions=function(y,I){for(var U in this.binders){var $=this.binders[U];$ instanceof ht&&$.setConstantPatternPositions(y,I)}},Di.prototype.updatePaintArrays=function(y,I,U,$,ae){var he=!1;for(var Oe in y)for(var rt=I.getPositions(Oe),gt=0,Et=rt;gt<Et.length;gt+=1){var or=Et[gt],_r=U.feature(or.index);for(var pr in this.binders){var Fr=this.binders[pr];if((Fr instanceof ur||Fr instanceof br||Fr instanceof Ur)&&Fr.expression.isStateDependent===!0){var oi=$.paint.get(pr);Fr.expression=oi.value,Fr.updatePaintArray(or.start,or.end,_r,y[Oe],ae),he=!0}}}return he},Di.prototype.defines=function(){var y=[];for(var I in this.binders){var U=this.binders[I];(U instanceof Pt||U instanceof ht)&&y.push.apply(y,U.uniformNames.map(function($){return"#define HAS_UNIFORM_"+$}))}return y},Di.prototype.getBinderAttributes=function(){var y=[];for(var I in this.binders){var U=this.binders[I];if(U instanceof ur||U instanceof br)for(var $=0;$<U.paintVertexAttributes.length;$++)y.push(U.paintVertexAttributes[$].name);else if(U instanceof Ur)for(var ae=0;ae<D.members.length;ae++)y.push(D.members[ae].name)}return y},Di.prototype.getBinderUniforms=function(){var y=[];for(var I in this.binders){var U=this.binders[I];if(U instanceof Pt||U instanceof ht||U instanceof br)for(var $=0,ae=U.uniformNames;$<ae.length;$+=1){var he=ae[$];y.push(he)}}return y},Di.prototype.getPaintVertexBuffers=function(){return this._buffers},Di.prototype.getUniforms=function(y,I){var U=[];for(var $ in this.binders){var ae=this.binders[$];if(ae instanceof Pt||ae instanceof ht||ae instanceof br)for(var he=0,Oe=ae.uniformNames;he<Oe.length;he+=1){var rt=Oe[he];if(I[rt]){var gt=ae.getBinding(y,I[rt],rt);U.push({name:rt,property:$,binding:gt})}}}return U},Di.prototype.setUniforms=function(y,I,U,$){for(var ae=0,he=I;ae<he.length;ae+=1){var Oe=he[ae],rt=Oe.name,gt=Oe.property,Et=Oe.binding;this.binders[gt].setUniform(Et,$,U.get(gt),rt)}},Di.prototype.updatePaintBuffers=function(y){this._buffers=[];for(var I in this.binders){var U=this.binders[I];if(y&&U instanceof Ur){var $=y.fromScale===2?U.zoomInPaintVertexBuffer:U.zoomOutPaintVertexBuffer;$&&this._buffers.push($)}else(U instanceof ur||U instanceof br)&&U.paintVertexBuffer&&this._buffers.push(U.paintVertexBuffer)}},Di.prototype.upload=function(y){for(var I in this.binders){var U=this.binders[I];(U instanceof ur||U instanceof br||U instanceof Ur)&&U.upload(y)}this.updatePaintBuffers()},Di.prototype.destroy=function(){for(var y in this.binders){var I=this.binders[y];(I instanceof ur||I instanceof br||I instanceof Ur)&&I.destroy()}};var fi=function(y,I,U){U===void 0&&(U=function(){return!0}),this.programConfigurations={};for(var $=0,ae=y;$<ae.length;$+=1){var he=ae[$];this.programConfigurations[he.id]=new Di(he,I,U)}this.needsUpload=!1,this._featureMap=new we,this._bufferOffset=0};fi.prototype.populatePaintArrays=function(y,I,U,$,ae,he){for(var Oe in this.programConfigurations)this.programConfigurations[Oe].populatePaintArrays(y,I,$,ae,he);I.id!==void 0&&this._featureMap.add(I.id,U,this._bufferOffset,y),this._bufferOffset=y,this.needsUpload=!0},fi.prototype.updatePaintArrays=function(y,I,U,$){for(var ae=0,he=U;ae<he.length;ae+=1){var Oe=he[ae];this.needsUpload=this.programConfigurations[Oe.id].updatePaintArrays(y,this._featureMap,I,Oe,$)||this.needsUpload}},fi.prototype.get=function(y){return this.programConfigurations[y]},fi.prototype.upload=function(y){if(this.needsUpload){for(var I in this.programConfigurations)this.programConfigurations[I].upload(y);this.needsUpload=!1}},fi.prototype.destroy=function(){for(var y in this.programConfigurations)this.programConfigurations[y].destroy()};function Ti(m,y){var I={"text-opacity":["opacity"],"icon-opacity":["opacity"],"text-color":["fill_color"],"icon-color":["fill_color"],"text-halo-color":["halo_color"],"icon-halo-color":["halo_color"],"text-halo-blur":["halo_blur"],"icon-halo-blur":["halo_blur"],"text-halo-width":["halo_width"],"icon-halo-width":["halo_width"],"line-gap-width":["gapwidth"],"line-pattern":["pattern_to","pattern_from","pixel_ratio_to","pixel_ratio_from"],"fill-pattern":["pattern_to","pattern_from","pixel_ratio_to","pixel_ratio_from"],"fill-extrusion-pattern":["pattern_to","pattern_from","pixel_ratio_to","pixel_ratio_from"]};return I[m]||[m.replace(y+"-","").replace(/-/g,"_")]}function gn(m){var y={"line-pattern":{source:Zi,composite:Zi},"fill-pattern":{source:Zi,composite:Zi},"fill-extrusion-pattern":{source:Zi,composite:Zi}};return y[m]}function rn(m,y,I){var U={color:{source:an,composite:is},number:{source:Ki,composite:an}},$=gn(m);return $&&$[I]||U[y][I]}X("ConstantBinder",Pt),X("CrossFadedConstantBinder",ht),X("SourceExpressionBinder",ur),X("CrossFadedCompositeBinder",Ur),X("CompositeExpressionBinder",br),X("ProgramConfiguration",Di,{omit:["_buffers"]}),X("ProgramConfigurationSet",fi);var Ci=8192,Bi=15,Gi=Math.pow(2,Bi-1)-1,sn=-Gi-1;function zn(m){for(var y=Ci/m.extent,I=m.loadGeometry(),U=0;U<I.length;U++)for(var $=I[U],ae=0;ae<$.length;ae++){var he=$[ae],Oe=Math.round(he.x*y),rt=Math.round(he.y*y);he.x=p(Oe,sn,Gi),he.y=p(rt,sn,Gi),(Oe<he.x||Oe>he.x+1||rt<he.y||rt>he.y+1)&&te("Geometry exceeds allowed extent, reduce your vector tile buffer size")}return I}function Ja(m,y){return{type:m.type,id:m.id,properties:m.properties,geometry:y?zn(m):[]}}function co(m,y,I,U,$){m.emplaceBack(y*2+(U+1)/2,I*2+($+1)/2)}var ts=function(y){this.zoom=y.zoom,this.overscaling=y.overscaling,this.layers=y.layers,this.layerIds=this.layers.map(function(I){return I.id}),this.index=y.index,this.hasPattern=!1,this.layoutVertexArray=new Yr,this.indexArray=new pn,this.segments=new io,this.programConfigurations=new fi(y.layers,y.zoom),this.stateDependentLayerIds=this.layers.filter(function(I){return I.isStateDependent()}).map(function(I){return I.id})};ts.prototype.populate=function(y,I,U){var $=this.layers[0],ae=[],he=null;$.type==="circle"&&(he=$.layout.get("circle-sort-key"));for(var Oe=0,rt=y;Oe<rt.length;Oe+=1){var gt=rt[Oe],Et=gt.feature,or=gt.id,_r=gt.index,pr=gt.sourceLayerIndex,Fr=this.layers[0]._featureFilter.needGeometry,oi=Ja(Et,Fr);if(this.layers[0]._featureFilter.filter(new Gn(this.zoom),oi,U)){var Hi=he?he.evaluate(oi,{},U):void 0,Ai={id:or,properties:Et.properties,type:Et.type,sourceLayerIndex:pr,index:_r,geometry:Fr?oi.geometry:zn(Et),patterns:{},sortKey:Hi};ae.push(Ai)}}he&&ae.sort(function(za,Za){return za.sortKey-Za.sortKey});for(var bn=0,nn=ae;bn<nn.length;bn+=1){var xn=nn[bn],Pn=xn,Zn=Pn.geometry,ga=Pn.index,ha=Pn.sourceLayerIndex,eo=y[ga].feature;this.addFeature(xn,Zn,ga,U),I.featureIndex.insert(eo,Zn,ga,ha,this.index)}},ts.prototype.update=function(y,I,U){this.stateDependentLayers.length&&this.programConfigurations.updatePaintArrays(y,I,this.stateDependentLayers,U)},ts.prototype.isEmpty=function(){return this.layoutVertexArray.length===0},ts.prototype.uploadPending=function(){return!this.uploaded||this.programConfigurations.needsUpload},ts.prototype.upload=function(y){this.uploaded||(this.layoutVertexBuffer=y.createVertexBuffer(this.layoutVertexArray,ol),this.indexBuffer=y.createIndexBuffer(this.indexArray)),this.programConfigurations.upload(y),this.uploaded=!0},ts.prototype.destroy=function(){this.layoutVertexBuffer&&(this.layoutVertexBuffer.destroy(),this.indexBuffer.destroy(),this.programConfigurations.destroy(),this.segments.destroy())},ts.prototype.addFeature=function(y,I,U,$){for(var ae=0,he=I;ae<he.length;ae+=1)for(var Oe=he[ae],rt=0,gt=Oe;rt<gt.length;rt+=1){var Et=gt[rt],or=Et.x,_r=Et.y;if(!(or<0||or>=Ci||_r<0||_r>=Ci)){var pr=this.segments.prepareSegment(4,this.layoutVertexArray,this.indexArray,y.sortKey),Fr=pr.vertexLength;co(this.layoutVertexArray,or,_r,-1,-1),co(this.layoutVertexArray,or,_r,1,-1),co(this.layoutVertexArray,or,_r,1,1),co(this.layoutVertexArray,or,_r,-1,1),this.indexArray.emplaceBack(Fr,Fr+1,Fr+2),this.indexArray.emplaceBack(Fr,Fr+3,Fr+2),pr.vertexLength+=4,pr.primitiveLength+=2}}this.programConfigurations.populatePaintArrays(this.layoutVertexArray.length,y,U,{},$)},X("CircleBucket",ts,{omit:["layers"]});function so(m,y){for(var I=0;I<m.length;I++)if(Td(y,m[I]))return!0;for(var U=0;U<y.length;U++)if(Td(m,y[U]))return!0;return!!Lv(m,y)}function Zo(m,y,I){return!!(Td(m,y)||Kv(y,m,I))}function ms(m,y){if(m.length===1)return pp(y,m[0]);for(var I=0;I<y.length;I++)for(var U=y[I],$=0;$<U.length;$++)if(Td(m,U[$]))return!0;for(var ae=0;ae<m.length;ae++)if(pp(y,m[ae]))return!0;for(var he=0;he<y.length;he++)if(Lv(m,y[he]))return!0;return!1}function ou(m,y,I){for(var U=0;U<y.length;U++){var $=y[U];if(m.length>=3){for(var ae=0;ae<$.length;ae++)if(Td(m,$[ae]))return!0}if(Cv(m,$,I))return!0}return!1}function Cv(m,y,I){if(m.length>1){if(Lv(m,y))return!0;for(var U=0;U<y.length;U++)if(Kv(y[U],m,I))return!0}for(var $=0;$<m.length;$++)if(Kv(m[$],y,I))return!0;return!1}function Lv(m,y){if(m.length===0||y.length===0)return!1;for(var I=0;I<m.length-1;I++)for(var U=m[I],$=m[I+1],ae=0;ae<y.length-1;ae++){var he=y[ae],Oe=y[ae+1];if(wd(U,$,he,Oe))return!0}return!1}function wd(m,y,I,U){return oe(m,I,U)!==oe(y,I,U)&&oe(m,y,I)!==oe(m,y,U)}function Kv(m,y,I){var U=I*I;if(y.length===1)return m.distSqr(y[0])<U;for(var $=1;$<y.length;$++){var ae=y[$-1],he=y[$];if(cg(m,ae,he)<U)return!0}return!1}function cg(m,y,I){var U=y.distSqr(I);if(U===0)return m.distSqr(y);var $=((m.x-y.x)*(I.x-y.x)+(m.y-y.y)*(I.y-y.y))/U;return $<0?m.distSqr(y):$>1?m.distSqr(I):m.distSqr(I.sub(y)._mult($)._add(y))}function pp(m,y){for(var I=!1,U,$,ae,he=0;he<m.length;he++){U=m[he];for(var Oe=0,rt=U.length-1;Oe<U.length;rt=Oe++)$=U[Oe],ae=U[rt],$.y>y.y!=ae.y>y.y&&y.x<(ae.x-$.x)*(y.y-$.y)/(ae.y-$.y)+$.x&&(I=!I)}return I}function Td(m,y){for(var I=!1,U=0,$=m.length-1;U<m.length;$=U++){var ae=m[U],he=m[$];ae.y>y.y!=he.y>y.y&&y.x<(he.x-ae.x)*(y.y-ae.y)/(he.y-ae.y)+ae.x&&(I=!I)}return I}function gp(m,y,I,U,$){for(var ae=0,he=m;ae<he.length;ae+=1){var Oe=he[ae];if(y<=Oe.x&&I<=Oe.y&&U>=Oe.x&&$>=Oe.y)return!0}var rt=[new u(y,I),new u(y,$),new u(U,$),new u(U,I)];if(m.length>2)for(var gt=0,Et=rt;gt<Et.length;gt+=1){var or=Et[gt];if(Td(m,or))return!0}for(var _r=0;_r<m.length-1;_r++){var pr=m[_r],Fr=m[_r+1];if(Vd(pr,Fr,rt))return!0}return!1}function Vd(m,y,I){var U=I[0],$=I[2];if(m.x<U.x&&y.x<U.x||m.x>$.x&&y.x>$.x||m.y<U.y&&y.y<U.y||m.y>$.y&&y.y>$.y)return!1;var ae=oe(m,y,I[0]);return ae!==oe(m,y,I[1])||ae!==oe(m,y,I[2])||ae!==oe(m,y,I[3])}function Ad(m,y,I){var U=y.paint.get(m).value;return U.kind==="constant"?U.value:I.programConfigurations.get(y.id).getMaxValue(m)}function Pv(m){return Math.sqrt(m[0]*m[0]+m[1]*m[1])}function Jv(m,y,I,U,$){if(!y[0]&&!y[1])return m;var ae=u.convert(y)._mult($);I==="viewport"&&ae._rotate(-U);for(var he=[],Oe=0;Oe<m.length;Oe++){var rt=m[Oe];he.push(rt.sub(ae))}return he}var Iv=new Nr({"circle-sort-key":new bt(Rn.layout_circle["circle-sort-key"])}),ay=new Nr({"circle-radius":new bt(Rn.paint_circle["circle-radius"]),"circle-color":new bt(Rn.paint_circle["circle-color"]),"circle-blur":new bt(Rn.paint_circle["circle-blur"]),"circle-opacity":new bt(Rn.paint_circle["circle-opacity"]),"circle-translate":new Me(Rn.paint_circle["circle-translate"]),"circle-translate-anchor":new Me(Rn.paint_circle["circle-translate-anchor"]),"circle-pitch-scale":new Me(Rn.paint_circle["circle-pitch-scale"]),"circle-pitch-alignment":new Me(Rn.paint_circle["circle-pitch-alignment"]),"circle-stroke-width":new bt(Rn.paint_circle["circle-stroke-width"]),"circle-stroke-color":new bt(Rn.paint_circle["circle-stroke-color"]),"circle-stroke-opacity":new bt(Rn.paint_circle["circle-stroke-opacity"])}),fg={paint:ay,layout:Iv},oh=typeof Float32Array!="undefined"?Float32Array:Array;Math.hypot||(Math.hypot=function(){for(var m=arguments,y=0,I=arguments.length;I--;)y+=m[I]*m[I];return Math.sqrt(y)});function hg(){var m=new oh(4);return oh!=Float32Array&&(m[1]=0,m[2]=0),m[0]=1,m[3]=1,m}function oy(m,y,I){var U=y[0],$=y[1],ae=y[2],he=y[3],Oe=Math.sin(I),rt=Math.cos(I);return m[0]=U*rt+ae*Oe,m[1]=$*rt+he*Oe,m[2]=U*-Oe+ae*rt,m[3]=$*-Oe+he*rt,m}function jh(){var m=new oh(9);return oh!=Float32Array&&(m[1]=0,m[2]=0,m[3]=0,m[5]=0,m[6]=0,m[7]=0),m[0]=1,m[4]=1,m[8]=1,m}function im(m,y){var I=Math.sin(y),U=Math.cos(y);return m[0]=U,m[1]=I,m[2]=0,m[3]=-I,m[4]=U,m[5]=0,m[6]=0,m[7]=0,m[8]=1,m}function b1(){var m=new oh(16);return oh!=Float32Array&&(m[1]=0,m[2]=0,m[3]=0,m[4]=0,m[6]=0,m[7]=0,m[8]=0,m[9]=0,m[11]=0,m[12]=0,m[13]=0,m[14]=0),m[0]=1,m[5]=1,m[10]=1,m[15]=1,m}function w1(m){var y=new oh(16);return y[0]=m[0],y[1]=m[1],y[2]=m[2],y[3]=m[3],y[4]=m[4],y[5]=m[5],y[6]=m[6],y[7]=m[7],y[8]=m[8],y[9]=m[9],y[10]=m[10],y[11]=m[11],y[12]=m[12],y[13]=m[13],y[14]=m[14],y[15]=m[15],y}function sy(m){return m[0]=1,m[1]=0,m[2]=0,m[3]=0,m[4]=0,m[5]=1,m[6]=0,m[7]=0,m[8]=0,m[9]=0,m[10]=1,m[11]=0,m[12]=0,m[13]=0,m[14]=0,m[15]=1,m}function nm(m,y){var I=y[0],U=y[1],$=y[2],ae=y[3],he=y[4],Oe=y[5],rt=y[6],gt=y[7],Et=y[8],or=y[9],_r=y[10],pr=y[11],Fr=y[12],oi=y[13],Hi=y[14],Ai=y[15],bn=I*Oe-U*he,nn=I*rt-$*he,xn=I*gt-ae*he,Pn=U*rt-$*Oe,Zn=U*gt-ae*Oe,ga=$*gt-ae*rt,ha=Et*oi-or*Fr,eo=Et*Hi-_r*Fr,za=Et*Ai-pr*Fr,Za=or*Hi-_r*oi,Ko=or*Ai-pr*oi,to=_r*Ai-pr*Hi,ao=bn*to-nn*Ko+xn*Za+Pn*za-Zn*eo+ga*ha;return ao?(ao=1/ao,m[0]=(Oe*to-rt*Ko+gt*Za)*ao,m[1]=($*Ko-U*to-ae*Za)*ao,m[2]=(oi*ga-Hi*Zn+Ai*Pn)*ao,m[3]=(_r*Zn-or*ga-pr*Pn)*ao,m[4]=(rt*za-he*to-gt*eo)*ao,m[5]=(I*to-$*za+ae*eo)*ao,m[6]=(Hi*xn-Fr*ga-Ai*nn)*ao,m[7]=(Et*ga-_r*xn+pr*nn)*ao,m[8]=(he*Ko-Oe*za+gt*ha)*ao,m[9]=(U*za-I*Ko-ae*ha)*ao,m[10]=(Fr*Zn-oi*xn+Ai*bn)*ao,m[11]=(or*xn-Et*Zn-pr*bn)*ao,m[12]=(Oe*eo-he*Za-rt*ha)*ao,m[13]=(I*Za-U*eo+$*ha)*ao,m[14]=(oi*nn-Fr*Pn-Hi*bn)*ao,m[15]=(Et*Pn-or*nn+_r*bn)*ao,m):null}function am(m,y,I){var U=y[0],$=y[1],ae=y[2],he=y[3],Oe=y[4],rt=y[5],gt=y[6],Et=y[7],or=y[8],_r=y[9],pr=y[10],Fr=y[11],oi=y[12],Hi=y[13],Ai=y[14],bn=y[15],nn=I[0],xn=I[1],Pn=I[2],Zn=I[3];return m[0]=nn*U+xn*Oe+Pn*or+Zn*oi,m[1]=nn*$+xn*rt+Pn*_r+Zn*Hi,m[2]=nn*ae+xn*gt+Pn*pr+Zn*Ai,m[3]=nn*he+xn*Et+Pn*Fr+Zn*bn,nn=I[4],xn=I[5],Pn=I[6],Zn=I[7],m[4]=nn*U+xn*Oe+Pn*or+Zn*oi,m[5]=nn*$+xn*rt+Pn*_r+Zn*Hi,m[6]=nn*ae+xn*gt+Pn*pr+Zn*Ai,m[7]=nn*he+xn*Et+Pn*Fr+Zn*bn,nn=I[8],xn=I[9],Pn=I[10],Zn=I[11],m[8]=nn*U+xn*Oe+Pn*or+Zn*oi,m[9]=nn*$+xn*rt+Pn*_r+Zn*Hi,m[10]=nn*ae+xn*gt+Pn*pr+Zn*Ai,m[11]=nn*he+xn*Et+Pn*Fr+Zn*bn,nn=I[12],xn=I[13],Pn=I[14],Zn=I[15],m[12]=nn*U+xn*Oe+Pn*or+Zn*oi,m[13]=nn*$+xn*rt+Pn*_r+Zn*Hi,m[14]=nn*ae+xn*gt+Pn*pr+Zn*Ai,m[15]=nn*he+xn*Et+Pn*Fr+Zn*bn,m}function vc(m,y,I){var U=I[0],$=I[1],ae=I[2],he,Oe,rt,gt,Et,or,_r,pr,Fr,oi,Hi,Ai;return y===m?(m[12]=y[0]*U+y[4]*$+y[8]*ae+y[12],m[13]=y[1]*U+y[5]*$+y[9]*ae+y[13],m[14]=y[2]*U+y[6]*$+y[10]*ae+y[14],m[15]=y[3]*U+y[7]*$+y[11]*ae+y[15]):(he=y[0],Oe=y[1],rt=y[2],gt=y[3],Et=y[4],or=y[5],_r=y[6],pr=y[7],Fr=y[8],oi=y[9],Hi=y[10],Ai=y[11],m[0]=he,m[1]=Oe,m[2]=rt,m[3]=gt,m[4]=Et,m[5]=or,m[6]=_r,m[7]=pr,m[8]=Fr,m[9]=oi,m[10]=Hi,m[11]=Ai,m[12]=he*U+Et*$+Fr*ae+y[12],m[13]=Oe*U+or*$+oi*ae+y[13],m[14]=rt*U+_r*$+Hi*ae+y[14],m[15]=gt*U+pr*$+Ai*ae+y[15]),m}function eu(m,y,I){var U=I[0],$=I[1],ae=I[2];return m[0]=y[0]*U,m[1]=y[1]*U,m[2]=y[2]*U,m[3]=y[3]*U,m[4]=y[4]*$,m[5]=y[5]*$,m[6]=y[6]*$,m[7]=y[7]*$,m[8]=y[8]*ae,m[9]=y[9]*ae,m[10]=y[10]*ae,m[11]=y[11]*ae,m[12]=y[12],m[13]=y[13],m[14]=y[14],m[15]=y[15],m}function Sd(m,y,I){var U=Math.sin(I),$=Math.cos(I),ae=y[4],he=y[5],Oe=y[6],rt=y[7],gt=y[8],Et=y[9],or=y[10],_r=y[11];return y!==m&&(m[0]=y[0],m[1]=y[1],m[2]=y[2],m[3]=y[3],m[12]=y[12],m[13]=y[13],m[14]=y[14],m[15]=y[15]),m[4]=ae*$+gt*U,m[5]=he*$+Et*U,m[6]=Oe*$+or*U,m[7]=rt*$+_r*U,m[8]=gt*$-ae*U,m[9]=Et*$-he*U,m[10]=or*$-Oe*U,m[11]=_r*$-rt*U,m}function ly(m,y,I){var U=Math.sin(I),$=Math.cos(I),ae=y[0],he=y[1],Oe=y[2],rt=y[3],gt=y[4],Et=y[5],or=y[6],_r=y[7];return y!==m&&(m[8]=y[8],m[9]=y[9],m[10]=y[10],m[11]=y[11],m[12]=y[12],m[13]=y[13],m[14]=y[14],m[15]=y[15]),m[0]=ae*$+gt*U,m[1]=he*$+Et*U,m[2]=Oe*$+or*U,m[3]=rt*$+_r*U,m[4]=gt*$-ae*U,m[5]=Et*$-he*U,m[6]=or*$-Oe*U,m[7]=_r*$-rt*U,m}function T1(m,y,I,U,$){var ae=1/Math.tan(y/2),he;return m[0]=ae/I,m[1]=0,m[2]=0,m[3]=0,m[4]=0,m[5]=ae,m[6]=0,m[7]=0,m[8]=0,m[9]=0,m[11]=-1,m[12]=0,m[13]=0,m[15]=0,$!=null&&$!==1/0?(he=1/(U-$),m[10]=($+U)*he,m[14]=2*$*U*he):(m[10]=-1,m[14]=-2*U),m}function wu(m,y,I,U,$,ae,he){var Oe=1/(y-I),rt=1/(U-$),gt=1/(ae-he);return m[0]=-2*Oe,m[1]=0,m[2]=0,m[3]=0,m[4]=0,m[5]=-2*rt,m[6]=0,m[7]=0,m[8]=0,m[9]=0,m[10]=2*gt,m[11]=0,m[12]=(y+I)*Oe,m[13]=($+U)*rt,m[14]=(he+ae)*gt,m[15]=1,m}var Nx=am;function om(){var m=new oh(3);return oh!=Float32Array&&(m[0]=0,m[1]=0,m[2]=0),m}function kw(m){var y=new oh(3);return y[0]=m[0],y[1]=m[1],y[2]=m[2],y}function Rv(m,y,I){return m[0]=y[0]+I[0],m[1]=y[1]+I[1],m[2]=y[2]+I[2],m}function sm(m,y,I){return m[0]=y[0]-I[0],m[1]=y[1]-I[1],m[2]=y[2]-I[2],m}function Cw(m,y,I){return m[0]=y[0]*I,m[1]=y[1]*I,m[2]=y[2]*I,m}function Ux(m,y){var I=y[0],U=y[1],$=y[2],ae=I*I+U*U+$*$;return ae>0&&(ae=1/Math.sqrt(ae)),m[0]=y[0]*ae,m[1]=y[1]*ae,m[2]=y[2]*ae,m}function q9(m,y){return m[0]*y[0]+m[1]*y[1]+m[2]*y[2]}function B9(m,y,I){var U=y[0],$=y[1],ae=y[2],he=I[0],Oe=I[1],rt=I[2];return m[0]=$*rt-ae*Oe,m[1]=ae*he-U*rt,m[2]=U*Oe-$*he,m}function N9(m,y,I){var U=y[0],$=y[1],ae=y[2];return m[0]=U*I[0]+$*I[3]+ae*I[6],m[1]=U*I[1]+$*I[4]+ae*I[7],m[2]=U*I[2]+$*I[5]+ae*I[8],m}var U9=sm,LQ=function(){var m=om();return function(y,I,U,$,ae,he){var Oe,rt;for(I||(I=3),U||(U=0),$?rt=Math.min($*I+U,y.length):rt=y.length,Oe=U;Oe<rt;Oe+=I)m[0]=y[Oe],m[1]=y[Oe+1],m[2]=y[Oe+2],ae(m,m,he),y[Oe]=m[0],y[Oe+1]=m[1],y[Oe+2]=m[2];return y}}();function V9(){var m=new oh(4);return oh!=Float32Array&&(m[0]=0,m[1]=0,m[2]=0,m[3]=0),m}function G9(m,y,I){return m[0]=y[0]*I,m[1]=y[1]*I,m[2]=y[2]*I,m[3]=y[3]*I,m}function H9(m,y){return m[0]*y[0]+m[1]*y[1]+m[2]*y[2]+m[3]*y[3]}function uy(m,y,I){var U=y[0],$=y[1],ae=y[2],he=y[3];return m[0]=I[0]*U+I[4]*$+I[8]*ae+I[12]*he,m[1]=I[1]*U+I[5]*$+I[9]*ae+I[13]*he,m[2]=I[2]*U+I[6]*$+I[10]*ae+I[14]*he,m[3]=I[3]*U+I[7]*$+I[11]*ae+I[15]*he,m}var Vx=function(){var m=V9();return function(y,I,U,$,ae,he){var Oe,rt;for(I||(I=4),U||(U=0),$?rt=Math.min($*I+U,y.length):rt=y.length,Oe=U;Oe<rt;Oe+=I)m[0]=y[Oe],m[1]=y[Oe+1],m[2]=y[Oe+2],m[3]=y[Oe+3],ae(m,m,he),y[Oe]=m[0],y[Oe+1]=m[1],y[Oe+2]=m[2],y[Oe+3]=m[3];return y}}();function mC(){var m=new oh(2);return oh!=Float32Array&&(m[0]=0,m[1]=0),m}function nS(m){var y=m[0],I=m[1];return y*y+I*I}var aS=nS,PQ=function(){var m=mC();return function(y,I,U,$,ae,he){var Oe,rt;for(I||(I=2),U||(U=0),$?rt=Math.min($*I+U,y.length):rt=y.length,Oe=U;Oe<rt;Oe+=I)m[0]=y[Oe],m[1]=y[Oe+1],ae(m,m,he),y[Oe]=m[0],y[Oe+1]=m[1];return y}}(),j9=function(m){function y(I){m.call(this,I,fg)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.createBucket=function(U){return new ts(U)},y.prototype.queryRadius=function(U){var $=U;return Ad("circle-radius",this,$)+Ad("circle-stroke-width",this,$)+Pv(this.paint.get("circle-translate"))},y.prototype.queryIntersectsFeature=function(U,$,ae,he,Oe,rt,gt,Et){for(var or=Jv(U,this.paint.get("circle-translate"),this.paint.get("circle-translate-anchor"),rt.angle,gt),_r=this.paint.get("circle-radius").evaluate($,ae),pr=this.paint.get("circle-stroke-width").evaluate($,ae),Fr=_r+pr,oi=this.paint.get("circle-pitch-alignment")==="map",Hi=oi?or:_C(or,Et),Ai=oi?Fr*gt:Fr,bn=0,nn=he;bn<nn.length;bn+=1)for(var xn=nn[bn],Pn=0,Zn=xn;Pn<Zn.length;Pn+=1){var ga=Zn[Pn],ha=oi?ga:yC(ga,Et),eo=Ai,za=uy([],[ga.x,ga.y,0,1],Et);if(this.paint.get("circle-pitch-scale")==="viewport"&&this.paint.get("circle-pitch-alignment")==="map"?eo*=za[3]/rt.cameraToCenterDistance:this.paint.get("circle-pitch-scale")==="map"&&this.paint.get("circle-pitch-alignment")==="viewport"&&(eo*=rt.cameraToCenterDistance/za[3]),Zo(Hi,ha,eo))return!0}return!1},y}(mi);function yC(m,y){var I=uy([],[m.x,m.y,0,1],y);return new u(I[0]/I[3],I[1]/I[3])}function _C(m,y){return m.map(function(I){return yC(I,y)})}var oS=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y}(ts);X("HeatmapBucket",oS,{omit:["layers"]});function Md(m,y,I,U){var $=y.width,ae=y.height;if(!U)U=new Uint8Array($*ae*I);else if(U instanceof Uint8ClampedArray)U=new Uint8Array(U.buffer);else if(U.length!==$*ae*I)throw new RangeError("mismatched image size");return m.width=$,m.height=ae,m.data=U,m}function Lw(m,y,I){var U=y.width,$=y.height;if(!(U===m.width&&$===m.height)){var ae=Md({},{width:U,height:$},I);Pw(m,ae,{x:0,y:0},{x:0,y:0},{width:Math.min(m.width,U),height:Math.min(m.height,$)},I),m.width=U,m.height=$,m.data=ae.data}}function Pw(m,y,I,U,$,ae){if($.width===0||$.height===0)return y;if($.width>m.width||$.height>m.height||I.x>m.width-$.width||I.y>m.height-$.height)throw new RangeError("out of range source coordinates for image copy");if($.width>y.width||$.height>y.height||U.x>y.width-$.width||U.y>y.height-$.height)throw new RangeError("out of range destination coordinates for image copy");for(var he=m.data,Oe=y.data,rt=0;rt<$.height;rt++)for(var gt=((I.y+rt)*m.width+I.x)*ae,Et=((U.y+rt)*y.width+U.x)*ae,or=0;or<$.width*ae;or++)Oe[Et+or]=he[gt+or];return y}var Dv=function(y,I){Md(this,y,1,I)};Dv.prototype.resize=function(y){Lw(this,y,1)},Dv.prototype.clone=function(){return new Dv({width:this.width,height:this.height},new Uint8Array(this.data))},Dv.copy=function(y,I,U,$,ae){Pw(y,I,U,$,ae,1)};var Sh=function(y,I){Md(this,y,4,I)};Sh.prototype.resize=function(y){Lw(this,y,4)},Sh.prototype.replace=function(y,I){I?this.data.set(y):y instanceof Uint8ClampedArray?this.data=new Uint8Array(y.buffer):this.data=y},Sh.prototype.clone=function(){return new Sh({width:this.width,height:this.height},new Uint8Array(this.data))},Sh.copy=function(y,I,U,$,ae){Pw(y,I,U,$,ae,4)},X("AlphaImage",Dv),X("RGBAImage",Sh);var Gx=new Nr({"heatmap-radius":new bt(Rn.paint_heatmap["heatmap-radius"]),"heatmap-weight":new bt(Rn.paint_heatmap["heatmap-weight"]),"heatmap-intensity":new Me(Rn.paint_heatmap["heatmap-intensity"]),"heatmap-color":new jr(Rn.paint_heatmap["heatmap-color"]),"heatmap-opacity":new Me(Rn.paint_heatmap["heatmap-opacity"])}),A1={paint:Gx};function Hx(m){var y={},I=m.resolution||256,U=m.clips?m.clips.length:1,$=m.image||new Sh({width:I,height:U}),ae=function(bn,nn,xn){y[m.evaluationKey]=xn;var Pn=m.expression.evaluate(y);$.data[bn+nn+0]=Math.floor(Pn.r*255/Pn.a),$.data[bn+nn+1]=Math.floor(Pn.g*255/Pn.a),$.data[bn+nn+2]=Math.floor(Pn.b*255/Pn.a),$.data[bn+nn+3]=Math.floor(Pn.a*255)};if(m.clips)for(var gt=0,Et=0;gt<U;++gt,Et+=I*4)for(var or=0,_r=0;or<I;or++,_r+=4){var pr=or/(I-1),Fr=m.clips[gt],oi=Fr.start,Hi=Fr.end,Ai=oi*(1-pr)+Hi*pr;ae(Et,_r,Ai)}else for(var he=0,Oe=0;he<I;he++,Oe+=4){var rt=he/(I-1);ae(0,Oe,rt)}return $}var Iw=function(m){function y(I){m.call(this,I,A1),this._updateColorRamp()}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.createBucket=function(U){return new oS(U)},y.prototype._handleSpecialPaintPropertyUpdate=function(U){U==="heatmap-color"&&this._updateColorRamp()},y.prototype._updateColorRamp=function(){var U=this._transitionablePaint._values["heatmap-color"].value.expression;this.colorRamp=Hx({expression:U,evaluationKey:"heatmapDensity",image:this.colorRamp}),this.colorRampTexture=null},y.prototype.resize=function(){this.heatmapFbo&&(this.heatmapFbo.destroy(),this.heatmapFbo=null)},y.prototype.queryRadius=function(){return 0},y.prototype.queryIntersectsFeature=function(){return!1},y.prototype.hasOffscreenPass=function(){return this.paint.get("heatmap-opacity")!==0&&this.visibility!=="none"},y}(mi),W9=new Nr({"hillshade-illumination-direction":new Me(Rn.paint_hillshade["hillshade-illumination-direction"]),"hillshade-illumination-anchor":new Me(Rn.paint_hillshade["hillshade-illumination-anchor"]),"hillshade-exaggeration":new Me(Rn.paint_hillshade["hillshade-exaggeration"]),"hillshade-shadow-color":new Me(Rn.paint_hillshade["hillshade-shadow-color"]),"hillshade-highlight-color":new Me(Rn.paint_hillshade["hillshade-highlight-color"]),"hillshade-accent-color":new Me(Rn.paint_hillshade["hillshade-accent-color"])}),X9={paint:W9},xC=function(m){function y(I){m.call(this,I,X9)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.hasOffscreenPass=function(){return this.paint.get("hillshade-exaggeration")!==0&&this.visibility!=="none"},y}(mi),bC=Wi([{name:"a_pos",components:2,type:"Int16"}],4),Z9=bC.members,Rw=S1,wC=S1;function S1(m,y,I){I=I||2;var U=y&&y.length,$=U?y[0]*I:m.length,ae=Dw(m,0,$,I,!0),he=[];if(!ae||ae.next===ae.prev)return he;var Oe,rt,gt,Et,or,_r,pr;if(U&&(ae=um(m,y,ae,I)),m.length>80*I){Oe=gt=m[0],rt=Et=m[1];for(var Fr=I;Fr<$;Fr+=I)or=m[Fr],_r=m[Fr+1],or<Oe&&(Oe=or),_r<rt&&(rt=_r),or>gt&&(gt=or),_r>Et&&(Et=_r);pr=Math.max(gt-Oe,Et-rt),pr=pr!==0?1/pr:0}return jx(ae,he,I,Oe,rt,pr),he}function Dw(m,y,I,U,$){var ae,he;if($===dS(m,y,I,U)>0)for(ae=y;ae<I;ae+=U)he=EC(ae,m[ae],m[ae+1],he);else for(ae=I-U;ae>=y;ae-=U)he=EC(ae,m[ae],m[ae+1],he);return he&&Xx(he,he.next)&&(Kx(he),he=he.next),he}function lm(m,y){if(!m)return m;y||(y=m);var I=m,U;do if(U=!1,!I.steiner&&(Xx(I,I.next)||Tf(I.prev,I,I.next)===0)){if(Kx(I),I=y=I.prev,I===I.next)break;U=!0}else I=I.next;while(U||I!==y);return y}function jx(m,y,I,U,$,ae,he){if(m){!he&&ae&&Fw(m,U,$,ae);for(var Oe=m,rt,gt;m.prev!==m.next;){if(rt=m.prev,gt=m.next,ae?AC(m,U,$,ae):TC(m)){y.push(rt.i/I),y.push(m.i/I),y.push(gt.i/I),Kx(m),m=gt.next,Oe=gt.next;continue}if(m=gt,m===Oe){he?he===1?(m=Wx(lm(m),y,I),jx(m,y,I,U,$,ae,2)):he===2&&d0(m,y,I,U,$,ae):jx(lm(m),y,I,U,$,ae,1);break}}}}function TC(m){var y=m.prev,I=m,U=m.next;if(Tf(y,I,U)>=0)return!1;for(var $=m.next.next;$!==m.prev;){if(cm(y.x,y.y,I.x,I.y,U.x,U.y,$.x,$.y)&&Tf($.prev,$,$.next)>=0)return!1;$=$.next}return!0}function AC(m,y,I,U){var $=m.prev,ae=m,he=m.next;if(Tf($,ae,he)>=0)return!1;for(var Oe=$.x<ae.x?$.x<he.x?$.x:he.x:ae.x<he.x?ae.x:he.x,rt=$.y<ae.y?$.y<he.y?$.y:he.y:ae.y<he.y?ae.y:he.y,gt=$.x>ae.x?$.x>he.x?$.x:he.x:ae.x>he.x?ae.x:he.x,Et=$.y>ae.y?$.y>he.y?$.y:he.y:ae.y>he.y?ae.y:he.y,or=uS(Oe,rt,y,I,U),_r=uS(gt,Et,y,I,U),pr=m.prevZ,Fr=m.nextZ;pr&&pr.z>=or&&Fr&&Fr.z<=_r;){if(pr!==m.prev&&pr!==m.next&&cm($.x,$.y,ae.x,ae.y,he.x,he.y,pr.x,pr.y)&&Tf(pr.prev,pr,pr.next)>=0||(pr=pr.prevZ,Fr!==m.prev&&Fr!==m.next&&cm($.x,$.y,ae.x,ae.y,he.x,he.y,Fr.x,Fr.y)&&Tf(Fr.prev,Fr,Fr.next)>=0))return!1;Fr=Fr.nextZ}for(;pr&&pr.z>=or;){if(pr!==m.prev&&pr!==m.next&&cm($.x,$.y,ae.x,ae.y,he.x,he.y,pr.x,pr.y)&&Tf(pr.prev,pr,pr.next)>=0)return!1;pr=pr.prevZ}for(;Fr&&Fr.z<=_r;){if(Fr!==m.prev&&Fr!==m.next&&cm($.x,$.y,ae.x,ae.y,he.x,he.y,Fr.x,Fr.y)&&Tf(Fr.prev,Fr,Fr.next)>=0)return!1;Fr=Fr.nextZ}return!0}function Wx(m,y,I){var U=m;do{var $=U.prev,ae=U.next.next;!Xx($,ae)&&zw($,U,U.next,ae)&&Yx($,ae)&&Yx(ae,$)&&(y.push($.i/I),y.push(U.i/I),y.push(ae.i/I),Kx(U),Kx(U.next),U=m=ae),U=U.next}while(U!==m);return lm(U)}function d0(m,y,I,U,$,ae){var he=m;do{for(var Oe=he.next.next;Oe!==he.prev;){if(he.i!==Oe.i&&M1(he,Oe)){var rt=fS(he,Oe);he=lm(he,he.next),rt=lm(rt,rt.next),jx(he,y,I,U,$,ae),jx(rt,y,I,U,$,ae);return}Oe=Oe.next}he=he.next}while(he!==m)}function um(m,y,I,U){var $=[],ae,he,Oe,rt,gt;for(ae=0,he=y.length;ae<he;ae++)Oe=y[ae]*U,rt=ae<he-1?y[ae+1]*U:m.length,gt=Dw(m,Oe,rt,U,!1),gt===gt.next&&(gt.steiner=!0),$.push(cS(gt));for($.sort(SC),ae=0;ae<$.length;ae++)sS($[ae],I),I=lm(I,I.next);return I}function SC(m,y){return m.x-y.x}function sS(m,y){if(y=Y9(m,y),y){var I=fS(y,m);lm(y,y.next),lm(I,I.next)}}function Y9(m,y){var I=y,U=m.x,$=m.y,ae=-1/0,he;do{if($<=I.y&&$>=I.next.y&&I.next.y!==I.y){var Oe=I.x+($-I.y)*(I.next.x-I.x)/(I.next.y-I.y);if(Oe<=U&&Oe>ae){if(ae=Oe,Oe===U){if($===I.y)return I;if($===I.next.y)return I.next}he=I.x<I.next.x?I:I.next}}I=I.next}while(I!==y);if(!he)return null;if(U===ae)return he;var rt=he,gt=he.x,Et=he.y,or=1/0,_r;I=he;do U>=I.x&&I.x>=gt&&U!==I.x&&cm($<Et?U:ae,$,gt,Et,$<Et?ae:U,$,I.x,I.y)&&(_r=Math.abs($-I.y)/(U-I.x),Yx(I,m)&&(_r<or||_r===or&&(I.x>he.x||I.x===he.x&&K9(he,I)))&&(he=I,or=_r)),I=I.next;while(I!==rt);return he}function K9(m,y){return Tf(m.prev,m,y.prev)<0&&Tf(y.next,m,m.next)<0}function Fw(m,y,I,U){var $=m;do $.z===null&&($.z=uS($.x,$.y,y,I,U)),$.prevZ=$.prev,$.nextZ=$.next,$=$.next;while($!==m);$.prevZ.nextZ=null,$.prevZ=null,lS($)}function lS(m){var y,I,U,$,ae,he,Oe,rt,gt=1;do{for(I=m,m=null,ae=null,he=0;I;){for(he++,U=I,Oe=0,y=0;y<gt&&(Oe++,U=U.nextZ,!!U);y++);for(rt=gt;Oe>0||rt>0&&U;)Oe!==0&&(rt===0||!U||I.z<=U.z)?($=I,I=I.nextZ,Oe--):($=U,U=U.nextZ,rt--),ae?ae.nextZ=$:m=$,$.prevZ=ae,ae=$;I=U}ae.nextZ=null,gt*=2}while(he>1);return m}function uS(m,y,I,U,$){return m=32767*(m-I)*$,y=32767*(y-U)*$,m=(m|m<<8)&16711935,m=(m|m<<4)&252645135,m=(m|m<<2)&858993459,m=(m|m<<1)&1431655765,y=(y|y<<8)&16711935,y=(y|y<<4)&252645135,y=(y|y<<2)&858993459,y=(y|y<<1)&1431655765,m|y<<1}function cS(m){var y=m,I=m;do(y.x<I.x||y.x===I.x&&y.y<I.y)&&(I=y),y=y.next;while(y!==m);return I}function cm(m,y,I,U,$,ae,he,Oe){return($-he)*(y-Oe)-(m-he)*(ae-Oe)>=0&&(m-he)*(U-Oe)-(I-he)*(y-Oe)>=0&&(I-he)*(ae-Oe)-($-he)*(U-Oe)>=0}function M1(m,y){return m.next.i!==y.i&&m.prev.i!==y.i&&!MC(m,y)&&(Yx(m,y)&&Yx(y,m)&&J9(m,y)&&(Tf(m.prev,m,y.prev)||Tf(m,y.prev,y))||Xx(m,y)&&Tf(m.prev,m,m.next)>0&&Tf(y.prev,y,y.next)>0)}function Tf(m,y,I){return(y.y-m.y)*(I.x-y.x)-(y.x-m.x)*(I.y-y.y)}function Xx(m,y){return m.x===y.x&&m.y===y.y}function zw(m,y,I,U){var $=cy(Tf(m,y,I)),ae=cy(Tf(m,y,U)),he=cy(Tf(I,U,m)),Oe=cy(Tf(I,U,y));return!!($!==ae&&he!==Oe||$===0&&Zx(m,I,y)||ae===0&&Zx(m,U,y)||he===0&&Zx(I,m,U)||Oe===0&&Zx(I,y,U))}function Zx(m,y,I){return y.x<=Math.max(m.x,I.x)&&y.x>=Math.min(m.x,I.x)&&y.y<=Math.max(m.y,I.y)&&y.y>=Math.min(m.y,I.y)}function cy(m){return m>0?1:m<0?-1:0}function MC(m,y){var I=m;do{if(I.i!==m.i&&I.next.i!==m.i&&I.i!==y.i&&I.next.i!==y.i&&zw(I,I.next,m,y))return!0;I=I.next}while(I!==m);return!1}function Yx(m,y){return Tf(m.prev,m,m.next)<0?Tf(m,y,m.next)>=0&&Tf(m,m.prev,y)>=0:Tf(m,y,m.prev)<0||Tf(m,m.next,y)<0}function J9(m,y){var I=m,U=!1,$=(m.x+y.x)/2,ae=(m.y+y.y)/2;do I.y>ae!=I.next.y>ae&&I.next.y!==I.y&&$<(I.next.x-I.x)*(ae-I.y)/(I.next.y-I.y)+I.x&&(U=!U),I=I.next;while(I!==m);return U}function fS(m,y){var I=new hS(m.i,m.x,m.y),U=new hS(y.i,y.x,y.y),$=m.next,ae=y.prev;return m.next=y,y.prev=m,I.next=$,$.prev=I,U.next=I,I.prev=U,ae.next=U,U.prev=ae,U}function EC(m,y,I,U){var $=new hS(m,y,I);return U?($.next=U.next,$.prev=U,U.next.prev=$,U.next=$):($.prev=$,$.next=$),$}function Kx(m){m.next.prev=m.prev,m.prev.next=m.next,m.prevZ&&(m.prevZ.nextZ=m.nextZ),m.nextZ&&(m.nextZ.prevZ=m.prevZ)}function hS(m,y,I){this.i=m,this.x=y,this.y=I,this.prev=null,this.next=null,this.z=null,this.prevZ=null,this.nextZ=null,this.steiner=!1}S1.deviation=function(m,y,I,U){var $=y&&y.length,ae=$?y[0]*I:m.length,he=Math.abs(dS(m,0,ae,I));if($)for(var Oe=0,rt=y.length;Oe<rt;Oe++){var gt=y[Oe]*I,Et=Oe<rt-1?y[Oe+1]*I:m.length;he-=Math.abs(dS(m,gt,Et,I))}var or=0;for(Oe=0;Oe<U.length;Oe+=3){var _r=U[Oe]*I,pr=U[Oe+1]*I,Fr=U[Oe+2]*I;or+=Math.abs((m[_r]-m[Fr])*(m[pr+1]-m[_r+1])-(m[_r]-m[pr])*(m[Fr+1]-m[_r+1]))}return he===0&&or===0?0:Math.abs((or-he)/he)};function dS(m,y,I,U){for(var $=0,ae=y,he=I-U;ae<I;ae+=U)$+=(m[he]-m[ae])*(m[ae+1]+m[he+1]),he=ae;return $}S1.flatten=function(m){for(var y=m[0][0].length,I={vertices:[],holes:[],dimensions:y},U=0,$=0;$<m.length;$++){for(var ae=0;ae<m[$].length;ae++)for(var he=0;he<y;he++)I.vertices.push(m[$][ae][he]);$>0&&(U+=m[$-1].length,I.holes.push(U))}return I},Rw.default=wC;function vS(m,y,I,U,$){dg(m,y,I||0,U||m.length-1,$||kC)}function dg(m,y,I,U,$){for(;U>I;){if(U-I>600){var ae=U-I+1,he=y-I+1,Oe=Math.log(ae),rt=.5*Math.exp(2*Oe/3),gt=.5*Math.sqrt(Oe*rt*(ae-rt)/ae)*(he-ae/2<0?-1:1),Et=Math.max(I,Math.floor(y-he*rt/ae+gt)),or=Math.min(U,Math.floor(y+(ae-he)*rt/ae+gt));dg(m,y,Et,or,$)}var _r=m[y],pr=I,Fr=U;for(E1(m,I,y),$(m[U],_r)>0&&E1(m,I,U);pr<Fr;){for(E1(m,pr,Fr),pr++,Fr--;$(m[pr],_r)<0;)pr++;for(;$(m[Fr],_r)>0;)Fr--}$(m[I],_r)===0?E1(m,I,Fr):(Fr++,E1(m,Fr,U)),Fr<=y&&(I=Fr+1),y<=Fr&&(U=Fr-1)}}function E1(m,y,I){var U=m[y];m[y]=m[I],m[I]=U}function kC(m,y){return m<y?-1:m>y?1:0}function Ow(m,y){var I=m.length;if(I<=1)return[m];for(var U=[],$,ae,he=0;he<I;he++){var Oe=_e(m[he]);Oe!==0&&(m[he].area=Math.abs(Oe),ae===void 0&&(ae=Oe<0),ae===Oe<0?($&&U.push($),$=[m[he]]):$.push(m[he]))}if($&&U.push($),y>1)for(var rt=0;rt<U.length;rt++)U[rt].length<=y||(vS(U[rt],y,1,U[rt].length-1,CC),U[rt]=U[rt].slice(0,y));return U}function CC(m,y){return y.area-m.area}function qw(m,y,I){for(var U=I.patternDependencies,$=!1,ae=0,he=y;ae<he.length;ae+=1){var Oe=he[ae],rt=Oe.paint.get(m+"-pattern");rt.isConstant()||($=!0);var gt=rt.constantOr(null);gt&&($=!0,U[gt.to]=!0,U[gt.from]=!0)}return $}function Jx(m,y,I,U,$){for(var ae=$.patternDependencies,he=0,Oe=y;he<Oe.length;he+=1){var rt=Oe[he],gt=rt.paint.get(m+"-pattern"),Et=gt.value;if(Et.kind!=="constant"){var or=Et.evaluate({zoom:U-1},I,{},$.availableImages),_r=Et.evaluate({zoom:U},I,{},$.availableImages),pr=Et.evaluate({zoom:U+1},I,{},$.availableImages);or=or&&or.name?or.name:or,_r=_r&&_r.name?_r.name:_r,pr=pr&&pr.name?pr.name:pr,ae[or]=!0,ae[_r]=!0,ae[pr]=!0,I.patterns[rt.id]={min:or,mid:_r,max:pr}}}return I}var $x=500,mp=function(y){this.zoom=y.zoom,this.overscaling=y.overscaling,this.layers=y.layers,this.layerIds=this.layers.map(function(I){return I.id}),this.index=y.index,this.hasPattern=!1,this.patternFeatures=[],this.layoutVertexArray=new Yr,this.indexArray=new pn,this.indexArray2=new oa,this.programConfigurations=new fi(y.layers,y.zoom),this.segments=new io,this.segments2=new io,this.stateDependentLayerIds=this.layers.filter(function(I){return I.isStateDependent()}).map(function(I){return I.id})};mp.prototype.populate=function(y,I,U){this.hasPattern=qw("fill",this.layers,I);for(var $=this.layers[0].layout.get("fill-sort-key"),ae=[],he=0,Oe=y;he<Oe.length;he+=1){var rt=Oe[he],gt=rt.feature,Et=rt.id,or=rt.index,_r=rt.sourceLayerIndex,pr=this.layers[0]._featureFilter.needGeometry,Fr=Ja(gt,pr);if(this.layers[0]._featureFilter.filter(new Gn(this.zoom),Fr,U)){var oi=$?$.evaluate(Fr,{},U,I.availableImages):void 0,Hi={id:Et,properties:gt.properties,type:gt.type,sourceLayerIndex:_r,index:or,geometry:pr?Fr.geometry:zn(gt),patterns:{},sortKey:oi};ae.push(Hi)}}$&&ae.sort(function(za,Za){return za.sortKey-Za.sortKey});for(var Ai=0,bn=ae;Ai<bn.length;Ai+=1){var nn=bn[Ai],xn=nn,Pn=xn.geometry,Zn=xn.index,ga=xn.sourceLayerIndex;if(this.hasPattern){var ha=Jx("fill",this.layers,nn,this.zoom,I);this.patternFeatures.push(ha)}else this.addFeature(nn,Pn,Zn,U,{});var eo=y[Zn].feature;I.featureIndex.insert(eo,Pn,Zn,ga,this.index)}},mp.prototype.update=function(y,I,U){this.stateDependentLayers.length&&this.programConfigurations.updatePaintArrays(y,I,this.stateDependentLayers,U)},mp.prototype.addFeatures=function(y,I,U){for(var $=0,ae=this.patternFeatures;$<ae.length;$+=1){var he=ae[$];this.addFeature(he,he.geometry,he.index,I,U)}},mp.prototype.isEmpty=function(){return this.layoutVertexArray.length===0},mp.prototype.uploadPending=function(){return!this.uploaded||this.programConfigurations.needsUpload},mp.prototype.upload=function(y){this.uploaded||(this.layoutVertexBuffer=y.createVertexBuffer(this.layoutVertexArray,Z9),this.indexBuffer=y.createIndexBuffer(this.indexArray),this.indexBuffer2=y.createIndexBuffer(this.indexArray2)),this.programConfigurations.upload(y),this.uploaded=!0},mp.prototype.destroy=function(){this.layoutVertexBuffer&&(this.layoutVertexBuffer.destroy(),this.indexBuffer.destroy(),this.indexBuffer2.destroy(),this.programConfigurations.destroy(),this.segments.destroy(),this.segments2.destroy())},mp.prototype.addFeature=function(y,I,U,$,ae){for(var he=0,Oe=Ow(I,$x);he<Oe.length;he+=1){for(var rt=Oe[he],gt=0,Et=0,or=rt;Et<or.length;Et+=1){var _r=or[Et];gt+=_r.length}for(var pr=this.segments.prepareSegment(gt,this.layoutVertexArray,this.indexArray),Fr=pr.vertexLength,oi=[],Hi=[],Ai=0,bn=rt;Ai<bn.length;Ai+=1){var nn=bn[Ai];if(nn.length!==0){nn!==rt[0]&&Hi.push(oi.length/2);var xn=this.segments2.prepareSegment(nn.length,this.layoutVertexArray,this.indexArray2),Pn=xn.vertexLength;this.layoutVertexArray.emplaceBack(nn[0].x,nn[0].y),this.indexArray2.emplaceBack(Pn+nn.length-1,Pn),oi.push(nn[0].x),oi.push(nn[0].y);for(var Zn=1;Zn<nn.length;Zn++)this.layoutVertexArray.emplaceBack(nn[Zn].x,nn[Zn].y),this.indexArray2.emplaceBack(Pn+Zn-1,Pn+Zn),oi.push(nn[Zn].x),oi.push(nn[Zn].y);xn.vertexLength+=nn.length,xn.primitiveLength+=nn.length}}for(var ga=Rw(oi,Hi),ha=0;ha<ga.length;ha+=3)this.indexArray.emplaceBack(Fr+ga[ha],Fr+ga[ha+1],Fr+ga[ha+2]);pr.vertexLength+=gt,pr.primitiveLength+=ga.length/3}this.programConfigurations.populatePaintArrays(this.layoutVertexArray.length,y,U,ae,$)},X("FillBucket",mp,{omit:["layers","patternFeatures"]});var pS=new Nr({"fill-sort-key":new bt(Rn.layout_fill["fill-sort-key"])}),LC=new Nr({"fill-antialias":new Me(Rn.paint_fill["fill-antialias"]),"fill-opacity":new bt(Rn.paint_fill["fill-opacity"]),"fill-color":new bt(Rn.paint_fill["fill-color"]),"fill-outline-color":new bt(Rn.paint_fill["fill-outline-color"]),"fill-translate":new Me(Rn.paint_fill["fill-translate"]),"fill-translate-anchor":new Me(Rn.paint_fill["fill-translate-anchor"]),"fill-pattern":new zt(Rn.paint_fill["fill-pattern"])}),$v={paint:LC,layout:pS},Qx=function(m){function y(I){m.call(this,I,$v)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.recalculate=function(U,$){m.prototype.recalculate.call(this,U,$);var ae=this.paint._values["fill-outline-color"];ae.value.kind==="constant"&&ae.value.value===void 0&&(this.paint._values["fill-outline-color"]=this.paint._values["fill-color"])},y.prototype.createBucket=function(U){return new mp(U)},y.prototype.queryRadius=function(){return Pv(this.paint.get("fill-translate"))},y.prototype.queryIntersectsFeature=function(U,$,ae,he,Oe,rt,gt){var Et=Jv(U,this.paint.get("fill-translate"),this.paint.get("fill-translate-anchor"),rt.angle,gt);return ms(Et,he)},y.prototype.isTileClipped=function(){return!0},y}(mi),PC=Wi([{name:"a_pos",components:2,type:"Int16"},{name:"a_normal_ed",components:4,type:"Int16"}],4),IC=PC.members,gS=fy;function fy(m,y,I,U,$){this.properties={},this.extent=I,this.type=0,this._pbf=m,this._geometry=-1,this._keys=U,this._values=$,m.readFields(RC,this,y)}function RC(m,y,I){m==1?y.id=I.readVarint():m==2?DC(I,y):m==3?y.type=I.readVarint():m==4&&(y._geometry=I.pos)}function DC(m,y){for(var I=m.readVarint()+m.pos;m.pos<I;){var U=y._keys[m.readVarint()],$=y._values[m.readVarint()];y.properties[U]=$}}fy.types=["Unknown","Point","LineString","Polygon"],fy.prototype.loadGeometry=function(){var m=this._pbf;m.pos=this._geometry;for(var y=m.readVarint()+m.pos,I=1,U=0,$=0,ae=0,he=[],Oe;m.pos<y;){if(U<=0){var rt=m.readVarint();I=rt&7,U=rt>>3}if(U--,I===1||I===2)$+=m.readSVarint(),ae+=m.readSVarint(),I===1&&(Oe&&he.push(Oe),Oe=[]),Oe.push(new u($,ae));else if(I===7)Oe&&Oe.push(Oe[0].clone());else throw new Error("unknown command "+I)}return Oe&&he.push(Oe),he},fy.prototype.bbox=function(){var m=this._pbf;m.pos=this._geometry;for(var y=m.readVarint()+m.pos,I=1,U=0,$=0,ae=0,he=1/0,Oe=-1/0,rt=1/0,gt=-1/0;m.pos<y;){if(U<=0){var Et=m.readVarint();I=Et&7,U=Et>>3}if(U--,I===1||I===2)$+=m.readSVarint(),ae+=m.readSVarint(),$<he&&(he=$),$>Oe&&(Oe=$),ae<rt&&(rt=ae),ae>gt&&(gt=ae);else if(I!==7)throw new Error("unknown command "+I)}return[he,rt,Oe,gt]},fy.prototype.toGeoJSON=function(m,y,I){var U=this.extent*Math.pow(2,I),$=this.extent*m,ae=this.extent*y,he=this.loadGeometry(),Oe=fy.types[this.type],rt,gt;function Et(pr){for(var Fr=0;Fr<pr.length;Fr++){var oi=pr[Fr],Hi=180-(oi.y+ae)*360/U;pr[Fr]=[(oi.x+$)*360/U-180,360/Math.PI*Math.atan(Math.exp(Hi*Math.PI/180))-90]}}switch(this.type){case 1:var or=[];for(rt=0;rt<he.length;rt++)or[rt]=he[rt][0];he=or,Et(he);break;case 2:for(rt=0;rt<he.length;rt++)Et(he[rt]);break;case 3:for(he=$9(he),rt=0;rt<he.length;rt++)for(gt=0;gt<he[rt].length;gt++)Et(he[rt][gt]);break}he.length===1?he=he[0]:Oe="Multi"+Oe;var _r={type:"Feature",geometry:{type:Oe,coordinates:he},properties:this.properties};return"id"in this&&(_r.id=this.id),_r};function $9(m){var y=m.length;if(y<=1)return[m];for(var I=[],U,$,ae=0;ae<y;ae++){var he=FC(m[ae]);he!==0&&($===void 0&&($=he<0),$===he<0?(U&&I.push(U),U=[m[ae]]):U.push(m[ae]))}return U&&I.push(U),I}function FC(m){for(var y=0,I=0,U=m.length,$=U-1,ae,he;I<U;$=I++)ae=m[I],he=m[$],y+=(he.x-ae.x)*(ae.y+he.y);return y}var vg=mS;function mS(m,y){this.version=1,this.name=null,this.extent=4096,this.length=0,this._pbf=m,this._keys=[],this._values=[],this._features=[],m.readFields(zC,this,y),this.length=this._features.length}function zC(m,y,I){m===15?y.version=I.readVarint():m===1?y.name=I.readString():m===5?y.extent=I.readVarint():m===2?y._features.push(I.pos):m===3?y._keys.push(I.readString()):m===4&&y._values.push(OC(I))}function OC(m){for(var y=null,I=m.readVarint()+m.pos;m.pos<I;){var U=m.readVarint()>>3;y=U===1?m.readString():U===2?m.readFloat():U===3?m.readDouble():U===4?m.readVarint64():U===5?m.readVarint():U===6?m.readSVarint():U===7?m.readBoolean():null}return y}mS.prototype.feature=function(m){if(m<0||m>=this._features.length)throw new Error("feature index out of bounds");this._pbf.pos=this._features[m];var y=this._pbf.readVarint()+this._pbf.pos;return new gS(this._pbf,y,this.extent,this._keys,this._values)};var qC=Q9;function Q9(m,y){this.layers=m.readFields(eO,{},y)}function eO(m,y,I){if(m===3){var U=new vg(I,I.readVarint()+I.pos);U.length&&(y[U.name]=U)}}var BC=qC,k1=gS,NC=vg,pg={VectorTile:BC,VectorTileFeature:k1,VectorTileLayer:NC},UC=pg.VectorTileFeature.types,Bw=500,C1=Math.pow(2,13);function fm(m,y,I,U,$,ae,he,Oe){m.emplaceBack(y,I,Math.floor(U*C1)*2+he,$*C1*2,ae*C1*2,Math.round(Oe))}var Vp=function(y){this.zoom=y.zoom,this.overscaling=y.overscaling,this.layers=y.layers,this.layerIds=this.layers.map(function(I){return I.id}),this.index=y.index,this.hasPattern=!1,this.layoutVertexArray=new Ri,this.indexArray=new pn,this.programConfigurations=new fi(y.layers,y.zoom),this.segments=new io,this.stateDependentLayerIds=this.layers.filter(function(I){return I.isStateDependent()}).map(function(I){return I.id})};Vp.prototype.populate=function(y,I,U){this.features=[],this.hasPattern=qw("fill-extrusion",this.layers,I);for(var $=0,ae=y;$<ae.length;$+=1){var he=ae[$],Oe=he.feature,rt=he.id,gt=he.index,Et=he.sourceLayerIndex,or=this.layers[0]._featureFilter.needGeometry,_r=Ja(Oe,or);if(this.layers[0]._featureFilter.filter(new Gn(this.zoom),_r,U)){var pr={id:rt,sourceLayerIndex:Et,index:gt,geometry:or?_r.geometry:zn(Oe),properties:Oe.properties,type:Oe.type,patterns:{}};this.hasPattern?this.features.push(Jx("fill-extrusion",this.layers,pr,this.zoom,I)):this.addFeature(pr,pr.geometry,gt,U,{}),I.featureIndex.insert(Oe,pr.geometry,gt,Et,this.index,!0)}}},Vp.prototype.addFeatures=function(y,I,U){for(var $=0,ae=this.features;$<ae.length;$+=1){var he=ae[$],Oe=he.geometry;this.addFeature(he,Oe,he.index,I,U)}},Vp.prototype.update=function(y,I,U){this.stateDependentLayers.length&&this.programConfigurations.updatePaintArrays(y,I,this.stateDependentLayers,U)},Vp.prototype.isEmpty=function(){return this.layoutVertexArray.length===0},Vp.prototype.uploadPending=function(){return!this.uploaded||this.programConfigurations.needsUpload},Vp.prototype.upload=function(y){this.uploaded||(this.layoutVertexBuffer=y.createVertexBuffer(this.layoutVertexArray,IC),this.indexBuffer=y.createIndexBuffer(this.indexArray)),this.programConfigurations.upload(y),this.uploaded=!0},Vp.prototype.destroy=function(){this.layoutVertexBuffer&&(this.layoutVertexBuffer.destroy(),this.indexBuffer.destroy(),this.programConfigurations.destroy(),this.segments.destroy())},Vp.prototype.addFeature=function(y,I,U,$,ae){for(var he=0,Oe=Ow(I,Bw);he<Oe.length;he+=1){for(var rt=Oe[he],gt=0,Et=0,or=rt;Et<or.length;Et+=1){var _r=or[Et];gt+=_r.length}for(var pr=this.segments.prepareSegment(4,this.layoutVertexArray,this.indexArray),Fr=0,oi=rt;Fr<oi.length;Fr+=1){var Hi=oi[Fr];if(Hi.length!==0&&!rO(Hi))for(var Ai=0,bn=0;bn<Hi.length;bn++){var nn=Hi[bn];if(bn>=1){var xn=Hi[bn-1];if(!tO(nn,xn)){pr.vertexLength+4>io.MAX_VERTEX_ARRAY_LENGTH&&(pr=this.segments.prepareSegment(4,this.layoutVertexArray,this.indexArray));var Pn=nn.sub(xn)._perp()._unit(),Zn=xn.dist(nn);Ai+Zn>32768&&(Ai=0),fm(this.layoutVertexArray,nn.x,nn.y,Pn.x,Pn.y,0,0,Ai),fm(this.layoutVertexArray,nn.x,nn.y,Pn.x,Pn.y,0,1,Ai),Ai+=Zn,fm(this.layoutVertexArray,xn.x,xn.y,Pn.x,Pn.y,0,0,Ai),fm(this.layoutVertexArray,xn.x,xn.y,Pn.x,Pn.y,0,1,Ai);var ga=pr.vertexLength;this.indexArray.emplaceBack(ga,ga+2,ga+1),this.indexArray.emplaceBack(ga+1,ga+2,ga+3),pr.vertexLength+=4,pr.primitiveLength+=2}}}}if(pr.vertexLength+gt>io.MAX_VERTEX_ARRAY_LENGTH&&(pr=this.segments.prepareSegment(gt,this.layoutVertexArray,this.indexArray)),UC[y.type]==="Polygon"){for(var ha=[],eo=[],za=pr.vertexLength,Za=0,Ko=rt;Za<Ko.length;Za+=1){var to=Ko[Za];if(to.length!==0){to!==rt[0]&&eo.push(ha.length/2);for(var ao=0;ao<to.length;ao++){var _s=to[ao];fm(this.layoutVertexArray,_s.x,_s.y,0,0,1,1,0),ha.push(_s.x),ha.push(_s.y)}}}for(var jo=Rw(ha,eo),El=0;El<jo.length;El+=3)this.indexArray.emplaceBack(za+jo[El],za+jo[El+2],za+jo[El+1]);pr.primitiveLength+=jo.length/3,pr.vertexLength+=gt}}this.programConfigurations.populatePaintArrays(this.layoutVertexArray.length,y,U,ae,$)},X("FillExtrusionBucket",Vp,{omit:["layers","features"]});function tO(m,y){return m.x===y.x&&(m.x<0||m.x>Ci)||m.y===y.y&&(m.y<0||m.y>Ci)}function rO(m){return m.every(function(y){return y.x<0})||m.every(function(y){return y.x>Ci})||m.every(function(y){return y.y<0})||m.every(function(y){return y.y>Ci})}var L1=new Nr({"fill-extrusion-opacity":new Me(Rn["paint_fill-extrusion"]["fill-extrusion-opacity"]),"fill-extrusion-color":new bt(Rn["paint_fill-extrusion"]["fill-extrusion-color"]),"fill-extrusion-translate":new Me(Rn["paint_fill-extrusion"]["fill-extrusion-translate"]),"fill-extrusion-translate-anchor":new Me(Rn["paint_fill-extrusion"]["fill-extrusion-translate-anchor"]),"fill-extrusion-pattern":new zt(Rn["paint_fill-extrusion"]["fill-extrusion-pattern"]),"fill-extrusion-height":new bt(Rn["paint_fill-extrusion"]["fill-extrusion-height"]),"fill-extrusion-base":new bt(Rn["paint_fill-extrusion"]["fill-extrusion-base"]),"fill-extrusion-vertical-gradient":new Me(Rn["paint_fill-extrusion"]["fill-extrusion-vertical-gradient"])}),sd={paint:L1},hm=function(m){function y(I){m.call(this,I,sd)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.createBucket=function(U){return new Vp(U)},y.prototype.queryRadius=function(){return Pv(this.paint.get("fill-extrusion-translate"))},y.prototype.is3D=function(){return!0},y.prototype.queryIntersectsFeature=function(U,$,ae,he,Oe,rt,gt,Et){var or=Jv(U,this.paint.get("fill-extrusion-translate"),this.paint.get("fill-extrusion-translate-anchor"),rt.angle,gt),_r=this.paint.get("fill-extrusion-height").evaluate($,ae),pr=this.paint.get("fill-extrusion-base").evaluate($,ae),Fr=iO(or,Et,rt,0),oi=_S(he,pr,_r,Et),Hi=oi[0],Ai=oi[1];return VC(Hi,Ai,Fr)},y}(mi);function hy(m,y){return m.x*y.x+m.y*y.y}function yS(m,y){if(m.length===1){for(var I=0,U=y[I++],$;!$||U.equals($);)if($=y[I++],!$)return 1/0;for(;I<y.length;I++){var ae=y[I],he=m[0],Oe=$.sub(U),rt=ae.sub(U),gt=he.sub(U),Et=hy(Oe,Oe),or=hy(Oe,rt),_r=hy(rt,rt),pr=hy(gt,Oe),Fr=hy(gt,rt),oi=Et*_r-or*or,Hi=(_r*pr-or*Fr)/oi,Ai=(Et*Fr-or*pr)/oi,bn=1-Hi-Ai,nn=U.z*bn+$.z*Hi+ae.z*Ai;if(isFinite(nn))return nn}return 1/0}else{for(var xn=1/0,Pn=0,Zn=y;Pn<Zn.length;Pn+=1){var ga=Zn[Pn];xn=Math.min(xn,ga.z)}return xn}}function VC(m,y,I){var U=1/0;ms(I,y)&&(U=yS(I,y[0]));for(var $=0;$<y.length;$++)for(var ae=y[$],he=m[$],Oe=0;Oe<ae.length-1;Oe++){var rt=ae[Oe],gt=ae[Oe+1],Et=he[Oe],or=he[Oe+1],_r=[rt,gt,or,Et,rt];so(I,_r)&&(U=Math.min(U,yS(I,_r)))}return U===1/0?!1:U}function _S(m,y,I,U){for(var $=[],ae=[],he=U[8]*y,Oe=U[9]*y,rt=U[10]*y,gt=U[11]*y,Et=U[8]*I,or=U[9]*I,_r=U[10]*I,pr=U[11]*I,Fr=0,oi=m;Fr<oi.length;Fr+=1){for(var Hi=oi[Fr],Ai=[],bn=[],nn=0,xn=Hi;nn<xn.length;nn+=1){var Pn=xn[nn],Zn=Pn.x,ga=Pn.y,ha=U[0]*Zn+U[4]*ga+U[12],eo=U[1]*Zn+U[5]*ga+U[13],za=U[2]*Zn+U[6]*ga+U[14],Za=U[3]*Zn+U[7]*ga+U[15],Ko=ha+he,to=eo+Oe,ao=za+rt,_s=Za+gt,jo=ha+Et,El=eo+or,Iu=za+_r,kl=Za+pr,Cl=new u(Ko/_s,to/_s);Cl.z=ao/_s,Ai.push(Cl);var yl=new u(jo/kl,El/kl);yl.z=Iu/kl,bn.push(yl)}$.push(Ai),ae.push(bn)}return[$,ae]}function iO(m,y,I,U){for(var $=[],ae=0,he=m;ae<he.length;ae+=1){var Oe=he[ae],rt=[Oe.x,Oe.y,U,1];uy(rt,rt,y),$.push(new u(rt[0]/rt[3],rt[1]/rt[3]))}return $}var GC=Wi([{name:"a_pos_normal",components:2,type:"Int16"},{name:"a_data",components:4,type:"Uint8"}],4),HC=GC.members,nO=Wi([{name:"a_uv_x",components:1,type:"Float32"},{name:"a_split_index",components:1,type:"Float32"}]),jC=nO.members,xS=pg.VectorTileFeature.types,Nw=63,WC=Math.cos(75/2*(Math.PI/180)),eb=15,bS=20,XC=15,Uw=1/2,tb=Math.pow(2,XC-1)/Uw,sh=function(y){var I=this;this.zoom=y.zoom,this.overscaling=y.overscaling,this.layers=y.layers,this.layerIds=this.layers.map(function(U){return U.id}),this.index=y.index,this.hasPattern=!1,this.patternFeatures=[],this.lineClipsArray=[],this.gradients={},this.layers.forEach(function(U){I.gradients[U.id]={}}),this.layoutVertexArray=new ci,this.layoutVertexArray2=new an,this.indexArray=new pn,this.programConfigurations=new fi(y.layers,y.zoom),this.segments=new io,this.maxLineLength=0,this.stateDependentLayerIds=this.layers.filter(function(U){return U.isStateDependent()}).map(function(U){return U.id})};sh.prototype.populate=function(y,I,U){this.hasPattern=qw("line",this.layers,I);for(var $=this.layers[0].layout.get("line-sort-key"),ae=[],he=0,Oe=y;he<Oe.length;he+=1){var rt=Oe[he],gt=rt.feature,Et=rt.id,or=rt.index,_r=rt.sourceLayerIndex,pr=this.layers[0]._featureFilter.needGeometry,Fr=Ja(gt,pr);if(this.layers[0]._featureFilter.filter(new Gn(this.zoom),Fr,U)){var oi=$?$.evaluate(Fr,{},U):void 0,Hi={id:Et,properties:gt.properties,type:gt.type,sourceLayerIndex:_r,index:or,geometry:pr?Fr.geometry:zn(gt),patterns:{},sortKey:oi};ae.push(Hi)}}$&&ae.sort(function(za,Za){return za.sortKey-Za.sortKey});for(var Ai=0,bn=ae;Ai<bn.length;Ai+=1){var nn=bn[Ai],xn=nn,Pn=xn.geometry,Zn=xn.index,ga=xn.sourceLayerIndex;if(this.hasPattern){var ha=Jx("line",this.layers,nn,this.zoom,I);this.patternFeatures.push(ha)}else this.addFeature(nn,Pn,Zn,U,{});var eo=y[Zn].feature;I.featureIndex.insert(eo,Pn,Zn,ga,this.index)}},sh.prototype.update=function(y,I,U){this.stateDependentLayers.length&&this.programConfigurations.updatePaintArrays(y,I,this.stateDependentLayers,U)},sh.prototype.addFeatures=function(y,I,U){for(var $=0,ae=this.patternFeatures;$<ae.length;$+=1){var he=ae[$];this.addFeature(he,he.geometry,he.index,I,U)}},sh.prototype.isEmpty=function(){return this.layoutVertexArray.length===0},sh.prototype.uploadPending=function(){return!this.uploaded||this.programConfigurations.needsUpload},sh.prototype.upload=function(y){this.uploaded||(this.layoutVertexArray2.length!==0&&(this.layoutVertexBuffer2=y.createVertexBuffer(this.layoutVertexArray2,jC)),this.layoutVertexBuffer=y.createVertexBuffer(this.layoutVertexArray,HC),this.indexBuffer=y.createIndexBuffer(this.indexArray)),this.programConfigurations.upload(y),this.uploaded=!0},sh.prototype.destroy=function(){this.layoutVertexBuffer&&(this.layoutVertexBuffer.destroy(),this.indexBuffer.destroy(),this.programConfigurations.destroy(),this.segments.destroy())},sh.prototype.lineFeatureClips=function(y){if(y.properties&&y.properties.hasOwnProperty("mapbox_clip_start")&&y.properties.hasOwnProperty("mapbox_clip_end")){var I=+y.properties.mapbox_clip_start,U=+y.properties.mapbox_clip_end;return{start:I,end:U}}},sh.prototype.addFeature=function(y,I,U,$,ae){var he=this.layers[0].layout,Oe=he.get("line-join").evaluate(y,{}),rt=he.get("line-cap"),gt=he.get("line-miter-limit"),Et=he.get("line-round-limit");this.lineClips=this.lineFeatureClips(y);for(var or=0,_r=I;or<_r.length;or+=1){var pr=_r[or];this.addLine(pr,y,Oe,rt,gt,Et)}this.programConfigurations.populatePaintArrays(this.layoutVertexArray.length,y,U,ae,$)},sh.prototype.addLine=function(y,I,U,$,ae,he){if(this.distance=0,this.scaledDistance=0,this.totalDistance=0,this.lineClips){this.lineClipsArray.push(this.lineClips);for(var Oe=0;Oe<y.length-1;Oe++)this.totalDistance+=y[Oe].dist(y[Oe+1]);this.updateScaledDistance(),this.maxLineLength=Math.max(this.maxLineLength,this.totalDistance)}for(var rt=xS[I.type]==="Polygon",gt=y.length;gt>=2&&y[gt-1].equals(y[gt-2]);)gt--;for(var Et=0;Et<gt-1&&y[Et].equals(y[Et+1]);)Et++;if(!(gt<(rt?3:2))){U==="bevel"&&(ae=1.05);var or=this.overscaling<=16?eb*Ci/(512*this.overscaling):0,_r=this.segments.prepareSegment(gt*10,this.layoutVertexArray,this.indexArray),pr,Fr=void 0,oi=void 0,Hi=void 0,Ai=void 0;this.e1=this.e2=-1,rt&&(pr=y[gt-2],Ai=y[Et].sub(pr)._unit()._perp());for(var bn=Et;bn<gt;bn++)if(oi=bn===gt-1?rt?y[Et+1]:void 0:y[bn+1],!(oi&&y[bn].equals(oi))){Ai&&(Hi=Ai),pr&&(Fr=pr),pr=y[bn],Ai=oi?oi.sub(pr)._unit()._perp():Hi,Hi=Hi||Ai;var nn=Hi.add(Ai);(nn.x!==0||nn.y!==0)&&nn._unit();var xn=Hi.x*Ai.x+Hi.y*Ai.y,Pn=nn.x*Ai.x+nn.y*Ai.y,Zn=Pn!==0?1/Pn:1/0,ga=2*Math.sqrt(2-2*Pn),ha=Pn<WC&&Fr&&oi,eo=Hi.x*Ai.y-Hi.y*Ai.x>0;if(ha&&bn>Et){var za=pr.dist(Fr);if(za>2*or){var Za=pr.sub(pr.sub(Fr)._mult(or/za)._round());this.updateDistance(Fr,Za),this.addCurrentVertex(Za,Hi,0,0,_r),Fr=Za}}var Ko=Fr&&oi,to=Ko?U:rt?"butt":$;if(Ko&&to==="round"&&(Zn<he?to="miter":Zn<=2&&(to="fakeround")),to==="miter"&&Zn>ae&&(to="bevel"),to==="bevel"&&(Zn>2&&(to="flipbevel"),Zn<ae&&(to="miter")),Fr&&this.updateDistance(Fr,pr),to==="miter")nn._mult(Zn),this.addCurrentVertex(pr,nn,0,0,_r);else if(to==="flipbevel"){if(Zn>100)nn=Ai.mult(-1);else{var ao=Zn*Hi.add(Ai).mag()/Hi.sub(Ai).mag();nn._perp()._mult(ao*(eo?-1:1))}this.addCurrentVertex(pr,nn,0,0,_r),this.addCurrentVertex(pr,nn.mult(-1),0,0,_r)}else if(to==="bevel"||to==="fakeround"){var _s=-Math.sqrt(Zn*Zn-1),jo=eo?_s:0,El=eo?0:_s;if(Fr&&this.addCurrentVertex(pr,Hi,jo,El,_r),to==="fakeround")for(var Iu=Math.round(ga*180/Math.PI/bS),kl=1;kl<Iu;kl++){var Cl=kl/Iu;if(Cl!==.5){var yl=Cl-.5,Qu=1.0904+xn*(-3.2452+xn*(3.55645-xn*1.43519)),gc=.848013+xn*(-1.06021+xn*.215638);Cl=Cl+Cl*yl*(Cl-1)*(Qu*yl*yl+gc)}var Sf=Ai.sub(Hi)._mult(Cl)._add(Hi)._unit()._mult(eo?-1:1);this.addHalfVertex(pr,Sf.x,Sf.y,!1,eo,0,_r)}oi&&this.addCurrentVertex(pr,Ai,-jo,-El,_r)}else if(to==="butt")this.addCurrentVertex(pr,nn,0,0,_r);else if(to==="square"){var Ff=Fr?1:-1;this.addCurrentVertex(pr,nn,Ff,Ff,_r)}else to==="round"&&(Fr&&(this.addCurrentVertex(pr,Hi,0,0,_r),this.addCurrentVertex(pr,Hi,1,1,_r,!0)),oi&&(this.addCurrentVertex(pr,Ai,-1,-1,_r,!0),this.addCurrentVertex(pr,Ai,0,0,_r)));if(ha&&bn<gt-1){var Yh=pr.dist(oi);if(Yh>2*or){var ch=pr.add(oi.sub(pr)._mult(or/Yh)._round());this.updateDistance(pr,ch),this.addCurrentVertex(ch,Ai,0,0,_r),pr=ch}}}}},sh.prototype.addCurrentVertex=function(y,I,U,$,ae,he){he===void 0&&(he=!1);var Oe=I.x+I.y*U,rt=I.y-I.x*U,gt=-I.x+I.y*$,Et=-I.y-I.x*$;this.addHalfVertex(y,Oe,rt,he,!1,U,ae),this.addHalfVertex(y,gt,Et,he,!0,-$,ae),this.distance>tb/2&&this.totalDistance===0&&(this.distance=0,this.addCurrentVertex(y,I,U,$,ae,he))},sh.prototype.addHalfVertex=function(y,I,U,$,ae,he,Oe){var rt=y.x,gt=y.y,Et=this.lineClips?this.scaledDistance*(tb-1):this.scaledDistance,or=Et*Uw;if(this.layoutVertexArray.emplaceBack((rt<<1)+($?1:0),(gt<<1)+(ae?1:0),Math.round(Nw*I)+128,Math.round(Nw*U)+128,(he===0?0:he<0?-1:1)+1|(or&63)<<2,or>>6),this.lineClips){var _r=this.scaledDistance-this.lineClips.start,pr=this.lineClips.end-this.lineClips.start,Fr=_r/pr;this.layoutVertexArray2.emplaceBack(Fr,this.lineClipsArray.length)}var oi=Oe.vertexLength++;this.e1>=0&&this.e2>=0&&(this.indexArray.emplaceBack(this.e1,this.e2,oi),Oe.primitiveLength++),ae?this.e2=oi:this.e1=oi},sh.prototype.updateScaledDistance=function(){this.scaledDistance=this.lineClips?this.lineClips.start+(this.lineClips.end-this.lineClips.start)*this.distance/this.totalDistance:this.distance},sh.prototype.updateDistance=function(y,I){this.distance+=y.dist(I),this.updateScaledDistance()},X("LineBucket",sh,{omit:["layers","patternFeatures"]});var wS=new Nr({"line-cap":new Me(Rn.layout_line["line-cap"]),"line-join":new bt(Rn.layout_line["line-join"]),"line-miter-limit":new Me(Rn.layout_line["line-miter-limit"]),"line-round-limit":new Me(Rn.layout_line["line-round-limit"]),"line-sort-key":new bt(Rn.layout_line["line-sort-key"])}),TS=new Nr({"line-opacity":new bt(Rn.paint_line["line-opacity"]),"line-color":new bt(Rn.paint_line["line-color"]),"line-translate":new Me(Rn.paint_line["line-translate"]),"line-translate-anchor":new Me(Rn.paint_line["line-translate-anchor"]),"line-width":new bt(Rn.paint_line["line-width"]),"line-gap-width":new bt(Rn.paint_line["line-gap-width"]),"line-offset":new bt(Rn.paint_line["line-offset"]),"line-blur":new bt(Rn.paint_line["line-blur"]),"line-dasharray":new Rr(Rn.paint_line["line-dasharray"]),"line-pattern":new zt(Rn.paint_line["line-pattern"]),"line-gradient":new jr(Rn.paint_line["line-gradient"])}),Vw={paint:TS,layout:wS},aO=function(m){function y(){m.apply(this,arguments)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.possiblyEvaluate=function(U,$){return $=new Gn(Math.floor($.zoom),{now:$.now,fadeDuration:$.fadeDuration,zoomHistory:$.zoomHistory,transition:$.transition}),m.prototype.possiblyEvaluate.call(this,U,$)},y.prototype.evaluate=function(U,$,ae,he){return $=x({},$,{zoom:Math.floor($.zoom)}),m.prototype.evaluate.call(this,U,$,ae,he)},y}(bt),R=new aO(Vw.paint.properties["line-width"].specification);R.useIntegerZoom=!0;var A=function(m){function y(I){m.call(this,I,Vw),this.gradientVersion=0}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype._handleSpecialPaintPropertyUpdate=function(U){if(U==="line-gradient"){var $=this._transitionablePaint._values["line-gradient"].value.expression;this.stepInterpolant=$._styleExpression.expression instanceof _u,this.gradientVersion=(this.gradientVersion+1)%d}},y.prototype.gradientExpression=function(){return this._transitionablePaint._values["line-gradient"].value.expression},y.prototype.recalculate=function(U,$){m.prototype.recalculate.call(this,U,$),this.paint._values["line-floorwidth"]=R.possiblyEvaluate(this._transitioningPaint._values["line-width"].value,U)},y.prototype.createBucket=function(U){return new sh(U)},y.prototype.queryRadius=function(U){var $=U,ae=F(Ad("line-width",this,$),Ad("line-gap-width",this,$)),he=Ad("line-offset",this,$);return ae/2+Math.abs(he)+Pv(this.paint.get("line-translate"))},y.prototype.queryIntersectsFeature=function(U,$,ae,he,Oe,rt,gt){var Et=Jv(U,this.paint.get("line-translate"),this.paint.get("line-translate-anchor"),rt.angle,gt),or=gt/2*F(this.paint.get("line-width").evaluate($,ae),this.paint.get("line-gap-width").evaluate($,ae)),_r=this.paint.get("line-offset").evaluate($,ae);return _r&&(he=W(he,_r*gt)),ou(Et,he,or)},y.prototype.isTileClipped=function(){return!0},y}(mi);function F(m,y){return y>0?y+2*m:m}function W(m,y){for(var I=[],U=new u(0,0),$=0;$<m.length;$++){for(var ae=m[$],he=[],Oe=0;Oe<ae.length;Oe++){var rt=ae[Oe-1],gt=ae[Oe],Et=ae[Oe+1],or=Oe===0?U:gt.sub(rt)._unit()._perp(),_r=Oe===ae.length-1?U:Et.sub(gt)._unit()._perp(),pr=or._add(_r)._unit(),Fr=pr.x*_r.x+pr.y*_r.y;pr._mult(1/Fr),he.push(pr._mult(y)._add(gt))}I.push(he)}return I}var re=Wi([{name:"a_pos_offset",components:4,type:"Int16"},{name:"a_data",components:4,type:"Uint16"},{name:"a_pixeloffset",components:4,type:"Int16"}],4),fe=Wi([{name:"a_projected_pos",components:3,type:"Float32"}],4),pe=Wi([{name:"a_fade_opacity",components:1,type:"Uint32"}],4),ze=Wi([{name:"a_placed",components:2,type:"Uint8"},{name:"a_shift",components:2,type:"Float32"}]),Ke=Wi([{type:"Int16",name:"anchorPointX"},{type:"Int16",name:"anchorPointY"},{type:"Int16",name:"x1"},{type:"Int16",name:"y1"},{type:"Int16",name:"x2"},{type:"Int16",name:"y2"},{type:"Uint32",name:"featureIndex"},{type:"Uint16",name:"sourceLayerIndex"},{type:"Uint16",name:"bucketIndex"}]),ct=Wi([{name:"a_pos",components:2,type:"Int16"},{name:"a_anchor_pos",components:2,type:"Int16"},{name:"a_extrude",components:2,type:"Int16"}],4),Lt=Wi([{name:"a_pos",components:2,type:"Float32"},{name:"a_radius",components:1,type:"Float32"},{name:"a_flags",components:2,type:"Int16"}],4),$t=Wi([{name:"triangle",components:3,type:"Uint16"}]),fr=Wi([{type:"Int16",name:"anchorX"},{type:"Int16",name:"anchorY"},{type:"Uint16",name:"glyphStartIndex"},{type:"Uint16",name:"numGlyphs"},{type:"Uint32",name:"vertexStartIndex"},{type:"Uint32",name:"lineStartIndex"},{type:"Uint32",name:"lineLength"},{type:"Uint16",name:"segment"},{type:"Uint16",name:"lowerSize"},{type:"Uint16",name:"upperSize"},{type:"Float32",name:"lineOffsetX"},{type:"Float32",name:"lineOffsetY"},{type:"Uint8",name:"writingMode"},{type:"Uint8",name:"placedOrientation"},{type:"Uint8",name:"hidden"},{type:"Uint32",name:"crossTileID"},{type:"Int16",name:"associatedIconIndex"}]),mr=Wi([{type:"Int16",name:"anchorX"},{type:"Int16",name:"anchorY"},{type:"Int16",name:"rightJustifiedTextSymbolIndex"},{type:"Int16",name:"centerJustifiedTextSymbolIndex"},{type:"Int16",name:"leftJustifiedTextSymbolIndex"},{type:"Int16",name:"verticalPlacedTextSymbolIndex"},{type:"Int16",name:"placedIconSymbolIndex"},{type:"Int16",name:"verticalPlacedIconSymbolIndex"},{type:"Uint16",name:"key"},{type:"Uint16",name:"textBoxStartIndex"},{type:"Uint16",name:"textBoxEndIndex"},{type:"Uint16",name:"verticalTextBoxStartIndex"},{type:"Uint16",name:"verticalTextBoxEndIndex"},{type:"Uint16",name:"iconBoxStartIndex"},{type:"Uint16",name:"iconBoxEndIndex"},{type:"Uint16",name:"verticalIconBoxStartIndex"},{type:"Uint16",name:"verticalIconBoxEndIndex"},{type:"Uint16",name:"featureIndex"},{type:"Uint16",name:"numHorizontalGlyphVertices"},{type:"Uint16",name:"numVerticalGlyphVertices"},{type:"Uint16",name:"numIconVertices"},{type:"Uint16",name:"numVerticalIconVertices"},{type:"Uint16",name:"useRuntimeCollisionCircles"},{type:"Uint32",name:"crossTileID"},{type:"Float32",name:"textBoxScale"},{type:"Float32",components:2,name:"textOffset"},{type:"Float32",name:"collisionCircleDiameter"}]),Pr=Wi([{type:"Float32",name:"offsetX"}]),zr=Wi([{type:"Int16",name:"x"},{type:"Int16",name:"y"},{type:"Int16",name:"tileUnitDistanceFromAnchor"}]);function ui(m,y,I){var U=y.layout.get("text-transform").evaluate(I,{});return U==="uppercase"?m=m.toLocaleUpperCase():U==="lowercase"&&(m=m.toLocaleLowerCase()),Ms.applyArabicShaping&&(m=Ms.applyArabicShaping(m)),m}function yi(m,y,I){return m.sections.forEach(function(U){U.text=ui(U.text,y,I)}),m}function vn(m){var y={},I={},U=[],$=0;function ae(Ai){U.push(m[Ai]),$++}function he(Ai,bn,nn){var xn=I[Ai];return delete I[Ai],I[bn]=xn,U[xn].geometry[0].pop(),U[xn].geometry[0]=U[xn].geometry[0].concat(nn[0]),xn}function Oe(Ai,bn,nn){var xn=y[bn];return delete y[bn],y[Ai]=xn,U[xn].geometry[0].shift(),U[xn].geometry[0]=nn[0].concat(U[xn].geometry[0]),xn}function rt(Ai,bn,nn){var xn=nn?bn[0][bn[0].length-1]:bn[0][0];return Ai+":"+xn.x+":"+xn.y}for(var gt=0;gt<m.length;gt++){var Et=m[gt],or=Et.geometry,_r=Et.text?Et.text.toString():null;if(!_r){ae(gt);continue}var pr=rt(_r,or),Fr=rt(_r,or,!0);if(pr in I&&Fr in y&&I[pr]!==y[Fr]){var oi=Oe(pr,Fr,or),Hi=he(pr,Fr,U[oi].geometry);delete y[pr],delete I[Fr],I[rt(_r,U[Hi].geometry,!0)]=Hi,U[oi].geometry=null}else pr in I?he(pr,Fr,or):Fr in y?Oe(pr,Fr,or):(ae(gt),y[pr]=$-1,I[Fr]=$-1)}return U.filter(function(Ai){return Ai.geometry})}var zi={"!":"\uFE15","#":"\uFF03",$:"\uFF04","%":"\uFF05","&":"\uFF06","(":"\uFE35",")":"\uFE36","*":"\uFF0A","+":"\uFF0B",",":"\uFE10","-":"\uFE32",".":"\u30FB","/":"\uFF0F",":":"\uFE13",";":"\uFE14","<":"\uFE3F","=":"\uFF1D",">":"\uFE40","?":"\uFE16","@":"\uFF20","[":"\uFE47","\\":"\uFF3C","]":"\uFE48","^":"\uFF3E",_:"\uFE33","`":"\uFF40","{":"\uFE37","|":"\u2015","}":"\uFE38","~":"\uFF5E","\xA2":"\uFFE0","\xA3":"\uFFE1","\xA5":"\uFFE5","\xA6":"\uFFE4","\xAC":"\uFFE2","\xAF":"\uFFE3","\u2013":"\uFE32","\u2014":"\uFE31","\u2018":"\uFE43","\u2019":"\uFE44","\u201C":"\uFE41","\u201D":"\uFE42","\u2026":"\uFE19","\u2027":"\u30FB","\u20A9":"\uFFE6","\u3001":"\uFE11","\u3002":"\uFE12","\u3008":"\uFE3F","\u3009":"\uFE40","\u300A":"\uFE3D","\u300B":"\uFE3E","\u300C":"\uFE41","\u300D":"\uFE42","\u300E":"\uFE43","\u300F":"\uFE44","\u3010":"\uFE3B","\u3011":"\uFE3C","\u3014":"\uFE39","\u3015":"\uFE3A","\u3016":"\uFE17","\u3017":"\uFE18","\uFF01":"\uFE15","\uFF08":"\uFE35","\uFF09":"\uFE36","\uFF0C":"\uFE10","\uFF0D":"\uFE32","\uFF0E":"\u30FB","\uFF1A":"\uFE13","\uFF1B":"\uFE14","\uFF1C":"\uFE3F","\uFF1E":"\uFE40","\uFF1F":"\uFE16","\uFF3B":"\uFE47","\uFF3D":"\uFE48","\uFF3F":"\uFE33","\uFF5B":"\uFE37","\uFF5C":"\u2015","\uFF5D":"\uFE38","\uFF5F":"\uFE35","\uFF60":"\uFE36","\uFF61":"\uFE12","\uFF62":"\uFE41","\uFF63":"\uFE42"};function un(m){for(var y="",I=0;I<m.length;I++){var U=m.charCodeAt(I+1)||null,$=m.charCodeAt(I-1)||null,ae=(!U||!bi(U)||zi[m[I+1]])&&(!$||!bi($)||zi[m[I-1]]);ae&&zi[m[I]]?y+=zi[m[I]]:y+=m[I]}return y}var Tn=24,pa=function(m,y,I,U,$){var ae,he,Oe=$*8-U-1,rt=(1<<Oe)-1,gt=rt>>1,Et=-7,or=I?$-1:0,_r=I?-1:1,pr=m[y+or];for(or+=_r,ae=pr&(1<<-Et)-1,pr>>=-Et,Et+=Oe;Et>0;ae=ae*256+m[y+or],or+=_r,Et-=8);for(he=ae&(1<<-Et)-1,ae>>=-Et,Et+=U;Et>0;he=he*256+m[y+or],or+=_r,Et-=8);if(ae===0)ae=1-gt;else{if(ae===rt)return he?NaN:(pr?-1:1)*(1/0);he=he+Math.pow(2,U),ae=ae-gt}return(pr?-1:1)*he*Math.pow(2,ae-U)},ro=function(m,y,I,U,$,ae){var he,Oe,rt,gt=ae*8-$-1,Et=(1<<gt)-1,or=Et>>1,_r=$===23?Math.pow(2,-24)-Math.pow(2,-77):0,pr=U?0:ae-1,Fr=U?1:-1,oi=y<0||y===0&&1/y<0?1:0;for(y=Math.abs(y),isNaN(y)||y===1/0?(Oe=isNaN(y)?1:0,he=Et):(he=Math.floor(Math.log(y)/Math.LN2),y*(rt=Math.pow(2,-he))<1&&(he--,rt*=2),he+or>=1?y+=_r/rt:y+=_r*Math.pow(2,1-or),y*rt>=2&&(he++,rt/=2),he+or>=Et?(Oe=0,he=Et):he+or>=1?(Oe=(y*rt-1)*Math.pow(2,$),he=he+or):(Oe=y*Math.pow(2,or-1)*Math.pow(2,$),he=0));$>=8;m[I+pr]=Oe&255,pr+=Fr,Oe/=256,$-=8);for(he=he<<$|Oe,gt+=$;gt>0;m[I+pr]=he&255,pr+=Fr,he/=256,gt-=8);m[I+pr-Fr]|=oi*128},Vo={read:pa,write:ro},Xa=sa;function sa(m){this.buf=ArrayBuffer.isView&&ArrayBuffer.isView(m)?m:new Uint8Array(m||0),this.pos=0,this.type=0,this.length=this.buf.length}sa.Varint=0,sa.Fixed64=1,sa.Bytes=2,sa.Fixed32=5;var Mo=65536*65536,fo=1/Mo,lo=12,Xn=typeof TextDecoder=="undefined"?null:new TextDecoder("utf8");sa.prototype={destroy:function(){this.buf=null},readFields:function(m,y,I){for(I=I||this.length;this.pos<I;){var U=this.readVarint(),$=U>>3,ae=this.pos;this.type=U&7,m($,y,this),this.pos===ae&&this.skip(U)}return y},readMessage:function(m,y){return this.readFields(m,y,this.readVarint()+this.pos)},readFixed32:function(){var m=Wh(this.buf,this.pos);return this.pos+=4,m},readSFixed32:function(){var m=Fv(this.buf,this.pos);return this.pos+=4,m},readFixed64:function(){var m=Wh(this.buf,this.pos)+Wh(this.buf,this.pos+4)*Mo;return this.pos+=8,m},readSFixed64:function(){var m=Wh(this.buf,this.pos)+Fv(this.buf,this.pos+4)*Mo;return this.pos+=8,m},readFloat:function(){var m=Vo.read(this.buf,this.pos,!0,23,4);return this.pos+=4,m},readDouble:function(){var m=Vo.read(this.buf,this.pos,!0,52,8);return this.pos+=8,m},readVarint:function(m){var y=this.buf,I,U;return U=y[this.pos++],I=U&127,U<128||(U=y[this.pos++],I|=(U&127)<<7,U<128)||(U=y[this.pos++],I|=(U&127)<<14,U<128)||(U=y[this.pos++],I|=(U&127)<<21,U<128)?I:(U=y[this.pos],I|=(U&15)<<28,Ro(I,m,this))},readVarint64:function(){return this.readVarint(!0)},readSVarint:function(){var m=this.readVarint();return m%2===1?(m+1)/-2:m/2},readBoolean:function(){return!!this.readVarint()},readString:function(){var m=this.readVarint()+this.pos,y=this.pos;return this.pos=m,m-y>=lo&&Xn?tu(this.buf,y,m):lv(this.buf,y,m)},readBytes:function(){var m=this.readVarint()+this.pos,y=this.buf.subarray(this.pos,m);return this.pos=m,y},readPackedVarint:function(m,y){if(this.type!==sa.Bytes)return m.push(this.readVarint(y));var I=uo(this);for(m=m||[];this.pos<I;)m.push(this.readVarint(y));return m},readPackedSVarint:function(m){if(this.type!==sa.Bytes)return m.push(this.readSVarint());var y=uo(this);for(m=m||[];this.pos<y;)m.push(this.readSVarint());return m},readPackedBoolean:function(m){if(this.type!==sa.Bytes)return m.push(this.readBoolean());var y=uo(this);for(m=m||[];this.pos<y;)m.push(this.readBoolean());return m},readPackedFloat:function(m){if(this.type!==sa.Bytes)return m.push(this.readFloat());var y=uo(this);for(m=m||[];this.pos<y;)m.push(this.readFloat());return m},readPackedDouble:function(m){if(this.type!==sa.Bytes)return m.push(this.readDouble());var y=uo(this);for(m=m||[];this.pos<y;)m.push(this.readDouble());return m},readPackedFixed32:function(m){if(this.type!==sa.Bytes)return m.push(this.readFixed32());var y=uo(this);for(m=m||[];this.pos<y;)m.push(this.readFixed32());return m},readPackedSFixed32:function(m){if(this.type!==sa.Bytes)return m.push(this.readSFixed32());var y=uo(this);for(m=m||[];this.pos<y;)m.push(this.readSFixed32());return m},readPackedFixed64:function(m){if(this.type!==sa.Bytes)return m.push(this.readFixed64());var y=uo(this);for(m=m||[];this.pos<y;)m.push(this.readFixed64());return m},readPackedSFixed64:function(m){if(this.type!==sa.Bytes)return m.push(this.readSFixed64());var y=uo(this);for(m=m||[];this.pos<y;)m.push(this.readSFixed64());return m},skip:function(m){var y=m&7;if(y===sa.Varint)for(;this.buf[this.pos++]>127;);else if(y===sa.Bytes)this.pos=this.readVarint()+this.pos;else if(y===sa.Fixed32)this.pos+=4;else if(y===sa.Fixed64)this.pos+=8;else throw new Error("Unimplemented type: "+y)},writeTag:function(m,y){this.writeVarint(m<<3|y)},realloc:function(m){for(var y=this.length||16;y<this.pos+m;)y*=2;if(y!==this.length){var I=new Uint8Array(y);I.set(this.buf),this.buf=I,this.length=y}},finish:function(){return this.length=this.pos,this.pos=0,this.buf.subarray(0,this.length)},writeFixed32:function(m){this.realloc(4),Df(this.buf,m,this.pos),this.pos+=4},writeSFixed32:function(m){this.realloc(4),Df(this.buf,m,this.pos),this.pos+=4},writeFixed64:function(m){this.realloc(8),Df(this.buf,m&-1,this.pos),Df(this.buf,Math.floor(m*fo),this.pos+4),this.pos+=8},writeSFixed64:function(m){this.realloc(8),Df(this.buf,m&-1,this.pos),Df(this.buf,Math.floor(m*fo),this.pos+4),this.pos+=8},writeVarint:function(m){if(m=+m||0,m>268435455||m<0){Ju(m,this);return}this.realloc(4),this.buf[this.pos++]=m&127|(m>127?128:0),!(m<=127)&&(this.buf[this.pos++]=(m>>>=7)&127|(m>127?128:0),!(m<=127)&&(this.buf[this.pos++]=(m>>>=7)&127|(m>127?128:0),!(m<=127)&&(this.buf[this.pos++]=m>>>7&127)))},writeSVarint:function(m){this.writeVarint(m<0?-m*2-1:m*2)},writeBoolean:function(m){this.writeVarint(!!m)},writeString:function(m){m=String(m),this.realloc(m.length*4),this.pos++;var y=this.pos;this.pos=pc(this.buf,m,this.pos);var I=this.pos-y;I>=128&&Qv(y,I,this),this.pos=y-1,this.writeVarint(I),this.pos+=I},writeFloat:function(m){this.realloc(4),Vo.write(this.buf,m,this.pos,!0,23,4),this.pos+=4},writeDouble:function(m){this.realloc(8),Vo.write(this.buf,m,this.pos,!0,52,8),this.pos+=8},writeBytes:function(m){var y=m.length;this.writeVarint(y),this.realloc(y);for(var I=0;I<y;I++)this.buf[this.pos++]=m[I]},writeRawMessage:function(m,y){this.pos++;var I=this.pos;m(y,this);var U=this.pos-I;U>=128&&Qv(I,U,this),this.pos=I-1,this.writeVarint(U),this.pos+=U},writeMessage:function(m,y,I){this.writeTag(m,sa.Bytes),this.writeRawMessage(y,I)},writePackedVarint:function(m,y){y.length&&this.writeMessage(m,ld,y)},writePackedSVarint:function(m,y){y.length&&this.writeMessage(m,Eh,y)},writePackedBoolean:function(m,y){y.length&&this.writeMessage(m,jd,y)},writePackedFloat:function(m,y){y.length&&this.writeMessage(m,Gd,y)},writePackedDouble:function(m,y){y.length&&this.writeMessage(m,Hd,y)},writePackedFixed32:function(m,y){y.length&&this.writeMessage(m,Af,y)},writePackedSFixed32:function(m,y){y.length&&this.writeMessage(m,kh,y)},writePackedFixed64:function(m,y){y.length&&this.writeMessage(m,Ed,y)},writePackedSFixed64:function(m,y){y.length&&this.writeMessage(m,ud,y)},writeBytesField:function(m,y){this.writeTag(m,sa.Bytes),this.writeBytes(y)},writeFixed32Field:function(m,y){this.writeTag(m,sa.Fixed32),this.writeFixed32(y)},writeSFixed32Field:function(m,y){this.writeTag(m,sa.Fixed32),this.writeSFixed32(y)},writeFixed64Field:function(m,y){this.writeTag(m,sa.Fixed64),this.writeFixed64(y)},writeSFixed64Field:function(m,y){this.writeTag(m,sa.Fixed64),this.writeSFixed64(y)},writeVarintField:function(m,y){this.writeTag(m,sa.Varint),this.writeVarint(y)},writeSVarintField:function(m,y){this.writeTag(m,sa.Varint),this.writeSVarint(y)},writeStringField:function(m,y){this.writeTag(m,sa.Bytes),this.writeString(y)},writeFloatField:function(m,y){this.writeTag(m,sa.Fixed32),this.writeFloat(y)},writeDoubleField:function(m,y){this.writeTag(m,sa.Fixed64),this.writeDouble(y)},writeBooleanField:function(m,y){this.writeVarintField(m,!!y)}};function Ro(m,y,I){var U=I.buf,$,ae;if(ae=U[I.pos++],$=(ae&112)>>4,ae<128||(ae=U[I.pos++],$|=(ae&127)<<3,ae<128)||(ae=U[I.pos++],$|=(ae&127)<<10,ae<128)||(ae=U[I.pos++],$|=(ae&127)<<17,ae<128)||(ae=U[I.pos++],$|=(ae&127)<<24,ae<128)||(ae=U[I.pos++],$|=(ae&1)<<31,ae<128))return $o(m,$,y);throw new Error("Expected varint not more than 10 bytes")}function uo(m){return m.type===sa.Bytes?m.readVarint()+m.pos:m.pos+1}function $o(m,y,I){return I?y*4294967296+(m>>>0):(y>>>0)*4294967296+(m>>>0)}function Ju(m,y){var I,U;if(m>=0?(I=m%4294967296|0,U=m/4294967296|0):(I=~(-m%4294967296),U=~(-m/4294967296),I^4294967295?I=I+1|0:(I=0,U=U+1|0)),m>=18446744073709552e3||m<-18446744073709552e3)throw new Error("Given varint doesn't fit into 10 bytes");y.realloc(10),qu(I,U,y),Mh(U,y)}function qu(m,y,I){I.buf[I.pos++]=m&127|128,m>>>=7,I.buf[I.pos++]=m&127|128,m>>>=7,I.buf[I.pos++]=m&127|128,m>>>=7,I.buf[I.pos++]=m&127|128,m>>>=7,I.buf[I.pos]=m&127}function Mh(m,y){var I=(m&7)<<4;y.buf[y.pos++]|=I|((m>>>=3)?128:0),m&&(y.buf[y.pos++]=m&127|((m>>>=7)?128:0),m&&(y.buf[y.pos++]=m&127|((m>>>=7)?128:0),m&&(y.buf[y.pos++]=m&127|((m>>>=7)?128:0),m&&(y.buf[y.pos++]=m&127|((m>>>=7)?128:0),m&&(y.buf[y.pos++]=m&127)))))}function Qv(m,y,I){var U=y<=16383?1:y<=2097151?2:y<=268435455?3:Math.floor(Math.log(y)/(Math.LN2*7));I.realloc(U);for(var $=I.pos-1;$>=m;$--)I.buf[$+U]=I.buf[$]}function ld(m,y){for(var I=0;I<m.length;I++)y.writeVarint(m[I])}function Eh(m,y){for(var I=0;I<m.length;I++)y.writeSVarint(m[I])}function Gd(m,y){for(var I=0;I<m.length;I++)y.writeFloat(m[I])}function Hd(m,y){for(var I=0;I<m.length;I++)y.writeDouble(m[I])}function jd(m,y){for(var I=0;I<m.length;I++)y.writeBoolean(m[I])}function Af(m,y){for(var I=0;I<m.length;I++)y.writeFixed32(m[I])}function kh(m,y){for(var I=0;I<m.length;I++)y.writeSFixed32(m[I])}function Ed(m,y){for(var I=0;I<m.length;I++)y.writeFixed64(m[I])}function ud(m,y){for(var I=0;I<m.length;I++)y.writeSFixed64(m[I])}function Wh(m,y){return(m[y]|m[y+1]<<8|m[y+2]<<16)+m[y+3]*16777216}function Df(m,y,I){m[I]=y,m[I+1]=y>>>8,m[I+2]=y>>>16,m[I+3]=y>>>24}function Fv(m,y){return(m[y]|m[y+1]<<8|m[y+2]<<16)+(m[y+3]<<24)}function lv(m,y,I){for(var U="",$=y;$<I;){var ae=m[$],he=null,Oe=ae>239?4:ae>223?3:ae>191?2:1;if($+Oe>I)break;var rt,gt,Et;Oe===1?ae<128&&(he=ae):Oe===2?(rt=m[$+1],(rt&192)===128&&(he=(ae&31)<<6|rt&63,he<=127&&(he=null))):Oe===3?(rt=m[$+1],gt=m[$+2],(rt&192)===128&&(gt&192)===128&&(he=(ae&15)<<12|(rt&63)<<6|gt&63,(he<=2047||he>=55296&&he<=57343)&&(he=null))):Oe===4&&(rt=m[$+1],gt=m[$+2],Et=m[$+3],(rt&192)===128&&(gt&192)===128&&(Et&192)===128&&(he=(ae&15)<<18|(rt&63)<<12|(gt&63)<<6|Et&63,(he<=65535||he>=1114112)&&(he=null))),he===null?(he=65533,Oe=1):he>65535&&(he-=65536,U+=String.fromCharCode(he>>>10&1023|55296),he=56320|he&1023),U+=String.fromCharCode(he),$+=Oe}return U}function tu(m,y,I){return Xn.decode(m.subarray(y,I))}function pc(m,y,I){for(var U=0,$,ae;U<y.length;U++){if($=y.charCodeAt(U),$>55295&&$<57344)if(ae)if($<56320){m[I++]=239,m[I++]=191,m[I++]=189,ae=$;continue}else $=ae-55296<<10|$-56320|65536,ae=null;else{$>56319||U+1===y.length?(m[I++]=239,m[I++]=191,m[I++]=189):ae=$;continue}else ae&&(m[I++]=239,m[I++]=191,m[I++]=189,ae=null);$<128?m[I++]=$:($<2048?m[I++]=$>>6|192:($<65536?m[I++]=$>>12|224:(m[I++]=$>>18|240,m[I++]=$>>12&63|128),m[I++]=$>>6&63|128),m[I++]=$&63|128)}return I}var $u=3;function zv(m,y,I){m===1&&I.readMessage(ff,y)}function ff(m,y,I){if(m===3){var U=I.readMessage(P1,{}),$=U.id,ae=U.bitmap,he=U.width,Oe=U.height,rt=U.left,gt=U.top,Et=U.advance;y.push({id:$,bitmap:new Dv({width:he+2*$u,height:Oe+2*$u},ae),metrics:{width:he,height:Oe,left:rt,top:gt,advance:Et}})}}function P1(m,y,I){m===1?y.id=I.readVarint():m===2?y.bitmap=I.readBytes():m===3?y.width=I.readVarint():m===4?y.height=I.readVarint():m===5?y.left=I.readSVarint():m===6?y.top=I.readSVarint():m===7&&(y.advance=I.readVarint())}function v0(m){return new Xa(m).readFields(zv,[])}var Gp=$u;function ep(m){for(var y=0,I=0,U=0,$=m;U<$.length;U+=1){var ae=$[U];y+=ae.w*ae.h,I=Math.max(I,ae.w)}m.sort(function(Hi,Ai){return Ai.h-Hi.h});for(var he=Math.max(Math.ceil(Math.sqrt(y/.95)),I),Oe=[{x:0,y:0,w:he,h:1/0}],rt=0,gt=0,Et=0,or=m;Et<or.length;Et+=1)for(var _r=or[Et],pr=Oe.length-1;pr>=0;pr--){var Fr=Oe[pr];if(!(_r.w>Fr.w||_r.h>Fr.h)){if(_r.x=Fr.x,_r.y=Fr.y,gt=Math.max(gt,_r.y+_r.h),rt=Math.max(rt,_r.x+_r.w),_r.w===Fr.w&&_r.h===Fr.h){var oi=Oe.pop();pr<Oe.length&&(Oe[pr]=oi)}else _r.h===Fr.h?(Fr.x+=_r.w,Fr.w-=_r.w):_r.w===Fr.w?(Fr.y+=_r.h,Fr.h-=_r.h):(Oe.push({x:Fr.x+_r.w,y:Fr.y,w:Fr.w-_r.w,h:_r.h}),Fr.y+=_r.h,Fr.h-=_r.h);break}}return{w:rt,h:gt,fill:y/(rt*gt)||0}}var Gc=1,Zf=function(y,I){var U=I.pixelRatio,$=I.version,ae=I.stretchX,he=I.stretchY,Oe=I.content;this.paddedRect=y,this.pixelRatio=U,this.stretchX=ae,this.stretchY=he,this.content=Oe,this.version=$},tp={tl:{configurable:!0},br:{configurable:!0},tlbr:{configurable:!0},displaySize:{configurable:!0}};tp.tl.get=function(){return[this.paddedRect.x+Gc,this.paddedRect.y+Gc]},tp.br.get=function(){return[this.paddedRect.x+this.paddedRect.w-Gc,this.paddedRect.y+this.paddedRect.h-Gc]},tp.tlbr.get=function(){return this.tl.concat(this.br)},tp.displaySize.get=function(){return[(this.paddedRect.w-Gc*2)/this.pixelRatio,(this.paddedRect.h-Gc*2)/this.pixelRatio]},Object.defineProperties(Zf.prototype,tp);var gg=function(y,I){var U={},$={};this.haveRenderCallbacks=[];var ae=[];this.addImages(y,U,ae),this.addImages(I,$,ae);var he=ep(ae),Oe=he.w,rt=he.h,gt=new Sh({width:Oe||1,height:rt||1});for(var Et in y){var or=y[Et],_r=U[Et].paddedRect;Sh.copy(or.data,gt,{x:0,y:0},{x:_r.x+Gc,y:_r.y+Gc},or.data)}for(var pr in I){var Fr=I[pr],oi=$[pr].paddedRect,Hi=oi.x+Gc,Ai=oi.y+Gc,bn=Fr.data.width,nn=Fr.data.height;Sh.copy(Fr.data,gt,{x:0,y:0},{x:Hi,y:Ai},Fr.data),Sh.copy(Fr.data,gt,{x:0,y:nn-1},{x:Hi,y:Ai-1},{width:bn,height:1}),Sh.copy(Fr.data,gt,{x:0,y:0},{x:Hi,y:Ai+nn},{width:bn,height:1}),Sh.copy(Fr.data,gt,{x:bn-1,y:0},{x:Hi-1,y:Ai},{width:1,height:nn}),Sh.copy(Fr.data,gt,{x:0,y:0},{x:Hi+bn,y:Ai},{width:1,height:nn})}this.image=gt,this.iconPositions=U,this.patternPositions=$};gg.prototype.addImages=function(y,I,U){for(var $ in y){var ae=y[$],he={x:0,y:0,w:ae.data.width+2*Gc,h:ae.data.height+2*Gc};U.push(he),I[$]=new Zf(he,ae),ae.hasRenderCallback&&this.haveRenderCallbacks.push($)}},gg.prototype.patchUpdatedImages=function(y,I){y.dispatchRenderCallbacks(this.haveRenderCallbacks);for(var U in y.updatedImages)this.patchUpdatedImage(this.iconPositions[U],y.getImage(U),I),this.patchUpdatedImage(this.patternPositions[U],y.getImage(U),I)},gg.prototype.patchUpdatedImage=function(y,I,U){if(!(!y||!I)&&y.version!==I.version){y.version=I.version;var $=y.tl,ae=$[0],he=$[1];U.update(I.data,void 0,{x:ae,y:he})}},X("ImagePosition",Zf),X("ImageAtlas",gg);var uv={horizontal:1,vertical:2,horizontalOnly:3},I1=-17;function AS(m){for(var y=0,I=m;y<I.length;y+=1){var U=I[y];if(U.positionedGlyphs.length!==0)return!1}return!0}var Gw=57344,p0=63743,dy=function(){this.scale=1,this.fontStack="",this.imageName=null};dy.forText=function(y,I){var U=new dy;return U.scale=y||1,U.fontStack=I,U},dy.forImage=function(y){var I=new dy;return I.imageName=y,I};var Xh=function(){this.text="",this.sectionIndex=[],this.sections=[],this.imageSectionID=null};Xh.fromFeature=function(y,I){for(var U=new Xh,$=0;$<y.sections.length;$++){var ae=y.sections[$];ae.image?U.addImageSection(ae):U.addTextSection(ae,I)}return U},Xh.prototype.length=function(){return this.text.length},Xh.prototype.getSection=function(y){return this.sections[this.sectionIndex[y]]},Xh.prototype.getSectionIndex=function(y){return this.sectionIndex[y]},Xh.prototype.getCharCode=function(y){return this.text.charCodeAt(y)},Xh.prototype.verticalizePunctuation=function(){this.text=un(this.text)},Xh.prototype.trim=function(){for(var y=0,I=0;I<this.text.length&&g0[this.text.charCodeAt(I)];I++)y++;for(var U=this.text.length,$=this.text.length-1;$>=0&&$>=y&&g0[this.text.charCodeAt($)];$--)U--;this.text=this.text.substring(y,U),this.sectionIndex=this.sectionIndex.slice(y,U)},Xh.prototype.substring=function(y,I){var U=new Xh;return U.text=this.text.substring(y,I),U.sectionIndex=this.sectionIndex.slice(y,I),U.sections=this.sections,U},Xh.prototype.toString=function(){return this.text},Xh.prototype.getMaxScale=function(){var y=this;return this.sectionIndex.reduce(function(I,U){return Math.max(I,y.sections[U].scale)},0)},Xh.prototype.addTextSection=function(y,I){this.text+=y.text,this.sections.push(dy.forText(y.scale,y.fontStack||I));for(var U=this.sections.length-1,$=0;$<y.text.length;++$)this.sectionIndex.push(U)},Xh.prototype.addImageSection=function(y){var I=y.image?y.image.name:"";if(I.length===0){te("Can't add FormattedSection with an empty image.");return}var U=this.getNextImageSectionCharCode();if(!U){te("Reached maximum number of images "+(p0-Gw+2));return}this.text+=String.fromCharCode(U),this.sections.push(dy.forImage(I)),this.sectionIndex.push(this.sections.length-1)},Xh.prototype.getNextImageSectionCharCode=function(){return this.imageSectionID?this.imageSectionID>=p0?null:++this.imageSectionID:(this.imageSectionID=Gw,this.imageSectionID)};function oO(m,y){for(var I=[],U=m.text,$=0,ae=0,he=y;ae<he.length;ae+=1){var Oe=he[ae];I.push(m.substring($,Oe)),$=Oe}return $<U.length&&I.push(m.substring($,U.length)),I}function Hw(m,y,I,U,$,ae,he,Oe,rt,gt,Et,or,_r,pr,Fr,oi){var Hi=Xh.fromFeature(m,$);or===uv.vertical&&Hi.verticalizePunctuation();var Ai,bn=Ms.processBidirectionalText,nn=Ms.processStyledBidirectionalText;if(bn&&Hi.sections.length===1){Ai=[];for(var xn=bn(Hi.toString(),jw(Hi,gt,ae,y,U,pr,Fr)),Pn=0,Zn=xn;Pn<Zn.length;Pn+=1){var ga=Zn[Pn],ha=new Xh;ha.text=ga,ha.sections=Hi.sections;for(var eo=0;eo<ga.length;eo++)ha.sectionIndex.push(0);Ai.push(ha)}}else if(nn){Ai=[];for(var za=nn(Hi.text,Hi.sectionIndex,jw(Hi,gt,ae,y,U,pr,Fr)),Za=0,Ko=za;Za<Ko.length;Za+=1){var to=Ko[Za],ao=new Xh;ao.text=to[0],ao.sectionIndex=to[1],ao.sections=Hi.sections,Ai.push(ao)}}else Ai=oO(Hi,jw(Hi,gt,ae,y,U,pr,Fr));var _s=[],jo={positionedLines:_s,text:Hi.toString(),top:Et[1],bottom:Et[1],left:Et[0],right:Et[0],writingMode:or,iconsInText:!1,verticalizable:!1};return uO(jo,y,I,U,Ai,he,Oe,rt,or,gt,_r,oi),AS(_s)?!1:jo}var g0={};g0[9]=!0,g0[10]=!0,g0[11]=!0,g0[12]=!0,g0[13]=!0,g0[32]=!0;var cv={};cv[10]=!0,cv[32]=!0,cv[38]=!0,cv[40]=!0,cv[41]=!0,cv[43]=!0,cv[45]=!0,cv[47]=!0,cv[173]=!0,cv[183]=!0,cv[8203]=!0,cv[8208]=!0,cv[8211]=!0,cv[8231]=!0;function ZC(m,y,I,U,$,ae){if(y.imageName){var rt=U[y.imageName];return rt?rt.displaySize[0]*y.scale*Tn/ae+$:0}else{var he=I[y.fontStack],Oe=he&&he[m];return Oe?Oe.metrics.advance*y.scale+$:0}}function sO(m,y,I,U,$,ae){for(var he=0,Oe=0;Oe<m.length();Oe++){var rt=m.getSection(Oe);he+=ZC(m.getCharCode(Oe),rt,U,$,y,ae)}var gt=Math.max(1,Math.ceil(he/I));return he/gt}function YC(m,y,I,U){var $=Math.pow(m-y,2);return U?m<y?$/2:$*2:$+Math.abs(I)*I}function lO(m,y,I){var U=0;return m===10&&(U-=1e4),I&&(U+=150),(m===40||m===65288)&&(U+=50),(y===41||y===65289)&&(U+=50),U}function yp(m,y,I,U,$,ae){for(var he=null,Oe=YC(y,I,$,ae),rt=0,gt=U;rt<gt.length;rt+=1){var Et=gt[rt],or=y-Et.x,_r=YC(or,I,$,ae)+Et.badness;_r<=Oe&&(he=Et,Oe=_r)}return{index:m,x:y,priorBreak:he,badness:Oe}}function rb(m){return m?rb(m.priorBreak).concat(m.index):[]}function jw(m,y,I,U,$,ae,he){if(ae!=="point")return[];if(!m)return[];for(var Oe=[],rt=sO(m,y,I,U,$,he),gt=m.text.indexOf("\u200B")>=0,Et=0,or=0;or<m.length();or++){var _r=m.getSection(or),pr=m.getCharCode(or);if(g0[pr]||(Et+=ZC(pr,_r,U,$,y,he)),or<m.length()-1){var Fr=Hr(pr);(cv[pr]||Fr||_r.imageName)&&Oe.push(yp(or+1,Et,rt,Oe,lO(pr,m.getCharCode(or+1),Fr&&gt),!1))}}return rb(yp(m.length(),Et,rt,Oe,0,!0))}function SS(m){var y=.5,I=.5;switch(m){case"right":case"top-right":case"bottom-right":y=1;break;case"left":case"top-left":case"bottom-left":y=0;break}switch(m){case"bottom":case"bottom-right":case"bottom-left":I=1;break;case"top":case"top-right":case"top-left":I=0;break}return{horizontalAlign:y,verticalAlign:I}}function uO(m,y,I,U,$,ae,he,Oe,rt,gt,Et,or){for(var _r=0,pr=I1,Fr=0,oi=0,Hi=Oe==="right"?1:Oe==="left"?0:.5,Ai=0,bn=0,nn=$;bn<nn.length;bn+=1){var xn=nn[bn];xn.trim();var Pn=xn.getMaxScale(),Zn=(Pn-1)*Tn,ga={positionedGlyphs:[],lineOffset:0};m.positionedLines[Ai]=ga;var ha=ga.positionedGlyphs,eo=0;if(!xn.length()){pr+=ae,++Ai;continue}for(var za=0;za<xn.length();za++){var Za=xn.getSection(za),Ko=xn.getSectionIndex(za),to=xn.getCharCode(za),ao=0,_s=null,jo=null,El=null,Iu=Tn,kl=!(rt===uv.horizontal||!Et&&!qr(to)||Et&&(g0[to]||Zr(to)));if(Za.imageName){var Sf=U[Za.imageName];if(!Sf)continue;El=Za.imageName,m.iconsInText=m.iconsInText||!0,jo=Sf.paddedRect;var Ff=Sf.displaySize;Za.scale=Za.scale*Tn/or,_s={width:Ff[0],height:Ff[1],left:Gc,top:-Gp,advance:kl?Ff[1]:Ff[0]};var Yh=Tn-Ff[1]*Za.scale;ao=Zn+Yh,Iu=_s.advance;var ch=kl?Ff[0]*Za.scale-Tn*Pn:Ff[1]*Za.scale-Tn*Pn;ch>0&&ch>eo&&(eo=ch)}else{var Cl=I[Za.fontStack],yl=Cl&&Cl[to];if(yl&&yl.rect)jo=yl.rect,_s=yl.metrics;else{var Qu=y[Za.fontStack],gc=Qu&&Qu[to];if(!gc)continue;_s=gc.metrics}ao=(Pn-Za.scale)*Tn}kl?(m.verticalizable=!0,ha.push({glyph:to,imageName:El,x:_r,y:pr+ao,vertical:kl,scale:Za.scale,fontStack:Za.fontStack,sectionIndex:Ko,metrics:_s,rect:jo}),_r+=Iu*Za.scale+gt):(ha.push({glyph:to,imageName:El,x:_r,y:pr+ao,vertical:kl,scale:Za.scale,fontStack:Za.fontStack,sectionIndex:Ko,metrics:_s,rect:jo}),_r+=_s.advance*Za.scale+gt)}if(ha.length!==0){var Xd=_r-gt;Fr=Math.max(Xd,Fr),cO(ha,0,ha.length-1,Hi,eo)}_r=0;var Zd=ae*Pn+eo;ga.lineOffset=Math.max(eo,Zn),pr+=Zd,oi=Math.max(Zd,oi),++Ai}var Kh=pr-I1,fv=SS(he),hv=fv.horizontalAlign,Ch=fv.verticalAlign;kd(m.positionedLines,Hi,hv,Ch,Fr,oi,ae,Kh,$.length),m.top+=-Ch*Kh,m.bottom=m.top+Kh,m.left+=-hv*Fr,m.right=m.left+Fr}function cO(m,y,I,U,$){if(!(!U&&!$))for(var ae=m[I],he=ae.metrics.advance*ae.scale,Oe=(m[I].x+he)*U,rt=y;rt<=I;rt++)m[rt].x-=Oe,m[rt].y+=$}function kd(m,y,I,U,$,ae,he,Oe,rt){var gt=(y-I)*$,Et=0;ae!==he?Et=-Oe*U-I1:Et=(-U*rt+.5)*he;for(var or=0,_r=m;or<_r.length;or+=1)for(var pr=_r[or],Fr=0,oi=pr.positionedGlyphs;Fr<oi.length;Fr+=1){var Hi=oi[Fr];Hi.x+=gt,Hi.y+=Et}}function rp(m,y,I){var U=SS(I),$=U.horizontalAlign,ae=U.verticalAlign,he=y[0],Oe=y[1],rt=he-m.displaySize[0]*$,gt=rt+m.displaySize[0],Et=Oe-m.displaySize[1]*ae,or=Et+m.displaySize[1];return{image:m,top:Et,bottom:or,left:rt,right:gt}}function dm(m,y,I,U,$,ae){var he=m.image,Oe;if(he.content){var rt=he.content,gt=he.pixelRatio||1;Oe=[rt[0]/gt,rt[1]/gt,he.displaySize[0]-rt[2]/gt,he.displaySize[1]-rt[3]/gt]}var Et=y.left*ae,or=y.right*ae,_r,pr,Fr,oi;I==="width"||I==="both"?(oi=$[0]+Et-U[3],pr=$[0]+or+U[1]):(oi=$[0]+(Et+or-he.displaySize[0])/2,pr=oi+he.displaySize[0]);var Hi=y.top*ae,Ai=y.bottom*ae;return I==="height"||I==="both"?(_r=$[1]+Hi-U[0],Fr=$[1]+Ai+U[2]):(_r=$[1]+(Hi+Ai-he.displaySize[1])/2,Fr=_r+he.displaySize[1]),{image:he,top:_r,right:pr,bottom:Fr,left:oi,collisionPadding:Oe}}var Wd=function(m){function y(I,U,$,ae){m.call(this,I,U),this.angle=$,ae!==void 0&&(this.segment=ae)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.clone=function(){return new y(this.x,this.y,this.angle,this.segment)},y}(u);X("Anchor",Wd);var Cd=128;function _p(m,y){var I=y.expression;if(I.kind==="constant"){var U=I.evaluate(new Gn(m+1));return{kind:"constant",layoutSize:U}}else{if(I.kind==="source")return{kind:"source"};for(var $=I.zoomStops,ae=I.interpolationType,he=0;he<$.length&&$[he]<=m;)he++;he=Math.max(0,he-1);for(var Oe=he;Oe<$.length&&$[Oe]<m+1;)Oe++;Oe=Math.min($.length-1,Oe);var rt=$[he],gt=$[Oe];if(I.kind==="composite")return{kind:"composite",minZoom:rt,maxZoom:gt,interpolationType:ae};var Et=I.evaluate(new Gn(rt)),or=I.evaluate(new Gn(gt));return{kind:"camera",minZoom:rt,maxZoom:gt,minSize:Et,maxSize:or,interpolationType:ae}}}function IQ(m,y,I){var U=y.uSize,$=y.uSizeT,ae=I.lowerSize,he=I.upperSize;return m.kind==="source"?ae/Cd:m.kind==="composite"?nl(ae/Cd,he/Cd,$):U}function RQ(m,y){var I=0,U=0;if(m.kind==="constant")U=m.layoutSize;else if(m.kind!=="source"){var $=m.interpolationType,ae=m.minZoom,he=m.maxZoom,Oe=$?p(Wl.interpolationFactor($,y,ae,he),0,1):0;m.kind==="camera"?U=nl(m.minSize,m.maxSize,Oe):I=Oe}return{uSizeT:I,uSize:U}}var Aet=Object.freeze({__proto__:null,getSizeData:_p,evaluateSizeForFeature:IQ,evaluateSizeForZoom:RQ,SIZE_PACK_FACTOR:Cd});function DQ(m,y,I,U,$){if(y.segment===void 0)return!0;for(var ae=y,he=y.segment+1,Oe=0;Oe>-I/2;){if(he--,he<0)return!1;Oe-=m[he].dist(ae),ae=m[he]}Oe+=m[he].dist(m[he+1]),he++;for(var rt=[],gt=0;Oe<I/2;){var Et=m[he-1],or=m[he],_r=m[he+1];if(!_r)return!1;var pr=Et.angleTo(or)-or.angleTo(_r);for(pr=Math.abs((pr+3*Math.PI)%(Math.PI*2)-Math.PI),rt.push({distance:Oe,angleDelta:pr}),gt+=pr;Oe-rt[0].distance>U;)gt-=rt.shift().angleDelta;if(gt>$)return!1;he++,Oe+=or.dist(_r)}return!0}function FQ(m){for(var y=0,I=0;I<m.length-1;I++)y+=m[I].dist(m[I+1]);return y}function zQ(m,y,I){return m?3/5*y*I:0}function OQ(m,y){return Math.max(m?m.right-m.left:0,y?y.right-y.left:0)}function Met(m,y,I,U,$,ae){for(var he=zQ(I,$,ae),Oe=OQ(I,U)*ae,rt=0,gt=FQ(m)/2,Et=0;Et<m.length-1;Et++){var or=m[Et],_r=m[Et+1],pr=or.dist(_r);if(rt+pr>gt){var Fr=(gt-rt)/pr,oi=nl(or.x,_r.x,Fr),Hi=nl(or.y,_r.y,Fr),Ai=new Wd(oi,Hi,_r.angleTo(or),Et);return Ai._round(),!he||DQ(m,Ai,Oe,he,y)?Ai:void 0}rt+=pr}}function Eet(m,y,I,U,$,ae,he,Oe,rt){var gt=zQ(U,ae,he),Et=OQ(U,$),or=Et*he,_r=m[0].x===0||m[0].x===rt||m[0].y===0||m[0].y===rt;y-or<y/4&&(y=or+y/4);var pr=ae*2,Fr=_r?y/2*Oe%y:(Et/2+pr)*he*Oe%y;return qQ(m,Fr,y,gt,I,or,_r,!1,rt)}function qQ(m,y,I,U,$,ae,he,Oe,rt){for(var gt=ae/2,Et=FQ(m),or=0,_r=y-I,pr=[],Fr=0;Fr<m.length-1;Fr++){for(var oi=m[Fr],Hi=m[Fr+1],Ai=oi.dist(Hi),bn=Hi.angleTo(oi);_r+I<or+Ai;){_r+=I;var nn=(_r-or)/Ai,xn=nl(oi.x,Hi.x,nn),Pn=nl(oi.y,Hi.y,nn);if(xn>=0&&xn<rt&&Pn>=0&&Pn<rt&&_r-gt>=0&&_r+gt<=Et){var Zn=new Wd(xn,Pn,bn,Fr);Zn._round(),(!U||DQ(m,Zn,ae,U,$))&&pr.push(Zn)}}or+=Ai}return!Oe&&!pr.length&&!he&&(pr=qQ(m,or/2,I,U,$,ae,he,!0,rt)),pr}function BQ(m,y,I,U,$){for(var ae=[],he=0;he<m.length;he++)for(var Oe=m[he],rt=void 0,gt=0;gt<Oe.length-1;gt++){var Et=Oe[gt],or=Oe[gt+1];Et.x<y&&or.x<y||(Et.x<y?Et=new u(y,Et.y+(or.y-Et.y)*((y-Et.x)/(or.x-Et.x)))._round():or.x<y&&(or=new u(y,Et.y+(or.y-Et.y)*((y-Et.x)/(or.x-Et.x)))._round()),!(Et.y<I&&or.y<I)&&(Et.y<I?Et=new u(Et.x+(or.x-Et.x)*((I-Et.y)/(or.y-Et.y)),I)._round():or.y<I&&(or=new u(Et.x+(or.x-Et.x)*((I-Et.y)/(or.y-Et.y)),I)._round()),!(Et.x>=U&&or.x>=U)&&(Et.x>=U?Et=new u(U,Et.y+(or.y-Et.y)*((U-Et.x)/(or.x-Et.x)))._round():or.x>=U&&(or=new u(U,Et.y+(or.y-Et.y)*((U-Et.x)/(or.x-Et.x)))._round()),!(Et.y>=$&&or.y>=$)&&(Et.y>=$?Et=new u(Et.x+(or.x-Et.x)*(($-Et.y)/(or.y-Et.y)),$)._round():or.y>=$&&(or=new u(Et.x+(or.x-Et.x)*(($-Et.y)/(or.y-Et.y)),$)._round()),(!rt||!Et.equals(rt[rt.length-1]))&&(rt=[Et],ae.push(rt)),rt.push(or)))))}return ae}var Ww=Gc;function NQ(m,y,I,U){var $=[],ae=m.image,he=ae.pixelRatio,Oe=ae.paddedRect.w-2*Ww,rt=ae.paddedRect.h-2*Ww,gt=m.right-m.left,Et=m.bottom-m.top,or=ae.stretchX||[[0,Oe]],_r=ae.stretchY||[[0,rt]],pr=function(Cl,yl){return Cl+yl[1]-yl[0]},Fr=or.reduce(pr,0),oi=_r.reduce(pr,0),Hi=Oe-Fr,Ai=rt-oi,bn=0,nn=Fr,xn=0,Pn=oi,Zn=0,ga=Hi,ha=0,eo=Ai;if(ae.content&&U){var za=ae.content;bn=KC(or,0,za[0]),xn=KC(_r,0,za[1]),nn=KC(or,za[0],za[2]),Pn=KC(_r,za[1],za[3]),Zn=za[0]-bn,ha=za[1]-xn,ga=za[2]-za[0]-nn,eo=za[3]-za[1]-Pn}var Za=function(Cl,yl,Qu,gc){var Sf=JC(Cl.stretch-bn,nn,gt,m.left),Ff=$C(Cl.fixed-Zn,ga,Cl.stretch,Fr),Yh=JC(yl.stretch-xn,Pn,Et,m.top),ch=$C(yl.fixed-ha,eo,yl.stretch,oi),Xd=JC(Qu.stretch-bn,nn,gt,m.left),Zd=$C(Qu.fixed-Zn,ga,Qu.stretch,Fr),Kh=JC(gc.stretch-xn,Pn,Et,m.top),fv=$C(gc.fixed-ha,eo,gc.stretch,oi),hv=new u(Sf,Yh),Ch=new u(Xd,Yh),dv=new u(Xd,Kh),xp=new u(Sf,Kh),gy=new u(Ff/he,ch/he),F1=new u(Zd/he,fv/he),z1=y*Math.PI/180;if(z1){var O1=Math.sin(z1),e3=Math.cos(z1),m0=[e3,-O1,O1,e3];hv._matMult(m0),Ch._matMult(m0),xp._matMult(m0),dv._matMult(m0)}var n6=Cl.stretch+Cl.fixed,yO=Qu.stretch+Qu.fixed,a6=yl.stretch+yl.fixed,_O=gc.stretch+gc.fixed,Hp={x:ae.paddedRect.x+Ww+n6,y:ae.paddedRect.y+Ww+a6,w:yO-n6,h:_O-a6},t3=ga/he/gt,o6=eo/he/Et;return{tl:hv,tr:Ch,bl:xp,br:dv,tex:Hp,writingMode:void 0,glyphOffset:[0,0],sectionIndex:0,pixelOffsetTL:gy,pixelOffsetBR:F1,minFontScaleX:t3,minFontScaleY:o6,isSDF:I}};if(!U||!ae.stretchX&&!ae.stretchY)$.push(Za({fixed:0,stretch:-1},{fixed:0,stretch:-1},{fixed:0,stretch:Oe+1},{fixed:0,stretch:rt+1}));else for(var Ko=UQ(or,Hi,Fr),to=UQ(_r,Ai,oi),ao=0;ao<Ko.length-1;ao++)for(var _s=Ko[ao],jo=Ko[ao+1],El=0;El<to.length-1;El++){var Iu=to[El],kl=to[El+1];$.push(Za(_s,Iu,jo,kl))}return $}function KC(m,y,I){for(var U=0,$=0,ae=m;$<ae.length;$+=1){var he=ae[$];U+=Math.max(y,Math.min(I,he[1]))-Math.max(y,Math.min(I,he[0]))}return U}function UQ(m,y,I){for(var U=[{fixed:-Ww,stretch:0}],$=0,ae=m;$<ae.length;$+=1){var he=ae[$],Oe=he[0],rt=he[1],gt=U[U.length-1];U.push({fixed:Oe-gt.stretch,stretch:gt.stretch}),U.push({fixed:Oe-gt.stretch,stretch:gt.stretch+(rt-Oe)})}return U.push({fixed:y+Ww,stretch:I}),U}function JC(m,y,I,U){return m/y*I+U}function $C(m,y,I,U){return m-y*I/U}function ket(m,y,I,U,$,ae,he,Oe){for(var rt=U.layout.get("text-rotate").evaluate(ae,{})*Math.PI/180,gt=[],Et=0,or=y.positionedLines;Et<or.length;Et+=1)for(var _r=or[Et],pr=0,Fr=_r.positionedGlyphs;pr<Fr.length;pr+=1){var oi=Fr[pr];if(oi.rect){var Hi=oi.rect||{},Ai=1,bn=Gp+Ai,nn=!0,xn=1,Pn=0,Zn=($||Oe)&&oi.vertical,ga=oi.metrics.advance*oi.scale/2;if(Oe&&y.verticalizable){var ha=(oi.scale-1)*Tn,eo=(Tn-oi.metrics.width*oi.scale)/2;Pn=_r.lineOffset/2-(oi.imageName?-eo:ha)}if(oi.imageName){var za=he[oi.imageName];nn=za.sdf,xn=za.pixelRatio,bn=Gc/xn}var Za=$?[oi.x+ga,oi.y]:[0,0],Ko=$?[0,0]:[oi.x+ga+I[0],oi.y+I[1]-Pn],to=[0,0];Zn&&(to=Ko,Ko=[0,0]);var ao=(oi.metrics.left-bn)*oi.scale-ga+Ko[0],_s=(-oi.metrics.top-bn)*oi.scale+Ko[1],jo=ao+Hi.w*oi.scale/xn,El=_s+Hi.h*oi.scale/xn,Iu=new u(ao,_s),kl=new u(jo,_s),Cl=new u(ao,El),yl=new u(jo,El);if(Zn){var Qu=new u(-ga,ga-I1),gc=-Math.PI/2,Sf=Tn/2-ga,Ff=oi.imageName?Sf:0,Yh=new u(5-I1-Sf,-Ff),ch=new(Function.prototype.bind.apply(u,[null].concat(to)));Iu._rotateAround(gc,Qu)._add(Yh)._add(ch),kl._rotateAround(gc,Qu)._add(Yh)._add(ch),Cl._rotateAround(gc,Qu)._add(Yh)._add(ch),yl._rotateAround(gc,Qu)._add(Yh)._add(ch)}if(rt){var Xd=Math.sin(rt),Zd=Math.cos(rt),Kh=[Zd,-Xd,Xd,Zd];Iu._matMult(Kh),kl._matMult(Kh),Cl._matMult(Kh),yl._matMult(Kh)}var fv=new u(0,0),hv=new u(0,0),Ch=0,dv=0;gt.push({tl:Iu,tr:kl,bl:Cl,br:yl,tex:Hi,writingMode:y.writingMode,glyphOffset:Za,sectionIndex:oi.sectionIndex,isSDF:nn,pixelOffsetTL:fv,pixelOffsetBR:hv,minFontScaleX:Ch,minFontScaleY:dv})}}return gt}var QC=function(y,I,U,$,ae,he,Oe,rt,gt,Et){if(this.boxStartIndex=y.length,gt){var or=he.top,_r=he.bottom,pr=he.collisionPadding;pr&&(or-=pr[1],_r+=pr[3]);var Fr=_r-or;Fr>0&&(Fr=Math.max(10,Fr),this.circleDiameter=Fr)}else{var oi=he.top*Oe-rt,Hi=he.bottom*Oe+rt,Ai=he.left*Oe-rt,bn=he.right*Oe+rt,nn=he.collisionPadding;if(nn&&(Ai-=nn[0]*Oe,oi-=nn[1]*Oe,bn+=nn[2]*Oe,Hi+=nn[3]*Oe),Et){var xn=new u(Ai,oi),Pn=new u(bn,oi),Zn=new u(Ai,Hi),ga=new u(bn,Hi),ha=Et*Math.PI/180;xn._rotate(ha),Pn._rotate(ha),Zn._rotate(ha),ga._rotate(ha),Ai=Math.min(xn.x,Pn.x,Zn.x,ga.x),bn=Math.max(xn.x,Pn.x,Zn.x,ga.x),oi=Math.min(xn.y,Pn.y,Zn.y,ga.y),Hi=Math.max(xn.y,Pn.y,Zn.y,ga.y)}y.emplaceBack(I.x,I.y,Ai,oi,bn,Hi,U,$,ae)}this.boxEndIndex=y.length},Xw=function(y,I){if(y===void 0&&(y=[]),I===void 0&&(I=Cet),this.data=y,this.length=this.data.length,this.compare=I,this.length>0)for(var U=(this.length>>1)-1;U>=0;U--)this._down(U)};Xw.prototype.push=function(y){this.data.push(y),this.length++,this._up(this.length-1)},Xw.prototype.pop=function(){if(this.length!==0){var y=this.data[0],I=this.data.pop();return this.length--,this.length>0&&(this.data[0]=I,this._down(0)),y}},Xw.prototype.peek=function(){return this.data[0]},Xw.prototype._up=function(y){for(var I=this,U=I.data,$=I.compare,ae=U[y];y>0;){var he=y-1>>1,Oe=U[he];if($(ae,Oe)>=0)break;U[y]=Oe,y=he}U[y]=ae},Xw.prototype._down=function(y){for(var I=this,U=I.data,$=I.compare,ae=this.length>>1,he=U[y];y<ae;){var Oe=(y<<1)+1,rt=U[Oe],gt=Oe+1;if(gt<this.length&&$(U[gt],rt)<0&&(Oe=gt,rt=U[gt]),$(rt,he)>=0)break;U[y]=rt,y=Oe}U[y]=he};function Cet(m,y){return m<y?-1:m>y?1:0}function Let(m,y,I){y===void 0&&(y=1),I===void 0&&(I=!1);for(var U=1/0,$=1/0,ae=-1/0,he=-1/0,Oe=m[0],rt=0;rt<Oe.length;rt++){var gt=Oe[rt];(!rt||gt.x<U)&&(U=gt.x),(!rt||gt.y<$)&&($=gt.y),(!rt||gt.x>ae)&&(ae=gt.x),(!rt||gt.y>he)&&(he=gt.y)}var Et=ae-U,or=he-$,_r=Math.min(Et,or),pr=_r/2,Fr=new Xw([],Pet);if(_r===0)return new u(U,$);for(var oi=U;oi<ae;oi+=_r)for(var Hi=$;Hi<he;Hi+=_r)Fr.push(new Zw(oi+pr,Hi+pr,pr,m));for(var Ai=Ret(m),bn=Fr.length;Fr.length;){var nn=Fr.pop();(nn.d>Ai.d||!Ai.d)&&(Ai=nn,I&&console.log("found best %d after %d probes",Math.round(1e4*nn.d)/1e4,bn)),!(nn.max-Ai.d<=y)&&(pr=nn.h/2,Fr.push(new Zw(nn.p.x-pr,nn.p.y-pr,pr,m)),Fr.push(new Zw(nn.p.x+pr,nn.p.y-pr,pr,m)),Fr.push(new Zw(nn.p.x-pr,nn.p.y+pr,pr,m)),Fr.push(new Zw(nn.p.x+pr,nn.p.y+pr,pr,m)),bn+=4)}return I&&(console.log("num probes: "+bn),console.log("best distance: "+Ai.d)),Ai.p}function Pet(m,y){return y.max-m.max}function Zw(m,y,I,U){this.p=new u(m,y),this.h=I,this.d=Iet(this.p,U),this.max=this.d+this.h*Math.SQRT2}function Iet(m,y){for(var I=!1,U=1/0,$=0;$<y.length;$++)for(var ae=y[$],he=0,Oe=ae.length,rt=Oe-1;he<Oe;rt=he++){var gt=ae[he],Et=ae[rt];gt.y>m.y!=Et.y>m.y&&m.x<(Et.x-gt.x)*(m.y-gt.y)/(Et.y-gt.y)+gt.x&&(I=!I),U=Math.min(U,cg(m,gt,Et))}return(I?1:-1)*Math.sqrt(U)}function Ret(m){for(var y=0,I=0,U=0,$=m[0],ae=0,he=$.length,Oe=he-1;ae<he;Oe=ae++){var rt=$[ae],gt=$[Oe],Et=rt.x*gt.y-gt.x*rt.y;I+=(rt.x+gt.x)*Et,U+=(rt.y+gt.y)*Et,y+=Et*3}return new Zw(I/y,U/y,0,m)}var Yw=7,fO=Number.POSITIVE_INFINITY;function VQ(m,y){function I($,ae){var he=0,Oe=0;ae<0&&(ae=0);var rt=ae/Math.sqrt(2);switch($){case"top-right":case"top-left":Oe=rt-Yw;break;case"bottom-right":case"bottom-left":Oe=-rt+Yw;break;case"bottom":Oe=-ae+Yw;break;case"top":Oe=ae-Yw;break}switch($){case"top-right":case"bottom-right":he=-rt;break;case"top-left":case"bottom-left":he=rt;break;case"left":he=ae;break;case"right":he=-ae;break}return[he,Oe]}function U($,ae,he){var Oe=0,rt=0;switch(ae=Math.abs(ae),he=Math.abs(he),$){case"top-right":case"top-left":case"top":rt=he-Yw;break;case"bottom-right":case"bottom-left":case"bottom":rt=-he+Yw;break}switch($){case"top-right":case"bottom-right":case"right":Oe=-ae;break;case"top-left":case"bottom-left":case"left":Oe=ae;break}return[Oe,rt]}return y[1]!==fO?U(m,y[0],y[1]):I(m,y[0])}function Det(m,y,I,U,$,ae,he){m.createArrays();var Oe=512*m.overscaling;m.tilePixelRatio=Ci/Oe,m.compareText={},m.iconsNeedLinear=!1;var rt=m.layers[0].layout,gt=m.layers[0]._unevaluatedLayout._values,Et={};if(m.textSizeData.kind==="composite"){var or=m.textSizeData,_r=or.minZoom,pr=or.maxZoom;Et.compositeTextSizes=[gt["text-size"].possiblyEvaluate(new Gn(_r),he),gt["text-size"].possiblyEvaluate(new Gn(pr),he)]}if(m.iconSizeData.kind==="composite"){var Fr=m.iconSizeData,oi=Fr.minZoom,Hi=Fr.maxZoom;Et.compositeIconSizes=[gt["icon-size"].possiblyEvaluate(new Gn(oi),he),gt["icon-size"].possiblyEvaluate(new Gn(Hi),he)]}Et.layoutTextSize=gt["text-size"].possiblyEvaluate(new Gn(m.zoom+1),he),Et.layoutIconSize=gt["icon-size"].possiblyEvaluate(new Gn(m.zoom+1),he),Et.textMaxSize=gt["text-size"].possiblyEvaluate(new Gn(18));for(var Ai=rt.get("text-line-height")*Tn,bn=rt.get("text-rotation-alignment")==="map"&&rt.get("symbol-placement")!=="point",nn=rt.get("text-keep-upright"),xn=rt.get("text-size"),Pn=function(){var ha=ga[Zn],eo=rt.get("text-font").evaluate(ha,{},he).join(","),za=xn.evaluate(ha,{},he),Za=Et.layoutTextSize.evaluate(ha,{},he),Ko=Et.layoutIconSize.evaluate(ha,{},he),to={horizontal:{},vertical:void 0},ao=ha.text,_s=[0,0];if(ao){var jo=ao.toString(),El=rt.get("text-letter-spacing").evaluate(ha,{},he)*Tn,Iu=gr(jo)?El:0,kl=rt.get("text-anchor").evaluate(ha,{},he),Cl=rt.get("text-variable-anchor");if(!Cl){var yl=rt.get("text-radial-offset").evaluate(ha,{},he);yl?_s=VQ(kl,[yl*Tn,fO]):_s=rt.get("text-offset").evaluate(ha,{},he).map(function(gy){return gy*Tn})}var Qu=bn?"center":rt.get("text-justify").evaluate(ha,{},he),gc=rt.get("symbol-placement"),Sf=gc==="point"?rt.get("text-max-width").evaluate(ha,{},he)*Tn:0,Ff=function(){m.allowVerticalPlacement&&jt(jo)&&(to.vertical=Hw(ao,y,I,$,eo,Sf,Ai,kl,"left",Iu,_s,uv.vertical,!0,gc,Za,za))};if(!bn&&Cl){for(var Yh=Qu==="auto"?Cl.map(function(gy){return hO(gy)}):[Qu],ch=!1,Xd=0;Xd<Yh.length;Xd++){var Zd=Yh[Xd];if(!to.horizontal[Zd])if(ch)to.horizontal[Zd]=to.horizontal[0];else{var Kh=Hw(ao,y,I,$,eo,Sf,Ai,"center",Zd,Iu,_s,uv.horizontal,!1,gc,Za,za);Kh&&(to.horizontal[Zd]=Kh,ch=Kh.positionedLines.length===1)}}Ff()}else{Qu==="auto"&&(Qu=hO(kl));var fv=Hw(ao,y,I,$,eo,Sf,Ai,kl,Qu,Iu,_s,uv.horizontal,!1,gc,Za,za);fv&&(to.horizontal[Qu]=fv),Ff(),jt(jo)&&bn&&nn&&(to.vertical=Hw(ao,y,I,$,eo,Sf,Ai,kl,Qu,Iu,_s,uv.vertical,!1,gc,Za,za))}}var hv=void 0,Ch=!1;if(ha.icon&&ha.icon.name){var dv=U[ha.icon.name];dv&&(hv=rp($[ha.icon.name],rt.get("icon-offset").evaluate(ha,{},he),rt.get("icon-anchor").evaluate(ha,{},he)),Ch=dv.sdf,m.sdfIcons===void 0?m.sdfIcons=dv.sdf:m.sdfIcons!==dv.sdf&&te("Style sheet warning: Cannot mix SDF and non-SDF icons in one buffer"),(dv.pixelRatio!==m.pixelRatio||rt.get("icon-rotate").constantOr(1)!==0)&&(m.iconsNeedLinear=!0))}var xp=HQ(to.horizontal)||to.vertical;m.iconsInText=xp?xp.iconsInText:!1,(xp||hv)&&Fet(m,ha,to,hv,U,Et,Za,Ko,_s,Ch,he)},Zn=0,ga=m.features;Zn<ga.length;Zn+=1)Pn();ae&&m.generateCollisionDebugBuffers()}function hO(m){switch(m){case"right":case"top-right":case"bottom-right":return"right";case"left":case"top-left":case"bottom-left":return"left"}return"center"}function Fet(m,y,I,U,$,ae,he,Oe,rt,gt,Et){var or=ae.textMaxSize.evaluate(y,{});or===void 0&&(or=he);var _r=m.layers[0].layout,pr=_r.get("icon-offset").evaluate(y,{},Et),Fr=HQ(I.horizontal),oi=24,Hi=he/oi,Ai=m.tilePixelRatio*Hi,bn=m.tilePixelRatio*or/oi,nn=m.tilePixelRatio*Oe,xn=m.tilePixelRatio*_r.get("symbol-spacing"),Pn=_r.get("text-padding")*m.tilePixelRatio,Zn=_r.get("icon-padding")*m.tilePixelRatio,ga=_r.get("text-max-angle")/180*Math.PI,ha=_r.get("text-rotation-alignment")==="map"&&_r.get("symbol-placement")!=="point",eo=_r.get("icon-rotation-alignment")==="map"&&_r.get("symbol-placement")!=="point",za=_r.get("symbol-placement"),Za=xn/2,Ko=_r.get("icon-text-fit"),to;U&&Ko!=="none"&&(m.allowVerticalPlacement&&I.vertical&&(to=dm(U,I.vertical,Ko,_r.get("icon-text-fit-padding"),pr,Hi)),Fr&&(U=dm(U,Fr,Ko,_r.get("icon-text-fit-padding"),pr,Hi)));var ao=function(e3,m0){m0.x<0||m0.x>=Ci||m0.y<0||m0.y>=Ci||zet(m,m0,e3,I,U,$,to,m.layers[0],m.collisionBoxArray,y.index,y.sourceLayerIndex,m.index,Ai,Pn,ha,rt,nn,Zn,eo,pr,y,ae,gt,Et,he)};if(za==="line")for(var _s=0,jo=BQ(y.geometry,0,0,Ci,Ci);_s<jo.length;_s+=1)for(var El=jo[_s],Iu=Eet(El,xn,ga,I.vertical||Fr,U,oi,bn,m.overscaling,Ci),kl=0,Cl=Iu;kl<Cl.length;kl+=1){var yl=Cl[kl],Qu=Fr;(!Qu||!Oet(m,Qu.text,Za,yl))&&ao(El,yl)}else if(za==="line-center")for(var gc=0,Sf=y.geometry;gc<Sf.length;gc+=1){var Ff=Sf[gc];if(Ff.length>1){var Yh=Met(Ff,ga,I.vertical||Fr,U,oi,bn);Yh&&ao(Ff,Yh)}}else if(y.type==="Polygon")for(var ch=0,Xd=Ow(y.geometry,0);ch<Xd.length;ch+=1){var Zd=Xd[ch],Kh=Let(Zd,16);ao(Zd[0],new Wd(Kh.x,Kh.y,0))}else if(y.type==="LineString")for(var fv=0,hv=y.geometry;fv<hv.length;fv+=1){var Ch=hv[fv];ao(Ch,new Wd(Ch[0].x,Ch[0].y,0))}else if(y.type==="Point")for(var dv=0,xp=y.geometry;dv<xp.length;dv+=1)for(var gy=xp[dv],F1=0,z1=gy;F1<z1.length;F1+=1){var O1=z1[F1];ao([O1],new Wd(O1.x,O1.y,0))}}var MS=255,R1=MS*Cd;function GQ(m,y,I,U,$,ae,he,Oe,rt,gt,Et,or,_r,pr,Fr){var oi=ket(y,I,Oe,$,ae,he,U,m.allowVerticalPlacement),Hi=m.textSizeData,Ai=null;Hi.kind==="source"?(Ai=[Cd*$.layout.get("text-size").evaluate(he,{})],Ai[0]>R1&&te(m.layerIds[0]+': Value for "text-size" is >= '+MS+'. Reduce your "text-size".')):Hi.kind==="composite"&&(Ai=[Cd*pr.compositeTextSizes[0].evaluate(he,{},Fr),Cd*pr.compositeTextSizes[1].evaluate(he,{},Fr)],(Ai[0]>R1||Ai[1]>R1)&&te(m.layerIds[0]+': Value for "text-size" is >= '+MS+'. Reduce your "text-size".')),m.addSymbols(m.text,oi,Ai,Oe,ae,he,gt,y,rt.lineStartIndex,rt.lineLength,_r,Fr);for(var bn=0,nn=Et;bn<nn.length;bn+=1){var xn=nn[bn];or[xn]=m.text.placedSymbolArray.length-1}return oi.length*4}function HQ(m){for(var y in m)return m[y];return null}function zet(m,y,I,U,$,ae,he,Oe,rt,gt,Et,or,_r,pr,Fr,oi,Hi,Ai,bn,nn,xn,Pn,Zn,ga,ha){var eo,za=m.addToLineVertexArray(y,I),Za,Ko,to,ao,_s=0,jo=0,El=0,Iu=0,kl=-1,Cl=-1,yl={},Qu=K(""),gc=0,Sf=0;if(Oe._unevaluatedLayout.getValue("text-radial-offset")===void 0?(eo=Oe.layout.get("text-offset").evaluate(xn,{},ga).map(function(CS){return CS*Tn}),gc=eo[0],Sf=eo[1]):(gc=Oe.layout.get("text-radial-offset").evaluate(xn,{},ga)*Tn,Sf=fO),m.allowVerticalPlacement&&U.vertical){var Ff=Oe.layout.get("text-rotate").evaluate(xn,{},ga),Yh=Ff+90,ch=U.vertical;to=new QC(rt,y,gt,Et,or,ch,_r,pr,Fr,Yh),he&&(ao=new QC(rt,y,gt,Et,or,he,Hi,Ai,Fr,Yh))}if($){var Xd=Oe.layout.get("icon-rotate").evaluate(xn,{}),Zd=Oe.layout.get("icon-text-fit")!=="none",Kh=NQ($,Xd,Zn,Zd),fv=he?NQ(he,Xd,Zn,Zd):void 0;Ko=new QC(rt,y,gt,Et,or,$,Hi,Ai,!1,Xd),_s=Kh.length*4;var hv=m.iconSizeData,Ch=null;hv.kind==="source"?(Ch=[Cd*Oe.layout.get("icon-size").evaluate(xn,{})],Ch[0]>R1&&te(m.layerIds[0]+': Value for "icon-size" is >= '+MS+'. Reduce your "icon-size".')):hv.kind==="composite"&&(Ch=[Cd*Pn.compositeIconSizes[0].evaluate(xn,{},ga),Cd*Pn.compositeIconSizes[1].evaluate(xn,{},ga)],(Ch[0]>R1||Ch[1]>R1)&&te(m.layerIds[0]+': Value for "icon-size" is >= '+MS+'. Reduce your "icon-size".')),m.addSymbols(m.icon,Kh,Ch,nn,bn,xn,!1,y,za.lineStartIndex,za.lineLength,-1,ga),kl=m.icon.placedSymbolArray.length-1,fv&&(jo=fv.length*4,m.addSymbols(m.icon,fv,Ch,nn,bn,xn,uv.vertical,y,za.lineStartIndex,za.lineLength,-1,ga),Cl=m.icon.placedSymbolArray.length-1)}for(var dv in U.horizontal){var xp=U.horizontal[dv];if(!Za){Qu=K(xp.text);var gy=Oe.layout.get("text-rotate").evaluate(xn,{},ga);Za=new QC(rt,y,gt,Et,or,xp,_r,pr,Fr,gy)}var F1=xp.positionedLines.length===1;if(El+=GQ(m,y,xp,ae,Oe,Fr,xn,oi,za,U.vertical?uv.horizontal:uv.horizontalOnly,F1?Object.keys(U.horizontal):[dv],yl,kl,Pn,ga),F1)break}U.vertical&&(Iu+=GQ(m,y,U.vertical,ae,Oe,Fr,xn,oi,za,uv.vertical,["vertical"],yl,Cl,Pn,ga));var z1=Za?Za.boxStartIndex:m.collisionBoxArray.length,O1=Za?Za.boxEndIndex:m.collisionBoxArray.length,e3=to?to.boxStartIndex:m.collisionBoxArray.length,m0=to?to.boxEndIndex:m.collisionBoxArray.length,n6=Ko?Ko.boxStartIndex:m.collisionBoxArray.length,yO=Ko?Ko.boxEndIndex:m.collisionBoxArray.length,a6=ao?ao.boxStartIndex:m.collisionBoxArray.length,_O=ao?ao.boxEndIndex:m.collisionBoxArray.length,Hp=-1,t3=function(CS,oee){return CS&&CS.circleDiameter?Math.max(CS.circleDiameter,oee):oee};Hp=t3(Za,Hp),Hp=t3(to,Hp),Hp=t3(Ko,Hp),Hp=t3(ao,Hp);var o6=Hp>-1?1:0;o6&&(Hp*=ha/Tn),m.glyphOffsetArray.length>=Pu.MAX_GLYPHS&&te("Too many glyphs being rendered in a tile. See https://github.com/mapbox/mapbox-gl-js/issues/2907"),xn.sortKey!==void 0&&m.addToSortKeyRanges(m.symbolInstances.length,xn.sortKey),m.symbolInstances.emplaceBack(y.x,y.y,yl.right>=0?yl.right:-1,yl.center>=0?yl.center:-1,yl.left>=0?yl.left:-1,yl.vertical||-1,kl,Cl,Qu,z1,O1,e3,m0,n6,yO,a6,_O,gt,El,Iu,_s,jo,o6,0,_r,gc,Sf,Hp)}function Oet(m,y,I,U){var $=m.compareText;if(!(y in $))$[y]=[];else for(var ae=$[y],he=ae.length-1;he>=0;he--)if(U.dist(ae[he])<I)return!0;return $[y].push(U),!1}var qet=pg.VectorTileFeature.types,Bet=[{name:"a_fade_opacity",components:1,type:"Uint8",offset:0}];function e6(m,y,I,U,$,ae,he,Oe,rt,gt,Et,or,_r){var pr=Oe?Math.min(R1,Math.round(Oe[0])):0,Fr=Oe?Math.min(R1,Math.round(Oe[1])):0;m.emplaceBack(y,I,Math.round(U*32),Math.round($*32),ae,he,(pr<<1)+(rt?1:0),Fr,gt*16,Et*16,or*256,_r*256)}function dO(m,y,I){m.emplaceBack(y.x,y.y,I),m.emplaceBack(y.x,y.y,I),m.emplaceBack(y.x,y.y,I),m.emplaceBack(y.x,y.y,I)}function Net(m){for(var y=0,I=m.sections;y<I.length;y+=1){var U=I[y];if(Ii(U.text))return!0}return!1}var Kw=function(y){this.layoutVertexArray=new Bn,this.indexArray=new pn,this.programConfigurations=y,this.segments=new io,this.dynamicLayoutVertexArray=new hi,this.opacityVertexArray=new li,this.placedSymbolArray=new zo};Kw.prototype.isEmpty=function(){return this.layoutVertexArray.length===0&&this.indexArray.length===0&&this.dynamicLayoutVertexArray.length===0&&this.opacityVertexArray.length===0},Kw.prototype.upload=function(y,I,U,$){this.isEmpty()||(U&&(this.layoutVertexBuffer=y.createVertexBuffer(this.layoutVertexArray,re.members),this.indexBuffer=y.createIndexBuffer(this.indexArray,I),this.dynamicLayoutVertexBuffer=y.createVertexBuffer(this.dynamicLayoutVertexArray,fe.members,!0),this.opacityVertexBuffer=y.createVertexBuffer(this.opacityVertexArray,Bet,!0),this.opacityVertexBuffer.itemSize=1),(U||$)&&this.programConfigurations.upload(y))},Kw.prototype.destroy=function(){this.layoutVertexBuffer&&(this.layoutVertexBuffer.destroy(),this.indexBuffer.destroy(),this.programConfigurations.destroy(),this.segments.destroy(),this.dynamicLayoutVertexBuffer.destroy(),this.opacityVertexBuffer.destroy())},X("SymbolBuffers",Kw);var ES=function(y,I,U){this.layoutVertexArray=new y,this.layoutAttributes=I,this.indexArray=new U,this.segments=new io,this.collisionVertexArray=new Ni};ES.prototype.upload=function(y){this.layoutVertexBuffer=y.createVertexBuffer(this.layoutVertexArray,this.layoutAttributes),this.indexBuffer=y.createIndexBuffer(this.indexArray),this.collisionVertexBuffer=y.createVertexBuffer(this.collisionVertexArray,ze.members,!0)},ES.prototype.destroy=function(){this.layoutVertexBuffer&&(this.layoutVertexBuffer.destroy(),this.indexBuffer.destroy(),this.segments.destroy(),this.collisionVertexBuffer.destroy())},X("CollisionBuffers",ES);var Pu=function(y){this.collisionBoxArray=y.collisionBoxArray,this.zoom=y.zoom,this.overscaling=y.overscaling,this.layers=y.layers,this.layerIds=this.layers.map(function(rt){return rt.id}),this.index=y.index,this.pixelRatio=y.pixelRatio,this.sourceLayerIndex=y.sourceLayerIndex,this.hasPattern=!1,this.hasRTLText=!1,this.sortKeyRanges=[],this.collisionCircleArray=[],this.placementInvProjMatrix=sy([]),this.placementViewportMatrix=sy([]);var I=this.layers[0],U=I._unevaluatedLayout._values;this.textSizeData=_p(this.zoom,U["text-size"]),this.iconSizeData=_p(this.zoom,U["icon-size"]);var $=this.layers[0].layout,ae=$.get("symbol-sort-key"),he=$.get("symbol-z-order");this.canOverlap=$.get("text-allow-overlap")||$.get("icon-allow-overlap")||$.get("text-ignore-placement")||$.get("icon-ignore-placement"),this.sortFeaturesByKey=he!=="viewport-y"&&ae.constantOr(1)!==void 0;var Oe=he==="viewport-y"||he==="auto"&&!this.sortFeaturesByKey;this.sortFeaturesByY=Oe&&this.canOverlap,$.get("symbol-placement")==="point"&&(this.writingModes=$.get("text-writing-mode").map(function(rt){return uv[rt]})),this.stateDependentLayerIds=this.layers.filter(function(rt){return rt.isStateDependent()}).map(function(rt){return rt.id}),this.sourceID=y.sourceID};Pu.prototype.createArrays=function(){this.text=new Kw(new fi(this.layers,this.zoom,function(y){return/^text/.test(y)})),this.icon=new Kw(new fi(this.layers,this.zoom,function(y){return/^icon/.test(y)})),this.glyphOffsetArray=new Vl,this.lineVertexArray=new ss,this.symbolInstances=new al},Pu.prototype.calculateGlyphDependencies=function(y,I,U,$,ae){for(var he=0;he<y.length;he++)if(I[y.charCodeAt(he)]=!0,(U||$)&&ae){var Oe=zi[y.charAt(he)];Oe&&(I[Oe.charCodeAt(0)]=!0)}},Pu.prototype.populate=function(y,I,U){var $=this.layers[0],ae=$.layout,he=ae.get("text-font"),Oe=ae.get("text-field"),rt=ae.get("icon-image"),gt=(Oe.value.kind!=="constant"||Oe.value.value instanceof Jl&&!Oe.value.value.isEmpty()||Oe.value.value.toString().length>0)&&(he.value.kind!=="constant"||he.value.value.length>0),Et=rt.value.kind!=="constant"||!!rt.value.value||Object.keys(rt.parameters).length>0,or=ae.get("symbol-sort-key");if(this.features=[],!(!gt&&!Et)){for(var _r=I.iconDependencies,pr=I.glyphDependencies,Fr=I.availableImages,oi=new Gn(this.zoom),Hi=0,Ai=y;Hi<Ai.length;Hi+=1){var bn=Ai[Hi],nn=bn.feature,xn=bn.id,Pn=bn.index,Zn=bn.sourceLayerIndex,ga=$._featureFilter.needGeometry,ha=Ja(nn,ga);if($._featureFilter.filter(oi,ha,U)){ga||(ha.geometry=zn(nn));var eo=void 0;if(gt){var za=$.getValueAndResolveTokens("text-field",ha,U,Fr),Za=Jl.factory(za);Net(Za)&&(this.hasRTLText=!0),(!this.hasRTLText||yo()==="unavailable"||this.hasRTLText&&Ms.isParsed())&&(eo=yi(Za,$,ha))}var Ko=void 0;if(Et){var to=$.getValueAndResolveTokens("icon-image",ha,U,Fr);to instanceof fl?Ko=to:Ko=fl.fromString(to)}if(!(!eo&&!Ko)){var ao=this.sortFeaturesByKey?or.evaluate(ha,{},U):void 0,_s={id:xn,text:eo,icon:Ko,index:Pn,sourceLayerIndex:Zn,geometry:ha.geometry,properties:nn.properties,type:qet[nn.type],sortKey:ao};if(this.features.push(_s),Ko&&(_r[Ko.name]=!0),eo){var jo=he.evaluate(ha,{},U).join(","),El=ae.get("text-rotation-alignment")==="map"&&ae.get("symbol-placement")!=="point";this.allowVerticalPlacement=this.writingModes&&this.writingModes.indexOf(uv.vertical)>=0;for(var Iu=0,kl=eo.sections;Iu<kl.length;Iu+=1){var Cl=kl[Iu];if(Cl.image)_r[Cl.image.name]=!0;else{var yl=jt(eo.toString()),Qu=Cl.fontStack||jo,gc=pr[Qu]=pr[Qu]||{};this.calculateGlyphDependencies(Cl.text,gc,El,this.allowVerticalPlacement,yl)}}}}}}ae.get("symbol-placement")==="line"&&(this.features=vn(this.features)),this.sortFeaturesByKey&&this.features.sort(function(Sf,Ff){return Sf.sortKey-Ff.sortKey})}},Pu.prototype.update=function(y,I,U){this.stateDependentLayers.length&&(this.text.programConfigurations.updatePaintArrays(y,I,this.layers,U),this.icon.programConfigurations.updatePaintArrays(y,I,this.layers,U))},Pu.prototype.isEmpty=function(){return this.symbolInstances.length===0&&!this.hasRTLText},Pu.prototype.uploadPending=function(){return!this.uploaded||this.text.programConfigurations.needsUpload||this.icon.programConfigurations.needsUpload},Pu.prototype.upload=function(y){!this.uploaded&&this.hasDebugData()&&(this.textCollisionBox.upload(y),this.iconCollisionBox.upload(y)),this.text.upload(y,this.sortFeaturesByY,!this.uploaded,this.text.programConfigurations.needsUpload),this.icon.upload(y,this.sortFeaturesByY,!this.uploaded,this.icon.programConfigurations.needsUpload),this.uploaded=!0},Pu.prototype.destroyDebugData=function(){this.textCollisionBox.destroy(),this.iconCollisionBox.destroy()},Pu.prototype.destroy=function(){this.text.destroy(),this.icon.destroy(),this.hasDebugData()&&this.destroyDebugData()},Pu.prototype.addToLineVertexArray=function(y,I){var U=this.lineVertexArray.length;if(y.segment!==void 0){for(var $=y.dist(I[y.segment+1]),ae=y.dist(I[y.segment]),he={},Oe=y.segment+1;Oe<I.length;Oe++)he[Oe]={x:I[Oe].x,y:I[Oe].y,tileUnitDistanceFromAnchor:$},Oe<I.length-1&&($+=I[Oe+1].dist(I[Oe]));for(var rt=y.segment||0;rt>=0;rt--)he[rt]={x:I[rt].x,y:I[rt].y,tileUnitDistanceFromAnchor:ae},rt>0&&(ae+=I[rt-1].dist(I[rt]));for(var gt=0;gt<I.length;gt++){var Et=he[gt];this.lineVertexArray.emplaceBack(Et.x,Et.y,Et.tileUnitDistanceFromAnchor)}}return{lineStartIndex:U,lineLength:this.lineVertexArray.length-U}},Pu.prototype.addSymbols=function(y,I,U,$,ae,he,Oe,rt,gt,Et,or,_r){for(var pr=y.indexArray,Fr=y.layoutVertexArray,oi=y.segments.prepareSegment(4*I.length,Fr,pr,this.canOverlap?he.sortKey:void 0),Hi=this.glyphOffsetArray.length,Ai=oi.vertexLength,bn=this.allowVerticalPlacement&&Oe===uv.vertical?Math.PI/2:0,nn=he.text&&he.text.sections,xn=0;xn<I.length;xn++){var Pn=I[xn],Zn=Pn.tl,ga=Pn.tr,ha=Pn.bl,eo=Pn.br,za=Pn.tex,Za=Pn.pixelOffsetTL,Ko=Pn.pixelOffsetBR,to=Pn.minFontScaleX,ao=Pn.minFontScaleY,_s=Pn.glyphOffset,jo=Pn.isSDF,El=Pn.sectionIndex,Iu=oi.vertexLength,kl=_s[1];e6(Fr,rt.x,rt.y,Zn.x,kl+Zn.y,za.x,za.y,U,jo,Za.x,Za.y,to,ao),e6(Fr,rt.x,rt.y,ga.x,kl+ga.y,za.x+za.w,za.y,U,jo,Ko.x,Za.y,to,ao),e6(Fr,rt.x,rt.y,ha.x,kl+ha.y,za.x,za.y+za.h,U,jo,Za.x,Ko.y,to,ao),e6(Fr,rt.x,rt.y,eo.x,kl+eo.y,za.x+za.w,za.y+za.h,U,jo,Ko.x,Ko.y,to,ao),dO(y.dynamicLayoutVertexArray,rt,bn),pr.emplaceBack(Iu,Iu+1,Iu+2),pr.emplaceBack(Iu+1,Iu+2,Iu+3),oi.vertexLength+=4,oi.primitiveLength+=2,this.glyphOffsetArray.emplaceBack(_s[0]),(xn===I.length-1||El!==I[xn+1].sectionIndex)&&y.programConfigurations.populatePaintArrays(Fr.length,he,he.index,{},_r,nn&&nn[El])}y.placedSymbolArray.emplaceBack(rt.x,rt.y,Hi,this.glyphOffsetArray.length-Hi,Ai,gt,Et,rt.segment,U?U[0]:0,U?U[1]:0,$[0],$[1],Oe,0,!1,0,or)},Pu.prototype._addCollisionDebugVertex=function(y,I,U,$,ae,he){return I.emplaceBack(0,0),y.emplaceBack(U.x,U.y,$,ae,Math.round(he.x),Math.round(he.y))},Pu.prototype.addCollisionDebugVertices=function(y,I,U,$,ae,he,Oe){var rt=ae.segments.prepareSegment(4,ae.layoutVertexArray,ae.indexArray),gt=rt.vertexLength,Et=ae.layoutVertexArray,or=ae.collisionVertexArray,_r=Oe.anchorX,pr=Oe.anchorY;this._addCollisionDebugVertex(Et,or,he,_r,pr,new u(y,I)),this._addCollisionDebugVertex(Et,or,he,_r,pr,new u(U,I)),this._addCollisionDebugVertex(Et,or,he,_r,pr,new u(U,$)),this._addCollisionDebugVertex(Et,or,he,_r,pr,new u(y,$)),rt.vertexLength+=4;var Fr=ae.indexArray;Fr.emplaceBack(gt,gt+1),Fr.emplaceBack(gt+1,gt+2),Fr.emplaceBack(gt+2,gt+3),Fr.emplaceBack(gt+3,gt),rt.primitiveLength+=4},Pu.prototype.addDebugCollisionBoxes=function(y,I,U,$){for(var ae=y;ae<I;ae++){var he=this.collisionBoxArray.get(ae),Oe=he.x1,rt=he.y1,gt=he.x2,Et=he.y2;this.addCollisionDebugVertices(Oe,rt,gt,Et,$?this.textCollisionBox:this.iconCollisionBox,he.anchorPoint,U)}},Pu.prototype.generateCollisionDebugBuffers=function(){this.hasDebugData()&&this.destroyDebugData(),this.textCollisionBox=new ES(Ji,ct.members,oa),this.iconCollisionBox=new ES(Ji,ct.members,oa);for(var y=0;y<this.symbolInstances.length;y++){var I=this.symbolInstances.get(y);this.addDebugCollisionBoxes(I.textBoxStartIndex,I.textBoxEndIndex,I,!0),this.addDebugCollisionBoxes(I.verticalTextBoxStartIndex,I.verticalTextBoxEndIndex,I,!0),this.addDebugCollisionBoxes(I.iconBoxStartIndex,I.iconBoxEndIndex,I,!1),this.addDebugCollisionBoxes(I.verticalIconBoxStartIndex,I.verticalIconBoxEndIndex,I,!1)}},Pu.prototype._deserializeCollisionBoxesForSymbol=function(y,I,U,$,ae,he,Oe,rt,gt){for(var Et={},or=I;or<U;or++){var _r=y.get(or);Et.textBox={x1:_r.x1,y1:_r.y1,x2:_r.x2,y2:_r.y2,anchorPointX:_r.anchorPointX,anchorPointY:_r.anchorPointY},Et.textFeatureIndex=_r.featureIndex;break}for(var pr=$;pr<ae;pr++){var Fr=y.get(pr);Et.verticalTextBox={x1:Fr.x1,y1:Fr.y1,x2:Fr.x2,y2:Fr.y2,anchorPointX:Fr.anchorPointX,anchorPointY:Fr.anchorPointY},Et.verticalTextFeatureIndex=Fr.featureIndex;break}for(var oi=he;oi<Oe;oi++){var Hi=y.get(oi);Et.iconBox={x1:Hi.x1,y1:Hi.y1,x2:Hi.x2,y2:Hi.y2,anchorPointX:Hi.anchorPointX,anchorPointY:Hi.anchorPointY},Et.iconFeatureIndex=Hi.featureIndex;break}for(var Ai=rt;Ai<gt;Ai++){var bn=y.get(Ai);Et.verticalIconBox={x1:bn.x1,y1:bn.y1,x2:bn.x2,y2:bn.y2,anchorPointX:bn.anchorPointX,anchorPointY:bn.anchorPointY},Et.verticalIconFeatureIndex=bn.featureIndex;break}return Et},Pu.prototype.deserializeCollisionBoxes=function(y){this.collisionArrays=[];for(var I=0;I<this.symbolInstances.length;I++){var U=this.symbolInstances.get(I);this.collisionArrays.push(this._deserializeCollisionBoxesForSymbol(y,U.textBoxStartIndex,U.textBoxEndIndex,U.verticalTextBoxStartIndex,U.verticalTextBoxEndIndex,U.iconBoxStartIndex,U.iconBoxEndIndex,U.verticalIconBoxStartIndex,U.verticalIconBoxEndIndex))}},Pu.prototype.hasTextData=function(){return this.text.segments.get().length>0},Pu.prototype.hasIconData=function(){return this.icon.segments.get().length>0},Pu.prototype.hasDebugData=function(){return this.textCollisionBox&&this.iconCollisionBox},Pu.prototype.hasTextCollisionBoxData=function(){return this.hasDebugData()&&this.textCollisionBox.segments.get().length>0},Pu.prototype.hasIconCollisionBoxData=function(){return this.hasDebugData()&&this.iconCollisionBox.segments.get().length>0},Pu.prototype.addIndicesForPlacedSymbol=function(y,I){for(var U=y.placedSymbolArray.get(I),$=U.vertexStartIndex+U.numGlyphs*4,ae=U.vertexStartIndex;ae<$;ae+=4)y.indexArray.emplaceBack(ae,ae+1,ae+2),y.indexArray.emplaceBack(ae+1,ae+2,ae+3)},Pu.prototype.getSortedSymbolIndexes=function(y){if(this.sortedAngle===y&&this.symbolInstanceIndexes!==void 0)return this.symbolInstanceIndexes;for(var I=Math.sin(y),U=Math.cos(y),$=[],ae=[],he=[],Oe=0;Oe<this.symbolInstances.length;++Oe){he.push(Oe);var rt=this.symbolInstances.get(Oe);$.push(Math.round(I*rt.anchorX+U*rt.anchorY)|0),ae.push(rt.featureIndex)}return he.sort(function(gt,Et){return $[gt]-$[Et]||ae[Et]-ae[gt]}),he},Pu.prototype.addToSortKeyRanges=function(y,I){var U=this.sortKeyRanges[this.sortKeyRanges.length-1];U&&U.sortKey===I?U.symbolInstanceEnd=y+1:this.sortKeyRanges.push({sortKey:I,symbolInstanceStart:y,symbolInstanceEnd:y+1})},Pu.prototype.sortFeatures=function(y){var I=this;if(this.sortFeaturesByY&&this.sortedAngle!==y&&!(this.text.segments.get().length>1||this.icon.segments.get().length>1)){this.symbolInstanceIndexes=this.getSortedSymbolIndexes(y),this.sortedAngle=y,this.text.indexArray.clear(),this.icon.indexArray.clear(),this.featureSortOrder=[];for(var U=0,$=this.symbolInstanceIndexes;U<$.length;U+=1){var ae=$[U],he=this.symbolInstances.get(ae);this.featureSortOrder.push(he.featureIndex),[he.rightJustifiedTextSymbolIndex,he.centerJustifiedTextSymbolIndex,he.leftJustifiedTextSymbolIndex].forEach(function(Oe,rt,gt){Oe>=0&&gt.indexOf(Oe)===rt&&I.addIndicesForPlacedSymbol(I.text,Oe)}),he.verticalPlacedTextSymbolIndex>=0&&this.addIndicesForPlacedSymbol(this.text,he.verticalPlacedTextSymbolIndex),he.placedIconSymbolIndex>=0&&this.addIndicesForPlacedSymbol(this.icon,he.placedIconSymbolIndex),he.verticalPlacedIconSymbolIndex>=0&&this.addIndicesForPlacedSymbol(this.icon,he.verticalPlacedIconSymbolIndex)}this.text.indexBuffer&&this.text.indexBuffer.updateData(this.text.indexArray),this.icon.indexBuffer&&this.icon.indexBuffer.updateData(this.icon.indexArray)}},X("SymbolBucket",Pu,{omit:["layers","collisionBoxArray","features","compareText"]}),Pu.MAX_GLYPHS=65535,Pu.addDynamicAttributes=dO;function Uet(m,y){return y.replace(/{([^{}]+)}/g,function(I,U){return U in m?String(m[U]):""})}var Vet=new Nr({"symbol-placement":new Me(Rn.layout_symbol["symbol-placement"]),"symbol-spacing":new Me(Rn.layout_symbol["symbol-spacing"]),"symbol-avoid-edges":new Me(Rn.layout_symbol["symbol-avoid-edges"]),"symbol-sort-key":new bt(Rn.layout_symbol["symbol-sort-key"]),"symbol-z-order":new Me(Rn.layout_symbol["symbol-z-order"]),"icon-allow-overlap":new Me(Rn.layout_symbol["icon-allow-overlap"]),"icon-ignore-placement":new Me(Rn.layout_symbol["icon-ignore-placement"]),"icon-optional":new Me(Rn.layout_symbol["icon-optional"]),"icon-rotation-alignment":new Me(Rn.layout_symbol["icon-rotation-alignment"]),"icon-size":new bt(Rn.layout_symbol["icon-size"]),"icon-text-fit":new Me(Rn.layout_symbol["icon-text-fit"]),"icon-text-fit-padding":new Me(Rn.layout_symbol["icon-text-fit-padding"]),"icon-image":new bt(Rn.layout_symbol["icon-image"]),"icon-rotate":new bt(Rn.layout_symbol["icon-rotate"]),"icon-padding":new Me(Rn.layout_symbol["icon-padding"]),"icon-keep-upright":new Me(Rn.layout_symbol["icon-keep-upright"]),"icon-offset":new bt(Rn.layout_symbol["icon-offset"]),"icon-anchor":new bt(Rn.layout_symbol["icon-anchor"]),"icon-pitch-alignment":new Me(Rn.layout_symbol["icon-pitch-alignment"]),"text-pitch-alignment":new Me(Rn.layout_symbol["text-pitch-alignment"]),"text-rotation-alignment":new Me(Rn.layout_symbol["text-rotation-alignment"]),"text-field":new bt(Rn.layout_symbol["text-field"]),"text-font":new bt(Rn.layout_symbol["text-font"]),"text-size":new bt(Rn.layout_symbol["text-size"]),"text-max-width":new bt(Rn.layout_symbol["text-max-width"]),"text-line-height":new Me(Rn.layout_symbol["text-line-height"]),"text-letter-spacing":new bt(Rn.layout_symbol["text-letter-spacing"]),"text-justify":new bt(Rn.layout_symbol["text-justify"]),"text-radial-offset":new bt(Rn.layout_symbol["text-radial-offset"]),"text-variable-anchor":new Me(Rn.layout_symbol["text-variable-anchor"]),"text-anchor":new bt(Rn.layout_symbol["text-anchor"]),"text-max-angle":new Me(Rn.layout_symbol["text-max-angle"]),"text-writing-mode":new Me(Rn.layout_symbol["text-writing-mode"]),"text-rotate":new bt(Rn.layout_symbol["text-rotate"]),"text-padding":new Me(Rn.layout_symbol["text-padding"]),"text-keep-upright":new Me(Rn.layout_symbol["text-keep-upright"]),"text-transform":new bt(Rn.layout_symbol["text-transform"]),"text-offset":new bt(Rn.layout_symbol["text-offset"]),"text-allow-overlap":new Me(Rn.layout_symbol["text-allow-overlap"]),"text-ignore-placement":new Me(Rn.layout_symbol["text-ignore-placement"]),"text-optional":new Me(Rn.layout_symbol["text-optional"])}),Get=new Nr({"icon-opacity":new bt(Rn.paint_symbol["icon-opacity"]),"icon-color":new bt(Rn.paint_symbol["icon-color"]),"icon-halo-color":new bt(Rn.paint_symbol["icon-halo-color"]),"icon-halo-width":new bt(Rn.paint_symbol["icon-halo-width"]),"icon-halo-blur":new bt(Rn.paint_symbol["icon-halo-blur"]),"icon-translate":new Me(Rn.paint_symbol["icon-translate"]),"icon-translate-anchor":new Me(Rn.paint_symbol["icon-translate-anchor"]),"text-opacity":new bt(Rn.paint_symbol["text-opacity"]),"text-color":new bt(Rn.paint_symbol["text-color"],{runtimeType:ql,getOverride:function(m){return m.textColor},hasOverride:function(m){return!!m.textColor}}),"text-halo-color":new bt(Rn.paint_symbol["text-halo-color"]),"text-halo-width":new bt(Rn.paint_symbol["text-halo-width"]),"text-halo-blur":new bt(Rn.paint_symbol["text-halo-blur"]),"text-translate":new Me(Rn.paint_symbol["text-translate"]),"text-translate-anchor":new Me(Rn.paint_symbol["text-translate-anchor"])}),vO={paint:Get,layout:Vet},Jw=function(y){this.type=y.property.overrides?y.property.overrides.runtimeType:ac,this.defaultValue=y};Jw.prototype.evaluate=function(y){if(y.formattedSection){var I=this.defaultValue.property.overrides;if(I&&I.hasOverride(y.formattedSection))return I.getOverride(y.formattedSection)}return y.feature&&y.featureState?this.defaultValue.evaluate(y.feature,y.featureState):this.defaultValue.property.specification.default},Jw.prototype.eachChild=function(y){if(!this.defaultValue.isConstant()){var I=this.defaultValue.value;y(I._styleExpression.expression)}},Jw.prototype.outputDefined=function(){return!1},Jw.prototype.serialize=function(){return null},X("FormatSectionOverride",Jw,{omit:["defaultValue"]});var Het=function(m){function y(I){m.call(this,I,vO)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.recalculate=function(U,$){if(m.prototype.recalculate.call(this,U,$),this.layout.get("icon-rotation-alignment")==="auto"&&(this.layout.get("symbol-placement")!=="point"?this.layout._values["icon-rotation-alignment"]="map":this.layout._values["icon-rotation-alignment"]="viewport"),this.layout.get("text-rotation-alignment")==="auto"&&(this.layout.get("symbol-placement")!=="point"?this.layout._values["text-rotation-alignment"]="map":this.layout._values["text-rotation-alignment"]="viewport"),this.layout.get("text-pitch-alignment")==="auto"&&(this.layout._values["text-pitch-alignment"]=this.layout.get("text-rotation-alignment")),this.layout.get("icon-pitch-alignment")==="auto"&&(this.layout._values["icon-pitch-alignment"]=this.layout.get("icon-rotation-alignment")),this.layout.get("symbol-placement")==="point"){var ae=this.layout.get("text-writing-mode");if(ae){for(var he=[],Oe=0,rt=ae;Oe<rt.length;Oe+=1){var gt=rt[Oe];he.indexOf(gt)<0&&he.push(gt)}this.layout._values["text-writing-mode"]=he}else this.layout._values["text-writing-mode"]=["horizontal"]}this._setPaintOverrides()},y.prototype.getValueAndResolveTokens=function(U,$,ae,he){var Oe=this.layout.get(U).evaluate($,{},ae,he),rt=this._unevaluatedLayout._values[U];return!rt.isDataDriven()&&!Ua(rt.value)&&Oe?Uet($.properties,Oe):Oe},y.prototype.createBucket=function(U){return new Pu(U)},y.prototype.queryRadius=function(){return 0},y.prototype.queryIntersectsFeature=function(){return!1},y.prototype._setPaintOverrides=function(){for(var U=0,$=vO.paint.overridableProperties;U<$.length;U+=1){var ae=$[U];if(y.hasPaintOverride(this.layout,ae)){var he=this.paint.get(ae),Oe=new Jw(he),rt=new Ac(Oe,he.property.specification),gt=null;he.value.kind==="constant"||he.value.kind==="source"?gt=new Vc("source",rt):gt=new hc("composite",rt,he.value.zoomStops,he.value._interpolationType),this.paint._values[ae]=new dl(he.property,gt,he.parameters)}}},y.prototype._handleOverridablePaintPropertyUpdate=function(U,$,ae){return!this.layout||$.isDataDriven()||ae.isDataDriven()?!1:y.hasPaintOverride(this.layout,U)},y.hasPaintOverride=function(U,$){var ae=U.get("text-field"),he=vO.paint.properties[$],Oe=!1,rt=function(or){for(var _r=0,pr=or;_r<pr.length;_r+=1){var Fr=pr[_r];if(he.overrides&&he.overrides.hasOverride(Fr)){Oe=!0;return}}};if(ae.value.kind==="constant"&&ae.value.value instanceof Jl)rt(ae.value.value.sections);else if(ae.value.kind==="source"){var gt=function(or){if(!Oe)if(or instanceof Go&&Es(or.value)===pl){var _r=or.value;rt(_r.sections)}else or instanceof Gu?rt(or.sections):or.eachChild(gt)},Et=ae.value;Et._styleExpression&&gt(Et._styleExpression.expression)}return Oe},y}(mi),jet=new Nr({"background-color":new Me(Rn.paint_background["background-color"]),"background-pattern":new Rr(Rn.paint_background["background-pattern"]),"background-opacity":new Me(Rn.paint_background["background-opacity"])}),Wet={paint:jet},Xet=function(m){function y(I){m.call(this,I,Wet)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y}(mi),Zet=new Nr({"raster-opacity":new Me(Rn.paint_raster["raster-opacity"]),"raster-hue-rotate":new Me(Rn.paint_raster["raster-hue-rotate"]),"raster-brightness-min":new Me(Rn.paint_raster["raster-brightness-min"]),"raster-brightness-max":new Me(Rn.paint_raster["raster-brightness-max"]),"raster-saturation":new Me(Rn.paint_raster["raster-saturation"]),"raster-contrast":new Me(Rn.paint_raster["raster-contrast"]),"raster-resampling":new Me(Rn.paint_raster["raster-resampling"]),"raster-fade-duration":new Me(Rn.paint_raster["raster-fade-duration"])}),Yet={paint:Zet},Ket=function(m){function y(I){m.call(this,I,Yet)}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y}(mi);function Jet(m){var y=[],I=m.id;return I===void 0&&y.push({message:"layers."+I+': missing required property "id"'}),m.render===void 0&&y.push({message:"layers."+I+': missing required method "render"'}),m.renderingMode&&m.renderingMode!=="2d"&&m.renderingMode!=="3d"&&y.push({message:"layers."+I+': property "renderingMode" must be either "2d" or "3d"'}),y}var $et=function(m){function y(I){m.call(this,I,{}),this.implementation=I}return m&&(y.__proto__=m),y.prototype=Object.create(m&&m.prototype),y.prototype.constructor=y,y.prototype.is3D=function(){return this.implementation.renderingMode==="3d"},y.prototype.hasOffscreenPass=function(){return this.implementation.prerender!==void 0},y.prototype.recalculate=function(){},y.prototype.updateTransitions=function(){},y.prototype.hasTransition=function(){},y.prototype.serialize=function(){},y.prototype.onAdd=function(U){this.implementation.onAdd&&this.implementation.onAdd(U,U.painter.context.gl)},y.prototype.onRemove=function(U){this.implementation.onRemove&&this.implementation.onRemove(U,U.painter.context.gl)},y}(mi),Qet={circle:j9,heatmap:Iw,hillshade:xC,fill:Qx,"fill-extrusion":hm,line:A,symbol:Het,background:Xet,raster:Ket};function ett(m){return m.type==="custom"?new $et(m):new Qet[m.type](m)}var jQ=f.HTMLImageElement,WQ=f.HTMLCanvasElement,XQ=f.HTMLVideoElement,ZQ=f.ImageData,t6=f.ImageBitmap,ib=function(y,I,U,$){this.context=y,this.format=U,this.texture=y.gl.createTexture(),this.update(I,$)};ib.prototype.update=function(y,I,U){var $=y.width,ae=y.height,he=(!this.size||this.size[0]!==$||this.size[1]!==ae)&&!U,Oe=this,rt=Oe.context,gt=rt.gl;if(this.useMipmap=!!(I&&I.useMipmap),gt.bindTexture(gt.TEXTURE_2D,this.texture),rt.pixelStoreUnpackFlipY.set(!1),rt.pixelStoreUnpack.set(1),rt.pixelStoreUnpackPremultiplyAlpha.set(this.format===gt.RGBA&&(!I||I.premultiply!==!1)),he)this.size=[$,ae],y instanceof jQ||y instanceof WQ||y instanceof XQ||y instanceof ZQ||t6&&y instanceof t6?gt.texImage2D(gt.TEXTURE_2D,0,this.format,this.format,gt.UNSIGNED_BYTE,y):gt.texImage2D(gt.TEXTURE_2D,0,this.format,$,ae,0,this.format,gt.UNSIGNED_BYTE,y.data);else{var Et=U||{x:0,y:0},or=Et.x,_r=Et.y;y instanceof jQ||y instanceof WQ||y instanceof XQ||y instanceof ZQ||t6&&y instanceof t6?gt.texSubImage2D(gt.TEXTURE_2D,0,or,_r,gt.RGBA,gt.UNSIGNED_BYTE,y):gt.texSubImage2D(gt.TEXTURE_2D,0,or,_r,$,ae,gt.RGBA,gt.UNSIGNED_BYTE,y.data)}this.useMipmap&&this.isSizePowerOfTwo()&&gt.generateMipmap(gt.TEXTURE_2D)},ib.prototype.bind=function(y,I,U){var $=this,ae=$.context,he=ae.gl;he.bindTexture(he.TEXTURE_2D,this.texture),U===he.LINEAR_MIPMAP_NEAREST&&!this.isSizePowerOfTwo()&&(U=he.LINEAR),y!==this.filter&&(he.texParameteri(he.TEXTURE_2D,he.TEXTURE_MAG_FILTER,y),he.texParameteri(he.TEXTURE_2D,he.TEXTURE_MIN_FILTER,U||y),this.filter=y),I!==this.wrap&&(he.texParameteri(he.TEXTURE_2D,he.TEXTURE_WRAP_S,I),he.texParameteri(he.TEXTURE_2D,he.TEXTURE_WRAP_T,I),this.wrap=I)},ib.prototype.isSizePowerOfTwo=function(){return this.size[0]===this.size[1]&&Math.log(this.size[0])/Math.LN2%1===0},ib.prototype.destroy=function(){var y=this.context,I=y.gl;I.deleteTexture(this.texture),this.texture=null};var pO=function(y){var I=this;this._callback=y,this._triggered=!1,typeof MessageChannel!="undefined"&&(this._channel=new MessageChannel,this._channel.port2.onmessage=function(){I._triggered=!1,I._callback()})};pO.prototype.trigger=function(){var y=this;this._triggered||(this._triggered=!0,this._channel?this._channel.port1.postMessage(!0):setTimeout(function(){y._triggered=!1,y._callback()},0))},pO.prototype.remove=function(){delete this._channel,this._callback=function(){}};var $w=function(y,I,U){this.target=y,this.parent=I,this.mapId=U,this.callbacks={},this.tasks={},this.taskQueue=[],this.cancelCallbacks={},O(["receive","process"],this),this.invoker=new pO(this.process),this.target.addEventListener("message",this.receive,!1),this.globalScope=Ce()?y:f};$w.prototype.send=function(y,I,U,$,ae){var he=this;ae===void 0&&(ae=!1);var Oe=Math.round(Math.random()*1e18).toString(36).substring(0,10);U&&(this.callbacks[Oe]=U);var rt=Se(this.globalScope)?void 0:[];return this.target.postMessage({id:Oe,type:y,hasCallback:!!U,targetMapId:$,mustQueue:ae,sourceMapId:this.mapId,data:He(I,rt)},rt),{cancel:function(){U&&delete he.callbacks[Oe],he.target.postMessage({id:Oe,type:"<cancel>",targetMapId:$,sourceMapId:he.mapId})}}},$w.prototype.receive=function(y){var I=y.data,U=I.id;if(U&&!(I.targetMapId&&this.mapId!==I.targetMapId))if(I.type==="<cancel>"){delete this.tasks[U];var $=this.cancelCallbacks[U];delete this.cancelCallbacks[U],$&&$()}else Ce()||I.mustQueue?(this.tasks[U]=I,this.taskQueue.push(U),this.invoker.trigger()):this.processTask(U,I)},$w.prototype.process=function(){if(this.taskQueue.length){var y=this.taskQueue.shift(),I=this.tasks[y];delete this.tasks[y],this.taskQueue.length&&this.invoker.trigger(),I&&this.processTask(y,I)}},$w.prototype.processTask=function(y,I){var U=this;if(I.type==="<response>"){var $=this.callbacks[y];delete this.callbacks[y],$&&(I.error?$(Ye(I.error)):$(null,Ye(I.data)))}else{var ae=!1,he=Se(this.globalScope)?void 0:[],Oe=I.hasCallback?function(_r,pr){ae=!0,delete U.cancelCallbacks[y],U.target.postMessage({id:y,type:"<response>",sourceMapId:U.mapId,error:_r?He(_r):null,data:He(pr,he)},he)}:function(_r){ae=!0},rt=null,gt=Ye(I.data);if(this.parent[I.type])rt=this.parent[I.type](I.sourceMapId,gt,Oe);else if(this.parent.getWorkerSource){var Et=I.type.split("."),or=this.parent.getWorkerSource(I.sourceMapId,Et[0],gt.source);rt=or[Et[1]](gt,Oe)}else Oe(new Error("Could not find function "+I.type));!ae&&rt&&rt.cancel&&(this.cancelCallbacks[y]=rt.cancel)}},$w.prototype.remove=function(){this.invoker.remove(),this.target.removeEventListener("message",this.receive,!1)};function ttt(m,y,I){y=Math.pow(2,I)-y-1;var U=YQ(m*256,y*256,I),$=YQ((m+1)*256,(y+1)*256,I);return U[0]+","+U[1]+","+$[0]+","+$[1]}function YQ(m,y,I){var U=2*Math.PI*6378137/256/Math.pow(2,I),$=m*U-2*Math.PI*6378137/2,ae=y*U-2*Math.PI*6378137/2;return[$,ae]}var lh=function(y,I){y&&(I?this.setSouthWest(y).setNorthEast(I):y.length===4?this.setSouthWest([y[0],y[1]]).setNorthEast([y[2],y[3]]):this.setSouthWest(y[0]).setNorthEast(y[1]))};lh.prototype.setNorthEast=function(y){return this._ne=y instanceof Hc?new Hc(y.lng,y.lat):Hc.convert(y),this},lh.prototype.setSouthWest=function(y){return this._sw=y instanceof Hc?new Hc(y.lng,y.lat):Hc.convert(y),this},lh.prototype.extend=function(y){var I=this._sw,U=this._ne,$,ae;if(y instanceof Hc)$=y,ae=y;else if(y instanceof lh){if($=y._sw,ae=y._ne,!$||!ae)return this}else{if(Array.isArray(y))if(y.length===4||y.every(Array.isArray)){var he=y;return this.extend(lh.convert(he))}else{var Oe=y;return this.extend(Hc.convert(Oe))}return this}return!I&&!U?(this._sw=new Hc($.lng,$.lat),this._ne=new Hc(ae.lng,ae.lat)):(I.lng=Math.min($.lng,I.lng),I.lat=Math.min($.lat,I.lat),U.lng=Math.max(ae.lng,U.lng),U.lat=Math.max(ae.lat,U.lat)),this},lh.prototype.getCenter=function(){return new Hc((this._sw.lng+this._ne.lng)/2,(this._sw.lat+this._ne.lat)/2)},lh.prototype.getSouthWest=function(){return this._sw},lh.prototype.getNorthEast=function(){return this._ne},lh.prototype.getNorthWest=function(){return new Hc(this.getWest(),this.getNorth())},lh.prototype.getSouthEast=function(){return new Hc(this.getEast(),this.getSouth())},lh.prototype.getWest=function(){return this._sw.lng},lh.prototype.getSouth=function(){return this._sw.lat},lh.prototype.getEast=function(){return this._ne.lng},lh.prototype.getNorth=function(){return this._ne.lat},lh.prototype.toArray=function(){return[this._sw.toArray(),this._ne.toArray()]},lh.prototype.toString=function(){return"LngLatBounds("+this._sw.toString()+", "+this._ne.toString()+")"},lh.prototype.isEmpty=function(){return!(this._sw&&this._ne)},lh.prototype.contains=function(y){var I=Hc.convert(y),U=I.lng,$=I.lat,ae=this._sw.lat<=$&&$<=this._ne.lat,he=this._sw.lng<=U&&U<=this._ne.lng;return this._sw.lng>this._ne.lng&&(he=this._sw.lng>=U&&U>=this._ne.lng),ae&&he},lh.convert=function(y){return!y||y instanceof lh?y:new lh(y)};var KQ=63710088e-1,Hc=function(y,I){if(isNaN(y)||isNaN(I))throw new Error("Invalid LngLat object: ("+y+", "+I+")");if(this.lng=+y,this.lat=+I,this.lat>90||this.lat<-90)throw new Error("Invalid LngLat latitude value: must be between -90 and 90")};Hc.prototype.wrap=function(){return new Hc(k(this.lng,-180,180),this.lat)},Hc.prototype.toArray=function(){return[this.lng,this.lat]},Hc.prototype.toString=function(){return"LngLat("+this.lng+", "+this.lat+")"},Hc.prototype.distanceTo=function(y){var I=Math.PI/180,U=this.lat*I,$=y.lat*I,ae=Math.sin(U)*Math.sin($)+Math.cos(U)*Math.cos($)*Math.cos((y.lng-this.lng)*I),he=KQ*Math.acos(Math.min(ae,1));return he},Hc.prototype.toBounds=function(y){y===void 0&&(y=0);var I=40075017,U=360*y/I,$=U/Math.cos(Math.PI/180*this.lat);return new lh(new Hc(this.lng-$,this.lat-U),new Hc(this.lng+$,this.lat+U))},Hc.convert=function(y){if(y instanceof Hc)return y;if(Array.isArray(y)&&(y.length===2||y.length===3))return new Hc(Number(y[0]),Number(y[1]));if(!Array.isArray(y)&&typeof y=="object"&&y!==null)return new Hc(Number("lng"in y?y.lng:y.lon),Number(y.lat));throw new Error("`LngLatLike` argument must be specified as a LngLat instance, an object {lng: <lng>, lat: <lat>}, an object {lon: <lng>, lat: <lat>}, or an array of [<lng>, <lat>]")};var JQ=2*Math.PI*KQ;function $Q(m){return JQ*Math.cos(m*Math.PI/180)}function QQ(m){return(180+m)/360}function eee(m){return(180-180/Math.PI*Math.log(Math.tan(Math.PI/4+m*Math.PI/360)))/360}function tee(m,y){return m/$Q(y)}function rtt(m){return m*360-180}function gO(m){var y=180-m*360;return 360/Math.PI*Math.atan(Math.exp(y*Math.PI/180))-90}function itt(m,y){return m*$Q(gO(y))}function ntt(m){return 1/Math.cos(m*Math.PI/180)}var nb=function(y,I,U){U===void 0&&(U=0),this.x=+y,this.y=+I,this.z=+U};nb.fromLngLat=function(y,I){I===void 0&&(I=0);var U=Hc.convert(y);return new nb(QQ(U.lng),eee(U.lat),tee(I,U.lat))},nb.prototype.toLngLat=function(){return new Hc(rtt(this.x),gO(this.y))},nb.prototype.toAltitude=function(){return itt(this.z,this.y)},nb.prototype.meterInMercatorCoordinateUnits=function(){return 1/JQ*ntt(gO(this.y))};var ab=function(y,I,U){this.z=y,this.x=I,this.y=U,this.key=kS(0,y,y,I,U)};ab.prototype.equals=function(y){return this.z===y.z&&this.x===y.x&&this.y===y.y},ab.prototype.url=function(y,I){var U=ttt(this.x,this.y,this.z),$=att(this.z,this.x,this.y);return y[(this.x+this.y)%y.length].replace("{prefix}",(this.x%16).toString(16)+(this.y%16).toString(16)).replace("{z}",String(this.z)).replace("{x}",String(this.x)).replace("{y}",String(I==="tms"?Math.pow(2,this.z)-this.y-1:this.y)).replace("{quadkey}",$).replace("{bbox-epsg-3857}",U)},ab.prototype.getTilePoint=function(y){var I=Math.pow(2,this.z);return new u((y.x*I-this.x)*Ci,(y.y*I-this.y)*Ci)},ab.prototype.toString=function(){return this.z+"/"+this.x+"/"+this.y};var ree=function(y,I){this.wrap=y,this.canonical=I,this.key=kS(y,I.z,I.z,I.x,I.y)},uh=function(y,I,U,$,ae){this.overscaledZ=y,this.wrap=I,this.canonical=new ab(U,+$,+ae),this.key=kS(I,y,U,$,ae)};uh.prototype.equals=function(y){return this.overscaledZ===y.overscaledZ&&this.wrap===y.wrap&&this.canonical.equals(y.canonical)},uh.prototype.scaledTo=function(y){var I=this.canonical.z-y;return y>this.canonical.z?new uh(y,this.wrap,this.canonical.z,this.canonical.x,this.canonical.y):new uh(y,this.wrap,y,this.canonical.x>>I,this.canonical.y>>I)},uh.prototype.calculateScaledKey=function(y,I){var U=this.canonical.z-y;return y>this.canonical.z?kS(this.wrap*+I,y,this.canonical.z,this.canonical.x,this.canonical.y):kS(this.wrap*+I,y,y,this.canonical.x>>U,this.canonical.y>>U)},uh.prototype.isChildOf=function(y){if(y.wrap!==this.wrap)return!1;var I=this.canonical.z-y.canonical.z;return y.overscaledZ===0||y.overscaledZ<this.overscaledZ&&y.canonical.x===this.canonical.x>>I&&y.canonical.y===this.canonical.y>>I},uh.prototype.children=function(y){if(this.overscaledZ>=y)return[new uh(this.overscaledZ+1,this.wrap,this.canonical.z,this.canonical.x,this.canonical.y)];var I=this.canonical.z+1,U=this.canonical.x*2,$=this.canonical.y*2;return[new uh(I,this.wrap,I,U,$),new uh(I,this.wrap,I,U+1,$),new uh(I,this.wrap,I,U,$+1),new uh(I,this.wrap,I,U+1,$+1)]},uh.prototype.isLessThan=function(y){return this.wrap<y.wrap?!0:this.wrap>y.wrap?!1:this.overscaledZ<y.overscaledZ?!0:this.overscaledZ>y.overscaledZ?!1:this.canonical.x<y.canonical.x?!0:this.canonical.x>y.canonical.x?!1:this.canonical.y<y.canonical.y},uh.prototype.wrapped=function(){return new uh(this.overscaledZ,0,this.canonical.z,this.canonical.x,this.canonical.y)},uh.prototype.unwrapTo=function(y){return new uh(this.overscaledZ,y,this.canonical.z,this.canonical.x,this.canonical.y)},uh.prototype.overscaleFactor=function(){return Math.pow(2,this.overscaledZ-this.canonical.z)},uh.prototype.toUnwrapped=function(){return new ree(this.wrap,this.canonical)},uh.prototype.toString=function(){return this.overscaledZ+"/"+this.canonical.x+"/"+this.canonical.y},uh.prototype.getTilePoint=function(y){return this.canonical.getTilePoint(new nb(y.x-this.wrap,y.y))};function kS(m,y,I,U,$){m*=2,m<0&&(m=m*-1-1);var ae=1<<I;return(ae*ae*m+ae*$+U).toString(36)+I.toString(36)+y.toString(36)}function att(m,y,I){for(var U="",$,ae=m;ae>0;ae--)$=1<<ae-1,U+=(y&$?1:0)+(I&$?2:0);return U}X("CanonicalTileID",ab),X("OverscaledTileID",uh,{omit:["posMatrix"]});var vy=function(y,I,U){if(this.uid=y,I.height!==I.width)throw new RangeError("DEM tiles must be square");if(U&&U!=="mapbox"&&U!=="terrarium")return te('"'+U+'" is not a valid encoding type. Valid types include "mapbox" and "terrarium".');this.stride=I.height;var $=this.dim=I.height-2;this.data=new Uint32Array(I.data.buffer),this.encoding=U||"mapbox";for(var ae=0;ae<$;ae++)this.data[this._idx(-1,ae)]=this.data[this._idx(0,ae)],this.data[this._idx($,ae)]=this.data[this._idx($-1,ae)],this.data[this._idx(ae,-1)]=this.data[this._idx(ae,0)],this.data[this._idx(ae,$)]=this.data[this._idx(ae,$-1)];this.data[this._idx(-1,-1)]=this.data[this._idx(0,0)],this.data[this._idx($,-1)]=this.data[this._idx($-1,0)],this.data[this._idx(-1,$)]=this.data[this._idx(0,$-1)],this.data[this._idx($,$)]=this.data[this._idx($-1,$-1)]};vy.prototype.get=function(y,I){var U=new Uint8Array(this.data.buffer),$=this._idx(y,I)*4,ae=this.encoding==="terrarium"?this._unpackTerrarium:this._unpackMapbox;return ae(U[$],U[$+1],U[$+2])},vy.prototype.getUnpackVector=function(){return this.encoding==="terrarium"?[256,1,1/256,32768]:[6553.6,25.6,.1,1e4]},vy.prototype._idx=function(y,I){if(y<-1||y>=this.dim+1||I<-1||I>=this.dim+1)throw new RangeError("out of range source coordinates for DEM data");return(I+1)*this.stride+(y+1)},vy.prototype._unpackMapbox=function(y,I,U){return(y*256*256+I*256+U)/10-1e4},vy.prototype._unpackTerrarium=function(y,I,U){return y*256+I+U/256-32768},vy.prototype.getPixels=function(){return new Sh({width:this.stride,height:this.stride},new Uint8Array(this.data.buffer))},vy.prototype.backfillBorder=function(y,I,U){if(this.dim!==y.dim)throw new Error("dem dimension mismatch");var $=I*this.dim,ae=I*this.dim+this.dim,he=U*this.dim,Oe=U*this.dim+this.dim;switch(I){case-1:$=ae-1;break;case 1:ae=$+1;break}switch(U){case-1:he=Oe-1;break;case 1:Oe=he+1;break}for(var rt=-I*this.dim,gt=-U*this.dim,Et=he;Et<Oe;Et++)for(var or=$;or<ae;or++)this.data[this._idx(or,Et)]=y.data[this._idx(or+rt,Et+gt)]},X("DEMData",vy);function ott(m,y){var I={};if(!y)return I;for(var U=function(){var he=ae[$],Oe=he.layerIds.map(function(or){return y.getLayer(or)}).filter(Boolean);if(Oe.length!==0){he.layers=Oe,he.stateDependentLayerIds&&(he.stateDependentLayers=he.stateDependentLayerIds.map(function(or){return Oe.filter(function(_r){return _r.id===or})[0]}));for(var rt=0,gt=Oe;rt<gt.length;rt+=1){var Et=gt[rt];I[Et.id]=he}}},$=0,ae=m;$<ae.length;$+=1)U();return I}var r6=function(y){this._stringToNumber={},this._numberToString=[];for(var I=0;I<y.length;I++){var U=y[I];this._stringToNumber[U]=I,this._numberToString[I]=U}};r6.prototype.encode=function(y){return this._stringToNumber[y]},r6.prototype.decode=function(y){return this._numberToString[y]};var i6=function(y,I,U,$,ae){this.type="Feature",this._vectorTileFeature=y,y._z=I,y._x=U,y._y=$,this.properties=y.properties,this.id=ae},mO={geometry:{configurable:!0}};mO.geometry.get=function(){return this._geometry===void 0&&(this._geometry=this._vectorTileFeature.toGeoJSON(this._vectorTileFeature._x,this._vectorTileFeature._y,this._vectorTileFeature._z).geometry),this._geometry},mO.geometry.set=function(m){this._geometry=m},i6.prototype.toJSON=function(){var y={geometry:this.geometry};for(var I in this)I==="_geometry"||I==="_vectorTileFeature"||(y[I]=this[I]);return y},Object.defineProperties(i6.prototype,mO);var Qw=function(){this.state={},this.stateChanges={},this.deletedStates={}};Qw.prototype.updateState=function(y,I,U){var $=String(I);if(this.stateChanges[y]=this.stateChanges[y]||{},this.stateChanges[y][$]=this.stateChanges[y][$]||{},x(this.stateChanges[y][$],U),this.deletedStates[y]===null){this.deletedStates[y]={};for(var ae in this.state[y])ae!==$&&(this.deletedStates[y][ae]=null)}else{var he=this.deletedStates[y]&&this.deletedStates[y][$]===null;if(he){this.deletedStates[y][$]={};for(var Oe in this.state[y][$])U[Oe]||(this.deletedStates[y][$][Oe]=null)}else for(var rt in U){var gt=this.deletedStates[y]&&this.deletedStates[y][$]&&this.deletedStates[y][$][rt]===null;gt&&delete this.deletedStates[y][$][rt]}}},Qw.prototype.removeFeatureState=function(y,I,U){var $=this.deletedStates[y]===null;if(!$){var ae=String(I);if(this.deletedStates[y]=this.deletedStates[y]||{},U&&I!==void 0)this.deletedStates[y][ae]!==null&&(this.deletedStates[y][ae]=this.deletedStates[y][ae]||{},this.deletedStates[y][ae][U]=null);else if(I!==void 0){var he=this.stateChanges[y]&&this.stateChanges[y][ae];if(he){this.deletedStates[y][ae]={};for(U in this.stateChanges[y][ae])this.deletedStates[y][ae][U]=null}else this.deletedStates[y][ae]=null}else this.deletedStates[y]=null}},Qw.prototype.getState=function(y,I){var U=String(I),$=this.state[y]||{},ae=this.stateChanges[y]||{},he=x({},$[U],ae[U]);if(this.deletedStates[y]===null)return{};if(this.deletedStates[y]){var Oe=this.deletedStates[y][I];if(Oe===null)return{};for(var rt in Oe)delete he[rt]}return he},Qw.prototype.initializeTileState=function(y,I){y.setFeatureState(this.state,I)},Qw.prototype.coalesceChanges=function(y,I){var U={};for(var $ in this.stateChanges){this.state[$]=this.state[$]||{};var ae={};for(var he in this.stateChanges[$])this.state[$][he]||(this.state[$][he]={}),x(this.state[$][he],this.stateChanges[$][he]),ae[he]=this.state[$][he];U[$]=ae}for(var Oe in this.deletedStates){this.state[Oe]=this.state[Oe]||{};var rt={};if(this.deletedStates[Oe]===null)for(var gt in this.state[Oe])rt[gt]={},this.state[Oe][gt]={};else for(var Et in this.deletedStates[Oe]){var or=this.deletedStates[Oe][Et]===null;if(or)this.state[Oe][Et]={};else for(var _r=0,pr=Object.keys(this.deletedStates[Oe][Et]);_r<pr.length;_r+=1){var Fr=pr[_r];delete this.state[Oe][Et][Fr]}rt[Et]=this.state[Oe][Et]}U[Oe]=U[Oe]||{},x(U[Oe],rt)}if(this.stateChanges={},this.deletedStates={},Object.keys(U).length!==0)for(var oi in y){var Hi=y[oi];Hi.setFeatureState(U,I)}};var py=function(y,I){this.tileID=y,this.x=y.canonical.x,this.y=y.canonical.y,this.z=y.canonical.z,this.grid=new au(Ci,16,0),this.grid3D=new au(Ci,16,0),this.featureIndexArray=new Ys,this.promoteId=I};py.prototype.insert=function(y,I,U,$,ae,he){var Oe=this.featureIndexArray.length;this.featureIndexArray.emplaceBack(U,$,ae);for(var rt=he?this.grid3D:this.grid,gt=0;gt<I.length;gt++){for(var Et=I[gt],or=[1/0,1/0,-1/0,-1/0],_r=0;_r<Et.length;_r++){var pr=Et[_r];or[0]=Math.min(or[0],pr.x),or[1]=Math.min(or[1],pr.y),or[2]=Math.max(or[2],pr.x),or[3]=Math.max(or[3],pr.y)}or[0]<Ci&&or[1]<Ci&&or[2]>=0&&or[3]>=0&&rt.insert(Oe,or[0],or[1],or[2],or[3])}},py.prototype.loadVTLayers=function(){return this.vtLayers||(this.vtLayers=new pg.VectorTile(new Xa(this.rawTileData)).layers,this.sourceLayerCoder=new r6(this.vtLayers?Object.keys(this.vtLayers).sort():["_geojsonTileLayer"])),this.vtLayers},py.prototype.query=function(y,I,U,$){var ae=this;this.loadVTLayers();for(var he=y.params||{},Oe=Ci/y.tileSize/y.scale,rt=be(he.filter),gt=y.queryGeometry,Et=y.queryPadding*Oe,or=nee(gt),_r=this.grid.query(or.minX-Et,or.minY-Et,or.maxX+Et,or.maxY+Et),pr=nee(y.cameraQueryGeometry),Fr=this.grid3D.query(pr.minX-Et,pr.minY-Et,pr.maxX+Et,pr.maxY+Et,function(Zn,ga,ha,eo){return gp(y.cameraQueryGeometry,Zn-Et,ga-Et,ha+Et,eo+Et)}),oi=0,Hi=Fr;oi<Hi.length;oi+=1){var Ai=Hi[oi];_r.push(Ai)}_r.sort(stt);for(var bn={},nn,xn=function(Zn){var ga=_r[Zn];if(ga!==nn){nn=ga;var ha=ae.featureIndexArray.get(ga),eo=null;ae.loadMatchingFeature(bn,ha.bucketIndex,ha.sourceLayerIndex,ha.featureIndex,rt,he.layers,he.availableImages,I,U,$,function(za,Za,Ko){return eo||(eo=zn(za)),Za.queryIntersectsFeature(gt,za,Ko,eo,ae.z,y.transform,Oe,y.pixelPosMatrix)})}},Pn=0;Pn<_r.length;Pn++)xn(Pn);return bn},py.prototype.loadMatchingFeature=function(y,I,U,$,ae,he,Oe,rt,gt,Et,or){var _r=this.bucketLayerIDs[I];if(!(he&&!N(he,_r))){var pr=this.sourceLayerCoder.decode(U),Fr=this.vtLayers[pr],oi=Fr.feature($);if(ae.needGeometry){var Hi=Ja(oi,!0);if(!ae.filter(new Gn(this.tileID.overscaledZ),Hi,this.tileID.canonical))return}else if(!ae.filter(new Gn(this.tileID.overscaledZ),oi))return;for(var Ai=this.getId(oi,pr),bn=0;bn<_r.length;bn++){var nn=_r[bn];if(!(he&&he.indexOf(nn)<0)){var xn=rt[nn];if(xn){var Pn={};Ai!==void 0&&Et&&(Pn=Et.getState(xn.sourceLayer||"_geojsonTileLayer",Ai));var Zn=x({},gt[nn]);Zn.paint=iee(Zn.paint,xn.paint,oi,Pn,Oe),Zn.layout=iee(Zn.layout,xn.layout,oi,Pn,Oe);var ga=!or||or(oi,xn,Pn);if(ga){var ha=new i6(oi,this.z,this.x,this.y,Ai);ha.layer=Zn;var eo=y[nn];eo===void 0&&(eo=y[nn]=[]),eo.push({featureIndex:$,feature:ha,intersectionZ:ga})}}}}}},py.prototype.lookupSymbolFeatures=function(y,I,U,$,ae,he,Oe,rt){var gt={};this.loadVTLayers();for(var Et=be(ae),or=0,_r=y;or<_r.length;or+=1){var pr=_r[or];this.loadMatchingFeature(gt,U,$,pr,Et,he,Oe,rt,I)}return gt},py.prototype.hasLayer=function(y){for(var I=0,U=this.bucketLayerIDs;I<U.length;I+=1)for(var $=U[I],ae=0,he=$;ae<he.length;ae+=1){var Oe=he[ae];if(y===Oe)return!0}return!1},py.prototype.getId=function(y,I){var U=y.id;if(this.promoteId){var $=typeof this.promoteId=="string"?this.promoteId:this.promoteId[I];U=y.properties[$],typeof U=="boolean"&&(U=Number(U))}return U},X("FeatureIndex",py,{omit:["rawTileData","sourceLayerCoder"]});function iee(m,y,I,U,$){return G(m,function(ae,he){var Oe=y instanceof Sc?y.get(he):null;return Oe&&Oe.evaluate?Oe.evaluate(I,U,$):Oe})}function nee(m){for(var y=1/0,I=1/0,U=-1/0,$=-1/0,ae=0,he=m;ae<he.length;ae+=1){var Oe=he[ae];y=Math.min(y,Oe.x),I=Math.min(I,Oe.y),U=Math.max(U,Oe.x),$=Math.max($,Oe.y)}return{minX:y,minY:I,maxX:U,maxY:$}}function stt(m,y){return y-m}var ltt=3e4,Zh=function(y,I){this.tileID=y,this.uid=g(),this.uses=0,this.tileSize=I,this.buckets={},this.expirationTime=null,this.queryPadding=0,this.hasSymbolBuckets=!1,this.hasRTLText=!1,this.dependencies={},this.expiredRequestCount=0,this.state="loading"};Zh.prototype.registerFadeDuration=function(y){var I=y+this.timeAdded;I<ut.now()||this.fadeEndTime&&I<this.fadeEndTime||(this.fadeEndTime=I)},Zh.prototype.wasRequested=function(){return this.state==="errored"||this.state==="loaded"||this.state==="reloading"},Zh.prototype.loadVectorData=function(y,I,U){if(this.hasData()&&this.unloadVectorData(),this.state="loaded",!y){this.collisionBoxArray=new Va;return}y.featureIndex&&(this.latestFeatureIndex=y.featureIndex,y.rawTileData?(this.latestRawTileData=y.rawTileData,this.latestFeatureIndex.rawTileData=y.rawTileData):this.latestRawTileData&&(this.latestFeatureIndex.rawTileData=this.latestRawTileData)),this.collisionBoxArray=y.collisionBoxArray,this.buckets=ott(y.buckets,I.style),this.hasSymbolBuckets=!1;for(var $ in this.buckets){var ae=this.buckets[$];if(ae instanceof Pu)if(this.hasSymbolBuckets=!0,U)ae.justReloaded=!0;else break}if(this.hasRTLText=!1,this.hasSymbolBuckets)for(var he in this.buckets){var Oe=this.buckets[he];if(Oe instanceof Pu&&Oe.hasRTLText){this.hasRTLText=!0,Xs();break}}this.queryPadding=0;for(var rt in this.buckets){var gt=this.buckets[rt];this.queryPadding=Math.max(this.queryPadding,I.style.getLayer(rt).queryRadius(gt))}y.imageAtlas&&(this.imageAtlas=y.imageAtlas),y.glyphAtlasImage&&(this.glyphAtlasImage=y.glyphAtlasImage)},Zh.prototype.unloadVectorData=function(){for(var y in this.buckets)this.buckets[y].destroy();this.buckets={},this.imageAtlasTexture&&this.imageAtlasTexture.destroy(),this.imageAtlas&&(this.imageAtlas=null),this.glyphAtlasTexture&&this.glyphAtlasTexture.destroy(),this.latestFeatureIndex=null,this.state="unloaded"},Zh.prototype.getBucket=function(y){return this.buckets[y.id]},Zh.prototype.upload=function(y){for(var I in this.buckets){var U=this.buckets[I];U.uploadPending()&&U.upload(y)}var $=y.gl;this.imageAtlas&&!this.imageAtlas.uploaded&&(this.imageAtlasTexture=new ib(y,this.imageAtlas.image,$.RGBA),this.imageAtlas.uploaded=!0),this.glyphAtlasImage&&(this.glyphAtlasTexture=new ib(y,this.glyphAtlasImage,$.ALPHA),this.glyphAtlasImage=null)},Zh.prototype.prepare=function(y){this.imageAtlas&&this.imageAtlas.patchUpdatedImages(y,this.imageAtlasTexture)},Zh.prototype.queryRenderedFeatures=function(y,I,U,$,ae,he,Oe,rt,gt,Et){return!this.latestFeatureIndex||!this.latestFeatureIndex.rawTileData?{}:this.latestFeatureIndex.query({queryGeometry:$,cameraQueryGeometry:ae,scale:he,tileSize:this.tileSize,pixelPosMatrix:Et,transform:rt,params:Oe,queryPadding:this.queryPadding*gt},y,I,U)},Zh.prototype.querySourceFeatures=function(y,I){var U=this.latestFeatureIndex;if(!(!U||!U.rawTileData)){var $=U.loadVTLayers(),ae=I?I.sourceLayer:"",he=$._geojsonTileLayer||$[ae];if(he)for(var Oe=be(I&&I.filter),rt=this.tileID.canonical,gt=rt.z,Et=rt.x,or=rt.y,_r={z:gt,x:Et,y:or},pr=0;pr<he.length;pr++){var Fr=he.feature(pr);if(Oe.needGeometry){var oi=Ja(Fr,!0);if(!Oe.filter(new Gn(this.tileID.overscaledZ),oi,this.tileID.canonical))continue}else if(!Oe.filter(new Gn(this.tileID.overscaledZ),Fr))continue;var Hi=U.getId(Fr,ae),Ai=new i6(Fr,gt,Et,or,Hi);Ai.tile=_r,y.push(Ai)}}},Zh.prototype.hasData=function(){return this.state==="loaded"||this.state==="reloading"||this.state==="expired"},Zh.prototype.patternsLoaded=function(){return this.imageAtlas&&!!Object.keys(this.imageAtlas.patternPositions).length},Zh.prototype.setExpiryData=function(y){var I=this.expirationTime;if(y.cacheControl){var U=me(y.cacheControl);U["max-age"]&&(this.expirationTime=Date.now()+U["max-age"]*1e3)}else y.expires&&(this.expirationTime=new Date(y.expires).getTime());if(this.expirationTime){var $=Date.now(),ae=!1;if(this.expirationTime>$)ae=!1;else if(!I)ae=!0;else if(this.expirationTime<I)ae=!0;else{var he=this.expirationTime-I;he?this.expirationTime=$+Math.max(he,ltt):ae=!0}ae?(this.expiredRequestCount++,this.state="expired"):this.expiredRequestCount=0}},Zh.prototype.getExpiryTimeout=function(){if(this.expirationTime)return this.expiredRequestCount?1e3*(1<<Math.min(this.expiredRequestCount-1,31)):Math.min(this.expirationTime-new Date().getTime(),Math.pow(2,31)-1)},Zh.prototype.setFeatureState=function(y,I){if(!(!this.latestFeatureIndex||!this.latestFeatureIndex.rawTileData||Object.keys(y).length===0)){var U=this.latestFeatureIndex.loadVTLayers();for(var $ in this.buckets)if(I.style.hasLayer($)){var ae=this.buckets[$],he=ae.layers[0].sourceLayer||"_geojsonTileLayer",Oe=U[he],rt=y[he];if(!(!Oe||!rt||Object.keys(rt).length===0)){ae.update(rt,Oe,this.imageAtlas&&this.imageAtlas.patternPositions||{});var gt=I&&I.style&&I.style.getLayer($);gt&&(this.queryPadding=Math.max(this.queryPadding,gt.queryRadius(ae)))}}}},Zh.prototype.holdingForFade=function(){return this.symbolFadeHoldUntil!==void 0},Zh.prototype.symbolFadeFinished=function(){return!this.symbolFadeHoldUntil||this.symbolFadeHoldUntil<ut.now()},Zh.prototype.clearFadeHold=function(){this.symbolFadeHoldUntil=void 0},Zh.prototype.setHoldDuration=function(y){this.symbolFadeHoldUntil=ut.now()+y},Zh.prototype.setDependencies=function(y,I){for(var U={},$=0,ae=I;$<ae.length;$+=1){var he=ae[$];U[he]=!0}this.dependencies[y]=U},Zh.prototype.hasDependency=function(y,I){for(var U=0,$=y;U<$.length;U+=1){var ae=$[U],he=this.dependencies[ae];if(he)for(var Oe=0,rt=I;Oe<rt.length;Oe+=1){var gt=rt[Oe];if(he[gt])return!0}}return!1};var utt=["type","source","source-layer","minzoom","maxzoom","filter","layout"],D1=f.performance,aee=function(y){this._marks={start:[y.url,"start"].join("#"),end:[y.url,"end"].join("#"),measure:y.url.toString()},D1.mark(this._marks.start)};aee.prototype.finish=function(){D1.mark(this._marks.end);var y=D1.getEntriesByName(this._marks.measure);return y.length===0&&(D1.measure(this._marks.measure,this._marks.start,this._marks.end),y=D1.getEntriesByName(this._marks.measure),D1.clearMarks(this._marks.start),D1.clearMarks(this._marks.end),D1.clearMeasures(this._marks.measure)),y},i.Actor=$w,i.AlphaImage=Dv,i.CanonicalTileID=ab,i.CollisionBoxArray=Va,i.Color=cs,i.DEMData=vy,i.DataConstantProperty=Me,i.DictionaryCoder=r6,i.EXTENT=Ci,i.ErrorEvent=da,i.EvaluationParameters=Gn,i.Event=Wo,i.Evented=Wn,i.FeatureIndex=py,i.FillBucket=mp,i.FillExtrusionBucket=Vp,i.ImageAtlas=gg,i.ImagePosition=Zf,i.LineBucket=sh,i.LngLat=Hc,i.LngLatBounds=lh,i.MercatorCoordinate=nb,i.ONE_EM=Tn,i.OverscaledTileID=uh,i.Point=u,i.Point$1=u,i.Properties=Nr,i.Protobuf=Xa,i.RGBAImage=Sh,i.RequestManager=$e,i.RequestPerformance=aee,i.ResourceType=ra,i.SegmentVector=io,i.SourceFeatureState=Qw,i.StructArrayLayout1ui2=ba,i.StructArrayLayout2f1f2i16=Vi,i.StructArrayLayout2i4=Yr,i.StructArrayLayout3ui6=pn,i.StructArrayLayout4i8=xi,i.SymbolBucket=Pu,i.Texture=ib,i.Tile=Zh,i.Transitionable=Uo,i.Uniform1f=Qe,i.Uniform1i=Ve,i.Uniform2f=at,i.Uniform3f=Ct,i.Uniform4f=Ot,i.UniformColor=Rt,i.UniformMatrix4f=Dt,i.UnwrappedTileID=ree,i.ValidationError=_a,i.WritingMode=uv,i.ZoomHistory=kt,i.add=Rv,i.addDynamicAttributes=dO,i.asyncAll=E,i.bezier=_,i.bindAll=O,i.browser=ut,i.cacheEntryPossiblyAdded=ji,i.clamp=p,i.clearTileCache=wi,i.clipLine=BQ,i.clone=w1,i.clone$1=j,i.clone$2=kw,i.collisionCircleLayout=Lt,i.config=pt,i.create=b1,i.create$1=jh,i.create$2=hg,i.createCommonjsModule=a,i.createExpression=oo,i.createLayout=Wi,i.createStyleLayer=ett,i.cross=B9,i.deepEqual=h,i.dot=q9,i.dot$1=H9,i.ease=b,i.emitValidationErrors=Lu,i.endsWith=V,i.enforceCacheSizeLimit=In,i.evaluateSizeForFeature=IQ,i.evaluateSizeForZoom=RQ,i.evaluateVariableOffset=VQ,i.evented=Ia,i.extend=x,i.featureFilter=be,i.filterObject=Z,i.fromRotation=im,i.getAnchorAlignment=SS,i.getAnchorJustification=hO,i.getArrayBuffer=ri,i.getImage=ua,i.getJSON=Xr,i.getRTLTextPluginStatus=yo,i.getReferrer=Ut,i.getVideo=ma,i.identity=sy,i.invert=nm,i.isChar=nt,i.isMapboxURL=St,i.keysDifference=L,i.makeRequest=Er,i.mapObject=G,i.mercatorXfromLng=QQ,i.mercatorYfromLat=eee,i.mercatorZfromAltitude=tee,i.mul=Nx,i.multiply=am,i.mvt=pg,i.nextPowerOfTwo=T,i.normalize=Ux,i.number=nl,i.offscreenCanvasSupported=Fn,i.ortho=wu,i.parseGlyphPBF=v0,i.pbf=Xa,i.performSymbolLayout=Det,i.perspective=T1,i.pick=C,i.plugin=Ms,i.polygonIntersectsPolygon=so,i.postMapLoadEvent=je,i.postTurnstileEvent=Ge,i.potpack=ep,i.refProperties=utt,i.register=X,i.registerForPluginStateChange=Da,i.renderColorRamp=Hx,i.rotate=oy,i.rotateX=Sd,i.rotateZ=ly,i.scale=eu,i.scale$1=G9,i.scale$2=Cw,i.setCacheLimits=On,i.setRTLTextPlugin=go,i.sphericalToCartesian=Ee,i.sqrLen=aS,i.styleSpec=Rn,i.sub=U9,i.symbolSize=Aet,i.transformMat3=N9,i.transformMat4=uy,i.translate=vc,i.triggerPluginCompletionEvent=ia,i.uniqueId=g,i.validateCustomStyleLayer=Jet,i.validateLight=To,i.validateStyle=wo,i.values=S,i.vectorTile=pg,i.version=o,i.warnOnce=te,i.webpSupported=Zt,i.window=f,i.wrap=k}),n(["./shared"],function(i){"use strict";function a(Ut){var wt=typeof Ut;if(wt==="number"||wt==="boolean"||wt==="string"||Ut===void 0||Ut===null)return JSON.stringify(Ut);if(Array.isArray(Ut)){for(var rr="[",nr=0,Er=Ut;nr<Er.length;nr+=1){var Xr=Er[nr];rr+=a(Xr)+","}return rr+"]"}for(var ri=Object.keys(Ut).sort(),Qr="{",Oi=0;Oi<ri.length;Oi++)Qr+=JSON.stringify(ri[Oi])+":"+a(Ut[ri[Oi]])+",";return Qr+"}"}function o(Ut){for(var wt="",rr=0,nr=i.refProperties;rr<nr.length;rr+=1){var Er=nr[rr];wt+="/"+a(Ut[Er])}return wt}function s(Ut,wt){for(var rr={},nr=0;nr<Ut.length;nr++){var Er=wt&&wt[Ut[nr].id]||o(Ut[nr]);wt&&(wt[Ut[nr].id]=Er);var Xr=rr[Er];Xr||(Xr=rr[Er]=[]),Xr.push(Ut[nr])}var ri=[];for(var Qr in rr)ri.push(rr[Qr]);return ri}var l=function(wt){this.keyCache={},wt&&this.replace(wt)};l.prototype.replace=function(wt){this._layerConfigs={},this._layers={},this.update(wt,[])},l.prototype.update=function(wt,rr){for(var nr=this,Er=0,Xr=wt;Er<Xr.length;Er+=1){var ri=Xr[Er];this._layerConfigs[ri.id]=ri;var Qr=this._layers[ri.id]=i.createStyleLayer(ri);Qr._featureFilter=i.featureFilter(Qr.filter),this.keyCache[ri.id]&&delete this.keyCache[ri.id]}for(var Oi=0,$i=rr;Oi<$i.length;Oi+=1){var tn=$i[Oi];delete this.keyCache[tn],delete this._layerConfigs[tn],delete this._layers[tn]}this.familiesBySource={};for(var fn=s(i.values(this._layerConfigs),this.keyCache),yn=0,Sn=fn;yn<Sn.length;yn+=1){var Ba=Sn[yn],ua=Ba.map(function(Wn){return nr._layers[Wn.id]}),ma=ua[0];if(ma.visibility!=="none"){var Wa=ma.source||"",Fa=this.familiesBySource[Wa];Fa||(Fa=this.familiesBySource[Wa]={});var Wo=ma.sourceLayer||"_geojsonTileLayer",da=Fa[Wo];da||(da=Fa[Wo]=[]),da.push(ua)}}};var u=1,c=function(wt){var rr={},nr=[];for(var Er in wt){var Xr=wt[Er],ri=rr[Er]={};for(var Qr in Xr){var Oi=Xr[+Qr];if(!(!Oi||Oi.bitmap.width===0||Oi.bitmap.height===0)){var $i={x:0,y:0,w:Oi.bitmap.width+2*u,h:Oi.bitmap.height+2*u};nr.push($i),ri[Qr]={rect:$i,metrics:Oi.metrics}}}}var tn=i.potpack(nr),fn=tn.w,yn=tn.h,Sn=new i.AlphaImage({width:fn||1,height:yn||1});for(var Ba in wt){var ua=wt[Ba];for(var ma in ua){var Wa=ua[+ma];if(!(!Wa||Wa.bitmap.width===0||Wa.bitmap.height===0)){var Fa=rr[Ba][ma].rect;i.AlphaImage.copy(Wa.bitmap,Sn,{x:0,y:0},{x:Fa.x+u,y:Fa.y+u},Wa.bitmap)}}}this.image=Sn,this.positions=rr};i.register("GlyphAtlas",c);var f=function(wt){this.tileID=new i.OverscaledTileID(wt.tileID.overscaledZ,wt.tileID.wrap,wt.tileID.canonical.z,wt.tileID.canonical.x,wt.tileID.canonical.y),this.uid=wt.uid,this.zoom=wt.zoom,this.pixelRatio=wt.pixelRatio,this.tileSize=wt.tileSize,this.source=wt.source,this.overscaling=this.tileID.overscaleFactor(),this.showCollisionBoxes=wt.showCollisionBoxes,this.collectResourceTiming=!!wt.collectResourceTiming,this.returnDependencies=!!wt.returnDependencies,this.promoteId=wt.promoteId};f.prototype.parse=function(wt,rr,nr,Er,Xr){var ri=this;this.status="parsing",this.data=wt,this.collisionBoxArray=new i.CollisionBoxArray;var Qr=new i.DictionaryCoder(Object.keys(wt.layers).sort()),Oi=new i.FeatureIndex(this.tileID,this.promoteId);Oi.bucketLayerIDs=[];var $i={},tn={featureIndex:Oi,iconDependencies:{},patternDependencies:{},glyphDependencies:{},availableImages:nr},fn=rr.familiesBySource[this.source];for(var yn in fn){var Sn=wt.layers[yn];if(Sn){Sn.version===1&&i.warnOnce('Vector tile source "'+this.source+'" layer "'+yn+'" does not use vector tile spec v2 and therefore may have some rendering errors.');for(var Ba=Qr.encode(yn),ua=[],ma=0;ma<Sn.length;ma++){var Wa=Sn.feature(ma),Fa=Oi.getId(Wa,yn);ua.push({feature:Wa,id:Fa,index:ma,sourceLayerIndex:Ba})}for(var Wo=0,da=fn[yn];Wo<da.length;Wo+=1){var Wn=da[Wo],Ha=Wn[0];if(!(Ha.minzoom&&this.zoom<Math.floor(Ha.minzoom))&&!(Ha.maxzoom&&this.zoom>=Ha.maxzoom)&&Ha.visibility!=="none"){h(Wn,this.zoom,nr);var vo=$i[Ha.id]=Ha.createBucket({index:Oi.bucketLayerIDs.length,layers:Wn,zoom:this.zoom,pixelRatio:this.pixelRatio,overscaling:this.overscaling,collisionBoxArray:this.collisionBoxArray,sourceLayerIndex:Ba,sourceID:this.source});vo.populate(ua,tn,this.tileID.canonical),Oi.bucketLayerIDs.push(Wn.map(function(Li){return Li.id}))}}}}var jn,Mt,kr,Jr,vi=i.mapObject(tn.glyphDependencies,function(Li){return Object.keys(Li).map(Number)});Object.keys(vi).length?Er.send("getGlyphs",{uid:this.uid,stacks:vi},function(Li,_n){jn||(jn=Li,Mt=_n,Mn.call(ri))}):Mt={};var hn=Object.keys(tn.iconDependencies);hn.length?Er.send("getImages",{icons:hn,source:this.source,tileID:this.tileID,type:"icons"},function(Li,_n){jn||(jn=Li,kr=_n,Mn.call(ri))}):kr={};var An=Object.keys(tn.patternDependencies);An.length?Er.send("getImages",{icons:An,source:this.source,tileID:this.tileID,type:"patterns"},function(Li,_n){jn||(jn=Li,Jr=_n,Mn.call(ri))}):Jr={},Mn.call(this);function Mn(){if(jn)return Xr(jn);if(Mt&&kr&&Jr){var Li=new c(Mt),_n=new i.ImageAtlas(kr,Jr);for(var ya in $i){var Jn=$i[ya];Jn instanceof i.SymbolBucket?(h(Jn.layers,this.zoom,nr),i.performSymbolLayout(Jn,Mt,Li.positions,kr,_n.iconPositions,this.showCollisionBoxes,this.tileID.canonical)):Jn.hasPattern&&(Jn instanceof i.LineBucket||Jn instanceof i.FillBucket||Jn instanceof i.FillExtrusionBucket)&&(h(Jn.layers,this.zoom,nr),Jn.addFeatures(tn,this.tileID.canonical,_n.patternPositions))}this.status="done",Xr(null,{buckets:i.values($i).filter(function(Ma){return!Ma.isEmpty()}),featureIndex:Oi,collisionBoxArray:this.collisionBoxArray,glyphAtlasImage:Li.image,imageAtlas:_n,glyphMap:this.returnDependencies?Mt:null,iconMap:this.returnDependencies?kr:null,glyphPositions:this.returnDependencies?Li.positions:null})}}};function h(Ut,wt,rr){for(var nr=new i.EvaluationParameters(wt),Er=0,Xr=Ut;Er<Xr.length;Er+=1){var ri=Xr[Er];ri.recalculate(nr,rr)}}function d(Ut,wt){var rr=i.getArrayBuffer(Ut.request,function(nr,Er,Xr,ri){nr?wt(nr):Er&&wt(null,{vectorTile:new i.vectorTile.VectorTile(new i.pbf(Er)),rawData:Er,cacheControl:Xr,expires:ri})});return function(){rr.cancel(),wt()}}var v=function(wt,rr,nr,Er){this.actor=wt,this.layerIndex=rr,this.availableImages=nr,this.loadVectorData=Er||d,this.loading={},this.loaded={}};v.prototype.loadTile=function(wt,rr){var nr=this,Er=wt.uid;this.loading||(this.loading={});var Xr=wt&&wt.request&&wt.request.collectResourceTiming?new i.RequestPerformance(wt.request):!1,ri=this.loading[Er]=new f(wt);ri.abort=this.loadVectorData(wt,function(Qr,Oi){if(delete nr.loading[Er],Qr||!Oi)return ri.status="done",nr.loaded[Er]=ri,rr(Qr);var $i=Oi.rawData,tn={};Oi.expires&&(tn.expires=Oi.expires),Oi.cacheControl&&(tn.cacheControl=Oi.cacheControl);var fn={};if(Xr){var yn=Xr.finish();yn&&(fn.resourceTiming=JSON.parse(JSON.stringify(yn)))}ri.vectorTile=Oi.vectorTile,ri.parse(Oi.vectorTile,nr.layerIndex,nr.availableImages,nr.actor,function(Sn,Ba){if(Sn||!Ba)return rr(Sn);rr(null,i.extend({rawTileData:$i.slice(0)},Ba,tn,fn))}),nr.loaded=nr.loaded||{},nr.loaded[Er]=ri})},v.prototype.reloadTile=function(wt,rr){var nr=this,Er=this.loaded,Xr=wt.uid,ri=this;if(Er&&Er[Xr]){var Qr=Er[Xr];Qr.showCollisionBoxes=wt.showCollisionBoxes;var Oi=function($i,tn){var fn=Qr.reloadCallback;fn&&(delete Qr.reloadCallback,Qr.parse(Qr.vectorTile,ri.layerIndex,nr.availableImages,ri.actor,fn)),rr($i,tn)};Qr.status==="parsing"?Qr.reloadCallback=Oi:Qr.status==="done"&&(Qr.vectorTile?Qr.parse(Qr.vectorTile,this.layerIndex,this.availableImages,this.actor,Oi):Oi())}},v.prototype.abortTile=function(wt,rr){var nr=this.loading,Er=wt.uid;nr&&nr[Er]&&nr[Er].abort&&(nr[Er].abort(),delete nr[Er]),rr()},v.prototype.removeTile=function(wt,rr){var nr=this.loaded,Er=wt.uid;nr&&nr[Er]&&delete nr[Er],rr()};var _=i.window.ImageBitmap,b=function(){this.loaded={}};b.prototype.loadTile=function(wt,rr){var nr=wt.uid,Er=wt.encoding,Xr=wt.rawImageData,ri=_&&Xr instanceof _?this.getImageData(Xr):Xr,Qr=new i.DEMData(nr,ri,Er);this.loaded=this.loaded||{},this.loaded[nr]=Qr,rr(null,Qr)},b.prototype.getImageData=function(wt){(!this.offscreenCanvas||!this.offscreenCanvasContext)&&(this.offscreenCanvas=new OffscreenCanvas(wt.width,wt.height),this.offscreenCanvasContext=this.offscreenCanvas.getContext("2d")),this.offscreenCanvas.width=wt.width,this.offscreenCanvas.height=wt.height,this.offscreenCanvasContext.drawImage(wt,0,0,wt.width,wt.height);var rr=this.offscreenCanvasContext.getImageData(-1,-1,wt.width+2,wt.height+2);return this.offscreenCanvasContext.clearRect(0,0,this.offscreenCanvas.width,this.offscreenCanvas.height),new i.RGBAImage({width:rr.width,height:rr.height},rr.data)},b.prototype.removeTile=function(wt){var rr=this.loaded,nr=wt.uid;rr&&rr[nr]&&delete rr[nr]};var p=k;function k(Ut,wt){var rr=Ut&&Ut.type,nr;if(rr==="FeatureCollection")for(nr=0;nr<Ut.features.length;nr++)k(Ut.features[nr],wt);else if(rr==="GeometryCollection")for(nr=0;nr<Ut.geometries.length;nr++)k(Ut.geometries[nr],wt);else if(rr==="Feature")k(Ut.geometry,wt);else if(rr==="Polygon")E(Ut.coordinates,wt);else if(rr==="MultiPolygon")for(nr=0;nr<Ut.coordinates.length;nr++)E(Ut.coordinates[nr],wt);return Ut}function E(Ut,wt){if(Ut.length!==0){S(Ut[0],wt);for(var rr=1;rr<Ut.length;rr++)S(Ut[rr],!wt)}}function S(Ut,wt){for(var rr=0,nr=0,Er=Ut.length,Xr=Er-1;nr<Er;Xr=nr++)rr+=(Ut[nr][0]-Ut[Xr][0])*(Ut[Xr][1]+Ut[nr][1]);rr>=0!=!!wt&&Ut.reverse()}var L=i.vectorTile.VectorTileFeature.prototype.toGeoJSON,x=function(wt){this._feature=wt,this.extent=i.EXTENT,this.type=wt.type,this.properties=wt.tags,"id"in wt&&!isNaN(wt.id)&&(this.id=parseInt(wt.id,10))};x.prototype.loadGeometry=function(){if(this._feature.type===1){for(var wt=[],rr=0,nr=this._feature.geometry;rr<nr.length;rr+=1){var Er=nr[rr];wt.push([new i.Point$1(Er[0],Er[1])])}return wt}else{for(var Xr=[],ri=0,Qr=this._feature.geometry;ri<Qr.length;ri+=1){for(var Oi=Qr[ri],$i=[],tn=0,fn=Oi;tn<fn.length;tn+=1){var yn=fn[tn];$i.push(new i.Point$1(yn[0],yn[1]))}Xr.push($i)}return Xr}},x.prototype.toGeoJSON=function(wt,rr,nr){return L.call(this,wt,rr,nr)};var C=function(wt){this.layers={_geojsonTileLayer:this},this.name="_geojsonTileLayer",this.extent=i.EXTENT,this.length=wt.length,this._features=wt};C.prototype.feature=function(wt){return new x(this._features[wt])};var M=i.vectorTile.VectorTileFeature,g=P;function P(Ut,wt){this.options=wt||{},this.features=Ut,this.length=Ut.length}P.prototype.feature=function(Ut){return new T(this.features[Ut],this.options.extent)};function T(Ut,wt){this.id=typeof Ut.id=="number"?Ut.id:void 0,this.type=Ut.type,this.rawGeometry=Ut.type===1?[Ut.geometry]:Ut.geometry,this.properties=Ut.tags,this.extent=wt||4096}T.prototype.loadGeometry=function(){var Ut=this.rawGeometry;this.geometry=[];for(var wt=0;wt<Ut.length;wt++){for(var rr=Ut[wt],nr=[],Er=0;Er<rr.length;Er++)nr.push(new i.Point$1(rr[Er][0],rr[Er][1]));this.geometry.push(nr)}return this.geometry},T.prototype.bbox=function(){this.geometry||this.loadGeometry();for(var Ut=this.geometry,wt=1/0,rr=-1/0,nr=1/0,Er=-1/0,Xr=0;Xr<Ut.length;Xr++)for(var ri=Ut[Xr],Qr=0;Qr<ri.length;Qr++){var Oi=ri[Qr];wt=Math.min(wt,Oi.x),rr=Math.max(rr,Oi.x),nr=Math.min(nr,Oi.y),Er=Math.max(Er,Oi.y)}return[wt,nr,rr,Er]},T.prototype.toGeoJSON=M.prototype.toGeoJSON;var z=Z,O=Z,V=j,G=g;function Z(Ut){var wt=new i.pbf;return N(Ut,wt),wt.finish()}function j(Ut,wt){wt=wt||{};var rr={};for(var nr in Ut)rr[nr]=new g(Ut[nr].features,wt),rr[nr].name=nr,rr[nr].version=wt.version,rr[nr].extent=wt.extent;return Z({layers:rr})}function N(Ut,wt){for(var rr in Ut.layers)wt.writeMessage(3,H,Ut.layers[rr])}function H(Ut,wt){wt.writeVarintField(15,Ut.version||1),wt.writeStringField(1,Ut.name||""),wt.writeVarintField(5,Ut.extent||4096);var rr,nr={keys:[],values:[],keycache:{},valuecache:{}};for(rr=0;rr<Ut.length;rr++)nr.feature=Ut.feature(rr),wt.writeMessage(2,te,nr);var Er=nr.keys;for(rr=0;rr<Er.length;rr++)wt.writeStringField(3,Er[rr]);var Xr=nr.values;for(rr=0;rr<Xr.length;rr++)wt.writeMessage(4,me,Xr[rr])}function te(Ut,wt){var rr=Ut.feature;rr.id!==void 0&&wt.writeVarintField(1,rr.id),wt.writeMessage(2,oe,Ut),wt.writeVarintField(3,rr.type),wt.writeMessage(4,Ce,rr)}function oe(Ut,wt){var rr=Ut.feature,nr=Ut.keys,Er=Ut.values,Xr=Ut.keycache,ri=Ut.valuecache;for(var Qr in rr.properties){var Oi=Xr[Qr];typeof Oi=="undefined"&&(nr.push(Qr),Oi=nr.length-1,Xr[Qr]=Oi),wt.writeVarint(Oi);var $i=rr.properties[Qr],tn=typeof $i;tn!=="string"&&tn!=="boolean"&&tn!=="number"&&($i=JSON.stringify($i));var fn=tn+":"+$i,yn=ri[fn];typeof yn=="undefined"&&(Er.push($i),yn=Er.length-1,ri[fn]=yn),wt.writeVarint(yn)}}function _e(Ut,wt){return(wt<<3)+(Ut&7)}function Ee(Ut){return Ut<<1^Ut>>31}function Ce(Ut,wt){for(var rr=Ut.loadGeometry(),nr=Ut.type,Er=0,Xr=0,ri=rr.length,Qr=0;Qr<ri;Qr++){var Oi=rr[Qr],$i=1;nr===1&&($i=Oi.length),wt.writeVarint(_e(1,$i));for(var tn=nr===3?Oi.length-1:Oi.length,fn=0;fn<tn;fn++){fn===1&&nr!==1&&wt.writeVarint(_e(2,tn-1));var yn=Oi[fn].x-Er,Sn=Oi[fn].y-Xr;wt.writeVarint(Ee(yn)),wt.writeVarint(Ee(Sn)),Er+=yn,Xr+=Sn}nr===3&&wt.writeVarint(_e(7,1))}}function me(Ut,wt){var rr=typeof Ut;rr==="string"?wt.writeStringField(1,Ut):rr==="boolean"?wt.writeBooleanField(7,Ut):rr==="number"&&(Ut%1!==0?wt.writeDoubleField(3,Ut):Ut<0?wt.writeSVarintField(6,Ut):wt.writeVarintField(5,Ut))}z.fromVectorTileJs=O,z.fromGeojsonVt=V,z.GeoJSONWrapper=G;function ie(Ut,wt,rr,nr,Er,Xr){if(!(Er-nr<=rr)){var ri=nr+Er>>1;Se(Ut,wt,ri,nr,Er,Xr%2),ie(Ut,wt,rr,nr,ri-1,Xr+1),ie(Ut,wt,rr,ri+1,Er,Xr+1)}}function Se(Ut,wt,rr,nr,Er,Xr){for(;Er>nr;){if(Er-nr>600){var ri=Er-nr+1,Qr=rr-nr+1,Oi=Math.log(ri),$i=.5*Math.exp(2*Oi/3),tn=.5*Math.sqrt(Oi*$i*(ri-$i)/ri)*(Qr-ri/2<0?-1:1),fn=Math.max(nr,Math.floor(rr-Qr*$i/ri+tn)),yn=Math.min(Er,Math.floor(rr+(ri-Qr)*$i/ri+tn));Se(Ut,wt,rr,fn,yn,Xr)}var Sn=wt[2*rr+Xr],Ba=nr,ua=Er;for(Le(Ut,wt,nr,rr),wt[2*Er+Xr]>Sn&&Le(Ut,wt,nr,Er);Ba<ua;){for(Le(Ut,wt,Ba,ua),Ba++,ua--;wt[2*Ba+Xr]<Sn;)Ba++;for(;wt[2*ua+Xr]>Sn;)ua--}wt[2*nr+Xr]===Sn?Le(Ut,wt,nr,ua):(ua++,Le(Ut,wt,ua,Er)),ua<=rr&&(nr=ua+1),rr<=ua&&(Er=ua-1)}}function Le(Ut,wt,rr,nr){Ae(Ut,rr,nr),Ae(wt,2*rr,2*nr),Ae(wt,2*rr+1,2*nr+1)}function Ae(Ut,wt,rr){var nr=Ut[wt];Ut[wt]=Ut[rr],Ut[rr]=nr}function Fe(Ut,wt,rr,nr,Er,Xr,ri){for(var Qr=[0,Ut.length-1,0],Oi=[],$i,tn;Qr.length;){var fn=Qr.pop(),yn=Qr.pop(),Sn=Qr.pop();if(yn-Sn<=ri){for(var Ba=Sn;Ba<=yn;Ba++)$i=wt[2*Ba],tn=wt[2*Ba+1],$i>=rr&&$i<=Er&&tn>=nr&&tn<=Xr&&Oi.push(Ut[Ba]);continue}var ua=Math.floor((Sn+yn)/2);$i=wt[2*ua],tn=wt[2*ua+1],$i>=rr&&$i<=Er&&tn>=nr&&tn<=Xr&&Oi.push(Ut[ua]);var ma=(fn+1)%2;(fn===0?rr<=$i:nr<=tn)&&(Qr.push(Sn),Qr.push(ua-1),Qr.push(ma)),(fn===0?Er>=$i:Xr>=tn)&&(Qr.push(ua+1),Qr.push(yn),Qr.push(ma))}return Oi}function Pe(Ut,wt,rr,nr,Er,Xr){for(var ri=[0,Ut.length-1,0],Qr=[],Oi=Er*Er;ri.length;){var $i=ri.pop(),tn=ri.pop(),fn=ri.pop();if(tn-fn<=Xr){for(var yn=fn;yn<=tn;yn++)ge(wt[2*yn],wt[2*yn+1],rr,nr)<=Oi&&Qr.push(Ut[yn]);continue}var Sn=Math.floor((fn+tn)/2),Ba=wt[2*Sn],ua=wt[2*Sn+1];ge(Ba,ua,rr,nr)<=Oi&&Qr.push(Ut[Sn]);var ma=($i+1)%2;($i===0?rr-Er<=Ba:nr-Er<=ua)&&(ri.push(fn),ri.push(Sn-1),ri.push(ma)),($i===0?rr+Er>=Ba:nr+Er>=ua)&&(ri.push(Sn+1),ri.push(tn),ri.push(ma))}return Qr}function ge(Ut,wt,rr,nr){var Er=Ut-rr,Xr=wt-nr;return Er*Er+Xr*Xr}var Re=function(Ut){return Ut[0]},ce=function(Ut){return Ut[1]},Ze=function(wt,rr,nr,Er,Xr){rr===void 0&&(rr=Re),nr===void 0&&(nr=ce),Er===void 0&&(Er=64),Xr===void 0&&(Xr=Float64Array),this.nodeSize=Er,this.points=wt;for(var ri=wt.length<65536?Uint16Array:Uint32Array,Qr=this.ids=new ri(wt.length),Oi=this.coords=new Xr(wt.length*2),$i=0;$i<wt.length;$i++)Qr[$i]=$i,Oi[2*$i]=rr(wt[$i]),Oi[2*$i+1]=nr(wt[$i]);ie(Qr,Oi,Er,0,Qr.length-1,0)};Ze.prototype.range=function(wt,rr,nr,Er){return Fe(this.ids,this.coords,wt,rr,nr,Er,this.nodeSize)},Ze.prototype.within=function(wt,rr,nr){return Pe(this.ids,this.coords,wt,rr,nr,this.nodeSize)};var ut={minZoom:0,maxZoom:16,minPoints:2,radius:40,extent:512,nodeSize:64,log:!1,generateId:!1,reduce:null,map:function(Ut){return Ut}},pt=function(wt){this.options=cr(Object.create(ut),wt),this.trees=new Array(this.options.maxZoom+1)};pt.prototype.load=function(wt){var rr=this.options,nr=rr.log,Er=rr.minZoom,Xr=rr.maxZoom,ri=rr.nodeSize;nr&&console.time("total time");var Qr="prepare "+wt.length+" points";nr&&console.time(Qr),this.points=wt;for(var Oi=[],$i=0;$i<wt.length;$i++)wt[$i].geometry&&Oi.push(st(wt[$i],$i));this.trees[Xr+1]=new Ze(Oi,$e,St,ri,Float32Array),nr&&console.timeEnd(Qr);for(var tn=Xr;tn>=Er;tn--){var fn=+Date.now();Oi=this._cluster(Oi,tn),this.trees[tn]=new Ze(Oi,$e,St,ri,Float32Array),nr&&console.log("z%d: %d clusters in %dms",tn,Oi.length,+Date.now()-fn)}return nr&&console.timeEnd("total time"),this},pt.prototype.getClusters=function(wt,rr){var nr=((wt[0]+180)%360+360)%360-180,Er=Math.max(-90,Math.min(90,wt[1])),Xr=wt[2]===180?180:((wt[2]+180)%360+360)%360-180,ri=Math.max(-90,Math.min(90,wt[3]));if(wt[2]-wt[0]>=360)nr=-180,Xr=180;else if(nr>Xr){var Qr=this.getClusters([nr,Er,180,ri],rr),Oi=this.getClusters([-180,Er,Xr,ri],rr);return Qr.concat(Oi)}for(var $i=this.trees[this._limitZoom(rr)],tn=$i.range(Nt(nr),Jt(ri),Nt(Xr),Jt(Er)),fn=[],yn=0,Sn=tn;yn<Sn.length;yn+=1){var Ba=Sn[yn],ua=$i.points[Ba];fn.push(ua.numPoints?lt(ua):this.points[ua.index])}return fn},pt.prototype.getChildren=function(wt){var rr=this._getOriginId(wt),nr=this._getOriginZoom(wt),Er="No cluster with the specified id.",Xr=this.trees[nr];if(!Xr)throw new Error(Er);var ri=Xr.points[rr];if(!ri)throw new Error(Er);for(var Qr=this.options.radius/(this.options.extent*Math.pow(2,nr-1)),Oi=Xr.within(ri.x,ri.y,Qr),$i=[],tn=0,fn=Oi;tn<fn.length;tn+=1){var yn=fn[tn],Sn=Xr.points[yn];Sn.parentId===wt&&$i.push(Sn.numPoints?lt(Sn):this.points[Sn.index])}if($i.length===0)throw new Error(Er);return $i},pt.prototype.getLeaves=function(wt,rr,nr){rr=rr||10,nr=nr||0;var Er=[];return this._appendLeaves(Er,wt,rr,nr,0),Er},pt.prototype.getTile=function(wt,rr,nr){var Er=this.trees[this._limitZoom(wt)],Xr=Math.pow(2,wt),ri=this.options,Qr=ri.extent,Oi=ri.radius,$i=Oi/Qr,tn=(nr-$i)/Xr,fn=(nr+1+$i)/Xr,yn={features:[]};return this._addTileFeatures(Er.range((rr-$i)/Xr,tn,(rr+1+$i)/Xr,fn),Er.points,rr,nr,Xr,yn),rr===0&&this._addTileFeatures(Er.range(1-$i/Xr,tn,1,fn),Er.points,Xr,nr,Xr,yn),rr===Xr-1&&this._addTileFeatures(Er.range(0,tn,$i/Xr,fn),Er.points,-1,nr,Xr,yn),yn.features.length?yn:null},pt.prototype.getClusterExpansionZoom=function(wt){for(var rr=this._getOriginZoom(wt)-1;rr<=this.options.maxZoom;){var nr=this.getChildren(wt);if(rr++,nr.length!==1)break;wt=nr[0].properties.cluster_id}return rr},pt.prototype._appendLeaves=function(wt,rr,nr,Er,Xr){for(var ri=this.getChildren(rr),Qr=0,Oi=ri;Qr<Oi.length;Qr+=1){var $i=Oi[Qr],tn=$i.properties;if(tn&&tn.cluster?Xr+tn.point_count<=Er?Xr+=tn.point_count:Xr=this._appendLeaves(wt,tn.cluster_id,nr,Er,Xr):Xr<Er?Xr++:wt.push($i),wt.length===nr)break}return Xr},pt.prototype._addTileFeatures=function(wt,rr,nr,Er,Xr,ri){for(var Qr=0,Oi=wt;Qr<Oi.length;Qr+=1){var $i=Oi[Qr],tn=rr[$i],fn=tn.numPoints,yn={type:1,geometry:[[Math.round(this.options.extent*(tn.x*Xr-nr)),Math.round(this.options.extent*(tn.y*Xr-Er))]],tags:fn?Gt(tn):this.points[tn.index].properties},Sn=void 0;fn?Sn=tn.id:this.options.generateId?Sn=tn.index:this.points[tn.index].id&&(Sn=this.points[tn.index].id),Sn!==void 0&&(yn.id=Sn),ri.features.push(yn)}},pt.prototype._limitZoom=function(wt){return Math.max(this.options.minZoom,Math.min(+wt,this.options.maxZoom+1))},pt.prototype._cluster=function(wt,rr){for(var nr=[],Er=this.options,Xr=Er.radius,ri=Er.extent,Qr=Er.reduce,Oi=Er.minPoints,$i=Xr/(ri*Math.pow(2,rr)),tn=0;tn<wt.length;tn++){var fn=wt[tn];if(!(fn.zoom<=rr)){fn.zoom=rr;for(var yn=this.trees[rr+1],Sn=yn.within(fn.x,fn.y,$i),Ba=fn.numPoints||1,ua=Ba,ma=0,Wa=Sn;ma<Wa.length;ma+=1){var Fa=Wa[ma],Wo=yn.points[Fa];Wo.zoom>rr&&(ua+=Wo.numPoints||1)}if(ua>=Oi){for(var da=fn.x*Ba,Wn=fn.y*Ba,Ha=Qr&&Ba>1?this._map(fn,!0):null,vo=(tn<<5)+(rr+1)+this.points.length,jn=0,Mt=Sn;jn<Mt.length;jn+=1){var kr=Mt[jn],Jr=yn.points[kr];if(!(Jr.zoom<=rr)){Jr.zoom=rr;var vi=Jr.numPoints||1;da+=Jr.x*vi,Wn+=Jr.y*vi,Jr.parentId=vo,Qr&&(Ha||(Ha=this._map(fn,!0)),Qr(Ha,this._map(Jr)))}}fn.parentId=vo,nr.push(Zt(da/ua,Wn/ua,vo,ua,Ha))}else if(nr.push(fn),ua>1)for(var hn=0,An=Sn;hn<An.length;hn+=1){var Mn=An[hn],Li=yn.points[Mn];Li.zoom<=rr||(Li.zoom=rr,nr.push(Li))}}}return nr},pt.prototype._getOriginId=function(wt){return wt-this.points.length>>5},pt.prototype._getOriginZoom=function(wt){return(wt-this.points.length)%32},pt.prototype._map=function(wt,rr){if(wt.numPoints)return rr?cr({},wt.properties):wt.properties;var nr=this.points[wt.index].properties,Er=this.options.map(nr);return rr&&Er===nr?cr({},Er):Er};function Zt(Ut,wt,rr,nr,Er){return{x:Ut,y:wt,zoom:1/0,id:rr,parentId:-1,numPoints:nr,properties:Er}}function st(Ut,wt){var rr=Ut.geometry.coordinates,nr=rr[0],Er=rr[1];return{x:Nt(nr),y:Jt(Er),zoom:1/0,index:wt,parentId:-1}}function lt(Ut){return{type:"Feature",id:Ut.id,properties:Gt(Ut),geometry:{type:"Point",coordinates:[sr(Ut.x),wr(Ut.y)]}}}function Gt(Ut){var wt=Ut.numPoints,rr=wt>=1e4?Math.round(wt/1e3)+"k":wt>=1e3?Math.round(wt/100)/10+"k":wt;return cr(cr({},Ut.properties),{cluster:!0,cluster_id:Ut.id,point_count:wt,point_count_abbreviated:rr})}function Nt(Ut){return Ut/360+.5}function Jt(Ut){var wt=Math.sin(Ut*Math.PI/180),rr=.5-.25*Math.log((1+wt)/(1-wt))/Math.PI;return rr<0?0:rr>1?1:rr}function sr(Ut){return(Ut-.5)*360}function wr(Ut){var wt=(180-Ut*360)*Math.PI/180;return 360*Math.atan(Math.exp(wt))/Math.PI-90}function cr(Ut,wt){for(var rr in wt)Ut[rr]=wt[rr];return Ut}function $e(Ut){return Ut.x}function St(Ut){return Ut.y}function Qt(Ut,wt,rr,nr){for(var Er=nr,Xr=rr-wt>>1,ri=rr-wt,Qr,Oi=Ut[wt],$i=Ut[wt+1],tn=Ut[rr],fn=Ut[rr+1],yn=wt+3;yn<rr;yn+=3){var Sn=Vt(Ut[yn],Ut[yn+1],Oi,$i,tn,fn);if(Sn>Er)Qr=yn,Er=Sn;else if(Sn===Er){var Ba=Math.abs(yn-Xr);Ba<ri&&(Qr=yn,ri=Ba)}}Er>nr&&(Qr-wt>3&&Qt(Ut,wt,Qr,nr),Ut[Qr+2]=Er,rr-Qr>3&&Qt(Ut,Qr,rr,nr))}function Vt(Ut,wt,rr,nr,Er,Xr){var ri=Er-rr,Qr=Xr-nr;if(ri!==0||Qr!==0){var Oi=((Ut-rr)*ri+(wt-nr)*Qr)/(ri*ri+Qr*Qr);Oi>1?(rr=Er,nr=Xr):Oi>0&&(rr+=ri*Oi,nr+=Qr*Oi)}return ri=Ut-rr,Qr=wt-nr,ri*ri+Qr*Qr}function _t(Ut,wt,rr,nr){var Er={id:typeof Ut=="undefined"?null:Ut,type:wt,geometry:rr,tags:nr,minX:1/0,minY:1/0,maxX:-1/0,maxY:-1/0};return It(Er),Er}function It(Ut){var wt=Ut.geometry,rr=Ut.type;if(rr==="Point"||rr==="MultiPoint"||rr==="LineString")mt(Ut,wt);else if(rr==="Polygon"||rr==="MultiLineString")for(var nr=0;nr<wt.length;nr++)mt(Ut,wt[nr]);else if(rr==="MultiPolygon")for(nr=0;nr<wt.length;nr++)for(var Er=0;Er<wt[nr].length;Er++)mt(Ut,wt[nr][Er])}function mt(Ut,wt){for(var rr=0;rr<wt.length;rr+=3)Ut.minX=Math.min(Ut.minX,wt[rr]),Ut.minY=Math.min(Ut.minY,wt[rr+1]),Ut.maxX=Math.max(Ut.maxX,wt[rr]),Ut.maxY=Math.max(Ut.maxY,wt[rr+1])}function er(Ut,wt){var rr=[];if(Ut.type==="FeatureCollection")for(var nr=0;nr<Ut.features.length;nr++)lr(rr,Ut.features[nr],wt,nr);else Ut.type==="Feature"?lr(rr,Ut,wt):lr(rr,{geometry:Ut},wt);return rr}function lr(Ut,wt,rr,nr){if(wt.geometry){var Er=wt.geometry.coordinates,Xr=wt.geometry.type,ri=Math.pow(rr.tolerance/((1<<rr.maxZoom)*rr.extent),2),Qr=[],Oi=wt.id;if(rr.promoteId?Oi=wt.properties[rr.promoteId]:rr.generateId&&(Oi=nr||0),Xr==="Point")Tr(Er,Qr);else if(Xr==="MultiPoint")for(var $i=0;$i<Er.length;$i++)Tr(Er[$i],Qr);else if(Xr==="LineString")Lr(Er,Qr,ri,!1);else if(Xr==="MultiLineString")if(rr.lineMetrics){for($i=0;$i<Er.length;$i++)Qr=[],Lr(Er[$i],Qr,ri,!1),Ut.push(_t(Oi,"LineString",Qr,wt.properties));return}else ti(Er,Qr,ri,!1);else if(Xr==="Polygon")ti(Er,Qr,ri,!0);else if(Xr==="MultiPolygon")for($i=0;$i<Er.length;$i++){var tn=[];ti(Er[$i],tn,ri,!0),Qr.push(tn)}else if(Xr==="GeometryCollection"){for($i=0;$i<wt.geometry.geometries.length;$i++)lr(Ut,{id:Oi,geometry:wt.geometry.geometries[$i],properties:wt.properties},rr,nr);return}else throw new Error("Input data is not a valid GeoJSON object.");Ut.push(_t(Oi,Xr,Qr,wt.properties))}}function Tr(Ut,wt){wt.push(Br(Ut[0])),wt.push(Vr(Ut[1])),wt.push(0)}function Lr(Ut,wt,rr,nr){for(var Er,Xr,ri=0,Qr=0;Qr<Ut.length;Qr++){var Oi=Br(Ut[Qr][0]),$i=Vr(Ut[Qr][1]);wt.push(Oi),wt.push($i),wt.push(0),Qr>0&&(nr?ri+=(Er*$i-Oi*Xr)/2:ri+=Math.sqrt(Math.pow(Oi-Er,2)+Math.pow($i-Xr,2))),Er=Oi,Xr=$i}var tn=wt.length-3;wt[2]=1,Qt(wt,0,tn,rr),wt[tn+2]=1,wt.size=Math.abs(ri),wt.start=0,wt.end=wt.size}function ti(Ut,wt,rr,nr){for(var Er=0;Er<Ut.length;Er++){var Xr=[];Lr(Ut[Er],Xr,rr,nr),wt.push(Xr)}}function Br(Ut){return Ut/360+.5}function Vr(Ut){var wt=Math.sin(Ut*Math.PI/180),rr=.5-.25*Math.log((1+wt)/(1-wt))/Math.PI;return rr<0?0:rr>1?1:rr}function dt(Ut,wt,rr,nr,Er,Xr,ri,Qr){if(rr/=wt,nr/=wt,Xr>=rr&&ri<nr)return Ut;if(ri<rr||Xr>=nr)return null;for(var Oi=[],$i=0;$i<Ut.length;$i++){var tn=Ut[$i],fn=tn.geometry,yn=tn.type,Sn=Er===0?tn.minX:tn.minY,Ba=Er===0?tn.maxX:tn.maxY;if(Sn>=rr&&Ba<nr){Oi.push(tn);continue}else if(Ba<rr||Sn>=nr)continue;var ua=[];if(yn==="Point"||yn==="MultiPoint")Ge(fn,ua,rr,nr,Er);else if(yn==="LineString")Je(fn,ua,rr,nr,Er,!1,Qr.lineMetrics);else if(yn==="MultiLineString")tt(fn,ua,rr,nr,Er,!1);else if(yn==="Polygon")tt(fn,ua,rr,nr,Er,!0);else if(yn==="MultiPolygon")for(var ma=0;ma<fn.length;ma++){var Wa=[];tt(fn[ma],Wa,rr,nr,Er,!0),Wa.length&&ua.push(Wa)}if(ua.length){if(Qr.lineMetrics&&yn==="LineString"){for(ma=0;ma<ua.length;ma++)Oi.push(_t(tn.id,yn,ua[ma],tn.tags));continue}(yn==="LineString"||yn==="MultiLineString")&&(ua.length===1?(yn="LineString",ua=ua[0]):yn="MultiLineString"),(yn==="Point"||yn==="MultiPoint")&&(yn=ua.length===3?"Point":"MultiPoint"),Oi.push(_t(tn.id,yn,ua,tn.tags))}}return Oi.length?Oi:null}function Ge(Ut,wt,rr,nr,Er){for(var Xr=0;Xr<Ut.length;Xr+=3){var ri=Ut[Xr+Er];ri>=rr&&ri<=nr&&(wt.push(Ut[Xr]),wt.push(Ut[Xr+1]),wt.push(Ut[Xr+2]))}}function Je(Ut,wt,rr,nr,Er,Xr,ri){for(var Qr=je(Ut),Oi=Er===0?Ie:xe,$i=Ut.start,tn,fn,yn=0;yn<Ut.length-3;yn+=3){var Sn=Ut[yn],Ba=Ut[yn+1],ua=Ut[yn+2],ma=Ut[yn+3],Wa=Ut[yn+4],Fa=Er===0?Sn:Ba,Wo=Er===0?ma:Wa,da=!1;ri&&(tn=Math.sqrt(Math.pow(Sn-ma,2)+Math.pow(Ba-Wa,2))),Fa<rr?Wo>rr&&(fn=Oi(Qr,Sn,Ba,ma,Wa,rr),ri&&(Qr.start=$i+tn*fn)):Fa>nr?Wo<nr&&(fn=Oi(Qr,Sn,Ba,ma,Wa,nr),ri&&(Qr.start=$i+tn*fn)):xt(Qr,Sn,Ba,ua),Wo<rr&&Fa>=rr&&(fn=Oi(Qr,Sn,Ba,ma,Wa,rr),da=!0),Wo>nr&&Fa<=nr&&(fn=Oi(Qr,Sn,Ba,ma,Wa,nr),da=!0),!Xr&&da&&(ri&&(Qr.end=$i+tn*fn),wt.push(Qr),Qr=je(Ut)),ri&&($i+=tn)}var Wn=Ut.length-3;Sn=Ut[Wn],Ba=Ut[Wn+1],ua=Ut[Wn+2],Fa=Er===0?Sn:Ba,Fa>=rr&&Fa<=nr&&xt(Qr,Sn,Ba,ua),Wn=Qr.length-3,Xr&&Wn>=3&&(Qr[Wn]!==Qr[0]||Qr[Wn+1]!==Qr[1])&&xt(Qr,Qr[0],Qr[1],Qr[2]),Qr.length&&wt.push(Qr)}function je(Ut){var wt=[];return wt.size=Ut.size,wt.start=Ut.start,wt.end=Ut.end,wt}function tt(Ut,wt,rr,nr,Er,Xr){for(var ri=0;ri<Ut.length;ri++)Je(Ut[ri],wt,rr,nr,Er,Xr,!1)}function xt(Ut,wt,rr,nr){Ut.push(wt),Ut.push(rr),Ut.push(nr)}function Ie(Ut,wt,rr,nr,Er,Xr){var ri=(Xr-wt)/(nr-wt);return Ut.push(Xr),Ut.push(rr+(Er-rr)*ri),Ut.push(1),ri}function xe(Ut,wt,rr,nr,Er,Xr){var ri=(Xr-rr)/(Er-rr);return Ut.push(wt+(nr-wt)*ri),Ut.push(Xr),Ut.push(1),ri}function ke(Ut,wt){var rr=wt.buffer/wt.extent,nr=Ut,Er=dt(Ut,1,-1-rr,rr,0,-1,2,wt),Xr=dt(Ut,1,1-rr,2+rr,0,-1,2,wt);return(Er||Xr)&&(nr=dt(Ut,1,-rr,1+rr,0,-1,2,wt)||[],Er&&(nr=vt(Er,1).concat(nr)),Xr&&(nr=nr.concat(vt(Xr,-1)))),nr}function vt(Ut,wt){for(var rr=[],nr=0;nr<Ut.length;nr++){var Er=Ut[nr],Xr=Er.type,ri;if(Xr==="Point"||Xr==="MultiPoint"||Xr==="LineString")ri=ir(Er.geometry,wt);else if(Xr==="MultiLineString"||Xr==="Polygon"){ri=[];for(var Qr=0;Qr<Er.geometry.length;Qr++)ri.push(ir(Er.geometry[Qr],wt))}else if(Xr==="MultiPolygon")for(ri=[],Qr=0;Qr<Er.geometry.length;Qr++){for(var Oi=[],$i=0;$i<Er.geometry[Qr].length;$i++)Oi.push(ir(Er.geometry[Qr][$i],wt));ri.push(Oi)}rr.push(_t(Er.id,Xr,ri,Er.tags))}return rr}function ir(Ut,wt){var rr=[];rr.size=Ut.size,Ut.start!==void 0&&(rr.start=Ut.start,rr.end=Ut.end);for(var nr=0;nr<Ut.length;nr+=3)rr.push(Ut[nr]+wt,Ut[nr+1],Ut[nr+2]);return rr}function ar(Ut,wt){if(Ut.transformed)return Ut;var rr=1<<Ut.z,nr=Ut.x,Er=Ut.y,Xr,ri,Qr;for(Xr=0;Xr<Ut.features.length;Xr++){var Oi=Ut.features[Xr],$i=Oi.geometry,tn=Oi.type;if(Oi.geometry=[],tn===1)for(ri=0;ri<$i.length;ri+=2)Oi.geometry.push(vr($i[ri],$i[ri+1],wt,rr,nr,Er));else for(ri=0;ri<$i.length;ri++){var fn=[];for(Qr=0;Qr<$i[ri].length;Qr+=2)fn.push(vr($i[ri][Qr],$i[ri][Qr+1],wt,rr,nr,Er));Oi.geometry.push(fn)}}return Ut.transformed=!0,Ut}function vr(Ut,wt,rr,nr,Er,Xr){return[Math.round(rr*(Ut*nr-Er)),Math.round(rr*(wt*nr-Xr))]}function ii(Ut,wt,rr,nr,Er){for(var Xr=wt===Er.maxZoom?0:Er.tolerance/((1<<wt)*Er.extent),ri={features:[],numPoints:0,numSimplified:0,numFeatures:0,source:null,x:rr,y:nr,z:wt,transformed:!1,minX:2,minY:1,maxX:-1,maxY:0},Qr=0;Qr<Ut.length;Qr++){ri.numFeatures++,pi(ri,Ut[Qr],Xr,Er);var Oi=Ut[Qr].minX,$i=Ut[Qr].minY,tn=Ut[Qr].maxX,fn=Ut[Qr].maxY;Oi<ri.minX&&(ri.minX=Oi),$i<ri.minY&&(ri.minY=$i),tn>ri.maxX&&(ri.maxX=tn),fn>ri.maxY&&(ri.maxY=fn)}return ri}function pi(Ut,wt,rr,nr){var Er=wt.geometry,Xr=wt.type,ri=[];if(Xr==="Point"||Xr==="MultiPoint")for(var Qr=0;Qr<Er.length;Qr+=3)ri.push(Er[Qr]),ri.push(Er[Qr+1]),Ut.numPoints++,Ut.numSimplified++;else if(Xr==="LineString")$r(ri,Er,Ut,rr,!1,!1);else if(Xr==="MultiLineString"||Xr==="Polygon")for(Qr=0;Qr<Er.length;Qr++)$r(ri,Er[Qr],Ut,rr,Xr==="Polygon",Qr===0);else if(Xr==="MultiPolygon")for(var Oi=0;Oi<Er.length;Oi++){var $i=Er[Oi];for(Qr=0;Qr<$i.length;Qr++)$r(ri,$i[Qr],Ut,rr,!0,Qr===0)}if(ri.length){var tn=wt.tags||null;if(Xr==="LineString"&&nr.lineMetrics){tn={};for(var fn in wt.tags)tn[fn]=wt.tags[fn];tn.mapbox_clip_start=Er.start/Er.size,tn.mapbox_clip_end=Er.end/Er.size}var yn={geometry:ri,type:Xr==="Polygon"||Xr==="MultiPolygon"?3:Xr==="LineString"||Xr==="MultiLineString"?2:1,tags:tn};wt.id!==null&&(yn.id=wt.id),Ut.features.push(yn)}}function $r(Ut,wt,rr,nr,Er,Xr){var ri=nr*nr;if(nr>0&&wt.size<(Er?ri:nr)){rr.numPoints+=wt.length/3;return}for(var Qr=[],Oi=0;Oi<wt.length;Oi+=3)(nr===0||wt[Oi+2]>ri)&&(rr.numSimplified++,Qr.push(wt[Oi]),Qr.push(wt[Oi+1])),rr.numPoints++;Er&&di(Qr,Xr),Ut.push(Qr)}function di(Ut,wt){for(var rr=0,nr=0,Er=Ut.length,Xr=Er-2;nr<Er;Xr=nr,nr+=2)rr+=(Ut[nr]-Ut[Xr])*(Ut[nr+1]+Ut[Xr+1]);if(rr>0===wt)for(nr=0,Er=Ut.length;nr<Er/2;nr+=2){var ri=Ut[nr],Qr=Ut[nr+1];Ut[nr]=Ut[Er-2-nr],Ut[nr+1]=Ut[Er-1-nr],Ut[Er-2-nr]=ri,Ut[Er-1-nr]=Qr}}function ji(Ut,wt){return new In(Ut,wt)}function In(Ut,wt){wt=this.options=On(Object.create(this.options),wt);var rr=wt.debug;if(rr&&console.time("preprocess data"),wt.maxZoom<0||wt.maxZoom>24)throw new Error("maxZoom should be in the 0-24 range");if(wt.promoteId&&wt.generateId)throw new Error("promoteId and generateId cannot be used together.");var nr=er(Ut,wt);this.tiles={},this.tileCoords=[],rr&&(console.timeEnd("preprocess data"),console.log("index: maxZoom: %d, maxPoints: %d",wt.indexMaxZoom,wt.indexMaxPoints),console.time("generate tiles"),this.stats={},this.total=0),nr=ke(nr,wt),nr.length&&this.splitTile(nr,0,0,0),rr&&(nr.length&&console.log("features: %d, points: %d",this.tiles[0].numFeatures,this.tiles[0].numPoints),console.timeEnd("generate tiles"),console.log("tiles generated:",this.total,JSON.stringify(this.stats)))}In.prototype.options={maxZoom:14,indexMaxZoom:5,indexMaxPoints:1e5,tolerance:3,extent:4096,buffer:64,lineMetrics:!1,promoteId:null,generateId:!1,debug:0},In.prototype.splitTile=function(Ut,wt,rr,nr,Er,Xr,ri){for(var Qr=[Ut,wt,rr,nr],Oi=this.options,$i=Oi.debug;Qr.length;){nr=Qr.pop(),rr=Qr.pop(),wt=Qr.pop(),Ut=Qr.pop();var tn=1<<wt,fn=wi(wt,rr,nr),yn=this.tiles[fn];if(!yn&&($i>1&&console.time("creation"),yn=this.tiles[fn]=ii(Ut,wt,rr,nr,Oi),this.tileCoords.push({z:wt,x:rr,y:nr}),$i)){$i>1&&(console.log("tile z%d-%d-%d (features: %d, points: %d, simplified: %d)",wt,rr,nr,yn.numFeatures,yn.numPoints,yn.numSimplified),console.timeEnd("creation"));var Sn="z"+wt;this.stats[Sn]=(this.stats[Sn]||0)+1,this.total++}if(yn.source=Ut,Er){if(wt===Oi.maxZoom||wt===Er)continue;var Ba=1<<Er-wt;if(rr!==Math.floor(Xr/Ba)||nr!==Math.floor(ri/Ba))continue}else if(wt===Oi.indexMaxZoom||yn.numPoints<=Oi.indexMaxPoints)continue;if(yn.source=null,Ut.length!==0){$i>1&&console.time("clipping");var ua=.5*Oi.buffer/Oi.extent,ma=.5-ua,Wa=.5+ua,Fa=1+ua,Wo,da,Wn,Ha,vo,jn;Wo=da=Wn=Ha=null,vo=dt(Ut,tn,rr-ua,rr+Wa,0,yn.minX,yn.maxX,Oi),jn=dt(Ut,tn,rr+ma,rr+Fa,0,yn.minX,yn.maxX,Oi),Ut=null,vo&&(Wo=dt(vo,tn,nr-ua,nr+Wa,1,yn.minY,yn.maxY,Oi),da=dt(vo,tn,nr+ma,nr+Fa,1,yn.minY,yn.maxY,Oi),vo=null),jn&&(Wn=dt(jn,tn,nr-ua,nr+Wa,1,yn.minY,yn.maxY,Oi),Ha=dt(jn,tn,nr+ma,nr+Fa,1,yn.minY,yn.maxY,Oi),jn=null),$i>1&&console.timeEnd("clipping"),Qr.push(Wo||[],wt+1,rr*2,nr*2),Qr.push(da||[],wt+1,rr*2,nr*2+1),Qr.push(Wn||[],wt+1,rr*2+1,nr*2),Qr.push(Ha||[],wt+1,rr*2+1,nr*2+1)}}},In.prototype.getTile=function(Ut,wt,rr){var nr=this.options,Er=nr.extent,Xr=nr.debug;if(Ut<0||Ut>24)return null;var ri=1<<Ut;wt=(wt%ri+ri)%ri;var Qr=wi(Ut,wt,rr);if(this.tiles[Qr])return ar(this.tiles[Qr],Er);Xr>1&&console.log("drilling down to z%d-%d-%d",Ut,wt,rr);for(var Oi=Ut,$i=wt,tn=rr,fn;!fn&&Oi>0;)Oi--,$i=Math.floor($i/2),tn=Math.floor(tn/2),fn=this.tiles[wi(Oi,$i,tn)];return!fn||!fn.source?null:(Xr>1&&console.log("found parent tile z%d-%d-%d",Oi,$i,tn),Xr>1&&console.time("drilling down"),this.splitTile(fn.source,Oi,$i,tn,Ut,wt,rr),Xr>1&&console.timeEnd("drilling down"),this.tiles[Qr]?ar(this.tiles[Qr],Er):null)};function wi(Ut,wt,rr){return((1<<Ut)*rr+wt)*32+Ut}function On(Ut,wt){for(var rr in wt)Ut[rr]=wt[rr];return Ut}function qn(Ut,wt){var rr=Ut.tileID.canonical;if(!this._geoJSONIndex)return wt(null,null);var nr=this._geoJSONIndex.getTile(rr.z,rr.x,rr.y);if(!nr)return wt(null,null);var Er=new C(nr.features),Xr=z(Er);(Xr.byteOffset!==0||Xr.byteLength!==Xr.buffer.byteLength)&&(Xr=new Uint8Array(Xr)),wt(null,{vectorTile:Er,rawData:Xr.buffer})}var Fn=function(Ut){function wt(rr,nr,Er,Xr){Ut.call(this,rr,nr,Er,qn),Xr&&(this.loadGeoJSON=Xr)}return Ut&&(wt.__proto__=Ut),wt.prototype=Object.create(Ut&&Ut.prototype),wt.prototype.constructor=wt,wt.prototype.loadData=function(nr,Er){this._pendingCallback&&this._pendingCallback(null,{abandoned:!0}),this._pendingCallback=Er,this._pendingLoadDataParams=nr,this._state&&this._state!=="Idle"?this._state="NeedsLoadData":(this._state="Coalescing",this._loadData())},wt.prototype._loadData=function(){var nr=this;if(!(!this._pendingCallback||!this._pendingLoadDataParams)){var Er=this._pendingCallback,Xr=this._pendingLoadDataParams;delete this._pendingCallback,delete this._pendingLoadDataParams;var ri=Xr&&Xr.request&&Xr.request.collectResourceTiming?new i.RequestPerformance(Xr.request):!1;this.loadGeoJSON(Xr,function(Qr,Oi){if(Qr||!Oi)return Er(Qr);if(typeof Oi!="object")return Er(new Error("Input data given to '"+Xr.source+"' is not a valid GeoJSON object."));p(Oi,!0);try{if(Xr.filter){var $i=i.createExpression(Xr.filter,{type:"boolean","property-type":"data-driven",overridable:!1,transition:!1});if($i.result==="error")throw new Error($i.value.map(function(Sn){return Sn.key+": "+Sn.message}).join(", "));var tn=Oi.features.filter(function(Sn){return $i.value.evaluate({zoom:0},Sn)});Oi={type:"FeatureCollection",features:tn}}nr._geoJSONIndex=Xr.cluster?new pt(ra(Xr)).load(Oi.features):ji(Oi,Xr.geojsonVtOptions)}catch(Sn){return Er(Sn)}nr.loaded={};var fn={};if(ri){var yn=ri.finish();yn&&(fn.resourceTiming={},fn.resourceTiming[Xr.source]=JSON.parse(JSON.stringify(yn)))}Er(null,fn)})}},wt.prototype.coalesce=function(){this._state==="Coalescing"?this._state="Idle":this._state==="NeedsLoadData"&&(this._state="Coalescing",this._loadData())},wt.prototype.reloadTile=function(nr,Er){var Xr=this.loaded,ri=nr.uid;return Xr&&Xr[ri]?Ut.prototype.reloadTile.call(this,nr,Er):this.loadTile(nr,Er)},wt.prototype.loadGeoJSON=function(nr,Er){if(nr.request)i.getJSON(nr.request,Er);else if(typeof nr.data=="string")try{return Er(null,JSON.parse(nr.data))}catch(Xr){return Er(new Error("Input data given to '"+nr.source+"' is not a valid GeoJSON object."))}else return Er(new Error("Input data given to '"+nr.source+"' is not a valid GeoJSON object."))},wt.prototype.removeSource=function(nr,Er){this._pendingCallback&&this._pendingCallback(null,{abandoned:!0}),Er()},wt.prototype.getClusterExpansionZoom=function(nr,Er){try{Er(null,this._geoJSONIndex.getClusterExpansionZoom(nr.clusterId))}catch(Xr){Er(Xr)}},wt.prototype.getClusterChildren=function(nr,Er){try{Er(null,this._geoJSONIndex.getChildren(nr.clusterId))}catch(Xr){Er(Xr)}},wt.prototype.getClusterLeaves=function(nr,Er){try{Er(null,this._geoJSONIndex.getLeaves(nr.clusterId,nr.limit,nr.offset))}catch(Xr){Er(Xr)}},wt}(v);function ra(Ut){var wt=Ut.superclusterOptions,rr=Ut.clusterProperties;if(!rr||!wt)return wt;for(var nr={},Er={},Xr={accumulated:null,zoom:0},ri={properties:null},Qr=Object.keys(rr),Oi=0,$i=Qr;Oi<$i.length;Oi+=1){var tn=$i[Oi],fn=rr[tn],yn=fn[0],Sn=fn[1],Ba=i.createExpression(Sn),ua=i.createExpression(typeof yn=="string"?[yn,["accumulated"],["get",tn]]:yn);nr[tn]=Ba.value,Er[tn]=ua.value}return wt.map=function(ma){ri.properties=ma;for(var Wa={},Fa=0,Wo=Qr;Fa<Wo.length;Fa+=1){var da=Wo[Fa];Wa[da]=nr[da].evaluate(Xr,ri)}return Wa},wt.reduce=function(ma,Wa){ri.properties=Wa;for(var Fa=0,Wo=Qr;Fa<Wo.length;Fa+=1){var da=Wo[Fa];Xr.accumulated=ma[da],ma[da]=Er[da].evaluate(Xr,ri)}},wt}var la=function(wt){var rr=this;this.self=wt,this.actor=new i.Actor(wt,this),this.layerIndexes={},this.availableImages={},this.workerSourceTypes={vector:v,geojson:Fn},this.workerSources={},this.demWorkerSources={},this.self.registerWorkerSource=function(nr,Er){if(rr.workerSourceTypes[nr])throw new Error('Worker source with name "'+nr+'" already registered.');rr.workerSourceTypes[nr]=Er},this.self.registerRTLTextPlugin=function(nr){if(i.plugin.isParsed())throw new Error("RTL text plugin already registered.");i.plugin.applyArabicShaping=nr.applyArabicShaping,i.plugin.processBidirectionalText=nr.processBidirectionalText,i.plugin.processStyledBidirectionalText=nr.processStyledBidirectionalText}};return la.prototype.setReferrer=function(wt,rr){this.referrer=rr},la.prototype.setImages=function(wt,rr,nr){this.availableImages[wt]=rr;for(var Er in this.workerSources[wt]){var Xr=this.workerSources[wt][Er];for(var ri in Xr)Xr[ri].availableImages=rr}nr()},la.prototype.setLayers=function(wt,rr,nr){this.getLayerIndex(wt).replace(rr),nr()},la.prototype.updateLayers=function(wt,rr,nr){this.getLayerIndex(wt).update(rr.layers,rr.removedIds),nr()},la.prototype.loadTile=function(wt,rr,nr){this.getWorkerSource(wt,rr.type,rr.source).loadTile(rr,nr)},la.prototype.loadDEMTile=function(wt,rr,nr){this.getDEMWorkerSource(wt,rr.source).loadTile(rr,nr)},la.prototype.reloadTile=function(wt,rr,nr){this.getWorkerSource(wt,rr.type,rr.source).reloadTile(rr,nr)},la.prototype.abortTile=function(wt,rr,nr){this.getWorkerSource(wt,rr.type,rr.source).abortTile(rr,nr)},la.prototype.removeTile=function(wt,rr,nr){this.getWorkerSource(wt,rr.type,rr.source).removeTile(rr,nr)},la.prototype.removeDEMTile=function(wt,rr){this.getDEMWorkerSource(wt,rr.source).removeTile(rr)},la.prototype.removeSource=function(wt,rr,nr){if(!(!this.workerSources[wt]||!this.workerSources[wt][rr.type]||!this.workerSources[wt][rr.type][rr.source])){var Er=this.workerSources[wt][rr.type][rr.source];delete this.workerSources[wt][rr.type][rr.source],Er.removeSource!==void 0?Er.removeSource(rr,nr):nr()}},la.prototype.loadWorkerSource=function(wt,rr,nr){try{this.self.importScripts(rr.url),nr()}catch(Er){nr(Er.toString())}},la.prototype.syncRTLPluginState=function(wt,rr,nr){try{i.plugin.setState(rr);var Er=i.plugin.getPluginURL();if(i.plugin.isLoaded()&&!i.plugin.isParsed()&&Er!=null){this.self.importScripts(Er);var Xr=i.plugin.isParsed(),ri=Xr?void 0:new Error("RTL Text Plugin failed to import scripts from "+Er);nr(ri,Xr)}}catch(Qr){nr(Qr.toString())}},la.prototype.getAvailableImages=function(wt){var rr=this.availableImages[wt];return rr||(rr=[]),rr},la.prototype.getLayerIndex=function(wt){var rr=this.layerIndexes[wt];return rr||(rr=this.layerIndexes[wt]=new l),rr},la.prototype.getWorkerSource=function(wt,rr,nr){var Er=this;if(this.workerSources[wt]||(this.workerSources[wt]={}),this.workerSources[wt][rr]||(this.workerSources[wt][rr]={}),!this.workerSources[wt][rr][nr]){var Xr={send:function(ri,Qr,Oi){Er.actor.send(ri,Qr,Oi,wt)}};this.workerSources[wt][rr][nr]=new this.workerSourceTypes[rr](Xr,this.getLayerIndex(wt),this.getAvailableImages(wt))}return this.workerSources[wt][rr][nr]},la.prototype.getDEMWorkerSource=function(wt,rr){return this.demWorkerSources[wt]||(this.demWorkerSources[wt]={}),this.demWorkerSources[wt][rr]||(this.demWorkerSources[wt][rr]=new b),this.demWorkerSources[wt][rr]},la.prototype.enforceCacheSizeLimit=function(wt,rr){i.enforceCacheSizeLimit(rr)},typeof WorkerGlobalScope!="undefined"&&typeof self!="undefined"&&self instanceof WorkerGlobalScope&&(self.worker=new la(self)),la}),n(["./shared"],function(i){"use strict";var a=i.createCommonjsModule(function(Y){Y.exports?Y.exports=D:window&&(window.mapboxgl=window.mapboxgl||{},window.mapboxgl.supported=D,window.mapboxgl.notSupportedReason=J);function D(Ct){return!J(Ct)}function J(Ct){if(!q())return"not a browser";if(!K())return"insufficent Array support";if(!de())return"insufficient Function support";if(!ne())return"insufficient Object support";if(!we())return"insufficient JSON support";if(!Ue())return"insufficient worker support";if(!ft())return"insufficient Uint8ClampedArray support";if(!Xt())return"insufficient ArrayBuffer support";if(!hr())return"insufficient Canvas/getImageData support";if(!Ve(Ct&&Ct.failIfMajorPerformanceCaveat))return"insufficient WebGL support"}function q(){return typeof window!="undefined"&&typeof document!="undefined"}function K(){return Array.prototype&&Array.prototype.every&&Array.prototype.filter&&Array.prototype.forEach&&Array.prototype.indexOf&&Array.prototype.lastIndexOf&&Array.prototype.map&&Array.prototype.some&&Array.prototype.reduce&&Array.prototype.reduceRight&&Array.isArray}function de(){return Function.prototype&&Function.prototype.bind}function ne(){return Object.keys&&Object.create&&Object.getPrototypeOf&&Object.getOwnPropertyNames&&Object.isSealed&&Object.isFrozen&&Object.isExtensible&&Object.getOwnPropertyDescriptor&&Object.defineProperty&&Object.defineProperties&&Object.seal&&Object.freeze&&Object.preventExtensions}function we(){return"JSON"in window&&"parse"in JSON&&"stringify"in JSON}function Ue(){if(!("Worker"in window&&"Blob"in window&&"URL"in window))return!1;var Ct=new Blob([""],{type:"text/javascript"}),Ot=URL.createObjectURL(Ct),Rt,Bt;try{Bt=new Worker(Ot),Rt=!0}catch(Dt){Rt=!1}return Bt&&Bt.terminate(),URL.revokeObjectURL(Ot),Rt}function ft(){return"Uint8ClampedArray"in window}function Xt(){return ArrayBuffer.isView}function hr(){var Ct=document.createElement("canvas");Ct.width=Ct.height=1;var Ot=Ct.getContext("2d");if(!Ot)return!1;var Rt=Ot.getImageData(0,0,1,1);return Rt&&Rt.width===Ct.width}var qt={};function Ve(Ct){return qt[Ct]===void 0&&(qt[Ct]=at(Ct)),qt[Ct]}D.webGLContextAttributes={antialias:!1,alpha:!0,stencil:!0,depth:!0};function Qe(Ct){var Ot=document.createElement("canvas"),Rt=Object.create(D.webGLContextAttributes);return Rt.failIfMajorPerformanceCaveat=Ct,Ot.probablySupportsContext?Ot.probablySupportsContext("webgl",Rt)||Ot.probablySupportsContext("experimental-webgl",Rt):Ot.supportsContext?Ot.supportsContext("webgl",Rt)||Ot.supportsContext("experimental-webgl",Rt):Ot.getContext("webgl",Rt)||Ot.getContext("experimental-webgl",Rt)}function at(Ct){var Ot=Qe(Ct);if(!Ot)return!1;var Rt=Ot.createShader(Ot.VERTEX_SHADER);return!Rt||Ot.isContextLost()?!1:(Ot.shaderSource(Rt,"void main() {}"),Ot.compileShader(Rt),Ot.getShaderParameter(Rt,Ot.COMPILE_STATUS)===!0)}}),o={};o.create=function(Y,D,J){var q=i.window.document.createElement(Y);return D!==void 0&&(q.className=D),J&&J.appendChild(q),q},o.createNS=function(Y,D){var J=i.window.document.createElementNS(Y,D);return J};var s=i.window.document&&i.window.document.documentElement.style;function l(Y){if(!s)return Y[0];for(var D=0;D<Y.length;D++)if(Y[D]in s)return Y[D];return Y[0]}var u=l(["userSelect","MozUserSelect","WebkitUserSelect","msUserSelect"]),c;o.disableDrag=function(){s&&u&&(c=s[u],s[u]="none")},o.enableDrag=function(){s&&u&&(s[u]=c)};var f=l(["transform","WebkitTransform"]);o.setTransform=function(Y,D){Y.style[f]=D};var h=!1;try{var d=Object.defineProperty({},"passive",{get:function(){h=!0}});i.window.addEventListener("test",d,d),i.window.removeEventListener("test",d,d)}catch(Y){h=!1}o.addEventListener=function(Y,D,J,q){q===void 0&&(q={}),"passive"in q&&h?Y.addEventListener(D,J,q):Y.addEventListener(D,J,q.capture)},o.removeEventListener=function(Y,D,J,q){q===void 0&&(q={}),"passive"in q&&h?Y.removeEventListener(D,J,q):Y.removeEventListener(D,J,q.capture)};var v=function(Y){Y.preventDefault(),Y.stopPropagation(),i.window.removeEventListener("click",v,!0)};o.suppressClick=function(){i.window.addEventListener("click",v,!0),i.window.setTimeout(function(){i.window.removeEventListener("click",v,!0)},0)},o.mousePos=function(Y,D){var J=Y.getBoundingClientRect();return new i.Point(D.clientX-J.left-Y.clientLeft,D.clientY-J.top-Y.clientTop)},o.touchPos=function(Y,D){for(var J=Y.getBoundingClientRect(),q=[],K=0;K<D.length;K++)q.push(new i.Point(D[K].clientX-J.left-Y.clientLeft,D[K].clientY-J.top-Y.clientTop));return q},o.mouseButton=function(Y){return typeof i.window.InstallTrigger!="undefined"&&Y.button===2&&Y.ctrlKey&&i.window.navigator.platform.toUpperCase().indexOf("MAC")>=0?0:Y.button},o.remove=function(Y){Y.parentNode&&Y.parentNode.removeChild(Y)};function _(Y,D,J){var q,K,de,ne=i.browser.devicePixelRatio>1?"@2x":"",we=i.getJSON(D.transformRequest(D.normalizeSpriteURL(Y,ne,".json"),i.ResourceType.SpriteJSON),function(Xt,hr){we=null,de||(de=Xt,q=hr,ft())}),Ue=i.getImage(D.transformRequest(D.normalizeSpriteURL(Y,ne,".png"),i.ResourceType.SpriteImage),function(Xt,hr){Ue=null,de||(de=Xt,K=hr,ft())});function ft(){if(de)J(de);else if(q&&K){var Xt=i.browser.getImageData(K),hr={};for(var qt in q){var Ve=q[qt],Qe=Ve.width,at=Ve.height,Ct=Ve.x,Ot=Ve.y,Rt=Ve.sdf,Bt=Ve.pixelRatio,Dt=Ve.stretchX,yt=Ve.stretchY,Pt=Ve.content,ht=new i.RGBAImage({width:Qe,height:at});i.RGBAImage.copy(Xt,ht,{x:Ct,y:Ot},{x:0,y:0},{width:Qe,height:at}),hr[qt]={data:ht,pixelRatio:Bt,sdf:Rt,stretchX:Dt,stretchY:yt,content:Pt}}J(null,hr)}}return{cancel:function(){we&&(we.cancel(),we=null),Ue&&(Ue.cancel(),Ue=null)}}}function b(Y){var D=Y.userImage;if(D&&D.render){var J=D.render();if(J)return Y.data.replace(new Uint8Array(D.data.buffer)),!0}return!1}var p=1,k=function(Y){function D(){Y.call(this),this.images={},this.updatedImages={},this.callbackDispatchedThisFrame={},this.loaded=!1,this.requestors=[],this.patterns={},this.atlasImage=new i.RGBAImage({width:1,height:1}),this.dirty=!0}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.isLoaded=function(){return this.loaded},D.prototype.setLoaded=function(q){if(this.loaded!==q&&(this.loaded=q,q)){for(var K=0,de=this.requestors;K<de.length;K+=1){var ne=de[K],we=ne.ids,Ue=ne.callback;this._notify(we,Ue)}this.requestors=[]}},D.prototype.getImage=function(q){return this.images[q]},D.prototype.addImage=function(q,K){this._validate(q,K)&&(this.images[q]=K)},D.prototype._validate=function(q,K){var de=!0;return this._validateStretch(K.stretchX,K.data&&K.data.width)||(this.fire(new i.ErrorEvent(new Error('Image "'+q+'" has invalid "stretchX" value'))),de=!1),this._validateStretch(K.stretchY,K.data&&K.data.height)||(this.fire(new i.ErrorEvent(new Error('Image "'+q+'" has invalid "stretchY" value'))),de=!1),this._validateContent(K.content,K)||(this.fire(new i.ErrorEvent(new Error('Image "'+q+'" has invalid "content" value'))),de=!1),de},D.prototype._validateStretch=function(q,K){if(!q)return!0;for(var de=0,ne=0,we=q;ne<we.length;ne+=1){var Ue=we[ne];if(Ue[0]<de||Ue[1]<Ue[0]||K<Ue[1])return!1;de=Ue[1]}return!0},D.prototype._validateContent=function(q,K){return q?!(q.length!==4||q[0]<0||K.data.width<q[0]||q[1]<0||K.data.height<q[1]||q[2]<0||K.data.width<q[2]||q[3]<0||K.data.height<q[3]||q[2]<q[0]||q[3]<q[1]):!0},D.prototype.updateImage=function(q,K){var de=this.images[q];K.version=de.version+1,this.images[q]=K,this.updatedImages[q]=!0},D.prototype.removeImage=function(q){var K=this.images[q];delete this.images[q],delete this.patterns[q],K.userImage&&K.userImage.onRemove&&K.userImage.onRemove()},D.prototype.listImages=function(){return Object.keys(this.images)},D.prototype.getImages=function(q,K){var de=!0;if(!this.isLoaded())for(var ne=0,we=q;ne<we.length;ne+=1){var Ue=we[ne];this.images[Ue]||(de=!1)}this.isLoaded()||de?this._notify(q,K):this.requestors.push({ids:q,callback:K})},D.prototype._notify=function(q,K){for(var de={},ne=0,we=q;ne<we.length;ne+=1){var Ue=we[ne];this.images[Ue]||this.fire(new i.Event("styleimagemissing",{id:Ue}));var ft=this.images[Ue];ft?de[Ue]={data:ft.data.clone(),pixelRatio:ft.pixelRatio,sdf:ft.sdf,version:ft.version,stretchX:ft.stretchX,stretchY:ft.stretchY,content:ft.content,hasRenderCallback:!!(ft.userImage&&ft.userImage.render)}:i.warnOnce('Image "'+Ue+'" could not be loaded. Please make sure you have added the image with map.addImage() or a "sprite" property in your style. You can provide missing images by listening for the "styleimagemissing" map event.')}K(null,de)},D.prototype.getPixelSize=function(){var q=this.atlasImage,K=q.width,de=q.height;return{width:K,height:de}},D.prototype.getPattern=function(q){var K=this.patterns[q],de=this.getImage(q);if(!de)return null;if(K&&K.position.version===de.version)return K.position;if(K)K.position.version=de.version;else{var ne=de.data.width+p*2,we=de.data.height+p*2,Ue={w:ne,h:we,x:0,y:0},ft=new i.ImagePosition(Ue,de);this.patterns[q]={bin:Ue,position:ft}}return this._updatePatternAtlas(),this.patterns[q].position},D.prototype.bind=function(q){var K=q.gl;this.atlasTexture?this.dirty&&(this.atlasTexture.update(this.atlasImage),this.dirty=!1):this.atlasTexture=new i.Texture(q,this.atlasImage,K.RGBA),this.atlasTexture.bind(K.LINEAR,K.CLAMP_TO_EDGE)},D.prototype._updatePatternAtlas=function(){var q=[];for(var K in this.patterns)q.push(this.patterns[K].bin);var de=i.potpack(q),ne=de.w,we=de.h,Ue=this.atlasImage;Ue.resize({width:ne||1,height:we||1});for(var ft in this.patterns){var Xt=this.patterns[ft],hr=Xt.bin,qt=hr.x+p,Ve=hr.y+p,Qe=this.images[ft].data,at=Qe.width,Ct=Qe.height;i.RGBAImage.copy(Qe,Ue,{x:0,y:0},{x:qt,y:Ve},{width:at,height:Ct}),i.RGBAImage.copy(Qe,Ue,{x:0,y:Ct-1},{x:qt,y:Ve-1},{width:at,height:1}),i.RGBAImage.copy(Qe,Ue,{x:0,y:0},{x:qt,y:Ve+Ct},{width:at,height:1}),i.RGBAImage.copy(Qe,Ue,{x:at-1,y:0},{x:qt-1,y:Ve},{width:1,height:Ct}),i.RGBAImage.copy(Qe,Ue,{x:0,y:0},{x:qt+at,y:Ve},{width:1,height:Ct})}this.dirty=!0},D.prototype.beginFrame=function(){this.callbackDispatchedThisFrame={}},D.prototype.dispatchRenderCallbacks=function(q){for(var K=0,de=q;K<de.length;K+=1){var ne=de[K];if(!this.callbackDispatchedThisFrame[ne]){this.callbackDispatchedThisFrame[ne]=!0;var we=this.images[ne],Ue=b(we);Ue&&this.updateImage(ne,we)}}},D}(i.Evented);function E(Y,D,J,q,K){var de=D*256,ne=de+255,we=q.transformRequest(q.normalizeGlyphsURL(J).replace("{fontstack}",Y).replace("{range}",de+"-"+ne),i.ResourceType.Glyphs);i.getArrayBuffer(we,function(Ue,ft){if(Ue)K(Ue);else if(ft){for(var Xt={},hr=0,qt=i.parseGlyphPBF(ft);hr<qt.length;hr+=1){var Ve=qt[hr];Xt[Ve.id]=Ve}K(null,Xt)}})}var S=C,L=C,x=1e20;function C(Y,D,J,q,K,de){this.fontSize=Y||24,this.buffer=D===void 0?3:D,this.cutoff=q||.25,this.fontFamily=K||"sans-serif",this.fontWeight=de||"normal",this.radius=J||8;var ne=this.size=this.fontSize+this.buffer*2;this.canvas=document.createElement("canvas"),this.canvas.width=this.canvas.height=ne,this.ctx=this.canvas.getContext("2d"),this.ctx.font=this.fontWeight+" "+this.fontSize+"px "+this.fontFamily,this.ctx.textBaseline="middle",this.ctx.fillStyle="black",this.gridOuter=new Float64Array(ne*ne),this.gridInner=new Float64Array(ne*ne),this.f=new Float64Array(ne),this.d=new Float64Array(ne),this.z=new Float64Array(ne+1),this.v=new Int16Array(ne),this.middle=Math.round(ne/2*(navigator.userAgent.indexOf("Gecko/")>=0?1.2:1))}C.prototype.draw=function(Y){this.ctx.clearRect(0,0,this.size,this.size),this.ctx.fillText(Y,this.buffer,this.middle);for(var D=this.ctx.getImageData(0,0,this.size,this.size),J=new Uint8ClampedArray(this.size*this.size),q=0;q<this.size*this.size;q++){var K=D.data[q*4+3]/255;this.gridOuter[q]=K===1?0:K===0?x:Math.pow(Math.max(0,.5-K),2),this.gridInner[q]=K===1?x:K===0?0:Math.pow(Math.max(0,K-.5),2)}for(M(this.gridOuter,this.size,this.size,this.f,this.d,this.v,this.z),M(this.gridInner,this.size,this.size,this.f,this.d,this.v,this.z),q=0;q<this.size*this.size;q++){var de=this.gridOuter[q]-this.gridInner[q];J[q]=Math.max(0,Math.min(255,Math.round(255-255*(de/this.radius+this.cutoff))))}return J};function M(Y,D,J,q,K,de,ne){for(var we=0;we<D;we++){for(var Ue=0;Ue<J;Ue++)q[Ue]=Y[Ue*D+we];for(g(q,K,de,ne,J),Ue=0;Ue<J;Ue++)Y[Ue*D+we]=K[Ue]}for(Ue=0;Ue<J;Ue++){for(we=0;we<D;we++)q[we]=Y[Ue*D+we];for(g(q,K,de,ne,D),we=0;we<D;we++)Y[Ue*D+we]=Math.sqrt(K[we])}}function g(Y,D,J,q,K){J[0]=0,q[0]=-x,q[1]=+x;for(var de=1,ne=0;de<K;de++){for(var we=(Y[de]+de*de-(Y[J[ne]]+J[ne]*J[ne]))/(2*de-2*J[ne]);we<=q[ne];)ne--,we=(Y[de]+de*de-(Y[J[ne]]+J[ne]*J[ne]))/(2*de-2*J[ne]);ne++,J[ne]=de,q[ne]=we,q[ne+1]=+x}for(de=0,ne=0;de<K;de++){for(;q[ne+1]<de;)ne++;D[de]=(de-J[ne])*(de-J[ne])+Y[J[ne]]}}S.default=L;var P=function(D,J){this.requestManager=D,this.localIdeographFontFamily=J,this.entries={}};P.prototype.setURL=function(D){this.url=D},P.prototype.getGlyphs=function(D,J){var q=this,K=[];for(var de in D)for(var ne=0,we=D[de];ne<we.length;ne+=1){var Ue=we[ne];K.push({stack:de,id:Ue})}i.asyncAll(K,function(ft,Xt){var hr=ft.stack,qt=ft.id,Ve=q.entries[hr];Ve||(Ve=q.entries[hr]={glyphs:{},requests:{},ranges:{}});var Qe=Ve.glyphs[qt];if(Qe!==void 0){Xt(null,{stack:hr,id:qt,glyph:Qe});return}if(Qe=q._tinySDF(Ve,hr,qt),Qe){Ve.glyphs[qt]=Qe,Xt(null,{stack:hr,id:qt,glyph:Qe});return}var at=Math.floor(qt/256);if(at*256>65535){Xt(new Error("glyphs > 65535 not supported"));return}if(Ve.ranges[at]){Xt(null,{stack:hr,id:qt,glyph:Qe});return}var Ct=Ve.requests[at];Ct||(Ct=Ve.requests[at]=[],P.loadGlyphRange(hr,at,q.url,q.requestManager,function(Ot,Rt){if(Rt){for(var Bt in Rt)q._doesCharSupportLocalGlyph(+Bt)||(Ve.glyphs[+Bt]=Rt[+Bt]);Ve.ranges[at]=!0}for(var Dt=0,yt=Ct;Dt<yt.length;Dt+=1){var Pt=yt[Dt];Pt(Ot,Rt)}delete Ve.requests[at]})),Ct.push(function(Ot,Rt){Ot?Xt(Ot):Rt&&Xt(null,{stack:hr,id:qt,glyph:Rt[qt]||null})})},function(ft,Xt){if(ft)J(ft);else if(Xt){for(var hr={},qt=0,Ve=Xt;qt<Ve.length;qt+=1){var Qe=Ve[qt],at=Qe.stack,Ct=Qe.id,Ot=Qe.glyph;(hr[at]||(hr[at]={}))[Ct]=Ot&&{id:Ot.id,bitmap:Ot.bitmap.clone(),metrics:Ot.metrics}}J(null,hr)}})},P.prototype._doesCharSupportLocalGlyph=function(D){return!!this.localIdeographFontFamily&&(i.isChar["CJK Unified Ideographs"](D)||i.isChar["Hangul Syllables"](D)||i.isChar.Hiragana(D)||i.isChar.Katakana(D))},P.prototype._tinySDF=function(D,J,q){var K=this.localIdeographFontFamily;if(K&&this._doesCharSupportLocalGlyph(q)){var de=D.tinySDF;if(!de){var ne="400";/bold/i.test(J)?ne="900":/medium/i.test(J)?ne="500":/light/i.test(J)&&(ne="200"),de=D.tinySDF=new P.TinySDF(24,3,8,.25,K,ne)}return{id:q,bitmap:new i.AlphaImage({width:30,height:30},de.draw(String.fromCharCode(q))),metrics:{width:24,height:24,left:0,top:-8,advance:24}}}},P.loadGlyphRange=E,P.TinySDF=S;var T=function(){this.specification=i.styleSpec.light.position};T.prototype.possiblyEvaluate=function(D,J){return i.sphericalToCartesian(D.expression.evaluate(J))},T.prototype.interpolate=function(D,J,q){return{x:i.number(D.x,J.x,q),y:i.number(D.y,J.y,q),z:i.number(D.z,J.z,q)}};var z=new i.Properties({anchor:new i.DataConstantProperty(i.styleSpec.light.anchor),position:new T,color:new i.DataConstantProperty(i.styleSpec.light.color),intensity:new i.DataConstantProperty(i.styleSpec.light.intensity)}),O="-transition",V=function(Y){function D(J){Y.call(this),this._transitionable=new i.Transitionable(z),this.setLight(J),this._transitioning=this._transitionable.untransitioned()}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getLight=function(){return this._transitionable.serialize()},D.prototype.setLight=function(q,K){if(K===void 0&&(K={}),!this._validate(i.validateLight,q,K))for(var de in q){var ne=q[de];i.endsWith(de,O)?this._transitionable.setTransition(de.slice(0,-O.length),ne):this._transitionable.setValue(de,ne)}},D.prototype.updateTransitions=function(q){this._transitioning=this._transitionable.transitioned(q,this._transitioning)},D.prototype.hasTransition=function(){return this._transitioning.hasTransition()},D.prototype.recalculate=function(q){this.properties=this._transitioning.possiblyEvaluate(q)},D.prototype._validate=function(q,K,de){return de&&de.validate===!1?!1:i.emitValidationErrors(this,q.call(i.validateStyle,i.extend({value:K,style:{glyphs:!0,sprite:!0},styleSpec:i.styleSpec})))},D}(i.Evented),G=function(D,J){this.width=D,this.height=J,this.nextRow=0,this.data=new Uint8Array(this.width*this.height),this.dashEntry={}};G.prototype.getDash=function(D,J){var q=D.join(",")+String(J);return this.dashEntry[q]||(this.dashEntry[q]=this.addDash(D,J)),this.dashEntry[q]},G.prototype.getDashRanges=function(D,J,q){var K=D.length%2===1,de=[],ne=K?-D[D.length-1]*q:0,we=D[0]*q,Ue=!0;de.push({left:ne,right:we,isDash:Ue,zeroLength:D[0]===0});for(var ft=D[0],Xt=1;Xt<D.length;Xt++){Ue=!Ue;var hr=D[Xt];ne=ft*q,ft+=hr,we=ft*q,de.push({left:ne,right:we,isDash:Ue,zeroLength:hr===0})}return de},G.prototype.addRoundDash=function(D,J,q){for(var K=J/2,de=-q;de<=q;de++)for(var ne=this.nextRow+q+de,we=this.width*ne,Ue=0,ft=D[Ue],Xt=0;Xt<this.width;Xt++){Xt/ft.right>1&&(ft=D[++Ue]);var hr=Math.abs(Xt-ft.left),qt=Math.abs(Xt-ft.right),Ve=Math.min(hr,qt),Qe=void 0,at=de/q*(K+1);if(ft.isDash){var Ct=K-Math.abs(at);Qe=Math.sqrt(Ve*Ve+Ct*Ct)}else Qe=K-Math.sqrt(Ve*Ve+at*at);this.data[we+Xt]=Math.max(0,Math.min(255,Qe+128))}},G.prototype.addRegularDash=function(D){for(var J=D.length-1;J>=0;--J){var q=D[J],K=D[J+1];q.zeroLength?D.splice(J,1):K&&K.isDash===q.isDash&&(K.left=q.left,D.splice(J,1))}var de=D[0],ne=D[D.length-1];de.isDash===ne.isDash&&(de.left=ne.left-this.width,ne.right=de.right+this.width);for(var we=this.width*this.nextRow,Ue=0,ft=D[Ue],Xt=0;Xt<this.width;Xt++){Xt/ft.right>1&&(ft=D[++Ue]);var hr=Math.abs(Xt-ft.left),qt=Math.abs(Xt-ft.right),Ve=Math.min(hr,qt),Qe=ft.isDash?Ve:-Ve;this.data[we+Xt]=Math.max(0,Math.min(255,Qe+128))}},G.prototype.addDash=function(D,J){var q=J?7:0,K=2*q+1;if(this.nextRow+K>this.height)return i.warnOnce("LineAtlas out of space"),null;for(var de=0,ne=0;ne<D.length;ne++)de+=D[ne];if(de!==0){var we=this.width/de,Ue=this.getDashRanges(D,this.width,we);J?this.addRoundDash(Ue,we,q):this.addRegularDash(Ue)}var ft={y:(this.nextRow+q+.5)/this.height,height:2*q/this.height,width:de};return this.nextRow+=K,this.dirty=!0,ft},G.prototype.bind=function(D){var J=D.gl;this.texture?(J.bindTexture(J.TEXTURE_2D,this.texture),this.dirty&&(this.dirty=!1,J.texSubImage2D(J.TEXTURE_2D,0,0,0,this.width,this.height,J.ALPHA,J.UNSIGNED_BYTE,this.data))):(this.texture=J.createTexture(),J.bindTexture(J.TEXTURE_2D,this.texture),J.texParameteri(J.TEXTURE_2D,J.TEXTURE_WRAP_S,J.REPEAT),J.texParameteri(J.TEXTURE_2D,J.TEXTURE_WRAP_T,J.REPEAT),J.texParameteri(J.TEXTURE_2D,J.TEXTURE_MIN_FILTER,J.LINEAR),J.texParameteri(J.TEXTURE_2D,J.TEXTURE_MAG_FILTER,J.LINEAR),J.texImage2D(J.TEXTURE_2D,0,J.ALPHA,this.width,this.height,0,J.ALPHA,J.UNSIGNED_BYTE,this.data))};var Z=function Y(D,J){this.workerPool=D,this.actors=[],this.currentActor=0,this.id=i.uniqueId();for(var q=this.workerPool.acquire(this.id),K=0;K<q.length;K++){var de=q[K],ne=new Y.Actor(de,J,this.id);ne.name="Worker "+K,this.actors.push(ne)}};Z.prototype.broadcast=function(D,J,q){q=q||function(){},i.asyncAll(this.actors,function(K,de){K.send(D,J,de)},q)},Z.prototype.getActor=function(){return this.currentActor=(this.currentActor+1)%this.actors.length,this.actors[this.currentActor]},Z.prototype.remove=function(){this.actors.forEach(function(D){D.remove()}),this.actors=[],this.workerPool.release(this.id)},Z.Actor=i.Actor;function j(Y,D,J){var q=function(K,de){if(K)return J(K);if(de){var ne=i.pick(i.extend(de,Y),["tiles","minzoom","maxzoom","attribution","mapbox_logo","bounds","scheme","tileSize","encoding"]);de.vector_layers&&(ne.vectorLayers=de.vector_layers,ne.vectorLayerIds=ne.vectorLayers.map(function(we){return we.id})),ne.tiles=D.canonicalizeTileset(ne,Y.url),J(null,ne)}};return Y.url?i.getJSON(D.transformRequest(D.normalizeSourceURL(Y.url),i.ResourceType.Source),q):i.browser.frame(function(){return q(null,Y)})}var N=function(D,J,q){this.bounds=i.LngLatBounds.convert(this.validateBounds(D)),this.minzoom=J||0,this.maxzoom=q||24};N.prototype.validateBounds=function(D){return!Array.isArray(D)||D.length!==4?[-180,-90,180,90]:[Math.max(-180,D[0]),Math.max(-90,D[1]),Math.min(180,D[2]),Math.min(90,D[3])]},N.prototype.contains=function(D){var J=Math.pow(2,D.z),q={minX:Math.floor(i.mercatorXfromLng(this.bounds.getWest())*J),minY:Math.floor(i.mercatorYfromLat(this.bounds.getNorth())*J),maxX:Math.ceil(i.mercatorXfromLng(this.bounds.getEast())*J),maxY:Math.ceil(i.mercatorYfromLat(this.bounds.getSouth())*J)},K=D.x>=q.minX&&D.x<q.maxX&&D.y>=q.minY&&D.y<q.maxY;return K};var H=function(Y){function D(J,q,K,de){if(Y.call(this),this.id=J,this.dispatcher=K,this.type="vector",this.minzoom=0,this.maxzoom=22,this.scheme="xyz",this.tileSize=512,this.reparseOverscaled=!0,this.isTileClipped=!0,this._loaded=!1,i.extend(this,i.pick(q,["url","scheme","tileSize","promoteId"])),this._options=i.extend({type:"vector"},q),this._collectResourceTiming=q.collectResourceTiming,this.tileSize!==512)throw new Error("vector tile sources must have a tileSize of 512");this.setEventedParent(de)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.load=function(){var q=this;this._loaded=!1,this.fire(new i.Event("dataloading",{dataType:"source"})),this._tileJSONRequest=j(this._options,this.map._requestManager,function(K,de){q._tileJSONRequest=null,q._loaded=!0,K?q.fire(new i.ErrorEvent(K)):de&&(i.extend(q,de),de.bounds&&(q.tileBounds=new N(de.bounds,q.minzoom,q.maxzoom)),i.postTurnstileEvent(de.tiles,q.map._requestManager._customAccessToken),i.postMapLoadEvent(de.tiles,q.map._getMapId(),q.map._requestManager._skuToken,q.map._requestManager._customAccessToken),q.fire(new i.Event("data",{dataType:"source",sourceDataType:"metadata"})),q.fire(new i.Event("data",{dataType:"source",sourceDataType:"content"})))})},D.prototype.loaded=function(){return this._loaded},D.prototype.hasTile=function(q){return!this.tileBounds||this.tileBounds.contains(q.canonical)},D.prototype.onAdd=function(q){this.map=q,this.load()},D.prototype.setSourceProperty=function(q){this._tileJSONRequest&&this._tileJSONRequest.cancel(),q();var K=this.map.style.sourceCaches[this.id];K.clearTiles(),this.load()},D.prototype.setTiles=function(q){var K=this;return this.setSourceProperty(function(){K._options.tiles=q}),this},D.prototype.setUrl=function(q){var K=this;return this.setSourceProperty(function(){K.url=q,K._options.url=q}),this},D.prototype.onRemove=function(){this._tileJSONRequest&&(this._tileJSONRequest.cancel(),this._tileJSONRequest=null)},D.prototype.serialize=function(){return i.extend({},this._options)},D.prototype.loadTile=function(q,K){var de=this.map._requestManager.normalizeTileURL(q.tileID.canonical.url(this.tiles,this.scheme)),ne={request:this.map._requestManager.transformRequest(de,i.ResourceType.Tile),uid:q.uid,tileID:q.tileID,zoom:q.tileID.overscaledZ,tileSize:this.tileSize*q.tileID.overscaleFactor(),type:this.type,source:this.id,pixelRatio:i.browser.devicePixelRatio,showCollisionBoxes:this.map.showCollisionBoxes,promoteId:this.promoteId};ne.request.collectResourceTiming=this._collectResourceTiming,!q.actor||q.state==="expired"?(q.actor=this.dispatcher.getActor(),q.request=q.actor.send("loadTile",ne,we.bind(this))):q.state==="loading"?q.reloadCallback=K:q.request=q.actor.send("reloadTile",ne,we.bind(this));function we(Ue,ft){if(delete q.request,q.aborted)return K(null);if(Ue&&Ue.status!==404)return K(Ue);ft&&ft.resourceTiming&&(q.resourceTiming=ft.resourceTiming),this.map._refreshExpiredTiles&&ft&&q.setExpiryData(ft),q.loadVectorData(ft,this.map.painter),i.cacheEntryPossiblyAdded(this.dispatcher),K(null),q.reloadCallback&&(this.loadTile(q,q.reloadCallback),q.reloadCallback=null)}},D.prototype.abortTile=function(q){q.request&&(q.request.cancel(),delete q.request),q.actor&&q.actor.send("abortTile",{uid:q.uid,type:this.type,source:this.id},void 0)},D.prototype.unloadTile=function(q){q.unloadVectorData(),q.actor&&q.actor.send("removeTile",{uid:q.uid,type:this.type,source:this.id},void 0)},D.prototype.hasTransition=function(){return!1},D}(i.Evented),te=function(Y){function D(J,q,K,de){Y.call(this),this.id=J,this.dispatcher=K,this.setEventedParent(de),this.type="raster",this.minzoom=0,this.maxzoom=22,this.roundZoom=!0,this.scheme="xyz",this.tileSize=512,this._loaded=!1,this._options=i.extend({type:"raster"},q),i.extend(this,i.pick(q,["url","scheme","tileSize"]))}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.load=function(){var q=this;this._loaded=!1,this.fire(new i.Event("dataloading",{dataType:"source"})),this._tileJSONRequest=j(this._options,this.map._requestManager,function(K,de){q._tileJSONRequest=null,q._loaded=!0,K?q.fire(new i.ErrorEvent(K)):de&&(i.extend(q,de),de.bounds&&(q.tileBounds=new N(de.bounds,q.minzoom,q.maxzoom)),i.postTurnstileEvent(de.tiles),i.postMapLoadEvent(de.tiles,q.map._getMapId(),q.map._requestManager._skuToken),q.fire(new i.Event("data",{dataType:"source",sourceDataType:"metadata"})),q.fire(new i.Event("data",{dataType:"source",sourceDataType:"content"})))})},D.prototype.loaded=function(){return this._loaded},D.prototype.onAdd=function(q){this.map=q,this.load()},D.prototype.onRemove=function(){this._tileJSONRequest&&(this._tileJSONRequest.cancel(),this._tileJSONRequest=null)},D.prototype.serialize=function(){return i.extend({},this._options)},D.prototype.hasTile=function(q){return!this.tileBounds||this.tileBounds.contains(q.canonical)},D.prototype.loadTile=function(q,K){var de=this,ne=this.map._requestManager.normalizeTileURL(q.tileID.canonical.url(this.tiles,this.scheme),this.tileSize);q.request=i.getImage(this.map._requestManager.transformRequest(ne,i.ResourceType.Tile),function(we,Ue){if(delete q.request,q.aborted)q.state="unloaded",K(null);else if(we)q.state="errored",K(we);else if(Ue){de.map._refreshExpiredTiles&&q.setExpiryData(Ue),delete Ue.cacheControl,delete Ue.expires;var ft=de.map.painter.context,Xt=ft.gl;q.texture=de.map.painter.getTileTexture(Ue.width),q.texture?q.texture.update(Ue,{useMipmap:!0}):(q.texture=new i.Texture(ft,Ue,Xt.RGBA,{useMipmap:!0}),q.texture.bind(Xt.LINEAR,Xt.CLAMP_TO_EDGE,Xt.LINEAR_MIPMAP_NEAREST),ft.extTextureFilterAnisotropic&&Xt.texParameterf(Xt.TEXTURE_2D,ft.extTextureFilterAnisotropic.TEXTURE_MAX_ANISOTROPY_EXT,ft.extTextureFilterAnisotropicMax)),q.state="loaded",i.cacheEntryPossiblyAdded(de.dispatcher),K(null)}})},D.prototype.abortTile=function(q,K){q.request&&(q.request.cancel(),delete q.request),K()},D.prototype.unloadTile=function(q,K){q.texture&&this.map.painter.saveTileTexture(q.texture),K()},D.prototype.hasTransition=function(){return!1},D}(i.Evented),oe=function(Y){function D(J,q,K,de){Y.call(this,J,q,K,de),this.type="raster-dem",this.maxzoom=22,this._options=i.extend({type:"raster-dem"},q),this.encoding=q.encoding||"mapbox"}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.serialize=function(){return{type:"raster-dem",url:this.url,tileSize:this.tileSize,tiles:this.tiles,bounds:this.bounds,encoding:this.encoding}},D.prototype.loadTile=function(q,K){var de=this.map._requestManager.normalizeTileURL(q.tileID.canonical.url(this.tiles,this.scheme),this.tileSize);q.request=i.getImage(this.map._requestManager.transformRequest(de,i.ResourceType.Tile),ne.bind(this)),q.neighboringTiles=this._getNeighboringTiles(q.tileID);function ne(Ue,ft){if(delete q.request,q.aborted)q.state="unloaded",K(null);else if(Ue)q.state="errored",K(Ue);else if(ft){this.map._refreshExpiredTiles&&q.setExpiryData(ft),delete ft.cacheControl,delete ft.expires;var Xt=i.window.ImageBitmap&&ft instanceof i.window.ImageBitmap&&i.offscreenCanvasSupported(),hr=Xt?ft:i.browser.getImageData(ft,1),qt={uid:q.uid,coord:q.tileID,source:this.id,rawImageData:hr,encoding:this.encoding};(!q.actor||q.state==="expired")&&(q.actor=this.dispatcher.getActor(),q.actor.send("loadDEMTile",qt,we.bind(this)))}}function we(Ue,ft){Ue&&(q.state="errored",K(Ue)),ft&&(q.dem=ft,q.needsHillshadePrepare=!0,q.state="loaded",K(null))}},D.prototype._getNeighboringTiles=function(q){var K=q.canonical,de=Math.pow(2,K.z),ne=(K.x-1+de)%de,we=K.x===0?q.wrap-1:q.wrap,Ue=(K.x+1+de)%de,ft=K.x+1===de?q.wrap+1:q.wrap,Xt={};return Xt[new i.OverscaledTileID(q.overscaledZ,we,K.z,ne,K.y).key]={backfilled:!1},Xt[new i.OverscaledTileID(q.overscaledZ,ft,K.z,Ue,K.y).key]={backfilled:!1},K.y>0&&(Xt[new i.OverscaledTileID(q.overscaledZ,we,K.z,ne,K.y-1).key]={backfilled:!1},Xt[new i.OverscaledTileID(q.overscaledZ,q.wrap,K.z,K.x,K.y-1).key]={backfilled:!1},Xt[new i.OverscaledTileID(q.overscaledZ,ft,K.z,Ue,K.y-1).key]={backfilled:!1}),K.y+1<de&&(Xt[new i.OverscaledTileID(q.overscaledZ,we,K.z,ne,K.y+1).key]={backfilled:!1},Xt[new i.OverscaledTileID(q.overscaledZ,q.wrap,K.z,K.x,K.y+1).key]={backfilled:!1},Xt[new i.OverscaledTileID(q.overscaledZ,ft,K.z,Ue,K.y+1).key]={backfilled:!1}),Xt},D.prototype.unloadTile=function(q){q.demTexture&&this.map.painter.saveTileTexture(q.demTexture),q.fbo&&(q.fbo.destroy(),delete q.fbo),q.dem&&delete q.dem,delete q.neighboringTiles,q.state="unloaded",q.actor&&q.actor.send("removeDEMTile",{uid:q.uid,source:this.id})},D}(te),_e=function(Y){function D(J,q,K,de){Y.call(this),this.id=J,this.type="geojson",this.minzoom=0,this.maxzoom=18,this.tileSize=512,this.isTileClipped=!0,this.reparseOverscaled=!0,this._removed=!1,this._loaded=!1,this.actor=K.getActor(),this.setEventedParent(de),this._data=q.data,this._options=i.extend({},q),this._collectResourceTiming=q.collectResourceTiming,this._resourceTiming=[],q.maxzoom!==void 0&&(this.maxzoom=q.maxzoom),q.type&&(this.type=q.type),q.attribution&&(this.attribution=q.attribution),this.promoteId=q.promoteId;var ne=i.EXTENT/this.tileSize;this.workerOptions=i.extend({source:this.id,cluster:q.cluster||!1,geojsonVtOptions:{buffer:(q.buffer!==void 0?q.buffer:128)*ne,tolerance:(q.tolerance!==void 0?q.tolerance:.375)*ne,extent:i.EXTENT,maxZoom:this.maxzoom,lineMetrics:q.lineMetrics||!1,generateId:q.generateId||!1},superclusterOptions:{maxZoom:q.clusterMaxZoom!==void 0?Math.min(q.clusterMaxZoom,this.maxzoom-1):this.maxzoom-1,minPoints:Math.max(2,q.clusterMinPoints||2),extent:i.EXTENT,radius:(q.clusterRadius||50)*ne,log:!1,generateId:q.generateId||!1},clusterProperties:q.clusterProperties,filter:q.filter},q.workerOptions)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.load=function(){var q=this;this.fire(new i.Event("dataloading",{dataType:"source"})),this._updateWorkerData(function(K){if(K){q.fire(new i.ErrorEvent(K));return}var de={dataType:"source",sourceDataType:"metadata"};q._collectResourceTiming&&q._resourceTiming&&q._resourceTiming.length>0&&(de.resourceTiming=q._resourceTiming,q._resourceTiming=[]),q.fire(new i.Event("data",de))})},D.prototype.onAdd=function(q){this.map=q,this.load()},D.prototype.setData=function(q){var K=this;return this._data=q,this.fire(new i.Event("dataloading",{dataType:"source"})),this._updateWorkerData(function(de){if(de){K.fire(new i.ErrorEvent(de));return}var ne={dataType:"source",sourceDataType:"content"};K._collectResourceTiming&&K._resourceTiming&&K._resourceTiming.length>0&&(ne.resourceTiming=K._resourceTiming,K._resourceTiming=[]),K.fire(new i.Event("data",ne))}),this},D.prototype.getClusterExpansionZoom=function(q,K){return this.actor.send("geojson.getClusterExpansionZoom",{clusterId:q,source:this.id},K),this},D.prototype.getClusterChildren=function(q,K){return this.actor.send("geojson.getClusterChildren",{clusterId:q,source:this.id},K),this},D.prototype.getClusterLeaves=function(q,K,de,ne){return this.actor.send("geojson.getClusterLeaves",{source:this.id,clusterId:q,limit:K,offset:de},ne),this},D.prototype._updateWorkerData=function(q){var K=this;this._loaded=!1;var de=i.extend({},this.workerOptions),ne=this._data;typeof ne=="string"?(de.request=this.map._requestManager.transformRequest(i.browser.resolveURL(ne),i.ResourceType.Source),de.request.collectResourceTiming=this._collectResourceTiming):de.data=JSON.stringify(ne),this.actor.send(this.type+".loadData",de,function(we,Ue){K._removed||Ue&&Ue.abandoned||(K._loaded=!0,Ue&&Ue.resourceTiming&&Ue.resourceTiming[K.id]&&(K._resourceTiming=Ue.resourceTiming[K.id].slice(0)),K.actor.send(K.type+".coalesce",{source:de.source},null),q(we))})},D.prototype.loaded=function(){return this._loaded},D.prototype.loadTile=function(q,K){var de=this,ne=q.actor?"reloadTile":"loadTile";q.actor=this.actor;var we={type:this.type,uid:q.uid,tileID:q.tileID,zoom:q.tileID.overscaledZ,maxZoom:this.maxzoom,tileSize:this.tileSize,source:this.id,pixelRatio:i.browser.devicePixelRatio,showCollisionBoxes:this.map.showCollisionBoxes,promoteId:this.promoteId};q.request=this.actor.send(ne,we,function(Ue,ft){return delete q.request,q.unloadVectorData(),q.aborted?K(null):Ue?K(Ue):(q.loadVectorData(ft,de.map.painter,ne==="reloadTile"),K(null))})},D.prototype.abortTile=function(q){q.request&&(q.request.cancel(),delete q.request),q.aborted=!0},D.prototype.unloadTile=function(q){q.unloadVectorData(),this.actor.send("removeTile",{uid:q.uid,type:this.type,source:this.id})},D.prototype.onRemove=function(){this._removed=!0,this.actor.send("removeSource",{type:this.type,source:this.id})},D.prototype.serialize=function(){return i.extend({},this._options,{type:this.type,data:this._data})},D.prototype.hasTransition=function(){return!1},D}(i.Evented),Ee=i.createLayout([{name:"a_pos",type:"Int16",components:2},{name:"a_texture_pos",type:"Int16",components:2}]),Ce=function(Y){function D(J,q,K,de){Y.call(this),this.id=J,this.dispatcher=K,this.coordinates=q.coordinates,this.type="image",this.minzoom=0,this.maxzoom=22,this.tileSize=512,this.tiles={},this._loaded=!1,this.setEventedParent(de),this.options=q}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.load=function(q,K){var de=this;this._loaded=!1,this.fire(new i.Event("dataloading",{dataType:"source"})),this.url=this.options.url,i.getImage(this.map._requestManager.transformRequest(this.url,i.ResourceType.Image),function(ne,we){de._loaded=!0,ne?de.fire(new i.ErrorEvent(ne)):we&&(de.image=we,q&&(de.coordinates=q),K&&K(),de._finishLoading())})},D.prototype.loaded=function(){return this._loaded},D.prototype.updateImage=function(q){var K=this;return!this.image||!q.url?this:(this.options.url=q.url,this.load(q.coordinates,function(){K.texture=null}),this)},D.prototype._finishLoading=function(){this.map&&(this.setCoordinates(this.coordinates),this.fire(new i.Event("data",{dataType:"source",sourceDataType:"metadata"})))},D.prototype.onAdd=function(q){this.map=q,this.load()},D.prototype.setCoordinates=function(q){var K=this;this.coordinates=q;var de=q.map(i.MercatorCoordinate.fromLngLat);this.tileID=me(de),this.minzoom=this.maxzoom=this.tileID.z;var ne=de.map(function(we){return K.tileID.getTilePoint(we)._round()});return this._boundsArray=new i.StructArrayLayout4i8,this._boundsArray.emplaceBack(ne[0].x,ne[0].y,0,0),this._boundsArray.emplaceBack(ne[1].x,ne[1].y,i.EXTENT,0),this._boundsArray.emplaceBack(ne[3].x,ne[3].y,0,i.EXTENT),this._boundsArray.emplaceBack(ne[2].x,ne[2].y,i.EXTENT,i.EXTENT),this.boundsBuffer&&(this.boundsBuffer.destroy(),delete this.boundsBuffer),this.fire(new i.Event("data",{dataType:"source",sourceDataType:"content"})),this},D.prototype.prepare=function(){if(!(Object.keys(this.tiles).length===0||!this.image)){var q=this.map.painter.context,K=q.gl;this.boundsBuffer||(this.boundsBuffer=q.createVertexBuffer(this._boundsArray,Ee.members)),this.boundsSegments||(this.boundsSegments=i.SegmentVector.simpleSegment(0,0,4,2)),this.texture||(this.texture=new i.Texture(q,this.image,K.RGBA),this.texture.bind(K.LINEAR,K.CLAMP_TO_EDGE));for(var de in this.tiles){var ne=this.tiles[de];ne.state!=="loaded"&&(ne.state="loaded",ne.texture=this.texture)}}},D.prototype.loadTile=function(q,K){this.tileID&&this.tileID.equals(q.tileID.canonical)?(this.tiles[String(q.tileID.wrap)]=q,q.buckets={},K(null)):(q.state="errored",K(null))},D.prototype.serialize=function(){return{type:"image",url:this.options.url,coordinates:this.coordinates}},D.prototype.hasTransition=function(){return!1},D}(i.Evented);function me(Y){for(var D=1/0,J=1/0,q=-1/0,K=-1/0,de=0,ne=Y;de<ne.length;de+=1){var we=ne[de];D=Math.min(D,we.x),J=Math.min(J,we.y),q=Math.max(q,we.x),K=Math.max(K,we.y)}var Ue=q-D,ft=K-J,Xt=Math.max(Ue,ft),hr=Math.max(0,Math.floor(-Math.log(Xt)/Math.LN2)),qt=Math.pow(2,hr);return new i.CanonicalTileID(hr,Math.floor((D+q)/2*qt),Math.floor((J+K)/2*qt))}var ie=function(Y){function D(J,q,K,de){Y.call(this,J,q,K,de),this.roundZoom=!0,this.type="video",this.options=q}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.load=function(){var q=this;this._loaded=!1;var K=this.options;this.urls=[];for(var de=0,ne=K.urls;de<ne.length;de+=1){var we=ne[de];this.urls.push(this.map._requestManager.transformRequest(we,i.ResourceType.Source).url)}i.getVideo(this.urls,function(Ue,ft){q._loaded=!0,Ue?q.fire(new i.ErrorEvent(Ue)):ft&&(q.video=ft,q.video.loop=!0,q.video.addEventListener("playing",function(){q.map.triggerRepaint()}),q.map&&q.video.play(),q._finishLoading())})},D.prototype.pause=function(){this.video&&this.video.pause()},D.prototype.play=function(){this.video&&this.video.play()},D.prototype.seek=function(q){if(this.video){var K=this.video.seekable;q<K.start(0)||q>K.end(0)?this.fire(new i.ErrorEvent(new i.ValidationError("sources."+this.id,null,"Playback for this video can be set only between the "+K.start(0)+" and "+K.end(0)+"-second mark."))):this.video.currentTime=q}},D.prototype.getVideo=function(){return this.video},D.prototype.onAdd=function(q){this.map||(this.map=q,this.load(),this.video&&(this.video.play(),this.setCoordinates(this.coordinates)))},D.prototype.prepare=function(){if(!(Object.keys(this.tiles).length===0||this.video.readyState<2)){var q=this.map.painter.context,K=q.gl;this.boundsBuffer||(this.boundsBuffer=q.createVertexBuffer(this._boundsArray,Ee.members)),this.boundsSegments||(this.boundsSegments=i.SegmentVector.simpleSegment(0,0,4,2)),this.texture?this.video.paused||(this.texture.bind(K.LINEAR,K.CLAMP_TO_EDGE),K.texSubImage2D(K.TEXTURE_2D,0,0,0,K.RGBA,K.UNSIGNED_BYTE,this.video)):(this.texture=new i.Texture(q,this.video,K.RGBA),this.texture.bind(K.LINEAR,K.CLAMP_TO_EDGE));for(var de in this.tiles){var ne=this.tiles[de];ne.state!=="loaded"&&(ne.state="loaded",ne.texture=this.texture)}}},D.prototype.serialize=function(){return{type:"video",urls:this.urls,coordinates:this.coordinates}},D.prototype.hasTransition=function(){return this.video&&!this.video.paused},D}(Ce),Se=function(Y){function D(J,q,K,de){Y.call(this,J,q,K,de),q.coordinates?(!Array.isArray(q.coordinates)||q.coordinates.length!==4||q.coordinates.some(function(ne){return!Array.isArray(ne)||ne.length!==2||ne.some(function(we){return typeof we!="number"})}))&&this.fire(new i.ErrorEvent(new i.ValidationError("sources."+J,null,'"coordinates" property must be an array of 4 longitude/latitude array pairs'))):this.fire(new i.ErrorEvent(new i.ValidationError("sources."+J,null,'missing required property "coordinates"'))),q.animate&&typeof q.animate!="boolean"&&this.fire(new i.ErrorEvent(new i.ValidationError("sources."+J,null,'optional "animate" property must be a boolean value'))),q.canvas?typeof q.canvas!="string"&&!(q.canvas instanceof i.window.HTMLCanvasElement)&&this.fire(new i.ErrorEvent(new i.ValidationError("sources."+J,null,'"canvas" must be either a string representing the ID of the canvas element from which to read, or an HTMLCanvasElement instance'))):this.fire(new i.ErrorEvent(new i.ValidationError("sources."+J,null,'missing required property "canvas"'))),this.options=q,this.animate=q.animate!==void 0?q.animate:!0}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.load=function(){if(this._loaded=!0,this.canvas||(this.canvas=this.options.canvas instanceof i.window.HTMLCanvasElement?this.options.canvas:i.window.document.getElementById(this.options.canvas)),this.width=this.canvas.width,this.height=this.canvas.height,this._hasInvalidDimensions()){this.fire(new i.ErrorEvent(new Error("Canvas dimensions cannot be less than or equal to zero.")));return}this.play=function(){this._playing=!0,this.map.triggerRepaint()},this.pause=function(){this._playing&&(this.prepare(),this._playing=!1)},this._finishLoading()},D.prototype.getCanvas=function(){return this.canvas},D.prototype.onAdd=function(q){this.map=q,this.load(),this.canvas&&this.animate&&this.play()},D.prototype.onRemove=function(){this.pause()},D.prototype.prepare=function(){var q=!1;if(this.canvas.width!==this.width&&(this.width=this.canvas.width,q=!0),this.canvas.height!==this.height&&(this.height=this.canvas.height,q=!0),!this._hasInvalidDimensions()&&Object.keys(this.tiles).length!==0){var K=this.map.painter.context,de=K.gl;this.boundsBuffer||(this.boundsBuffer=K.createVertexBuffer(this._boundsArray,Ee.members)),this.boundsSegments||(this.boundsSegments=i.SegmentVector.simpleSegment(0,0,4,2)),this.texture?(q||this._playing)&&this.texture.update(this.canvas,{premultiply:!0}):this.texture=new i.Texture(K,this.canvas,de.RGBA,{premultiply:!0});for(var ne in this.tiles){var we=this.tiles[ne];we.state!=="loaded"&&(we.state="loaded",we.texture=this.texture)}}},D.prototype.serialize=function(){return{type:"canvas",coordinates:this.coordinates}},D.prototype.hasTransition=function(){return this._playing},D.prototype._hasInvalidDimensions=function(){for(var q=0,K=[this.canvas.width,this.canvas.height];q<K.length;q+=1){var de=K[q];if(isNaN(de)||de<=0)return!0}return!1},D}(Ce),Le={vector:H,raster:te,"raster-dem":oe,geojson:_e,video:ie,image:Ce,canvas:Se},Ae=function(Y,D,J,q){var K=new Le[D.type](Y,D,J,q);if(K.id!==Y)throw new Error("Expected Source id to be "+Y+" instead of "+K.id);return i.bindAll(["load","abort","unload","serialize","prepare"],K),K},Fe=function(Y){return Le[Y]},Pe=function(Y,D){Le[Y]=D};function ge(Y,D){var J=i.identity([]);return i.translate(J,J,[1,1,0]),i.scale(J,J,[Y.width*.5,Y.height*.5,1]),i.multiply(J,J,Y.calculatePosMatrix(D.toUnwrapped()))}function Re(Y,D,J){if(Y)for(var q=0,K=Y;q<K.length;q+=1){var de=K[q],ne=D[de];if(ne&&ne.source===J&&ne.type==="fill-extrusion")return!0}else for(var we in D){var Ue=D[we];if(Ue.source===J&&Ue.type==="fill-extrusion")return!0}return!1}function ce(Y,D,J,q,K,de){var ne=Re(K&&K.layers,D,Y.id),we=de.maxPitchScaleFactor(),Ue=Y.tilesIn(q,we,ne);Ue.sort(pt);for(var ft=[],Xt=0,hr=Ue;Xt<hr.length;Xt+=1){var qt=hr[Xt];ft.push({wrappedTileID:qt.tileID.wrapped().key,queryResults:qt.tile.queryRenderedFeatures(D,J,Y._state,qt.queryGeometry,qt.cameraQueryGeometry,qt.scale,K,de,we,ge(Y.transform,qt.tileID))})}var Ve=Zt(ft);for(var Qe in Ve)Ve[Qe].forEach(function(at){var Ct=at.feature,Ot=Y.getFeatureState(Ct.layer["source-layer"],Ct.id);Ct.source=Ct.layer.source,Ct.layer["source-layer"]&&(Ct.sourceLayer=Ct.layer["source-layer"]),Ct.state=Ot});return Ve}function Ze(Y,D,J,q,K,de,ne){for(var we={},Ue=de.queryRenderedSymbols(q),ft=[],Xt=0,hr=Object.keys(Ue).map(Number);Xt<hr.length;Xt+=1){var qt=hr[Xt];ft.push(ne[qt])}ft.sort(pt);for(var Ve=function(){var Rt=at[Qe],Bt=Rt.featureIndex.lookupSymbolFeatures(Ue[Rt.bucketInstanceId],D,Rt.bucketIndex,Rt.sourceLayerIndex,K.filter,K.layers,K.availableImages,Y);for(var Dt in Bt){var yt=we[Dt]=we[Dt]||[],Pt=Bt[Dt];Pt.sort(function(Ur,Di){var fi=Rt.featureSortOrder;if(fi){var Ti=fi.indexOf(Ur.featureIndex),gn=fi.indexOf(Di.featureIndex);return gn-Ti}else return Di.featureIndex-Ur.featureIndex});for(var ht=0,ur=Pt;ht<ur.length;ht+=1){var br=ur[ht];yt.push(br)}}},Qe=0,at=ft;Qe<at.length;Qe+=1)Ve();var Ct=function(Rt){we[Rt].forEach(function(Bt){var Dt=Bt.feature,yt=Y[Rt],Pt=J[yt.source],ht=Pt.getFeatureState(Dt.layer["source-layer"],Dt.id);Dt.source=Dt.layer.source,Dt.layer["source-layer"]&&(Dt.sourceLayer=Dt.layer["source-layer"]),Dt.state=ht})};for(var Ot in we)Ct(Ot);return we}function ut(Y,D){for(var J=Y.getRenderableIds().map(function(Ue){return Y.getTileByID(Ue)}),q=[],K={},de=0;de<J.length;de++){var ne=J[de],we=ne.tileID.canonical.key;K[we]||(K[we]=!0,ne.querySourceFeatures(q,D))}return q}function pt(Y,D){var J=Y.tileID,q=D.tileID;return J.overscaledZ-q.overscaledZ||J.canonical.y-q.canonical.y||J.wrap-q.wrap||J.canonical.x-q.canonical.x}function Zt(Y){for(var D={},J={},q=0,K=Y;q<K.length;q+=1){var de=K[q],ne=de.queryResults,we=de.wrappedTileID,Ue=J[we]=J[we]||{};for(var ft in ne)for(var Xt=ne[ft],hr=Ue[ft]=Ue[ft]||{},qt=D[ft]=D[ft]||[],Ve=0,Qe=Xt;Ve<Qe.length;Ve+=1){var at=Qe[Ve];hr[at.featureIndex]||(hr[at.featureIndex]=!0,qt.push(at))}}return D}var st=function(D,J){this.max=D,this.onRemove=J,this.reset()};st.prototype.reset=function(){for(var D in this.data)for(var J=0,q=this.data[D];J<q.length;J+=1){var K=q[J];K.timeout&&clearTimeout(K.timeout),this.onRemove(K.value)}return this.data={},this.order=[],this},st.prototype.add=function(D,J,q){var K=this,de=D.wrapped().key;this.data[de]===void 0&&(this.data[de]=[]);var ne={value:J,timeout:void 0};if(q!==void 0&&(ne.timeout=setTimeout(function(){K.remove(D,ne)},q)),this.data[de].push(ne),this.order.push(de),this.order.length>this.max){var we=this._getAndRemoveByKey(this.order[0]);we&&this.onRemove(we)}return this},st.prototype.has=function(D){return D.wrapped().key in this.data},st.prototype.getAndRemove=function(D){return this.has(D)?this._getAndRemoveByKey(D.wrapped().key):null},st.prototype._getAndRemoveByKey=function(D){var J=this.data[D].shift();return J.timeout&&clearTimeout(J.timeout),this.data[D].length===0&&delete this.data[D],this.order.splice(this.order.indexOf(D),1),J.value},st.prototype.getByKey=function(D){var J=this.data[D];return J?J[0].value:null},st.prototype.get=function(D){if(!this.has(D))return null;var J=this.data[D.wrapped().key][0];return J.value},st.prototype.remove=function(D,J){if(!this.has(D))return this;var q=D.wrapped().key,K=J===void 0?0:this.data[q].indexOf(J),de=this.data[q][K];return this.data[q].splice(K,1),de.timeout&&clearTimeout(de.timeout),this.data[q].length===0&&delete this.data[q],this.onRemove(de.value),this.order.splice(this.order.indexOf(q),1),this},st.prototype.setMaxSize=function(D){for(this.max=D;this.order.length>this.max;){var J=this._getAndRemoveByKey(this.order[0]);J&&this.onRemove(J)}return this},st.prototype.filter=function(D){var J=[];for(var q in this.data)for(var K=0,de=this.data[q];K<de.length;K+=1){var ne=de[K];D(ne.value)||J.push(ne)}for(var we=0,Ue=J;we<Ue.length;we+=1){var ft=Ue[we];this.remove(ft.value.tileID,ft)}};var lt=function(D,J,q){this.context=D;var K=D.gl;this.buffer=K.createBuffer(),this.dynamicDraw=!!q,this.context.unbindVAO(),D.bindElementBuffer.set(this.buffer),K.bufferData(K.ELEMENT_ARRAY_BUFFER,J.arrayBuffer,this.dynamicDraw?K.DYNAMIC_DRAW:K.STATIC_DRAW),this.dynamicDraw||delete J.arrayBuffer};lt.prototype.bind=function(){this.context.bindElementBuffer.set(this.buffer)},lt.prototype.updateData=function(D){var J=this.context.gl;this.context.unbindVAO(),this.bind(),J.bufferSubData(J.ELEMENT_ARRAY_BUFFER,0,D.arrayBuffer)},lt.prototype.destroy=function(){var D=this.context.gl;this.buffer&&(D.deleteBuffer(this.buffer),delete this.buffer)};var Gt={Int8:"BYTE",Uint8:"UNSIGNED_BYTE",Int16:"SHORT",Uint16:"UNSIGNED_SHORT",Int32:"INT",Uint32:"UNSIGNED_INT",Float32:"FLOAT"},Nt=function(D,J,q,K){this.length=J.length,this.attributes=q,this.itemSize=J.bytesPerElement,this.dynamicDraw=K,this.context=D;var de=D.gl;this.buffer=de.createBuffer(),D.bindVertexBuffer.set(this.buffer),de.bufferData(de.ARRAY_BUFFER,J.arrayBuffer,this.dynamicDraw?de.DYNAMIC_DRAW:de.STATIC_DRAW),this.dynamicDraw||delete J.arrayBuffer};Nt.prototype.bind=function(){this.context.bindVertexBuffer.set(this.buffer)},Nt.prototype.updateData=function(D){var J=this.context.gl;this.bind(),J.bufferSubData(J.ARRAY_BUFFER,0,D.arrayBuffer)},Nt.prototype.enableAttributes=function(D,J){for(var q=0;q<this.attributes.length;q++){var K=this.attributes[q],de=J.attributes[K.name];de!==void 0&&D.enableVertexAttribArray(de)}},Nt.prototype.setVertexAttribPointers=function(D,J,q){for(var K=0;K<this.attributes.length;K++){var de=this.attributes[K],ne=J.attributes[de.name];ne!==void 0&&D.vertexAttribPointer(ne,de.components,D[Gt[de.type]],!1,this.itemSize,de.offset+this.itemSize*(q||0))}},Nt.prototype.destroy=function(){var D=this.context.gl;this.buffer&&(D.deleteBuffer(this.buffer),delete this.buffer)};var Jt=function(D){this.gl=D.gl,this.default=this.getDefault(),this.current=this.default,this.dirty=!1};Jt.prototype.get=function(){return this.current},Jt.prototype.set=function(D){},Jt.prototype.getDefault=function(){return this.default},Jt.prototype.setDefault=function(){this.set(this.default)};var sr=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return i.Color.transparent},D.prototype.set=function(q){var K=this.current;q.r===K.r&&q.g===K.g&&q.b===K.b&&q.a===K.a&&!this.dirty||(this.gl.clearColor(q.r,q.g,q.b,q.a),this.current=q,this.dirty=!1)},D}(Jt),wr=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return 1},D.prototype.set=function(q){q===this.current&&!this.dirty||(this.gl.clearDepth(q),this.current=q,this.dirty=!1)},D}(Jt),cr=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return 0},D.prototype.set=function(q){q===this.current&&!this.dirty||(this.gl.clearStencil(q),this.current=q,this.dirty=!1)},D}(Jt),$e=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return[!0,!0,!0,!0]},D.prototype.set=function(q){var K=this.current;q[0]===K[0]&&q[1]===K[1]&&q[2]===K[2]&&q[3]===K[3]&&!this.dirty||(this.gl.colorMask(q[0],q[1],q[2],q[3]),this.current=q,this.dirty=!1)},D}(Jt),St=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return!0},D.prototype.set=function(q){q===this.current&&!this.dirty||(this.gl.depthMask(q),this.current=q,this.dirty=!1)},D}(Jt),Qt=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return 255},D.prototype.set=function(q){q===this.current&&!this.dirty||(this.gl.stencilMask(q),this.current=q,this.dirty=!1)},D}(Jt),Vt=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return{func:this.gl.ALWAYS,ref:0,mask:255}},D.prototype.set=function(q){var K=this.current;q.func===K.func&&q.ref===K.ref&&q.mask===K.mask&&!this.dirty||(this.gl.stencilFunc(q.func,q.ref,q.mask),this.current=q,this.dirty=!1)},D}(Jt),_t=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){var q=this.gl;return[q.KEEP,q.KEEP,q.KEEP]},D.prototype.set=function(q){var K=this.current;q[0]===K[0]&&q[1]===K[1]&&q[2]===K[2]&&!this.dirty||(this.gl.stencilOp(q[0],q[1],q[2]),this.current=q,this.dirty=!1)},D}(Jt),It=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return!1},D.prototype.set=function(q){if(!(q===this.current&&!this.dirty)){var K=this.gl;q?K.enable(K.STENCIL_TEST):K.disable(K.STENCIL_TEST),this.current=q,this.dirty=!1}},D}(Jt),mt=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return[0,1]},D.prototype.set=function(q){var K=this.current;q[0]===K[0]&&q[1]===K[1]&&!this.dirty||(this.gl.depthRange(q[0],q[1]),this.current=q,this.dirty=!1)},D}(Jt),er=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return!1},D.prototype.set=function(q){if(!(q===this.current&&!this.dirty)){var K=this.gl;q?K.enable(K.DEPTH_TEST):K.disable(K.DEPTH_TEST),this.current=q,this.dirty=!1}},D}(Jt),lr=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return this.gl.LESS},D.prototype.set=function(q){q===this.current&&!this.dirty||(this.gl.depthFunc(q),this.current=q,this.dirty=!1)},D}(Jt),Tr=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return!1},D.prototype.set=function(q){if(!(q===this.current&&!this.dirty)){var K=this.gl;q?K.enable(K.BLEND):K.disable(K.BLEND),this.current=q,this.dirty=!1}},D}(Jt),Lr=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){var q=this.gl;return[q.ONE,q.ZERO]},D.prototype.set=function(q){var K=this.current;q[0]===K[0]&&q[1]===K[1]&&!this.dirty||(this.gl.blendFunc(q[0],q[1]),this.current=q,this.dirty=!1)},D}(Jt),ti=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return i.Color.transparent},D.prototype.set=function(q){var K=this.current;q.r===K.r&&q.g===K.g&&q.b===K.b&&q.a===K.a&&!this.dirty||(this.gl.blendColor(q.r,q.g,q.b,q.a),this.current=q,this.dirty=!1)},D}(Jt),Br=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return this.gl.FUNC_ADD},D.prototype.set=function(q){q===this.current&&!this.dirty||(this.gl.blendEquation(q),this.current=q,this.dirty=!1)},D}(Jt),Vr=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return!1},D.prototype.set=function(q){if(!(q===this.current&&!this.dirty)){var K=this.gl;q?K.enable(K.CULL_FACE):K.disable(K.CULL_FACE),this.current=q,this.dirty=!1}},D}(Jt),dt=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return this.gl.BACK},D.prototype.set=function(q){q===this.current&&!this.dirty||(this.gl.cullFace(q),this.current=q,this.dirty=!1)},D}(Jt),Ge=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return this.gl.CCW},D.prototype.set=function(q){q===this.current&&!this.dirty||(this.gl.frontFace(q),this.current=q,this.dirty=!1)},D}(Jt),Je=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return null},D.prototype.set=function(q){q===this.current&&!this.dirty||(this.gl.useProgram(q),this.current=q,this.dirty=!1)},D}(Jt),je=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return this.gl.TEXTURE0},D.prototype.set=function(q){q===this.current&&!this.dirty||(this.gl.activeTexture(q),this.current=q,this.dirty=!1)},D}(Jt),tt=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){var q=this.gl;return[0,0,q.drawingBufferWidth,q.drawingBufferHeight]},D.prototype.set=function(q){var K=this.current;q[0]===K[0]&&q[1]===K[1]&&q[2]===K[2]&&q[3]===K[3]&&!this.dirty||(this.gl.viewport(q[0],q[1],q[2],q[3]),this.current=q,this.dirty=!1)},D}(Jt),xt=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return null},D.prototype.set=function(q){if(!(q===this.current&&!this.dirty)){var K=this.gl;K.bindFramebuffer(K.FRAMEBUFFER,q),this.current=q,this.dirty=!1}},D}(Jt),Ie=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return null},D.prototype.set=function(q){if(!(q===this.current&&!this.dirty)){var K=this.gl;K.bindRenderbuffer(K.RENDERBUFFER,q),this.current=q,this.dirty=!1}},D}(Jt),xe=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return null},D.prototype.set=function(q){if(!(q===this.current&&!this.dirty)){var K=this.gl;K.bindTexture(K.TEXTURE_2D,q),this.current=q,this.dirty=!1}},D}(Jt),ke=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return null},D.prototype.set=function(q){if(!(q===this.current&&!this.dirty)){var K=this.gl;K.bindBuffer(K.ARRAY_BUFFER,q),this.current=q,this.dirty=!1}},D}(Jt),vt=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return null},D.prototype.set=function(q){var K=this.gl;K.bindBuffer(K.ELEMENT_ARRAY_BUFFER,q),this.current=q,this.dirty=!1},D}(Jt),ir=function(Y){function D(J){Y.call(this,J),this.vao=J.extVertexArrayObject}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return null},D.prototype.set=function(q){!this.vao||q===this.current&&!this.dirty||(this.vao.bindVertexArrayOES(q),this.current=q,this.dirty=!1)},D}(Jt),ar=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return 4},D.prototype.set=function(q){if(!(q===this.current&&!this.dirty)){var K=this.gl;K.pixelStorei(K.UNPACK_ALIGNMENT,q),this.current=q,this.dirty=!1}},D}(Jt),vr=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return!1},D.prototype.set=function(q){if(!(q===this.current&&!this.dirty)){var K=this.gl;K.pixelStorei(K.UNPACK_PREMULTIPLY_ALPHA_WEBGL,q),this.current=q,this.dirty=!1}},D}(Jt),ii=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return!1},D.prototype.set=function(q){if(!(q===this.current&&!this.dirty)){var K=this.gl;K.pixelStorei(K.UNPACK_FLIP_Y_WEBGL,q),this.current=q,this.dirty=!1}},D}(Jt),pi=function(Y){function D(J,q){Y.call(this,J),this.context=J,this.parent=q}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getDefault=function(){return null},D}(Jt),$r=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.setDirty=function(){this.dirty=!0},D.prototype.set=function(q){if(!(q===this.current&&!this.dirty)){this.context.bindFramebuffer.set(this.parent);var K=this.gl;K.framebufferTexture2D(K.FRAMEBUFFER,K.COLOR_ATTACHMENT0,K.TEXTURE_2D,q,0),this.current=q,this.dirty=!1}},D}(pi),di=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.set=function(q){if(!(q===this.current&&!this.dirty)){this.context.bindFramebuffer.set(this.parent);var K=this.gl;K.framebufferRenderbuffer(K.FRAMEBUFFER,K.DEPTH_ATTACHMENT,K.RENDERBUFFER,q),this.current=q,this.dirty=!1}},D}(pi),ji=function(D,J,q,K){this.context=D,this.width=J,this.height=q;var de=D.gl,ne=this.framebuffer=de.createFramebuffer();this.colorAttachment=new $r(D,ne),K&&(this.depthAttachment=new di(D,ne))};ji.prototype.destroy=function(){var D=this.context.gl,J=this.colorAttachment.get();if(J&&D.deleteTexture(J),this.depthAttachment){var q=this.depthAttachment.get();q&&D.deleteRenderbuffer(q)}D.deleteFramebuffer(this.framebuffer)};var In=519,wi=function(D,J,q){this.func=D,this.mask=J,this.range=q};wi.ReadOnly=!1,wi.ReadWrite=!0,wi.disabled=new wi(In,wi.ReadOnly,[0,1]);var On=519,qn=7680,Fn=function(D,J,q,K,de,ne){this.test=D,this.ref=J,this.mask=q,this.fail=K,this.depthFail=de,this.pass=ne};Fn.disabled=new Fn({func:On,mask:0},0,0,qn,qn,qn);var ra=0,la=1,Ut=771,wt=function(D,J,q){this.blendFunction=D,this.blendColor=J,this.mask=q};wt.Replace=[la,ra],wt.disabled=new wt(wt.Replace,i.Color.transparent,[!1,!1,!1,!1]),wt.unblended=new wt(wt.Replace,i.Color.transparent,[!0,!0,!0,!0]),wt.alphaBlended=new wt([la,Ut],i.Color.transparent,[!0,!0,!0,!0]);var rr=1029,nr=2305,Er=function(D,J,q){this.enable=D,this.mode=J,this.frontFace=q};Er.disabled=new Er(!1,rr,nr),Er.backCCW=new Er(!0,rr,nr);var Xr=function(D){this.gl=D,this.extVertexArrayObject=this.gl.getExtension("OES_vertex_array_object"),this.clearColor=new sr(this),this.clearDepth=new wr(this),this.clearStencil=new cr(this),this.colorMask=new $e(this),this.depthMask=new St(this),this.stencilMask=new Qt(this),this.stencilFunc=new Vt(this),this.stencilOp=new _t(this),this.stencilTest=new It(this),this.depthRange=new mt(this),this.depthTest=new er(this),this.depthFunc=new lr(this),this.blend=new Tr(this),this.blendFunc=new Lr(this),this.blendColor=new ti(this),this.blendEquation=new Br(this),this.cullFace=new Vr(this),this.cullFaceSide=new dt(this),this.frontFace=new Ge(this),this.program=new Je(this),this.activeTexture=new je(this),this.viewport=new tt(this),this.bindFramebuffer=new xt(this),this.bindRenderbuffer=new Ie(this),this.bindTexture=new xe(this),this.bindVertexBuffer=new ke(this),this.bindElementBuffer=new vt(this),this.bindVertexArrayOES=this.extVertexArrayObject&&new ir(this),this.pixelStoreUnpack=new ar(this),this.pixelStoreUnpackPremultiplyAlpha=new vr(this),this.pixelStoreUnpackFlipY=new ii(this),this.extTextureFilterAnisotropic=D.getExtension("EXT_texture_filter_anisotropic")||D.getExtension("MOZ_EXT_texture_filter_anisotropic")||D.getExtension("WEBKIT_EXT_texture_filter_anisotropic"),this.extTextureFilterAnisotropic&&(this.extTextureFilterAnisotropicMax=D.getParameter(this.extTextureFilterAnisotropic.MAX_TEXTURE_MAX_ANISOTROPY_EXT)),this.extTextureHalfFloat=D.getExtension("OES_texture_half_float"),this.extTextureHalfFloat&&(D.getExtension("OES_texture_half_float_linear"),this.extRenderToTextureHalfFloat=D.getExtension("EXT_color_buffer_half_float")),this.extTimerQuery=D.getExtension("EXT_disjoint_timer_query"),this.maxTextureSize=D.getParameter(D.MAX_TEXTURE_SIZE)};Xr.prototype.setDefault=function(){this.unbindVAO(),this.clearColor.setDefault(),this.clearDepth.setDefault(),this.clearStencil.setDefault(),this.colorMask.setDefault(),this.depthMask.setDefault(),this.stencilMask.setDefault(),this.stencilFunc.setDefault(),this.stencilOp.setDefault(),this.stencilTest.setDefault(),this.depthRange.setDefault(),this.depthTest.setDefault(),this.depthFunc.setDefault(),this.blend.setDefault(),this.blendFunc.setDefault(),this.blendColor.setDefault(),this.blendEquation.setDefault(),this.cullFace.setDefault(),this.cullFaceSide.setDefault(),this.frontFace.setDefault(),this.program.setDefault(),this.activeTexture.setDefault(),this.bindFramebuffer.setDefault(),this.pixelStoreUnpack.setDefault(),this.pixelStoreUnpackPremultiplyAlpha.setDefault(),this.pixelStoreUnpackFlipY.setDefault()},Xr.prototype.setDirty=function(){this.clearColor.dirty=!0,this.clearDepth.dirty=!0,this.clearStencil.dirty=!0,this.colorMask.dirty=!0,this.depthMask.dirty=!0,this.stencilMask.dirty=!0,this.stencilFunc.dirty=!0,this.stencilOp.dirty=!0,this.stencilTest.dirty=!0,this.depthRange.dirty=!0,this.depthTest.dirty=!0,this.depthFunc.dirty=!0,this.blend.dirty=!0,this.blendFunc.dirty=!0,this.blendColor.dirty=!0,this.blendEquation.dirty=!0,this.cullFace.dirty=!0,this.cullFaceSide.dirty=!0,this.frontFace.dirty=!0,this.program.dirty=!0,this.activeTexture.dirty=!0,this.viewport.dirty=!0,this.bindFramebuffer.dirty=!0,this.bindRenderbuffer.dirty=!0,this.bindTexture.dirty=!0,this.bindVertexBuffer.dirty=!0,this.bindElementBuffer.dirty=!0,this.extVertexArrayObject&&(this.bindVertexArrayOES.dirty=!0),this.pixelStoreUnpack.dirty=!0,this.pixelStoreUnpackPremultiplyAlpha.dirty=!0,this.pixelStoreUnpackFlipY.dirty=!0},Xr.prototype.createIndexBuffer=function(D,J){return new lt(this,D,J)},Xr.prototype.createVertexBuffer=function(D,J,q){return new Nt(this,D,J,q)},Xr.prototype.createRenderbuffer=function(D,J,q){var K=this.gl,de=K.createRenderbuffer();return this.bindRenderbuffer.set(de),K.renderbufferStorage(K.RENDERBUFFER,D,J,q),this.bindRenderbuffer.set(null),de},Xr.prototype.createFramebuffer=function(D,J,q){return new ji(this,D,J,q)},Xr.prototype.clear=function(D){var J=D.color,q=D.depth,K=this.gl,de=0;J&&(de|=K.COLOR_BUFFER_BIT,this.clearColor.set(J),this.colorMask.set([!0,!0,!0,!0])),typeof q!="undefined"&&(de|=K.DEPTH_BUFFER_BIT,this.depthRange.set([0,1]),this.clearDepth.set(q),this.depthMask.set(!0)),K.clear(de)},Xr.prototype.setCullFace=function(D){D.enable===!1?this.cullFace.set(!1):(this.cullFace.set(!0),this.cullFaceSide.set(D.mode),this.frontFace.set(D.frontFace))},Xr.prototype.setDepthMode=function(D){D.func===this.gl.ALWAYS&&!D.mask?this.depthTest.set(!1):(this.depthTest.set(!0),this.depthFunc.set(D.func),this.depthMask.set(D.mask),this.depthRange.set(D.range))},Xr.prototype.setStencilMode=function(D){D.test.func===this.gl.ALWAYS&&!D.mask?this.stencilTest.set(!1):(this.stencilTest.set(!0),this.stencilMask.set(D.mask),this.stencilOp.set([D.fail,D.depthFail,D.pass]),this.stencilFunc.set({func:D.test.func,ref:D.ref,mask:D.test.mask}))},Xr.prototype.setColorMode=function(D){i.deepEqual(D.blendFunction,wt.Replace)?this.blend.set(!1):(this.blend.set(!0),this.blendFunc.set(D.blendFunction),this.blendColor.set(D.blendColor)),this.colorMask.set(D.mask)},Xr.prototype.unbindVAO=function(){this.extVertexArrayObject&&this.bindVertexArrayOES.set(null)};var ri=function(Y){function D(J,q,K){var de=this;Y.call(this),this.id=J,this.dispatcher=K,this.on("data",function(ne){ne.dataType==="source"&&ne.sourceDataType==="metadata"&&(de._sourceLoaded=!0),de._sourceLoaded&&!de._paused&&ne.dataType==="source"&&ne.sourceDataType==="content"&&(de.reload(),de.transform&&de.update(de.transform))}),this.on("error",function(){de._sourceErrored=!0}),this._source=Ae(J,q,K,this),this._tiles={},this._cache=new st(0,this._unloadTile.bind(this)),this._timers={},this._cacheTimers={},this._maxTileCacheSize=null,this._loadedParentTiles={},this._coveredTiles={},this._state=new i.SourceFeatureState}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.onAdd=function(q){this.map=q,this._maxTileCacheSize=q?q._maxTileCacheSize:null,this._source&&this._source.onAdd&&this._source.onAdd(q)},D.prototype.onRemove=function(q){this._source&&this._source.onRemove&&this._source.onRemove(q)},D.prototype.loaded=function(){if(this._sourceErrored)return!0;if(!this._sourceLoaded||!this._source.loaded())return!1;for(var q in this._tiles){var K=this._tiles[q];if(K.state!=="loaded"&&K.state!=="errored")return!1}return!0},D.prototype.getSource=function(){return this._source},D.prototype.pause=function(){this._paused=!0},D.prototype.resume=function(){if(this._paused){var q=this._shouldReloadOnResume;this._paused=!1,this._shouldReloadOnResume=!1,q&&this.reload(),this.transform&&this.update(this.transform)}},D.prototype._loadTile=function(q,K){return this._source.loadTile(q,K)},D.prototype._unloadTile=function(q){if(this._source.unloadTile)return this._source.unloadTile(q,function(){})},D.prototype._abortTile=function(q){if(this._source.abortTile)return this._source.abortTile(q,function(){})},D.prototype.serialize=function(){return this._source.serialize()},D.prototype.prepare=function(q){this._source.prepare&&this._source.prepare(),this._state.coalesceChanges(this._tiles,this.map?this.map.painter:null);for(var K in this._tiles){var de=this._tiles[K];de.upload(q),de.prepare(this.map.style.imageManager)}},D.prototype.getIds=function(){return i.values(this._tiles).map(function(q){return q.tileID}).sort(Qr).map(function(q){return q.key})},D.prototype.getRenderableIds=function(q){var K=this,de=[];for(var ne in this._tiles)this._isIdRenderable(ne,q)&&de.push(this._tiles[ne]);return q?de.sort(function(we,Ue){var ft=we.tileID,Xt=Ue.tileID,hr=new i.Point(ft.canonical.x,ft.canonical.y)._rotate(K.transform.angle),qt=new i.Point(Xt.canonical.x,Xt.canonical.y)._rotate(K.transform.angle);return ft.overscaledZ-Xt.overscaledZ||qt.y-hr.y||qt.x-hr.x}).map(function(we){return we.tileID.key}):de.map(function(we){return we.tileID}).sort(Qr).map(function(we){return we.key})},D.prototype.hasRenderableParent=function(q){var K=this.findLoadedParent(q,0);return K?this._isIdRenderable(K.tileID.key):!1},D.prototype._isIdRenderable=function(q,K){return this._tiles[q]&&this._tiles[q].hasData()&&!this._coveredTiles[q]&&(K||!this._tiles[q].holdingForFade())},D.prototype.reload=function(){if(this._paused){this._shouldReloadOnResume=!0;return}this._cache.reset();for(var q in this._tiles)this._tiles[q].state!=="errored"&&this._reloadTile(q,"reloading")},D.prototype._reloadTile=function(q,K){var de=this._tiles[q];de&&(de.state!=="loading"&&(de.state=K),this._loadTile(de,this._tileLoaded.bind(this,de,q,K)))},D.prototype._tileLoaded=function(q,K,de,ne){if(ne){q.state="errored",ne.status!==404?this._source.fire(new i.ErrorEvent(ne,{tile:q})):this.update(this.transform);return}q.timeAdded=i.browser.now(),de==="expired"&&(q.refreshedUponExpiration=!0),this._setTileReloadTimer(K,q),this.getSource().type==="raster-dem"&&q.dem&&this._backfillDEM(q),this._state.initializeTileState(q,this.map?this.map.painter:null),this._source.fire(new i.Event("data",{dataType:"source",tile:q,coord:q.tileID}))},D.prototype._backfillDEM=function(q){for(var K=this.getRenderableIds(),de=0;de<K.length;de++){var ne=K[de];if(q.neighboringTiles&&q.neighboringTiles[ne]){var we=this.getTileByID(ne);Ue(q,we),Ue(we,q)}}function Ue(ft,Xt){ft.needsHillshadePrepare=!0;var hr=Xt.tileID.canonical.x-ft.tileID.canonical.x,qt=Xt.tileID.canonical.y-ft.tileID.canonical.y,Ve=Math.pow(2,ft.tileID.canonical.z),Qe=Xt.tileID.key;hr===0&&qt===0||Math.abs(qt)>1||(Math.abs(hr)>1&&(Math.abs(hr+Ve)===1?hr+=Ve:Math.abs(hr-Ve)===1&&(hr-=Ve)),!(!Xt.dem||!ft.dem)&&(ft.dem.backfillBorder(Xt.dem,hr,qt),ft.neighboringTiles&&ft.neighboringTiles[Qe]&&(ft.neighboringTiles[Qe].backfilled=!0)))}},D.prototype.getTile=function(q){return this.getTileByID(q.key)},D.prototype.getTileByID=function(q){return this._tiles[q]},D.prototype._retainLoadedChildren=function(q,K,de,ne){for(var we in this._tiles){var Ue=this._tiles[we];if(!(ne[we]||!Ue.hasData()||Ue.tileID.overscaledZ<=K||Ue.tileID.overscaledZ>de)){for(var ft=Ue.tileID;Ue&&Ue.tileID.overscaledZ>K+1;){var Xt=Ue.tileID.scaledTo(Ue.tileID.overscaledZ-1);Ue=this._tiles[Xt.key],Ue&&Ue.hasData()&&(ft=Xt)}for(var hr=ft;hr.overscaledZ>K;)if(hr=hr.scaledTo(hr.overscaledZ-1),q[hr.key]){ne[ft.key]=ft;break}}}},D.prototype.findLoadedParent=function(q,K){if(q.key in this._loadedParentTiles){var de=this._loadedParentTiles[q.key];return de&&de.tileID.overscaledZ>=K?de:null}for(var ne=q.overscaledZ-1;ne>=K;ne--){var we=q.scaledTo(ne),Ue=this._getLoadedTile(we);if(Ue)return Ue}},D.prototype._getLoadedTile=function(q){var K=this._tiles[q.key];if(K&&K.hasData())return K;var de=this._cache.getByKey(q.wrapped().key);return de},D.prototype.updateCacheSize=function(q){var K=Math.ceil(q.width/this._source.tileSize)+1,de=Math.ceil(q.height/this._source.tileSize)+1,ne=K*de,we=5,Ue=Math.floor(ne*we),ft=typeof this._maxTileCacheSize=="number"?Math.min(this._maxTileCacheSize,Ue):Ue;this._cache.setMaxSize(ft)},D.prototype.handleWrapJump=function(q){var K=this._prevLng===void 0?q:this._prevLng,de=q-K,ne=de/360,we=Math.round(ne);if(this._prevLng=q,we){var Ue={};for(var ft in this._tiles){var Xt=this._tiles[ft];Xt.tileID=Xt.tileID.unwrapTo(Xt.tileID.wrap+we),Ue[Xt.tileID.key]=Xt}this._tiles=Ue;for(var hr in this._timers)clearTimeout(this._timers[hr]),delete this._timers[hr];for(var qt in this._tiles){var Ve=this._tiles[qt];this._setTileReloadTimer(qt,Ve)}}},D.prototype.update=function(q){var K=this;if(this.transform=q,!(!this._sourceLoaded||this._paused)){this.updateCacheSize(q),this.handleWrapJump(this.transform.center.lng),this._coveredTiles={};var de;this.used?this._source.tileID?de=q.getVisibleUnwrappedCoordinates(this._source.tileID).map(function(Ur){return new i.OverscaledTileID(Ur.canonical.z,Ur.wrap,Ur.canonical.z,Ur.canonical.x,Ur.canonical.y)}):(de=q.coveringTiles({tileSize:this._source.tileSize,minzoom:this._source.minzoom,maxzoom:this._source.maxzoom,roundZoom:this._source.roundZoom,reparseOverscaled:this._source.reparseOverscaled}),this._source.hasTile&&(de=de.filter(function(Ur){return K._source.hasTile(Ur)}))):de=[];var ne=q.coveringZoomLevel(this._source),we=Math.max(ne-D.maxOverzooming,this._source.minzoom),Ue=Math.max(ne+D.maxUnderzooming,this._source.minzoom),ft=this._updateRetainedTiles(de,ne);if(Oi(this._source.type)){for(var Xt={},hr={},qt=Object.keys(ft),Ve=0,Qe=qt;Ve<Qe.length;Ve+=1){var at=Qe[Ve],Ct=ft[at],Ot=this._tiles[at];if(!(!Ot||Ot.fadeEndTime&&Ot.fadeEndTime<=i.browser.now())){var Rt=this.findLoadedParent(Ct,we);Rt&&(this._addTile(Rt.tileID),Xt[Rt.tileID.key]=Rt.tileID),hr[at]=Ct}}this._retainLoadedChildren(hr,ne,Ue,ft);for(var Bt in Xt)ft[Bt]||(this._coveredTiles[Bt]=!0,ft[Bt]=Xt[Bt])}for(var Dt in ft)this._tiles[Dt].clearFadeHold();for(var yt=i.keysDifference(this._tiles,ft),Pt=0,ht=yt;Pt<ht.length;Pt+=1){var ur=ht[Pt],br=this._tiles[ur];br.hasSymbolBuckets&&!br.holdingForFade()?br.setHoldDuration(this.map._fadeDuration):(!br.hasSymbolBuckets||br.symbolFadeFinished())&&this._removeTile(ur)}this._updateLoadedParentTileCache()}},D.prototype.releaseSymbolFadeTiles=function(){for(var q in this._tiles)this._tiles[q].holdingForFade()&&this._removeTile(q)},D.prototype._updateRetainedTiles=function(q,K){for(var de={},ne={},we=Math.max(K-D.maxOverzooming,this._source.minzoom),Ue=Math.max(K+D.maxUnderzooming,this._source.minzoom),ft={},Xt=0,hr=q;Xt<hr.length;Xt+=1){var qt=hr[Xt],Ve=this._addTile(qt);de[qt.key]=qt,!Ve.hasData()&&K<this._source.maxzoom&&(ft[qt.key]=qt)}this._retainLoadedChildren(ft,K,Ue,de);for(var Qe=0,at=q;Qe<at.length;Qe+=1){var Ct=at[Qe],Ot=this._tiles[Ct.key];if(!Ot.hasData()){if(K+1>this._source.maxzoom){var Rt=Ct.children(this._source.maxzoom)[0],Bt=this.getTile(Rt);if(Bt&&Bt.hasData()){de[Rt.key]=Rt;continue}}else{var Dt=Ct.children(this._source.maxzoom);if(de[Dt[0].key]&&de[Dt[1].key]&&de[Dt[2].key]&&de[Dt[3].key])continue}for(var yt=Ot.wasRequested(),Pt=Ct.overscaledZ-1;Pt>=we;--Pt){var ht=Ct.scaledTo(Pt);if(ne[ht.key]||(ne[ht.key]=!0,Ot=this.getTile(ht),!Ot&&yt&&(Ot=this._addTile(ht)),Ot&&(de[ht.key]=ht,yt=Ot.wasRequested(),Ot.hasData())))break}}}return de},D.prototype._updateLoadedParentTileCache=function(){this._loadedParentTiles={};for(var q in this._tiles){for(var K=[],de=void 0,ne=this._tiles[q].tileID;ne.overscaledZ>0;){if(ne.key in this._loadedParentTiles){de=this._loadedParentTiles[ne.key];break}K.push(ne.key);var we=ne.scaledTo(ne.overscaledZ-1);if(de=this._getLoadedTile(we),de)break;ne=we}for(var Ue=0,ft=K;Ue<ft.length;Ue+=1){var Xt=ft[Ue];this._loadedParentTiles[Xt]=de}}},D.prototype._addTile=function(q){var K=this._tiles[q.key];if(K)return K;K=this._cache.getAndRemove(q),K&&(this._setTileReloadTimer(q.key,K),K.tileID=q,this._state.initializeTileState(K,this.map?this.map.painter:null),this._cacheTimers[q.key]&&(clearTimeout(this._cacheTimers[q.key]),delete this._cacheTimers[q.key],this._setTileReloadTimer(q.key,K)));var de=!!K;return de||(K=new i.Tile(q,this._source.tileSize*q.overscaleFactor()),this._loadTile(K,this._tileLoaded.bind(this,K,q.key,K.state))),K?(K.uses++,this._tiles[q.key]=K,de||this._source.fire(new i.Event("dataloading",{tile:K,coord:K.tileID,dataType:"source"})),K):null},D.prototype._setTileReloadTimer=function(q,K){var de=this;q in this._timers&&(clearTimeout(this._timers[q]),delete this._timers[q]);var ne=K.getExpiryTimeout();ne&&(this._timers[q]=setTimeout(function(){de._reloadTile(q,"expired"),delete de._timers[q]},ne))},D.prototype._removeTile=function(q){var K=this._tiles[q];K&&(K.uses--,delete this._tiles[q],this._timers[q]&&(clearTimeout(this._timers[q]),delete this._timers[q]),!(K.uses>0)&&(K.hasData()&&K.state!=="reloading"?this._cache.add(K.tileID,K,K.getExpiryTimeout()):(K.aborted=!0,this._abortTile(K),this._unloadTile(K))))},D.prototype.clearTiles=function(){this._shouldReloadOnResume=!1,this._paused=!1;for(var q in this._tiles)this._removeTile(q);this._cache.reset()},D.prototype.tilesIn=function(q,K,de){var ne=this,we=[],Ue=this.transform;if(!Ue)return we;for(var ft=de?Ue.getCameraQueryGeometry(q):q,Xt=q.map(function(Pt){return Ue.pointCoordinate(Pt)}),hr=ft.map(function(Pt){return Ue.pointCoordinate(Pt)}),qt=this.getIds(),Ve=1/0,Qe=1/0,at=-1/0,Ct=-1/0,Ot=0,Rt=hr;Ot<Rt.length;Ot+=1){var Bt=Rt[Ot];Ve=Math.min(Ve,Bt.x),Qe=Math.min(Qe,Bt.y),at=Math.max(at,Bt.x),Ct=Math.max(Ct,Bt.y)}for(var Dt=function(Pt){var ht=ne._tiles[qt[Pt]];if(!ht.holdingForFade()){var ur=ht.tileID,br=Math.pow(2,Ue.zoom-ht.tileID.overscaledZ),Ur=K*ht.queryPadding*i.EXTENT/ht.tileSize/br,Di=[ur.getTilePoint(new i.MercatorCoordinate(Ve,Qe)),ur.getTilePoint(new i.MercatorCoordinate(at,Ct))];if(Di[0].x-Ur<i.EXTENT&&Di[0].y-Ur<i.EXTENT&&Di[1].x+Ur>=0&&Di[1].y+Ur>=0){var fi=Xt.map(function(gn){return ur.getTilePoint(gn)}),Ti=hr.map(function(gn){return ur.getTilePoint(gn)});we.push({tile:ht,tileID:ur,queryGeometry:fi,cameraQueryGeometry:Ti,scale:br})}}},yt=0;yt<qt.length;yt++)Dt(yt);return we},D.prototype.getVisibleCoordinates=function(q){for(var K=this,de=this.getRenderableIds(q).map(function(ft){return K._tiles[ft].tileID}),ne=0,we=de;ne<we.length;ne+=1){var Ue=we[ne];Ue.posMatrix=this.transform.calculatePosMatrix(Ue.toUnwrapped())}return de},D.prototype.hasTransition=function(){if(this._source.hasTransition())return!0;if(Oi(this._source.type))for(var q in this._tiles){var K=this._tiles[q];if(K.fadeEndTime!==void 0&&K.fadeEndTime>=i.browser.now())return!0}return!1},D.prototype.setFeatureState=function(q,K,de){q=q||"_geojsonTileLayer",this._state.updateState(q,K,de)},D.prototype.removeFeatureState=function(q,K,de){q=q||"_geojsonTileLayer",this._state.removeFeatureState(q,K,de)},D.prototype.getFeatureState=function(q,K){return q=q||"_geojsonTileLayer",this._state.getState(q,K)},D.prototype.setDependencies=function(q,K,de){var ne=this._tiles[q];ne&&ne.setDependencies(K,de)},D.prototype.reloadTilesForDependencies=function(q,K){for(var de in this._tiles){var ne=this._tiles[de];ne.hasDependency(q,K)&&this._reloadTile(de,"reloading")}this._cache.filter(function(we){return!we.hasDependency(q,K)})},D}(i.Evented);ri.maxOverzooming=10,ri.maxUnderzooming=3;function Qr(Y,D){var J=Math.abs(Y.wrap*2)-+(Y.wrap<0),q=Math.abs(D.wrap*2)-+(D.wrap<0);return Y.overscaledZ-D.overscaledZ||q-J||D.canonical.y-Y.canonical.y||D.canonical.x-Y.canonical.x}function Oi(Y){return Y==="raster"||Y==="image"||Y==="video"}function $i(){return new i.window.Worker(io.workerUrl)}var tn="mapboxgl_preloaded_worker_pool",fn=function(){this.active={}};fn.prototype.acquire=function(D){if(!this.workers)for(this.workers=[];this.workers.length<fn.workerCount;)this.workers.push(new $i);return this.active[D]=!0,this.workers.slice()},fn.prototype.release=function(D){delete this.active[D],this.numActive()===0&&(this.workers.forEach(function(J){J.terminate()}),this.workers=null)},fn.prototype.isPreloaded=function(){return!!this.active[tn]},fn.prototype.numActive=function(){return Object.keys(this.active).length};var yn=Math.floor(i.browser.hardwareConcurrency/2);fn.workerCount=Math.max(Math.min(yn,6),1);var Sn;function Ba(){return Sn||(Sn=new fn),Sn}function ua(){var Y=Ba();Y.acquire(tn)}function ma(){var Y=Sn;Y&&(Y.isPreloaded()&&Y.numActive()===1?(Y.release(tn),Sn=null):console.warn("Could not clear WebWorkers since there are active Map instances that still reference it. The pre-warmed WebWorker pool can only be cleared when all map instances have been removed with map.remove()"))}function Wa(Y,D){var J={};for(var q in Y)q!=="ref"&&(J[q]=Y[q]);return i.refProperties.forEach(function(K){K in D&&(J[K]=D[K])}),J}function Fa(Y){Y=Y.slice();for(var D=Object.create(null),J=0;J<Y.length;J++)D[Y[J].id]=Y[J];for(var q=0;q<Y.length;q++)"ref"in Y[q]&&(Y[q]=Wa(Y[q],D[Y[q].ref]));return Y}function Wo(){var Y={},D=i.styleSpec.$version;for(var J in i.styleSpec.$root){var q=i.styleSpec.$root[J];if(q.required){var K=null;J==="version"?K=D:q.type==="array"?K=[]:K={},K!=null&&(Y[J]=K)}}return Y}var da={setStyle:"setStyle",addLayer:"addLayer",removeLayer:"removeLayer",setPaintProperty:"setPaintProperty",setLayoutProperty:"setLayoutProperty",setFilter:"setFilter",addSource:"addSource",removeSource:"removeSource",setGeoJSONSourceData:"setGeoJSONSourceData",setLayerZoomRange:"setLayerZoomRange",setLayerProperty:"setLayerProperty",setCenter:"setCenter",setZoom:"setZoom",setBearing:"setBearing",setPitch:"setPitch",setSprite:"setSprite",setGlyphs:"setGlyphs",setTransition:"setTransition",setLight:"setLight"};function Wn(Y,D,J){J.push({command:da.addSource,args:[Y,D[Y]]})}function Ha(Y,D,J){D.push({command:da.removeSource,args:[Y]}),J[Y]=!0}function vo(Y,D,J,q){Ha(Y,J,q),Wn(Y,D,J)}function jn(Y,D,J){var q;for(q in Y[J])if(Y[J].hasOwnProperty(q)&&q!=="data"&&!i.deepEqual(Y[J][q],D[J][q]))return!1;for(q in D[J])if(D[J].hasOwnProperty(q)&&q!=="data"&&!i.deepEqual(Y[J][q],D[J][q]))return!1;return!0}function Mt(Y,D,J,q){Y=Y||{},D=D||{};var K;for(K in Y)Y.hasOwnProperty(K)&&(D.hasOwnProperty(K)||Ha(K,J,q));for(K in D)D.hasOwnProperty(K)&&(Y.hasOwnProperty(K)?i.deepEqual(Y[K],D[K])||(Y[K].type==="geojson"&&D[K].type==="geojson"&&jn(Y,D,K)?J.push({command:da.setGeoJSONSourceData,args:[K,D[K].data]}):vo(K,D,J,q)):Wn(K,D,J))}function kr(Y,D,J,q,K,de){Y=Y||{},D=D||{};var ne;for(ne in Y)Y.hasOwnProperty(ne)&&(i.deepEqual(Y[ne],D[ne])||J.push({command:de,args:[q,ne,D[ne],K]}));for(ne in D)!D.hasOwnProperty(ne)||Y.hasOwnProperty(ne)||i.deepEqual(Y[ne],D[ne])||J.push({command:de,args:[q,ne,D[ne],K]})}function Jr(Y){return Y.id}function vi(Y,D){return Y[D.id]=D,Y}function hn(Y,D,J){Y=Y||[],D=D||[];var q=Y.map(Jr),K=D.map(Jr),de=Y.reduce(vi,{}),ne=D.reduce(vi,{}),we=q.slice(),Ue=Object.create(null),ft,Xt,hr,qt,Ve,Qe,at;for(ft=0,Xt=0;ft<q.length;ft++)hr=q[ft],ne.hasOwnProperty(hr)?Xt++:(J.push({command:da.removeLayer,args:[hr]}),we.splice(we.indexOf(hr,Xt),1));for(ft=0,Xt=0;ft<K.length;ft++)hr=K[K.length-1-ft],we[we.length-1-ft]!==hr&&(de.hasOwnProperty(hr)?(J.push({command:da.removeLayer,args:[hr]}),we.splice(we.lastIndexOf(hr,we.length-Xt),1)):Xt++,Qe=we[we.length-ft],J.push({command:da.addLayer,args:[ne[hr],Qe]}),we.splice(we.length-ft,0,hr),Ue[hr]=!0);for(ft=0;ft<K.length;ft++)if(hr=K[ft],qt=de[hr],Ve=ne[hr],!(Ue[hr]||i.deepEqual(qt,Ve))){if(!i.deepEqual(qt.source,Ve.source)||!i.deepEqual(qt["source-layer"],Ve["source-layer"])||!i.deepEqual(qt.type,Ve.type)){J.push({command:da.removeLayer,args:[hr]}),Qe=we[we.lastIndexOf(hr)+1],J.push({command:da.addLayer,args:[Ve,Qe]});continue}kr(qt.layout,Ve.layout,J,hr,null,da.setLayoutProperty),kr(qt.paint,Ve.paint,J,hr,null,da.setPaintProperty),i.deepEqual(qt.filter,Ve.filter)||J.push({command:da.setFilter,args:[hr,Ve.filter]}),(!i.deepEqual(qt.minzoom,Ve.minzoom)||!i.deepEqual(qt.maxzoom,Ve.maxzoom))&&J.push({command:da.setLayerZoomRange,args:[hr,Ve.minzoom,Ve.maxzoom]});for(at in qt)qt.hasOwnProperty(at)&&(at==="layout"||at==="paint"||at==="filter"||at==="metadata"||at==="minzoom"||at==="maxzoom"||(at.indexOf("paint.")===0?kr(qt[at],Ve[at],J,hr,at.slice(6),da.setPaintProperty):i.deepEqual(qt[at],Ve[at])||J.push({command:da.setLayerProperty,args:[hr,at,Ve[at]]})));for(at in Ve)!Ve.hasOwnProperty(at)||qt.hasOwnProperty(at)||at==="layout"||at==="paint"||at==="filter"||at==="metadata"||at==="minzoom"||at==="maxzoom"||(at.indexOf("paint.")===0?kr(qt[at],Ve[at],J,hr,at.slice(6),da.setPaintProperty):i.deepEqual(qt[at],Ve[at])||J.push({command:da.setLayerProperty,args:[hr,at,Ve[at]]}))}}function An(Y,D){if(!Y)return[{command:da.setStyle,args:[D]}];var J=[];try{if(!i.deepEqual(Y.version,D.version))return[{command:da.setStyle,args:[D]}];i.deepEqual(Y.center,D.center)||J.push({command:da.setCenter,args:[D.center]}),i.deepEqual(Y.zoom,D.zoom)||J.push({command:da.setZoom,args:[D.zoom]}),i.deepEqual(Y.bearing,D.bearing)||J.push({command:da.setBearing,args:[D.bearing]}),i.deepEqual(Y.pitch,D.pitch)||J.push({command:da.setPitch,args:[D.pitch]}),i.deepEqual(Y.sprite,D.sprite)||J.push({command:da.setSprite,args:[D.sprite]}),i.deepEqual(Y.glyphs,D.glyphs)||J.push({command:da.setGlyphs,args:[D.glyphs]}),i.deepEqual(Y.transition,D.transition)||J.push({command:da.setTransition,args:[D.transition]}),i.deepEqual(Y.light,D.light)||J.push({command:da.setLight,args:[D.light]});var q={},K=[];Mt(Y.sources,D.sources,K,q);var de=[];Y.layers&&Y.layers.forEach(function(ne){q[ne.source]?J.push({command:da.removeLayer,args:[ne.id]}):de.push(ne)}),J=J.concat(K),hn(de,D.layers,J)}catch(ne){console.warn("Unable to compute style diff:",ne),J=[{command:da.setStyle,args:[D]}]}return J}var Mn=function(D,J){this.reset(D,J)};Mn.prototype.reset=function(D,J){this.points=D||[],this._distances=[0];for(var q=1;q<this.points.length;q++)this._distances[q]=this._distances[q-1]+this.points[q].dist(this.points[q-1]);this.length=this._distances[this._distances.length-1],this.padding=Math.min(J||0,this.length*.5),this.paddedLength=this.length-this.padding*2},Mn.prototype.lerp=function(D){if(this.points.length===1)return this.points[0];D=i.clamp(D,0,1);for(var J=1,q=this._distances[J],K=D*this.paddedLength+this.padding;q<K&&J<this._distances.length;)q=this._distances[++J];var de=J-1,ne=this._distances[de],we=q-ne,Ue=we>0?(K-ne)/we:0;return this.points[de].mult(1-Ue).add(this.points[J].mult(Ue))};var Li=function(D,J,q){var K=this.boxCells=[],de=this.circleCells=[];this.xCellCount=Math.ceil(D/q),this.yCellCount=Math.ceil(J/q);for(var ne=0;ne<this.xCellCount*this.yCellCount;ne++)K.push([]),de.push([]);this.circleKeys=[],this.boxKeys=[],this.bboxes=[],this.circles=[],this.width=D,this.height=J,this.xScale=this.xCellCount/D,this.yScale=this.yCellCount/J,this.boxUid=0,this.circleUid=0};Li.prototype.keysLength=function(){return this.boxKeys.length+this.circleKeys.length},Li.prototype.insert=function(D,J,q,K,de){this._forEachCell(J,q,K,de,this._insertBoxCell,this.boxUid++),this.boxKeys.push(D),this.bboxes.push(J),this.bboxes.push(q),this.bboxes.push(K),this.bboxes.push(de)},Li.prototype.insertCircle=function(D,J,q,K){this._forEachCell(J-K,q-K,J+K,q+K,this._insertCircleCell,this.circleUid++),this.circleKeys.push(D),this.circles.push(J),this.circles.push(q),this.circles.push(K)},Li.prototype._insertBoxCell=function(D,J,q,K,de,ne){this.boxCells[de].push(ne)},Li.prototype._insertCircleCell=function(D,J,q,K,de,ne){this.circleCells[de].push(ne)},Li.prototype._query=function(D,J,q,K,de,ne){if(q<0||D>this.width||K<0||J>this.height)return de?!1:[];var we=[];if(D<=0&&J<=0&&this.width<=q&&this.height<=K){if(de)return!0;for(var Ue=0;Ue<this.boxKeys.length;Ue++)we.push({key:this.boxKeys[Ue],x1:this.bboxes[Ue*4],y1:this.bboxes[Ue*4+1],x2:this.bboxes[Ue*4+2],y2:this.bboxes[Ue*4+3]});for(var ft=0;ft<this.circleKeys.length;ft++){var Xt=this.circles[ft*3],hr=this.circles[ft*3+1],qt=this.circles[ft*3+2];we.push({key:this.circleKeys[ft],x1:Xt-qt,y1:hr-qt,x2:Xt+qt,y2:hr+qt})}return ne?we.filter(ne):we}else{var Ve={hitTest:de,seenUids:{box:{},circle:{}}};return this._forEachCell(D,J,q,K,this._queryCell,we,Ve,ne),de?we.length>0:we}},Li.prototype._queryCircle=function(D,J,q,K,de){var ne=D-q,we=D+q,Ue=J-q,ft=J+q;if(we<0||ne>this.width||ft<0||Ue>this.height)return K?!1:[];var Xt=[],hr={hitTest:K,circle:{x:D,y:J,radius:q},seenUids:{box:{},circle:{}}};return this._forEachCell(ne,Ue,we,ft,this._queryCellCircle,Xt,hr,de),K?Xt.length>0:Xt},Li.prototype.query=function(D,J,q,K,de){return this._query(D,J,q,K,!1,de)},Li.prototype.hitTest=function(D,J,q,K,de){return this._query(D,J,q,K,!0,de)},Li.prototype.hitTestCircle=function(D,J,q,K){return this._queryCircle(D,J,q,!0,K)},Li.prototype._queryCell=function(D,J,q,K,de,ne,we,Ue){var ft=we.seenUids,Xt=this.boxCells[de];if(Xt!==null)for(var hr=this.bboxes,qt=0,Ve=Xt;qt<Ve.length;qt+=1){var Qe=Ve[qt];if(!ft.box[Qe]){ft.box[Qe]=!0;var at=Qe*4;if(D<=hr[at+2]&&J<=hr[at+3]&&q>=hr[at+0]&&K>=hr[at+1]&&(!Ue||Ue(this.boxKeys[Qe]))){if(we.hitTest)return ne.push(!0),!0;ne.push({key:this.boxKeys[Qe],x1:hr[at],y1:hr[at+1],x2:hr[at+2],y2:hr[at+3]})}}}var Ct=this.circleCells[de];if(Ct!==null)for(var Ot=this.circles,Rt=0,Bt=Ct;Rt<Bt.length;Rt+=1){var Dt=Bt[Rt];if(!ft.circle[Dt]){ft.circle[Dt]=!0;var yt=Dt*3;if(this._circleAndRectCollide(Ot[yt],Ot[yt+1],Ot[yt+2],D,J,q,K)&&(!Ue||Ue(this.circleKeys[Dt]))){if(we.hitTest)return ne.push(!0),!0;var Pt=Ot[yt],ht=Ot[yt+1],ur=Ot[yt+2];ne.push({key:this.circleKeys[Dt],x1:Pt-ur,y1:ht-ur,x2:Pt+ur,y2:ht+ur})}}}},Li.prototype._queryCellCircle=function(D,J,q,K,de,ne,we,Ue){var ft=we.circle,Xt=we.seenUids,hr=this.boxCells[de];if(hr!==null)for(var qt=this.bboxes,Ve=0,Qe=hr;Ve<Qe.length;Ve+=1){var at=Qe[Ve];if(!Xt.box[at]){Xt.box[at]=!0;var Ct=at*4;if(this._circleAndRectCollide(ft.x,ft.y,ft.radius,qt[Ct+0],qt[Ct+1],qt[Ct+2],qt[Ct+3])&&(!Ue||Ue(this.boxKeys[at])))return ne.push(!0),!0}}var Ot=this.circleCells[de];if(Ot!==null)for(var Rt=this.circles,Bt=0,Dt=Ot;Bt<Dt.length;Bt+=1){var yt=Dt[Bt];if(!Xt.circle[yt]){Xt.circle[yt]=!0;var Pt=yt*3;if(this._circlesCollide(Rt[Pt],Rt[Pt+1],Rt[Pt+2],ft.x,ft.y,ft.radius)&&(!Ue||Ue(this.circleKeys[yt])))return ne.push(!0),!0}}},Li.prototype._forEachCell=function(D,J,q,K,de,ne,we,Ue){for(var ft=this._convertToXCellCoord(D),Xt=this._convertToYCellCoord(J),hr=this._convertToXCellCoord(q),qt=this._convertToYCellCoord(K),Ve=ft;Ve<=hr;Ve++)for(var Qe=Xt;Qe<=qt;Qe++){var at=this.xCellCount*Qe+Ve;if(de.call(this,D,J,q,K,at,ne,we,Ue))return}},Li.prototype._convertToXCellCoord=function(D){return Math.max(0,Math.min(this.xCellCount-1,Math.floor(D*this.xScale)))},Li.prototype._convertToYCellCoord=function(D){return Math.max(0,Math.min(this.yCellCount-1,Math.floor(D*this.yScale)))},Li.prototype._circlesCollide=function(D,J,q,K,de,ne){var we=K-D,Ue=de-J,ft=q+ne;return ft*ft>we*we+Ue*Ue},Li.prototype._circleAndRectCollide=function(D,J,q,K,de,ne,we){var Ue=(ne-K)/2,ft=Math.abs(D-(K+Ue));if(ft>Ue+q)return!1;var Xt=(we-de)/2,hr=Math.abs(J-(de+Xt));if(hr>Xt+q)return!1;if(ft<=Ue||hr<=Xt)return!0;var qt=ft-Ue,Ve=hr-Xt;return qt*qt+Ve*Ve<=q*q};function _n(Y,D,J,q,K){var de=i.create();return D?(i.scale(de,de,[1/K,1/K,1]),J||i.rotateZ(de,de,q.angle)):i.multiply(de,q.labelPlaneMatrix,Y),de}function ya(Y,D,J,q,K){if(D){var de=i.clone(Y);return i.scale(de,de,[K,K,1]),J||i.rotateZ(de,de,-q.angle),de}else return q.glCoordMatrix}function Jn(Y,D){var J=[Y.x,Y.y,0,1];zl(J,J,D);var q=J[3];return{point:new i.Point(J[0]/q,J[1]/q),signedDistanceFromCamera:q}}function Ma(Y,D){return .5+.5*(Y/D)}function _o(Y,D){var J=Y[0]/Y[3],q=Y[1]/Y[3],K=J>=-D[0]&&J<=D[0]&&q>=-D[1]&&q<=D[1];return K}function No(Y,D,J,q,K,de,ne,we){var Ue=q?Y.textSizeData:Y.iconSizeData,ft=i.evaluateSizeForZoom(Ue,J.transform.zoom),Xt=[256/J.width*2+1,256/J.height*2+1],hr=q?Y.text.dynamicLayoutVertexArray:Y.icon.dynamicLayoutVertexArray;hr.clear();for(var qt=Y.lineVertexArray,Ve=q?Y.text.placedSymbolArray:Y.icon.placedSymbolArray,Qe=J.transform.width/J.transform.height,at=!1,Ct=0;Ct<Ve.length;Ct++){var Ot=Ve.get(Ct);if(Ot.hidden||Ot.writingMode===i.WritingMode.vertical&&!at){ul(Ot.numGlyphs,hr);continue}at=!1;var Rt=[Ot.anchorX,Ot.anchorY,0,1];if(i.transformMat4(Rt,Rt,D),!_o(Rt,Xt)){ul(Ot.numGlyphs,hr);continue}var Bt=Rt[3],Dt=Ma(J.transform.cameraToCenterDistance,Bt),yt=i.evaluateSizeForFeature(Ue,ft,Ot),Pt=ne?yt/Dt:yt*Dt,ht=new i.Point(Ot.anchorX,Ot.anchorY),ur=Jn(ht,K).point,br={},Ur=ko(Ot,Pt,!1,we,D,K,de,Y.glyphOffsetArray,qt,hr,ur,ht,br,Qe);at=Ur.useVertical,(Ur.notEnoughRoom||at||Ur.needsFlipping&&ko(Ot,Pt,!0,we,D,K,de,Y.glyphOffsetArray,qt,hr,ur,ht,br,Qe).notEnoughRoom)&&ul(Ot.numGlyphs,hr)}q?Y.text.dynamicLayoutVertexBuffer.updateData(hr):Y.icon.dynamicLayoutVertexBuffer.updateData(hr)}function po(Y,D,J,q,K,de,ne,we,Ue,ft,Xt){var hr=we.glyphStartIndex+we.numGlyphs,qt=we.lineStartIndex,Ve=we.lineStartIndex+we.lineLength,Qe=D.getoffsetX(we.glyphStartIndex),at=D.getoffsetX(hr-1),Ct=Fs(Y*Qe,J,q,K,de,ne,we.segment,qt,Ve,Ue,ft,Xt);if(!Ct)return null;var Ot=Fs(Y*at,J,q,K,de,ne,we.segment,qt,Ve,Ue,ft,Xt);return Ot?{first:Ct,last:Ot}:null}function Lo(Y,D,J,q){if(Y===i.WritingMode.horizontal){var K=Math.abs(J.y-D.y),de=Math.abs(J.x-D.x)*q;if(K>de)return{useVertical:!0}}return(Y===i.WritingMode.vertical?D.y<J.y:D.x>J.x)?{needsFlipping:!0}:null}function ko(Y,D,J,q,K,de,ne,we,Ue,ft,Xt,hr,qt,Ve){var Qe=D/24,at=Y.lineOffsetX*Qe,Ct=Y.lineOffsetY*Qe,Ot;if(Y.numGlyphs>1){var Rt=Y.glyphStartIndex+Y.numGlyphs,Bt=Y.lineStartIndex,Dt=Y.lineStartIndex+Y.lineLength,yt=po(Qe,we,at,Ct,J,Xt,hr,Y,Ue,de,qt);if(!yt)return{notEnoughRoom:!0};var Pt=Jn(yt.first.point,ne).point,ht=Jn(yt.last.point,ne).point;if(q&&!J){var ur=Lo(Y.writingMode,Pt,ht,Ve);if(ur)return ur}Ot=[yt.first];for(var br=Y.glyphStartIndex+1;br<Rt-1;br++)Ot.push(Fs(Qe*we.getoffsetX(br),at,Ct,J,Xt,hr,Y.segment,Bt,Dt,Ue,de,qt));Ot.push(yt.last)}else{if(q&&!J){var Ur=Jn(hr,K).point,Di=Y.lineStartIndex+Y.segment+1,fi=new i.Point(Ue.getx(Di),Ue.gety(Di)),Ti=Jn(fi,K),gn=Ti.signedDistanceFromCamera>0?Ti.point:Ds(hr,fi,Ur,1,K),rn=Lo(Y.writingMode,Ur,gn,Ve);if(rn)return rn}var Ci=Fs(Qe*we.getoffsetX(Y.glyphStartIndex),at,Ct,J,Xt,hr,Y.segment,Y.lineStartIndex,Y.lineStartIndex+Y.lineLength,Ue,de,qt);if(!Ci)return{notEnoughRoom:!0};Ot=[Ci]}for(var Bi=0,Gi=Ot;Bi<Gi.length;Bi+=1){var sn=Gi[Bi];i.addDynamicAttributes(ft,sn.point,sn.angle)}return{}}function Ds(Y,D,J,q,K){var de=Jn(Y.add(Y.sub(D)._unit()),K).point,ne=J.sub(de);return J.add(ne._mult(q/ne.mag()))}function Fs(Y,D,J,q,K,de,ne,we,Ue,ft,Xt,hr){var qt=q?Y-D:Y+D,Ve=qt>0?1:-1,Qe=0;q&&(Ve*=-1,Qe=Math.PI),Ve<0&&(Qe+=Math.PI);for(var at=Ve>0?we+ne:we+ne+1,Ct=K,Ot=K,Rt=0,Bt=0,Dt=Math.abs(qt),yt=[];Rt+Bt<=Dt;){if(at+=Ve,at<we||at>=Ue)return null;if(Ot=Ct,yt.push(Ct),Ct=hr[at],Ct===void 0){var Pt=new i.Point(ft.getx(at),ft.gety(at)),ht=Jn(Pt,Xt);if(ht.signedDistanceFromCamera>0)Ct=hr[at]=ht.point;else{var ur=at-Ve,br=Rt===0?de:new i.Point(ft.getx(ur),ft.gety(ur));Ct=Ds(br,Pt,Ot,Dt-Rt+1,Xt)}}Rt+=Bt,Bt=Ot.dist(Ct)}var Ur=(Dt-Rt)/Bt,Di=Ct.sub(Ot),fi=Di.mult(Ur)._add(Ot);fi._add(Di._unit()._perp()._mult(J*Ve));var Ti=Qe+Math.atan2(Ct.y-Ot.y,Ct.x-Ot.x);return yt.push(fi),{point:fi,angle:Ti,path:yt}}var ll=new Float32Array([-1/0,-1/0,0,-1/0,-1/0,0,-1/0,-1/0,0,-1/0,-1/0,0]);function ul(Y,D){for(var J=0;J<Y;J++){var q=D.length;D.resize(q+4),D.float32.set(ll,q*3)}}function zl(Y,D,J){var q=D[0],K=D[1];return Y[0]=J[0]*q+J[4]*K+J[12],Y[1]=J[1]*q+J[5]*K+J[13],Y[3]=J[3]*q+J[7]*K+J[15],Y}var us=100,il=function(D,J,q){J===void 0&&(J=new Li(D.width+2*us,D.height+2*us,25)),q===void 0&&(q=new Li(D.width+2*us,D.height+2*us,25)),this.transform=D,this.grid=J,this.ignoredGrid=q,this.pitchfactor=Math.cos(D._pitch)*D.cameraToCenterDistance,this.screenRightBoundary=D.width+us,this.screenBottomBoundary=D.height+us,this.gridRightBoundary=D.width+2*us,this.gridBottomBoundary=D.height+2*us};il.prototype.placeCollisionBox=function(D,J,q,K,de){var ne=this.projectAndGetPerspectiveRatio(K,D.anchorPointX,D.anchorPointY),we=q*ne.perspectiveRatio,Ue=D.x1*we+ne.point.x,ft=D.y1*we+ne.point.y,Xt=D.x2*we+ne.point.x,hr=D.y2*we+ne.point.y;return!this.isInsideGrid(Ue,ft,Xt,hr)||!J&&this.grid.hitTest(Ue,ft,Xt,hr,de)?{box:[],offscreen:!1}:{box:[Ue,ft,Xt,hr],offscreen:this.isOffscreen(Ue,ft,Xt,hr)}},il.prototype.placeCollisionCircles=function(D,J,q,K,de,ne,we,Ue,ft,Xt,hr,qt,Ve){var Qe=[],at=new i.Point(J.anchorX,J.anchorY),Ct=Jn(at,ne),Ot=Ma(this.transform.cameraToCenterDistance,Ct.signedDistanceFromCamera),Rt=Xt?de/Ot:de*Ot,Bt=Rt/i.ONE_EM,Dt=Jn(at,we).point,yt={},Pt=J.lineOffsetX*Bt,ht=J.lineOffsetY*Bt,ur=po(Bt,K,Pt,ht,!1,Dt,at,J,q,we,yt),br=!1,Ur=!1,Di=!0;if(ur){for(var fi=qt*.5*Ot+Ve,Ti=new i.Point(-us,-us),gn=new i.Point(this.screenRightBoundary,this.screenBottomBoundary),rn=new Mn,Ci=ur.first,Bi=ur.last,Gi=[],sn=Ci.path.length-1;sn>=1;sn--)Gi.push(Ci.path[sn]);for(var zn=1;zn<Bi.path.length;zn++)Gi.push(Bi.path[zn]);var Ja=fi*2.5;if(Ue){var co=Gi.map(function(Iv){return Jn(Iv,Ue)});co.some(function(Iv){return Iv.signedDistanceFromCamera<=0})?Gi=[]:Gi=co.map(function(Iv){return Iv.point})}var ts=[];if(Gi.length>0){for(var so=Gi[0].clone(),Zo=Gi[0].clone(),ms=1;ms<Gi.length;ms++)so.x=Math.min(so.x,Gi[ms].x),so.y=Math.min(so.y,Gi[ms].y),Zo.x=Math.max(Zo.x,Gi[ms].x),Zo.y=Math.max(Zo.y,Gi[ms].y);so.x>=Ti.x&&Zo.x<=gn.x&&so.y>=Ti.y&&Zo.y<=gn.y?ts=[Gi]:Zo.x<Ti.x||so.x>gn.x||Zo.y<Ti.y||so.y>gn.y?ts=[]:ts=i.clipLine([Gi],Ti.x,Ti.y,gn.x,gn.y)}for(var ou=0,Cv=ts;ou<Cv.length;ou+=1){var Lv=Cv[ou];rn.reset(Lv,fi*.25);var wd=0;rn.length<=.5*fi?wd=1:wd=Math.ceil(rn.paddedLength/Ja)+1;for(var Kv=0;Kv<wd;Kv++){var cg=Kv/Math.max(wd-1,1),pp=rn.lerp(cg),Td=pp.x+us,gp=pp.y+us;Qe.push(Td,gp,fi,0);var Vd=Td-fi,Ad=gp-fi,Pv=Td+fi,Jv=gp+fi;if(Di=Di&&this.isOffscreen(Vd,Ad,Pv,Jv),Ur=Ur||this.isInsideGrid(Vd,Ad,Pv,Jv),!D&&this.grid.hitTestCircle(Td,gp,fi,hr)&&(br=!0,!ft))return{circles:[],offscreen:!1,collisionDetected:br}}}}return{circles:!ft&&br||!Ur?[]:Qe,offscreen:Di,collisionDetected:br}},il.prototype.queryRenderedSymbols=function(D){if(D.length===0||this.grid.keysLength()===0&&this.ignoredGrid.keysLength()===0)return{};for(var J=[],q=1/0,K=1/0,de=-1/0,ne=-1/0,we=0,Ue=D;we<Ue.length;we+=1){var ft=Ue[we],Xt=new i.Point(ft.x+us,ft.y+us);q=Math.min(q,Xt.x),K=Math.min(K,Xt.y),de=Math.max(de,Xt.x),ne=Math.max(ne,Xt.y),J.push(Xt)}for(var hr=this.grid.query(q,K,de,ne).concat(this.ignoredGrid.query(q,K,de,ne)),qt={},Ve={},Qe=0,at=hr;Qe<at.length;Qe+=1){var Ct=at[Qe],Ot=Ct.key;if(qt[Ot.bucketInstanceId]===void 0&&(qt[Ot.bucketInstanceId]={}),!qt[Ot.bucketInstanceId][Ot.featureIndex]){var Rt=[new i.Point(Ct.x1,Ct.y1),new i.Point(Ct.x2,Ct.y1),new i.Point(Ct.x2,Ct.y2),new i.Point(Ct.x1,Ct.y2)];i.polygonIntersectsPolygon(J,Rt)&&(qt[Ot.bucketInstanceId][Ot.featureIndex]=!0,Ve[Ot.bucketInstanceId]===void 0&&(Ve[Ot.bucketInstanceId]=[]),Ve[Ot.bucketInstanceId].push(Ot.featureIndex))}}return Ve},il.prototype.insertCollisionBox=function(D,J,q,K,de){var ne=J?this.ignoredGrid:this.grid,we={bucketInstanceId:q,featureIndex:K,collisionGroupID:de};ne.insert(we,D[0],D[1],D[2],D[3])},il.prototype.insertCollisionCircles=function(D,J,q,K,de){for(var ne=J?this.ignoredGrid:this.grid,we={bucketInstanceId:q,featureIndex:K,collisionGroupID:de},Ue=0;Ue<D.length;Ue+=4)ne.insertCircle(we,D[Ue],D[Ue+1],D[Ue+2])},il.prototype.projectAndGetPerspectiveRatio=function(D,J,q){var K=[J,q,0,1];zl(K,K,D);var de=new i.Point((K[0]/K[3]+1)/2*this.transform.width+us,(-K[1]/K[3]+1)/2*this.transform.height+us);return{point:de,perspectiveRatio:.5+.5*(this.transform.cameraToCenterDistance/K[3])}},il.prototype.isOffscreen=function(D,J,q,K){return q<us||D>=this.screenRightBoundary||K<us||J>this.screenBottomBoundary},il.prototype.isInsideGrid=function(D,J,q,K){return q>=0&&D<this.gridRightBoundary&&K>=0&&J<this.gridBottomBoundary},il.prototype.getViewportMatrix=function(){var D=i.identity([]);return i.translate(D,D,[-us,-us,0]),D};function As(Y,D,J){return D*(i.EXTENT/(Y.tileSize*Math.pow(2,J-Y.tileID.overscaledZ)))}var cl=function(D,J,q,K){D?this.opacity=Math.max(0,Math.min(1,D.opacity+(D.placed?J:-J))):this.opacity=K&&q?1:0,this.placed=q};cl.prototype.isHidden=function(){return this.opacity===0&&!this.placed};var Ks=function(D,J,q,K,de){this.text=new cl(D?D.text:null,J,q,de),this.icon=new cl(D?D.icon:null,J,K,de)};Ks.prototype.isHidden=function(){return this.text.isHidden()&&this.icon.isHidden()};var zs=function(D,J,q){this.text=D,this.icon=J,this.skipFade=q},Io=function(){this.invProjMatrix=i.create(),this.viewportMatrix=i.create(),this.circles=[]},ls=function(D,J,q,K,de){this.bucketInstanceId=D,this.featureIndex=J,this.sourceLayerIndex=q,this.bucketIndex=K,this.tileID=de},Zl=function(D){this.crossSourceCollisions=D,this.maxGroupID=0,this.collisionGroups={}};Zl.prototype.get=function(D){if(this.crossSourceCollisions)return{ID:0,predicate:null};if(!this.collisionGroups[D]){var J=++this.maxGroupID;this.collisionGroups[D]={ID:J,predicate:function(q){return q.collisionGroupID===J}}}return this.collisionGroups[D]};function Su(Y,D,J,q,K){var de=i.getAnchorAlignment(Y),ne=de.horizontalAlign,we=de.verticalAlign,Ue=-(ne-.5)*D,ft=-(we-.5)*J,Xt=i.evaluateVariableOffset(Y,q);return new i.Point(Ue+Xt[0]*K,ft+Xt[1]*K)}function nc(Y,D,J,q,K,de){var ne=Y.x1,we=Y.x2,Ue=Y.y1,ft=Y.y2,Xt=Y.anchorPointX,hr=Y.anchorPointY,qt=new i.Point(D,J);return q&&qt._rotate(K?de:-de),{x1:ne+qt.x,y1:Ue+qt.y,x2:we+qt.x,y2:ft+qt.y,anchorPointX:Xt,anchorPointY:hr}}var bs=function(D,J,q,K){this.transform=D.clone(),this.collisionIndex=new il(this.transform),this.placements={},this.opacities={},this.variableOffsets={},this.stale=!1,this.commitTime=0,this.fadeDuration=J,this.retainedQueryData={},this.collisionGroups=new Zl(q),this.collisionCircleArrays={},this.prevPlacement=K,K&&(K.prevPlacement=void 0),this.placedOrientations={}};bs.prototype.getBucketParts=function(D,J,q,K){var de=q.getBucket(J),ne=q.latestFeatureIndex;if(!(!de||!ne||J.id!==de.layerIds[0])){var we=q.collisionBoxArray,Ue=de.layers[0].layout,ft=Math.pow(2,this.transform.zoom-q.tileID.overscaledZ),Xt=q.tileSize/i.EXTENT,hr=this.transform.calculatePosMatrix(q.tileID.toUnwrapped()),qt=Ue.get("text-pitch-alignment")==="map",Ve=Ue.get("text-rotation-alignment")==="map",Qe=As(q,1,this.transform.zoom),at=_n(hr,qt,Ve,this.transform,Qe),Ct=null;if(qt){var Ot=ya(hr,qt,Ve,this.transform,Qe);Ct=i.multiply([],this.transform.labelPlaneMatrix,Ot)}this.retainedQueryData[de.bucketInstanceId]=new ls(de.bucketInstanceId,ne,de.sourceLayerIndex,de.index,q.tileID);var Rt={bucket:de,layout:Ue,posMatrix:hr,textLabelPlaneMatrix:at,labelToScreenMatrix:Ct,scale:ft,textPixelRatio:Xt,holdingForFade:q.holdingForFade(),collisionBoxArray:we,partiallyEvaluatedTextSize:i.evaluateSizeForZoom(de.textSizeData,this.transform.zoom),collisionGroup:this.collisionGroups.get(de.sourceID)};if(K)for(var Bt=0,Dt=de.sortKeyRanges;Bt<Dt.length;Bt+=1){var yt=Dt[Bt],Pt=yt.sortKey,ht=yt.symbolInstanceStart,ur=yt.symbolInstanceEnd;D.push({sortKey:Pt,symbolInstanceStart:ht,symbolInstanceEnd:ur,parameters:Rt})}else D.push({symbolInstanceStart:0,symbolInstanceEnd:de.symbolInstances.length,parameters:Rt})}},bs.prototype.attemptAnchorPlacement=function(D,J,q,K,de,ne,we,Ue,ft,Xt,hr,qt,Ve,Qe,at){var Ct=[qt.textOffset0,qt.textOffset1],Ot=Su(D,q,K,Ct,de),Rt=this.collisionIndex.placeCollisionBox(nc(J,Ot.x,Ot.y,ne,we,this.transform.angle),hr,Ue,ft,Xt.predicate);if(at){var Bt=this.collisionIndex.placeCollisionBox(nc(at,Ot.x,Ot.y,ne,we,this.transform.angle),hr,Ue,ft,Xt.predicate);if(Bt.box.length===0)return}if(Rt.box.length>0){var Dt;return this.prevPlacement&&this.prevPlacement.variableOffsets[qt.crossTileID]&&this.prevPlacement.placements[qt.crossTileID]&&this.prevPlacement.placements[qt.crossTileID].text&&(Dt=this.prevPlacement.variableOffsets[qt.crossTileID].anchor),this.variableOffsets[qt.crossTileID]={textOffset:Ct,width:q,height:K,anchor:D,textBoxScale:de,prevAnchor:Dt},this.markUsedJustification(Ve,D,qt,Qe),Ve.allowVerticalPlacement&&(this.markUsedOrientation(Ve,Qe,qt),this.placedOrientations[qt.crossTileID]=Qe),{shift:Ot,placedGlyphBoxes:Rt}}},bs.prototype.placeLayerBucketPart=function(D,J,q){var K=this,de=D.parameters,ne=de.bucket,we=de.layout,Ue=de.posMatrix,ft=de.textLabelPlaneMatrix,Xt=de.labelToScreenMatrix,hr=de.textPixelRatio,qt=de.holdingForFade,Ve=de.collisionBoxArray,Qe=de.partiallyEvaluatedTextSize,at=de.collisionGroup,Ct=we.get("text-optional"),Ot=we.get("icon-optional"),Rt=we.get("text-allow-overlap"),Bt=we.get("icon-allow-overlap"),Dt=we.get("text-rotation-alignment")==="map",yt=we.get("text-pitch-alignment")==="map",Pt=we.get("icon-text-fit")!=="none",ht=we.get("symbol-z-order")==="viewport-y",ur=Rt&&(Bt||!ne.hasIconData()||Ot),br=Bt&&(Rt||!ne.hasTextData()||Ct);!ne.collisionArrays&&Ve&&ne.deserializeCollisionBoxes(Ve);var Ur=function(Ci,Bi){if(!J[Ci.crossTileID]){if(qt){K.placements[Ci.crossTileID]=new zs(!1,!1,!1);return}var Gi=!1,sn=!1,zn=!0,Ja=null,co={box:null,offscreen:null},ts={box:null,offscreen:null},so=null,Zo=null,ms=null,ou=0,Cv=0,Lv=0;Bi.textFeatureIndex?ou=Bi.textFeatureIndex:Ci.useRuntimeCollisionCircles&&(ou=Ci.featureIndex),Bi.verticalTextFeatureIndex&&(Cv=Bi.verticalTextFeatureIndex);var wd=Bi.textBox;if(wd){var Kv=function(vc){var eu=i.WritingMode.horizontal;if(ne.allowVerticalPlacement&&!vc&&K.prevPlacement){var Sd=K.prevPlacement.placedOrientations[Ci.crossTileID];Sd&&(K.placedOrientations[Ci.crossTileID]=Sd,eu=Sd,K.markUsedOrientation(ne,eu,Ci))}return eu},cg=function(vc,eu){if(ne.allowVerticalPlacement&&Ci.numVerticalGlyphVertices>0&&Bi.verticalTextBox)for(var Sd=0,ly=ne.writingModes;Sd<ly.length;Sd+=1){var T1=ly[Sd];if(T1===i.WritingMode.vertical?(co=eu(),ts=co):co=vc(),co&&co.box&&co.box.length)break}else co=vc()};if(we.get("text-variable-anchor")){var Vd=we.get("text-variable-anchor");if(K.prevPlacement&&K.prevPlacement.variableOffsets[Ci.crossTileID]){var Ad=K.prevPlacement.variableOffsets[Ci.crossTileID];Vd.indexOf(Ad.anchor)>0&&(Vd=Vd.filter(function(vc){return vc!==Ad.anchor}),Vd.unshift(Ad.anchor))}var Pv=function(vc,eu,Sd){for(var ly=vc.x2-vc.x1,T1=vc.y2-vc.y1,wu=Ci.textBoxScale,Nx=Pt&&!Bt?eu:null,om={box:[],offscreen:!1},kw=Rt?Vd.length*2:Vd.length,Rv=0;Rv<kw;++Rv){var sm=Vd[Rv%Vd.length],Cw=Rv>=Vd.length,Ux=K.attemptAnchorPlacement(sm,vc,ly,T1,wu,Dt,yt,hr,Ue,at,Cw,Ci,ne,Sd,Nx);if(Ux&&(om=Ux.placedGlyphBoxes,om&&om.box&&om.box.length)){Gi=!0,Ja=Ux.shift;break}}return om},Jv=function(){return Pv(wd,Bi.iconBox,i.WritingMode.horizontal)},Iv=function(){var vc=Bi.verticalTextBox,eu=co&&co.box&&co.box.length;return ne.allowVerticalPlacement&&!eu&&Ci.numVerticalGlyphVertices>0&&vc?Pv(vc,Bi.verticalIconBox,i.WritingMode.vertical):{box:null,offscreen:null}};cg(Jv,Iv),co&&(Gi=co.box,zn=co.offscreen);var ay=Kv(co&&co.box);if(!Gi&&K.prevPlacement){var fg=K.prevPlacement.variableOffsets[Ci.crossTileID];fg&&(K.variableOffsets[Ci.crossTileID]=fg,K.markUsedJustification(ne,fg.anchor,Ci,ay))}}else{var pp=function(vc,eu){var Sd=K.collisionIndex.placeCollisionBox(vc,Rt,hr,Ue,at.predicate);return Sd&&Sd.box&&Sd.box.length&&(K.markUsedOrientation(ne,eu,Ci),K.placedOrientations[Ci.crossTileID]=eu),Sd},Td=function(){return pp(wd,i.WritingMode.horizontal)},gp=function(){var vc=Bi.verticalTextBox;return ne.allowVerticalPlacement&&Ci.numVerticalGlyphVertices>0&&vc?pp(vc,i.WritingMode.vertical):{box:null,offscreen:null}};cg(Td,gp),Kv(co&&co.box&&co.box.length)}}if(so=co,Gi=so&&so.box&&so.box.length>0,zn=so&&so.offscreen,Ci.useRuntimeCollisionCircles){var oh=ne.text.placedSymbolArray.get(Ci.centerJustifiedTextSymbolIndex),hg=i.evaluateSizeForFeature(ne.textSizeData,Qe,oh),oy=we.get("text-padding"),jh=Ci.collisionCircleDiameter;Zo=K.collisionIndex.placeCollisionCircles(Rt,oh,ne.lineVertexArray,ne.glyphOffsetArray,hg,Ue,ft,Xt,q,yt,at.predicate,jh,oy),Gi=Rt||Zo.circles.length>0&&!Zo.collisionDetected,zn=zn&&Zo.offscreen}if(Bi.iconFeatureIndex&&(Lv=Bi.iconFeatureIndex),Bi.iconBox){var im=function(vc){var eu=Pt&&Ja?nc(vc,Ja.x,Ja.y,Dt,yt,K.transform.angle):vc;return K.collisionIndex.placeCollisionBox(eu,Bt,hr,Ue,at.predicate)};ts&&ts.box&&ts.box.length&&Bi.verticalIconBox?(ms=im(Bi.verticalIconBox),sn=ms.box.length>0):(ms=im(Bi.iconBox),sn=ms.box.length>0),zn=zn&&ms.offscreen}var b1=Ct||Ci.numHorizontalGlyphVertices===0&&Ci.numVerticalGlyphVertices===0,w1=Ot||Ci.numIconVertices===0;if(!b1&&!w1?sn=Gi=sn&&Gi:w1?b1||(sn=sn&&Gi):Gi=sn&&Gi,Gi&&so&&so.box&&(ts&&ts.box&&Cv?K.collisionIndex.insertCollisionBox(so.box,we.get("text-ignore-placement"),ne.bucketInstanceId,Cv,at.ID):K.collisionIndex.insertCollisionBox(so.box,we.get("text-ignore-placement"),ne.bucketInstanceId,ou,at.ID)),sn&&ms&&K.collisionIndex.insertCollisionBox(ms.box,we.get("icon-ignore-placement"),ne.bucketInstanceId,Lv,at.ID),Zo&&(Gi&&K.collisionIndex.insertCollisionCircles(Zo.circles,we.get("text-ignore-placement"),ne.bucketInstanceId,ou,at.ID),q)){var sy=ne.bucketInstanceId,nm=K.collisionCircleArrays[sy];nm===void 0&&(nm=K.collisionCircleArrays[sy]=new Io);for(var am=0;am<Zo.circles.length;am+=4)nm.circles.push(Zo.circles[am+0]),nm.circles.push(Zo.circles[am+1]),nm.circles.push(Zo.circles[am+2]),nm.circles.push(Zo.collisionDetected?1:0)}K.placements[Ci.crossTileID]=new zs(Gi||ur,sn||br,zn||ne.justReloaded),J[Ci.crossTileID]=!0}};if(ht)for(var Di=ne.getSortedSymbolIndexes(this.transform.angle),fi=Di.length-1;fi>=0;--fi){var Ti=Di[fi];Ur(ne.symbolInstances.get(Ti),ne.collisionArrays[Ti])}else for(var gn=D.symbolInstanceStart;gn<D.symbolInstanceEnd;gn++)Ur(ne.symbolInstances.get(gn),ne.collisionArrays[gn]);if(q&&ne.bucketInstanceId in this.collisionCircleArrays){var rn=this.collisionCircleArrays[ne.bucketInstanceId];i.invert(rn.invProjMatrix,Ue),rn.viewportMatrix=this.collisionIndex.getViewportMatrix()}ne.justReloaded=!1},bs.prototype.markUsedJustification=function(D,J,q,K){var de={left:q.leftJustifiedTextSymbolIndex,center:q.centerJustifiedTextSymbolIndex,right:q.rightJustifiedTextSymbolIndex},ne;K===i.WritingMode.vertical?ne=q.verticalPlacedTextSymbolIndex:ne=de[i.getAnchorJustification(J)];for(var we=[q.leftJustifiedTextSymbolIndex,q.centerJustifiedTextSymbolIndex,q.rightJustifiedTextSymbolIndex,q.verticalPlacedTextSymbolIndex],Ue=0,ft=we;Ue<ft.length;Ue+=1){var Xt=ft[Ue];Xt>=0&&(ne>=0&&Xt!==ne?D.text.placedSymbolArray.get(Xt).crossTileID=0:D.text.placedSymbolArray.get(Xt).crossTileID=q.crossTileID)}},bs.prototype.markUsedOrientation=function(D,J,q){for(var K=J===i.WritingMode.horizontal||J===i.WritingMode.horizontalOnly?J:0,de=J===i.WritingMode.vertical?J:0,ne=[q.leftJustifiedTextSymbolIndex,q.centerJustifiedTextSymbolIndex,q.rightJustifiedTextSymbolIndex],we=0,Ue=ne;we<Ue.length;we+=1){var ft=Ue[we];D.text.placedSymbolArray.get(ft).placedOrientation=K}q.verticalPlacedTextSymbolIndex&&(D.text.placedSymbolArray.get(q.verticalPlacedTextSymbolIndex).placedOrientation=de)},bs.prototype.commit=function(D){this.commitTime=D,this.zoomAtLastRecencyCheck=this.transform.zoom;var J=this.prevPlacement,q=!1;this.prevZoomAdjustment=J?J.zoomAdjustment(this.transform.zoom):0;var K=J?J.symbolFadeChange(D):1,de=J?J.opacities:{},ne=J?J.variableOffsets:{},we=J?J.placedOrientations:{};for(var Ue in this.placements){var ft=this.placements[Ue],Xt=de[Ue];Xt?(this.opacities[Ue]=new Ks(Xt,K,ft.text,ft.icon),q=q||ft.text!==Xt.text.placed||ft.icon!==Xt.icon.placed):(this.opacities[Ue]=new Ks(null,K,ft.text,ft.icon,ft.skipFade),q=q||ft.text||ft.icon)}for(var hr in de){var qt=de[hr];if(!this.opacities[hr]){var Ve=new Ks(qt,K,!1,!1);Ve.isHidden()||(this.opacities[hr]=Ve,q=q||qt.text.placed||qt.icon.placed)}}for(var Qe in ne)!this.variableOffsets[Qe]&&this.opacities[Qe]&&!this.opacities[Qe].isHidden()&&(this.variableOffsets[Qe]=ne[Qe]);for(var at in we)!this.placedOrientations[at]&&this.opacities[at]&&!this.opacities[at].isHidden()&&(this.placedOrientations[at]=we[at]);q?this.lastPlacementChangeTime=D:typeof this.lastPlacementChangeTime!="number"&&(this.lastPlacementChangeTime=J?J.lastPlacementChangeTime:D)},bs.prototype.updateLayerOpacities=function(D,J){for(var q={},K=0,de=J;K<de.length;K+=1){var ne=de[K],we=ne.getBucket(D);we&&ne.latestFeatureIndex&&D.id===we.layerIds[0]&&this.updateBucketOpacities(we,q,ne.collisionBoxArray)}},bs.prototype.updateBucketOpacities=function(D,J,q){var K=this;D.hasTextData()&&D.text.opacityVertexArray.clear(),D.hasIconData()&&D.icon.opacityVertexArray.clear(),D.hasIconCollisionBoxData()&&D.iconCollisionBox.collisionVertexArray.clear(),D.hasTextCollisionBoxData()&&D.textCollisionBox.collisionVertexArray.clear();var de=D.layers[0].layout,ne=new Ks(null,0,!1,!1,!0),we=de.get("text-allow-overlap"),Ue=de.get("icon-allow-overlap"),ft=de.get("text-variable-anchor"),Xt=de.get("text-rotation-alignment")==="map",hr=de.get("text-pitch-alignment")==="map",qt=de.get("icon-text-fit")!=="none",Ve=new Ks(null,0,we&&(Ue||!D.hasIconData()||de.get("icon-optional")),Ue&&(we||!D.hasTextData()||de.get("text-optional")),!0);!D.collisionArrays&&q&&(D.hasIconCollisionBoxData()||D.hasTextCollisionBoxData())&&D.deserializeCollisionBoxes(q);for(var Qe=function(Rt,Bt,Dt){for(var yt=0;yt<Bt/4;yt++)Rt.opacityVertexArray.emplaceBack(Dt)},at=function(Rt){var Bt=D.symbolInstances.get(Rt),Dt=Bt.numHorizontalGlyphVertices,yt=Bt.numVerticalGlyphVertices,Pt=Bt.crossTileID,ht=J[Pt],ur=K.opacities[Pt];ht?ur=ne:ur||(ur=Ve,K.opacities[Pt]=ur),J[Pt]=!0;var br=Dt>0||yt>0,Ur=Bt.numIconVertices>0,Di=K.placedOrientations[Bt.crossTileID],fi=Di===i.WritingMode.vertical,Ti=Di===i.WritingMode.horizontal||Di===i.WritingMode.horizontalOnly;if(br){var gn=ac(ur.text),rn=fi?aa:gn;Qe(D.text,Dt,rn);var Ci=Ti?aa:gn;Qe(D.text,yt,Ci);var Bi=ur.text.isHidden();[Bt.rightJustifiedTextSymbolIndex,Bt.centerJustifiedTextSymbolIndex,Bt.leftJustifiedTextSymbolIndex].forEach(function(Lv){Lv>=0&&(D.text.placedSymbolArray.get(Lv).hidden=Bi||fi?1:0)}),Bt.verticalPlacedTextSymbolIndex>=0&&(D.text.placedSymbolArray.get(Bt.verticalPlacedTextSymbolIndex).hidden=Bi||Ti?1:0);var Gi=K.variableOffsets[Bt.crossTileID];Gi&&K.markUsedJustification(D,Gi.anchor,Bt,Di);var sn=K.placedOrientations[Bt.crossTileID];sn&&(K.markUsedJustification(D,"left",Bt,sn),K.markUsedOrientation(D,sn,Bt))}if(Ur){var zn=ac(ur.icon),Ja=!(qt&&Bt.verticalPlacedIconSymbolIndex&&fi);if(Bt.placedIconSymbolIndex>=0){var co=Ja?zn:aa;Qe(D.icon,Bt.numIconVertices,co),D.icon.placedSymbolArray.get(Bt.placedIconSymbolIndex).hidden=ur.icon.isHidden()}if(Bt.verticalPlacedIconSymbolIndex>=0){var ts=Ja?aa:zn;Qe(D.icon,Bt.numVerticalIconVertices,ts),D.icon.placedSymbolArray.get(Bt.verticalPlacedIconSymbolIndex).hidden=ur.icon.isHidden()}}if(D.hasIconCollisionBoxData()||D.hasTextCollisionBoxData()){var so=D.collisionArrays[Rt];if(so){var Zo=new i.Point(0,0);if(so.textBox||so.verticalTextBox){var ms=!0;if(ft){var ou=K.variableOffsets[Pt];ou?(Zo=Su(ou.anchor,ou.width,ou.height,ou.textOffset,ou.textBoxScale),Xt&&Zo._rotate(hr?K.transform.angle:-K.transform.angle)):ms=!1}so.textBox&&Rn(D.textCollisionBox.collisionVertexArray,ur.text.placed,!ms||fi,Zo.x,Zo.y),so.verticalTextBox&&Rn(D.textCollisionBox.collisionVertexArray,ur.text.placed,!ms||Ti,Zo.x,Zo.y)}var Cv=!!(!Ti&&so.verticalIconBox);so.iconBox&&Rn(D.iconCollisionBox.collisionVertexArray,ur.icon.placed,Cv,qt?Zo.x:0,qt?Zo.y:0),so.verticalIconBox&&Rn(D.iconCollisionBox.collisionVertexArray,ur.icon.placed,!Cv,qt?Zo.x:0,qt?Zo.y:0)}}},Ct=0;Ct<D.symbolInstances.length;Ct++)at(Ct);if(D.sortFeatures(this.transform.angle),this.retainedQueryData[D.bucketInstanceId]&&(this.retainedQueryData[D.bucketInstanceId].featureSortOrder=D.featureSortOrder),D.hasTextData()&&D.text.opacityVertexBuffer&&D.text.opacityVertexBuffer.updateData(D.text.opacityVertexArray),D.hasIconData()&&D.icon.opacityVertexBuffer&&D.icon.opacityVertexBuffer.updateData(D.icon.opacityVertexArray),D.hasIconCollisionBoxData()&&D.iconCollisionBox.collisionVertexBuffer&&D.iconCollisionBox.collisionVertexBuffer.updateData(D.iconCollisionBox.collisionVertexArray),D.hasTextCollisionBoxData()&&D.textCollisionBox.collisionVertexBuffer&&D.textCollisionBox.collisionVertexBuffer.updateData(D.textCollisionBox.collisionVertexArray),D.bucketInstanceId in this.collisionCircleArrays){var Ot=this.collisionCircleArrays[D.bucketInstanceId];D.placementInvProjMatrix=Ot.invProjMatrix,D.placementViewportMatrix=Ot.viewportMatrix,D.collisionCircleArray=Ot.circles,delete this.collisionCircleArrays[D.bucketInstanceId]}},bs.prototype.symbolFadeChange=function(D){return this.fadeDuration===0?1:(D-this.commitTime)/this.fadeDuration+this.prevZoomAdjustment},bs.prototype.zoomAdjustment=function(D){return Math.max(0,(this.transform.zoom-D)/1.5)},bs.prototype.hasTransitions=function(D){return this.stale||D-this.lastPlacementChangeTime<this.fadeDuration},bs.prototype.stillRecent=function(D,J){var q=this.zoomAtLastRecencyCheck===J?1-this.zoomAdjustment(J):1;return this.zoomAtLastRecencyCheck=J,this.commitTime+this.fadeDuration*q>D},bs.prototype.setStale=function(){this.stale=!0};function Rn(Y,D,J,q,K){Y.emplaceBack(D?1:0,J?1:0,q||0,K||0),Y.emplaceBack(D?1:0,J?1:0,q||0,K||0),Y.emplaceBack(D?1:0,J?1:0,q||0,K||0),Y.emplaceBack(D?1:0,J?1:0,q||0,K||0)}var _a=Math.pow(2,25),Vu=Math.pow(2,24),Ol=Math.pow(2,17),xo=Math.pow(2,16),Yl=Math.pow(2,9),Ns=Math.pow(2,8),Hl=Math.pow(2,1);function ac(Y){if(Y.opacity===0&&!Y.placed)return 0;if(Y.opacity===1&&Y.placed)return 4294967295;var D=Y.placed?1:0,J=Math.floor(Y.opacity*127);return J*_a+D*Vu+J*Ol+D*xo+J*Yl+D*Ns+J*Hl+D}var aa=0,Oo=function(D){this._sortAcrossTiles=D.layout.get("symbol-z-order")!=="viewport-y"&&D.layout.get("symbol-sort-key").constantOr(1)!==void 0,this._currentTileIndex=0,this._currentPartIndex=0,this._seenCrossTileIDs={},this._bucketParts=[]};Oo.prototype.continuePlacement=function(D,J,q,K,de){for(var ne=this._bucketParts;this._currentTileIndex<D.length;){var we=D[this._currentTileIndex];if(J.getBucketParts(ne,K,we,this._sortAcrossTiles),this._currentTileIndex++,de())return!0}for(this._sortAcrossTiles&&(this._sortAcrossTiles=!1,ne.sort(function(ft,Xt){return ft.sortKey-Xt.sortKey}));this._currentPartIndex<ne.length;){var Ue=ne[this._currentPartIndex];if(J.placeLayerBucketPart(Ue,this._seenCrossTileIDs,q),this._currentPartIndex++,de())return!0}return!1};var qo=function(D,J,q,K,de,ne,we){this.placement=new bs(D,de,ne,we),this._currentPlacementIndex=J.length-1,this._forceFullPlacement=q,this._showCollisionBoxes=K,this._done=!1};qo.prototype.isDone=function(){return this._done},qo.prototype.continuePlacement=function(D,J,q){for(var K=this,de=i.browser.now(),ne=function(){var hr=i.browser.now()-de;return K._forceFullPlacement?!1:hr>2};this._currentPlacementIndex>=0;){var we=D[this._currentPlacementIndex],Ue=J[we],ft=this.placement.collisionIndex.transform.zoom;if(Ue.type==="symbol"&&(!Ue.minzoom||Ue.minzoom<=ft)&&(!Ue.maxzoom||Ue.maxzoom>ft)){this._inProgressLayer||(this._inProgressLayer=new Oo(Ue));var Xt=this._inProgressLayer.continuePlacement(q[Ue.source],this.placement,this._showCollisionBoxes,Ue,ne);if(Xt)return;delete this._inProgressLayer}this._currentPlacementIndex--}this._done=!0},qo.prototype.commit=function(D){return this.placement.commit(D),this.placement};var ql=512/i.EXTENT/2,Pc=function(D,J,q){this.tileID=D,this.indexedSymbolInstances={},this.bucketInstanceId=q;for(var K=0;K<J.length;K++){var de=J.get(K),ne=de.key;this.indexedSymbolInstances[ne]||(this.indexedSymbolInstances[ne]=[]),this.indexedSymbolInstances[ne].push({crossTileID:de.crossTileID,coord:this.getScaledCoordinates(de,D)})}};Pc.prototype.getScaledCoordinates=function(D,J){var q=J.canonical.z-this.tileID.canonical.z,K=ql/Math.pow(2,q);return{x:Math.floor((J.canonical.x*i.EXTENT+D.anchorX)*K),y:Math.floor((J.canonical.y*i.EXTENT+D.anchorY)*K)}},Pc.prototype.findMatches=function(D,J,q){for(var K=this.tileID.canonical.z<J.canonical.z?1:Math.pow(2,this.tileID.canonical.z-J.canonical.z),de=0;de<D.length;de++){var ne=D.get(de);if(!ne.crossTileID){var we=this.indexedSymbolInstances[ne.key];if(we)for(var Ue=this.getScaledCoordinates(ne,J),ft=0,Xt=we;ft<Xt.length;ft+=1){var hr=Xt[ft];if(Math.abs(hr.coord.x-Ue.x)<=K&&Math.abs(hr.coord.y-Ue.y)<=K&&!q[hr.crossTileID]){q[hr.crossTileID]=!0,ne.crossTileID=hr.crossTileID;break}}}}};var Do=function(){this.maxCrossTileID=0};Do.prototype.generate=function(){return++this.maxCrossTileID};var rf=function(){this.indexes={},this.usedCrossTileIDs={},this.lng=0};rf.prototype.handleWrapJump=function(D){var J=Math.round((D-this.lng)/360);if(J!==0)for(var q in this.indexes){var K=this.indexes[q],de={};for(var ne in K){var we=K[ne];we.tileID=we.tileID.unwrapTo(we.tileID.wrap+J),de[we.tileID.key]=we}this.indexes[q]=de}this.lng=D},rf.prototype.addBucket=function(D,J,q){if(this.indexes[D.overscaledZ]&&this.indexes[D.overscaledZ][D.key]){if(this.indexes[D.overscaledZ][D.key].bucketInstanceId===J.bucketInstanceId)return!1;this.removeBucketCrossTileIDs(D.overscaledZ,this.indexes[D.overscaledZ][D.key])}for(var K=0;K<J.symbolInstances.length;K++){var de=J.symbolInstances.get(K);de.crossTileID=0}this.usedCrossTileIDs[D.overscaledZ]||(this.usedCrossTileIDs[D.overscaledZ]={});var ne=this.usedCrossTileIDs[D.overscaledZ];for(var we in this.indexes){var Ue=this.indexes[we];if(Number(we)>D.overscaledZ)for(var ft in Ue){var Xt=Ue[ft];Xt.tileID.isChildOf(D)&&Xt.findMatches(J.symbolInstances,D,ne)}else{var hr=D.scaledTo(Number(we)),qt=Ue[hr.key];qt&&qt.findMatches(J.symbolInstances,D,ne)}}for(var Ve=0;Ve<J.symbolInstances.length;Ve++){var Qe=J.symbolInstances.get(Ve);Qe.crossTileID||(Qe.crossTileID=q.generate(),ne[Qe.crossTileID]=!0)}return this.indexes[D.overscaledZ]===void 0&&(this.indexes[D.overscaledZ]={}),this.indexes[D.overscaledZ][D.key]=new Pc(D,J.symbolInstances,J.bucketInstanceId),!0},rf.prototype.removeBucketCrossTileIDs=function(D,J){for(var q in J.indexedSymbolInstances)for(var K=0,de=J.indexedSymbolInstances[q];K<de.length;K+=1){var ne=de[K];delete this.usedCrossTileIDs[D][ne.crossTileID]}},rf.prototype.removeStaleBuckets=function(D){var J=!1;for(var q in this.indexes){var K=this.indexes[q];for(var de in K)D[K[de].bucketInstanceId]||(this.removeBucketCrossTileIDs(q,K[de]),delete K[de],J=!0)}return J};var Uf=function(){this.layerIndexes={},this.crossTileIDs=new Do,this.maxBucketInstanceId=0,this.bucketsInCurrentPlacement={}};Uf.prototype.addLayer=function(D,J,q){var K=this.layerIndexes[D.id];K===void 0&&(K=this.layerIndexes[D.id]=new rf);var de=!1,ne={};K.handleWrapJump(q);for(var we=0,Ue=J;we<Ue.length;we+=1){var ft=Ue[we],Xt=ft.getBucket(D);!Xt||D.id!==Xt.layerIds[0]||(Xt.bucketInstanceId||(Xt.bucketInstanceId=++this.maxBucketInstanceId),K.addBucket(ft.tileID,Xt,this.crossTileIDs)&&(de=!0),ne[Xt.bucketInstanceId]=!0)}return K.removeStaleBuckets(ne)&&(de=!0),de},Uf.prototype.pruneUnusedLayers=function(D){var J={};D.forEach(function(K){J[K]=!0});for(var q in this.layerIndexes)J[q]||delete this.layerIndexes[q]};var pl=function(Y,D){return i.emitValidationErrors(Y,D&&D.filter(function(J){return J.identifier!=="source.canvas"}))},Zc=i.pick(da,["addLayer","removeLayer","setPaintProperty","setLayoutProperty","setFilter","addSource","removeSource","setLayerZoomRange","setLight","setTransition","setGeoJSONSourceData"]),Kl=i.pick(da,["setCenter","setZoom","setBearing","setPitch"]),Os=Wo(),yu=function(Y){function D(J,q){var K=this;q===void 0&&(q={}),Y.call(this),this.map=J,this.dispatcher=new Z(Ba(),this),this.imageManager=new k,this.imageManager.setEventedParent(this),this.glyphManager=new P(J._requestManager,q.localIdeographFontFamily),this.lineAtlas=new G(256,512),this.crossTileSymbolIndex=new Uf,this._layers={},this._serializedLayers={},this._order=[],this.sourceCaches={},this.zoomHistory=new i.ZoomHistory,this._loaded=!1,this._availableImages=[],this._resetUpdates(),this.dispatcher.broadcast("setReferrer",i.getReferrer());var de=this;this._rtlTextPluginCallback=D.registerForPluginStateChange(function(ne){var we={pluginStatus:ne.pluginStatus,pluginURL:ne.pluginURL};de.dispatcher.broadcast("syncRTLPluginState",we,function(Ue,ft){if(i.triggerPluginCompletionEvent(Ue),ft){var Xt=ft.every(function(qt){return qt});if(Xt)for(var hr in de.sourceCaches)de.sourceCaches[hr].reload()}})}),this.on("data",function(ne){if(!(ne.dataType!=="source"||ne.sourceDataType!=="metadata")){var we=K.sourceCaches[ne.sourceId];if(we){var Ue=we.getSource();if(!(!Ue||!Ue.vectorLayerIds))for(var ft in K._layers){var Xt=K._layers[ft];Xt.source===Ue.id&&K._validateLayer(Xt)}}}})}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.loadURL=function(q,K){var de=this;K===void 0&&(K={}),this.fire(new i.Event("dataloading",{dataType:"style"}));var ne=typeof K.validate=="boolean"?K.validate:!i.isMapboxURL(q);q=this.map._requestManager.normalizeStyleURL(q,K.accessToken);var we=this.map._requestManager.transformRequest(q,i.ResourceType.Style);this._request=i.getJSON(we,function(Ue,ft){de._request=null,Ue?de.fire(new i.ErrorEvent(Ue)):ft&&de._load(ft,ne)})},D.prototype.loadJSON=function(q,K){var de=this;K===void 0&&(K={}),this.fire(new i.Event("dataloading",{dataType:"style"})),this._request=i.browser.frame(function(){de._request=null,de._load(q,K.validate!==!1)})},D.prototype.loadEmpty=function(){this.fire(new i.Event("dataloading",{dataType:"style"})),this._load(Os,!1)},D.prototype._load=function(q,K){if(!(K&&pl(this,i.validateStyle(q)))){this._loaded=!0,this.stylesheet=q;for(var de in q.sources)this.addSource(de,q.sources[de],{validate:!1});q.sprite?this._loadSprite(q.sprite):this.imageManager.setLoaded(!0),this.glyphManager.setURL(q.glyphs);var ne=Fa(this.stylesheet.layers);this._order=ne.map(function(Xt){return Xt.id}),this._layers={},this._serializedLayers={};for(var we=0,Ue=ne;we<Ue.length;we+=1){var ft=Ue[we];ft=i.createStyleLayer(ft),ft.setEventedParent(this,{layer:{id:ft.id}}),this._layers[ft.id]=ft,this._serializedLayers[ft.id]=ft.serialize()}this.dispatcher.broadcast("setLayers",this._serializeLayers(this._order)),this.light=new V(this.stylesheet.light),this.fire(new i.Event("data",{dataType:"style"})),this.fire(new i.Event("style.load"))}},D.prototype._loadSprite=function(q){var K=this;this._spriteRequest=_(q,this.map._requestManager,function(de,ne){if(K._spriteRequest=null,de)K.fire(new i.ErrorEvent(de));else if(ne)for(var we in ne)K.imageManager.addImage(we,ne[we]);K.imageManager.setLoaded(!0),K._availableImages=K.imageManager.listImages(),K.dispatcher.broadcast("setImages",K._availableImages),K.fire(new i.Event("data",{dataType:"style"}))})},D.prototype._validateLayer=function(q){var K=this.sourceCaches[q.source];if(K){var de=q.sourceLayer;if(de){var ne=K.getSource();(ne.type==="geojson"||ne.vectorLayerIds&&ne.vectorLayerIds.indexOf(de)===-1)&&this.fire(new i.ErrorEvent(new Error('Source layer "'+de+'" does not exist on source "'+ne.id+'" as specified by style layer "'+q.id+'"')))}}},D.prototype.loaded=function(){if(!this._loaded||Object.keys(this._updatedSources).length)return!1;for(var q in this.sourceCaches)if(!this.sourceCaches[q].loaded())return!1;return!!this.imageManager.isLoaded()},D.prototype._serializeLayers=function(q){for(var K=[],de=0,ne=q;de<ne.length;de+=1){var we=ne[de],Ue=this._layers[we];Ue.type!=="custom"&&K.push(Ue.serialize())}return K},D.prototype.hasTransitions=function(){if(this.light&&this.light.hasTransition())return!0;for(var q in this.sourceCaches)if(this.sourceCaches[q].hasTransition())return!0;for(var K in this._layers)if(this._layers[K].hasTransition())return!0;return!1},D.prototype._checkLoaded=function(){if(!this._loaded)throw new Error("Style is not done loading")},D.prototype.update=function(q){if(this._loaded){var K=this._changed;if(this._changed){var de=Object.keys(this._updatedLayers),ne=Object.keys(this._removedLayers);(de.length||ne.length)&&this._updateWorkerLayers(de,ne);for(var we in this._updatedSources){var Ue=this._updatedSources[we];Ue==="reload"?this._reloadSource(we):Ue==="clear"&&this._clearSource(we)}this._updateTilesForChangedImages();for(var ft in this._updatedPaintProps)this._layers[ft].updateTransitions(q);this.light.updateTransitions(q),this._resetUpdates()}var Xt={};for(var hr in this.sourceCaches){var qt=this.sourceCaches[hr];Xt[hr]=qt.used,qt.used=!1}for(var Ve=0,Qe=this._order;Ve<Qe.length;Ve+=1){var at=Qe[Ve],Ct=this._layers[at];Ct.recalculate(q,this._availableImages),!Ct.isHidden(q.zoom)&&Ct.source&&(this.sourceCaches[Ct.source].used=!0)}for(var Ot in Xt){var Rt=this.sourceCaches[Ot];Xt[Ot]!==Rt.used&&Rt.fire(new i.Event("data",{sourceDataType:"visibility",dataType:"source",sourceId:Ot}))}this.light.recalculate(q),this.z=q.zoom,K&&this.fire(new i.Event("data",{dataType:"style"}))}},D.prototype._updateTilesForChangedImages=function(){var q=Object.keys(this._changedImages);if(q.length){for(var K in this.sourceCaches)this.sourceCaches[K].reloadTilesForDependencies(["icons","patterns"],q);this._changedImages={}}},D.prototype._updateWorkerLayers=function(q,K){this.dispatcher.broadcast("updateLayers",{layers:this._serializeLayers(q),removedIds:K})},D.prototype._resetUpdates=function(){this._changed=!1,this._updatedLayers={},this._removedLayers={},this._updatedSources={},this._updatedPaintProps={},this._changedImages={}},D.prototype.setState=function(q){var K=this;if(this._checkLoaded(),pl(this,i.validateStyle(q)))return!1;q=i.clone$1(q),q.layers=Fa(q.layers);var de=An(this.serialize(),q).filter(function(we){return!(we.command in Kl)});if(de.length===0)return!1;var ne=de.filter(function(we){return!(we.command in Zc)});if(ne.length>0)throw new Error("Unimplemented: "+ne.map(function(we){return we.command}).join(", ")+".");return de.forEach(function(we){we.command!=="setTransition"&&K[we.command].apply(K,we.args)}),this.stylesheet=q,!0},D.prototype.addImage=function(q,K){if(this.getImage(q))return this.fire(new i.ErrorEvent(new Error("An image with this name already exists.")));this.imageManager.addImage(q,K),this._afterImageUpdated(q)},D.prototype.updateImage=function(q,K){this.imageManager.updateImage(q,K)},D.prototype.getImage=function(q){return this.imageManager.getImage(q)},D.prototype.removeImage=function(q){if(!this.getImage(q))return this.fire(new i.ErrorEvent(new Error("No image with this name exists.")));this.imageManager.removeImage(q),this._afterImageUpdated(q)},D.prototype._afterImageUpdated=function(q){this._availableImages=this.imageManager.listImages(),this._changedImages[q]=!0,this._changed=!0,this.dispatcher.broadcast("setImages",this._availableImages),this.fire(new i.Event("data",{dataType:"style"}))},D.prototype.listImages=function(){return this._checkLoaded(),this.imageManager.listImages()},D.prototype.addSource=function(q,K,de){var ne=this;if(de===void 0&&(de={}),this._checkLoaded(),this.sourceCaches[q]!==void 0)throw new Error("There is already a source with this ID");if(!K.type)throw new Error("The type property must be defined, but only the following properties were given: "+Object.keys(K).join(", ")+".");var we=["vector","raster","geojson","video","image"],Ue=we.indexOf(K.type)>=0;if(!(Ue&&this._validate(i.validateStyle.source,"sources."+q,K,null,de))){this.map&&this.map._collectResourceTiming&&(K.collectResourceTiming=!0);var ft=this.sourceCaches[q]=new ri(q,K,this.dispatcher);ft.style=this,ft.setEventedParent(this,function(){return{isSourceLoaded:ne.loaded(),source:ft.serialize(),sourceId:q}}),ft.onAdd(this.map),this._changed=!0}},D.prototype.removeSource=function(q){if(this._checkLoaded(),this.sourceCaches[q]===void 0)throw new Error("There is no source with this ID");for(var K in this._layers)if(this._layers[K].source===q)return this.fire(new i.ErrorEvent(new Error('Source "'+q+'" cannot be removed while layer "'+K+'" is using it.')));var de=this.sourceCaches[q];delete this.sourceCaches[q],delete this._updatedSources[q],de.fire(new i.Event("data",{sourceDataType:"metadata",dataType:"source",sourceId:q})),de.setEventedParent(null),de.clearTiles(),de.onRemove&&de.onRemove(this.map),this._changed=!0},D.prototype.setGeoJSONSourceData=function(q,K){this._checkLoaded();var de=this.sourceCaches[q].getSource();de.setData(K),this._changed=!0},D.prototype.getSource=function(q){return this.sourceCaches[q]&&this.sourceCaches[q].getSource()},D.prototype.addLayer=function(q,K,de){de===void 0&&(de={}),this._checkLoaded();var ne=q.id;if(this.getLayer(ne)){this.fire(new i.ErrorEvent(new Error('Layer with id "'+ne+'" already exists on this map')));return}var we;if(q.type==="custom"){if(pl(this,i.validateCustomStyleLayer(q)))return;we=i.createStyleLayer(q)}else{if(typeof q.source=="object"&&(this.addSource(ne,q.source),q=i.clone$1(q),q=i.extend(q,{source:ne})),this._validate(i.validateStyle.layer,"layers."+ne,q,{arrayIndex:-1},de))return;we=i.createStyleLayer(q),this._validateLayer(we),we.setEventedParent(this,{layer:{id:ne}}),this._serializedLayers[we.id]=we.serialize()}var Ue=K?this._order.indexOf(K):this._order.length;if(K&&Ue===-1){this.fire(new i.ErrorEvent(new Error('Layer with id "'+K+'" does not exist on this map.')));return}if(this._order.splice(Ue,0,ne),this._layerOrderChanged=!0,this._layers[ne]=we,this._removedLayers[ne]&&we.source&&we.type!=="custom"){var ft=this._removedLayers[ne];delete this._removedLayers[ne],ft.type!==we.type?this._updatedSources[we.source]="clear":(this._updatedSources[we.source]="reload",this.sourceCaches[we.source].pause())}this._updateLayer(we),we.onAdd&&we.onAdd(this.map)},D.prototype.moveLayer=function(q,K){this._checkLoaded(),this._changed=!0;var de=this._layers[q];if(!de){this.fire(new i.ErrorEvent(new Error("The layer '"+q+"' does not exist in the map's style and cannot be moved.")));return}if(q!==K){var ne=this._order.indexOf(q);this._order.splice(ne,1);var we=K?this._order.indexOf(K):this._order.length;if(K&&we===-1){this.fire(new i.ErrorEvent(new Error('Layer with id "'+K+'" does not exist on this map.')));return}this._order.splice(we,0,q),this._layerOrderChanged=!0}},D.prototype.removeLayer=function(q){this._checkLoaded();var K=this._layers[q];if(!K){this.fire(new i.ErrorEvent(new Error("The layer '"+q+"' does not exist in the map's style and cannot be removed.")));return}K.setEventedParent(null);var de=this._order.indexOf(q);this._order.splice(de,1),this._layerOrderChanged=!0,this._changed=!0,this._removedLayers[q]=K,delete this._layers[q],delete this._serializedLayers[q],delete this._updatedLayers[q],delete this._updatedPaintProps[q],K.onRemove&&K.onRemove(this.map)},D.prototype.getLayer=function(q){return this._layers[q]},D.prototype.hasLayer=function(q){return q in this._layers},D.prototype.setLayerZoomRange=function(q,K,de){this._checkLoaded();var ne=this.getLayer(q);if(!ne){this.fire(new i.ErrorEvent(new Error("The layer '"+q+"' does not exist in the map's style and cannot have zoom extent.")));return}ne.minzoom===K&&ne.maxzoom===de||(K!=null&&(ne.minzoom=K),de!=null&&(ne.maxzoom=de),this._updateLayer(ne))},D.prototype.setFilter=function(q,K,de){de===void 0&&(de={}),this._checkLoaded();var ne=this.getLayer(q);if(!ne){this.fire(new i.ErrorEvent(new Error("The layer '"+q+"' does not exist in the map's style and cannot be filtered.")));return}if(!i.deepEqual(ne.filter,K)){if(K==null){ne.filter=void 0,this._updateLayer(ne);return}this._validate(i.validateStyle.filter,"layers."+ne.id+".filter",K,null,de)||(ne.filter=i.clone$1(K),this._updateLayer(ne))}},D.prototype.getFilter=function(q){return i.clone$1(this.getLayer(q).filter)},D.prototype.setLayoutProperty=function(q,K,de,ne){ne===void 0&&(ne={}),this._checkLoaded();var we=this.getLayer(q);if(!we){this.fire(new i.ErrorEvent(new Error("The layer '"+q+"' does not exist in the map's style and cannot be styled.")));return}i.deepEqual(we.getLayoutProperty(K),de)||(we.setLayoutProperty(K,de,ne),this._updateLayer(we))},D.prototype.getLayoutProperty=function(q,K){var de=this.getLayer(q);if(!de){this.fire(new i.ErrorEvent(new Error("The layer '"+q+"' does not exist in the map's style.")));return}return de.getLayoutProperty(K)},D.prototype.setPaintProperty=function(q,K,de,ne){ne===void 0&&(ne={}),this._checkLoaded();var we=this.getLayer(q);if(!we){this.fire(new i.ErrorEvent(new Error("The layer '"+q+"' does not exist in the map's style and cannot be styled.")));return}if(!i.deepEqual(we.getPaintProperty(K),de)){var Ue=we.setPaintProperty(K,de,ne);Ue&&this._updateLayer(we),this._changed=!0,this._updatedPaintProps[q]=!0}},D.prototype.getPaintProperty=function(q,K){return this.getLayer(q).getPaintProperty(K)},D.prototype.setFeatureState=function(q,K){this._checkLoaded();var de=q.source,ne=q.sourceLayer,we=this.sourceCaches[de];if(we===void 0){this.fire(new i.ErrorEvent(new Error("The source '"+de+"' does not exist in the map's style.")));return}var Ue=we.getSource().type;if(Ue==="geojson"&&ne){this.fire(new i.ErrorEvent(new Error("GeoJSON sources cannot have a sourceLayer parameter.")));return}if(Ue==="vector"&&!ne){this.fire(new i.ErrorEvent(new Error("The sourceLayer parameter must be provided for vector source types.")));return}q.id===void 0&&this.fire(new i.ErrorEvent(new Error("The feature id parameter must be provided."))),we.setFeatureState(ne,q.id,K)},D.prototype.removeFeatureState=function(q,K){this._checkLoaded();var de=q.source,ne=this.sourceCaches[de];if(ne===void 0){this.fire(new i.ErrorEvent(new Error("The source '"+de+"' does not exist in the map's style.")));return}var we=ne.getSource().type,Ue=we==="vector"?q.sourceLayer:void 0;if(we==="vector"&&!Ue){this.fire(new i.ErrorEvent(new Error("The sourceLayer parameter must be provided for vector source types.")));return}if(K&&typeof q.id!="string"&&typeof q.id!="number"){this.fire(new i.ErrorEvent(new Error("A feature id is required to remove its specific state property.")));return}ne.removeFeatureState(Ue,q.id,K)},D.prototype.getFeatureState=function(q){this._checkLoaded();var K=q.source,de=q.sourceLayer,ne=this.sourceCaches[K];if(ne===void 0){this.fire(new i.ErrorEvent(new Error("The source '"+K+"' does not exist in the map's style.")));return}var we=ne.getSource().type;if(we==="vector"&&!de){this.fire(new i.ErrorEvent(new Error("The sourceLayer parameter must be provided for vector source types.")));return}return q.id===void 0&&this.fire(new i.ErrorEvent(new Error("The feature id parameter must be provided."))),ne.getFeatureState(de,q.id)},D.prototype.getTransition=function(){return i.extend({duration:300,delay:0},this.stylesheet&&this.stylesheet.transition)},D.prototype.serialize=function(){return i.filterObject({version:this.stylesheet.version,name:this.stylesheet.name,metadata:this.stylesheet.metadata,light:this.stylesheet.light,center:this.stylesheet.center,zoom:this.stylesheet.zoom,bearing:this.stylesheet.bearing,pitch:this.stylesheet.pitch,sprite:this.stylesheet.sprite,glyphs:this.stylesheet.glyphs,transition:this.stylesheet.transition,sources:i.mapObject(this.sourceCaches,function(q){return q.serialize()}),layers:this._serializeLayers(this._order)},function(q){return q!==void 0})},D.prototype._updateLayer=function(q){this._updatedLayers[q.id]=!0,q.source&&!this._updatedSources[q.source]&&this.sourceCaches[q.source].getSource().type!=="raster"&&(this._updatedSources[q.source]="reload",this.sourceCaches[q.source].pause()),this._changed=!0},D.prototype._flattenAndSortRenderedFeatures=function(q){for(var K=this,de=function(Ti){return K._layers[Ti].type==="fill-extrusion"},ne={},we=[],Ue=this._order.length-1;Ue>=0;Ue--){var ft=this._order[Ue];if(de(ft)){ne[ft]=Ue;for(var Xt=0,hr=q;Xt<hr.length;Xt+=1){var qt=hr[Xt],Ve=qt[ft];if(Ve)for(var Qe=0,at=Ve;Qe<at.length;Qe+=1){var Ct=at[Qe];we.push(Ct)}}}}we.sort(function(Ti,gn){return gn.intersectionZ-Ti.intersectionZ});for(var Ot=[],Rt=this._order.length-1;Rt>=0;Rt--){var Bt=this._order[Rt];if(de(Bt))for(var Dt=we.length-1;Dt>=0;Dt--){var yt=we[Dt].feature;if(ne[yt.layer.id]<Rt)break;Ot.push(yt),we.pop()}else for(var Pt=0,ht=q;Pt<ht.length;Pt+=1){var ur=ht[Pt],br=ur[Bt];if(br)for(var Ur=0,Di=br;Ur<Di.length;Ur+=1){var fi=Di[Ur];Ot.push(fi.feature)}}}return Ot},D.prototype.queryRenderedFeatures=function(q,K,de){K&&K.filter&&this._validate(i.validateStyle.filter,"queryRenderedFeatures.filter",K.filter,null,K);var ne={};if(K&&K.layers){if(!Array.isArray(K.layers))return this.fire(new i.ErrorEvent(new Error("parameters.layers must be an Array."))),[];for(var we=0,Ue=K.layers;we<Ue.length;we+=1){var ft=Ue[we],Xt=this._layers[ft];if(!Xt)return this.fire(new i.ErrorEvent(new Error("The layer '"+ft+"' does not exist in the map's style and cannot be queried for features."))),[];ne[Xt.source]=!0}}var hr=[];K.availableImages=this._availableImages;for(var qt in this.sourceCaches)K.layers&&!ne[qt]||hr.push(ce(this.sourceCaches[qt],this._layers,this._serializedLayers,q,K,de));return this.placement&&hr.push(Ze(this._layers,this._serializedLayers,this.sourceCaches,q,K,this.placement.collisionIndex,this.placement.retainedQueryData)),this._flattenAndSortRenderedFeatures(hr)},D.prototype.querySourceFeatures=function(q,K){K&&K.filter&&this._validate(i.validateStyle.filter,"querySourceFeatures.filter",K.filter,null,K);var de=this.sourceCaches[q];return de?ut(de,K):[]},D.prototype.addSourceType=function(q,K,de){if(D.getSourceType(q))return de(new Error('A source type called "'+q+'" already exists.'));if(D.setSourceType(q,K),!K.workerSourceURL)return de(null,null);this.dispatcher.broadcast("loadWorkerSource",{name:q,url:K.workerSourceURL},de)},D.prototype.getLight=function(){return this.light.getLight()},D.prototype.setLight=function(q,K){K===void 0&&(K={}),this._checkLoaded();var de=this.light.getLight(),ne=!1;for(var we in q)if(!i.deepEqual(q[we],de[we])){ne=!0;break}if(ne){var Ue={now:i.browser.now(),transition:i.extend({duration:300,delay:0},this.stylesheet.transition)};this.light.setLight(q,K),this.light.updateTransitions(Ue)}},D.prototype._validate=function(q,K,de,ne,we){return we===void 0&&(we={}),we&&we.validate===!1?!1:pl(this,q.call(i.validateStyle,i.extend({key:K,style:this.serialize(),value:de,styleSpec:i.styleSpec},ne)))},D.prototype._remove=function(){this._request&&(this._request.cancel(),this._request=null),this._spriteRequest&&(this._spriteRequest.cancel(),this._spriteRequest=null),i.evented.off("pluginStateChange",this._rtlTextPluginCallback);for(var q in this._layers){var K=this._layers[q];K.setEventedParent(null)}for(var de in this.sourceCaches)this.sourceCaches[de].clearTiles(),this.sourceCaches[de].setEventedParent(null);this.imageManager.setEventedParent(null),this.setEventedParent(null),this.dispatcher.remove()},D.prototype._clearSource=function(q){this.sourceCaches[q].clearTiles()},D.prototype._reloadSource=function(q){this.sourceCaches[q].resume(),this.sourceCaches[q].reload()},D.prototype._updateSources=function(q){for(var K in this.sourceCaches)this.sourceCaches[K].update(q)},D.prototype._generateCollisionBoxes=function(){for(var q in this.sourceCaches)this._reloadSource(q)},D.prototype._updatePlacement=function(q,K,de,ne,we){we===void 0&&(we=!1);for(var Ue=!1,ft=!1,Xt={},hr=0,qt=this._order;hr<qt.length;hr+=1){var Ve=qt[hr],Qe=this._layers[Ve];if(Qe.type==="symbol"){if(!Xt[Qe.source]){var at=this.sourceCaches[Qe.source];Xt[Qe.source]=at.getRenderableIds(!0).map(function(Pt){return at.getTileByID(Pt)}).sort(function(Pt,ht){return ht.tileID.overscaledZ-Pt.tileID.overscaledZ||(Pt.tileID.isLessThan(ht.tileID)?-1:1)})}var Ct=this.crossTileSymbolIndex.addLayer(Qe,Xt[Qe.source],q.center.lng);Ue=Ue||Ct}}if(this.crossTileSymbolIndex.pruneUnusedLayers(this._order),we=we||this._layerOrderChanged||de===0,(we||!this.pauseablePlacement||this.pauseablePlacement.isDone()&&!this.placement.stillRecent(i.browser.now(),q.zoom))&&(this.pauseablePlacement=new qo(q,this._order,we,K,de,ne,this.placement),this._layerOrderChanged=!1),this.pauseablePlacement.isDone()?this.placement.setStale():(this.pauseablePlacement.continuePlacement(this._order,this._layers,Xt),this.pauseablePlacement.isDone()&&(this.placement=this.pauseablePlacement.commit(i.browser.now()),ft=!0),Ue&&this.pauseablePlacement.placement.setStale()),ft||Ue)for(var Ot=0,Rt=this._order;Ot<Rt.length;Ot+=1){var Bt=Rt[Ot],Dt=this._layers[Bt];Dt.type==="symbol"&&this.placement.updateLayerOpacities(Dt,Xt[Dt.source])}var yt=!this.pauseablePlacement.isDone()||this.placement.hasTransitions(i.browser.now());return yt},D.prototype._releaseSymbolFadeTiles=function(){for(var q in this.sourceCaches)this.sourceCaches[q].releaseSymbolFadeTiles()},D.prototype.getImages=function(q,K,de){this.imageManager.getImages(K.icons,de),this._updateTilesForChangedImages();var ne=this.sourceCaches[K.source];ne&&ne.setDependencies(K.tileID.key,K.type,K.icons)},D.prototype.getGlyphs=function(q,K,de){this.glyphManager.getGlyphs(K.stacks,de)},D.prototype.getResource=function(q,K,de){return i.makeRequest(K,de)},D}(i.Evented);yu.getSourceType=Fe,yu.setSourceType=Pe,yu.registerForPluginStateChange=i.registerForPluginStateChange;var oc=i.createLayout([{name:"a_pos",type:"Int16",components:2}]),Cf=`#ifdef GL_ES
+precision mediump float;
+#else
+#if !defined(lowp)
+#define lowp
+#endif
+#if !defined(mediump)
+#define mediump
+#endif
+#if !defined(highp)
+#define highp
+#endif
+#endif`,sc=`#ifdef GL_ES
+precision highp float;
+#else
+#if !defined(lowp)
+#define lowp
+#endif
+#if !defined(mediump)
+#define mediump
+#endif
+#if !defined(highp)
+#define highp
+#endif
+#endif
+vec2 unpack_float(const float packedValue) {int packedIntValue=int(packedValue);int v0=packedIntValue/256;return vec2(v0,packedIntValue-v0*256);}vec2 unpack_opacity(const float packedOpacity) {int intOpacity=int(packedOpacity)/2;return vec2(float(intOpacity)/127.0,mod(packedOpacity,2.0));}vec4 decode_color(const vec2 encodedColor) {return vec4(unpack_float(encodedColor[0])/255.0,unpack_float(encodedColor[1])/255.0
+);}float unpack_mix_vec2(const vec2 packedValue,const float t) {return mix(packedValue[0],packedValue[1],t);}vec4 unpack_mix_color(const vec4 packedColors,const float t) {vec4 minColor=decode_color(vec2(packedColors[0],packedColors[1]));vec4 maxColor=decode_color(vec2(packedColors[2],packedColors[3]));return mix(minColor,maxColor,t);}vec2 get_pattern_pos(const vec2 pixel_coord_upper,const vec2 pixel_coord_lower,const vec2 pattern_size,const float tile_units_to_pixels,const vec2 pos) {vec2 offset=mod(mod(mod(pixel_coord_upper,pattern_size)*256.0,pattern_size)*256.0+pixel_coord_lower,pattern_size);return (tile_units_to_pixels*pos+offset)/pattern_size;}`,Vh=`uniform vec4 u_color;uniform float u_opacity;void main() {gl_FragColor=u_color*u_opacity;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,Lf="attribute vec2 a_pos;uniform mat4 u_matrix;void main() {gl_Position=u_matrix*vec4(a_pos,0,1);}",cs=`uniform vec2 u_pattern_tl_a;uniform vec2 u_pattern_br_a;uniform vec2 u_pattern_tl_b;uniform vec2 u_pattern_br_b;uniform vec2 u_texsize;uniform float u_mix;uniform float u_opacity;uniform sampler2D u_image;varying vec2 v_pos_a;varying vec2 v_pos_b;void main() {vec2 imagecoord=mod(v_pos_a,1.0);vec2 pos=mix(u_pattern_tl_a/u_texsize,u_pattern_br_a/u_texsize,imagecoord);vec4 color1=texture2D(u_image,pos);vec2 imagecoord_b=mod(v_pos_b,1.0);vec2 pos2=mix(u_pattern_tl_b/u_texsize,u_pattern_br_b/u_texsize,imagecoord_b);vec4 color2=texture2D(u_image,pos2);gl_FragColor=mix(color1,color2,u_mix)*u_opacity;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,nf="uniform mat4 u_matrix;uniform vec2 u_pattern_size_a;uniform vec2 u_pattern_size_b;uniform vec2 u_pixel_coord_upper;uniform vec2 u_pixel_coord_lower;uniform float u_scale_a;uniform float u_scale_b;uniform float u_tile_units_to_pixels;attribute vec2 a_pos;varying vec2 v_pos_a;varying vec2 v_pos_b;void main() {gl_Position=u_matrix*vec4(a_pos,0,1);v_pos_a=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,u_scale_a*u_pattern_size_a,u_tile_units_to_pixels,a_pos);v_pos_b=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,u_scale_b*u_pattern_size_b,u_tile_units_to_pixels,a_pos);}",Vf=`varying vec3 v_data;
+#pragma mapbox: define highp vec4 color
+#pragma mapbox: define mediump float radius
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define highp vec4 stroke_color
+#pragma mapbox: define mediump float stroke_width
+#pragma mapbox: define lowp float stroke_opacity
+void main() {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize mediump float radius
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize highp vec4 stroke_color
+#pragma mapbox: initialize mediump float stroke_width
+#pragma mapbox: initialize lowp float stroke_opacity
+vec2 extrude=v_data.xy;float extrude_length=length(extrude);lowp float antialiasblur=v_data.z;float antialiased_blur=-max(blur,antialiasblur);float opacity_t=smoothstep(0.0,antialiased_blur,extrude_length-1.0);float color_t=stroke_width < 0.01 ? 0.0 : smoothstep(antialiased_blur,0.0,extrude_length-radius/(radius+stroke_width));gl_FragColor=opacity_t*mix(color*opacity,stroke_color*stroke_opacity,color_t);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,Jl=`uniform mat4 u_matrix;uniform bool u_scale_with_map;uniform bool u_pitch_with_map;uniform vec2 u_extrude_scale;uniform lowp float u_device_pixel_ratio;uniform highp float u_camera_to_center_distance;attribute vec2 a_pos;varying vec3 v_data;
+#pragma mapbox: define highp vec4 color
+#pragma mapbox: define mediump float radius
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define highp vec4 stroke_color
+#pragma mapbox: define mediump float stroke_width
+#pragma mapbox: define lowp float stroke_opacity
+void main(void) {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize mediump float radius
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize highp vec4 stroke_color
+#pragma mapbox: initialize mediump float stroke_width
+#pragma mapbox: initialize lowp float stroke_opacity
+vec2 extrude=vec2(mod(a_pos,2.0)*2.0-1.0);vec2 circle_center=floor(a_pos*0.5);if (u_pitch_with_map) {vec2 corner_position=circle_center;if (u_scale_with_map) {corner_position+=extrude*(radius+stroke_width)*u_extrude_scale;} else {vec4 projected_center=u_matrix*vec4(circle_center,0,1);corner_position+=extrude*(radius+stroke_width)*u_extrude_scale*(projected_center.w/u_camera_to_center_distance);}gl_Position=u_matrix*vec4(corner_position,0,1);} else {gl_Position=u_matrix*vec4(circle_center,0,1);if (u_scale_with_map) {gl_Position.xy+=extrude*(radius+stroke_width)*u_extrude_scale*u_camera_to_center_distance;} else {gl_Position.xy+=extrude*(radius+stroke_width)*u_extrude_scale*gl_Position.w;}}lowp float antialiasblur=1.0/u_device_pixel_ratio/(radius+stroke_width);v_data=vec3(extrude.x,extrude.y,antialiasblur);}`,fl="void main() {gl_FragColor=vec4(1.0);}",lc="attribute vec2 a_pos;uniform mat4 u_matrix;void main() {gl_Position=u_matrix*vec4(a_pos,0,1);}",Fu=`uniform highp float u_intensity;varying vec2 v_extrude;
+#pragma mapbox: define highp float weight
+#define GAUSS_COEF 0.3989422804014327
+void main() {
+#pragma mapbox: initialize highp float weight
+float d=-0.5*3.0*3.0*dot(v_extrude,v_extrude);float val=weight*u_intensity*GAUSS_COEF*exp(d);gl_FragColor=vec4(val,1.0,1.0,1.0);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,Es=`uniform mat4 u_matrix;uniform float u_extrude_scale;uniform float u_opacity;uniform float u_intensity;attribute vec2 a_pos;varying vec2 v_extrude;
+#pragma mapbox: define highp float weight
+#pragma mapbox: define mediump float radius
+const highp float ZERO=1.0/255.0/16.0;
+#define GAUSS_COEF 0.3989422804014327
+void main(void) {
+#pragma mapbox: initialize highp float weight
+#pragma mapbox: initialize mediump float radius
+vec2 unscaled_extrude=vec2(mod(a_pos,2.0)*2.0-1.0);float S=sqrt(-2.0*log(ZERO/weight/u_intensity/GAUSS_COEF))/3.0;v_extrude=S*unscaled_extrude;vec2 extrude=v_extrude*radius*u_extrude_scale;vec4 pos=vec4(floor(a_pos*0.5)+extrude,0,1);gl_Position=u_matrix*pos;}`,Hs=`uniform sampler2D u_image;uniform sampler2D u_color_ramp;uniform float u_opacity;varying vec2 v_pos;void main() {float t=texture2D(u_image,v_pos).r;vec4 color=texture2D(u_color_ramp,vec2(t,0.5));gl_FragColor=color*u_opacity;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(0.0);
+#endif
+}`,Go="uniform mat4 u_matrix;uniform vec2 u_world;attribute vec2 a_pos;varying vec2 v_pos;void main() {gl_Position=u_matrix*vec4(a_pos*u_world,0,1);v_pos.x=a_pos.x;v_pos.y=1.0-a_pos.y;}",ps="varying float v_placed;varying float v_notUsed;void main() {float alpha=0.5;gl_FragColor=vec4(1.0,0.0,0.0,1.0)*alpha;if (v_placed > 0.5) {gl_FragColor=vec4(0.0,0.0,1.0,0.5)*alpha;}if (v_notUsed > 0.5) {gl_FragColor*=.1;}}",uc="attribute vec2 a_pos;attribute vec2 a_anchor_pos;attribute vec2 a_extrude;attribute vec2 a_placed;attribute vec2 a_shift;uniform mat4 u_matrix;uniform vec2 u_extrude_scale;uniform float u_camera_to_center_distance;varying float v_placed;varying float v_notUsed;void main() {vec4 projectedPoint=u_matrix*vec4(a_anchor_pos,0,1);highp float camera_to_anchor_distance=projectedPoint.w;highp float collision_perspective_ratio=clamp(0.5+0.5*(u_camera_to_center_distance/camera_to_anchor_distance),0.0,4.0);gl_Position=u_matrix*vec4(a_pos,0.0,1.0);gl_Position.xy+=(a_extrude+a_shift)*u_extrude_scale*gl_Position.w*collision_perspective_ratio;v_placed=a_placed.x;v_notUsed=a_placed.y;}",xl="varying float v_radius;varying vec2 v_extrude;varying float v_perspective_ratio;varying float v_collision;void main() {float alpha=0.5*min(v_perspective_ratio,1.0);float stroke_radius=0.9*max(v_perspective_ratio,1.0);float distance_to_center=length(v_extrude);float distance_to_edge=abs(distance_to_center-v_radius);float opacity_t=smoothstep(-stroke_radius,0.0,-distance_to_edge);vec4 color=mix(vec4(0.0,0.0,1.0,0.5),vec4(1.0,0.0,0.0,1.0),v_collision);gl_FragColor=color*alpha*opacity_t;}",Gu="attribute vec2 a_pos;attribute float a_radius;attribute vec2 a_flags;uniform mat4 u_matrix;uniform mat4 u_inv_matrix;uniform vec2 u_viewport_size;uniform float u_camera_to_center_distance;varying float v_radius;varying vec2 v_extrude;varying float v_perspective_ratio;varying float v_collision;vec3 toTilePosition(vec2 screenPos) {vec4 rayStart=u_inv_matrix*vec4(screenPos,-1.0,1.0);vec4 rayEnd  =u_inv_matrix*vec4(screenPos, 1.0,1.0);rayStart.xyz/=rayStart.w;rayEnd.xyz  /=rayEnd.w;highp float t=(0.0-rayStart.z)/(rayEnd.z-rayStart.z);return mix(rayStart.xyz,rayEnd.xyz,t);}void main() {vec2 quadCenterPos=a_pos;float radius=a_radius;float collision=a_flags.x;float vertexIdx=a_flags.y;vec2 quadVertexOffset=vec2(mix(-1.0,1.0,float(vertexIdx >=2.0)),mix(-1.0,1.0,float(vertexIdx >=1.0 && vertexIdx <=2.0)));vec2 quadVertexExtent=quadVertexOffset*radius;vec3 tilePos=toTilePosition(quadCenterPos);vec4 clipPos=u_matrix*vec4(tilePos,1.0);highp float camera_to_anchor_distance=clipPos.w;highp float collision_perspective_ratio=clamp(0.5+0.5*(u_camera_to_center_distance/camera_to_anchor_distance),0.0,4.0);float padding_factor=1.2;v_radius=radius;v_extrude=quadVertexExtent*padding_factor;v_perspective_ratio=collision_perspective_ratio;v_collision=collision;gl_Position=vec4(clipPos.xyz/clipPos.w,1.0)+vec4(quadVertexExtent*padding_factor/u_viewport_size*2.0,0.0,0.0);}",qs="uniform highp vec4 u_color;uniform sampler2D u_overlay;varying vec2 v_uv;void main() {vec4 overlay_color=texture2D(u_overlay,v_uv);gl_FragColor=mix(u_color,overlay_color,overlay_color.a);}",ad="attribute vec2 a_pos;varying vec2 v_uv;uniform mat4 u_matrix;uniform float u_overlay_scale;void main() {v_uv=a_pos/8192.0;gl_Position=u_matrix*vec4(a_pos*u_overlay_scale,0,1);}",Po=`#pragma mapbox: define highp vec4 color
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize lowp float opacity
+gl_FragColor=color*opacity;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,od=`attribute vec2 a_pos;uniform mat4 u_matrix;
+#pragma mapbox: define highp vec4 color
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize lowp float opacity
+gl_Position=u_matrix*vec4(a_pos,0,1);}`,Yo=`varying vec2 v_pos;
+#pragma mapbox: define highp vec4 outline_color
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize highp vec4 outline_color
+#pragma mapbox: initialize lowp float opacity
+float dist=length(v_pos-gl_FragCoord.xy);float alpha=1.0-smoothstep(0.0,1.0,dist);gl_FragColor=outline_color*(alpha*opacity);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,Pa=`attribute vec2 a_pos;uniform mat4 u_matrix;uniform vec2 u_world;varying vec2 v_pos;
+#pragma mapbox: define highp vec4 outline_color
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize highp vec4 outline_color
+#pragma mapbox: initialize lowp float opacity
+gl_Position=u_matrix*vec4(a_pos,0,1);v_pos=(gl_Position.xy/gl_Position.w+1.0)/2.0*u_world;}`,af=`uniform vec2 u_texsize;uniform sampler2D u_image;uniform float u_fade;varying vec2 v_pos_a;varying vec2 v_pos_b;varying vec2 v_pos;
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+void main() {
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+vec2 pattern_tl_a=pattern_from.xy;vec2 pattern_br_a=pattern_from.zw;vec2 pattern_tl_b=pattern_to.xy;vec2 pattern_br_b=pattern_to.zw;vec2 imagecoord=mod(v_pos_a,1.0);vec2 pos=mix(pattern_tl_a/u_texsize,pattern_br_a/u_texsize,imagecoord);vec4 color1=texture2D(u_image,pos);vec2 imagecoord_b=mod(v_pos_b,1.0);vec2 pos2=mix(pattern_tl_b/u_texsize,pattern_br_b/u_texsize,imagecoord_b);vec4 color2=texture2D(u_image,pos2);float dist=length(v_pos-gl_FragCoord.xy);float alpha=1.0-smoothstep(0.0,1.0,dist);gl_FragColor=mix(color1,color2,u_fade)*alpha*opacity;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,Hu=`uniform mat4 u_matrix;uniform vec2 u_world;uniform vec2 u_pixel_coord_upper;uniform vec2 u_pixel_coord_lower;uniform vec3 u_scale;attribute vec2 a_pos;varying vec2 v_pos_a;varying vec2 v_pos_b;varying vec2 v_pos;
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+#pragma mapbox: define lowp float pixel_ratio_from
+#pragma mapbox: define lowp float pixel_ratio_to
+void main() {
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+#pragma mapbox: initialize lowp float pixel_ratio_from
+#pragma mapbox: initialize lowp float pixel_ratio_to
+vec2 pattern_tl_a=pattern_from.xy;vec2 pattern_br_a=pattern_from.zw;vec2 pattern_tl_b=pattern_to.xy;vec2 pattern_br_b=pattern_to.zw;float tileRatio=u_scale.x;float fromScale=u_scale.y;float toScale=u_scale.z;gl_Position=u_matrix*vec4(a_pos,0,1);vec2 display_size_a=(pattern_br_a-pattern_tl_a)/pixel_ratio_from;vec2 display_size_b=(pattern_br_b-pattern_tl_b)/pixel_ratio_to;v_pos_a=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,fromScale*display_size_a,tileRatio,a_pos);v_pos_b=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,toScale*display_size_b,tileRatio,a_pos);v_pos=(gl_Position.xy/gl_Position.w+1.0)/2.0*u_world;}`,bl=`uniform vec2 u_texsize;uniform float u_fade;uniform sampler2D u_image;varying vec2 v_pos_a;varying vec2 v_pos_b;
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+void main() {
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+vec2 pattern_tl_a=pattern_from.xy;vec2 pattern_br_a=pattern_from.zw;vec2 pattern_tl_b=pattern_to.xy;vec2 pattern_br_b=pattern_to.zw;vec2 imagecoord=mod(v_pos_a,1.0);vec2 pos=mix(pattern_tl_a/u_texsize,pattern_br_a/u_texsize,imagecoord);vec4 color1=texture2D(u_image,pos);vec2 imagecoord_b=mod(v_pos_b,1.0);vec2 pos2=mix(pattern_tl_b/u_texsize,pattern_br_b/u_texsize,imagecoord_b);vec4 color2=texture2D(u_image,pos2);gl_FragColor=mix(color1,color2,u_fade)*opacity;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,Gf=`uniform mat4 u_matrix;uniform vec2 u_pixel_coord_upper;uniform vec2 u_pixel_coord_lower;uniform vec3 u_scale;attribute vec2 a_pos;varying vec2 v_pos_a;varying vec2 v_pos_b;
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+#pragma mapbox: define lowp float pixel_ratio_from
+#pragma mapbox: define lowp float pixel_ratio_to
+void main() {
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+#pragma mapbox: initialize lowp float pixel_ratio_from
+#pragma mapbox: initialize lowp float pixel_ratio_to
+vec2 pattern_tl_a=pattern_from.xy;vec2 pattern_br_a=pattern_from.zw;vec2 pattern_tl_b=pattern_to.xy;vec2 pattern_br_b=pattern_to.zw;float tileZoomRatio=u_scale.x;float fromScale=u_scale.y;float toScale=u_scale.z;vec2 display_size_a=(pattern_br_a-pattern_tl_a)/pixel_ratio_from;vec2 display_size_b=(pattern_br_b-pattern_tl_b)/pixel_ratio_to;gl_Position=u_matrix*vec4(a_pos,0,1);v_pos_a=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,fromScale*display_size_a,tileZoomRatio,a_pos);v_pos_b=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,toScale*display_size_b,tileZoomRatio,a_pos);}`,Ic=`varying vec4 v_color;void main() {gl_FragColor=v_color;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,yf=`uniform mat4 u_matrix;uniform vec3 u_lightcolor;uniform lowp vec3 u_lightpos;uniform lowp float u_lightintensity;uniform float u_vertical_gradient;uniform lowp float u_opacity;attribute vec2 a_pos;attribute vec4 a_normal_ed;varying vec4 v_color;
+#pragma mapbox: define highp float base
+#pragma mapbox: define highp float height
+#pragma mapbox: define highp vec4 color
+void main() {
+#pragma mapbox: initialize highp float base
+#pragma mapbox: initialize highp float height
+#pragma mapbox: initialize highp vec4 color
+vec3 normal=a_normal_ed.xyz;base=max(0.0,base);height=max(0.0,height);float t=mod(normal.x,2.0);gl_Position=u_matrix*vec4(a_pos,t > 0.0 ? height : base,1);float colorvalue=color.r*0.2126+color.g*0.7152+color.b*0.0722;v_color=vec4(0.0,0.0,0.0,1.0);vec4 ambientlight=vec4(0.03,0.03,0.03,1.0);color+=ambientlight;float directional=clamp(dot(normal/16384.0,u_lightpos),0.0,1.0);directional=mix((1.0-u_lightintensity),max((1.0-colorvalue+u_lightintensity),1.0),directional);if (normal.y !=0.0) {directional*=((1.0-u_vertical_gradient)+(u_vertical_gradient*clamp((t+base)*pow(height/150.0,0.5),mix(0.7,0.98,1.0-u_lightintensity),1.0)));}v_color.r+=clamp(color.r*directional*u_lightcolor.r,mix(0.0,0.3,1.0-u_lightcolor.r),1.0);v_color.g+=clamp(color.g*directional*u_lightcolor.g,mix(0.0,0.3,1.0-u_lightcolor.g),1.0);v_color.b+=clamp(color.b*directional*u_lightcolor.b,mix(0.0,0.3,1.0-u_lightcolor.b),1.0);v_color*=u_opacity;}`,Bl=`uniform vec2 u_texsize;uniform float u_fade;uniform sampler2D u_image;varying vec2 v_pos_a;varying vec2 v_pos_b;varying vec4 v_lighting;
+#pragma mapbox: define lowp float base
+#pragma mapbox: define lowp float height
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+#pragma mapbox: define lowp float pixel_ratio_from
+#pragma mapbox: define lowp float pixel_ratio_to
+void main() {
+#pragma mapbox: initialize lowp float base
+#pragma mapbox: initialize lowp float height
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+#pragma mapbox: initialize lowp float pixel_ratio_from
+#pragma mapbox: initialize lowp float pixel_ratio_to
+vec2 pattern_tl_a=pattern_from.xy;vec2 pattern_br_a=pattern_from.zw;vec2 pattern_tl_b=pattern_to.xy;vec2 pattern_br_b=pattern_to.zw;vec2 imagecoord=mod(v_pos_a,1.0);vec2 pos=mix(pattern_tl_a/u_texsize,pattern_br_a/u_texsize,imagecoord);vec4 color1=texture2D(u_image,pos);vec2 imagecoord_b=mod(v_pos_b,1.0);vec2 pos2=mix(pattern_tl_b/u_texsize,pattern_br_b/u_texsize,imagecoord_b);vec4 color2=texture2D(u_image,pos2);vec4 mixedColor=mix(color1,color2,u_fade);gl_FragColor=mixedColor*v_lighting;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,wh=`uniform mat4 u_matrix;uniform vec2 u_pixel_coord_upper;uniform vec2 u_pixel_coord_lower;uniform float u_height_factor;uniform vec3 u_scale;uniform float u_vertical_gradient;uniform lowp float u_opacity;uniform vec3 u_lightcolor;uniform lowp vec3 u_lightpos;uniform lowp float u_lightintensity;attribute vec2 a_pos;attribute vec4 a_normal_ed;varying vec2 v_pos_a;varying vec2 v_pos_b;varying vec4 v_lighting;
+#pragma mapbox: define lowp float base
+#pragma mapbox: define lowp float height
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+#pragma mapbox: define lowp float pixel_ratio_from
+#pragma mapbox: define lowp float pixel_ratio_to
+void main() {
+#pragma mapbox: initialize lowp float base
+#pragma mapbox: initialize lowp float height
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+#pragma mapbox: initialize lowp float pixel_ratio_from
+#pragma mapbox: initialize lowp float pixel_ratio_to
+vec2 pattern_tl_a=pattern_from.xy;vec2 pattern_br_a=pattern_from.zw;vec2 pattern_tl_b=pattern_to.xy;vec2 pattern_br_b=pattern_to.zw;float tileRatio=u_scale.x;float fromScale=u_scale.y;float toScale=u_scale.z;vec3 normal=a_normal_ed.xyz;float edgedistance=a_normal_ed.w;vec2 display_size_a=(pattern_br_a-pattern_tl_a)/pixel_ratio_from;vec2 display_size_b=(pattern_br_b-pattern_tl_b)/pixel_ratio_to;base=max(0.0,base);height=max(0.0,height);float t=mod(normal.x,2.0);float z=t > 0.0 ? height : base;gl_Position=u_matrix*vec4(a_pos,z,1);vec2 pos=normal.x==1.0 && normal.y==0.0 && normal.z==16384.0
+? a_pos
+: vec2(edgedistance,z*u_height_factor);v_pos_a=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,fromScale*display_size_a,tileRatio,pos);v_pos_b=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,toScale*display_size_b,tileRatio,pos);v_lighting=vec4(0.0,0.0,0.0,1.0);float directional=clamp(dot(normal/16383.0,u_lightpos),0.0,1.0);directional=mix((1.0-u_lightintensity),max((0.5+u_lightintensity),1.0),directional);if (normal.y !=0.0) {directional*=((1.0-u_vertical_gradient)+(u_vertical_gradient*clamp((t+base)*pow(height/150.0,0.5),mix(0.7,0.98,1.0-u_lightintensity),1.0)));}v_lighting.rgb+=clamp(directional*u_lightcolor,mix(vec3(0.0),vec3(0.3),1.0-u_lightcolor),vec3(1.0));v_lighting*=u_opacity;}`,Qf=`#ifdef GL_ES
+precision highp float;
+#endif
+uniform sampler2D u_image;varying vec2 v_pos;uniform vec2 u_dimension;uniform float u_zoom;uniform vec4 u_unpack;float getElevation(vec2 coord,float bias) {vec4 data=texture2D(u_image,coord)*255.0;data.a=-1.0;return dot(data,u_unpack)/4.0;}void main() {vec2 epsilon=1.0/u_dimension;float a=getElevation(v_pos+vec2(-epsilon.x,-epsilon.y),0.0);float b=getElevation(v_pos+vec2(0,-epsilon.y),0.0);float c=getElevation(v_pos+vec2(epsilon.x,-epsilon.y),0.0);float d=getElevation(v_pos+vec2(-epsilon.x,0),0.0);float e=getElevation(v_pos,0.0);float f=getElevation(v_pos+vec2(epsilon.x,0),0.0);float g=getElevation(v_pos+vec2(-epsilon.x,epsilon.y),0.0);float h=getElevation(v_pos+vec2(0,epsilon.y),0.0);float i=getElevation(v_pos+vec2(epsilon.x,epsilon.y),0.0);float exaggerationFactor=u_zoom < 2.0 ? 0.4 : u_zoom < 4.5 ? 0.35 : 0.3;float exaggeration=u_zoom < 15.0 ? (u_zoom-15.0)*exaggerationFactor : 0.0;vec2 deriv=vec2((c+f+f+i)-(a+d+d+g),(g+h+h+i)-(a+b+b+c))/pow(2.0,exaggeration+(19.2562-u_zoom));gl_FragColor=clamp(vec4(deriv.x/2.0+0.5,deriv.y/2.0+0.5,1.0,1.0),0.0,1.0);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,_f="uniform mat4 u_matrix;uniform vec2 u_dimension;attribute vec2 a_pos;attribute vec2 a_texture_pos;varying vec2 v_pos;void main() {gl_Position=u_matrix*vec4(a_pos,0,1);highp vec2 epsilon=1.0/u_dimension;float scale=(u_dimension.x-2.0)/u_dimension.x;v_pos=(a_texture_pos/8192.0)*scale+epsilon;}",Yc=`uniform sampler2D u_image;varying vec2 v_pos;uniform vec2 u_latrange;uniform vec2 u_light;uniform vec4 u_shadow;uniform vec4 u_highlight;uniform vec4 u_accent;
+#define PI 3.141592653589793
+void main() {vec4 pixel=texture2D(u_image,v_pos);vec2 deriv=((pixel.rg*2.0)-1.0);float scaleFactor=cos(radians((u_latrange[0]-u_latrange[1])*(1.0-v_pos.y)+u_latrange[1]));float slope=atan(1.25*length(deriv)/scaleFactor);float aspect=deriv.x !=0.0 ? atan(deriv.y,-deriv.x) : PI/2.0*(deriv.y > 0.0 ? 1.0 :-1.0);float intensity=u_light.x;float azimuth=u_light.y+PI;float base=1.875-intensity*1.75;float maxValue=0.5*PI;float scaledSlope=intensity !=0.5 ? ((pow(base,slope)-1.0)/(pow(base,maxValue)-1.0))*maxValue : slope;float accent=cos(scaledSlope);vec4 accent_color=(1.0-accent)*u_accent*clamp(intensity*2.0,0.0,1.0);float shade=abs(mod((aspect+azimuth)/PI+0.5,2.0)-1.0);vec4 shade_color=mix(u_shadow,u_highlight,shade)*sin(scaledSlope)*clamp(intensity*2.0,0.0,1.0);gl_FragColor=accent_color*(1.0-shade_color.a)+shade_color;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,eh="uniform mat4 u_matrix;attribute vec2 a_pos;attribute vec2 a_texture_pos;varying vec2 v_pos;void main() {gl_Position=u_matrix*vec4(a_pos,0,1);v_pos=a_texture_pos/8192.0;}",th=`uniform lowp float u_device_pixel_ratio;varying vec2 v_width2;varying vec2 v_normal;varying float v_gamma_scale;
+#pragma mapbox: define highp vec4 color
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+float dist=length(v_normal)*v_width2.s;float blur2=(blur+1.0/u_device_pixel_ratio)*v_gamma_scale;float alpha=clamp(min(dist-(v_width2.t-blur2),v_width2.s-dist)/blur2,0.0,1.0);gl_FragColor=color*(alpha*opacity);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,ju=`
+#define scale 0.015873016
+attribute vec2 a_pos_normal;attribute vec4 a_data;uniform mat4 u_matrix;uniform mediump float u_ratio;uniform vec2 u_units_to_pixels;uniform lowp float u_device_pixel_ratio;varying vec2 v_normal;varying vec2 v_width2;varying float v_gamma_scale;varying highp float v_linesofar;
+#pragma mapbox: define highp vec4 color
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define mediump float gapwidth
+#pragma mapbox: define lowp float offset
+#pragma mapbox: define mediump float width
+void main() {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump float gapwidth
+#pragma mapbox: initialize lowp float offset
+#pragma mapbox: initialize mediump float width
+float ANTIALIASING=1.0/u_device_pixel_ratio/2.0;vec2 a_extrude=a_data.xy-128.0;float a_direction=mod(a_data.z,4.0)-1.0;v_linesofar=(floor(a_data.z/4.0)+a_data.w*64.0)*2.0;vec2 pos=floor(a_pos_normal*0.5);mediump vec2 normal=a_pos_normal-2.0*pos;normal.y=normal.y*2.0-1.0;v_normal=normal;gapwidth=gapwidth/2.0;float halfwidth=width/2.0;offset=-1.0*offset;float inset=gapwidth+(gapwidth > 0.0 ? ANTIALIASING : 0.0);float outset=gapwidth+halfwidth*(gapwidth > 0.0 ? 2.0 : 1.0)+(halfwidth==0.0 ? 0.0 : ANTIALIASING);mediump vec2 dist=outset*a_extrude*scale;mediump float u=0.5*a_direction;mediump float t=1.0-abs(u);mediump vec2 offset2=offset*a_extrude*scale*normal.y*mat2(t,-u,u,t);vec4 projected_extrude=u_matrix*vec4(dist/u_ratio,0.0,0.0);gl_Position=u_matrix*vec4(pos+offset2/u_ratio,0.0,1.0)+projected_extrude;float extrude_length_without_perspective=length(dist);float extrude_length_with_perspective=length(projected_extrude.xy/gl_Position.w*u_units_to_pixels);v_gamma_scale=extrude_length_without_perspective/extrude_length_with_perspective;v_width2=vec2(outset,inset);}`,Hf=`uniform lowp float u_device_pixel_ratio;uniform sampler2D u_image;varying vec2 v_width2;varying vec2 v_normal;varying float v_gamma_scale;varying highp vec2 v_uv;
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+float dist=length(v_normal)*v_width2.s;float blur2=(blur+1.0/u_device_pixel_ratio)*v_gamma_scale;float alpha=clamp(min(dist-(v_width2.t-blur2),v_width2.s-dist)/blur2,0.0,1.0);vec4 color=texture2D(u_image,v_uv);gl_FragColor=color*(alpha*opacity);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,cc=`
+#define scale 0.015873016
+attribute vec2 a_pos_normal;attribute vec4 a_data;attribute float a_uv_x;attribute float a_split_index;uniform mat4 u_matrix;uniform mediump float u_ratio;uniform lowp float u_device_pixel_ratio;uniform vec2 u_units_to_pixels;uniform float u_image_height;varying vec2 v_normal;varying vec2 v_width2;varying float v_gamma_scale;varying highp vec2 v_uv;
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define mediump float gapwidth
+#pragma mapbox: define lowp float offset
+#pragma mapbox: define mediump float width
+void main() {
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump float gapwidth
+#pragma mapbox: initialize lowp float offset
+#pragma mapbox: initialize mediump float width
+float ANTIALIASING=1.0/u_device_pixel_ratio/2.0;vec2 a_extrude=a_data.xy-128.0;float a_direction=mod(a_data.z,4.0)-1.0;highp float texel_height=1.0/u_image_height;highp float half_texel_height=0.5*texel_height;v_uv=vec2(a_uv_x,a_split_index*texel_height-half_texel_height);vec2 pos=floor(a_pos_normal*0.5);mediump vec2 normal=a_pos_normal-2.0*pos;normal.y=normal.y*2.0-1.0;v_normal=normal;gapwidth=gapwidth/2.0;float halfwidth=width/2.0;offset=-1.0*offset;float inset=gapwidth+(gapwidth > 0.0 ? ANTIALIASING : 0.0);float outset=gapwidth+halfwidth*(gapwidth > 0.0 ? 2.0 : 1.0)+(halfwidth==0.0 ? 0.0 : ANTIALIASING);mediump vec2 dist=outset*a_extrude*scale;mediump float u=0.5*a_direction;mediump float t=1.0-abs(u);mediump vec2 offset2=offset*a_extrude*scale*normal.y*mat2(t,-u,u,t);vec4 projected_extrude=u_matrix*vec4(dist/u_ratio,0.0,0.0);gl_Position=u_matrix*vec4(pos+offset2/u_ratio,0.0,1.0)+projected_extrude;float extrude_length_without_perspective=length(dist);float extrude_length_with_perspective=length(projected_extrude.xy/gl_Position.w*u_units_to_pixels);v_gamma_scale=extrude_length_without_perspective/extrude_length_with_perspective;v_width2=vec2(outset,inset);}`,of=`uniform lowp float u_device_pixel_ratio;uniform vec2 u_texsize;uniform float u_fade;uniform mediump vec3 u_scale;uniform sampler2D u_image;varying vec2 v_normal;varying vec2 v_width2;varying float v_linesofar;varying float v_gamma_scale;varying float v_width;
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+#pragma mapbox: define lowp float pixel_ratio_from
+#pragma mapbox: define lowp float pixel_ratio_to
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+#pragma mapbox: initialize lowp float pixel_ratio_from
+#pragma mapbox: initialize lowp float pixel_ratio_to
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+vec2 pattern_tl_a=pattern_from.xy;vec2 pattern_br_a=pattern_from.zw;vec2 pattern_tl_b=pattern_to.xy;vec2 pattern_br_b=pattern_to.zw;float tileZoomRatio=u_scale.x;float fromScale=u_scale.y;float toScale=u_scale.z;vec2 display_size_a=(pattern_br_a-pattern_tl_a)/pixel_ratio_from;vec2 display_size_b=(pattern_br_b-pattern_tl_b)/pixel_ratio_to;vec2 pattern_size_a=vec2(display_size_a.x*fromScale/tileZoomRatio,display_size_a.y);vec2 pattern_size_b=vec2(display_size_b.x*toScale/tileZoomRatio,display_size_b.y);float aspect_a=display_size_a.y/v_width;float aspect_b=display_size_b.y/v_width;float dist=length(v_normal)*v_width2.s;float blur2=(blur+1.0/u_device_pixel_ratio)*v_gamma_scale;float alpha=clamp(min(dist-(v_width2.t-blur2),v_width2.s-dist)/blur2,0.0,1.0);float x_a=mod(v_linesofar/pattern_size_a.x*aspect_a,1.0);float x_b=mod(v_linesofar/pattern_size_b.x*aspect_b,1.0);float y=0.5*v_normal.y+0.5;vec2 texel_size=1.0/u_texsize;vec2 pos_a=mix(pattern_tl_a*texel_size-texel_size,pattern_br_a*texel_size+texel_size,vec2(x_a,y));vec2 pos_b=mix(pattern_tl_b*texel_size-texel_size,pattern_br_b*texel_size+texel_size,vec2(x_b,y));vec4 color=mix(texture2D(u_image,pos_a),texture2D(u_image,pos_b),u_fade);gl_FragColor=color*alpha*opacity;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,Nl=`
+#define scale 0.015873016
+#define LINE_DISTANCE_SCALE 2.0
+attribute vec2 a_pos_normal;attribute vec4 a_data;uniform mat4 u_matrix;uniform vec2 u_units_to_pixels;uniform mediump float u_ratio;uniform lowp float u_device_pixel_ratio;varying vec2 v_normal;varying vec2 v_width2;varying float v_linesofar;varying float v_gamma_scale;varying float v_width;
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp float offset
+#pragma mapbox: define mediump float gapwidth
+#pragma mapbox: define mediump float width
+#pragma mapbox: define lowp float floorwidth
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+#pragma mapbox: define lowp float pixel_ratio_from
+#pragma mapbox: define lowp float pixel_ratio_to
+void main() {
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize lowp float offset
+#pragma mapbox: initialize mediump float gapwidth
+#pragma mapbox: initialize mediump float width
+#pragma mapbox: initialize lowp float floorwidth
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+#pragma mapbox: initialize lowp float pixel_ratio_from
+#pragma mapbox: initialize lowp float pixel_ratio_to
+float ANTIALIASING=1.0/u_device_pixel_ratio/2.0;vec2 a_extrude=a_data.xy-128.0;float a_direction=mod(a_data.z,4.0)-1.0;float a_linesofar=(floor(a_data.z/4.0)+a_data.w*64.0)*LINE_DISTANCE_SCALE;vec2 pos=floor(a_pos_normal*0.5);mediump vec2 normal=a_pos_normal-2.0*pos;normal.y=normal.y*2.0-1.0;v_normal=normal;gapwidth=gapwidth/2.0;float halfwidth=width/2.0;offset=-1.0*offset;float inset=gapwidth+(gapwidth > 0.0 ? ANTIALIASING : 0.0);float outset=gapwidth+halfwidth*(gapwidth > 0.0 ? 2.0 : 1.0)+(halfwidth==0.0 ? 0.0 : ANTIALIASING);mediump vec2 dist=outset*a_extrude*scale;mediump float u=0.5*a_direction;mediump float t=1.0-abs(u);mediump vec2 offset2=offset*a_extrude*scale*normal.y*mat2(t,-u,u,t);vec4 projected_extrude=u_matrix*vec4(dist/u_ratio,0.0,0.0);gl_Position=u_matrix*vec4(pos+offset2/u_ratio,0.0,1.0)+projected_extrude;float extrude_length_without_perspective=length(dist);float extrude_length_with_perspective=length(projected_extrude.xy/gl_Position.w*u_units_to_pixels);v_gamma_scale=extrude_length_without_perspective/extrude_length_with_perspective;v_linesofar=a_linesofar;v_width2=vec2(outset,inset);v_width=floorwidth;}`,Kc=`uniform lowp float u_device_pixel_ratio;uniform sampler2D u_image;uniform float u_sdfgamma;uniform float u_mix;varying vec2 v_normal;varying vec2 v_width2;varying vec2 v_tex_a;varying vec2 v_tex_b;varying float v_gamma_scale;
+#pragma mapbox: define highp vec4 color
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define mediump float width
+#pragma mapbox: define lowp float floorwidth
+void main() {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump float width
+#pragma mapbox: initialize lowp float floorwidth
+float dist=length(v_normal)*v_width2.s;float blur2=(blur+1.0/u_device_pixel_ratio)*v_gamma_scale;float alpha=clamp(min(dist-(v_width2.t-blur2),v_width2.s-dist)/blur2,0.0,1.0);float sdfdist_a=texture2D(u_image,v_tex_a).a;float sdfdist_b=texture2D(u_image,v_tex_b).a;float sdfdist=mix(sdfdist_a,sdfdist_b,u_mix);alpha*=smoothstep(0.5-u_sdfgamma/floorwidth,0.5+u_sdfgamma/floorwidth,sdfdist);gl_FragColor=color*(alpha*opacity);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,Rc=`
+#define scale 0.015873016
+#define LINE_DISTANCE_SCALE 2.0
+attribute vec2 a_pos_normal;attribute vec4 a_data;uniform mat4 u_matrix;uniform mediump float u_ratio;uniform lowp float u_device_pixel_ratio;uniform vec2 u_patternscale_a;uniform float u_tex_y_a;uniform vec2 u_patternscale_b;uniform float u_tex_y_b;uniform vec2 u_units_to_pixels;varying vec2 v_normal;varying vec2 v_width2;varying vec2 v_tex_a;varying vec2 v_tex_b;varying float v_gamma_scale;
+#pragma mapbox: define highp vec4 color
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define mediump float gapwidth
+#pragma mapbox: define lowp float offset
+#pragma mapbox: define mediump float width
+#pragma mapbox: define lowp float floorwidth
+void main() {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump float gapwidth
+#pragma mapbox: initialize lowp float offset
+#pragma mapbox: initialize mediump float width
+#pragma mapbox: initialize lowp float floorwidth
+float ANTIALIASING=1.0/u_device_pixel_ratio/2.0;vec2 a_extrude=a_data.xy-128.0;float a_direction=mod(a_data.z,4.0)-1.0;float a_linesofar=(floor(a_data.z/4.0)+a_data.w*64.0)*LINE_DISTANCE_SCALE;vec2 pos=floor(a_pos_normal*0.5);mediump vec2 normal=a_pos_normal-2.0*pos;normal.y=normal.y*2.0-1.0;v_normal=normal;gapwidth=gapwidth/2.0;float halfwidth=width/2.0;offset=-1.0*offset;float inset=gapwidth+(gapwidth > 0.0 ? ANTIALIASING : 0.0);float outset=gapwidth+halfwidth*(gapwidth > 0.0 ? 2.0 : 1.0)+(halfwidth==0.0 ? 0.0 : ANTIALIASING);mediump vec2 dist=outset*a_extrude*scale;mediump float u=0.5*a_direction;mediump float t=1.0-abs(u);mediump vec2 offset2=offset*a_extrude*scale*normal.y*mat2(t,-u,u,t);vec4 projected_extrude=u_matrix*vec4(dist/u_ratio,0.0,0.0);gl_Position=u_matrix*vec4(pos+offset2/u_ratio,0.0,1.0)+projected_extrude;float extrude_length_without_perspective=length(dist);float extrude_length_with_perspective=length(projected_extrude.xy/gl_Position.w*u_units_to_pixels);v_gamma_scale=extrude_length_without_perspective/extrude_length_with_perspective;v_tex_a=vec2(a_linesofar*u_patternscale_a.x/floorwidth,normal.y*u_patternscale_a.y+u_tex_y_a);v_tex_b=vec2(a_linesofar*u_patternscale_b.x/floorwidth,normal.y*u_patternscale_b.y+u_tex_y_b);v_width2=vec2(outset,inset);}`,gs=`uniform float u_fade_t;uniform float u_opacity;uniform sampler2D u_image0;uniform sampler2D u_image1;varying vec2 v_pos0;varying vec2 v_pos1;uniform float u_brightness_low;uniform float u_brightness_high;uniform float u_saturation_factor;uniform float u_contrast_factor;uniform vec3 u_spin_weights;void main() {vec4 color0=texture2D(u_image0,v_pos0);vec4 color1=texture2D(u_image1,v_pos1);if (color0.a > 0.0) {color0.rgb=color0.rgb/color0.a;}if (color1.a > 0.0) {color1.rgb=color1.rgb/color1.a;}vec4 color=mix(color0,color1,u_fade_t);color.a*=u_opacity;vec3 rgb=color.rgb;rgb=vec3(dot(rgb,u_spin_weights.xyz),dot(rgb,u_spin_weights.zxy),dot(rgb,u_spin_weights.yzx));float average=(color.r+color.g+color.b)/3.0;rgb+=(average-rgb)*u_saturation_factor;rgb=(rgb-0.5)*u_contrast_factor+0.5;vec3 u_high_vec=vec3(u_brightness_low,u_brightness_low,u_brightness_low);vec3 u_low_vec=vec3(u_brightness_high,u_brightness_high,u_brightness_high);gl_FragColor=vec4(mix(u_high_vec,u_low_vec,rgb)*color.a,color.a);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,jf="uniform mat4 u_matrix;uniform vec2 u_tl_parent;uniform float u_scale_parent;uniform float u_buffer_scale;attribute vec2 a_pos;attribute vec2 a_texture_pos;varying vec2 v_pos0;varying vec2 v_pos1;void main() {gl_Position=u_matrix*vec4(a_pos,0,1);v_pos0=(((a_texture_pos/8192.0)-0.5)/u_buffer_scale )+0.5;v_pos1=(v_pos0*u_scale_parent)+u_tl_parent;}",Gh=`uniform sampler2D u_texture;varying vec2 v_tex;varying float v_fade_opacity;
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize lowp float opacity
+lowp float alpha=opacity*v_fade_opacity;gl_FragColor=texture2D(u_texture,v_tex)*alpha;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,rh=`const float PI=3.141592653589793;attribute vec4 a_pos_offset;attribute vec4 a_data;attribute vec4 a_pixeloffset;attribute vec3 a_projected_pos;attribute float a_fade_opacity;uniform bool u_is_size_zoom_constant;uniform bool u_is_size_feature_constant;uniform highp float u_size_t;uniform highp float u_size;uniform highp float u_camera_to_center_distance;uniform highp float u_pitch;uniform bool u_rotate_symbol;uniform highp float u_aspect_ratio;uniform float u_fade_change;uniform mat4 u_matrix;uniform mat4 u_label_plane_matrix;uniform mat4 u_coord_matrix;uniform bool u_is_text;uniform bool u_pitch_with_map;uniform vec2 u_texsize;varying vec2 v_tex;varying float v_fade_opacity;
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize lowp float opacity
+vec2 a_pos=a_pos_offset.xy;vec2 a_offset=a_pos_offset.zw;vec2 a_tex=a_data.xy;vec2 a_size=a_data.zw;float a_size_min=floor(a_size[0]*0.5);vec2 a_pxoffset=a_pixeloffset.xy;vec2 a_minFontScale=a_pixeloffset.zw/256.0;highp float segment_angle=-a_projected_pos[2];float size;if (!u_is_size_zoom_constant && !u_is_size_feature_constant) {size=mix(a_size_min,a_size[1],u_size_t)/128.0;} else if (u_is_size_zoom_constant && !u_is_size_feature_constant) {size=a_size_min/128.0;} else {size=u_size;}vec4 projectedPoint=u_matrix*vec4(a_pos,0,1);highp float camera_to_anchor_distance=projectedPoint.w;highp float distance_ratio=u_pitch_with_map ?
+camera_to_anchor_distance/u_camera_to_center_distance :
+u_camera_to_center_distance/camera_to_anchor_distance;highp float perspective_ratio=clamp(0.5+0.5*distance_ratio,0.0,4.0);size*=perspective_ratio;float fontScale=u_is_text ? size/24.0 : size;highp float symbol_rotation=0.0;if (u_rotate_symbol) {vec4 offsetProjectedPoint=u_matrix*vec4(a_pos+vec2(1,0),0,1);vec2 a=projectedPoint.xy/projectedPoint.w;vec2 b=offsetProjectedPoint.xy/offsetProjectedPoint.w;symbol_rotation=atan((b.y-a.y)/u_aspect_ratio,b.x-a.x);}highp float angle_sin=sin(segment_angle+symbol_rotation);highp float angle_cos=cos(segment_angle+symbol_rotation);mat2 rotation_matrix=mat2(angle_cos,-1.0*angle_sin,angle_sin,angle_cos);vec4 projected_pos=u_label_plane_matrix*vec4(a_projected_pos.xy,0.0,1.0);gl_Position=u_coord_matrix*vec4(projected_pos.xy/projected_pos.w+rotation_matrix*(a_offset/32.0*max(a_minFontScale,fontScale)+a_pxoffset/16.0),0.0,1.0);v_tex=a_tex/u_texsize;vec2 fade_opacity=unpack_opacity(a_fade_opacity);float fade_change=fade_opacity[1] > 0.5 ? u_fade_change :-u_fade_change;v_fade_opacity=max(0.0,min(1.0,fade_opacity[0]+fade_change));}`,sf=`#define SDF_PX 8.0
+uniform bool u_is_halo;uniform sampler2D u_texture;uniform highp float u_gamma_scale;uniform lowp float u_device_pixel_ratio;uniform bool u_is_text;varying vec2 v_data0;varying vec3 v_data1;
+#pragma mapbox: define highp vec4 fill_color
+#pragma mapbox: define highp vec4 halo_color
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp float halo_width
+#pragma mapbox: define lowp float halo_blur
+void main() {
+#pragma mapbox: initialize highp vec4 fill_color
+#pragma mapbox: initialize highp vec4 halo_color
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize lowp float halo_width
+#pragma mapbox: initialize lowp float halo_blur
+float EDGE_GAMMA=0.105/u_device_pixel_ratio;vec2 tex=v_data0.xy;float gamma_scale=v_data1.x;float size=v_data1.y;float fade_opacity=v_data1[2];float fontScale=u_is_text ? size/24.0 : size;lowp vec4 color=fill_color;highp float gamma=EDGE_GAMMA/(fontScale*u_gamma_scale);lowp float buff=(256.0-64.0)/256.0;if (u_is_halo) {color=halo_color;gamma=(halo_blur*1.19/SDF_PX+EDGE_GAMMA)/(fontScale*u_gamma_scale);buff=(6.0-halo_width/fontScale)/SDF_PX;}lowp float dist=texture2D(u_texture,tex).a;highp float gamma_scaled=gamma*gamma_scale;highp float alpha=smoothstep(buff-gamma_scaled,buff+gamma_scaled,dist);gl_FragColor=color*(alpha*opacity*fade_opacity);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,Th=`const float PI=3.141592653589793;attribute vec4 a_pos_offset;attribute vec4 a_data;attribute vec4 a_pixeloffset;attribute vec3 a_projected_pos;attribute float a_fade_opacity;uniform bool u_is_size_zoom_constant;uniform bool u_is_size_feature_constant;uniform highp float u_size_t;uniform highp float u_size;uniform mat4 u_matrix;uniform mat4 u_label_plane_matrix;uniform mat4 u_coord_matrix;uniform bool u_is_text;uniform bool u_pitch_with_map;uniform highp float u_pitch;uniform bool u_rotate_symbol;uniform highp float u_aspect_ratio;uniform highp float u_camera_to_center_distance;uniform float u_fade_change;uniform vec2 u_texsize;varying vec2 v_data0;varying vec3 v_data1;
+#pragma mapbox: define highp vec4 fill_color
+#pragma mapbox: define highp vec4 halo_color
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp float halo_width
+#pragma mapbox: define lowp float halo_blur
+void main() {
+#pragma mapbox: initialize highp vec4 fill_color
+#pragma mapbox: initialize highp vec4 halo_color
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize lowp float halo_width
+#pragma mapbox: initialize lowp float halo_blur
+vec2 a_pos=a_pos_offset.xy;vec2 a_offset=a_pos_offset.zw;vec2 a_tex=a_data.xy;vec2 a_size=a_data.zw;float a_size_min=floor(a_size[0]*0.5);vec2 a_pxoffset=a_pixeloffset.xy;highp float segment_angle=-a_projected_pos[2];float size;if (!u_is_size_zoom_constant && !u_is_size_feature_constant) {size=mix(a_size_min,a_size[1],u_size_t)/128.0;} else if (u_is_size_zoom_constant && !u_is_size_feature_constant) {size=a_size_min/128.0;} else {size=u_size;}vec4 projectedPoint=u_matrix*vec4(a_pos,0,1);highp float camera_to_anchor_distance=projectedPoint.w;highp float distance_ratio=u_pitch_with_map ?
+camera_to_anchor_distance/u_camera_to_center_distance :
+u_camera_to_center_distance/camera_to_anchor_distance;highp float perspective_ratio=clamp(0.5+0.5*distance_ratio,0.0,4.0);size*=perspective_ratio;float fontScale=u_is_text ? size/24.0 : size;highp float symbol_rotation=0.0;if (u_rotate_symbol) {vec4 offsetProjectedPoint=u_matrix*vec4(a_pos+vec2(1,0),0,1);vec2 a=projectedPoint.xy/projectedPoint.w;vec2 b=offsetProjectedPoint.xy/offsetProjectedPoint.w;symbol_rotation=atan((b.y-a.y)/u_aspect_ratio,b.x-a.x);}highp float angle_sin=sin(segment_angle+symbol_rotation);highp float angle_cos=cos(segment_angle+symbol_rotation);mat2 rotation_matrix=mat2(angle_cos,-1.0*angle_sin,angle_sin,angle_cos);vec4 projected_pos=u_label_plane_matrix*vec4(a_projected_pos.xy,0.0,1.0);gl_Position=u_coord_matrix*vec4(projected_pos.xy/projected_pos.w+rotation_matrix*(a_offset/32.0*fontScale+a_pxoffset),0.0,1.0);float gamma_scale=gl_Position.w;vec2 fade_opacity=unpack_opacity(a_fade_opacity);float fade_change=fade_opacity[1] > 0.5 ? u_fade_change :-u_fade_change;float interpolated_fade_opacity=max(0.0,min(1.0,fade_opacity[0]+fade_change));v_data0=a_tex/u_texsize;v_data1=vec3(gamma_scale,size,interpolated_fade_opacity);}`,Mu=`#define SDF_PX 8.0
+#define SDF 1.0
+#define ICON 0.0
+uniform bool u_is_halo;uniform sampler2D u_texture;uniform sampler2D u_texture_icon;uniform highp float u_gamma_scale;uniform lowp float u_device_pixel_ratio;varying vec4 v_data0;varying vec4 v_data1;
+#pragma mapbox: define highp vec4 fill_color
+#pragma mapbox: define highp vec4 halo_color
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp float halo_width
+#pragma mapbox: define lowp float halo_blur
+void main() {
+#pragma mapbox: initialize highp vec4 fill_color
+#pragma mapbox: initialize highp vec4 halo_color
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize lowp float halo_width
+#pragma mapbox: initialize lowp float halo_blur
+float fade_opacity=v_data1[2];if (v_data1.w==ICON) {vec2 tex_icon=v_data0.zw;lowp float alpha=opacity*fade_opacity;gl_FragColor=texture2D(u_texture_icon,tex_icon)*alpha;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+return;}vec2 tex=v_data0.xy;float EDGE_GAMMA=0.105/u_device_pixel_ratio;float gamma_scale=v_data1.x;float size=v_data1.y;float fontScale=size/24.0;lowp vec4 color=fill_color;highp float gamma=EDGE_GAMMA/(fontScale*u_gamma_scale);lowp float buff=(256.0-64.0)/256.0;if (u_is_halo) {color=halo_color;gamma=(halo_blur*1.19/SDF_PX+EDGE_GAMMA)/(fontScale*u_gamma_scale);buff=(6.0-halo_width/fontScale)/SDF_PX;}lowp float dist=texture2D(u_texture,tex).a;highp float gamma_scaled=gamma*gamma_scale;highp float alpha=smoothstep(buff-gamma_scaled,buff+gamma_scaled,dist);gl_FragColor=color*(alpha*opacity*fade_opacity);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,ih=`const float PI=3.141592653589793;attribute vec4 a_pos_offset;attribute vec4 a_data;attribute vec3 a_projected_pos;attribute float a_fade_opacity;uniform bool u_is_size_zoom_constant;uniform bool u_is_size_feature_constant;uniform highp float u_size_t;uniform highp float u_size;uniform mat4 u_matrix;uniform mat4 u_label_plane_matrix;uniform mat4 u_coord_matrix;uniform bool u_is_text;uniform bool u_pitch_with_map;uniform highp float u_pitch;uniform bool u_rotate_symbol;uniform highp float u_aspect_ratio;uniform highp float u_camera_to_center_distance;uniform float u_fade_change;uniform vec2 u_texsize;uniform vec2 u_texsize_icon;varying vec4 v_data0;varying vec4 v_data1;
+#pragma mapbox: define highp vec4 fill_color
+#pragma mapbox: define highp vec4 halo_color
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp float halo_width
+#pragma mapbox: define lowp float halo_blur
+void main() {
+#pragma mapbox: initialize highp vec4 fill_color
+#pragma mapbox: initialize highp vec4 halo_color
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize lowp float halo_width
+#pragma mapbox: initialize lowp float halo_blur
+vec2 a_pos=a_pos_offset.xy;vec2 a_offset=a_pos_offset.zw;vec2 a_tex=a_data.xy;vec2 a_size=a_data.zw;float a_size_min=floor(a_size[0]*0.5);float is_sdf=a_size[0]-2.0*a_size_min;highp float segment_angle=-a_projected_pos[2];float size;if (!u_is_size_zoom_constant && !u_is_size_feature_constant) {size=mix(a_size_min,a_size[1],u_size_t)/128.0;} else if (u_is_size_zoom_constant && !u_is_size_feature_constant) {size=a_size_min/128.0;} else {size=u_size;}vec4 projectedPoint=u_matrix*vec4(a_pos,0,1);highp float camera_to_anchor_distance=projectedPoint.w;highp float distance_ratio=u_pitch_with_map ?
+camera_to_anchor_distance/u_camera_to_center_distance :
+u_camera_to_center_distance/camera_to_anchor_distance;highp float perspective_ratio=clamp(0.5+0.5*distance_ratio,0.0,4.0);size*=perspective_ratio;float fontScale=size/24.0;highp float symbol_rotation=0.0;if (u_rotate_symbol) {vec4 offsetProjectedPoint=u_matrix*vec4(a_pos+vec2(1,0),0,1);vec2 a=projectedPoint.xy/projectedPoint.w;vec2 b=offsetProjectedPoint.xy/offsetProjectedPoint.w;symbol_rotation=atan((b.y-a.y)/u_aspect_ratio,b.x-a.x);}highp float angle_sin=sin(segment_angle+symbol_rotation);highp float angle_cos=cos(segment_angle+symbol_rotation);mat2 rotation_matrix=mat2(angle_cos,-1.0*angle_sin,angle_sin,angle_cos);vec4 projected_pos=u_label_plane_matrix*vec4(a_projected_pos.xy,0.0,1.0);gl_Position=u_coord_matrix*vec4(projected_pos.xy/projected_pos.w+rotation_matrix*(a_offset/32.0*fontScale),0.0,1.0);float gamma_scale=gl_Position.w;vec2 fade_opacity=unpack_opacity(a_fade_opacity);float fade_change=fade_opacity[1] > 0.5 ? u_fade_change :-u_fade_change;float interpolated_fade_opacity=max(0.0,min(1.0,fade_opacity[0]+fade_change));v_data0.xy=a_tex/u_texsize;v_data0.zw=a_tex/u_texsize_icon;v_data1=vec4(gamma_scale,size,interpolated_fade_opacity,is_sdf);}`,js=Us(Cf,sc),Eu=Us(Vh,Lf),Dc=Us(cs,nf),ks=Us(Vf,Jl),bc=Us(fl,lc),hu=Us(Fu,Es),_u=Us(Hs,Go),nl=Us(ps,uc),nh=Us(xl,Gu),Ah=Us(qs,ad),zu=Us(Po,od),Fc=Us(Yo,Pa),wc=Us(af,Hu),bd=Us(bl,Gf),xf=Us(Ic,yf),Pf=Us(Bl,wh),Ou=Us(Qf,_f),bf=Us(Yc,eh),jl=Us(th,ju),lf=Us(Hf,cc),Hh=Us(of,Nl),If=Us(Kc,Rc),Cs=Us(gs,jf),du=Us(Gh,rh),ku=Us(sf,Th),Wf=Us(Mu,ih);function Us(Y,D){var J=/#pragma mapbox: ([\w]+) ([\w]+) ([\w]+) ([\w]+)/g,q=D.match(/attribute ([\w]+) ([\w]+)/g),K=Y.match(/uniform ([\w]+) ([\w]+)([\s]*)([\w]*)/g),de=D.match(/uniform ([\w]+) ([\w]+)([\s]*)([\w]*)/g),ne=de?de.concat(K):K,we={};return Y=Y.replace(J,function(Ue,ft,Xt,hr,qt){return we[qt]=!0,ft==="define"?`
+#ifndef HAS_UNIFORM_u_`+qt+`
+varying `+Xt+" "+hr+" "+qt+`;
+#else
+uniform `+Xt+" "+hr+" u_"+qt+`;
+#endif
+`:`
+#ifdef HAS_UNIFORM_u_`+qt+`
+    `+Xt+" "+hr+" "+qt+" = u_"+qt+`;
+#endif
+`}),D=D.replace(J,function(Ue,ft,Xt,hr,qt){var Ve=hr==="float"?"vec2":"vec4",Qe=qt.match(/color/)?"color":Ve;return we[qt]?ft==="define"?`
+#ifndef HAS_UNIFORM_u_`+qt+`
+uniform lowp float u_`+qt+`_t;
+attribute `+Xt+" "+Ve+" a_"+qt+`;
+varying `+Xt+" "+hr+" "+qt+`;
+#else
+uniform `+Xt+" "+hr+" u_"+qt+`;
+#endif
+`:Qe==="vec4"?`
+#ifndef HAS_UNIFORM_u_`+qt+`
+    `+qt+" = a_"+qt+`;
+#else
+    `+Xt+" "+hr+" "+qt+" = u_"+qt+`;
+#endif
+`:`
+#ifndef HAS_UNIFORM_u_`+qt+`
+    `+qt+" = unpack_mix_"+Qe+"(a_"+qt+", u_"+qt+`_t);
+#else
+    `+Xt+" "+hr+" "+qt+" = u_"+qt+`;
+#endif
+`:ft==="define"?`
+#ifndef HAS_UNIFORM_u_`+qt+`
+uniform lowp float u_`+qt+`_t;
+attribute `+Xt+" "+Ve+" a_"+qt+`;
+#else
+uniform `+Xt+" "+hr+" u_"+qt+`;
+#endif
+`:Qe==="vec4"?`
+#ifndef HAS_UNIFORM_u_`+qt+`
+    `+Xt+" "+hr+" "+qt+" = a_"+qt+`;
+#else
+    `+Xt+" "+hr+" "+qt+" = u_"+qt+`;
+#endif
+`:`
+#ifndef HAS_UNIFORM_u_`+qt+`
+    `+Xt+" "+hr+" "+qt+" = unpack_mix_"+Qe+"(a_"+qt+", u_"+qt+`_t);
+#else
+    `+Xt+" "+hr+" "+qt+" = u_"+qt+`;
+#endif
+`}),{fragmentSource:Y,vertexSource:D,staticAttributes:q,staticUniforms:ne}}var wf=Object.freeze({__proto__:null,prelude:js,background:Eu,backgroundPattern:Dc,circle:ks,clippingMask:bc,heatmap:hu,heatmapTexture:_u,collisionBox:nl,collisionCircle:nh,debug:Ah,fill:zu,fillOutline:Fc,fillOutlinePattern:wc,fillPattern:bd,fillExtrusion:xf,fillExtrusionPattern:Pf,hillshadePrepare:Ou,hillshade:bf,line:jl,lineGradient:lf,linePattern:Hh,lineSDF:If,raster:Cs,symbolIcon:du,symbolSDF:ku,symbolTextAndIcon:Wf}),zc=function(){this.boundProgram=null,this.boundLayoutVertexBuffer=null,this.boundPaintVertexBuffers=[],this.boundIndexBuffer=null,this.boundVertexOffset=null,this.boundDynamicVertexBuffer=null,this.vao=null};zc.prototype.bind=function(D,J,q,K,de,ne,we,Ue){this.context=D;for(var ft=this.boundPaintVertexBuffers.length!==K.length,Xt=0;!ft&&Xt<K.length;Xt++)this.boundPaintVertexBuffers[Xt]!==K[Xt]&&(ft=!0);var hr=!this.vao||this.boundProgram!==J||this.boundLayoutVertexBuffer!==q||ft||this.boundIndexBuffer!==de||this.boundVertexOffset!==ne||this.boundDynamicVertexBuffer!==we||this.boundDynamicVertexBuffer2!==Ue;!D.extVertexArrayObject||hr?this.freshBind(J,q,K,de,ne,we,Ue):(D.bindVertexArrayOES.set(this.vao),we&&we.bind(),de&&de.dynamicDraw&&de.bind(),Ue&&Ue.bind())},zc.prototype.freshBind=function(D,J,q,K,de,ne,we){var Ue,ft=D.numAttributes,Xt=this.context,hr=Xt.gl;if(Xt.extVertexArrayObject)this.vao&&this.destroy(),this.vao=Xt.extVertexArrayObject.createVertexArrayOES(),Xt.bindVertexArrayOES.set(this.vao),Ue=0,this.boundProgram=D,this.boundLayoutVertexBuffer=J,this.boundPaintVertexBuffers=q,this.boundIndexBuffer=K,this.boundVertexOffset=de,this.boundDynamicVertexBuffer=ne,this.boundDynamicVertexBuffer2=we;else{Ue=Xt.currentNumAttributes||0;for(var qt=ft;qt<Ue;qt++)hr.disableVertexAttribArray(qt)}J.enableAttributes(hr,D);for(var Ve=0,Qe=q;Ve<Qe.length;Ve+=1){var at=Qe[Ve];at.enableAttributes(hr,D)}ne&&ne.enableAttributes(hr,D),we&&we.enableAttributes(hr,D),J.bind(),J.setVertexAttribPointers(hr,D,de);for(var Ct=0,Ot=q;Ct<Ot.length;Ct+=1){var Rt=Ot[Ct];Rt.bind(),Rt.setVertexAttribPointers(hr,D,de)}ne&&(ne.bind(),ne.setVertexAttribPointers(hr,D,de)),K&&K.bind(),we&&(we.bind(),we.setVertexAttribPointers(hr,D,de)),Xt.currentNumAttributes=ft},zc.prototype.destroy=function(){this.vao&&(this.context.extVertexArrayObject.deleteVertexArrayOES(this.vao),this.vao=null)};function Wu(Y){for(var D=[],J=0;J<Y.length;J++)if(Y[J]!==null){var q=Y[J].split(" ");D.push(q.pop())}return D}var Rf=function(D,J,q,K,de,ne){var we=D.gl;this.program=we.createProgram();for(var Ue=Wu(q.staticAttributes),ft=K?K.getBinderAttributes():[],Xt=Ue.concat(ft),hr=q.staticUniforms?Wu(q.staticUniforms):[],qt=K?K.getBinderUniforms():[],Ve=hr.concat(qt),Qe=[],at=0,Ct=Ve;at<Ct.length;at+=1){var Ot=Ct[at];Qe.indexOf(Ot)<0&&Qe.push(Ot)}var Rt=K?K.defines():[];ne&&Rt.push("#define OVERDRAW_INSPECTOR;");var Bt=Rt.concat(js.fragmentSource,q.fragmentSource).join(`
+`),Dt=Rt.concat(js.vertexSource,q.vertexSource).join(`
+`),yt=we.createShader(we.FRAGMENT_SHADER);if(we.isContextLost()){this.failedToCreate=!0;return}we.shaderSource(yt,Bt),we.compileShader(yt),we.attachShader(this.program,yt);var Pt=we.createShader(we.VERTEX_SHADER);if(we.isContextLost()){this.failedToCreate=!0;return}we.shaderSource(Pt,Dt),we.compileShader(Pt),we.attachShader(this.program,Pt),this.attributes={};var ht={};this.numAttributes=Xt.length;for(var ur=0;ur<this.numAttributes;ur++)Xt[ur]&&(we.bindAttribLocation(this.program,ur,Xt[ur]),this.attributes[Xt[ur]]=ur);we.linkProgram(this.program),we.deleteShader(Pt),we.deleteShader(yt);for(var br=0;br<Qe.length;br++){var Ur=Qe[br];if(Ur&&!ht[Ur]){var Di=we.getUniformLocation(this.program,Ur);Di&&(ht[Ur]=Di)}}this.fixedUniforms=de(D,ht),this.binderUniforms=K?K.getUniforms(D,ht):[]};Rf.prototype.draw=function(D,J,q,K,de,ne,we,Ue,ft,Xt,hr,qt,Ve,Qe,at,Ct){var Ot,Rt=D.gl;if(!this.failedToCreate){D.program.set(this.program),D.setDepthMode(q),D.setStencilMode(K),D.setColorMode(de),D.setCullFace(ne);for(var Bt in this.fixedUniforms)this.fixedUniforms[Bt].set(we[Bt]);Qe&&Qe.setUniforms(D,this.binderUniforms,qt,{zoom:Ve});for(var Dt=(Ot={},Ot[Rt.LINES]=2,Ot[Rt.TRIANGLES]=3,Ot[Rt.LINE_STRIP]=1,Ot)[J],yt=0,Pt=hr.get();yt<Pt.length;yt+=1){var ht=Pt[yt],ur=ht.vaos||(ht.vaos={}),br=ur[Ue]||(ur[Ue]=new zc);br.bind(D,this,ft,Qe?Qe.getPaintVertexBuffers():[],Xt,ht.vertexOffset,at,Ct),Rt.drawElements(J,ht.primitiveLength*Dt,Rt.UNSIGNED_SHORT,ht.primitiveOffset*Dt*2)}}};function Xu(Y,D,J){var q=1/As(J,1,D.transform.tileZoom),K=Math.pow(2,J.tileID.overscaledZ),de=J.tileSize*Math.pow(2,D.transform.tileZoom)/K,ne=de*(J.tileID.canonical.x+J.tileID.wrap*K),we=de*J.tileID.canonical.y;return{u_image:0,u_texsize:J.imageAtlasTexture.size,u_scale:[q,Y.fromScale,Y.toScale],u_fade:Y.t,u_pixel_coord_upper:[ne>>16,we>>16],u_pixel_coord_lower:[ne&65535,we&65535]}}function uf(Y,D,J,q){var K=J.imageManager.getPattern(Y.from.toString()),de=J.imageManager.getPattern(Y.to.toString()),ne=J.imageManager.getPixelSize(),we=ne.width,Ue=ne.height,ft=Math.pow(2,q.tileID.overscaledZ),Xt=q.tileSize*Math.pow(2,J.transform.tileZoom)/ft,hr=Xt*(q.tileID.canonical.x+q.tileID.wrap*ft),qt=Xt*q.tileID.canonical.y;return{u_image:0,u_pattern_tl_a:K.tl,u_pattern_br_a:K.br,u_pattern_tl_b:de.tl,u_pattern_br_b:de.br,u_texsize:[we,Ue],u_mix:D.t,u_pattern_size_a:K.displaySize,u_pattern_size_b:de.displaySize,u_scale_a:D.fromScale,u_scale_b:D.toScale,u_tile_units_to_pixels:1/As(q,1,J.transform.tileZoom),u_pixel_coord_upper:[hr>>16,qt>>16],u_pixel_coord_lower:[hr&65535,qt&65535]}}var Xf=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_lightpos:new i.Uniform3f(Y,D.u_lightpos),u_lightintensity:new i.Uniform1f(Y,D.u_lightintensity),u_lightcolor:new i.Uniform3f(Y,D.u_lightcolor),u_vertical_gradient:new i.Uniform1f(Y,D.u_vertical_gradient),u_opacity:new i.Uniform1f(Y,D.u_opacity)}},Wl=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_lightpos:new i.Uniform3f(Y,D.u_lightpos),u_lightintensity:new i.Uniform1f(Y,D.u_lightintensity),u_lightcolor:new i.Uniform3f(Y,D.u_lightcolor),u_vertical_gradient:new i.Uniform1f(Y,D.u_vertical_gradient),u_height_factor:new i.Uniform1f(Y,D.u_height_factor),u_image:new i.Uniform1i(Y,D.u_image),u_texsize:new i.Uniform2f(Y,D.u_texsize),u_pixel_coord_upper:new i.Uniform2f(Y,D.u_pixel_coord_upper),u_pixel_coord_lower:new i.Uniform2f(Y,D.u_pixel_coord_lower),u_scale:new i.Uniform3f(Y,D.u_scale),u_fade:new i.Uniform1f(Y,D.u_fade),u_opacity:new i.Uniform1f(Y,D.u_opacity)}},ah=function(Y,D,J,q){var K=D.style.light,de=K.properties.get("position"),ne=[de.x,de.y,de.z],we=i.create$1();K.properties.get("anchor")==="viewport"&&i.fromRotation(we,-D.transform.angle),i.transformMat3(ne,ne,we);var Ue=K.properties.get("color");return{u_matrix:Y,u_lightpos:ne,u_lightintensity:K.properties.get("intensity"),u_lightcolor:[Ue.r,Ue.g,Ue.b],u_vertical_gradient:+J,u_opacity:q}},Zu=function(Y,D,J,q,K,de,ne){return i.extend(ah(Y,D,J,q),Xu(de,D,ne),{u_height_factor:-Math.pow(2,K.overscaledZ)/ne.tileSize/8})},Oc=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix)}},Tc=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_image:new i.Uniform1i(Y,D.u_image),u_texsize:new i.Uniform2f(Y,D.u_texsize),u_pixel_coord_upper:new i.Uniform2f(Y,D.u_pixel_coord_upper),u_pixel_coord_lower:new i.Uniform2f(Y,D.u_pixel_coord_lower),u_scale:new i.Uniform3f(Y,D.u_scale),u_fade:new i.Uniform1f(Y,D.u_fade)}},wl=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_world:new i.Uniform2f(Y,D.u_world)}},vu=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_world:new i.Uniform2f(Y,D.u_world),u_image:new i.Uniform1i(Y,D.u_image),u_texsize:new i.Uniform2f(Y,D.u_texsize),u_pixel_coord_upper:new i.Uniform2f(Y,D.u_pixel_coord_upper),u_pixel_coord_lower:new i.Uniform2f(Y,D.u_pixel_coord_lower),u_scale:new i.Uniform3f(Y,D.u_scale),u_fade:new i.Uniform1f(Y,D.u_fade)}},qc=function(Y){return{u_matrix:Y}},cf=function(Y,D,J,q){return i.extend(qc(Y),Xu(J,D,q))},fc=function(Y,D){return{u_matrix:Y,u_world:D}},Bc=function(Y,D,J,q,K){return i.extend(cf(Y,D,J,q),{u_world:K})},At=function(Y,D){return{u_camera_to_center_distance:new i.Uniform1f(Y,D.u_camera_to_center_distance),u_scale_with_map:new i.Uniform1i(Y,D.u_scale_with_map),u_pitch_with_map:new i.Uniform1i(Y,D.u_pitch_with_map),u_extrude_scale:new i.Uniform2f(Y,D.u_extrude_scale),u_device_pixel_ratio:new i.Uniform1f(Y,D.u_device_pixel_ratio),u_matrix:new i.UniformMatrix4f(Y,D.u_matrix)}},Wt=function(Y,D,J,q){var K=Y.transform,de,ne;if(q.paint.get("circle-pitch-alignment")==="map"){var we=As(J,1,K.zoom);de=!0,ne=[we,we]}else de=!1,ne=K.pixelsToGLUnits;return{u_camera_to_center_distance:K.cameraToCenterDistance,u_scale_with_map:+(q.paint.get("circle-pitch-scale")==="map"),u_matrix:Y.translatePosMatrix(D.posMatrix,J,q.paint.get("circle-translate"),q.paint.get("circle-translate-anchor")),u_pitch_with_map:+de,u_device_pixel_ratio:i.browser.devicePixelRatio,u_extrude_scale:ne}},Cr=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_camera_to_center_distance:new i.Uniform1f(Y,D.u_camera_to_center_distance),u_pixels_to_tile_units:new i.Uniform1f(Y,D.u_pixels_to_tile_units),u_extrude_scale:new i.Uniform2f(Y,D.u_extrude_scale),u_overscale_factor:new i.Uniform1f(Y,D.u_overscale_factor)}},Ar=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_inv_matrix:new i.UniformMatrix4f(Y,D.u_inv_matrix),u_camera_to_center_distance:new i.Uniform1f(Y,D.u_camera_to_center_distance),u_viewport_size:new i.Uniform2f(Y,D.u_viewport_size)}},Kr=function(Y,D,J){var q=As(J,1,D.zoom),K=Math.pow(2,D.zoom-J.tileID.overscaledZ),de=J.tileID.overscaleFactor();return{u_matrix:Y,u_camera_to_center_distance:D.cameraToCenterDistance,u_pixels_to_tile_units:q,u_extrude_scale:[D.pixelsToGLUnits[0]/(q*K),D.pixelsToGLUnits[1]/(q*K)],u_overscale_factor:de}},ki=function(Y,D,J){return{u_matrix:Y,u_inv_matrix:D,u_camera_to_center_distance:J.cameraToCenterDistance,u_viewport_size:[J.width,J.height]}},Xi=function(Y,D){return{u_color:new i.UniformColor(Y,D.u_color),u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_overlay:new i.Uniform1i(Y,D.u_overlay),u_overlay_scale:new i.Uniform1f(Y,D.u_overlay_scale)}},dn=function(Y,D,J){return J===void 0&&(J=1),{u_matrix:Y,u_color:D,u_overlay:0,u_overlay_scale:J}},wn=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix)}},Nn=function(Y){return{u_matrix:Y}},Yi=function(Y,D){return{u_extrude_scale:new i.Uniform1f(Y,D.u_extrude_scale),u_intensity:new i.Uniform1f(Y,D.u_intensity),u_matrix:new i.UniformMatrix4f(Y,D.u_matrix)}},Qi=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_world:new i.Uniform2f(Y,D.u_world),u_image:new i.Uniform1i(Y,D.u_image),u_color_ramp:new i.Uniform1i(Y,D.u_color_ramp),u_opacity:new i.Uniform1f(Y,D.u_opacity)}},on=function(Y,D,J,q){return{u_matrix:Y,u_extrude_scale:As(D,1,J),u_intensity:q}},Fi=function(Y,D,J,q){var K=i.create();i.ortho(K,0,Y.width,Y.height,0,0,1);var de=Y.context.gl;return{u_matrix:K,u_world:[de.drawingBufferWidth,de.drawingBufferHeight],u_image:J,u_color_ramp:q,u_opacity:D.paint.get("heatmap-opacity")}},$n=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_image:new i.Uniform1i(Y,D.u_image),u_latrange:new i.Uniform2f(Y,D.u_latrange),u_light:new i.Uniform2f(Y,D.u_light),u_shadow:new i.UniformColor(Y,D.u_shadow),u_highlight:new i.UniformColor(Y,D.u_highlight),u_accent:new i.UniformColor(Y,D.u_accent)}},Ca=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_image:new i.Uniform1i(Y,D.u_image),u_dimension:new i.Uniform2f(Y,D.u_dimension),u_zoom:new i.Uniform1f(Y,D.u_zoom),u_unpack:new i.Uniform4f(Y,D.u_unpack)}},Ra=function(Y,D,J){var q=J.paint.get("hillshade-shadow-color"),K=J.paint.get("hillshade-highlight-color"),de=J.paint.get("hillshade-accent-color"),ne=J.paint.get("hillshade-illumination-direction")*(Math.PI/180);J.paint.get("hillshade-illumination-anchor")==="viewport"&&(ne-=Y.transform.angle);var we=!Y.options.moving;return{u_matrix:Y.transform.calculatePosMatrix(D.tileID.toUnwrapped(),we),u_image:0,u_latrange:Na(Y,D.tileID),u_light:[J.paint.get("hillshade-exaggeration"),ne],u_shadow:q,u_highlight:K,u_accent:de}},La=function(Y,D){var J=D.stride,q=i.create();return i.ortho(q,0,i.EXTENT,-i.EXTENT,0,0,1),i.translate(q,q,[0,-i.EXTENT,0]),{u_matrix:q,u_image:1,u_dimension:[J,J],u_zoom:Y.overscaledZ,u_unpack:D.getUnpackVector()}};function Na(Y,D){var J=Math.pow(2,D.canonical.z),q=D.canonical.y;return[new i.MercatorCoordinate(0,q/J).toLngLat().lat,new i.MercatorCoordinate(0,(q+1)/J).toLngLat().lat]}var Yn=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_ratio:new i.Uniform1f(Y,D.u_ratio),u_device_pixel_ratio:new i.Uniform1f(Y,D.u_device_pixel_ratio),u_units_to_pixels:new i.Uniform2f(Y,D.u_units_to_pixels)}},Dn=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_ratio:new i.Uniform1f(Y,D.u_ratio),u_device_pixel_ratio:new i.Uniform1f(Y,D.u_device_pixel_ratio),u_units_to_pixels:new i.Uniform2f(Y,D.u_units_to_pixels),u_image:new i.Uniform1i(Y,D.u_image),u_image_height:new i.Uniform1f(Y,D.u_image_height)}},Ka=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_texsize:new i.Uniform2f(Y,D.u_texsize),u_ratio:new i.Uniform1f(Y,D.u_ratio),u_device_pixel_ratio:new i.Uniform1f(Y,D.u_device_pixel_ratio),u_image:new i.Uniform1i(Y,D.u_image),u_units_to_pixels:new i.Uniform2f(Y,D.u_units_to_pixels),u_scale:new i.Uniform3f(Y,D.u_scale),u_fade:new i.Uniform1f(Y,D.u_fade)}},bo=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_ratio:new i.Uniform1f(Y,D.u_ratio),u_device_pixel_ratio:new i.Uniform1f(Y,D.u_device_pixel_ratio),u_units_to_pixels:new i.Uniform2f(Y,D.u_units_to_pixels),u_patternscale_a:new i.Uniform2f(Y,D.u_patternscale_a),u_patternscale_b:new i.Uniform2f(Y,D.u_patternscale_b),u_sdfgamma:new i.Uniform1f(Y,D.u_sdfgamma),u_image:new i.Uniform1i(Y,D.u_image),u_tex_y_a:new i.Uniform1f(Y,D.u_tex_y_a),u_tex_y_b:new i.Uniform1f(Y,D.u_tex_y_b),u_mix:new i.Uniform1f(Y,D.u_mix)}},Xo=function(Y,D,J){var q=Y.transform;return{u_matrix:ml(Y,D,J),u_ratio:1/As(D,1,q.zoom),u_device_pixel_ratio:i.browser.devicePixelRatio,u_units_to_pixels:[1/q.pixelsToGLUnits[0],1/q.pixelsToGLUnits[1]]}},Ss=function(Y,D,J,q){return i.extend(Xo(Y,D,J),{u_image:0,u_image_height:q})},as=function(Y,D,J,q){var K=Y.transform,de=Ho(D,K);return{u_matrix:ml(Y,D,J),u_texsize:D.imageAtlasTexture.size,u_ratio:1/As(D,1,K.zoom),u_device_pixel_ratio:i.browser.devicePixelRatio,u_image:0,u_scale:[de,q.fromScale,q.toScale],u_fade:q.t,u_units_to_pixels:[1/K.pixelsToGLUnits[0],1/K.pixelsToGLUnits[1]]}},ws=function(Y,D,J,q,K){var de=Y.transform,ne=Y.lineAtlas,we=Ho(D,de),Ue=J.layout.get("line-cap")==="round",ft=ne.getDash(q.from,Ue),Xt=ne.getDash(q.to,Ue),hr=ft.width*K.fromScale,qt=Xt.width*K.toScale;return i.extend(Xo(Y,D,J),{u_patternscale_a:[we/hr,-ft.height/2],u_patternscale_b:[we/qt,-Xt.height/2],u_sdfgamma:ne.width/(Math.min(hr,qt)*256*i.browser.devicePixelRatio)/2,u_image:0,u_tex_y_a:ft.y,u_tex_y_b:Xt.y,u_mix:K.t})};function Ho(Y,D){return 1/As(Y,1,D.tileZoom)}function ml(Y,D,J){return Y.translatePosMatrix(D.tileID.posMatrix,D,J.paint.get("line-translate"),J.paint.get("line-translate-anchor"))}var Ws=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_tl_parent:new i.Uniform2f(Y,D.u_tl_parent),u_scale_parent:new i.Uniform1f(Y,D.u_scale_parent),u_buffer_scale:new i.Uniform1f(Y,D.u_buffer_scale),u_fade_t:new i.Uniform1f(Y,D.u_fade_t),u_opacity:new i.Uniform1f(Y,D.u_opacity),u_image0:new i.Uniform1i(Y,D.u_image0),u_image1:new i.Uniform1i(Y,D.u_image1),u_brightness_low:new i.Uniform1f(Y,D.u_brightness_low),u_brightness_high:new i.Uniform1f(Y,D.u_brightness_high),u_saturation_factor:new i.Uniform1f(Y,D.u_saturation_factor),u_contrast_factor:new i.Uniform1f(Y,D.u_contrast_factor),u_spin_weights:new i.Uniform3f(Y,D.u_spin_weights)}},Ls=function(Y,D,J,q,K){return{u_matrix:Y,u_tl_parent:D,u_scale_parent:J,u_buffer_scale:1,u_fade_t:q.mix,u_opacity:q.opacity*K.paint.get("raster-opacity"),u_image0:0,u_image1:1,u_brightness_low:K.paint.get("raster-brightness-min"),u_brightness_high:K.paint.get("raster-brightness-max"),u_saturation_factor:ys(K.paint.get("raster-saturation")),u_contrast_factor:no(K.paint.get("raster-contrast")),u_spin_weights:va(K.paint.get("raster-hue-rotate"))}};function va(Y){Y*=Math.PI/180;var D=Math.sin(Y),J=Math.cos(Y);return[(2*J+1)/3,(-Math.sqrt(3)*D-J+1)/3,(Math.sqrt(3)*D-J+1)/3]}function no(Y){return Y>0?1/(1-Y):1+Y}function ys(Y){return Y>0?1-1/(1.001-Y):-Y}var rs=function(Y,D){return{u_is_size_zoom_constant:new i.Uniform1i(Y,D.u_is_size_zoom_constant),u_is_size_feature_constant:new i.Uniform1i(Y,D.u_is_size_feature_constant),u_size_t:new i.Uniform1f(Y,D.u_size_t),u_size:new i.Uniform1f(Y,D.u_size),u_camera_to_center_distance:new i.Uniform1f(Y,D.u_camera_to_center_distance),u_pitch:new i.Uniform1f(Y,D.u_pitch),u_rotate_symbol:new i.Uniform1i(Y,D.u_rotate_symbol),u_aspect_ratio:new i.Uniform1f(Y,D.u_aspect_ratio),u_fade_change:new i.Uniform1f(Y,D.u_fade_change),u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_label_plane_matrix:new i.UniformMatrix4f(Y,D.u_label_plane_matrix),u_coord_matrix:new i.UniformMatrix4f(Y,D.u_coord_matrix),u_is_text:new i.Uniform1i(Y,D.u_is_text),u_pitch_with_map:new i.Uniform1i(Y,D.u_pitch_with_map),u_texsize:new i.Uniform2f(Y,D.u_texsize),u_texture:new i.Uniform1i(Y,D.u_texture)}},$l=function(Y,D){return{u_is_size_zoom_constant:new i.Uniform1i(Y,D.u_is_size_zoom_constant),u_is_size_feature_constant:new i.Uniform1i(Y,D.u_is_size_feature_constant),u_size_t:new i.Uniform1f(Y,D.u_size_t),u_size:new i.Uniform1f(Y,D.u_size),u_camera_to_center_distance:new i.Uniform1f(Y,D.u_camera_to_center_distance),u_pitch:new i.Uniform1f(Y,D.u_pitch),u_rotate_symbol:new i.Uniform1i(Y,D.u_rotate_symbol),u_aspect_ratio:new i.Uniform1f(Y,D.u_aspect_ratio),u_fade_change:new i.Uniform1f(Y,D.u_fade_change),u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_label_plane_matrix:new i.UniformMatrix4f(Y,D.u_label_plane_matrix),u_coord_matrix:new i.UniformMatrix4f(Y,D.u_coord_matrix),u_is_text:new i.Uniform1i(Y,D.u_is_text),u_pitch_with_map:new i.Uniform1i(Y,D.u_pitch_with_map),u_texsize:new i.Uniform2f(Y,D.u_texsize),u_texture:new i.Uniform1i(Y,D.u_texture),u_gamma_scale:new i.Uniform1f(Y,D.u_gamma_scale),u_device_pixel_ratio:new i.Uniform1f(Y,D.u_device_pixel_ratio),u_is_halo:new i.Uniform1i(Y,D.u_is_halo)}},Cu=function(Y,D){return{u_is_size_zoom_constant:new i.Uniform1i(Y,D.u_is_size_zoom_constant),u_is_size_feature_constant:new i.Uniform1i(Y,D.u_is_size_feature_constant),u_size_t:new i.Uniform1f(Y,D.u_size_t),u_size:new i.Uniform1f(Y,D.u_size),u_camera_to_center_distance:new i.Uniform1f(Y,D.u_camera_to_center_distance),u_pitch:new i.Uniform1f(Y,D.u_pitch),u_rotate_symbol:new i.Uniform1i(Y,D.u_rotate_symbol),u_aspect_ratio:new i.Uniform1f(Y,D.u_aspect_ratio),u_fade_change:new i.Uniform1f(Y,D.u_fade_change),u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_label_plane_matrix:new i.UniformMatrix4f(Y,D.u_label_plane_matrix),u_coord_matrix:new i.UniformMatrix4f(Y,D.u_coord_matrix),u_is_text:new i.Uniform1i(Y,D.u_is_text),u_pitch_with_map:new i.Uniform1i(Y,D.u_pitch_with_map),u_texsize:new i.Uniform2f(Y,D.u_texsize),u_texsize_icon:new i.Uniform2f(Y,D.u_texsize_icon),u_texture:new i.Uniform1i(Y,D.u_texture),u_texture_icon:new i.Uniform1i(Y,D.u_texture_icon),u_gamma_scale:new i.Uniform1f(Y,D.u_gamma_scale),u_device_pixel_ratio:new i.Uniform1f(Y,D.u_device_pixel_ratio),u_is_halo:new i.Uniform1i(Y,D.u_is_halo)}},Yu=function(Y,D,J,q,K,de,ne,we,Ue,ft){var Xt=K.transform;return{u_is_size_zoom_constant:+(Y==="constant"||Y==="source"),u_is_size_feature_constant:+(Y==="constant"||Y==="camera"),u_size_t:D?D.uSizeT:0,u_size:D?D.uSize:0,u_camera_to_center_distance:Xt.cameraToCenterDistance,u_pitch:Xt.pitch/360*2*Math.PI,u_rotate_symbol:+J,u_aspect_ratio:Xt.width/Xt.height,u_fade_change:K.options.fadeDuration?K.symbolFadeChange:1,u_matrix:de,u_label_plane_matrix:ne,u_coord_matrix:we,u_is_text:+Ue,u_pitch_with_map:+q,u_texsize:ft,u_texture:0}},Nc=function(Y,D,J,q,K,de,ne,we,Ue,ft,Xt){var hr=K.transform;return i.extend(Yu(Y,D,J,q,K,de,ne,we,Ue,ft),{u_gamma_scale:q?Math.cos(hr._pitch)*hr.cameraToCenterDistance:1,u_device_pixel_ratio:i.browser.devicePixelRatio,u_is_halo:+Xt})},pu=function(Y,D,J,q,K,de,ne,we,Ue,ft){return i.extend(Nc(Y,D,J,q,K,de,ne,we,!0,Ue,!0),{u_texsize_icon:ft,u_texture_icon:1})},Uc=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_opacity:new i.Uniform1f(Y,D.u_opacity),u_color:new i.UniformColor(Y,D.u_color)}},xu=function(Y,D){return{u_matrix:new i.UniformMatrix4f(Y,D.u_matrix),u_opacity:new i.Uniform1f(Y,D.u_opacity),u_image:new i.Uniform1i(Y,D.u_image),u_pattern_tl_a:new i.Uniform2f(Y,D.u_pattern_tl_a),u_pattern_br_a:new i.Uniform2f(Y,D.u_pattern_br_a),u_pattern_tl_b:new i.Uniform2f(Y,D.u_pattern_tl_b),u_pattern_br_b:new i.Uniform2f(Y,D.u_pattern_br_b),u_texsize:new i.Uniform2f(Y,D.u_texsize),u_mix:new i.Uniform1f(Y,D.u_mix),u_pattern_size_a:new i.Uniform2f(Y,D.u_pattern_size_a),u_pattern_size_b:new i.Uniform2f(Y,D.u_pattern_size_b),u_scale_a:new i.Uniform1f(Y,D.u_scale_a),u_scale_b:new i.Uniform1f(Y,D.u_scale_b),u_pixel_coord_upper:new i.Uniform2f(Y,D.u_pixel_coord_upper),u_pixel_coord_lower:new i.Uniform2f(Y,D.u_pixel_coord_lower),u_tile_units_to_pixels:new i.Uniform1f(Y,D.u_tile_units_to_pixels)}},Ac=function(Y,D,J){return{u_matrix:Y,u_opacity:D,u_color:J}},Ua=function(Y,D,J,q,K,de){return i.extend(uf(q,de,J,K),{u_matrix:Y,u_opacity:D})},oo={fillExtrusion:Xf,fillExtrusionPattern:Wl,fill:Oc,fillPattern:Tc,fillOutline:wl,fillOutlinePattern:vu,circle:At,collisionBox:Cr,collisionCircle:Ar,debug:Xi,clippingMask:wn,heatmap:Yi,heatmapTexture:Qi,hillshade:$n,hillshadePrepare:Ca,line:Yn,lineGradient:Dn,linePattern:Ka,lineSDF:bo,raster:Ws,symbolIcon:rs,symbolSDF:$l,symbolTextAndIcon:Cu,background:Uc,backgroundPattern:xu},Vc;function hc(Y,D,J,q,K,de,ne){for(var we=Y.context,Ue=we.gl,ft=Y.useProgram("collisionBox"),Xt=[],hr=0,qt=0,Ve=0;Ve<q.length;Ve++){var Qe=q[Ve],at=D.getTile(Qe),Ct=at.getBucket(J);if(Ct){var Ot=Qe.posMatrix;(K[0]!==0||K[1]!==0)&&(Ot=Y.translatePosMatrix(Qe.posMatrix,at,K,de));var Rt=ne?Ct.textCollisionBox:Ct.iconCollisionBox,Bt=Ct.collisionCircleArray;if(Bt.length>0){var Dt=i.create(),yt=Ot;i.mul(Dt,Ct.placementInvProjMatrix,Y.transform.glCoordMatrix),i.mul(Dt,Dt,Ct.placementViewportMatrix),Xt.push({circleArray:Bt,circleOffset:qt,transform:yt,invTransform:Dt}),hr+=Bt.length/4,qt=hr}Rt&&ft.draw(we,Ue.LINES,wi.disabled,Fn.disabled,Y.colorModeForRenderPass(),Er.disabled,Kr(Ot,Y.transform,at),J.id,Rt.layoutVertexBuffer,Rt.indexBuffer,Rt.segments,null,Y.transform.zoom,null,null,Rt.collisionVertexBuffer)}}if(!(!ne||!Xt.length)){var Pt=Y.useProgram("collisionCircle"),ht=new i.StructArrayLayout2f1f2i16;ht.resize(hr*4),ht._trim();for(var ur=0,br=0,Ur=Xt;br<Ur.length;br+=1)for(var Di=Ur[br],fi=0;fi<Di.circleArray.length/4;fi++){var Ti=fi*4,gn=Di.circleArray[Ti+0],rn=Di.circleArray[Ti+1],Ci=Di.circleArray[Ti+2],Bi=Di.circleArray[Ti+3];ht.emplace(ur++,gn,rn,Ci,Bi,0),ht.emplace(ur++,gn,rn,Ci,Bi,1),ht.emplace(ur++,gn,rn,Ci,Bi,2),ht.emplace(ur++,gn,rn,Ci,Bi,3)}(!Vc||Vc.length<hr*2)&&(Vc=Ku(hr));for(var Gi=we.createIndexBuffer(Vc,!0),sn=we.createVertexBuffer(ht,i.collisionCircleLayout.members,!0),zn=0,Ja=Xt;zn<Ja.length;zn+=1){var co=Ja[zn],ts=ki(co.transform,co.invTransform,Y.transform);Pt.draw(we,Ue.TRIANGLES,wi.disabled,Fn.disabled,Y.colorModeForRenderPass(),Er.disabled,ts,J.id,sn,Gi,i.SegmentVector.simpleSegment(0,co.circleOffset*2,co.circleArray.length,co.circleArray.length/2),null,Y.transform.zoom,null,null,null)}sn.destroy(),Gi.destroy()}}function Ku(Y){var D=Y*2,J=new i.StructArrayLayout3ui6;J.resize(D),J._trim();for(var q=0;q<D;q++){var K=q*6;J.uint16[K+0]=q*4+0,J.uint16[K+1]=q*4+1,J.uint16[K+2]=q*4+2,J.uint16[K+3]=q*4+2,J.uint16[K+4]=q*4+3,J.uint16[K+5]=q*4+0}return J}var ue=i.identity(new Float32Array(16));function w(Y,D,J,q,K){if(Y.renderPass==="translucent"){var de=Fn.disabled,ne=Y.colorModeForRenderPass(),we=J.layout.get("text-variable-anchor");we&&Q(q,Y,J,D,J.layout.get("text-rotation-alignment"),J.layout.get("text-pitch-alignment"),K),J.paint.get("icon-opacity").constantOr(1)!==0&&qe(Y,D,J,q,!1,J.paint.get("icon-translate"),J.paint.get("icon-translate-anchor"),J.layout.get("icon-rotation-alignment"),J.layout.get("icon-pitch-alignment"),J.layout.get("icon-keep-upright"),de,ne),J.paint.get("text-opacity").constantOr(1)!==0&&qe(Y,D,J,q,!0,J.paint.get("text-translate"),J.paint.get("text-translate-anchor"),J.layout.get("text-rotation-alignment"),J.layout.get("text-pitch-alignment"),J.layout.get("text-keep-upright"),de,ne),D.map.showCollisionBoxes&&(hc(Y,D,J,q,J.paint.get("text-translate"),J.paint.get("text-translate-anchor"),!0),hc(Y,D,J,q,J.paint.get("icon-translate"),J.paint.get("icon-translate-anchor"),!1))}}function B(Y,D,J,q,K,de){var ne=i.getAnchorAlignment(Y),we=ne.horizontalAlign,Ue=ne.verticalAlign,ft=-(we-.5)*D,Xt=-(Ue-.5)*J,hr=i.evaluateVariableOffset(Y,q);return new i.Point((ft/K+hr[0])*de,(Xt/K+hr[1])*de)}function Q(Y,D,J,q,K,de,ne){for(var we=D.transform,Ue=K==="map",ft=de==="map",Xt=0,hr=Y;Xt<hr.length;Xt+=1){var qt=hr[Xt],Ve=q.getTile(qt),Qe=Ve.getBucket(J);if(!(!Qe||!Qe.text||!Qe.text.segments.get().length)){var at=Qe.textSizeData,Ct=i.evaluateSizeForZoom(at,we.zoom),Ot=As(Ve,1,D.transform.zoom),Rt=_n(qt.posMatrix,ft,Ue,D.transform,Ot),Bt=J.layout.get("icon-text-fit")!=="none"&&Qe.hasIconData();if(Ct){var Dt=Math.pow(2,we.zoom-Ve.tileID.overscaledZ);ee(Qe,Ue,ft,ne,i.symbolSize,we,Rt,qt.posMatrix,Dt,Ct,Bt)}}}}function ee(Y,D,J,q,K,de,ne,we,Ue,ft,Xt){var hr=Y.text.placedSymbolArray,qt=Y.text.dynamicLayoutVertexArray,Ve=Y.icon.dynamicLayoutVertexArray,Qe={};qt.clear();for(var at=0;at<hr.length;at++){var Ct=hr.get(at),Ot=Y.allowVerticalPlacement&&!Ct.placedOrientation,Rt=!Ct.hidden&&Ct.crossTileID&&!Ot?q[Ct.crossTileID]:null;if(!Rt)ul(Ct.numGlyphs,qt);else{var Bt=new i.Point(Ct.anchorX,Ct.anchorY),Dt=Jn(Bt,J?we:ne),yt=Ma(de.cameraToCenterDistance,Dt.signedDistanceFromCamera),Pt=K.evaluateSizeForFeature(Y.textSizeData,ft,Ct)*yt/i.ONE_EM;J&&(Pt*=Y.tilePixelRatio/Ue);for(var ht=Rt.width,ur=Rt.height,br=Rt.anchor,Ur=Rt.textOffset,Di=Rt.textBoxScale,fi=B(br,ht,ur,Ur,Di,Pt),Ti=J?Jn(Bt.add(fi),ne).point:Dt.point.add(D?fi.rotate(-de.angle):fi),gn=Y.allowVerticalPlacement&&Ct.placedOrientation===i.WritingMode.vertical?Math.PI/2:0,rn=0;rn<Ct.numGlyphs;rn++)i.addDynamicAttributes(qt,Ti,gn);Xt&&Ct.associatedIconIndex>=0&&(Qe[Ct.associatedIconIndex]={shiftedAnchor:Ti,angle:gn})}}if(Xt){Ve.clear();for(var Ci=Y.icon.placedSymbolArray,Bi=0;Bi<Ci.length;Bi++){var Gi=Ci.get(Bi);if(Gi.hidden)ul(Gi.numGlyphs,Ve);else{var sn=Qe[Bi];if(!sn)ul(Gi.numGlyphs,Ve);else for(var zn=0;zn<Gi.numGlyphs;zn++)i.addDynamicAttributes(Ve,sn.shiftedAnchor,sn.angle)}}Y.icon.dynamicLayoutVertexBuffer.updateData(Ve)}Y.text.dynamicLayoutVertexBuffer.updateData(qt)}function le(Y,D,J){return J.iconsInText&&D?"symbolTextAndIcon":Y?"symbolSDF":"symbolIcon"}function qe(Y,D,J,q,K,de,ne,we,Ue,ft,Xt,hr){for(var qt=Y.context,Ve=qt.gl,Qe=Y.transform,at=we==="map",Ct=Ue==="map",Ot=at&&J.layout.get("symbol-placement")!=="point",Rt=at&&!Ct&&!Ot,Bt=J.layout.get("symbol-sort-key").constantOr(1)!==void 0,Dt=!1,yt=Y.depthModeForSublayer(0,wi.ReadOnly),Pt=J.layout.get("text-variable-anchor"),ht=[],ur=0,br=q;ur<br.length;ur+=1){var Ur=br[ur],Di=D.getTile(Ur),fi=Di.getBucket(J);if(fi){var Ti=K?fi.text:fi.icon;if(!(!Ti||!Ti.segments.get().length)){var gn=Ti.programConfigurations.get(J.id),rn=K||fi.sdfIcons,Ci=K?fi.textSizeData:fi.iconSizeData,Bi=Ct||Qe.pitch!==0,Gi=Y.useProgram(le(rn,K,fi),gn),sn=i.evaluateSizeForZoom(Ci,Qe.zoom),zn=void 0,Ja=[0,0],co=void 0,ts=void 0,so=null,Zo=void 0;if(K){if(co=Di.glyphAtlasTexture,ts=Ve.LINEAR,zn=Di.glyphAtlasTexture.size,fi.iconsInText){Ja=Di.imageAtlasTexture.size,so=Di.imageAtlasTexture;var ms=Ci.kind==="composite"||Ci.kind==="camera";Zo=Bi||Y.options.rotating||Y.options.zooming||ms?Ve.LINEAR:Ve.NEAREST}}else{var ou=J.layout.get("icon-size").constantOr(0)!==1||fi.iconsNeedLinear;co=Di.imageAtlasTexture,ts=rn||Y.options.rotating||Y.options.zooming||ou||Bi?Ve.LINEAR:Ve.NEAREST,zn=Di.imageAtlasTexture.size}var Cv=As(Di,1,Y.transform.zoom),Lv=_n(Ur.posMatrix,Ct,at,Y.transform,Cv),wd=ya(Ur.posMatrix,Ct,at,Y.transform,Cv),Kv=Pt&&fi.hasTextData(),cg=J.layout.get("icon-text-fit")!=="none"&&Kv&&fi.hasIconData();Ot&&No(fi,Ur.posMatrix,Y,K,Lv,wd,Ct,ft);var pp=Y.translatePosMatrix(Ur.posMatrix,Di,de,ne),Td=Ot||K&&Pt||cg?ue:Lv,gp=Y.translatePosMatrix(wd,Di,de,ne,!0),Vd=rn&&J.paint.get(K?"text-halo-width":"icon-halo-width").constantOr(1)!==0,Ad=void 0;rn?fi.iconsInText?Ad=pu(Ci.kind,sn,Rt,Ct,Y,pp,Td,gp,zn,Ja):Ad=Nc(Ci.kind,sn,Rt,Ct,Y,pp,Td,gp,K,zn,!0):Ad=Yu(Ci.kind,sn,Rt,Ct,Y,pp,Td,gp,K,zn);var Pv={program:Gi,buffers:Ti,uniformValues:Ad,atlasTexture:co,atlasTextureIcon:so,atlasInterpolation:ts,atlasInterpolationIcon:Zo,isSDF:rn,hasHalo:Vd};if(Bt&&fi.canOverlap){Dt=!0;for(var Jv=Ti.segments.get(),Iv=0,ay=Jv;Iv<ay.length;Iv+=1){var fg=ay[Iv];ht.push({segments:new i.SegmentVector([fg]),sortKey:fg.sortKey,state:Pv})}}else ht.push({segments:Ti.segments,sortKey:0,state:Pv})}}}Dt&&ht.sort(function(b1,w1){return b1.sortKey-w1.sortKey});for(var oh=0,hg=ht;oh<hg.length;oh+=1){var oy=hg[oh],jh=oy.state;if(qt.activeTexture.set(Ve.TEXTURE0),jh.atlasTexture.bind(jh.atlasInterpolation,Ve.CLAMP_TO_EDGE),jh.atlasTextureIcon&&(qt.activeTexture.set(Ve.TEXTURE1),jh.atlasTextureIcon&&jh.atlasTextureIcon.bind(jh.atlasInterpolationIcon,Ve.CLAMP_TO_EDGE)),jh.isSDF){var im=jh.uniformValues;jh.hasHalo&&(im.u_is_halo=1,Xe(jh.buffers,oy.segments,J,Y,jh.program,yt,Xt,hr,im)),im.u_is_halo=0}Xe(jh.buffers,oy.segments,J,Y,jh.program,yt,Xt,hr,jh.uniformValues)}}function Xe(Y,D,J,q,K,de,ne,we,Ue){var ft=q.context,Xt=ft.gl;K.draw(ft,Xt.TRIANGLES,de,ne,we,Er.disabled,Ue,J.id,Y.layoutVertexBuffer,Y.indexBuffer,D,J.paint,q.transform.zoom,Y.programConfigurations.get(J.id),Y.dynamicLayoutVertexBuffer,Y.opacityVertexBuffer)}function ot(Y,D,J,q){if(Y.renderPass==="translucent"){var K=J.paint.get("circle-opacity"),de=J.paint.get("circle-stroke-width"),ne=J.paint.get("circle-stroke-opacity"),we=J.layout.get("circle-sort-key").constantOr(1)!==void 0;if(!(K.constantOr(1)===0&&(de.constantOr(1)===0||ne.constantOr(1)===0))){for(var Ue=Y.context,ft=Ue.gl,Xt=Y.depthModeForSublayer(0,wi.ReadOnly),hr=Fn.disabled,qt=Y.colorModeForRenderPass(),Ve=[],Qe=0;Qe<q.length;Qe++){var at=q[Qe],Ct=D.getTile(at),Ot=Ct.getBucket(J);if(Ot){var Rt=Ot.programConfigurations.get(J.id),Bt=Y.useProgram("circle",Rt),Dt=Ot.layoutVertexBuffer,yt=Ot.indexBuffer,Pt=Wt(Y,at,Ct,J),ht={programConfiguration:Rt,program:Bt,layoutVertexBuffer:Dt,indexBuffer:yt,uniformValues:Pt};if(we)for(var ur=Ot.segments.get(),br=0,Ur=ur;br<Ur.length;br+=1){var Di=Ur[br];Ve.push({segments:new i.SegmentVector([Di]),sortKey:Di.sortKey,state:ht})}else Ve.push({segments:Ot.segments,sortKey:0,state:ht})}}we&&Ve.sort(function(co,ts){return co.sortKey-ts.sortKey});for(var fi=0,Ti=Ve;fi<Ti.length;fi+=1){var gn=Ti[fi],rn=gn.state,Ci=rn.programConfiguration,Bi=rn.program,Gi=rn.layoutVertexBuffer,sn=rn.indexBuffer,zn=rn.uniformValues,Ja=gn.segments;Bi.draw(Ue,ft.TRIANGLES,Xt,hr,qt,Er.disabled,zn,J.id,Gi,sn,Ja,J.paint,Y.transform.zoom,Ci)}}}}function Tt(Y,D,J,q){if(J.paint.get("heatmap-opacity")!==0)if(Y.renderPass==="offscreen"){var K=Y.context,de=K.gl,ne=Fn.disabled,we=new wt([de.ONE,de.ONE],i.Color.transparent,[!0,!0,!0,!0]);Yt(K,Y,J),K.clear({color:i.Color.transparent});for(var Ue=0;Ue<q.length;Ue++){var ft=q[Ue];if(!D.hasRenderableParent(ft)){var Xt=D.getTile(ft),hr=Xt.getBucket(J);if(hr){var qt=hr.programConfigurations.get(J.id),Ve=Y.useProgram("heatmap",qt),Qe=Y.transform,at=Qe.zoom;Ve.draw(K,de.TRIANGLES,wi.disabled,ne,we,Er.disabled,on(ft.posMatrix,Xt,at,J.paint.get("heatmap-intensity")),J.id,hr.layoutVertexBuffer,hr.indexBuffer,hr.segments,J.paint,Y.transform.zoom,qt)}}}K.viewport.set([0,0,Y.width,Y.height])}else Y.renderPass==="translucent"&&(Y.context.setColorMode(Y.colorModeForRenderPass()),xr(Y,J))}function Yt(Y,D,J){var q=Y.gl;Y.activeTexture.set(q.TEXTURE1),Y.viewport.set([0,0,D.width/4,D.height/4]);var K=J.heatmapFbo;if(K)q.bindTexture(q.TEXTURE_2D,K.colorAttachment.get()),Y.bindFramebuffer.set(K.framebuffer);else{var de=q.createTexture();q.bindTexture(q.TEXTURE_2D,de),q.texParameteri(q.TEXTURE_2D,q.TEXTURE_WRAP_S,q.CLAMP_TO_EDGE),q.texParameteri(q.TEXTURE_2D,q.TEXTURE_WRAP_T,q.CLAMP_TO_EDGE),q.texParameteri(q.TEXTURE_2D,q.TEXTURE_MIN_FILTER,q.LINEAR),q.texParameteri(q.TEXTURE_2D,q.TEXTURE_MAG_FILTER,q.LINEAR),K=J.heatmapFbo=Y.createFramebuffer(D.width/4,D.height/4,!1),Kt(Y,D,de,K)}}function Kt(Y,D,J,q){var K=Y.gl,de=Y.extRenderToTextureHalfFloat?Y.extTextureHalfFloat.HALF_FLOAT_OES:K.UNSIGNED_BYTE;K.texImage2D(K.TEXTURE_2D,0,K.RGBA,D.width/4,D.height/4,0,K.RGBA,de,null),q.colorAttachment.set(J)}function xr(Y,D){var J=Y.context,q=J.gl,K=D.heatmapFbo;if(K){J.activeTexture.set(q.TEXTURE0),q.bindTexture(q.TEXTURE_2D,K.colorAttachment.get()),J.activeTexture.set(q.TEXTURE1);var de=D.colorRampTexture;de||(de=D.colorRampTexture=new i.Texture(J,D.colorRamp,q.RGBA)),de.bind(q.LINEAR,q.CLAMP_TO_EDGE),Y.useProgram("heatmapTexture").draw(J,q.TRIANGLES,wi.disabled,Fn.disabled,Y.colorModeForRenderPass(),Er.disabled,Fi(Y,D,0,1),D.id,Y.viewportBuffer,Y.quadTriangleIndexBuffer,Y.viewportSegments,D.paint,Y.transform.zoom)}}function Ir(Y,D,J,q){if(Y.renderPass==="translucent"){var K=J.paint.get("line-opacity"),de=J.paint.get("line-width");if(!(K.constantOr(1)===0||de.constantOr(1)===0))for(var ne=Y.depthModeForSublayer(0,wi.ReadOnly),we=Y.colorModeForRenderPass(),Ue=J.paint.get("line-dasharray"),ft=J.paint.get("line-pattern"),Xt=ft.constantOr(1),hr=J.paint.get("line-gradient"),qt=J.getCrossfadeParameters(),Ve=Xt?"linePattern":Ue?"lineSDF":hr?"lineGradient":"line",Qe=Y.context,at=Qe.gl,Ct=!0,Ot=0,Rt=q;Ot<Rt.length;Ot+=1){var Bt=Rt[Ot],Dt=D.getTile(Bt);if(!(Xt&&!Dt.patternsLoaded())){var yt=Dt.getBucket(J);if(yt){var Pt=yt.programConfigurations.get(J.id),ht=Y.context.program.get(),ur=Y.useProgram(Ve,Pt),br=Ct||ur.program!==ht,Ur=ft.constantOr(null);if(Ur&&Dt.imageAtlas){var Di=Dt.imageAtlas,fi=Di.patternPositions[Ur.to.toString()],Ti=Di.patternPositions[Ur.from.toString()];fi&&Ti&&Pt.setConstantPatternPositions(fi,Ti)}var gn=Xt?as(Y,Dt,J,qt):Ue?ws(Y,Dt,J,Ue,qt):hr?Ss(Y,Dt,J,yt.lineClipsArray.length):Xo(Y,Dt,J);if(Xt)Qe.activeTexture.set(at.TEXTURE0),Dt.imageAtlasTexture.bind(at.LINEAR,at.CLAMP_TO_EDGE),Pt.updatePaintBuffers(qt);else if(Ue&&(br||Y.lineAtlas.dirty))Qe.activeTexture.set(at.TEXTURE0),Y.lineAtlas.bind(Qe);else if(hr){var rn=yt.gradients[J.id],Ci=rn.texture;if(J.gradientVersion!==rn.version){var Bi=256;if(J.stepInterpolant){var Gi=D.getSource().maxzoom,sn=Bt.canonical.z===Gi?Math.ceil(1<<Y.transform.maxZoom-Bt.canonical.z):1,zn=yt.maxLineLength/i.EXTENT,Ja=1024,co=zn*Ja*sn;Bi=i.clamp(i.nextPowerOfTwo(co),256,Qe.maxTextureSize)}rn.gradient=i.renderColorRamp({expression:J.gradientExpression(),evaluationKey:"lineProgress",resolution:Bi,image:rn.gradient||void 0,clips:yt.lineClipsArray}),rn.texture?rn.texture.update(rn.gradient):rn.texture=new i.Texture(Qe,rn.gradient,at.RGBA),rn.version=J.gradientVersion,Ci=rn.texture}Qe.activeTexture.set(at.TEXTURE0),Ci.bind(J.stepInterpolant?at.NEAREST:at.LINEAR,at.CLAMP_TO_EDGE)}ur.draw(Qe,at.TRIANGLES,ne,Y.stencilModeForClipping(Bt),we,Er.disabled,gn,J.id,yt.layoutVertexBuffer,yt.indexBuffer,yt.segments,J.paint,Y.transform.zoom,Pt,yt.layoutVertexBuffer2),Ct=!1}}}}}function ve(Y,D,J,q){var K=J.paint.get("fill-color"),de=J.paint.get("fill-opacity");if(de.constantOr(1)!==0){var ne=Y.colorModeForRenderPass(),we=J.paint.get("fill-pattern"),Ue=Y.opaquePassEnabledForLayer()&&!we.constantOr(1)&&K.constantOr(i.Color.transparent).a===1&&de.constantOr(0)===1?"opaque":"translucent";if(Y.renderPass===Ue){var ft=Y.depthModeForSublayer(1,Y.renderPass==="opaque"?wi.ReadWrite:wi.ReadOnly);be(Y,D,J,q,ft,ne,!1)}if(Y.renderPass==="translucent"&&J.paint.get("fill-antialias")){var Xt=Y.depthModeForSublayer(J.getPaintProperty("fill-outline-color")?2:0,wi.ReadOnly);be(Y,D,J,q,Xt,ne,!0)}}}function be(Y,D,J,q,K,de,ne){var we=Y.context.gl,Ue=J.paint.get("fill-pattern"),ft=Ue&&Ue.constantOr(1),Xt=J.getCrossfadeParameters(),hr,qt,Ve,Qe,at;ne?(qt=ft&&!J.getPaintProperty("fill-outline-color")?"fillOutlinePattern":"fillOutline",hr=we.LINES):(qt=ft?"fillPattern":"fill",hr=we.TRIANGLES);for(var Ct=0,Ot=q;Ct<Ot.length;Ct+=1){var Rt=Ot[Ct],Bt=D.getTile(Rt);if(!(ft&&!Bt.patternsLoaded())){var Dt=Bt.getBucket(J);if(Dt){var yt=Dt.programConfigurations.get(J.id),Pt=Y.useProgram(qt,yt);ft&&(Y.context.activeTexture.set(we.TEXTURE0),Bt.imageAtlasTexture.bind(we.LINEAR,we.CLAMP_TO_EDGE),yt.updatePaintBuffers(Xt));var ht=Ue.constantOr(null);if(ht&&Bt.imageAtlas){var ur=Bt.imageAtlas,br=ur.patternPositions[ht.to.toString()],Ur=ur.patternPositions[ht.from.toString()];br&&Ur&&yt.setConstantPatternPositions(br,Ur)}var Di=Y.translatePosMatrix(Rt.posMatrix,Bt,J.paint.get("fill-translate"),J.paint.get("fill-translate-anchor"));if(!ne)Qe=Dt.indexBuffer,at=Dt.segments,Ve=ft?cf(Di,Y,Xt,Bt):qc(Di);else{Qe=Dt.indexBuffer2,at=Dt.segments2;var fi=[we.drawingBufferWidth,we.drawingBufferHeight];Ve=qt==="fillOutlinePattern"&&ft?Bc(Di,Y,Xt,Bt,fi):fc(Di,fi)}Pt.draw(Y.context,hr,K,Y.stencilModeForClipping(Rt),de,Er.disabled,Ve,J.id,Dt.layoutVertexBuffer,Qe,at,J.paint,Y.transform.zoom,yt)}}}}function De(Y,D,J,q){var K=J.paint.get("fill-extrusion-opacity");if(K!==0&&Y.renderPass==="translucent"){var de=new wi(Y.context.gl.LEQUAL,wi.ReadWrite,Y.depthRangeFor3D);if(K===1&&!J.paint.get("fill-extrusion-pattern").constantOr(1)){var ne=Y.colorModeForRenderPass();Be(Y,D,J,q,de,Fn.disabled,ne)}else Be(Y,D,J,q,de,Fn.disabled,wt.disabled),Be(Y,D,J,q,de,Y.stencilModeFor3D(),Y.colorModeForRenderPass())}}function Be(Y,D,J,q,K,de,ne){for(var we=Y.context,Ue=we.gl,ft=J.paint.get("fill-extrusion-pattern"),Xt=ft.constantOr(1),hr=J.getCrossfadeParameters(),qt=J.paint.get("fill-extrusion-opacity"),Ve=0,Qe=q;Ve<Qe.length;Ve+=1){var at=Qe[Ve],Ct=D.getTile(at),Ot=Ct.getBucket(J);if(Ot){var Rt=Ot.programConfigurations.get(J.id),Bt=Y.useProgram(Xt?"fillExtrusionPattern":"fillExtrusion",Rt);Xt&&(Y.context.activeTexture.set(Ue.TEXTURE0),Ct.imageAtlasTexture.bind(Ue.LINEAR,Ue.CLAMP_TO_EDGE),Rt.updatePaintBuffers(hr));var Dt=ft.constantOr(null);if(Dt&&Ct.imageAtlas){var yt=Ct.imageAtlas,Pt=yt.patternPositions[Dt.to.toString()],ht=yt.patternPositions[Dt.from.toString()];Pt&&ht&&Rt.setConstantPatternPositions(Pt,ht)}var ur=Y.translatePosMatrix(at.posMatrix,Ct,J.paint.get("fill-extrusion-translate"),J.paint.get("fill-extrusion-translate-anchor")),br=J.paint.get("fill-extrusion-vertical-gradient"),Ur=Xt?Zu(ur,Y,br,qt,at,hr,Ct):ah(ur,Y,br,qt);Bt.draw(we,we.gl.TRIANGLES,K,de,ne,Er.backCCW,Ur,J.id,Ot.layoutVertexBuffer,Ot.indexBuffer,Ot.segments,J.paint,Y.transform.zoom,Rt)}}}function et(Y,D,J,q){if(!(Y.renderPass!=="offscreen"&&Y.renderPass!=="translucent")){for(var K=Y.context,de=Y.depthModeForSublayer(0,wi.ReadOnly),ne=Y.colorModeForRenderPass(),we=Y.renderPass==="translucent"?Y.stencilConfigForOverlap(q):[{},q],Ue=we[0],ft=we[1],Xt=0,hr=ft;Xt<hr.length;Xt+=1){var qt=hr[Xt],Ve=D.getTile(qt);Ve.needsHillshadePrepare&&Y.renderPass==="offscreen"?it(Y,Ve,J,de,Fn.disabled,ne):Y.renderPass==="translucent"&&We(Y,Ve,J,de,Ue[qt.overscaledZ],ne)}K.viewport.set([0,0,Y.width,Y.height])}}function We(Y,D,J,q,K,de){var ne=Y.context,we=ne.gl,Ue=D.fbo;if(Ue){var ft=Y.useProgram("hillshade");ne.activeTexture.set(we.TEXTURE0),we.bindTexture(we.TEXTURE_2D,Ue.colorAttachment.get());var Xt=Ra(Y,D,J);ft.draw(ne,we.TRIANGLES,q,K,de,Er.disabled,Xt,J.id,Y.rasterBoundsBuffer,Y.quadTriangleIndexBuffer,Y.rasterBoundsSegments)}}function it(Y,D,J,q,K,de){var ne=Y.context,we=ne.gl,Ue=D.dem;if(Ue&&Ue.data){var ft=Ue.dim,Xt=Ue.stride,hr=Ue.getPixels();if(ne.activeTexture.set(we.TEXTURE1),ne.pixelStoreUnpackPremultiplyAlpha.set(!1),D.demTexture=D.demTexture||Y.getTileTexture(Xt),D.demTexture){var qt=D.demTexture;qt.update(hr,{premultiply:!1}),qt.bind(we.NEAREST,we.CLAMP_TO_EDGE)}else D.demTexture=new i.Texture(ne,hr,we.RGBA,{premultiply:!1}),D.demTexture.bind(we.NEAREST,we.CLAMP_TO_EDGE);ne.activeTexture.set(we.TEXTURE0);var Ve=D.fbo;if(!Ve){var Qe=new i.Texture(ne,{width:ft,height:ft,data:null},we.RGBA);Qe.bind(we.LINEAR,we.CLAMP_TO_EDGE),Ve=D.fbo=ne.createFramebuffer(ft,ft,!0),Ve.colorAttachment.set(Qe.texture)}ne.bindFramebuffer.set(Ve.framebuffer),ne.viewport.set([0,0,ft,ft]),Y.useProgram("hillshadePrepare").draw(ne,we.TRIANGLES,q,K,de,Er.disabled,La(D.tileID,Ue),J.id,Y.rasterBoundsBuffer,Y.quadTriangleIndexBuffer,Y.rasterBoundsSegments),D.needsHillshadePrepare=!1}}function Ft(Y,D,J,q){if(Y.renderPass==="translucent"&&J.paint.get("raster-opacity")!==0&&q.length)for(var K=Y.context,de=K.gl,ne=D.getSource(),we=Y.useProgram("raster"),Ue=Y.colorModeForRenderPass(),ft=ne instanceof Ce?[{},q]:Y.stencilConfigForOverlap(q),Xt=ft[0],hr=ft[1],qt=hr[hr.length-1].overscaledZ,Ve=!Y.options.moving,Qe=0,at=hr;Qe<at.length;Qe+=1){var Ct=at[Qe],Ot=Y.depthModeForSublayer(Ct.overscaledZ-qt,J.paint.get("raster-opacity")===1?wi.ReadWrite:wi.ReadOnly,de.LESS),Rt=D.getTile(Ct),Bt=Y.transform.calculatePosMatrix(Ct.toUnwrapped(),Ve);Rt.registerFadeDuration(J.paint.get("raster-fade-duration"));var Dt=D.findLoadedParent(Ct,0),yt=Ht(Rt,Dt,D,J,Y.transform),Pt=void 0,ht=void 0,ur=J.paint.get("raster-resampling")==="nearest"?de.NEAREST:de.LINEAR;K.activeTexture.set(de.TEXTURE0),Rt.texture.bind(ur,de.CLAMP_TO_EDGE,de.LINEAR_MIPMAP_NEAREST),K.activeTexture.set(de.TEXTURE1),Dt?(Dt.texture.bind(ur,de.CLAMP_TO_EDGE,de.LINEAR_MIPMAP_NEAREST),Pt=Math.pow(2,Dt.tileID.overscaledZ-Rt.tileID.overscaledZ),ht=[Rt.tileID.canonical.x*Pt%1,Rt.tileID.canonical.y*Pt%1]):Rt.texture.bind(ur,de.CLAMP_TO_EDGE,de.LINEAR_MIPMAP_NEAREST);var br=Ls(Bt,ht||[0,0],Pt||1,yt,J);ne instanceof Ce?we.draw(K,de.TRIANGLES,Ot,Fn.disabled,Ue,Er.disabled,br,J.id,ne.boundsBuffer,Y.quadTriangleIndexBuffer,ne.boundsSegments):we.draw(K,de.TRIANGLES,Ot,Xt[Ct.overscaledZ],Ue,Er.disabled,br,J.id,Y.rasterBoundsBuffer,Y.quadTriangleIndexBuffer,Y.rasterBoundsSegments)}}function Ht(Y,D,J,q,K){var de=q.paint.get("raster-fade-duration");if(de>0){var ne=i.browser.now(),we=(ne-Y.timeAdded)/de,Ue=D?(ne-D.timeAdded)/de:-1,ft=J.getSource(),Xt=K.coveringZoomLevel({tileSize:ft.tileSize,roundZoom:ft.roundZoom}),hr=!D||Math.abs(D.tileID.overscaledZ-Xt)>Math.abs(Y.tileID.overscaledZ-Xt),qt=hr&&Y.refreshedUponExpiration?1:i.clamp(hr?we:1-Ue,0,1);return Y.refreshedUponExpiration&&we>=1&&(Y.refreshedUponExpiration=!1),D?{opacity:1,mix:1-qt}:{opacity:qt,mix:0}}else return{opacity:1,mix:0}}function tr(Y,D,J){var q=J.paint.get("background-color"),K=J.paint.get("background-opacity");if(K!==0){var de=Y.context,ne=de.gl,we=Y.transform,Ue=we.tileSize,ft=J.paint.get("background-pattern");if(!Y.isPatternMissing(ft)){var Xt=!ft&&q.a===1&&K===1&&Y.opaquePassEnabledForLayer()?"opaque":"translucent";if(Y.renderPass===Xt){var hr=Fn.disabled,qt=Y.depthModeForSublayer(0,Xt==="opaque"?wi.ReadWrite:wi.ReadOnly),Ve=Y.colorModeForRenderPass(),Qe=Y.useProgram(ft?"backgroundPattern":"background"),at=we.coveringTiles({tileSize:Ue});ft&&(de.activeTexture.set(ne.TEXTURE0),Y.imageManager.bind(Y.context));for(var Ct=J.getCrossfadeParameters(),Ot=0,Rt=at;Ot<Rt.length;Ot+=1){var Bt=Rt[Ot],Dt=Y.transform.calculatePosMatrix(Bt.toUnwrapped()),yt=ft?Ua(Dt,K,Y,ft,{tileID:Bt,tileSize:Ue},Ct):Ac(Dt,K,q);Qe.draw(de,ne.TRIANGLES,qt,hr,Ve,Er.disabled,yt,J.id,Y.tileExtentBuffer,Y.quadTriangleIndexBuffer,Y.tileExtentSegments)}}}}}var dr=new i.Color(1,0,0,1),Sr=new i.Color(0,1,0,1),Or=new i.Color(0,0,1,1),Wr=new i.Color(1,0,1,1),ni=new i.Color(0,1,1,1);function Pi(Y){var D=Y.transform.padding,J=3;ln(Y,Y.transform.height-(D.top||0),J,dr),ln(Y,D.bottom||0,J,Sr),Cn(Y,D.left||0,J,Or),Cn(Y,Y.transform.width-(D.right||0),J,Wr);var q=Y.transform.centerPoint;cn(Y,q.x,Y.transform.height-q.y,ni)}function cn(Y,D,J,q){var K=20,de=2;Kn(Y,D-de/2,J-K/2,de,K,q),Kn(Y,D-K/2,J-de/2,K,de,q)}function ln(Y,D,J,q){Kn(Y,0,D+J/2,Y.transform.width,J,q)}function Cn(Y,D,J,q){Kn(Y,D-J/2,0,J,Y.transform.height,q)}function Kn(Y,D,J,q,K,de){var ne=Y.context,we=ne.gl;we.enable(we.SCISSOR_TEST),we.scissor(D*i.browser.devicePixelRatio,J*i.browser.devicePixelRatio,q*i.browser.devicePixelRatio,K*i.browser.devicePixelRatio),ne.clear({color:de}),we.disable(we.SCISSOR_TEST)}function Ta(Y,D,J){for(var q=0;q<J.length;q++)fa(Y,D,J[q])}function fa(Y,D,J){var q=Y.context,K=q.gl,de=J.posMatrix,ne=Y.useProgram("debug"),we=wi.disabled,Ue=Fn.disabled,ft=Y.colorModeForRenderPass(),Xt="$debug";q.activeTexture.set(K.TEXTURE0),Y.emptyTexture.bind(K.LINEAR,K.CLAMP_TO_EDGE),ne.draw(q,K.LINE_STRIP,we,Ue,ft,Er.disabled,dn(de,i.Color.red),Xt,Y.debugBuffer,Y.tileBorderIndexBuffer,Y.debugSegments);var hr=D.getTileByID(J.key).latestRawTileData,qt=hr&&hr.byteLength||0,Ve=Math.floor(qt/1024),Qe=D.getTile(J).tileSize,at=512/Math.min(Qe,512)*(J.overscaledZ/Y.transform.zoom)*.5,Ct=J.canonical.toString();J.overscaledZ!==J.canonical.z&&(Ct+=" => "+J.overscaledZ);var Ot=Ct+" "+Ve+"kb";$a(Y,Ot),ne.draw(q,K.TRIANGLES,we,Ue,wt.alphaBlended,Er.disabled,dn(de,i.Color.transparent,at),Xt,Y.debugBuffer,Y.quadTriangleIndexBuffer,Y.debugSegments)}function $a(Y,D){Y.initDebugOverlayCanvas();var J=Y.debugOverlayCanvas,q=Y.context.gl,K=Y.debugOverlayCanvas.getContext("2d");K.clearRect(0,0,J.width,J.height),K.shadowColor="white",K.shadowBlur=2,K.lineWidth=1.5,K.strokeStyle="white",K.textBaseline="top",K.font="bold 36px Open Sans, sans-serif",K.fillText(D,5,5),K.strokeText(D,5,5),Y.debugOverlayTexture.update(J),Y.debugOverlayTexture.bind(q.LINEAR,q.CLAMP_TO_EDGE)}function Co(Y,D,J){var q=Y.context,K=J.implementation;if(Y.renderPass==="offscreen"){var de=K.prerender;de&&(Y.setCustomLayerDefaults(),q.setColorMode(Y.colorModeForRenderPass()),de.call(K,q.gl,Y.transform.customLayerMatrix()),q.setDirty(),Y.setBaseState())}else if(Y.renderPass==="translucent"){Y.setCustomLayerDefaults(),q.setColorMode(Y.colorModeForRenderPass()),q.setStencilMode(Fn.disabled);var ne=K.renderingMode==="3d"?new wi(Y.context.gl.LEQUAL,wi.ReadWrite,Y.depthRangeFor3D):Y.depthModeForSublayer(0,wi.ReadOnly);q.setDepthMode(ne),K.render(q.gl,Y.transform.customLayerMatrix()),q.setDirty(),Y.setBaseState(),q.bindFramebuffer.set(null)}}var Qa={symbol:w,circle:ot,heatmap:Tt,line:Ir,fill:ve,"fill-extrusion":De,hillshade:et,raster:Ft,background:tr,debug:Ta,custom:Co},mo=function(D,J){this.context=new Xr(D),this.transform=J,this._tileTextures={},this.setup(),this.numSublayers=ri.maxUnderzooming+ri.maxOverzooming+1,this.depthEpsilon=1/Math.pow(2,16),this.crossTileSymbolIndex=new Uf,this.gpuTimers={}};mo.prototype.resize=function(D,J){if(this.width=D*i.browser.devicePixelRatio,this.height=J*i.browser.devicePixelRatio,this.context.viewport.set([0,0,this.width,this.height]),this.style)for(var q=0,K=this.style._order;q<K.length;q+=1){var de=K[q];this.style._layers[de].resize()}},mo.prototype.setup=function(){var D=this.context,J=new i.StructArrayLayout2i4;J.emplaceBack(0,0),J.emplaceBack(i.EXTENT,0),J.emplaceBack(0,i.EXTENT),J.emplaceBack(i.EXTENT,i.EXTENT),this.tileExtentBuffer=D.createVertexBuffer(J,oc.members),this.tileExtentSegments=i.SegmentVector.simpleSegment(0,0,4,2);var q=new i.StructArrayLayout2i4;q.emplaceBack(0,0),q.emplaceBack(i.EXTENT,0),q.emplaceBack(0,i.EXTENT),q.emplaceBack(i.EXTENT,i.EXTENT),this.debugBuffer=D.createVertexBuffer(q,oc.members),this.debugSegments=i.SegmentVector.simpleSegment(0,0,4,5);var K=new i.StructArrayLayout4i8;K.emplaceBack(0,0,0,0),K.emplaceBack(i.EXTENT,0,i.EXTENT,0),K.emplaceBack(0,i.EXTENT,0,i.EXTENT),K.emplaceBack(i.EXTENT,i.EXTENT,i.EXTENT,i.EXTENT),this.rasterBoundsBuffer=D.createVertexBuffer(K,Ee.members),this.rasterBoundsSegments=i.SegmentVector.simpleSegment(0,0,4,2);var de=new i.StructArrayLayout2i4;de.emplaceBack(0,0),de.emplaceBack(1,0),de.emplaceBack(0,1),de.emplaceBack(1,1),this.viewportBuffer=D.createVertexBuffer(de,oc.members),this.viewportSegments=i.SegmentVector.simpleSegment(0,0,4,2);var ne=new i.StructArrayLayout1ui2;ne.emplaceBack(0),ne.emplaceBack(1),ne.emplaceBack(3),ne.emplaceBack(2),ne.emplaceBack(0),this.tileBorderIndexBuffer=D.createIndexBuffer(ne);var we=new i.StructArrayLayout3ui6;we.emplaceBack(0,1,2),we.emplaceBack(2,1,3),this.quadTriangleIndexBuffer=D.createIndexBuffer(we),this.emptyTexture=new i.Texture(D,{width:1,height:1,data:new Uint8Array([0,0,0,0])},D.gl.RGBA);var Ue=this.context.gl;this.stencilClearMode=new Fn({func:Ue.ALWAYS,mask:0},0,255,Ue.ZERO,Ue.ZERO,Ue.ZERO)},mo.prototype.clearStencil=function(){var D=this.context,J=D.gl;this.nextStencilID=1,this.currentStencilSource=void 0;var q=i.create();i.ortho(q,0,this.width,this.height,0,0,1),i.scale(q,q,[J.drawingBufferWidth,J.drawingBufferHeight,0]),this.useProgram("clippingMask").draw(D,J.TRIANGLES,wi.disabled,this.stencilClearMode,wt.disabled,Er.disabled,Nn(q),"$clipping",this.viewportBuffer,this.quadTriangleIndexBuffer,this.viewportSegments)},mo.prototype._renderTileClippingMasks=function(D,J){if(!(this.currentStencilSource===D.source||!D.isTileClipped()||!J||!J.length)){this.currentStencilSource=D.source;var q=this.context,K=q.gl;this.nextStencilID+J.length>256&&this.clearStencil(),q.setColorMode(wt.disabled),q.setDepthMode(wi.disabled);var de=this.useProgram("clippingMask");this._tileClippingMaskIDs={};for(var ne=0,we=J;ne<we.length;ne+=1){var Ue=we[ne],ft=this._tileClippingMaskIDs[Ue.key]=this.nextStencilID++;de.draw(q,K.TRIANGLES,wi.disabled,new Fn({func:K.ALWAYS,mask:0},ft,255,K.KEEP,K.KEEP,K.REPLACE),wt.disabled,Er.disabled,Nn(Ue.posMatrix),"$clipping",this.tileExtentBuffer,this.quadTriangleIndexBuffer,this.tileExtentSegments)}}},mo.prototype.stencilModeFor3D=function(){this.currentStencilSource=void 0,this.nextStencilID+1>256&&this.clearStencil();var D=this.nextStencilID++,J=this.context.gl;return new Fn({func:J.NOTEQUAL,mask:255},D,255,J.KEEP,J.KEEP,J.REPLACE)},mo.prototype.stencilModeForClipping=function(D){var J=this.context.gl;return new Fn({func:J.EQUAL,mask:255},this._tileClippingMaskIDs[D.key],0,J.KEEP,J.KEEP,J.REPLACE)},mo.prototype.stencilConfigForOverlap=function(D){var J,q=this.context.gl,K=D.sort(function(ft,Xt){return Xt.overscaledZ-ft.overscaledZ}),de=K[K.length-1].overscaledZ,ne=K[0].overscaledZ-de+1;if(ne>1){this.currentStencilSource=void 0,this.nextStencilID+ne>256&&this.clearStencil();for(var we={},Ue=0;Ue<ne;Ue++)we[Ue+de]=new Fn({func:q.GEQUAL,mask:255},Ue+this.nextStencilID,255,q.KEEP,q.KEEP,q.REPLACE);return this.nextStencilID+=ne,[we,K]}return[(J={},J[de]=Fn.disabled,J),K]},mo.prototype.colorModeForRenderPass=function(){var D=this.context.gl;if(this._showOverdrawInspector){var J=8,q=1/J;return new wt([D.CONSTANT_COLOR,D.ONE],new i.Color(q,q,q,0),[!0,!0,!0,!0])}else return this.renderPass==="opaque"?wt.unblended:wt.alphaBlended},mo.prototype.depthModeForSublayer=function(D,J,q){if(!this.opaquePassEnabledForLayer())return wi.disabled;var K=1-((1+this.currentLayer)*this.numSublayers+D)*this.depthEpsilon;return new wi(q||this.context.gl.LEQUAL,J,[K,K])},mo.prototype.opaquePassEnabledForLayer=function(){return this.currentLayer<this.opaquePassCutoff},mo.prototype.render=function(D,J){var q=this;this.style=D,this.options=J,this.lineAtlas=D.lineAtlas,this.imageManager=D.imageManager,this.glyphManager=D.glyphManager,this.symbolFadeChange=D.placement.symbolFadeChange(i.browser.now()),this.imageManager.beginFrame();var K=this.style._order,de=this.style.sourceCaches;for(var ne in de){var we=de[ne];we.used&&we.prepare(this.context)}var Ue={},ft={},Xt={};for(var hr in de){var qt=de[hr];Ue[hr]=qt.getVisibleCoordinates(),ft[hr]=Ue[hr].slice().reverse(),Xt[hr]=qt.getVisibleCoordinates(!0).reverse()}this.opaquePassCutoff=1/0;for(var Ve=0;Ve<K.length;Ve++){var Qe=K[Ve];if(this.style._layers[Qe].is3D()){this.opaquePassCutoff=Ve;break}}this.renderPass="offscreen";for(var at=0,Ct=K;at<Ct.length;at+=1){var Ot=Ct[at],Rt=this.style._layers[Ot];if(!(!Rt.hasOffscreenPass()||Rt.isHidden(this.transform.zoom))){var Bt=ft[Rt.source];Rt.type!=="custom"&&!Bt.length||this.renderLayer(this,de[Rt.source],Rt,Bt)}}for(this.context.bindFramebuffer.set(null),this.context.clear({color:J.showOverdrawInspector?i.Color.black:i.Color.transparent,depth:1}),this.clearStencil(),this._showOverdrawInspector=J.showOverdrawInspector,this.depthRangeFor3D=[0,1-(D._order.length+2)*this.numSublayers*this.depthEpsilon],this.renderPass="opaque",this.currentLayer=K.length-1;this.currentLayer>=0;this.currentLayer--){var Dt=this.style._layers[K[this.currentLayer]],yt=de[Dt.source],Pt=Ue[Dt.source];this._renderTileClippingMasks(Dt,Pt),this.renderLayer(this,yt,Dt,Pt)}for(this.renderPass="translucent",this.currentLayer=0;this.currentLayer<K.length;this.currentLayer++){var ht=this.style._layers[K[this.currentLayer]],ur=de[ht.source],br=(ht.type==="symbol"?Xt:ft)[ht.source];this._renderTileClippingMasks(ht,Ue[ht.source]),this.renderLayer(this,ur,ht,br)}if(this.options.showTileBoundaries){var Ur,Di,fi=i.values(this.style._layers);fi.forEach(function(Ti){Ti.source&&!Ti.isHidden(q.transform.zoom)&&(Ti.source!==(Di&&Di.id)&&(Di=q.style.sourceCaches[Ti.source]),(!Ur||Ur.getSource().maxzoom<Di.getSource().maxzoom)&&(Ur=Di))}),Ur&&Qa.debug(this,Ur,Ur.getVisibleCoordinates())}this.options.showPadding&&Pi(this),this.context.setDefault()},mo.prototype.renderLayer=function(D,J,q,K){q.isHidden(this.transform.zoom)||q.type!=="background"&&q.type!=="custom"&&!K.length||(this.id=q.id,this.gpuTimingStart(q),Qa[q.type](D,J,q,K,this.style.placement.variableOffsets),this.gpuTimingEnd())},mo.prototype.gpuTimingStart=function(D){if(this.options.gpuTiming){var J=this.context.extTimerQuery,q=this.gpuTimers[D.id];q||(q=this.gpuTimers[D.id]={calls:0,cpuTime:0,query:J.createQueryEXT()}),q.calls++,J.beginQueryEXT(J.TIME_ELAPSED_EXT,q.query)}},mo.prototype.gpuTimingEnd=function(){if(this.options.gpuTiming){var D=this.context.extTimerQuery;D.endQueryEXT(D.TIME_ELAPSED_EXT)}},mo.prototype.collectGpuTimers=function(){var D=this.gpuTimers;return this.gpuTimers={},D},mo.prototype.queryGpuTimers=function(D){var J={};for(var q in D){var K=D[q],de=this.context.extTimerQuery,ne=de.getQueryObjectEXT(K.query,de.QUERY_RESULT_EXT)/(1e3*1e3);de.deleteQueryEXT(K.query),J[q]=ne}return J},mo.prototype.translatePosMatrix=function(D,J,q,K,de){if(!q[0]&&!q[1])return D;var ne=de?K==="map"?this.transform.angle:0:K==="viewport"?-this.transform.angle:0;if(ne){var we=Math.sin(ne),Ue=Math.cos(ne);q=[q[0]*Ue-q[1]*we,q[0]*we+q[1]*Ue]}var ft=[de?q[0]:As(J,q[0],this.transform.zoom),de?q[1]:As(J,q[1],this.transform.zoom),0],Xt=new Float32Array(16);return i.translate(Xt,D,ft),Xt},mo.prototype.saveTileTexture=function(D){var J=this._tileTextures[D.size[0]];J?J.push(D):this._tileTextures[D.size[0]]=[D]},mo.prototype.getTileTexture=function(D){var J=this._tileTextures[D];return J&&J.length>0?J.pop():null},mo.prototype.isPatternMissing=function(D){if(!D)return!1;if(!D.from||!D.to)return!0;var J=this.imageManager.getPattern(D.from.toString()),q=this.imageManager.getPattern(D.to.toString());return!J||!q},mo.prototype.useProgram=function(D,J){this.cache=this.cache||{};var q=""+D+(J?J.cacheKey:"")+(this._showOverdrawInspector?"/overdraw":"");return this.cache[q]||(this.cache[q]=new Rf(this.context,D,wf[D],J,oo[D],this._showOverdrawInspector)),this.cache[q]},mo.prototype.setCustomLayerDefaults=function(){this.context.unbindVAO(),this.context.cullFace.setDefault(),this.context.activeTexture.setDefault(),this.context.pixelStoreUnpack.setDefault(),this.context.pixelStoreUnpackPremultiplyAlpha.setDefault(),this.context.pixelStoreUnpackFlipY.setDefault()},mo.prototype.setBaseState=function(){var D=this.context.gl;this.context.cullFace.set(!1),this.context.viewport.set([0,0,this.width,this.height]),this.context.blendEquation.set(D.FUNC_ADD)},mo.prototype.initDebugOverlayCanvas=function(){if(this.debugOverlayCanvas==null){this.debugOverlayCanvas=i.window.document.createElement("canvas"),this.debugOverlayCanvas.width=512,this.debugOverlayCanvas.height=512;var D=this.context.gl;this.debugOverlayTexture=new i.Texture(this.context,this.debugOverlayCanvas,D.RGBA)}},mo.prototype.destroy=function(){this.emptyTexture.destroy(),this.debugOverlayTexture&&this.debugOverlayTexture.destroy()};var Bo=function(D,J){this.points=D,this.planes=J};Bo.fromInvProjectionMatrix=function(D,J,q){var K=[[-1,1,-1,1],[1,1,-1,1],[1,-1,-1,1],[-1,-1,-1,1],[-1,1,1,1],[1,1,1,1],[1,-1,1,1],[-1,-1,1,1]],de=Math.pow(2,q),ne=K.map(function(ft){return i.transformMat4([],ft,D)}).map(function(ft){return i.scale$1([],ft,1/ft[3]/J*de)}),we=[[0,1,2],[6,5,4],[0,3,7],[2,1,5],[3,2,6],[0,4,5]],Ue=we.map(function(ft){var Xt=i.sub([],ne[ft[0]],ne[ft[1]]),hr=i.sub([],ne[ft[2]],ne[ft[1]]),qt=i.normalize([],i.cross([],Xt,hr)),Ve=-i.dot(qt,ne[ft[1]]);return qt.concat(Ve)});return new Bo(ne,Ue)};var Ps=function(D,J){this.min=D,this.max=J,this.center=i.scale$2([],i.add([],this.min,this.max),.5)};Ps.prototype.quadrant=function(D){for(var J=[D%2===0,D<2],q=i.clone$2(this.min),K=i.clone$2(this.max),de=0;de<J.length;de++)q[de]=J[de]?this.min[de]:this.center[de],K[de]=J[de]?this.center[de]:this.max[de];return K[2]=this.max[2],new Ps(q,K)},Ps.prototype.distanceX=function(D){var J=Math.max(Math.min(this.max[0],D[0]),this.min[0]);return J-D[0]},Ps.prototype.distanceY=function(D){var J=Math.max(Math.min(this.max[1],D[1]),this.min[1]);return J-D[1]},Ps.prototype.intersects=function(D){for(var J=[[this.min[0],this.min[1],0,1],[this.max[0],this.min[1],0,1],[this.max[0],this.max[1],0,1],[this.min[0],this.max[1],0,1]],q=!0,K=0;K<D.planes.length;K++){for(var de=D.planes[K],ne=0,we=0;we<J.length;we++)ne+=i.dot$1(de,J[we])>=0;if(ne===0)return 0;ne!==J.length&&(q=!1)}if(q)return 2;for(var Ue=0;Ue<3;Ue++){for(var ft=Number.MAX_VALUE,Xt=-Number.MAX_VALUE,hr=0;hr<D.points.length;hr++){var qt=D.points[hr][Ue]-this.min[Ue];ft=Math.min(ft,qt),Xt=Math.max(Xt,qt)}if(Xt<0||ft>this.max[Ue]-this.min[Ue])return 0}return 1};var Ts=function(D,J,q,K){if(D===void 0&&(D=0),J===void 0&&(J=0),q===void 0&&(q=0),K===void 0&&(K=0),isNaN(D)||D<0||isNaN(J)||J<0||isNaN(q)||q<0||isNaN(K)||K<0)throw new Error("Invalid value for edge-insets, top, bottom, left and right must all be numbers");this.top=D,this.bottom=J,this.left=q,this.right=K};Ts.prototype.interpolate=function(D,J,q){return J.top!=null&&D.top!=null&&(this.top=i.number(D.top,J.top,q)),J.bottom!=null&&D.bottom!=null&&(this.bottom=i.number(D.bottom,J.bottom,q)),J.left!=null&&D.left!=null&&(this.left=i.number(D.left,J.left,q)),J.right!=null&&D.right!=null&&(this.right=i.number(D.right,J.right,q)),this},Ts.prototype.getCenter=function(D,J){var q=i.clamp((this.left+D-this.right)/2,0,D),K=i.clamp((this.top+J-this.bottom)/2,0,J);return new i.Point(q,K)},Ts.prototype.equals=function(D){return this.top===D.top&&this.bottom===D.bottom&&this.left===D.left&&this.right===D.right},Ts.prototype.clone=function(){return new Ts(this.top,this.bottom,this.left,this.right)},Ts.prototype.toJSON=function(){return{top:this.top,bottom:this.bottom,left:this.left,right:this.right}};var wo=function(D,J,q,K,de){this.tileSize=512,this.maxValidLatitude=85.051129,this._renderWorldCopies=de===void 0?!0:de,this._minZoom=D||0,this._maxZoom=J||22,this._minPitch=q==null?0:q,this._maxPitch=K==null?60:K,this.setMaxBounds(),this.width=0,this.height=0,this._center=new i.LngLat(0,0),this.zoom=0,this.angle=0,this._fov=.6435011087932844,this._pitch=0,this._unmodified=!0,this._edgeInsets=new Ts,this._posMatrixCache={},this._alignedPosMatrixCache={}},To={minZoom:{configurable:!0},maxZoom:{configurable:!0},minPitch:{configurable:!0},maxPitch:{configurable:!0},renderWorldCopies:{configurable:!0},worldSize:{configurable:!0},centerOffset:{configurable:!0},size:{configurable:!0},bearing:{configurable:!0},pitch:{configurable:!0},fov:{configurable:!0},zoom:{configurable:!0},center:{configurable:!0},padding:{configurable:!0},centerPoint:{configurable:!0},unmodified:{configurable:!0},point:{configurable:!0}};wo.prototype.clone=function(){var D=new wo(this._minZoom,this._maxZoom,this._minPitch,this.maxPitch,this._renderWorldCopies);return D.tileSize=this.tileSize,D.latRange=this.latRange,D.width=this.width,D.height=this.height,D._center=this._center,D.zoom=this.zoom,D.angle=this.angle,D._fov=this._fov,D._pitch=this._pitch,D._unmodified=this._unmodified,D._edgeInsets=this._edgeInsets.clone(),D._calcMatrices(),D},To.minZoom.get=function(){return this._minZoom},To.minZoom.set=function(Y){this._minZoom!==Y&&(this._minZoom=Y,this.zoom=Math.max(this.zoom,Y))},To.maxZoom.get=function(){return this._maxZoom},To.maxZoom.set=function(Y){this._maxZoom!==Y&&(this._maxZoom=Y,this.zoom=Math.min(this.zoom,Y))},To.minPitch.get=function(){return this._minPitch},To.minPitch.set=function(Y){this._minPitch!==Y&&(this._minPitch=Y,this.pitch=Math.max(this.pitch,Y))},To.maxPitch.get=function(){return this._maxPitch},To.maxPitch.set=function(Y){this._maxPitch!==Y&&(this._maxPitch=Y,this.pitch=Math.min(this.pitch,Y))},To.renderWorldCopies.get=function(){return this._renderWorldCopies},To.renderWorldCopies.set=function(Y){Y===void 0?Y=!0:Y===null&&(Y=!1),this._renderWorldCopies=Y},To.worldSize.get=function(){return this.tileSize*this.scale},To.centerOffset.get=function(){return this.centerPoint._sub(this.size._div(2))},To.size.get=function(){return new i.Point(this.width,this.height)},To.bearing.get=function(){return-this.angle/Math.PI*180},To.bearing.set=function(Y){var D=-i.wrap(Y,-180,180)*Math.PI/180;this.angle!==D&&(this._unmodified=!1,this.angle=D,this._calcMatrices(),this.rotationMatrix=i.create$2(),i.rotate(this.rotationMatrix,this.rotationMatrix,this.angle))},To.pitch.get=function(){return this._pitch/Math.PI*180},To.pitch.set=function(Y){var D=i.clamp(Y,this.minPitch,this.maxPitch)/180*Math.PI;this._pitch!==D&&(this._unmodified=!1,this._pitch=D,this._calcMatrices())},To.fov.get=function(){return this._fov/Math.PI*180},To.fov.set=function(Y){Y=Math.max(.01,Math.min(60,Y)),this._fov!==Y&&(this._unmodified=!1,this._fov=Y/180*Math.PI,this._calcMatrices())},To.zoom.get=function(){return this._zoom},To.zoom.set=function(Y){var D=Math.min(Math.max(Y,this.minZoom),this.maxZoom);this._zoom!==D&&(this._unmodified=!1,this._zoom=D,this.scale=this.zoomScale(D),this.tileZoom=Math.floor(D),this.zoomFraction=D-this.tileZoom,this._constrain(),this._calcMatrices())},To.center.get=function(){return this._center},To.center.set=function(Y){Y.lat===this._center.lat&&Y.lng===this._center.lng||(this._unmodified=!1,this._center=Y,this._constrain(),this._calcMatrices())},To.padding.get=function(){return this._edgeInsets.toJSON()},To.padding.set=function(Y){this._edgeInsets.equals(Y)||(this._unmodified=!1,this._edgeInsets.interpolate(this._edgeInsets,Y,1),this._calcMatrices())},To.centerPoint.get=function(){return this._edgeInsets.getCenter(this.width,this.height)},wo.prototype.isPaddingEqual=function(D){return this._edgeInsets.equals(D)},wo.prototype.interpolatePadding=function(D,J,q){this._unmodified=!1,this._edgeInsets.interpolate(D,J,q),this._constrain(),this._calcMatrices()},wo.prototype.coveringZoomLevel=function(D){var J=(D.roundZoom?Math.round:Math.floor)(this.zoom+this.scaleZoom(this.tileSize/D.tileSize));return Math.max(0,J)},wo.prototype.getVisibleUnwrappedCoordinates=function(D){var J=[new i.UnwrappedTileID(0,D)];if(this._renderWorldCopies)for(var q=this.pointCoordinate(new i.Point(0,0)),K=this.pointCoordinate(new i.Point(this.width,0)),de=this.pointCoordinate(new i.Point(this.width,this.height)),ne=this.pointCoordinate(new i.Point(0,this.height)),we=Math.floor(Math.min(q.x,K.x,de.x,ne.x)),Ue=Math.floor(Math.max(q.x,K.x,de.x,ne.x)),ft=1,Xt=we-ft;Xt<=Ue+ft;Xt++)Xt!==0&&J.push(new i.UnwrappedTileID(Xt,D));return J},wo.prototype.coveringTiles=function(D){var J=this.coveringZoomLevel(D),q=J;if(D.minzoom!==void 0&&J<D.minzoom)return[];D.maxzoom!==void 0&&J>D.maxzoom&&(J=D.maxzoom);var K=i.MercatorCoordinate.fromLngLat(this.center),de=Math.pow(2,J),ne=[de*K.x,de*K.y,0],we=Bo.fromInvProjectionMatrix(this.invProjMatrix,this.worldSize,J),Ue=D.minzoom||0;this.pitch<=60&&this._edgeInsets.top<.1&&(Ue=J);var ft=3,Xt=function(fi){return{aabb:new Ps([fi*de,0,0],[(fi+1)*de,de,0]),zoom:0,x:0,y:0,wrap:fi,fullyVisible:!1}},hr=[],qt=[],Ve=J,Qe=D.reparseOverscaled?q:J;if(this._renderWorldCopies)for(var at=1;at<=3;at++)hr.push(Xt(-at)),hr.push(Xt(at));for(hr.push(Xt(0));hr.length>0;){var Ct=hr.pop(),Ot=Ct.x,Rt=Ct.y,Bt=Ct.fullyVisible;if(!Bt){var Dt=Ct.aabb.intersects(we);if(Dt===0)continue;Bt=Dt===2}var yt=Ct.aabb.distanceX(ne),Pt=Ct.aabb.distanceY(ne),ht=Math.max(Math.abs(yt),Math.abs(Pt)),ur=ft+(1<<Ve-Ct.zoom)-2;if(Ct.zoom===Ve||ht>ur&&Ct.zoom>=Ue){qt.push({tileID:new i.OverscaledTileID(Ct.zoom===Ve?Qe:Ct.zoom,Ct.wrap,Ct.zoom,Ot,Rt),distanceSq:i.sqrLen([ne[0]-.5-Ot,ne[1]-.5-Rt])});continue}for(var br=0;br<4;br++){var Ur=(Ot<<1)+br%2,Di=(Rt<<1)+(br>>1);hr.push({aabb:Ct.aabb.quadrant(br),zoom:Ct.zoom+1,x:Ur,y:Di,wrap:Ct.wrap,fullyVisible:Bt})}}return qt.sort(function(fi,Ti){return fi.distanceSq-Ti.distanceSq}).map(function(fi){return fi.tileID})},wo.prototype.resize=function(D,J){this.width=D,this.height=J,this.pixelsToGLUnits=[2/D,-2/J],this._constrain(),this._calcMatrices()},To.unmodified.get=function(){return this._unmodified},wo.prototype.zoomScale=function(D){return Math.pow(2,D)},wo.prototype.scaleZoom=function(D){return Math.log(D)/Math.LN2},wo.prototype.project=function(D){var J=i.clamp(D.lat,-this.maxValidLatitude,this.maxValidLatitude);return new i.Point(i.mercatorXfromLng(D.lng)*this.worldSize,i.mercatorYfromLat(J)*this.worldSize)},wo.prototype.unproject=function(D){return new i.MercatorCoordinate(D.x/this.worldSize,D.y/this.worldSize).toLngLat()},To.point.get=function(){return this.project(this.center)},wo.prototype.setLocationAtPoint=function(D,J){var q=this.pointCoordinate(J),K=this.pointCoordinate(this.centerPoint),de=this.locationCoordinate(D),ne=new i.MercatorCoordinate(de.x-(q.x-K.x),de.y-(q.y-K.y));this.center=this.coordinateLocation(ne),this._renderWorldCopies&&(this.center=this.center.wrap())},wo.prototype.locationPoint=function(D){return this.coordinatePoint(this.locationCoordinate(D))},wo.prototype.pointLocation=function(D){return this.coordinateLocation(this.pointCoordinate(D))},wo.prototype.locationCoordinate=function(D){return i.MercatorCoordinate.fromLngLat(D)},wo.prototype.coordinateLocation=function(D){return D.toLngLat()},wo.prototype.pointCoordinate=function(D){var J=0,q=[D.x,D.y,0,1],K=[D.x,D.y,1,1];i.transformMat4(q,q,this.pixelMatrixInverse),i.transformMat4(K,K,this.pixelMatrixInverse);var de=q[3],ne=K[3],we=q[0]/de,Ue=K[0]/ne,ft=q[1]/de,Xt=K[1]/ne,hr=q[2]/de,qt=K[2]/ne,Ve=hr===qt?0:(J-hr)/(qt-hr);return new i.MercatorCoordinate(i.number(we,Ue,Ve)/this.worldSize,i.number(ft,Xt,Ve)/this.worldSize)},wo.prototype.coordinatePoint=function(D){var J=[D.x*this.worldSize,D.y*this.worldSize,0,1];return i.transformMat4(J,J,this.pixelMatrix),new i.Point(J[0]/J[3],J[1]/J[3])},wo.prototype.getBounds=function(){return new i.LngLatBounds().extend(this.pointLocation(new i.Point(0,0))).extend(this.pointLocation(new i.Point(this.width,0))).extend(this.pointLocation(new i.Point(this.width,this.height))).extend(this.pointLocation(new i.Point(0,this.height)))},wo.prototype.getMaxBounds=function(){return!this.latRange||this.latRange.length!==2||!this.lngRange||this.lngRange.length!==2?null:new i.LngLatBounds([this.lngRange[0],this.latRange[0]],[this.lngRange[1],this.latRange[1]])},wo.prototype.setMaxBounds=function(D){D?(this.lngRange=[D.getWest(),D.getEast()],this.latRange=[D.getSouth(),D.getNorth()],this._constrain()):(this.lngRange=null,this.latRange=[-this.maxValidLatitude,this.maxValidLatitude])},wo.prototype.calculatePosMatrix=function(D,J){J===void 0&&(J=!1);var q=D.key,K=J?this._alignedPosMatrixCache:this._posMatrixCache;if(K[q])return K[q];var de=D.canonical,ne=this.worldSize/this.zoomScale(de.z),we=de.x+Math.pow(2,de.z)*D.wrap,Ue=i.identity(new Float64Array(16));return i.translate(Ue,Ue,[we*ne,de.y*ne,0]),i.scale(Ue,Ue,[ne/i.EXTENT,ne/i.EXTENT,1]),i.multiply(Ue,J?this.alignedProjMatrix:this.projMatrix,Ue),K[q]=new Float32Array(Ue),K[q]},wo.prototype.customLayerMatrix=function(){return this.mercatorMatrix.slice()},wo.prototype._constrain=function(){if(!(!this.center||!this.width||!this.height||this._constraining)){this._constraining=!0;var D=-90,J=90,q=-180,K=180,de,ne,we,Ue,ft=this.size,Xt=this._unmodified;if(this.latRange){var hr=this.latRange;D=i.mercatorYfromLat(hr[1])*this.worldSize,J=i.mercatorYfromLat(hr[0])*this.worldSize,de=J-D<ft.y?ft.y/(J-D):0}if(this.lngRange){var qt=this.lngRange;q=i.mercatorXfromLng(qt[0])*this.worldSize,K=i.mercatorXfromLng(qt[1])*this.worldSize,ne=K-q<ft.x?ft.x/(K-q):0}var Ve=this.point,Qe=Math.max(ne||0,de||0);if(Qe){this.center=this.unproject(new i.Point(ne?(K+q)/2:Ve.x,de?(J+D)/2:Ve.y)),this.zoom+=this.scaleZoom(Qe),this._unmodified=Xt,this._constraining=!1;return}if(this.latRange){var at=Ve.y,Ct=ft.y/2;at-Ct<D&&(Ue=D+Ct),at+Ct>J&&(Ue=J-Ct)}if(this.lngRange){var Ot=Ve.x,Rt=ft.x/2;Ot-Rt<q&&(we=q+Rt),Ot+Rt>K&&(we=K-Rt)}(we!==void 0||Ue!==void 0)&&(this.center=this.unproject(new i.Point(we!==void 0?we:Ve.x,Ue!==void 0?Ue:Ve.y))),this._unmodified=Xt,this._constraining=!1}},wo.prototype._calcMatrices=function(){if(this.height){var D=this._fov/2,J=this.centerOffset;this.cameraToCenterDistance=.5/Math.tan(D)*this.height;var q=Math.PI/2+this._pitch,K=this._fov*(.5+J.y/this.height),de=Math.sin(K)*this.cameraToCenterDistance/Math.sin(i.clamp(Math.PI-q-K,.01,Math.PI-.01)),ne=this.point,we=ne.x,Ue=ne.y,ft=Math.cos(Math.PI/2-this._pitch)*de+this.cameraToCenterDistance,Xt=ft*1.01,hr=this.height/50,qt=new Float64Array(16);i.perspective(qt,this._fov,this.width/this.height,hr,Xt),qt[8]=-J.x*2/this.width,qt[9]=J.y*2/this.height,i.scale(qt,qt,[1,-1,1]),i.translate(qt,qt,[0,0,-this.cameraToCenterDistance]),i.rotateX(qt,qt,this._pitch),i.rotateZ(qt,qt,this.angle),i.translate(qt,qt,[-we,-Ue,0]),this.mercatorMatrix=i.scale([],qt,[this.worldSize,this.worldSize,this.worldSize]),i.scale(qt,qt,[1,1,i.mercatorZfromAltitude(1,this.center.lat)*this.worldSize,1]),this.projMatrix=qt,this.invProjMatrix=i.invert([],this.projMatrix);var Ve=this.width%2/2,Qe=this.height%2/2,at=Math.cos(this.angle),Ct=Math.sin(this.angle),Ot=we-Math.round(we)+at*Ve+Ct*Qe,Rt=Ue-Math.round(Ue)+at*Qe+Ct*Ve,Bt=new Float64Array(qt);if(i.translate(Bt,Bt,[Ot>.5?Ot-1:Ot,Rt>.5?Rt-1:Rt,0]),this.alignedProjMatrix=Bt,qt=i.create(),i.scale(qt,qt,[this.width/2,-this.height/2,1]),i.translate(qt,qt,[1,-1,0]),this.labelPlaneMatrix=qt,qt=i.create(),i.scale(qt,qt,[1,-1,1]),i.translate(qt,qt,[-1,-1,0]),i.scale(qt,qt,[2/this.width,2/this.height,1]),this.glCoordMatrix=qt,this.pixelMatrix=i.multiply(new Float64Array(16),this.labelPlaneMatrix,this.projMatrix),qt=i.invert(new Float64Array(16),this.pixelMatrix),!qt)throw new Error("failed to invert matrix");this.pixelMatrixInverse=qt,this._posMatrixCache={},this._alignedPosMatrixCache={}}},wo.prototype.maxPitchScaleFactor=function(){if(!this.pixelMatrixInverse)return 1;var D=this.pointCoordinate(new i.Point(0,0)),J=[D.x*this.worldSize,D.y*this.worldSize,0,1],q=i.transformMat4(J,J,this.pixelMatrix);return q[3]/this.cameraToCenterDistance},wo.prototype.getCameraPoint=function(){var D=this._pitch,J=Math.tan(D)*(this.cameraToCenterDistance||1);return this.centerPoint.add(new i.Point(0,J))},wo.prototype.getCameraQueryGeometry=function(D){var J=this.getCameraPoint();if(D.length===1)return[D[0],J];for(var q=J.x,K=J.y,de=J.x,ne=J.y,we=0,Ue=D;we<Ue.length;we+=1){var ft=Ue[we];q=Math.min(q,ft.x),K=Math.min(K,ft.y),de=Math.max(de,ft.x),ne=Math.max(ne,ft.y)}return[new i.Point(q,K),new i.Point(de,K),new i.Point(de,ne),new i.Point(q,ne),new i.Point(q,K)]},Object.defineProperties(wo.prototype,To);function hl(Y,D){var J=!1,q=null,K=function(){q=null,J&&(Y(),q=setTimeout(K,D),J=!1)};return function(){return J=!0,q||K(),q}}var Ul=function(D){this._hashName=D&&encodeURIComponent(D),i.bindAll(["_getCurrentHash","_onHashChange","_updateHash"],this),this._updateHash=hl(this._updateHashUnthrottled.bind(this),30*1e3/100)};Ul.prototype.addTo=function(D){return this._map=D,i.window.addEventListener("hashchange",this._onHashChange,!1),this._map.on("moveend",this._updateHash),this},Ul.prototype.remove=function(){return i.window.removeEventListener("hashchange",this._onHashChange,!1),this._map.off("moveend",this._updateHash),clearTimeout(this._updateHash()),delete this._map,this},Ul.prototype.getHashString=function(D){var J=this._map.getCenter(),q=Math.round(this._map.getZoom()*100)/100,K=Math.ceil((q*Math.LN2+Math.log(512/360/.5))/Math.LN10),de=Math.pow(10,K),ne=Math.round(J.lng*de)/de,we=Math.round(J.lat*de)/de,Ue=this._map.getBearing(),ft=this._map.getPitch(),Xt="";if(D?Xt+="/"+ne+"/"+we+"/"+q:Xt+=q+"/"+we+"/"+ne,(Ue||ft)&&(Xt+="/"+Math.round(Ue*10)/10),ft&&(Xt+="/"+Math.round(ft)),this._hashName){var hr=this._hashName,qt=!1,Ve=i.window.location.hash.slice(1).split("&").map(function(Qe){var at=Qe.split("=")[0];return at===hr?(qt=!0,at+"="+Xt):Qe}).filter(function(Qe){return Qe});return qt||Ve.push(hr+"="+Xt),"#"+Ve.join("&")}return"#"+Xt},Ul.prototype._getCurrentHash=function(){var D=this,J=i.window.location.hash.replace("#","");if(this._hashName){var q;return J.split("&").map(function(K){return K.split("=")}).forEach(function(K){K[0]===D._hashName&&(q=K)}),(q&&q[1]||"").split("/")}return J.split("/")},Ul.prototype._onHashChange=function(){var D=this._getCurrentHash();if(D.length>=3&&!D.some(function(q){return isNaN(q)})){var J=this._map.dragRotate.isEnabled()&&this._map.touchZoomRotate.isEnabled()?+(D[3]||0):this._map.getBearing();return this._map.jumpTo({center:[+D[2],+D[1]],zoom:+D[0],bearing:J,pitch:+(D[4]||0)}),!0}return!1},Ul.prototype._updateHashUnthrottled=function(){var D=i.window.location.href.replace(/(#.+)?$/,this.getHashString());try{i.window.history.replaceState(i.window.history.state,null,D)}catch(J){}};var Lu={linearity:.3,easing:i.bezier(0,0,.3,1)},au=i.extend({deceleration:2500,maxSpeed:1400},Lu),Js=i.extend({deceleration:20,maxSpeed:1400},Lu),Ql=i.extend({deceleration:1e3,maxSpeed:360},Lu),dc=i.extend({deceleration:1e3,maxSpeed:90},Lu),Tl=function(D){this._map=D,this.clear()};Tl.prototype.clear=function(){this._inertiaBuffer=[]},Tl.prototype.record=function(D){this._drainInertiaBuffer(),this._inertiaBuffer.push({time:i.browser.now(),settings:D})},Tl.prototype._drainInertiaBuffer=function(){for(var D=this._inertiaBuffer,J=i.browser.now(),q=160;D.length>0&&J-D[0].time>q;)D.shift()},Tl.prototype._onMoveEnd=function(D){if(this._drainInertiaBuffer(),!(this._inertiaBuffer.length<2)){for(var J={zoom:0,bearing:0,pitch:0,pan:new i.Point(0,0),pinchAround:void 0,around:void 0},q=0,K=this._inertiaBuffer;q<K.length;q+=1){var de=K[q],ne=de.settings;J.zoom+=ne.zoomDelta||0,J.bearing+=ne.bearingDelta||0,J.pitch+=ne.pitchDelta||0,ne.panDelta&&J.pan._add(ne.panDelta),ne.around&&(J.around=ne.around),ne.pinchAround&&(J.pinchAround=ne.pinchAround)}var we=this._inertiaBuffer[this._inertiaBuffer.length-1],Ue=we.time-this._inertiaBuffer[0].time,ft={};if(J.pan.mag()){var Xt=X(J.pan.mag(),Ue,i.extend({},au,D||{}));ft.offset=J.pan.mult(Xt.amount/J.pan.mag()),ft.center=this._map.transform.center,Al(ft,Xt)}if(J.zoom){var hr=X(J.zoom,Ue,Js);ft.zoom=this._map.transform.zoom+hr.amount,Al(ft,hr)}if(J.bearing){var qt=X(J.bearing,Ue,Ql);ft.bearing=this._map.transform.bearing+i.clamp(qt.amount,-179,179),Al(ft,qt)}if(J.pitch){var Ve=X(J.pitch,Ue,dc);ft.pitch=this._map.transform.pitch+Ve.amount,Al(ft,Ve)}if(ft.zoom||ft.bearing){var Qe=J.pinchAround===void 0?J.around:J.pinchAround;ft.around=Qe?this._map.unproject(Qe):this._map.getCenter()}return this.clear(),i.extend(ft,{noMoveStart:!0})}};function Al(Y,D){(!Y.duration||Y.duration<D.duration)&&(Y.duration=D.duration,Y.easing=D.easing)}function X(Y,D,J){var q=J.maxSpeed,K=J.linearity,de=J.deceleration,ne=i.clamp(Y*K/(D/1e3),-q,q),we=Math.abs(ne)/(de*K);return{easing:J.easing,duration:we*1e3,amount:ne*(we/2)}}var se=function(Y){function D(q,K,de,ne){ne===void 0&&(ne={});var we=o.mousePos(K.getCanvasContainer(),de),Ue=K.unproject(we);Y.call(this,q,i.extend({point:we,lngLat:Ue,originalEvent:de},ne)),this._defaultPrevented=!1,this.target=K}Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D;var J={defaultPrevented:{configurable:!0}};return D.prototype.preventDefault=function(){this._defaultPrevented=!0},J.defaultPrevented.get=function(){return this._defaultPrevented},Object.defineProperties(D.prototype,J),D}(i.Event),Te=function(Y){function D(q,K,de){var ne=q==="touchend"?de.changedTouches:de.touches,we=o.touchPos(K.getCanvasContainer(),ne),Ue=we.map(function(hr){return K.unproject(hr)}),ft=we.reduce(function(hr,qt,Ve,Qe){return hr.add(qt.div(Qe.length))},new i.Point(0,0)),Xt=K.unproject(ft);Y.call(this,q,{points:we,point:ft,lngLats:Ue,lngLat:Xt,originalEvent:de}),this._defaultPrevented=!1}Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D;var J={defaultPrevented:{configurable:!0}};return D.prototype.preventDefault=function(){this._defaultPrevented=!0},J.defaultPrevented.get=function(){return this._defaultPrevented},Object.defineProperties(D.prototype,J),D}(i.Event),Ne=function(Y){function D(q,K,de){Y.call(this,q,{originalEvent:de}),this._defaultPrevented=!1}Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D;var J={defaultPrevented:{configurable:!0}};return D.prototype.preventDefault=function(){this._defaultPrevented=!0},J.defaultPrevented.get=function(){return this._defaultPrevented},Object.defineProperties(D.prototype,J),D}(i.Event),He=function(D,J){this._map=D,this._clickTolerance=J.clickTolerance};He.prototype.reset=function(){delete this._mousedownPos},He.prototype.wheel=function(D){return this._firePreventable(new Ne(D.type,this._map,D))},He.prototype.mousedown=function(D,J){return this._mousedownPos=J,this._firePreventable(new se(D.type,this._map,D))},He.prototype.mouseup=function(D){this._map.fire(new se(D.type,this._map,D))},He.prototype.click=function(D,J){this._mousedownPos&&this._mousedownPos.dist(J)>=this._clickTolerance||this._map.fire(new se(D.type,this._map,D))},He.prototype.dblclick=function(D){return this._firePreventable(new se(D.type,this._map,D))},He.prototype.mouseover=function(D){this._map.fire(new se(D.type,this._map,D))},He.prototype.mouseout=function(D){this._map.fire(new se(D.type,this._map,D))},He.prototype.touchstart=function(D){return this._firePreventable(new Te(D.type,this._map,D))},He.prototype.touchmove=function(D){this._map.fire(new Te(D.type,this._map,D))},He.prototype.touchend=function(D){this._map.fire(new Te(D.type,this._map,D))},He.prototype.touchcancel=function(D){this._map.fire(new Te(D.type,this._map,D))},He.prototype._firePreventable=function(D){if(this._map.fire(D),D.defaultPrevented)return{}},He.prototype.isEnabled=function(){return!0},He.prototype.isActive=function(){return!1},He.prototype.enable=function(){},He.prototype.disable=function(){};var Ye=function(D){this._map=D};Ye.prototype.reset=function(){this._delayContextMenu=!1,delete this._contextMenuEvent},Ye.prototype.mousemove=function(D){this._map.fire(new se(D.type,this._map,D))},Ye.prototype.mousedown=function(){this._delayContextMenu=!0},Ye.prototype.mouseup=function(){this._delayContextMenu=!1,this._contextMenuEvent&&(this._map.fire(new se("contextmenu",this._map,this._contextMenuEvent)),delete this._contextMenuEvent)},Ye.prototype.contextmenu=function(D){this._delayContextMenu?this._contextMenuEvent=D:this._map.fire(new se(D.type,this._map,D)),this._map.listens("contextmenu")&&D.preventDefault()},Ye.prototype.isEnabled=function(){return!0},Ye.prototype.isActive=function(){return!1},Ye.prototype.enable=function(){},Ye.prototype.disable=function(){};var kt=function(D,J){this._map=D,this._el=D.getCanvasContainer(),this._container=D.getContainer(),this._clickTolerance=J.clickTolerance||1};kt.prototype.isEnabled=function(){return!!this._enabled},kt.prototype.isActive=function(){return!!this._active},kt.prototype.enable=function(){this.isEnabled()||(this._enabled=!0)},kt.prototype.disable=function(){this.isEnabled()&&(this._enabled=!1)},kt.prototype.mousedown=function(D,J){this.isEnabled()&&D.shiftKey&&D.button===0&&(o.disableDrag(),this._startPos=this._lastPos=J,this._active=!0)},kt.prototype.mousemoveWindow=function(D,J){if(this._active){var q=J;if(!(this._lastPos.equals(q)||!this._box&&q.dist(this._startPos)<this._clickTolerance)){var K=this._startPos;this._lastPos=q,this._box||(this._box=o.create("div","mapboxgl-boxzoom",this._container),this._container.classList.add("mapboxgl-crosshair"),this._fireEvent("boxzoomstart",D));var de=Math.min(K.x,q.x),ne=Math.max(K.x,q.x),we=Math.min(K.y,q.y),Ue=Math.max(K.y,q.y);o.setTransform(this._box,"translate("+de+"px,"+we+"px)"),this._box.style.width=ne-de+"px",this._box.style.height=Ue-we+"px"}}},kt.prototype.mouseupWindow=function(D,J){var q=this;if(this._active&&D.button===0){var K=this._startPos,de=J;if(this.reset(),o.suppressClick(),K.x===de.x&&K.y===de.y)this._fireEvent("boxzoomcancel",D);else return this._map.fire(new i.Event("boxzoomend",{originalEvent:D})),{cameraAnimation:function(ne){return ne.fitScreenCoordinates(K,de,q._map.getBearing(),{linear:!0})}}}},kt.prototype.keydown=function(D){this._active&&D.keyCode===27&&(this.reset(),this._fireEvent("boxzoomcancel",D))},kt.prototype.reset=function(){this._active=!1,this._container.classList.remove("mapboxgl-crosshair"),this._box&&(o.remove(this._box),this._box=null),o.enableDrag(),delete this._startPos,delete this._lastPos},kt.prototype._fireEvent=function(D,J){return this._map.fire(new i.Event(D,{originalEvent:J}))};function nt(Y,D){for(var J={},q=0;q<Y.length;q++)J[Y[q].identifier]=D[q];return J}function jt(Y){for(var D=new i.Point(0,0),J=0,q=Y;J<q.length;J+=1){var K=q[J];D._add(K)}return D.div(Y.length)}var gr=500,yr=500,Hr=30,qr=function(D){this.reset(),this.numTouches=D.numTouches};qr.prototype.reset=function(){delete this.centroid,delete this.startTime,delete this.touches,this.aborted=!1},qr.prototype.touchstart=function(D,J,q){(this.centroid||q.length>this.numTouches)&&(this.aborted=!0),!this.aborted&&(this.startTime===void 0&&(this.startTime=D.timeStamp),q.length===this.numTouches&&(this.centroid=jt(J),this.touches=nt(q,J)))},qr.prototype.touchmove=function(D,J,q){if(!(this.aborted||!this.centroid)){var K=nt(q,J);for(var de in this.touches){var ne=this.touches[de],we=K[de];(!we||we.dist(ne)>Hr)&&(this.aborted=!0)}}},qr.prototype.touchend=function(D,J,q){if((!this.centroid||D.timeStamp-this.startTime>yr)&&(this.aborted=!0),q.length===0){var K=!this.aborted&&this.centroid;if(this.reset(),K)return K}};var _i=function(D){this.singleTap=new qr(D),this.numTaps=D.numTaps,this.reset()};_i.prototype.reset=function(){this.lastTime=1/0,delete this.lastTap,this.count=0,this.singleTap.reset()},_i.prototype.touchstart=function(D,J,q){this.singleTap.touchstart(D,J,q)},_i.prototype.touchmove=function(D,J,q){this.singleTap.touchmove(D,J,q)},_i.prototype.touchend=function(D,J,q){var K=this.singleTap.touchend(D,J,q);if(K){var de=D.timeStamp-this.lastTime<gr,ne=!this.lastTap||this.lastTap.dist(K)<Hr;if((!de||!ne)&&this.reset(),this.count++,this.lastTime=D.timeStamp,this.lastTap=K,this.count===this.numTaps)return this.reset(),K}};var bi=function(){this._zoomIn=new _i({numTouches:1,numTaps:2}),this._zoomOut=new _i({numTouches:2,numTaps:1}),this.reset()};bi.prototype.reset=function(){this._active=!1,this._zoomIn.reset(),this._zoomOut.reset()},bi.prototype.touchstart=function(D,J,q){this._zoomIn.touchstart(D,J,q),this._zoomOut.touchstart(D,J,q)},bi.prototype.touchmove=function(D,J,q){this._zoomIn.touchmove(D,J,q),this._zoomOut.touchmove(D,J,q)},bi.prototype.touchend=function(D,J,q){var K=this,de=this._zoomIn.touchend(D,J,q),ne=this._zoomOut.touchend(D,J,q);if(de)return this._active=!0,D.preventDefault(),setTimeout(function(){return K.reset()},0),{cameraAnimation:function(we){return we.easeTo({duration:300,zoom:we.getZoom()+1,around:we.unproject(de)},{originalEvent:D})}};if(ne)return this._active=!0,D.preventDefault(),setTimeout(function(){return K.reset()},0),{cameraAnimation:function(we){return we.easeTo({duration:300,zoom:we.getZoom()-1,around:we.unproject(ne)},{originalEvent:D})}}},bi.prototype.touchcancel=function(){this.reset()},bi.prototype.enable=function(){this._enabled=!0},bi.prototype.disable=function(){this._enabled=!1,this.reset()},bi.prototype.isEnabled=function(){return this._enabled},bi.prototype.isActive=function(){return this._active};var Zr=0,ai=2,gi={};gi[Zr]=1,gi[ai]=2;function Ii(Y,D){var J=gi[D];return Y.buttons===void 0||(Y.buttons&J)!==J}var Si=function(D){this.reset(),this._clickTolerance=D.clickTolerance||1};Si.prototype.reset=function(){this._active=!1,this._moved=!1,delete this._lastPoint,delete this._eventButton},Si.prototype._correctButton=function(D,J){return!1},Si.prototype._move=function(D,J){return{}},Si.prototype.mousedown=function(D,J){if(!this._lastPoint){var q=o.mouseButton(D);this._correctButton(D,q)&&(this._lastPoint=J,this._eventButton=q)}},Si.prototype.mousemoveWindow=function(D,J){var q=this._lastPoint;if(q){if(D.preventDefault(),Ii(D,this._eventButton)){this.reset();return}if(!(!this._moved&&J.dist(q)<this._clickTolerance))return this._moved=!0,this._lastPoint=J,this._move(q,J)}},Si.prototype.mouseupWindow=function(D){if(this._lastPoint){var J=o.mouseButton(D);J===this._eventButton&&(this._moved&&o.suppressClick(),this.reset())}},Si.prototype.enable=function(){this._enabled=!0},Si.prototype.disable=function(){this._enabled=!1,this.reset()},Si.prototype.isEnabled=function(){return this._enabled},Si.prototype.isActive=function(){return this._active};var ei=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.mousedown=function(q,K){Y.prototype.mousedown.call(this,q,K),this._lastPoint&&(this._active=!0)},D.prototype._correctButton=function(q,K){return K===Zr&&!q.ctrlKey},D.prototype._move=function(q,K){return{around:K,panDelta:K.sub(q)}},D}(Si),Ln=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype._correctButton=function(q,K){return K===Zr&&q.ctrlKey||K===ai},D.prototype._move=function(q,K){var de=.8,ne=(K.x-q.x)*de;if(ne)return this._active=!0,{bearingDelta:ne}},D.prototype.contextmenu=function(q){q.preventDefault()},D}(Si),En=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype._correctButton=function(q,K){return K===Zr&&q.ctrlKey||K===ai},D.prototype._move=function(q,K){var de=-.5,ne=(K.y-q.y)*de;if(ne)return this._active=!0,{pitchDelta:ne}},D.prototype.contextmenu=function(q){q.preventDefault()},D}(Si),Un=function(D){this._minTouches=1,this._clickTolerance=D.clickTolerance||1,this.reset()};Un.prototype.reset=function(){this._active=!1,this._touches={},this._sum=new i.Point(0,0)},Un.prototype.touchstart=function(D,J,q){return this._calculateTransform(D,J,q)},Un.prototype.touchmove=function(D,J,q){if(!(!this._active||q.length<this._minTouches))return D.preventDefault(),this._calculateTransform(D,J,q)},Un.prototype.touchend=function(D,J,q){this._calculateTransform(D,J,q),this._active&&q.length<this._minTouches&&this.reset()},Un.prototype.touchcancel=function(){this.reset()},Un.prototype._calculateTransform=function(D,J,q){q.length>0&&(this._active=!0);var K=nt(q,J),de=new i.Point(0,0),ne=new i.Point(0,0),we=0;for(var Ue in K){var ft=K[Ue],Xt=this._touches[Ue];Xt&&(de._add(ft),ne._add(ft.sub(Xt)),we++,K[Ue]=ft)}if(this._touches=K,!(we<this._minTouches||!ne.mag())){var hr=ne.div(we);if(this._sum._add(hr),!(this._sum.mag()<this._clickTolerance)){var qt=de.div(we);return{around:qt,panDelta:hr}}}},Un.prototype.enable=function(){this._enabled=!0},Un.prototype.disable=function(){this._enabled=!1,this.reset()},Un.prototype.isEnabled=function(){return this._enabled},Un.prototype.isActive=function(){return this._active};var ia=function(){this.reset()};ia.prototype.reset=function(){this._active=!1,delete this._firstTwoTouches},ia.prototype._start=function(D){},ia.prototype._move=function(D,J,q){return{}},ia.prototype.touchstart=function(D,J,q){this._firstTwoTouches||q.length<2||(this._firstTwoTouches=[q[0].identifier,q[1].identifier],this._start([J[0],J[1]]))},ia.prototype.touchmove=function(D,J,q){if(this._firstTwoTouches){D.preventDefault();var K=this._firstTwoTouches,de=K[0],ne=K[1],we=Ea(q,J,de),Ue=Ea(q,J,ne);if(!(!we||!Ue)){var ft=this._aroundCenter?null:we.add(Ue).div(2);return this._move([we,Ue],ft,D)}}},ia.prototype.touchend=function(D,J,q){if(this._firstTwoTouches){var K=this._firstTwoTouches,de=K[0],ne=K[1],we=Ea(q,J,de),Ue=Ea(q,J,ne);we&&Ue||(this._active&&o.suppressClick(),this.reset())}},ia.prototype.touchcancel=function(){this.reset()},ia.prototype.enable=function(D){this._enabled=!0,this._aroundCenter=!!D&&D.around==="center"},ia.prototype.disable=function(){this._enabled=!1,this.reset()},ia.prototype.isEnabled=function(){return this._enabled},ia.prototype.isActive=function(){return this._active};function Ea(Y,D,J){for(var q=0;q<Y.length;q++)if(Y[q].identifier===J)return D[q]}var Ia=.1;function yo(Y,D){return Math.log(Y/D)/Math.LN2}var Da=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.reset=function(){Y.prototype.reset.call(this),delete this._distance,delete this._startDistance},D.prototype._start=function(q){this._startDistance=this._distance=q[0].dist(q[1])},D.prototype._move=function(q,K){var de=this._distance;if(this._distance=q[0].dist(q[1]),!(!this._active&&Math.abs(yo(this._distance,this._startDistance))<Ia))return this._active=!0,{zoomDelta:yo(this._distance,de),pinchAround:K}},D}(ia),go=25;function Is(Y,D){return Y.angleWith(D)*180/Math.PI}var Ms=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.reset=function(){Y.prototype.reset.call(this),delete this._minDiameter,delete this._startVector,delete this._vector},D.prototype._start=function(q){this._startVector=this._vector=q[0].sub(q[1]),this._minDiameter=q[0].dist(q[1])},D.prototype._move=function(q,K){var de=this._vector;if(this._vector=q[0].sub(q[1]),!(!this._active&&this._isBelowThreshold(this._vector)))return this._active=!0,{bearingDelta:Is(this._vector,de),pinchAround:K}},D.prototype._isBelowThreshold=function(q){this._minDiameter=Math.min(this._minDiameter,q.mag());var K=Math.PI*this._minDiameter,de=go/K*360,ne=Is(q,this._startVector);return Math.abs(ne)<de},D}(ia);function Xs(Y){return Math.abs(Y.y)>Math.abs(Y.x)}var Gn=100,ja=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.reset=function(){Y.prototype.reset.call(this),this._valid=void 0,delete this._firstMove,delete this._lastPoints},D.prototype._start=function(q){this._lastPoints=q,Xs(q[0].sub(q[1]))&&(this._valid=!1)},D.prototype._move=function(q,K,de){var ne=q[0].sub(this._lastPoints[0]),we=q[1].sub(this._lastPoints[1]);if(this._valid=this.gestureBeginsVertically(ne,we,de.timeStamp),!!this._valid){this._lastPoints=q,this._active=!0;var Ue=(ne.y+we.y)/2,ft=-.5;return{pitchDelta:Ue*ft}}},D.prototype.gestureBeginsVertically=function(q,K,de){if(this._valid!==void 0)return this._valid;var ne=2,we=q.mag()>=ne,Ue=K.mag()>=ne;if(!(!we&&!Ue)){if(!we||!Ue)return this._firstMove===void 0&&(this._firstMove=de),de-this._firstMove<Gn?void 0:!1;var ft=q.y>0==K.y>0;return Xs(q)&&Xs(K)&&ft}},D}(ia),Fo={panStep:100,bearingStep:15,pitchStep:10},Uo=function(){var D=Fo;this._panStep=D.panStep,this._bearingStep=D.bearingStep,this._pitchStep=D.pitchStep,this._rotationDisabled=!1};Uo.prototype.reset=function(){this._active=!1},Uo.prototype.keydown=function(D){var J=this;if(!(D.altKey||D.ctrlKey||D.metaKey)){var q=0,K=0,de=0,ne=0,we=0;switch(D.keyCode){case 61:case 107:case 171:case 187:q=1;break;case 189:case 109:case 173:q=-1;break;case 37:D.shiftKey?K=-1:(D.preventDefault(),ne=-1);break;case 39:D.shiftKey?K=1:(D.preventDefault(),ne=1);break;case 38:D.shiftKey?de=1:(D.preventDefault(),we=-1);break;case 40:D.shiftKey?de=-1:(D.preventDefault(),we=1);break;default:return}return this._rotationDisabled&&(K=0,de=0),{cameraAnimation:function(Ue){var ft=Ue.getZoom();Ue.easeTo({duration:300,easeId:"keyboardHandler",easing:$s,zoom:q?Math.round(ft)+q*(D.shiftKey?2:1):ft,bearing:Ue.getBearing()+K*J._bearingStep,pitch:Ue.getPitch()+de*J._pitchStep,offset:[-ne*J._panStep,-we*J._panStep],center:Ue.getCenter()},{originalEvent:D})}}}},Uo.prototype.enable=function(){this._enabled=!0},Uo.prototype.disable=function(){this._enabled=!1,this.reset()},Uo.prototype.isEnabled=function(){return this._enabled},Uo.prototype.isActive=function(){return this._active},Uo.prototype.disableRotation=function(){this._rotationDisabled=!0},Uo.prototype.enableRotation=function(){this._rotationDisabled=!1};function $s(Y){return Y*(2-Y)}var Sl=4.000244140625,bu=1/100,dl=1/450,Sc=2,Me=function(D,J){this._map=D,this._el=D.getCanvasContainer(),this._handler=J,this._delta=0,this._defaultZoomRate=bu,this._wheelZoomRate=dl,i.bindAll(["_onTimeout"],this)};Me.prototype.setZoomRate=function(D){this._defaultZoomRate=D},Me.prototype.setWheelZoomRate=function(D){this._wheelZoomRate=D},Me.prototype.isEnabled=function(){return!!this._enabled},Me.prototype.isActive=function(){return!!this._active||this._finishTimeout!==void 0},Me.prototype.isZooming=function(){return!!this._zooming},Me.prototype.enable=function(D){this.isEnabled()||(this._enabled=!0,this._aroundCenter=D&&D.around==="center")},Me.prototype.disable=function(){this.isEnabled()&&(this._enabled=!1)},Me.prototype.wheel=function(D){if(this.isEnabled()){var J=D.deltaMode===i.window.WheelEvent.DOM_DELTA_LINE?D.deltaY*40:D.deltaY,q=i.browser.now(),K=q-(this._lastWheelEventTime||0);this._lastWheelEventTime=q,J!==0&&J%Sl===0?this._type="wheel":J!==0&&Math.abs(J)<4?this._type="trackpad":K>400?(this._type=null,this._lastValue=J,this._timeout=setTimeout(this._onTimeout,40,D)):this._type||(this._type=Math.abs(K*J)<200?"trackpad":"wheel",this._timeout&&(clearTimeout(this._timeout),this._timeout=null,J+=this._lastValue)),D.shiftKey&&J&&(J=J/4),this._type&&(this._lastWheelEvent=D,this._delta-=J,this._active||this._start(D)),D.preventDefault()}},Me.prototype._onTimeout=function(D){this._type="wheel",this._delta-=this._lastValue,this._active||this._start(D)},Me.prototype._start=function(D){if(this._delta){this._frameId&&(this._frameId=null),this._active=!0,this.isZooming()||(this._zooming=!0),this._finishTimeout&&(clearTimeout(this._finishTimeout),delete this._finishTimeout);var J=o.mousePos(this._el,D);this._around=i.LngLat.convert(this._aroundCenter?this._map.getCenter():this._map.unproject(J)),this._aroundPoint=this._map.transform.locationPoint(this._around),this._frameId||(this._frameId=!0,this._handler._triggerRenderFrame())}},Me.prototype.renderFrame=function(){var D=this;if(this._frameId&&(this._frameId=null,!!this.isActive())){var J=this._map.transform;if(this._delta!==0){var q=this._type==="wheel"&&Math.abs(this._delta)>Sl?this._wheelZoomRate:this._defaultZoomRate,K=Sc/(1+Math.exp(-Math.abs(this._delta*q)));this._delta<0&&K!==0&&(K=1/K);var de=typeof this._targetZoom=="number"?J.zoomScale(this._targetZoom):J.scale;this._targetZoom=Math.min(J.maxZoom,Math.max(J.minZoom,J.scaleZoom(de*K))),this._type==="wheel"&&(this._startZoom=J.zoom,this._easing=this._smoothOutEasing(200)),this._delta=0}var ne=typeof this._targetZoom=="number"?this._targetZoom:J.zoom,we=this._startZoom,Ue=this._easing,ft=!1,Xt;if(this._type==="wheel"&&we&&Ue){var hr=Math.min((i.browser.now()-this._lastWheelEventTime)/200,1),qt=Ue(hr);Xt=i.number(we,ne,qt),hr<1?this._frameId||(this._frameId=!0):ft=!0}else Xt=ne,ft=!0;return this._active=!0,ft&&(this._active=!1,this._finishTimeout=setTimeout(function(){D._zooming=!1,D._handler._triggerRenderFrame(),delete D._targetZoom,delete D._finishTimeout},200)),{noInertia:!0,needsRenderFrame:!ft,zoomDelta:Xt-J.zoom,around:this._aroundPoint,originalEvent:this._lastWheelEvent}}},Me.prototype._smoothOutEasing=function(D){var J=i.ease;if(this._prevEase){var q=this._prevEase,K=(i.browser.now()-q.start)/q.duration,de=q.easing(K+.01)-q.easing(K),ne=.27/Math.sqrt(de*de+1e-4)*.01,we=Math.sqrt(.27*.27-ne*ne);J=i.bezier(ne,we,.25,1)}return this._prevEase={start:i.browser.now(),duration:D,easing:J},J},Me.prototype.reset=function(){this._active=!1};var bt=function(D,J){this._clickZoom=D,this._tapZoom=J};bt.prototype.enable=function(){this._clickZoom.enable(),this._tapZoom.enable()},bt.prototype.disable=function(){this._clickZoom.disable(),this._tapZoom.disable()},bt.prototype.isEnabled=function(){return this._clickZoom.isEnabled()&&this._tapZoom.isEnabled()},bt.prototype.isActive=function(){return this._clickZoom.isActive()||this._tapZoom.isActive()};var zt=function(){this.reset()};zt.prototype.reset=function(){this._active=!1},zt.prototype.dblclick=function(D,J){return D.preventDefault(),{cameraAnimation:function(q){q.easeTo({duration:300,zoom:q.getZoom()+(D.shiftKey?-1:1),around:q.unproject(J)},{originalEvent:D})}}},zt.prototype.enable=function(){this._enabled=!0},zt.prototype.disable=function(){this._enabled=!1,this.reset()},zt.prototype.isEnabled=function(){return this._enabled},zt.prototype.isActive=function(){return this._active};var Rr=function(){this._tap=new _i({numTouches:1,numTaps:1}),this.reset()};Rr.prototype.reset=function(){this._active=!1,delete this._swipePoint,delete this._swipeTouch,delete this._tapTime,this._tap.reset()},Rr.prototype.touchstart=function(D,J,q){this._swipePoint||(this._tapTime&&D.timeStamp-this._tapTime>gr&&this.reset(),this._tapTime?q.length>0&&(this._swipePoint=J[0],this._swipeTouch=q[0].identifier):this._tap.touchstart(D,J,q))},Rr.prototype.touchmove=function(D,J,q){if(!this._tapTime)this._tap.touchmove(D,J,q);else if(this._swipePoint){if(q[0].identifier!==this._swipeTouch)return;var K=J[0],de=K.y-this._swipePoint.y;return this._swipePoint=K,D.preventDefault(),this._active=!0,{zoomDelta:de/128}}},Rr.prototype.touchend=function(D,J,q){if(this._tapTime)this._swipePoint&&q.length===0&&this.reset();else{var K=this._tap.touchend(D,J,q);K&&(this._tapTime=D.timeStamp)}},Rr.prototype.touchcancel=function(){this.reset()},Rr.prototype.enable=function(){this._enabled=!0},Rr.prototype.disable=function(){this._enabled=!1,this.reset()},Rr.prototype.isEnabled=function(){return this._enabled},Rr.prototype.isActive=function(){return this._active};var jr=function(D,J,q){this._el=D,this._mousePan=J,this._touchPan=q};jr.prototype.enable=function(D){this._inertiaOptions=D||{},this._mousePan.enable(),this._touchPan.enable(),this._el.classList.add("mapboxgl-touch-drag-pan")},jr.prototype.disable=function(){this._mousePan.disable(),this._touchPan.disable(),this._el.classList.remove("mapboxgl-touch-drag-pan")},jr.prototype.isEnabled=function(){return this._mousePan.isEnabled()&&this._touchPan.isEnabled()},jr.prototype.isActive=function(){return this._mousePan.isActive()||this._touchPan.isActive()};var Nr=function(D,J,q){this._pitchWithRotate=D.pitchWithRotate,this._mouseRotate=J,this._mousePitch=q};Nr.prototype.enable=function(){this._mouseRotate.enable(),this._pitchWithRotate&&this._mousePitch.enable()},Nr.prototype.disable=function(){this._mouseRotate.disable(),this._mousePitch.disable()},Nr.prototype.isEnabled=function(){return this._mouseRotate.isEnabled()&&(!this._pitchWithRotate||this._mousePitch.isEnabled())},Nr.prototype.isActive=function(){return this._mouseRotate.isActive()||this._mousePitch.isActive()};var Gr=function(D,J,q,K){this._el=D,this._touchZoom=J,this._touchRotate=q,this._tapDragZoom=K,this._rotationDisabled=!1,this._enabled=!0};Gr.prototype.enable=function(D){this._touchZoom.enable(D),this._rotationDisabled||this._touchRotate.enable(D),this._tapDragZoom.enable(),this._el.classList.add("mapboxgl-touch-zoom-rotate")},Gr.prototype.disable=function(){this._touchZoom.disable(),this._touchRotate.disable(),this._tapDragZoom.disable(),this._el.classList.remove("mapboxgl-touch-zoom-rotate")},Gr.prototype.isEnabled=function(){return this._touchZoom.isEnabled()&&(this._rotationDisabled||this._touchRotate.isEnabled())&&this._tapDragZoom.isEnabled()},Gr.prototype.isActive=function(){return this._touchZoom.isActive()||this._touchRotate.isActive()||this._tapDragZoom.isActive()},Gr.prototype.disableRotation=function(){this._rotationDisabled=!0,this._touchRotate.disable()},Gr.prototype.enableRotation=function(){this._rotationDisabled=!1,this._touchZoom.isEnabled()&&this._touchRotate.enable()};var mi=function(Y){return Y.zoom||Y.drag||Y.pitch||Y.rotate},Ui=function(Y){function D(){Y.apply(this,arguments)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D}(i.Event);function qi(Y){return Y.panDelta&&Y.panDelta.mag()||Y.zoomDelta||Y.bearingDelta||Y.pitchDelta}var Ei=function(D,J){this._map=D,this._el=this._map.getCanvasContainer(),this._handlers=[],this._handlersById={},this._changes=[],this._inertia=new Tl(D),this._bearingSnap=J.bearingSnap,this._previousActiveHandlers={},this._eventsInProgress={},this._addDefaultHandlers(J),i.bindAll(["handleEvent","handleWindowEvent"],this);var q=this._el;this._listeners=[[q,"touchstart",{passive:!0}],[q,"touchmove",{passive:!1}],[q,"touchend",void 0],[q,"touchcancel",void 0],[q,"mousedown",void 0],[q,"mousemove",void 0],[q,"mouseup",void 0],[i.window.document,"mousemove",{capture:!0}],[i.window.document,"mouseup",void 0],[q,"mouseover",void 0],[q,"mouseout",void 0],[q,"dblclick",void 0],[q,"click",void 0],[q,"keydown",{capture:!1}],[q,"keyup",void 0],[q,"wheel",{passive:!1}],[q,"contextmenu",void 0],[i.window,"blur",void 0]];for(var K=0,de=this._listeners;K<de.length;K+=1){var ne=de[K],we=ne[0],Ue=ne[1],ft=ne[2];o.addEventListener(we,Ue,we===i.window.document?this.handleWindowEvent:this.handleEvent,ft)}};Ei.prototype.destroy=function(){for(var D=0,J=this._listeners;D<J.length;D+=1){var q=J[D],K=q[0],de=q[1],ne=q[2];o.removeEventListener(K,de,K===i.window.document?this.handleWindowEvent:this.handleEvent,ne)}},Ei.prototype._addDefaultHandlers=function(D){var J=this._map,q=J.getCanvasContainer();this._add("mapEvent",new He(J,D));var K=J.boxZoom=new kt(J,D);this._add("boxZoom",K);var de=new bi,ne=new zt;J.doubleClickZoom=new bt(ne,de),this._add("tapZoom",de),this._add("clickZoom",ne);var we=new Rr;this._add("tapDragZoom",we);var Ue=J.touchPitch=new ja;this._add("touchPitch",Ue);var ft=new Ln(D),Xt=new En(D);J.dragRotate=new Nr(D,ft,Xt),this._add("mouseRotate",ft,["mousePitch"]),this._add("mousePitch",Xt,["mouseRotate"]);var hr=new ei(D),qt=new Un(D);J.dragPan=new jr(q,hr,qt),this._add("mousePan",hr),this._add("touchPan",qt,["touchZoom","touchRotate"]);var Ve=new Ms,Qe=new Da;J.touchZoomRotate=new Gr(q,Qe,Ve,we),this._add("touchRotate",Ve,["touchPan","touchZoom"]),this._add("touchZoom",Qe,["touchPan","touchRotate"]);var at=J.scrollZoom=new Me(J,this);this._add("scrollZoom",at,["mousePan"]);var Ct=J.keyboard=new Uo;this._add("keyboard",Ct),this._add("blockableMapEvent",new Ye(J));for(var Ot=0,Rt=["boxZoom","doubleClickZoom","tapDragZoom","touchPitch","dragRotate","dragPan","touchZoomRotate","scrollZoom","keyboard"];Ot<Rt.length;Ot+=1){var Bt=Rt[Ot];D.interactive&&D[Bt]&&J[Bt].enable(D[Bt])}},Ei.prototype._add=function(D,J,q){this._handlers.push({handlerName:D,handler:J,allowed:q}),this._handlersById[D]=J},Ei.prototype.stop=function(D){if(!this._updatingCamera){for(var J=0,q=this._handlers;J<q.length;J+=1){var K=q[J],de=K.handler;de.reset()}this._inertia.clear(),this._fireEvents({},{},D),this._changes=[]}},Ei.prototype.isActive=function(){for(var D=0,J=this._handlers;D<J.length;D+=1){var q=J[D],K=q.handler;if(K.isActive())return!0}return!1},Ei.prototype.isZooming=function(){return!!this._eventsInProgress.zoom||this._map.scrollZoom.isZooming()},Ei.prototype.isRotating=function(){return!!this._eventsInProgress.rotate},Ei.prototype.isMoving=function(){return!!mi(this._eventsInProgress)||this.isZooming()},Ei.prototype._blockedByActive=function(D,J,q){for(var K in D)if(K!==q&&(!J||J.indexOf(K)<0))return!0;return!1},Ei.prototype.handleWindowEvent=function(D){this.handleEvent(D,D.type+"Window")},Ei.prototype._getMapTouches=function(D){for(var J=[],q=0,K=D;q<K.length;q+=1){var de=K[q],ne=de.target;this._el.contains(ne)&&J.push(de)}return J},Ei.prototype.handleEvent=function(D,J){if(D.type==="blur"){this.stop(!0);return}this._updatingCamera=!0;for(var q=D.type==="renderFrame"?void 0:D,K={needsRenderFrame:!1},de={},ne={},we=D.touches?this._getMapTouches(D.touches):void 0,Ue=we?o.touchPos(this._el,we):o.mousePos(this._el,D),ft=0,Xt=this._handlers;ft<Xt.length;ft+=1){var hr=Xt[ft],qt=hr.handlerName,Ve=hr.handler,Qe=hr.allowed;if(Ve.isEnabled()){var at=void 0;this._blockedByActive(ne,Qe,qt)?Ve.reset():Ve[J||D.type]&&(at=Ve[J||D.type](D,Ue,we),this.mergeHandlerResult(K,de,at,qt,q),at&&at.needsRenderFrame&&this._triggerRenderFrame()),(at||Ve.isActive())&&(ne[qt]=Ve)}}var Ct={};for(var Ot in this._previousActiveHandlers)ne[Ot]||(Ct[Ot]=q);this._previousActiveHandlers=ne,(Object.keys(Ct).length||qi(K))&&(this._changes.push([K,de,Ct]),this._triggerRenderFrame()),(Object.keys(ne).length||qi(K))&&this._map._stop(!0),this._updatingCamera=!1;var Rt=K.cameraAnimation;Rt&&(this._inertia.clear(),this._fireEvents({},{},!0),this._changes=[],Rt(this._map))},Ei.prototype.mergeHandlerResult=function(D,J,q,K,de){if(q){i.extend(D,q);var ne={handlerName:K,originalEvent:q.originalEvent||de};q.zoomDelta!==void 0&&(J.zoom=ne),q.panDelta!==void 0&&(J.drag=ne),q.pitchDelta!==void 0&&(J.pitch=ne),q.bearingDelta!==void 0&&(J.rotate=ne)}},Ei.prototype._applyChanges=function(){for(var D={},J={},q={},K=0,de=this._changes;K<de.length;K+=1){var ne=de[K],we=ne[0],Ue=ne[1],ft=ne[2];we.panDelta&&(D.panDelta=(D.panDelta||new i.Point(0,0))._add(we.panDelta)),we.zoomDelta&&(D.zoomDelta=(D.zoomDelta||0)+we.zoomDelta),we.bearingDelta&&(D.bearingDelta=(D.bearingDelta||0)+we.bearingDelta),we.pitchDelta&&(D.pitchDelta=(D.pitchDelta||0)+we.pitchDelta),we.around!==void 0&&(D.around=we.around),we.pinchAround!==void 0&&(D.pinchAround=we.pinchAround),we.noInertia&&(D.noInertia=we.noInertia),i.extend(J,Ue),i.extend(q,ft)}this._updateMapTransform(D,J,q),this._changes=[]},Ei.prototype._updateMapTransform=function(D,J,q){var K=this._map,de=K.transform;if(!qi(D))return this._fireEvents(J,q,!0);var ne=D.panDelta,we=D.zoomDelta,Ue=D.bearingDelta,ft=D.pitchDelta,Xt=D.around,hr=D.pinchAround;hr!==void 0&&(Xt=hr),K._stop(!0),Xt=Xt||K.transform.centerPoint;var qt=de.pointLocation(ne?Xt.sub(ne):Xt);Ue&&(de.bearing+=Ue),ft&&(de.pitch+=ft),we&&(de.zoom+=we),de.setLocationAtPoint(qt,Xt),this._map._update(),D.noInertia||this._inertia.record(D),this._fireEvents(J,q,!0)},Ei.prototype._fireEvents=function(D,J,q){var K=this,de=mi(this._eventsInProgress),ne=mi(D),we={};for(var Ue in D){var ft=D[Ue],Xt=ft.originalEvent;this._eventsInProgress[Ue]||(we[Ue+"start"]=Xt),this._eventsInProgress[Ue]=D[Ue]}!de&&ne&&this._fireEvent("movestart",ne.originalEvent);for(var hr in we)this._fireEvent(hr,we[hr]);ne&&this._fireEvent("move",ne.originalEvent);for(var qt in D){var Ve=D[qt],Qe=Ve.originalEvent;this._fireEvent(qt,Qe)}var at={},Ct;for(var Ot in this._eventsInProgress){var Rt=this._eventsInProgress[Ot],Bt=Rt.handlerName,Dt=Rt.originalEvent;this._handlersById[Bt].isActive()||(delete this._eventsInProgress[Ot],Ct=J[Bt]||Dt,at[Ot+"end"]=Ct)}for(var yt in at)this._fireEvent(yt,at[yt]);var Pt=mi(this._eventsInProgress);if(q&&(de||ne)&&!Pt){this._updatingCamera=!0;var ht=this._inertia._onMoveEnd(this._map.dragPan._inertiaOptions),ur=function(br){return br!==0&&-K._bearingSnap<br&&br<K._bearingSnap};ht?(ur(ht.bearing||this._map.getBearing())&&(ht.bearing=0),this._map.easeTo(ht,{originalEvent:Ct})):(this._map.fire(new i.Event("moveend",{originalEvent:Ct})),ur(this._map.getBearing())&&this._map.resetNorth()),this._updatingCamera=!1}},Ei.prototype._fireEvent=function(D,J){this._map.fire(new i.Event(D,J?{originalEvent:J}:{}))},Ei.prototype._requestFrame=function(){var D=this;return this._map.triggerRepaint(),this._map._renderTaskQueue.add(function(J){delete D._frameId,D.handleEvent(new Ui("renderFrame",{timeStamp:J})),D._applyChanges()})},Ei.prototype._triggerRenderFrame=function(){this._frameId===void 0&&(this._frameId=this._requestFrame())};var Hn=function(Y){function D(J,q){Y.call(this),this._moving=!1,this._zooming=!1,this.transform=J,this._bearingSnap=q.bearingSnap,i.bindAll(["_renderFrameCallback"],this)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.getCenter=function(){return new i.LngLat(this.transform.center.lng,this.transform.center.lat)},D.prototype.setCenter=function(q,K){return this.jumpTo({center:q},K)},D.prototype.panBy=function(q,K,de){return q=i.Point.convert(q).mult(-1),this.panTo(this.transform.center,i.extend({offset:q},K),de)},D.prototype.panTo=function(q,K,de){return this.easeTo(i.extend({center:q},K),de)},D.prototype.getZoom=function(){return this.transform.zoom},D.prototype.setZoom=function(q,K){return this.jumpTo({zoom:q},K),this},D.prototype.zoomTo=function(q,K,de){return this.easeTo(i.extend({zoom:q},K),de)},D.prototype.zoomIn=function(q,K){return this.zoomTo(this.getZoom()+1,q,K),this},D.prototype.zoomOut=function(q,K){return this.zoomTo(this.getZoom()-1,q,K),this},D.prototype.getBearing=function(){return this.transform.bearing},D.prototype.setBearing=function(q,K){return this.jumpTo({bearing:q},K),this},D.prototype.getPadding=function(){return this.transform.padding},D.prototype.setPadding=function(q,K){return this.jumpTo({padding:q},K),this},D.prototype.rotateTo=function(q,K,de){return this.easeTo(i.extend({bearing:q},K),de)},D.prototype.resetNorth=function(q,K){return this.rotateTo(0,i.extend({duration:1e3},q),K),this},D.prototype.resetNorthPitch=function(q,K){return this.easeTo(i.extend({bearing:0,pitch:0,duration:1e3},q),K),this},D.prototype.snapToNorth=function(q,K){return Math.abs(this.getBearing())<this._bearingSnap?this.resetNorth(q,K):this},D.prototype.getPitch=function(){return this.transform.pitch},D.prototype.setPitch=function(q,K){return this.jumpTo({pitch:q},K),this},D.prototype.cameraForBounds=function(q,K){q=i.LngLatBounds.convert(q);var de=K&&K.bearing||0;return this._cameraForBoxAndBearing(q.getNorthWest(),q.getSouthEast(),de,K)},D.prototype._cameraForBoxAndBearing=function(q,K,de,ne){var we={top:0,bottom:0,right:0,left:0};if(ne=i.extend({padding:we,offset:[0,0],maxZoom:this.transform.maxZoom},ne),typeof ne.padding=="number"){var Ue=ne.padding;ne.padding={top:Ue,bottom:Ue,right:Ue,left:Ue}}ne.padding=i.extend(we,ne.padding);var ft=this.transform,Xt=ft.padding,hr=ft.project(i.LngLat.convert(q)),qt=ft.project(i.LngLat.convert(K)),Ve=hr.rotate(-de*Math.PI/180),Qe=qt.rotate(-de*Math.PI/180),at=new i.Point(Math.max(Ve.x,Qe.x),Math.max(Ve.y,Qe.y)),Ct=new i.Point(Math.min(Ve.x,Qe.x),Math.min(Ve.y,Qe.y)),Ot=at.sub(Ct),Rt=(ft.width-(Xt.left+Xt.right+ne.padding.left+ne.padding.right))/Ot.x,Bt=(ft.height-(Xt.top+Xt.bottom+ne.padding.top+ne.padding.bottom))/Ot.y;if(Bt<0||Rt<0){i.warnOnce("Map cannot fit within canvas with the given bounds, padding, and/or offset.");return}var Dt=Math.min(ft.scaleZoom(ft.scale*Math.min(Rt,Bt)),ne.maxZoom),yt=typeof ne.offset.x=="number"?new i.Point(ne.offset.x,ne.offset.y):i.Point.convert(ne.offset),Pt=(ne.padding.left-ne.padding.right)/2,ht=(ne.padding.top-ne.padding.bottom)/2,ur=new i.Point(Pt,ht),br=ur.rotate(de*Math.PI/180),Ur=yt.add(br),Di=Ur.mult(ft.scale/ft.zoomScale(Dt)),fi=ft.unproject(hr.add(qt).div(2).sub(Di));return{center:fi,zoom:Dt,bearing:de}},D.prototype.fitBounds=function(q,K,de){return this._fitInternal(this.cameraForBounds(q,K),K,de)},D.prototype.fitScreenCoordinates=function(q,K,de,ne,we){return this._fitInternal(this._cameraForBoxAndBearing(this.transform.pointLocation(i.Point.convert(q)),this.transform.pointLocation(i.Point.convert(K)),de,ne),ne,we)},D.prototype._fitInternal=function(q,K,de){return q?(K=i.extend(q,K),delete K.padding,K.linear?this.easeTo(K,de):this.flyTo(K,de)):this},D.prototype.jumpTo=function(q,K){this.stop();var de=this.transform,ne=!1,we=!1,Ue=!1;return"zoom"in q&&de.zoom!==+q.zoom&&(ne=!0,de.zoom=+q.zoom),q.center!==void 0&&(de.center=i.LngLat.convert(q.center)),"bearing"in q&&de.bearing!==+q.bearing&&(we=!0,de.bearing=+q.bearing),"pitch"in q&&de.pitch!==+q.pitch&&(Ue=!0,de.pitch=+q.pitch),q.padding!=null&&!de.isPaddingEqual(q.padding)&&(de.padding=q.padding),this.fire(new i.Event("movestart",K)).fire(new i.Event("move",K)),ne&&this.fire(new i.Event("zoomstart",K)).fire(new i.Event("zoom",K)).fire(new i.Event("zoomend",K)),we&&this.fire(new i.Event("rotatestart",K)).fire(new i.Event("rotate",K)).fire(new i.Event("rotateend",K)),Ue&&this.fire(new i.Event("pitchstart",K)).fire(new i.Event("pitch",K)).fire(new i.Event("pitchend",K)),this.fire(new i.Event("moveend",K))},D.prototype.easeTo=function(q,K){var de=this;this._stop(!1,q.easeId),q=i.extend({offset:[0,0],duration:500,easing:i.ease},q),(q.animate===!1||!q.essential&&i.browser.prefersReducedMotion)&&(q.duration=0);var ne=this.transform,we=this.getZoom(),Ue=this.getBearing(),ft=this.getPitch(),Xt=this.getPadding(),hr="zoom"in q?+q.zoom:we,qt="bearing"in q?this._normalizeBearing(q.bearing,Ue):Ue,Ve="pitch"in q?+q.pitch:ft,Qe="padding"in q?q.padding:ne.padding,at=i.Point.convert(q.offset),Ct=ne.centerPoint.add(at),Ot=ne.pointLocation(Ct),Rt=i.LngLat.convert(q.center||Ot);this._normalizeCenter(Rt);var Bt=ne.project(Ot),Dt=ne.project(Rt).sub(Bt),yt=ne.zoomScale(hr-we),Pt,ht;q.around&&(Pt=i.LngLat.convert(q.around),ht=ne.locationPoint(Pt));var ur={moving:this._moving,zooming:this._zooming,rotating:this._rotating,pitching:this._pitching};return this._zooming=this._zooming||hr!==we,this._rotating=this._rotating||Ue!==qt,this._pitching=this._pitching||Ve!==ft,this._padding=!ne.isPaddingEqual(Qe),this._easeId=q.easeId,this._prepareEase(K,q.noMoveStart,ur),this._ease(function(br){if(de._zooming&&(ne.zoom=i.number(we,hr,br)),de._rotating&&(ne.bearing=i.number(Ue,qt,br)),de._pitching&&(ne.pitch=i.number(ft,Ve,br)),de._padding&&(ne.interpolatePadding(Xt,Qe,br),Ct=ne.centerPoint.add(at)),Pt)ne.setLocationAtPoint(Pt,ht);else{var Ur=ne.zoomScale(ne.zoom-we),Di=hr>we?Math.min(2,yt):Math.max(.5,yt),fi=Math.pow(Di,1-br),Ti=ne.unproject(Bt.add(Dt.mult(br*fi)).mult(Ur));ne.setLocationAtPoint(ne.renderWorldCopies?Ti.wrap():Ti,Ct)}de._fireMoveEvents(K)},function(br){de._afterEase(K,br)},q),this},D.prototype._prepareEase=function(q,K,de){de===void 0&&(de={}),this._moving=!0,!K&&!de.moving&&this.fire(new i.Event("movestart",q)),this._zooming&&!de.zooming&&this.fire(new i.Event("zoomstart",q)),this._rotating&&!de.rotating&&this.fire(new i.Event("rotatestart",q)),this._pitching&&!de.pitching&&this.fire(new i.Event("pitchstart",q))},D.prototype._fireMoveEvents=function(q){this.fire(new i.Event("move",q)),this._zooming&&this.fire(new i.Event("zoom",q)),this._rotating&&this.fire(new i.Event("rotate",q)),this._pitching&&this.fire(new i.Event("pitch",q))},D.prototype._afterEase=function(q,K){if(!(this._easeId&&K&&this._easeId===K)){delete this._easeId;var de=this._zooming,ne=this._rotating,we=this._pitching;this._moving=!1,this._zooming=!1,this._rotating=!1,this._pitching=!1,this._padding=!1,de&&this.fire(new i.Event("zoomend",q)),ne&&this.fire(new i.Event("rotateend",q)),we&&this.fire(new i.Event("pitchend",q)),this.fire(new i.Event("moveend",q))}},D.prototype.flyTo=function(q,K){var de=this;if(!q.essential&&i.browser.prefersReducedMotion){var ne=i.pick(q,["center","zoom","bearing","pitch","around"]);return this.jumpTo(ne,K)}this.stop(),q=i.extend({offset:[0,0],speed:1.2,curve:1.42,easing:i.ease},q);var we=this.transform,Ue=this.getZoom(),ft=this.getBearing(),Xt=this.getPitch(),hr=this.getPadding(),qt="zoom"in q?i.clamp(+q.zoom,we.minZoom,we.maxZoom):Ue,Ve="bearing"in q?this._normalizeBearing(q.bearing,ft):ft,Qe="pitch"in q?+q.pitch:Xt,at="padding"in q?q.padding:we.padding,Ct=we.zoomScale(qt-Ue),Ot=i.Point.convert(q.offset),Rt=we.centerPoint.add(Ot),Bt=we.pointLocation(Rt),Dt=i.LngLat.convert(q.center||Bt);this._normalizeCenter(Dt);var yt=we.project(Bt),Pt=we.project(Dt).sub(yt),ht=q.curve,ur=Math.max(we.width,we.height),br=ur/Ct,Ur=Pt.mag();if("minZoom"in q){var Di=i.clamp(Math.min(q.minZoom,Ue,qt),we.minZoom,we.maxZoom),fi=ur/we.zoomScale(Di-Ue);ht=Math.sqrt(fi/Ur*2)}var Ti=ht*ht;function gn(so){var Zo=(br*br-ur*ur+(so?-1:1)*Ti*Ti*Ur*Ur)/(2*(so?br:ur)*Ti*Ur);return Math.log(Math.sqrt(Zo*Zo+1)-Zo)}function rn(so){return(Math.exp(so)-Math.exp(-so))/2}function Ci(so){return(Math.exp(so)+Math.exp(-so))/2}function Bi(so){return rn(so)/Ci(so)}var Gi=gn(0),sn=function(so){return Ci(Gi)/Ci(Gi+ht*so)},zn=function(so){return ur*((Ci(Gi)*Bi(Gi+ht*so)-rn(Gi))/Ti)/Ur},Ja=(gn(1)-Gi)/ht;if(Math.abs(Ur)<1e-6||!isFinite(Ja)){if(Math.abs(ur-br)<1e-6)return this.easeTo(q,K);var co=br<ur?-1:1;Ja=Math.abs(Math.log(br/ur))/ht,zn=function(){return 0},sn=function(so){return Math.exp(co*ht*so)}}if("duration"in q)q.duration=+q.duration;else{var ts="screenSpeed"in q?+q.screenSpeed/ht:+q.speed;q.duration=1e3*Ja/ts}return q.maxDuration&&q.duration>q.maxDuration&&(q.duration=0),this._zooming=!0,this._rotating=ft!==Ve,this._pitching=Qe!==Xt,this._padding=!we.isPaddingEqual(at),this._prepareEase(K,!1),this._ease(function(so){var Zo=so*Ja,ms=1/sn(Zo);we.zoom=so===1?qt:Ue+we.scaleZoom(ms),de._rotating&&(we.bearing=i.number(ft,Ve,so)),de._pitching&&(we.pitch=i.number(Xt,Qe,so)),de._padding&&(we.interpolatePadding(hr,at,so),Rt=we.centerPoint.add(Ot));var ou=so===1?Dt:we.unproject(yt.add(Pt.mult(zn(Zo))).mult(ms));we.setLocationAtPoint(we.renderWorldCopies?ou.wrap():ou,Rt),de._fireMoveEvents(K)},function(){return de._afterEase(K)},q),this},D.prototype.isEasing=function(){return!!this._easeFrameId},D.prototype.stop=function(){return this._stop()},D.prototype._stop=function(q,K){if(this._easeFrameId&&(this._cancelRenderFrame(this._easeFrameId),delete this._easeFrameId,delete this._onEaseFrame),this._onEaseEnd){var de=this._onEaseEnd;delete this._onEaseEnd,de.call(this,K)}if(!q){var ne=this.handlers;ne&&ne.stop(!1)}return this},D.prototype._ease=function(q,K,de){de.animate===!1||de.duration===0?(q(1),K()):(this._easeStart=i.browser.now(),this._easeOptions=de,this._onEaseFrame=q,this._onEaseEnd=K,this._easeFrameId=this._requestRenderFrame(this._renderFrameCallback))},D.prototype._renderFrameCallback=function(){var q=Math.min((i.browser.now()-this._easeStart)/this._easeOptions.duration,1);this._onEaseFrame(this._easeOptions.easing(q)),q<1?this._easeFrameId=this._requestRenderFrame(this._renderFrameCallback):this.stop()},D.prototype._normalizeBearing=function(q,K){q=i.wrap(q,-180,180);var de=Math.abs(q-K);return Math.abs(q-360-K)<de&&(q-=360),Math.abs(q+360-K)<de&&(q+=360),q},D.prototype._normalizeCenter=function(q){var K=this.transform;if(!(!K.renderWorldCopies||K.lngRange)){var de=q.lng-K.center.lng;q.lng+=de>180?-360:de<-180?360:0}},D}(i.Evented),en=function(D){D===void 0&&(D={}),this.options=D,i.bindAll(["_toggleAttribution","_updateEditLink","_updateData","_updateCompact"],this)};en.prototype.getDefaultPosition=function(){return"bottom-right"},en.prototype.onAdd=function(D){var J=this.options&&this.options.compact;return this._map=D,this._container=o.create("div","mapboxgl-ctrl mapboxgl-ctrl-attrib"),this._compactButton=o.create("button","mapboxgl-ctrl-attrib-button",this._container),this._compactButton.addEventListener("click",this._toggleAttribution),this._setElementTitle(this._compactButton,"ToggleAttribution"),this._innerContainer=o.create("div","mapboxgl-ctrl-attrib-inner",this._container),this._innerContainer.setAttribute("role","list"),J&&this._container.classList.add("mapboxgl-compact"),this._updateAttributions(),this._updateEditLink(),this._map.on("styledata",this._updateData),this._map.on("sourcedata",this._updateData),this._map.on("moveend",this._updateEditLink),J===void 0&&(this._map.on("resize",this._updateCompact),this._updateCompact()),this._container},en.prototype.onRemove=function(){o.remove(this._container),this._map.off("styledata",this._updateData),this._map.off("sourcedata",this._updateData),this._map.off("moveend",this._updateEditLink),this._map.off("resize",this._updateCompact),this._map=void 0,this._attribHTML=void 0},en.prototype._setElementTitle=function(D,J){var q=this._map._getUIString("AttributionControl."+J);D.title=q,D.setAttribute("aria-label",q)},en.prototype._toggleAttribution=function(){this._container.classList.contains("mapboxgl-compact-show")?(this._container.classList.remove("mapboxgl-compact-show"),this._compactButton.setAttribute("aria-pressed","false")):(this._container.classList.add("mapboxgl-compact-show"),this._compactButton.setAttribute("aria-pressed","true"))},en.prototype._updateEditLink=function(){var D=this._editLink;D||(D=this._editLink=this._container.querySelector(".mapbox-improve-map"));var J=[{key:"owner",value:this.styleOwner},{key:"id",value:this.styleId},{key:"access_token",value:this._map._requestManager._customAccessToken||i.config.ACCESS_TOKEN}];if(D){var q=J.reduce(function(K,de,ne){return de.value&&(K+=de.key+"="+de.value+(ne<J.length-1?"&":"")),K},"?");D.href=i.config.FEEDBACK_URL+"/"+q+(this._map._hash?this._map._hash.getHashString(!0):""),D.rel="noopener nofollow",this._setElementTitle(D,"MapFeedback")}},en.prototype._updateData=function(D){D&&(D.sourceDataType==="metadata"||D.sourceDataType==="visibility"||D.dataType==="style")&&(this._updateAttributions(),this._updateEditLink())},en.prototype._updateAttributions=function(){if(this._map.style){var D=[];if(this.options.customAttribution&&(Array.isArray(this.options.customAttribution)?D=D.concat(this.options.customAttribution.map(function(Ue){return typeof Ue!="string"?"":Ue})):typeof this.options.customAttribution=="string"&&D.push(this.options.customAttribution)),this._map.style.stylesheet){var J=this._map.style.stylesheet;this.styleOwner=J.owner,this.styleId=J.id}var q=this._map.style.sourceCaches;for(var K in q){var de=q[K];if(de.used){var ne=de.getSource();ne.attribution&&D.indexOf(ne.attribution)<0&&D.push(ne.attribution)}}D.sort(function(Ue,ft){return Ue.length-ft.length}),D=D.filter(function(Ue,ft){for(var Xt=ft+1;Xt<D.length;Xt++)if(D[Xt].indexOf(Ue)>=0)return!1;return!0});var we=D.join(" | ");we!==this._attribHTML&&(this._attribHTML=we,D.length?(this._innerContainer.innerHTML=we,this._container.classList.remove("mapboxgl-attrib-empty")):this._container.classList.add("mapboxgl-attrib-empty"),this._editLink=null)}},en.prototype._updateCompact=function(){this._map.getCanvasContainer().offsetWidth<=640?this._container.classList.add("mapboxgl-compact"):this._container.classList.remove("mapboxgl-compact","mapboxgl-compact-show")};var Wi=function(){i.bindAll(["_updateLogo"],this),i.bindAll(["_updateCompact"],this)};Wi.prototype.onAdd=function(D){this._map=D,this._container=o.create("div","mapboxgl-ctrl");var J=o.create("a","mapboxgl-ctrl-logo");return J.target="_blank",J.rel="noopener nofollow",J.href="https://www.mapbox.com/",J.setAttribute("aria-label",this._map._getUIString("LogoControl.Title")),J.setAttribute("rel","noopener nofollow"),this._container.appendChild(J),this._container.style.display="none",this._map.on("sourcedata",this._updateLogo),this._updateLogo(),this._map.on("resize",this._updateCompact),this._updateCompact(),this._container},Wi.prototype.onRemove=function(){o.remove(this._container),this._map.off("sourcedata",this._updateLogo),this._map.off("resize",this._updateCompact)},Wi.prototype.getDefaultPosition=function(){return"bottom-left"},Wi.prototype._updateLogo=function(D){(!D||D.sourceDataType==="metadata")&&(this._container.style.display=this._logoRequired()?"block":"none")},Wi.prototype._logoRequired=function(){if(this._map.style){var D=this._map.style.sourceCaches;for(var J in D){var q=D[J].getSource();if(q.mapbox_logo)return!0}return!1}},Wi.prototype._updateCompact=function(){var D=this._container.children;if(D.length){var J=D[0];this._map.getCanvasContainer().offsetWidth<250?J.classList.add("mapboxgl-compact"):J.classList.remove("mapboxgl-compact")}};var si=function(){this._queue=[],this._id=0,this._cleared=!1,this._currentlyRunning=!1};si.prototype.add=function(D){var J=++this._id,q=this._queue;return q.push({callback:D,id:J,cancelled:!1}),J},si.prototype.remove=function(D){for(var J=this._currentlyRunning,q=J?this._queue.concat(J):this._queue,K=0,de=q;K<de.length;K+=1){var ne=de[K];if(ne.id===D){ne.cancelled=!0;return}}},si.prototype.run=function(D){D===void 0&&(D=0);var J=this._currentlyRunning=this._queue;this._queue=[];for(var q=0,K=J;q<K.length;q+=1){var de=K[q];if(!de.cancelled&&(de.callback(D),this._cleared))break}this._cleared=!1,this._currentlyRunning=!1},si.prototype.clear=function(){this._currentlyRunning&&(this._cleared=!0),this._queue=[]};var Mr={"AttributionControl.ToggleAttribution":"Toggle attribution","AttributionControl.MapFeedback":"Map feedback","FullscreenControl.Enter":"Enter fullscreen","FullscreenControl.Exit":"Exit fullscreen","GeolocateControl.FindMyLocation":"Find my location","GeolocateControl.LocationNotAvailable":"Location not available","LogoControl.Title":"Mapbox logo","NavigationControl.ResetBearing":"Reset bearing to north","NavigationControl.ZoomIn":"Zoom in","NavigationControl.ZoomOut":"Zoom out","ScaleControl.Feet":"ft","ScaleControl.Meters":"m","ScaleControl.Kilometers":"km","ScaleControl.Miles":"mi","ScaleControl.NauticalMiles":"nm"},Yr=i.window.HTMLImageElement,xi=i.window.HTMLElement,Ri=i.window.ImageBitmap,ci=-2,an=22,Zi=0,Bn=60,hi={center:[0,0],zoom:0,bearing:0,pitch:0,minZoom:ci,maxZoom:an,minPitch:Zi,maxPitch:Bn,interactive:!0,scrollZoom:!0,boxZoom:!0,dragRotate:!0,dragPan:!0,keyboard:!0,doubleClickZoom:!0,touchZoomRotate:!0,touchPitch:!0,bearingSnap:7,clickTolerance:3,pitchWithRotate:!0,hash:!1,attributionControl:!0,failIfMajorPerformanceCaveat:!1,preserveDrawingBuffer:!1,trackResize:!0,renderWorldCopies:!0,refreshExpiredTiles:!0,maxTileCacheSize:null,localIdeographFontFamily:"sans-serif",transformRequest:null,accessToken:null,fadeDuration:300,crossSourceCollisions:!0},li=function(Y){function D(q){var K=this;if(q=i.extend({},hi,q),q.minZoom!=null&&q.maxZoom!=null&&q.minZoom>q.maxZoom)throw new Error("maxZoom must be greater than or equal to minZoom");if(q.minPitch!=null&&q.maxPitch!=null&&q.minPitch>q.maxPitch)throw new Error("maxPitch must be greater than or equal to minPitch");if(q.minPitch!=null&&q.minPitch<Zi)throw new Error("minPitch must be greater than or equal to "+Zi);if(q.maxPitch!=null&&q.maxPitch>Bn)throw new Error("maxPitch must be less than or equal to "+Bn);var de=new wo(q.minZoom,q.maxZoom,q.minPitch,q.maxPitch,q.renderWorldCopies);if(Y.call(this,de,q),this._interactive=q.interactive,this._maxTileCacheSize=q.maxTileCacheSize,this._failIfMajorPerformanceCaveat=q.failIfMajorPerformanceCaveat,this._preserveDrawingBuffer=q.preserveDrawingBuffer,this._antialias=q.antialias,this._trackResize=q.trackResize,this._bearingSnap=q.bearingSnap,this._refreshExpiredTiles=q.refreshExpiredTiles,this._fadeDuration=q.fadeDuration,this._crossSourceCollisions=q.crossSourceCollisions,this._crossFadingFactor=1,this._collectResourceTiming=q.collectResourceTiming,this._renderTaskQueue=new si,this._controls=[],this._mapId=i.uniqueId(),this._locale=i.extend({},Mr,q.locale),this._clickTolerance=q.clickTolerance,this._requestManager=new i.RequestManager(q.transformRequest,q.accessToken),typeof q.container=="string"){if(this._container=i.window.document.getElementById(q.container),!this._container)throw new Error("Container '"+q.container+"' not found.")}else if(q.container instanceof xi)this._container=q.container;else throw new Error("Invalid type: 'container' must be a String or HTMLElement.");if(q.maxBounds&&this.setMaxBounds(q.maxBounds),i.bindAll(["_onWindowOnline","_onWindowResize","_onMapScroll","_contextLost","_contextRestored"],this),this._setupContainer(),this._setupPainter(),this.painter===void 0)throw new Error("Failed to initialize WebGL.");this.on("move",function(){return K._update(!1)}),this.on("moveend",function(){return K._update(!1)}),this.on("zoom",function(){return K._update(!0)}),typeof i.window!="undefined"&&(i.window.addEventListener("online",this._onWindowOnline,!1),i.window.addEventListener("resize",this._onWindowResize,!1),i.window.addEventListener("orientationchange",this._onWindowResize,!1)),this.handlers=new Ei(this,q);var ne=typeof q.hash=="string"&&q.hash||void 0;this._hash=q.hash&&new Ul(ne).addTo(this),(!this._hash||!this._hash._onHashChange())&&(this.jumpTo({center:q.center,zoom:q.zoom,bearing:q.bearing,pitch:q.pitch}),q.bounds&&(this.resize(),this.fitBounds(q.bounds,i.extend({},q.fitBoundsOptions,{duration:0})))),this.resize(),this._localIdeographFontFamily=q.localIdeographFontFamily,q.style&&this.setStyle(q.style,{localIdeographFontFamily:q.localIdeographFontFamily}),q.attributionControl&&this.addControl(new en({customAttribution:q.customAttribution})),this.addControl(new Wi,q.logoPosition),this.on("style.load",function(){K.transform.unmodified&&K.jumpTo(K.style.stylesheet)}),this.on("data",function(we){K._update(we.dataType==="style"),K.fire(new i.Event(we.dataType+"data",we))}),this.on("dataloading",function(we){K.fire(new i.Event(we.dataType+"dataloading",we))})}Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D;var J={showTileBoundaries:{configurable:!0},showPadding:{configurable:!0},showCollisionBoxes:{configurable:!0},showOverdrawInspector:{configurable:!0},repaint:{configurable:!0},vertices:{configurable:!0},version:{configurable:!0}};return D.prototype._getMapId=function(){return this._mapId},D.prototype.addControl=function(K,de){if(de===void 0&&(K.getDefaultPosition?de=K.getDefaultPosition():de="top-right"),!K||!K.onAdd)return this.fire(new i.ErrorEvent(new Error("Invalid argument to map.addControl(). Argument must be a control with onAdd and onRemove methods.")));var ne=K.onAdd(this);this._controls.push(K);var we=this._controlPositions[de];return de.indexOf("bottom")!==-1?we.insertBefore(ne,we.firstChild):we.appendChild(ne),this},D.prototype.removeControl=function(K){if(!K||!K.onRemove)return this.fire(new i.ErrorEvent(new Error("Invalid argument to map.removeControl(). Argument must be a control with onAdd and onRemove methods.")));var de=this._controls.indexOf(K);return de>-1&&this._controls.splice(de,1),K.onRemove(this),this},D.prototype.hasControl=function(K){return this._controls.indexOf(K)>-1},D.prototype.resize=function(K){var de=this._containerDimensions(),ne=de[0],we=de[1];this._resizeCanvas(ne,we),this.transform.resize(ne,we),this.painter.resize(ne,we);var Ue=!this._moving;return Ue&&(this.stop(),this.fire(new i.Event("movestart",K)).fire(new i.Event("move",K))),this.fire(new i.Event("resize",K)),Ue&&this.fire(new i.Event("moveend",K)),this},D.prototype.getBounds=function(){return this.transform.getBounds()},D.prototype.getMaxBounds=function(){return this.transform.getMaxBounds()},D.prototype.setMaxBounds=function(K){return this.transform.setMaxBounds(i.LngLatBounds.convert(K)),this._update()},D.prototype.setMinZoom=function(K){if(K=K==null?ci:K,K>=ci&&K<=this.transform.maxZoom)return this.transform.minZoom=K,this._update(),this.getZoom()<K&&this.setZoom(K),this;throw new Error("minZoom must be between "+ci+" and the current maxZoom, inclusive")},D.prototype.getMinZoom=function(){return this.transform.minZoom},D.prototype.setMaxZoom=function(K){if(K=K==null?an:K,K>=this.transform.minZoom)return this.transform.maxZoom=K,this._update(),this.getZoom()>K&&this.setZoom(K),this;throw new Error("maxZoom must be greater than the current minZoom")},D.prototype.getMaxZoom=function(){return this.transform.maxZoom},D.prototype.setMinPitch=function(K){if(K=K==null?Zi:K,K<Zi)throw new Error("minPitch must be greater than or equal to "+Zi);if(K>=Zi&&K<=this.transform.maxPitch)return this.transform.minPitch=K,this._update(),this.getPitch()<K&&this.setPitch(K),this;throw new Error("minPitch must be between "+Zi+" and the current maxPitch, inclusive")},D.prototype.getMinPitch=function(){return this.transform.minPitch},D.prototype.setMaxPitch=function(K){if(K=K==null?Bn:K,K>Bn)throw new Error("maxPitch must be less than or equal to "+Bn);if(K>=this.transform.minPitch)return this.transform.maxPitch=K,this._update(),this.getPitch()>K&&this.setPitch(K),this;throw new Error("maxPitch must be greater than the current minPitch")},D.prototype.getMaxPitch=function(){return this.transform.maxPitch},D.prototype.getRenderWorldCopies=function(){return this.transform.renderWorldCopies},D.prototype.setRenderWorldCopies=function(K){return this.transform.renderWorldCopies=K,this._update()},D.prototype.project=function(K){return this.transform.locationPoint(i.LngLat.convert(K))},D.prototype.unproject=function(K){return this.transform.pointLocation(i.Point.convert(K))},D.prototype.isMoving=function(){return this._moving||this.handlers.isMoving()},D.prototype.isZooming=function(){return this._zooming||this.handlers.isZooming()},D.prototype.isRotating=function(){return this._rotating||this.handlers.isRotating()},D.prototype._createDelegatedListener=function(K,de,ne){var we=this,Ue;if(K==="mouseenter"||K==="mouseover"){var ft=!1,Xt=function(Ct){var Ot=we.getLayer(de)?we.queryRenderedFeatures(Ct.point,{layers:[de]}):[];Ot.length?ft||(ft=!0,ne.call(we,new se(K,we,Ct.originalEvent,{features:Ot}))):ft=!1},hr=function(){ft=!1};return{layer:de,listener:ne,delegates:{mousemove:Xt,mouseout:hr}}}else if(K==="mouseleave"||K==="mouseout"){var qt=!1,Ve=function(Ct){var Ot=we.getLayer(de)?we.queryRenderedFeatures(Ct.point,{layers:[de]}):[];Ot.length?qt=!0:qt&&(qt=!1,ne.call(we,new se(K,we,Ct.originalEvent)))},Qe=function(Ct){qt&&(qt=!1,ne.call(we,new se(K,we,Ct.originalEvent)))};return{layer:de,listener:ne,delegates:{mousemove:Ve,mouseout:Qe}}}else{var at=function(Ct){var Ot=we.getLayer(de)?we.queryRenderedFeatures(Ct.point,{layers:[de]}):[];Ot.length&&(Ct.features=Ot,ne.call(we,Ct),delete Ct.features)};return{layer:de,listener:ne,delegates:(Ue={},Ue[K]=at,Ue)}}},D.prototype.on=function(K,de,ne){if(ne===void 0)return Y.prototype.on.call(this,K,de);var we=this._createDelegatedListener(K,de,ne);this._delegatedListeners=this._delegatedListeners||{},this._delegatedListeners[K]=this._delegatedListeners[K]||[],this._delegatedListeners[K].push(we);for(var Ue in we.delegates)this.on(Ue,we.delegates[Ue]);return this},D.prototype.once=function(K,de,ne){if(ne===void 0)return Y.prototype.once.call(this,K,de);var we=this._createDelegatedListener(K,de,ne);for(var Ue in we.delegates)this.once(Ue,we.delegates[Ue]);return this},D.prototype.off=function(K,de,ne){var we=this;if(ne===void 0)return Y.prototype.off.call(this,K,de);var Ue=function(ft){for(var Xt=ft[K],hr=0;hr<Xt.length;hr++){var qt=Xt[hr];if(qt.layer===de&&qt.listener===ne){for(var Ve in qt.delegates)we.off(Ve,qt.delegates[Ve]);return Xt.splice(hr,1),we}}};return this._delegatedListeners&&this._delegatedListeners[K]&&Ue(this._delegatedListeners),this},D.prototype.queryRenderedFeatures=function(K,de){if(!this.style)return[];de===void 0&&K!==void 0&&!(K instanceof i.Point)&&!Array.isArray(K)&&(de=K,K=void 0),de=de||{},K=K||[[0,0],[this.transform.width,this.transform.height]];var ne;if(K instanceof i.Point||typeof K[0]=="number")ne=[i.Point.convert(K)];else{var we=i.Point.convert(K[0]),Ue=i.Point.convert(K[1]);ne=[we,new i.Point(Ue.x,we.y),Ue,new i.Point(we.x,Ue.y),we]}return this.style.queryRenderedFeatures(ne,de,this.transform)},D.prototype.querySourceFeatures=function(K,de){return this.style.querySourceFeatures(K,de)},D.prototype.setStyle=function(K,de){return de=i.extend({},{localIdeographFontFamily:this._localIdeographFontFamily},de),de.diff!==!1&&de.localIdeographFontFamily===this._localIdeographFontFamily&&this.style&&K?(this._diffStyle(K,de),this):(this._localIdeographFontFamily=de.localIdeographFontFamily,this._updateStyle(K,de))},D.prototype._getUIString=function(K){var de=this._locale[K];if(de==null)throw new Error("Missing UI string '"+K+"'");return de},D.prototype._updateStyle=function(K,de){if(this.style&&(this.style.setEventedParent(null),this.style._remove()),K)this.style=new yu(this,de||{});else return delete this.style,this;return this.style.setEventedParent(this,{style:this.style}),typeof K=="string"?this.style.loadURL(K):this.style.loadJSON(K),this},D.prototype._lazyInitEmptyStyle=function(){this.style||(this.style=new yu(this,{}),this.style.setEventedParent(this,{style:this.style}),this.style.loadEmpty())},D.prototype._diffStyle=function(K,de){var ne=this;if(typeof K=="string"){var we=this._requestManager.normalizeStyleURL(K),Ue=this._requestManager.transformRequest(we,i.ResourceType.Style);i.getJSON(Ue,function(ft,Xt){ft?ne.fire(new i.ErrorEvent(ft)):Xt&&ne._updateDiff(Xt,de)})}else typeof K=="object"&&this._updateDiff(K,de)},D.prototype._updateDiff=function(K,de){try{this.style.setState(K)&&this._update(!0)}catch(ne){i.warnOnce("Unable to perform style diff: "+(ne.message||ne.error||ne)+".  Rebuilding the style from scratch."),this._updateStyle(K,de)}},D.prototype.getStyle=function(){if(this.style)return this.style.serialize()},D.prototype.isStyleLoaded=function(){return this.style?this.style.loaded():i.warnOnce("There is no style added to the map.")},D.prototype.addSource=function(K,de){return this._lazyInitEmptyStyle(),this.style.addSource(K,de),this._update(!0)},D.prototype.isSourceLoaded=function(K){var de=this.style&&this.style.sourceCaches[K];if(de===void 0){this.fire(new i.ErrorEvent(new Error("There is no source with ID '"+K+"'")));return}return de.loaded()},D.prototype.areTilesLoaded=function(){var K=this.style&&this.style.sourceCaches;for(var de in K){var ne=K[de],we=ne._tiles;for(var Ue in we){var ft=we[Ue];if(!(ft.state==="loaded"||ft.state==="errored"))return!1}}return!0},D.prototype.addSourceType=function(K,de,ne){return this._lazyInitEmptyStyle(),this.style.addSourceType(K,de,ne)},D.prototype.removeSource=function(K){return this.style.removeSource(K),this._update(!0)},D.prototype.getSource=function(K){return this.style.getSource(K)},D.prototype.addImage=function(K,de,ne){ne===void 0&&(ne={});var we=ne.pixelRatio;we===void 0&&(we=1);var Ue=ne.sdf;Ue===void 0&&(Ue=!1);var ft=ne.stretchX,Xt=ne.stretchY,hr=ne.content;this._lazyInitEmptyStyle();var qt=0;if(de instanceof Yr||Ri&&de instanceof Ri){var Ve=i.browser.getImageData(de),Qe=Ve.width,at=Ve.height,Ct=Ve.data;this.style.addImage(K,{data:new i.RGBAImage({width:Qe,height:at},Ct),pixelRatio:we,stretchX:ft,stretchY:Xt,content:hr,sdf:Ue,version:qt})}else{if(de.width===void 0||de.height===void 0)return this.fire(new i.ErrorEvent(new Error("Invalid arguments to map.addImage(). The second argument must be an `HTMLImageElement`, `ImageData`, `ImageBitmap`, or object with `width`, `height`, and `data` properties with the same format as `ImageData`")));var Ot=de.width,Rt=de.height,Bt=de.data,Dt=de;this.style.addImage(K,{data:new i.RGBAImage({width:Ot,height:Rt},new Uint8Array(Bt)),pixelRatio:we,stretchX:ft,stretchY:Xt,content:hr,sdf:Ue,version:qt,userImage:Dt}),Dt.onAdd&&Dt.onAdd(this,K)}},D.prototype.updateImage=function(K,de){var ne=this.style.getImage(K);if(!ne)return this.fire(new i.ErrorEvent(new Error("The map has no image with that id. If you are adding a new image use `map.addImage(...)` instead.")));var we=de instanceof Yr||Ri&&de instanceof Ri?i.browser.getImageData(de):de,Ue=we.width,ft=we.height,Xt=we.data;if(Ue===void 0||ft===void 0)return this.fire(new i.ErrorEvent(new Error("Invalid arguments to map.updateImage(). The second argument must be an `HTMLImageElement`, `ImageData`, `ImageBitmap`, or object with `width`, `height`, and `data` properties with the same format as `ImageData`")));if(Ue!==ne.data.width||ft!==ne.data.height)return this.fire(new i.ErrorEvent(new Error("The width and height of the updated image must be that same as the previous version of the image")));var hr=!(de instanceof Yr||Ri&&de instanceof Ri);ne.data.replace(Xt,hr),this.style.updateImage(K,ne)},D.prototype.hasImage=function(K){return K?!!this.style.getImage(K):(this.fire(new i.ErrorEvent(new Error("Missing required image id"))),!1)},D.prototype.removeImage=function(K){this.style.removeImage(K)},D.prototype.loadImage=function(K,de){i.getImage(this._requestManager.transformRequest(K,i.ResourceType.Image),de)},D.prototype.listImages=function(){return this.style.listImages()},D.prototype.addLayer=function(K,de){return this._lazyInitEmptyStyle(),this.style.addLayer(K,de),this._update(!0)},D.prototype.moveLayer=function(K,de){return this.style.moveLayer(K,de),this._update(!0)},D.prototype.removeLayer=function(K){return this.style.removeLayer(K),this._update(!0)},D.prototype.getLayer=function(K){return this.style.getLayer(K)},D.prototype.setLayerZoomRange=function(K,de,ne){return this.style.setLayerZoomRange(K,de,ne),this._update(!0)},D.prototype.setFilter=function(K,de,ne){return ne===void 0&&(ne={}),this.style.setFilter(K,de,ne),this._update(!0)},D.prototype.getFilter=function(K){return this.style.getFilter(K)},D.prototype.setPaintProperty=function(K,de,ne,we){return we===void 0&&(we={}),this.style.setPaintProperty(K,de,ne,we),this._update(!0)},D.prototype.getPaintProperty=function(K,de){return this.style.getPaintProperty(K,de)},D.prototype.setLayoutProperty=function(K,de,ne,we){return we===void 0&&(we={}),this.style.setLayoutProperty(K,de,ne,we),this._update(!0)},D.prototype.getLayoutProperty=function(K,de){return this.style.getLayoutProperty(K,de)},D.prototype.setLight=function(K,de){return de===void 0&&(de={}),this._lazyInitEmptyStyle(),this.style.setLight(K,de),this._update(!0)},D.prototype.getLight=function(){return this.style.getLight()},D.prototype.setFeatureState=function(K,de){return this.style.setFeatureState(K,de),this._update()},D.prototype.removeFeatureState=function(K,de){return this.style.removeFeatureState(K,de),this._update()},D.prototype.getFeatureState=function(K){return this.style.getFeatureState(K)},D.prototype.getContainer=function(){return this._container},D.prototype.getCanvasContainer=function(){return this._canvasContainer},D.prototype.getCanvas=function(){return this._canvas},D.prototype._containerDimensions=function(){var K=0,de=0;return this._container&&(K=this._container.clientWidth||400,de=this._container.clientHeight||300),[K,de]},D.prototype._detectMissingCSS=function(){var K=i.window.getComputedStyle(this._missingCSSCanary).getPropertyValue("background-color");K!=="rgb(250, 128, 114)"&&i.warnOnce("This page appears to be missing CSS declarations for Mapbox GL JS, which may cause the map to display incorrectly. Please ensure your page includes mapbox-gl.css, as described in https://www.mapbox.com/mapbox-gl-js/api/.")},D.prototype._setupContainer=function(){var K=this._container;K.classList.add("mapboxgl-map");var de=this._missingCSSCanary=o.create("div","mapboxgl-canary",K);de.style.visibility="hidden",this._detectMissingCSS();var ne=this._canvasContainer=o.create("div","mapboxgl-canvas-container",K);this._interactive&&ne.classList.add("mapboxgl-interactive"),this._canvas=o.create("canvas","mapboxgl-canvas",ne),this._canvas.addEventListener("webglcontextlost",this._contextLost,!1),this._canvas.addEventListener("webglcontextrestored",this._contextRestored,!1),this._canvas.setAttribute("tabindex","0"),this._canvas.setAttribute("aria-label","Map"),this._canvas.setAttribute("role","region");var we=this._containerDimensions();this._resizeCanvas(we[0],we[1]);var Ue=this._controlContainer=o.create("div","mapboxgl-control-container",K),ft=this._controlPositions={};["top-left","top-right","bottom-left","bottom-right"].forEach(function(Xt){ft[Xt]=o.create("div","mapboxgl-ctrl-"+Xt,Ue)}),this._container.addEventListener("scroll",this._onMapScroll,!1)},D.prototype._resizeCanvas=function(K,de){var ne=i.browser.devicePixelRatio||1;this._canvas.width=ne*K,this._canvas.height=ne*de,this._canvas.style.width=K+"px",this._canvas.style.height=de+"px"},D.prototype._setupPainter=function(){var K=i.extend({},a.webGLContextAttributes,{failIfMajorPerformanceCaveat:this._failIfMajorPerformanceCaveat,preserveDrawingBuffer:this._preserveDrawingBuffer,antialias:this._antialias||!1}),de=this._canvas.getContext("webgl",K)||this._canvas.getContext("experimental-webgl",K);if(!de){this.fire(new i.ErrorEvent(new Error("Failed to initialize WebGL")));return}this.painter=new mo(de,this.transform),i.webpSupported.testSupport(de)},D.prototype._contextLost=function(K){K.preventDefault(),this._frame&&(this._frame.cancel(),this._frame=null),this.fire(new i.Event("webglcontextlost",{originalEvent:K}))},D.prototype._contextRestored=function(K){this._setupPainter(),this.resize(),this._update(),this.fire(new i.Event("webglcontextrestored",{originalEvent:K}))},D.prototype._onMapScroll=function(K){if(K.target===this._container)return this._container.scrollTop=0,this._container.scrollLeft=0,!1},D.prototype.loaded=function(){return!this._styleDirty&&!this._sourcesDirty&&!!this.style&&this.style.loaded()},D.prototype._update=function(K){return this.style?(this._styleDirty=this._styleDirty||K,this._sourcesDirty=!0,this.triggerRepaint(),this):this},D.prototype._requestRenderFrame=function(K){return this._update(),this._renderTaskQueue.add(K)},D.prototype._cancelRenderFrame=function(K){this._renderTaskQueue.remove(K)},D.prototype._render=function(K){var de=this,ne,we=0,Ue=this.painter.context.extTimerQuery;if(this.listens("gpu-timing-frame")&&(ne=Ue.createQueryEXT(),Ue.beginQueryEXT(Ue.TIME_ELAPSED_EXT,ne),we=i.browser.now()),this.painter.context.setDirty(),this.painter.setBaseState(),this._renderTaskQueue.run(K),!this._removed){var ft=!1;if(this.style&&this._styleDirty){this._styleDirty=!1;var Xt=this.transform.zoom,hr=i.browser.now();this.style.zoomHistory.update(Xt,hr);var qt=new i.EvaluationParameters(Xt,{now:hr,fadeDuration:this._fadeDuration,zoomHistory:this.style.zoomHistory,transition:this.style.getTransition()}),Ve=qt.crossFadingFactor();(Ve!==1||Ve!==this._crossFadingFactor)&&(ft=!0,this._crossFadingFactor=Ve),this.style.update(qt)}if(this.style&&this._sourcesDirty&&(this._sourcesDirty=!1,this.style._updateSources(this.transform)),this._placementDirty=this.style&&this.style._updatePlacement(this.painter.transform,this.showCollisionBoxes,this._fadeDuration,this._crossSourceCollisions),this.painter.render(this.style,{showTileBoundaries:this.showTileBoundaries,showOverdrawInspector:this._showOverdrawInspector,rotating:this.isRotating(),zooming:this.isZooming(),moving:this.isMoving(),fadeDuration:this._fadeDuration,showPadding:this.showPadding,gpuTiming:!!this.listens("gpu-timing-layer")}),this.fire(new i.Event("render")),this.loaded()&&!this._loaded&&(this._loaded=!0,this.fire(new i.Event("load"))),this.style&&(this.style.hasTransitions()||ft)&&(this._styleDirty=!0),this.style&&!this._placementDirty&&this.style._releaseSymbolFadeTiles(),this.listens("gpu-timing-frame")){var Qe=i.browser.now()-we;Ue.endQueryEXT(Ue.TIME_ELAPSED_EXT,ne),setTimeout(function(){var Ot=Ue.getQueryObjectEXT(ne,Ue.QUERY_RESULT_EXT)/1e6;Ue.deleteQueryEXT(ne),de.fire(new i.Event("gpu-timing-frame",{cpuTime:Qe,gpuTime:Ot}))},50)}if(this.listens("gpu-timing-layer")){var at=this.painter.collectGpuTimers();setTimeout(function(){var Ot=de.painter.queryGpuTimers(at);de.fire(new i.Event("gpu-timing-layer",{layerTimes:Ot}))},50)}var Ct=this._sourcesDirty||this._styleDirty||this._placementDirty;return Ct||this._repaint?this.triggerRepaint():!this.isMoving()&&this.loaded()&&this.fire(new i.Event("idle")),this._loaded&&!this._fullyLoaded&&!Ct&&(this._fullyLoaded=!0),this}},D.prototype.remove=function(){this._hash&&this._hash.remove();for(var K=0,de=this._controls;K<de.length;K+=1){var ne=de[K];ne.onRemove(this)}this._controls=[],this._frame&&(this._frame.cancel(),this._frame=null),this._renderTaskQueue.clear(),this.painter.destroy(),this.handlers.destroy(),delete this.handlers,this.setStyle(null),typeof i.window!="undefined"&&(i.window.removeEventListener("resize",this._onWindowResize,!1),i.window.removeEventListener("orientationchange",this._onWindowResize,!1),i.window.removeEventListener("online",this._onWindowOnline,!1));var we=this.painter.context.gl.getExtension("WEBGL_lose_context");we&&we.loseContext&&we.loseContext(),mn(this._canvasContainer),mn(this._controlContainer),mn(this._missingCSSCanary),this._container.classList.remove("mapboxgl-map"),this._removed=!0,this.fire(new i.Event("remove"))},D.prototype.triggerRepaint=function(){var K=this;this.style&&!this._frame&&(this._frame=i.browser.frame(function(de){K._frame=null,K._render(de)}))},D.prototype._onWindowOnline=function(){this._update()},D.prototype._onWindowResize=function(K){this._trackResize&&this.resize({originalEvent:K})._update()},J.showTileBoundaries.get=function(){return!!this._showTileBoundaries},J.showTileBoundaries.set=function(q){this._showTileBoundaries!==q&&(this._showTileBoundaries=q,this._update())},J.showPadding.get=function(){return!!this._showPadding},J.showPadding.set=function(q){this._showPadding!==q&&(this._showPadding=q,this._update())},J.showCollisionBoxes.get=function(){return!!this._showCollisionBoxes},J.showCollisionBoxes.set=function(q){this._showCollisionBoxes!==q&&(this._showCollisionBoxes=q,q?this.style._generateCollisionBoxes():this._update())},J.showOverdrawInspector.get=function(){return!!this._showOverdrawInspector},J.showOverdrawInspector.set=function(q){this._showOverdrawInspector!==q&&(this._showOverdrawInspector=q,this._update())},J.repaint.get=function(){return!!this._repaint},J.repaint.set=function(q){this._repaint!==q&&(this._repaint=q,this.triggerRepaint())},J.vertices.get=function(){return!!this._vertices},J.vertices.set=function(q){this._vertices=q,this._update()},D.prototype._setCacheLimits=function(K,de){i.setCacheLimits(K,de)},J.version.get=function(){return i.version},Object.defineProperties(D.prototype,J),D}(Hn);function mn(Y){Y.parentNode&&Y.parentNode.removeChild(Y)}var Ji={showCompass:!0,showZoom:!0,visualizePitch:!1},Vi=function(D){var J=this;this.options=i.extend({},Ji,D),this._container=o.create("div","mapboxgl-ctrl mapboxgl-ctrl-group"),this._container.addEventListener("contextmenu",function(q){return q.preventDefault()}),this.options.showZoom&&(i.bindAll(["_setButtonTitle","_updateZoomButtons"],this),this._zoomInButton=this._createButton("mapboxgl-ctrl-zoom-in",function(q){return J._map.zoomIn({},{originalEvent:q})}),o.create("span","mapboxgl-ctrl-icon",this._zoomInButton).setAttribute("aria-hidden",!0),this._zoomOutButton=this._createButton("mapboxgl-ctrl-zoom-out",function(q){return J._map.zoomOut({},{originalEvent:q})}),o.create("span","mapboxgl-ctrl-icon",this._zoomOutButton).setAttribute("aria-hidden",!0)),this.options.showCompass&&(i.bindAll(["_rotateCompassArrow"],this),this._compass=this._createButton("mapboxgl-ctrl-compass",function(q){J.options.visualizePitch?J._map.resetNorthPitch({},{originalEvent:q}):J._map.resetNorth({},{originalEvent:q})}),this._compassIcon=o.create("span","mapboxgl-ctrl-icon",this._compass),this._compassIcon.setAttribute("aria-hidden",!0))};Vi.prototype._updateZoomButtons=function(){var D=this._map.getZoom(),J=D===this._map.getMaxZoom(),q=D===this._map.getMinZoom();this._zoomInButton.disabled=J,this._zoomOutButton.disabled=q,this._zoomInButton.setAttribute("aria-disabled",J.toString()),this._zoomOutButton.setAttribute("aria-disabled",q.toString())},Vi.prototype._rotateCompassArrow=function(){var D=this.options.visualizePitch?"scale("+1/Math.pow(Math.cos(this._map.transform.pitch*(Math.PI/180)),.5)+") rotateX("+this._map.transform.pitch+"deg) rotateZ("+this._map.transform.angle*(180/Math.PI)+"deg)":"rotate("+this._map.transform.angle*(180/Math.PI)+"deg)";this._compassIcon.style.transform=D},Vi.prototype.onAdd=function(D){return this._map=D,this.options.showZoom&&(this._setButtonTitle(this._zoomInButton,"ZoomIn"),this._setButtonTitle(this._zoomOutButton,"ZoomOut"),this._map.on("zoom",this._updateZoomButtons),this._updateZoomButtons()),this.options.showCompass&&(this._setButtonTitle(this._compass,"ResetBearing"),this.options.visualizePitch&&this._map.on("pitch",this._rotateCompassArrow),this._map.on("rotate",this._rotateCompassArrow),this._rotateCompassArrow(),this._handler=new Ni(this._map,this._compass,this.options.visualizePitch)),this._container},Vi.prototype.onRemove=function(){o.remove(this._container),this.options.showZoom&&this._map.off("zoom",this._updateZoomButtons),this.options.showCompass&&(this.options.visualizePitch&&this._map.off("pitch",this._rotateCompassArrow),this._map.off("rotate",this._rotateCompassArrow),this._handler.off(),delete this._handler),delete this._map},Vi.prototype._createButton=function(D,J){var q=o.create("button",D,this._container);return q.type="button",q.addEventListener("click",J),q},Vi.prototype._setButtonTitle=function(D,J){var q=this._map._getUIString("NavigationControl."+J);D.title=q,D.setAttribute("aria-label",q)};var Ni=function(D,J,q){q===void 0&&(q=!1),this._clickTolerance=10,this.element=J,this.mouseRotate=new Ln({clickTolerance:D.dragRotate._mouseRotate._clickTolerance}),this.map=D,q&&(this.mousePitch=new En({clickTolerance:D.dragRotate._mousePitch._clickTolerance})),i.bindAll(["mousedown","mousemove","mouseup","touchstart","touchmove","touchend","reset"],this),o.addEventListener(J,"mousedown",this.mousedown),o.addEventListener(J,"touchstart",this.touchstart,{passive:!1}),o.addEventListener(J,"touchmove",this.touchmove),o.addEventListener(J,"touchend",this.touchend),o.addEventListener(J,"touchcancel",this.reset)};Ni.prototype.down=function(D,J){this.mouseRotate.mousedown(D,J),this.mousePitch&&this.mousePitch.mousedown(D,J),o.disableDrag()},Ni.prototype.move=function(D,J){var q=this.map,K=this.mouseRotate.mousemoveWindow(D,J);if(K&&K.bearingDelta&&q.setBearing(q.getBearing()+K.bearingDelta),this.mousePitch){var de=this.mousePitch.mousemoveWindow(D,J);de&&de.pitchDelta&&q.setPitch(q.getPitch()+de.pitchDelta)}},Ni.prototype.off=function(){var D=this.element;o.removeEventListener(D,"mousedown",this.mousedown),o.removeEventListener(D,"touchstart",this.touchstart,{passive:!1}),o.removeEventListener(D,"touchmove",this.touchmove),o.removeEventListener(D,"touchend",this.touchend),o.removeEventListener(D,"touchcancel",this.reset),this.offTemp()},Ni.prototype.offTemp=function(){o.enableDrag(),o.removeEventListener(i.window,"mousemove",this.mousemove),o.removeEventListener(i.window,"mouseup",this.mouseup)},Ni.prototype.mousedown=function(D){this.down(i.extend({},D,{ctrlKey:!0,preventDefault:function(){return D.preventDefault()}}),o.mousePos(this.element,D)),o.addEventListener(i.window,"mousemove",this.mousemove),o.addEventListener(i.window,"mouseup",this.mouseup)},Ni.prototype.mousemove=function(D){this.move(D,o.mousePos(this.element,D))},Ni.prototype.mouseup=function(D){this.mouseRotate.mouseupWindow(D),this.mousePitch&&this.mousePitch.mouseupWindow(D),this.offTemp()},Ni.prototype.touchstart=function(D){D.targetTouches.length!==1?this.reset():(this._startPos=this._lastPos=o.touchPos(this.element,D.targetTouches)[0],this.down({type:"mousedown",button:0,ctrlKey:!0,preventDefault:function(){return D.preventDefault()}},this._startPos))},Ni.prototype.touchmove=function(D){D.targetTouches.length!==1?this.reset():(this._lastPos=o.touchPos(this.element,D.targetTouches)[0],this.move({preventDefault:function(){return D.preventDefault()}},this._lastPos))},Ni.prototype.touchend=function(D){D.targetTouches.length===0&&this._startPos&&this._lastPos&&this._startPos.dist(this._lastPos)<this._clickTolerance&&this.element.click(),this.reset()},Ni.prototype.reset=function(){this.mouseRotate.reset(),this.mousePitch&&this.mousePitch.reset(),delete this._startPos,delete this._lastPos,this.offTemp()};function pn(Y,D,J){if(Y=new i.LngLat(Y.lng,Y.lat),D){var q=new i.LngLat(Y.lng-360,Y.lat),K=new i.LngLat(Y.lng+360,Y.lat),de=J.locationPoint(Y).distSqr(D);J.locationPoint(q).distSqr(D)<de?Y=q:J.locationPoint(K).distSqr(D)<de&&(Y=K)}for(;Math.abs(Y.lng-J.center.lng)>180;){var ne=J.locationPoint(Y);if(ne.x>=0&&ne.y>=0&&ne.x<=J.width&&ne.y<=J.height)break;Y.lng>J.center.lng?Y.lng-=360:Y.lng+=360}return Y}var Vn={center:"translate(-50%,-50%)",top:"translate(-50%,0)","top-left":"translate(0,0)","top-right":"translate(-100%,0)",bottom:"translate(-50%,-100%)","bottom-left":"translate(0,-100%)","bottom-right":"translate(-100%,-100%)",left:"translate(0,-50%)",right:"translate(-100%,-50%)"};function na(Y,D,J){var q=Y.classList;for(var K in Vn)q.remove("mapboxgl-"+J+"-anchor-"+K);q.add("mapboxgl-"+J+"-anchor-"+D)}var Ki=function(Y){function D(J,q){if(Y.call(this),(J instanceof i.window.HTMLElement||q)&&(J=i.extend({element:J},q)),i.bindAll(["_update","_onMove","_onUp","_addDragHandler","_onMapClick","_onKeyPress"],this),this._anchor=J&&J.anchor||"center",this._color=J&&J.color||"#3FB1CE",this._scale=J&&J.scale||1,this._draggable=J&&J.draggable||!1,this._clickTolerance=J&&J.clickTolerance||0,this._isDragging=!1,this._state="inactive",this._rotation=J&&J.rotation||0,this._rotationAlignment=J&&J.rotationAlignment||"auto",this._pitchAlignment=J&&J.pitchAlignment&&J.pitchAlignment!=="auto"?J.pitchAlignment:this._rotationAlignment,!J||!J.element){this._defaultMarker=!0,this._element=o.create("div"),this._element.setAttribute("aria-label","Map marker");var K=o.createNS("http://www.w3.org/2000/svg","svg"),de=41,ne=27;K.setAttributeNS(null,"display","block"),K.setAttributeNS(null,"height",de+"px"),K.setAttributeNS(null,"width",ne+"px"),K.setAttributeNS(null,"viewBox","0 0 "+ne+" "+de);var we=o.createNS("http://www.w3.org/2000/svg","g");we.setAttributeNS(null,"stroke","none"),we.setAttributeNS(null,"stroke-width","1"),we.setAttributeNS(null,"fill","none"),we.setAttributeNS(null,"fill-rule","evenodd");var Ue=o.createNS("http://www.w3.org/2000/svg","g");Ue.setAttributeNS(null,"fill-rule","nonzero");var ft=o.createNS("http://www.w3.org/2000/svg","g");ft.setAttributeNS(null,"transform","translate(3.0, 29.0)"),ft.setAttributeNS(null,"fill","#000000");for(var Xt=[{rx:"10.5",ry:"5.25002273"},{rx:"10.5",ry:"5.25002273"},{rx:"9.5",ry:"4.77275007"},{rx:"8.5",ry:"4.29549936"},{rx:"7.5",ry:"3.81822308"},{rx:"6.5",ry:"3.34094679"},{rx:"5.5",ry:"2.86367051"},{rx:"4.5",ry:"2.38636864"}],hr=0,qt=Xt;hr<qt.length;hr+=1){var Ve=qt[hr],Qe=o.createNS("http://www.w3.org/2000/svg","ellipse");Qe.setAttributeNS(null,"opacity","0.04"),Qe.setAttributeNS(null,"cx","10.5"),Qe.setAttributeNS(null,"cy","5.80029008"),Qe.setAttributeNS(null,"rx",Ve.rx),Qe.setAttributeNS(null,"ry",Ve.ry),ft.appendChild(Qe)}var at=o.createNS("http://www.w3.org/2000/svg","g");at.setAttributeNS(null,"fill",this._color);var Ct=o.createNS("http://www.w3.org/2000/svg","path");Ct.setAttributeNS(null,"d","M27,13.5 C27,19.074644 20.250001,27.000002 14.75,34.500002 C14.016665,35.500004 12.983335,35.500004 12.25,34.500002 C6.7499993,27.000002 0,19.222562 0,13.5 C0,6.0441559 6.0441559,0 13.5,0 C20.955844,0 27,6.0441559 27,13.5 Z"),at.appendChild(Ct);var Ot=o.createNS("http://www.w3.org/2000/svg","g");Ot.setAttributeNS(null,"opacity","0.25"),Ot.setAttributeNS(null,"fill","#000000");var Rt=o.createNS("http://www.w3.org/2000/svg","path");Rt.setAttributeNS(null,"d","M13.5,0 C6.0441559,0 0,6.0441559 0,13.5 C0,19.222562 6.7499993,27 12.25,34.5 C13,35.522727 14.016664,35.500004 14.75,34.5 C20.250001,27 27,19.074644 27,13.5 C27,6.0441559 20.955844,0 13.5,0 Z M13.5,1 C20.415404,1 26,6.584596 26,13.5 C26,15.898657 24.495584,19.181431 22.220703,22.738281 C19.945823,26.295132 16.705119,30.142167 13.943359,33.908203 C13.743445,34.180814 13.612715,34.322738 13.5,34.441406 C13.387285,34.322738 13.256555,34.180814 13.056641,33.908203 C10.284481,30.127985 7.4148684,26.314159 5.015625,22.773438 C2.6163816,19.232715 1,15.953538 1,13.5 C1,6.584596 6.584596,1 13.5,1 Z"),Ot.appendChild(Rt);var Bt=o.createNS("http://www.w3.org/2000/svg","g");Bt.setAttributeNS(null,"transform","translate(6.0, 7.0)"),Bt.setAttributeNS(null,"fill","#FFFFFF");var Dt=o.createNS("http://www.w3.org/2000/svg","g");Dt.setAttributeNS(null,"transform","translate(8.0, 8.0)");var yt=o.createNS("http://www.w3.org/2000/svg","circle");yt.setAttributeNS(null,"fill","#000000"),yt.setAttributeNS(null,"opacity","0.25"),yt.setAttributeNS(null,"cx","5.5"),yt.setAttributeNS(null,"cy","5.5"),yt.setAttributeNS(null,"r","5.4999962");var Pt=o.createNS("http://www.w3.org/2000/svg","circle");Pt.setAttributeNS(null,"fill","#FFFFFF"),Pt.setAttributeNS(null,"cx","5.5"),Pt.setAttributeNS(null,"cy","5.5"),Pt.setAttributeNS(null,"r","5.4999962"),Dt.appendChild(yt),Dt.appendChild(Pt),Ue.appendChild(ft),Ue.appendChild(at),Ue.appendChild(Ot),Ue.appendChild(Bt),Ue.appendChild(Dt),K.appendChild(Ue),K.setAttributeNS(null,"height",de*this._scale+"px"),K.setAttributeNS(null,"width",ne*this._scale+"px"),this._element.appendChild(K),this._offset=i.Point.convert(J&&J.offset||[0,-14])}else this._element=J.element,this._offset=i.Point.convert(J&&J.offset||[0,0]);this._element.classList.add("mapboxgl-marker"),this._element.addEventListener("dragstart",function(ht){ht.preventDefault()}),this._element.addEventListener("mousedown",function(ht){ht.preventDefault()}),na(this._element,this._anchor,"marker"),this._popup=null}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.addTo=function(q){return this.remove(),this._map=q,q.getCanvasContainer().appendChild(this._element),q.on("move",this._update),q.on("moveend",this._update),this.setDraggable(this._draggable),this._update(),this._map.on("click",this._onMapClick),this},D.prototype.remove=function(){return this._map&&(this._map.off("click",this._onMapClick),this._map.off("move",this._update),this._map.off("moveend",this._update),this._map.off("mousedown",this._addDragHandler),this._map.off("touchstart",this._addDragHandler),this._map.off("mouseup",this._onUp),this._map.off("touchend",this._onUp),this._map.off("mousemove",this._onMove),this._map.off("touchmove",this._onMove),delete this._map),o.remove(this._element),this._popup&&this._popup.remove(),this},D.prototype.getLngLat=function(){return this._lngLat},D.prototype.setLngLat=function(q){return this._lngLat=i.LngLat.convert(q),this._pos=null,this._popup&&this._popup.setLngLat(this._lngLat),this._update(),this},D.prototype.getElement=function(){return this._element},D.prototype.setPopup=function(q){if(this._popup&&(this._popup.remove(),this._popup=null,this._element.removeEventListener("keypress",this._onKeyPress),this._originalTabIndex||this._element.removeAttribute("tabindex")),q){if(!("offset"in q.options)){var K=38.1,de=13.5,ne=Math.sqrt(Math.pow(de,2)/2);q.options.offset=this._defaultMarker?{top:[0,0],"top-left":[0,0],"top-right":[0,0],bottom:[0,-K],"bottom-left":[ne,(K-de+ne)*-1],"bottom-right":[-ne,(K-de+ne)*-1],left:[de,(K-de)*-1],right:[-de,(K-de)*-1]}:this._offset}this._popup=q,this._lngLat&&this._popup.setLngLat(this._lngLat),this._originalTabIndex=this._element.getAttribute("tabindex"),this._originalTabIndex||this._element.setAttribute("tabindex","0"),this._element.addEventListener("keypress",this._onKeyPress)}return this},D.prototype._onKeyPress=function(q){var K=q.code,de=q.charCode||q.keyCode;(K==="Space"||K==="Enter"||de===32||de===13)&&this.togglePopup()},D.prototype._onMapClick=function(q){var K=q.originalEvent.target,de=this._element;this._popup&&(K===de||de.contains(K))&&this.togglePopup()},D.prototype.getPopup=function(){return this._popup},D.prototype.togglePopup=function(){var q=this._popup;if(q)q.isOpen()?q.remove():q.addTo(this._map);else return this;return this},D.prototype._update=function(q){if(this._map){this._map.transform.renderWorldCopies&&(this._lngLat=pn(this._lngLat,this._pos,this._map.transform)),this._pos=this._map.project(this._lngLat)._add(this._offset);var K="";this._rotationAlignment==="viewport"||this._rotationAlignment==="auto"?K="rotateZ("+this._rotation+"deg)":this._rotationAlignment==="map"&&(K="rotateZ("+(this._rotation-this._map.getBearing())+"deg)");var de="";this._pitchAlignment==="viewport"||this._pitchAlignment==="auto"?de="rotateX(0deg)":this._pitchAlignment==="map"&&(de="rotateX("+this._map.getPitch()+"deg)"),(!q||q.type==="moveend")&&(this._pos=this._pos.round()),o.setTransform(this._element,Vn[this._anchor]+" translate("+this._pos.x+"px, "+this._pos.y+"px) "+de+" "+K)}},D.prototype.getOffset=function(){return this._offset},D.prototype.setOffset=function(q){return this._offset=i.Point.convert(q),this._update(),this},D.prototype._onMove=function(q){if(!this._isDragging){var K=this._clickTolerance||this._map._clickTolerance;this._isDragging=q.point.dist(this._pointerdownPos)>=K}this._isDragging&&(this._pos=q.point.sub(this._positionDelta),this._lngLat=this._map.unproject(this._pos),this.setLngLat(this._lngLat),this._element.style.pointerEvents="none",this._state==="pending"&&(this._state="active",this.fire(new i.Event("dragstart"))),this.fire(new i.Event("drag")))},D.prototype._onUp=function(){this._element.style.pointerEvents="auto",this._positionDelta=null,this._pointerdownPos=null,this._isDragging=!1,this._map.off("mousemove",this._onMove),this._map.off("touchmove",this._onMove),this._state==="active"&&this.fire(new i.Event("dragend")),this._state="inactive"},D.prototype._addDragHandler=function(q){this._element.contains(q.originalEvent.target)&&(q.preventDefault(),this._positionDelta=q.point.sub(this._pos).add(this._offset),this._pointerdownPos=q.point,this._state="pending",this._map.on("mousemove",this._onMove),this._map.on("touchmove",this._onMove),this._map.once("mouseup",this._onUp),this._map.once("touchend",this._onUp))},D.prototype.setDraggable=function(q){return this._draggable=!!q,this._map&&(q?(this._map.on("mousedown",this._addDragHandler),this._map.on("touchstart",this._addDragHandler)):(this._map.off("mousedown",this._addDragHandler),this._map.off("touchstart",this._addDragHandler))),this},D.prototype.isDraggable=function(){return this._draggable},D.prototype.setRotation=function(q){return this._rotation=q||0,this._update(),this},D.prototype.getRotation=function(){return this._rotation},D.prototype.setRotationAlignment=function(q){return this._rotationAlignment=q||"auto",this._update(),this},D.prototype.getRotationAlignment=function(){return this._rotationAlignment},D.prototype.setPitchAlignment=function(q){return this._pitchAlignment=q&&q!=="auto"?q:this._rotationAlignment,this._update(),this},D.prototype.getPitchAlignment=function(){return this._pitchAlignment},D}(i.Evented),kn={positionOptions:{enableHighAccuracy:!1,maximumAge:0,timeout:6e3},fitBoundsOptions:{maxZoom:15},trackUserLocation:!1,showAccuracyCircle:!0,showUserLocation:!0},ta;function oa(Y){ta!==void 0?Y(ta):i.window.navigator.permissions!==void 0?i.window.navigator.permissions.query({name:"geolocation"}).then(function(D){ta=D.state!=="denied",Y(ta)}):(ta=!!i.window.navigator.geolocation,Y(ta))}var ba=0,is=!1,Zs=function(Y){function D(J){Y.call(this),this.options=i.extend({},kn,J),i.bindAll(["_onSuccess","_onError","_onZoom","_finish","_setupUI","_updateCamera","_updateMarker"],this)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.onAdd=function(q){return this._map=q,this._container=o.create("div","mapboxgl-ctrl mapboxgl-ctrl-group"),oa(this._setupUI),this._container},D.prototype.onRemove=function(){this._geolocationWatchID!==void 0&&(i.window.navigator.geolocation.clearWatch(this._geolocationWatchID),this._geolocationWatchID=void 0),this.options.showUserLocation&&this._userLocationDotMarker&&this._userLocationDotMarker.remove(),this.options.showAccuracyCircle&&this._accuracyCircleMarker&&this._accuracyCircleMarker.remove(),o.remove(this._container),this._map.off("zoom",this._onZoom),this._map=void 0,ba=0,is=!1},D.prototype._isOutOfMapMaxBounds=function(q){var K=this._map.getMaxBounds(),de=q.coords;return K&&(de.longitude<K.getWest()||de.longitude>K.getEast()||de.latitude<K.getSouth()||de.latitude>K.getNorth())},D.prototype._setErrorState=function(){switch(this._watchState){case"WAITING_ACTIVE":this._watchState="ACTIVE_ERROR",this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-active"),this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-active-error");break;case"ACTIVE_LOCK":this._watchState="ACTIVE_ERROR",this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-active"),this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-active-error"),this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-waiting");break;case"BACKGROUND":this._watchState="BACKGROUND_ERROR",this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-background"),this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-background-error"),this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-waiting");break}},D.prototype._onSuccess=function(q){if(this._map){if(this._isOutOfMapMaxBounds(q)){this._setErrorState(),this.fire(new i.Event("outofmaxbounds",q)),this._updateMarker(),this._finish();return}if(this.options.trackUserLocation)switch(this._lastKnownPosition=q,this._watchState){case"WAITING_ACTIVE":case"ACTIVE_LOCK":case"ACTIVE_ERROR":this._watchState="ACTIVE_LOCK",this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-waiting"),this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-active-error"),this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-active");break;case"BACKGROUND":case"BACKGROUND_ERROR":this._watchState="BACKGROUND",this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-waiting"),this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-background-error"),this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-background");break}this.options.showUserLocation&&this._watchState!=="OFF"&&this._updateMarker(q),(!this.options.trackUserLocation||this._watchState==="ACTIVE_LOCK")&&this._updateCamera(q),this.options.showUserLocation&&this._dotElement.classList.remove("mapboxgl-user-location-dot-stale"),this.fire(new i.Event("geolocate",q)),this._finish()}},D.prototype._updateCamera=function(q){var K=new i.LngLat(q.coords.longitude,q.coords.latitude),de=q.coords.accuracy,ne=this._map.getBearing(),we=i.extend({bearing:ne},this.options.fitBoundsOptions);this._map.fitBounds(K.toBounds(de),we,{geolocateSource:!0})},D.prototype._updateMarker=function(q){if(q){var K=new i.LngLat(q.coords.longitude,q.coords.latitude);this._accuracyCircleMarker.setLngLat(K).addTo(this._map),this._userLocationDotMarker.setLngLat(K).addTo(this._map),this._accuracy=q.coords.accuracy,this.options.showUserLocation&&this.options.showAccuracyCircle&&this._updateCircleRadius()}else this._userLocationDotMarker.remove(),this._accuracyCircleMarker.remove()},D.prototype._updateCircleRadius=function(){var q=this._map._container.clientHeight/2,K=this._map.unproject([0,q]),de=this._map.unproject([1,q]),ne=K.distanceTo(de),we=Math.ceil(2*this._accuracy/ne);this._circleElement.style.width=we+"px",this._circleElement.style.height=we+"px"},D.prototype._onZoom=function(){this.options.showUserLocation&&this.options.showAccuracyCircle&&this._updateCircleRadius()},D.prototype._onError=function(q){if(this._map){if(this.options.trackUserLocation)if(q.code===1){this._watchState="OFF",this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-waiting"),this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-active"),this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-active-error"),this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-background"),this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-background-error"),this._geolocateButton.disabled=!0;var K=this._map._getUIString("GeolocateControl.LocationNotAvailable");this._geolocateButton.title=K,this._geolocateButton.setAttribute("aria-label",K),this._geolocationWatchID!==void 0&&this._clearWatch()}else{if(q.code===3&&is)return;this._setErrorState()}this._watchState!=="OFF"&&this.options.showUserLocation&&this._dotElement.classList.add("mapboxgl-user-location-dot-stale"),this.fire(new i.Event("error",q)),this._finish()}},D.prototype._finish=function(){this._timeoutId&&clearTimeout(this._timeoutId),this._timeoutId=void 0},D.prototype._setupUI=function(q){var K=this;if(this._container.addEventListener("contextmenu",function(we){return we.preventDefault()}),this._geolocateButton=o.create("button","mapboxgl-ctrl-geolocate",this._container),o.create("span","mapboxgl-ctrl-icon",this._geolocateButton).setAttribute("aria-hidden",!0),this._geolocateButton.type="button",q===!1){i.warnOnce("Geolocation support is not available so the GeolocateControl will be disabled.");var de=this._map._getUIString("GeolocateControl.LocationNotAvailable");this._geolocateButton.disabled=!0,this._geolocateButton.title=de,this._geolocateButton.setAttribute("aria-label",de)}else{var ne=this._map._getUIString("GeolocateControl.FindMyLocation");this._geolocateButton.title=ne,this._geolocateButton.setAttribute("aria-label",ne)}this.options.trackUserLocation&&(this._geolocateButton.setAttribute("aria-pressed","false"),this._watchState="OFF"),this.options.showUserLocation&&(this._dotElement=o.create("div","mapboxgl-user-location-dot"),this._userLocationDotMarker=new Ki(this._dotElement),this._circleElement=o.create("div","mapboxgl-user-location-accuracy-circle"),this._accuracyCircleMarker=new Ki({element:this._circleElement,pitchAlignment:"map"}),this.options.trackUserLocation&&(this._watchState="OFF"),this._map.on("zoom",this._onZoom)),this._geolocateButton.addEventListener("click",this.trigger.bind(this)),this._setup=!0,this.options.trackUserLocation&&this._map.on("movestart",function(we){var Ue=we.originalEvent&&we.originalEvent.type==="resize";!we.geolocateSource&&K._watchState==="ACTIVE_LOCK"&&!Ue&&(K._watchState="BACKGROUND",K._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-background"),K._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-active"),K.fire(new i.Event("trackuserlocationend")))})},D.prototype.trigger=function(){if(!this._setup)return i.warnOnce("Geolocate control triggered before added to a map"),!1;if(this.options.trackUserLocation){switch(this._watchState){case"OFF":this._watchState="WAITING_ACTIVE",this.fire(new i.Event("trackuserlocationstart"));break;case"WAITING_ACTIVE":case"ACTIVE_LOCK":case"ACTIVE_ERROR":case"BACKGROUND_ERROR":ba--,is=!1,this._watchState="OFF",this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-waiting"),this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-active"),this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-active-error"),this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-background"),this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-background-error"),this.fire(new i.Event("trackuserlocationend"));break;case"BACKGROUND":this._watchState="ACTIVE_LOCK",this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-background"),this._lastKnownPosition&&this._updateCamera(this._lastKnownPosition),this.fire(new i.Event("trackuserlocationstart"));break}switch(this._watchState){case"WAITING_ACTIVE":this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-waiting"),this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-active");break;case"ACTIVE_LOCK":this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-active");break;case"ACTIVE_ERROR":this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-waiting"),this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-active-error");break;case"BACKGROUND":this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-background");break;case"BACKGROUND_ERROR":this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-waiting"),this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-background-error");break}if(this._watchState==="OFF"&&this._geolocationWatchID!==void 0)this._clearWatch();else if(this._geolocationWatchID===void 0){this._geolocateButton.classList.add("mapboxgl-ctrl-geolocate-waiting"),this._geolocateButton.setAttribute("aria-pressed","true"),ba++;var q;ba>1?(q={maximumAge:6e5,timeout:0},is=!0):(q=this.options.positionOptions,is=!1),this._geolocationWatchID=i.window.navigator.geolocation.watchPosition(this._onSuccess,this._onError,q)}}else i.window.navigator.geolocation.getCurrentPosition(this._onSuccess,this._onError,this.options.positionOptions),this._timeoutId=setTimeout(this._finish,1e4);return!0},D.prototype._clearWatch=function(){i.window.navigator.geolocation.clearWatch(this._geolocationWatchID),this._geolocationWatchID=void 0,this._geolocateButton.classList.remove("mapboxgl-ctrl-geolocate-waiting"),this._geolocateButton.setAttribute("aria-pressed","false"),this.options.showUserLocation&&this._updateMarker(null)},D}(i.Evented),Va={maxWidth:100,unit:"metric"},Ml=function(D){this.options=i.extend({},Va,D),i.bindAll(["_onMove","setUnit"],this)};Ml.prototype.getDefaultPosition=function(){return"bottom-left"},Ml.prototype._onMove=function(){zo(this._map,this._container,this.options)},Ml.prototype.onAdd=function(D){return this._map=D,this._container=o.create("div","mapboxgl-ctrl mapboxgl-ctrl-scale",D.getContainer()),this._map.on("move",this._onMove),this._onMove(),this._container},Ml.prototype.onRemove=function(){o.remove(this._container),this._map.off("move",this._onMove),this._map=void 0},Ml.prototype.setUnit=function(D){this.options.unit=D,zo(this._map,this._container,this.options)};function zo(Y,D,J){var q=J&&J.maxWidth||100,K=Y._container.clientHeight/2,de=Y.unproject([0,K]),ne=Y.unproject([q,K]),we=de.distanceTo(ne);if(J&&J.unit==="imperial"){var Ue=3.2808*we;if(Ue>5280){var ft=Ue/5280;Qs(D,q,ft,Y._getUIString("ScaleControl.Miles"))}else Qs(D,q,Ue,Y._getUIString("ScaleControl.Feet"))}else if(J&&J.unit==="nautical"){var Xt=we/1852;Qs(D,q,Xt,Y._getUIString("ScaleControl.NauticalMiles"))}else we>=1e3?Qs(D,q,we/1e3,Y._getUIString("ScaleControl.Kilometers")):Qs(D,q,we,Y._getUIString("ScaleControl.Meters"))}function Qs(Y,D,J,q){var K=Vl(J),de=K/J;Y.style.width=D*de+"px",Y.innerHTML=K+"&nbsp;"+q}function al(Y){var D=Math.pow(10,Math.ceil(-Math.log(Y)/Math.LN10));return Math.round(Y*D)/D}function Vl(Y){var D=Math.pow(10,(""+Math.floor(Y)).length-1),J=Y/D;return J=J>=10?10:J>=5?5:J>=3?3:J>=2?2:J>=1?1:al(J),D*J}var ss=function(D){this._fullscreen=!1,D&&D.container&&(D.container instanceof i.window.HTMLElement?this._container=D.container:i.warnOnce("Full screen control 'container' must be a DOM element.")),i.bindAll(["_onClickFullscreen","_changeIcon"],this),"onfullscreenchange"in i.window.document?this._fullscreenchange="fullscreenchange":"onmozfullscreenchange"in i.window.document?this._fullscreenchange="mozfullscreenchange":"onwebkitfullscreenchange"in i.window.document?this._fullscreenchange="webkitfullscreenchange":"onmsfullscreenchange"in i.window.document&&(this._fullscreenchange="MSFullscreenChange")};ss.prototype.onAdd=function(D){return this._map=D,this._container||(this._container=this._map.getContainer()),this._controlContainer=o.create("div","mapboxgl-ctrl mapboxgl-ctrl-group"),this._checkFullscreenSupport()?this._setupUI():(this._controlContainer.style.display="none",i.warnOnce("This device does not support fullscreen mode.")),this._controlContainer},ss.prototype.onRemove=function(){o.remove(this._controlContainer),this._map=null,i.window.document.removeEventListener(this._fullscreenchange,this._changeIcon)},ss.prototype._checkFullscreenSupport=function(){return!!(i.window.document.fullscreenEnabled||i.window.document.mozFullScreenEnabled||i.window.document.msFullscreenEnabled||i.window.document.webkitFullscreenEnabled)},ss.prototype._setupUI=function(){var D=this._fullscreenButton=o.create("button","mapboxgl-ctrl-fullscreen",this._controlContainer);o.create("span","mapboxgl-ctrl-icon",D).setAttribute("aria-hidden",!0),D.type="button",this._updateTitle(),this._fullscreenButton.addEventListener("click",this._onClickFullscreen),i.window.document.addEventListener(this._fullscreenchange,this._changeIcon)},ss.prototype._updateTitle=function(){var D=this._getTitle();this._fullscreenButton.setAttribute("aria-label",D),this._fullscreenButton.title=D},ss.prototype._getTitle=function(){return this._map._getUIString(this._isFullscreen()?"FullscreenControl.Exit":"FullscreenControl.Enter")},ss.prototype._isFullscreen=function(){return this._fullscreen},ss.prototype._changeIcon=function(){var D=i.window.document.fullscreenElement||i.window.document.mozFullScreenElement||i.window.document.webkitFullscreenElement||i.window.document.msFullscreenElement;D===this._container!==this._fullscreen&&(this._fullscreen=!this._fullscreen,this._fullscreenButton.classList.toggle("mapboxgl-ctrl-shrink"),this._fullscreenButton.classList.toggle("mapboxgl-ctrl-fullscreen"),this._updateTitle())},ss.prototype._onClickFullscreen=function(){this._isFullscreen()?i.window.document.exitFullscreen?i.window.document.exitFullscreen():i.window.document.mozCancelFullScreen?i.window.document.mozCancelFullScreen():i.window.document.msExitFullscreen?i.window.document.msExitFullscreen():i.window.document.webkitCancelFullScreen&&i.window.document.webkitCancelFullScreen():this._container.requestFullscreen?this._container.requestFullscreen():this._container.mozRequestFullScreen?this._container.mozRequestFullScreen():this._container.msRequestFullscreen?this._container.msRequestFullscreen():this._container.webkitRequestFullscreen&&this._container.webkitRequestFullscreen()};var Vs={closeButton:!0,closeOnClick:!0,focusAfterOpen:!0,className:"",maxWidth:"240px"},Ys=["a[href]","[tabindex]:not([tabindex='-1'])","[contenteditable]:not([contenteditable='false'])","button:not([disabled])","input:not([disabled])","select:not([disabled])","textarea:not([disabled])"].join(", "),wa=function(Y){function D(J){Y.call(this),this.options=i.extend(Object.create(Vs),J),i.bindAll(["_update","_onClose","remove","_onMouseMove","_onMouseUp","_onDrag"],this)}return Y&&(D.__proto__=Y),D.prototype=Object.create(Y&&Y.prototype),D.prototype.constructor=D,D.prototype.addTo=function(q){return this._map&&this.remove(),this._map=q,this.options.closeOnClick&&this._map.on("click",this._onClose),this.options.closeOnMove&&this._map.on("move",this._onClose),this._map.on("remove",this.remove),this._update(),this._focusFirstElement(),this._trackPointer?(this._map.on("mousemove",this._onMouseMove),this._map.on("mouseup",this._onMouseUp),this._container&&this._container.classList.add("mapboxgl-popup-track-pointer"),this._map._canvasContainer.classList.add("mapboxgl-track-pointer")):this._map.on("move",this._update),this.fire(new i.Event("open")),this},D.prototype.isOpen=function(){return!!this._map},D.prototype.remove=function(){return this._content&&o.remove(this._content),this._container&&(o.remove(this._container),delete this._container),this._map&&(this._map.off("move",this._update),this._map.off("move",this._onClose),this._map.off("click",this._onClose),this._map.off("remove",this.remove),this._map.off("mousemove",this._onMouseMove),this._map.off("mouseup",this._onMouseUp),this._map.off("drag",this._onDrag),delete this._map),this.fire(new i.Event("close")),this},D.prototype.getLngLat=function(){return this._lngLat},D.prototype.setLngLat=function(q){return this._lngLat=i.LngLat.convert(q),this._pos=null,this._trackPointer=!1,this._update(),this._map&&(this._map.on("move",this._update),this._map.off("mousemove",this._onMouseMove),this._container&&this._container.classList.remove("mapboxgl-popup-track-pointer"),this._map._canvasContainer.classList.remove("mapboxgl-track-pointer")),this},D.prototype.trackPointer=function(){return this._trackPointer=!0,this._pos=null,this._update(),this._map&&(this._map.off("move",this._update),this._map.on("mousemove",this._onMouseMove),this._map.on("drag",this._onDrag),this._container&&this._container.classList.add("mapboxgl-popup-track-pointer"),this._map._canvasContainer.classList.add("mapboxgl-track-pointer")),this},D.prototype.getElement=function(){return this._container},D.prototype.setText=function(q){return this.setDOMContent(i.window.document.createTextNode(q))},D.prototype.setHTML=function(q){var K=i.window.document.createDocumentFragment(),de=i.window.document.createElement("body"),ne;for(de.innerHTML=q;ne=de.firstChild,!!ne;)K.appendChild(ne);return this.setDOMContent(K)},D.prototype.getMaxWidth=function(){return this._container&&this._container.style.maxWidth},D.prototype.setMaxWidth=function(q){return this.options.maxWidth=q,this._update(),this},D.prototype.setDOMContent=function(q){if(this._content)for(;this._content.hasChildNodes();)this._content.firstChild&&this._content.removeChild(this._content.firstChild);else this._content=o.create("div","mapboxgl-popup-content",this._container);return this._content.appendChild(q),this._createCloseButton(),this._update(),this._focusFirstElement(),this},D.prototype.addClassName=function(q){this._container&&this._container.classList.add(q)},D.prototype.removeClassName=function(q){this._container&&this._container.classList.remove(q)},D.prototype.setOffset=function(q){return this.options.offset=q,this._update(),this},D.prototype.toggleClassName=function(q){if(this._container)return this._container.classList.toggle(q)},D.prototype._createCloseButton=function(){this.options.closeButton&&(this._closeButton=o.create("button","mapboxgl-popup-close-button",this._content),this._closeButton.type="button",this._closeButton.setAttribute("aria-label","Close popup"),this._closeButton.innerHTML="&#215;",this._closeButton.addEventListener("click",this._onClose))},D.prototype._onMouseUp=function(q){this._update(q.point)},D.prototype._onMouseMove=function(q){this._update(q.point)},D.prototype._onDrag=function(q){this._update(q.point)},D.prototype._update=function(q){var K=this,de=this._lngLat||this._trackPointer;if(!(!this._map||!de||!this._content)&&(this._container||(this._container=o.create("div","mapboxgl-popup",this._map.getContainer()),this._tip=o.create("div","mapboxgl-popup-tip",this._container),this._container.appendChild(this._content),this.options.className&&this.options.className.split(" ").forEach(function(Ve){return K._container.classList.add(Ve)}),this._trackPointer&&this._container.classList.add("mapboxgl-popup-track-pointer")),this.options.maxWidth&&this._container.style.maxWidth!==this.options.maxWidth&&(this._container.style.maxWidth=this.options.maxWidth),this._map.transform.renderWorldCopies&&!this._trackPointer&&(this._lngLat=pn(this._lngLat,this._pos,this._map.transform)),!(this._trackPointer&&!q))){var ne=this._pos=this._trackPointer&&q?q:this._map.project(this._lngLat),we=this.options.anchor,Ue=ol(this.options.offset);if(!we){var ft=this._container.offsetWidth,Xt=this._container.offsetHeight,hr;ne.y+Ue.bottom.y<Xt?hr=["top"]:ne.y>this._map.transform.height-Xt?hr=["bottom"]:hr=[],ne.x<ft/2?hr.push("left"):ne.x>this._map.transform.width-ft/2&&hr.push("right"),hr.length===0?we="bottom":we=hr.join("-")}var qt=ne.add(Ue[we]).round();o.setTransform(this._container,Vn[we]+" translate("+qt.x+"px,"+qt.y+"px)"),na(this._container,we,"popup")}},D.prototype._focusFirstElement=function(){if(!(!this.options.focusAfterOpen||!this._container)){var q=this._container.querySelector(Ys);q&&q.focus()}},D.prototype._onClose=function(){this.remove()},D}(i.Evented);function ol(Y){if(Y)if(typeof Y=="number"){var D=Math.round(Math.sqrt(.5*Math.pow(Y,2)));return{center:new i.Point(0,0),top:new i.Point(0,Y),"top-left":new i.Point(D,D),"top-right":new i.Point(-D,D),bottom:new i.Point(0,-Y),"bottom-left":new i.Point(D,-D),"bottom-right":new i.Point(-D,-D),left:new i.Point(Y,0),right:new i.Point(-Y,0)}}else if(Y instanceof i.Point||Array.isArray(Y)){var J=i.Point.convert(Y);return{center:J,top:J,"top-left":J,"top-right":J,bottom:J,"bottom-left":J,"bottom-right":J,left:J,right:J}}else return{center:i.Point.convert(Y.center||[0,0]),top:i.Point.convert(Y.top||[0,0]),"top-left":i.Point.convert(Y["top-left"]||[0,0]),"top-right":i.Point.convert(Y["top-right"]||[0,0]),bottom:i.Point.convert(Y.bottom||[0,0]),"bottom-left":i.Point.convert(Y["bottom-left"]||[0,0]),"bottom-right":i.Point.convert(Y["bottom-right"]||[0,0]),left:i.Point.convert(Y.left||[0,0]),right:i.Point.convert(Y.right||[0,0])};else return ol(new i.Point(0,0))}var io={version:i.version,supported:a,setRTLTextPlugin:i.setRTLTextPlugin,getRTLTextPluginStatus:i.getRTLTextPluginStatus,Map:li,NavigationControl:Vi,GeolocateControl:Zs,AttributionControl:en,ScaleControl:Ml,FullscreenControl:ss,Popup:wa,Marker:Ki,Style:yu,LngLat:i.LngLat,LngLatBounds:i.LngLatBounds,Point:i.Point,MercatorCoordinate:i.MercatorCoordinate,Evented:i.Evented,config:i.config,prewarm:ua,clearPrewarmedResources:ma,get accessToken(){return i.config.ACCESS_TOKEN},set accessToken(Y){i.config.ACCESS_TOKEN=Y},get baseApiUrl(){return i.config.API_URL},set baseApiUrl(Y){i.config.API_URL=Y},get workerCount(){return fn.workerCount},set workerCount(Y){fn.workerCount=Y},get maxParallelImageRequests(){return i.config.MAX_PARALLEL_IMAGE_REQUESTS},set maxParallelImageRequests(Y){i.config.MAX_PARALLEL_IMAGE_REQUESTS=Y},clearStorage:function(D){i.clearTileCache(D)},workerUrl:""};return io}),r})});var fGe=ye((_xr,cGe)=>{"use strict";var iw=Dr(),Fjt=ru().sanitizeHTML,zjt=iJ(),sGe=c1();function lGe(e,t){this.subplot=e,this.uid=e.uid+"-"+t,this.index=t,this.idSource="source-"+this.uid,this.idLayer=sGe.layoutLayerPrefix+this.uid,this.sourceType=null,this.source=null,this.layerType=null,this.below=null,this.visible=!1}var ig=lGe.prototype;ig.update=function(t){this.visible?this.needsNewImage(t)?this.updateImage(t):this.needsNewSource(t)?(this.removeLayer(),this.updateSource(t),this.updateLayer(t)):this.needsNewLayer(t)?this.updateLayer(t):this.updateStyle(t):(this.updateSource(t),this.updateLayer(t)),this.visible=$z(t)};ig.needsNewImage=function(e){var t=this.subplot.map;return t.getSource(this.idSource)&&this.sourceType==="image"&&e.sourcetype==="image"&&(this.source!==e.source||JSON.stringify(this.coordinates)!==JSON.stringify(e.coordinates))};ig.needsNewSource=function(e){return this.sourceType!==e.sourcetype||JSON.stringify(this.source)!==JSON.stringify(e.source)||this.layerType!==e.type};ig.needsNewLayer=function(e){return this.layerType!==e.type||this.below!==this.subplot.belowLookup["layout-"+this.index]};ig.lookupBelow=function(){return this.subplot.belowLookup["layout-"+this.index]};ig.updateImage=function(e){var t=this.subplot.map;t.getSource(this.idSource).updateImage({url:e.source,coordinates:e.coordinates});var r=this.findFollowingMapboxLayerId(this.lookupBelow());r!==null&&this.subplot.map.moveLayer(this.idLayer,r)};ig.updateSource=function(e){var t=this.subplot.map;if(t.getSource(this.idSource)&&t.removeSource(this.idSource),this.sourceType=e.sourcetype,this.source=e.source,!!$z(e)){var r=Ojt(e);t.addSource(this.idSource,r)}};ig.findFollowingMapboxLayerId=function(e){if(e==="traces")for(var t=this.subplot.getMapLayers(),r=0;r<t.length;r++){var n=t[r].id;if(typeof n=="string"&&n.indexOf(sGe.traceLayerPrefix)===0){e=n;break}}return e};ig.updateLayer=function(e){var t=this.subplot,r=uGe(e),n=this.lookupBelow(),i=this.findFollowingMapboxLayerId(n);this.removeLayer(),$z(e)&&t.addLayer({id:this.idLayer,source:this.idSource,"source-layer":e.sourcelayer||"",type:e.type,minzoom:e.minzoom,maxzoom:e.maxzoom,layout:r.layout,paint:r.paint},i),this.layerType=e.type,this.below=n};ig.updateStyle=function(e){if($z(e)){var t=uGe(e);this.subplot.setOptions(this.idLayer,"setLayoutProperty",t.layout),this.subplot.setOptions(this.idLayer,"setPaintProperty",t.paint)}};ig.removeLayer=function(){var e=this.subplot.map;e.getLayer(this.idLayer)&&e.removeLayer(this.idLayer)};ig.dispose=function(){var e=this.subplot.map;e.getLayer(this.idLayer)&&e.removeLayer(this.idLayer),e.getSource(this.idSource)&&e.removeSource(this.idSource)};function $z(e){if(!e.visible)return!1;var t=e.source;if(Array.isArray(t)&&t.length>0){for(var r=0;r<t.length;r++)if(typeof t[r]!="string"||t[r].length===0)return!1;return!0}return iw.isPlainObject(t)||typeof t=="string"&&t.length>0}function uGe(e){var t={},r={};switch(e.type){case"circle":iw.extendFlat(r,{"circle-radius":e.circle.radius,"circle-color":e.color,"circle-opacity":e.opacity});break;case"line":iw.extendFlat(r,{"line-width":e.line.width,"line-color":e.color,"line-opacity":e.opacity,"line-dasharray":e.line.dash});break;case"fill":iw.extendFlat(r,{"fill-color":e.color,"fill-outline-color":e.fill.outlinecolor,"fill-opacity":e.opacity});break;case"symbol":var n=e.symbol,i=zjt(n.textposition,n.iconsize);iw.extendFlat(t,{"icon-image":n.icon+"-15","icon-size":n.iconsize/10,"text-field":n.text,"text-size":n.textfont.size,"text-anchor":i.anchor,"text-offset":i.offset,"symbol-placement":n.placement}),iw.extendFlat(r,{"icon-color":e.color,"text-color":n.textfont.color,"text-opacity":e.opacity});break;case"raster":iw.extendFlat(r,{"raster-fade-duration":0,"raster-opacity":e.opacity});break}return{layout:t,paint:r}}function Ojt(e){var t=e.sourcetype,r=e.source,n={type:t},i;return t==="geojson"?i="data":t==="vector"?i=typeof r=="string"?"url":"tiles":t==="raster"?(i="tiles",n.tileSize=256):t==="image"&&(i="url",n.coordinates=e.coordinates),n[i]=r,e.sourceattribution&&(n.attribution=Fjt(e.sourceattribution)),n}cGe.exports=function(t,r,n){var i=new lGe(t,r);return i.update(n),i}});var xGe=ye((xxr,_Ge)=>{"use strict";var cJ=uJ(),fJ=Dr(),pGe=ix(),hGe=qa(),qjt=ho(),Bjt=yv(),Qz=vf(),gGe=Eg(),Njt=gGe.drawMode,Ujt=gGe.selectMode,Vjt=Of().prepSelect,Gjt=Of().clearOutline,Hjt=Of().clearSelectionsCache,jjt=Of().selectOnClick,_x=c1(),Wjt=fGe();function mGe(e,t){this.id=t,this.gd=e;var r=e._fullLayout,n=e._context;this.container=r._glcontainer.node(),this.isStatic=n.staticPlot,this.uid=r._uid+"-"+this.id,this.div=null,this.xaxis=null,this.yaxis=null,this.createFramework(r),this.map=null,this.accessToken=null,this.styleObj=null,this.traceHash={},this.layerList=[],this.belowLookup={},this.dragging=!1,this.wheeling=!1}var Nh=mGe.prototype;Nh.plot=function(e,t,r){var n=this,i=t[n.id];n.map&&i.accesstoken!==n.accessToken&&(n.map.remove(),n.map=null,n.styleObj=null,n.traceHash={},n.layerList=[]);var a;n.map?a=new Promise(function(o,s){n.updateMap(e,t,o,s)}):a=new Promise(function(o,s){n.createMap(e,t,o,s)}),r.push(a)};Nh.createMap=function(e,t,r,n){var i=this,a=t[i.id],o=i.styleObj=yGe(a.style,t);i.accessToken=a.accesstoken;var s=a.bounds,l=s?[[s.west,s.south],[s.east,s.north]]:null,u=i.map=new cJ.Map({container:i.div,style:o.style,center:hJ(a.center),zoom:a.zoom,bearing:a.bearing,pitch:a.pitch,maxBounds:l,interactive:!i.isStatic,preserveDrawingBuffer:i.isStatic,doubleClickZoom:!1,boxZoom:!1,attributionControl:!1}).addControl(new cJ.AttributionControl({compact:!0}));u._canvas.style.left="0px",u._canvas.style.top="0px",i.rejectOnError(n),i.isStatic||i.initFx(e,t);var c=[];c.push(new Promise(function(f){u.once("load",f)})),c=c.concat(pGe.fetchTraceGeoData(e)),Promise.all(c).then(function(){i.fillBelowLookup(e,t),i.updateData(e),i.updateLayout(t),i.resolveOnRender(r)}).catch(n)};Nh.updateMap=function(e,t,r,n){var i=this,a=i.map,o=t[this.id];i.rejectOnError(n);var s=[],l=yGe(o.style,t);JSON.stringify(i.styleObj)!==JSON.stringify(l)&&(i.styleObj=l,a.setStyle(l.style),i.traceHash={},s.push(new Promise(function(u){a.once("styledata",u)}))),s=s.concat(pGe.fetchTraceGeoData(e)),Promise.all(s).then(function(){i.fillBelowLookup(e,t),i.updateData(e),i.updateLayout(t),i.resolveOnRender(r)}).catch(n)};Nh.fillBelowLookup=function(e,t){var r=t[this.id],n=r.layers,i,a,o=this.belowLookup={},s=!1;for(i=0;i<e.length;i++){var l=e[i][0].trace,u=l._module;typeof l.below=="string"?a=l.below:u.getBelow&&(a=u.getBelow(l,this)),a===""&&(s=!0),o["trace-"+l.uid]=a||""}for(i=0;i<n.length;i++){var c=n[i];typeof c.below=="string"?a=c.below:s?a="traces":a="",o["layout-"+i]=a}var f={},h,d;for(h in o)a=o[h],f[a]?f[a].push(h):f[a]=[h];for(a in f){var v=f[a];if(v.length>1)for(i=0;i<v.length;i++)h=v[i],h.indexOf("trace-")===0?(d=h.split("trace-")[1],this.traceHash[d]&&(this.traceHash[d].below=null)):h.indexOf("layout-")===0&&(d=h.split("layout-")[1],this.layerList[d]&&(this.layerList[d].below=null))}};var dGe={choroplethmapbox:0,densitymapbox:1,scattermapbox:2};Nh.updateData=function(e){var t=this.traceHash,r,n,i,a,o=e.slice().sort(function(f,h){return dGe[f[0].trace.type]-dGe[h[0].trace.type]});for(i=0;i<o.length;i++){var s=o[i];n=s[0].trace,r=t[n.uid];var l=!1;r&&(r.type===n.type?(r.update(s),l=!0):r.dispose()),!l&&n._module&&(t[n.uid]=n._module.plot(this,s))}var u=Object.keys(t);e:for(i=0;i<u.length;i++){var c=u[i];for(a=0;a<e.length;a++)if(n=e[a][0].trace,c===n.uid)continue e;r=t[c],r.dispose(),delete t[c]}};Nh.updateLayout=function(e){var t=this.map,r=e[this.id];!this.dragging&&!this.wheeling&&(t.setCenter(hJ(r.center)),t.setZoom(r.zoom),t.setBearing(r.bearing),t.setPitch(r.pitch)),this.updateLayers(e),this.updateFramework(e),this.updateFx(e),this.map.resize(),this.gd._context._scrollZoom.mapbox?t.scrollZoom.enable():t.scrollZoom.disable()};Nh.resolveOnRender=function(e){var t=this.map;t.on("render",function r(){t.loaded()&&(t.off("render",r),setTimeout(e,10))})};Nh.rejectOnError=function(e){var t=this.map;function r(){e(new Error(_x.mapOnErrorMsg))}t.once("error",r),t.once("style.error",r),t.once("source.error",r),t.once("tile.error",r),t.once("layer.error",r)};Nh.createFramework=function(e){var t=this,r=t.div=document.createElement("div");r.id=t.uid,r.style.position="absolute",t.container.appendChild(r),t.xaxis={_id:"x",c2p:function(n){return t.project(n).x}},t.yaxis={_id:"y",c2p:function(n){return t.project(n).y}},t.updateFramework(e),t.mockAxis={type:"linear",showexponent:"all",exponentformat:"B"},qjt.setConvert(t.mockAxis,e)};Nh.initFx=function(e,t){var r=this,n=r.gd,i=r.map;i.on("moveend",function(s){if(r.map){var l=n._fullLayout;if(s.originalEvent||r.wheeling){var u=l[r.id];hGe.call("_storeDirectGUIEdit",n.layout,l._preGUI,r.getViewEdits(u));var c=r.getView();u._input.center=u.center=c.center,u._input.zoom=u.zoom=c.zoom,u._input.bearing=u.bearing=c.bearing,u._input.pitch=u.pitch=c.pitch,n.emit("plotly_relayout",r.getViewEditsWithDerived(c))}s.originalEvent&&s.originalEvent.type==="mouseup"?r.dragging=!1:r.wheeling&&(r.wheeling=!1),l._rehover&&l._rehover()}}),i.on("wheel",function(){r.wheeling=!0}),i.on("mousemove",function(s){var l=r.div.getBoundingClientRect(),u=[s.originalEvent.offsetX,s.originalEvent.offsetY];s.target.getBoundingClientRect=function(){return l},r.xaxis.p2c=function(){return i.unproject(u).lng},r.yaxis.p2c=function(){return i.unproject(u).lat},n._fullLayout._rehover=function(){n._fullLayout._hoversubplot===r.id&&n._fullLayout[r.id]&&Qz.hover(n,s,r.id)},Qz.hover(n,s,r.id),n._fullLayout._hoversubplot=r.id});function a(){Qz.loneUnhover(t._hoverlayer)}i.on("dragstart",function(){r.dragging=!0,a()}),i.on("zoomstart",a),i.on("mouseout",function(){n._fullLayout._hoversubplot=null});function o(){var s=r.getView();n.emit("plotly_relayouting",r.getViewEditsWithDerived(s))}i.on("drag",o),i.on("zoom",o),i.on("dblclick",function(){var s=n._fullLayout[r.id];hGe.call("_storeDirectGUIEdit",n.layout,n._fullLayout._preGUI,r.getViewEdits(s));var l=r.viewInitial;i.setCenter(hJ(l.center)),i.setZoom(l.zoom),i.setBearing(l.bearing),i.setPitch(l.pitch);var u=r.getView();s._input.center=s.center=u.center,s._input.zoom=s.zoom=u.zoom,s._input.bearing=s.bearing=u.bearing,s._input.pitch=s.pitch=u.pitch,n.emit("plotly_doubleclick",null),n.emit("plotly_relayout",r.getViewEditsWithDerived(u))}),r.clearOutline=function(){Hjt(r.dragOptions),Gjt(r.dragOptions.gd)},r.onClickInPanFn=function(s){return function(l){var u=n._fullLayout.clickmode;u.indexOf("select")>-1&&jjt(l.originalEvent,n,[r.xaxis],[r.yaxis],r.id,s),u.indexOf("event")>-1&&Qz.click(n,l.originalEvent)}}};Nh.updateFx=function(e){var t=this,r=t.map,n=t.gd;if(t.isStatic)return;function i(l){var u=t.map.unproject(l);return[u.lng,u.lat]}var a=e.dragmode,o;o=function(l,u){if(u.isRect){var c=l.range={};c[t.id]=[i([u.xmin,u.ymin]),i([u.xmax,u.ymax])]}else{var f=l.lassoPoints={};f[t.id]=u.map(i)}};var s=t.dragOptions;t.dragOptions=fJ.extendDeep(s||{},{dragmode:e.dragmode,element:t.div,gd:n,plotinfo:{id:t.id,domain:e[t.id].domain,xaxis:t.xaxis,yaxis:t.yaxis,fillRangeItems:o},xaxes:[t.xaxis],yaxes:[t.yaxis],subplot:t.id}),r.off("click",t.onClickInPanHandler),Ujt(a)||Njt(a)?(r.dragPan.disable(),r.on("zoomstart",t.clearOutline),t.dragOptions.prepFn=function(l,u,c){Vjt(l,u,c,t.dragOptions,a)},Bjt.init(t.dragOptions)):(r.dragPan.enable(),r.off("zoomstart",t.clearOutline),t.div.onmousedown=null,t.div.ontouchstart=null,t.div.removeEventListener("touchstart",t.div._ontouchstart),t.onClickInPanHandler=t.onClickInPanFn(t.dragOptions),r.on("click",t.onClickInPanHandler))};Nh.updateFramework=function(e){var t=e[this.id].domain,r=e._size,n=this.div.style;n.width=r.w*(t.x[1]-t.x[0])+"px",n.height=r.h*(t.y[1]-t.y[0])+"px",n.left=r.l+t.x[0]*r.w+"px",n.top=r.t+(1-t.y[1])*r.h+"px",this.xaxis._offset=r.l+t.x[0]*r.w,this.xaxis._length=r.w*(t.x[1]-t.x[0]),this.yaxis._offset=r.t+(1-t.y[1])*r.h,this.yaxis._length=r.h*(t.y[1]-t.y[0])};Nh.updateLayers=function(e){var t=e[this.id],r=t.layers,n=this.layerList,i;if(r.length!==n.length){for(i=0;i<n.length;i++)n[i].dispose();for(n=this.layerList=[],i=0;i<r.length;i++)n.push(Wjt(this,i,r[i]))}else for(i=0;i<r.length;i++)n[i].update(r[i])};Nh.destroy=function(){this.map&&(this.map.remove(),this.map=null,this.container.removeChild(this.div))};Nh.toImage=function(){return this.map.stop(),this.map.getCanvas().toDataURL()};Nh.setOptions=function(e,t,r){for(var n in r)this.map[t](e,n,r[n])};Nh.getMapLayers=function(){return this.map.getStyle().layers};Nh.addLayer=function(e,t){var r=this.map;if(typeof t=="string"){if(t===""){r.addLayer(e,t);return}for(var n=this.getMapLayers(),i=0;i<n.length;i++)if(t===n[i].id){r.addLayer(e,t);return}fJ.warn(["Trying to add layer with *below* value",t,"referencing a layer that does not exist","or that does not yet exist."].join(" "))}r.addLayer(e)};Nh.project=function(e){return this.map.project(new cJ.LngLat(e[0],e[1]))};Nh.getView=function(){var e=this.map,t=e.getCenter(),r=t.lng,n=t.lat,i={lon:r,lat:n},a=e.getCanvas(),o=parseInt(a.style.width),s=parseInt(a.style.height);return{center:i,zoom:e.getZoom(),bearing:e.getBearing(),pitch:e.getPitch(),_derived:{coordinates:[e.unproject([0,0]).toArray(),e.unproject([o,0]).toArray(),e.unproject([o,s]).toArray(),e.unproject([0,s]).toArray()]}}};Nh.getViewEdits=function(e){for(var t=this.id,r=["center","zoom","bearing","pitch"],n={},i=0;i<r.length;i++){var a=r[i];n[t+"."+a]=e[a]}return n};Nh.getViewEditsWithDerived=function(e){var t=this.id,r=this.getViewEdits(e);return r[t+"._derived"]=e._derived,r};function yGe(e,t){var r={};if(fJ.isPlainObject(e))r.id=e.id,r.style=e;else if(typeof e=="string")if(r.id=e,_x.styleValuesMapbox.indexOf(e)!==-1)r.style=vGe(e);else if(_x.stylesNonMapbox[e]){r.style=_x.stylesNonMapbox[e];var n=r.style.sources["plotly-"+e],i=n?n.tiles:void 0;i&&i[0]&&i[0].slice(-9)==="?api_key="&&(i[0]+=t._mapboxAccessToken)}else r.style=e;else r.id=_x.styleValueDflt,r.style=vGe(_x.styleValueDflt);return r.transition={duration:0,delay:0},r}function vGe(e){return _x.styleUrlPrefix+e+"-"+_x.styleUrlSuffix}function hJ(e){return[e.lon,e.lat]}_Ge.exports=mGe});var TGe=ye((bxr,wGe)=>{"use strict";var dJ=Dr(),Xjt=k_(),Zjt=Yd(),bGe=Nk();wGe.exports=function(t,r,n){Xjt(t,r,n,{type:"mapbox",attributes:bGe,handleDefaults:Yjt,partition:"y",accessToken:r._mapboxAccessToken})};function Yjt(e,t,r,n){r("accesstoken",n.accessToken),r("style"),r("center.lon"),r("center.lat"),r("zoom"),r("bearing"),r("pitch");var i=r("bounds.west"),a=r("bounds.east"),o=r("bounds.south"),s=r("bounds.north");(i===void 0||a===void 0||o===void 0||s===void 0)&&delete t.bounds,Zjt(e,t,{name:"layers",handleItemDefaults:Kjt}),t._input=e}function Kjt(e,t){function r(l,u){return dJ.coerce(e,t,bGe.layers,l,u)}var n=r("visible");if(n){var i=r("sourcetype"),a=i==="raster"||i==="image";r("source"),r("sourceattribution"),i==="vector"&&r("sourcelayer"),i==="image"&&r("coordinates");var o;a&&(o="raster");var s=r("type",o);a&&s!=="raster"&&(s=t.type="raster",dJ.log("Source types *raster* and *image* must drawn *raster* layer type.")),r("below"),r("color"),r("opacity"),r("minzoom"),r("maxzoom"),s==="circle"&&r("circle.radius"),s==="line"&&(r("line.width"),r("line.dash")),s==="fill"&&r("fill.outlinecolor"),s==="symbol"&&(r("symbol.icon"),r("symbol.iconsize"),r("symbol.text"),dJ.coerceFont(r,"symbol.textfont",void 0,{noFontVariant:!0,noFontShadow:!0,noFontLineposition:!0,noFontTextcase:!0}),r("symbol.textposition"),r("symbol.placement"))}}});var e7=ye(Np=>{"use strict";var AGe=uJ(),rm=Dr(),vJ=rm.strTranslate,Jjt=rm.strScale,$jt=Id().getSubplotCalcData,Qjt=Wp(),eWt=Oa(),SGe=So(),tWt=ru(),rWt=xGe(),xx="mapbox",ey=Np.constants=c1();Np.name=xx;Np.attr="subplot";Np.idRoot=xx;Np.idRegex=Np.attrRegex=rm.counterRegex(xx);var iWt=["mapbox subplots and traces are deprecated!","Please consider switching to `map` subplots and traces.","Learn more at: https://plotly.com/python/maplibre-migration/","as well as https://plotly.com/javascript/maplibre-migration/"].join(" ");Np.attributes={subplot:{valType:"subplotid",dflt:"mapbox",editType:"calc"}};Np.layoutAttributes=Nk();Np.supplyLayoutDefaults=TGe();var MGe=!0;Np.plot=function(t){MGe&&(MGe=!1,rm.warn(iWt));var r=t._fullLayout,n=t.calcdata,i=r._subplots[xx];if(AGe.version!==ey.requiredVersion)throw new Error(ey.wrongVersionErrorMsg);var a=nWt(t,i);AGe.accessToken=a;for(var o=0;o<i.length;o++){var s=i[o],l=$jt(n,xx,s),u=r[s],c=u._subplot;c||(c=new rWt(t,s),r[s]._subplot=c),c.viewInitial||(c.viewInitial={center:rm.extendFlat({},u.center),zoom:u.zoom,bearing:u.bearing,pitch:u.pitch}),c.plot(l,r,t._promises)}};Np.clean=function(e,t,r,n){for(var i=n._subplots[xx]||[],a=0;a<i.length;a++){var o=i[a];!t[o]&&n[o]._subplot&&n[o]._subplot.destroy()}};Np.toSVG=function(e){for(var t=e._fullLayout,r=t._subplots[xx],n=t._size,i=0;i<r.length;i++){var a=t[r[i]],o=a.domain,s=a._subplot,l=s.toImage("png"),u=t._glimages.append("svg:image");u.attr({xmlns:Qjt.svg,"xlink:href":l,x:n.l+n.w*o.x[0],y:n.t+n.h*(1-o.y[1]),width:n.w*(o.x[1]-o.x[0]),height:n.h*(o.y[1]-o.y[0]),preserveAspectRatio:"none"});var c=eWt.select(a._subplot.div),f=c.select(".mapboxgl-ctrl-logo").node().offsetParent===null;if(!f){var h=t._glimages.append("g");h.attr("transform",vJ(n.l+n.w*o.x[0]+10,n.t+n.h*(1-o.y[0])-31)),h.append("path").attr("d",ey.mapboxLogo.path0).style({opacity:.9,fill:"#ffffff","enable-background":"new"}),h.append("path").attr("d",ey.mapboxLogo.path1).style("opacity",.35).style("enable-background","new"),h.append("path").attr("d",ey.mapboxLogo.path2).style("opacity",.35).style("enable-background","new"),h.append("polygon").attr("points",ey.mapboxLogo.polygon).style({opacity:.9,fill:"#ffffff","enable-background":"new"})}var d=c.select(".mapboxgl-ctrl-attrib").text().replace("Improve this map",""),v=t._glimages.append("g"),_=v.append("text");_.text(d).classed("static-attribution",!0).attr({"font-size":12,"font-family":"Arial",color:"rgba(0, 0, 0, 0.75)","text-anchor":"end","data-unformatted":d});var b=SGe.bBox(_.node()),p=n.w*(o.x[1]-o.x[0]);if(b.width>p/2){var k=d.split("|").join("<br>");_.text(k).attr("data-unformatted",k).call(tWt.convertToTspans,e),b=SGe.bBox(_.node())}_.attr("transform",vJ(-3,-b.height+8)),v.insert("rect",".static-attribution").attr({x:-b.width-6,y:-b.height-3,width:b.width+6,height:b.height+3,fill:"rgba(255, 255, 255, 0.75)"});var E=1;b.width+6>p&&(E=p/(b.width+6));var S=[n.l+n.w*o.x[1],n.t+n.h*(1-o.y[0])];v.attr("transform",vJ(S[0],S[1])+Jjt(E))}};function nWt(e,t){var r=e._fullLayout,n=e._context;if(n.mapboxAccessToken==="")return"";for(var i=[],a=[],o=!1,s=!1,l=0;l<t.length;l++){var u=r[t[l]],c=u.accesstoken;EGe(u.style)&&(c?rm.pushUnique(i,c):(EGe(u._input.style)&&(rm.error("Uses Mapbox map style, but did not set an access token."),o=!0),s=!0)),c&&rm.pushUnique(a,c)}if(s){var f=o?ey.noAccessTokenErrorMsg:ey.missingStyleErrorMsg;throw rm.error(f),new Error(f)}return i.length?(i.length>1&&rm.warn(ey.multipleTokensErrorMsg),i[0]):(a.length&&rm.log(["Listed mapbox access token(s)",a.join(","),"but did not use a Mapbox map style, ignoring token(s)."].join(" ")),"")}function EGe(e){return typeof e=="string"&&(ey.styleValuesMapbox.indexOf(e)!==-1||e.indexOf("mapbox://")===0||e.indexOf("stamen")===0)}Np.updateFx=function(e){for(var t=e._fullLayout,r=t._subplots[xx],n=0;n<r.length;n++){var i=t[r[n]]._subplot;i.updateFx(t)}}});var CGe=ye((Axr,kGe)=>{"use strict";var Txr=["*scattermapbox* trace is deprecated!","Please consider switching to the *scattermap* trace type and `map` subplots.","Learn more at: https://plotly.com/python/maplibre-migration/","as well as https://plotly.com/javascript/maplibre-migration/"].join(" ");kGe.exports={attributes:Wz(),supplyDefaults:NVe(),colorbar:$d(),formatLabels:rJ(),calc:pF(),plot:eGe(),hoverPoints:Jz().hoverPoints,eventData:nGe(),selectPoints:oGe(),styleOnSelect:function(e,t){if(t){var r=t[0].trace;r._glTrace.update(t)}},moduleType:"trace",name:"scattermapbox",basePlotModule:e7(),categories:["mapbox","gl","symbols","showLegend","scatter-like"],meta:{}}});var PGe=ye((Sxr,LGe)=>{"use strict";LGe.exports=CGe()});var pJ=ye((Mxr,IGe)=>{"use strict";var f1=t5(),aWt=Tu(),{hovertemplateAttrs:oWt,templatefallbackAttrs:sWt}=Ll(),lWt=Gl(),bx=Ao().extendFlat;IGe.exports=bx({locations:{valType:"data_array",editType:"calc"},z:{valType:"data_array",editType:"calc"},geojson:{valType:"any",editType:"calc"},featureidkey:bx({},f1.featureidkey,{}),below:{valType:"string",editType:"plot"},text:f1.text,hovertext:f1.hovertext,marker:{line:{color:bx({},f1.marker.line.color,{editType:"plot"}),width:bx({},f1.marker.line.width,{editType:"plot"}),editType:"calc"},opacity:bx({},f1.marker.opacity,{editType:"plot"}),editType:"calc"},selected:{marker:{opacity:bx({},f1.selected.marker.opacity,{editType:"plot"}),editType:"plot"},editType:"plot"},unselected:{marker:{opacity:bx({},f1.unselected.marker.opacity,{editType:"plot"}),editType:"plot"},editType:"plot"},hoverinfo:f1.hoverinfo,hovertemplate:oWt({},{keys:["properties"]}),hovertemplatefallback:sWt(),showlegend:bx({},lWt.showlegend,{dflt:!1})},aWt("",{cLetter:"z",editTypeOverride:"calc"}))});var DGe=ye((Exr,RGe)=>{"use strict";var Hk=Dr(),uWt=Qh(),cWt=pJ();RGe.exports=function(t,r,n,i){function a(c,f){return Hk.coerce(t,r,cWt,c,f)}var o=a("locations"),s=a("z"),l=a("geojson");if(!Hk.isArrayOrTypedArray(o)||!o.length||!Hk.isArrayOrTypedArray(s)||!s.length||!(typeof l=="string"&&l!==""||Hk.isPlainObject(l))){r.visible=!1;return}a("featureidkey"),r._length=Math.min(o.length,s.length),a("below"),a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback");var u=a("marker.line.width");u&&a("marker.line.color"),a("marker.opacity"),uWt(t,r,i,a,{prefix:"",cLetter:"z"}),Hk.coerceSelectionMarkerOpacity(r,a)}});var gJ=ye((kxr,OGe)=>{"use strict";var fWt=Eo(),h1=Dr(),hWt=tc(),dWt=So(),vWt=tx().makeBlank,FGe=ix();function pWt(e){var t=e[0].trace,r=t.visible===!0&&t._length!==0,n={layout:{visibility:"none"},paint:{}},i={layout:{visibility:"none"},paint:{}},a=t._opts={fill:n,line:i,geojson:vWt()};if(!r)return a;var o=FGe.extractTraceFeature(e);if(!o)return a;var s=hWt.makeColorScaleFuncFromTrace(t),l=t.marker,u=l.line||{},c;h1.isArrayOrTypedArray(l.opacity)&&(c=function(k){var E=k.mo;return fWt(E)?+h1.constrain(E,0,1):0});var f;h1.isArrayOrTypedArray(u.color)&&(f=function(k){return k.mlc});var h;h1.isArrayOrTypedArray(u.width)&&(h=function(k){return k.mlw});for(var d=0;d<e.length;d++){var v=e[d],_=v.fOut;if(_){var b=_.properties;b.fc=s(v.z),c&&(b.mo=c(v)),f&&(b.mlc=f(v)),h&&(b.mlw=h(v)),v.ct=b.ct,v._polygons=FGe.feature2polygons(_)}}var p=c?{type:"identity",property:"mo"}:l.opacity;return h1.extendFlat(n.paint,{"fill-color":{type:"identity",property:"fc"},"fill-opacity":p}),h1.extendFlat(i.paint,{"line-color":f?{type:"identity",property:"mlc"}:u.color,"line-width":h?{type:"identity",property:"mlw"}:u.width,"line-opacity":p}),n.layout.visibility="visible",i.layout.visibility="visible",a.geojson={type:"FeatureCollection",features:o},zGe(e),a}function zGe(e){var t=e[0].trace,r=t._opts,n;if(t.selectedpoints){for(var i=dWt.makeSelectedPointStyleFns(t),a=0;a<e.length;a++){var o=e[a];o.fOut&&(o.fOut.properties.mo2=i.selectedOpacityFn(o))}n={type:"identity",property:"mo2"}}else n=h1.isArrayOrTypedArray(t.marker.opacity)?{type:"identity",property:"mo"}:t.marker.opacity;return h1.extendFlat(r.fill.paint,{"fill-opacity":n}),h1.extendFlat(r.line.paint,{"line-opacity":n}),r}OGe.exports={convert:pWt,convertOnSelect:zGe}});var VGe=ye((Cxr,UGe)=>{"use strict";var BGe=gJ().convert,gWt=gJ().convertOnSelect,qGe=c1().traceLayerPrefix;function NGe(e,t){this.type="choroplethmapbox",this.subplot=e,this.uid=t,this.sourceId="source-"+t,this.layerList=[["fill",qGe+t+"-fill"],["line",qGe+t+"-line"]],this.below=null}var M5=NGe.prototype;M5.update=function(e){this._update(BGe(e)),e[0].trace._glTrace=this};M5.updateOnSelect=function(e){this._update(gWt(e))};M5._update=function(e){var t=this.subplot,r=this.layerList,n=t.belowLookup["trace-"+this.uid];t.map.getSource(this.sourceId).setData(e.geojson),n!==this.below&&(this._removeLayers(),this._addLayers(e,n),this.below=n);for(var i=0;i<r.length;i++){var a=r[i],o=a[0],s=a[1],l=e[o];t.setOptions(s,"setLayoutProperty",l.layout),l.layout.visibility==="visible"&&t.setOptions(s,"setPaintProperty",l.paint)}};M5._addLayers=function(e,t){for(var r=this.subplot,n=this.layerList,i=this.sourceId,a=0;a<n.length;a++){var o=n[a],s=o[0],l=e[s];r.addLayer({type:s,id:o[1],source:i,layout:l.layout,paint:l.paint},t)}};M5._removeLayers=function(){for(var e=this.subplot.map,t=this.layerList,r=t.length-1;r>=0;r--)e.removeLayer(t[r][1])};M5.dispose=function(){var e=this.subplot.map;this._removeLayers(),e.removeSource(this.sourceId)};UGe.exports=function(t,r){var n=r[0].trace,i=new NGe(t,n.uid),a=i.sourceId,o=BGe(r),s=i.below=t.belowLookup["trace-"+n.uid];return t.map.addSource(a,{type:"geojson",data:o.geojson}),i._addLayers(o,s),r[0].trace._glTrace=i,i}});var HGe=ye((Pxr,GGe)=>{"use strict";var Lxr=["*choroplethmapbox* trace is deprecated!","Please consider switching to the *choroplethmap* trace type and `map` subplots.","Learn more at: https://plotly.com/python/maplibre-migration/","as well as https://plotly.com/javascript/maplibre-migration/"].join(" ");GGe.exports={attributes:pJ(),supplyDefaults:DGe(),colorbar:S_(),calc:zF(),plot:VGe(),hoverPoints:qF(),eventData:BF(),selectPoints:NF(),styleOnSelect:function(e,t){if(t){var r=t[0].trace;r._glTrace.updateOnSelect(t)}},getBelow:function(e,t){for(var r=t.getMapLayers(),n=r.length-2;n>=0;n--){var i=r[n].id;if(typeof i=="string"&&i.indexOf("water")===0){for(var a=n+1;a<r.length;a++)if(i=r[a].id,typeof i=="string"&&i.indexOf("plotly-")===-1)return i}}},moduleType:"trace",name:"choroplethmapbox",basePlotModule:e7(),categories:["mapbox","gl","noOpacity","showLegend"],meta:{hr_name:"choropleth_mapbox"}}});var WGe=ye((Ixr,jGe)=>{"use strict";jGe.exports=HGe()});var yJ=ye((Rxr,ZGe)=>{"use strict";var mWt=Tu(),{hovertemplateAttrs:yWt,templatefallbackAttrs:_Wt}=Ll(),XGe=Gl(),t7=Wz(),mJ=Ao().extendFlat;ZGe.exports=mJ({lon:t7.lon,lat:t7.lat,z:{valType:"data_array",editType:"calc"},radius:{valType:"number",editType:"plot",arrayOk:!0,min:1,dflt:30},below:{valType:"string",editType:"plot"},text:t7.text,hovertext:t7.hovertext,hoverinfo:mJ({},XGe.hoverinfo,{flags:["lon","lat","z","text","name"]}),hovertemplate:yWt(),hovertemplatefallback:_Wt(),showlegend:mJ({},XGe.showlegend,{dflt:!1})},mWt("",{cLetter:"z",editTypeOverride:"calc"}))});var KGe=ye((Dxr,YGe)=>{"use strict";var xWt=Dr(),bWt=Qh(),wWt=yJ();YGe.exports=function(t,r,n,i){function a(u,c){return xWt.coerce(t,r,wWt,u,c)}var o=a("lon")||[],s=a("lat")||[],l=Math.min(o.length,s.length);if(!l){r.visible=!1;return}r._length=l,a("z"),a("radius"),a("below"),a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),bWt(t,r,i,a,{prefix:"",cLetter:"z"})}});var QGe=ye((Fxr,$Ge)=>{"use strict";var _J=Eo(),TWt=Dr().isArrayOrTypedArray,xJ=fs().BADNUM,AWt=gv(),JGe=Dr()._;$Ge.exports=function(t,r){for(var n=r._length,i=new Array(n),a=r.z,o=TWt(a)&&a.length,s=0;s<n;s++){var l=i[s]={},u=r.lon[s],c=r.lat[s];if(l.lonlat=_J(u)&&_J(c)?[+u,+c]:[xJ,xJ],o){var f=a[s];l.z=_J(f)?f:xJ}}return AWt(t,r,{vals:o?a:[0,1],containerStr:"",cLetter:"z"}),n&&(i[0].t={labels:{lat:JGe(t,"lat:")+" ",lon:JGe(t,"lon:")+" "}}),i}});var nHe=ye((zxr,iHe)=>{"use strict";var SWt=Eo(),bJ=Dr(),eHe=ka(),tHe=tc(),rHe=fs().BADNUM,MWt=tx().makeBlank;iHe.exports=function(t){var r=t[0].trace,n=r.visible===!0&&r._length!==0,i={layout:{visibility:"none"},paint:{}},a=r._opts={heatmap:i,geojson:MWt()};if(!n)return a;var o=[],s,l=r.z,u=r.radius,c=bJ.isArrayOrTypedArray(l)&&l.length,f=bJ.isArrayOrTypedArray(u);for(s=0;s<t.length;s++){var h=t[s],d=h.lonlat;if(d[0]!==rHe){var v={};if(c){var _=h.z;v.z=_!==rHe?_:0}f&&(v.r=SWt(u[s])&&u[s]>0?+u[s]:0),o.push({type:"Feature",geometry:{type:"Point",coordinates:d},properties:v})}}var b=tHe.extractOpts(r),p=b.reversescale?tHe.flipScale(b.colorscale):b.colorscale,k=p[0][1],E=eHe.opacity(k)<1?k:eHe.addOpacity(k,0),S=["interpolate",["linear"],["heatmap-density"],0,E];for(s=1;s<p.length;s++)S.push(p[s][0],p[s][1]);var L=["interpolate",["linear"],["get","z"],b.min,0,b.max,1];return bJ.extendFlat(a.heatmap.paint,{"heatmap-weight":c?L:1/(b.max-b.min),"heatmap-color":S,"heatmap-radius":f?{type:"identity",property:"r"}:r.radius,"heatmap-opacity":r.opacity}),a.geojson={type:"FeatureCollection",features:o},a.heatmap.layout.visibility="visible",a}});var lHe=ye((Oxr,sHe)=>{"use strict";var aHe=nHe(),EWt=c1().traceLayerPrefix;function oHe(e,t){this.type="densitymapbox",this.subplot=e,this.uid=t,this.sourceId="source-"+t,this.layerList=[["heatmap",EWt+t+"-heatmap"]],this.below=null}var r7=oHe.prototype;r7.update=function(e){var t=this.subplot,r=this.layerList,n=aHe(e),i=t.belowLookup["trace-"+this.uid];t.map.getSource(this.sourceId).setData(n.geojson),i!==this.below&&(this._removeLayers(),this._addLayers(n,i),this.below=i);for(var a=0;a<r.length;a++){var o=r[a],s=o[0],l=o[1],u=n[s];t.setOptions(l,"setLayoutProperty",u.layout),u.layout.visibility==="visible"&&t.setOptions(l,"setPaintProperty",u.paint)}};r7._addLayers=function(e,t){for(var r=this.subplot,n=this.layerList,i=this.sourceId,a=0;a<n.length;a++){var o=n[a],s=o[0],l=e[s];r.addLayer({type:s,id:o[1],source:i,layout:l.layout,paint:l.paint},t)}};r7._removeLayers=function(){for(var e=this.subplot.map,t=this.layerList,r=t.length-1;r>=0;r--)e.removeLayer(t[r][1])};r7.dispose=function(){var e=this.subplot.map;this._removeLayers(),e.removeSource(this.sourceId)};sHe.exports=function(t,r){var n=r[0].trace,i=new oHe(t,n.uid),a=i.sourceId,o=aHe(r),s=i.below=t.belowLookup["trace-"+n.uid];return t.map.addSource(a,{type:"geojson",data:o.geojson}),i._addLayers(o,s),i}});var cHe=ye((qxr,uHe)=>{"use strict";var kWt=ho(),CWt=Jz().hoverPoints,LWt=Jz().getExtraText;uHe.exports=function(t,r,n){var i=CWt(t,r,n);if(i){var a=i[0],o=a.cd,s=o[0].trace,l=o[a.index];if(delete a.color,"z"in l){var u=a.subplot.mockAxis;a.z=l.z,a.zLabel=kWt.tickText(u,u.c2l(l.z),"hover").text}return a.extraText=LWt(s,l,o[0].t.labels),[a]}}});var hHe=ye((Bxr,fHe)=>{"use strict";fHe.exports=function(t,r){return t.lon=r.lon,t.lat=r.lat,t.z=r.z,t}});var vHe=ye((Uxr,dHe)=>{"use strict";var Nxr=["*densitymapbox* trace is deprecated!","Please consider switching to the *densitymap* trace type and `map` subplots.","Learn more at: https://plotly.com/python/maplibre-migration/","as well as https://plotly.com/javascript/maplibre-migration/"].join(" ");dHe.exports={attributes:yJ(),supplyDefaults:KGe(),colorbar:S_(),formatLabels:rJ(),calc:QGe(),plot:lHe(),hoverPoints:cHe(),eventData:hHe(),getBelow:function(e,t){for(var r=t.getMapLayers(),n=0;n<r.length;n++){var i=r[n],a=i.id;if(i.type==="symbol"&&typeof a=="string"&&a.indexOf("plotly-")===-1)return a}},moduleType:"trace",name:"densitymapbox",basePlotModule:e7(),categories:["mapbox","gl","showLegend"],meta:{hr_name:"density_mapbox"}}});var gHe=ye((Vxr,pHe)=>{"use strict";pHe.exports=vHe()});var yHe=ye((Gxr,mHe)=>{mHe.exports={version:8,name:"orto",metadata:{"maputnik:renderer":"mlgljs"},center:[1.537786,41.837539],zoom:12,bearing:0,pitch:0,light:{anchor:"viewport",color:"white",intensity:.4,position:[1.15,45,30]},sources:{ortoEsri:{type:"raster",tiles:["https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}"],tileSize:256,maxzoom:18,attribution:"ESRI &copy; <a href='http://www.esri.com'>ESRI</a>"},ortoInstaMaps:{type:"raster",tiles:["https://tilemaps.icgc.cat/mapfactory/wmts/orto_8_12/CAT3857/{z}/{x}/{y}.png"],tileSize:256,maxzoom:13},ortoICGC:{type:"raster",tiles:["https://geoserveis.icgc.cat/icc_mapesmultibase/noutm/wmts/orto/GRID3857/{z}/{x}/{y}.jpeg"],tileSize:256,minzoom:13.1,maxzoom:20},openmaptiles:{type:"vector",url:"https://geoserveis.icgc.cat/contextmaps/basemap.json"}},sprite:"https://geoserveis.icgc.cat/contextmaps/sprites/sprite@1",glyphs:"https://geoserveis.icgc.cat/contextmaps/glyphs/{fontstack}/{range}.pbf",layers:[{id:"background",type:"background",paint:{"background-color":"#F4F9F4"}},{id:"ortoEsri",type:"raster",source:"ortoEsri",maxzoom:16,layout:{visibility:"visible"}},{id:"ortoICGC",type:"raster",source:"ortoICGC",minzoom:13.1,maxzoom:19,layout:{visibility:"visible"}},{id:"ortoInstaMaps",type:"raster",source:"ortoInstaMaps",maxzoom:13,layout:{visibility:"visible"}},{id:"waterway_tunnel",type:"line",source:"openmaptiles","source-layer":"waterway",minzoom:14,filter:["all",["in","class","river","stream","canal"],["==","brunnel","tunnel"]],layout:{"line-cap":"round"},paint:{"line-color":"#a0c8f0","line-width":{base:1.3,stops:[[13,.5],[20,6]]},"line-dasharray":[2,4]}},{id:"waterway-other",type:"line",metadata:{"mapbox:group":"1444849382550.77"},source:"openmaptiles","source-layer":"waterway",filter:["!in","class","canal","river","stream"],layout:{"line-cap":"round"},paint:{"line-color":"#a0c8f0","line-width":{base:1.3,stops:[[13,.5],[20,2]]}}},{id:"waterway-stream-canal",type:"line",metadata:{"mapbox:group":"1444849382550.77"},source:"openmaptiles","source-layer":"waterway",filter:["all",["in","class","canal","stream"],["!=","brunnel","tunnel"]],layout:{"line-cap":"round"},paint:{"line-color":"#a0c8f0","line-width":{base:1.3,stops:[[13,.5],[20,6]]}}},{id:"waterway-river",type:"line",metadata:{"mapbox:group":"1444849382550.77"},source:"openmaptiles","source-layer":"waterway",filter:["all",["==","class","river"],["!=","brunnel","tunnel"]],layout:{"line-cap":"round"},paint:{"line-color":"#a0c8f0","line-width":{base:1.2,stops:[[10,.8],[20,4]]},"line-opacity":.5}},{id:"water-offset",type:"fill",metadata:{"mapbox:group":"1444849382550.77"},source:"openmaptiles","source-layer":"water",maxzoom:8,filter:["==","$type","Polygon"],layout:{visibility:"visible"},paint:{"fill-opacity":0,"fill-color":"#a0c8f0","fill-translate":{base:1,stops:[[6,[2,0]],[8,[0,0]]]}}},{id:"water",type:"fill",metadata:{"mapbox:group":"1444849382550.77"},source:"openmaptiles","source-layer":"water",layout:{visibility:"visible"},paint:{"fill-color":"hsl(210, 67%, 85%)","fill-opacity":0}},{id:"water-pattern",type:"fill",metadata:{"mapbox:group":"1444849382550.77"},source:"openmaptiles","source-layer":"water",layout:{visibility:"visible"},paint:{"fill-translate":[0,2.5],"fill-pattern":"wave","fill-opacity":1}},{id:"landcover-ice-shelf",type:"fill",metadata:{"mapbox:group":"1444849382550.77"},source:"openmaptiles","source-layer":"landcover",filter:["==","subclass","ice_shelf"],layout:{visibility:"visible"},paint:{"fill-color":"#fff","fill-opacity":{base:1,stops:[[0,.9],[10,.3]]}}},{id:"tunnel-service-track-casing",type:"line",metadata:{"mapbox:group":"1444849354174.1904"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","tunnel"],["in","class","service","track"]],layout:{"line-join":"round"},paint:{"line-color":"#cfcdca","line-dasharray":[.5,.25],"line-width":{base:1.2,stops:[[15,1],[16,4],[20,11]]}}},{id:"tunnel-minor-casing",type:"line",metadata:{"mapbox:group":"1444849354174.1904"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","tunnel"],["==","class","minor"]],layout:{"line-join":"round"},paint:{"line-color":"#cfcdca","line-opacity":{stops:[[12,0],[12.5,1]]},"line-width":{base:1.2,stops:[[12,.5],[13,1],[14,4],[20,15]]}}},{id:"tunnel-secondary-tertiary-casing",type:"line",metadata:{"mapbox:group":"1444849354174.1904"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","tunnel"],["in","class","secondary","tertiary"]],layout:{"line-join":"round"},paint:{"line-color":"#e9ac77","line-opacity":1,"line-width":{base:1.2,stops:[[8,1.5],[20,17]]}}},{id:"tunnel-trunk-primary-casing",type:"line",metadata:{"mapbox:group":"1444849354174.1904"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","tunnel"],["in","class","primary","trunk"]],layout:{"line-join":"round"},paint:{"line-color":"#e9ac77","line-width":{base:1.2,stops:[[5,.4],[6,.6],[7,1.5],[20,22]]},"line-opacity":.7}},{id:"tunnel-motorway-casing",type:"line",metadata:{"mapbox:group":"1444849354174.1904"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","tunnel"],["==","class","motorway"]],layout:{"line-join":"round",visibility:"visible"},paint:{"line-color":"#e9ac77","line-dasharray":[.5,.25],"line-width":{base:1.2,stops:[[5,.4],[6,.6],[7,1.5],[20,22]]},"line-opacity":.5}},{id:"tunnel-path",type:"line",metadata:{"mapbox:group":"1444849354174.1904"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","$type","LineString"],["all",["==","brunnel","tunnel"],["==","class","path"]]],paint:{"line-color":"#cba","line-dasharray":[1.5,.75],"line-width":{base:1.2,stops:[[15,1.2],[20,4]]}}},{id:"tunnel-service-track",type:"line",metadata:{"mapbox:group":"1444849354174.1904"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","tunnel"],["in","class","service","track"]],layout:{"line-join":"round"},paint:{"line-color":"#fff","line-width":{base:1.2,stops:[[15.5,0],[16,2],[20,7.5]]}}},{id:"tunnel-minor",type:"line",metadata:{"mapbox:group":"1444849354174.1904"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","tunnel"],["==","class","minor_road"]],layout:{"line-join":"round"},paint:{"line-color":"#fff","line-opacity":1,"line-width":{base:1.2,stops:[[13.5,0],[14,2.5],[20,11.5]]}}},{id:"tunnel-secondary-tertiary",type:"line",metadata:{"mapbox:group":"1444849354174.1904"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","tunnel"],["in","class","secondary","tertiary"]],layout:{"line-join":"round"},paint:{"line-color":"#fff4c6","line-width":{base:1.2,stops:[[6.5,0],[7,.5],[20,10]]}}},{id:"tunnel-trunk-primary",type:"line",metadata:{"mapbox:group":"1444849354174.1904"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","tunnel"],["in","class","primary","trunk"]],layout:{"line-join":"round"},paint:{"line-color":"#fff4c6","line-width":{base:1.2,stops:[[6.5,0],[7,.5],[20,18]]},"line-opacity":.5}},{id:"tunnel-motorway",type:"line",metadata:{"mapbox:group":"1444849354174.1904"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","tunnel"],["==","class","motorway"]],layout:{"line-join":"round",visibility:"visible"},paint:{"line-color":"#ffdaa6","line-width":{base:1.2,stops:[[6.5,0],[7,.5],[20,18]]},"line-opacity":.5}},{id:"tunnel-railway",type:"line",metadata:{"mapbox:group":"1444849354174.1904"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","tunnel"],["==","class","rail"]],paint:{"line-color":"#bbb","line-width":{base:1.4,stops:[[14,.4],[15,.75],[20,2]]},"line-dasharray":[2,2]}},{id:"ferry",type:"line",source:"openmaptiles","source-layer":"transportation",filter:["all",["in","class","ferry"]],layout:{"line-join":"round",visibility:"visible"},paint:{"line-color":"rgba(108, 159, 182, 1)","line-width":1.1,"line-dasharray":[2,2]}},{id:"aeroway-taxiway-casing",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"aeroway",minzoom:12,filter:["all",["in","class","taxiway"]],layout:{"line-cap":"round","line-join":"round",visibility:"visible"},paint:{"line-color":"rgba(153, 153, 153, 1)","line-width":{base:1.5,stops:[[11,2],[17,12]]},"line-opacity":1}},{id:"aeroway-runway-casing",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"aeroway",minzoom:12,filter:["all",["in","class","runway"]],layout:{"line-cap":"round","line-join":"round",visibility:"visible"},paint:{"line-color":"rgba(153, 153, 153, 1)","line-width":{base:1.5,stops:[[11,5],[17,55]]},"line-opacity":1}},{id:"aeroway-taxiway",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"aeroway",minzoom:4,filter:["all",["in","class","taxiway"],["==","$type","LineString"]],layout:{"line-cap":"round","line-join":"round",visibility:"visible"},paint:{"line-color":"rgba(255, 255, 255, 1)","line-width":{base:1.5,stops:[[11,1],[17,10]]},"line-opacity":{base:1,stops:[[11,0],[12,1]]}}},{id:"aeroway-runway",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"aeroway",minzoom:4,filter:["all",["in","class","runway"],["==","$type","LineString"]],layout:{"line-cap":"round","line-join":"round",visibility:"visible"},paint:{"line-color":"rgba(255, 255, 255, 1)","line-width":{base:1.5,stops:[[11,4],[17,50]]},"line-opacity":{base:1,stops:[[11,0],[12,1]]}}},{id:"highway-motorway-link-casing",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",minzoom:12,filter:["all",["!in","brunnel","bridge","tunnel"],["==","class","motorway_link"]],layout:{"line-cap":"round","line-join":"round"},paint:{"line-color":"#e9ac77","line-opacity":1,"line-width":{base:1.2,stops:[[12,1],[13,3],[14,4],[20,15]]}}},{id:"highway-link-casing",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",minzoom:13,filter:["all",["!in","brunnel","bridge","tunnel"],["in","class","primary_link","secondary_link","tertiary_link","trunk_link"]],layout:{"line-cap":"round","line-join":"round",visibility:"visible"},paint:{"line-color":"#e9ac77","line-opacity":1,"line-width":{base:1.2,stops:[[12,1],[13,3],[14,4],[20,15]]}}},{id:"highway-minor-casing",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","$type","LineString"],["all",["!=","brunnel","tunnel"],["in","class","minor","service","track"]]],layout:{"line-cap":"round","line-join":"round"},paint:{"line-color":"#cfcdca","line-opacity":{stops:[[12,0],[12.5,0]]},"line-width":{base:1.2,stops:[[12,.5],[13,1],[14,4],[20,15]]}}},{id:"highway-secondary-tertiary-casing",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",filter:["all",["!in","brunnel","bridge","tunnel"],["in","class","secondary","tertiary"]],layout:{"line-cap":"butt","line-join":"round",visibility:"visible"},paint:{"line-color":"#e9ac77","line-opacity":.5,"line-width":{base:1.2,stops:[[8,1.5],[20,17]]}}},{id:"highway-primary-casing",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",minzoom:5,filter:["all",["!in","brunnel","bridge","tunnel"],["in","class","primary"]],layout:{"line-cap":"butt","line-join":"round",visibility:"visible"},paint:{"line-color":"#e9ac77","line-opacity":{stops:[[7,0],[8,.6]]},"line-width":{base:1.2,stops:[[7,0],[8,.6],[9,1.5],[20,22]]}}},{id:"highway-trunk-casing",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",minzoom:5,filter:["all",["!in","brunnel","bridge","tunnel"],["in","class","trunk"]],layout:{"line-cap":"butt","line-join":"round",visibility:"visible"},paint:{"line-color":"#e9ac77","line-opacity":{stops:[[5,0],[6,.5]]},"line-width":{base:1.2,stops:[[5,0],[6,.6],[7,1.5],[20,22]]}}},{id:"highway-motorway-casing",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",minzoom:4,filter:["all",["!in","brunnel","bridge","tunnel"],["==","class","motorway"]],layout:{"line-cap":"butt","line-join":"round",visibility:"visible"},paint:{"line-color":"#e9ac77","line-width":{base:1.2,stops:[[4,0],[5,.4],[6,.6],[7,1.5],[20,22]]},"line-opacity":{stops:[[4,0],[5,.5]]}}},{id:"highway-path",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","$type","LineString"],["all",["!in","brunnel","bridge","tunnel"],["==","class","path"]]],paint:{"line-color":"#cba","line-dasharray":[1.5,.75],"line-width":{base:1.2,stops:[[15,1.2],[20,4]]}}},{id:"highway-motorway-link",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",minzoom:12,filter:["all",["!in","brunnel","bridge","tunnel"],["==","class","motorway_link"]],layout:{"line-cap":"round","line-join":"round"},paint:{"line-color":"#fc8","line-width":{base:1.2,stops:[[12.5,0],[13,1.5],[14,2.5],[20,11.5]]}}},{id:"highway-link",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",minzoom:13,filter:["all",["!in","brunnel","bridge","tunnel"],["in","class","primary_link","secondary_link","tertiary_link","trunk_link"]],layout:{"line-cap":"round","line-join":"round",visibility:"visible"},paint:{"line-color":"#fea","line-width":{base:1.2,stops:[[12.5,0],[13,1.5],[14,2.5],[20,11.5]]}}},{id:"highway-minor",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","$type","LineString"],["all",["!=","brunnel","tunnel"],["in","class","minor","service","track"]]],layout:{"line-cap":"round","line-join":"round"},paint:{"line-color":"#fff","line-opacity":.5,"line-width":{base:1.2,stops:[[13.5,0],[14,2.5],[20,11.5]]}}},{id:"highway-secondary-tertiary",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",filter:["all",["!in","brunnel","bridge","tunnel"],["in","class","secondary","tertiary"]],layout:{"line-cap":"round","line-join":"round",visibility:"visible"},paint:{"line-color":"#fea","line-width":{base:1.2,stops:[[6.5,0],[8,.5],[20,13]]},"line-opacity":.5}},{id:"highway-primary",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","$type","LineString"],["all",["!in","brunnel","bridge","tunnel"],["in","class","primary"]]],layout:{"line-cap":"round","line-join":"round",visibility:"visible"},paint:{"line-color":"#fea","line-width":{base:1.2,stops:[[8.5,0],[9,.5],[20,18]]},"line-opacity":0}},{id:"highway-trunk",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","$type","LineString"],["all",["!in","brunnel","bridge","tunnel"],["in","class","trunk"]]],layout:{"line-cap":"round","line-join":"round",visibility:"visible"},paint:{"line-color":"#fea","line-width":{base:1.2,stops:[[6.5,0],[7,.5],[20,18]]},"line-opacity":.5}},{id:"highway-motorway",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",minzoom:5,filter:["all",["==","$type","LineString"],["all",["!in","brunnel","bridge","tunnel"],["==","class","motorway"]]],layout:{"line-cap":"round","line-join":"round",visibility:"visible"},paint:{"line-color":"#fc8","line-width":{base:1.2,stops:[[6.5,0],[7,.5],[20,18]]},"line-opacity":.5}},{id:"railway-transit",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","$type","LineString"],["all",["==","class","transit"],["!in","brunnel","tunnel"]]],layout:{visibility:"visible"},paint:{"line-color":"hsla(0, 0%, 73%, 0.77)","line-width":{base:1.4,stops:[[14,.4],[20,1]]}}},{id:"railway-transit-hatching",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","$type","LineString"],["all",["==","class","transit"],["!in","brunnel","tunnel"]]],layout:{visibility:"visible"},paint:{"line-color":"hsla(0, 0%, 73%, 0.68)","line-dasharray":[.2,8],"line-width":{base:1.4,stops:[[14.5,0],[15,2],[20,6]]}}},{id:"railway-service",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","$type","LineString"],["all",["==","class","rail"],["has","service"]]],paint:{"line-color":"hsla(0, 0%, 73%, 0.77)","line-width":{base:1.4,stops:[[14,.4],[20,1]]}}},{id:"railway-service-hatching",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","$type","LineString"],["all",["==","class","rail"],["has","service"]]],layout:{visibility:"visible"},paint:{"line-color":"hsla(0, 0%, 73%, 0.68)","line-dasharray":[.2,8],"line-width":{base:1.4,stops:[[14.5,0],[15,2],[20,6]]}}},{id:"railway",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","$type","LineString"],["all",["!has","service"],["!in","brunnel","bridge","tunnel"],["==","class","rail"]]],paint:{"line-color":"#bbb","line-width":{base:1.4,stops:[[14,.4],[15,.75],[20,2]]}}},{id:"railway-hatching",type:"line",metadata:{"mapbox:group":"1444849345966.4436"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","$type","LineString"],["all",["!has","service"],["!in","brunnel","bridge","tunnel"],["==","class","rail"]]],paint:{"line-color":"#bbb","line-dasharray":[.2,8],"line-width":{base:1.4,stops:[[14.5,0],[15,3],[20,8]]}}},{id:"bridge-motorway-link-casing",type:"line",metadata:{"mapbox:group":"1444849334699.1902"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","bridge"],["==","class","motorway_link"]],layout:{"line-join":"round"},paint:{"line-color":"#e9ac77","line-opacity":1,"line-width":{base:1.2,stops:[[12,1],[13,3],[14,4],[20,15]]}}},{id:"bridge-link-casing",type:"line",metadata:{"mapbox:group":"1444849334699.1902"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","bridge"],["in","class","primary_link","secondary_link","tertiary_link","trunk_link"]],layout:{"line-join":"round"},paint:{"line-color":"#e9ac77","line-opacity":1,"line-width":{base:1.2,stops:[[12,1],[13,3],[14,4],[20,15]]}}},{id:"bridge-secondary-tertiary-casing",type:"line",metadata:{"mapbox:group":"1444849334699.1902"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","bridge"],["in","class","secondary","tertiary"]],layout:{"line-join":"round"},paint:{"line-color":"#e9ac77","line-opacity":1,"line-width":{base:1.2,stops:[[8,1.5],[20,28]]}}},{id:"bridge-trunk-primary-casing",type:"line",metadata:{"mapbox:group":"1444849334699.1902"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","bridge"],["in","class","primary","trunk"]],layout:{"line-join":"round"},paint:{"line-color":"hsl(28, 76%, 67%)","line-width":{base:1.2,stops:[[5,.4],[6,.6],[7,1.5],[20,26]]}}},{id:"bridge-motorway-casing",type:"line",metadata:{"mapbox:group":"1444849334699.1902"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","bridge"],["==","class","motorway"]],layout:{"line-join":"round"},paint:{"line-color":"#e9ac77","line-width":{base:1.2,stops:[[5,.4],[6,.6],[7,1.5],[20,22]]},"line-opacity":.5}},{id:"bridge-path-casing",type:"line",metadata:{"mapbox:group":"1444849334699.1902"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","$type","LineString"],["all",["==","brunnel","bridge"],["==","class","path"]]],paint:{"line-color":"#f8f4f0","line-width":{base:1.2,stops:[[15,1.2],[20,18]]}}},{id:"bridge-path",type:"line",metadata:{"mapbox:group":"1444849334699.1902"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","$type","LineString"],["all",["==","brunnel","bridge"],["==","class","path"]]],paint:{"line-color":"#cba","line-width":{base:1.2,stops:[[15,1.2],[20,4]]},"line-dasharray":[1.5,.75]}},{id:"bridge-motorway-link",type:"line",metadata:{"mapbox:group":"1444849334699.1902"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","bridge"],["==","class","motorway_link"]],layout:{"line-join":"round"},paint:{"line-color":"#fc8","line-width":{base:1.2,stops:[[12.5,0],[13,1.5],[14,2.5],[20,11.5]]}}},{id:"bridge-link",type:"line",metadata:{"mapbox:group":"1444849334699.1902"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","bridge"],["in","class","primary_link","secondary_link","tertiary_link","trunk_link"]],layout:{"line-join":"round"},paint:{"line-color":"#fea","line-width":{base:1.2,stops:[[12.5,0],[13,1.5],[14,2.5],[20,11.5]]}}},{id:"bridge-secondary-tertiary",type:"line",metadata:{"mapbox:group":"1444849334699.1902"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","bridge"],["in","class","secondary","tertiary"]],layout:{"line-join":"round"},paint:{"line-color":"#fea","line-width":{base:1.2,stops:[[6.5,0],[7,.5],[20,20]]}}},{id:"bridge-trunk-primary",type:"line",metadata:{"mapbox:group":"1444849334699.1902"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","bridge"],["in","class","primary","trunk"]],layout:{"line-join":"round"},paint:{"line-color":"#fea","line-width":{base:1.2,stops:[[6.5,0],[7,.5],[20,18]]}}},{id:"bridge-motorway",type:"line",metadata:{"mapbox:group":"1444849334699.1902"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","bridge"],["==","class","motorway"]],layout:{"line-join":"round"},paint:{"line-color":"#fc8","line-width":{base:1.2,stops:[[6.5,0],[7,.5],[20,18]]},"line-opacity":.5}},{id:"bridge-railway",type:"line",metadata:{"mapbox:group":"1444849334699.1902"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","bridge"],["==","class","rail"]],paint:{"line-color":"#bbb","line-width":{base:1.4,stops:[[14,.4],[15,.75],[20,2]]}}},{id:"bridge-railway-hatching",type:"line",metadata:{"mapbox:group":"1444849334699.1902"},source:"openmaptiles","source-layer":"transportation",filter:["all",["==","brunnel","bridge"],["==","class","rail"]],paint:{"line-color":"#bbb","line-dasharray":[.2,8],"line-width":{base:1.4,stops:[[14.5,0],[15,3],[20,8]]}}},{id:"cablecar",type:"line",source:"openmaptiles","source-layer":"transportation",minzoom:13,filter:["==","class","cable_car"],layout:{visibility:"visible","line-cap":"round"},paint:{"line-color":"hsl(0, 0%, 70%)","line-width":{base:1,stops:[[11,1],[19,2.5]]}}},{id:"cablecar-dash",type:"line",source:"openmaptiles","source-layer":"transportation",minzoom:13,filter:["==","class","cable_car"],layout:{visibility:"visible","line-cap":"round"},paint:{"line-color":"hsl(0, 0%, 70%)","line-width":{base:1,stops:[[11,3],[19,5.5]]},"line-dasharray":[2,3]}},{id:"boundary-land-level-4",type:"line",source:"openmaptiles","source-layer":"boundary",filter:["all",[">=","admin_level",4],["<=","admin_level",8],["!=","maritime",1]],layout:{"line-join":"round"},paint:{"line-color":"#9e9cab","line-dasharray":[3,1,1,1],"line-width":{base:1.4,stops:[[4,.4],[5,1],[12,3]]},"line-opacity":.6}},{id:"boundary-land-level-2",type:"line",source:"openmaptiles","source-layer":"boundary",filter:["all",["==","admin_level",2],["!=","maritime",1],["!=","disputed",1]],layout:{"line-cap":"round","line-join":"round"},paint:{"line-color":"hsl(248, 7%, 66%)","line-width":{base:1,stops:[[0,.6],[4,1.4],[5,2],[12,2]]}}},{id:"boundary-land-disputed",type:"line",source:"openmaptiles","source-layer":"boundary",filter:["all",["!=","maritime",1],["==","disputed",1]],layout:{"line-cap":"round","line-join":"round"},paint:{"line-color":"hsl(248, 7%, 70%)","line-dasharray":[1,3],"line-width":{base:1,stops:[[0,.6],[4,1.4],[5,2],[12,8]]}}},{id:"boundary-water",type:"line",source:"openmaptiles","source-layer":"boundary",filter:["all",["in","admin_level",2,4],["==","maritime",1]],layout:{"line-cap":"round","line-join":"round"},paint:{"line-color":"rgba(154, 189, 214, 1)","line-width":{base:1,stops:[[0,.6],[4,1],[5,1],[12,1]]},"line-opacity":{stops:[[6,0],[10,0]]}}},{id:"waterway-name",type:"symbol",source:"openmaptiles","source-layer":"waterway",minzoom:13,filter:["all",["==","$type","LineString"],["has","name"]],layout:{"text-font":["Noto Sans Italic"],"text-size":14,"text-field":"{name:latin} {name:nonlatin}","text-max-width":5,"text-rotation-alignment":"map","symbol-placement":"line","text-letter-spacing":.2,"symbol-spacing":350},paint:{"text-color":"#74aee9","text-halo-width":1.5,"text-halo-color":"rgba(255,255,255,0.7)"}},{id:"water-name-lakeline",type:"symbol",source:"openmaptiles","source-layer":"water_name",filter:["==","$type","LineString"],layout:{"text-font":["Noto Sans Italic"],"text-size":14,"text-field":`{name:latin}
+{name:nonlatin}`,"text-max-width":5,"text-rotation-alignment":"map","symbol-placement":"line","symbol-spacing":350,"text-letter-spacing":.2},paint:{"text-color":"#74aee9","text-halo-width":1.5,"text-halo-color":"rgba(255,255,255,0.7)"}},{id:"water-name-ocean",type:"symbol",source:"openmaptiles","source-layer":"water_name",filter:["all",["==","$type","Point"],["==","class","ocean"]],layout:{"text-font":["Noto Sans Italic"],"text-size":14,"text-field":"{name:latin}","text-max-width":5,"text-rotation-alignment":"map","symbol-placement":"point","symbol-spacing":350,"text-letter-spacing":.2},paint:{"text-color":"#74aee9","text-halo-width":1.5,"text-halo-color":"rgba(255,255,255,0.7)"}},{id:"water-name-other",type:"symbol",source:"openmaptiles","source-layer":"water_name",filter:["all",["==","$type","Point"],["!in","class","ocean"]],layout:{"text-font":["Noto Sans Italic"],"text-size":{stops:[[0,10],[6,14]]},"text-field":`{name:latin}
+{name:nonlatin}`,"text-max-width":5,"text-rotation-alignment":"map","symbol-placement":"point","symbol-spacing":350,"text-letter-spacing":.2,visibility:"visible"},paint:{"text-color":"#74aee9","text-halo-width":1.5,"text-halo-color":"rgba(255,255,255,0.7)"}},{id:"poi-level-3",type:"symbol",source:"openmaptiles","source-layer":"poi",minzoom:16,filter:["all",["==","$type","Point"],[">=","rank",25]],layout:{"text-padding":2,"text-font":["Noto Sans Regular"],"text-anchor":"top","icon-image":"{class}_11","text-field":`{name:latin}
+{name:nonlatin}`,"text-offset":[0,.6],"text-size":12,"text-max-width":9},paint:{"text-halo-blur":.5,"text-color":"#666","text-halo-width":1,"text-halo-color":"#ffffff"}},{id:"poi-level-2",type:"symbol",source:"openmaptiles","source-layer":"poi",minzoom:15,filter:["all",["==","$type","Point"],["<=","rank",24],[">=","rank",15]],layout:{"text-padding":2,"text-font":["Noto Sans Regular"],"text-anchor":"top","icon-image":"{class}_11","text-field":`{name:latin}
+{name:nonlatin}`,"text-offset":[0,.6],"text-size":12,"text-max-width":9},paint:{"text-halo-blur":.5,"text-color":"#666","text-halo-width":1,"text-halo-color":"#ffffff"}},{id:"poi-level-1",type:"symbol",source:"openmaptiles","source-layer":"poi",minzoom:14,filter:["all",["==","$type","Point"],["<=","rank",14],["has","name"]],layout:{"text-padding":2,"text-font":["Noto Sans Regular"],"text-anchor":"top","icon-image":"{class}_11","text-field":`{name:latin}
+{name:nonlatin}`,"text-offset":[0,.6],"text-size":11,"text-max-width":9},paint:{"text-halo-blur":.5,"text-color":"rgba(191, 228, 172, 1)","text-halo-width":1,"text-halo-color":"rgba(30, 29, 29, 1)"}},{id:"poi-railway",type:"symbol",source:"openmaptiles","source-layer":"poi",minzoom:13,filter:["all",["==","$type","Point"],["has","name"],["==","class","railway"],["==","subclass","station"]],layout:{"text-padding":2,"text-font":["Noto Sans Regular"],"text-anchor":"top","icon-image":"{class}_11","text-field":`{name:latin}
+{name:nonlatin}`,"text-offset":[0,.6],"text-size":12,"text-max-width":9,"icon-optional":!1,"icon-ignore-placement":!1,"icon-allow-overlap":!1,"text-ignore-placement":!1,"text-allow-overlap":!1,"text-optional":!0},paint:{"text-halo-blur":.5,"text-color":"#666","text-halo-width":1,"text-halo-color":"#ffffff"}},{id:"road_oneway",type:"symbol",source:"openmaptiles","source-layer":"transportation",minzoom:15,filter:["all",["==","oneway",1],["in","class","motorway","trunk","primary","secondary","tertiary","minor","service"]],layout:{"symbol-placement":"line","icon-image":"oneway","symbol-spacing":75,"icon-padding":2,"icon-rotation-alignment":"map","icon-rotate":90,"icon-size":{stops:[[15,.5],[19,1]]}},paint:{"icon-opacity":.5}},{id:"road_oneway_opposite",type:"symbol",source:"openmaptiles","source-layer":"transportation",minzoom:15,filter:["all",["==","oneway",-1],["in","class","motorway","trunk","primary","secondary","tertiary","minor","service"]],layout:{"symbol-placement":"line","icon-image":"oneway","symbol-spacing":75,"icon-padding":2,"icon-rotation-alignment":"map","icon-rotate":-90,"icon-size":{stops:[[15,.5],[19,1]]}},paint:{"icon-opacity":.5}},{id:"highway-name-path",type:"symbol",source:"openmaptiles","source-layer":"transportation_name",minzoom:15.5,filter:["==","class","path"],layout:{"text-size":{base:1,stops:[[13,12],[14,13]]},"text-font":["Noto Sans Regular"],"text-field":"{name:latin} {name:nonlatin}","symbol-placement":"line","text-rotation-alignment":"map"},paint:{"text-halo-color":"#f8f4f0","text-color":"hsl(30, 23%, 62%)","text-halo-width":.5}},{id:"highway-name-minor",type:"symbol",source:"openmaptiles","source-layer":"transportation_name",minzoom:15,filter:["all",["==","$type","LineString"],["in","class","minor","service","track"]],layout:{"text-size":{base:1,stops:[[13,12],[14,13]]},"text-font":["Noto Sans Regular"],"text-field":"{name:latin} {name:nonlatin}","symbol-placement":"line","text-rotation-alignment":"map"},paint:{"text-halo-blur":.5,"text-color":"#765","text-halo-width":1}},{id:"highway-name-major",type:"symbol",source:"openmaptiles","source-layer":"transportation_name",minzoom:12.2,filter:["in","class","primary","secondary","tertiary","trunk"],layout:{"text-size":{base:1,stops:[[13,12],[14,13]]},"text-font":["Noto Sans Regular"],"text-field":"{name:latin} {name:nonlatin}","symbol-placement":"line","text-rotation-alignment":"map"},paint:{"text-halo-blur":.5,"text-color":"#765","text-halo-width":1}},{id:"highway-shield",type:"symbol",source:"openmaptiles","source-layer":"transportation_name",minzoom:8,filter:["all",["<=","ref_length",6],["==","$type","LineString"],["!in","network","us-interstate","us-highway","us-state"]],layout:{"text-size":10,"icon-image":"road_{ref_length}","icon-rotation-alignment":"viewport","symbol-spacing":200,"text-font":["Noto Sans Regular"],"symbol-placement":{base:1,stops:[[10,"point"],[11,"line"]]},"text-rotation-alignment":"viewport","icon-size":1,"text-field":"{ref}"},paint:{"text-opacity":1,"text-color":"rgba(20, 19, 19, 1)","text-halo-color":"rgba(230, 221, 221, 0)","text-halo-width":2,"icon-color":"rgba(183, 18, 18, 1)","icon-opacity":.3,"icon-halo-color":"rgba(183, 55, 55, 0)"}},{id:"highway-shield-us-interstate",type:"symbol",source:"openmaptiles","source-layer":"transportation_name",minzoom:7,filter:["all",["<=","ref_length",6],["==","$type","LineString"],["in","network","us-interstate"]],layout:{"text-size":10,"icon-image":"{network}_{ref_length}","icon-rotation-alignment":"viewport","symbol-spacing":200,"text-font":["Noto Sans Regular"],"symbol-placement":{base:1,stops:[[7,"point"],[7,"line"],[8,"line"]]},"text-rotation-alignment":"viewport","icon-size":1,"text-field":"{ref}"},paint:{"text-color":"rgba(0, 0, 0, 1)"}},{id:"highway-shield-us-other",type:"symbol",source:"openmaptiles","source-layer":"transportation_name",minzoom:9,filter:["all",["<=","ref_length",6],["==","$type","LineString"],["in","network","us-highway","us-state"]],layout:{"text-size":10,"icon-image":"{network}_{ref_length}","icon-rotation-alignment":"viewport","symbol-spacing":200,"text-font":["Noto Sans Regular"],"symbol-placement":{base:1,stops:[[10,"point"],[11,"line"]]},"text-rotation-alignment":"viewport","icon-size":1,"text-field":"{ref}"},paint:{"text-color":"rgba(0, 0, 0, 1)"}},{id:"place-other",type:"symbol",metadata:{"mapbox:group":"1444849242106.713"},source:"openmaptiles","source-layer":"place",minzoom:12,filter:["!in","class","city","town","village","country","continent"],layout:{"text-letter-spacing":.1,"text-size":{base:1.2,stops:[[12,10],[15,14]]},"text-font":["Noto Sans Bold"],"text-field":`{name:latin}
+{name:nonlatin}`,"text-transform":"uppercase","text-max-width":9,visibility:"visible"},paint:{"text-color":"rgba(255,255,255,1)","text-halo-width":1.2,"text-halo-color":"rgba(57, 28, 28, 1)"}},{id:"place-village",type:"symbol",metadata:{"mapbox:group":"1444849242106.713"},source:"openmaptiles","source-layer":"place",minzoom:10,filter:["==","class","village"],layout:{"text-font":["Noto Sans Regular"],"text-size":{base:1.2,stops:[[10,12],[15,16]]},"text-field":`{name:latin}
+{name:nonlatin}`,"text-max-width":8,visibility:"visible"},paint:{"text-color":"rgba(255, 255, 255, 1)","text-halo-width":1.2,"text-halo-color":"rgba(10, 9, 9, 0.8)"}},{id:"place-town",type:"symbol",metadata:{"mapbox:group":"1444849242106.713"},source:"openmaptiles","source-layer":"place",filter:["==","class","town"],layout:{"text-font":["Noto Sans Regular"],"text-size":{base:1.2,stops:[[10,14],[15,24]]},"text-field":`{name:latin}
+{name:nonlatin}`,"text-max-width":8,visibility:"visible"},paint:{"text-color":"rgba(255, 255, 255, 1)","text-halo-width":1.2,"text-halo-color":"rgba(22, 22, 22, 0.8)"}},{id:"place-city",type:"symbol",metadata:{"mapbox:group":"1444849242106.713"},source:"openmaptiles","source-layer":"place",filter:["all",["!=","capital",2],["==","class","city"]],layout:{"text-font":["Noto Sans Regular"],"text-size":{base:1.2,stops:[[7,14],[11,24]]},"text-field":`{name:latin}
+{name:nonlatin}`,"text-max-width":8,visibility:"visible"},paint:{"text-color":"rgba(0, 0, 0, 1)","text-halo-width":1.2,"text-halo-color":"rgba(255,255,255,0.8)"}},{id:"place-city-capital",type:"symbol",metadata:{"mapbox:group":"1444849242106.713"},source:"openmaptiles","source-layer":"place",filter:["all",["==","capital",2],["==","class","city"]],layout:{"text-font":["Noto Sans Regular"],"text-size":{base:1.2,stops:[[7,14],[11,24]]},"text-field":`{name:latin}
+{name:nonlatin}`,"text-max-width":8,"icon-image":"star_11","text-offset":[.4,0],"icon-size":.8,"text-anchor":"left",visibility:"visible"},paint:{"text-color":"#333","text-halo-width":1.2,"text-halo-color":"rgba(255,255,255,0.8)"}},{id:"place-country-other",type:"symbol",metadata:{"mapbox:group":"1444849242106.713"},source:"openmaptiles","source-layer":"place",filter:["all",["==","class","country"],[">=","rank",3],["!has","iso_a2"]],layout:{"text-font":["Noto Sans Italic"],"text-field":"{name:latin}","text-size":{stops:[[3,11],[7,17]]},"text-transform":"uppercase","text-max-width":6.25,visibility:"visible"},paint:{"text-halo-blur":1,"text-color":"#334","text-halo-width":2,"text-halo-color":"rgba(255,255,255,0.8)"}},{id:"place-country-3",type:"symbol",metadata:{"mapbox:group":"1444849242106.713"},source:"openmaptiles","source-layer":"place",filter:["all",["==","class","country"],[">=","rank",3],["has","iso_a2"]],layout:{"text-font":["Noto Sans Bold"],"text-field":"{name:latin}","text-size":{stops:[[3,11],[7,17]]},"text-transform":"uppercase","text-max-width":6.25,visibility:"visible"},paint:{"text-halo-blur":1,"text-color":"#334","text-halo-width":2,"text-halo-color":"rgba(255,255,255,0.8)"}},{id:"place-country-2",type:"symbol",metadata:{"mapbox:group":"1444849242106.713"},source:"openmaptiles","source-layer":"place",filter:["all",["==","class","country"],["==","rank",2],["has","iso_a2"]],layout:{"text-font":["Noto Sans Bold"],"text-field":"{name:latin}","text-size":{stops:[[2,11],[5,17]]},"text-transform":"uppercase","text-max-width":6.25,visibility:"visible"},paint:{"text-halo-blur":1,"text-color":"#334","text-halo-width":2,"text-halo-color":"rgba(255,255,255,0.8)"}},{id:"place-country-1",type:"symbol",metadata:{"mapbox:group":"1444849242106.713"},source:"openmaptiles","source-layer":"place",filter:["all",["==","class","country"],["==","rank",1],["has","iso_a2"]],layout:{"text-font":["Noto Sans Bold"],"text-field":"{name:latin}","text-size":{stops:[[1,11],[4,17]]},"text-transform":"uppercase","text-max-width":6.25,visibility:"visible"},paint:{"text-halo-blur":1,"text-color":"#334","text-halo-width":2,"text-halo-color":"rgba(255,255,255,0.8)"}},{id:"place-continent",type:"symbol",metadata:{"mapbox:group":"1444849242106.713"},source:"openmaptiles","source-layer":"place",maxzoom:1,filter:["==","class","continent"],layout:{"text-font":["Noto Sans Bold"],"text-field":"{name:latin}","text-size":14,"text-max-width":6.25,"text-transform":"uppercase",visibility:"visible"},paint:{"text-halo-blur":1,"text-color":"#334","text-halo-width":2,"text-halo-color":"rgba(255,255,255,0.8)"}}],id:"qebnlkra6"}});var xHe=ye((Hxr,_He)=>{_He.exports={version:8,name:"orto",metadata:{},center:[1.537786,41.837539],zoom:12,bearing:0,pitch:0,light:{anchor:"viewport",color:"white",intensity:.4,position:[1.15,45,30]},sources:{ortoEsri:{type:"raster",tiles:["https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}"],tileSize:256,maxzoom:18,attribution:"ESRI &copy; <a href='http://www.esri.com'>ESRI</a>"},ortoInstaMaps:{type:"raster",tiles:["https://tilemaps.icgc.cat/mapfactory/wmts/orto_8_12/CAT3857/{z}/{x}/{y}.png"],tileSize:256,maxzoom:13},ortoICGC:{type:"raster",tiles:["https://geoserveis.icgc.cat/icc_mapesmultibase/noutm/wmts/orto/GRID3857/{z}/{x}/{y}.jpeg"],tileSize:256,minzoom:13.1,maxzoom:20},openmaptiles:{type:"vector",url:"https://geoserveis.icgc.cat/contextmaps/basemap.json"}},sprite:"https://geoserveis.icgc.cat/contextmaps/sprites/sprite@1",glyphs:"https://geoserveis.icgc.cat/contextmaps/glyphs/{fontstack}/{range}.pbf",layers:[{id:"background",type:"background",paint:{"background-color":"#F4F9F4"}},{id:"ortoEsri",type:"raster",source:"ortoEsri",maxzoom:16,layout:{visibility:"visible"}},{id:"ortoICGC",type:"raster",source:"ortoICGC",minzoom:13.1,maxzoom:19,layout:{visibility:"visible"}},{id:"ortoInstaMaps",type:"raster",source:"ortoInstaMaps",maxzoom:13,layout:{visibility:"visible"}}]}});var wx=ye((jxr,SHe)=>{"use strict";var PWt=Z1(),IWt=yHe(),RWt=xHe(),DWt='\xA9 <a target="_blank" href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors',bHe="https://basemaps.cartocdn.com/gl/positron-gl-style/style.json",wHe="https://basemaps.cartocdn.com/gl/dark-matter-gl-style/style.json",i7="https://basemaps.cartocdn.com/gl/voyager-gl-style/style.json",FWt="https://basemaps.cartocdn.com/gl/positron-nolabels-gl-style/style.json",zWt="https://basemaps.cartocdn.com/gl/dark-matter-nolabels-gl-style/style.json",OWt="https://basemaps.cartocdn.com/gl/voyager-nolabels-gl-style/style.json",AHe={basic:i7,streets:i7,outdoors:i7,light:bHe,dark:wHe,satellite:RWt,"satellite-streets":IWt,"open-street-map":{id:"osm",version:8,sources:{"plotly-osm-tiles":{type:"raster",attribution:DWt,tiles:["https://tile.openstreetmap.org/{z}/{x}/{y}.png"],tileSize:256}},layers:[{id:"plotly-osm-tiles",type:"raster",source:"plotly-osm-tiles",minzoom:0,maxzoom:22}],glyphs:"https://fonts.openmaptiles.org/{fontstack}/{range}.pbf"},"white-bg":{id:"white-bg",version:8,sources:{},layers:[{id:"white-bg",type:"background",paint:{"background-color":"#FFFFFF"},minzoom:0,maxzoom:22}],glyphs:"https://fonts.openmaptiles.org/{fontstack}/{range}.pbf"},"carto-positron":bHe,"carto-darkmatter":wHe,"carto-voyager":i7,"carto-positron-nolabels":FWt,"carto-darkmatter-nolabels":zWt,"carto-voyager-nolabels":OWt},THe=PWt(AHe);SHe.exports={styleValueDflt:"basic",stylesMap:AHe,styleValuesMap:THe,traceLayerPrefix:"plotly-trace-layer-",layoutLayerPrefix:"plotly-layout-layer-",missingStyleErrorMsg:["No valid maplibre style found, please set `map.style` to one of:",THe.join(", "),"or use a tile service."].join(`
+`),mapOnErrorMsg:"Map error."}});var jk=ye((Wxr,LHe)=>{"use strict";var MHe=Dr(),EHe=ka().defaultLine,qWt=Cc().attributes,BWt=ec(),NWt=pf().textposition,UWt=mc().overrideAll,VWt=vl().templatedArray,kHe=wx(),CHe=BWt({noFontVariant:!0,noFontShadow:!0,noFontLineposition:!0,noFontTextcase:!0});CHe.family.dflt="Open Sans Regular, Arial Unicode MS Regular";var GWt=LHe.exports=UWt({_arrayAttrRegexps:[MHe.counterRegex("map",".layers",!0)],domain:qWt({name:"map"}),style:{valType:"any",values:kHe.styleValuesMap,dflt:kHe.styleValueDflt},center:{lon:{valType:"number",dflt:0},lat:{valType:"number",dflt:0}},zoom:{valType:"number",dflt:1},bearing:{valType:"number",dflt:0},pitch:{valType:"number",dflt:0},bounds:{west:{valType:"number"},east:{valType:"number"},south:{valType:"number"},north:{valType:"number"}},layers:VWt("layer",{visible:{valType:"boolean",dflt:!0},sourcetype:{valType:"enumerated",values:["geojson","vector","raster","image"],dflt:"geojson"},source:{valType:"any"},sourcelayer:{valType:"string",dflt:""},sourceattribution:{valType:"string"},type:{valType:"enumerated",values:["circle","line","fill","symbol","raster"],dflt:"circle"},coordinates:{valType:"any"},below:{valType:"string"},color:{valType:"color",dflt:EHe},opacity:{valType:"number",min:0,max:1,dflt:1},minzoom:{valType:"number",min:0,max:24,dflt:0},maxzoom:{valType:"number",min:0,max:24,dflt:24},circle:{radius:{valType:"number",dflt:15}},line:{width:{valType:"number",dflt:2},dash:{valType:"data_array"}},fill:{outlinecolor:{valType:"color",dflt:EHe}},symbol:{icon:{valType:"string",dflt:"marker"},iconsize:{valType:"number",dflt:10},text:{valType:"string",dflt:""},placement:{valType:"enumerated",values:["point","line","line-center"],dflt:"point"},textfont:CHe,textposition:MHe.extendFlat({},NWt,{arrayOk:!1})}})},"plot","from-root");GWt.uirevision={valType:"any",editType:"none"}});var n7=ye((Xxr,DHe)=>{"use strict";var{hovertemplateAttrs:HWt,texttemplateAttrs:jWt,templatefallbackAttrs:PHe}=Ll(),WWt=Cg(),Wk=j2(),E5=pf(),IHe=jk(),XWt=Gl(),ZWt=Tu(),nw=Ao().extendFlat,YWt=mc().overrideAll,KWt=jk(),RHe=Wk.line,k5=Wk.marker;DHe.exports=YWt({lon:Wk.lon,lat:Wk.lat,cluster:{enabled:{valType:"boolean"},maxzoom:nw({},KWt.layers.maxzoom,{}),step:{valType:"number",arrayOk:!0,dflt:-1,min:-1},size:{valType:"number",arrayOk:!0,dflt:20,min:0},color:{valType:"color",arrayOk:!0},opacity:nw({},k5.opacity,{dflt:1})},mode:nw({},E5.mode,{dflt:"markers"}),text:nw({},E5.text,{}),texttemplate:jWt({editType:"plot"},{keys:["lat","lon","text"]}),texttemplatefallback:PHe({editType:"plot"}),hovertext:nw({},E5.hovertext,{}),line:{color:RHe.color,width:RHe.width},connectgaps:E5.connectgaps,marker:nw({symbol:{valType:"string",dflt:"circle",arrayOk:!0},angle:{valType:"number",dflt:"auto",arrayOk:!0},allowoverlap:{valType:"boolean",dflt:!1},opacity:k5.opacity,size:k5.size,sizeref:k5.sizeref,sizemin:k5.sizemin,sizemode:k5.sizemode},ZWt("marker")),fill:Wk.fill,fillcolor:WWt(),textfont:IHe.layers.symbol.textfont,textposition:IHe.layers.symbol.textposition,below:{valType:"string"},selected:{marker:E5.selected.marker},unselected:{marker:E5.unselected.marker},hoverinfo:nw({},XWt.hoverinfo,{flags:["lon","lat","text","name"]}),hovertemplate:HWt(),hovertemplatefallback:PHe()},"calc","nested")});var wJ=ye((Zxr,FHe)=>{"use strict";var JWt=["Metropolis Black Italic","Metropolis Black","Metropolis Bold Italic","Metropolis Bold","Metropolis Extra Bold Italic","Metropolis Extra Bold","Metropolis Extra Light Italic","Metropolis Extra Light","Metropolis Light Italic","Metropolis Light","Metropolis Medium Italic","Metropolis Medium","Metropolis Regular Italic","Metropolis Regular","Metropolis Semi Bold Italic","Metropolis Semi Bold","Metropolis Thin Italic","Metropolis Thin","Open Sans Bold Italic","Open Sans Bold","Open Sans Extrabold Italic","Open Sans Extrabold","Open Sans Italic","Open Sans Light Italic","Open Sans Light","Open Sans Regular","Open Sans Semibold Italic","Open Sans Semibold","Klokantech Noto Sans Bold","Klokantech Noto Sans CJK Bold","Klokantech Noto Sans CJK Regular","Klokantech Noto Sans Italic","Klokantech Noto Sans Regular"];FHe.exports={isSupportedFont:function(e){return JWt.indexOf(e)!==-1}}});var qHe=ye((Yxr,OHe)=>{"use strict";var Xk=Dr(),TJ=Ru(),$Wt=$p(),QWt=R0(),eXt=D0(),tXt=Rg(),zHe=n7(),rXt=wJ().isSupportedFont;OHe.exports=function(t,r,n,i){function a(p,k){return Xk.coerce(t,r,zHe,p,k)}function o(p,k){return Xk.coerce2(t,r,zHe,p,k)}var s=iXt(t,r,a);if(!s){r.visible=!1;return}if(a("text"),a("texttemplate"),a("texttemplatefallback"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),a("mode"),a("below"),TJ.hasMarkers(r)){$Wt(t,r,n,i,a,{noLine:!0,noAngle:!0}),a("marker.allowoverlap"),a("marker.angle");var l=r.marker;l.symbol!=="circle"&&(Xk.isArrayOrTypedArray(l.size)&&(l.size=l.size[0]),Xk.isArrayOrTypedArray(l.color)&&(l.color=l.color[0]))}TJ.hasLines(r)&&(QWt(t,r,n,i,a,{noDash:!0}),a("connectgaps"));var u=o("cluster.maxzoom"),c=o("cluster.step"),f=o("cluster.color",r.marker&&r.marker.color||n),h=o("cluster.size"),d=o("cluster.opacity"),v=u!==!1||c!==!1||f!==!1||h!==!1||d!==!1,_=a("cluster.enabled",v);if(_||TJ.hasText(r)){var b=i.font.family;eXt(t,r,i,a,{noSelect:!0,noFontVariant:!0,noFontShadow:!0,noFontLineposition:!0,noFontTextcase:!0,font:{family:rXt(b)?b:"Open Sans Regular",weight:i.font.weight,style:i.font.style,size:i.font.size,color:i.font.color}})}a("fill"),r.fill!=="none"&&tXt(t,r,n,a),Xk.coerceSelectionMarkerOpacity(r,a)};function iXt(e,t,r){var n=r("lon")||[],i=r("lat")||[],a=Math.min(n.length,i.length);return t._length=a,a}});var AJ=ye((Kxr,NHe)=>{"use strict";var BHe=ho();NHe.exports=function(t,r,n){var i={},a=n[r.subplot]._subplot,o=a.mockAxis,s=t.lonlat;return i.lonLabel=BHe.tickText(o,o.c2l(s[0]),!0).text,i.latLabel=BHe.tickText(o,o.c2l(s[1]),!0).text,i}});var SJ=ye((Jxr,VHe)=>{"use strict";var UHe=Dr();VHe.exports=function(t,r){var n=t.split(" "),i=n[0],a=n[1],o=UHe.isArrayOrTypedArray(r)?UHe.mean(r):r,s=.5+o/100,l=1.5+o/100,u=["",""],c=[0,0];switch(i){case"top":u[0]="top",c[1]=-l;break;case"bottom":u[0]="bottom",c[1]=l;break}switch(a){case"left":u[1]="right",c[0]=-s;break;case"right":u[1]="left",c[0]=s;break}var f;return u[0]&&u[1]?f=u.join("-"):u[0]?f=u[0]:u[1]?f=u[1]:f="center",{anchor:f,offset:c}}});var ZHe=ye(($xr,XHe)=>{"use strict";var jHe=Eo(),ov=Dr(),nXt=fs().BADNUM,o7=tx(),GHe=tc(),aXt=So(),oXt=k3(),s7=Ru(),sXt=wJ().isSupportedFont,lXt=SJ(),uXt=ip().appendArrayPointValue,cXt=ru().NEWLINES,fXt=ru().BR_TAG_ALL;XHe.exports=function(t,r){var n=r[0].trace,i=n.visible===!0&&n._length!==0,a=n.fill!=="none",o=s7.hasLines(n),s=s7.hasMarkers(n),l=s7.hasText(n),u=s&&n.marker.symbol==="circle",c=s&&n.marker.symbol!=="circle",f=n.cluster&&n.cluster.enabled,h=a7("fill"),d=a7("line"),v=a7("circle"),_=a7("symbol"),b={fill:h,line:d,circle:v,symbol:_};if(!i)return b;var p;if((a||o)&&(p=o7.calcTraceToLineCoords(r)),a&&(h.geojson=o7.makePolygon(p),h.layout.visibility="visible",ov.extendFlat(h.paint,{"fill-color":n.fillcolor})),o&&(d.geojson=o7.makeLine(p),d.layout.visibility="visible",ov.extendFlat(d.paint,{"line-width":n.line.width,"line-color":n.line.color,"line-opacity":n.opacity})),u){var k=hXt(r);v.geojson=k.geojson,v.layout.visibility="visible",f&&(v.filter=["!",["has","point_count"]],b.cluster={type:"circle",filter:["has","point_count"],layout:{visibility:"visible"},paint:{"circle-color":EJ(n.cluster.color,n.cluster.step),"circle-radius":EJ(n.cluster.size,n.cluster.step),"circle-opacity":EJ(n.cluster.opacity,n.cluster.step)}},b.clusterCount={type:"symbol",filter:["has","point_count"],paint:{},layout:{"text-field":"{point_count_abbreviated}","text-font":HHe(n),"text-size":12}}),ov.extendFlat(v.paint,{"circle-color":k.mcc,"circle-radius":k.mrc,"circle-opacity":k.mo})}if(u&&f&&(v.filter=["!",["has","point_count"]]),(c||l)&&(_.geojson=dXt(r,t),ov.extendFlat(_.layout,{visibility:"visible","icon-image":"{symbol}-15","text-field":"{text}"}),c&&(ov.extendFlat(_.layout,{"icon-size":n.marker.size/10}),"angle"in n.marker&&n.marker.angle!=="auto"&&ov.extendFlat(_.layout,{"icon-rotate":{type:"identity",property:"angle"},"icon-rotation-alignment":"map"}),_.layout["icon-allow-overlap"]=n.marker.allowoverlap,ov.extendFlat(_.paint,{"icon-opacity":n.opacity*n.marker.opacity,"icon-color":n.marker.color})),l)){var E=(n.marker||{}).size,S=lXt(n.textposition,E);ov.extendFlat(_.layout,{"text-size":n.textfont.size,"text-anchor":S.anchor,"text-offset":S.offset,"text-font":HHe(n)}),ov.extendFlat(_.paint,{"text-color":n.textfont.color,"text-opacity":n.opacity})}return b};function a7(e){return{type:e,geojson:o7.makeBlank(),layout:{visibility:"none"},filter:null,paint:{}}}function hXt(e){var t=e[0].trace,r=t.marker,n=t.selectedpoints,i=ov.isArrayOrTypedArray(r.color),a=ov.isArrayOrTypedArray(r.size),o=ov.isArrayOrTypedArray(r.opacity),s;function l(E){return t.opacity*E}function u(E){return E/2}var c;i&&(GHe.hasColorscale(t,"marker")?c=GHe.makeColorScaleFuncFromTrace(r):c=ov.identity);var f;a&&(f=oXt(t));var h;o&&(h=function(E){var S=jHe(E)?+ov.constrain(E,0,1):0;return l(S)});var d=[];for(s=0;s<e.length;s++){var v=e[s],_=v.lonlat;if(!WHe(_)){var b={};c&&(b.mcc=v.mcc=c(v.mc)),f&&(b.mrc=v.mrc=f(v.ms)),h&&(b.mo=h(v.mo)),n&&(b.selected=v.selected||0),d.push({type:"Feature",id:s+1,geometry:{type:"Point",coordinates:_},properties:b})}}var p;if(n)for(p=aXt.makeSelectedPointStyleFns(t),s=0;s<d.length;s++){var k=d[s].properties;p.selectedOpacityFn&&(k.mo=l(p.selectedOpacityFn(k))),p.selectedColorFn&&(k.mcc=p.selectedColorFn(k)),p.selectedSizeFn&&(k.mrc=p.selectedSizeFn(k))}return{geojson:{type:"FeatureCollection",features:d},mcc:i||p&&p.selectedColorFn?{type:"identity",property:"mcc"}:r.color,mrc:a||p&&p.selectedSizeFn?{type:"identity",property:"mrc"}:u(r.size),mo:o||p&&p.selectedOpacityFn?{type:"identity",property:"mo"}:l(r.opacity)}}function dXt(e,t){for(var r=t._fullLayout,n=e[0].trace,i=n.marker||{},a=i.symbol,o=i.angle,s=a!=="circle"?MJ(a):l7,l=o!=="auto"?MJ(o,!0):l7,u=s7.hasText(n)?MJ(n.text):l7,c=[],f=0;f<e.length;f++){var h=e[f];if(!WHe(h.lonlat)){var d=n.texttemplate,v;if(d){var _=Array.isArray(d)?d[f]||"":d,b=n._module.formatLabels(h,n,r),p={};uXt(p,n,h.i),v=ov.texttemplateString({data:[p,h,n._meta],fallback:n.texttemplatefallback,labels:b,locale:r._d3locale,template:_})}else v=u(f);v&&(v=v.replace(cXt,"").replace(fXt,`
+`)),c.push({type:"Feature",geometry:{type:"Point",coordinates:h.lonlat},properties:{symbol:s(f),angle:l(f),text:v}})}}return{type:"FeatureCollection",features:c}}function MJ(e,t){return ov.isArrayOrTypedArray(e)?t?function(r){return jHe(e[r])?+e[r]:0}:function(r){return e[r]}:e?function(){return e}:l7}function l7(){return""}function WHe(e){return e[0]===nXt}function EJ(e,t){var r;if(ov.isArrayOrTypedArray(e)&&ov.isArrayOrTypedArray(t)){r=["step",["get","point_count"],e[0]];for(var n=1;n<e.length;n++)r.push(t[n-1],e[n])}else r=e;return r}function HHe(e){var t=e.textfont,r=t.family,n=t.style,i=t.weight,a=r.split(" "),o=a[a.length-1]==="Italic";o&&a.pop(),o=o||n==="italic";var s=a.join(" ");i==="bold"&&a.indexOf("Bold")===-1?s+=" Bold":i<=1e3&&(a[0]==="Metropolis"?(s="Metropolis",i>850?s+=" Black":i>750?s+=" Extra Bold":i>650?s+=" Bold":i>550?s+=" Semi Bold":i>450?s+=" Medium":i>350?s+=" Regular":i>250?s+=" Light":i>150?s+=" Extra Light":s+=" Thin"):a.slice(0,2).join(" ")==="Open Sans"?(s="Open Sans",i>750?s+=" Extrabold":i>650?s+=" Bold":i>550?s+=" Semibold":i>350?s+=" Regular":s+=" Light"):a.slice(0,3).join(" ")==="Klokantech Noto Sans"&&(s="Klokantech Noto Sans",a[3]==="CJK"&&(s+=" CJK"),s+=i>500?" Bold":" Regular")),o&&(s+=" Italic"),s==="Open Sans Regular Italic"?s="Open Sans Italic":s==="Open Sans Regular Bold"?s="Open Sans Bold":s==="Open Sans Regular Bold Italic"?s="Open Sans Bold Italic":s==="Klokantech Noto Sans Regular Italic"&&(s="Klokantech Noto Sans Italic"),sXt(s)||(s=r);var l=s.split(", ");return l}});var $He=ye((Qxr,JHe)=>{"use strict";var vXt=Dr(),YHe=ZHe(),C5=wx().traceLayerPrefix,ng={cluster:["cluster","clusterCount","circle"],nonCluster:["fill","line","circle","symbol"]};function KHe(e,t,r,n){this.type="scattermap",this.subplot=e,this.uid=t,this.clusterEnabled=r,this.isHidden=n,this.sourceIds={fill:"source-"+t+"-fill",line:"source-"+t+"-line",circle:"source-"+t+"-circle",symbol:"source-"+t+"-symbol",cluster:"source-"+t+"-circle",clusterCount:"source-"+t+"-circle"},this.layerIds={fill:C5+t+"-fill",line:C5+t+"-line",circle:C5+t+"-circle",symbol:C5+t+"-symbol",cluster:C5+t+"-cluster",clusterCount:C5+t+"-cluster-count"},this.below=null}var Zk=KHe.prototype;Zk.addSource=function(e,t,r){var n={type:"geojson",data:t.geojson};r&&r.enabled&&vXt.extendFlat(n,{cluster:!0,clusterMaxZoom:r.maxzoom});var i=this.subplot.map.getSource(this.sourceIds[e]);i?i.setData(t.geojson):this.subplot.map.addSource(this.sourceIds[e],n)};Zk.setSourceData=function(e,t){this.subplot.map.getSource(this.sourceIds[e]).setData(t.geojson)};Zk.addLayer=function(e,t,r){var n={type:t.type,id:this.layerIds[e],source:this.sourceIds[e],layout:t.layout,paint:t.paint};t.filter&&(n.filter=t.filter);for(var i=this.layerIds[e],a,o=this.subplot.getMapLayers(),s=0;s<o.length;s++)if(o[s].id===i){a=!0;break}a?(this.subplot.setOptions(i,"setLayoutProperty",n.layout),n.layout.visibility==="visible"&&this.subplot.setOptions(i,"setPaintProperty",n.paint)):this.subplot.addLayer(n,r)};Zk.update=function(t){var r=t[0].trace,n=this.subplot,i=n.map,a=YHe(n.gd,t),o=n.belowLookup["trace-"+this.uid],s=!!(r.cluster&&r.cluster.enabled),l=!!this.clusterEnabled,u=this;function c(E){E||u.addSource("circle",a.circle,r.cluster);for(var S=ng.cluster,L=0;L<S.length;L++){var x=S[L],C=a[x];u.addLayer(x,C,o)}}function f(E){for(var S=ng.cluster,L=S.length-1;L>=0;L--){var x=S[L];i.removeLayer(u.layerIds[x])}E||i.removeSource(u.sourceIds.circle)}function h(E){for(var S=ng.nonCluster,L=0;L<S.length;L++){var x=S[L],C=a[x];E||u.addSource(x,C),u.addLayer(x,C,o)}}function d(E){for(var S=ng.nonCluster,L=S.length-1;L>=0;L--){var x=S[L];i.removeLayer(u.layerIds[x]),E||i.removeSource(u.sourceIds[x])}}function v(E){l?f(E):d(E)}function _(E){s?c(E):h(E)}function b(){for(var E=s?ng.cluster:ng.nonCluster,S=0;S<E.length;S++){var L=E[S],x=a[L];x&&(n.setOptions(u.layerIds[L],"setLayoutProperty",x.layout),x.layout.visibility==="visible"&&(L!=="cluster"&&u.setSourceData(L,x),n.setOptions(u.layerIds[L],"setPaintProperty",x.paint)))}}var p=this.isHidden,k=r.visible!==!0;k?p||v():p?k||_():l!==s?(v(),_()):(this.below!==o&&(v(!0),_(!0)),b()),this.clusterEnabled=s,this.isHidden=k,this.below=o,t[0].trace._glTrace=this};Zk.dispose=function(){for(var t=this.subplot.map,r=this.clusterEnabled?ng.cluster:ng.nonCluster,n=r.length-1;n>=0;n--){var i=r[n];t.removeLayer(this.layerIds[i]),t.removeSource(this.sourceIds[i])}};JHe.exports=function(t,r){var n=r[0].trace,i=n.cluster&&n.cluster.enabled,a=n.visible!==!0,o=new KHe(t,n.uid,i,a),s=YHe(t.gd,r),l=o.below=t.belowLookup["trace-"+n.uid],u,c,f;if(i)for(o.addSource("circle",s.circle,n.cluster),u=0;u<ng.cluster.length;u++)c=ng.cluster[u],f=s[c],o.addLayer(c,f,l);else for(u=0;u<ng.nonCluster.length;u++)c=ng.nonCluster[u],f=s[c],o.addSource(c,f,n.cluster),o.addLayer(c,f,l);return r[0].trace._glTrace=o,o}});var u7=ye((ebr,eje)=>{"use strict";var pXt=vf(),kJ=Dr(),gXt=cT(),mXt=kJ.fillText,yXt=fs().BADNUM,_Xt=wx().traceLayerPrefix;function xXt(e,t,r){var n=e.cd,i=n[0].trace,a=e.xa,o=e.ya,s=e.subplot,l=[],u=_Xt+i.uid+"-circle",c=i.cluster&&i.cluster.enabled;if(c){var f=s.map.queryRenderedFeatures(null,{layers:[u]});l=f.map(function(M){return M.id})}var h=t>=0?Math.floor((t+180)/360):Math.ceil((t-180)/360),d=h*360,v=t-d;function _(M){var g=M.lonlat;if(g[0]===yXt||c&&l.indexOf(M.i+1)===-1)return 1/0;var P=kJ.modHalf(g[0],360),T=g[1],z=s.project([P,T]),O=z.x-a.c2p([v,T]),V=z.y-o.c2p([P,r]),G=Math.max(3,M.mrc||0);return Math.max(Math.sqrt(O*O+V*V)-G,1-3/G)}if(pXt.getClosest(n,_,e),e.index!==!1){var b=n[e.index],p=b.lonlat,k=[kJ.modHalf(p[0],360)+d,p[1]],E=a.c2p(k),S=o.c2p(k),L=b.mrc||1;e.x0=E-L,e.x1=E+L,e.y0=S-L,e.y1=S+L;var x={};x[i.subplot]={_subplot:s};var C=i._module.formatLabels(b,i,x);return e.lonLabel=C.lonLabel,e.latLabel=C.latLabel,e.color=gXt(i,b),e.extraText=QHe(i,b,n[0].t.labels),e.hovertemplate=i.hovertemplate,[e]}}function QHe(e,t,r){if(e.hovertemplate)return;var n=t.hi||e.hoverinfo,i=n.split("+"),a=i.indexOf("all")!==-1,o=i.indexOf("lon")!==-1,s=i.indexOf("lat")!==-1,l=t.lonlat,u=[];function c(f){return f+"\xB0"}return a||o&&s?u.push("("+c(l[1])+", "+c(l[0])+")"):o?u.push(r.lon+c(l[0])):s&&u.push(r.lat+c(l[1])),(a||i.indexOf("text")!==-1)&&mXt(t,e,u),u.join("<br>")}eje.exports={hoverPoints:xXt,getExtraText:QHe}});var rje=ye((tbr,tje)=>{"use strict";tje.exports=function(t,r){return t.lon=r.lon,t.lat=r.lat,t}});var nje=ye((rbr,ije)=>{"use strict";var bXt=Dr(),wXt=Ru(),TXt=fs().BADNUM;ije.exports=function(t,r){var n=t.cd,i=t.xaxis,a=t.yaxis,o=[],s=n[0].trace,l;if(!wXt.hasMarkers(s))return[];if(r===!1)for(l=0;l<n.length;l++)n[l].selected=0;else for(l=0;l<n.length;l++){var u=n[l],c=u.lonlat;if(c[0]!==TXt){var f=[bXt.modHalf(c[0],360),c[1]],h=[i.c2p(f),a.c2p(f)];r.contains(h,null,l,t)?(o.push({pointNumber:l,lon:c[0],lat:c[1]}),u.selected=1):u.selected=0}}return o}});var oje=ye((CJ,LJ)=>{(function(e,t){typeof CJ=="object"&&typeof LJ!="undefined"?LJ.exports=t():(e=typeof globalThis!="undefined"?globalThis:e||self,e.maplibregl=t())})(CJ,function(){"use strict";var e={},t={};function r(i,a,o){if(t[i]=o,i==="index"){var s="var sharedModule = {}; ("+t.shared+")(sharedModule); ("+t.worker+")(sharedModule);",l={};return t.shared(l),t.index(e,l),typeof window!="undefined"&&e.setWorkerUrl(window.URL.createObjectURL(new Blob([s],{type:"text/javascript"}))),e}}r("shared",["exports"],function(i){"use strict";function a(R,A,F,W){return new(F||(F=Promise))(function(re,fe){function pe(ct){try{Ke(W.next(ct))}catch(Lt){fe(Lt)}}function ze(ct){try{Ke(W.throw(ct))}catch(Lt){fe(Lt)}}function Ke(ct){var Lt;ct.done?re(ct.value):(Lt=ct.value,Lt instanceof F?Lt:new F(function($t){$t(Lt)})).then(pe,ze)}Ke((W=W.apply(R,A||[])).next())})}function o(R){return R&&R.__esModule&&Object.prototype.hasOwnProperty.call(R,"default")?R.default:R}typeof SuppressedError=="function"&&SuppressedError;var s=l;function l(R,A){this.x=R,this.y=A}l.prototype={clone:function(){return new l(this.x,this.y)},add:function(R){return this.clone()._add(R)},sub:function(R){return this.clone()._sub(R)},multByPoint:function(R){return this.clone()._multByPoint(R)},divByPoint:function(R){return this.clone()._divByPoint(R)},mult:function(R){return this.clone()._mult(R)},div:function(R){return this.clone()._div(R)},rotate:function(R){return this.clone()._rotate(R)},rotateAround:function(R,A){return this.clone()._rotateAround(R,A)},matMult:function(R){return this.clone()._matMult(R)},unit:function(){return this.clone()._unit()},perp:function(){return this.clone()._perp()},round:function(){return this.clone()._round()},mag:function(){return Math.sqrt(this.x*this.x+this.y*this.y)},equals:function(R){return this.x===R.x&&this.y===R.y},dist:function(R){return Math.sqrt(this.distSqr(R))},distSqr:function(R){var A=R.x-this.x,F=R.y-this.y;return A*A+F*F},angle:function(){return Math.atan2(this.y,this.x)},angleTo:function(R){return Math.atan2(this.y-R.y,this.x-R.x)},angleWith:function(R){return this.angleWithSep(R.x,R.y)},angleWithSep:function(R,A){return Math.atan2(this.x*A-this.y*R,this.x*R+this.y*A)},_matMult:function(R){var A=R[2]*this.x+R[3]*this.y;return this.x=R[0]*this.x+R[1]*this.y,this.y=A,this},_add:function(R){return this.x+=R.x,this.y+=R.y,this},_sub:function(R){return this.x-=R.x,this.y-=R.y,this},_mult:function(R){return this.x*=R,this.y*=R,this},_div:function(R){return this.x/=R,this.y/=R,this},_multByPoint:function(R){return this.x*=R.x,this.y*=R.y,this},_divByPoint:function(R){return this.x/=R.x,this.y/=R.y,this},_unit:function(){return this._div(this.mag()),this},_perp:function(){var R=this.y;return this.y=this.x,this.x=-R,this},_rotate:function(R){var A=Math.cos(R),F=Math.sin(R),W=F*this.x+A*this.y;return this.x=A*this.x-F*this.y,this.y=W,this},_rotateAround:function(R,A){var F=Math.cos(R),W=Math.sin(R),re=A.y+W*(this.x-A.x)+F*(this.y-A.y);return this.x=A.x+F*(this.x-A.x)-W*(this.y-A.y),this.y=re,this},_round:function(){return this.x=Math.round(this.x),this.y=Math.round(this.y),this}},l.convert=function(R){return R instanceof l?R:Array.isArray(R)?new l(R[0],R[1]):R};var u=o(s),c=f;function f(R,A,F,W){this.cx=3*R,this.bx=3*(F-R)-this.cx,this.ax=1-this.cx-this.bx,this.cy=3*A,this.by=3*(W-A)-this.cy,this.ay=1-this.cy-this.by,this.p1x=R,this.p1y=A,this.p2x=F,this.p2y=W}f.prototype={sampleCurveX:function(R){return((this.ax*R+this.bx)*R+this.cx)*R},sampleCurveY:function(R){return((this.ay*R+this.by)*R+this.cy)*R},sampleCurveDerivativeX:function(R){return(3*this.ax*R+2*this.bx)*R+this.cx},solveCurveX:function(R,A){if(A===void 0&&(A=1e-6),R<0)return 0;if(R>1)return 1;for(var F=R,W=0;W<8;W++){var re=this.sampleCurveX(F)-R;if(Math.abs(re)<A)return F;var fe=this.sampleCurveDerivativeX(F);if(Math.abs(fe)<1e-6)break;F-=re/fe}var pe=0,ze=1;for(F=R,W=0;W<20&&(re=this.sampleCurveX(F),!(Math.abs(re-R)<A));W++)R>re?pe=F:ze=F,F=.5*(ze-pe)+pe;return F},solve:function(R,A){return this.sampleCurveY(this.solveCurveX(R,A))}};var h=o(c);let d,v;function _(){return d==null&&(d=typeof OffscreenCanvas!="undefined"&&new OffscreenCanvas(1,1).getContext("2d")&&typeof createImageBitmap=="function"),d}function b(){if(v==null&&(v=!1,_())){let A=new OffscreenCanvas(5,5).getContext("2d",{willReadFrequently:!0});if(A){for(let W=0;W<5*5;W++){let re=4*W;A.fillStyle=`rgb(${re},${re+1},${re+2})`,A.fillRect(W%5,Math.floor(W/5),1,1)}let F=A.getImageData(0,0,5,5).data;for(let W=0;W<5*5*4;W++)if(W%4!=3&&F[W]!==W){v=!0;break}}}return v||!1}function p(R,A,F,W){let re=new h(R,A,F,W);return fe=>re.solve(fe)}let k=p(.25,.1,.25,1);function E(R,A,F){return Math.min(F,Math.max(A,R))}function S(R,A,F){let W=F-A,re=((R-A)%W+W)%W+A;return re===A?F:re}function L(R,...A){for(let F of A)for(let W in F)R[W]=F[W];return R}let x=1;function C(R,A,F){let W={};for(let re in R)W[re]=A.call(this,R[re],re,R);return W}function M(R,A,F){let W={};for(let re in R)A.call(this,R[re],re,R)&&(W[re]=R[re]);return W}function g(R){return Array.isArray(R)?R.map(g):typeof R=="object"&&R?C(R,g):R}let P={};function T(R){P[R]||(typeof console!="undefined"&&console.warn(R),P[R]=!0)}function z(R,A,F){return(F.y-R.y)*(A.x-R.x)>(A.y-R.y)*(F.x-R.x)}function O(R){return typeof WorkerGlobalScope!="undefined"&&R!==void 0&&R instanceof WorkerGlobalScope}let V=null;function G(R){return typeof ImageBitmap!="undefined"&&R instanceof ImageBitmap}let Z="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAAC0lEQVQYV2NgAAIAAAUAAarVyFEAAAAASUVORK5CYII=";function j(R,A,F,W,re){return a(this,void 0,void 0,function*(){if(typeof VideoFrame=="undefined")throw new Error("VideoFrame not supported");let fe=new VideoFrame(R,{timestamp:0});try{let pe=fe==null?void 0:fe.format;if(!pe||!pe.startsWith("BGR")&&!pe.startsWith("RGB"))throw new Error(`Unrecognized format ${pe}`);let ze=pe.startsWith("BGR"),Ke=new Uint8ClampedArray(W*re*4);if(yield fe.copyTo(Ke,function(ct,Lt,$t,fr,mr){let Pr=4*Math.max(-Lt,0),zr=(Math.max(0,$t)-$t)*fr*4+Pr,ui=4*fr,yi=Math.max(0,Lt),vn=Math.max(0,$t);return{rect:{x:yi,y:vn,width:Math.min(ct.width,Lt+fr)-yi,height:Math.min(ct.height,$t+mr)-vn},layout:[{offset:zr,stride:ui}]}}(R,A,F,W,re)),ze)for(let ct=0;ct<Ke.length;ct+=4){let Lt=Ke[ct];Ke[ct]=Ke[ct+2],Ke[ct+2]=Lt}return Ke}finally{fe.close()}})}let N,H,te="AbortError";function oe(){return new Error(te)}let _e={MAX_PARALLEL_IMAGE_REQUESTS:16,MAX_PARALLEL_IMAGE_REQUESTS_PER_FRAME:8,MAX_TILE_CACHE_ZOOM_LEVELS:5,REGISTERED_PROTOCOLS:{},WORKER_URL:""};function Ee(R){return _e.REGISTERED_PROTOCOLS[R.substring(0,R.indexOf("://"))]}let Ce="global-dispatcher";class me extends Error{constructor(A,F,W,re){super(`AJAXError: ${F} (${A}): ${W}`),this.status=A,this.statusText=F,this.url=W,this.body=re}}let ie=()=>O(self)?self.worker&&self.worker.referrer:(window.location.protocol==="blob:"?window.parent:window).location.href,Se=function(R,A){if(/:\/\//.test(R.url)&&!/^https?:|^file:/.test(R.url)){let W=Ee(R.url);if(W)return W(R,A);if(O(self)&&self.worker&&self.worker.actor)return self.worker.actor.sendAsync({type:"GR",data:R,targetMapId:Ce},A)}if(!(/^file:/.test(F=R.url)||/^file:/.test(ie())&&!/^\w+:/.test(F))){if(fetch&&Request&&AbortController&&Object.prototype.hasOwnProperty.call(Request.prototype,"signal"))return function(W,re){return a(this,void 0,void 0,function*(){let fe=new Request(W.url,{method:W.method||"GET",body:W.body,credentials:W.credentials,headers:W.headers,cache:W.cache,referrer:ie(),signal:re.signal});W.type!=="json"||fe.headers.has("Accept")||fe.headers.set("Accept","application/json");let pe=yield fetch(fe);if(!pe.ok){let ct=yield pe.blob();throw new me(pe.status,pe.statusText,W.url,ct)}let ze;ze=W.type==="arrayBuffer"||W.type==="image"?pe.arrayBuffer():W.type==="json"?pe.json():pe.text();let Ke=yield ze;if(re.signal.aborted)throw oe();return{data:Ke,cacheControl:pe.headers.get("Cache-Control"),expires:pe.headers.get("Expires")}})}(R,A);if(O(self)&&self.worker&&self.worker.actor)return self.worker.actor.sendAsync({type:"GR",data:R,mustQueue:!0,targetMapId:Ce},A)}var F;return function(W,re){return new Promise((fe,pe)=>{var ze;let Ke=new XMLHttpRequest;Ke.open(W.method||"GET",W.url,!0),W.type!=="arrayBuffer"&&W.type!=="image"||(Ke.responseType="arraybuffer");for(let ct in W.headers)Ke.setRequestHeader(ct,W.headers[ct]);W.type==="json"&&(Ke.responseType="text",!((ze=W.headers)===null||ze===void 0)&&ze.Accept||Ke.setRequestHeader("Accept","application/json")),Ke.withCredentials=W.credentials==="include",Ke.onerror=()=>{pe(new Error(Ke.statusText))},Ke.onload=()=>{if(!re.signal.aborted)if((Ke.status>=200&&Ke.status<300||Ke.status===0)&&Ke.response!==null){let ct=Ke.response;if(W.type==="json")try{ct=JSON.parse(Ke.response)}catch(Lt){return void pe(Lt)}fe({data:ct,cacheControl:Ke.getResponseHeader("Cache-Control"),expires:Ke.getResponseHeader("Expires")})}else{let ct=new Blob([Ke.response],{type:Ke.getResponseHeader("Content-Type")});pe(new me(Ke.status,Ke.statusText,W.url,ct))}},re.signal.addEventListener("abort",()=>{Ke.abort(),pe(oe())}),Ke.send(W.body)})}(R,A)};function Le(R){if(!R||R.indexOf("://")<=0||R.indexOf("data:image/")===0||R.indexOf("blob:")===0)return!0;let A=new URL(R),F=window.location;return A.protocol===F.protocol&&A.host===F.host}function Ae(R,A,F){F[R]&&F[R].indexOf(A)!==-1||(F[R]=F[R]||[],F[R].push(A))}function Fe(R,A,F){if(F&&F[R]){let W=F[R].indexOf(A);W!==-1&&F[R].splice(W,1)}}class Pe{constructor(A,F={}){L(this,F),this.type=A}}class ge extends Pe{constructor(A,F={}){super("error",L({error:A},F))}}class Re{on(A,F){return this._listeners=this._listeners||{},Ae(A,F,this._listeners),this}off(A,F){return Fe(A,F,this._listeners),Fe(A,F,this._oneTimeListeners),this}once(A,F){return F?(this._oneTimeListeners=this._oneTimeListeners||{},Ae(A,F,this._oneTimeListeners),this):new Promise(W=>this.once(A,W))}fire(A,F){typeof A=="string"&&(A=new Pe(A,F||{}));let W=A.type;if(this.listens(W)){A.target=this;let re=this._listeners&&this._listeners[W]?this._listeners[W].slice():[];for(let ze of re)ze.call(this,A);let fe=this._oneTimeListeners&&this._oneTimeListeners[W]?this._oneTimeListeners[W].slice():[];for(let ze of fe)Fe(W,ze,this._oneTimeListeners),ze.call(this,A);let pe=this._eventedParent;pe&&(L(A,typeof this._eventedParentData=="function"?this._eventedParentData():this._eventedParentData),pe.fire(A))}else A instanceof ge&&console.error(A.error);return this}listens(A){return this._listeners&&this._listeners[A]&&this._listeners[A].length>0||this._oneTimeListeners&&this._oneTimeListeners[A]&&this._oneTimeListeners[A].length>0||this._eventedParent&&this._eventedParent.listens(A)}setEventedParent(A,F){return this._eventedParent=A,this._eventedParentData=F,this}}var ce={$version:8,$root:{version:{required:!0,type:"enum",values:[8]},name:{type:"string"},metadata:{type:"*"},center:{type:"array",value:"number"},zoom:{type:"number"},bearing:{type:"number",default:0,period:360,units:"degrees"},pitch:{type:"number",default:0,units:"degrees"},light:{type:"light"},sky:{type:"sky"},projection:{type:"projection"},terrain:{type:"terrain"},sources:{required:!0,type:"sources"},sprite:{type:"sprite"},glyphs:{type:"string"},transition:{type:"transition"},layers:{required:!0,type:"array",value:"layer"}},sources:{"*":{type:"source"}},source:["source_vector","source_raster","source_raster_dem","source_geojson","source_video","source_image"],source_vector:{type:{required:!0,type:"enum",values:{vector:{}}},url:{type:"string"},tiles:{type:"array",value:"string"},bounds:{type:"array",value:"number",length:4,default:[-180,-85.051129,180,85.051129]},scheme:{type:"enum",values:{xyz:{},tms:{}},default:"xyz"},minzoom:{type:"number",default:0},maxzoom:{type:"number",default:22},attribution:{type:"string"},promoteId:{type:"promoteId"},volatile:{type:"boolean",default:!1},"*":{type:"*"}},source_raster:{type:{required:!0,type:"enum",values:{raster:{}}},url:{type:"string"},tiles:{type:"array",value:"string"},bounds:{type:"array",value:"number",length:4,default:[-180,-85.051129,180,85.051129]},minzoom:{type:"number",default:0},maxzoom:{type:"number",default:22},tileSize:{type:"number",default:512,units:"pixels"},scheme:{type:"enum",values:{xyz:{},tms:{}},default:"xyz"},attribution:{type:"string"},volatile:{type:"boolean",default:!1},"*":{type:"*"}},source_raster_dem:{type:{required:!0,type:"enum",values:{"raster-dem":{}}},url:{type:"string"},tiles:{type:"array",value:"string"},bounds:{type:"array",value:"number",length:4,default:[-180,-85.051129,180,85.051129]},minzoom:{type:"number",default:0},maxzoom:{type:"number",default:22},tileSize:{type:"number",default:512,units:"pixels"},attribution:{type:"string"},encoding:{type:"enum",values:{terrarium:{},mapbox:{},custom:{}},default:"mapbox"},redFactor:{type:"number",default:1},blueFactor:{type:"number",default:1},greenFactor:{type:"number",default:1},baseShift:{type:"number",default:0},volatile:{type:"boolean",default:!1},"*":{type:"*"}},source_geojson:{type:{required:!0,type:"enum",values:{geojson:{}}},data:{required:!0,type:"*"},maxzoom:{type:"number",default:18},attribution:{type:"string"},buffer:{type:"number",default:128,maximum:512,minimum:0},filter:{type:"*"},tolerance:{type:"number",default:.375},cluster:{type:"boolean",default:!1},clusterRadius:{type:"number",default:50,minimum:0},clusterMaxZoom:{type:"number"},clusterMinPoints:{type:"number"},clusterProperties:{type:"*"},lineMetrics:{type:"boolean",default:!1},generateId:{type:"boolean",default:!1},promoteId:{type:"promoteId"}},source_video:{type:{required:!0,type:"enum",values:{video:{}}},urls:{required:!0,type:"array",value:"string"},coordinates:{required:!0,type:"array",length:4,value:{type:"array",length:2,value:"number"}}},source_image:{type:{required:!0,type:"enum",values:{image:{}}},url:{required:!0,type:"string"},coordinates:{required:!0,type:"array",length:4,value:{type:"array",length:2,value:"number"}}},layer:{id:{type:"string",required:!0},type:{type:"enum",values:{fill:{},line:{},symbol:{},circle:{},heatmap:{},"fill-extrusion":{},raster:{},hillshade:{},background:{}},required:!0},metadata:{type:"*"},source:{type:"string"},"source-layer":{type:"string"},minzoom:{type:"number",minimum:0,maximum:24},maxzoom:{type:"number",minimum:0,maximum:24},filter:{type:"filter"},layout:{type:"layout"},paint:{type:"paint"}},layout:["layout_fill","layout_line","layout_circle","layout_heatmap","layout_fill-extrusion","layout_symbol","layout_raster","layout_hillshade","layout_background"],layout_background:{visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},layout_fill:{"fill-sort-key":{type:"number",expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},layout_circle:{"circle-sort-key":{type:"number",expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},layout_heatmap:{visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},"layout_fill-extrusion":{visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},layout_line:{"line-cap":{type:"enum",values:{butt:{},round:{},square:{}},default:"butt",expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"line-join":{type:"enum",values:{bevel:{},round:{},miter:{}},default:"miter",expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"line-miter-limit":{type:"number",default:2,requires:[{"line-join":"miter"}],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"line-round-limit":{type:"number",default:1.05,requires:[{"line-join":"round"}],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"line-sort-key":{type:"number",expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},layout_symbol:{"symbol-placement":{type:"enum",values:{point:{},line:{},"line-center":{}},default:"point",expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"symbol-spacing":{type:"number",default:250,minimum:1,units:"pixels",requires:[{"symbol-placement":"line"}],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"symbol-avoid-edges":{type:"boolean",default:!1,expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"symbol-sort-key":{type:"number",expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"symbol-z-order":{type:"enum",values:{auto:{},"viewport-y":{},source:{}},default:"auto",expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-allow-overlap":{type:"boolean",default:!1,requires:["icon-image",{"!":"icon-overlap"}],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-overlap":{type:"enum",values:{never:{},always:{},cooperative:{}},requires:["icon-image"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-ignore-placement":{type:"boolean",default:!1,requires:["icon-image"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-optional":{type:"boolean",default:!1,requires:["icon-image","text-field"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-rotation-alignment":{type:"enum",values:{map:{},viewport:{},auto:{}},default:"auto",requires:["icon-image"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-size":{type:"number",default:1,minimum:0,units:"factor of the original icon size",requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"icon-text-fit":{type:"enum",values:{none:{},width:{},height:{},both:{}},default:"none",requires:["icon-image","text-field"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-text-fit-padding":{type:"array",value:"number",length:4,default:[0,0,0,0],units:"pixels",requires:["icon-image","text-field",{"icon-text-fit":["both","width","height"]}],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"icon-image":{type:"resolvedImage",tokens:!0,expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"icon-rotate":{type:"number",default:0,period:360,units:"degrees",requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"icon-padding":{type:"padding",default:[2],units:"pixels",requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"icon-keep-upright":{type:"boolean",default:!1,requires:["icon-image",{"icon-rotation-alignment":"map"},{"symbol-placement":["line","line-center"]}],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"icon-offset":{type:"array",value:"number",length:2,default:[0,0],requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"icon-anchor":{type:"enum",values:{center:{},left:{},right:{},top:{},bottom:{},"top-left":{},"top-right":{},"bottom-left":{},"bottom-right":{}},default:"center",requires:["icon-image"],expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"icon-pitch-alignment":{type:"enum",values:{map:{},viewport:{},auto:{}},default:"auto",requires:["icon-image"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-pitch-alignment":{type:"enum",values:{map:{},viewport:{},auto:{}},default:"auto",requires:["text-field"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-rotation-alignment":{type:"enum",values:{map:{},viewport:{},"viewport-glyph":{},auto:{}},default:"auto",requires:["text-field"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-field":{type:"formatted",default:"",tokens:!0,expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-font":{type:"array",value:"string",default:["Open Sans Regular","Arial Unicode MS Regular"],requires:["text-field"],expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-size":{type:"number",default:16,minimum:0,units:"pixels",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-max-width":{type:"number",default:10,minimum:0,units:"ems",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-line-height":{type:"number",default:1.2,units:"ems",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"text-letter-spacing":{type:"number",default:0,units:"ems",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-justify":{type:"enum",values:{auto:{},left:{},center:{},right:{}},default:"center",requires:["text-field"],expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-radial-offset":{type:"number",units:"ems",default:0,requires:["text-field"],"property-type":"data-driven",expression:{interpolated:!0,parameters:["zoom","feature"]}},"text-variable-anchor":{type:"array",value:"enum",values:{center:{},left:{},right:{},top:{},bottom:{},"top-left":{},"top-right":{},"bottom-left":{},"bottom-right":{}},requires:["text-field",{"symbol-placement":["point"]}],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-variable-anchor-offset":{type:"variableAnchorOffsetCollection",requires:["text-field",{"symbol-placement":["point"]}],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-anchor":{type:"enum",values:{center:{},left:{},right:{},top:{},bottom:{},"top-left":{},"top-right":{},"bottom-left":{},"bottom-right":{}},default:"center",requires:["text-field",{"!":"text-variable-anchor"}],expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-max-angle":{type:"number",default:45,units:"degrees",requires:["text-field",{"symbol-placement":["line","line-center"]}],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"text-writing-mode":{type:"array",value:"enum",values:{horizontal:{},vertical:{}},requires:["text-field",{"symbol-placement":["point"]}],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-rotate":{type:"number",default:0,period:360,units:"degrees",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-padding":{type:"number",default:2,minimum:0,units:"pixels",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"text-keep-upright":{type:"boolean",default:!0,requires:["text-field",{"text-rotation-alignment":"map"},{"symbol-placement":["line","line-center"]}],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-transform":{type:"enum",values:{none:{},uppercase:{},lowercase:{}},default:"none",requires:["text-field"],expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-offset":{type:"array",value:"number",units:"ems",length:2,default:[0,0],requires:["text-field",{"!":"text-radial-offset"}],expression:{interpolated:!0,parameters:["zoom","feature"]},"property-type":"data-driven"},"text-allow-overlap":{type:"boolean",default:!1,requires:["text-field",{"!":"text-overlap"}],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-overlap":{type:"enum",values:{never:{},always:{},cooperative:{}},requires:["text-field"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-ignore-placement":{type:"boolean",default:!1,requires:["text-field"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-optional":{type:"boolean",default:!1,requires:["text-field","icon-image"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},layout_raster:{visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},layout_hillshade:{visibility:{type:"enum",values:{visible:{},none:{}},default:"visible","property-type":"constant"}},filter:{type:"array",value:"*"},filter_operator:{type:"enum",values:{"==":{},"!=":{},">":{},">=":{},"<":{},"<=":{},in:{},"!in":{},all:{},any:{},none:{},has:{},"!has":{}}},geometry_type:{type:"enum",values:{Point:{},LineString:{},Polygon:{}}},function:{expression:{type:"expression"},stops:{type:"array",value:"function_stop"},base:{type:"number",default:1,minimum:0},property:{type:"string",default:"$zoom"},type:{type:"enum",values:{identity:{},exponential:{},interval:{},categorical:{}},default:"exponential"},colorSpace:{type:"enum",values:{rgb:{},lab:{},hcl:{}},default:"rgb"},default:{type:"*",required:!1}},function_stop:{type:"array",minimum:0,maximum:24,value:["number","color"],length:2},expression:{type:"array",value:"*",minimum:1},light:{anchor:{type:"enum",default:"viewport",values:{map:{},viewport:{}},"property-type":"data-constant",transition:!1,expression:{interpolated:!1,parameters:["zoom"]}},position:{type:"array",default:[1.15,210,30],length:3,value:"number","property-type":"data-constant",transition:!0,expression:{interpolated:!0,parameters:["zoom"]}},color:{type:"color","property-type":"data-constant",default:"#ffffff",expression:{interpolated:!0,parameters:["zoom"]},transition:!0},intensity:{type:"number","property-type":"data-constant",default:.5,minimum:0,maximum:1,expression:{interpolated:!0,parameters:["zoom"]},transition:!0}},sky:{"sky-color":{type:"color","property-type":"data-constant",default:"#88C6FC",expression:{interpolated:!0,parameters:["zoom"]},transition:!0},"horizon-color":{type:"color","property-type":"data-constant",default:"#ffffff",expression:{interpolated:!0,parameters:["zoom"]},transition:!0},"fog-color":{type:"color","property-type":"data-constant",default:"#ffffff",expression:{interpolated:!0,parameters:["zoom"]},transition:!0},"fog-ground-blend":{type:"number","property-type":"data-constant",default:.5,minimum:0,maximum:1,expression:{interpolated:!0,parameters:["zoom"]},transition:!0},"horizon-fog-blend":{type:"number","property-type":"data-constant",default:.8,minimum:0,maximum:1,expression:{interpolated:!0,parameters:["zoom"]},transition:!0},"sky-horizon-blend":{type:"number","property-type":"data-constant",default:.8,minimum:0,maximum:1,expression:{interpolated:!0,parameters:["zoom"]},transition:!0},"atmosphere-blend":{type:"number","property-type":"data-constant",default:.8,minimum:0,maximum:1,expression:{interpolated:!0,parameters:["zoom"]},transition:!0}},terrain:{source:{type:"string",required:!0},exaggeration:{type:"number",minimum:0,default:1}},projection:{type:{type:"enum",default:"mercator",values:{mercator:{},globe:{}}}},paint:["paint_fill","paint_line","paint_circle","paint_heatmap","paint_fill-extrusion","paint_symbol","paint_raster","paint_hillshade","paint_background"],paint_fill:{"fill-antialias":{type:"boolean",default:!0,expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"fill-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"fill-color":{type:"color",default:"#000000",transition:!0,requires:[{"!":"fill-pattern"}],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"fill-outline-color":{type:"color",transition:!0,requires:[{"!":"fill-pattern"},{"fill-antialias":!0}],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"fill-translate":{type:"array",value:"number",length:2,default:[0,0],transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"fill-translate-anchor":{type:"enum",values:{map:{},viewport:{}},default:"map",requires:["fill-translate"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"fill-pattern":{type:"resolvedImage",transition:!0,expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"cross-faded-data-driven"}},"paint_fill-extrusion":{"fill-extrusion-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"fill-extrusion-color":{type:"color",default:"#000000",transition:!0,requires:[{"!":"fill-extrusion-pattern"}],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"fill-extrusion-translate":{type:"array",value:"number",length:2,default:[0,0],transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"fill-extrusion-translate-anchor":{type:"enum",values:{map:{},viewport:{}},default:"map",requires:["fill-extrusion-translate"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"fill-extrusion-pattern":{type:"resolvedImage",transition:!0,expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"cross-faded-data-driven"},"fill-extrusion-height":{type:"number",default:0,minimum:0,units:"meters",transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"fill-extrusion-base":{type:"number",default:0,minimum:0,units:"meters",transition:!0,requires:["fill-extrusion-height"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"fill-extrusion-vertical-gradient":{type:"boolean",default:!0,transition:!1,expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"}},paint_line:{"line-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"line-color":{type:"color",default:"#000000",transition:!0,requires:[{"!":"line-pattern"}],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"line-translate":{type:"array",value:"number",length:2,default:[0,0],transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"line-translate-anchor":{type:"enum",values:{map:{},viewport:{}},default:"map",requires:["line-translate"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"line-width":{type:"number",default:1,minimum:0,transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"line-gap-width":{type:"number",default:0,minimum:0,transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"line-offset":{type:"number",default:0,transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"line-blur":{type:"number",default:0,minimum:0,transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"line-dasharray":{type:"array",value:"number",minimum:0,transition:!0,units:"line widths",requires:[{"!":"line-pattern"}],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"cross-faded"},"line-pattern":{type:"resolvedImage",transition:!0,expression:{interpolated:!1,parameters:["zoom","feature"]},"property-type":"cross-faded-data-driven"},"line-gradient":{type:"color",transition:!1,requires:[{"!":"line-dasharray"},{"!":"line-pattern"},{source:"geojson",has:{lineMetrics:!0}}],expression:{interpolated:!0,parameters:["line-progress"]},"property-type":"color-ramp"}},paint_circle:{"circle-radius":{type:"number",default:5,minimum:0,transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"circle-color":{type:"color",default:"#000000",transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"circle-blur":{type:"number",default:0,transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"circle-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"circle-translate":{type:"array",value:"number",length:2,default:[0,0],transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"circle-translate-anchor":{type:"enum",values:{map:{},viewport:{}},default:"map",requires:["circle-translate"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"circle-pitch-scale":{type:"enum",values:{map:{},viewport:{}},default:"map",expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"circle-pitch-alignment":{type:"enum",values:{map:{},viewport:{}},default:"viewport",expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"circle-stroke-width":{type:"number",default:0,minimum:0,transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"circle-stroke-color":{type:"color",default:"#000000",transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"circle-stroke-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"}},paint_heatmap:{"heatmap-radius":{type:"number",default:30,minimum:1,transition:!0,units:"pixels",expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"heatmap-weight":{type:"number",default:1,minimum:0,transition:!1,expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"heatmap-intensity":{type:"number",default:1,minimum:0,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"heatmap-color":{type:"color",default:["interpolate",["linear"],["heatmap-density"],0,"rgba(0, 0, 255, 0)",.1,"royalblue",.3,"cyan",.5,"lime",.7,"yellow",1,"red"],transition:!1,expression:{interpolated:!0,parameters:["heatmap-density"]},"property-type":"color-ramp"},"heatmap-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"}},paint_symbol:{"icon-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"icon-color":{type:"color",default:"#000000",transition:!0,requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"icon-halo-color":{type:"color",default:"rgba(0, 0, 0, 0)",transition:!0,requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"icon-halo-width":{type:"number",default:0,minimum:0,transition:!0,units:"pixels",requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"icon-halo-blur":{type:"number",default:0,minimum:0,transition:!0,units:"pixels",requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"icon-translate":{type:"array",value:"number",length:2,default:[0,0],transition:!0,units:"pixels",requires:["icon-image"],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"icon-translate-anchor":{type:"enum",values:{map:{},viewport:{}},default:"map",requires:["icon-image","icon-translate"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"text-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"text-color":{type:"color",default:"#000000",transition:!0,overridable:!0,requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"text-halo-color":{type:"color",default:"rgba(0, 0, 0, 0)",transition:!0,requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"text-halo-width":{type:"number",default:0,minimum:0,transition:!0,units:"pixels",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"text-halo-blur":{type:"number",default:0,minimum:0,transition:!0,units:"pixels",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom","feature","feature-state"]},"property-type":"data-driven"},"text-translate":{type:"array",value:"number",length:2,default:[0,0],transition:!0,units:"pixels",requires:["text-field"],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"text-translate-anchor":{type:"enum",values:{map:{},viewport:{}},default:"map",requires:["text-field","text-translate"],expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"}},paint_raster:{"raster-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"raster-hue-rotate":{type:"number",default:0,period:360,transition:!0,units:"degrees",expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"raster-brightness-min":{type:"number",default:0,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"raster-brightness-max":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"raster-saturation":{type:"number",default:0,minimum:-1,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"raster-contrast":{type:"number",default:0,minimum:-1,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"raster-resampling":{type:"enum",values:{linear:{},nearest:{}},default:"linear",expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"raster-fade-duration":{type:"number",default:300,minimum:0,transition:!1,units:"milliseconds",expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"}},paint_hillshade:{"hillshade-illumination-direction":{type:"number",default:335,minimum:0,maximum:359,transition:!1,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"hillshade-illumination-anchor":{type:"enum",values:{map:{},viewport:{}},default:"viewport",expression:{interpolated:!1,parameters:["zoom"]},"property-type":"data-constant"},"hillshade-exaggeration":{type:"number",default:.5,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"hillshade-shadow-color":{type:"color",default:"#000000",transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"hillshade-highlight-color":{type:"color",default:"#FFFFFF",transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"hillshade-accent-color":{type:"color",default:"#000000",transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"}},paint_background:{"background-color":{type:"color",default:"#000000",transition:!0,requires:[{"!":"background-pattern"}],expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"},"background-pattern":{type:"resolvedImage",transition:!0,expression:{interpolated:!1,parameters:["zoom"]},"property-type":"cross-faded"},"background-opacity":{type:"number",default:1,minimum:0,maximum:1,transition:!0,expression:{interpolated:!0,parameters:["zoom"]},"property-type":"data-constant"}},transition:{duration:{type:"number",default:300,minimum:0,units:"milliseconds"},delay:{type:"number",default:0,minimum:0,units:"milliseconds"}},"property-type":{"data-driven":{type:"property-type"},"cross-faded":{type:"property-type"},"cross-faded-data-driven":{type:"property-type"},"color-ramp":{type:"property-type"},"data-constant":{type:"property-type"},constant:{type:"property-type"}},promoteId:{"*":{type:"string"}}};let Ze=["type","source","source-layer","minzoom","maxzoom","filter","layout"];function ut(R,A){let F={};for(let W in R)W!=="ref"&&(F[W]=R[W]);return Ze.forEach(W=>{W in A&&(F[W]=A[W])}),F}function pt(R,A){if(Array.isArray(R)){if(!Array.isArray(A)||R.length!==A.length)return!1;for(let F=0;F<R.length;F++)if(!pt(R[F],A[F]))return!1;return!0}if(typeof R=="object"&&R!==null&&A!==null){if(typeof A!="object"||Object.keys(R).length!==Object.keys(A).length)return!1;for(let F in R)if(!pt(R[F],A[F]))return!1;return!0}return R===A}function Zt(R,A){R.push(A)}function st(R,A,F){Zt(F,{command:"addSource",args:[R,A[R]]})}function lt(R,A,F){Zt(A,{command:"removeSource",args:[R]}),F[R]=!0}function Gt(R,A,F,W){lt(R,F,W),st(R,A,F)}function Nt(R,A,F){let W;for(W in R[F])if(Object.prototype.hasOwnProperty.call(R[F],W)&&W!=="data"&&!pt(R[F][W],A[F][W]))return!1;for(W in A[F])if(Object.prototype.hasOwnProperty.call(A[F],W)&&W!=="data"&&!pt(R[F][W],A[F][W]))return!1;return!0}function Jt(R,A,F,W,re,fe){R=R||{},A=A||{};for(let pe in R)Object.prototype.hasOwnProperty.call(R,pe)&&(pt(R[pe],A[pe])||F.push({command:fe,args:[W,pe,A[pe],re]}));for(let pe in A)Object.prototype.hasOwnProperty.call(A,pe)&&!Object.prototype.hasOwnProperty.call(R,pe)&&(pt(R[pe],A[pe])||F.push({command:fe,args:[W,pe,A[pe],re]}))}function sr(R){return R.id}function wr(R,A){return R[A.id]=A,R}class cr{constructor(A,F,W,re){this.message=(A?`${A}: `:"")+W,re&&(this.identifier=re),F!=null&&F.__line__&&(this.line=F.__line__)}}function $e(R,...A){for(let F of A)for(let W in F)R[W]=F[W];return R}class St extends Error{constructor(A,F){super(F),this.message=F,this.key=A}}class Qt{constructor(A,F=[]){this.parent=A,this.bindings={};for(let[W,re]of F)this.bindings[W]=re}concat(A){return new Qt(this,A)}get(A){if(this.bindings[A])return this.bindings[A];if(this.parent)return this.parent.get(A);throw new Error(`${A} not found in scope.`)}has(A){return!!this.bindings[A]||!!this.parent&&this.parent.has(A)}}let Vt={kind:"null"},_t={kind:"number"},It={kind:"string"},mt={kind:"boolean"},er={kind:"color"},lr={kind:"object"},Tr={kind:"value"},Lr={kind:"collator"},ti={kind:"formatted"},Br={kind:"padding"},Vr={kind:"resolvedImage"},dt={kind:"variableAnchorOffsetCollection"};function Ge(R,A){return{kind:"array",itemType:R,N:A}}function Je(R){if(R.kind==="array"){let A=Je(R.itemType);return typeof R.N=="number"?`array<${A}, ${R.N}>`:R.itemType.kind==="value"?"array":`array<${A}>`}return R.kind}let je=[Vt,_t,It,mt,er,ti,lr,Ge(Tr),Br,Vr,dt];function tt(R,A){if(A.kind==="error")return null;if(R.kind==="array"){if(A.kind==="array"&&(A.N===0&&A.itemType.kind==="value"||!tt(R.itemType,A.itemType))&&(typeof R.N!="number"||R.N===A.N))return null}else{if(R.kind===A.kind)return null;if(R.kind==="value"){for(let F of je)if(!tt(F,A))return null}}return`Expected ${Je(R)} but found ${Je(A)} instead.`}function xt(R,A){return A.some(F=>F.kind===R.kind)}function Ie(R,A){return A.some(F=>F==="null"?R===null:F==="array"?Array.isArray(R):F==="object"?R&&!Array.isArray(R)&&typeof R=="object":F===typeof R)}function xe(R,A){return R.kind==="array"&&A.kind==="array"?R.itemType.kind===A.itemType.kind&&typeof R.N=="number":R.kind===A.kind}let ke=.96422,vt=.82521,ir=4/29,ar=6/29,vr=3*ar*ar,ii=ar*ar*ar,pi=Math.PI/180,$r=180/Math.PI;function di(R){return(R%=360)<0&&(R+=360),R}function ji([R,A,F,W]){let re,fe,pe=wi((.2225045*(R=In(R))+.7168786*(A=In(A))+.0606169*(F=In(F)))/1);R===A&&A===F?re=fe=pe:(re=wi((.4360747*R+.3850649*A+.1430804*F)/ke),fe=wi((.0139322*R+.0971045*A+.7141733*F)/vt));let ze=116*pe-16;return[ze<0?0:ze,500*(re-pe),200*(pe-fe),W]}function In(R){return R<=.04045?R/12.92:Math.pow((R+.055)/1.055,2.4)}function wi(R){return R>ii?Math.pow(R,1/3):R/vr+ir}function On([R,A,F,W]){let re=(R+16)/116,fe=isNaN(A)?re:re+A/500,pe=isNaN(F)?re:re-F/200;return re=1*Fn(re),fe=ke*Fn(fe),pe=vt*Fn(pe),[qn(3.1338561*fe-1.6168667*re-.4906146*pe),qn(-.9787684*fe+1.9161415*re+.033454*pe),qn(.0719453*fe-.2289914*re+1.4052427*pe),W]}function qn(R){return(R=R<=.00304?12.92*R:1.055*Math.pow(R,1/2.4)-.055)<0?0:R>1?1:R}function Fn(R){return R>ar?R*R*R:vr*(R-ir)}function ra(R){return parseInt(R.padEnd(2,R),16)/255}function la(R,A){return Ut(A?R/100:R,0,1)}function Ut(R,A,F){return Math.min(Math.max(A,R),F)}function wt(R){return!R.some(Number.isNaN)}let rr={aliceblue:[240,248,255],antiquewhite:[250,235,215],aqua:[0,255,255],aquamarine:[127,255,212],azure:[240,255,255],beige:[245,245,220],bisque:[255,228,196],black:[0,0,0],blanchedalmond:[255,235,205],blue:[0,0,255],blueviolet:[138,43,226],brown:[165,42,42],burlywood:[222,184,135],cadetblue:[95,158,160],chartreuse:[127,255,0],chocolate:[210,105,30],coral:[255,127,80],cornflowerblue:[100,149,237],cornsilk:[255,248,220],crimson:[220,20,60],cyan:[0,255,255],darkblue:[0,0,139],darkcyan:[0,139,139],darkgoldenrod:[184,134,11],darkgray:[169,169,169],darkgreen:[0,100,0],darkgrey:[169,169,169],darkkhaki:[189,183,107],darkmagenta:[139,0,139],darkolivegreen:[85,107,47],darkorange:[255,140,0],darkorchid:[153,50,204],darkred:[139,0,0],darksalmon:[233,150,122],darkseagreen:[143,188,143],darkslateblue:[72,61,139],darkslategray:[47,79,79],darkslategrey:[47,79,79],darkturquoise:[0,206,209],darkviolet:[148,0,211],deeppink:[255,20,147],deepskyblue:[0,191,255],dimgray:[105,105,105],dimgrey:[105,105,105],dodgerblue:[30,144,255],firebrick:[178,34,34],floralwhite:[255,250,240],forestgreen:[34,139,34],fuchsia:[255,0,255],gainsboro:[220,220,220],ghostwhite:[248,248,255],gold:[255,215,0],goldenrod:[218,165,32],gray:[128,128,128],green:[0,128,0],greenyellow:[173,255,47],grey:[128,128,128],honeydew:[240,255,240],hotpink:[255,105,180],indianred:[205,92,92],indigo:[75,0,130],ivory:[255,255,240],khaki:[240,230,140],lavender:[230,230,250],lavenderblush:[255,240,245],lawngreen:[124,252,0],lemonchiffon:[255,250,205],lightblue:[173,216,230],lightcoral:[240,128,128],lightcyan:[224,255,255],lightgoldenrodyellow:[250,250,210],lightgray:[211,211,211],lightgreen:[144,238,144],lightgrey:[211,211,211],lightpink:[255,182,193],lightsalmon:[255,160,122],lightseagreen:[32,178,170],lightskyblue:[135,206,250],lightslategray:[119,136,153],lightslategrey:[119,136,153],lightsteelblue:[176,196,222],lightyellow:[255,255,224],lime:[0,255,0],limegreen:[50,205,50],linen:[250,240,230],magenta:[255,0,255],maroon:[128,0,0],mediumaquamarine:[102,205,170],mediumblue:[0,0,205],mediumorchid:[186,85,211],mediumpurple:[147,112,219],mediumseagreen:[60,179,113],mediumslateblue:[123,104,238],mediumspringgreen:[0,250,154],mediumturquoise:[72,209,204],mediumvioletred:[199,21,133],midnightblue:[25,25,112],mintcream:[245,255,250],mistyrose:[255,228,225],moccasin:[255,228,181],navajowhite:[255,222,173],navy:[0,0,128],oldlace:[253,245,230],olive:[128,128,0],olivedrab:[107,142,35],orange:[255,165,0],orangered:[255,69,0],orchid:[218,112,214],palegoldenrod:[238,232,170],palegreen:[152,251,152],paleturquoise:[175,238,238],palevioletred:[219,112,147],papayawhip:[255,239,213],peachpuff:[255,218,185],peru:[205,133,63],pink:[255,192,203],plum:[221,160,221],powderblue:[176,224,230],purple:[128,0,128],rebeccapurple:[102,51,153],red:[255,0,0],rosybrown:[188,143,143],royalblue:[65,105,225],saddlebrown:[139,69,19],salmon:[250,128,114],sandybrown:[244,164,96],seagreen:[46,139,87],seashell:[255,245,238],sienna:[160,82,45],silver:[192,192,192],skyblue:[135,206,235],slateblue:[106,90,205],slategray:[112,128,144],slategrey:[112,128,144],snow:[255,250,250],springgreen:[0,255,127],steelblue:[70,130,180],tan:[210,180,140],teal:[0,128,128],thistle:[216,191,216],tomato:[255,99,71],turquoise:[64,224,208],violet:[238,130,238],wheat:[245,222,179],white:[255,255,255],whitesmoke:[245,245,245],yellow:[255,255,0],yellowgreen:[154,205,50]};class nr{constructor(A,F,W,re=1,fe=!0){this.r=A,this.g=F,this.b=W,this.a=re,fe||(this.r*=re,this.g*=re,this.b*=re,re||this.overwriteGetter("rgb",[A,F,W,re]))}static parse(A){if(A instanceof nr)return A;if(typeof A!="string")return;let F=function(W){if((W=W.toLowerCase().trim())==="transparent")return[0,0,0,0];let re=rr[W];if(re){let[pe,ze,Ke]=re;return[pe/255,ze/255,Ke/255,1]}if(W.startsWith("#")&&/^#(?:[0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})$/.test(W)){let pe=W.length<6?1:2,ze=1;return[ra(W.slice(ze,ze+=pe)),ra(W.slice(ze,ze+=pe)),ra(W.slice(ze,ze+=pe)),ra(W.slice(ze,ze+pe)||"ff")]}if(W.startsWith("rgb")){let pe=W.match(/^rgba?\(\s*([\de.+-]+)(%)?(?:\s+|\s*(,)\s*)([\de.+-]+)(%)?(?:\s+|\s*(,)\s*)([\de.+-]+)(%)?(?:\s*([,\/])\s*([\de.+-]+)(%)?)?\s*\)$/);if(pe){let[ze,Ke,ct,Lt,$t,fr,mr,Pr,zr,ui,yi,vn]=pe,zi=[Lt||" ",mr||" ",ui].join("");if(zi==="  "||zi==="  /"||zi===",,"||zi===",,,"){let un=[ct,fr,zr].join(""),Tn=un==="%%%"?100:un===""?255:0;if(Tn){let pa=[Ut(+Ke/Tn,0,1),Ut(+$t/Tn,0,1),Ut(+Pr/Tn,0,1),yi?la(+yi,vn):1];if(wt(pa))return pa}}return}}let fe=W.match(/^hsla?\(\s*([\de.+-]+)(?:deg)?(?:\s+|\s*(,)\s*)([\de.+-]+)%(?:\s+|\s*(,)\s*)([\de.+-]+)%(?:\s*([,\/])\s*([\de.+-]+)(%)?)?\s*\)$/);if(fe){let[pe,ze,Ke,ct,Lt,$t,fr,mr,Pr]=fe,zr=[Ke||" ",Lt||" ",fr].join("");if(zr==="  "||zr==="  /"||zr===",,"||zr===",,,"){let ui=[+ze,Ut(+ct,0,100),Ut(+$t,0,100),mr?la(+mr,Pr):1];if(wt(ui))return function([yi,vn,zi,un]){function Tn(pa){let ro=(pa+yi/30)%12,Vo=vn*Math.min(zi,1-zi);return zi-Vo*Math.max(-1,Math.min(ro-3,9-ro,1))}return yi=di(yi),vn/=100,zi/=100,[Tn(0),Tn(8),Tn(4),un]}(ui)}}}(A);return F?new nr(...F,!1):void 0}get rgb(){let{r:A,g:F,b:W,a:re}=this,fe=re||1/0;return this.overwriteGetter("rgb",[A/fe,F/fe,W/fe,re])}get hcl(){return this.overwriteGetter("hcl",function(A){let[F,W,re,fe]=ji(A),pe=Math.sqrt(W*W+re*re);return[Math.round(1e4*pe)?di(Math.atan2(re,W)*$r):NaN,pe,F,fe]}(this.rgb))}get lab(){return this.overwriteGetter("lab",ji(this.rgb))}overwriteGetter(A,F){return Object.defineProperty(this,A,{value:F}),F}toString(){let[A,F,W,re]=this.rgb;return`rgba(${[A,F,W].map(fe=>Math.round(255*fe)).join(",")},${re})`}}nr.black=new nr(0,0,0,1),nr.white=new nr(1,1,1,1),nr.transparent=new nr(0,0,0,0),nr.red=new nr(1,0,0,1);class Er{constructor(A,F,W){this.sensitivity=A?F?"variant":"case":F?"accent":"base",this.locale=W,this.collator=new Intl.Collator(this.locale?this.locale:[],{sensitivity:this.sensitivity,usage:"search"})}compare(A,F){return this.collator.compare(A,F)}resolvedLocale(){return new Intl.Collator(this.locale?this.locale:[]).resolvedOptions().locale}}class Xr{constructor(A,F,W,re,fe){this.text=A,this.image=F,this.scale=W,this.fontStack=re,this.textColor=fe}}class ri{constructor(A){this.sections=A}static fromString(A){return new ri([new Xr(A,null,null,null,null)])}isEmpty(){return this.sections.length===0||!this.sections.some(A=>A.text.length!==0||A.image&&A.image.name.length!==0)}static factory(A){return A instanceof ri?A:ri.fromString(A)}toString(){return this.sections.length===0?"":this.sections.map(A=>A.text).join("")}}class Qr{constructor(A){this.values=A.slice()}static parse(A){if(A instanceof Qr)return A;if(typeof A=="number")return new Qr([A,A,A,A]);if(Array.isArray(A)&&!(A.length<1||A.length>4)){for(let F of A)if(typeof F!="number")return;switch(A.length){case 1:A=[A[0],A[0],A[0],A[0]];break;case 2:A=[A[0],A[1],A[0],A[1]];break;case 3:A=[A[0],A[1],A[2],A[1]]}return new Qr(A)}}toString(){return JSON.stringify(this.values)}}let Oi=new Set(["center","left","right","top","bottom","top-left","top-right","bottom-left","bottom-right"]);class $i{constructor(A){this.values=A.slice()}static parse(A){if(A instanceof $i)return A;if(Array.isArray(A)&&!(A.length<1)&&A.length%2==0){for(let F=0;F<A.length;F+=2){let W=A[F],re=A[F+1];if(typeof W!="string"||!Oi.has(W)||!Array.isArray(re)||re.length!==2||typeof re[0]!="number"||typeof re[1]!="number")return}return new $i(A)}}toString(){return JSON.stringify(this.values)}}class tn{constructor(A){this.name=A.name,this.available=A.available}toString(){return this.name}static fromString(A){return A?new tn({name:A,available:!1}):null}}function fn(R,A,F,W){return typeof R=="number"&&R>=0&&R<=255&&typeof A=="number"&&A>=0&&A<=255&&typeof F=="number"&&F>=0&&F<=255?W===void 0||typeof W=="number"&&W>=0&&W<=1?null:`Invalid rgba value [${[R,A,F,W].join(", ")}]: 'a' must be between 0 and 1.`:`Invalid rgba value [${(typeof W=="number"?[R,A,F,W]:[R,A,F]).join(", ")}]: 'r', 'g', and 'b' must be between 0 and 255.`}function yn(R){if(R===null||typeof R=="string"||typeof R=="boolean"||typeof R=="number"||R instanceof nr||R instanceof Er||R instanceof ri||R instanceof Qr||R instanceof $i||R instanceof tn)return!0;if(Array.isArray(R)){for(let A of R)if(!yn(A))return!1;return!0}if(typeof R=="object"){for(let A in R)if(!yn(R[A]))return!1;return!0}return!1}function Sn(R){if(R===null)return Vt;if(typeof R=="string")return It;if(typeof R=="boolean")return mt;if(typeof R=="number")return _t;if(R instanceof nr)return er;if(R instanceof Er)return Lr;if(R instanceof ri)return ti;if(R instanceof Qr)return Br;if(R instanceof $i)return dt;if(R instanceof tn)return Vr;if(Array.isArray(R)){let A=R.length,F;for(let W of R){let re=Sn(W);if(F){if(F===re)continue;F=Tr;break}F=re}return Ge(F||Tr,A)}return lr}function Ba(R){let A=typeof R;return R===null?"":A==="string"||A==="number"||A==="boolean"?String(R):R instanceof nr||R instanceof ri||R instanceof Qr||R instanceof $i||R instanceof tn?R.toString():JSON.stringify(R)}class ua{constructor(A,F){this.type=A,this.value=F}static parse(A,F){if(A.length!==2)return F.error(`'literal' expression requires exactly one argument, but found ${A.length-1} instead.`);if(!yn(A[1]))return F.error("invalid value");let W=A[1],re=Sn(W),fe=F.expectedType;return re.kind!=="array"||re.N!==0||!fe||fe.kind!=="array"||typeof fe.N=="number"&&fe.N!==0||(re=fe),new ua(re,W)}evaluate(){return this.value}eachChild(){}outputDefined(){return!0}}class ma{constructor(A){this.name="ExpressionEvaluationError",this.message=A}toJSON(){return this.message}}let Wa={string:It,number:_t,boolean:mt,object:lr};class Fa{constructor(A,F){this.type=A,this.args=F}static parse(A,F){if(A.length<2)return F.error("Expected at least one argument.");let W,re=1,fe=A[0];if(fe==="array"){let ze,Ke;if(A.length>2){let ct=A[1];if(typeof ct!="string"||!(ct in Wa)||ct==="object")return F.error('The item type argument of "array" must be one of string, number, boolean',1);ze=Wa[ct],re++}else ze=Tr;if(A.length>3){if(A[2]!==null&&(typeof A[2]!="number"||A[2]<0||A[2]!==Math.floor(A[2])))return F.error('The length argument to "array" must be a positive integer literal',2);Ke=A[2],re++}W=Ge(ze,Ke)}else{if(!Wa[fe])throw new Error(`Types doesn't contain name = ${fe}`);W=Wa[fe]}let pe=[];for(;re<A.length;re++){let ze=F.parse(A[re],re,Tr);if(!ze)return null;pe.push(ze)}return new Fa(W,pe)}evaluate(A){for(let F=0;F<this.args.length;F++){let W=this.args[F].evaluate(A);if(!tt(this.type,Sn(W)))return W;if(F===this.args.length-1)throw new ma(`Expected value to be of type ${Je(this.type)}, but found ${Je(Sn(W))} instead.`)}throw new Error}eachChild(A){this.args.forEach(A)}outputDefined(){return this.args.every(A=>A.outputDefined())}}let Wo={"to-boolean":mt,"to-color":er,"to-number":_t,"to-string":It};class da{constructor(A,F){this.type=A,this.args=F}static parse(A,F){if(A.length<2)return F.error("Expected at least one argument.");let W=A[0];if(!Wo[W])throw new Error(`Can't parse ${W} as it is not part of the known types`);if((W==="to-boolean"||W==="to-string")&&A.length!==2)return F.error("Expected one argument.");let re=Wo[W],fe=[];for(let pe=1;pe<A.length;pe++){let ze=F.parse(A[pe],pe,Tr);if(!ze)return null;fe.push(ze)}return new da(re,fe)}evaluate(A){switch(this.type.kind){case"boolean":return!!this.args[0].evaluate(A);case"color":{let F,W;for(let re of this.args){if(F=re.evaluate(A),W=null,F instanceof nr)return F;if(typeof F=="string"){let fe=A.parseColor(F);if(fe)return fe}else if(Array.isArray(F)&&(W=F.length<3||F.length>4?`Invalid rbga value ${JSON.stringify(F)}: expected an array containing either three or four numeric values.`:fn(F[0],F[1],F[2],F[3]),!W))return new nr(F[0]/255,F[1]/255,F[2]/255,F[3])}throw new ma(W||`Could not parse color from value '${typeof F=="string"?F:JSON.stringify(F)}'`)}case"padding":{let F;for(let W of this.args){F=W.evaluate(A);let re=Qr.parse(F);if(re)return re}throw new ma(`Could not parse padding from value '${typeof F=="string"?F:JSON.stringify(F)}'`)}case"variableAnchorOffsetCollection":{let F;for(let W of this.args){F=W.evaluate(A);let re=$i.parse(F);if(re)return re}throw new ma(`Could not parse variableAnchorOffsetCollection from value '${typeof F=="string"?F:JSON.stringify(F)}'`)}case"number":{let F=null;for(let W of this.args){if(F=W.evaluate(A),F===null)return 0;let re=Number(F);if(!isNaN(re))return re}throw new ma(`Could not convert ${JSON.stringify(F)} to number.`)}case"formatted":return ri.fromString(Ba(this.args[0].evaluate(A)));case"resolvedImage":return tn.fromString(Ba(this.args[0].evaluate(A)));default:return Ba(this.args[0].evaluate(A))}}eachChild(A){this.args.forEach(A)}outputDefined(){return this.args.every(A=>A.outputDefined())}}let Wn=["Unknown","Point","LineString","Polygon"];class Ha{constructor(){this.globals=null,this.feature=null,this.featureState=null,this.formattedSection=null,this._parseColorCache={},this.availableImages=null,this.canonical=null}id(){return this.feature&&"id"in this.feature?this.feature.id:null}geometryType(){return this.feature?typeof this.feature.type=="number"?Wn[this.feature.type]:this.feature.type:null}geometry(){return this.feature&&"geometry"in this.feature?this.feature.geometry:null}canonicalID(){return this.canonical}properties(){return this.feature&&this.feature.properties||{}}parseColor(A){let F=this._parseColorCache[A];return F||(F=this._parseColorCache[A]=nr.parse(A)),F}}class vo{constructor(A,F,W=[],re,fe=new Qt,pe=[]){this.registry=A,this.path=W,this.key=W.map(ze=>`[${ze}]`).join(""),this.scope=fe,this.errors=pe,this.expectedType=re,this._isConstant=F}parse(A,F,W,re,fe={}){return F?this.concat(F,W,re)._parse(A,fe):this._parse(A,fe)}_parse(A,F){function W(re,fe,pe){return pe==="assert"?new Fa(fe,[re]):pe==="coerce"?new da(fe,[re]):re}if(A!==null&&typeof A!="string"&&typeof A!="boolean"&&typeof A!="number"||(A=["literal",A]),Array.isArray(A)){if(A.length===0)return this.error('Expected an array with at least one element. If you wanted a literal array, use ["literal", []].');let re=A[0];if(typeof re!="string")return this.error(`Expression name must be a string, but found ${typeof re} instead. If you wanted a literal array, use ["literal", [...]].`,0),null;let fe=this.registry[re];if(fe){let pe=fe.parse(A,this);if(!pe)return null;if(this.expectedType){let ze=this.expectedType,Ke=pe.type;if(ze.kind!=="string"&&ze.kind!=="number"&&ze.kind!=="boolean"&&ze.kind!=="object"&&ze.kind!=="array"||Ke.kind!=="value")if(ze.kind!=="color"&&ze.kind!=="formatted"&&ze.kind!=="resolvedImage"||Ke.kind!=="value"&&Ke.kind!=="string")if(ze.kind!=="padding"||Ke.kind!=="value"&&Ke.kind!=="number"&&Ke.kind!=="array")if(ze.kind!=="variableAnchorOffsetCollection"||Ke.kind!=="value"&&Ke.kind!=="array"){if(this.checkSubtype(ze,Ke))return null}else pe=W(pe,ze,F.typeAnnotation||"coerce");else pe=W(pe,ze,F.typeAnnotation||"coerce");else pe=W(pe,ze,F.typeAnnotation||"coerce");else pe=W(pe,ze,F.typeAnnotation||"assert")}if(!(pe instanceof ua)&&pe.type.kind!=="resolvedImage"&&this._isConstant(pe)){let ze=new Ha;try{pe=new ua(pe.type,pe.evaluate(ze))}catch(Ke){return this.error(Ke.message),null}}return pe}return this.error(`Unknown expression "${re}". If you wanted a literal array, use ["literal", [...]].`,0)}return this.error(A===void 0?"'undefined' value invalid. Use null instead.":typeof A=="object"?'Bare objects invalid. Use ["literal", {...}] instead.':`Expected an array, but found ${typeof A} instead.`)}concat(A,F,W){let re=typeof A=="number"?this.path.concat(A):this.path,fe=W?this.scope.concat(W):this.scope;return new vo(this.registry,this._isConstant,re,F||null,fe,this.errors)}error(A,...F){let W=`${this.key}${F.map(re=>`[${re}]`).join("")}`;this.errors.push(new St(W,A))}checkSubtype(A,F){let W=tt(A,F);return W&&this.error(W),W}}class jn{constructor(A,F){this.type=F.type,this.bindings=[].concat(A),this.result=F}evaluate(A){return this.result.evaluate(A)}eachChild(A){for(let F of this.bindings)A(F[1]);A(this.result)}static parse(A,F){if(A.length<4)return F.error(`Expected at least 3 arguments, but found ${A.length-1} instead.`);let W=[];for(let fe=1;fe<A.length-1;fe+=2){let pe=A[fe];if(typeof pe!="string")return F.error(`Expected string, but found ${typeof pe} instead.`,fe);if(/[^a-zA-Z0-9_]/.test(pe))return F.error("Variable names must contain only alphanumeric characters or '_'.",fe);let ze=F.parse(A[fe+1],fe+1);if(!ze)return null;W.push([pe,ze])}let re=F.parse(A[A.length-1],A.length-1,F.expectedType,W);return re?new jn(W,re):null}outputDefined(){return this.result.outputDefined()}}class Mt{constructor(A,F){this.type=F.type,this.name=A,this.boundExpression=F}static parse(A,F){if(A.length!==2||typeof A[1]!="string")return F.error("'var' expression requires exactly one string literal argument.");let W=A[1];return F.scope.has(W)?new Mt(W,F.scope.get(W)):F.error(`Unknown variable "${W}". Make sure "${W}" has been bound in an enclosing "let" expression before using it.`,1)}evaluate(A){return this.boundExpression.evaluate(A)}eachChild(){}outputDefined(){return!1}}class kr{constructor(A,F,W){this.type=A,this.index=F,this.input=W}static parse(A,F){if(A.length!==3)return F.error(`Expected 2 arguments, but found ${A.length-1} instead.`);let W=F.parse(A[1],1,_t),re=F.parse(A[2],2,Ge(F.expectedType||Tr));return W&&re?new kr(re.type.itemType,W,re):null}evaluate(A){let F=this.index.evaluate(A),W=this.input.evaluate(A);if(F<0)throw new ma(`Array index out of bounds: ${F} < 0.`);if(F>=W.length)throw new ma(`Array index out of bounds: ${F} > ${W.length-1}.`);if(F!==Math.floor(F))throw new ma(`Array index must be an integer, but found ${F} instead.`);return W[F]}eachChild(A){A(this.index),A(this.input)}outputDefined(){return!1}}class Jr{constructor(A,F){this.type=mt,this.needle=A,this.haystack=F}static parse(A,F){if(A.length!==3)return F.error(`Expected 2 arguments, but found ${A.length-1} instead.`);let W=F.parse(A[1],1,Tr),re=F.parse(A[2],2,Tr);return W&&re?xt(W.type,[mt,It,_t,Vt,Tr])?new Jr(W,re):F.error(`Expected first argument to be of type boolean, string, number or null, but found ${Je(W.type)} instead`):null}evaluate(A){let F=this.needle.evaluate(A),W=this.haystack.evaluate(A);if(!W)return!1;if(!Ie(F,["boolean","string","number","null"]))throw new ma(`Expected first argument to be of type boolean, string, number or null, but found ${Je(Sn(F))} instead.`);if(!Ie(W,["string","array"]))throw new ma(`Expected second argument to be of type array or string, but found ${Je(Sn(W))} instead.`);return W.indexOf(F)>=0}eachChild(A){A(this.needle),A(this.haystack)}outputDefined(){return!0}}class vi{constructor(A,F,W){this.type=_t,this.needle=A,this.haystack=F,this.fromIndex=W}static parse(A,F){if(A.length<=2||A.length>=5)return F.error(`Expected 3 or 4 arguments, but found ${A.length-1} instead.`);let W=F.parse(A[1],1,Tr),re=F.parse(A[2],2,Tr);if(!W||!re)return null;if(!xt(W.type,[mt,It,_t,Vt,Tr]))return F.error(`Expected first argument to be of type boolean, string, number or null, but found ${Je(W.type)} instead`);if(A.length===4){let fe=F.parse(A[3],3,_t);return fe?new vi(W,re,fe):null}return new vi(W,re)}evaluate(A){let F=this.needle.evaluate(A),W=this.haystack.evaluate(A);if(!Ie(F,["boolean","string","number","null"]))throw new ma(`Expected first argument to be of type boolean, string, number or null, but found ${Je(Sn(F))} instead.`);let re;if(this.fromIndex&&(re=this.fromIndex.evaluate(A)),Ie(W,["string"])){let fe=W.indexOf(F,re);return fe===-1?-1:[...W.slice(0,fe)].length}if(Ie(W,["array"]))return W.indexOf(F,re);throw new ma(`Expected second argument to be of type array or string, but found ${Je(Sn(W))} instead.`)}eachChild(A){A(this.needle),A(this.haystack),this.fromIndex&&A(this.fromIndex)}outputDefined(){return!1}}class hn{constructor(A,F,W,re,fe,pe){this.inputType=A,this.type=F,this.input=W,this.cases=re,this.outputs=fe,this.otherwise=pe}static parse(A,F){if(A.length<5)return F.error(`Expected at least 4 arguments, but found only ${A.length-1}.`);if(A.length%2!=1)return F.error("Expected an even number of arguments.");let W,re;F.expectedType&&F.expectedType.kind!=="value"&&(re=F.expectedType);let fe={},pe=[];for(let ct=2;ct<A.length-1;ct+=2){let Lt=A[ct],$t=A[ct+1];Array.isArray(Lt)||(Lt=[Lt]);let fr=F.concat(ct);if(Lt.length===0)return fr.error("Expected at least one branch label.");for(let Pr of Lt){if(typeof Pr!="number"&&typeof Pr!="string")return fr.error("Branch labels must be numbers or strings.");if(typeof Pr=="number"&&Math.abs(Pr)>Number.MAX_SAFE_INTEGER)return fr.error(`Branch labels must be integers no larger than ${Number.MAX_SAFE_INTEGER}.`);if(typeof Pr=="number"&&Math.floor(Pr)!==Pr)return fr.error("Numeric branch labels must be integer values.");if(W){if(fr.checkSubtype(W,Sn(Pr)))return null}else W=Sn(Pr);if(fe[String(Pr)]!==void 0)return fr.error("Branch labels must be unique.");fe[String(Pr)]=pe.length}let mr=F.parse($t,ct,re);if(!mr)return null;re=re||mr.type,pe.push(mr)}let ze=F.parse(A[1],1,Tr);if(!ze)return null;let Ke=F.parse(A[A.length-1],A.length-1,re);return Ke?ze.type.kind!=="value"&&F.concat(1).checkSubtype(W,ze.type)?null:new hn(W,re,ze,fe,pe,Ke):null}evaluate(A){let F=this.input.evaluate(A);return(Sn(F)===this.inputType&&this.outputs[this.cases[F]]||this.otherwise).evaluate(A)}eachChild(A){A(this.input),this.outputs.forEach(A),A(this.otherwise)}outputDefined(){return this.outputs.every(A=>A.outputDefined())&&this.otherwise.outputDefined()}}class An{constructor(A,F,W){this.type=A,this.branches=F,this.otherwise=W}static parse(A,F){if(A.length<4)return F.error(`Expected at least 3 arguments, but found only ${A.length-1}.`);if(A.length%2!=0)return F.error("Expected an odd number of arguments.");let W;F.expectedType&&F.expectedType.kind!=="value"&&(W=F.expectedType);let re=[];for(let pe=1;pe<A.length-1;pe+=2){let ze=F.parse(A[pe],pe,mt);if(!ze)return null;let Ke=F.parse(A[pe+1],pe+1,W);if(!Ke)return null;re.push([ze,Ke]),W=W||Ke.type}let fe=F.parse(A[A.length-1],A.length-1,W);if(!fe)return null;if(!W)throw new Error("Can't infer output type");return new An(W,re,fe)}evaluate(A){for(let[F,W]of this.branches)if(F.evaluate(A))return W.evaluate(A);return this.otherwise.evaluate(A)}eachChild(A){for(let[F,W]of this.branches)A(F),A(W);A(this.otherwise)}outputDefined(){return this.branches.every(([A,F])=>F.outputDefined())&&this.otherwise.outputDefined()}}class Mn{constructor(A,F,W,re){this.type=A,this.input=F,this.beginIndex=W,this.endIndex=re}static parse(A,F){if(A.length<=2||A.length>=5)return F.error(`Expected 3 or 4 arguments, but found ${A.length-1} instead.`);let W=F.parse(A[1],1,Tr),re=F.parse(A[2],2,_t);if(!W||!re)return null;if(!xt(W.type,[Ge(Tr),It,Tr]))return F.error(`Expected first argument to be of type array or string, but found ${Je(W.type)} instead`);if(A.length===4){let fe=F.parse(A[3],3,_t);return fe?new Mn(W.type,W,re,fe):null}return new Mn(W.type,W,re)}evaluate(A){let F=this.input.evaluate(A),W=this.beginIndex.evaluate(A),re;if(this.endIndex&&(re=this.endIndex.evaluate(A)),Ie(F,["string"]))return[...F].slice(W,re).join("");if(Ie(F,["array"]))return F.slice(W,re);throw new ma(`Expected first argument to be of type array or string, but found ${Je(Sn(F))} instead.`)}eachChild(A){A(this.input),A(this.beginIndex),this.endIndex&&A(this.endIndex)}outputDefined(){return!1}}function Li(R,A){let F=R.length-1,W,re,fe=0,pe=F,ze=0;for(;fe<=pe;)if(ze=Math.floor((fe+pe)/2),W=R[ze],re=R[ze+1],W<=A){if(ze===F||A<re)return ze;fe=ze+1}else{if(!(W>A))throw new ma("Input is not a number.");pe=ze-1}return 0}class _n{constructor(A,F,W){this.type=A,this.input=F,this.labels=[],this.outputs=[];for(let[re,fe]of W)this.labels.push(re),this.outputs.push(fe)}static parse(A,F){if(A.length-1<4)return F.error(`Expected at least 4 arguments, but found only ${A.length-1}.`);if((A.length-1)%2!=0)return F.error("Expected an even number of arguments.");let W=F.parse(A[1],1,_t);if(!W)return null;let re=[],fe=null;F.expectedType&&F.expectedType.kind!=="value"&&(fe=F.expectedType);for(let pe=1;pe<A.length;pe+=2){let ze=pe===1?-1/0:A[pe],Ke=A[pe+1],ct=pe,Lt=pe+1;if(typeof ze!="number")return F.error('Input/output pairs for "step" expressions must be defined using literal numeric values (not computed expressions) for the input values.',ct);if(re.length&&re[re.length-1][0]>=ze)return F.error('Input/output pairs for "step" expressions must be arranged with input values in strictly ascending order.',ct);let $t=F.parse(Ke,Lt,fe);if(!$t)return null;fe=fe||$t.type,re.push([ze,$t])}return new _n(fe,W,re)}evaluate(A){let F=this.labels,W=this.outputs;if(F.length===1)return W[0].evaluate(A);let re=this.input.evaluate(A);if(re<=F[0])return W[0].evaluate(A);let fe=F.length;return re>=F[fe-1]?W[fe-1].evaluate(A):W[Li(F,re)].evaluate(A)}eachChild(A){A(this.input);for(let F of this.outputs)A(F)}outputDefined(){return this.outputs.every(A=>A.outputDefined())}}function ya(R){return R&&R.__esModule&&Object.prototype.hasOwnProperty.call(R,"default")?R.default:R}var Jn=Ma;function Ma(R,A,F,W){this.cx=3*R,this.bx=3*(F-R)-this.cx,this.ax=1-this.cx-this.bx,this.cy=3*A,this.by=3*(W-A)-this.cy,this.ay=1-this.cy-this.by,this.p1x=R,this.p1y=A,this.p2x=F,this.p2y=W}Ma.prototype={sampleCurveX:function(R){return((this.ax*R+this.bx)*R+this.cx)*R},sampleCurveY:function(R){return((this.ay*R+this.by)*R+this.cy)*R},sampleCurveDerivativeX:function(R){return(3*this.ax*R+2*this.bx)*R+this.cx},solveCurveX:function(R,A){if(A===void 0&&(A=1e-6),R<0)return 0;if(R>1)return 1;for(var F=R,W=0;W<8;W++){var re=this.sampleCurveX(F)-R;if(Math.abs(re)<A)return F;var fe=this.sampleCurveDerivativeX(F);if(Math.abs(fe)<1e-6)break;F-=re/fe}var pe=0,ze=1;for(F=R,W=0;W<20&&(re=this.sampleCurveX(F),!(Math.abs(re-R)<A));W++)R>re?pe=F:ze=F,F=.5*(ze-pe)+pe;return F},solve:function(R,A){return this.sampleCurveY(this.solveCurveX(R,A))}};var _o=ya(Jn);function No(R,A,F){return R+F*(A-R)}function po(R,A,F){return R.map((W,re)=>No(W,A[re],F))}let Lo={number:No,color:function(R,A,F,W="rgb"){switch(W){case"rgb":{let[re,fe,pe,ze]=po(R.rgb,A.rgb,F);return new nr(re,fe,pe,ze,!1)}case"hcl":{let[re,fe,pe,ze]=R.hcl,[Ke,ct,Lt,$t]=A.hcl,fr,mr;if(isNaN(re)||isNaN(Ke))isNaN(re)?isNaN(Ke)?fr=NaN:(fr=Ke,pe!==1&&pe!==0||(mr=ct)):(fr=re,Lt!==1&&Lt!==0||(mr=fe));else{let vn=Ke-re;Ke>re&&vn>180?vn-=360:Ke<re&&re-Ke>180&&(vn+=360),fr=re+F*vn}let[Pr,zr,ui,yi]=function([vn,zi,un,Tn]){return vn=isNaN(vn)?0:vn*pi,On([un,Math.cos(vn)*zi,Math.sin(vn)*zi,Tn])}([fr,mr!=null?mr:No(fe,ct,F),No(pe,Lt,F),No(ze,$t,F)]);return new nr(Pr,zr,ui,yi,!1)}case"lab":{let[re,fe,pe,ze]=On(po(R.lab,A.lab,F));return new nr(re,fe,pe,ze,!1)}}},array:po,padding:function(R,A,F){return new Qr(po(R.values,A.values,F))},variableAnchorOffsetCollection:function(R,A,F){let W=R.values,re=A.values;if(W.length!==re.length)throw new ma(`Cannot interpolate values of different length. from: ${R.toString()}, to: ${A.toString()}`);let fe=[];for(let pe=0;pe<W.length;pe+=2){if(W[pe]!==re[pe])throw new ma(`Cannot interpolate values containing mismatched anchors. from[${pe}]: ${W[pe]}, to[${pe}]: ${re[pe]}`);fe.push(W[pe]);let[ze,Ke]=W[pe+1],[ct,Lt]=re[pe+1];fe.push([No(ze,ct,F),No(Ke,Lt,F)])}return new $i(fe)}};class ko{constructor(A,F,W,re,fe){this.type=A,this.operator=F,this.interpolation=W,this.input=re,this.labels=[],this.outputs=[];for(let[pe,ze]of fe)this.labels.push(pe),this.outputs.push(ze)}static interpolationFactor(A,F,W,re){let fe=0;if(A.name==="exponential")fe=Ds(F,A.base,W,re);else if(A.name==="linear")fe=Ds(F,1,W,re);else if(A.name==="cubic-bezier"){let pe=A.controlPoints;fe=new _o(pe[0],pe[1],pe[2],pe[3]).solve(Ds(F,1,W,re))}return fe}static parse(A,F){let[W,re,fe,...pe]=A;if(!Array.isArray(re)||re.length===0)return F.error("Expected an interpolation type expression.",1);if(re[0]==="linear")re={name:"linear"};else if(re[0]==="exponential"){let ct=re[1];if(typeof ct!="number")return F.error("Exponential interpolation requires a numeric base.",1,1);re={name:"exponential",base:ct}}else{if(re[0]!=="cubic-bezier")return F.error(`Unknown interpolation type ${String(re[0])}`,1,0);{let ct=re.slice(1);if(ct.length!==4||ct.some(Lt=>typeof Lt!="number"||Lt<0||Lt>1))return F.error("Cubic bezier interpolation requires four numeric arguments with values between 0 and 1.",1);re={name:"cubic-bezier",controlPoints:ct}}}if(A.length-1<4)return F.error(`Expected at least 4 arguments, but found only ${A.length-1}.`);if((A.length-1)%2!=0)return F.error("Expected an even number of arguments.");if(fe=F.parse(fe,2,_t),!fe)return null;let ze=[],Ke=null;W==="interpolate-hcl"||W==="interpolate-lab"?Ke=er:F.expectedType&&F.expectedType.kind!=="value"&&(Ke=F.expectedType);for(let ct=0;ct<pe.length;ct+=2){let Lt=pe[ct],$t=pe[ct+1],fr=ct+3,mr=ct+4;if(typeof Lt!="number")return F.error('Input/output pairs for "interpolate" expressions must be defined using literal numeric values (not computed expressions) for the input values.',fr);if(ze.length&&ze[ze.length-1][0]>=Lt)return F.error('Input/output pairs for "interpolate" expressions must be arranged with input values in strictly ascending order.',fr);let Pr=F.parse($t,mr,Ke);if(!Pr)return null;Ke=Ke||Pr.type,ze.push([Lt,Pr])}return xe(Ke,_t)||xe(Ke,er)||xe(Ke,Br)||xe(Ke,dt)||xe(Ke,Ge(_t))?new ko(Ke,W,re,fe,ze):F.error(`Type ${Je(Ke)} is not interpolatable.`)}evaluate(A){let F=this.labels,W=this.outputs;if(F.length===1)return W[0].evaluate(A);let re=this.input.evaluate(A);if(re<=F[0])return W[0].evaluate(A);let fe=F.length;if(re>=F[fe-1])return W[fe-1].evaluate(A);let pe=Li(F,re),ze=ko.interpolationFactor(this.interpolation,re,F[pe],F[pe+1]),Ke=W[pe].evaluate(A),ct=W[pe+1].evaluate(A);switch(this.operator){case"interpolate":return Lo[this.type.kind](Ke,ct,ze);case"interpolate-hcl":return Lo.color(Ke,ct,ze,"hcl");case"interpolate-lab":return Lo.color(Ke,ct,ze,"lab")}}eachChild(A){A(this.input);for(let F of this.outputs)A(F)}outputDefined(){return this.outputs.every(A=>A.outputDefined())}}function Ds(R,A,F,W){let re=W-F,fe=R-F;return re===0?0:A===1?fe/re:(Math.pow(A,fe)-1)/(Math.pow(A,re)-1)}class Fs{constructor(A,F){this.type=A,this.args=F}static parse(A,F){if(A.length<2)return F.error("Expectected at least one argument.");let W=null,re=F.expectedType;re&&re.kind!=="value"&&(W=re);let fe=[];for(let ze of A.slice(1)){let Ke=F.parse(ze,1+fe.length,W,void 0,{typeAnnotation:"omit"});if(!Ke)return null;W=W||Ke.type,fe.push(Ke)}if(!W)throw new Error("No output type");let pe=re&&fe.some(ze=>tt(re,ze.type));return new Fs(pe?Tr:W,fe)}evaluate(A){let F,W=null,re=0;for(let fe of this.args)if(re++,W=fe.evaluate(A),W&&W instanceof tn&&!W.available&&(F||(F=W.name),W=null,re===this.args.length&&(W=F)),W!==null)break;return W}eachChild(A){this.args.forEach(A)}outputDefined(){return this.args.every(A=>A.outputDefined())}}function ll(R,A){return R==="=="||R==="!="?A.kind==="boolean"||A.kind==="string"||A.kind==="number"||A.kind==="null"||A.kind==="value":A.kind==="string"||A.kind==="number"||A.kind==="value"}function ul(R,A,F,W){return W.compare(A,F)===0}function zl(R,A,F){let W=R!=="=="&&R!=="!=";return class aje{constructor(fe,pe,ze){this.type=mt,this.lhs=fe,this.rhs=pe,this.collator=ze,this.hasUntypedArgument=fe.type.kind==="value"||pe.type.kind==="value"}static parse(fe,pe){if(fe.length!==3&&fe.length!==4)return pe.error("Expected two or three arguments.");let ze=fe[0],Ke=pe.parse(fe[1],1,Tr);if(!Ke)return null;if(!ll(ze,Ke.type))return pe.concat(1).error(`"${ze}" comparisons are not supported for type '${Je(Ke.type)}'.`);let ct=pe.parse(fe[2],2,Tr);if(!ct)return null;if(!ll(ze,ct.type))return pe.concat(2).error(`"${ze}" comparisons are not supported for type '${Je(ct.type)}'.`);if(Ke.type.kind!==ct.type.kind&&Ke.type.kind!=="value"&&ct.type.kind!=="value")return pe.error(`Cannot compare types '${Je(Ke.type)}' and '${Je(ct.type)}'.`);W&&(Ke.type.kind==="value"&&ct.type.kind!=="value"?Ke=new Fa(ct.type,[Ke]):Ke.type.kind!=="value"&&ct.type.kind==="value"&&(ct=new Fa(Ke.type,[ct])));let Lt=null;if(fe.length===4){if(Ke.type.kind!=="string"&&ct.type.kind!=="string"&&Ke.type.kind!=="value"&&ct.type.kind!=="value")return pe.error("Cannot use collator to compare non-string types.");if(Lt=pe.parse(fe[3],3,Lr),!Lt)return null}return new aje(Ke,ct,Lt)}evaluate(fe){let pe=this.lhs.evaluate(fe),ze=this.rhs.evaluate(fe);if(W&&this.hasUntypedArgument){let Ke=Sn(pe),ct=Sn(ze);if(Ke.kind!==ct.kind||Ke.kind!=="string"&&Ke.kind!=="number")throw new ma(`Expected arguments for "${R}" to be (string, string) or (number, number), but found (${Ke.kind}, ${ct.kind}) instead.`)}if(this.collator&&!W&&this.hasUntypedArgument){let Ke=Sn(pe),ct=Sn(ze);if(Ke.kind!=="string"||ct.kind!=="string")return A(fe,pe,ze)}return this.collator?F(fe,pe,ze,this.collator.evaluate(fe)):A(fe,pe,ze)}eachChild(fe){fe(this.lhs),fe(this.rhs),this.collator&&fe(this.collator)}outputDefined(){return!0}}}let us=zl("==",function(R,A,F){return A===F},ul),il=zl("!=",function(R,A,F){return A!==F},function(R,A,F,W){return!ul(0,A,F,W)}),As=zl("<",function(R,A,F){return A<F},function(R,A,F,W){return W.compare(A,F)<0}),cl=zl(">",function(R,A,F){return A>F},function(R,A,F,W){return W.compare(A,F)>0}),Ks=zl("<=",function(R,A,F){return A<=F},function(R,A,F,W){return W.compare(A,F)<=0}),zs=zl(">=",function(R,A,F){return A>=F},function(R,A,F,W){return W.compare(A,F)>=0});class Io{constructor(A,F,W){this.type=Lr,this.locale=W,this.caseSensitive=A,this.diacriticSensitive=F}static parse(A,F){if(A.length!==2)return F.error("Expected one argument.");let W=A[1];if(typeof W!="object"||Array.isArray(W))return F.error("Collator options argument must be an object.");let re=F.parse(W["case-sensitive"]!==void 0&&W["case-sensitive"],1,mt);if(!re)return null;let fe=F.parse(W["diacritic-sensitive"]!==void 0&&W["diacritic-sensitive"],1,mt);if(!fe)return null;let pe=null;return W.locale&&(pe=F.parse(W.locale,1,It),!pe)?null:new Io(re,fe,pe)}evaluate(A){return new Er(this.caseSensitive.evaluate(A),this.diacriticSensitive.evaluate(A),this.locale?this.locale.evaluate(A):null)}eachChild(A){A(this.caseSensitive),A(this.diacriticSensitive),this.locale&&A(this.locale)}outputDefined(){return!1}}class ls{constructor(A,F,W,re,fe){this.type=It,this.number=A,this.locale=F,this.currency=W,this.minFractionDigits=re,this.maxFractionDigits=fe}static parse(A,F){if(A.length!==3)return F.error("Expected two arguments.");let W=F.parse(A[1],1,_t);if(!W)return null;let re=A[2];if(typeof re!="object"||Array.isArray(re))return F.error("NumberFormat options argument must be an object.");let fe=null;if(re.locale&&(fe=F.parse(re.locale,1,It),!fe))return null;let pe=null;if(re.currency&&(pe=F.parse(re.currency,1,It),!pe))return null;let ze=null;if(re["min-fraction-digits"]&&(ze=F.parse(re["min-fraction-digits"],1,_t),!ze))return null;let Ke=null;return re["max-fraction-digits"]&&(Ke=F.parse(re["max-fraction-digits"],1,_t),!Ke)?null:new ls(W,fe,pe,ze,Ke)}evaluate(A){return new Intl.NumberFormat(this.locale?this.locale.evaluate(A):[],{style:this.currency?"currency":"decimal",currency:this.currency?this.currency.evaluate(A):void 0,minimumFractionDigits:this.minFractionDigits?this.minFractionDigits.evaluate(A):void 0,maximumFractionDigits:this.maxFractionDigits?this.maxFractionDigits.evaluate(A):void 0}).format(this.number.evaluate(A))}eachChild(A){A(this.number),this.locale&&A(this.locale),this.currency&&A(this.currency),this.minFractionDigits&&A(this.minFractionDigits),this.maxFractionDigits&&A(this.maxFractionDigits)}outputDefined(){return!1}}class Zl{constructor(A){this.type=ti,this.sections=A}static parse(A,F){if(A.length<2)return F.error("Expected at least one argument.");let W=A[1];if(!Array.isArray(W)&&typeof W=="object")return F.error("First argument must be an image or text section.");let re=[],fe=!1;for(let pe=1;pe<=A.length-1;++pe){let ze=A[pe];if(fe&&typeof ze=="object"&&!Array.isArray(ze)){fe=!1;let Ke=null;if(ze["font-scale"]&&(Ke=F.parse(ze["font-scale"],1,_t),!Ke))return null;let ct=null;if(ze["text-font"]&&(ct=F.parse(ze["text-font"],1,Ge(It)),!ct))return null;let Lt=null;if(ze["text-color"]&&(Lt=F.parse(ze["text-color"],1,er),!Lt))return null;let $t=re[re.length-1];$t.scale=Ke,$t.font=ct,$t.textColor=Lt}else{let Ke=F.parse(A[pe],1,Tr);if(!Ke)return null;let ct=Ke.type.kind;if(ct!=="string"&&ct!=="value"&&ct!=="null"&&ct!=="resolvedImage")return F.error("Formatted text type must be 'string', 'value', 'image' or 'null'.");fe=!0,re.push({content:Ke,scale:null,font:null,textColor:null})}}return new Zl(re)}evaluate(A){return new ri(this.sections.map(F=>{let W=F.content.evaluate(A);return Sn(W)===Vr?new Xr("",W,null,null,null):new Xr(Ba(W),null,F.scale?F.scale.evaluate(A):null,F.font?F.font.evaluate(A).join(","):null,F.textColor?F.textColor.evaluate(A):null)}))}eachChild(A){for(let F of this.sections)A(F.content),F.scale&&A(F.scale),F.font&&A(F.font),F.textColor&&A(F.textColor)}outputDefined(){return!1}}class Su{constructor(A){this.type=Vr,this.input=A}static parse(A,F){if(A.length!==2)return F.error("Expected two arguments.");let W=F.parse(A[1],1,It);return W?new Su(W):F.error("No image name provided.")}evaluate(A){let F=this.input.evaluate(A),W=tn.fromString(F);return W&&A.availableImages&&(W.available=A.availableImages.indexOf(F)>-1),W}eachChild(A){A(this.input)}outputDefined(){return!1}}class nc{constructor(A){this.type=_t,this.input=A}static parse(A,F){if(A.length!==2)return F.error(`Expected 1 argument, but found ${A.length-1} instead.`);let W=F.parse(A[1],1);return W?W.type.kind!=="array"&&W.type.kind!=="string"&&W.type.kind!=="value"?F.error(`Expected argument of type string or array, but found ${Je(W.type)} instead.`):new nc(W):null}evaluate(A){let F=this.input.evaluate(A);if(typeof F=="string")return[...F].length;if(Array.isArray(F))return F.length;throw new ma(`Expected value to be of type string or array, but found ${Je(Sn(F))} instead.`)}eachChild(A){A(this.input)}outputDefined(){return!1}}let bs=8192;function Rn(R,A){let F=(180+R[0])/360,W=(180-180/Math.PI*Math.log(Math.tan(Math.PI/4+R[1]*Math.PI/360)))/360,re=Math.pow(2,A.z);return[Math.round(F*re*bs),Math.round(W*re*bs)]}function _a(R,A){let F=Math.pow(2,A.z);return[(re=(R[0]/bs+A.x)/F,360*re-180),(W=(R[1]/bs+A.y)/F,360/Math.PI*Math.atan(Math.exp((180-360*W)*Math.PI/180))-90)];var W,re}function Vu(R,A){R[0]=Math.min(R[0],A[0]),R[1]=Math.min(R[1],A[1]),R[2]=Math.max(R[2],A[0]),R[3]=Math.max(R[3],A[1])}function Ol(R,A){return!(R[0]<=A[0]||R[2]>=A[2]||R[1]<=A[1]||R[3]>=A[3])}function xo(R,A,F){let W=R[0]-A[0],re=R[1]-A[1],fe=R[0]-F[0],pe=R[1]-F[1];return W*pe-fe*re==0&&W*fe<=0&&re*pe<=0}function Yl(R,A,F,W){return(re=[W[0]-F[0],W[1]-F[1]])[0]*(fe=[A[0]-R[0],A[1]-R[1]])[1]-re[1]*fe[0]!=0&&!(!qo(R,A,F,W)||!qo(F,W,R,A));var re,fe}function Ns(R,A,F){for(let W of F)for(let re=0;re<W.length-1;++re)if(Yl(R,A,W[re],W[re+1]))return!0;return!1}function Hl(R,A,F=!1){let W=!1;for(let ze of A)for(let Ke=0;Ke<ze.length-1;Ke++){if(xo(R,ze[Ke],ze[Ke+1]))return F;(fe=ze[Ke])[1]>(re=R)[1]!=(pe=ze[Ke+1])[1]>re[1]&&re[0]<(pe[0]-fe[0])*(re[1]-fe[1])/(pe[1]-fe[1])+fe[0]&&(W=!W)}var re,fe,pe;return W}function ac(R,A){for(let F of A)if(Hl(R,F))return!0;return!1}function aa(R,A){for(let F of R)if(!Hl(F,A))return!1;for(let F=0;F<R.length-1;++F)if(Ns(R[F],R[F+1],A))return!1;return!0}function Oo(R,A){for(let F of A)if(aa(R,F))return!0;return!1}function qo(R,A,F,W){let re=W[0]-F[0],fe=W[1]-F[1],pe=(R[0]-F[0])*fe-re*(R[1]-F[1]),ze=(A[0]-F[0])*fe-re*(A[1]-F[1]);return pe>0&&ze<0||pe<0&&ze>0}function ql(R,A,F){let W=[];for(let re=0;re<R.length;re++){let fe=[];for(let pe=0;pe<R[re].length;pe++){let ze=Rn(R[re][pe],F);Vu(A,ze),fe.push(ze)}W.push(fe)}return W}function Pc(R,A,F){let W=[];for(let re=0;re<R.length;re++){let fe=ql(R[re],A,F);W.push(fe)}return W}function Do(R,A,F,W){if(R[0]<F[0]||R[0]>F[2]){let re=.5*W,fe=R[0]-F[0]>re?-W:F[0]-R[0]>re?W:0;fe===0&&(fe=R[0]-F[2]>re?-W:F[2]-R[0]>re?W:0),R[0]+=fe}Vu(A,R)}function rf(R,A,F,W){let re=Math.pow(2,W.z)*bs,fe=[W.x*bs,W.y*bs],pe=[];for(let ze of R)for(let Ke of ze){let ct=[Ke.x+fe[0],Ke.y+fe[1]];Do(ct,A,F,re),pe.push(ct)}return pe}function Uf(R,A,F,W){let re=Math.pow(2,W.z)*bs,fe=[W.x*bs,W.y*bs],pe=[];for(let Ke of R){let ct=[];for(let Lt of Ke){let $t=[Lt.x+fe[0],Lt.y+fe[1]];Vu(A,$t),ct.push($t)}pe.push(ct)}if(A[2]-A[0]<=re/2){(ze=A)[0]=ze[1]=1/0,ze[2]=ze[3]=-1/0;for(let Ke of pe)for(let ct of Ke)Do(ct,A,F,re)}var ze;return pe}class pl{constructor(A,F){this.type=mt,this.geojson=A,this.geometries=F}static parse(A,F){if(A.length!==2)return F.error(`'within' expression requires exactly one argument, but found ${A.length-1} instead.`);if(yn(A[1])){let W=A[1];if(W.type==="FeatureCollection"){let re=[];for(let fe of W.features){let{type:pe,coordinates:ze}=fe.geometry;pe==="Polygon"&&re.push(ze),pe==="MultiPolygon"&&re.push(...ze)}if(re.length)return new pl(W,{type:"MultiPolygon",coordinates:re})}else if(W.type==="Feature"){let re=W.geometry.type;if(re==="Polygon"||re==="MultiPolygon")return new pl(W,W.geometry)}else if(W.type==="Polygon"||W.type==="MultiPolygon")return new pl(W,W)}return F.error("'within' expression requires valid geojson object that contains polygon geometry type.")}evaluate(A){if(A.geometry()!=null&&A.canonicalID()!=null){if(A.geometryType()==="Point")return function(F,W){let re=[1/0,1/0,-1/0,-1/0],fe=[1/0,1/0,-1/0,-1/0],pe=F.canonicalID();if(W.type==="Polygon"){let ze=ql(W.coordinates,fe,pe),Ke=rf(F.geometry(),re,fe,pe);if(!Ol(re,fe))return!1;for(let ct of Ke)if(!Hl(ct,ze))return!1}if(W.type==="MultiPolygon"){let ze=Pc(W.coordinates,fe,pe),Ke=rf(F.geometry(),re,fe,pe);if(!Ol(re,fe))return!1;for(let ct of Ke)if(!ac(ct,ze))return!1}return!0}(A,this.geometries);if(A.geometryType()==="LineString")return function(F,W){let re=[1/0,1/0,-1/0,-1/0],fe=[1/0,1/0,-1/0,-1/0],pe=F.canonicalID();if(W.type==="Polygon"){let ze=ql(W.coordinates,fe,pe),Ke=Uf(F.geometry(),re,fe,pe);if(!Ol(re,fe))return!1;for(let ct of Ke)if(!aa(ct,ze))return!1}if(W.type==="MultiPolygon"){let ze=Pc(W.coordinates,fe,pe),Ke=Uf(F.geometry(),re,fe,pe);if(!Ol(re,fe))return!1;for(let ct of Ke)if(!Oo(ct,ze))return!1}return!0}(A,this.geometries)}return!1}eachChild(){}outputDefined(){return!0}}let Zc=class{constructor(R=[],A=(F,W)=>F<W?-1:F>W?1:0){if(this.data=R,this.length=this.data.length,this.compare=A,this.length>0)for(let F=(this.length>>1)-1;F>=0;F--)this._down(F)}push(R){this.data.push(R),this._up(this.length++)}pop(){if(this.length===0)return;let R=this.data[0],A=this.data.pop();return--this.length>0&&(this.data[0]=A,this._down(0)),R}peek(){return this.data[0]}_up(R){let{data:A,compare:F}=this,W=A[R];for(;R>0;){let re=R-1>>1,fe=A[re];if(F(W,fe)>=0)break;A[R]=fe,R=re}A[R]=W}_down(R){let{data:A,compare:F}=this,W=this.length>>1,re=A[R];for(;R<W;){let fe=1+(R<<1),pe=fe+1;if(pe<this.length&&F(A[pe],A[fe])<0&&(fe=pe),F(A[fe],re)>=0)break;A[R]=A[fe],R=fe}A[R]=re}};function Kl(R,A,F,W,re){Os(R,A,F,W||R.length-1,re||oc)}function Os(R,A,F,W,re){for(;W>F;){if(W-F>600){var fe=W-F+1,pe=A-F+1,ze=Math.log(fe),Ke=.5*Math.exp(2*ze/3),ct=.5*Math.sqrt(ze*Ke*(fe-Ke)/fe)*(pe-fe/2<0?-1:1);Os(R,A,Math.max(F,Math.floor(A-pe*Ke/fe+ct)),Math.min(W,Math.floor(A+(fe-pe)*Ke/fe+ct)),re)}var Lt=R[A],$t=F,fr=W;for(yu(R,F,A),re(R[W],Lt)>0&&yu(R,F,W);$t<fr;){for(yu(R,$t,fr),$t++,fr--;re(R[$t],Lt)<0;)$t++;for(;re(R[fr],Lt)>0;)fr--}re(R[F],Lt)===0?yu(R,F,fr):yu(R,++fr,W),fr<=A&&(F=fr+1),A<=fr&&(W=fr-1)}}function yu(R,A,F){var W=R[A];R[A]=R[F],R[F]=W}function oc(R,A){return R<A?-1:R>A?1:0}function Cf(R,A){if(R.length<=1)return[R];let F=[],W,re;for(let fe of R){let pe=Vh(fe);pe!==0&&(fe.area=Math.abs(pe),re===void 0&&(re=pe<0),re===pe<0?(W&&F.push(W),W=[fe]):W.push(fe))}if(W&&F.push(W),A>1)for(let fe=0;fe<F.length;fe++)F[fe].length<=A||(Kl(F[fe],A,1,F[fe].length-1,sc),F[fe]=F[fe].slice(0,A));return F}function sc(R,A){return A.area-R.area}function Vh(R){let A=0;for(let F,W,re=0,fe=R.length,pe=fe-1;re<fe;pe=re++)F=R[re],W=R[pe],A+=(W.x-F.x)*(F.y+W.y);return A}let Lf=1/298.257223563,cs=Lf*(2-Lf),nf=Math.PI/180;class Vf{constructor(A){let F=6378.137*nf*1e3,W=Math.cos(A*nf),re=1/(1-cs*(1-W*W)),fe=Math.sqrt(re);this.kx=F*fe*W,this.ky=F*fe*re*(1-cs)}distance(A,F){let W=this.wrap(A[0]-F[0])*this.kx,re=(A[1]-F[1])*this.ky;return Math.sqrt(W*W+re*re)}pointOnLine(A,F){let W,re,fe,pe,ze=1/0;for(let Ke=0;Ke<A.length-1;Ke++){let ct=A[Ke][0],Lt=A[Ke][1],$t=this.wrap(A[Ke+1][0]-ct)*this.kx,fr=(A[Ke+1][1]-Lt)*this.ky,mr=0;$t===0&&fr===0||(mr=(this.wrap(F[0]-ct)*this.kx*$t+(F[1]-Lt)*this.ky*fr)/($t*$t+fr*fr),mr>1?(ct=A[Ke+1][0],Lt=A[Ke+1][1]):mr>0&&(ct+=$t/this.kx*mr,Lt+=fr/this.ky*mr)),$t=this.wrap(F[0]-ct)*this.kx,fr=(F[1]-Lt)*this.ky;let Pr=$t*$t+fr*fr;Pr<ze&&(ze=Pr,W=ct,re=Lt,fe=Ke,pe=mr)}return{point:[W,re],index:fe,t:Math.max(0,Math.min(1,pe))}}wrap(A){for(;A<-180;)A+=360;for(;A>180;)A-=360;return A}}function Jl(R,A){return A[0]-R[0]}function fl(R){return R[1]-R[0]+1}function lc(R,A){return R[1]>=R[0]&&R[1]<A}function Fu(R,A){if(R[0]>R[1])return[null,null];let F=fl(R);if(A){if(F===2)return[R,null];let re=Math.floor(F/2);return[[R[0],R[0]+re],[R[0]+re,R[1]]]}if(F===1)return[R,null];let W=Math.floor(F/2)-1;return[[R[0],R[0]+W],[R[0]+W+1,R[1]]]}function Es(R,A){if(!lc(A,R.length))return[1/0,1/0,-1/0,-1/0];let F=[1/0,1/0,-1/0,-1/0];for(let W=A[0];W<=A[1];++W)Vu(F,R[W]);return F}function Hs(R){let A=[1/0,1/0,-1/0,-1/0];for(let F of R)for(let W of F)Vu(A,W);return A}function Go(R){return R[0]!==-1/0&&R[1]!==-1/0&&R[2]!==1/0&&R[3]!==1/0}function ps(R,A,F){if(!Go(R)||!Go(A))return NaN;let W=0,re=0;return R[2]<A[0]&&(W=A[0]-R[2]),R[0]>A[2]&&(W=R[0]-A[2]),R[1]>A[3]&&(re=R[1]-A[3]),R[3]<A[1]&&(re=A[1]-R[3]),F.distance([0,0],[W,re])}function uc(R,A,F){let W=F.pointOnLine(A,R);return F.distance(R,W.point)}function xl(R,A,F,W,re){let fe=Math.min(uc(R,[F,W],re),uc(A,[F,W],re)),pe=Math.min(uc(F,[R,A],re),uc(W,[R,A],re));return Math.min(fe,pe)}function Gu(R,A,F,W,re){if(!lc(A,R.length)||!lc(W,F.length))return 1/0;let fe=1/0;for(let pe=A[0];pe<A[1];++pe){let ze=R[pe],Ke=R[pe+1];for(let ct=W[0];ct<W[1];++ct){let Lt=F[ct],$t=F[ct+1];if(Yl(ze,Ke,Lt,$t))return 0;fe=Math.min(fe,xl(ze,Ke,Lt,$t,re))}}return fe}function qs(R,A,F,W,re){if(!lc(A,R.length)||!lc(W,F.length))return NaN;let fe=1/0;for(let pe=A[0];pe<=A[1];++pe)for(let ze=W[0];ze<=W[1];++ze)if(fe=Math.min(fe,re.distance(R[pe],F[ze])),fe===0)return fe;return fe}function ad(R,A,F){if(Hl(R,A,!0))return 0;let W=1/0;for(let re of A){let fe=re[0],pe=re[re.length-1];if(fe!==pe&&(W=Math.min(W,uc(R,[pe,fe],F)),W===0))return W;let ze=F.pointOnLine(re,R);if(W=Math.min(W,F.distance(R,ze.point)),W===0)return W}return W}function Po(R,A,F,W){if(!lc(A,R.length))return NaN;for(let fe=A[0];fe<=A[1];++fe)if(Hl(R[fe],F,!0))return 0;let re=1/0;for(let fe=A[0];fe<A[1];++fe){let pe=R[fe],ze=R[fe+1];for(let Ke of F)for(let ct=0,Lt=Ke.length,$t=Lt-1;ct<Lt;$t=ct++){let fr=Ke[$t],mr=Ke[ct];if(Yl(pe,ze,fr,mr))return 0;re=Math.min(re,xl(pe,ze,fr,mr,W))}}return re}function od(R,A){for(let F of R)for(let W of F)if(Hl(W,A,!0))return!0;return!1}function Yo(R,A,F,W=1/0){let re=Hs(R),fe=Hs(A);if(W!==1/0&&ps(re,fe,F)>=W)return W;if(Ol(re,fe)){if(od(R,A))return 0}else if(od(A,R))return 0;let pe=1/0;for(let ze of R)for(let Ke=0,ct=ze.length,Lt=ct-1;Ke<ct;Lt=Ke++){let $t=ze[Lt],fr=ze[Ke];for(let mr of A)for(let Pr=0,zr=mr.length,ui=zr-1;Pr<zr;ui=Pr++){let yi=mr[ui],vn=mr[Pr];if(Yl($t,fr,yi,vn))return 0;pe=Math.min(pe,xl($t,fr,yi,vn,F))}}return pe}function Pa(R,A,F,W,re,fe){if(!fe)return;let pe=ps(Es(W,fe),re,F);pe<A&&R.push([pe,fe,[0,0]])}function af(R,A,F,W,re,fe,pe){if(!fe||!pe)return;let ze=ps(Es(W,fe),Es(re,pe),F);ze<A&&R.push([ze,fe,pe])}function Hu(R,A,F,W,re=1/0){let fe=Math.min(W.distance(R[0],F[0][0]),re);if(fe===0)return fe;let pe=new Zc([[0,[0,R.length-1],[0,0]]],Jl),ze=Hs(F);for(;pe.length>0;){let Ke=pe.pop();if(Ke[0]>=fe)continue;let ct=Ke[1],Lt=A?50:100;if(fl(ct)<=Lt){if(!lc(ct,R.length))return NaN;if(A){let $t=Po(R,ct,F,W);if(isNaN($t)||$t===0)return $t;fe=Math.min(fe,$t)}else for(let $t=ct[0];$t<=ct[1];++$t){let fr=ad(R[$t],F,W);if(fe=Math.min(fe,fr),fe===0)return 0}}else{let $t=Fu(ct,A);Pa(pe,fe,W,R,ze,$t[0]),Pa(pe,fe,W,R,ze,$t[1])}}return fe}function bl(R,A,F,W,re,fe=1/0){let pe=Math.min(fe,re.distance(R[0],F[0]));if(pe===0)return pe;let ze=new Zc([[0,[0,R.length-1],[0,F.length-1]]],Jl);for(;ze.length>0;){let Ke=ze.pop();if(Ke[0]>=pe)continue;let ct=Ke[1],Lt=Ke[2],$t=A?50:100,fr=W?50:100;if(fl(ct)<=$t&&fl(Lt)<=fr){if(!lc(ct,R.length)&&lc(Lt,F.length))return NaN;let mr;if(A&&W)mr=Gu(R,ct,F,Lt,re),pe=Math.min(pe,mr);else if(A&&!W){let Pr=R.slice(ct[0],ct[1]+1);for(let zr=Lt[0];zr<=Lt[1];++zr)if(mr=uc(F[zr],Pr,re),pe=Math.min(pe,mr),pe===0)return pe}else if(!A&&W){let Pr=F.slice(Lt[0],Lt[1]+1);for(let zr=ct[0];zr<=ct[1];++zr)if(mr=uc(R[zr],Pr,re),pe=Math.min(pe,mr),pe===0)return pe}else mr=qs(R,ct,F,Lt,re),pe=Math.min(pe,mr)}else{let mr=Fu(ct,A),Pr=Fu(Lt,W);af(ze,pe,re,R,F,mr[0],Pr[0]),af(ze,pe,re,R,F,mr[0],Pr[1]),af(ze,pe,re,R,F,mr[1],Pr[0]),af(ze,pe,re,R,F,mr[1],Pr[1])}}return pe}function Gf(R){return R.type==="MultiPolygon"?R.coordinates.map(A=>({type:"Polygon",coordinates:A})):R.type==="MultiLineString"?R.coordinates.map(A=>({type:"LineString",coordinates:A})):R.type==="MultiPoint"?R.coordinates.map(A=>({type:"Point",coordinates:A})):[R]}class Ic{constructor(A,F){this.type=_t,this.geojson=A,this.geometries=F}static parse(A,F){if(A.length!==2)return F.error(`'distance' expression requires exactly one argument, but found ${A.length-1} instead.`);if(yn(A[1])){let W=A[1];if(W.type==="FeatureCollection")return new Ic(W,W.features.map(re=>Gf(re.geometry)).flat());if(W.type==="Feature")return new Ic(W,Gf(W.geometry));if("type"in W&&"coordinates"in W)return new Ic(W,Gf(W))}return F.error("'distance' expression requires valid geojson object that contains polygon geometry type.")}evaluate(A){if(A.geometry()!=null&&A.canonicalID()!=null){if(A.geometryType()==="Point")return function(F,W){let re=F.geometry(),fe=re.flat().map(Ke=>_a([Ke.x,Ke.y],F.canonical));if(re.length===0)return NaN;let pe=new Vf(fe[0][1]),ze=1/0;for(let Ke of W){switch(Ke.type){case"Point":ze=Math.min(ze,bl(fe,!1,[Ke.coordinates],!1,pe,ze));break;case"LineString":ze=Math.min(ze,bl(fe,!1,Ke.coordinates,!0,pe,ze));break;case"Polygon":ze=Math.min(ze,Hu(fe,!1,Ke.coordinates,pe,ze))}if(ze===0)return ze}return ze}(A,this.geometries);if(A.geometryType()==="LineString")return function(F,W){let re=F.geometry(),fe=re.flat().map(Ke=>_a([Ke.x,Ke.y],F.canonical));if(re.length===0)return NaN;let pe=new Vf(fe[0][1]),ze=1/0;for(let Ke of W){switch(Ke.type){case"Point":ze=Math.min(ze,bl(fe,!0,[Ke.coordinates],!1,pe,ze));break;case"LineString":ze=Math.min(ze,bl(fe,!0,Ke.coordinates,!0,pe,ze));break;case"Polygon":ze=Math.min(ze,Hu(fe,!0,Ke.coordinates,pe,ze))}if(ze===0)return ze}return ze}(A,this.geometries);if(A.geometryType()==="Polygon")return function(F,W){let re=F.geometry();if(re.length===0||re[0].length===0)return NaN;let fe=Cf(re,0).map(Ke=>Ke.map(ct=>ct.map(Lt=>_a([Lt.x,Lt.y],F.canonical)))),pe=new Vf(fe[0][0][0][1]),ze=1/0;for(let Ke of W)for(let ct of fe){switch(Ke.type){case"Point":ze=Math.min(ze,Hu([Ke.coordinates],!1,ct,pe,ze));break;case"LineString":ze=Math.min(ze,Hu(Ke.coordinates,!0,ct,pe,ze));break;case"Polygon":ze=Math.min(ze,Yo(ct,Ke.coordinates,pe,ze))}if(ze===0)return ze}return ze}(A,this.geometries)}return NaN}eachChild(){}outputDefined(){return!0}}let yf={"==":us,"!=":il,">":cl,"<":As,">=":zs,"<=":Ks,array:Fa,at:kr,boolean:Fa,case:An,coalesce:Fs,collator:Io,format:Zl,image:Su,in:Jr,"index-of":vi,interpolate:ko,"interpolate-hcl":ko,"interpolate-lab":ko,length:nc,let:jn,literal:ua,match:hn,number:Fa,"number-format":ls,object:Fa,slice:Mn,step:_n,string:Fa,"to-boolean":da,"to-color":da,"to-number":da,"to-string":da,var:Mt,within:pl,distance:Ic};class Bl{constructor(A,F,W,re){this.name=A,this.type=F,this._evaluate=W,this.args=re}evaluate(A){return this._evaluate(A,this.args)}eachChild(A){this.args.forEach(A)}outputDefined(){return!1}static parse(A,F){let W=A[0],re=Bl.definitions[W];if(!re)return F.error(`Unknown expression "${W}". If you wanted a literal array, use ["literal", [...]].`,0);let fe=Array.isArray(re)?re[0]:re.type,pe=Array.isArray(re)?[[re[1],re[2]]]:re.overloads,ze=pe.filter(([ct])=>!Array.isArray(ct)||ct.length===A.length-1),Ke=null;for(let[ct,Lt]of ze){Ke=new vo(F.registry,eh,F.path,null,F.scope);let $t=[],fr=!1;for(let mr=1;mr<A.length;mr++){let Pr=A[mr],zr=Array.isArray(ct)?ct[mr-1]:ct.type,ui=Ke.parse(Pr,1+$t.length,zr);if(!ui){fr=!0;break}$t.push(ui)}if(!fr)if(Array.isArray(ct)&&ct.length!==$t.length)Ke.error(`Expected ${ct.length} arguments, but found ${$t.length} instead.`);else{for(let mr=0;mr<$t.length;mr++){let Pr=Array.isArray(ct)?ct[mr]:ct.type,zr=$t[mr];Ke.concat(mr+1).checkSubtype(Pr,zr.type)}if(Ke.errors.length===0)return new Bl(W,fe,Lt,$t)}}if(ze.length===1)F.errors.push(...Ke.errors);else{let ct=(ze.length?ze:pe).map(([$t])=>{return fr=$t,Array.isArray(fr)?`(${fr.map(Je).join(", ")})`:`(${Je(fr.type)}...)`;var fr}).join(" | "),Lt=[];for(let $t=1;$t<A.length;$t++){let fr=F.parse(A[$t],1+Lt.length);if(!fr)return null;Lt.push(Je(fr.type))}F.error(`Expected arguments of type ${ct}, but found (${Lt.join(", ")}) instead.`)}return null}static register(A,F){Bl.definitions=F;for(let W in F)A[W]=Bl}}function wh(R,[A,F,W,re]){A=A.evaluate(R),F=F.evaluate(R),W=W.evaluate(R);let fe=re?re.evaluate(R):1,pe=fn(A,F,W,fe);if(pe)throw new ma(pe);return new nr(A/255,F/255,W/255,fe,!1)}function Qf(R,A){return R in A}function _f(R,A){let F=A[R];return F===void 0?null:F}function Yc(R){return{type:R}}function eh(R){if(R instanceof Mt)return eh(R.boundExpression);if(R instanceof Bl&&R.name==="error"||R instanceof Io||R instanceof pl||R instanceof Ic)return!1;let A=R instanceof da||R instanceof Fa,F=!0;return R.eachChild(W=>{F=A?F&&eh(W):F&&W instanceof ua}),!!F&&th(R)&&Hf(R,["zoom","heatmap-density","line-progress","accumulated","is-supported-script"])}function th(R){if(R instanceof Bl&&(R.name==="get"&&R.args.length===1||R.name==="feature-state"||R.name==="has"&&R.args.length===1||R.name==="properties"||R.name==="geometry-type"||R.name==="id"||/^filter-/.test(R.name))||R instanceof pl||R instanceof Ic)return!1;let A=!0;return R.eachChild(F=>{A&&!th(F)&&(A=!1)}),A}function ju(R){if(R instanceof Bl&&R.name==="feature-state")return!1;let A=!0;return R.eachChild(F=>{A&&!ju(F)&&(A=!1)}),A}function Hf(R,A){if(R instanceof Bl&&A.indexOf(R.name)>=0)return!1;let F=!0;return R.eachChild(W=>{F&&!Hf(W,A)&&(F=!1)}),F}function cc(R){return{result:"success",value:R}}function of(R){return{result:"error",value:R}}function Nl(R){return R["property-type"]==="data-driven"||R["property-type"]==="cross-faded-data-driven"}function Kc(R){return!!R.expression&&R.expression.parameters.indexOf("zoom")>-1}function Rc(R){return!!R.expression&&R.expression.interpolated}function gs(R){return R instanceof Number?"number":R instanceof String?"string":R instanceof Boolean?"boolean":Array.isArray(R)?"array":R===null?"null":typeof R}function jf(R){return typeof R=="object"&&R!==null&&!Array.isArray(R)}function Gh(R){return R}function rh(R,A){let F=A.type==="color",W=R.stops&&typeof R.stops[0][0]=="object",re=W||!(W||R.property!==void 0),fe=R.type||(Rc(A)?"exponential":"interval");if(F||A.type==="padding"){let Lt=F?nr.parse:Qr.parse;(R=$e({},R)).stops&&(R.stops=R.stops.map($t=>[$t[0],Lt($t[1])])),R.default=Lt(R.default?R.default:A.default)}if(R.colorSpace&&(pe=R.colorSpace)!=="rgb"&&pe!=="hcl"&&pe!=="lab")throw new Error(`Unknown color space: "${R.colorSpace}"`);var pe;let ze,Ke,ct;if(fe==="exponential")ze=ih;else if(fe==="interval")ze=Mu;else if(fe==="categorical"){ze=Th,Ke=Object.create(null);for(let Lt of R.stops)Ke[Lt[0]]=Lt[1];ct=typeof R.stops[0][0]}else{if(fe!=="identity")throw new Error(`Unknown function type "${fe}"`);ze=js}if(W){let Lt={},$t=[];for(let Pr=0;Pr<R.stops.length;Pr++){let zr=R.stops[Pr],ui=zr[0].zoom;Lt[ui]===void 0&&(Lt[ui]={zoom:ui,type:R.type,property:R.property,default:R.default,stops:[]},$t.push(ui)),Lt[ui].stops.push([zr[0].value,zr[1]])}let fr=[];for(let Pr of $t)fr.push([Lt[Pr].zoom,rh(Lt[Pr],A)]);let mr={name:"linear"};return{kind:"composite",interpolationType:mr,interpolationFactor:ko.interpolationFactor.bind(void 0,mr),zoomStops:fr.map(Pr=>Pr[0]),evaluate:({zoom:Pr},zr)=>ih({stops:fr,base:R.base},A,Pr).evaluate(Pr,zr)}}if(re){let Lt=fe==="exponential"?{name:"exponential",base:R.base!==void 0?R.base:1}:null;return{kind:"camera",interpolationType:Lt,interpolationFactor:ko.interpolationFactor.bind(void 0,Lt),zoomStops:R.stops.map($t=>$t[0]),evaluate:({zoom:$t})=>ze(R,A,$t,Ke,ct)}}return{kind:"source",evaluate(Lt,$t){let fr=$t&&$t.properties?$t.properties[R.property]:void 0;return fr===void 0?sf(R.default,A.default):ze(R,A,fr,Ke,ct)}}}function sf(R,A,F){return R!==void 0?R:A!==void 0?A:F!==void 0?F:void 0}function Th(R,A,F,W,re){return sf(typeof F===re?W[F]:void 0,R.default,A.default)}function Mu(R,A,F){if(gs(F)!=="number")return sf(R.default,A.default);let W=R.stops.length;if(W===1||F<=R.stops[0][0])return R.stops[0][1];if(F>=R.stops[W-1][0])return R.stops[W-1][1];let re=Li(R.stops.map(fe=>fe[0]),F);return R.stops[re][1]}function ih(R,A,F){let W=R.base!==void 0?R.base:1;if(gs(F)!=="number")return sf(R.default,A.default);let re=R.stops.length;if(re===1||F<=R.stops[0][0])return R.stops[0][1];if(F>=R.stops[re-1][0])return R.stops[re-1][1];let fe=Li(R.stops.map(Lt=>Lt[0]),F),pe=function(Lt,$t,fr,mr){let Pr=mr-fr,zr=Lt-fr;return Pr===0?0:$t===1?zr/Pr:(Math.pow($t,zr)-1)/(Math.pow($t,Pr)-1)}(F,W,R.stops[fe][0],R.stops[fe+1][0]),ze=R.stops[fe][1],Ke=R.stops[fe+1][1],ct=Lo[A.type]||Gh;return typeof ze.evaluate=="function"?{evaluate(...Lt){let $t=ze.evaluate.apply(void 0,Lt),fr=Ke.evaluate.apply(void 0,Lt);if($t!==void 0&&fr!==void 0)return ct($t,fr,pe,R.colorSpace)}}:ct(ze,Ke,pe,R.colorSpace)}function js(R,A,F){switch(A.type){case"color":F=nr.parse(F);break;case"formatted":F=ri.fromString(F.toString());break;case"resolvedImage":F=tn.fromString(F.toString());break;case"padding":F=Qr.parse(F);break;default:gs(F)===A.type||A.type==="enum"&&A.values[F]||(F=void 0)}return sf(F,R.default,A.default)}Bl.register(yf,{error:[{kind:"error"},[It],(R,[A])=>{throw new ma(A.evaluate(R))}],typeof:[It,[Tr],(R,[A])=>Je(Sn(A.evaluate(R)))],"to-rgba":[Ge(_t,4),[er],(R,[A])=>{let[F,W,re,fe]=A.evaluate(R).rgb;return[255*F,255*W,255*re,fe]}],rgb:[er,[_t,_t,_t],wh],rgba:[er,[_t,_t,_t,_t],wh],has:{type:mt,overloads:[[[It],(R,[A])=>Qf(A.evaluate(R),R.properties())],[[It,lr],(R,[A,F])=>Qf(A.evaluate(R),F.evaluate(R))]]},get:{type:Tr,overloads:[[[It],(R,[A])=>_f(A.evaluate(R),R.properties())],[[It,lr],(R,[A,F])=>_f(A.evaluate(R),F.evaluate(R))]]},"feature-state":[Tr,[It],(R,[A])=>_f(A.evaluate(R),R.featureState||{})],properties:[lr,[],R=>R.properties()],"geometry-type":[It,[],R=>R.geometryType()],id:[Tr,[],R=>R.id()],zoom:[_t,[],R=>R.globals.zoom],"heatmap-density":[_t,[],R=>R.globals.heatmapDensity||0],"line-progress":[_t,[],R=>R.globals.lineProgress||0],accumulated:[Tr,[],R=>R.globals.accumulated===void 0?null:R.globals.accumulated],"+":[_t,Yc(_t),(R,A)=>{let F=0;for(let W of A)F+=W.evaluate(R);return F}],"*":[_t,Yc(_t),(R,A)=>{let F=1;for(let W of A)F*=W.evaluate(R);return F}],"-":{type:_t,overloads:[[[_t,_t],(R,[A,F])=>A.evaluate(R)-F.evaluate(R)],[[_t],(R,[A])=>-A.evaluate(R)]]},"/":[_t,[_t,_t],(R,[A,F])=>A.evaluate(R)/F.evaluate(R)],"%":[_t,[_t,_t],(R,[A,F])=>A.evaluate(R)%F.evaluate(R)],ln2:[_t,[],()=>Math.LN2],pi:[_t,[],()=>Math.PI],e:[_t,[],()=>Math.E],"^":[_t,[_t,_t],(R,[A,F])=>Math.pow(A.evaluate(R),F.evaluate(R))],sqrt:[_t,[_t],(R,[A])=>Math.sqrt(A.evaluate(R))],log10:[_t,[_t],(R,[A])=>Math.log(A.evaluate(R))/Math.LN10],ln:[_t,[_t],(R,[A])=>Math.log(A.evaluate(R))],log2:[_t,[_t],(R,[A])=>Math.log(A.evaluate(R))/Math.LN2],sin:[_t,[_t],(R,[A])=>Math.sin(A.evaluate(R))],cos:[_t,[_t],(R,[A])=>Math.cos(A.evaluate(R))],tan:[_t,[_t],(R,[A])=>Math.tan(A.evaluate(R))],asin:[_t,[_t],(R,[A])=>Math.asin(A.evaluate(R))],acos:[_t,[_t],(R,[A])=>Math.acos(A.evaluate(R))],atan:[_t,[_t],(R,[A])=>Math.atan(A.evaluate(R))],min:[_t,Yc(_t),(R,A)=>Math.min(...A.map(F=>F.evaluate(R)))],max:[_t,Yc(_t),(R,A)=>Math.max(...A.map(F=>F.evaluate(R)))],abs:[_t,[_t],(R,[A])=>Math.abs(A.evaluate(R))],round:[_t,[_t],(R,[A])=>{let F=A.evaluate(R);return F<0?-Math.round(-F):Math.round(F)}],floor:[_t,[_t],(R,[A])=>Math.floor(A.evaluate(R))],ceil:[_t,[_t],(R,[A])=>Math.ceil(A.evaluate(R))],"filter-==":[mt,[It,Tr],(R,[A,F])=>R.properties()[A.value]===F.value],"filter-id-==":[mt,[Tr],(R,[A])=>R.id()===A.value],"filter-type-==":[mt,[It],(R,[A])=>R.geometryType()===A.value],"filter-<":[mt,[It,Tr],(R,[A,F])=>{let W=R.properties()[A.value],re=F.value;return typeof W==typeof re&&W<re}],"filter-id-<":[mt,[Tr],(R,[A])=>{let F=R.id(),W=A.value;return typeof F==typeof W&&F<W}],"filter->":[mt,[It,Tr],(R,[A,F])=>{let W=R.properties()[A.value],re=F.value;return typeof W==typeof re&&W>re}],"filter-id->":[mt,[Tr],(R,[A])=>{let F=R.id(),W=A.value;return typeof F==typeof W&&F>W}],"filter-<=":[mt,[It,Tr],(R,[A,F])=>{let W=R.properties()[A.value],re=F.value;return typeof W==typeof re&&W<=re}],"filter-id-<=":[mt,[Tr],(R,[A])=>{let F=R.id(),W=A.value;return typeof F==typeof W&&F<=W}],"filter->=":[mt,[It,Tr],(R,[A,F])=>{let W=R.properties()[A.value],re=F.value;return typeof W==typeof re&&W>=re}],"filter-id->=":[mt,[Tr],(R,[A])=>{let F=R.id(),W=A.value;return typeof F==typeof W&&F>=W}],"filter-has":[mt,[Tr],(R,[A])=>A.value in R.properties()],"filter-has-id":[mt,[],R=>R.id()!==null&&R.id()!==void 0],"filter-type-in":[mt,[Ge(It)],(R,[A])=>A.value.indexOf(R.geometryType())>=0],"filter-id-in":[mt,[Ge(Tr)],(R,[A])=>A.value.indexOf(R.id())>=0],"filter-in-small":[mt,[It,Ge(Tr)],(R,[A,F])=>F.value.indexOf(R.properties()[A.value])>=0],"filter-in-large":[mt,[It,Ge(Tr)],(R,[A,F])=>function(W,re,fe,pe){for(;fe<=pe;){let ze=fe+pe>>1;if(re[ze]===W)return!0;re[ze]>W?pe=ze-1:fe=ze+1}return!1}(R.properties()[A.value],F.value,0,F.value.length-1)],all:{type:mt,overloads:[[[mt,mt],(R,[A,F])=>A.evaluate(R)&&F.evaluate(R)],[Yc(mt),(R,A)=>{for(let F of A)if(!F.evaluate(R))return!1;return!0}]]},any:{type:mt,overloads:[[[mt,mt],(R,[A,F])=>A.evaluate(R)||F.evaluate(R)],[Yc(mt),(R,A)=>{for(let F of A)if(F.evaluate(R))return!0;return!1}]]},"!":[mt,[mt],(R,[A])=>!A.evaluate(R)],"is-supported-script":[mt,[It],(R,[A])=>{let F=R.globals&&R.globals.isSupportedScript;return!F||F(A.evaluate(R))}],upcase:[It,[It],(R,[A])=>A.evaluate(R).toUpperCase()],downcase:[It,[It],(R,[A])=>A.evaluate(R).toLowerCase()],concat:[It,Yc(Tr),(R,A)=>A.map(F=>Ba(F.evaluate(R))).join("")],"resolved-locale":[It,[Lr],(R,[A])=>A.evaluate(R).resolvedLocale()]});class Eu{constructor(A,F){var W;this.expression=A,this._warningHistory={},this._evaluator=new Ha,this._defaultValue=F?(W=F).type==="color"&&jf(W.default)?new nr(0,0,0,0):W.type==="color"?nr.parse(W.default)||null:W.type==="padding"?Qr.parse(W.default)||null:W.type==="variableAnchorOffsetCollection"?$i.parse(W.default)||null:W.default===void 0?null:W.default:null,this._enumValues=F&&F.type==="enum"?F.values:null}evaluateWithoutErrorHandling(A,F,W,re,fe,pe){return this._evaluator.globals=A,this._evaluator.feature=F,this._evaluator.featureState=W,this._evaluator.canonical=re,this._evaluator.availableImages=fe||null,this._evaluator.formattedSection=pe,this.expression.evaluate(this._evaluator)}evaluate(A,F,W,re,fe,pe){this._evaluator.globals=A,this._evaluator.feature=F||null,this._evaluator.featureState=W||null,this._evaluator.canonical=re,this._evaluator.availableImages=fe||null,this._evaluator.formattedSection=pe||null;try{let ze=this.expression.evaluate(this._evaluator);if(ze==null||typeof ze=="number"&&ze!=ze)return this._defaultValue;if(this._enumValues&&!(ze in this._enumValues))throw new ma(`Expected value to be one of ${Object.keys(this._enumValues).map(Ke=>JSON.stringify(Ke)).join(", ")}, but found ${JSON.stringify(ze)} instead.`);return ze}catch(ze){return this._warningHistory[ze.message]||(this._warningHistory[ze.message]=!0,typeof console!="undefined"&&console.warn(ze.message)),this._defaultValue}}}function Dc(R){return Array.isArray(R)&&R.length>0&&typeof R[0]=="string"&&R[0]in yf}function ks(R,A){let F=new vo(yf,eh,[],A?function(re){let fe={color:er,string:It,number:_t,enum:It,boolean:mt,formatted:ti,padding:Br,resolvedImage:Vr,variableAnchorOffsetCollection:dt};return re.type==="array"?Ge(fe[re.value]||Tr,re.length):fe[re.type]}(A):void 0),W=F.parse(R,void 0,void 0,void 0,A&&A.type==="string"?{typeAnnotation:"coerce"}:void 0);return W?cc(new Eu(W,A)):of(F.errors)}class bc{constructor(A,F){this.kind=A,this._styleExpression=F,this.isStateDependent=A!=="constant"&&!ju(F.expression)}evaluateWithoutErrorHandling(A,F,W,re,fe,pe){return this._styleExpression.evaluateWithoutErrorHandling(A,F,W,re,fe,pe)}evaluate(A,F,W,re,fe,pe){return this._styleExpression.evaluate(A,F,W,re,fe,pe)}}class hu{constructor(A,F,W,re){this.kind=A,this.zoomStops=W,this._styleExpression=F,this.isStateDependent=A!=="camera"&&!ju(F.expression),this.interpolationType=re}evaluateWithoutErrorHandling(A,F,W,re,fe,pe){return this._styleExpression.evaluateWithoutErrorHandling(A,F,W,re,fe,pe)}evaluate(A,F,W,re,fe,pe){return this._styleExpression.evaluate(A,F,W,re,fe,pe)}interpolationFactor(A,F,W){return this.interpolationType?ko.interpolationFactor(this.interpolationType,A,F,W):0}}function _u(R,A){let F=ks(R,A);if(F.result==="error")return F;let W=F.value.expression,re=th(W);if(!re&&!Nl(A))return of([new St("","data expressions not supported")]);let fe=Hf(W,["zoom"]);if(!fe&&!Kc(A))return of([new St("","zoom expressions not supported")]);let pe=nh(W);return pe||fe?pe instanceof St?of([pe]):pe instanceof ko&&!Rc(A)?of([new St("",'"interpolate" expressions cannot be used with this property')]):cc(pe?new hu(re?"camera":"composite",F.value,pe.labels,pe instanceof ko?pe.interpolation:void 0):new bc(re?"constant":"source",F.value)):of([new St("",'"zoom" expression may only be used as input to a top-level "step" or "interpolate" expression.')])}class nl{constructor(A,F){this._parameters=A,this._specification=F,$e(this,rh(this._parameters,this._specification))}static deserialize(A){return new nl(A._parameters,A._specification)}static serialize(A){return{_parameters:A._parameters,_specification:A._specification}}}function nh(R){let A=null;if(R instanceof jn)A=nh(R.result);else if(R instanceof Fs){for(let F of R.args)if(A=nh(F),A)break}else(R instanceof _n||R instanceof ko)&&R.input instanceof Bl&&R.input.name==="zoom"&&(A=R);return A instanceof St||R.eachChild(F=>{let W=nh(F);W instanceof St?A=W:!A&&W?A=new St("",'"zoom" expression may only be used as input to a top-level "step" or "interpolate" expression.'):A&&W&&A!==W&&(A=new St("",'Only one zoom-based "step" or "interpolate" subexpression may be used in an expression.'))}),A}function Ah(R){if(R===!0||R===!1)return!0;if(!Array.isArray(R)||R.length===0)return!1;switch(R[0]){case"has":return R.length>=2&&R[1]!=="$id"&&R[1]!=="$type";case"in":return R.length>=3&&(typeof R[1]!="string"||Array.isArray(R[2]));case"!in":case"!has":case"none":return!1;case"==":case"!=":case">":case">=":case"<":case"<=":return R.length!==3||Array.isArray(R[1])||Array.isArray(R[2]);case"any":case"all":for(let A of R.slice(1))if(!Ah(A)&&typeof A!="boolean")return!1;return!0;default:return!0}}let zu={type:"boolean",default:!1,transition:!1,"property-type":"data-driven",expression:{interpolated:!1,parameters:["zoom","feature"]}};function Fc(R){if(R==null)return{filter:()=>!0,needGeometry:!1};Ah(R)||(R=xf(R));let A=ks(R,zu);if(A.result==="error")throw new Error(A.value.map(F=>`${F.key}: ${F.message}`).join(", "));return{filter:(F,W,re)=>A.value.evaluate(F,W,{},re),needGeometry:bd(R)}}function wc(R,A){return R<A?-1:R>A?1:0}function bd(R){if(!Array.isArray(R))return!1;if(R[0]==="within"||R[0]==="distance")return!0;for(let A=1;A<R.length;A++)if(bd(R[A]))return!0;return!1}function xf(R){if(!R)return!0;let A=R[0];return R.length<=1?A!=="any":A==="=="?Pf(R[1],R[2],"=="):A==="!="?jl(Pf(R[1],R[2],"==")):A==="<"||A===">"||A==="<="||A===">="?Pf(R[1],R[2],A):A==="any"?(F=R.slice(1),["any"].concat(F.map(xf))):A==="all"?["all"].concat(R.slice(1).map(xf)):A==="none"?["all"].concat(R.slice(1).map(xf).map(jl)):A==="in"?Ou(R[1],R.slice(2)):A==="!in"?jl(Ou(R[1],R.slice(2))):A==="has"?bf(R[1]):A!=="!has"||jl(bf(R[1]));var F}function Pf(R,A,F){switch(R){case"$type":return[`filter-type-${F}`,A];case"$id":return[`filter-id-${F}`,A];default:return[`filter-${F}`,R,A]}}function Ou(R,A){if(A.length===0)return!1;switch(R){case"$type":return["filter-type-in",["literal",A]];case"$id":return["filter-id-in",["literal",A]];default:return A.length>200&&!A.some(F=>typeof F!=typeof A[0])?["filter-in-large",R,["literal",A.sort(wc)]]:["filter-in-small",R,["literal",A]]}}function bf(R){switch(R){case"$type":return!0;case"$id":return["filter-has-id"];default:return["filter-has",R]}}function jl(R){return["!",R]}function lf(R){let A=typeof R;if(A==="number"||A==="boolean"||A==="string"||R==null)return JSON.stringify(R);if(Array.isArray(R)){let re="[";for(let fe of R)re+=`${lf(fe)},`;return`${re}]`}let F=Object.keys(R).sort(),W="{";for(let re=0;re<F.length;re++)W+=`${JSON.stringify(F[re])}:${lf(R[F[re]])},`;return`${W}}`}function Hh(R){let A="";for(let F of Ze)A+=`/${lf(R[F])}`;return A}function If(R){let A=R.value;return A?[new cr(R.key,A,"constants have been deprecated as of v8")]:[]}function Cs(R){return R instanceof Number||R instanceof String||R instanceof Boolean?R.valueOf():R}function du(R){if(Array.isArray(R))return R.map(du);if(R instanceof Object&&!(R instanceof Number||R instanceof String||R instanceof Boolean)){let A={};for(let F in R)A[F]=du(R[F]);return A}return Cs(R)}function ku(R){let A=R.key,F=R.value,W=R.valueSpec||{},re=R.objectElementValidators||{},fe=R.style,pe=R.styleSpec,ze=R.validateSpec,Ke=[],ct=gs(F);if(ct!=="object")return[new cr(A,F,`object expected, ${ct} found`)];for(let Lt in F){let $t=Lt.split(".")[0],fr=W[$t]||W["*"],mr;if(re[$t])mr=re[$t];else if(W[$t])mr=ze;else if(re["*"])mr=re["*"];else{if(!W["*"]){Ke.push(new cr(A,F[Lt],`unknown property "${Lt}"`));continue}mr=ze}Ke=Ke.concat(mr({key:(A&&`${A}.`)+Lt,value:F[Lt],valueSpec:fr,style:fe,styleSpec:pe,object:F,objectKey:Lt,validateSpec:ze},F))}for(let Lt in W)re[Lt]||W[Lt].required&&W[Lt].default===void 0&&F[Lt]===void 0&&Ke.push(new cr(A,F,`missing required property "${Lt}"`));return Ke}function Wf(R){let A=R.value,F=R.valueSpec,W=R.style,re=R.styleSpec,fe=R.key,pe=R.arrayElementValidator||R.validateSpec;if(gs(A)!=="array")return[new cr(fe,A,`array expected, ${gs(A)} found`)];if(F.length&&A.length!==F.length)return[new cr(fe,A,`array length ${F.length} expected, length ${A.length} found`)];if(F["min-length"]&&A.length<F["min-length"])return[new cr(fe,A,`array length at least ${F["min-length"]} expected, length ${A.length} found`)];let ze={type:F.value,values:F.values};re.$version<7&&(ze.function=F.function),gs(F.value)==="object"&&(ze=F.value);let Ke=[];for(let ct=0;ct<A.length;ct++)Ke=Ke.concat(pe({array:A,arrayIndex:ct,value:A[ct],valueSpec:ze,validateSpec:R.validateSpec,style:W,styleSpec:re,key:`${fe}[${ct}]`}));return Ke}function Us(R){let A=R.key,F=R.value,W=R.valueSpec,re=gs(F);return re==="number"&&F!=F&&(re="NaN"),re!=="number"?[new cr(A,F,`number expected, ${re} found`)]:"minimum"in W&&F<W.minimum?[new cr(A,F,`${F} is less than the minimum value ${W.minimum}`)]:"maximum"in W&&F>W.maximum?[new cr(A,F,`${F} is greater than the maximum value ${W.maximum}`)]:[]}function wf(R){let A=R.valueSpec,F=Cs(R.value.type),W,re,fe,pe={},ze=F!=="categorical"&&R.value.property===void 0,Ke=!ze,ct=gs(R.value.stops)==="array"&&gs(R.value.stops[0])==="array"&&gs(R.value.stops[0][0])==="object",Lt=ku({key:R.key,value:R.value,valueSpec:R.styleSpec.function,validateSpec:R.validateSpec,style:R.style,styleSpec:R.styleSpec,objectElementValidators:{stops:function(mr){if(F==="identity")return[new cr(mr.key,mr.value,'identity function may not have a "stops" property')];let Pr=[],zr=mr.value;return Pr=Pr.concat(Wf({key:mr.key,value:zr,valueSpec:mr.valueSpec,validateSpec:mr.validateSpec,style:mr.style,styleSpec:mr.styleSpec,arrayElementValidator:$t})),gs(zr)==="array"&&zr.length===0&&Pr.push(new cr(mr.key,zr,"array must have at least one stop")),Pr},default:function(mr){return mr.validateSpec({key:mr.key,value:mr.value,valueSpec:A,validateSpec:mr.validateSpec,style:mr.style,styleSpec:mr.styleSpec})}}});return F==="identity"&&ze&&Lt.push(new cr(R.key,R.value,'missing required property "property"')),F==="identity"||R.value.stops||Lt.push(new cr(R.key,R.value,'missing required property "stops"')),F==="exponential"&&R.valueSpec.expression&&!Rc(R.valueSpec)&&Lt.push(new cr(R.key,R.value,"exponential functions not supported")),R.styleSpec.$version>=8&&(Ke&&!Nl(R.valueSpec)?Lt.push(new cr(R.key,R.value,"property functions not supported")):ze&&!Kc(R.valueSpec)&&Lt.push(new cr(R.key,R.value,"zoom functions not supported"))),F!=="categorical"&&!ct||R.value.property!==void 0||Lt.push(new cr(R.key,R.value,'"property" property is required')),Lt;function $t(mr){let Pr=[],zr=mr.value,ui=mr.key;if(gs(zr)!=="array")return[new cr(ui,zr,`array expected, ${gs(zr)} found`)];if(zr.length!==2)return[new cr(ui,zr,`array length 2 expected, length ${zr.length} found`)];if(ct){if(gs(zr[0])!=="object")return[new cr(ui,zr,`object expected, ${gs(zr[0])} found`)];if(zr[0].zoom===void 0)return[new cr(ui,zr,"object stop key must have zoom")];if(zr[0].value===void 0)return[new cr(ui,zr,"object stop key must have value")];if(fe&&fe>Cs(zr[0].zoom))return[new cr(ui,zr[0].zoom,"stop zoom values must appear in ascending order")];Cs(zr[0].zoom)!==fe&&(fe=Cs(zr[0].zoom),re=void 0,pe={}),Pr=Pr.concat(ku({key:`${ui}[0]`,value:zr[0],valueSpec:{zoom:{}},validateSpec:mr.validateSpec,style:mr.style,styleSpec:mr.styleSpec,objectElementValidators:{zoom:Us,value:fr}}))}else Pr=Pr.concat(fr({key:`${ui}[0]`,value:zr[0],valueSpec:{},validateSpec:mr.validateSpec,style:mr.style,styleSpec:mr.styleSpec},zr));return Dc(du(zr[1]))?Pr.concat([new cr(`${ui}[1]`,zr[1],"expressions are not allowed in function stops.")]):Pr.concat(mr.validateSpec({key:`${ui}[1]`,value:zr[1],valueSpec:A,validateSpec:mr.validateSpec,style:mr.style,styleSpec:mr.styleSpec}))}function fr(mr,Pr){let zr=gs(mr.value),ui=Cs(mr.value),yi=mr.value!==null?mr.value:Pr;if(W){if(zr!==W)return[new cr(mr.key,yi,`${zr} stop domain type must match previous stop domain type ${W}`)]}else W=zr;if(zr!=="number"&&zr!=="string"&&zr!=="boolean")return[new cr(mr.key,yi,"stop domain value must be a number, string, or boolean")];if(zr!=="number"&&F!=="categorical"){let vn=`number expected, ${zr} found`;return Nl(A)&&F===void 0&&(vn+='\nIf you intended to use a categorical function, specify `"type": "categorical"`.'),[new cr(mr.key,yi,vn)]}return F!=="categorical"||zr!=="number"||isFinite(ui)&&Math.floor(ui)===ui?F!=="categorical"&&zr==="number"&&re!==void 0&&ui<re?[new cr(mr.key,yi,"stop domain values must appear in ascending order")]:(re=ui,F==="categorical"&&ui in pe?[new cr(mr.key,yi,"stop domain values must be unique")]:(pe[ui]=!0,[])):[new cr(mr.key,yi,`integer expected, found ${ui}`)]}}function zc(R){let A=(R.expressionContext==="property"?_u:ks)(du(R.value),R.valueSpec);if(A.result==="error")return A.value.map(W=>new cr(`${R.key}${W.key}`,R.value,W.message));let F=A.value.expression||A.value._styleExpression.expression;if(R.expressionContext==="property"&&R.propertyKey==="text-font"&&!F.outputDefined())return[new cr(R.key,R.value,`Invalid data expression for "${R.propertyKey}". Output values must be contained as literals within the expression.`)];if(R.expressionContext==="property"&&R.propertyType==="layout"&&!ju(F))return[new cr(R.key,R.value,'"feature-state" data expressions are not supported with layout properties.')];if(R.expressionContext==="filter"&&!ju(F))return[new cr(R.key,R.value,'"feature-state" data expressions are not supported with filters.')];if(R.expressionContext&&R.expressionContext.indexOf("cluster")===0){if(!Hf(F,["zoom","feature-state"]))return[new cr(R.key,R.value,'"zoom" and "feature-state" expressions are not supported with cluster properties.')];if(R.expressionContext==="cluster-initial"&&!th(F))return[new cr(R.key,R.value,"Feature data expressions are not supported with initial expression part of cluster properties.")]}return[]}function Wu(R){let A=R.key,F=R.value,W=R.valueSpec,re=[];return Array.isArray(W.values)?W.values.indexOf(Cs(F))===-1&&re.push(new cr(A,F,`expected one of [${W.values.join(", ")}], ${JSON.stringify(F)} found`)):Object.keys(W.values).indexOf(Cs(F))===-1&&re.push(new cr(A,F,`expected one of [${Object.keys(W.values).join(", ")}], ${JSON.stringify(F)} found`)),re}function Rf(R){return Ah(du(R.value))?zc($e({},R,{expressionContext:"filter",valueSpec:{value:"boolean"}})):Xu(R)}function Xu(R){let A=R.value,F=R.key;if(gs(A)!=="array")return[new cr(F,A,`array expected, ${gs(A)} found`)];let W=R.styleSpec,re,fe=[];if(A.length<1)return[new cr(F,A,"filter array must have at least 1 element")];switch(fe=fe.concat(Wu({key:`${F}[0]`,value:A[0],valueSpec:W.filter_operator,style:R.style,styleSpec:R.styleSpec})),Cs(A[0])){case"<":case"<=":case">":case">=":A.length>=2&&Cs(A[1])==="$type"&&fe.push(new cr(F,A,`"$type" cannot be use with operator "${A[0]}"`));case"==":case"!=":A.length!==3&&fe.push(new cr(F,A,`filter array for operator "${A[0]}" must have 3 elements`));case"in":case"!in":A.length>=2&&(re=gs(A[1]),re!=="string"&&fe.push(new cr(`${F}[1]`,A[1],`string expected, ${re} found`)));for(let pe=2;pe<A.length;pe++)re=gs(A[pe]),Cs(A[1])==="$type"?fe=fe.concat(Wu({key:`${F}[${pe}]`,value:A[pe],valueSpec:W.geometry_type,style:R.style,styleSpec:R.styleSpec})):re!=="string"&&re!=="number"&&re!=="boolean"&&fe.push(new cr(`${F}[${pe}]`,A[pe],`string, number, or boolean expected, ${re} found`));break;case"any":case"all":case"none":for(let pe=1;pe<A.length;pe++)fe=fe.concat(Xu({key:`${F}[${pe}]`,value:A[pe],style:R.style,styleSpec:R.styleSpec}));break;case"has":case"!has":re=gs(A[1]),A.length!==2?fe.push(new cr(F,A,`filter array for "${A[0]}" operator must have 2 elements`)):re!=="string"&&fe.push(new cr(`${F}[1]`,A[1],`string expected, ${re} found`))}return fe}function uf(R,A){let F=R.key,W=R.validateSpec,re=R.style,fe=R.styleSpec,pe=R.value,ze=R.objectKey,Ke=fe[`${A}_${R.layerType}`];if(!Ke)return[];let ct=ze.match(/^(.*)-transition$/);if(A==="paint"&&ct&&Ke[ct[1]]&&Ke[ct[1]].transition)return W({key:F,value:pe,valueSpec:fe.transition,style:re,styleSpec:fe});let Lt=R.valueSpec||Ke[ze];if(!Lt)return[new cr(F,pe,`unknown property "${ze}"`)];let $t;if(gs(pe)==="string"&&Nl(Lt)&&!Lt.tokens&&($t=/^{([^}]+)}$/.exec(pe)))return[new cr(F,pe,`"${ze}" does not support interpolation syntax
+Use an identity property function instead: \`{ "type": "identity", "property": ${JSON.stringify($t[1])} }\`.`)];let fr=[];return R.layerType==="symbol"&&(ze==="text-field"&&re&&!re.glyphs&&fr.push(new cr(F,pe,'use of "text-field" requires a style "glyphs" property')),ze==="text-font"&&jf(du(pe))&&Cs(pe.type)==="identity"&&fr.push(new cr(F,pe,'"text-font" does not support identity functions'))),fr.concat(W({key:R.key,value:pe,valueSpec:Lt,style:re,styleSpec:fe,expressionContext:"property",propertyType:A,propertyKey:ze}))}function Xf(R){return uf(R,"paint")}function Wl(R){return uf(R,"layout")}function ah(R){let A=[],F=R.value,W=R.key,re=R.style,fe=R.styleSpec;F.type||F.ref||A.push(new cr(W,F,'either "type" or "ref" is required'));let pe=Cs(F.type),ze=Cs(F.ref);if(F.id){let Ke=Cs(F.id);for(let ct=0;ct<R.arrayIndex;ct++){let Lt=re.layers[ct];Cs(Lt.id)===Ke&&A.push(new cr(W,F.id,`duplicate layer id "${F.id}", previously used at line ${Lt.id.__line__}`))}}if("ref"in F){let Ke;["type","source","source-layer","filter","layout"].forEach(ct=>{ct in F&&A.push(new cr(W,F[ct],`"${ct}" is prohibited for ref layers`))}),re.layers.forEach(ct=>{Cs(ct.id)===ze&&(Ke=ct)}),Ke?Ke.ref?A.push(new cr(W,F.ref,"ref cannot reference another ref layer")):pe=Cs(Ke.type):A.push(new cr(W,F.ref,`ref layer "${ze}" not found`))}else if(pe!=="background")if(F.source){let Ke=re.sources&&re.sources[F.source],ct=Ke&&Cs(Ke.type);Ke?ct==="vector"&&pe==="raster"?A.push(new cr(W,F.source,`layer "${F.id}" requires a raster source`)):ct!=="raster-dem"&&pe==="hillshade"?A.push(new cr(W,F.source,`layer "${F.id}" requires a raster-dem source`)):ct==="raster"&&pe!=="raster"?A.push(new cr(W,F.source,`layer "${F.id}" requires a vector source`)):ct!=="vector"||F["source-layer"]?ct==="raster-dem"&&pe!=="hillshade"?A.push(new cr(W,F.source,"raster-dem source can only be used with layer type 'hillshade'.")):pe!=="line"||!F.paint||!F.paint["line-gradient"]||ct==="geojson"&&Ke.lineMetrics||A.push(new cr(W,F,`layer "${F.id}" specifies a line-gradient, which requires a GeoJSON source with \`lineMetrics\` enabled.`)):A.push(new cr(W,F,`layer "${F.id}" must specify a "source-layer"`)):A.push(new cr(W,F.source,`source "${F.source}" not found`))}else A.push(new cr(W,F,'missing required property "source"'));return A=A.concat(ku({key:W,value:F,valueSpec:fe.layer,style:R.style,styleSpec:R.styleSpec,validateSpec:R.validateSpec,objectElementValidators:{"*":()=>[],type:()=>R.validateSpec({key:`${W}.type`,value:F.type,valueSpec:fe.layer.type,style:R.style,styleSpec:R.styleSpec,validateSpec:R.validateSpec,object:F,objectKey:"type"}),filter:Rf,layout:Ke=>ku({layer:F,key:Ke.key,value:Ke.value,style:Ke.style,styleSpec:Ke.styleSpec,validateSpec:Ke.validateSpec,objectElementValidators:{"*":ct=>Wl($e({layerType:pe},ct))}}),paint:Ke=>ku({layer:F,key:Ke.key,value:Ke.value,style:Ke.style,styleSpec:Ke.styleSpec,validateSpec:Ke.validateSpec,objectElementValidators:{"*":ct=>Xf($e({layerType:pe},ct))}})}})),A}function Zu(R){let A=R.value,F=R.key,W=gs(A);return W!=="string"?[new cr(F,A,`string expected, ${W} found`)]:[]}let Oc={promoteId:function({key:R,value:A}){if(gs(A)==="string")return Zu({key:R,value:A});{let F=[];for(let W in A)F.push(...Zu({key:`${R}.${W}`,value:A[W]}));return F}}};function Tc(R){let A=R.value,F=R.key,W=R.styleSpec,re=R.style,fe=R.validateSpec;if(!A.type)return[new cr(F,A,'"type" is required')];let pe=Cs(A.type),ze;switch(pe){case"vector":case"raster":return ze=ku({key:F,value:A,valueSpec:W[`source_${pe.replace("-","_")}`],style:R.style,styleSpec:W,objectElementValidators:Oc,validateSpec:fe}),ze;case"raster-dem":return ze=function(Ke){var ct;let Lt=(ct=Ke.sourceName)!==null&&ct!==void 0?ct:"",$t=Ke.value,fr=Ke.styleSpec,mr=fr.source_raster_dem,Pr=Ke.style,zr=[],ui=gs($t);if($t===void 0)return zr;if(ui!=="object")return zr.push(new cr("source_raster_dem",$t,`object expected, ${ui} found`)),zr;let yi=Cs($t.encoding)==="custom",vn=["redFactor","greenFactor","blueFactor","baseShift"],zi=Ke.value.encoding?`"${Ke.value.encoding}"`:"Default";for(let un in $t)!yi&&vn.includes(un)?zr.push(new cr(un,$t[un],`In "${Lt}": "${un}" is only valid when "encoding" is set to "custom". ${zi} encoding found`)):mr[un]?zr=zr.concat(Ke.validateSpec({key:un,value:$t[un],valueSpec:mr[un],validateSpec:Ke.validateSpec,style:Pr,styleSpec:fr})):zr.push(new cr(un,$t[un],`unknown property "${un}"`));return zr}({sourceName:F,value:A,style:R.style,styleSpec:W,validateSpec:fe}),ze;case"geojson":if(ze=ku({key:F,value:A,valueSpec:W.source_geojson,style:re,styleSpec:W,validateSpec:fe,objectElementValidators:Oc}),A.cluster)for(let Ke in A.clusterProperties){let[ct,Lt]=A.clusterProperties[Ke],$t=typeof ct=="string"?[ct,["accumulated"],["get",Ke]]:ct;ze.push(...zc({key:`${F}.${Ke}.map`,value:Lt,validateSpec:fe,expressionContext:"cluster-map"})),ze.push(...zc({key:`${F}.${Ke}.reduce`,value:$t,validateSpec:fe,expressionContext:"cluster-reduce"}))}return ze;case"video":return ku({key:F,value:A,valueSpec:W.source_video,style:re,validateSpec:fe,styleSpec:W});case"image":return ku({key:F,value:A,valueSpec:W.source_image,style:re,validateSpec:fe,styleSpec:W});case"canvas":return[new cr(F,null,"Please use runtime APIs to add canvas sources, rather than including them in stylesheets.","source.canvas")];default:return Wu({key:`${F}.type`,value:A.type,valueSpec:{values:["vector","raster","raster-dem","geojson","video","image"]},style:re,validateSpec:fe,styleSpec:W})}}function wl(R){let A=R.value,F=R.styleSpec,W=F.light,re=R.style,fe=[],pe=gs(A);if(A===void 0)return fe;if(pe!=="object")return fe=fe.concat([new cr("light",A,`object expected, ${pe} found`)]),fe;for(let ze in A){let Ke=ze.match(/^(.*)-transition$/);fe=fe.concat(Ke&&W[Ke[1]]&&W[Ke[1]].transition?R.validateSpec({key:ze,value:A[ze],valueSpec:F.transition,validateSpec:R.validateSpec,style:re,styleSpec:F}):W[ze]?R.validateSpec({key:ze,value:A[ze],valueSpec:W[ze],validateSpec:R.validateSpec,style:re,styleSpec:F}):[new cr(ze,A[ze],`unknown property "${ze}"`)])}return fe}function vu(R){let A=R.value,F=R.styleSpec,W=F.sky,re=R.style,fe=gs(A);if(A===void 0)return[];if(fe!=="object")return[new cr("sky",A,`object expected, ${fe} found`)];let pe=[];for(let ze in A)pe=pe.concat(W[ze]?R.validateSpec({key:ze,value:A[ze],valueSpec:W[ze],style:re,styleSpec:F}):[new cr(ze,A[ze],`unknown property "${ze}"`)]);return pe}function qc(R){let A=R.value,F=R.styleSpec,W=F.terrain,re=R.style,fe=[],pe=gs(A);if(A===void 0)return fe;if(pe!=="object")return fe=fe.concat([new cr("terrain",A,`object expected, ${pe} found`)]),fe;for(let ze in A)fe=fe.concat(W[ze]?R.validateSpec({key:ze,value:A[ze],valueSpec:W[ze],validateSpec:R.validateSpec,style:re,styleSpec:F}):[new cr(ze,A[ze],`unknown property "${ze}"`)]);return fe}function cf(R){let A=[],F=R.value,W=R.key;if(Array.isArray(F)){let re=[],fe=[];for(let pe in F)F[pe].id&&re.includes(F[pe].id)&&A.push(new cr(W,F,`all the sprites' ids must be unique, but ${F[pe].id} is duplicated`)),re.push(F[pe].id),F[pe].url&&fe.includes(F[pe].url)&&A.push(new cr(W,F,`all the sprites' URLs must be unique, but ${F[pe].url} is duplicated`)),fe.push(F[pe].url),A=A.concat(ku({key:`${W}[${pe}]`,value:F[pe],valueSpec:{id:{type:"string",required:!0},url:{type:"string",required:!0}},validateSpec:R.validateSpec}));return A}return Zu({key:W,value:F})}let fc={"*":()=>[],array:Wf,boolean:function(R){let A=R.value,F=R.key,W=gs(A);return W!=="boolean"?[new cr(F,A,`boolean expected, ${W} found`)]:[]},number:Us,color:function(R){let A=R.key,F=R.value,W=gs(F);return W!=="string"?[new cr(A,F,`color expected, ${W} found`)]:nr.parse(String(F))?[]:[new cr(A,F,`color expected, "${F}" found`)]},constants:If,enum:Wu,filter:Rf,function:wf,layer:ah,object:ku,source:Tc,light:wl,sky:vu,terrain:qc,projection:function(R){let A=R.value,F=R.styleSpec,W=F.projection,re=R.style,fe=gs(A);if(A===void 0)return[];if(fe!=="object")return[new cr("projection",A,`object expected, ${fe} found`)];let pe=[];for(let ze in A)pe=pe.concat(W[ze]?R.validateSpec({key:ze,value:A[ze],valueSpec:W[ze],style:re,styleSpec:F}):[new cr(ze,A[ze],`unknown property "${ze}"`)]);return pe},string:Zu,formatted:function(R){return Zu(R).length===0?[]:zc(R)},resolvedImage:function(R){return Zu(R).length===0?[]:zc(R)},padding:function(R){let A=R.key,F=R.value;if(gs(F)==="array"){if(F.length<1||F.length>4)return[new cr(A,F,`padding requires 1 to 4 values; ${F.length} values found`)];let W={type:"number"},re=[];for(let fe=0;fe<F.length;fe++)re=re.concat(R.validateSpec({key:`${A}[${fe}]`,value:F[fe],validateSpec:R.validateSpec,valueSpec:W}));return re}return Us({key:A,value:F,valueSpec:{}})},variableAnchorOffsetCollection:function(R){let A=R.key,F=R.value,W=gs(F),re=R.styleSpec;if(W!=="array"||F.length<1||F.length%2!=0)return[new cr(A,F,"variableAnchorOffsetCollection requires a non-empty array of even length")];let fe=[];for(let pe=0;pe<F.length;pe+=2)fe=fe.concat(Wu({key:`${A}[${pe}]`,value:F[pe],valueSpec:re.layout_symbol["text-anchor"]})),fe=fe.concat(Wf({key:`${A}[${pe+1}]`,value:F[pe+1],valueSpec:{length:2,value:"number"},validateSpec:R.validateSpec,style:R.style,styleSpec:re}));return fe},sprite:cf};function Bc(R){let A=R.value,F=R.valueSpec,W=R.styleSpec;return R.validateSpec=Bc,F.expression&&jf(Cs(A))?wf(R):F.expression&&Dc(du(A))?zc(R):F.type&&fc[F.type]?fc[F.type](R):ku($e({},R,{valueSpec:F.type?W[F.type]:F}))}function At(R){let A=R.value,F=R.key,W=Zu(R);return W.length||(A.indexOf("{fontstack}")===-1&&W.push(new cr(F,A,'"glyphs" url must include a "{fontstack}" token')),A.indexOf("{range}")===-1&&W.push(new cr(F,A,'"glyphs" url must include a "{range}" token'))),W}function Wt(R,A=ce){let F=[];return F=F.concat(Bc({key:"",value:R,valueSpec:A.$root,styleSpec:A,style:R,validateSpec:Bc,objectElementValidators:{glyphs:At,"*":()=>[]}})),R.constants&&(F=F.concat(If({key:"constants",value:R.constants,style:R,styleSpec:A,validateSpec:Bc}))),Ar(F)}function Cr(R){return function(A){return R(q1(mg({},A),{validateSpec:Bc}))}}function Ar(R){return[].concat(R).sort((A,F)=>A.line-F.line)}function Kr(R){return function(...A){return Ar(R.apply(this,A))}}Wt.source=Kr(Cr(Tc)),Wt.sprite=Kr(Cr(cf)),Wt.glyphs=Kr(Cr(At)),Wt.light=Kr(Cr(wl)),Wt.sky=Kr(Cr(vu)),Wt.terrain=Kr(Cr(qc)),Wt.layer=Kr(Cr(ah)),Wt.filter=Kr(Cr(Rf)),Wt.paintProperty=Kr(Cr(Xf)),Wt.layoutProperty=Kr(Cr(Wl));let ki=Wt,Xi=ki.light,dn=ki.sky,wn=ki.paintProperty,Nn=ki.layoutProperty;function Yi(R,A){let F=!1;if(A&&A.length)for(let W of A)R.fire(new ge(new Error(W.message))),F=!0;return F}class Qi{constructor(A,F,W){let re=this.cells=[];if(A instanceof ArrayBuffer){this.arrayBuffer=A;let pe=new Int32Array(this.arrayBuffer);A=pe[0],this.d=(F=pe[1])+2*(W=pe[2]);for(let Ke=0;Ke<this.d*this.d;Ke++){let ct=pe[3+Ke],Lt=pe[3+Ke+1];re.push(ct===Lt?null:pe.subarray(ct,Lt))}let ze=pe[3+re.length+1];this.keys=pe.subarray(pe[3+re.length],ze),this.bboxes=pe.subarray(ze),this.insert=this._insertReadonly}else{this.d=F+2*W;for(let pe=0;pe<this.d*this.d;pe++)re.push([]);this.keys=[],this.bboxes=[]}this.n=F,this.extent=A,this.padding=W,this.scale=F/A,this.uid=0;let fe=W/F*A;this.min=-fe,this.max=A+fe}insert(A,F,W,re,fe){this._forEachCell(F,W,re,fe,this._insertCell,this.uid++,void 0,void 0),this.keys.push(A),this.bboxes.push(F),this.bboxes.push(W),this.bboxes.push(re),this.bboxes.push(fe)}_insertReadonly(){throw new Error("Cannot insert into a GridIndex created from an ArrayBuffer.")}_insertCell(A,F,W,re,fe,pe){this.cells[fe].push(pe)}query(A,F,W,re,fe){let pe=this.min,ze=this.max;if(A<=pe&&F<=pe&&ze<=W&&ze<=re&&!fe)return Array.prototype.slice.call(this.keys);{let Ke=[];return this._forEachCell(A,F,W,re,this._queryCell,Ke,{},fe),Ke}}_queryCell(A,F,W,re,fe,pe,ze,Ke){let ct=this.cells[fe];if(ct!==null){let Lt=this.keys,$t=this.bboxes;for(let fr=0;fr<ct.length;fr++){let mr=ct[fr];if(ze[mr]===void 0){let Pr=4*mr;(Ke?Ke($t[Pr+0],$t[Pr+1],$t[Pr+2],$t[Pr+3]):A<=$t[Pr+2]&&F<=$t[Pr+3]&&W>=$t[Pr+0]&&re>=$t[Pr+1])?(ze[mr]=!0,pe.push(Lt[mr])):ze[mr]=!1}}}}_forEachCell(A,F,W,re,fe,pe,ze,Ke){let ct=this._convertToCellCoord(A),Lt=this._convertToCellCoord(F),$t=this._convertToCellCoord(W),fr=this._convertToCellCoord(re);for(let mr=ct;mr<=$t;mr++)for(let Pr=Lt;Pr<=fr;Pr++){let zr=this.d*Pr+mr;if((!Ke||Ke(this._convertFromCellCoord(mr),this._convertFromCellCoord(Pr),this._convertFromCellCoord(mr+1),this._convertFromCellCoord(Pr+1)))&&fe.call(this,A,F,W,re,zr,pe,ze,Ke))return}}_convertFromCellCoord(A){return(A-this.padding)/this.scale}_convertToCellCoord(A){return Math.max(0,Math.min(this.d-1,Math.floor(A*this.scale)+this.padding))}toArrayBuffer(){if(this.arrayBuffer)return this.arrayBuffer;let A=this.cells,F=3+this.cells.length+1+1,W=0;for(let pe=0;pe<this.cells.length;pe++)W+=this.cells[pe].length;let re=new Int32Array(F+W+this.keys.length+this.bboxes.length);re[0]=this.extent,re[1]=this.n,re[2]=this.padding;let fe=F;for(let pe=0;pe<A.length;pe++){let ze=A[pe];re[3+pe]=fe,re.set(ze,fe),fe+=ze.length}return re[3+A.length]=fe,re.set(this.keys,fe),fe+=this.keys.length,re[3+A.length+1]=fe,re.set(this.bboxes,fe),fe+=this.bboxes.length,re.buffer}static serialize(A,F){let W=A.toArrayBuffer();return F&&F.push(W),{buffer:W}}static deserialize(A){return new Qi(A.buffer)}}let on={};function Fi(R,A,F={}){if(on[R])throw new Error(`${R} is already registered.`);Object.defineProperty(A,"_classRegistryKey",{value:R,writeable:!1}),on[R]={klass:A,omit:F.omit||[],shallow:F.shallow||[]}}Fi("Object",Object),Fi("TransferableGridIndex",Qi),Fi("Color",nr),Fi("Error",Error),Fi("AJAXError",me),Fi("ResolvedImage",tn),Fi("StylePropertyFunction",nl),Fi("StyleExpression",Eu,{omit:["_evaluator"]}),Fi("ZoomDependentExpression",hu),Fi("ZoomConstantExpression",bc),Fi("CompoundExpression",Bl,{omit:["_evaluate"]});for(let R in yf)yf[R]._classRegistryKey||Fi(`Expression_${R}`,yf[R]);function $n(R){return R&&typeof ArrayBuffer!="undefined"&&(R instanceof ArrayBuffer||R.constructor&&R.constructor.name==="ArrayBuffer")}function Ca(R){return R.$name||R.constructor._classRegistryKey}function Ra(R){return!function(A){if(A===null||typeof A!="object")return!1;let F=Ca(A);return!(!F||F==="Object")}(R)&&(R==null||typeof R=="boolean"||typeof R=="number"||typeof R=="string"||R instanceof Boolean||R instanceof Number||R instanceof String||R instanceof Date||R instanceof RegExp||R instanceof Blob||R instanceof Error||$n(R)||G(R)||ArrayBuffer.isView(R)||R instanceof ImageData)}function La(R,A){if(Ra(R))return($n(R)||G(R))&&A&&A.push(R),ArrayBuffer.isView(R)&&A&&A.push(R.buffer),R instanceof ImageData&&A&&A.push(R.data.buffer),R;if(Array.isArray(R)){let fe=[];for(let pe of R)fe.push(La(pe,A));return fe}if(typeof R!="object")throw new Error("can't serialize object of type "+typeof R);let F=Ca(R);if(!F)throw new Error(`can't serialize object of unregistered class ${R.constructor.name}`);if(!on[F])throw new Error(`${F} is not registered.`);let{klass:W}=on[F],re=W.serialize?W.serialize(R,A):{};if(W.serialize){if(A&&re===A[A.length-1])throw new Error("statically serialized object won't survive transfer of $name property")}else{for(let fe in R){if(!R.hasOwnProperty(fe)||on[F].omit.indexOf(fe)>=0)continue;let pe=R[fe];re[fe]=on[F].shallow.indexOf(fe)>=0?pe:La(pe,A)}R instanceof Error&&(re.message=R.message)}if(re.$name)throw new Error("$name property is reserved for worker serialization logic.");return F!=="Object"&&(re.$name=F),re}function Na(R){if(Ra(R))return R;if(Array.isArray(R))return R.map(Na);if(typeof R!="object")throw new Error("can't deserialize object of type "+typeof R);let A=Ca(R)||"Object";if(!on[A])throw new Error(`can't deserialize unregistered class ${A}`);let{klass:F}=on[A];if(!F)throw new Error(`can't deserialize unregistered class ${A}`);if(F.deserialize)return F.deserialize(R);let W=Object.create(F.prototype);for(let re of Object.keys(R)){if(re==="$name")continue;let fe=R[re];W[re]=on[A].shallow.indexOf(re)>=0?fe:Na(fe)}return W}class Yn{constructor(){this.first=!0}update(A,F){let W=Math.floor(A);return this.first?(this.first=!1,this.lastIntegerZoom=W,this.lastIntegerZoomTime=0,this.lastZoom=A,this.lastFloorZoom=W,!0):(this.lastFloorZoom>W?(this.lastIntegerZoom=W+1,this.lastIntegerZoomTime=F):this.lastFloorZoom<W&&(this.lastIntegerZoom=W,this.lastIntegerZoomTime=F),A!==this.lastZoom&&(this.lastZoom=A,this.lastFloorZoom=W,!0))}}let Dn={"Latin-1 Supplement":R=>R>=128&&R<=255,"Hangul Jamo":R=>R>=4352&&R<=4607,Khmer:R=>R>=6016&&R<=6143,"General Punctuation":R=>R>=8192&&R<=8303,"Letterlike Symbols":R=>R>=8448&&R<=8527,"Number Forms":R=>R>=8528&&R<=8591,"Miscellaneous Technical":R=>R>=8960&&R<=9215,"Control Pictures":R=>R>=9216&&R<=9279,"Optical Character Recognition":R=>R>=9280&&R<=9311,"Enclosed Alphanumerics":R=>R>=9312&&R<=9471,"Geometric Shapes":R=>R>=9632&&R<=9727,"Miscellaneous Symbols":R=>R>=9728&&R<=9983,"Miscellaneous Symbols and Arrows":R=>R>=11008&&R<=11263,"Ideographic Description Characters":R=>R>=12272&&R<=12287,"CJK Symbols and Punctuation":R=>R>=12288&&R<=12351,Katakana:R=>R>=12448&&R<=12543,Kanbun:R=>R>=12688&&R<=12703,"CJK Strokes":R=>R>=12736&&R<=12783,"Enclosed CJK Letters and Months":R=>R>=12800&&R<=13055,"CJK Compatibility":R=>R>=13056&&R<=13311,"Yijing Hexagram Symbols":R=>R>=19904&&R<=19967,"Private Use Area":R=>R>=57344&&R<=63743,"Vertical Forms":R=>R>=65040&&R<=65055,"CJK Compatibility Forms":R=>R>=65072&&R<=65103,"Small Form Variants":R=>R>=65104&&R<=65135,"Halfwidth and Fullwidth Forms":R=>R>=65280&&R<=65519};function Ka(R){for(let A of R)if(Ho(A.charCodeAt(0)))return!0;return!1}function bo(R){for(let A of R)if(!as(A.charCodeAt(0)))return!1;return!0}function Xo(R){let A=R.map(F=>{try{return new RegExp(`\\p{sc=${F}}`,"u").source}catch(W){return null}}).filter(F=>F);return new RegExp(A.join("|"),"u")}let Ss=Xo(["Arab","Dupl","Mong","Ougr","Syrc"]);function as(R){return!Ss.test(String.fromCodePoint(R))}let ws=Xo(["Bopo","Hani","Hira","Kana","Kits","Nshu","Tang","Yiii"]);function Ho(R){return!(R!==746&&R!==747&&(R<4352||!(Dn["CJK Compatibility Forms"](R)&&!(R>=65097&&R<=65103)||Dn["CJK Compatibility"](R)||Dn["CJK Strokes"](R)||!(!Dn["CJK Symbols and Punctuation"](R)||R>=12296&&R<=12305||R>=12308&&R<=12319||R===12336)||Dn["Enclosed CJK Letters and Months"](R)||Dn["Ideographic Description Characters"](R)||Dn.Kanbun(R)||Dn.Katakana(R)&&R!==12540||!(!Dn["Halfwidth and Fullwidth Forms"](R)||R===65288||R===65289||R===65293||R>=65306&&R<=65310||R===65339||R===65341||R===65343||R>=65371&&R<=65503||R===65507||R>=65512&&R<=65519)||!(!Dn["Small Form Variants"](R)||R>=65112&&R<=65118||R>=65123&&R<=65126)||Dn["Vertical Forms"](R)||Dn["Yijing Hexagram Symbols"](R)||new RegExp("\\p{sc=Cans}","u").test(String.fromCodePoint(R))||new RegExp("\\p{sc=Hang}","u").test(String.fromCodePoint(R))||ws.test(String.fromCodePoint(R)))))}function ml(R){return!(Ho(R)||function(A){return!!(Dn["Latin-1 Supplement"](A)&&(A===167||A===169||A===174||A===177||A===188||A===189||A===190||A===215||A===247)||Dn["General Punctuation"](A)&&(A===8214||A===8224||A===8225||A===8240||A===8241||A===8251||A===8252||A===8258||A===8263||A===8264||A===8265||A===8273)||Dn["Letterlike Symbols"](A)||Dn["Number Forms"](A)||Dn["Miscellaneous Technical"](A)&&(A>=8960&&A<=8967||A>=8972&&A<=8991||A>=8996&&A<=9e3||A===9003||A>=9085&&A<=9114||A>=9150&&A<=9165||A===9167||A>=9169&&A<=9179||A>=9186&&A<=9215)||Dn["Control Pictures"](A)&&A!==9251||Dn["Optical Character Recognition"](A)||Dn["Enclosed Alphanumerics"](A)||Dn["Geometric Shapes"](A)||Dn["Miscellaneous Symbols"](A)&&!(A>=9754&&A<=9759)||Dn["Miscellaneous Symbols and Arrows"](A)&&(A>=11026&&A<=11055||A>=11088&&A<=11097||A>=11192&&A<=11243)||Dn["CJK Symbols and Punctuation"](A)||Dn.Katakana(A)||Dn["Private Use Area"](A)||Dn["CJK Compatibility Forms"](A)||Dn["Small Form Variants"](A)||Dn["Halfwidth and Fullwidth Forms"](A)||A===8734||A===8756||A===8757||A>=9984&&A<=10087||A>=10102&&A<=10131||A===65532||A===65533)}(R))}let Ws=Xo(["Adlm","Arab","Armi","Avst","Chrs","Cprt","Egyp","Elym","Gara","Hatr","Hebr","Hung","Khar","Lydi","Mand","Mani","Mend","Merc","Mero","Narb","Nbat","Nkoo","Orkh","Palm","Phli","Phlp","Phnx","Prti","Rohg","Samr","Sarb","Sogo","Syrc","Thaa","Todr","Yezi"]);function Ls(R){return Ws.test(String.fromCodePoint(R))}function va(R,A){return!(!A&&Ls(R)||R>=2304&&R<=3583||R>=3840&&R<=4255||Dn.Khmer(R))}function no(R){for(let A of R)if(Ls(A.charCodeAt(0)))return!0;return!1}let ys=new class{constructor(){this.applyArabicShaping=null,this.processBidirectionalText=null,this.processStyledBidirectionalText=null,this.pluginStatus="unavailable",this.pluginURL=null}setState(R){this.pluginStatus=R.pluginStatus,this.pluginURL=R.pluginURL}getState(){return{pluginStatus:this.pluginStatus,pluginURL:this.pluginURL}}setMethods(R){this.applyArabicShaping=R.applyArabicShaping,this.processBidirectionalText=R.processBidirectionalText,this.processStyledBidirectionalText=R.processStyledBidirectionalText}isParsed(){return this.applyArabicShaping!=null&&this.processBidirectionalText!=null&&this.processStyledBidirectionalText!=null}getPluginURL(){return this.pluginURL}getRTLTextPluginStatus(){return this.pluginStatus}};class rs{constructor(A,F){this.zoom=A,F?(this.now=F.now,this.fadeDuration=F.fadeDuration,this.zoomHistory=F.zoomHistory,this.transition=F.transition):(this.now=0,this.fadeDuration=0,this.zoomHistory=new Yn,this.transition={})}isSupportedScript(A){return function(F,W){for(let re of F)if(!va(re.charCodeAt(0),W))return!1;return!0}(A,ys.getRTLTextPluginStatus()==="loaded")}crossFadingFactor(){return this.fadeDuration===0?1:Math.min((this.now-this.zoomHistory.lastIntegerZoomTime)/this.fadeDuration,1)}getCrossfadeParameters(){let A=this.zoom,F=A-Math.floor(A),W=this.crossFadingFactor();return A>this.zoomHistory.lastIntegerZoom?{fromScale:2,toScale:1,t:F+(1-F)*W}:{fromScale:.5,toScale:1,t:1-(1-W)*F}}}class $l{constructor(A,F){this.property=A,this.value=F,this.expression=function(W,re){if(jf(W))return new nl(W,re);if(Dc(W)){let fe=_u(W,re);if(fe.result==="error")throw new Error(fe.value.map(pe=>`${pe.key}: ${pe.message}`).join(", "));return fe.value}{let fe=W;return re.type==="color"&&typeof W=="string"?fe=nr.parse(W):re.type!=="padding"||typeof W!="number"&&!Array.isArray(W)?re.type==="variableAnchorOffsetCollection"&&Array.isArray(W)&&(fe=$i.parse(W)):fe=Qr.parse(W),{kind:"constant",evaluate:()=>fe}}}(F===void 0?A.specification.default:F,A.specification)}isDataDriven(){return this.expression.kind==="source"||this.expression.kind==="composite"}possiblyEvaluate(A,F,W){return this.property.possiblyEvaluate(this,A,F,W)}}class Cu{constructor(A){this.property=A,this.value=new $l(A,void 0)}transitioned(A,F){return new Nc(this.property,this.value,F,L({},A.transition,this.transition),A.now)}untransitioned(){return new Nc(this.property,this.value,null,{},0)}}class Yu{constructor(A){this._properties=A,this._values=Object.create(A.defaultTransitionablePropertyValues)}getValue(A){return g(this._values[A].value.value)}setValue(A,F){Object.prototype.hasOwnProperty.call(this._values,A)||(this._values[A]=new Cu(this._values[A].property)),this._values[A].value=new $l(this._values[A].property,F===null?void 0:g(F))}getTransition(A){return g(this._values[A].transition)}setTransition(A,F){Object.prototype.hasOwnProperty.call(this._values,A)||(this._values[A]=new Cu(this._values[A].property)),this._values[A].transition=g(F)||void 0}serialize(){let A={};for(let F of Object.keys(this._values)){let W=this.getValue(F);W!==void 0&&(A[F]=W);let re=this.getTransition(F);re!==void 0&&(A[`${F}-transition`]=re)}return A}transitioned(A,F){let W=new pu(this._properties);for(let re of Object.keys(this._values))W._values[re]=this._values[re].transitioned(A,F._values[re]);return W}untransitioned(){let A=new pu(this._properties);for(let F of Object.keys(this._values))A._values[F]=this._values[F].untransitioned();return A}}class Nc{constructor(A,F,W,re,fe){this.property=A,this.value=F,this.begin=fe+re.delay||0,this.end=this.begin+re.duration||0,A.specification.transition&&(re.delay||re.duration)&&(this.prior=W)}possiblyEvaluate(A,F,W){let re=A.now||0,fe=this.value.possiblyEvaluate(A,F,W),pe=this.prior;if(pe){if(re>this.end)return this.prior=null,fe;if(this.value.isDataDriven())return this.prior=null,fe;if(re<this.begin)return pe.possiblyEvaluate(A,F,W);{let ze=(re-this.begin)/(this.end-this.begin);return this.property.interpolate(pe.possiblyEvaluate(A,F,W),fe,function(Ke){if(Ke<=0)return 0;if(Ke>=1)return 1;let ct=Ke*Ke,Lt=ct*Ke;return 4*(Ke<.5?Lt:3*(Ke-ct)+Lt-.75)}(ze))}}return fe}}class pu{constructor(A){this._properties=A,this._values=Object.create(A.defaultTransitioningPropertyValues)}possiblyEvaluate(A,F,W){let re=new Ac(this._properties);for(let fe of Object.keys(this._values))re._values[fe]=this._values[fe].possiblyEvaluate(A,F,W);return re}hasTransition(){for(let A of Object.keys(this._values))if(this._values[A].prior)return!0;return!1}}class Uc{constructor(A){this._properties=A,this._values=Object.create(A.defaultPropertyValues)}hasValue(A){return this._values[A].value!==void 0}getValue(A){return g(this._values[A].value)}setValue(A,F){this._values[A]=new $l(this._values[A].property,F===null?void 0:g(F))}serialize(){let A={};for(let F of Object.keys(this._values)){let W=this.getValue(F);W!==void 0&&(A[F]=W)}return A}possiblyEvaluate(A,F,W){let re=new Ac(this._properties);for(let fe of Object.keys(this._values))re._values[fe]=this._values[fe].possiblyEvaluate(A,F,W);return re}}class xu{constructor(A,F,W){this.property=A,this.value=F,this.parameters=W}isConstant(){return this.value.kind==="constant"}constantOr(A){return this.value.kind==="constant"?this.value.value:A}evaluate(A,F,W,re){return this.property.evaluate(this.value,this.parameters,A,F,W,re)}}class Ac{constructor(A){this._properties=A,this._values=Object.create(A.defaultPossiblyEvaluatedValues)}get(A){return this._values[A]}}class Ua{constructor(A){this.specification=A}possiblyEvaluate(A,F){if(A.isDataDriven())throw new Error("Value should not be data driven");return A.expression.evaluate(F)}interpolate(A,F,W){let re=Lo[this.specification.type];return re?re(A,F,W):A}}class oo{constructor(A,F){this.specification=A,this.overrides=F}possiblyEvaluate(A,F,W,re){return new xu(this,A.expression.kind==="constant"||A.expression.kind==="camera"?{kind:"constant",value:A.expression.evaluate(F,null,{},W,re)}:A.expression,F)}interpolate(A,F,W){if(A.value.kind!=="constant"||F.value.kind!=="constant")return A;if(A.value.value===void 0||F.value.value===void 0)return new xu(this,{kind:"constant",value:void 0},A.parameters);let re=Lo[this.specification.type];if(re){let fe=re(A.value.value,F.value.value,W);return new xu(this,{kind:"constant",value:fe},A.parameters)}return A}evaluate(A,F,W,re,fe,pe){return A.kind==="constant"?A.value:A.evaluate(F,W,re,fe,pe)}}class Vc extends oo{possiblyEvaluate(A,F,W,re){if(A.value===void 0)return new xu(this,{kind:"constant",value:void 0},F);if(A.expression.kind==="constant"){let fe=A.expression.evaluate(F,null,{},W,re),pe=A.property.specification.type==="resolvedImage"&&typeof fe!="string"?fe.name:fe,ze=this._calculate(pe,pe,pe,F);return new xu(this,{kind:"constant",value:ze},F)}if(A.expression.kind==="camera"){let fe=this._calculate(A.expression.evaluate({zoom:F.zoom-1}),A.expression.evaluate({zoom:F.zoom}),A.expression.evaluate({zoom:F.zoom+1}),F);return new xu(this,{kind:"constant",value:fe},F)}return new xu(this,A.expression,F)}evaluate(A,F,W,re,fe,pe){if(A.kind==="source"){let ze=A.evaluate(F,W,re,fe,pe);return this._calculate(ze,ze,ze,F)}return A.kind==="composite"?this._calculate(A.evaluate({zoom:Math.floor(F.zoom)-1},W,re),A.evaluate({zoom:Math.floor(F.zoom)},W,re),A.evaluate({zoom:Math.floor(F.zoom)+1},W,re),F):A.value}_calculate(A,F,W,re){return re.zoom>re.zoomHistory.lastIntegerZoom?{from:A,to:F}:{from:W,to:F}}interpolate(A){return A}}class hc{constructor(A){this.specification=A}possiblyEvaluate(A,F,W,re){if(A.value!==void 0){if(A.expression.kind==="constant"){let fe=A.expression.evaluate(F,null,{},W,re);return this._calculate(fe,fe,fe,F)}return this._calculate(A.expression.evaluate(new rs(Math.floor(F.zoom-1),F)),A.expression.evaluate(new rs(Math.floor(F.zoom),F)),A.expression.evaluate(new rs(Math.floor(F.zoom+1),F)),F)}}_calculate(A,F,W,re){return re.zoom>re.zoomHistory.lastIntegerZoom?{from:A,to:F}:{from:W,to:F}}interpolate(A){return A}}class Ku{constructor(A){this.specification=A}possiblyEvaluate(A,F,W,re){return!!A.expression.evaluate(F,null,{},W,re)}interpolate(){return!1}}class ue{constructor(A){this.properties=A,this.defaultPropertyValues={},this.defaultTransitionablePropertyValues={},this.defaultTransitioningPropertyValues={},this.defaultPossiblyEvaluatedValues={},this.overridableProperties=[];for(let F in A){let W=A[F];W.specification.overridable&&this.overridableProperties.push(F);let re=this.defaultPropertyValues[F]=new $l(W,void 0),fe=this.defaultTransitionablePropertyValues[F]=new Cu(W);this.defaultTransitioningPropertyValues[F]=fe.untransitioned(),this.defaultPossiblyEvaluatedValues[F]=re.possiblyEvaluate({})}}}Fi("DataDrivenProperty",oo),Fi("DataConstantProperty",Ua),Fi("CrossFadedDataDrivenProperty",Vc),Fi("CrossFadedProperty",hc),Fi("ColorRampProperty",Ku);let w="-transition";class B extends Re{constructor(A,F){if(super(),this.id=A.id,this.type=A.type,this._featureFilter={filter:()=>!0,needGeometry:!1},A.type!=="custom"&&(this.metadata=A.metadata,this.minzoom=A.minzoom,this.maxzoom=A.maxzoom,A.type!=="background"&&(this.source=A.source,this.sourceLayer=A["source-layer"],this.filter=A.filter),F.layout&&(this._unevaluatedLayout=new Uc(F.layout)),F.paint)){this._transitionablePaint=new Yu(F.paint);for(let W in A.paint)this.setPaintProperty(W,A.paint[W],{validate:!1});for(let W in A.layout)this.setLayoutProperty(W,A.layout[W],{validate:!1});this._transitioningPaint=this._transitionablePaint.untransitioned(),this.paint=new Ac(F.paint)}}getCrossfadeParameters(){return this._crossfadeParameters}getLayoutProperty(A){return A==="visibility"?this.visibility:this._unevaluatedLayout.getValue(A)}setLayoutProperty(A,F,W={}){F!=null&&this._validate(Nn,`layers.${this.id}.layout.${A}`,A,F,W)||(A!=="visibility"?this._unevaluatedLayout.setValue(A,F):this.visibility=F)}getPaintProperty(A){return A.endsWith(w)?this._transitionablePaint.getTransition(A.slice(0,-11)):this._transitionablePaint.getValue(A)}setPaintProperty(A,F,W={}){if(F!=null&&this._validate(wn,`layers.${this.id}.paint.${A}`,A,F,W))return!1;if(A.endsWith(w))return this._transitionablePaint.setTransition(A.slice(0,-11),F||void 0),!1;{let re=this._transitionablePaint._values[A],fe=re.property.specification["property-type"]==="cross-faded-data-driven",pe=re.value.isDataDriven(),ze=re.value;this._transitionablePaint.setValue(A,F),this._handleSpecialPaintPropertyUpdate(A);let Ke=this._transitionablePaint._values[A].value;return Ke.isDataDriven()||pe||fe||this._handleOverridablePaintPropertyUpdate(A,ze,Ke)}}_handleSpecialPaintPropertyUpdate(A){}_handleOverridablePaintPropertyUpdate(A,F,W){return!1}isHidden(A){return!!(this.minzoom&&A<this.minzoom)||!!(this.maxzoom&&A>=this.maxzoom)||this.visibility==="none"}updateTransitions(A){this._transitioningPaint=this._transitionablePaint.transitioned(A,this._transitioningPaint)}hasTransition(){return this._transitioningPaint.hasTransition()}recalculate(A,F){A.getCrossfadeParameters&&(this._crossfadeParameters=A.getCrossfadeParameters()),this._unevaluatedLayout&&(this.layout=this._unevaluatedLayout.possiblyEvaluate(A,void 0,F)),this.paint=this._transitioningPaint.possiblyEvaluate(A,void 0,F)}serialize(){let A={id:this.id,type:this.type,source:this.source,"source-layer":this.sourceLayer,metadata:this.metadata,minzoom:this.minzoom,maxzoom:this.maxzoom,filter:this.filter,layout:this._unevaluatedLayout&&this._unevaluatedLayout.serialize(),paint:this._transitionablePaint&&this._transitionablePaint.serialize()};return this.visibility&&(A.layout=A.layout||{},A.layout.visibility=this.visibility),M(A,(F,W)=>!(F===void 0||W==="layout"&&!Object.keys(F).length||W==="paint"&&!Object.keys(F).length))}_validate(A,F,W,re,fe={}){return(!fe||fe.validate!==!1)&&Yi(this,A.call(ki,{key:F,layerType:this.type,objectKey:W,value:re,styleSpec:ce,style:{glyphs:!0,sprite:!0}}))}is3D(){return!1}isTileClipped(){return!1}hasOffscreenPass(){return!1}resize(){}isStateDependent(){for(let A in this.paint._values){let F=this.paint.get(A);if(F instanceof xu&&Nl(F.property.specification)&&(F.value.kind==="source"||F.value.kind==="composite")&&F.value.isStateDependent)return!0}return!1}}let Q={Int8:Int8Array,Uint8:Uint8Array,Int16:Int16Array,Uint16:Uint16Array,Int32:Int32Array,Uint32:Uint32Array,Float32:Float32Array};class ee{constructor(A,F){this._structArray=A,this._pos1=F*this.size,this._pos2=this._pos1/2,this._pos4=this._pos1/4,this._pos8=this._pos1/8}}class le{constructor(){this.isTransferred=!1,this.capacity=-1,this.resize(0)}static serialize(A,F){return A._trim(),F&&(A.isTransferred=!0,F.push(A.arrayBuffer)),{length:A.length,arrayBuffer:A.arrayBuffer}}static deserialize(A){let F=Object.create(this.prototype);return F.arrayBuffer=A.arrayBuffer,F.length=A.length,F.capacity=A.arrayBuffer.byteLength/F.bytesPerElement,F._refreshViews(),F}_trim(){this.length!==this.capacity&&(this.capacity=this.length,this.arrayBuffer=this.arrayBuffer.slice(0,this.length*this.bytesPerElement),this._refreshViews())}clear(){this.length=0}resize(A){this.reserve(A),this.length=A}reserve(A){if(A>this.capacity){this.capacity=Math.max(A,Math.floor(5*this.capacity),128),this.arrayBuffer=new ArrayBuffer(this.capacity*this.bytesPerElement);let F=this.uint8;this._refreshViews(),F&&this.uint8.set(F)}}_refreshViews(){throw new Error("_refreshViews() must be implemented by each concrete StructArray layout")}}function qe(R,A=1){let F=0,W=0;return{members:R.map(re=>{let fe=Q[re.type].BYTES_PER_ELEMENT,pe=F=Xe(F,Math.max(A,fe)),ze=re.components||1;return W=Math.max(W,fe),F+=fe*ze,{name:re.name,type:re.type,components:ze,offset:pe}}),size:Xe(F,Math.max(W,A)),alignment:A}}function Xe(R,A){return Math.ceil(R/A)*A}class ot extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)}emplaceBack(A,F){let W=this.length;return this.resize(W+1),this.emplace(W,A,F)}emplace(A,F,W){let re=2*A;return this.int16[re+0]=F,this.int16[re+1]=W,A}}ot.prototype.bytesPerElement=4,Fi("StructArrayLayout2i4",ot);class Tt extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)}emplaceBack(A,F,W){let re=this.length;return this.resize(re+1),this.emplace(re,A,F,W)}emplace(A,F,W,re){let fe=3*A;return this.int16[fe+0]=F,this.int16[fe+1]=W,this.int16[fe+2]=re,A}}Tt.prototype.bytesPerElement=6,Fi("StructArrayLayout3i6",Tt);class Yt extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)}emplaceBack(A,F,W,re){let fe=this.length;return this.resize(fe+1),this.emplace(fe,A,F,W,re)}emplace(A,F,W,re,fe){let pe=4*A;return this.int16[pe+0]=F,this.int16[pe+1]=W,this.int16[pe+2]=re,this.int16[pe+3]=fe,A}}Yt.prototype.bytesPerElement=8,Fi("StructArrayLayout4i8",Yt);class Kt extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)}emplaceBack(A,F,W,re,fe,pe){let ze=this.length;return this.resize(ze+1),this.emplace(ze,A,F,W,re,fe,pe)}emplace(A,F,W,re,fe,pe,ze){let Ke=6*A;return this.int16[Ke+0]=F,this.int16[Ke+1]=W,this.int16[Ke+2]=re,this.int16[Ke+3]=fe,this.int16[Ke+4]=pe,this.int16[Ke+5]=ze,A}}Kt.prototype.bytesPerElement=12,Fi("StructArrayLayout2i4i12",Kt);class xr extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)}emplaceBack(A,F,W,re,fe,pe){let ze=this.length;return this.resize(ze+1),this.emplace(ze,A,F,W,re,fe,pe)}emplace(A,F,W,re,fe,pe,ze){let Ke=4*A,ct=8*A;return this.int16[Ke+0]=F,this.int16[Ke+1]=W,this.uint8[ct+4]=re,this.uint8[ct+5]=fe,this.uint8[ct+6]=pe,this.uint8[ct+7]=ze,A}}xr.prototype.bytesPerElement=8,Fi("StructArrayLayout2i4ub8",xr);class Ir extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer)}emplaceBack(A,F){let W=this.length;return this.resize(W+1),this.emplace(W,A,F)}emplace(A,F,W){let re=2*A;return this.float32[re+0]=F,this.float32[re+1]=W,A}}Ir.prototype.bytesPerElement=8,Fi("StructArrayLayout2f8",Ir);class ve extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer)}emplaceBack(A,F,W,re,fe,pe,ze,Ke,ct,Lt){let $t=this.length;return this.resize($t+1),this.emplace($t,A,F,W,re,fe,pe,ze,Ke,ct,Lt)}emplace(A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t){let fr=10*A;return this.uint16[fr+0]=F,this.uint16[fr+1]=W,this.uint16[fr+2]=re,this.uint16[fr+3]=fe,this.uint16[fr+4]=pe,this.uint16[fr+5]=ze,this.uint16[fr+6]=Ke,this.uint16[fr+7]=ct,this.uint16[fr+8]=Lt,this.uint16[fr+9]=$t,A}}ve.prototype.bytesPerElement=20,Fi("StructArrayLayout10ui20",ve);class be extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer)}emplaceBack(A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t,fr){let mr=this.length;return this.resize(mr+1),this.emplace(mr,A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t,fr)}emplace(A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t,fr,mr){let Pr=12*A;return this.int16[Pr+0]=F,this.int16[Pr+1]=W,this.int16[Pr+2]=re,this.int16[Pr+3]=fe,this.uint16[Pr+4]=pe,this.uint16[Pr+5]=ze,this.uint16[Pr+6]=Ke,this.uint16[Pr+7]=ct,this.int16[Pr+8]=Lt,this.int16[Pr+9]=$t,this.int16[Pr+10]=fr,this.int16[Pr+11]=mr,A}}be.prototype.bytesPerElement=24,Fi("StructArrayLayout4i4ui4i24",be);class De extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer)}emplaceBack(A,F,W){let re=this.length;return this.resize(re+1),this.emplace(re,A,F,W)}emplace(A,F,W,re){let fe=3*A;return this.float32[fe+0]=F,this.float32[fe+1]=W,this.float32[fe+2]=re,A}}De.prototype.bytesPerElement=12,Fi("StructArrayLayout3f12",De);class Be extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.uint32=new Uint32Array(this.arrayBuffer)}emplaceBack(A){let F=this.length;return this.resize(F+1),this.emplace(F,A)}emplace(A,F){return this.uint32[1*A+0]=F,A}}Be.prototype.bytesPerElement=4,Fi("StructArrayLayout1ul4",Be);class et extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer),this.uint32=new Uint32Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer)}emplaceBack(A,F,W,re,fe,pe,ze,Ke,ct){let Lt=this.length;return this.resize(Lt+1),this.emplace(Lt,A,F,W,re,fe,pe,ze,Ke,ct)}emplace(A,F,W,re,fe,pe,ze,Ke,ct,Lt){let $t=10*A,fr=5*A;return this.int16[$t+0]=F,this.int16[$t+1]=W,this.int16[$t+2]=re,this.int16[$t+3]=fe,this.int16[$t+4]=pe,this.int16[$t+5]=ze,this.uint32[fr+3]=Ke,this.uint16[$t+8]=ct,this.uint16[$t+9]=Lt,A}}et.prototype.bytesPerElement=20,Fi("StructArrayLayout6i1ul2ui20",et);class We extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)}emplaceBack(A,F,W,re,fe,pe){let ze=this.length;return this.resize(ze+1),this.emplace(ze,A,F,W,re,fe,pe)}emplace(A,F,W,re,fe,pe,ze){let Ke=6*A;return this.int16[Ke+0]=F,this.int16[Ke+1]=W,this.int16[Ke+2]=re,this.int16[Ke+3]=fe,this.int16[Ke+4]=pe,this.int16[Ke+5]=ze,A}}We.prototype.bytesPerElement=12,Fi("StructArrayLayout2i2i2i12",We);class it extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)}emplaceBack(A,F,W,re,fe){let pe=this.length;return this.resize(pe+1),this.emplace(pe,A,F,W,re,fe)}emplace(A,F,W,re,fe,pe){let ze=4*A,Ke=8*A;return this.float32[ze+0]=F,this.float32[ze+1]=W,this.float32[ze+2]=re,this.int16[Ke+6]=fe,this.int16[Ke+7]=pe,A}}it.prototype.bytesPerElement=16,Fi("StructArrayLayout2f1f2i16",it);class Ft extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer)}emplaceBack(A,F,W,re,fe,pe){let ze=this.length;return this.resize(ze+1),this.emplace(ze,A,F,W,re,fe,pe)}emplace(A,F,W,re,fe,pe,ze){let Ke=16*A,ct=4*A,Lt=8*A;return this.uint8[Ke+0]=F,this.uint8[Ke+1]=W,this.float32[ct+1]=re,this.float32[ct+2]=fe,this.int16[Lt+6]=pe,this.int16[Lt+7]=ze,A}}Ft.prototype.bytesPerElement=16,Fi("StructArrayLayout2ub2f2i16",Ft);class Ht extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer)}emplaceBack(A,F,W){let re=this.length;return this.resize(re+1),this.emplace(re,A,F,W)}emplace(A,F,W,re){let fe=3*A;return this.uint16[fe+0]=F,this.uint16[fe+1]=W,this.uint16[fe+2]=re,A}}Ht.prototype.bytesPerElement=6,Fi("StructArrayLayout3ui6",Ht);class tr extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer),this.uint32=new Uint32Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer)}emplaceBack(A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t,fr,mr,Pr,zr,ui,yi){let vn=this.length;return this.resize(vn+1),this.emplace(vn,A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t,fr,mr,Pr,zr,ui,yi)}emplace(A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t,fr,mr,Pr,zr,ui,yi,vn){let zi=24*A,un=12*A,Tn=48*A;return this.int16[zi+0]=F,this.int16[zi+1]=W,this.uint16[zi+2]=re,this.uint16[zi+3]=fe,this.uint32[un+2]=pe,this.uint32[un+3]=ze,this.uint32[un+4]=Ke,this.uint16[zi+10]=ct,this.uint16[zi+11]=Lt,this.uint16[zi+12]=$t,this.float32[un+7]=fr,this.float32[un+8]=mr,this.uint8[Tn+36]=Pr,this.uint8[Tn+37]=zr,this.uint8[Tn+38]=ui,this.uint32[un+10]=yi,this.int16[zi+22]=vn,A}}tr.prototype.bytesPerElement=48,Fi("StructArrayLayout2i2ui3ul3ui2f3ub1ul1i48",tr);class dr extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.int16=new Int16Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer),this.uint32=new Uint32Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer)}emplaceBack(A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t,fr,mr,Pr,zr,ui,yi,vn,zi,un,Tn,pa,ro,Vo,Xa,sa,Mo,fo){let lo=this.length;return this.resize(lo+1),this.emplace(lo,A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t,fr,mr,Pr,zr,ui,yi,vn,zi,un,Tn,pa,ro,Vo,Xa,sa,Mo,fo)}emplace(A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t,fr,mr,Pr,zr,ui,yi,vn,zi,un,Tn,pa,ro,Vo,Xa,sa,Mo,fo,lo){let Xn=32*A,Ro=16*A;return this.int16[Xn+0]=F,this.int16[Xn+1]=W,this.int16[Xn+2]=re,this.int16[Xn+3]=fe,this.int16[Xn+4]=pe,this.int16[Xn+5]=ze,this.int16[Xn+6]=Ke,this.int16[Xn+7]=ct,this.uint16[Xn+8]=Lt,this.uint16[Xn+9]=$t,this.uint16[Xn+10]=fr,this.uint16[Xn+11]=mr,this.uint16[Xn+12]=Pr,this.uint16[Xn+13]=zr,this.uint16[Xn+14]=ui,this.uint16[Xn+15]=yi,this.uint16[Xn+16]=vn,this.uint16[Xn+17]=zi,this.uint16[Xn+18]=un,this.uint16[Xn+19]=Tn,this.uint16[Xn+20]=pa,this.uint16[Xn+21]=ro,this.uint16[Xn+22]=Vo,this.uint32[Ro+12]=Xa,this.float32[Ro+13]=sa,this.float32[Ro+14]=Mo,this.uint16[Xn+30]=fo,this.uint16[Xn+31]=lo,A}}dr.prototype.bytesPerElement=64,Fi("StructArrayLayout8i15ui1ul2f2ui64",dr);class Sr extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer)}emplaceBack(A){let F=this.length;return this.resize(F+1),this.emplace(F,A)}emplace(A,F){return this.float32[1*A+0]=F,A}}Sr.prototype.bytesPerElement=4,Fi("StructArrayLayout1f4",Sr);class Or extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer)}emplaceBack(A,F,W){let re=this.length;return this.resize(re+1),this.emplace(re,A,F,W)}emplace(A,F,W,re){let fe=3*A;return this.uint16[6*A+0]=F,this.float32[fe+1]=W,this.float32[fe+2]=re,A}}Or.prototype.bytesPerElement=12,Fi("StructArrayLayout1ui2f12",Or);class Wr extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.uint32=new Uint32Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer)}emplaceBack(A,F,W){let re=this.length;return this.resize(re+1),this.emplace(re,A,F,W)}emplace(A,F,W,re){let fe=4*A;return this.uint32[2*A+0]=F,this.uint16[fe+2]=W,this.uint16[fe+3]=re,A}}Wr.prototype.bytesPerElement=8,Fi("StructArrayLayout1ul2ui8",Wr);class ni extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer)}emplaceBack(A,F){let W=this.length;return this.resize(W+1),this.emplace(W,A,F)}emplace(A,F,W){let re=2*A;return this.uint16[re+0]=F,this.uint16[re+1]=W,A}}ni.prototype.bytesPerElement=4,Fi("StructArrayLayout2ui4",ni);class Pi extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.uint16=new Uint16Array(this.arrayBuffer)}emplaceBack(A){let F=this.length;return this.resize(F+1),this.emplace(F,A)}emplace(A,F){return this.uint16[1*A+0]=F,A}}Pi.prototype.bytesPerElement=2,Fi("StructArrayLayout1ui2",Pi);class cn extends le{_refreshViews(){this.uint8=new Uint8Array(this.arrayBuffer),this.float32=new Float32Array(this.arrayBuffer)}emplaceBack(A,F,W,re){let fe=this.length;return this.resize(fe+1),this.emplace(fe,A,F,W,re)}emplace(A,F,W,re,fe){let pe=4*A;return this.float32[pe+0]=F,this.float32[pe+1]=W,this.float32[pe+2]=re,this.float32[pe+3]=fe,A}}cn.prototype.bytesPerElement=16,Fi("StructArrayLayout4f16",cn);class ln extends ee{get anchorPointX(){return this._structArray.int16[this._pos2+0]}get anchorPointY(){return this._structArray.int16[this._pos2+1]}get x1(){return this._structArray.int16[this._pos2+2]}get y1(){return this._structArray.int16[this._pos2+3]}get x2(){return this._structArray.int16[this._pos2+4]}get y2(){return this._structArray.int16[this._pos2+5]}get featureIndex(){return this._structArray.uint32[this._pos4+3]}get sourceLayerIndex(){return this._structArray.uint16[this._pos2+8]}get bucketIndex(){return this._structArray.uint16[this._pos2+9]}get anchorPoint(){return new u(this.anchorPointX,this.anchorPointY)}}ln.prototype.size=20;class Cn extends et{get(A){return new ln(this,A)}}Fi("CollisionBoxArray",Cn);class Kn extends ee{get anchorX(){return this._structArray.int16[this._pos2+0]}get anchorY(){return this._structArray.int16[this._pos2+1]}get glyphStartIndex(){return this._structArray.uint16[this._pos2+2]}get numGlyphs(){return this._structArray.uint16[this._pos2+3]}get vertexStartIndex(){return this._structArray.uint32[this._pos4+2]}get lineStartIndex(){return this._structArray.uint32[this._pos4+3]}get lineLength(){return this._structArray.uint32[this._pos4+4]}get segment(){return this._structArray.uint16[this._pos2+10]}get lowerSize(){return this._structArray.uint16[this._pos2+11]}get upperSize(){return this._structArray.uint16[this._pos2+12]}get lineOffsetX(){return this._structArray.float32[this._pos4+7]}get lineOffsetY(){return this._structArray.float32[this._pos4+8]}get writingMode(){return this._structArray.uint8[this._pos1+36]}get placedOrientation(){return this._structArray.uint8[this._pos1+37]}set placedOrientation(A){this._structArray.uint8[this._pos1+37]=A}get hidden(){return this._structArray.uint8[this._pos1+38]}set hidden(A){this._structArray.uint8[this._pos1+38]=A}get crossTileID(){return this._structArray.uint32[this._pos4+10]}set crossTileID(A){this._structArray.uint32[this._pos4+10]=A}get associatedIconIndex(){return this._structArray.int16[this._pos2+22]}}Kn.prototype.size=48;class Ta extends tr{get(A){return new Kn(this,A)}}Fi("PlacedSymbolArray",Ta);class fa extends ee{get anchorX(){return this._structArray.int16[this._pos2+0]}get anchorY(){return this._structArray.int16[this._pos2+1]}get rightJustifiedTextSymbolIndex(){return this._structArray.int16[this._pos2+2]}get centerJustifiedTextSymbolIndex(){return this._structArray.int16[this._pos2+3]}get leftJustifiedTextSymbolIndex(){return this._structArray.int16[this._pos2+4]}get verticalPlacedTextSymbolIndex(){return this._structArray.int16[this._pos2+5]}get placedIconSymbolIndex(){return this._structArray.int16[this._pos2+6]}get verticalPlacedIconSymbolIndex(){return this._structArray.int16[this._pos2+7]}get key(){return this._structArray.uint16[this._pos2+8]}get textBoxStartIndex(){return this._structArray.uint16[this._pos2+9]}get textBoxEndIndex(){return this._structArray.uint16[this._pos2+10]}get verticalTextBoxStartIndex(){return this._structArray.uint16[this._pos2+11]}get verticalTextBoxEndIndex(){return this._structArray.uint16[this._pos2+12]}get iconBoxStartIndex(){return this._structArray.uint16[this._pos2+13]}get iconBoxEndIndex(){return this._structArray.uint16[this._pos2+14]}get verticalIconBoxStartIndex(){return this._structArray.uint16[this._pos2+15]}get verticalIconBoxEndIndex(){return this._structArray.uint16[this._pos2+16]}get featureIndex(){return this._structArray.uint16[this._pos2+17]}get numHorizontalGlyphVertices(){return this._structArray.uint16[this._pos2+18]}get numVerticalGlyphVertices(){return this._structArray.uint16[this._pos2+19]}get numIconVertices(){return this._structArray.uint16[this._pos2+20]}get numVerticalIconVertices(){return this._structArray.uint16[this._pos2+21]}get useRuntimeCollisionCircles(){return this._structArray.uint16[this._pos2+22]}get crossTileID(){return this._structArray.uint32[this._pos4+12]}set crossTileID(A){this._structArray.uint32[this._pos4+12]=A}get textBoxScale(){return this._structArray.float32[this._pos4+13]}get collisionCircleDiameter(){return this._structArray.float32[this._pos4+14]}get textAnchorOffsetStartIndex(){return this._structArray.uint16[this._pos2+30]}get textAnchorOffsetEndIndex(){return this._structArray.uint16[this._pos2+31]}}fa.prototype.size=64;class $a extends dr{get(A){return new fa(this,A)}}Fi("SymbolInstanceArray",$a);class Co extends Sr{getoffsetX(A){return this.float32[1*A+0]}}Fi("GlyphOffsetArray",Co);class Qa extends Tt{getx(A){return this.int16[3*A+0]}gety(A){return this.int16[3*A+1]}gettileUnitDistanceFromAnchor(A){return this.int16[3*A+2]}}Fi("SymbolLineVertexArray",Qa);class mo extends ee{get textAnchor(){return this._structArray.uint16[this._pos2+0]}get textOffset0(){return this._structArray.float32[this._pos4+1]}get textOffset1(){return this._structArray.float32[this._pos4+2]}}mo.prototype.size=12;class Bo extends Or{get(A){return new mo(this,A)}}Fi("TextAnchorOffsetArray",Bo);class Ps extends ee{get featureIndex(){return this._structArray.uint32[this._pos4+0]}get sourceLayerIndex(){return this._structArray.uint16[this._pos2+2]}get bucketIndex(){return this._structArray.uint16[this._pos2+3]}}Ps.prototype.size=8;class Ts extends Wr{get(A){return new Ps(this,A)}}Fi("FeatureIndexArray",Ts);class wo extends ot{}class To extends ot{}class hl extends ot{}class Ul extends Kt{}class Lu extends xr{}class au extends Ir{}class Js extends ve{}class Ql extends be{}class dc extends De{}class Tl extends Be{}class Al extends We{}class X extends Ft{}class se extends Ht{}class Te extends ni{}let Ne=qe([{name:"a_pos",components:2,type:"Int16"}],4),{members:He}=Ne;class Ye{constructor(A=[]){this.segments=A}prepareSegment(A,F,W,re){let fe=this.segments[this.segments.length-1];return A>Ye.MAX_VERTEX_ARRAY_LENGTH&&T(`Max vertices per segment is ${Ye.MAX_VERTEX_ARRAY_LENGTH}: bucket requested ${A}`),(!fe||fe.vertexLength+A>Ye.MAX_VERTEX_ARRAY_LENGTH||fe.sortKey!==re)&&(fe={vertexOffset:F.length,primitiveOffset:W.length,vertexLength:0,primitiveLength:0},re!==void 0&&(fe.sortKey=re),this.segments.push(fe)),fe}get(){return this.segments}destroy(){for(let A of this.segments)for(let F in A.vaos)A.vaos[F].destroy()}static simpleSegment(A,F,W,re){return new Ye([{vertexOffset:A,primitiveOffset:F,vertexLength:W,primitiveLength:re,vaos:{},sortKey:0}])}}function kt(R,A){return 256*(R=E(Math.floor(R),0,255))+E(Math.floor(A),0,255)}Ye.MAX_VERTEX_ARRAY_LENGTH=Math.pow(2,16)-1,Fi("SegmentVector",Ye);let nt=qe([{name:"a_pattern_from",components:4,type:"Uint16"},{name:"a_pattern_to",components:4,type:"Uint16"},{name:"a_pixel_ratio_from",components:1,type:"Uint16"},{name:"a_pixel_ratio_to",components:1,type:"Uint16"}]);var jt={exports:{}},gr={exports:{}};gr.exports=function(R,A){var F,W,re,fe,pe,ze,Ke,ct;for(W=R.length-(F=3&R.length),re=A,pe=3432918353,ze=461845907,ct=0;ct<W;)Ke=255&R.charCodeAt(ct)|(255&R.charCodeAt(++ct))<<8|(255&R.charCodeAt(++ct))<<16|(255&R.charCodeAt(++ct))<<24,++ct,re=27492+(65535&(fe=5*(65535&(re=(re^=Ke=(65535&(Ke=(Ke=(65535&Ke)*pe+(((Ke>>>16)*pe&65535)<<16)&4294967295)<<15|Ke>>>17))*ze+(((Ke>>>16)*ze&65535)<<16)&4294967295)<<13|re>>>19))+((5*(re>>>16)&65535)<<16)&4294967295))+((58964+(fe>>>16)&65535)<<16);switch(Ke=0,F){case 3:Ke^=(255&R.charCodeAt(ct+2))<<16;case 2:Ke^=(255&R.charCodeAt(ct+1))<<8;case 1:re^=Ke=(65535&(Ke=(Ke=(65535&(Ke^=255&R.charCodeAt(ct)))*pe+(((Ke>>>16)*pe&65535)<<16)&4294967295)<<15|Ke>>>17))*ze+(((Ke>>>16)*ze&65535)<<16)&4294967295}return re^=R.length,re=2246822507*(65535&(re^=re>>>16))+((2246822507*(re>>>16)&65535)<<16)&4294967295,re=3266489909*(65535&(re^=re>>>13))+((3266489909*(re>>>16)&65535)<<16)&4294967295,(re^=re>>>16)>>>0};var yr=gr.exports,Hr={exports:{}};Hr.exports=function(R,A){for(var F,W=R.length,re=A^W,fe=0;W>=4;)F=1540483477*(65535&(F=255&R.charCodeAt(fe)|(255&R.charCodeAt(++fe))<<8|(255&R.charCodeAt(++fe))<<16|(255&R.charCodeAt(++fe))<<24))+((1540483477*(F>>>16)&65535)<<16),re=1540483477*(65535&re)+((1540483477*(re>>>16)&65535)<<16)^(F=1540483477*(65535&(F^=F>>>24))+((1540483477*(F>>>16)&65535)<<16)),W-=4,++fe;switch(W){case 3:re^=(255&R.charCodeAt(fe+2))<<16;case 2:re^=(255&R.charCodeAt(fe+1))<<8;case 1:re=1540483477*(65535&(re^=255&R.charCodeAt(fe)))+((1540483477*(re>>>16)&65535)<<16)}return re=1540483477*(65535&(re^=re>>>13))+((1540483477*(re>>>16)&65535)<<16),(re^=re>>>15)>>>0};var qr=yr,_i=Hr.exports;jt.exports=qr,jt.exports.murmur3=qr,jt.exports.murmur2=_i;var bi=o(jt.exports);class Zr{constructor(){this.ids=[],this.positions=[],this.indexed=!1}add(A,F,W,re){this.ids.push(ai(A)),this.positions.push(F,W,re)}getPositions(A){if(!this.indexed)throw new Error("Trying to get index, but feature positions are not indexed");let F=ai(A),W=0,re=this.ids.length-1;for(;W<re;){let pe=W+re>>1;this.ids[pe]>=F?re=pe:W=pe+1}let fe=[];for(;this.ids[W]===F;)fe.push({index:this.positions[3*W],start:this.positions[3*W+1],end:this.positions[3*W+2]}),W++;return fe}static serialize(A,F){let W=new Float64Array(A.ids),re=new Uint32Array(A.positions);return gi(W,re,0,W.length-1),F&&F.push(W.buffer,re.buffer),{ids:W,positions:re}}static deserialize(A){let F=new Zr;return F.ids=A.ids,F.positions=A.positions,F.indexed=!0,F}}function ai(R){let A=+R;return!isNaN(A)&&A<=Number.MAX_SAFE_INTEGER?A:bi(String(R))}function gi(R,A,F,W){for(;F<W;){let re=R[F+W>>1],fe=F-1,pe=W+1;for(;;){do fe++;while(R[fe]<re);do pe--;while(R[pe]>re);if(fe>=pe)break;Ii(R,fe,pe),Ii(A,3*fe,3*pe),Ii(A,3*fe+1,3*pe+1),Ii(A,3*fe+2,3*pe+2)}pe-F<W-pe?(gi(R,A,F,pe),F=pe+1):(gi(R,A,pe+1,W),W=pe)}}function Ii(R,A,F){let W=R[A];R[A]=R[F],R[F]=W}Fi("FeaturePositionMap",Zr);class Si{constructor(A,F){this.gl=A.gl,this.location=F}}class ei extends Si{constructor(A,F){super(A,F),this.current=0}set(A){this.current!==A&&(this.current=A,this.gl.uniform1f(this.location,A))}}class Ln extends Si{constructor(A,F){super(A,F),this.current=[0,0,0,0]}set(A){A[0]===this.current[0]&&A[1]===this.current[1]&&A[2]===this.current[2]&&A[3]===this.current[3]||(this.current=A,this.gl.uniform4f(this.location,A[0],A[1],A[2],A[3]))}}class En extends Si{constructor(A,F){super(A,F),this.current=nr.transparent}set(A){A.r===this.current.r&&A.g===this.current.g&&A.b===this.current.b&&A.a===this.current.a||(this.current=A,this.gl.uniform4f(this.location,A.r,A.g,A.b,A.a))}}let Un=new Float32Array(16);function ia(R){return[kt(255*R.r,255*R.g),kt(255*R.b,255*R.a)]}class Ea{constructor(A,F,W){this.value=A,this.uniformNames=F.map(re=>`u_${re}`),this.type=W}setUniform(A,F,W){A.set(W.constantOr(this.value))}getBinding(A,F,W){return this.type==="color"?new En(A,F):new ei(A,F)}}class Ia{constructor(A,F){this.uniformNames=F.map(W=>`u_${W}`),this.patternFrom=null,this.patternTo=null,this.pixelRatioFrom=1,this.pixelRatioTo=1}setConstantPatternPositions(A,F){this.pixelRatioFrom=F.pixelRatio,this.pixelRatioTo=A.pixelRatio,this.patternFrom=F.tlbr,this.patternTo=A.tlbr}setUniform(A,F,W,re){let fe=re==="u_pattern_to"?this.patternTo:re==="u_pattern_from"?this.patternFrom:re==="u_pixel_ratio_to"?this.pixelRatioTo:re==="u_pixel_ratio_from"?this.pixelRatioFrom:null;fe&&A.set(fe)}getBinding(A,F,W){return W.substr(0,9)==="u_pattern"?new Ln(A,F):new ei(A,F)}}class yo{constructor(A,F,W,re){this.expression=A,this.type=W,this.maxValue=0,this.paintVertexAttributes=F.map(fe=>({name:`a_${fe}`,type:"Float32",components:W==="color"?2:1,offset:0})),this.paintVertexArray=new re}populatePaintArray(A,F,W,re,fe){let pe=this.paintVertexArray.length,ze=this.expression.evaluate(new rs(0),F,{},re,[],fe);this.paintVertexArray.resize(A),this._setPaintValue(pe,A,ze)}updatePaintArray(A,F,W,re){let fe=this.expression.evaluate({zoom:0},W,re);this._setPaintValue(A,F,fe)}_setPaintValue(A,F,W){if(this.type==="color"){let re=ia(W);for(let fe=A;fe<F;fe++)this.paintVertexArray.emplace(fe,re[0],re[1])}else{for(let re=A;re<F;re++)this.paintVertexArray.emplace(re,W);this.maxValue=Math.max(this.maxValue,Math.abs(W))}}upload(A){this.paintVertexArray&&this.paintVertexArray.arrayBuffer&&(this.paintVertexBuffer&&this.paintVertexBuffer.buffer?this.paintVertexBuffer.updateData(this.paintVertexArray):this.paintVertexBuffer=A.createVertexBuffer(this.paintVertexArray,this.paintVertexAttributes,this.expression.isStateDependent))}destroy(){this.paintVertexBuffer&&this.paintVertexBuffer.destroy()}}class Da{constructor(A,F,W,re,fe,pe){this.expression=A,this.uniformNames=F.map(ze=>`u_${ze}_t`),this.type=W,this.useIntegerZoom=re,this.zoom=fe,this.maxValue=0,this.paintVertexAttributes=F.map(ze=>({name:`a_${ze}`,type:"Float32",components:W==="color"?4:2,offset:0})),this.paintVertexArray=new pe}populatePaintArray(A,F,W,re,fe){let pe=this.expression.evaluate(new rs(this.zoom),F,{},re,[],fe),ze=this.expression.evaluate(new rs(this.zoom+1),F,{},re,[],fe),Ke=this.paintVertexArray.length;this.paintVertexArray.resize(A),this._setPaintValue(Ke,A,pe,ze)}updatePaintArray(A,F,W,re){let fe=this.expression.evaluate({zoom:this.zoom},W,re),pe=this.expression.evaluate({zoom:this.zoom+1},W,re);this._setPaintValue(A,F,fe,pe)}_setPaintValue(A,F,W,re){if(this.type==="color"){let fe=ia(W),pe=ia(re);for(let ze=A;ze<F;ze++)this.paintVertexArray.emplace(ze,fe[0],fe[1],pe[0],pe[1])}else{for(let fe=A;fe<F;fe++)this.paintVertexArray.emplace(fe,W,re);this.maxValue=Math.max(this.maxValue,Math.abs(W),Math.abs(re))}}upload(A){this.paintVertexArray&&this.paintVertexArray.arrayBuffer&&(this.paintVertexBuffer&&this.paintVertexBuffer.buffer?this.paintVertexBuffer.updateData(this.paintVertexArray):this.paintVertexBuffer=A.createVertexBuffer(this.paintVertexArray,this.paintVertexAttributes,this.expression.isStateDependent))}destroy(){this.paintVertexBuffer&&this.paintVertexBuffer.destroy()}setUniform(A,F){let W=this.useIntegerZoom?Math.floor(F.zoom):F.zoom,re=E(this.expression.interpolationFactor(W,this.zoom,this.zoom+1),0,1);A.set(re)}getBinding(A,F,W){return new ei(A,F)}}class go{constructor(A,F,W,re,fe,pe){this.expression=A,this.type=F,this.useIntegerZoom=W,this.zoom=re,this.layerId=pe,this.zoomInPaintVertexArray=new fe,this.zoomOutPaintVertexArray=new fe}populatePaintArray(A,F,W){let re=this.zoomInPaintVertexArray.length;this.zoomInPaintVertexArray.resize(A),this.zoomOutPaintVertexArray.resize(A),this._setPaintValues(re,A,F.patterns&&F.patterns[this.layerId],W)}updatePaintArray(A,F,W,re,fe){this._setPaintValues(A,F,W.patterns&&W.patterns[this.layerId],fe)}_setPaintValues(A,F,W,re){if(!re||!W)return;let{min:fe,mid:pe,max:ze}=W,Ke=re[fe],ct=re[pe],Lt=re[ze];if(Ke&&ct&&Lt)for(let $t=A;$t<F;$t++)this.zoomInPaintVertexArray.emplace($t,ct.tl[0],ct.tl[1],ct.br[0],ct.br[1],Ke.tl[0],Ke.tl[1],Ke.br[0],Ke.br[1],ct.pixelRatio,Ke.pixelRatio),this.zoomOutPaintVertexArray.emplace($t,ct.tl[0],ct.tl[1],ct.br[0],ct.br[1],Lt.tl[0],Lt.tl[1],Lt.br[0],Lt.br[1],ct.pixelRatio,Lt.pixelRatio)}upload(A){this.zoomInPaintVertexArray&&this.zoomInPaintVertexArray.arrayBuffer&&this.zoomOutPaintVertexArray&&this.zoomOutPaintVertexArray.arrayBuffer&&(this.zoomInPaintVertexBuffer=A.createVertexBuffer(this.zoomInPaintVertexArray,nt.members,this.expression.isStateDependent),this.zoomOutPaintVertexBuffer=A.createVertexBuffer(this.zoomOutPaintVertexArray,nt.members,this.expression.isStateDependent))}destroy(){this.zoomOutPaintVertexBuffer&&this.zoomOutPaintVertexBuffer.destroy(),this.zoomInPaintVertexBuffer&&this.zoomInPaintVertexBuffer.destroy()}}class Is{constructor(A,F,W){this.binders={},this._buffers=[];let re=[];for(let fe in A.paint._values){if(!W(fe))continue;let pe=A.paint.get(fe);if(!(pe instanceof xu&&Nl(pe.property.specification)))continue;let ze=Xs(fe,A.type),Ke=pe.value,ct=pe.property.specification.type,Lt=pe.property.useIntegerZoom,$t=pe.property.specification["property-type"],fr=$t==="cross-faded"||$t==="cross-faded-data-driven";if(Ke.kind==="constant")this.binders[fe]=fr?new Ia(Ke.value,ze):new Ea(Ke.value,ze,ct),re.push(`/u_${fe}`);else if(Ke.kind==="source"||fr){let mr=Gn(fe,ct,"source");this.binders[fe]=fr?new go(Ke,ct,Lt,F,mr,A.id):new yo(Ke,ze,ct,mr),re.push(`/a_${fe}`)}else{let mr=Gn(fe,ct,"composite");this.binders[fe]=new Da(Ke,ze,ct,Lt,F,mr),re.push(`/z_${fe}`)}}this.cacheKey=re.sort().join("")}getMaxValue(A){let F=this.binders[A];return F instanceof yo||F instanceof Da?F.maxValue:0}populatePaintArrays(A,F,W,re,fe){for(let pe in this.binders){let ze=this.binders[pe];(ze instanceof yo||ze instanceof Da||ze instanceof go)&&ze.populatePaintArray(A,F,W,re,fe)}}setConstantPatternPositions(A,F){for(let W in this.binders){let re=this.binders[W];re instanceof Ia&&re.setConstantPatternPositions(A,F)}}updatePaintArrays(A,F,W,re,fe){let pe=!1;for(let ze in A){let Ke=F.getPositions(ze);for(let ct of Ke){let Lt=W.feature(ct.index);for(let $t in this.binders){let fr=this.binders[$t];if((fr instanceof yo||fr instanceof Da||fr instanceof go)&&fr.expression.isStateDependent===!0){let mr=re.paint.get($t);fr.expression=mr.value,fr.updatePaintArray(ct.start,ct.end,Lt,A[ze],fe),pe=!0}}}}return pe}defines(){let A=[];for(let F in this.binders){let W=this.binders[F];(W instanceof Ea||W instanceof Ia)&&A.push(...W.uniformNames.map(re=>`#define HAS_UNIFORM_${re}`))}return A}getBinderAttributes(){let A=[];for(let F in this.binders){let W=this.binders[F];if(W instanceof yo||W instanceof Da)for(let re=0;re<W.paintVertexAttributes.length;re++)A.push(W.paintVertexAttributes[re].name);else if(W instanceof go)for(let re=0;re<nt.members.length;re++)A.push(nt.members[re].name)}return A}getBinderUniforms(){let A=[];for(let F in this.binders){let W=this.binders[F];if(W instanceof Ea||W instanceof Ia||W instanceof Da)for(let re of W.uniformNames)A.push(re)}return A}getPaintVertexBuffers(){return this._buffers}getUniforms(A,F){let W=[];for(let re in this.binders){let fe=this.binders[re];if(fe instanceof Ea||fe instanceof Ia||fe instanceof Da){for(let pe of fe.uniformNames)if(F[pe]){let ze=fe.getBinding(A,F[pe],pe);W.push({name:pe,property:re,binding:ze})}}}return W}setUniforms(A,F,W,re){for(let{name:fe,property:pe,binding:ze}of F)this.binders[pe].setUniform(ze,re,W.get(pe),fe)}updatePaintBuffers(A){this._buffers=[];for(let F in this.binders){let W=this.binders[F];if(A&&W instanceof go){let re=A.fromScale===2?W.zoomInPaintVertexBuffer:W.zoomOutPaintVertexBuffer;re&&this._buffers.push(re)}else(W instanceof yo||W instanceof Da)&&W.paintVertexBuffer&&this._buffers.push(W.paintVertexBuffer)}}upload(A){for(let F in this.binders){let W=this.binders[F];(W instanceof yo||W instanceof Da||W instanceof go)&&W.upload(A)}this.updatePaintBuffers()}destroy(){for(let A in this.binders){let F=this.binders[A];(F instanceof yo||F instanceof Da||F instanceof go)&&F.destroy()}}}class Ms{constructor(A,F,W=()=>!0){this.programConfigurations={};for(let re of A)this.programConfigurations[re.id]=new Is(re,F,W);this.needsUpload=!1,this._featureMap=new Zr,this._bufferOffset=0}populatePaintArrays(A,F,W,re,fe,pe){for(let ze in this.programConfigurations)this.programConfigurations[ze].populatePaintArrays(A,F,re,fe,pe);F.id!==void 0&&this._featureMap.add(F.id,W,this._bufferOffset,A),this._bufferOffset=A,this.needsUpload=!0}updatePaintArrays(A,F,W,re){for(let fe of W)this.needsUpload=this.programConfigurations[fe.id].updatePaintArrays(A,this._featureMap,F,fe,re)||this.needsUpload}get(A){return this.programConfigurations[A]}upload(A){if(this.needsUpload){for(let F in this.programConfigurations)this.programConfigurations[F].upload(A);this.needsUpload=!1}}destroy(){for(let A in this.programConfigurations)this.programConfigurations[A].destroy()}}function Xs(R,A){return{"text-opacity":["opacity"],"icon-opacity":["opacity"],"text-color":["fill_color"],"icon-color":["fill_color"],"text-halo-color":["halo_color"],"icon-halo-color":["halo_color"],"text-halo-blur":["halo_blur"],"icon-halo-blur":["halo_blur"],"text-halo-width":["halo_width"],"icon-halo-width":["halo_width"],"line-gap-width":["gapwidth"],"line-pattern":["pattern_to","pattern_from","pixel_ratio_to","pixel_ratio_from"],"fill-pattern":["pattern_to","pattern_from","pixel_ratio_to","pixel_ratio_from"],"fill-extrusion-pattern":["pattern_to","pattern_from","pixel_ratio_to","pixel_ratio_from"]}[R]||[R.replace(`${A}-`,"").replace(/-/g,"_")]}function Gn(R,A,F){let W={color:{source:Ir,composite:cn},number:{source:Sr,composite:Ir}},re=function(fe){return{"line-pattern":{source:Js,composite:Js},"fill-pattern":{source:Js,composite:Js},"fill-extrusion-pattern":{source:Js,composite:Js}}[fe]}(R);return re&&re[F]||W[A][F]}Fi("ConstantBinder",Ea),Fi("CrossFadedConstantBinder",Ia),Fi("SourceExpressionBinder",yo),Fi("CrossFadedCompositeBinder",go),Fi("CompositeExpressionBinder",Da),Fi("ProgramConfiguration",Is,{omit:["_buffers"]}),Fi("ProgramConfigurationSet",Ms);let ja=8192,Fo=Math.pow(2,14)-1,Uo=-Fo-1;function $s(R){let A=ja/R.extent,F=R.loadGeometry();for(let W=0;W<F.length;W++){let re=F[W];for(let fe=0;fe<re.length;fe++){let pe=re[fe],ze=Math.round(pe.x*A),Ke=Math.round(pe.y*A);pe.x=E(ze,Uo,Fo),pe.y=E(Ke,Uo,Fo),(ze<pe.x||ze>pe.x+1||Ke<pe.y||Ke>pe.y+1)&&T("Geometry exceeds allowed extent, reduce your vector tile buffer size")}}return F}function Sl(R,A){return{type:R.type,id:R.id,properties:R.properties,geometry:A?$s(R):[]}}function bu(R,A,F,W,re){R.emplaceBack(2*A+(W+1)/2,2*F+(re+1)/2)}class dl{constructor(A){this.zoom=A.zoom,this.overscaling=A.overscaling,this.layers=A.layers,this.layerIds=this.layers.map(F=>F.id),this.index=A.index,this.hasPattern=!1,this.layoutVertexArray=new To,this.indexArray=new se,this.segments=new Ye,this.programConfigurations=new Ms(A.layers,A.zoom),this.stateDependentLayerIds=this.layers.filter(F=>F.isStateDependent()).map(F=>F.id)}populate(A,F,W){let re=this.layers[0],fe=[],pe=null,ze=!1;re.type==="circle"&&(pe=re.layout.get("circle-sort-key"),ze=!pe.isConstant());for(let{feature:Ke,id:ct,index:Lt,sourceLayerIndex:$t}of A){let fr=this.layers[0]._featureFilter.needGeometry,mr=Sl(Ke,fr);if(!this.layers[0]._featureFilter.filter(new rs(this.zoom),mr,W))continue;let Pr=ze?pe.evaluate(mr,{},W):void 0,zr={id:ct,properties:Ke.properties,type:Ke.type,sourceLayerIndex:$t,index:Lt,geometry:fr?mr.geometry:$s(Ke),patterns:{},sortKey:Pr};fe.push(zr)}ze&&fe.sort((Ke,ct)=>Ke.sortKey-ct.sortKey);for(let Ke of fe){let{geometry:ct,index:Lt,sourceLayerIndex:$t}=Ke,fr=A[Lt].feature;this.addFeature(Ke,ct,Lt,W),F.featureIndex.insert(fr,ct,Lt,$t,this.index)}}update(A,F,W){this.stateDependentLayers.length&&this.programConfigurations.updatePaintArrays(A,F,this.stateDependentLayers,W)}isEmpty(){return this.layoutVertexArray.length===0}uploadPending(){return!this.uploaded||this.programConfigurations.needsUpload}upload(A){this.uploaded||(this.layoutVertexBuffer=A.createVertexBuffer(this.layoutVertexArray,He),this.indexBuffer=A.createIndexBuffer(this.indexArray)),this.programConfigurations.upload(A),this.uploaded=!0}destroy(){this.layoutVertexBuffer&&(this.layoutVertexBuffer.destroy(),this.indexBuffer.destroy(),this.programConfigurations.destroy(),this.segments.destroy())}addFeature(A,F,W,re){for(let fe of F)for(let pe of fe){let ze=pe.x,Ke=pe.y;if(ze<0||ze>=ja||Ke<0||Ke>=ja)continue;let ct=this.segments.prepareSegment(4,this.layoutVertexArray,this.indexArray,A.sortKey),Lt=ct.vertexLength;bu(this.layoutVertexArray,ze,Ke,-1,-1),bu(this.layoutVertexArray,ze,Ke,1,-1),bu(this.layoutVertexArray,ze,Ke,1,1),bu(this.layoutVertexArray,ze,Ke,-1,1),this.indexArray.emplaceBack(Lt,Lt+1,Lt+2),this.indexArray.emplaceBack(Lt,Lt+3,Lt+2),ct.vertexLength+=4,ct.primitiveLength+=2}this.programConfigurations.populatePaintArrays(this.layoutVertexArray.length,A,W,{},re)}}function Sc(R,A){for(let F=0;F<R.length;F++)if(Ui(A,R[F]))return!0;for(let F=0;F<A.length;F++)if(Ui(R,A[F]))return!0;return!!Rr(R,A)}function Me(R,A,F){return!!Ui(R,A)||!!Nr(A,R,F)}function bt(R,A){if(R.length===1)return mi(A,R[0]);for(let F=0;F<A.length;F++){let W=A[F];for(let re=0;re<W.length;re++)if(Ui(R,W[re]))return!0}for(let F=0;F<R.length;F++)if(mi(A,R[F]))return!0;for(let F=0;F<A.length;F++)if(Rr(R,A[F]))return!0;return!1}function zt(R,A,F){if(R.length>1){if(Rr(R,A))return!0;for(let W=0;W<A.length;W++)if(Nr(A[W],R,F))return!0}for(let W=0;W<R.length;W++)if(Nr(R[W],A,F))return!0;return!1}function Rr(R,A){if(R.length===0||A.length===0)return!1;for(let F=0;F<R.length-1;F++){let W=R[F],re=R[F+1];for(let fe=0;fe<A.length-1;fe++)if(jr(W,re,A[fe],A[fe+1]))return!0}return!1}function jr(R,A,F,W){return z(R,F,W)!==z(A,F,W)&&z(R,A,F)!==z(R,A,W)}function Nr(R,A,F){let W=F*F;if(A.length===1)return R.distSqr(A[0])<W;for(let re=1;re<A.length;re++)if(Gr(R,A[re-1],A[re])<W)return!0;return!1}function Gr(R,A,F){let W=A.distSqr(F);if(W===0)return R.distSqr(A);let re=((R.x-A.x)*(F.x-A.x)+(R.y-A.y)*(F.y-A.y))/W;return R.distSqr(re<0?A:re>1?F:F.sub(A)._mult(re)._add(A))}function mi(R,A){let F,W,re,fe=!1;for(let pe=0;pe<R.length;pe++){F=R[pe];for(let ze=0,Ke=F.length-1;ze<F.length;Ke=ze++)W=F[ze],re=F[Ke],W.y>A.y!=re.y>A.y&&A.x<(re.x-W.x)*(A.y-W.y)/(re.y-W.y)+W.x&&(fe=!fe)}return fe}function Ui(R,A){let F=!1;for(let W=0,re=R.length-1;W<R.length;re=W++){let fe=R[W],pe=R[re];fe.y>A.y!=pe.y>A.y&&A.x<(pe.x-fe.x)*(A.y-fe.y)/(pe.y-fe.y)+fe.x&&(F=!F)}return F}function qi(R,A,F){let W=F[0],re=F[2];if(R.x<W.x&&A.x<W.x||R.x>re.x&&A.x>re.x||R.y<W.y&&A.y<W.y||R.y>re.y&&A.y>re.y)return!1;let fe=z(R,A,F[0]);return fe!==z(R,A,F[1])||fe!==z(R,A,F[2])||fe!==z(R,A,F[3])}function Ei(R,A,F){let W=A.paint.get(R).value;return W.kind==="constant"?W.value:F.programConfigurations.get(A.id).getMaxValue(R)}function Hn(R){return Math.sqrt(R[0]*R[0]+R[1]*R[1])}function en(R,A,F,W,re){if(!A[0]&&!A[1])return R;let fe=u.convert(A)._mult(re);F==="viewport"&&fe._rotate(-W);let pe=[];for(let ze=0;ze<R.length;ze++)pe.push(R[ze].sub(fe));return pe}let Wi,si;Fi("CircleBucket",dl,{omit:["layers"]});var Mr={get paint(){return si=si||new ue({"circle-radius":new oo(ce.paint_circle["circle-radius"]),"circle-color":new oo(ce.paint_circle["circle-color"]),"circle-blur":new oo(ce.paint_circle["circle-blur"]),"circle-opacity":new oo(ce.paint_circle["circle-opacity"]),"circle-translate":new Ua(ce.paint_circle["circle-translate"]),"circle-translate-anchor":new Ua(ce.paint_circle["circle-translate-anchor"]),"circle-pitch-scale":new Ua(ce.paint_circle["circle-pitch-scale"]),"circle-pitch-alignment":new Ua(ce.paint_circle["circle-pitch-alignment"]),"circle-stroke-width":new oo(ce.paint_circle["circle-stroke-width"]),"circle-stroke-color":new oo(ce.paint_circle["circle-stroke-color"]),"circle-stroke-opacity":new oo(ce.paint_circle["circle-stroke-opacity"])})},get layout(){return Wi=Wi||new ue({"circle-sort-key":new oo(ce.layout_circle["circle-sort-key"])})}},Yr=1e-6,xi=typeof Float32Array!="undefined"?Float32Array:Array;function Ri(R){return R[0]=1,R[1]=0,R[2]=0,R[3]=0,R[4]=0,R[5]=1,R[6]=0,R[7]=0,R[8]=0,R[9]=0,R[10]=1,R[11]=0,R[12]=0,R[13]=0,R[14]=0,R[15]=1,R}function ci(R,A,F){var W=A[0],re=A[1],fe=A[2],pe=A[3],ze=A[4],Ke=A[5],ct=A[6],Lt=A[7],$t=A[8],fr=A[9],mr=A[10],Pr=A[11],zr=A[12],ui=A[13],yi=A[14],vn=A[15],zi=F[0],un=F[1],Tn=F[2],pa=F[3];return R[0]=zi*W+un*ze+Tn*$t+pa*zr,R[1]=zi*re+un*Ke+Tn*fr+pa*ui,R[2]=zi*fe+un*ct+Tn*mr+pa*yi,R[3]=zi*pe+un*Lt+Tn*Pr+pa*vn,R[4]=(zi=F[4])*W+(un=F[5])*ze+(Tn=F[6])*$t+(pa=F[7])*zr,R[5]=zi*re+un*Ke+Tn*fr+pa*ui,R[6]=zi*fe+un*ct+Tn*mr+pa*yi,R[7]=zi*pe+un*Lt+Tn*Pr+pa*vn,R[8]=(zi=F[8])*W+(un=F[9])*ze+(Tn=F[10])*$t+(pa=F[11])*zr,R[9]=zi*re+un*Ke+Tn*fr+pa*ui,R[10]=zi*fe+un*ct+Tn*mr+pa*yi,R[11]=zi*pe+un*Lt+Tn*Pr+pa*vn,R[12]=(zi=F[12])*W+(un=F[13])*ze+(Tn=F[14])*$t+(pa=F[15])*zr,R[13]=zi*re+un*Ke+Tn*fr+pa*ui,R[14]=zi*fe+un*ct+Tn*mr+pa*yi,R[15]=zi*pe+un*Lt+Tn*Pr+pa*vn,R}Math.hypot||(Math.hypot=function(){for(var R=0,A=arguments.length;A--;)R+=arguments[A]*arguments[A];return Math.sqrt(R)});var an,Zi=ci;function Bn(R,A,F){var W=A[0],re=A[1],fe=A[2],pe=A[3];return R[0]=F[0]*W+F[4]*re+F[8]*fe+F[12]*pe,R[1]=F[1]*W+F[5]*re+F[9]*fe+F[13]*pe,R[2]=F[2]*W+F[6]*re+F[10]*fe+F[14]*pe,R[3]=F[3]*W+F[7]*re+F[11]*fe+F[15]*pe,R}an=new xi(4),xi!=Float32Array&&(an[0]=0,an[1]=0,an[2]=0,an[3]=0);class hi extends B{constructor(A){super(A,Mr)}createBucket(A){return new dl(A)}queryRadius(A){let F=A;return Ei("circle-radius",this,F)+Ei("circle-stroke-width",this,F)+Hn(this.paint.get("circle-translate"))}queryIntersectsFeature(A,F,W,re,fe,pe,ze,Ke){let ct=en(A,this.paint.get("circle-translate"),this.paint.get("circle-translate-anchor"),pe.angle,ze),Lt=this.paint.get("circle-radius").evaluate(F,W)+this.paint.get("circle-stroke-width").evaluate(F,W),$t=this.paint.get("circle-pitch-alignment")==="map",fr=$t?ct:function(Pr,zr){return Pr.map(ui=>li(ui,zr))}(ct,Ke),mr=$t?Lt*ze:Lt;for(let Pr of re)for(let zr of Pr){let ui=$t?zr:li(zr,Ke),yi=mr,vn=Bn([],[zr.x,zr.y,0,1],Ke);if(this.paint.get("circle-pitch-scale")==="viewport"&&this.paint.get("circle-pitch-alignment")==="map"?yi*=vn[3]/pe.cameraToCenterDistance:this.paint.get("circle-pitch-scale")==="map"&&this.paint.get("circle-pitch-alignment")==="viewport"&&(yi*=pe.cameraToCenterDistance/vn[3]),Me(fr,ui,yi))return!0}return!1}}function li(R,A){let F=Bn([],[R.x,R.y,0,1],A);return new u(F[0]/F[3],F[1]/F[3])}class mn extends dl{}let Ji;Fi("HeatmapBucket",mn,{omit:["layers"]});var Vi={get paint(){return Ji=Ji||new ue({"heatmap-radius":new oo(ce.paint_heatmap["heatmap-radius"]),"heatmap-weight":new oo(ce.paint_heatmap["heatmap-weight"]),"heatmap-intensity":new Ua(ce.paint_heatmap["heatmap-intensity"]),"heatmap-color":new Ku(ce.paint_heatmap["heatmap-color"]),"heatmap-opacity":new Ua(ce.paint_heatmap["heatmap-opacity"])})}};function Ni(R,{width:A,height:F},W,re){if(re){if(re instanceof Uint8ClampedArray)re=new Uint8Array(re.buffer);else if(re.length!==A*F*W)throw new RangeError(`mismatched image size. expected: ${re.length} but got: ${A*F*W}`)}else re=new Uint8Array(A*F*W);return R.width=A,R.height=F,R.data=re,R}function pn(R,{width:A,height:F},W){if(A===R.width&&F===R.height)return;let re=Ni({},{width:A,height:F},W);Vn(R,re,{x:0,y:0},{x:0,y:0},{width:Math.min(R.width,A),height:Math.min(R.height,F)},W),R.width=A,R.height=F,R.data=re.data}function Vn(R,A,F,W,re,fe){if(re.width===0||re.height===0)return A;if(re.width>R.width||re.height>R.height||F.x>R.width-re.width||F.y>R.height-re.height)throw new RangeError("out of range source coordinates for image copy");if(re.width>A.width||re.height>A.height||W.x>A.width-re.width||W.y>A.height-re.height)throw new RangeError("out of range destination coordinates for image copy");let pe=R.data,ze=A.data;if(pe===ze)throw new Error("srcData equals dstData, so image is already copied");for(let Ke=0;Ke<re.height;Ke++){let ct=((F.y+Ke)*R.width+F.x)*fe,Lt=((W.y+Ke)*A.width+W.x)*fe;for(let $t=0;$t<re.width*fe;$t++)ze[Lt+$t]=pe[ct+$t]}return A}class na{constructor(A,F){Ni(this,A,1,F)}resize(A){pn(this,A,1)}clone(){return new na({width:this.width,height:this.height},new Uint8Array(this.data))}static copy(A,F,W,re,fe){Vn(A,F,W,re,fe,1)}}class Ki{constructor(A,F){Ni(this,A,4,F)}resize(A){pn(this,A,4)}replace(A,F){F?this.data.set(A):this.data=A instanceof Uint8ClampedArray?new Uint8Array(A.buffer):A}clone(){return new Ki({width:this.width,height:this.height},new Uint8Array(this.data))}static copy(A,F,W,re,fe){Vn(A,F,W,re,fe,4)}}function kn(R){let A={},F=R.resolution||256,W=R.clips?R.clips.length:1,re=R.image||new Ki({width:F,height:W});if(Math.log(F)/Math.LN2%1!=0)throw new Error(`width is not a power of 2 - ${F}`);let fe=(pe,ze,Ke)=>{A[R.evaluationKey]=Ke;let ct=R.expression.evaluate(A);re.data[pe+ze+0]=Math.floor(255*ct.r/ct.a),re.data[pe+ze+1]=Math.floor(255*ct.g/ct.a),re.data[pe+ze+2]=Math.floor(255*ct.b/ct.a),re.data[pe+ze+3]=Math.floor(255*ct.a)};if(R.clips)for(let pe=0,ze=0;pe<W;++pe,ze+=4*F)for(let Ke=0,ct=0;Ke<F;Ke++,ct+=4){let Lt=Ke/(F-1),{start:$t,end:fr}=R.clips[pe];fe(ze,ct,$t*(1-Lt)+fr*Lt)}else for(let pe=0,ze=0;pe<F;pe++,ze+=4)fe(0,ze,pe/(F-1));return re}Fi("AlphaImage",na),Fi("RGBAImage",Ki);let ta="big-fb";class oa extends B{createBucket(A){return new mn(A)}constructor(A){super(A,Vi),this.heatmapFbos=new Map,this._updateColorRamp()}_handleSpecialPaintPropertyUpdate(A){A==="heatmap-color"&&this._updateColorRamp()}_updateColorRamp(){this.colorRamp=kn({expression:this._transitionablePaint._values["heatmap-color"].value.expression,evaluationKey:"heatmapDensity",image:this.colorRamp}),this.colorRampTexture=null}resize(){this.heatmapFbos.has(ta)&&this.heatmapFbos.delete(ta)}queryRadius(){return 0}queryIntersectsFeature(){return!1}hasOffscreenPass(){return this.paint.get("heatmap-opacity")!==0&&this.visibility!=="none"}}let ba;var is={get paint(){return ba=ba||new ue({"hillshade-illumination-direction":new Ua(ce.paint_hillshade["hillshade-illumination-direction"]),"hillshade-illumination-anchor":new Ua(ce.paint_hillshade["hillshade-illumination-anchor"]),"hillshade-exaggeration":new Ua(ce.paint_hillshade["hillshade-exaggeration"]),"hillshade-shadow-color":new Ua(ce.paint_hillshade["hillshade-shadow-color"]),"hillshade-highlight-color":new Ua(ce.paint_hillshade["hillshade-highlight-color"]),"hillshade-accent-color":new Ua(ce.paint_hillshade["hillshade-accent-color"])})}};class Zs extends B{constructor(A){super(A,is)}hasOffscreenPass(){return this.paint.get("hillshade-exaggeration")!==0&&this.visibility!=="none"}}let Va=qe([{name:"a_pos",components:2,type:"Int16"}],4),{members:Ml}=Va;function zo(R,A,F=2){let W=A&&A.length,re=W?A[0]*F:R.length,fe=Qs(R,0,re,F,!0),pe=[];if(!fe||fe.next===fe.prev)return pe;let ze,Ke,ct;if(W&&(fe=function(Lt,$t,fr,mr){let Pr=[];for(let zr=0,ui=$t.length;zr<ui;zr++){let yi=Qs(Lt,$t[zr]*mr,zr<ui-1?$t[zr+1]*mr:Lt.length,mr,!1);yi===yi.next&&(yi.steiner=!0),Pr.push(J(yi))}Pr.sort(ol);for(let zr=0;zr<Pr.length;zr++)fr=io(Pr[zr],fr);return fr}(R,A,fe,F)),R.length>80*F){ze=1/0,Ke=1/0;let Lt=-1/0,$t=-1/0;for(let fr=F;fr<re;fr+=F){let mr=R[fr],Pr=R[fr+1];mr<ze&&(ze=mr),Pr<Ke&&(Ke=Pr),mr>Lt&&(Lt=mr),Pr>$t&&($t=Pr)}ct=Math.max(Lt-ze,$t-Ke),ct=ct!==0?32767/ct:0}return Vl(fe,pe,F,ze,Ke,ct,0),pe}function Qs(R,A,F,W,re){let fe;if(re===function(pe,ze,Ke,ct){let Lt=0;for(let $t=ze,fr=Ke-ct;$t<Ke;$t+=ct)Lt+=(pe[fr]-pe[$t])*(pe[$t+1]+pe[fr+1]),fr=$t;return Lt}(R,A,F,W)>0)for(let pe=A;pe<F;pe+=W)fe=qt(pe/W|0,R[pe],R[pe+1],fe);else for(let pe=F-W;pe>=A;pe-=W)fe=qt(pe/W|0,R[pe],R[pe+1],fe);return fe&&ne(fe,fe.next)&&(Ve(fe),fe=fe.next),fe}function al(R,A){if(!R)return R;A||(A=R);let F,W=R;do if(F=!1,W.steiner||!ne(W,W.next)&&de(W.prev,W,W.next)!==0)W=W.next;else{if(Ve(W),W=A=W.prev,W===W.next)break;F=!0}while(F||W!==A);return A}function Vl(R,A,F,W,re,fe,pe){if(!R)return;!pe&&fe&&function(Ke,ct,Lt,$t){let fr=Ke;do fr.z===0&&(fr.z=D(fr.x,fr.y,ct,Lt,$t)),fr.prevZ=fr.prev,fr.nextZ=fr.next,fr=fr.next;while(fr!==Ke);fr.prevZ.nextZ=null,fr.prevZ=null,function(mr){let Pr,zr=1;do{let ui,yi=mr;mr=null;let vn=null;for(Pr=0;yi;){Pr++;let zi=yi,un=0;for(let pa=0;pa<zr&&(un++,zi=zi.nextZ,zi);pa++);let Tn=zr;for(;un>0||Tn>0&&zi;)un!==0&&(Tn===0||!zi||yi.z<=zi.z)?(ui=yi,yi=yi.nextZ,un--):(ui=zi,zi=zi.nextZ,Tn--),vn?vn.nextZ=ui:mr=ui,ui.prevZ=vn,vn=ui;yi=zi}vn.nextZ=null,zr*=2}while(Pr>1)}(fr)}(R,W,re,fe);let ze=R;for(;R.prev!==R.next;){let Ke=R.prev,ct=R.next;if(fe?Vs(R,W,re,fe):ss(R))A.push(Ke.i,R.i,ct.i),Ve(R),R=ct.next,ze=ct.next;else if((R=ct)===ze){pe?pe===1?Vl(R=Ys(al(R),A),A,F,W,re,fe,2):pe===2&&wa(R,A,F,W,re,fe):Vl(al(R),A,F,W,re,fe,1);break}}}function ss(R){let A=R.prev,F=R,W=R.next;if(de(A,F,W)>=0)return!1;let re=A.x,fe=F.x,pe=W.x,ze=A.y,Ke=F.y,ct=W.y,Lt=re<fe?re<pe?re:pe:fe<pe?fe:pe,$t=ze<Ke?ze<ct?ze:ct:Ke<ct?Ke:ct,fr=re>fe?re>pe?re:pe:fe>pe?fe:pe,mr=ze>Ke?ze>ct?ze:ct:Ke>ct?Ke:ct,Pr=W.next;for(;Pr!==A;){if(Pr.x>=Lt&&Pr.x<=fr&&Pr.y>=$t&&Pr.y<=mr&&q(re,ze,fe,Ke,pe,ct,Pr.x,Pr.y)&&de(Pr.prev,Pr,Pr.next)>=0)return!1;Pr=Pr.next}return!0}function Vs(R,A,F,W){let re=R.prev,fe=R,pe=R.next;if(de(re,fe,pe)>=0)return!1;let ze=re.x,Ke=fe.x,ct=pe.x,Lt=re.y,$t=fe.y,fr=pe.y,mr=ze<Ke?ze<ct?ze:ct:Ke<ct?Ke:ct,Pr=Lt<$t?Lt<fr?Lt:fr:$t<fr?$t:fr,zr=ze>Ke?ze>ct?ze:ct:Ke>ct?Ke:ct,ui=Lt>$t?Lt>fr?Lt:fr:$t>fr?$t:fr,yi=D(mr,Pr,A,F,W),vn=D(zr,ui,A,F,W),zi=R.prevZ,un=R.nextZ;for(;zi&&zi.z>=yi&&un&&un.z<=vn;){if(zi.x>=mr&&zi.x<=zr&&zi.y>=Pr&&zi.y<=ui&&zi!==re&&zi!==pe&&q(ze,Lt,Ke,$t,ct,fr,zi.x,zi.y)&&de(zi.prev,zi,zi.next)>=0||(zi=zi.prevZ,un.x>=mr&&un.x<=zr&&un.y>=Pr&&un.y<=ui&&un!==re&&un!==pe&&q(ze,Lt,Ke,$t,ct,fr,un.x,un.y)&&de(un.prev,un,un.next)>=0))return!1;un=un.nextZ}for(;zi&&zi.z>=yi;){if(zi.x>=mr&&zi.x<=zr&&zi.y>=Pr&&zi.y<=ui&&zi!==re&&zi!==pe&&q(ze,Lt,Ke,$t,ct,fr,zi.x,zi.y)&&de(zi.prev,zi,zi.next)>=0)return!1;zi=zi.prevZ}for(;un&&un.z<=vn;){if(un.x>=mr&&un.x<=zr&&un.y>=Pr&&un.y<=ui&&un!==re&&un!==pe&&q(ze,Lt,Ke,$t,ct,fr,un.x,un.y)&&de(un.prev,un,un.next)>=0)return!1;un=un.nextZ}return!0}function Ys(R,A){let F=R;do{let W=F.prev,re=F.next.next;!ne(W,re)&&we(W,F,F.next,re)&&Xt(W,re)&&Xt(re,W)&&(A.push(W.i,F.i,re.i),Ve(F),Ve(F.next),F=R=re),F=F.next}while(F!==R);return al(F)}function wa(R,A,F,W,re,fe){let pe=R;do{let ze=pe.next.next;for(;ze!==pe.prev;){if(pe.i!==ze.i&&K(pe,ze)){let Ke=hr(pe,ze);return pe=al(pe,pe.next),Ke=al(Ke,Ke.next),Vl(pe,A,F,W,re,fe,0),void Vl(Ke,A,F,W,re,fe,0)}ze=ze.next}pe=pe.next}while(pe!==R)}function ol(R,A){return R.x-A.x}function io(R,A){let F=function(re,fe){let pe=fe,ze=re.x,Ke=re.y,ct,Lt=-1/0;do{if(Ke<=pe.y&&Ke>=pe.next.y&&pe.next.y!==pe.y){let zr=pe.x+(Ke-pe.y)*(pe.next.x-pe.x)/(pe.next.y-pe.y);if(zr<=ze&&zr>Lt&&(Lt=zr,ct=pe.x<pe.next.x?pe:pe.next,zr===ze))return ct}pe=pe.next}while(pe!==fe);if(!ct)return null;let $t=ct,fr=ct.x,mr=ct.y,Pr=1/0;pe=ct;do{if(ze>=pe.x&&pe.x>=fr&&ze!==pe.x&&q(Ke<mr?ze:Lt,Ke,fr,mr,Ke<mr?Lt:ze,Ke,pe.x,pe.y)){let zr=Math.abs(Ke-pe.y)/(ze-pe.x);Xt(pe,re)&&(zr<Pr||zr===Pr&&(pe.x>ct.x||pe.x===ct.x&&Y(ct,pe)))&&(ct=pe,Pr=zr)}pe=pe.next}while(pe!==$t);return ct}(R,A);if(!F)return A;let W=hr(F,R);return al(W,W.next),al(F,F.next)}function Y(R,A){return de(R.prev,R,A.prev)<0&&de(A.next,R,R.next)<0}function D(R,A,F,W,re){return(R=1431655765&((R=858993459&((R=252645135&((R=16711935&((R=(R-F)*re|0)|R<<8))|R<<4))|R<<2))|R<<1))|(A=1431655765&((A=858993459&((A=252645135&((A=16711935&((A=(A-W)*re|0)|A<<8))|A<<4))|A<<2))|A<<1))<<1}function J(R){let A=R,F=R;do(A.x<F.x||A.x===F.x&&A.y<F.y)&&(F=A),A=A.next;while(A!==R);return F}function q(R,A,F,W,re,fe,pe,ze){return(re-pe)*(A-ze)>=(R-pe)*(fe-ze)&&(R-pe)*(W-ze)>=(F-pe)*(A-ze)&&(F-pe)*(fe-ze)>=(re-pe)*(W-ze)}function K(R,A){return R.next.i!==A.i&&R.prev.i!==A.i&&!function(F,W){let re=F;do{if(re.i!==F.i&&re.next.i!==F.i&&re.i!==W.i&&re.next.i!==W.i&&we(re,re.next,F,W))return!0;re=re.next}while(re!==F);return!1}(R,A)&&(Xt(R,A)&&Xt(A,R)&&function(F,W){let re=F,fe=!1,pe=(F.x+W.x)/2,ze=(F.y+W.y)/2;do re.y>ze!=re.next.y>ze&&re.next.y!==re.y&&pe<(re.next.x-re.x)*(ze-re.y)/(re.next.y-re.y)+re.x&&(fe=!fe),re=re.next;while(re!==F);return fe}(R,A)&&(de(R.prev,R,A.prev)||de(R,A.prev,A))||ne(R,A)&&de(R.prev,R,R.next)>0&&de(A.prev,A,A.next)>0)}function de(R,A,F){return(A.y-R.y)*(F.x-A.x)-(A.x-R.x)*(F.y-A.y)}function ne(R,A){return R.x===A.x&&R.y===A.y}function we(R,A,F,W){let re=ft(de(R,A,F)),fe=ft(de(R,A,W)),pe=ft(de(F,W,R)),ze=ft(de(F,W,A));return re!==fe&&pe!==ze||!(re!==0||!Ue(R,F,A))||!(fe!==0||!Ue(R,W,A))||!(pe!==0||!Ue(F,R,W))||!(ze!==0||!Ue(F,A,W))}function Ue(R,A,F){return A.x<=Math.max(R.x,F.x)&&A.x>=Math.min(R.x,F.x)&&A.y<=Math.max(R.y,F.y)&&A.y>=Math.min(R.y,F.y)}function ft(R){return R>0?1:R<0?-1:0}function Xt(R,A){return de(R.prev,R,R.next)<0?de(R,A,R.next)>=0&&de(R,R.prev,A)>=0:de(R,A,R.prev)<0||de(R,R.next,A)<0}function hr(R,A){let F=Qe(R.i,R.x,R.y),W=Qe(A.i,A.x,A.y),re=R.next,fe=A.prev;return R.next=A,A.prev=R,F.next=re,re.prev=F,W.next=F,F.prev=W,fe.next=W,W.prev=fe,W}function qt(R,A,F,W){let re=Qe(R,A,F);return W?(re.next=W.next,re.prev=W,W.next.prev=re,W.next=re):(re.prev=re,re.next=re),re}function Ve(R){R.next.prev=R.prev,R.prev.next=R.next,R.prevZ&&(R.prevZ.nextZ=R.nextZ),R.nextZ&&(R.nextZ.prevZ=R.prevZ)}function Qe(R,A,F){return{i:R,x:A,y:F,prev:null,next:null,z:0,prevZ:null,nextZ:null,steiner:!1}}function at(R,A,F){let W=F.patternDependencies,re=!1;for(let fe of A){let pe=fe.paint.get(`${R}-pattern`);pe.isConstant()||(re=!0);let ze=pe.constantOr(null);ze&&(re=!0,W[ze.to]=!0,W[ze.from]=!0)}return re}function Ct(R,A,F,W,re){let fe=re.patternDependencies;for(let pe of A){let ze=pe.paint.get(`${R}-pattern`).value;if(ze.kind!=="constant"){let Ke=ze.evaluate({zoom:W-1},F,{},re.availableImages),ct=ze.evaluate({zoom:W},F,{},re.availableImages),Lt=ze.evaluate({zoom:W+1},F,{},re.availableImages);Ke=Ke&&Ke.name?Ke.name:Ke,ct=ct&&ct.name?ct.name:ct,Lt=Lt&&Lt.name?Lt.name:Lt,fe[Ke]=!0,fe[ct]=!0,fe[Lt]=!0,F.patterns[pe.id]={min:Ke,mid:ct,max:Lt}}}return F}class Ot{constructor(A){this.zoom=A.zoom,this.overscaling=A.overscaling,this.layers=A.layers,this.layerIds=this.layers.map(F=>F.id),this.index=A.index,this.hasPattern=!1,this.patternFeatures=[],this.layoutVertexArray=new hl,this.indexArray=new se,this.indexArray2=new Te,this.programConfigurations=new Ms(A.layers,A.zoom),this.segments=new Ye,this.segments2=new Ye,this.stateDependentLayerIds=this.layers.filter(F=>F.isStateDependent()).map(F=>F.id)}populate(A,F,W){this.hasPattern=at("fill",this.layers,F);let re=this.layers[0].layout.get("fill-sort-key"),fe=!re.isConstant(),pe=[];for(let{feature:ze,id:Ke,index:ct,sourceLayerIndex:Lt}of A){let $t=this.layers[0]._featureFilter.needGeometry,fr=Sl(ze,$t);if(!this.layers[0]._featureFilter.filter(new rs(this.zoom),fr,W))continue;let mr=fe?re.evaluate(fr,{},W,F.availableImages):void 0,Pr={id:Ke,properties:ze.properties,type:ze.type,sourceLayerIndex:Lt,index:ct,geometry:$t?fr.geometry:$s(ze),patterns:{},sortKey:mr};pe.push(Pr)}fe&&pe.sort((ze,Ke)=>ze.sortKey-Ke.sortKey);for(let ze of pe){let{geometry:Ke,index:ct,sourceLayerIndex:Lt}=ze;if(this.hasPattern){let $t=Ct("fill",this.layers,ze,this.zoom,F);this.patternFeatures.push($t)}else this.addFeature(ze,Ke,ct,W,{});F.featureIndex.insert(A[ct].feature,Ke,ct,Lt,this.index)}}update(A,F,W){this.stateDependentLayers.length&&this.programConfigurations.updatePaintArrays(A,F,this.stateDependentLayers,W)}addFeatures(A,F,W){for(let re of this.patternFeatures)this.addFeature(re,re.geometry,re.index,F,W)}isEmpty(){return this.layoutVertexArray.length===0}uploadPending(){return!this.uploaded||this.programConfigurations.needsUpload}upload(A){this.uploaded||(this.layoutVertexBuffer=A.createVertexBuffer(this.layoutVertexArray,Ml),this.indexBuffer=A.createIndexBuffer(this.indexArray),this.indexBuffer2=A.createIndexBuffer(this.indexArray2)),this.programConfigurations.upload(A),this.uploaded=!0}destroy(){this.layoutVertexBuffer&&(this.layoutVertexBuffer.destroy(),this.indexBuffer.destroy(),this.indexBuffer2.destroy(),this.programConfigurations.destroy(),this.segments.destroy(),this.segments2.destroy())}addFeature(A,F,W,re,fe){for(let pe of Cf(F,500)){let ze=0;for(let mr of pe)ze+=mr.length;let Ke=this.segments.prepareSegment(ze,this.layoutVertexArray,this.indexArray),ct=Ke.vertexLength,Lt=[],$t=[];for(let mr of pe){if(mr.length===0)continue;mr!==pe[0]&&$t.push(Lt.length/2);let Pr=this.segments2.prepareSegment(mr.length,this.layoutVertexArray,this.indexArray2),zr=Pr.vertexLength;this.layoutVertexArray.emplaceBack(mr[0].x,mr[0].y),this.indexArray2.emplaceBack(zr+mr.length-1,zr),Lt.push(mr[0].x),Lt.push(mr[0].y);for(let ui=1;ui<mr.length;ui++)this.layoutVertexArray.emplaceBack(mr[ui].x,mr[ui].y),this.indexArray2.emplaceBack(zr+ui-1,zr+ui),Lt.push(mr[ui].x),Lt.push(mr[ui].y);Pr.vertexLength+=mr.length,Pr.primitiveLength+=mr.length}let fr=zo(Lt,$t);for(let mr=0;mr<fr.length;mr+=3)this.indexArray.emplaceBack(ct+fr[mr],ct+fr[mr+1],ct+fr[mr+2]);Ke.vertexLength+=ze,Ke.primitiveLength+=fr.length/3}this.programConfigurations.populatePaintArrays(this.layoutVertexArray.length,A,W,fe,re)}}let Rt,Bt;Fi("FillBucket",Ot,{omit:["layers","patternFeatures"]});var Dt={get paint(){return Bt=Bt||new ue({"fill-antialias":new Ua(ce.paint_fill["fill-antialias"]),"fill-opacity":new oo(ce.paint_fill["fill-opacity"]),"fill-color":new oo(ce.paint_fill["fill-color"]),"fill-outline-color":new oo(ce.paint_fill["fill-outline-color"]),"fill-translate":new Ua(ce.paint_fill["fill-translate"]),"fill-translate-anchor":new Ua(ce.paint_fill["fill-translate-anchor"]),"fill-pattern":new Vc(ce.paint_fill["fill-pattern"])})},get layout(){return Rt=Rt||new ue({"fill-sort-key":new oo(ce.layout_fill["fill-sort-key"])})}};class yt extends B{constructor(A){super(A,Dt)}recalculate(A,F){super.recalculate(A,F);let W=this.paint._values["fill-outline-color"];W.value.kind==="constant"&&W.value.value===void 0&&(this.paint._values["fill-outline-color"]=this.paint._values["fill-color"])}createBucket(A){return new Ot(A)}queryRadius(){return Hn(this.paint.get("fill-translate"))}queryIntersectsFeature(A,F,W,re,fe,pe,ze){return bt(en(A,this.paint.get("fill-translate"),this.paint.get("fill-translate-anchor"),pe.angle,ze),re)}isTileClipped(){return!0}}let Pt=qe([{name:"a_pos",components:2,type:"Int16"},{name:"a_normal_ed",components:4,type:"Int16"}],4),ht=qe([{name:"a_centroid",components:2,type:"Int16"}],4),{members:ur}=Pt;var br={},Ur=s,Di=fi;function fi(R,A,F,W,re){this.properties={},this.extent=F,this.type=0,this._pbf=R,this._geometry=-1,this._keys=W,this._values=re,R.readFields(Ti,this,A)}function Ti(R,A,F){R==1?A.id=F.readVarint():R==2?function(W,re){for(var fe=W.readVarint()+W.pos;W.pos<fe;){var pe=re._keys[W.readVarint()],ze=re._values[W.readVarint()];re.properties[pe]=ze}}(F,A):R==3?A.type=F.readVarint():R==4&&(A._geometry=F.pos)}function gn(R){for(var A,F,W=0,re=0,fe=R.length,pe=fe-1;re<fe;pe=re++)W+=((F=R[pe]).x-(A=R[re]).x)*(A.y+F.y);return W}fi.types=["Unknown","Point","LineString","Polygon"],fi.prototype.loadGeometry=function(){var R=this._pbf;R.pos=this._geometry;for(var A,F=R.readVarint()+R.pos,W=1,re=0,fe=0,pe=0,ze=[];R.pos<F;){if(re<=0){var Ke=R.readVarint();W=7&Ke,re=Ke>>3}if(re--,W===1||W===2)fe+=R.readSVarint(),pe+=R.readSVarint(),W===1&&(A&&ze.push(A),A=[]),A.push(new Ur(fe,pe));else{if(W!==7)throw new Error("unknown command "+W);A&&A.push(A[0].clone())}}return A&&ze.push(A),ze},fi.prototype.bbox=function(){var R=this._pbf;R.pos=this._geometry;for(var A=R.readVarint()+R.pos,F=1,W=0,re=0,fe=0,pe=1/0,ze=-1/0,Ke=1/0,ct=-1/0;R.pos<A;){if(W<=0){var Lt=R.readVarint();F=7&Lt,W=Lt>>3}if(W--,F===1||F===2)(re+=R.readSVarint())<pe&&(pe=re),re>ze&&(ze=re),(fe+=R.readSVarint())<Ke&&(Ke=fe),fe>ct&&(ct=fe);else if(F!==7)throw new Error("unknown command "+F)}return[pe,Ke,ze,ct]},fi.prototype.toGeoJSON=function(R,A,F){var W,re,fe=this.extent*Math.pow(2,F),pe=this.extent*R,ze=this.extent*A,Ke=this.loadGeometry(),ct=fi.types[this.type];function Lt(mr){for(var Pr=0;Pr<mr.length;Pr++){var zr=mr[Pr];mr[Pr]=[360*(zr.x+pe)/fe-180,360/Math.PI*Math.atan(Math.exp((180-360*(zr.y+ze)/fe)*Math.PI/180))-90]}}switch(this.type){case 1:var $t=[];for(W=0;W<Ke.length;W++)$t[W]=Ke[W][0];Lt(Ke=$t);break;case 2:for(W=0;W<Ke.length;W++)Lt(Ke[W]);break;case 3:for(Ke=function(mr){var Pr=mr.length;if(Pr<=1)return[mr];for(var zr,ui,yi=[],vn=0;vn<Pr;vn++){var zi=gn(mr[vn]);zi!==0&&(ui===void 0&&(ui=zi<0),ui===zi<0?(zr&&yi.push(zr),zr=[mr[vn]]):zr.push(mr[vn]))}return zr&&yi.push(zr),yi}(Ke),W=0;W<Ke.length;W++)for(re=0;re<Ke[W].length;re++)Lt(Ke[W][re])}Ke.length===1?Ke=Ke[0]:ct="Multi"+ct;var fr={type:"Feature",geometry:{type:ct,coordinates:Ke},properties:this.properties};return"id"in this&&(fr.id=this.id),fr};var rn=Di,Ci=Bi;function Bi(R,A){this.version=1,this.name=null,this.extent=4096,this.length=0,this._pbf=R,this._keys=[],this._values=[],this._features=[],R.readFields(Gi,this,A),this.length=this._features.length}function Gi(R,A,F){R===15?A.version=F.readVarint():R===1?A.name=F.readString():R===5?A.extent=F.readVarint():R===2?A._features.push(F.pos):R===3?A._keys.push(F.readString()):R===4&&A._values.push(function(W){for(var re=null,fe=W.readVarint()+W.pos;W.pos<fe;){var pe=W.readVarint()>>3;re=pe===1?W.readString():pe===2?W.readFloat():pe===3?W.readDouble():pe===4?W.readVarint64():pe===5?W.readVarint():pe===6?W.readSVarint():pe===7?W.readBoolean():null}return re}(F))}Bi.prototype.feature=function(R){if(R<0||R>=this._features.length)throw new Error("feature index out of bounds");this._pbf.pos=this._features[R];var A=this._pbf.readVarint()+this._pbf.pos;return new rn(this._pbf,A,this.extent,this._keys,this._values)};var sn=Ci;function zn(R,A,F){if(R===3){var W=new sn(F,F.readVarint()+F.pos);W.length&&(A[W.name]=W)}}br.VectorTile=function(R,A){this.layers=R.readFields(zn,{},A)},br.VectorTileFeature=Di,br.VectorTileLayer=Ci;let Ja=br.VectorTileFeature.types,co=Math.pow(2,13);function ts(R,A,F,W,re,fe,pe,ze){R.emplaceBack(A,F,2*Math.floor(W*co)+pe,re*co*2,fe*co*2,Math.round(ze))}class so{constructor(A){this.zoom=A.zoom,this.overscaling=A.overscaling,this.layers=A.layers,this.layerIds=this.layers.map(F=>F.id),this.index=A.index,this.hasPattern=!1,this.layoutVertexArray=new Ul,this.centroidVertexArray=new wo,this.indexArray=new se,this.programConfigurations=new Ms(A.layers,A.zoom),this.segments=new Ye,this.stateDependentLayerIds=this.layers.filter(F=>F.isStateDependent()).map(F=>F.id)}populate(A,F,W){this.features=[],this.hasPattern=at("fill-extrusion",this.layers,F);for(let{feature:re,id:fe,index:pe,sourceLayerIndex:ze}of A){let Ke=this.layers[0]._featureFilter.needGeometry,ct=Sl(re,Ke);if(!this.layers[0]._featureFilter.filter(new rs(this.zoom),ct,W))continue;let Lt={id:fe,sourceLayerIndex:ze,index:pe,geometry:Ke?ct.geometry:$s(re),properties:re.properties,type:re.type,patterns:{}};this.hasPattern?this.features.push(Ct("fill-extrusion",this.layers,Lt,this.zoom,F)):this.addFeature(Lt,Lt.geometry,pe,W,{}),F.featureIndex.insert(re,Lt.geometry,pe,ze,this.index,!0)}}addFeatures(A,F,W){for(let re of this.features){let{geometry:fe}=re;this.addFeature(re,fe,re.index,F,W)}}update(A,F,W){this.stateDependentLayers.length&&this.programConfigurations.updatePaintArrays(A,F,this.stateDependentLayers,W)}isEmpty(){return this.layoutVertexArray.length===0&&this.centroidVertexArray.length===0}uploadPending(){return!this.uploaded||this.programConfigurations.needsUpload}upload(A){this.uploaded||(this.layoutVertexBuffer=A.createVertexBuffer(this.layoutVertexArray,ur),this.centroidVertexBuffer=A.createVertexBuffer(this.centroidVertexArray,ht.members,!0),this.indexBuffer=A.createIndexBuffer(this.indexArray)),this.programConfigurations.upload(A),this.uploaded=!0}destroy(){this.layoutVertexBuffer&&(this.layoutVertexBuffer.destroy(),this.indexBuffer.destroy(),this.programConfigurations.destroy(),this.segments.destroy(),this.centroidVertexBuffer.destroy())}addFeature(A,F,W,re,fe){for(let pe of Cf(F,500)){let ze={x:0,y:0,vertexCount:0},Ke=0;for(let Pr of pe)Ke+=Pr.length;let ct=this.segments.prepareSegment(4,this.layoutVertexArray,this.indexArray);for(let Pr of pe){if(Pr.length===0||ms(Pr))continue;let zr=0;for(let ui=0;ui<Pr.length;ui++){let yi=Pr[ui];if(ui>=1){let vn=Pr[ui-1];if(!Zo(yi,vn)){ct.vertexLength+4>Ye.MAX_VERTEX_ARRAY_LENGTH&&(ct=this.segments.prepareSegment(4,this.layoutVertexArray,this.indexArray));let zi=yi.sub(vn)._perp()._unit(),un=vn.dist(yi);zr+un>32768&&(zr=0),ts(this.layoutVertexArray,yi.x,yi.y,zi.x,zi.y,0,0,zr),ts(this.layoutVertexArray,yi.x,yi.y,zi.x,zi.y,0,1,zr),ze.x+=2*yi.x,ze.y+=2*yi.y,ze.vertexCount+=2,zr+=un,ts(this.layoutVertexArray,vn.x,vn.y,zi.x,zi.y,0,0,zr),ts(this.layoutVertexArray,vn.x,vn.y,zi.x,zi.y,0,1,zr),ze.x+=2*vn.x,ze.y+=2*vn.y,ze.vertexCount+=2;let Tn=ct.vertexLength;this.indexArray.emplaceBack(Tn,Tn+2,Tn+1),this.indexArray.emplaceBack(Tn+1,Tn+2,Tn+3),ct.vertexLength+=4,ct.primitiveLength+=2}}}}if(ct.vertexLength+Ke>Ye.MAX_VERTEX_ARRAY_LENGTH&&(ct=this.segments.prepareSegment(Ke,this.layoutVertexArray,this.indexArray)),Ja[A.type]!=="Polygon")continue;let Lt=[],$t=[],fr=ct.vertexLength;for(let Pr of pe)if(Pr.length!==0){Pr!==pe[0]&&$t.push(Lt.length/2);for(let zr=0;zr<Pr.length;zr++){let ui=Pr[zr];ts(this.layoutVertexArray,ui.x,ui.y,0,0,1,1,0),ze.x+=ui.x,ze.y+=ui.y,ze.vertexCount+=1,Lt.push(ui.x),Lt.push(ui.y)}}let mr=zo(Lt,$t);for(let Pr=0;Pr<mr.length;Pr+=3)this.indexArray.emplaceBack(fr+mr[Pr],fr+mr[Pr+2],fr+mr[Pr+1]);ct.primitiveLength+=mr.length/3,ct.vertexLength+=Ke;for(let Pr=0;Pr<ze.vertexCount;Pr++){let zr=Math.floor(ze.x/ze.vertexCount),ui=Math.floor(ze.y/ze.vertexCount);this.centroidVertexArray.emplaceBack(zr,ui)}}this.programConfigurations.populatePaintArrays(this.layoutVertexArray.length,A,W,fe,re)}}function Zo(R,A){return R.x===A.x&&(R.x<0||R.x>ja)||R.y===A.y&&(R.y<0||R.y>ja)}function ms(R){return R.every(A=>A.x<0)||R.every(A=>A.x>ja)||R.every(A=>A.y<0)||R.every(A=>A.y>ja)}let ou;Fi("FillExtrusionBucket",so,{omit:["layers","features"]});var Cv={get paint(){return ou=ou||new ue({"fill-extrusion-opacity":new Ua(ce["paint_fill-extrusion"]["fill-extrusion-opacity"]),"fill-extrusion-color":new oo(ce["paint_fill-extrusion"]["fill-extrusion-color"]),"fill-extrusion-translate":new Ua(ce["paint_fill-extrusion"]["fill-extrusion-translate"]),"fill-extrusion-translate-anchor":new Ua(ce["paint_fill-extrusion"]["fill-extrusion-translate-anchor"]),"fill-extrusion-pattern":new Vc(ce["paint_fill-extrusion"]["fill-extrusion-pattern"]),"fill-extrusion-height":new oo(ce["paint_fill-extrusion"]["fill-extrusion-height"]),"fill-extrusion-base":new oo(ce["paint_fill-extrusion"]["fill-extrusion-base"]),"fill-extrusion-vertical-gradient":new Ua(ce["paint_fill-extrusion"]["fill-extrusion-vertical-gradient"])})}};class Lv extends B{constructor(A){super(A,Cv)}createBucket(A){return new so(A)}queryRadius(){return Hn(this.paint.get("fill-extrusion-translate"))}is3D(){return!0}queryIntersectsFeature(A,F,W,re,fe,pe,ze,Ke){let ct=en(A,this.paint.get("fill-extrusion-translate"),this.paint.get("fill-extrusion-translate-anchor"),pe.angle,ze),Lt=this.paint.get("fill-extrusion-height").evaluate(F,W),$t=this.paint.get("fill-extrusion-base").evaluate(F,W),fr=function(Pr,zr,ui,yi){let vn=[];for(let zi of Pr){let un=[zi.x,zi.y,0,1];Bn(un,un,zr),vn.push(new u(un[0]/un[3],un[1]/un[3]))}return vn}(ct,Ke),mr=function(Pr,zr,ui,yi){let vn=[],zi=[],un=yi[8]*zr,Tn=yi[9]*zr,pa=yi[10]*zr,ro=yi[11]*zr,Vo=yi[8]*ui,Xa=yi[9]*ui,sa=yi[10]*ui,Mo=yi[11]*ui;for(let fo of Pr){let lo=[],Xn=[];for(let Ro of fo){let uo=Ro.x,$o=Ro.y,Ju=yi[0]*uo+yi[4]*$o+yi[12],qu=yi[1]*uo+yi[5]*$o+yi[13],Mh=yi[2]*uo+yi[6]*$o+yi[14],Qv=yi[3]*uo+yi[7]*$o+yi[15],ld=Mh+pa,Eh=Qv+ro,Gd=Ju+Vo,Hd=qu+Xa,jd=Mh+sa,Af=Qv+Mo,kh=new u((Ju+un)/Eh,(qu+Tn)/Eh);kh.z=ld/Eh,lo.push(kh);let Ed=new u(Gd/Af,Hd/Af);Ed.z=jd/Af,Xn.push(Ed)}vn.push(lo),zi.push(Xn)}return[vn,zi]}(re,$t,Lt,Ke);return function(Pr,zr,ui){let yi=1/0;bt(ui,zr)&&(yi=Kv(ui,zr[0]));for(let vn=0;vn<zr.length;vn++){let zi=zr[vn],un=Pr[vn];for(let Tn=0;Tn<zi.length-1;Tn++){let pa=zi[Tn],ro=[pa,zi[Tn+1],un[Tn+1],un[Tn],pa];Sc(ui,ro)&&(yi=Math.min(yi,Kv(ui,ro)))}}return yi!==1/0&&yi}(mr[0],mr[1],fr)}}function wd(R,A){return R.x*A.x+R.y*A.y}function Kv(R,A){if(R.length===1){let F=0,W=A[F++],re;for(;!re||W.equals(re);)if(re=A[F++],!re)return 1/0;for(;F<A.length;F++){let fe=A[F],pe=R[0],ze=re.sub(W),Ke=fe.sub(W),ct=pe.sub(W),Lt=wd(ze,ze),$t=wd(ze,Ke),fr=wd(Ke,Ke),mr=wd(ct,ze),Pr=wd(ct,Ke),zr=Lt*fr-$t*$t,ui=(fr*mr-$t*Pr)/zr,yi=(Lt*Pr-$t*mr)/zr,vn=W.z*(1-ui-yi)+re.z*ui+fe.z*yi;if(isFinite(vn))return vn}return 1/0}{let F=1/0;for(let W of A)F=Math.min(F,W.z);return F}}let cg=qe([{name:"a_pos_normal",components:2,type:"Int16"},{name:"a_data",components:4,type:"Uint8"}],4),{members:pp}=cg,Td=qe([{name:"a_uv_x",components:1,type:"Float32"},{name:"a_split_index",components:1,type:"Float32"}]),{members:gp}=Td,Vd=br.VectorTileFeature.types,Ad=Math.cos(Math.PI/180*37.5),Pv=Math.pow(2,14)/.5;class Jv{constructor(A){this.zoom=A.zoom,this.overscaling=A.overscaling,this.layers=A.layers,this.layerIds=this.layers.map(F=>F.id),this.index=A.index,this.hasPattern=!1,this.patternFeatures=[],this.lineClipsArray=[],this.gradients={},this.layers.forEach(F=>{this.gradients[F.id]={}}),this.layoutVertexArray=new Lu,this.layoutVertexArray2=new au,this.indexArray=new se,this.programConfigurations=new Ms(A.layers,A.zoom),this.segments=new Ye,this.maxLineLength=0,this.stateDependentLayerIds=this.layers.filter(F=>F.isStateDependent()).map(F=>F.id)}populate(A,F,W){this.hasPattern=at("line",this.layers,F);let re=this.layers[0].layout.get("line-sort-key"),fe=!re.isConstant(),pe=[];for(let{feature:ze,id:Ke,index:ct,sourceLayerIndex:Lt}of A){let $t=this.layers[0]._featureFilter.needGeometry,fr=Sl(ze,$t);if(!this.layers[0]._featureFilter.filter(new rs(this.zoom),fr,W))continue;let mr=fe?re.evaluate(fr,{},W):void 0,Pr={id:Ke,properties:ze.properties,type:ze.type,sourceLayerIndex:Lt,index:ct,geometry:$t?fr.geometry:$s(ze),patterns:{},sortKey:mr};pe.push(Pr)}fe&&pe.sort((ze,Ke)=>ze.sortKey-Ke.sortKey);for(let ze of pe){let{geometry:Ke,index:ct,sourceLayerIndex:Lt}=ze;if(this.hasPattern){let $t=Ct("line",this.layers,ze,this.zoom,F);this.patternFeatures.push($t)}else this.addFeature(ze,Ke,ct,W,{});F.featureIndex.insert(A[ct].feature,Ke,ct,Lt,this.index)}}update(A,F,W){this.stateDependentLayers.length&&this.programConfigurations.updatePaintArrays(A,F,this.stateDependentLayers,W)}addFeatures(A,F,W){for(let re of this.patternFeatures)this.addFeature(re,re.geometry,re.index,F,W)}isEmpty(){return this.layoutVertexArray.length===0}uploadPending(){return!this.uploaded||this.programConfigurations.needsUpload}upload(A){this.uploaded||(this.layoutVertexArray2.length!==0&&(this.layoutVertexBuffer2=A.createVertexBuffer(this.layoutVertexArray2,gp)),this.layoutVertexBuffer=A.createVertexBuffer(this.layoutVertexArray,pp),this.indexBuffer=A.createIndexBuffer(this.indexArray)),this.programConfigurations.upload(A),this.uploaded=!0}destroy(){this.layoutVertexBuffer&&(this.layoutVertexBuffer.destroy(),this.indexBuffer.destroy(),this.programConfigurations.destroy(),this.segments.destroy())}lineFeatureClips(A){if(A.properties&&Object.prototype.hasOwnProperty.call(A.properties,"mapbox_clip_start")&&Object.prototype.hasOwnProperty.call(A.properties,"mapbox_clip_end"))return{start:+A.properties.mapbox_clip_start,end:+A.properties.mapbox_clip_end}}addFeature(A,F,W,re,fe){let pe=this.layers[0].layout,ze=pe.get("line-join").evaluate(A,{}),Ke=pe.get("line-cap"),ct=pe.get("line-miter-limit"),Lt=pe.get("line-round-limit");this.lineClips=this.lineFeatureClips(A);for(let $t of F)this.addLine($t,A,ze,Ke,ct,Lt);this.programConfigurations.populatePaintArrays(this.layoutVertexArray.length,A,W,fe,re)}addLine(A,F,W,re,fe,pe){if(this.distance=0,this.scaledDistance=0,this.totalDistance=0,this.lineClips){this.lineClipsArray.push(this.lineClips);for(let yi=0;yi<A.length-1;yi++)this.totalDistance+=A[yi].dist(A[yi+1]);this.updateScaledDistance(),this.maxLineLength=Math.max(this.maxLineLength,this.totalDistance)}let ze=Vd[F.type]==="Polygon",Ke=A.length;for(;Ke>=2&&A[Ke-1].equals(A[Ke-2]);)Ke--;let ct=0;for(;ct<Ke-1&&A[ct].equals(A[ct+1]);)ct++;if(Ke<(ze?3:2))return;W==="bevel"&&(fe=1.05);let Lt=this.overscaling<=16?15*ja/(512*this.overscaling):0,$t=this.segments.prepareSegment(10*Ke,this.layoutVertexArray,this.indexArray),fr,mr,Pr,zr,ui;this.e1=this.e2=-1,ze&&(fr=A[Ke-2],ui=A[ct].sub(fr)._unit()._perp());for(let yi=ct;yi<Ke;yi++){if(Pr=yi===Ke-1?ze?A[ct+1]:void 0:A[yi+1],Pr&&A[yi].equals(Pr))continue;ui&&(zr=ui),fr&&(mr=fr),fr=A[yi],ui=Pr?Pr.sub(fr)._unit()._perp():zr,zr=zr||ui;let vn=zr.add(ui);vn.x===0&&vn.y===0||vn._unit();let zi=zr.x*ui.x+zr.y*ui.y,un=vn.x*ui.x+vn.y*ui.y,Tn=un!==0?1/un:1/0,pa=2*Math.sqrt(2-2*un),ro=un<Ad&&mr&&Pr,Vo=zr.x*ui.y-zr.y*ui.x>0;if(ro&&yi>ct){let Mo=fr.dist(mr);if(Mo>2*Lt){let fo=fr.sub(fr.sub(mr)._mult(Lt/Mo)._round());this.updateDistance(mr,fo),this.addCurrentVertex(fo,zr,0,0,$t),mr=fo}}let Xa=mr&&Pr,sa=Xa?W:ze?"butt":re;if(Xa&&sa==="round"&&(Tn<pe?sa="miter":Tn<=2&&(sa="fakeround")),sa==="miter"&&Tn>fe&&(sa="bevel"),sa==="bevel"&&(Tn>2&&(sa="flipbevel"),Tn<fe&&(sa="miter")),mr&&this.updateDistance(mr,fr),sa==="miter")vn._mult(Tn),this.addCurrentVertex(fr,vn,0,0,$t);else if(sa==="flipbevel"){if(Tn>100)vn=ui.mult(-1);else{let Mo=Tn*zr.add(ui).mag()/zr.sub(ui).mag();vn._perp()._mult(Mo*(Vo?-1:1))}this.addCurrentVertex(fr,vn,0,0,$t),this.addCurrentVertex(fr,vn.mult(-1),0,0,$t)}else if(sa==="bevel"||sa==="fakeround"){let Mo=-Math.sqrt(Tn*Tn-1),fo=Vo?Mo:0,lo=Vo?0:Mo;if(mr&&this.addCurrentVertex(fr,zr,fo,lo,$t),sa==="fakeround"){let Xn=Math.round(180*pa/Math.PI/20);for(let Ro=1;Ro<Xn;Ro++){let uo=Ro/Xn;if(uo!==.5){let Ju=uo-.5;uo+=uo*Ju*(uo-1)*((1.0904+zi*(zi*(3.55645-1.43519*zi)-3.2452))*Ju*Ju+(.848013+zi*(.215638*zi-1.06021)))}let $o=ui.sub(zr)._mult(uo)._add(zr)._unit()._mult(Vo?-1:1);this.addHalfVertex(fr,$o.x,$o.y,!1,Vo,0,$t)}}Pr&&this.addCurrentVertex(fr,ui,-fo,-lo,$t)}else if(sa==="butt")this.addCurrentVertex(fr,vn,0,0,$t);else if(sa==="square"){let Mo=mr?1:-1;this.addCurrentVertex(fr,vn,Mo,Mo,$t)}else sa==="round"&&(mr&&(this.addCurrentVertex(fr,zr,0,0,$t),this.addCurrentVertex(fr,zr,1,1,$t,!0)),Pr&&(this.addCurrentVertex(fr,ui,-1,-1,$t,!0),this.addCurrentVertex(fr,ui,0,0,$t)));if(ro&&yi<Ke-1){let Mo=fr.dist(Pr);if(Mo>2*Lt){let fo=fr.add(Pr.sub(fr)._mult(Lt/Mo)._round());this.updateDistance(fr,fo),this.addCurrentVertex(fo,ui,0,0,$t),fr=fo}}}}addCurrentVertex(A,F,W,re,fe,pe=!1){let ze=F.y*re-F.x,Ke=-F.y-F.x*re;this.addHalfVertex(A,F.x+F.y*W,F.y-F.x*W,pe,!1,W,fe),this.addHalfVertex(A,ze,Ke,pe,!0,-re,fe),this.distance>Pv/2&&this.totalDistance===0&&(this.distance=0,this.updateScaledDistance(),this.addCurrentVertex(A,F,W,re,fe,pe))}addHalfVertex({x:A,y:F},W,re,fe,pe,ze,Ke){let ct=.5*(this.lineClips?this.scaledDistance*(Pv-1):this.scaledDistance);this.layoutVertexArray.emplaceBack((A<<1)+(fe?1:0),(F<<1)+(pe?1:0),Math.round(63*W)+128,Math.round(63*re)+128,1+(ze===0?0:ze<0?-1:1)|(63&ct)<<2,ct>>6),this.lineClips&&this.layoutVertexArray2.emplaceBack((this.scaledDistance-this.lineClips.start)/(this.lineClips.end-this.lineClips.start),this.lineClipsArray.length);let Lt=Ke.vertexLength++;this.e1>=0&&this.e2>=0&&(this.indexArray.emplaceBack(this.e1,this.e2,Lt),Ke.primitiveLength++),pe?this.e2=Lt:this.e1=Lt}updateScaledDistance(){this.scaledDistance=this.lineClips?this.lineClips.start+(this.lineClips.end-this.lineClips.start)*this.distance/this.totalDistance:this.distance}updateDistance(A,F){this.distance+=A.dist(F),this.updateScaledDistance()}}let Iv,ay;Fi("LineBucket",Jv,{omit:["layers","patternFeatures"]});var fg={get paint(){return ay=ay||new ue({"line-opacity":new oo(ce.paint_line["line-opacity"]),"line-color":new oo(ce.paint_line["line-color"]),"line-translate":new Ua(ce.paint_line["line-translate"]),"line-translate-anchor":new Ua(ce.paint_line["line-translate-anchor"]),"line-width":new oo(ce.paint_line["line-width"]),"line-gap-width":new oo(ce.paint_line["line-gap-width"]),"line-offset":new oo(ce.paint_line["line-offset"]),"line-blur":new oo(ce.paint_line["line-blur"]),"line-dasharray":new hc(ce.paint_line["line-dasharray"]),"line-pattern":new Vc(ce.paint_line["line-pattern"]),"line-gradient":new Ku(ce.paint_line["line-gradient"])})},get layout(){return Iv=Iv||new ue({"line-cap":new Ua(ce.layout_line["line-cap"]),"line-join":new oo(ce.layout_line["line-join"]),"line-miter-limit":new Ua(ce.layout_line["line-miter-limit"]),"line-round-limit":new Ua(ce.layout_line["line-round-limit"]),"line-sort-key":new oo(ce.layout_line["line-sort-key"])})}};class oh extends oo{possiblyEvaluate(A,F){return F=new rs(Math.floor(F.zoom),{now:F.now,fadeDuration:F.fadeDuration,zoomHistory:F.zoomHistory,transition:F.transition}),super.possiblyEvaluate(A,F)}evaluate(A,F,W,re){return F=L({},F,{zoom:Math.floor(F.zoom)}),super.evaluate(A,F,W,re)}}let hg;class oy extends B{constructor(A){super(A,fg),this.gradientVersion=0,hg||(hg=new oh(fg.paint.properties["line-width"].specification),hg.useIntegerZoom=!0)}_handleSpecialPaintPropertyUpdate(A){if(A==="line-gradient"){let F=this.gradientExpression();this.stepInterpolant=!!function(W){return W._styleExpression!==void 0}(F)&&F._styleExpression.expression instanceof _n,this.gradientVersion=(this.gradientVersion+1)%Number.MAX_SAFE_INTEGER}}gradientExpression(){return this._transitionablePaint._values["line-gradient"].value.expression}recalculate(A,F){super.recalculate(A,F),this.paint._values["line-floorwidth"]=hg.possiblyEvaluate(this._transitioningPaint._values["line-width"].value,A)}createBucket(A){return new Jv(A)}queryRadius(A){let F=A,W=jh(Ei("line-width",this,F),Ei("line-gap-width",this,F)),re=Ei("line-offset",this,F);return W/2+Math.abs(re)+Hn(this.paint.get("line-translate"))}queryIntersectsFeature(A,F,W,re,fe,pe,ze){let Ke=en(A,this.paint.get("line-translate"),this.paint.get("line-translate-anchor"),pe.angle,ze),ct=ze/2*jh(this.paint.get("line-width").evaluate(F,W),this.paint.get("line-gap-width").evaluate(F,W)),Lt=this.paint.get("line-offset").evaluate(F,W);return Lt&&(re=function($t,fr){let mr=[];for(let Pr=0;Pr<$t.length;Pr++){let zr=$t[Pr],ui=[];for(let yi=0;yi<zr.length;yi++){let vn=zr[yi-1],zi=zr[yi],un=zr[yi+1],Tn=yi===0?new u(0,0):zi.sub(vn)._unit()._perp(),pa=yi===zr.length-1?new u(0,0):un.sub(zi)._unit()._perp(),ro=Tn._add(pa)._unit(),Vo=ro.x*pa.x+ro.y*pa.y;Vo!==0&&ro._mult(1/Vo),ui.push(ro._mult(fr)._add(zi))}mr.push(ui)}return mr}(re,Lt*ze)),function($t,fr,mr){for(let Pr=0;Pr<fr.length;Pr++){let zr=fr[Pr];if($t.length>=3){for(let ui=0;ui<zr.length;ui++)if(Ui($t,zr[ui]))return!0}if(zt($t,zr,mr))return!0}return!1}(Ke,re,ct)}isTileClipped(){return!0}}function jh(R,A){return A>0?A+2*R:R}let im=qe([{name:"a_pos_offset",components:4,type:"Int16"},{name:"a_data",components:4,type:"Uint16"},{name:"a_pixeloffset",components:4,type:"Int16"}],4),b1=qe([{name:"a_projected_pos",components:3,type:"Float32"}],4);qe([{name:"a_fade_opacity",components:1,type:"Uint32"}],4);let w1=qe([{name:"a_placed",components:2,type:"Uint8"},{name:"a_shift",components:2,type:"Float32"},{name:"a_box_real",components:2,type:"Int16"}]);qe([{type:"Int16",name:"anchorPointX"},{type:"Int16",name:"anchorPointY"},{type:"Int16",name:"x1"},{type:"Int16",name:"y1"},{type:"Int16",name:"x2"},{type:"Int16",name:"y2"},{type:"Uint32",name:"featureIndex"},{type:"Uint16",name:"sourceLayerIndex"},{type:"Uint16",name:"bucketIndex"}]);let sy=qe([{name:"a_pos",components:2,type:"Int16"},{name:"a_anchor_pos",components:2,type:"Int16"},{name:"a_extrude",components:2,type:"Int16"}],4),nm=qe([{name:"a_pos",components:2,type:"Float32"},{name:"a_radius",components:1,type:"Float32"},{name:"a_flags",components:2,type:"Int16"}],4);function am(R,A,F){return R.sections.forEach(W=>{W.text=function(re,fe,pe){let ze=fe.layout.get("text-transform").evaluate(pe,{});return ze==="uppercase"?re=re.toLocaleUpperCase():ze==="lowercase"&&(re=re.toLocaleLowerCase()),ys.applyArabicShaping&&(re=ys.applyArabicShaping(re)),re}(W.text,A,F)}),R}qe([{name:"triangle",components:3,type:"Uint16"}]),qe([{type:"Int16",name:"anchorX"},{type:"Int16",name:"anchorY"},{type:"Uint16",name:"glyphStartIndex"},{type:"Uint16",name:"numGlyphs"},{type:"Uint32",name:"vertexStartIndex"},{type:"Uint32",name:"lineStartIndex"},{type:"Uint32",name:"lineLength"},{type:"Uint16",name:"segment"},{type:"Uint16",name:"lowerSize"},{type:"Uint16",name:"upperSize"},{type:"Float32",name:"lineOffsetX"},{type:"Float32",name:"lineOffsetY"},{type:"Uint8",name:"writingMode"},{type:"Uint8",name:"placedOrientation"},{type:"Uint8",name:"hidden"},{type:"Uint32",name:"crossTileID"},{type:"Int16",name:"associatedIconIndex"}]),qe([{type:"Int16",name:"anchorX"},{type:"Int16",name:"anchorY"},{type:"Int16",name:"rightJustifiedTextSymbolIndex"},{type:"Int16",name:"centerJustifiedTextSymbolIndex"},{type:"Int16",name:"leftJustifiedTextSymbolIndex"},{type:"Int16",name:"verticalPlacedTextSymbolIndex"},{type:"Int16",name:"placedIconSymbolIndex"},{type:"Int16",name:"verticalPlacedIconSymbolIndex"},{type:"Uint16",name:"key"},{type:"Uint16",name:"textBoxStartIndex"},{type:"Uint16",name:"textBoxEndIndex"},{type:"Uint16",name:"verticalTextBoxStartIndex"},{type:"Uint16",name:"verticalTextBoxEndIndex"},{type:"Uint16",name:"iconBoxStartIndex"},{type:"Uint16",name:"iconBoxEndIndex"},{type:"Uint16",name:"verticalIconBoxStartIndex"},{type:"Uint16",name:"verticalIconBoxEndIndex"},{type:"Uint16",name:"featureIndex"},{type:"Uint16",name:"numHorizontalGlyphVertices"},{type:"Uint16",name:"numVerticalGlyphVertices"},{type:"Uint16",name:"numIconVertices"},{type:"Uint16",name:"numVerticalIconVertices"},{type:"Uint16",name:"useRuntimeCollisionCircles"},{type:"Uint32",name:"crossTileID"},{type:"Float32",name:"textBoxScale"},{type:"Float32",name:"collisionCircleDiameter"},{type:"Uint16",name:"textAnchorOffsetStartIndex"},{type:"Uint16",name:"textAnchorOffsetEndIndex"}]),qe([{type:"Float32",name:"offsetX"}]),qe([{type:"Int16",name:"x"},{type:"Int16",name:"y"},{type:"Int16",name:"tileUnitDistanceFromAnchor"}]),qe([{type:"Uint16",name:"textAnchor"},{type:"Float32",components:2,name:"textOffset"}]);let vc={"!":"\uFE15","#":"\uFF03",$:"\uFF04","%":"\uFF05","&":"\uFF06","(":"\uFE35",")":"\uFE36","*":"\uFF0A","+":"\uFF0B",",":"\uFE10","-":"\uFE32",".":"\u30FB","/":"\uFF0F",":":"\uFE13",";":"\uFE14","<":"\uFE3F","=":"\uFF1D",">":"\uFE40","?":"\uFE16","@":"\uFF20","[":"\uFE47","\\":"\uFF3C","]":"\uFE48","^":"\uFF3E",_:"\uFE33","`":"\uFF40","{":"\uFE37","|":"\u2015","}":"\uFE38","~":"\uFF5E","\xA2":"\uFFE0","\xA3":"\uFFE1","\xA5":"\uFFE5","\xA6":"\uFFE4","\xAC":"\uFFE2","\xAF":"\uFFE3","\u2013":"\uFE32","\u2014":"\uFE31","\u2018":"\uFE43","\u2019":"\uFE44","\u201C":"\uFE41","\u201D":"\uFE42","\u2026":"\uFE19","\u2027":"\u30FB","\u20A9":"\uFFE6","\u3001":"\uFE11","\u3002":"\uFE12","\u3008":"\uFE3F","\u3009":"\uFE40","\u300A":"\uFE3D","\u300B":"\uFE3E","\u300C":"\uFE41","\u300D":"\uFE42","\u300E":"\uFE43","\u300F":"\uFE44","\u3010":"\uFE3B","\u3011":"\uFE3C","\u3014":"\uFE39","\u3015":"\uFE3A","\u3016":"\uFE17","\u3017":"\uFE18","\uFF01":"\uFE15","\uFF08":"\uFE35","\uFF09":"\uFE36","\uFF0C":"\uFE10","\uFF0D":"\uFE32","\uFF0E":"\u30FB","\uFF1A":"\uFE13","\uFF1B":"\uFE14","\uFF1C":"\uFE3F","\uFF1E":"\uFE40","\uFF1F":"\uFE16","\uFF3B":"\uFE47","\uFF3D":"\uFE48","\uFF3F":"\uFE33","\uFF5B":"\uFE37","\uFF5C":"\u2015","\uFF5D":"\uFE38","\uFF5F":"\uFE35","\uFF60":"\uFE36","\uFF61":"\uFE12","\uFF62":"\uFE41","\uFF63":"\uFE42"};var eu=24,Sd=wu,ly=function(R,A,F,W,re){var fe,pe,ze=8*re-W-1,Ke=(1<<ze)-1,ct=Ke>>1,Lt=-7,$t=F?re-1:0,fr=F?-1:1,mr=R[A+$t];for($t+=fr,fe=mr&(1<<-Lt)-1,mr>>=-Lt,Lt+=ze;Lt>0;fe=256*fe+R[A+$t],$t+=fr,Lt-=8);for(pe=fe&(1<<-Lt)-1,fe>>=-Lt,Lt+=W;Lt>0;pe=256*pe+R[A+$t],$t+=fr,Lt-=8);if(fe===0)fe=1-ct;else{if(fe===Ke)return pe?NaN:1/0*(mr?-1:1);pe+=Math.pow(2,W),fe-=ct}return(mr?-1:1)*pe*Math.pow(2,fe-W)},T1=function(R,A,F,W,re,fe){var pe,ze,Ke,ct=8*fe-re-1,Lt=(1<<ct)-1,$t=Lt>>1,fr=re===23?Math.pow(2,-24)-Math.pow(2,-77):0,mr=W?0:fe-1,Pr=W?1:-1,zr=A<0||A===0&&1/A<0?1:0;for(A=Math.abs(A),isNaN(A)||A===1/0?(ze=isNaN(A)?1:0,pe=Lt):(pe=Math.floor(Math.log(A)/Math.LN2),A*(Ke=Math.pow(2,-pe))<1&&(pe--,Ke*=2),(A+=pe+$t>=1?fr/Ke:fr*Math.pow(2,1-$t))*Ke>=2&&(pe++,Ke/=2),pe+$t>=Lt?(ze=0,pe=Lt):pe+$t>=1?(ze=(A*Ke-1)*Math.pow(2,re),pe+=$t):(ze=A*Math.pow(2,$t-1)*Math.pow(2,re),pe=0));re>=8;R[F+mr]=255&ze,mr+=Pr,ze/=256,re-=8);for(pe=pe<<re|ze,ct+=re;ct>0;R[F+mr]=255&pe,mr+=Pr,pe/=256,ct-=8);R[F+mr-Pr]|=128*zr};function wu(R){this.buf=ArrayBuffer.isView&&ArrayBuffer.isView(R)?R:new Uint8Array(R||0),this.pos=0,this.type=0,this.length=this.buf.length}wu.Varint=0,wu.Fixed64=1,wu.Bytes=2,wu.Fixed32=5;var Nx=4294967296,om=1/Nx,kw=typeof TextDecoder=="undefined"?null:new TextDecoder("utf-8");function Rv(R){return R.type===wu.Bytes?R.readVarint()+R.pos:R.pos+1}function sm(R,A,F){return F?4294967296*A+(R>>>0):4294967296*(A>>>0)+(R>>>0)}function Cw(R,A,F){var W=A<=16383?1:A<=2097151?2:A<=268435455?3:Math.floor(Math.log(A)/(7*Math.LN2));F.realloc(W);for(var re=F.pos-1;re>=R;re--)F.buf[re+W]=F.buf[re]}function Ux(R,A){for(var F=0;F<R.length;F++)A.writeVarint(R[F])}function q9(R,A){for(var F=0;F<R.length;F++)A.writeSVarint(R[F])}function B9(R,A){for(var F=0;F<R.length;F++)A.writeFloat(R[F])}function N9(R,A){for(var F=0;F<R.length;F++)A.writeDouble(R[F])}function U9(R,A){for(var F=0;F<R.length;F++)A.writeBoolean(R[F])}function LQ(R,A){for(var F=0;F<R.length;F++)A.writeFixed32(R[F])}function V9(R,A){for(var F=0;F<R.length;F++)A.writeSFixed32(R[F])}function G9(R,A){for(var F=0;F<R.length;F++)A.writeFixed64(R[F])}function H9(R,A){for(var F=0;F<R.length;F++)A.writeSFixed64(R[F])}function uy(R,A){return(R[A]|R[A+1]<<8|R[A+2]<<16)+16777216*R[A+3]}function Vx(R,A,F){R[F]=A,R[F+1]=A>>>8,R[F+2]=A>>>16,R[F+3]=A>>>24}function mC(R,A){return(R[A]|R[A+1]<<8|R[A+2]<<16)+(R[A+3]<<24)}wu.prototype={destroy:function(){this.buf=null},readFields:function(R,A,F){for(F=F||this.length;this.pos<F;){var W=this.readVarint(),re=W>>3,fe=this.pos;this.type=7&W,R(re,A,this),this.pos===fe&&this.skip(W)}return A},readMessage:function(R,A){return this.readFields(R,A,this.readVarint()+this.pos)},readFixed32:function(){var R=uy(this.buf,this.pos);return this.pos+=4,R},readSFixed32:function(){var R=mC(this.buf,this.pos);return this.pos+=4,R},readFixed64:function(){var R=uy(this.buf,this.pos)+uy(this.buf,this.pos+4)*Nx;return this.pos+=8,R},readSFixed64:function(){var R=uy(this.buf,this.pos)+mC(this.buf,this.pos+4)*Nx;return this.pos+=8,R},readFloat:function(){var R=ly(this.buf,this.pos,!0,23,4);return this.pos+=4,R},readDouble:function(){var R=ly(this.buf,this.pos,!0,52,8);return this.pos+=8,R},readVarint:function(R){var A,F,W=this.buf;return A=127&(F=W[this.pos++]),F<128?A:(A|=(127&(F=W[this.pos++]))<<7,F<128?A:(A|=(127&(F=W[this.pos++]))<<14,F<128?A:(A|=(127&(F=W[this.pos++]))<<21,F<128?A:function(re,fe,pe){var ze,Ke,ct=pe.buf;if(ze=(112&(Ke=ct[pe.pos++]))>>4,Ke<128||(ze|=(127&(Ke=ct[pe.pos++]))<<3,Ke<128)||(ze|=(127&(Ke=ct[pe.pos++]))<<10,Ke<128)||(ze|=(127&(Ke=ct[pe.pos++]))<<17,Ke<128)||(ze|=(127&(Ke=ct[pe.pos++]))<<24,Ke<128)||(ze|=(1&(Ke=ct[pe.pos++]))<<31,Ke<128))return sm(re,ze,fe);throw new Error("Expected varint not more than 10 bytes")}(A|=(15&(F=W[this.pos]))<<28,R,this))))},readVarint64:function(){return this.readVarint(!0)},readSVarint:function(){var R=this.readVarint();return R%2==1?(R+1)/-2:R/2},readBoolean:function(){return!!this.readVarint()},readString:function(){var R=this.readVarint()+this.pos,A=this.pos;return this.pos=R,R-A>=12&&kw?function(F,W,re){return kw.decode(F.subarray(W,re))}(this.buf,A,R):function(F,W,re){for(var fe="",pe=W;pe<re;){var ze,Ke,ct,Lt=F[pe],$t=null,fr=Lt>239?4:Lt>223?3:Lt>191?2:1;if(pe+fr>re)break;fr===1?Lt<128&&($t=Lt):fr===2?(192&(ze=F[pe+1]))==128&&($t=(31&Lt)<<6|63&ze)<=127&&($t=null):fr===3?(Ke=F[pe+2],(192&(ze=F[pe+1]))==128&&(192&Ke)==128&&(($t=(15&Lt)<<12|(63&ze)<<6|63&Ke)<=2047||$t>=55296&&$t<=57343)&&($t=null)):fr===4&&(Ke=F[pe+2],ct=F[pe+3],(192&(ze=F[pe+1]))==128&&(192&Ke)==128&&(192&ct)==128&&(($t=(15&Lt)<<18|(63&ze)<<12|(63&Ke)<<6|63&ct)<=65535||$t>=1114112)&&($t=null)),$t===null?($t=65533,fr=1):$t>65535&&($t-=65536,fe+=String.fromCharCode($t>>>10&1023|55296),$t=56320|1023&$t),fe+=String.fromCharCode($t),pe+=fr}return fe}(this.buf,A,R)},readBytes:function(){var R=this.readVarint()+this.pos,A=this.buf.subarray(this.pos,R);return this.pos=R,A},readPackedVarint:function(R,A){if(this.type!==wu.Bytes)return R.push(this.readVarint(A));var F=Rv(this);for(R=R||[];this.pos<F;)R.push(this.readVarint(A));return R},readPackedSVarint:function(R){if(this.type!==wu.Bytes)return R.push(this.readSVarint());var A=Rv(this);for(R=R||[];this.pos<A;)R.push(this.readSVarint());return R},readPackedBoolean:function(R){if(this.type!==wu.Bytes)return R.push(this.readBoolean());var A=Rv(this);for(R=R||[];this.pos<A;)R.push(this.readBoolean());return R},readPackedFloat:function(R){if(this.type!==wu.Bytes)return R.push(this.readFloat());var A=Rv(this);for(R=R||[];this.pos<A;)R.push(this.readFloat());return R},readPackedDouble:function(R){if(this.type!==wu.Bytes)return R.push(this.readDouble());var A=Rv(this);for(R=R||[];this.pos<A;)R.push(this.readDouble());return R},readPackedFixed32:function(R){if(this.type!==wu.Bytes)return R.push(this.readFixed32());var A=Rv(this);for(R=R||[];this.pos<A;)R.push(this.readFixed32());return R},readPackedSFixed32:function(R){if(this.type!==wu.Bytes)return R.push(this.readSFixed32());var A=Rv(this);for(R=R||[];this.pos<A;)R.push(this.readSFixed32());return R},readPackedFixed64:function(R){if(this.type!==wu.Bytes)return R.push(this.readFixed64());var A=Rv(this);for(R=R||[];this.pos<A;)R.push(this.readFixed64());return R},readPackedSFixed64:function(R){if(this.type!==wu.Bytes)return R.push(this.readSFixed64());var A=Rv(this);for(R=R||[];this.pos<A;)R.push(this.readSFixed64());return R},skip:function(R){var A=7&R;if(A===wu.Varint)for(;this.buf[this.pos++]>127;);else if(A===wu.Bytes)this.pos=this.readVarint()+this.pos;else if(A===wu.Fixed32)this.pos+=4;else{if(A!==wu.Fixed64)throw new Error("Unimplemented type: "+A);this.pos+=8}},writeTag:function(R,A){this.writeVarint(R<<3|A)},realloc:function(R){for(var A=this.length||16;A<this.pos+R;)A*=2;if(A!==this.length){var F=new Uint8Array(A);F.set(this.buf),this.buf=F,this.length=A}},finish:function(){return this.length=this.pos,this.pos=0,this.buf.subarray(0,this.length)},writeFixed32:function(R){this.realloc(4),Vx(this.buf,R,this.pos),this.pos+=4},writeSFixed32:function(R){this.realloc(4),Vx(this.buf,R,this.pos),this.pos+=4},writeFixed64:function(R){this.realloc(8),Vx(this.buf,-1&R,this.pos),Vx(this.buf,Math.floor(R*om),this.pos+4),this.pos+=8},writeSFixed64:function(R){this.realloc(8),Vx(this.buf,-1&R,this.pos),Vx(this.buf,Math.floor(R*om),this.pos+4),this.pos+=8},writeVarint:function(R){(R=+R||0)>268435455||R<0?function(A,F){var W,re;if(A>=0?(W=A%4294967296|0,re=A/4294967296|0):(re=~(-A/4294967296),4294967295^(W=~(-A%4294967296))?W=W+1|0:(W=0,re=re+1|0)),A>=18446744073709552e3||A<-18446744073709552e3)throw new Error("Given varint doesn't fit into 10 bytes");F.realloc(10),function(fe,pe,ze){ze.buf[ze.pos++]=127&fe|128,fe>>>=7,ze.buf[ze.pos++]=127&fe|128,fe>>>=7,ze.buf[ze.pos++]=127&fe|128,fe>>>=7,ze.buf[ze.pos++]=127&fe|128,ze.buf[ze.pos]=127&(fe>>>=7)}(W,0,F),function(fe,pe){var ze=(7&fe)<<4;pe.buf[pe.pos++]|=ze|((fe>>>=3)?128:0),fe&&(pe.buf[pe.pos++]=127&fe|((fe>>>=7)?128:0),fe&&(pe.buf[pe.pos++]=127&fe|((fe>>>=7)?128:0),fe&&(pe.buf[pe.pos++]=127&fe|((fe>>>=7)?128:0),fe&&(pe.buf[pe.pos++]=127&fe|((fe>>>=7)?128:0),fe&&(pe.buf[pe.pos++]=127&fe)))))}(re,F)}(R,this):(this.realloc(4),this.buf[this.pos++]=127&R|(R>127?128:0),R<=127||(this.buf[this.pos++]=127&(R>>>=7)|(R>127?128:0),R<=127||(this.buf[this.pos++]=127&(R>>>=7)|(R>127?128:0),R<=127||(this.buf[this.pos++]=R>>>7&127))))},writeSVarint:function(R){this.writeVarint(R<0?2*-R-1:2*R)},writeBoolean:function(R){this.writeVarint(!!R)},writeString:function(R){R=String(R),this.realloc(4*R.length),this.pos++;var A=this.pos;this.pos=function(W,re,fe){for(var pe,ze,Ke=0;Ke<re.length;Ke++){if((pe=re.charCodeAt(Ke))>55295&&pe<57344){if(!ze){pe>56319||Ke+1===re.length?(W[fe++]=239,W[fe++]=191,W[fe++]=189):ze=pe;continue}if(pe<56320){W[fe++]=239,W[fe++]=191,W[fe++]=189,ze=pe;continue}pe=ze-55296<<10|pe-56320|65536,ze=null}else ze&&(W[fe++]=239,W[fe++]=191,W[fe++]=189,ze=null);pe<128?W[fe++]=pe:(pe<2048?W[fe++]=pe>>6|192:(pe<65536?W[fe++]=pe>>12|224:(W[fe++]=pe>>18|240,W[fe++]=pe>>12&63|128),W[fe++]=pe>>6&63|128),W[fe++]=63&pe|128)}return fe}(this.buf,R,this.pos);var F=this.pos-A;F>=128&&Cw(A,F,this),this.pos=A-1,this.writeVarint(F),this.pos+=F},writeFloat:function(R){this.realloc(4),T1(this.buf,R,this.pos,!0,23,4),this.pos+=4},writeDouble:function(R){this.realloc(8),T1(this.buf,R,this.pos,!0,52,8),this.pos+=8},writeBytes:function(R){var A=R.length;this.writeVarint(A),this.realloc(A);for(var F=0;F<A;F++)this.buf[this.pos++]=R[F]},writeRawMessage:function(R,A){this.pos++;var F=this.pos;R(A,this);var W=this.pos-F;W>=128&&Cw(F,W,this),this.pos=F-1,this.writeVarint(W),this.pos+=W},writeMessage:function(R,A,F){this.writeTag(R,wu.Bytes),this.writeRawMessage(A,F)},writePackedVarint:function(R,A){A.length&&this.writeMessage(R,Ux,A)},writePackedSVarint:function(R,A){A.length&&this.writeMessage(R,q9,A)},writePackedBoolean:function(R,A){A.length&&this.writeMessage(R,U9,A)},writePackedFloat:function(R,A){A.length&&this.writeMessage(R,B9,A)},writePackedDouble:function(R,A){A.length&&this.writeMessage(R,N9,A)},writePackedFixed32:function(R,A){A.length&&this.writeMessage(R,LQ,A)},writePackedSFixed32:function(R,A){A.length&&this.writeMessage(R,V9,A)},writePackedFixed64:function(R,A){A.length&&this.writeMessage(R,G9,A)},writePackedSFixed64:function(R,A){A.length&&this.writeMessage(R,H9,A)},writeBytesField:function(R,A){this.writeTag(R,wu.Bytes),this.writeBytes(A)},writeFixed32Field:function(R,A){this.writeTag(R,wu.Fixed32),this.writeFixed32(A)},writeSFixed32Field:function(R,A){this.writeTag(R,wu.Fixed32),this.writeSFixed32(A)},writeFixed64Field:function(R,A){this.writeTag(R,wu.Fixed64),this.writeFixed64(A)},writeSFixed64Field:function(R,A){this.writeTag(R,wu.Fixed64),this.writeSFixed64(A)},writeVarintField:function(R,A){this.writeTag(R,wu.Varint),this.writeVarint(A)},writeSVarintField:function(R,A){this.writeTag(R,wu.Varint),this.writeSVarint(A)},writeStringField:function(R,A){this.writeTag(R,wu.Bytes),this.writeString(A)},writeFloatField:function(R,A){this.writeTag(R,wu.Fixed32),this.writeFloat(A)},writeDoubleField:function(R,A){this.writeTag(R,wu.Fixed64),this.writeDouble(A)},writeBooleanField:function(R,A){this.writeVarintField(R,!!A)}};var nS=o(Sd);let aS=3;function PQ(R,A,F){R===1&&F.readMessage(j9,A)}function j9(R,A,F){if(R===3){let{id:W,bitmap:re,width:fe,height:pe,left:ze,top:Ke,advance:ct}=F.readMessage(yC,{});A.push({id:W,bitmap:new na({width:fe+2*aS,height:pe+2*aS},re),metrics:{width:fe,height:pe,left:ze,top:Ke,advance:ct}})}}function yC(R,A,F){R===1?A.id=F.readVarint():R===2?A.bitmap=F.readBytes():R===3?A.width=F.readVarint():R===4?A.height=F.readVarint():R===5?A.left=F.readSVarint():R===6?A.top=F.readSVarint():R===7&&(A.advance=F.readVarint())}let _C=aS;function oS(R){let A=0,F=0;for(let pe of R)A+=pe.w*pe.h,F=Math.max(F,pe.w);R.sort((pe,ze)=>ze.h-pe.h);let W=[{x:0,y:0,w:Math.max(Math.ceil(Math.sqrt(A/.95)),F),h:1/0}],re=0,fe=0;for(let pe of R)for(let ze=W.length-1;ze>=0;ze--){let Ke=W[ze];if(!(pe.w>Ke.w||pe.h>Ke.h)){if(pe.x=Ke.x,pe.y=Ke.y,fe=Math.max(fe,pe.y+pe.h),re=Math.max(re,pe.x+pe.w),pe.w===Ke.w&&pe.h===Ke.h){let ct=W.pop();ze<W.length&&(W[ze]=ct)}else pe.h===Ke.h?(Ke.x+=pe.w,Ke.w-=pe.w):pe.w===Ke.w?(Ke.y+=pe.h,Ke.h-=pe.h):(W.push({x:Ke.x+pe.w,y:Ke.y,w:Ke.w-pe.w,h:pe.h}),Ke.y+=pe.h,Ke.h-=pe.h);break}}return{w:re,h:fe,fill:A/(re*fe)||0}}let Md=1;class Lw{constructor(A,{pixelRatio:F,version:W,stretchX:re,stretchY:fe,content:pe,textFitWidth:ze,textFitHeight:Ke}){this.paddedRect=A,this.pixelRatio=F,this.stretchX=re,this.stretchY=fe,this.content=pe,this.version=W,this.textFitWidth=ze,this.textFitHeight=Ke}get tl(){return[this.paddedRect.x+Md,this.paddedRect.y+Md]}get br(){return[this.paddedRect.x+this.paddedRect.w-Md,this.paddedRect.y+this.paddedRect.h-Md]}get tlbr(){return this.tl.concat(this.br)}get displaySize(){return[(this.paddedRect.w-2*Md)/this.pixelRatio,(this.paddedRect.h-2*Md)/this.pixelRatio]}}class Pw{constructor(A,F){let W={},re={};this.haveRenderCallbacks=[];let fe=[];this.addImages(A,W,fe),this.addImages(F,re,fe);let{w:pe,h:ze}=oS(fe),Ke=new Ki({width:pe||1,height:ze||1});for(let ct in A){let Lt=A[ct],$t=W[ct].paddedRect;Ki.copy(Lt.data,Ke,{x:0,y:0},{x:$t.x+Md,y:$t.y+Md},Lt.data)}for(let ct in F){let Lt=F[ct],$t=re[ct].paddedRect,fr=$t.x+Md,mr=$t.y+Md,Pr=Lt.data.width,zr=Lt.data.height;Ki.copy(Lt.data,Ke,{x:0,y:0},{x:fr,y:mr},Lt.data),Ki.copy(Lt.data,Ke,{x:0,y:zr-1},{x:fr,y:mr-1},{width:Pr,height:1}),Ki.copy(Lt.data,Ke,{x:0,y:0},{x:fr,y:mr+zr},{width:Pr,height:1}),Ki.copy(Lt.data,Ke,{x:Pr-1,y:0},{x:fr-1,y:mr},{width:1,height:zr}),Ki.copy(Lt.data,Ke,{x:0,y:0},{x:fr+Pr,y:mr},{width:1,height:zr})}this.image=Ke,this.iconPositions=W,this.patternPositions=re}addImages(A,F,W){for(let re in A){let fe=A[re],pe={x:0,y:0,w:fe.data.width+2*Md,h:fe.data.height+2*Md};W.push(pe),F[re]=new Lw(pe,fe),fe.hasRenderCallback&&this.haveRenderCallbacks.push(re)}}patchUpdatedImages(A,F){A.dispatchRenderCallbacks(this.haveRenderCallbacks);for(let W in A.updatedImages)this.patchUpdatedImage(this.iconPositions[W],A.getImage(W),F),this.patchUpdatedImage(this.patternPositions[W],A.getImage(W),F)}patchUpdatedImage(A,F,W){if(!A||!F||A.version===F.version)return;A.version=F.version;let[re,fe]=A.tl;W.update(F.data,void 0,{x:re,y:fe})}}var Dv;Fi("ImagePosition",Lw),Fi("ImageAtlas",Pw),i.ah=void 0,(Dv=i.ah||(i.ah={}))[Dv.none=0]="none",Dv[Dv.horizontal=1]="horizontal",Dv[Dv.vertical=2]="vertical",Dv[Dv.horizontalOnly=3]="horizontalOnly";let Sh=-17;class Gx{constructor(){this.scale=1,this.fontStack="",this.imageName=null}static forText(A,F){let W=new Gx;return W.scale=A||1,W.fontStack=F,W}static forImage(A){let F=new Gx;return F.imageName=A,F}}class A1{constructor(){this.text="",this.sectionIndex=[],this.sections=[],this.imageSectionID=null}static fromFeature(A,F){let W=new A1;for(let re=0;re<A.sections.length;re++){let fe=A.sections[re];fe.image?W.addImageSection(fe):W.addTextSection(fe,F)}return W}length(){return this.text.length}getSection(A){return this.sections[this.sectionIndex[A]]}getSectionIndex(A){return this.sectionIndex[A]}getCharCode(A){return this.text.charCodeAt(A)}verticalizePunctuation(){this.text=function(A){let F="";for(let W=0;W<A.length;W++){let re=A.charCodeAt(W+1)||null,fe=A.charCodeAt(W-1)||null;F+=re&&ml(re)&&!vc[A[W+1]]||fe&&ml(fe)&&!vc[A[W-1]]||!vc[A[W]]?A[W]:vc[A[W]]}return F}(this.text)}trim(){let A=0;for(let W=0;W<this.text.length&&Iw[this.text.charCodeAt(W)];W++)A++;let F=this.text.length;for(let W=this.text.length-1;W>=0&&W>=A&&Iw[this.text.charCodeAt(W)];W--)F--;this.text=this.text.substring(A,F),this.sectionIndex=this.sectionIndex.slice(A,F)}substring(A,F){let W=new A1;return W.text=this.text.substring(A,F),W.sectionIndex=this.sectionIndex.slice(A,F),W.sections=this.sections,W}toString(){return this.text}getMaxScale(){return this.sectionIndex.reduce((A,F)=>Math.max(A,this.sections[F].scale),0)}addTextSection(A,F){this.text+=A.text,this.sections.push(Gx.forText(A.scale,A.fontStack||F));let W=this.sections.length-1;for(let re=0;re<A.text.length;++re)this.sectionIndex.push(W)}addImageSection(A){let F=A.image?A.image.name:"";if(F.length===0)return void T("Can't add FormattedSection with an empty image.");let W=this.getNextImageSectionCharCode();W?(this.text+=String.fromCharCode(W),this.sections.push(Gx.forImage(F)),this.sectionIndex.push(this.sections.length-1)):T("Reached maximum number of images 6401")}getNextImageSectionCharCode(){return this.imageSectionID?this.imageSectionID>=63743?null:++this.imageSectionID:(this.imageSectionID=57344,this.imageSectionID)}}function Hx(R,A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t,fr,mr,Pr){let zr=A1.fromFeature(R,re),ui;$t===i.ah.vertical&&zr.verticalizePunctuation();let{processBidirectionalText:yi,processStyledBidirectionalText:vn}=ys;if(yi&&zr.sections.length===1){ui=[];let Tn=yi(zr.toString(),S1(zr,ct,fe,A,W,mr));for(let pa of Tn){let ro=new A1;ro.text=pa,ro.sections=zr.sections;for(let Vo=0;Vo<pa.length;Vo++)ro.sectionIndex.push(0);ui.push(ro)}}else if(vn){ui=[];let Tn=vn(zr.text,zr.sectionIndex,S1(zr,ct,fe,A,W,mr));for(let pa of Tn){let ro=new A1;ro.text=pa[0],ro.sectionIndex=pa[1],ro.sections=zr.sections,ui.push(ro)}}else ui=function(Tn,pa){let ro=[],Vo=Tn.text,Xa=0;for(let sa of pa)ro.push(Tn.substring(Xa,sa)),Xa=sa;return Xa<Vo.length&&ro.push(Tn.substring(Xa,Vo.length)),ro}(zr,S1(zr,ct,fe,A,W,mr));let zi=[],un={positionedLines:zi,text:zr.toString(),top:Lt[1],bottom:Lt[1],left:Lt[0],right:Lt[0],writingMode:$t,iconsInText:!1,verticalizable:!1};return function(Tn,pa,ro,Vo,Xa,sa,Mo,fo,lo,Xn,Ro,uo){let $o=0,Ju=Sh,qu=0,Mh=0,Qv=fo==="right"?1:fo==="left"?0:.5,ld=0;for(let Af of Xa){Af.trim();let kh=Af.getMaxScale(),Ed=(kh-1)*eu,ud={positionedGlyphs:[],lineOffset:0};Tn.positionedLines[ld]=ud;let Wh=ud.positionedGlyphs,Df=0;if(!Af.length()){Ju+=sa,++ld;continue}for(let lv=0;lv<Af.length();lv++){let tu=Af.getSection(lv),pc=Af.getSectionIndex(lv),$u=Af.getCharCode(lv),zv=0,ff=null,P1=null,v0=null,Gp=eu,ep=!(lo===i.ah.horizontal||!Ro&&!Ho($u)||Ro&&(Iw[$u]||(Eh=$u,new RegExp("\\p{sc=Arab}","u").test(String.fromCodePoint(Eh)))));if(tu.imageName){let Gc=Vo[tu.imageName];if(!Gc)continue;v0=tu.imageName,Tn.iconsInText=Tn.iconsInText||!0,P1=Gc.paddedRect;let Zf=Gc.displaySize;tu.scale=tu.scale*eu/uo,ff={width:Zf[0],height:Zf[1],left:Md,top:-_C,advance:ep?Zf[1]:Zf[0]},zv=Ed+(eu-Zf[1]*tu.scale),Gp=ff.advance;let tp=ep?Zf[0]*tu.scale-eu*kh:Zf[1]*tu.scale-eu*kh;tp>0&&tp>Df&&(Df=tp)}else{let Gc=ro[tu.fontStack],Zf=Gc&&Gc[$u];if(Zf&&Zf.rect)P1=Zf.rect,ff=Zf.metrics;else{let tp=pa[tu.fontStack],gg=tp&&tp[$u];if(!gg)continue;ff=gg.metrics}zv=(kh-tu.scale)*eu}ep?(Tn.verticalizable=!0,Wh.push({glyph:$u,imageName:v0,x:$o,y:Ju+zv,vertical:ep,scale:tu.scale,fontStack:tu.fontStack,sectionIndex:pc,metrics:ff,rect:P1}),$o+=Gp*tu.scale+Xn):(Wh.push({glyph:$u,imageName:v0,x:$o,y:Ju+zv,vertical:ep,scale:tu.scale,fontStack:tu.fontStack,sectionIndex:pc,metrics:ff,rect:P1}),$o+=ff.advance*tu.scale+Xn)}Wh.length!==0&&(qu=Math.max($o-Xn,qu),lm(Wh,0,Wh.length-1,Qv,Df)),$o=0;let Fv=sa*kh+Df;ud.lineOffset=Math.max(Df,Ed),Ju+=Fv,Mh=Math.max(Fv,Mh),++ld}var Eh;let Gd=Ju-Sh,{horizontalAlign:Hd,verticalAlign:jd}=Dw(Mo);(function(Af,kh,Ed,ud,Wh,Df,Fv,lv,tu){let pc=(kh-Ed)*Wh,$u=0;$u=Df!==Fv?-lv*ud-Sh:(-ud*tu+.5)*Fv;for(let zv of Af)for(let ff of zv.positionedGlyphs)ff.x+=pc,ff.y+=$u})(Tn.positionedLines,Qv,Hd,jd,qu,Mh,sa,Gd,Xa.length),Tn.top+=-jd*Gd,Tn.bottom=Tn.top+Gd,Tn.left+=-Hd*qu,Tn.right=Tn.left+qu}(un,A,F,W,ui,pe,ze,Ke,$t,ct,fr,Pr),!function(Tn){for(let pa of Tn)if(pa.positionedGlyphs.length!==0)return!1;return!0}(zi)&&un}let Iw={9:!0,10:!0,11:!0,12:!0,13:!0,32:!0},W9={10:!0,32:!0,38:!0,41:!0,43:!0,45:!0,47:!0,173:!0,183:!0,8203:!0,8208:!0,8211:!0,8231:!0},X9={40:!0};function xC(R,A,F,W,re,fe){if(A.imageName){let pe=W[A.imageName];return pe?pe.displaySize[0]*A.scale*eu/fe+re:0}{let pe=F[A.fontStack],ze=pe&&pe[R];return ze?ze.metrics.advance*A.scale+re:0}}function bC(R,A,F,W){let re=Math.pow(R-A,2);return W?R<A?re/2:2*re:re+Math.abs(F)*F}function Z9(R,A,F){let W=0;return R===10&&(W-=1e4),F&&(W+=150),R!==40&&R!==65288||(W+=50),A!==41&&A!==65289||(W+=50),W}function Rw(R,A,F,W,re,fe){let pe=null,ze=bC(A,F,re,fe);for(let Ke of W){let ct=bC(A-Ke.x,F,re,fe)+Ke.badness;ct<=ze&&(pe=Ke,ze=ct)}return{index:R,x:A,priorBreak:pe,badness:ze}}function wC(R){return R?wC(R.priorBreak).concat(R.index):[]}function S1(R,A,F,W,re,fe){if(!R)return[];let pe=[],ze=function($t,fr,mr,Pr,zr,ui){let yi=0;for(let vn=0;vn<$t.length();vn++){let zi=$t.getSection(vn);yi+=xC($t.getCharCode(vn),zi,Pr,zr,fr,ui)}return yi/Math.max(1,Math.ceil(yi/mr))}(R,A,F,W,re,fe),Ke=R.text.indexOf("\u200B")>=0,ct=0;for(let $t=0;$t<R.length();$t++){let fr=R.getSection($t),mr=R.getCharCode($t);if(Iw[mr]||(ct+=xC(mr,fr,W,re,A,fe)),$t<R.length()-1){let Pr=!((Lt=mr)<11904)&&(!!Dn["CJK Compatibility Forms"](Lt)||!!Dn["CJK Compatibility"](Lt)||!!Dn["CJK Strokes"](Lt)||!!Dn["CJK Symbols and Punctuation"](Lt)||!!Dn["Enclosed CJK Letters and Months"](Lt)||!!Dn["Halfwidth and Fullwidth Forms"](Lt)||!!Dn["Ideographic Description Characters"](Lt)||!!Dn["Vertical Forms"](Lt)||ws.test(String.fromCodePoint(Lt)));(W9[mr]||Pr||fr.imageName||$t!==R.length()-2&&X9[R.getCharCode($t+1)])&&pe.push(Rw($t+1,ct,ze,pe,Z9(mr,R.getCharCode($t+1),Pr&&Ke),!1))}}var Lt;return wC(Rw(R.length(),ct,ze,pe,0,!0))}function Dw(R){let A=.5,F=.5;switch(R){case"right":case"top-right":case"bottom-right":A=1;break;case"left":case"top-left":case"bottom-left":A=0}switch(R){case"bottom":case"bottom-right":case"bottom-left":F=1;break;case"top":case"top-right":case"top-left":F=0}return{horizontalAlign:A,verticalAlign:F}}function lm(R,A,F,W,re){if(!W&&!re)return;let fe=R[F],pe=(R[F].x+fe.metrics.advance*fe.scale)*W;for(let ze=A;ze<=F;ze++)R[ze].x-=pe,R[ze].y+=re}function jx(R,A,F){let{horizontalAlign:W,verticalAlign:re}=Dw(F),fe=A[0]-R.displaySize[0]*W,pe=A[1]-R.displaySize[1]*re;return{image:R,top:pe,bottom:pe+R.displaySize[1],left:fe,right:fe+R.displaySize[0]}}function TC(R){var A,F;let W=R.left,re=R.top,fe=R.right-W,pe=R.bottom-re,ze=(A=R.image.textFitWidth)!==null&&A!==void 0?A:"stretchOrShrink",Ke=(F=R.image.textFitHeight)!==null&&F!==void 0?F:"stretchOrShrink",ct=(R.image.content[2]-R.image.content[0])/(R.image.content[3]-R.image.content[1]);if(Ke==="proportional"){if(ze==="stretchOnly"&&fe/pe<ct||ze==="proportional"){let Lt=Math.ceil(pe*ct);W*=Lt/fe,fe=Lt}}else if(ze==="proportional"&&Ke==="stretchOnly"&&ct!==0&&fe/pe>ct){let Lt=Math.ceil(fe/ct);re*=Lt/pe,pe=Lt}return{x1:W,y1:re,x2:W+fe,y2:re+pe}}function AC(R,A,F,W,re,fe){let pe=R.image,ze;if(pe.content){let ui=pe.content,yi=pe.pixelRatio||1;ze=[ui[0]/yi,ui[1]/yi,pe.displaySize[0]-ui[2]/yi,pe.displaySize[1]-ui[3]/yi]}let Ke=A.left*fe,ct=A.right*fe,Lt,$t,fr,mr;F==="width"||F==="both"?(mr=re[0]+Ke-W[3],$t=re[0]+ct+W[1]):(mr=re[0]+(Ke+ct-pe.displaySize[0])/2,$t=mr+pe.displaySize[0]);let Pr=A.top*fe,zr=A.bottom*fe;return F==="height"||F==="both"?(Lt=re[1]+Pr-W[0],fr=re[1]+zr+W[2]):(Lt=re[1]+(Pr+zr-pe.displaySize[1])/2,fr=Lt+pe.displaySize[1]),{image:pe,top:Lt,right:$t,bottom:fr,left:mr,collisionPadding:ze}}let Wx=255,d0=128,um=Wx*d0;function SC(R,A){let{expression:F}=A;if(F.kind==="constant")return{kind:"constant",layoutSize:F.evaluate(new rs(R+1))};if(F.kind==="source")return{kind:"source"};{let{zoomStops:W,interpolationType:re}=F,fe=0;for(;fe<W.length&&W[fe]<=R;)fe++;fe=Math.max(0,fe-1);let pe=fe;for(;pe<W.length&&W[pe]<R+1;)pe++;pe=Math.min(W.length-1,pe);let ze=W[fe],Ke=W[pe];return F.kind==="composite"?{kind:"composite",minZoom:ze,maxZoom:Ke,interpolationType:re}:{kind:"camera",minZoom:ze,maxZoom:Ke,minSize:F.evaluate(new rs(ze)),maxSize:F.evaluate(new rs(Ke)),interpolationType:re}}}function sS(R,A,F){let W="never",re=R.get(A);return re?W=re:R.get(F)&&(W="always"),W}let Y9=br.VectorTileFeature.types,K9=[{name:"a_fade_opacity",components:1,type:"Uint8",offset:0}];function Fw(R,A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t,fr){let mr=ze?Math.min(um,Math.round(ze[0])):0,Pr=ze?Math.min(um,Math.round(ze[1])):0;R.emplaceBack(A,F,Math.round(32*W),Math.round(32*re),fe,pe,(mr<<1)+(Ke?1:0),Pr,16*ct,16*Lt,256*$t,256*fr)}function lS(R,A,F){R.emplaceBack(A.x,A.y,F),R.emplaceBack(A.x,A.y,F),R.emplaceBack(A.x,A.y,F),R.emplaceBack(A.x,A.y,F)}function uS(R){for(let A of R.sections)if(no(A.text))return!0;return!1}class cS{constructor(A){this.layoutVertexArray=new Ql,this.indexArray=new se,this.programConfigurations=A,this.segments=new Ye,this.dynamicLayoutVertexArray=new dc,this.opacityVertexArray=new Tl,this.hasVisibleVertices=!1,this.placedSymbolArray=new Ta}isEmpty(){return this.layoutVertexArray.length===0&&this.indexArray.length===0&&this.dynamicLayoutVertexArray.length===0&&this.opacityVertexArray.length===0}upload(A,F,W,re){this.isEmpty()||(W&&(this.layoutVertexBuffer=A.createVertexBuffer(this.layoutVertexArray,im.members),this.indexBuffer=A.createIndexBuffer(this.indexArray,F),this.dynamicLayoutVertexBuffer=A.createVertexBuffer(this.dynamicLayoutVertexArray,b1.members,!0),this.opacityVertexBuffer=A.createVertexBuffer(this.opacityVertexArray,K9,!0),this.opacityVertexBuffer.itemSize=1),(W||re)&&this.programConfigurations.upload(A))}destroy(){this.layoutVertexBuffer&&(this.layoutVertexBuffer.destroy(),this.indexBuffer.destroy(),this.programConfigurations.destroy(),this.segments.destroy(),this.dynamicLayoutVertexBuffer.destroy(),this.opacityVertexBuffer.destroy())}}Fi("SymbolBuffers",cS);class cm{constructor(A,F,W){this.layoutVertexArray=new A,this.layoutAttributes=F,this.indexArray=new W,this.segments=new Ye,this.collisionVertexArray=new X}upload(A){this.layoutVertexBuffer=A.createVertexBuffer(this.layoutVertexArray,this.layoutAttributes),this.indexBuffer=A.createIndexBuffer(this.indexArray),this.collisionVertexBuffer=A.createVertexBuffer(this.collisionVertexArray,w1.members,!0)}destroy(){this.layoutVertexBuffer&&(this.layoutVertexBuffer.destroy(),this.indexBuffer.destroy(),this.segments.destroy(),this.collisionVertexBuffer.destroy())}}Fi("CollisionBuffers",cm);class M1{constructor(A){this.collisionBoxArray=A.collisionBoxArray,this.zoom=A.zoom,this.overscaling=A.overscaling,this.layers=A.layers,this.layerIds=this.layers.map(pe=>pe.id),this.index=A.index,this.pixelRatio=A.pixelRatio,this.sourceLayerIndex=A.sourceLayerIndex,this.hasPattern=!1,this.hasRTLText=!1,this.sortKeyRanges=[],this.collisionCircleArray=[],this.placementInvProjMatrix=Ri([]),this.placementViewportMatrix=Ri([]);let F=this.layers[0]._unevaluatedLayout._values;this.textSizeData=SC(this.zoom,F["text-size"]),this.iconSizeData=SC(this.zoom,F["icon-size"]);let W=this.layers[0].layout,re=W.get("symbol-sort-key"),fe=W.get("symbol-z-order");this.canOverlap=sS(W,"text-overlap","text-allow-overlap")!=="never"||sS(W,"icon-overlap","icon-allow-overlap")!=="never"||W.get("text-ignore-placement")||W.get("icon-ignore-placement"),this.sortFeaturesByKey=fe!=="viewport-y"&&!re.isConstant(),this.sortFeaturesByY=(fe==="viewport-y"||fe==="auto"&&!this.sortFeaturesByKey)&&this.canOverlap,W.get("symbol-placement")==="point"&&(this.writingModes=W.get("text-writing-mode").map(pe=>i.ah[pe])),this.stateDependentLayerIds=this.layers.filter(pe=>pe.isStateDependent()).map(pe=>pe.id),this.sourceID=A.sourceID}createArrays(){this.text=new cS(new Ms(this.layers,this.zoom,A=>/^text/.test(A))),this.icon=new cS(new Ms(this.layers,this.zoom,A=>/^icon/.test(A))),this.glyphOffsetArray=new Co,this.lineVertexArray=new Qa,this.symbolInstances=new $a,this.textAnchorOffsets=new Bo}calculateGlyphDependencies(A,F,W,re,fe){for(let pe=0;pe<A.length;pe++)if(F[A.charCodeAt(pe)]=!0,(W||re)&&fe){let ze=vc[A.charAt(pe)];ze&&(F[ze.charCodeAt(0)]=!0)}}populate(A,F,W){let re=this.layers[0],fe=re.layout,pe=fe.get("text-font"),ze=fe.get("text-field"),Ke=fe.get("icon-image"),ct=(ze.value.kind!=="constant"||ze.value.value instanceof ri&&!ze.value.value.isEmpty()||ze.value.value.toString().length>0)&&(pe.value.kind!=="constant"||pe.value.value.length>0),Lt=Ke.value.kind!=="constant"||!!Ke.value.value||Object.keys(Ke.parameters).length>0,$t=fe.get("symbol-sort-key");if(this.features=[],!ct&&!Lt)return;let fr=F.iconDependencies,mr=F.glyphDependencies,Pr=F.availableImages,zr=new rs(this.zoom);for(let{feature:ui,id:yi,index:vn,sourceLayerIndex:zi}of A){let un=re._featureFilter.needGeometry,Tn=Sl(ui,un);if(!re._featureFilter.filter(zr,Tn,W))continue;let pa,ro;if(un||(Tn.geometry=$s(ui)),ct){let Xa=re.getValueAndResolveTokens("text-field",Tn,W,Pr),sa=ri.factory(Xa),Mo=this.hasRTLText=this.hasRTLText||uS(sa);(!Mo||ys.getRTLTextPluginStatus()==="unavailable"||Mo&&ys.isParsed())&&(pa=am(sa,re,Tn))}if(Lt){let Xa=re.getValueAndResolveTokens("icon-image",Tn,W,Pr);ro=Xa instanceof tn?Xa:tn.fromString(Xa)}if(!pa&&!ro)continue;let Vo=this.sortFeaturesByKey?$t.evaluate(Tn,{},W):void 0;if(this.features.push({id:yi,text:pa,icon:ro,index:vn,sourceLayerIndex:zi,geometry:Tn.geometry,properties:ui.properties,type:Y9[ui.type],sortKey:Vo}),ro&&(fr[ro.name]=!0),pa){let Xa=pe.evaluate(Tn,{},W).join(","),sa=fe.get("text-rotation-alignment")!=="viewport"&&fe.get("symbol-placement")!=="point";this.allowVerticalPlacement=this.writingModes&&this.writingModes.indexOf(i.ah.vertical)>=0;for(let Mo of pa.sections)if(Mo.image)fr[Mo.image.name]=!0;else{let fo=Ka(pa.toString()),lo=Mo.fontStack||Xa,Xn=mr[lo]=mr[lo]||{};this.calculateGlyphDependencies(Mo.text,Xn,sa,this.allowVerticalPlacement,fo)}}}fe.get("symbol-placement")==="line"&&(this.features=function(ui){let yi={},vn={},zi=[],un=0;function Tn(Xa){zi.push(ui[Xa]),un++}function pa(Xa,sa,Mo){let fo=vn[Xa];return delete vn[Xa],vn[sa]=fo,zi[fo].geometry[0].pop(),zi[fo].geometry[0]=zi[fo].geometry[0].concat(Mo[0]),fo}function ro(Xa,sa,Mo){let fo=yi[sa];return delete yi[sa],yi[Xa]=fo,zi[fo].geometry[0].shift(),zi[fo].geometry[0]=Mo[0].concat(zi[fo].geometry[0]),fo}function Vo(Xa,sa,Mo){let fo=Mo?sa[0][sa[0].length-1]:sa[0][0];return`${Xa}:${fo.x}:${fo.y}`}for(let Xa=0;Xa<ui.length;Xa++){let sa=ui[Xa],Mo=sa.geometry,fo=sa.text?sa.text.toString():null;if(!fo){Tn(Xa);continue}let lo=Vo(fo,Mo),Xn=Vo(fo,Mo,!0);if(lo in vn&&Xn in yi&&vn[lo]!==yi[Xn]){let Ro=ro(lo,Xn,Mo),uo=pa(lo,Xn,zi[Ro].geometry);delete yi[lo],delete vn[Xn],vn[Vo(fo,zi[uo].geometry,!0)]=uo,zi[Ro].geometry=null}else lo in vn?pa(lo,Xn,Mo):Xn in yi?ro(lo,Xn,Mo):(Tn(Xa),yi[lo]=un-1,vn[Xn]=un-1)}return zi.filter(Xa=>Xa.geometry)}(this.features)),this.sortFeaturesByKey&&this.features.sort((ui,yi)=>ui.sortKey-yi.sortKey)}update(A,F,W){this.stateDependentLayers.length&&(this.text.programConfigurations.updatePaintArrays(A,F,this.layers,W),this.icon.programConfigurations.updatePaintArrays(A,F,this.layers,W))}isEmpty(){return this.symbolInstances.length===0&&!this.hasRTLText}uploadPending(){return!this.uploaded||this.text.programConfigurations.needsUpload||this.icon.programConfigurations.needsUpload}upload(A){!this.uploaded&&this.hasDebugData()&&(this.textCollisionBox.upload(A),this.iconCollisionBox.upload(A)),this.text.upload(A,this.sortFeaturesByY,!this.uploaded,this.text.programConfigurations.needsUpload),this.icon.upload(A,this.sortFeaturesByY,!this.uploaded,this.icon.programConfigurations.needsUpload),this.uploaded=!0}destroyDebugData(){this.textCollisionBox.destroy(),this.iconCollisionBox.destroy()}destroy(){this.text.destroy(),this.icon.destroy(),this.hasDebugData()&&this.destroyDebugData()}addToLineVertexArray(A,F){let W=this.lineVertexArray.length;if(A.segment!==void 0){let re=A.dist(F[A.segment+1]),fe=A.dist(F[A.segment]),pe={};for(let ze=A.segment+1;ze<F.length;ze++)pe[ze]={x:F[ze].x,y:F[ze].y,tileUnitDistanceFromAnchor:re},ze<F.length-1&&(re+=F[ze+1].dist(F[ze]));for(let ze=A.segment||0;ze>=0;ze--)pe[ze]={x:F[ze].x,y:F[ze].y,tileUnitDistanceFromAnchor:fe},ze>0&&(fe+=F[ze-1].dist(F[ze]));for(let ze=0;ze<F.length;ze++){let Ke=pe[ze];this.lineVertexArray.emplaceBack(Ke.x,Ke.y,Ke.tileUnitDistanceFromAnchor)}}return{lineStartIndex:W,lineLength:this.lineVertexArray.length-W}}addSymbols(A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t,fr){let mr=A.indexArray,Pr=A.layoutVertexArray,zr=A.segments.prepareSegment(4*F.length,Pr,mr,this.canOverlap?pe.sortKey:void 0),ui=this.glyphOffsetArray.length,yi=zr.vertexLength,vn=this.allowVerticalPlacement&&ze===i.ah.vertical?Math.PI/2:0,zi=pe.text&&pe.text.sections;for(let un=0;un<F.length;un++){let{tl:Tn,tr:pa,bl:ro,br:Vo,tex:Xa,pixelOffsetTL:sa,pixelOffsetBR:Mo,minFontScaleX:fo,minFontScaleY:lo,glyphOffset:Xn,isSDF:Ro,sectionIndex:uo}=F[un],$o=zr.vertexLength,Ju=Xn[1];Fw(Pr,Ke.x,Ke.y,Tn.x,Ju+Tn.y,Xa.x,Xa.y,W,Ro,sa.x,sa.y,fo,lo),Fw(Pr,Ke.x,Ke.y,pa.x,Ju+pa.y,Xa.x+Xa.w,Xa.y,W,Ro,Mo.x,sa.y,fo,lo),Fw(Pr,Ke.x,Ke.y,ro.x,Ju+ro.y,Xa.x,Xa.y+Xa.h,W,Ro,sa.x,Mo.y,fo,lo),Fw(Pr,Ke.x,Ke.y,Vo.x,Ju+Vo.y,Xa.x+Xa.w,Xa.y+Xa.h,W,Ro,Mo.x,Mo.y,fo,lo),lS(A.dynamicLayoutVertexArray,Ke,vn),mr.emplaceBack($o,$o+1,$o+2),mr.emplaceBack($o+1,$o+2,$o+3),zr.vertexLength+=4,zr.primitiveLength+=2,this.glyphOffsetArray.emplaceBack(Xn[0]),un!==F.length-1&&uo===F[un+1].sectionIndex||A.programConfigurations.populatePaintArrays(Pr.length,pe,pe.index,{},fr,zi&&zi[uo])}A.placedSymbolArray.emplaceBack(Ke.x,Ke.y,ui,this.glyphOffsetArray.length-ui,yi,ct,Lt,Ke.segment,W?W[0]:0,W?W[1]:0,re[0],re[1],ze,0,!1,0,$t)}_addCollisionDebugVertex(A,F,W,re,fe,pe){return F.emplaceBack(0,0),A.emplaceBack(W.x,W.y,re,fe,Math.round(pe.x),Math.round(pe.y))}addCollisionDebugVertices(A,F,W,re,fe,pe,ze){let Ke=fe.segments.prepareSegment(4,fe.layoutVertexArray,fe.indexArray),ct=Ke.vertexLength,Lt=fe.layoutVertexArray,$t=fe.collisionVertexArray,fr=ze.anchorX,mr=ze.anchorY;this._addCollisionDebugVertex(Lt,$t,pe,fr,mr,new u(A,F)),this._addCollisionDebugVertex(Lt,$t,pe,fr,mr,new u(W,F)),this._addCollisionDebugVertex(Lt,$t,pe,fr,mr,new u(W,re)),this._addCollisionDebugVertex(Lt,$t,pe,fr,mr,new u(A,re)),Ke.vertexLength+=4;let Pr=fe.indexArray;Pr.emplaceBack(ct,ct+1),Pr.emplaceBack(ct+1,ct+2),Pr.emplaceBack(ct+2,ct+3),Pr.emplaceBack(ct+3,ct),Ke.primitiveLength+=4}addDebugCollisionBoxes(A,F,W,re){for(let fe=A;fe<F;fe++){let pe=this.collisionBoxArray.get(fe);this.addCollisionDebugVertices(pe.x1,pe.y1,pe.x2,pe.y2,re?this.textCollisionBox:this.iconCollisionBox,pe.anchorPoint,W)}}generateCollisionDebugBuffers(){this.hasDebugData()&&this.destroyDebugData(),this.textCollisionBox=new cm(Al,sy.members,Te),this.iconCollisionBox=new cm(Al,sy.members,Te);for(let A=0;A<this.symbolInstances.length;A++){let F=this.symbolInstances.get(A);this.addDebugCollisionBoxes(F.textBoxStartIndex,F.textBoxEndIndex,F,!0),this.addDebugCollisionBoxes(F.verticalTextBoxStartIndex,F.verticalTextBoxEndIndex,F,!0),this.addDebugCollisionBoxes(F.iconBoxStartIndex,F.iconBoxEndIndex,F,!1),this.addDebugCollisionBoxes(F.verticalIconBoxStartIndex,F.verticalIconBoxEndIndex,F,!1)}}_deserializeCollisionBoxesForSymbol(A,F,W,re,fe,pe,ze,Ke,ct){let Lt={};for(let $t=F;$t<W;$t++){let fr=A.get($t);Lt.textBox={x1:fr.x1,y1:fr.y1,x2:fr.x2,y2:fr.y2,anchorPointX:fr.anchorPointX,anchorPointY:fr.anchorPointY},Lt.textFeatureIndex=fr.featureIndex;break}for(let $t=re;$t<fe;$t++){let fr=A.get($t);Lt.verticalTextBox={x1:fr.x1,y1:fr.y1,x2:fr.x2,y2:fr.y2,anchorPointX:fr.anchorPointX,anchorPointY:fr.anchorPointY},Lt.verticalTextFeatureIndex=fr.featureIndex;break}for(let $t=pe;$t<ze;$t++){let fr=A.get($t);Lt.iconBox={x1:fr.x1,y1:fr.y1,x2:fr.x2,y2:fr.y2,anchorPointX:fr.anchorPointX,anchorPointY:fr.anchorPointY},Lt.iconFeatureIndex=fr.featureIndex;break}for(let $t=Ke;$t<ct;$t++){let fr=A.get($t);Lt.verticalIconBox={x1:fr.x1,y1:fr.y1,x2:fr.x2,y2:fr.y2,anchorPointX:fr.anchorPointX,anchorPointY:fr.anchorPointY},Lt.verticalIconFeatureIndex=fr.featureIndex;break}return Lt}deserializeCollisionBoxes(A){this.collisionArrays=[];for(let F=0;F<this.symbolInstances.length;F++){let W=this.symbolInstances.get(F);this.collisionArrays.push(this._deserializeCollisionBoxesForSymbol(A,W.textBoxStartIndex,W.textBoxEndIndex,W.verticalTextBoxStartIndex,W.verticalTextBoxEndIndex,W.iconBoxStartIndex,W.iconBoxEndIndex,W.verticalIconBoxStartIndex,W.verticalIconBoxEndIndex))}}hasTextData(){return this.text.segments.get().length>0}hasIconData(){return this.icon.segments.get().length>0}hasDebugData(){return this.textCollisionBox&&this.iconCollisionBox}hasTextCollisionBoxData(){return this.hasDebugData()&&this.textCollisionBox.segments.get().length>0}hasIconCollisionBoxData(){return this.hasDebugData()&&this.iconCollisionBox.segments.get().length>0}addIndicesForPlacedSymbol(A,F){let W=A.placedSymbolArray.get(F),re=W.vertexStartIndex+4*W.numGlyphs;for(let fe=W.vertexStartIndex;fe<re;fe+=4)A.indexArray.emplaceBack(fe,fe+1,fe+2),A.indexArray.emplaceBack(fe+1,fe+2,fe+3)}getSortedSymbolIndexes(A){if(this.sortedAngle===A&&this.symbolInstanceIndexes!==void 0)return this.symbolInstanceIndexes;let F=Math.sin(A),W=Math.cos(A),re=[],fe=[],pe=[];for(let ze=0;ze<this.symbolInstances.length;++ze){pe.push(ze);let Ke=this.symbolInstances.get(ze);re.push(0|Math.round(F*Ke.anchorX+W*Ke.anchorY)),fe.push(Ke.featureIndex)}return pe.sort((ze,Ke)=>re[ze]-re[Ke]||fe[Ke]-fe[ze]),pe}addToSortKeyRanges(A,F){let W=this.sortKeyRanges[this.sortKeyRanges.length-1];W&&W.sortKey===F?W.symbolInstanceEnd=A+1:this.sortKeyRanges.push({sortKey:F,symbolInstanceStart:A,symbolInstanceEnd:A+1})}sortFeatures(A){if(this.sortFeaturesByY&&this.sortedAngle!==A&&!(this.text.segments.get().length>1||this.icon.segments.get().length>1)){this.symbolInstanceIndexes=this.getSortedSymbolIndexes(A),this.sortedAngle=A,this.text.indexArray.clear(),this.icon.indexArray.clear(),this.featureSortOrder=[];for(let F of this.symbolInstanceIndexes){let W=this.symbolInstances.get(F);this.featureSortOrder.push(W.featureIndex),[W.rightJustifiedTextSymbolIndex,W.centerJustifiedTextSymbolIndex,W.leftJustifiedTextSymbolIndex].forEach((re,fe,pe)=>{re>=0&&pe.indexOf(re)===fe&&this.addIndicesForPlacedSymbol(this.text,re)}),W.verticalPlacedTextSymbolIndex>=0&&this.addIndicesForPlacedSymbol(this.text,W.verticalPlacedTextSymbolIndex),W.placedIconSymbolIndex>=0&&this.addIndicesForPlacedSymbol(this.icon,W.placedIconSymbolIndex),W.verticalPlacedIconSymbolIndex>=0&&this.addIndicesForPlacedSymbol(this.icon,W.verticalPlacedIconSymbolIndex)}this.text.indexBuffer&&this.text.indexBuffer.updateData(this.text.indexArray),this.icon.indexBuffer&&this.icon.indexBuffer.updateData(this.icon.indexArray)}}}let Tf,Xx;Fi("SymbolBucket",M1,{omit:["layers","collisionBoxArray","features","compareText"]}),M1.MAX_GLYPHS=65535,M1.addDynamicAttributes=lS;var zw={get paint(){return Xx=Xx||new ue({"icon-opacity":new oo(ce.paint_symbol["icon-opacity"]),"icon-color":new oo(ce.paint_symbol["icon-color"]),"icon-halo-color":new oo(ce.paint_symbol["icon-halo-color"]),"icon-halo-width":new oo(ce.paint_symbol["icon-halo-width"]),"icon-halo-blur":new oo(ce.paint_symbol["icon-halo-blur"]),"icon-translate":new Ua(ce.paint_symbol["icon-translate"]),"icon-translate-anchor":new Ua(ce.paint_symbol["icon-translate-anchor"]),"text-opacity":new oo(ce.paint_symbol["text-opacity"]),"text-color":new oo(ce.paint_symbol["text-color"],{runtimeType:er,getOverride:R=>R.textColor,hasOverride:R=>!!R.textColor}),"text-halo-color":new oo(ce.paint_symbol["text-halo-color"]),"text-halo-width":new oo(ce.paint_symbol["text-halo-width"]),"text-halo-blur":new oo(ce.paint_symbol["text-halo-blur"]),"text-translate":new Ua(ce.paint_symbol["text-translate"]),"text-translate-anchor":new Ua(ce.paint_symbol["text-translate-anchor"])})},get layout(){return Tf=Tf||new ue({"symbol-placement":new Ua(ce.layout_symbol["symbol-placement"]),"symbol-spacing":new Ua(ce.layout_symbol["symbol-spacing"]),"symbol-avoid-edges":new Ua(ce.layout_symbol["symbol-avoid-edges"]),"symbol-sort-key":new oo(ce.layout_symbol["symbol-sort-key"]),"symbol-z-order":new Ua(ce.layout_symbol["symbol-z-order"]),"icon-allow-overlap":new Ua(ce.layout_symbol["icon-allow-overlap"]),"icon-overlap":new Ua(ce.layout_symbol["icon-overlap"]),"icon-ignore-placement":new Ua(ce.layout_symbol["icon-ignore-placement"]),"icon-optional":new Ua(ce.layout_symbol["icon-optional"]),"icon-rotation-alignment":new Ua(ce.layout_symbol["icon-rotation-alignment"]),"icon-size":new oo(ce.layout_symbol["icon-size"]),"icon-text-fit":new Ua(ce.layout_symbol["icon-text-fit"]),"icon-text-fit-padding":new Ua(ce.layout_symbol["icon-text-fit-padding"]),"icon-image":new oo(ce.layout_symbol["icon-image"]),"icon-rotate":new oo(ce.layout_symbol["icon-rotate"]),"icon-padding":new oo(ce.layout_symbol["icon-padding"]),"icon-keep-upright":new Ua(ce.layout_symbol["icon-keep-upright"]),"icon-offset":new oo(ce.layout_symbol["icon-offset"]),"icon-anchor":new oo(ce.layout_symbol["icon-anchor"]),"icon-pitch-alignment":new Ua(ce.layout_symbol["icon-pitch-alignment"]),"text-pitch-alignment":new Ua(ce.layout_symbol["text-pitch-alignment"]),"text-rotation-alignment":new Ua(ce.layout_symbol["text-rotation-alignment"]),"text-field":new oo(ce.layout_symbol["text-field"]),"text-font":new oo(ce.layout_symbol["text-font"]),"text-size":new oo(ce.layout_symbol["text-size"]),"text-max-width":new oo(ce.layout_symbol["text-max-width"]),"text-line-height":new Ua(ce.layout_symbol["text-line-height"]),"text-letter-spacing":new oo(ce.layout_symbol["text-letter-spacing"]),"text-justify":new oo(ce.layout_symbol["text-justify"]),"text-radial-offset":new oo(ce.layout_symbol["text-radial-offset"]),"text-variable-anchor":new Ua(ce.layout_symbol["text-variable-anchor"]),"text-variable-anchor-offset":new oo(ce.layout_symbol["text-variable-anchor-offset"]),"text-anchor":new oo(ce.layout_symbol["text-anchor"]),"text-max-angle":new Ua(ce.layout_symbol["text-max-angle"]),"text-writing-mode":new Ua(ce.layout_symbol["text-writing-mode"]),"text-rotate":new oo(ce.layout_symbol["text-rotate"]),"text-padding":new Ua(ce.layout_symbol["text-padding"]),"text-keep-upright":new Ua(ce.layout_symbol["text-keep-upright"]),"text-transform":new oo(ce.layout_symbol["text-transform"]),"text-offset":new oo(ce.layout_symbol["text-offset"]),"text-allow-overlap":new Ua(ce.layout_symbol["text-allow-overlap"]),"text-overlap":new Ua(ce.layout_symbol["text-overlap"]),"text-ignore-placement":new Ua(ce.layout_symbol["text-ignore-placement"]),"text-optional":new Ua(ce.layout_symbol["text-optional"])})}};class Zx{constructor(A){if(A.property.overrides===void 0)throw new Error("overrides must be provided to instantiate FormatSectionOverride class");this.type=A.property.overrides?A.property.overrides.runtimeType:Vt,this.defaultValue=A}evaluate(A){if(A.formattedSection){let F=this.defaultValue.property.overrides;if(F&&F.hasOverride(A.formattedSection))return F.getOverride(A.formattedSection)}return A.feature&&A.featureState?this.defaultValue.evaluate(A.feature,A.featureState):this.defaultValue.property.specification.default}eachChild(A){this.defaultValue.isConstant()||A(this.defaultValue.value._styleExpression.expression)}outputDefined(){return!1}serialize(){return null}}Fi("FormatSectionOverride",Zx,{omit:["defaultValue"]});class cy extends B{constructor(A){super(A,zw)}recalculate(A,F){if(super.recalculate(A,F),this.layout.get("icon-rotation-alignment")==="auto"&&(this.layout._values["icon-rotation-alignment"]=this.layout.get("symbol-placement")!=="point"?"map":"viewport"),this.layout.get("text-rotation-alignment")==="auto"&&(this.layout._values["text-rotation-alignment"]=this.layout.get("symbol-placement")!=="point"?"map":"viewport"),this.layout.get("text-pitch-alignment")==="auto"&&(this.layout._values["text-pitch-alignment"]=this.layout.get("text-rotation-alignment")==="map"?"map":"viewport"),this.layout.get("icon-pitch-alignment")==="auto"&&(this.layout._values["icon-pitch-alignment"]=this.layout.get("icon-rotation-alignment")),this.layout.get("symbol-placement")==="point"){let W=this.layout.get("text-writing-mode");if(W){let re=[];for(let fe of W)re.indexOf(fe)<0&&re.push(fe);this.layout._values["text-writing-mode"]=re}else this.layout._values["text-writing-mode"]=["horizontal"]}this._setPaintOverrides()}getValueAndResolveTokens(A,F,W,re){let fe=this.layout.get(A).evaluate(F,{},W,re),pe=this._unevaluatedLayout._values[A];return pe.isDataDriven()||Dc(pe.value)||!fe?fe:function(ze,Ke){return Ke.replace(/{([^{}]+)}/g,(ct,Lt)=>ze&&Lt in ze?String(ze[Lt]):"")}(F.properties,fe)}createBucket(A){return new M1(A)}queryRadius(){return 0}queryIntersectsFeature(){throw new Error("Should take a different path in FeatureIndex")}_setPaintOverrides(){for(let A of zw.paint.overridableProperties){if(!cy.hasPaintOverride(this.layout,A))continue;let F=this.paint.get(A),W=new Zx(F),re=new Eu(W,F.property.specification),fe=null;fe=F.value.kind==="constant"||F.value.kind==="source"?new bc("source",re):new hu("composite",re,F.value.zoomStops),this.paint._values[A]=new xu(F.property,fe,F.parameters)}}_handleOverridablePaintPropertyUpdate(A,F,W){return!(!this.layout||F.isDataDriven()||W.isDataDriven())&&cy.hasPaintOverride(this.layout,A)}static hasPaintOverride(A,F){let W=A.get("text-field"),re=zw.paint.properties[F],fe=!1,pe=ze=>{for(let Ke of ze)if(re.overrides&&re.overrides.hasOverride(Ke))return void(fe=!0)};if(W.value.kind==="constant"&&W.value.value instanceof ri)pe(W.value.value.sections);else if(W.value.kind==="source"){let ze=ct=>{fe||(ct instanceof ua&&Sn(ct.value)===ti?pe(ct.value.sections):ct instanceof Zl?pe(ct.sections):ct.eachChild(ze))},Ke=W.value;Ke._styleExpression&&ze(Ke._styleExpression.expression)}return fe}}let MC;var Yx={get paint(){return MC=MC||new ue({"background-color":new Ua(ce.paint_background["background-color"]),"background-pattern":new hc(ce.paint_background["background-pattern"]),"background-opacity":new Ua(ce.paint_background["background-opacity"])})}};class J9 extends B{constructor(A){super(A,Yx)}}let fS;var EC={get paint(){return fS=fS||new ue({"raster-opacity":new Ua(ce.paint_raster["raster-opacity"]),"raster-hue-rotate":new Ua(ce.paint_raster["raster-hue-rotate"]),"raster-brightness-min":new Ua(ce.paint_raster["raster-brightness-min"]),"raster-brightness-max":new Ua(ce.paint_raster["raster-brightness-max"]),"raster-saturation":new Ua(ce.paint_raster["raster-saturation"]),"raster-contrast":new Ua(ce.paint_raster["raster-contrast"]),"raster-resampling":new Ua(ce.paint_raster["raster-resampling"]),"raster-fade-duration":new Ua(ce.paint_raster["raster-fade-duration"])})}};class Kx extends B{constructor(A){super(A,EC)}}class hS extends B{constructor(A){super(A,{}),this.onAdd=F=>{this.implementation.onAdd&&this.implementation.onAdd(F,F.painter.context.gl)},this.onRemove=F=>{this.implementation.onRemove&&this.implementation.onRemove(F,F.painter.context.gl)},this.implementation=A}is3D(){return this.implementation.renderingMode==="3d"}hasOffscreenPass(){return this.implementation.prerender!==void 0}recalculate(){}updateTransitions(){}hasTransition(){return!1}serialize(){throw new Error("Custom layers cannot be serialized")}}class dS{constructor(A){this._methodToThrottle=A,this._triggered=!1,typeof MessageChannel!="undefined"&&(this._channel=new MessageChannel,this._channel.port2.onmessage=()=>{this._triggered=!1,this._methodToThrottle()})}trigger(){this._triggered||(this._triggered=!0,this._channel?this._channel.port1.postMessage(!0):setTimeout(()=>{this._triggered=!1,this._methodToThrottle()},0))}remove(){delete this._channel,this._methodToThrottle=()=>{}}}let vS=63710088e-1;class dg{constructor(A,F){if(isNaN(A)||isNaN(F))throw new Error(`Invalid LngLat object: (${A}, ${F})`);if(this.lng=+A,this.lat=+F,this.lat>90||this.lat<-90)throw new Error("Invalid LngLat latitude value: must be between -90 and 90")}wrap(){return new dg(S(this.lng,-180,180),this.lat)}toArray(){return[this.lng,this.lat]}toString(){return`LngLat(${this.lng}, ${this.lat})`}distanceTo(A){let F=Math.PI/180,W=this.lat*F,re=A.lat*F,fe=Math.sin(W)*Math.sin(re)+Math.cos(W)*Math.cos(re)*Math.cos((A.lng-this.lng)*F);return vS*Math.acos(Math.min(fe,1))}static convert(A){if(A instanceof dg)return A;if(Array.isArray(A)&&(A.length===2||A.length===3))return new dg(Number(A[0]),Number(A[1]));if(!Array.isArray(A)&&typeof A=="object"&&A!==null)return new dg(Number("lng"in A?A.lng:A.lon),Number(A.lat));throw new Error("`LngLatLike` argument must be specified as a LngLat instance, an object {lng: <lng>, lat: <lat>}, an object {lon: <lng>, lat: <lat>}, or an array of [<lng>, <lat>]")}}let E1=2*Math.PI*vS;function kC(R){return E1*Math.cos(R*Math.PI/180)}function Ow(R){return(180+R)/360}function CC(R){return(180-180/Math.PI*Math.log(Math.tan(Math.PI/4+R*Math.PI/360)))/360}function qw(R,A){return R/kC(A)}function Jx(R){return 360/Math.PI*Math.atan(Math.exp((180-360*R)*Math.PI/180))-90}class $x{constructor(A,F,W=0){this.x=+A,this.y=+F,this.z=+W}static fromLngLat(A,F=0){let W=dg.convert(A);return new $x(Ow(W.lng),CC(W.lat),qw(F,W.lat))}toLngLat(){return new dg(360*this.x-180,Jx(this.y))}toAltitude(){return this.z*kC(Jx(this.y))}meterInMercatorCoordinateUnits(){return 1/E1*(A=Jx(this.y),1/Math.cos(A*Math.PI/180));var A}}function mp(R,A,F){var W=2*Math.PI*6378137/256/Math.pow(2,F);return[R*W-2*Math.PI*6378137/2,A*W-2*Math.PI*6378137/2]}class pS{constructor(A,F,W){if(!function(re,fe,pe){return!(re<0||re>25||pe<0||pe>=Math.pow(2,re)||fe<0||fe>=Math.pow(2,re))}(A,F,W))throw new Error(`x=${F}, y=${W}, z=${A} outside of bounds. 0<=x<${Math.pow(2,A)}, 0<=y<${Math.pow(2,A)} 0<=z<=25 `);this.z=A,this.x=F,this.y=W,this.key=Qx(0,A,A,F,W)}equals(A){return this.z===A.z&&this.x===A.x&&this.y===A.y}url(A,F,W){let re=(pe=this.y,ze=this.z,Ke=mp(256*(fe=this.x),256*(pe=Math.pow(2,ze)-pe-1),ze),ct=mp(256*(fe+1),256*(pe+1),ze),Ke[0]+","+Ke[1]+","+ct[0]+","+ct[1]);var fe,pe,ze,Ke,ct;let Lt=function($t,fr,mr){let Pr,zr="";for(let ui=$t;ui>0;ui--)Pr=1<<ui-1,zr+=(fr&Pr?1:0)+(mr&Pr?2:0);return zr}(this.z,this.x,this.y);return A[(this.x+this.y)%A.length].replace(/{prefix}/g,(this.x%16).toString(16)+(this.y%16).toString(16)).replace(/{z}/g,String(this.z)).replace(/{x}/g,String(this.x)).replace(/{y}/g,String(W==="tms"?Math.pow(2,this.z)-this.y-1:this.y)).replace(/{ratio}/g,F>1?"@2x":"").replace(/{quadkey}/g,Lt).replace(/{bbox-epsg-3857}/g,re)}isChildOf(A){let F=this.z-A.z;return F>0&&A.x===this.x>>F&&A.y===this.y>>F}getTilePoint(A){let F=Math.pow(2,this.z);return new u((A.x*F-this.x)*ja,(A.y*F-this.y)*ja)}toString(){return`${this.z}/${this.x}/${this.y}`}}class LC{constructor(A,F){this.wrap=A,this.canonical=F,this.key=Qx(A,F.z,F.z,F.x,F.y)}}class $v{constructor(A,F,W,re,fe){if(A<W)throw new Error(`overscaledZ should be >= z; overscaledZ = ${A}; z = ${W}`);this.overscaledZ=A,this.wrap=F,this.canonical=new pS(W,+re,+fe),this.key=Qx(F,A,W,re,fe)}clone(){return new $v(this.overscaledZ,this.wrap,this.canonical.z,this.canonical.x,this.canonical.y)}equals(A){return this.overscaledZ===A.overscaledZ&&this.wrap===A.wrap&&this.canonical.equals(A.canonical)}scaledTo(A){if(A>this.overscaledZ)throw new Error(`targetZ > this.overscaledZ; targetZ = ${A}; overscaledZ = ${this.overscaledZ}`);let F=this.canonical.z-A;return A>this.canonical.z?new $v(A,this.wrap,this.canonical.z,this.canonical.x,this.canonical.y):new $v(A,this.wrap,A,this.canonical.x>>F,this.canonical.y>>F)}calculateScaledKey(A,F){if(A>this.overscaledZ)throw new Error(`targetZ > this.overscaledZ; targetZ = ${A}; overscaledZ = ${this.overscaledZ}`);let W=this.canonical.z-A;return A>this.canonical.z?Qx(this.wrap*+F,A,this.canonical.z,this.canonical.x,this.canonical.y):Qx(this.wrap*+F,A,A,this.canonical.x>>W,this.canonical.y>>W)}isChildOf(A){if(A.wrap!==this.wrap)return!1;let F=this.canonical.z-A.canonical.z;return A.overscaledZ===0||A.overscaledZ<this.overscaledZ&&A.canonical.x===this.canonical.x>>F&&A.canonical.y===this.canonical.y>>F}children(A){if(this.overscaledZ>=A)return[new $v(this.overscaledZ+1,this.wrap,this.canonical.z,this.canonical.x,this.canonical.y)];let F=this.canonical.z+1,W=2*this.canonical.x,re=2*this.canonical.y;return[new $v(F,this.wrap,F,W,re),new $v(F,this.wrap,F,W+1,re),new $v(F,this.wrap,F,W,re+1),new $v(F,this.wrap,F,W+1,re+1)]}isLessThan(A){return this.wrap<A.wrap||!(this.wrap>A.wrap)&&(this.overscaledZ<A.overscaledZ||!(this.overscaledZ>A.overscaledZ)&&(this.canonical.x<A.canonical.x||!(this.canonical.x>A.canonical.x)&&this.canonical.y<A.canonical.y))}wrapped(){return new $v(this.overscaledZ,0,this.canonical.z,this.canonical.x,this.canonical.y)}unwrapTo(A){return new $v(this.overscaledZ,A,this.canonical.z,this.canonical.x,this.canonical.y)}overscaleFactor(){return Math.pow(2,this.overscaledZ-this.canonical.z)}toUnwrapped(){return new LC(this.wrap,this.canonical)}toString(){return`${this.overscaledZ}/${this.canonical.x}/${this.canonical.y}`}getTilePoint(A){return this.canonical.getTilePoint(new $x(A.x-this.wrap,A.y))}}function Qx(R,A,F,W,re){(R*=2)<0&&(R=-1*R-1);let fe=1<<F;return(fe*fe*R+fe*re+W).toString(36)+F.toString(36)+A.toString(36)}Fi("CanonicalTileID",pS),Fi("OverscaledTileID",$v,{omit:["posMatrix"]});class PC{constructor(A,F,W,re=1,fe=1,pe=1,ze=0){if(this.uid=A,F.height!==F.width)throw new RangeError("DEM tiles must be square");if(W&&!["mapbox","terrarium","custom"].includes(W))return void T(`"${W}" is not a valid encoding type. Valid types include "mapbox", "terrarium" and "custom".`);this.stride=F.height;let Ke=this.dim=F.height-2;switch(this.data=new Uint32Array(F.data.buffer),W){case"terrarium":this.redFactor=256,this.greenFactor=1,this.blueFactor=1/256,this.baseShift=32768;break;case"custom":this.redFactor=re,this.greenFactor=fe,this.blueFactor=pe,this.baseShift=ze;break;default:this.redFactor=6553.6,this.greenFactor=25.6,this.blueFactor=.1,this.baseShift=1e4}for(let ct=0;ct<Ke;ct++)this.data[this._idx(-1,ct)]=this.data[this._idx(0,ct)],this.data[this._idx(Ke,ct)]=this.data[this._idx(Ke-1,ct)],this.data[this._idx(ct,-1)]=this.data[this._idx(ct,0)],this.data[this._idx(ct,Ke)]=this.data[this._idx(ct,Ke-1)];this.data[this._idx(-1,-1)]=this.data[this._idx(0,0)],this.data[this._idx(Ke,-1)]=this.data[this._idx(Ke-1,0)],this.data[this._idx(-1,Ke)]=this.data[this._idx(0,Ke-1)],this.data[this._idx(Ke,Ke)]=this.data[this._idx(Ke-1,Ke-1)],this.min=Number.MAX_SAFE_INTEGER,this.max=Number.MIN_SAFE_INTEGER;for(let ct=0;ct<Ke;ct++)for(let Lt=0;Lt<Ke;Lt++){let $t=this.get(ct,Lt);$t>this.max&&(this.max=$t),$t<this.min&&(this.min=$t)}}get(A,F){let W=new Uint8Array(this.data.buffer),re=4*this._idx(A,F);return this.unpack(W[re],W[re+1],W[re+2])}getUnpackVector(){return[this.redFactor,this.greenFactor,this.blueFactor,this.baseShift]}_idx(A,F){if(A<-1||A>=this.dim+1||F<-1||F>=this.dim+1)throw new RangeError("out of range source coordinates for DEM data");return(F+1)*this.stride+(A+1)}unpack(A,F,W){return A*this.redFactor+F*this.greenFactor+W*this.blueFactor-this.baseShift}getPixels(){return new Ki({width:this.stride,height:this.stride},new Uint8Array(this.data.buffer))}backfillBorder(A,F,W){if(this.dim!==A.dim)throw new Error("dem dimension mismatch");let re=F*this.dim,fe=F*this.dim+this.dim,pe=W*this.dim,ze=W*this.dim+this.dim;switch(F){case-1:re=fe-1;break;case 1:fe=re+1}switch(W){case-1:pe=ze-1;break;case 1:ze=pe+1}let Ke=-F*this.dim,ct=-W*this.dim;for(let Lt=pe;Lt<ze;Lt++)for(let $t=re;$t<fe;$t++)this.data[this._idx($t,Lt)]=A.data[this._idx($t+Ke,Lt+ct)]}}Fi("DEMData",PC);class IC{constructor(A){this._stringToNumber={},this._numberToString=[];for(let F=0;F<A.length;F++){let W=A[F];this._stringToNumber[W]=F,this._numberToString[F]=W}}encode(A){return this._stringToNumber[A]}decode(A){if(A>=this._numberToString.length)throw new Error(`Out of bounds. Index requested n=${A} can't be >= this._numberToString.length ${this._numberToString.length}`);return this._numberToString[A]}}class gS{constructor(A,F,W,re,fe){this.type="Feature",this._vectorTileFeature=A,A._z=F,A._x=W,A._y=re,this.properties=A.properties,this.id=fe}get geometry(){return this._geometry===void 0&&(this._geometry=this._vectorTileFeature.toGeoJSON(this._vectorTileFeature._x,this._vectorTileFeature._y,this._vectorTileFeature._z).geometry),this._geometry}set geometry(A){this._geometry=A}toJSON(){let A={geometry:this.geometry};for(let F in this)F!=="_geometry"&&F!=="_vectorTileFeature"&&(A[F]=this[F]);return A}}class fy{constructor(A,F){this.tileID=A,this.x=A.canonical.x,this.y=A.canonical.y,this.z=A.canonical.z,this.grid=new Qi(ja,16,0),this.grid3D=new Qi(ja,16,0),this.featureIndexArray=new Ts,this.promoteId=F}insert(A,F,W,re,fe,pe){let ze=this.featureIndexArray.length;this.featureIndexArray.emplaceBack(W,re,fe);let Ke=pe?this.grid3D:this.grid;for(let ct=0;ct<F.length;ct++){let Lt=F[ct],$t=[1/0,1/0,-1/0,-1/0];for(let fr=0;fr<Lt.length;fr++){let mr=Lt[fr];$t[0]=Math.min($t[0],mr.x),$t[1]=Math.min($t[1],mr.y),$t[2]=Math.max($t[2],mr.x),$t[3]=Math.max($t[3],mr.y)}$t[0]<ja&&$t[1]<ja&&$t[2]>=0&&$t[3]>=0&&Ke.insert(ze,$t[0],$t[1],$t[2],$t[3])}}loadVTLayers(){return this.vtLayers||(this.vtLayers=new br.VectorTile(new nS(this.rawTileData)).layers,this.sourceLayerCoder=new IC(this.vtLayers?Object.keys(this.vtLayers).sort():["_geojsonTileLayer"])),this.vtLayers}query(A,F,W,re){this.loadVTLayers();let fe=A.params||{},pe=ja/A.tileSize/A.scale,ze=Fc(fe.filter),Ke=A.queryGeometry,ct=A.queryPadding*pe,Lt=DC(Ke),$t=this.grid.query(Lt.minX-ct,Lt.minY-ct,Lt.maxX+ct,Lt.maxY+ct),fr=DC(A.cameraQueryGeometry),mr=this.grid3D.query(fr.minX-ct,fr.minY-ct,fr.maxX+ct,fr.maxY+ct,(ui,yi,vn,zi)=>function(un,Tn,pa,ro,Vo){for(let sa of un)if(Tn<=sa.x&&pa<=sa.y&&ro>=sa.x&&Vo>=sa.y)return!0;let Xa=[new u(Tn,pa),new u(Tn,Vo),new u(ro,Vo),new u(ro,pa)];if(un.length>2){for(let sa of Xa)if(Ui(un,sa))return!0}for(let sa=0;sa<un.length-1;sa++)if(qi(un[sa],un[sa+1],Xa))return!0;return!1}(A.cameraQueryGeometry,ui-ct,yi-ct,vn+ct,zi+ct));for(let ui of mr)$t.push(ui);$t.sort($9);let Pr={},zr;for(let ui=0;ui<$t.length;ui++){let yi=$t[ui];if(yi===zr)continue;zr=yi;let vn=this.featureIndexArray.get(yi),zi=null;this.loadMatchingFeature(Pr,vn.bucketIndex,vn.sourceLayerIndex,vn.featureIndex,ze,fe.layers,fe.availableImages,F,W,re,(un,Tn,pa)=>(zi||(zi=$s(un)),Tn.queryIntersectsFeature(Ke,un,pa,zi,this.z,A.transform,pe,A.pixelPosMatrix)))}return Pr}loadMatchingFeature(A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t){let fr=this.bucketLayerIDs[F];if(pe&&!function(ui,yi){for(let vn=0;vn<ui.length;vn++)if(yi.indexOf(ui[vn])>=0)return!0;return!1}(pe,fr))return;let mr=this.sourceLayerCoder.decode(W),Pr=this.vtLayers[mr].feature(re);if(fe.needGeometry){let ui=Sl(Pr,!0);if(!fe.filter(new rs(this.tileID.overscaledZ),ui,this.tileID.canonical))return}else if(!fe.filter(new rs(this.tileID.overscaledZ),Pr))return;let zr=this.getId(Pr,mr);for(let ui=0;ui<fr.length;ui++){let yi=fr[ui];if(pe&&pe.indexOf(yi)<0)continue;let vn=Ke[yi];if(!vn)continue;let zi={};zr&&Lt&&(zi=Lt.getState(vn.sourceLayer||"_geojsonTileLayer",zr));let un=L({},ct[yi]);un.paint=RC(un.paint,vn.paint,Pr,zi,ze),un.layout=RC(un.layout,vn.layout,Pr,zi,ze);let Tn=!$t||$t(Pr,vn,zi);if(!Tn)continue;let pa=new gS(Pr,this.z,this.x,this.y,zr);pa.layer=un;let ro=A[yi];ro===void 0&&(ro=A[yi]=[]),ro.push({featureIndex:re,feature:pa,intersectionZ:Tn})}}lookupSymbolFeatures(A,F,W,re,fe,pe,ze,Ke){let ct={};this.loadVTLayers();let Lt=Fc(fe);for(let $t of A)this.loadMatchingFeature(ct,W,re,$t,Lt,pe,ze,Ke,F);return ct}hasLayer(A){for(let F of this.bucketLayerIDs)for(let W of F)if(A===W)return!0;return!1}getId(A,F){let W=A.id;return this.promoteId&&(W=A.properties[typeof this.promoteId=="string"?this.promoteId:this.promoteId[F]],typeof W=="boolean"&&(W=Number(W))),W}}function RC(R,A,F,W,re){return C(R,(fe,pe)=>{let ze=A instanceof Ac?A.get(pe):null;return ze&&ze.evaluate?ze.evaluate(F,W,re):ze})}function DC(R){let A=1/0,F=1/0,W=-1/0,re=-1/0;for(let fe of R)A=Math.min(A,fe.x),F=Math.min(F,fe.y),W=Math.max(W,fe.x),re=Math.max(re,fe.y);return{minX:A,minY:F,maxX:W,maxY:re}}function $9(R,A){return A-R}function FC(R,A,F,W,re){let fe=[];for(let pe=0;pe<R.length;pe++){let ze=R[pe],Ke;for(let ct=0;ct<ze.length-1;ct++){let Lt=ze[ct],$t=ze[ct+1];Lt.x<A&&$t.x<A||(Lt.x<A?Lt=new u(A,Lt.y+(A-Lt.x)/($t.x-Lt.x)*($t.y-Lt.y))._round():$t.x<A&&($t=new u(A,Lt.y+(A-Lt.x)/($t.x-Lt.x)*($t.y-Lt.y))._round()),Lt.y<F&&$t.y<F||(Lt.y<F?Lt=new u(Lt.x+(F-Lt.y)/($t.y-Lt.y)*($t.x-Lt.x),F)._round():$t.y<F&&($t=new u(Lt.x+(F-Lt.y)/($t.y-Lt.y)*($t.x-Lt.x),F)._round()),Lt.x>=W&&$t.x>=W||(Lt.x>=W?Lt=new u(W,Lt.y+(W-Lt.x)/($t.x-Lt.x)*($t.y-Lt.y))._round():$t.x>=W&&($t=new u(W,Lt.y+(W-Lt.x)/($t.x-Lt.x)*($t.y-Lt.y))._round()),Lt.y>=re&&$t.y>=re||(Lt.y>=re?Lt=new u(Lt.x+(re-Lt.y)/($t.y-Lt.y)*($t.x-Lt.x),re)._round():$t.y>=re&&($t=new u(Lt.x+(re-Lt.y)/($t.y-Lt.y)*($t.x-Lt.x),re)._round()),Ke&&Lt.equals(Ke[Ke.length-1])||(Ke=[Lt],fe.push(Ke)),Ke.push($t)))))}}return fe}Fi("FeatureIndex",fy,{omit:["rawTileData","sourceLayerCoder"]});class vg extends u{constructor(A,F,W,re){super(A,F),this.angle=W,re!==void 0&&(this.segment=re)}clone(){return new vg(this.x,this.y,this.angle,this.segment)}}function mS(R,A,F,W,re){if(A.segment===void 0||F===0)return!0;let fe=A,pe=A.segment+1,ze=0;for(;ze>-F/2;){if(pe--,pe<0)return!1;ze-=R[pe].dist(fe),fe=R[pe]}ze+=R[pe].dist(R[pe+1]),pe++;let Ke=[],ct=0;for(;ze<F/2;){let Lt=R[pe],$t=R[pe+1];if(!$t)return!1;let fr=R[pe-1].angleTo(Lt)-Lt.angleTo($t);for(fr=Math.abs((fr+3*Math.PI)%(2*Math.PI)-Math.PI),Ke.push({distance:ze,angleDelta:fr}),ct+=fr;ze-Ke[0].distance>W;)ct-=Ke.shift().angleDelta;if(ct>re)return!1;pe++,ze+=Lt.dist($t)}return!0}function zC(R){let A=0;for(let F=0;F<R.length-1;F++)A+=R[F].dist(R[F+1]);return A}function OC(R,A,F){return R?.6*A*F:0}function qC(R,A){return Math.max(R?R.right-R.left:0,A?A.right-A.left:0)}function Q9(R,A,F,W,re,fe){let pe=OC(F,re,fe),ze=qC(F,W)*fe,Ke=0,ct=zC(R)/2;for(let Lt=0;Lt<R.length-1;Lt++){let $t=R[Lt],fr=R[Lt+1],mr=$t.dist(fr);if(Ke+mr>ct){let Pr=(ct-Ke)/mr,zr=Lo.number($t.x,fr.x,Pr),ui=Lo.number($t.y,fr.y,Pr),yi=new vg(zr,ui,fr.angleTo($t),Lt);return yi._round(),!pe||mS(R,yi,ze,pe,A)?yi:void 0}Ke+=mr}}function eO(R,A,F,W,re,fe,pe,ze,Ke){let ct=OC(W,fe,pe),Lt=qC(W,re),$t=Lt*pe,fr=R[0].x===0||R[0].x===Ke||R[0].y===0||R[0].y===Ke;return A-$t<A/4&&(A=$t+A/4),BC(R,fr?A/2*ze%A:(Lt/2+2*fe)*pe*ze%A,A,ct,F,$t,fr,!1,Ke)}function BC(R,A,F,W,re,fe,pe,ze,Ke){let ct=fe/2,Lt=zC(R),$t=0,fr=A-F,mr=[];for(let Pr=0;Pr<R.length-1;Pr++){let zr=R[Pr],ui=R[Pr+1],yi=zr.dist(ui),vn=ui.angleTo(zr);for(;fr+F<$t+yi;){fr+=F;let zi=(fr-$t)/yi,un=Lo.number(zr.x,ui.x,zi),Tn=Lo.number(zr.y,ui.y,zi);if(un>=0&&un<Ke&&Tn>=0&&Tn<Ke&&fr-ct>=0&&fr+ct<=Lt){let pa=new vg(un,Tn,vn,Pr);pa._round(),W&&!mS(R,pa,fe,W,re)||mr.push(pa)}}$t+=yi}return ze||mr.length||pe||(mr=BC(R,$t/2,F,W,re,fe,pe,!0,Ke)),mr}Fi("Anchor",vg);let k1=Md;function NC(R,A,F,W){let re=[],fe=R.image,pe=fe.pixelRatio,ze=fe.paddedRect.w-2*k1,Ke=fe.paddedRect.h-2*k1,ct={x1:R.left,y1:R.top,x2:R.right,y2:R.bottom},Lt=fe.stretchX||[[0,ze]],$t=fe.stretchY||[[0,Ke]],fr=(Xn,Ro)=>Xn+Ro[1]-Ro[0],mr=Lt.reduce(fr,0),Pr=$t.reduce(fr,0),zr=ze-mr,ui=Ke-Pr,yi=0,vn=mr,zi=0,un=Pr,Tn=0,pa=zr,ro=0,Vo=ui;if(fe.content&&W){let Xn=fe.content,Ro=Xn[2]-Xn[0],uo=Xn[3]-Xn[1];(fe.textFitWidth||fe.textFitHeight)&&(ct=TC(R)),yi=pg(Lt,0,Xn[0]),zi=pg($t,0,Xn[1]),vn=pg(Lt,Xn[0],Xn[2]),un=pg($t,Xn[1],Xn[3]),Tn=Xn[0]-yi,ro=Xn[1]-zi,pa=Ro-vn,Vo=uo-un}let Xa=ct.x1,sa=ct.y1,Mo=ct.x2-Xa,fo=ct.y2-sa,lo=(Xn,Ro,uo,$o)=>{let Ju=Bw(Xn.stretch-yi,vn,Mo,Xa),qu=C1(Xn.fixed-Tn,pa,Xn.stretch,mr),Mh=Bw(Ro.stretch-zi,un,fo,sa),Qv=C1(Ro.fixed-ro,Vo,Ro.stretch,Pr),ld=Bw(uo.stretch-yi,vn,Mo,Xa),Eh=C1(uo.fixed-Tn,pa,uo.stretch,mr),Gd=Bw($o.stretch-zi,un,fo,sa),Hd=C1($o.fixed-ro,Vo,$o.stretch,Pr),jd=new u(Ju,Mh),Af=new u(ld,Mh),kh=new u(ld,Gd),Ed=new u(Ju,Gd),ud=new u(qu/pe,Qv/pe),Wh=new u(Eh/pe,Hd/pe),Df=A*Math.PI/180;if(Df){let tu=Math.sin(Df),pc=Math.cos(Df),$u=[pc,-tu,tu,pc];jd._matMult($u),Af._matMult($u),Ed._matMult($u),kh._matMult($u)}let Fv=Xn.stretch+Xn.fixed,lv=Ro.stretch+Ro.fixed;return{tl:jd,tr:Af,bl:Ed,br:kh,tex:{x:fe.paddedRect.x+k1+Fv,y:fe.paddedRect.y+k1+lv,w:uo.stretch+uo.fixed-Fv,h:$o.stretch+$o.fixed-lv},writingMode:void 0,glyphOffset:[0,0],sectionIndex:0,pixelOffsetTL:ud,pixelOffsetBR:Wh,minFontScaleX:pa/pe/Mo,minFontScaleY:Vo/pe/fo,isSDF:F}};if(W&&(fe.stretchX||fe.stretchY)){let Xn=UC(Lt,zr,mr),Ro=UC($t,ui,Pr);for(let uo=0;uo<Xn.length-1;uo++){let $o=Xn[uo],Ju=Xn[uo+1];for(let qu=0;qu<Ro.length-1;qu++)re.push(lo($o,Ro[qu],Ju,Ro[qu+1]))}}else re.push(lo({fixed:0,stretch:-1},{fixed:0,stretch:-1},{fixed:0,stretch:ze+1},{fixed:0,stretch:Ke+1}));return re}function pg(R,A,F){let W=0;for(let re of R)W+=Math.max(A,Math.min(F,re[1]))-Math.max(A,Math.min(F,re[0]));return W}function UC(R,A,F){let W=[{fixed:-k1,stretch:0}];for(let[re,fe]of R){let pe=W[W.length-1];W.push({fixed:re-pe.stretch,stretch:pe.stretch}),W.push({fixed:re-pe.stretch,stretch:pe.stretch+(fe-re)})}return W.push({fixed:A+k1,stretch:F}),W}function Bw(R,A,F,W){return R/A*F+W}function C1(R,A,F,W){return R-A*F/W}class fm{constructor(A,F,W,re,fe,pe,ze,Ke,ct,Lt){var $t;if(this.boxStartIndex=A.length,ct){let fr=pe.top,mr=pe.bottom,Pr=pe.collisionPadding;Pr&&(fr-=Pr[1],mr+=Pr[3]);let zr=mr-fr;zr>0&&(zr=Math.max(10,zr),this.circleDiameter=zr)}else{let fr=!(($t=pe.image)===null||$t===void 0)&&$t.content&&(pe.image.textFitWidth||pe.image.textFitHeight)?TC(pe):{x1:pe.left,y1:pe.top,x2:pe.right,y2:pe.bottom};fr.y1=fr.y1*ze-Ke[0],fr.y2=fr.y2*ze+Ke[2],fr.x1=fr.x1*ze-Ke[3],fr.x2=fr.x2*ze+Ke[1];let mr=pe.collisionPadding;if(mr&&(fr.x1-=mr[0]*ze,fr.y1-=mr[1]*ze,fr.x2+=mr[2]*ze,fr.y2+=mr[3]*ze),Lt){let Pr=new u(fr.x1,fr.y1),zr=new u(fr.x2,fr.y1),ui=new u(fr.x1,fr.y2),yi=new u(fr.x2,fr.y2),vn=Lt*Math.PI/180;Pr._rotate(vn),zr._rotate(vn),ui._rotate(vn),yi._rotate(vn),fr.x1=Math.min(Pr.x,zr.x,ui.x,yi.x),fr.x2=Math.max(Pr.x,zr.x,ui.x,yi.x),fr.y1=Math.min(Pr.y,zr.y,ui.y,yi.y),fr.y2=Math.max(Pr.y,zr.y,ui.y,yi.y)}A.emplaceBack(F.x,F.y,fr.x1,fr.y1,fr.x2,fr.y2,W,re,fe)}this.boxEndIndex=A.length}}class Vp{constructor(A=[],F=(W,re)=>W<re?-1:W>re?1:0){if(this.data=A,this.length=this.data.length,this.compare=F,this.length>0)for(let W=(this.length>>1)-1;W>=0;W--)this._down(W)}push(A){this.data.push(A),this._up(this.length++)}pop(){if(this.length===0)return;let A=this.data[0],F=this.data.pop();return--this.length>0&&(this.data[0]=F,this._down(0)),A}peek(){return this.data[0]}_up(A){let{data:F,compare:W}=this,re=F[A];for(;A>0;){let fe=A-1>>1,pe=F[fe];if(W(re,pe)>=0)break;F[A]=pe,A=fe}F[A]=re}_down(A){let{data:F,compare:W}=this,re=this.length>>1,fe=F[A];for(;A<re;){let pe=1+(A<<1),ze=pe+1;if(ze<this.length&&W(F[ze],F[pe])<0&&(pe=ze),W(F[pe],fe)>=0)break;F[A]=F[pe],A=pe}F[A]=fe}}function tO(R,A=1,F=!1){let W=1/0,re=1/0,fe=-1/0,pe=-1/0,ze=R[0];for(let mr=0;mr<ze.length;mr++){let Pr=ze[mr];(!mr||Pr.x<W)&&(W=Pr.x),(!mr||Pr.y<re)&&(re=Pr.y),(!mr||Pr.x>fe)&&(fe=Pr.x),(!mr||Pr.y>pe)&&(pe=Pr.y)}let Ke=Math.min(fe-W,pe-re),ct=Ke/2,Lt=new Vp([],rO);if(Ke===0)return new u(W,re);for(let mr=W;mr<fe;mr+=Ke)for(let Pr=re;Pr<pe;Pr+=Ke)Lt.push(new L1(mr+ct,Pr+ct,ct,R));let $t=function(mr){let Pr=0,zr=0,ui=0,yi=mr[0];for(let vn=0,zi=yi.length,un=zi-1;vn<zi;un=vn++){let Tn=yi[vn],pa=yi[un],ro=Tn.x*pa.y-pa.x*Tn.y;zr+=(Tn.x+pa.x)*ro,ui+=(Tn.y+pa.y)*ro,Pr+=3*ro}return new L1(zr/Pr,ui/Pr,0,mr)}(R),fr=Lt.length;for(;Lt.length;){let mr=Lt.pop();(mr.d>$t.d||!$t.d)&&($t=mr,F&&console.log("found best %d after %d probes",Math.round(1e4*mr.d)/1e4,fr)),mr.max-$t.d<=A||(ct=mr.h/2,Lt.push(new L1(mr.p.x-ct,mr.p.y-ct,ct,R)),Lt.push(new L1(mr.p.x+ct,mr.p.y-ct,ct,R)),Lt.push(new L1(mr.p.x-ct,mr.p.y+ct,ct,R)),Lt.push(new L1(mr.p.x+ct,mr.p.y+ct,ct,R)),fr+=4)}return F&&(console.log(`num probes: ${fr}`),console.log(`best distance: ${$t.d}`)),$t.p}function rO(R,A){return A.max-R.max}function L1(R,A,F,W){this.p=new u(R,A),this.h=F,this.d=function(re,fe){let pe=!1,ze=1/0;for(let Ke=0;Ke<fe.length;Ke++){let ct=fe[Ke];for(let Lt=0,$t=ct.length,fr=$t-1;Lt<$t;fr=Lt++){let mr=ct[Lt],Pr=ct[fr];mr.y>re.y!=Pr.y>re.y&&re.x<(Pr.x-mr.x)*(re.y-mr.y)/(Pr.y-mr.y)+mr.x&&(pe=!pe),ze=Math.min(ze,Gr(re,mr,Pr))}}return(pe?1:-1)*Math.sqrt(ze)}(this.p,W),this.max=this.d+this.h*Math.SQRT2}var sd;i.aq=void 0,(sd=i.aq||(i.aq={}))[sd.center=1]="center",sd[sd.left=2]="left",sd[sd.right=3]="right",sd[sd.top=4]="top",sd[sd.bottom=5]="bottom",sd[sd["top-left"]=6]="top-left",sd[sd["top-right"]=7]="top-right",sd[sd["bottom-left"]=8]="bottom-left",sd[sd["bottom-right"]=9]="bottom-right";let hm=7,hy=Number.POSITIVE_INFINITY;function yS(R,A){return A[1]!==hy?function(F,W,re){let fe=0,pe=0;switch(W=Math.abs(W),re=Math.abs(re),F){case"top-right":case"top-left":case"top":pe=re-hm;break;case"bottom-right":case"bottom-left":case"bottom":pe=-re+hm}switch(F){case"top-right":case"bottom-right":case"right":fe=-W;break;case"top-left":case"bottom-left":case"left":fe=W}return[fe,pe]}(R,A[0],A[1]):function(F,W){let re=0,fe=0;W<0&&(W=0);let pe=W/Math.SQRT2;switch(F){case"top-right":case"top-left":fe=pe-hm;break;case"bottom-right":case"bottom-left":fe=-pe+hm;break;case"bottom":fe=-W+hm;break;case"top":fe=W-hm}switch(F){case"top-right":case"bottom-right":re=-pe;break;case"top-left":case"bottom-left":re=pe;break;case"left":re=W;break;case"right":re=-W}return[re,fe]}(R,A[0])}function VC(R,A,F){var W;let re=R.layout,fe=(W=re.get("text-variable-anchor-offset"))===null||W===void 0?void 0:W.evaluate(A,{},F);if(fe){let ze=fe.values,Ke=[];for(let ct=0;ct<ze.length;ct+=2){let Lt=Ke[ct]=ze[ct],$t=ze[ct+1].map(fr=>fr*eu);Lt.startsWith("top")?$t[1]-=hm:Lt.startsWith("bottom")&&($t[1]+=hm),Ke[ct+1]=$t}return new $i(Ke)}let pe=re.get("text-variable-anchor");if(pe){let ze;ze=R._unevaluatedLayout.getValue("text-radial-offset")!==void 0?[re.get("text-radial-offset").evaluate(A,{},F)*eu,hy]:re.get("text-offset").evaluate(A,{},F).map(ct=>ct*eu);let Ke=[];for(let ct of pe)Ke.push(ct,yS(ct,ze));return new $i(Ke)}return null}function _S(R){switch(R){case"right":case"top-right":case"bottom-right":return"right";case"left":case"top-left":case"bottom-left":return"left"}return"center"}function iO(R,A,F,W,re,fe,pe,ze,Ke,ct,Lt){let $t=fe.textMaxSize.evaluate(A,{});$t===void 0&&($t=pe);let fr=R.layers[0].layout,mr=fr.get("icon-offset").evaluate(A,{},Lt),Pr=HC(F.horizontal),zr=pe/24,ui=R.tilePixelRatio*zr,yi=R.tilePixelRatio*$t/24,vn=R.tilePixelRatio*ze,zi=R.tilePixelRatio*fr.get("symbol-spacing"),un=fr.get("text-padding")*R.tilePixelRatio,Tn=function(Xn,Ro,uo,$o=1){let Ju=Xn.get("icon-padding").evaluate(Ro,{},uo),qu=Ju&&Ju.values;return[qu[0]*$o,qu[1]*$o,qu[2]*$o,qu[3]*$o]}(fr,A,Lt,R.tilePixelRatio),pa=fr.get("text-max-angle")/180*Math.PI,ro=fr.get("text-rotation-alignment")!=="viewport"&&fr.get("symbol-placement")!=="point",Vo=fr.get("icon-rotation-alignment")==="map"&&fr.get("symbol-placement")!=="point",Xa=fr.get("symbol-placement"),sa=zi/2,Mo=fr.get("icon-text-fit"),fo;W&&Mo!=="none"&&(R.allowVerticalPlacement&&F.vertical&&(fo=AC(W,F.vertical,Mo,fr.get("icon-text-fit-padding"),mr,zr)),Pr&&(W=AC(W,Pr,Mo,fr.get("icon-text-fit-padding"),mr,zr)));let lo=(Xn,Ro)=>{Ro.x<0||Ro.x>=ja||Ro.y<0||Ro.y>=ja||function(uo,$o,Ju,qu,Mh,Qv,ld,Eh,Gd,Hd,jd,Af,kh,Ed,ud,Wh,Df,Fv,lv,tu,pc,$u,zv,ff,P1){let v0=uo.addToLineVertexArray($o,Ju),Gp,ep,Gc,Zf,tp=0,gg=0,uv=0,I1=0,AS=-1,Gw=-1,p0={},dy=bi("");if(uo.allowVerticalPlacement&&qu.vertical){let kd=Eh.layout.get("text-rotate").evaluate(pc,{},ff)+90;Gc=new fm(Gd,$o,Hd,jd,Af,qu.vertical,kh,Ed,ud,kd),ld&&(Zf=new fm(Gd,$o,Hd,jd,Af,ld,Df,Fv,ud,kd))}if(Mh){let kd=Eh.layout.get("icon-rotate").evaluate(pc,{}),rp=Eh.layout.get("icon-text-fit")!=="none",dm=NC(Mh,kd,zv,rp),Wd=ld?NC(ld,kd,zv,rp):void 0;ep=new fm(Gd,$o,Hd,jd,Af,Mh,Df,Fv,!1,kd),tp=4*dm.length;let Cd=uo.iconSizeData,_p=null;Cd.kind==="source"?(_p=[d0*Eh.layout.get("icon-size").evaluate(pc,{})],_p[0]>um&&T(`${uo.layerIds[0]}: Value for "icon-size" is >= ${Wx}. Reduce your "icon-size".`)):Cd.kind==="composite"&&(_p=[d0*$u.compositeIconSizes[0].evaluate(pc,{},ff),d0*$u.compositeIconSizes[1].evaluate(pc,{},ff)],(_p[0]>um||_p[1]>um)&&T(`${uo.layerIds[0]}: Value for "icon-size" is >= ${Wx}. Reduce your "icon-size".`)),uo.addSymbols(uo.icon,dm,_p,tu,lv,pc,i.ah.none,$o,v0.lineStartIndex,v0.lineLength,-1,ff),AS=uo.icon.placedSymbolArray.length-1,Wd&&(gg=4*Wd.length,uo.addSymbols(uo.icon,Wd,_p,tu,lv,pc,i.ah.vertical,$o,v0.lineStartIndex,v0.lineLength,-1,ff),Gw=uo.icon.placedSymbolArray.length-1)}let Xh=Object.keys(qu.horizontal);for(let kd of Xh){let rp=qu.horizontal[kd];if(!Gp){dy=bi(rp.text);let Wd=Eh.layout.get("text-rotate").evaluate(pc,{},ff);Gp=new fm(Gd,$o,Hd,jd,Af,rp,kh,Ed,ud,Wd)}let dm=rp.positionedLines.length===1;if(uv+=GC(uo,$o,rp,Qv,Eh,ud,pc,Wh,v0,qu.vertical?i.ah.horizontal:i.ah.horizontalOnly,dm?Xh:[kd],p0,AS,$u,ff),dm)break}qu.vertical&&(I1+=GC(uo,$o,qu.vertical,Qv,Eh,ud,pc,Wh,v0,i.ah.vertical,["vertical"],p0,Gw,$u,ff));let oO=Gp?Gp.boxStartIndex:uo.collisionBoxArray.length,Hw=Gp?Gp.boxEndIndex:uo.collisionBoxArray.length,g0=Gc?Gc.boxStartIndex:uo.collisionBoxArray.length,cv=Gc?Gc.boxEndIndex:uo.collisionBoxArray.length,ZC=ep?ep.boxStartIndex:uo.collisionBoxArray.length,sO=ep?ep.boxEndIndex:uo.collisionBoxArray.length,YC=Zf?Zf.boxStartIndex:uo.collisionBoxArray.length,lO=Zf?Zf.boxEndIndex:uo.collisionBoxArray.length,yp=-1,rb=(kd,rp)=>kd&&kd.circleDiameter?Math.max(kd.circleDiameter,rp):rp;yp=rb(Gp,yp),yp=rb(Gc,yp),yp=rb(ep,yp),yp=rb(Zf,yp);let jw=yp>-1?1:0;jw&&(yp*=P1/eu),uo.glyphOffsetArray.length>=M1.MAX_GLYPHS&&T("Too many glyphs being rendered in a tile. See https://github.com/mapbox/mapbox-gl-js/issues/2907"),pc.sortKey!==void 0&&uo.addToSortKeyRanges(uo.symbolInstances.length,pc.sortKey);let SS=VC(Eh,pc,ff),[uO,cO]=function(kd,rp){let dm=kd.length,Wd=rp==null?void 0:rp.values;if((Wd==null?void 0:Wd.length)>0)for(let Cd=0;Cd<Wd.length;Cd+=2){let _p=Wd[Cd+1];kd.emplaceBack(i.aq[Wd[Cd]],_p[0],_p[1])}return[dm,kd.length]}(uo.textAnchorOffsets,SS);uo.symbolInstances.emplaceBack($o.x,$o.y,p0.right>=0?p0.right:-1,p0.center>=0?p0.center:-1,p0.left>=0?p0.left:-1,p0.vertical||-1,AS,Gw,dy,oO,Hw,g0,cv,ZC,sO,YC,lO,Hd,uv,I1,tp,gg,jw,0,kh,yp,uO,cO)}(R,Ro,Xn,F,W,re,fo,R.layers[0],R.collisionBoxArray,A.index,A.sourceLayerIndex,R.index,ui,[un,un,un,un],ro,Ke,vn,Tn,Vo,mr,A,fe,ct,Lt,pe)};if(Xa==="line")for(let Xn of FC(A.geometry,0,0,ja,ja)){let Ro=eO(Xn,zi,pa,F.vertical||Pr,W,24,yi,R.overscaling,ja);for(let uo of Ro)Pr&&nO(R,Pr.text,sa,uo)||lo(Xn,uo)}else if(Xa==="line-center"){for(let Xn of A.geometry)if(Xn.length>1){let Ro=Q9(Xn,pa,F.vertical||Pr,W,24,yi);Ro&&lo(Xn,Ro)}}else if(A.type==="Polygon")for(let Xn of Cf(A.geometry,0)){let Ro=tO(Xn,16);lo(Xn[0],new vg(Ro.x,Ro.y,0))}else if(A.type==="LineString")for(let Xn of A.geometry)lo(Xn,new vg(Xn[0].x,Xn[0].y,0));else if(A.type==="Point")for(let Xn of A.geometry)for(let Ro of Xn)lo([Ro],new vg(Ro.x,Ro.y,0))}function GC(R,A,F,W,re,fe,pe,ze,Ke,ct,Lt,$t,fr,mr,Pr){let zr=function(vn,zi,un,Tn,pa,ro,Vo,Xa){let sa=Tn.layout.get("text-rotate").evaluate(ro,{})*Math.PI/180,Mo=[];for(let fo of zi.positionedLines)for(let lo of fo.positionedGlyphs){if(!lo.rect)continue;let Xn=lo.rect||{},Ro=_C+1,uo=!0,$o=1,Ju=0,qu=(pa||Xa)&&lo.vertical,Mh=lo.metrics.advance*lo.scale/2;if(Xa&&zi.verticalizable&&(Ju=fo.lineOffset/2-(lo.imageName?-(eu-lo.metrics.width*lo.scale)/2:(lo.scale-1)*eu)),lo.imageName){let tu=Vo[lo.imageName];uo=tu.sdf,$o=tu.pixelRatio,Ro=Md/$o}let Qv=pa?[lo.x+Mh,lo.y]:[0,0],ld=pa?[0,0]:[lo.x+Mh+un[0],lo.y+un[1]-Ju],Eh=[0,0];qu&&(Eh=ld,ld=[0,0]);let Gd=lo.metrics.isDoubleResolution?2:1,Hd=(lo.metrics.left-Ro)*lo.scale-Mh+ld[0],jd=(-lo.metrics.top-Ro)*lo.scale+ld[1],Af=Hd+Xn.w/Gd*lo.scale/$o,kh=jd+Xn.h/Gd*lo.scale/$o,Ed=new u(Hd,jd),ud=new u(Af,jd),Wh=new u(Hd,kh),Df=new u(Af,kh);if(qu){let tu=new u(-Mh,Mh-Sh),pc=-Math.PI/2,$u=eu/2-Mh,zv=new u(5-Sh-$u,-(lo.imageName?$u:0)),ff=new u(...Eh);Ed._rotateAround(pc,tu)._add(zv)._add(ff),ud._rotateAround(pc,tu)._add(zv)._add(ff),Wh._rotateAround(pc,tu)._add(zv)._add(ff),Df._rotateAround(pc,tu)._add(zv)._add(ff)}if(sa){let tu=Math.sin(sa),pc=Math.cos(sa),$u=[pc,-tu,tu,pc];Ed._matMult($u),ud._matMult($u),Wh._matMult($u),Df._matMult($u)}let Fv=new u(0,0),lv=new u(0,0);Mo.push({tl:Ed,tr:ud,bl:Wh,br:Df,tex:Xn,writingMode:zi.writingMode,glyphOffset:Qv,sectionIndex:lo.sectionIndex,isSDF:uo,pixelOffsetTL:Fv,pixelOffsetBR:lv,minFontScaleX:0,minFontScaleY:0})}return Mo}(0,F,ze,re,fe,pe,W,R.allowVerticalPlacement),ui=R.textSizeData,yi=null;ui.kind==="source"?(yi=[d0*re.layout.get("text-size").evaluate(pe,{})],yi[0]>um&&T(`${R.layerIds[0]}: Value for "text-size" is >= ${Wx}. Reduce your "text-size".`)):ui.kind==="composite"&&(yi=[d0*mr.compositeTextSizes[0].evaluate(pe,{},Pr),d0*mr.compositeTextSizes[1].evaluate(pe,{},Pr)],(yi[0]>um||yi[1]>um)&&T(`${R.layerIds[0]}: Value for "text-size" is >= ${Wx}. Reduce your "text-size".`)),R.addSymbols(R.text,zr,yi,ze,fe,pe,ct,A,Ke.lineStartIndex,Ke.lineLength,fr,Pr);for(let vn of Lt)$t[vn]=R.text.placedSymbolArray.length-1;return 4*zr.length}function HC(R){for(let A in R)return R[A];return null}function nO(R,A,F,W){let re=R.compareText;if(A in re){let fe=re[A];for(let pe=fe.length-1;pe>=0;pe--)if(W.dist(fe[pe])<F)return!0}else re[A]=[];return re[A].push(W),!1}let jC=[Int8Array,Uint8Array,Uint8ClampedArray,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array];class xS{static from(A){if(!(A instanceof ArrayBuffer))throw new Error("Data must be an instance of ArrayBuffer.");let[F,W]=new Uint8Array(A,0,2);if(F!==219)throw new Error("Data does not appear to be in a KDBush format.");let re=W>>4;if(re!==1)throw new Error(`Got v${re} data when expected v1.`);let fe=jC[15&W];if(!fe)throw new Error("Unrecognized array type.");let[pe]=new Uint16Array(A,2,1),[ze]=new Uint32Array(A,4,1);return new xS(ze,pe,fe,A)}constructor(A,F=64,W=Float64Array,re){if(isNaN(A)||A<0)throw new Error(`Unpexpected numItems value: ${A}.`);this.numItems=+A,this.nodeSize=Math.min(Math.max(+F,2),65535),this.ArrayType=W,this.IndexArrayType=A<65536?Uint16Array:Uint32Array;let fe=jC.indexOf(this.ArrayType),pe=2*A*this.ArrayType.BYTES_PER_ELEMENT,ze=A*this.IndexArrayType.BYTES_PER_ELEMENT,Ke=(8-ze%8)%8;if(fe<0)throw new Error(`Unexpected typed array class: ${W}.`);re&&re instanceof ArrayBuffer?(this.data=re,this.ids=new this.IndexArrayType(this.data,8,A),this.coords=new this.ArrayType(this.data,8+ze+Ke,2*A),this._pos=2*A,this._finished=!0):(this.data=new ArrayBuffer(8+pe+ze+Ke),this.ids=new this.IndexArrayType(this.data,8,A),this.coords=new this.ArrayType(this.data,8+ze+Ke,2*A),this._pos=0,this._finished=!1,new Uint8Array(this.data,0,2).set([219,16+fe]),new Uint16Array(this.data,2,1)[0]=F,new Uint32Array(this.data,4,1)[0]=A)}add(A,F){let W=this._pos>>1;return this.ids[W]=W,this.coords[this._pos++]=A,this.coords[this._pos++]=F,W}finish(){let A=this._pos>>1;if(A!==this.numItems)throw new Error(`Added ${A} items when expected ${this.numItems}.`);return Nw(this.ids,this.coords,this.nodeSize,0,this.numItems-1,0),this._finished=!0,this}range(A,F,W,re){if(!this._finished)throw new Error("Data not yet indexed - call index.finish().");let{ids:fe,coords:pe,nodeSize:ze}=this,Ke=[0,fe.length-1,0],ct=[];for(;Ke.length;){let Lt=Ke.pop()||0,$t=Ke.pop()||0,fr=Ke.pop()||0;if($t-fr<=ze){for(let ui=fr;ui<=$t;ui++){let yi=pe[2*ui],vn=pe[2*ui+1];yi>=A&&yi<=W&&vn>=F&&vn<=re&&ct.push(fe[ui])}continue}let mr=fr+$t>>1,Pr=pe[2*mr],zr=pe[2*mr+1];Pr>=A&&Pr<=W&&zr>=F&&zr<=re&&ct.push(fe[mr]),(Lt===0?A<=Pr:F<=zr)&&(Ke.push(fr),Ke.push(mr-1),Ke.push(1-Lt)),(Lt===0?W>=Pr:re>=zr)&&(Ke.push(mr+1),Ke.push($t),Ke.push(1-Lt))}return ct}within(A,F,W){if(!this._finished)throw new Error("Data not yet indexed - call index.finish().");let{ids:re,coords:fe,nodeSize:pe}=this,ze=[0,re.length-1,0],Ke=[],ct=W*W;for(;ze.length;){let Lt=ze.pop()||0,$t=ze.pop()||0,fr=ze.pop()||0;if($t-fr<=pe){for(let ui=fr;ui<=$t;ui++)XC(fe[2*ui],fe[2*ui+1],A,F)<=ct&&Ke.push(re[ui]);continue}let mr=fr+$t>>1,Pr=fe[2*mr],zr=fe[2*mr+1];XC(Pr,zr,A,F)<=ct&&Ke.push(re[mr]),(Lt===0?A-W<=Pr:F-W<=zr)&&(ze.push(fr),ze.push(mr-1),ze.push(1-Lt)),(Lt===0?A+W>=Pr:F+W>=zr)&&(ze.push(mr+1),ze.push($t),ze.push(1-Lt))}return Ke}}function Nw(R,A,F,W,re,fe){if(re-W<=F)return;let pe=W+re>>1;WC(R,A,pe,W,re,fe),Nw(R,A,F,W,pe-1,1-fe),Nw(R,A,F,pe+1,re,1-fe)}function WC(R,A,F,W,re,fe){for(;re>W;){if(re-W>600){let ct=re-W+1,Lt=F-W+1,$t=Math.log(ct),fr=.5*Math.exp(2*$t/3),mr=.5*Math.sqrt($t*fr*(ct-fr)/ct)*(Lt-ct/2<0?-1:1);WC(R,A,F,Math.max(W,Math.floor(F-Lt*fr/ct+mr)),Math.min(re,Math.floor(F+(ct-Lt)*fr/ct+mr)),fe)}let pe=A[2*F+fe],ze=W,Ke=re;for(eb(R,A,W,F),A[2*re+fe]>pe&&eb(R,A,W,re);ze<Ke;){for(eb(R,A,ze,Ke),ze++,Ke--;A[2*ze+fe]<pe;)ze++;for(;A[2*Ke+fe]>pe;)Ke--}A[2*W+fe]===pe?eb(R,A,W,Ke):(Ke++,eb(R,A,Ke,re)),Ke<=F&&(W=Ke+1),F<=Ke&&(re=Ke-1)}}function eb(R,A,F,W){bS(R,F,W),bS(A,2*F,2*W),bS(A,2*F+1,2*W+1)}function bS(R,A,F){let W=R[A];R[A]=R[F],R[F]=W}function XC(R,A,F,W){let re=R-F,fe=A-W;return re*re+fe*fe}var Uw;i.bg=void 0,(Uw=i.bg||(i.bg={})).create="create",Uw.load="load",Uw.fullLoad="fullLoad";let tb=null,sh=[],wS=1e3/60,TS="loadTime",Vw="fullLoadTime",aO={mark(R){performance.mark(R)},frame(R){let A=R;tb!=null&&sh.push(A-tb),tb=A},clearMetrics(){tb=null,sh=[],performance.clearMeasures(TS),performance.clearMeasures(Vw);for(let R in i.bg)performance.clearMarks(i.bg[R])},getPerformanceMetrics(){performance.measure(TS,i.bg.create,i.bg.load),performance.measure(Vw,i.bg.create,i.bg.fullLoad);let R=performance.getEntriesByName(TS)[0].duration,A=performance.getEntriesByName(Vw)[0].duration,F=sh.length,W=1/(sh.reduce((fe,pe)=>fe+pe,0)/F/1e3),re=sh.filter(fe=>fe>wS).reduce((fe,pe)=>fe+(pe-wS)/wS,0);return{loadTime:R,fullLoadTime:A,fps:W,percentDroppedFrames:re/(F+re)*100,totalFrames:F}}};i.$=class extends Yt{},i.A=xi,i.B=dn,i.C=function(R){if(V==null){let A=R.navigator?R.navigator.userAgent:null;V=!!R.safari||!(!A||!(/\b(iPad|iPhone|iPod)\b/.test(A)||A.match("Safari")&&!A.match("Chrome")))}return V},i.D=Ua,i.E=Re,i.F=class{constructor(R,A){this.target=R,this.mapId=A,this.resolveRejects={},this.tasks={},this.taskQueue=[],this.abortControllers={},this.messageHandlers={},this.invoker=new dS(()=>this.process()),this.subscription=function(F,W,re,fe){return F.addEventListener(W,re,!1),{unsubscribe:()=>{F.removeEventListener(W,re,!1)}}}(this.target,"message",F=>this.receive(F)),this.globalScope=O(self)?R:window}registerMessageHandler(R,A){this.messageHandlers[R]=A}sendAsync(R,A){return new Promise((F,W)=>{let re=Math.round(1e18*Math.random()).toString(36).substring(0,10);this.resolveRejects[re]={resolve:F,reject:W},A&&A.signal.addEventListener("abort",()=>{delete this.resolveRejects[re];let ze={id:re,type:"<cancel>",origin:location.origin,targetMapId:R.targetMapId,sourceMapId:this.mapId};this.target.postMessage(ze)},{once:!0});let fe=[],pe=Object.assign(Object.assign({},R),{id:re,sourceMapId:this.mapId,origin:location.origin,data:La(R.data,fe)});this.target.postMessage(pe,{transfer:fe})})}receive(R){let A=R.data,F=A.id;if(!(A.origin!=="file://"&&location.origin!=="file://"&&A.origin!=="resource://android"&&location.origin!=="resource://android"&&A.origin!==location.origin||A.targetMapId&&this.mapId!==A.targetMapId)){if(A.type==="<cancel>"){delete this.tasks[F];let W=this.abortControllers[F];return delete this.abortControllers[F],void(W&&W.abort())}if(O(self)||A.mustQueue)return this.tasks[F]=A,this.taskQueue.push(F),void this.invoker.trigger();this.processTask(F,A)}}process(){if(this.taskQueue.length===0)return;let R=this.taskQueue.shift(),A=this.tasks[R];delete this.tasks[R],this.taskQueue.length>0&&this.invoker.trigger(),A&&this.processTask(R,A)}processTask(R,A){return a(this,void 0,void 0,function*(){if(A.type==="<response>"){let re=this.resolveRejects[R];return delete this.resolveRejects[R],re?void(A.error?re.reject(Na(A.error)):re.resolve(Na(A.data))):void 0}if(!this.messageHandlers[A.type])return void this.completeTask(R,new Error(`Could not find a registered handler for ${A.type}, map ID: ${this.mapId}, available handlers: ${Object.keys(this.messageHandlers).join(", ")}`));let F=Na(A.data),W=new AbortController;this.abortControllers[R]=W;try{let re=yield this.messageHandlers[A.type](A.sourceMapId,F,W);this.completeTask(R,null,re)}catch(re){this.completeTask(R,re)}})}completeTask(R,A,F){let W=[];delete this.abortControllers[R];let re={id:R,type:"<response>",sourceMapId:this.mapId,origin:location.origin,error:A?La(A):null,data:La(F,W)};this.target.postMessage(re,{transfer:W})}remove(){this.invoker.remove(),this.subscription.unsubscribe()}},i.G=Ce,i.H=function(){var R=new xi(16);return xi!=Float32Array&&(R[1]=0,R[2]=0,R[3]=0,R[4]=0,R[6]=0,R[7]=0,R[8]=0,R[9]=0,R[11]=0,R[12]=0,R[13]=0,R[14]=0),R[0]=1,R[5]=1,R[10]=1,R[15]=1,R},i.I=Lw,i.J=function(R,A,F){var W,re,fe,pe,ze,Ke,ct,Lt,$t,fr,mr,Pr,zr=F[0],ui=F[1],yi=F[2];return A===R?(R[12]=A[0]*zr+A[4]*ui+A[8]*yi+A[12],R[13]=A[1]*zr+A[5]*ui+A[9]*yi+A[13],R[14]=A[2]*zr+A[6]*ui+A[10]*yi+A[14],R[15]=A[3]*zr+A[7]*ui+A[11]*yi+A[15]):(re=A[1],fe=A[2],pe=A[3],ze=A[4],Ke=A[5],ct=A[6],Lt=A[7],$t=A[8],fr=A[9],mr=A[10],Pr=A[11],R[0]=W=A[0],R[1]=re,R[2]=fe,R[3]=pe,R[4]=ze,R[5]=Ke,R[6]=ct,R[7]=Lt,R[8]=$t,R[9]=fr,R[10]=mr,R[11]=Pr,R[12]=W*zr+ze*ui+$t*yi+A[12],R[13]=re*zr+Ke*ui+fr*yi+A[13],R[14]=fe*zr+ct*ui+mr*yi+A[14],R[15]=pe*zr+Lt*ui+Pr*yi+A[15]),R},i.K=function(R,A,F){var W=F[0],re=F[1],fe=F[2];return R[0]=A[0]*W,R[1]=A[1]*W,R[2]=A[2]*W,R[3]=A[3]*W,R[4]=A[4]*re,R[5]=A[5]*re,R[6]=A[6]*re,R[7]=A[7]*re,R[8]=A[8]*fe,R[9]=A[9]*fe,R[10]=A[10]*fe,R[11]=A[11]*fe,R[12]=A[12],R[13]=A[13],R[14]=A[14],R[15]=A[15],R},i.L=ci,i.M=function(R,A){let F={};for(let W=0;W<A.length;W++){let re=A[W];re in R&&(F[re]=R[re])}return F},i.N=dg,i.O=Ow,i.P=u,i.Q=CC,i.R=Ki,i.S=$v,i.T=Yu,i.U=_,i.V=b,i.W=j,i.X=ja,i.Y=qe,i.Z=$x,i._=a,i.a=_e,i.a$=function(R,A){var F=R[0],W=R[1],re=R[2],fe=R[3],pe=R[4],ze=R[5],Ke=R[6],ct=R[7],Lt=R[8],$t=R[9],fr=R[10],mr=R[11],Pr=R[12],zr=R[13],ui=R[14],yi=R[15],vn=A[0],zi=A[1],un=A[2],Tn=A[3],pa=A[4],ro=A[5],Vo=A[6],Xa=A[7],sa=A[8],Mo=A[9],fo=A[10],lo=A[11],Xn=A[12],Ro=A[13],uo=A[14],$o=A[15];return Math.abs(F-vn)<=Yr*Math.max(1,Math.abs(F),Math.abs(vn))&&Math.abs(W-zi)<=Yr*Math.max(1,Math.abs(W),Math.abs(zi))&&Math.abs(re-un)<=Yr*Math.max(1,Math.abs(re),Math.abs(un))&&Math.abs(fe-Tn)<=Yr*Math.max(1,Math.abs(fe),Math.abs(Tn))&&Math.abs(pe-pa)<=Yr*Math.max(1,Math.abs(pe),Math.abs(pa))&&Math.abs(ze-ro)<=Yr*Math.max(1,Math.abs(ze),Math.abs(ro))&&Math.abs(Ke-Vo)<=Yr*Math.max(1,Math.abs(Ke),Math.abs(Vo))&&Math.abs(ct-Xa)<=Yr*Math.max(1,Math.abs(ct),Math.abs(Xa))&&Math.abs(Lt-sa)<=Yr*Math.max(1,Math.abs(Lt),Math.abs(sa))&&Math.abs($t-Mo)<=Yr*Math.max(1,Math.abs($t),Math.abs(Mo))&&Math.abs(fr-fo)<=Yr*Math.max(1,Math.abs(fr),Math.abs(fo))&&Math.abs(mr-lo)<=Yr*Math.max(1,Math.abs(mr),Math.abs(lo))&&Math.abs(Pr-Xn)<=Yr*Math.max(1,Math.abs(Pr),Math.abs(Xn))&&Math.abs(zr-Ro)<=Yr*Math.max(1,Math.abs(zr),Math.abs(Ro))&&Math.abs(ui-uo)<=Yr*Math.max(1,Math.abs(ui),Math.abs(uo))&&Math.abs(yi-$o)<=Yr*Math.max(1,Math.abs(yi),Math.abs($o))},i.a0=Ye,i.a1=pS,i.a2=cr,i.a3=R=>{let A=window.document.createElement("video");return A.muted=!0,new Promise(F=>{A.onloadstart=()=>{F(A)};for(let W of R){let re=window.document.createElement("source");Le(W)||(A.crossOrigin="Anonymous"),re.src=W,A.appendChild(re)}})},i.a4=function(){return x++},i.a5=Cn,i.a6=M1,i.a7=Fc,i.a8=Sl,i.a9=gS,i.aA=function(R){if(R.type==="custom")return new hS(R);switch(R.type){case"background":return new J9(R);case"circle":return new hi(R);case"fill":return new yt(R);case"fill-extrusion":return new Lv(R);case"heatmap":return new oa(R);case"hillshade":return new Zs(R);case"line":return new oy(R);case"raster":return new Kx(R);case"symbol":return new cy(R)}},i.aB=g,i.aC=function(R,A){if(!R)return[{command:"setStyle",args:[A]}];let F=[];try{if(!pt(R.version,A.version))return[{command:"setStyle",args:[A]}];pt(R.center,A.center)||F.push({command:"setCenter",args:[A.center]}),pt(R.zoom,A.zoom)||F.push({command:"setZoom",args:[A.zoom]}),pt(R.bearing,A.bearing)||F.push({command:"setBearing",args:[A.bearing]}),pt(R.pitch,A.pitch)||F.push({command:"setPitch",args:[A.pitch]}),pt(R.sprite,A.sprite)||F.push({command:"setSprite",args:[A.sprite]}),pt(R.glyphs,A.glyphs)||F.push({command:"setGlyphs",args:[A.glyphs]}),pt(R.transition,A.transition)||F.push({command:"setTransition",args:[A.transition]}),pt(R.light,A.light)||F.push({command:"setLight",args:[A.light]}),pt(R.terrain,A.terrain)||F.push({command:"setTerrain",args:[A.terrain]}),pt(R.sky,A.sky)||F.push({command:"setSky",args:[A.sky]}),pt(R.projection,A.projection)||F.push({command:"setProjection",args:[A.projection]});let W={},re=[];(function(pe,ze,Ke,ct){let Lt;for(Lt in ze=ze||{},pe=pe||{})Object.prototype.hasOwnProperty.call(pe,Lt)&&(Object.prototype.hasOwnProperty.call(ze,Lt)||lt(Lt,Ke,ct));for(Lt in ze)Object.prototype.hasOwnProperty.call(ze,Lt)&&(Object.prototype.hasOwnProperty.call(pe,Lt)?pt(pe[Lt],ze[Lt])||(pe[Lt].type==="geojson"&&ze[Lt].type==="geojson"&&Nt(pe,ze,Lt)?Zt(Ke,{command:"setGeoJSONSourceData",args:[Lt,ze[Lt].data]}):Gt(Lt,ze,Ke,ct)):st(Lt,ze,Ke))})(R.sources,A.sources,re,W);let fe=[];R.layers&&R.layers.forEach(pe=>{"source"in pe&&W[pe.source]?F.push({command:"removeLayer",args:[pe.id]}):fe.push(pe)}),F=F.concat(re),function(pe,ze,Ke){ze=ze||[];let ct=(pe=pe||[]).map(sr),Lt=ze.map(sr),$t=pe.reduce(wr,{}),fr=ze.reduce(wr,{}),mr=ct.slice(),Pr=Object.create(null),zr,ui,yi,vn,zi;for(let un=0,Tn=0;un<ct.length;un++)zr=ct[un],Object.prototype.hasOwnProperty.call(fr,zr)?Tn++:(Zt(Ke,{command:"removeLayer",args:[zr]}),mr.splice(mr.indexOf(zr,Tn),1));for(let un=0,Tn=0;un<Lt.length;un++)zr=Lt[Lt.length-1-un],mr[mr.length-1-un]!==zr&&(Object.prototype.hasOwnProperty.call($t,zr)?(Zt(Ke,{command:"removeLayer",args:[zr]}),mr.splice(mr.lastIndexOf(zr,mr.length-Tn),1)):Tn++,vn=mr[mr.length-un],Zt(Ke,{command:"addLayer",args:[fr[zr],vn]}),mr.splice(mr.length-un,0,zr),Pr[zr]=!0);for(let un=0;un<Lt.length;un++)if(zr=Lt[un],ui=$t[zr],yi=fr[zr],!Pr[zr]&&!pt(ui,yi))if(pt(ui.source,yi.source)&&pt(ui["source-layer"],yi["source-layer"])&&pt(ui.type,yi.type)){for(zi in Jt(ui.layout,yi.layout,Ke,zr,null,"setLayoutProperty"),Jt(ui.paint,yi.paint,Ke,zr,null,"setPaintProperty"),pt(ui.filter,yi.filter)||Zt(Ke,{command:"setFilter",args:[zr,yi.filter]}),pt(ui.minzoom,yi.minzoom)&&pt(ui.maxzoom,yi.maxzoom)||Zt(Ke,{command:"setLayerZoomRange",args:[zr,yi.minzoom,yi.maxzoom]}),ui)Object.prototype.hasOwnProperty.call(ui,zi)&&zi!=="layout"&&zi!=="paint"&&zi!=="filter"&&zi!=="metadata"&&zi!=="minzoom"&&zi!=="maxzoom"&&(zi.indexOf("paint.")===0?Jt(ui[zi],yi[zi],Ke,zr,zi.slice(6),"setPaintProperty"):pt(ui[zi],yi[zi])||Zt(Ke,{command:"setLayerProperty",args:[zr,zi,yi[zi]]}));for(zi in yi)Object.prototype.hasOwnProperty.call(yi,zi)&&!Object.prototype.hasOwnProperty.call(ui,zi)&&zi!=="layout"&&zi!=="paint"&&zi!=="filter"&&zi!=="metadata"&&zi!=="minzoom"&&zi!=="maxzoom"&&(zi.indexOf("paint.")===0?Jt(ui[zi],yi[zi],Ke,zr,zi.slice(6),"setPaintProperty"):pt(ui[zi],yi[zi])||Zt(Ke,{command:"setLayerProperty",args:[zr,zi,yi[zi]]}))}else Zt(Ke,{command:"removeLayer",args:[zr]}),vn=mr[mr.lastIndexOf(zr)+1],Zt(Ke,{command:"addLayer",args:[yi,vn]})}(fe,A.layers,F)}catch(W){console.warn("Unable to compute style diff:",W),F=[{command:"setStyle",args:[A]}]}return F},i.aD=function(R){let A=[],F=R.id;return F===void 0&&A.push({message:`layers.${F}: missing required property "id"`}),R.render===void 0&&A.push({message:`layers.${F}: missing required method "render"`}),R.renderingMode&&R.renderingMode!=="2d"&&R.renderingMode!=="3d"&&A.push({message:`layers.${F}: property "renderingMode" must be either "2d" or "3d"`}),A},i.aE=function R(A,F){if(Array.isArray(A)){if(!Array.isArray(F)||A.length!==F.length)return!1;for(let W=0;W<A.length;W++)if(!R(A[W],F[W]))return!1;return!0}if(typeof A=="object"&&A!==null&&F!==null){if(typeof F!="object"||Object.keys(A).length!==Object.keys(F).length)return!1;for(let W in A)if(!R(A[W],F[W]))return!1;return!0}return A===F},i.aF=C,i.aG=M,i.aH=class extends Si{constructor(R,A){super(R,A),this.current=0}set(R){this.current!==R&&(this.current=R,this.gl.uniform1i(this.location,R))}},i.aI=ei,i.aJ=class extends Si{constructor(R,A){super(R,A),this.current=Un}set(R){if(R[12]!==this.current[12]||R[0]!==this.current[0])return this.current=R,void this.gl.uniformMatrix4fv(this.location,!1,R);for(let A=1;A<16;A++)if(R[A]!==this.current[A]){this.current=R,this.gl.uniformMatrix4fv(this.location,!1,R);break}}},i.aK=Ln,i.aL=En,i.aM=nr,i.aN=class extends Si{constructor(R,A){super(R,A),this.current=[0,0,0]}set(R){R[0]===this.current[0]&&R[1]===this.current[1]&&R[2]===this.current[2]||(this.current=R,this.gl.uniform3f(this.location,R[0],R[1],R[2]))}},i.aO=class extends Si{constructor(R,A){super(R,A),this.current=[0,0]}set(R){R[0]===this.current[0]&&R[1]===this.current[1]||(this.current=R,this.gl.uniform2f(this.location,R[0],R[1]))}},i.aP=function(R,A,F,W,re,fe,pe){var ze=1/(A-F),Ke=1/(W-re),ct=1/(fe-pe);return R[0]=-2*ze,R[1]=0,R[2]=0,R[3]=0,R[4]=0,R[5]=-2*Ke,R[6]=0,R[7]=0,R[8]=0,R[9]=0,R[10]=2*ct,R[11]=0,R[12]=(A+F)*ze,R[13]=(re+W)*Ke,R[14]=(pe+fe)*ct,R[15]=1,R},i.aQ=Zi,i.aR=class extends it{},i.aS=nm,i.aT=class extends Ht{},i.aU=ta,i.aV=function(R){return R<=1?1:Math.pow(2,Math.ceil(Math.log(R)/Math.LN2))},i.aW=kn,i.aX=wo,i.aY=se,i.aZ=class extends Pi{},i.a_=function(R,A){return R[0]===A[0]&&R[1]===A[1]&&R[2]===A[2]&&R[3]===A[3]&&R[4]===A[4]&&R[5]===A[5]&&R[6]===A[6]&&R[7]===A[7]&&R[8]===A[8]&&R[9]===A[9]&&R[10]===A[10]&&R[11]===A[11]&&R[12]===A[12]&&R[13]===A[13]&&R[14]===A[14]&&R[15]===A[15]},i.aa=function(R){let A={};if(R.replace(/(?:^|(?:\s*\,\s*))([^\x00-\x20\(\)<>@\,;\:\\"\/\[\]\?\=\{\}\x7F]+)(?:\=(?:([^\x00-\x20\(\)<>@\,;\:\\"\/\[\]\?\=\{\}\x7F]+)|(?:\"((?:[^"\\]|\\.)*)\")))?/g,(F,W,re,fe)=>{let pe=re||fe;return A[W]=!pe||pe.toLowerCase(),""}),A["max-age"]){let F=parseInt(A["max-age"],10);isNaN(F)?delete A["max-age"]:A["max-age"]=F}return A},i.ab=function(R,A){let F=[];for(let W in R)W in A||F.push(W);return F},i.ac=E,i.ad=function(R,A,F){var W=Math.sin(F),re=Math.cos(F),fe=A[0],pe=A[1],ze=A[2],Ke=A[3],ct=A[4],Lt=A[5],$t=A[6],fr=A[7];return A!==R&&(R[8]=A[8],R[9]=A[9],R[10]=A[10],R[11]=A[11],R[12]=A[12],R[13]=A[13],R[14]=A[14],R[15]=A[15]),R[0]=fe*re+ct*W,R[1]=pe*re+Lt*W,R[2]=ze*re+$t*W,R[3]=Ke*re+fr*W,R[4]=ct*re-fe*W,R[5]=Lt*re-pe*W,R[6]=$t*re-ze*W,R[7]=fr*re-Ke*W,R},i.ae=function(R){var A=new xi(16);return A[0]=R[0],A[1]=R[1],A[2]=R[2],A[3]=R[3],A[4]=R[4],A[5]=R[5],A[6]=R[6],A[7]=R[7],A[8]=R[8],A[9]=R[9],A[10]=R[10],A[11]=R[11],A[12]=R[12],A[13]=R[13],A[14]=R[14],A[15]=R[15],A},i.af=Bn,i.ag=function(R,A){let F=0,W=0;if(R.kind==="constant")W=R.layoutSize;else if(R.kind!=="source"){let{interpolationType:re,minZoom:fe,maxZoom:pe}=R,ze=re?E(ko.interpolationFactor(re,A,fe,pe),0,1):0;R.kind==="camera"?W=Lo.number(R.minSize,R.maxSize,ze):F=ze}return{uSizeT:F,uSize:W}},i.ai=function(R,{uSize:A,uSizeT:F},{lowerSize:W,upperSize:re}){return R.kind==="source"?W/d0:R.kind==="composite"?Lo.number(W/d0,re/d0,F):A},i.aj=lS,i.ak=function(R,A,F,W){let re=A.y-R.y,fe=A.x-R.x,pe=W.y-F.y,ze=W.x-F.x,Ke=pe*fe-ze*re;if(Ke===0)return null;let ct=(ze*(R.y-F.y)-pe*(R.x-F.x))/Ke;return new u(R.x+ct*fe,R.y+ct*re)},i.al=FC,i.am=Sc,i.an=Ri,i.ao=function(R){let A=1/0,F=1/0,W=-1/0,re=-1/0;for(let fe of R)A=Math.min(A,fe.x),F=Math.min(F,fe.y),W=Math.max(W,fe.x),re=Math.max(re,fe.y);return[A,F,W,re]},i.ap=eu,i.ar=sS,i.as=function(R,A){var F=A[0],W=A[1],re=A[2],fe=A[3],pe=A[4],ze=A[5],Ke=A[6],ct=A[7],Lt=A[8],$t=A[9],fr=A[10],mr=A[11],Pr=A[12],zr=A[13],ui=A[14],yi=A[15],vn=F*ze-W*pe,zi=F*Ke-re*pe,un=F*ct-fe*pe,Tn=W*Ke-re*ze,pa=W*ct-fe*ze,ro=re*ct-fe*Ke,Vo=Lt*zr-$t*Pr,Xa=Lt*ui-fr*Pr,sa=Lt*yi-mr*Pr,Mo=$t*ui-fr*zr,fo=$t*yi-mr*zr,lo=fr*yi-mr*ui,Xn=vn*lo-zi*fo+un*Mo+Tn*sa-pa*Xa+ro*Vo;return Xn?(R[0]=(ze*lo-Ke*fo+ct*Mo)*(Xn=1/Xn),R[1]=(re*fo-W*lo-fe*Mo)*Xn,R[2]=(zr*ro-ui*pa+yi*Tn)*Xn,R[3]=(fr*pa-$t*ro-mr*Tn)*Xn,R[4]=(Ke*sa-pe*lo-ct*Xa)*Xn,R[5]=(F*lo-re*sa+fe*Xa)*Xn,R[6]=(ui*un-Pr*ro-yi*zi)*Xn,R[7]=(Lt*ro-fr*un+mr*zi)*Xn,R[8]=(pe*fo-ze*sa+ct*Vo)*Xn,R[9]=(W*sa-F*fo-fe*Vo)*Xn,R[10]=(Pr*pa-zr*un+yi*vn)*Xn,R[11]=($t*un-Lt*pa-mr*vn)*Xn,R[12]=(ze*Xa-pe*Mo-Ke*Vo)*Xn,R[13]=(F*Mo-W*Xa+re*Vo)*Xn,R[14]=(zr*zi-Pr*Tn-ui*vn)*Xn,R[15]=(Lt*Tn-$t*zi+fr*vn)*Xn,R):null},i.at=_S,i.au=Dw,i.av=xS,i.aw=function(){let R={},A=ce.$version;for(let F in ce.$root){let W=ce.$root[F];if(W.required){let re=null;re=F==="version"?A:W.type==="array"?[]:{},re!=null&&(R[F]=re)}}return R},i.ax=Yn,i.ay=ie,i.az=function(R){R=R.slice();let A=Object.create(null);for(let F=0;F<R.length;F++)A[R[F].id]=R[F];for(let F=0;F<R.length;F++)"ref"in R[F]&&(R[F]=ut(R[F],A[R[F].ref]));return R},i.b=G,i.b0=function(R,A){return R[0]=A[0],R[1]=A[1],R[2]=A[2],R[3]=A[3],R[4]=A[4],R[5]=A[5],R[6]=A[6],R[7]=A[7],R[8]=A[8],R[9]=A[9],R[10]=A[10],R[11]=A[11],R[12]=A[12],R[13]=A[13],R[14]=A[14],R[15]=A[15],R},i.b1=function(R,A,F){return R[0]=A[0]*F[0],R[1]=A[1]*F[1],R[2]=A[2]*F[2],R[3]=A[3]*F[3],R},i.b2=function(R,A){return R[0]*A[0]+R[1]*A[1]+R[2]*A[2]+R[3]*A[3]},i.b3=S,i.b4=LC,i.b5=qw,i.b6=function(R,A,F,W,re){var fe,pe=1/Math.tan(A/2);return R[0]=pe/F,R[1]=0,R[2]=0,R[3]=0,R[4]=0,R[5]=pe,R[6]=0,R[7]=0,R[8]=0,R[9]=0,R[11]=-1,R[12]=0,R[13]=0,R[15]=0,re!=null&&re!==1/0?(R[10]=(re+W)*(fe=1/(W-re)),R[14]=2*re*W*fe):(R[10]=-1,R[14]=-2*W),R},i.b7=function(R,A,F){var W=Math.sin(F),re=Math.cos(F),fe=A[4],pe=A[5],ze=A[6],Ke=A[7],ct=A[8],Lt=A[9],$t=A[10],fr=A[11];return A!==R&&(R[0]=A[0],R[1]=A[1],R[2]=A[2],R[3]=A[3],R[12]=A[12],R[13]=A[13],R[14]=A[14],R[15]=A[15]),R[4]=fe*re+ct*W,R[5]=pe*re+Lt*W,R[6]=ze*re+$t*W,R[7]=Ke*re+fr*W,R[8]=ct*re-fe*W,R[9]=Lt*re-pe*W,R[10]=$t*re-ze*W,R[11]=fr*re-Ke*W,R},i.b8=p,i.b9=k,i.bA=Sd,i.bB=function(R){return R.message===te},i.bC=ks,i.bD=ys,i.ba=function(R){return R*Math.PI/180},i.bb=function(R,A){let{x:F,y:W}=$x.fromLngLat(A);return!(R<0||R>25||W<0||W>=1||F<0||F>=1)},i.bc=function(R,A){return R[0]=A[0],R[1]=0,R[2]=0,R[3]=0,R[4]=0,R[5]=A[1],R[6]=0,R[7]=0,R[8]=0,R[9]=0,R[10]=A[2],R[11]=0,R[12]=0,R[13]=0,R[14]=0,R[15]=1,R},i.bd=class extends Tt{},i.be=vS,i.bf=aO,i.bh=me,i.bi=function(R,A){_e.REGISTERED_PROTOCOLS[R]=A},i.bj=function(R){delete _e.REGISTERED_PROTOCOLS[R]},i.bk=function(R,A){let F={};for(let re=0;re<R.length;re++){let fe=A&&A[R[re].id]||Hh(R[re]);A&&(A[R[re].id]=fe);let pe=F[fe];pe||(pe=F[fe]=[]),pe.push(R[re])}let W=[];for(let re in F)W.push(F[re]);return W},i.bl=Fi,i.bm=IC,i.bn=fy,i.bo=Pw,i.bp=function(R){R.bucket.createArrays(),R.bucket.tilePixelRatio=ja/(512*R.bucket.overscaling),R.bucket.compareText={},R.bucket.iconsNeedLinear=!1;let A=R.bucket.layers[0],F=A.layout,W=A._unevaluatedLayout._values,re={layoutIconSize:W["icon-size"].possiblyEvaluate(new rs(R.bucket.zoom+1),R.canonical),layoutTextSize:W["text-size"].possiblyEvaluate(new rs(R.bucket.zoom+1),R.canonical),textMaxSize:W["text-size"].possiblyEvaluate(new rs(18))};if(R.bucket.textSizeData.kind==="composite"){let{minZoom:ct,maxZoom:Lt}=R.bucket.textSizeData;re.compositeTextSizes=[W["text-size"].possiblyEvaluate(new rs(ct),R.canonical),W["text-size"].possiblyEvaluate(new rs(Lt),R.canonical)]}if(R.bucket.iconSizeData.kind==="composite"){let{minZoom:ct,maxZoom:Lt}=R.bucket.iconSizeData;re.compositeIconSizes=[W["icon-size"].possiblyEvaluate(new rs(ct),R.canonical),W["icon-size"].possiblyEvaluate(new rs(Lt),R.canonical)]}let fe=F.get("text-line-height")*eu,pe=F.get("text-rotation-alignment")!=="viewport"&&F.get("symbol-placement")!=="point",ze=F.get("text-keep-upright"),Ke=F.get("text-size");for(let ct of R.bucket.features){let Lt=F.get("text-font").evaluate(ct,{},R.canonical).join(","),$t=Ke.evaluate(ct,{},R.canonical),fr=re.layoutTextSize.evaluate(ct,{},R.canonical),mr=re.layoutIconSize.evaluate(ct,{},R.canonical),Pr={horizontal:{},vertical:void 0},zr=ct.text,ui,yi=[0,0];if(zr){let un=zr.toString(),Tn=F.get("text-letter-spacing").evaluate(ct,{},R.canonical)*eu,pa=bo(un)?Tn:0,ro=F.get("text-anchor").evaluate(ct,{},R.canonical),Vo=VC(A,ct,R.canonical);if(!Vo){let fo=F.get("text-radial-offset").evaluate(ct,{},R.canonical);yi=fo?yS(ro,[fo*eu,hy]):F.get("text-offset").evaluate(ct,{},R.canonical).map(lo=>lo*eu)}let Xa=pe?"center":F.get("text-justify").evaluate(ct,{},R.canonical),sa=F.get("symbol-placement")==="point"?F.get("text-max-width").evaluate(ct,{},R.canonical)*eu:1/0,Mo=()=>{R.bucket.allowVerticalPlacement&&Ka(un)&&(Pr.vertical=Hx(zr,R.glyphMap,R.glyphPositions,R.imagePositions,Lt,sa,fe,ro,"left",pa,yi,i.ah.vertical,!0,fr,$t))};if(!pe&&Vo){let fo=new Set;if(Xa==="auto")for(let Xn=0;Xn<Vo.values.length;Xn+=2)fo.add(_S(Vo.values[Xn]));else fo.add(Xa);let lo=!1;for(let Xn of fo)if(!Pr.horizontal[Xn])if(lo)Pr.horizontal[Xn]=Pr.horizontal[0];else{let Ro=Hx(zr,R.glyphMap,R.glyphPositions,R.imagePositions,Lt,sa,fe,"center",Xn,pa,yi,i.ah.horizontal,!1,fr,$t);Ro&&(Pr.horizontal[Xn]=Ro,lo=Ro.positionedLines.length===1)}Mo()}else{Xa==="auto"&&(Xa=_S(ro));let fo=Hx(zr,R.glyphMap,R.glyphPositions,R.imagePositions,Lt,sa,fe,ro,Xa,pa,yi,i.ah.horizontal,!1,fr,$t);fo&&(Pr.horizontal[Xa]=fo),Mo(),Ka(un)&&pe&&ze&&(Pr.vertical=Hx(zr,R.glyphMap,R.glyphPositions,R.imagePositions,Lt,sa,fe,ro,Xa,pa,yi,i.ah.vertical,!1,fr,$t))}}let vn=!1;if(ct.icon&&ct.icon.name){let un=R.imageMap[ct.icon.name];un&&(ui=jx(R.imagePositions[ct.icon.name],F.get("icon-offset").evaluate(ct,{},R.canonical),F.get("icon-anchor").evaluate(ct,{},R.canonical)),vn=!!un.sdf,R.bucket.sdfIcons===void 0?R.bucket.sdfIcons=vn:R.bucket.sdfIcons!==vn&&T("Style sheet warning: Cannot mix SDF and non-SDF icons in one buffer"),(un.pixelRatio!==R.bucket.pixelRatio||F.get("icon-rotate").constantOr(1)!==0)&&(R.bucket.iconsNeedLinear=!0))}let zi=HC(Pr.horizontal)||Pr.vertical;R.bucket.iconsInText=!!zi&&zi.iconsInText,(zi||ui)&&iO(R.bucket,ct,Pr,ui,R.imageMap,re,fr,mr,yi,vn,R.canonical)}R.showCollisionBoxes&&R.bucket.generateCollisionDebugBuffers()},i.bq=Jv,i.br=Ot,i.bs=so,i.bt=br,i.bu=nS,i.bv=class{constructor(R){this._marks={start:[R.url,"start"].join("#"),end:[R.url,"end"].join("#"),measure:R.url.toString()},performance.mark(this._marks.start)}finish(){performance.mark(this._marks.end);let R=performance.getEntriesByName(this._marks.measure);return R.length===0&&(performance.measure(this._marks.measure,this._marks.start,this._marks.end),R=performance.getEntriesByName(this._marks.measure),performance.clearMarks(this._marks.start),performance.clearMarks(this._marks.end),performance.clearMeasures(this._marks.measure)),R}},i.bw=function(R,A,F,W,re){return a(this,void 0,void 0,function*(){if(b())try{return yield j(R,A,F,W,re)}catch(fe){}return function(fe,pe,ze,Ke,ct){let Lt=fe.width,$t=fe.height;N&&H||(N=new OffscreenCanvas(Lt,$t),H=N.getContext("2d",{willReadFrequently:!0})),N.width=Lt,N.height=$t,H.drawImage(fe,0,0,Lt,$t);let fr=H.getImageData(pe,ze,Ke,ct);return H.clearRect(0,0,Lt,$t),fr.data}(R,A,F,W,re)})},i.bx=PC,i.by=o,i.bz=s,i.c=oe,i.d=R=>a(void 0,void 0,void 0,function*(){if(R.byteLength===0)return createImageBitmap(new ImageData(1,1));let A=new Blob([new Uint8Array(R)],{type:"image/png"});try{return createImageBitmap(A)}catch(F){throw new Error(`Could not load image because of ${F.message}. Please make sure to use a supported image type such as PNG or JPEG. Note that SVGs are not supported.`)}}),i.e=L,i.f=R=>new Promise((A,F)=>{let W=new Image;W.onload=()=>{A(W),URL.revokeObjectURL(W.src),W.onload=null,window.requestAnimationFrame(()=>{W.src=Z})},W.onerror=()=>F(new Error("Could not load image. Please make sure to use a supported image type such as PNG or JPEG. Note that SVGs are not supported."));let re=new Blob([new Uint8Array(R)],{type:"image/png"});W.src=R.byteLength?URL.createObjectURL(re):Z}),i.g=Ee,i.h=(R,A)=>Se(L(R,{type:"json"}),A),i.i=O,i.j=ge,i.k=Pe,i.l=(R,A)=>Se(L(R,{type:"arrayBuffer"}),A),i.m=Se,i.n=function(R){return new nS(R).readFields(PQ,[])},i.o=na,i.p=oS,i.q=ue,i.r=Xi,i.s=Le,i.t=Yi,i.u=ki,i.v=ce,i.w=T,i.x=function([R,A,F]){return A+=90,A*=Math.PI/180,F*=Math.PI/180,{x:R*Math.cos(A)*Math.sin(F),y:R*Math.sin(A)*Math.sin(F),z:R*Math.cos(F)}},i.y=Lo,i.z=rs}),r("worker",["./shared"],function(i){"use strict";class a{constructor(Ge){this.keyCache={},Ge&&this.replace(Ge)}replace(Ge){this._layerConfigs={},this._layers={},this.update(Ge,[])}update(Ge,Je){for(let tt of Ge){this._layerConfigs[tt.id]=tt;let xt=this._layers[tt.id]=i.aA(tt);xt._featureFilter=i.a7(xt.filter),this.keyCache[tt.id]&&delete this.keyCache[tt.id]}for(let tt of Je)delete this.keyCache[tt],delete this._layerConfigs[tt],delete this._layers[tt];this.familiesBySource={};let je=i.bk(Object.values(this._layerConfigs),this.keyCache);for(let tt of je){let xt=tt.map(ar=>this._layers[ar.id]),Ie=xt[0];if(Ie.visibility==="none")continue;let xe=Ie.source||"",ke=this.familiesBySource[xe];ke||(ke=this.familiesBySource[xe]={});let vt=Ie.sourceLayer||"_geojsonTileLayer",ir=ke[vt];ir||(ir=ke[vt]=[]),ir.push(xt)}}}class o{constructor(Ge){let Je={},je=[];for(let xe in Ge){let ke=Ge[xe],vt=Je[xe]={};for(let ir in ke){let ar=ke[+ir];if(!ar||ar.bitmap.width===0||ar.bitmap.height===0)continue;let vr={x:0,y:0,w:ar.bitmap.width+2,h:ar.bitmap.height+2};je.push(vr),vt[ir]={rect:vr,metrics:ar.metrics}}}let{w:tt,h:xt}=i.p(je),Ie=new i.o({width:tt||1,height:xt||1});for(let xe in Ge){let ke=Ge[xe];for(let vt in ke){let ir=ke[+vt];if(!ir||ir.bitmap.width===0||ir.bitmap.height===0)continue;let ar=Je[xe][vt].rect;i.o.copy(ir.bitmap,Ie,{x:0,y:0},{x:ar.x+1,y:ar.y+1},ir.bitmap)}}this.image=Ie,this.positions=Je}}i.bl("GlyphAtlas",o);class s{constructor(Ge){this.tileID=new i.S(Ge.tileID.overscaledZ,Ge.tileID.wrap,Ge.tileID.canonical.z,Ge.tileID.canonical.x,Ge.tileID.canonical.y),this.uid=Ge.uid,this.zoom=Ge.zoom,this.pixelRatio=Ge.pixelRatio,this.tileSize=Ge.tileSize,this.source=Ge.source,this.overscaling=this.tileID.overscaleFactor(),this.showCollisionBoxes=Ge.showCollisionBoxes,this.collectResourceTiming=!!Ge.collectResourceTiming,this.returnDependencies=!!Ge.returnDependencies,this.promoteId=Ge.promoteId,this.inFlightDependencies=[]}parse(Ge,Je,je,tt){return i._(this,void 0,void 0,function*(){this.status="parsing",this.data=Ge,this.collisionBoxArray=new i.a5;let xt=new i.bm(Object.keys(Ge.layers).sort()),Ie=new i.bn(this.tileID,this.promoteId);Ie.bucketLayerIDs=[];let xe={},ke={featureIndex:Ie,iconDependencies:{},patternDependencies:{},glyphDependencies:{},availableImages:je},vt=Je.familiesBySource[this.source];for(let qn in vt){let Fn=Ge.layers[qn];if(!Fn)continue;Fn.version===1&&i.w(`Vector tile source "${this.source}" layer "${qn}" does not use vector tile spec v2 and therefore may have some rendering errors.`);let ra=xt.encode(qn),la=[];for(let Ut=0;Ut<Fn.length;Ut++){let wt=Fn.feature(Ut),rr=Ie.getId(wt,qn);la.push({feature:wt,id:rr,index:Ut,sourceLayerIndex:ra})}for(let Ut of vt[qn]){let wt=Ut[0];wt.source!==this.source&&i.w(`layer.source = ${wt.source} does not equal this.source = ${this.source}`),wt.minzoom&&this.zoom<Math.floor(wt.minzoom)||wt.maxzoom&&this.zoom>=wt.maxzoom||wt.visibility!=="none"&&(l(Ut,this.zoom,je),(xe[wt.id]=wt.createBucket({index:Ie.bucketLayerIDs.length,layers:Ut,zoom:this.zoom,pixelRatio:this.pixelRatio,overscaling:this.overscaling,collisionBoxArray:this.collisionBoxArray,sourceLayerIndex:ra,sourceID:this.source})).populate(la,ke,this.tileID.canonical),Ie.bucketLayerIDs.push(Ut.map(rr=>rr.id)))}}let ir=i.aF(ke.glyphDependencies,qn=>Object.keys(qn).map(Number));this.inFlightDependencies.forEach(qn=>qn==null?void 0:qn.abort()),this.inFlightDependencies=[];let ar=Promise.resolve({});if(Object.keys(ir).length){let qn=new AbortController;this.inFlightDependencies.push(qn),ar=tt.sendAsync({type:"GG",data:{stacks:ir,source:this.source,tileID:this.tileID,type:"glyphs"}},qn)}let vr=Object.keys(ke.iconDependencies),ii=Promise.resolve({});if(vr.length){let qn=new AbortController;this.inFlightDependencies.push(qn),ii=tt.sendAsync({type:"GI",data:{icons:vr,source:this.source,tileID:this.tileID,type:"icons"}},qn)}let pi=Object.keys(ke.patternDependencies),$r=Promise.resolve({});if(pi.length){let qn=new AbortController;this.inFlightDependencies.push(qn),$r=tt.sendAsync({type:"GI",data:{icons:pi,source:this.source,tileID:this.tileID,type:"patterns"}},qn)}let[di,ji,In]=yield Promise.all([ar,ii,$r]),wi=new o(di),On=new i.bo(ji,In);for(let qn in xe){let Fn=xe[qn];Fn instanceof i.a6?(l(Fn.layers,this.zoom,je),i.bp({bucket:Fn,glyphMap:di,glyphPositions:wi.positions,imageMap:ji,imagePositions:On.iconPositions,showCollisionBoxes:this.showCollisionBoxes,canonical:this.tileID.canonical})):Fn.hasPattern&&(Fn instanceof i.bq||Fn instanceof i.br||Fn instanceof i.bs)&&(l(Fn.layers,this.zoom,je),Fn.addFeatures(ke,this.tileID.canonical,On.patternPositions))}return this.status="done",{buckets:Object.values(xe).filter(qn=>!qn.isEmpty()),featureIndex:Ie,collisionBoxArray:this.collisionBoxArray,glyphAtlasImage:wi.image,imageAtlas:On,glyphMap:this.returnDependencies?di:null,iconMap:this.returnDependencies?ji:null,glyphPositions:this.returnDependencies?wi.positions:null}})}}function l(dt,Ge,Je){let je=new i.z(Ge);for(let tt of dt)tt.recalculate(je,Je)}class u{constructor(Ge,Je,je){this.actor=Ge,this.layerIndex=Je,this.availableImages=je,this.fetching={},this.loading={},this.loaded={}}loadVectorTile(Ge,Je){return i._(this,void 0,void 0,function*(){let je=yield i.l(Ge.request,Je);try{return{vectorTile:new i.bt.VectorTile(new i.bu(je.data)),rawData:je.data,cacheControl:je.cacheControl,expires:je.expires}}catch(tt){let xt=new Uint8Array(je.data),Ie=`Unable to parse the tile at ${Ge.request.url}, `;throw Ie+=xt[0]===31&&xt[1]===139?"please make sure the data is not gzipped and that you have configured the relevant header in the server":`got error: ${tt.message}`,new Error(Ie)}})}loadTile(Ge){return i._(this,void 0,void 0,function*(){let Je=Ge.uid,je=!!(Ge&&Ge.request&&Ge.request.collectResourceTiming)&&new i.bv(Ge.request),tt=new s(Ge);this.loading[Je]=tt;let xt=new AbortController;tt.abort=xt;try{let Ie=yield this.loadVectorTile(Ge,xt);if(delete this.loading[Je],!Ie)return null;let xe=Ie.rawData,ke={};Ie.expires&&(ke.expires=Ie.expires),Ie.cacheControl&&(ke.cacheControl=Ie.cacheControl);let vt={};if(je){let ar=je.finish();ar&&(vt.resourceTiming=JSON.parse(JSON.stringify(ar)))}tt.vectorTile=Ie.vectorTile;let ir=tt.parse(Ie.vectorTile,this.layerIndex,this.availableImages,this.actor);this.loaded[Je]=tt,this.fetching[Je]={rawTileData:xe,cacheControl:ke,resourceTiming:vt};try{let ar=yield ir;return i.e({rawTileData:xe.slice(0)},ar,ke,vt)}finally{delete this.fetching[Je]}}catch(Ie){throw delete this.loading[Je],tt.status="done",this.loaded[Je]=tt,Ie}})}reloadTile(Ge){return i._(this,void 0,void 0,function*(){let Je=Ge.uid;if(!this.loaded||!this.loaded[Je])throw new Error("Should not be trying to reload a tile that was never loaded or has been removed");let je=this.loaded[Je];if(je.showCollisionBoxes=Ge.showCollisionBoxes,je.status==="parsing"){let tt=yield je.parse(je.vectorTile,this.layerIndex,this.availableImages,this.actor),xt;if(this.fetching[Je]){let{rawTileData:Ie,cacheControl:xe,resourceTiming:ke}=this.fetching[Je];delete this.fetching[Je],xt=i.e({rawTileData:Ie.slice(0)},tt,xe,ke)}else xt=tt;return xt}if(je.status==="done"&&je.vectorTile)return je.parse(je.vectorTile,this.layerIndex,this.availableImages,this.actor)})}abortTile(Ge){return i._(this,void 0,void 0,function*(){let Je=this.loading,je=Ge.uid;Je&&Je[je]&&Je[je].abort&&(Je[je].abort.abort(),delete Je[je])})}removeTile(Ge){return i._(this,void 0,void 0,function*(){this.loaded&&this.loaded[Ge.uid]&&delete this.loaded[Ge.uid]})}}class c{constructor(){this.loaded={}}loadTile(Ge){return i._(this,void 0,void 0,function*(){let{uid:Je,encoding:je,rawImageData:tt,redFactor:xt,greenFactor:Ie,blueFactor:xe,baseShift:ke}=Ge,vt=tt.width+2,ir=tt.height+2,ar=i.b(tt)?new i.R({width:vt,height:ir},yield i.bw(tt,-1,-1,vt,ir)):tt,vr=new i.bx(Je,ar,je,xt,Ie,xe,ke);return this.loaded=this.loaded||{},this.loaded[Je]=vr,vr})}removeTile(Ge){let Je=this.loaded,je=Ge.uid;Je&&Je[je]&&delete Je[je]}}function f(dt,Ge){if(dt.length!==0){h(dt[0],Ge);for(var Je=1;Je<dt.length;Je++)h(dt[Je],!Ge)}}function h(dt,Ge){for(var Je=0,je=0,tt=0,xt=dt.length,Ie=xt-1;tt<xt;Ie=tt++){var xe=(dt[tt][0]-dt[Ie][0])*(dt[Ie][1]+dt[tt][1]),ke=Je+xe;je+=Math.abs(Je)>=Math.abs(xe)?Je-ke+xe:xe-ke+Je,Je=ke}Je+je>=0!=!!Ge&&dt.reverse()}var d=i.by(function dt(Ge,Je){var je,tt=Ge&&Ge.type;if(tt==="FeatureCollection")for(je=0;je<Ge.features.length;je++)dt(Ge.features[je],Je);else if(tt==="GeometryCollection")for(je=0;je<Ge.geometries.length;je++)dt(Ge.geometries[je],Je);else if(tt==="Feature")dt(Ge.geometry,Je);else if(tt==="Polygon")f(Ge.coordinates,Je);else if(tt==="MultiPolygon")for(je=0;je<Ge.coordinates.length;je++)f(Ge.coordinates[je],Je);return Ge});let v=i.bt.VectorTileFeature.prototype.toGeoJSON;var _={exports:{}},b=i.bz,p=i.bt.VectorTileFeature,k=E;function E(dt,Ge){this.options=Ge||{},this.features=dt,this.length=dt.length}function S(dt,Ge){this.id=typeof dt.id=="number"?dt.id:void 0,this.type=dt.type,this.rawGeometry=dt.type===1?[dt.geometry]:dt.geometry,this.properties=dt.tags,this.extent=Ge||4096}E.prototype.feature=function(dt){return new S(this.features[dt],this.options.extent)},S.prototype.loadGeometry=function(){var dt=this.rawGeometry;this.geometry=[];for(var Ge=0;Ge<dt.length;Ge++){for(var Je=dt[Ge],je=[],tt=0;tt<Je.length;tt++)je.push(new b(Je[tt][0],Je[tt][1]));this.geometry.push(je)}return this.geometry},S.prototype.bbox=function(){this.geometry||this.loadGeometry();for(var dt=this.geometry,Ge=1/0,Je=-1/0,je=1/0,tt=-1/0,xt=0;xt<dt.length;xt++)for(var Ie=dt[xt],xe=0;xe<Ie.length;xe++){var ke=Ie[xe];Ge=Math.min(Ge,ke.x),Je=Math.max(Je,ke.x),je=Math.min(je,ke.y),tt=Math.max(tt,ke.y)}return[Ge,je,Je,tt]},S.prototype.toGeoJSON=p.prototype.toGeoJSON;var L=i.bA,x=k;function C(dt){var Ge=new L;return function(Je,je){for(var tt in Je.layers)je.writeMessage(3,M,Je.layers[tt])}(dt,Ge),Ge.finish()}function M(dt,Ge){var Je;Ge.writeVarintField(15,dt.version||1),Ge.writeStringField(1,dt.name||""),Ge.writeVarintField(5,dt.extent||4096);var je={keys:[],values:[],keycache:{},valuecache:{}};for(Je=0;Je<dt.length;Je++)je.feature=dt.feature(Je),Ge.writeMessage(2,g,je);var tt=je.keys;for(Je=0;Je<tt.length;Je++)Ge.writeStringField(3,tt[Je]);var xt=je.values;for(Je=0;Je<xt.length;Je++)Ge.writeMessage(4,V,xt[Je])}function g(dt,Ge){var Je=dt.feature;Je.id!==void 0&&Ge.writeVarintField(1,Je.id),Ge.writeMessage(2,P,dt),Ge.writeVarintField(3,Je.type),Ge.writeMessage(4,O,Je)}function P(dt,Ge){var Je=dt.feature,je=dt.keys,tt=dt.values,xt=dt.keycache,Ie=dt.valuecache;for(var xe in Je.properties){var ke=Je.properties[xe],vt=xt[xe];if(ke!==null){vt===void 0&&(je.push(xe),xt[xe]=vt=je.length-1),Ge.writeVarint(vt);var ir=typeof ke;ir!=="string"&&ir!=="boolean"&&ir!=="number"&&(ke=JSON.stringify(ke));var ar=ir+":"+ke,vr=Ie[ar];vr===void 0&&(tt.push(ke),Ie[ar]=vr=tt.length-1),Ge.writeVarint(vr)}}}function T(dt,Ge){return(Ge<<3)+(7&dt)}function z(dt){return dt<<1^dt>>31}function O(dt,Ge){for(var Je=dt.loadGeometry(),je=dt.type,tt=0,xt=0,Ie=Je.length,xe=0;xe<Ie;xe++){var ke=Je[xe],vt=1;je===1&&(vt=ke.length),Ge.writeVarint(T(1,vt));for(var ir=je===3?ke.length-1:ke.length,ar=0;ar<ir;ar++){ar===1&&je!==1&&Ge.writeVarint(T(2,ir-1));var vr=ke[ar].x-tt,ii=ke[ar].y-xt;Ge.writeVarint(z(vr)),Ge.writeVarint(z(ii)),tt+=vr,xt+=ii}je===3&&Ge.writeVarint(T(7,1))}}function V(dt,Ge){var Je=typeof dt;Je==="string"?Ge.writeStringField(1,dt):Je==="boolean"?Ge.writeBooleanField(7,dt):Je==="number"&&(dt%1!=0?Ge.writeDoubleField(3,dt):dt<0?Ge.writeSVarintField(6,dt):Ge.writeVarintField(5,dt))}_.exports=C,_.exports.fromVectorTileJs=C,_.exports.fromGeojsonVt=function(dt,Ge){Ge=Ge||{};var Je={};for(var je in dt)Je[je]=new x(dt[je].features,Ge),Je[je].name=je,Je[je].version=Ge.version,Je[je].extent=Ge.extent;return C({layers:Je})},_.exports.GeoJSONWrapper=x;var G=i.by(_.exports);let Z={minZoom:0,maxZoom:16,minPoints:2,radius:40,extent:512,nodeSize:64,log:!1,generateId:!1,reduce:null,map:dt=>dt},j=Math.fround||(N=new Float32Array(1),dt=>(N[0]=+dt,N[0]));var N;let H=3,te=5,oe=6;class _e{constructor(Ge){this.options=Object.assign(Object.create(Z),Ge),this.trees=new Array(this.options.maxZoom+1),this.stride=this.options.reduce?7:6,this.clusterProps=[]}load(Ge){let{log:Je,minZoom:je,maxZoom:tt}=this.options;Je&&console.time("total time");let xt=`prepare ${Ge.length} points`;Je&&console.time(xt),this.points=Ge;let Ie=[];for(let ke=0;ke<Ge.length;ke++){let vt=Ge[ke];if(!vt.geometry)continue;let[ir,ar]=vt.geometry.coordinates,vr=j(me(ir)),ii=j(ie(ar));Ie.push(vr,ii,1/0,ke,-1,1),this.options.reduce&&Ie.push(0)}let xe=this.trees[tt+1]=this._createTree(Ie);Je&&console.timeEnd(xt);for(let ke=tt;ke>=je;ke--){let vt=+Date.now();xe=this.trees[ke]=this._createTree(this._cluster(xe,ke)),Je&&console.log("z%d: %d clusters in %dms",ke,xe.numItems,+Date.now()-vt)}return Je&&console.timeEnd("total time"),this}getClusters(Ge,Je){let je=((Ge[0]+180)%360+360)%360-180,tt=Math.max(-90,Math.min(90,Ge[1])),xt=Ge[2]===180?180:((Ge[2]+180)%360+360)%360-180,Ie=Math.max(-90,Math.min(90,Ge[3]));if(Ge[2]-Ge[0]>=360)je=-180,xt=180;else if(je>xt){let ar=this.getClusters([je,tt,180,Ie],Je),vr=this.getClusters([-180,tt,xt,Ie],Je);return ar.concat(vr)}let xe=this.trees[this._limitZoom(Je)],ke=xe.range(me(je),ie(Ie),me(xt),ie(tt)),vt=xe.data,ir=[];for(let ar of ke){let vr=this.stride*ar;ir.push(vt[vr+te]>1?Ee(vt,vr,this.clusterProps):this.points[vt[vr+H]])}return ir}getChildren(Ge){let Je=this._getOriginId(Ge),je=this._getOriginZoom(Ge),tt="No cluster with the specified id.",xt=this.trees[je];if(!xt)throw new Error(tt);let Ie=xt.data;if(Je*this.stride>=Ie.length)throw new Error(tt);let xe=this.options.radius/(this.options.extent*Math.pow(2,je-1)),ke=xt.within(Ie[Je*this.stride],Ie[Je*this.stride+1],xe),vt=[];for(let ir of ke){let ar=ir*this.stride;Ie[ar+4]===Ge&&vt.push(Ie[ar+te]>1?Ee(Ie,ar,this.clusterProps):this.points[Ie[ar+H]])}if(vt.length===0)throw new Error(tt);return vt}getLeaves(Ge,Je,je){let tt=[];return this._appendLeaves(tt,Ge,Je=Je||10,je=je||0,0),tt}getTile(Ge,Je,je){let tt=this.trees[this._limitZoom(Ge)],xt=Math.pow(2,Ge),{extent:Ie,radius:xe}=this.options,ke=xe/Ie,vt=(je-ke)/xt,ir=(je+1+ke)/xt,ar={features:[]};return this._addTileFeatures(tt.range((Je-ke)/xt,vt,(Je+1+ke)/xt,ir),tt.data,Je,je,xt,ar),Je===0&&this._addTileFeatures(tt.range(1-ke/xt,vt,1,ir),tt.data,xt,je,xt,ar),Je===xt-1&&this._addTileFeatures(tt.range(0,vt,ke/xt,ir),tt.data,-1,je,xt,ar),ar.features.length?ar:null}getClusterExpansionZoom(Ge){let Je=this._getOriginZoom(Ge)-1;for(;Je<=this.options.maxZoom;){let je=this.getChildren(Ge);if(Je++,je.length!==1)break;Ge=je[0].properties.cluster_id}return Je}_appendLeaves(Ge,Je,je,tt,xt){let Ie=this.getChildren(Je);for(let xe of Ie){let ke=xe.properties;if(ke&&ke.cluster?xt+ke.point_count<=tt?xt+=ke.point_count:xt=this._appendLeaves(Ge,ke.cluster_id,je,tt,xt):xt<tt?xt++:Ge.push(xe),Ge.length===je)break}return xt}_createTree(Ge){let Je=new i.av(Ge.length/this.stride|0,this.options.nodeSize,Float32Array);for(let je=0;je<Ge.length;je+=this.stride)Je.add(Ge[je],Ge[je+1]);return Je.finish(),Je.data=Ge,Je}_addTileFeatures(Ge,Je,je,tt,xt,Ie){for(let xe of Ge){let ke=xe*this.stride,vt=Je[ke+te]>1,ir,ar,vr;if(vt)ir=Ce(Je,ke,this.clusterProps),ar=Je[ke],vr=Je[ke+1];else{let $r=this.points[Je[ke+H]];ir=$r.properties;let[di,ji]=$r.geometry.coordinates;ar=me(di),vr=ie(ji)}let ii={type:1,geometry:[[Math.round(this.options.extent*(ar*xt-je)),Math.round(this.options.extent*(vr*xt-tt))]],tags:ir},pi;pi=vt||this.options.generateId?Je[ke+H]:this.points[Je[ke+H]].id,pi!==void 0&&(ii.id=pi),Ie.features.push(ii)}}_limitZoom(Ge){return Math.max(this.options.minZoom,Math.min(Math.floor(+Ge),this.options.maxZoom+1))}_cluster(Ge,Je){let{radius:je,extent:tt,reduce:xt,minPoints:Ie}=this.options,xe=je/(tt*Math.pow(2,Je)),ke=Ge.data,vt=[],ir=this.stride;for(let ar=0;ar<ke.length;ar+=ir){if(ke[ar+2]<=Je)continue;ke[ar+2]=Je;let vr=ke[ar],ii=ke[ar+1],pi=Ge.within(ke[ar],ke[ar+1],xe),$r=ke[ar+te],di=$r;for(let ji of pi){let In=ji*ir;ke[In+2]>Je&&(di+=ke[In+te])}if(di>$r&&di>=Ie){let ji,In=vr*$r,wi=ii*$r,On=-1,qn=((ar/ir|0)<<5)+(Je+1)+this.points.length;for(let Fn of pi){let ra=Fn*ir;if(ke[ra+2]<=Je)continue;ke[ra+2]=Je;let la=ke[ra+te];In+=ke[ra]*la,wi+=ke[ra+1]*la,ke[ra+4]=qn,xt&&(ji||(ji=this._map(ke,ar,!0),On=this.clusterProps.length,this.clusterProps.push(ji)),xt(ji,this._map(ke,ra)))}ke[ar+4]=qn,vt.push(In/di,wi/di,1/0,qn,-1,di),xt&&vt.push(On)}else{for(let ji=0;ji<ir;ji++)vt.push(ke[ar+ji]);if(di>1)for(let ji of pi){let In=ji*ir;if(!(ke[In+2]<=Je)){ke[In+2]=Je;for(let wi=0;wi<ir;wi++)vt.push(ke[In+wi])}}}}return vt}_getOriginId(Ge){return Ge-this.points.length>>5}_getOriginZoom(Ge){return(Ge-this.points.length)%32}_map(Ge,Je,je){if(Ge[Je+te]>1){let Ie=this.clusterProps[Ge[Je+oe]];return je?Object.assign({},Ie):Ie}let tt=this.points[Ge[Je+H]].properties,xt=this.options.map(tt);return je&&xt===tt?Object.assign({},xt):xt}}function Ee(dt,Ge,Je){return{type:"Feature",id:dt[Ge+H],properties:Ce(dt,Ge,Je),geometry:{type:"Point",coordinates:[(je=dt[Ge],360*(je-.5)),Se(dt[Ge+1])]}};var je}function Ce(dt,Ge,Je){let je=dt[Ge+te],tt=je>=1e4?`${Math.round(je/1e3)}k`:je>=1e3?Math.round(je/100)/10+"k":je,xt=dt[Ge+oe],Ie=xt===-1?{}:Object.assign({},Je[xt]);return Object.assign(Ie,{cluster:!0,cluster_id:dt[Ge+H],point_count:je,point_count_abbreviated:tt})}function me(dt){return dt/360+.5}function ie(dt){let Ge=Math.sin(dt*Math.PI/180),Je=.5-.25*Math.log((1+Ge)/(1-Ge))/Math.PI;return Je<0?0:Je>1?1:Je}function Se(dt){let Ge=(180-360*dt)*Math.PI/180;return 360*Math.atan(Math.exp(Ge))/Math.PI-90}function Le(dt,Ge,Je,je){let tt=je,xt=Ge+(Je-Ge>>1),Ie,xe=Je-Ge,ke=dt[Ge],vt=dt[Ge+1],ir=dt[Je],ar=dt[Je+1];for(let vr=Ge+3;vr<Je;vr+=3){let ii=Ae(dt[vr],dt[vr+1],ke,vt,ir,ar);if(ii>tt)Ie=vr,tt=ii;else if(ii===tt){let pi=Math.abs(vr-xt);pi<xe&&(Ie=vr,xe=pi)}}tt>je&&(Ie-Ge>3&&Le(dt,Ge,Ie,je),dt[Ie+2]=tt,Je-Ie>3&&Le(dt,Ie,Je,je))}function Ae(dt,Ge,Je,je,tt,xt){let Ie=tt-Je,xe=xt-je;if(Ie!==0||xe!==0){let ke=((dt-Je)*Ie+(Ge-je)*xe)/(Ie*Ie+xe*xe);ke>1?(Je=tt,je=xt):ke>0&&(Je+=Ie*ke,je+=xe*ke)}return Ie=dt-Je,xe=Ge-je,Ie*Ie+xe*xe}function Fe(dt,Ge,Je,je){let tt={id:dt==null?null:dt,type:Ge,geometry:Je,tags:je,minX:1/0,minY:1/0,maxX:-1/0,maxY:-1/0};if(Ge==="Point"||Ge==="MultiPoint"||Ge==="LineString")Pe(tt,Je);else if(Ge==="Polygon")Pe(tt,Je[0]);else if(Ge==="MultiLineString")for(let xt of Je)Pe(tt,xt);else if(Ge==="MultiPolygon")for(let xt of Je)Pe(tt,xt[0]);return tt}function Pe(dt,Ge){for(let Je=0;Je<Ge.length;Je+=3)dt.minX=Math.min(dt.minX,Ge[Je]),dt.minY=Math.min(dt.minY,Ge[Je+1]),dt.maxX=Math.max(dt.maxX,Ge[Je]),dt.maxY=Math.max(dt.maxY,Ge[Je+1])}function ge(dt,Ge,Je,je){if(!Ge.geometry)return;let tt=Ge.geometry.coordinates;if(tt&&tt.length===0)return;let xt=Ge.geometry.type,Ie=Math.pow(Je.tolerance/((1<<Je.maxZoom)*Je.extent),2),xe=[],ke=Ge.id;if(Je.promoteId?ke=Ge.properties[Je.promoteId]:Je.generateId&&(ke=je||0),xt==="Point")Re(tt,xe);else if(xt==="MultiPoint")for(let vt of tt)Re(vt,xe);else if(xt==="LineString")ce(tt,xe,Ie,!1);else if(xt==="MultiLineString"){if(Je.lineMetrics){for(let vt of tt)xe=[],ce(vt,xe,Ie,!1),dt.push(Fe(ke,"LineString",xe,Ge.properties));return}Ze(tt,xe,Ie,!1)}else if(xt==="Polygon")Ze(tt,xe,Ie,!0);else{if(xt!=="MultiPolygon"){if(xt==="GeometryCollection"){for(let vt of Ge.geometry.geometries)ge(dt,{id:ke,geometry:vt,properties:Ge.properties},Je,je);return}throw new Error("Input data is not a valid GeoJSON object.")}for(let vt of tt){let ir=[];Ze(vt,ir,Ie,!0),xe.push(ir)}}dt.push(Fe(ke,xt,xe,Ge.properties))}function Re(dt,Ge){Ge.push(ut(dt[0]),pt(dt[1]),0)}function ce(dt,Ge,Je,je){let tt,xt,Ie=0;for(let ke=0;ke<dt.length;ke++){let vt=ut(dt[ke][0]),ir=pt(dt[ke][1]);Ge.push(vt,ir,0),ke>0&&(Ie+=je?(tt*ir-vt*xt)/2:Math.sqrt(Math.pow(vt-tt,2)+Math.pow(ir-xt,2))),tt=vt,xt=ir}let xe=Ge.length-3;Ge[2]=1,Le(Ge,0,xe,Je),Ge[xe+2]=1,Ge.size=Math.abs(Ie),Ge.start=0,Ge.end=Ge.size}function Ze(dt,Ge,Je,je){for(let tt=0;tt<dt.length;tt++){let xt=[];ce(dt[tt],xt,Je,je),Ge.push(xt)}}function ut(dt){return dt/360+.5}function pt(dt){let Ge=Math.sin(dt*Math.PI/180),Je=.5-.25*Math.log((1+Ge)/(1-Ge))/Math.PI;return Je<0?0:Je>1?1:Je}function Zt(dt,Ge,Je,je,tt,xt,Ie,xe){if(je/=Ge,xt>=(Je/=Ge)&&Ie<je)return dt;if(Ie<Je||xt>=je)return null;let ke=[];for(let vt of dt){let ir=vt.geometry,ar=vt.type,vr=tt===0?vt.minX:vt.minY,ii=tt===0?vt.maxX:vt.maxY;if(vr>=Je&&ii<je){ke.push(vt);continue}if(ii<Je||vr>=je)continue;let pi=[];if(ar==="Point"||ar==="MultiPoint")st(ir,pi,Je,je,tt);else if(ar==="LineString")lt(ir,pi,Je,je,tt,!1,xe.lineMetrics);else if(ar==="MultiLineString")Nt(ir,pi,Je,je,tt,!1);else if(ar==="Polygon")Nt(ir,pi,Je,je,tt,!0);else if(ar==="MultiPolygon")for(let $r of ir){let di=[];Nt($r,di,Je,je,tt,!0),di.length&&pi.push(di)}if(pi.length){if(xe.lineMetrics&&ar==="LineString"){for(let $r of pi)ke.push(Fe(vt.id,ar,$r,vt.tags));continue}ar!=="LineString"&&ar!=="MultiLineString"||(pi.length===1?(ar="LineString",pi=pi[0]):ar="MultiLineString"),ar!=="Point"&&ar!=="MultiPoint"||(ar=pi.length===3?"Point":"MultiPoint"),ke.push(Fe(vt.id,ar,pi,vt.tags))}}return ke.length?ke:null}function st(dt,Ge,Je,je,tt){for(let xt=0;xt<dt.length;xt+=3){let Ie=dt[xt+tt];Ie>=Je&&Ie<=je&&Jt(Ge,dt[xt],dt[xt+1],dt[xt+2])}}function lt(dt,Ge,Je,je,tt,xt,Ie){let xe=Gt(dt),ke=tt===0?sr:wr,vt,ir,ar=dt.start;for(let di=0;di<dt.length-3;di+=3){let ji=dt[di],In=dt[di+1],wi=dt[di+2],On=dt[di+3],qn=dt[di+4],Fn=tt===0?ji:In,ra=tt===0?On:qn,la=!1;Ie&&(vt=Math.sqrt(Math.pow(ji-On,2)+Math.pow(In-qn,2))),Fn<Je?ra>Je&&(ir=ke(xe,ji,In,On,qn,Je),Ie&&(xe.start=ar+vt*ir)):Fn>je?ra<je&&(ir=ke(xe,ji,In,On,qn,je),Ie&&(xe.start=ar+vt*ir)):Jt(xe,ji,In,wi),ra<Je&&Fn>=Je&&(ir=ke(xe,ji,In,On,qn,Je),la=!0),ra>je&&Fn<=je&&(ir=ke(xe,ji,In,On,qn,je),la=!0),!xt&&la&&(Ie&&(xe.end=ar+vt*ir),Ge.push(xe),xe=Gt(dt)),Ie&&(ar+=vt)}let vr=dt.length-3,ii=dt[vr],pi=dt[vr+1],$r=tt===0?ii:pi;$r>=Je&&$r<=je&&Jt(xe,ii,pi,dt[vr+2]),vr=xe.length-3,xt&&vr>=3&&(xe[vr]!==xe[0]||xe[vr+1]!==xe[1])&&Jt(xe,xe[0],xe[1],xe[2]),xe.length&&Ge.push(xe)}function Gt(dt){let Ge=[];return Ge.size=dt.size,Ge.start=dt.start,Ge.end=dt.end,Ge}function Nt(dt,Ge,Je,je,tt,xt){for(let Ie of dt)lt(Ie,Ge,Je,je,tt,xt,!1)}function Jt(dt,Ge,Je,je){dt.push(Ge,Je,je)}function sr(dt,Ge,Je,je,tt,xt){let Ie=(xt-Ge)/(je-Ge);return Jt(dt,xt,Je+(tt-Je)*Ie,1),Ie}function wr(dt,Ge,Je,je,tt,xt){let Ie=(xt-Je)/(tt-Je);return Jt(dt,Ge+(je-Ge)*Ie,xt,1),Ie}function cr(dt,Ge){let Je=[];for(let je=0;je<dt.length;je++){let tt=dt[je],xt=tt.type,Ie;if(xt==="Point"||xt==="MultiPoint"||xt==="LineString")Ie=$e(tt.geometry,Ge);else if(xt==="MultiLineString"||xt==="Polygon"){Ie=[];for(let xe of tt.geometry)Ie.push($e(xe,Ge))}else if(xt==="MultiPolygon"){Ie=[];for(let xe of tt.geometry){let ke=[];for(let vt of xe)ke.push($e(vt,Ge));Ie.push(ke)}}Je.push(Fe(tt.id,xt,Ie,tt.tags))}return Je}function $e(dt,Ge){let Je=[];Je.size=dt.size,dt.start!==void 0&&(Je.start=dt.start,Je.end=dt.end);for(let je=0;je<dt.length;je+=3)Je.push(dt[je]+Ge,dt[je+1],dt[je+2]);return Je}function St(dt,Ge){if(dt.transformed)return dt;let Je=1<<dt.z,je=dt.x,tt=dt.y;for(let xt of dt.features){let Ie=xt.geometry,xe=xt.type;if(xt.geometry=[],xe===1)for(let ke=0;ke<Ie.length;ke+=2)xt.geometry.push(Qt(Ie[ke],Ie[ke+1],Ge,Je,je,tt));else for(let ke=0;ke<Ie.length;ke++){let vt=[];for(let ir=0;ir<Ie[ke].length;ir+=2)vt.push(Qt(Ie[ke][ir],Ie[ke][ir+1],Ge,Je,je,tt));xt.geometry.push(vt)}}return dt.transformed=!0,dt}function Qt(dt,Ge,Je,je,tt,xt){return[Math.round(Je*(dt*je-tt)),Math.round(Je*(Ge*je-xt))]}function Vt(dt,Ge,Je,je,tt){let xt=Ge===tt.maxZoom?0:tt.tolerance/((1<<Ge)*tt.extent),Ie={features:[],numPoints:0,numSimplified:0,numFeatures:dt.length,source:null,x:Je,y:je,z:Ge,transformed:!1,minX:2,minY:1,maxX:-1,maxY:0};for(let xe of dt)_t(Ie,xe,xt,tt);return Ie}function _t(dt,Ge,Je,je){let tt=Ge.geometry,xt=Ge.type,Ie=[];if(dt.minX=Math.min(dt.minX,Ge.minX),dt.minY=Math.min(dt.minY,Ge.minY),dt.maxX=Math.max(dt.maxX,Ge.maxX),dt.maxY=Math.max(dt.maxY,Ge.maxY),xt==="Point"||xt==="MultiPoint")for(let xe=0;xe<tt.length;xe+=3)Ie.push(tt[xe],tt[xe+1]),dt.numPoints++,dt.numSimplified++;else if(xt==="LineString")It(Ie,tt,dt,Je,!1,!1);else if(xt==="MultiLineString"||xt==="Polygon")for(let xe=0;xe<tt.length;xe++)It(Ie,tt[xe],dt,Je,xt==="Polygon",xe===0);else if(xt==="MultiPolygon")for(let xe=0;xe<tt.length;xe++){let ke=tt[xe];for(let vt=0;vt<ke.length;vt++)It(Ie,ke[vt],dt,Je,!0,vt===0)}if(Ie.length){let xe=Ge.tags||null;if(xt==="LineString"&&je.lineMetrics){xe={};for(let vt in Ge.tags)xe[vt]=Ge.tags[vt];xe.mapbox_clip_start=tt.start/tt.size,xe.mapbox_clip_end=tt.end/tt.size}let ke={geometry:Ie,type:xt==="Polygon"||xt==="MultiPolygon"?3:xt==="LineString"||xt==="MultiLineString"?2:1,tags:xe};Ge.id!==null&&(ke.id=Ge.id),dt.features.push(ke)}}function It(dt,Ge,Je,je,tt,xt){let Ie=je*je;if(je>0&&Ge.size<(tt?Ie:je))return void(Je.numPoints+=Ge.length/3);let xe=[];for(let ke=0;ke<Ge.length;ke+=3)(je===0||Ge[ke+2]>Ie)&&(Je.numSimplified++,xe.push(Ge[ke],Ge[ke+1])),Je.numPoints++;tt&&function(ke,vt){let ir=0;for(let ar=0,vr=ke.length,ii=vr-2;ar<vr;ii=ar,ar+=2)ir+=(ke[ar]-ke[ii])*(ke[ar+1]+ke[ii+1]);if(ir>0===vt)for(let ar=0,vr=ke.length;ar<vr/2;ar+=2){let ii=ke[ar],pi=ke[ar+1];ke[ar]=ke[vr-2-ar],ke[ar+1]=ke[vr-1-ar],ke[vr-2-ar]=ii,ke[vr-1-ar]=pi}}(xe,xt),dt.push(xe)}let mt={maxZoom:14,indexMaxZoom:5,indexMaxPoints:1e5,tolerance:3,extent:4096,buffer:64,lineMetrics:!1,promoteId:null,generateId:!1,debug:0};class er{constructor(Ge,Je){let je=(Je=this.options=function(xt,Ie){for(let xe in Ie)xt[xe]=Ie[xe];return xt}(Object.create(mt),Je)).debug;if(je&&console.time("preprocess data"),Je.maxZoom<0||Je.maxZoom>24)throw new Error("maxZoom should be in the 0-24 range");if(Je.promoteId&&Je.generateId)throw new Error("promoteId and generateId cannot be used together.");let tt=function(xt,Ie){let xe=[];if(xt.type==="FeatureCollection")for(let ke=0;ke<xt.features.length;ke++)ge(xe,xt.features[ke],Ie,ke);else ge(xe,xt.type==="Feature"?xt:{geometry:xt},Ie);return xe}(Ge,Je);this.tiles={},this.tileCoords=[],je&&(console.timeEnd("preprocess data"),console.log("index: maxZoom: %d, maxPoints: %d",Je.indexMaxZoom,Je.indexMaxPoints),console.time("generate tiles"),this.stats={},this.total=0),tt=function(xt,Ie){let xe=Ie.buffer/Ie.extent,ke=xt,vt=Zt(xt,1,-1-xe,xe,0,-1,2,Ie),ir=Zt(xt,1,1-xe,2+xe,0,-1,2,Ie);return(vt||ir)&&(ke=Zt(xt,1,-xe,1+xe,0,-1,2,Ie)||[],vt&&(ke=cr(vt,1).concat(ke)),ir&&(ke=ke.concat(cr(ir,-1)))),ke}(tt,Je),tt.length&&this.splitTile(tt,0,0,0),je&&(tt.length&&console.log("features: %d, points: %d",this.tiles[0].numFeatures,this.tiles[0].numPoints),console.timeEnd("generate tiles"),console.log("tiles generated:",this.total,JSON.stringify(this.stats)))}splitTile(Ge,Je,je,tt,xt,Ie,xe){let ke=[Ge,Je,je,tt],vt=this.options,ir=vt.debug;for(;ke.length;){tt=ke.pop(),je=ke.pop(),Je=ke.pop(),Ge=ke.pop();let ar=1<<Je,vr=lr(Je,je,tt),ii=this.tiles[vr];if(!ii&&(ir>1&&console.time("creation"),ii=this.tiles[vr]=Vt(Ge,Je,je,tt,vt),this.tileCoords.push({z:Je,x:je,y:tt}),ir)){ir>1&&(console.log("tile z%d-%d-%d (features: %d, points: %d, simplified: %d)",Je,je,tt,ii.numFeatures,ii.numPoints,ii.numSimplified),console.timeEnd("creation"));let la=`z${Je}`;this.stats[la]=(this.stats[la]||0)+1,this.total++}if(ii.source=Ge,xt==null){if(Je===vt.indexMaxZoom||ii.numPoints<=vt.indexMaxPoints)continue}else{if(Je===vt.maxZoom||Je===xt)continue;if(xt!=null){let la=xt-Je;if(je!==Ie>>la||tt!==xe>>la)continue}}if(ii.source=null,Ge.length===0)continue;ir>1&&console.time("clipping");let pi=.5*vt.buffer/vt.extent,$r=.5-pi,di=.5+pi,ji=1+pi,In=null,wi=null,On=null,qn=null,Fn=Zt(Ge,ar,je-pi,je+di,0,ii.minX,ii.maxX,vt),ra=Zt(Ge,ar,je+$r,je+ji,0,ii.minX,ii.maxX,vt);Ge=null,Fn&&(In=Zt(Fn,ar,tt-pi,tt+di,1,ii.minY,ii.maxY,vt),wi=Zt(Fn,ar,tt+$r,tt+ji,1,ii.minY,ii.maxY,vt),Fn=null),ra&&(On=Zt(ra,ar,tt-pi,tt+di,1,ii.minY,ii.maxY,vt),qn=Zt(ra,ar,tt+$r,tt+ji,1,ii.minY,ii.maxY,vt),ra=null),ir>1&&console.timeEnd("clipping"),ke.push(In||[],Je+1,2*je,2*tt),ke.push(wi||[],Je+1,2*je,2*tt+1),ke.push(On||[],Je+1,2*je+1,2*tt),ke.push(qn||[],Je+1,2*je+1,2*tt+1)}}getTile(Ge,Je,je){Ge=+Ge,Je=+Je,je=+je;let tt=this.options,{extent:xt,debug:Ie}=tt;if(Ge<0||Ge>24)return null;let xe=1<<Ge,ke=lr(Ge,Je=Je+xe&xe-1,je);if(this.tiles[ke])return St(this.tiles[ke],xt);Ie>1&&console.log("drilling down to z%d-%d-%d",Ge,Je,je);let vt,ir=Ge,ar=Je,vr=je;for(;!vt&&ir>0;)ir--,ar>>=1,vr>>=1,vt=this.tiles[lr(ir,ar,vr)];return vt&&vt.source?(Ie>1&&(console.log("found parent tile z%d-%d-%d",ir,ar,vr),console.time("drilling down")),this.splitTile(vt.source,ir,ar,vr,Ge,Je,je),Ie>1&&console.timeEnd("drilling down"),this.tiles[ke]?St(this.tiles[ke],xt):null):null}}function lr(dt,Ge,Je){return 32*((1<<dt)*Je+Ge)+dt}function Tr(dt,Ge){return Ge?dt.properties[Ge]:dt.id}function Lr(dt,Ge){if(dt==null)return!0;if(dt.type==="Feature")return Tr(dt,Ge)!=null;if(dt.type==="FeatureCollection"){let Je=new Set;for(let je of dt.features){let tt=Tr(je,Ge);if(tt==null||Je.has(tt))return!1;Je.add(tt)}return!0}return!1}function ti(dt,Ge){let Je=new Map;if(dt!=null)if(dt.type==="Feature")Je.set(Tr(dt,Ge),dt);else for(let je of dt.features)Je.set(Tr(je,Ge),je);return Je}class Br extends u{constructor(){super(...arguments),this._dataUpdateable=new Map}loadVectorTile(Ge,Je){return i._(this,void 0,void 0,function*(){let je=Ge.tileID.canonical;if(!this._geoJSONIndex)throw new Error("Unable to parse the data into a cluster or geojson");let tt=this._geoJSONIndex.getTile(je.z,je.x,je.y);if(!tt)return null;let xt=new class{constructor(xe){this.layers={_geojsonTileLayer:this},this.name="_geojsonTileLayer",this.extent=i.X,this.length=xe.length,this._features=xe}feature(xe){return new class{constructor(ke){this._feature=ke,this.extent=i.X,this.type=ke.type,this.properties=ke.tags,"id"in ke&&!isNaN(ke.id)&&(this.id=parseInt(ke.id,10))}loadGeometry(){if(this._feature.type===1){let ke=[];for(let vt of this._feature.geometry)ke.push([new i.P(vt[0],vt[1])]);return ke}{let ke=[];for(let vt of this._feature.geometry){let ir=[];for(let ar of vt)ir.push(new i.P(ar[0],ar[1]));ke.push(ir)}return ke}}toGeoJSON(ke,vt,ir){return v.call(this,ke,vt,ir)}}(this._features[xe])}}(tt.features),Ie=G(xt);return Ie.byteOffset===0&&Ie.byteLength===Ie.buffer.byteLength||(Ie=new Uint8Array(Ie)),{vectorTile:xt,rawData:Ie.buffer}})}loadData(Ge){return i._(this,void 0,void 0,function*(){var Je;(Je=this._pendingRequest)===null||Je===void 0||Je.abort();let je=!!(Ge&&Ge.request&&Ge.request.collectResourceTiming)&&new i.bv(Ge.request);this._pendingRequest=new AbortController;try{this._pendingData=this.loadAndProcessGeoJSON(Ge,this._pendingRequest),this._geoJSONIndex=Ge.cluster?new _e(function({superclusterOptions:Ie,clusterProperties:xe}){if(!xe||!Ie)return Ie;let ke={},vt={},ir={accumulated:null,zoom:0},ar={properties:null},vr=Object.keys(xe);for(let ii of vr){let[pi,$r]=xe[ii],di=i.bC($r),ji=i.bC(typeof pi=="string"?[pi,["accumulated"],["get",ii]]:pi);ke[ii]=di.value,vt[ii]=ji.value}return Ie.map=ii=>{ar.properties=ii;let pi={};for(let $r of vr)pi[$r]=ke[$r].evaluate(ir,ar);return pi},Ie.reduce=(ii,pi)=>{ar.properties=pi;for(let $r of vr)ir.accumulated=ii[$r],ii[$r]=vt[$r].evaluate(ir,ar)},Ie}(Ge)).load((yield this._pendingData).features):(tt=yield this._pendingData,new er(tt,Ge.geojsonVtOptions)),this.loaded={};let xt={};if(je){let Ie=je.finish();Ie&&(xt.resourceTiming={},xt.resourceTiming[Ge.source]=JSON.parse(JSON.stringify(Ie)))}return xt}catch(xt){if(delete this._pendingRequest,i.bB(xt))return{abandoned:!0};throw xt}var tt})}getData(){return i._(this,void 0,void 0,function*(){return this._pendingData})}reloadTile(Ge){let Je=this.loaded;return Je&&Je[Ge.uid]?super.reloadTile(Ge):this.loadTile(Ge)}loadAndProcessGeoJSON(Ge,Je){return i._(this,void 0,void 0,function*(){let je=yield this.loadGeoJSON(Ge,Je);if(delete this._pendingRequest,typeof je!="object")throw new Error(`Input data given to '${Ge.source}' is not a valid GeoJSON object.`);if(d(je,!0),Ge.filter){let tt=i.bC(Ge.filter,{type:"boolean","property-type":"data-driven",overridable:!1,transition:!1});if(tt.result==="error")throw new Error(tt.value.map(Ie=>`${Ie.key}: ${Ie.message}`).join(", "));je={type:"FeatureCollection",features:je.features.filter(Ie=>tt.value.evaluate({zoom:0},Ie))}}return je})}loadGeoJSON(Ge,Je){return i._(this,void 0,void 0,function*(){let{promoteId:je}=Ge;if(Ge.request){let tt=yield i.h(Ge.request,Je);return this._dataUpdateable=Lr(tt.data,je)?ti(tt.data,je):void 0,tt.data}if(typeof Ge.data=="string")try{let tt=JSON.parse(Ge.data);return this._dataUpdateable=Lr(tt,je)?ti(tt,je):void 0,tt}catch(tt){throw new Error(`Input data given to '${Ge.source}' is not a valid GeoJSON object.`)}if(!Ge.dataDiff)throw new Error(`Input data given to '${Ge.source}' is not a valid GeoJSON object.`);if(!this._dataUpdateable)throw new Error(`Cannot update existing geojson data in ${Ge.source}`);return function(tt,xt,Ie){var xe,ke,vt,ir;if(xt.removeAll&&tt.clear(),xt.remove)for(let ar of xt.remove)tt.delete(ar);if(xt.add)for(let ar of xt.add){let vr=Tr(ar,Ie);vr!=null&&tt.set(vr,ar)}if(xt.update)for(let ar of xt.update){let vr=tt.get(ar.id);if(vr==null)continue;let ii=!ar.removeAllProperties&&(((xe=ar.removeProperties)===null||xe===void 0?void 0:xe.length)>0||((ke=ar.addOrUpdateProperties)===null||ke===void 0?void 0:ke.length)>0);if((ar.newGeometry||ar.removeAllProperties||ii)&&(vr=Object.assign({},vr),tt.set(ar.id,vr),ii&&(vr.properties=Object.assign({},vr.properties))),ar.newGeometry&&(vr.geometry=ar.newGeometry),ar.removeAllProperties)vr.properties={};else if(((vt=ar.removeProperties)===null||vt===void 0?void 0:vt.length)>0)for(let pi of ar.removeProperties)Object.prototype.hasOwnProperty.call(vr.properties,pi)&&delete vr.properties[pi];if(((ir=ar.addOrUpdateProperties)===null||ir===void 0?void 0:ir.length)>0)for(let{key:pi,value:$r}of ar.addOrUpdateProperties)vr.properties[pi]=$r}}(this._dataUpdateable,Ge.dataDiff,je),{type:"FeatureCollection",features:Array.from(this._dataUpdateable.values())}})}removeSource(Ge){return i._(this,void 0,void 0,function*(){this._pendingRequest&&this._pendingRequest.abort()})}getClusterExpansionZoom(Ge){return this._geoJSONIndex.getClusterExpansionZoom(Ge.clusterId)}getClusterChildren(Ge){return this._geoJSONIndex.getChildren(Ge.clusterId)}getClusterLeaves(Ge){return this._geoJSONIndex.getLeaves(Ge.clusterId,Ge.limit,Ge.offset)}}class Vr{constructor(Ge){this.self=Ge,this.actor=new i.F(Ge),this.layerIndexes={},this.availableImages={},this.workerSources={},this.demWorkerSources={},this.externalWorkerSourceTypes={},this.self.registerWorkerSource=(Je,je)=>{if(this.externalWorkerSourceTypes[Je])throw new Error(`Worker source with name "${Je}" already registered.`);this.externalWorkerSourceTypes[Je]=je},this.self.addProtocol=i.bi,this.self.removeProtocol=i.bj,this.self.registerRTLTextPlugin=Je=>{if(i.bD.isParsed())throw new Error("RTL text plugin already registered.");i.bD.setMethods(Je)},this.actor.registerMessageHandler("LDT",(Je,je)=>this._getDEMWorkerSource(Je,je.source).loadTile(je)),this.actor.registerMessageHandler("RDT",(Je,je)=>i._(this,void 0,void 0,function*(){this._getDEMWorkerSource(Je,je.source).removeTile(je)})),this.actor.registerMessageHandler("GCEZ",(Je,je)=>i._(this,void 0,void 0,function*(){return this._getWorkerSource(Je,je.type,je.source).getClusterExpansionZoom(je)})),this.actor.registerMessageHandler("GCC",(Je,je)=>i._(this,void 0,void 0,function*(){return this._getWorkerSource(Je,je.type,je.source).getClusterChildren(je)})),this.actor.registerMessageHandler("GCL",(Je,je)=>i._(this,void 0,void 0,function*(){return this._getWorkerSource(Je,je.type,je.source).getClusterLeaves(je)})),this.actor.registerMessageHandler("LD",(Je,je)=>this._getWorkerSource(Je,je.type,je.source).loadData(je)),this.actor.registerMessageHandler("GD",(Je,je)=>this._getWorkerSource(Je,je.type,je.source).getData()),this.actor.registerMessageHandler("LT",(Je,je)=>this._getWorkerSource(Je,je.type,je.source).loadTile(je)),this.actor.registerMessageHandler("RT",(Je,je)=>this._getWorkerSource(Je,je.type,je.source).reloadTile(je)),this.actor.registerMessageHandler("AT",(Je,je)=>this._getWorkerSource(Je,je.type,je.source).abortTile(je)),this.actor.registerMessageHandler("RMT",(Je,je)=>this._getWorkerSource(Je,je.type,je.source).removeTile(je)),this.actor.registerMessageHandler("RS",(Je,je)=>i._(this,void 0,void 0,function*(){if(!this.workerSources[Je]||!this.workerSources[Je][je.type]||!this.workerSources[Je][je.type][je.source])return;let tt=this.workerSources[Je][je.type][je.source];delete this.workerSources[Je][je.type][je.source],tt.removeSource!==void 0&&tt.removeSource(je)})),this.actor.registerMessageHandler("RM",Je=>i._(this,void 0,void 0,function*(){delete this.layerIndexes[Je],delete this.availableImages[Je],delete this.workerSources[Je],delete this.demWorkerSources[Je]})),this.actor.registerMessageHandler("SR",(Je,je)=>i._(this,void 0,void 0,function*(){this.referrer=je})),this.actor.registerMessageHandler("SRPS",(Je,je)=>this._syncRTLPluginState(Je,je)),this.actor.registerMessageHandler("IS",(Je,je)=>i._(this,void 0,void 0,function*(){this.self.importScripts(je)})),this.actor.registerMessageHandler("SI",(Je,je)=>this._setImages(Je,je)),this.actor.registerMessageHandler("UL",(Je,je)=>i._(this,void 0,void 0,function*(){this._getLayerIndex(Je).update(je.layers,je.removedIds)})),this.actor.registerMessageHandler("SL",(Je,je)=>i._(this,void 0,void 0,function*(){this._getLayerIndex(Je).replace(je)}))}_setImages(Ge,Je){return i._(this,void 0,void 0,function*(){this.availableImages[Ge]=Je;for(let je in this.workerSources[Ge]){let tt=this.workerSources[Ge][je];for(let xt in tt)tt[xt].availableImages=Je}})}_syncRTLPluginState(Ge,Je){return i._(this,void 0,void 0,function*(){if(i.bD.isParsed())return i.bD.getState();if(Je.pluginStatus!=="loading")return i.bD.setState(Je),Je;let je=Je.pluginURL;if(this.self.importScripts(je),i.bD.isParsed()){let tt={pluginStatus:"loaded",pluginURL:je};return i.bD.setState(tt),tt}throw i.bD.setState({pluginStatus:"error",pluginURL:""}),new Error(`RTL Text Plugin failed to import scripts from ${je}`)})}_getAvailableImages(Ge){let Je=this.availableImages[Ge];return Je||(Je=[]),Je}_getLayerIndex(Ge){let Je=this.layerIndexes[Ge];return Je||(Je=this.layerIndexes[Ge]=new a),Je}_getWorkerSource(Ge,Je,je){if(this.workerSources[Ge]||(this.workerSources[Ge]={}),this.workerSources[Ge][Je]||(this.workerSources[Ge][Je]={}),!this.workerSources[Ge][Je][je]){let tt={sendAsync:(xt,Ie)=>(xt.targetMapId=Ge,this.actor.sendAsync(xt,Ie))};switch(Je){case"vector":this.workerSources[Ge][Je][je]=new u(tt,this._getLayerIndex(Ge),this._getAvailableImages(Ge));break;case"geojson":this.workerSources[Ge][Je][je]=new Br(tt,this._getLayerIndex(Ge),this._getAvailableImages(Ge));break;default:this.workerSources[Ge][Je][je]=new this.externalWorkerSourceTypes[Je](tt,this._getLayerIndex(Ge),this._getAvailableImages(Ge))}}return this.workerSources[Ge][Je][je]}_getDEMWorkerSource(Ge,Je){return this.demWorkerSources[Ge]||(this.demWorkerSources[Ge]={}),this.demWorkerSources[Ge][Je]||(this.demWorkerSources[Ge][Je]=new c),this.demWorkerSources[Ge][Je]}}return i.i(self)&&(self.worker=new Vr(self)),Vr}),r("index",["exports","./shared"],function(i,a){"use strict";var o="4.7.1";let s,l,u={now:typeof performance!="undefined"&&performance&&performance.now?performance.now.bind(performance):Date.now.bind(Date),frameAsync:ue=>new Promise((w,B)=>{let Q=requestAnimationFrame(w);ue.signal.addEventListener("abort",()=>{cancelAnimationFrame(Q),B(a.c())})}),getImageData(ue,w=0){return this.getImageCanvasContext(ue).getImageData(-w,-w,ue.width+2*w,ue.height+2*w)},getImageCanvasContext(ue){let w=window.document.createElement("canvas"),B=w.getContext("2d",{willReadFrequently:!0});if(!B)throw new Error("failed to create canvas 2d context");return w.width=ue.width,w.height=ue.height,B.drawImage(ue,0,0,ue.width,ue.height),B},resolveURL:ue=>(s||(s=document.createElement("a")),s.href=ue,s.href),hardwareConcurrency:typeof navigator!="undefined"&&navigator.hardwareConcurrency||4,get prefersReducedMotion(){return!!matchMedia&&(l==null&&(l=matchMedia("(prefers-reduced-motion: reduce)")),l.matches)}};class c{static testProp(w){if(!c.docStyle)return w[0];for(let B=0;B<w.length;B++)if(w[B]in c.docStyle)return w[B];return w[0]}static create(w,B,Q){let ee=window.document.createElement(w);return B!==void 0&&(ee.className=B),Q&&Q.appendChild(ee),ee}static createNS(w,B){return window.document.createElementNS(w,B)}static disableDrag(){c.docStyle&&c.selectProp&&(c.userSelect=c.docStyle[c.selectProp],c.docStyle[c.selectProp]="none")}static enableDrag(){c.docStyle&&c.selectProp&&(c.docStyle[c.selectProp]=c.userSelect)}static setTransform(w,B){w.style[c.transformProp]=B}static addEventListener(w,B,Q,ee={}){w.addEventListener(B,Q,"passive"in ee?ee:ee.capture)}static removeEventListener(w,B,Q,ee={}){w.removeEventListener(B,Q,"passive"in ee?ee:ee.capture)}static suppressClickInternal(w){w.preventDefault(),w.stopPropagation(),window.removeEventListener("click",c.suppressClickInternal,!0)}static suppressClick(){window.addEventListener("click",c.suppressClickInternal,!0),window.setTimeout(()=>{window.removeEventListener("click",c.suppressClickInternal,!0)},0)}static getScale(w){let B=w.getBoundingClientRect();return{x:B.width/w.offsetWidth||1,y:B.height/w.offsetHeight||1,boundingClientRect:B}}static getPoint(w,B,Q){let ee=B.boundingClientRect;return new a.P((Q.clientX-ee.left)/B.x-w.clientLeft,(Q.clientY-ee.top)/B.y-w.clientTop)}static mousePos(w,B){let Q=c.getScale(w);return c.getPoint(w,Q,B)}static touchPos(w,B){let Q=[],ee=c.getScale(w);for(let le=0;le<B.length;le++)Q.push(c.getPoint(w,ee,B[le]));return Q}static mouseButton(w){return w.button}static remove(w){w.parentNode&&w.parentNode.removeChild(w)}}c.docStyle=typeof window!="undefined"&&window.document&&window.document.documentElement.style,c.selectProp=c.testProp(["userSelect","MozUserSelect","WebkitUserSelect","msUserSelect"]),c.transformProp=c.testProp(["transform","WebkitTransform"]);let f={supported:!1,testSupport:function(ue){!v&&d&&(_?b(ue):h=ue)}},h,d,v=!1,_=!1;function b(ue){let w=ue.createTexture();ue.bindTexture(ue.TEXTURE_2D,w);try{if(ue.texImage2D(ue.TEXTURE_2D,0,ue.RGBA,ue.RGBA,ue.UNSIGNED_BYTE,d),ue.isContextLost())return;f.supported=!0}catch(B){}ue.deleteTexture(w),v=!0}var p;typeof document!="undefined"&&(d=document.createElement("img"),d.onload=()=>{h&&b(h),h=null,_=!0},d.onerror=()=>{v=!0,h=null},d.src="data:image/webp;base64,UklGRh4AAABXRUJQVlA4TBEAAAAvAQAAAAfQ//73v/+BiOh/AAA="),function(ue){let w,B,Q,ee;ue.resetRequestQueue=()=>{w=[],B=0,Q=0,ee={}},ue.addThrottleControl=ot=>{let Tt=Q++;return ee[Tt]=ot,Tt},ue.removeThrottleControl=ot=>{delete ee[ot],qe()},ue.getImage=(ot,Tt,Yt=!0)=>new Promise((Kt,xr)=>{f.supported&&(ot.headers||(ot.headers={}),ot.headers.accept="image/webp,*/*"),a.e(ot,{type:"image"}),w.push({abortController:Tt,requestParameters:ot,supportImageRefresh:Yt,state:"queued",onError:Ir=>{xr(Ir)},onSuccess:Ir=>{Kt(Ir)}}),qe()});let le=ot=>a._(this,void 0,void 0,function*(){ot.state="running";let{requestParameters:Tt,supportImageRefresh:Yt,onError:Kt,onSuccess:xr,abortController:Ir}=ot,ve=Yt===!1&&!a.i(self)&&!a.g(Tt.url)&&(!Tt.headers||Object.keys(Tt.headers).reduce((Be,et)=>Be&&et==="accept",!0));B++;let be=ve?Xe(Tt,Ir):a.m(Tt,Ir);try{let Be=yield be;delete ot.abortController,ot.state="completed",Be.data instanceof HTMLImageElement||a.b(Be.data)?xr(Be):Be.data&&xr({data:yield(De=Be.data,typeof createImageBitmap=="function"?a.d(De):a.f(De)),cacheControl:Be.cacheControl,expires:Be.expires})}catch(Be){delete ot.abortController,Kt(Be)}finally{B--,qe()}var De}),qe=()=>{let ot=(()=>{for(let Tt of Object.keys(ee))if(ee[Tt]())return!0;return!1})()?a.a.MAX_PARALLEL_IMAGE_REQUESTS_PER_FRAME:a.a.MAX_PARALLEL_IMAGE_REQUESTS;for(let Tt=B;Tt<ot&&w.length>0;Tt++){let Yt=w.shift();Yt.abortController.signal.aborted?Tt--:le(Yt)}},Xe=(ot,Tt)=>new Promise((Yt,Kt)=>{let xr=new Image,Ir=ot.url,ve=ot.credentials;ve&&ve==="include"?xr.crossOrigin="use-credentials":(ve&&ve==="same-origin"||!a.s(Ir))&&(xr.crossOrigin="anonymous"),Tt.signal.addEventListener("abort",()=>{xr.src="",Kt(a.c())}),xr.fetchPriority="high",xr.onload=()=>{xr.onerror=xr.onload=null,Yt({data:xr})},xr.onerror=()=>{xr.onerror=xr.onload=null,Tt.signal.aborted||Kt(new Error("Could not load image. Please make sure to use a supported image type such as PNG or JPEG. Note that SVGs are not supported."))},xr.src=Ir})}(p||(p={})),p.resetRequestQueue();class k{constructor(w){this._transformRequestFn=w}transformRequest(w,B){return this._transformRequestFn&&this._transformRequestFn(w,B)||{url:w}}setTransformRequest(w){this._transformRequestFn=w}}function E(ue){var w=new a.A(3);return w[0]=ue[0],w[1]=ue[1],w[2]=ue[2],w}var S,L=function(ue,w,B){return ue[0]=w[0]-B[0],ue[1]=w[1]-B[1],ue[2]=w[2]-B[2],ue};S=new a.A(3),a.A!=Float32Array&&(S[0]=0,S[1]=0,S[2]=0);var x=function(ue){var w=ue[0],B=ue[1];return w*w+B*B};function C(ue){let w=[];if(typeof ue=="string")w.push({id:"default",url:ue});else if(ue&&ue.length>0){let B=[];for(let{id:Q,url:ee}of ue){let le=`${Q}${ee}`;B.indexOf(le)===-1&&(B.push(le),w.push({id:Q,url:ee}))}}return w}function M(ue,w,B){let Q=ue.split("?");return Q[0]+=`${w}${B}`,Q.join("?")}(function(){var ue=new a.A(2);a.A!=Float32Array&&(ue[0]=0,ue[1]=0)})();class g{constructor(w,B,Q,ee){this.context=w,this.format=Q,this.texture=w.gl.createTexture(),this.update(B,ee)}update(w,B,Q){let{width:ee,height:le}=w,qe=!(this.size&&this.size[0]===ee&&this.size[1]===le||Q),{context:Xe}=this,{gl:ot}=Xe;if(this.useMipmap=!!(B&&B.useMipmap),ot.bindTexture(ot.TEXTURE_2D,this.texture),Xe.pixelStoreUnpackFlipY.set(!1),Xe.pixelStoreUnpack.set(1),Xe.pixelStoreUnpackPremultiplyAlpha.set(this.format===ot.RGBA&&(!B||B.premultiply!==!1)),qe)this.size=[ee,le],w instanceof HTMLImageElement||w instanceof HTMLCanvasElement||w instanceof HTMLVideoElement||w instanceof ImageData||a.b(w)?ot.texImage2D(ot.TEXTURE_2D,0,this.format,this.format,ot.UNSIGNED_BYTE,w):ot.texImage2D(ot.TEXTURE_2D,0,this.format,ee,le,0,this.format,ot.UNSIGNED_BYTE,w.data);else{let{x:Tt,y:Yt}=Q||{x:0,y:0};w instanceof HTMLImageElement||w instanceof HTMLCanvasElement||w instanceof HTMLVideoElement||w instanceof ImageData||a.b(w)?ot.texSubImage2D(ot.TEXTURE_2D,0,Tt,Yt,ot.RGBA,ot.UNSIGNED_BYTE,w):ot.texSubImage2D(ot.TEXTURE_2D,0,Tt,Yt,ee,le,ot.RGBA,ot.UNSIGNED_BYTE,w.data)}this.useMipmap&&this.isSizePowerOfTwo()&&ot.generateMipmap(ot.TEXTURE_2D)}bind(w,B,Q){let{context:ee}=this,{gl:le}=ee;le.bindTexture(le.TEXTURE_2D,this.texture),Q!==le.LINEAR_MIPMAP_NEAREST||this.isSizePowerOfTwo()||(Q=le.LINEAR),w!==this.filter&&(le.texParameteri(le.TEXTURE_2D,le.TEXTURE_MAG_FILTER,w),le.texParameteri(le.TEXTURE_2D,le.TEXTURE_MIN_FILTER,Q||w),this.filter=w),B!==this.wrap&&(le.texParameteri(le.TEXTURE_2D,le.TEXTURE_WRAP_S,B),le.texParameteri(le.TEXTURE_2D,le.TEXTURE_WRAP_T,B),this.wrap=B)}isSizePowerOfTwo(){return this.size[0]===this.size[1]&&Math.log(this.size[0])/Math.LN2%1==0}destroy(){let{gl:w}=this.context;w.deleteTexture(this.texture),this.texture=null}}function P(ue){let{userImage:w}=ue;return!!(w&&w.render&&w.render())&&(ue.data.replace(new Uint8Array(w.data.buffer)),!0)}class T extends a.E{constructor(){super(),this.images={},this.updatedImages={},this.callbackDispatchedThisFrame={},this.loaded=!1,this.requestors=[],this.patterns={},this.atlasImage=new a.R({width:1,height:1}),this.dirty=!0}isLoaded(){return this.loaded}setLoaded(w){if(this.loaded!==w&&(this.loaded=w,w)){for(let{ids:B,promiseResolve:Q}of this.requestors)Q(this._getImagesForIds(B));this.requestors=[]}}getImage(w){let B=this.images[w];if(B&&!B.data&&B.spriteData){let Q=B.spriteData;B.data=new a.R({width:Q.width,height:Q.height},Q.context.getImageData(Q.x,Q.y,Q.width,Q.height).data),B.spriteData=null}return B}addImage(w,B){if(this.images[w])throw new Error(`Image id ${w} already exist, use updateImage instead`);this._validate(w,B)&&(this.images[w]=B)}_validate(w,B){let Q=!0,ee=B.data||B.spriteData;return this._validateStretch(B.stretchX,ee&&ee.width)||(this.fire(new a.j(new Error(`Image "${w}" has invalid "stretchX" value`))),Q=!1),this._validateStretch(B.stretchY,ee&&ee.height)||(this.fire(new a.j(new Error(`Image "${w}" has invalid "stretchY" value`))),Q=!1),this._validateContent(B.content,B)||(this.fire(new a.j(new Error(`Image "${w}" has invalid "content" value`))),Q=!1),Q}_validateStretch(w,B){if(!w)return!0;let Q=0;for(let ee of w){if(ee[0]<Q||ee[1]<ee[0]||B<ee[1])return!1;Q=ee[1]}return!0}_validateContent(w,B){if(!w)return!0;if(w.length!==4)return!1;let Q=B.spriteData,ee=Q&&Q.width||B.data.width,le=Q&&Q.height||B.data.height;return!(w[0]<0||ee<w[0]||w[1]<0||le<w[1]||w[2]<0||ee<w[2]||w[3]<0||le<w[3]||w[2]<w[0]||w[3]<w[1])}updateImage(w,B,Q=!0){let ee=this.getImage(w);if(Q&&(ee.data.width!==B.data.width||ee.data.height!==B.data.height))throw new Error(`size mismatch between old image (${ee.data.width}x${ee.data.height}) and new image (${B.data.width}x${B.data.height}).`);B.version=ee.version+1,this.images[w]=B,this.updatedImages[w]=!0}removeImage(w){let B=this.images[w];delete this.images[w],delete this.patterns[w],B.userImage&&B.userImage.onRemove&&B.userImage.onRemove()}listImages(){return Object.keys(this.images)}getImages(w){return new Promise((B,Q)=>{let ee=!0;if(!this.isLoaded())for(let le of w)this.images[le]||(ee=!1);this.isLoaded()||ee?B(this._getImagesForIds(w)):this.requestors.push({ids:w,promiseResolve:B})})}_getImagesForIds(w){let B={};for(let Q of w){let ee=this.getImage(Q);ee||(this.fire(new a.k("styleimagemissing",{id:Q})),ee=this.getImage(Q)),ee?B[Q]={data:ee.data.clone(),pixelRatio:ee.pixelRatio,sdf:ee.sdf,version:ee.version,stretchX:ee.stretchX,stretchY:ee.stretchY,content:ee.content,textFitWidth:ee.textFitWidth,textFitHeight:ee.textFitHeight,hasRenderCallback:!!(ee.userImage&&ee.userImage.render)}:a.w(`Image "${Q}" could not be loaded. Please make sure you have added the image with map.addImage() or a "sprite" property in your style. You can provide missing images by listening for the "styleimagemissing" map event.`)}return B}getPixelSize(){let{width:w,height:B}=this.atlasImage;return{width:w,height:B}}getPattern(w){let B=this.patterns[w],Q=this.getImage(w);if(!Q)return null;if(B&&B.position.version===Q.version)return B.position;if(B)B.position.version=Q.version;else{let ee={w:Q.data.width+2,h:Q.data.height+2,x:0,y:0},le=new a.I(ee,Q);this.patterns[w]={bin:ee,position:le}}return this._updatePatternAtlas(),this.patterns[w].position}bind(w){let B=w.gl;this.atlasTexture?this.dirty&&(this.atlasTexture.update(this.atlasImage),this.dirty=!1):this.atlasTexture=new g(w,this.atlasImage,B.RGBA),this.atlasTexture.bind(B.LINEAR,B.CLAMP_TO_EDGE)}_updatePatternAtlas(){let w=[];for(let le in this.patterns)w.push(this.patterns[le].bin);let{w:B,h:Q}=a.p(w),ee=this.atlasImage;ee.resize({width:B||1,height:Q||1});for(let le in this.patterns){let{bin:qe}=this.patterns[le],Xe=qe.x+1,ot=qe.y+1,Tt=this.getImage(le).data,Yt=Tt.width,Kt=Tt.height;a.R.copy(Tt,ee,{x:0,y:0},{x:Xe,y:ot},{width:Yt,height:Kt}),a.R.copy(Tt,ee,{x:0,y:Kt-1},{x:Xe,y:ot-1},{width:Yt,height:1}),a.R.copy(Tt,ee,{x:0,y:0},{x:Xe,y:ot+Kt},{width:Yt,height:1}),a.R.copy(Tt,ee,{x:Yt-1,y:0},{x:Xe-1,y:ot},{width:1,height:Kt}),a.R.copy(Tt,ee,{x:0,y:0},{x:Xe+Yt,y:ot},{width:1,height:Kt})}this.dirty=!0}beginFrame(){this.callbackDispatchedThisFrame={}}dispatchRenderCallbacks(w){for(let B of w){if(this.callbackDispatchedThisFrame[B])continue;this.callbackDispatchedThisFrame[B]=!0;let Q=this.getImage(B);Q||a.w(`Image with ID: "${B}" was not found`),P(Q)&&this.updateImage(B,Q)}}}let z=1e20;function O(ue,w,B,Q,ee,le,qe,Xe,ot){for(let Tt=w;Tt<w+Q;Tt++)V(ue,B*le+Tt,le,ee,qe,Xe,ot);for(let Tt=B;Tt<B+ee;Tt++)V(ue,Tt*le+w,1,Q,qe,Xe,ot)}function V(ue,w,B,Q,ee,le,qe){le[0]=0,qe[0]=-z,qe[1]=z,ee[0]=ue[w];for(let Xe=1,ot=0,Tt=0;Xe<Q;Xe++){ee[Xe]=ue[w+Xe*B];let Yt=Xe*Xe;do{let Kt=le[ot];Tt=(ee[Xe]-ee[Kt]+Yt-Kt*Kt)/(Xe-Kt)/2}while(Tt<=qe[ot]&&--ot>-1);ot++,le[ot]=Xe,qe[ot]=Tt,qe[ot+1]=z}for(let Xe=0,ot=0;Xe<Q;Xe++){for(;qe[ot+1]<Xe;)ot++;let Tt=le[ot],Yt=Xe-Tt;ue[w+Xe*B]=ee[Tt]+Yt*Yt}}class G{constructor(w,B){this.requestManager=w,this.localIdeographFontFamily=B,this.entries={}}setURL(w){this.url=w}getGlyphs(w){return a._(this,void 0,void 0,function*(){let B=[];for(let le in w)for(let qe of w[le])B.push(this._getAndCacheGlyphsPromise(le,qe));let Q=yield Promise.all(B),ee={};for(let{stack:le,id:qe,glyph:Xe}of Q)ee[le]||(ee[le]={}),ee[le][qe]=Xe&&{id:Xe.id,bitmap:Xe.bitmap.clone(),metrics:Xe.metrics};return ee})}_getAndCacheGlyphsPromise(w,B){return a._(this,void 0,void 0,function*(){let Q=this.entries[w];Q||(Q=this.entries[w]={glyphs:{},requests:{},ranges:{}});let ee=Q.glyphs[B];if(ee!==void 0)return{stack:w,id:B,glyph:ee};if(ee=this._tinySDF(Q,w,B),ee)return Q.glyphs[B]=ee,{stack:w,id:B,glyph:ee};let le=Math.floor(B/256);if(256*le>65535)throw new Error("glyphs > 65535 not supported");if(Q.ranges[le])return{stack:w,id:B,glyph:ee};if(!this.url)throw new Error("glyphsUrl is not set");if(!Q.requests[le]){let Xe=G.loadGlyphRange(w,le,this.url,this.requestManager);Q.requests[le]=Xe}let qe=yield Q.requests[le];for(let Xe in qe)this._doesCharSupportLocalGlyph(+Xe)||(Q.glyphs[+Xe]=qe[+Xe]);return Q.ranges[le]=!0,{stack:w,id:B,glyph:qe[B]||null}})}_doesCharSupportLocalGlyph(w){return!!this.localIdeographFontFamily&&new RegExp("\\p{Ideo}|\\p{sc=Hang}|\\p{sc=Hira}|\\p{sc=Kana}","u").test(String.fromCodePoint(w))}_tinySDF(w,B,Q){let ee=this.localIdeographFontFamily;if(!ee||!this._doesCharSupportLocalGlyph(Q))return;let le=w.tinySDF;if(!le){let Xe="400";/bold/i.test(B)?Xe="900":/medium/i.test(B)?Xe="500":/light/i.test(B)&&(Xe="200"),le=w.tinySDF=new G.TinySDF({fontSize:48,buffer:6,radius:16,cutoff:.25,fontFamily:ee,fontWeight:Xe})}let qe=le.draw(String.fromCharCode(Q));return{id:Q,bitmap:new a.o({width:qe.width||60,height:qe.height||60},qe.data),metrics:{width:qe.glyphWidth/2||24,height:qe.glyphHeight/2||24,left:qe.glyphLeft/2+.5||0,top:qe.glyphTop/2-27.5||-8,advance:qe.glyphAdvance/2||24,isDoubleResolution:!0}}}}G.loadGlyphRange=function(ue,w,B,Q){return a._(this,void 0,void 0,function*(){let ee=256*w,le=ee+255,qe=Q.transformRequest(B.replace("{fontstack}",ue).replace("{range}",`${ee}-${le}`),"Glyphs"),Xe=yield a.l(qe,new AbortController);if(!Xe||!Xe.data)throw new Error(`Could not load glyph range. range: ${w}, ${ee}-${le}`);let ot={};for(let Tt of a.n(Xe.data))ot[Tt.id]=Tt;return ot})},G.TinySDF=class{constructor({fontSize:ue=24,buffer:w=3,radius:B=8,cutoff:Q=.25,fontFamily:ee="sans-serif",fontWeight:le="normal",fontStyle:qe="normal"}={}){this.buffer=w,this.cutoff=Q,this.radius=B;let Xe=this.size=ue+4*w,ot=this._createCanvas(Xe),Tt=this.ctx=ot.getContext("2d",{willReadFrequently:!0});Tt.font=`${qe} ${le} ${ue}px ${ee}`,Tt.textBaseline="alphabetic",Tt.textAlign="left",Tt.fillStyle="black",this.gridOuter=new Float64Array(Xe*Xe),this.gridInner=new Float64Array(Xe*Xe),this.f=new Float64Array(Xe),this.z=new Float64Array(Xe+1),this.v=new Uint16Array(Xe)}_createCanvas(ue){let w=document.createElement("canvas");return w.width=w.height=ue,w}draw(ue){let{width:w,actualBoundingBoxAscent:B,actualBoundingBoxDescent:Q,actualBoundingBoxLeft:ee,actualBoundingBoxRight:le}=this.ctx.measureText(ue),qe=Math.ceil(B),Xe=Math.max(0,Math.min(this.size-this.buffer,Math.ceil(le-ee))),ot=Math.min(this.size-this.buffer,qe+Math.ceil(Q)),Tt=Xe+2*this.buffer,Yt=ot+2*this.buffer,Kt=Math.max(Tt*Yt,0),xr=new Uint8ClampedArray(Kt),Ir={data:xr,width:Tt,height:Yt,glyphWidth:Xe,glyphHeight:ot,glyphTop:qe,glyphLeft:0,glyphAdvance:w};if(Xe===0||ot===0)return Ir;let{ctx:ve,buffer:be,gridInner:De,gridOuter:Be}=this;ve.clearRect(be,be,Xe,ot),ve.fillText(ue,be,be+qe);let et=ve.getImageData(be,be,Xe,ot);Be.fill(z,0,Kt),De.fill(0,0,Kt);for(let We=0;We<ot;We++)for(let it=0;it<Xe;it++){let Ft=et.data[4*(We*Xe+it)+3]/255;if(Ft===0)continue;let Ht=(We+be)*Tt+it+be;if(Ft===1)Be[Ht]=0,De[Ht]=z;else{let tr=.5-Ft;Be[Ht]=tr>0?tr*tr:0,De[Ht]=tr<0?tr*tr:0}}O(Be,0,0,Tt,Yt,Tt,this.f,this.v,this.z),O(De,be,be,Xe,ot,Tt,this.f,this.v,this.z);for(let We=0;We<Kt;We++){let it=Math.sqrt(Be[We])-Math.sqrt(De[We]);xr[We]=Math.round(255-255*(it/this.radius+this.cutoff))}return Ir}};class Z{constructor(){this.specification=a.v.light.position}possiblyEvaluate(w,B){return a.x(w.expression.evaluate(B))}interpolate(w,B,Q){return{x:a.y.number(w.x,B.x,Q),y:a.y.number(w.y,B.y,Q),z:a.y.number(w.z,B.z,Q)}}}let j;class N extends a.E{constructor(w){super(),j=j||new a.q({anchor:new a.D(a.v.light.anchor),position:new Z,color:new a.D(a.v.light.color),intensity:new a.D(a.v.light.intensity)}),this._transitionable=new a.T(j),this.setLight(w),this._transitioning=this._transitionable.untransitioned()}getLight(){return this._transitionable.serialize()}setLight(w,B={}){if(!this._validate(a.r,w,B))for(let Q in w){let ee=w[Q];Q.endsWith("-transition")?this._transitionable.setTransition(Q.slice(0,-11),ee):this._transitionable.setValue(Q,ee)}}updateTransitions(w){this._transitioning=this._transitionable.transitioned(w,this._transitioning)}hasTransition(){return this._transitioning.hasTransition()}recalculate(w){this.properties=this._transitioning.possiblyEvaluate(w)}_validate(w,B,Q){return(!Q||Q.validate!==!1)&&a.t(this,w.call(a.u,{value:B,style:{glyphs:!0,sprite:!0},styleSpec:a.v}))}}let H=new a.q({"sky-color":new a.D(a.v.sky["sky-color"]),"horizon-color":new a.D(a.v.sky["horizon-color"]),"fog-color":new a.D(a.v.sky["fog-color"]),"fog-ground-blend":new a.D(a.v.sky["fog-ground-blend"]),"horizon-fog-blend":new a.D(a.v.sky["horizon-fog-blend"]),"sky-horizon-blend":new a.D(a.v.sky["sky-horizon-blend"]),"atmosphere-blend":new a.D(a.v.sky["atmosphere-blend"])});class te extends a.E{constructor(w){super(),this._transitionable=new a.T(H),this.setSky(w),this._transitioning=this._transitionable.untransitioned(),this.recalculate(new a.z(0))}setSky(w,B={}){if(!this._validate(a.B,w,B)){w||(w={"sky-color":"transparent","horizon-color":"transparent","fog-color":"transparent","fog-ground-blend":1,"atmosphere-blend":0});for(let Q in w){let ee=w[Q];Q.endsWith("-transition")?this._transitionable.setTransition(Q.slice(0,-11),ee):this._transitionable.setValue(Q,ee)}}}getSky(){return this._transitionable.serialize()}updateTransitions(w){this._transitioning=this._transitionable.transitioned(w,this._transitioning)}hasTransition(){return this._transitioning.hasTransition()}recalculate(w){this.properties=this._transitioning.possiblyEvaluate(w)}_validate(w,B,Q={}){return(Q==null?void 0:Q.validate)!==!1&&a.t(this,w.call(a.u,a.e({value:B,style:{glyphs:!0,sprite:!0},styleSpec:a.v})))}calculateFogBlendOpacity(w){return w<60?0:w<70?(w-60)/10:1}}class oe{constructor(w,B){this.width=w,this.height=B,this.nextRow=0,this.data=new Uint8Array(this.width*this.height),this.dashEntry={}}getDash(w,B){let Q=w.join(",")+String(B);return this.dashEntry[Q]||(this.dashEntry[Q]=this.addDash(w,B)),this.dashEntry[Q]}getDashRanges(w,B,Q){let ee=[],le=w.length%2==1?-w[w.length-1]*Q:0,qe=w[0]*Q,Xe=!0;ee.push({left:le,right:qe,isDash:Xe,zeroLength:w[0]===0});let ot=w[0];for(let Tt=1;Tt<w.length;Tt++){Xe=!Xe;let Yt=w[Tt];le=ot*Q,ot+=Yt,qe=ot*Q,ee.push({left:le,right:qe,isDash:Xe,zeroLength:Yt===0})}return ee}addRoundDash(w,B,Q){let ee=B/2;for(let le=-Q;le<=Q;le++){let qe=this.width*(this.nextRow+Q+le),Xe=0,ot=w[Xe];for(let Tt=0;Tt<this.width;Tt++){Tt/ot.right>1&&(ot=w[++Xe]);let Yt=Math.abs(Tt-ot.left),Kt=Math.abs(Tt-ot.right),xr=Math.min(Yt,Kt),Ir,ve=le/Q*(ee+1);if(ot.isDash){let be=ee-Math.abs(ve);Ir=Math.sqrt(xr*xr+be*be)}else Ir=ee-Math.sqrt(xr*xr+ve*ve);this.data[qe+Tt]=Math.max(0,Math.min(255,Ir+128))}}}addRegularDash(w){for(let Xe=w.length-1;Xe>=0;--Xe){let ot=w[Xe],Tt=w[Xe+1];ot.zeroLength?w.splice(Xe,1):Tt&&Tt.isDash===ot.isDash&&(Tt.left=ot.left,w.splice(Xe,1))}let B=w[0],Q=w[w.length-1];B.isDash===Q.isDash&&(B.left=Q.left-this.width,Q.right=B.right+this.width);let ee=this.width*this.nextRow,le=0,qe=w[le];for(let Xe=0;Xe<this.width;Xe++){Xe/qe.right>1&&(qe=w[++le]);let ot=Math.abs(Xe-qe.left),Tt=Math.abs(Xe-qe.right),Yt=Math.min(ot,Tt);this.data[ee+Xe]=Math.max(0,Math.min(255,(qe.isDash?Yt:-Yt)+128))}}addDash(w,B){let Q=B?7:0,ee=2*Q+1;if(this.nextRow+ee>this.height)return a.w("LineAtlas out of space"),null;let le=0;for(let Xe=0;Xe<w.length;Xe++)le+=w[Xe];if(le!==0){let Xe=this.width/le,ot=this.getDashRanges(w,this.width,Xe);B?this.addRoundDash(ot,Xe,Q):this.addRegularDash(ot)}let qe={y:(this.nextRow+Q+.5)/this.height,height:2*Q/this.height,width:le};return this.nextRow+=ee,this.dirty=!0,qe}bind(w){let B=w.gl;this.texture?(B.bindTexture(B.TEXTURE_2D,this.texture),this.dirty&&(this.dirty=!1,B.texSubImage2D(B.TEXTURE_2D,0,0,0,this.width,this.height,B.ALPHA,B.UNSIGNED_BYTE,this.data))):(this.texture=B.createTexture(),B.bindTexture(B.TEXTURE_2D,this.texture),B.texParameteri(B.TEXTURE_2D,B.TEXTURE_WRAP_S,B.REPEAT),B.texParameteri(B.TEXTURE_2D,B.TEXTURE_WRAP_T,B.REPEAT),B.texParameteri(B.TEXTURE_2D,B.TEXTURE_MIN_FILTER,B.LINEAR),B.texParameteri(B.TEXTURE_2D,B.TEXTURE_MAG_FILTER,B.LINEAR),B.texImage2D(B.TEXTURE_2D,0,B.ALPHA,this.width,this.height,0,B.ALPHA,B.UNSIGNED_BYTE,this.data))}}let _e="maplibre_preloaded_worker_pool";class Ee{constructor(){this.active={}}acquire(w){if(!this.workers)for(this.workers=[];this.workers.length<Ee.workerCount;)this.workers.push(new Worker(a.a.WORKER_URL));return this.active[w]=!0,this.workers.slice()}release(w){delete this.active[w],this.numActive()===0&&(this.workers.forEach(B=>{B.terminate()}),this.workers=null)}isPreloaded(){return!!this.active[_e]}numActive(){return Object.keys(this.active).length}}let Ce=Math.floor(u.hardwareConcurrency/2),me,ie;function Se(){return me||(me=new Ee),me}Ee.workerCount=a.C(globalThis)?Math.max(Math.min(Ce,3),1):1;class Le{constructor(w,B){this.workerPool=w,this.actors=[],this.currentActor=0,this.id=B;let Q=this.workerPool.acquire(B);for(let ee=0;ee<Q.length;ee++){let le=new a.F(Q[ee],B);le.name=`Worker ${ee}`,this.actors.push(le)}if(!this.actors.length)throw new Error("No actors found")}broadcast(w,B){let Q=[];for(let ee of this.actors)Q.push(ee.sendAsync({type:w,data:B}));return Promise.all(Q)}getActor(){return this.currentActor=(this.currentActor+1)%this.actors.length,this.actors[this.currentActor]}remove(w=!0){this.actors.forEach(B=>{B.remove()}),this.actors=[],w&&this.workerPool.release(this.id)}registerMessageHandler(w,B){for(let Q of this.actors)Q.registerMessageHandler(w,B)}}function Ae(){return ie||(ie=new Le(Se(),a.G),ie.registerMessageHandler("GR",(ue,w,B)=>a.m(w,B))),ie}function Fe(ue,w){let B=a.H();return a.J(B,B,[1,1,0]),a.K(B,B,[.5*ue.width,.5*ue.height,1]),a.L(B,B,ue.calculatePosMatrix(w.toUnwrapped()))}function Pe(ue,w,B,Q,ee,le){let qe=function(Kt,xr,Ir){if(Kt)for(let ve of Kt){let be=xr[ve];if(be&&be.source===Ir&&be.type==="fill-extrusion")return!0}else for(let ve in xr){let be=xr[ve];if(be.source===Ir&&be.type==="fill-extrusion")return!0}return!1}(ee&&ee.layers,w,ue.id),Xe=le.maxPitchScaleFactor(),ot=ue.tilesIn(Q,Xe,qe);ot.sort(ge);let Tt=[];for(let Kt of ot)Tt.push({wrappedTileID:Kt.tileID.wrapped().key,queryResults:Kt.tile.queryRenderedFeatures(w,B,ue._state,Kt.queryGeometry,Kt.cameraQueryGeometry,Kt.scale,ee,le,Xe,Fe(ue.transform,Kt.tileID))});let Yt=function(Kt){let xr={},Ir={};for(let ve of Kt){let be=ve.queryResults,De=ve.wrappedTileID,Be=Ir[De]=Ir[De]||{};for(let et in be){let We=be[et],it=Be[et]=Be[et]||{},Ft=xr[et]=xr[et]||[];for(let Ht of We)it[Ht.featureIndex]||(it[Ht.featureIndex]=!0,Ft.push(Ht))}}return xr}(Tt);for(let Kt in Yt)Yt[Kt].forEach(xr=>{let Ir=xr.feature,ve=ue.getFeatureState(Ir.layer["source-layer"],Ir.id);Ir.source=Ir.layer.source,Ir.layer["source-layer"]&&(Ir.sourceLayer=Ir.layer["source-layer"]),Ir.state=ve});return Yt}function ge(ue,w){let B=ue.tileID,Q=w.tileID;return B.overscaledZ-Q.overscaledZ||B.canonical.y-Q.canonical.y||B.wrap-Q.wrap||B.canonical.x-Q.canonical.x}function Re(ue,w,B){return a._(this,void 0,void 0,function*(){let Q=ue;if(ue.url?Q=(yield a.h(w.transformRequest(ue.url,"Source"),B)).data:yield u.frameAsync(B),!Q)return null;let ee=a.M(a.e(Q,ue),["tiles","minzoom","maxzoom","attribution","bounds","scheme","tileSize","encoding"]);return"vector_layers"in Q&&Q.vector_layers&&(ee.vectorLayerIds=Q.vector_layers.map(le=>le.id)),ee})}class ce{constructor(w,B){w&&(B?this.setSouthWest(w).setNorthEast(B):Array.isArray(w)&&(w.length===4?this.setSouthWest([w[0],w[1]]).setNorthEast([w[2],w[3]]):this.setSouthWest(w[0]).setNorthEast(w[1])))}setNorthEast(w){return this._ne=w instanceof a.N?new a.N(w.lng,w.lat):a.N.convert(w),this}setSouthWest(w){return this._sw=w instanceof a.N?new a.N(w.lng,w.lat):a.N.convert(w),this}extend(w){let B=this._sw,Q=this._ne,ee,le;if(w instanceof a.N)ee=w,le=w;else{if(!(w instanceof ce))return Array.isArray(w)?w.length===4||w.every(Array.isArray)?this.extend(ce.convert(w)):this.extend(a.N.convert(w)):w&&("lng"in w||"lon"in w)&&"lat"in w?this.extend(a.N.convert(w)):this;if(ee=w._sw,le=w._ne,!ee||!le)return this}return B||Q?(B.lng=Math.min(ee.lng,B.lng),B.lat=Math.min(ee.lat,B.lat),Q.lng=Math.max(le.lng,Q.lng),Q.lat=Math.max(le.lat,Q.lat)):(this._sw=new a.N(ee.lng,ee.lat),this._ne=new a.N(le.lng,le.lat)),this}getCenter(){return new a.N((this._sw.lng+this._ne.lng)/2,(this._sw.lat+this._ne.lat)/2)}getSouthWest(){return this._sw}getNorthEast(){return this._ne}getNorthWest(){return new a.N(this.getWest(),this.getNorth())}getSouthEast(){return new a.N(this.getEast(),this.getSouth())}getWest(){return this._sw.lng}getSouth(){return this._sw.lat}getEast(){return this._ne.lng}getNorth(){return this._ne.lat}toArray(){return[this._sw.toArray(),this._ne.toArray()]}toString(){return`LngLatBounds(${this._sw.toString()}, ${this._ne.toString()})`}isEmpty(){return!(this._sw&&this._ne)}contains(w){let{lng:B,lat:Q}=a.N.convert(w),ee=this._sw.lng<=B&&B<=this._ne.lng;return this._sw.lng>this._ne.lng&&(ee=this._sw.lng>=B&&B>=this._ne.lng),this._sw.lat<=Q&&Q<=this._ne.lat&&ee}static convert(w){return w instanceof ce?w:w&&new ce(w)}static fromLngLat(w,B=0){let Q=360*B/40075017,ee=Q/Math.cos(Math.PI/180*w.lat);return new ce(new a.N(w.lng-ee,w.lat-Q),new a.N(w.lng+ee,w.lat+Q))}adjustAntiMeridian(){let w=new a.N(this._sw.lng,this._sw.lat),B=new a.N(this._ne.lng,this._ne.lat);return new ce(w,w.lng>B.lng?new a.N(B.lng+360,B.lat):B)}}class Ze{constructor(w,B,Q){this.bounds=ce.convert(this.validateBounds(w)),this.minzoom=B||0,this.maxzoom=Q||24}validateBounds(w){return Array.isArray(w)&&w.length===4?[Math.max(-180,w[0]),Math.max(-90,w[1]),Math.min(180,w[2]),Math.min(90,w[3])]:[-180,-90,180,90]}contains(w){let B=Math.pow(2,w.z),Q=Math.floor(a.O(this.bounds.getWest())*B),ee=Math.floor(a.Q(this.bounds.getNorth())*B),le=Math.ceil(a.O(this.bounds.getEast())*B),qe=Math.ceil(a.Q(this.bounds.getSouth())*B);return w.x>=Q&&w.x<le&&w.y>=ee&&w.y<qe}}class ut extends a.E{constructor(w,B,Q,ee){if(super(),this.id=w,this.dispatcher=Q,this.type="vector",this.minzoom=0,this.maxzoom=22,this.scheme="xyz",this.tileSize=512,this.reparseOverscaled=!0,this.isTileClipped=!0,this._loaded=!1,a.e(this,a.M(B,["url","scheme","tileSize","promoteId"])),this._options=a.e({type:"vector"},B),this._collectResourceTiming=B.collectResourceTiming,this.tileSize!==512)throw new Error("vector tile sources must have a tileSize of 512");this.setEventedParent(ee)}load(){return a._(this,void 0,void 0,function*(){this._loaded=!1,this.fire(new a.k("dataloading",{dataType:"source"})),this._tileJSONRequest=new AbortController;try{let w=yield Re(this._options,this.map._requestManager,this._tileJSONRequest);this._tileJSONRequest=null,this._loaded=!0,this.map.style.sourceCaches[this.id].clearTiles(),w&&(a.e(this,w),w.bounds&&(this.tileBounds=new Ze(w.bounds,this.minzoom,this.maxzoom)),this.fire(new a.k("data",{dataType:"source",sourceDataType:"metadata"})),this.fire(new a.k("data",{dataType:"source",sourceDataType:"content"})))}catch(w){this._tileJSONRequest=null,this.fire(new a.j(w))}})}loaded(){return this._loaded}hasTile(w){return!this.tileBounds||this.tileBounds.contains(w.canonical)}onAdd(w){this.map=w,this.load()}setSourceProperty(w){this._tileJSONRequest&&this._tileJSONRequest.abort(),w(),this.load()}setTiles(w){return this.setSourceProperty(()=>{this._options.tiles=w}),this}setUrl(w){return this.setSourceProperty(()=>{this.url=w,this._options.url=w}),this}onRemove(){this._tileJSONRequest&&(this._tileJSONRequest.abort(),this._tileJSONRequest=null)}serialize(){return a.e({},this._options)}loadTile(w){return a._(this,void 0,void 0,function*(){let B=w.tileID.canonical.url(this.tiles,this.map.getPixelRatio(),this.scheme),Q={request:this.map._requestManager.transformRequest(B,"Tile"),uid:w.uid,tileID:w.tileID,zoom:w.tileID.overscaledZ,tileSize:this.tileSize*w.tileID.overscaleFactor(),type:this.type,source:this.id,pixelRatio:this.map.getPixelRatio(),showCollisionBoxes:this.map.showCollisionBoxes,promoteId:this.promoteId};Q.request.collectResourceTiming=this._collectResourceTiming;let ee="RT";if(w.actor&&w.state!=="expired"){if(w.state==="loading")return new Promise((le,qe)=>{w.reloadPromise={resolve:le,reject:qe}})}else w.actor=this.dispatcher.getActor(),ee="LT";w.abortController=new AbortController;try{let le=yield w.actor.sendAsync({type:ee,data:Q},w.abortController);if(delete w.abortController,w.aborted)return;this._afterTileLoadWorkerResponse(w,le)}catch(le){if(delete w.abortController,w.aborted)return;if(le&&le.status!==404)throw le;this._afterTileLoadWorkerResponse(w,null)}})}_afterTileLoadWorkerResponse(w,B){if(B&&B.resourceTiming&&(w.resourceTiming=B.resourceTiming),B&&this.map._refreshExpiredTiles&&w.setExpiryData(B),w.loadVectorData(B,this.map.painter),w.reloadPromise){let Q=w.reloadPromise;w.reloadPromise=null,this.loadTile(w).then(Q.resolve).catch(Q.reject)}}abortTile(w){return a._(this,void 0,void 0,function*(){w.abortController&&(w.abortController.abort(),delete w.abortController),w.actor&&(yield w.actor.sendAsync({type:"AT",data:{uid:w.uid,type:this.type,source:this.id}}))})}unloadTile(w){return a._(this,void 0,void 0,function*(){w.unloadVectorData(),w.actor&&(yield w.actor.sendAsync({type:"RMT",data:{uid:w.uid,type:this.type,source:this.id}}))})}hasTransition(){return!1}}class pt extends a.E{constructor(w,B,Q,ee){super(),this.id=w,this.dispatcher=Q,this.setEventedParent(ee),this.type="raster",this.minzoom=0,this.maxzoom=22,this.roundZoom=!0,this.scheme="xyz",this.tileSize=512,this._loaded=!1,this._options=a.e({type:"raster"},B),a.e(this,a.M(B,["url","scheme","tileSize"]))}load(){return a._(this,void 0,void 0,function*(){this._loaded=!1,this.fire(new a.k("dataloading",{dataType:"source"})),this._tileJSONRequest=new AbortController;try{let w=yield Re(this._options,this.map._requestManager,this._tileJSONRequest);this._tileJSONRequest=null,this._loaded=!0,w&&(a.e(this,w),w.bounds&&(this.tileBounds=new Ze(w.bounds,this.minzoom,this.maxzoom)),this.fire(new a.k("data",{dataType:"source",sourceDataType:"metadata"})),this.fire(new a.k("data",{dataType:"source",sourceDataType:"content"})))}catch(w){this._tileJSONRequest=null,this.fire(new a.j(w))}})}loaded(){return this._loaded}onAdd(w){this.map=w,this.load()}onRemove(){this._tileJSONRequest&&(this._tileJSONRequest.abort(),this._tileJSONRequest=null)}setSourceProperty(w){this._tileJSONRequest&&(this._tileJSONRequest.abort(),this._tileJSONRequest=null),w(),this.load()}setTiles(w){return this.setSourceProperty(()=>{this._options.tiles=w}),this}setUrl(w){return this.setSourceProperty(()=>{this.url=w,this._options.url=w}),this}serialize(){return a.e({},this._options)}hasTile(w){return!this.tileBounds||this.tileBounds.contains(w.canonical)}loadTile(w){return a._(this,void 0,void 0,function*(){let B=w.tileID.canonical.url(this.tiles,this.map.getPixelRatio(),this.scheme);w.abortController=new AbortController;try{let Q=yield p.getImage(this.map._requestManager.transformRequest(B,"Tile"),w.abortController,this.map._refreshExpiredTiles);if(delete w.abortController,w.aborted)return void(w.state="unloaded");if(Q&&Q.data){this.map._refreshExpiredTiles&&Q.cacheControl&&Q.expires&&w.setExpiryData({cacheControl:Q.cacheControl,expires:Q.expires});let ee=this.map.painter.context,le=ee.gl,qe=Q.data;w.texture=this.map.painter.getTileTexture(qe.width),w.texture?w.texture.update(qe,{useMipmap:!0}):(w.texture=new g(ee,qe,le.RGBA,{useMipmap:!0}),w.texture.bind(le.LINEAR,le.CLAMP_TO_EDGE,le.LINEAR_MIPMAP_NEAREST)),w.state="loaded"}}catch(Q){if(delete w.abortController,w.aborted)w.state="unloaded";else if(Q)throw w.state="errored",Q}})}abortTile(w){return a._(this,void 0,void 0,function*(){w.abortController&&(w.abortController.abort(),delete w.abortController)})}unloadTile(w){return a._(this,void 0,void 0,function*(){w.texture&&this.map.painter.saveTileTexture(w.texture)})}hasTransition(){return!1}}class Zt extends pt{constructor(w,B,Q,ee){super(w,B,Q,ee),this.type="raster-dem",this.maxzoom=22,this._options=a.e({type:"raster-dem"},B),this.encoding=B.encoding||"mapbox",this.redFactor=B.redFactor,this.greenFactor=B.greenFactor,this.blueFactor=B.blueFactor,this.baseShift=B.baseShift}loadTile(w){return a._(this,void 0,void 0,function*(){let B=w.tileID.canonical.url(this.tiles,this.map.getPixelRatio(),this.scheme),Q=this.map._requestManager.transformRequest(B,"Tile");w.neighboringTiles=this._getNeighboringTiles(w.tileID),w.abortController=new AbortController;try{let ee=yield p.getImage(Q,w.abortController,this.map._refreshExpiredTiles);if(delete w.abortController,w.aborted)return void(w.state="unloaded");if(ee&&ee.data){let le=ee.data;this.map._refreshExpiredTiles&&ee.cacheControl&&ee.expires&&w.setExpiryData({cacheControl:ee.cacheControl,expires:ee.expires});let qe=a.b(le)&&a.U()?le:yield this.readImageNow(le),Xe={type:this.type,uid:w.uid,source:this.id,rawImageData:qe,encoding:this.encoding,redFactor:this.redFactor,greenFactor:this.greenFactor,blueFactor:this.blueFactor,baseShift:this.baseShift};if(!w.actor||w.state==="expired"){w.actor=this.dispatcher.getActor();let ot=yield w.actor.sendAsync({type:"LDT",data:Xe});w.dem=ot,w.needsHillshadePrepare=!0,w.needsTerrainPrepare=!0,w.state="loaded"}}}catch(ee){if(delete w.abortController,w.aborted)w.state="unloaded";else if(ee)throw w.state="errored",ee}})}readImageNow(w){return a._(this,void 0,void 0,function*(){if(typeof VideoFrame!="undefined"&&a.V()){let B=w.width+2,Q=w.height+2;try{return new a.R({width:B,height:Q},yield a.W(w,-1,-1,B,Q))}catch(ee){}}return u.getImageData(w,1)})}_getNeighboringTiles(w){let B=w.canonical,Q=Math.pow(2,B.z),ee=(B.x-1+Q)%Q,le=B.x===0?w.wrap-1:w.wrap,qe=(B.x+1+Q)%Q,Xe=B.x+1===Q?w.wrap+1:w.wrap,ot={};return ot[new a.S(w.overscaledZ,le,B.z,ee,B.y).key]={backfilled:!1},ot[new a.S(w.overscaledZ,Xe,B.z,qe,B.y).key]={backfilled:!1},B.y>0&&(ot[new a.S(w.overscaledZ,le,B.z,ee,B.y-1).key]={backfilled:!1},ot[new a.S(w.overscaledZ,w.wrap,B.z,B.x,B.y-1).key]={backfilled:!1},ot[new a.S(w.overscaledZ,Xe,B.z,qe,B.y-1).key]={backfilled:!1}),B.y+1<Q&&(ot[new a.S(w.overscaledZ,le,B.z,ee,B.y+1).key]={backfilled:!1},ot[new a.S(w.overscaledZ,w.wrap,B.z,B.x,B.y+1).key]={backfilled:!1},ot[new a.S(w.overscaledZ,Xe,B.z,qe,B.y+1).key]={backfilled:!1}),ot}unloadTile(w){return a._(this,void 0,void 0,function*(){w.demTexture&&this.map.painter.saveTileTexture(w.demTexture),w.fbo&&(w.fbo.destroy(),delete w.fbo),w.dem&&delete w.dem,delete w.neighboringTiles,w.state="unloaded",w.actor&&(yield w.actor.sendAsync({type:"RDT",data:{type:this.type,uid:w.uid,source:this.id}}))})}}class st extends a.E{constructor(w,B,Q,ee){super(),this.id=w,this.type="geojson",this.minzoom=0,this.maxzoom=18,this.tileSize=512,this.isTileClipped=!0,this.reparseOverscaled=!0,this._removed=!1,this._pendingLoads=0,this.actor=Q.getActor(),this.setEventedParent(ee),this._data=B.data,this._options=a.e({},B),this._collectResourceTiming=B.collectResourceTiming,B.maxzoom!==void 0&&(this.maxzoom=B.maxzoom),B.type&&(this.type=B.type),B.attribution&&(this.attribution=B.attribution),this.promoteId=B.promoteId;let le=a.X/this.tileSize;B.clusterMaxZoom!==void 0&&this.maxzoom<=B.clusterMaxZoom&&a.w(`The maxzoom value "${this.maxzoom}" is expected to be greater than the clusterMaxZoom value "${B.clusterMaxZoom}".`),this.workerOptions=a.e({source:this.id,cluster:B.cluster||!1,geojsonVtOptions:{buffer:(B.buffer!==void 0?B.buffer:128)*le,tolerance:(B.tolerance!==void 0?B.tolerance:.375)*le,extent:a.X,maxZoom:this.maxzoom,lineMetrics:B.lineMetrics||!1,generateId:B.generateId||!1},superclusterOptions:{maxZoom:B.clusterMaxZoom!==void 0?B.clusterMaxZoom:this.maxzoom-1,minPoints:Math.max(2,B.clusterMinPoints||2),extent:a.X,radius:(B.clusterRadius||50)*le,log:!1,generateId:B.generateId||!1},clusterProperties:B.clusterProperties,filter:B.filter},B.workerOptions),typeof this.promoteId=="string"&&(this.workerOptions.promoteId=this.promoteId)}load(){return a._(this,void 0,void 0,function*(){yield this._updateWorkerData()})}onAdd(w){this.map=w,this.load()}setData(w){return this._data=w,this._updateWorkerData(),this}updateData(w){return this._updateWorkerData(w),this}getData(){return a._(this,void 0,void 0,function*(){let w=a.e({type:this.type},this.workerOptions);return this.actor.sendAsync({type:"GD",data:w})})}setClusterOptions(w){return this.workerOptions.cluster=w.cluster,w&&(w.clusterRadius!==void 0&&(this.workerOptions.superclusterOptions.radius=w.clusterRadius),w.clusterMaxZoom!==void 0&&(this.workerOptions.superclusterOptions.maxZoom=w.clusterMaxZoom)),this._updateWorkerData(),this}getClusterExpansionZoom(w){return this.actor.sendAsync({type:"GCEZ",data:{type:this.type,clusterId:w,source:this.id}})}getClusterChildren(w){return this.actor.sendAsync({type:"GCC",data:{type:this.type,clusterId:w,source:this.id}})}getClusterLeaves(w,B,Q){return this.actor.sendAsync({type:"GCL",data:{type:this.type,source:this.id,clusterId:w,limit:B,offset:Q}})}_updateWorkerData(w){return a._(this,void 0,void 0,function*(){let B=a.e({type:this.type},this.workerOptions);w?B.dataDiff=w:typeof this._data=="string"?(B.request=this.map._requestManager.transformRequest(u.resolveURL(this._data),"Source"),B.request.collectResourceTiming=this._collectResourceTiming):B.data=JSON.stringify(this._data),this._pendingLoads++,this.fire(new a.k("dataloading",{dataType:"source"}));try{let Q=yield this.actor.sendAsync({type:"LD",data:B});if(this._pendingLoads--,this._removed||Q.abandoned)return void this.fire(new a.k("dataabort",{dataType:"source"}));let ee=null;Q.resourceTiming&&Q.resourceTiming[this.id]&&(ee=Q.resourceTiming[this.id].slice(0));let le={dataType:"source"};this._collectResourceTiming&&ee&&ee.length>0&&a.e(le,{resourceTiming:ee}),this.fire(new a.k("data",Object.assign(Object.assign({},le),{sourceDataType:"metadata"}))),this.fire(new a.k("data",Object.assign(Object.assign({},le),{sourceDataType:"content"})))}catch(Q){if(this._pendingLoads--,this._removed)return void this.fire(new a.k("dataabort",{dataType:"source"}));this.fire(new a.j(Q))}})}loaded(){return this._pendingLoads===0}loadTile(w){return a._(this,void 0,void 0,function*(){let B=w.actor?"RT":"LT";w.actor=this.actor;let Q={type:this.type,uid:w.uid,tileID:w.tileID,zoom:w.tileID.overscaledZ,maxZoom:this.maxzoom,tileSize:this.tileSize,source:this.id,pixelRatio:this.map.getPixelRatio(),showCollisionBoxes:this.map.showCollisionBoxes,promoteId:this.promoteId};w.abortController=new AbortController;let ee=yield this.actor.sendAsync({type:B,data:Q},w.abortController);delete w.abortController,w.unloadVectorData(),w.aborted||w.loadVectorData(ee,this.map.painter,B==="RT")})}abortTile(w){return a._(this,void 0,void 0,function*(){w.abortController&&(w.abortController.abort(),delete w.abortController),w.aborted=!0})}unloadTile(w){return a._(this,void 0,void 0,function*(){w.unloadVectorData(),yield this.actor.sendAsync({type:"RMT",data:{uid:w.uid,type:this.type,source:this.id}})})}onRemove(){this._removed=!0,this.actor.sendAsync({type:"RS",data:{type:this.type,source:this.id}})}serialize(){return a.e({},this._options,{type:this.type,data:this._data})}hasTransition(){return!1}}var lt=a.Y([{name:"a_pos",type:"Int16",components:2},{name:"a_texture_pos",type:"Int16",components:2}]);class Gt extends a.E{constructor(w,B,Q,ee){super(),this.id=w,this.dispatcher=Q,this.coordinates=B.coordinates,this.type="image",this.minzoom=0,this.maxzoom=22,this.tileSize=512,this.tiles={},this._loaded=!1,this.setEventedParent(ee),this.options=B}load(w){return a._(this,void 0,void 0,function*(){this._loaded=!1,this.fire(new a.k("dataloading",{dataType:"source"})),this.url=this.options.url,this._request=new AbortController;try{let B=yield p.getImage(this.map._requestManager.transformRequest(this.url,"Image"),this._request);this._request=null,this._loaded=!0,B&&B.data&&(this.image=B.data,w&&(this.coordinates=w),this._finishLoading())}catch(B){this._request=null,this._loaded=!0,this.fire(new a.j(B))}})}loaded(){return this._loaded}updateImage(w){return w.url?(this._request&&(this._request.abort(),this._request=null),this.options.url=w.url,this.load(w.coordinates).finally(()=>{this.texture=null}),this):this}_finishLoading(){this.map&&(this.setCoordinates(this.coordinates),this.fire(new a.k("data",{dataType:"source",sourceDataType:"metadata"})))}onAdd(w){this.map=w,this.load()}onRemove(){this._request&&(this._request.abort(),this._request=null)}setCoordinates(w){this.coordinates=w;let B=w.map(a.Z.fromLngLat);this.tileID=function(ee){let le=1/0,qe=1/0,Xe=-1/0,ot=-1/0;for(let xr of ee)le=Math.min(le,xr.x),qe=Math.min(qe,xr.y),Xe=Math.max(Xe,xr.x),ot=Math.max(ot,xr.y);let Tt=Math.max(Xe-le,ot-qe),Yt=Math.max(0,Math.floor(-Math.log(Tt)/Math.LN2)),Kt=Math.pow(2,Yt);return new a.a1(Yt,Math.floor((le+Xe)/2*Kt),Math.floor((qe+ot)/2*Kt))}(B),this.minzoom=this.maxzoom=this.tileID.z;let Q=B.map(ee=>this.tileID.getTilePoint(ee)._round());return this._boundsArray=new a.$,this._boundsArray.emplaceBack(Q[0].x,Q[0].y,0,0),this._boundsArray.emplaceBack(Q[1].x,Q[1].y,a.X,0),this._boundsArray.emplaceBack(Q[3].x,Q[3].y,0,a.X),this._boundsArray.emplaceBack(Q[2].x,Q[2].y,a.X,a.X),this.boundsBuffer&&(this.boundsBuffer.destroy(),delete this.boundsBuffer),this.fire(new a.k("data",{dataType:"source",sourceDataType:"content"})),this}prepare(){if(Object.keys(this.tiles).length===0||!this.image)return;let w=this.map.painter.context,B=w.gl;this.boundsBuffer||(this.boundsBuffer=w.createVertexBuffer(this._boundsArray,lt.members)),this.boundsSegments||(this.boundsSegments=a.a0.simpleSegment(0,0,4,2)),this.texture||(this.texture=new g(w,this.image,B.RGBA),this.texture.bind(B.LINEAR,B.CLAMP_TO_EDGE));let Q=!1;for(let ee in this.tiles){let le=this.tiles[ee];le.state!=="loaded"&&(le.state="loaded",le.texture=this.texture,Q=!0)}Q&&this.fire(new a.k("data",{dataType:"source",sourceDataType:"idle",sourceId:this.id}))}loadTile(w){return a._(this,void 0,void 0,function*(){this.tileID&&this.tileID.equals(w.tileID.canonical)?(this.tiles[String(w.tileID.wrap)]=w,w.buckets={}):w.state="errored"})}serialize(){return{type:"image",url:this.options.url,coordinates:this.coordinates}}hasTransition(){return!1}}class Nt extends Gt{constructor(w,B,Q,ee){super(w,B,Q,ee),this.roundZoom=!0,this.type="video",this.options=B}load(){return a._(this,void 0,void 0,function*(){this._loaded=!1;let w=this.options;this.urls=[];for(let B of w.urls)this.urls.push(this.map._requestManager.transformRequest(B,"Source").url);try{let B=yield a.a3(this.urls);if(this._loaded=!0,!B)return;this.video=B,this.video.loop=!0,this.video.addEventListener("playing",()=>{this.map.triggerRepaint()}),this.map&&this.video.play(),this._finishLoading()}catch(B){this.fire(new a.j(B))}})}pause(){this.video&&this.video.pause()}play(){this.video&&this.video.play()}seek(w){if(this.video){let B=this.video.seekable;w<B.start(0)||w>B.end(0)?this.fire(new a.j(new a.a2(`sources.${this.id}`,null,`Playback for this video can be set only between the ${B.start(0)} and ${B.end(0)}-second mark.`))):this.video.currentTime=w}}getVideo(){return this.video}onAdd(w){this.map||(this.map=w,this.load(),this.video&&(this.video.play(),this.setCoordinates(this.coordinates)))}prepare(){if(Object.keys(this.tiles).length===0||this.video.readyState<2)return;let w=this.map.painter.context,B=w.gl;this.boundsBuffer||(this.boundsBuffer=w.createVertexBuffer(this._boundsArray,lt.members)),this.boundsSegments||(this.boundsSegments=a.a0.simpleSegment(0,0,4,2)),this.texture?this.video.paused||(this.texture.bind(B.LINEAR,B.CLAMP_TO_EDGE),B.texSubImage2D(B.TEXTURE_2D,0,0,0,B.RGBA,B.UNSIGNED_BYTE,this.video)):(this.texture=new g(w,this.video,B.RGBA),this.texture.bind(B.LINEAR,B.CLAMP_TO_EDGE));let Q=!1;for(let ee in this.tiles){let le=this.tiles[ee];le.state!=="loaded"&&(le.state="loaded",le.texture=this.texture,Q=!0)}Q&&this.fire(new a.k("data",{dataType:"source",sourceDataType:"idle",sourceId:this.id}))}serialize(){return{type:"video",urls:this.urls,coordinates:this.coordinates}}hasTransition(){return this.video&&!this.video.paused}}class Jt extends Gt{constructor(w,B,Q,ee){super(w,B,Q,ee),B.coordinates?Array.isArray(B.coordinates)&&B.coordinates.length===4&&!B.coordinates.some(le=>!Array.isArray(le)||le.length!==2||le.some(qe=>typeof qe!="number"))||this.fire(new a.j(new a.a2(`sources.${w}`,null,'"coordinates" property must be an array of 4 longitude/latitude array pairs'))):this.fire(new a.j(new a.a2(`sources.${w}`,null,'missing required property "coordinates"'))),B.animate&&typeof B.animate!="boolean"&&this.fire(new a.j(new a.a2(`sources.${w}`,null,'optional "animate" property must be a boolean value'))),B.canvas?typeof B.canvas=="string"||B.canvas instanceof HTMLCanvasElement||this.fire(new a.j(new a.a2(`sources.${w}`,null,'"canvas" must be either a string representing the ID of the canvas element from which to read, or an HTMLCanvasElement instance'))):this.fire(new a.j(new a.a2(`sources.${w}`,null,'missing required property "canvas"'))),this.options=B,this.animate=B.animate===void 0||B.animate}load(){return a._(this,void 0,void 0,function*(){this._loaded=!0,this.canvas||(this.canvas=this.options.canvas instanceof HTMLCanvasElement?this.options.canvas:document.getElementById(this.options.canvas)),this.width=this.canvas.width,this.height=this.canvas.height,this._hasInvalidDimensions()?this.fire(new a.j(new Error("Canvas dimensions cannot be less than or equal to zero."))):(this.play=function(){this._playing=!0,this.map.triggerRepaint()},this.pause=function(){this._playing&&(this.prepare(),this._playing=!1)},this._finishLoading())})}getCanvas(){return this.canvas}onAdd(w){this.map=w,this.load(),this.canvas&&this.animate&&this.play()}onRemove(){this.pause()}prepare(){let w=!1;if(this.canvas.width!==this.width&&(this.width=this.canvas.width,w=!0),this.canvas.height!==this.height&&(this.height=this.canvas.height,w=!0),this._hasInvalidDimensions()||Object.keys(this.tiles).length===0)return;let B=this.map.painter.context,Q=B.gl;this.boundsBuffer||(this.boundsBuffer=B.createVertexBuffer(this._boundsArray,lt.members)),this.boundsSegments||(this.boundsSegments=a.a0.simpleSegment(0,0,4,2)),this.texture?(w||this._playing)&&this.texture.update(this.canvas,{premultiply:!0}):this.texture=new g(B,this.canvas,Q.RGBA,{premultiply:!0});let ee=!1;for(let le in this.tiles){let qe=this.tiles[le];qe.state!=="loaded"&&(qe.state="loaded",qe.texture=this.texture,ee=!0)}ee&&this.fire(new a.k("data",{dataType:"source",sourceDataType:"idle",sourceId:this.id}))}serialize(){return{type:"canvas",coordinates:this.coordinates}}hasTransition(){return this._playing}_hasInvalidDimensions(){for(let w of[this.canvas.width,this.canvas.height])if(isNaN(w)||w<=0)return!0;return!1}}let sr={},wr=ue=>{switch(ue){case"geojson":return st;case"image":return Gt;case"raster":return pt;case"raster-dem":return Zt;case"vector":return ut;case"video":return Nt;case"canvas":return Jt}return sr[ue]},cr="RTLPluginLoaded";class $e extends a.E{constructor(){super(...arguments),this.status="unavailable",this.url=null,this.dispatcher=Ae()}_syncState(w){return this.status=w,this.dispatcher.broadcast("SRPS",{pluginStatus:w,pluginURL:this.url}).catch(B=>{throw this.status="error",B})}getRTLTextPluginStatus(){return this.status}clearRTLTextPlugin(){this.status="unavailable",this.url=null}setRTLTextPlugin(w){return a._(this,arguments,void 0,function*(B,Q=!1){if(this.url)throw new Error("setRTLTextPlugin cannot be called multiple times.");if(this.url=u.resolveURL(B),!this.url)throw new Error(`requested url ${B} is invalid`);if(this.status==="unavailable"){if(!Q)return this._requestImport();this.status="deferred",this._syncState(this.status)}else if(this.status==="requested")return this._requestImport()})}_requestImport(){return a._(this,void 0,void 0,function*(){yield this._syncState("loading"),this.status="loaded",this.fire(new a.k(cr))})}lazyLoad(){this.status==="unavailable"?this.status="requested":this.status==="deferred"&&this._requestImport()}}let St=null;function Qt(){return St||(St=new $e),St}class Vt{constructor(w,B){this.timeAdded=0,this.fadeEndTime=0,this.tileID=w,this.uid=a.a4(),this.uses=0,this.tileSize=B,this.buckets={},this.expirationTime=null,this.queryPadding=0,this.hasSymbolBuckets=!1,this.hasRTLText=!1,this.dependencies={},this.rtt=[],this.rttCoords={},this.expiredRequestCount=0,this.state="loading"}registerFadeDuration(w){let B=w+this.timeAdded;B<this.fadeEndTime||(this.fadeEndTime=B)}wasRequested(){return this.state==="errored"||this.state==="loaded"||this.state==="reloading"}clearTextures(w){this.demTexture&&w.saveTileTexture(this.demTexture),this.demTexture=null}loadVectorData(w,B,Q){if(this.hasData()&&this.unloadVectorData(),this.state="loaded",w){w.featureIndex&&(this.latestFeatureIndex=w.featureIndex,w.rawTileData?(this.latestRawTileData=w.rawTileData,this.latestFeatureIndex.rawTileData=w.rawTileData):this.latestRawTileData&&(this.latestFeatureIndex.rawTileData=this.latestRawTileData)),this.collisionBoxArray=w.collisionBoxArray,this.buckets=function(ee,le){let qe={};if(!le)return qe;for(let Xe of ee){let ot=Xe.layerIds.map(Tt=>le.getLayer(Tt)).filter(Boolean);if(ot.length!==0){Xe.layers=ot,Xe.stateDependentLayerIds&&(Xe.stateDependentLayers=Xe.stateDependentLayerIds.map(Tt=>ot.filter(Yt=>Yt.id===Tt)[0]));for(let Tt of ot)qe[Tt.id]=Xe}}return qe}(w.buckets,B.style),this.hasSymbolBuckets=!1;for(let ee in this.buckets){let le=this.buckets[ee];if(le instanceof a.a6){if(this.hasSymbolBuckets=!0,!Q)break;le.justReloaded=!0}}if(this.hasRTLText=!1,this.hasSymbolBuckets)for(let ee in this.buckets){let le=this.buckets[ee];if(le instanceof a.a6&&le.hasRTLText){this.hasRTLText=!0,Qt().lazyLoad();break}}this.queryPadding=0;for(let ee in this.buckets){let le=this.buckets[ee];this.queryPadding=Math.max(this.queryPadding,B.style.getLayer(ee).queryRadius(le))}w.imageAtlas&&(this.imageAtlas=w.imageAtlas),w.glyphAtlasImage&&(this.glyphAtlasImage=w.glyphAtlasImage)}else this.collisionBoxArray=new a.a5}unloadVectorData(){for(let w in this.buckets)this.buckets[w].destroy();this.buckets={},this.imageAtlasTexture&&this.imageAtlasTexture.destroy(),this.imageAtlas&&(this.imageAtlas=null),this.glyphAtlasTexture&&this.glyphAtlasTexture.destroy(),this.latestFeatureIndex=null,this.state="unloaded"}getBucket(w){return this.buckets[w.id]}upload(w){for(let Q in this.buckets){let ee=this.buckets[Q];ee.uploadPending()&&ee.upload(w)}let B=w.gl;this.imageAtlas&&!this.imageAtlas.uploaded&&(this.imageAtlasTexture=new g(w,this.imageAtlas.image,B.RGBA),this.imageAtlas.uploaded=!0),this.glyphAtlasImage&&(this.glyphAtlasTexture=new g(w,this.glyphAtlasImage,B.ALPHA),this.glyphAtlasImage=null)}prepare(w){this.imageAtlas&&this.imageAtlas.patchUpdatedImages(w,this.imageAtlasTexture)}queryRenderedFeatures(w,B,Q,ee,le,qe,Xe,ot,Tt,Yt){return this.latestFeatureIndex&&this.latestFeatureIndex.rawTileData?this.latestFeatureIndex.query({queryGeometry:ee,cameraQueryGeometry:le,scale:qe,tileSize:this.tileSize,pixelPosMatrix:Yt,transform:ot,params:Xe,queryPadding:this.queryPadding*Tt},w,B,Q):{}}querySourceFeatures(w,B){let Q=this.latestFeatureIndex;if(!Q||!Q.rawTileData)return;let ee=Q.loadVTLayers(),le=B&&B.sourceLayer?B.sourceLayer:"",qe=ee._geojsonTileLayer||ee[le];if(!qe)return;let Xe=a.a7(B&&B.filter),{z:ot,x:Tt,y:Yt}=this.tileID.canonical,Kt={z:ot,x:Tt,y:Yt};for(let xr=0;xr<qe.length;xr++){let Ir=qe.feature(xr);if(Xe.needGeometry){let De=a.a8(Ir,!0);if(!Xe.filter(new a.z(this.tileID.overscaledZ),De,this.tileID.canonical))continue}else if(!Xe.filter(new a.z(this.tileID.overscaledZ),Ir))continue;let ve=Q.getId(Ir,le),be=new a.a9(Ir,ot,Tt,Yt,ve);be.tile=Kt,w.push(be)}}hasData(){return this.state==="loaded"||this.state==="reloading"||this.state==="expired"}patternsLoaded(){return this.imageAtlas&&!!Object.keys(this.imageAtlas.patternPositions).length}setExpiryData(w){let B=this.expirationTime;if(w.cacheControl){let Q=a.aa(w.cacheControl);Q["max-age"]&&(this.expirationTime=Date.now()+1e3*Q["max-age"])}else w.expires&&(this.expirationTime=new Date(w.expires).getTime());if(this.expirationTime){let Q=Date.now(),ee=!1;if(this.expirationTime>Q)ee=!1;else if(B)if(this.expirationTime<B)ee=!0;else{let le=this.expirationTime-B;le?this.expirationTime=Q+Math.max(le,3e4):ee=!0}else ee=!0;ee?(this.expiredRequestCount++,this.state="expired"):this.expiredRequestCount=0}}getExpiryTimeout(){if(this.expirationTime)return this.expiredRequestCount?1e3*(1<<Math.min(this.expiredRequestCount-1,31)):Math.min(this.expirationTime-new Date().getTime(),Math.pow(2,31)-1)}setFeatureState(w,B){if(!this.latestFeatureIndex||!this.latestFeatureIndex.rawTileData||Object.keys(w).length===0)return;let Q=this.latestFeatureIndex.loadVTLayers();for(let ee in this.buckets){if(!B.style.hasLayer(ee))continue;let le=this.buckets[ee],qe=le.layers[0].sourceLayer||"_geojsonTileLayer",Xe=Q[qe],ot=w[qe];if(!Xe||!ot||Object.keys(ot).length===0)continue;le.update(ot,Xe,this.imageAtlas&&this.imageAtlas.patternPositions||{});let Tt=B&&B.style&&B.style.getLayer(ee);Tt&&(this.queryPadding=Math.max(this.queryPadding,Tt.queryRadius(le)))}}holdingForFade(){return this.symbolFadeHoldUntil!==void 0}symbolFadeFinished(){return!this.symbolFadeHoldUntil||this.symbolFadeHoldUntil<u.now()}clearFadeHold(){this.symbolFadeHoldUntil=void 0}setHoldDuration(w){this.symbolFadeHoldUntil=u.now()+w}setDependencies(w,B){let Q={};for(let ee of B)Q[ee]=!0;this.dependencies[w]=Q}hasDependency(w,B){for(let Q of w){let ee=this.dependencies[Q];if(ee){for(let le of B)if(ee[le])return!0}}return!1}}class _t{constructor(w,B){this.max=w,this.onRemove=B,this.reset()}reset(){for(let w in this.data)for(let B of this.data[w])B.timeout&&clearTimeout(B.timeout),this.onRemove(B.value);return this.data={},this.order=[],this}add(w,B,Q){let ee=w.wrapped().key;this.data[ee]===void 0&&(this.data[ee]=[]);let le={value:B,timeout:void 0};if(Q!==void 0&&(le.timeout=setTimeout(()=>{this.remove(w,le)},Q)),this.data[ee].push(le),this.order.push(ee),this.order.length>this.max){let qe=this._getAndRemoveByKey(this.order[0]);qe&&this.onRemove(qe)}return this}has(w){return w.wrapped().key in this.data}getAndRemove(w){return this.has(w)?this._getAndRemoveByKey(w.wrapped().key):null}_getAndRemoveByKey(w){let B=this.data[w].shift();return B.timeout&&clearTimeout(B.timeout),this.data[w].length===0&&delete this.data[w],this.order.splice(this.order.indexOf(w),1),B.value}getByKey(w){let B=this.data[w];return B?B[0].value:null}get(w){return this.has(w)?this.data[w.wrapped().key][0].value:null}remove(w,B){if(!this.has(w))return this;let Q=w.wrapped().key,ee=B===void 0?0:this.data[Q].indexOf(B),le=this.data[Q][ee];return this.data[Q].splice(ee,1),le.timeout&&clearTimeout(le.timeout),this.data[Q].length===0&&delete this.data[Q],this.onRemove(le.value),this.order.splice(this.order.indexOf(Q),1),this}setMaxSize(w){for(this.max=w;this.order.length>this.max;){let B=this._getAndRemoveByKey(this.order[0]);B&&this.onRemove(B)}return this}filter(w){let B=[];for(let Q in this.data)for(let ee of this.data[Q])w(ee.value)||B.push(ee);for(let Q of B)this.remove(Q.value.tileID,Q)}}class It{constructor(){this.state={},this.stateChanges={},this.deletedStates={}}updateState(w,B,Q){let ee=String(B);if(this.stateChanges[w]=this.stateChanges[w]||{},this.stateChanges[w][ee]=this.stateChanges[w][ee]||{},a.e(this.stateChanges[w][ee],Q),this.deletedStates[w]===null){this.deletedStates[w]={};for(let le in this.state[w])le!==ee&&(this.deletedStates[w][le]=null)}else if(this.deletedStates[w]&&this.deletedStates[w][ee]===null){this.deletedStates[w][ee]={};for(let le in this.state[w][ee])Q[le]||(this.deletedStates[w][ee][le]=null)}else for(let le in Q)this.deletedStates[w]&&this.deletedStates[w][ee]&&this.deletedStates[w][ee][le]===null&&delete this.deletedStates[w][ee][le]}removeFeatureState(w,B,Q){if(this.deletedStates[w]===null)return;let ee=String(B);if(this.deletedStates[w]=this.deletedStates[w]||{},Q&&B!==void 0)this.deletedStates[w][ee]!==null&&(this.deletedStates[w][ee]=this.deletedStates[w][ee]||{},this.deletedStates[w][ee][Q]=null);else if(B!==void 0)if(this.stateChanges[w]&&this.stateChanges[w][ee])for(Q in this.deletedStates[w][ee]={},this.stateChanges[w][ee])this.deletedStates[w][ee][Q]=null;else this.deletedStates[w][ee]=null;else this.deletedStates[w]=null}getState(w,B){let Q=String(B),ee=a.e({},(this.state[w]||{})[Q],(this.stateChanges[w]||{})[Q]);if(this.deletedStates[w]===null)return{};if(this.deletedStates[w]){let le=this.deletedStates[w][B];if(le===null)return{};for(let qe in le)delete ee[qe]}return ee}initializeTileState(w,B){w.setFeatureState(this.state,B)}coalesceChanges(w,B){let Q={};for(let ee in this.stateChanges){this.state[ee]=this.state[ee]||{};let le={};for(let qe in this.stateChanges[ee])this.state[ee][qe]||(this.state[ee][qe]={}),a.e(this.state[ee][qe],this.stateChanges[ee][qe]),le[qe]=this.state[ee][qe];Q[ee]=le}for(let ee in this.deletedStates){this.state[ee]=this.state[ee]||{};let le={};if(this.deletedStates[ee]===null)for(let qe in this.state[ee])le[qe]={},this.state[ee][qe]={};else for(let qe in this.deletedStates[ee]){if(this.deletedStates[ee][qe]===null)this.state[ee][qe]={};else for(let Xe of Object.keys(this.deletedStates[ee][qe]))delete this.state[ee][qe][Xe];le[qe]=this.state[ee][qe]}Q[ee]=Q[ee]||{},a.e(Q[ee],le)}if(this.stateChanges={},this.deletedStates={},Object.keys(Q).length!==0)for(let ee in w)w[ee].setFeatureState(Q,B)}}class mt extends a.E{constructor(w,B,Q){super(),this.id=w,this.dispatcher=Q,this.on("data",ee=>this._dataHandler(ee)),this.on("dataloading",()=>{this._sourceErrored=!1}),this.on("error",()=>{this._sourceErrored=this._source.loaded()}),this._source=((ee,le,qe,Xe)=>{let ot=new(wr(le.type))(ee,le,qe,Xe);if(ot.id!==ee)throw new Error(`Expected Source id to be ${ee} instead of ${ot.id}`);return ot})(w,B,Q,this),this._tiles={},this._cache=new _t(0,ee=>this._unloadTile(ee)),this._timers={},this._cacheTimers={},this._maxTileCacheSize=null,this._maxTileCacheZoomLevels=null,this._loadedParentTiles={},this._coveredTiles={},this._state=new It,this._didEmitContent=!1,this._updated=!1}onAdd(w){this.map=w,this._maxTileCacheSize=w?w._maxTileCacheSize:null,this._maxTileCacheZoomLevels=w?w._maxTileCacheZoomLevels:null,this._source&&this._source.onAdd&&this._source.onAdd(w)}onRemove(w){this.clearTiles(),this._source&&this._source.onRemove&&this._source.onRemove(w)}loaded(){if(this._sourceErrored)return!0;if(!this._sourceLoaded||!this._source.loaded())return!1;if(!(this.used===void 0&&this.usedForTerrain===void 0||this.used||this.usedForTerrain))return!0;if(!this._updated)return!1;for(let w in this._tiles){let B=this._tiles[w];if(B.state!=="loaded"&&B.state!=="errored")return!1}return!0}getSource(){return this._source}pause(){this._paused=!0}resume(){if(!this._paused)return;let w=this._shouldReloadOnResume;this._paused=!1,this._shouldReloadOnResume=!1,w&&this.reload(),this.transform&&this.update(this.transform,this.terrain)}_loadTile(w,B,Q){return a._(this,void 0,void 0,function*(){try{yield this._source.loadTile(w),this._tileLoaded(w,B,Q)}catch(ee){w.state="errored",ee.status!==404?this._source.fire(new a.j(ee,{tile:w})):this.update(this.transform,this.terrain)}})}_unloadTile(w){this._source.unloadTile&&this._source.unloadTile(w)}_abortTile(w){this._source.abortTile&&this._source.abortTile(w),this._source.fire(new a.k("dataabort",{tile:w,coord:w.tileID,dataType:"source"}))}serialize(){return this._source.serialize()}prepare(w){this._source.prepare&&this._source.prepare(),this._state.coalesceChanges(this._tiles,this.map?this.map.painter:null);for(let B in this._tiles){let Q=this._tiles[B];Q.upload(w),Q.prepare(this.map.style.imageManager)}}getIds(){return Object.values(this._tiles).map(w=>w.tileID).sort(er).map(w=>w.key)}getRenderableIds(w){let B=[];for(let Q in this._tiles)this._isIdRenderable(Q,w)&&B.push(this._tiles[Q]);return w?B.sort((Q,ee)=>{let le=Q.tileID,qe=ee.tileID,Xe=new a.P(le.canonical.x,le.canonical.y)._rotate(this.transform.angle),ot=new a.P(qe.canonical.x,qe.canonical.y)._rotate(this.transform.angle);return le.overscaledZ-qe.overscaledZ||ot.y-Xe.y||ot.x-Xe.x}).map(Q=>Q.tileID.key):B.map(Q=>Q.tileID).sort(er).map(Q=>Q.key)}hasRenderableParent(w){let B=this.findLoadedParent(w,0);return!!B&&this._isIdRenderable(B.tileID.key)}_isIdRenderable(w,B){return this._tiles[w]&&this._tiles[w].hasData()&&!this._coveredTiles[w]&&(B||!this._tiles[w].holdingForFade())}reload(){if(this._paused)this._shouldReloadOnResume=!0;else{this._cache.reset();for(let w in this._tiles)this._tiles[w].state!=="errored"&&this._reloadTile(w,"reloading")}}_reloadTile(w,B){return a._(this,void 0,void 0,function*(){let Q=this._tiles[w];Q&&(Q.state!=="loading"&&(Q.state=B),yield this._loadTile(Q,w,B))})}_tileLoaded(w,B,Q){w.timeAdded=u.now(),Q==="expired"&&(w.refreshedUponExpiration=!0),this._setTileReloadTimer(B,w),this.getSource().type==="raster-dem"&&w.dem&&this._backfillDEM(w),this._state.initializeTileState(w,this.map?this.map.painter:null),w.aborted||this._source.fire(new a.k("data",{dataType:"source",tile:w,coord:w.tileID}))}_backfillDEM(w){let B=this.getRenderableIds();for(let ee=0;ee<B.length;ee++){let le=B[ee];if(w.neighboringTiles&&w.neighboringTiles[le]){let qe=this.getTileByID(le);Q(w,qe),Q(qe,w)}}function Q(ee,le){ee.needsHillshadePrepare=!0,ee.needsTerrainPrepare=!0;let qe=le.tileID.canonical.x-ee.tileID.canonical.x,Xe=le.tileID.canonical.y-ee.tileID.canonical.y,ot=Math.pow(2,ee.tileID.canonical.z),Tt=le.tileID.key;qe===0&&Xe===0||Math.abs(Xe)>1||(Math.abs(qe)>1&&(Math.abs(qe+ot)===1?qe+=ot:Math.abs(qe-ot)===1&&(qe-=ot)),le.dem&&ee.dem&&(ee.dem.backfillBorder(le.dem,qe,Xe),ee.neighboringTiles&&ee.neighboringTiles[Tt]&&(ee.neighboringTiles[Tt].backfilled=!0)))}}getTile(w){return this.getTileByID(w.key)}getTileByID(w){return this._tiles[w]}_retainLoadedChildren(w,B,Q,ee){for(let le in this._tiles){let qe=this._tiles[le];if(ee[le]||!qe.hasData()||qe.tileID.overscaledZ<=B||qe.tileID.overscaledZ>Q)continue;let Xe=qe.tileID;for(;qe&&qe.tileID.overscaledZ>B+1;){let Tt=qe.tileID.scaledTo(qe.tileID.overscaledZ-1);qe=this._tiles[Tt.key],qe&&qe.hasData()&&(Xe=Tt)}let ot=Xe;for(;ot.overscaledZ>B;)if(ot=ot.scaledTo(ot.overscaledZ-1),w[ot.key]){ee[Xe.key]=Xe;break}}}findLoadedParent(w,B){if(w.key in this._loadedParentTiles){let Q=this._loadedParentTiles[w.key];return Q&&Q.tileID.overscaledZ>=B?Q:null}for(let Q=w.overscaledZ-1;Q>=B;Q--){let ee=w.scaledTo(Q),le=this._getLoadedTile(ee);if(le)return le}}findLoadedSibling(w){return this._getLoadedTile(w)}_getLoadedTile(w){let B=this._tiles[w.key];return B&&B.hasData()?B:this._cache.getByKey(w.wrapped().key)}updateCacheSize(w){let B=Math.ceil(w.width/this._source.tileSize)+1,Q=Math.ceil(w.height/this._source.tileSize)+1,ee=Math.floor(B*Q*(this._maxTileCacheZoomLevels===null?a.a.MAX_TILE_CACHE_ZOOM_LEVELS:this._maxTileCacheZoomLevels)),le=typeof this._maxTileCacheSize=="number"?Math.min(this._maxTileCacheSize,ee):ee;this._cache.setMaxSize(le)}handleWrapJump(w){let B=Math.round((w-(this._prevLng===void 0?w:this._prevLng))/360);if(this._prevLng=w,B){let Q={};for(let ee in this._tiles){let le=this._tiles[ee];le.tileID=le.tileID.unwrapTo(le.tileID.wrap+B),Q[le.tileID.key]=le}this._tiles=Q;for(let ee in this._timers)clearTimeout(this._timers[ee]),delete this._timers[ee];for(let ee in this._tiles)this._setTileReloadTimer(ee,this._tiles[ee])}}_updateCoveredAndRetainedTiles(w,B,Q,ee,le,qe){let Xe={},ot={},Tt=Object.keys(w),Yt=u.now();for(let Kt of Tt){let xr=w[Kt],Ir=this._tiles[Kt];if(!Ir||Ir.fadeEndTime!==0&&Ir.fadeEndTime<=Yt)continue;let ve=this.findLoadedParent(xr,B),be=this.findLoadedSibling(xr),De=ve||be||null;De&&(this._addTile(De.tileID),Xe[De.tileID.key]=De.tileID),ot[Kt]=xr}this._retainLoadedChildren(ot,ee,Q,w);for(let Kt in Xe)w[Kt]||(this._coveredTiles[Kt]=!0,w[Kt]=Xe[Kt]);if(qe){let Kt={},xr={};for(let Ir of le)this._tiles[Ir.key].hasData()?Kt[Ir.key]=Ir:xr[Ir.key]=Ir;for(let Ir in xr){let ve=xr[Ir].children(this._source.maxzoom);this._tiles[ve[0].key]&&this._tiles[ve[1].key]&&this._tiles[ve[2].key]&&this._tiles[ve[3].key]&&(Kt[ve[0].key]=w[ve[0].key]=ve[0],Kt[ve[1].key]=w[ve[1].key]=ve[1],Kt[ve[2].key]=w[ve[2].key]=ve[2],Kt[ve[3].key]=w[ve[3].key]=ve[3],delete xr[Ir])}for(let Ir in xr){let ve=xr[Ir],be=this.findLoadedParent(ve,this._source.minzoom),De=this.findLoadedSibling(ve),Be=be||De||null;if(Be){Kt[Be.tileID.key]=w[Be.tileID.key]=Be.tileID;for(let et in Kt)Kt[et].isChildOf(Be.tileID)&&delete Kt[et]}}for(let Ir in this._tiles)Kt[Ir]||(this._coveredTiles[Ir]=!0)}}update(w,B){if(!this._sourceLoaded||this._paused)return;let Q;this.transform=w,this.terrain=B,this.updateCacheSize(w),this.handleWrapJump(this.transform.center.lng),this._coveredTiles={},this.used||this.usedForTerrain?this._source.tileID?Q=w.getVisibleUnwrappedCoordinates(this._source.tileID).map(Yt=>new a.S(Yt.canonical.z,Yt.wrap,Yt.canonical.z,Yt.canonical.x,Yt.canonical.y)):(Q=w.coveringTiles({tileSize:this.usedForTerrain?this.tileSize:this._source.tileSize,minzoom:this._source.minzoom,maxzoom:this._source.maxzoom,roundZoom:!this.usedForTerrain&&this._source.roundZoom,reparseOverscaled:this._source.reparseOverscaled,terrain:B}),this._source.hasTile&&(Q=Q.filter(Yt=>this._source.hasTile(Yt)))):Q=[];let ee=w.coveringZoomLevel(this._source),le=Math.max(ee-mt.maxOverzooming,this._source.minzoom),qe=Math.max(ee+mt.maxUnderzooming,this._source.minzoom);if(this.usedForTerrain){let Yt={};for(let Kt of Q)if(Kt.canonical.z>this._source.minzoom){let xr=Kt.scaledTo(Kt.canonical.z-1);Yt[xr.key]=xr;let Ir=Kt.scaledTo(Math.max(this._source.minzoom,Math.min(Kt.canonical.z,5)));Yt[Ir.key]=Ir}Q=Q.concat(Object.values(Yt))}let Xe=Q.length===0&&!this._updated&&this._didEmitContent;this._updated=!0,Xe&&this.fire(new a.k("data",{sourceDataType:"idle",dataType:"source",sourceId:this.id}));let ot=this._updateRetainedTiles(Q,ee);lr(this._source.type)&&this._updateCoveredAndRetainedTiles(ot,le,qe,ee,Q,B);for(let Yt in ot)this._tiles[Yt].clearFadeHold();let Tt=a.ab(this._tiles,ot);for(let Yt of Tt){let Kt=this._tiles[Yt];Kt.hasSymbolBuckets&&!Kt.holdingForFade()?Kt.setHoldDuration(this.map._fadeDuration):Kt.hasSymbolBuckets&&!Kt.symbolFadeFinished()||this._removeTile(Yt)}this._updateLoadedParentTileCache(),this._updateLoadedSiblingTileCache()}releaseSymbolFadeTiles(){for(let w in this._tiles)this._tiles[w].holdingForFade()&&this._removeTile(w)}_updateRetainedTiles(w,B){var Q;let ee={},le={},qe=Math.max(B-mt.maxOverzooming,this._source.minzoom),Xe=Math.max(B+mt.maxUnderzooming,this._source.minzoom),ot={};for(let Tt of w){let Yt=this._addTile(Tt);ee[Tt.key]=Tt,Yt.hasData()||B<this._source.maxzoom&&(ot[Tt.key]=Tt)}this._retainLoadedChildren(ot,B,Xe,ee);for(let Tt of w){let Yt=this._tiles[Tt.key];if(Yt.hasData())continue;if(B+1>this._source.maxzoom){let xr=Tt.children(this._source.maxzoom)[0],Ir=this.getTile(xr);if(Ir&&Ir.hasData()){ee[xr.key]=xr;continue}}else{let xr=Tt.children(this._source.maxzoom);if(ee[xr[0].key]&&ee[xr[1].key]&&ee[xr[2].key]&&ee[xr[3].key])continue}let Kt=Yt.wasRequested();for(let xr=Tt.overscaledZ-1;xr>=qe;--xr){let Ir=Tt.scaledTo(xr);if(le[Ir.key])break;if(le[Ir.key]=!0,Yt=this.getTile(Ir),!Yt&&Kt&&(Yt=this._addTile(Ir)),Yt){let ve=Yt.hasData();if((ve||!(!((Q=this.map)===null||Q===void 0)&&Q.cancelPendingTileRequestsWhileZooming)||Kt)&&(ee[Ir.key]=Ir),Kt=Yt.wasRequested(),ve)break}}}return ee}_updateLoadedParentTileCache(){this._loadedParentTiles={};for(let w in this._tiles){let B=[],Q,ee=this._tiles[w].tileID;for(;ee.overscaledZ>0;){if(ee.key in this._loadedParentTiles){Q=this._loadedParentTiles[ee.key];break}B.push(ee.key);let le=ee.scaledTo(ee.overscaledZ-1);if(Q=this._getLoadedTile(le),Q)break;ee=le}for(let le of B)this._loadedParentTiles[le]=Q}}_updateLoadedSiblingTileCache(){this._loadedSiblingTiles={};for(let w in this._tiles){let B=this._tiles[w].tileID,Q=this._getLoadedTile(B);this._loadedSiblingTiles[B.key]=Q}}_addTile(w){let B=this._tiles[w.key];if(B)return B;B=this._cache.getAndRemove(w),B&&(this._setTileReloadTimer(w.key,B),B.tileID=w,this._state.initializeTileState(B,this.map?this.map.painter:null),this._cacheTimers[w.key]&&(clearTimeout(this._cacheTimers[w.key]),delete this._cacheTimers[w.key],this._setTileReloadTimer(w.key,B)));let Q=B;return B||(B=new Vt(w,this._source.tileSize*w.overscaleFactor()),this._loadTile(B,w.key,B.state)),B.uses++,this._tiles[w.key]=B,Q||this._source.fire(new a.k("dataloading",{tile:B,coord:B.tileID,dataType:"source"})),B}_setTileReloadTimer(w,B){w in this._timers&&(clearTimeout(this._timers[w]),delete this._timers[w]);let Q=B.getExpiryTimeout();Q&&(this._timers[w]=setTimeout(()=>{this._reloadTile(w,"expired"),delete this._timers[w]},Q))}_removeTile(w){let B=this._tiles[w];B&&(B.uses--,delete this._tiles[w],this._timers[w]&&(clearTimeout(this._timers[w]),delete this._timers[w]),B.uses>0||(B.hasData()&&B.state!=="reloading"?this._cache.add(B.tileID,B,B.getExpiryTimeout()):(B.aborted=!0,this._abortTile(B),this._unloadTile(B))))}_dataHandler(w){let B=w.sourceDataType;w.dataType==="source"&&B==="metadata"&&(this._sourceLoaded=!0),this._sourceLoaded&&!this._paused&&w.dataType==="source"&&B==="content"&&(this.reload(),this.transform&&this.update(this.transform,this.terrain),this._didEmitContent=!0)}clearTiles(){this._shouldReloadOnResume=!1,this._paused=!1;for(let w in this._tiles)this._removeTile(w);this._cache.reset()}tilesIn(w,B,Q){let ee=[],le=this.transform;if(!le)return ee;let qe=Q?le.getCameraQueryGeometry(w):w,Xe=w.map(ve=>le.pointCoordinate(ve,this.terrain)),ot=qe.map(ve=>le.pointCoordinate(ve,this.terrain)),Tt=this.getIds(),Yt=1/0,Kt=1/0,xr=-1/0,Ir=-1/0;for(let ve of ot)Yt=Math.min(Yt,ve.x),Kt=Math.min(Kt,ve.y),xr=Math.max(xr,ve.x),Ir=Math.max(Ir,ve.y);for(let ve=0;ve<Tt.length;ve++){let be=this._tiles[Tt[ve]];if(be.holdingForFade())continue;let De=be.tileID,Be=Math.pow(2,le.zoom-be.tileID.overscaledZ),et=B*be.queryPadding*a.X/be.tileSize/Be,We=[De.getTilePoint(new a.Z(Yt,Kt)),De.getTilePoint(new a.Z(xr,Ir))];if(We[0].x-et<a.X&&We[0].y-et<a.X&&We[1].x+et>=0&&We[1].y+et>=0){let it=Xe.map(Ht=>De.getTilePoint(Ht)),Ft=ot.map(Ht=>De.getTilePoint(Ht));ee.push({tile:be,tileID:De,queryGeometry:it,cameraQueryGeometry:Ft,scale:Be})}}return ee}getVisibleCoordinates(w){let B=this.getRenderableIds(w).map(Q=>this._tiles[Q].tileID);for(let Q of B)Q.posMatrix=this.transform.calculatePosMatrix(Q.toUnwrapped());return B}hasTransition(){if(this._source.hasTransition())return!0;if(lr(this._source.type)){let w=u.now();for(let B in this._tiles)if(this._tiles[B].fadeEndTime>=w)return!0}return!1}setFeatureState(w,B,Q){this._state.updateState(w=w||"_geojsonTileLayer",B,Q)}removeFeatureState(w,B,Q){this._state.removeFeatureState(w=w||"_geojsonTileLayer",B,Q)}getFeatureState(w,B){return this._state.getState(w=w||"_geojsonTileLayer",B)}setDependencies(w,B,Q){let ee=this._tiles[w];ee&&ee.setDependencies(B,Q)}reloadTilesForDependencies(w,B){for(let Q in this._tiles)this._tiles[Q].hasDependency(w,B)&&this._reloadTile(Q,"reloading");this._cache.filter(Q=>!Q.hasDependency(w,B))}}function er(ue,w){let B=Math.abs(2*ue.wrap)-+(ue.wrap<0),Q=Math.abs(2*w.wrap)-+(w.wrap<0);return ue.overscaledZ-w.overscaledZ||Q-B||w.canonical.y-ue.canonical.y||w.canonical.x-ue.canonical.x}function lr(ue){return ue==="raster"||ue==="image"||ue==="video"}mt.maxOverzooming=10,mt.maxUnderzooming=3;class Tr{constructor(w,B){this.reset(w,B)}reset(w,B){this.points=w||[],this._distances=[0];for(let Q=1;Q<this.points.length;Q++)this._distances[Q]=this._distances[Q-1]+this.points[Q].dist(this.points[Q-1]);this.length=this._distances[this._distances.length-1],this.padding=Math.min(B||0,.5*this.length),this.paddedLength=this.length-2*this.padding}lerp(w){if(this.points.length===1)return this.points[0];w=a.ac(w,0,1);let B=1,Q=this._distances[B],ee=w*this.paddedLength+this.padding;for(;Q<ee&&B<this._distances.length;)Q=this._distances[++B];let le=B-1,qe=this._distances[le],Xe=Q-qe,ot=Xe>0?(ee-qe)/Xe:0;return this.points[le].mult(1-ot).add(this.points[B].mult(ot))}}function Lr(ue,w){let B=!0;return ue==="always"||ue!=="never"&&w!=="never"||(B=!1),B}class ti{constructor(w,B,Q){let ee=this.boxCells=[],le=this.circleCells=[];this.xCellCount=Math.ceil(w/Q),this.yCellCount=Math.ceil(B/Q);for(let qe=0;qe<this.xCellCount*this.yCellCount;qe++)ee.push([]),le.push([]);this.circleKeys=[],this.boxKeys=[],this.bboxes=[],this.circles=[],this.width=w,this.height=B,this.xScale=this.xCellCount/w,this.yScale=this.yCellCount/B,this.boxUid=0,this.circleUid=0}keysLength(){return this.boxKeys.length+this.circleKeys.length}insert(w,B,Q,ee,le){this._forEachCell(B,Q,ee,le,this._insertBoxCell,this.boxUid++),this.boxKeys.push(w),this.bboxes.push(B),this.bboxes.push(Q),this.bboxes.push(ee),this.bboxes.push(le)}insertCircle(w,B,Q,ee){this._forEachCell(B-ee,Q-ee,B+ee,Q+ee,this._insertCircleCell,this.circleUid++),this.circleKeys.push(w),this.circles.push(B),this.circles.push(Q),this.circles.push(ee)}_insertBoxCell(w,B,Q,ee,le,qe){this.boxCells[le].push(qe)}_insertCircleCell(w,B,Q,ee,le,qe){this.circleCells[le].push(qe)}_query(w,B,Q,ee,le,qe,Xe){if(Q<0||w>this.width||ee<0||B>this.height)return[];let ot=[];if(w<=0&&B<=0&&this.width<=Q&&this.height<=ee){if(le)return[{key:null,x1:w,y1:B,x2:Q,y2:ee}];for(let Tt=0;Tt<this.boxKeys.length;Tt++)ot.push({key:this.boxKeys[Tt],x1:this.bboxes[4*Tt],y1:this.bboxes[4*Tt+1],x2:this.bboxes[4*Tt+2],y2:this.bboxes[4*Tt+3]});for(let Tt=0;Tt<this.circleKeys.length;Tt++){let Yt=this.circles[3*Tt],Kt=this.circles[3*Tt+1],xr=this.circles[3*Tt+2];ot.push({key:this.circleKeys[Tt],x1:Yt-xr,y1:Kt-xr,x2:Yt+xr,y2:Kt+xr})}}else this._forEachCell(w,B,Q,ee,this._queryCell,ot,{hitTest:le,overlapMode:qe,seenUids:{box:{},circle:{}}},Xe);return ot}query(w,B,Q,ee){return this._query(w,B,Q,ee,!1,null)}hitTest(w,B,Q,ee,le,qe){return this._query(w,B,Q,ee,!0,le,qe).length>0}hitTestCircle(w,B,Q,ee,le){let qe=w-Q,Xe=w+Q,ot=B-Q,Tt=B+Q;if(Xe<0||qe>this.width||Tt<0||ot>this.height)return!1;let Yt=[];return this._forEachCell(qe,ot,Xe,Tt,this._queryCellCircle,Yt,{hitTest:!0,overlapMode:ee,circle:{x:w,y:B,radius:Q},seenUids:{box:{},circle:{}}},le),Yt.length>0}_queryCell(w,B,Q,ee,le,qe,Xe,ot){let{seenUids:Tt,hitTest:Yt,overlapMode:Kt}=Xe,xr=this.boxCells[le];if(xr!==null){let ve=this.bboxes;for(let be of xr)if(!Tt.box[be]){Tt.box[be]=!0;let De=4*be,Be=this.boxKeys[be];if(w<=ve[De+2]&&B<=ve[De+3]&&Q>=ve[De+0]&&ee>=ve[De+1]&&(!ot||ot(Be))&&(!Yt||!Lr(Kt,Be.overlapMode))&&(qe.push({key:Be,x1:ve[De],y1:ve[De+1],x2:ve[De+2],y2:ve[De+3]}),Yt))return!0}}let Ir=this.circleCells[le];if(Ir!==null){let ve=this.circles;for(let be of Ir)if(!Tt.circle[be]){Tt.circle[be]=!0;let De=3*be,Be=this.circleKeys[be];if(this._circleAndRectCollide(ve[De],ve[De+1],ve[De+2],w,B,Q,ee)&&(!ot||ot(Be))&&(!Yt||!Lr(Kt,Be.overlapMode))){let et=ve[De],We=ve[De+1],it=ve[De+2];if(qe.push({key:Be,x1:et-it,y1:We-it,x2:et+it,y2:We+it}),Yt)return!0}}}return!1}_queryCellCircle(w,B,Q,ee,le,qe,Xe,ot){let{circle:Tt,seenUids:Yt,overlapMode:Kt}=Xe,xr=this.boxCells[le];if(xr!==null){let ve=this.bboxes;for(let be of xr)if(!Yt.box[be]){Yt.box[be]=!0;let De=4*be,Be=this.boxKeys[be];if(this._circleAndRectCollide(Tt.x,Tt.y,Tt.radius,ve[De+0],ve[De+1],ve[De+2],ve[De+3])&&(!ot||ot(Be))&&!Lr(Kt,Be.overlapMode))return qe.push(!0),!0}}let Ir=this.circleCells[le];if(Ir!==null){let ve=this.circles;for(let be of Ir)if(!Yt.circle[be]){Yt.circle[be]=!0;let De=3*be,Be=this.circleKeys[be];if(this._circlesCollide(ve[De],ve[De+1],ve[De+2],Tt.x,Tt.y,Tt.radius)&&(!ot||ot(Be))&&!Lr(Kt,Be.overlapMode))return qe.push(!0),!0}}}_forEachCell(w,B,Q,ee,le,qe,Xe,ot){let Tt=this._convertToXCellCoord(w),Yt=this._convertToYCellCoord(B),Kt=this._convertToXCellCoord(Q),xr=this._convertToYCellCoord(ee);for(let Ir=Tt;Ir<=Kt;Ir++)for(let ve=Yt;ve<=xr;ve++)if(le.call(this,w,B,Q,ee,this.xCellCount*ve+Ir,qe,Xe,ot))return}_convertToXCellCoord(w){return Math.max(0,Math.min(this.xCellCount-1,Math.floor(w*this.xScale)))}_convertToYCellCoord(w){return Math.max(0,Math.min(this.yCellCount-1,Math.floor(w*this.yScale)))}_circlesCollide(w,B,Q,ee,le,qe){let Xe=ee-w,ot=le-B,Tt=Q+qe;return Tt*Tt>Xe*Xe+ot*ot}_circleAndRectCollide(w,B,Q,ee,le,qe,Xe){let ot=(qe-ee)/2,Tt=Math.abs(w-(ee+ot));if(Tt>ot+Q)return!1;let Yt=(Xe-le)/2,Kt=Math.abs(B-(le+Yt));if(Kt>Yt+Q)return!1;if(Tt<=ot||Kt<=Yt)return!0;let xr=Tt-ot,Ir=Kt-Yt;return xr*xr+Ir*Ir<=Q*Q}}function Br(ue,w,B,Q,ee){let le=a.H();return w?(a.K(le,le,[1/ee,1/ee,1]),B||a.ad(le,le,Q.angle)):a.L(le,Q.labelPlaneMatrix,ue),le}function Vr(ue,w,B,Q,ee){if(w){let le=a.ae(ue);return a.K(le,le,[ee,ee,1]),B||a.ad(le,le,-Q.angle),le}return Q.glCoordMatrix}function dt(ue,w,B,Q){let ee;Q?(ee=[ue,w,Q(ue,w),1],a.af(ee,ee,B)):(ee=[ue,w,0,1],$r(ee,ee,B));let le=ee[3];return{point:new a.P(ee[0]/le,ee[1]/le),signedDistanceFromCamera:le,isOccluded:!1}}function Ge(ue,w){return .5+ue/w*.5}function Je(ue,w){return ue.x>=-w[0]&&ue.x<=w[0]&&ue.y>=-w[1]&&ue.y<=w[1]}function je(ue,w,B,Q,ee,le,qe,Xe,ot,Tt,Yt,Kt,xr,Ir,ve){let be=Q?ue.textSizeData:ue.iconSizeData,De=a.ag(be,B.transform.zoom),Be=[256/B.width*2+1,256/B.height*2+1],et=Q?ue.text.dynamicLayoutVertexArray:ue.icon.dynamicLayoutVertexArray;et.clear();let We=ue.lineVertexArray,it=Q?ue.text.placedSymbolArray:ue.icon.placedSymbolArray,Ft=B.transform.width/B.transform.height,Ht=!1;for(let tr=0;tr<it.length;tr++){let dr=it.get(tr);if(dr.hidden||dr.writingMode===a.ah.vertical&&!Ht){pi(dr.numGlyphs,et);continue}Ht=!1;let Sr=dt(dr.anchorX,dr.anchorY,w,ve);if(!Je(Sr.point,Be)){pi(dr.numGlyphs,et);continue}let Or=Ge(B.transform.cameraToCenterDistance,Sr.signedDistanceFromCamera),Wr=a.ai(be,De,dr),ni=qe?Wr/Or:Wr*Or,Pi={getElevation:ve,labelPlaneMatrix:ee,lineVertexArray:We,pitchWithMap:qe,projectionCache:{projections:{},offsets:{},cachedAnchorPoint:void 0,anyProjectionOccluded:!1},projection:Tt,tileAnchorPoint:new a.P(dr.anchorX,dr.anchorY),unwrappedTileID:Yt,width:Kt,height:xr,translation:Ir},cn=Ie(Pi,dr,ni,!1,Xe,w,le,ue.glyphOffsetArray,et,Ft,ot);Ht=cn.useVertical,(cn.notEnoughRoom||Ht||cn.needsFlipping&&Ie(Pi,dr,ni,!0,Xe,w,le,ue.glyphOffsetArray,et,Ft,ot).notEnoughRoom)&&pi(dr.numGlyphs,et)}Q?ue.text.dynamicLayoutVertexBuffer.updateData(et):ue.icon.dynamicLayoutVertexBuffer.updateData(et)}function tt(ue,w,B,Q,ee,le,qe,Xe){let ot=le.glyphStartIndex+le.numGlyphs,Tt=le.lineStartIndex,Yt=le.lineStartIndex+le.lineLength,Kt=w.getoffsetX(le.glyphStartIndex),xr=w.getoffsetX(ot-1),Ir=vr(ue*Kt,B,Q,ee,le.segment,Tt,Yt,Xe,qe);if(!Ir)return null;let ve=vr(ue*xr,B,Q,ee,le.segment,Tt,Yt,Xe,qe);return ve?Xe.projectionCache.anyProjectionOccluded?null:{first:Ir,last:ve}:null}function xt(ue,w,B,Q){return ue===a.ah.horizontal&&Math.abs(B.y-w.y)>Math.abs(B.x-w.x)*Q?{useVertical:!0}:(ue===a.ah.vertical?w.y<B.y:w.x>B.x)?{needsFlipping:!0}:null}function Ie(ue,w,B,Q,ee,le,qe,Xe,ot,Tt,Yt){let Kt=B/24,xr=w.lineOffsetX*Kt,Ir=w.lineOffsetY*Kt,ve;if(w.numGlyphs>1){let be=w.glyphStartIndex+w.numGlyphs,De=w.lineStartIndex,Be=w.lineStartIndex+w.lineLength,et=tt(Kt,Xe,xr,Ir,Q,w,Yt,ue);if(!et)return{notEnoughRoom:!0};let We=dt(et.first.point.x,et.first.point.y,qe,ue.getElevation).point,it=dt(et.last.point.x,et.last.point.y,qe,ue.getElevation).point;if(ee&&!Q){let Ft=xt(w.writingMode,We,it,Tt);if(Ft)return Ft}ve=[et.first];for(let Ft=w.glyphStartIndex+1;Ft<be-1;Ft++)ve.push(vr(Kt*Xe.getoffsetX(Ft),xr,Ir,Q,w.segment,De,Be,ue,Yt));ve.push(et.last)}else{if(ee&&!Q){let De=dt(ue.tileAnchorPoint.x,ue.tileAnchorPoint.y,le,ue.getElevation).point,Be=w.lineStartIndex+w.segment+1,et=new a.P(ue.lineVertexArray.getx(Be),ue.lineVertexArray.gety(Be)),We=dt(et.x,et.y,le,ue.getElevation),it=We.signedDistanceFromCamera>0?We.point:function(Ht,tr,dr,Sr,Or,Wr){return xe(Ht,tr,dr,1,Or,Wr)}(ue.tileAnchorPoint,et,De,0,le,ue),Ft=xt(w.writingMode,De,it,Tt);if(Ft)return Ft}let be=vr(Kt*Xe.getoffsetX(w.glyphStartIndex),xr,Ir,Q,w.segment,w.lineStartIndex,w.lineStartIndex+w.lineLength,ue,Yt);if(!be||ue.projectionCache.anyProjectionOccluded)return{notEnoughRoom:!0};ve=[be]}for(let be of ve)a.aj(ot,be.point,be.angle);return{}}function xe(ue,w,B,Q,ee,le){let qe=ue.add(ue.sub(w)._unit()),Xe=ee!==void 0?dt(qe.x,qe.y,ee,le.getElevation).point:vt(qe.x,qe.y,le).point,ot=B.sub(Xe);return B.add(ot._mult(Q/ot.mag()))}function ke(ue,w,B){let Q=w.projectionCache;if(Q.projections[ue])return Q.projections[ue];let ee=new a.P(w.lineVertexArray.getx(ue),w.lineVertexArray.gety(ue)),le=vt(ee.x,ee.y,w);if(le.signedDistanceFromCamera>0)return Q.projections[ue]=le.point,Q.anyProjectionOccluded=Q.anyProjectionOccluded||le.isOccluded,le.point;let qe=ue-B.direction;return function(Xe,ot,Tt,Yt,Kt){return xe(Xe,ot,Tt,Yt,void 0,Kt)}(B.distanceFromAnchor===0?w.tileAnchorPoint:new a.P(w.lineVertexArray.getx(qe),w.lineVertexArray.gety(qe)),ee,B.previousVertex,B.absOffsetX-B.distanceFromAnchor+1,w)}function vt(ue,w,B){let Q=ue+B.translation[0],ee=w+B.translation[1],le;return!B.pitchWithMap&&B.projection.useSpecialProjectionForSymbols?(le=B.projection.projectTileCoordinates(Q,ee,B.unwrappedTileID,B.getElevation),le.point.x=(.5*le.point.x+.5)*B.width,le.point.y=(.5*-le.point.y+.5)*B.height):(le=dt(Q,ee,B.labelPlaneMatrix,B.getElevation),le.isOccluded=!1),le}function ir(ue,w,B){return ue._unit()._perp()._mult(w*B)}function ar(ue,w,B,Q,ee,le,qe,Xe,ot){if(Xe.projectionCache.offsets[ue])return Xe.projectionCache.offsets[ue];let Tt=B.add(w);if(ue+ot.direction<Q||ue+ot.direction>=ee)return Xe.projectionCache.offsets[ue]=Tt,Tt;let Yt=ke(ue+ot.direction,Xe,ot),Kt=ir(Yt.sub(B),qe,ot.direction),xr=B.add(Kt),Ir=Yt.add(Kt);return Xe.projectionCache.offsets[ue]=a.ak(le,Tt,xr,Ir)||Tt,Xe.projectionCache.offsets[ue]}function vr(ue,w,B,Q,ee,le,qe,Xe,ot){let Tt=Q?ue-w:ue+w,Yt=Tt>0?1:-1,Kt=0;Q&&(Yt*=-1,Kt=Math.PI),Yt<0&&(Kt+=Math.PI);let xr,Ir=Yt>0?le+ee:le+ee+1;Xe.projectionCache.cachedAnchorPoint?xr=Xe.projectionCache.cachedAnchorPoint:(xr=vt(Xe.tileAnchorPoint.x,Xe.tileAnchorPoint.y,Xe).point,Xe.projectionCache.cachedAnchorPoint=xr);let ve,be,De=xr,Be=xr,et=0,We=0,it=Math.abs(Tt),Ft=[],Ht;for(;et+We<=it;){if(Ir+=Yt,Ir<le||Ir>=qe)return null;et+=We,Be=De,be=ve;let Sr={absOffsetX:it,direction:Yt,distanceFromAnchor:et,previousVertex:Be};if(De=ke(Ir,Xe,Sr),B===0)Ft.push(Be),Ht=De.sub(Be);else{let Or,Wr=De.sub(Be);Or=Wr.mag()===0?ir(ke(Ir+Yt,Xe,Sr).sub(De),B,Yt):ir(Wr,B,Yt),be||(be=Be.add(Or)),ve=ar(Ir,Or,De,le,qe,be,B,Xe,Sr),Ft.push(be),Ht=ve.sub(be)}We=Ht.mag()}let tr=Ht._mult((it-et)/We)._add(be||Be),dr=Kt+Math.atan2(De.y-Be.y,De.x-Be.x);return Ft.push(tr),{point:tr,angle:ot?dr:0,path:Ft}}let ii=new Float32Array([-1/0,-1/0,0,-1/0,-1/0,0,-1/0,-1/0,0,-1/0,-1/0,0]);function pi(ue,w){for(let B=0;B<ue;B++){let Q=w.length;w.resize(Q+4),w.float32.set(ii,3*Q)}}function $r(ue,w,B){let Q=w[0],ee=w[1];return ue[0]=B[0]*Q+B[4]*ee+B[12],ue[1]=B[1]*Q+B[5]*ee+B[13],ue[3]=B[3]*Q+B[7]*ee+B[15],ue}let di=100;class ji{constructor(w,B,Q=new ti(w.width+200,w.height+200,25),ee=new ti(w.width+200,w.height+200,25)){this.transform=w,this.mapProjection=B,this.grid=Q,this.ignoredGrid=ee,this.pitchFactor=Math.cos(w._pitch)*w.cameraToCenterDistance,this.screenRightBoundary=w.width+di,this.screenBottomBoundary=w.height+di,this.gridRightBoundary=w.width+200,this.gridBottomBoundary=w.height+200,this.perspectiveRatioCutoff=.6}placeCollisionBox(w,B,Q,ee,le,qe,Xe,ot,Tt,Yt,Kt){let xr=w.anchorPointX+ot[0],Ir=w.anchorPointY+ot[1],ve=this.projectAndGetPerspectiveRatio(ee,xr,Ir,le,Yt),be=Q*ve.perspectiveRatio,De;if(qe||Xe)De=this._projectCollisionBox(w,be,ee,le,qe,Xe,ot,ve,Yt,Kt);else{let Ft=ve.point.x+(Kt?Kt.x*be:0),Ht=ve.point.y+(Kt?Kt.y*be:0);De={allPointsOccluded:!1,box:[Ft+w.x1*be,Ht+w.y1*be,Ft+w.x2*be,Ht+w.y2*be]}}let[Be,et,We,it]=De.box;return this.mapProjection.useSpecialProjectionForSymbols&&(qe?De.allPointsOccluded:this.mapProjection.isOccluded(xr,Ir,le))||ve.perspectiveRatio<this.perspectiveRatioCutoff||!this.isInsideGrid(Be,et,We,it)||B!=="always"&&this.grid.hitTest(Be,et,We,it,B,Tt)?{box:[Be,et,We,it],placeable:!1,offscreen:!1}:{box:[Be,et,We,it],placeable:!0,offscreen:this.isOffscreen(Be,et,We,it)}}placeCollisionCircles(w,B,Q,ee,le,qe,Xe,ot,Tt,Yt,Kt,xr,Ir,ve,be,De){let Be=[],et=new a.P(B.anchorX,B.anchorY),We=this.getPerspectiveRatio(qe,et.x,et.y,Xe,De),it=(Kt?le/We:le*We)/a.ap,Ft={getElevation:De,labelPlaneMatrix:ot,lineVertexArray:Q,pitchWithMap:Kt,projectionCache:{projections:{},offsets:{},cachedAnchorPoint:void 0,anyProjectionOccluded:!1},projection:this.mapProjection,tileAnchorPoint:et,unwrappedTileID:Xe,width:this.transform.width,height:this.transform.height,translation:be},Ht=tt(it,ee,B.lineOffsetX*it,B.lineOffsetY*it,!1,B,!1,Ft),tr=!1,dr=!1,Sr=!0;if(Ht){let Or=.5*Ir*We+ve,Wr=new a.P(-100,-100),ni=new a.P(this.screenRightBoundary,this.screenBottomBoundary),Pi=new Tr,cn=Ht.first,ln=Ht.last,Cn=[];for(let fa=cn.path.length-1;fa>=1;fa--)Cn.push(cn.path[fa]);for(let fa=1;fa<ln.path.length;fa++)Cn.push(ln.path[fa]);let Kn=2.5*Or;if(Tt){let fa=this.projectPathToScreenSpace(Cn,Ft,Tt);Cn=fa.some($a=>$a.signedDistanceFromCamera<=0)?[]:fa.map($a=>$a.point)}let Ta=[];if(Cn.length>0){let fa=Cn[0].clone(),$a=Cn[0].clone();for(let Co=1;Co<Cn.length;Co++)fa.x=Math.min(fa.x,Cn[Co].x),fa.y=Math.min(fa.y,Cn[Co].y),$a.x=Math.max($a.x,Cn[Co].x),$a.y=Math.max($a.y,Cn[Co].y);Ta=fa.x>=Wr.x&&$a.x<=ni.x&&fa.y>=Wr.y&&$a.y<=ni.y?[Cn]:$a.x<Wr.x||fa.x>ni.x||$a.y<Wr.y||fa.y>ni.y?[]:a.al([Cn],Wr.x,Wr.y,ni.x,ni.y)}for(let fa of Ta){Pi.reset(fa,.25*Or);let $a=0;$a=Pi.length<=.5*Or?1:Math.ceil(Pi.paddedLength/Kn)+1;for(let Co=0;Co<$a;Co++){let Qa=Co/Math.max($a-1,1),mo=Pi.lerp(Qa),Bo=mo.x+di,Ps=mo.y+di;Be.push(Bo,Ps,Or,0);let Ts=Bo-Or,wo=Ps-Or,To=Bo+Or,hl=Ps+Or;if(Sr=Sr&&this.isOffscreen(Ts,wo,To,hl),dr=dr||this.isInsideGrid(Ts,wo,To,hl),w!=="always"&&this.grid.hitTestCircle(Bo,Ps,Or,w,xr)&&(tr=!0,!Yt))return{circles:[],offscreen:!1,collisionDetected:tr}}}}return{circles:!Yt&&tr||!dr||We<this.perspectiveRatioCutoff?[]:Be,offscreen:Sr,collisionDetected:tr}}projectPathToScreenSpace(w,B,Q){return w.map(ee=>dt(ee.x,ee.y,Q,B.getElevation))}queryRenderedSymbols(w){if(w.length===0||this.grid.keysLength()===0&&this.ignoredGrid.keysLength()===0)return{};let B=[],Q=1/0,ee=1/0,le=-1/0,qe=-1/0;for(let Yt of w){let Kt=new a.P(Yt.x+di,Yt.y+di);Q=Math.min(Q,Kt.x),ee=Math.min(ee,Kt.y),le=Math.max(le,Kt.x),qe=Math.max(qe,Kt.y),B.push(Kt)}let Xe=this.grid.query(Q,ee,le,qe).concat(this.ignoredGrid.query(Q,ee,le,qe)),ot={},Tt={};for(let Yt of Xe){let Kt=Yt.key;if(ot[Kt.bucketInstanceId]===void 0&&(ot[Kt.bucketInstanceId]={}),ot[Kt.bucketInstanceId][Kt.featureIndex])continue;let xr=[new a.P(Yt.x1,Yt.y1),new a.P(Yt.x2,Yt.y1),new a.P(Yt.x2,Yt.y2),new a.P(Yt.x1,Yt.y2)];a.am(B,xr)&&(ot[Kt.bucketInstanceId][Kt.featureIndex]=!0,Tt[Kt.bucketInstanceId]===void 0&&(Tt[Kt.bucketInstanceId]=[]),Tt[Kt.bucketInstanceId].push(Kt.featureIndex))}return Tt}insertCollisionBox(w,B,Q,ee,le,qe){(Q?this.ignoredGrid:this.grid).insert({bucketInstanceId:ee,featureIndex:le,collisionGroupID:qe,overlapMode:B},w[0],w[1],w[2],w[3])}insertCollisionCircles(w,B,Q,ee,le,qe){let Xe=Q?this.ignoredGrid:this.grid,ot={bucketInstanceId:ee,featureIndex:le,collisionGroupID:qe,overlapMode:B};for(let Tt=0;Tt<w.length;Tt+=4)Xe.insertCircle(ot,w[Tt],w[Tt+1],w[Tt+2])}projectAndGetPerspectiveRatio(w,B,Q,ee,le){let qe;le?(qe=[B,Q,le(B,Q),1],a.af(qe,qe,w)):(qe=[B,Q,0,1],$r(qe,qe,w));let Xe=qe[3];return{point:new a.P((qe[0]/Xe+1)/2*this.transform.width+di,(-qe[1]/Xe+1)/2*this.transform.height+di),perspectiveRatio:.5+this.transform.cameraToCenterDistance/Xe*.5,isOccluded:!1,signedDistanceFromCamera:Xe}}getPerspectiveRatio(w,B,Q,ee,le){let qe=this.mapProjection.useSpecialProjectionForSymbols?this.mapProjection.projectTileCoordinates(B,Q,ee,le):dt(B,Q,w,le);return .5+this.transform.cameraToCenterDistance/qe.signedDistanceFromCamera*.5}isOffscreen(w,B,Q,ee){return Q<di||w>=this.screenRightBoundary||ee<di||B>this.screenBottomBoundary}isInsideGrid(w,B,Q,ee){return Q>=0&&w<this.gridRightBoundary&&ee>=0&&B<this.gridBottomBoundary}getViewportMatrix(){let w=a.an([]);return a.J(w,w,[-100,-100,0]),w}_projectCollisionBox(w,B,Q,ee,le,qe,Xe,ot,Tt,Yt){let Kt=new a.P(1,0),xr=new a.P(0,1),Ir=new a.P(w.anchorPointX+Xe[0],w.anchorPointY+Xe[1]);if(qe&&!le){let Sr=this.projectAndGetPerspectiveRatio(Q,Ir.x+1,Ir.y,ee,Tt).point.sub(ot.point).unit(),Or=Math.atan(Sr.y/Sr.x)+(Sr.x<0?Math.PI:0),Wr=Math.sin(Or),ni=Math.cos(Or);Kt=new a.P(ni,Wr),xr=new a.P(-Wr,ni)}else if(!qe&&le){let Sr=-this.transform.angle,Or=Math.sin(Sr),Wr=Math.cos(Sr);Kt=new a.P(Wr,Or),xr=new a.P(-Or,Wr)}let ve=ot.point,be=B;if(le){ve=Ir;let Sr=this.transform.zoom-Math.floor(this.transform.zoom);be=Math.pow(2,-Sr),be*=this.mapProjection.getPitchedTextCorrection(this.transform,Ir,ee),Yt||(be*=a.ac(.5+ot.signedDistanceFromCamera/this.transform.cameraToCenterDistance*.5,0,4))}Yt&&(ve=ve.add(Kt.mult(Yt.x*be)).add(xr.mult(Yt.y*be)));let De=w.x1*be,Be=w.x2*be,et=(De+Be)/2,We=w.y1*be,it=w.y2*be,Ft=(We+it)/2,Ht=[{offsetX:De,offsetY:We},{offsetX:et,offsetY:We},{offsetX:Be,offsetY:We},{offsetX:Be,offsetY:Ft},{offsetX:Be,offsetY:it},{offsetX:et,offsetY:it},{offsetX:De,offsetY:it},{offsetX:De,offsetY:Ft}],tr=[];for(let{offsetX:Sr,offsetY:Or}of Ht)tr.push(new a.P(ve.x+Kt.x*Sr+xr.x*Or,ve.y+Kt.y*Sr+xr.y*Or));let dr=!1;if(le){let Sr=tr.map(Or=>this.projectAndGetPerspectiveRatio(Q,Or.x,Or.y,ee,Tt));dr=Sr.some(Or=>!Or.isOccluded),tr=Sr.map(Or=>Or.point)}else dr=!0;return{box:a.ao(tr),allPointsOccluded:!dr}}}function In(ue,w,B){return w*(a.X/(ue.tileSize*Math.pow(2,B-ue.tileID.overscaledZ)))}class wi{constructor(w,B,Q,ee){this.opacity=w?Math.max(0,Math.min(1,w.opacity+(w.placed?B:-B))):ee&&Q?1:0,this.placed=Q}isHidden(){return this.opacity===0&&!this.placed}}class On{constructor(w,B,Q,ee,le){this.text=new wi(w?w.text:null,B,Q,le),this.icon=new wi(w?w.icon:null,B,ee,le)}isHidden(){return this.text.isHidden()&&this.icon.isHidden()}}class qn{constructor(w,B,Q){this.text=w,this.icon=B,this.skipFade=Q}}class Fn{constructor(){this.invProjMatrix=a.H(),this.viewportMatrix=a.H(),this.circles=[]}}class ra{constructor(w,B,Q,ee,le){this.bucketInstanceId=w,this.featureIndex=B,this.sourceLayerIndex=Q,this.bucketIndex=ee,this.tileID=le}}class la{constructor(w){this.crossSourceCollisions=w,this.maxGroupID=0,this.collisionGroups={}}get(w){if(this.crossSourceCollisions)return{ID:0,predicate:null};if(!this.collisionGroups[w]){let B=++this.maxGroupID;this.collisionGroups[w]={ID:B,predicate:Q=>Q.collisionGroupID===B}}return this.collisionGroups[w]}}function Ut(ue,w,B,Q,ee){let{horizontalAlign:le,verticalAlign:qe}=a.au(ue);return new a.P(-(le-.5)*w+Q[0]*ee,-(qe-.5)*B+Q[1]*ee)}class wt{constructor(w,B,Q,ee,le,qe){this.transform=w.clone(),this.terrain=Q,this.collisionIndex=new ji(this.transform,B),this.placements={},this.opacities={},this.variableOffsets={},this.stale=!1,this.commitTime=0,this.fadeDuration=ee,this.retainedQueryData={},this.collisionGroups=new la(le),this.collisionCircleArrays={},this.collisionBoxArrays=new Map,this.prevPlacement=qe,qe&&(qe.prevPlacement=void 0),this.placedOrientations={}}_getTerrainElevationFunc(w){let B=this.terrain;return B?(Q,ee)=>B.getElevation(w,Q,ee):null}getBucketParts(w,B,Q,ee){let le=Q.getBucket(B),qe=Q.latestFeatureIndex;if(!le||!qe||B.id!==le.layerIds[0])return;let Xe=Q.collisionBoxArray,ot=le.layers[0].layout,Tt=le.layers[0].paint,Yt=Math.pow(2,this.transform.zoom-Q.tileID.overscaledZ),Kt=Q.tileSize/a.X,xr=Q.tileID.toUnwrapped(),Ir=this.transform.calculatePosMatrix(xr),ve=ot.get("text-pitch-alignment")==="map",be=ot.get("text-rotation-alignment")==="map",De=In(Q,1,this.transform.zoom),Be=this.collisionIndex.mapProjection.translatePosition(this.transform,Q,Tt.get("text-translate"),Tt.get("text-translate-anchor")),et=this.collisionIndex.mapProjection.translatePosition(this.transform,Q,Tt.get("icon-translate"),Tt.get("icon-translate-anchor")),We=Br(Ir,ve,be,this.transform,De),it=null;if(ve){let Ht=Vr(Ir,ve,be,this.transform,De);it=a.L([],this.transform.labelPlaneMatrix,Ht)}this.retainedQueryData[le.bucketInstanceId]=new ra(le.bucketInstanceId,qe,le.sourceLayerIndex,le.index,Q.tileID);let Ft={bucket:le,layout:ot,translationText:Be,translationIcon:et,posMatrix:Ir,unwrappedTileID:xr,textLabelPlaneMatrix:We,labelToScreenMatrix:it,scale:Yt,textPixelRatio:Kt,holdingForFade:Q.holdingForFade(),collisionBoxArray:Xe,partiallyEvaluatedTextSize:a.ag(le.textSizeData,this.transform.zoom),collisionGroup:this.collisionGroups.get(le.sourceID)};if(ee)for(let Ht of le.sortKeyRanges){let{sortKey:tr,symbolInstanceStart:dr,symbolInstanceEnd:Sr}=Ht;w.push({sortKey:tr,symbolInstanceStart:dr,symbolInstanceEnd:Sr,parameters:Ft})}else w.push({symbolInstanceStart:0,symbolInstanceEnd:le.symbolInstances.length,parameters:Ft})}attemptAnchorPlacement(w,B,Q,ee,le,qe,Xe,ot,Tt,Yt,Kt,xr,Ir,ve,be,De,Be,et,We){let it=a.aq[w.textAnchor],Ft=[w.textOffset0,w.textOffset1],Ht=Ut(it,Q,ee,Ft,le),tr=this.collisionIndex.placeCollisionBox(B,xr,ot,Tt,Yt,Xe,qe,De,Kt.predicate,We,Ht);if((!et||this.collisionIndex.placeCollisionBox(et,xr,ot,Tt,Yt,Xe,qe,Be,Kt.predicate,We,Ht).placeable)&&tr.placeable){let dr;if(this.prevPlacement&&this.prevPlacement.variableOffsets[Ir.crossTileID]&&this.prevPlacement.placements[Ir.crossTileID]&&this.prevPlacement.placements[Ir.crossTileID].text&&(dr=this.prevPlacement.variableOffsets[Ir.crossTileID].anchor),Ir.crossTileID===0)throw new Error("symbolInstance.crossTileID can't be 0");return this.variableOffsets[Ir.crossTileID]={textOffset:Ft,width:Q,height:ee,anchor:it,textBoxScale:le,prevAnchor:dr},this.markUsedJustification(ve,it,Ir,be),ve.allowVerticalPlacement&&(this.markUsedOrientation(ve,be,Ir),this.placedOrientations[Ir.crossTileID]=be),{shift:Ht,placedGlyphBoxes:tr}}}placeLayerBucketPart(w,B,Q){let{bucket:ee,layout:le,translationText:qe,translationIcon:Xe,posMatrix:ot,unwrappedTileID:Tt,textLabelPlaneMatrix:Yt,labelToScreenMatrix:Kt,textPixelRatio:xr,holdingForFade:Ir,collisionBoxArray:ve,partiallyEvaluatedTextSize:be,collisionGroup:De}=w.parameters,Be=le.get("text-optional"),et=le.get("icon-optional"),We=a.ar(le,"text-overlap","text-allow-overlap"),it=We==="always",Ft=a.ar(le,"icon-overlap","icon-allow-overlap"),Ht=Ft==="always",tr=le.get("text-rotation-alignment")==="map",dr=le.get("text-pitch-alignment")==="map",Sr=le.get("icon-text-fit")!=="none",Or=le.get("symbol-z-order")==="viewport-y",Wr=it&&(Ht||!ee.hasIconData()||et),ni=Ht&&(it||!ee.hasTextData()||Be);!ee.collisionArrays&&ve&&ee.deserializeCollisionBoxes(ve);let Pi=this._getTerrainElevationFunc(this.retainedQueryData[ee.bucketInstanceId].tileID),cn=(ln,Cn,Kn)=>{var Ta,fa;if(B[ln.crossTileID])return;if(Ir)return void(this.placements[ln.crossTileID]=new qn(!1,!1,!1));let $a=!1,Co=!1,Qa=!0,mo=null,Bo={box:null,placeable:!1,offscreen:null},Ps={box:null,placeable:!1,offscreen:null},Ts=null,wo=null,To=null,hl=0,Ul=0,Lu=0;Cn.textFeatureIndex?hl=Cn.textFeatureIndex:ln.useRuntimeCollisionCircles&&(hl=ln.featureIndex),Cn.verticalTextFeatureIndex&&(Ul=Cn.verticalTextFeatureIndex);let au=Cn.textBox;if(au){let Tl=Te=>{let Ne=a.ah.horizontal;if(ee.allowVerticalPlacement&&!Te&&this.prevPlacement){let He=this.prevPlacement.placedOrientations[ln.crossTileID];He&&(this.placedOrientations[ln.crossTileID]=He,Ne=He,this.markUsedOrientation(ee,Ne,ln))}return Ne},Al=(Te,Ne)=>{if(ee.allowVerticalPlacement&&ln.numVerticalGlyphVertices>0&&Cn.verticalTextBox){for(let He of ee.writingModes)if(He===a.ah.vertical?(Bo=Ne(),Ps=Bo):Bo=Te(),Bo&&Bo.placeable)break}else Bo=Te()},X=ln.textAnchorOffsetStartIndex,se=ln.textAnchorOffsetEndIndex;if(se===X){let Te=(Ne,He)=>{let Ye=this.collisionIndex.placeCollisionBox(Ne,We,xr,ot,Tt,dr,tr,qe,De.predicate,Pi);return Ye&&Ye.placeable&&(this.markUsedOrientation(ee,He,ln),this.placedOrientations[ln.crossTileID]=He),Ye};Al(()=>Te(au,a.ah.horizontal),()=>{let Ne=Cn.verticalTextBox;return ee.allowVerticalPlacement&&ln.numVerticalGlyphVertices>0&&Ne?Te(Ne,a.ah.vertical):{box:null,offscreen:null}}),Tl(Bo&&Bo.placeable)}else{let Te=a.aq[(fa=(Ta=this.prevPlacement)===null||Ta===void 0?void 0:Ta.variableOffsets[ln.crossTileID])===null||fa===void 0?void 0:fa.anchor],Ne=(Ye,kt,nt)=>{let jt=Ye.x2-Ye.x1,gr=Ye.y2-Ye.y1,yr=ln.textBoxScale,Hr=Sr&&Ft==="never"?kt:null,qr=null,_i=We==="never"?1:2,bi="never";Te&&_i++;for(let Zr=0;Zr<_i;Zr++){for(let ai=X;ai<se;ai++){let gi=ee.textAnchorOffsets.get(ai);if(Te&&gi.textAnchor!==Te)continue;let Ii=this.attemptAnchorPlacement(gi,Ye,jt,gr,yr,tr,dr,xr,ot,Tt,De,bi,ln,ee,nt,qe,Xe,Hr,Pi);if(Ii&&(qr=Ii.placedGlyphBoxes,qr&&qr.placeable))return $a=!0,mo=Ii.shift,qr}Te?Te=null:bi=We}return Q&&!qr&&(qr={box:this.collisionIndex.placeCollisionBox(au,"always",xr,ot,Tt,dr,tr,qe,De.predicate,Pi,new a.P(0,0)).box,offscreen:!1,placeable:!1}),qr};Al(()=>Ne(au,Cn.iconBox,a.ah.horizontal),()=>{let Ye=Cn.verticalTextBox;return ee.allowVerticalPlacement&&(!Bo||!Bo.placeable)&&ln.numVerticalGlyphVertices>0&&Ye?Ne(Ye,Cn.verticalIconBox,a.ah.vertical):{box:null,occluded:!0,offscreen:null}}),Bo&&($a=Bo.placeable,Qa=Bo.offscreen);let He=Tl(Bo&&Bo.placeable);if(!$a&&this.prevPlacement){let Ye=this.prevPlacement.variableOffsets[ln.crossTileID];Ye&&(this.variableOffsets[ln.crossTileID]=Ye,this.markUsedJustification(ee,Ye.anchor,ln,He))}}}if(Ts=Bo,$a=Ts&&Ts.placeable,Qa=Ts&&Ts.offscreen,ln.useRuntimeCollisionCircles){let Tl=ee.text.placedSymbolArray.get(ln.centerJustifiedTextSymbolIndex),Al=a.ai(ee.textSizeData,be,Tl),X=le.get("text-padding");wo=this.collisionIndex.placeCollisionCircles(We,Tl,ee.lineVertexArray,ee.glyphOffsetArray,Al,ot,Tt,Yt,Kt,Q,dr,De.predicate,ln.collisionCircleDiameter,X,qe,Pi),wo.circles.length&&wo.collisionDetected&&!Q&&a.w("Collisions detected, but collision boxes are not shown"),$a=it||wo.circles.length>0&&!wo.collisionDetected,Qa=Qa&&wo.offscreen}if(Cn.iconFeatureIndex&&(Lu=Cn.iconFeatureIndex),Cn.iconBox){let Tl=Al=>this.collisionIndex.placeCollisionBox(Al,Ft,xr,ot,Tt,dr,tr,Xe,De.predicate,Pi,Sr&&mo?mo:void 0);Ps&&Ps.placeable&&Cn.verticalIconBox?(To=Tl(Cn.verticalIconBox),Co=To.placeable):(To=Tl(Cn.iconBox),Co=To.placeable),Qa=Qa&&To.offscreen}let Js=Be||ln.numHorizontalGlyphVertices===0&&ln.numVerticalGlyphVertices===0,Ql=et||ln.numIconVertices===0;Js||Ql?Ql?Js||(Co=Co&&$a):$a=Co&&$a:Co=$a=Co&&$a;let dc=Co&&To.placeable;if($a&&Ts.placeable&&this.collisionIndex.insertCollisionBox(Ts.box,We,le.get("text-ignore-placement"),ee.bucketInstanceId,Ps&&Ps.placeable&&Ul?Ul:hl,De.ID),dc&&this.collisionIndex.insertCollisionBox(To.box,Ft,le.get("icon-ignore-placement"),ee.bucketInstanceId,Lu,De.ID),wo&&$a&&this.collisionIndex.insertCollisionCircles(wo.circles,We,le.get("text-ignore-placement"),ee.bucketInstanceId,hl,De.ID),Q&&this.storeCollisionData(ee.bucketInstanceId,Kn,Cn,Ts,To,wo),ln.crossTileID===0)throw new Error("symbolInstance.crossTileID can't be 0");if(ee.bucketInstanceId===0)throw new Error("bucket.bucketInstanceId can't be 0");this.placements[ln.crossTileID]=new qn($a||Wr,Co||ni,Qa||ee.justReloaded),B[ln.crossTileID]=!0};if(Or){if(w.symbolInstanceStart!==0)throw new Error("bucket.bucketInstanceId should be 0");let ln=ee.getSortedSymbolIndexes(this.transform.angle);for(let Cn=ln.length-1;Cn>=0;--Cn){let Kn=ln[Cn];cn(ee.symbolInstances.get(Kn),ee.collisionArrays[Kn],Kn)}}else for(let ln=w.symbolInstanceStart;ln<w.symbolInstanceEnd;ln++)cn(ee.symbolInstances.get(ln),ee.collisionArrays[ln],ln);if(Q&&ee.bucketInstanceId in this.collisionCircleArrays){let ln=this.collisionCircleArrays[ee.bucketInstanceId];a.as(ln.invProjMatrix,ot),ln.viewportMatrix=this.collisionIndex.getViewportMatrix()}ee.justReloaded=!1}storeCollisionData(w,B,Q,ee,le,qe){if(Q.textBox||Q.iconBox){let Xe,ot;this.collisionBoxArrays.has(w)?Xe=this.collisionBoxArrays.get(w):(Xe=new Map,this.collisionBoxArrays.set(w,Xe)),Xe.has(B)?ot=Xe.get(B):(ot={text:null,icon:null},Xe.set(B,ot)),Q.textBox&&(ot.text=ee.box),Q.iconBox&&(ot.icon=le.box)}if(qe){let Xe=this.collisionCircleArrays[w];Xe===void 0&&(Xe=this.collisionCircleArrays[w]=new Fn);for(let ot=0;ot<qe.circles.length;ot+=4)Xe.circles.push(qe.circles[ot+0]),Xe.circles.push(qe.circles[ot+1]),Xe.circles.push(qe.circles[ot+2]),Xe.circles.push(qe.collisionDetected?1:0)}}markUsedJustification(w,B,Q,ee){let le;le=ee===a.ah.vertical?Q.verticalPlacedTextSymbolIndex:{left:Q.leftJustifiedTextSymbolIndex,center:Q.centerJustifiedTextSymbolIndex,right:Q.rightJustifiedTextSymbolIndex}[a.at(B)];let qe=[Q.leftJustifiedTextSymbolIndex,Q.centerJustifiedTextSymbolIndex,Q.rightJustifiedTextSymbolIndex,Q.verticalPlacedTextSymbolIndex];for(let Xe of qe)Xe>=0&&(w.text.placedSymbolArray.get(Xe).crossTileID=le>=0&&Xe!==le?0:Q.crossTileID)}markUsedOrientation(w,B,Q){let ee=B===a.ah.horizontal||B===a.ah.horizontalOnly?B:0,le=B===a.ah.vertical?B:0,qe=[Q.leftJustifiedTextSymbolIndex,Q.centerJustifiedTextSymbolIndex,Q.rightJustifiedTextSymbolIndex];for(let Xe of qe)w.text.placedSymbolArray.get(Xe).placedOrientation=ee;Q.verticalPlacedTextSymbolIndex&&(w.text.placedSymbolArray.get(Q.verticalPlacedTextSymbolIndex).placedOrientation=le)}commit(w){this.commitTime=w,this.zoomAtLastRecencyCheck=this.transform.zoom;let B=this.prevPlacement,Q=!1;this.prevZoomAdjustment=B?B.zoomAdjustment(this.transform.zoom):0;let ee=B?B.symbolFadeChange(w):1,le=B?B.opacities:{},qe=B?B.variableOffsets:{},Xe=B?B.placedOrientations:{};for(let ot in this.placements){let Tt=this.placements[ot],Yt=le[ot];Yt?(this.opacities[ot]=new On(Yt,ee,Tt.text,Tt.icon),Q=Q||Tt.text!==Yt.text.placed||Tt.icon!==Yt.icon.placed):(this.opacities[ot]=new On(null,ee,Tt.text,Tt.icon,Tt.skipFade),Q=Q||Tt.text||Tt.icon)}for(let ot in le){let Tt=le[ot];if(!this.opacities[ot]){let Yt=new On(Tt,ee,!1,!1);Yt.isHidden()||(this.opacities[ot]=Yt,Q=Q||Tt.text.placed||Tt.icon.placed)}}for(let ot in qe)this.variableOffsets[ot]||!this.opacities[ot]||this.opacities[ot].isHidden()||(this.variableOffsets[ot]=qe[ot]);for(let ot in Xe)this.placedOrientations[ot]||!this.opacities[ot]||this.opacities[ot].isHidden()||(this.placedOrientations[ot]=Xe[ot]);if(B&&B.lastPlacementChangeTime===void 0)throw new Error("Last placement time for previous placement is not defined");Q?this.lastPlacementChangeTime=w:typeof this.lastPlacementChangeTime!="number"&&(this.lastPlacementChangeTime=B?B.lastPlacementChangeTime:w)}updateLayerOpacities(w,B){let Q={};for(let ee of B){let le=ee.getBucket(w);le&&ee.latestFeatureIndex&&w.id===le.layerIds[0]&&this.updateBucketOpacities(le,ee.tileID,Q,ee.collisionBoxArray)}}updateBucketOpacities(w,B,Q,ee){w.hasTextData()&&(w.text.opacityVertexArray.clear(),w.text.hasVisibleVertices=!1),w.hasIconData()&&(w.icon.opacityVertexArray.clear(),w.icon.hasVisibleVertices=!1),w.hasIconCollisionBoxData()&&w.iconCollisionBox.collisionVertexArray.clear(),w.hasTextCollisionBoxData()&&w.textCollisionBox.collisionVertexArray.clear();let le=w.layers[0],qe=le.layout,Xe=new On(null,0,!1,!1,!0),ot=qe.get("text-allow-overlap"),Tt=qe.get("icon-allow-overlap"),Yt=le._unevaluatedLayout.hasValue("text-variable-anchor")||le._unevaluatedLayout.hasValue("text-variable-anchor-offset"),Kt=qe.get("text-rotation-alignment")==="map",xr=qe.get("text-pitch-alignment")==="map",Ir=qe.get("icon-text-fit")!=="none",ve=new On(null,0,ot&&(Tt||!w.hasIconData()||qe.get("icon-optional")),Tt&&(ot||!w.hasTextData()||qe.get("text-optional")),!0);!w.collisionArrays&&ee&&(w.hasIconCollisionBoxData()||w.hasTextCollisionBoxData())&&w.deserializeCollisionBoxes(ee);let be=(Be,et,We)=>{for(let it=0;it<et/4;it++)Be.opacityVertexArray.emplaceBack(We);Be.hasVisibleVertices=Be.hasVisibleVertices||We!==fn},De=this.collisionBoxArrays.get(w.bucketInstanceId);for(let Be=0;Be<w.symbolInstances.length;Be++){let et=w.symbolInstances.get(Be),{numHorizontalGlyphVertices:We,numVerticalGlyphVertices:it,crossTileID:Ft}=et,Ht=this.opacities[Ft];Q[Ft]?Ht=Xe:Ht||(Ht=ve,this.opacities[Ft]=Ht),Q[Ft]=!0;let tr=et.numIconVertices>0,dr=this.placedOrientations[et.crossTileID],Sr=dr===a.ah.vertical,Or=dr===a.ah.horizontal||dr===a.ah.horizontalOnly;if(We>0||it>0){let ni=tn(Ht.text);be(w.text,We,Sr?fn:ni),be(w.text,it,Or?fn:ni);let Pi=Ht.text.isHidden();[et.rightJustifiedTextSymbolIndex,et.centerJustifiedTextSymbolIndex,et.leftJustifiedTextSymbolIndex].forEach(Cn=>{Cn>=0&&(w.text.placedSymbolArray.get(Cn).hidden=Pi||Sr?1:0)}),et.verticalPlacedTextSymbolIndex>=0&&(w.text.placedSymbolArray.get(et.verticalPlacedTextSymbolIndex).hidden=Pi||Or?1:0);let cn=this.variableOffsets[et.crossTileID];cn&&this.markUsedJustification(w,cn.anchor,et,dr);let ln=this.placedOrientations[et.crossTileID];ln&&(this.markUsedJustification(w,"left",et,ln),this.markUsedOrientation(w,ln,et))}if(tr){let ni=tn(Ht.icon),Pi=!(Ir&&et.verticalPlacedIconSymbolIndex&&Sr);et.placedIconSymbolIndex>=0&&(be(w.icon,et.numIconVertices,Pi?ni:fn),w.icon.placedSymbolArray.get(et.placedIconSymbolIndex).hidden=Ht.icon.isHidden()),et.verticalPlacedIconSymbolIndex>=0&&(be(w.icon,et.numVerticalIconVertices,Pi?fn:ni),w.icon.placedSymbolArray.get(et.verticalPlacedIconSymbolIndex).hidden=Ht.icon.isHidden())}let Wr=De&&De.has(Be)?De.get(Be):{text:null,icon:null};if(w.hasIconCollisionBoxData()||w.hasTextCollisionBoxData()){let ni=w.collisionArrays[Be];if(ni){let Pi=new a.P(0,0);if(ni.textBox||ni.verticalTextBox){let cn=!0;if(Yt){let ln=this.variableOffsets[Ft];ln?(Pi=Ut(ln.anchor,ln.width,ln.height,ln.textOffset,ln.textBoxScale),Kt&&Pi._rotate(xr?this.transform.angle:-this.transform.angle)):cn=!1}if(ni.textBox||ni.verticalTextBox){let ln;ni.textBox&&(ln=Sr),ni.verticalTextBox&&(ln=Or),rr(w.textCollisionBox.collisionVertexArray,Ht.text.placed,!cn||ln,Wr.text,Pi.x,Pi.y)}}if(ni.iconBox||ni.verticalIconBox){let cn=!!(!Or&&ni.verticalIconBox),ln;ni.iconBox&&(ln=cn),ni.verticalIconBox&&(ln=!cn),rr(w.iconCollisionBox.collisionVertexArray,Ht.icon.placed,ln,Wr.icon,Ir?Pi.x:0,Ir?Pi.y:0)}}}}if(w.sortFeatures(this.transform.angle),this.retainedQueryData[w.bucketInstanceId]&&(this.retainedQueryData[w.bucketInstanceId].featureSortOrder=w.featureSortOrder),w.hasTextData()&&w.text.opacityVertexBuffer&&w.text.opacityVertexBuffer.updateData(w.text.opacityVertexArray),w.hasIconData()&&w.icon.opacityVertexBuffer&&w.icon.opacityVertexBuffer.updateData(w.icon.opacityVertexArray),w.hasIconCollisionBoxData()&&w.iconCollisionBox.collisionVertexBuffer&&w.iconCollisionBox.collisionVertexBuffer.updateData(w.iconCollisionBox.collisionVertexArray),w.hasTextCollisionBoxData()&&w.textCollisionBox.collisionVertexBuffer&&w.textCollisionBox.collisionVertexBuffer.updateData(w.textCollisionBox.collisionVertexArray),w.text.opacityVertexArray.length!==w.text.layoutVertexArray.length/4)throw new Error(`bucket.text.opacityVertexArray.length (= ${w.text.opacityVertexArray.length}) !== bucket.text.layoutVertexArray.length (= ${w.text.layoutVertexArray.length}) / 4`);if(w.icon.opacityVertexArray.length!==w.icon.layoutVertexArray.length/4)throw new Error(`bucket.icon.opacityVertexArray.length (= ${w.icon.opacityVertexArray.length}) !== bucket.icon.layoutVertexArray.length (= ${w.icon.layoutVertexArray.length}) / 4`);if(w.bucketInstanceId in this.collisionCircleArrays){let Be=this.collisionCircleArrays[w.bucketInstanceId];w.placementInvProjMatrix=Be.invProjMatrix,w.placementViewportMatrix=Be.viewportMatrix,w.collisionCircleArray=Be.circles,delete this.collisionCircleArrays[w.bucketInstanceId]}}symbolFadeChange(w){return this.fadeDuration===0?1:(w-this.commitTime)/this.fadeDuration+this.prevZoomAdjustment}zoomAdjustment(w){return Math.max(0,(this.transform.zoom-w)/1.5)}hasTransitions(w){return this.stale||w-this.lastPlacementChangeTime<this.fadeDuration}stillRecent(w,B){let Q=this.zoomAtLastRecencyCheck===B?1-this.zoomAdjustment(B):1;return this.zoomAtLastRecencyCheck=B,this.commitTime+this.fadeDuration*Q>w}setStale(){this.stale=!0}}function rr(ue,w,B,Q,ee,le){Q&&Q.length!==0||(Q=[0,0,0,0]);let qe=Q[0]-di,Xe=Q[1]-di,ot=Q[2]-di,Tt=Q[3]-di;ue.emplaceBack(w?1:0,B?1:0,ee||0,le||0,qe,Xe),ue.emplaceBack(w?1:0,B?1:0,ee||0,le||0,ot,Xe),ue.emplaceBack(w?1:0,B?1:0,ee||0,le||0,ot,Tt),ue.emplaceBack(w?1:0,B?1:0,ee||0,le||0,qe,Tt)}let nr=Math.pow(2,25),Er=Math.pow(2,24),Xr=Math.pow(2,17),ri=Math.pow(2,16),Qr=Math.pow(2,9),Oi=Math.pow(2,8),$i=Math.pow(2,1);function tn(ue){if(ue.opacity===0&&!ue.placed)return 0;if(ue.opacity===1&&ue.placed)return 4294967295;let w=ue.placed?1:0,B=Math.floor(127*ue.opacity);return B*nr+w*Er+B*Xr+w*ri+B*Qr+w*Oi+B*$i+w}let fn=0;function yn(){return{isOccluded:(ue,w,B)=>!1,getPitchedTextCorrection:(ue,w,B)=>1,get useSpecialProjectionForSymbols(){return!1},projectTileCoordinates(ue,w,B,Q){throw new Error("Not implemented.")},translatePosition:(ue,w,B,Q)=>function(ee,le,qe,Xe,ot=!1){if(!qe[0]&&!qe[1])return[0,0];let Tt=ot?Xe==="map"?ee.angle:0:Xe==="viewport"?-ee.angle:0;if(Tt){let Yt=Math.sin(Tt),Kt=Math.cos(Tt);qe=[qe[0]*Kt-qe[1]*Yt,qe[0]*Yt+qe[1]*Kt]}return[ot?qe[0]:In(le,qe[0],ee.zoom),ot?qe[1]:In(le,qe[1],ee.zoom)]}(ue,w,B,Q),getCircleRadiusCorrection:ue=>1}}class Sn{constructor(w){this._sortAcrossTiles=w.layout.get("symbol-z-order")!=="viewport-y"&&!w.layout.get("symbol-sort-key").isConstant(),this._currentTileIndex=0,this._currentPartIndex=0,this._seenCrossTileIDs={},this._bucketParts=[]}continuePlacement(w,B,Q,ee,le){let qe=this._bucketParts;for(;this._currentTileIndex<w.length;)if(B.getBucketParts(qe,ee,w[this._currentTileIndex],this._sortAcrossTiles),this._currentTileIndex++,le())return!0;for(this._sortAcrossTiles&&(this._sortAcrossTiles=!1,qe.sort((Xe,ot)=>Xe.sortKey-ot.sortKey));this._currentPartIndex<qe.length;)if(B.placeLayerBucketPart(qe[this._currentPartIndex],this._seenCrossTileIDs,Q),this._currentPartIndex++,le())return!0;return!1}}class Ba{constructor(w,B,Q,ee,le,qe,Xe,ot){this.placement=new wt(w,yn(),B,qe,Xe,ot),this._currentPlacementIndex=Q.length-1,this._forceFullPlacement=ee,this._showCollisionBoxes=le,this._done=!1}isDone(){return this._done}continuePlacement(w,B,Q){let ee=u.now(),le=()=>!this._forceFullPlacement&&u.now()-ee>2;for(;this._currentPlacementIndex>=0;){let qe=B[w[this._currentPlacementIndex]],Xe=this.placement.collisionIndex.transform.zoom;if(qe.type==="symbol"&&(!qe.minzoom||qe.minzoom<=Xe)&&(!qe.maxzoom||qe.maxzoom>Xe)){if(this._inProgressLayer||(this._inProgressLayer=new Sn(qe)),this._inProgressLayer.continuePlacement(Q[qe.source],this.placement,this._showCollisionBoxes,qe,le))return;delete this._inProgressLayer}this._currentPlacementIndex--}this._done=!0}commit(w){return this.placement.commit(w),this.placement}}let ua=512/a.X/2;class ma{constructor(w,B,Q){this.tileID=w,this.bucketInstanceId=Q,this._symbolsByKey={};let ee=new Map;for(let le=0;le<B.length;le++){let qe=B.get(le),Xe=qe.key,ot=ee.get(Xe);ot?ot.push(qe):ee.set(Xe,[qe])}for(let[le,qe]of ee){let Xe={positions:qe.map(ot=>({x:Math.floor(ot.anchorX*ua),y:Math.floor(ot.anchorY*ua)})),crossTileIDs:qe.map(ot=>ot.crossTileID)};if(Xe.positions.length>128){let ot=new a.av(Xe.positions.length,16,Uint16Array);for(let{x:Tt,y:Yt}of Xe.positions)ot.add(Tt,Yt);ot.finish(),delete Xe.positions,Xe.index=ot}this._symbolsByKey[le]=Xe}}getScaledCoordinates(w,B){let{x:Q,y:ee,z:le}=this.tileID.canonical,{x:qe,y:Xe,z:ot}=B.canonical,Tt=ua/Math.pow(2,ot-le),Yt=(Xe*a.X+w.anchorY)*Tt,Kt=ee*a.X*ua;return{x:Math.floor((qe*a.X+w.anchorX)*Tt-Q*a.X*ua),y:Math.floor(Yt-Kt)}}findMatches(w,B,Q){let ee=this.tileID.canonical.z<B.canonical.z?1:Math.pow(2,this.tileID.canonical.z-B.canonical.z);for(let le=0;le<w.length;le++){let qe=w.get(le);if(qe.crossTileID)continue;let Xe=this._symbolsByKey[qe.key];if(!Xe)continue;let ot=this.getScaledCoordinates(qe,B);if(Xe.index){let Tt=Xe.index.range(ot.x-ee,ot.y-ee,ot.x+ee,ot.y+ee).sort();for(let Yt of Tt){let Kt=Xe.crossTileIDs[Yt];if(!Q[Kt]){Q[Kt]=!0,qe.crossTileID=Kt;break}}}else if(Xe.positions)for(let Tt=0;Tt<Xe.positions.length;Tt++){let Yt=Xe.positions[Tt],Kt=Xe.crossTileIDs[Tt];if(Math.abs(Yt.x-ot.x)<=ee&&Math.abs(Yt.y-ot.y)<=ee&&!Q[Kt]){Q[Kt]=!0,qe.crossTileID=Kt;break}}}}getCrossTileIDsLists(){return Object.values(this._symbolsByKey).map(({crossTileIDs:w})=>w)}}class Wa{constructor(){this.maxCrossTileID=0}generate(){return++this.maxCrossTileID}}class Fa{constructor(){this.indexes={},this.usedCrossTileIDs={},this.lng=0}handleWrapJump(w){let B=Math.round((w-this.lng)/360);if(B!==0)for(let Q in this.indexes){let ee=this.indexes[Q],le={};for(let qe in ee){let Xe=ee[qe];Xe.tileID=Xe.tileID.unwrapTo(Xe.tileID.wrap+B),le[Xe.tileID.key]=Xe}this.indexes[Q]=le}this.lng=w}addBucket(w,B,Q){if(this.indexes[w.overscaledZ]&&this.indexes[w.overscaledZ][w.key]){if(this.indexes[w.overscaledZ][w.key].bucketInstanceId===B.bucketInstanceId)return!1;this.removeBucketCrossTileIDs(w.overscaledZ,this.indexes[w.overscaledZ][w.key])}for(let le=0;le<B.symbolInstances.length;le++)B.symbolInstances.get(le).crossTileID=0;this.usedCrossTileIDs[w.overscaledZ]||(this.usedCrossTileIDs[w.overscaledZ]={});let ee=this.usedCrossTileIDs[w.overscaledZ];for(let le in this.indexes){let qe=this.indexes[le];if(Number(le)>w.overscaledZ)for(let Xe in qe){let ot=qe[Xe];ot.tileID.isChildOf(w)&&ot.findMatches(B.symbolInstances,w,ee)}else{let Xe=qe[w.scaledTo(Number(le)).key];Xe&&Xe.findMatches(B.symbolInstances,w,ee)}}for(let le=0;le<B.symbolInstances.length;le++){let qe=B.symbolInstances.get(le);qe.crossTileID||(qe.crossTileID=Q.generate(),ee[qe.crossTileID]=!0)}return this.indexes[w.overscaledZ]===void 0&&(this.indexes[w.overscaledZ]={}),this.indexes[w.overscaledZ][w.key]=new ma(w,B.symbolInstances,B.bucketInstanceId),!0}removeBucketCrossTileIDs(w,B){for(let Q of B.getCrossTileIDsLists())for(let ee of Q)delete this.usedCrossTileIDs[w][ee]}removeStaleBuckets(w){let B=!1;for(let Q in this.indexes){let ee=this.indexes[Q];for(let le in ee)w[ee[le].bucketInstanceId]||(this.removeBucketCrossTileIDs(Q,ee[le]),delete ee[le],B=!0)}return B}}class Wo{constructor(){this.layerIndexes={},this.crossTileIDs=new Wa,this.maxBucketInstanceId=0,this.bucketsInCurrentPlacement={}}addLayer(w,B,Q){let ee=this.layerIndexes[w.id];ee===void 0&&(ee=this.layerIndexes[w.id]=new Fa);let le=!1,qe={};ee.handleWrapJump(Q);for(let Xe of B){let ot=Xe.getBucket(w);ot&&w.id===ot.layerIds[0]&&(ot.bucketInstanceId||(ot.bucketInstanceId=++this.maxBucketInstanceId),ee.addBucket(Xe.tileID,ot,this.crossTileIDs)&&(le=!0),qe[ot.bucketInstanceId]=!0)}return ee.removeStaleBuckets(qe)&&(le=!0),le}pruneUnusedLayers(w){let B={};w.forEach(Q=>{B[Q]=!0});for(let Q in this.layerIndexes)B[Q]||delete this.layerIndexes[Q]}}let da=(ue,w)=>a.t(ue,w&&w.filter(B=>B.identifier!=="source.canvas")),Wn=a.aw();class Ha extends a.E{constructor(w,B={}){super(),this._rtlPluginLoaded=()=>{for(let Q in this.sourceCaches){let ee=this.sourceCaches[Q].getSource().type;ee!=="vector"&&ee!=="geojson"||this.sourceCaches[Q].reload()}},this.map=w,this.dispatcher=new Le(Se(),w._getMapId()),this.dispatcher.registerMessageHandler("GG",(Q,ee)=>this.getGlyphs(Q,ee)),this.dispatcher.registerMessageHandler("GI",(Q,ee)=>this.getImages(Q,ee)),this.imageManager=new T,this.imageManager.setEventedParent(this),this.glyphManager=new G(w._requestManager,B.localIdeographFontFamily),this.lineAtlas=new oe(256,512),this.crossTileSymbolIndex=new Wo,this._spritesImagesIds={},this._layers={},this._order=[],this.sourceCaches={},this.zoomHistory=new a.ax,this._loaded=!1,this._availableImages=[],this._resetUpdates(),this.dispatcher.broadcast("SR",a.ay()),Qt().on(cr,this._rtlPluginLoaded),this.on("data",Q=>{if(Q.dataType!=="source"||Q.sourceDataType!=="metadata")return;let ee=this.sourceCaches[Q.sourceId];if(!ee)return;let le=ee.getSource();if(le&&le.vectorLayerIds)for(let qe in this._layers){let Xe=this._layers[qe];Xe.source===le.id&&this._validateLayer(Xe)}})}loadURL(w,B={},Q){this.fire(new a.k("dataloading",{dataType:"style"})),B.validate=typeof B.validate!="boolean"||B.validate;let ee=this.map._requestManager.transformRequest(w,"Style");this._loadStyleRequest=new AbortController;let le=this._loadStyleRequest;a.h(ee,this._loadStyleRequest).then(qe=>{this._loadStyleRequest=null,this._load(qe.data,B,Q)}).catch(qe=>{this._loadStyleRequest=null,qe&&!le.signal.aborted&&this.fire(new a.j(qe))})}loadJSON(w,B={},Q){this.fire(new a.k("dataloading",{dataType:"style"})),this._frameRequest=new AbortController,u.frameAsync(this._frameRequest).then(()=>{this._frameRequest=null,B.validate=B.validate!==!1,this._load(w,B,Q)}).catch(()=>{})}loadEmpty(){this.fire(new a.k("dataloading",{dataType:"style"})),this._load(Wn,{validate:!1})}_load(w,B,Q){var ee;let le=B.transformStyle?B.transformStyle(Q,w):w;if(!B.validate||!da(this,a.u(le))){this._loaded=!0,this.stylesheet=le;for(let qe in le.sources)this.addSource(qe,le.sources[qe],{validate:!1});le.sprite?this._loadSprite(le.sprite):this.imageManager.setLoaded(!0),this.glyphManager.setURL(le.glyphs),this._createLayers(),this.light=new N(this.stylesheet.light),this.sky=new te(this.stylesheet.sky),this.map.setTerrain((ee=this.stylesheet.terrain)!==null&&ee!==void 0?ee:null),this.fire(new a.k("data",{dataType:"style"})),this.fire(new a.k("style.load"))}}_createLayers(){let w=a.az(this.stylesheet.layers);this.dispatcher.broadcast("SL",w),this._order=w.map(B=>B.id),this._layers={},this._serializedLayers=null;for(let B of w){let Q=a.aA(B);Q.setEventedParent(this,{layer:{id:B.id}}),this._layers[B.id]=Q}}_loadSprite(w,B=!1,Q=void 0){let ee;this.imageManager.setLoaded(!1),this._spriteRequest=new AbortController,function(le,qe,Xe,ot){return a._(this,void 0,void 0,function*(){let Tt=C(le),Yt=Xe>1?"@2x":"",Kt={},xr={};for(let{id:Ir,url:ve}of Tt){let be=qe.transformRequest(M(ve,Yt,".json"),"SpriteJSON");Kt[Ir]=a.h(be,ot);let De=qe.transformRequest(M(ve,Yt,".png"),"SpriteImage");xr[Ir]=p.getImage(De,ot)}return yield Promise.all([...Object.values(Kt),...Object.values(xr)]),function(Ir,ve){return a._(this,void 0,void 0,function*(){let be={};for(let De in Ir){be[De]={};let Be=u.getImageCanvasContext((yield ve[De]).data),et=(yield Ir[De]).data;for(let We in et){let{width:it,height:Ft,x:Ht,y:tr,sdf:dr,pixelRatio:Sr,stretchX:Or,stretchY:Wr,content:ni,textFitWidth:Pi,textFitHeight:cn}=et[We];be[De][We]={data:null,pixelRatio:Sr,sdf:dr,stretchX:Or,stretchY:Wr,content:ni,textFitWidth:Pi,textFitHeight:cn,spriteData:{width:it,height:Ft,x:Ht,y:tr,context:Be}}}}return be})}(Kt,xr)})}(w,this.map._requestManager,this.map.getPixelRatio(),this._spriteRequest).then(le=>{if(this._spriteRequest=null,le)for(let qe in le){this._spritesImagesIds[qe]=[];let Xe=this._spritesImagesIds[qe]?this._spritesImagesIds[qe].filter(ot=>!(ot in le)):[];for(let ot of Xe)this.imageManager.removeImage(ot),this._changedImages[ot]=!0;for(let ot in le[qe]){let Tt=qe==="default"?ot:`${qe}:${ot}`;this._spritesImagesIds[qe].push(Tt),Tt in this.imageManager.images?this.imageManager.updateImage(Tt,le[qe][ot],!1):this.imageManager.addImage(Tt,le[qe][ot]),B&&(this._changedImages[Tt]=!0)}}}).catch(le=>{this._spriteRequest=null,ee=le,this.fire(new a.j(ee))}).finally(()=>{this.imageManager.setLoaded(!0),this._availableImages=this.imageManager.listImages(),B&&(this._changed=!0),this.dispatcher.broadcast("SI",this._availableImages),this.fire(new a.k("data",{dataType:"style"})),Q&&Q(ee)})}_unloadSprite(){for(let w of Object.values(this._spritesImagesIds).flat())this.imageManager.removeImage(w),this._changedImages[w]=!0;this._spritesImagesIds={},this._availableImages=this.imageManager.listImages(),this._changed=!0,this.dispatcher.broadcast("SI",this._availableImages),this.fire(new a.k("data",{dataType:"style"}))}_validateLayer(w){let B=this.sourceCaches[w.source];if(!B)return;let Q=w.sourceLayer;if(!Q)return;let ee=B.getSource();(ee.type==="geojson"||ee.vectorLayerIds&&ee.vectorLayerIds.indexOf(Q)===-1)&&this.fire(new a.j(new Error(`Source layer "${Q}" does not exist on source "${ee.id}" as specified by style layer "${w.id}".`)))}loaded(){if(!this._loaded||Object.keys(this._updatedSources).length)return!1;for(let w in this.sourceCaches)if(!this.sourceCaches[w].loaded())return!1;return!!this.imageManager.isLoaded()}_serializeByIds(w,B=!1){let Q=this._serializedAllLayers();if(!w||w.length===0)return Object.values(B?a.aB(Q):Q);let ee=[];for(let le of w)if(Q[le]){let qe=B?a.aB(Q[le]):Q[le];ee.push(qe)}return ee}_serializedAllLayers(){let w=this._serializedLayers;if(w)return w;w=this._serializedLayers={};let B=Object.keys(this._layers);for(let Q of B){let ee=this._layers[Q];ee.type!=="custom"&&(w[Q]=ee.serialize())}return w}hasTransitions(){if(this.light&&this.light.hasTransition()||this.sky&&this.sky.hasTransition())return!0;for(let w in this.sourceCaches)if(this.sourceCaches[w].hasTransition())return!0;for(let w in this._layers)if(this._layers[w].hasTransition())return!0;return!1}_checkLoaded(){if(!this._loaded)throw new Error("Style is not done loading.")}update(w){if(!this._loaded)return;let B=this._changed;if(B){let ee=Object.keys(this._updatedLayers),le=Object.keys(this._removedLayers);(ee.length||le.length)&&this._updateWorkerLayers(ee,le);for(let qe in this._updatedSources){let Xe=this._updatedSources[qe];if(Xe==="reload")this._reloadSource(qe);else{if(Xe!=="clear")throw new Error(`Invalid action ${Xe}`);this._clearSource(qe)}}this._updateTilesForChangedImages(),this._updateTilesForChangedGlyphs();for(let qe in this._updatedPaintProps)this._layers[qe].updateTransitions(w);this.light.updateTransitions(w),this.sky.updateTransitions(w),this._resetUpdates()}let Q={};for(let ee in this.sourceCaches){let le=this.sourceCaches[ee];Q[ee]=le.used,le.used=!1}for(let ee of this._order){let le=this._layers[ee];le.recalculate(w,this._availableImages),!le.isHidden(w.zoom)&&le.source&&(this.sourceCaches[le.source].used=!0)}for(let ee in Q){let le=this.sourceCaches[ee];!!Q[ee]!=!!le.used&&le.fire(new a.k("data",{sourceDataType:"visibility",dataType:"source",sourceId:ee}))}this.light.recalculate(w),this.sky.recalculate(w),this.z=w.zoom,B&&this.fire(new a.k("data",{dataType:"style"}))}_updateTilesForChangedImages(){let w=Object.keys(this._changedImages);if(w.length){for(let B in this.sourceCaches)this.sourceCaches[B].reloadTilesForDependencies(["icons","patterns"],w);this._changedImages={}}}_updateTilesForChangedGlyphs(){if(this._glyphsDidChange){for(let w in this.sourceCaches)this.sourceCaches[w].reloadTilesForDependencies(["glyphs"],[""]);this._glyphsDidChange=!1}}_updateWorkerLayers(w,B){this.dispatcher.broadcast("UL",{layers:this._serializeByIds(w,!1),removedIds:B})}_resetUpdates(){this._changed=!1,this._updatedLayers={},this._removedLayers={},this._updatedSources={},this._updatedPaintProps={},this._changedImages={},this._glyphsDidChange=!1}setState(w,B={}){var Q;this._checkLoaded();let ee=this.serialize();if(w=B.transformStyle?B.transformStyle(ee,w):w,((Q=B.validate)===null||Q===void 0||Q)&&da(this,a.u(w)))return!1;(w=a.aB(w)).layers=a.az(w.layers);let le=a.aC(ee,w),qe=this._getOperationsToPerform(le);if(qe.unimplemented.length>0)throw new Error(`Unimplemented: ${qe.unimplemented.join(", ")}.`);if(qe.operations.length===0)return!1;for(let Xe of qe.operations)Xe();return this.stylesheet=w,this._serializedLayers=null,!0}_getOperationsToPerform(w){let B=[],Q=[];for(let ee of w)switch(ee.command){case"setCenter":case"setZoom":case"setBearing":case"setPitch":continue;case"addLayer":B.push(()=>this.addLayer.apply(this,ee.args));break;case"removeLayer":B.push(()=>this.removeLayer.apply(this,ee.args));break;case"setPaintProperty":B.push(()=>this.setPaintProperty.apply(this,ee.args));break;case"setLayoutProperty":B.push(()=>this.setLayoutProperty.apply(this,ee.args));break;case"setFilter":B.push(()=>this.setFilter.apply(this,ee.args));break;case"addSource":B.push(()=>this.addSource.apply(this,ee.args));break;case"removeSource":B.push(()=>this.removeSource.apply(this,ee.args));break;case"setLayerZoomRange":B.push(()=>this.setLayerZoomRange.apply(this,ee.args));break;case"setLight":B.push(()=>this.setLight.apply(this,ee.args));break;case"setGeoJSONSourceData":B.push(()=>this.setGeoJSONSourceData.apply(this,ee.args));break;case"setGlyphs":B.push(()=>this.setGlyphs.apply(this,ee.args));break;case"setSprite":B.push(()=>this.setSprite.apply(this,ee.args));break;case"setSky":B.push(()=>this.setSky.apply(this,ee.args));break;case"setTerrain":B.push(()=>this.map.setTerrain.apply(this,ee.args));break;case"setTransition":B.push(()=>{});break;default:Q.push(ee.command)}return{operations:B,unimplemented:Q}}addImage(w,B){if(this.getImage(w))return this.fire(new a.j(new Error(`An image named "${w}" already exists.`)));this.imageManager.addImage(w,B),this._afterImageUpdated(w)}updateImage(w,B){this.imageManager.updateImage(w,B)}getImage(w){return this.imageManager.getImage(w)}removeImage(w){if(!this.getImage(w))return this.fire(new a.j(new Error(`An image named "${w}" does not exist.`)));this.imageManager.removeImage(w),this._afterImageUpdated(w)}_afterImageUpdated(w){this._availableImages=this.imageManager.listImages(),this._changedImages[w]=!0,this._changed=!0,this.dispatcher.broadcast("SI",this._availableImages),this.fire(new a.k("data",{dataType:"style"}))}listImages(){return this._checkLoaded(),this.imageManager.listImages()}addSource(w,B,Q={}){if(this._checkLoaded(),this.sourceCaches[w]!==void 0)throw new Error(`Source "${w}" already exists.`);if(!B.type)throw new Error(`The type property must be defined, but only the following properties were given: ${Object.keys(B).join(", ")}.`);if(["vector","raster","geojson","video","image"].indexOf(B.type)>=0&&this._validate(a.u.source,`sources.${w}`,B,null,Q))return;this.map&&this.map._collectResourceTiming&&(B.collectResourceTiming=!0);let ee=this.sourceCaches[w]=new mt(w,B,this.dispatcher);ee.style=this,ee.setEventedParent(this,()=>({isSourceLoaded:ee.loaded(),source:ee.serialize(),sourceId:w})),ee.onAdd(this.map),this._changed=!0}removeSource(w){if(this._checkLoaded(),this.sourceCaches[w]===void 0)throw new Error("There is no source with this ID");for(let Q in this._layers)if(this._layers[Q].source===w)return this.fire(new a.j(new Error(`Source "${w}" cannot be removed while layer "${Q}" is using it.`)));let B=this.sourceCaches[w];delete this.sourceCaches[w],delete this._updatedSources[w],B.fire(new a.k("data",{sourceDataType:"metadata",dataType:"source",sourceId:w})),B.setEventedParent(null),B.onRemove(this.map),this._changed=!0}setGeoJSONSourceData(w,B){if(this._checkLoaded(),this.sourceCaches[w]===void 0)throw new Error(`There is no source with this ID=${w}`);let Q=this.sourceCaches[w].getSource();if(Q.type!=="geojson")throw new Error(`geojsonSource.type is ${Q.type}, which is !== 'geojson`);Q.setData(B),this._changed=!0}getSource(w){return this.sourceCaches[w]&&this.sourceCaches[w].getSource()}addLayer(w,B,Q={}){this._checkLoaded();let ee=w.id;if(this.getLayer(ee))return void this.fire(new a.j(new Error(`Layer "${ee}" already exists on this map.`)));let le;if(w.type==="custom"){if(da(this,a.aD(w)))return;le=a.aA(w)}else{if("source"in w&&typeof w.source=="object"&&(this.addSource(ee,w.source),w=a.aB(w),w=a.e(w,{source:ee})),this._validate(a.u.layer,`layers.${ee}`,w,{arrayIndex:-1},Q))return;le=a.aA(w),this._validateLayer(le),le.setEventedParent(this,{layer:{id:ee}})}let qe=B?this._order.indexOf(B):this._order.length;if(B&&qe===-1)this.fire(new a.j(new Error(`Cannot add layer "${ee}" before non-existing layer "${B}".`)));else{if(this._order.splice(qe,0,ee),this._layerOrderChanged=!0,this._layers[ee]=le,this._removedLayers[ee]&&le.source&&le.type!=="custom"){let Xe=this._removedLayers[ee];delete this._removedLayers[ee],Xe.type!==le.type?this._updatedSources[le.source]="clear":(this._updatedSources[le.source]="reload",this.sourceCaches[le.source].pause())}this._updateLayer(le),le.onAdd&&le.onAdd(this.map)}}moveLayer(w,B){if(this._checkLoaded(),this._changed=!0,!this._layers[w])return void this.fire(new a.j(new Error(`The layer '${w}' does not exist in the map's style and cannot be moved.`)));if(w===B)return;let Q=this._order.indexOf(w);this._order.splice(Q,1);let ee=B?this._order.indexOf(B):this._order.length;B&&ee===-1?this.fire(new a.j(new Error(`Cannot move layer "${w}" before non-existing layer "${B}".`))):(this._order.splice(ee,0,w),this._layerOrderChanged=!0)}removeLayer(w){this._checkLoaded();let B=this._layers[w];if(!B)return void this.fire(new a.j(new Error(`Cannot remove non-existing layer "${w}".`)));B.setEventedParent(null);let Q=this._order.indexOf(w);this._order.splice(Q,1),this._layerOrderChanged=!0,this._changed=!0,this._removedLayers[w]=B,delete this._layers[w],this._serializedLayers&&delete this._serializedLayers[w],delete this._updatedLayers[w],delete this._updatedPaintProps[w],B.onRemove&&B.onRemove(this.map)}getLayer(w){return this._layers[w]}getLayersOrder(){return[...this._order]}hasLayer(w){return w in this._layers}setLayerZoomRange(w,B,Q){this._checkLoaded();let ee=this.getLayer(w);ee?ee.minzoom===B&&ee.maxzoom===Q||(B!=null&&(ee.minzoom=B),Q!=null&&(ee.maxzoom=Q),this._updateLayer(ee)):this.fire(new a.j(new Error(`Cannot set the zoom range of non-existing layer "${w}".`)))}setFilter(w,B,Q={}){this._checkLoaded();let ee=this.getLayer(w);if(ee){if(!a.aE(ee.filter,B))return B==null?(ee.filter=void 0,void this._updateLayer(ee)):void(this._validate(a.u.filter,`layers.${ee.id}.filter`,B,null,Q)||(ee.filter=a.aB(B),this._updateLayer(ee)))}else this.fire(new a.j(new Error(`Cannot filter non-existing layer "${w}".`)))}getFilter(w){return a.aB(this.getLayer(w).filter)}setLayoutProperty(w,B,Q,ee={}){this._checkLoaded();let le=this.getLayer(w);le?a.aE(le.getLayoutProperty(B),Q)||(le.setLayoutProperty(B,Q,ee),this._updateLayer(le)):this.fire(new a.j(new Error(`Cannot style non-existing layer "${w}".`)))}getLayoutProperty(w,B){let Q=this.getLayer(w);if(Q)return Q.getLayoutProperty(B);this.fire(new a.j(new Error(`Cannot get style of non-existing layer "${w}".`)))}setPaintProperty(w,B,Q,ee={}){this._checkLoaded();let le=this.getLayer(w);le?a.aE(le.getPaintProperty(B),Q)||(le.setPaintProperty(B,Q,ee)&&this._updateLayer(le),this._changed=!0,this._updatedPaintProps[w]=!0,this._serializedLayers=null):this.fire(new a.j(new Error(`Cannot style non-existing layer "${w}".`)))}getPaintProperty(w,B){return this.getLayer(w).getPaintProperty(B)}setFeatureState(w,B){this._checkLoaded();let Q=w.source,ee=w.sourceLayer,le=this.sourceCaches[Q];if(le===void 0)return void this.fire(new a.j(new Error(`The source '${Q}' does not exist in the map's style.`)));let qe=le.getSource().type;qe==="geojson"&&ee?this.fire(new a.j(new Error("GeoJSON sources cannot have a sourceLayer parameter."))):qe!=="vector"||ee?(w.id===void 0&&this.fire(new a.j(new Error("The feature id parameter must be provided."))),le.setFeatureState(ee,w.id,B)):this.fire(new a.j(new Error("The sourceLayer parameter must be provided for vector source types.")))}removeFeatureState(w,B){this._checkLoaded();let Q=w.source,ee=this.sourceCaches[Q];if(ee===void 0)return void this.fire(new a.j(new Error(`The source '${Q}' does not exist in the map's style.`)));let le=ee.getSource().type,qe=le==="vector"?w.sourceLayer:void 0;le!=="vector"||qe?B&&typeof w.id!="string"&&typeof w.id!="number"?this.fire(new a.j(new Error("A feature id is required to remove its specific state property."))):ee.removeFeatureState(qe,w.id,B):this.fire(new a.j(new Error("The sourceLayer parameter must be provided for vector source types.")))}getFeatureState(w){this._checkLoaded();let B=w.source,Q=w.sourceLayer,ee=this.sourceCaches[B];if(ee!==void 0)return ee.getSource().type!=="vector"||Q?(w.id===void 0&&this.fire(new a.j(new Error("The feature id parameter must be provided."))),ee.getFeatureState(Q,w.id)):void this.fire(new a.j(new Error("The sourceLayer parameter must be provided for vector source types.")));this.fire(new a.j(new Error(`The source '${B}' does not exist in the map's style.`)))}getTransition(){return a.e({duration:300,delay:0},this.stylesheet&&this.stylesheet.transition)}serialize(){if(!this._loaded)return;let w=a.aF(this.sourceCaches,le=>le.serialize()),B=this._serializeByIds(this._order,!0),Q=this.map.getTerrain()||void 0,ee=this.stylesheet;return a.aG({version:ee.version,name:ee.name,metadata:ee.metadata,light:ee.light,sky:ee.sky,center:ee.center,zoom:ee.zoom,bearing:ee.bearing,pitch:ee.pitch,sprite:ee.sprite,glyphs:ee.glyphs,transition:ee.transition,sources:w,layers:B,terrain:Q},le=>le!==void 0)}_updateLayer(w){this._updatedLayers[w.id]=!0,w.source&&!this._updatedSources[w.source]&&this.sourceCaches[w.source].getSource().type!=="raster"&&(this._updatedSources[w.source]="reload",this.sourceCaches[w.source].pause()),this._serializedLayers=null,this._changed=!0}_flattenAndSortRenderedFeatures(w){let B=qe=>this._layers[qe].type==="fill-extrusion",Q={},ee=[];for(let qe=this._order.length-1;qe>=0;qe--){let Xe=this._order[qe];if(B(Xe)){Q[Xe]=qe;for(let ot of w){let Tt=ot[Xe];if(Tt)for(let Yt of Tt)ee.push(Yt)}}}ee.sort((qe,Xe)=>Xe.intersectionZ-qe.intersectionZ);let le=[];for(let qe=this._order.length-1;qe>=0;qe--){let Xe=this._order[qe];if(B(Xe))for(let ot=ee.length-1;ot>=0;ot--){let Tt=ee[ot].feature;if(Q[Tt.layer.id]<qe)break;le.push(Tt),ee.pop()}else for(let ot of w){let Tt=ot[Xe];if(Tt)for(let Yt of Tt)le.push(Yt.feature)}}return le}queryRenderedFeatures(w,B,Q){B&&B.filter&&this._validate(a.u.filter,"queryRenderedFeatures.filter",B.filter,null,B);let ee={};if(B&&B.layers){if(!Array.isArray(B.layers))return this.fire(new a.j(new Error("parameters.layers must be an Array."))),[];for(let Xe of B.layers){let ot=this._layers[Xe];if(!ot)return this.fire(new a.j(new Error(`The layer '${Xe}' does not exist in the map's style and cannot be queried for features.`))),[];ee[ot.source]=!0}}let le=[];B.availableImages=this._availableImages;let qe=this._serializedAllLayers();for(let Xe in this.sourceCaches)B.layers&&!ee[Xe]||le.push(Pe(this.sourceCaches[Xe],this._layers,qe,w,B,Q));return this.placement&&le.push(function(Xe,ot,Tt,Yt,Kt,xr,Ir){let ve={},be=xr.queryRenderedSymbols(Yt),De=[];for(let Be of Object.keys(be).map(Number))De.push(Ir[Be]);De.sort(ge);for(let Be of De){let et=Be.featureIndex.lookupSymbolFeatures(be[Be.bucketInstanceId],ot,Be.bucketIndex,Be.sourceLayerIndex,Kt.filter,Kt.layers,Kt.availableImages,Xe);for(let We in et){let it=ve[We]=ve[We]||[],Ft=et[We];Ft.sort((Ht,tr)=>{let dr=Be.featureSortOrder;if(dr){let Sr=dr.indexOf(Ht.featureIndex);return dr.indexOf(tr.featureIndex)-Sr}return tr.featureIndex-Ht.featureIndex});for(let Ht of Ft)it.push(Ht)}}for(let Be in ve)ve[Be].forEach(et=>{let We=et.feature,it=Tt[Xe[Be].source].getFeatureState(We.layer["source-layer"],We.id);We.source=We.layer.source,We.layer["source-layer"]&&(We.sourceLayer=We.layer["source-layer"]),We.state=it});return ve}(this._layers,qe,this.sourceCaches,w,B,this.placement.collisionIndex,this.placement.retainedQueryData)),this._flattenAndSortRenderedFeatures(le)}querySourceFeatures(w,B){B&&B.filter&&this._validate(a.u.filter,"querySourceFeatures.filter",B.filter,null,B);let Q=this.sourceCaches[w];return Q?function(ee,le){let qe=ee.getRenderableIds().map(Tt=>ee.getTileByID(Tt)),Xe=[],ot={};for(let Tt=0;Tt<qe.length;Tt++){let Yt=qe[Tt],Kt=Yt.tileID.canonical.key;ot[Kt]||(ot[Kt]=!0,Yt.querySourceFeatures(Xe,le))}return Xe}(Q,B):[]}getLight(){return this.light.getLight()}setLight(w,B={}){this._checkLoaded();let Q=this.light.getLight(),ee=!1;for(let qe in w)if(!a.aE(w[qe],Q[qe])){ee=!0;break}if(!ee)return;let le={now:u.now(),transition:a.e({duration:300,delay:0},this.stylesheet.transition)};this.light.setLight(w,B),this.light.updateTransitions(le)}getSky(){var w;return(w=this.stylesheet)===null||w===void 0?void 0:w.sky}setSky(w,B={}){let Q=this.getSky(),ee=!1;if(!w&&!Q)return;if(w&&!Q)ee=!0;else if(!w&&Q)ee=!0;else for(let qe in w)if(!a.aE(w[qe],Q[qe])){ee=!0;break}if(!ee)return;let le={now:u.now(),transition:a.e({duration:300,delay:0},this.stylesheet.transition)};this.stylesheet.sky=w,this.sky.setSky(w,B),this.sky.updateTransitions(le)}_validate(w,B,Q,ee,le={}){return(!le||le.validate!==!1)&&da(this,w.call(a.u,a.e({key:B,style:this.serialize(),value:Q,styleSpec:a.v},ee)))}_remove(w=!0){this._frameRequest&&(this._frameRequest.abort(),this._frameRequest=null),this._loadStyleRequest&&(this._loadStyleRequest.abort(),this._loadStyleRequest=null),this._spriteRequest&&(this._spriteRequest.abort(),this._spriteRequest=null),Qt().off(cr,this._rtlPluginLoaded);for(let B in this._layers)this._layers[B].setEventedParent(null);for(let B in this.sourceCaches){let Q=this.sourceCaches[B];Q.setEventedParent(null),Q.onRemove(this.map)}this.imageManager.setEventedParent(null),this.setEventedParent(null),w&&this.dispatcher.broadcast("RM",void 0),this.dispatcher.remove(w)}_clearSource(w){this.sourceCaches[w].clearTiles()}_reloadSource(w){this.sourceCaches[w].resume(),this.sourceCaches[w].reload()}_updateSources(w){for(let B in this.sourceCaches)this.sourceCaches[B].update(w,this.map.terrain)}_generateCollisionBoxes(){for(let w in this.sourceCaches)this._reloadSource(w)}_updatePlacement(w,B,Q,ee,le=!1){let qe=!1,Xe=!1,ot={};for(let Tt of this._order){let Yt=this._layers[Tt];if(Yt.type!=="symbol")continue;if(!ot[Yt.source]){let xr=this.sourceCaches[Yt.source];ot[Yt.source]=xr.getRenderableIds(!0).map(Ir=>xr.getTileByID(Ir)).sort((Ir,ve)=>ve.tileID.overscaledZ-Ir.tileID.overscaledZ||(Ir.tileID.isLessThan(ve.tileID)?-1:1))}let Kt=this.crossTileSymbolIndex.addLayer(Yt,ot[Yt.source],w.center.lng);qe=qe||Kt}if(this.crossTileSymbolIndex.pruneUnusedLayers(this._order),((le=le||this._layerOrderChanged||Q===0)||!this.pauseablePlacement||this.pauseablePlacement.isDone()&&!this.placement.stillRecent(u.now(),w.zoom))&&(this.pauseablePlacement=new Ba(w,this.map.terrain,this._order,le,B,Q,ee,this.placement),this._layerOrderChanged=!1),this.pauseablePlacement.isDone()?this.placement.setStale():(this.pauseablePlacement.continuePlacement(this._order,this._layers,ot),this.pauseablePlacement.isDone()&&(this.placement=this.pauseablePlacement.commit(u.now()),Xe=!0),qe&&this.pauseablePlacement.placement.setStale()),Xe||qe)for(let Tt of this._order){let Yt=this._layers[Tt];Yt.type==="symbol"&&this.placement.updateLayerOpacities(Yt,ot[Yt.source])}return!this.pauseablePlacement.isDone()||this.placement.hasTransitions(u.now())}_releaseSymbolFadeTiles(){for(let w in this.sourceCaches)this.sourceCaches[w].releaseSymbolFadeTiles()}getImages(w,B){return a._(this,void 0,void 0,function*(){let Q=yield this.imageManager.getImages(B.icons);this._updateTilesForChangedImages();let ee=this.sourceCaches[B.source];return ee&&ee.setDependencies(B.tileID.key,B.type,B.icons),Q})}getGlyphs(w,B){return a._(this,void 0,void 0,function*(){let Q=yield this.glyphManager.getGlyphs(B.stacks),ee=this.sourceCaches[B.source];return ee&&ee.setDependencies(B.tileID.key,B.type,[""]),Q})}getGlyphsUrl(){return this.stylesheet.glyphs||null}setGlyphs(w,B={}){this._checkLoaded(),w&&this._validate(a.u.glyphs,"glyphs",w,null,B)||(this._glyphsDidChange=!0,this.stylesheet.glyphs=w,this.glyphManager.entries={},this.glyphManager.setURL(w))}addSprite(w,B,Q={},ee){this._checkLoaded();let le=[{id:w,url:B}],qe=[...C(this.stylesheet.sprite),...le];this._validate(a.u.sprite,"sprite",qe,null,Q)||(this.stylesheet.sprite=qe,this._loadSprite(le,!0,ee))}removeSprite(w){this._checkLoaded();let B=C(this.stylesheet.sprite);if(B.find(Q=>Q.id===w)){if(this._spritesImagesIds[w])for(let Q of this._spritesImagesIds[w])this.imageManager.removeImage(Q),this._changedImages[Q]=!0;B.splice(B.findIndex(Q=>Q.id===w),1),this.stylesheet.sprite=B.length>0?B:void 0,delete this._spritesImagesIds[w],this._availableImages=this.imageManager.listImages(),this._changed=!0,this.dispatcher.broadcast("SI",this._availableImages),this.fire(new a.k("data",{dataType:"style"}))}else this.fire(new a.j(new Error(`Sprite "${w}" doesn't exists on this map.`)))}getSprite(){return C(this.stylesheet.sprite)}setSprite(w,B={},Q){this._checkLoaded(),w&&this._validate(a.u.sprite,"sprite",w,null,B)||(this.stylesheet.sprite=w,w?this._loadSprite(w,!0,Q):(this._unloadSprite(),Q&&Q(null)))}}var vo=a.Y([{name:"a_pos",type:"Int16",components:2}]);let jn={prelude:Mt(`#ifdef GL_ES
+precision mediump float;
+#else
+#if !defined(lowp)
+#define lowp
+#endif
+#if !defined(mediump)
+#define mediump
+#endif
+#if !defined(highp)
+#define highp
+#endif
+#endif
+`,`#ifdef GL_ES
+precision highp float;
+#else
+#if !defined(lowp)
+#define lowp
+#endif
+#if !defined(mediump)
+#define mediump
+#endif
+#if !defined(highp)
+#define highp
+#endif
+#endif
+vec2 unpack_float(const float packedValue) {int packedIntValue=int(packedValue);int v0=packedIntValue/256;return vec2(v0,packedIntValue-v0*256);}vec2 unpack_opacity(const float packedOpacity) {int intOpacity=int(packedOpacity)/2;return vec2(float(intOpacity)/127.0,mod(packedOpacity,2.0));}vec4 decode_color(const vec2 encodedColor) {return vec4(unpack_float(encodedColor[0])/255.0,unpack_float(encodedColor[1])/255.0
+);}float unpack_mix_vec2(const vec2 packedValue,const float t) {return mix(packedValue[0],packedValue[1],t);}vec4 unpack_mix_color(const vec4 packedColors,const float t) {vec4 minColor=decode_color(vec2(packedColors[0],packedColors[1]));vec4 maxColor=decode_color(vec2(packedColors[2],packedColors[3]));return mix(minColor,maxColor,t);}vec2 get_pattern_pos(const vec2 pixel_coord_upper,const vec2 pixel_coord_lower,const vec2 pattern_size,const float tile_units_to_pixels,const vec2 pos) {vec2 offset=mod(mod(mod(pixel_coord_upper,pattern_size)*256.0,pattern_size)*256.0+pixel_coord_lower,pattern_size);return (tile_units_to_pixels*pos+offset)/pattern_size;}
+#ifdef TERRAIN3D
+uniform sampler2D u_terrain;uniform float u_terrain_dim;uniform mat4 u_terrain_matrix;uniform vec4 u_terrain_unpack;uniform float u_terrain_exaggeration;uniform highp sampler2D u_depth;
+#endif
+const highp vec4 bitSh=vec4(256.*256.*256.,256.*256.,256.,1.);const highp vec4 bitShifts=vec4(1.)/bitSh;highp float unpack(highp vec4 color) {return dot(color,bitShifts);}highp float depthOpacity(vec3 frag) {
+#ifdef TERRAIN3D
+highp float d=unpack(texture2D(u_depth,frag.xy*0.5+0.5))+0.0001-frag.z;return 1.0-max(0.0,min(1.0,-d*500.0));
+#else
+return 1.0;
+#endif
+}float calculate_visibility(vec4 pos) {
+#ifdef TERRAIN3D
+vec3 frag=pos.xyz/pos.w;highp float d=depthOpacity(frag);if (d > 0.95) return 1.0;return (d+depthOpacity(frag+vec3(0.0,0.01,0.0)))/2.0;
+#else
+return 1.0;
+#endif
+}float ele(vec2 pos) {
+#ifdef TERRAIN3D
+vec4 rgb=(texture2D(u_terrain,pos)*255.0)*u_terrain_unpack;return rgb.r+rgb.g+rgb.b-u_terrain_unpack.a;
+#else
+return 0.0;
+#endif
+}float get_elevation(vec2 pos) {
+#ifdef TERRAIN3D
+vec2 coord=(u_terrain_matrix*vec4(pos,0.0,1.0)).xy*u_terrain_dim+1.0;vec2 f=fract(coord);vec2 c=(floor(coord)+0.5)/(u_terrain_dim+2.0);float d=1.0/(u_terrain_dim+2.0);float tl=ele(c);float tr=ele(c+vec2(d,0.0));float bl=ele(c+vec2(0.0,d));float br=ele(c+vec2(d,d));float elevation=mix(mix(tl,tr,f.x),mix(bl,br,f.x),f.y);return elevation*u_terrain_exaggeration;
+#else
+return 0.0;
+#endif
+}`),background:Mt(`uniform vec4 u_color;uniform float u_opacity;void main() {gl_FragColor=u_color*u_opacity;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,"attribute vec2 a_pos;uniform mat4 u_matrix;void main() {gl_Position=u_matrix*vec4(a_pos,0,1);}"),backgroundPattern:Mt(`uniform vec2 u_pattern_tl_a;uniform vec2 u_pattern_br_a;uniform vec2 u_pattern_tl_b;uniform vec2 u_pattern_br_b;uniform vec2 u_texsize;uniform float u_mix;uniform float u_opacity;uniform sampler2D u_image;varying vec2 v_pos_a;varying vec2 v_pos_b;void main() {vec2 imagecoord=mod(v_pos_a,1.0);vec2 pos=mix(u_pattern_tl_a/u_texsize,u_pattern_br_a/u_texsize,imagecoord);vec4 color1=texture2D(u_image,pos);vec2 imagecoord_b=mod(v_pos_b,1.0);vec2 pos2=mix(u_pattern_tl_b/u_texsize,u_pattern_br_b/u_texsize,imagecoord_b);vec4 color2=texture2D(u_image,pos2);gl_FragColor=mix(color1,color2,u_mix)*u_opacity;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,"uniform mat4 u_matrix;uniform vec2 u_pattern_size_a;uniform vec2 u_pattern_size_b;uniform vec2 u_pixel_coord_upper;uniform vec2 u_pixel_coord_lower;uniform float u_scale_a;uniform float u_scale_b;uniform float u_tile_units_to_pixels;attribute vec2 a_pos;varying vec2 v_pos_a;varying vec2 v_pos_b;void main() {gl_Position=u_matrix*vec4(a_pos,0,1);v_pos_a=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,u_scale_a*u_pattern_size_a,u_tile_units_to_pixels,a_pos);v_pos_b=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,u_scale_b*u_pattern_size_b,u_tile_units_to_pixels,a_pos);}"),circle:Mt(`varying vec3 v_data;varying float v_visibility;
+#pragma mapbox: define highp vec4 color
+#pragma mapbox: define mediump float radius
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define highp vec4 stroke_color
+#pragma mapbox: define mediump float stroke_width
+#pragma mapbox: define lowp float stroke_opacity
+void main() {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize mediump float radius
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize highp vec4 stroke_color
+#pragma mapbox: initialize mediump float stroke_width
+#pragma mapbox: initialize lowp float stroke_opacity
+vec2 extrude=v_data.xy;float extrude_length=length(extrude);float antialiased_blur=v_data.z;float opacity_t=smoothstep(0.0,antialiased_blur,extrude_length-1.0);float color_t=stroke_width < 0.01 ? 0.0 : smoothstep(antialiased_blur,0.0,extrude_length-radius/(radius+stroke_width));gl_FragColor=v_visibility*opacity_t*mix(color*opacity,stroke_color*stroke_opacity,color_t);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`uniform mat4 u_matrix;uniform bool u_scale_with_map;uniform bool u_pitch_with_map;uniform vec2 u_extrude_scale;uniform lowp float u_device_pixel_ratio;uniform highp float u_camera_to_center_distance;attribute vec2 a_pos;varying vec3 v_data;varying float v_visibility;
+#pragma mapbox: define highp vec4 color
+#pragma mapbox: define mediump float radius
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define highp vec4 stroke_color
+#pragma mapbox: define mediump float stroke_width
+#pragma mapbox: define lowp float stroke_opacity
+void main(void) {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize mediump float radius
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize highp vec4 stroke_color
+#pragma mapbox: initialize mediump float stroke_width
+#pragma mapbox: initialize lowp float stroke_opacity
+vec2 extrude=vec2(mod(a_pos,2.0)*2.0-1.0);vec2 circle_center=floor(a_pos*0.5);float ele=get_elevation(circle_center);v_visibility=calculate_visibility(u_matrix*vec4(circle_center,ele,1.0));if (u_pitch_with_map) {vec2 corner_position=circle_center;if (u_scale_with_map) {corner_position+=extrude*(radius+stroke_width)*u_extrude_scale;} else {vec4 projected_center=u_matrix*vec4(circle_center,0,1);corner_position+=extrude*(radius+stroke_width)*u_extrude_scale*(projected_center.w/u_camera_to_center_distance);}gl_Position=u_matrix*vec4(corner_position,ele,1);} else {gl_Position=u_matrix*vec4(circle_center,ele,1);if (u_scale_with_map) {gl_Position.xy+=extrude*(radius+stroke_width)*u_extrude_scale*u_camera_to_center_distance;} else {gl_Position.xy+=extrude*(radius+stroke_width)*u_extrude_scale*gl_Position.w;}}float antialiasblur=-max(1.0/u_device_pixel_ratio/(radius+stroke_width),blur);v_data=vec3(extrude.x,extrude.y,antialiasblur);}`),clippingMask:Mt("void main() {gl_FragColor=vec4(1.0);}","attribute vec2 a_pos;uniform mat4 u_matrix;void main() {gl_Position=u_matrix*vec4(a_pos,0,1);}"),heatmap:Mt(`uniform highp float u_intensity;varying vec2 v_extrude;
+#pragma mapbox: define highp float weight
+#define GAUSS_COEF 0.3989422804014327
+void main() {
+#pragma mapbox: initialize highp float weight
+float d=-0.5*3.0*3.0*dot(v_extrude,v_extrude);float val=weight*u_intensity*GAUSS_COEF*exp(d);gl_FragColor=vec4(val,1.0,1.0,1.0);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`uniform mat4 u_matrix;uniform float u_extrude_scale;uniform float u_opacity;uniform float u_intensity;attribute vec2 a_pos;varying vec2 v_extrude;
+#pragma mapbox: define highp float weight
+#pragma mapbox: define mediump float radius
+const highp float ZERO=1.0/255.0/16.0;
+#define GAUSS_COEF 0.3989422804014327
+void main(void) {
+#pragma mapbox: initialize highp float weight
+#pragma mapbox: initialize mediump float radius
+vec2 unscaled_extrude=vec2(mod(a_pos,2.0)*2.0-1.0);float S=sqrt(-2.0*log(ZERO/weight/u_intensity/GAUSS_COEF))/3.0;v_extrude=S*unscaled_extrude;vec2 extrude=v_extrude*radius*u_extrude_scale;vec4 pos=vec4(floor(a_pos*0.5)+extrude,get_elevation(floor(a_pos*0.5)),1);gl_Position=u_matrix*pos;}`),heatmapTexture:Mt(`uniform sampler2D u_image;uniform sampler2D u_color_ramp;uniform float u_opacity;varying vec2 v_pos;void main() {float t=texture2D(u_image,v_pos).r;vec4 color=texture2D(u_color_ramp,vec2(t,0.5));gl_FragColor=color*u_opacity;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(0.0);
+#endif
+}`,"uniform mat4 u_matrix;uniform vec2 u_world;attribute vec2 a_pos;varying vec2 v_pos;void main() {gl_Position=u_matrix*vec4(a_pos*u_world,0,1);v_pos.x=a_pos.x;v_pos.y=1.0-a_pos.y;}"),collisionBox:Mt("varying float v_placed;varying float v_notUsed;void main() {float alpha=0.5;gl_FragColor=vec4(1.0,0.0,0.0,1.0)*alpha;if (v_placed > 0.5) {gl_FragColor=vec4(0.0,0.0,1.0,0.5)*alpha;}if (v_notUsed > 0.5) {gl_FragColor*=.1;}}","attribute vec2 a_anchor_pos;attribute vec2 a_placed;attribute vec2 a_box_real;uniform mat4 u_matrix;uniform vec2 u_pixel_extrude_scale;varying float v_placed;varying float v_notUsed;vec4 projectTileWithElevation(vec2 posInTile,float elevation) {return u_matrix*vec4(posInTile,elevation,1.0);}void main() {gl_Position=projectTileWithElevation(a_anchor_pos,get_elevation(a_anchor_pos));gl_Position.xy=((a_box_real+0.5)*u_pixel_extrude_scale*2.0-1.0)*vec2(1.0,-1.0)*gl_Position.w;if (gl_Position.z/gl_Position.w < 1.1) {gl_Position.z=0.5;}v_placed=a_placed.x;v_notUsed=a_placed.y;}"),collisionCircle:Mt("varying float v_radius;varying vec2 v_extrude;varying float v_perspective_ratio;varying float v_collision;void main() {float alpha=0.5*min(v_perspective_ratio,1.0);float stroke_radius=0.9*max(v_perspective_ratio,1.0);float distance_to_center=length(v_extrude);float distance_to_edge=abs(distance_to_center-v_radius);float opacity_t=smoothstep(-stroke_radius,0.0,-distance_to_edge);vec4 color=mix(vec4(0.0,0.0,1.0,0.5),vec4(1.0,0.0,0.0,1.0),v_collision);gl_FragColor=color*alpha*opacity_t;}","attribute vec2 a_pos;attribute float a_radius;attribute vec2 a_flags;uniform mat4 u_matrix;uniform mat4 u_inv_matrix;uniform vec2 u_viewport_size;uniform float u_camera_to_center_distance;varying float v_radius;varying vec2 v_extrude;varying float v_perspective_ratio;varying float v_collision;vec3 toTilePosition(vec2 screenPos) {vec4 rayStart=u_inv_matrix*vec4(screenPos,-1.0,1.0);vec4 rayEnd  =u_inv_matrix*vec4(screenPos, 1.0,1.0);rayStart.xyz/=rayStart.w;rayEnd.xyz  /=rayEnd.w;highp float t=(0.0-rayStart.z)/(rayEnd.z-rayStart.z);return mix(rayStart.xyz,rayEnd.xyz,t);}void main() {vec2 quadCenterPos=a_pos;float radius=a_radius;float collision=a_flags.x;float vertexIdx=a_flags.y;vec2 quadVertexOffset=vec2(mix(-1.0,1.0,float(vertexIdx >=2.0)),mix(-1.0,1.0,float(vertexIdx >=1.0 && vertexIdx <=2.0)));vec2 quadVertexExtent=quadVertexOffset*radius;vec3 tilePos=toTilePosition(quadCenterPos);vec4 clipPos=u_matrix*vec4(tilePos,1.0);highp float camera_to_anchor_distance=clipPos.w;highp float collision_perspective_ratio=clamp(0.5+0.5*(u_camera_to_center_distance/camera_to_anchor_distance),0.0,4.0);float padding_factor=1.2;v_radius=radius;v_extrude=quadVertexExtent*padding_factor;v_perspective_ratio=collision_perspective_ratio;v_collision=collision;gl_Position=vec4(clipPos.xyz/clipPos.w,1.0)+vec4(quadVertexExtent*padding_factor/u_viewport_size*2.0,0.0,0.0);}"),debug:Mt("uniform highp vec4 u_color;uniform sampler2D u_overlay;varying vec2 v_uv;void main() {vec4 overlay_color=texture2D(u_overlay,v_uv);gl_FragColor=mix(u_color,overlay_color,overlay_color.a);}","attribute vec2 a_pos;varying vec2 v_uv;uniform mat4 u_matrix;uniform float u_overlay_scale;void main() {v_uv=a_pos/8192.0;gl_Position=u_matrix*vec4(a_pos*u_overlay_scale,get_elevation(a_pos),1);}"),fill:Mt(`#pragma mapbox: define highp vec4 color
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize lowp float opacity
+gl_FragColor=color*opacity;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`attribute vec2 a_pos;uniform mat4 u_matrix;
+#pragma mapbox: define highp vec4 color
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize lowp float opacity
+gl_Position=u_matrix*vec4(a_pos,0,1);}`),fillOutline:Mt(`varying vec2 v_pos;
+#pragma mapbox: define highp vec4 outline_color
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize highp vec4 outline_color
+#pragma mapbox: initialize lowp float opacity
+float dist=length(v_pos-gl_FragCoord.xy);float alpha=1.0-smoothstep(0.0,1.0,dist);gl_FragColor=outline_color*(alpha*opacity);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`attribute vec2 a_pos;uniform mat4 u_matrix;uniform vec2 u_world;varying vec2 v_pos;
+#pragma mapbox: define highp vec4 outline_color
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize highp vec4 outline_color
+#pragma mapbox: initialize lowp float opacity
+gl_Position=u_matrix*vec4(a_pos,0,1);v_pos=(gl_Position.xy/gl_Position.w+1.0)/2.0*u_world;}`),fillOutlinePattern:Mt(`uniform vec2 u_texsize;uniform sampler2D u_image;uniform float u_fade;varying vec2 v_pos_a;varying vec2 v_pos_b;varying vec2 v_pos;
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+void main() {
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+vec2 pattern_tl_a=pattern_from.xy;vec2 pattern_br_a=pattern_from.zw;vec2 pattern_tl_b=pattern_to.xy;vec2 pattern_br_b=pattern_to.zw;vec2 imagecoord=mod(v_pos_a,1.0);vec2 pos=mix(pattern_tl_a/u_texsize,pattern_br_a/u_texsize,imagecoord);vec4 color1=texture2D(u_image,pos);vec2 imagecoord_b=mod(v_pos_b,1.0);vec2 pos2=mix(pattern_tl_b/u_texsize,pattern_br_b/u_texsize,imagecoord_b);vec4 color2=texture2D(u_image,pos2);float dist=length(v_pos-gl_FragCoord.xy);float alpha=1.0-smoothstep(0.0,1.0,dist);gl_FragColor=mix(color1,color2,u_fade)*alpha*opacity;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`uniform mat4 u_matrix;uniform vec2 u_world;uniform vec2 u_pixel_coord_upper;uniform vec2 u_pixel_coord_lower;uniform vec3 u_scale;attribute vec2 a_pos;varying vec2 v_pos_a;varying vec2 v_pos_b;varying vec2 v_pos;
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+#pragma mapbox: define lowp float pixel_ratio_from
+#pragma mapbox: define lowp float pixel_ratio_to
+void main() {
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+#pragma mapbox: initialize lowp float pixel_ratio_from
+#pragma mapbox: initialize lowp float pixel_ratio_to
+vec2 pattern_tl_a=pattern_from.xy;vec2 pattern_br_a=pattern_from.zw;vec2 pattern_tl_b=pattern_to.xy;vec2 pattern_br_b=pattern_to.zw;float tileRatio=u_scale.x;float fromScale=u_scale.y;float toScale=u_scale.z;gl_Position=u_matrix*vec4(a_pos,0,1);vec2 display_size_a=(pattern_br_a-pattern_tl_a)/pixel_ratio_from;vec2 display_size_b=(pattern_br_b-pattern_tl_b)/pixel_ratio_to;v_pos_a=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,fromScale*display_size_a,tileRatio,a_pos);v_pos_b=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,toScale*display_size_b,tileRatio,a_pos);v_pos=(gl_Position.xy/gl_Position.w+1.0)/2.0*u_world;}`),fillPattern:Mt(`#ifdef GL_ES
+precision highp float;
+#endif
+uniform vec2 u_texsize;uniform float u_fade;uniform sampler2D u_image;varying vec2 v_pos_a;varying vec2 v_pos_b;
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+void main() {
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+vec2 pattern_tl_a=pattern_from.xy;vec2 pattern_br_a=pattern_from.zw;vec2 pattern_tl_b=pattern_to.xy;vec2 pattern_br_b=pattern_to.zw;vec2 imagecoord=mod(v_pos_a,1.0);vec2 pos=mix(pattern_tl_a/u_texsize,pattern_br_a/u_texsize,imagecoord);vec4 color1=texture2D(u_image,pos);vec2 imagecoord_b=mod(v_pos_b,1.0);vec2 pos2=mix(pattern_tl_b/u_texsize,pattern_br_b/u_texsize,imagecoord_b);vec4 color2=texture2D(u_image,pos2);gl_FragColor=mix(color1,color2,u_fade)*opacity;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`uniform mat4 u_matrix;uniform vec2 u_pixel_coord_upper;uniform vec2 u_pixel_coord_lower;uniform vec3 u_scale;attribute vec2 a_pos;varying vec2 v_pos_a;varying vec2 v_pos_b;
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+#pragma mapbox: define lowp float pixel_ratio_from
+#pragma mapbox: define lowp float pixel_ratio_to
+void main() {
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+#pragma mapbox: initialize lowp float pixel_ratio_from
+#pragma mapbox: initialize lowp float pixel_ratio_to
+vec2 pattern_tl_a=pattern_from.xy;vec2 pattern_br_a=pattern_from.zw;vec2 pattern_tl_b=pattern_to.xy;vec2 pattern_br_b=pattern_to.zw;float tileZoomRatio=u_scale.x;float fromScale=u_scale.y;float toScale=u_scale.z;vec2 display_size_a=(pattern_br_a-pattern_tl_a)/pixel_ratio_from;vec2 display_size_b=(pattern_br_b-pattern_tl_b)/pixel_ratio_to;gl_Position=u_matrix*vec4(a_pos,0,1);v_pos_a=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,fromScale*display_size_a,tileZoomRatio,a_pos);v_pos_b=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,toScale*display_size_b,tileZoomRatio,a_pos);}`),fillExtrusion:Mt(`varying vec4 v_color;void main() {gl_FragColor=v_color;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`uniform mat4 u_matrix;uniform vec3 u_lightcolor;uniform lowp vec3 u_lightpos;uniform lowp float u_lightintensity;uniform float u_vertical_gradient;uniform lowp float u_opacity;attribute vec2 a_pos;attribute vec4 a_normal_ed;
+#ifdef TERRAIN3D
+attribute vec2 a_centroid;
+#endif
+varying vec4 v_color;
+#pragma mapbox: define highp float base
+#pragma mapbox: define highp float height
+#pragma mapbox: define highp vec4 color
+void main() {
+#pragma mapbox: initialize highp float base
+#pragma mapbox: initialize highp float height
+#pragma mapbox: initialize highp vec4 color
+vec3 normal=a_normal_ed.xyz;
+#ifdef TERRAIN3D
+float height_terrain3d_offset=get_elevation(a_centroid);float base_terrain3d_offset=height_terrain3d_offset-(base > 0.0 ? 0.0 : 10.0);
+#else
+float height_terrain3d_offset=0.0;float base_terrain3d_offset=0.0;
+#endif
+base=max(0.0,base)+base_terrain3d_offset;height=max(0.0,height)+height_terrain3d_offset;float t=mod(normal.x,2.0);gl_Position=u_matrix*vec4(a_pos,t > 0.0 ? height : base,1);float colorvalue=color.r*0.2126+color.g*0.7152+color.b*0.0722;v_color=vec4(0.0,0.0,0.0,1.0);vec4 ambientlight=vec4(0.03,0.03,0.03,1.0);color+=ambientlight;float directional=clamp(dot(normal/16384.0,u_lightpos),0.0,1.0);directional=mix((1.0-u_lightintensity),max((1.0-colorvalue+u_lightintensity),1.0),directional);if (normal.y !=0.0) {directional*=((1.0-u_vertical_gradient)+(u_vertical_gradient*clamp((t+base)*pow(height/150.0,0.5),mix(0.7,0.98,1.0-u_lightintensity),1.0)));}v_color.r+=clamp(color.r*directional*u_lightcolor.r,mix(0.0,0.3,1.0-u_lightcolor.r),1.0);v_color.g+=clamp(color.g*directional*u_lightcolor.g,mix(0.0,0.3,1.0-u_lightcolor.g),1.0);v_color.b+=clamp(color.b*directional*u_lightcolor.b,mix(0.0,0.3,1.0-u_lightcolor.b),1.0);v_color*=u_opacity;}`),fillExtrusionPattern:Mt(`uniform vec2 u_texsize;uniform float u_fade;uniform sampler2D u_image;varying vec2 v_pos_a;varying vec2 v_pos_b;varying vec4 v_lighting;
+#pragma mapbox: define lowp float base
+#pragma mapbox: define lowp float height
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+#pragma mapbox: define lowp float pixel_ratio_from
+#pragma mapbox: define lowp float pixel_ratio_to
+void main() {
+#pragma mapbox: initialize lowp float base
+#pragma mapbox: initialize lowp float height
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+#pragma mapbox: initialize lowp float pixel_ratio_from
+#pragma mapbox: initialize lowp float pixel_ratio_to
+vec2 pattern_tl_a=pattern_from.xy;vec2 pattern_br_a=pattern_from.zw;vec2 pattern_tl_b=pattern_to.xy;vec2 pattern_br_b=pattern_to.zw;vec2 imagecoord=mod(v_pos_a,1.0);vec2 pos=mix(pattern_tl_a/u_texsize,pattern_br_a/u_texsize,imagecoord);vec4 color1=texture2D(u_image,pos);vec2 imagecoord_b=mod(v_pos_b,1.0);vec2 pos2=mix(pattern_tl_b/u_texsize,pattern_br_b/u_texsize,imagecoord_b);vec4 color2=texture2D(u_image,pos2);vec4 mixedColor=mix(color1,color2,u_fade);gl_FragColor=mixedColor*v_lighting;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`uniform mat4 u_matrix;uniform vec2 u_pixel_coord_upper;uniform vec2 u_pixel_coord_lower;uniform float u_height_factor;uniform vec3 u_scale;uniform float u_vertical_gradient;uniform lowp float u_opacity;uniform vec3 u_lightcolor;uniform lowp vec3 u_lightpos;uniform lowp float u_lightintensity;attribute vec2 a_pos;attribute vec4 a_normal_ed;
+#ifdef TERRAIN3D
+attribute vec2 a_centroid;
+#endif
+varying vec2 v_pos_a;varying vec2 v_pos_b;varying vec4 v_lighting;
+#pragma mapbox: define lowp float base
+#pragma mapbox: define lowp float height
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+#pragma mapbox: define lowp float pixel_ratio_from
+#pragma mapbox: define lowp float pixel_ratio_to
+void main() {
+#pragma mapbox: initialize lowp float base
+#pragma mapbox: initialize lowp float height
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+#pragma mapbox: initialize lowp float pixel_ratio_from
+#pragma mapbox: initialize lowp float pixel_ratio_to
+vec2 pattern_tl_a=pattern_from.xy;vec2 pattern_br_a=pattern_from.zw;vec2 pattern_tl_b=pattern_to.xy;vec2 pattern_br_b=pattern_to.zw;float tileRatio=u_scale.x;float fromScale=u_scale.y;float toScale=u_scale.z;vec3 normal=a_normal_ed.xyz;float edgedistance=a_normal_ed.w;vec2 display_size_a=(pattern_br_a-pattern_tl_a)/pixel_ratio_from;vec2 display_size_b=(pattern_br_b-pattern_tl_b)/pixel_ratio_to;
+#ifdef TERRAIN3D
+float height_terrain3d_offset=get_elevation(a_centroid);float base_terrain3d_offset=height_terrain3d_offset-(base > 0.0 ? 0.0 : 10.0);
+#else
+float height_terrain3d_offset=0.0;float base_terrain3d_offset=0.0;
+#endif
+base=max(0.0,base)+base_terrain3d_offset;height=max(0.0,height)+height_terrain3d_offset;float t=mod(normal.x,2.0);float z=t > 0.0 ? height : base;gl_Position=u_matrix*vec4(a_pos,z,1);vec2 pos=normal.x==1.0 && normal.y==0.0 && normal.z==16384.0
+? a_pos
+: vec2(edgedistance,z*u_height_factor);v_pos_a=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,fromScale*display_size_a,tileRatio,pos);v_pos_b=get_pattern_pos(u_pixel_coord_upper,u_pixel_coord_lower,toScale*display_size_b,tileRatio,pos);v_lighting=vec4(0.0,0.0,0.0,1.0);float directional=clamp(dot(normal/16383.0,u_lightpos),0.0,1.0);directional=mix((1.0-u_lightintensity),max((0.5+u_lightintensity),1.0),directional);if (normal.y !=0.0) {directional*=((1.0-u_vertical_gradient)+(u_vertical_gradient*clamp((t+base)*pow(height/150.0,0.5),mix(0.7,0.98,1.0-u_lightintensity),1.0)));}v_lighting.rgb+=clamp(directional*u_lightcolor,mix(vec3(0.0),vec3(0.3),1.0-u_lightcolor),vec3(1.0));v_lighting*=u_opacity;}`),hillshadePrepare:Mt(`#ifdef GL_ES
+precision highp float;
+#endif
+uniform sampler2D u_image;varying vec2 v_pos;uniform vec2 u_dimension;uniform float u_zoom;uniform vec4 u_unpack;float getElevation(vec2 coord,float bias) {vec4 data=texture2D(u_image,coord)*255.0;data.a=-1.0;return dot(data,u_unpack)/4.0;}void main() {vec2 epsilon=1.0/u_dimension;float a=getElevation(v_pos+vec2(-epsilon.x,-epsilon.y),0.0);float b=getElevation(v_pos+vec2(0,-epsilon.y),0.0);float c=getElevation(v_pos+vec2(epsilon.x,-epsilon.y),0.0);float d=getElevation(v_pos+vec2(-epsilon.x,0),0.0);float e=getElevation(v_pos,0.0);float f=getElevation(v_pos+vec2(epsilon.x,0),0.0);float g=getElevation(v_pos+vec2(-epsilon.x,epsilon.y),0.0);float h=getElevation(v_pos+vec2(0,epsilon.y),0.0);float i=getElevation(v_pos+vec2(epsilon.x,epsilon.y),0.0);float exaggerationFactor=u_zoom < 2.0 ? 0.4 : u_zoom < 4.5 ? 0.35 : 0.3;float exaggeration=u_zoom < 15.0 ? (u_zoom-15.0)*exaggerationFactor : 0.0;vec2 deriv=vec2((c+f+f+i)-(a+d+d+g),(g+h+h+i)-(a+b+b+c))/pow(2.0,exaggeration+(19.2562-u_zoom));gl_FragColor=clamp(vec4(deriv.x/2.0+0.5,deriv.y/2.0+0.5,1.0,1.0),0.0,1.0);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,"uniform mat4 u_matrix;uniform vec2 u_dimension;attribute vec2 a_pos;attribute vec2 a_texture_pos;varying vec2 v_pos;void main() {gl_Position=u_matrix*vec4(a_pos,0,1);highp vec2 epsilon=1.0/u_dimension;float scale=(u_dimension.x-2.0)/u_dimension.x;v_pos=(a_texture_pos/8192.0)*scale+epsilon;}"),hillshade:Mt(`uniform sampler2D u_image;varying vec2 v_pos;uniform vec2 u_latrange;uniform vec2 u_light;uniform vec4 u_shadow;uniform vec4 u_highlight;uniform vec4 u_accent;
+#define PI 3.141592653589793
+void main() {vec4 pixel=texture2D(u_image,v_pos);vec2 deriv=((pixel.rg*2.0)-1.0);float scaleFactor=cos(radians((u_latrange[0]-u_latrange[1])*(1.0-v_pos.y)+u_latrange[1]));float slope=atan(1.25*length(deriv)/scaleFactor);float aspect=deriv.x !=0.0 ? atan(deriv.y,-deriv.x) : PI/2.0*(deriv.y > 0.0 ? 1.0 :-1.0);float intensity=u_light.x;float azimuth=u_light.y+PI;float base=1.875-intensity*1.75;float maxValue=0.5*PI;float scaledSlope=intensity !=0.5 ? ((pow(base,slope)-1.0)/(pow(base,maxValue)-1.0))*maxValue : slope;float accent=cos(scaledSlope);vec4 accent_color=(1.0-accent)*u_accent*clamp(intensity*2.0,0.0,1.0);float shade=abs(mod((aspect+azimuth)/PI+0.5,2.0)-1.0);vec4 shade_color=mix(u_shadow,u_highlight,shade)*sin(scaledSlope)*clamp(intensity*2.0,0.0,1.0);gl_FragColor=accent_color*(1.0-shade_color.a)+shade_color;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,"uniform mat4 u_matrix;attribute vec2 a_pos;attribute vec2 a_texture_pos;varying vec2 v_pos;void main() {gl_Position=u_matrix*vec4(a_pos,0,1);v_pos=a_texture_pos/8192.0;}"),line:Mt(`uniform lowp float u_device_pixel_ratio;varying vec2 v_width2;varying vec2 v_normal;varying float v_gamma_scale;
+#pragma mapbox: define highp vec4 color
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+float dist=length(v_normal)*v_width2.s;float blur2=(blur+1.0/u_device_pixel_ratio)*v_gamma_scale;float alpha=clamp(min(dist-(v_width2.t-blur2),v_width2.s-dist)/blur2,0.0,1.0);gl_FragColor=color*(alpha*opacity);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`
+#define scale 0.015873016
+attribute vec2 a_pos_normal;attribute vec4 a_data;uniform mat4 u_matrix;uniform mediump float u_ratio;uniform vec2 u_units_to_pixels;uniform lowp float u_device_pixel_ratio;varying vec2 v_normal;varying vec2 v_width2;varying float v_gamma_scale;varying highp float v_linesofar;
+#pragma mapbox: define highp vec4 color
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define mediump float gapwidth
+#pragma mapbox: define lowp float offset
+#pragma mapbox: define mediump float width
+void main() {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump float gapwidth
+#pragma mapbox: initialize lowp float offset
+#pragma mapbox: initialize mediump float width
+float ANTIALIASING=1.0/u_device_pixel_ratio/2.0;vec2 a_extrude=a_data.xy-128.0;float a_direction=mod(a_data.z,4.0)-1.0;v_linesofar=(floor(a_data.z/4.0)+a_data.w*64.0)*2.0;vec2 pos=floor(a_pos_normal*0.5);mediump vec2 normal=a_pos_normal-2.0*pos;normal.y=normal.y*2.0-1.0;v_normal=normal;gapwidth=gapwidth/2.0;float halfwidth=width/2.0;offset=-1.0*offset;float inset=gapwidth+(gapwidth > 0.0 ? ANTIALIASING : 0.0);float outset=gapwidth+halfwidth*(gapwidth > 0.0 ? 2.0 : 1.0)+(halfwidth==0.0 ? 0.0 : ANTIALIASING);mediump vec2 dist=outset*a_extrude*scale;mediump float u=0.5*a_direction;mediump float t=1.0-abs(u);mediump vec2 offset2=offset*a_extrude*scale*normal.y*mat2(t,-u,u,t);vec4 projected_extrude=u_matrix*vec4(dist/u_ratio,0.0,0.0);gl_Position=u_matrix*vec4(pos+offset2/u_ratio,0.0,1.0)+projected_extrude;
+#ifdef TERRAIN3D
+v_gamma_scale=1.0;
+#else
+float extrude_length_without_perspective=length(dist);float extrude_length_with_perspective=length(projected_extrude.xy/gl_Position.w*u_units_to_pixels);v_gamma_scale=extrude_length_without_perspective/extrude_length_with_perspective;
+#endif
+v_width2=vec2(outset,inset);}`),lineGradient:Mt(`uniform lowp float u_device_pixel_ratio;uniform sampler2D u_image;varying vec2 v_width2;varying vec2 v_normal;varying float v_gamma_scale;varying highp vec2 v_uv;
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+float dist=length(v_normal)*v_width2.s;float blur2=(blur+1.0/u_device_pixel_ratio)*v_gamma_scale;float alpha=clamp(min(dist-(v_width2.t-blur2),v_width2.s-dist)/blur2,0.0,1.0);vec4 color=texture2D(u_image,v_uv);gl_FragColor=color*(alpha*opacity);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`
+#define scale 0.015873016
+attribute vec2 a_pos_normal;attribute vec4 a_data;attribute float a_uv_x;attribute float a_split_index;uniform mat4 u_matrix;uniform mediump float u_ratio;uniform lowp float u_device_pixel_ratio;uniform vec2 u_units_to_pixels;uniform float u_image_height;varying vec2 v_normal;varying vec2 v_width2;varying float v_gamma_scale;varying highp vec2 v_uv;
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define mediump float gapwidth
+#pragma mapbox: define lowp float offset
+#pragma mapbox: define mediump float width
+void main() {
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump float gapwidth
+#pragma mapbox: initialize lowp float offset
+#pragma mapbox: initialize mediump float width
+float ANTIALIASING=1.0/u_device_pixel_ratio/2.0;vec2 a_extrude=a_data.xy-128.0;float a_direction=mod(a_data.z,4.0)-1.0;highp float texel_height=1.0/u_image_height;highp float half_texel_height=0.5*texel_height;v_uv=vec2(a_uv_x,a_split_index*texel_height-half_texel_height);vec2 pos=floor(a_pos_normal*0.5);mediump vec2 normal=a_pos_normal-2.0*pos;normal.y=normal.y*2.0-1.0;v_normal=normal;gapwidth=gapwidth/2.0;float halfwidth=width/2.0;offset=-1.0*offset;float inset=gapwidth+(gapwidth > 0.0 ? ANTIALIASING : 0.0);float outset=gapwidth+halfwidth*(gapwidth > 0.0 ? 2.0 : 1.0)+(halfwidth==0.0 ? 0.0 : ANTIALIASING);mediump vec2 dist=outset*a_extrude*scale;mediump float u=0.5*a_direction;mediump float t=1.0-abs(u);mediump vec2 offset2=offset*a_extrude*scale*normal.y*mat2(t,-u,u,t);vec4 projected_extrude=u_matrix*vec4(dist/u_ratio,0.0,0.0);gl_Position=u_matrix*vec4(pos+offset2/u_ratio,0.0,1.0)+projected_extrude;
+#ifdef TERRAIN3D
+v_gamma_scale=1.0;
+#else
+float extrude_length_without_perspective=length(dist);float extrude_length_with_perspective=length(projected_extrude.xy/gl_Position.w*u_units_to_pixels);v_gamma_scale=extrude_length_without_perspective/extrude_length_with_perspective;
+#endif
+v_width2=vec2(outset,inset);}`),linePattern:Mt(`#ifdef GL_ES
+precision highp float;
+#endif
+uniform lowp float u_device_pixel_ratio;uniform vec2 u_texsize;uniform float u_fade;uniform mediump vec3 u_scale;uniform sampler2D u_image;varying vec2 v_normal;varying vec2 v_width2;varying float v_linesofar;varying float v_gamma_scale;varying float v_width;
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+#pragma mapbox: define lowp float pixel_ratio_from
+#pragma mapbox: define lowp float pixel_ratio_to
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+#pragma mapbox: initialize lowp float pixel_ratio_from
+#pragma mapbox: initialize lowp float pixel_ratio_to
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+vec2 pattern_tl_a=pattern_from.xy;vec2 pattern_br_a=pattern_from.zw;vec2 pattern_tl_b=pattern_to.xy;vec2 pattern_br_b=pattern_to.zw;float tileZoomRatio=u_scale.x;float fromScale=u_scale.y;float toScale=u_scale.z;vec2 display_size_a=(pattern_br_a-pattern_tl_a)/pixel_ratio_from;vec2 display_size_b=(pattern_br_b-pattern_tl_b)/pixel_ratio_to;vec2 pattern_size_a=vec2(display_size_a.x*fromScale/tileZoomRatio,display_size_a.y);vec2 pattern_size_b=vec2(display_size_b.x*toScale/tileZoomRatio,display_size_b.y);float aspect_a=display_size_a.y/v_width;float aspect_b=display_size_b.y/v_width;float dist=length(v_normal)*v_width2.s;float blur2=(blur+1.0/u_device_pixel_ratio)*v_gamma_scale;float alpha=clamp(min(dist-(v_width2.t-blur2),v_width2.s-dist)/blur2,0.0,1.0);float x_a=mod(v_linesofar/pattern_size_a.x*aspect_a,1.0);float x_b=mod(v_linesofar/pattern_size_b.x*aspect_b,1.0);float y=0.5*v_normal.y+0.5;vec2 texel_size=1.0/u_texsize;vec2 pos_a=mix(pattern_tl_a*texel_size-texel_size,pattern_br_a*texel_size+texel_size,vec2(x_a,y));vec2 pos_b=mix(pattern_tl_b*texel_size-texel_size,pattern_br_b*texel_size+texel_size,vec2(x_b,y));vec4 color=mix(texture2D(u_image,pos_a),texture2D(u_image,pos_b),u_fade);gl_FragColor=color*alpha*opacity;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`
+#define scale 0.015873016
+#define LINE_DISTANCE_SCALE 2.0
+attribute vec2 a_pos_normal;attribute vec4 a_data;uniform mat4 u_matrix;uniform vec2 u_units_to_pixels;uniform mediump float u_ratio;uniform lowp float u_device_pixel_ratio;varying vec2 v_normal;varying vec2 v_width2;varying float v_linesofar;varying float v_gamma_scale;varying float v_width;
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp float offset
+#pragma mapbox: define mediump float gapwidth
+#pragma mapbox: define mediump float width
+#pragma mapbox: define lowp float floorwidth
+#pragma mapbox: define lowp vec4 pattern_from
+#pragma mapbox: define lowp vec4 pattern_to
+#pragma mapbox: define lowp float pixel_ratio_from
+#pragma mapbox: define lowp float pixel_ratio_to
+void main() {
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize lowp float offset
+#pragma mapbox: initialize mediump float gapwidth
+#pragma mapbox: initialize mediump float width
+#pragma mapbox: initialize lowp float floorwidth
+#pragma mapbox: initialize mediump vec4 pattern_from
+#pragma mapbox: initialize mediump vec4 pattern_to
+#pragma mapbox: initialize lowp float pixel_ratio_from
+#pragma mapbox: initialize lowp float pixel_ratio_to
+float ANTIALIASING=1.0/u_device_pixel_ratio/2.0;vec2 a_extrude=a_data.xy-128.0;float a_direction=mod(a_data.z,4.0)-1.0;float a_linesofar=(floor(a_data.z/4.0)+a_data.w*64.0)*LINE_DISTANCE_SCALE;vec2 pos=floor(a_pos_normal*0.5);mediump vec2 normal=a_pos_normal-2.0*pos;normal.y=normal.y*2.0-1.0;v_normal=normal;gapwidth=gapwidth/2.0;float halfwidth=width/2.0;offset=-1.0*offset;float inset=gapwidth+(gapwidth > 0.0 ? ANTIALIASING : 0.0);float outset=gapwidth+halfwidth*(gapwidth > 0.0 ? 2.0 : 1.0)+(halfwidth==0.0 ? 0.0 : ANTIALIASING);mediump vec2 dist=outset*a_extrude*scale;mediump float u=0.5*a_direction;mediump float t=1.0-abs(u);mediump vec2 offset2=offset*a_extrude*scale*normal.y*mat2(t,-u,u,t);vec4 projected_extrude=u_matrix*vec4(dist/u_ratio,0.0,0.0);gl_Position=u_matrix*vec4(pos+offset2/u_ratio,0.0,1.0)+projected_extrude;
+#ifdef TERRAIN3D
+v_gamma_scale=1.0;
+#else
+float extrude_length_without_perspective=length(dist);float extrude_length_with_perspective=length(projected_extrude.xy/gl_Position.w*u_units_to_pixels);v_gamma_scale=extrude_length_without_perspective/extrude_length_with_perspective;
+#endif
+v_linesofar=a_linesofar;v_width2=vec2(outset,inset);v_width=floorwidth;}`),lineSDF:Mt(`uniform lowp float u_device_pixel_ratio;uniform sampler2D u_image;uniform float u_sdfgamma;uniform float u_mix;varying vec2 v_normal;varying vec2 v_width2;varying vec2 v_tex_a;varying vec2 v_tex_b;varying float v_gamma_scale;
+#pragma mapbox: define highp vec4 color
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define mediump float width
+#pragma mapbox: define lowp float floorwidth
+void main() {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump float width
+#pragma mapbox: initialize lowp float floorwidth
+float dist=length(v_normal)*v_width2.s;float blur2=(blur+1.0/u_device_pixel_ratio)*v_gamma_scale;float alpha=clamp(min(dist-(v_width2.t-blur2),v_width2.s-dist)/blur2,0.0,1.0);float sdfdist_a=texture2D(u_image,v_tex_a).a;float sdfdist_b=texture2D(u_image,v_tex_b).a;float sdfdist=mix(sdfdist_a,sdfdist_b,u_mix);alpha*=smoothstep(0.5-u_sdfgamma/floorwidth,0.5+u_sdfgamma/floorwidth,sdfdist);gl_FragColor=color*(alpha*opacity);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`
+#define scale 0.015873016
+#define LINE_DISTANCE_SCALE 2.0
+attribute vec2 a_pos_normal;attribute vec4 a_data;uniform mat4 u_matrix;uniform mediump float u_ratio;uniform lowp float u_device_pixel_ratio;uniform vec2 u_patternscale_a;uniform float u_tex_y_a;uniform vec2 u_patternscale_b;uniform float u_tex_y_b;uniform vec2 u_units_to_pixels;varying vec2 v_normal;varying vec2 v_width2;varying vec2 v_tex_a;varying vec2 v_tex_b;varying float v_gamma_scale;
+#pragma mapbox: define highp vec4 color
+#pragma mapbox: define lowp float blur
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define mediump float gapwidth
+#pragma mapbox: define lowp float offset
+#pragma mapbox: define mediump float width
+#pragma mapbox: define lowp float floorwidth
+void main() {
+#pragma mapbox: initialize highp vec4 color
+#pragma mapbox: initialize lowp float blur
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize mediump float gapwidth
+#pragma mapbox: initialize lowp float offset
+#pragma mapbox: initialize mediump float width
+#pragma mapbox: initialize lowp float floorwidth
+float ANTIALIASING=1.0/u_device_pixel_ratio/2.0;vec2 a_extrude=a_data.xy-128.0;float a_direction=mod(a_data.z,4.0)-1.0;float a_linesofar=(floor(a_data.z/4.0)+a_data.w*64.0)*LINE_DISTANCE_SCALE;vec2 pos=floor(a_pos_normal*0.5);mediump vec2 normal=a_pos_normal-2.0*pos;normal.y=normal.y*2.0-1.0;v_normal=normal;gapwidth=gapwidth/2.0;float halfwidth=width/2.0;offset=-1.0*offset;float inset=gapwidth+(gapwidth > 0.0 ? ANTIALIASING : 0.0);float outset=gapwidth+halfwidth*(gapwidth > 0.0 ? 2.0 : 1.0)+(halfwidth==0.0 ? 0.0 : ANTIALIASING);mediump vec2 dist=outset*a_extrude*scale;mediump float u=0.5*a_direction;mediump float t=1.0-abs(u);mediump vec2 offset2=offset*a_extrude*scale*normal.y*mat2(t,-u,u,t);vec4 projected_extrude=u_matrix*vec4(dist/u_ratio,0.0,0.0);gl_Position=u_matrix*vec4(pos+offset2/u_ratio,0.0,1.0)+projected_extrude;
+#ifdef TERRAIN3D
+v_gamma_scale=1.0;
+#else
+float extrude_length_without_perspective=length(dist);float extrude_length_with_perspective=length(projected_extrude.xy/gl_Position.w*u_units_to_pixels);v_gamma_scale=extrude_length_without_perspective/extrude_length_with_perspective;
+#endif
+v_tex_a=vec2(a_linesofar*u_patternscale_a.x/floorwidth,normal.y*u_patternscale_a.y+u_tex_y_a);v_tex_b=vec2(a_linesofar*u_patternscale_b.x/floorwidth,normal.y*u_patternscale_b.y+u_tex_y_b);v_width2=vec2(outset,inset);}`),raster:Mt(`uniform float u_fade_t;uniform float u_opacity;uniform sampler2D u_image0;uniform sampler2D u_image1;varying vec2 v_pos0;varying vec2 v_pos1;uniform float u_brightness_low;uniform float u_brightness_high;uniform float u_saturation_factor;uniform float u_contrast_factor;uniform vec3 u_spin_weights;void main() {vec4 color0=texture2D(u_image0,v_pos0);vec4 color1=texture2D(u_image1,v_pos1);if (color0.a > 0.0) {color0.rgb=color0.rgb/color0.a;}if (color1.a > 0.0) {color1.rgb=color1.rgb/color1.a;}vec4 color=mix(color0,color1,u_fade_t);color.a*=u_opacity;vec3 rgb=color.rgb;rgb=vec3(dot(rgb,u_spin_weights.xyz),dot(rgb,u_spin_weights.zxy),dot(rgb,u_spin_weights.yzx));float average=(color.r+color.g+color.b)/3.0;rgb+=(average-rgb)*u_saturation_factor;rgb=(rgb-0.5)*u_contrast_factor+0.5;vec3 u_high_vec=vec3(u_brightness_low,u_brightness_low,u_brightness_low);vec3 u_low_vec=vec3(u_brightness_high,u_brightness_high,u_brightness_high);gl_FragColor=vec4(mix(u_high_vec,u_low_vec,rgb)*color.a,color.a);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,"uniform mat4 u_matrix;uniform vec2 u_tl_parent;uniform float u_scale_parent;uniform float u_buffer_scale;attribute vec2 a_pos;attribute vec2 a_texture_pos;varying vec2 v_pos0;varying vec2 v_pos1;void main() {gl_Position=u_matrix*vec4(a_pos,0,1);v_pos0=(((a_texture_pos/8192.0)-0.5)/u_buffer_scale )+0.5;v_pos1=(v_pos0*u_scale_parent)+u_tl_parent;}"),symbolIcon:Mt(`uniform sampler2D u_texture;varying vec2 v_tex;varying float v_fade_opacity;
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize lowp float opacity
+lowp float alpha=opacity*v_fade_opacity;gl_FragColor=texture2D(u_texture,v_tex)*alpha;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`attribute vec4 a_pos_offset;attribute vec4 a_data;attribute vec4 a_pixeloffset;attribute vec3 a_projected_pos;attribute float a_fade_opacity;uniform bool u_is_size_zoom_constant;uniform bool u_is_size_feature_constant;uniform highp float u_size_t;uniform highp float u_size;uniform highp float u_camera_to_center_distance;uniform highp float u_pitch;uniform bool u_rotate_symbol;uniform highp float u_aspect_ratio;uniform float u_fade_change;uniform mat4 u_matrix;uniform mat4 u_label_plane_matrix;uniform mat4 u_coord_matrix;uniform bool u_is_text;uniform bool u_pitch_with_map;uniform vec2 u_texsize;uniform bool u_is_along_line;uniform bool u_is_variable_anchor;uniform vec2 u_translation;uniform float u_pitched_scale;varying vec2 v_tex;varying float v_fade_opacity;vec4 projectTileWithElevation(vec2 posInTile,float elevation) {return u_matrix*vec4(posInTile,elevation,1.0);}
+#pragma mapbox: define lowp float opacity
+void main() {
+#pragma mapbox: initialize lowp float opacity
+vec2 a_pos=a_pos_offset.xy;vec2 a_offset=a_pos_offset.zw;vec2 a_tex=a_data.xy;vec2 a_size=a_data.zw;float a_size_min=floor(a_size[0]*0.5);vec2 a_pxoffset=a_pixeloffset.xy;vec2 a_minFontScale=a_pixeloffset.zw/256.0;float ele=get_elevation(a_pos);highp float segment_angle=-a_projected_pos[2];float size;if (!u_is_size_zoom_constant && !u_is_size_feature_constant) {size=mix(a_size_min,a_size[1],u_size_t)/128.0;} else if (u_is_size_zoom_constant && !u_is_size_feature_constant) {size=a_size_min/128.0;} else {size=u_size;}vec2 translated_a_pos=a_pos+u_translation;vec4 projectedPoint=projectTileWithElevation(translated_a_pos,ele);highp float camera_to_anchor_distance=projectedPoint.w;highp float distance_ratio=u_pitch_with_map ?
+camera_to_anchor_distance/u_camera_to_center_distance :
+u_camera_to_center_distance/camera_to_anchor_distance;highp float perspective_ratio=clamp(0.5+0.5*distance_ratio,0.0,4.0);size*=perspective_ratio;float fontScale=u_is_text ? size/24.0 : size;highp float symbol_rotation=0.0;if (u_rotate_symbol) {vec4 offsetProjectedPoint=projectTileWithElevation(translated_a_pos+vec2(1,0),ele);vec2 a=projectedPoint.xy/projectedPoint.w;vec2 b=offsetProjectedPoint.xy/offsetProjectedPoint.w;symbol_rotation=atan((b.y-a.y)/u_aspect_ratio,b.x-a.x);}highp float angle_sin=sin(segment_angle+symbol_rotation);highp float angle_cos=cos(segment_angle+symbol_rotation);mat2 rotation_matrix=mat2(angle_cos,-1.0*angle_sin,angle_sin,angle_cos);vec4 projected_pos;if (u_is_along_line || u_is_variable_anchor) {projected_pos=vec4(a_projected_pos.xy,ele,1.0);} else if (u_pitch_with_map) {projected_pos=u_label_plane_matrix*vec4(a_projected_pos.xy+u_translation,ele,1.0);} else {projected_pos=u_label_plane_matrix*projectTileWithElevation(a_projected_pos.xy+u_translation,ele);}float z=float(u_pitch_with_map)*projected_pos.z/projected_pos.w;float projectionScaling=1.0;vec4 finalPos=u_coord_matrix*vec4(projected_pos.xy/projected_pos.w+rotation_matrix*(a_offset/32.0*max(a_minFontScale,fontScale)+a_pxoffset/16.0)*projectionScaling,z,1.0);if(u_pitch_with_map) {finalPos=projectTileWithElevation(finalPos.xy,finalPos.z);}gl_Position=finalPos;v_tex=a_tex/u_texsize;vec2 fade_opacity=unpack_opacity(a_fade_opacity);float fade_change=fade_opacity[1] > 0.5 ? u_fade_change :-u_fade_change;float visibility=calculate_visibility(projectedPoint);v_fade_opacity=max(0.0,min(visibility,fade_opacity[0]+fade_change));}`),symbolSDF:Mt(`#define SDF_PX 8.0
+uniform bool u_is_halo;uniform sampler2D u_texture;uniform highp float u_gamma_scale;uniform lowp float u_device_pixel_ratio;uniform bool u_is_text;varying vec2 v_data0;varying vec3 v_data1;
+#pragma mapbox: define highp vec4 fill_color
+#pragma mapbox: define highp vec4 halo_color
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp float halo_width
+#pragma mapbox: define lowp float halo_blur
+void main() {
+#pragma mapbox: initialize highp vec4 fill_color
+#pragma mapbox: initialize highp vec4 halo_color
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize lowp float halo_width
+#pragma mapbox: initialize lowp float halo_blur
+float EDGE_GAMMA=0.105/u_device_pixel_ratio;vec2 tex=v_data0.xy;float gamma_scale=v_data1.x;float size=v_data1.y;float fade_opacity=v_data1[2];float fontScale=u_is_text ? size/24.0 : size;lowp vec4 color=fill_color;highp float gamma=EDGE_GAMMA/(fontScale*u_gamma_scale);lowp float inner_edge=(256.0-64.0)/256.0;if (u_is_halo) {color=halo_color;gamma=(halo_blur*1.19/SDF_PX+EDGE_GAMMA)/(fontScale*u_gamma_scale);inner_edge=inner_edge+gamma*gamma_scale;}lowp float dist=texture2D(u_texture,tex).a;highp float gamma_scaled=gamma*gamma_scale;highp float alpha=smoothstep(inner_edge-gamma_scaled,inner_edge+gamma_scaled,dist);if (u_is_halo) {lowp float halo_edge=(6.0-halo_width/fontScale)/SDF_PX;alpha=min(smoothstep(halo_edge-gamma_scaled,halo_edge+gamma_scaled,dist),1.0-alpha);}gl_FragColor=color*(alpha*opacity*fade_opacity);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`attribute vec4 a_pos_offset;attribute vec4 a_data;attribute vec4 a_pixeloffset;attribute vec3 a_projected_pos;attribute float a_fade_opacity;uniform bool u_is_size_zoom_constant;uniform bool u_is_size_feature_constant;uniform highp float u_size_t;uniform highp float u_size;uniform mat4 u_matrix;uniform mat4 u_label_plane_matrix;uniform mat4 u_coord_matrix;uniform bool u_is_text;uniform bool u_pitch_with_map;uniform bool u_is_along_line;uniform bool u_is_variable_anchor;uniform highp float u_pitch;uniform bool u_rotate_symbol;uniform highp float u_aspect_ratio;uniform highp float u_camera_to_center_distance;uniform float u_fade_change;uniform vec2 u_texsize;uniform vec2 u_translation;uniform float u_pitched_scale;varying vec2 v_data0;varying vec3 v_data1;vec4 projectTileWithElevation(vec2 posInTile,float elevation) {return u_matrix*vec4(posInTile,elevation,1.0);}
+#pragma mapbox: define highp vec4 fill_color
+#pragma mapbox: define highp vec4 halo_color
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp float halo_width
+#pragma mapbox: define lowp float halo_blur
+void main() {
+#pragma mapbox: initialize highp vec4 fill_color
+#pragma mapbox: initialize highp vec4 halo_color
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize lowp float halo_width
+#pragma mapbox: initialize lowp float halo_blur
+vec2 a_pos=a_pos_offset.xy;vec2 a_offset=a_pos_offset.zw;vec2 a_tex=a_data.xy;vec2 a_size=a_data.zw;float a_size_min=floor(a_size[0]*0.5);vec2 a_pxoffset=a_pixeloffset.xy;float ele=get_elevation(a_pos);highp float segment_angle=-a_projected_pos[2];float size;if (!u_is_size_zoom_constant && !u_is_size_feature_constant) {size=mix(a_size_min,a_size[1],u_size_t)/128.0;} else if (u_is_size_zoom_constant && !u_is_size_feature_constant) {size=a_size_min/128.0;} else {size=u_size;}vec2 translated_a_pos=a_pos+u_translation;vec4 projectedPoint=projectTileWithElevation(translated_a_pos,ele);highp float camera_to_anchor_distance=projectedPoint.w;highp float distance_ratio=u_pitch_with_map ?
+camera_to_anchor_distance/u_camera_to_center_distance :
+u_camera_to_center_distance/camera_to_anchor_distance;highp float perspective_ratio=clamp(0.5+0.5*distance_ratio,0.0,4.0);size*=perspective_ratio;float fontScale=u_is_text ? size/24.0 : size;highp float symbol_rotation=0.0;if (u_rotate_symbol) {vec4 offsetProjectedPoint=projectTileWithElevation(translated_a_pos+vec2(1,0),ele);vec2 a=projectedPoint.xy/projectedPoint.w;vec2 b=offsetProjectedPoint.xy/offsetProjectedPoint.w;symbol_rotation=atan((b.y-a.y)/u_aspect_ratio,b.x-a.x);}highp float angle_sin=sin(segment_angle+symbol_rotation);highp float angle_cos=cos(segment_angle+symbol_rotation);mat2 rotation_matrix=mat2(angle_cos,-1.0*angle_sin,angle_sin,angle_cos);vec4 projected_pos;if (u_is_along_line || u_is_variable_anchor) {projected_pos=vec4(a_projected_pos.xy,ele,1.0);} else if (u_pitch_with_map) {projected_pos=u_label_plane_matrix*vec4(a_projected_pos.xy+u_translation,ele,1.0);} else {projected_pos=u_label_plane_matrix*projectTileWithElevation(a_projected_pos.xy+u_translation,ele);}float z=float(u_pitch_with_map)*projected_pos.z/projected_pos.w;float projectionScaling=1.0;vec4 finalPos=u_coord_matrix*vec4(projected_pos.xy/projected_pos.w+rotation_matrix*(a_offset/32.0*fontScale+a_pxoffset)*projectionScaling,z,1.0);if(u_pitch_with_map) {finalPos=projectTileWithElevation(finalPos.xy,finalPos.z);}float gamma_scale=finalPos.w;gl_Position=finalPos;vec2 fade_opacity=unpack_opacity(a_fade_opacity);float visibility=calculate_visibility(projectedPoint);float fade_change=fade_opacity[1] > 0.5 ? u_fade_change :-u_fade_change;float interpolated_fade_opacity=max(0.0,min(visibility,fade_opacity[0]+fade_change));v_data0=a_tex/u_texsize;v_data1=vec3(gamma_scale,size,interpolated_fade_opacity);}`),symbolTextAndIcon:Mt(`#define SDF_PX 8.0
+#define SDF 1.0
+#define ICON 0.0
+uniform bool u_is_halo;uniform sampler2D u_texture;uniform sampler2D u_texture_icon;uniform highp float u_gamma_scale;uniform lowp float u_device_pixel_ratio;varying vec4 v_data0;varying vec4 v_data1;
+#pragma mapbox: define highp vec4 fill_color
+#pragma mapbox: define highp vec4 halo_color
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp float halo_width
+#pragma mapbox: define lowp float halo_blur
+void main() {
+#pragma mapbox: initialize highp vec4 fill_color
+#pragma mapbox: initialize highp vec4 halo_color
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize lowp float halo_width
+#pragma mapbox: initialize lowp float halo_blur
+float fade_opacity=v_data1[2];if (v_data1.w==ICON) {vec2 tex_icon=v_data0.zw;lowp float alpha=opacity*fade_opacity;gl_FragColor=texture2D(u_texture_icon,tex_icon)*alpha;
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+return;}vec2 tex=v_data0.xy;float EDGE_GAMMA=0.105/u_device_pixel_ratio;float gamma_scale=v_data1.x;float size=v_data1.y;float fontScale=size/24.0;lowp vec4 color=fill_color;highp float gamma=EDGE_GAMMA/(fontScale*u_gamma_scale);lowp float buff=(256.0-64.0)/256.0;if (u_is_halo) {color=halo_color;gamma=(halo_blur*1.19/SDF_PX+EDGE_GAMMA)/(fontScale*u_gamma_scale);buff=(6.0-halo_width/fontScale)/SDF_PX;}lowp float dist=texture2D(u_texture,tex).a;highp float gamma_scaled=gamma*gamma_scale;highp float alpha=smoothstep(buff-gamma_scaled,buff+gamma_scaled,dist);gl_FragColor=color*(alpha*opacity*fade_opacity);
+#ifdef OVERDRAW_INSPECTOR
+gl_FragColor=vec4(1.0);
+#endif
+}`,`attribute vec4 a_pos_offset;attribute vec4 a_data;attribute vec3 a_projected_pos;attribute float a_fade_opacity;uniform bool u_is_size_zoom_constant;uniform bool u_is_size_feature_constant;uniform highp float u_size_t;uniform highp float u_size;uniform mat4 u_matrix;uniform mat4 u_label_plane_matrix;uniform mat4 u_coord_matrix;uniform bool u_is_text;uniform bool u_pitch_with_map;uniform highp float u_pitch;uniform bool u_rotate_symbol;uniform highp float u_aspect_ratio;uniform highp float u_camera_to_center_distance;uniform float u_fade_change;uniform vec2 u_texsize;uniform vec2 u_texsize_icon;uniform bool u_is_along_line;uniform bool u_is_variable_anchor;uniform vec2 u_translation;uniform float u_pitched_scale;varying vec4 v_data0;varying vec4 v_data1;vec4 projectTileWithElevation(vec2 posInTile,float elevation) {return u_matrix*vec4(posInTile,elevation,1.0);}
+#pragma mapbox: define highp vec4 fill_color
+#pragma mapbox: define highp vec4 halo_color
+#pragma mapbox: define lowp float opacity
+#pragma mapbox: define lowp float halo_width
+#pragma mapbox: define lowp float halo_blur
+void main() {
+#pragma mapbox: initialize highp vec4 fill_color
+#pragma mapbox: initialize highp vec4 halo_color
+#pragma mapbox: initialize lowp float opacity
+#pragma mapbox: initialize lowp float halo_width
+#pragma mapbox: initialize lowp float halo_blur
+vec2 a_pos=a_pos_offset.xy;vec2 a_offset=a_pos_offset.zw;vec2 a_tex=a_data.xy;vec2 a_size=a_data.zw;float a_size_min=floor(a_size[0]*0.5);float is_sdf=a_size[0]-2.0*a_size_min;float ele=get_elevation(a_pos);highp float segment_angle=-a_projected_pos[2];float size;if (!u_is_size_zoom_constant && !u_is_size_feature_constant) {size=mix(a_size_min,a_size[1],u_size_t)/128.0;} else if (u_is_size_zoom_constant && !u_is_size_feature_constant) {size=a_size_min/128.0;} else {size=u_size;}vec2 translated_a_pos=a_pos+u_translation;vec4 projectedPoint=projectTileWithElevation(translated_a_pos,ele);highp float camera_to_anchor_distance=projectedPoint.w;highp float distance_ratio=u_pitch_with_map ?
+camera_to_anchor_distance/u_camera_to_center_distance :
+u_camera_to_center_distance/camera_to_anchor_distance;highp float perspective_ratio=clamp(0.5+0.5*distance_ratio,0.0,4.0);size*=perspective_ratio;float fontScale=size/24.0;highp float symbol_rotation=0.0;if (u_rotate_symbol) {vec4 offsetProjectedPoint=projectTileWithElevation(translated_a_pos+vec2(1,0),ele);vec2 a=projectedPoint.xy/projectedPoint.w;vec2 b=offsetProjectedPoint.xy/offsetProjectedPoint.w;symbol_rotation=atan((b.y-a.y)/u_aspect_ratio,b.x-a.x);}highp float angle_sin=sin(segment_angle+symbol_rotation);highp float angle_cos=cos(segment_angle+symbol_rotation);mat2 rotation_matrix=mat2(angle_cos,-1.0*angle_sin,angle_sin,angle_cos);vec4 projected_pos;if (u_is_along_line || u_is_variable_anchor) {projected_pos=vec4(a_projected_pos.xy,ele,1.0);} else if (u_pitch_with_map) {projected_pos=u_label_plane_matrix*vec4(a_projected_pos.xy+u_translation,ele,1.0);} else {projected_pos=u_label_plane_matrix*projectTileWithElevation(a_projected_pos.xy+u_translation,ele);}float z=float(u_pitch_with_map)*projected_pos.z/projected_pos.w;float projectionScaling=1.0;vec4 finalPos=u_coord_matrix*vec4(projected_pos.xy/projected_pos.w+rotation_matrix*(a_offset/32.0*fontScale)*projectionScaling,z,1.0);if(u_pitch_with_map) {finalPos=projectTileWithElevation(finalPos.xy,finalPos.z);}float gamma_scale=finalPos.w;gl_Position=finalPos;vec2 fade_opacity=unpack_opacity(a_fade_opacity);float visibility=calculate_visibility(projectedPoint);float fade_change=fade_opacity[1] > 0.5 ? u_fade_change :-u_fade_change;float interpolated_fade_opacity=max(0.0,min(visibility,fade_opacity[0]+fade_change));v_data0.xy=a_tex/u_texsize;v_data0.zw=a_tex/u_texsize_icon;v_data1=vec4(gamma_scale,size,interpolated_fade_opacity,is_sdf);}`),terrain:Mt("uniform sampler2D u_texture;uniform vec4 u_fog_color;uniform vec4 u_horizon_color;uniform float u_fog_ground_blend;uniform float u_fog_ground_blend_opacity;uniform float u_horizon_fog_blend;varying vec2 v_texture_pos;varying float v_fog_depth;const float gamma=2.2;vec4 gammaToLinear(vec4 color) {return pow(color,vec4(gamma));}vec4 linearToGamma(vec4 color) {return pow(color,vec4(1.0/gamma));}void main() {vec4 surface_color=texture2D(u_texture,v_texture_pos);if (v_fog_depth > u_fog_ground_blend) {vec4 surface_color_linear=gammaToLinear(surface_color);float blend_color=smoothstep(0.0,1.0,max((v_fog_depth-u_horizon_fog_blend)/(1.0-u_horizon_fog_blend),0.0));vec4 fog_horizon_color_linear=mix(gammaToLinear(u_fog_color),gammaToLinear(u_horizon_color),blend_color);float factor_fog=max(v_fog_depth-u_fog_ground_blend,0.0)/(1.0-u_fog_ground_blend);gl_FragColor=linearToGamma(mix(surface_color_linear,fog_horizon_color_linear,pow(factor_fog,2.0)*u_fog_ground_blend_opacity));} else {gl_FragColor=surface_color;}}","attribute vec3 a_pos3d;uniform mat4 u_matrix;uniform mat4 u_fog_matrix;uniform float u_ele_delta;varying vec2 v_texture_pos;varying float v_fog_depth;void main() {float ele=get_elevation(a_pos3d.xy);float ele_delta=a_pos3d.z==1.0 ? u_ele_delta : 0.0;v_texture_pos=a_pos3d.xy/8192.0;gl_Position=u_matrix*vec4(a_pos3d.xy,ele-ele_delta,1.0);vec4 pos=u_fog_matrix*vec4(a_pos3d.xy,ele,1.0);v_fog_depth=pos.z/pos.w*0.5+0.5;}"),terrainDepth:Mt("varying float v_depth;const highp vec4 bitSh=vec4(256.*256.*256.,256.*256.,256.,1.);const highp vec4 bitMsk=vec4(0.,vec3(1./256.0));highp vec4 pack(highp float value) {highp vec4 comp=fract(value*bitSh);comp-=comp.xxyz*bitMsk;return comp;}void main() {gl_FragColor=pack(v_depth);}","attribute vec3 a_pos3d;uniform mat4 u_matrix;uniform float u_ele_delta;varying float v_depth;void main() {float ele=get_elevation(a_pos3d.xy);float ele_delta=a_pos3d.z==1.0 ? u_ele_delta : 0.0;gl_Position=u_matrix*vec4(a_pos3d.xy,ele-ele_delta,1.0);v_depth=gl_Position.z/gl_Position.w;}"),terrainCoords:Mt("precision mediump float;uniform sampler2D u_texture;uniform float u_terrain_coords_id;varying vec2 v_texture_pos;void main() {vec4 rgba=texture2D(u_texture,v_texture_pos);gl_FragColor=vec4(rgba.r,rgba.g,rgba.b,u_terrain_coords_id);}","attribute vec3 a_pos3d;uniform mat4 u_matrix;uniform float u_ele_delta;varying vec2 v_texture_pos;void main() {float ele=get_elevation(a_pos3d.xy);float ele_delta=a_pos3d.z==1.0 ? u_ele_delta : 0.0;v_texture_pos=a_pos3d.xy/8192.0;gl_Position=u_matrix*vec4(a_pos3d.xy,ele-ele_delta,1.0);}"),sky:Mt("uniform vec4 u_sky_color;uniform vec4 u_horizon_color;uniform float u_horizon;uniform float u_sky_horizon_blend;void main() {float y=gl_FragCoord.y;if (y > u_horizon) {float blend=y-u_horizon;if (blend < u_sky_horizon_blend) {gl_FragColor=mix(u_sky_color,u_horizon_color,pow(1.0-blend/u_sky_horizon_blend,2.0));} else {gl_FragColor=u_sky_color;}}}","attribute vec2 a_pos;void main() {gl_Position=vec4(a_pos,1.0,1.0);}")};function Mt(ue,w){let B=/#pragma mapbox: ([\w]+) ([\w]+) ([\w]+) ([\w]+)/g,Q=w.match(/attribute ([\w]+) ([\w]+)/g),ee=ue.match(/uniform ([\w]+) ([\w]+)([\s]*)([\w]*)/g),le=w.match(/uniform ([\w]+) ([\w]+)([\s]*)([\w]*)/g),qe=le?le.concat(ee):ee,Xe={};return{fragmentSource:ue=ue.replace(B,(ot,Tt,Yt,Kt,xr)=>(Xe[xr]=!0,Tt==="define"?`
+#ifndef HAS_UNIFORM_u_${xr}
+varying ${Yt} ${Kt} ${xr};
+#else
+uniform ${Yt} ${Kt} u_${xr};
+#endif
+`:`
+#ifdef HAS_UNIFORM_u_${xr}
+    ${Yt} ${Kt} ${xr} = u_${xr};
+#endif
+`)),vertexSource:w=w.replace(B,(ot,Tt,Yt,Kt,xr)=>{let Ir=Kt==="float"?"vec2":"vec4",ve=xr.match(/color/)?"color":Ir;return Xe[xr]?Tt==="define"?`
+#ifndef HAS_UNIFORM_u_${xr}
+uniform lowp float u_${xr}_t;
+attribute ${Yt} ${Ir} a_${xr};
+varying ${Yt} ${Kt} ${xr};
+#else
+uniform ${Yt} ${Kt} u_${xr};
+#endif
+`:ve==="vec4"?`
+#ifndef HAS_UNIFORM_u_${xr}
+    ${xr} = a_${xr};
+#else
+    ${Yt} ${Kt} ${xr} = u_${xr};
+#endif
+`:`
+#ifndef HAS_UNIFORM_u_${xr}
+    ${xr} = unpack_mix_${ve}(a_${xr}, u_${xr}_t);
+#else
+    ${Yt} ${Kt} ${xr} = u_${xr};
+#endif
+`:Tt==="define"?`
+#ifndef HAS_UNIFORM_u_${xr}
+uniform lowp float u_${xr}_t;
+attribute ${Yt} ${Ir} a_${xr};
+#else
+uniform ${Yt} ${Kt} u_${xr};
+#endif
+`:ve==="vec4"?`
+#ifndef HAS_UNIFORM_u_${xr}
+    ${Yt} ${Kt} ${xr} = a_${xr};
+#else
+    ${Yt} ${Kt} ${xr} = u_${xr};
+#endif
+`:`
+#ifndef HAS_UNIFORM_u_${xr}
+    ${Yt} ${Kt} ${xr} = unpack_mix_${ve}(a_${xr}, u_${xr}_t);
+#else
+    ${Yt} ${Kt} ${xr} = u_${xr};
+#endif
+`}),staticAttributes:Q,staticUniforms:qe}}class kr{constructor(){this.boundProgram=null,this.boundLayoutVertexBuffer=null,this.boundPaintVertexBuffers=[],this.boundIndexBuffer=null,this.boundVertexOffset=null,this.boundDynamicVertexBuffer=null,this.vao=null}bind(w,B,Q,ee,le,qe,Xe,ot,Tt){this.context=w;let Yt=this.boundPaintVertexBuffers.length!==ee.length;for(let Kt=0;!Yt&&Kt<ee.length;Kt++)this.boundPaintVertexBuffers[Kt]!==ee[Kt]&&(Yt=!0);!this.vao||this.boundProgram!==B||this.boundLayoutVertexBuffer!==Q||Yt||this.boundIndexBuffer!==le||this.boundVertexOffset!==qe||this.boundDynamicVertexBuffer!==Xe||this.boundDynamicVertexBuffer2!==ot||this.boundDynamicVertexBuffer3!==Tt?this.freshBind(B,Q,ee,le,qe,Xe,ot,Tt):(w.bindVertexArray.set(this.vao),Xe&&Xe.bind(),le&&le.dynamicDraw&&le.bind(),ot&&ot.bind(),Tt&&Tt.bind())}freshBind(w,B,Q,ee,le,qe,Xe,ot){let Tt=w.numAttributes,Yt=this.context,Kt=Yt.gl;this.vao&&this.destroy(),this.vao=Yt.createVertexArray(),Yt.bindVertexArray.set(this.vao),this.boundProgram=w,this.boundLayoutVertexBuffer=B,this.boundPaintVertexBuffers=Q,this.boundIndexBuffer=ee,this.boundVertexOffset=le,this.boundDynamicVertexBuffer=qe,this.boundDynamicVertexBuffer2=Xe,this.boundDynamicVertexBuffer3=ot,B.enableAttributes(Kt,w);for(let xr of Q)xr.enableAttributes(Kt,w);qe&&qe.enableAttributes(Kt,w),Xe&&Xe.enableAttributes(Kt,w),ot&&ot.enableAttributes(Kt,w),B.bind(),B.setVertexAttribPointers(Kt,w,le);for(let xr of Q)xr.bind(),xr.setVertexAttribPointers(Kt,w,le);qe&&(qe.bind(),qe.setVertexAttribPointers(Kt,w,le)),ee&&ee.bind(),Xe&&(Xe.bind(),Xe.setVertexAttribPointers(Kt,w,le)),ot&&(ot.bind(),ot.setVertexAttribPointers(Kt,w,le)),Yt.currentNumAttributes=Tt}destroy(){this.vao&&(this.context.deleteVertexArray(this.vao),this.vao=null)}}let Jr=(ue,w,B,Q,ee)=>({u_matrix:ue,u_texture:0,u_ele_delta:w,u_fog_matrix:B,u_fog_color:Q?Q.properties.get("fog-color"):a.aM.white,u_fog_ground_blend:Q?Q.properties.get("fog-ground-blend"):1,u_fog_ground_blend_opacity:Q?Q.calculateFogBlendOpacity(ee):0,u_horizon_color:Q?Q.properties.get("horizon-color"):a.aM.white,u_horizon_fog_blend:Q?Q.properties.get("horizon-fog-blend"):1});function vi(ue){let w=[];for(let B=0;B<ue.length;B++){if(ue[B]===null)continue;let Q=ue[B].split(" ");w.push(Q.pop())}return w}class hn{constructor(w,B,Q,ee,le,qe){let Xe=w.gl;this.program=Xe.createProgram();let ot=vi(B.staticAttributes),Tt=Q?Q.getBinderAttributes():[],Yt=ot.concat(Tt),Kt=jn.prelude.staticUniforms?vi(jn.prelude.staticUniforms):[],xr=B.staticUniforms?vi(B.staticUniforms):[],Ir=Q?Q.getBinderUniforms():[],ve=Kt.concat(xr).concat(Ir),be=[];for(let Ht of ve)be.indexOf(Ht)<0&&be.push(Ht);let De=Q?Q.defines():[];le&&De.push("#define OVERDRAW_INSPECTOR;"),qe&&De.push("#define TERRAIN3D;");let Be=De.concat(jn.prelude.fragmentSource,B.fragmentSource).join(`
+`),et=De.concat(jn.prelude.vertexSource,B.vertexSource).join(`
+`),We=Xe.createShader(Xe.FRAGMENT_SHADER);if(Xe.isContextLost())return void(this.failedToCreate=!0);if(Xe.shaderSource(We,Be),Xe.compileShader(We),!Xe.getShaderParameter(We,Xe.COMPILE_STATUS))throw new Error(`Could not compile fragment shader: ${Xe.getShaderInfoLog(We)}`);Xe.attachShader(this.program,We);let it=Xe.createShader(Xe.VERTEX_SHADER);if(Xe.isContextLost())return void(this.failedToCreate=!0);if(Xe.shaderSource(it,et),Xe.compileShader(it),!Xe.getShaderParameter(it,Xe.COMPILE_STATUS))throw new Error(`Could not compile vertex shader: ${Xe.getShaderInfoLog(it)}`);Xe.attachShader(this.program,it),this.attributes={};let Ft={};this.numAttributes=Yt.length;for(let Ht=0;Ht<this.numAttributes;Ht++)Yt[Ht]&&(Xe.bindAttribLocation(this.program,Ht,Yt[Ht]),this.attributes[Yt[Ht]]=Ht);if(Xe.linkProgram(this.program),!Xe.getProgramParameter(this.program,Xe.LINK_STATUS))throw new Error(`Program failed to link: ${Xe.getProgramInfoLog(this.program)}`);Xe.deleteShader(it),Xe.deleteShader(We);for(let Ht=0;Ht<be.length;Ht++){let tr=be[Ht];if(tr&&!Ft[tr]){let dr=Xe.getUniformLocation(this.program,tr);dr&&(Ft[tr]=dr)}}this.fixedUniforms=ee(w,Ft),this.terrainUniforms=((Ht,tr)=>({u_depth:new a.aH(Ht,tr.u_depth),u_terrain:new a.aH(Ht,tr.u_terrain),u_terrain_dim:new a.aI(Ht,tr.u_terrain_dim),u_terrain_matrix:new a.aJ(Ht,tr.u_terrain_matrix),u_terrain_unpack:new a.aK(Ht,tr.u_terrain_unpack),u_terrain_exaggeration:new a.aI(Ht,tr.u_terrain_exaggeration)}))(w,Ft),this.binderUniforms=Q?Q.getUniforms(w,Ft):[]}draw(w,B,Q,ee,le,qe,Xe,ot,Tt,Yt,Kt,xr,Ir,ve,be,De,Be,et){let We=w.gl;if(this.failedToCreate)return;if(w.program.set(this.program),w.setDepthMode(Q),w.setStencilMode(ee),w.setColorMode(le),w.setCullFace(qe),ot){w.activeTexture.set(We.TEXTURE2),We.bindTexture(We.TEXTURE_2D,ot.depthTexture),w.activeTexture.set(We.TEXTURE3),We.bindTexture(We.TEXTURE_2D,ot.texture);for(let Ft in this.terrainUniforms)this.terrainUniforms[Ft].set(ot[Ft])}for(let Ft in this.fixedUniforms)this.fixedUniforms[Ft].set(Xe[Ft]);be&&be.setUniforms(w,this.binderUniforms,Ir,{zoom:ve});let it=0;switch(B){case We.LINES:it=2;break;case We.TRIANGLES:it=3;break;case We.LINE_STRIP:it=1}for(let Ft of xr.get()){let Ht=Ft.vaos||(Ft.vaos={});(Ht[Tt]||(Ht[Tt]=new kr)).bind(w,this,Yt,be?be.getPaintVertexBuffers():[],Kt,Ft.vertexOffset,De,Be,et),We.drawElements(B,Ft.primitiveLength*it,We.UNSIGNED_SHORT,Ft.primitiveOffset*it*2)}}}function An(ue,w,B){let Q=1/In(B,1,w.transform.tileZoom),ee=Math.pow(2,B.tileID.overscaledZ),le=B.tileSize*Math.pow(2,w.transform.tileZoom)/ee,qe=le*(B.tileID.canonical.x+B.tileID.wrap*ee),Xe=le*B.tileID.canonical.y;return{u_image:0,u_texsize:B.imageAtlasTexture.size,u_scale:[Q,ue.fromScale,ue.toScale],u_fade:ue.t,u_pixel_coord_upper:[qe>>16,Xe>>16],u_pixel_coord_lower:[65535&qe,65535&Xe]}}let Mn=(ue,w,B,Q)=>{let ee=w.style.light,le=ee.properties.get("position"),qe=[le.x,le.y,le.z],Xe=function(){var Tt=new a.A(9);return a.A!=Float32Array&&(Tt[1]=0,Tt[2]=0,Tt[3]=0,Tt[5]=0,Tt[6]=0,Tt[7]=0),Tt[0]=1,Tt[4]=1,Tt[8]=1,Tt}();ee.properties.get("anchor")==="viewport"&&function(Tt,Yt){var Kt=Math.sin(Yt),xr=Math.cos(Yt);Tt[0]=xr,Tt[1]=Kt,Tt[2]=0,Tt[3]=-Kt,Tt[4]=xr,Tt[5]=0,Tt[6]=0,Tt[7]=0,Tt[8]=1}(Xe,-w.transform.angle),function(Tt,Yt,Kt){var xr=Yt[0],Ir=Yt[1],ve=Yt[2];Tt[0]=xr*Kt[0]+Ir*Kt[3]+ve*Kt[6],Tt[1]=xr*Kt[1]+Ir*Kt[4]+ve*Kt[7],Tt[2]=xr*Kt[2]+Ir*Kt[5]+ve*Kt[8]}(qe,qe,Xe);let ot=ee.properties.get("color");return{u_matrix:ue,u_lightpos:qe,u_lightintensity:ee.properties.get("intensity"),u_lightcolor:[ot.r,ot.g,ot.b],u_vertical_gradient:+B,u_opacity:Q}},Li=(ue,w,B,Q,ee,le,qe)=>a.e(Mn(ue,w,B,Q),An(le,w,qe),{u_height_factor:-Math.pow(2,ee.overscaledZ)/qe.tileSize/8}),_n=ue=>({u_matrix:ue}),ya=(ue,w,B,Q)=>a.e(_n(ue),An(B,w,Q)),Jn=(ue,w)=>({u_matrix:ue,u_world:w}),Ma=(ue,w,B,Q,ee)=>a.e(ya(ue,w,B,Q),{u_world:ee}),_o=(ue,w,B,Q)=>{let ee=ue.transform,le,qe;if(Q.paint.get("circle-pitch-alignment")==="map"){let Xe=In(B,1,ee.zoom);le=!0,qe=[Xe,Xe]}else le=!1,qe=ee.pixelsToGLUnits;return{u_camera_to_center_distance:ee.cameraToCenterDistance,u_scale_with_map:+(Q.paint.get("circle-pitch-scale")==="map"),u_matrix:ue.translatePosMatrix(w.posMatrix,B,Q.paint.get("circle-translate"),Q.paint.get("circle-translate-anchor")),u_pitch_with_map:+le,u_device_pixel_ratio:ue.pixelRatio,u_extrude_scale:qe}},No=(ue,w,B)=>({u_matrix:ue,u_inv_matrix:w,u_camera_to_center_distance:B.cameraToCenterDistance,u_viewport_size:[B.width,B.height]}),po=(ue,w,B=1)=>({u_matrix:ue,u_color:w,u_overlay:0,u_overlay_scale:B}),Lo=ue=>({u_matrix:ue}),ko=(ue,w,B,Q)=>({u_matrix:ue,u_extrude_scale:In(w,1,B),u_intensity:Q}),Ds=(ue,w,B,Q)=>{let ee=a.H();a.aP(ee,0,ue.width,ue.height,0,0,1);let le=ue.context.gl;return{u_matrix:ee,u_world:[le.drawingBufferWidth,le.drawingBufferHeight],u_image:B,u_color_ramp:Q,u_opacity:w.paint.get("heatmap-opacity")}};function Fs(ue,w){let B=Math.pow(2,w.canonical.z),Q=w.canonical.y;return[new a.Z(0,Q/B).toLngLat().lat,new a.Z(0,(Q+1)/B).toLngLat().lat]}let ll=(ue,w,B,Q)=>{let ee=ue.transform;return{u_matrix:As(ue,w,B,Q),u_ratio:1/In(w,1,ee.zoom),u_device_pixel_ratio:ue.pixelRatio,u_units_to_pixels:[1/ee.pixelsToGLUnits[0],1/ee.pixelsToGLUnits[1]]}},ul=(ue,w,B,Q,ee)=>a.e(ll(ue,w,B,ee),{u_image:0,u_image_height:Q}),zl=(ue,w,B,Q,ee)=>{let le=ue.transform,qe=il(w,le);return{u_matrix:As(ue,w,B,ee),u_texsize:w.imageAtlasTexture.size,u_ratio:1/In(w,1,le.zoom),u_device_pixel_ratio:ue.pixelRatio,u_image:0,u_scale:[qe,Q.fromScale,Q.toScale],u_fade:Q.t,u_units_to_pixels:[1/le.pixelsToGLUnits[0],1/le.pixelsToGLUnits[1]]}},us=(ue,w,B,Q,ee,le)=>{let qe=ue.lineAtlas,Xe=il(w,ue.transform),ot=B.layout.get("line-cap")==="round",Tt=qe.getDash(Q.from,ot),Yt=qe.getDash(Q.to,ot),Kt=Tt.width*ee.fromScale,xr=Yt.width*ee.toScale;return a.e(ll(ue,w,B,le),{u_patternscale_a:[Xe/Kt,-Tt.height/2],u_patternscale_b:[Xe/xr,-Yt.height/2],u_sdfgamma:qe.width/(256*Math.min(Kt,xr)*ue.pixelRatio)/2,u_image:0,u_tex_y_a:Tt.y,u_tex_y_b:Yt.y,u_mix:ee.t})};function il(ue,w){return 1/In(ue,1,w.tileZoom)}function As(ue,w,B,Q){return ue.translatePosMatrix(Q?Q.posMatrix:w.tileID.posMatrix,w,B.paint.get("line-translate"),B.paint.get("line-translate-anchor"))}let cl=(ue,w,B,Q,ee)=>{return{u_matrix:ue,u_tl_parent:w,u_scale_parent:B,u_buffer_scale:1,u_fade_t:Q.mix,u_opacity:Q.opacity*ee.paint.get("raster-opacity"),u_image0:0,u_image1:1,u_brightness_low:ee.paint.get("raster-brightness-min"),u_brightness_high:ee.paint.get("raster-brightness-max"),u_saturation_factor:(qe=ee.paint.get("raster-saturation"),qe>0?1-1/(1.001-qe):-qe),u_contrast_factor:(le=ee.paint.get("raster-contrast"),le>0?1/(1-le):1+le),u_spin_weights:Ks(ee.paint.get("raster-hue-rotate"))};var le,qe};function Ks(ue){ue*=Math.PI/180;let w=Math.sin(ue),B=Math.cos(ue);return[(2*B+1)/3,(-Math.sqrt(3)*w-B+1)/3,(Math.sqrt(3)*w-B+1)/3]}let zs=(ue,w,B,Q,ee,le,qe,Xe,ot,Tt,Yt,Kt,xr,Ir)=>{let ve=qe.transform;return{u_is_size_zoom_constant:+(ue==="constant"||ue==="source"),u_is_size_feature_constant:+(ue==="constant"||ue==="camera"),u_size_t:w?w.uSizeT:0,u_size:w?w.uSize:0,u_camera_to_center_distance:ve.cameraToCenterDistance,u_pitch:ve.pitch/360*2*Math.PI,u_rotate_symbol:+B,u_aspect_ratio:ve.width/ve.height,u_fade_change:qe.options.fadeDuration?qe.symbolFadeChange:1,u_matrix:Xe,u_label_plane_matrix:ot,u_coord_matrix:Tt,u_is_text:+Kt,u_pitch_with_map:+Q,u_is_along_line:ee,u_is_variable_anchor:le,u_texsize:xr,u_texture:0,u_translation:Yt,u_pitched_scale:Ir}},Io=(ue,w,B,Q,ee,le,qe,Xe,ot,Tt,Yt,Kt,xr,Ir,ve)=>{let be=qe.transform;return a.e(zs(ue,w,B,Q,ee,le,qe,Xe,ot,Tt,Yt,Kt,xr,ve),{u_gamma_scale:Q?Math.cos(be._pitch)*be.cameraToCenterDistance:1,u_device_pixel_ratio:qe.pixelRatio,u_is_halo:+Ir})},ls=(ue,w,B,Q,ee,le,qe,Xe,ot,Tt,Yt,Kt,xr,Ir)=>a.e(Io(ue,w,B,Q,ee,le,qe,Xe,ot,Tt,Yt,!0,Kt,!0,Ir),{u_texsize_icon:xr,u_texture_icon:1}),Zl=(ue,w,B)=>({u_matrix:ue,u_opacity:w,u_color:B}),Su=(ue,w,B,Q,ee,le)=>a.e(function(qe,Xe,ot,Tt){let Yt=ot.imageManager.getPattern(qe.from.toString()),Kt=ot.imageManager.getPattern(qe.to.toString()),{width:xr,height:Ir}=ot.imageManager.getPixelSize(),ve=Math.pow(2,Tt.tileID.overscaledZ),be=Tt.tileSize*Math.pow(2,ot.transform.tileZoom)/ve,De=be*(Tt.tileID.canonical.x+Tt.tileID.wrap*ve),Be=be*Tt.tileID.canonical.y;return{u_image:0,u_pattern_tl_a:Yt.tl,u_pattern_br_a:Yt.br,u_pattern_tl_b:Kt.tl,u_pattern_br_b:Kt.br,u_texsize:[xr,Ir],u_mix:Xe.t,u_pattern_size_a:Yt.displaySize,u_pattern_size_b:Kt.displaySize,u_scale_a:Xe.fromScale,u_scale_b:Xe.toScale,u_tile_units_to_pixels:1/In(Tt,1,ot.transform.tileZoom),u_pixel_coord_upper:[De>>16,Be>>16],u_pixel_coord_lower:[65535&De,65535&Be]}}(Q,le,B,ee),{u_matrix:ue,u_opacity:w}),nc={fillExtrusion:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_lightpos:new a.aN(ue,w.u_lightpos),u_lightintensity:new a.aI(ue,w.u_lightintensity),u_lightcolor:new a.aN(ue,w.u_lightcolor),u_vertical_gradient:new a.aI(ue,w.u_vertical_gradient),u_opacity:new a.aI(ue,w.u_opacity)}),fillExtrusionPattern:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_lightpos:new a.aN(ue,w.u_lightpos),u_lightintensity:new a.aI(ue,w.u_lightintensity),u_lightcolor:new a.aN(ue,w.u_lightcolor),u_vertical_gradient:new a.aI(ue,w.u_vertical_gradient),u_height_factor:new a.aI(ue,w.u_height_factor),u_image:new a.aH(ue,w.u_image),u_texsize:new a.aO(ue,w.u_texsize),u_pixel_coord_upper:new a.aO(ue,w.u_pixel_coord_upper),u_pixel_coord_lower:new a.aO(ue,w.u_pixel_coord_lower),u_scale:new a.aN(ue,w.u_scale),u_fade:new a.aI(ue,w.u_fade),u_opacity:new a.aI(ue,w.u_opacity)}),fill:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix)}),fillPattern:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_image:new a.aH(ue,w.u_image),u_texsize:new a.aO(ue,w.u_texsize),u_pixel_coord_upper:new a.aO(ue,w.u_pixel_coord_upper),u_pixel_coord_lower:new a.aO(ue,w.u_pixel_coord_lower),u_scale:new a.aN(ue,w.u_scale),u_fade:new a.aI(ue,w.u_fade)}),fillOutline:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_world:new a.aO(ue,w.u_world)}),fillOutlinePattern:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_world:new a.aO(ue,w.u_world),u_image:new a.aH(ue,w.u_image),u_texsize:new a.aO(ue,w.u_texsize),u_pixel_coord_upper:new a.aO(ue,w.u_pixel_coord_upper),u_pixel_coord_lower:new a.aO(ue,w.u_pixel_coord_lower),u_scale:new a.aN(ue,w.u_scale),u_fade:new a.aI(ue,w.u_fade)}),circle:(ue,w)=>({u_camera_to_center_distance:new a.aI(ue,w.u_camera_to_center_distance),u_scale_with_map:new a.aH(ue,w.u_scale_with_map),u_pitch_with_map:new a.aH(ue,w.u_pitch_with_map),u_extrude_scale:new a.aO(ue,w.u_extrude_scale),u_device_pixel_ratio:new a.aI(ue,w.u_device_pixel_ratio),u_matrix:new a.aJ(ue,w.u_matrix)}),collisionBox:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_pixel_extrude_scale:new a.aO(ue,w.u_pixel_extrude_scale)}),collisionCircle:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_inv_matrix:new a.aJ(ue,w.u_inv_matrix),u_camera_to_center_distance:new a.aI(ue,w.u_camera_to_center_distance),u_viewport_size:new a.aO(ue,w.u_viewport_size)}),debug:(ue,w)=>({u_color:new a.aL(ue,w.u_color),u_matrix:new a.aJ(ue,w.u_matrix),u_overlay:new a.aH(ue,w.u_overlay),u_overlay_scale:new a.aI(ue,w.u_overlay_scale)}),clippingMask:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix)}),heatmap:(ue,w)=>({u_extrude_scale:new a.aI(ue,w.u_extrude_scale),u_intensity:new a.aI(ue,w.u_intensity),u_matrix:new a.aJ(ue,w.u_matrix)}),heatmapTexture:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_world:new a.aO(ue,w.u_world),u_image:new a.aH(ue,w.u_image),u_color_ramp:new a.aH(ue,w.u_color_ramp),u_opacity:new a.aI(ue,w.u_opacity)}),hillshade:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_image:new a.aH(ue,w.u_image),u_latrange:new a.aO(ue,w.u_latrange),u_light:new a.aO(ue,w.u_light),u_shadow:new a.aL(ue,w.u_shadow),u_highlight:new a.aL(ue,w.u_highlight),u_accent:new a.aL(ue,w.u_accent)}),hillshadePrepare:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_image:new a.aH(ue,w.u_image),u_dimension:new a.aO(ue,w.u_dimension),u_zoom:new a.aI(ue,w.u_zoom),u_unpack:new a.aK(ue,w.u_unpack)}),line:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_ratio:new a.aI(ue,w.u_ratio),u_device_pixel_ratio:new a.aI(ue,w.u_device_pixel_ratio),u_units_to_pixels:new a.aO(ue,w.u_units_to_pixels)}),lineGradient:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_ratio:new a.aI(ue,w.u_ratio),u_device_pixel_ratio:new a.aI(ue,w.u_device_pixel_ratio),u_units_to_pixels:new a.aO(ue,w.u_units_to_pixels),u_image:new a.aH(ue,w.u_image),u_image_height:new a.aI(ue,w.u_image_height)}),linePattern:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_texsize:new a.aO(ue,w.u_texsize),u_ratio:new a.aI(ue,w.u_ratio),u_device_pixel_ratio:new a.aI(ue,w.u_device_pixel_ratio),u_image:new a.aH(ue,w.u_image),u_units_to_pixels:new a.aO(ue,w.u_units_to_pixels),u_scale:new a.aN(ue,w.u_scale),u_fade:new a.aI(ue,w.u_fade)}),lineSDF:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_ratio:new a.aI(ue,w.u_ratio),u_device_pixel_ratio:new a.aI(ue,w.u_device_pixel_ratio),u_units_to_pixels:new a.aO(ue,w.u_units_to_pixels),u_patternscale_a:new a.aO(ue,w.u_patternscale_a),u_patternscale_b:new a.aO(ue,w.u_patternscale_b),u_sdfgamma:new a.aI(ue,w.u_sdfgamma),u_image:new a.aH(ue,w.u_image),u_tex_y_a:new a.aI(ue,w.u_tex_y_a),u_tex_y_b:new a.aI(ue,w.u_tex_y_b),u_mix:new a.aI(ue,w.u_mix)}),raster:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_tl_parent:new a.aO(ue,w.u_tl_parent),u_scale_parent:new a.aI(ue,w.u_scale_parent),u_buffer_scale:new a.aI(ue,w.u_buffer_scale),u_fade_t:new a.aI(ue,w.u_fade_t),u_opacity:new a.aI(ue,w.u_opacity),u_image0:new a.aH(ue,w.u_image0),u_image1:new a.aH(ue,w.u_image1),u_brightness_low:new a.aI(ue,w.u_brightness_low),u_brightness_high:new a.aI(ue,w.u_brightness_high),u_saturation_factor:new a.aI(ue,w.u_saturation_factor),u_contrast_factor:new a.aI(ue,w.u_contrast_factor),u_spin_weights:new a.aN(ue,w.u_spin_weights)}),symbolIcon:(ue,w)=>({u_is_size_zoom_constant:new a.aH(ue,w.u_is_size_zoom_constant),u_is_size_feature_constant:new a.aH(ue,w.u_is_size_feature_constant),u_size_t:new a.aI(ue,w.u_size_t),u_size:new a.aI(ue,w.u_size),u_camera_to_center_distance:new a.aI(ue,w.u_camera_to_center_distance),u_pitch:new a.aI(ue,w.u_pitch),u_rotate_symbol:new a.aH(ue,w.u_rotate_symbol),u_aspect_ratio:new a.aI(ue,w.u_aspect_ratio),u_fade_change:new a.aI(ue,w.u_fade_change),u_matrix:new a.aJ(ue,w.u_matrix),u_label_plane_matrix:new a.aJ(ue,w.u_label_plane_matrix),u_coord_matrix:new a.aJ(ue,w.u_coord_matrix),u_is_text:new a.aH(ue,w.u_is_text),u_pitch_with_map:new a.aH(ue,w.u_pitch_with_map),u_is_along_line:new a.aH(ue,w.u_is_along_line),u_is_variable_anchor:new a.aH(ue,w.u_is_variable_anchor),u_texsize:new a.aO(ue,w.u_texsize),u_texture:new a.aH(ue,w.u_texture),u_translation:new a.aO(ue,w.u_translation),u_pitched_scale:new a.aI(ue,w.u_pitched_scale)}),symbolSDF:(ue,w)=>({u_is_size_zoom_constant:new a.aH(ue,w.u_is_size_zoom_constant),u_is_size_feature_constant:new a.aH(ue,w.u_is_size_feature_constant),u_size_t:new a.aI(ue,w.u_size_t),u_size:new a.aI(ue,w.u_size),u_camera_to_center_distance:new a.aI(ue,w.u_camera_to_center_distance),u_pitch:new a.aI(ue,w.u_pitch),u_rotate_symbol:new a.aH(ue,w.u_rotate_symbol),u_aspect_ratio:new a.aI(ue,w.u_aspect_ratio),u_fade_change:new a.aI(ue,w.u_fade_change),u_matrix:new a.aJ(ue,w.u_matrix),u_label_plane_matrix:new a.aJ(ue,w.u_label_plane_matrix),u_coord_matrix:new a.aJ(ue,w.u_coord_matrix),u_is_text:new a.aH(ue,w.u_is_text),u_pitch_with_map:new a.aH(ue,w.u_pitch_with_map),u_is_along_line:new a.aH(ue,w.u_is_along_line),u_is_variable_anchor:new a.aH(ue,w.u_is_variable_anchor),u_texsize:new a.aO(ue,w.u_texsize),u_texture:new a.aH(ue,w.u_texture),u_gamma_scale:new a.aI(ue,w.u_gamma_scale),u_device_pixel_ratio:new a.aI(ue,w.u_device_pixel_ratio),u_is_halo:new a.aH(ue,w.u_is_halo),u_translation:new a.aO(ue,w.u_translation),u_pitched_scale:new a.aI(ue,w.u_pitched_scale)}),symbolTextAndIcon:(ue,w)=>({u_is_size_zoom_constant:new a.aH(ue,w.u_is_size_zoom_constant),u_is_size_feature_constant:new a.aH(ue,w.u_is_size_feature_constant),u_size_t:new a.aI(ue,w.u_size_t),u_size:new a.aI(ue,w.u_size),u_camera_to_center_distance:new a.aI(ue,w.u_camera_to_center_distance),u_pitch:new a.aI(ue,w.u_pitch),u_rotate_symbol:new a.aH(ue,w.u_rotate_symbol),u_aspect_ratio:new a.aI(ue,w.u_aspect_ratio),u_fade_change:new a.aI(ue,w.u_fade_change),u_matrix:new a.aJ(ue,w.u_matrix),u_label_plane_matrix:new a.aJ(ue,w.u_label_plane_matrix),u_coord_matrix:new a.aJ(ue,w.u_coord_matrix),u_is_text:new a.aH(ue,w.u_is_text),u_pitch_with_map:new a.aH(ue,w.u_pitch_with_map),u_is_along_line:new a.aH(ue,w.u_is_along_line),u_is_variable_anchor:new a.aH(ue,w.u_is_variable_anchor),u_texsize:new a.aO(ue,w.u_texsize),u_texsize_icon:new a.aO(ue,w.u_texsize_icon),u_texture:new a.aH(ue,w.u_texture),u_texture_icon:new a.aH(ue,w.u_texture_icon),u_gamma_scale:new a.aI(ue,w.u_gamma_scale),u_device_pixel_ratio:new a.aI(ue,w.u_device_pixel_ratio),u_is_halo:new a.aH(ue,w.u_is_halo),u_translation:new a.aO(ue,w.u_translation),u_pitched_scale:new a.aI(ue,w.u_pitched_scale)}),background:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_opacity:new a.aI(ue,w.u_opacity),u_color:new a.aL(ue,w.u_color)}),backgroundPattern:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_opacity:new a.aI(ue,w.u_opacity),u_image:new a.aH(ue,w.u_image),u_pattern_tl_a:new a.aO(ue,w.u_pattern_tl_a),u_pattern_br_a:new a.aO(ue,w.u_pattern_br_a),u_pattern_tl_b:new a.aO(ue,w.u_pattern_tl_b),u_pattern_br_b:new a.aO(ue,w.u_pattern_br_b),u_texsize:new a.aO(ue,w.u_texsize),u_mix:new a.aI(ue,w.u_mix),u_pattern_size_a:new a.aO(ue,w.u_pattern_size_a),u_pattern_size_b:new a.aO(ue,w.u_pattern_size_b),u_scale_a:new a.aI(ue,w.u_scale_a),u_scale_b:new a.aI(ue,w.u_scale_b),u_pixel_coord_upper:new a.aO(ue,w.u_pixel_coord_upper),u_pixel_coord_lower:new a.aO(ue,w.u_pixel_coord_lower),u_tile_units_to_pixels:new a.aI(ue,w.u_tile_units_to_pixels)}),terrain:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_texture:new a.aH(ue,w.u_texture),u_ele_delta:new a.aI(ue,w.u_ele_delta),u_fog_matrix:new a.aJ(ue,w.u_fog_matrix),u_fog_color:new a.aL(ue,w.u_fog_color),u_fog_ground_blend:new a.aI(ue,w.u_fog_ground_blend),u_fog_ground_blend_opacity:new a.aI(ue,w.u_fog_ground_blend_opacity),u_horizon_color:new a.aL(ue,w.u_horizon_color),u_horizon_fog_blend:new a.aI(ue,w.u_horizon_fog_blend)}),terrainDepth:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_ele_delta:new a.aI(ue,w.u_ele_delta)}),terrainCoords:(ue,w)=>({u_matrix:new a.aJ(ue,w.u_matrix),u_texture:new a.aH(ue,w.u_texture),u_terrain_coords_id:new a.aI(ue,w.u_terrain_coords_id),u_ele_delta:new a.aI(ue,w.u_ele_delta)}),sky:(ue,w)=>({u_sky_color:new a.aL(ue,w.u_sky_color),u_horizon_color:new a.aL(ue,w.u_horizon_color),u_horizon:new a.aI(ue,w.u_horizon),u_sky_horizon_blend:new a.aI(ue,w.u_sky_horizon_blend)})};class bs{constructor(w,B,Q){this.context=w;let ee=w.gl;this.buffer=ee.createBuffer(),this.dynamicDraw=!!Q,this.context.unbindVAO(),w.bindElementBuffer.set(this.buffer),ee.bufferData(ee.ELEMENT_ARRAY_BUFFER,B.arrayBuffer,this.dynamicDraw?ee.DYNAMIC_DRAW:ee.STATIC_DRAW),this.dynamicDraw||delete B.arrayBuffer}bind(){this.context.bindElementBuffer.set(this.buffer)}updateData(w){let B=this.context.gl;if(!this.dynamicDraw)throw new Error("Attempted to update data while not in dynamic mode.");this.context.unbindVAO(),this.bind(),B.bufferSubData(B.ELEMENT_ARRAY_BUFFER,0,w.arrayBuffer)}destroy(){this.buffer&&(this.context.gl.deleteBuffer(this.buffer),delete this.buffer)}}let Rn={Int8:"BYTE",Uint8:"UNSIGNED_BYTE",Int16:"SHORT",Uint16:"UNSIGNED_SHORT",Int32:"INT",Uint32:"UNSIGNED_INT",Float32:"FLOAT"};class _a{constructor(w,B,Q,ee){this.length=B.length,this.attributes=Q,this.itemSize=B.bytesPerElement,this.dynamicDraw=ee,this.context=w;let le=w.gl;this.buffer=le.createBuffer(),w.bindVertexBuffer.set(this.buffer),le.bufferData(le.ARRAY_BUFFER,B.arrayBuffer,this.dynamicDraw?le.DYNAMIC_DRAW:le.STATIC_DRAW),this.dynamicDraw||delete B.arrayBuffer}bind(){this.context.bindVertexBuffer.set(this.buffer)}updateData(w){if(w.length!==this.length)throw new Error(`Length of new data is ${w.length}, which doesn't match current length of ${this.length}`);let B=this.context.gl;this.bind(),B.bufferSubData(B.ARRAY_BUFFER,0,w.arrayBuffer)}enableAttributes(w,B){for(let Q=0;Q<this.attributes.length;Q++){let ee=B.attributes[this.attributes[Q].name];ee!==void 0&&w.enableVertexAttribArray(ee)}}setVertexAttribPointers(w,B,Q){for(let ee=0;ee<this.attributes.length;ee++){let le=this.attributes[ee],qe=B.attributes[le.name];qe!==void 0&&w.vertexAttribPointer(qe,le.components,w[Rn[le.type]],!1,this.itemSize,le.offset+this.itemSize*(Q||0))}}destroy(){this.buffer&&(this.context.gl.deleteBuffer(this.buffer),delete this.buffer)}}let Vu=new WeakMap;function Ol(ue){var w;if(Vu.has(ue))return Vu.get(ue);{let B=(w=ue.getParameter(ue.VERSION))===null||w===void 0?void 0:w.startsWith("WebGL 2.0");return Vu.set(ue,B),B}}class xo{constructor(w){this.gl=w.gl,this.default=this.getDefault(),this.current=this.default,this.dirty=!1}get(){return this.current}set(w){}getDefault(){return this.default}setDefault(){this.set(this.default)}}class Yl extends xo{getDefault(){return a.aM.transparent}set(w){let B=this.current;(w.r!==B.r||w.g!==B.g||w.b!==B.b||w.a!==B.a||this.dirty)&&(this.gl.clearColor(w.r,w.g,w.b,w.a),this.current=w,this.dirty=!1)}}class Ns extends xo{getDefault(){return 1}set(w){(w!==this.current||this.dirty)&&(this.gl.clearDepth(w),this.current=w,this.dirty=!1)}}class Hl extends xo{getDefault(){return 0}set(w){(w!==this.current||this.dirty)&&(this.gl.clearStencil(w),this.current=w,this.dirty=!1)}}class ac extends xo{getDefault(){return[!0,!0,!0,!0]}set(w){let B=this.current;(w[0]!==B[0]||w[1]!==B[1]||w[2]!==B[2]||w[3]!==B[3]||this.dirty)&&(this.gl.colorMask(w[0],w[1],w[2],w[3]),this.current=w,this.dirty=!1)}}class aa extends xo{getDefault(){return!0}set(w){(w!==this.current||this.dirty)&&(this.gl.depthMask(w),this.current=w,this.dirty=!1)}}class Oo extends xo{getDefault(){return 255}set(w){(w!==this.current||this.dirty)&&(this.gl.stencilMask(w),this.current=w,this.dirty=!1)}}class qo extends xo{getDefault(){return{func:this.gl.ALWAYS,ref:0,mask:255}}set(w){let B=this.current;(w.func!==B.func||w.ref!==B.ref||w.mask!==B.mask||this.dirty)&&(this.gl.stencilFunc(w.func,w.ref,w.mask),this.current=w,this.dirty=!1)}}class ql extends xo{getDefault(){let w=this.gl;return[w.KEEP,w.KEEP,w.KEEP]}set(w){let B=this.current;(w[0]!==B[0]||w[1]!==B[1]||w[2]!==B[2]||this.dirty)&&(this.gl.stencilOp(w[0],w[1],w[2]),this.current=w,this.dirty=!1)}}class Pc extends xo{getDefault(){return!1}set(w){if(w===this.current&&!this.dirty)return;let B=this.gl;w?B.enable(B.STENCIL_TEST):B.disable(B.STENCIL_TEST),this.current=w,this.dirty=!1}}class Do extends xo{getDefault(){return[0,1]}set(w){let B=this.current;(w[0]!==B[0]||w[1]!==B[1]||this.dirty)&&(this.gl.depthRange(w[0],w[1]),this.current=w,this.dirty=!1)}}class rf extends xo{getDefault(){return!1}set(w){if(w===this.current&&!this.dirty)return;let B=this.gl;w?B.enable(B.DEPTH_TEST):B.disable(B.DEPTH_TEST),this.current=w,this.dirty=!1}}class Uf extends xo{getDefault(){return this.gl.LESS}set(w){(w!==this.current||this.dirty)&&(this.gl.depthFunc(w),this.current=w,this.dirty=!1)}}class pl extends xo{getDefault(){return!1}set(w){if(w===this.current&&!this.dirty)return;let B=this.gl;w?B.enable(B.BLEND):B.disable(B.BLEND),this.current=w,this.dirty=!1}}class Zc extends xo{getDefault(){let w=this.gl;return[w.ONE,w.ZERO]}set(w){let B=this.current;(w[0]!==B[0]||w[1]!==B[1]||this.dirty)&&(this.gl.blendFunc(w[0],w[1]),this.current=w,this.dirty=!1)}}class Kl extends xo{getDefault(){return a.aM.transparent}set(w){let B=this.current;(w.r!==B.r||w.g!==B.g||w.b!==B.b||w.a!==B.a||this.dirty)&&(this.gl.blendColor(w.r,w.g,w.b,w.a),this.current=w,this.dirty=!1)}}class Os extends xo{getDefault(){return this.gl.FUNC_ADD}set(w){(w!==this.current||this.dirty)&&(this.gl.blendEquation(w),this.current=w,this.dirty=!1)}}class yu extends xo{getDefault(){return!1}set(w){if(w===this.current&&!this.dirty)return;let B=this.gl;w?B.enable(B.CULL_FACE):B.disable(B.CULL_FACE),this.current=w,this.dirty=!1}}class oc extends xo{getDefault(){return this.gl.BACK}set(w){(w!==this.current||this.dirty)&&(this.gl.cullFace(w),this.current=w,this.dirty=!1)}}class Cf extends xo{getDefault(){return this.gl.CCW}set(w){(w!==this.current||this.dirty)&&(this.gl.frontFace(w),this.current=w,this.dirty=!1)}}class sc extends xo{getDefault(){return null}set(w){(w!==this.current||this.dirty)&&(this.gl.useProgram(w),this.current=w,this.dirty=!1)}}class Vh extends xo{getDefault(){return this.gl.TEXTURE0}set(w){(w!==this.current||this.dirty)&&(this.gl.activeTexture(w),this.current=w,this.dirty=!1)}}class Lf extends xo{getDefault(){let w=this.gl;return[0,0,w.drawingBufferWidth,w.drawingBufferHeight]}set(w){let B=this.current;(w[0]!==B[0]||w[1]!==B[1]||w[2]!==B[2]||w[3]!==B[3]||this.dirty)&&(this.gl.viewport(w[0],w[1],w[2],w[3]),this.current=w,this.dirty=!1)}}class cs extends xo{getDefault(){return null}set(w){if(w===this.current&&!this.dirty)return;let B=this.gl;B.bindFramebuffer(B.FRAMEBUFFER,w),this.current=w,this.dirty=!1}}class nf extends xo{getDefault(){return null}set(w){if(w===this.current&&!this.dirty)return;let B=this.gl;B.bindRenderbuffer(B.RENDERBUFFER,w),this.current=w,this.dirty=!1}}class Vf extends xo{getDefault(){return null}set(w){if(w===this.current&&!this.dirty)return;let B=this.gl;B.bindTexture(B.TEXTURE_2D,w),this.current=w,this.dirty=!1}}class Jl extends xo{getDefault(){return null}set(w){if(w===this.current&&!this.dirty)return;let B=this.gl;B.bindBuffer(B.ARRAY_BUFFER,w),this.current=w,this.dirty=!1}}class fl extends xo{getDefault(){return null}set(w){let B=this.gl;B.bindBuffer(B.ELEMENT_ARRAY_BUFFER,w),this.current=w,this.dirty=!1}}class lc extends xo{getDefault(){return null}set(w){var B;if(w===this.current&&!this.dirty)return;let Q=this.gl;Ol(Q)?Q.bindVertexArray(w):(B=Q.getExtension("OES_vertex_array_object"))===null||B===void 0||B.bindVertexArrayOES(w),this.current=w,this.dirty=!1}}class Fu extends xo{getDefault(){return 4}set(w){if(w===this.current&&!this.dirty)return;let B=this.gl;B.pixelStorei(B.UNPACK_ALIGNMENT,w),this.current=w,this.dirty=!1}}class Es extends xo{getDefault(){return!1}set(w){if(w===this.current&&!this.dirty)return;let B=this.gl;B.pixelStorei(B.UNPACK_PREMULTIPLY_ALPHA_WEBGL,w),this.current=w,this.dirty=!1}}class Hs extends xo{getDefault(){return!1}set(w){if(w===this.current&&!this.dirty)return;let B=this.gl;B.pixelStorei(B.UNPACK_FLIP_Y_WEBGL,w),this.current=w,this.dirty=!1}}class Go extends xo{constructor(w,B){super(w),this.context=w,this.parent=B}getDefault(){return null}}class ps extends Go{setDirty(){this.dirty=!0}set(w){if(w===this.current&&!this.dirty)return;this.context.bindFramebuffer.set(this.parent);let B=this.gl;B.framebufferTexture2D(B.FRAMEBUFFER,B.COLOR_ATTACHMENT0,B.TEXTURE_2D,w,0),this.current=w,this.dirty=!1}}class uc extends Go{set(w){if(w===this.current&&!this.dirty)return;this.context.bindFramebuffer.set(this.parent);let B=this.gl;B.framebufferRenderbuffer(B.FRAMEBUFFER,B.DEPTH_ATTACHMENT,B.RENDERBUFFER,w),this.current=w,this.dirty=!1}}class xl extends Go{set(w){if(w===this.current&&!this.dirty)return;this.context.bindFramebuffer.set(this.parent);let B=this.gl;B.framebufferRenderbuffer(B.FRAMEBUFFER,B.DEPTH_STENCIL_ATTACHMENT,B.RENDERBUFFER,w),this.current=w,this.dirty=!1}}class Gu{constructor(w,B,Q,ee,le){this.context=w,this.width=B,this.height=Q;let qe=w.gl,Xe=this.framebuffer=qe.createFramebuffer();if(this.colorAttachment=new ps(w,Xe),ee)this.depthAttachment=le?new xl(w,Xe):new uc(w,Xe);else if(le)throw new Error("Stencil cannot be set without depth");if(qe.checkFramebufferStatus(qe.FRAMEBUFFER)!==qe.FRAMEBUFFER_COMPLETE)throw new Error("Framebuffer is not complete")}destroy(){let w=this.context.gl,B=this.colorAttachment.get();if(B&&w.deleteTexture(B),this.depthAttachment){let Q=this.depthAttachment.get();Q&&w.deleteRenderbuffer(Q)}w.deleteFramebuffer(this.framebuffer)}}class qs{constructor(w,B,Q){this.blendFunction=w,this.blendColor=B,this.mask=Q}}qs.Replace=[1,0],qs.disabled=new qs(qs.Replace,a.aM.transparent,[!1,!1,!1,!1]),qs.unblended=new qs(qs.Replace,a.aM.transparent,[!0,!0,!0,!0]),qs.alphaBlended=new qs([1,771],a.aM.transparent,[!0,!0,!0,!0]);class ad{constructor(w){var B,Q;if(this.gl=w,this.clearColor=new Yl(this),this.clearDepth=new Ns(this),this.clearStencil=new Hl(this),this.colorMask=new ac(this),this.depthMask=new aa(this),this.stencilMask=new Oo(this),this.stencilFunc=new qo(this),this.stencilOp=new ql(this),this.stencilTest=new Pc(this),this.depthRange=new Do(this),this.depthTest=new rf(this),this.depthFunc=new Uf(this),this.blend=new pl(this),this.blendFunc=new Zc(this),this.blendColor=new Kl(this),this.blendEquation=new Os(this),this.cullFace=new yu(this),this.cullFaceSide=new oc(this),this.frontFace=new Cf(this),this.program=new sc(this),this.activeTexture=new Vh(this),this.viewport=new Lf(this),this.bindFramebuffer=new cs(this),this.bindRenderbuffer=new nf(this),this.bindTexture=new Vf(this),this.bindVertexBuffer=new Jl(this),this.bindElementBuffer=new fl(this),this.bindVertexArray=new lc(this),this.pixelStoreUnpack=new Fu(this),this.pixelStoreUnpackPremultiplyAlpha=new Es(this),this.pixelStoreUnpackFlipY=new Hs(this),this.extTextureFilterAnisotropic=w.getExtension("EXT_texture_filter_anisotropic")||w.getExtension("MOZ_EXT_texture_filter_anisotropic")||w.getExtension("WEBKIT_EXT_texture_filter_anisotropic"),this.extTextureFilterAnisotropic&&(this.extTextureFilterAnisotropicMax=w.getParameter(this.extTextureFilterAnisotropic.MAX_TEXTURE_MAX_ANISOTROPY_EXT)),this.maxTextureSize=w.getParameter(w.MAX_TEXTURE_SIZE),Ol(w)){this.HALF_FLOAT=w.HALF_FLOAT;let ee=w.getExtension("EXT_color_buffer_half_float");this.RGBA16F=(B=w.RGBA16F)!==null&&B!==void 0?B:ee==null?void 0:ee.RGBA16F_EXT,this.RGB16F=(Q=w.RGB16F)!==null&&Q!==void 0?Q:ee==null?void 0:ee.RGB16F_EXT,w.getExtension("EXT_color_buffer_float")}else{w.getExtension("EXT_color_buffer_half_float"),w.getExtension("OES_texture_half_float_linear");let ee=w.getExtension("OES_texture_half_float");this.HALF_FLOAT=ee==null?void 0:ee.HALF_FLOAT_OES}}setDefault(){this.unbindVAO(),this.clearColor.setDefault(),this.clearDepth.setDefault(),this.clearStencil.setDefault(),this.colorMask.setDefault(),this.depthMask.setDefault(),this.stencilMask.setDefault(),this.stencilFunc.setDefault(),this.stencilOp.setDefault(),this.stencilTest.setDefault(),this.depthRange.setDefault(),this.depthTest.setDefault(),this.depthFunc.setDefault(),this.blend.setDefault(),this.blendFunc.setDefault(),this.blendColor.setDefault(),this.blendEquation.setDefault(),this.cullFace.setDefault(),this.cullFaceSide.setDefault(),this.frontFace.setDefault(),this.program.setDefault(),this.activeTexture.setDefault(),this.bindFramebuffer.setDefault(),this.pixelStoreUnpack.setDefault(),this.pixelStoreUnpackPremultiplyAlpha.setDefault(),this.pixelStoreUnpackFlipY.setDefault()}setDirty(){this.clearColor.dirty=!0,this.clearDepth.dirty=!0,this.clearStencil.dirty=!0,this.colorMask.dirty=!0,this.depthMask.dirty=!0,this.stencilMask.dirty=!0,this.stencilFunc.dirty=!0,this.stencilOp.dirty=!0,this.stencilTest.dirty=!0,this.depthRange.dirty=!0,this.depthTest.dirty=!0,this.depthFunc.dirty=!0,this.blend.dirty=!0,this.blendFunc.dirty=!0,this.blendColor.dirty=!0,this.blendEquation.dirty=!0,this.cullFace.dirty=!0,this.cullFaceSide.dirty=!0,this.frontFace.dirty=!0,this.program.dirty=!0,this.activeTexture.dirty=!0,this.viewport.dirty=!0,this.bindFramebuffer.dirty=!0,this.bindRenderbuffer.dirty=!0,this.bindTexture.dirty=!0,this.bindVertexBuffer.dirty=!0,this.bindElementBuffer.dirty=!0,this.bindVertexArray.dirty=!0,this.pixelStoreUnpack.dirty=!0,this.pixelStoreUnpackPremultiplyAlpha.dirty=!0,this.pixelStoreUnpackFlipY.dirty=!0}createIndexBuffer(w,B){return new bs(this,w,B)}createVertexBuffer(w,B,Q){return new _a(this,w,B,Q)}createRenderbuffer(w,B,Q){let ee=this.gl,le=ee.createRenderbuffer();return this.bindRenderbuffer.set(le),ee.renderbufferStorage(ee.RENDERBUFFER,w,B,Q),this.bindRenderbuffer.set(null),le}createFramebuffer(w,B,Q,ee){return new Gu(this,w,B,Q,ee)}clear({color:w,depth:B,stencil:Q}){let ee=this.gl,le=0;w&&(le|=ee.COLOR_BUFFER_BIT,this.clearColor.set(w),this.colorMask.set([!0,!0,!0,!0])),B!==void 0&&(le|=ee.DEPTH_BUFFER_BIT,this.depthRange.set([0,1]),this.clearDepth.set(B),this.depthMask.set(!0)),Q!==void 0&&(le|=ee.STENCIL_BUFFER_BIT,this.clearStencil.set(Q),this.stencilMask.set(255)),ee.clear(le)}setCullFace(w){w.enable===!1?this.cullFace.set(!1):(this.cullFace.set(!0),this.cullFaceSide.set(w.mode),this.frontFace.set(w.frontFace))}setDepthMode(w){w.func!==this.gl.ALWAYS||w.mask?(this.depthTest.set(!0),this.depthFunc.set(w.func),this.depthMask.set(w.mask),this.depthRange.set(w.range)):this.depthTest.set(!1)}setStencilMode(w){w.test.func!==this.gl.ALWAYS||w.mask?(this.stencilTest.set(!0),this.stencilMask.set(w.mask),this.stencilOp.set([w.fail,w.depthFail,w.pass]),this.stencilFunc.set({func:w.test.func,ref:w.ref,mask:w.test.mask})):this.stencilTest.set(!1)}setColorMode(w){a.aE(w.blendFunction,qs.Replace)?this.blend.set(!1):(this.blend.set(!0),this.blendFunc.set(w.blendFunction),this.blendColor.set(w.blendColor)),this.colorMask.set(w.mask)}createVertexArray(){var w;return Ol(this.gl)?this.gl.createVertexArray():(w=this.gl.getExtension("OES_vertex_array_object"))===null||w===void 0?void 0:w.createVertexArrayOES()}deleteVertexArray(w){var B;return Ol(this.gl)?this.gl.deleteVertexArray(w):(B=this.gl.getExtension("OES_vertex_array_object"))===null||B===void 0?void 0:B.deleteVertexArrayOES(w)}unbindVAO(){this.bindVertexArray.set(null)}}class Po{constructor(w,B,Q){this.func=w,this.mask=B,this.range=Q}}Po.ReadOnly=!1,Po.ReadWrite=!0,Po.disabled=new Po(519,Po.ReadOnly,[0,1]);let od=7680;class Yo{constructor(w,B,Q,ee,le,qe){this.test=w,this.ref=B,this.mask=Q,this.fail=ee,this.depthFail=le,this.pass=qe}}Yo.disabled=new Yo({func:519,mask:0},0,0,od,od,od);class Pa{constructor(w,B,Q){this.enable=w,this.mode=B,this.frontFace=Q}}let af;function Hu(ue,w,B,Q,ee){let le=ue.context,qe=le.gl,Xe=ue.useProgram("collisionBox"),ot=[],Tt=0,Yt=0;for(let Be=0;Be<Q.length;Be++){let et=Q[Be],We=w.getTile(et).getBucket(B);if(!We)continue;let it=ee?We.textCollisionBox:We.iconCollisionBox,Ft=We.collisionCircleArray;if(Ft.length>0){let Ht=a.H();a.aQ(Ht,We.placementInvProjMatrix,ue.transform.glCoordMatrix),a.aQ(Ht,Ht,We.placementViewportMatrix),ot.push({circleArray:Ft,circleOffset:Yt,transform:et.posMatrix,invTransform:Ht,coord:et}),Tt+=Ft.length/4,Yt=Tt}it&&Xe.draw(le,qe.LINES,Po.disabled,Yo.disabled,ue.colorModeForRenderPass(),Pa.disabled,{u_matrix:et.posMatrix,u_pixel_extrude_scale:[1/(Kt=ue.transform).width,1/Kt.height]},ue.style.map.terrain&&ue.style.map.terrain.getTerrainData(et),B.id,it.layoutVertexBuffer,it.indexBuffer,it.segments,null,ue.transform.zoom,null,null,it.collisionVertexBuffer)}var Kt;if(!ee||!ot.length)return;let xr=ue.useProgram("collisionCircle"),Ir=new a.aR;Ir.resize(4*Tt),Ir._trim();let ve=0;for(let Be of ot)for(let et=0;et<Be.circleArray.length/4;et++){let We=4*et,it=Be.circleArray[We+0],Ft=Be.circleArray[We+1],Ht=Be.circleArray[We+2],tr=Be.circleArray[We+3];Ir.emplace(ve++,it,Ft,Ht,tr,0),Ir.emplace(ve++,it,Ft,Ht,tr,1),Ir.emplace(ve++,it,Ft,Ht,tr,2),Ir.emplace(ve++,it,Ft,Ht,tr,3)}(!af||af.length<2*Tt)&&(af=function(Be){let et=2*Be,We=new a.aT;We.resize(et),We._trim();for(let it=0;it<et;it++){let Ft=6*it;We.uint16[Ft+0]=4*it+0,We.uint16[Ft+1]=4*it+1,We.uint16[Ft+2]=4*it+2,We.uint16[Ft+3]=4*it+2,We.uint16[Ft+4]=4*it+3,We.uint16[Ft+5]=4*it+0}return We}(Tt));let be=le.createIndexBuffer(af,!0),De=le.createVertexBuffer(Ir,a.aS.members,!0);for(let Be of ot){let et=No(Be.transform,Be.invTransform,ue.transform);xr.draw(le,qe.TRIANGLES,Po.disabled,Yo.disabled,ue.colorModeForRenderPass(),Pa.disabled,et,ue.style.map.terrain&&ue.style.map.terrain.getTerrainData(Be.coord),B.id,De,be,a.a0.simpleSegment(0,2*Be.circleOffset,Be.circleArray.length,Be.circleArray.length/2),null,ue.transform.zoom,null,null,null)}De.destroy(),be.destroy()}Pa.disabled=new Pa(!1,1029,2305),Pa.backCCW=new Pa(!0,1029,2305);let bl=a.an(new Float32Array(16));function Gf(ue,w,B,Q,ee,le){let{horizontalAlign:qe,verticalAlign:Xe}=a.au(ue);return new a.P((-(qe-.5)*w/ee+Q[0])*le,(-(Xe-.5)*B/ee+Q[1])*le)}function Ic(ue,w,B,Q,ee,le){let qe=w.tileAnchorPoint.add(new a.P(w.translation[0],w.translation[1]));if(w.pitchWithMap){let Xe=Q.mult(le);B||(Xe=Xe.rotate(-ee));let ot=qe.add(Xe);return dt(ot.x,ot.y,w.labelPlaneMatrix,w.getElevation).point}if(B){let Xe=vt(w.tileAnchorPoint.x+1,w.tileAnchorPoint.y,w).point.sub(ue),ot=Math.atan(Xe.y/Xe.x)+(Xe.x<0?Math.PI:0);return ue.add(Q.rotate(ot))}return ue.add(Q)}function yf(ue,w,B,Q,ee,le,qe,Xe,ot,Tt,Yt,Kt,xr,Ir){let ve=ue.text.placedSymbolArray,be=ue.text.dynamicLayoutVertexArray,De=ue.icon.dynamicLayoutVertexArray,Be={};be.clear();for(let et=0;et<ve.length;et++){let We=ve.get(et),it=We.hidden||!We.crossTileID||ue.allowVerticalPlacement&&!We.placedOrientation?null:Q[We.crossTileID];if(it){let Ft=new a.P(We.anchorX,We.anchorY),Ht={getElevation:Ir,width:ee.width,height:ee.height,labelPlaneMatrix:le,lineVertexArray:null,pitchWithMap:B,projection:Yt,projectionCache:null,tileAnchorPoint:Ft,translation:Kt,unwrappedTileID:xr},tr=B?dt(Ft.x,Ft.y,qe,Ir):vt(Ft.x,Ft.y,Ht),dr=Ge(ee.cameraToCenterDistance,tr.signedDistanceFromCamera),Sr=a.ai(ue.textSizeData,ot,We)*dr/a.ap;B&&(Sr*=ue.tilePixelRatio/Xe);let{width:Or,height:Wr,anchor:ni,textOffset:Pi,textBoxScale:cn}=it,ln=Gf(ni,Or,Wr,Pi,cn,Sr),Cn=Yt.getPitchedTextCorrection(ee,Ft.add(new a.P(Kt[0],Kt[1])),xr),Kn=Ic(tr.point,Ht,w,ln,ee.angle,Cn),Ta=ue.allowVerticalPlacement&&We.placedOrientation===a.ah.vertical?Math.PI/2:0;for(let fa=0;fa<We.numGlyphs;fa++)a.aj(be,Kn,Ta);Tt&&We.associatedIconIndex>=0&&(Be[We.associatedIconIndex]={shiftedAnchor:Kn,angle:Ta})}else pi(We.numGlyphs,be)}if(Tt){De.clear();let et=ue.icon.placedSymbolArray;for(let We=0;We<et.length;We++){let it=et.get(We);if(it.hidden)pi(it.numGlyphs,De);else{let Ft=Be[We];if(Ft)for(let Ht=0;Ht<it.numGlyphs;Ht++)a.aj(De,Ft.shiftedAnchor,Ft.angle);else pi(it.numGlyphs,De)}}ue.icon.dynamicLayoutVertexBuffer.updateData(De)}ue.text.dynamicLayoutVertexBuffer.updateData(be)}function Bl(ue,w,B){return B.iconsInText&&w?"symbolTextAndIcon":ue?"symbolSDF":"symbolIcon"}function wh(ue,w,B,Q,ee,le,qe,Xe,ot,Tt,Yt,Kt){let xr=ue.context,Ir=xr.gl,ve=ue.transform,be=yn(),De=Xe==="map",Be=ot==="map",et=Xe!=="viewport"&&B.layout.get("symbol-placement")!=="point",We=De&&!Be&&!et,it=!Be&&et,Ft=!B.layout.get("symbol-sort-key").isConstant(),Ht=!1,tr=ue.depthModeForSublayer(0,Po.ReadOnly),dr=B._unevaluatedLayout.hasValue("text-variable-anchor")||B._unevaluatedLayout.hasValue("text-variable-anchor-offset"),Sr=[],Or=be.getCircleRadiusCorrection(ve);for(let Wr of Q){let ni=w.getTile(Wr),Pi=ni.getBucket(B);if(!Pi)continue;let cn=ee?Pi.text:Pi.icon;if(!cn||!cn.segments.get().length||!cn.hasVisibleVertices)continue;let ln=cn.programConfigurations.get(B.id),Cn=ee||Pi.sdfIcons,Kn=ee?Pi.textSizeData:Pi.iconSizeData,Ta=Be||ve.pitch!==0,fa=ue.useProgram(Bl(Cn,ee,Pi),ln),$a=a.ag(Kn,ve.zoom),Co=ue.style.map.terrain&&ue.style.map.terrain.getTerrainData(Wr),Qa,mo,Bo,Ps,Ts=[0,0],wo=null;if(ee)mo=ni.glyphAtlasTexture,Bo=Ir.LINEAR,Qa=ni.glyphAtlasTexture.size,Pi.iconsInText&&(Ts=ni.imageAtlasTexture.size,wo=ni.imageAtlasTexture,Ps=Ta||ue.options.rotating||ue.options.zooming||Kn.kind==="composite"||Kn.kind==="camera"?Ir.LINEAR:Ir.NEAREST);else{let Ye=B.layout.get("icon-size").constantOr(0)!==1||Pi.iconsNeedLinear;mo=ni.imageAtlasTexture,Bo=Cn||ue.options.rotating||ue.options.zooming||Ye||Ta?Ir.LINEAR:Ir.NEAREST,Qa=ni.imageAtlasTexture.size}let To=In(ni,1,ue.transform.zoom),hl=it?Wr.posMatrix:bl,Ul=Br(hl,Be,De,ue.transform,To),Lu=Vr(hl,Be,De,ue.transform,To),au=Vr(Wr.posMatrix,Be,De,ue.transform,To),Js=be.translatePosition(ue.transform,ni,le,qe),Ql=dr&&Pi.hasTextData(),dc=B.layout.get("icon-text-fit")!=="none"&&Ql&&Pi.hasIconData();if(et){let Ye=ue.style.map.terrain?(nt,jt)=>ue.style.map.terrain.getElevation(Wr,nt,jt):null,kt=B.layout.get("text-rotation-alignment")==="map";je(Pi,Wr.posMatrix,ue,ee,Ul,au,Be,Tt,kt,be,Wr.toUnwrapped(),ve.width,ve.height,Js,Ye)}let Tl=Wr.posMatrix,Al=ee&&dr||dc,X=et||Al?bl:Ul,se=Lu,Te=Cn&&B.paint.get(ee?"text-halo-width":"icon-halo-width").constantOr(1)!==0,Ne;Ne=Cn?Pi.iconsInText?ls(Kn.kind,$a,We,Be,et,Al,ue,Tl,X,se,Js,Qa,Ts,Or):Io(Kn.kind,$a,We,Be,et,Al,ue,Tl,X,se,Js,ee,Qa,!0,Or):zs(Kn.kind,$a,We,Be,et,Al,ue,Tl,X,se,Js,ee,Qa,Or);let He={program:fa,buffers:cn,uniformValues:Ne,atlasTexture:mo,atlasTextureIcon:wo,atlasInterpolation:Bo,atlasInterpolationIcon:Ps,isSDF:Cn,hasHalo:Te};if(Ft&&Pi.canOverlap){Ht=!0;let Ye=cn.segments.get();for(let kt of Ye)Sr.push({segments:new a.a0([kt]),sortKey:kt.sortKey,state:He,terrainData:Co})}else Sr.push({segments:cn.segments,sortKey:0,state:He,terrainData:Co})}Ht&&Sr.sort((Wr,ni)=>Wr.sortKey-ni.sortKey);for(let Wr of Sr){let ni=Wr.state;if(xr.activeTexture.set(Ir.TEXTURE0),ni.atlasTexture.bind(ni.atlasInterpolation,Ir.CLAMP_TO_EDGE),ni.atlasTextureIcon&&(xr.activeTexture.set(Ir.TEXTURE1),ni.atlasTextureIcon&&ni.atlasTextureIcon.bind(ni.atlasInterpolationIcon,Ir.CLAMP_TO_EDGE)),ni.isSDF){let Pi=ni.uniformValues;ni.hasHalo&&(Pi.u_is_halo=1,Qf(ni.buffers,Wr.segments,B,ue,ni.program,tr,Yt,Kt,Pi,Wr.terrainData)),Pi.u_is_halo=0}Qf(ni.buffers,Wr.segments,B,ue,ni.program,tr,Yt,Kt,ni.uniformValues,Wr.terrainData)}}function Qf(ue,w,B,Q,ee,le,qe,Xe,ot,Tt){let Yt=Q.context;ee.draw(Yt,Yt.gl.TRIANGLES,le,qe,Xe,Pa.disabled,ot,Tt,B.id,ue.layoutVertexBuffer,ue.indexBuffer,w,B.paint,Q.transform.zoom,ue.programConfigurations.get(B.id),ue.dynamicLayoutVertexBuffer,ue.opacityVertexBuffer)}function _f(ue,w,B,Q){let ee=ue.context,le=ee.gl,qe=Yo.disabled,Xe=new qs([le.ONE,le.ONE],a.aM.transparent,[!0,!0,!0,!0]),ot=w.getBucket(B);if(!ot)return;let Tt=Q.key,Yt=B.heatmapFbos.get(Tt);Yt||(Yt=eh(ee,w.tileSize,w.tileSize),B.heatmapFbos.set(Tt,Yt)),ee.bindFramebuffer.set(Yt.framebuffer),ee.viewport.set([0,0,w.tileSize,w.tileSize]),ee.clear({color:a.aM.transparent});let Kt=ot.programConfigurations.get(B.id),xr=ue.useProgram("heatmap",Kt),Ir=ue.style.map.terrain.getTerrainData(Q);xr.draw(ee,le.TRIANGLES,Po.disabled,qe,Xe,Pa.disabled,ko(Q.posMatrix,w,ue.transform.zoom,B.paint.get("heatmap-intensity")),Ir,B.id,ot.layoutVertexBuffer,ot.indexBuffer,ot.segments,B.paint,ue.transform.zoom,Kt)}function Yc(ue,w,B){let Q=ue.context,ee=Q.gl;Q.setColorMode(ue.colorModeForRenderPass());let le=th(Q,w),qe=B.key,Xe=w.heatmapFbos.get(qe);Xe&&(Q.activeTexture.set(ee.TEXTURE0),ee.bindTexture(ee.TEXTURE_2D,Xe.colorAttachment.get()),Q.activeTexture.set(ee.TEXTURE1),le.bind(ee.LINEAR,ee.CLAMP_TO_EDGE),ue.useProgram("heatmapTexture").draw(Q,ee.TRIANGLES,Po.disabled,Yo.disabled,ue.colorModeForRenderPass(),Pa.disabled,Ds(ue,w,0,1),null,w.id,ue.rasterBoundsBuffer,ue.quadTriangleIndexBuffer,ue.rasterBoundsSegments,w.paint,ue.transform.zoom),Xe.destroy(),w.heatmapFbos.delete(qe))}function eh(ue,w,B){var Q,ee;let le=ue.gl,qe=le.createTexture();le.bindTexture(le.TEXTURE_2D,qe),le.texParameteri(le.TEXTURE_2D,le.TEXTURE_WRAP_S,le.CLAMP_TO_EDGE),le.texParameteri(le.TEXTURE_2D,le.TEXTURE_WRAP_T,le.CLAMP_TO_EDGE),le.texParameteri(le.TEXTURE_2D,le.TEXTURE_MIN_FILTER,le.LINEAR),le.texParameteri(le.TEXTURE_2D,le.TEXTURE_MAG_FILTER,le.LINEAR);let Xe=(Q=ue.HALF_FLOAT)!==null&&Q!==void 0?Q:le.UNSIGNED_BYTE,ot=(ee=ue.RGBA16F)!==null&&ee!==void 0?ee:le.RGBA;le.texImage2D(le.TEXTURE_2D,0,ot,w,B,0,le.RGBA,Xe,null);let Tt=ue.createFramebuffer(w,B,!1,!1);return Tt.colorAttachment.set(qe),Tt}function th(ue,w){return w.colorRampTexture||(w.colorRampTexture=new g(ue,w.colorRamp,ue.gl.RGBA)),w.colorRampTexture}function ju(ue,w,B,Q,ee){if(!B||!Q||!Q.imageAtlas)return;let le=Q.imageAtlas.patternPositions,qe=le[B.to.toString()],Xe=le[B.from.toString()];if(!qe&&Xe&&(qe=Xe),!Xe&&qe&&(Xe=qe),!qe||!Xe){let ot=ee.getPaintProperty(w);qe=le[ot],Xe=le[ot]}qe&&Xe&&ue.setConstantPatternPositions(qe,Xe)}function Hf(ue,w,B,Q,ee,le,qe){let Xe=ue.context.gl,ot="fill-pattern",Tt=B.paint.get(ot),Yt=Tt&&Tt.constantOr(1),Kt=B.getCrossfadeParameters(),xr,Ir,ve,be,De;qe?(Ir=Yt&&!B.getPaintProperty("fill-outline-color")?"fillOutlinePattern":"fillOutline",xr=Xe.LINES):(Ir=Yt?"fillPattern":"fill",xr=Xe.TRIANGLES);let Be=Tt.constantOr(null);for(let et of Q){let We=w.getTile(et);if(Yt&&!We.patternsLoaded())continue;let it=We.getBucket(B);if(!it)continue;let Ft=it.programConfigurations.get(B.id),Ht=ue.useProgram(Ir,Ft),tr=ue.style.map.terrain&&ue.style.map.terrain.getTerrainData(et);Yt&&(ue.context.activeTexture.set(Xe.TEXTURE0),We.imageAtlasTexture.bind(Xe.LINEAR,Xe.CLAMP_TO_EDGE),Ft.updatePaintBuffers(Kt)),ju(Ft,ot,Be,We,B);let dr=tr?et:null,Sr=ue.translatePosMatrix(dr?dr.posMatrix:et.posMatrix,We,B.paint.get("fill-translate"),B.paint.get("fill-translate-anchor"));if(qe){be=it.indexBuffer2,De=it.segments2;let Or=[Xe.drawingBufferWidth,Xe.drawingBufferHeight];ve=Ir==="fillOutlinePattern"&&Yt?Ma(Sr,ue,Kt,We,Or):Jn(Sr,Or)}else be=it.indexBuffer,De=it.segments,ve=Yt?ya(Sr,ue,Kt,We):_n(Sr);Ht.draw(ue.context,xr,ee,ue.stencilModeForClipping(et),le,Pa.disabled,ve,tr,B.id,it.layoutVertexBuffer,be,De,B.paint,ue.transform.zoom,Ft)}}function cc(ue,w,B,Q,ee,le,qe){let Xe=ue.context,ot=Xe.gl,Tt="fill-extrusion-pattern",Yt=B.paint.get(Tt),Kt=Yt.constantOr(1),xr=B.getCrossfadeParameters(),Ir=B.paint.get("fill-extrusion-opacity"),ve=Yt.constantOr(null);for(let be of Q){let De=w.getTile(be),Be=De.getBucket(B);if(!Be)continue;let et=ue.style.map.terrain&&ue.style.map.terrain.getTerrainData(be),We=Be.programConfigurations.get(B.id),it=ue.useProgram(Kt?"fillExtrusionPattern":"fillExtrusion",We);Kt&&(ue.context.activeTexture.set(ot.TEXTURE0),De.imageAtlasTexture.bind(ot.LINEAR,ot.CLAMP_TO_EDGE),We.updatePaintBuffers(xr)),ju(We,Tt,ve,De,B);let Ft=ue.translatePosMatrix(be.posMatrix,De,B.paint.get("fill-extrusion-translate"),B.paint.get("fill-extrusion-translate-anchor")),Ht=B.paint.get("fill-extrusion-vertical-gradient"),tr=Kt?Li(Ft,ue,Ht,Ir,be,xr,De):Mn(Ft,ue,Ht,Ir);it.draw(Xe,Xe.gl.TRIANGLES,ee,le,qe,Pa.backCCW,tr,et,B.id,Be.layoutVertexBuffer,Be.indexBuffer,Be.segments,B.paint,ue.transform.zoom,We,ue.style.map.terrain&&Be.centroidVertexBuffer)}}function of(ue,w,B,Q,ee,le,qe){let Xe=ue.context,ot=Xe.gl,Tt=B.fbo;if(!Tt)return;let Yt=ue.useProgram("hillshade"),Kt=ue.style.map.terrain&&ue.style.map.terrain.getTerrainData(w);Xe.activeTexture.set(ot.TEXTURE0),ot.bindTexture(ot.TEXTURE_2D,Tt.colorAttachment.get()),Yt.draw(Xe,ot.TRIANGLES,ee,le,qe,Pa.disabled,((xr,Ir,ve,be)=>{let De=ve.paint.get("hillshade-shadow-color"),Be=ve.paint.get("hillshade-highlight-color"),et=ve.paint.get("hillshade-accent-color"),We=ve.paint.get("hillshade-illumination-direction")*(Math.PI/180);ve.paint.get("hillshade-illumination-anchor")==="viewport"&&(We-=xr.transform.angle);let it=!xr.options.moving;return{u_matrix:be?be.posMatrix:xr.transform.calculatePosMatrix(Ir.tileID.toUnwrapped(),it),u_image:0,u_latrange:Fs(0,Ir.tileID),u_light:[ve.paint.get("hillshade-exaggeration"),We],u_shadow:De,u_highlight:Be,u_accent:et}})(ue,B,Q,Kt?w:null),Kt,Q.id,ue.rasterBoundsBuffer,ue.quadTriangleIndexBuffer,ue.rasterBoundsSegments)}function Nl(ue,w,B,Q,ee,le){let qe=ue.context,Xe=qe.gl,ot=w.dem;if(ot&&ot.data){let Tt=ot.dim,Yt=ot.stride,Kt=ot.getPixels();if(qe.activeTexture.set(Xe.TEXTURE1),qe.pixelStoreUnpackPremultiplyAlpha.set(!1),w.demTexture=w.demTexture||ue.getTileTexture(Yt),w.demTexture){let Ir=w.demTexture;Ir.update(Kt,{premultiply:!1}),Ir.bind(Xe.NEAREST,Xe.CLAMP_TO_EDGE)}else w.demTexture=new g(qe,Kt,Xe.RGBA,{premultiply:!1}),w.demTexture.bind(Xe.NEAREST,Xe.CLAMP_TO_EDGE);qe.activeTexture.set(Xe.TEXTURE0);let xr=w.fbo;if(!xr){let Ir=new g(qe,{width:Tt,height:Tt,data:null},Xe.RGBA);Ir.bind(Xe.LINEAR,Xe.CLAMP_TO_EDGE),xr=w.fbo=qe.createFramebuffer(Tt,Tt,!0,!1),xr.colorAttachment.set(Ir.texture)}qe.bindFramebuffer.set(xr.framebuffer),qe.viewport.set([0,0,Tt,Tt]),ue.useProgram("hillshadePrepare").draw(qe,Xe.TRIANGLES,Q,ee,le,Pa.disabled,((Ir,ve)=>{let be=ve.stride,De=a.H();return a.aP(De,0,a.X,-a.X,0,0,1),a.J(De,De,[0,-a.X,0]),{u_matrix:De,u_image:1,u_dimension:[be,be],u_zoom:Ir.overscaledZ,u_unpack:ve.getUnpackVector()}})(w.tileID,ot),null,B.id,ue.rasterBoundsBuffer,ue.quadTriangleIndexBuffer,ue.rasterBoundsSegments),w.needsHillshadePrepare=!1}}function Kc(ue,w,B,Q,ee,le){let qe=Q.paint.get("raster-fade-duration");if(!le&&qe>0){let Xe=u.now(),ot=(Xe-ue.timeAdded)/qe,Tt=w?(Xe-w.timeAdded)/qe:-1,Yt=B.getSource(),Kt=ee.coveringZoomLevel({tileSize:Yt.tileSize,roundZoom:Yt.roundZoom}),xr=!w||Math.abs(w.tileID.overscaledZ-Kt)>Math.abs(ue.tileID.overscaledZ-Kt),Ir=xr&&ue.refreshedUponExpiration?1:a.ac(xr?ot:1-Tt,0,1);return ue.refreshedUponExpiration&&ot>=1&&(ue.refreshedUponExpiration=!1),w?{opacity:1,mix:1-Ir}:{opacity:Ir,mix:0}}return{opacity:1,mix:0}}let Rc=new a.aM(1,0,0,1),gs=new a.aM(0,1,0,1),jf=new a.aM(0,0,1,1),Gh=new a.aM(1,0,1,1),rh=new a.aM(0,1,1,1);function sf(ue,w,B,Q){Mu(ue,0,w+B/2,ue.transform.width,B,Q)}function Th(ue,w,B,Q){Mu(ue,w-B/2,0,B,ue.transform.height,Q)}function Mu(ue,w,B,Q,ee,le){let qe=ue.context,Xe=qe.gl;Xe.enable(Xe.SCISSOR_TEST),Xe.scissor(w*ue.pixelRatio,B*ue.pixelRatio,Q*ue.pixelRatio,ee*ue.pixelRatio),qe.clear({color:le}),Xe.disable(Xe.SCISSOR_TEST)}function ih(ue,w,B){let Q=ue.context,ee=Q.gl,le=B.posMatrix,qe=ue.useProgram("debug"),Xe=Po.disabled,ot=Yo.disabled,Tt=ue.colorModeForRenderPass(),Yt="$debug",Kt=ue.style.map.terrain&&ue.style.map.terrain.getTerrainData(B);Q.activeTexture.set(ee.TEXTURE0);let xr=w.getTileByID(B.key).latestRawTileData,Ir=Math.floor((xr&&xr.byteLength||0)/1024),ve=w.getTile(B).tileSize,be=512/Math.min(ve,512)*(B.overscaledZ/ue.transform.zoom)*.5,De=B.canonical.toString();B.overscaledZ!==B.canonical.z&&(De+=` => ${B.overscaledZ}`),function(Be,et){Be.initDebugOverlayCanvas();let We=Be.debugOverlayCanvas,it=Be.context.gl,Ft=Be.debugOverlayCanvas.getContext("2d");Ft.clearRect(0,0,We.width,We.height),Ft.shadowColor="white",Ft.shadowBlur=2,Ft.lineWidth=1.5,Ft.strokeStyle="white",Ft.textBaseline="top",Ft.font="bold 36px Open Sans, sans-serif",Ft.fillText(et,5,5),Ft.strokeText(et,5,5),Be.debugOverlayTexture.update(We),Be.debugOverlayTexture.bind(it.LINEAR,it.CLAMP_TO_EDGE)}(ue,`${De} ${Ir}kB`),qe.draw(Q,ee.TRIANGLES,Xe,ot,qs.alphaBlended,Pa.disabled,po(le,a.aM.transparent,be),null,Yt,ue.debugBuffer,ue.quadTriangleIndexBuffer,ue.debugSegments),qe.draw(Q,ee.LINE_STRIP,Xe,ot,Tt,Pa.disabled,po(le,a.aM.red),Kt,Yt,ue.debugBuffer,ue.tileBorderIndexBuffer,ue.debugSegments)}function js(ue,w,B){let Q=ue.context,ee=Q.gl,le=ue.colorModeForRenderPass(),qe=new Po(ee.LEQUAL,Po.ReadWrite,ue.depthRangeFor3D),Xe=ue.useProgram("terrain"),ot=w.getTerrainMesh();Q.bindFramebuffer.set(null),Q.viewport.set([0,0,ue.width,ue.height]);for(let Tt of B){let Yt=ue.renderToTexture.getTexture(Tt),Kt=w.getTerrainData(Tt.tileID);Q.activeTexture.set(ee.TEXTURE0),ee.bindTexture(ee.TEXTURE_2D,Yt.texture);let xr=ue.transform.calculatePosMatrix(Tt.tileID.toUnwrapped()),Ir=w.getMeshFrameDelta(ue.transform.zoom),ve=ue.transform.calculateFogMatrix(Tt.tileID.toUnwrapped()),be=Jr(xr,Ir,ve,ue.style.sky,ue.transform.pitch);Xe.draw(Q,ee.TRIANGLES,qe,Yo.disabled,le,Pa.backCCW,be,Kt,"terrain",ot.vertexBuffer,ot.indexBuffer,ot.segments)}}class Eu{constructor(w,B,Q){this.vertexBuffer=w,this.indexBuffer=B,this.segments=Q}destroy(){this.vertexBuffer.destroy(),this.indexBuffer.destroy(),this.segments.destroy(),this.vertexBuffer=null,this.indexBuffer=null,this.segments=null}}class Dc{constructor(w,B){this.context=new ad(w),this.transform=B,this._tileTextures={},this.terrainFacilitator={dirty:!0,matrix:a.an(new Float64Array(16)),renderTime:0},this.setup(),this.numSublayers=mt.maxUnderzooming+mt.maxOverzooming+1,this.depthEpsilon=1/Math.pow(2,16),this.crossTileSymbolIndex=new Wo}resize(w,B,Q){if(this.width=Math.floor(w*Q),this.height=Math.floor(B*Q),this.pixelRatio=Q,this.context.viewport.set([0,0,this.width,this.height]),this.style)for(let ee of this.style._order)this.style._layers[ee].resize()}setup(){let w=this.context,B=new a.aX;B.emplaceBack(0,0),B.emplaceBack(a.X,0),B.emplaceBack(0,a.X),B.emplaceBack(a.X,a.X),this.tileExtentBuffer=w.createVertexBuffer(B,vo.members),this.tileExtentSegments=a.a0.simpleSegment(0,0,4,2);let Q=new a.aX;Q.emplaceBack(0,0),Q.emplaceBack(a.X,0),Q.emplaceBack(0,a.X),Q.emplaceBack(a.X,a.X),this.debugBuffer=w.createVertexBuffer(Q,vo.members),this.debugSegments=a.a0.simpleSegment(0,0,4,5);let ee=new a.$;ee.emplaceBack(0,0,0,0),ee.emplaceBack(a.X,0,a.X,0),ee.emplaceBack(0,a.X,0,a.X),ee.emplaceBack(a.X,a.X,a.X,a.X),this.rasterBoundsBuffer=w.createVertexBuffer(ee,lt.members),this.rasterBoundsSegments=a.a0.simpleSegment(0,0,4,2);let le=new a.aX;le.emplaceBack(0,0),le.emplaceBack(1,0),le.emplaceBack(0,1),le.emplaceBack(1,1),this.viewportBuffer=w.createVertexBuffer(le,vo.members),this.viewportSegments=a.a0.simpleSegment(0,0,4,2);let qe=new a.aZ;qe.emplaceBack(0),qe.emplaceBack(1),qe.emplaceBack(3),qe.emplaceBack(2),qe.emplaceBack(0),this.tileBorderIndexBuffer=w.createIndexBuffer(qe);let Xe=new a.aY;Xe.emplaceBack(0,1,2),Xe.emplaceBack(2,1,3),this.quadTriangleIndexBuffer=w.createIndexBuffer(Xe);let ot=this.context.gl;this.stencilClearMode=new Yo({func:ot.ALWAYS,mask:0},0,255,ot.ZERO,ot.ZERO,ot.ZERO)}clearStencil(){let w=this.context,B=w.gl;this.nextStencilID=1,this.currentStencilSource=void 0;let Q=a.H();a.aP(Q,0,this.width,this.height,0,0,1),a.K(Q,Q,[B.drawingBufferWidth,B.drawingBufferHeight,0]),this.useProgram("clippingMask").draw(w,B.TRIANGLES,Po.disabled,this.stencilClearMode,qs.disabled,Pa.disabled,Lo(Q),null,"$clipping",this.viewportBuffer,this.quadTriangleIndexBuffer,this.viewportSegments)}_renderTileClippingMasks(w,B){if(this.currentStencilSource===w.source||!w.isTileClipped()||!B||!B.length)return;this.currentStencilSource=w.source;let Q=this.context,ee=Q.gl;this.nextStencilID+B.length>256&&this.clearStencil(),Q.setColorMode(qs.disabled),Q.setDepthMode(Po.disabled);let le=this.useProgram("clippingMask");this._tileClippingMaskIDs={};for(let qe of B){let Xe=this._tileClippingMaskIDs[qe.key]=this.nextStencilID++,ot=this.style.map.terrain&&this.style.map.terrain.getTerrainData(qe);le.draw(Q,ee.TRIANGLES,Po.disabled,new Yo({func:ee.ALWAYS,mask:0},Xe,255,ee.KEEP,ee.KEEP,ee.REPLACE),qs.disabled,Pa.disabled,Lo(qe.posMatrix),ot,"$clipping",this.tileExtentBuffer,this.quadTriangleIndexBuffer,this.tileExtentSegments)}}stencilModeFor3D(){this.currentStencilSource=void 0,this.nextStencilID+1>256&&this.clearStencil();let w=this.nextStencilID++,B=this.context.gl;return new Yo({func:B.NOTEQUAL,mask:255},w,255,B.KEEP,B.KEEP,B.REPLACE)}stencilModeForClipping(w){let B=this.context.gl;return new Yo({func:B.EQUAL,mask:255},this._tileClippingMaskIDs[w.key],0,B.KEEP,B.KEEP,B.REPLACE)}stencilConfigForOverlap(w){let B=this.context.gl,Q=w.sort((qe,Xe)=>Xe.overscaledZ-qe.overscaledZ),ee=Q[Q.length-1].overscaledZ,le=Q[0].overscaledZ-ee+1;if(le>1){this.currentStencilSource=void 0,this.nextStencilID+le>256&&this.clearStencil();let qe={};for(let Xe=0;Xe<le;Xe++)qe[Xe+ee]=new Yo({func:B.GEQUAL,mask:255},Xe+this.nextStencilID,255,B.KEEP,B.KEEP,B.REPLACE);return this.nextStencilID+=le,[qe,Q]}return[{[ee]:Yo.disabled},Q]}colorModeForRenderPass(){let w=this.context.gl;return this._showOverdrawInspector?new qs([w.CONSTANT_COLOR,w.ONE],new a.aM(.125,.125,.125,0),[!0,!0,!0,!0]):this.renderPass==="opaque"?qs.unblended:qs.alphaBlended}depthModeForSublayer(w,B,Q){if(!this.opaquePassEnabledForLayer())return Po.disabled;let ee=1-((1+this.currentLayer)*this.numSublayers+w)*this.depthEpsilon;return new Po(Q||this.context.gl.LEQUAL,B,[ee,ee])}opaquePassEnabledForLayer(){return this.currentLayer<this.opaquePassCutoff}render(w,B){this.style=w,this.options=B,this.lineAtlas=w.lineAtlas,this.imageManager=w.imageManager,this.glyphManager=w.glyphManager,this.symbolFadeChange=w.placement.symbolFadeChange(u.now()),this.imageManager.beginFrame();let Q=this.style._order,ee=this.style.sourceCaches,le={},qe={},Xe={};for(let ot in ee){let Tt=ee[ot];Tt.used&&Tt.prepare(this.context),le[ot]=Tt.getVisibleCoordinates(),qe[ot]=le[ot].slice().reverse(),Xe[ot]=Tt.getVisibleCoordinates(!0).reverse()}this.opaquePassCutoff=1/0;for(let ot=0;ot<Q.length;ot++)if(this.style._layers[Q[ot]].is3D()){this.opaquePassCutoff=ot;break}this.maybeDrawDepthAndCoords(!1),this.renderToTexture&&(this.renderToTexture.prepareForRender(this.style,this.transform.zoom),this.opaquePassCutoff=0),this.renderPass="offscreen";for(let ot of Q){let Tt=this.style._layers[ot];if(!Tt.hasOffscreenPass()||Tt.isHidden(this.transform.zoom))continue;let Yt=qe[Tt.source];(Tt.type==="custom"||Yt.length)&&this.renderLayer(this,ee[Tt.source],Tt,Yt)}if(this.context.bindFramebuffer.set(null),this.context.clear({color:B.showOverdrawInspector?a.aM.black:a.aM.transparent,depth:1}),this.clearStencil(),this.style.sky&&function(ot,Tt){let Yt=ot.context,Kt=Yt.gl,xr=((Be,et,We)=>({u_sky_color:Be.properties.get("sky-color"),u_horizon_color:Be.properties.get("horizon-color"),u_horizon:(et.height/2+et.getHorizon())*We,u_sky_horizon_blend:Be.properties.get("sky-horizon-blend")*et.height/2*We}))(Tt,ot.style.map.transform,ot.pixelRatio),Ir=new Po(Kt.LEQUAL,Po.ReadWrite,[0,1]),ve=Yo.disabled,be=ot.colorModeForRenderPass(),De=ot.useProgram("sky");if(!Tt.mesh){let Be=new a.aX;Be.emplaceBack(-1,-1),Be.emplaceBack(1,-1),Be.emplaceBack(1,1),Be.emplaceBack(-1,1);let et=new a.aY;et.emplaceBack(0,1,2),et.emplaceBack(0,2,3),Tt.mesh=new Eu(Yt.createVertexBuffer(Be,vo.members),Yt.createIndexBuffer(et),a.a0.simpleSegment(0,0,Be.length,et.length))}De.draw(Yt,Kt.TRIANGLES,Ir,ve,be,Pa.disabled,xr,void 0,"sky",Tt.mesh.vertexBuffer,Tt.mesh.indexBuffer,Tt.mesh.segments)}(this,this.style.sky),this._showOverdrawInspector=B.showOverdrawInspector,this.depthRangeFor3D=[0,1-(w._order.length+2)*this.numSublayers*this.depthEpsilon],!this.renderToTexture)for(this.renderPass="opaque",this.currentLayer=Q.length-1;this.currentLayer>=0;this.currentLayer--){let ot=this.style._layers[Q[this.currentLayer]],Tt=ee[ot.source],Yt=le[ot.source];this._renderTileClippingMasks(ot,Yt),this.renderLayer(this,Tt,ot,Yt)}for(this.renderPass="translucent",this.currentLayer=0;this.currentLayer<Q.length;this.currentLayer++){let ot=this.style._layers[Q[this.currentLayer]],Tt=ee[ot.source];if(this.renderToTexture&&this.renderToTexture.renderLayer(ot))continue;let Yt=(ot.type==="symbol"?Xe:qe)[ot.source];this._renderTileClippingMasks(ot,le[ot.source]),this.renderLayer(this,Tt,ot,Yt)}if(this.options.showTileBoundaries){let ot=function(Tt,Yt){let Kt=null,xr=Object.values(Tt._layers).flatMap(De=>De.source&&!De.isHidden(Yt)?[Tt.sourceCaches[De.source]]:[]),Ir=xr.filter(De=>De.getSource().type==="vector"),ve=xr.filter(De=>De.getSource().type!=="vector"),be=De=>{(!Kt||Kt.getSource().maxzoom<De.getSource().maxzoom)&&(Kt=De)};return Ir.forEach(De=>be(De)),Kt||ve.forEach(De=>be(De)),Kt}(this.style,this.transform.zoom);ot&&function(Tt,Yt,Kt){for(let xr=0;xr<Kt.length;xr++)ih(Tt,Yt,Kt[xr])}(this,ot,ot.getVisibleCoordinates())}this.options.showPadding&&function(ot){let Tt=ot.transform.padding;sf(ot,ot.transform.height-(Tt.top||0),3,Rc),sf(ot,Tt.bottom||0,3,gs),Th(ot,Tt.left||0,3,jf),Th(ot,ot.transform.width-(Tt.right||0),3,Gh);let Yt=ot.transform.centerPoint;(function(Kt,xr,Ir,ve){Mu(Kt,xr-1,Ir-10,2,20,ve),Mu(Kt,xr-10,Ir-1,20,2,ve)})(ot,Yt.x,ot.transform.height-Yt.y,rh)}(this),this.context.setDefault()}maybeDrawDepthAndCoords(w){if(!this.style||!this.style.map||!this.style.map.terrain)return;let B=this.terrainFacilitator.matrix,Q=this.transform.modelViewProjectionMatrix,ee=this.terrainFacilitator.dirty;ee||(ee=w?!a.a_(B,Q):!a.a$(B,Q)),ee||(ee=this.style.map.terrain.sourceCache.tilesAfterTime(this.terrainFacilitator.renderTime).length>0),ee&&(a.b0(B,Q),this.terrainFacilitator.renderTime=Date.now(),this.terrainFacilitator.dirty=!1,function(le,qe){let Xe=le.context,ot=Xe.gl,Tt=qs.unblended,Yt=new Po(ot.LEQUAL,Po.ReadWrite,[0,1]),Kt=qe.getTerrainMesh(),xr=qe.sourceCache.getRenderableTiles(),Ir=le.useProgram("terrainDepth");Xe.bindFramebuffer.set(qe.getFramebuffer("depth").framebuffer),Xe.viewport.set([0,0,le.width/devicePixelRatio,le.height/devicePixelRatio]),Xe.clear({color:a.aM.transparent,depth:1});for(let ve of xr){let be=qe.getTerrainData(ve.tileID),De={u_matrix:le.transform.calculatePosMatrix(ve.tileID.toUnwrapped()),u_ele_delta:qe.getMeshFrameDelta(le.transform.zoom)};Ir.draw(Xe,ot.TRIANGLES,Yt,Yo.disabled,Tt,Pa.backCCW,De,be,"terrain",Kt.vertexBuffer,Kt.indexBuffer,Kt.segments)}Xe.bindFramebuffer.set(null),Xe.viewport.set([0,0,le.width,le.height])}(this,this.style.map.terrain),function(le,qe){let Xe=le.context,ot=Xe.gl,Tt=qs.unblended,Yt=new Po(ot.LEQUAL,Po.ReadWrite,[0,1]),Kt=qe.getTerrainMesh(),xr=qe.getCoordsTexture(),Ir=qe.sourceCache.getRenderableTiles(),ve=le.useProgram("terrainCoords");Xe.bindFramebuffer.set(qe.getFramebuffer("coords").framebuffer),Xe.viewport.set([0,0,le.width/devicePixelRatio,le.height/devicePixelRatio]),Xe.clear({color:a.aM.transparent,depth:1}),qe.coordsIndex=[];for(let be of Ir){let De=qe.getTerrainData(be.tileID);Xe.activeTexture.set(ot.TEXTURE0),ot.bindTexture(ot.TEXTURE_2D,xr.texture);let Be={u_matrix:le.transform.calculatePosMatrix(be.tileID.toUnwrapped()),u_terrain_coords_id:(255-qe.coordsIndex.length)/255,u_texture:0,u_ele_delta:qe.getMeshFrameDelta(le.transform.zoom)};ve.draw(Xe,ot.TRIANGLES,Yt,Yo.disabled,Tt,Pa.backCCW,Be,De,"terrain",Kt.vertexBuffer,Kt.indexBuffer,Kt.segments),qe.coordsIndex.push(be.tileID.key)}Xe.bindFramebuffer.set(null),Xe.viewport.set([0,0,le.width,le.height])}(this,this.style.map.terrain))}renderLayer(w,B,Q,ee){if(!Q.isHidden(this.transform.zoom)&&(Q.type==="background"||Q.type==="custom"||(ee||[]).length))switch(this.id=Q.id,Q.type){case"symbol":(function(le,qe,Xe,ot,Tt){if(le.renderPass!=="translucent")return;let Yt=Yo.disabled,Kt=le.colorModeForRenderPass();(Xe._unevaluatedLayout.hasValue("text-variable-anchor")||Xe._unevaluatedLayout.hasValue("text-variable-anchor-offset"))&&function(xr,Ir,ve,be,De,Be,et,We,it){let Ft=Ir.transform,Ht=yn(),tr=De==="map",dr=Be==="map";for(let Sr of xr){let Or=be.getTile(Sr),Wr=Or.getBucket(ve);if(!Wr||!Wr.text||!Wr.text.segments.get().length)continue;let ni=a.ag(Wr.textSizeData,Ft.zoom),Pi=In(Or,1,Ir.transform.zoom),cn=Br(Sr.posMatrix,dr,tr,Ir.transform,Pi),ln=ve.layout.get("icon-text-fit")!=="none"&&Wr.hasIconData();if(ni){let Cn=Math.pow(2,Ft.zoom-Or.tileID.overscaledZ),Kn=Ir.style.map.terrain?(fa,$a)=>Ir.style.map.terrain.getElevation(Sr,fa,$a):null,Ta=Ht.translatePosition(Ft,Or,et,We);yf(Wr,tr,dr,it,Ft,cn,Sr.posMatrix,Cn,ni,ln,Ht,Ta,Sr.toUnwrapped(),Kn)}}}(ot,le,Xe,qe,Xe.layout.get("text-rotation-alignment"),Xe.layout.get("text-pitch-alignment"),Xe.paint.get("text-translate"),Xe.paint.get("text-translate-anchor"),Tt),Xe.paint.get("icon-opacity").constantOr(1)!==0&&wh(le,qe,Xe,ot,!1,Xe.paint.get("icon-translate"),Xe.paint.get("icon-translate-anchor"),Xe.layout.get("icon-rotation-alignment"),Xe.layout.get("icon-pitch-alignment"),Xe.layout.get("icon-keep-upright"),Yt,Kt),Xe.paint.get("text-opacity").constantOr(1)!==0&&wh(le,qe,Xe,ot,!0,Xe.paint.get("text-translate"),Xe.paint.get("text-translate-anchor"),Xe.layout.get("text-rotation-alignment"),Xe.layout.get("text-pitch-alignment"),Xe.layout.get("text-keep-upright"),Yt,Kt),qe.map.showCollisionBoxes&&(Hu(le,qe,Xe,ot,!0),Hu(le,qe,Xe,ot,!1))})(w,B,Q,ee,this.style.placement.variableOffsets);break;case"circle":(function(le,qe,Xe,ot){if(le.renderPass!=="translucent")return;let Tt=Xe.paint.get("circle-opacity"),Yt=Xe.paint.get("circle-stroke-width"),Kt=Xe.paint.get("circle-stroke-opacity"),xr=!Xe.layout.get("circle-sort-key").isConstant();if(Tt.constantOr(1)===0&&(Yt.constantOr(1)===0||Kt.constantOr(1)===0))return;let Ir=le.context,ve=Ir.gl,be=le.depthModeForSublayer(0,Po.ReadOnly),De=Yo.disabled,Be=le.colorModeForRenderPass(),et=[];for(let We=0;We<ot.length;We++){let it=ot[We],Ft=qe.getTile(it),Ht=Ft.getBucket(Xe);if(!Ht)continue;let tr=Ht.programConfigurations.get(Xe.id),dr=le.useProgram("circle",tr),Sr=Ht.layoutVertexBuffer,Or=Ht.indexBuffer,Wr=le.style.map.terrain&&le.style.map.terrain.getTerrainData(it),ni={programConfiguration:tr,program:dr,layoutVertexBuffer:Sr,indexBuffer:Or,uniformValues:_o(le,it,Ft,Xe),terrainData:Wr};if(xr){let Pi=Ht.segments.get();for(let cn of Pi)et.push({segments:new a.a0([cn]),sortKey:cn.sortKey,state:ni})}else et.push({segments:Ht.segments,sortKey:0,state:ni})}xr&&et.sort((We,it)=>We.sortKey-it.sortKey);for(let We of et){let{programConfiguration:it,program:Ft,layoutVertexBuffer:Ht,indexBuffer:tr,uniformValues:dr,terrainData:Sr}=We.state;Ft.draw(Ir,ve.TRIANGLES,be,De,Be,Pa.disabled,dr,Sr,Xe.id,Ht,tr,We.segments,Xe.paint,le.transform.zoom,it)}})(w,B,Q,ee);break;case"heatmap":(function(le,qe,Xe,ot){if(Xe.paint.get("heatmap-opacity")===0)return;let Tt=le.context;if(le.style.map.terrain){for(let Yt of ot){let Kt=qe.getTile(Yt);qe.hasRenderableParent(Yt)||(le.renderPass==="offscreen"?_f(le,Kt,Xe,Yt):le.renderPass==="translucent"&&Yc(le,Xe,Yt))}Tt.viewport.set([0,0,le.width,le.height])}else le.renderPass==="offscreen"?function(Yt,Kt,xr,Ir){let ve=Yt.context,be=ve.gl,De=Yo.disabled,Be=new qs([be.ONE,be.ONE],a.aM.transparent,[!0,!0,!0,!0]);(function(et,We,it){let Ft=et.gl;et.activeTexture.set(Ft.TEXTURE1),et.viewport.set([0,0,We.width/4,We.height/4]);let Ht=it.heatmapFbos.get(a.aU);Ht?(Ft.bindTexture(Ft.TEXTURE_2D,Ht.colorAttachment.get()),et.bindFramebuffer.set(Ht.framebuffer)):(Ht=eh(et,We.width/4,We.height/4),it.heatmapFbos.set(a.aU,Ht))})(ve,Yt,xr),ve.clear({color:a.aM.transparent});for(let et=0;et<Ir.length;et++){let We=Ir[et];if(Kt.hasRenderableParent(We))continue;let it=Kt.getTile(We),Ft=it.getBucket(xr);if(!Ft)continue;let Ht=Ft.programConfigurations.get(xr.id),tr=Yt.useProgram("heatmap",Ht),{zoom:dr}=Yt.transform;tr.draw(ve,be.TRIANGLES,Po.disabled,De,Be,Pa.disabled,ko(We.posMatrix,it,dr,xr.paint.get("heatmap-intensity")),null,xr.id,Ft.layoutVertexBuffer,Ft.indexBuffer,Ft.segments,xr.paint,Yt.transform.zoom,Ht)}ve.viewport.set([0,0,Yt.width,Yt.height])}(le,qe,Xe,ot):le.renderPass==="translucent"&&function(Yt,Kt){let xr=Yt.context,Ir=xr.gl;xr.setColorMode(Yt.colorModeForRenderPass());let ve=Kt.heatmapFbos.get(a.aU);ve&&(xr.activeTexture.set(Ir.TEXTURE0),Ir.bindTexture(Ir.TEXTURE_2D,ve.colorAttachment.get()),xr.activeTexture.set(Ir.TEXTURE1),th(xr,Kt).bind(Ir.LINEAR,Ir.CLAMP_TO_EDGE),Yt.useProgram("heatmapTexture").draw(xr,Ir.TRIANGLES,Po.disabled,Yo.disabled,Yt.colorModeForRenderPass(),Pa.disabled,Ds(Yt,Kt,0,1),null,Kt.id,Yt.viewportBuffer,Yt.quadTriangleIndexBuffer,Yt.viewportSegments,Kt.paint,Yt.transform.zoom))}(le,Xe)})(w,B,Q,ee);break;case"line":(function(le,qe,Xe,ot){if(le.renderPass!=="translucent")return;let Tt=Xe.paint.get("line-opacity"),Yt=Xe.paint.get("line-width");if(Tt.constantOr(1)===0||Yt.constantOr(1)===0)return;let Kt=le.depthModeForSublayer(0,Po.ReadOnly),xr=le.colorModeForRenderPass(),Ir=Xe.paint.get("line-dasharray"),ve=Xe.paint.get("line-pattern"),be=ve.constantOr(1),De=Xe.paint.get("line-gradient"),Be=Xe.getCrossfadeParameters(),et=be?"linePattern":Ir?"lineSDF":De?"lineGradient":"line",We=le.context,it=We.gl,Ft=!0;for(let Ht of ot){let tr=qe.getTile(Ht);if(be&&!tr.patternsLoaded())continue;let dr=tr.getBucket(Xe);if(!dr)continue;let Sr=dr.programConfigurations.get(Xe.id),Or=le.context.program.get(),Wr=le.useProgram(et,Sr),ni=Ft||Wr.program!==Or,Pi=le.style.map.terrain&&le.style.map.terrain.getTerrainData(Ht),cn=ve.constantOr(null);if(cn&&tr.imageAtlas){let Kn=tr.imageAtlas,Ta=Kn.patternPositions[cn.to.toString()],fa=Kn.patternPositions[cn.from.toString()];Ta&&fa&&Sr.setConstantPatternPositions(Ta,fa)}let ln=Pi?Ht:null,Cn=be?zl(le,tr,Xe,Be,ln):Ir?us(le,tr,Xe,Ir,Be,ln):De?ul(le,tr,Xe,dr.lineClipsArray.length,ln):ll(le,tr,Xe,ln);if(be)We.activeTexture.set(it.TEXTURE0),tr.imageAtlasTexture.bind(it.LINEAR,it.CLAMP_TO_EDGE),Sr.updatePaintBuffers(Be);else if(Ir&&(ni||le.lineAtlas.dirty))We.activeTexture.set(it.TEXTURE0),le.lineAtlas.bind(We);else if(De){let Kn=dr.gradients[Xe.id],Ta=Kn.texture;if(Xe.gradientVersion!==Kn.version){let fa=256;if(Xe.stepInterpolant){let $a=qe.getSource().maxzoom,Co=Ht.canonical.z===$a?Math.ceil(1<<le.transform.maxZoom-Ht.canonical.z):1;fa=a.ac(a.aV(dr.maxLineLength/a.X*1024*Co),256,We.maxTextureSize)}Kn.gradient=a.aW({expression:Xe.gradientExpression(),evaluationKey:"lineProgress",resolution:fa,image:Kn.gradient||void 0,clips:dr.lineClipsArray}),Kn.texture?Kn.texture.update(Kn.gradient):Kn.texture=new g(We,Kn.gradient,it.RGBA),Kn.version=Xe.gradientVersion,Ta=Kn.texture}We.activeTexture.set(it.TEXTURE0),Ta.bind(Xe.stepInterpolant?it.NEAREST:it.LINEAR,it.CLAMP_TO_EDGE)}Wr.draw(We,it.TRIANGLES,Kt,le.stencilModeForClipping(Ht),xr,Pa.disabled,Cn,Pi,Xe.id,dr.layoutVertexBuffer,dr.indexBuffer,dr.segments,Xe.paint,le.transform.zoom,Sr,dr.layoutVertexBuffer2),Ft=!1}})(w,B,Q,ee);break;case"fill":(function(le,qe,Xe,ot){let Tt=Xe.paint.get("fill-color"),Yt=Xe.paint.get("fill-opacity");if(Yt.constantOr(1)===0)return;let Kt=le.colorModeForRenderPass(),xr=Xe.paint.get("fill-pattern"),Ir=le.opaquePassEnabledForLayer()&&!xr.constantOr(1)&&Tt.constantOr(a.aM.transparent).a===1&&Yt.constantOr(0)===1?"opaque":"translucent";if(le.renderPass===Ir){let ve=le.depthModeForSublayer(1,le.renderPass==="opaque"?Po.ReadWrite:Po.ReadOnly);Hf(le,qe,Xe,ot,ve,Kt,!1)}if(le.renderPass==="translucent"&&Xe.paint.get("fill-antialias")){let ve=le.depthModeForSublayer(Xe.getPaintProperty("fill-outline-color")?2:0,Po.ReadOnly);Hf(le,qe,Xe,ot,ve,Kt,!0)}})(w,B,Q,ee);break;case"fill-extrusion":(function(le,qe,Xe,ot){let Tt=Xe.paint.get("fill-extrusion-opacity");if(Tt!==0&&le.renderPass==="translucent"){let Yt=new Po(le.context.gl.LEQUAL,Po.ReadWrite,le.depthRangeFor3D);if(Tt!==1||Xe.paint.get("fill-extrusion-pattern").constantOr(1))cc(le,qe,Xe,ot,Yt,Yo.disabled,qs.disabled),cc(le,qe,Xe,ot,Yt,le.stencilModeFor3D(),le.colorModeForRenderPass());else{let Kt=le.colorModeForRenderPass();cc(le,qe,Xe,ot,Yt,Yo.disabled,Kt)}}})(w,B,Q,ee);break;case"hillshade":(function(le,qe,Xe,ot){if(le.renderPass!=="offscreen"&&le.renderPass!=="translucent")return;let Tt=le.context,Yt=le.depthModeForSublayer(0,Po.ReadOnly),Kt=le.colorModeForRenderPass(),[xr,Ir]=le.renderPass==="translucent"?le.stencilConfigForOverlap(ot):[{},ot];for(let ve of Ir){let be=qe.getTile(ve);be.needsHillshadePrepare!==void 0&&be.needsHillshadePrepare&&le.renderPass==="offscreen"?Nl(le,be,Xe,Yt,Yo.disabled,Kt):le.renderPass==="translucent"&&of(le,ve,be,Xe,Yt,xr[ve.overscaledZ],Kt)}Tt.viewport.set([0,0,le.width,le.height])})(w,B,Q,ee);break;case"raster":(function(le,qe,Xe,ot){if(le.renderPass!=="translucent"||Xe.paint.get("raster-opacity")===0||!ot.length)return;let Tt=le.context,Yt=Tt.gl,Kt=qe.getSource(),xr=le.useProgram("raster"),Ir=le.colorModeForRenderPass(),[ve,be]=Kt instanceof Gt?[{},ot]:le.stencilConfigForOverlap(ot),De=be[be.length-1].overscaledZ,Be=!le.options.moving;for(let et of be){let We=le.depthModeForSublayer(et.overscaledZ-De,Xe.paint.get("raster-opacity")===1?Po.ReadWrite:Po.ReadOnly,Yt.LESS),it=qe.getTile(et);it.registerFadeDuration(Xe.paint.get("raster-fade-duration"));let Ft=qe.findLoadedParent(et,0),Ht=qe.findLoadedSibling(et),tr=Kc(it,Ft||Ht||null,qe,Xe,le.transform,le.style.map.terrain),dr,Sr,Or=Xe.paint.get("raster-resampling")==="nearest"?Yt.NEAREST:Yt.LINEAR;Tt.activeTexture.set(Yt.TEXTURE0),it.texture.bind(Or,Yt.CLAMP_TO_EDGE,Yt.LINEAR_MIPMAP_NEAREST),Tt.activeTexture.set(Yt.TEXTURE1),Ft?(Ft.texture.bind(Or,Yt.CLAMP_TO_EDGE,Yt.LINEAR_MIPMAP_NEAREST),dr=Math.pow(2,Ft.tileID.overscaledZ-it.tileID.overscaledZ),Sr=[it.tileID.canonical.x*dr%1,it.tileID.canonical.y*dr%1]):it.texture.bind(Or,Yt.CLAMP_TO_EDGE,Yt.LINEAR_MIPMAP_NEAREST),it.texture.useMipmap&&Tt.extTextureFilterAnisotropic&&le.transform.pitch>20&&Yt.texParameterf(Yt.TEXTURE_2D,Tt.extTextureFilterAnisotropic.TEXTURE_MAX_ANISOTROPY_EXT,Tt.extTextureFilterAnisotropicMax);let Wr=le.style.map.terrain&&le.style.map.terrain.getTerrainData(et),ni=Wr?et:null,Pi=ni?ni.posMatrix:le.transform.calculatePosMatrix(et.toUnwrapped(),Be),cn=cl(Pi,Sr||[0,0],dr||1,tr,Xe);Kt instanceof Gt?xr.draw(Tt,Yt.TRIANGLES,We,Yo.disabled,Ir,Pa.disabled,cn,Wr,Xe.id,Kt.boundsBuffer,le.quadTriangleIndexBuffer,Kt.boundsSegments):xr.draw(Tt,Yt.TRIANGLES,We,ve[et.overscaledZ],Ir,Pa.disabled,cn,Wr,Xe.id,le.rasterBoundsBuffer,le.quadTriangleIndexBuffer,le.rasterBoundsSegments)}})(w,B,Q,ee);break;case"background":(function(le,qe,Xe,ot){let Tt=Xe.paint.get("background-color"),Yt=Xe.paint.get("background-opacity");if(Yt===0)return;let Kt=le.context,xr=Kt.gl,Ir=le.transform,ve=Ir.tileSize,be=Xe.paint.get("background-pattern");if(le.isPatternMissing(be))return;let De=!be&&Tt.a===1&&Yt===1&&le.opaquePassEnabledForLayer()?"opaque":"translucent";if(le.renderPass!==De)return;let Be=Yo.disabled,et=le.depthModeForSublayer(0,De==="opaque"?Po.ReadWrite:Po.ReadOnly),We=le.colorModeForRenderPass(),it=le.useProgram(be?"backgroundPattern":"background"),Ft=ot||Ir.coveringTiles({tileSize:ve,terrain:le.style.map.terrain});be&&(Kt.activeTexture.set(xr.TEXTURE0),le.imageManager.bind(le.context));let Ht=Xe.getCrossfadeParameters();for(let tr of Ft){let dr=ot?tr.posMatrix:le.transform.calculatePosMatrix(tr.toUnwrapped()),Sr=be?Su(dr,Yt,le,be,{tileID:tr,tileSize:ve},Ht):Zl(dr,Yt,Tt),Or=le.style.map.terrain&&le.style.map.terrain.getTerrainData(tr);it.draw(Kt,xr.TRIANGLES,et,Be,We,Pa.disabled,Sr,Or,Xe.id,le.tileExtentBuffer,le.quadTriangleIndexBuffer,le.tileExtentSegments)}})(w,0,Q,ee);break;case"custom":(function(le,qe,Xe){let ot=le.context,Tt=Xe.implementation;if(le.renderPass==="offscreen"){let Yt=Tt.prerender;Yt&&(le.setCustomLayerDefaults(),ot.setColorMode(le.colorModeForRenderPass()),Yt.call(Tt,ot.gl,le.transform.customLayerMatrix()),ot.setDirty(),le.setBaseState())}else if(le.renderPass==="translucent"){le.setCustomLayerDefaults(),ot.setColorMode(le.colorModeForRenderPass()),ot.setStencilMode(Yo.disabled);let Yt=Tt.renderingMode==="3d"?new Po(le.context.gl.LEQUAL,Po.ReadWrite,le.depthRangeFor3D):le.depthModeForSublayer(0,Po.ReadOnly);ot.setDepthMode(Yt),Tt.render(ot.gl,le.transform.customLayerMatrix(),{farZ:le.transform.farZ,nearZ:le.transform.nearZ,fov:le.transform._fov,modelViewProjectionMatrix:le.transform.modelViewProjectionMatrix,projectionMatrix:le.transform.projectionMatrix}),ot.setDirty(),le.setBaseState(),ot.bindFramebuffer.set(null)}})(w,0,Q)}}translatePosMatrix(w,B,Q,ee,le){if(!Q[0]&&!Q[1])return w;let qe=le?ee==="map"?this.transform.angle:0:ee==="viewport"?-this.transform.angle:0;if(qe){let Tt=Math.sin(qe),Yt=Math.cos(qe);Q=[Q[0]*Yt-Q[1]*Tt,Q[0]*Tt+Q[1]*Yt]}let Xe=[le?Q[0]:In(B,Q[0],this.transform.zoom),le?Q[1]:In(B,Q[1],this.transform.zoom),0],ot=new Float32Array(16);return a.J(ot,w,Xe),ot}saveTileTexture(w){let B=this._tileTextures[w.size[0]];B?B.push(w):this._tileTextures[w.size[0]]=[w]}getTileTexture(w){let B=this._tileTextures[w];return B&&B.length>0?B.pop():null}isPatternMissing(w){if(!w)return!1;if(!w.from||!w.to)return!0;let B=this.imageManager.getPattern(w.from.toString()),Q=this.imageManager.getPattern(w.to.toString());return!B||!Q}useProgram(w,B){this.cache=this.cache||{};let Q=w+(B?B.cacheKey:"")+(this._showOverdrawInspector?"/overdraw":"")+(this.style.map.terrain?"/terrain":"");return this.cache[Q]||(this.cache[Q]=new hn(this.context,jn[w],B,nc[w],this._showOverdrawInspector,this.style.map.terrain)),this.cache[Q]}setCustomLayerDefaults(){this.context.unbindVAO(),this.context.cullFace.setDefault(),this.context.activeTexture.setDefault(),this.context.pixelStoreUnpack.setDefault(),this.context.pixelStoreUnpackPremultiplyAlpha.setDefault(),this.context.pixelStoreUnpackFlipY.setDefault()}setBaseState(){let w=this.context.gl;this.context.cullFace.set(!1),this.context.viewport.set([0,0,this.width,this.height]),this.context.blendEquation.set(w.FUNC_ADD)}initDebugOverlayCanvas(){this.debugOverlayCanvas==null&&(this.debugOverlayCanvas=document.createElement("canvas"),this.debugOverlayCanvas.width=512,this.debugOverlayCanvas.height=512,this.debugOverlayTexture=new g(this.context,this.debugOverlayCanvas,this.context.gl.RGBA))}destroy(){this.debugOverlayTexture&&this.debugOverlayTexture.destroy()}overLimit(){let{drawingBufferWidth:w,drawingBufferHeight:B}=this.context.gl;return this.width!==w||this.height!==B}}class ks{constructor(w,B){this.points=w,this.planes=B}static fromInvProjectionMatrix(w,B,Q){let ee=Math.pow(2,Q),le=[[-1,1,-1,1],[1,1,-1,1],[1,-1,-1,1],[-1,-1,-1,1],[-1,1,1,1],[1,1,1,1],[1,-1,1,1],[-1,-1,1,1]].map(Xe=>{let ot=1/(Xe=a.af([],Xe,w))[3]/B*ee;return a.b1(Xe,Xe,[ot,ot,1/Xe[3],ot])}),qe=[[0,1,2],[6,5,4],[0,3,7],[2,1,5],[3,2,6],[0,4,5]].map(Xe=>{let ot=function(xr,Ir){var ve=Ir[0],be=Ir[1],De=Ir[2],Be=ve*ve+be*be+De*De;return Be>0&&(Be=1/Math.sqrt(Be)),xr[0]=Ir[0]*Be,xr[1]=Ir[1]*Be,xr[2]=Ir[2]*Be,xr}([],function(xr,Ir,ve){var be=Ir[0],De=Ir[1],Be=Ir[2],et=ve[0],We=ve[1],it=ve[2];return xr[0]=De*it-Be*We,xr[1]=Be*et-be*it,xr[2]=be*We-De*et,xr}([],L([],le[Xe[0]],le[Xe[1]]),L([],le[Xe[2]],le[Xe[1]]))),Tt=-((Yt=ot)[0]*(Kt=le[Xe[1]])[0]+Yt[1]*Kt[1]+Yt[2]*Kt[2]);var Yt,Kt;return ot.concat(Tt)});return new ks(le,qe)}}class bc{constructor(w,B){this.min=w,this.max=B,this.center=function(Q,ee,le){return Q[0]=.5*ee[0],Q[1]=.5*ee[1],Q[2]=.5*ee[2],Q}([],function(Q,ee,le){return Q[0]=ee[0]+le[0],Q[1]=ee[1]+le[1],Q[2]=ee[2]+le[2],Q}([],this.min,this.max))}quadrant(w){let B=[w%2==0,w<2],Q=E(this.min),ee=E(this.max);for(let le=0;le<B.length;le++)Q[le]=B[le]?this.min[le]:this.center[le],ee[le]=B[le]?this.center[le]:this.max[le];return ee[2]=this.max[2],new bc(Q,ee)}distanceX(w){return Math.max(Math.min(this.max[0],w[0]),this.min[0])-w[0]}distanceY(w){return Math.max(Math.min(this.max[1],w[1]),this.min[1])-w[1]}intersects(w){let B=[[this.min[0],this.min[1],this.min[2],1],[this.max[0],this.min[1],this.min[2],1],[this.max[0],this.max[1],this.min[2],1],[this.min[0],this.max[1],this.min[2],1],[this.min[0],this.min[1],this.max[2],1],[this.max[0],this.min[1],this.max[2],1],[this.max[0],this.max[1],this.max[2],1],[this.min[0],this.max[1],this.max[2],1]],Q=!0;for(let ee=0;ee<w.planes.length;ee++){let le=w.planes[ee],qe=0;for(let Xe=0;Xe<B.length;Xe++)a.b2(le,B[Xe])>=0&&qe++;if(qe===0)return 0;qe!==B.length&&(Q=!1)}if(Q)return 2;for(let ee=0;ee<3;ee++){let le=Number.MAX_VALUE,qe=-Number.MAX_VALUE;for(let Xe=0;Xe<w.points.length;Xe++){let ot=w.points[Xe][ee]-this.min[ee];le=Math.min(le,ot),qe=Math.max(qe,ot)}if(qe<0||le>this.max[ee]-this.min[ee])return 0}return 1}}class hu{constructor(w=0,B=0,Q=0,ee=0){if(isNaN(w)||w<0||isNaN(B)||B<0||isNaN(Q)||Q<0||isNaN(ee)||ee<0)throw new Error("Invalid value for edge-insets, top, bottom, left and right must all be numbers");this.top=w,this.bottom=B,this.left=Q,this.right=ee}interpolate(w,B,Q){return B.top!=null&&w.top!=null&&(this.top=a.y.number(w.top,B.top,Q)),B.bottom!=null&&w.bottom!=null&&(this.bottom=a.y.number(w.bottom,B.bottom,Q)),B.left!=null&&w.left!=null&&(this.left=a.y.number(w.left,B.left,Q)),B.right!=null&&w.right!=null&&(this.right=a.y.number(w.right,B.right,Q)),this}getCenter(w,B){let Q=a.ac((this.left+w-this.right)/2,0,w),ee=a.ac((this.top+B-this.bottom)/2,0,B);return new a.P(Q,ee)}equals(w){return this.top===w.top&&this.bottom===w.bottom&&this.left===w.left&&this.right===w.right}clone(){return new hu(this.top,this.bottom,this.left,this.right)}toJSON(){return{top:this.top,bottom:this.bottom,left:this.left,right:this.right}}}let _u=85.051129;class nl{constructor(w,B,Q,ee,le){this.tileSize=512,this._renderWorldCopies=le===void 0||!!le,this._minZoom=w||0,this._maxZoom=B||22,this._minPitch=Q==null?0:Q,this._maxPitch=ee==null?60:ee,this.setMaxBounds(),this.width=0,this.height=0,this._center=new a.N(0,0),this._elevation=0,this.zoom=0,this.angle=0,this._fov=.6435011087932844,this._pitch=0,this._unmodified=!0,this._edgeInsets=new hu,this._posMatrixCache={},this._alignedPosMatrixCache={},this._fogMatrixCache={},this.minElevationForCurrentTile=0}clone(){let w=new nl(this._minZoom,this._maxZoom,this._minPitch,this.maxPitch,this._renderWorldCopies);return w.apply(this),w}apply(w){this.tileSize=w.tileSize,this.latRange=w.latRange,this.lngRange=w.lngRange,this.width=w.width,this.height=w.height,this._center=w._center,this._elevation=w._elevation,this.minElevationForCurrentTile=w.minElevationForCurrentTile,this.zoom=w.zoom,this.angle=w.angle,this._fov=w._fov,this._pitch=w._pitch,this._unmodified=w._unmodified,this._edgeInsets=w._edgeInsets.clone(),this._calcMatrices()}get minZoom(){return this._minZoom}set minZoom(w){this._minZoom!==w&&(this._minZoom=w,this.zoom=Math.max(this.zoom,w))}get maxZoom(){return this._maxZoom}set maxZoom(w){this._maxZoom!==w&&(this._maxZoom=w,this.zoom=Math.min(this.zoom,w))}get minPitch(){return this._minPitch}set minPitch(w){this._minPitch!==w&&(this._minPitch=w,this.pitch=Math.max(this.pitch,w))}get maxPitch(){return this._maxPitch}set maxPitch(w){this._maxPitch!==w&&(this._maxPitch=w,this.pitch=Math.min(this.pitch,w))}get renderWorldCopies(){return this._renderWorldCopies}set renderWorldCopies(w){w===void 0?w=!0:w===null&&(w=!1),this._renderWorldCopies=w}get worldSize(){return this.tileSize*this.scale}get centerOffset(){return this.centerPoint._sub(this.size._div(2))}get size(){return new a.P(this.width,this.height)}get bearing(){return-this.angle/Math.PI*180}set bearing(w){let B=-a.b3(w,-180,180)*Math.PI/180;this.angle!==B&&(this._unmodified=!1,this.angle=B,this._calcMatrices(),this.rotationMatrix=function(){var Q=new a.A(4);return a.A!=Float32Array&&(Q[1]=0,Q[2]=0),Q[0]=1,Q[3]=1,Q}(),function(Q,ee,le){var qe=ee[0],Xe=ee[1],ot=ee[2],Tt=ee[3],Yt=Math.sin(le),Kt=Math.cos(le);Q[0]=qe*Kt+ot*Yt,Q[1]=Xe*Kt+Tt*Yt,Q[2]=qe*-Yt+ot*Kt,Q[3]=Xe*-Yt+Tt*Kt}(this.rotationMatrix,this.rotationMatrix,this.angle))}get pitch(){return this._pitch/Math.PI*180}set pitch(w){let B=a.ac(w,this.minPitch,this.maxPitch)/180*Math.PI;this._pitch!==B&&(this._unmodified=!1,this._pitch=B,this._calcMatrices())}get fov(){return this._fov/Math.PI*180}set fov(w){w=Math.max(.01,Math.min(60,w)),this._fov!==w&&(this._unmodified=!1,this._fov=w/180*Math.PI,this._calcMatrices())}get zoom(){return this._zoom}set zoom(w){let B=Math.min(Math.max(w,this.minZoom),this.maxZoom);this._zoom!==B&&(this._unmodified=!1,this._zoom=B,this.tileZoom=Math.max(0,Math.floor(B)),this.scale=this.zoomScale(B),this._constrain(),this._calcMatrices())}get center(){return this._center}set center(w){w.lat===this._center.lat&&w.lng===this._center.lng||(this._unmodified=!1,this._center=w,this._constrain(),this._calcMatrices())}get elevation(){return this._elevation}set elevation(w){w!==this._elevation&&(this._elevation=w,this._constrain(),this._calcMatrices())}get padding(){return this._edgeInsets.toJSON()}set padding(w){this._edgeInsets.equals(w)||(this._unmodified=!1,this._edgeInsets.interpolate(this._edgeInsets,w,1),this._calcMatrices())}get centerPoint(){return this._edgeInsets.getCenter(this.width,this.height)}isPaddingEqual(w){return this._edgeInsets.equals(w)}interpolatePadding(w,B,Q){this._unmodified=!1,this._edgeInsets.interpolate(w,B,Q),this._constrain(),this._calcMatrices()}coveringZoomLevel(w){let B=(w.roundZoom?Math.round:Math.floor)(this.zoom+this.scaleZoom(this.tileSize/w.tileSize));return Math.max(0,B)}getVisibleUnwrappedCoordinates(w){let B=[new a.b4(0,w)];if(this._renderWorldCopies){let Q=this.pointCoordinate(new a.P(0,0)),ee=this.pointCoordinate(new a.P(this.width,0)),le=this.pointCoordinate(new a.P(this.width,this.height)),qe=this.pointCoordinate(new a.P(0,this.height)),Xe=Math.floor(Math.min(Q.x,ee.x,le.x,qe.x)),ot=Math.floor(Math.max(Q.x,ee.x,le.x,qe.x)),Tt=1;for(let Yt=Xe-Tt;Yt<=ot+Tt;Yt++)Yt!==0&&B.push(new a.b4(Yt,w))}return B}coveringTiles(w){var B,Q;let ee=this.coveringZoomLevel(w),le=ee;if(w.minzoom!==void 0&&ee<w.minzoom)return[];w.maxzoom!==void 0&&ee>w.maxzoom&&(ee=w.maxzoom);let qe=this.pointCoordinate(this.getCameraPoint()),Xe=a.Z.fromLngLat(this.center),ot=Math.pow(2,ee),Tt=[ot*qe.x,ot*qe.y,0],Yt=[ot*Xe.x,ot*Xe.y,0],Kt=ks.fromInvProjectionMatrix(this.invModelViewProjectionMatrix,this.worldSize,ee),xr=w.minzoom||0;!w.terrain&&this.pitch<=60&&this._edgeInsets.top<.1&&(xr=ee);let Ir=w.terrain?2/Math.min(this.tileSize,w.tileSize)*this.tileSize:3,ve=We=>({aabb:new bc([We*ot,0,0],[(We+1)*ot,ot,0]),zoom:0,x:0,y:0,wrap:We,fullyVisible:!1}),be=[],De=[],Be=ee,et=w.reparseOverscaled?le:ee;if(this._renderWorldCopies)for(let We=1;We<=3;We++)be.push(ve(-We)),be.push(ve(We));for(be.push(ve(0));be.length>0;){let We=be.pop(),it=We.x,Ft=We.y,Ht=We.fullyVisible;if(!Ht){let Wr=We.aabb.intersects(Kt);if(Wr===0)continue;Ht=Wr===2}let tr=w.terrain?Tt:Yt,dr=We.aabb.distanceX(tr),Sr=We.aabb.distanceY(tr),Or=Math.max(Math.abs(dr),Math.abs(Sr));if(We.zoom===Be||Or>Ir+(1<<Be-We.zoom)-2&&We.zoom>=xr){let Wr=Be-We.zoom,ni=Tt[0]-.5-(it<<Wr),Pi=Tt[1]-.5-(Ft<<Wr);De.push({tileID:new a.S(We.zoom===Be?et:We.zoom,We.wrap,We.zoom,it,Ft),distanceSq:x([Yt[0]-.5-it,Yt[1]-.5-Ft]),tileDistanceToCamera:Math.sqrt(ni*ni+Pi*Pi)})}else for(let Wr=0;Wr<4;Wr++){let ni=(it<<1)+Wr%2,Pi=(Ft<<1)+(Wr>>1),cn=We.zoom+1,ln=We.aabb.quadrant(Wr);if(w.terrain){let Cn=new a.S(cn,We.wrap,cn,ni,Pi),Kn=w.terrain.getMinMaxElevation(Cn),Ta=(B=Kn.minElevation)!==null&&B!==void 0?B:this.elevation,fa=(Q=Kn.maxElevation)!==null&&Q!==void 0?Q:this.elevation;ln=new bc([ln.min[0],ln.min[1],Ta],[ln.max[0],ln.max[1],fa])}be.push({aabb:ln,zoom:cn,x:ni,y:Pi,wrap:We.wrap,fullyVisible:Ht})}}return De.sort((We,it)=>We.distanceSq-it.distanceSq).map(We=>We.tileID)}resize(w,B){this.width=w,this.height=B,this.pixelsToGLUnits=[2/w,-2/B],this._constrain(),this._calcMatrices()}get unmodified(){return this._unmodified}zoomScale(w){return Math.pow(2,w)}scaleZoom(w){return Math.log(w)/Math.LN2}project(w){let B=a.ac(w.lat,-85.051129,_u);return new a.P(a.O(w.lng)*this.worldSize,a.Q(B)*this.worldSize)}unproject(w){return new a.Z(w.x/this.worldSize,w.y/this.worldSize).toLngLat()}get point(){return this.project(this.center)}getCameraPosition(){return{lngLat:this.pointLocation(this.getCameraPoint()),altitude:Math.cos(this._pitch)*this.cameraToCenterDistance/this._pixelPerMeter+this.elevation}}recalculateZoom(w){let B=this.elevation,Q=Math.cos(this._pitch)*this.cameraToCenterDistance/this._pixelPerMeter,ee=this.pointLocation(this.centerPoint,w),le=w.getElevationForLngLatZoom(ee,this.tileZoom);if(!(this.elevation-le))return;let qe=Q+B-le,Xe=Math.cos(this._pitch)*this.cameraToCenterDistance/qe/a.b5(1,ee.lat),ot=this.scaleZoom(Xe/this.tileSize);this._elevation=le,this._center=ee,this.zoom=ot}setLocationAtPoint(w,B){let Q=this.pointCoordinate(B),ee=this.pointCoordinate(this.centerPoint),le=this.locationCoordinate(w),qe=new a.Z(le.x-(Q.x-ee.x),le.y-(Q.y-ee.y));this.center=this.coordinateLocation(qe),this._renderWorldCopies&&(this.center=this.center.wrap())}locationPoint(w,B){return B?this.coordinatePoint(this.locationCoordinate(w),B.getElevationForLngLatZoom(w,this.tileZoom),this.pixelMatrix3D):this.coordinatePoint(this.locationCoordinate(w))}pointLocation(w,B){return this.coordinateLocation(this.pointCoordinate(w,B))}locationCoordinate(w){return a.Z.fromLngLat(w)}coordinateLocation(w){return w&&w.toLngLat()}pointCoordinate(w,B){if(B){let xr=B.pointCoordinate(w);if(xr!=null)return xr}let Q=[w.x,w.y,0,1],ee=[w.x,w.y,1,1];a.af(Q,Q,this.pixelMatrixInverse),a.af(ee,ee,this.pixelMatrixInverse);let le=Q[3],qe=ee[3],Xe=Q[1]/le,ot=ee[1]/qe,Tt=Q[2]/le,Yt=ee[2]/qe,Kt=Tt===Yt?0:(0-Tt)/(Yt-Tt);return new a.Z(a.y.number(Q[0]/le,ee[0]/qe,Kt)/this.worldSize,a.y.number(Xe,ot,Kt)/this.worldSize)}coordinatePoint(w,B=0,Q=this.pixelMatrix){let ee=[w.x*this.worldSize,w.y*this.worldSize,B,1];return a.af(ee,ee,Q),new a.P(ee[0]/ee[3],ee[1]/ee[3])}getBounds(){let w=Math.max(0,this.height/2-this.getHorizon());return new ce().extend(this.pointLocation(new a.P(0,w))).extend(this.pointLocation(new a.P(this.width,w))).extend(this.pointLocation(new a.P(this.width,this.height))).extend(this.pointLocation(new a.P(0,this.height)))}getMaxBounds(){return this.latRange&&this.latRange.length===2&&this.lngRange&&this.lngRange.length===2?new ce([this.lngRange[0],this.latRange[0]],[this.lngRange[1],this.latRange[1]]):null}getHorizon(){return Math.tan(Math.PI/2-this._pitch)*this.cameraToCenterDistance*.85}setMaxBounds(w){w?(this.lngRange=[w.getWest(),w.getEast()],this.latRange=[w.getSouth(),w.getNorth()],this._constrain()):(this.lngRange=null,this.latRange=[-85.051129,_u])}calculateTileMatrix(w){let B=w.canonical,Q=this.worldSize/this.zoomScale(B.z),ee=B.x+Math.pow(2,B.z)*w.wrap,le=a.an(new Float64Array(16));return a.J(le,le,[ee*Q,B.y*Q,0]),a.K(le,le,[Q/a.X,Q/a.X,1]),le}calculatePosMatrix(w,B=!1){let Q=w.key,ee=B?this._alignedPosMatrixCache:this._posMatrixCache;if(ee[Q])return ee[Q];let le=this.calculateTileMatrix(w);return a.L(le,B?this.alignedModelViewProjectionMatrix:this.modelViewProjectionMatrix,le),ee[Q]=new Float32Array(le),ee[Q]}calculateFogMatrix(w){let B=w.key,Q=this._fogMatrixCache;if(Q[B])return Q[B];let ee=this.calculateTileMatrix(w);return a.L(ee,this.fogMatrix,ee),Q[B]=new Float32Array(ee),Q[B]}customLayerMatrix(){return this.mercatorMatrix.slice()}getConstrained(w,B){B=a.ac(+B,this.minZoom,this.maxZoom);let Q={center:new a.N(w.lng,w.lat),zoom:B},ee=this.lngRange;if(!this._renderWorldCopies&&ee===null){let We=179.9999999999;ee=[-We,We]}let le=this.tileSize*this.zoomScale(Q.zoom),qe=0,Xe=le,ot=0,Tt=le,Yt=0,Kt=0,{x:xr,y:Ir}=this.size;if(this.latRange){let We=this.latRange;qe=a.Q(We[1])*le,Xe=a.Q(We[0])*le,Xe-qe<Ir&&(Yt=Ir/(Xe-qe))}ee&&(ot=a.b3(a.O(ee[0])*le,0,le),Tt=a.b3(a.O(ee[1])*le,0,le),Tt<ot&&(Tt+=le),Tt-ot<xr&&(Kt=xr/(Tt-ot)));let{x:ve,y:be}=this.project.call({worldSize:le},w),De,Be,et=Math.max(Kt||0,Yt||0);if(et){let We=new a.P(Kt?(Tt+ot)/2:ve,Yt?(Xe+qe)/2:be);return Q.center=this.unproject.call({worldSize:le},We).wrap(),Q.zoom+=this.scaleZoom(et),Q}if(this.latRange){let We=Ir/2;be-We<qe&&(Be=qe+We),be+We>Xe&&(Be=Xe-We)}if(ee){let We=(ot+Tt)/2,it=ve;this._renderWorldCopies&&(it=a.b3(ve,We-le/2,We+le/2));let Ft=xr/2;it-Ft<ot&&(De=ot+Ft),it+Ft>Tt&&(De=Tt-Ft)}if(De!==void 0||Be!==void 0){let We=new a.P(De!=null?De:ve,Be!=null?Be:be);Q.center=this.unproject.call({worldSize:le},We).wrap()}return Q}_constrain(){if(!this.center||!this.width||!this.height||this._constraining)return;this._constraining=!0;let w=this._unmodified,{center:B,zoom:Q}=this.getConstrained(this.center,this.zoom);this.center=B,this.zoom=Q,this._unmodified=w,this._constraining=!1}_calcMatrices(){if(!this.height)return;let w=this.centerOffset,B=this.point.x,Q=this.point.y;this.cameraToCenterDistance=.5/Math.tan(this._fov/2)*this.height,this._pixelPerMeter=a.b5(1,this.center.lat)*this.worldSize;let ee=a.an(new Float64Array(16));a.K(ee,ee,[this.width/2,-this.height/2,1]),a.J(ee,ee,[1,-1,0]),this.labelPlaneMatrix=ee,ee=a.an(new Float64Array(16)),a.K(ee,ee,[1,-1,1]),a.J(ee,ee,[-1,-1,0]),a.K(ee,ee,[2/this.width,2/this.height,1]),this.glCoordMatrix=ee;let le=this.cameraToCenterDistance+this._elevation*this._pixelPerMeter/Math.cos(this._pitch),qe=Math.min(this.elevation,this.minElevationForCurrentTile),Xe=le-qe*this._pixelPerMeter/Math.cos(this._pitch),ot=qe<0?Xe:le,Tt=Math.PI/2+this._pitch,Yt=this._fov*(.5+w.y/this.height),Kt=Math.sin(Yt)*ot/Math.sin(a.ac(Math.PI-Tt-Yt,.01,Math.PI-.01)),xr=this.getHorizon(),Ir=2*Math.atan(xr/this.cameraToCenterDistance)*(.5+w.y/(2*xr)),ve=Math.sin(Ir)*ot/Math.sin(a.ac(Math.PI-Tt-Ir,.01,Math.PI-.01)),be=Math.min(Kt,ve);this.farZ=1.01*(Math.cos(Math.PI/2-this._pitch)*be+ot),this.nearZ=this.height/50,ee=new Float64Array(16),a.b6(ee,this._fov,this.width/this.height,this.nearZ,this.farZ),ee[8]=2*-w.x/this.width,ee[9]=2*w.y/this.height,this.projectionMatrix=a.ae(ee),a.K(ee,ee,[1,-1,1]),a.J(ee,ee,[0,0,-this.cameraToCenterDistance]),a.b7(ee,ee,this._pitch),a.ad(ee,ee,this.angle),a.J(ee,ee,[-B,-Q,0]),this.mercatorMatrix=a.K([],ee,[this.worldSize,this.worldSize,this.worldSize]),a.K(ee,ee,[1,1,this._pixelPerMeter]),this.pixelMatrix=a.L(new Float64Array(16),this.labelPlaneMatrix,ee),a.J(ee,ee,[0,0,-this.elevation]),this.modelViewProjectionMatrix=ee,this.invModelViewProjectionMatrix=a.as([],ee),this.fogMatrix=new Float64Array(16),a.b6(this.fogMatrix,this._fov,this.width/this.height,le,this.farZ),this.fogMatrix[8]=2*-w.x/this.width,this.fogMatrix[9]=2*w.y/this.height,a.K(this.fogMatrix,this.fogMatrix,[1,-1,1]),a.J(this.fogMatrix,this.fogMatrix,[0,0,-this.cameraToCenterDistance]),a.b7(this.fogMatrix,this.fogMatrix,this._pitch),a.ad(this.fogMatrix,this.fogMatrix,this.angle),a.J(this.fogMatrix,this.fogMatrix,[-B,-Q,0]),a.K(this.fogMatrix,this.fogMatrix,[1,1,this._pixelPerMeter]),a.J(this.fogMatrix,this.fogMatrix,[0,0,-this.elevation]),this.pixelMatrix3D=a.L(new Float64Array(16),this.labelPlaneMatrix,ee);let De=this.width%2/2,Be=this.height%2/2,et=Math.cos(this.angle),We=Math.sin(this.angle),it=B-Math.round(B)+et*De+We*Be,Ft=Q-Math.round(Q)+et*Be+We*De,Ht=new Float64Array(ee);if(a.J(Ht,Ht,[it>.5?it-1:it,Ft>.5?Ft-1:Ft,0]),this.alignedModelViewProjectionMatrix=Ht,ee=a.as(new Float64Array(16),this.pixelMatrix),!ee)throw new Error("failed to invert matrix");this.pixelMatrixInverse=ee,this._posMatrixCache={},this._alignedPosMatrixCache={},this._fogMatrixCache={}}maxPitchScaleFactor(){if(!this.pixelMatrixInverse)return 1;let w=this.pointCoordinate(new a.P(0,0)),B=[w.x*this.worldSize,w.y*this.worldSize,0,1];return a.af(B,B,this.pixelMatrix)[3]/this.cameraToCenterDistance}getCameraPoint(){let w=Math.tan(this._pitch)*(this.cameraToCenterDistance||1);return this.centerPoint.add(new a.P(0,w))}getCameraQueryGeometry(w){let B=this.getCameraPoint();if(w.length===1)return[w[0],B];{let Q=B.x,ee=B.y,le=B.x,qe=B.y;for(let Xe of w)Q=Math.min(Q,Xe.x),ee=Math.min(ee,Xe.y),le=Math.max(le,Xe.x),qe=Math.max(qe,Xe.y);return[new a.P(Q,ee),new a.P(le,ee),new a.P(le,qe),new a.P(Q,qe),new a.P(Q,ee)]}}lngLatToCameraDepth(w,B){let Q=this.locationCoordinate(w),ee=[Q.x*this.worldSize,Q.y*this.worldSize,B,1];return a.af(ee,ee,this.modelViewProjectionMatrix),ee[2]/ee[3]}}function nh(ue,w){let B,Q=!1,ee=null,le=null,qe=()=>{ee=null,Q&&(ue.apply(le,B),ee=setTimeout(qe,w),Q=!1)};return(...Xe)=>(Q=!0,le=this,B=Xe,ee||qe(),ee)}class Ah{constructor(w){this._getCurrentHash=()=>{let B=window.location.hash.replace("#","");if(this._hashName){let Q;return B.split("&").map(ee=>ee.split("=")).forEach(ee=>{ee[0]===this._hashName&&(Q=ee)}),(Q&&Q[1]||"").split("/")}return B.split("/")},this._onHashChange=()=>{let B=this._getCurrentHash();if(B.length>=3&&!B.some(Q=>isNaN(Q))){let Q=this._map.dragRotate.isEnabled()&&this._map.touchZoomRotate.isEnabled()?+(B[3]||0):this._map.getBearing();return this._map.jumpTo({center:[+B[2],+B[1]],zoom:+B[0],bearing:Q,pitch:+(B[4]||0)}),!0}return!1},this._updateHashUnthrottled=()=>{let B=window.location.href.replace(/(#.*)?$/,this.getHashString());window.history.replaceState(window.history.state,null,B)},this._removeHash=()=>{let B=this._getCurrentHash();if(B.length===0)return;let Q=B.join("/"),ee=Q;ee.split("&").length>0&&(ee=ee.split("&")[0]),this._hashName&&(ee=`${this._hashName}=${Q}`);let le=window.location.hash.replace(ee,"");le.startsWith("#&")?le=le.slice(0,1)+le.slice(2):le==="#"&&(le="");let qe=window.location.href.replace(/(#.+)?$/,le);qe=qe.replace("&&","&"),window.history.replaceState(window.history.state,null,qe)},this._updateHash=nh(this._updateHashUnthrottled,300),this._hashName=w&&encodeURIComponent(w)}addTo(w){return this._map=w,addEventListener("hashchange",this._onHashChange,!1),this._map.on("moveend",this._updateHash),this}remove(){return removeEventListener("hashchange",this._onHashChange,!1),this._map.off("moveend",this._updateHash),clearTimeout(this._updateHash()),this._removeHash(),delete this._map,this}getHashString(w){let B=this._map.getCenter(),Q=Math.round(100*this._map.getZoom())/100,ee=Math.ceil((Q*Math.LN2+Math.log(512/360/.5))/Math.LN10),le=Math.pow(10,ee),qe=Math.round(B.lng*le)/le,Xe=Math.round(B.lat*le)/le,ot=this._map.getBearing(),Tt=this._map.getPitch(),Yt="";if(Yt+=w?`/${qe}/${Xe}/${Q}`:`${Q}/${Xe}/${qe}`,(ot||Tt)&&(Yt+="/"+Math.round(10*ot)/10),Tt&&(Yt+=`/${Math.round(Tt)}`),this._hashName){let Kt=this._hashName,xr=!1,Ir=window.location.hash.slice(1).split("&").map(ve=>{let be=ve.split("=")[0];return be===Kt?(xr=!0,`${be}=${Yt}`):ve}).filter(ve=>ve);return xr||Ir.push(`${Kt}=${Yt}`),`#${Ir.join("&")}`}return`#${Yt}`}}let zu={linearity:.3,easing:a.b8(0,0,.3,1)},Fc=a.e({deceleration:2500,maxSpeed:1400},zu),wc=a.e({deceleration:20,maxSpeed:1400},zu),bd=a.e({deceleration:1e3,maxSpeed:360},zu),xf=a.e({deceleration:1e3,maxSpeed:90},zu);class Pf{constructor(w){this._map=w,this.clear()}clear(){this._inertiaBuffer=[]}record(w){this._drainInertiaBuffer(),this._inertiaBuffer.push({time:u.now(),settings:w})}_drainInertiaBuffer(){let w=this._inertiaBuffer,B=u.now();for(;w.length>0&&B-w[0].time>160;)w.shift()}_onMoveEnd(w){if(this._drainInertiaBuffer(),this._inertiaBuffer.length<2)return;let B={zoom:0,bearing:0,pitch:0,pan:new a.P(0,0),pinchAround:void 0,around:void 0};for(let{settings:le}of this._inertiaBuffer)B.zoom+=le.zoomDelta||0,B.bearing+=le.bearingDelta||0,B.pitch+=le.pitchDelta||0,le.panDelta&&B.pan._add(le.panDelta),le.around&&(B.around=le.around),le.pinchAround&&(B.pinchAround=le.pinchAround);let Q=this._inertiaBuffer[this._inertiaBuffer.length-1].time-this._inertiaBuffer[0].time,ee={};if(B.pan.mag()){let le=bf(B.pan.mag(),Q,a.e({},Fc,w||{}));ee.offset=B.pan.mult(le.amount/B.pan.mag()),ee.center=this._map.transform.center,Ou(ee,le)}if(B.zoom){let le=bf(B.zoom,Q,wc);ee.zoom=this._map.transform.zoom+le.amount,Ou(ee,le)}if(B.bearing){let le=bf(B.bearing,Q,bd);ee.bearing=this._map.transform.bearing+a.ac(le.amount,-179,179),Ou(ee,le)}if(B.pitch){let le=bf(B.pitch,Q,xf);ee.pitch=this._map.transform.pitch+le.amount,Ou(ee,le)}if(ee.zoom||ee.bearing){let le=B.pinchAround===void 0?B.around:B.pinchAround;ee.around=le?this._map.unproject(le):this._map.getCenter()}return this.clear(),a.e(ee,{noMoveStart:!0})}}function Ou(ue,w){(!ue.duration||ue.duration<w.duration)&&(ue.duration=w.duration,ue.easing=w.easing)}function bf(ue,w,B){let{maxSpeed:Q,linearity:ee,deceleration:le}=B,qe=a.ac(ue*ee/(w/1e3),-Q,Q),Xe=Math.abs(qe)/(le*ee);return{easing:B.easing,duration:1e3*Xe,amount:qe*(Xe/2)}}class jl extends a.k{preventDefault(){this._defaultPrevented=!0}get defaultPrevented(){return this._defaultPrevented}constructor(w,B,Q,ee={}){let le=c.mousePos(B.getCanvas(),Q),qe=B.unproject(le);super(w,a.e({point:le,lngLat:qe,originalEvent:Q},ee)),this._defaultPrevented=!1,this.target=B}}class lf extends a.k{preventDefault(){this._defaultPrevented=!0}get defaultPrevented(){return this._defaultPrevented}constructor(w,B,Q){let ee=w==="touchend"?Q.changedTouches:Q.touches,le=c.touchPos(B.getCanvasContainer(),ee),qe=le.map(ot=>B.unproject(ot)),Xe=le.reduce((ot,Tt,Yt,Kt)=>ot.add(Tt.div(Kt.length)),new a.P(0,0));super(w,{points:le,point:Xe,lngLats:qe,lngLat:B.unproject(Xe),originalEvent:Q}),this._defaultPrevented=!1}}class Hh extends a.k{preventDefault(){this._defaultPrevented=!0}get defaultPrevented(){return this._defaultPrevented}constructor(w,B,Q){super(w,{originalEvent:Q}),this._defaultPrevented=!1}}class If{constructor(w,B){this._map=w,this._clickTolerance=B.clickTolerance}reset(){delete this._mousedownPos}wheel(w){return this._firePreventable(new Hh(w.type,this._map,w))}mousedown(w,B){return this._mousedownPos=B,this._firePreventable(new jl(w.type,this._map,w))}mouseup(w){this._map.fire(new jl(w.type,this._map,w))}click(w,B){this._mousedownPos&&this._mousedownPos.dist(B)>=this._clickTolerance||this._map.fire(new jl(w.type,this._map,w))}dblclick(w){return this._firePreventable(new jl(w.type,this._map,w))}mouseover(w){this._map.fire(new jl(w.type,this._map,w))}mouseout(w){this._map.fire(new jl(w.type,this._map,w))}touchstart(w){return this._firePreventable(new lf(w.type,this._map,w))}touchmove(w){this._map.fire(new lf(w.type,this._map,w))}touchend(w){this._map.fire(new lf(w.type,this._map,w))}touchcancel(w){this._map.fire(new lf(w.type,this._map,w))}_firePreventable(w){if(this._map.fire(w),w.defaultPrevented)return{}}isEnabled(){return!0}isActive(){return!1}enable(){}disable(){}}class Cs{constructor(w){this._map=w}reset(){this._delayContextMenu=!1,this._ignoreContextMenu=!0,delete this._contextMenuEvent}mousemove(w){this._map.fire(new jl(w.type,this._map,w))}mousedown(){this._delayContextMenu=!0,this._ignoreContextMenu=!1}mouseup(){this._delayContextMenu=!1,this._contextMenuEvent&&(this._map.fire(new jl("contextmenu",this._map,this._contextMenuEvent)),delete this._contextMenuEvent)}contextmenu(w){this._delayContextMenu?this._contextMenuEvent=w:this._ignoreContextMenu||this._map.fire(new jl(w.type,this._map,w)),this._map.listens("contextmenu")&&w.preventDefault()}isEnabled(){return!0}isActive(){return!1}enable(){}disable(){}}class du{constructor(w){this._map=w}get transform(){return this._map._requestedCameraState||this._map.transform}get center(){return{lng:this.transform.center.lng,lat:this.transform.center.lat}}get zoom(){return this.transform.zoom}get pitch(){return this.transform.pitch}get bearing(){return this.transform.bearing}unproject(w){return this.transform.pointLocation(a.P.convert(w),this._map.terrain)}}class ku{constructor(w,B){this._map=w,this._tr=new du(w),this._el=w.getCanvasContainer(),this._container=w.getContainer(),this._clickTolerance=B.clickTolerance||1}isEnabled(){return!!this._enabled}isActive(){return!!this._active}enable(){this.isEnabled()||(this._enabled=!0)}disable(){this.isEnabled()&&(this._enabled=!1)}mousedown(w,B){this.isEnabled()&&w.shiftKey&&w.button===0&&(c.disableDrag(),this._startPos=this._lastPos=B,this._active=!0)}mousemoveWindow(w,B){if(!this._active)return;let Q=B;if(this._lastPos.equals(Q)||!this._box&&Q.dist(this._startPos)<this._clickTolerance)return;let ee=this._startPos;this._lastPos=Q,this._box||(this._box=c.create("div","maplibregl-boxzoom",this._container),this._container.classList.add("maplibregl-crosshair"),this._fireEvent("boxzoomstart",w));let le=Math.min(ee.x,Q.x),qe=Math.max(ee.x,Q.x),Xe=Math.min(ee.y,Q.y),ot=Math.max(ee.y,Q.y);c.setTransform(this._box,`translate(${le}px,${Xe}px)`),this._box.style.width=qe-le+"px",this._box.style.height=ot-Xe+"px"}mouseupWindow(w,B){if(!this._active||w.button!==0)return;let Q=this._startPos,ee=B;if(this.reset(),c.suppressClick(),Q.x!==ee.x||Q.y!==ee.y)return this._map.fire(new a.k("boxzoomend",{originalEvent:w})),{cameraAnimation:le=>le.fitScreenCoordinates(Q,ee,this._tr.bearing,{linear:!0})};this._fireEvent("boxzoomcancel",w)}keydown(w){this._active&&w.keyCode===27&&(this.reset(),this._fireEvent("boxzoomcancel",w))}reset(){this._active=!1,this._container.classList.remove("maplibregl-crosshair"),this._box&&(c.remove(this._box),this._box=null),c.enableDrag(),delete this._startPos,delete this._lastPos}_fireEvent(w,B){return this._map.fire(new a.k(w,{originalEvent:B}))}}function Wf(ue,w){if(ue.length!==w.length)throw new Error(`The number of touches and points are not equal - touches ${ue.length}, points ${w.length}`);let B={};for(let Q=0;Q<ue.length;Q++)B[ue[Q].identifier]=w[Q];return B}class Us{constructor(w){this.reset(),this.numTouches=w.numTouches}reset(){delete this.centroid,delete this.startTime,delete this.touches,this.aborted=!1}touchstart(w,B,Q){(this.centroid||Q.length>this.numTouches)&&(this.aborted=!0),this.aborted||(this.startTime===void 0&&(this.startTime=w.timeStamp),Q.length===this.numTouches&&(this.centroid=function(ee){let le=new a.P(0,0);for(let qe of ee)le._add(qe);return le.div(ee.length)}(B),this.touches=Wf(Q,B)))}touchmove(w,B,Q){if(this.aborted||!this.centroid)return;let ee=Wf(Q,B);for(let le in this.touches){let qe=ee[le];(!qe||qe.dist(this.touches[le])>30)&&(this.aborted=!0)}}touchend(w,B,Q){if((!this.centroid||w.timeStamp-this.startTime>500)&&(this.aborted=!0),Q.length===0){let ee=!this.aborted&&this.centroid;if(this.reset(),ee)return ee}}}class wf{constructor(w){this.singleTap=new Us(w),this.numTaps=w.numTaps,this.reset()}reset(){this.lastTime=1/0,delete this.lastTap,this.count=0,this.singleTap.reset()}touchstart(w,B,Q){this.singleTap.touchstart(w,B,Q)}touchmove(w,B,Q){this.singleTap.touchmove(w,B,Q)}touchend(w,B,Q){let ee=this.singleTap.touchend(w,B,Q);if(ee){let le=w.timeStamp-this.lastTime<500,qe=!this.lastTap||this.lastTap.dist(ee)<30;if(le&&qe||this.reset(),this.count++,this.lastTime=w.timeStamp,this.lastTap=ee,this.count===this.numTaps)return this.reset(),ee}}}class zc{constructor(w){this._tr=new du(w),this._zoomIn=new wf({numTouches:1,numTaps:2}),this._zoomOut=new wf({numTouches:2,numTaps:1}),this.reset()}reset(){this._active=!1,this._zoomIn.reset(),this._zoomOut.reset()}touchstart(w,B,Q){this._zoomIn.touchstart(w,B,Q),this._zoomOut.touchstart(w,B,Q)}touchmove(w,B,Q){this._zoomIn.touchmove(w,B,Q),this._zoomOut.touchmove(w,B,Q)}touchend(w,B,Q){let ee=this._zoomIn.touchend(w,B,Q),le=this._zoomOut.touchend(w,B,Q),qe=this._tr;return ee?(this._active=!0,w.preventDefault(),setTimeout(()=>this.reset(),0),{cameraAnimation:Xe=>Xe.easeTo({duration:300,zoom:qe.zoom+1,around:qe.unproject(ee)},{originalEvent:w})}):le?(this._active=!0,w.preventDefault(),setTimeout(()=>this.reset(),0),{cameraAnimation:Xe=>Xe.easeTo({duration:300,zoom:qe.zoom-1,around:qe.unproject(le)},{originalEvent:w})}):void 0}touchcancel(){this.reset()}enable(){this._enabled=!0}disable(){this._enabled=!1,this.reset()}isEnabled(){return this._enabled}isActive(){return this._active}}class Wu{constructor(w){this._enabled=!!w.enable,this._moveStateManager=w.moveStateManager,this._clickTolerance=w.clickTolerance||1,this._moveFunction=w.move,this._activateOnStart=!!w.activateOnStart,w.assignEvents(this),this.reset()}reset(w){this._active=!1,this._moved=!1,delete this._lastPoint,this._moveStateManager.endMove(w)}_move(...w){let B=this._moveFunction(...w);if(B.bearingDelta||B.pitchDelta||B.around||B.panDelta)return this._active=!0,B}dragStart(w,B){this.isEnabled()&&!this._lastPoint&&this._moveStateManager.isValidStartEvent(w)&&(this._moveStateManager.startMove(w),this._lastPoint=B.length?B[0]:B,this._activateOnStart&&this._lastPoint&&(this._active=!0))}dragMove(w,B){if(!this.isEnabled())return;let Q=this._lastPoint;if(!Q)return;if(w.preventDefault(),!this._moveStateManager.isValidMoveEvent(w))return void this.reset(w);let ee=B.length?B[0]:B;return!this._moved&&ee.dist(Q)<this._clickTolerance?void 0:(this._moved=!0,this._lastPoint=ee,this._move(Q,ee))}dragEnd(w){this.isEnabled()&&this._lastPoint&&this._moveStateManager.isValidEndEvent(w)&&(this._moved&&c.suppressClick(),this.reset(w))}enable(){this._enabled=!0}disable(){this._enabled=!1,this.reset()}isEnabled(){return this._enabled}isActive(){return this._active}getClickTolerance(){return this._clickTolerance}}let Rf={0:1,2:2};class Xu{constructor(w){this._correctEvent=w.checkCorrectEvent}startMove(w){let B=c.mouseButton(w);this._eventButton=B}endMove(w){delete this._eventButton}isValidStartEvent(w){return this._correctEvent(w)}isValidMoveEvent(w){return!function(B,Q){let ee=Rf[Q];return B.buttons===void 0||(B.buttons&ee)!==ee}(w,this._eventButton)}isValidEndEvent(w){return c.mouseButton(w)===this._eventButton}}class uf{constructor(){this._firstTouch=void 0}_isOneFingerTouch(w){return w.targetTouches.length===1}_isSameTouchEvent(w){return w.targetTouches[0].identifier===this._firstTouch}startMove(w){this._firstTouch=w.targetTouches[0].identifier}endMove(w){delete this._firstTouch}isValidStartEvent(w){return this._isOneFingerTouch(w)}isValidMoveEvent(w){return this._isOneFingerTouch(w)&&this._isSameTouchEvent(w)}isValidEndEvent(w){return this._isOneFingerTouch(w)&&this._isSameTouchEvent(w)}}let Xf=ue=>{ue.mousedown=ue.dragStart,ue.mousemoveWindow=ue.dragMove,ue.mouseup=ue.dragEnd,ue.contextmenu=w=>{w.preventDefault()}},Wl=({enable:ue,clickTolerance:w,bearingDegreesPerPixelMoved:B=.8})=>{let Q=new Xu({checkCorrectEvent:ee=>c.mouseButton(ee)===0&&ee.ctrlKey||c.mouseButton(ee)===2});return new Wu({clickTolerance:w,move:(ee,le)=>({bearingDelta:(le.x-ee.x)*B}),moveStateManager:Q,enable:ue,assignEvents:Xf})},ah=({enable:ue,clickTolerance:w,pitchDegreesPerPixelMoved:B=-.5})=>{let Q=new Xu({checkCorrectEvent:ee=>c.mouseButton(ee)===0&&ee.ctrlKey||c.mouseButton(ee)===2});return new Wu({clickTolerance:w,move:(ee,le)=>({pitchDelta:(le.y-ee.y)*B}),moveStateManager:Q,enable:ue,assignEvents:Xf})};class Zu{constructor(w,B){this._clickTolerance=w.clickTolerance||1,this._map=B,this.reset()}reset(){this._active=!1,this._touches={},this._sum=new a.P(0,0)}_shouldBePrevented(w){return w<(this._map.cooperativeGestures.isEnabled()?2:1)}touchstart(w,B,Q){return this._calculateTransform(w,B,Q)}touchmove(w,B,Q){if(this._active){if(!this._shouldBePrevented(Q.length))return w.preventDefault(),this._calculateTransform(w,B,Q);this._map.cooperativeGestures.notifyGestureBlocked("touch_pan",w)}}touchend(w,B,Q){this._calculateTransform(w,B,Q),this._active&&this._shouldBePrevented(Q.length)&&this.reset()}touchcancel(){this.reset()}_calculateTransform(w,B,Q){Q.length>0&&(this._active=!0);let ee=Wf(Q,B),le=new a.P(0,0),qe=new a.P(0,0),Xe=0;for(let Tt in ee){let Yt=ee[Tt],Kt=this._touches[Tt];Kt&&(le._add(Yt),qe._add(Yt.sub(Kt)),Xe++,ee[Tt]=Yt)}if(this._touches=ee,this._shouldBePrevented(Xe)||!qe.mag())return;let ot=qe.div(Xe);return this._sum._add(ot),this._sum.mag()<this._clickTolerance?void 0:{around:le.div(Xe),panDelta:ot}}enable(){this._enabled=!0}disable(){this._enabled=!1,this.reset()}isEnabled(){return this._enabled}isActive(){return this._active}}class Oc{constructor(){this.reset()}reset(){this._active=!1,delete this._firstTwoTouches}touchstart(w,B,Q){this._firstTwoTouches||Q.length<2||(this._firstTwoTouches=[Q[0].identifier,Q[1].identifier],this._start([B[0],B[1]]))}touchmove(w,B,Q){if(!this._firstTwoTouches)return;w.preventDefault();let[ee,le]=this._firstTwoTouches,qe=Tc(Q,B,ee),Xe=Tc(Q,B,le);if(!qe||!Xe)return;let ot=this._aroundCenter?null:qe.add(Xe).div(2);return this._move([qe,Xe],ot,w)}touchend(w,B,Q){if(!this._firstTwoTouches)return;let[ee,le]=this._firstTwoTouches,qe=Tc(Q,B,ee),Xe=Tc(Q,B,le);qe&&Xe||(this._active&&c.suppressClick(),this.reset())}touchcancel(){this.reset()}enable(w){this._enabled=!0,this._aroundCenter=!!w&&w.around==="center"}disable(){this._enabled=!1,this.reset()}isEnabled(){return!!this._enabled}isActive(){return!!this._active}}function Tc(ue,w,B){for(let Q=0;Q<ue.length;Q++)if(ue[Q].identifier===B)return w[Q]}function wl(ue,w){return Math.log(ue/w)/Math.LN2}class vu extends Oc{reset(){super.reset(),delete this._distance,delete this._startDistance}_start(w){this._startDistance=this._distance=w[0].dist(w[1])}_move(w,B){let Q=this._distance;if(this._distance=w[0].dist(w[1]),this._active||!(Math.abs(wl(this._distance,this._startDistance))<.1))return this._active=!0,{zoomDelta:wl(this._distance,Q),pinchAround:B}}}function qc(ue,w){return 180*ue.angleWith(w)/Math.PI}class cf extends Oc{reset(){super.reset(),delete this._minDiameter,delete this._startVector,delete this._vector}_start(w){this._startVector=this._vector=w[0].sub(w[1]),this._minDiameter=w[0].dist(w[1])}_move(w,B,Q){let ee=this._vector;if(this._vector=w[0].sub(w[1]),this._active||!this._isBelowThreshold(this._vector))return this._active=!0,{bearingDelta:qc(this._vector,ee),pinchAround:B}}_isBelowThreshold(w){this._minDiameter=Math.min(this._minDiameter,w.mag());let B=25/(Math.PI*this._minDiameter)*360,Q=qc(w,this._startVector);return Math.abs(Q)<B}}function fc(ue){return Math.abs(ue.y)>Math.abs(ue.x)}class Bc extends Oc{constructor(w){super(),this._currentTouchCount=0,this._map=w}reset(){super.reset(),this._valid=void 0,delete this._firstMove,delete this._lastPoints}touchstart(w,B,Q){super.touchstart(w,B,Q),this._currentTouchCount=Q.length}_start(w){this._lastPoints=w,fc(w[0].sub(w[1]))&&(this._valid=!1)}_move(w,B,Q){if(this._map.cooperativeGestures.isEnabled()&&this._currentTouchCount<3)return;let ee=w[0].sub(this._lastPoints[0]),le=w[1].sub(this._lastPoints[1]);return this._valid=this.gestureBeginsVertically(ee,le,Q.timeStamp),this._valid?(this._lastPoints=w,this._active=!0,{pitchDelta:(ee.y+le.y)/2*-.5}):void 0}gestureBeginsVertically(w,B,Q){if(this._valid!==void 0)return this._valid;let ee=w.mag()>=2,le=B.mag()>=2;if(!ee&&!le)return;if(!ee||!le)return this._firstMove===void 0&&(this._firstMove=Q),Q-this._firstMove<100&&void 0;let qe=w.y>0==B.y>0;return fc(w)&&fc(B)&&qe}}let At={panStep:100,bearingStep:15,pitchStep:10};class Wt{constructor(w){this._tr=new du(w);let B=At;this._panStep=B.panStep,this._bearingStep=B.bearingStep,this._pitchStep=B.pitchStep,this._rotationDisabled=!1}reset(){this._active=!1}keydown(w){if(w.altKey||w.ctrlKey||w.metaKey)return;let B=0,Q=0,ee=0,le=0,qe=0;switch(w.keyCode){case 61:case 107:case 171:case 187:B=1;break;case 189:case 109:case 173:B=-1;break;case 37:w.shiftKey?Q=-1:(w.preventDefault(),le=-1);break;case 39:w.shiftKey?Q=1:(w.preventDefault(),le=1);break;case 38:w.shiftKey?ee=1:(w.preventDefault(),qe=-1);break;case 40:w.shiftKey?ee=-1:(w.preventDefault(),qe=1);break;default:return}return this._rotationDisabled&&(Q=0,ee=0),{cameraAnimation:Xe=>{let ot=this._tr;Xe.easeTo({duration:300,easeId:"keyboardHandler",easing:Cr,zoom:B?Math.round(ot.zoom)+B*(w.shiftKey?2:1):ot.zoom,bearing:ot.bearing+Q*this._bearingStep,pitch:ot.pitch+ee*this._pitchStep,offset:[-le*this._panStep,-qe*this._panStep],center:ot.center},{originalEvent:w})}}}enable(){this._enabled=!0}disable(){this._enabled=!1,this.reset()}isEnabled(){return this._enabled}isActive(){return this._active}disableRotation(){this._rotationDisabled=!0}enableRotation(){this._rotationDisabled=!1}}function Cr(ue){return ue*(2-ue)}let Ar=4.000244140625;class Kr{constructor(w,B){this._onTimeout=Q=>{this._type="wheel",this._delta-=this._lastValue,this._active||this._start(Q)},this._map=w,this._tr=new du(w),this._triggerRenderFrame=B,this._delta=0,this._defaultZoomRate=.01,this._wheelZoomRate=.0022222222222222222}setZoomRate(w){this._defaultZoomRate=w}setWheelZoomRate(w){this._wheelZoomRate=w}isEnabled(){return!!this._enabled}isActive(){return!!this._active||this._finishTimeout!==void 0}isZooming(){return!!this._zooming}enable(w){this.isEnabled()||(this._enabled=!0,this._aroundCenter=!!w&&w.around==="center")}disable(){this.isEnabled()&&(this._enabled=!1)}_shouldBePrevented(w){return!!this._map.cooperativeGestures.isEnabled()&&!(w.ctrlKey||this._map.cooperativeGestures.isBypassed(w))}wheel(w){if(!this.isEnabled())return;if(this._shouldBePrevented(w))return void this._map.cooperativeGestures.notifyGestureBlocked("wheel_zoom",w);let B=w.deltaMode===WheelEvent.DOM_DELTA_LINE?40*w.deltaY:w.deltaY,Q=u.now(),ee=Q-(this._lastWheelEventTime||0);this._lastWheelEventTime=Q,B!==0&&B%Ar==0?this._type="wheel":B!==0&&Math.abs(B)<4?this._type="trackpad":ee>400?(this._type=null,this._lastValue=B,this._timeout=setTimeout(this._onTimeout,40,w)):this._type||(this._type=Math.abs(ee*B)<200?"trackpad":"wheel",this._timeout&&(clearTimeout(this._timeout),this._timeout=null,B+=this._lastValue)),w.shiftKey&&B&&(B/=4),this._type&&(this._lastWheelEvent=w,this._delta-=B,this._active||this._start(w)),w.preventDefault()}_start(w){if(!this._delta)return;this._frameId&&(this._frameId=null),this._active=!0,this.isZooming()||(this._zooming=!0),this._finishTimeout&&(clearTimeout(this._finishTimeout),delete this._finishTimeout);let B=c.mousePos(this._map.getCanvas(),w),Q=this._tr;this._around=B.y>Q.transform.height/2-Q.transform.getHorizon()?a.N.convert(this._aroundCenter?Q.center:Q.unproject(B)):a.N.convert(Q.center),this._aroundPoint=Q.transform.locationPoint(this._around),this._frameId||(this._frameId=!0,this._triggerRenderFrame())}renderFrame(){if(!this._frameId||(this._frameId=null,!this.isActive()))return;let w=this._tr.transform;if(this._delta!==0){let ot=this._type==="wheel"&&Math.abs(this._delta)>Ar?this._wheelZoomRate:this._defaultZoomRate,Tt=2/(1+Math.exp(-Math.abs(this._delta*ot)));this._delta<0&&Tt!==0&&(Tt=1/Tt);let Yt=typeof this._targetZoom=="number"?w.zoomScale(this._targetZoom):w.scale;this._targetZoom=Math.min(w.maxZoom,Math.max(w.minZoom,w.scaleZoom(Yt*Tt))),this._type==="wheel"&&(this._startZoom=w.zoom,this._easing=this._smoothOutEasing(200)),this._delta=0}let B=typeof this._targetZoom=="number"?this._targetZoom:w.zoom,Q=this._startZoom,ee=this._easing,le,qe=!1,Xe=u.now()-this._lastWheelEventTime;if(this._type==="wheel"&&Q&&ee&&Xe){let ot=Math.min(Xe/200,1),Tt=ee(ot);le=a.y.number(Q,B,Tt),ot<1?this._frameId||(this._frameId=!0):qe=!0}else le=B,qe=!0;return this._active=!0,qe&&(this._active=!1,this._finishTimeout=setTimeout(()=>{this._zooming=!1,this._triggerRenderFrame(),delete this._targetZoom,delete this._finishTimeout},200)),{noInertia:!0,needsRenderFrame:!qe,zoomDelta:le-w.zoom,around:this._aroundPoint,originalEvent:this._lastWheelEvent}}_smoothOutEasing(w){let B=a.b9;if(this._prevEase){let Q=this._prevEase,ee=(u.now()-Q.start)/Q.duration,le=Q.easing(ee+.01)-Q.easing(ee),qe=.27/Math.sqrt(le*le+1e-4)*.01,Xe=Math.sqrt(.0729-qe*qe);B=a.b8(qe,Xe,.25,1)}return this._prevEase={start:u.now(),duration:w,easing:B},B}reset(){this._active=!1,this._zooming=!1,delete this._targetZoom,this._finishTimeout&&(clearTimeout(this._finishTimeout),delete this._finishTimeout)}}class ki{constructor(w,B){this._clickZoom=w,this._tapZoom=B}enable(){this._clickZoom.enable(),this._tapZoom.enable()}disable(){this._clickZoom.disable(),this._tapZoom.disable()}isEnabled(){return this._clickZoom.isEnabled()&&this._tapZoom.isEnabled()}isActive(){return this._clickZoom.isActive()||this._tapZoom.isActive()}}class Xi{constructor(w){this._tr=new du(w),this.reset()}reset(){this._active=!1}dblclick(w,B){return w.preventDefault(),{cameraAnimation:Q=>{Q.easeTo({duration:300,zoom:this._tr.zoom+(w.shiftKey?-1:1),around:this._tr.unproject(B)},{originalEvent:w})}}}enable(){this._enabled=!0}disable(){this._enabled=!1,this.reset()}isEnabled(){return this._enabled}isActive(){return this._active}}class dn{constructor(){this._tap=new wf({numTouches:1,numTaps:1}),this.reset()}reset(){this._active=!1,delete this._swipePoint,delete this._swipeTouch,delete this._tapTime,delete this._tapPoint,this._tap.reset()}touchstart(w,B,Q){if(!this._swipePoint)if(this._tapTime){let ee=B[0],le=w.timeStamp-this._tapTime<500,qe=this._tapPoint.dist(ee)<30;le&&qe?Q.length>0&&(this._swipePoint=ee,this._swipeTouch=Q[0].identifier):this.reset()}else this._tap.touchstart(w,B,Q)}touchmove(w,B,Q){if(this._tapTime){if(this._swipePoint){if(Q[0].identifier!==this._swipeTouch)return;let ee=B[0],le=ee.y-this._swipePoint.y;return this._swipePoint=ee,w.preventDefault(),this._active=!0,{zoomDelta:le/128}}}else this._tap.touchmove(w,B,Q)}touchend(w,B,Q){if(this._tapTime)this._swipePoint&&Q.length===0&&this.reset();else{let ee=this._tap.touchend(w,B,Q);ee&&(this._tapTime=w.timeStamp,this._tapPoint=ee)}}touchcancel(){this.reset()}enable(){this._enabled=!0}disable(){this._enabled=!1,this.reset()}isEnabled(){return this._enabled}isActive(){return this._active}}class wn{constructor(w,B,Q){this._el=w,this._mousePan=B,this._touchPan=Q}enable(w){this._inertiaOptions=w||{},this._mousePan.enable(),this._touchPan.enable(),this._el.classList.add("maplibregl-touch-drag-pan")}disable(){this._mousePan.disable(),this._touchPan.disable(),this._el.classList.remove("maplibregl-touch-drag-pan")}isEnabled(){return this._mousePan.isEnabled()&&this._touchPan.isEnabled()}isActive(){return this._mousePan.isActive()||this._touchPan.isActive()}}class Nn{constructor(w,B,Q){this._pitchWithRotate=w.pitchWithRotate,this._mouseRotate=B,this._mousePitch=Q}enable(){this._mouseRotate.enable(),this._pitchWithRotate&&this._mousePitch.enable()}disable(){this._mouseRotate.disable(),this._mousePitch.disable()}isEnabled(){return this._mouseRotate.isEnabled()&&(!this._pitchWithRotate||this._mousePitch.isEnabled())}isActive(){return this._mouseRotate.isActive()||this._mousePitch.isActive()}}class Yi{constructor(w,B,Q,ee){this._el=w,this._touchZoom=B,this._touchRotate=Q,this._tapDragZoom=ee,this._rotationDisabled=!1,this._enabled=!0}enable(w){this._touchZoom.enable(w),this._rotationDisabled||this._touchRotate.enable(w),this._tapDragZoom.enable(),this._el.classList.add("maplibregl-touch-zoom-rotate")}disable(){this._touchZoom.disable(),this._touchRotate.disable(),this._tapDragZoom.disable(),this._el.classList.remove("maplibregl-touch-zoom-rotate")}isEnabled(){return this._touchZoom.isEnabled()&&(this._rotationDisabled||this._touchRotate.isEnabled())&&this._tapDragZoom.isEnabled()}isActive(){return this._touchZoom.isActive()||this._touchRotate.isActive()||this._tapDragZoom.isActive()}disableRotation(){this._rotationDisabled=!0,this._touchRotate.disable()}enableRotation(){this._rotationDisabled=!1,this._touchZoom.isEnabled()&&this._touchRotate.enable()}}class Qi{constructor(w,B){this._bypassKey=navigator.userAgent.indexOf("Mac")!==-1?"metaKey":"ctrlKey",this._map=w,this._options=B,this._enabled=!1}isActive(){return!1}reset(){}_setupUI(){if(this._container)return;let w=this._map.getCanvasContainer();w.classList.add("maplibregl-cooperative-gestures"),this._container=c.create("div","maplibregl-cooperative-gesture-screen",w);let B=this._map._getUIString("CooperativeGesturesHandler.WindowsHelpText");this._bypassKey==="metaKey"&&(B=this._map._getUIString("CooperativeGesturesHandler.MacHelpText"));let Q=this._map._getUIString("CooperativeGesturesHandler.MobileHelpText"),ee=document.createElement("div");ee.className="maplibregl-desktop-message",ee.textContent=B,this._container.appendChild(ee);let le=document.createElement("div");le.className="maplibregl-mobile-message",le.textContent=Q,this._container.appendChild(le),this._container.setAttribute("aria-hidden","true")}_destroyUI(){this._container&&(c.remove(this._container),this._map.getCanvasContainer().classList.remove("maplibregl-cooperative-gestures")),delete this._container}enable(){this._setupUI(),this._enabled=!0}disable(){this._enabled=!1,this._destroyUI()}isEnabled(){return this._enabled}isBypassed(w){return w[this._bypassKey]}notifyGestureBlocked(w,B){this._enabled&&(this._map.fire(new a.k("cooperativegestureprevented",{gestureType:w,originalEvent:B})),this._container.classList.add("maplibregl-show"),setTimeout(()=>{this._container.classList.remove("maplibregl-show")},100))}}let on=ue=>ue.zoom||ue.drag||ue.pitch||ue.rotate;class Fi extends a.k{}function $n(ue){return ue.panDelta&&ue.panDelta.mag()||ue.zoomDelta||ue.bearingDelta||ue.pitchDelta}class Ca{constructor(w,B){this.handleWindowEvent=ee=>{this.handleEvent(ee,`${ee.type}Window`)},this.handleEvent=(ee,le)=>{if(ee.type==="blur")return void this.stop(!0);this._updatingCamera=!0;let qe=ee.type==="renderFrame"?void 0:ee,Xe={needsRenderFrame:!1},ot={},Tt={},Yt=ee.touches,Kt=Yt?this._getMapTouches(Yt):void 0,xr=Kt?c.touchPos(this._map.getCanvas(),Kt):c.mousePos(this._map.getCanvas(),ee);for(let{handlerName:be,handler:De,allowed:Be}of this._handlers){if(!De.isEnabled())continue;let et;this._blockedByActive(Tt,Be,be)?De.reset():De[le||ee.type]&&(et=De[le||ee.type](ee,xr,Kt),this.mergeHandlerResult(Xe,ot,et,be,qe),et&&et.needsRenderFrame&&this._triggerRenderFrame()),(et||De.isActive())&&(Tt[be]=De)}let Ir={};for(let be in this._previousActiveHandlers)Tt[be]||(Ir[be]=qe);this._previousActiveHandlers=Tt,(Object.keys(Ir).length||$n(Xe))&&(this._changes.push([Xe,ot,Ir]),this._triggerRenderFrame()),(Object.keys(Tt).length||$n(Xe))&&this._map._stop(!0),this._updatingCamera=!1;let{cameraAnimation:ve}=Xe;ve&&(this._inertia.clear(),this._fireEvents({},{},!0),this._changes=[],ve(this._map))},this._map=w,this._el=this._map.getCanvasContainer(),this._handlers=[],this._handlersById={},this._changes=[],this._inertia=new Pf(w),this._bearingSnap=B.bearingSnap,this._previousActiveHandlers={},this._eventsInProgress={},this._addDefaultHandlers(B);let Q=this._el;this._listeners=[[Q,"touchstart",{passive:!0}],[Q,"touchmove",{passive:!1}],[Q,"touchend",void 0],[Q,"touchcancel",void 0],[Q,"mousedown",void 0],[Q,"mousemove",void 0],[Q,"mouseup",void 0],[document,"mousemove",{capture:!0}],[document,"mouseup",void 0],[Q,"mouseover",void 0],[Q,"mouseout",void 0],[Q,"dblclick",void 0],[Q,"click",void 0],[Q,"keydown",{capture:!1}],[Q,"keyup",void 0],[Q,"wheel",{passive:!1}],[Q,"contextmenu",void 0],[window,"blur",void 0]];for(let[ee,le,qe]of this._listeners)c.addEventListener(ee,le,ee===document?this.handleWindowEvent:this.handleEvent,qe)}destroy(){for(let[w,B,Q]of this._listeners)c.removeEventListener(w,B,w===document?this.handleWindowEvent:this.handleEvent,Q)}_addDefaultHandlers(w){let B=this._map,Q=B.getCanvasContainer();this._add("mapEvent",new If(B,w));let ee=B.boxZoom=new ku(B,w);this._add("boxZoom",ee),w.interactive&&w.boxZoom&&ee.enable();let le=B.cooperativeGestures=new Qi(B,w.cooperativeGestures);this._add("cooperativeGestures",le),w.cooperativeGestures&&le.enable();let qe=new zc(B),Xe=new Xi(B);B.doubleClickZoom=new ki(Xe,qe),this._add("tapZoom",qe),this._add("clickZoom",Xe),w.interactive&&w.doubleClickZoom&&B.doubleClickZoom.enable();let ot=new dn;this._add("tapDragZoom",ot);let Tt=B.touchPitch=new Bc(B);this._add("touchPitch",Tt),w.interactive&&w.touchPitch&&B.touchPitch.enable(w.touchPitch);let Yt=Wl(w),Kt=ah(w);B.dragRotate=new Nn(w,Yt,Kt),this._add("mouseRotate",Yt,["mousePitch"]),this._add("mousePitch",Kt,["mouseRotate"]),w.interactive&&w.dragRotate&&B.dragRotate.enable();let xr=(({enable:et,clickTolerance:We})=>{let it=new Xu({checkCorrectEvent:Ft=>c.mouseButton(Ft)===0&&!Ft.ctrlKey});return new Wu({clickTolerance:We,move:(Ft,Ht)=>({around:Ht,panDelta:Ht.sub(Ft)}),activateOnStart:!0,moveStateManager:it,enable:et,assignEvents:Xf})})(w),Ir=new Zu(w,B);B.dragPan=new wn(Q,xr,Ir),this._add("mousePan",xr),this._add("touchPan",Ir,["touchZoom","touchRotate"]),w.interactive&&w.dragPan&&B.dragPan.enable(w.dragPan);let ve=new cf,be=new vu;B.touchZoomRotate=new Yi(Q,be,ve,ot),this._add("touchRotate",ve,["touchPan","touchZoom"]),this._add("touchZoom",be,["touchPan","touchRotate"]),w.interactive&&w.touchZoomRotate&&B.touchZoomRotate.enable(w.touchZoomRotate);let De=B.scrollZoom=new Kr(B,()=>this._triggerRenderFrame());this._add("scrollZoom",De,["mousePan"]),w.interactive&&w.scrollZoom&&B.scrollZoom.enable(w.scrollZoom);let Be=B.keyboard=new Wt(B);this._add("keyboard",Be),w.interactive&&w.keyboard&&B.keyboard.enable(),this._add("blockableMapEvent",new Cs(B))}_add(w,B,Q){this._handlers.push({handlerName:w,handler:B,allowed:Q}),this._handlersById[w]=B}stop(w){if(!this._updatingCamera){for(let{handler:B}of this._handlers)B.reset();this._inertia.clear(),this._fireEvents({},{},w),this._changes=[]}}isActive(){for(let{handler:w}of this._handlers)if(w.isActive())return!0;return!1}isZooming(){return!!this._eventsInProgress.zoom||this._map.scrollZoom.isZooming()}isRotating(){return!!this._eventsInProgress.rotate}isMoving(){return!!on(this._eventsInProgress)||this.isZooming()}_blockedByActive(w,B,Q){for(let ee in w)if(ee!==Q&&(!B||B.indexOf(ee)<0))return!0;return!1}_getMapTouches(w){let B=[];for(let Q of w)this._el.contains(Q.target)&&B.push(Q);return B}mergeHandlerResult(w,B,Q,ee,le){if(!Q)return;a.e(w,Q);let qe={handlerName:ee,originalEvent:Q.originalEvent||le};Q.zoomDelta!==void 0&&(B.zoom=qe),Q.panDelta!==void 0&&(B.drag=qe),Q.pitchDelta!==void 0&&(B.pitch=qe),Q.bearingDelta!==void 0&&(B.rotate=qe)}_applyChanges(){let w={},B={},Q={};for(let[ee,le,qe]of this._changes)ee.panDelta&&(w.panDelta=(w.panDelta||new a.P(0,0))._add(ee.panDelta)),ee.zoomDelta&&(w.zoomDelta=(w.zoomDelta||0)+ee.zoomDelta),ee.bearingDelta&&(w.bearingDelta=(w.bearingDelta||0)+ee.bearingDelta),ee.pitchDelta&&(w.pitchDelta=(w.pitchDelta||0)+ee.pitchDelta),ee.around!==void 0&&(w.around=ee.around),ee.pinchAround!==void 0&&(w.pinchAround=ee.pinchAround),ee.noInertia&&(w.noInertia=ee.noInertia),a.e(B,le),a.e(Q,qe);this._updateMapTransform(w,B,Q),this._changes=[]}_updateMapTransform(w,B,Q){let ee=this._map,le=ee._getTransformForUpdate(),qe=ee.terrain;if(!($n(w)||qe&&this._terrainMovement))return this._fireEvents(B,Q,!0);let{panDelta:Xe,zoomDelta:ot,bearingDelta:Tt,pitchDelta:Yt,around:Kt,pinchAround:xr}=w;xr!==void 0&&(Kt=xr),ee._stop(!0),Kt=Kt||ee.transform.centerPoint;let Ir=le.pointLocation(Xe?Kt.sub(Xe):Kt);Tt&&(le.bearing+=Tt),Yt&&(le.pitch+=Yt),ot&&(le.zoom+=ot),qe?this._terrainMovement||!B.drag&&!B.zoom?B.drag&&this._terrainMovement?le.center=le.pointLocation(le.centerPoint.sub(Xe)):le.setLocationAtPoint(Ir,Kt):(this._terrainMovement=!0,this._map._elevationFreeze=!0,le.setLocationAtPoint(Ir,Kt)):le.setLocationAtPoint(Ir,Kt),ee._applyUpdatedTransform(le),this._map._update(),w.noInertia||this._inertia.record(w),this._fireEvents(B,Q,!0)}_fireEvents(w,B,Q){let ee=on(this._eventsInProgress),le=on(w),qe={};for(let Kt in w){let{originalEvent:xr}=w[Kt];this._eventsInProgress[Kt]||(qe[`${Kt}start`]=xr),this._eventsInProgress[Kt]=w[Kt]}!ee&&le&&this._fireEvent("movestart",le.originalEvent);for(let Kt in qe)this._fireEvent(Kt,qe[Kt]);le&&this._fireEvent("move",le.originalEvent);for(let Kt in w){let{originalEvent:xr}=w[Kt];this._fireEvent(Kt,xr)}let Xe={},ot;for(let Kt in this._eventsInProgress){let{handlerName:xr,originalEvent:Ir}=this._eventsInProgress[Kt];this._handlersById[xr].isActive()||(delete this._eventsInProgress[Kt],ot=B[xr]||Ir,Xe[`${Kt}end`]=ot)}for(let Kt in Xe)this._fireEvent(Kt,Xe[Kt]);let Tt=on(this._eventsInProgress),Yt=(ee||le)&&!Tt;if(Yt&&this._terrainMovement){this._map._elevationFreeze=!1,this._terrainMovement=!1;let Kt=this._map._getTransformForUpdate();Kt.recalculateZoom(this._map.terrain),this._map._applyUpdatedTransform(Kt)}if(Q&&Yt){this._updatingCamera=!0;let Kt=this._inertia._onMoveEnd(this._map.dragPan._inertiaOptions),xr=Ir=>Ir!==0&&-this._bearingSnap<Ir&&Ir<this._bearingSnap;!Kt||!Kt.essential&&u.prefersReducedMotion?(this._map.fire(new a.k("moveend",{originalEvent:ot})),xr(this._map.getBearing())&&this._map.resetNorth()):(xr(Kt.bearing||this._map.getBearing())&&(Kt.bearing=0),Kt.freezeElevation=!0,this._map.easeTo(Kt,{originalEvent:ot})),this._updatingCamera=!1}}_fireEvent(w,B){this._map.fire(new a.k(w,B?{originalEvent:B}:{}))}_requestFrame(){return this._map.triggerRepaint(),this._map._renderTaskQueue.add(w=>{delete this._frameId,this.handleEvent(new Fi("renderFrame",{timeStamp:w})),this._applyChanges()})}_triggerRenderFrame(){this._frameId===void 0&&(this._frameId=this._requestFrame())}}class Ra extends a.E{constructor(w,B){super(),this._renderFrameCallback=()=>{let Q=Math.min((u.now()-this._easeStart)/this._easeOptions.duration,1);this._onEaseFrame(this._easeOptions.easing(Q)),Q<1&&this._easeFrameId?this._easeFrameId=this._requestRenderFrame(this._renderFrameCallback):this.stop()},this._moving=!1,this._zooming=!1,this.transform=w,this._bearingSnap=B.bearingSnap,this.on("moveend",()=>{delete this._requestedCameraState})}getCenter(){return new a.N(this.transform.center.lng,this.transform.center.lat)}setCenter(w,B){return this.jumpTo({center:w},B)}panBy(w,B,Q){return w=a.P.convert(w).mult(-1),this.panTo(this.transform.center,a.e({offset:w},B),Q)}panTo(w,B,Q){return this.easeTo(a.e({center:w},B),Q)}getZoom(){return this.transform.zoom}setZoom(w,B){return this.jumpTo({zoom:w},B),this}zoomTo(w,B,Q){return this.easeTo(a.e({zoom:w},B),Q)}zoomIn(w,B){return this.zoomTo(this.getZoom()+1,w,B),this}zoomOut(w,B){return this.zoomTo(this.getZoom()-1,w,B),this}getBearing(){return this.transform.bearing}setBearing(w,B){return this.jumpTo({bearing:w},B),this}getPadding(){return this.transform.padding}setPadding(w,B){return this.jumpTo({padding:w},B),this}rotateTo(w,B,Q){return this.easeTo(a.e({bearing:w},B),Q)}resetNorth(w,B){return this.rotateTo(0,a.e({duration:1e3},w),B),this}resetNorthPitch(w,B){return this.easeTo(a.e({bearing:0,pitch:0,duration:1e3},w),B),this}snapToNorth(w,B){return Math.abs(this.getBearing())<this._bearingSnap?this.resetNorth(w,B):this}getPitch(){return this.transform.pitch}setPitch(w,B){return this.jumpTo({pitch:w},B),this}cameraForBounds(w,B){w=ce.convert(w).adjustAntiMeridian();let Q=B&&B.bearing||0;return this._cameraForBoxAndBearing(w.getNorthWest(),w.getSouthEast(),Q,B)}_cameraForBoxAndBearing(w,B,Q,ee){let le={top:0,bottom:0,right:0,left:0};if(typeof(ee=a.e({padding:le,offset:[0,0],maxZoom:this.transform.maxZoom},ee)).padding=="number"){let Wr=ee.padding;ee.padding={top:Wr,bottom:Wr,right:Wr,left:Wr}}ee.padding=a.e(le,ee.padding);let qe=this.transform,Xe=qe.padding,ot=new ce(w,B),Tt=qe.project(ot.getNorthWest()),Yt=qe.project(ot.getNorthEast()),Kt=qe.project(ot.getSouthEast()),xr=qe.project(ot.getSouthWest()),Ir=a.ba(-Q),ve=Tt.rotate(Ir),be=Yt.rotate(Ir),De=Kt.rotate(Ir),Be=xr.rotate(Ir),et=new a.P(Math.max(ve.x,be.x,Be.x,De.x),Math.max(ve.y,be.y,Be.y,De.y)),We=new a.P(Math.min(ve.x,be.x,Be.x,De.x),Math.min(ve.y,be.y,Be.y,De.y)),it=et.sub(We),Ft=(qe.width-(Xe.left+Xe.right+ee.padding.left+ee.padding.right))/it.x,Ht=(qe.height-(Xe.top+Xe.bottom+ee.padding.top+ee.padding.bottom))/it.y;if(Ht<0||Ft<0)return void a.w("Map cannot fit within canvas with the given bounds, padding, and/or offset.");let tr=Math.min(qe.scaleZoom(qe.scale*Math.min(Ft,Ht)),ee.maxZoom),dr=a.P.convert(ee.offset),Sr=new a.P((ee.padding.left-ee.padding.right)/2,(ee.padding.top-ee.padding.bottom)/2).rotate(a.ba(Q)),Or=dr.add(Sr).mult(qe.scale/qe.zoomScale(tr));return{center:qe.unproject(Tt.add(Kt).div(2).sub(Or)),zoom:tr,bearing:Q}}fitBounds(w,B,Q){return this._fitInternal(this.cameraForBounds(w,B),B,Q)}fitScreenCoordinates(w,B,Q,ee,le){return this._fitInternal(this._cameraForBoxAndBearing(this.transform.pointLocation(a.P.convert(w)),this.transform.pointLocation(a.P.convert(B)),Q,ee),ee,le)}_fitInternal(w,B,Q){return w?(delete(B=a.e(w,B)).padding,B.linear?this.easeTo(B,Q):this.flyTo(B,Q)):this}jumpTo(w,B){this.stop();let Q=this._getTransformForUpdate(),ee=!1,le=!1,qe=!1;return"zoom"in w&&Q.zoom!==+w.zoom&&(ee=!0,Q.zoom=+w.zoom),w.center!==void 0&&(Q.center=a.N.convert(w.center)),"bearing"in w&&Q.bearing!==+w.bearing&&(le=!0,Q.bearing=+w.bearing),"pitch"in w&&Q.pitch!==+w.pitch&&(qe=!0,Q.pitch=+w.pitch),w.padding==null||Q.isPaddingEqual(w.padding)||(Q.padding=w.padding),this._applyUpdatedTransform(Q),this.fire(new a.k("movestart",B)).fire(new a.k("move",B)),ee&&this.fire(new a.k("zoomstart",B)).fire(new a.k("zoom",B)).fire(new a.k("zoomend",B)),le&&this.fire(new a.k("rotatestart",B)).fire(new a.k("rotate",B)).fire(new a.k("rotateend",B)),qe&&this.fire(new a.k("pitchstart",B)).fire(new a.k("pitch",B)).fire(new a.k("pitchend",B)),this.fire(new a.k("moveend",B))}calculateCameraOptionsFromTo(w,B,Q,ee=0){let le=a.Z.fromLngLat(w,B),qe=a.Z.fromLngLat(Q,ee),Xe=qe.x-le.x,ot=qe.y-le.y,Tt=qe.z-le.z,Yt=Math.hypot(Xe,ot,Tt);if(Yt===0)throw new Error("Can't calculate camera options with same From and To");let Kt=Math.hypot(Xe,ot),xr=this.transform.scaleZoom(this.transform.cameraToCenterDistance/Yt/this.transform.tileSize),Ir=180*Math.atan2(Xe,-ot)/Math.PI,ve=180*Math.acos(Kt/Yt)/Math.PI;return ve=Tt<0?90-ve:90+ve,{center:qe.toLngLat(),zoom:xr,pitch:ve,bearing:Ir}}easeTo(w,B){var Q;this._stop(!1,w.easeId),((w=a.e({offset:[0,0],duration:500,easing:a.b9},w)).animate===!1||!w.essential&&u.prefersReducedMotion)&&(w.duration=0);let ee=this._getTransformForUpdate(),le=ee.zoom,qe=ee.bearing,Xe=ee.pitch,ot=ee.padding,Tt="bearing"in w?this._normalizeBearing(w.bearing,qe):qe,Yt="pitch"in w?+w.pitch:Xe,Kt="padding"in w?w.padding:ee.padding,xr=a.P.convert(w.offset),Ir=ee.centerPoint.add(xr),ve=ee.pointLocation(Ir),{center:be,zoom:De}=ee.getConstrained(a.N.convert(w.center||ve),(Q=w.zoom)!==null&&Q!==void 0?Q:le);this._normalizeCenter(be,ee);let Be=ee.project(ve),et=ee.project(be).sub(Be),We=ee.zoomScale(De-le),it,Ft;w.around&&(it=a.N.convert(w.around),Ft=ee.locationPoint(it));let Ht={moving:this._moving,zooming:this._zooming,rotating:this._rotating,pitching:this._pitching};return this._zooming=this._zooming||De!==le,this._rotating=this._rotating||qe!==Tt,this._pitching=this._pitching||Yt!==Xe,this._padding=!ee.isPaddingEqual(Kt),this._easeId=w.easeId,this._prepareEase(B,w.noMoveStart,Ht),this.terrain&&this._prepareElevation(be),this._ease(tr=>{if(this._zooming&&(ee.zoom=a.y.number(le,De,tr)),this._rotating&&(ee.bearing=a.y.number(qe,Tt,tr)),this._pitching&&(ee.pitch=a.y.number(Xe,Yt,tr)),this._padding&&(ee.interpolatePadding(ot,Kt,tr),Ir=ee.centerPoint.add(xr)),this.terrain&&!w.freezeElevation&&this._updateElevation(tr),it)ee.setLocationAtPoint(it,Ft);else{let dr=ee.zoomScale(ee.zoom-le),Sr=De>le?Math.min(2,We):Math.max(.5,We),Or=Math.pow(Sr,1-tr),Wr=ee.unproject(Be.add(et.mult(tr*Or)).mult(dr));ee.setLocationAtPoint(ee.renderWorldCopies?Wr.wrap():Wr,Ir)}this._applyUpdatedTransform(ee),this._fireMoveEvents(B)},tr=>{this.terrain&&w.freezeElevation&&this._finalizeElevation(),this._afterEase(B,tr)},w),this}_prepareEase(w,B,Q={}){this._moving=!0,B||Q.moving||this.fire(new a.k("movestart",w)),this._zooming&&!Q.zooming&&this.fire(new a.k("zoomstart",w)),this._rotating&&!Q.rotating&&this.fire(new a.k("rotatestart",w)),this._pitching&&!Q.pitching&&this.fire(new a.k("pitchstart",w))}_prepareElevation(w){this._elevationCenter=w,this._elevationStart=this.transform.elevation,this._elevationTarget=this.terrain.getElevationForLngLatZoom(w,this.transform.tileZoom),this._elevationFreeze=!0}_updateElevation(w){this.transform.minElevationForCurrentTile=this.terrain.getMinTileElevationForLngLatZoom(this._elevationCenter,this.transform.tileZoom);let B=this.terrain.getElevationForLngLatZoom(this._elevationCenter,this.transform.tileZoom);if(w<1&&B!==this._elevationTarget){let Q=this._elevationTarget-this._elevationStart;this._elevationStart+=w*(Q-(B-(Q*w+this._elevationStart))/(1-w)),this._elevationTarget=B}this.transform.elevation=a.y.number(this._elevationStart,this._elevationTarget,w)}_finalizeElevation(){this._elevationFreeze=!1,this.transform.recalculateZoom(this.terrain)}_getTransformForUpdate(){return this.transformCameraUpdate||this.terrain?(this._requestedCameraState||(this._requestedCameraState=this.transform.clone()),this._requestedCameraState):this.transform}_elevateCameraIfInsideTerrain(w){let B=w.getCameraPosition(),Q=this.terrain.getElevationForLngLatZoom(B.lngLat,w.zoom);if(B.altitude<Q){let ee=this.calculateCameraOptionsFromTo(B.lngLat,Q,w.center,w.elevation);return{pitch:ee.pitch,zoom:ee.zoom}}return{}}_applyUpdatedTransform(w){let B=[];if(this.terrain&&B.push(ee=>this._elevateCameraIfInsideTerrain(ee)),this.transformCameraUpdate&&B.push(ee=>this.transformCameraUpdate(ee)),!B.length)return;let Q=w.clone();for(let ee of B){let le=Q.clone(),{center:qe,zoom:Xe,pitch:ot,bearing:Tt,elevation:Yt}=ee(le);qe&&(le.center=qe),Xe!==void 0&&(le.zoom=Xe),ot!==void 0&&(le.pitch=ot),Tt!==void 0&&(le.bearing=Tt),Yt!==void 0&&(le.elevation=Yt),Q.apply(le)}this.transform.apply(Q)}_fireMoveEvents(w){this.fire(new a.k("move",w)),this._zooming&&this.fire(new a.k("zoom",w)),this._rotating&&this.fire(new a.k("rotate",w)),this._pitching&&this.fire(new a.k("pitch",w))}_afterEase(w,B){if(this._easeId&&B&&this._easeId===B)return;delete this._easeId;let Q=this._zooming,ee=this._rotating,le=this._pitching;this._moving=!1,this._zooming=!1,this._rotating=!1,this._pitching=!1,this._padding=!1,Q&&this.fire(new a.k("zoomend",w)),ee&&this.fire(new a.k("rotateend",w)),le&&this.fire(new a.k("pitchend",w)),this.fire(new a.k("moveend",w))}flyTo(w,B){var Q;if(!w.essential&&u.prefersReducedMotion){let Cn=a.M(w,["center","zoom","bearing","pitch","around"]);return this.jumpTo(Cn,B)}this.stop(),w=a.e({offset:[0,0],speed:1.2,curve:1.42,easing:a.b9},w);let ee=this._getTransformForUpdate(),le=ee.zoom,qe=ee.bearing,Xe=ee.pitch,ot=ee.padding,Tt="bearing"in w?this._normalizeBearing(w.bearing,qe):qe,Yt="pitch"in w?+w.pitch:Xe,Kt="padding"in w?w.padding:ee.padding,xr=a.P.convert(w.offset),Ir=ee.centerPoint.add(xr),ve=ee.pointLocation(Ir),{center:be,zoom:De}=ee.getConstrained(a.N.convert(w.center||ve),(Q=w.zoom)!==null&&Q!==void 0?Q:le);this._normalizeCenter(be,ee);let Be=ee.zoomScale(De-le),et=ee.project(ve),We=ee.project(be).sub(et),it=w.curve,Ft=Math.max(ee.width,ee.height),Ht=Ft/Be,tr=We.mag();if("minZoom"in w){let Cn=a.ac(Math.min(w.minZoom,le,De),ee.minZoom,ee.maxZoom),Kn=Ft/ee.zoomScale(Cn-le);it=Math.sqrt(Kn/tr*2)}let dr=it*it;function Sr(Cn){let Kn=(Ht*Ht-Ft*Ft+(Cn?-1:1)*dr*dr*tr*tr)/(2*(Cn?Ht:Ft)*dr*tr);return Math.log(Math.sqrt(Kn*Kn+1)-Kn)}function Or(Cn){return(Math.exp(Cn)-Math.exp(-Cn))/2}function Wr(Cn){return(Math.exp(Cn)+Math.exp(-Cn))/2}let ni=Sr(!1),Pi=function(Cn){return Wr(ni)/Wr(ni+it*Cn)},cn=function(Cn){return Ft*((Wr(ni)*(Or(Kn=ni+it*Cn)/Wr(Kn))-Or(ni))/dr)/tr;var Kn},ln=(Sr(!0)-ni)/it;if(Math.abs(tr)<1e-6||!isFinite(ln)){if(Math.abs(Ft-Ht)<1e-6)return this.easeTo(w,B);let Cn=Ht<Ft?-1:1;ln=Math.abs(Math.log(Ht/Ft))/it,cn=()=>0,Pi=Kn=>Math.exp(Cn*it*Kn)}return w.duration="duration"in w?+w.duration:1e3*ln/("screenSpeed"in w?+w.screenSpeed/it:+w.speed),w.maxDuration&&w.duration>w.maxDuration&&(w.duration=0),this._zooming=!0,this._rotating=qe!==Tt,this._pitching=Yt!==Xe,this._padding=!ee.isPaddingEqual(Kt),this._prepareEase(B,!1),this.terrain&&this._prepareElevation(be),this._ease(Cn=>{let Kn=Cn*ln,Ta=1/Pi(Kn);ee.zoom=Cn===1?De:le+ee.scaleZoom(Ta),this._rotating&&(ee.bearing=a.y.number(qe,Tt,Cn)),this._pitching&&(ee.pitch=a.y.number(Xe,Yt,Cn)),this._padding&&(ee.interpolatePadding(ot,Kt,Cn),Ir=ee.centerPoint.add(xr)),this.terrain&&!w.freezeElevation&&this._updateElevation(Cn);let fa=Cn===1?be:ee.unproject(et.add(We.mult(cn(Kn))).mult(Ta));ee.setLocationAtPoint(ee.renderWorldCopies?fa.wrap():fa,Ir),this._applyUpdatedTransform(ee),this._fireMoveEvents(B)},()=>{this.terrain&&w.freezeElevation&&this._finalizeElevation(),this._afterEase(B)},w),this}isEasing(){return!!this._easeFrameId}stop(){return this._stop()}_stop(w,B){var Q;if(this._easeFrameId&&(this._cancelRenderFrame(this._easeFrameId),delete this._easeFrameId,delete this._onEaseFrame),this._onEaseEnd){let ee=this._onEaseEnd;delete this._onEaseEnd,ee.call(this,B)}return w||(Q=this.handlers)===null||Q===void 0||Q.stop(!1),this}_ease(w,B,Q){Q.animate===!1||Q.duration===0?(w(1),B()):(this._easeStart=u.now(),this._easeOptions=Q,this._onEaseFrame=w,this._onEaseEnd=B,this._easeFrameId=this._requestRenderFrame(this._renderFrameCallback))}_normalizeBearing(w,B){w=a.b3(w,-180,180);let Q=Math.abs(w-B);return Math.abs(w-360-B)<Q&&(w-=360),Math.abs(w+360-B)<Q&&(w+=360),w}_normalizeCenter(w,B){if(!B.renderWorldCopies||B.lngRange)return;let Q=w.lng-B.center.lng;w.lng+=Q>180?-360:Q<-180?360:0}queryTerrainElevation(w){return this.terrain?this.terrain.getElevationForLngLatZoom(a.N.convert(w),this.transform.tileZoom)-this.transform.elevation:null}}let La={compact:!0,customAttribution:'<a href="https://maplibre.org/" target="_blank">MapLibre</a>'};class Na{constructor(w=La){this._toggleAttribution=()=>{this._container.classList.contains("maplibregl-compact")&&(this._container.classList.contains("maplibregl-compact-show")?(this._container.setAttribute("open",""),this._container.classList.remove("maplibregl-compact-show")):(this._container.classList.add("maplibregl-compact-show"),this._container.removeAttribute("open")))},this._updateData=B=>{!B||B.sourceDataType!=="metadata"&&B.sourceDataType!=="visibility"&&B.dataType!=="style"&&B.type!=="terrain"||this._updateAttributions()},this._updateCompact=()=>{this._map.getCanvasContainer().offsetWidth<=640||this._compact?this._compact===!1?this._container.setAttribute("open",""):this._container.classList.contains("maplibregl-compact")||this._container.classList.contains("maplibregl-attrib-empty")||(this._container.setAttribute("open",""),this._container.classList.add("maplibregl-compact","maplibregl-compact-show")):(this._container.setAttribute("open",""),this._container.classList.contains("maplibregl-compact")&&this._container.classList.remove("maplibregl-compact","maplibregl-compact-show"))},this._updateCompactMinimize=()=>{this._container.classList.contains("maplibregl-compact")&&this._container.classList.contains("maplibregl-compact-show")&&this._container.classList.remove("maplibregl-compact-show")},this.options=w}getDefaultPosition(){return"bottom-right"}onAdd(w){return this._map=w,this._compact=this.options.compact,this._container=c.create("details","maplibregl-ctrl maplibregl-ctrl-attrib"),this._compactButton=c.create("summary","maplibregl-ctrl-attrib-button",this._container),this._compactButton.addEventListener("click",this._toggleAttribution),this._setElementTitle(this._compactButton,"ToggleAttribution"),this._innerContainer=c.create("div","maplibregl-ctrl-attrib-inner",this._container),this._updateAttributions(),this._updateCompact(),this._map.on("styledata",this._updateData),this._map.on("sourcedata",this._updateData),this._map.on("terrain",this._updateData),this._map.on("resize",this._updateCompact),this._map.on("drag",this._updateCompactMinimize),this._container}onRemove(){c.remove(this._container),this._map.off("styledata",this._updateData),this._map.off("sourcedata",this._updateData),this._map.off("terrain",this._updateData),this._map.off("resize",this._updateCompact),this._map.off("drag",this._updateCompactMinimize),this._map=void 0,this._compact=void 0,this._attribHTML=void 0}_setElementTitle(w,B){let Q=this._map._getUIString(`AttributionControl.${B}`);w.title=Q,w.setAttribute("aria-label",Q)}_updateAttributions(){if(!this._map.style)return;let w=[];if(this.options.customAttribution&&(Array.isArray(this.options.customAttribution)?w=w.concat(this.options.customAttribution.map(ee=>typeof ee!="string"?"":ee)):typeof this.options.customAttribution=="string"&&w.push(this.options.customAttribution)),this._map.style.stylesheet){let ee=this._map.style.stylesheet;this.styleOwner=ee.owner,this.styleId=ee.id}let B=this._map.style.sourceCaches;for(let ee in B){let le=B[ee];if(le.used||le.usedForTerrain){let qe=le.getSource();qe.attribution&&w.indexOf(qe.attribution)<0&&w.push(qe.attribution)}}w=w.filter(ee=>String(ee).trim()),w.sort((ee,le)=>ee.length-le.length),w=w.filter((ee,le)=>{for(let qe=le+1;qe<w.length;qe++)if(w[qe].indexOf(ee)>=0)return!1;return!0});let Q=w.join(" | ");Q!==this._attribHTML&&(this._attribHTML=Q,w.length?(this._innerContainer.innerHTML=Q,this._container.classList.remove("maplibregl-attrib-empty")):this._container.classList.add("maplibregl-attrib-empty"),this._updateCompact(),this._editLink=null)}}class Yn{constructor(w={}){this._updateCompact=()=>{let B=this._container.children;if(B.length){let Q=B[0];this._map.getCanvasContainer().offsetWidth<=640||this._compact?this._compact!==!1&&Q.classList.add("maplibregl-compact"):Q.classList.remove("maplibregl-compact")}},this.options=w}getDefaultPosition(){return"bottom-left"}onAdd(w){this._map=w,this._compact=this.options&&this.options.compact,this._container=c.create("div","maplibregl-ctrl");let B=c.create("a","maplibregl-ctrl-logo");return B.target="_blank",B.rel="noopener nofollow",B.href="https://maplibre.org/",B.setAttribute("aria-label",this._map._getUIString("LogoControl.Title")),B.setAttribute("rel","noopener nofollow"),this._container.appendChild(B),this._container.style.display="block",this._map.on("resize",this._updateCompact),this._updateCompact(),this._container}onRemove(){c.remove(this._container),this._map.off("resize",this._updateCompact),this._map=void 0,this._compact=void 0}}class Dn{constructor(){this._queue=[],this._id=0,this._cleared=!1,this._currentlyRunning=!1}add(w){let B=++this._id;return this._queue.push({callback:w,id:B,cancelled:!1}),B}remove(w){let B=this._currentlyRunning,Q=B?this._queue.concat(B):this._queue;for(let ee of Q)if(ee.id===w)return void(ee.cancelled=!0)}run(w=0){if(this._currentlyRunning)throw new Error("Attempting to run(), but is already running.");let B=this._currentlyRunning=this._queue;this._queue=[];for(let Q of B)if(!Q.cancelled&&(Q.callback(w),this._cleared))break;this._cleared=!1,this._currentlyRunning=!1}clear(){this._currentlyRunning&&(this._cleared=!0),this._queue=[]}}var Ka=a.Y([{name:"a_pos3d",type:"Int16",components:3}]);class bo extends a.E{constructor(w){super(),this.sourceCache=w,this._tiles={},this._renderableTilesKeys=[],this._sourceTileCache={},this.minzoom=0,this.maxzoom=22,this.tileSize=512,this.deltaZoom=1,w.usedForTerrain=!0,w.tileSize=this.tileSize*2**this.deltaZoom}destruct(){this.sourceCache.usedForTerrain=!1,this.sourceCache.tileSize=null}update(w,B){this.sourceCache.update(w,B),this._renderableTilesKeys=[];let Q={};for(let ee of w.coveringTiles({tileSize:this.tileSize,minzoom:this.minzoom,maxzoom:this.maxzoom,reparseOverscaled:!1,terrain:B}))Q[ee.key]=!0,this._renderableTilesKeys.push(ee.key),this._tiles[ee.key]||(ee.posMatrix=new Float64Array(16),a.aP(ee.posMatrix,0,a.X,0,a.X,0,1),this._tiles[ee.key]=new Vt(ee,this.tileSize));for(let ee in this._tiles)Q[ee]||delete this._tiles[ee]}freeRtt(w){for(let B in this._tiles){let Q=this._tiles[B];(!w||Q.tileID.equals(w)||Q.tileID.isChildOf(w)||w.isChildOf(Q.tileID))&&(Q.rtt=[])}}getRenderableTiles(){return this._renderableTilesKeys.map(w=>this.getTileByID(w))}getTileByID(w){return this._tiles[w]}getTerrainCoords(w){let B={};for(let Q of this._renderableTilesKeys){let ee=this._tiles[Q].tileID;if(ee.canonical.equals(w.canonical)){let le=w.clone();le.posMatrix=new Float64Array(16),a.aP(le.posMatrix,0,a.X,0,a.X,0,1),B[Q]=le}else if(ee.canonical.isChildOf(w.canonical)){let le=w.clone();le.posMatrix=new Float64Array(16);let qe=ee.canonical.z-w.canonical.z,Xe=ee.canonical.x-(ee.canonical.x>>qe<<qe),ot=ee.canonical.y-(ee.canonical.y>>qe<<qe),Tt=a.X>>qe;a.aP(le.posMatrix,0,Tt,0,Tt,0,1),a.J(le.posMatrix,le.posMatrix,[-Xe*Tt,-ot*Tt,0]),B[Q]=le}else if(w.canonical.isChildOf(ee.canonical)){let le=w.clone();le.posMatrix=new Float64Array(16);let qe=w.canonical.z-ee.canonical.z,Xe=w.canonical.x-(w.canonical.x>>qe<<qe),ot=w.canonical.y-(w.canonical.y>>qe<<qe),Tt=a.X>>qe;a.aP(le.posMatrix,0,a.X,0,a.X,0,1),a.J(le.posMatrix,le.posMatrix,[Xe*Tt,ot*Tt,0]),a.K(le.posMatrix,le.posMatrix,[1/2**qe,1/2**qe,0]),B[Q]=le}}return B}getSourceTile(w,B){let Q=this.sourceCache._source,ee=w.overscaledZ-this.deltaZoom;if(ee>Q.maxzoom&&(ee=Q.maxzoom),ee<Q.minzoom)return null;this._sourceTileCache[w.key]||(this._sourceTileCache[w.key]=w.scaledTo(ee).key);let le=this.sourceCache.getTileByID(this._sourceTileCache[w.key]);if((!le||!le.dem)&&B)for(;ee>=Q.minzoom&&(!le||!le.dem);)le=this.sourceCache.getTileByID(w.scaledTo(ee--).key);return le}tilesAfterTime(w=Date.now()){return Object.values(this._tiles).filter(B=>B.timeAdded>=w)}}class Xo{constructor(w,B,Q){this.painter=w,this.sourceCache=new bo(B),this.options=Q,this.exaggeration=typeof Q.exaggeration=="number"?Q.exaggeration:1,this.qualityFactor=2,this.meshSize=128,this._demMatrixCache={},this.coordsIndex=[],this._coordsTextureSize=1024}getDEMElevation(w,B,Q,ee=a.X){var le;if(!(B>=0&&B<ee&&Q>=0&&Q<ee))return 0;let qe=this.getTerrainData(w),Xe=(le=qe.tile)===null||le===void 0?void 0:le.dem;if(!Xe)return 0;let ot=function(ve,be,De){var Be=be[0],et=be[1];return ve[0]=De[0]*Be+De[4]*et+De[12],ve[1]=De[1]*Be+De[5]*et+De[13],ve}([],[B/ee*a.X,Q/ee*a.X],qe.u_terrain_matrix),Tt=[ot[0]*Xe.dim,ot[1]*Xe.dim],Yt=Math.floor(Tt[0]),Kt=Math.floor(Tt[1]),xr=Tt[0]-Yt,Ir=Tt[1]-Kt;return Xe.get(Yt,Kt)*(1-xr)*(1-Ir)+Xe.get(Yt+1,Kt)*xr*(1-Ir)+Xe.get(Yt,Kt+1)*(1-xr)*Ir+Xe.get(Yt+1,Kt+1)*xr*Ir}getElevationForLngLatZoom(w,B){if(!a.bb(B,w.wrap()))return 0;let{tileID:Q,mercatorX:ee,mercatorY:le}=this._getOverscaledTileIDFromLngLatZoom(w,B);return this.getElevation(Q,ee%a.X,le%a.X,a.X)}getElevation(w,B,Q,ee=a.X){return this.getDEMElevation(w,B,Q,ee)*this.exaggeration}getTerrainData(w){if(!this._emptyDemTexture){let ee=this.painter.context,le=new a.R({width:1,height:1},new Uint8Array(4));this._emptyDepthTexture=new g(ee,le,ee.gl.RGBA,{premultiply:!1}),this._emptyDemUnpack=[0,0,0,0],this._emptyDemTexture=new g(ee,new a.R({width:1,height:1}),ee.gl.RGBA,{premultiply:!1}),this._emptyDemTexture.bind(ee.gl.NEAREST,ee.gl.CLAMP_TO_EDGE),this._emptyDemMatrix=a.an([])}let B=this.sourceCache.getSourceTile(w,!0);if(B&&B.dem&&(!B.demTexture||B.needsTerrainPrepare)){let ee=this.painter.context;B.demTexture=this.painter.getTileTexture(B.dem.stride),B.demTexture?B.demTexture.update(B.dem.getPixels(),{premultiply:!1}):B.demTexture=new g(ee,B.dem.getPixels(),ee.gl.RGBA,{premultiply:!1}),B.demTexture.bind(ee.gl.NEAREST,ee.gl.CLAMP_TO_EDGE),B.needsTerrainPrepare=!1}let Q=B&&B+B.tileID.key+w.key;if(Q&&!this._demMatrixCache[Q]){let ee=this.sourceCache.sourceCache._source.maxzoom,le=w.canonical.z-B.tileID.canonical.z;w.overscaledZ>w.canonical.z&&(w.canonical.z>=ee?le=w.canonical.z-ee:a.w("cannot calculate elevation if elevation maxzoom > source.maxzoom"));let qe=w.canonical.x-(w.canonical.x>>le<<le),Xe=w.canonical.y-(w.canonical.y>>le<<le),ot=a.bc(new Float64Array(16),[1/(a.X<<le),1/(a.X<<le),0]);a.J(ot,ot,[qe*a.X,Xe*a.X,0]),this._demMatrixCache[w.key]={matrix:ot,coord:w}}return{u_depth:2,u_terrain:3,u_terrain_dim:B&&B.dem&&B.dem.dim||1,u_terrain_matrix:Q?this._demMatrixCache[w.key].matrix:this._emptyDemMatrix,u_terrain_unpack:B&&B.dem&&B.dem.getUnpackVector()||this._emptyDemUnpack,u_terrain_exaggeration:this.exaggeration,texture:(B&&B.demTexture||this._emptyDemTexture).texture,depthTexture:(this._fboDepthTexture||this._emptyDepthTexture).texture,tile:B}}getFramebuffer(w){let B=this.painter,Q=B.width/devicePixelRatio,ee=B.height/devicePixelRatio;return!this._fbo||this._fbo.width===Q&&this._fbo.height===ee||(this._fbo.destroy(),this._fboCoordsTexture.destroy(),this._fboDepthTexture.destroy(),delete this._fbo,delete this._fboDepthTexture,delete this._fboCoordsTexture),this._fboCoordsTexture||(this._fboCoordsTexture=new g(B.context,{width:Q,height:ee,data:null},B.context.gl.RGBA,{premultiply:!1}),this._fboCoordsTexture.bind(B.context.gl.NEAREST,B.context.gl.CLAMP_TO_EDGE)),this._fboDepthTexture||(this._fboDepthTexture=new g(B.context,{width:Q,height:ee,data:null},B.context.gl.RGBA,{premultiply:!1}),this._fboDepthTexture.bind(B.context.gl.NEAREST,B.context.gl.CLAMP_TO_EDGE)),this._fbo||(this._fbo=B.context.createFramebuffer(Q,ee,!0,!1),this._fbo.depthAttachment.set(B.context.createRenderbuffer(B.context.gl.DEPTH_COMPONENT16,Q,ee))),this._fbo.colorAttachment.set(w==="coords"?this._fboCoordsTexture.texture:this._fboDepthTexture.texture),this._fbo}getCoordsTexture(){let w=this.painter.context;if(this._coordsTexture)return this._coordsTexture;let B=new Uint8Array(this._coordsTextureSize*this._coordsTextureSize*4);for(let le=0,qe=0;le<this._coordsTextureSize;le++)for(let Xe=0;Xe<this._coordsTextureSize;Xe++,qe+=4)B[qe+0]=255&Xe,B[qe+1]=255&le,B[qe+2]=Xe>>8<<4|le>>8,B[qe+3]=0;let Q=new a.R({width:this._coordsTextureSize,height:this._coordsTextureSize},new Uint8Array(B.buffer)),ee=new g(w,Q,w.gl.RGBA,{premultiply:!1});return ee.bind(w.gl.NEAREST,w.gl.CLAMP_TO_EDGE),this._coordsTexture=ee,ee}pointCoordinate(w){this.painter.maybeDrawDepthAndCoords(!0);let B=new Uint8Array(4),Q=this.painter.context,ee=Q.gl,le=Math.round(w.x*this.painter.pixelRatio/devicePixelRatio),qe=Math.round(w.y*this.painter.pixelRatio/devicePixelRatio),Xe=Math.round(this.painter.height/devicePixelRatio);Q.bindFramebuffer.set(this.getFramebuffer("coords").framebuffer),ee.readPixels(le,Xe-qe-1,1,1,ee.RGBA,ee.UNSIGNED_BYTE,B),Q.bindFramebuffer.set(null);let ot=B[0]+(B[2]>>4<<8),Tt=B[1]+((15&B[2])<<8),Yt=this.coordsIndex[255-B[3]],Kt=Yt&&this.sourceCache.getTileByID(Yt);if(!Kt)return null;let xr=this._coordsTextureSize,Ir=(1<<Kt.tileID.canonical.z)*xr;return new a.Z((Kt.tileID.canonical.x*xr+ot)/Ir+Kt.tileID.wrap,(Kt.tileID.canonical.y*xr+Tt)/Ir,this.getElevation(Kt.tileID,ot,Tt,xr))}depthAtPoint(w){let B=new Uint8Array(4),Q=this.painter.context,ee=Q.gl;return Q.bindFramebuffer.set(this.getFramebuffer("depth").framebuffer),ee.readPixels(w.x,this.painter.height/devicePixelRatio-w.y-1,1,1,ee.RGBA,ee.UNSIGNED_BYTE,B),Q.bindFramebuffer.set(null),(B[0]/16777216+B[1]/65536+B[2]/256+B[3])/256}getTerrainMesh(){if(this._mesh)return this._mesh;let w=this.painter.context,B=new a.bd,Q=new a.aY,ee=this.meshSize,le=a.X/ee,qe=ee*ee;for(let Kt=0;Kt<=ee;Kt++)for(let xr=0;xr<=ee;xr++)B.emplaceBack(xr*le,Kt*le,0);for(let Kt=0;Kt<qe;Kt+=ee+1)for(let xr=0;xr<ee;xr++)Q.emplaceBack(xr+Kt,ee+xr+Kt+1,ee+xr+Kt+2),Q.emplaceBack(xr+Kt,ee+xr+Kt+2,xr+Kt+1);let Xe=B.length,ot=Xe+2*(ee+1);for(let Kt of[0,1])for(let xr=0;xr<=ee;xr++)for(let Ir of[0,1])B.emplaceBack(xr*le,Kt*a.X,Ir);for(let Kt=0;Kt<2*ee;Kt+=2)Q.emplaceBack(ot+Kt,ot+Kt+1,ot+Kt+3),Q.emplaceBack(ot+Kt,ot+Kt+3,ot+Kt+2),Q.emplaceBack(Xe+Kt,Xe+Kt+3,Xe+Kt+1),Q.emplaceBack(Xe+Kt,Xe+Kt+2,Xe+Kt+3);let Tt=B.length,Yt=Tt+2*(ee+1);for(let Kt of[0,1])for(let xr=0;xr<=ee;xr++)for(let Ir of[0,1])B.emplaceBack(Kt*a.X,xr*le,Ir);for(let Kt=0;Kt<2*ee;Kt+=2)Q.emplaceBack(Tt+Kt,Tt+Kt+1,Tt+Kt+3),Q.emplaceBack(Tt+Kt,Tt+Kt+3,Tt+Kt+2),Q.emplaceBack(Yt+Kt,Yt+Kt+3,Yt+Kt+1),Q.emplaceBack(Yt+Kt,Yt+Kt+2,Yt+Kt+3);return this._mesh=new Eu(w.createVertexBuffer(B,Ka.members),w.createIndexBuffer(Q),a.a0.simpleSegment(0,0,B.length,Q.length)),this._mesh}getMeshFrameDelta(w){return 2*Math.PI*a.be/Math.pow(2,w)/5}getMinTileElevationForLngLatZoom(w,B){var Q;let{tileID:ee}=this._getOverscaledTileIDFromLngLatZoom(w,B);return(Q=this.getMinMaxElevation(ee).minElevation)!==null&&Q!==void 0?Q:0}getMinMaxElevation(w){let B=this.getTerrainData(w).tile,Q={minElevation:null,maxElevation:null};return B&&B.dem&&(Q.minElevation=B.dem.min*this.exaggeration,Q.maxElevation=B.dem.max*this.exaggeration),Q}_getOverscaledTileIDFromLngLatZoom(w,B){let Q=a.Z.fromLngLat(w.wrap()),ee=(1<<B)*a.X,le=Q.x*ee,qe=Q.y*ee,Xe=Math.floor(le/a.X),ot=Math.floor(qe/a.X);return{tileID:new a.S(B,0,B,Xe,ot),mercatorX:le,mercatorY:qe}}}class Ss{constructor(w,B,Q){this._context=w,this._size=B,this._tileSize=Q,this._objects=[],this._recentlyUsed=[],this._stamp=0}destruct(){for(let w of this._objects)w.texture.destroy(),w.fbo.destroy()}_createObject(w){let B=this._context.createFramebuffer(this._tileSize,this._tileSize,!0,!0),Q=new g(this._context,{width:this._tileSize,height:this._tileSize,data:null},this._context.gl.RGBA);return Q.bind(this._context.gl.LINEAR,this._context.gl.CLAMP_TO_EDGE),B.depthAttachment.set(this._context.createRenderbuffer(this._context.gl.DEPTH_STENCIL,this._tileSize,this._tileSize)),B.colorAttachment.set(Q.texture),{id:w,fbo:B,texture:Q,stamp:-1,inUse:!1}}getObjectForId(w){return this._objects[w]}useObject(w){w.inUse=!0,this._recentlyUsed=this._recentlyUsed.filter(B=>w.id!==B),this._recentlyUsed.push(w.id)}stampObject(w){w.stamp=++this._stamp}getOrCreateFreeObject(){for(let B of this._recentlyUsed)if(!this._objects[B].inUse)return this._objects[B];if(this._objects.length>=this._size)throw new Error("No free RenderPool available, call freeAllObjects() required!");let w=this._createObject(this._objects.length);return this._objects.push(w),w}freeObject(w){w.inUse=!1}freeAllObjects(){for(let w of this._objects)this.freeObject(w)}isFull(){return!(this._objects.length<this._size)&&this._objects.some(w=>!w.inUse)===!1}}let as={background:!0,fill:!0,line:!0,raster:!0,hillshade:!0};class ws{constructor(w,B){this.painter=w,this.terrain=B,this.pool=new Ss(w.context,30,B.sourceCache.tileSize*B.qualityFactor)}destruct(){this.pool.destruct()}getTexture(w){return this.pool.getObjectForId(w.rtt[this._stacks.length-1].id).texture}prepareForRender(w,B){this._stacks=[],this._prevType=null,this._rttTiles=[],this._renderableTiles=this.terrain.sourceCache.getRenderableTiles(),this._renderableLayerIds=w._order.filter(Q=>!w._layers[Q].isHidden(B)),this._coordsDescendingInv={};for(let Q in w.sourceCaches){this._coordsDescendingInv[Q]={};let ee=w.sourceCaches[Q].getVisibleCoordinates();for(let le of ee){let qe=this.terrain.sourceCache.getTerrainCoords(le);for(let Xe in qe)this._coordsDescendingInv[Q][Xe]||(this._coordsDescendingInv[Q][Xe]=[]),this._coordsDescendingInv[Q][Xe].push(qe[Xe])}}this._coordsDescendingInvStr={};for(let Q of w._order){let ee=w._layers[Q],le=ee.source;if(as[ee.type]&&!this._coordsDescendingInvStr[le]){this._coordsDescendingInvStr[le]={};for(let qe in this._coordsDescendingInv[le])this._coordsDescendingInvStr[le][qe]=this._coordsDescendingInv[le][qe].map(Xe=>Xe.key).sort().join()}}for(let Q of this._renderableTiles)for(let ee in this._coordsDescendingInvStr){let le=this._coordsDescendingInvStr[ee][Q.tileID.key];le&&le!==Q.rttCoords[ee]&&(Q.rtt=[])}}renderLayer(w){if(w.isHidden(this.painter.transform.zoom))return!1;let B=w.type,Q=this.painter,ee=this._renderableLayerIds[this._renderableLayerIds.length-1]===w.id;if(as[B]&&(this._prevType&&as[this._prevType]||this._stacks.push([]),this._prevType=B,this._stacks[this._stacks.length-1].push(w.id),!ee))return!0;if(as[this._prevType]||as[B]&&ee){this._prevType=B;let le=this._stacks.length-1,qe=this._stacks[le]||[];for(let Xe of this._renderableTiles){if(this.pool.isFull()&&(js(this.painter,this.terrain,this._rttTiles),this._rttTiles=[],this.pool.freeAllObjects()),this._rttTiles.push(Xe),Xe.rtt[le]){let Tt=this.pool.getObjectForId(Xe.rtt[le].id);if(Tt.stamp===Xe.rtt[le].stamp){this.pool.useObject(Tt);continue}}let ot=this.pool.getOrCreateFreeObject();this.pool.useObject(ot),this.pool.stampObject(ot),Xe.rtt[le]={id:ot.id,stamp:ot.stamp},Q.context.bindFramebuffer.set(ot.fbo.framebuffer),Q.context.clear({color:a.aM.transparent,stencil:0}),Q.currentStencilSource=void 0;for(let Tt=0;Tt<qe.length;Tt++){let Yt=Q.style._layers[qe[Tt]],Kt=Yt.source?this._coordsDescendingInv[Yt.source][Xe.tileID.key]:[Xe.tileID];Q.context.viewport.set([0,0,ot.fbo.width,ot.fbo.height]),Q._renderTileClippingMasks(Yt,Kt),Q.renderLayer(Q,Q.style.sourceCaches[Yt.source],Yt,Kt),Yt.source&&(Xe.rttCoords[Yt.source]=this._coordsDescendingInvStr[Yt.source][Xe.tileID.key])}}return js(this.painter,this.terrain,this._rttTiles),this._rttTiles=[],this.pool.freeAllObjects(),as[B]}return!1}}let Ho={"AttributionControl.ToggleAttribution":"Toggle attribution","AttributionControl.MapFeedback":"Map feedback","FullscreenControl.Enter":"Enter fullscreen","FullscreenControl.Exit":"Exit fullscreen","GeolocateControl.FindMyLocation":"Find my location","GeolocateControl.LocationNotAvailable":"Location not available","LogoControl.Title":"MapLibre logo","Map.Title":"Map","Marker.Title":"Map marker","NavigationControl.ResetBearing":"Reset bearing to north","NavigationControl.ZoomIn":"Zoom in","NavigationControl.ZoomOut":"Zoom out","Popup.Close":"Close popup","ScaleControl.Feet":"ft","ScaleControl.Meters":"m","ScaleControl.Kilometers":"km","ScaleControl.Miles":"mi","ScaleControl.NauticalMiles":"nm","TerrainControl.Enable":"Enable terrain","TerrainControl.Disable":"Disable terrain","CooperativeGesturesHandler.WindowsHelpText":"Use Ctrl + scroll to zoom the map","CooperativeGesturesHandler.MacHelpText":"Use \u2318 + scroll to zoom the map","CooperativeGesturesHandler.MobileHelpText":"Use two fingers to move the map"},ml=o,Ws={hash:!1,interactive:!0,bearingSnap:7,attributionControl:La,maplibreLogo:!1,failIfMajorPerformanceCaveat:!1,preserveDrawingBuffer:!1,refreshExpiredTiles:!0,scrollZoom:!0,minZoom:-2,maxZoom:22,minPitch:0,maxPitch:60,boxZoom:!0,dragRotate:!0,dragPan:!0,keyboard:!0,doubleClickZoom:!0,touchZoomRotate:!0,touchPitch:!0,cooperativeGestures:!1,trackResize:!0,center:[0,0],zoom:0,bearing:0,pitch:0,renderWorldCopies:!0,maxTileCacheSize:null,maxTileCacheZoomLevels:a.a.MAX_TILE_CACHE_ZOOM_LEVELS,transformRequest:null,transformCameraUpdate:null,fadeDuration:300,crossSourceCollisions:!0,clickTolerance:3,localIdeographFontFamily:"sans-serif",pitchWithRotate:!0,validateStyle:!0,maxCanvasSize:[4096,4096],cancelPendingTileRequestsWhileZooming:!0},Ls=ue=>{ue.touchstart=ue.dragStart,ue.touchmoveWindow=ue.dragMove,ue.touchend=ue.dragEnd},va={showCompass:!0,showZoom:!0,visualizePitch:!1};class no{constructor(w,B,Q=!1){this.mousedown=qe=>{this.startMouse(a.e({},qe,{ctrlKey:!0,preventDefault:()=>qe.preventDefault()}),c.mousePos(this.element,qe)),c.addEventListener(window,"mousemove",this.mousemove),c.addEventListener(window,"mouseup",this.mouseup)},this.mousemove=qe=>{this.moveMouse(qe,c.mousePos(this.element,qe))},this.mouseup=qe=>{this.mouseRotate.dragEnd(qe),this.mousePitch&&this.mousePitch.dragEnd(qe),this.offTemp()},this.touchstart=qe=>{qe.targetTouches.length!==1?this.reset():(this._startPos=this._lastPos=c.touchPos(this.element,qe.targetTouches)[0],this.startTouch(qe,this._startPos),c.addEventListener(window,"touchmove",this.touchmove,{passive:!1}),c.addEventListener(window,"touchend",this.touchend))},this.touchmove=qe=>{qe.targetTouches.length!==1?this.reset():(this._lastPos=c.touchPos(this.element,qe.targetTouches)[0],this.moveTouch(qe,this._lastPos))},this.touchend=qe=>{qe.targetTouches.length===0&&this._startPos&&this._lastPos&&this._startPos.dist(this._lastPos)<this._clickTolerance&&this.element.click(),delete this._startPos,delete this._lastPos,this.offTemp()},this.reset=()=>{this.mouseRotate.reset(),this.mousePitch&&this.mousePitch.reset(),this.touchRotate.reset(),this.touchPitch&&this.touchPitch.reset(),delete this._startPos,delete this._lastPos,this.offTemp()},this._clickTolerance=10;let ee=w.dragRotate._mouseRotate.getClickTolerance(),le=w.dragRotate._mousePitch.getClickTolerance();this.element=B,this.mouseRotate=Wl({clickTolerance:ee,enable:!0}),this.touchRotate=(({enable:qe,clickTolerance:Xe,bearingDegreesPerPixelMoved:ot=.8})=>{let Tt=new uf;return new Wu({clickTolerance:Xe,move:(Yt,Kt)=>({bearingDelta:(Kt.x-Yt.x)*ot}),moveStateManager:Tt,enable:qe,assignEvents:Ls})})({clickTolerance:ee,enable:!0}),this.map=w,Q&&(this.mousePitch=ah({clickTolerance:le,enable:!0}),this.touchPitch=(({enable:qe,clickTolerance:Xe,pitchDegreesPerPixelMoved:ot=-.5})=>{let Tt=new uf;return new Wu({clickTolerance:Xe,move:(Yt,Kt)=>({pitchDelta:(Kt.y-Yt.y)*ot}),moveStateManager:Tt,enable:qe,assignEvents:Ls})})({clickTolerance:le,enable:!0})),c.addEventListener(B,"mousedown",this.mousedown),c.addEventListener(B,"touchstart",this.touchstart,{passive:!1}),c.addEventListener(B,"touchcancel",this.reset)}startMouse(w,B){this.mouseRotate.dragStart(w,B),this.mousePitch&&this.mousePitch.dragStart(w,B),c.disableDrag()}startTouch(w,B){this.touchRotate.dragStart(w,B),this.touchPitch&&this.touchPitch.dragStart(w,B),c.disableDrag()}moveMouse(w,B){let Q=this.map,{bearingDelta:ee}=this.mouseRotate.dragMove(w,B)||{};if(ee&&Q.setBearing(Q.getBearing()+ee),this.mousePitch){let{pitchDelta:le}=this.mousePitch.dragMove(w,B)||{};le&&Q.setPitch(Q.getPitch()+le)}}moveTouch(w,B){let Q=this.map,{bearingDelta:ee}=this.touchRotate.dragMove(w,B)||{};if(ee&&Q.setBearing(Q.getBearing()+ee),this.touchPitch){let{pitchDelta:le}=this.touchPitch.dragMove(w,B)||{};le&&Q.setPitch(Q.getPitch()+le)}}off(){let w=this.element;c.removeEventListener(w,"mousedown",this.mousedown),c.removeEventListener(w,"touchstart",this.touchstart,{passive:!1}),c.removeEventListener(window,"touchmove",this.touchmove,{passive:!1}),c.removeEventListener(window,"touchend",this.touchend),c.removeEventListener(w,"touchcancel",this.reset),this.offTemp()}offTemp(){c.enableDrag(),c.removeEventListener(window,"mousemove",this.mousemove),c.removeEventListener(window,"mouseup",this.mouseup),c.removeEventListener(window,"touchmove",this.touchmove,{passive:!1}),c.removeEventListener(window,"touchend",this.touchend)}}let ys;function rs(ue,w,B){let Q=new a.N(ue.lng,ue.lat);if(ue=new a.N(ue.lng,ue.lat),w){let ee=new a.N(ue.lng-360,ue.lat),le=new a.N(ue.lng+360,ue.lat),qe=B.locationPoint(ue).distSqr(w);B.locationPoint(ee).distSqr(w)<qe?ue=ee:B.locationPoint(le).distSqr(w)<qe&&(ue=le)}for(;Math.abs(ue.lng-B.center.lng)>180;){let ee=B.locationPoint(ue);if(ee.x>=0&&ee.y>=0&&ee.x<=B.width&&ee.y<=B.height)break;ue.lng>B.center.lng?ue.lng-=360:ue.lng+=360}return ue.lng!==Q.lng&&B.locationPoint(ue).y>B.height/2-B.getHorizon()?ue:Q}let $l={center:"translate(-50%,-50%)",top:"translate(-50%,0)","top-left":"translate(0,0)","top-right":"translate(-100%,0)",bottom:"translate(-50%,-100%)","bottom-left":"translate(0,-100%)","bottom-right":"translate(-100%,-100%)",left:"translate(0,-50%)",right:"translate(-100%,-50%)"};function Cu(ue,w,B){let Q=ue.classList;for(let ee in $l)Q.remove(`maplibregl-${B}-anchor-${ee}`);Q.add(`maplibregl-${B}-anchor-${w}`)}class Yu extends a.E{constructor(w){if(super(),this._onKeyPress=B=>{let Q=B.code,ee=B.charCode||B.keyCode;Q!=="Space"&&Q!=="Enter"&&ee!==32&&ee!==13||this.togglePopup()},this._onMapClick=B=>{let Q=B.originalEvent.target,ee=this._element;this._popup&&(Q===ee||ee.contains(Q))&&this.togglePopup()},this._update=B=>{var Q;if(!this._map)return;let ee=this._map.loaded()&&!this._map.isMoving();((B==null?void 0:B.type)==="terrain"||(B==null?void 0:B.type)==="render"&&!ee)&&this._map.once("render",this._update),this._lngLat=this._map.transform.renderWorldCopies?rs(this._lngLat,this._flatPos,this._map.transform):(Q=this._lngLat)===null||Q===void 0?void 0:Q.wrap(),this._flatPos=this._pos=this._map.project(this._lngLat)._add(this._offset),this._map.terrain&&(this._flatPos=this._map.transform.locationPoint(this._lngLat)._add(this._offset));let le="";this._rotationAlignment==="viewport"||this._rotationAlignment==="auto"?le=`rotateZ(${this._rotation}deg)`:this._rotationAlignment==="map"&&(le=`rotateZ(${this._rotation-this._map.getBearing()}deg)`);let qe="";this._pitchAlignment==="viewport"||this._pitchAlignment==="auto"?qe="rotateX(0deg)":this._pitchAlignment==="map"&&(qe=`rotateX(${this._map.getPitch()}deg)`),this._subpixelPositioning||B&&B.type!=="moveend"||(this._pos=this._pos.round()),c.setTransform(this._element,`${$l[this._anchor]} translate(${this._pos.x}px, ${this._pos.y}px) ${qe} ${le}`),u.frameAsync(new AbortController).then(()=>{this._updateOpacity(B&&B.type==="moveend")}).catch(()=>{})},this._onMove=B=>{if(!this._isDragging){let Q=this._clickTolerance||this._map._clickTolerance;this._isDragging=B.point.dist(this._pointerdownPos)>=Q}this._isDragging&&(this._pos=B.point.sub(this._positionDelta),this._lngLat=this._map.unproject(this._pos),this.setLngLat(this._lngLat),this._element.style.pointerEvents="none",this._state==="pending"&&(this._state="active",this.fire(new a.k("dragstart"))),this.fire(new a.k("drag")))},this._onUp=()=>{this._element.style.pointerEvents="auto",this._positionDelta=null,this._pointerdownPos=null,this._isDragging=!1,this._map.off("mousemove",this._onMove),this._map.off("touchmove",this._onMove),this._state==="active"&&this.fire(new a.k("dragend")),this._state="inactive"},this._addDragHandler=B=>{this._element.contains(B.originalEvent.target)&&(B.preventDefault(),this._positionDelta=B.point.sub(this._pos).add(this._offset),this._pointerdownPos=B.point,this._state="pending",this._map.on("mousemove",this._onMove),this._map.on("touchmove",this._onMove),this._map.once("mouseup",this._onUp),this._map.once("touchend",this._onUp))},this._anchor=w&&w.anchor||"center",this._color=w&&w.color||"#3FB1CE",this._scale=w&&w.scale||1,this._draggable=w&&w.draggable||!1,this._clickTolerance=w&&w.clickTolerance||0,this._subpixelPositioning=w&&w.subpixelPositioning||!1,this._isDragging=!1,this._state="inactive",this._rotation=w&&w.rotation||0,this._rotationAlignment=w&&w.rotationAlignment||"auto",this._pitchAlignment=w&&w.pitchAlignment&&w.pitchAlignment!=="auto"?w.pitchAlignment:this._rotationAlignment,this.setOpacity(),this.setOpacity(w==null?void 0:w.opacity,w==null?void 0:w.opacityWhenCovered),w&&w.element)this._element=w.element,this._offset=a.P.convert(w&&w.offset||[0,0]);else{this._defaultMarker=!0,this._element=c.create("div");let B=c.createNS("http://www.w3.org/2000/svg","svg"),Q=41,ee=27;B.setAttributeNS(null,"display","block"),B.setAttributeNS(null,"height",`${Q}px`),B.setAttributeNS(null,"width",`${ee}px`),B.setAttributeNS(null,"viewBox",`0 0 ${ee} ${Q}`);let le=c.createNS("http://www.w3.org/2000/svg","g");le.setAttributeNS(null,"stroke","none"),le.setAttributeNS(null,"stroke-width","1"),le.setAttributeNS(null,"fill","none"),le.setAttributeNS(null,"fill-rule","evenodd");let qe=c.createNS("http://www.w3.org/2000/svg","g");qe.setAttributeNS(null,"fill-rule","nonzero");let Xe=c.createNS("http://www.w3.org/2000/svg","g");Xe.setAttributeNS(null,"transform","translate(3.0, 29.0)"),Xe.setAttributeNS(null,"fill","#000000");let ot=[{rx:"10.5",ry:"5.25002273"},{rx:"10.5",ry:"5.25002273"},{rx:"9.5",ry:"4.77275007"},{rx:"8.5",ry:"4.29549936"},{rx:"7.5",ry:"3.81822308"},{rx:"6.5",ry:"3.34094679"},{rx:"5.5",ry:"2.86367051"},{rx:"4.5",ry:"2.38636864"}];for(let Be of ot){let et=c.createNS("http://www.w3.org/2000/svg","ellipse");et.setAttributeNS(null,"opacity","0.04"),et.setAttributeNS(null,"cx","10.5"),et.setAttributeNS(null,"cy","5.80029008"),et.setAttributeNS(null,"rx",Be.rx),et.setAttributeNS(null,"ry",Be.ry),Xe.appendChild(et)}let Tt=c.createNS("http://www.w3.org/2000/svg","g");Tt.setAttributeNS(null,"fill",this._color);let Yt=c.createNS("http://www.w3.org/2000/svg","path");Yt.setAttributeNS(null,"d","M27,13.5 C27,19.074644 20.250001,27.000002 14.75,34.500002 C14.016665,35.500004 12.983335,35.500004 12.25,34.500002 C6.7499993,27.000002 0,19.222562 0,13.5 C0,6.0441559 6.0441559,0 13.5,0 C20.955844,0 27,6.0441559 27,13.5 Z"),Tt.appendChild(Yt);let Kt=c.createNS("http://www.w3.org/2000/svg","g");Kt.setAttributeNS(null,"opacity","0.25"),Kt.setAttributeNS(null,"fill","#000000");let xr=c.createNS("http://www.w3.org/2000/svg","path");xr.setAttributeNS(null,"d","M13.5,0 C6.0441559,0 0,6.0441559 0,13.5 C0,19.222562 6.7499993,27 12.25,34.5 C13,35.522727 14.016664,35.500004 14.75,34.5 C20.250001,27 27,19.074644 27,13.5 C27,6.0441559 20.955844,0 13.5,0 Z M13.5,1 C20.415404,1 26,6.584596 26,13.5 C26,15.898657 24.495584,19.181431 22.220703,22.738281 C19.945823,26.295132 16.705119,30.142167 13.943359,33.908203 C13.743445,34.180814 13.612715,34.322738 13.5,34.441406 C13.387285,34.322738 13.256555,34.180814 13.056641,33.908203 C10.284481,30.127985 7.4148684,26.314159 5.015625,22.773438 C2.6163816,19.232715 1,15.953538 1,13.5 C1,6.584596 6.584596,1 13.5,1 Z"),Kt.appendChild(xr);let Ir=c.createNS("http://www.w3.org/2000/svg","g");Ir.setAttributeNS(null,"transform","translate(6.0, 7.0)"),Ir.setAttributeNS(null,"fill","#FFFFFF");let ve=c.createNS("http://www.w3.org/2000/svg","g");ve.setAttributeNS(null,"transform","translate(8.0, 8.0)");let be=c.createNS("http://www.w3.org/2000/svg","circle");be.setAttributeNS(null,"fill","#000000"),be.setAttributeNS(null,"opacity","0.25"),be.setAttributeNS(null,"cx","5.5"),be.setAttributeNS(null,"cy","5.5"),be.setAttributeNS(null,"r","5.4999962");let De=c.createNS("http://www.w3.org/2000/svg","circle");De.setAttributeNS(null,"fill","#FFFFFF"),De.setAttributeNS(null,"cx","5.5"),De.setAttributeNS(null,"cy","5.5"),De.setAttributeNS(null,"r","5.4999962"),ve.appendChild(be),ve.appendChild(De),qe.appendChild(Xe),qe.appendChild(Tt),qe.appendChild(Kt),qe.appendChild(Ir),qe.appendChild(ve),B.appendChild(qe),B.setAttributeNS(null,"height",Q*this._scale+"px"),B.setAttributeNS(null,"width",ee*this._scale+"px"),this._element.appendChild(B),this._offset=a.P.convert(w&&w.offset||[0,-14])}if(this._element.classList.add("maplibregl-marker"),this._element.addEventListener("dragstart",B=>{B.preventDefault()}),this._element.addEventListener("mousedown",B=>{B.preventDefault()}),Cu(this._element,this._anchor,"marker"),w&&w.className)for(let B of w.className.split(" "))this._element.classList.add(B);this._popup=null}addTo(w){return this.remove(),this._map=w,this._element.setAttribute("aria-label",w._getUIString("Marker.Title")),w.getCanvasContainer().appendChild(this._element),w.on("move",this._update),w.on("moveend",this._update),w.on("terrain",this._update),this.setDraggable(this._draggable),this._update(),this._map.on("click",this._onMapClick),this}remove(){return this._opacityTimeout&&(clearTimeout(this._opacityTimeout),delete this._opacityTimeout),this._map&&(this._map.off("click",this._onMapClick),this._map.off("move",this._update),this._map.off("moveend",this._update),this._map.off("terrain",this._update),this._map.off("mousedown",this._addDragHandler),this._map.off("touchstart",this._addDragHandler),this._map.off("mouseup",this._onUp),this._map.off("touchend",this._onUp),this._map.off("mousemove",this._onMove),this._map.off("touchmove",this._onMove),delete this._map),c.remove(this._element),this._popup&&this._popup.remove(),this}getLngLat(){return this._lngLat}setLngLat(w){return this._lngLat=a.N.convert(w),this._pos=null,this._popup&&this._popup.setLngLat(this._lngLat),this._update(),this}getElement(){return this._element}setPopup(w){if(this._popup&&(this._popup.remove(),this._popup=null,this._element.removeEventListener("keypress",this._onKeyPress),this._originalTabIndex||this._element.removeAttribute("tabindex")),w){if(!("offset"in w.options)){let ee=Math.abs(13.5)/Math.SQRT2;w.options.offset=this._defaultMarker?{top:[0,0],"top-left":[0,0],"top-right":[0,0],bottom:[0,-38.1],"bottom-left":[ee,-1*(38.1-13.5+ee)],"bottom-right":[-ee,-1*(38.1-13.5+ee)],left:[13.5,-1*(38.1-13.5)],right:[-13.5,-1*(38.1-13.5)]}:this._offset}this._popup=w,this._originalTabIndex=this._element.getAttribute("tabindex"),this._originalTabIndex||this._element.setAttribute("tabindex","0"),this._element.addEventListener("keypress",this._onKeyPress)}return this}setSubpixelPositioning(w){return this._subpixelPositioning=w,this}getPopup(){return this._popup}togglePopup(){let w=this._popup;return this._element.style.opacity===this._opacityWhenCovered?this:w?(w.isOpen()?w.remove():(w.setLngLat(this._lngLat),w.addTo(this._map)),this):this}_updateOpacity(w=!1){var B,Q;if(!(!((B=this._map)===null||B===void 0)&&B.terrain))return void(this._element.style.opacity!==this._opacity&&(this._element.style.opacity=this._opacity));if(w)this._opacityTimeout=null;else{if(this._opacityTimeout)return;this._opacityTimeout=setTimeout(()=>{this._opacityTimeout=null},100)}let ee=this._map,le=ee.terrain.depthAtPoint(this._pos),qe=ee.terrain.getElevationForLngLatZoom(this._lngLat,ee.transform.tileZoom);if(ee.transform.lngLatToCameraDepth(this._lngLat,qe)-le<.006)return void(this._element.style.opacity=this._opacity);let Xe=-this._offset.y/ee.transform._pixelPerMeter,ot=Math.sin(ee.getPitch()*Math.PI/180)*Xe,Tt=ee.terrain.depthAtPoint(new a.P(this._pos.x,this._pos.y-this._offset.y)),Yt=ee.transform.lngLatToCameraDepth(this._lngLat,qe+ot)-Tt>.006;!((Q=this._popup)===null||Q===void 0)&&Q.isOpen()&&Yt&&this._popup.remove(),this._element.style.opacity=Yt?this._opacityWhenCovered:this._opacity}getOffset(){return this._offset}setOffset(w){return this._offset=a.P.convert(w),this._update(),this}addClassName(w){this._element.classList.add(w)}removeClassName(w){this._element.classList.remove(w)}toggleClassName(w){return this._element.classList.toggle(w)}setDraggable(w){return this._draggable=!!w,this._map&&(w?(this._map.on("mousedown",this._addDragHandler),this._map.on("touchstart",this._addDragHandler)):(this._map.off("mousedown",this._addDragHandler),this._map.off("touchstart",this._addDragHandler))),this}isDraggable(){return this._draggable}setRotation(w){return this._rotation=w||0,this._update(),this}getRotation(){return this._rotation}setRotationAlignment(w){return this._rotationAlignment=w||"auto",this._update(),this}getRotationAlignment(){return this._rotationAlignment}setPitchAlignment(w){return this._pitchAlignment=w&&w!=="auto"?w:this._rotationAlignment,this._update(),this}getPitchAlignment(){return this._pitchAlignment}setOpacity(w,B){return w===void 0&&B===void 0&&(this._opacity="1",this._opacityWhenCovered="0.2"),w!==void 0&&(this._opacity=w),B!==void 0&&(this._opacityWhenCovered=B),this._map&&this._updateOpacity(!0),this}}let Nc={positionOptions:{enableHighAccuracy:!1,maximumAge:0,timeout:6e3},fitBoundsOptions:{maxZoom:15},trackUserLocation:!1,showAccuracyCircle:!0,showUserLocation:!0},pu=0,Uc=!1,xu={maxWidth:100,unit:"metric"};function Ac(ue,w,B){let Q=B&&B.maxWidth||100,ee=ue._container.clientHeight/2,le=ue.unproject([0,ee]),qe=ue.unproject([Q,ee]),Xe=le.distanceTo(qe);if(B&&B.unit==="imperial"){let ot=3.2808*Xe;ot>5280?Ua(w,Q,ot/5280,ue._getUIString("ScaleControl.Miles")):Ua(w,Q,ot,ue._getUIString("ScaleControl.Feet"))}else B&&B.unit==="nautical"?Ua(w,Q,Xe/1852,ue._getUIString("ScaleControl.NauticalMiles")):Xe>=1e3?Ua(w,Q,Xe/1e3,ue._getUIString("ScaleControl.Kilometers")):Ua(w,Q,Xe,ue._getUIString("ScaleControl.Meters"))}function Ua(ue,w,B,Q){let ee=function(le){let qe=Math.pow(10,`${Math.floor(le)}`.length-1),Xe=le/qe;return Xe=Xe>=10?10:Xe>=5?5:Xe>=3?3:Xe>=2?2:Xe>=1?1:function(ot){let Tt=Math.pow(10,Math.ceil(-Math.log(ot)/Math.LN10));return Math.round(ot*Tt)/Tt}(Xe),qe*Xe}(B);ue.style.width=w*(ee/B)+"px",ue.innerHTML=`${ee}&nbsp;${Q}`}let oo={closeButton:!0,closeOnClick:!0,focusAfterOpen:!0,className:"",maxWidth:"240px",subpixelPositioning:!1},Vc=["a[href]","[tabindex]:not([tabindex='-1'])","[contenteditable]:not([contenteditable='false'])","button:not([disabled])","input:not([disabled])","select:not([disabled])","textarea:not([disabled])"].join(", ");function hc(ue){if(ue){if(typeof ue=="number"){let w=Math.round(Math.abs(ue)/Math.SQRT2);return{center:new a.P(0,0),top:new a.P(0,ue),"top-left":new a.P(w,w),"top-right":new a.P(-w,w),bottom:new a.P(0,-ue),"bottom-left":new a.P(w,-w),"bottom-right":new a.P(-w,-w),left:new a.P(ue,0),right:new a.P(-ue,0)}}if(ue instanceof a.P||Array.isArray(ue)){let w=a.P.convert(ue);return{center:w,top:w,"top-left":w,"top-right":w,bottom:w,"bottom-left":w,"bottom-right":w,left:w,right:w}}return{center:a.P.convert(ue.center||[0,0]),top:a.P.convert(ue.top||[0,0]),"top-left":a.P.convert(ue["top-left"]||[0,0]),"top-right":a.P.convert(ue["top-right"]||[0,0]),bottom:a.P.convert(ue.bottom||[0,0]),"bottom-left":a.P.convert(ue["bottom-left"]||[0,0]),"bottom-right":a.P.convert(ue["bottom-right"]||[0,0]),left:a.P.convert(ue.left||[0,0]),right:a.P.convert(ue.right||[0,0])}}return hc(new a.P(0,0))}let Ku=o;i.AJAXError=a.bh,i.Evented=a.E,i.LngLat=a.N,i.MercatorCoordinate=a.Z,i.Point=a.P,i.addProtocol=a.bi,i.config=a.a,i.removeProtocol=a.bj,i.AttributionControl=Na,i.BoxZoomHandler=ku,i.CanvasSource=Jt,i.CooperativeGesturesHandler=Qi,i.DoubleClickZoomHandler=ki,i.DragPanHandler=wn,i.DragRotateHandler=Nn,i.EdgeInsets=hu,i.FullscreenControl=class extends a.E{constructor(ue={}){super(),this._onFullscreenChange=()=>{var w;let B=window.document.fullscreenElement||window.document.mozFullScreenElement||window.document.webkitFullscreenElement||window.document.msFullscreenElement;for(;!((w=B==null?void 0:B.shadowRoot)===null||w===void 0)&&w.fullscreenElement;)B=B.shadowRoot.fullscreenElement;B===this._container!==this._fullscreen&&this._handleFullscreenChange()},this._onClickFullscreen=()=>{this._isFullscreen()?this._exitFullscreen():this._requestFullscreen()},this._fullscreen=!1,ue&&ue.container&&(ue.container instanceof HTMLElement?this._container=ue.container:a.w("Full screen control 'container' must be a DOM element.")),"onfullscreenchange"in document?this._fullscreenchange="fullscreenchange":"onmozfullscreenchange"in document?this._fullscreenchange="mozfullscreenchange":"onwebkitfullscreenchange"in document?this._fullscreenchange="webkitfullscreenchange":"onmsfullscreenchange"in document&&(this._fullscreenchange="MSFullscreenChange")}onAdd(ue){return this._map=ue,this._container||(this._container=this._map.getContainer()),this._controlContainer=c.create("div","maplibregl-ctrl maplibregl-ctrl-group"),this._setupUI(),this._controlContainer}onRemove(){c.remove(this._controlContainer),this._map=null,window.document.removeEventListener(this._fullscreenchange,this._onFullscreenChange)}_setupUI(){let ue=this._fullscreenButton=c.create("button","maplibregl-ctrl-fullscreen",this._controlContainer);c.create("span","maplibregl-ctrl-icon",ue).setAttribute("aria-hidden","true"),ue.type="button",this._updateTitle(),this._fullscreenButton.addEventListener("click",this._onClickFullscreen),window.document.addEventListener(this._fullscreenchange,this._onFullscreenChange)}_updateTitle(){let ue=this._getTitle();this._fullscreenButton.setAttribute("aria-label",ue),this._fullscreenButton.title=ue}_getTitle(){return this._map._getUIString(this._isFullscreen()?"FullscreenControl.Exit":"FullscreenControl.Enter")}_isFullscreen(){return this._fullscreen}_handleFullscreenChange(){this._fullscreen=!this._fullscreen,this._fullscreenButton.classList.toggle("maplibregl-ctrl-shrink"),this._fullscreenButton.classList.toggle("maplibregl-ctrl-fullscreen"),this._updateTitle(),this._fullscreen?(this.fire(new a.k("fullscreenstart")),this._prevCooperativeGesturesEnabled=this._map.cooperativeGestures.isEnabled(),this._map.cooperativeGestures.disable()):(this.fire(new a.k("fullscreenend")),this._prevCooperativeGesturesEnabled&&this._map.cooperativeGestures.enable())}_exitFullscreen(){window.document.exitFullscreen?window.document.exitFullscreen():window.document.mozCancelFullScreen?window.document.mozCancelFullScreen():window.document.msExitFullscreen?window.document.msExitFullscreen():window.document.webkitCancelFullScreen?window.document.webkitCancelFullScreen():this._togglePseudoFullScreen()}_requestFullscreen(){this._container.requestFullscreen?this._container.requestFullscreen():this._container.mozRequestFullScreen?this._container.mozRequestFullScreen():this._container.msRequestFullscreen?this._container.msRequestFullscreen():this._container.webkitRequestFullscreen?this._container.webkitRequestFullscreen():this._togglePseudoFullScreen()}_togglePseudoFullScreen(){this._container.classList.toggle("maplibregl-pseudo-fullscreen"),this._handleFullscreenChange(),this._map.resize()}},i.GeoJSONSource=st,i.GeolocateControl=class extends a.E{constructor(ue){super(),this._onSuccess=w=>{if(this._map){if(this._isOutOfMapMaxBounds(w))return this._setErrorState(),this.fire(new a.k("outofmaxbounds",w)),this._updateMarker(),void this._finish();if(this.options.trackUserLocation)switch(this._lastKnownPosition=w,this._watchState){case"WAITING_ACTIVE":case"ACTIVE_LOCK":case"ACTIVE_ERROR":this._watchState="ACTIVE_LOCK",this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-waiting"),this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-active-error"),this._geolocateButton.classList.add("maplibregl-ctrl-geolocate-active");break;case"BACKGROUND":case"BACKGROUND_ERROR":this._watchState="BACKGROUND",this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-waiting"),this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-background-error"),this._geolocateButton.classList.add("maplibregl-ctrl-geolocate-background");break;default:throw new Error(`Unexpected watchState ${this._watchState}`)}this.options.showUserLocation&&this._watchState!=="OFF"&&this._updateMarker(w),this.options.trackUserLocation&&this._watchState!=="ACTIVE_LOCK"||this._updateCamera(w),this.options.showUserLocation&&this._dotElement.classList.remove("maplibregl-user-location-dot-stale"),this.fire(new a.k("geolocate",w)),this._finish()}},this._updateCamera=w=>{let B=new a.N(w.coords.longitude,w.coords.latitude),Q=w.coords.accuracy,ee=this._map.getBearing(),le=a.e({bearing:ee},this.options.fitBoundsOptions),qe=ce.fromLngLat(B,Q);this._map.fitBounds(qe,le,{geolocateSource:!0})},this._updateMarker=w=>{if(w){let B=new a.N(w.coords.longitude,w.coords.latitude);this._accuracyCircleMarker.setLngLat(B).addTo(this._map),this._userLocationDotMarker.setLngLat(B).addTo(this._map),this._accuracy=w.coords.accuracy,this.options.showUserLocation&&this.options.showAccuracyCircle&&this._updateCircleRadius()}else this._userLocationDotMarker.remove(),this._accuracyCircleMarker.remove()},this._onZoom=()=>{this.options.showUserLocation&&this.options.showAccuracyCircle&&this._updateCircleRadius()},this._onError=w=>{if(this._map){if(this.options.trackUserLocation)if(w.code===1){this._watchState="OFF",this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-waiting"),this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-active"),this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-active-error"),this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-background"),this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-background-error"),this._geolocateButton.disabled=!0;let B=this._map._getUIString("GeolocateControl.LocationNotAvailable");this._geolocateButton.title=B,this._geolocateButton.setAttribute("aria-label",B),this._geolocationWatchID!==void 0&&this._clearWatch()}else{if(w.code===3&&Uc)return;this._setErrorState()}this._watchState!=="OFF"&&this.options.showUserLocation&&this._dotElement.classList.add("maplibregl-user-location-dot-stale"),this.fire(new a.k("error",w)),this._finish()}},this._finish=()=>{this._timeoutId&&clearTimeout(this._timeoutId),this._timeoutId=void 0},this._setupUI=()=>{this._map&&(this._container.addEventListener("contextmenu",w=>w.preventDefault()),this._geolocateButton=c.create("button","maplibregl-ctrl-geolocate",this._container),c.create("span","maplibregl-ctrl-icon",this._geolocateButton).setAttribute("aria-hidden","true"),this._geolocateButton.type="button",this._geolocateButton.disabled=!0)},this._finishSetupUI=w=>{if(this._map){if(w===!1){a.w("Geolocation support is not available so the GeolocateControl will be disabled.");let B=this._map._getUIString("GeolocateControl.LocationNotAvailable");this._geolocateButton.disabled=!0,this._geolocateButton.title=B,this._geolocateButton.setAttribute("aria-label",B)}else{let B=this._map._getUIString("GeolocateControl.FindMyLocation");this._geolocateButton.disabled=!1,this._geolocateButton.title=B,this._geolocateButton.setAttribute("aria-label",B)}this.options.trackUserLocation&&(this._geolocateButton.setAttribute("aria-pressed","false"),this._watchState="OFF"),this.options.showUserLocation&&(this._dotElement=c.create("div","maplibregl-user-location-dot"),this._userLocationDotMarker=new Yu({element:this._dotElement}),this._circleElement=c.create("div","maplibregl-user-location-accuracy-circle"),this._accuracyCircleMarker=new Yu({element:this._circleElement,pitchAlignment:"map"}),this.options.trackUserLocation&&(this._watchState="OFF"),this._map.on("zoom",this._onZoom)),this._geolocateButton.addEventListener("click",()=>this.trigger()),this._setup=!0,this.options.trackUserLocation&&this._map.on("movestart",B=>{B.geolocateSource||this._watchState!=="ACTIVE_LOCK"||B.originalEvent&&B.originalEvent.type==="resize"||(this._watchState="BACKGROUND",this._geolocateButton.classList.add("maplibregl-ctrl-geolocate-background"),this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-active"),this.fire(new a.k("trackuserlocationend")),this.fire(new a.k("userlocationlostfocus")))})}},this.options=a.e({},Nc,ue)}onAdd(ue){return this._map=ue,this._container=c.create("div","maplibregl-ctrl maplibregl-ctrl-group"),this._setupUI(),function(){return a._(this,arguments,void 0,function*(w=!1){if(ys!==void 0&&!w)return ys;if(window.navigator.permissions===void 0)return ys=!!window.navigator.geolocation,ys;try{ys=(yield window.navigator.permissions.query({name:"geolocation"})).state!=="denied"}catch(B){ys=!!window.navigator.geolocation}return ys})}().then(w=>this._finishSetupUI(w)),this._container}onRemove(){this._geolocationWatchID!==void 0&&(window.navigator.geolocation.clearWatch(this._geolocationWatchID),this._geolocationWatchID=void 0),this.options.showUserLocation&&this._userLocationDotMarker&&this._userLocationDotMarker.remove(),this.options.showAccuracyCircle&&this._accuracyCircleMarker&&this._accuracyCircleMarker.remove(),c.remove(this._container),this._map.off("zoom",this._onZoom),this._map=void 0,pu=0,Uc=!1}_isOutOfMapMaxBounds(ue){let w=this._map.getMaxBounds(),B=ue.coords;return w&&(B.longitude<w.getWest()||B.longitude>w.getEast()||B.latitude<w.getSouth()||B.latitude>w.getNorth())}_setErrorState(){switch(this._watchState){case"WAITING_ACTIVE":this._watchState="ACTIVE_ERROR",this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-active"),this._geolocateButton.classList.add("maplibregl-ctrl-geolocate-active-error");break;case"ACTIVE_LOCK":this._watchState="ACTIVE_ERROR",this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-active"),this._geolocateButton.classList.add("maplibregl-ctrl-geolocate-active-error"),this._geolocateButton.classList.add("maplibregl-ctrl-geolocate-waiting");break;case"BACKGROUND":this._watchState="BACKGROUND_ERROR",this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-background"),this._geolocateButton.classList.add("maplibregl-ctrl-geolocate-background-error"),this._geolocateButton.classList.add("maplibregl-ctrl-geolocate-waiting");break;case"ACTIVE_ERROR":break;default:throw new Error(`Unexpected watchState ${this._watchState}`)}}_updateCircleRadius(){let ue=this._map.getBounds(),w=ue.getSouthEast(),B=ue.getNorthEast(),Q=w.distanceTo(B),ee=Math.ceil(this._accuracy/(Q/this._map._container.clientHeight)*2);this._circleElement.style.width=`${ee}px`,this._circleElement.style.height=`${ee}px`}trigger(){if(!this._setup)return a.w("Geolocate control triggered before added to a map"),!1;if(this.options.trackUserLocation){switch(this._watchState){case"OFF":this._watchState="WAITING_ACTIVE",this.fire(new a.k("trackuserlocationstart"));break;case"WAITING_ACTIVE":case"ACTIVE_LOCK":case"ACTIVE_ERROR":case"BACKGROUND_ERROR":pu--,Uc=!1,this._watchState="OFF",this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-waiting"),this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-active"),this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-active-error"),this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-background"),this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-background-error"),this.fire(new a.k("trackuserlocationend"));break;case"BACKGROUND":this._watchState="ACTIVE_LOCK",this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-background"),this._lastKnownPosition&&this._updateCamera(this._lastKnownPosition),this.fire(new a.k("trackuserlocationstart")),this.fire(new a.k("userlocationfocus"));break;default:throw new Error(`Unexpected watchState ${this._watchState}`)}switch(this._watchState){case"WAITING_ACTIVE":this._geolocateButton.classList.add("maplibregl-ctrl-geolocate-waiting"),this._geolocateButton.classList.add("maplibregl-ctrl-geolocate-active");break;case"ACTIVE_LOCK":this._geolocateButton.classList.add("maplibregl-ctrl-geolocate-active");break;case"OFF":break;default:throw new Error(`Unexpected watchState ${this._watchState}`)}if(this._watchState==="OFF"&&this._geolocationWatchID!==void 0)this._clearWatch();else if(this._geolocationWatchID===void 0){let ue;this._geolocateButton.classList.add("maplibregl-ctrl-geolocate-waiting"),this._geolocateButton.setAttribute("aria-pressed","true"),pu++,pu>1?(ue={maximumAge:6e5,timeout:0},Uc=!0):(ue=this.options.positionOptions,Uc=!1),this._geolocationWatchID=window.navigator.geolocation.watchPosition(this._onSuccess,this._onError,ue)}}else window.navigator.geolocation.getCurrentPosition(this._onSuccess,this._onError,this.options.positionOptions),this._timeoutId=setTimeout(this._finish,1e4);return!0}_clearWatch(){window.navigator.geolocation.clearWatch(this._geolocationWatchID),this._geolocationWatchID=void 0,this._geolocateButton.classList.remove("maplibregl-ctrl-geolocate-waiting"),this._geolocateButton.setAttribute("aria-pressed","false"),this.options.showUserLocation&&this._updateMarker(null)}},i.Hash=Ah,i.ImageSource=Gt,i.KeyboardHandler=Wt,i.LngLatBounds=ce,i.LogoControl=Yn,i.Map=class extends Ra{constructor(ue){a.bf.mark(a.bg.create);let w=Object.assign(Object.assign({},Ws),ue);if(w.minZoom!=null&&w.maxZoom!=null&&w.minZoom>w.maxZoom)throw new Error("maxZoom must be greater than or equal to minZoom");if(w.minPitch!=null&&w.maxPitch!=null&&w.minPitch>w.maxPitch)throw new Error("maxPitch must be greater than or equal to minPitch");if(w.minPitch!=null&&w.minPitch<0)throw new Error("minPitch must be greater than or equal to 0");if(w.maxPitch!=null&&w.maxPitch>85)throw new Error("maxPitch must be less than or equal to 85");if(super(new nl(w.minZoom,w.maxZoom,w.minPitch,w.maxPitch,w.renderWorldCopies),{bearingSnap:w.bearingSnap}),this._idleTriggered=!1,this._crossFadingFactor=1,this._renderTaskQueue=new Dn,this._controls=[],this._mapId=a.a4(),this._contextLost=B=>{B.preventDefault(),this._frameRequest&&(this._frameRequest.abort(),this._frameRequest=null),this.fire(new a.k("webglcontextlost",{originalEvent:B}))},this._contextRestored=B=>{this._setupPainter(),this.resize(),this._update(),this.fire(new a.k("webglcontextrestored",{originalEvent:B}))},this._onMapScroll=B=>{if(B.target===this._container)return this._container.scrollTop=0,this._container.scrollLeft=0,!1},this._onWindowOnline=()=>{this._update()},this._interactive=w.interactive,this._maxTileCacheSize=w.maxTileCacheSize,this._maxTileCacheZoomLevels=w.maxTileCacheZoomLevels,this._failIfMajorPerformanceCaveat=w.failIfMajorPerformanceCaveat===!0,this._preserveDrawingBuffer=w.preserveDrawingBuffer===!0,this._antialias=w.antialias===!0,this._trackResize=w.trackResize===!0,this._bearingSnap=w.bearingSnap,this._refreshExpiredTiles=w.refreshExpiredTiles===!0,this._fadeDuration=w.fadeDuration,this._crossSourceCollisions=w.crossSourceCollisions===!0,this._collectResourceTiming=w.collectResourceTiming===!0,this._locale=Object.assign(Object.assign({},Ho),w.locale),this._clickTolerance=w.clickTolerance,this._overridePixelRatio=w.pixelRatio,this._maxCanvasSize=w.maxCanvasSize,this.transformCameraUpdate=w.transformCameraUpdate,this.cancelPendingTileRequestsWhileZooming=w.cancelPendingTileRequestsWhileZooming===!0,this._imageQueueHandle=p.addThrottleControl(()=>this.isMoving()),this._requestManager=new k(w.transformRequest),typeof w.container=="string"){if(this._container=document.getElementById(w.container),!this._container)throw new Error(`Container '${w.container}' not found.`)}else{if(!(w.container instanceof HTMLElement))throw new Error("Invalid type: 'container' must be a String or HTMLElement.");this._container=w.container}if(w.maxBounds&&this.setMaxBounds(w.maxBounds),this._setupContainer(),this._setupPainter(),this.on("move",()=>this._update(!1)).on("moveend",()=>this._update(!1)).on("zoom",()=>this._update(!0)).on("terrain",()=>{this.painter.terrainFacilitator.dirty=!0,this._update(!0)}).once("idle",()=>{this._idleTriggered=!0}),typeof window!="undefined"){addEventListener("online",this._onWindowOnline,!1);let B=!1,Q=nh(ee=>{this._trackResize&&!this._removed&&(this.resize(ee),this.redraw())},50);this._resizeObserver=new ResizeObserver(ee=>{B?Q(ee):B=!0}),this._resizeObserver.observe(this._container)}this.handlers=new Ca(this,w),this._hash=w.hash&&new Ah(typeof w.hash=="string"&&w.hash||void 0).addTo(this),this._hash&&this._hash._onHashChange()||(this.jumpTo({center:w.center,zoom:w.zoom,bearing:w.bearing,pitch:w.pitch}),w.bounds&&(this.resize(),this.fitBounds(w.bounds,a.e({},w.fitBoundsOptions,{duration:0})))),this.resize(),this._localIdeographFontFamily=w.localIdeographFontFamily,this._validateStyle=w.validateStyle,w.style&&this.setStyle(w.style,{localIdeographFontFamily:w.localIdeographFontFamily}),w.attributionControl&&this.addControl(new Na(typeof w.attributionControl=="boolean"?void 0:w.attributionControl)),w.maplibreLogo&&this.addControl(new Yn,w.logoPosition),this.on("style.load",()=>{this.transform.unmodified&&this.jumpTo(this.style.stylesheet)}),this.on("data",B=>{this._update(B.dataType==="style"),this.fire(new a.k(`${B.dataType}data`,B))}),this.on("dataloading",B=>{this.fire(new a.k(`${B.dataType}dataloading`,B))}),this.on("dataabort",B=>{this.fire(new a.k("sourcedataabort",B))})}_getMapId(){return this._mapId}addControl(ue,w){if(w===void 0&&(w=ue.getDefaultPosition?ue.getDefaultPosition():"top-right"),!ue||!ue.onAdd)return this.fire(new a.j(new Error("Invalid argument to map.addControl(). Argument must be a control with onAdd and onRemove methods.")));let B=ue.onAdd(this);this._controls.push(ue);let Q=this._controlPositions[w];return w.indexOf("bottom")!==-1?Q.insertBefore(B,Q.firstChild):Q.appendChild(B),this}removeControl(ue){if(!ue||!ue.onRemove)return this.fire(new a.j(new Error("Invalid argument to map.removeControl(). Argument must be a control with onAdd and onRemove methods.")));let w=this._controls.indexOf(ue);return w>-1&&this._controls.splice(w,1),ue.onRemove(this),this}hasControl(ue){return this._controls.indexOf(ue)>-1}calculateCameraOptionsFromTo(ue,w,B,Q){return Q==null&&this.terrain&&(Q=this.terrain.getElevationForLngLatZoom(B,this.transform.tileZoom)),super.calculateCameraOptionsFromTo(ue,w,B,Q)}resize(ue){var w;let B=this._containerDimensions(),Q=B[0],ee=B[1],le=this._getClampedPixelRatio(Q,ee);if(this._resizeCanvas(Q,ee,le),this.painter.resize(Q,ee,le),this.painter.overLimit()){let Xe=this.painter.context.gl;this._maxCanvasSize=[Xe.drawingBufferWidth,Xe.drawingBufferHeight];let ot=this._getClampedPixelRatio(Q,ee);this._resizeCanvas(Q,ee,ot),this.painter.resize(Q,ee,ot)}this.transform.resize(Q,ee),(w=this._requestedCameraState)===null||w===void 0||w.resize(Q,ee);let qe=!this._moving;return qe&&(this.stop(),this.fire(new a.k("movestart",ue)).fire(new a.k("move",ue))),this.fire(new a.k("resize",ue)),qe&&this.fire(new a.k("moveend",ue)),this}_getClampedPixelRatio(ue,w){let{0:B,1:Q}=this._maxCanvasSize,ee=this.getPixelRatio(),le=ue*ee,qe=w*ee;return Math.min(le>B?B/le:1,qe>Q?Q/qe:1)*ee}getPixelRatio(){var ue;return(ue=this._overridePixelRatio)!==null&&ue!==void 0?ue:devicePixelRatio}setPixelRatio(ue){this._overridePixelRatio=ue,this.resize()}getBounds(){return this.transform.getBounds()}getMaxBounds(){return this.transform.getMaxBounds()}setMaxBounds(ue){return this.transform.setMaxBounds(ce.convert(ue)),this._update()}setMinZoom(ue){if((ue=ue==null?-2:ue)>=-2&&ue<=this.transform.maxZoom)return this.transform.minZoom=ue,this._update(),this.getZoom()<ue&&this.setZoom(ue),this;throw new Error("minZoom must be between -2 and the current maxZoom, inclusive")}getMinZoom(){return this.transform.minZoom}setMaxZoom(ue){if((ue=ue==null?22:ue)>=this.transform.minZoom)return this.transform.maxZoom=ue,this._update(),this.getZoom()>ue&&this.setZoom(ue),this;throw new Error("maxZoom must be greater than the current minZoom")}getMaxZoom(){return this.transform.maxZoom}setMinPitch(ue){if((ue=ue==null?0:ue)<0)throw new Error("minPitch must be greater than or equal to 0");if(ue>=0&&ue<=this.transform.maxPitch)return this.transform.minPitch=ue,this._update(),this.getPitch()<ue&&this.setPitch(ue),this;throw new Error("minPitch must be between 0 and the current maxPitch, inclusive")}getMinPitch(){return this.transform.minPitch}setMaxPitch(ue){if((ue=ue==null?60:ue)>85)throw new Error("maxPitch must be less than or equal to 85");if(ue>=this.transform.minPitch)return this.transform.maxPitch=ue,this._update(),this.getPitch()>ue&&this.setPitch(ue),this;throw new Error("maxPitch must be greater than the current minPitch")}getMaxPitch(){return this.transform.maxPitch}getRenderWorldCopies(){return this.transform.renderWorldCopies}setRenderWorldCopies(ue){return this.transform.renderWorldCopies=ue,this._update()}project(ue){return this.transform.locationPoint(a.N.convert(ue),this.style&&this.terrain)}unproject(ue){return this.transform.pointLocation(a.P.convert(ue),this.terrain)}isMoving(){var ue;return this._moving||((ue=this.handlers)===null||ue===void 0?void 0:ue.isMoving())}isZooming(){var ue;return this._zooming||((ue=this.handlers)===null||ue===void 0?void 0:ue.isZooming())}isRotating(){var ue;return this._rotating||((ue=this.handlers)===null||ue===void 0?void 0:ue.isRotating())}_createDelegatedListener(ue,w,B){if(ue==="mouseenter"||ue==="mouseover"){let Q=!1;return{layers:w,listener:B,delegates:{mousemove:le=>{let qe=w.filter(ot=>this.getLayer(ot)),Xe=qe.length!==0?this.queryRenderedFeatures(le.point,{layers:qe}):[];Xe.length?Q||(Q=!0,B.call(this,new jl(ue,this,le.originalEvent,{features:Xe}))):Q=!1},mouseout:()=>{Q=!1}}}}if(ue==="mouseleave"||ue==="mouseout"){let Q=!1;return{layers:w,listener:B,delegates:{mousemove:qe=>{let Xe=w.filter(ot=>this.getLayer(ot));(Xe.length!==0?this.queryRenderedFeatures(qe.point,{layers:Xe}):[]).length?Q=!0:Q&&(Q=!1,B.call(this,new jl(ue,this,qe.originalEvent)))},mouseout:qe=>{Q&&(Q=!1,B.call(this,new jl(ue,this,qe.originalEvent)))}}}}{let Q=ee=>{let le=w.filter(Xe=>this.getLayer(Xe)),qe=le.length!==0?this.queryRenderedFeatures(ee.point,{layers:le}):[];qe.length&&(ee.features=qe,B.call(this,ee),delete ee.features)};return{layers:w,listener:B,delegates:{[ue]:Q}}}}_saveDelegatedListener(ue,w){this._delegatedListeners=this._delegatedListeners||{},this._delegatedListeners[ue]=this._delegatedListeners[ue]||[],this._delegatedListeners[ue].push(w)}_removeDelegatedListener(ue,w,B){if(!this._delegatedListeners||!this._delegatedListeners[ue])return;let Q=this._delegatedListeners[ue];for(let ee=0;ee<Q.length;ee++){let le=Q[ee];if(le.listener===B&&le.layers.length===w.length&&le.layers.every(qe=>w.includes(qe))){for(let qe in le.delegates)this.off(qe,le.delegates[qe]);return void Q.splice(ee,1)}}}on(ue,w,B){if(B===void 0)return super.on(ue,w);let Q=this._createDelegatedListener(ue,typeof w=="string"?[w]:w,B);this._saveDelegatedListener(ue,Q);for(let ee in Q.delegates)this.on(ee,Q.delegates[ee]);return this}once(ue,w,B){if(B===void 0)return super.once(ue,w);let Q=typeof w=="string"?[w]:w,ee=this._createDelegatedListener(ue,Q,B);for(let le in ee.delegates){let qe=ee.delegates[le];ee.delegates[le]=(...Xe)=>{this._removeDelegatedListener(ue,Q,B),qe(...Xe)}}this._saveDelegatedListener(ue,ee);for(let le in ee.delegates)this.once(le,ee.delegates[le]);return this}off(ue,w,B){return B===void 0?super.off(ue,w):(this._removeDelegatedListener(ue,typeof w=="string"?[w]:w,B),this)}queryRenderedFeatures(ue,w){if(!this.style)return[];let B,Q=ue instanceof a.P||Array.isArray(ue),ee=Q?ue:[[0,0],[this.transform.width,this.transform.height]];if(w=w||(Q?{}:ue)||{},ee instanceof a.P||typeof ee[0]=="number")B=[a.P.convert(ee)];else{let le=a.P.convert(ee[0]),qe=a.P.convert(ee[1]);B=[le,new a.P(qe.x,le.y),qe,new a.P(le.x,qe.y),le]}return this.style.queryRenderedFeatures(B,w,this.transform)}querySourceFeatures(ue,w){return this.style.querySourceFeatures(ue,w)}setStyle(ue,w){return(w=a.e({},{localIdeographFontFamily:this._localIdeographFontFamily,validate:this._validateStyle},w)).diff!==!1&&w.localIdeographFontFamily===this._localIdeographFontFamily&&this.style&&ue?(this._diffStyle(ue,w),this):(this._localIdeographFontFamily=w.localIdeographFontFamily,this._updateStyle(ue,w))}setTransformRequest(ue){return this._requestManager.setTransformRequest(ue),this}_getUIString(ue){let w=this._locale[ue];if(w==null)throw new Error(`Missing UI string '${ue}'`);return w}_updateStyle(ue,w){if(w.transformStyle&&this.style&&!this.style._loaded)return void this.style.once("style.load",()=>this._updateStyle(ue,w));let B=this.style&&w.transformStyle?this.style.serialize():void 0;return this.style&&(this.style.setEventedParent(null),this.style._remove(!ue)),ue?(this.style=new Ha(this,w||{}),this.style.setEventedParent(this,{style:this.style}),typeof ue=="string"?this.style.loadURL(ue,w,B):this.style.loadJSON(ue,w,B),this):(delete this.style,this)}_lazyInitEmptyStyle(){this.style||(this.style=new Ha(this,{}),this.style.setEventedParent(this,{style:this.style}),this.style.loadEmpty())}_diffStyle(ue,w){if(typeof ue=="string"){let B=this._requestManager.transformRequest(ue,"Style");a.h(B,new AbortController).then(Q=>{this._updateDiff(Q.data,w)}).catch(Q=>{Q&&this.fire(new a.j(Q))})}else typeof ue=="object"&&this._updateDiff(ue,w)}_updateDiff(ue,w){try{this.style.setState(ue,w)&&this._update(!0)}catch(B){a.w(`Unable to perform style diff: ${B.message||B.error||B}.  Rebuilding the style from scratch.`),this._updateStyle(ue,w)}}getStyle(){if(this.style)return this.style.serialize()}isStyleLoaded(){return this.style?this.style.loaded():a.w("There is no style added to the map.")}addSource(ue,w){return this._lazyInitEmptyStyle(),this.style.addSource(ue,w),this._update(!0)}isSourceLoaded(ue){let w=this.style&&this.style.sourceCaches[ue];if(w!==void 0)return w.loaded();this.fire(new a.j(new Error(`There is no source with ID '${ue}'`)))}setTerrain(ue){if(this.style._checkLoaded(),this._terrainDataCallback&&this.style.off("data",this._terrainDataCallback),ue){let w=this.style.sourceCaches[ue.source];if(!w)throw new Error(`cannot load terrain, because there exists no source with ID: ${ue.source}`);this.terrain===null&&w.reload();for(let B in this.style._layers){let Q=this.style._layers[B];Q.type==="hillshade"&&Q.source===ue.source&&a.w("You are using the same source for a hillshade layer and for 3D terrain. Please consider using two separate sources to improve rendering quality.")}this.terrain=new Xo(this.painter,w,ue),this.painter.renderToTexture=new ws(this.painter,this.terrain),this.transform.minElevationForCurrentTile=this.terrain.getMinTileElevationForLngLatZoom(this.transform.center,this.transform.tileZoom),this.transform.elevation=this.terrain.getElevationForLngLatZoom(this.transform.center,this.transform.tileZoom),this._terrainDataCallback=B=>{B.dataType==="style"?this.terrain.sourceCache.freeRtt():B.dataType==="source"&&B.tile&&(B.sourceId!==ue.source||this._elevationFreeze||(this.transform.minElevationForCurrentTile=this.terrain.getMinTileElevationForLngLatZoom(this.transform.center,this.transform.tileZoom),this.transform.elevation=this.terrain.getElevationForLngLatZoom(this.transform.center,this.transform.tileZoom)),this.terrain.sourceCache.freeRtt(B.tile.tileID))},this.style.on("data",this._terrainDataCallback)}else this.terrain&&this.terrain.sourceCache.destruct(),this.terrain=null,this.painter.renderToTexture&&this.painter.renderToTexture.destruct(),this.painter.renderToTexture=null,this.transform.minElevationForCurrentTile=0,this.transform.elevation=0;return this.fire(new a.k("terrain",{terrain:ue})),this}getTerrain(){var ue,w;return(w=(ue=this.terrain)===null||ue===void 0?void 0:ue.options)!==null&&w!==void 0?w:null}areTilesLoaded(){let ue=this.style&&this.style.sourceCaches;for(let w in ue){let B=ue[w]._tiles;for(let Q in B){let ee=B[Q];if(ee.state!=="loaded"&&ee.state!=="errored")return!1}}return!0}removeSource(ue){return this.style.removeSource(ue),this._update(!0)}getSource(ue){return this.style.getSource(ue)}addImage(ue,w,B={}){let{pixelRatio:Q=1,sdf:ee=!1,stretchX:le,stretchY:qe,content:Xe,textFitWidth:ot,textFitHeight:Tt}=B;if(this._lazyInitEmptyStyle(),!(w instanceof HTMLImageElement||a.b(w))){if(w.width===void 0||w.height===void 0)return this.fire(new a.j(new Error("Invalid arguments to map.addImage(). The second argument must be an `HTMLImageElement`, `ImageData`, `ImageBitmap`, or object with `width`, `height`, and `data` properties with the same format as `ImageData`")));{let{width:Yt,height:Kt,data:xr}=w,Ir=w;return this.style.addImage(ue,{data:new a.R({width:Yt,height:Kt},new Uint8Array(xr)),pixelRatio:Q,stretchX:le,stretchY:qe,content:Xe,textFitWidth:ot,textFitHeight:Tt,sdf:ee,version:0,userImage:Ir}),Ir.onAdd&&Ir.onAdd(this,ue),this}}{let{width:Yt,height:Kt,data:xr}=u.getImageData(w);this.style.addImage(ue,{data:new a.R({width:Yt,height:Kt},xr),pixelRatio:Q,stretchX:le,stretchY:qe,content:Xe,textFitWidth:ot,textFitHeight:Tt,sdf:ee,version:0})}}updateImage(ue,w){let B=this.style.getImage(ue);if(!B)return this.fire(new a.j(new Error("The map has no image with that id. If you are adding a new image use `map.addImage(...)` instead.")));let Q=w instanceof HTMLImageElement||a.b(w)?u.getImageData(w):w,{width:ee,height:le,data:qe}=Q;if(ee===void 0||le===void 0)return this.fire(new a.j(new Error("Invalid arguments to map.updateImage(). The second argument must be an `HTMLImageElement`, `ImageData`, `ImageBitmap`, or object with `width`, `height`, and `data` properties with the same format as `ImageData`")));if(ee!==B.data.width||le!==B.data.height)return this.fire(new a.j(new Error("The width and height of the updated image must be that same as the previous version of the image")));let Xe=!(w instanceof HTMLImageElement||a.b(w));return B.data.replace(qe,Xe),this.style.updateImage(ue,B),this}getImage(ue){return this.style.getImage(ue)}hasImage(ue){return ue?!!this.style.getImage(ue):(this.fire(new a.j(new Error("Missing required image id"))),!1)}removeImage(ue){this.style.removeImage(ue)}loadImage(ue){return p.getImage(this._requestManager.transformRequest(ue,"Image"),new AbortController)}listImages(){return this.style.listImages()}addLayer(ue,w){return this._lazyInitEmptyStyle(),this.style.addLayer(ue,w),this._update(!0)}moveLayer(ue,w){return this.style.moveLayer(ue,w),this._update(!0)}removeLayer(ue){return this.style.removeLayer(ue),this._update(!0)}getLayer(ue){return this.style.getLayer(ue)}getLayersOrder(){return this.style.getLayersOrder()}setLayerZoomRange(ue,w,B){return this.style.setLayerZoomRange(ue,w,B),this._update(!0)}setFilter(ue,w,B={}){return this.style.setFilter(ue,w,B),this._update(!0)}getFilter(ue){return this.style.getFilter(ue)}setPaintProperty(ue,w,B,Q={}){return this.style.setPaintProperty(ue,w,B,Q),this._update(!0)}getPaintProperty(ue,w){return this.style.getPaintProperty(ue,w)}setLayoutProperty(ue,w,B,Q={}){return this.style.setLayoutProperty(ue,w,B,Q),this._update(!0)}getLayoutProperty(ue,w){return this.style.getLayoutProperty(ue,w)}setGlyphs(ue,w={}){return this._lazyInitEmptyStyle(),this.style.setGlyphs(ue,w),this._update(!0)}getGlyphs(){return this.style.getGlyphsUrl()}addSprite(ue,w,B={}){return this._lazyInitEmptyStyle(),this.style.addSprite(ue,w,B,Q=>{Q||this._update(!0)}),this}removeSprite(ue){return this._lazyInitEmptyStyle(),this.style.removeSprite(ue),this._update(!0)}getSprite(){return this.style.getSprite()}setSprite(ue,w={}){return this._lazyInitEmptyStyle(),this.style.setSprite(ue,w,B=>{B||this._update(!0)}),this}setLight(ue,w={}){return this._lazyInitEmptyStyle(),this.style.setLight(ue,w),this._update(!0)}getLight(){return this.style.getLight()}setSky(ue){return this._lazyInitEmptyStyle(),this.style.setSky(ue),this._update(!0)}getSky(){return this.style.getSky()}setFeatureState(ue,w){return this.style.setFeatureState(ue,w),this._update()}removeFeatureState(ue,w){return this.style.removeFeatureState(ue,w),this._update()}getFeatureState(ue){return this.style.getFeatureState(ue)}getContainer(){return this._container}getCanvasContainer(){return this._canvasContainer}getCanvas(){return this._canvas}_containerDimensions(){let ue=0,w=0;return this._container&&(ue=this._container.clientWidth||400,w=this._container.clientHeight||300),[ue,w]}_setupContainer(){let ue=this._container;ue.classList.add("maplibregl-map");let w=this._canvasContainer=c.create("div","maplibregl-canvas-container",ue);this._interactive&&w.classList.add("maplibregl-interactive"),this._canvas=c.create("canvas","maplibregl-canvas",w),this._canvas.addEventListener("webglcontextlost",this._contextLost,!1),this._canvas.addEventListener("webglcontextrestored",this._contextRestored,!1),this._canvas.setAttribute("tabindex",this._interactive?"0":"-1"),this._canvas.setAttribute("aria-label",this._getUIString("Map.Title")),this._canvas.setAttribute("role","region");let B=this._containerDimensions(),Q=this._getClampedPixelRatio(B[0],B[1]);this._resizeCanvas(B[0],B[1],Q);let ee=this._controlContainer=c.create("div","maplibregl-control-container",ue),le=this._controlPositions={};["top-left","top-right","bottom-left","bottom-right"].forEach(qe=>{le[qe]=c.create("div",`maplibregl-ctrl-${qe} `,ee)}),this._container.addEventListener("scroll",this._onMapScroll,!1)}_resizeCanvas(ue,w,B){this._canvas.width=Math.floor(B*ue),this._canvas.height=Math.floor(B*w),this._canvas.style.width=`${ue}px`,this._canvas.style.height=`${w}px`}_setupPainter(){let ue={alpha:!0,stencil:!0,depth:!0,failIfMajorPerformanceCaveat:this._failIfMajorPerformanceCaveat,preserveDrawingBuffer:this._preserveDrawingBuffer,antialias:this._antialias||!1},w=null;this._canvas.addEventListener("webglcontextcreationerror",Q=>{w={requestedAttributes:ue},Q&&(w.statusMessage=Q.statusMessage,w.type=Q.type)},{once:!0});let B=this._canvas.getContext("webgl2",ue)||this._canvas.getContext("webgl",ue);if(!B){let Q="Failed to initialize WebGL";throw w?(w.message=Q,new Error(JSON.stringify(w))):new Error(Q)}this.painter=new Dc(B,this.transform),f.testSupport(B)}loaded(){return!this._styleDirty&&!this._sourcesDirty&&!!this.style&&this.style.loaded()}_update(ue){return this.style&&this.style._loaded?(this._styleDirty=this._styleDirty||ue,this._sourcesDirty=!0,this.triggerRepaint(),this):this}_requestRenderFrame(ue){return this._update(),this._renderTaskQueue.add(ue)}_cancelRenderFrame(ue){this._renderTaskQueue.remove(ue)}_render(ue){let w=this._idleTriggered?this._fadeDuration:0;if(this.painter.context.setDirty(),this.painter.setBaseState(),this._renderTaskQueue.run(ue),this._removed)return;let B=!1;if(this.style&&this._styleDirty){this._styleDirty=!1;let ee=this.transform.zoom,le=u.now();this.style.zoomHistory.update(ee,le);let qe=new a.z(ee,{now:le,fadeDuration:w,zoomHistory:this.style.zoomHistory,transition:this.style.getTransition()}),Xe=qe.crossFadingFactor();Xe===1&&Xe===this._crossFadingFactor||(B=!0,this._crossFadingFactor=Xe),this.style.update(qe)}this.style&&this._sourcesDirty&&(this._sourcesDirty=!1,this.style._updateSources(this.transform)),this.terrain?(this.terrain.sourceCache.update(this.transform,this.terrain),this.transform.minElevationForCurrentTile=this.terrain.getMinTileElevationForLngLatZoom(this.transform.center,this.transform.tileZoom),this._elevationFreeze||(this.transform.elevation=this.terrain.getElevationForLngLatZoom(this.transform.center,this.transform.tileZoom))):(this.transform.minElevationForCurrentTile=0,this.transform.elevation=0),this._placementDirty=this.style&&this.style._updatePlacement(this.painter.transform,this.showCollisionBoxes,w,this._crossSourceCollisions),this.painter.render(this.style,{showTileBoundaries:this.showTileBoundaries,showOverdrawInspector:this._showOverdrawInspector,rotating:this.isRotating(),zooming:this.isZooming(),moving:this.isMoving(),fadeDuration:w,showPadding:this.showPadding}),this.fire(new a.k("render")),this.loaded()&&!this._loaded&&(this._loaded=!0,a.bf.mark(a.bg.load),this.fire(new a.k("load"))),this.style&&(this.style.hasTransitions()||B)&&(this._styleDirty=!0),this.style&&!this._placementDirty&&this.style._releaseSymbolFadeTiles();let Q=this._sourcesDirty||this._styleDirty||this._placementDirty;return Q||this._repaint?this.triggerRepaint():!this.isMoving()&&this.loaded()&&this.fire(new a.k("idle")),!this._loaded||this._fullyLoaded||Q||(this._fullyLoaded=!0,a.bf.mark(a.bg.fullLoad)),this}redraw(){return this.style&&(this._frameRequest&&(this._frameRequest.abort(),this._frameRequest=null),this._render(0)),this}remove(){var ue;this._hash&&this._hash.remove();for(let B of this._controls)B.onRemove(this);this._controls=[],this._frameRequest&&(this._frameRequest.abort(),this._frameRequest=null),this._renderTaskQueue.clear(),this.painter.destroy(),this.handlers.destroy(),delete this.handlers,this.setStyle(null),typeof window!="undefined"&&removeEventListener("online",this._onWindowOnline,!1),p.removeThrottleControl(this._imageQueueHandle),(ue=this._resizeObserver)===null||ue===void 0||ue.disconnect();let w=this.painter.context.gl.getExtension("WEBGL_lose_context");w!=null&&w.loseContext&&w.loseContext(),this._canvas.removeEventListener("webglcontextrestored",this._contextRestored,!1),this._canvas.removeEventListener("webglcontextlost",this._contextLost,!1),c.remove(this._canvasContainer),c.remove(this._controlContainer),this._container.classList.remove("maplibregl-map"),a.bf.clearMetrics(),this._removed=!0,this.fire(new a.k("remove"))}triggerRepaint(){this.style&&!this._frameRequest&&(this._frameRequest=new AbortController,u.frameAsync(this._frameRequest).then(ue=>{a.bf.frame(ue),this._frameRequest=null,this._render(ue)}).catch(()=>{}))}get showTileBoundaries(){return!!this._showTileBoundaries}set showTileBoundaries(ue){this._showTileBoundaries!==ue&&(this._showTileBoundaries=ue,this._update())}get showPadding(){return!!this._showPadding}set showPadding(ue){this._showPadding!==ue&&(this._showPadding=ue,this._update())}get showCollisionBoxes(){return!!this._showCollisionBoxes}set showCollisionBoxes(ue){this._showCollisionBoxes!==ue&&(this._showCollisionBoxes=ue,ue?this.style._generateCollisionBoxes():this._update())}get showOverdrawInspector(){return!!this._showOverdrawInspector}set showOverdrawInspector(ue){this._showOverdrawInspector!==ue&&(this._showOverdrawInspector=ue,this._update())}get repaint(){return!!this._repaint}set repaint(ue){this._repaint!==ue&&(this._repaint=ue,this.triggerRepaint())}get vertices(){return!!this._vertices}set vertices(ue){this._vertices=ue,this._update()}get version(){return ml}getCameraTargetElevation(){return this.transform.elevation}},i.MapMouseEvent=jl,i.MapTouchEvent=lf,i.MapWheelEvent=Hh,i.Marker=Yu,i.NavigationControl=class{constructor(ue){this._updateZoomButtons=()=>{let w=this._map.getZoom(),B=w===this._map.getMaxZoom(),Q=w===this._map.getMinZoom();this._zoomInButton.disabled=B,this._zoomOutButton.disabled=Q,this._zoomInButton.setAttribute("aria-disabled",B.toString()),this._zoomOutButton.setAttribute("aria-disabled",Q.toString())},this._rotateCompassArrow=()=>{let w=this.options.visualizePitch?`scale(${1/Math.pow(Math.cos(this._map.transform.pitch*(Math.PI/180)),.5)}) rotateX(${this._map.transform.pitch}deg) rotateZ(${this._map.transform.angle*(180/Math.PI)}deg)`:`rotate(${this._map.transform.angle*(180/Math.PI)}deg)`;this._compassIcon.style.transform=w},this._setButtonTitle=(w,B)=>{let Q=this._map._getUIString(`NavigationControl.${B}`);w.title=Q,w.setAttribute("aria-label",Q)},this.options=a.e({},va,ue),this._container=c.create("div","maplibregl-ctrl maplibregl-ctrl-group"),this._container.addEventListener("contextmenu",w=>w.preventDefault()),this.options.showZoom&&(this._zoomInButton=this._createButton("maplibregl-ctrl-zoom-in",w=>this._map.zoomIn({},{originalEvent:w})),c.create("span","maplibregl-ctrl-icon",this._zoomInButton).setAttribute("aria-hidden","true"),this._zoomOutButton=this._createButton("maplibregl-ctrl-zoom-out",w=>this._map.zoomOut({},{originalEvent:w})),c.create("span","maplibregl-ctrl-icon",this._zoomOutButton).setAttribute("aria-hidden","true")),this.options.showCompass&&(this._compass=this._createButton("maplibregl-ctrl-compass",w=>{this.options.visualizePitch?this._map.resetNorthPitch({},{originalEvent:w}):this._map.resetNorth({},{originalEvent:w})}),this._compassIcon=c.create("span","maplibregl-ctrl-icon",this._compass),this._compassIcon.setAttribute("aria-hidden","true"))}onAdd(ue){return this._map=ue,this.options.showZoom&&(this._setButtonTitle(this._zoomInButton,"ZoomIn"),this._setButtonTitle(this._zoomOutButton,"ZoomOut"),this._map.on("zoom",this._updateZoomButtons),this._updateZoomButtons()),this.options.showCompass&&(this._setButtonTitle(this._compass,"ResetBearing"),this.options.visualizePitch&&this._map.on("pitch",this._rotateCompassArrow),this._map.on("rotate",this._rotateCompassArrow),this._rotateCompassArrow(),this._handler=new no(this._map,this._compass,this.options.visualizePitch)),this._container}onRemove(){c.remove(this._container),this.options.showZoom&&this._map.off("zoom",this._updateZoomButtons),this.options.showCompass&&(this.options.visualizePitch&&this._map.off("pitch",this._rotateCompassArrow),this._map.off("rotate",this._rotateCompassArrow),this._handler.off(),delete this._handler),delete this._map}_createButton(ue,w){let B=c.create("button",ue,this._container);return B.type="button",B.addEventListener("click",w),B}},i.Popup=class extends a.E{constructor(ue){super(),this.remove=()=>(this._content&&c.remove(this._content),this._container&&(c.remove(this._container),delete this._container),this._map&&(this._map.off("move",this._update),this._map.off("move",this._onClose),this._map.off("click",this._onClose),this._map.off("remove",this.remove),this._map.off("mousemove",this._onMouseMove),this._map.off("mouseup",this._onMouseUp),this._map.off("drag",this._onDrag),this._map._canvasContainer.classList.remove("maplibregl-track-pointer"),delete this._map,this.fire(new a.k("close"))),this),this._onMouseUp=w=>{this._update(w.point)},this._onMouseMove=w=>{this._update(w.point)},this._onDrag=w=>{this._update(w.point)},this._update=w=>{var B;if(!this._map||!this._lngLat&&!this._trackPointer||!this._content)return;if(!this._container){if(this._container=c.create("div","maplibregl-popup",this._map.getContainer()),this._tip=c.create("div","maplibregl-popup-tip",this._container),this._container.appendChild(this._content),this.options.className)for(let Xe of this.options.className.split(" "))this._container.classList.add(Xe);this._closeButton&&this._closeButton.setAttribute("aria-label",this._map._getUIString("Popup.Close")),this._trackPointer&&this._container.classList.add("maplibregl-popup-track-pointer")}if(this.options.maxWidth&&this._container.style.maxWidth!==this.options.maxWidth&&(this._container.style.maxWidth=this.options.maxWidth),this._lngLat=this._map.transform.renderWorldCopies&&!this._trackPointer?rs(this._lngLat,this._flatPos,this._map.transform):(B=this._lngLat)===null||B===void 0?void 0:B.wrap(),this._trackPointer&&!w)return;let Q=this._flatPos=this._pos=this._trackPointer&&w?w:this._map.project(this._lngLat);this._map.terrain&&(this._flatPos=this._trackPointer&&w?w:this._map.transform.locationPoint(this._lngLat));let ee=this.options.anchor,le=hc(this.options.offset);if(!ee){let Xe=this._container.offsetWidth,ot=this._container.offsetHeight,Tt;Tt=Q.y+le.bottom.y<ot?["top"]:Q.y>this._map.transform.height-ot?["bottom"]:[],Q.x<Xe/2?Tt.push("left"):Q.x>this._map.transform.width-Xe/2&&Tt.push("right"),ee=Tt.length===0?"bottom":Tt.join("-")}let qe=Q.add(le[ee]);this.options.subpixelPositioning||(qe=qe.round()),c.setTransform(this._container,`${$l[ee]} translate(${qe.x}px,${qe.y}px)`),Cu(this._container,ee,"popup")},this._onClose=()=>{this.remove()},this.options=a.e(Object.create(oo),ue)}addTo(ue){return this._map&&this.remove(),this._map=ue,this.options.closeOnClick&&this._map.on("click",this._onClose),this.options.closeOnMove&&this._map.on("move",this._onClose),this._map.on("remove",this.remove),this._update(),this._focusFirstElement(),this._trackPointer?(this._map.on("mousemove",this._onMouseMove),this._map.on("mouseup",this._onMouseUp),this._container&&this._container.classList.add("maplibregl-popup-track-pointer"),this._map._canvasContainer.classList.add("maplibregl-track-pointer")):this._map.on("move",this._update),this.fire(new a.k("open")),this}isOpen(){return!!this._map}getLngLat(){return this._lngLat}setLngLat(ue){return this._lngLat=a.N.convert(ue),this._pos=null,this._flatPos=null,this._trackPointer=!1,this._update(),this._map&&(this._map.on("move",this._update),this._map.off("mousemove",this._onMouseMove),this._container&&this._container.classList.remove("maplibregl-popup-track-pointer"),this._map._canvasContainer.classList.remove("maplibregl-track-pointer")),this}trackPointer(){return this._trackPointer=!0,this._pos=null,this._flatPos=null,this._update(),this._map&&(this._map.off("move",this._update),this._map.on("mousemove",this._onMouseMove),this._map.on("drag",this._onDrag),this._container&&this._container.classList.add("maplibregl-popup-track-pointer"),this._map._canvasContainer.classList.add("maplibregl-track-pointer")),this}getElement(){return this._container}setText(ue){return this.setDOMContent(document.createTextNode(ue))}setHTML(ue){let w=document.createDocumentFragment(),B=document.createElement("body"),Q;for(B.innerHTML=ue;Q=B.firstChild,Q;)w.appendChild(Q);return this.setDOMContent(w)}getMaxWidth(){var ue;return(ue=this._container)===null||ue===void 0?void 0:ue.style.maxWidth}setMaxWidth(ue){return this.options.maxWidth=ue,this._update(),this}setDOMContent(ue){if(this._content)for(;this._content.hasChildNodes();)this._content.firstChild&&this._content.removeChild(this._content.firstChild);else this._content=c.create("div","maplibregl-popup-content",this._container);return this._content.appendChild(ue),this._createCloseButton(),this._update(),this._focusFirstElement(),this}addClassName(ue){return this._container&&this._container.classList.add(ue),this}removeClassName(ue){return this._container&&this._container.classList.remove(ue),this}setOffset(ue){return this.options.offset=ue,this._update(),this}toggleClassName(ue){if(this._container)return this._container.classList.toggle(ue)}setSubpixelPositioning(ue){this.options.subpixelPositioning=ue}_createCloseButton(){this.options.closeButton&&(this._closeButton=c.create("button","maplibregl-popup-close-button",this._content),this._closeButton.type="button",this._closeButton.innerHTML="&#215;",this._closeButton.addEventListener("click",this._onClose))}_focusFirstElement(){if(!this.options.focusAfterOpen||!this._container)return;let ue=this._container.querySelector(Vc);ue&&ue.focus()}},i.RasterDEMTileSource=Zt,i.RasterTileSource=pt,i.ScaleControl=class{constructor(ue){this._onMove=()=>{Ac(this._map,this._container,this.options)},this.setUnit=w=>{this.options.unit=w,Ac(this._map,this._container,this.options)},this.options=Object.assign(Object.assign({},xu),ue)}getDefaultPosition(){return"bottom-left"}onAdd(ue){return this._map=ue,this._container=c.create("div","maplibregl-ctrl maplibregl-ctrl-scale",ue.getContainer()),this._map.on("move",this._onMove),this._onMove(),this._container}onRemove(){c.remove(this._container),this._map.off("move",this._onMove),this._map=void 0}},i.ScrollZoomHandler=Kr,i.Style=Ha,i.TerrainControl=class{constructor(ue){this._toggleTerrain=()=>{this._map.getTerrain()?this._map.setTerrain(null):this._map.setTerrain(this.options),this._updateTerrainIcon()},this._updateTerrainIcon=()=>{this._terrainButton.classList.remove("maplibregl-ctrl-terrain"),this._terrainButton.classList.remove("maplibregl-ctrl-terrain-enabled"),this._map.terrain?(this._terrainButton.classList.add("maplibregl-ctrl-terrain-enabled"),this._terrainButton.title=this._map._getUIString("TerrainControl.Disable")):(this._terrainButton.classList.add("maplibregl-ctrl-terrain"),this._terrainButton.title=this._map._getUIString("TerrainControl.Enable"))},this.options=ue}onAdd(ue){return this._map=ue,this._container=c.create("div","maplibregl-ctrl maplibregl-ctrl-group"),this._terrainButton=c.create("button","maplibregl-ctrl-terrain",this._container),c.create("span","maplibregl-ctrl-icon",this._terrainButton).setAttribute("aria-hidden","true"),this._terrainButton.type="button",this._terrainButton.addEventListener("click",this._toggleTerrain),this._updateTerrainIcon(),this._map.on("terrain",this._updateTerrainIcon),this._container}onRemove(){c.remove(this._container),this._map.off("terrain",this._updateTerrainIcon),this._map=void 0}},i.TwoFingersTouchPitchHandler=Bc,i.TwoFingersTouchRotateHandler=cf,i.TwoFingersTouchZoomHandler=vu,i.TwoFingersTouchZoomRotateHandler=Yi,i.VectorTileSource=ut,i.VideoSource=Nt,i.addSourceType=(ue,w)=>a._(void 0,void 0,void 0,function*(){if(wr(ue))throw new Error(`A source type called "${ue}" already exists.`);((B,Q)=>{sr[B]=Q})(ue,w)}),i.clearPrewarmedResources=function(){let ue=me;ue&&(ue.isPreloaded()&&ue.numActive()===1?(ue.release(_e),me=null):console.warn("Could not clear WebWorkers since there are active Map instances that still reference it. The pre-warmed WebWorker pool can only be cleared when all map instances have been removed with map.remove()"))},i.getMaxParallelImageRequests=function(){return a.a.MAX_PARALLEL_IMAGE_REQUESTS},i.getRTLTextPluginStatus=function(){return Qt().getRTLTextPluginStatus()},i.getVersion=function(){return Ku},i.getWorkerCount=function(){return Ee.workerCount},i.getWorkerUrl=function(){return a.a.WORKER_URL},i.importScriptInWorkers=function(ue){return Ae().broadcast("IS",ue)},i.prewarm=function(){Se().acquire(_e)},i.setMaxParallelImageRequests=function(ue){a.a.MAX_PARALLEL_IMAGE_REQUESTS=ue},i.setRTLTextPlugin=function(ue,w){return Qt().setRTLTextPlugin(ue,w)},i.setWorkerCount=function(ue){Ee.workerCount=ue},i.setWorkerUrl=function(ue){a.a.WORKER_URL=ue}});var n=e;return n})});var fje=ye((Ebr,cje)=>{"use strict";var aw=Dr(),AXt=ru().sanitizeHTML,SXt=SJ(),sje=wx();function lje(e,t){this.subplot=e,this.uid=e.uid+"-"+t,this.index=t,this.idSource="source-"+this.uid,this.idLayer=sje.layoutLayerPrefix+this.uid,this.sourceType=null,this.source=null,this.layerType=null,this.below=null,this.visible=!1}var ag=lje.prototype;ag.update=function(t){this.visible?this.needsNewImage(t)?this.updateImage(t):this.needsNewSource(t)?(this.removeLayer(),this.updateSource(t),this.updateLayer(t)):this.needsNewLayer(t)?this.updateLayer(t):this.updateStyle(t):(this.updateSource(t),this.updateLayer(t)),this.visible=c7(t)};ag.needsNewImage=function(e){var t=this.subplot.map;return t.getSource(this.idSource)&&this.sourceType==="image"&&e.sourcetype==="image"&&(this.source!==e.source||JSON.stringify(this.coordinates)!==JSON.stringify(e.coordinates))};ag.needsNewSource=function(e){return this.sourceType!==e.sourcetype||JSON.stringify(this.source)!==JSON.stringify(e.source)||this.layerType!==e.type};ag.needsNewLayer=function(e){return this.layerType!==e.type||this.below!==this.subplot.belowLookup["layout-"+this.index]};ag.lookupBelow=function(){return this.subplot.belowLookup["layout-"+this.index]};ag.updateImage=function(e){var t=this.subplot.map;t.getSource(this.idSource).updateImage({url:e.source,coordinates:e.coordinates});var r=this.findFollowingMapLayerId(this.lookupBelow());r!==null&&this.subplot.map.moveLayer(this.idLayer,r)};ag.updateSource=function(e){var t=this.subplot.map;if(t.getSource(this.idSource)&&t.removeSource(this.idSource),this.sourceType=e.sourcetype,this.source=e.source,!!c7(e)){var r=MXt(e);t.addSource(this.idSource,r)}};ag.findFollowingMapLayerId=function(e){if(e==="traces")for(var t=this.subplot.getMapLayers(),r=0;r<t.length;r++){var n=t[r].id;if(typeof n=="string"&&n.indexOf(sje.traceLayerPrefix)===0){e=n;break}}return e};ag.updateLayer=function(e){var t=this.subplot,r=uje(e),n=this.lookupBelow(),i=this.findFollowingMapLayerId(n);this.removeLayer(),c7(e)&&t.addLayer({id:this.idLayer,source:this.idSource,"source-layer":e.sourcelayer||"",type:e.type,minzoom:e.minzoom,maxzoom:e.maxzoom,layout:r.layout,paint:r.paint},i),this.layerType=e.type,this.below=n};ag.updateStyle=function(e){if(c7(e)){var t=uje(e);this.subplot.setOptions(this.idLayer,"setLayoutProperty",t.layout),this.subplot.setOptions(this.idLayer,"setPaintProperty",t.paint)}};ag.removeLayer=function(){var e=this.subplot.map;e.getLayer(this.idLayer)&&e.removeLayer(this.idLayer)};ag.dispose=function(){var e=this.subplot.map;e.getLayer(this.idLayer)&&e.removeLayer(this.idLayer),e.getSource(this.idSource)&&e.removeSource(this.idSource)};function c7(e){if(!e.visible)return!1;var t=e.source;if(Array.isArray(t)&&t.length>0){for(var r=0;r<t.length;r++)if(typeof t[r]!="string"||t[r].length===0)return!1;return!0}return aw.isPlainObject(t)||typeof t=="string"&&t.length>0}function uje(e){var t={},r={};switch(e.type){case"circle":aw.extendFlat(r,{"circle-radius":e.circle.radius,"circle-color":e.color,"circle-opacity":e.opacity});break;case"line":aw.extendFlat(r,{"line-width":e.line.width,"line-color":e.color,"line-opacity":e.opacity,"line-dasharray":e.line.dash});break;case"fill":aw.extendFlat(r,{"fill-color":e.color,"fill-outline-color":e.fill.outlinecolor,"fill-opacity":e.opacity});break;case"symbol":var n=e.symbol,i=SXt(n.textposition,n.iconsize);aw.extendFlat(t,{"icon-image":n.icon+"-15","icon-size":n.iconsize/10,"text-field":n.text,"text-size":n.textfont.size,"text-anchor":i.anchor,"text-offset":i.offset,"symbol-placement":n.placement}),aw.extendFlat(r,{"icon-color":e.color,"text-color":n.textfont.color,"text-opacity":e.opacity});break;case"raster":aw.extendFlat(r,{"raster-fade-duration":0,"raster-opacity":e.opacity});break}return{layout:t,paint:r}}function MXt(e){var t=e.sourcetype,r=e.source,n={type:t},i;return t==="geojson"?i="data":t==="vector"?i=typeof r=="string"?"url":"tiles":t==="raster"?(i="tiles",n.tileSize=256):t==="image"&&(i="url",n.coordinates=e.coordinates),n[i]=r,e.sourceattribution&&(n.attribution=AXt(e.sourceattribution)),n}cje.exports=function(t,r,n){var i=new lje(t,r);return i.update(n),i}});var _je=ye((kbr,yje)=>{"use strict";var PJ=oje(),IJ=Dr(),vje=ix(),hje=qa(),EXt=ho(),kXt=yv(),f7=vf(),pje=Eg(),CXt=pje.drawMode,LXt=pje.selectMode,PXt=Of().prepSelect,IXt=Of().clearOutline,RXt=Of().clearSelectionsCache,DXt=Of().selectOnClick,ow=wx(),FXt=fje();function gje(e,t){this.id=t,this.gd=e;var r=e._fullLayout,n=e._context;this.container=r._glcontainer.node(),this.isStatic=n.staticPlot,this.uid=r._uid+"-"+this.id,this.div=null,this.xaxis=null,this.yaxis=null,this.createFramework(r),this.map=null,this.styleObj=null,this.traceHash={},this.layerList=[],this.belowLookup={},this.dragging=!1,this.wheeling=!1}var Uh=gje.prototype;Uh.plot=function(e,t,r){var n=this,i;n.map?i=new Promise(function(a,o){n.updateMap(e,t,a,o)}):i=new Promise(function(a,o){n.createMap(e,t,a,o)}),r.push(i)};Uh.createMap=function(e,t,r,n){var i=this,a=t[i.id],o=i.styleObj=mje(a.style),s=a.bounds,l=s?[[s.west,s.south],[s.east,s.north]]:null,u=i.map=new PJ.Map({container:i.div,style:o.style,center:RJ(a.center),zoom:a.zoom,bearing:a.bearing,pitch:a.pitch,maxBounds:l,interactive:!i.isStatic,preserveDrawingBuffer:i.isStatic,doubleClickZoom:!1,boxZoom:!1,attributionControl:!1}).addControl(new PJ.AttributionControl({compact:!0})),c={};u.on("styleimagemissing",function(h){var d=h.id;if(!c[d]&&d.includes("-15")){c[d]=!0;var v=new Image(15,15);v.onload=function(){u.addImage(d,v)},v.crossOrigin="Anonymous",v.src="https://unpkg.com/maki@2.1.0/icons/"+d+".svg"}}),u.setTransformRequest(function(h){return h=h.replace("https://fonts.openmaptiles.org/Open Sans Extrabold","https://fonts.openmaptiles.org/Open Sans Extra Bold"),h=h.replace("https://tiles.basemaps.cartocdn.com/fonts/Open Sans Extrabold","https://fonts.openmaptiles.org/Open Sans Extra Bold"),h=h.replace("https://fonts.openmaptiles.org/Open Sans Regular,Arial Unicode MS Regular","https://fonts.openmaptiles.org/Klokantech Noto Sans Regular"),{url:h}}),u._canvas.style.left="0px",u._canvas.style.top="0px",i.rejectOnError(n),i.isStatic||i.initFx(e,t);var f=[];f.push(new Promise(function(h){u.once("load",h)})),f=f.concat(vje.fetchTraceGeoData(e)),Promise.all(f).then(function(){i.fillBelowLookup(e,t),i.updateData(e),i.updateLayout(t),i.resolveOnRender(r)}).catch(n)};Uh.updateMap=function(e,t,r,n){var i=this,a=i.map,o=t[this.id];i.rejectOnError(n);var s=[],l=mje(o.style);JSON.stringify(i.styleObj)!==JSON.stringify(l)&&(i.styleObj=l,a.setStyle(l.style),i.traceHash={},s.push(new Promise(function(u){a.once("styledata",u)}))),s=s.concat(vje.fetchTraceGeoData(e)),Promise.all(s).then(function(){i.fillBelowLookup(e,t),i.updateData(e),i.updateLayout(t),i.resolveOnRender(r)}).catch(n)};Uh.fillBelowLookup=function(e,t){var r=t[this.id],n=r.layers,i,a,o=this.belowLookup={},s=!1;for(i=0;i<e.length;i++){var l=e[i][0].trace,u=l._module;typeof l.below=="string"?a=l.below:u.getBelow&&(a=u.getBelow(l,this)),a===""&&(s=!0),o["trace-"+l.uid]=a||""}for(i=0;i<n.length;i++){var c=n[i];typeof c.below=="string"?a=c.below:s?a="traces":a="",o["layout-"+i]=a}var f={},h,d;for(h in o)a=o[h],f[a]?f[a].push(h):f[a]=[h];for(a in f){var v=f[a];if(v.length>1)for(i=0;i<v.length;i++)h=v[i],h.indexOf("trace-")===0?(d=h.split("trace-")[1],this.traceHash[d]&&(this.traceHash[d].below=null)):h.indexOf("layout-")===0&&(d=h.split("layout-")[1],this.layerList[d]&&(this.layerList[d].below=null))}};var dje={choroplethmap:0,densitymap:1,scattermap:2};Uh.updateData=function(e){var t=this.traceHash,r,n,i,a,o=e.slice().sort(function(f,h){return dje[f[0].trace.type]-dje[h[0].trace.type]});for(i=0;i<o.length;i++){var s=o[i];n=s[0].trace,r=t[n.uid];var l=!1;r&&(r.type===n.type?(r.update(s),l=!0):r.dispose()),!l&&n._module&&(t[n.uid]=n._module.plot(this,s))}var u=Object.keys(t);e:for(i=0;i<u.length;i++){var c=u[i];for(a=0;a<e.length;a++)if(n=e[a][0].trace,c===n.uid)continue e;r=t[c],r.dispose(),delete t[c]}};Uh.updateLayout=function(e){var t=this.map,r=e[this.id];!this.dragging&&!this.wheeling&&(t.setCenter(RJ(r.center)),t.setZoom(r.zoom),t.setBearing(r.bearing),t.setPitch(r.pitch)),this.updateLayers(e),this.updateFramework(e),this.updateFx(e),this.map.resize(),this.gd._context._scrollZoom.map?t.scrollZoom.enable():t.scrollZoom.disable()};Uh.resolveOnRender=function(e){var t=this.map;t.on("render",function r(){t.loaded()&&(t.off("render",r),setTimeout(e,10))})};Uh.rejectOnError=function(e){var t=this.map;function r(){e(new Error(ow.mapOnErrorMsg))}t.once("error",r),t.once("style.error",r),t.once("source.error",r),t.once("tile.error",r),t.once("layer.error",r)};Uh.createFramework=function(e){var t=this,r=t.div=document.createElement("div");r.id=t.uid,r.style.position="absolute",t.container.appendChild(r),t.xaxis={_id:"x",c2p:function(n){return t.project(n).x}},t.yaxis={_id:"y",c2p:function(n){return t.project(n).y}},t.updateFramework(e),t.mockAxis={type:"linear",showexponent:"all",exponentformat:"B"},EXt.setConvert(t.mockAxis,e)};Uh.initFx=function(e,t){var r=this,n=r.gd,i=r.map;i.on("moveend",function(s){if(r.map){var l=n._fullLayout;if(s.originalEvent||r.wheeling){var u=l[r.id];hje.call("_storeDirectGUIEdit",n.layout,l._preGUI,r.getViewEdits(u));var c=r.getView();u._input.center=u.center=c.center,u._input.zoom=u.zoom=c.zoom,u._input.bearing=u.bearing=c.bearing,u._input.pitch=u.pitch=c.pitch,n.emit("plotly_relayout",r.getViewEditsWithDerived(c))}s.originalEvent&&s.originalEvent.type==="mouseup"?r.dragging=!1:r.wheeling&&(r.wheeling=!1),l&&l._rehover&&l._rehover()}}),i.on("wheel",function(){r.wheeling=!0}),i.on("mousemove",function(s){var l=r.div.getBoundingClientRect(),u=[s.originalEvent.offsetX,s.originalEvent.offsetY];s.target.getBoundingClientRect=function(){return l},r.xaxis.p2c=function(){return i.unproject(u).lng},r.yaxis.p2c=function(){return i.unproject(u).lat},n._fullLayout._rehover=function(){n._fullLayout._hoversubplot===r.id&&n._fullLayout[r.id]&&f7.hover(n,s,r.id)},f7.hover(n,s,r.id),n._fullLayout._hoversubplot=r.id});function a(){f7.loneUnhover(t._hoverlayer)}i.on("dragstart",function(){r.dragging=!0,a()}),i.on("zoomstart",a),i.on("mouseout",function(){n._fullLayout._hoversubplot=null});function o(){var s=r.getView();n.emit("plotly_relayouting",r.getViewEditsWithDerived(s))}i.on("drag",o),i.on("zoom",o),i.on("dblclick",function(){var s=n._fullLayout[r.id];hje.call("_storeDirectGUIEdit",n.layout,n._fullLayout._preGUI,r.getViewEdits(s));var l=r.viewInitial;i.setCenter(RJ(l.center)),i.setZoom(l.zoom),i.setBearing(l.bearing),i.setPitch(l.pitch);var u=r.getView();s._input.center=s.center=u.center,s._input.zoom=s.zoom=u.zoom,s._input.bearing=s.bearing=u.bearing,s._input.pitch=s.pitch=u.pitch,n.emit("plotly_doubleclick",null),n.emit("plotly_relayout",r.getViewEditsWithDerived(u))}),r.clearOutline=function(){RXt(r.dragOptions),IXt(r.dragOptions.gd)},r.onClickInPanFn=function(s){return function(l){var u=n._fullLayout.clickmode;u.indexOf("select")>-1&&DXt(l.originalEvent,n,[r.xaxis],[r.yaxis],r.id,s),u.indexOf("event")>-1&&f7.click(n,l.originalEvent)}}};Uh.updateFx=function(e){var t=this,r=t.map,n=t.gd;if(t.isStatic)return;function i(l){var u=t.map.unproject(l);return[u.lng,u.lat]}var a=e.dragmode,o;o=function(l,u){if(u.isRect){var c=l.range={};c[t.id]=[i([u.xmin,u.ymin]),i([u.xmax,u.ymax])]}else{var f=l.lassoPoints={};f[t.id]=u.map(i)}};var s=t.dragOptions;t.dragOptions=IJ.extendDeep(s||{},{dragmode:e.dragmode,element:t.div,gd:n,plotinfo:{id:t.id,domain:e[t.id].domain,xaxis:t.xaxis,yaxis:t.yaxis,fillRangeItems:o},xaxes:[t.xaxis],yaxes:[t.yaxis],subplot:t.id}),r.off("click",t.onClickInPanHandler),LXt(a)||CXt(a)?(r.dragPan.disable(),r.on("zoomstart",t.clearOutline),t.dragOptions.prepFn=function(l,u,c){PXt(l,u,c,t.dragOptions,a)},kXt.init(t.dragOptions)):(r.dragPan.enable(),r.off("zoomstart",t.clearOutline),t.div.onmousedown=null,t.div.ontouchstart=null,t.div.removeEventListener("touchstart",t.div._ontouchstart),t.onClickInPanHandler=t.onClickInPanFn(t.dragOptions),r.on("click",t.onClickInPanHandler))};Uh.updateFramework=function(e){var t=e[this.id].domain,r=e._size,n=this.div.style;n.width=r.w*(t.x[1]-t.x[0])+"px",n.height=r.h*(t.y[1]-t.y[0])+"px",n.left=r.l+t.x[0]*r.w+"px",n.top=r.t+(1-t.y[1])*r.h+"px",this.xaxis._offset=r.l+t.x[0]*r.w,this.xaxis._length=r.w*(t.x[1]-t.x[0]),this.yaxis._offset=r.t+(1-t.y[1])*r.h,this.yaxis._length=r.h*(t.y[1]-t.y[0])};Uh.updateLayers=function(e){var t=e[this.id],r=t.layers,n=this.layerList,i;if(r.length!==n.length){for(i=0;i<n.length;i++)n[i].dispose();for(n=this.layerList=[],i=0;i<r.length;i++)n.push(FXt(this,i,r[i]))}else for(i=0;i<r.length;i++)n[i].update(r[i])};Uh.destroy=function(){this.map&&(this.map.remove(),this.map=null,this.container.removeChild(this.div))};Uh.toImage=function(){return this.map.stop(),this.map.getCanvas().toDataURL()};Uh.setOptions=function(e,t,r){for(var n in r)this.map[t](e,n,r[n])};Uh.getMapLayers=function(){return this.map.getStyle().layers};Uh.addLayer=function(e,t){var r=this.map;if(typeof t=="string"){if(t===""){r.addLayer(e,t);return}for(var n=this.getMapLayers(),i=0;i<n.length;i++)if(t===n[i].id){r.addLayer(e,t);return}IJ.warn(["Trying to add layer with *below* value",t,"referencing a layer that does not exist","or that does not yet exist."].join(" "))}r.addLayer(e)};Uh.project=function(e){return this.map.project(new PJ.LngLat(e[0],e[1]))};Uh.getView=function(){var e=this.map,t=e.getCenter(),r=t.lng,n=t.lat,i={lon:r,lat:n},a=e.getCanvas(),o=parseInt(a.style.width),s=parseInt(a.style.height);return{center:i,zoom:e.getZoom(),bearing:e.getBearing(),pitch:e.getPitch(),_derived:{coordinates:[e.unproject([0,0]).toArray(),e.unproject([o,0]).toArray(),e.unproject([o,s]).toArray(),e.unproject([0,s]).toArray()]}}};Uh.getViewEdits=function(e){for(var t=this.id,r=["center","zoom","bearing","pitch"],n={},i=0;i<r.length;i++){var a=r[i];n[t+"."+a]=e[a]}return n};Uh.getViewEditsWithDerived=function(e){var t=this.id,r=this.getViewEdits(e);return r[t+"._derived"]=e._derived,r};function mje(e){var t={};return IJ.isPlainObject(e)?(t.id=e.id,t.style=e):typeof e=="string"?(t.id=e,ow.stylesMap[e]?t.style=ow.stylesMap[e]:t.style=e):(t.id=ow.styleValueDflt,t.style=zXt(ow.styleValueDflt)),t.transition={duration:0,delay:0},t}function zXt(e){return ow.styleUrlPrefix+e+"-"+ow.styleUrlSuffix}function RJ(e){return[e.lon,e.lat]}yje.exports=gje});var wje=ye((Cbr,bje)=>{"use strict";var DJ=Dr(),OXt=k_(),qXt=Yd(),xje=jk();bje.exports=function(t,r,n){OXt(t,r,n,{type:"map",attributes:xje,handleDefaults:BXt,partition:"y"})};function BXt(e,t,r){r("style"),r("center.lon"),r("center.lat"),r("zoom"),r("bearing"),r("pitch");var n=r("bounds.west"),i=r("bounds.east"),a=r("bounds.south"),o=r("bounds.north");(n===void 0||i===void 0||a===void 0||o===void 0)&&delete t.bounds,qXt(e,t,{name:"layers",handleItemDefaults:NXt}),t._input=e}function NXt(e,t){function r(l,u){return DJ.coerce(e,t,xje.layers,l,u)}var n=r("visible");if(n){var i=r("sourcetype"),a=i==="raster"||i==="image";r("source"),r("sourceattribution"),i==="vector"&&r("sourcelayer"),i==="image"&&r("coordinates");var o;a&&(o="raster");var s=r("type",o);a&&s!=="raster"&&(s=t.type="raster",DJ.log("Source types *raster* and *image* must drawn *raster* layer type.")),r("below"),r("color"),r("opacity"),r("minzoom"),r("maxzoom"),s==="circle"&&r("circle.radius"),s==="line"&&(r("line.width"),r("line.dash")),s==="fill"&&r("fill.outlinecolor"),s==="symbol"&&(r("symbol.icon"),r("symbol.iconsize"),r("symbol.text"),DJ.coerceFont(r,"symbol.textfont",void 0,{noFontVariant:!0,noFontShadow:!0,noFontLineposition:!0,noFontTextcase:!0}),r("symbol.textposition"),r("symbol.placement"))}}});var d7=ye(l0=>{"use strict";var h7=Dr(),Tje=h7.strTranslate,UXt=h7.strScale,VXt=Id().getSubplotCalcData,GXt=Wp(),HXt=Oa(),Aje=So(),jXt=ru(),WXt=_je(),Tx="map";l0.name=Tx;l0.attr="subplot";l0.idRoot=Tx;l0.idRegex=l0.attrRegex=h7.counterRegex(Tx);l0.attributes={subplot:{valType:"subplotid",dflt:"map",editType:"calc"}};l0.layoutAttributes=jk();l0.supplyLayoutDefaults=wje();l0.plot=function(t){for(var r=t._fullLayout,n=t.calcdata,i=r._subplots[Tx],a=0;a<i.length;a++){var o=i[a],s=VXt(n,Tx,o),l=r[o],u=l._subplot;u||(u=new WXt(t,o),r[o]._subplot=u),u.viewInitial||(u.viewInitial={center:h7.extendFlat({},l.center),zoom:l.zoom,bearing:l.bearing,pitch:l.pitch}),u.plot(s,r,t._promises)}};l0.clean=function(e,t,r,n){for(var i=n._subplots[Tx]||[],a=0;a<i.length;a++){var o=i[a];!t[o]&&n[o]._subplot&&n[o]._subplot.destroy()}};l0.toSVG=function(e){for(var t=e._fullLayout,r=t._subplots[Tx],n=t._size,i=0;i<r.length;i++){var a=t[r[i]],o=a.domain,s=a._subplot,l=s.toImage("png"),u=t._glimages.append("svg:image");u.attr({xmlns:GXt.svg,"xlink:href":l,x:n.l+n.w*o.x[0],y:n.t+n.h*(1-o.y[1]),width:n.w*(o.x[1]-o.x[0]),height:n.h*(o.y[1]-o.y[0]),preserveAspectRatio:"none"});var c=HXt.select(a._subplot.div),f=c.select(".maplibregl-ctrl-attrib").text().replace("Improve this map",""),h=t._glimages.append("g"),d=h.append("text");d.text(f).classed("static-attribution",!0).attr({"font-size":12,"font-family":"Arial",color:"rgba(0, 0, 0, 0.75)","text-anchor":"end","data-unformatted":f});var v=Aje.bBox(d.node()),_=n.w*(o.x[1]-o.x[0]);if(v.width>_/2){var b=f.split("|").join("<br>");d.text(b).attr("data-unformatted",b).call(jXt.convertToTspans,e),v=Aje.bBox(d.node())}d.attr("transform",Tje(-3,-v.height+8)),h.insert("rect",".static-attribution").attr({x:-v.width-6,y:-v.height-3,width:v.width+6,height:v.height+3,fill:"rgba(255, 255, 255, 0.75)"});var p=1;v.width+6>_&&(p=_/(v.width+6));var k=[n.l+n.w*o.x[1],n.t+n.h*(1-o.y[0])];h.attr("transform",Tje(k[0],k[1])+UXt(p))}};l0.updateFx=function(e){for(var t=e._fullLayout,r=t._subplots[Tx],n=0;n<r.length;n++){var i=t[r[n]]._subplot;i.updateFx(t)}}});var Mje=ye((Pbr,Sje)=>{"use strict";Sje.exports={attributes:n7(),supplyDefaults:qHe(),colorbar:$d(),formatLabels:AJ(),calc:pF(),plot:$He(),hoverPoints:u7().hoverPoints,eventData:rje(),selectPoints:nje(),styleOnSelect:function(e,t){if(t){var r=t[0].trace;r._glTrace.update(t)}},moduleType:"trace",name:"scattermap",basePlotModule:d7(),categories:["map","gl","symbols","showLegend","scatter-like"],meta:{}}});var kje=ye((Ibr,Eje)=>{"use strict";Eje.exports=Mje()});var FJ=ye((Rbr,Cje)=>{"use strict";var d1=t5(),XXt=Tu(),{hovertemplateAttrs:ZXt,templatefallbackAttrs:YXt}=Ll(),KXt=Gl(),Ax=Ao().extendFlat;Cje.exports=Ax({locations:{valType:"data_array",editType:"calc"},z:{valType:"data_array",editType:"calc"},geojson:{valType:"any",editType:"calc"},featureidkey:Ax({},d1.featureidkey,{}),below:{valType:"string",editType:"plot"},text:d1.text,hovertext:d1.hovertext,marker:{line:{color:Ax({},d1.marker.line.color,{editType:"plot"}),width:Ax({},d1.marker.line.width,{editType:"plot"}),editType:"calc"},opacity:Ax({},d1.marker.opacity,{editType:"plot"}),editType:"calc"},selected:{marker:{opacity:Ax({},d1.selected.marker.opacity,{editType:"plot"}),editType:"plot"},editType:"plot"},unselected:{marker:{opacity:Ax({},d1.unselected.marker.opacity,{editType:"plot"}),editType:"plot"},editType:"plot"},hoverinfo:d1.hoverinfo,hovertemplate:ZXt({},{keys:["properties"]}),hovertemplatefallback:YXt(),showlegend:Ax({},KXt.showlegend,{dflt:!1})},XXt("",{cLetter:"z",editTypeOverride:"calc"}))});var Pje=ye((Dbr,Lje)=>{"use strict";var Yk=Dr(),JXt=Qh(),$Xt=FJ();Lje.exports=function(t,r,n,i){function a(c,f){return Yk.coerce(t,r,$Xt,c,f)}var o=a("locations"),s=a("z"),l=a("geojson");if(!Yk.isArrayOrTypedArray(o)||!o.length||!Yk.isArrayOrTypedArray(s)||!s.length||!(typeof l=="string"&&l!==""||Yk.isPlainObject(l))){r.visible=!1;return}a("featureidkey"),r._length=Math.min(o.length,s.length),a("below"),a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback");var u=a("marker.line.width");u&&a("marker.line.color"),a("marker.opacity"),JXt(t,r,i,a,{prefix:"",cLetter:"z"}),Yk.coerceSelectionMarkerOpacity(r,a)}});var zJ=ye((Fbr,Dje)=>{"use strict";var QXt=Eo(),v1=Dr(),eZt=tc(),tZt=So(),rZt=tx().makeBlank,Ije=ix();function iZt(e){var t=e[0].trace,r=t.visible===!0&&t._length!==0,n={layout:{visibility:"none"},paint:{}},i={layout:{visibility:"none"},paint:{}},a=t._opts={fill:n,line:i,geojson:rZt()};if(!r)return a;var o=Ije.extractTraceFeature(e);if(!o)return a;var s=eZt.makeColorScaleFuncFromTrace(t),l=t.marker,u=l.line||{},c;v1.isArrayOrTypedArray(l.opacity)&&(c=function(k){var E=k.mo;return QXt(E)?+v1.constrain(E,0,1):0});var f;v1.isArrayOrTypedArray(u.color)&&(f=function(k){return k.mlc});var h;v1.isArrayOrTypedArray(u.width)&&(h=function(k){return k.mlw});for(var d=0;d<e.length;d++){var v=e[d],_=v.fOut;if(_){var b=_.properties;b.fc=s(v.z),c&&(b.mo=c(v)),f&&(b.mlc=f(v)),h&&(b.mlw=h(v)),v.ct=b.ct,v._polygons=Ije.feature2polygons(_)}}var p=c?{type:"identity",property:"mo"}:l.opacity;return v1.extendFlat(n.paint,{"fill-color":{type:"identity",property:"fc"},"fill-opacity":p}),v1.extendFlat(i.paint,{"line-color":f?{type:"identity",property:"mlc"}:u.color,"line-width":h?{type:"identity",property:"mlw"}:u.width,"line-opacity":p}),n.layout.visibility="visible",i.layout.visibility="visible",a.geojson={type:"FeatureCollection",features:o},Rje(e),a}function Rje(e){var t=e[0].trace,r=t._opts,n;if(t.selectedpoints){for(var i=tZt.makeSelectedPointStyleFns(t),a=0;a<e.length;a++){var o=e[a];o.fOut&&(o.fOut.properties.mo2=i.selectedOpacityFn(o))}n={type:"identity",property:"mo2"}}else n=v1.isArrayOrTypedArray(t.marker.opacity)?{type:"identity",property:"mo"}:t.marker.opacity;return v1.extendFlat(r.fill.paint,{"fill-opacity":n}),v1.extendFlat(r.line.paint,{"line-opacity":n}),r}Dje.exports={convert:iZt,convertOnSelect:Rje}});var Bje=ye((zbr,qje)=>{"use strict";var zje=zJ().convert,nZt=zJ().convertOnSelect,Fje=wx().traceLayerPrefix;function Oje(e,t){this.type="choroplethmap",this.subplot=e,this.uid=t,this.sourceId="source-"+t,this.layerList=[["fill",Fje+t+"-fill"],["line",Fje+t+"-line"]],this.below=null}var L5=Oje.prototype;L5.update=function(e){this._update(zje(e)),e[0].trace._glTrace=this};L5.updateOnSelect=function(e){this._update(nZt(e))};L5._update=function(e){var t=this.subplot,r=this.layerList,n=t.belowLookup["trace-"+this.uid];t.map.getSource(this.sourceId).setData(e.geojson),n!==this.below&&(this._removeLayers(),this._addLayers(e,n),this.below=n);for(var i=0;i<r.length;i++){var a=r[i],o=a[0],s=a[1],l=e[o];t.setOptions(s,"setLayoutProperty",l.layout),l.layout.visibility==="visible"&&t.setOptions(s,"setPaintProperty",l.paint)}};L5._addLayers=function(e,t){for(var r=this.subplot,n=this.layerList,i=this.sourceId,a=0;a<n.length;a++){var o=n[a],s=o[0],l=e[s];r.addLayer({type:s,id:o[1],source:i,layout:l.layout,paint:l.paint},t)}};L5._removeLayers=function(){for(var e=this.subplot.map,t=this.layerList,r=t.length-1;r>=0;r--)e.removeLayer(t[r][1])};L5.dispose=function(){var e=this.subplot.map;this._removeLayers(),e.removeSource(this.sourceId)};qje.exports=function(t,r){var n=r[0].trace,i=new Oje(t,n.uid),a=i.sourceId,o=zje(r),s=i.below=t.belowLookup["trace-"+n.uid];return t.map.addSource(a,{type:"geojson",data:o.geojson}),i._addLayers(o,s),r[0].trace._glTrace=i,i}});var Uje=ye((Obr,Nje)=>{"use strict";Nje.exports={attributes:FJ(),supplyDefaults:Pje(),colorbar:S_(),calc:zF(),plot:Bje(),hoverPoints:qF(),eventData:BF(),selectPoints:NF(),styleOnSelect:function(e,t){if(t){var r=t[0].trace;r._glTrace.updateOnSelect(t)}},getBelow:function(e,t){for(var r=t.getMapLayers(),n=r.length-2;n>=0;n--){var i=r[n].id;if(typeof i=="string"&&i.indexOf("water")===0){for(var a=n+1;a<r.length;a++)if(i=r[a].id,typeof i=="string"&&i.indexOf("plotly-")===-1)return i}}},moduleType:"trace",name:"choroplethmap",basePlotModule:d7(),categories:["map","gl","noOpacity","showLegend"],meta:{hr_name:"choropleth_map"}}});var Gje=ye((qbr,Vje)=>{"use strict";Vje.exports=Uje()});var qJ=ye((Bbr,jje)=>{"use strict";var aZt=Tu(),{hovertemplateAttrs:oZt,templatefallbackAttrs:sZt}=Ll(),Hje=Gl(),v7=n7(),OJ=Ao().extendFlat;jje.exports=OJ({lon:v7.lon,lat:v7.lat,z:{valType:"data_array",editType:"calc"},radius:{valType:"number",editType:"plot",arrayOk:!0,min:1,dflt:30},below:{valType:"string",editType:"plot"},text:v7.text,hovertext:v7.hovertext,hoverinfo:OJ({},Hje.hoverinfo,{flags:["lon","lat","z","text","name"]}),hovertemplate:oZt(),hovertemplatefallback:sZt(),showlegend:OJ({},Hje.showlegend,{dflt:!1})},aZt("",{cLetter:"z",editTypeOverride:"calc"}))});var Xje=ye((Nbr,Wje)=>{"use strict";var lZt=Dr(),uZt=Qh(),cZt=qJ();Wje.exports=function(t,r,n,i){function a(u,c){return lZt.coerce(t,r,cZt,u,c)}var o=a("lon")||[],s=a("lat")||[],l=Math.min(o.length,s.length);if(!l){r.visible=!1;return}r._length=l,a("z"),a("radius"),a("below"),a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),uZt(t,r,i,a,{prefix:"",cLetter:"z"})}});var Kje=ye((Ubr,Yje)=>{"use strict";var BJ=Eo(),fZt=Dr().isArrayOrTypedArray,NJ=fs().BADNUM,hZt=gv(),Zje=Dr()._;Yje.exports=function(t,r){for(var n=r._length,i=new Array(n),a=r.z,o=fZt(a)&&a.length,s=0;s<n;s++){var l=i[s]={},u=r.lon[s],c=r.lat[s];if(l.lonlat=BJ(u)&&BJ(c)?[+u,+c]:[NJ,NJ],o){var f=a[s];l.z=BJ(f)?f:NJ}}return hZt(t,r,{vals:o?a:[0,1],containerStr:"",cLetter:"z"}),n&&(i[0].t={labels:{lat:Zje(t,"lat:")+" ",lon:Zje(t,"lon:")+" "}}),i}});var tWe=ye((Vbr,eWe)=>{"use strict";var dZt=Eo(),UJ=Dr(),Jje=ka(),$je=tc(),Qje=fs().BADNUM,vZt=tx().makeBlank;eWe.exports=function(t){var r=t[0].trace,n=r.visible===!0&&r._length!==0,i={layout:{visibility:"none"},paint:{}},a=r._opts={heatmap:i,geojson:vZt()};if(!n)return a;var o=[],s,l=r.z,u=r.radius,c=UJ.isArrayOrTypedArray(l)&&l.length,f=UJ.isArrayOrTypedArray(u);for(s=0;s<t.length;s++){var h=t[s],d=h.lonlat;if(d[0]!==Qje){var v={};if(c){var _=h.z;v.z=_!==Qje?_:0}f&&(v.r=dZt(u[s])&&u[s]>0?+u[s]:0),o.push({type:"Feature",geometry:{type:"Point",coordinates:d},properties:v})}}var b=$je.extractOpts(r),p=b.reversescale?$je.flipScale(b.colorscale):b.colorscale,k=p[0][1],E=Jje.opacity(k)<1?k:Jje.addOpacity(k,0),S=["interpolate",["linear"],["heatmap-density"],0,E];for(s=1;s<p.length;s++)S.push(p[s][0],p[s][1]);var L=["interpolate",["linear"],["get","z"],b.min,0,b.max,1];return UJ.extendFlat(a.heatmap.paint,{"heatmap-weight":c?L:1/(b.max-b.min),"heatmap-color":S,"heatmap-radius":f?{type:"identity",property:"r"}:r.radius,"heatmap-opacity":r.opacity}),a.geojson={type:"FeatureCollection",features:o},a.heatmap.layout.visibility="visible",a}});var aWe=ye((Gbr,nWe)=>{"use strict";var rWe=tWe(),pZt=wx().traceLayerPrefix;function iWe(e,t){this.type="densitymap",this.subplot=e,this.uid=t,this.sourceId="source-"+t,this.layerList=[["heatmap",pZt+t+"-heatmap"]],this.below=null}var p7=iWe.prototype;p7.update=function(e){var t=this.subplot,r=this.layerList,n=rWe(e),i=t.belowLookup["trace-"+this.uid];t.map.getSource(this.sourceId).setData(n.geojson),i!==this.below&&(this._removeLayers(),this._addLayers(n,i),this.below=i);for(var a=0;a<r.length;a++){var o=r[a],s=o[0],l=o[1],u=n[s];t.setOptions(l,"setLayoutProperty",u.layout),u.layout.visibility==="visible"&&t.setOptions(l,"setPaintProperty",u.paint)}};p7._addLayers=function(e,t){for(var r=this.subplot,n=this.layerList,i=this.sourceId,a=0;a<n.length;a++){var o=n[a],s=o[0],l=e[s];r.addLayer({type:s,id:o[1],source:i,layout:l.layout,paint:l.paint},t)}};p7._removeLayers=function(){for(var e=this.subplot.map,t=this.layerList,r=t.length-1;r>=0;r--)e.removeLayer(t[r][1])};p7.dispose=function(){var e=this.subplot.map;this._removeLayers(),e.removeSource(this.sourceId)};nWe.exports=function(t,r){var n=r[0].trace,i=new iWe(t,n.uid),a=i.sourceId,o=rWe(r),s=i.below=t.belowLookup["trace-"+n.uid];return t.map.addSource(a,{type:"geojson",data:o.geojson}),i._addLayers(o,s),i}});var sWe=ye((Hbr,oWe)=>{"use strict";var gZt=ho(),mZt=u7().hoverPoints,yZt=u7().getExtraText;oWe.exports=function(t,r,n){var i=mZt(t,r,n);if(i){var a=i[0],o=a.cd,s=o[0].trace,l=o[a.index];if(delete a.color,"z"in l){var u=a.subplot.mockAxis;a.z=l.z,a.zLabel=gZt.tickText(u,u.c2l(l.z),"hover").text}return a.extraText=yZt(s,l,o[0].t.labels),[a]}}});var uWe=ye((jbr,lWe)=>{"use strict";lWe.exports=function(t,r){return t.lon=r.lon,t.lat=r.lat,t.z=r.z,t}});var fWe=ye((Wbr,cWe)=>{"use strict";cWe.exports={attributes:qJ(),supplyDefaults:Xje(),colorbar:S_(),formatLabels:AJ(),calc:Kje(),plot:aWe(),hoverPoints:sWe(),eventData:uWe(),getBelow:function(e,t){for(var r=t.getMapLayers(),n=0;n<r.length;n++){var i=r[n],a=i.id;if(i.type==="symbol"&&typeof a=="string"&&a.indexOf("plotly-")===-1)return a}},moduleType:"trace",name:"densitymap",basePlotModule:d7(),categories:["map","gl","showLegend"],meta:{hr_name:"density_map"}}});var dWe=ye((Xbr,hWe)=>{"use strict";hWe.exports=fWe()});var GJ=ye((Ybr,yWe)=>{"use strict";var _Zt=ec(),xZt=Gl(),vWe=Lh(),VJ=a3(),bZt=Cc().attributes,{hovertemplateAttrs:pWe,templatefallbackAttrs:gWe}=Ll(),wZt=Tu(),TZt=vl().templatedArray,AZt=df().descriptionOnlyNumbers,mWe=Ao().extendFlat,SZt=mc().overrideAll,Zbr=yWe.exports=SZt({hoverinfo:mWe({},xZt.hoverinfo,{flags:[],arrayOk:!1}),hoverlabel:VJ.hoverlabel,domain:bZt({name:"sankey",trace:!0}),orientation:{valType:"enumerated",values:["v","h"],dflt:"h"},valueformat:{valType:"string",dflt:".3s",description:AZt("value")},valuesuffix:{valType:"string",dflt:""},arrangement:{valType:"enumerated",values:["snap","perpendicular","freeform","fixed"],dflt:"snap"},textfont:_Zt({autoShadowDflt:!0}),customdata:void 0,node:{label:{valType:"data_array",dflt:[]},groups:{valType:"info_array",impliedEdits:{x:[],y:[]},dimensions:2,freeLength:!0,dflt:[],items:{valType:"number",editType:"calc"}},x:{valType:"data_array",dflt:[]},y:{valType:"data_array",dflt:[]},color:{valType:"color",arrayOk:!0},customdata:{valType:"data_array",editType:"calc"},line:{color:{valType:"color",dflt:vWe.defaultLine,arrayOk:!0},width:{valType:"number",min:0,dflt:.5,arrayOk:!0}},pad:{valType:"number",arrayOk:!1,min:0,dflt:20},thickness:{valType:"number",arrayOk:!1,min:1,dflt:20},hoverinfo:{valType:"enumerated",values:["all","none","skip"],dflt:"all"},hoverlabel:VJ.hoverlabel,hovertemplate:pWe({},{keys:["value","label"]}),hovertemplatefallback:gWe(),align:{valType:"enumerated",values:["justify","left","right","center"],dflt:"justify"}},link:{arrowlen:{valType:"number",min:0,dflt:0},label:{valType:"data_array",dflt:[]},color:{valType:"color",arrayOk:!0},hovercolor:{valType:"color",arrayOk:!0},customdata:{valType:"data_array",editType:"calc"},line:{color:{valType:"color",dflt:vWe.defaultLine,arrayOk:!0},width:{valType:"number",min:0,dflt:0,arrayOk:!0}},source:{valType:"data_array",dflt:[]},target:{valType:"data_array",dflt:[]},value:{valType:"data_array",dflt:[]},hoverinfo:{valType:"enumerated",values:["all","none","skip"],dflt:"all"},hoverlabel:VJ.hoverlabel,hovertemplate:pWe({},{keys:["value","label"]}),hovertemplatefallback:gWe(),colorscales:TZt("concentrationscales",{editType:"calc",label:{valType:"string",editType:"calc",dflt:""},cmax:{valType:"number",editType:"calc",dflt:1},cmin:{valType:"number",editType:"calc",dflt:0},colorscale:mWe(wZt().colorscale,{dflt:[[0,"white"],[1,"black"]]})})}},"calc","nested")});var TWe=ye((Kbr,wWe)=>{"use strict";var P5=Dr(),g7=GJ(),MZt=ka(),_We=cd(),EZt=Cc().defaults,xWe=hM(),bWe=vl(),kZt=Yd();wWe.exports=function(t,r,n,i){function a(S,L){return P5.coerce(t,r,g7,S,L)}var o=P5.extendDeep(i.hoverlabel,t.hoverlabel),s=t.node,l=bWe.newContainer(r,"node");function u(S,L){return P5.coerce(s,l,g7.node,S,L)}u("label"),u("groups"),u("x"),u("y"),u("pad"),u("thickness"),u("line.color"),u("line.width"),u("hoverinfo",t.hoverinfo),xWe(s,l,u,o),u("hovertemplate"),u("align");var c=i.colorway,f=function(S){return c[S%c.length]};u("color",l.label.map(function(S,L){return MZt.addOpacity(f(L),.8)})),u("customdata");var h=t.link||{},d=bWe.newContainer(r,"link");function v(S,L){return P5.coerce(h,d,g7.link,S,L)}v("label"),v("arrowlen"),v("source"),v("target"),v("value"),v("line.color"),v("line.width"),v("hoverinfo",t.hoverinfo),xWe(h,d,v,o),v("hovertemplate");var _=_We(i.paper_bgcolor).getLuminance()<.333,b=_?"rgba(255, 255, 255, 0.6)":"rgba(0, 0, 0, 0.2)",p=v("color",b);function k(S){var L=_We(S);if(!L.isValid())return S;var x=L.getAlpha();return x<=.8?L.setAlpha(x+.2):L=_?L.brighten():L.darken(),L.toRgbString()}v("hovercolor",Array.isArray(p)?p.map(k):k(p)),v("customdata"),kZt(h,d,{name:"colorscales",handleItemDefaults:CZt}),EZt(r,i,a),a("orientation"),a("valueformat"),a("valuesuffix");var E;l.x.length&&l.y.length&&(E="freeform"),a("arrangement",E),P5.coerceFont(a,"textfont",i.font,{autoShadowDflt:!0}),r._length=null};function CZt(e,t){function r(n,i){return P5.coerce(e,t,g7.link.colorscales,n,i)}r("label"),r("cmin"),r("cmax"),r("colorscale")}});var HJ=ye((Jbr,AWe)=>{"use strict";AWe.exports=LZt;function LZt(e){for(var t=e.length,r=new Array(t),n=new Array(t),i=new Array(t),a=new Array(t),o=new Array(t),s=new Array(t),l=0;l<t;++l)r[l]=-1,n[l]=0,i[l]=!1,a[l]=0,o[l]=-1,s[l]=[];var u=0,c=[],f=[];function h(b){var p=[b],k=[b];for(r[b]=n[b]=u,i[b]=!0,u+=1;k.length>0;){b=k[k.length-1];var E=e[b];if(a[b]<E.length){for(var S=a[b];S<E.length;++S){var L=E[S];if(r[L]<0){r[L]=n[L]=u,i[L]=!0,u+=1,p.push(L),k.push(L);break}else i[L]&&(n[b]=Math.min(n[b],n[L])|0);o[L]>=0&&s[b].push(o[L])}a[b]=S}else{if(n[b]===r[b]){for(var x=[],C=[],M=0,S=p.length-1;S>=0;--S){var g=p[S];if(i[g]=!1,x.push(g),C.push(s[g]),M+=s[g].length,o[g]=c.length,g===b){p.length=S;break}}c.push(x);for(var P=new Array(M),S=0;S<C.length;S++)for(var T=0;T<C[S].length;T++)P[--M]=C[S][T];f.push(P)}k.pop()}}}for(var l=0;l<t;++l)r[l]<0&&h(l);for(var d,l=0;l<f.length;l++){var v=f[l];if(v.length!==0){v.sort(function(p,k){return p-k}),d=[v[0]];for(var _=1;_<v.length;_++)v[_]!==v[_-1]&&d.push(v[_]);f[l]=d}}return{components:c,adjacencyList:f}}});var kWe=ye(($br,EWe)=>{"use strict";var PZt=HJ(),I5=Dr(),IZt=Jm().wrap,Kk=I5.isArrayOrTypedArray,SWe=I5.isIndex,MWe=tc();function RZt(e){var t=e.node,r=e.link,n=[],i=Kk(r.color),a=Kk(r.hovercolor),o=Kk(r.customdata),s={},l={},u=r.colorscales.length,c;for(c=0;c<u;c++){var f=r.colorscales[c],h=MWe.extractScale(f,{cLetter:"c"}),d=MWe.makeColorScaleFunc(h);l[f.label]=d}var v=0;for(c=0;c<r.value.length;c++)r.source[c]>v&&(v=r.source[c]),r.target[c]>v&&(v=r.target[c]);var _=v+1;e.node._count=_;var b,p=e.node.groups,k={};for(c=0;c<p.length;c++){var E=p[c];for(b=0;b<E.length;b++){var S=E[b],L=_+c;k.hasOwnProperty(S)?I5.warn("Node "+S+" is already part of a group."):k[S]=L}}var x={source:[],target:[]};for(c=0;c<r.value.length;c++){var C=r.value[c],M=r.source[c],g=r.target[c];if(C>0&&SWe(M,_)&&SWe(g,_)&&!(k.hasOwnProperty(M)&&k.hasOwnProperty(g)&&k[M]===k[g])){k.hasOwnProperty(g)&&(g=k[g]),k.hasOwnProperty(M)&&(M=k[M]),M=+M,g=+g,s[M]=s[g]=!0;var P="";r.label&&r.label[c]&&(P=r.label[c]);var T=null;P&&l.hasOwnProperty(P)&&(T=l[P]),n.push({pointNumber:c,label:P,color:i?r.color[c]:r.color,hovercolor:a?r.hovercolor[c]:r.hovercolor,customdata:o?r.customdata[c]:r.customdata,concentrationscale:T,source:M,target:g,value:+C}),x.source.push(M),x.target.push(g)}}var z=_+p.length,O=Kk(t.color),V=Kk(t.customdata),G=[];for(c=0;c<z;c++)if(s[c]){var Z=t.label[c];G.push({group:c>_-1,childrenNodes:[],pointNumber:c,label:Z,color:O?t.color[c]:t.color,customdata:V?t.customdata[c]:t.customdata})}var j=!1;return DZt(z,x.source,x.target)&&(j=!0),{circular:j,links:n,nodes:G,groups:p,groupLookup:k}}function DZt(e,t,r){for(var n=I5.init2dArray(e,0),i=0;i<Math.min(t.length,r.length);i++)if(I5.isIndex(t[i],e)&&I5.isIndex(r[i],e)){if(t[i]===r[i])return!0;n[t[i]].push(r[i])}var a=PZt(n);return a.components.some(function(o){return o.length>1})}EWe.exports=function(t,r){var n=RZt(r);return IZt({circular:n.circular,_nodes:n.nodes,_links:n.links,_groups:n.groups,_groupLookup:n.groupLookup})}});var LWe=ye((m7,CWe)=>{(function(e,t){typeof m7=="object"&&typeof CWe!="undefined"?t(m7):(e=e||self,t(e.d3=e.d3||{}))})(m7,function(e){"use strict";function t(C){var M=+this._x.call(null,C),g=+this._y.call(null,C);return r(this.cover(M,g),M,g,C)}function r(C,M,g,P){if(isNaN(M)||isNaN(g))return C;var T,z=C._root,O={data:P},V=C._x0,G=C._y0,Z=C._x1,j=C._y1,N,H,te,oe,_e,Ee,Ce,me;if(!z)return C._root=O,C;for(;z.length;)if((_e=M>=(N=(V+Z)/2))?V=N:Z=N,(Ee=g>=(H=(G+j)/2))?G=H:j=H,T=z,!(z=z[Ce=Ee<<1|_e]))return T[Ce]=O,C;if(te=+C._x.call(null,z.data),oe=+C._y.call(null,z.data),M===te&&g===oe)return O.next=z,T?T[Ce]=O:C._root=O,C;do T=T?T[Ce]=new Array(4):C._root=new Array(4),(_e=M>=(N=(V+Z)/2))?V=N:Z=N,(Ee=g>=(H=(G+j)/2))?G=H:j=H;while((Ce=Ee<<1|_e)===(me=(oe>=H)<<1|te>=N));return T[me]=z,T[Ce]=O,C}function n(C){var M,g,P=C.length,T,z,O=new Array(P),V=new Array(P),G=1/0,Z=1/0,j=-1/0,N=-1/0;for(g=0;g<P;++g)isNaN(T=+this._x.call(null,M=C[g]))||isNaN(z=+this._y.call(null,M))||(O[g]=T,V[g]=z,T<G&&(G=T),T>j&&(j=T),z<Z&&(Z=z),z>N&&(N=z));if(G>j||Z>N)return this;for(this.cover(G,Z).cover(j,N),g=0;g<P;++g)r(this,O[g],V[g],C[g]);return this}function i(C,M){if(isNaN(C=+C)||isNaN(M=+M))return this;var g=this._x0,P=this._y0,T=this._x1,z=this._y1;if(isNaN(g))T=(g=Math.floor(C))+1,z=(P=Math.floor(M))+1;else{for(var O=T-g,V=this._root,G,Z;g>C||C>=T||P>M||M>=z;)switch(Z=(M<P)<<1|C<g,G=new Array(4),G[Z]=V,V=G,O*=2,Z){case 0:T=g+O,z=P+O;break;case 1:g=T-O,z=P+O;break;case 2:T=g+O,P=z-O;break;case 3:g=T-O,P=z-O;break}this._root&&this._root.length&&(this._root=V)}return this._x0=g,this._y0=P,this._x1=T,this._y1=z,this}function a(){var C=[];return this.visit(function(M){if(!M.length)do C.push(M.data);while(M=M.next)}),C}function o(C){return arguments.length?this.cover(+C[0][0],+C[0][1]).cover(+C[1][0],+C[1][1]):isNaN(this._x0)?void 0:[[this._x0,this._y0],[this._x1,this._y1]]}function s(C,M,g,P,T){this.node=C,this.x0=M,this.y0=g,this.x1=P,this.y1=T}function l(C,M,g){var P,T=this._x0,z=this._y0,O,V,G,Z,j=this._x1,N=this._y1,H=[],te=this._root,oe,_e;for(te&&H.push(new s(te,T,z,j,N)),g==null?g=1/0:(T=C-g,z=M-g,j=C+g,N=M+g,g*=g);oe=H.pop();)if(!(!(te=oe.node)||(O=oe.x0)>j||(V=oe.y0)>N||(G=oe.x1)<T||(Z=oe.y1)<z))if(te.length){var Ee=(O+G)/2,Ce=(V+Z)/2;H.push(new s(te[3],Ee,Ce,G,Z),new s(te[2],O,Ce,Ee,Z),new s(te[1],Ee,V,G,Ce),new s(te[0],O,V,Ee,Ce)),(_e=(M>=Ce)<<1|C>=Ee)&&(oe=H[H.length-1],H[H.length-1]=H[H.length-1-_e],H[H.length-1-_e]=oe)}else{var me=C-+this._x.call(null,te.data),ie=M-+this._y.call(null,te.data),Se=me*me+ie*ie;if(Se<g){var Le=Math.sqrt(g=Se);T=C-Le,z=M-Le,j=C+Le,N=M+Le,P=te.data}}return P}function u(C){if(isNaN(j=+this._x.call(null,C))||isNaN(N=+this._y.call(null,C)))return this;var M,g=this._root,P,T,z,O=this._x0,V=this._y0,G=this._x1,Z=this._y1,j,N,H,te,oe,_e,Ee,Ce;if(!g)return this;if(g.length)for(;;){if((oe=j>=(H=(O+G)/2))?O=H:G=H,(_e=N>=(te=(V+Z)/2))?V=te:Z=te,M=g,!(g=g[Ee=_e<<1|oe]))return this;if(!g.length)break;(M[Ee+1&3]||M[Ee+2&3]||M[Ee+3&3])&&(P=M,Ce=Ee)}for(;g.data!==C;)if(T=g,!(g=g.next))return this;return(z=g.next)&&delete g.next,T?(z?T.next=z:delete T.next,this):M?(z?M[Ee]=z:delete M[Ee],(g=M[0]||M[1]||M[2]||M[3])&&g===(M[3]||M[2]||M[1]||M[0])&&!g.length&&(P?P[Ce]=g:this._root=g),this):(this._root=z,this)}function c(C){for(var M=0,g=C.length;M<g;++M)this.remove(C[M]);return this}function f(){return this._root}function h(){var C=0;return this.visit(function(M){if(!M.length)do++C;while(M=M.next)}),C}function d(C){var M=[],g,P=this._root,T,z,O,V,G;for(P&&M.push(new s(P,this._x0,this._y0,this._x1,this._y1));g=M.pop();)if(!C(P=g.node,z=g.x0,O=g.y0,V=g.x1,G=g.y1)&&P.length){var Z=(z+V)/2,j=(O+G)/2;(T=P[3])&&M.push(new s(T,Z,j,V,G)),(T=P[2])&&M.push(new s(T,z,j,Z,G)),(T=P[1])&&M.push(new s(T,Z,O,V,j)),(T=P[0])&&M.push(new s(T,z,O,Z,j))}return this}function v(C){var M=[],g=[],P;for(this._root&&M.push(new s(this._root,this._x0,this._y0,this._x1,this._y1));P=M.pop();){var T=P.node;if(T.length){var z,O=P.x0,V=P.y0,G=P.x1,Z=P.y1,j=(O+G)/2,N=(V+Z)/2;(z=T[0])&&M.push(new s(z,O,V,j,N)),(z=T[1])&&M.push(new s(z,j,V,G,N)),(z=T[2])&&M.push(new s(z,O,N,j,Z)),(z=T[3])&&M.push(new s(z,j,N,G,Z))}g.push(P)}for(;P=g.pop();)C(P.node,P.x0,P.y0,P.x1,P.y1);return this}function _(C){return C[0]}function b(C){return arguments.length?(this._x=C,this):this._x}function p(C){return C[1]}function k(C){return arguments.length?(this._y=C,this):this._y}function E(C,M,g){var P=new S(M==null?_:M,g==null?p:g,NaN,NaN,NaN,NaN);return C==null?P:P.addAll(C)}function S(C,M,g,P,T,z){this._x=C,this._y=M,this._x0=g,this._y0=P,this._x1=T,this._y1=z,this._root=void 0}function L(C){for(var M={data:C.data},g=M;C=C.next;)g=g.next={data:C.data};return M}var x=E.prototype=S.prototype;x.copy=function(){var C=new S(this._x,this._y,this._x0,this._y0,this._x1,this._y1),M=this._root,g,P;if(!M)return C;if(!M.length)return C._root=L(M),C;for(g=[{source:M,target:C._root=new Array(4)}];M=g.pop();)for(var T=0;T<4;++T)(P=M.source[T])&&(P.length?g.push({source:P,target:M.target[T]=new Array(4)}):M.target[T]=L(P));return C},x.add=t,x.addAll=n,x.cover=i,x.data=a,x.extent=o,x.find=l,x.remove=u,x.removeAll=c,x.root=f,x.size=h,x.visit=d,x.visitAfter=v,x.x=b,x.y=k,e.quadtree=E,Object.defineProperty(e,"__esModule",{value:!0})})});var _7=ye((y7,PWe)=>{(function(e,t){t(typeof y7=="object"&&typeof PWe!="undefined"?y7:e.d3=e.d3||{})})(y7,function(e){"use strict";var t="$";function r(){}r.prototype=n.prototype={constructor:r,has:function(_){return t+_ in this},get:function(_){return this[t+_]},set:function(_,b){return this[t+_]=b,this},remove:function(_){var b=t+_;return b in this&&delete this[b]},clear:function(){for(var _ in this)_[0]===t&&delete this[_]},keys:function(){var _=[];for(var b in this)b[0]===t&&_.push(b.slice(1));return _},values:function(){var _=[];for(var b in this)b[0]===t&&_.push(this[b]);return _},entries:function(){var _=[];for(var b in this)b[0]===t&&_.push({key:b.slice(1),value:this[b]});return _},size:function(){var _=0;for(var b in this)b[0]===t&&++_;return _},empty:function(){for(var _ in this)if(_[0]===t)return!1;return!0},each:function(_){for(var b in this)b[0]===t&&_(this[b],b.slice(1),this)}};function n(_,b){var p=new r;if(_ instanceof r)_.each(function(x,C){p.set(C,x)});else if(Array.isArray(_)){var k=-1,E=_.length,S;if(b==null)for(;++k<E;)p.set(k,_[k]);else for(;++k<E;)p.set(b(S=_[k],k,_),S)}else if(_)for(var L in _)p.set(L,_[L]);return p}function i(){var _=[],b=[],p,k,E;function S(x,C,M,g){if(C>=_.length)return p!=null&&x.sort(p),k!=null?k(x):x;for(var P=-1,T=x.length,z=_[C++],O,V,G=n(),Z,j=M();++P<T;)(Z=G.get(O=z(V=x[P])+""))?Z.push(V):G.set(O,[V]);return G.each(function(N,H){g(j,H,S(N,C,M,g))}),j}function L(x,C){if(++C>_.length)return x;var M,g=b[C-1];return k!=null&&C>=_.length?M=x.entries():(M=[],x.each(function(P,T){M.push({key:T,values:L(P,C)})})),g!=null?M.sort(function(P,T){return g(P.key,T.key)}):M}return E={object:function(x){return S(x,0,a,o)},map:function(x){return S(x,0,s,l)},entries:function(x){return L(S(x,0,s,l),0)},key:function(x){return _.push(x),E},sortKeys:function(x){return b[_.length-1]=x,E},sortValues:function(x){return p=x,E},rollup:function(x){return k=x,E}}}function a(){return{}}function o(_,b,p){_[b]=p}function s(){return n()}function l(_,b,p){_.set(b,p)}function u(){}var c=n.prototype;u.prototype=f.prototype={constructor:u,has:c.has,add:function(_){return _+="",this[t+_]=_,this},remove:c.remove,clear:c.clear,values:c.keys,size:c.size,empty:c.empty,each:c.each};function f(_,b){var p=new u;if(_ instanceof u)_.each(function(S){p.add(S)});else if(_){var k=-1,E=_.length;if(b==null)for(;++k<E;)p.add(_[k]);else for(;++k<E;)p.add(b(_[k],k,_))}return p}function h(_){var b=[];for(var p in _)b.push(p);return b}function d(_){var b=[];for(var p in _)b.push(_[p]);return b}function v(_){var b=[];for(var p in _)b.push({key:p,value:_[p]});return b}e.nest=i,e.set=f,e.map=n,e.keys=h,e.values=d,e.entries=v,Object.defineProperty(e,"__esModule",{value:!0})})});var RWe=ye((x7,IWe)=>{(function(e,t){typeof x7=="object"&&typeof IWe!="undefined"?t(x7):(e=e||self,t(e.d3=e.d3||{}))})(x7,function(e){"use strict";var t={value:function(){}};function r(){for(var s=0,l=arguments.length,u={},c;s<l;++s){if(!(c=arguments[s]+"")||c in u||/[\s.]/.test(c))throw new Error("illegal type: "+c);u[c]=[]}return new n(u)}function n(s){this._=s}function i(s,l){return s.trim().split(/^|\s+/).map(function(u){var c="",f=u.indexOf(".");if(f>=0&&(c=u.slice(f+1),u=u.slice(0,f)),u&&!l.hasOwnProperty(u))throw new Error("unknown type: "+u);return{type:u,name:c}})}n.prototype=r.prototype={constructor:n,on:function(s,l){var u=this._,c=i(s+"",u),f,h=-1,d=c.length;if(arguments.length<2){for(;++h<d;)if((f=(s=c[h]).type)&&(f=a(u[f],s.name)))return f;return}if(l!=null&&typeof l!="function")throw new Error("invalid callback: "+l);for(;++h<d;)if(f=(s=c[h]).type)u[f]=o(u[f],s.name,l);else if(l==null)for(f in u)u[f]=o(u[f],s.name,null);return this},copy:function(){var s={},l=this._;for(var u in l)s[u]=l[u].slice();return new n(s)},call:function(s,l){if((f=arguments.length-2)>0)for(var u=new Array(f),c=0,f,h;c<f;++c)u[c]=arguments[c+2];if(!this._.hasOwnProperty(s))throw new Error("unknown type: "+s);for(h=this._[s],c=0,f=h.length;c<f;++c)h[c].value.apply(l,u)},apply:function(s,l,u){if(!this._.hasOwnProperty(s))throw new Error("unknown type: "+s);for(var c=this._[s],f=0,h=c.length;f<h;++f)c[f].value.apply(l,u)}};function a(s,l){for(var u=0,c=s.length,f;u<c;++u)if((f=s[u]).name===l)return f.value}function o(s,l,u){for(var c=0,f=s.length;c<f;++c)if(s[c].name===l){s[c]=t,s=s.slice(0,c).concat(s.slice(c+1));break}return u!=null&&s.push({name:l,value:u}),s}e.dispatch=r,Object.defineProperty(e,"__esModule",{value:!0})})});var FWe=ye((b7,DWe)=>{(function(e,t){typeof b7=="object"&&typeof DWe!="undefined"?t(b7):(e=e||self,t(e.d3=e.d3||{}))})(b7,function(e){"use strict";var t=0,r=0,n=0,i=1e3,a,o,s=0,l=0,u=0,c=typeof performance=="object"&&performance.now?performance:Date,f=typeof window=="object"&&window.requestAnimationFrame?window.requestAnimationFrame.bind(window):function(C){setTimeout(C,17)};function h(){return l||(f(d),l=c.now()+u)}function d(){l=0}function v(){this._call=this._time=this._next=null}v.prototype=_.prototype={constructor:v,restart:function(C,M,g){if(typeof C!="function")throw new TypeError("callback is not a function");g=(g==null?h():+g)+(M==null?0:+M),!this._next&&o!==this&&(o?o._next=this:a=this,o=this),this._call=C,this._time=g,S()},stop:function(){this._call&&(this._call=null,this._time=1/0,S())}};function _(C,M,g){var P=new v;return P.restart(C,M,g),P}function b(){h(),++t;for(var C=a,M;C;)(M=l-C._time)>=0&&C._call.call(null,M),C=C._next;--t}function p(){l=(s=c.now())+u,t=r=0;try{b()}finally{t=0,E(),l=0}}function k(){var C=c.now(),M=C-s;M>i&&(u-=M,s=C)}function E(){for(var C,M=a,g,P=1/0;M;)M._call?(P>M._time&&(P=M._time),C=M,M=M._next):(g=M._next,M._next=null,M=C?C._next=g:a=g);o=C,S(P)}function S(C){if(!t){r&&(r=clearTimeout(r));var M=C-l;M>24?(C<1/0&&(r=setTimeout(p,C-c.now()-u)),n&&(n=clearInterval(n))):(n||(s=c.now(),n=setInterval(k,i)),t=1,f(p))}}function L(C,M,g){var P=new v;return M=M==null?0:+M,P.restart(function(T){P.stop(),C(T+M)},M,g),P}function x(C,M,g){var P=new v,T=M;return M==null?(P.restart(C,M,g),P):(M=+M,g=g==null?h():+g,P.restart(function z(O){O+=T,P.restart(z,T+=M,g),C(O)},M,g),P)}e.interval=x,e.now=h,e.timeout=L,e.timer=_,e.timerFlush=b,Object.defineProperty(e,"__esModule",{value:!0})})});var OWe=ye((w7,zWe)=>{(function(e,t){typeof w7=="object"&&typeof zWe!="undefined"?t(w7,LWe(),_7(),RWe(),FWe()):t(e.d3=e.d3||{},e.d3,e.d3,e.d3,e.d3)})(w7,function(e,t,r,n,i){"use strict";function a(C,M){var g;C==null&&(C=0),M==null&&(M=0);function P(){var T,z=g.length,O,V=0,G=0;for(T=0;T<z;++T)O=g[T],V+=O.x,G+=O.y;for(V=V/z-C,G=G/z-M,T=0;T<z;++T)O=g[T],O.x-=V,O.y-=G}return P.initialize=function(T){g=T},P.x=function(T){return arguments.length?(C=+T,P):C},P.y=function(T){return arguments.length?(M=+T,P):M},P}function o(C){return function(){return C}}function s(){return(Math.random()-.5)*1e-6}function l(C){return C.x+C.vx}function u(C){return C.y+C.vy}function c(C){var M,g,P=1,T=1;typeof C!="function"&&(C=o(C==null?1:+C));function z(){for(var G,Z=M.length,j,N,H,te,oe,_e,Ee=0;Ee<T;++Ee)for(j=t.quadtree(M,l,u).visitAfter(O),G=0;G<Z;++G)N=M[G],oe=g[N.index],_e=oe*oe,H=N.x+N.vx,te=N.y+N.vy,j.visit(Ce);function Ce(me,ie,Se,Le,Ae){var Fe=me.data,Pe=me.r,ge=oe+Pe;if(Fe){if(Fe.index>N.index){var Re=H-Fe.x-Fe.vx,ce=te-Fe.y-Fe.vy,Ze=Re*Re+ce*ce;Ze<ge*ge&&(Re===0&&(Re=s(),Ze+=Re*Re),ce===0&&(ce=s(),Ze+=ce*ce),Ze=(ge-(Ze=Math.sqrt(Ze)))/Ze*P,N.vx+=(Re*=Ze)*(ge=(Pe*=Pe)/(_e+Pe)),N.vy+=(ce*=Ze)*ge,Fe.vx-=Re*(ge=1-ge),Fe.vy-=ce*ge)}return}return ie>H+ge||Le<H-ge||Se>te+ge||Ae<te-ge}}function O(G){if(G.data)return G.r=g[G.data.index];for(var Z=G.r=0;Z<4;++Z)G[Z]&&G[Z].r>G.r&&(G.r=G[Z].r)}function V(){if(M){var G,Z=M.length,j;for(g=new Array(Z),G=0;G<Z;++G)j=M[G],g[j.index]=+C(j,G,M)}}return z.initialize=function(G){M=G,V()},z.iterations=function(G){return arguments.length?(T=+G,z):T},z.strength=function(G){return arguments.length?(P=+G,z):P},z.radius=function(G){return arguments.length?(C=typeof G=="function"?G:o(+G),V(),z):C},z}function f(C){return C.index}function h(C,M){var g=C.get(M);if(!g)throw new Error("missing: "+M);return g}function d(C){var M=f,g=j,P,T=o(30),z,O,V,G,Z=1;C==null&&(C=[]);function j(_e){return 1/Math.min(V[_e.source.index],V[_e.target.index])}function N(_e){for(var Ee=0,Ce=C.length;Ee<Z;++Ee)for(var me=0,ie,Se,Le,Ae,Fe,Pe,ge;me<Ce;++me)ie=C[me],Se=ie.source,Le=ie.target,Ae=Le.x+Le.vx-Se.x-Se.vx||s(),Fe=Le.y+Le.vy-Se.y-Se.vy||s(),Pe=Math.sqrt(Ae*Ae+Fe*Fe),Pe=(Pe-z[me])/Pe*_e*P[me],Ae*=Pe,Fe*=Pe,Le.vx-=Ae*(ge=G[me]),Le.vy-=Fe*ge,Se.vx+=Ae*(ge=1-ge),Se.vy+=Fe*ge}function H(){if(O){var _e,Ee=O.length,Ce=C.length,me=r.map(O,M),ie;for(_e=0,V=new Array(Ee);_e<Ce;++_e)ie=C[_e],ie.index=_e,typeof ie.source!="object"&&(ie.source=h(me,ie.source)),typeof ie.target!="object"&&(ie.target=h(me,ie.target)),V[ie.source.index]=(V[ie.source.index]||0)+1,V[ie.target.index]=(V[ie.target.index]||0)+1;for(_e=0,G=new Array(Ce);_e<Ce;++_e)ie=C[_e],G[_e]=V[ie.source.index]/(V[ie.source.index]+V[ie.target.index]);P=new Array(Ce),te(),z=new Array(Ce),oe()}}function te(){if(O)for(var _e=0,Ee=C.length;_e<Ee;++_e)P[_e]=+g(C[_e],_e,C)}function oe(){if(O)for(var _e=0,Ee=C.length;_e<Ee;++_e)z[_e]=+T(C[_e],_e,C)}return N.initialize=function(_e){O=_e,H()},N.links=function(_e){return arguments.length?(C=_e,H(),N):C},N.id=function(_e){return arguments.length?(M=_e,N):M},N.iterations=function(_e){return arguments.length?(Z=+_e,N):Z},N.strength=function(_e){return arguments.length?(g=typeof _e=="function"?_e:o(+_e),te(),N):g},N.distance=function(_e){return arguments.length?(T=typeof _e=="function"?_e:o(+_e),oe(),N):T},N}function v(C){return C.x}function _(C){return C.y}var b=10,p=Math.PI*(3-Math.sqrt(5));function k(C){var M,g=1,P=.001,T=1-Math.pow(P,1/300),z=0,O=.6,V=r.map(),G=i.timer(j),Z=n.dispatch("tick","end");C==null&&(C=[]);function j(){N(),Z.call("tick",M),g<P&&(G.stop(),Z.call("end",M))}function N(oe){var _e,Ee=C.length,Ce;oe===void 0&&(oe=1);for(var me=0;me<oe;++me)for(g+=(z-g)*T,V.each(function(ie){ie(g)}),_e=0;_e<Ee;++_e)Ce=C[_e],Ce.fx==null?Ce.x+=Ce.vx*=O:(Ce.x=Ce.fx,Ce.vx=0),Ce.fy==null?Ce.y+=Ce.vy*=O:(Ce.y=Ce.fy,Ce.vy=0);return M}function H(){for(var oe=0,_e=C.length,Ee;oe<_e;++oe){if(Ee=C[oe],Ee.index=oe,Ee.fx!=null&&(Ee.x=Ee.fx),Ee.fy!=null&&(Ee.y=Ee.fy),isNaN(Ee.x)||isNaN(Ee.y)){var Ce=b*Math.sqrt(oe),me=oe*p;Ee.x=Ce*Math.cos(me),Ee.y=Ce*Math.sin(me)}(isNaN(Ee.vx)||isNaN(Ee.vy))&&(Ee.vx=Ee.vy=0)}}function te(oe){return oe.initialize&&oe.initialize(C),oe}return H(),M={tick:N,restart:function(){return G.restart(j),M},stop:function(){return G.stop(),M},nodes:function(oe){return arguments.length?(C=oe,H(),V.each(te),M):C},alpha:function(oe){return arguments.length?(g=+oe,M):g},alphaMin:function(oe){return arguments.length?(P=+oe,M):P},alphaDecay:function(oe){return arguments.length?(T=+oe,M):+T},alphaTarget:function(oe){return arguments.length?(z=+oe,M):z},velocityDecay:function(oe){return arguments.length?(O=1-oe,M):1-O},force:function(oe,_e){return arguments.length>1?(_e==null?V.remove(oe):V.set(oe,te(_e)),M):V.get(oe)},find:function(oe,_e,Ee){var Ce=0,me=C.length,ie,Se,Le,Ae,Fe;for(Ee==null?Ee=1/0:Ee*=Ee,Ce=0;Ce<me;++Ce)Ae=C[Ce],ie=oe-Ae.x,Se=_e-Ae.y,Le=ie*ie+Se*Se,Le<Ee&&(Fe=Ae,Ee=Le);return Fe},on:function(oe,_e){return arguments.length>1?(Z.on(oe,_e),M):Z.on(oe)}}}function E(){var C,M,g,P=o(-30),T,z=1,O=1/0,V=.81;function G(H){var te,oe=C.length,_e=t.quadtree(C,v,_).visitAfter(j);for(g=H,te=0;te<oe;++te)M=C[te],_e.visit(N)}function Z(){if(C){var H,te=C.length,oe;for(T=new Array(te),H=0;H<te;++H)oe=C[H],T[oe.index]=+P(oe,H,C)}}function j(H){var te=0,oe,_e,Ee=0,Ce,me,ie;if(H.length){for(Ce=me=ie=0;ie<4;++ie)(oe=H[ie])&&(_e=Math.abs(oe.value))&&(te+=oe.value,Ee+=_e,Ce+=_e*oe.x,me+=_e*oe.y);H.x=Ce/Ee,H.y=me/Ee}else{oe=H,oe.x=oe.data.x,oe.y=oe.data.y;do te+=T[oe.data.index];while(oe=oe.next)}H.value=te}function N(H,te,oe,_e){if(!H.value)return!0;var Ee=H.x-M.x,Ce=H.y-M.y,me=_e-te,ie=Ee*Ee+Ce*Ce;if(me*me/V<ie)return ie<O&&(Ee===0&&(Ee=s(),ie+=Ee*Ee),Ce===0&&(Ce=s(),ie+=Ce*Ce),ie<z&&(ie=Math.sqrt(z*ie)),M.vx+=Ee*H.value*g/ie,M.vy+=Ce*H.value*g/ie),!0;if(H.length||ie>=O)return;(H.data!==M||H.next)&&(Ee===0&&(Ee=s(),ie+=Ee*Ee),Ce===0&&(Ce=s(),ie+=Ce*Ce),ie<z&&(ie=Math.sqrt(z*ie)));do H.data!==M&&(me=T[H.data.index]*g/ie,M.vx+=Ee*me,M.vy+=Ce*me);while(H=H.next)}return G.initialize=function(H){C=H,Z()},G.strength=function(H){return arguments.length?(P=typeof H=="function"?H:o(+H),Z(),G):P},G.distanceMin=function(H){return arguments.length?(z=H*H,G):Math.sqrt(z)},G.distanceMax=function(H){return arguments.length?(O=H*H,G):Math.sqrt(O)},G.theta=function(H){return arguments.length?(V=H*H,G):Math.sqrt(V)},G}function S(C,M,g){var P,T=o(.1),z,O;typeof C!="function"&&(C=o(+C)),M==null&&(M=0),g==null&&(g=0);function V(Z){for(var j=0,N=P.length;j<N;++j){var H=P[j],te=H.x-M||1e-6,oe=H.y-g||1e-6,_e=Math.sqrt(te*te+oe*oe),Ee=(O[j]-_e)*z[j]*Z/_e;H.vx+=te*Ee,H.vy+=oe*Ee}}function G(){if(P){var Z,j=P.length;for(z=new Array(j),O=new Array(j),Z=0;Z<j;++Z)O[Z]=+C(P[Z],Z,P),z[Z]=isNaN(O[Z])?0:+T(P[Z],Z,P)}}return V.initialize=function(Z){P=Z,G()},V.strength=function(Z){return arguments.length?(T=typeof Z=="function"?Z:o(+Z),G(),V):T},V.radius=function(Z){return arguments.length?(C=typeof Z=="function"?Z:o(+Z),G(),V):C},V.x=function(Z){return arguments.length?(M=+Z,V):M},V.y=function(Z){return arguments.length?(g=+Z,V):g},V}function L(C){var M=o(.1),g,P,T;typeof C!="function"&&(C=o(C==null?0:+C));function z(V){for(var G=0,Z=g.length,j;G<Z;++G)j=g[G],j.vx+=(T[G]-j.x)*P[G]*V}function O(){if(g){var V,G=g.length;for(P=new Array(G),T=new Array(G),V=0;V<G;++V)P[V]=isNaN(T[V]=+C(g[V],V,g))?0:+M(g[V],V,g)}}return z.initialize=function(V){g=V,O()},z.strength=function(V){return arguments.length?(M=typeof V=="function"?V:o(+V),O(),z):M},z.x=function(V){return arguments.length?(C=typeof V=="function"?V:o(+V),O(),z):C},z}function x(C){var M=o(.1),g,P,T;typeof C!="function"&&(C=o(C==null?0:+C));function z(V){for(var G=0,Z=g.length,j;G<Z;++G)j=g[G],j.vy+=(T[G]-j.y)*P[G]*V}function O(){if(g){var V,G=g.length;for(P=new Array(G),T=new Array(G),V=0;V<G;++V)P[V]=isNaN(T[V]=+C(g[V],V,g))?0:+M(g[V],V,g)}}return z.initialize=function(V){g=V,O()},z.strength=function(V){return arguments.length?(M=typeof V=="function"?V:o(+V),O(),z):M},z.y=function(V){return arguments.length?(C=typeof V=="function"?V:o(+V),O(),z):C},z}e.forceCenter=a,e.forceCollide=c,e.forceLink=d,e.forceManyBody=E,e.forceRadial=S,e.forceSimulation=k,e.forceX=L,e.forceY=x,Object.defineProperty(e,"__esModule",{value:!0})})});var BWe=ye((T7,qWe)=>{(function(e,t){typeof T7=="object"&&typeof qWe!="undefined"?t(T7):(e=e||self,t(e.d3=e.d3||{}))})(T7,function(e){"use strict";var t=Math.PI,r=2*t,n=1e-6,i=r-n;function a(){this._x0=this._y0=this._x1=this._y1=null,this._=""}function o(){return new a}a.prototype=o.prototype={constructor:a,moveTo:function(s,l){this._+="M"+(this._x0=this._x1=+s)+","+(this._y0=this._y1=+l)},closePath:function(){this._x1!==null&&(this._x1=this._x0,this._y1=this._y0,this._+="Z")},lineTo:function(s,l){this._+="L"+(this._x1=+s)+","+(this._y1=+l)},quadraticCurveTo:function(s,l,u,c){this._+="Q"+ +s+","+ +l+","+(this._x1=+u)+","+(this._y1=+c)},bezierCurveTo:function(s,l,u,c,f,h){this._+="C"+ +s+","+ +l+","+ +u+","+ +c+","+(this._x1=+f)+","+(this._y1=+h)},arcTo:function(s,l,u,c,f){s=+s,l=+l,u=+u,c=+c,f=+f;var h=this._x1,d=this._y1,v=u-s,_=c-l,b=h-s,p=d-l,k=b*b+p*p;if(f<0)throw new Error("negative radius: "+f);if(this._x1===null)this._+="M"+(this._x1=s)+","+(this._y1=l);else if(k>n)if(!(Math.abs(p*v-_*b)>n)||!f)this._+="L"+(this._x1=s)+","+(this._y1=l);else{var E=u-h,S=c-d,L=v*v+_*_,x=E*E+S*S,C=Math.sqrt(L),M=Math.sqrt(k),g=f*Math.tan((t-Math.acos((L+k-x)/(2*C*M)))/2),P=g/M,T=g/C;Math.abs(P-1)>n&&(this._+="L"+(s+P*b)+","+(l+P*p)),this._+="A"+f+","+f+",0,0,"+ +(p*E>b*S)+","+(this._x1=s+T*v)+","+(this._y1=l+T*_)}},arc:function(s,l,u,c,f,h){s=+s,l=+l,u=+u,h=!!h;var d=u*Math.cos(c),v=u*Math.sin(c),_=s+d,b=l+v,p=1^h,k=h?c-f:f-c;if(u<0)throw new Error("negative radius: "+u);this._x1===null?this._+="M"+_+","+b:(Math.abs(this._x1-_)>n||Math.abs(this._y1-b)>n)&&(this._+="L"+_+","+b),u&&(k<0&&(k=k%r+r),k>i?this._+="A"+u+","+u+",0,1,"+p+","+(s-d)+","+(l-v)+"A"+u+","+u+",0,1,"+p+","+(this._x1=_)+","+(this._y1=b):k>n&&(this._+="A"+u+","+u+",0,"+ +(k>=t)+","+p+","+(this._x1=s+u*Math.cos(f))+","+(this._y1=l+u*Math.sin(f))))},rect:function(s,l,u,c){this._+="M"+(this._x0=this._x1=+s)+","+(this._y0=this._y1=+l)+"h"+ +u+"v"+ +c+"h"+-u+"Z"},toString:function(){return this._}},e.path=o,Object.defineProperty(e,"__esModule",{value:!0})})});var jJ=ye((A7,NWe)=>{(function(e,t){typeof A7=="object"&&typeof NWe!="undefined"?t(A7,BWe()):(e=e||self,t(e.d3=e.d3||{},e.d3))})(A7,function(e,t){"use strict";function r(Mt){return function(){return Mt}}var n=Math.abs,i=Math.atan2,a=Math.cos,o=Math.max,s=Math.min,l=Math.sin,u=Math.sqrt,c=1e-12,f=Math.PI,h=f/2,d=2*f;function v(Mt){return Mt>1?0:Mt<-1?f:Math.acos(Mt)}function _(Mt){return Mt>=1?h:Mt<=-1?-h:Math.asin(Mt)}function b(Mt){return Mt.innerRadius}function p(Mt){return Mt.outerRadius}function k(Mt){return Mt.startAngle}function E(Mt){return Mt.endAngle}function S(Mt){return Mt&&Mt.padAngle}function L(Mt,kr,Jr,vi,hn,An,Mn,Li){var _n=Jr-Mt,ya=vi-kr,Jn=Mn-hn,Ma=Li-An,_o=Ma*_n-Jn*ya;if(!(_o*_o<c))return _o=(Jn*(kr-An)-Ma*(Mt-hn))/_o,[Mt+_o*_n,kr+_o*ya]}function x(Mt,kr,Jr,vi,hn,An,Mn){var Li=Mt-Jr,_n=kr-vi,ya=(Mn?An:-An)/u(Li*Li+_n*_n),Jn=ya*_n,Ma=-ya*Li,_o=Mt+Jn,No=kr+Ma,po=Jr+Jn,Lo=vi+Ma,ko=(_o+po)/2,Ds=(No+Lo)/2,Fs=po-_o,ll=Lo-No,ul=Fs*Fs+ll*ll,zl=hn-An,us=_o*Lo-po*No,il=(ll<0?-1:1)*u(o(0,zl*zl*ul-us*us)),As=(us*ll-Fs*il)/ul,cl=(-us*Fs-ll*il)/ul,Ks=(us*ll+Fs*il)/ul,zs=(-us*Fs+ll*il)/ul,Io=As-ko,ls=cl-Ds,Zl=Ks-ko,Su=zs-Ds;return Io*Io+ls*ls>Zl*Zl+Su*Su&&(As=Ks,cl=zs),{cx:As,cy:cl,x01:-Jn,y01:-Ma,x11:As*(hn/zl-1),y11:cl*(hn/zl-1)}}function C(){var Mt=b,kr=p,Jr=r(0),vi=null,hn=k,An=E,Mn=S,Li=null;function _n(){var ya,Jn,Ma=+Mt.apply(this,arguments),_o=+kr.apply(this,arguments),No=hn.apply(this,arguments)-h,po=An.apply(this,arguments)-h,Lo=n(po-No),ko=po>No;if(Li||(Li=ya=t.path()),_o<Ma&&(Jn=_o,_o=Ma,Ma=Jn),!(_o>c))Li.moveTo(0,0);else if(Lo>d-c)Li.moveTo(_o*a(No),_o*l(No)),Li.arc(0,0,_o,No,po,!ko),Ma>c&&(Li.moveTo(Ma*a(po),Ma*l(po)),Li.arc(0,0,Ma,po,No,ko));else{var Ds=No,Fs=po,ll=No,ul=po,zl=Lo,us=Lo,il=Mn.apply(this,arguments)/2,As=il>c&&(vi?+vi.apply(this,arguments):u(Ma*Ma+_o*_o)),cl=s(n(_o-Ma)/2,+Jr.apply(this,arguments)),Ks=cl,zs=cl,Io,ls;if(As>c){var Zl=_(As/Ma*l(il)),Su=_(As/_o*l(il));(zl-=Zl*2)>c?(Zl*=ko?1:-1,ll+=Zl,ul-=Zl):(zl=0,ll=ul=(No+po)/2),(us-=Su*2)>c?(Su*=ko?1:-1,Ds+=Su,Fs-=Su):(us=0,Ds=Fs=(No+po)/2)}var nc=_o*a(Ds),bs=_o*l(Ds),Rn=Ma*a(ul),_a=Ma*l(ul);if(cl>c){var Vu=_o*a(Fs),Ol=_o*l(Fs),xo=Ma*a(ll),Yl=Ma*l(ll),Ns;if(Lo<f&&(Ns=L(nc,bs,xo,Yl,Vu,Ol,Rn,_a))){var Hl=nc-Ns[0],ac=bs-Ns[1],aa=Vu-Ns[0],Oo=Ol-Ns[1],qo=1/l(v((Hl*aa+ac*Oo)/(u(Hl*Hl+ac*ac)*u(aa*aa+Oo*Oo)))/2),ql=u(Ns[0]*Ns[0]+Ns[1]*Ns[1]);Ks=s(cl,(Ma-ql)/(qo-1)),zs=s(cl,(_o-ql)/(qo+1))}}us>c?zs>c?(Io=x(xo,Yl,nc,bs,_o,zs,ko),ls=x(Vu,Ol,Rn,_a,_o,zs,ko),Li.moveTo(Io.cx+Io.x01,Io.cy+Io.y01),zs<cl?Li.arc(Io.cx,Io.cy,zs,i(Io.y01,Io.x01),i(ls.y01,ls.x01),!ko):(Li.arc(Io.cx,Io.cy,zs,i(Io.y01,Io.x01),i(Io.y11,Io.x11),!ko),Li.arc(0,0,_o,i(Io.cy+Io.y11,Io.cx+Io.x11),i(ls.cy+ls.y11,ls.cx+ls.x11),!ko),Li.arc(ls.cx,ls.cy,zs,i(ls.y11,ls.x11),i(ls.y01,ls.x01),!ko))):(Li.moveTo(nc,bs),Li.arc(0,0,_o,Ds,Fs,!ko)):Li.moveTo(nc,bs),!(Ma>c)||!(zl>c)?Li.lineTo(Rn,_a):Ks>c?(Io=x(Rn,_a,Vu,Ol,Ma,-Ks,ko),ls=x(nc,bs,xo,Yl,Ma,-Ks,ko),Li.lineTo(Io.cx+Io.x01,Io.cy+Io.y01),Ks<cl?Li.arc(Io.cx,Io.cy,Ks,i(Io.y01,Io.x01),i(ls.y01,ls.x01),!ko):(Li.arc(Io.cx,Io.cy,Ks,i(Io.y01,Io.x01),i(Io.y11,Io.x11),!ko),Li.arc(0,0,Ma,i(Io.cy+Io.y11,Io.cx+Io.x11),i(ls.cy+ls.y11,ls.cx+ls.x11),ko),Li.arc(ls.cx,ls.cy,Ks,i(ls.y11,ls.x11),i(ls.y01,ls.x01),!ko))):Li.arc(0,0,Ma,ul,ll,ko)}if(Li.closePath(),ya)return Li=null,ya+""||null}return _n.centroid=function(){var ya=(+Mt.apply(this,arguments)+ +kr.apply(this,arguments))/2,Jn=(+hn.apply(this,arguments)+ +An.apply(this,arguments))/2-f/2;return[a(Jn)*ya,l(Jn)*ya]},_n.innerRadius=function(ya){return arguments.length?(Mt=typeof ya=="function"?ya:r(+ya),_n):Mt},_n.outerRadius=function(ya){return arguments.length?(kr=typeof ya=="function"?ya:r(+ya),_n):kr},_n.cornerRadius=function(ya){return arguments.length?(Jr=typeof ya=="function"?ya:r(+ya),_n):Jr},_n.padRadius=function(ya){return arguments.length?(vi=ya==null?null:typeof ya=="function"?ya:r(+ya),_n):vi},_n.startAngle=function(ya){return arguments.length?(hn=typeof ya=="function"?ya:r(+ya),_n):hn},_n.endAngle=function(ya){return arguments.length?(An=typeof ya=="function"?ya:r(+ya),_n):An},_n.padAngle=function(ya){return arguments.length?(Mn=typeof ya=="function"?ya:r(+ya),_n):Mn},_n.context=function(ya){return arguments.length?(Li=ya==null?null:ya,_n):Li},_n}function M(Mt){this._context=Mt}M.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._point=0},lineEnd:function(){(this._line||this._line!==0&&this._point===1)&&this._context.closePath(),this._line=1-this._line},point:function(Mt,kr){switch(Mt=+Mt,kr=+kr,this._point){case 0:this._point=1,this._line?this._context.lineTo(Mt,kr):this._context.moveTo(Mt,kr);break;case 1:this._point=2;default:this._context.lineTo(Mt,kr);break}}};function g(Mt){return new M(Mt)}function P(Mt){return Mt[0]}function T(Mt){return Mt[1]}function z(){var Mt=P,kr=T,Jr=r(!0),vi=null,hn=g,An=null;function Mn(Li){var _n,ya=Li.length,Jn,Ma=!1,_o;for(vi==null&&(An=hn(_o=t.path())),_n=0;_n<=ya;++_n)!(_n<ya&&Jr(Jn=Li[_n],_n,Li))===Ma&&((Ma=!Ma)?An.lineStart():An.lineEnd()),Ma&&An.point(+Mt(Jn,_n,Li),+kr(Jn,_n,Li));if(_o)return An=null,_o+""||null}return Mn.x=function(Li){return arguments.length?(Mt=typeof Li=="function"?Li:r(+Li),Mn):Mt},Mn.y=function(Li){return arguments.length?(kr=typeof Li=="function"?Li:r(+Li),Mn):kr},Mn.defined=function(Li){return arguments.length?(Jr=typeof Li=="function"?Li:r(!!Li),Mn):Jr},Mn.curve=function(Li){return arguments.length?(hn=Li,vi!=null&&(An=hn(vi)),Mn):hn},Mn.context=function(Li){return arguments.length?(Li==null?vi=An=null:An=hn(vi=Li),Mn):vi},Mn}function O(){var Mt=P,kr=null,Jr=r(0),vi=T,hn=r(!0),An=null,Mn=g,Li=null;function _n(Jn){var Ma,_o,No,po=Jn.length,Lo,ko=!1,Ds,Fs=new Array(po),ll=new Array(po);for(An==null&&(Li=Mn(Ds=t.path())),Ma=0;Ma<=po;++Ma){if(!(Ma<po&&hn(Lo=Jn[Ma],Ma,Jn))===ko)if(ko=!ko)_o=Ma,Li.areaStart(),Li.lineStart();else{for(Li.lineEnd(),Li.lineStart(),No=Ma-1;No>=_o;--No)Li.point(Fs[No],ll[No]);Li.lineEnd(),Li.areaEnd()}ko&&(Fs[Ma]=+Mt(Lo,Ma,Jn),ll[Ma]=+Jr(Lo,Ma,Jn),Li.point(kr?+kr(Lo,Ma,Jn):Fs[Ma],vi?+vi(Lo,Ma,Jn):ll[Ma]))}if(Ds)return Li=null,Ds+""||null}function ya(){return z().defined(hn).curve(Mn).context(An)}return _n.x=function(Jn){return arguments.length?(Mt=typeof Jn=="function"?Jn:r(+Jn),kr=null,_n):Mt},_n.x0=function(Jn){return arguments.length?(Mt=typeof Jn=="function"?Jn:r(+Jn),_n):Mt},_n.x1=function(Jn){return arguments.length?(kr=Jn==null?null:typeof Jn=="function"?Jn:r(+Jn),_n):kr},_n.y=function(Jn){return arguments.length?(Jr=typeof Jn=="function"?Jn:r(+Jn),vi=null,_n):Jr},_n.y0=function(Jn){return arguments.length?(Jr=typeof Jn=="function"?Jn:r(+Jn),_n):Jr},_n.y1=function(Jn){return arguments.length?(vi=Jn==null?null:typeof Jn=="function"?Jn:r(+Jn),_n):vi},_n.lineX0=_n.lineY0=function(){return ya().x(Mt).y(Jr)},_n.lineY1=function(){return ya().x(Mt).y(vi)},_n.lineX1=function(){return ya().x(kr).y(Jr)},_n.defined=function(Jn){return arguments.length?(hn=typeof Jn=="function"?Jn:r(!!Jn),_n):hn},_n.curve=function(Jn){return arguments.length?(Mn=Jn,An!=null&&(Li=Mn(An)),_n):Mn},_n.context=function(Jn){return arguments.length?(Jn==null?An=Li=null:Li=Mn(An=Jn),_n):An},_n}function V(Mt,kr){return kr<Mt?-1:kr>Mt?1:kr>=Mt?0:NaN}function G(Mt){return Mt}function Z(){var Mt=G,kr=V,Jr=null,vi=r(0),hn=r(d),An=r(0);function Mn(Li){var _n,ya=Li.length,Jn,Ma,_o=0,No=new Array(ya),po=new Array(ya),Lo=+vi.apply(this,arguments),ko=Math.min(d,Math.max(-d,hn.apply(this,arguments)-Lo)),Ds,Fs=Math.min(Math.abs(ko)/ya,An.apply(this,arguments)),ll=Fs*(ko<0?-1:1),ul;for(_n=0;_n<ya;++_n)(ul=po[No[_n]=_n]=+Mt(Li[_n],_n,Li))>0&&(_o+=ul);for(kr!=null?No.sort(function(zl,us){return kr(po[zl],po[us])}):Jr!=null&&No.sort(function(zl,us){return Jr(Li[zl],Li[us])}),_n=0,Ma=_o?(ko-ya*ll)/_o:0;_n<ya;++_n,Lo=Ds)Jn=No[_n],ul=po[Jn],Ds=Lo+(ul>0?ul*Ma:0)+ll,po[Jn]={data:Li[Jn],index:_n,value:ul,startAngle:Lo,endAngle:Ds,padAngle:Fs};return po}return Mn.value=function(Li){return arguments.length?(Mt=typeof Li=="function"?Li:r(+Li),Mn):Mt},Mn.sortValues=function(Li){return arguments.length?(kr=Li,Jr=null,Mn):kr},Mn.sort=function(Li){return arguments.length?(Jr=Li,kr=null,Mn):Jr},Mn.startAngle=function(Li){return arguments.length?(vi=typeof Li=="function"?Li:r(+Li),Mn):vi},Mn.endAngle=function(Li){return arguments.length?(hn=typeof Li=="function"?Li:r(+Li),Mn):hn},Mn.padAngle=function(Li){return arguments.length?(An=typeof Li=="function"?Li:r(+Li),Mn):An},Mn}var j=H(g);function N(Mt){this._curve=Mt}N.prototype={areaStart:function(){this._curve.areaStart()},areaEnd:function(){this._curve.areaEnd()},lineStart:function(){this._curve.lineStart()},lineEnd:function(){this._curve.lineEnd()},point:function(Mt,kr){this._curve.point(kr*Math.sin(Mt),kr*-Math.cos(Mt))}};function H(Mt){function kr(Jr){return new N(Mt(Jr))}return kr._curve=Mt,kr}function te(Mt){var kr=Mt.curve;return Mt.angle=Mt.x,delete Mt.x,Mt.radius=Mt.y,delete Mt.y,Mt.curve=function(Jr){return arguments.length?kr(H(Jr)):kr()._curve},Mt}function oe(){return te(z().curve(j))}function _e(){var Mt=O().curve(j),kr=Mt.curve,Jr=Mt.lineX0,vi=Mt.lineX1,hn=Mt.lineY0,An=Mt.lineY1;return Mt.angle=Mt.x,delete Mt.x,Mt.startAngle=Mt.x0,delete Mt.x0,Mt.endAngle=Mt.x1,delete Mt.x1,Mt.radius=Mt.y,delete Mt.y,Mt.innerRadius=Mt.y0,delete Mt.y0,Mt.outerRadius=Mt.y1,delete Mt.y1,Mt.lineStartAngle=function(){return te(Jr())},delete Mt.lineX0,Mt.lineEndAngle=function(){return te(vi())},delete Mt.lineX1,Mt.lineInnerRadius=function(){return te(hn())},delete Mt.lineY0,Mt.lineOuterRadius=function(){return te(An())},delete Mt.lineY1,Mt.curve=function(Mn){return arguments.length?kr(H(Mn)):kr()._curve},Mt}function Ee(Mt,kr){return[(kr=+kr)*Math.cos(Mt-=Math.PI/2),kr*Math.sin(Mt)]}var Ce=Array.prototype.slice;function me(Mt){return Mt.source}function ie(Mt){return Mt.target}function Se(Mt){var kr=me,Jr=ie,vi=P,hn=T,An=null;function Mn(){var Li,_n=Ce.call(arguments),ya=kr.apply(this,_n),Jn=Jr.apply(this,_n);if(An||(An=Li=t.path()),Mt(An,+vi.apply(this,(_n[0]=ya,_n)),+hn.apply(this,_n),+vi.apply(this,(_n[0]=Jn,_n)),+hn.apply(this,_n)),Li)return An=null,Li+""||null}return Mn.source=function(Li){return arguments.length?(kr=Li,Mn):kr},Mn.target=function(Li){return arguments.length?(Jr=Li,Mn):Jr},Mn.x=function(Li){return arguments.length?(vi=typeof Li=="function"?Li:r(+Li),Mn):vi},Mn.y=function(Li){return arguments.length?(hn=typeof Li=="function"?Li:r(+Li),Mn):hn},Mn.context=function(Li){return arguments.length?(An=Li==null?null:Li,Mn):An},Mn}function Le(Mt,kr,Jr,vi,hn){Mt.moveTo(kr,Jr),Mt.bezierCurveTo(kr=(kr+vi)/2,Jr,kr,hn,vi,hn)}function Ae(Mt,kr,Jr,vi,hn){Mt.moveTo(kr,Jr),Mt.bezierCurveTo(kr,Jr=(Jr+hn)/2,vi,Jr,vi,hn)}function Fe(Mt,kr,Jr,vi,hn){var An=Ee(kr,Jr),Mn=Ee(kr,Jr=(Jr+hn)/2),Li=Ee(vi,Jr),_n=Ee(vi,hn);Mt.moveTo(An[0],An[1]),Mt.bezierCurveTo(Mn[0],Mn[1],Li[0],Li[1],_n[0],_n[1])}function Pe(){return Se(Le)}function ge(){return Se(Ae)}function Re(){var Mt=Se(Fe);return Mt.angle=Mt.x,delete Mt.x,Mt.radius=Mt.y,delete Mt.y,Mt}var ce={draw:function(Mt,kr){var Jr=Math.sqrt(kr/f);Mt.moveTo(Jr,0),Mt.arc(0,0,Jr,0,d)}},Ze={draw:function(Mt,kr){var Jr=Math.sqrt(kr/5)/2;Mt.moveTo(-3*Jr,-Jr),Mt.lineTo(-Jr,-Jr),Mt.lineTo(-Jr,-3*Jr),Mt.lineTo(Jr,-3*Jr),Mt.lineTo(Jr,-Jr),Mt.lineTo(3*Jr,-Jr),Mt.lineTo(3*Jr,Jr),Mt.lineTo(Jr,Jr),Mt.lineTo(Jr,3*Jr),Mt.lineTo(-Jr,3*Jr),Mt.lineTo(-Jr,Jr),Mt.lineTo(-3*Jr,Jr),Mt.closePath()}},ut=Math.sqrt(1/3),pt=ut*2,Zt={draw:function(Mt,kr){var Jr=Math.sqrt(kr/pt),vi=Jr*ut;Mt.moveTo(0,-Jr),Mt.lineTo(vi,0),Mt.lineTo(0,Jr),Mt.lineTo(-vi,0),Mt.closePath()}},st=.8908130915292852,lt=Math.sin(f/10)/Math.sin(7*f/10),Gt=Math.sin(d/10)*lt,Nt=-Math.cos(d/10)*lt,Jt={draw:function(Mt,kr){var Jr=Math.sqrt(kr*st),vi=Gt*Jr,hn=Nt*Jr;Mt.moveTo(0,-Jr),Mt.lineTo(vi,hn);for(var An=1;An<5;++An){var Mn=d*An/5,Li=Math.cos(Mn),_n=Math.sin(Mn);Mt.lineTo(_n*Jr,-Li*Jr),Mt.lineTo(Li*vi-_n*hn,_n*vi+Li*hn)}Mt.closePath()}},sr={draw:function(Mt,kr){var Jr=Math.sqrt(kr),vi=-Jr/2;Mt.rect(vi,vi,Jr,Jr)}},wr=Math.sqrt(3),cr={draw:function(Mt,kr){var Jr=-Math.sqrt(kr/(wr*3));Mt.moveTo(0,Jr*2),Mt.lineTo(-wr*Jr,-Jr),Mt.lineTo(wr*Jr,-Jr),Mt.closePath()}},$e=-.5,St=Math.sqrt(3)/2,Qt=1/Math.sqrt(12),Vt=(Qt/2+1)*3,_t={draw:function(Mt,kr){var Jr=Math.sqrt(kr/Vt),vi=Jr/2,hn=Jr*Qt,An=vi,Mn=Jr*Qt+Jr,Li=-An,_n=Mn;Mt.moveTo(vi,hn),Mt.lineTo(An,Mn),Mt.lineTo(Li,_n),Mt.lineTo($e*vi-St*hn,St*vi+$e*hn),Mt.lineTo($e*An-St*Mn,St*An+$e*Mn),Mt.lineTo($e*Li-St*_n,St*Li+$e*_n),Mt.lineTo($e*vi+St*hn,$e*hn-St*vi),Mt.lineTo($e*An+St*Mn,$e*Mn-St*An),Mt.lineTo($e*Li+St*_n,$e*_n-St*Li),Mt.closePath()}},It=[ce,Ze,Zt,sr,Jt,cr,_t];function mt(){var Mt=r(ce),kr=r(64),Jr=null;function vi(){var hn;if(Jr||(Jr=hn=t.path()),Mt.apply(this,arguments).draw(Jr,+kr.apply(this,arguments)),hn)return Jr=null,hn+""||null}return vi.type=function(hn){return arguments.length?(Mt=typeof hn=="function"?hn:r(hn),vi):Mt},vi.size=function(hn){return arguments.length?(kr=typeof hn=="function"?hn:r(+hn),vi):kr},vi.context=function(hn){return arguments.length?(Jr=hn==null?null:hn,vi):Jr},vi}function er(){}function lr(Mt,kr,Jr){Mt._context.bezierCurveTo((2*Mt._x0+Mt._x1)/3,(2*Mt._y0+Mt._y1)/3,(Mt._x0+2*Mt._x1)/3,(Mt._y0+2*Mt._y1)/3,(Mt._x0+4*Mt._x1+kr)/6,(Mt._y0+4*Mt._y1+Jr)/6)}function Tr(Mt){this._context=Mt}Tr.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._y0=this._y1=NaN,this._point=0},lineEnd:function(){switch(this._point){case 3:lr(this,this._x1,this._y1);case 2:this._context.lineTo(this._x1,this._y1);break}(this._line||this._line!==0&&this._point===1)&&this._context.closePath(),this._line=1-this._line},point:function(Mt,kr){switch(Mt=+Mt,kr=+kr,this._point){case 0:this._point=1,this._line?this._context.lineTo(Mt,kr):this._context.moveTo(Mt,kr);break;case 1:this._point=2;break;case 2:this._point=3,this._context.lineTo((5*this._x0+this._x1)/6,(5*this._y0+this._y1)/6);default:lr(this,Mt,kr);break}this._x0=this._x1,this._x1=Mt,this._y0=this._y1,this._y1=kr}};function Lr(Mt){return new Tr(Mt)}function ti(Mt){this._context=Mt}ti.prototype={areaStart:er,areaEnd:er,lineStart:function(){this._x0=this._x1=this._x2=this._x3=this._x4=this._y0=this._y1=this._y2=this._y3=this._y4=NaN,this._point=0},lineEnd:function(){switch(this._point){case 1:{this._context.moveTo(this._x2,this._y2),this._context.closePath();break}case 2:{this._context.moveTo((this._x2+2*this._x3)/3,(this._y2+2*this._y3)/3),this._context.lineTo((this._x3+2*this._x2)/3,(this._y3+2*this._y2)/3),this._context.closePath();break}case 3:{this.point(this._x2,this._y2),this.point(this._x3,this._y3),this.point(this._x4,this._y4);break}}},point:function(Mt,kr){switch(Mt=+Mt,kr=+kr,this._point){case 0:this._point=1,this._x2=Mt,this._y2=kr;break;case 1:this._point=2,this._x3=Mt,this._y3=kr;break;case 2:this._point=3,this._x4=Mt,this._y4=kr,this._context.moveTo((this._x0+4*this._x1+Mt)/6,(this._y0+4*this._y1+kr)/6);break;default:lr(this,Mt,kr);break}this._x0=this._x1,this._x1=Mt,this._y0=this._y1,this._y1=kr}};function Br(Mt){return new ti(Mt)}function Vr(Mt){this._context=Mt}Vr.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._y0=this._y1=NaN,this._point=0},lineEnd:function(){(this._line||this._line!==0&&this._point===3)&&this._context.closePath(),this._line=1-this._line},point:function(Mt,kr){switch(Mt=+Mt,kr=+kr,this._point){case 0:this._point=1;break;case 1:this._point=2;break;case 2:this._point=3;var Jr=(this._x0+4*this._x1+Mt)/6,vi=(this._y0+4*this._y1+kr)/6;this._line?this._context.lineTo(Jr,vi):this._context.moveTo(Jr,vi);break;case 3:this._point=4;default:lr(this,Mt,kr);break}this._x0=this._x1,this._x1=Mt,this._y0=this._y1,this._y1=kr}};function dt(Mt){return new Vr(Mt)}function Ge(Mt,kr){this._basis=new Tr(Mt),this._beta=kr}Ge.prototype={lineStart:function(){this._x=[],this._y=[],this._basis.lineStart()},lineEnd:function(){var Mt=this._x,kr=this._y,Jr=Mt.length-1;if(Jr>0)for(var vi=Mt[0],hn=kr[0],An=Mt[Jr]-vi,Mn=kr[Jr]-hn,Li=-1,_n;++Li<=Jr;)_n=Li/Jr,this._basis.point(this._beta*Mt[Li]+(1-this._beta)*(vi+_n*An),this._beta*kr[Li]+(1-this._beta)*(hn+_n*Mn));this._x=this._y=null,this._basis.lineEnd()},point:function(Mt,kr){this._x.push(+Mt),this._y.push(+kr)}};var Je=function Mt(kr){function Jr(vi){return kr===1?new Tr(vi):new Ge(vi,kr)}return Jr.beta=function(vi){return Mt(+vi)},Jr}(.85);function je(Mt,kr,Jr){Mt._context.bezierCurveTo(Mt._x1+Mt._k*(Mt._x2-Mt._x0),Mt._y1+Mt._k*(Mt._y2-Mt._y0),Mt._x2+Mt._k*(Mt._x1-kr),Mt._y2+Mt._k*(Mt._y1-Jr),Mt._x2,Mt._y2)}function tt(Mt,kr){this._context=Mt,this._k=(1-kr)/6}tt.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._x2=this._y0=this._y1=this._y2=NaN,this._point=0},lineEnd:function(){switch(this._point){case 2:this._context.lineTo(this._x2,this._y2);break;case 3:je(this,this._x1,this._y1);break}(this._line||this._line!==0&&this._point===1)&&this._context.closePath(),this._line=1-this._line},point:function(Mt,kr){switch(Mt=+Mt,kr=+kr,this._point){case 0:this._point=1,this._line?this._context.lineTo(Mt,kr):this._context.moveTo(Mt,kr);break;case 1:this._point=2,this._x1=Mt,this._y1=kr;break;case 2:this._point=3;default:je(this,Mt,kr);break}this._x0=this._x1,this._x1=this._x2,this._x2=Mt,this._y0=this._y1,this._y1=this._y2,this._y2=kr}};var xt=function Mt(kr){function Jr(vi){return new tt(vi,kr)}return Jr.tension=function(vi){return Mt(+vi)},Jr}(0);function Ie(Mt,kr){this._context=Mt,this._k=(1-kr)/6}Ie.prototype={areaStart:er,areaEnd:er,lineStart:function(){this._x0=this._x1=this._x2=this._x3=this._x4=this._x5=this._y0=this._y1=this._y2=this._y3=this._y4=this._y5=NaN,this._point=0},lineEnd:function(){switch(this._point){case 1:{this._context.moveTo(this._x3,this._y3),this._context.closePath();break}case 2:{this._context.lineTo(this._x3,this._y3),this._context.closePath();break}case 3:{this.point(this._x3,this._y3),this.point(this._x4,this._y4),this.point(this._x5,this._y5);break}}},point:function(Mt,kr){switch(Mt=+Mt,kr=+kr,this._point){case 0:this._point=1,this._x3=Mt,this._y3=kr;break;case 1:this._point=2,this._context.moveTo(this._x4=Mt,this._y4=kr);break;case 2:this._point=3,this._x5=Mt,this._y5=kr;break;default:je(this,Mt,kr);break}this._x0=this._x1,this._x1=this._x2,this._x2=Mt,this._y0=this._y1,this._y1=this._y2,this._y2=kr}};var xe=function Mt(kr){function Jr(vi){return new Ie(vi,kr)}return Jr.tension=function(vi){return Mt(+vi)},Jr}(0);function ke(Mt,kr){this._context=Mt,this._k=(1-kr)/6}ke.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._x2=this._y0=this._y1=this._y2=NaN,this._point=0},lineEnd:function(){(this._line||this._line!==0&&this._point===3)&&this._context.closePath(),this._line=1-this._line},point:function(Mt,kr){switch(Mt=+Mt,kr=+kr,this._point){case 0:this._point=1;break;case 1:this._point=2;break;case 2:this._point=3,this._line?this._context.lineTo(this._x2,this._y2):this._context.moveTo(this._x2,this._y2);break;case 3:this._point=4;default:je(this,Mt,kr);break}this._x0=this._x1,this._x1=this._x2,this._x2=Mt,this._y0=this._y1,this._y1=this._y2,this._y2=kr}};var vt=function Mt(kr){function Jr(vi){return new ke(vi,kr)}return Jr.tension=function(vi){return Mt(+vi)},Jr}(0);function ir(Mt,kr,Jr){var vi=Mt._x1,hn=Mt._y1,An=Mt._x2,Mn=Mt._y2;if(Mt._l01_a>c){var Li=2*Mt._l01_2a+3*Mt._l01_a*Mt._l12_a+Mt._l12_2a,_n=3*Mt._l01_a*(Mt._l01_a+Mt._l12_a);vi=(vi*Li-Mt._x0*Mt._l12_2a+Mt._x2*Mt._l01_2a)/_n,hn=(hn*Li-Mt._y0*Mt._l12_2a+Mt._y2*Mt._l01_2a)/_n}if(Mt._l23_a>c){var ya=2*Mt._l23_2a+3*Mt._l23_a*Mt._l12_a+Mt._l12_2a,Jn=3*Mt._l23_a*(Mt._l23_a+Mt._l12_a);An=(An*ya+Mt._x1*Mt._l23_2a-kr*Mt._l12_2a)/Jn,Mn=(Mn*ya+Mt._y1*Mt._l23_2a-Jr*Mt._l12_2a)/Jn}Mt._context.bezierCurveTo(vi,hn,An,Mn,Mt._x2,Mt._y2)}function ar(Mt,kr){this._context=Mt,this._alpha=kr}ar.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._x2=this._y0=this._y1=this._y2=NaN,this._l01_a=this._l12_a=this._l23_a=this._l01_2a=this._l12_2a=this._l23_2a=this._point=0},lineEnd:function(){switch(this._point){case 2:this._context.lineTo(this._x2,this._y2);break;case 3:this.point(this._x2,this._y2);break}(this._line||this._line!==0&&this._point===1)&&this._context.closePath(),this._line=1-this._line},point:function(Mt,kr){if(Mt=+Mt,kr=+kr,this._point){var Jr=this._x2-Mt,vi=this._y2-kr;this._l23_a=Math.sqrt(this._l23_2a=Math.pow(Jr*Jr+vi*vi,this._alpha))}switch(this._point){case 0:this._point=1,this._line?this._context.lineTo(Mt,kr):this._context.moveTo(Mt,kr);break;case 1:this._point=2;break;case 2:this._point=3;default:ir(this,Mt,kr);break}this._l01_a=this._l12_a,this._l12_a=this._l23_a,this._l01_2a=this._l12_2a,this._l12_2a=this._l23_2a,this._x0=this._x1,this._x1=this._x2,this._x2=Mt,this._y0=this._y1,this._y1=this._y2,this._y2=kr}};var vr=function Mt(kr){function Jr(vi){return kr?new ar(vi,kr):new tt(vi,0)}return Jr.alpha=function(vi){return Mt(+vi)},Jr}(.5);function ii(Mt,kr){this._context=Mt,this._alpha=kr}ii.prototype={areaStart:er,areaEnd:er,lineStart:function(){this._x0=this._x1=this._x2=this._x3=this._x4=this._x5=this._y0=this._y1=this._y2=this._y3=this._y4=this._y5=NaN,this._l01_a=this._l12_a=this._l23_a=this._l01_2a=this._l12_2a=this._l23_2a=this._point=0},lineEnd:function(){switch(this._point){case 1:{this._context.moveTo(this._x3,this._y3),this._context.closePath();break}case 2:{this._context.lineTo(this._x3,this._y3),this._context.closePath();break}case 3:{this.point(this._x3,this._y3),this.point(this._x4,this._y4),this.point(this._x5,this._y5);break}}},point:function(Mt,kr){if(Mt=+Mt,kr=+kr,this._point){var Jr=this._x2-Mt,vi=this._y2-kr;this._l23_a=Math.sqrt(this._l23_2a=Math.pow(Jr*Jr+vi*vi,this._alpha))}switch(this._point){case 0:this._point=1,this._x3=Mt,this._y3=kr;break;case 1:this._point=2,this._context.moveTo(this._x4=Mt,this._y4=kr);break;case 2:this._point=3,this._x5=Mt,this._y5=kr;break;default:ir(this,Mt,kr);break}this._l01_a=this._l12_a,this._l12_a=this._l23_a,this._l01_2a=this._l12_2a,this._l12_2a=this._l23_2a,this._x0=this._x1,this._x1=this._x2,this._x2=Mt,this._y0=this._y1,this._y1=this._y2,this._y2=kr}};var pi=function Mt(kr){function Jr(vi){return kr?new ii(vi,kr):new Ie(vi,0)}return Jr.alpha=function(vi){return Mt(+vi)},Jr}(.5);function $r(Mt,kr){this._context=Mt,this._alpha=kr}$r.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._x2=this._y0=this._y1=this._y2=NaN,this._l01_a=this._l12_a=this._l23_a=this._l01_2a=this._l12_2a=this._l23_2a=this._point=0},lineEnd:function(){(this._line||this._line!==0&&this._point===3)&&this._context.closePath(),this._line=1-this._line},point:function(Mt,kr){if(Mt=+Mt,kr=+kr,this._point){var Jr=this._x2-Mt,vi=this._y2-kr;this._l23_a=Math.sqrt(this._l23_2a=Math.pow(Jr*Jr+vi*vi,this._alpha))}switch(this._point){case 0:this._point=1;break;case 1:this._point=2;break;case 2:this._point=3,this._line?this._context.lineTo(this._x2,this._y2):this._context.moveTo(this._x2,this._y2);break;case 3:this._point=4;default:ir(this,Mt,kr);break}this._l01_a=this._l12_a,this._l12_a=this._l23_a,this._l01_2a=this._l12_2a,this._l12_2a=this._l23_2a,this._x0=this._x1,this._x1=this._x2,this._x2=Mt,this._y0=this._y1,this._y1=this._y2,this._y2=kr}};var di=function Mt(kr){function Jr(vi){return kr?new $r(vi,kr):new ke(vi,0)}return Jr.alpha=function(vi){return Mt(+vi)},Jr}(.5);function ji(Mt){this._context=Mt}ji.prototype={areaStart:er,areaEnd:er,lineStart:function(){this._point=0},lineEnd:function(){this._point&&this._context.closePath()},point:function(Mt,kr){Mt=+Mt,kr=+kr,this._point?this._context.lineTo(Mt,kr):(this._point=1,this._context.moveTo(Mt,kr))}};function In(Mt){return new ji(Mt)}function wi(Mt){return Mt<0?-1:1}function On(Mt,kr,Jr){var vi=Mt._x1-Mt._x0,hn=kr-Mt._x1,An=(Mt._y1-Mt._y0)/(vi||hn<0&&-0),Mn=(Jr-Mt._y1)/(hn||vi<0&&-0),Li=(An*hn+Mn*vi)/(vi+hn);return(wi(An)+wi(Mn))*Math.min(Math.abs(An),Math.abs(Mn),.5*Math.abs(Li))||0}function qn(Mt,kr){var Jr=Mt._x1-Mt._x0;return Jr?(3*(Mt._y1-Mt._y0)/Jr-kr)/2:kr}function Fn(Mt,kr,Jr){var vi=Mt._x0,hn=Mt._y0,An=Mt._x1,Mn=Mt._y1,Li=(An-vi)/3;Mt._context.bezierCurveTo(vi+Li,hn+Li*kr,An-Li,Mn-Li*Jr,An,Mn)}function ra(Mt){this._context=Mt}ra.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._y0=this._y1=this._t0=NaN,this._point=0},lineEnd:function(){switch(this._point){case 2:this._context.lineTo(this._x1,this._y1);break;case 3:Fn(this,this._t0,qn(this,this._t0));break}(this._line||this._line!==0&&this._point===1)&&this._context.closePath(),this._line=1-this._line},point:function(Mt,kr){var Jr=NaN;if(Mt=+Mt,kr=+kr,!(Mt===this._x1&&kr===this._y1)){switch(this._point){case 0:this._point=1,this._line?this._context.lineTo(Mt,kr):this._context.moveTo(Mt,kr);break;case 1:this._point=2;break;case 2:this._point=3,Fn(this,qn(this,Jr=On(this,Mt,kr)),Jr);break;default:Fn(this,this._t0,Jr=On(this,Mt,kr));break}this._x0=this._x1,this._x1=Mt,this._y0=this._y1,this._y1=kr,this._t0=Jr}}};function la(Mt){this._context=new Ut(Mt)}(la.prototype=Object.create(ra.prototype)).point=function(Mt,kr){ra.prototype.point.call(this,kr,Mt)};function Ut(Mt){this._context=Mt}Ut.prototype={moveTo:function(Mt,kr){this._context.moveTo(kr,Mt)},closePath:function(){this._context.closePath()},lineTo:function(Mt,kr){this._context.lineTo(kr,Mt)},bezierCurveTo:function(Mt,kr,Jr,vi,hn,An){this._context.bezierCurveTo(kr,Mt,vi,Jr,An,hn)}};function wt(Mt){return new ra(Mt)}function rr(Mt){return new la(Mt)}function nr(Mt){this._context=Mt}nr.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x=[],this._y=[]},lineEnd:function(){var Mt=this._x,kr=this._y,Jr=Mt.length;if(Jr)if(this._line?this._context.lineTo(Mt[0],kr[0]):this._context.moveTo(Mt[0],kr[0]),Jr===2)this._context.lineTo(Mt[1],kr[1]);else for(var vi=Er(Mt),hn=Er(kr),An=0,Mn=1;Mn<Jr;++An,++Mn)this._context.bezierCurveTo(vi[0][An],hn[0][An],vi[1][An],hn[1][An],Mt[Mn],kr[Mn]);(this._line||this._line!==0&&Jr===1)&&this._context.closePath(),this._line=1-this._line,this._x=this._y=null},point:function(Mt,kr){this._x.push(+Mt),this._y.push(+kr)}};function Er(Mt){var kr,Jr=Mt.length-1,vi,hn=new Array(Jr),An=new Array(Jr),Mn=new Array(Jr);for(hn[0]=0,An[0]=2,Mn[0]=Mt[0]+2*Mt[1],kr=1;kr<Jr-1;++kr)hn[kr]=1,An[kr]=4,Mn[kr]=4*Mt[kr]+2*Mt[kr+1];for(hn[Jr-1]=2,An[Jr-1]=7,Mn[Jr-1]=8*Mt[Jr-1]+Mt[Jr],kr=1;kr<Jr;++kr)vi=hn[kr]/An[kr-1],An[kr]-=vi,Mn[kr]-=vi*Mn[kr-1];for(hn[Jr-1]=Mn[Jr-1]/An[Jr-1],kr=Jr-2;kr>=0;--kr)hn[kr]=(Mn[kr]-hn[kr+1])/An[kr];for(An[Jr-1]=(Mt[Jr]+hn[Jr-1])/2,kr=0;kr<Jr-1;++kr)An[kr]=2*Mt[kr+1]-hn[kr+1];return[hn,An]}function Xr(Mt){return new nr(Mt)}function ri(Mt,kr){this._context=Mt,this._t=kr}ri.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x=this._y=NaN,this._point=0},lineEnd:function(){0<this._t&&this._t<1&&this._point===2&&this._context.lineTo(this._x,this._y),(this._line||this._line!==0&&this._point===1)&&this._context.closePath(),this._line>=0&&(this._t=1-this._t,this._line=1-this._line)},point:function(Mt,kr){switch(Mt=+Mt,kr=+kr,this._point){case 0:this._point=1,this._line?this._context.lineTo(Mt,kr):this._context.moveTo(Mt,kr);break;case 1:this._point=2;default:{if(this._t<=0)this._context.lineTo(this._x,kr),this._context.lineTo(Mt,kr);else{var Jr=this._x*(1-this._t)+Mt*this._t;this._context.lineTo(Jr,this._y),this._context.lineTo(Jr,kr)}break}}this._x=Mt,this._y=kr}};function Qr(Mt){return new ri(Mt,.5)}function Oi(Mt){return new ri(Mt,0)}function $i(Mt){return new ri(Mt,1)}function tn(Mt,kr){if((Mn=Mt.length)>1)for(var Jr=1,vi,hn,An=Mt[kr[0]],Mn,Li=An.length;Jr<Mn;++Jr)for(hn=An,An=Mt[kr[Jr]],vi=0;vi<Li;++vi)An[vi][1]+=An[vi][0]=isNaN(hn[vi][1])?hn[vi][0]:hn[vi][1]}function fn(Mt){for(var kr=Mt.length,Jr=new Array(kr);--kr>=0;)Jr[kr]=kr;return Jr}function yn(Mt,kr){return Mt[kr]}function Sn(){var Mt=r([]),kr=fn,Jr=tn,vi=yn;function hn(An){var Mn=Mt.apply(this,arguments),Li,_n=An.length,ya=Mn.length,Jn=new Array(ya),Ma;for(Li=0;Li<ya;++Li){for(var _o=Mn[Li],No=Jn[Li]=new Array(_n),po=0,Lo;po<_n;++po)No[po]=Lo=[0,+vi(An[po],_o,po,An)],Lo.data=An[po];No.key=_o}for(Li=0,Ma=kr(Jn);Li<ya;++Li)Jn[Ma[Li]].index=Li;return Jr(Jn,Ma),Jn}return hn.keys=function(An){return arguments.length?(Mt=typeof An=="function"?An:r(Ce.call(An)),hn):Mt},hn.value=function(An){return arguments.length?(vi=typeof An=="function"?An:r(+An),hn):vi},hn.order=function(An){return arguments.length?(kr=An==null?fn:typeof An=="function"?An:r(Ce.call(An)),hn):kr},hn.offset=function(An){return arguments.length?(Jr=An==null?tn:An,hn):Jr},hn}function Ba(Mt,kr){if((vi=Mt.length)>0){for(var Jr,vi,hn=0,An=Mt[0].length,Mn;hn<An;++hn){for(Mn=Jr=0;Jr<vi;++Jr)Mn+=Mt[Jr][hn][1]||0;if(Mn)for(Jr=0;Jr<vi;++Jr)Mt[Jr][hn][1]/=Mn}tn(Mt,kr)}}function ua(Mt,kr){if((_n=Mt.length)>0)for(var Jr,vi=0,hn,An,Mn,Li,_n,ya=Mt[kr[0]].length;vi<ya;++vi)for(Mn=Li=0,Jr=0;Jr<_n;++Jr)(An=(hn=Mt[kr[Jr]][vi])[1]-hn[0])>0?(hn[0]=Mn,hn[1]=Mn+=An):An<0?(hn[1]=Li,hn[0]=Li+=An):(hn[0]=0,hn[1]=An)}function ma(Mt,kr){if((hn=Mt.length)>0){for(var Jr=0,vi=Mt[kr[0]],hn,An=vi.length;Jr<An;++Jr){for(var Mn=0,Li=0;Mn<hn;++Mn)Li+=Mt[Mn][Jr][1]||0;vi[Jr][1]+=vi[Jr][0]=-Li/2}tn(Mt,kr)}}function Wa(Mt,kr){if(!(!((Mn=Mt.length)>0)||!((An=(hn=Mt[kr[0]]).length)>0))){for(var Jr=0,vi=1,hn,An,Mn;vi<An;++vi){for(var Li=0,_n=0,ya=0;Li<Mn;++Li){for(var Jn=Mt[kr[Li]],Ma=Jn[vi][1]||0,_o=Jn[vi-1][1]||0,No=(Ma-_o)/2,po=0;po<Li;++po){var Lo=Mt[kr[po]],ko=Lo[vi][1]||0,Ds=Lo[vi-1][1]||0;No+=ko-Ds}_n+=Ma,ya+=No*Ma}hn[vi-1][1]+=hn[vi-1][0]=Jr,_n&&(Jr-=ya/_n)}hn[vi-1][1]+=hn[vi-1][0]=Jr,tn(Mt,kr)}}function Fa(Mt){var kr=Mt.map(Wo);return fn(Mt).sort(function(Jr,vi){return kr[Jr]-kr[vi]})}function Wo(Mt){for(var kr=-1,Jr=0,vi=Mt.length,hn,An=-1/0;++kr<vi;)(hn=+Mt[kr][1])>An&&(An=hn,Jr=kr);return Jr}function da(Mt){var kr=Mt.map(Wn);return fn(Mt).sort(function(Jr,vi){return kr[Jr]-kr[vi]})}function Wn(Mt){for(var kr=0,Jr=-1,vi=Mt.length,hn;++Jr<vi;)(hn=+Mt[Jr][1])&&(kr+=hn);return kr}function Ha(Mt){return da(Mt).reverse()}function vo(Mt){var kr=Mt.length,Jr,vi,hn=Mt.map(Wn),An=Fa(Mt),Mn=0,Li=0,_n=[],ya=[];for(Jr=0;Jr<kr;++Jr)vi=An[Jr],Mn<Li?(Mn+=hn[vi],_n.push(vi)):(Li+=hn[vi],ya.push(vi));return ya.reverse().concat(_n)}function jn(Mt){return fn(Mt).reverse()}e.arc=C,e.area=O,e.areaRadial=_e,e.curveBasis=Lr,e.curveBasisClosed=Br,e.curveBasisOpen=dt,e.curveBundle=Je,e.curveCardinal=xt,e.curveCardinalClosed=xe,e.curveCardinalOpen=vt,e.curveCatmullRom=vr,e.curveCatmullRomClosed=pi,e.curveCatmullRomOpen=di,e.curveLinear=g,e.curveLinearClosed=In,e.curveMonotoneX=wt,e.curveMonotoneY=rr,e.curveNatural=Xr,e.curveStep=Qr,e.curveStepAfter=$i,e.curveStepBefore=Oi,e.line=z,e.lineRadial=oe,e.linkHorizontal=Pe,e.linkRadial=Re,e.linkVertical=ge,e.pie=Z,e.pointRadial=Ee,e.radialArea=_e,e.radialLine=oe,e.stack=Sn,e.stackOffsetDiverging=ua,e.stackOffsetExpand=Ba,e.stackOffsetNone=tn,e.stackOffsetSilhouette=ma,e.stackOffsetWiggle=Wa,e.stackOrderAppearance=Fa,e.stackOrderAscending=da,e.stackOrderDescending=Ha,e.stackOrderInsideOut=vo,e.stackOrderNone=fn,e.stackOrderReverse=jn,e.symbol=mt,e.symbolCircle=ce,e.symbolCross=Ze,e.symbolDiamond=Zt,e.symbolSquare=sr,e.symbolStar=Jt,e.symbolTriangle=cr,e.symbolWye=_t,e.symbols=It,Object.defineProperty(e,"__esModule",{value:!0})})});var VWe=ye((S7,UWe)=>{(function(e,t){typeof S7=="object"&&typeof UWe!="undefined"?t(S7,nk(),_7(),jJ()):t(e.d3=e.d3||{},e.d3,e.d3,e.d3)})(S7,function(e,t,r,n){"use strict";function i(g){return g.target.depth}function a(g){return g.depth}function o(g,P){return P-1-g.height}function s(g,P){return g.sourceLinks.length?g.depth:P-1}function l(g){return g.targetLinks.length?g.depth:g.sourceLinks.length?t.min(g.sourceLinks,i)-1:0}function u(g){return function(){return g}}function c(g,P){return h(g.source,P.source)||g.index-P.index}function f(g,P){return h(g.target,P.target)||g.index-P.index}function h(g,P){return g.y0-P.y0}function d(g){return g.value}function v(g){return(g.y0+g.y1)/2}function _(g){return v(g.source)*g.value}function b(g){return v(g.target)*g.value}function p(g){return g.index}function k(g){return g.nodes}function E(g){return g.links}function S(g,P){var T=g.get(P);if(!T)throw new Error("missing: "+P);return T}var L=function(){var g=0,P=0,T=1,z=1,O=24,V=8,G=p,Z=s,j=k,N=E,H=32,te=2/3;function oe(){var Se={nodes:j.apply(null,arguments),links:N.apply(null,arguments)};return _e(Se),Ee(Se),Ce(Se),me(Se,H),ie(Se),Se}oe.update=function(Se){return ie(Se),Se},oe.nodeId=function(Se){return arguments.length?(G=typeof Se=="function"?Se:u(Se),oe):G},oe.nodeAlign=function(Se){return arguments.length?(Z=typeof Se=="function"?Se:u(Se),oe):Z},oe.nodeWidth=function(Se){return arguments.length?(O=+Se,oe):O},oe.nodePadding=function(Se){return arguments.length?(V=+Se,oe):V},oe.nodes=function(Se){return arguments.length?(j=typeof Se=="function"?Se:u(Se),oe):j},oe.links=function(Se){return arguments.length?(N=typeof Se=="function"?Se:u(Se),oe):N},oe.size=function(Se){return arguments.length?(g=P=0,T=+Se[0],z=+Se[1],oe):[T-g,z-P]},oe.extent=function(Se){return arguments.length?(g=+Se[0][0],T=+Se[1][0],P=+Se[0][1],z=+Se[1][1],oe):[[g,P],[T,z]]},oe.iterations=function(Se){return arguments.length?(H=+Se,oe):H};function _e(Se){Se.nodes.forEach(function(Ae,Fe){Ae.index=Fe,Ae.sourceLinks=[],Ae.targetLinks=[]});var Le=r.map(Se.nodes,G);Se.links.forEach(function(Ae,Fe){Ae.index=Fe;var Pe=Ae.source,ge=Ae.target;typeof Pe!="object"&&(Pe=Ae.source=S(Le,Pe)),typeof ge!="object"&&(ge=Ae.target=S(Le,ge)),Pe.sourceLinks.push(Ae),ge.targetLinks.push(Ae)})}function Ee(Se){Se.nodes.forEach(function(Le){Le.value=Math.max(t.sum(Le.sourceLinks,d),t.sum(Le.targetLinks,d))})}function Ce(Se){var Le,Ae,Fe;for(Le=Se.nodes,Ae=[],Fe=0;Le.length;++Fe,Le=Ae,Ae=[])Le.forEach(function(ge){ge.depth=Fe,ge.sourceLinks.forEach(function(Re){Ae.indexOf(Re.target)<0&&Ae.push(Re.target)})});for(Le=Se.nodes,Ae=[],Fe=0;Le.length;++Fe,Le=Ae,Ae=[])Le.forEach(function(ge){ge.height=Fe,ge.targetLinks.forEach(function(Re){Ae.indexOf(Re.source)<0&&Ae.push(Re.source)})});var Pe=(T-g-O)/(Fe-1);Se.nodes.forEach(function(ge){ge.x1=(ge.x0=g+Math.max(0,Math.min(Fe-1,Math.floor(Z.call(null,ge,Fe))))*Pe)+O})}function me(Se){var Le=r.nest().key(function(Ze){return Ze.x0}).sortKeys(t.ascending).entries(Se.nodes).map(function(Ze){return Ze.values});Pe(),ce();for(var Ae=1,Fe=H;Fe>0;--Fe)Re(Ae*=.99),ce(),ge(Ae),ce();function Pe(){var Ze=t.max(Le,function(Zt){return Zt.length}),ut=te*(z-P)/(Ze-1);V>ut&&(V=ut);var pt=t.min(Le,function(Zt){return(z-P-(Zt.length-1)*V)/t.sum(Zt,d)});Le.forEach(function(Zt){Zt.forEach(function(st,lt){st.y1=(st.y0=lt)+st.value*pt})}),Se.links.forEach(function(Zt){Zt.width=Zt.value*pt})}function ge(Ze){Le.forEach(function(ut){ut.forEach(function(pt){if(pt.targetLinks.length){var Zt=(t.sum(pt.targetLinks,_)/t.sum(pt.targetLinks,d)-v(pt))*Ze;pt.y0+=Zt,pt.y1+=Zt}})})}function Re(Ze){Le.slice().reverse().forEach(function(ut){ut.forEach(function(pt){if(pt.sourceLinks.length){var Zt=(t.sum(pt.sourceLinks,b)/t.sum(pt.sourceLinks,d)-v(pt))*Ze;pt.y0+=Zt,pt.y1+=Zt}})})}function ce(){Le.forEach(function(Ze){var ut,pt,Zt=P,st=Ze.length,lt;for(Ze.sort(h),lt=0;lt<st;++lt)ut=Ze[lt],pt=Zt-ut.y0,pt>0&&(ut.y0+=pt,ut.y1+=pt),Zt=ut.y1+V;if(pt=Zt-V-z,pt>0)for(Zt=ut.y0-=pt,ut.y1-=pt,lt=st-2;lt>=0;--lt)ut=Ze[lt],pt=ut.y1+V-Zt,pt>0&&(ut.y0-=pt,ut.y1-=pt),Zt=ut.y0})}}function ie(Se){Se.nodes.forEach(function(Le){Le.sourceLinks.sort(f),Le.targetLinks.sort(c)}),Se.nodes.forEach(function(Le){var Ae=Le.y0,Fe=Ae;Le.sourceLinks.forEach(function(Pe){Pe.y0=Ae+Pe.width/2,Ae+=Pe.width}),Le.targetLinks.forEach(function(Pe){Pe.y1=Fe+Pe.width/2,Fe+=Pe.width})})}return oe};function x(g){return[g.source.x1,g.y0]}function C(g){return[g.target.x0,g.y1]}var M=function(){return n.linkHorizontal().source(x).target(C)};e.sankey=L,e.sankeyCenter=l,e.sankeyLeft=a,e.sankeyRight=o,e.sankeyJustify=s,e.sankeyLinkHorizontal=M,Object.defineProperty(e,"__esModule",{value:!0})})});var HWe=ye((Qbr,GWe)=>{var FZt=HJ();GWe.exports=function(t,r){var n=[],i=[],a=[],o={},s=[],l;function u(E){a[E]=!1,o.hasOwnProperty(E)&&Object.keys(o[E]).forEach(function(S){delete o[E][S],a[S]&&u(S)})}function c(E){var S=!1;i.push(E),a[E]=!0;var L,x;for(L=0;L<s[E].length;L++)x=s[E][L],x===l?(f(l,i),S=!0):a[x]||(S=c(x));if(S)u(E);else for(L=0;L<s[E].length;L++){x=s[E][L];var C=o[x];C||(C={},o[x]=C),C[x]=!0}return i.pop(),S}function f(E,S){var L=[].concat(S).concat(E);r?r(c):n.push(L)}function h(E){for(var S=0;S<t.length;S++)S<E&&(t[S]=[]),t[S]=t[S].filter(function(L){return L>=E})}function d(E){h(E);for(var S=t,L=FZt(S),x=L.components.filter(function(O){return O.length>1}),C=1/0,M,g=0;g<x.length;g++)for(var P=0;P<x[g].length;P++)x[g][P]<C&&(C=x[g][P],M=g);var T=x[M];if(!T)return!1;var z=t.map(function(O,V){return T.indexOf(V)===-1?[]:O.filter(function(G){return T.indexOf(G)!==-1})});return{leastVertex:C,adjList:z}}l=0;for(var v=t.length;l<v;){var _=d(l);if(l=_.leastVertex,s=_.adjList,s){for(var b=0;b<s.length;b++)for(var p=0;p<s[b].length;p++){var k=s[b][p];a[+k]=!1,o[k]={}}c(l),l=l+1}else l=v}if(!r)return n}});var WWe=ye((M7,jWe)=>{(function(e,t){typeof M7=="object"&&typeof jWe!="undefined"?t(M7,nk(),_7(),jJ(),HWe()):t(e.d3=e.d3||{},e.d3,e.d3,e.d3,null)})(M7,function(e,t,r,n,i){"use strict";i=i&&i.hasOwnProperty("default")?i.default:i;function a(st){return st.target.depth}function o(st){return st.depth}function s(st,lt){return lt-1-st.height}function l(st,lt){return st.sourceLinks.length?st.depth:lt-1}function u(st){return st.targetLinks.length?st.depth:st.sourceLinks.length?t.min(st.sourceLinks,a)-1:0}function c(st){return function(){return st}}var f=typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?function(st){return typeof st}:function(st){return st&&typeof Symbol=="function"&&st.constructor===Symbol&&st!==Symbol.prototype?"symbol":typeof st};function h(st,lt){return v(st.source,lt.source)||st.index-lt.index}function d(st,lt){return v(st.target,lt.target)||st.index-lt.index}function v(st,lt){return st.partOfCycle===lt.partOfCycle?st.y0-lt.y0:st.circularLinkType==="top"||lt.circularLinkType==="bottom"?-1:1}function _(st){return st.value}function b(st){return(st.y0+st.y1)/2}function p(st){return b(st.source)}function k(st){return b(st.target)}function E(st){return st.index}function S(st){return st.nodes}function L(st){return st.links}function x(st,lt){var Gt=st.get(lt);if(!Gt)throw new Error("missing: "+lt);return Gt}function C(st,lt){return lt(st)}var M=25,g=10,P=.3;function T(){var st=0,lt=0,Gt=1,Nt=1,Jt=24,sr,wr=E,cr=l,$e=S,St=L,Qt=32,Vt=2,_t,It=null;function mt(){var dt={nodes:$e.apply(null,arguments),links:St.apply(null,arguments)};er(dt),z(dt,wr,It),lr(dt),ti(dt),O(dt,wr),Br(dt,Qt,wr),Vr(dt);for(var Ge=4,Je=0;Je<Ge;Je++)Re(dt,Nt,wr),ce(dt,Nt,wr),Fe(dt,lt,Nt,wr),Re(dt,Nt,wr),ce(dt,Nt,wr);return Zt(dt,lt,Nt),H(dt,Vt,Nt,wr),dt}mt.nodeId=function(dt){return arguments.length?(wr=typeof dt=="function"?dt:c(dt),mt):wr},mt.nodeAlign=function(dt){return arguments.length?(cr=typeof dt=="function"?dt:c(dt),mt):cr},mt.nodeWidth=function(dt){return arguments.length?(Jt=+dt,mt):Jt},mt.nodePadding=function(dt){return arguments.length?(sr=+dt,mt):sr},mt.nodes=function(dt){return arguments.length?($e=typeof dt=="function"?dt:c(dt),mt):$e},mt.links=function(dt){return arguments.length?(St=typeof dt=="function"?dt:c(dt),mt):St},mt.size=function(dt){return arguments.length?(st=lt=0,Gt=+dt[0],Nt=+dt[1],mt):[Gt-st,Nt-lt]},mt.extent=function(dt){return arguments.length?(st=+dt[0][0],Gt=+dt[1][0],lt=+dt[0][1],Nt=+dt[1][1],mt):[[st,lt],[Gt,Nt]]},mt.iterations=function(dt){return arguments.length?(Qt=+dt,mt):Qt},mt.circularLinkGap=function(dt){return arguments.length?(Vt=+dt,mt):Vt},mt.nodePaddingRatio=function(dt){return arguments.length?(_t=+dt,mt):_t},mt.sortNodes=function(dt){return arguments.length?(It=dt,mt):It},mt.update=function(dt){return O(dt,wr),Vr(dt),dt.links.forEach(function(Ge){Ge.circular&&(Ge.circularLinkType=Ge.y0+Ge.y1<Nt?"top":"bottom",Ge.source.circularLinkType=Ge.circularLinkType,Ge.target.circularLinkType=Ge.circularLinkType)}),Re(dt,Nt,wr,!1),ce(dt,Nt,wr),H(dt,Vt,Nt,wr),dt};function er(dt){dt.nodes.forEach(function(Je,je){Je.index=je,Je.sourceLinks=[],Je.targetLinks=[]});var Ge=r.map(dt.nodes,wr);return dt.links.forEach(function(Je,je){Je.index=je;var tt=Je.source,xt=Je.target;(typeof tt=="undefined"?"undefined":f(tt))!=="object"&&(tt=Je.source=x(Ge,tt)),(typeof xt=="undefined"?"undefined":f(xt))!=="object"&&(xt=Je.target=x(Ge,xt)),tt.sourceLinks.push(Je),xt.targetLinks.push(Je)}),dt}function lr(dt){dt.nodes.forEach(function(Ge){Ge.partOfCycle=!1,Ge.value=Math.max(t.sum(Ge.sourceLinks,_),t.sum(Ge.targetLinks,_)),Ge.sourceLinks.forEach(function(Je){Je.circular&&(Ge.partOfCycle=!0,Ge.circularLinkType=Je.circularLinkType)}),Ge.targetLinks.forEach(function(Je){Je.circular&&(Ge.partOfCycle=!0,Ge.circularLinkType=Je.circularLinkType)})})}function Tr(dt){var Ge=0,Je=0,je=0,tt=0,xt=t.max(dt.nodes,function(Ie){return Ie.column});return dt.links.forEach(function(Ie){Ie.circular&&(Ie.circularLinkType=="top"?Ge=Ge+Ie.width:Je=Je+Ie.width,Ie.target.column==0&&(tt=tt+Ie.width),Ie.source.column==xt&&(je=je+Ie.width))}),Ge=Ge>0?Ge+M+g:Ge,Je=Je>0?Je+M+g:Je,je=je>0?je+M+g:je,tt=tt>0?tt+M+g:tt,{top:Ge,bottom:Je,left:tt,right:je}}function Lr(dt,Ge){var Je=t.max(dt.nodes,function(vt){return vt.column}),je=Gt-st,tt=Nt-lt,xt=je+Ge.right+Ge.left,Ie=tt+Ge.top+Ge.bottom,xe=je/xt,ke=tt/Ie;return st=st*xe+Ge.left,Gt=Ge.right==0?Gt:Gt*xe,lt=lt*ke+Ge.top,Nt=Nt*ke,dt.nodes.forEach(function(vt){vt.x0=st+vt.column*((Gt-st-Jt)/Je),vt.x1=vt.x0+Jt}),ke}function ti(dt){var Ge,Je,je;for(Ge=dt.nodes,Je=[],je=0;Ge.length;++je,Ge=Je,Je=[])Ge.forEach(function(tt){tt.depth=je,tt.sourceLinks.forEach(function(xt){Je.indexOf(xt.target)<0&&!xt.circular&&Je.push(xt.target)})});for(Ge=dt.nodes,Je=[],je=0;Ge.length;++je,Ge=Je,Je=[])Ge.forEach(function(tt){tt.height=je,tt.targetLinks.forEach(function(xt){Je.indexOf(xt.source)<0&&!xt.circular&&Je.push(xt.source)})});dt.nodes.forEach(function(tt){tt.column=Math.floor(cr.call(null,tt,je))})}function Br(dt,Ge,Je){var je=r.nest().key(function(vt){return vt.column}).sortKeys(t.ascending).entries(dt.nodes).map(function(vt){return vt.values});Ie(Je),ke();for(var tt=1,xt=Ge;xt>0;--xt)xe(tt*=.99,Je),ke();function Ie(vt){if(_t){var ir=1/0;je.forEach(function(pi){var $r=Nt*_t/(pi.length+1);ir=$r<ir?$r:ir}),sr=ir}var ar=t.min(je,function(pi){return(Nt-lt-(pi.length-1)*sr)/t.sum(pi,_)});ar=ar*P,dt.links.forEach(function(pi){pi.width=pi.value*ar});var vr=Tr(dt),ii=Lr(dt,vr);ar=ar*ii,dt.links.forEach(function(pi){pi.width=pi.value*ar}),je.forEach(function(pi){var $r=pi.length;pi.forEach(function(di,ji){di.depth==je.length-1&&$r==1||di.depth==0&&$r==1?(di.y0=Nt/2-di.value*ar,di.y1=di.y0+di.value*ar):di.partOfCycle?Z(di,vt)==0?(di.y0=Nt/2+ji,di.y1=di.y0+di.value*ar):di.circularLinkType=="top"?(di.y0=lt+ji,di.y1=di.y0+di.value*ar):(di.y0=Nt-di.value*ar-ji,di.y1=di.y0+di.value*ar):vr.top==0||vr.bottom==0?(di.y0=(Nt-lt)/$r*ji,di.y1=di.y0+di.value*ar):(di.y0=(Nt-lt)/2-$r/2+ji,di.y1=di.y0+di.value*ar)})})}function xe(vt,ir){var ar=je.length;je.forEach(function(vr){var ii=vr.length,pi=vr[0].depth;vr.forEach(function($r){var di;if(($r.sourceLinks.length||$r.targetLinks.length)&&!($r.partOfCycle&&Z($r,ir)>0))if(pi==0&&ii==1)di=$r.y1-$r.y0,$r.y0=Nt/2-di/2,$r.y1=Nt/2+di/2;else if(pi==ar-1&&ii==1)di=$r.y1-$r.y0,$r.y0=Nt/2-di/2,$r.y1=Nt/2+di/2;else{var ji=0,In=t.mean($r.sourceLinks,k),wi=t.mean($r.targetLinks,p);In&&wi?ji=(In+wi)/2:ji=In||wi;var On=(ji-b($r))*vt;$r.y0+=On,$r.y1+=On}})})}function ke(){je.forEach(function(vt){var ir,ar,vr=lt,ii=vt.length,pi;for(vt.sort(v),pi=0;pi<ii;++pi)ir=vt[pi],ar=vr-ir.y0,ar>0&&(ir.y0+=ar,ir.y1+=ar),vr=ir.y1+sr;if(ar=vr-sr-Nt,ar>0)for(vr=ir.y0-=ar,ir.y1-=ar,pi=ii-2;pi>=0;--pi)ir=vt[pi],ar=ir.y1+sr-vr,ar>0&&(ir.y0-=ar,ir.y1-=ar),vr=ir.y0})}}function Vr(dt){dt.nodes.forEach(function(Ge){Ge.sourceLinks.sort(d),Ge.targetLinks.sort(h)}),dt.nodes.forEach(function(Ge){var Je=Ge.y0,je=Je,tt=Ge.y1,xt=tt;Ge.sourceLinks.forEach(function(Ie){Ie.circular?(Ie.y0=tt-Ie.width/2,tt=tt-Ie.width):(Ie.y0=Je+Ie.width/2,Je+=Ie.width)}),Ge.targetLinks.forEach(function(Ie){Ie.circular?(Ie.y1=xt-Ie.width/2,xt=xt-Ie.width):(Ie.y1=je+Ie.width/2,je+=Ie.width)})})}return mt}function z(st,lt,Gt){var Nt=0;if(Gt===null){for(var Jt=[],sr=0;sr<st.links.length;sr++){var wr=st.links[sr],cr=wr.source.index,$e=wr.target.index;Jt[cr]||(Jt[cr]=[]),Jt[$e]||(Jt[$e]=[]),Jt[cr].indexOf($e)===-1&&Jt[cr].push($e)}var St=i(Jt);St.sort(function(It,mt){return It.length-mt.length});var Qt={};for(sr=0;sr<St.length;sr++){var Vt=St[sr],_t=Vt.slice(-2);Qt[_t[0]]||(Qt[_t[0]]={}),Qt[_t[0]][_t[1]]=!0}st.links.forEach(function(It){var mt=It.target.index,er=It.source.index;mt===er||Qt[er]&&Qt[er][mt]?(It.circular=!0,It.circularLinkID=Nt,Nt=Nt+1):It.circular=!1})}else st.links.forEach(function(It){It.source[Gt]<It.target[Gt]?It.circular=!1:(It.circular=!0,It.circularLinkID=Nt,Nt=Nt+1)})}function O(st,lt){var Gt=0,Nt=0;st.links.forEach(function(Jt){Jt.circular&&(Jt.source.circularLinkType||Jt.target.circularLinkType?Jt.circularLinkType=Jt.source.circularLinkType?Jt.source.circularLinkType:Jt.target.circularLinkType:Jt.circularLinkType=Gt<Nt?"top":"bottom",Jt.circularLinkType=="top"?Gt=Gt+1:Nt=Nt+1,st.nodes.forEach(function(sr){(C(sr,lt)==C(Jt.source,lt)||C(sr,lt)==C(Jt.target,lt))&&(sr.circularLinkType=Jt.circularLinkType)}))}),st.links.forEach(function(Jt){Jt.circular&&(Jt.source.circularLinkType==Jt.target.circularLinkType&&(Jt.circularLinkType=Jt.source.circularLinkType),pt(Jt,lt)&&(Jt.circularLinkType=Jt.source.circularLinkType))})}function V(st){var lt=Math.abs(st.y1-st.y0),Gt=Math.abs(st.target.x0-st.source.x1);return Math.atan(Gt/lt)}function G(st,lt){return st.source.column<lt.target.column?!1:!(st.target.column>lt.source.column)}function Z(st,lt){var Gt=0;st.sourceLinks.forEach(function(Jt){Gt=Jt.circular&&!pt(Jt,lt)?Gt+1:Gt});var Nt=0;return st.targetLinks.forEach(function(Jt){Nt=Jt.circular&&!pt(Jt,lt)?Nt+1:Nt}),Gt+Nt}function j(st){var lt=st.source.sourceLinks,Gt=0;lt.forEach(function(sr){Gt=sr.circular?Gt+1:Gt});var Nt=st.target.targetLinks,Jt=0;return Nt.forEach(function(sr){Jt=sr.circular?Jt+1:Jt}),!(Gt>1||Jt>1)}function N(st,lt,Gt){return st.sort(oe),st.forEach(function(Nt,Jt){var sr=0;if(pt(Nt,Gt)&&j(Nt))Nt.circularPathData.verticalBuffer=sr+Nt.width/2;else{var wr=0;for(wr;wr<Jt;wr++)if(G(st[Jt],st[wr])){var cr=st[wr].circularPathData.verticalBuffer+st[wr].width/2+lt;sr=cr>sr?cr:sr}Nt.circularPathData.verticalBuffer=sr+Nt.width/2}}),st}function H(st,lt,Gt,Nt){var Jt=5,sr=t.min(st.links,function($e){return $e.source.y0});st.links.forEach(function($e){$e.circular&&($e.circularPathData={})});var wr=st.links.filter(function($e){return $e.circularLinkType=="top"});N(wr,lt,Nt);var cr=st.links.filter(function($e){return $e.circularLinkType=="bottom"});N(cr,lt,Nt),st.links.forEach(function($e){if($e.circular){if($e.circularPathData.arcRadius=$e.width+g,$e.circularPathData.leftNodeBuffer=Jt,$e.circularPathData.rightNodeBuffer=Jt,$e.circularPathData.sourceWidth=$e.source.x1-$e.source.x0,$e.circularPathData.sourceX=$e.source.x0+$e.circularPathData.sourceWidth,$e.circularPathData.targetX=$e.target.x0,$e.circularPathData.sourceY=$e.y0,$e.circularPathData.targetY=$e.y1,pt($e,Nt)&&j($e))$e.circularPathData.leftSmallArcRadius=g+$e.width/2,$e.circularPathData.leftLargeArcRadius=g+$e.width/2,$e.circularPathData.rightSmallArcRadius=g+$e.width/2,$e.circularPathData.rightLargeArcRadius=g+$e.width/2,$e.circularLinkType=="bottom"?($e.circularPathData.verticalFullExtent=$e.source.y1+M+$e.circularPathData.verticalBuffer,$e.circularPathData.verticalLeftInnerExtent=$e.circularPathData.verticalFullExtent-$e.circularPathData.leftLargeArcRadius,$e.circularPathData.verticalRightInnerExtent=$e.circularPathData.verticalFullExtent-$e.circularPathData.rightLargeArcRadius):($e.circularPathData.verticalFullExtent=$e.source.y0-M-$e.circularPathData.verticalBuffer,$e.circularPathData.verticalLeftInnerExtent=$e.circularPathData.verticalFullExtent+$e.circularPathData.leftLargeArcRadius,$e.circularPathData.verticalRightInnerExtent=$e.circularPathData.verticalFullExtent+$e.circularPathData.rightLargeArcRadius);else{var St=$e.source.column,Qt=$e.circularLinkType,Vt=st.links.filter(function(mt){return mt.source.column==St&&mt.circularLinkType==Qt});$e.circularLinkType=="bottom"?Vt.sort(Ee):Vt.sort(_e);var _t=0;Vt.forEach(function(mt,er){mt.circularLinkID==$e.circularLinkID&&($e.circularPathData.leftSmallArcRadius=g+$e.width/2+_t,$e.circularPathData.leftLargeArcRadius=g+$e.width/2+er*lt+_t),_t=_t+mt.width}),St=$e.target.column,Vt=st.links.filter(function(mt){return mt.target.column==St&&mt.circularLinkType==Qt}),$e.circularLinkType=="bottom"?Vt.sort(me):Vt.sort(Ce),_t=0,Vt.forEach(function(mt,er){mt.circularLinkID==$e.circularLinkID&&($e.circularPathData.rightSmallArcRadius=g+$e.width/2+_t,$e.circularPathData.rightLargeArcRadius=g+$e.width/2+er*lt+_t),_t=_t+mt.width}),$e.circularLinkType=="bottom"?($e.circularPathData.verticalFullExtent=Math.max(Gt,$e.source.y1,$e.target.y1)+M+$e.circularPathData.verticalBuffer,$e.circularPathData.verticalLeftInnerExtent=$e.circularPathData.verticalFullExtent-$e.circularPathData.leftLargeArcRadius,$e.circularPathData.verticalRightInnerExtent=$e.circularPathData.verticalFullExtent-$e.circularPathData.rightLargeArcRadius):($e.circularPathData.verticalFullExtent=sr-M-$e.circularPathData.verticalBuffer,$e.circularPathData.verticalLeftInnerExtent=$e.circularPathData.verticalFullExtent+$e.circularPathData.leftLargeArcRadius,$e.circularPathData.verticalRightInnerExtent=$e.circularPathData.verticalFullExtent+$e.circularPathData.rightLargeArcRadius)}$e.circularPathData.leftInnerExtent=$e.circularPathData.sourceX+$e.circularPathData.leftNodeBuffer,$e.circularPathData.rightInnerExtent=$e.circularPathData.targetX-$e.circularPathData.rightNodeBuffer,$e.circularPathData.leftFullExtent=$e.circularPathData.sourceX+$e.circularPathData.leftLargeArcRadius+$e.circularPathData.leftNodeBuffer,$e.circularPathData.rightFullExtent=$e.circularPathData.targetX-$e.circularPathData.rightLargeArcRadius-$e.circularPathData.rightNodeBuffer}if($e.circular)$e.path=te($e);else{var It=n.linkHorizontal().source(function(mt){var er=mt.source.x0+(mt.source.x1-mt.source.x0),lr=mt.y0;return[er,lr]}).target(function(mt){var er=mt.target.x0,lr=mt.y1;return[er,lr]});$e.path=It($e)}})}function te(st){var lt="";return st.circularLinkType=="top"?lt="M"+st.circularPathData.sourceX+" "+st.circularPathData.sourceY+" L"+st.circularPathData.leftInnerExtent+" "+st.circularPathData.sourceY+" A"+st.circularPathData.leftLargeArcRadius+" "+st.circularPathData.leftSmallArcRadius+" 0 0 0 "+st.circularPathData.leftFullExtent+" "+(st.circularPathData.sourceY-st.circularPathData.leftSmallArcRadius)+" L"+st.circularPathData.leftFullExtent+" "+st.circularPathData.verticalLeftInnerExtent+" A"+st.circularPathData.leftLargeArcRadius+" "+st.circularPathData.leftLargeArcRadius+" 0 0 0 "+st.circularPathData.leftInnerExtent+" "+st.circularPathData.verticalFullExtent+" L"+st.circularPathData.rightInnerExtent+" "+st.circularPathData.verticalFullExtent+" A"+st.circularPathData.rightLargeArcRadius+" "+st.circularPathData.rightLargeArcRadius+" 0 0 0 "+st.circularPathData.rightFullExtent+" "+st.circularPathData.verticalRightInnerExtent+" L"+st.circularPathData.rightFullExtent+" "+(st.circularPathData.targetY-st.circularPathData.rightSmallArcRadius)+" A"+st.circularPathData.rightLargeArcRadius+" "+st.circularPathData.rightSmallArcRadius+" 0 0 0 "+st.circularPathData.rightInnerExtent+" "+st.circularPathData.targetY+" L"+st.circularPathData.targetX+" "+st.circularPathData.targetY:lt="M"+st.circularPathData.sourceX+" "+st.circularPathData.sourceY+" L"+st.circularPathData.leftInnerExtent+" "+st.circularPathData.sourceY+" A"+st.circularPathData.leftLargeArcRadius+" "+st.circularPathData.leftSmallArcRadius+" 0 0 1 "+st.circularPathData.leftFullExtent+" "+(st.circularPathData.sourceY+st.circularPathData.leftSmallArcRadius)+" L"+st.circularPathData.leftFullExtent+" "+st.circularPathData.verticalLeftInnerExtent+" A"+st.circularPathData.leftLargeArcRadius+" "+st.circularPathData.leftLargeArcRadius+" 0 0 1 "+st.circularPathData.leftInnerExtent+" "+st.circularPathData.verticalFullExtent+" L"+st.circularPathData.rightInnerExtent+" "+st.circularPathData.verticalFullExtent+" A"+st.circularPathData.rightLargeArcRadius+" "+st.circularPathData.rightLargeArcRadius+" 0 0 1 "+st.circularPathData.rightFullExtent+" "+st.circularPathData.verticalRightInnerExtent+" L"+st.circularPathData.rightFullExtent+" "+(st.circularPathData.targetY+st.circularPathData.rightSmallArcRadius)+" A"+st.circularPathData.rightLargeArcRadius+" "+st.circularPathData.rightSmallArcRadius+" 0 0 1 "+st.circularPathData.rightInnerExtent+" "+st.circularPathData.targetY+" L"+st.circularPathData.targetX+" "+st.circularPathData.targetY,lt}function oe(st,lt){return ie(st)==ie(lt)?st.circularLinkType=="bottom"?Ee(st,lt):_e(st,lt):ie(lt)-ie(st)}function _e(st,lt){return st.y0-lt.y0}function Ee(st,lt){return lt.y0-st.y0}function Ce(st,lt){return st.y1-lt.y1}function me(st,lt){return lt.y1-st.y1}function ie(st){return st.target.column-st.source.column}function Se(st){return st.target.x0-st.source.x1}function Le(st,lt){var Gt=V(st),Nt=Se(lt)/Math.tan(Gt),Jt=ut(st)=="up"?st.y1+Nt:st.y1-Nt;return Jt}function Ae(st,lt){var Gt=V(st),Nt=Se(lt)/Math.tan(Gt),Jt=ut(st)=="up"?st.y1-Nt:st.y1+Nt;return Jt}function Fe(st,lt,Gt,Nt){st.links.forEach(function(Jt){if(!Jt.circular&&Jt.target.column-Jt.source.column>1){var sr=Jt.source.column+1,wr=Jt.target.column-1,cr=1,$e=wr-sr+1;for(cr=1;sr<=wr;sr++,cr++)st.nodes.forEach(function(St){if(St.column==sr){var Qt=cr/($e+1),Vt=Math.pow(1-Qt,3),_t=3*Qt*Math.pow(1-Qt,2),It=3*Math.pow(Qt,2)*(1-Qt),mt=Math.pow(Qt,3),er=Vt*Jt.y0+_t*Jt.y0+It*Jt.y1+mt*Jt.y1,lr=er-Jt.width/2,Tr=er+Jt.width/2,Lr;lr>St.y0&&lr<St.y1?(Lr=St.y1-lr+10,Lr=St.circularLinkType=="bottom"?Lr:-Lr,St=ge(St,Lr,lt,Gt),st.nodes.forEach(function(ti){C(ti,Nt)==C(St,Nt)||ti.column!=St.column||Pe(St,ti)&&ge(ti,Lr,lt,Gt)})):Tr>St.y0&&Tr<St.y1?(Lr=Tr-St.y0+10,St=ge(St,Lr,lt,Gt),st.nodes.forEach(function(ti){C(ti,Nt)==C(St,Nt)||ti.column!=St.column||ti.y0<St.y1&&ti.y1>St.y1&&ge(ti,Lr,lt,Gt)})):lr<St.y0&&Tr>St.y1&&(Lr=Tr-St.y0+10,St=ge(St,Lr,lt,Gt),st.nodes.forEach(function(ti){C(ti,Nt)==C(St,Nt)||ti.column!=St.column||ti.y0<St.y1&&ti.y1>St.y1&&ge(ti,Lr,lt,Gt)}))}})}})}function Pe(st,lt){return st.y0>lt.y0&&st.y0<lt.y1||st.y1>lt.y0&&st.y1<lt.y1?!0:st.y0<lt.y0&&st.y1>lt.y1}function ge(st,lt,Gt,Nt){return st.y0+lt>=Gt&&st.y1+lt<=Nt&&(st.y0=st.y0+lt,st.y1=st.y1+lt,st.targetLinks.forEach(function(Jt){Jt.y1=Jt.y1+lt}),st.sourceLinks.forEach(function(Jt){Jt.y0=Jt.y0+lt})),st}function Re(st,lt,Gt,Nt){st.nodes.forEach(function(Jt){Nt&&Jt.y+(Jt.y1-Jt.y0)>lt&&(Jt.y=Jt.y-(Jt.y+(Jt.y1-Jt.y0)-lt));var sr=st.links.filter(function($e){return C($e.source,Gt)==C(Jt,Gt)}),wr=sr.length;wr>1&&sr.sort(function($e,St){if(!$e.circular&&!St.circular){if($e.target.column==St.target.column)return $e.y1-St.y1;if(Ze($e,St)){if($e.target.column>St.target.column){var Qt=Ae(St,$e);return $e.y1-Qt}if(St.target.column>$e.target.column){var Vt=Ae($e,St);return Vt-St.y1}}else return $e.y1-St.y1}if($e.circular&&!St.circular)return $e.circularLinkType=="top"?-1:1;if(St.circular&&!$e.circular)return St.circularLinkType=="top"?1:-1;if($e.circular&&St.circular)return $e.circularLinkType===St.circularLinkType&&$e.circularLinkType=="top"?$e.target.column===St.target.column?$e.target.y1-St.target.y1:St.target.column-$e.target.column:$e.circularLinkType===St.circularLinkType&&$e.circularLinkType=="bottom"?$e.target.column===St.target.column?St.target.y1-$e.target.y1:$e.target.column-St.target.column:$e.circularLinkType=="top"?-1:1});var cr=Jt.y0;sr.forEach(function($e){$e.y0=cr+$e.width/2,cr=cr+$e.width}),sr.forEach(function($e,St){if($e.circularLinkType=="bottom"){var Qt=St+1,Vt=0;for(Qt;Qt<wr;Qt++)Vt=Vt+sr[Qt].width;$e.y0=Jt.y1-Vt-$e.width/2}})})}function ce(st,lt,Gt){st.nodes.forEach(function(Nt){var Jt=st.links.filter(function(cr){return C(cr.target,Gt)==C(Nt,Gt)}),sr=Jt.length;sr>1&&Jt.sort(function(cr,$e){if(!cr.circular&&!$e.circular){if(cr.source.column==$e.source.column)return cr.y0-$e.y0;if(Ze(cr,$e)){if($e.source.column<cr.source.column){var St=Le($e,cr);return cr.y0-St}if(cr.source.column<$e.source.column){var Qt=Le(cr,$e);return Qt-$e.y0}}else return cr.y0-$e.y0}if(cr.circular&&!$e.circular)return cr.circularLinkType=="top"?-1:1;if($e.circular&&!cr.circular)return $e.circularLinkType=="top"?1:-1;if(cr.circular&&$e.circular)return cr.circularLinkType===$e.circularLinkType&&cr.circularLinkType=="top"?cr.source.column===$e.source.column?cr.source.y1-$e.source.y1:cr.source.column-$e.source.column:cr.circularLinkType===$e.circularLinkType&&cr.circularLinkType=="bottom"?cr.source.column===$e.source.column?cr.source.y1-$e.source.y1:$e.source.column-cr.source.column:cr.circularLinkType=="top"?-1:1});var wr=Nt.y0;Jt.forEach(function(cr){cr.y1=wr+cr.width/2,wr=wr+cr.width}),Jt.forEach(function(cr,$e){if(cr.circularLinkType=="bottom"){var St=$e+1,Qt=0;for(St;St<sr;St++)Qt=Qt+Jt[St].width;cr.y1=Nt.y1-Qt-cr.width/2}})})}function Ze(st,lt){return ut(st)==ut(lt)}function ut(st){return st.y0-st.y1>0?"up":"down"}function pt(st,lt){return C(st.source,lt)==C(st.target,lt)}function Zt(st,lt,Gt){var Nt=st.nodes,Jt=st.links,sr=!1,wr=!1;if(Jt.forEach(function(_t){_t.circularLinkType=="top"?sr=!0:_t.circularLinkType=="bottom"&&(wr=!0)}),sr==!1||wr==!1){var cr=t.min(Nt,function(_t){return _t.y0}),$e=t.max(Nt,function(_t){return _t.y1}),St=$e-cr,Qt=Gt-lt,Vt=Qt/St;Nt.forEach(function(_t){var It=(_t.y1-_t.y0)*Vt;_t.y0=(_t.y0-cr)*Vt,_t.y1=_t.y0+It}),Jt.forEach(function(_t){_t.y0=(_t.y0-cr)*Vt,_t.y1=(_t.y1-cr)*Vt,_t.width=_t.width*Vt})}}e.sankeyCircular=T,e.sankeyCenter=u,e.sankeyLeft=o,e.sankeyRight=s,e.sankeyJustify=l,Object.defineProperty(e,"__esModule",{value:!0})})});var WJ=ye((e2r,XWe)=>{"use strict";XWe.exports={nodeTextOffsetHorizontal:4,nodeTextOffsetVertical:3,nodePadAcross:10,sankeyIterations:50,forceIterations:5,forceTicksPerFrame:10,duration:500,ease:"linear",cn:{sankey:"sankey",sankeyLinks:"sankey-links",sankeyLink:"sankey-link",sankeyNodeSet:"sankey-node-set",sankeyNode:"sankey-node",nodeRect:"node-rect",nodeLabel:"node-label"}}});var oXe=ye((t2r,aXe)=>{"use strict";var ZWe=OWe(),zZt=(F2(),ob(D2)).interpolateNumber,R5=Oa(),Jk=VWe(),OZt=WWe(),Nu=WJ(),D5=cd(),sw=ka(),qZt=So(),p1=Dr(),YJ=p1.strTranslate,BZt=p1.strRotate,KJ=Jm(),$k=KJ.keyFun,E7=KJ.repeat,eXe=KJ.unwrap,YWe=ru(),NZt=qa(),tXe=$h(),UZt=tXe.CAP_SHIFT,VZt=tXe.LINE_SPACING,GZt=3;function HZt(e,t,r){var n=eXe(t),i=n.trace,a=i.domain,o=i.orientation==="h",s=i.node.pad,l=i.node.thickness,u={justify:Jk.sankeyJustify,left:Jk.sankeyLeft,right:Jk.sankeyRight,center:Jk.sankeyCenter}[i.node.align],c=e.width*(a.x[1]-a.x[0]),f=e.height*(a.y[1]-a.y[0]),h=n._nodes,d=n._links,v=n.circular,_;v?_=OZt.sankeyCircular().circularLinkGap(0):_=Jk.sankey(),_.iterations(Nu.sankeyIterations).size(o?[c,f]:[f,c]).nodeWidth(l).nodePadding(s).nodeId(function(V){return V.pointNumber}).nodeAlign(u).nodes(h).links(d);var b=_();_.nodePadding()<s&&p1.warn("node.pad was reduced to ",_.nodePadding()," to fit within the figure.");var p,k,E;for(var S in n._groupLookup){var L=parseInt(n._groupLookup[S]),x;for(p=0;p<b.nodes.length;p++)if(b.nodes[p].pointNumber===L){x=b.nodes[p];break}if(x){var C={pointNumber:parseInt(S),x0:x.x0,x1:x.x1,y0:x.y0,y1:x.y1,partOfGroup:!0,sourceLinks:[],targetLinks:[]};b.nodes.unshift(C),x.childrenNodes.unshift(C)}}function M(){for(p=0;p<b.nodes.length;p++){var V=b.nodes[p],G={},Z,j;for(k=0;k<V.targetLinks.length;k++)j=V.targetLinks[k],Z=j.source.pointNumber+":"+j.target.pointNumber,G.hasOwnProperty(Z)||(G[Z]=[]),G[Z].push(j);var N=Object.keys(G);for(k=0;k<N.length;k++){Z=N[k];var H=G[Z],te=0,oe={};for(E=0;E<H.length;E++)j=H[E],oe[j.label]||(oe[j.label]=0),oe[j.label]+=j.value,te+=j.value;for(E=0;E<H.length;E++)j=H[E],j.flow={value:te,labelConcentration:oe[j.label]/te,concentration:j.value/te,links:H},j.concentrationscale&&(j.color=D5(j.concentrationscale(j.flow.labelConcentration)))}var _e=0;for(k=0;k<V.sourceLinks.length;k++)_e+=V.sourceLinks[k].value;for(k=0;k<V.sourceLinks.length;k++)j=V.sourceLinks[k],j.concentrationOut=j.value/_e;var Ee=0;for(k=0;k<V.targetLinks.length;k++)Ee+=V.targetLinks[k].value;for(k=0;k<V.targetLinks.length;k++)j=V.targetLinks[k],j.concenrationIn=j.value/Ee}}M();function g(V){V.forEach(function(G){var Z,j,N=0,H=G.length,te;for(G.sort(function(oe,_e){return oe.y0-_e.y0}),te=0;te<H;++te)Z=G[te],Z.y0>=N||(j=N-Z.y0,j>1e-6&&(Z.y0+=j,Z.y1+=j)),N=Z.y1+s})}function P(V){var G=V.map(function(_e,Ee){return{x0:_e.x0,index:Ee}}).sort(function(_e,Ee){return _e.x0-Ee.x0}),Z=[],j=-1,N,H=-1/0,te;for(p=0;p<G.length;p++){var oe=V[G[p].index];oe.x0>H+l&&(j+=1,N=oe.x0),H=oe.x0,Z[j]||(Z[j]=[]),Z[j].push(oe),te=N-oe.x0,oe.x0+=te,oe.x1+=te}return Z}if(i.node.x.length&&i.node.y.length){for(p=0;p<Math.min(i.node.x.length,i.node.y.length,b.nodes.length);p++)if(i.node.x[p]&&i.node.y[p]){var T=[i.node.x[p]*c,i.node.y[p]*f];b.nodes[p].x0=T[0]-l/2,b.nodes[p].x1=T[0]+l/2;var z=b.nodes[p].y1-b.nodes[p].y0;b.nodes[p].y0=T[1]-z/2,b.nodes[p].y1=T[1]+z/2}if(i.arrangement==="snap"){h=b.nodes;var O=P(h);g(O)}_.update(b)}return{circular:v,key:r,trace:i,guid:p1.randstr(),horizontal:o,width:c,height:f,nodePad:i.node.pad,nodeLineColor:i.node.line.color,nodeLineWidth:i.node.line.width,linkLineColor:i.link.line.color,linkLineWidth:i.link.line.width,linkArrowLength:i.link.arrowlen,valueFormat:i.valueformat,valueSuffix:i.valuesuffix,textFont:i.textfont,translateX:a.x[0]*e.width+e.margin.l,translateY:e.height-a.y[1]*e.height+e.margin.t,dragParallel:o?f:c,dragPerpendicular:o?c:f,arrangement:i.arrangement,sankey:_,graph:b,forceLayouts:{},interactionState:{dragInProgress:!1,hovered:!1}}}function jZt(e,t,r){var n=D5(t.color),i=D5(t.hovercolor),a=t.source.label+"|"+t.target.label,o=a+"__"+r;return t.trace=e.trace,t.curveNumber=e.trace.index,{circular:e.circular,key:o,traceId:e.key,pointNumber:t.pointNumber,link:t,tinyColorHue:sw.tinyRGB(n),tinyColorAlpha:n.getAlpha(),tinyColorHoverHue:sw.tinyRGB(i),tinyColorHoverAlpha:i.getAlpha(),linkPath:JJ,linkLineColor:e.linkLineColor,linkLineWidth:e.linkLineWidth,linkArrowLength:e.linkArrowLength,valueFormat:e.valueFormat,valueSuffix:e.valueSuffix,sankey:e.sankey,parent:e,interactionState:e.interactionState,flow:t.flow}}function WZt(e,t){var r="",n=e.width/2,i=e.circularPathData,a=i.sourceX+i.verticalBuffer<i.targetX,o=i.rightFullExtent-i.rightLargeArcRadius-t<=i.leftFullExtent-n,s=Math.abs(i.rightFullExtent-i.leftFullExtent-n)<n;return e.circularLinkType==="top"?(r="M "+(i.targetX-t)+" "+(i.targetY+n)+" L "+(i.rightInnerExtent-t)+" "+(i.targetY+n)+"A "+(i.rightLargeArcRadius+n)+" "+(i.rightSmallArcRadius+n)+" 0 0 1 "+(i.rightFullExtent-n-t)+" "+(i.targetY-i.rightSmallArcRadius)+"L "+(i.rightFullExtent-n-t)+" "+i.verticalRightInnerExtent,a&&o?r+=" A "+(i.rightLargeArcRadius+n)+" "+(i.rightLargeArcRadius+n)+" 0 0 1 "+(i.rightFullExtent+n-t-(i.rightLargeArcRadius-n))+" "+(i.verticalRightInnerExtent-(i.rightLargeArcRadius+n))+" L "+(i.rightFullExtent+n-(i.rightLargeArcRadius-n)-t)+" "+(i.verticalRightInnerExtent-(i.rightLargeArcRadius+n))+" A "+(i.leftLargeArcRadius+n)+" "+(i.leftLargeArcRadius+n)+" 0 0 1 "+(i.leftFullExtent+n)+" "+i.verticalRightInnerExtent:a?r+=" A "+(i.rightLargeArcRadius-n)+" "+(i.rightLargeArcRadius-n)+" 0 0 0 "+(i.rightFullExtent-n-t-(i.rightLargeArcRadius-n))+" "+(i.verticalRightInnerExtent-(i.rightLargeArcRadius-n))+" L "+(i.leftFullExtent+n+(i.rightLargeArcRadius-n))+" "+(i.verticalRightInnerExtent-(i.rightLargeArcRadius-n))+" A "+(i.leftLargeArcRadius-n)+" "+(i.leftLargeArcRadius-n)+" 0 0 0 "+(i.leftFullExtent+n)+" "+i.verticalLeftInnerExtent:r+=" A "+(i.rightLargeArcRadius+n)+" "+(i.rightLargeArcRadius+n)+" 0 0 1 "+(i.rightInnerExtent-t)+" "+(i.verticalFullExtent-n)+" L "+i.leftInnerExtent+" "+(i.verticalFullExtent-n)+" A "+(i.leftLargeArcRadius+n)+" "+(i.leftLargeArcRadius+n)+" 0 0 1 "+(i.leftFullExtent+n)+" "+i.verticalLeftInnerExtent,r+=" L "+(i.leftFullExtent+n)+" "+(i.sourceY-i.leftSmallArcRadius)+" A "+(i.leftLargeArcRadius+n)+" "+(i.leftSmallArcRadius+n)+" 0 0 1 "+i.leftInnerExtent+" "+(i.sourceY+n)+" L "+i.sourceX+" "+(i.sourceY+n)+" L "+i.sourceX+" "+(i.sourceY-n)+" L "+i.leftInnerExtent+" "+(i.sourceY-n)+" A "+(i.leftLargeArcRadius-n)+" "+(i.leftSmallArcRadius-n)+" 0 0 0 "+(i.leftFullExtent-n)+" "+(i.sourceY-i.leftSmallArcRadius)+" L "+(i.leftFullExtent-n)+" "+i.verticalLeftInnerExtent,a&&o?r+=" A "+(i.leftLargeArcRadius+n)+" "+(i.leftSmallArcRadius+n)+" 0 0 0 "+(i.leftFullExtent-n)+" "+(i.verticalFullExtent+n)+"L"+(i.rightFullExtent+n-t)+" "+(i.verticalFullExtent+n)+" A "+(i.leftLargeArcRadius+n)+" "+(i.leftSmallArcRadius+n)+" 0 0 0 "+(i.rightFullExtent+n-t)+" "+i.verticalRightInnerExtent:a?r+=" A "+(i.leftLargeArcRadius+n)+" "+(i.leftSmallArcRadius+n)+" 0 0 1 "+(i.leftFullExtent+n)+" "+(i.verticalFullExtent-n)+" L "+(i.rightFullExtent-n-t)+" "+(i.verticalFullExtent-n)+" A "+(i.leftLargeArcRadius+n)+" "+(i.leftSmallArcRadius+n)+" 0 0 1 "+(i.rightFullExtent+n-t)+" "+i.verticalRightInnerExtent:r+=" A "+(i.leftLargeArcRadius-n)+" "+(i.leftLargeArcRadius-n)+" 0 0 0 "+i.leftInnerExtent+" "+(i.verticalFullExtent+n)+" L "+(i.rightInnerExtent-t)+" "+(i.verticalFullExtent+n)+" A "+(i.rightLargeArcRadius-n)+" "+(i.rightLargeArcRadius-n)+" 0 0 0 "+(i.rightFullExtent+n-t)+" "+i.verticalRightInnerExtent,r+=" L "+(i.rightFullExtent+n-t)+" "+(i.targetY-i.rightSmallArcRadius)+" A "+(i.rightLargeArcRadius-n)+" "+(i.rightSmallArcRadius-n)+" 0 0 0 "+(i.rightInnerExtent-t)+" "+(i.targetY-n)+" L "+(i.targetX-t)+" "+(i.targetY-n)+(t>0?" L "+i.targetX+" "+i.targetY:"")+"Z"):(r="M "+(i.targetX-t)+" "+(i.targetY-n)+"  L "+(i.rightInnerExtent-t)+" "+(i.targetY-n)+" A "+(i.rightLargeArcRadius+n)+" "+(i.rightSmallArcRadius+n)+" 0 0 0 "+(i.rightFullExtent-n-t)+" "+(i.targetY+i.rightSmallArcRadius)+" L "+(i.rightFullExtent-n-t)+" "+i.verticalRightInnerExtent,a&&o?r+=" A "+(i.rightLargeArcRadius+n)+" "+(i.rightLargeArcRadius+n)+" 0 0 0 "+(i.rightInnerExtent-n-t)+" "+(i.verticalFullExtent+n)+" L "+(i.rightFullExtent+n-t-(i.rightLargeArcRadius-n))+" "+(i.verticalFullExtent+n)+" A "+(i.rightLargeArcRadius+n)+" "+(i.rightLargeArcRadius+n)+" 0 0 0 "+(i.leftFullExtent+n)+" "+i.verticalLeftInnerExtent:a?r+=" A "+(i.rightLargeArcRadius-n)+" "+(i.rightSmallArcRadius-n)+" 0 0 1 "+(i.rightFullExtent-t-n-(i.rightLargeArcRadius-n))+" "+(i.verticalFullExtent-n)+" L "+(i.leftFullExtent+n+(i.rightLargeArcRadius-n))+" "+(i.verticalFullExtent-n)+" A "+(i.rightLargeArcRadius-n)+" "+(i.rightSmallArcRadius-n)+" 0 0 1 "+(i.leftFullExtent+n)+" "+i.verticalLeftInnerExtent:r+=" A "+(i.rightLargeArcRadius+n)+" "+(i.rightLargeArcRadius+n)+" 0 0 0 "+(i.rightInnerExtent-t)+" "+(i.verticalFullExtent+n)+" L "+i.leftInnerExtent+" "+(i.verticalFullExtent+n)+" A "+(i.leftLargeArcRadius+n)+" "+(i.leftLargeArcRadius+n)+" 0 0 0 "+(i.leftFullExtent+n)+" "+i.verticalLeftInnerExtent,r+=" L "+(i.leftFullExtent+n)+" "+(i.sourceY+i.leftSmallArcRadius)+" A "+(i.leftLargeArcRadius+n)+" "+(i.leftSmallArcRadius+n)+" 0 0 0 "+i.leftInnerExtent+" "+(i.sourceY-n)+" L "+i.sourceX+" "+(i.sourceY-n)+" L "+i.sourceX+" "+(i.sourceY+n)+" L "+i.leftInnerExtent+" "+(i.sourceY+n)+" A "+(i.leftLargeArcRadius-n)+" "+(i.leftSmallArcRadius-n)+" 0 0 1 "+(i.leftFullExtent-n)+" "+(i.sourceY+i.leftSmallArcRadius)+" L "+(i.leftFullExtent-n)+" "+i.verticalLeftInnerExtent,a&&o?r+=" A "+(i.rightLargeArcRadius-n)+" "+(i.rightSmallArcRadius-n)+" 0 0 1 "+(i.leftFullExtent-n-(i.rightLargeArcRadius-n))+" "+(i.verticalFullExtent-n)+" L "+(i.rightFullExtent+n-t+(i.rightLargeArcRadius-n))+" "+(i.verticalFullExtent-n)+" A "+(i.rightLargeArcRadius-n)+" "+(i.rightSmallArcRadius-n)+" 0 0 1 "+(i.rightFullExtent+n-t)+" "+i.verticalRightInnerExtent:a?r+=" A "+(i.rightLargeArcRadius+n)+" "+(i.rightLargeArcRadius+n)+" 0 0 0 "+(i.leftFullExtent+n)+" "+(i.verticalFullExtent+n)+" L "+(i.rightFullExtent-t-n)+" "+(i.verticalFullExtent+n)+" A "+(i.rightLargeArcRadius+n)+" "+(i.rightLargeArcRadius+n)+" 0 0 0 "+(i.rightFullExtent+n-t)+" "+i.verticalRightInnerExtent:r+=" A "+(i.leftLargeArcRadius-n)+" "+(i.leftLargeArcRadius-n)+" 0 0 1 "+i.leftInnerExtent+" "+(i.verticalFullExtent-n)+" L "+(i.rightInnerExtent-t)+" "+(i.verticalFullExtent-n)+" A "+(i.rightLargeArcRadius-n)+" "+(i.rightLargeArcRadius-n)+" 0 0 1 "+(i.rightFullExtent+n-t)+" "+i.verticalRightInnerExtent,r+=" L "+(i.rightFullExtent+n-t)+" "+(i.targetY+i.rightSmallArcRadius)+" A "+(i.rightLargeArcRadius-n)+" "+(i.rightSmallArcRadius-n)+" 0 0 1 "+(i.rightInnerExtent-t)+" "+(i.targetY+n)+" L "+(i.targetX-t)+" "+(i.targetY+n)+(t>0?" L "+i.targetX+" "+i.targetY:"")+"Z"),r}function JJ(){var e=.5;function t(r){var n=r.linkArrowLength;if(r.link.circular)return WZt(r.link,n);var i=Math.abs((r.link.target.x0-r.link.source.x1)/2);n>i&&(n=i);var a=r.link.source.x1,o=r.link.target.x0-n,s=zZt(a,o),l=s(e),u=s(1-e),c=r.link.y0-r.link.width/2,f=r.link.y0+r.link.width/2,h=r.link.y1-r.link.width/2,d=r.link.y1+r.link.width/2,v="M"+a+","+c,_="C"+l+","+c+" "+u+","+h+" "+o+","+h,b="C"+u+","+d+" "+l+","+f+" "+a+","+f,p=n>0?"L"+(o+n)+","+(h+r.link.width/2):"";return p+="L"+o+","+d,v+_+p+b+"Z"}return t}function XZt(e,t){var r=D5(t.color),n=Nu.nodePadAcross,i=e.nodePad/2;t.dx=t.x1-t.x0,t.dy=t.y1-t.y0;var a=t.dx,o=Math.max(.5,t.dy),s="node_"+t.pointNumber;return t.group&&(s=p1.randstr()),t.trace=e.trace,t.curveNumber=e.trace.index,{index:t.pointNumber,key:s,partOfGroup:t.partOfGroup||!1,group:t.group,traceId:e.key,trace:e.trace,node:t,nodePad:e.nodePad,nodeLineColor:e.nodeLineColor,nodeLineWidth:e.nodeLineWidth,textFont:e.textFont,size:e.horizontal?e.height:e.width,visibleWidth:Math.ceil(a),visibleHeight:o,zoneX:-n,zoneY:-i,zoneWidth:a+2*n,zoneHeight:o+2*i,labelY:e.horizontal?t.dy/2+1:t.dx/2+1,left:t.originalLayer===1,sizeAcross:e.width,forceLayouts:e.forceLayouts,horizontal:e.horizontal,darkBackground:r.getBrightness()<=128,tinyColorHue:sw.tinyRGB(r),tinyColorAlpha:r.getAlpha(),valueFormat:e.valueFormat,valueSuffix:e.valueSuffix,sankey:e.sankey,graph:e.graph,arrangement:e.arrangement,uniqueNodeLabelPathId:[e.guid,e.key,s].join("_"),interactionState:e.interactionState,figure:e}}function ZJ(e){e.attr("transform",function(t){return YJ(t.node.x0.toFixed(3),t.node.y0.toFixed(3))})}function ZZt(e){e.call(ZJ)}function rXe(e,t){e.call(ZZt),t.attr("d",JJ())}function KWe(e){e.attr("width",function(t){return t.node.x1-t.node.x0}).attr("height",function(t){return t.visibleHeight})}function XJ(e){return e.link.width>1||e.linkLineWidth>0}function JWe(e){var t=YJ(e.translateX,e.translateY);return t+(e.horizontal?"matrix(1 0 0 1 0 0)":"matrix(0 1 1 0 0 0)")}function $We(e,t,r){e.on(".basic",null).on("mouseover.basic",function(n){!n.interactionState.dragInProgress&&!n.partOfGroup&&(r.hover(this,n,t),n.interactionState.hovered=[this,n])}).on("mousemove.basic",function(n){!n.interactionState.dragInProgress&&!n.partOfGroup&&(r.follow(this,n),n.interactionState.hovered=[this,n])}).on("mouseout.basic",function(n){!n.interactionState.dragInProgress&&!n.partOfGroup&&(r.unhover(this,n,t),n.interactionState.hovered=!1)}).on("click.basic",function(n){n.interactionState.hovered&&(r.unhover(this,n,t),n.interactionState.hovered=!1),!n.interactionState.dragInProgress&&!n.partOfGroup&&r.select(this,n,t)})}function YZt(e,t,r,n){var i=R5.behavior.drag().origin(function(a){return{x:a.node.x0+a.visibleWidth/2,y:a.node.y0+a.visibleHeight/2}}).on("dragstart",function(a){if(a.arrangement!=="fixed"&&(p1.ensureSingle(n._fullLayout._infolayer,"g","dragcover",function(s){n._fullLayout._dragCover=s}),p1.raiseToTop(this),a.interactionState.dragInProgress=a.node,QWe(a.node),a.interactionState.hovered&&(r.nodeEvents.unhover.apply(0,a.interactionState.hovered),a.interactionState.hovered=!1),a.arrangement==="snap")){var o=a.traceId+"|"+a.key;a.forceLayouts[o]?a.forceLayouts[o].alpha(1):KZt(e,o,a,n),JZt(e,t,a,o,n)}}).on("drag",function(a){if(a.arrangement!=="fixed"){var o=R5.event.x,s=R5.event.y;a.arrangement==="snap"?(a.node.x0=o-a.visibleWidth/2,a.node.x1=o+a.visibleWidth/2,a.node.y0=s-a.visibleHeight/2,a.node.y1=s+a.visibleHeight/2):(a.arrangement==="freeform"&&(a.node.x0=o-a.visibleWidth/2,a.node.x1=o+a.visibleWidth/2),s=Math.max(0,Math.min(a.size-a.visibleHeight/2,s)),a.node.y0=s-a.visibleHeight/2,a.node.y1=s+a.visibleHeight/2),QWe(a.node),a.arrangement!=="snap"&&(a.sankey.update(a.graph),rXe(e.filter(nXe(a)),t))}}).on("dragend",function(a){if(a.arrangement!=="fixed"){a.interactionState.dragInProgress=!1;for(var o=0;o<a.node.childrenNodes.length;o++)a.node.childrenNodes[o].x=a.node.x,a.node.childrenNodes[o].y=a.node.y;a.arrangement!=="snap"&&iXe(a,n)}});e.on(".drag",null).call(i)}function KZt(e,t,r,n){eYt(r.graph.nodes);var i=r.graph.nodes.filter(function(a){return a.originalX===r.node.originalX}).filter(function(a){return!a.partOfGroup});r.forceLayouts[t]=ZWe.forceSimulation(i).alphaDecay(0).force("collide",ZWe.forceCollide().radius(function(a){return a.dy/2+r.nodePad/2}).strength(1).iterations(Nu.forceIterations)).force("constrain",$Zt(e,t,i,r,n)).stop()}function JZt(e,t,r,n,i){window.requestAnimationFrame(function a(){var o;for(o=0;o<Nu.forceTicksPerFrame;o++)r.forceLayouts[n].tick();var s=r.graph.nodes;if(tYt(s),r.sankey.update(r.graph),rXe(e.filter(nXe(r)),t),r.forceLayouts[n].alpha()>0)window.requestAnimationFrame(a);else{var l=r.node.originalX;r.node.x0=l-r.visibleWidth/2,r.node.x1=l+r.visibleWidth/2,iXe(r,i)}})}function $Zt(e,t,r,n){return function(){for(var a=0,o=0;o<r.length;o++){var s=r[o];s===n.interactionState.dragInProgress?(s.x=s.lastDraggedX,s.y=s.lastDraggedY):(s.vx=(s.originalX-s.x)/Nu.forceTicksPerFrame,s.y=Math.min(n.size-s.dy/2,Math.max(s.dy/2,s.y))),a=Math.max(a,Math.abs(s.vx),Math.abs(s.vy))}!n.interactionState.dragInProgress&&a<.1&&n.forceLayouts[t].alpha()>0&&n.forceLayouts[t].alpha(0)}}function iXe(e,t){for(var r=[],n=[],i=0;i<e.graph.nodes.length;i++){var a=(e.graph.nodes[i].x0+e.graph.nodes[i].x1)/2,o=(e.graph.nodes[i].y0+e.graph.nodes[i].y1)/2;r.push(a/e.figure.width),n.push(o/e.figure.height)}NZt.call("_guiRestyle",t,{"node.x":[r],"node.y":[n]},e.trace.index).then(function(){t._fullLayout._dragCover&&t._fullLayout._dragCover.remove()})}function QZt(e){var t=[],r;for(r=0;r<e.length;r++)e[r].originalX=(e[r].x0+e[r].x1)/2,e[r].originalY=(e[r].y0+e[r].y1)/2,t.indexOf(e[r].originalX)===-1&&t.push(e[r].originalX);for(t.sort(function(n,i){return n-i}),r=0;r<e.length;r++)e[r].originalLayerIndex=t.indexOf(e[r].originalX),e[r].originalLayer=e[r].originalLayerIndex/(t.length-1)}function QWe(e){e.lastDraggedX=e.x0+e.dx/2,e.lastDraggedY=e.y0+e.dy/2}function nXe(e){return function(t){return t.node.originalX===e.node.originalX}}function eYt(e){for(var t=0;t<e.length;t++)e[t].y=(e[t].y0+e[t].y1)/2,e[t].x=(e[t].x0+e[t].x1)/2}function tYt(e){for(var t=0;t<e.length;t++)e[t].y0=e[t].y-e[t].dy/2,e[t].y1=e[t].y0+e[t].dy,e[t].x0=e[t].x-e[t].dx/2,e[t].x1=e[t].x0+e[t].dx}aXe.exports=function(e,t,r,n,i){var a=e._context.staticPlot,o=!1;p1.ensureSingle(e._fullLayout._infolayer,"g","first-render",function(){o=!0});var s=e._fullLayout._dragCover,l=r.filter(function(b){return eXe(b).trace.visible}).map(HZt.bind(null,n)),u=t.selectAll("."+Nu.cn.sankey).data(l,$k);u.exit().remove(),u.enter().append("g").classed(Nu.cn.sankey,!0).style("box-sizing","content-box").style("position","absolute").style("left",0).style("shape-rendering","geometricPrecision").style("pointer-events",a?"none":"auto").attr("transform",JWe),u.each(function(b,p){e._fullData[p]._sankey=b;var k="bgsankey-"+b.trace.uid+"-"+p;p1.ensureSingle(e._fullLayout._draggers,"rect",k),e._fullData[p]._bgRect=R5.select("."+k),e._fullData[p]._bgRect.style("pointer-events",a?"none":"all").attr("width",b.width).attr("height",b.height).attr("x",b.translateX).attr("y",b.translateY).classed("bgsankey",!0).style({fill:"transparent","stroke-width":0})}),u.transition().ease(Nu.ease).duration(Nu.duration).attr("transform",JWe);var c=u.selectAll("."+Nu.cn.sankeyLinks).data(E7,$k);c.enter().append("g").classed(Nu.cn.sankeyLinks,!0).style("fill","none");var f=c.selectAll("."+Nu.cn.sankeyLink).data(function(b){var p=b.graph.links;return p.filter(function(k){return k.value}).map(jZt.bind(null,b))},$k);f.enter().append("path").classed(Nu.cn.sankeyLink,!0).call($We,u,i.linkEvents),f.style("stroke",function(b){return XJ(b)?sw.tinyRGB(D5(b.linkLineColor)):b.tinyColorHue}).style("stroke-opacity",function(b){return XJ(b)?sw.opacity(b.linkLineColor):b.tinyColorAlpha}).style("fill",function(b){return b.tinyColorHue}).style("fill-opacity",function(b){return b.tinyColorAlpha}).style("stroke-width",function(b){return XJ(b)?b.linkLineWidth:1}).attr("d",JJ()),f.style("opacity",function(){return e._context.staticPlot||o||s?1:0}).transition().ease(Nu.ease).duration(Nu.duration).style("opacity",1),f.exit().transition().ease(Nu.ease).duration(Nu.duration).style("opacity",0).remove();var h=u.selectAll("."+Nu.cn.sankeyNodeSet).data(E7,$k);h.enter().append("g").classed(Nu.cn.sankeyNodeSet,!0),h.style("cursor",function(b){switch(b.arrangement){case"fixed":return"default";case"perpendicular":return"ns-resize";default:return"move"}});var d=h.selectAll("."+Nu.cn.sankeyNode).data(function(b){var p=b.graph.nodes;return QZt(p),p.map(XZt.bind(null,b))},$k);d.enter().append("g").classed(Nu.cn.sankeyNode,!0).call(ZJ).style("opacity",function(b){return(e._context.staticPlot||o)&&!b.partOfGroup?1:0}),d.call($We,u,i.nodeEvents).call(YZt,f,i,e),d.transition().ease(Nu.ease).duration(Nu.duration).call(ZJ).style("opacity",function(b){return b.partOfGroup?0:1}),d.exit().transition().ease(Nu.ease).duration(Nu.duration).style("opacity",0).remove();var v=d.selectAll("."+Nu.cn.nodeRect).data(E7);v.enter().append("rect").classed(Nu.cn.nodeRect,!0).call(KWe),v.style("stroke-width",function(b){return b.nodeLineWidth}).style("stroke",function(b){return sw.tinyRGB(D5(b.nodeLineColor))}).style("stroke-opacity",function(b){return sw.opacity(b.nodeLineColor)}).style("fill",function(b){return b.tinyColorHue}).style("fill-opacity",function(b){return b.tinyColorAlpha}),v.transition().ease(Nu.ease).duration(Nu.duration).call(KWe);var _=d.selectAll("."+Nu.cn.nodeLabel).data(E7);_.enter().append("text").classed(Nu.cn.nodeLabel,!0).style("cursor","default"),_.attr("data-notex",1).text(function(b){return b.node.label}).each(function(b){var p=R5.select(this);qZt.font(p,b.textFont),YWe.convertToTspans(p,e)}).attr("text-anchor",function(b){return b.horizontal&&b.left?"end":"start"}).attr("transform",function(b){var p=R5.select(this),k=YWe.lineCount(p),E=b.textFont.size*((k-1)*VZt-UZt),S=b.nodeLineWidth/2+GZt,L=((b.horizontal?b.visibleHeight:b.visibleWidth)-E)/2;b.horizontal&&(b.left?S=-S:S+=b.visibleWidth);var x=b.horizontal?"":"scale(-1,1)"+BZt(90);return YJ(b.horizontal?S:L,b.horizontal?L:S)+x}),_.transition().ease(Nu.ease).duration(Nu.duration)}});var e$=ye((r2r,pXe)=>{"use strict";var Zv=Oa(),QJ=Dr(),k7=QJ.numberFormat,rYt=oXe(),F5=vf(),iYt=ka(),Sx=WJ().cn,Qk=QJ._;function sXe(e){return e!==""}function z5(e,t){return e.filter(function(r){return r.key===t.traceId})}function lXe(e,t){Zv.select(e).select("path").style("fill-opacity",t),Zv.select(e).select("rect").style("fill-opacity",t)}function uXe(e){Zv.select(e).select("text.name").style("fill","black")}function cXe(e){return function(t){return e.node.sourceLinks.indexOf(t.link)!==-1||e.node.targetLinks.indexOf(t.link)!==-1}}function fXe(e){return function(t){return t.node.sourceLinks.indexOf(e.link)!==-1||t.node.targetLinks.indexOf(e.link)!==-1}}function hXe(e,t,r){t&&r&&z5(r,t).selectAll("."+Sx.sankeyLink).filter(cXe(t)).call(dXe.bind(0,t,r,!1))}function $J(e,t,r){t&&r&&z5(r,t).selectAll("."+Sx.sankeyLink).filter(cXe(t)).call(vXe.bind(0,t,r,!1))}function dXe(e,t,r,n){n.style("fill",function(i){if(!i.link.concentrationscale)return i.tinyColorHoverHue}).style("fill-opacity",function(i){if(!i.link.concentrationscale)return i.tinyColorHoverAlpha}),n.each(function(i){var a=i.link.label;a!==""&&z5(t,e).selectAll("."+Sx.sankeyLink).filter(function(o){return o.link.label===a}).style("fill",function(o){if(!o.link.concentrationscale)return o.tinyColorHoverHue}).style("fill-opacity",function(o){if(!o.link.concentrationscale)return o.tinyColorHoverAlpha})}),r&&z5(t,e).selectAll("."+Sx.sankeyNode).filter(fXe(e)).call(hXe)}function vXe(e,t,r,n){n.style("fill",function(i){return i.tinyColorHue}).style("fill-opacity",function(i){return i.tinyColorAlpha}),n.each(function(i){var a=i.link.label;a!==""&&z5(t,e).selectAll("."+Sx.sankeyLink).filter(function(o){return o.link.label===a}).style("fill",function(o){return o.tinyColorHue}).style("fill-opacity",function(o){return o.tinyColorAlpha})}),r&&z5(t,e).selectAll(Sx.sankeyNode).filter(fXe(e)).call($J)}function kf(e,t){var r=e.hoverlabel||{},n=QJ.nestedProperty(r,t).get();return Array.isArray(n)?!1:n}pXe.exports=function(t,r){for(var n=t._fullLayout,i=n._paper,a=n._size,o=0;o<t._fullData.length;o++)if(t._fullData[o].visible&&t._fullData[o].type===Sx.sankey&&!t._fullData[o]._viewInitial){var s=t._fullData[o].node;t._fullData[o]._viewInitial={node:{groups:s.groups.slice(),x:s.x.slice(),y:s.y.slice()}}}var l=function(L,x){var C=x.link;C.originalEvent=Zv.event,t._hoverdata=[C],F5.click(t,{target:!0})},u=function(L,x,C){t._fullLayout.hovermode!==!1&&(Zv.select(L).call(dXe.bind(0,x,C,!0)),x.link.trace.link.hoverinfo!=="skip"&&(x.link.fullData=x.link.trace,t.emit("plotly_hover",{event:Zv.event,points:[x.link]})))},c=Qk(t,"source:")+" ",f=Qk(t,"target:")+" ",h=Qk(t,"concentration:")+" ",d=Qk(t,"incoming flow count:")+" ",v=Qk(t,"outgoing flow count:")+" ",_=function(L,x){if(t._fullLayout.hovermode===!1)return;var C=x.link.trace.link;if(C.hoverinfo==="none"||C.hoverinfo==="skip")return;var M=[];function g(Z){var j,N;Z.circular?(j=(Z.circularPathData.leftInnerExtent+Z.circularPathData.rightInnerExtent)/2,N=Z.circularPathData.verticalFullExtent):(j=(Z.source.x1+Z.target.x0)/2,N=(Z.y0+Z.y1)/2);var H=[j,N];return Z.trace.orientation==="v"&&H.reverse(),H[0]+=x.parent.translateX,H[1]+=x.parent.translateY,H}for(var P=0,T=0;T<x.flow.links.length;T++){var z=x.flow.links[T];if(!(t._fullLayout.hovermode==="closest"&&x.link.pointNumber!==z.pointNumber)){x.link.pointNumber===z.pointNumber&&(P=T),z.fullData=z.trace,C=x.link.trace.link;var O=g(z),V={valueLabel:k7(x.valueFormat)(z.value)+x.valueSuffix};M.push({x:O[0],y:O[1],name:V.valueLabel,text:[z.label||"",c+z.source.label,f+z.target.label,z.concentrationscale?h+k7("%0.2f")(z.flow.labelConcentration):""].filter(sXe).join("<br>"),color:kf(C,"bgcolor")||iYt.addOpacity(z.color,1),borderColor:kf(C,"bordercolor"),fontFamily:kf(C,"font.family"),fontSize:kf(C,"font.size"),fontColor:kf(C,"font.color"),fontWeight:kf(C,"font.weight"),fontStyle:kf(C,"font.style"),fontVariant:kf(C,"font.variant"),fontTextcase:kf(C,"font.textcase"),fontLineposition:kf(C,"font.lineposition"),fontShadow:kf(C,"font.shadow"),nameLength:kf(C,"namelength"),textAlign:kf(C,"align"),idealAlign:Zv.event.x<O[0]?"right":"left",hovertemplate:C.hovertemplate,hovertemplateLabels:V,eventData:[z]})}}var G=F5.loneHover(M,{container:n._hoverlayer.node(),outerContainer:n._paper.node(),gd:t,anchorIndex:P});G.each(function(){var Z=this;x.link.concentrationscale||lXe(Z,.65),uXe(Z)})},b=function(L,x,C){t._fullLayout.hovermode!==!1&&(Zv.select(L).call(vXe.bind(0,x,C,!0)),x.link.trace.link.hoverinfo!=="skip"&&(x.link.fullData=x.link.trace,t.emit("plotly_unhover",{event:Zv.event,points:[x.link]})),F5.loneUnhover(n._hoverlayer.node()))},p=function(L,x,C){var M=x.node;M.originalEvent=Zv.event,t._hoverdata=[M],Zv.select(L).call($J,x,C),F5.click(t,{target:!0})},k=function(L,x,C){t._fullLayout.hovermode!==!1&&(Zv.select(L).call(hXe,x,C),x.node.trace.node.hoverinfo!=="skip"&&(x.node.fullData=x.node.trace,t.emit("plotly_hover",{event:Zv.event,points:[x.node]})))},E=function(L,x){if(t._fullLayout.hovermode!==!1){var C=x.node.trace.node;if(!(C.hoverinfo==="none"||C.hoverinfo==="skip")){var M=Zv.select(L).select("."+Sx.nodeRect),g=t._fullLayout._paperdiv.node().getBoundingClientRect(),P=M.node().getBoundingClientRect(),T=P.left-2-g.left,z=P.right+2-g.left,O=P.top+P.height/4-g.top,V={valueLabel:k7(x.valueFormat)(x.node.value)+x.valueSuffix};x.node.fullData=x.node.trace,t._fullLayout._calcInverseTransform(t);var G=t._fullLayout._invScaleX,Z=t._fullLayout._invScaleY,j=F5.loneHover({x0:G*T,x1:G*z,y:Z*O,name:k7(x.valueFormat)(x.node.value)+x.valueSuffix,text:[x.node.label,d+x.node.targetLinks.length,v+x.node.sourceLinks.length].filter(sXe).join("<br>"),color:kf(C,"bgcolor")||x.tinyColorHue,borderColor:kf(C,"bordercolor"),fontFamily:kf(C,"font.family"),fontSize:kf(C,"font.size"),fontColor:kf(C,"font.color"),fontWeight:kf(C,"font.weight"),fontStyle:kf(C,"font.style"),fontVariant:kf(C,"font.variant"),fontTextcase:kf(C,"font.textcase"),fontLineposition:kf(C,"font.lineposition"),fontShadow:kf(C,"font.shadow"),nameLength:kf(C,"namelength"),textAlign:kf(C,"align"),idealAlign:"left",hovertemplate:C.hovertemplate,hovertemplateLabels:V,eventData:[x.node]},{container:n._hoverlayer.node(),outerContainer:n._paper.node(),gd:t});lXe(j,.85),uXe(j)}}},S=function(L,x,C){t._fullLayout.hovermode!==!1&&(Zv.select(L).call($J,x,C),x.node.trace.node.hoverinfo!=="skip"&&(x.node.fullData=x.node.trace,t.emit("plotly_unhover",{event:Zv.event,points:[x.node]})),F5.loneUnhover(n._hoverlayer.node()))};rYt(t,i,r,{width:a.w,height:a.h,margin:{t:a.t,r:a.r,b:a.b,l:a.l}},{linkEvents:{hover:u,follow:_,unhover:b,select:l},nodeEvents:{hover:k,follow:E,unhover:S,select:p}})}});var gXe=ye(lw=>{"use strict";var nYt=mc().overrideAll,aYt=Id().getModuleCalcData,oYt=e$(),sYt=B1(),lYt=Sg(),uYt=yv(),cYt=Of().prepSelect,t$=Dr(),fYt=qa(),C7="sankey";lw.name=C7;lw.baseLayoutAttrOverrides=nYt({hoverlabel:sYt.hoverlabel},"plot","nested");lw.plot=function(e){var t=aYt(e.calcdata,C7)[0];oYt(e,t),lw.updateFx(e)};lw.clean=function(e,t,r,n){var i=n._has&&n._has(C7),a=t._has&&t._has(C7);i&&!a&&(n._paperdiv.selectAll(".sankey").remove(),n._paperdiv.selectAll(".bgsankey").remove())};lw.updateFx=function(e){for(var t=0;t<e._fullData.length;t++)hYt(e,t)};function hYt(e,t){var r=e._fullData[t],n=e._fullLayout,i=n.dragmode,a=n.dragmode==="pan"?"move":"crosshair",o=r._bgRect;if(o&&!(i==="pan"||i==="zoom")){lYt(o,a);var s={_id:"x",c2p:t$.identity,_offset:r._sankey.translateX,_length:r._sankey.width},l={_id:"y",c2p:t$.identity,_offset:r._sankey.translateY,_length:r._sankey.height},u={gd:e,element:o.node(),plotinfo:{id:t,xaxis:s,yaxis:l,fillRangeItems:t$.noop},subplot:t,xaxes:[s],yaxes:[l],doneFnCompleted:function(c){var f=e._fullData[t],h,d=f.node.groups.slice(),v=[];function _(E){for(var S=f._sankey.graph.nodes,L=0;L<S.length;L++)if(S[L].pointNumber===E)return S[L]}for(var b=0;b<c.length;b++){var p=_(c[b].pointNumber);if(p)if(p.group){for(var k=0;k<p.childrenNodes.length;k++)v.push(p.childrenNodes[k].pointNumber);d[p.pointNumber-f.node._count]=!1}else v.push(p.pointNumber)}h=d.filter(Boolean).concat([v]),fYt.call("_guiRestyle",e,{"node.groups":[h]},t)}};u.prepFn=function(c,f,h){cYt(c,f,h,u,i)},uYt.init(u)}}});var yXe=ye((n2r,mXe)=>{"use strict";mXe.exports=function(t,r){for(var n=t.cd,i=[],a=n[0].trace,o=a._sankey.graph.nodes,s=0;s<o.length;s++){var l=o[s];if(!l.partOfGroup){var u=[(l.x0+l.x1)/2,(l.y0+l.y1)/2];a.orientation==="v"&&u.reverse(),r&&r.contains(u,!1,s,t)&&i.push({pointNumber:l.pointNumber})}}return i}});var xXe=ye((a2r,_Xe)=>{"use strict";_Xe.exports={attributes:GJ(),supplyDefaults:TWe(),calc:kWe(),plot:e$(),moduleType:"trace",name:"sankey",basePlotModule:gXe(),selectPoints:yXe(),categories:["noOpacity"],meta:{}}});var wXe=ye((o2r,bXe)=>{"use strict";bXe.exports=xXe()});var AXe=ye(O5=>{"use strict";var TXe=Mc();O5.name="indicator";O5.plot=function(e,t,r,n){TXe.plotBasePlot(O5.name,e,t,r,n)};O5.clean=function(e,t,r,n){TXe.cleanBasePlot(O5.name,e,t,r,n)}});var i$=ye((l2r,LXe)=>{"use strict";var Mx=Ao().extendFlat,MXe=Ao().extendDeep,dYt=mc().overrideAll,EXe=ec(),kXe=Lh(),vYt=Cc().attributes,Bf=Rd(),pYt=vl().templatedArray,L7=XT(),SXe=df().descriptionOnlyNumbers,r$=EXe({editType:"plot",colorEditType:"plot"}),eC={color:{valType:"color",editType:"plot"},line:{color:{valType:"color",dflt:kXe.defaultLine,editType:"plot"},width:{valType:"number",min:0,dflt:0,editType:"plot"},editType:"calc"},thickness:{valType:"number",min:0,max:1,dflt:1,editType:"plot"},editType:"calc"},CXe={valType:"info_array",items:[{valType:"number",editType:"plot"},{valType:"number",editType:"plot"}],editType:"plot"},gYt=pYt("step",MXe({},eC,{range:CXe}));LXe.exports={mode:{valType:"flaglist",editType:"calc",flags:["number","delta","gauge"],dflt:"number"},value:{valType:"number",editType:"calc",anim:!0},align:{valType:"enumerated",values:["left","center","right"],editType:"plot"},domain:vYt({name:"indicator",trace:!0,editType:"calc"}),title:{text:{valType:"string",editType:"plot"},align:{valType:"enumerated",values:["left","center","right"],editType:"plot"},font:Mx({},r$,{}),editType:"plot"},number:{valueformat:{valType:"string",dflt:"",editType:"plot",description:SXe("value")},font:Mx({},r$,{}),prefix:{valType:"string",dflt:"",editType:"plot"},suffix:{valType:"string",dflt:"",editType:"plot"},editType:"plot"},delta:{reference:{valType:"number",editType:"calc"},position:{valType:"enumerated",values:["top","bottom","left","right"],dflt:"bottom",editType:"plot"},relative:{valType:"boolean",editType:"plot",dflt:!1},valueformat:{valType:"string",editType:"plot",description:SXe("value")},increasing:{symbol:{valType:"string",dflt:L7.INCREASING.SYMBOL,editType:"plot"},color:{valType:"color",dflt:L7.INCREASING.COLOR,editType:"plot"},editType:"plot"},decreasing:{symbol:{valType:"string",dflt:L7.DECREASING.SYMBOL,editType:"plot"},color:{valType:"color",dflt:L7.DECREASING.COLOR,editType:"plot"},editType:"plot"},font:Mx({},r$,{}),prefix:{valType:"string",dflt:"",editType:"plot"},suffix:{valType:"string",dflt:"",editType:"plot"},editType:"calc"},gauge:{shape:{valType:"enumerated",editType:"plot",dflt:"angular",values:["angular","bullet"]},bar:MXe({},eC,{color:{dflt:"green"}}),bgcolor:{valType:"color",editType:"plot"},bordercolor:{valType:"color",dflt:kXe.defaultLine,editType:"plot"},borderwidth:{valType:"number",min:0,dflt:1,editType:"plot"},axis:dYt({range:CXe,visible:Mx({},Bf.visible,{dflt:!0}),tickmode:Bf.minor.tickmode,nticks:Bf.nticks,tick0:Bf.tick0,dtick:Bf.dtick,tickvals:Bf.tickvals,ticktext:Bf.ticktext,ticks:Mx({},Bf.ticks,{dflt:"outside"}),ticklen:Bf.ticklen,tickwidth:Bf.tickwidth,tickcolor:Bf.tickcolor,ticklabelstep:Bf.ticklabelstep,showticklabels:Bf.showticklabels,labelalias:Bf.labelalias,tickfont:EXe({}),tickangle:Bf.tickangle,tickformat:Bf.tickformat,tickformatstops:Bf.tickformatstops,tickprefix:Bf.tickprefix,showtickprefix:Bf.showtickprefix,ticksuffix:Bf.ticksuffix,showticksuffix:Bf.showticksuffix,separatethousands:Bf.separatethousands,exponentformat:Bf.exponentformat,minexponent:Bf.minexponent,showexponent:Bf.showexponent,editType:"plot"},"plot"),steps:gYt,threshold:{line:{color:Mx({},eC.line.color,{}),width:Mx({},eC.line.width,{dflt:1}),editType:"plot"},thickness:Mx({},eC.thickness,{dflt:.85}),value:{valType:"number",editType:"calc",dflt:!1},editType:"plot"},editType:"plot"}}});var n$=ye((u2r,PXe)=>{"use strict";PXe.exports={defaultNumberFontSize:80,bulletNumberDomainSize:.25,bulletPadding:.025,innerRadius:.75,valueThickness:.5,titlePadding:5,horizontalPadding:10}});var DXe=ye((c2r,RXe)=>{"use strict";var ty=Dr(),I7=i$(),mYt=Cc().defaults,IXe=vl(),yYt=Yd(),P7=n$(),_Yt=bb(),xYt=M3(),bYt=e_(),wYt=t_();function TYt(e,t,r,n){function i(x,C){return ty.coerce(e,t,I7,x,C)}mYt(t,n,i),i("mode"),t._hasNumber=t.mode.indexOf("number")!==-1,t._hasDelta=t.mode.indexOf("delta")!==-1,t._hasGauge=t.mode.indexOf("gauge")!==-1;var a=i("value");t._range=[0,typeof a=="number"?1.5*a:1];var o=new Array(2),s;if(t._hasNumber){i("number.valueformat");var l=ty.extendFlat({},n.font);l.size=void 0,ty.coerceFont(i,"number.font",l),t.number.font.size===void 0&&(t.number.font.size=P7.defaultNumberFontSize,o[0]=!0),i("number.prefix"),i("number.suffix"),s=t.number.font.size}var u;if(t._hasDelta){var c=ty.extendFlat({},n.font);c.size=void 0,ty.coerceFont(i,"delta.font",c),t.delta.font.size===void 0&&(t.delta.font.size=(t._hasNumber?.5:1)*(s||P7.defaultNumberFontSize),o[1]=!0),i("delta.reference",t.value),i("delta.relative"),i("delta.valueformat",t.delta.relative?"2%":""),i("delta.increasing.symbol"),i("delta.increasing.color"),i("delta.decreasing.symbol"),i("delta.decreasing.color"),i("delta.position"),i("delta.prefix"),i("delta.suffix"),u=t.delta.font.size}t._scaleNumbers=(!t._hasNumber||o[0])&&(!t._hasDelta||o[1])||!1;var f=ty.extendFlat({},n.font);f.size=.25*(s||u||P7.defaultNumberFontSize),ty.coerceFont(i,"title.font",f),i("title.text");var h,d,v,_;function b(x,C){return ty.coerce(h,d,I7.gauge,x,C)}function p(x,C){return ty.coerce(v,_,I7.gauge.axis,x,C)}if(t._hasGauge){h=e.gauge,h||(h={}),d=IXe.newContainer(t,"gauge"),b("shape");var k=t._isBullet=t.gauge.shape==="bullet";k||i("title.align","center");var E=t._isAngular=t.gauge.shape==="angular";E||i("align","center"),b("bgcolor",n.paper_bgcolor),b("borderwidth"),b("bordercolor"),b("bar.color"),b("bar.line.color"),b("bar.line.width");var S=P7.valueThickness*(t.gauge.shape==="bullet"?.5:1);b("bar.thickness",S),yYt(h,d,{name:"steps",handleItemDefaults:AYt}),b("threshold.value"),b("threshold.thickness"),b("threshold.line.width"),b("threshold.line.color"),v={},h&&(v=h.axis||{}),_=IXe.newContainer(d,"axis"),p("visible"),t._range=p("range",t._range);var L={font:n.font,noAutotickangles:!0,outerTicks:!0,noTicklabelshift:!0,noTicklabelstandoff:!0};_Yt(v,_,p,"linear"),wYt(v,_,p,"linear",L),bYt(v,_,p,"linear",L),xYt(v,_,p,L)}else i("title.align","center"),i("align","center"),t._isAngular=t._isBullet=!1;t._length=null}function AYt(e,t){function r(n,i){return ty.coerce(e,t,I7.gauge.steps,n,i)}r("color"),r("line.color"),r("line.width"),r("range"),r("thickness")}RXe.exports={supplyDefaults:TYt}});var zXe=ye((f2r,FXe)=>{"use strict";function SYt(e,t){var r=[],n=t.value;typeof t._lastValue!="number"&&(t._lastValue=t.value);var i=t._lastValue,a=i;return t._hasDelta&&typeof t.delta.reference=="number"&&(a=t.delta.reference),r[0]={y:n,lastY:i,delta:n-a,relativeDelta:(n-a)/a},r}FXe.exports={calc:SYt}});var VXe=ye((h2r,UXe)=>{"use strict";var dw=Oa(),MYt=(F2(),ob(D2)).interpolate,OXe=(F2(),ob(D2)).interpolateNumber,Ex=Dr(),EYt=Ex.strScale,rC=Ex.strTranslate,kYt=Ex.rad2deg,CYt=$h().MID_SHIFT,hw=So(),uw=n$(),D7=ru(),sv=ho(),LYt=i4(),PYt=uI(),IYt=Rd(),q5=ka(),a$={left:"start",center:"middle",right:"end"},cw={left:0,center:.5,right:1},qXe=/[yzafpnµmkMGTPEZY]/;function iC(e){return e&&e.duration>0}UXe.exports=function(t,r,n,i){var a=t._fullLayout,o;iC(n)&&i&&(o=i()),Ex.makeTraceGroups(a._indicatorlayer,r,"trace").each(function(s){var l=s[0],u=l.trace,c=dw.select(this),f=u._hasGauge,h=u._isAngular,d=u._isBullet,v=u.domain,_={w:a._size.w*(v.x[1]-v.x[0]),h:a._size.h*(v.y[1]-v.y[0]),l:a._size.l+a._size.w*v.x[0],r:a._size.r+a._size.w*(1-v.x[1]),t:a._size.t+a._size.h*(1-v.y[1]),b:a._size.b+a._size.h*v.y[0]},b=_.l+_.w/2,p=_.t+_.h/2,k=Math.min(_.w/2,_.h),E=uw.innerRadius*k,S,L,x,C=u.align||"center";if(L=p,!f)S=_.l+cw[C]*_.w,x=function(j){return BXe(j,_.w,_.h)};else if(h&&(S=b,L=p+k/2,x=function(j){return OYt(j,.9*E)}),d){var M=uw.bulletPadding,g=1-uw.bulletNumberDomainSize+M;S=_.l+(g+(1-g)*cw[C])*_.w,x=function(j){return BXe(j,(uw.bulletNumberDomainSize-M)*_.w,_.h)}}FYt(t,c,s,{numbersX:S,numbersY:L,numbersScaler:x,transitionOpts:n,onComplete:o});var P,T;f&&(P={range:u.gauge.axis.range,color:u.gauge.bgcolor,line:{color:u.gauge.bordercolor,width:0},thickness:1},T={range:u.gauge.axis.range,color:"rgba(0, 0, 0, 0)",line:{color:u.gauge.bordercolor,width:u.gauge.borderwidth},thickness:1});var z=c.selectAll("g.angular").data(h?s:[]);z.exit().remove();var O=c.selectAll("g.angularaxis").data(h?s:[]);O.exit().remove(),h&&DYt(t,c,s,{radius:k,innerRadius:E,gauge:z,layer:O,size:_,gaugeBg:P,gaugeOutline:T,transitionOpts:n,onComplete:o});var V=c.selectAll("g.bullet").data(d?s:[]);V.exit().remove();var G=c.selectAll("g.bulletaxis").data(d?s:[]);G.exit().remove(),d&&RYt(t,c,s,{gauge:V,layer:G,size:_,gaugeBg:P,gaugeOutline:T,transitionOpts:n,onComplete:o});var Z=c.selectAll("text.title").data(s);Z.exit().remove(),Z.enter().append("text").classed("title",!0),Z.attr("text-anchor",function(){return d?a$.right:a$[u.title.align]}).text(u.title.text).call(hw.font,u.title.font).call(D7.convertToTspans,t),Z.attr("transform",function(){var j=_.l+_.w*cw[u.title.align],N,H=uw.titlePadding,te=hw.bBox(Z.node());if(f){if(h)if(u.gauge.axis.visible){var oe=hw.bBox(O.node());N=oe.top-H-te.bottom}else N=_.t+_.h/2-k/2-te.bottom-H;d&&(N=L-(te.top+te.bottom)/2,j=_.l-uw.bulletPadding*_.w)}else N=u._numbersTop-H-te.bottom;return rC(j,N)})})};function RYt(e,t,r,n){var i=r[0].trace,a=n.gauge,o=n.layer,s=n.gaugeBg,l=n.gaugeOutline,u=n.size,c=i.domain,f=n.transitionOpts,h=n.onComplete,d,v,_,b,p;a.enter().append("g").classed("bullet",!0),a.attr("transform",rC(u.l,u.t)),o.enter().append("g").classed("bulletaxis",!0).classed("crisp",!0),o.selectAll("g.xbulletaxistick,path,text").remove();var k=u.h,E=i.gauge.bar.thickness*k,S=c.x[0],L=c.x[0]+(c.x[1]-c.x[0])*(i._hasNumber||i._hasDelta?1-uw.bulletNumberDomainSize:1);d=tC(e,i.gauge.axis),d._id="xbulletaxis",d.domain=[S,L],d.setScale(),v=sv.calcTicks(d),_=sv.makeTransTickFn(d),b=sv.getTickSigns(d)[2],p=u.t+u.h,d.visible&&(sv.drawTicks(e,d,{vals:d.ticks==="inside"?sv.clipEnds(d,v):v,layer:o,path:sv.makeTickPath(d,p,b),transFn:_}),sv.drawLabels(e,d,{vals:v,layer:o,transFn:_,labelFns:sv.makeLabelFns(d,p)}));function x(O){O.attr("width",function(V){return Math.max(0,d.c2p(V.range[1])-d.c2p(V.range[0]))}).attr("x",function(V){return d.c2p(V.range[0])}).attr("y",function(V){return .5*(1-V.thickness)*k}).attr("height",function(V){return V.thickness*k})}var C=[s].concat(i.gauge.steps),M=a.selectAll("g.bg-bullet").data(C);M.enter().append("g").classed("bg-bullet",!0).append("rect"),M.select("rect").call(x).call(fw),M.exit().remove();var g=a.selectAll("g.value-bullet").data([i.gauge.bar]);g.enter().append("g").classed("value-bullet",!0).append("rect"),g.select("rect").attr("height",E).attr("y",(k-E)/2).call(fw),iC(f)?g.select("rect").transition().duration(f.duration).ease(f.easing).each("end",function(){h&&h()}).each("interrupt",function(){h&&h()}).attr("width",Math.max(0,d.c2p(Math.min(i.gauge.axis.range[1],r[0].y)))):g.select("rect").attr("width",typeof r[0].y=="number"?Math.max(0,d.c2p(Math.min(i.gauge.axis.range[1],r[0].y))):0),g.exit().remove();var P=r.filter(function(){return i.gauge.threshold.value||i.gauge.threshold.value===0}),T=a.selectAll("g.threshold-bullet").data(P);T.enter().append("g").classed("threshold-bullet",!0).append("line"),T.select("line").attr("x1",d.c2p(i.gauge.threshold.value)).attr("x2",d.c2p(i.gauge.threshold.value)).attr("y1",(1-i.gauge.threshold.thickness)/2*k).attr("y2",(1-(1-i.gauge.threshold.thickness)/2)*k).call(q5.stroke,i.gauge.threshold.line.color).style("stroke-width",i.gauge.threshold.line.width),T.exit().remove();var z=a.selectAll("g.gauge-outline").data([l]);z.enter().append("g").classed("gauge-outline",!0).append("rect"),z.select("rect").call(x).call(fw),z.exit().remove()}function DYt(e,t,r,n){var i=r[0].trace,a=n.size,o=n.radius,s=n.innerRadius,l=n.gaugeBg,u=n.gaugeOutline,c=[a.l+a.w/2,a.t+a.h/2+o/2],f=n.gauge,h=n.layer,d=n.transitionOpts,v=n.onComplete,_=Math.PI/2;function b(_e){var Ee=i.gauge.axis.range[0],Ce=i.gauge.axis.range[1],me=(_e-Ee)/(Ce-Ee)*Math.PI-_;return me<-_?-_:me>_?_:me}function p(_e){return dw.svg.arc().innerRadius((s+o)/2-_e/2*(o-s)).outerRadius((s+o)/2+_e/2*(o-s)).startAngle(-_)}function k(_e){_e.attr("d",function(Ee){return p(Ee.thickness).startAngle(b(Ee.range[0])).endAngle(b(Ee.range[1]))()})}var E,S,L,x;f.enter().append("g").classed("angular",!0),f.attr("transform",rC(c[0],c[1])),h.enter().append("g").classed("angularaxis",!0).classed("crisp",!0),h.selectAll("g.xangularaxistick,path,text").remove(),E=tC(e,i.gauge.axis),E.type="linear",E.range=i.gauge.axis.range,E._id="xangularaxis",E.ticklabeloverflow="allow",E.setScale();var C=function(_e){return(E.range[0]-_e.x)/(E.range[1]-E.range[0])*Math.PI+Math.PI},M={},g=sv.makeLabelFns(E,0),P=g.labelStandoff;M.xFn=function(_e){var Ee=C(_e);return Math.cos(Ee)*P},M.yFn=function(_e){var Ee=C(_e),Ce=Math.sin(Ee)>0?.2:1;return-Math.sin(Ee)*(P+_e.fontSize*Ce)+Math.abs(Math.cos(Ee))*(_e.fontSize*CYt)},M.anchorFn=function(_e){var Ee=C(_e),Ce=Math.cos(Ee);return Math.abs(Ce)<.1?"middle":Ce>0?"start":"end"},M.heightFn=function(_e,Ee,Ce){var me=C(_e);return-.5*(1+Math.sin(me))*Ce};var T=function(_e){return rC(c[0]+o*Math.cos(_e),c[1]-o*Math.sin(_e))};L=function(_e){return T(C(_e))};var z=function(_e){var Ee=C(_e);return T(Ee)+"rotate("+-kYt(Ee)+")"};if(S=sv.calcTicks(E),x=sv.getTickSigns(E)[2],E.visible){x=E.ticks==="inside"?-1:1;var O=(E.linewidth||1)/2;sv.drawTicks(e,E,{vals:S,layer:h,path:"M"+x*O+",0h"+x*E.ticklen,transFn:z}),sv.drawLabels(e,E,{vals:S,layer:h,transFn:L,labelFns:M})}var V=[l].concat(i.gauge.steps),G=f.selectAll("g.bg-arc").data(V);G.enter().append("g").classed("bg-arc",!0).append("path"),G.select("path").call(k).call(fw),G.exit().remove();var Z=p(i.gauge.bar.thickness),j=f.selectAll("g.value-arc").data([i.gauge.bar]);j.enter().append("g").classed("value-arc",!0).append("path");var N=j.select("path");iC(d)?(N.transition().duration(d.duration).ease(d.easing).each("end",function(){v&&v()}).each("interrupt",function(){v&&v()}).attrTween("d",zYt(Z,b(r[0].lastY),b(r[0].y))),i._lastValue=r[0].y):N.attr("d",typeof r[0].y=="number"?Z.endAngle(b(r[0].y)):"M0,0Z"),N.call(fw),j.exit().remove(),V=[];var H=i.gauge.threshold.value;(H||H===0)&&V.push({range:[H,H],color:i.gauge.threshold.color,line:{color:i.gauge.threshold.line.color,width:i.gauge.threshold.line.width},thickness:i.gauge.threshold.thickness});var te=f.selectAll("g.threshold-arc").data(V);te.enter().append("g").classed("threshold-arc",!0).append("path"),te.select("path").call(k).call(fw),te.exit().remove();var oe=f.selectAll("g.gauge-outline").data([u]);oe.enter().append("g").classed("gauge-outline",!0).append("path"),oe.select("path").call(k).call(fw),oe.exit().remove()}function FYt(e,t,r,n){var i=r[0].trace,a=n.numbersX,o=n.numbersY,s=i.align||"center",l=a$[s],u=n.transitionOpts,c=n.onComplete,f=Ex.ensureSingle(t,"g","numbers"),h,d,v,_=[];i._hasNumber&&_.push("number"),i._hasDelta&&(_.push("delta"),i.delta.position==="left"&&_.reverse());var b=f.selectAll("text").data(_);b.enter().append("text"),b.attr("text-anchor",function(){return l}).attr("class",function(T){return T}).attr("x",null).attr("y",null).attr("dx",null).attr("dy",null),b.exit().remove();function p(T,z,O,V){if(T.match("s")&&O>=0!=V>=0&&!z(O).slice(-1).match(qXe)&&!z(V).slice(-1).match(qXe)){var G=T.slice().replace("s","f").replace(/\d+/,function(j){return parseInt(j)-1}),Z=tC(e,{tickformat:G});return function(j){return Math.abs(j)<1?sv.tickText(Z,j).text:z(j)}}else return z}function k(){var T=tC(e,{tickformat:i.number.valueformat},i._range);T.setScale(),sv.prepTicks(T);var z=function(j){return sv.tickText(T,j).text},O=i.number.suffix,V=i.number.prefix,G=f.select("text.number");function Z(){var j=typeof r[0].y=="number"?V+z(r[0].y)+O:"-";G.text(j).call(hw.font,i.number.font).call(D7.convertToTspans,e)}return iC(u)?G.transition().duration(u.duration).ease(u.easing).each("end",function(){Z(),c&&c()}).each("interrupt",function(){Z(),c&&c()}).attrTween("text",function(){var j=dw.select(this),N=OXe(r[0].lastY,r[0].y);i._lastValue=r[0].y;var H=p(i.number.valueformat,z,r[0].lastY,r[0].y);return function(te){j.text(V+H(N(te))+O)}}):Z(),h=NXe(V+z(r[0].y)+O,i.number.font,l,e),G}function E(){var T=tC(e,{tickformat:i.delta.valueformat},i._range);T.setScale(),sv.prepTicks(T);var z=function(te){return sv.tickText(T,te).text},O=i.delta.suffix,V=i.delta.prefix,G=function(te){var oe=i.delta.relative?te.relativeDelta:te.delta;return oe},Z=function(te,oe){return te===0||typeof te!="number"||isNaN(te)?"-":(te>0?i.delta.increasing.symbol:i.delta.decreasing.symbol)+V+oe(te)+O},j=function(te){return te.delta>=0?i.delta.increasing.color:i.delta.decreasing.color};i._deltaLastValue===void 0&&(i._deltaLastValue=G(r[0]));var N=f.select("text.delta");N.call(hw.font,i.delta.font).call(q5.fill,j({delta:i._deltaLastValue}));function H(){N.text(Z(G(r[0]),z)).call(q5.fill,j(r[0])).call(D7.convertToTspans,e)}return iC(u)?N.transition().duration(u.duration).ease(u.easing).tween("text",function(){var te=dw.select(this),oe=G(r[0]),_e=i._deltaLastValue,Ee=p(i.delta.valueformat,z,_e,oe),Ce=OXe(_e,oe);return i._deltaLastValue=oe,function(me){te.text(Z(Ce(me),Ee)),te.call(q5.fill,j({delta:Ce(me)}))}}).each("end",function(){H(),c&&c()}).each("interrupt",function(){H(),c&&c()}):H(),d=NXe(Z(G(r[0]),z),i.delta.font,l,e),N}var S=i.mode+i.align,L;if(i._hasDelta&&(L=E(),S+=i.delta.position+i.delta.font.size+i.delta.font.family+i.delta.valueformat,S+=i.delta.increasing.symbol+i.delta.decreasing.symbol,v=d),i._hasNumber&&(k(),S+=i.number.font.size+i.number.font.family+i.number.valueformat+i.number.suffix+i.number.prefix,v=h),i._hasDelta&&i._hasNumber){var x=[(h.left+h.right)/2,(h.top+h.bottom)/2],C=[(d.left+d.right)/2,(d.top+d.bottom)/2],M,g,P=.75*i.delta.font.size;i.delta.position==="left"&&(M=R7(i,"deltaPos",0,-1*(h.width*cw[i.align]+d.width*(1-cw[i.align])+P),S,Math.min),g=x[1]-C[1],v={width:h.width+d.width+P,height:Math.max(h.height,d.height),left:d.left+M,right:h.right,top:Math.min(h.top,d.top+g),bottom:Math.max(h.bottom,d.bottom+g)}),i.delta.position==="right"&&(M=R7(i,"deltaPos",0,h.width*(1-cw[i.align])+d.width*cw[i.align]+P,S,Math.max),g=x[1]-C[1],v={width:h.width+d.width+P,height:Math.max(h.height,d.height),left:h.left,right:d.right+M,top:Math.min(h.top,d.top+g),bottom:Math.max(h.bottom,d.bottom+g)}),i.delta.position==="bottom"&&(M=null,g=d.height,v={width:Math.max(h.width,d.width),height:h.height+d.height,left:Math.min(h.left,d.left),right:Math.max(h.right,d.right),top:h.bottom-h.height,bottom:h.bottom+d.height}),i.delta.position==="top"&&(M=null,g=h.top,v={width:Math.max(h.width,d.width),height:h.height+d.height,left:Math.min(h.left,d.left),right:Math.max(h.right,d.right),top:h.bottom-h.height-d.height,bottom:h.bottom}),L.attr({dx:M,dy:g})}(i._hasNumber||i._hasDelta)&&f.attr("transform",function(){var T=n.numbersScaler(v);S+=T[2];var z=R7(i,"numbersScale",1,T[0],S,Math.min),O;i._scaleNumbers||(z=1),i._isAngular?O=o-z*v.bottom:O=o-z*(v.top+v.bottom)/2,i._numbersTop=z*v.top+O;var V=v[s];s==="center"&&(V=(v.left+v.right)/2);var G=a-z*V;return G=R7(i,"numbersTranslate",0,G,S,Math.max),rC(G,O)+EYt(z)})}function fw(e){e.each(function(t){q5.stroke(dw.select(this),t.line.color)}).each(function(t){q5.fill(dw.select(this),t.color)}).style("stroke-width",function(t){return t.line.width})}function zYt(e,t,r){return function(){var n=MYt(t,r);return function(i){return e.endAngle(n(i))()}}}function tC(e,t,r){var n=e._fullLayout,i=Ex.extendFlat({type:"linear",ticks:"outside",range:r,showline:!0},t),a={type:"linear",_id:"x"+t._id},o={letter:"x",font:n.font,noAutotickangles:!0,noHover:!0,noTickson:!0};function s(l,u){return Ex.coerce(i,a,IYt,l,u)}return LYt(i,a,s,o,n),PYt(i,a,s,o),a}function BXe(e,t,r){var n=Math.min(t/e.width,r/e.height);return[n,e,t+"x"+r]}function OYt(e,t){var r=Math.sqrt(e.width/2*(e.width/2)+e.height*e.height),n=t/r;return[n,e,t]}function NXe(e,t,r,n){var i=document.createElementNS("http://www.w3.org/2000/svg","text"),a=dw.select(i);return a.text(e).attr("x",0).attr("y",0).attr("text-anchor",r).attr("data-unformatted",e).call(D7.convertToTspans,n).call(hw.font,t),hw.bBox(a.node())}function R7(e,t,r,n,i,a){var o="_cache"+t;e[o]&&e[o].key===i||(e[o]={key:i,value:r});var s=Ex.aggNums(a,null,[e[o].value,n],2);return e[o].value=s,s}});var HXe=ye((d2r,GXe)=>{"use strict";GXe.exports={moduleType:"trace",name:"indicator",basePlotModule:AXe(),categories:["svg","noOpacity","noHover"],animatable:!0,attributes:i$(),supplyDefaults:DXe().supplyDefaults,calc:zXe().calc,plot:VXe(),meta:{}}});var WXe=ye((v2r,jXe)=>{"use strict";jXe.exports=HXe()});var o$=ye((g2r,KXe)=>{"use strict";var XXe=Ub(),F7=Ao().extendFlat,qYt=mc().overrideAll,ZXe=ec(),BYt=Cc().attributes,YXe=df().descriptionOnlyNumbers,p2r=KXe.exports=qYt({domain:BYt({name:"table",trace:!0}),columnwidth:{valType:"number",arrayOk:!0,dflt:null},columnorder:{valType:"data_array"},header:{values:{valType:"data_array",dflt:[]},format:{valType:"data_array",dflt:[],description:YXe("cell value")},prefix:{valType:"string",arrayOk:!0,dflt:null},suffix:{valType:"string",arrayOk:!0,dflt:null},height:{valType:"number",dflt:28},align:F7({},XXe.align,{arrayOk:!0}),line:{width:{valType:"number",arrayOk:!0,dflt:1},color:{valType:"color",arrayOk:!0,dflt:"grey"}},fill:{color:{valType:"color",arrayOk:!0,dflt:"white"}},font:F7({},ZXe({arrayOk:!0}))},cells:{values:{valType:"data_array",dflt:[]},format:{valType:"data_array",dflt:[],description:YXe("cell value")},prefix:{valType:"string",arrayOk:!0,dflt:null},suffix:{valType:"string",arrayOk:!0,dflt:null},height:{valType:"number",dflt:20},align:F7({},XXe.align,{arrayOk:!0}),line:{width:{valType:"number",arrayOk:!0,dflt:1},color:{valType:"color",arrayOk:!0,dflt:"grey"}},fill:{color:{valType:"color",arrayOk:!0,dflt:"white"}},font:F7({},ZXe({arrayOk:!0}))}},"calc","from-root")});var $Xe=ye((m2r,JXe)=>{"use strict";var s$=Dr(),NYt=o$(),UYt=Cc().defaults;function VYt(e,t){for(var r=e.columnorder||[],n=e.header.values.length,i=r.slice(0,n),a=i.slice().sort(function(l,u){return l-u}),o=i.map(function(l){return a.indexOf(l)}),s=o.length;s<n;s++)o.push(s);t("columnorder",o)}JXe.exports=function(t,r,n,i){function a(o,s){return s$.coerce(t,r,NYt,o,s)}UYt(r,i,a),a("columnwidth"),a("header.values"),a("header.format"),a("header.align"),a("header.prefix"),a("header.suffix"),a("header.height"),a("header.line.width"),a("header.line.color"),a("header.fill.color"),s$.coerceFont(a,"header.font",i.font),VYt(r,a),a("cells.values"),a("cells.format"),a("cells.align"),a("cells.prefix"),a("cells.suffix"),a("cells.height"),a("cells.line.width"),a("cells.line.color"),a("cells.fill.color"),s$.coerceFont(a,"cells.font",i.font),r._length=null}});var eZe=ye((y2r,QXe)=>{"use strict";var GYt=Jm().wrap;QXe.exports=function(){return GYt({})}});var l$=ye((_2r,tZe)=>{"use strict";tZe.exports={cellPad:8,columnExtentOffset:10,columnTitleOffset:28,emptyHeaderHeight:16,latexCheck:/^\$.*\$$/,goldenRatio:1.618,lineBreaker:"<br>",maxDimensionCount:60,overdrag:45,releaseTransitionDuration:120,releaseTransitionEase:"cubic-out",scrollbarCaptureWidth:18,scrollbarHideDelay:1e3,scrollbarHideDuration:1e3,scrollbarOffset:5,scrollbarWidth:8,transitionDuration:100,transitionEase:"cubic-out",uplift:5,wrapSpacer:" ",wrapSplitCharacter:" ",cn:{table:"table",tableControlView:"table-control-view",scrollBackground:"scroll-background",yColumn:"y-column",columnBlock:"column-block",scrollAreaClip:"scroll-area-clip",scrollAreaClipRect:"scroll-area-clip-rect",columnBoundary:"column-boundary",columnBoundaryClippath:"column-boundary-clippath",columnBoundaryRect:"column-boundary-rect",columnCells:"column-cells",columnCell:"column-cell",cellRect:"cell-rect",cellText:"cell-text",cellTextHolder:"cell-text-holder",scrollbarKit:"scrollbar-kit",scrollbar:"scrollbar",scrollbarSlider:"scrollbar-slider",scrollbarGlyph:"scrollbar-glyph",scrollbarCaptureZone:"scrollbar-capture-zone"}}});var cZe=ye((x2r,uZe)=>{"use strict";var rZe=l$(),c$=Ao().extendFlat,HYt=Eo(),jYt=vv().isTypedArray,z7=vv().isArrayOrTypedArray;uZe.exports=function(t,r){var n=u$(r.cells.values),i=function(g){return g.slice(r.header.values.length,g.length)},a=u$(r.header.values);a.length&&!a[0].length&&(a[0]=[""],a=u$(a));var o=a.concat(i(n).map(function(){return lZe((a[0]||[""]).length)})),s=r.domain,l=Math.floor(t._fullLayout._size.w*(s.x[1]-s.x[0])),u=Math.floor(t._fullLayout._size.h*(s.y[1]-s.y[0])),c=r.header.values.length?o[0].map(function(){return r.header.height}):[rZe.emptyHeaderHeight],f=n.length?n[0].map(function(){return r.cells.height}):[],h=c.reduce(iZe,0),d=u-h,v=d+rZe.uplift,_=oZe(f,v),b=oZe(c,h),p=aZe(b,[]),k=aZe(_,p),E={},S=r._fullInput.columnorder;z7(S)&&(S=Array.from(S)),S=S.concat(i(n.map(function(g,P){return P})));var L=o.map(function(g,P){var T=z7(r.columnwidth)?r.columnwidth[Math.min(P,r.columnwidth.length-1)]:r.columnwidth;return HYt(T)?Number(T):1}),x=L.reduce(iZe,0);L=L.map(function(g){return g/x*l});var C=Math.max(f$(r.header.line.width),f$(r.cells.line.width)),M={key:r.uid+t._context.staticPlot,translateX:s.x[0]*t._fullLayout._size.w,translateY:t._fullLayout._size.h*(1-s.y[1]),size:t._fullLayout._size,width:l,maxLineWidth:C,height:u,columnOrder:S,groupHeight:u,rowBlocks:k,headerRowBlocks:p,scrollY:0,cells:c$({},r.cells,{values:n}),headerCells:c$({},r.header,{values:o}),gdColumns:o.map(function(g){return g[0]}),gdColumnsOriginalOrder:o.map(function(g){return g[0]}),prevPages:[0,0],scrollbarState:{scrollbarScrollInProgress:!1},columns:o.map(function(g,P){var T=E[g];E[g]=(T||0)+1;var z=g+"__"+E[g];return{key:z,label:g,specIndex:P,xIndex:S[P],xScale:nZe,x:void 0,calcdata:void 0,columnWidth:L[P]}})};return M.columns.forEach(function(g){g.calcdata=M,g.x=nZe(g)}),M};function f$(e){if(z7(e)){for(var t=0,r=0;r<e.length;r++)t=Math.max(t,f$(e[r]));return t}return e}function iZe(e,t){return e+t}function u$(e){var t=e.slice(),r=1/0,n=0,i;for(i=0;i<t.length;i++)jYt(t[i])?t[i]=Array.from(t[i]):z7(t[i])||(t[i]=[t[i]]),r=Math.min(r,t[i].length),n=Math.max(n,t[i].length);if(r!==n)for(i=0;i<t.length;i++){var a=n-t[i].length;a&&(t[i]=t[i].concat(lZe(a)))}return t}function lZe(e){for(var t=new Array(e),r=0;r<e;r++)t[r]="";return t}function nZe(e){return e.calcdata.columns.reduce(function(t,r){return r.xIndex<e.xIndex?t+r.columnWidth:t},0)}function aZe(e,t){var r=Object.keys(e);return r.map(function(n){return c$({},e[n],{auxiliaryBlocks:t})})}function oZe(e,t){for(var r={},n,i=0,a=0,o=sZe(),s=0,l=0,u=0;u<e.length;u++)n=e[u],o.rows.push({rowIndex:u,rowHeight:n}),a+=n,(a>=t||u===e.length-1)&&(r[i]=o,o.key=l++,o.firstRowIndex=s,o.lastRowIndex=u,o=sZe(),i+=a,s=u+1,a=0);return r}function sZe(){return{firstRowIndex:null,lastRowIndex:null,rows:[]}}});var fZe=ye(h$=>{"use strict";var O7=Ao().extendFlat;h$.splitToPanels=function(e){var t=[0,0],r=O7({},e,{key:"header",type:"header",page:0,prevPages:t,currentRepaint:[null,null],dragHandle:!0,values:e.calcdata.headerCells.values[e.specIndex],rowBlocks:e.calcdata.headerRowBlocks,calcdata:O7({},e.calcdata,{cells:e.calcdata.headerCells})}),n=O7({},e,{key:"cells1",type:"cells",page:0,prevPages:t,currentRepaint:[null,null],dragHandle:!1,values:e.calcdata.cells.values[e.specIndex],rowBlocks:e.calcdata.rowBlocks}),i=O7({},e,{key:"cells2",type:"cells",page:1,prevPages:t,currentRepaint:[null,null],dragHandle:!1,values:e.calcdata.cells.values[e.specIndex],rowBlocks:e.calcdata.rowBlocks});return[n,i,r]};h$.splitToCells=function(e){var t=WYt(e);return(e.values||[]).slice(t[0],t[1]).map(function(r,n){var i=typeof r=="string"&&r.match(/[<$&> ]/)?"_keybuster_"+Math.random():"";return{keyWithinBlock:n+i,key:t[0]+n,column:e,calcdata:e.calcdata,page:e.page,rowBlocks:e.rowBlocks,value:r}})};function WYt(e){var t=e.rowBlocks[e.page],r=t?t.rows[0].rowIndex:0,n=t?r+t.rows.length:0;return[r,n]}});var w$=ye((w2r,wZe)=>{"use strict";var Ya=l$(),tf=Oa(),d$=Dr(),XYt=d$.numberFormat,Uu=Jm(),v$=So(),ZYt=ru(),YYt=Dr().raiseToTop,og=Dr().strTranslate,KYt=Dr().cancelTransition,JYt=cZe(),yZe=fZe(),hZe=ka();wZe.exports=function(t,r){var n=!t._context.staticPlot,i=t._fullLayout._paper.selectAll("."+Ya.cn.table).data(r.map(function(k){var E=Uu.unwrap(k),S=E.trace;return JYt(t,S)}),Uu.keyFun);i.exit().remove(),i.enter().append("g").classed(Ya.cn.table,!0).attr("overflow","visible").style("box-sizing","content-box").style("position","absolute").style("left",0).style("overflow","visible").style("shape-rendering","crispEdges").style("pointer-events","all"),i.attr("width",function(k){return k.width+k.size.l+k.size.r}).attr("height",function(k){return k.height+k.size.t+k.size.b}).attr("transform",function(k){return og(k.translateX,k.translateY)});var a=i.selectAll("."+Ya.cn.tableControlView).data(Uu.repeat,Uu.keyFun),o=a.enter().append("g").classed(Ya.cn.tableControlView,!0).style("box-sizing","content-box");if(n){var s="onwheel"in document?"wheel":"mousewheel";o.on("mousemove",function(k){a.filter(function(E){return k===E}).call(nC,t)}).on(s,function(k){if(!k.scrollbarState.wheeling){k.scrollbarState.wheeling=!0;var E=k.scrollY+tf.event.deltaY,S=B7(t,a,null,E)(k);S||(tf.event.stopPropagation(),tf.event.preventDefault()),k.scrollbarState.wheeling=!1}}).call(nC,t,!0)}a.attr("transform",function(k){return og(k.size.l,k.size.t)});var l=a.selectAll("."+Ya.cn.scrollBackground).data(Uu.repeat,Uu.keyFun);l.enter().append("rect").classed(Ya.cn.scrollBackground,!0).attr("fill","none"),l.attr("width",function(k){return k.width}).attr("height",function(k){return k.height}),a.each(function(k){v$.setClipUrl(tf.select(this),dZe(t,k),t)});var u=a.selectAll("."+Ya.cn.yColumn).data(function(k){return k.columns},Uu.keyFun);u.enter().append("g").classed(Ya.cn.yColumn,!0),u.exit().remove(),u.attr("transform",function(k){return og(k.x,0)}),n&&u.call(tf.behavior.drag().origin(function(k){var E=tf.select(this);return gZe(E,k,-Ya.uplift),YYt(this),k.calcdata.columnDragInProgress=!0,nC(a.filter(function(S){return k.calcdata.key===S.key}),t),k}).on("drag",function(k){var E=tf.select(this),S=function(C){return(k===C?tf.event.x:C.x)+C.columnWidth/2};k.x=Math.max(-Ya.overdrag,Math.min(k.calcdata.width+Ya.overdrag-k.columnWidth,tf.event.x));var L=_Ze(u).filter(function(C){return C.calcdata.key===k.calcdata.key}),x=L.sort(function(C,M){return S(C)-S(M)});x.forEach(function(C,M){C.xIndex=M,C.x=k===C?C.x:C.xScale(C)}),u.filter(function(C){return k!==C}).transition().ease(Ya.transitionEase).duration(Ya.transitionDuration).attr("transform",function(C){return og(C.x,0)}),E.call(KYt).attr("transform",og(k.x,-Ya.uplift))}).on("dragend",function(k){var E=tf.select(this),S=k.calcdata;k.x=k.xScale(k),k.calcdata.columnDragInProgress=!1,gZe(E,k,0),sKt(t,S,S.columns.map(function(L){return L.xIndex}))})),u.each(function(k){v$.setClipUrl(tf.select(this),vZe(t,k),t)});var c=u.selectAll("."+Ya.cn.columnBlock).data(yZe.splitToPanels,Uu.keyFun);c.enter().append("g").classed(Ya.cn.columnBlock,!0).attr("id",function(k){return k.key}),c.style("cursor",function(k){return k.dragHandle?"ew-resize":k.calcdata.scrollbarState.barWiggleRoom?"ns-resize":"default"});var f=c.filter(lKt),h=c.filter(_$);n&&h.call(tf.behavior.drag().origin(function(k){return tf.event.stopPropagation(),k}).on("drag",B7(t,a,-1)).on("dragend",function(){})),p$(t,a,f,c),p$(t,a,h,c);var d=a.selectAll("."+Ya.cn.scrollAreaClip).data(Uu.repeat,Uu.keyFun);d.enter().append("clipPath").classed(Ya.cn.scrollAreaClip,!0).attr("id",function(k){return dZe(t,k)});var v=d.selectAll("."+Ya.cn.scrollAreaClipRect).data(Uu.repeat,Uu.keyFun);v.enter().append("rect").classed(Ya.cn.scrollAreaClipRect,!0).attr("x",-Ya.overdrag).attr("y",-Ya.uplift).attr("fill","none"),v.attr("width",function(k){return k.width+2*Ya.overdrag}).attr("height",function(k){return k.height+Ya.uplift});var _=u.selectAll("."+Ya.cn.columnBoundary).data(Uu.repeat,Uu.keyFun);_.enter().append("g").classed(Ya.cn.columnBoundary,!0);var b=u.selectAll("."+Ya.cn.columnBoundaryClippath).data(Uu.repeat,Uu.keyFun);b.enter().append("clipPath").classed(Ya.cn.columnBoundaryClippath,!0),b.attr("id",function(k){return vZe(t,k)});var p=b.selectAll("."+Ya.cn.columnBoundaryRect).data(Uu.repeat,Uu.keyFun);p.enter().append("rect").classed(Ya.cn.columnBoundaryRect,!0).attr("fill","none"),p.attr("width",function(k){return k.columnWidth+2*q7(k)}).attr("height",function(k){return k.calcdata.height+2*q7(k)+Ya.uplift}).attr("x",function(k){return-q7(k)}).attr("y",function(k){return-q7(k)}),x$(null,h,a)};function q7(e){return Math.ceil(e.calcdata.maxLineWidth/2)}function dZe(e,t){return"clip"+e._fullLayout._uid+"_scrollAreaBottomClip_"+t.key}function vZe(e,t){return"clip"+e._fullLayout._uid+"_columnBoundaryClippath_"+t.calcdata.key+"_"+t.specIndex}function _Ze(e){return[].concat.apply([],e.map(function(t){return t})).map(function(t){return t.__data__})}function nC(e,t,r){function n(u){var c=u.rowBlocks;return m$(c,c.length-1)+(c.length?N7(c[c.length-1],1/0):1)}var i=e.selectAll("."+Ya.cn.scrollbarKit).data(Uu.repeat,Uu.keyFun);i.enter().append("g").classed(Ya.cn.scrollbarKit,!0).style("shape-rendering","geometricPrecision"),i.each(function(u){var c=u.scrollbarState;c.totalHeight=n(u),c.scrollableAreaHeight=u.groupHeight-g$(u),c.currentlyVisibleHeight=Math.min(c.totalHeight,c.scrollableAreaHeight),c.ratio=c.currentlyVisibleHeight/c.totalHeight,c.barLength=Math.max(c.ratio*c.currentlyVisibleHeight,Ya.goldenRatio*Ya.scrollbarWidth),c.barWiggleRoom=c.currentlyVisibleHeight-c.barLength,c.wiggleRoom=Math.max(0,c.totalHeight-c.scrollableAreaHeight),c.topY=c.barWiggleRoom===0?0:u.scrollY/c.wiggleRoom*c.barWiggleRoom,c.bottomY=c.topY+c.barLength,c.dragMultiplier=c.wiggleRoom/c.barWiggleRoom}).attr("transform",function(u){var c=u.width+Ya.scrollbarWidth/2+Ya.scrollbarOffset;return og(c,g$(u))});var a=i.selectAll("."+Ya.cn.scrollbar).data(Uu.repeat,Uu.keyFun);a.enter().append("g").classed(Ya.cn.scrollbar,!0);var o=a.selectAll("."+Ya.cn.scrollbarSlider).data(Uu.repeat,Uu.keyFun);o.enter().append("g").classed(Ya.cn.scrollbarSlider,!0),o.attr("transform",function(u){return og(0,u.scrollbarState.topY||0)});var s=o.selectAll("."+Ya.cn.scrollbarGlyph).data(Uu.repeat,Uu.keyFun);s.enter().append("line").classed(Ya.cn.scrollbarGlyph,!0).attr("stroke","black").attr("stroke-width",Ya.scrollbarWidth).attr("stroke-linecap","round").attr("y1",Ya.scrollbarWidth/2),s.attr("y2",function(u){return u.scrollbarState.barLength-Ya.scrollbarWidth/2}).attr("stroke-opacity",function(u){return u.columnDragInProgress||!u.scrollbarState.barWiggleRoom||r?0:.4}),s.transition().delay(0).duration(0),s.transition().delay(Ya.scrollbarHideDelay).duration(Ya.scrollbarHideDuration).attr("stroke-opacity",0);var l=a.selectAll("."+Ya.cn.scrollbarCaptureZone).data(Uu.repeat,Uu.keyFun);l.enter().append("line").classed(Ya.cn.scrollbarCaptureZone,!0).attr("stroke","white").attr("stroke-opacity",.01).attr("stroke-width",Ya.scrollbarCaptureWidth).attr("stroke-linecap","butt").attr("y1",0).on("mousedown",function(u){var c=tf.event.y,f=this.getBoundingClientRect(),h=u.scrollbarState,d=c-f.top,v=tf.scale.linear().domain([0,h.scrollableAreaHeight]).range([0,h.totalHeight]).clamp(!0);h.topY<=d&&d<=h.bottomY||B7(t,e,null,v(d-h.barLength/2))(u)}).call(tf.behavior.drag().origin(function(u){return tf.event.stopPropagation(),u.scrollbarState.scrollbarScrollInProgress=!0,u}).on("drag",B7(t,e)).on("dragend",function(){})),l.attr("y2",function(u){return u.scrollbarState.scrollableAreaHeight}),t._context.staticPlot&&(s.remove(),l.remove())}function p$(e,t,r,n){var i=$Yt(r),a=QYt(i);iKt(a);var o=eKt(a);aKt(o);var s=rKt(a),l=tKt(s);nKt(l),xZe(l,t,n,e),b$(a)}function $Yt(e){var t=e.selectAll("."+Ya.cn.columnCells).data(Uu.repeat,Uu.keyFun);return t.enter().append("g").classed(Ya.cn.columnCells,!0),t.exit().remove(),t}function QYt(e){var t=e.selectAll("."+Ya.cn.columnCell).data(yZe.splitToCells,function(r){return r.keyWithinBlock});return t.enter().append("g").classed(Ya.cn.columnCell,!0),t.exit().remove(),t}function eKt(e){var t=e.selectAll("."+Ya.cn.cellRect).data(Uu.repeat,function(r){return r.keyWithinBlock});return t.enter().append("rect").classed(Ya.cn.cellRect,!0),t}function tKt(e){var t=e.selectAll("."+Ya.cn.cellText).data(Uu.repeat,function(r){return r.keyWithinBlock});return t.enter().append("text").classed(Ya.cn.cellText,!0).style("cursor",function(){return"auto"}).on("mousedown",function(){tf.event.stopPropagation()}),t}function rKt(e){var t=e.selectAll("."+Ya.cn.cellTextHolder).data(Uu.repeat,function(r){return r.keyWithinBlock});return t.enter().append("g").classed(Ya.cn.cellTextHolder,!0).style("shape-rendering","geometricPrecision"),t}function iKt(e){e.each(function(t,r){var n=t.calcdata.cells.font,i=t.column.specIndex,a={size:Yv(n.size,i,r),color:Yv(n.color,i,r),family:Yv(n.family,i,r),weight:Yv(n.weight,i,r),style:Yv(n.style,i,r),variant:Yv(n.variant,i,r),textcase:Yv(n.textcase,i,r),lineposition:Yv(n.lineposition,i,r),shadow:Yv(n.shadow,i,r)};t.rowNumber=t.key,t.align=Yv(t.calcdata.cells.align,i,r),t.cellBorderWidth=Yv(t.calcdata.cells.line.width,i,r),t.font=a})}function nKt(e){e.each(function(t){v$.font(tf.select(this),t.font)})}function aKt(e){e.attr("width",function(t){return t.column.columnWidth}).attr("stroke-width",function(t){return t.cellBorderWidth}).each(function(t){var r=tf.select(this);hZe.stroke(r,Yv(t.calcdata.cells.line.color,t.column.specIndex,t.rowNumber)),hZe.fill(r,Yv(t.calcdata.cells.fill.color,t.column.specIndex,t.rowNumber))})}function xZe(e,t,r,n){e.text(function(i){var a=i.column.specIndex,o=i.rowNumber,s=i.value,l=typeof s=="string",u=l&&s.match(/<br>/i),c=!l||u;i.mayHaveMarkup=l&&s.match(/[<&>]/);var f=oKt(s);i.latex=f;var h=f?"":Yv(i.calcdata.cells.prefix,a,o)||"",d=f?"":Yv(i.calcdata.cells.suffix,a,o)||"",v=f?null:Yv(i.calcdata.cells.format,a,o)||null,_=h+(v?XYt(v)(i.value):i.value)+d,b;i.wrappingNeeded=!i.wrapped&&!c&&!f&&(b=pZe(_)),i.cellHeightMayIncrease=u||f||i.mayHaveMarkup||(b===void 0?pZe(_):b),i.needsConvertToTspans=i.mayHaveMarkup||i.wrappingNeeded||i.latex;var p;if(i.wrappingNeeded){var k=Ya.wrapSplitCharacter===" "?_.replace(/<a href=/ig,"<a_href="):_,E=k.split(Ya.wrapSplitCharacter),S=Ya.wrapSplitCharacter===" "?E.map(function(L){return L.replace(/<a_href=/ig,"<a href=")}):E;i.fragments=S.map(function(L){return{text:L,width:null}}),i.fragments.push({fragment:Ya.wrapSpacer,width:null}),p=S.join(Ya.lineBreaker)+Ya.lineBreaker+Ya.wrapSpacer}else delete i.fragments,p=_;return p}).attr("dy",function(i){return i.needsConvertToTspans?0:"0.75em"}).each(function(i){var a=this,o=tf.select(a),s=i.wrappingNeeded?cKt:fKt;i.needsConvertToTspans?ZYt.convertToTspans(o,n,s(r,a,t,n,i)):tf.select(a.parentNode).attr("transform",function(l){return og(bZe(l),Ya.cellPad)}).attr("text-anchor",function(l){return{left:"start",center:"middle",right:"end"}[l.align]})})}function oKt(e){return typeof e=="string"&&e.match(Ya.latexCheck)}function pZe(e){return e.indexOf(Ya.wrapSplitCharacter)!==-1}function sKt(e,t,r){var n=t.gdColumnsOriginalOrder;t.gdColumns.sort(function(i,a){return r[n.indexOf(i)]-r[n.indexOf(a)]}),t.columnorder=r,e.emit("plotly_restyle")}function Yv(e,t,r){if(d$.isArrayOrTypedArray(e)){var n=e[Math.min(t,e.length-1)];return d$.isArrayOrTypedArray(n)?n[Math.min(r,n.length-1)]:n}else return e}function gZe(e,t,r){e.transition().ease(Ya.releaseTransitionEase).duration(Ya.releaseTransitionDuration).attr("transform",og(t.x,r))}function _$(e){return e.type==="cells"}function lKt(e){return e.type==="header"}function g$(e){var t=e.rowBlocks.length?e.rowBlocks[0].auxiliaryBlocks:[];return t.reduce(function(r,n){return r+N7(n,1/0)},0)}function uKt(e,t,r){for(var n=[],i=0,a=0;a<e.length;a++){for(var o=e[a],s=o.rows,l=0,u=0;u<s.length;u++)l+=s[u].rowHeight;o.allRowsHeight=l;var c=i+l,f=t,h=f+r;f<c&&h>i&&n.push(a),i+=l}return n}function x$(e,t,r){var n=_Ze(t)[0];if(n!==void 0){var i=n.rowBlocks,a=n.calcdata,o=m$(i,i.length),s=n.calcdata.groupHeight-g$(n),l=a.scrollY=Math.max(0,Math.min(o-s,a.scrollY)),u=uKt(i,l,s);u.length===1&&(u[0]===i.length-1?u.unshift(u[0]-1):u.push(u[0]+1)),u[0]%2&&u.reverse(),t.each(function(c,f){c.page=u[f],c.scrollY=l}),t.attr("transform",function(c){var f=m$(c.rowBlocks,c.page)-c.scrollY;return og(0,f)}),e&&(mZe(e,r,t,u,n.prevPages,n,0),mZe(e,r,t,u,n.prevPages,n,1),nC(r,e))}}function B7(e,t,r,n){return function(a){var o=a.calcdata?a.calcdata:a,s=t.filter(function(f){return o.key===f.key}),l=r||o.scrollbarState.dragMultiplier,u=o.scrollY;o.scrollY=n===void 0?o.scrollY+l*tf.event.dy:n;var c=s.selectAll("."+Ya.cn.yColumn).selectAll("."+Ya.cn.columnBlock).filter(_$);return x$(e,c,s),o.scrollY===u}}function mZe(e,t,r,n,i,a,o){var s=n[o]!==i[o];s&&(clearTimeout(a.currentRepaint[o]),a.currentRepaint[o]=setTimeout(function(){var l=r.filter(function(u,c){return c===o&&n[c]!==i[c]});p$(e,t,l,r),i[o]=n[o]}))}function cKt(e,t,r,n){return function(){var a=tf.select(t.parentNode);a.each(function(o){var s=o.fragments;a.selectAll("tspan.line").each(function(_,b){s[b].width=this.getComputedTextLength()});var l=s[s.length-1].width,u=s.slice(0,-1),c=[],f,h,d=0,v=o.column.columnWidth-2*Ya.cellPad;for(o.value="";u.length;)f=u.shift(),h=f.width+l,d+h>v&&(o.value+=c.join(Ya.wrapSpacer)+Ya.lineBreaker,c=[],d=0),c.push(f.text),d+=h;d&&(o.value+=c.join(Ya.wrapSpacer)),o.wrapped=!0}),a.selectAll("tspan.line").remove(),xZe(a.select("."+Ya.cn.cellText),r,e,n),tf.select(t.parentNode.parentNode).call(b$)}}function fKt(e,t,r,n,i){return function(){if(!i.settledY){var o=tf.select(t.parentNode),s=y$(i),l=i.key-s.firstRowIndex,u=s.rows[l].rowHeight,c=i.cellHeightMayIncrease?t.parentNode.getBoundingClientRect().height+2*Ya.cellPad:u,f=Math.max(c,u),h=f-s.rows[l].rowHeight;h&&(s.rows[l].rowHeight=f,e.selectAll("."+Ya.cn.columnCell).call(b$),x$(null,e.filter(_$),0),nC(r,n,!0)),o.attr("transform",function(){var d=this,v=d.parentNode,_=v.getBoundingClientRect(),b=tf.select(d.parentNode).select("."+Ya.cn.cellRect).node().getBoundingClientRect(),p=d.transform.baseVal.consolidate(),k=b.top-_.top+(p?p.matrix.f:Ya.cellPad);return og(bZe(i,tf.select(d.parentNode).select("."+Ya.cn.cellTextHolder).node().getBoundingClientRect().width),k)}),i.settledY=!0}}}function bZe(e,t){switch(e.align){case"left":return Ya.cellPad;case"right":return e.column.columnWidth-(t||0)-Ya.cellPad;case"center":return(e.column.columnWidth-(t||0))/2;default:return Ya.cellPad}}function b$(e){e.attr("transform",function(t){var r=t.rowBlocks[0].auxiliaryBlocks.reduce(function(o,s){return o+N7(s,1/0)},0),n=y$(t),i=N7(n,t.key),a=i+r;return og(0,a)}).selectAll("."+Ya.cn.cellRect).attr("height",function(t){return dKt(y$(t),t.key).rowHeight})}function m$(e,t){for(var r=0,n=t-1;n>=0;n--)r+=hKt(e[n]);return r}function N7(e,t){for(var r=0,n=0;n<e.rows.length&&e.rows[n].rowIndex<t;n++)r+=e.rows[n].rowHeight;return r}function hKt(e){var t=e.allRowsHeight;if(t!==void 0)return t;for(var r=0,n=0;n<e.rows.length;n++)r+=e.rows[n].rowHeight;return e.allRowsHeight=r,r}function y$(e){return e.rowBlocks[e.page]}function dKt(e,t){return e.rows[t-e.firstRowIndex]}});var TZe=ye(V7=>{"use strict";var vKt=Id().getModuleCalcData,pKt=w$(),U7="table";V7.name=U7;V7.plot=function(e){var t=vKt(e.calcdata,U7)[0];t.length&&pKt(e,t)};V7.clean=function(e,t,r,n){var i=n._has&&n._has(U7),a=t._has&&t._has(U7);i&&!a&&n._paperdiv.selectAll(".table").remove()}});var SZe=ye((A2r,AZe)=>{"use strict";AZe.exports={attributes:o$(),supplyDefaults:$Xe(),calc:eZe(),plot:w$(),moduleType:"trace",name:"table",basePlotModule:TZe(),categories:["noOpacity"],meta:{}}});var EZe=ye((S2r,MZe)=>{"use strict";MZe.exports=SZe()});var IZe=ye((M2r,PZe)=>{"use strict";var kZe=ec(),CZe=Lh(),T$=Rd(),gKt=df().descriptionWithDates,mKt=mc().overrideAll,LZe=Pd().dash,A$=Ao().extendFlat;PZe.exports={color:{valType:"color",editType:"calc"},smoothing:{valType:"number",dflt:1,min:0,max:1.3,editType:"calc"},title:{text:{valType:"string",dflt:"",editType:"calc"},font:kZe({editType:"calc"}),offset:{valType:"number",dflt:10,editType:"calc"},editType:"calc"},type:{valType:"enumerated",values:["-","linear","date","category"],dflt:"-",editType:"calc"},autotypenumbers:T$.autotypenumbers,autorange:{valType:"enumerated",values:[!0,!1,"reversed"],dflt:!0,editType:"calc"},rangemode:{valType:"enumerated",values:["normal","tozero","nonnegative"],dflt:"normal",editType:"calc"},range:{valType:"info_array",editType:"calc",items:[{valType:"any",editType:"calc"},{valType:"any",editType:"calc"}]},fixedrange:{valType:"boolean",dflt:!1,editType:"calc"},cheatertype:{valType:"enumerated",values:["index","value"],dflt:"value",editType:"calc"},tickmode:{valType:"enumerated",values:["linear","array"],dflt:"array",editType:"calc"},nticks:{valType:"integer",min:0,dflt:0,editType:"calc"},tickvals:{valType:"data_array",editType:"calc"},ticktext:{valType:"data_array",editType:"calc"},showticklabels:{valType:"enumerated",values:["start","end","both","none"],dflt:"start",editType:"calc"},labelalias:A$({},T$.labelalias,{editType:"calc"}),tickfont:kZe({editType:"calc"}),tickangle:{valType:"angle",dflt:"auto",editType:"calc"},tickprefix:{valType:"string",dflt:"",editType:"calc"},showtickprefix:{valType:"enumerated",values:["all","first","last","none"],dflt:"all",editType:"calc"},ticksuffix:{valType:"string",dflt:"",editType:"calc"},showticksuffix:{valType:"enumerated",values:["all","first","last","none"],dflt:"all",editType:"calc"},showexponent:{valType:"enumerated",values:["all","first","last","none"],dflt:"all",editType:"calc"},exponentformat:{valType:"enumerated",values:["none","e","E","power","SI","B","SI extended"],dflt:"B",editType:"calc"},minexponent:{valType:"number",dflt:3,min:0,editType:"calc"},separatethousands:{valType:"boolean",dflt:!1,editType:"calc"},tickformat:{valType:"string",dflt:"",editType:"calc",description:gKt("tick label")},tickformatstops:mKt(T$.tickformatstops,"calc","from-root"),categoryorder:{valType:"enumerated",values:["trace","category ascending","category descending","array"],dflt:"trace",editType:"calc"},categoryarray:{valType:"data_array",editType:"calc"},labelpadding:{valType:"integer",dflt:10,editType:"calc"},labelprefix:{valType:"string",editType:"calc"},labelsuffix:{valType:"string",dflt:"",editType:"calc"},showline:{valType:"boolean",dflt:!1,editType:"calc"},linecolor:{valType:"color",dflt:CZe.defaultLine,editType:"calc"},linewidth:{valType:"number",min:0,dflt:1,editType:"calc"},gridcolor:{valType:"color",editType:"calc"},gridwidth:{valType:"number",min:0,dflt:1,editType:"calc"},griddash:A$({},LZe,{editType:"calc"}),showgrid:{valType:"boolean",dflt:!0,editType:"calc"},minorgridcount:{valType:"integer",min:0,dflt:0,editType:"calc"},minorgridwidth:{valType:"number",min:0,dflt:1,editType:"calc"},minorgriddash:A$({},LZe,{editType:"calc"}),minorgridcolor:{valType:"color",dflt:CZe.lightLine,editType:"calc"},startline:{valType:"boolean",editType:"calc"},startlinecolor:{valType:"color",editType:"calc"},startlinewidth:{valType:"number",dflt:1,editType:"calc"},endline:{valType:"boolean",editType:"calc"},endlinewidth:{valType:"number",dflt:1,editType:"calc"},endlinecolor:{valType:"color",editType:"calc"},tick0:{valType:"number",min:0,dflt:0,editType:"calc"},dtick:{valType:"number",min:0,dflt:1,editType:"calc"},arraytick0:{valType:"integer",min:0,dflt:0,editType:"calc"},arraydtick:{valType:"integer",min:1,dflt:1,editType:"calc"},editType:"calc"}});var H7=ye((E2r,FZe)=>{"use strict";var yKt=ec(),RZe=IZe(),DZe=Lh(),G7=yKt({editType:"calc"}),_Kt=pf().zorder;G7.family.dflt='"Open Sans", verdana, arial, sans-serif';G7.size.dflt=12;G7.color.dflt=DZe.defaultLine;FZe.exports={carpet:{valType:"string",editType:"calc"},x:{valType:"data_array",editType:"calc+clearAxisTypes"},y:{valType:"data_array",editType:"calc+clearAxisTypes"},a:{valType:"data_array",editType:"calc"},a0:{valType:"number",dflt:0,editType:"calc"},da:{valType:"number",dflt:1,editType:"calc"},b:{valType:"data_array",editType:"calc"},b0:{valType:"number",dflt:0,editType:"calc"},db:{valType:"number",dflt:1,editType:"calc"},cheaterslope:{valType:"number",dflt:1,editType:"calc"},aaxis:RZe,baxis:RZe,font:G7,color:{valType:"color",dflt:DZe.defaultLine,editType:"plot"},zorder:_Kt}});var qZe=ye((k2r,OZe)=>{"use strict";var zZe=Dr().isArray1D;OZe.exports=function(t,r,n){var i=n("x"),a=i&&i.length,o=n("y"),s=o&&o.length;if(!a&&!s)return!1;if(r._cheater=!i,(!a||zZe(i))&&(!s||zZe(o))){var l=a?i.length:1/0;s&&(l=Math.min(l,o.length)),r.a&&r.a.length&&(l=Math.min(l,r.a.length)),r.b&&r.b.length&&(l=Math.min(l,r.b.length)),r._length=l}else r._length=null;return!0}});var UZe=ye((C2r,NZe)=>{"use strict";var xKt=H7(),BZe=ka().addOpacity,bKt=qa(),aC=Dr(),wKt=bb(),TKt=e_(),AKt=t_(),SKt=oI(),MKt=ym(),EKt=R3();NZe.exports=function(t,r,n){var i=n.letter,a=n.font||{},o=xKt[i+"axis"];function s(g,P){return aC.coerce(t,r,o,g,P)}function l(g,P){return aC.coerce2(t,r,o,g,P)}n.name&&(r._name=n.name,r._id=n.name),s("autotypenumbers",n.autotypenumbersDflt);var u=s("type");if(u==="-"&&(n.data&&kKt(r,n.data),r.type==="-"?r.type="linear":u=t.type=r.type),s("smoothing"),s("cheatertype"),s("showticklabels"),s("labelprefix",i+" = "),s("labelsuffix"),s("showtickprefix"),s("showticksuffix"),s("separatethousands"),s("tickformat"),s("exponentformat"),s("minexponent"),s("showexponent"),s("categoryorder"),s("tickmode"),s("tickvals"),s("ticktext"),s("tick0"),s("dtick"),r.tickmode==="array"&&(s("arraytick0"),s("arraydtick")),s("labelpadding"),r._hovertitle=i,u==="date"){var c=bKt.getComponentMethod("calendars","handleDefaults");c(t,r,"calendar",n.calendar)}MKt(r,n.fullLayout),r.c2p=aC.identity;var f=s("color",n.dfltColor),h=f===t.color?f:a.color,d=s("title.text");d&&(aC.coerceFont(s,"title.font",a,{overrideDflt:{size:aC.bigFont(a.size),color:h}}),s("title.offset")),s("tickangle");var v=s("autorange",!r.isValidRange(t.range));v&&s("rangemode"),s("range"),r.cleanRange(),s("fixedrange"),wKt(t,r,s,u),AKt(t,r,s,u,n),TKt(t,r,s,u,n),SKt(t,r,s,{data:n.data,dataAttr:i});var _=l("gridcolor",BZe(f,.3)),b=l("gridwidth"),p=l("griddash"),k=s("showgrid");k||(delete r.gridcolor,delete r.gridwidth,delete r.griddash);var E=l("startlinecolor",f),S=l("startlinewidth",b),L=s("startline",r.showgrid||!!E||!!S);L||(delete r.startlinecolor,delete r.startlinewidth);var x=l("endlinecolor",f),C=l("endlinewidth",b),M=s("endline",r.showgrid||!!x||!!C);return M||(delete r.endlinecolor,delete r.endlinewidth),k?(s("minorgridcount"),s("minorgridwidth",b),s("minorgriddash",p),s("minorgridcolor",BZe(_,.06)),r.minorgridcount||(delete r.minorgridwidth,delete r.minorgriddash,delete r.minorgridcolor)):(delete r.gridcolor,delete r.gridwidth,delete r.griddash),r.showticklabels==="none"&&(delete r.tickfont,delete r.tickangle,delete r.showexponent,delete r.exponentformat,delete r.minexponent,delete r.tickformat,delete r.showticksuffix,delete r.showtickprefix),r.showticksuffix||delete r.ticksuffix,r.showtickprefix||delete r.tickprefix,s("tickmode"),r};function kKt(e,t){if(e.type==="-"){var r=e._id,n=r.charAt(0),i=n+"calendar",a=e[i];e.type=EKt(t,a,{autotypenumbers:e.autotypenumbers})}}});var GZe=ye((L2r,VZe)=>{"use strict";var CKt=UZe(),LKt=vl();VZe.exports=function(t,r,n,i,a){var o=i("a");o||(i("da"),i("a0"));var s=i("b");s||(i("db"),i("b0")),PKt(t,r,n,a)};function PKt(e,t,r,n){var i=["aaxis","baxis"];i.forEach(function(a){var o=a.charAt(0),s=e[a]||{},l=LKt.newContainer(t,a),u={noAutotickangles:!0,noTicklabelshift:!0,noTicklabelstandoff:!0,noTicklabelstep:!0,tickfont:"x",id:o+"axis",letter:o,font:t.font,name:a,data:e[o],calendar:t.calendar,dfltColor:n,bgColor:r.paper_bgcolor,autotypenumbersDflt:r.autotypenumbers,fullLayout:r};CKt(s,l,u),l._categories=l._categories||[],!e[a]&&s.type!=="-"&&(e[a]={type:s.type})})}});var WZe=ye((P2r,jZe)=>{"use strict";var HZe=Dr(),IKt=qZe(),RKt=GZe(),DKt=H7(),FKt=Lh();jZe.exports=function(t,r,n,i){function a(l,u){return HZe.coerce(t,r,DKt,l,u)}r._clipPathId="clip"+r.uid+"carpet";var o=a("color",FKt.defaultLine);if(HZe.coerceFont(a,"font",i.font),a("carpet"),RKt(t,r,i,a,o),!r.a||!r.b){r.visible=!1;return}r.a.length<3&&(r.aaxis.smoothing=0),r.b.length<3&&(r.baxis.smoothing=0);var s=IKt(t,r,a);s||(r.visible=!1),r._cheater&&a("cheaterslope"),a("zorder")}});var S$=ye((I2r,XZe)=>{"use strict";var zKt=Dr().isArrayOrTypedArray;XZe.exports=function(t,r,n){var i;for(zKt(t)?t.length>r.length&&(t=t.slice(0,r.length)):t=[],i=0;i<r.length;i++)t[i]=n(r[i]);return t}});var M$=ye((R2r,ZZe)=>{"use strict";ZZe.exports=function(t,r,n){if(t.length===0)return"";var i,a=[],o=n?3:1;for(i=0;i<t.length;i+=o)a.push(t[i]+","+r[i]),n&&i<t.length-o&&(a.push("C"),a.push([t[i+1]+","+r[i+1],t[i+2]+","+r[i+2]+" "].join(" ")));return a.join(n?"":"L")}});var KZe=ye((D2r,YZe)=>{"use strict";YZe.exports=function(t,r,n,i,a,o){var s=a[0]*t.dpdx(r),l=a[1]*t.dpdy(n),u=1,c=1;if(o){var f=Math.sqrt(a[0]*a[0]+a[1]*a[1]),h=Math.sqrt(o[0]*o[0]+o[1]*o[1]),d=(a[0]*o[0]+a[1]*o[1])/f/h;c=Math.max(0,d)}var v=Math.atan2(l,s)*180/Math.PI;return v<-90?(v+=180,u=-u):v>90&&(v-=180,u=-u),{angle:v,flip:u,p:t.c2p(i,r,n),offsetMultplier:c}}});var nYe=ye((F2r,iYe)=>{"use strict";var Z7=Oa(),j7=So(),W7=S$(),QZe=M$(),oC=KZe(),E$=ru(),Up=Dr(),eYe=Up.strRotate,X7=Up.strTranslate,tYe=$h();iYe.exports=function(t,r,n,i){var a=t._context.staticPlot,o=r.xaxis,s=r.yaxis,l=t._fullLayout,u=l._clips;Up.makeTraceGroups(i,n,"trace").each(function(c){var f=Z7.select(this),h=c[0],d=h.trace,v=d.aaxis,_=d.baxis,b=Up.ensureSingle(f,"g","minorlayer"),p=Up.ensureSingle(f,"g","majorlayer"),k=Up.ensureSingle(f,"g","boundarylayer"),E=Up.ensureSingle(f,"g","labellayer");f.style("opacity",d.opacity),B5(o,s,p,v,"a",v._gridlines,!0,a),B5(o,s,p,_,"b",_._gridlines,!0,a),B5(o,s,b,v,"a",v._minorgridlines,!0,a),B5(o,s,b,_,"b",_._minorgridlines,!0,a),B5(o,s,k,v,"a-boundary",v._boundarylines,a),B5(o,s,k,_,"b-boundary",_._boundarylines,a);var S=JZe(t,o,s,d,h,E,v._labels,"a-label"),L=JZe(t,o,s,d,h,E,_._labels,"b-label");qKt(t,E,d,h,o,s,S,L),OKt(d,h,u,o,s)})};function OKt(e,t,r,n,i){var a,o,s,l,u=r.select("#"+e._clipPathId);u.size()||(u=r.append("clipPath").classed("carpetclip",!0));var c=Up.ensureSingle(u,"path","carpetboundary"),f=t.clipsegments,h=[];for(l=0;l<f.length;l++)a=f[l],o=W7([],a.x,n.c2p),s=W7([],a.y,i.c2p),h.push(QZe(o,s,a.bicubic));var d="M"+h.join("L")+"Z";u.attr("id",e._clipPathId),c.attr("d",d)}function B5(e,t,r,n,i,a,o){var s="const-"+i+"-lines",l=r.selectAll("."+s).data(a);l.enter().append("path").classed(s,!0).style("vector-effect",o?"none":"non-scaling-stroke"),l.each(function(u){var c=u,f=c.x,h=c.y,d=W7([],f,e.c2p),v=W7([],h,t.c2p),_="M"+QZe(d,v,c.smoothing),b=Z7.select(this);b.attr("d",_).style("stroke-width",c.width).style("stroke",c.color).style("stroke-dasharray",j7.dashStyle(c.dash,c.width)).style("fill","none")}),l.exit().remove()}function JZe(e,t,r,n,i,a,o,s){var l=a.selectAll("text."+s).data(o);l.enter().append("text").classed(s,!0);var u=0,c={};return l.each(function(f,h){var d;if(f.axis.tickangle==="auto")d=oC(n,t,r,f.xy,f.dxy);else{var v=(f.axis.tickangle+180)*Math.PI/180;d=oC(n,t,r,f.xy,[Math.cos(v),Math.sin(v)])}h||(c={angle:d.angle,flip:d.flip});var _=(f.endAnchor?-1:1)*d.flip,b=Z7.select(this).attr({"text-anchor":_>0?"start":"end","data-notex":1}).call(j7.font,f.font).text(f.text).call(E$.convertToTspans,e),p=j7.bBox(this);b.attr("transform",X7(d.p[0],d.p[1])+eYe(d.angle)+X7(f.axis.labelpadding*_,p.height*.3)),u=Math.max(u,p.width+f.axis.labelpadding)}),l.exit().remove(),c.maxExtent=u,c}function qKt(e,t,r,n,i,a,o,s){var l,u,c,f,h=Up.aggNums(Math.min,null,r.a),d=Up.aggNums(Math.max,null,r.a),v=Up.aggNums(Math.min,null,r.b),_=Up.aggNums(Math.max,null,r.b);l=.5*(h+d),u=v,c=r.ab2xy(l,u,!0),f=r.dxyda_rough(l,u),o.angle===void 0&&Up.extendFlat(o,oC(r,i,a,c,r.dxydb_rough(l,u))),$Ze(e,t,r,n,c,f,r.aaxis,i,a,o,"a-title"),l=h,u=.5*(v+_),c=r.ab2xy(l,u,!0),f=r.dxydb_rough(l,u),s.angle===void 0&&Up.extendFlat(s,oC(r,i,a,c,r.dxyda_rough(l,u))),$Ze(e,t,r,n,c,f,r.baxis,i,a,s,"b-title")}var rYe=tYe.LINE_SPACING,BKt=(1-tYe.MID_SHIFT)/rYe+1;function $Ze(e,t,r,n,i,a,o,s,l,u,c){var f=[];o.title.text&&f.push(o.title.text);var h=t.selectAll("text."+c).data(f),d=u.maxExtent;h.enter().append("text").classed(c,!0),h.each(function(){var v=oC(r,s,l,i,a);["start","both"].indexOf(o.showticklabels)===-1&&(d=0);var _=o.title.font.size;d+=_+o.title.offset;var b=u.angle+(u.flip<0?180:0),p=(b-v.angle+450)%360,k=p>90&&p<270,E=Z7.select(this);E.text(o.title.text).call(E$.convertToTspans,e),k&&(d=(-E$.lineCount(E)+BKt)*rYe*_-d),E.attr("transform",X7(v.p[0],v.p[1])+eYe(v.angle)+X7(0,d)).attr("text-anchor","middle").call(j7.font,o.title.font)}),h.exit().remove()}});var oYe=ye((z2r,aYe)=>{"use strict";var Y7=Dr().isArrayOrTypedArray;aYe.exports=function(e,t,r){var n,i,a,o,s,l,u=[],c=Y7(e)?e.length:e,f=Y7(t)?t.length:t,h=Y7(e)?e:null,d=Y7(t)?t:null;h&&(a=(h.length-1)/(h[h.length-1]-h[0])/(c-1)),d&&(o=(d.length-1)/(d[d.length-1]-d[0])/(f-1));var v,_=1/0,b=-1/0;for(i=0;i<f;i++)for(u[i]=[],l=d?(d[i]-d[0])*o:i/(f-1),n=0;n<c;n++)s=h?(h[n]-h[0])*a:n/(c-1),v=s-l*r,_=Math.min(v,_),b=Math.max(v,b),u[i][n]=v;var p=1/(b-_),k=-_*p;for(i=0;i<f;i++)for(n=0;n<c;n++)u[i][n]=p*u[i][n]+k;return u}});var cYe=ye((O2r,uYe)=>{"use strict";var sYe=Dr().isArrayOrTypedArray;uYe.exports=function(e){return lYe(e,0)};function lYe(e,t){if(!sYe(e)||t>=10)return null;for(var r=1/0,n=-1/0,i=e.length,a=0;a<i;a++){var o=e[a];if(sYe(o)){var s=lYe(o,t+1);s&&(r=Math.min(s[0],r),n=Math.max(s[1],n))}else r=Math.min(o,r),n=Math.max(o,n)}return[r,n]}});var hYe=ye((q2r,fYe)=>{"use strict";var NKt=ho(),kx=Ao().extendFlat;fYe.exports=function(t,r,n){var i,a,o,s,l,u,c,f,h,d,v,_,b,p,k=t["_"+r],E=t[r+"axis"],S=E._gridlines=[],L=E._minorgridlines=[],x=E._boundarylines=[],C=t["_"+n],M=t[n+"axis"];E.tickmode==="array"&&(E.tickvals=k.slice());var g=t._xctrl,P=t._yctrl,T=g[0].length,z=g.length,O=t._a.length,V=t._b.length;NKt.prepTicks(E),E.tickmode==="array"&&delete E.tickvals;var G=E.smoothing?3:1;function Z(N){var H,te,oe,_e,Ee,Ce,me,ie,Se,Le,Ae,Fe,Pe=[],ge=[],Re={};if(r==="b")for(te=t.b2j(N),oe=Math.floor(Math.max(0,Math.min(V-2,te))),_e=te-oe,Re.length=V,Re.crossLength=O,Re.xy=function(ce){return t.evalxy([],ce,te)},Re.dxy=function(ce,Ze){return t.dxydi([],ce,oe,Ze,_e)},H=0;H<O;H++)Ce=Math.min(O-2,H),me=H-Ce,ie=t.evalxy([],H,te),M.smoothing&&H>0&&(Se=t.dxydi([],H-1,oe,0,_e),Pe.push(Ee[0]+Se[0]/3),ge.push(Ee[1]+Se[1]/3),Le=t.dxydi([],H-1,oe,1,_e),Pe.push(ie[0]-Le[0]/3),ge.push(ie[1]-Le[1]/3)),Pe.push(ie[0]),ge.push(ie[1]),Ee=ie;else for(H=t.a2i(N),Ce=Math.floor(Math.max(0,Math.min(O-2,H))),me=H-Ce,Re.length=O,Re.crossLength=V,Re.xy=function(ce){return t.evalxy([],H,ce)},Re.dxy=function(ce,Ze){return t.dxydj([],Ce,ce,me,Ze)},te=0;te<V;te++)oe=Math.min(V-2,te),_e=te-oe,ie=t.evalxy([],H,te),M.smoothing&&te>0&&(Ae=t.dxydj([],Ce,te-1,me,0),Pe.push(Ee[0]+Ae[0]/3),ge.push(Ee[1]+Ae[1]/3),Fe=t.dxydj([],Ce,te-1,me,1),Pe.push(ie[0]-Fe[0]/3),ge.push(ie[1]-Fe[1]/3)),Pe.push(ie[0]),ge.push(ie[1]),Ee=ie;return Re.axisLetter=r,Re.axis=E,Re.crossAxis=M,Re.value=N,Re.constvar=n,Re.index=f,Re.x=Pe,Re.y=ge,Re.smoothing=M.smoothing,Re}function j(N){var H,te,oe,_e,Ee,Ce=[],me=[],ie={};if(ie.length=k.length,ie.crossLength=C.length,r==="b")for(oe=Math.max(0,Math.min(V-2,N)),Ee=Math.min(1,Math.max(0,N-oe)),ie.xy=function(Se){return t.evalxy([],Se,N)},ie.dxy=function(Se,Le){return t.dxydi([],Se,oe,Le,Ee)},H=0;H<T;H++)Ce[H]=g[N*G][H],me[H]=P[N*G][H];else for(te=Math.max(0,Math.min(O-2,N)),_e=Math.min(1,Math.max(0,N-te)),ie.xy=function(Se){return t.evalxy([],N,Se)},ie.dxy=function(Se,Le){return t.dxydj([],te,Se,_e,Le)},H=0;H<z;H++)Ce[H]=g[H][N*G],me[H]=P[H][N*G];return ie.axisLetter=r,ie.axis=E,ie.crossAxis=M,ie.value=k[N],ie.constvar=n,ie.index=N,ie.x=Ce,ie.y=me,ie.smoothing=M.smoothing,ie}if(E.tickmode==="array"){for(s=5e-15,l=[Math.floor((k.length-1-E.arraytick0)/E.arraydtick*(1+s)),Math.ceil(-E.arraytick0/E.arraydtick/(1+s))].sort(function(N,H){return N-H}),u=l[0]-1,c=l[1]+1,f=u;f<c;f++)a=E.arraytick0+E.arraydtick*f,!(a<0||a>k.length-1)&&S.push(kx(j(a),{color:E.gridcolor,width:E.gridwidth,dash:E.griddash}));for(f=u;f<c;f++)if(o=E.arraytick0+E.arraydtick*f,v=Math.min(o+E.arraydtick,k.length-1),!(o<0||o>k.length-1)&&!(v<0||v>k.length-1))for(_=k[o],b=k[v],i=0;i<E.minorgridcount;i++)p=v-o,!(p<=0)&&(d=_+(b-_)*(i+1)/(E.minorgridcount+1)*(E.arraydtick/p),!(d<k[0]||d>k[k.length-1])&&L.push(kx(Z(d),{color:E.minorgridcolor,width:E.minorgridwidth,dash:E.minorgriddash})));E.startline&&x.push(kx(j(0),{color:E.startlinecolor,width:E.startlinewidth})),E.endline&&x.push(kx(j(k.length-1),{color:E.endlinecolor,width:E.endlinewidth}))}else{for(s=5e-15,l=[Math.floor((k[k.length-1]-E.tick0)/E.dtick*(1+s)),Math.ceil((k[0]-E.tick0)/E.dtick/(1+s))].sort(function(N,H){return N-H}),u=l[0],c=l[1],f=u;f<=c;f++)h=E.tick0+E.dtick*f,S.push(kx(Z(h),{color:E.gridcolor,width:E.gridwidth,dash:E.griddash}));for(f=u-1;f<c+1;f++)for(h=E.tick0+E.dtick*f,i=0;i<E.minorgridcount;i++)d=h+E.dtick*(i+1)/(E.minorgridcount+1),!(d<k[0]||d>k[k.length-1])&&L.push(kx(Z(d),{color:E.minorgridcolor,width:E.minorgridwidth,dash:E.minorgriddash}));E.startline&&x.push(kx(Z(k[0]),{color:E.startlinecolor,width:E.startlinewidth})),E.endline&&x.push(kx(Z(k[k.length-1]),{color:E.endlinecolor,width:E.endlinewidth}))}}});var gYe=ye((B2r,pYe)=>{"use strict";var dYe=ho(),vYe=Ao().extendFlat;pYe.exports=function(t,r){var n,i,a,o,s,l=r._labels=[],u=r._gridlines;for(n=0;n<u.length;n++)s=u[n],["start","both"].indexOf(r.showticklabels)!==-1&&(i=dYe.tickText(r,s.value),vYe(i,{prefix:a,suffix:o,endAnchor:!0,xy:s.xy(0),dxy:s.dxy(0,0),axis:s.axis,length:s.crossAxis.length,font:s.axis.tickfont,isFirst:n===0,isLast:n===u.length-1}),l.push(i)),["end","both"].indexOf(r.showticklabels)!==-1&&(i=dYe.tickText(r,s.value),vYe(i,{endAnchor:!1,xy:s.xy(s.crossLength-1),dxy:s.dxy(s.crossLength-2,1),axis:s.axis,length:s.crossAxis.length,font:s.axis.tickfont,isFirst:n===0,isLast:n===u.length-1}),l.push(i))}});var yYe=ye((N2r,mYe)=>{"use strict";mYe.exports=function(t,r,n,i){var a,o,s,l=[],u=!!n.smoothing,c=!!i.smoothing,f=t[0].length-1,h=t.length-1;for(a=0,o=[],s=[];a<=f;a++)o[a]=t[0][a],s[a]=r[0][a];for(l.push({x:o,y:s,bicubic:u}),a=0,o=[],s=[];a<=h;a++)o[a]=t[a][f],s[a]=r[a][f];for(l.push({x:o,y:s,bicubic:c}),a=f,o=[],s=[];a>=0;a--)o[f-a]=t[h][a],s[f-a]=r[h][a];for(l.push({x:o,y:s,bicubic:u}),a=h,o=[],s=[];a>=0;a--)o[h-a]=t[a][0],s[h-a]=r[a][0];return l.push({x:o,y:s,bicubic:c}),l}});var xYe=ye((U2r,_Ye)=>{"use strict";var UKt=Dr();_Ye.exports=function(t,r,n){var i,a,o,s=[],l=[],u=t[0].length,c=t.length;function f(oe,_e){var Ee=0,Ce,me=0;return oe>0&&(Ce=t[_e][oe-1])!==void 0&&(me++,Ee+=Ce),oe<u-1&&(Ce=t[_e][oe+1])!==void 0&&(me++,Ee+=Ce),_e>0&&(Ce=t[_e-1][oe])!==void 0&&(me++,Ee+=Ce),_e<c-1&&(Ce=t[_e+1][oe])!==void 0&&(me++,Ee+=Ce),Ee/Math.max(1,me)}var h=0;for(i=0;i<u;i++)for(a=0;a<c;a++)t[a][i]===void 0&&(s.push(i),l.push(a),t[a][i]=f(i,a)),h=Math.max(h,Math.abs(t[a][i]));if(!s.length)return t;var d,v,_,b,p,k,E,S,L,x,C,M=1e-5,g=0,P=100,T=0,z=s.length;do{for(g=0,o=0;o<z;o++){i=s[o],a=l[o];var O=0,V=0,G,Z,j,N,H,te;i===0?(H=Math.min(u-1,2),j=r[H],N=r[1],G=t[a][H],Z=t[a][1],V+=Z+(Z-G)*(r[0]-N)/(N-j),O++):i===u-1&&(H=Math.max(0,u-3),j=r[H],N=r[u-2],G=t[a][H],Z=t[a][u-2],V+=Z+(Z-G)*(r[u-1]-N)/(N-j),O++),(i===0||i===u-1)&&a>0&&a<c-1&&(d=n[a+1]-n[a],v=n[a]-n[a-1],V+=(v*t[a+1][i]+d*t[a-1][i])/(v+d),O++),a===0?(te=Math.min(c-1,2),j=n[te],N=n[1],G=t[te][i],Z=t[1][i],V+=Z+(Z-G)*(n[0]-N)/(N-j),O++):a===c-1&&(te=Math.max(0,c-3),j=n[te],N=n[c-2],G=t[te][i],Z=t[c-2][i],V+=Z+(Z-G)*(n[c-1]-N)/(N-j),O++),(a===0||a===c-1)&&i>0&&i<u-1&&(d=r[i+1]-r[i],v=r[i]-r[i-1],V+=(v*t[a][i+1]+d*t[a][i-1])/(v+d),O++),O?V/=O:(_=r[i+1]-r[i],b=r[i]-r[i-1],p=n[a+1]-n[a],k=n[a]-n[a-1],E=_*b*(_+b),S=p*k*(p+k),V=(E*(k*t[a+1][i]+p*t[a-1][i])+S*(b*t[a][i+1]+_*t[a][i-1]))/(S*(b+_)+E*(k+p))),L=V-t[a][i],x=L/h,g+=x*x,C=O?0:.85,t[a][i]+=L*(1+C)}g=Math.sqrt(g)}while(T++<P&&g>M);return UKt.log("Smoother converged to",g,"after",T,"iterations"),t}});var wYe=ye((V2r,bYe)=>{"use strict";bYe.exports={RELATIVE_CULL_TOLERANCE:1e-6}});var SYe=ye((G2r,AYe)=>{"use strict";var TYe=.5;AYe.exports=function(t,r,n,i){var a=t[0]-r[0],o=t[1]-r[1],s=n[0]-r[0],l=n[1]-r[1],u=Math.pow(a*a+o*o,TYe/2),c=Math.pow(s*s+l*l,TYe/2),f=(c*c*a-u*u*s)*i,h=(c*c*o-u*u*l)*i,d=c*(u+c)*3,v=u*(u+c)*3;return[[r[0]+(d&&f/d),r[1]+(d&&h/d)],[r[0]-(v&&f/v),r[1]-(v&&h/v)]]}});var EYe=ye((H2r,MYe)=>{"use strict";var k$=SYe(),K7=Dr().ensureArray;function N5(e,t,r){var n=-.5*r[0]+1.5*t[0],i=-.5*r[1]+1.5*t[1];return[(2*n+e[0])/3,(2*i+e[1])/3]}MYe.exports=function(t,r,n,i,a,o){var s,l,u,c,f,h,d,v,_,b,p=n[0].length,k=n.length,E=a?3*p-2:p,S=o?3*k-2:k;for(t=K7(t,S),r=K7(r,S),u=0;u<S;u++)t[u]=K7(t[u],E),r[u]=K7(r[u],E);for(l=0,c=0;l<k;l++,c+=o?3:1)for(f=t[c],h=r[c],d=n[l],v=i[l],s=0,u=0;s<p;s++,u+=a?3:1)f[u]=d[s],h[u]=v[s];if(a)for(l=0,c=0;l<k;l++,c+=o?3:1){for(s=1,u=3;s<p-1;s++,u+=3)_=k$([n[l][s-1],i[l][s-1]],[n[l][s],i[l][s]],[n[l][s+1],i[l][s+1]],a),t[c][u-1]=_[0][0],r[c][u-1]=_[0][1],t[c][u+1]=_[1][0],r[c][u+1]=_[1][1];b=N5([t[c][0],r[c][0]],[t[c][2],r[c][2]],[t[c][3],r[c][3]]),t[c][1]=b[0],r[c][1]=b[1],b=N5([t[c][E-1],r[c][E-1]],[t[c][E-3],r[c][E-3]],[t[c][E-4],r[c][E-4]]),t[c][E-2]=b[0],r[c][E-2]=b[1]}if(o)for(u=0;u<E;u++){for(c=3;c<S-3;c+=3)_=k$([t[c-3][u],r[c-3][u]],[t[c][u],r[c][u]],[t[c+3][u],r[c+3][u]],o),t[c-1][u]=_[0][0],r[c-1][u]=_[0][1],t[c+1][u]=_[1][0],r[c+1][u]=_[1][1];b=N5([t[0][u],r[0][u]],[t[2][u],r[2][u]],[t[3][u],r[3][u]]),t[1][u]=b[0],r[1][u]=b[1],b=N5([t[S-1][u],r[S-1][u]],[t[S-3][u],r[S-3][u]],[t[S-4][u],r[S-4][u]]),t[S-2][u]=b[0],r[S-2][u]=b[1]}if(a&&o)for(c=1;c<S;c+=(c+1)%3===0?2:1){for(u=3;u<E-3;u+=3)_=k$([t[c][u-3],r[c][u-3]],[t[c][u],r[c][u]],[t[c][u+3],r[c][u+3]],a),t[c][u-1]=.5*(t[c][u-1]+_[0][0]),r[c][u-1]=.5*(r[c][u-1]+_[0][1]),t[c][u+1]=.5*(t[c][u+1]+_[1][0]),r[c][u+1]=.5*(r[c][u+1]+_[1][1]);b=N5([t[c][0],r[c][0]],[t[c][2],r[c][2]],[t[c][3],r[c][3]]),t[c][1]=.5*(t[c][1]+b[0]),r[c][1]=.5*(r[c][1]+b[1]),b=N5([t[c][E-1],r[c][E-1]],[t[c][E-3],r[c][E-3]],[t[c][E-4],r[c][E-4]]),t[c][E-2]=.5*(t[c][E-2]+b[0]),r[c][E-2]=.5*(r[c][E-2]+b[1])}return[t,r]}});var CYe=ye((j2r,kYe)=>{"use strict";kYe.exports=function(e,t,r,n,i){var a=t-2,o=r-2;return n&&i?function(s,l,u){s||(s=[]);var c,f,h,d,v,_,b=Math.max(0,Math.min(Math.floor(l),a)),p=Math.max(0,Math.min(Math.floor(u),o)),k=Math.max(0,Math.min(1,l-b)),E=Math.max(0,Math.min(1,u-p));b*=3,p*=3;var S=k*k,L=S*k,x=1-k,C=x*x,M=C*x,g=E*E,P=g*E,T=1-E,z=T*T,O=z*T;for(_=0;_<e.length;_++)v=e[_],c=M*v[p][b]+3*(C*k*v[p][b+1]+x*S*v[p][b+2])+L*v[p][b+3],f=M*v[p+1][b]+3*(C*k*v[p+1][b+1]+x*S*v[p+1][b+2])+L*v[p+1][b+3],h=M*v[p+2][b]+3*(C*k*v[p+2][b+1]+x*S*v[p+2][b+2])+L*v[p+2][b+3],d=M*v[p+3][b]+3*(C*k*v[p+3][b+1]+x*S*v[p+3][b+2])+L*v[p+3][b+3],s[_]=O*c+3*(z*E*f+T*g*h)+P*d;return s}:n?function(s,l,u){s||(s=[]);var c=Math.max(0,Math.min(Math.floor(l),a)),f=Math.max(0,Math.min(Math.floor(u),o)),h=Math.max(0,Math.min(1,l-c)),d=Math.max(0,Math.min(1,u-f)),v,_,b,p,k,E;c*=3;var S=h*h,L=S*h,x=1-h,C=x*x,M=C*x,g=1-d;for(k=0;k<e.length;k++)E=e[k],v=g*E[f][c]+d*E[f+1][c],_=g*E[f][c+1]+d*E[f+1][c+1],b=g*E[f][c+2]+d*E[f+1][c+1],p=g*E[f][c+3]+d*E[f+1][c+1],s[k]=M*v+3*(C*h*_+x*S*b)+L*p;return s}:i?function(s,l,u){s||(s=[]);var c=Math.max(0,Math.min(Math.floor(l),a)),f=Math.max(0,Math.min(Math.floor(u),o)),h=Math.max(0,Math.min(1,l-c)),d=Math.max(0,Math.min(1,u-f)),v,_,b,p,k,E;f*=3;var S=d*d,L=S*d,x=1-d,C=x*x,M=C*x,g=1-h;for(k=0;k<e.length;k++)E=e[k],v=g*E[f][c]+h*E[f][c+1],_=g*E[f+1][c]+h*E[f+1][c+1],b=g*E[f+2][c]+h*E[f+2][c+1],p=g*E[f+3][c]+h*E[f+3][c+1],s[k]=M*v+3*(C*d*_+x*S*b)+L*p;return s}:function(s,l,u){s||(s=[]);var c=Math.max(0,Math.min(Math.floor(l),a)),f=Math.max(0,Math.min(Math.floor(u),o)),h=Math.max(0,Math.min(1,l-c)),d=Math.max(0,Math.min(1,u-f)),v,_,b,p,k=1-d,E=1-h;for(b=0;b<e.length;b++)p=e[b],v=E*p[f][c]+h*p[f][c+1],_=E*p[f+1][c]+h*p[f+1][c+1],s[b]=k*v+d*_;return s}}});var PYe=ye((W2r,LYe)=>{"use strict";LYe.exports=function(e,t,r){return t&&r?function(n,i,a,o,s){n||(n=[]);var l,u,c,f,h,d;i*=3,a*=3;var v=o*o,_=1-o,b=_*_,p=_*o*2,k=-3*b,E=3*(b-p),S=3*(p-v),L=3*v,x=s*s,C=x*s,M=1-s,g=M*M,P=g*M;for(d=0;d<e.length;d++)h=e[d],l=k*h[a][i]+E*h[a][i+1]+S*h[a][i+2]+L*h[a][i+3],u=k*h[a+1][i]+E*h[a+1][i+1]+S*h[a+1][i+2]+L*h[a+1][i+3],c=k*h[a+2][i]+E*h[a+2][i+1]+S*h[a+2][i+2]+L*h[a+2][i+3],f=k*h[a+3][i]+E*h[a+3][i+1]+S*h[a+3][i+2]+L*h[a+3][i+3],n[d]=P*l+3*(g*s*u+M*x*c)+C*f;return n}:t?function(n,i,a,o,s){n||(n=[]);var l,u,c,f;i*=3;var h=o*o,d=1-o,v=d*d,_=d*o*2,b=-3*v,p=3*(v-_),k=3*(_-h),E=3*h,S=1-s;for(c=0;c<e.length;c++)f=e[c],l=b*f[a][i]+p*f[a][i+1]+k*f[a][i+2]+E*f[a][i+3],u=b*f[a+1][i]+p*f[a+1][i+1]+k*f[a+1][i+2]+E*f[a+1][i+3],n[c]=S*l+s*u;return n}:r?function(n,i,a,o,s){n||(n=[]);var l,u,c,f,h,d;a*=3;var v=s*s,_=v*s,b=1-s,p=b*b,k=p*b;for(h=0;h<e.length;h++)d=e[h],l=d[a][i+1]-d[a][i],u=d[a+1][i+1]-d[a+1][i],c=d[a+2][i+1]-d[a+2][i],f=d[a+3][i+1]-d[a+3][i],n[h]=k*l+3*(p*s*u+b*v*c)+_*f;return n}:function(n,i,a,o,s){n||(n=[]);var l,u,c,f,h=1-s;for(c=0;c<e.length;c++)f=e[c],l=f[a][i+1]-f[a][i],u=f[a+1][i+1]-f[a+1][i],n[c]=h*l+s*u;return n}}});var RYe=ye((X2r,IYe)=>{"use strict";IYe.exports=function(e,t,r){return t&&r?function(n,i,a,o,s){n||(n=[]);var l,u,c,f,h,d;i*=3,a*=3;var v=o*o,_=v*o,b=1-o,p=b*b,k=p*b,E=s*s,S=1-s,L=S*S,x=S*s*2,C=-3*L,M=3*(L-x),g=3*(x-E),P=3*E;for(d=0;d<e.length;d++)h=e[d],l=C*h[a][i]+M*h[a+1][i]+g*h[a+2][i]+P*h[a+3][i],u=C*h[a][i+1]+M*h[a+1][i+1]+g*h[a+2][i+1]+P*h[a+3][i+1],c=C*h[a][i+2]+M*h[a+1][i+2]+g*h[a+2][i+2]+P*h[a+3][i+2],f=C*h[a][i+3]+M*h[a+1][i+3]+g*h[a+2][i+3]+P*h[a+3][i+3],n[d]=k*l+3*(p*o*u+b*v*c)+_*f;return n}:t?function(n,i,a,o,s){n||(n=[]);var l,u,c,f,h,d;i*=3;var v=s*s,_=v*s,b=1-s,p=b*b,k=p*b;for(h=0;h<e.length;h++)d=e[h],l=d[a+1][i]-d[a][i],u=d[a+1][i+1]-d[a][i+1],c=d[a+1][i+2]-d[a][i+2],f=d[a+1][i+3]-d[a][i+3],n[h]=k*l+3*(p*s*u+b*v*c)+_*f;return n}:r?function(n,i,a,o,s){n||(n=[]);var l,u,c,f;a*=3;var h=1-o,d=s*s,v=1-s,_=v*v,b=v*s*2,p=-3*_,k=3*(_-b),E=3*(b-d),S=3*d;for(c=0;c<e.length;c++)f=e[c],l=p*f[a][i]+k*f[a+1][i]+E*f[a+2][i]+S*f[a+3][i],u=p*f[a][i+1]+k*f[a+1][i+1]+E*f[a+2][i+1]+S*f[a+3][i+1],n[c]=h*l+o*u;return n}:function(n,i,a,o,s){n||(n=[]);var l,u,c,f,h=1-o;for(c=0;c<e.length;c++)f=e[c],l=f[a+1][i]-f[a][i],u=f[a+1][i+1]-f[a][i+1],n[c]=h*l+o*u;return n}}});var OYe=ye((Z2r,zYe)=>{"use strict";var DYe=wYe(),FYe=R6().findBin,VKt=EYe(),GKt=CYe(),HKt=PYe(),jKt=RYe();zYe.exports=function(t){var r=t._a,n=t._b,i=r.length,a=n.length,o=t.aaxis,s=t.baxis,l=r[0],u=r[i-1],c=n[0],f=n[a-1],h=r[r.length-1]-r[0],d=n[n.length-1]-n[0],v=h*DYe.RELATIVE_CULL_TOLERANCE,_=d*DYe.RELATIVE_CULL_TOLERANCE;l-=v,u+=v,c-=_,f+=_,t.isVisible=function(b,p){return b>l&&b<u&&p>c&&p<f},t.isOccluded=function(b,p){return b<l||b>u||p<c||p>f},t.setScale=function(){var b=t._x,p=t._y,k=VKt(t._xctrl,t._yctrl,b,p,o.smoothing,s.smoothing);t._xctrl=k[0],t._yctrl=k[1],t.evalxy=GKt([t._xctrl,t._yctrl],i,a,o.smoothing,s.smoothing),t.dxydi=HKt([t._xctrl,t._yctrl],o.smoothing,s.smoothing),t.dxydj=jKt([t._xctrl,t._yctrl],o.smoothing,s.smoothing)},t.i2a=function(b){var p=Math.max(0,Math.floor(b[0]),i-2),k=b[0]-p;return(1-k)*r[p]+k*r[p+1]},t.j2b=function(b){var p=Math.max(0,Math.floor(b[1]),i-2),k=b[1]-p;return(1-k)*n[p]+k*n[p+1]},t.ij2ab=function(b){return[t.i2a(b[0]),t.j2b(b[1])]},t.a2i=function(b){var p=Math.max(0,Math.min(FYe(b,r),i-2)),k=r[p],E=r[p+1];return Math.max(0,Math.min(i-1,p+(b-k)/(E-k)))},t.b2j=function(b){var p=Math.max(0,Math.min(FYe(b,n),a-2)),k=n[p],E=n[p+1];return Math.max(0,Math.min(a-1,p+(b-k)/(E-k)))},t.ab2ij=function(b){return[t.a2i(b[0]),t.b2j(b[1])]},t.i2c=function(b,p){return t.evalxy([],b,p)},t.ab2xy=function(b,p,k){if(!k&&(b<r[0]||b>r[i-1]|p<n[0]||p>n[a-1]))return[!1,!1];var E=t.a2i(b),S=t.b2j(p),L=t.evalxy([],E,S);if(k){var x=0,C=0,M=[],g,P,T,z;b<r[0]?(g=0,P=0,x=(b-r[0])/(r[1]-r[0])):b>r[i-1]?(g=i-2,P=1,x=(b-r[i-1])/(r[i-1]-r[i-2])):(g=Math.max(0,Math.min(i-2,Math.floor(E))),P=E-g),p<n[0]?(T=0,z=0,C=(p-n[0])/(n[1]-n[0])):p>n[a-1]?(T=a-2,z=1,C=(p-n[a-1])/(n[a-1]-n[a-2])):(T=Math.max(0,Math.min(a-2,Math.floor(S))),z=S-T),x&&(t.dxydi(M,g,T,P,z),L[0]+=M[0]*x,L[1]+=M[1]*x),C&&(t.dxydj(M,g,T,P,z),L[0]+=M[0]*C,L[1]+=M[1]*C)}return L},t.c2p=function(b,p,k){return[p.c2p(b[0]),k.c2p(b[1])]},t.p2x=function(b,p,k){return[p.p2c(b[0]),k.p2c(b[1])]},t.dadi=function(b){var p=Math.max(0,Math.min(r.length-2,b));return r[p+1]-r[p]},t.dbdj=function(b){var p=Math.max(0,Math.min(n.length-2,b));return n[p+1]-n[p]},t.dxyda=function(b,p,k,E){var S=t.dxydi(null,b,p,k,E),L=t.dadi(b,k);return[S[0]/L,S[1]/L]},t.dxydb=function(b,p,k,E){var S=t.dxydj(null,b,p,k,E),L=t.dbdj(p,E);return[S[0]/L,S[1]/L]},t.dxyda_rough=function(b,p,k){var E=h*(k||.1),S=t.ab2xy(b+E,p,!0),L=t.ab2xy(b-E,p,!0);return[(S[0]-L[0])*.5/E,(S[1]-L[1])*.5/E]},t.dxydb_rough=function(b,p,k){var E=d*(k||.1),S=t.ab2xy(b,p+E,!0),L=t.ab2xy(b,p-E,!0);return[(S[0]-L[0])*.5/E,(S[1]-L[1])*.5/E]},t.dpdx=function(b){return b._m},t.dpdy=function(b){return b._m}}});var jYe=ye((Y2r,HYe)=>{"use strict";var J7=ho(),qYe=Dr().isArray1D,WKt=oYe(),BYe=cYe(),NYe=hYe(),UYe=gYe(),XKt=yYe(),VYe=a8(),GYe=xYe(),ZKt=i8(),YKt=OYe();HYe.exports=function(t,r){var n=J7.getFromId(t,r.xaxis),i=J7.getFromId(t,r.yaxis),a=r.aaxis,o=r.baxis,s=r.x,l=r.y,u=[];s&&qYe(s)&&u.push("x"),l&&qYe(l)&&u.push("y"),u.length&&ZKt(r,a,o,"a","b",u);var c=r._a=r._a||r.a,f=r._b=r._b||r.b;s=r._x||r.x,l=r._y||r.y;var h={};if(r._cheater){var d=a.cheatertype==="index"?c.length:c,v=o.cheatertype==="index"?f.length:f;s=WKt(d,v,r.cheaterslope)}r._x=s=VYe(s),r._y=l=VYe(l),GYe(s,c,f),GYe(l,c,f),YKt(r),r.setScale();var _=BYe(s),b=BYe(l),p=.5*(_[1]-_[0]),k=.5*(_[1]+_[0]),E=.5*(b[1]-b[0]),S=.5*(b[1]+b[0]),L=1.3;return _=[k-p*L,k+p*L],b=[S-E*L,S+E*L],r._extremes[n._id]=J7.findExtremes(n,_,{padded:!0}),r._extremes[i._id]=J7.findExtremes(i,b,{padded:!0}),NYe(r,"a","b"),NYe(r,"b","a"),UYe(r,a),UYe(r,o),h.clipsegments=XKt(r._xctrl,r._yctrl,a,o),h.x=s,h.y=l,h.a=c,h.b=f,[h]}});var XYe=ye((K2r,WYe)=>{"use strict";WYe.exports={attributes:H7(),supplyDefaults:WZe(),plot:nYe(),calc:jYe(),animatable:!0,isContainer:!0,moduleType:"trace",name:"carpet",basePlotModule:ph(),categories:["cartesian","svg","carpet","carpetAxis","notLegendIsolatable","noMultiCategory","noHover","noSortingByValue"],meta:{}}});var YYe=ye((J2r,ZYe)=>{"use strict";ZYe.exports=XYe()});var C$=ye(($2r,$Ye)=>{"use strict";var KKt=Cg(),u0=pf(),JKt=Gl(),{hovertemplateAttrs:$Kt,texttemplateAttrs:QKt,templatefallbackAttrs:KYe}=Ll(),JYe=Tu(),Cx=Ao().extendFlat,sg=u0.marker,U5=u0.line,eJt=sg.line;$Ye.exports={carpet:{valType:"string",editType:"calc"},a:{valType:"data_array",editType:"calc"},b:{valType:"data_array",editType:"calc"},mode:Cx({},u0.mode,{dflt:"markers"}),text:Cx({},u0.text,{}),texttemplate:QKt({editType:"plot"},{keys:["a","b","text"]}),texttemplatefallback:KYe({editType:"plot"}),hovertext:Cx({},u0.hovertext,{}),line:{color:U5.color,width:U5.width,dash:U5.dash,backoff:U5.backoff,shape:Cx({},U5.shape,{values:["linear","spline"]}),smoothing:U5.smoothing,editType:"calc"},connectgaps:u0.connectgaps,fill:Cx({},u0.fill,{values:["none","toself","tonext"],dflt:"none"}),fillcolor:KKt(),marker:Cx({symbol:sg.symbol,opacity:sg.opacity,maxdisplayed:sg.maxdisplayed,angle:sg.angle,angleref:sg.angleref,standoff:sg.standoff,size:sg.size,sizeref:sg.sizeref,sizemin:sg.sizemin,sizemode:sg.sizemode,line:Cx({width:eJt.width,editType:"calc"},JYe("marker.line")),gradient:sg.gradient,editType:"calc"},JYe("marker")),textfont:u0.textfont,textposition:u0.textposition,selected:u0.selected,unselected:u0.unselected,hoverinfo:Cx({},JKt.hoverinfo,{flags:["a","b","text","name"]}),hoveron:u0.hoveron,hovertemplate:$Kt(),hovertemplatefallback:KYe(),zorder:u0.zorder}});var rKe=ye((Q2r,tKe)=>{"use strict";var QYe=Dr(),tJt=Sm(),V5=Ru(),rJt=$p(),iJt=R0(),eKe=tT(),nJt=D0(),aJt=Rg(),oJt=C$();tKe.exports=function(t,r,n,i){function a(h,d){return QYe.coerce(t,r,oJt,h,d)}a("carpet"),r.xaxis="x",r.yaxis="y";var o=a("a"),s=a("b"),l=Math.min(o.length,s.length);if(!l){r.visible=!1;return}r._length=l,a("text"),a("texttemplate"),a("texttemplatefallback"),a("hovertext");var u=l<tJt.PTS_LINESONLY?"lines+markers":"lines";a("mode",u),V5.hasMarkers(r)&&rJt(t,r,n,i,a,{gradient:!0}),V5.hasLines(r)&&(iJt(t,r,n,i,a,{backoff:!0}),eKe(t,r,a),a("connectgaps")),V5.hasText(r)&&nJt(t,r,i,a);var c=[];(V5.hasMarkers(r)||V5.hasText(r))&&(a("marker.maxdisplayed"),c.push("points")),a("fill"),r.fill!=="none"&&(aJt(t,r,n,a),V5.hasLines(r)||eKe(t,r,a)),(r.fill==="tonext"||r.fill==="toself")&&c.push("fills");var f=a("hoveron",c.join("+")||"points");f!=="fills"&&(a("hovertemplate"),a("hovertemplatefallback")),a("zorder"),QYe.coerceSelectionMarkerOpacity(r,a)}});var nKe=ye((ewr,iKe)=>{"use strict";iKe.exports=function(t,r){var n={},i=r._carpet,a=i.ab2ij([t.a,t.b]),o=Math.floor(a[0]),s=a[0]-o,l=Math.floor(a[1]),u=a[1]-l,c=i.evalxy([],o,l,s,u);return n.yLabel=c[1].toFixed(3),n}});var $7=ye((twr,aKe)=>{"use strict";aKe.exports=function(e,t){for(var r=e._fullData.length,n,i=0;i<r;i++){var a=e._fullData[i];if(a.index!==t.index&&a.type==="carpet"&&(n||(n=a),a.carpet===t.carpet))return a}return n}});var lKe=ye((rwr,sKe)=>{"use strict";var oKe=Eo(),sJt=F0(),lJt=km(),uJt=z0(),cJt=O0().calcMarkerSize,fJt=$7();sKe.exports=function(t,r){var n=r._carpetTrace=fJt(t,r);if(!(!n||!n.visible||n.visible==="legendonly")){var i;r.xaxis=n.xaxis,r.yaxis=n.yaxis;var a=r._length,o=new Array(a),s,l,u=!1;for(i=0;i<a;i++)if(s=r.a[i],l=r.b[i],oKe(s)&&oKe(l)){var c=n.ab2xy(+s,+l,!0),f=n.isVisible(+s,+l);f||(u=!0),o[i]={x:c[0],y:c[1],a:s,b:l,vis:f}}else o[i]={x:!1,y:!1};return r._needsCull=u,o[0].carpet=n,o[0].trace=r,cJt(r,a),sJt(t,r),lJt(o,r),uJt(o,r),o}}});var fKe=ye((iwr,cKe)=>{"use strict";var hJt=sT(),uKe=ho(),dJt=So();cKe.exports=function(t,r,n,i){var a,o,s,l=n[0][0].carpet,u=uKe.getFromId(t,l.xaxis||"x"),c=uKe.getFromId(t,l.yaxis||"y"),f={xaxis:u,yaxis:c,plot:r.plot};for(a=0;a<n.length;a++)o=n[a][0].trace,o._xA=u,o._yA=c;for(hJt(t,f,n,i),a=0;a<n.length;a++)o=n[a][0].trace,s=i.selectAll("g.trace"+o.uid+" .js-line"),dJt.setClipUrl(s,n[a][0].carpet._clipPathId,t)}});var dKe=ye((nwr,hKe)=>{"use strict";var vJt=fT(),pJt=Dr().fillText;hKe.exports=function(t,r,n,i){var a=vJt(t,r,n,i);if(!a||a[0].index===!1)return;var o=a[0];if(o.index===void 0){var s=1-o.y0/t.ya._length,l=t.xa._length,u=l*s/2,c=l-u;return o.x0=Math.max(Math.min(o.x0,c),u),o.x1=Math.max(Math.min(o.x1,c),u),a}var f=o.cd[o.index];o.a=f.a,o.b=f.b,o.xLabelVal=void 0,o.yLabelVal=void 0;var h=o.trace,d=h._carpet,v=h._module.formatLabels(f,h);o.yLabel=v.yLabel,delete o.text;var _=[];function b(E,S){var L;E.labelprefix&&E.labelprefix.length>0?L=E.labelprefix.replace(/ = $/,""):L=E._hovertitle,_.push(L+": "+S.toFixed(3)+E.labelsuffix)}if(!h.hovertemplate){var p=f.hi||h.hoverinfo,k=p.split("+");k.indexOf("all")!==-1&&(k=["a","b","text"]),k.indexOf("a")!==-1&&b(d.aaxis,f.a),k.indexOf("b")!==-1&&b(d.baxis,f.b),_.push("y: "+o.yLabel),k.indexOf("text")!==-1&&pJt(f,h,_),o.extraText=_.join("<br>")}return a}});var pKe=ye((awr,vKe)=>{"use strict";vKe.exports=function(t,r,n,i,a){var o=i[a];return t.a=o.a,t.b=o.b,t.y=o.y,t}});var mKe=ye((owr,gKe)=>{"use strict";gKe.exports={attributes:C$(),supplyDefaults:rKe(),colorbar:$d(),formatLabels:nKe(),calc:lKe(),plot:fKe(),style:op().style,styleOnSelect:op().styleOnSelect,hoverPoints:dKe(),selectPoints:hT(),eventData:pKe(),moduleType:"trace",name:"scattercarpet",basePlotModule:ph(),categories:["svg","carpet","symbols","showLegend","carpetDependent","zoomScale"],meta:{}}});var _Ke=ye((swr,yKe)=>{"use strict";yKe.exports=mKe()});var L$=ye((lwr,xKe)=>{"use strict";var lg=PT(),g1=C4(),gJt=Tu(),mJt=Ao().extendFlat,ry=g1.contours;xKe.exports=mJt({carpet:{valType:"string",editType:"calc"},z:lg.z,a:lg.x,a0:lg.x0,da:lg.dx,b:lg.y,b0:lg.y0,db:lg.dy,text:lg.text,hovertext:lg.hovertext,transpose:lg.transpose,atype:lg.xtype,btype:lg.ytype,fillcolor:g1.fillcolor,autocontour:g1.autocontour,ncontours:g1.ncontours,contours:{type:ry.type,start:ry.start,end:ry.end,size:ry.size,coloring:{valType:"enumerated",values:["fill","lines","none"],dflt:"fill",editType:"calc"},showlines:ry.showlines,showlabels:ry.showlabels,labelfont:ry.labelfont,labelformat:ry.labelformat,operation:ry.operation,value:ry.value,editType:"calc",impliedEdits:{autocontour:!1}},line:{color:g1.line.color,width:g1.line.width,dash:g1.line.dash,smoothing:g1.line.smoothing,editType:"plot"},zorder:g1.zorder},gJt("",{cLetter:"z",autoColorDflt:!1}))});var P$=ye((uwr,TKe)=>{"use strict";var bKe=Dr(),yJt=e8(),wKe=L$(),_Jt=MG(),xJt=A8(),bJt=S8();TKe.exports=function(t,r,n,i){function a(u,c){return bKe.coerce(t,r,wKe,u,c)}function o(u){return bKe.coerce2(t,r,wKe,u)}if(a("carpet"),t.a&&t.b){var s=yJt(t,r,a,i,"a","b");if(!s){r.visible=!1;return}a("text");var l=a("contours.type")==="constraint";l?_Jt(t,r,a,i,n,{hasHover:!1}):(xJt(t,r,a,o),bJt(t,r,a,i,{hasHover:!1}))}else r._defaultColor=n,r._length=null;a("zorder")}});var EKe=ye((cwr,MKe)=>{"use strict";var wJt=gv(),AKe=Dr(),TJt=i8(),AJt=a8(),SJt=o8(),MJt=s8(),SKe=YV(),EJt=P$(),kJt=$7(),CJt=vG();MKe.exports=function(t,r){var n=r._carpetTrace=kJt(t,r);if(!(!n||!n.visible||n.visible==="legendonly")){if(!r.a||!r.b){var i=t.data[n.index],a=t.data[r.index];a.a||(a.a=i.a),a.b||(a.b=i.b),EJt(a,r,r._defaultColor,t._fullLayout)}var o=LJt(t,r);return CJt(r,r._z),o}};function LJt(e,t){var r=t._carpetTrace,n=r.aaxis,i=r.baxis,a,o,s,l,u,c,f;n._minDtick=0,i._minDtick=0,AKe.isArray1D(t.z)&&TJt(t,n,i,"a","b",["z"]),a=t._a=t._a||t.a,l=t._b=t._b||t.b,a=a?n.makeCalcdata(t,"_a"):[],l=l?i.makeCalcdata(t,"_b"):[],o=t.a0||0,s=t.da||1,u=t.b0||0,c=t.db||1,f=t._z=AJt(t._z||t.z,t.transpose),t._emptypoints=MJt(f),SJt(f,t._emptypoints);var h=AKe.maxRowLength(f),d=t.xtype==="scaled"?"":a,v=SKe(t,d,o,s,h,n),_=t.ytype==="scaled"?"":l,b=SKe(t,_,u,c,f.length,i),p={a:v,b,z:f};return t.contours.type==="levels"&&t.contours.coloring!=="none"&&wJt(e,t,{vals:f,containerStr:"",cLetter:"z"}),[p]}});var CKe=ye((fwr,kKe)=>{"use strict";var PJt=Dr().isArrayOrTypedArray;kKe.exports=function(e,t,r,n){var i,a,o,s,l,u,c,f,h,d,v,_,b,p=PJt(r)?"a":"b",k=p==="a"?e.aaxis:e.baxis,E=k.smoothing,S=p==="a"?e.a2i:e.b2j,L=p==="a"?r:n,x=p==="a"?n:r,C=p==="a"?t.a.length:t.b.length,M=p==="a"?t.b.length:t.a.length,g=Math.floor(p==="a"?e.b2j(x):e.a2i(x)),P=p==="a"?function(_e){return e.evalxy([],_e,g)}:function(_e){return e.evalxy([],g,_e)};E&&(o=Math.max(0,Math.min(M-2,g)),s=g-o,a=p==="a"?function(_e,Ee){return e.dxydi([],_e,o,Ee,s)}:function(_e,Ee){return e.dxydj([],o,_e,s,Ee)});var T=S(L[0]),z=S(L[1]),O=T<z?1:-1,V=(z-T)*1e-8,G=O>0?Math.floor:Math.ceil,Z=O>0?Math.ceil:Math.floor,j=O>0?Math.min:Math.max,N=O>0?Math.max:Math.min,H=G(T+V),te=Z(z-V);c=P(T);var oe=[[c]];for(i=H;i*O<te*O;i+=O)l=[],v=N(T,i),_=j(z,i+O),b=_-v,u=Math.max(0,Math.min(C-2,Math.floor(.5*(v+_)))),f=P(_),E&&(h=a(u,v-u),d=a(u,_-u),l.push([c[0]+h[0]/3*b,c[1]+h[1]/3*b]),l.push([f[0]-d[0]/3*b,f[1]-d[1]/3*b])),l.push(f),oe.push(l),c=f;return oe}});var zKe=ye((hwr,FKe)=>{"use strict";var e9=Oa(),t9=S$(),DKe=M$(),sC=So(),m1=Dr(),IJt=gG(),RJt=mG(),vw=k8(),Q7=P4(),DJt=bG(),FJt=xG(),zJt=wG(),OJt=$7(),LKe=CKe();FKe.exports=function(t,r,n,i){var a=r.xaxis,o=r.yaxis;m1.makeTraceGroups(i,n,"contour").each(function(s){var l=e9.select(this),u=s[0],c=u.trace,f=c._carpetTrace=OJt(t,c),h=t.calcdata[f.index][0];if(!f.visible||f.visible==="legendonly")return;var d=u.a,v=u.b,_=c.contours,b=FJt(_,r,u),p=_.type==="constraint",k=_._operation,E=p?k==="="?"lines":"fill":_.coloring;function S(G){var Z=f.ab2xy(G[0],G[1],!0);return[a.c2p(Z[0]),o.c2p(Z[1])]}var L=[[d[0],v[v.length-1]],[d[d.length-1],v[v.length-1]],[d[d.length-1],v[0]],[d[0],v[0]]];IJt(b);var x=(d[d.length-1]-d[0])*1e-8,C=(v[v.length-1]-v[0])*1e-8;RJt(b,x,C);var M=b;_.type==="constraint"&&(M=DJt(b,k)),qJt(b,S);var g,P,T,z,O=[];for(z=h.clipsegments.length-1;z>=0;z--)g=h.clipsegments[z],P=t9([],g.x,a.c2p),T=t9([],g.y,o.c2p),P.reverse(),T.reverse(),O.push(DKe(P,T,g.bicubic));var V="M"+O.join("L")+"Z";UJt(l,h.clipsegments,a,o,p,E),VJt(c,l,a,o,M,L,S,f,h,E,V),BJt(l,b,t,u,_,r,f),sC.setClipUrl(l,f._clipPathId,t)})};function qJt(e,t){var r,n,i,a,o,s,l,u,c;for(r=0;r<e.length;r++){for(a=e[r],o=a.pedgepaths=[],s=a.ppaths=[],n=0;n<a.edgepaths.length;n++){for(c=a.edgepaths[n],l=[],i=0;i<c.length;i++)l[i]=t(c[i]);o.push(l)}for(n=0;n<a.paths.length;n++){for(c=a.paths[n],u=[],i=0;i<c.length;i++)u[i]=t(c[i]);s.push(u)}}}function BJt(e,t,r,n,i,a,o){var s=r._context.staticPlot,l=m1.ensureSingle(e,"g","contourlines"),u=i.showlines!==!1,c=i.showlabels,f=u&&c,h=vw.createLines(l,u||c,t,s),d=vw.createLineClip(l,f,r,n.trace.uid),v=e.selectAll("g.contourlabels").data(c?[0]:[]);if(v.exit().remove(),v.enter().append("g").classed("contourlabels",!0),c){var _=a.xaxis,b=a.yaxis,p=_._length,k=b._length,E=[[[0,0],[p,0],[p,k],[0,k]]],S=[];m1.clearLocationCache();var L=vw.labelFormatter(r,n),x=sC.tester.append("text").attr("data-notex",1).call(sC.font,i.labelfont),C={left:0,right:p,center:p/2,top:0,bottom:k,middle:k/2},M=Math.sqrt(p*p+k*k),g=Q7.LABELDISTANCE*M/Math.max(1,t.length/Q7.LABELINCREASE);h.each(function(P){var T=vw.calcTextOpts(P.level,L,x,r);e9.select(this).selectAll("path").each(function(z){var O=this,V=m1.getVisibleSegment(O,C,T.height/2);if(V&&(NJt(O,z,P,V,o,T.height),!(V.len<(T.width+T.height)*Q7.LABELMIN)))for(var G=Math.min(Math.ceil(V.len/g),Q7.LABELMAX),Z=0;Z<G;Z++){var j=vw.findBestTextLocation(O,V,T,S,C);if(!j)break;vw.addLabelData(j,T,S,E)}})}),x.remove(),vw.drawLabels(v,S,r,d,f?E:null)}c&&!u&&h.remove()}function NJt(e,t,r,n,i,a){for(var o,s=0;s<r.pedgepaths.length;s++)t===r.pedgepaths[s]&&(o=r.edgepaths[s]);if(!o)return;var l=i.a[0],u=i.a[i.a.length-1],c=i.b[0],f=i.b[i.b.length-1];function h(p,k){var E=0,S,L=.1;return(Math.abs(p[0]-l)<L||Math.abs(p[0]-u)<L)&&(S=IKe(i.dxydb_rough(p[0],p[1],L)),E=Math.max(E,a*RKe(k,S)/2)),(Math.abs(p[1]-c)<L||Math.abs(p[1]-f)<L)&&(S=IKe(i.dxyda_rough(p[0],p[1],L)),E=Math.max(E,a*RKe(k,S)/2)),E}var d=PKe(e,0,1),v=PKe(e,n.total,n.total-1),_=h(o[0],d),b=n.total-h(o[o.length-1],v);n.min<_&&(n.min=_),n.max>b&&(n.max=b),n.len=n.max-n.min}function PKe(e,t,r){var n=e.getPointAtLength(t),i=e.getPointAtLength(r),a=i.x-n.x,o=i.y-n.y,s=Math.sqrt(a*a+o*o);return[a/s,o/s]}function IKe(e){var t=Math.sqrt(e[0]*e[0]+e[1]*e[1]);return[e[0]/t,e[1]/t]}function RKe(e,t){var r=Math.abs(e[0]*t[0]+e[1]*t[1]),n=Math.sqrt(1-r*r);return n/r}function UJt(e,t,r,n,i,a){var o,s,l,u,c=m1.ensureSingle(e,"g","contourbg"),f=c.selectAll("path").data(a==="fill"&&!i?[0]:[]);f.enter().append("path"),f.exit().remove();var h=[];for(u=0;u<t.length;u++)o=t[u],s=t9([],o.x,r.c2p),l=t9([],o.y,n.c2p),h.push(DKe(s,l,o.bicubic));f.attr("d","M"+h.join("L")+"Z").style("stroke","none")}function VJt(e,t,r,n,i,a,o,s,l,u,c){var f=u==="fill";f&&zJt(i,e.contours);var h=m1.ensureSingle(t,"g","contourfill"),d=h.selectAll("path").data(f?i:[]);d.enter().append("path"),d.exit().remove(),d.each(function(v){var _=(v.prefixBoundary?c:"")+GJt(e,v,a,o,s,l,r,n);_?e9.select(this).attr("d",_).style("stroke","none"):e9.select(this).remove()})}function GJt(e,t,r,n,i,a,o,s){var l,u="",c=t.edgepaths.map(function(T,z){return z}),f=!0,h,d,v,_,b,p,k=Math.abs(r[0][0]-r[2][0])*1e-4,E=Math.abs(r[0][1]-r[2][1])*1e-4;function S(T){return Math.abs(T[1]-r[0][1])<E}function L(T){return Math.abs(T[1]-r[2][1])<E}function x(T){return Math.abs(T[0]-r[0][0])<k}function C(T){return Math.abs(T[0]-r[2][0])<k}function M(T,z){var O,V,G,Z,j="";for(S(T)&&!C(T)||L(T)&&!x(T)?(Z=i.aaxis,G=LKe(i,a,[T[0],z[0]],.5*(T[1]+z[1]))):(Z=i.baxis,G=LKe(i,a,.5*(T[0]+z[0]),[T[1],z[1]])),O=1;O<G.length;O++)for(j+=Z.smoothing?"C":"L",V=0;V<G[O].length;V++){var N=G[O][V];j+=[o.c2p(N[0]),s.c2p(N[1])]+" "}return j}for(l=0,h=null;c.length;){var g=t.edgepaths[l][0];for(h&&(u+=M(h,g)),p=sC.smoothopen(t.edgepaths[l].map(n),t.smoothing),u+=f?p:p.replace(/^M/,"L"),c.splice(c.indexOf(l),1),h=t.edgepaths[l][t.edgepaths[l].length-1],_=-1,v=0;v<4;v++){if(!h){m1.log("Missing end?",l,t);break}for(S(h)&&!C(h)?d=r[1]:x(h)?d=r[0]:L(h)?d=r[3]:C(h)&&(d=r[2]),b=0;b<t.edgepaths.length;b++){var P=t.edgepaths[b][0];Math.abs(h[0]-d[0])<k?Math.abs(h[0]-P[0])<k&&(P[1]-h[1])*(d[1]-P[1])>=0&&(d=P,_=b):Math.abs(h[1]-d[1])<E?Math.abs(h[1]-P[1])<E&&(P[0]-h[0])*(d[0]-P[0])>=0&&(d=P,_=b):m1.log("endpt to newendpt is not vert. or horz.",h,d,P)}if(_>=0)break;u+=M(h,d),h=d}if(_===t.edgepaths.length){m1.log("unclosed perimeter path");break}l=_,f=c.indexOf(l)===-1,f&&(l=c[0],u+=M(h,d)+"Z",h=null)}for(l=0;l<t.paths.length;l++)u+=sC.smoothclosed(t.paths[l].map(n),t.smoothing);return u}});var qKe=ye((dwr,OKe)=>{"use strict";OKe.exports={attributes:L$(),supplyDefaults:P$(),colorbar:P8(),calc:EKe(),plot:zKe(),style:L8(),moduleType:"trace",name:"contourcarpet",basePlotModule:ph(),categories:["cartesian","svg","carpet","contour","symbols","showLegend","hasLines","carpetDependent","noHover","noSortingByValue"],meta:{}}});var NKe=ye((vwr,BKe)=>{"use strict";BKe.exports=qKe()});var i9=ye((pwr,jKe)=>{"use strict";var r9=Dr().extendFlat,lC=pf(),UKe=df().axisHoverFormat,{hovertemplateAttrs:HJt,templatefallbackAttrs:jJt}=Ll(),GKe=Pd().dash,WJt=a3(),HKe=XT(),XJt=HKe.INCREASING.COLOR,ZJt=HKe.DECREASING.COLOR,I$=lC.line;function VKe(e){return{line:{color:r9({},I$.color,{dflt:e}),width:I$.width,dash:GKe,editType:"style"},editType:"style"}}jKe.exports={xperiod:lC.xperiod,xperiod0:lC.xperiod0,xperiodalignment:lC.xperiodalignment,xhoverformat:UKe("x"),yhoverformat:UKe("y"),x:{valType:"data_array",editType:"calc+clearAxisTypes"},open:{valType:"data_array",editType:"calc"},high:{valType:"data_array",editType:"calc"},low:{valType:"data_array",editType:"calc"},close:{valType:"data_array",editType:"calc"},line:{width:r9({},I$.width,{}),dash:r9({},GKe,{}),editType:"style"},increasing:VKe(XJt),decreasing:VKe(ZJt),text:{valType:"string",dflt:"",arrayOk:!0,editType:"calc"},hovertext:{valType:"string",dflt:"",arrayOk:!0,editType:"calc"},hovertemplate:HJt({},{keys:["open","high","low","close"]}),hovertemplatefallback:jJt(),tickwidth:{valType:"number",min:0,max:.5,dflt:.3,editType:"calc"},hoverlabel:r9({},WJt.hoverlabel,{split:{valType:"boolean",dflt:!1,editType:"style"}}),zorder:lC.zorder}});var R$=ye((gwr,WKe)=>{"use strict";var YJt=qa(),KJt=Dr();WKe.exports=function(t,r,n,i){var a=n("x"),o=n("open"),s=n("high"),l=n("low"),u=n("close");n("hoverlabel.split");var c=YJt.getComponentMethod("calendars","handleTraceDefaults");if(c(t,r,["x"],i),!!(o&&s&&l&&u)){var f=Math.min(o.length,s.length,l.length,u.length);return a&&(f=Math.min(f,KJt.minRowLength(a))),r._length=f,f}}});var YKe=ye((mwr,ZKe)=>{"use strict";var JJt=Dr(),$Jt=R$(),QJt=Ig(),e$t=i9();ZKe.exports=function(t,r,n,i){function a(s,l){return JJt.coerce(t,r,e$t,s,l)}var o=$Jt(t,r,a,i);if(!o){r.visible=!1;return}QJt(t,r,i,a,{x:!0}),a("xhoverformat"),a("yhoverformat"),a("line.width"),a("line.dash"),XKe(t,r,a,"increasing"),XKe(t,r,a,"decreasing"),a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),a("tickwidth"),i._requestRangeslider[r.xaxis]=!0,a("zorder")};function XKe(e,t,r,n){r(n+".line.color"),r(n+".line.width",t.line.width),r(n+".line.dash",t.line.dash)}});var D$=ye((ywr,JKe)=>{"use strict";var G5=Dr(),n9=G5._,a9=ho(),t$t=Dg(),uC=fs().BADNUM;function r$t(e,t){var r=a9.getFromId(e,t.xaxis),n=a9.getFromId(e,t.yaxis),i=n$t(e,r,t),a=t._minDiff;t._minDiff=null;var o=t._origX;t._origX=null;var s=t._xcalc;t._xcalc=null;var l=KKe(e,t,o,s,n,i$t);return t._extremes[r._id]=a9.findExtremes(r,s,{vpad:a/2}),l.length?(G5.extendFlat(l[0].t,{wHover:a/2,tickLen:i}),l):[{t:{empty:!0}}]}function i$t(e,t,r,n){return{o:e,h:t,l:r,c:n}}function KKe(e,t,r,n,i,a){for(var o=i.makeCalcdata(t,"open"),s=i.makeCalcdata(t,"high"),l=i.makeCalcdata(t,"low"),u=i.makeCalcdata(t,"close"),c=G5.isArrayOrTypedArray(t.text),f=G5.isArrayOrTypedArray(t.hovertext),h=!0,d=null,v=!!t.xperiodalignment,_=[],b=0;b<n.length;b++){var p=n[b],k=o[b],E=s[b],S=l[b],L=u[b];if(p!==uC&&k!==uC&&E!==uC&&S!==uC&&L!==uC){L===k?d!==null&&L!==d&&(h=L>d):h=L>k,d=L;var x=a(k,E,S,L);x.pos=p,x.yc=(k+L)/2,x.i=b,x.dir=h?"increasing":"decreasing",x.x=x.pos,x.y=[S,E],v&&(x.orig_p=r[b]),c&&(x.tx=t.text[b]),f&&(x.htx=t.hovertext[b]),_.push(x)}else _.push({pos:p,empty:!0})}return t._extremes[i._id]=a9.findExtremes(i,G5.concat(l,s),{padded:!0}),_.length&&(_[0].t={labels:{open:n9(e,"open:")+" ",high:n9(e,"high:")+" ",low:n9(e,"low:")+" ",close:n9(e,"close:")+" "}}),_}function n$t(e,t,r){var n=r._minDiff;if(!n){var i=e._fullData,a=[];n=1/0;var o;for(o=0;o<i.length;o++){var s=i[o];if(s.type==="ohlc"&&s.visible===!0&&s.xaxis===t._id){a.push(s);var l=t.makeCalcdata(s,"x");s._origX=l;var u=t$t(r,t,"x",l).vals;s._xcalc=u;var c=G5.distinctVals(u).minDiff;c&&isFinite(c)&&(n=Math.min(n,c))}}for(n===1/0&&(n=1),o=0;o<a.length;o++)a[o]._minDiff=n}return n*r.tickwidth}JKe.exports={calc:r$t,calcCommon:KKe}});var eJe=ye((_wr,QKe)=>{"use strict";var a$t=Oa(),$Ke=Dr();QKe.exports=function(t,r,n,i){var a=r.yaxis,o=r.xaxis,s=!!o.rangebreaks;$Ke.makeTraceGroups(i,n,"trace ohlc").each(function(l){var u=a$t.select(this),c=l[0],f=c.t,h=c.trace;if(h.visible!==!0||f.empty){u.remove();return}var d=f.tickLen,v=u.selectAll("path").data($Ke.identity);v.enter().append("path"),v.exit().remove(),v.attr("d",function(_){if(_.empty)return"M0,0Z";var b=o.c2p(_.pos-d,!0),p=o.c2p(_.pos+d,!0),k=s?(b+p)/2:o.c2p(_.pos,!0),E=a.c2p(_.o,!0),S=a.c2p(_.h,!0),L=a.c2p(_.l,!0),x=a.c2p(_.c,!0);return"M"+b+","+E+"H"+k+"M"+k+","+S+"V"+L+"M"+p+","+x+"H"+k})})}});var rJe=ye((xwr,tJe)=>{"use strict";var F$=Oa(),o$t=So(),s$t=ka();tJe.exports=function(t,r,n){var i=n||F$.select(t).selectAll("g.ohlclayer").selectAll("g.trace");i.style("opacity",function(a){return a[0].trace.opacity}),i.each(function(a){var o=a[0].trace;F$.select(this).selectAll("path").each(function(s){if(!s.empty){var l=o[s.dir].line;F$.select(this).style("fill","none").call(s$t.stroke,l.color).call(o$t.dashLine,l.dash,l.width).style("opacity",o.selectedpoints&&!s.selected?.3:1)}})})}});var O$=ye((bwr,sJe)=>{"use strict";var z$=ho(),l$t=Dr(),o9=vf(),u$t=ka(),c$t=Dr().fillText,iJe=XT(),f$t={increasing:iJe.INCREASING.SYMBOL,decreasing:iJe.DECREASING.SYMBOL};function h$t(e,t,r,n){var i=e.cd,a=i[0].trace;return a.hoverlabel.split?aJe(e,t,r,n):oJe(e,t,r,n)}function nJe(e,t,r,n){var i=e.cd,a=e.xa,o=i[0].trace,s=i[0].t,l=o.type,u=l==="ohlc"?"l":"min",c=l==="ohlc"?"h":"max",f,h,d=s.bPos||0,v=function(P){return P.pos+d-t},_=s.bdPos||s.tickLen,b=s.wHover,p=Math.min(1,_/Math.abs(a.r2c(a.range[1])-a.r2c(a.range[0])));f=e.maxHoverDistance-p,h=e.maxSpikeDistance-p;function k(P){var T=v(P);return o9.inbox(T-b,T+b,f)}function E(P){var T=P[u],z=P[c];return T===z||o9.inbox(T-r,z-r,f)}function S(P){return(k(P)+E(P))/2}var L=o9.getDistanceFunction(n,k,E,S);if(o9.getClosest(i,L,e),e.index===!1)return null;var x=i[e.index];if(x.empty)return null;var C=x.dir,M=o[C],g=M.line.color;return u$t.opacity(g)&&M.line.width?e.color=g:e.color=M.fillcolor,e.x0=a.c2p(x.pos+d-_,!0),e.x1=a.c2p(x.pos+d+_,!0),e.xLabelVal=x.orig_p!==void 0?x.orig_p:x.pos,e.spikeDistance=S(x)*h/f,e.xSpike=a.c2p(x.pos,!0),e}function aJe(e,t,r,n){var i=e.cd,a=e.ya,o=i[0].trace,s=i[0].t,l=[],u=nJe(e,t,r,n);if(!u)return[];var c=i[u.index],f=c.hi||o.hoverinfo||"";if(f==="none"||f==="skip")return[];for(var h=["high","open","close","low"],d={},v=0;v<h.length;v++){var _=h[v],b=o[_][u.index],p=a.c2p(b,!0),k;b in d?(k=d[b],k.yLabel+="<br>"+s.labels[_]+z$.hoverLabelText(a,b,o.yhoverformat)):(k=l$t.extendFlat({},u),k.y0=k.y1=p,k.yLabelVal=b,k.yLabel=s.labels[_]+z$.hoverLabelText(a,b,o.yhoverformat),k.name="",l.push(k),d[b]=k)}return l}function oJe(e,t,r,n){var i=e.cd,a=e.ya,o=i[0].trace,s=i[0].t,l=nJe(e,t,r,n);if(!l)return[];var u=l.index,c=i[u],f=l.index=c.i,h=c.dir;function d(S){return s.labels[S]+z$.hoverLabelText(a,o[S][f],o.yhoverformat)}var v=c.hi||o.hoverinfo||"",_=v.split("+"),b=v==="all",p=b||_.indexOf("y")!==-1,k=b||_.indexOf("text")!==-1,E=p?[d("open"),d("high"),d("low"),d("close")+"  "+f$t[h]]:[];return k&&c$t(c,o,E),l.extraText=E.join("<br>"),l.y0=l.y1=a.c2p(c.yc,!0),[l]}sJe.exports={hoverPoints:h$t,hoverSplit:aJe,hoverOnPoints:oJe}});var q$=ye((wwr,lJe)=>{"use strict";lJe.exports=function(t,r){var n=t.cd,i=t.xaxis,a=t.yaxis,o=[],s,l=n[0].t.bPos||0;if(r===!1)for(s=0;s<n.length;s++)n[s].selected=0;else for(s=0;s<n.length;s++){var u=n[s];r.contains([i.c2p(u.pos+l),a.c2p(u.yc)],null,u.i,t)?(o.push({pointNumber:u.i,x:i.c2d(u.pos),y:a.c2d(u.yc)}),u.selected=1):u.selected=0}return o}});var cJe=ye((Twr,uJe)=>{"use strict";uJe.exports={moduleType:"trace",name:"ohlc",basePlotModule:ph(),categories:["cartesian","svg","showLegend"],meta:{},attributes:i9(),supplyDefaults:YKe(),calc:D$().calc,plot:eJe(),style:rJe(),hoverPoints:O$().hoverPoints,selectPoints:q$()}});var hJe=ye((Awr,fJe)=>{"use strict";fJe.exports=cJe()});var N$=ye((Swr,pJe)=>{"use strict";var B$=Dr().extendFlat,dJe=df().axisHoverFormat,dp=i9(),H5=x4();function vJe(e){return{line:{color:B$({},H5.line.color,{dflt:e}),width:H5.line.width,editType:"style"},fillcolor:H5.fillcolor,editType:"style"}}pJe.exports={xperiod:dp.xperiod,xperiod0:dp.xperiod0,xperiodalignment:dp.xperiodalignment,xhoverformat:dJe("x"),yhoverformat:dJe("y"),x:dp.x,open:dp.open,high:dp.high,low:dp.low,close:dp.close,line:{width:B$({},H5.line.width,{}),editType:"style"},increasing:vJe(dp.increasing.line.color.dflt),decreasing:vJe(dp.decreasing.line.color.dflt),text:dp.text,hovertext:dp.hovertext,hovertemplate:dp.hovertemplate,hovertemplatefallback:dp.hovertemplatefallback,whiskerwidth:B$({},H5.whiskerwidth,{dflt:0}),hoverlabel:dp.hoverlabel,zorder:H5.zorder}});var yJe=ye((Mwr,mJe)=>{"use strict";var d$t=Dr(),v$t=ka(),p$t=R$(),g$t=Ig(),m$t=N$();mJe.exports=function(t,r,n,i){function a(s,l){return d$t.coerce(t,r,m$t,s,l)}var o=p$t(t,r,a,i);if(!o){r.visible=!1;return}g$t(t,r,i,a,{x:!0}),a("xhoverformat"),a("yhoverformat"),a("line.width"),gJe(t,r,a,"increasing"),gJe(t,r,a,"decreasing"),a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),a("whiskerwidth"),i._requestRangeslider[r.xaxis]=!0,a("zorder")};function gJe(e,t,r,n){var i=r(n+".line.color");r(n+".line.width",t.line.width),r(n+".fillcolor",v$t.addOpacity(i,.5))}});var wJe=ye((Ewr,bJe)=>{"use strict";var _Je=Dr(),xJe=ho(),y$t=Dg(),_$t=D$().calcCommon;bJe.exports=function(e,t){var r=e._fullLayout,n=xJe.getFromId(e,t.xaxis),i=xJe.getFromId(e,t.yaxis),a=n.makeCalcdata(t,"x"),o=y$t(t,n,"x",a).vals,s=_$t(e,t,a,o,i,x$t);return s.length?(_Je.extendFlat(s[0].t,{num:r._numBoxes,dPos:_Je.distinctVals(o).minDiff/2,posLetter:"x",valLetter:"y"}),r._numBoxes++,s):[{t:{empty:!0}}]};function x$t(e,t,r,n){return{min:r,q1:Math.min(e,n),med:n,q3:Math.max(e,n),max:t}}});var AJe=ye((kwr,TJe)=>{"use strict";TJe.exports={moduleType:"trace",name:"candlestick",basePlotModule:ph(),categories:["cartesian","svg","showLegend","candlestick","boxLayout"],meta:{},attributes:N$(),layoutAttributes:b4(),supplyLayoutDefaults:ZI().supplyLayoutDefaults,crossTraceCalc:KI().crossTraceCalc,supplyDefaults:yJe(),calc:wJe(),plot:JI().plot,layerName:"boxlayer",style:$I().style,hoverPoints:O$().hoverPoints,selectPoints:q$()}});var MJe=ye((Cwr,SJe)=>{"use strict";SJe.exports=AJe()});var V$=ye((Lwr,EJe)=>{"use strict";var l9=Dr(),b$t=ym(),s9=l9.deg2rad,U$=l9.rad2deg;EJe.exports=function(t,r,n){switch(b$t(t,n),t._id){case"x":case"radialaxis":w$t(t,r);break;case"angularaxis":S$t(t,r);break}};function w$t(e,t){var r=t._subplot;e.setGeometry=function(){var n=e._rl[0],i=e._rl[1],a=r.innerRadius,o=(r.radius-a)/(i-n),s=a/o,l=n>i?function(u){return u<=0}:function(u){return u>=0};e.c2g=function(u){var c=e.c2l(u)-n;return(l(c)?c:0)+s},e.g2c=function(u){return e.l2c(u+n-s)},e.g2p=function(u){return u*o},e.c2p=function(u){return e.g2p(e.c2g(u))}}}function T$t(e,t){return t==="degrees"?s9(e):e}function A$t(e,t){return t==="degrees"?U$(e):e}function S$t(e,t){var r=e.type;if(r==="linear"){var n=e.d2c,i=e.c2d;e.d2c=function(a,o){return T$t(n(a),o)},e.c2d=function(a,o){return i(A$t(a,o))}}e.makeCalcdata=function(a,o){var s=a[o],l=a._length,u,c,f=function(b){return e.d2c(b,a.thetaunit)};if(s)for(u=new Array(l),c=0;c<l;c++)u[c]=f(s[c]);else{var h=o+"0",d="d"+o,v=h in a?f(a[h]):0,_=a[d]?f(a[d]):(e.period||2*Math.PI)/l;for(u=new Array(l),c=0;c<l;c++)u[c]=v+c*_}return u},e.setGeometry=function(){var a=t.sector,o=a.map(s9),s={clockwise:-1,counterclockwise:1}[e.direction],l=s9(e.rotation),u=function(p){return s*p+l},c=function(p){return(p-l)/s},f,h,d,v;switch(r){case"linear":h=f=l9.identity,v=s9,d=U$,e.range=l9.isFullCircle(o)?[a[0],a[0]+360]:o.map(c).map(U$);break;case"category":var _=e._categories.length,b=e.period?Math.max(e.period,_):_;b===0&&(b=1),h=v=function(p){return p*2*Math.PI/b},f=d=function(p){return p*b/Math.PI/2},e.range=[0,b];break}e.c2g=function(p){return u(h(p))},e.g2c=function(p){return f(c(p))},e.t2g=function(p){return u(v(p))},e.g2t=function(p){return d(c(p))}}}});var u9=ye((Pwr,kJe)=>{"use strict";kJe.exports={attr:"subplot",name:"polar",axisNames:["angularaxis","radialaxis"],axisName2dataArray:{angularaxis:"theta",radialaxis:"r"},layerNames:["draglayer","plotbg","backplot","angular-grid","radial-grid","frontplot","angular-line","radial-line","angular-axis","radial-axis"],radialDragBoxSize:50,angularDragBoxSize:30,cornerLen:25,cornerHalfWidth:2,MINDRAG:8,MINZOOM:20,OFFEDGE:20}});var f9=ye((Iwr,RJe)=>{"use strict";var pw=Dr(),CJe=kM().tester,G$=pw.findIndexOfMin,PJe=pw.isAngleInsideSector,M$t=pw.angleDelta,LJe=pw.angleDist;function E$t(e,t,r,n,i){if(!PJe(t,n))return!1;var a,o;r[0]<r[1]?(a=r[0],o=r[1]):(a=r[1],o=r[0]);var s=CJe(j5(a,n[0],n[1],i)),l=CJe(j5(o,n[0],n[1],i)),u=[e*Math.cos(t),e*Math.sin(t)];return l.contains(u)&&!s.contains(u)}function IJe(e,t,r,n){var i,a,o=n[0],s=n[1],l=c9(Math.sin(t)-Math.sin(e)),u=c9(Math.cos(t)-Math.cos(e)),c=Math.tan(r),f=c9(1/c),h=l/u,d=s-h*o;return f?l&&u?(i=d/(c-h),a=c*i):u?(i=s*f,a=s):(i=o,a=o*c):l&&u?(i=0,a=d):u?(i=0,a=s):i=a=NaN,[i,a]}function k$t(e,t,r,n){var i=-t*r,a=t*t+1,o=2*(t*i-r),s=i*i+r*r-e*e,l=Math.sqrt(o*o-4*a*s),u=(-o+l)/(2*a),c=(-o-l)/(2*a);return[[u,t*u+i+n],[c,t*c+i+n]]}function C$t(e,t){var r=t.length,n=new Array(r+1),i;for(i=0;i<r;i++){var a=t[i];n[i]=[e*Math.cos(a),e*Math.sin(a)]}return n[i]=n[0].slice(),n}function L$t(e,t,r,n){var i=n.length,a=[],o,s;function l(p){return[e*Math.cos(p),e*Math.sin(p)]}function u(p,k,E){return IJe(p,k,E,l(p))}function c(p){return pw.mod(p,i)}function f(p){return PJe(p,[t,r])}var h=G$(n,function(p){return f(p)?LJe(p,t):1/0}),d=u(n[h],n[c(h-1)],t);for(a.push(d),o=h,s=0;s<i;o++,s++){var v=n[c(o)];if(!f(v))break;a.push(l(v))}var _=G$(n,function(p){return f(p)?LJe(p,r):1/0}),b=u(n[_],n[c(_+1)],r);return a.push(b),a.push([0,0]),a.push(a[0].slice()),a}function j5(e,t,r,n){return pw.isFullCircle([t,r])?C$t(e,n):L$t(e,t,r,n)}function P$t(e,t,r,n){for(var i=1/0,a=1/0,o=j5(e,t,r,n),s=0;s<o.length;s++){var l=o[s];i=Math.min(i,l[0]),a=Math.min(a,-l[1])}return[i,a]}function I$t(e,t){var r=function(a){var o=M$t(a,e);return o>0?o:1/0},n=G$(t,r),i=pw.mod(n+1,t.length);return[t[n],t[i]]}function c9(e){return Math.abs(e)>1e-10?e:0}function H$(e,t,r){t=t||0,r=r||0;for(var n=e.length,i=new Array(n),a=0;a<n;a++){var o=e[a];i[a]=[t+o[0],r-o[1]]}return i}function R$t(e,t,r,n,i,a){var o=j5(e,t,r,n);return"M"+H$(o,i,a).join("L")}function D$t(e,t,r,n,i,a,o){var s,l;e<t?(s=e,l=t):(s=t,l=e);var u=H$(j5(s,r,n,i),a,o),c=H$(j5(l,r,n,i),a,o);return"M"+c.reverse().join("L")+"M"+u.join("L")}RJe.exports={isPtInsidePolygon:E$t,findPolygonOffset:P$t,findEnclosingVertexAngles:I$t,findIntersectionXY:IJe,findXYatLength:k$t,clampTiny:c9,pathPolygon:R$t,pathPolygonAnnulus:D$t}});var j$=ye((Rwr,zJe)=>{"use strict";function DJe(e){return e<0?-1:e>0?1:0}function W5(e){var t=e[0],r=e[1];if(!isFinite(t)||!isFinite(r))return[1,0];var n=(t+1)*(t+1)+r*r;return[(t*t+r*r-1)/n,2*r/n]}function X5(e,t){var r=t[0],n=t[1];return[r*e.radius+e.cx,-n*e.radius+e.cy]}function FJe(e,t){return t*e.radius}function F$t(e,t,r,n){var i=X5(e,W5([r,t])),a=i[0],o=i[1],s=X5(e,W5([n,t])),l=s[0],u=s[1];if(t===0)return["M"+a+","+o,"L"+l+","+u].join(" ");var c=FJe(e,1/Math.abs(t));return["M"+a+","+o,"A"+c+","+c+" 0 0,"+(t<0?1:0)+" "+l+","+u].join(" ")}function z$t(e,t,r,n){var i=FJe(e,1/(t+1)),a=X5(e,W5([t,r])),o=a[0],s=a[1],l=X5(e,W5([t,n])),u=l[0],c=l[1];if(DJe(r)!==DJe(n)){var f=X5(e,W5([t,0])),h=f[0],d=f[1];return["M"+o+","+s,"A"+i+","+i+" 0 0,"+(0<r?0:1)+" "+h+","+d,"A"+i+","+i+" 0 0,"+(n<0?0:1)+u+","+c].join(" ")}return["M"+o+","+s,"A"+i+","+i+" 0 0,"+(n<r?0:1)+" "+u+","+c].join(" ")}zJe.exports={smith:W5,reactanceArc:F$t,resistanceArc:z$t,smithTransform:X5}});var Z$=ye((Dwr,jJe)=>{"use strict";var gw=Oa(),O$t=cd(),yw=qa(),Xc=Dr(),iy=Xc.strRotate,xd=Xc.strTranslate,W$=ka(),cC=So(),q$t=Mc(),vp=ho(),B$t=ym(),N$t=V$(),U$t=Ag().doAutoRange,y1=NN(),v9=yv(),OJe=vf(),V$t=Eb(),G$t=Of().prepSelect,H$t=Of().selectOnClick,X$=Of().clearOutline,qJe=Sg(),BJe=vM(),NJe=wM().redrawReglTraces,j$t=$h().MID_SHIFT,Lx=u9(),_1=f9(),p9=j$(),h9=p9.smith,W$t=p9.reactanceArc,X$t=p9.resistanceArc,d9=p9.smithTransform,Z$t=Xc._,UJe=Xc.mod,Px=Xc.deg2rad,mw=Xc.rad2deg;function VJe(e,t,r){this.isSmith=r||!1,this.id=t,this.gd=e,this._hasClipOnAxisFalse=null,this.vangles=null,this.radialAxisAngle=null,this.traceHash={},this.layers={},this.clipPaths={},this.clipIds={},this.viewInitial={};var n=e._fullLayout,i="clip"+n._uid+t;this.clipIds.forTraces=i+"-for-traces",this.clipPaths.forTraces=n._clips.append("clipPath").attr("id",this.clipIds.forTraces),this.clipPaths.forTraces.append("path"),this.framework=n["_"+(r?"smith":"polar")+"layer"].append("g").attr("class",t),this.getHole=function(a){return this.isSmith?0:a.hole},this.getSector=function(a){return this.isSmith?[0,360]:a.sector},this.getRadial=function(a){return this.isSmith?a.realaxis:a.radialaxis},this.getAngular=function(a){return this.isSmith?a.imaginaryaxis:a.angularaxis},r||(this.radialTickLayout=null,this.angularTickLayout=null)}var Nd=VJe.prototype;jJe.exports=function(t,r,n){return new VJe(t,r,n)};Nd.plot=function(e,t){for(var r=this,n=t[r.id],i=!1,a=0;a<e.length;a++){var o=e[a][0].trace;if(o.cliponaxis===!1){i=!0;break}}r._hasClipOnAxisFalse=i,r.updateLayers(t,n),r.updateLayout(t,n),q$t.generalUpdatePerTraceModule(r.gd,r,e,n),r.updateFx(t,n),r.isSmith&&(delete n.realaxis.range,delete n.imaginaryaxis.range)};Nd.updateLayers=function(e,t){var r=this,n=r.isSmith,i=r.layers,a=r.getRadial(t),o=r.getAngular(t),s=Lx.layerNames,l=s.indexOf("frontplot"),u=s.slice(0,l),c=o.layer==="below traces",f=a.layer==="below traces";c&&u.push("angular-line"),f&&u.push("radial-line"),c&&u.push("angular-axis"),f&&u.push("radial-axis"),u.push("frontplot"),c||u.push("angular-line"),f||u.push("radial-line"),c||u.push("angular-axis"),f||u.push("radial-axis");var h=(n?"smith":"polar")+"sublayer",d=r.framework.selectAll("."+h).data(u,String);d.enter().append("g").attr("class",function(v){return h+" "+v}).each(function(v){var _=i[v]=gw.select(this);switch(v){case"frontplot":n||_.append("g").classed("barlayer",!0),_.append("g").classed("scatterlayer",!0);break;case"backplot":_.append("g").classed("maplayer",!0);break;case"plotbg":i.bg=_.append("path");break;case"radial-grid":_.style("fill","none");break;case"angular-grid":_.style("fill","none");break;case"radial-line":_.append("line").style("fill","none");break;case"angular-line":_.append("path").style("fill","none");break}}),d.order()};Nd.updateLayout=function(e,t){var r=this,n=r.layers,i=e._size,a=r.getRadial(t),o=r.getAngular(t),s=t.domain.x,l=t.domain.y;r.xOffset=i.l+i.w*s[0],r.yOffset=i.t+i.h*(1-l[1]);var u=r.xLength=i.w*(s[1]-s[0]),c=r.yLength=i.h*(l[1]-l[0]),f=r.getSector(t);r.sectorInRad=f.map(Px);var h=r.sectorBBox=Y$t(f),d=h[2]-h[0],v=h[3]-h[1],_=c/u,b=Math.abs(v/d),p,k,E,S,L;_>b?(p=u,k=u*b,L=(c-k)/i.h/2,E=[s[0],s[1]],S=[l[0]+L,l[1]-L]):(p=c/b,k=c,L=(u-p)/i.w/2,E=[s[0]+L,s[1]-L],S=[l[0],l[1]]),r.xLength2=p,r.yLength2=k,r.xDomain2=E,r.yDomain2=S;var x=r.xOffset2=i.l+i.w*E[0],C=r.yOffset2=i.t+i.h*(1-S[1]),M=r.radius=p/d,g=r.innerRadius=r.getHole(t)*M,P=r.cx=x-M*h[0],T=r.cy=C+M*h[3],z=r.cxx=P-x,O=r.cyy=T-C,V=a.side,G;V==="counterclockwise"?(G=V,V="top"):V==="clockwise"&&(G=V,V="bottom"),r.radialAxis=r.mockAxis(e,t,a,{_id:"x",side:V,_trueSide:G,domain:[g/i.w,M/i.w]}),r.angularAxis=r.mockAxis(e,t,o,{side:"right",domain:[0,Math.PI],autorange:!1}),r.doAutoRange(e,t),r.updateAngularAxis(e,t),r.updateRadialAxis(e,t),r.updateRadialAxisTitle(e,t),r.xaxis=r.mockCartesianAxis(e,t,{_id:"x",domain:E}),r.yaxis=r.mockCartesianAxis(e,t,{_id:"y",domain:S});var Z=r.pathSubplot();r.clipPaths.forTraces.select("path").attr("d",Z).attr("transform",xd(z,O)),n.frontplot.attr("transform",xd(x,C)).call(cC.setClipUrl,r._hasClipOnAxisFalse?null:r.clipIds.forTraces,r.gd),n.bg.attr("d",Z).attr("transform",xd(P,T)).call(W$.fill,t.bgcolor)};Nd.mockAxis=function(e,t,r,n){var i=Xc.extendFlat({},r,n);return N$t(i,t,e),i};Nd.mockCartesianAxis=function(e,t,r){var n=this,i=n.isSmith,a=r._id,o=Xc.extendFlat({type:"linear"},r);B$t(o,e);var s={x:[0,2],y:[1,3]};return o.setRange=function(){var l=n.sectorBBox,u=s[a],c=n.radialAxis._rl,f=(c[1]-c[0])/(1-n.getHole(t));o.range=[l[u[0]]*f,l[u[1]]*f]},o.isPtWithinRange=a==="x"&&!i?function(l){return n.isPtInside(l)}:function(){return!0},o.setRange(),o.setScale(),o};Nd.doAutoRange=function(e,t){var r=this,n=r.gd,i=r.radialAxis,a=r.getRadial(t);U$t(n,i);var o=i.range;if(a.range=o.slice(),a._input.range=o.slice(),i._rl=[i.r2l(o[0],null,"gregorian"),i.r2l(o[1],null,"gregorian")],i.minallowed!==void 0){var s=i.r2l(i.minallowed);i._rl[0]>i._rl[1]?i._rl[1]=Math.max(i._rl[1],s):i._rl[0]=Math.max(i._rl[0],s)}if(i.maxallowed!==void 0){var l=i.r2l(i.maxallowed);i._rl[0]<i._rl[1]?i._rl[1]=Math.min(i._rl[1],l):i._rl[0]=Math.min(i._rl[0],l)}};Nd.updateRadialAxis=function(e,t){var r=this,n=r.gd,i=r.layers,a=r.radius,o=r.innerRadius,s=r.cx,l=r.cy,u=r.getRadial(t),c=UJe(r.getSector(t)[0],360),f=r.radialAxis,h=o<a,d=r.isSmith;d||(r.fillViewInitialKey("radialaxis.angle",u.angle),r.fillViewInitialKey("radialaxis.range",f.range.slice()),f.setGeometry()),f.tickangle==="auto"&&c>90&&c<=270&&(f.tickangle=180);var v=d?function(M){var g=d9(r,h9([M.x,0]));return xd(g[0]-s,g[1]-l)}:function(M){return xd(f.l2p(M.x)+o,0)},_=d?function(M){return X$t(r,M.x,-1/0,1/0)}:function(M){return r.pathArc(f.r2p(M.x)+o)},b=GJe(u);if(r.radialTickLayout!==b&&(i["radial-axis"].selectAll(".xtick").remove(),r.radialTickLayout=b),h){f.setScale();var p=0,k=d?(f.tickvals||[]).filter(function(M){return M>=0}).map(function(M){return vp.tickText(f,M,!0,!1)}):vp.calcTicks(f),E=d?k:vp.clipEnds(f,k),S=vp.getTickSigns(f)[2];d&&((f.ticks==="top"&&f.side==="bottom"||f.ticks==="bottom"&&f.side==="top")&&(S=-S),f.ticks==="top"&&f.side==="top"&&(p=-f.ticklen),f.ticks==="bottom"&&f.side==="bottom"&&(p=f.ticklen)),vp.drawTicks(n,f,{vals:k,layer:i["radial-axis"],path:vp.makeTickPath(f,0,S),transFn:v,crisp:!1}),vp.drawGrid(n,f,{vals:E,layer:i["radial-grid"],path:_,transFn:Xc.noop,crisp:!1}),vp.drawLabels(n,f,{vals:k,layer:i["radial-axis"],transFn:v,labelFns:vp.makeLabelFns(f,p)})}var L=r.radialAxisAngle=r.vangles?mw(HJe(Px(u.angle),r.vangles)):u.angle,x=xd(s,l),C=x+iy(-L);fC(i["radial-axis"],h&&(u.showticklabels||u.ticks),{transform:C}),fC(i["radial-grid"],h&&u.showgrid,{transform:d?"":x}),fC(i["radial-line"].select("line"),h&&u.showline,{x1:d?-a:o,y1:0,x2:a,y2:0,transform:C}).attr("stroke-width",u.linewidth).call(W$.stroke,u.linecolor)};Nd.updateRadialAxisTitle=function(e,t,r){if(!this.isSmith){var n=this,i=n.gd,a=n.radius,o=n.cx,s=n.cy,l=n.getRadial(t),u=n.id+"title",c=0;if(l.title){var f=cC.bBox(n.layers["radial-axis"].node()).height,h=l.title.font.size,d=l.side;c=d==="top"?h:d==="counterclockwise"?-(f+h*.4):f+h*.8}var v=r!==void 0?r:n.radialAxisAngle,_=Px(v),b=Math.cos(_),p=Math.sin(_),k=o+a/2*b+c*p,E=s-a/2*p+c*b;n.layers["radial-axis-title"]=V$t.draw(i,u,{propContainer:l,propName:n.id+".radialaxis.title.text",placeholder:Z$t(i,"Click to enter radial axis title"),attributes:{x:k,y:E,"text-anchor":"middle"},transform:{rotate:-v}})}};Nd.updateAngularAxis=function(e,t){var r=this,n=r.gd,i=r.layers,a=r.radius,o=r.innerRadius,s=r.cx,l=r.cy,u=r.getAngular(t),c=r.angularAxis,f=r.isSmith;f||(r.fillViewInitialKey("angularaxis.rotation",u.rotation),c.setGeometry(),c.setScale());var h=f?function(g){var P=d9(r,h9([0,g.x]));return Math.atan2(P[0]-s,P[1]-l)-Math.PI/2}:function(g){return c.t2g(g.x)};c.type==="linear"&&c.thetaunit==="radians"&&(c.tick0=mw(c.tick0),c.dtick=mw(c.dtick));var d=function(g){return xd(s+a*Math.cos(g),l-a*Math.sin(g))},v=f?function(g){var P=d9(r,h9([0,g.x]));return xd(P[0],P[1])}:function(g){return d(h(g))},_=f?function(g){var P=d9(r,h9([0,g.x])),T=Math.atan2(P[0]-s,P[1]-l)-Math.PI/2;return xd(P[0],P[1])+iy(-mw(T))}:function(g){var P=h(g);return d(P)+iy(-mw(P))},b=f?function(g){return W$t(r,g.x,0,1/0)}:function(g){var P=h(g),T=Math.cos(P),z=Math.sin(P);return"M"+[s+o*T,l-o*z]+"L"+[s+a*T,l-a*z]},p=vp.makeLabelFns(c,0),k=p.labelStandoff,E={};E.xFn=function(g){var P=h(g);return Math.cos(P)*k},E.yFn=function(g){var P=h(g),T=Math.sin(P)>0?.2:1;return-Math.sin(P)*(k+g.fontSize*T)+Math.abs(Math.cos(P))*(g.fontSize*j$t)},E.anchorFn=function(g){var P=h(g),T=Math.cos(P);return Math.abs(T)<.1?"middle":T>0?"start":"end"},E.heightFn=function(g,P,T){var z=h(g);return-.5*(1+Math.sin(z))*T};var S=GJe(u);r.angularTickLayout!==S&&(i["angular-axis"].selectAll("."+c._id+"tick").remove(),r.angularTickLayout=S);var L=f?[1/0].concat(c.tickvals||[]).map(function(g){return vp.tickText(c,g,!0,!1)}):vp.calcTicks(c);f&&(L[0].text="\u221E",L[0].fontSize*=1.75);var x;if(t.gridshape==="linear"?(x=L.map(h),Xc.angleDelta(x[0],x[1])<0&&(x=x.slice().reverse())):x=null,r.vangles=x,c.type==="category"&&(L=L.filter(function(g){return Xc.isAngleInsideSector(h(g),r.sectorInRad)})),c.visible){var C=c.ticks==="inside"?-1:1,M=(c.linewidth||1)/2;vp.drawTicks(n,c,{vals:L,layer:i["angular-axis"],path:"M"+C*M+",0h"+C*c.ticklen,transFn:_,crisp:!1}),vp.drawGrid(n,c,{vals:L,layer:i["angular-grid"],path:b,transFn:Xc.noop,crisp:!1}),vp.drawLabels(n,c,{vals:L,layer:i["angular-axis"],repositionOnUpdate:!0,transFn:v,labelFns:E})}fC(i["angular-line"].select("path"),u.showline,{d:r.pathSubplot(),transform:xd(s,l)}).attr("stroke-width",u.linewidth).call(W$.stroke,u.linecolor)};Nd.updateFx=function(e,t){if(!this.gd._context.staticPlot){var r=!this.isSmith;r&&(this.updateAngularDrag(e),this.updateRadialDrag(e,t,0),this.updateRadialDrag(e,t,1)),this.updateHoverAndMainDrag(e)}};Nd.updateHoverAndMainDrag=function(e){var t=this,r=t.isSmith,n=t.gd,i=t.layers,a=e._zoomlayer,o=Lx.MINZOOM,s=Lx.OFFEDGE,l=t.radius,u=t.innerRadius,c=t.cx,f=t.cy,h=t.cxx,d=t.cyy,v=t.sectorInRad,_=t.vangles,b=t.radialAxis,p=_1.clampTiny,k=_1.findXYatLength,E=_1.findEnclosingVertexAngles,S=Lx.cornerHalfWidth,L=Lx.cornerLen/2,x,C,M=y1.makeDragger(i,"path","maindrag",e.dragmode===!1?"none":"crosshair");gw.select(M).attr("d",t.pathSubplot()).attr("transform",xd(c,f)),M.onmousemove=function(ce){OJe.hover(n,ce,t.id),n._fullLayout._lasthover=M,n._fullLayout._hoversubplot=t.id},M.onmouseout=function(ce){n._dragging||v9.unhover(n,ce)};var g={element:M,gd:n,subplot:t.id,plotinfo:{id:t.id,xaxis:t.xaxis,yaxis:t.yaxis},xaxes:[t.xaxis],yaxes:[t.yaxis]},P,T,z,O,V,G,Z,j,N;function H(ce,Ze){return Math.sqrt(ce*ce+Ze*Ze)}function te(ce,Ze){return H(ce-h,Ze-d)}function oe(ce,Ze){return Math.atan2(d-Ze,ce-h)}function _e(ce,Ze){return[ce*Math.cos(Ze),ce*Math.sin(-Ze)]}function Ee(ce,Ze){if(ce===0)return t.pathSector(2*S);var ut=L/ce,pt=Ze-ut,Zt=Ze+ut,st=Math.max(0,Math.min(ce,l)),lt=st-S,Gt=st+S;return"M"+_e(lt,pt)+"A"+[lt,lt]+" 0,0,0 "+_e(lt,Zt)+"L"+_e(Gt,Zt)+"A"+[Gt,Gt]+" 0,0,1 "+_e(Gt,pt)+"Z"}function Ce(ce,Ze,ut){if(ce===0)return t.pathSector(2*S);var pt=_e(ce,Ze),Zt=_e(ce,ut),st=p((pt[0]+Zt[0])/2),lt=p((pt[1]+Zt[1])/2),Gt,Nt;if(st&&lt){var Jt=lt/st,sr=-1/Jt,wr=k(S,Jt,st,lt);Gt=k(L,sr,wr[0][0],wr[0][1]),Nt=k(L,sr,wr[1][0],wr[1][1])}else{var cr,$e;lt?(cr=L,$e=S):(cr=S,$e=L),Gt=[[st-cr,lt-$e],[st+cr,lt-$e]],Nt=[[st-cr,lt+$e],[st+cr,lt+$e]]}return"M"+Gt.join("L")+"L"+Nt.reverse().join("L")+"Z"}function me(){z=null,O=null,V=t.pathSubplot(),G=!1;var ce=n._fullLayout[t.id];Z=O$t(ce.bgcolor).getLuminance(),j=y1.makeZoombox(a,Z,c,f,V),j.attr("fill-rule","evenodd"),N=y1.makeCorners(a,c,f),X$(n)}function ie(ce,Ze){return Ze=Math.max(Math.min(Ze,l),u),ce<s?ce=0:l-ce<s?ce=l:Ze<s?Ze=0:l-Ze<s&&(Ze=l),Math.abs(Ze-ce)>o?(ce<Ze?(z=ce,O=Ze):(z=Ze,O=ce),!0):(z=null,O=null,!1)}function Se(ce,Ze){ce=ce||V,Ze=Ze||"M0,0Z",j.attr("d",ce),N.attr("d",Ze),y1.transitionZoombox(j,N,G,Z),G=!0;var ut={};ge(ut),n.emit("plotly_relayouting",ut)}function Le(ce,Ze){ce=ce*x,Ze=Ze*C;var ut=P+ce,pt=T+Ze,Zt=te(P,T),st=Math.min(te(ut,pt),l),lt=oe(P,T),Gt,Nt;ie(Zt,st)&&(Gt=V+t.pathSector(O),z&&(Gt+=t.pathSector(z)),Nt=Ee(z,lt)+Ee(O,lt)),Se(Gt,Nt)}function Ae(ce,Ze,ut,pt){var Zt=_1.findIntersectionXY(ut,pt,ut,[ce-h,d-Ze]);return H(Zt[0],Zt[1])}function Fe(ce,Ze){var ut=P+ce,pt=T+Ze,Zt=oe(P,T),st=oe(ut,pt),lt=E(Zt,_),Gt=E(st,_),Nt=Ae(P,T,lt[0],lt[1]),Jt=Math.min(Ae(ut,pt,Gt[0],Gt[1]),l),sr,wr;ie(Nt,Jt)&&(sr=V+t.pathSector(O),z&&(sr+=t.pathSector(z)),wr=[Ce(z,lt[0],lt[1]),Ce(O,lt[0],lt[1])].join(" ")),Se(sr,wr)}function Pe(){if(y1.removeZoombox(n),!(z===null||O===null)){var ce={};ge(ce),y1.showDoubleClickNotifier(n),yw.call("_guiRelayout",n,ce)}}function ge(ce){var Ze=b._rl,ut=(Ze[1]-Ze[0])/(1-u/l)/l,pt=[Ze[0]+(z-u)*ut,Ze[0]+(O-u)*ut];ce[t.id+".radialaxis.range"]=pt}function Re(ce,Ze){var ut=n._fullLayout.clickmode;if(y1.removeZoombox(n),ce===2){var pt={};for(var Zt in t.viewInitial)pt[t.id+"."+Zt]=t.viewInitial[Zt];n.emit("plotly_doubleclick",null),yw.call("_guiRelayout",n,pt)}ut.indexOf("select")>-1&&ce===1&&H$t(Ze,n,[t.xaxis],[t.yaxis],t.id,g),ut.indexOf("event")>-1&&OJe.click(n,Ze,t.id)}g.prepFn=function(ce,Ze,ut){var pt=n._fullLayout.dragmode,Zt=M.getBoundingClientRect();n._fullLayout._calcInverseTransform(n);var st=n._fullLayout._invTransform;x=n._fullLayout._invScaleX,C=n._fullLayout._invScaleY;var lt=Xc.apply3DTransform(st)(Ze-Zt.left,ut-Zt.top);if(P=lt[0],T=lt[1],_){var Gt=_1.findPolygonOffset(l,v[0],v[1],_);P+=h+Gt[0],T+=d+Gt[1]}switch(pt){case"zoom":g.clickFn=Re,r||(_?g.moveFn=Fe:g.moveFn=Le,g.doneFn=Pe,me(ce,Ze,ut));break;case"select":case"lasso":G$t(ce,Ze,ut,g,pt);break}},v9.init(g)};Nd.updateRadialDrag=function(e,t,r){var n=this,i=n.gd,a=n.layers,o=n.radius,s=n.innerRadius,l=n.cx,u=n.cy,c=n.radialAxis,f=Lx.radialDragBoxSize,h=f/2;if(!c.visible)return;var d=Px(n.radialAxisAngle),v=c._rl,_=v[0],b=v[1],p=v[r],k=.75*(v[1]-v[0])/(1-n.getHole(t))/o,E,S,L;r?(E=l+(o+h)*Math.cos(d),S=u-(o+h)*Math.sin(d),L="radialdrag"):(E=l+(s-h)*Math.cos(d),S=u-(s-h)*Math.sin(d),L="radialdrag-inner");var x=y1.makeRectDragger(a,L,"crosshair",-h,-h,f,f),C={element:x,gd:i};e.dragmode===!1&&(C.dragmode=!1),fC(gw.select(x),c.visible&&s<o,{transform:xd(E,S)});var M,g,P;function T(Z,j){if(M)M(Z,j);else{var N=[Z,-j],H=[Math.cos(d),Math.sin(d)],te=Math.abs(Xc.dot(N,H)/Math.sqrt(Xc.dot(N,N)));isNaN(te)||(M=te<.5?V:G)}var oe={};z(oe),i.emit("plotly_relayouting",oe)}function z(Z){g!==null?Z[n.id+".radialaxis.angle"]=g:P!==null&&(Z[n.id+".radialaxis.range["+r+"]"]=P)}function O(){g!==null?yw.call("_guiRelayout",i,n.id+".radialaxis.angle",g):P!==null&&yw.call("_guiRelayout",i,n.id+".radialaxis.range["+r+"]",P)}function V(Z,j){if(r!==0){var N=E+Z,H=S+j;g=Math.atan2(u-H,N-l),n.vangles&&(g=HJe(g,n.vangles)),g=mw(g);var te=xd(l,u)+iy(-g);a["radial-axis"].attr("transform",te),a["radial-line"].select("line").attr("transform",te);var oe=n.gd._fullLayout,_e=oe[n.id];n.updateRadialAxisTitle(oe,_e,g)}}function G(Z,j){var N=Xc.dot([Z,-j],[Math.cos(d),Math.sin(d)]);if(P=p-k*N,k>0!=(r?P>_:P<b)){P=null;return}var H=i._fullLayout,te=H[n.id];c.range[r]=P,c._rl[r]=P,n.updateRadialAxis(H,te),n.xaxis.setRange(),n.xaxis.setScale(),n.yaxis.setRange(),n.yaxis.setScale();var oe=!1;for(var _e in n.traceHash){var Ee=n.traceHash[_e],Ce=Xc.filterVisible(Ee),me=Ee[0][0].trace._module;me.plot(i,n,Ce,te),yw.traceIs(_e,"gl")&&Ce.length&&(oe=!0)}oe&&(BJe(i),NJe(i))}C.prepFn=function(){M=null,g=null,P=null,C.moveFn=T,C.doneFn=O,X$(i)},C.clampFn=function(Z,j){return Math.sqrt(Z*Z+j*j)<Lx.MINDRAG&&(Z=0,j=0),[Z,j]},v9.init(C)};Nd.updateAngularDrag=function(e){var t=this,r=t.gd,n=t.layers,i=t.radius,a=t.angularAxis,o=t.cx,s=t.cy,l=t.cxx,u=t.cyy,c=Lx.angularDragBoxSize,f=y1.makeDragger(n,"path","angulardrag",e.dragmode===!1?"none":"move"),h={element:f,gd:r};e.dragmode===!1?h.dragmode=!1:gw.select(f).attr("d",t.pathAnnulus(i,i+c)).attr("transform",xd(o,s)).call(qJe,"move");function d(P,T){return Math.atan2(u+c-T,P-l-c)}var v=n.frontplot.select(".scatterlayer").selectAll(".trace"),_=v.selectAll(".point"),b=v.selectAll(".textpoint"),p,k,E,S,L,x;function C(P,T){var z=t.gd._fullLayout,O=z[t.id],V=p+P*e._invScaleX,G=k+T*e._invScaleY,Z=d(V,G),j=mw(Z-x);if(S=E+j,n.frontplot.attr("transform",xd(t.xOffset2,t.yOffset2)+iy([-j,l,u])),t.vangles){L=t.radialAxisAngle+j;var N=xd(o,s)+iy(-j),H=xd(o,s)+iy(-L);n.bg.attr("transform",N),n["radial-grid"].attr("transform",N),n["radial-axis"].attr("transform",H),n["radial-line"].select("line").attr("transform",H),t.updateRadialAxisTitle(z,O,L)}else t.clipPaths.forTraces.select("path").attr("transform",xd(l,u)+iy(j));_.each(function(){var ie=gw.select(this),Se=cC.getTranslate(ie);ie.attr("transform",xd(Se.x,Se.y)+iy([j]))}),b.each(function(){var ie=gw.select(this),Se=ie.select("text"),Le=cC.getTranslate(ie);ie.attr("transform",iy([j,Se.attr("x"),Se.attr("y")])+xd(Le.x,Le.y))}),a.rotation=Xc.modHalf(S,360),t.updateAngularAxis(z,O),t._hasClipOnAxisFalse&&!Xc.isFullCircle(t.sectorInRad)&&v.call(cC.hideOutsideRangePoints,t);var te=!1;for(var oe in t.traceHash)if(yw.traceIs(oe,"gl")){var _e=t.traceHash[oe],Ee=Xc.filterVisible(_e),Ce=_e[0][0].trace._module;Ce.plot(r,t,Ee,O),Ee.length&&(te=!0)}te&&(BJe(r),NJe(r));var me={};M(me),r.emit("plotly_relayouting",me)}function M(P){P[t.id+".angularaxis.rotation"]=S,t.vangles&&(P[t.id+".radialaxis.angle"]=L)}function g(){b.select("text").attr("transform",null);var P={};M(P),yw.call("_guiRelayout",r,P)}h.prepFn=function(P,T,z){var O=e[t.id];E=O.angularaxis.rotation;var V=f.getBoundingClientRect();p=T-V.left,k=z-V.top,r._fullLayout._calcInverseTransform(r);var G=Xc.apply3DTransform(e._invTransform)(p,k);p=G[0],k=G[1],x=d(p,k),h.moveFn=C,h.doneFn=g,X$(r)},t.vangles&&!Xc.isFullCircle(t.sectorInRad)&&(h.prepFn=Xc.noop,qJe(gw.select(f),null)),v9.init(h)};Nd.isPtInside=function(e){if(this.isSmith)return!0;var t=this.sectorInRad,r=this.vangles,n=this.angularAxis.c2g(e.theta),i=this.radialAxis,a=i.c2l(e.r),o=i._rl,s=r?_1.isPtInsidePolygon:Xc.isPtInsideSector;return s(a,n,o,t,r)};Nd.pathArc=function(e){var t=this.sectorInRad,r=this.vangles,n=r?_1.pathPolygon:Xc.pathArc;return n(e,t[0],t[1],r)};Nd.pathSector=function(e){var t=this.sectorInRad,r=this.vangles,n=r?_1.pathPolygon:Xc.pathSector;return n(e,t[0],t[1],r)};Nd.pathAnnulus=function(e,t){var r=this.sectorInRad,n=this.vangles,i=n?_1.pathPolygonAnnulus:Xc.pathAnnulus;return i(e,t,r[0],r[1],n)};Nd.pathSubplot=function(){var e=this.innerRadius,t=this.radius;return e?this.pathAnnulus(e,t):this.pathSector(t)};Nd.fillViewInitialKey=function(e,t){e in this.viewInitial||(this.viewInitial[e]=t)};function GJe(e){var t=e.ticks+String(e.ticklen)+String(e.showticklabels);return"side"in e&&(t+=e.side),t}function Y$t(e){var t=e[0],r=e[1],n=r-t,i=UJe(t,360),a=i+n,o=Math.cos(Px(i)),s=Math.sin(Px(i)),l=Math.cos(Px(a)),u=Math.sin(Px(a)),c,f,h,d;return i<=90&&a>=90||i>90&&a>=450?d=1:s<=0&&u<=0?d=0:d=Math.max(s,u),i<=180&&a>=180||i>180&&a>=540?c=-1:o>=0&&l>=0?c=0:c=Math.min(o,l),i<=270&&a>=270||i>270&&a>=630?f=-1:s>=0&&u>=0?f=0:f=Math.min(s,u),a>=360?h=1:o<=0&&l<=0?h=0:h=Math.max(o,l),[c,f,h,d]}function HJe(e,t){var r=function(i){return Xc.angleDist(e,i)},n=Xc.findIndexOfMin(t,r);return t[n]}function fC(e,t,r){return t?(e.attr("display",null),e.attr(r)):e&&e.attr("display","none"),e}});var Y$=ye((Fwr,JJe)=>{"use strict";var K$t=Lh(),os=Rd(),J$t=Cc().attributes,c0=Dr().extendFlat,WJe=mc().overrideAll,XJe=WJe({color:os.color,showline:c0({},os.showline,{dflt:!0}),linecolor:os.linecolor,linewidth:os.linewidth,showgrid:c0({},os.showgrid,{dflt:!0}),gridcolor:os.gridcolor,gridwidth:os.gridwidth,griddash:os.griddash},"plot","from-root"),ZJe=WJe({tickmode:os.minor.tickmode,nticks:os.nticks,tick0:os.tick0,dtick:os.dtick,tickvals:os.tickvals,ticktext:os.ticktext,ticks:os.ticks,ticklen:os.ticklen,tickwidth:os.tickwidth,tickcolor:os.tickcolor,ticklabelstep:os.ticklabelstep,showticklabels:os.showticklabels,labelalias:os.labelalias,minorloglabels:os.minorloglabels,showtickprefix:os.showtickprefix,tickprefix:os.tickprefix,showticksuffix:os.showticksuffix,ticksuffix:os.ticksuffix,showexponent:os.showexponent,exponentformat:os.exponentformat,minexponent:os.minexponent,separatethousands:os.separatethousands,tickfont:os.tickfont,tickangle:os.tickangle,tickformat:os.tickformat,tickformatstops:os.tickformatstops,layer:os.layer},"plot","from-root"),YJe={visible:c0({},os.visible,{dflt:!0}),type:c0({},os.type,{values:["-","linear","log","date","category"]}),autotypenumbers:os.autotypenumbers,autorangeoptions:{minallowed:os.autorangeoptions.minallowed,maxallowed:os.autorangeoptions.maxallowed,clipmin:os.autorangeoptions.clipmin,clipmax:os.autorangeoptions.clipmax,include:os.autorangeoptions.include,editType:"plot"},autorange:c0({},os.autorange,{editType:"plot"}),rangemode:{valType:"enumerated",values:["tozero","nonnegative","normal"],dflt:"tozero",editType:"calc"},minallowed:c0({},os.minallowed,{editType:"plot"}),maxallowed:c0({},os.maxallowed,{editType:"plot"}),range:c0({},os.range,{items:[{valType:"any",editType:"plot",impliedEdits:{"^autorange":!1}},{valType:"any",editType:"plot",impliedEdits:{"^autorange":!1}}],editType:"plot"}),categoryorder:os.categoryorder,categoryarray:os.categoryarray,angle:{valType:"angle",editType:"plot"},autotickangles:os.autotickangles,side:{valType:"enumerated",values:["clockwise","counterclockwise"],dflt:"clockwise",editType:"plot"},title:{text:c0({},os.title.text,{editType:"plot",dflt:""}),font:c0({},os.title.font,{editType:"plot"}),editType:"plot"},hoverformat:os.hoverformat,uirevision:{valType:"any",editType:"none"},editType:"calc"};c0(YJe,XJe,ZJe);var KJe={visible:c0({},os.visible,{dflt:!0}),type:{valType:"enumerated",values:["-","linear","category"],dflt:"-",editType:"calc",_noTemplating:!0},autotypenumbers:os.autotypenumbers,categoryorder:os.categoryorder,categoryarray:os.categoryarray,thetaunit:{valType:"enumerated",values:["radians","degrees"],dflt:"degrees",editType:"calc"},period:{valType:"number",editType:"calc",min:0},direction:{valType:"enumerated",values:["counterclockwise","clockwise"],dflt:"counterclockwise",editType:"calc"},rotation:{valType:"angle",editType:"calc"},hoverformat:os.hoverformat,uirevision:{valType:"any",editType:"none"},editType:"calc"};c0(KJe,XJe,ZJe);JJe.exports={domain:J$t({name:"polar",editType:"plot"}),sector:{valType:"info_array",items:[{valType:"number",editType:"plot"},{valType:"number",editType:"plot"}],dflt:[0,360],editType:"plot"},hole:{valType:"number",min:0,max:1,dflt:0,editType:"plot"},bgcolor:{valType:"color",editType:"plot",dflt:K$t.background},radialaxis:YJe,angularaxis:KJe,gridshape:{valType:"enumerated",values:["circular","linear"],dflt:"circular",editType:"plot"},uirevision:{valType:"any",editType:"none"},editType:"calc"}});var t$e=ye((zwr,e$e)=>{"use strict";var g9=Dr(),$$t=ka(),Q$t=vl(),eQt=k_(),tQt=Id().getSubplotData,rQt=bb(),iQt=M3(),nQt=e_(),aQt=t_(),oQt=oI(),sQt=t4(),lQt=_B(),uQt=R3(),QJe=Y$(),cQt=V$(),m9=u9(),$Je=m9.axisNames;function fQt(e,t,r,n){var i=r("bgcolor");n.bgColor=$$t.combine(i,n.paper_bgcolor);var a=r("sector");r("hole");var o=tQt(n.fullData,m9.name,n.id),s=n.layoutOut,l;function u(j,N){return r(l+"."+j,N)}for(var c=0;c<$Je.length;c++){l=$Je[c],g9.isPlainObject(e[l])||(e[l]={});var f=e[l],h=Q$t.newContainer(t,l);h._id=h._name=l,h._attr=n.id+"."+l,h._traceIndices=o.map(function(j){return j.index});var d=m9.axisName2dataArray[l],v=hQt(f,h,u,o,d,n);oQt(f,h,u,{axData:o,dataAttr:d});var _=u("visible");switch(cQt(h,t,s),u("uirevision",t.uirevision),h._m=1,l){case"radialaxis":u("minallowed"),u("maxallowed");var b=u("range"),p=h.getAutorangeDflt(b),k=u("autorange",p),E;b&&(b[0]===null&&b[1]===null||(b[0]===null||b[1]===null)&&(k==="reversed"||k===!0)||b[0]!==null&&(k==="min"||k==="max reversed")||b[1]!==null&&(k==="max"||k==="min reversed"))&&(b=void 0,delete h.range,h.autorange=!0,E=!0),E||(p=h.getAutorangeDflt(b),k=u("autorange",p)),f.autorange=k,k&&(lQt(u,k,b),(v==="linear"||v==="-")&&u("rangemode"),h.isReversed()&&(h._m=-1)),h.cleanRange("range",{dfltRange:[0,1]});break;case"angularaxis":if(v==="date"){g9.log("Polar plots do not support date angular axes yet.");for(var S=0;S<o.length;S++)o[S].visible=!1;v=f.type=h.type="linear"}u(v==="linear"?"thetaunit":"period");var L=u("direction");u("rotation",{counterclockwise:0,clockwise:90}[L]);break}if(aQt(f,h,u,h.type,{tickSuffixDflt:h.thetaunit==="degrees"?"\xB0":void 0}),_){var x,C,M,g,P,T,z,O,V,G,Z=n.font||{};x=u("color"),C=x===f.color?x:Z.color,M=Z.size,g=Z.family,P=Z.weight,T=Z.style,z=Z.variant,O=Z.textcase,V=Z.lineposition,G=Z.shadow,rQt(f,h,u,h.type),nQt(f,h,u,h.type,{font:{weight:P,style:T,variant:z,textcase:O,lineposition:V,shadow:G,color:C,size:M,family:g},noAutotickangles:l==="angularaxis",noTicklabelshift:!0,noTicklabelstandoff:!0}),iQt(f,h,u,{outerTicks:!0}),sQt(f,h,u,{dfltColor:x,bgColor:n.bgColor,blend:60,showLine:!0,showGrid:!0,noZeroLine:!0,attributes:QJe[l]}),u("layer"),l==="radialaxis"&&(u("side"),u("angle",a[0]),u("title.text"),g9.coerceFont(u,"title.font",{weight:P,style:T,variant:z,textcase:O,lineposition:V,shadow:G,color:C,size:g9.bigFont(M),family:g}))}v!=="category"&&u("hoverformat"),h._input=f}t.angularaxis.type==="category"&&r("gridshape")}function hQt(e,t,r,n,i,a){var o=r("autotypenumbers",a.autotypenumbersDflt),s=r("type");if(s==="-"){for(var l,u=0;u<n.length;u++)if(n[u].visible){l=n[u];break}l&&l[i]&&(t.type=uQt(l[i],"gregorian",{noMultiCategory:!0,autotypenumbers:o})),t.type==="-"?t.type="linear":e.type=t.type}return t.type}e$e.exports=function(t,r,n){eQt(t,r,n,{type:m9.name,attributes:QJe,handleDefaults:fQt,font:r.font,autotypenumbersDflt:r.autotypenumbers,paper_bgcolor:r.paper_bgcolor,fullData:n,layoutOut:r})}});var y9=ye((Owr,o$e)=>{"use strict";var dQt=Id().getSubplotCalcData,vQt=Dr().counterRegex,pQt=Z$(),i$e=u9(),n$e=i$e.attr,_w=i$e.name,r$e=vQt(_w),a$e={};a$e[n$e]={valType:"subplotid",dflt:_w,editType:"calc"};function gQt(e){for(var t=e._fullLayout,r=e.calcdata,n=t._subplots[_w],i=0;i<n.length;i++){var a=n[i],o=dQt(r,_w,a),s=t[a]._subplot;s||(s=pQt(e,a),t[a]._subplot=s),s.plot(o,t,e._promises)}}function mQt(e,t,r,n){for(var i=n._subplots[_w]||[],a=n._has&&n._has("gl"),o=t._has&&t._has("gl"),s=a&&!o,l=0;l<i.length;l++){var u=i[l],c=n[u]._subplot;if(!t[u]&&c){c.framework.remove(),c.layers["radial-axis-title"].remove();for(var f in c.clipPaths)c.clipPaths[f].remove()}s&&c._scene&&(c._scene.destroy(),c._scene=null)}}o$e.exports={attr:n$e,name:_w,idRoot:_w,idRegex:r$e,attrRegex:r$e,attributes:a$e,layoutAttributes:Y$(),supplyLayoutDefaults:t$e(),plot:gQt,clean:mQt,toSVG:ph().toSVG}});var hC=ye((qwr,l$e)=>{"use strict";var{hovertemplateAttrs:yQt,texttemplateAttrs:_Qt,templatefallbackAttrs:s$e}=Ll(),_9=Ao().extendFlat,xQt=Cg(),f0=pf(),bQt=Gl(),Z5=f0.line;l$e.exports={mode:f0.mode,r:{valType:"data_array",editType:"calc+clearAxisTypes"},theta:{valType:"data_array",editType:"calc+clearAxisTypes"},r0:{valType:"any",dflt:0,editType:"calc+clearAxisTypes"},dr:{valType:"number",dflt:1,editType:"calc"},theta0:{valType:"any",dflt:0,editType:"calc+clearAxisTypes"},dtheta:{valType:"number",editType:"calc"},thetaunit:{valType:"enumerated",values:["radians","degrees","gradians"],dflt:"degrees",editType:"calc+clearAxisTypes"},text:f0.text,texttemplate:_Qt({editType:"plot"},{keys:["r","theta","text"]}),texttemplatefallback:s$e({editType:"plot"}),hovertext:f0.hovertext,line:{color:Z5.color,width:Z5.width,dash:Z5.dash,backoff:Z5.backoff,shape:_9({},Z5.shape,{values:["linear","spline"]}),smoothing:Z5.smoothing,editType:"calc"},connectgaps:f0.connectgaps,marker:f0.marker,cliponaxis:_9({},f0.cliponaxis,{dflt:!1}),textposition:f0.textposition,textfont:f0.textfont,fill:_9({},f0.fill,{values:["none","toself","tonext"],dflt:"none"}),fillcolor:xQt(),hoverinfo:_9({},bQt.hoverinfo,{flags:["r","theta","text","name"]}),hoveron:f0.hoveron,hovertemplate:yQt(),hovertemplatefallback:s$e(),selected:f0.selected,unselected:f0.unselected}});var b9=ye((Bwr,f$e)=>{"use strict";var x9=Dr(),Y5=Ru(),wQt=$p(),TQt=R0(),u$e=tT(),AQt=D0(),SQt=Rg(),MQt=Sm().PTS_LINESONLY,EQt=hC();function kQt(e,t,r,n){function i(s,l){return x9.coerce(e,t,EQt,s,l)}var a=c$e(e,t,n,i);if(!a){t.visible=!1;return}i("thetaunit"),i("mode",a<MQt?"lines+markers":"lines"),i("text"),i("hovertext"),t.hoveron!=="fills"&&(i("hovertemplate"),i("hovertemplatefallback")),Y5.hasMarkers(t)&&wQt(e,t,r,n,i,{gradient:!0}),Y5.hasLines(t)&&(TQt(e,t,r,n,i,{backoff:!0}),u$e(e,t,i),i("connectgaps")),Y5.hasText(t)&&(i("texttemplate"),i("texttemplatefallback"),AQt(e,t,n,i));var o=[];(Y5.hasMarkers(t)||Y5.hasText(t))&&(i("cliponaxis"),i("marker.maxdisplayed"),o.push("points")),i("fill"),t.fill!=="none"&&(SQt(e,t,r,i),Y5.hasLines(t)||u$e(e,t,i)),(t.fill==="tonext"||t.fill==="toself")&&o.push("fills"),i("hoveron",o.join("+")||"points"),x9.coerceSelectionMarkerOpacity(t,i)}function c$e(e,t,r,n){var i=n("r"),a=n("theta");x9.isTypedArray(i)&&(t.r=i=Array.from(i)),x9.isTypedArray(a)&&(t.theta=a=Array.from(a));var o;if(i)a?o=Math.min(i.length,a.length):(o=i.length,n("theta0"),n("dtheta"));else{if(!a)return 0;o=t.theta.length,n("r0"),n("dr")}return t._length=o,o}f$e.exports={handleRThetaDefaults:c$e,supplyDefaults:kQt}});var w9=ye((Nwr,d$e)=>{"use strict";var CQt=Dr(),h$e=ho();d$e.exports=function(t,r,n){var i={},a=n[r.subplot]._subplot,o,s;a?(o=a.radialAxis,s=a.angularAxis):(a=n[r.subplot],o=a.radialaxis,s=a.angularaxis);var l=o.c2l(t.r);i.rLabel=h$e.tickText(o,l,!0).text;var u=s.thetaunit==="degrees"?CQt.rad2deg(t.theta):t.theta;return i.thetaLabel=h$e.tickText(s,u,!0).text,i}});var g$e=ye((Uwr,p$e)=>{"use strict";var v$e=Eo(),LQt=fs().BADNUM,PQt=ho(),IQt=F0(),RQt=km(),DQt=z0(),FQt=O0().calcMarkerSize;p$e.exports=function(t,r){for(var n=t._fullLayout,i=r.subplot,a=n[i].radialaxis,o=n[i].angularaxis,s=a.makeCalcdata(r,"r"),l=o.makeCalcdata(r,"theta"),u=r._length,c=new Array(u),f=0;f<u;f++){var h=s[f],d=l[f],v=c[f]={};v$e(h)&&v$e(d)?(v.r=h,v.theta=d):v.r=LQt}var _=FQt(r,u);return r._extremes.x=PQt.findExtremes(a,s,{ppad:_}),IQt(t,r),RQt(c,r),DQt(c,r),c}});var _$e=ye((Vwr,y$e)=>{"use strict";var zQt=sT(),m$e=fs().BADNUM;y$e.exports=function(t,r,n){for(var i=r.layers.frontplot.select("g.scatterlayer"),a=r.xaxis,o=r.yaxis,s={xaxis:a,yaxis:o,plot:r.framework,layerClipId:r._hasClipOnAxisFalse?r.clipIds.forTraces:null},l=r.radialAxis,u=r.angularAxis,c=0;c<n.length;c++)for(var f=n[c],h=0;h<f.length;h++){h===0&&(f[0].trace._xA=a,f[0].trace._yA=o);var d=f[h],v=d.r;if(v===m$e)d.x=d.y=m$e;else{var _=l.c2g(v),b=u.c2g(d.theta);d.x=_*Math.cos(b),d.y=_*Math.sin(b)}}zQt(t,s,n,i)}});var T9=ye((Gwr,b$e)=>{"use strict";var OQt=fT();function qQt(e,t,r,n){var i=OQt(e,t,r,n);if(!(!i||i[0].index===!1)){var a=i[0];if(a.index===void 0)return i;var o=e.subplot,s=a.cd[a.index],l=a.trace;if(o.isPtInside(s))return a.xLabelVal=void 0,a.yLabelVal=void 0,x$e(s,l,o,a),a.hovertemplate=l.hovertemplate,i}}function x$e(e,t,r,n){var i=r.radialAxis,a=r.angularAxis;i._hovertitle="r",a._hovertitle="\u03B8";var o={};o[t.subplot]={_subplot:r};var s=t._module.formatLabels(e,t,o);n.rLabel=s.rLabel,n.thetaLabel=s.thetaLabel;var l=e.hi||t.hoverinfo,u=[];function c(h,d){u.push(h._hovertitle+": "+d)}if(!t.hovertemplate){var f=l.split("+");f.indexOf("all")!==-1&&(f=["r","theta","text"]),f.indexOf("r")!==-1&&c(i,n.rLabel),f.indexOf("theta")!==-1&&c(a,n.thetaLabel),f.indexOf("text")!==-1&&n.text&&(u.push(n.text),delete n.text),n.extraText=u.join("<br>")}}b$e.exports={hoverPoints:qQt,makeHoverPointText:x$e}});var T$e=ye((Hwr,w$e)=>{"use strict";w$e.exports={moduleType:"trace",name:"scatterpolar",basePlotModule:y9(),categories:["polar","symbols","showLegend","scatter-like"],attributes:hC(),supplyDefaults:b9().supplyDefaults,colorbar:$d(),formatLabels:w9(),calc:g$e(),plot:_$e(),style:op().style,styleOnSelect:op().styleOnSelect,hoverPoints:T9().hoverPoints,selectPoints:hT(),meta:{}}});var S$e=ye((jwr,A$e)=>{"use strict";A$e.exports=T$e()});var K$=ye((Zwr,E$e)=>{"use strict";var M$e=hC(),{cliponaxis:Wwr,hoveron:Xwr}=M$e,BQt=uee(M$e,["cliponaxis","hoveron"]),{connectgaps:NQt,line:{color:UQt,dash:VQt,width:GQt},fill:HQt,fillcolor:jQt,marker:WQt,textfont:XQt,textposition:ZQt}=lk();E$e.exports=q1(mg({},BQt),{connectgaps:NQt,fill:HQt,fillcolor:jQt,line:{color:UQt,dash:VQt,editType:"calc",width:GQt},marker:WQt,textfont:XQt,textposition:ZQt})});var L$e=ye((Kwr,C$e)=>{"use strict";var k$e=Dr(),J$=Ru(),YQt=b9().handleRThetaDefaults,KQt=$p(),JQt=R0(),$Qt=D0(),QQt=Rg(),eer=Sm().PTS_LINESONLY,ter=K$();C$e.exports=function(t,r,n,i){function a(s,l){return k$e.coerce(t,r,ter,s,l)}var o=YQt(t,r,i,a);if(!o){r.visible=!1;return}a("thetaunit"),a("mode",o<eer?"lines+markers":"lines"),a("text"),a("hovertext"),r.hoveron!=="fills"&&(a("hovertemplate"),a("hovertemplatefallback")),J$.hasMarkers(r)&&KQt(t,r,n,i,a,{noAngleRef:!0,noStandOff:!0}),J$.hasLines(r)&&(JQt(t,r,n,i,a),a("connectgaps")),J$.hasText(r)&&(a("texttemplate"),a("texttemplatefallback"),$Qt(t,r,i,a,{noFontShadow:!0,noFontLineposition:!0,noFontTextcase:!0})),a("fill"),r.fill!=="none"&&QQt(t,r,n,a),k$e.coerceSelectionMarkerOpacity(r,a)}});var I$e=ye((Jwr,P$e)=>{"use strict";var rer=w9();P$e.exports=function(t,r,n){var i=t.i;return"r"in t||(t.r=r._r[i]),"theta"in t||(t.theta=r._theta[i]),rer(t,r,n)}});var D$e=ye(($wr,R$e)=>{"use strict";var ier=F0(),ner=O0().calcMarkerSize,aer=J2(),oer=ho(),ser=ox().TOO_MANY_POINTS;R$e.exports=function(t,r){var n=t._fullLayout,i=r.subplot,a=n[i].radialaxis,o=n[i].angularaxis,s=r._r=a.makeCalcdata(r,"r"),l=r._theta=o.makeCalcdata(r,"theta"),u=r._length,c={};u<s.length&&(s=s.slice(0,u)),u<l.length&&(l=l.slice(0,u)),c.r=s,c.theta=l,ier(t,r);var f=c.opts=aer.style(t,r),h;return u<ser?h=ner(r,u):f.marker&&(h=2*(f.marker.sizeAvg||Math.max(f.marker.size,3))),r._extremes.x=oer.findExtremes(a,s,{ppad:h}),[{x:!1,y:!1,t:c,trace:r}]}});var z$e=ye((Qwr,F$e)=>{"use strict";var ler=UF(),uer=T9().makeHoverPointText;function cer(e,t,r,n){var i=e.cd,a=i[0].t,o=a.r,s=a.theta,l=ler.hoverPoints(e,t,r,n);if(!(!l||l[0].index===!1)){var u=l[0];if(u.index===void 0)return l;var c=e.subplot,f=u.cd[u.index],h=u.trace;if(f.r=o[u.index],f.theta=s[u.index],!!c.isPtInside(f))return u.xLabelVal=void 0,u.yLabelVal=void 0,uer(f,h,c,u),l}}F$e.exports={hoverPoints:cer}});var q$e=ye((e3r,O$e)=>{"use strict";O$e.exports={moduleType:"trace",name:"scatterpolargl",basePlotModule:y9(),categories:["gl","regl","polar","symbols","showLegend","scatter-like"],attributes:K$(),supplyDefaults:L$e(),colorbar:$d(),formatLabels:I$e(),calc:D$e(),hoverPoints:z$e().hoverPoints,selectPoints:uY(),meta:{}}});var B$e=ye((t3r,$$)=>{"use strict";var fer=HF(),her=Eo(),der=dK(),ver=oY(),A9=J2(),S9=Dr(),per=ox().TOO_MANY_POINTS,ger={};$$.exports=function(t,r,n){if(n.length){var i=r.radialAxis,a=r.angularAxis,o=ver(t,r);return n.forEach(function(s){if(!(!s||!s[0]||!s[0].trace)){var l=s[0],u=l.trace,c=l.t,f=u._length,h=c.r,d=c.theta,v=c.opts,_,b=h.slice(),p=d.slice();for(_=0;_<h.length;_++)r.isPtInside({r:h[_],theta:d[_]})||(b[_]=NaN,p[_]=NaN);var k=new Array(f*2),E=Array(f),S=Array(f);for(_=0;_<f;_++){var L=b[_],x,C;if(her(L)){var M=i.c2g(L),g=a.c2g(p[_],u.thetaunit);x=M*Math.cos(g),C=M*Math.sin(g)}else x=C=NaN;E[_]=k[_*2]=x,S[_]=k[_*2+1]=C}c.tree=fer(k),v.marker&&f>=per&&(v.marker.cluster=c.tree),v.marker&&(v.markerSel.positions=v.markerUnsel.positions=v.marker.positions=k),v.line&&k.length>1&&S9.extendFlat(v.line,A9.linePositions(t,u,k)),v.text&&(S9.extendFlat(v.text,{positions:k},A9.textPosition(t,u,v.text,v.marker)),S9.extendFlat(v.textSel,{positions:k},A9.textPosition(t,u,v.text,v.markerSel)),S9.extendFlat(v.textUnsel,{positions:k},A9.textPosition(t,u,v.text,v.markerUnsel))),v.fill&&!o.fill2d&&(o.fill2d=!0),v.marker&&!o.scatter2d&&(o.scatter2d=!0),v.line&&!o.line2d&&(o.line2d=!0),v.text&&!o.glText&&(o.glText=!0),o.lineOptions.push(v.line),o.fillOptions.push(v.fill),o.markerOptions.push(v.marker),o.markerSelectedOptions.push(v.markerSel),o.markerUnselectedOptions.push(v.markerUnsel),o.textOptions.push(v.text),o.textSelectedOptions.push(v.textSel),o.textUnselectedOptions.push(v.textUnsel),o.selectBatch.push([]),o.unselectBatch.push([]),c.x=E,c.y=S,c.rawx=E,c.rawy=S,c.r=h,c.theta=d,c.positions=k,c._scene=o,c.index=o.count,o.count++}}),der(t,r,n)}};$$.exports.reglPrecompiled=ger});var V$e=ye((r3r,U$e)=>{"use strict";var N$e=q$e();N$e.plot=B$e();U$e.exports=N$e});var H$e=ye((i3r,G$e)=>{"use strict";G$e.exports=V$e()});var Q$=ye((n3r,j$e)=>{"use strict";var{hovertemplateAttrs:mer,templatefallbackAttrs:yer}=Ll(),K5=Ao().extendFlat,Ix=hC(),Rx=Lm();j$e.exports={r:Ix.r,theta:Ix.theta,r0:Ix.r0,dr:Ix.dr,theta0:Ix.theta0,dtheta:Ix.dtheta,thetaunit:Ix.thetaunit,base:K5({},Rx.base,{}),offset:K5({},Rx.offset,{}),width:K5({},Rx.width,{}),text:K5({},Rx.text,{}),hovertext:K5({},Rx.hovertext,{}),marker:_er(),hoverinfo:Ix.hoverinfo,hovertemplate:mer(),hovertemplatefallback:yer(),selected:Rx.selected,unselected:Rx.unselected};function _er(){var e=K5({},Rx.marker);return delete e.cornerradius,e}});var eQ=ye((a3r,W$e)=>{"use strict";W$e.exports={barmode:{valType:"enumerated",values:["stack","overlay"],dflt:"stack",editType:"calc"},bargap:{valType:"number",dflt:.1,min:0,max:1,editType:"calc"}}});var Y$e=ye((o3r,Z$e)=>{"use strict";var X$e=Dr(),xer=b9().handleRThetaDefaults,ber=VI(),wer=Q$();Z$e.exports=function(t,r,n,i){function a(s,l){return X$e.coerce(t,r,wer,s,l)}var o=xer(t,r,i,a);if(!o){r.visible=!1;return}a("thetaunit"),a("base"),a("offset"),a("width"),a("text"),a("hovertext"),a("hovertemplate"),a("hovertemplatefallback"),ber(t,r,a,n,i),X$e.coerceSelectionMarkerOpacity(r,a)}});var J$e=ye((s3r,K$e)=>{"use strict";var Ter=Dr(),Aer=eQ();K$e.exports=function(e,t,r){var n={},i;function a(l,u){return Ter.coerce(e[i]||{},t[i],Aer,l,u)}for(var o=0;o<r.length;o++){var s=r[o];s.type==="barpolar"&&s.visible===!0&&(i=s.subplot,n[i]||(a("barmode"),a("bargap"),n[i]=1))}}});var tQ=ye((l3r,eQe)=>{"use strict";var $$e=pv().hasColorscale,Q$e=gv(),Ser=Dr().isArrayOrTypedArray,Mer=g4(),Eer=jb().setGroupPositions,ker=z0(),Cer=qa().traceIs,Ler=Dr().extendFlat;function Per(e,t){for(var r=e._fullLayout,n=t.subplot,i=r[n].radialaxis,a=r[n].angularaxis,o=i.makeCalcdata(t,"r"),s=a.makeCalcdata(t,"theta"),l=t._length,u=new Array(l),c=o,f=s,h=0;h<l;h++)u[h]={p:f[h],s:c[h]};function d(v){var _=t[v];_!==void 0&&(t["_"+v]=Ser(_)?a.makeCalcdata(t,v):a.d2c(_,t.thetaunit))}return a.type==="linear"&&(d("width"),d("offset")),$$e(t,"marker")&&Q$e(e,t,{vals:t.marker.color,containerStr:"marker",cLetter:"c"}),$$e(t,"marker.line")&&Q$e(e,t,{vals:t.marker.line.color,containerStr:"marker.line",cLetter:"c"}),Mer(u,t),ker(u,t),u}function Ier(e,t,r){for(var n=e.calcdata,i=[],a=0;a<n.length;a++){var o=n[a],s=o[0].trace;s.visible===!0&&Cer(s,"bar")&&s.subplot===r&&i.push(o)}var l=Ler({},t.radialaxis,{_id:"x"}),u=t.angularaxis;Eer(e,u,l,i,{mode:t.barmode,norm:t.barnorm,gap:t.bargap,groupgap:t.bargroupgap})}eQe.exports={calc:Per,crossTraceCalc:Ier}});var iQe=ye((u3r,rQe)=>{"use strict";var tQe=Oa(),M9=Eo(),J5=Dr(),Rer=So(),rQ=f9();rQe.exports=function(t,r,n){var i=t._context.staticPlot,a=r.xaxis,o=r.yaxis,s=r.radialAxis,l=r.angularAxis,u=Der(r),c=r.layers.frontplot.select("g.barlayer");J5.makeTraceGroups(c,n,"trace bars").each(function(){var f=tQe.select(this),h=J5.ensureSingle(f,"g","points"),d=h.selectAll("g.point").data(J5.identity);d.enter().append("g").style("vector-effect",i?"none":"non-scaling-stroke").style("stroke-miterlimit",2).classed("point",!0),d.exit().remove(),d.each(function(v){var _=tQe.select(this),b=v.rp0=s.c2p(v.s0),p=v.rp1=s.c2p(v.s1),k=v.thetag0=l.c2g(v.p0),E=v.thetag1=l.c2g(v.p1),S;if(!M9(b)||!M9(p)||!M9(k)||!M9(E)||b===p||k===E)S="M0,0Z";else{var L=s.c2g(v.s1),x=(k+E)/2;v.ct=[a.c2p(L*Math.cos(x)),o.c2p(L*Math.sin(x))],S=u(b,p,k,E)}J5.ensureSingle(_,"path").attr("d",S)}),Rer.setClipUrl(f,r._hasClipOnAxisFalse?r.clipIds.forTraces:null,t)})};function Der(e){var t=e.cxx,r=e.cyy;return e.vangles?function(n,i,a,o){var s,l;J5.angleDelta(a,o)>0?(s=a,l=o):(s=o,l=a);var u=rQ.findEnclosingVertexAngles(s,e.vangles)[0],c=rQ.findEnclosingVertexAngles(l,e.vangles)[1],f=[u,(s+l)/2,c];return rQ.pathPolygonAnnulus(n,i,s,l,f,t,r)}:function(n,i,a,o){return J5.pathAnnulus(n,i,a,o,t,r)}}});var aQe=ye((c3r,nQe)=>{"use strict";var Fer=vf(),iQ=Dr(),zer=ET().getTraceColor,Oer=iQ.fillText,qer=T9().makeHoverPointText,Ber=f9().isPtInsidePolygon;nQe.exports=function(t,r,n){var i=t.cd,a=i[0].trace,o=t.subplot,s=o.radialAxis,l=o.angularAxis,u=o.vangles,c=u?Ber:iQ.isPtInsideSector,f=t.maxHoverDistance,h=l._period||2*Math.PI,d=Math.abs(s.g2p(Math.sqrt(r*r+n*n))),v=Math.atan2(n,r);s.range[0]>s.range[1]&&(v+=Math.PI);var _=function(E){return c(d,v,[E.rp0,E.rp1],[E.thetag0,E.thetag1],u)?f+Math.min(1,Math.abs(E.thetag1-E.thetag0)/h)-1+(E.rp1-d)/(E.rp1-E.rp0)-1:1/0};if(Fer.getClosest(i,_,t),t.index!==!1){var b=t.index,p=i[b];t.x0=t.x1=p.ct[0],t.y0=t.y1=p.ct[1];var k=iQ.extendFlat({},p,{r:p.s,theta:p.p});return Oer(p,a,t),qer(k,a,o,t),t.hovertemplate=a.hovertemplate,t.color=zer(a,p),t.xLabelVal=t.yLabelVal=void 0,p.s<0&&(t.idealAlign="left"),[t]}}});var sQe=ye((f3r,oQe)=>{"use strict";oQe.exports={moduleType:"trace",name:"barpolar",basePlotModule:y9(),categories:["polar","bar","showLegend"],attributes:Q$(),layoutAttributes:eQ(),supplyDefaults:Y$e(),supplyLayoutDefaults:J$e(),calc:tQ().calc,crossTraceCalc:tQ().crossTraceCalc,plot:iQe(),colorbar:$d(),formatLabels:w9(),style:N0().style,styleOnSelect:N0().styleOnSelect,hoverPoints:aQe(),selectPoints:kT(),meta:{}}});var uQe=ye((h3r,lQe)=>{"use strict";lQe.exports=sQe()});var nQ=ye((d3r,cQe)=>{"use strict";cQe.exports={attr:"subplot",name:"smith",axisNames:["realaxis","imaginaryaxis"],axisName2dataArray:{imaginaryaxis:"imag",realaxis:"real"}}});var aQ=ye((v3r,vQe)=>{"use strict";var Ner=Lh(),Nf=Rd(),Uer=Cc().attributes,Dx=Dr().extendFlat,fQe=mc().overrideAll,hQe=fQe({color:Nf.color,showline:Dx({},Nf.showline,{dflt:!0}),linecolor:Nf.linecolor,linewidth:Nf.linewidth,showgrid:Dx({},Nf.showgrid,{dflt:!0}),gridcolor:Nf.gridcolor,gridwidth:Nf.gridwidth,griddash:Nf.griddash},"plot","from-root"),dQe=fQe({ticklen:Nf.ticklen,tickwidth:Dx({},Nf.tickwidth,{dflt:2}),tickcolor:Nf.tickcolor,showticklabels:Nf.showticklabels,labelalias:Nf.labelalias,showtickprefix:Nf.showtickprefix,tickprefix:Nf.tickprefix,showticksuffix:Nf.showticksuffix,ticksuffix:Nf.ticksuffix,tickfont:Nf.tickfont,tickformat:Nf.tickformat,hoverformat:Nf.hoverformat,layer:Nf.layer},"plot","from-root"),Ver=Dx({visible:Dx({},Nf.visible,{dflt:!0}),tickvals:{dflt:[.2,.5,1,2,5],valType:"data_array",editType:"plot"},tickangle:Dx({},Nf.tickangle,{dflt:90}),ticks:{valType:"enumerated",values:["top","bottom",""],editType:"ticks"},side:{valType:"enumerated",values:["top","bottom"],dflt:"top",editType:"plot"},editType:"calc"},hQe,dQe),Ger=Dx({visible:Dx({},Nf.visible,{dflt:!0}),tickvals:{valType:"data_array",editType:"plot"},ticks:Nf.ticks,editType:"calc"},hQe,dQe);vQe.exports={domain:Uer({name:"smith",editType:"plot"}),bgcolor:{valType:"color",editType:"plot",dflt:Ner.background},realaxis:Ver,imaginaryaxis:Ger,editType:"calc"}});var mQe=ye((p3r,gQe)=>{"use strict";var $5=Dr(),Her=ka(),jer=vl(),Wer=k_(),Xer=Id().getSubplotData,Zer=t_(),Yer=e_(),Ker=t4(),Jer=ym(),Q5=aQ(),oQ=nQ(),pQe=oQ.axisNames,$er=etr(function(e){return $5.isTypedArray(e)&&(e=Array.from(e)),e.slice().reverse().map(function(t){return-t}).concat([0]).concat(e)},String);function Qer(e,t,r,n){var i=r("bgcolor");n.bgColor=Her.combine(i,n.paper_bgcolor);var a=Xer(n.fullData,oQ.name,n.id),o=n.layoutOut,s;function l(L,x){return r(s+"."+L,x)}for(var u=0;u<pQe.length;u++){s=pQe[u],$5.isPlainObject(e[s])||(e[s]={});var c=e[s],f=jer.newContainer(t,s);f._id=f._name=s,f._attr=n.id+"."+s,f._traceIndices=a.map(function(L){return L.index});var h=l("visible");if(f.type="linear",Jer(f,o),Zer(c,f,l,f.type),h){var d=s==="realaxis";if(d&&l("side"),d)l("tickvals");else{var v=$er(t.realaxis.tickvals||Q5.realaxis.tickvals.dflt);l("tickvals",v)}$5.isTypedArray(f.tickvals)&&(f.tickvals=Array.from(f.tickvals));var _,b,p,k,E=n.font||{};h&&(_=l("color"),b=_===c.color?_:E.color,p=E.size,k=E.family),Yer(c,f,l,f.type,{noAutotickangles:!0,noTicklabelshift:!0,noTicklabelstandoff:!0,noTicklabelstep:!0,noAng:!d,noExp:!0,font:{color:b,size:p,family:k}}),$5.coerce2(e,t,Q5,s+".ticklen"),$5.coerce2(e,t,Q5,s+".tickwidth"),$5.coerce2(e,t,Q5,s+".tickcolor",t.color);var S=l("ticks");S||(delete t[s].ticklen,delete t[s].tickwidth,delete t[s].tickcolor),Ker(c,f,l,{dfltColor:_,bgColor:n.bgColor,blend:60,showLine:!0,showGrid:!0,noZeroLine:!0,attributes:Q5[s]}),l("layer")}l("hoverformat"),delete f.type,f._input=c}}gQe.exports=function(t,r,n){Wer(t,r,n,{noUirevision:!0,type:oQ.name,attributes:Q5,handleDefaults:Qer,font:r.font,paper_bgcolor:r.paper_bgcolor,fullData:n,layoutOut:r})};function etr(e,t){var r={};return function(n){var i=t?t(n):n;if(i in r)return r[i];var a=e(n);return r[i]=a,a}}});var TQe=ye((g3r,wQe)=>{"use strict";var ttr=Id().getSubplotCalcData,rtr=Dr().counterRegex,itr=Z$(),_Qe=nQ(),xQe=_Qe.attr,xw=_Qe.name,yQe=rtr(xw),bQe={};bQe[xQe]={valType:"subplotid",dflt:xw,editType:"calc"};function ntr(e){for(var t=e._fullLayout,r=e.calcdata,n=t._subplots[xw],i=0;i<n.length;i++){var a=n[i],o=ttr(r,xw,a),s=t[a]._subplot;s||(s=itr(e,a,!0),t[a]._subplot=s),s.plot(o,t,e._promises)}}function atr(e,t,r,n){for(var i=n._subplots[xw]||[],a=0;a<i.length;a++){var o=i[a],s=n[o]._subplot;if(!t[o]&&s){s.framework.remove();for(var l in s.clipPaths)s.clipPaths[l].remove()}}}wQe.exports={attr:xQe,name:xw,idRoot:xw,idRegex:yQe,attrRegex:yQe,attributes:bQe,layoutAttributes:aQ(),supplyLayoutDefaults:mQe(),plot:ntr,clean:atr,toSVG:ph().toSVG}});var sQ=ye((m3r,SQe)=>{"use strict";var{hovertemplateAttrs:otr,texttemplateAttrs:str,templatefallbackAttrs:AQe}=Ll(),E9=Ao().extendFlat,ltr=Cg(),h0=pf(),utr=Gl(),eS=h0.line;SQe.exports={mode:h0.mode,real:{valType:"data_array",editType:"calc+clearAxisTypes"},imag:{valType:"data_array",editType:"calc+clearAxisTypes"},text:h0.text,texttemplate:str({editType:"plot"},{keys:["real","imag","text"]}),texttemplatefallback:AQe({editType:"plot"}),hovertext:h0.hovertext,line:{color:eS.color,width:eS.width,dash:eS.dash,backoff:eS.backoff,shape:E9({},eS.shape,{values:["linear","spline"]}),smoothing:eS.smoothing,editType:"calc"},connectgaps:h0.connectgaps,marker:h0.marker,cliponaxis:E9({},h0.cliponaxis,{dflt:!1}),textposition:h0.textposition,textfont:h0.textfont,fill:E9({},h0.fill,{values:["none","toself","tonext"],dflt:"none"}),fillcolor:ltr(),hoverinfo:E9({},utr.hoverinfo,{flags:["real","imag","text","name"]}),hoveron:h0.hoveron,hovertemplate:otr(),hovertemplatefallback:AQe(),selected:h0.selected,unselected:h0.unselected}});var kQe=ye((y3r,EQe)=>{"use strict";var k9=Dr(),tS=Ru(),ctr=$p(),ftr=R0(),MQe=tT(),htr=D0(),dtr=Rg(),vtr=Sm().PTS_LINESONLY,ptr=sQ();EQe.exports=function(t,r,n,i){function a(l,u){return k9.coerce(t,r,ptr,l,u)}var o=gtr(t,r,i,a);if(!o){r.visible=!1;return}a("mode",o<vtr?"lines+markers":"lines"),a("text"),a("hovertext"),r.hoveron!=="fills"&&(a("hovertemplate"),a("hovertemplatefallback")),tS.hasMarkers(r)&&ctr(t,r,n,i,a,{gradient:!0}),tS.hasLines(r)&&(ftr(t,r,n,i,a,{backoff:!0}),MQe(t,r,a),a("connectgaps")),tS.hasText(r)&&(a("texttemplate"),a("texttemplatefallback"),htr(t,r,i,a));var s=[];(tS.hasMarkers(r)||tS.hasText(r))&&(a("cliponaxis"),a("marker.maxdisplayed"),s.push("points")),a("fill"),r.fill!=="none"&&(dtr(t,r,n,a),tS.hasLines(r)||MQe(t,r,a)),(r.fill==="tonext"||r.fill==="toself")&&s.push("fills"),a("hoveron",s.join("+")||"points"),k9.coerceSelectionMarkerOpacity(r,a)};function gtr(e,t,r,n){var i=n("real"),a=n("imag"),o;return i&&a&&(o=Math.min(i.length,a.length)),k9.isTypedArray(i)&&(t.real=i=Array.from(i)),k9.isTypedArray(a)&&(t.imag=a=Array.from(a)),t._length=o,o}});var PQe=ye((_3r,LQe)=>{"use strict";var CQe=ho();LQe.exports=function(t,r,n){var i={},a=n[r.subplot]._subplot;return i.realLabel=CQe.tickText(a.radialAxis,t.real,!0).text,i.imagLabel=CQe.tickText(a.angularAxis,t.imag,!0).text,i}});var DQe=ye((x3r,RQe)=>{"use strict";var IQe=Eo(),mtr=fs().BADNUM,ytr=F0(),_tr=km(),xtr=z0(),btr=O0().calcMarkerSize;RQe.exports=function(t,r){for(var n=t._fullLayout,i=r.subplot,a=n[i].realaxis,o=n[i].imaginaryaxis,s=a.makeCalcdata(r,"real"),l=o.makeCalcdata(r,"imag"),u=r._length,c=new Array(u),f=0;f<u;f++){var h=s[f],d=l[f],v=c[f]={};IQe(h)&&IQe(d)?(v.real=h,v.imag=d):v.real=mtr}return btr(r,u),ytr(t,r),_tr(c,r),xtr(c,r),c}});var OQe=ye((b3r,zQe)=>{"use strict";var wtr=sT(),FQe=fs().BADNUM,Ttr=j$(),Atr=Ttr.smith;zQe.exports=function(t,r,n){for(var i=r.layers.frontplot.select("g.scatterlayer"),a=r.xaxis,o=r.yaxis,s={xaxis:a,yaxis:o,plot:r.framework,layerClipId:r._hasClipOnAxisFalse?r.clipIds.forTraces:null},l=0;l<n.length;l++)for(var u=n[l],c=0;c<u.length;c++){c===0&&(u[0].trace._xA=a,u[0].trace._yA=o);var f=u[c],h=f.real;if(h===FQe)f.x=f.y=FQe;else{var d=Atr([h,f.imag]);f.x=d[0],f.y=d[1]}}wtr(t,s,n,i)}});var NQe=ye((w3r,BQe)=>{"use strict";var Str=fT();function Mtr(e,t,r,n){var i=Str(e,t,r,n);if(!(!i||i[0].index===!1)){var a=i[0];if(a.index===void 0)return i;var o=e.subplot,s=a.cd[a.index],l=a.trace;if(o.isPtInside(s))return a.xLabelVal=void 0,a.yLabelVal=void 0,qQe(s,l,o,a),a.hovertemplate=l.hovertemplate,i}}function qQe(e,t,r,n){var i=r.radialAxis,a=r.angularAxis;i._hovertitle="real",a._hovertitle="imag";var o={};o[t.subplot]={_subplot:r};var s=t._module.formatLabels(e,t,o);n.realLabel=s.realLabel,n.imagLabel=s.imagLabel;var l=e.hi||t.hoverinfo,u=[];function c(h,d){u.push(h._hovertitle+": "+d)}if(!t.hovertemplate){var f=l.split("+");f.indexOf("all")!==-1&&(f=["real","imag","text"]),f.indexOf("real")!==-1&&c(i,n.realLabel),f.indexOf("imag")!==-1&&c(a,n.imagLabel),f.indexOf("text")!==-1&&n.text&&(u.push(n.text),delete n.text),n.extraText=u.join("<br>")}}BQe.exports={hoverPoints:Mtr,makeHoverPointText:qQe}});var VQe=ye((T3r,UQe)=>{"use strict";UQe.exports={moduleType:"trace",name:"scattersmith",basePlotModule:TQe(),categories:["smith","symbols","showLegend","scatter-like"],attributes:sQ(),supplyDefaults:kQe(),colorbar:$d(),formatLabels:PQe(),calc:DQe(),plot:OQe(),style:op().style,styleOnSelect:op().styleOnSelect,hoverPoints:NQe().hoverPoints,selectPoints:hT(),meta:{}}});var HQe=ye((A3r,GQe)=>{"use strict";GQe.exports=VQe()});var kv=ye((S3r,WQe)=>{var L9=Oh();function jQe(){this.regionalOptions=[],this.regionalOptions[""]={invalidCalendar:"Calendar {0} not found",invalidDate:"Invalid {0} date",invalidMonth:"Invalid {0} month",invalidYear:"Invalid {0} year",differentCalendars:"Cannot mix {0} and {1} dates"},this.local=this.regionalOptions[""],this.calendars={},this._localCals={}}L9(jQe.prototype,{instance:function(e,t){e=(e||"gregorian").toLowerCase(),t=t||"";var r=this._localCals[e+"-"+t];if(!r&&this.calendars[e]&&(r=new this.calendars[e](t),this._localCals[e+"-"+t]=r),!r)throw(this.local.invalidCalendar||this.regionalOptions[""].invalidCalendar).replace(/\{0\}/,e);return r},newDate:function(e,t,r,n,i){return n=(e!=null&&e.year?e.calendar():typeof n=="string"?this.instance(n,i):n)||this.instance(),n.newDate(e,t,r)},substituteDigits:function(e){return function(t){return(t+"").replace(/[0-9]/g,function(r){return e[r]})}},substituteChineseDigits:function(e,t){return function(r){for(var n="",i=0;r>0;){var a=r%10;n=(a===0?"":e[a]+t[i])+n,i++,r=Math.floor(r/10)}return n.indexOf(e[1]+t[1])===0&&(n=n.substr(1)),n||e[0]}}});function lQ(e,t,r,n){if(this._calendar=e,this._year=t,this._month=r,this._day=n,this._calendar._validateLevel===0&&!this._calendar.isValid(this._year,this._month,this._day))throw(Gs.local.invalidDate||Gs.regionalOptions[""].invalidDate).replace(/\{0\}/,this._calendar.local.name)}function C9(e,t){return e=""+e,"000000".substring(0,t-e.length)+e}L9(lQ.prototype,{newDate:function(e,t,r){return this._calendar.newDate(e==null?this:e,t,r)},year:function(e){return arguments.length===0?this._year:this.set(e,"y")},month:function(e){return arguments.length===0?this._month:this.set(e,"m")},day:function(e){return arguments.length===0?this._day:this.set(e,"d")},date:function(e,t,r){if(!this._calendar.isValid(e,t,r))throw(Gs.local.invalidDate||Gs.regionalOptions[""].invalidDate).replace(/\{0\}/,this._calendar.local.name);return this._year=e,this._month=t,this._day=r,this},leapYear:function(){return this._calendar.leapYear(this)},epoch:function(){return this._calendar.epoch(this)},formatYear:function(){return this._calendar.formatYear(this)},monthOfYear:function(){return this._calendar.monthOfYear(this)},weekOfYear:function(){return this._calendar.weekOfYear(this)},daysInYear:function(){return this._calendar.daysInYear(this)},dayOfYear:function(){return this._calendar.dayOfYear(this)},daysInMonth:function(){return this._calendar.daysInMonth(this)},dayOfWeek:function(){return this._calendar.dayOfWeek(this)},weekDay:function(){return this._calendar.weekDay(this)},extraInfo:function(){return this._calendar.extraInfo(this)},add:function(e,t){return this._calendar.add(this,e,t)},set:function(e,t){return this._calendar.set(this,e,t)},compareTo:function(e){if(this._calendar.name!==e._calendar.name)throw(Gs.local.differentCalendars||Gs.regionalOptions[""].differentCalendars).replace(/\{0\}/,this._calendar.local.name).replace(/\{1\}/,e._calendar.local.name);var t=this._year!==e._year?this._year-e._year:this._month!==e._month?this.monthOfYear()-e.monthOfYear():this._day-e._day;return t===0?0:t<0?-1:1},calendar:function(){return this._calendar},toJD:function(){return this._calendar.toJD(this)},fromJD:function(e){return this._calendar.fromJD(e)},toJSDate:function(){return this._calendar.toJSDate(this)},fromJSDate:function(e){return this._calendar.fromJSDate(e)},toString:function(){return(this.year()<0?"-":"")+C9(Math.abs(this.year()),4)+"-"+C9(this.month(),2)+"-"+C9(this.day(),2)}});function uQ(){this.shortYearCutoff="+10"}L9(uQ.prototype,{_validateLevel:0,newDate:function(e,t,r){return e==null?this.today():(e.year&&(this._validate(e,t,r,Gs.local.invalidDate||Gs.regionalOptions[""].invalidDate),r=e.day(),t=e.month(),e=e.year()),new lQ(this,e,t,r))},today:function(){return this.fromJSDate(new Date)},epoch:function(e){var t=this._validate(e,this.minMonth,this.minDay,Gs.local.invalidYear||Gs.regionalOptions[""].invalidYear);return t.year()<0?this.local.epochs[0]:this.local.epochs[1]},formatYear:function(e){var t=this._validate(e,this.minMonth,this.minDay,Gs.local.invalidYear||Gs.regionalOptions[""].invalidYear);return(t.year()<0?"-":"")+C9(Math.abs(t.year()),4)},monthsInYear:function(e){return this._validate(e,this.minMonth,this.minDay,Gs.local.invalidYear||Gs.regionalOptions[""].invalidYear),12},monthOfYear:function(e,t){var r=this._validate(e,t,this.minDay,Gs.local.invalidMonth||Gs.regionalOptions[""].invalidMonth);return(r.month()+this.monthsInYear(r)-this.firstMonth)%this.monthsInYear(r)+this.minMonth},fromMonthOfYear:function(e,t){var r=(t+this.firstMonth-2*this.minMonth)%this.monthsInYear(e)+this.minMonth;return this._validate(e,r,this.minDay,Gs.local.invalidMonth||Gs.regionalOptions[""].invalidMonth),r},daysInYear:function(e){var t=this._validate(e,this.minMonth,this.minDay,Gs.local.invalidYear||Gs.regionalOptions[""].invalidYear);return this.leapYear(t)?366:365},dayOfYear:function(e,t,r){var n=this._validate(e,t,r,Gs.local.invalidDate||Gs.regionalOptions[""].invalidDate);return n.toJD()-this.newDate(n.year(),this.fromMonthOfYear(n.year(),this.minMonth),this.minDay).toJD()+1},daysInWeek:function(){return 7},dayOfWeek:function(e,t,r){var n=this._validate(e,t,r,Gs.local.invalidDate||Gs.regionalOptions[""].invalidDate);return(Math.floor(this.toJD(n))+2)%this.daysInWeek()},extraInfo:function(e,t,r){return this._validate(e,t,r,Gs.local.invalidDate||Gs.regionalOptions[""].invalidDate),{}},add:function(e,t,r){return this._validate(e,this.minMonth,this.minDay,Gs.local.invalidDate||Gs.regionalOptions[""].invalidDate),this._correctAdd(e,this._add(e,t,r),t,r)},_add:function(e,t,r){if(this._validateLevel++,r==="d"||r==="w"){var n=e.toJD()+t*(r==="w"?this.daysInWeek():1),i=e.calendar().fromJD(n);return this._validateLevel--,[i.year(),i.month(),i.day()]}try{var a=e.year()+(r==="y"?t:0),o=e.monthOfYear()+(r==="m"?t:0),i=e.day(),s=function(c){for(;o<c.minMonth;)a--,o+=c.monthsInYear(a);for(var f=c.monthsInYear(a);o>f-1+c.minMonth;)a++,o-=f,f=c.monthsInYear(a)};r==="y"?(e.month()!==this.fromMonthOfYear(a,o)&&(o=this.newDate(a,e.month(),this.minDay).monthOfYear()),o=Math.min(o,this.monthsInYear(a)),i=Math.min(i,this.daysInMonth(a,this.fromMonthOfYear(a,o)))):r==="m"&&(s(this),i=Math.min(i,this.daysInMonth(a,this.fromMonthOfYear(a,o))));var l=[a,this.fromMonthOfYear(a,o),i];return this._validateLevel--,l}catch(u){throw this._validateLevel--,u}},_correctAdd:function(e,t,r,n){if(!this.hasYearZero&&(n==="y"||n==="m")&&(t[0]===0||e.year()>0!=t[0]>0)){var i={y:[1,1,"y"],m:[1,this.monthsInYear(-1),"m"],w:[this.daysInWeek(),this.daysInYear(-1),"d"],d:[1,this.daysInYear(-1),"d"]}[n],a=r<0?-1:1;t=this._add(e,r*i[0]+a*i[1],i[2])}return e.date(t[0],t[1],t[2])},set:function(e,t,r){this._validate(e,this.minMonth,this.minDay,Gs.local.invalidDate||Gs.regionalOptions[""].invalidDate);var n=r==="y"?t:e.year(),i=r==="m"?t:e.month(),a=r==="d"?t:e.day();return(r==="y"||r==="m")&&(a=Math.min(a,this.daysInMonth(n,i))),e.date(n,i,a)},isValid:function(e,t,r){this._validateLevel++;var n=this.hasYearZero||e!==0;if(n){var i=this.newDate(e,t,this.minDay);n=t>=this.minMonth&&t-this.minMonth<this.monthsInYear(i)&&r>=this.minDay&&r-this.minDay<this.daysInMonth(i)}return this._validateLevel--,n},toJSDate:function(e,t,r){var n=this._validate(e,t,r,Gs.local.invalidDate||Gs.regionalOptions[""].invalidDate);return Gs.instance().fromJD(this.toJD(n)).toJSDate()},fromJSDate:function(e){return this.fromJD(Gs.instance().fromJSDate(e).toJD())},_validate:function(e,t,r,n){if(e.year){if(this._validateLevel===0&&this.name!==e.calendar().name)throw(Gs.local.differentCalendars||Gs.regionalOptions[""].differentCalendars).replace(/\{0\}/,this.local.name).replace(/\{1\}/,e.calendar().local.name);return e}try{if(this._validateLevel++,this._validateLevel===1&&!this.isValid(e,t,r))throw n.replace(/\{0\}/,this.local.name);var i=this.newDate(e,t,r);return this._validateLevel--,i}catch(a){throw this._validateLevel--,a}}});function cQ(e){this.local=this.regionalOptions[e]||this.regionalOptions[""]}cQ.prototype=new uQ;L9(cQ.prototype,{name:"Gregorian",jdEpoch:17214255e-1,daysPerMonth:[31,28,31,30,31,30,31,31,30,31,30,31],hasYearZero:!1,minMonth:1,firstMonth:1,minDay:1,regionalOptions:{"":{name:"Gregorian",epochs:["BCE","CE"],monthNames:["January","February","March","April","May","June","July","August","September","October","November","December"],monthNamesShort:["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],dayNames:["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],dayNamesShort:["Sun","Mon","Tue","Wed","Thu","Fri","Sat"],dayNamesMin:["Su","Mo","Tu","We","Th","Fr","Sa"],digits:null,dateFormat:"mm/dd/yyyy",firstDay:0,isRTL:!1}},leapYear:function(r){var t=this._validate(r,this.minMonth,this.minDay,Gs.local.invalidYear||Gs.regionalOptions[""].invalidYear),r=t.year()+(t.year()<0?1:0);return r%4===0&&(r%100!==0||r%400===0)},weekOfYear:function(e,t,r){var n=this.newDate(e,t,r);return n.add(4-(n.dayOfWeek()||7),"d"),Math.floor((n.dayOfYear()-1)/7)+1},daysInMonth:function(e,t){var r=this._validate(e,t,this.minDay,Gs.local.invalidMonth||Gs.regionalOptions[""].invalidMonth);return this.daysPerMonth[r.month()-1]+(r.month()===2&&this.leapYear(r.year())?1:0)},weekDay:function(e,t,r){return(this.dayOfWeek(e,t,r)||7)<6},toJD:function(e,t,r){var n=this._validate(e,t,r,Gs.local.invalidDate||Gs.regionalOptions[""].invalidDate);e=n.year(),t=n.month(),r=n.day(),e<0&&e++,t<3&&(t+=12,e--);var i=Math.floor(e/100),a=2-i+Math.floor(i/4);return Math.floor(365.25*(e+4716))+Math.floor(30.6001*(t+1))+r+a-1524.5},fromJD:function(e){var t=Math.floor(e+.5),r=Math.floor((t-186721625e-2)/36524.25);r=t+1+r-Math.floor(r/4);var n=r+1524,i=Math.floor((n-122.1)/365.25),a=Math.floor(365.25*i),o=Math.floor((n-a)/30.6001),s=n-a-Math.floor(o*30.6001),l=o-(o>13.5?13:1),u=i-(l>2.5?4716:4715);return u<=0&&u--,this.newDate(u,l,s)},toJSDate:function(e,t,r){var n=this._validate(e,t,r,Gs.local.invalidDate||Gs.regionalOptions[""].invalidDate),i=new Date(n.year(),n.month()-1,n.day());return i.setHours(0),i.setMinutes(0),i.setSeconds(0),i.setMilliseconds(0),i.setHours(i.getHours()>12?i.getHours()+2:0),i},fromJSDate:function(e){return this.newDate(e.getFullYear(),e.getMonth()+1,e.getDate())}});var Gs=WQe.exports=new jQe;Gs.cdate=lQ;Gs.baseCalendar=uQ;Gs.calendars.gregorian=cQ});var XQe=ye(()=>{var fQ=Oh(),Ud=kv();fQ(Ud.regionalOptions[""],{invalidArguments:"Invalid arguments",invalidFormat:"Cannot format a date from another calendar",missingNumberAt:"Missing number at position {0}",unknownNameAt:"Unknown name at position {0}",unexpectedLiteralAt:"Unexpected literal at position {0}",unexpectedText:"Additional text found at end"});Ud.local=Ud.regionalOptions[""];fQ(Ud.cdate.prototype,{formatDate:function(e,t){return typeof e!="string"&&(t=e,e=""),this._calendar.formatDate(e||"",this,t)}});fQ(Ud.baseCalendar.prototype,{UNIX_EPOCH:Ud.instance().newDate(1970,1,1).toJD(),SECS_PER_DAY:24*60*60,TICKS_EPOCH:Ud.instance().jdEpoch,TICKS_PER_DAY:24*60*60*1e7,ATOM:"yyyy-mm-dd",COOKIE:"D, dd M yyyy",FULL:"DD, MM d, yyyy",ISO_8601:"yyyy-mm-dd",JULIAN:"J",RFC_822:"D, d M yy",RFC_850:"DD, dd-M-yy",RFC_1036:"D, d M yy",RFC_1123:"D, d M yyyy",RFC_2822:"D, d M yyyy",RSS:"D, d M yy",TICKS:"!",TIMESTAMP:"@",W3C:"yyyy-mm-dd",formatDate:function(e,t,r){if(typeof e!="string"&&(r=t,t=e,e=""),!t)return"";if(t.calendar()!==this)throw Ud.local.invalidFormat||Ud.regionalOptions[""].invalidFormat;e=e||this.local.dateFormat,r=r||{};for(var n=r.dayNamesShort||this.local.dayNamesShort,i=r.dayNames||this.local.dayNames,a=r.monthNumbers||this.local.monthNumbers,o=r.monthNamesShort||this.local.monthNamesShort,s=r.monthNames||this.local.monthNames,l=r.calculateWeek||this.local.calculateWeek,u=function(S,L){for(var x=1;E+x<e.length&&e.charAt(E+x)===S;)x++;return E+=x-1,Math.floor(x/(L||1))>1},c=function(S,L,x,C){var M=""+L;if(u(S,C))for(;M.length<x;)M="0"+M;return M},f=function(S,L,x,C){return u(S)?C[L]:x[L]},h=this,d=function(S){return typeof a=="function"?a.call(h,S,u("m")):b(c("m",S.month(),2))},v=function(S,L){return L?typeof s=="function"?s.call(h,S):s[S.month()-h.minMonth]:typeof o=="function"?o.call(h,S):o[S.month()-h.minMonth]},_=this.local.digits,b=function(S){return r.localNumbers&&_?_(S):S},p="",k=!1,E=0;E<e.length;E++)if(k)e.charAt(E)==="'"&&!u("'")?k=!1:p+=e.charAt(E);else switch(e.charAt(E)){case"d":p+=b(c("d",t.day(),2));break;case"D":p+=f("D",t.dayOfWeek(),n,i);break;case"o":p+=c("o",t.dayOfYear(),3);break;case"w":p+=c("w",t.weekOfYear(),2);break;case"m":p+=d(t);break;case"M":p+=v(t,u("M"));break;case"y":p+=u("y",2)?t.year():(t.year()%100<10?"0":"")+t.year()%100;break;case"Y":u("Y",2),p+=t.formatYear();break;case"J":p+=t.toJD();break;case"@":p+=(t.toJD()-this.UNIX_EPOCH)*this.SECS_PER_DAY;break;case"!":p+=(t.toJD()-this.TICKS_EPOCH)*this.TICKS_PER_DAY;break;case"'":u("'")?p+="'":k=!0;break;default:p+=e.charAt(E)}return p},parseDate:function(e,t,r){if(t==null)throw Ud.local.invalidArguments||Ud.regionalOptions[""].invalidArguments;if(t=typeof t=="object"?t.toString():t+"",t==="")return null;e=e||this.local.dateFormat,r=r||{};var n=r.shortYearCutoff||this.shortYearCutoff;n=typeof n!="string"?n:this.today().year()%100+parseInt(n,10);for(var i=r.dayNamesShort||this.local.dayNamesShort,a=r.dayNames||this.local.dayNames,o=r.parseMonth||this.local.parseMonth,s=r.monthNumbers||this.local.monthNumbers,l=r.monthNamesShort||this.local.monthNamesShort,u=r.monthNames||this.local.monthNames,c=-1,f=-1,h=-1,d=-1,v=-1,_=!1,b=!1,p=function(z,O){for(var V=1;g+V<e.length&&e.charAt(g+V)===z;)V++;return g+=V-1,Math.floor(V/(O||1))>1},k=function(z,O){var V=p(z,O),G=[2,3,V?4:2,V?4:2,10,11,20]["oyYJ@!".indexOf(z)+1],Z=new RegExp("^-?\\d{1,"+G+"}"),j=t.substring(M).match(Z);if(!j)throw(Ud.local.missingNumberAt||Ud.regionalOptions[""].missingNumberAt).replace(/\{0\}/,M);return M+=j[0].length,parseInt(j[0],10)},E=this,S=function(){if(typeof s=="function"){p("m");var z=s.call(E,t.substring(M));return M+=z.length,z}return k("m")},L=function(z,O,V,G){for(var Z=p(z,G)?V:O,j=0;j<Z.length;j++)if(t.substr(M,Z[j].length).toLowerCase()===Z[j].toLowerCase())return M+=Z[j].length,j+E.minMonth;throw(Ud.local.unknownNameAt||Ud.regionalOptions[""].unknownNameAt).replace(/\{0\}/,M)},x=function(){if(typeof u=="function"){var z=p("M")?u.call(E,t.substring(M)):l.call(E,t.substring(M));return M+=z.length,z}return L("M",l,u)},C=function(){if(t.charAt(M)!==e.charAt(g))throw(Ud.local.unexpectedLiteralAt||Ud.regionalOptions[""].unexpectedLiteralAt).replace(/\{0\}/,M);M++},M=0,g=0;g<e.length;g++)if(b)e.charAt(g)==="'"&&!p("'")?b=!1:C();else switch(e.charAt(g)){case"d":d=k("d");break;case"D":L("D",i,a);break;case"o":v=k("o");break;case"w":k("w");break;case"m":h=S();break;case"M":h=x();break;case"y":var P=g;_=!p("y",2),g=P,f=k("y",2);break;case"Y":f=k("Y",2);break;case"J":c=k("J")+.5,t.charAt(M)==="."&&(M++,k("J"));break;case"@":c=k("@")/this.SECS_PER_DAY+this.UNIX_EPOCH;break;case"!":c=k("!")/this.TICKS_PER_DAY+this.TICKS_EPOCH;break;case"*":M=t.length;break;case"'":p("'")?C():b=!0;break;default:C()}if(M<t.length)throw Ud.local.unexpectedText||Ud.regionalOptions[""].unexpectedText;if(f===-1?f=this.today().year():f<100&&_&&(f+=n===-1?1900:this.today().year()-this.today().year()%100-(f<=n?0:100)),typeof h=="string"&&(h=o.call(this,f,h)),v>-1){h=1,d=v;for(var T=this.daysInMonth(f,h);d>T;T=this.daysInMonth(f,h))h++,d-=T}return c>-1?this.fromJD(c):this.newDate(f,h,d)},determineDate:function(e,t,r,n,i){r&&typeof r!="object"&&(i=n,n=r,r=null),typeof n!="string"&&(i=n,n="");var a=this,o=function(s){try{return a.parseDate(n,s,i)}catch(f){}s=s.toLowerCase();for(var l=(s.match(/^c/)&&r?r.newDate():null)||a.today(),u=/([+-]?[0-9]+)\s*(d|w|m|y)?/g,c=u.exec(s);c;)l.add(parseInt(c[1],10),c[2]||"d"),c=u.exec(s);return l};return t=t?t.newDate():null,e=e==null?t:typeof e=="string"?o(e):typeof e=="number"?isNaN(e)||e===1/0||e===-1/0?t:a.today().add(e,"d"):a.newDate(e),e}})});var ZQe=ye(()=>{var Fx=kv(),Etr=Oh(),hQ=Fx.instance();function P9(e){this.local=this.regionalOptions[e||""]||this.regionalOptions[""]}P9.prototype=new Fx.baseCalendar;Etr(P9.prototype,{name:"Chinese",jdEpoch:17214255e-1,hasYearZero:!1,minMonth:0,firstMonth:0,minDay:1,regionalOptions:{"":{name:"Chinese",epochs:["BEC","EC"],monthNumbers:function(e,t){if(typeof e=="string"){var r=e.match(Ctr);return r?r[0]:""}var n=this._validateYear(e),i=e.month(),a=""+this.toChineseMonth(n,i);return t&&a.length<2&&(a="0"+a),this.isIntercalaryMonth(n,i)&&(a+="i"),a},monthNames:function(e){if(typeof e=="string"){var t=e.match(Ltr);return t?t[0]:""}var r=this._validateYear(e),n=e.month(),i=this.toChineseMonth(r,n),a=["\u4E00\u6708","\u4E8C\u6708","\u4E09\u6708","\u56DB\u6708","\u4E94\u6708","\u516D\u6708","\u4E03\u6708","\u516B\u6708","\u4E5D\u6708","\u5341\u6708","\u5341\u4E00\u6708","\u5341\u4E8C\u6708"][i-1];return this.isIntercalaryMonth(r,n)&&(a="\u95F0"+a),a},monthNamesShort:function(e){if(typeof e=="string"){var t=e.match(Ptr);return t?t[0]:""}var r=this._validateYear(e),n=e.month(),i=this.toChineseMonth(r,n),a=["\u4E00","\u4E8C","\u4E09","\u56DB","\u4E94","\u516D","\u4E03","\u516B","\u4E5D","\u5341","\u5341\u4E00","\u5341\u4E8C"][i-1];return this.isIntercalaryMonth(r,n)&&(a="\u95F0"+a),a},parseMonth:function(e,t){e=this._validateYear(e);var r=parseInt(t),n;if(isNaN(r))t[0]==="\u95F0"&&(n=!0,t=t.substring(1)),t[t.length-1]==="\u6708"&&(t=t.substring(0,t.length-1)),r=1+["\u4E00","\u4E8C","\u4E09","\u56DB","\u4E94","\u516D","\u4E03","\u516B","\u4E5D","\u5341","\u5341\u4E00","\u5341\u4E8C"].indexOf(t);else{var i=t[t.length-1];n=i==="i"||i==="I"}var a=this.toMonthIndex(e,r,n);return a},dayNames:["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],dayNamesShort:["Sun","Mon","Tue","Wed","Thu","Fri","Sat"],dayNamesMin:["Su","Mo","Tu","We","Th","Fr","Sa"],digits:null,dateFormat:"yyyy/mm/dd",firstDay:1,isRTL:!1}},_validateYear:function(e,t){if(e.year&&(e=e.year()),typeof e!="number"||e<1888||e>2111)throw t.replace(/\{0\}/,this.local.name);return e},toMonthIndex:function(e,t,r){var n=this.intercalaryMonth(e),i=r&&t!==n;if(i||t<1||t>12)throw Fx.local.invalidMonth.replace(/\{0\}/,this.local.name);var a;return n?!r&&t<=n?a=t-1:a=t:a=t-1,a},toChineseMonth:function(e,t){e.year&&(e=e.year(),t=e.month());var r=this.intercalaryMonth(e),n=r?12:11;if(t<0||t>n)throw Fx.local.invalidMonth.replace(/\{0\}/,this.local.name);var i;return r?t<r?i=t+1:i=t:i=t+1,i},intercalaryMonth:function(e){e=this._validateYear(e);var t=zx[e-zx[0]],r=t>>13;return r},isIntercalaryMonth:function(e,t){e.year&&(e=e.year(),t=e.month());var r=this.intercalaryMonth(e);return!!r&&r===t},leapYear:function(e){return this.intercalaryMonth(e)!==0},weekOfYear:function(e,t,r){var n=this._validateYear(e,Fx.local.invalidyear),i=Ox[n-Ox[0]],a=i>>9&4095,o=i>>5&15,s=i&31,l;l=hQ.newDate(a,o,s),l.add(4-(l.dayOfWeek()||7),"d");var u=this.toJD(e,t,r)-l.toJD();return 1+Math.floor(u/7)},monthsInYear:function(e){return this.leapYear(e)?13:12},daysInMonth:function(e,t){e.year&&(t=e.month(),e=e.year()),e=this._validateYear(e);var r=zx[e-zx[0]],n=r>>13,i=n?12:11;if(t>i)throw Fx.local.invalidMonth.replace(/\{0\}/,this.local.name);var a=r&1<<12-t?30:29;return a},weekDay:function(e,t,r){return(this.dayOfWeek(e,t,r)||7)<6},toJD:function(e,t,r){var n=this._validate(e,a,r,Fx.local.invalidDate);e=this._validateYear(n.year()),t=n.month(),r=n.day();var i=this.isIntercalaryMonth(e,t),a=this.toChineseMonth(e,t),o=Rtr(e,a,r,i);return hQ.toJD(o.year,o.month,o.day)},fromJD:function(e){var t=hQ.fromJD(e),r=Itr(t.year(),t.month(),t.day()),n=this.toMonthIndex(r.year,r.month,r.isIntercalary);return this.newDate(r.year,n,r.day)},fromString:function(e){var t=e.match(ktr),r=this._validateYear(+t[1]),n=+t[2],i=!!t[3],a=this.toMonthIndex(r,n,i),o=+t[4];return this.newDate(r,a,o)},add:function(e,t,r){var n=e.year(),i=e.month(),a=this.isIntercalaryMonth(n,i),o=this.toChineseMonth(n,i),s=Object.getPrototypeOf(P9.prototype).add.call(this,e,t,r);if(r==="y"){var l=s.year(),u=s.month(),c=this.isIntercalaryMonth(l,o),f=a&&c?this.toMonthIndex(l,o,!0):this.toMonthIndex(l,o,!1);f!==u&&s.month(f)}return s}});var ktr=/^\s*(-?\d\d\d\d|\d\d)[-/](\d?\d)([iI]?)[-/](\d?\d)/m,Ctr=/^\d?\d[iI]?/m,Ltr=/^闰?十?[一二三四五六七八九]?月/m,Ptr=/^闰?十?[一二三四五六七八九]?/m;Fx.calendars.chinese=P9;var zx=[1887,5780,5802,19157,2742,50359,1198,2646,46378,7466,3412,30122,5482,67949,2396,5294,43597,6732,6954,36181,2772,4954,18781,2396,54427,5274,6730,47781,5800,6868,21210,4790,59703,2350,5270,46667,3402,3496,38325,1388,4782,18735,2350,52374,6804,7498,44457,2906,1388,29294,4700,63789,6442,6804,56138,5802,2772,38235,1210,4698,22827,5418,63125,3476,5802,43701,2484,5302,27223,2646,70954,7466,3412,54698,5482,2412,38062,5294,2636,32038,6954,60245,2772,4826,43357,2394,5274,39501,6730,72357,5800,5844,53978,4790,2358,38039,5270,87627,3402,3496,54708,5484,4782,43311,2350,3222,27978,7498,68965,2904,5484,45677,4700,6444,39573,6804,6986,19285,2772,62811,1210,4698,47403,5418,5780,38570,5546,76469,2420,5302,51799,2646,5414,36501,3412,5546,18869,2412,54446,5276,6732,48422,6822,2900,28010,4826,92509,2394,5274,55883,6730,6820,47956,5812,2778,18779,2358,62615,5270,5450,46757,3492,5556,27318,4718,67887,2350,3222,52554,7498,3428,38252,5468,4700,31022,6444,64149,6804,6986,43861,2772,5338,35421,2650,70955,5418,5780,54954,5546,2740,38074,5302,2646,29991,3366,61011,3412,5546,43445,2412,5294,35406,6732,72998,6820,6996,52586,2778,2396,38045,5274,6698,23333,6820,64338,5812,2746,43355,2358,5270,39499,5450,79525,3492,5548],Ox=[1887,966732,967231,967733,968265,968766,969297,969798,970298,970829,971330,971830,972362,972863,973395,973896,974397,974928,975428,975929,976461,976962,977462,977994,978494,979026,979526,980026,980558,981059,981559,982091,982593,983124,983624,984124,984656,985157,985656,986189,986690,987191,987722,988222,988753,989254,989754,990286,990788,991288,991819,992319,992851,993352,993851,994383,994885,995385,995917,996418,996918,997450,997949,998481,998982,999483,1000014,1000515,1001016,1001548,1002047,1002578,1003080,1003580,1004111,1004613,1005113,1005645,1006146,1006645,1007177,1007678,1008209,1008710,1009211,1009743,1010243,1010743,1011275,1011775,1012306,1012807,1013308,1013840,1014341,1014841,1015373,1015874,1016404,1016905,1017405,1017937,1018438,1018939,1019471,1019972,1020471,1021002,1021503,1022035,1022535,1023036,1023568,1024069,1024568,1025100,1025601,1026102,1026633,1027133,1027666,1028167,1028666,1029198,1029699,1030199,1030730,1031231,1031763,1032264,1032764,1033296,1033797,1034297,1034828,1035329,1035830,1036362,1036861,1037393,1037894,1038394,1038925,1039427,1039927,1040459,1040959,1041491,1041992,1042492,1043023,1043524,1044024,1044556,1045057,1045558,1046090,1046590,1047121,1047622,1048122,1048654,1049154,1049655,1050187,1050689,1051219,1051720,1052220,1052751,1053252,1053752,1054284,1054786,1055285,1055817,1056317,1056849,1057349,1057850,1058382,1058883,1059383,1059915,1060415,1060947,1061447,1061947,1062479,1062981,1063480,1064012,1064514,1065014,1065545,1066045,1066577,1067078,1067578,1068110,1068611,1069112,1069642,1070142,1070674,1071175,1071675,1072207,1072709,1073209,1073740,1074241,1074741,1075273,1075773,1076305,1076807,1077308,1077839,1078340,1078840,1079372,1079871,1080403,1080904];function Itr(e,t,r,n){var i,a;if(typeof e=="object")i=e,a=t||{};else{var o=typeof e=="number"&&e>=1888&&e<=2111;if(!o)throw new Error("Solar year outside range 1888-2111");var s=typeof t=="number"&&t>=1&&t<=12;if(!s)throw new Error("Solar month outside range 1 - 12");var l=typeof r=="number"&&r>=1&&r<=31;if(!l)throw new Error("Solar day outside range 1 - 31");i={year:e,month:t,day:r},a=n||{}}var u=Ox[i.year-Ox[0]],c=i.year<<9|i.month<<5|i.day;a.year=c>=u?i.year:i.year-1,u=Ox[a.year-Ox[0]];var f=u>>9&4095,h=u>>5&15,d=u&31,v,_=new Date(f,h-1,d),b=new Date(i.year,i.month-1,i.day);v=Math.round((b-_)/(24*3600*1e3));var p=zx[a.year-zx[0]],k;for(k=0;k<13;k++){var E=p&1<<12-k?30:29;if(v<E)break;v-=E}var S=p>>13;return!S||k<S?(a.isIntercalary=!1,a.month=1+k):k===S?(a.isIntercalary=!0,a.month=k):(a.isIntercalary=!1,a.month=k),a.day=1+v,a}function Rtr(e,t,r,n,i){var a,o;if(typeof e=="object")o=e,a=t||{};else{var s=typeof e=="number"&&e>=1888&&e<=2111;if(!s)throw new Error("Lunar year outside range 1888-2111");var l=typeof t=="number"&&t>=1&&t<=12;if(!l)throw new Error("Lunar month outside range 1 - 12");var u=typeof r=="number"&&r>=1&&r<=30;if(!u)throw new Error("Lunar day outside range 1 - 30");var c;typeof n=="object"?(c=!1,a=n):(c=!!n,a=i||{}),o={year:e,month:t,day:r,isIntercalary:c}}var f;f=o.day-1;var h=zx[o.year-zx[0]],d=h>>13,v;d&&(o.month>d||o.isIntercalary)?v=o.month:v=o.month-1;for(var _=0;_<v;_++){var b=h&1<<12-_?30:29;f+=b}var p=Ox[o.year-Ox[0]],k=p>>9&4095,E=p>>5&15,S=p&31,L=new Date(k,E-1,S+f);return a.year=L.getFullYear(),a.month=1+L.getMonth(),a.day=L.getDate(),a}});var YQe=ye(()=>{var bw=kv(),Dtr=Oh();function dQ(e){this.local=this.regionalOptions[e||""]||this.regionalOptions[""]}dQ.prototype=new bw.baseCalendar;Dtr(dQ.prototype,{name:"Coptic",jdEpoch:18250295e-1,daysPerMonth:[30,30,30,30,30,30,30,30,30,30,30,30,5],hasYearZero:!1,minMonth:1,firstMonth:1,minDay:1,regionalOptions:{"":{name:"Coptic",epochs:["BAM","AM"],monthNames:["Thout","Paopi","Hathor","Koiak","Tobi","Meshir","Paremhat","Paremoude","Pashons","Paoni","Epip","Mesori","Pi Kogi Enavot"],monthNamesShort:["Tho","Pao","Hath","Koi","Tob","Mesh","Pat","Pad","Pash","Pao","Epi","Meso","PiK"],dayNames:["Tkyriaka","Pesnau","Pshoment","Peftoou","Ptiou","Psoou","Psabbaton"],dayNamesShort:["Tky","Pes","Psh","Pef","Pti","Pso","Psa"],dayNamesMin:["Tk","Pes","Psh","Pef","Pt","Pso","Psa"],digits:null,dateFormat:"dd/mm/yyyy",firstDay:0,isRTL:!1}},leapYear:function(r){var t=this._validate(r,this.minMonth,this.minDay,bw.local.invalidYear),r=t.year()+(t.year()<0?1:0);return r%4===3||r%4===-1},monthsInYear:function(e){return this._validate(e,this.minMonth,this.minDay,bw.local.invalidYear||bw.regionalOptions[""].invalidYear),13},weekOfYear:function(e,t,r){var n=this.newDate(e,t,r);return n.add(-n.dayOfWeek(),"d"),Math.floor((n.dayOfYear()-1)/7)+1},daysInMonth:function(e,t){var r=this._validate(e,t,this.minDay,bw.local.invalidMonth);return this.daysPerMonth[r.month()-1]+(r.month()===13&&this.leapYear(r.year())?1:0)},weekDay:function(e,t,r){return(this.dayOfWeek(e,t,r)||7)<6},toJD:function(e,t,r){var n=this._validate(e,t,r,bw.local.invalidDate);return e=n.year(),e<0&&e++,n.day()+(n.month()-1)*30+(e-1)*365+Math.floor(e/4)+this.jdEpoch-1},fromJD:function(e){var t=Math.floor(e)+.5-this.jdEpoch,r=Math.floor((t-Math.floor((t+366)/1461))/365)+1;r<=0&&r--,t=Math.floor(e)+.5-this.newDate(r,1,1).toJD();var n=Math.floor(t/30)+1,i=t-(n-1)*30+1;return this.newDate(r,n,i)}});bw.calendars.coptic=dQ});var KQe=ye(()=>{var x1=kv(),Ftr=Oh();function vQ(e){this.local=this.regionalOptions[e||""]||this.regionalOptions[""]}vQ.prototype=new x1.baseCalendar;Ftr(vQ.prototype,{name:"Discworld",jdEpoch:17214255e-1,daysPerMonth:[16,32,32,32,32,32,32,32,32,32,32,32,32],hasYearZero:!1,minMonth:1,firstMonth:1,minDay:1,regionalOptions:{"":{name:"Discworld",epochs:["BUC","UC"],monthNames:["Ick","Offle","February","March","April","May","June","Grune","August","Spune","Sektober","Ember","December"],monthNamesShort:["Ick","Off","Feb","Mar","Apr","May","Jun","Gru","Aug","Spu","Sek","Emb","Dec"],dayNames:["Sunday","Octeday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],dayNamesShort:["Sun","Oct","Mon","Tue","Wed","Thu","Fri","Sat"],dayNamesMin:["Su","Oc","Mo","Tu","We","Th","Fr","Sa"],digits:null,dateFormat:"yyyy/mm/dd",firstDay:2,isRTL:!1}},leapYear:function(e){return this._validate(e,this.minMonth,this.minDay,x1.local.invalidYear),!1},monthsInYear:function(e){return this._validate(e,this.minMonth,this.minDay,x1.local.invalidYear),13},daysInYear:function(e){return this._validate(e,this.minMonth,this.minDay,x1.local.invalidYear),400},weekOfYear:function(e,t,r){var n=this.newDate(e,t,r);return n.add(-n.dayOfWeek(),"d"),Math.floor((n.dayOfYear()-1)/8)+1},daysInMonth:function(e,t){var r=this._validate(e,t,this.minDay,x1.local.invalidMonth);return this.daysPerMonth[r.month()-1]},daysInWeek:function(){return 8},dayOfWeek:function(e,t,r){var n=this._validate(e,t,r,x1.local.invalidDate);return(n.day()+1)%8},weekDay:function(e,t,r){var n=this.dayOfWeek(e,t,r);return n>=2&&n<=6},extraInfo:function(e,t,r){var n=this._validate(e,t,r,x1.local.invalidDate);return{century:ztr[Math.floor((n.year()-1)/100)+1]||""}},toJD:function(e,t,r){var n=this._validate(e,t,r,x1.local.invalidDate);return e=n.year()+(n.year()<0?1:0),t=n.month(),r=n.day(),r+(t>1?16:0)+(t>2?(t-2)*32:0)+(e-1)*400+this.jdEpoch-1},fromJD:function(e){e=Math.floor(e+.5)-Math.floor(this.jdEpoch)-1;var t=Math.floor(e/400)+1;e-=(t-1)*400,e+=e>15?16:0;var r=Math.floor(e/32)+1,n=e-(r-1)*32+1;return this.newDate(t<=0?t-1:t,r,n)}});var ztr={20:"Fruitbat",21:"Anchovy"};x1.calendars.discworld=vQ});var JQe=ye(()=>{var ww=kv(),Otr=Oh();function pQ(e){this.local=this.regionalOptions[e||""]||this.regionalOptions[""]}pQ.prototype=new ww.baseCalendar;Otr(pQ.prototype,{name:"Ethiopian",jdEpoch:17242205e-1,daysPerMonth:[30,30,30,30,30,30,30,30,30,30,30,30,5],hasYearZero:!1,minMonth:1,firstMonth:1,minDay:1,regionalOptions:{"":{name:"Ethiopian",epochs:["BEE","EE"],monthNames:["Meskerem","Tikemet","Hidar","Tahesas","Tir","Yekatit","Megabit","Miazia","Genbot","Sene","Hamle","Nehase","Pagume"],monthNamesShort:["Mes","Tik","Hid","Tah","Tir","Yek","Meg","Mia","Gen","Sen","Ham","Neh","Pag"],dayNames:["Ehud","Segno","Maksegno","Irob","Hamus","Arb","Kidame"],dayNamesShort:["Ehu","Seg","Mak","Iro","Ham","Arb","Kid"],dayNamesMin:["Eh","Se","Ma","Ir","Ha","Ar","Ki"],digits:null,dateFormat:"dd/mm/yyyy",firstDay:0,isRTL:!1}},leapYear:function(r){var t=this._validate(r,this.minMonth,this.minDay,ww.local.invalidYear),r=t.year()+(t.year()<0?1:0);return r%4===3||r%4===-1},monthsInYear:function(e){return this._validate(e,this.minMonth,this.minDay,ww.local.invalidYear||ww.regionalOptions[""].invalidYear),13},weekOfYear:function(e,t,r){var n=this.newDate(e,t,r);return n.add(-n.dayOfWeek(),"d"),Math.floor((n.dayOfYear()-1)/7)+1},daysInMonth:function(e,t){var r=this._validate(e,t,this.minDay,ww.local.invalidMonth);return this.daysPerMonth[r.month()-1]+(r.month()===13&&this.leapYear(r.year())?1:0)},weekDay:function(e,t,r){return(this.dayOfWeek(e,t,r)||7)<6},toJD:function(e,t,r){var n=this._validate(e,t,r,ww.local.invalidDate);return e=n.year(),e<0&&e++,n.day()+(n.month()-1)*30+(e-1)*365+Math.floor(e/4)+this.jdEpoch-1},fromJD:function(e){var t=Math.floor(e)+.5-this.jdEpoch,r=Math.floor((t-Math.floor((t+366)/1461))/365)+1;r<=0&&r--,t=Math.floor(e)+.5-this.newDate(r,1,1).toJD();var n=Math.floor(t/30)+1,i=t-(n-1)*30+1;return this.newDate(r,n,i)}});ww.calendars.ethiopian=pQ});var $Qe=ye(()=>{var qx=kv(),qtr=Oh();function gQ(e){this.local=this.regionalOptions[e||""]||this.regionalOptions[""]}gQ.prototype=new qx.baseCalendar;qtr(gQ.prototype,{name:"Hebrew",jdEpoch:347995.5,daysPerMonth:[30,29,30,29,30,29,30,29,30,29,30,29,29],hasYearZero:!1,minMonth:1,firstMonth:7,minDay:1,regionalOptions:{"":{name:"Hebrew",epochs:["BAM","AM"],monthNames:["Nisan","Iyar","Sivan","Tammuz","Av","Elul","Tishrei","Cheshvan","Kislev","Tevet","Shevat","Adar","Adar II"],monthNamesShort:["Nis","Iya","Siv","Tam","Av","Elu","Tis","Che","Kis","Tev","She","Ada","Ad2"],dayNames:["Yom Rishon","Yom Sheni","Yom Shlishi","Yom Revi'i","Yom Chamishi","Yom Shishi","Yom Shabbat"],dayNamesShort:["Ris","She","Shl","Rev","Cha","Shi","Sha"],dayNamesMin:["Ri","She","Shl","Re","Ch","Shi","Sha"],digits:null,dateFormat:"dd/mm/yyyy",firstDay:0,isRTL:!1}},leapYear:function(e){var t=this._validate(e,this.minMonth,this.minDay,qx.local.invalidYear);return this._leapYear(t.year())},_leapYear:function(e){return e=e<0?e+1:e,I9(e*7+1,19)<7},monthsInYear:function(e){return this._validate(e,this.minMonth,this.minDay,qx.local.invalidYear),this._leapYear(e.year?e.year():e)?13:12},weekOfYear:function(e,t,r){var n=this.newDate(e,t,r);return n.add(-n.dayOfWeek(),"d"),Math.floor((n.dayOfYear()-1)/7)+1},daysInYear:function(e){var t=this._validate(e,this.minMonth,this.minDay,qx.local.invalidYear);return e=t.year(),this.toJD(e===-1?1:e+1,7,1)-this.toJD(e,7,1)},daysInMonth:function(e,t){return e.year&&(t=e.month(),e=e.year()),this._validate(e,t,this.minDay,qx.local.invalidMonth),t===12&&this.leapYear(e)||t===8&&I9(this.daysInYear(e),10)===5?30:t===9&&I9(this.daysInYear(e),10)===3?29:this.daysPerMonth[t-1]},weekDay:function(e,t,r){return this.dayOfWeek(e,t,r)!==6},extraInfo:function(e,t,r){var n=this._validate(e,t,r,qx.local.invalidDate);return{yearType:(this.leapYear(n)?"embolismic":"common")+" "+["deficient","regular","complete"][this.daysInYear(n)%10-3]}},toJD:function(e,t,r){var n=this._validate(e,t,r,qx.local.invalidDate);e=n.year(),t=n.month(),r=n.day();var i=e<=0?e+1:e,a=this.jdEpoch+this._delay1(i)+this._delay2(i)+r+1;if(t<7){for(var o=7;o<=this.monthsInYear(e);o++)a+=this.daysInMonth(e,o);for(var o=1;o<t;o++)a+=this.daysInMonth(e,o)}else for(var o=7;o<t;o++)a+=this.daysInMonth(e,o);return a},_delay1:function(e){var t=Math.floor((235*e-234)/19),r=12084+13753*t,n=t*29+Math.floor(r/25920);return I9(3*(n+1),7)<3&&n++,n},_delay2:function(e){var t=this._delay1(e-1),r=this._delay1(e),n=this._delay1(e+1);return n-r===356?2:r-t===382?1:0},fromJD:function(e){e=Math.floor(e)+.5;for(var t=Math.floor((e-this.jdEpoch)*98496/35975351)-1;e>=this.toJD(t===-1?1:t+1,7,1);)t++;for(var r=e<this.toJD(t,1,1)?7:1;e>this.toJD(t,r,this.daysInMonth(t,r));)r++;var n=e-this.toJD(t,r,1)+1;return this.newDate(t,r,n)}});function I9(e,t){return e-t*Math.floor(e/t)}qx.calendars.hebrew=gQ});var QQe=ye(()=>{var dC=kv(),Btr=Oh();function mQ(e){this.local=this.regionalOptions[e||""]||this.regionalOptions[""]}mQ.prototype=new dC.baseCalendar;Btr(mQ.prototype,{name:"Islamic",jdEpoch:19484395e-1,daysPerMonth:[30,29,30,29,30,29,30,29,30,29,30,29],hasYearZero:!1,minMonth:1,firstMonth:1,minDay:1,regionalOptions:{"":{name:"Islamic",epochs:["BH","AH"],monthNames:["Muharram","Safar","Rabi' al-awwal","Rabi' al-thani","Jumada al-awwal","Jumada al-thani","Rajab","Sha'aban","Ramadan","Shawwal","Dhu al-Qi'dah","Dhu al-Hijjah"],monthNamesShort:["Muh","Saf","Rab1","Rab2","Jum1","Jum2","Raj","Sha'","Ram","Shaw","DhuQ","DhuH"],dayNames:["Yawm al-ahad","Yawm al-ithnayn","Yawm ath-thulaathaa'","Yawm al-arbi'aa'","Yawm al-kham\u012Bs","Yawm al-jum'a","Yawm as-sabt"],dayNamesShort:["Aha","Ith","Thu","Arb","Kha","Jum","Sab"],dayNamesMin:["Ah","It","Th","Ar","Kh","Ju","Sa"],digits:null,dateFormat:"yyyy/mm/dd",firstDay:6,isRTL:!1}},leapYear:function(e){var t=this._validate(e,this.minMonth,this.minDay,dC.local.invalidYear);return(t.year()*11+14)%30<11},weekOfYear:function(e,t,r){var n=this.newDate(e,t,r);return n.add(-n.dayOfWeek(),"d"),Math.floor((n.dayOfYear()-1)/7)+1},daysInYear:function(e){return this.leapYear(e)?355:354},daysInMonth:function(e,t){var r=this._validate(e,t,this.minDay,dC.local.invalidMonth);return this.daysPerMonth[r.month()-1]+(r.month()===12&&this.leapYear(r.year())?1:0)},weekDay:function(e,t,r){return this.dayOfWeek(e,t,r)!==5},toJD:function(e,t,r){var n=this._validate(e,t,r,dC.local.invalidDate);return e=n.year(),t=n.month(),r=n.day(),e=e<=0?e+1:e,r+Math.ceil(29.5*(t-1))+(e-1)*354+Math.floor((3+11*e)/30)+this.jdEpoch-1},fromJD:function(e){e=Math.floor(e)+.5;var t=Math.floor((30*(e-this.jdEpoch)+10646)/10631);t=t<=0?t-1:t;var r=Math.min(12,Math.ceil((e-29-this.toJD(t,1,1))/29.5)+1),n=e-this.toJD(t,r,1)+1;return this.newDate(t,r,n)}});dC.calendars.islamic=mQ});var eet=ye(()=>{var vC=kv(),Ntr=Oh();function yQ(e){this.local=this.regionalOptions[e||""]||this.regionalOptions[""]}yQ.prototype=new vC.baseCalendar;Ntr(yQ.prototype,{name:"Julian",jdEpoch:17214235e-1,daysPerMonth:[31,28,31,30,31,30,31,31,30,31,30,31],hasYearZero:!1,minMonth:1,firstMonth:1,minDay:1,regionalOptions:{"":{name:"Julian",epochs:["BC","AD"],monthNames:["January","February","March","April","May","June","July","August","September","October","November","December"],monthNamesShort:["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],dayNames:["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],dayNamesShort:["Sun","Mon","Tue","Wed","Thu","Fri","Sat"],dayNamesMin:["Su","Mo","Tu","We","Th","Fr","Sa"],digits:null,dateFormat:"mm/dd/yyyy",firstDay:0,isRTL:!1}},leapYear:function(r){var t=this._validate(r,this.minMonth,this.minDay,vC.local.invalidYear),r=t.year()<0?t.year()+1:t.year();return r%4===0},weekOfYear:function(e,t,r){var n=this.newDate(e,t,r);return n.add(4-(n.dayOfWeek()||7),"d"),Math.floor((n.dayOfYear()-1)/7)+1},daysInMonth:function(e,t){var r=this._validate(e,t,this.minDay,vC.local.invalidMonth);return this.daysPerMonth[r.month()-1]+(r.month()===2&&this.leapYear(r.year())?1:0)},weekDay:function(e,t,r){return(this.dayOfWeek(e,t,r)||7)<6},toJD:function(e,t,r){var n=this._validate(e,t,r,vC.local.invalidDate);return e=n.year(),t=n.month(),r=n.day(),e<0&&e++,t<=2&&(e--,t+=12),Math.floor(365.25*(e+4716))+Math.floor(30.6001*(t+1))+r-1524.5},fromJD:function(e){var t=Math.floor(e+.5),r=t+1524,n=Math.floor((r-122.1)/365.25),i=Math.floor(365.25*n),a=Math.floor((r-i)/30.6001),o=a-Math.floor(a<14?1:13),s=n-Math.floor(o>2?4716:4715),l=r-i-Math.floor(30.6001*a);return s<=0&&s--,this.newDate(s,o,l)}});vC.calendars.julian=yQ});var ret=ye(()=>{var ug=kv(),Utr=Oh();function xQ(e){this.local=this.regionalOptions[e||""]||this.regionalOptions[""]}xQ.prototype=new ug.baseCalendar;Utr(xQ.prototype,{name:"Mayan",jdEpoch:584282.5,hasYearZero:!0,minMonth:0,firstMonth:0,minDay:0,regionalOptions:{"":{name:"Mayan",epochs:["",""],monthNames:["0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17"],monthNamesShort:["0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17"],dayNames:["0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19"],dayNamesShort:["0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19"],dayNamesMin:["0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19"],digits:null,dateFormat:"YYYY.m.d",firstDay:0,isRTL:!1,haabMonths:["Pop","Uo","Zip","Zotz","Tzec","Xul","Yaxkin","Mol","Chen","Yax","Zac","Ceh","Mac","Kankin","Muan","Pax","Kayab","Cumku","Uayeb"],tzolkinMonths:["Imix","Ik","Akbal","Kan","Chicchan","Cimi","Manik","Lamat","Muluc","Oc","Chuen","Eb","Ben","Ix","Men","Cib","Caban","Etznab","Cauac","Ahau"]}},leapYear:function(e){return this._validate(e,this.minMonth,this.minDay,ug.local.invalidYear),!1},formatYear:function(e){var t=this._validate(e,this.minMonth,this.minDay,ug.local.invalidYear);e=t.year();var r=Math.floor(e/400);e=e%400,e+=e<0?400:0;var n=Math.floor(e/20);return r+"."+n+"."+e%20},forYear:function(e){if(e=e.split("."),e.length<3)throw"Invalid Mayan year";for(var t=0,r=0;r<e.length;r++){var n=parseInt(e[r],10);if(Math.abs(n)>19||r>0&&n<0)throw"Invalid Mayan year";t=t*20+n}return t},monthsInYear:function(e){return this._validate(e,this.minMonth,this.minDay,ug.local.invalidYear),18},weekOfYear:function(e,t,r){return this._validate(e,t,r,ug.local.invalidDate),0},daysInYear:function(e){return this._validate(e,this.minMonth,this.minDay,ug.local.invalidYear),360},daysInMonth:function(e,t){return this._validate(e,t,this.minDay,ug.local.invalidMonth),20},daysInWeek:function(){return 5},dayOfWeek:function(e,t,r){var n=this._validate(e,t,r,ug.local.invalidDate);return n.day()},weekDay:function(e,t,r){return this._validate(e,t,r,ug.local.invalidDate),!0},extraInfo:function(e,t,r){var n=this._validate(e,t,r,ug.local.invalidDate),i=n.toJD(),a=this._toHaab(i),o=this._toTzolkin(i);return{haabMonthName:this.local.haabMonths[a[0]-1],haabMonth:a[0],haabDay:a[1],tzolkinDayName:this.local.tzolkinMonths[o[0]-1],tzolkinDay:o[0],tzolkinTrecena:o[1]}},_toHaab:function(e){e-=this.jdEpoch;var t=_Q(e+8+17*20,365);return[Math.floor(t/20)+1,_Q(t,20)]},_toTzolkin:function(e){return e-=this.jdEpoch,[tet(e+20,20),tet(e+4,13)]},toJD:function(e,t,r){var n=this._validate(e,t,r,ug.local.invalidDate);return n.day()+n.month()*20+n.year()*360+this.jdEpoch},fromJD:function(e){e=Math.floor(e)+.5-this.jdEpoch;var t=Math.floor(e/360);e=e%360,e+=e<0?360:0;var r=Math.floor(e/20),n=e%20;return this.newDate(t,r,n)}});function _Q(e,t){return e-t*Math.floor(e/t)}function tet(e,t){return _Q(e-1,t)+1}ug.calendars.mayan=xQ});var net=ye(()=>{var Tw=kv(),Vtr=Oh();function bQ(e){this.local=this.regionalOptions[e||""]||this.regionalOptions[""]}bQ.prototype=new Tw.baseCalendar;var iet=Tw.instance("gregorian");Vtr(bQ.prototype,{name:"Nanakshahi",jdEpoch:22576735e-1,daysPerMonth:[31,31,31,31,31,30,30,30,30,30,30,30],hasYearZero:!1,minMonth:1,firstMonth:1,minDay:1,regionalOptions:{"":{name:"Nanakshahi",epochs:["BN","AN"],monthNames:["Chet","Vaisakh","Jeth","Harh","Sawan","Bhadon","Assu","Katak","Maghar","Poh","Magh","Phagun"],monthNamesShort:["Che","Vai","Jet","Har","Saw","Bha","Ass","Kat","Mgr","Poh","Mgh","Pha"],dayNames:["Somvaar","Mangalvar","Budhvaar","Veervaar","Shukarvaar","Sanicharvaar","Etvaar"],dayNamesShort:["Som","Mangal","Budh","Veer","Shukar","Sanichar","Et"],dayNamesMin:["So","Ma","Bu","Ve","Sh","Sa","Et"],digits:null,dateFormat:"dd-mm-yyyy",firstDay:0,isRTL:!1}},leapYear:function(e){var t=this._validate(e,this.minMonth,this.minDay,Tw.local.invalidYear||Tw.regionalOptions[""].invalidYear);return iet.leapYear(t.year()+(t.year()<1?1:0)+1469)},weekOfYear:function(e,t,r){var n=this.newDate(e,t,r);return n.add(1-(n.dayOfWeek()||7),"d"),Math.floor((n.dayOfYear()-1)/7)+1},daysInMonth:function(e,t){var r=this._validate(e,t,this.minDay,Tw.local.invalidMonth);return this.daysPerMonth[r.month()-1]+(r.month()===12&&this.leapYear(r.year())?1:0)},weekDay:function(e,t,r){return(this.dayOfWeek(e,t,r)||7)<6},toJD:function(i,t,r){var n=this._validate(i,t,r,Tw.local.invalidMonth),i=n.year();i<0&&i++;for(var a=n.day(),o=1;o<n.month();o++)a+=this.daysPerMonth[o-1];return a+iet.toJD(i+1468,3,13)},fromJD:function(e){e=Math.floor(e+.5);for(var t=Math.floor((e-(this.jdEpoch-1))/366);e>=this.toJD(t+1,1,1);)t++;for(var r=e-Math.floor(this.toJD(t,1,1)+.5)+1,n=1;r>this.daysInMonth(t,n);)r-=this.daysInMonth(t,n),n++;return this.newDate(t,n,r)}});Tw.calendars.nanakshahi=bQ});var aet=ye(()=>{var Aw=kv(),Gtr=Oh();function wQ(e){this.local=this.regionalOptions[e||""]||this.regionalOptions[""]}wQ.prototype=new Aw.baseCalendar;Gtr(wQ.prototype,{name:"Nepali",jdEpoch:17007095e-1,daysPerMonth:[31,31,32,32,31,30,30,29,30,29,30,30],hasYearZero:!1,minMonth:1,firstMonth:1,minDay:1,daysPerYear:365,regionalOptions:{"":{name:"Nepali",epochs:["BBS","ABS"],monthNames:["Baisakh","Jestha","Ashadh","Shrawan","Bhadra","Ashwin","Kartik","Mangsir","Paush","Mangh","Falgun","Chaitra"],monthNamesShort:["Bai","Je","As","Shra","Bha","Ash","Kar","Mang","Pau","Ma","Fal","Chai"],dayNames:["Aaitabaar","Sombaar","Manglbaar","Budhabaar","Bihibaar","Shukrabaar","Shanibaar"],dayNamesShort:["Aaita","Som","Mangl","Budha","Bihi","Shukra","Shani"],dayNamesMin:["Aai","So","Man","Bu","Bi","Shu","Sha"],digits:null,dateFormat:"dd/mm/yyyy",firstDay:1,isRTL:!1}},leapYear:function(e){return this.daysInYear(e)!==this.daysPerYear},weekOfYear:function(e,t,r){var n=this.newDate(e,t,r);return n.add(-n.dayOfWeek(),"d"),Math.floor((n.dayOfYear()-1)/7)+1},daysInYear:function(e){var t=this._validate(e,this.minMonth,this.minDay,Aw.local.invalidYear);if(e=t.year(),typeof this.NEPALI_CALENDAR_DATA[e]=="undefined")return this.daysPerYear;for(var r=0,n=this.minMonth;n<=12;n++)r+=this.NEPALI_CALENDAR_DATA[e][n];return r},daysInMonth:function(e,t){return e.year&&(t=e.month(),e=e.year()),this._validate(e,t,this.minDay,Aw.local.invalidMonth),typeof this.NEPALI_CALENDAR_DATA[e]=="undefined"?this.daysPerMonth[t-1]:this.NEPALI_CALENDAR_DATA[e][t]},weekDay:function(e,t,r){return this.dayOfWeek(e,t,r)!==6},toJD:function(e,t,r){var n=this._validate(e,t,r,Aw.local.invalidDate);e=n.year(),t=n.month(),r=n.day();var i=Aw.instance(),a=0,o=t,s=e;this._createMissingCalendarData(e);var l=e-(o>9||o===9&&r>=this.NEPALI_CALENDAR_DATA[s][0]?56:57);for(t!==9&&(a=r,o--);o!==9;)o<=0&&(o=12,s--),a+=this.NEPALI_CALENDAR_DATA[s][o],o--;return t===9?(a+=r-this.NEPALI_CALENDAR_DATA[s][0],a<0&&(a+=i.daysInYear(l))):a+=this.NEPALI_CALENDAR_DATA[s][9]-this.NEPALI_CALENDAR_DATA[s][0],i.newDate(l,1,1).add(a,"d").toJD()},fromJD:function(e){var t=Aw.instance(),r=t.fromJD(e),n=r.year(),i=r.dayOfYear(),a=n+56;this._createMissingCalendarData(a);for(var o=9,s=this.NEPALI_CALENDAR_DATA[a][0],l=this.NEPALI_CALENDAR_DATA[a][o]-s+1;i>l;)o++,o>12&&(o=1,a++),l+=this.NEPALI_CALENDAR_DATA[a][o];var u=this.NEPALI_CALENDAR_DATA[a][o]-(l-i);return this.newDate(a,o,u)},_createMissingCalendarData:function(e){var t=this.daysPerMonth.slice(0);t.unshift(17);for(var r=e-1;r<e+2;r++)typeof this.NEPALI_CALENDAR_DATA[r]=="undefined"&&(this.NEPALI_CALENDAR_DATA[r]=t)},NEPALI_CALENDAR_DATA:{1970:[18,31,31,32,31,31,31,30,29,30,29,30,30],1971:[18,31,31,32,31,32,30,30,29,30,29,30,30],1972:[17,31,32,31,32,31,30,30,30,29,29,30,30],1973:[19,30,32,31,32,31,30,30,30,29,30,29,31],1974:[19,31,31,32,30,31,31,30,29,30,29,30,30],1975:[18,31,31,32,32,30,31,30,29,30,29,30,30],1976:[17,31,32,31,32,31,30,30,30,29,29,30,31],1977:[18,31,32,31,32,31,31,29,30,29,30,29,31],1978:[18,31,31,32,31,31,31,30,29,30,29,30,30],1979:[18,31,31,32,32,31,30,30,29,30,29,30,30],1980:[17,31,32,31,32,31,30,30,30,29,29,30,31],1981:[18,31,31,31,32,31,31,29,30,30,29,30,30],1982:[18,31,31,32,31,31,31,30,29,30,29,30,30],1983:[18,31,31,32,32,31,30,30,29,30,29,30,30],1984:[17,31,32,31,32,31,30,30,30,29,29,30,31],1985:[18,31,31,31,32,31,31,29,30,30,29,30,30],1986:[18,31,31,32,31,31,31,30,29,30,29,30,30],1987:[18,31,32,31,32,31,30,30,29,30,29,30,30],1988:[17,31,32,31,32,31,30,30,30,29,29,30,31],1989:[18,31,31,31,32,31,31,30,29,30,29,30,30],1990:[18,31,31,32,31,31,31,30,29,30,29,30,30],1991:[18,31,32,31,32,31,30,30,29,30,29,30,30],1992:[17,31,32,31,32,31,30,30,30,29,30,29,31],1993:[18,31,31,31,32,31,31,30,29,30,29,30,30],1994:[18,31,31,32,31,31,31,30,29,30,29,30,30],1995:[17,31,32,31,32,31,30,30,30,29,29,30,30],1996:[17,31,32,31,32,31,30,30,30,29,30,29,31],1997:[18,31,31,32,31,31,31,30,29,30,29,30,30],1998:[18,31,31,32,31,31,31,30,29,30,29,30,30],1999:[17,31,32,31,32,31,30,30,30,29,29,30,31],2e3:[17,30,32,31,32,31,30,30,30,29,30,29,31],2001:[18,31,31,32,31,31,31,30,29,30,29,30,30],2002:[18,31,31,32,32,31,30,30,29,30,29,30,30],2003:[17,31,32,31,32,31,30,30,30,29,29,30,31],2004:[17,30,32,31,32,31,30,30,30,29,30,29,31],2005:[18,31,31,32,31,31,31,30,29,30,29,30,30],2006:[18,31,31,32,32,31,30,30,29,30,29,30,30],2007:[17,31,32,31,32,31,30,30,30,29,29,30,31],2008:[17,31,31,31,32,31,31,29,30,30,29,29,31],2009:[18,31,31,32,31,31,31,30,29,30,29,30,30],2010:[18,31,31,32,32,31,30,30,29,30,29,30,30],2011:[17,31,32,31,32,31,30,30,30,29,29,30,31],2012:[17,31,31,31,32,31,31,29,30,30,29,30,30],2013:[18,31,31,32,31,31,31,30,29,30,29,30,30],2014:[18,31,31,32,32,31,30,30,29,30,29,30,30],2015:[17,31,32,31,32,31,30,30,30,29,29,30,31],2016:[17,31,31,31,32,31,31,29,30,30,29,30,30],2017:[18,31,31,32,31,31,31,30,29,30,29,30,30],2018:[18,31,32,31,32,31,30,30,29,30,29,30,30],2019:[17,31,32,31,32,31,30,30,30,29,30,29,31],2020:[17,31,31,31,32,31,31,30,29,30,29,30,30],2021:[18,31,31,32,31,31,31,30,29,30,29,30,30],2022:[17,31,32,31,32,31,30,30,30,29,29,30,30],2023:[17,31,32,31,32,31,30,30,30,29,30,29,31],2024:[17,31,31,31,32,31,31,30,29,30,29,30,30],2025:[18,31,31,32,31,31,31,30,29,30,29,30,30],2026:[17,31,32,31,32,31,30,30,30,29,29,30,31],2027:[17,30,32,31,32,31,30,30,30,29,30,29,31],2028:[17,31,31,32,31,31,31,30,29,30,29,30,30],2029:[18,31,31,32,31,32,30,30,29,30,29,30,30],2030:[17,31,32,31,32,31,30,30,30,30,30,30,31],2031:[17,31,32,31,32,31,31,31,31,31,31,31,31],2032:[17,32,32,32,32,32,32,32,32,32,32,32,32],2033:[18,31,31,32,32,31,30,30,29,30,29,30,30],2034:[17,31,32,31,32,31,30,30,30,29,29,30,31],2035:[17,30,32,31,32,31,31,29,30,30,29,29,31],2036:[17,31,31,32,31,31,31,30,29,30,29,30,30],2037:[18,31,31,32,32,31,30,30,29,30,29,30,30],2038:[17,31,32,31,32,31,30,30,30,29,29,30,31],2039:[17,31,31,31,32,31,31,29,30,30,29,30,30],2040:[17,31,31,32,31,31,31,30,29,30,29,30,30],2041:[18,31,31,32,32,31,30,30,29,30,29,30,30],2042:[17,31,32,31,32,31,30,30,30,29,29,30,31],2043:[17,31,31,31,32,31,31,29,30,30,29,30,30],2044:[17,31,31,32,31,31,31,30,29,30,29,30,30],2045:[18,31,32,31,32,31,30,30,29,30,29,30,30],2046:[17,31,32,31,32,31,30,30,30,29,29,30,31],2047:[17,31,31,31,32,31,31,30,29,30,29,30,30],2048:[17,31,31,32,31,31,31,30,29,30,29,30,30],2049:[17,31,32,31,32,31,30,30,30,29,29,30,30],2050:[17,31,32,31,32,31,30,30,30,29,30,29,31],2051:[17,31,31,31,32,31,31,30,29,30,29,30,30],2052:[17,31,31,32,31,31,31,30,29,30,29,30,30],2053:[17,31,32,31,32,31,30,30,30,29,29,30,30],2054:[17,31,32,31,32,31,30,30,30,29,30,29,31],2055:[17,31,31,32,31,31,31,30,29,30,30,29,30],2056:[17,31,31,32,31,32,30,30,29,30,29,30,30],2057:[17,31,32,31,32,31,30,30,30,29,29,30,31],2058:[17,30,32,31,32,31,30,30,30,29,30,29,31],2059:[17,31,31,32,31,31,31,30,29,30,29,30,30],2060:[17,31,31,32,32,31,30,30,29,30,29,30,30],2061:[17,31,32,31,32,31,30,30,30,29,29,30,31],2062:[17,30,32,31,32,31,31,29,30,29,30,29,31],2063:[17,31,31,32,31,31,31,30,29,30,29,30,30],2064:[17,31,31,32,32,31,30,30,29,30,29,30,30],2065:[17,31,32,31,32,31,30,30,30,29,29,30,31],2066:[17,31,31,31,32,31,31,29,30,30,29,29,31],2067:[17,31,31,32,31,31,31,30,29,30,29,30,30],2068:[17,31,31,32,32,31,30,30,29,30,29,30,30],2069:[17,31,32,31,32,31,30,30,30,29,29,30,31],2070:[17,31,31,31,32,31,31,29,30,30,29,30,30],2071:[17,31,31,32,31,31,31,30,29,30,29,30,30],2072:[17,31,32,31,32,31,30,30,29,30,29,30,30],2073:[17,31,32,31,32,31,30,30,30,29,29,30,31],2074:[17,31,31,31,32,31,31,30,29,30,29,30,30],2075:[17,31,31,32,31,31,31,30,29,30,29,30,30],2076:[16,31,32,31,32,31,30,30,30,29,29,30,30],2077:[17,31,32,31,32,31,30,30,30,29,30,29,31],2078:[17,31,31,31,32,31,31,30,29,30,29,30,30],2079:[17,31,31,32,31,31,31,30,29,30,29,30,30],2080:[16,31,32,31,32,31,30,30,30,29,29,30,30],2081:[17,31,31,32,32,31,30,30,30,29,30,30,30],2082:[17,31,32,31,32,31,30,30,30,29,30,30,30],2083:[17,31,31,32,31,31,30,30,30,29,30,30,30],2084:[17,31,31,32,31,31,30,30,30,29,30,30,30],2085:[17,31,32,31,32,31,31,30,30,29,30,30,30],2086:[17,31,32,31,32,31,30,30,30,29,30,30,30],2087:[16,31,31,32,31,31,31,30,30,29,30,30,30],2088:[16,30,31,32,32,30,31,30,30,29,30,30,30],2089:[17,31,32,31,32,31,30,30,30,29,30,30,30],2090:[17,31,32,31,32,31,30,30,30,29,30,30,30],2091:[16,31,31,32,31,31,31,30,30,29,30,30,30],2092:[16,31,31,32,32,31,30,30,30,29,30,30,30],2093:[17,31,32,31,32,31,30,30,30,29,30,30,30],2094:[17,31,31,32,31,31,30,30,30,29,30,30,30],2095:[17,31,31,32,31,31,31,30,29,30,30,30,30],2096:[17,30,31,32,32,31,30,30,29,30,29,30,30],2097:[17,31,32,31,32,31,30,30,30,29,30,30,30],2098:[17,31,31,32,31,31,31,29,30,29,30,30,31],2099:[17,31,31,32,31,31,31,30,29,29,30,30,30],2100:[17,31,32,31,32,30,31,30,29,30,29,30,30]}});Aw.calendars.nepali=wQ});var oet=ye(()=>{var rS=kv(),Htr=Oh();function D9(e){this.local=this.regionalOptions[e||""]||this.regionalOptions[""]}function R9(e){var t=e-475;e<0&&t++;var r=.242197,n=r*t,i=r*(t+1),a=n-Math.floor(n),o=i-Math.floor(i);return a>o}D9.prototype=new rS.baseCalendar;Htr(D9.prototype,{name:"Persian",jdEpoch:19483205e-1,daysPerMonth:[31,31,31,31,31,31,30,30,30,30,30,29],hasYearZero:!1,minMonth:1,firstMonth:1,minDay:1,regionalOptions:{"":{name:"Persian",epochs:["BP","AP"],monthNames:["Farvardin","Ordibehesht","Khordad","Tir","Mordad","Shahrivar","Mehr","Aban","Azar","Dey","Bahman","Esfand"],monthNamesShort:["Far","Ord","Kho","Tir","Mor","Sha","Meh","Aba","Aza","Dey","Bah","Esf"],dayNames:["Yekshanbeh","Doshanbeh","Seshanbeh","Chah\u0101rshanbeh","Panjshanbeh","Jom'eh","Shanbeh"],dayNamesShort:["Yek","Do","Se","Cha","Panj","Jom","Sha"],dayNamesMin:["Ye","Do","Se","Ch","Pa","Jo","Sh"],digits:null,dateFormat:"yyyy/mm/dd",firstDay:6,isRTL:!1}},leapYear:function(e){var t=this._validate(e,this.minMonth,this.minDay,rS.local.invalidYear);return R9(t.year())},weekOfYear:function(e,t,r){var n=this.newDate(e,t,r);return n.add(-((n.dayOfWeek()+1)%7),"d"),Math.floor((n.dayOfYear()-1)/7)+1},daysInMonth:function(e,t){var r=this._validate(e,t,this.minDay,rS.local.invalidMonth);return this.daysPerMonth[r.month()-1]+(r.month()===12&&this.leapYear(r.year())?1:0)},weekDay:function(e,t,r){return this.dayOfWeek(e,t,r)!==5},toJD:function(e,t,r){var n=this._validate(e,t,r,rS.local.invalidDate);e=n.year(),t=n.month(),r=n.day();var i=0;if(e>0)for(var a=1;a<e;a++)R9(a)&&i++;else if(e<0)for(var a=e;a<0;a++)R9(a)&&i--;return r+(t<=7?(t-1)*31:(t-1)*30+6)+(e>0?e-1:e)*365+i+this.jdEpoch-1},fromJD:function(e){e=Math.floor(e)+.5;var t=475+(e-this.toJD(475,1,1))/365.242197,r=Math.floor(t);r<=0&&r--,e>this.toJD(r,12,R9(r)?30:29)&&(r++,r===0&&r++);var n=e-this.toJD(r,1,1)+1,i=n<=186?Math.ceil(n/31):Math.ceil((n-6)/30),a=e-this.toJD(r,i,1)+1;return this.newDate(r,i,a)}});rS.calendars.persian=D9;rS.calendars.jalali=D9});var set=ye(()=>{var Sw=kv(),jtr=Oh(),F9=Sw.instance();function TQ(e){this.local=this.regionalOptions[e||""]||this.regionalOptions[""]}TQ.prototype=new Sw.baseCalendar;jtr(TQ.prototype,{name:"Taiwan",jdEpoch:24194025e-1,yearsOffset:1911,daysPerMonth:[31,28,31,30,31,30,31,31,30,31,30,31],hasYearZero:!1,minMonth:1,firstMonth:1,minDay:1,regionalOptions:{"":{name:"Taiwan",epochs:["BROC","ROC"],monthNames:["January","February","March","April","May","June","July","August","September","October","November","December"],monthNamesShort:["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],dayNames:["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],dayNamesShort:["Sun","Mon","Tue","Wed","Thu","Fri","Sat"],dayNamesMin:["Su","Mo","Tu","We","Th","Fr","Sa"],digits:null,dateFormat:"yyyy/mm/dd",firstDay:1,isRTL:!1}},leapYear:function(r){var t=this._validate(r,this.minMonth,this.minDay,Sw.local.invalidYear),r=this._t2gYear(t.year());return F9.leapYear(r)},weekOfYear:function(i,t,r){var n=this._validate(i,this.minMonth,this.minDay,Sw.local.invalidYear),i=this._t2gYear(n.year());return F9.weekOfYear(i,n.month(),n.day())},daysInMonth:function(e,t){var r=this._validate(e,t,this.minDay,Sw.local.invalidMonth);return this.daysPerMonth[r.month()-1]+(r.month()===2&&this.leapYear(r.year())?1:0)},weekDay:function(e,t,r){return(this.dayOfWeek(e,t,r)||7)<6},toJD:function(i,t,r){var n=this._validate(i,t,r,Sw.local.invalidDate),i=this._t2gYear(n.year());return F9.toJD(i,n.month(),n.day())},fromJD:function(e){var t=F9.fromJD(e),r=this._g2tYear(t.year());return this.newDate(r,t.month(),t.day())},_t2gYear:function(e){return e+this.yearsOffset+(e>=-this.yearsOffset&&e<=-1?1:0)},_g2tYear:function(e){return e-this.yearsOffset-(e>=1&&e<=this.yearsOffset?1:0)}});Sw.calendars.taiwan=TQ});var uet=ye(()=>{var Mw=kv(),Wtr=Oh(),z9=Mw.instance();function AQ(e){this.local=this.regionalOptions[e||""]||this.regionalOptions[""]}AQ.prototype=new Mw.baseCalendar;Wtr(AQ.prototype,{name:"Thai",jdEpoch:15230985e-1,yearsOffset:543,daysPerMonth:[31,28,31,30,31,30,31,31,30,31,30,31],hasYearZero:!1,minMonth:1,firstMonth:1,minDay:1,regionalOptions:{"":{name:"Thai",epochs:["BBE","BE"],monthNames:["January","February","March","April","May","June","July","August","September","October","November","December"],monthNamesShort:["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],dayNames:["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],dayNamesShort:["Sun","Mon","Tue","Wed","Thu","Fri","Sat"],dayNamesMin:["Su","Mo","Tu","We","Th","Fr","Sa"],digits:null,dateFormat:"dd/mm/yyyy",firstDay:0,isRTL:!1}},leapYear:function(r){var t=this._validate(r,this.minMonth,this.minDay,Mw.local.invalidYear),r=this._t2gYear(t.year());return z9.leapYear(r)},weekOfYear:function(i,t,r){var n=this._validate(i,this.minMonth,this.minDay,Mw.local.invalidYear),i=this._t2gYear(n.year());return z9.weekOfYear(i,n.month(),n.day())},daysInMonth:function(e,t){var r=this._validate(e,t,this.minDay,Mw.local.invalidMonth);return this.daysPerMonth[r.month()-1]+(r.month()===2&&this.leapYear(r.year())?1:0)},weekDay:function(e,t,r){return(this.dayOfWeek(e,t,r)||7)<6},toJD:function(i,t,r){var n=this._validate(i,t,r,Mw.local.invalidDate),i=this._t2gYear(n.year());return z9.toJD(i,n.month(),n.day())},fromJD:function(e){var t=z9.fromJD(e),r=this._g2tYear(t.year());return this.newDate(r,t.month(),t.day())},_t2gYear:function(e){return e-this.yearsOffset-(e>=1&&e<=this.yearsOffset?1:0)},_g2tYear:function(e){return e+this.yearsOffset+(e>=-this.yearsOffset&&e<=-1?1:0)}});Mw.calendars.thai=AQ});var cet=ye(()=>{var Ew=kv(),Xtr=Oh();function SQ(e){this.local=this.regionalOptions[e||""]||this.regionalOptions[""]}SQ.prototype=new Ew.baseCalendar;Xtr(SQ.prototype,{name:"UmmAlQura",hasYearZero:!1,minMonth:1,firstMonth:1,minDay:1,regionalOptions:{"":{name:"Umm al-Qura",epochs:["BH","AH"],monthNames:["Al-Muharram","Safar","Rabi' al-awwal","Rabi' Al-Thani","Jumada Al-Awwal","Jumada Al-Thani","Rajab","Sha'aban","Ramadan","Shawwal","Dhu al-Qi'dah","Dhu al-Hijjah"],monthNamesShort:["Muh","Saf","Rab1","Rab2","Jum1","Jum2","Raj","Sha'","Ram","Shaw","DhuQ","DhuH"],dayNames:["Yawm al-Ahad","Yawm al-Ithnain","Yawm al-Thal\u0101th\u0101\u2019","Yawm al-Arba\u2018\u0101\u2019","Yawm al-Kham\u012Bs","Yawm al-Jum\u2018a","Yawm al-Sabt"],dayNamesMin:["Ah","Ith","Th","Ar","Kh","Ju","Sa"],digits:null,dateFormat:"yyyy/mm/dd",firstDay:6,isRTL:!0}},leapYear:function(e){var t=this._validate(e,this.minMonth,this.minDay,Ew.local.invalidYear);return this.daysInYear(t.year())===355},weekOfYear:function(e,t,r){var n=this.newDate(e,t,r);return n.add(-n.dayOfWeek(),"d"),Math.floor((n.dayOfYear()-1)/7)+1},daysInYear:function(e){for(var t=0,r=1;r<=12;r++)t+=this.daysInMonth(e,r);return t},daysInMonth:function(e,t){for(var r=this._validate(e,t,this.minDay,Ew.local.invalidMonth),n=r.toJD()-24e5+.5,i=0,a=0;a<Bx.length;a++){if(Bx[a]>n)return Bx[i]-Bx[i-1];i++}return 30},weekDay:function(e,t,r){return this.dayOfWeek(e,t,r)!==5},toJD:function(e,t,r){var n=this._validate(e,t,r,Ew.local.invalidDate),i=12*(n.year()-1)+n.month()-15292,a=n.day()+Bx[i-1]-1;return a+24e5-.5},fromJD:function(e){for(var t=e-24e5+.5,r=0,n=0;n<Bx.length&&!(Bx[n]>t);n++)r++;var i=r+15292,a=Math.floor((i-1)/12),o=a+1,s=i-12*a,l=t-Bx[r-1]+1;return this.newDate(o,s,l)},isValid:function(e,t,r){var n=Ew.baseCalendar.prototype.isValid.apply(this,arguments);return n&&(e=e.year!=null?e.year:e,n=e>=1276&&e<=1500),n},_validate:function(e,t,r,n){var i=Ew.baseCalendar.prototype._validate.apply(this,arguments);if(i.year<1276||i.year>1500)throw n.replace(/\{0\}/,this.local.name);return i}});Ew.calendars.ummalqura=SQ;var Bx=[20,50,79,109,138,168,197,227,256,286,315,345,374,404,433,463,492,522,551,581,611,641,670,700,729,759,788,818,847,877,906,936,965,995,1024,1054,1083,1113,1142,1172,1201,1231,1260,1290,1320,1350,1379,1409,1438,1468,1497,1527,1556,1586,1615,1645,1674,1704,1733,1763,1792,1822,1851,1881,1910,1940,1969,1999,2028,2058,2087,2117,2146,2176,2205,2235,2264,2294,2323,2353,2383,2413,2442,2472,2501,2531,2560,2590,2619,2649,2678,2708,2737,2767,2796,2826,2855,2885,2914,2944,2973,3003,3032,3062,3091,3121,3150,3180,3209,3239,3268,3298,3327,3357,3386,3416,3446,3476,3505,3535,3564,3594,3623,3653,3682,3712,3741,3771,3800,3830,3859,3889,3918,3948,3977,4007,4036,4066,4095,4125,4155,4185,4214,4244,4273,4303,4332,4362,4391,4421,4450,4480,4509,4539,4568,4598,4627,4657,4686,4716,4745,4775,4804,4834,4863,4893,4922,4952,4981,5011,5040,5070,5099,5129,5158,5188,5218,5248,5277,5307,5336,5366,5395,5425,5454,5484,5513,5543,5572,5602,5631,5661,5690,5720,5749,5779,5808,5838,5867,5897,5926,5956,5985,6015,6044,6074,6103,6133,6162,6192,6221,6251,6281,6311,6340,6370,6399,6429,6458,6488,6517,6547,6576,6606,6635,6665,6694,6724,6753,6783,6812,6842,6871,6901,6930,6960,6989,7019,7048,7078,7107,7137,7166,7196,7225,7255,7284,7314,7344,7374,7403,7433,7462,7492,7521,7551,7580,7610,7639,7669,7698,7728,7757,7787,7816,7846,7875,7905,7934,7964,7993,8023,8053,8083,8112,8142,8171,8201,8230,8260,8289,8319,8348,8378,8407,8437,8466,8496,8525,8555,8584,8614,8643,8673,8702,8732,8761,8791,8821,8850,8880,8909,8938,8968,8997,9027,9056,9086,9115,9145,9175,9205,9234,9264,9293,9322,9352,9381,9410,9440,9470,9499,9529,9559,9589,9618,9648,9677,9706,9736,9765,9794,9824,9853,9883,9913,9943,9972,10002,10032,10061,10090,10120,10149,10178,10208,10237,10267,10297,10326,10356,10386,10415,10445,10474,10504,10533,10562,10592,10621,10651,10680,10710,10740,10770,10799,10829,10858,10888,10917,10947,10976,11005,11035,11064,11094,11124,11153,11183,11213,11242,11272,11301,11331,11360,11389,11419,11448,11478,11507,11537,11567,11596,11626,11655,11685,11715,11744,11774,11803,11832,11862,11891,11921,11950,11980,12010,12039,12069,12099,12128,12158,12187,12216,12246,12275,12304,12334,12364,12393,12423,12453,12483,12512,12542,12571,12600,12630,12659,12688,12718,12747,12777,12807,12837,12866,12896,12926,12955,12984,13014,13043,13072,13102,13131,13161,13191,13220,13250,13280,13310,13339,13368,13398,13427,13456,13486,13515,13545,13574,13604,13634,13664,13693,13723,13752,13782,13811,13840,13870,13899,13929,13958,13988,14018,14047,14077,14107,14136,14166,14195,14224,14254,14283,14313,14342,14372,14401,14431,14461,14490,14520,14550,14579,14609,14638,14667,14697,14726,14756,14785,14815,14844,14874,14904,14933,14963,14993,15021,15051,15081,15110,15140,15169,15199,15228,15258,15287,15317,15347,15377,15406,15436,15465,15494,15524,15553,15582,15612,15641,15671,15701,15731,15760,15790,15820,15849,15878,15908,15937,15966,15996,16025,16055,16085,16114,16144,16174,16204,16233,16262,16292,16321,16350,16380,16409,16439,16468,16498,16528,16558,16587,16617,16646,16676,16705,16734,16764,16793,16823,16852,16882,16912,16941,16971,17001,17030,17060,17089,17118,17148,17177,17207,17236,17266,17295,17325,17355,17384,17414,17444,17473,17502,17532,17561,17591,17620,17650,17679,17709,17738,17768,17798,17827,17857,17886,17916,17945,17975,18004,18034,18063,18093,18122,18152,18181,18211,18241,18270,18300,18330,18359,18388,18418,18447,18476,18506,18535,18565,18595,18625,18654,18684,18714,18743,18772,18802,18831,18860,18890,18919,18949,18979,19008,19038,19068,19098,19127,19156,19186,19215,19244,19274,19303,19333,19362,19392,19422,19452,19481,19511,19540,19570,19599,19628,19658,19687,19717,19746,19776,19806,19836,19865,19895,19924,19954,19983,20012,20042,20071,20101,20130,20160,20190,20219,20249,20279,20308,20338,20367,20396,20426,20455,20485,20514,20544,20573,20603,20633,20662,20692,20721,20751,20780,20810,20839,20869,20898,20928,20957,20987,21016,21046,21076,21105,21135,21164,21194,21223,21253,21282,21312,21341,21371,21400,21430,21459,21489,21519,21548,21578,21607,21637,21666,21696,21725,21754,21784,21813,21843,21873,21902,21932,21962,21991,22021,22050,22080,22109,22138,22168,22197,22227,22256,22286,22316,22346,22375,22405,22434,22464,22493,22522,22552,22581,22611,22640,22670,22700,22730,22759,22789,22818,22848,22877,22906,22936,22965,22994,23024,23054,23083,23113,23143,23173,23202,23232,23261,23290,23320,23349,23379,23408,23438,23467,23497,23527,23556,23586,23616,23645,23674,23704,23733,23763,23792,23822,23851,23881,23910,23940,23970,23999,24029,24058,24088,24117,24147,24176,24206,24235,24265,24294,24324,24353,24383,24413,24442,24472,24501,24531,24560,24590,24619,24648,24678,24707,24737,24767,24796,24826,24856,24885,24915,24944,24974,25003,25032,25062,25091,25121,25150,25180,25210,25240,25269,25299,25328,25358,25387,25416,25446,25475,25505,25534,25564,25594,25624,25653,25683,25712,25742,25771,25800,25830,25859,25888,25918,25948,25977,26007,26037,26067,26096,26126,26155,26184,26214,26243,26272,26302,26332,26361,26391,26421,26451,26480,26510,26539,26568,26598,26627,26656,26686,26715,26745,26775,26805,26834,26864,26893,26923,26952,26982,27011,27041,27070,27099,27129,27159,27188,27218,27248,27277,27307,27336,27366,27395,27425,27454,27484,27513,27542,27572,27602,27631,27661,27691,27720,27750,27779,27809,27838,27868,27897,27926,27956,27985,28015,28045,28074,28104,28134,28163,28193,28222,28252,28281,28310,28340,28369,28399,28428,28458,28488,28517,28547,28577,28607,28636,28665,28695,28724,28754,28783,28813,28843,28872,28901,28931,28960,28990,29019,29049,29078,29108,29137,29167,29196,29226,29255,29285,29315,29345,29375,29404,29434,29463,29492,29522,29551,29580,29610,29640,29669,29699,29729,29759,29788,29818,29847,29876,29906,29935,29964,29994,30023,30053,30082,30112,30141,30171,30200,30230,30259,30289,30318,30348,30378,30408,30437,30467,30496,30526,30555,30585,30614,30644,30673,30703,30732,30762,30791,30821,30850,30880,30909,30939,30968,30998,31027,31057,31086,31116,31145,31175,31204,31234,31263,31293,31322,31352,31381,31411,31441,31471,31500,31530,31559,31589,31618,31648,31676,31706,31736,31766,31795,31825,31854,31884,31913,31943,31972,32002,32031,32061,32090,32120,32150,32180,32209,32239,32268,32298,32327,32357,32386,32416,32445,32475,32504,32534,32563,32593,32622,32652,32681,32711,32740,32770,32799,32829,32858,32888,32917,32947,32976,33006,33035,33065,33094,33124,33153,33183,33213,33243,33272,33302,33331,33361,33390,33420,33450,33479,33509,33539,33568,33598,33627,33657,33686,33716,33745,33775,33804,33834,33863,33893,33922,33952,33981,34011,34040,34069,34099,34128,34158,34187,34217,34247,34277,34306,34336,34365,34395,34424,34454,34483,34512,34542,34571,34601,34631,34660,34690,34719,34749,34778,34808,34837,34867,34896,34926,34955,34985,35015,35044,35074,35103,35133,35162,35192,35222,35251,35280,35310,35340,35370,35399,35429,35458,35488,35517,35547,35576,35605,35635,35665,35694,35723,35753,35782,35811,35841,35871,35901,35930,35960,35989,36019,36048,36078,36107,36136,36166,36195,36225,36254,36284,36314,36343,36373,36403,36433,36462,36492,36521,36551,36580,36610,36639,36669,36698,36728,36757,36786,36816,36845,36875,36904,36934,36963,36993,37022,37052,37081,37111,37141,37170,37200,37229,37259,37288,37318,37347,37377,37406,37436,37465,37495,37524,37554,37584,37613,37643,37672,37701,37731,37760,37790,37819,37849,37878,37908,37938,37967,37997,38027,38056,38085,38115,38144,38174,38203,38233,38262,38292,38322,38351,38381,38410,38440,38469,38499,38528,38558,38587,38617,38646,38676,38705,38735,38764,38794,38823,38853,38882,38912,38941,38971,39001,39030,39059,39089,39118,39148,39178,39208,39237,39267,39297,39326,39355,39385,39414,39444,39473,39503,39532,39562,39592,39621,39650,39680,39709,39739,39768,39798,39827,39857,39886,39916,39946,39975,40005,40035,40064,40094,40123,40153,40182,40212,40241,40271,40300,40330,40359,40389,40418,40448,40477,40507,40536,40566,40595,40625,40655,40685,40714,40744,40773,40803,40832,40862,40892,40921,40951,40980,41009,41039,41068,41098,41127,41157,41186,41216,41245,41275,41304,41334,41364,41393,41422,41452,41481,41511,41540,41570,41599,41629,41658,41688,41718,41748,41777,41807,41836,41865,41894,41924,41953,41983,42012,42042,42072,42102,42131,42161,42190,42220,42249,42279,42308,42337,42367,42397,42426,42456,42485,42515,42545,42574,42604,42633,42662,42692,42721,42751,42780,42810,42839,42869,42899,42929,42958,42988,43017,43046,43076,43105,43135,43164,43194,43223,43253,43283,43312,43342,43371,43401,43430,43460,43489,43519,43548,43578,43607,43637,43666,43696,43726,43755,43785,43814,43844,43873,43903,43932,43962,43991,44021,44050,44080,44109,44139,44169,44198,44228,44258,44287,44317,44346,44375,44405,44434,44464,44493,44523,44553,44582,44612,44641,44671,44700,44730,44759,44788,44818,44847,44877,44906,44936,44966,44996,45025,45055,45084,45114,45143,45172,45202,45231,45261,45290,45320,45350,45380,45409,45439,45468,45498,45527,45556,45586,45615,45644,45674,45704,45733,45763,45793,45823,45852,45882,45911,45940,45970,45999,46028,46058,46088,46117,46147,46177,46206,46236,46265,46295,46324,46354,46383,46413,46442,46472,46501,46531,46560,46590,46620,46649,46679,46708,46738,46767,46797,46826,46856,46885,46915,46944,46974,47003,47033,47063,47092,47122,47151,47181,47210,47240,47269,47298,47328,47357,47387,47417,47446,47476,47506,47535,47565,47594,47624,47653,47682,47712,47741,47771,47800,47830,47860,47890,47919,47949,47978,48008,48037,48066,48096,48125,48155,48184,48214,48244,48273,48303,48333,48362,48392,48421,48450,48480,48509,48538,48568,48598,48627,48657,48687,48717,48746,48776,48805,48834,48864,48893,48922,48952,48982,49011,49041,49071,49100,49130,49160,49189,49218,49248,49277,49306,49336,49365,49395,49425,49455,49484,49514,49543,49573,49602,49632,49661,49690,49720,49749,49779,49809,49838,49868,49898,49927,49957,49986,50016,50045,50075,50104,50133,50163,50192,50222,50252,50281,50311,50340,50370,50400,50429,50459,50488,50518,50547,50576,50606,50635,50665,50694,50724,50754,50784,50813,50843,50872,50902,50931,50960,50990,51019,51049,51078,51108,51138,51167,51197,51227,51256,51286,51315,51345,51374,51403,51433,51462,51492,51522,51552,51582,51611,51641,51670,51699,51729,51758,51787,51816,51846,51876,51906,51936,51965,51995,52025,52054,52083,52113,52142,52171,52200,52230,52260,52290,52319,52349,52379,52408,52438,52467,52497,52526,52555,52585,52614,52644,52673,52703,52733,52762,52792,52822,52851,52881,52910,52939,52969,52998,53028,53057,53087,53116,53146,53176,53205,53235,53264,53294,53324,53353,53383,53412,53441,53471,53500,53530,53559,53589,53619,53648,53678,53708,53737,53767,53796,53825,53855,53884,53913,53943,53973,54003,54032,54062,54092,54121,54151,54180,54209,54239,54268,54297,54327,54357,54387,54416,54446,54476,54505,54535,54564,54593,54623,54652,54681,54711,54741,54770,54800,54830,54859,54889,54919,54948,54977,55007,55036,55066,55095,55125,55154,55184,55213,55243,55273,55302,55332,55361,55391,55420,55450,55479,55508,55538,55567,55597,55627,55657,55686,55716,55745,55775,55804,55834,55863,55892,55922,55951,55981,56011,56040,56070,56100,56129,56159,56188,56218,56247,56276,56306,56335,56365,56394,56424,56454,56483,56513,56543,56572,56601,56631,56660,56690,56719,56749,56778,56808,56837,56867,56897,56926,56956,56985,57015,57044,57074,57103,57133,57162,57192,57221,57251,57280,57310,57340,57369,57399,57429,57458,57487,57517,57546,57576,57605,57634,57664,57694,57723,57753,57783,57813,57842,57871,57901,57930,57959,57989,58018,58048,58077,58107,58137,58167,58196,58226,58255,58285,58314,58343,58373,58402,58432,58461,58491,58521,58551,58580,58610,58639,58669,58698,58727,58757,58786,58816,58845,58875,58905,58934,58964,58994,59023,59053,59082,59111,59141,59170,59200,59229,59259,59288,59318,59348,59377,59407,59436,59466,59495,59525,59554,59584,59613,59643,59672,59702,59731,59761,59791,59820,59850,59879,59909,59939,59968,59997,60027,60056,60086,60115,60145,60174,60204,60234,60264,60293,60323,60352,60381,60411,60440,60469,60499,60528,60558,60588,60618,60648,60677,60707,60736,60765,60795,60824,60853,60883,60912,60942,60972,61002,61031,61061,61090,61120,61149,61179,61208,61237,61267,61296,61326,61356,61385,61415,61445,61474,61504,61533,61563,61592,61621,61651,61680,61710,61739,61769,61799,61828,61858,61888,61917,61947,61976,62006,62035,62064,62094,62123,62153,62182,62212,62242,62271,62301,62331,62360,62390,62419,62448,62478,62507,62537,62566,62596,62625,62655,62685,62715,62744,62774,62803,62832,62862,62891,62921,62950,62980,63009,63039,63069,63099,63128,63157,63187,63216,63246,63275,63305,63334,63363,63393,63423,63453,63482,63512,63541,63571,63600,63630,63659,63689,63718,63747,63777,63807,63836,63866,63895,63925,63955,63984,64014,64043,64073,64102,64131,64161,64190,64220,64249,64279,64309,64339,64368,64398,64427,64457,64486,64515,64545,64574,64603,64633,64663,64692,64722,64752,64782,64811,64841,64870,64899,64929,64958,64987,65017,65047,65076,65106,65136,65166,65195,65225,65254,65283,65313,65342,65371,65401,65431,65460,65490,65520,65549,65579,65608,65638,65667,65697,65726,65755,65785,65815,65844,65874,65903,65933,65963,65992,66022,66051,66081,66110,66140,66169,66199,66228,66258,66287,66317,66346,66376,66405,66435,66465,66494,66524,66553,66583,66612,66641,66671,66700,66730,66760,66789,66819,66849,66878,66908,66937,66967,66996,67025,67055,67084,67114,67143,67173,67203,67233,67262,67292,67321,67351,67380,67409,67439,67468,67497,67527,67557,67587,67617,67646,67676,67705,67735,67764,67793,67823,67852,67882,67911,67941,67971,68e3,68030,68060,68089,68119,68148,68177,68207,68236,68266,68295,68325,68354,68384,68414,68443,68473,68502,68532,68561,68591,68620,68650,68679,68708,68738,68768,68797,68827,68857,68886,68916,68946,68975,69004,69034,69063,69092,69122,69152,69181,69211,69240,69270,69300,69330,69359,69388,69418,69447,69476,69506,69535,69565,69595,69624,69654,69684,69713,69743,69772,69802,69831,69861,69890,69919,69949,69978,70008,70038,70067,70097,70126,70156,70186,70215,70245,70274,70303,70333,70362,70392,70421,70451,70481,70510,70540,70570,70599,70629,70658,70687,70717,70746,70776,70805,70835,70864,70894,70924,70954,70983,71013,71042,71071,71101,71130,71159,71189,71218,71248,71278,71308,71337,71367,71397,71426,71455,71485,71514,71543,71573,71602,71632,71662,71691,71721,71751,71781,71810,71839,71869,71898,71927,71957,71986,72016,72046,72075,72105,72135,72164,72194,72223,72253,72282,72311,72341,72370,72400,72429,72459,72489,72518,72548,72577,72607,72637,72666,72695,72725,72754,72784,72813,72843,72872,72902,72931,72961,72991,73020,73050,73080,73109,73139,73168,73197,73227,73256,73286,73315,73345,73375,73404,73434,73464,73493,73523,73552,73581,73611,73640,73669,73699,73729,73758,73788,73818,73848,73877,73907,73936,73965,73995,74024,74053,74083,74113,74142,74172,74202,74231,74261,74291,74320,74349,74379,74408,74437,74467,74497,74526,74556,74586,74615,74645,74675,74704,74733,74763,74792,74822,74851,74881,74910,74940,74969,74999,75029,75058,75088,75117,75147,75176,75206,75235,75264,75294,75323,75353,75383,75412,75442,75472,75501,75531,75560,75590,75619,75648,75678,75707,75737,75766,75796,75826,75856,75885,75915,75944,75974,76003,76032,76062,76091,76121,76150,76180,76210,76239,76269,76299,76328,76358,76387,76416,76446,76475,76505,76534,76564,76593,76623,76653,76682,76712,76741,76771,76801,76830,76859,76889,76918,76948,76977,77007,77036,77066,77096,77125,77155,77185,77214,77243,77273,77302,77332,77361,77390,77420,77450,77479,77509,77539,77569,77598,77627,77657,77686,77715,77745,77774,77804,77833,77863,77893,77923,77952,77982,78011,78041,78070,78099,78129,78158,78188,78217,78247,78277,78307,78336,78366,78395,78425,78454,78483,78513,78542,78572,78601,78631,78661,78690,78720,78750,78779,78808,78838,78867,78897,78926,78956,78985,79015,79044,79074,79104,79133,79163,79192,79222,79251,79281,79310,79340,79369,79399,79428,79458,79487,79517,79546,79576,79606,79635,79665,79695,79724,79753,79783,79812,79841,79871,79900,79930,79960,79990]});var het=ye((rTr,fet)=>{"use strict";fet.exports=kv();XQe();ZQe();YQe();KQe();JQe();$Qe();QQe();eet();ret();net();aet();oet();set();uet();cet()});var _et=ye((iTr,yet)=>{"use strict";var vet=het(),pC=Dr(),pet=fs(),Ztr=pet.EPOCHJD,Ytr=pet.ONEDAY,kQ={valType:"enumerated",values:pC.sortObjectKeys(vet.calendars),editType:"calc",dflt:"gregorian"},get=function(e,t,r,n){var i={};return i[r]=kQ,pC.coerce(e,t,i,r,n)},Ktr=function(e,t,r,n){for(var i=0;i<r.length;i++)get(e,t,r[i]+"calendar",n.calendar)},Jtr={chinese:"2000-01-01",coptic:"2000-01-01",discworld:"2000-01-01",ethiopian:"2000-01-01",hebrew:"5000-01-01",islamic:"1000-01-01",julian:"2000-01-01",mayan:"5000-01-01",nanakshahi:"1000-01-01",nepali:"2000-01-01",persian:"1000-01-01",jalali:"1000-01-01",taiwan:"1000-01-01",thai:"2000-01-01",ummalqura:"1400-01-01"},$tr={chinese:"2000-01-02",coptic:"2000-01-03",discworld:"2000-01-03",ethiopian:"2000-01-05",hebrew:"5000-01-01",islamic:"1000-01-02",julian:"2000-01-03",mayan:"5000-01-01",nanakshahi:"1000-01-05",nepali:"2000-01-05",persian:"1000-01-01",jalali:"1000-01-01",taiwan:"1000-01-04",thai:"2000-01-04",ummalqura:"1400-01-06"},Qtr={chinese:["2000-01-01","2001-01-01"],coptic:["1700-01-01","1701-01-01"],discworld:["1800-01-01","1801-01-01"],ethiopian:["2000-01-01","2001-01-01"],hebrew:["5700-01-01","5701-01-01"],islamic:["1400-01-01","1401-01-01"],julian:["2000-01-01","2001-01-01"],mayan:["5200-01-01","5201-01-01"],nanakshahi:["0500-01-01","0501-01-01"],nepali:["2000-01-01","2001-01-01"],persian:["1400-01-01","1401-01-01"],jalali:["1400-01-01","1401-01-01"],taiwan:["0100-01-01","0101-01-01"],thai:["2500-01-01","2501-01-01"],ummalqura:["1400-01-01","1401-01-01"]},O9="##",err={d:{0:"dd","-":"d"},e:{0:"d","-":"d"},a:{0:"D","-":"D"},A:{0:"DD","-":"DD"},j:{0:"oo","-":"o"},W:{0:"ww","-":"w"},m:{0:"mm","-":"m"},b:{0:"M","-":"M"},B:{0:"MM","-":"MM"},y:{0:"yy","-":"yy"},Y:{0:"yyyy","-":"yyyy"},U:O9,w:O9,c:{0:"D M d %X yyyy","-":"D M d %X yyyy"},x:{0:"mm/dd/yyyy","-":"mm/dd/yyyy"}};function trr(e,t,r){for(var n=Math.floor((t+.05)/Ytr)+Ztr,i=met(r).fromJD(n),a=0,o,s,l,u,c;(a=e.indexOf("%",a))!==-1;)o=e.charAt(a+1),o==="0"||o==="-"||o==="_"?(l=3,s=e.charAt(a+2),o==="_"&&(o="-")):(s=o,o="0",l=2),u=err[s],u?(u===O9?c=O9:c=i.formatDate(u[o]),e=e.slice(0,a)+c+e.slice(a+l),a+=c.length):a+=l;return e}var det={};function met(e){var t=det[e];return t||(t=det[e]=vet.instance(e),t)}function gC(e){return pC.extendFlat({},kQ,{description:e})}function CQ(e){return"Sets the calendar system to use with `"+e+"` date data."}var EQ={xcalendar:gC(CQ("x"))},ny=pC.extendFlat({},EQ,{ycalendar:gC(CQ("y"))}),MQ=pC.extendFlat({},ny,{zcalendar:gC(CQ("z"))}),iS=gC(["Sets the calendar system to use for `range` and `tick0`","if this is a date axis. This does not set the calendar for","interpreting data on this axis, that's specified in the trace","or via the global `layout.calendar`"].join(" "));yet.exports={moduleType:"component",name:"calendars",schema:{traces:{scatter:ny,bar:ny,box:ny,heatmap:ny,contour:ny,histogram:ny,histogram2d:ny,histogram2dcontour:ny,scatter3d:MQ,surface:MQ,mesh3d:MQ,scattergl:ny,ohlc:EQ,candlestick:EQ},layout:{calendar:gC(["Sets the default calendar system to use for interpreting and","displaying dates throughout the plot."].join(" "))},subplots:{xaxis:{calendar:iS},yaxis:{calendar:iS},scene:{xaxis:{calendar:iS},yaxis:{calendar:iS},zaxis:{calendar:iS}},polar:{radialaxis:{calendar:iS}}}},layoutAttributes:kQ,handleDefaults:get,handleTraceDefaults:Ktr,CANONICAL_SUNDAY:$tr,CANONICAL_TICK:Jtr,DFLTRANGE:Qtr,getCal:met,worldCalFmt:trr}});var bet=ye((nTr,xet)=>{"use strict";xet.exports=_et()});var rrr=ye((aTr,Tet)=>{var wet=iye();wet.register([o1e(),X1e(),sxe(),kxe(),Vxe(),qbe(),Jbe(),q2e(),dwe(),Ywe(),z3e(),ZEe(),Oke(),k6e(),gLe(),XLe(),mPe(),VIe(),s8e(),S8e(),O8e(),J8e(),hRe(),kRe(),rFe(),wFe(),BBe(),BNe(),YUe(),bVe(),PGe(),WGe(),gHe(),kje(),Gje(),dWe(),wXe(),WXe(),EZe(),YYe(),_Ke(),NKe(),hJe(),MJe(),S$e(),H$e(),uQe(),HQe(),bet()]);Tet.exports=wet});return rrr();})();
+/*!
+ * pad-left <https://github.com/jonschlinkert/pad-left>
+ *
+ * Copyright (c) 2014-2015, Jon Schlinkert.
+ * Licensed under the MIT license.
+ */
+/*!
+ * repeat-string <https://github.com/jonschlinkert/repeat-string>
+ *
+ * Copyright (c) 2014-2015, Jon Schlinkert.
+ * Licensed under the MIT License.
+ */
+/*! ieee754. BSD-3-Clause License. Feross Aboukhadijeh <https://feross.org/opensource> */
+/*!
+ * The buffer module from node.js, for the browser.
+ *
+ * @author   Feross Aboukhadijeh <https://feross.org>
+ * @license  MIT
+ */
+/*!
+ * Determine if an object is a Buffer
+ *
+ * @author   Feross Aboukhadijeh <https://feross.org>
+ * @license  MIT
+ */
+/*! Bundled license information:
+
+native-promise-only/lib/npo.src.js:
+  (*! Native Promise Only
+      v0.8.1 (c) Kyle Simpson
+      MIT License: http://getify.mit-license.org
+  *)
+
+polybooljs/index.js:
+  (*
+   * @copyright 2016 Sean Connelly (@voidqk), http://syntheti.cc
+   * @license MIT
+   * @preserve Project Home: https://github.com/voidqk/polybooljs
+   *)
+
+ieee754/index.js:
+  (*! ieee754. BSD-3-Clause License. Feross Aboukhadijeh <https://feross.org/opensource> *)
+
+buffer/index.js:
+  (*!
+   * The buffer module from node.js, for the browser.
+   *
+   * @author   Feross Aboukhadijeh <https://feross.org>
+   * @license  MIT
+   *)
+
+safe-buffer/index.js:
+  (*! safe-buffer. MIT License. Feross Aboukhadijeh <https://feross.org/opensource> *)
+
+assert/build/internal/util/comparisons.js:
+  (*!
+   * The buffer module from node.js, for the browser.
+   *
+   * @author   Feross Aboukhadijeh <feross@feross.org> <http://feross.org>
+   * @license  MIT
+   *)
+
+object-assign/index.js:
+  (*
+  object-assign
+  (c) Sindre Sorhus
+  @license MIT
+  *)
+
+maplibre-gl/dist/maplibre-gl.js:
+  (**
+   * MapLibre GL JS
+   * @license 3-Clause BSD. Full text of license: https://github.com/maplibre/maplibre-gl-js/blob/v4.7.1/LICENSE.txt
+   *)
+*/
+
+window.Plotly = Plotly;
+return Plotly;
+}));</script>                <div id="24707454-7ec3-4538-833a-79edf1fc24fa" class="plotly-graph-div" style="height:100%; width:100%;"></div>            <script type="text/javascript">                window.PLOTLYENV=window.PLOTLYENV || {};                                if (document.getElementById("24707454-7ec3-4538-833a-79edf1fc24fa")) {                    Plotly.newPlot(                        "24707454-7ec3-4538-833a-79edf1fc24fa",                        [{"base":["2026-02-24T00:00:00.000000","2026-02-24T00:00:00.135000","2026-02-24T00:00:00.236000","2026-02-24T00:00:00.322000","2026-02-24T00:00:00.382000","2026-02-24T00:00:00.495000","2026-02-24T00:00:00.558000","2026-02-24T00:00:00.799000","2026-02-24T00:00:01.161000","2026-02-24T00:00:01.213000","2026-02-24T00:00:01.384000","2026-02-24T00:00:01.466000","2026-02-24T00:00:01.557000","2026-02-24T00:00:01.840000","2026-02-24T00:00:01.848000","2026-02-24T00:00:01.858000","2026-02-24T00:00:01.860000","2026-02-24T00:00:02.054000","2026-02-24T00:00:02.219000","2026-02-24T00:00:02.441000","2026-02-24T00:00:02.860000","2026-02-24T00:00:03.034000","2026-02-24T00:00:03.102000","2026-02-24T00:00:03.266000","2026-02-24T00:00:03.281000","2026-02-24T00:00:03.391000","2026-02-24T00:00:03.408000","2026-02-24T00:00:03.723000","2026-02-24T00:00:03.803000","2026-02-24T00:00:03.862000","2026-02-24T00:00:03.895000","2026-02-24T00:00:04.058000","2026-02-24T00:00:04.124000","2026-02-24T00:00:04.216000","2026-02-24T00:00:04.217000","2026-02-24T00:00:04.321000","2026-02-24T00:00:04.425000","2026-02-24T00:00:04.529000","2026-02-24T00:00:04.843000","2026-02-24T00:00:04.967000","2026-02-24T00:00:05.016000","2026-02-24T00:00:05.078000","2026-02-24T00:00:05.209000","2026-02-24T00:00:05.215000","2026-02-24T00:00:05.335000","2026-02-24T00:00:05.456000","2026-02-24T00:00:05.481000","2026-02-24T00:00:05.497000","2026-02-24T00:00:05.538000","2026-02-24T00:00:05.587000","2026-02-24T00:00:05.679000","2026-02-24T00:00:05.742000","2026-02-24T00:00:06.227000","2026-02-24T00:00:06.239000","2026-02-24T00:00:06.264000","2026-02-24T00:00:06.283000","2026-02-24T00:00:06.399000","2026-02-24T00:00:06.430000","2026-02-24T00:00:06.498000","2026-02-24T00:00:06.529000","2026-02-24T00:00:06.548000","2026-02-24T00:00:06.561000","2026-02-24T00:00:06.677000","2026-02-24T00:00:06.693000","2026-02-24T00:00:06.717000","2026-02-24T00:00:06.767000","2026-02-24T00:00:06.954000","2026-02-24T00:00:06.966000","2026-02-24T00:00:07.163000","2026-02-24T00:00:07.174000","2026-02-24T00:00:07.582000","2026-02-24T00:00:07.652000","2026-02-24T00:00:08.062000","2026-02-24T00:00:08.162000","2026-02-24T00:00:08.308000","2026-02-24T00:00:08.313000","2026-02-24T00:00:08.348000","2026-02-24T00:00:08.362000","2026-02-24T00:00:08.401000","2026-02-24T00:00:08.414000","2026-02-24T00:00:08.457000","2026-02-24T00:00:08.514000","2026-02-24T00:00:08.522000","2026-02-24T00:00:08.651000","2026-02-24T00:00:08.741000","2026-02-24T00:00:08.774000","2026-02-24T00:00:08.855000","2026-02-24T00:00:08.866000","2026-02-24T00:00:08.959000","2026-02-24T00:00:09.248000","2026-02-24T00:00:09.290000","2026-02-24T00:00:09.410000","2026-02-24T00:00:09.424000","2026-02-24T00:00:09.562000","2026-02-24T00:00:09.599000","2026-02-24T00:00:09.621000","2026-02-24T00:00:09.717000","2026-02-24T00:00:09.720000","2026-02-24T00:00:09.912000","2026-02-24T00:00:09.913000"],"customdata":[[13,120,"00:00:00.000","00:00:00.192","00:00:00.000","00:00:00.034","0.034s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.135","00:00:00.146","0.010s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.236","00:00:00.250","0.014s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.322","00:00:00.334","0.012s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.382","00:00:00.395","0.013s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.495","00:00:00.519","0.024s"],[399,9,"00:00:00.558","00:00:00.580","00:00:00.558","00:00:00.571","0.013s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.799","00:00:00.813","0.014s"],[4,6,"00:00:01.161","00:00:01.176","00:00:01.161","00:00:01.170","0.009s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.213","00:00:01.224","0.011s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.384","00:00:01.396","0.012s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.466","00:00:01.475","0.009s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.557","00:00:01.570","0.013s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.840","00:00:01.852","0.012s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.848","00:00:01.857","0.010s"],[770,9,"00:00:01.858","00:00:01.910","00:00:01.858","00:00:01.898","0.041s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.860","00:00:01.914","0.054s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.054","00:00:02.069","0.015s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.219","00:00:02.230","0.011s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.441","00:00:02.451","0.010s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.860","00:00:02.874","0.014s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.034","00:00:03.049","0.015s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.102","00:00:03.113","0.011s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.266","00:00:03.281","0.015s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.281","00:00:03.348","0.068s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.391","00:00:03.406","0.014s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.408","00:00:03.478","0.070s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.723","00:00:03.735","0.012s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.803","00:00:03.814","0.011s"],[45,4,"00:00:03.862","00:00:03.879","00:00:03.862","00:00:03.876","0.013s"],[11,5,"00:00:03.895","00:00:03.911","00:00:03.895","00:00:03.906","0.010s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.058","00:00:04.067","0.009s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.124","00:00:04.135","0.012s"],[768,11,"00:00:04.216","00:00:04.245","00:00:04.216","00:00:04.232","0.016s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.217","00:00:04.232","0.014s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.321","00:00:04.332","0.011s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.425","00:00:04.442","0.017s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.529","00:00:04.541","0.012s"],[298,13,"00:00:04.843","00:00:04.876","00:00:04.843","00:00:04.859","0.017s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.967","00:00:04.981","0.014s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.016","00:00:05.024","0.008s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.078","00:00:05.096","0.017s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.209","00:00:05.226","0.018s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.215","00:00:05.227","0.012s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.335","00:00:05.344","0.008s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.456","00:00:05.471","0.015s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.481","00:00:05.492","0.011s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.497","00:00:05.552","0.055s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.538","00:00:05.584","0.047s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.587","00:00:05.622","0.035s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.679","00:00:05.731","0.051s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.742","00:00:05.760","0.018s"],[331,11,"00:00:06.227","00:00:06.256","00:00:06.227","00:00:06.243","0.016s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.239","00:00:06.250","0.011s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.264","00:00:06.275","0.010s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.283","00:00:06.296","0.013s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.399","00:00:06.411","0.012s"],[481,9,"00:00:06.430","00:00:06.453","00:00:06.430","00:00:06.442","0.012s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.498","00:00:06.512","0.014s"],[641,6,"00:00:06.529","00:00:06.546","00:00:06.529","00:00:06.540","0.011s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.548","00:00:06.558","0.011s"],[15,5,"00:00:06.561","00:00:06.700","00:00:06.561","00:00:06.695","0.134s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.677","00:00:06.706","0.029s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.693","00:00:06.770","0.077s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.717","00:00:06.812","0.095s"],[689,206,"00:00:06.767","00:00:07.528","00:00:06.767","00:00:07.192","0.425s"],[178,151,"00:00:06.954","00:00:07.669","00:00:06.954","00:00:07.428","0.474s"],[9,42,"00:00:06.966","00:00:07.538","00:00:06.966","00:00:07.470","0.505s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.163","00:00:07.531","0.368s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.174","00:00:07.545","0.371s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.582","00:00:07.597","0.014s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.652","00:00:07.668","0.016s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.062","00:00:08.073","0.011s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.162","00:00:08.175","0.012s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.308","00:00:08.327","0.019s"],[112,12,"00:00:08.313","00:00:08.342","00:00:08.313","00:00:08.327","0.014s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.348","00:00:08.358","0.010s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.362","00:00:08.379","0.016s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.401","00:00:08.473","0.072s"],[467,11,"00:00:08.414","00:00:08.539","00:00:08.414","00:00:08.526","0.111s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.457","00:00:08.543","0.086s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.514","00:00:08.544","0.030s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.522","00:00:08.807","0.285s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.651","00:00:08.907","0.256s"],[13,125,"00:00:08.741","00:00:09.267","00:00:08.741","00:00:09.100","0.359s"],[387,22,"00:00:08.774","00:00:09.302","00:00:08.774","00:00:09.272","0.497s"],[258,26,"00:00:08.855","00:00:09.326","00:00:08.855","00:00:09.288","0.432s"],[32,237,"00:00:08.866","00:00:09.614","00:00:08.866","00:00:09.308","0.442s"],[28,350,"00:00:08.959","00:00:09.780","00:00:08.959","00:00:09.330","0.371s"],[225,13,"00:00:09.248","00:00:09.393","00:00:09.248","00:00:09.377","0.129s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.290","00:00:09.398","0.108s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.410","00:00:09.481","0.071s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.424","00:00:09.617","0.192s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.562","00:00:09.689","0.127s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.599","00:00:09.732","0.133s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.621","00:00:09.786","0.165s"],[58,526,"00:00:09.717","00:00:10.670","00:00:09.717","00:00:10.014","0.297s"],[363,16,"00:00:09.720","00:00:10.058","00:00:09.720","00:00:10.040","0.321s"],[203,416,"00:00:09.912","00:00:10.574","00:00:09.912","00:00:10.064","0.152s"],[140,434,"00:00:09.913","00:00:10.850","00:00:09.913","00:00:10.316","0.403s"]],"hovertemplate":"\u003cb\u003e%{y}\u003c\u002fb\u003e\u003cbr\u003eType: %{fullData.name}\u003cbr\u003eStart: %{customdata[4]}\u003cbr\u003eEnd: %{customdata[5]}\u003cbr\u003eDuration: %{customdata[6]}\u003cbr\u003ePrompt Tokens: %{customdata[0]}\u003cbr\u003eOutput Tokens: %{customdata[1]}\u003cbr\u003eRequest Start Time: %{customdata[2]}\u003cbr\u003eRequest End Time: %{customdata[3]}\u003cbr\u003e\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"TTFT","marker":{"color":"#636EFA","pattern":{"shape":""}},"name":"TTFT","orientation":"h","showlegend":true,"textposition":"auto","x":{"dtype":"i2","bdata":"IgALAA4ADAANABgADQAOAAkACwAMAAkADQAMAAkAKAA2AA8ACwAKAA4ADwALAA8AQwAPAEYADAALAA4ACwAJAAsAEAAPAAsAEQAMABAADgAIABIAEQAMAAkADwALADcALgAjADQAEgAQAAsACwANAAwADAAOAAsACgCGAB0ATQBfAKkB2gH4AXABcwEPABAACwANABMADgAKABEASABwAFYAHgAdAQABZwHyAbEBugFzAYEAbABHAMEAfwCFAKUAKQFAAZgAkwE="},"xaxis":"x","y":["Req 0","Req 1","Req 2","Req 3","Req 4","Req 5","Req 6","Req 7","Req 8","Req 9","Req 10","Req 11","Req 12","Req 13","Req 14","Req 15","Req 16","Req 17","Req 18","Req 19","Req 20","Req 21","Req 22","Req 23","Req 24","Req 25","Req 26","Req 27","Req 28","Req 29","Req 30","Req 31","Req 32","Req 33","Req 34","Req 35","Req 36","Req 37","Req 38","Req 39","Req 40","Req 41","Req 42","Req 43","Req 44","Req 45","Req 46","Req 47","Req 48","Req 49","Req 50","Req 51","Req 52","Req 53","Req 54","Req 55","Req 56","Req 57","Req 58","Req 59","Req 60","Req 61","Req 62","Req 63","Req 64","Req 65","Req 66","Req 67","Req 68","Req 69","Req 70","Req 71","Req 72","Req 73","Req 74","Req 75","Req 76","Req 77","Req 78","Req 79","Req 80","Req 81","Req 82","Req 83","Req 84","Req 85","Req 86","Req 87","Req 88","Req 89","Req 90","Req 91","Req 92","Req 93","Req 94","Req 95","Req 96","Req 97","Req 98","Req 99"],"yaxis":"y","type":"bar"},{"base":["2026-02-24T00:00:00.051000","2026-02-24T00:00:00.053000","2026-02-24T00:00:00.055000","2026-02-24T00:00:00.056000","2026-02-24T00:00:00.058000","2026-02-24T00:00:00.059000","2026-02-24T00:00:00.061000","2026-02-24T00:00:00.062000","2026-02-24T00:00:00.064000","2026-02-24T00:00:00.065000","2026-02-24T00:00:00.067000","2026-02-24T00:00:00.068000","2026-02-24T00:00:00.070000","2026-02-24T00:00:00.071000","2026-02-24T00:00:00.073000","2026-02-24T00:00:00.074000","2026-02-24T00:00:00.075000","2026-02-24T00:00:00.076000","2026-02-24T00:00:00.078000","2026-02-24T00:00:00.079000","2026-02-24T00:00:00.080000","2026-02-24T00:00:00.081000","2026-02-24T00:00:00.083000","2026-02-24T00:00:00.084000","2026-02-24T00:00:00.085000","2026-02-24T00:00:00.086000","2026-02-24T00:00:00.088000","2026-02-24T00:00:00.089000","2026-02-24T00:00:00.090000","2026-02-24T00:00:00.091000","2026-02-24T00:00:00.093000","2026-02-24T00:00:00.094000","2026-02-24T00:00:00.095000","2026-02-24T00:00:00.096000","2026-02-24T00:00:00.097000","2026-02-24T00:00:00.098000","2026-02-24T00:00:00.099000","2026-02-24T00:00:00.100000","2026-02-24T00:00:00.102000","2026-02-24T00:00:00.103000","2026-02-24T00:00:00.104000","2026-02-24T00:00:00.105000","2026-02-24T00:00:00.106000","2026-02-24T00:00:00.107000","2026-02-24T00:00:00.109000","2026-02-24T00:00:00.110000","2026-02-24T00:00:00.111000","2026-02-24T00:00:00.112000","2026-02-24T00:00:00.113000","2026-02-24T00:00:00.114000","2026-02-24T00:00:00.115000","2026-02-24T00:00:00.116000","2026-02-24T00:00:00.118000","2026-02-24T00:00:00.119000","2026-02-24T00:00:00.120000","2026-02-24T00:00:00.121000","2026-02-24T00:00:00.122000","2026-02-24T00:00:00.123000","2026-02-24T00:00:00.124000","2026-02-24T00:00:00.125000","2026-02-24T00:00:00.126000","2026-02-24T00:00:00.127000","2026-02-24T00:00:00.129000","2026-02-24T00:00:00.130000","2026-02-24T00:00:00.131000","2026-02-24T00:00:00.132000","2026-02-24T00:00:00.133000","2026-02-24T00:00:00.134000","2026-02-24T00:00:00.135000","2026-02-24T00:00:00.137000","2026-02-24T00:00:00.137000","2026-02-24T00:00:00.138000","2026-02-24T00:00:00.140000","2026-02-24T00:00:00.145000","2026-02-24T00:00:00.146000","2026-02-24T00:00:00.147000","2026-02-24T00:00:00.148000","2026-02-24T00:00:00.149000","2026-02-24T00:00:00.150000","2026-02-24T00:00:00.151000","2026-02-24T00:00:00.152000","2026-02-24T00:00:00.153000","2026-02-24T00:00:00.155000","2026-02-24T00:00:00.156000","2026-02-24T00:00:00.157000","2026-02-24T00:00:00.158000","2026-02-24T00:00:00.159000","2026-02-24T00:00:00.160000","2026-02-24T00:00:00.161000","2026-02-24T00:00:00.162000","2026-02-24T00:00:00.164000","2026-02-24T00:00:00.165000","2026-02-24T00:00:00.166000","2026-02-24T00:00:00.167000","2026-02-24T00:00:00.168000","2026-02-24T00:00:00.169000","2026-02-24T00:00:00.170000","2026-02-24T00:00:00.172000","2026-02-24T00:00:00.173000","2026-02-24T00:00:00.174000","2026-02-24T00:00:00.175000","2026-02-24T00:00:00.176000","2026-02-24T00:00:00.177000","2026-02-24T00:00:00.178000","2026-02-24T00:00:00.180000","2026-02-24T00:00:00.181000","2026-02-24T00:00:00.182000","2026-02-24T00:00:00.183000","2026-02-24T00:00:00.184000","2026-02-24T00:00:00.185000","2026-02-24T00:00:00.186000","2026-02-24T00:00:00.188000","2026-02-24T00:00:00.189000","2026-02-24T00:00:00.190000","2026-02-24T00:00:00.146000","2026-02-24T00:00:00.147000","2026-02-24T00:00:00.148000","2026-02-24T00:00:00.149000","2026-02-24T00:00:00.150000","2026-02-24T00:00:00.151000","2026-02-24T00:00:00.152000","2026-02-24T00:00:00.153000","2026-02-24T00:00:00.154000","2026-02-24T00:00:00.156000","2026-02-24T00:00:00.157000","2026-02-24T00:00:00.158000","2026-02-24T00:00:00.159000","2026-02-24T00:00:00.160000","2026-02-24T00:00:00.161000","2026-02-24T00:00:00.162000","2026-02-24T00:00:00.164000","2026-02-24T00:00:00.165000","2026-02-24T00:00:00.166000","2026-02-24T00:00:00.167000","2026-02-24T00:00:00.168000","2026-02-24T00:00:00.169000","2026-02-24T00:00:00.170000","2026-02-24T00:00:00.172000","2026-02-24T00:00:00.173000","2026-02-24T00:00:00.174000","2026-02-24T00:00:00.175000","2026-02-24T00:00:00.176000","2026-02-24T00:00:00.177000","2026-02-24T00:00:00.178000","2026-02-24T00:00:00.180000","2026-02-24T00:00:00.181000","2026-02-24T00:00:00.182000","2026-02-24T00:00:00.183000","2026-02-24T00:00:00.184000","2026-02-24T00:00:00.185000","2026-02-24T00:00:00.186000","2026-02-24T00:00:00.188000","2026-02-24T00:00:00.189000","2026-02-24T00:00:00.190000","2026-02-24T00:00:00.192000","2026-02-24T00:00:00.192000","2026-02-24T00:00:00.193000","2026-02-24T00:00:00.194000","2026-02-24T00:00:00.196000","2026-02-24T00:00:00.197000","2026-02-24T00:00:00.198000","2026-02-24T00:00:00.199000","2026-02-24T00:00:00.200000","2026-02-24T00:00:00.201000","2026-02-24T00:00:00.202000","2026-02-24T00:00:00.203000","2026-02-24T00:00:00.204000","2026-02-24T00:00:00.205000","2026-02-24T00:00:00.206000","2026-02-24T00:00:00.208000","2026-02-24T00:00:00.209000","2026-02-24T00:00:00.210000","2026-02-24T00:00:00.211000","2026-02-24T00:00:00.212000","2026-02-24T00:00:00.213000","2026-02-24T00:00:00.214000","2026-02-24T00:00:00.215000","2026-02-24T00:00:00.216000","2026-02-24T00:00:00.217000","2026-02-24T00:00:00.218000","2026-02-24T00:00:00.220000","2026-02-24T00:00:00.221000","2026-02-24T00:00:00.222000","2026-02-24T00:00:00.223000","2026-02-24T00:00:00.224000","2026-02-24T00:00:00.225000","2026-02-24T00:00:00.226000","2026-02-24T00:00:00.227000","2026-02-24T00:00:00.228000","2026-02-24T00:00:00.229000","2026-02-24T00:00:00.230000","2026-02-24T00:00:00.232000","2026-02-24T00:00:00.233000","2026-02-24T00:00:00.234000","2026-02-24T00:00:00.235000","2026-02-24T00:00:00.236000","2026-02-24T00:00:00.238000","2026-02-24T00:00:00.239000","2026-02-24T00:00:00.240000","2026-02-24T00:00:00.249000","2026-02-24T00:00:00.250000","2026-02-24T00:00:00.250000","2026-02-24T00:00:00.252000","2026-02-24T00:00:00.253000","2026-02-24T00:00:00.254000","2026-02-24T00:00:00.255000","2026-02-24T00:00:00.256000","2026-02-24T00:00:00.257000","2026-02-24T00:00:00.258000","2026-02-24T00:00:00.259000","2026-02-24T00:00:00.261000","2026-02-24T00:00:00.262000","2026-02-24T00:00:00.263000","2026-02-24T00:00:00.264000","2026-02-24T00:00:00.265000","2026-02-24T00:00:00.266000","2026-02-24T00:00:00.267000","2026-02-24T00:00:00.269000","2026-02-24T00:00:00.270000","2026-02-24T00:00:00.271000","2026-02-24T00:00:00.272000","2026-02-24T00:00:00.273000","2026-02-24T00:00:00.275000","2026-02-24T00:00:00.276000","2026-02-24T00:00:00.277000","2026-02-24T00:00:00.279000","2026-02-24T00:00:00.280000","2026-02-24T00:00:00.281000","2026-02-24T00:00:00.282000","2026-02-24T00:00:00.283000","2026-02-24T00:00:00.284000","2026-02-24T00:00:00.285000","2026-02-24T00:00:00.287000","2026-02-24T00:00:00.288000","2026-02-24T00:00:00.289000","2026-02-24T00:00:00.290000","2026-02-24T00:00:00.291000","2026-02-24T00:00:00.293000","2026-02-24T00:00:00.294000","2026-02-24T00:00:00.295000","2026-02-24T00:00:00.296000","2026-02-24T00:00:00.297000","2026-02-24T00:00:00.298000","2026-02-24T00:00:00.300000","2026-02-24T00:00:00.301000","2026-02-24T00:00:00.302000","2026-02-24T00:00:00.303000","2026-02-24T00:00:00.304000","2026-02-24T00:00:00.306000","2026-02-24T00:00:00.307000","2026-02-24T00:00:00.309000","2026-02-24T00:00:00.310000","2026-02-24T00:00:00.312000","2026-02-24T00:00:00.313000","2026-02-24T00:00:00.315000","2026-02-24T00:00:00.316000","2026-02-24T00:00:00.317000","2026-02-24T00:00:00.318000","2026-02-24T00:00:00.319000","2026-02-24T00:00:00.320000","2026-02-24T00:00:00.321000","2026-02-24T00:00:00.322000","2026-02-24T00:00:00.323000","2026-02-24T00:00:00.325000","2026-02-24T00:00:00.325000","2026-02-24T00:00:00.327000","2026-02-24T00:00:00.328000","2026-02-24T00:00:00.333000","2026-02-24T00:00:00.334000","2026-02-24T00:00:00.335000","2026-02-24T00:00:00.336000","2026-02-24T00:00:00.337000","2026-02-24T00:00:00.338000","2026-02-24T00:00:00.339000","2026-02-24T00:00:00.340000","2026-02-24T00:00:00.341000","2026-02-24T00:00:00.342000","2026-02-24T00:00:00.343000","2026-02-24T00:00:00.345000","2026-02-24T00:00:00.346000","2026-02-24T00:00:00.347000","2026-02-24T00:00:00.348000","2026-02-24T00:00:00.349000","2026-02-24T00:00:00.350000","2026-02-24T00:00:00.351000","2026-02-24T00:00:00.352000","2026-02-24T00:00:00.354000","2026-02-24T00:00:00.355000","2026-02-24T00:00:00.356000","2026-02-24T00:00:00.357000","2026-02-24T00:00:00.358000","2026-02-24T00:00:00.359000","2026-02-24T00:00:00.360000","2026-02-24T00:00:00.361000","2026-02-24T00:00:00.362000","2026-02-24T00:00:00.364000","2026-02-24T00:00:00.365000","2026-02-24T00:00:00.366000","2026-02-24T00:00:00.367000","2026-02-24T00:00:00.368000","2026-02-24T00:00:00.369000","2026-02-24T00:00:00.370000","2026-02-24T00:00:00.371000","2026-02-24T00:00:00.372000","2026-02-24T00:00:00.374000","2026-02-24T00:00:00.375000","2026-02-24T00:00:00.376000","2026-02-24T00:00:00.377000","2026-02-24T00:00:00.378000","2026-02-24T00:00:00.379000","2026-02-24T00:00:00.380000","2026-02-24T00:00:00.381000","2026-02-24T00:00:00.383000","2026-02-24T00:00:00.384000","2026-02-24T00:00:00.385000","2026-02-24T00:00:00.386000","2026-02-24T00:00:00.387000","2026-02-24T00:00:00.388000","2026-02-24T00:00:00.393000","2026-02-24T00:00:00.394000","2026-02-24T00:00:00.395000","2026-02-24T00:00:00.396000","2026-02-24T00:00:00.397000","2026-02-24T00:00:00.399000","2026-02-24T00:00:00.400000","2026-02-24T00:00:00.401000","2026-02-24T00:00:00.402000","2026-02-24T00:00:00.403000","2026-02-24T00:00:00.404000","2026-02-24T00:00:00.406000","2026-02-24T00:00:00.407000","2026-02-24T00:00:00.408000","2026-02-24T00:00:00.409000","2026-02-24T00:00:00.410000","2026-02-24T00:00:00.411000","2026-02-24T00:00:00.413000","2026-02-24T00:00:00.414000","2026-02-24T00:00:00.415000","2026-02-24T00:00:00.416000","2026-02-24T00:00:00.417000","2026-02-24T00:00:00.418000","2026-02-24T00:00:00.420000","2026-02-24T00:00:00.421000","2026-02-24T00:00:00.422000","2026-02-24T00:00:00.423000","2026-02-24T00:00:00.424000","2026-02-24T00:00:00.426000","2026-02-24T00:00:00.427000","2026-02-24T00:00:00.428000","2026-02-24T00:00:00.429000","2026-02-24T00:00:00.430000","2026-02-24T00:00:00.431000","2026-02-24T00:00:00.433000","2026-02-24T00:00:00.434000","2026-02-24T00:00:00.435000","2026-02-24T00:00:00.436000","2026-02-24T00:00:00.438000","2026-02-24T00:00:00.439000","2026-02-24T00:00:00.441000","2026-02-24T00:00:00.442000","2026-02-24T00:00:00.443000","2026-02-24T00:00:00.444000","2026-02-24T00:00:00.445000","2026-02-24T00:00:00.446000","2026-02-24T00:00:00.447000","2026-02-24T00:00:00.448000","2026-02-24T00:00:00.449000","2026-02-24T00:00:00.451000","2026-02-24T00:00:00.452000","2026-02-24T00:00:00.453000","2026-02-24T00:00:00.454000","2026-02-24T00:00:00.455000","2026-02-24T00:00:00.457000","2026-02-24T00:00:00.458000","2026-02-24T00:00:00.459000","2026-02-24T00:00:00.460000","2026-02-24T00:00:00.462000","2026-02-24T00:00:00.463000","2026-02-24T00:00:00.464000","2026-02-24T00:00:00.466000","2026-02-24T00:00:00.467000","2026-02-24T00:00:00.468000","2026-02-24T00:00:00.469000","2026-02-24T00:00:00.470000","2026-02-24T00:00:00.471000","2026-02-24T00:00:00.473000","2026-02-24T00:00:00.474000","2026-02-24T00:00:00.475000","2026-02-24T00:00:00.476000","2026-02-24T00:00:00.477000","2026-02-24T00:00:00.479000","2026-02-24T00:00:00.480000","2026-02-24T00:00:00.481000","2026-02-24T00:00:00.482000","2026-02-24T00:00:00.484000","2026-02-24T00:00:00.485000","2026-02-24T00:00:00.486000","2026-02-24T00:00:00.487000","2026-02-24T00:00:00.489000","2026-02-24T00:00:00.490000","2026-02-24T00:00:00.492000","2026-02-24T00:00:00.493000","2026-02-24T00:00:00.493000","2026-02-24T00:00:00.495000","2026-02-24T00:00:00.496000","2026-02-24T00:00:00.497000","2026-02-24T00:00:00.498000","2026-02-24T00:00:00.500000","2026-02-24T00:00:00.501000","2026-02-24T00:00:00.502000","2026-02-24T00:00:00.503000","2026-02-24T00:00:00.504000","2026-02-24T00:00:00.506000","2026-02-24T00:00:00.507000","2026-02-24T00:00:00.508000","2026-02-24T00:00:00.509000","2026-02-24T00:00:00.511000","2026-02-24T00:00:00.512000","2026-02-24T00:00:00.513000","2026-02-24T00:00:00.518000","2026-02-24T00:00:00.519000","2026-02-24T00:00:00.520000","2026-02-24T00:00:00.521000","2026-02-24T00:00:00.522000","2026-02-24T00:00:00.523000","2026-02-24T00:00:00.525000","2026-02-24T00:00:00.526000","2026-02-24T00:00:00.527000","2026-02-24T00:00:00.528000","2026-02-24T00:00:00.529000","2026-02-24T00:00:00.530000","2026-02-24T00:00:00.532000","2026-02-24T00:00:00.533000","2026-02-24T00:00:00.534000","2026-02-24T00:00:00.535000","2026-02-24T00:00:00.536000","2026-02-24T00:00:00.538000","2026-02-24T00:00:00.539000","2026-02-24T00:00:00.540000","2026-02-24T00:00:00.541000","2026-02-24T00:00:00.542000","2026-02-24T00:00:00.543000","2026-02-24T00:00:00.545000","2026-02-24T00:00:00.546000","2026-02-24T00:00:00.547000","2026-02-24T00:00:00.548000","2026-02-24T00:00:00.549000","2026-02-24T00:00:00.550000","2026-02-24T00:00:00.552000","2026-02-24T00:00:00.553000","2026-02-24T00:00:00.554000","2026-02-24T00:00:00.555000","2026-02-24T00:00:00.556000","2026-02-24T00:00:00.557000","2026-02-24T00:00:00.559000","2026-02-24T00:00:00.560000","2026-02-24T00:00:00.561000","2026-02-24T00:00:00.562000","2026-02-24T00:00:00.563000","2026-02-24T00:00:00.565000","2026-02-24T00:00:00.570000","2026-02-24T00:00:00.571000","2026-02-24T00:00:00.571000","2026-02-24T00:00:00.573000","2026-02-24T00:00:00.574000","2026-02-24T00:00:00.575000","2026-02-24T00:00:00.576000","2026-02-24T00:00:00.577000","2026-02-24T00:00:00.579000","2026-02-24T00:00:00.580000","2026-02-24T00:00:00.581000","2026-02-24T00:00:00.582000","2026-02-24T00:00:00.583000","2026-02-24T00:00:00.585000","2026-02-24T00:00:00.586000","2026-02-24T00:00:00.587000","2026-02-24T00:00:00.589000","2026-02-24T00:00:00.590000","2026-02-24T00:00:00.591000","2026-02-24T00:00:00.592000","2026-02-24T00:00:00.594000","2026-02-24T00:00:00.595000","2026-02-24T00:00:00.596000","2026-02-24T00:00:00.597000","2026-02-24T00:00:00.598000","2026-02-24T00:00:00.599000","2026-02-24T00:00:00.601000","2026-02-24T00:00:00.602000","2026-02-24T00:00:00.603000","2026-02-24T00:00:00.604000","2026-02-24T00:00:00.605000","2026-02-24T00:00:00.607000","2026-02-24T00:00:00.608000","2026-02-24T00:00:00.609000","2026-02-24T00:00:00.610000","2026-02-24T00:00:00.611000","2026-02-24T00:00:00.613000","2026-02-24T00:00:00.614000","2026-02-24T00:00:00.615000","2026-02-24T00:00:00.617000","2026-02-24T00:00:00.618000","2026-02-24T00:00:00.619000","2026-02-24T00:00:00.620000","2026-02-24T00:00:00.621000","2026-02-24T00:00:00.622000","2026-02-24T00:00:00.624000","2026-02-24T00:00:00.625000","2026-02-24T00:00:00.626000","2026-02-24T00:00:00.627000","2026-02-24T00:00:00.629000","2026-02-24T00:00:00.630000","2026-02-24T00:00:00.631000","2026-02-24T00:00:00.632000","2026-02-24T00:00:00.633000","2026-02-24T00:00:00.635000","2026-02-24T00:00:00.636000","2026-02-24T00:00:00.637000","2026-02-24T00:00:00.638000","2026-02-24T00:00:00.640000","2026-02-24T00:00:00.641000","2026-02-24T00:00:00.642000","2026-02-24T00:00:00.644000","2026-02-24T00:00:00.645000","2026-02-24T00:00:00.646000","2026-02-24T00:00:00.647000","2026-02-24T00:00:00.648000","2026-02-24T00:00:00.649000","2026-02-24T00:00:00.650000","2026-02-24T00:00:00.652000","2026-02-24T00:00:00.653000","2026-02-24T00:00:00.654000","2026-02-24T00:00:00.655000","2026-02-24T00:00:00.656000","2026-02-24T00:00:00.658000","2026-02-24T00:00:00.659000","2026-02-24T00:00:00.660000","2026-02-24T00:00:00.661000","2026-02-24T00:00:00.663000","2026-02-24T00:00:00.664000","2026-02-24T00:00:00.665000","2026-02-24T00:00:00.666000","2026-02-24T00:00:00.668000","2026-02-24T00:00:00.669000","2026-02-24T00:00:00.670000","2026-02-24T00:00:00.672000","2026-02-24T00:00:00.672000","2026-02-24T00:00:00.673000","2026-02-24T00:00:00.675000","2026-02-24T00:00:00.676000","2026-02-24T00:00:00.677000","2026-02-24T00:00:00.678000","2026-02-24T00:00:00.680000","2026-02-24T00:00:00.681000","2026-02-24T00:00:00.682000","2026-02-24T00:00:00.683000","2026-02-24T00:00:00.684000","2026-02-24T00:00:00.686000","2026-02-24T00:00:00.687000","2026-02-24T00:00:00.688000","2026-02-24T00:00:00.689000","2026-02-24T00:00:00.691000","2026-02-24T00:00:00.692000","2026-02-24T00:00:00.693000","2026-02-24T00:00:00.695000","2026-02-24T00:00:00.696000","2026-02-24T00:00:00.697000","2026-02-24T00:00:00.698000","2026-02-24T00:00:00.699000","2026-02-24T00:00:00.700000","2026-02-24T00:00:00.701000","2026-02-24T00:00:00.703000","2026-02-24T00:00:00.704000","2026-02-24T00:00:00.705000","2026-02-24T00:00:00.706000","2026-02-24T00:00:00.708000","2026-02-24T00:00:00.709000","2026-02-24T00:00:00.710000","2026-02-24T00:00:00.711000","2026-02-24T00:00:00.712000","2026-02-24T00:00:00.714000","2026-02-24T00:00:00.715000","2026-02-24T00:00:00.716000","2026-02-24T00:00:00.718000","2026-02-24T00:00:00.719000","2026-02-24T00:00:00.720000","2026-02-24T00:00:00.721000","2026-02-24T00:00:00.722000","2026-02-24T00:00:00.723000","2026-02-24T00:00:00.725000","2026-02-24T00:00:00.726000","2026-02-24T00:00:00.727000","2026-02-24T00:00:00.728000","2026-02-24T00:00:00.729000","2026-02-24T00:00:00.731000","2026-02-24T00:00:00.732000","2026-02-24T00:00:00.733000","2026-02-24T00:00:00.734000","2026-02-24T00:00:00.736000","2026-02-24T00:00:00.737000","2026-02-24T00:00:00.738000","2026-02-24T00:00:00.739000","2026-02-24T00:00:00.741000","2026-02-24T00:00:00.742000","2026-02-24T00:00:00.743000","2026-02-24T00:00:00.745000","2026-02-24T00:00:00.746000","2026-02-24T00:00:00.747000","2026-02-24T00:00:00.748000","2026-02-24T00:00:00.750000","2026-02-24T00:00:00.751000","2026-02-24T00:00:00.752000","2026-02-24T00:00:00.753000","2026-02-24T00:00:00.755000","2026-02-24T00:00:00.756000","2026-02-24T00:00:00.757000","2026-02-24T00:00:00.758000","2026-02-24T00:00:00.760000","2026-02-24T00:00:00.761000","2026-02-24T00:00:00.762000","2026-02-24T00:00:00.763000","2026-02-24T00:00:00.765000","2026-02-24T00:00:00.766000","2026-02-24T00:00:00.767000","2026-02-24T00:00:00.769000","2026-02-24T00:00:00.770000","2026-02-24T00:00:00.771000","2026-02-24T00:00:00.772000","2026-02-24T00:00:00.774000","2026-02-24T00:00:00.775000","2026-02-24T00:00:00.776000","2026-02-24T00:00:00.777000","2026-02-24T00:00:00.779000","2026-02-24T00:00:00.780000","2026-02-24T00:00:00.781000","2026-02-24T00:00:00.783000","2026-02-24T00:00:00.784000","2026-02-24T00:00:00.786000","2026-02-24T00:00:00.787000","2026-02-24T00:00:00.788000","2026-02-24T00:00:00.789000","2026-02-24T00:00:00.790000","2026-02-24T00:00:00.791000","2026-02-24T00:00:00.793000","2026-02-24T00:00:00.794000","2026-02-24T00:00:00.795000","2026-02-24T00:00:00.796000","2026-02-24T00:00:00.798000","2026-02-24T00:00:00.799000","2026-02-24T00:00:00.801000","2026-02-24T00:00:00.802000","2026-02-24T00:00:00.803000","2026-02-24T00:00:00.804000","2026-02-24T00:00:00.806000","2026-02-24T00:00:00.807000","2026-02-24T00:00:00.812000","2026-02-24T00:00:00.813000","2026-02-24T00:00:00.814000","2026-02-24T00:00:00.815000","2026-02-24T00:00:00.816000","2026-02-24T00:00:00.817000","2026-02-24T00:00:00.819000","2026-02-24T00:00:00.820000","2026-02-24T00:00:00.821000","2026-02-24T00:00:00.823000","2026-02-24T00:00:00.824000","2026-02-24T00:00:00.825000","2026-02-24T00:00:00.827000","2026-02-24T00:00:00.828000","2026-02-24T00:00:00.829000","2026-02-24T00:00:00.831000","2026-02-24T00:00:00.832000","2026-02-24T00:00:00.833000","2026-02-24T00:00:00.834000","2026-02-24T00:00:00.835000","2026-02-24T00:00:00.837000","2026-02-24T00:00:00.838000","2026-02-24T00:00:00.839000","2026-02-24T00:00:00.840000","2026-02-24T00:00:00.842000","2026-02-24T00:00:00.843000","2026-02-24T00:00:00.844000","2026-02-24T00:00:00.845000","2026-02-24T00:00:00.847000","2026-02-24T00:00:00.848000","2026-02-24T00:00:00.849000","2026-02-24T00:00:00.851000","2026-02-24T00:00:00.852000","2026-02-24T00:00:00.853000","2026-02-24T00:00:00.854000","2026-02-24T00:00:00.856000","2026-02-24T00:00:00.857000","2026-02-24T00:00:00.858000","2026-02-24T00:00:00.860000","2026-02-24T00:00:00.861000","2026-02-24T00:00:00.862000","2026-02-24T00:00:00.863000","2026-02-24T00:00:00.865000","2026-02-24T00:00:00.866000","2026-02-24T00:00:00.867000","2026-02-24T00:00:00.868000","2026-02-24T00:00:00.870000","2026-02-24T00:00:00.871000","2026-02-24T00:00:00.872000","2026-02-24T00:00:00.874000","2026-02-24T00:00:00.875000","2026-02-24T00:00:00.876000","2026-02-24T00:00:00.877000","2026-02-24T00:00:00.879000","2026-02-24T00:00:00.880000","2026-02-24T00:00:00.881000","2026-02-24T00:00:00.882000","2026-02-24T00:00:00.884000","2026-02-24T00:00:00.885000","2026-02-24T00:00:00.886000","2026-02-24T00:00:00.888000","2026-02-24T00:00:00.889000","2026-02-24T00:00:00.890000","2026-02-24T00:00:00.891000","2026-02-24T00:00:00.893000","2026-02-24T00:00:00.894000","2026-02-24T00:00:00.895000","2026-02-24T00:00:00.896000","2026-02-24T00:00:00.898000","2026-02-24T00:00:00.899000","2026-02-24T00:00:00.900000","2026-02-24T00:00:00.902000","2026-02-24T00:00:00.903000","2026-02-24T00:00:00.904000","2026-02-24T00:00:00.905000","2026-02-24T00:00:00.907000","2026-02-24T00:00:00.908000","2026-02-24T00:00:00.909000","2026-02-24T00:00:00.911000","2026-02-24T00:00:00.912000","2026-02-24T00:00:00.913000","2026-02-24T00:00:00.915000","2026-02-24T00:00:00.916000","2026-02-24T00:00:00.917000","2026-02-24T00:00:00.919000","2026-02-24T00:00:00.920000","2026-02-24T00:00:00.921000","2026-02-24T00:00:00.923000","2026-02-24T00:00:00.924000","2026-02-24T00:00:00.925000","2026-02-24T00:00:00.926000","2026-02-24T00:00:00.928000","2026-02-24T00:00:00.929000","2026-02-24T00:00:00.930000","2026-02-24T00:00:00.932000","2026-02-24T00:00:00.933000","2026-02-24T00:00:00.934000","2026-02-24T00:00:00.936000","2026-02-24T00:00:00.937000","2026-02-24T00:00:00.938000","2026-02-24T00:00:00.940000","2026-02-24T00:00:00.941000","2026-02-24T00:00:00.943000","2026-02-24T00:00:00.944000","2026-02-24T00:00:00.945000","2026-02-24T00:00:00.947000","2026-02-24T00:00:00.948000","2026-02-24T00:00:00.950000","2026-02-24T00:00:00.951000","2026-02-24T00:00:00.952000","2026-02-24T00:00:00.953000","2026-02-24T00:00:00.954000","2026-02-24T00:00:00.956000","2026-02-24T00:00:00.957000","2026-02-24T00:00:00.958000","2026-02-24T00:00:00.960000","2026-02-24T00:00:00.961000","2026-02-24T00:00:00.962000","2026-02-24T00:00:00.964000","2026-02-24T00:00:00.965000","2026-02-24T00:00:00.966000","2026-02-24T00:00:00.968000","2026-02-24T00:00:00.969000","2026-02-24T00:00:00.970000","2026-02-24T00:00:00.971000","2026-02-24T00:00:00.973000","2026-02-24T00:00:00.974000","2026-02-24T00:00:00.975000","2026-02-24T00:00:00.977000","2026-02-24T00:00:00.978000","2026-02-24T00:00:00.979000","2026-02-24T00:00:00.981000","2026-02-24T00:00:00.982000","2026-02-24T00:00:00.983000","2026-02-24T00:00:00.985000","2026-02-24T00:00:00.986000","2026-02-24T00:00:00.987000","2026-02-24T00:00:00.989000","2026-02-24T00:00:00.990000","2026-02-24T00:00:00.991000","2026-02-24T00:00:00.993000","2026-02-24T00:00:00.994000","2026-02-24T00:00:00.995000","2026-02-24T00:00:00.997000","2026-02-24T00:00:00.998000","2026-02-24T00:00:00.999000","2026-02-24T00:00:01.001000","2026-02-24T00:00:01.002000","2026-02-24T00:00:01.003000","2026-02-24T00:00:01.004000","2026-02-24T00:00:01.006000","2026-02-24T00:00:01.007000","2026-02-24T00:00:01.008000","2026-02-24T00:00:01.010000","2026-02-24T00:00:01.011000","2026-02-24T00:00:01.013000","2026-02-24T00:00:01.014000","2026-02-24T00:00:01.015000","2026-02-24T00:00:01.017000","2026-02-24T00:00:01.018000","2026-02-24T00:00:01.019000","2026-02-24T00:00:01.020000","2026-02-24T00:00:01.022000","2026-02-24T00:00:01.023000","2026-02-24T00:00:01.024000","2026-02-24T00:00:01.026000","2026-02-24T00:00:01.027000","2026-02-24T00:00:01.028000","2026-02-24T00:00:01.030000","2026-02-24T00:00:01.031000","2026-02-24T00:00:01.032000","2026-02-24T00:00:01.034000","2026-02-24T00:00:01.035000","2026-02-24T00:00:01.036000","2026-02-24T00:00:01.037000","2026-02-24T00:00:01.039000","2026-02-24T00:00:01.040000","2026-02-24T00:00:01.041000","2026-02-24T00:00:01.043000","2026-02-24T00:00:01.044000","2026-02-24T00:00:01.046000","2026-02-24T00:00:01.047000","2026-02-24T00:00:01.048000","2026-02-24T00:00:01.049000","2026-02-24T00:00:01.051000","2026-02-24T00:00:01.052000","2026-02-24T00:00:01.053000","2026-02-24T00:00:01.055000","2026-02-24T00:00:01.056000","2026-02-24T00:00:01.057000","2026-02-24T00:00:01.059000","2026-02-24T00:00:01.060000","2026-02-24T00:00:01.061000","2026-02-24T00:00:01.063000","2026-02-24T00:00:01.064000","2026-02-24T00:00:01.065000","2026-02-24T00:00:01.067000","2026-02-24T00:00:01.068000","2026-02-24T00:00:01.069000","2026-02-24T00:00:01.071000","2026-02-24T00:00:01.072000","2026-02-24T00:00:01.073000","2026-02-24T00:00:01.075000","2026-02-24T00:00:01.076000","2026-02-24T00:00:01.078000","2026-02-24T00:00:01.079000","2026-02-24T00:00:01.080000","2026-02-24T00:00:01.082000","2026-02-24T00:00:01.083000","2026-02-24T00:00:01.084000","2026-02-24T00:00:01.086000","2026-02-24T00:00:01.087000","2026-02-24T00:00:01.089000","2026-02-24T00:00:01.090000","2026-02-24T00:00:01.091000","2026-02-24T00:00:01.093000","2026-02-24T00:00:01.094000","2026-02-24T00:00:01.095000","2026-02-24T00:00:01.097000","2026-02-24T00:00:01.098000","2026-02-24T00:00:01.099000","2026-02-24T00:00:01.101000","2026-02-24T00:00:01.102000","2026-02-24T00:00:00.250000","2026-02-24T00:00:00.250000","2026-02-24T00:00:00.252000","2026-02-24T00:00:00.253000","2026-02-24T00:00:00.254000","2026-02-24T00:00:00.255000","2026-02-24T00:00:00.256000","2026-02-24T00:00:00.257000","2026-02-24T00:00:00.258000","2026-02-24T00:00:00.259000","2026-02-24T00:00:00.261000","2026-02-24T00:00:00.262000","2026-02-24T00:00:00.263000","2026-02-24T00:00:00.264000","2026-02-24T00:00:00.265000","2026-02-24T00:00:00.266000","2026-02-24T00:00:00.267000","2026-02-24T00:00:00.269000","2026-02-24T00:00:00.270000","2026-02-24T00:00:00.271000","2026-02-24T00:00:00.272000","2026-02-24T00:00:00.273000","2026-02-24T00:00:00.275000","2026-02-24T00:00:00.276000","2026-02-24T00:00:00.277000","2026-02-24T00:00:00.279000","2026-02-24T00:00:00.280000","2026-02-24T00:00:00.281000","2026-02-24T00:00:00.282000","2026-02-24T00:00:00.283000","2026-02-24T00:00:00.284000","2026-02-24T00:00:00.285000","2026-02-24T00:00:00.287000","2026-02-24T00:00:00.288000","2026-02-24T00:00:00.289000","2026-02-24T00:00:00.290000","2026-02-24T00:00:00.292000","2026-02-24T00:00:00.293000","2026-02-24T00:00:00.294000","2026-02-24T00:00:00.295000","2026-02-24T00:00:00.296000","2026-02-24T00:00:00.297000","2026-02-24T00:00:00.298000","2026-02-24T00:00:00.300000","2026-02-24T00:00:00.301000","2026-02-24T00:00:00.302000","2026-02-24T00:00:00.303000","2026-02-24T00:00:00.305000","2026-02-24T00:00:00.306000","2026-02-24T00:00:00.307000","2026-02-24T00:00:00.309000","2026-02-24T00:00:00.310000","2026-02-24T00:00:00.312000","2026-02-24T00:00:00.313000","2026-02-24T00:00:00.315000","2026-02-24T00:00:00.316000","2026-02-24T00:00:00.317000","2026-02-24T00:00:00.318000","2026-02-24T00:00:00.319000","2026-02-24T00:00:00.320000","2026-02-24T00:00:00.321000","2026-02-24T00:00:00.322000","2026-02-24T00:00:00.324000","2026-02-24T00:00:00.325000","2026-02-24T00:00:00.326000","2026-02-24T00:00:00.327000","2026-02-24T00:00:00.328000","2026-02-24T00:00:00.333000","2026-02-24T00:00:00.334000","2026-02-24T00:00:00.335000","2026-02-24T00:00:00.336000","2026-02-24T00:00:00.337000","2026-02-24T00:00:00.338000","2026-02-24T00:00:00.339000","2026-02-24T00:00:00.340000","2026-02-24T00:00:00.341000","2026-02-24T00:00:00.342000","2026-02-24T00:00:00.344000","2026-02-24T00:00:00.345000","2026-02-24T00:00:00.346000","2026-02-24T00:00:00.347000","2026-02-24T00:00:00.348000","2026-02-24T00:00:00.349000","2026-02-24T00:00:00.350000","2026-02-24T00:00:00.351000","2026-02-24T00:00:00.352000","2026-02-24T00:00:00.354000","2026-02-24T00:00:00.355000","2026-02-24T00:00:00.356000","2026-02-24T00:00:00.357000","2026-02-24T00:00:00.358000","2026-02-24T00:00:00.359000","2026-02-24T00:00:00.360000","2026-02-24T00:00:00.361000","2026-02-24T00:00:00.363000","2026-02-24T00:00:00.364000","2026-02-24T00:00:00.365000","2026-02-24T00:00:00.366000","2026-02-24T00:00:00.367000","2026-02-24T00:00:00.368000","2026-02-24T00:00:00.369000","2026-02-24T00:00:00.370000","2026-02-24T00:00:00.371000","2026-02-24T00:00:00.373000","2026-02-24T00:00:00.374000","2026-02-24T00:00:00.375000","2026-02-24T00:00:00.376000","2026-02-24T00:00:00.377000","2026-02-24T00:00:00.378000","2026-02-24T00:00:00.379000","2026-02-24T00:00:00.380000","2026-02-24T00:00:00.382000","2026-02-24T00:00:00.383000","2026-02-24T00:00:00.384000","2026-02-24T00:00:00.385000","2026-02-24T00:00:00.386000","2026-02-24T00:00:00.387000","2026-02-24T00:00:00.388000","2026-02-24T00:00:00.393000","2026-02-24T00:00:00.395000","2026-02-24T00:00:00.395000","2026-02-24T00:00:00.396000","2026-02-24T00:00:00.398000","2026-02-24T00:00:00.399000","2026-02-24T00:00:00.400000","2026-02-24T00:00:00.401000","2026-02-24T00:00:00.402000","2026-02-24T00:00:00.403000","2026-02-24T00:00:00.405000","2026-02-24T00:00:00.406000","2026-02-24T00:00:00.407000","2026-02-24T00:00:00.408000","2026-02-24T00:00:00.409000","2026-02-24T00:00:00.410000","2026-02-24T00:00:00.412000","2026-02-24T00:00:00.413000","2026-02-24T00:00:00.414000","2026-02-24T00:00:00.415000","2026-02-24T00:00:00.416000","2026-02-24T00:00:00.417000","2026-02-24T00:00:00.419000","2026-02-24T00:00:00.420000","2026-02-24T00:00:00.421000","2026-02-24T00:00:00.422000","2026-02-24T00:00:00.423000","2026-02-24T00:00:00.424000","2026-02-24T00:00:00.426000","2026-02-24T00:00:00.427000","2026-02-24T00:00:00.428000","2026-02-24T00:00:00.429000","2026-02-24T00:00:00.430000","2026-02-24T00:00:00.431000","2026-02-24T00:00:00.433000","2026-02-24T00:00:00.434000","2026-02-24T00:00:00.435000","2026-02-24T00:00:00.437000","2026-02-24T00:00:00.438000","2026-02-24T00:00:00.439000","2026-02-24T00:00:00.441000","2026-02-24T00:00:00.442000","2026-02-24T00:00:00.443000","2026-02-24T00:00:00.444000","2026-02-24T00:00:00.445000","2026-02-24T00:00:00.446000","2026-02-24T00:00:00.447000","2026-02-24T00:00:00.448000","2026-02-24T00:00:00.450000","2026-02-24T00:00:00.451000","2026-02-24T00:00:00.452000","2026-02-24T00:00:00.453000","2026-02-24T00:00:00.454000","2026-02-24T00:00:00.456000","2026-02-24T00:00:00.457000","2026-02-24T00:00:00.458000","2026-02-24T00:00:00.459000","2026-02-24T00:00:00.460000","2026-02-24T00:00:00.462000","2026-02-24T00:00:00.463000","2026-02-24T00:00:00.464000","2026-02-24T00:00:00.466000","2026-02-24T00:00:00.467000","2026-02-24T00:00:00.468000","2026-02-24T00:00:00.469000","2026-02-24T00:00:00.470000","2026-02-24T00:00:00.472000","2026-02-24T00:00:00.473000","2026-02-24T00:00:00.474000","2026-02-24T00:00:00.475000","2026-02-24T00:00:00.476000","2026-02-24T00:00:00.478000","2026-02-24T00:00:00.479000","2026-02-24T00:00:00.480000","2026-02-24T00:00:00.481000","2026-02-24T00:00:00.482000","2026-02-24T00:00:00.484000","2026-02-24T00:00:00.485000","2026-02-24T00:00:00.486000","2026-02-24T00:00:00.488000","2026-02-24T00:00:00.489000","2026-02-24T00:00:00.490000","2026-02-24T00:00:00.492000","2026-02-24T00:00:00.493000","2026-02-24T00:00:00.494000","2026-02-24T00:00:00.495000","2026-02-24T00:00:00.496000","2026-02-24T00:00:00.497000","2026-02-24T00:00:00.499000","2026-02-24T00:00:00.500000","2026-02-24T00:00:00.501000","2026-02-24T00:00:00.502000","2026-02-24T00:00:00.503000","2026-02-24T00:00:00.505000","2026-02-24T00:00:00.506000","2026-02-24T00:00:00.507000","2026-02-24T00:00:00.508000","2026-02-24T00:00:00.509000","2026-02-24T00:00:00.511000","2026-02-24T00:00:00.512000","2026-02-24T00:00:00.513000","2026-02-24T00:00:00.518000","2026-02-24T00:00:00.519000","2026-02-24T00:00:00.520000","2026-02-24T00:00:00.521000","2026-02-24T00:00:00.522000","2026-02-24T00:00:00.524000","2026-02-24T00:00:00.525000","2026-02-24T00:00:00.526000","2026-02-24T00:00:00.527000","2026-02-24T00:00:00.528000","2026-02-24T00:00:00.334000","2026-02-24T00:00:00.335000","2026-02-24T00:00:00.336000","2026-02-24T00:00:00.337000","2026-02-24T00:00:00.338000","2026-02-24T00:00:00.339000","2026-02-24T00:00:00.340000","2026-02-24T00:00:00.341000","2026-02-24T00:00:00.342000","2026-02-24T00:00:00.343000","2026-02-24T00:00:00.345000","2026-02-24T00:00:00.346000","2026-02-24T00:00:00.347000","2026-02-24T00:00:00.348000","2026-02-24T00:00:00.349000","2026-02-24T00:00:00.350000","2026-02-24T00:00:00.351000","2026-02-24T00:00:00.352000","2026-02-24T00:00:00.354000","2026-02-24T00:00:00.355000","2026-02-24T00:00:00.356000","2026-02-24T00:00:00.357000","2026-02-24T00:00:00.358000","2026-02-24T00:00:00.359000","2026-02-24T00:00:00.360000","2026-02-24T00:00:00.361000","2026-02-24T00:00:00.363000","2026-02-24T00:00:00.364000","2026-02-24T00:00:00.365000","2026-02-24T00:00:00.366000","2026-02-24T00:00:00.367000","2026-02-24T00:00:00.368000","2026-02-24T00:00:00.369000","2026-02-24T00:00:00.370000","2026-02-24T00:00:00.371000","2026-02-24T00:00:00.373000","2026-02-24T00:00:00.374000","2026-02-24T00:00:00.375000","2026-02-24T00:00:00.376000","2026-02-24T00:00:00.377000","2026-02-24T00:00:00.378000","2026-02-24T00:00:00.379000","2026-02-24T00:00:00.380000","2026-02-24T00:00:00.382000","2026-02-24T00:00:00.383000","2026-02-24T00:00:00.384000","2026-02-24T00:00:00.385000","2026-02-24T00:00:00.386000","2026-02-24T00:00:00.387000","2026-02-24T00:00:00.388000","2026-02-24T00:00:00.393000","2026-02-24T00:00:00.395000","2026-02-24T00:00:00.395000","2026-02-24T00:00:00.396000","2026-02-24T00:00:00.397000","2026-02-24T00:00:00.399000","2026-02-24T00:00:00.400000","2026-02-24T00:00:00.401000","2026-02-24T00:00:00.402000","2026-02-24T00:00:00.403000","2026-02-24T00:00:00.404000","2026-02-24T00:00:00.406000","2026-02-24T00:00:00.407000","2026-02-24T00:00:00.408000","2026-02-24T00:00:00.409000","2026-02-24T00:00:00.410000","2026-02-24T00:00:00.412000","2026-02-24T00:00:00.413000","2026-02-24T00:00:00.414000","2026-02-24T00:00:00.415000","2026-02-24T00:00:00.416000","2026-02-24T00:00:00.417000","2026-02-24T00:00:00.418000","2026-02-24T00:00:00.420000","2026-02-24T00:00:00.421000","2026-02-24T00:00:00.422000","2026-02-24T00:00:00.423000","2026-02-24T00:00:00.424000","2026-02-24T00:00:00.426000","2026-02-24T00:00:00.427000","2026-02-24T00:00:00.428000","2026-02-24T00:00:00.429000","2026-02-24T00:00:00.430000","2026-02-24T00:00:00.431000","2026-02-24T00:00:00.433000","2026-02-24T00:00:00.434000","2026-02-24T00:00:00.435000","2026-02-24T00:00:00.437000","2026-02-24T00:00:00.438000","2026-02-24T00:00:00.439000","2026-02-24T00:00:00.441000","2026-02-24T00:00:00.442000","2026-02-24T00:00:00.443000","2026-02-24T00:00:00.444000","2026-02-24T00:00:00.445000","2026-02-24T00:00:00.446000","2026-02-24T00:00:00.447000","2026-02-24T00:00:00.448000","2026-02-24T00:00:00.450000","2026-02-24T00:00:00.451000","2026-02-24T00:00:00.452000","2026-02-24T00:00:00.453000","2026-02-24T00:00:00.454000","2026-02-24T00:00:00.455000","2026-02-24T00:00:00.457000","2026-02-24T00:00:00.458000","2026-02-24T00:00:00.459000","2026-02-24T00:00:00.460000","2026-02-24T00:00:00.462000","2026-02-24T00:00:00.463000","2026-02-24T00:00:00.464000","2026-02-24T00:00:00.466000","2026-02-24T00:00:00.467000","2026-02-24T00:00:00.468000","2026-02-24T00:00:00.469000","2026-02-24T00:00:00.470000","2026-02-24T00:00:00.471000","2026-02-24T00:00:00.473000","2026-02-24T00:00:00.474000","2026-02-24T00:00:00.475000","2026-02-24T00:00:00.476000","2026-02-24T00:00:00.478000","2026-02-24T00:00:00.479000","2026-02-24T00:00:00.480000","2026-02-24T00:00:00.481000","2026-02-24T00:00:00.482000","2026-02-24T00:00:00.484000","2026-02-24T00:00:00.485000","2026-02-24T00:00:00.486000","2026-02-24T00:00:00.487000","2026-02-24T00:00:00.489000","2026-02-24T00:00:00.490000","2026-02-24T00:00:00.492000","2026-02-24T00:00:00.493000","2026-02-24T00:00:00.494000","2026-02-24T00:00:00.495000","2026-02-24T00:00:00.496000","2026-02-24T00:00:00.497000","2026-02-24T00:00:00.499000","2026-02-24T00:00:00.500000","2026-02-24T00:00:00.501000","2026-02-24T00:00:00.502000","2026-02-24T00:00:00.503000","2026-02-24T00:00:00.504000","2026-02-24T00:00:00.506000","2026-02-24T00:00:00.507000","2026-02-24T00:00:00.508000","2026-02-24T00:00:00.509000","2026-02-24T00:00:00.511000","2026-02-24T00:00:00.512000","2026-02-24T00:00:00.513000","2026-02-24T00:00:00.518000","2026-02-24T00:00:00.519000","2026-02-24T00:00:00.520000","2026-02-24T00:00:00.521000","2026-02-24T00:00:00.522000","2026-02-24T00:00:00.524000","2026-02-24T00:00:00.525000","2026-02-24T00:00:00.526000","2026-02-24T00:00:00.527000","2026-02-24T00:00:00.528000","2026-02-24T00:00:00.530000","2026-02-24T00:00:00.530000","2026-02-24T00:00:00.532000","2026-02-24T00:00:00.533000","2026-02-24T00:00:00.534000","2026-02-24T00:00:00.535000","2026-02-24T00:00:00.536000","2026-02-24T00:00:00.538000","2026-02-24T00:00:00.539000","2026-02-24T00:00:00.540000","2026-02-24T00:00:00.541000","2026-02-24T00:00:00.542000","2026-02-24T00:00:00.543000","2026-02-24T00:00:00.545000","2026-02-24T00:00:00.546000","2026-02-24T00:00:00.547000","2026-02-24T00:00:00.548000","2026-02-24T00:00:00.549000","2026-02-24T00:00:00.550000","2026-02-24T00:00:00.552000","2026-02-24T00:00:00.553000","2026-02-24T00:00:00.554000","2026-02-24T00:00:00.555000","2026-02-24T00:00:00.556000","2026-02-24T00:00:00.558000","2026-02-24T00:00:00.559000","2026-02-24T00:00:00.560000","2026-02-24T00:00:00.561000","2026-02-24T00:00:00.562000","2026-02-24T00:00:00.563000","2026-02-24T00:00:00.395000","2026-02-24T00:00:00.395000","2026-02-24T00:00:00.396000","2026-02-24T00:00:00.398000","2026-02-24T00:00:00.399000","2026-02-24T00:00:00.400000","2026-02-24T00:00:00.401000","2026-02-24T00:00:00.402000","2026-02-24T00:00:00.403000","2026-02-24T00:00:00.404000","2026-02-24T00:00:00.406000","2026-02-24T00:00:00.407000","2026-02-24T00:00:00.408000","2026-02-24T00:00:00.409000","2026-02-24T00:00:00.410000","2026-02-24T00:00:00.412000","2026-02-24T00:00:00.413000","2026-02-24T00:00:00.414000","2026-02-24T00:00:00.415000","2026-02-24T00:00:00.416000","2026-02-24T00:00:00.417000","2026-02-24T00:00:00.419000","2026-02-24T00:00:00.420000","2026-02-24T00:00:00.421000","2026-02-24T00:00:00.422000","2026-02-24T00:00:00.423000","2026-02-24T00:00:00.424000","2026-02-24T00:00:00.426000","2026-02-24T00:00:00.427000","2026-02-24T00:00:00.428000","2026-02-24T00:00:00.429000","2026-02-24T00:00:00.430000","2026-02-24T00:00:00.431000","2026-02-24T00:00:00.433000","2026-02-24T00:00:00.434000","2026-02-24T00:00:00.435000","2026-02-24T00:00:00.437000","2026-02-24T00:00:00.438000","2026-02-24T00:00:00.439000","2026-02-24T00:00:00.441000","2026-02-24T00:00:00.442000","2026-02-24T00:00:00.443000","2026-02-24T00:00:00.444000","2026-02-24T00:00:00.445000","2026-02-24T00:00:00.446000","2026-02-24T00:00:00.447000","2026-02-24T00:00:00.448000","2026-02-24T00:00:00.450000","2026-02-24T00:00:00.451000","2026-02-24T00:00:00.452000","2026-02-24T00:00:00.453000","2026-02-24T00:00:00.454000","2026-02-24T00:00:00.456000","2026-02-24T00:00:00.457000","2026-02-24T00:00:00.458000","2026-02-24T00:00:00.459000","2026-02-24T00:00:00.460000","2026-02-24T00:00:00.462000","2026-02-24T00:00:00.463000","2026-02-24T00:00:00.464000","2026-02-24T00:00:00.466000","2026-02-24T00:00:00.467000","2026-02-24T00:00:00.468000","2026-02-24T00:00:00.469000","2026-02-24T00:00:00.470000","2026-02-24T00:00:00.471000","2026-02-24T00:00:00.473000","2026-02-24T00:00:00.474000","2026-02-24T00:00:00.475000","2026-02-24T00:00:00.476000","2026-02-24T00:00:00.478000","2026-02-24T00:00:00.479000","2026-02-24T00:00:00.480000","2026-02-24T00:00:00.481000","2026-02-24T00:00:00.482000","2026-02-24T00:00:00.484000","2026-02-24T00:00:00.485000","2026-02-24T00:00:00.486000","2026-02-24T00:00:00.488000","2026-02-24T00:00:00.489000","2026-02-24T00:00:00.490000","2026-02-24T00:00:00.492000","2026-02-24T00:00:00.493000","2026-02-24T00:00:00.494000","2026-02-24T00:00:00.495000","2026-02-24T00:00:00.496000","2026-02-24T00:00:00.497000","2026-02-24T00:00:00.499000","2026-02-24T00:00:00.500000","2026-02-24T00:00:00.501000","2026-02-24T00:00:00.502000","2026-02-24T00:00:00.503000","2026-02-24T00:00:00.505000","2026-02-24T00:00:00.506000","2026-02-24T00:00:00.507000","2026-02-24T00:00:00.508000","2026-02-24T00:00:00.509000","2026-02-24T00:00:00.511000","2026-02-24T00:00:00.512000","2026-02-24T00:00:00.513000","2026-02-24T00:00:00.519000","2026-02-24T00:00:00.520000","2026-02-24T00:00:00.521000","2026-02-24T00:00:00.522000","2026-02-24T00:00:00.524000","2026-02-24T00:00:00.525000","2026-02-24T00:00:00.526000","2026-02-24T00:00:00.527000","2026-02-24T00:00:00.528000","2026-02-24T00:00:00.530000","2026-02-24T00:00:00.530000","2026-02-24T00:00:00.532000","2026-02-24T00:00:00.533000","2026-02-24T00:00:00.534000","2026-02-24T00:00:00.535000","2026-02-24T00:00:00.536000","2026-02-24T00:00:00.538000","2026-02-24T00:00:00.539000","2026-02-24T00:00:00.540000","2026-02-24T00:00:00.541000","2026-02-24T00:00:00.542000","2026-02-24T00:00:00.543000","2026-02-24T00:00:00.545000","2026-02-24T00:00:00.546000","2026-02-24T00:00:00.547000","2026-02-24T00:00:00.548000","2026-02-24T00:00:00.549000","2026-02-24T00:00:00.550000","2026-02-24T00:00:00.552000","2026-02-24T00:00:00.553000","2026-02-24T00:00:00.554000","2026-02-24T00:00:00.555000","2026-02-24T00:00:00.556000","2026-02-24T00:00:00.558000","2026-02-24T00:00:00.559000","2026-02-24T00:00:00.560000","2026-02-24T00:00:00.561000","2026-02-24T00:00:00.562000","2026-02-24T00:00:00.563000","2026-02-24T00:00:00.565000","2026-02-24T00:00:00.570000","2026-02-24T00:00:00.571000","2026-02-24T00:00:00.571000","2026-02-24T00:00:00.573000","2026-02-24T00:00:00.574000","2026-02-24T00:00:00.575000","2026-02-24T00:00:00.576000","2026-02-24T00:00:00.578000","2026-02-24T00:00:00.579000","2026-02-24T00:00:00.580000","2026-02-24T00:00:00.581000","2026-02-24T00:00:00.582000","2026-02-24T00:00:00.584000","2026-02-24T00:00:00.585000","2026-02-24T00:00:00.586000","2026-02-24T00:00:00.587000","2026-02-24T00:00:00.589000","2026-02-24T00:00:00.590000","2026-02-24T00:00:00.591000","2026-02-24T00:00:00.592000","2026-02-24T00:00:00.594000","2026-02-24T00:00:00.595000","2026-02-24T00:00:00.596000","2026-02-24T00:00:00.597000","2026-02-24T00:00:00.598000","2026-02-24T00:00:00.599000","2026-02-24T00:00:00.601000","2026-02-24T00:00:00.602000","2026-02-24T00:00:00.603000","2026-02-24T00:00:00.604000","2026-02-24T00:00:00.605000","2026-02-24T00:00:00.607000","2026-02-24T00:00:00.608000","2026-02-24T00:00:00.609000","2026-02-24T00:00:00.610000","2026-02-24T00:00:00.612000","2026-02-24T00:00:00.613000","2026-02-24T00:00:00.614000","2026-02-24T00:00:00.615000","2026-02-24T00:00:00.617000","2026-02-24T00:00:00.618000","2026-02-24T00:00:00.619000","2026-02-24T00:00:00.620000","2026-02-24T00:00:00.621000","2026-02-24T00:00:00.622000","2026-02-24T00:00:00.624000","2026-02-24T00:00:00.625000","2026-02-24T00:00:00.626000","2026-02-24T00:00:00.627000","2026-02-24T00:00:00.629000","2026-02-24T00:00:00.630000","2026-02-24T00:00:00.631000","2026-02-24T00:00:00.632000","2026-02-24T00:00:00.633000","2026-02-24T00:00:00.635000","2026-02-24T00:00:00.636000","2026-02-24T00:00:00.637000","2026-02-24T00:00:00.638000","2026-02-24T00:00:00.640000","2026-02-24T00:00:00.641000","2026-02-24T00:00:00.642000","2026-02-24T00:00:00.644000","2026-02-24T00:00:00.645000","2026-02-24T00:00:00.646000","2026-02-24T00:00:00.647000","2026-02-24T00:00:00.648000","2026-02-24T00:00:00.649000","2026-02-24T00:00:00.650000","2026-02-24T00:00:00.652000","2026-02-24T00:00:00.653000","2026-02-24T00:00:00.654000","2026-02-24T00:00:00.655000","2026-02-24T00:00:00.657000","2026-02-24T00:00:00.658000","2026-02-24T00:00:00.659000","2026-02-24T00:00:00.660000","2026-02-24T00:00:00.661000","2026-02-24T00:00:00.663000","2026-02-24T00:00:00.664000","2026-02-24T00:00:00.665000","2026-02-24T00:00:00.666000","2026-02-24T00:00:00.668000","2026-02-24T00:00:00.669000","2026-02-24T00:00:00.670000","2026-02-24T00:00:00.672000","2026-02-24T00:00:00.673000","2026-02-24T00:00:00.674000","2026-02-24T00:00:00.675000","2026-02-24T00:00:00.676000","2026-02-24T00:00:00.677000","2026-02-24T00:00:00.678000","2026-02-24T00:00:00.680000","2026-02-24T00:00:00.681000","2026-02-24T00:00:00.682000","2026-02-24T00:00:00.683000","2026-02-24T00:00:00.684000","2026-02-24T00:00:00.686000","2026-02-24T00:00:00.687000","2026-02-24T00:00:00.688000","2026-02-24T00:00:00.689000","2026-02-24T00:00:00.691000","2026-02-24T00:00:00.692000","2026-02-24T00:00:00.693000","2026-02-24T00:00:00.695000","2026-02-24T00:00:00.696000","2026-02-24T00:00:00.697000","2026-02-24T00:00:00.698000","2026-02-24T00:00:00.699000","2026-02-24T00:00:00.700000","2026-02-24T00:00:00.702000","2026-02-24T00:00:00.703000","2026-02-24T00:00:00.704000","2026-02-24T00:00:00.705000","2026-02-24T00:00:00.706000","2026-02-24T00:00:00.708000","2026-02-24T00:00:00.709000","2026-02-24T00:00:00.710000","2026-02-24T00:00:00.711000","2026-02-24T00:00:00.712000","2026-02-24T00:00:00.714000","2026-02-24T00:00:00.715000","2026-02-24T00:00:00.716000","2026-02-24T00:00:00.718000","2026-02-24T00:00:00.719000","2026-02-24T00:00:00.720000","2026-02-24T00:00:00.721000","2026-02-24T00:00:00.722000","2026-02-24T00:00:00.723000","2026-02-24T00:00:00.725000","2026-02-24T00:00:00.726000","2026-02-24T00:00:00.727000","2026-02-24T00:00:00.728000","2026-02-24T00:00:00.729000","2026-02-24T00:00:00.731000","2026-02-24T00:00:00.732000","2026-02-24T00:00:00.733000","2026-02-24T00:00:00.734000","2026-02-24T00:00:00.736000","2026-02-24T00:00:00.737000","2026-02-24T00:00:00.738000","2026-02-24T00:00:00.739000","2026-02-24T00:00:00.741000","2026-02-24T00:00:00.742000","2026-02-24T00:00:00.743000","2026-02-24T00:00:00.745000","2026-02-24T00:00:00.746000","2026-02-24T00:00:00.747000","2026-02-24T00:00:00.748000","2026-02-24T00:00:00.750000","2026-02-24T00:00:00.751000","2026-02-24T00:00:00.752000","2026-02-24T00:00:00.753000","2026-02-24T00:00:00.755000","2026-02-24T00:00:00.756000","2026-02-24T00:00:00.757000","2026-02-24T00:00:00.758000","2026-02-24T00:00:00.760000","2026-02-24T00:00:00.761000","2026-02-24T00:00:00.762000","2026-02-24T00:00:00.763000","2026-02-24T00:00:00.765000","2026-02-24T00:00:00.766000","2026-02-24T00:00:00.767000","2026-02-24T00:00:00.769000","2026-02-24T00:00:00.770000","2026-02-24T00:00:00.771000","2026-02-24T00:00:00.772000","2026-02-24T00:00:00.774000","2026-02-24T00:00:00.775000","2026-02-24T00:00:00.776000","2026-02-24T00:00:00.777000","2026-02-24T00:00:00.779000","2026-02-24T00:00:00.780000","2026-02-24T00:00:00.781000","2026-02-24T00:00:00.783000","2026-02-24T00:00:00.784000","2026-02-24T00:00:00.786000","2026-02-24T00:00:00.787000","2026-02-24T00:00:00.788000","2026-02-24T00:00:00.789000","2026-02-24T00:00:00.790000","2026-02-24T00:00:00.791000","2026-02-24T00:00:00.793000","2026-02-24T00:00:00.794000","2026-02-24T00:00:00.795000","2026-02-24T00:00:00.797000","2026-02-24T00:00:00.798000","2026-02-24T00:00:00.799000","2026-02-24T00:00:00.801000","2026-02-24T00:00:00.802000","2026-02-24T00:00:00.803000","2026-02-24T00:00:00.804000","2026-02-24T00:00:00.806000","2026-02-24T00:00:00.807000","2026-02-24T00:00:00.812000","2026-02-24T00:00:00.813000","2026-02-24T00:00:00.814000","2026-02-24T00:00:00.815000","2026-02-24T00:00:00.816000","2026-02-24T00:00:00.818000","2026-02-24T00:00:00.819000","2026-02-24T00:00:00.820000","2026-02-24T00:00:00.821000","2026-02-24T00:00:00.823000","2026-02-24T00:00:00.824000","2026-02-24T00:00:00.825000","2026-02-24T00:00:00.827000","2026-02-24T00:00:00.828000","2026-02-24T00:00:00.829000","2026-02-24T00:00:00.831000","2026-02-24T00:00:00.832000","2026-02-24T00:00:00.833000","2026-02-24T00:00:00.834000","2026-02-24T00:00:00.835000","2026-02-24T00:00:00.837000","2026-02-24T00:00:00.838000","2026-02-24T00:00:00.839000","2026-02-24T00:00:00.840000","2026-02-24T00:00:00.842000","2026-02-24T00:00:00.843000","2026-02-24T00:00:00.844000","2026-02-24T00:00:00.846000","2026-02-24T00:00:00.847000","2026-02-24T00:00:00.848000","2026-02-24T00:00:00.849000","2026-02-24T00:00:00.851000","2026-02-24T00:00:00.852000","2026-02-24T00:00:00.853000","2026-02-24T00:00:00.855000","2026-02-24T00:00:00.856000","2026-02-24T00:00:00.857000","2026-02-24T00:00:00.859000","2026-02-24T00:00:00.860000","2026-02-24T00:00:00.861000","2026-02-24T00:00:00.862000","2026-02-24T00:00:00.863000","2026-02-24T00:00:00.865000","2026-02-24T00:00:00.866000","2026-02-24T00:00:00.867000","2026-02-24T00:00:00.868000","2026-02-24T00:00:00.870000","2026-02-24T00:00:00.871000","2026-02-24T00:00:00.872000","2026-02-24T00:00:00.874000","2026-02-24T00:00:00.875000","2026-02-24T00:00:00.876000","2026-02-24T00:00:00.877000","2026-02-24T00:00:00.879000","2026-02-24T00:00:00.880000","2026-02-24T00:00:00.881000","2026-02-24T00:00:00.882000","2026-02-24T00:00:00.884000","2026-02-24T00:00:00.885000","2026-02-24T00:00:00.886000","2026-02-24T00:00:00.888000","2026-02-24T00:00:00.889000","2026-02-24T00:00:00.890000","2026-02-24T00:00:00.891000","2026-02-24T00:00:00.893000","2026-02-24T00:00:00.894000","2026-02-24T00:00:00.895000","2026-02-24T00:00:00.896000","2026-02-24T00:00:00.898000","2026-02-24T00:00:00.899000","2026-02-24T00:00:00.900000","2026-02-24T00:00:00.902000","2026-02-24T00:00:00.903000","2026-02-24T00:00:00.904000","2026-02-24T00:00:00.906000","2026-02-24T00:00:00.907000","2026-02-24T00:00:00.908000","2026-02-24T00:00:00.909000","2026-02-24T00:00:00.911000","2026-02-24T00:00:00.912000","2026-02-24T00:00:00.913000","2026-02-24T00:00:00.915000","2026-02-24T00:00:00.916000","2026-02-24T00:00:00.917000","2026-02-24T00:00:00.919000","2026-02-24T00:00:00.920000","2026-02-24T00:00:00.921000","2026-02-24T00:00:00.923000","2026-02-24T00:00:00.924000","2026-02-24T00:00:00.925000","2026-02-24T00:00:00.927000","2026-02-24T00:00:00.928000","2026-02-24T00:00:00.929000","2026-02-24T00:00:00.930000","2026-02-24T00:00:00.932000","2026-02-24T00:00:00.933000","2026-02-24T00:00:00.935000","2026-02-24T00:00:00.936000","2026-02-24T00:00:00.937000","2026-02-24T00:00:00.939000","2026-02-24T00:00:00.940000","2026-02-24T00:00:00.941000","2026-02-24T00:00:00.943000","2026-02-24T00:00:00.944000","2026-02-24T00:00:00.946000","2026-02-24T00:00:00.947000","2026-02-24T00:00:00.948000","2026-02-24T00:00:00.950000","2026-02-24T00:00:00.951000","2026-02-24T00:00:00.952000","2026-02-24T00:00:00.953000","2026-02-24T00:00:00.955000","2026-02-24T00:00:00.956000","2026-02-24T00:00:00.957000","2026-02-24T00:00:00.958000","2026-02-24T00:00:00.960000","2026-02-24T00:00:00.961000","2026-02-24T00:00:00.962000","2026-02-24T00:00:00.964000","2026-02-24T00:00:00.965000","2026-02-24T00:00:00.966000","2026-02-24T00:00:00.968000","2026-02-24T00:00:00.969000","2026-02-24T00:00:00.970000","2026-02-24T00:00:00.971000","2026-02-24T00:00:00.973000","2026-02-24T00:00:00.974000","2026-02-24T00:00:00.975000","2026-02-24T00:00:00.977000","2026-02-24T00:00:00.978000","2026-02-24T00:00:00.980000","2026-02-24T00:00:00.981000","2026-02-24T00:00:00.982000","2026-02-24T00:00:00.983000","2026-02-24T00:00:00.985000","2026-02-24T00:00:00.986000","2026-02-24T00:00:00.987000","2026-02-24T00:00:00.989000","2026-02-24T00:00:00.990000","2026-02-24T00:00:00.991000","2026-02-24T00:00:00.993000","2026-02-24T00:00:00.994000","2026-02-24T00:00:00.996000","2026-02-24T00:00:00.997000","2026-02-24T00:00:00.998000","2026-02-24T00:00:00.999000","2026-02-24T00:00:01.001000","2026-02-24T00:00:01.002000","2026-02-24T00:00:01.003000","2026-02-24T00:00:01.004000","2026-02-24T00:00:01.006000","2026-02-24T00:00:01.007000","2026-02-24T00:00:01.008000","2026-02-24T00:00:01.010000","2026-02-24T00:00:01.011000","2026-02-24T00:00:01.013000","2026-02-24T00:00:01.014000","2026-02-24T00:00:01.015000","2026-02-24T00:00:01.017000","2026-02-24T00:00:01.018000","2026-02-24T00:00:01.019000","2026-02-24T00:00:01.020000","2026-02-24T00:00:01.022000","2026-02-24T00:00:01.023000","2026-02-24T00:00:01.025000","2026-02-24T00:00:01.026000","2026-02-24T00:00:01.027000","2026-02-24T00:00:01.028000","2026-02-24T00:00:01.030000","2026-02-24T00:00:01.031000","2026-02-24T00:00:01.032000","2026-02-24T00:00:01.034000","2026-02-24T00:00:01.035000","2026-02-24T00:00:01.036000","2026-02-24T00:00:01.038000","2026-02-24T00:00:01.039000","2026-02-24T00:00:01.040000","2026-02-24T00:00:01.041000","2026-02-24T00:00:01.043000","2026-02-24T00:00:01.044000","2026-02-24T00:00:01.046000","2026-02-24T00:00:01.047000","2026-02-24T00:00:01.048000","2026-02-24T00:00:01.049000","2026-02-24T00:00:01.051000","2026-02-24T00:00:01.052000","2026-02-24T00:00:01.054000","2026-02-24T00:00:01.055000","2026-02-24T00:00:01.056000","2026-02-24T00:00:01.057000","2026-02-24T00:00:01.059000","2026-02-24T00:00:01.060000","2026-02-24T00:00:01.061000","2026-02-24T00:00:01.063000","2026-02-24T00:00:01.064000","2026-02-24T00:00:01.065000","2026-02-24T00:00:01.067000","2026-02-24T00:00:01.068000","2026-02-24T00:00:01.069000","2026-02-24T00:00:01.071000","2026-02-24T00:00:01.072000","2026-02-24T00:00:01.073000","2026-02-24T00:00:01.075000","2026-02-24T00:00:01.076000","2026-02-24T00:00:01.078000","2026-02-24T00:00:01.079000","2026-02-24T00:00:01.080000","2026-02-24T00:00:01.082000","2026-02-24T00:00:01.083000","2026-02-24T00:00:01.084000","2026-02-24T00:00:01.086000","2026-02-24T00:00:01.087000","2026-02-24T00:00:01.089000","2026-02-24T00:00:01.090000","2026-02-24T00:00:01.091000","2026-02-24T00:00:01.093000","2026-02-24T00:00:01.094000","2026-02-24T00:00:01.095000","2026-02-24T00:00:01.097000","2026-02-24T00:00:01.098000","2026-02-24T00:00:01.100000","2026-02-24T00:00:01.101000","2026-02-24T00:00:01.102000","2026-02-24T00:00:01.104000","2026-02-24T00:00:01.105000","2026-02-24T00:00:01.106000","2026-02-24T00:00:01.107000","2026-02-24T00:00:01.108000","2026-02-24T00:00:01.109000","2026-02-24T00:00:01.110000","2026-02-24T00:00:01.112000","2026-02-24T00:00:01.113000","2026-02-24T00:00:01.114000","2026-02-24T00:00:01.115000","2026-02-24T00:00:01.116000","2026-02-24T00:00:01.118000","2026-02-24T00:00:01.119000","2026-02-24T00:00:01.120000","2026-02-24T00:00:01.121000","2026-02-24T00:00:01.122000","2026-02-24T00:00:01.124000","2026-02-24T00:00:01.125000","2026-02-24T00:00:01.126000","2026-02-24T00:00:01.127000","2026-02-24T00:00:01.128000","2026-02-24T00:00:01.129000","2026-02-24T00:00:01.130000","2026-02-24T00:00:01.132000","2026-02-24T00:00:01.133000","2026-02-24T00:00:01.134000","2026-02-24T00:00:01.135000","2026-02-24T00:00:01.136000","2026-02-24T00:00:01.138000","2026-02-24T00:00:01.139000","2026-02-24T00:00:01.140000","2026-02-24T00:00:01.141000","2026-02-24T00:00:01.143000","2026-02-24T00:00:01.144000","2026-02-24T00:00:01.145000","2026-02-24T00:00:01.146000","2026-02-24T00:00:01.148000","2026-02-24T00:00:01.149000","2026-02-24T00:00:01.150000","2026-02-24T00:00:01.152000","2026-02-24T00:00:01.153000","2026-02-24T00:00:01.154000","2026-02-24T00:00:01.155000","2026-02-24T00:00:01.156000","2026-02-24T00:00:01.157000","2026-02-24T00:00:01.158000","2026-02-24T00:00:01.160000","2026-02-24T00:00:01.161000","2026-02-24T00:00:01.163000","2026-02-24T00:00:01.163000","2026-02-24T00:00:01.165000","2026-02-24T00:00:01.166000","2026-02-24T00:00:01.167000","2026-02-24T00:00:01.169000","2026-02-24T00:00:01.170000","2026-02-24T00:00:01.171000","2026-02-24T00:00:01.172000","2026-02-24T00:00:01.173000","2026-02-24T00:00:01.175000","2026-02-24T00:00:01.176000","2026-02-24T00:00:01.177000","2026-02-24T00:00:01.178000","2026-02-24T00:00:01.180000","2026-02-24T00:00:01.181000","2026-02-24T00:00:01.182000","2026-02-24T00:00:01.183000","2026-02-24T00:00:01.184000","2026-02-24T00:00:01.186000","2026-02-24T00:00:01.187000","2026-02-24T00:00:01.188000","2026-02-24T00:00:01.189000","2026-02-24T00:00:01.191000","2026-02-24T00:00:01.192000","2026-02-24T00:00:01.193000","2026-02-24T00:00:01.195000","2026-02-24T00:00:01.196000","2026-02-24T00:00:01.197000","2026-02-24T00:00:01.199000","2026-02-24T00:00:01.200000","2026-02-24T00:00:01.202000","2026-02-24T00:00:01.203000","2026-02-24T00:00:01.204000","2026-02-24T00:00:01.205000","2026-02-24T00:00:01.207000","2026-02-24T00:00:01.208000","2026-02-24T00:00:01.209000","2026-02-24T00:00:01.210000","2026-02-24T00:00:01.211000","2026-02-24T00:00:01.212000","2026-02-24T00:00:01.214000","2026-02-24T00:00:01.215000","2026-02-24T00:00:01.216000","2026-02-24T00:00:01.217000","2026-02-24T00:00:01.219000","2026-02-24T00:00:01.223000","2026-02-24T00:00:01.224000","2026-02-24T00:00:01.225000","2026-02-24T00:00:01.227000","2026-02-24T00:00:01.228000","2026-02-24T00:00:01.229000","2026-02-24T00:00:01.230000","2026-02-24T00:00:01.232000","2026-02-24T00:00:01.233000","2026-02-24T00:00:01.234000","2026-02-24T00:00:01.236000","2026-02-24T00:00:01.237000","2026-02-24T00:00:01.238000","2026-02-24T00:00:01.239000","2026-02-24T00:00:01.241000","2026-02-24T00:00:01.242000","2026-02-24T00:00:01.243000","2026-02-24T00:00:01.244000","2026-02-24T00:00:01.246000","2026-02-24T00:00:01.247000","2026-02-24T00:00:01.248000","2026-02-24T00:00:01.249000","2026-02-24T00:00:01.251000","2026-02-24T00:00:01.252000","2026-02-24T00:00:01.253000","2026-02-24T00:00:01.254000","2026-02-24T00:00:01.256000","2026-02-24T00:00:01.257000","2026-02-24T00:00:01.258000","2026-02-24T00:00:01.260000","2026-02-24T00:00:01.261000","2026-02-24T00:00:01.262000","2026-02-24T00:00:01.263000","2026-02-24T00:00:01.265000","2026-02-24T00:00:01.266000","2026-02-24T00:00:01.267000","2026-02-24T00:00:01.269000","2026-02-24T00:00:01.270000","2026-02-24T00:00:01.271000","2026-02-24T00:00:01.272000","2026-02-24T00:00:01.274000","2026-02-24T00:00:01.275000","2026-02-24T00:00:01.276000","2026-02-24T00:00:01.277000","2026-02-24T00:00:01.279000","2026-02-24T00:00:01.280000","2026-02-24T00:00:01.281000","2026-02-24T00:00:01.282000","2026-02-24T00:00:01.284000","2026-02-24T00:00:01.285000","2026-02-24T00:00:01.286000","2026-02-24T00:00:01.288000","2026-02-24T00:00:01.289000","2026-02-24T00:00:01.290000","2026-02-24T00:00:01.291000","2026-02-24T00:00:01.293000","2026-02-24T00:00:01.294000","2026-02-24T00:00:01.295000","2026-02-24T00:00:01.296000","2026-02-24T00:00:01.298000","2026-02-24T00:00:01.299000","2026-02-24T00:00:01.300000","2026-02-24T00:00:01.302000","2026-02-24T00:00:01.303000","2026-02-24T00:00:01.304000","2026-02-24T00:00:01.306000","2026-02-24T00:00:01.307000","2026-02-24T00:00:01.308000","2026-02-24T00:00:01.310000","2026-02-24T00:00:01.311000","2026-02-24T00:00:01.312000","2026-02-24T00:00:01.314000","2026-02-24T00:00:01.315000","2026-02-24T00:00:01.316000","2026-02-24T00:00:01.318000","2026-02-24T00:00:01.319000","2026-02-24T00:00:01.320000","2026-02-24T00:00:01.322000","2026-02-24T00:00:01.323000","2026-02-24T00:00:01.324000","2026-02-24T00:00:01.326000","2026-02-24T00:00:01.327000","2026-02-24T00:00:01.328000","2026-02-24T00:00:01.329000","2026-02-24T00:00:01.331000","2026-02-24T00:00:01.332000","2026-02-24T00:00:01.333000","2026-02-24T00:00:01.334000","2026-02-24T00:00:01.336000","2026-02-24T00:00:01.337000","2026-02-24T00:00:01.338000","2026-02-24T00:00:01.339000","2026-02-24T00:00:01.341000","2026-02-24T00:00:01.342000","2026-02-24T00:00:01.343000","2026-02-24T00:00:01.344000","2026-02-24T00:00:01.346000","2026-02-24T00:00:01.347000","2026-02-24T00:00:01.348000","2026-02-24T00:00:01.350000","2026-02-24T00:00:01.351000","2026-02-24T00:00:01.352000","2026-02-24T00:00:01.353000","2026-02-24T00:00:01.355000","2026-02-24T00:00:01.356000","2026-02-24T00:00:01.357000","2026-02-24T00:00:01.359000","2026-02-24T00:00:01.360000","2026-02-24T00:00:01.361000","2026-02-24T00:00:01.362000","2026-02-24T00:00:01.364000","2026-02-24T00:00:01.365000","2026-02-24T00:00:01.366000","2026-02-24T00:00:01.367000","2026-02-24T00:00:01.369000","2026-02-24T00:00:01.370000","2026-02-24T00:00:01.371000","2026-02-24T00:00:01.373000","2026-02-24T00:00:01.374000","2026-02-24T00:00:01.375000","2026-02-24T00:00:01.376000","2026-02-24T00:00:01.378000","2026-02-24T00:00:01.379000","2026-02-24T00:00:01.380000","2026-02-24T00:00:01.382000","2026-02-24T00:00:01.383000","2026-02-24T00:00:01.384000","2026-02-24T00:00:01.385000","2026-02-24T00:00:01.387000","2026-02-24T00:00:01.388000","2026-02-24T00:00:01.395000","2026-02-24T00:00:01.396000","2026-02-24T00:00:01.397000","2026-02-24T00:00:01.398000","2026-02-24T00:00:01.400000","2026-02-24T00:00:01.401000","2026-02-24T00:00:01.402000","2026-02-24T00:00:01.404000","2026-02-24T00:00:01.405000","2026-02-24T00:00:01.406000","2026-02-24T00:00:01.408000","2026-02-24T00:00:01.409000","2026-02-24T00:00:01.410000","2026-02-24T00:00:01.412000","2026-02-24T00:00:01.413000","2026-02-24T00:00:01.414000","2026-02-24T00:00:01.416000","2026-02-24T00:00:01.417000","2026-02-24T00:00:01.418000","2026-02-24T00:00:01.420000","2026-02-24T00:00:01.421000","2026-02-24T00:00:01.422000","2026-02-24T00:00:01.424000","2026-02-24T00:00:01.425000","2026-02-24T00:00:01.426000","2026-02-24T00:00:01.428000","2026-02-24T00:00:01.429000","2026-02-24T00:00:01.430000","2026-02-24T00:00:01.432000","2026-02-24T00:00:01.433000","2026-02-24T00:00:01.434000","2026-02-24T00:00:01.436000","2026-02-24T00:00:01.437000","2026-02-24T00:00:01.438000","2026-02-24T00:00:01.440000","2026-02-24T00:00:01.441000","2026-02-24T00:00:01.442000","2026-02-24T00:00:01.444000","2026-02-24T00:00:01.445000","2026-02-24T00:00:01.446000","2026-02-24T00:00:01.447000","2026-02-24T00:00:01.449000","2026-02-24T00:00:01.450000","2026-02-24T00:00:01.452000","2026-02-24T00:00:01.453000","2026-02-24T00:00:01.454000","2026-02-24T00:00:01.455000","2026-02-24T00:00:01.457000","2026-02-24T00:00:01.458000","2026-02-24T00:00:01.460000","2026-02-24T00:00:01.461000","2026-02-24T00:00:01.462000","2026-02-24T00:00:01.464000","2026-02-24T00:00:01.465000","2026-02-24T00:00:00.571000","2026-02-24T00:00:00.571000","2026-02-24T00:00:00.573000","2026-02-24T00:00:00.574000","2026-02-24T00:00:00.575000","2026-02-24T00:00:00.576000","2026-02-24T00:00:00.578000","2026-02-24T00:00:00.579000","2026-02-24T00:00:00.813000","2026-02-24T00:00:00.814000","2026-02-24T00:00:00.815000","2026-02-24T00:00:00.816000","2026-02-24T00:00:00.818000","2026-02-24T00:00:00.819000","2026-02-24T00:00:00.820000","2026-02-24T00:00:00.821000","2026-02-24T00:00:00.823000","2026-02-24T00:00:00.824000","2026-02-24T00:00:00.825000","2026-02-24T00:00:00.827000","2026-02-24T00:00:00.828000","2026-02-24T00:00:00.829000","2026-02-24T00:00:00.831000","2026-02-24T00:00:00.832000","2026-02-24T00:00:00.833000","2026-02-24T00:00:00.834000","2026-02-24T00:00:00.835000","2026-02-24T00:00:00.837000","2026-02-24T00:00:00.838000","2026-02-24T00:00:00.839000","2026-02-24T00:00:00.840000","2026-02-24T00:00:01.170000","2026-02-24T00:00:01.171000","2026-02-24T00:00:01.172000","2026-02-24T00:00:01.173000","2026-02-24T00:00:01.175000","2026-02-24T00:00:01.224000","2026-02-24T00:00:01.225000","2026-02-24T00:00:01.227000","2026-02-24T00:00:01.228000","2026-02-24T00:00:01.229000","2026-02-24T00:00:01.230000","2026-02-24T00:00:01.232000","2026-02-24T00:00:01.233000","2026-02-24T00:00:01.234000","2026-02-24T00:00:01.235000","2026-02-24T00:00:01.237000","2026-02-24T00:00:01.238000","2026-02-24T00:00:01.239000","2026-02-24T00:00:01.241000","2026-02-24T00:00:01.242000","2026-02-24T00:00:01.243000","2026-02-24T00:00:01.244000","2026-02-24T00:00:01.245000","2026-02-24T00:00:01.247000","2026-02-24T00:00:01.248000","2026-02-24T00:00:01.249000","2026-02-24T00:00:01.251000","2026-02-24T00:00:01.252000","2026-02-24T00:00:01.253000","2026-02-24T00:00:01.254000","2026-02-24T00:00:01.256000","2026-02-24T00:00:01.257000","2026-02-24T00:00:01.258000","2026-02-24T00:00:01.260000","2026-02-24T00:00:01.261000","2026-02-24T00:00:01.262000","2026-02-24T00:00:01.263000","2026-02-24T00:00:01.265000","2026-02-24T00:00:01.266000","2026-02-24T00:00:01.267000","2026-02-24T00:00:01.268000","2026-02-24T00:00:01.270000","2026-02-24T00:00:01.271000","2026-02-24T00:00:01.272000","2026-02-24T00:00:01.274000","2026-02-24T00:00:01.275000","2026-02-24T00:00:01.276000","2026-02-24T00:00:01.277000","2026-02-24T00:00:01.279000","2026-02-24T00:00:01.280000","2026-02-24T00:00:01.281000","2026-02-24T00:00:01.282000","2026-02-24T00:00:01.284000","2026-02-24T00:00:01.285000","2026-02-24T00:00:01.286000","2026-02-24T00:00:01.288000","2026-02-24T00:00:01.289000","2026-02-24T00:00:01.290000","2026-02-24T00:00:01.291000","2026-02-24T00:00:01.293000","2026-02-24T00:00:01.294000","2026-02-24T00:00:01.295000","2026-02-24T00:00:01.296000","2026-02-24T00:00:01.298000","2026-02-24T00:00:01.299000","2026-02-24T00:00:01.300000","2026-02-24T00:00:01.302000","2026-02-24T00:00:01.303000","2026-02-24T00:00:01.304000","2026-02-24T00:00:01.306000","2026-02-24T00:00:01.307000","2026-02-24T00:00:01.308000","2026-02-24T00:00:01.310000","2026-02-24T00:00:01.311000","2026-02-24T00:00:01.312000","2026-02-24T00:00:01.314000","2026-02-24T00:00:01.315000","2026-02-24T00:00:01.316000","2026-02-24T00:00:01.317000","2026-02-24T00:00:01.319000","2026-02-24T00:00:01.320000","2026-02-24T00:00:01.322000","2026-02-24T00:00:01.323000","2026-02-24T00:00:01.324000","2026-02-24T00:00:01.396000","2026-02-24T00:00:01.397000","2026-02-24T00:00:01.398000","2026-02-24T00:00:01.400000","2026-02-24T00:00:01.401000","2026-02-24T00:00:01.402000","2026-02-24T00:00:01.404000","2026-02-24T00:00:01.405000","2026-02-24T00:00:01.406000","2026-02-24T00:00:01.408000","2026-02-24T00:00:01.409000","2026-02-24T00:00:01.410000","2026-02-24T00:00:01.412000","2026-02-24T00:00:01.413000","2026-02-24T00:00:01.414000","2026-02-24T00:00:01.416000","2026-02-24T00:00:01.417000","2026-02-24T00:00:01.418000","2026-02-24T00:00:01.420000","2026-02-24T00:00:01.421000","2026-02-24T00:00:01.422000","2026-02-24T00:00:01.424000","2026-02-24T00:00:01.425000","2026-02-24T00:00:01.426000","2026-02-24T00:00:01.428000","2026-02-24T00:00:01.429000","2026-02-24T00:00:01.430000","2026-02-24T00:00:01.432000","2026-02-24T00:00:01.433000","2026-02-24T00:00:01.434000","2026-02-24T00:00:01.436000","2026-02-24T00:00:01.437000","2026-02-24T00:00:01.438000","2026-02-24T00:00:01.440000","2026-02-24T00:00:01.441000","2026-02-24T00:00:01.442000","2026-02-24T00:00:01.444000","2026-02-24T00:00:01.445000","2026-02-24T00:00:01.446000","2026-02-24T00:00:01.448000","2026-02-24T00:00:01.449000","2026-02-24T00:00:01.450000","2026-02-24T00:00:01.452000","2026-02-24T00:00:01.453000","2026-02-24T00:00:01.454000","2026-02-24T00:00:01.455000","2026-02-24T00:00:01.457000","2026-02-24T00:00:01.458000","2026-02-24T00:00:01.460000","2026-02-24T00:00:01.461000","2026-02-24T00:00:01.462000","2026-02-24T00:00:01.464000","2026-02-24T00:00:01.467000","2026-02-24T00:00:01.467000","2026-02-24T00:00:01.468000","2026-02-24T00:00:01.469000","2026-02-24T00:00:01.470000","2026-02-24T00:00:01.471000","2026-02-24T00:00:01.472000","2026-02-24T00:00:01.474000","2026-02-24T00:00:01.475000","2026-02-24T00:00:01.476000","2026-02-24T00:00:01.477000","2026-02-24T00:00:01.478000","2026-02-24T00:00:01.480000","2026-02-24T00:00:01.481000","2026-02-24T00:00:01.482000","2026-02-24T00:00:01.483000","2026-02-24T00:00:01.484000","2026-02-24T00:00:01.485000","2026-02-24T00:00:01.487000","2026-02-24T00:00:01.488000","2026-02-24T00:00:01.489000","2026-02-24T00:00:01.490000","2026-02-24T00:00:01.491000","2026-02-24T00:00:01.493000","2026-02-24T00:00:01.494000","2026-02-24T00:00:01.495000","2026-02-24T00:00:01.496000","2026-02-24T00:00:01.497000","2026-02-24T00:00:01.499000","2026-02-24T00:00:01.500000","2026-02-24T00:00:01.501000","2026-02-24T00:00:01.502000","2026-02-24T00:00:01.503000","2026-02-24T00:00:01.505000","2026-02-24T00:00:01.506000","2026-02-24T00:00:01.507000","2026-02-24T00:00:01.508000","2026-02-24T00:00:01.510000","2026-02-24T00:00:01.511000","2026-02-24T00:00:01.512000","2026-02-24T00:00:01.513000","2026-02-24T00:00:01.514000","2026-02-24T00:00:01.516000","2026-02-24T00:00:01.517000","2026-02-24T00:00:01.518000","2026-02-24T00:00:01.519000","2026-02-24T00:00:01.521000","2026-02-24T00:00:01.522000","2026-02-24T00:00:01.523000","2026-02-24T00:00:01.524000","2026-02-24T00:00:01.525000","2026-02-24T00:00:01.527000","2026-02-24T00:00:01.528000","2026-02-24T00:00:01.529000","2026-02-24T00:00:01.530000","2026-02-24T00:00:01.531000","2026-02-24T00:00:01.533000","2026-02-24T00:00:01.534000","2026-02-24T00:00:01.535000","2026-02-24T00:00:01.536000","2026-02-24T00:00:01.538000","2026-02-24T00:00:01.539000","2026-02-24T00:00:01.540000","2026-02-24T00:00:01.541000","2026-02-24T00:00:01.542000","2026-02-24T00:00:01.544000","2026-02-24T00:00:01.545000","2026-02-24T00:00:01.546000","2026-02-24T00:00:01.547000","2026-02-24T00:00:01.548000","2026-02-24T00:00:01.550000","2026-02-24T00:00:01.551000","2026-02-24T00:00:01.552000","2026-02-24T00:00:01.553000","2026-02-24T00:00:01.555000","2026-02-24T00:00:01.556000","2026-02-24T00:00:01.557000","2026-02-24T00:00:01.559000","2026-02-24T00:00:01.560000","2026-02-24T00:00:01.561000","2026-02-24T00:00:01.562000","2026-02-24T00:00:01.563000","2026-02-24T00:00:01.568000","2026-02-24T00:00:01.570000","2026-02-24T00:00:01.572000","2026-02-24T00:00:01.573000","2026-02-24T00:00:01.574000","2026-02-24T00:00:01.575000","2026-02-24T00:00:01.576000","2026-02-24T00:00:01.578000","2026-02-24T00:00:01.579000","2026-02-24T00:00:01.580000","2026-02-24T00:00:01.581000","2026-02-24T00:00:01.582000","2026-02-24T00:00:01.584000","2026-02-24T00:00:01.585000","2026-02-24T00:00:01.586000","2026-02-24T00:00:01.587000","2026-02-24T00:00:01.589000","2026-02-24T00:00:01.590000","2026-02-24T00:00:01.591000","2026-02-24T00:00:01.592000","2026-02-24T00:00:01.593000","2026-02-24T00:00:01.595000","2026-02-24T00:00:01.596000","2026-02-24T00:00:01.598000","2026-02-24T00:00:01.599000","2026-02-24T00:00:01.600000","2026-02-24T00:00:01.601000","2026-02-24T00:00:01.602000","2026-02-24T00:00:01.603000","2026-02-24T00:00:01.604000","2026-02-24T00:00:01.606000","2026-02-24T00:00:01.607000","2026-02-24T00:00:01.608000","2026-02-24T00:00:01.609000","2026-02-24T00:00:01.610000","2026-02-24T00:00:01.612000","2026-02-24T00:00:01.613000","2026-02-24T00:00:01.614000","2026-02-24T00:00:01.615000","2026-02-24T00:00:01.617000","2026-02-24T00:00:01.618000","2026-02-24T00:00:01.619000","2026-02-24T00:00:01.621000","2026-02-24T00:00:01.622000","2026-02-24T00:00:01.623000","2026-02-24T00:00:01.624000","2026-02-24T00:00:01.626000","2026-02-24T00:00:01.626000","2026-02-24T00:00:01.628000","2026-02-24T00:00:01.629000","2026-02-24T00:00:01.630000","2026-02-24T00:00:01.631000","2026-02-24T00:00:01.632000","2026-02-24T00:00:01.634000","2026-02-24T00:00:01.635000","2026-02-24T00:00:01.636000","2026-02-24T00:00:01.637000","2026-02-24T00:00:01.639000","2026-02-24T00:00:01.640000","2026-02-24T00:00:01.641000","2026-02-24T00:00:01.642000","2026-02-24T00:00:01.644000","2026-02-24T00:00:01.645000","2026-02-24T00:00:01.646000","2026-02-24T00:00:01.648000","2026-02-24T00:00:01.649000","2026-02-24T00:00:01.650000","2026-02-24T00:00:01.651000","2026-02-24T00:00:01.652000","2026-02-24T00:00:01.653000","2026-02-24T00:00:01.655000","2026-02-24T00:00:01.656000","2026-02-24T00:00:01.657000","2026-02-24T00:00:01.658000","2026-02-24T00:00:01.659000","2026-02-24T00:00:01.661000","2026-02-24T00:00:01.662000","2026-02-24T00:00:01.663000","2026-02-24T00:00:01.664000","2026-02-24T00:00:01.666000","2026-02-24T00:00:01.667000","2026-02-24T00:00:01.668000","2026-02-24T00:00:01.669000","2026-02-24T00:00:01.671000","2026-02-24T00:00:01.672000","2026-02-24T00:00:01.673000","2026-02-24T00:00:01.675000","2026-02-24T00:00:01.676000","2026-02-24T00:00:01.677000","2026-02-24T00:00:01.678000","2026-02-24T00:00:01.679000","2026-02-24T00:00:01.680000","2026-02-24T00:00:01.681000","2026-02-24T00:00:01.683000","2026-02-24T00:00:01.684000","2026-02-24T00:00:01.685000","2026-02-24T00:00:01.686000","2026-02-24T00:00:01.688000","2026-02-24T00:00:01.689000","2026-02-24T00:00:01.690000","2026-02-24T00:00:01.691000","2026-02-24T00:00:01.692000","2026-02-24T00:00:01.694000","2026-02-24T00:00:01.695000","2026-02-24T00:00:01.696000","2026-02-24T00:00:01.698000","2026-02-24T00:00:01.699000","2026-02-24T00:00:01.700000","2026-02-24T00:00:01.702000","2026-02-24T00:00:01.703000","2026-02-24T00:00:01.703000","2026-02-24T00:00:01.705000","2026-02-24T00:00:01.706000","2026-02-24T00:00:01.707000","2026-02-24T00:00:01.708000","2026-02-24T00:00:01.710000","2026-02-24T00:00:01.711000","2026-02-24T00:00:01.712000","2026-02-24T00:00:01.714000","2026-02-24T00:00:01.715000","2026-02-24T00:00:01.716000","2026-02-24T00:00:01.717000","2026-02-24T00:00:01.719000","2026-02-24T00:00:01.720000","2026-02-24T00:00:01.721000","2026-02-24T00:00:01.723000","2026-02-24T00:00:01.724000","2026-02-24T00:00:01.725000","2026-02-24T00:00:01.727000","2026-02-24T00:00:01.728000","2026-02-24T00:00:01.729000","2026-02-24T00:00:01.730000","2026-02-24T00:00:01.731000","2026-02-24T00:00:01.733000","2026-02-24T00:00:01.734000","2026-02-24T00:00:01.735000","2026-02-24T00:00:01.736000","2026-02-24T00:00:01.738000","2026-02-24T00:00:01.739000","2026-02-24T00:00:01.740000","2026-02-24T00:00:01.742000","2026-02-24T00:00:01.743000","2026-02-24T00:00:01.744000","2026-02-24T00:00:01.745000","2026-02-24T00:00:01.746000","2026-02-24T00:00:01.748000","2026-02-24T00:00:01.749000","2026-02-24T00:00:01.750000","2026-02-24T00:00:01.752000","2026-02-24T00:00:01.753000","2026-02-24T00:00:01.754000","2026-02-24T00:00:01.755000","2026-02-24T00:00:01.757000","2026-02-24T00:00:01.758000","2026-02-24T00:00:01.759000","2026-02-24T00:00:01.760000","2026-02-24T00:00:01.762000","2026-02-24T00:00:01.763000","2026-02-24T00:00:01.764000","2026-02-24T00:00:01.766000","2026-02-24T00:00:01.767000","2026-02-24T00:00:01.768000","2026-02-24T00:00:01.769000","2026-02-24T00:00:01.771000","2026-02-24T00:00:01.772000","2026-02-24T00:00:01.773000","2026-02-24T00:00:01.774000","2026-02-24T00:00:01.776000","2026-02-24T00:00:01.777000","2026-02-24T00:00:01.778000","2026-02-24T00:00:01.779000","2026-02-24T00:00:01.781000","2026-02-24T00:00:01.782000","2026-02-24T00:00:01.784000","2026-02-24T00:00:01.785000","2026-02-24T00:00:01.786000","2026-02-24T00:00:01.787000","2026-02-24T00:00:01.788000","2026-02-24T00:00:01.790000","2026-02-24T00:00:01.791000","2026-02-24T00:00:01.792000","2026-02-24T00:00:01.793000","2026-02-24T00:00:01.795000","2026-02-24T00:00:01.796000","2026-02-24T00:00:01.797000","2026-02-24T00:00:01.799000","2026-02-24T00:00:01.800000","2026-02-24T00:00:01.801000","2026-02-24T00:00:01.803000","2026-02-24T00:00:01.804000","2026-02-24T00:00:01.805000","2026-02-24T00:00:01.806000","2026-02-24T00:00:01.807000","2026-02-24T00:00:01.809000","2026-02-24T00:00:01.810000","2026-02-24T00:00:01.811000","2026-02-24T00:00:01.812000","2026-02-24T00:00:01.814000","2026-02-24T00:00:01.815000","2026-02-24T00:00:01.817000","2026-02-24T00:00:01.818000","2026-02-24T00:00:01.819000","2026-02-24T00:00:01.820000","2026-02-24T00:00:01.821000","2026-02-24T00:00:01.823000","2026-02-24T00:00:01.824000","2026-02-24T00:00:01.825000","2026-02-24T00:00:01.827000","2026-02-24T00:00:01.828000","2026-02-24T00:00:01.829000","2026-02-24T00:00:01.830000","2026-02-24T00:00:01.831000","2026-02-24T00:00:01.833000","2026-02-24T00:00:01.834000","2026-02-24T00:00:01.835000","2026-02-24T00:00:01.836000","2026-02-24T00:00:01.838000","2026-02-24T00:00:01.839000","2026-02-24T00:00:01.841000","2026-02-24T00:00:01.842000","2026-02-24T00:00:01.843000","2026-02-24T00:00:01.844000","2026-02-24T00:00:01.846000","2026-02-24T00:00:01.851000","2026-02-24T00:00:01.852000","2026-02-24T00:00:01.857000","2026-02-24T00:00:01.857000","2026-02-24T00:00:01.858000","2026-02-24T00:00:01.860000","2026-02-24T00:00:01.862000","2026-02-24T00:00:01.862000","2026-02-24T00:00:01.864000","2026-02-24T00:00:01.865000","2026-02-24T00:00:01.866000","2026-02-24T00:00:01.867000","2026-02-24T00:00:01.869000","2026-02-24T00:00:01.870000","2026-02-24T00:00:01.871000","2026-02-24T00:00:01.873000","2026-02-24T00:00:01.874000","2026-02-24T00:00:01.875000","2026-02-24T00:00:01.877000","2026-02-24T00:00:01.878000","2026-02-24T00:00:01.879000","2026-02-24T00:00:01.881000","2026-02-24T00:00:01.882000","2026-02-24T00:00:01.885000","2026-02-24T00:00:01.886000","2026-02-24T00:00:01.887000","2026-02-24T00:00:01.889000","2026-02-24T00:00:01.890000","2026-02-24T00:00:01.475000","2026-02-24T00:00:01.476000","2026-02-24T00:00:01.477000","2026-02-24T00:00:01.478000","2026-02-24T00:00:01.480000","2026-02-24T00:00:01.481000","2026-02-24T00:00:01.482000","2026-02-24T00:00:01.483000","2026-02-24T00:00:01.484000","2026-02-24T00:00:01.485000","2026-02-24T00:00:01.487000","2026-02-24T00:00:01.488000","2026-02-24T00:00:01.489000","2026-02-24T00:00:01.490000","2026-02-24T00:00:01.491000","2026-02-24T00:00:01.493000","2026-02-24T00:00:01.494000","2026-02-24T00:00:01.495000","2026-02-24T00:00:01.496000","2026-02-24T00:00:01.497000","2026-02-24T00:00:01.499000","2026-02-24T00:00:01.500000","2026-02-24T00:00:01.501000","2026-02-24T00:00:01.502000","2026-02-24T00:00:01.503000","2026-02-24T00:00:01.505000","2026-02-24T00:00:01.506000","2026-02-24T00:00:01.507000","2026-02-24T00:00:01.508000","2026-02-24T00:00:01.510000","2026-02-24T00:00:01.511000","2026-02-24T00:00:01.512000","2026-02-24T00:00:01.513000","2026-02-24T00:00:01.514000","2026-02-24T00:00:01.516000","2026-02-24T00:00:01.517000","2026-02-24T00:00:01.518000","2026-02-24T00:00:01.519000","2026-02-24T00:00:01.521000","2026-02-24T00:00:01.522000","2026-02-24T00:00:01.523000","2026-02-24T00:00:01.524000","2026-02-24T00:00:01.525000","2026-02-24T00:00:01.527000","2026-02-24T00:00:01.528000","2026-02-24T00:00:01.529000","2026-02-24T00:00:01.530000","2026-02-24T00:00:01.531000","2026-02-24T00:00:01.533000","2026-02-24T00:00:01.534000","2026-02-24T00:00:01.535000","2026-02-24T00:00:01.536000","2026-02-24T00:00:01.538000","2026-02-24T00:00:01.539000","2026-02-24T00:00:01.540000","2026-02-24T00:00:01.541000","2026-02-24T00:00:01.542000","2026-02-24T00:00:01.544000","2026-02-24T00:00:01.545000","2026-02-24T00:00:01.546000","2026-02-24T00:00:01.547000","2026-02-24T00:00:01.548000","2026-02-24T00:00:01.550000","2026-02-24T00:00:01.551000","2026-02-24T00:00:01.552000","2026-02-24T00:00:01.553000","2026-02-24T00:00:01.555000","2026-02-24T00:00:01.556000","2026-02-24T00:00:01.557000","2026-02-24T00:00:01.559000","2026-02-24T00:00:01.559000","2026-02-24T00:00:01.561000","2026-02-24T00:00:01.562000","2026-02-24T00:00:01.563000","2026-02-24T00:00:01.568000","2026-02-24T00:00:01.570000","2026-02-24T00:00:01.571000","2026-02-24T00:00:01.573000","2026-02-24T00:00:01.574000","2026-02-24T00:00:01.575000","2026-02-24T00:00:01.576000","2026-02-24T00:00:01.578000","2026-02-24T00:00:01.570000","2026-02-24T00:00:01.572000","2026-02-24T00:00:01.573000","2026-02-24T00:00:01.574000","2026-02-24T00:00:01.575000","2026-02-24T00:00:01.576000","2026-02-24T00:00:01.578000","2026-02-24T00:00:01.579000","2026-02-24T00:00:01.580000","2026-02-24T00:00:01.581000","2026-02-24T00:00:01.582000","2026-02-24T00:00:01.584000","2026-02-24T00:00:01.585000","2026-02-24T00:00:01.586000","2026-02-24T00:00:01.587000","2026-02-24T00:00:01.588000","2026-02-24T00:00:01.590000","2026-02-24T00:00:01.591000","2026-02-24T00:00:01.592000","2026-02-24T00:00:01.593000","2026-02-24T00:00:01.595000","2026-02-24T00:00:01.596000","2026-02-24T00:00:01.597000","2026-02-24T00:00:01.599000","2026-02-24T00:00:01.600000","2026-02-24T00:00:01.601000","2026-02-24T00:00:01.602000","2026-02-24T00:00:01.603000","2026-02-24T00:00:01.604000","2026-02-24T00:00:01.606000","2026-02-24T00:00:01.607000","2026-02-24T00:00:01.608000","2026-02-24T00:00:01.609000","2026-02-24T00:00:01.610000","2026-02-24T00:00:01.612000","2026-02-24T00:00:01.613000","2026-02-24T00:00:01.614000","2026-02-24T00:00:01.615000","2026-02-24T00:00:01.617000","2026-02-24T00:00:01.618000","2026-02-24T00:00:01.619000","2026-02-24T00:00:01.621000","2026-02-24T00:00:01.622000","2026-02-24T00:00:01.623000","2026-02-24T00:00:01.624000","2026-02-24T00:00:01.625000","2026-02-24T00:00:01.626000","2026-02-24T00:00:01.628000","2026-02-24T00:00:01.629000","2026-02-24T00:00:01.630000","2026-02-24T00:00:01.631000","2026-02-24T00:00:01.632000","2026-02-24T00:00:01.634000","2026-02-24T00:00:01.635000","2026-02-24T00:00:01.636000","2026-02-24T00:00:01.637000","2026-02-24T00:00:01.639000","2026-02-24T00:00:01.640000","2026-02-24T00:00:01.641000","2026-02-24T00:00:01.642000","2026-02-24T00:00:01.644000","2026-02-24T00:00:01.645000","2026-02-24T00:00:01.646000","2026-02-24T00:00:01.648000","2026-02-24T00:00:01.649000","2026-02-24T00:00:01.650000","2026-02-24T00:00:01.651000","2026-02-24T00:00:01.652000","2026-02-24T00:00:01.653000","2026-02-24T00:00:01.654000","2026-02-24T00:00:01.656000","2026-02-24T00:00:01.657000","2026-02-24T00:00:01.658000","2026-02-24T00:00:01.659000","2026-02-24T00:00:01.661000","2026-02-24T00:00:01.662000","2026-02-24T00:00:01.663000","2026-02-24T00:00:01.664000","2026-02-24T00:00:01.665000","2026-02-24T00:00:01.667000","2026-02-24T00:00:01.668000","2026-02-24T00:00:01.669000","2026-02-24T00:00:01.671000","2026-02-24T00:00:01.672000","2026-02-24T00:00:01.673000","2026-02-24T00:00:01.675000","2026-02-24T00:00:01.676000","2026-02-24T00:00:01.677000","2026-02-24T00:00:01.678000","2026-02-24T00:00:01.679000","2026-02-24T00:00:01.680000","2026-02-24T00:00:01.681000","2026-02-24T00:00:01.683000","2026-02-24T00:00:01.684000","2026-02-24T00:00:01.685000","2026-02-24T00:00:01.686000","2026-02-24T00:00:01.687000","2026-02-24T00:00:01.689000","2026-02-24T00:00:01.690000","2026-02-24T00:00:01.691000","2026-02-24T00:00:01.692000","2026-02-24T00:00:01.694000","2026-02-24T00:00:01.695000","2026-02-24T00:00:01.696000","2026-02-24T00:00:01.697000","2026-02-24T00:00:01.699000","2026-02-24T00:00:01.700000","2026-02-24T00:00:01.701000","2026-02-24T00:00:01.702000","2026-02-24T00:00:01.703000","2026-02-24T00:00:01.705000","2026-02-24T00:00:01.706000","2026-02-24T00:00:01.707000","2026-02-24T00:00:01.708000","2026-02-24T00:00:01.710000","2026-02-24T00:00:01.711000","2026-02-24T00:00:01.712000","2026-02-24T00:00:01.713000","2026-02-24T00:00:01.715000","2026-02-24T00:00:01.716000","2026-02-24T00:00:01.717000","2026-02-24T00:00:01.719000","2026-02-24T00:00:01.720000","2026-02-24T00:00:01.721000","2026-02-24T00:00:01.723000","2026-02-24T00:00:01.724000","2026-02-24T00:00:01.725000","2026-02-24T00:00:01.727000","2026-02-24T00:00:01.728000","2026-02-24T00:00:01.729000","2026-02-24T00:00:01.730000","2026-02-24T00:00:01.731000","2026-02-24T00:00:01.732000","2026-02-24T00:00:01.734000","2026-02-24T00:00:01.735000","2026-02-24T00:00:01.736000","2026-02-24T00:00:01.738000","2026-02-24T00:00:01.739000","2026-02-24T00:00:01.740000","2026-02-24T00:00:01.742000","2026-02-24T00:00:01.743000","2026-02-24T00:00:01.744000","2026-02-24T00:00:01.745000","2026-02-24T00:00:01.746000","2026-02-24T00:00:01.748000","2026-02-24T00:00:01.749000","2026-02-24T00:00:01.750000","2026-02-24T00:00:01.751000","2026-02-24T00:00:01.753000","2026-02-24T00:00:01.754000","2026-02-24T00:00:01.755000","2026-02-24T00:00:01.757000","2026-02-24T00:00:01.758000","2026-02-24T00:00:01.759000","2026-02-24T00:00:01.760000","2026-02-24T00:00:01.762000","2026-02-24T00:00:01.763000","2026-02-24T00:00:01.764000","2026-02-24T00:00:01.766000","2026-02-24T00:00:01.767000","2026-02-24T00:00:01.768000","2026-02-24T00:00:01.769000","2026-02-24T00:00:01.770000","2026-02-24T00:00:01.772000","2026-02-24T00:00:01.773000","2026-02-24T00:00:01.774000","2026-02-24T00:00:01.776000","2026-02-24T00:00:01.777000","2026-02-24T00:00:01.778000","2026-02-24T00:00:01.779000","2026-02-24T00:00:01.781000","2026-02-24T00:00:01.782000","2026-02-24T00:00:01.783000","2026-02-24T00:00:01.785000","2026-02-24T00:00:01.786000","2026-02-24T00:00:01.787000","2026-02-24T00:00:01.788000","2026-02-24T00:00:01.790000","2026-02-24T00:00:01.791000","2026-02-24T00:00:01.792000","2026-02-24T00:00:01.793000","2026-02-24T00:00:01.795000","2026-02-24T00:00:01.796000","2026-02-24T00:00:01.797000","2026-02-24T00:00:01.799000","2026-02-24T00:00:01.800000","2026-02-24T00:00:01.801000","2026-02-24T00:00:01.803000","2026-02-24T00:00:01.804000","2026-02-24T00:00:01.805000","2026-02-24T00:00:01.806000","2026-02-24T00:00:01.807000","2026-02-24T00:00:01.809000","2026-02-24T00:00:01.810000","2026-02-24T00:00:01.811000","2026-02-24T00:00:01.812000","2026-02-24T00:00:01.814000","2026-02-24T00:00:01.815000","2026-02-24T00:00:01.817000","2026-02-24T00:00:01.818000","2026-02-24T00:00:01.819000","2026-02-24T00:00:01.820000","2026-02-24T00:00:01.821000","2026-02-24T00:00:01.822000","2026-02-24T00:00:01.824000","2026-02-24T00:00:01.825000","2026-02-24T00:00:01.826000","2026-02-24T00:00:01.828000","2026-02-24T00:00:01.829000","2026-02-24T00:00:01.830000","2026-02-24T00:00:01.831000","2026-02-24T00:00:01.833000","2026-02-24T00:00:01.834000","2026-02-24T00:00:01.835000","2026-02-24T00:00:01.836000","2026-02-24T00:00:01.838000","2026-02-24T00:00:01.839000","2026-02-24T00:00:01.841000","2026-02-24T00:00:01.842000","2026-02-24T00:00:01.843000","2026-02-24T00:00:01.844000","2026-02-24T00:00:01.846000","2026-02-24T00:00:01.851000","2026-02-24T00:00:01.852000","2026-02-24T00:00:01.857000","2026-02-24T00:00:01.857000","2026-02-24T00:00:01.858000","2026-02-24T00:00:01.860000","2026-02-24T00:00:01.862000","2026-02-24T00:00:01.862000","2026-02-24T00:00:01.864000","2026-02-24T00:00:01.865000","2026-02-24T00:00:01.866000","2026-02-24T00:00:01.867000","2026-02-24T00:00:01.869000","2026-02-24T00:00:01.870000","2026-02-24T00:00:01.871000","2026-02-24T00:00:01.873000","2026-02-24T00:00:01.874000","2026-02-24T00:00:01.875000","2026-02-24T00:00:01.877000","2026-02-24T00:00:01.878000","2026-02-24T00:00:01.879000","2026-02-24T00:00:01.881000","2026-02-24T00:00:01.882000","2026-02-24T00:00:01.885000","2026-02-24T00:00:01.886000","2026-02-24T00:00:01.887000","2026-02-24T00:00:01.889000","2026-02-24T00:00:01.890000","2026-02-24T00:00:01.896000","2026-02-24T00:00:01.898000","2026-02-24T00:00:01.899000","2026-02-24T00:00:01.900000","2026-02-24T00:00:01.901000","2026-02-24T00:00:01.902000","2026-02-24T00:00:01.906000","2026-02-24T00:00:01.913000","2026-02-24T00:00:01.914000","2026-02-24T00:00:01.915000","2026-02-24T00:00:01.916000","2026-02-24T00:00:01.917000","2026-02-24T00:00:01.919000","2026-02-24T00:00:01.920000","2026-02-24T00:00:01.921000","2026-02-24T00:00:01.923000","2026-02-24T00:00:01.924000","2026-02-24T00:00:01.925000","2026-02-24T00:00:01.927000","2026-02-24T00:00:01.928000","2026-02-24T00:00:01.929000","2026-02-24T00:00:01.931000","2026-02-24T00:00:01.932000","2026-02-24T00:00:01.933000","2026-02-24T00:00:01.935000","2026-02-24T00:00:01.936000","2026-02-24T00:00:01.937000","2026-02-24T00:00:01.939000","2026-02-24T00:00:01.940000","2026-02-24T00:00:01.941000","2026-02-24T00:00:01.942000","2026-02-24T00:00:01.944000","2026-02-24T00:00:01.945000","2026-02-24T00:00:01.946000","2026-02-24T00:00:01.948000","2026-02-24T00:00:01.949000","2026-02-24T00:00:01.950000","2026-02-24T00:00:01.952000","2026-02-24T00:00:01.953000","2026-02-24T00:00:01.955000","2026-02-24T00:00:01.956000","2026-02-24T00:00:01.957000","2026-02-24T00:00:01.959000","2026-02-24T00:00:01.960000","2026-02-24T00:00:01.961000","2026-02-24T00:00:01.962000","2026-02-24T00:00:01.964000","2026-02-24T00:00:01.965000","2026-02-24T00:00:01.966000","2026-02-24T00:00:01.968000","2026-02-24T00:00:01.969000","2026-02-24T00:00:01.971000","2026-02-24T00:00:01.972000","2026-02-24T00:00:01.973000","2026-02-24T00:00:01.974000","2026-02-24T00:00:01.976000","2026-02-24T00:00:01.977000","2026-02-24T00:00:01.978000","2026-02-24T00:00:01.980000","2026-02-24T00:00:01.981000","2026-02-24T00:00:01.982000","2026-02-24T00:00:01.984000","2026-02-24T00:00:01.985000","2026-02-24T00:00:01.986000","2026-02-24T00:00:01.988000","2026-02-24T00:00:01.989000","2026-02-24T00:00:01.990000","2026-02-24T00:00:01.992000","2026-02-24T00:00:01.993000","2026-02-24T00:00:01.994000","2026-02-24T00:00:01.996000","2026-02-24T00:00:01.997000","2026-02-24T00:00:01.998000","2026-02-24T00:00:02.000000","2026-02-24T00:00:02.001000","2026-02-24T00:00:02.002000","2026-02-24T00:00:02.004000","2026-02-24T00:00:02.005000","2026-02-24T00:00:02.007000","2026-02-24T00:00:02.008000","2026-02-24T00:00:02.009000","2026-02-24T00:00:02.011000","2026-02-24T00:00:02.012000","2026-02-24T00:00:02.013000","2026-02-24T00:00:02.014000","2026-02-24T00:00:02.016000","2026-02-24T00:00:02.017000","2026-02-24T00:00:02.018000","2026-02-24T00:00:02.020000","2026-02-24T00:00:02.021000","2026-02-24T00:00:02.023000","2026-02-24T00:00:02.024000","2026-02-24T00:00:02.025000","2026-02-24T00:00:02.027000","2026-02-24T00:00:02.028000","2026-02-24T00:00:02.029000","2026-02-24T00:00:02.031000","2026-02-24T00:00:02.032000","2026-02-24T00:00:02.033000","2026-02-24T00:00:02.035000","2026-02-24T00:00:02.036000","2026-02-24T00:00:02.037000","2026-02-24T00:00:02.038000","2026-02-24T00:00:02.040000","2026-02-24T00:00:02.041000","2026-02-24T00:00:02.042000","2026-02-24T00:00:02.044000","2026-02-24T00:00:02.045000","2026-02-24T00:00:02.046000","2026-02-24T00:00:02.048000","2026-02-24T00:00:02.049000","2026-02-24T00:00:02.050000","2026-02-24T00:00:02.052000","2026-02-24T00:00:02.053000","2026-02-24T00:00:02.057000","2026-02-24T00:00:02.057000","2026-02-24T00:00:02.059000","2026-02-24T00:00:02.060000","2026-02-24T00:00:02.068000","2026-02-24T00:00:02.069000","2026-02-24T00:00:02.071000","2026-02-24T00:00:02.072000","2026-02-24T00:00:02.073000","2026-02-24T00:00:02.074000","2026-02-24T00:00:02.076000","2026-02-24T00:00:02.077000","2026-02-24T00:00:02.078000","2026-02-24T00:00:02.080000","2026-02-24T00:00:02.081000","2026-02-24T00:00:02.082000","2026-02-24T00:00:02.084000","2026-02-24T00:00:02.085000","2026-02-24T00:00:02.087000","2026-02-24T00:00:02.088000","2026-02-24T00:00:02.089000","2026-02-24T00:00:01.852000","2026-02-24T00:00:01.856000","2026-02-24T00:00:01.857000","2026-02-24T00:00:01.858000","2026-02-24T00:00:01.860000","2026-02-24T00:00:01.861000","2026-02-24T00:00:01.862000","2026-02-24T00:00:01.864000","2026-02-24T00:00:01.865000","2026-02-24T00:00:01.866000","2026-02-24T00:00:01.867000","2026-02-24T00:00:01.869000","2026-02-24T00:00:01.870000","2026-02-24T00:00:01.871000","2026-02-24T00:00:01.872000","2026-02-24T00:00:01.874000","2026-02-24T00:00:01.875000","2026-02-24T00:00:01.877000","2026-02-24T00:00:01.878000","2026-02-24T00:00:01.879000","2026-02-24T00:00:01.880000","2026-02-24T00:00:01.882000","2026-02-24T00:00:01.885000","2026-02-24T00:00:01.886000","2026-02-24T00:00:01.887000","2026-02-24T00:00:01.889000","2026-02-24T00:00:01.890000","2026-02-24T00:00:01.896000","2026-02-24T00:00:01.898000","2026-02-24T00:00:01.899000","2026-02-24T00:00:01.900000","2026-02-24T00:00:01.901000","2026-02-24T00:00:01.902000","2026-02-24T00:00:01.906000","2026-02-24T00:00:01.913000","2026-02-24T00:00:01.914000","2026-02-24T00:00:01.915000","2026-02-24T00:00:01.857000","2026-02-24T00:00:01.858000","2026-02-24T00:00:01.860000","2026-02-24T00:00:01.862000","2026-02-24T00:00:01.862000","2026-02-24T00:00:01.864000","2026-02-24T00:00:01.865000","2026-02-24T00:00:01.866000","2026-02-24T00:00:01.867000","2026-02-24T00:00:01.869000","2026-02-24T00:00:01.870000","2026-02-24T00:00:01.871000","2026-02-24T00:00:01.873000","2026-02-24T00:00:01.874000","2026-02-24T00:00:01.875000","2026-02-24T00:00:01.877000","2026-02-24T00:00:01.878000","2026-02-24T00:00:01.879000","2026-02-24T00:00:01.881000","2026-02-24T00:00:01.882000","2026-02-24T00:00:01.885000","2026-02-24T00:00:01.886000","2026-02-24T00:00:01.887000","2026-02-24T00:00:01.889000","2026-02-24T00:00:01.890000","2026-02-24T00:00:01.896000","2026-02-24T00:00:01.898000","2026-02-24T00:00:01.899000","2026-02-24T00:00:01.900000","2026-02-24T00:00:01.901000","2026-02-24T00:00:01.902000","2026-02-24T00:00:01.906000","2026-02-24T00:00:01.913000","2026-02-24T00:00:01.914000","2026-02-24T00:00:01.915000","2026-02-24T00:00:01.916000","2026-02-24T00:00:01.917000","2026-02-24T00:00:01.919000","2026-02-24T00:00:01.920000","2026-02-24T00:00:01.921000","2026-02-24T00:00:01.923000","2026-02-24T00:00:01.924000","2026-02-24T00:00:01.925000","2026-02-24T00:00:01.927000","2026-02-24T00:00:01.928000","2026-02-24T00:00:01.929000","2026-02-24T00:00:01.931000","2026-02-24T00:00:01.932000","2026-02-24T00:00:01.933000","2026-02-24T00:00:01.935000","2026-02-24T00:00:01.936000","2026-02-24T00:00:01.937000","2026-02-24T00:00:01.939000","2026-02-24T00:00:01.940000","2026-02-24T00:00:01.941000","2026-02-24T00:00:01.942000","2026-02-24T00:00:01.944000","2026-02-24T00:00:01.945000","2026-02-24T00:00:01.947000","2026-02-24T00:00:01.948000","2026-02-24T00:00:01.949000","2026-02-24T00:00:01.951000","2026-02-24T00:00:01.952000","2026-02-24T00:00:01.953000","2026-02-24T00:00:01.955000","2026-02-24T00:00:01.956000","2026-02-24T00:00:01.957000","2026-02-24T00:00:01.959000","2026-02-24T00:00:01.960000","2026-02-24T00:00:01.961000","2026-02-24T00:00:01.963000","2026-02-24T00:00:01.964000","2026-02-24T00:00:01.965000","2026-02-24T00:00:01.967000","2026-02-24T00:00:01.968000","2026-02-24T00:00:01.969000","2026-02-24T00:00:01.971000","2026-02-24T00:00:01.972000","2026-02-24T00:00:01.973000","2026-02-24T00:00:01.974000","2026-02-24T00:00:01.976000","2026-02-24T00:00:01.977000","2026-02-24T00:00:01.978000","2026-02-24T00:00:01.980000","2026-02-24T00:00:01.981000","2026-02-24T00:00:01.982000","2026-02-24T00:00:01.984000","2026-02-24T00:00:01.985000","2026-02-24T00:00:01.986000","2026-02-24T00:00:01.988000","2026-02-24T00:00:01.989000","2026-02-24T00:00:01.991000","2026-02-24T00:00:01.992000","2026-02-24T00:00:01.993000","2026-02-24T00:00:01.994000","2026-02-24T00:00:01.996000","2026-02-24T00:00:01.997000","2026-02-24T00:00:01.998000","2026-02-24T00:00:02.000000","2026-02-24T00:00:02.001000","2026-02-24T00:00:02.002000","2026-02-24T00:00:02.004000","2026-02-24T00:00:02.005000","2026-02-24T00:00:02.007000","2026-02-24T00:00:02.008000","2026-02-24T00:00:02.009000","2026-02-24T00:00:02.011000","2026-02-24T00:00:02.012000","2026-02-24T00:00:02.013000","2026-02-24T00:00:02.014000","2026-02-24T00:00:02.016000","2026-02-24T00:00:02.017000","2026-02-24T00:00:02.018000","2026-02-24T00:00:02.020000","2026-02-24T00:00:02.021000","2026-02-24T00:00:02.023000","2026-02-24T00:00:02.024000","2026-02-24T00:00:02.025000","2026-02-24T00:00:02.027000","2026-02-24T00:00:02.028000","2026-02-24T00:00:02.029000","2026-02-24T00:00:02.031000","2026-02-24T00:00:02.032000","2026-02-24T00:00:02.033000","2026-02-24T00:00:02.035000","2026-02-24T00:00:02.036000","2026-02-24T00:00:02.037000","2026-02-24T00:00:02.038000","2026-02-24T00:00:02.040000","2026-02-24T00:00:02.041000","2026-02-24T00:00:02.042000","2026-02-24T00:00:02.044000","2026-02-24T00:00:02.045000","2026-02-24T00:00:02.046000","2026-02-24T00:00:02.048000","2026-02-24T00:00:02.049000","2026-02-24T00:00:02.050000","2026-02-24T00:00:02.052000","2026-02-24T00:00:02.053000","2026-02-24T00:00:02.057000","2026-02-24T00:00:02.058000","2026-02-24T00:00:02.059000","2026-02-24T00:00:02.060000","2026-02-24T00:00:02.068000","2026-02-24T00:00:02.069000","2026-02-24T00:00:02.071000","2026-02-24T00:00:02.072000","2026-02-24T00:00:02.073000","2026-02-24T00:00:02.074000","2026-02-24T00:00:02.076000","2026-02-24T00:00:02.077000","2026-02-24T00:00:02.078000","2026-02-24T00:00:02.080000","2026-02-24T00:00:02.081000","2026-02-24T00:00:02.082000","2026-02-24T00:00:02.084000","2026-02-24T00:00:02.085000","2026-02-24T00:00:02.087000","2026-02-24T00:00:02.088000","2026-02-24T00:00:02.092000","2026-02-24T00:00:02.092000","2026-02-24T00:00:02.094000","2026-02-24T00:00:02.095000","2026-02-24T00:00:02.097000","2026-02-24T00:00:02.098000","2026-02-24T00:00:02.099000","2026-02-24T00:00:02.100000","2026-02-24T00:00:02.101000","2026-02-24T00:00:02.102000","2026-02-24T00:00:02.103000","2026-02-24T00:00:02.105000","2026-02-24T00:00:02.106000","2026-02-24T00:00:02.107000","2026-02-24T00:00:02.108000","2026-02-24T00:00:02.109000","2026-02-24T00:00:02.110000","2026-02-24T00:00:02.111000","2026-02-24T00:00:02.113000","2026-02-24T00:00:02.114000","2026-02-24T00:00:02.115000","2026-02-24T00:00:02.116000","2026-02-24T00:00:02.117000","2026-02-24T00:00:02.119000","2026-02-24T00:00:02.120000","2026-02-24T00:00:02.121000","2026-02-24T00:00:02.122000","2026-02-24T00:00:02.123000","2026-02-24T00:00:02.124000","2026-02-24T00:00:02.126000","2026-02-24T00:00:02.127000","2026-02-24T00:00:02.128000","2026-02-24T00:00:02.129000","2026-02-24T00:00:02.131000","2026-02-24T00:00:02.132000","2026-02-24T00:00:02.133000","2026-02-24T00:00:02.134000","2026-02-24T00:00:02.136000","2026-02-24T00:00:02.137000","2026-02-24T00:00:02.138000","2026-02-24T00:00:02.139000","2026-02-24T00:00:02.141000","2026-02-24T00:00:02.142000","2026-02-24T00:00:02.143000","2026-02-24T00:00:02.144000","2026-02-24T00:00:02.145000","2026-02-24T00:00:02.146000","2026-02-24T00:00:02.147000","2026-02-24T00:00:02.148000","2026-02-24T00:00:02.149000","2026-02-24T00:00:02.151000","2026-02-24T00:00:02.152000","2026-02-24T00:00:02.153000","2026-02-24T00:00:02.154000","2026-02-24T00:00:02.155000","2026-02-24T00:00:02.156000","2026-02-24T00:00:02.158000","2026-02-24T00:00:02.159000","2026-02-24T00:00:02.161000","2026-02-24T00:00:02.162000","2026-02-24T00:00:02.163000","2026-02-24T00:00:02.165000","2026-02-24T00:00:02.166000","2026-02-24T00:00:02.167000","2026-02-24T00:00:02.168000","2026-02-24T00:00:02.169000","2026-02-24T00:00:02.170000","2026-02-24T00:00:02.171000","2026-02-24T00:00:02.172000","2026-02-24T00:00:02.173000","2026-02-24T00:00:02.175000","2026-02-24T00:00:02.176000","2026-02-24T00:00:02.177000","2026-02-24T00:00:02.178000","2026-02-24T00:00:02.179000","2026-02-24T00:00:02.180000","2026-02-24T00:00:02.181000","2026-02-24T00:00:02.183000","2026-02-24T00:00:02.184000","2026-02-24T00:00:02.185000","2026-02-24T00:00:02.186000","2026-02-24T00:00:02.187000","2026-02-24T00:00:02.188000","2026-02-24T00:00:02.189000","2026-02-24T00:00:02.191000","2026-02-24T00:00:02.192000","2026-02-24T00:00:02.193000","2026-02-24T00:00:02.194000","2026-02-24T00:00:02.195000","2026-02-24T00:00:02.196000","2026-02-24T00:00:02.198000","2026-02-24T00:00:02.199000","2026-02-24T00:00:02.200000","2026-02-24T00:00:02.201000","2026-02-24T00:00:02.202000","2026-02-24T00:00:02.203000","2026-02-24T00:00:02.205000","2026-02-24T00:00:02.206000","2026-02-24T00:00:02.207000","2026-02-24T00:00:02.210000","2026-02-24T00:00:02.212000","2026-02-24T00:00:02.213000","2026-02-24T00:00:02.215000","2026-02-24T00:00:02.216000","2026-02-24T00:00:02.220000","2026-02-24T00:00:02.221000","2026-02-24T00:00:02.223000","2026-02-24T00:00:02.224000","2026-02-24T00:00:02.229000","2026-02-24T00:00:02.230000","2026-02-24T00:00:02.231000","2026-02-24T00:00:02.232000","2026-02-24T00:00:02.233000","2026-02-24T00:00:02.234000","2026-02-24T00:00:02.236000","2026-02-24T00:00:02.237000","2026-02-24T00:00:02.238000","2026-02-24T00:00:02.239000","2026-02-24T00:00:02.240000","2026-02-24T00:00:01.898000","2026-02-24T00:00:01.899000","2026-02-24T00:00:01.900000","2026-02-24T00:00:01.901000","2026-02-24T00:00:01.902000","2026-02-24T00:00:01.906000","2026-02-24T00:00:01.914000","2026-02-24T00:00:01.915000","2026-02-24T00:00:01.916000","2026-02-24T00:00:01.917000","2026-02-24T00:00:01.919000","2026-02-24T00:00:01.920000","2026-02-24T00:00:01.921000","2026-02-24T00:00:01.923000","2026-02-24T00:00:01.924000","2026-02-24T00:00:01.925000","2026-02-24T00:00:01.926000","2026-02-24T00:00:01.928000","2026-02-24T00:00:01.929000","2026-02-24T00:00:01.931000","2026-02-24T00:00:01.932000","2026-02-24T00:00:01.933000","2026-02-24T00:00:01.935000","2026-02-24T00:00:01.936000","2026-02-24T00:00:01.937000","2026-02-24T00:00:01.939000","2026-02-24T00:00:01.940000","2026-02-24T00:00:01.941000","2026-02-24T00:00:01.942000","2026-02-24T00:00:01.944000","2026-02-24T00:00:01.945000","2026-02-24T00:00:01.946000","2026-02-24T00:00:01.948000","2026-02-24T00:00:01.949000","2026-02-24T00:00:01.950000","2026-02-24T00:00:01.952000","2026-02-24T00:00:01.953000","2026-02-24T00:00:01.955000","2026-02-24T00:00:01.956000","2026-02-24T00:00:01.957000","2026-02-24T00:00:01.958000","2026-02-24T00:00:01.960000","2026-02-24T00:00:01.961000","2026-02-24T00:00:01.962000","2026-02-24T00:00:01.964000","2026-02-24T00:00:01.965000","2026-02-24T00:00:01.966000","2026-02-24T00:00:01.968000","2026-02-24T00:00:01.969000","2026-02-24T00:00:01.970000","2026-02-24T00:00:01.972000","2026-02-24T00:00:01.973000","2026-02-24T00:00:01.974000","2026-02-24T00:00:01.976000","2026-02-24T00:00:01.977000","2026-02-24T00:00:01.978000","2026-02-24T00:00:01.980000","2026-02-24T00:00:01.981000","2026-02-24T00:00:01.982000","2026-02-24T00:00:01.984000","2026-02-24T00:00:01.985000","2026-02-24T00:00:01.986000","2026-02-24T00:00:01.988000","2026-02-24T00:00:01.989000","2026-02-24T00:00:01.990000","2026-02-24T00:00:01.992000","2026-02-24T00:00:01.993000","2026-02-24T00:00:01.994000","2026-02-24T00:00:01.996000","2026-02-24T00:00:01.997000","2026-02-24T00:00:01.998000","2026-02-24T00:00:02.000000","2026-02-24T00:00:02.001000","2026-02-24T00:00:02.002000","2026-02-24T00:00:02.004000","2026-02-24T00:00:02.005000","2026-02-24T00:00:02.006000","2026-02-24T00:00:02.008000","2026-02-24T00:00:02.009000","2026-02-24T00:00:02.010000","2026-02-24T00:00:02.012000","2026-02-24T00:00:02.013000","2026-02-24T00:00:02.014000","2026-02-24T00:00:02.016000","2026-02-24T00:00:02.017000","2026-02-24T00:00:02.018000","2026-02-24T00:00:02.020000","2026-02-24T00:00:02.021000","2026-02-24T00:00:02.022000","2026-02-24T00:00:02.024000","2026-02-24T00:00:02.025000","2026-02-24T00:00:02.026000","2026-02-24T00:00:02.028000","2026-02-24T00:00:02.029000","2026-02-24T00:00:02.031000","2026-02-24T00:00:02.032000","2026-02-24T00:00:02.033000","2026-02-24T00:00:02.035000","2026-02-24T00:00:02.036000","2026-02-24T00:00:02.037000","2026-02-24T00:00:02.038000","2026-02-24T00:00:02.040000","2026-02-24T00:00:02.041000","2026-02-24T00:00:02.042000","2026-02-24T00:00:02.044000","2026-02-24T00:00:02.045000","2026-02-24T00:00:02.046000","2026-02-24T00:00:02.048000","2026-02-24T00:00:02.049000","2026-02-24T00:00:02.050000","2026-02-24T00:00:02.052000","2026-02-24T00:00:02.053000","2026-02-24T00:00:02.057000","2026-02-24T00:00:02.057000","2026-02-24T00:00:02.059000","2026-02-24T00:00:02.060000","2026-02-24T00:00:02.068000","2026-02-24T00:00:02.069000","2026-02-24T00:00:02.071000","2026-02-24T00:00:02.072000","2026-02-24T00:00:02.073000","2026-02-24T00:00:02.074000","2026-02-24T00:00:02.076000","2026-02-24T00:00:02.077000","2026-02-24T00:00:02.078000","2026-02-24T00:00:02.080000","2026-02-24T00:00:02.081000","2026-02-24T00:00:02.082000","2026-02-24T00:00:02.084000","2026-02-24T00:00:02.085000","2026-02-24T00:00:02.087000","2026-02-24T00:00:02.088000","2026-02-24T00:00:02.089000","2026-02-24T00:00:02.091000","2026-02-24T00:00:02.092000","2026-02-24T00:00:02.094000","2026-02-24T00:00:02.095000","2026-02-24T00:00:02.096000","2026-02-24T00:00:02.098000","2026-02-24T00:00:02.099000","2026-02-24T00:00:02.100000","2026-02-24T00:00:02.101000","2026-02-24T00:00:02.102000","2026-02-24T00:00:02.103000","2026-02-24T00:00:02.104000","2026-02-24T00:00:02.106000","2026-02-24T00:00:02.107000","2026-02-24T00:00:02.108000","2026-02-24T00:00:02.109000","2026-02-24T00:00:02.110000","2026-02-24T00:00:02.111000","2026-02-24T00:00:02.113000","2026-02-24T00:00:02.114000","2026-02-24T00:00:02.115000","2026-02-24T00:00:02.116000","2026-02-24T00:00:02.117000","2026-02-24T00:00:02.119000","2026-02-24T00:00:02.120000","2026-02-24T00:00:02.121000","2026-02-24T00:00:02.122000","2026-02-24T00:00:02.123000","2026-02-24T00:00:02.124000","2026-02-24T00:00:02.126000","2026-02-24T00:00:02.127000","2026-02-24T00:00:02.128000","2026-02-24T00:00:02.129000","2026-02-24T00:00:02.130000","2026-02-24T00:00:02.132000","2026-02-24T00:00:02.133000","2026-02-24T00:00:02.134000","2026-02-24T00:00:02.136000","2026-02-24T00:00:02.137000","2026-02-24T00:00:02.138000","2026-02-24T00:00:02.139000","2026-02-24T00:00:02.140000","2026-02-24T00:00:02.141000","2026-02-24T00:00:02.069000","2026-02-24T00:00:02.070000","2026-02-24T00:00:02.072000","2026-02-24T00:00:02.073000","2026-02-24T00:00:02.074000","2026-02-24T00:00:02.075000","2026-02-24T00:00:02.077000","2026-02-24T00:00:02.078000","2026-02-24T00:00:02.080000","2026-02-24T00:00:02.081000","2026-02-24T00:00:02.082000","2026-02-24T00:00:02.084000","2026-02-24T00:00:02.085000","2026-02-24T00:00:02.086000","2026-02-24T00:00:02.088000","2026-02-24T00:00:02.089000","2026-02-24T00:00:02.091000","2026-02-24T00:00:02.092000","2026-02-24T00:00:02.094000","2026-02-24T00:00:02.095000","2026-02-24T00:00:02.097000","2026-02-24T00:00:02.098000","2026-02-24T00:00:02.099000","2026-02-24T00:00:02.100000","2026-02-24T00:00:02.101000","2026-02-24T00:00:02.102000","2026-02-24T00:00:02.103000","2026-02-24T00:00:02.105000","2026-02-24T00:00:02.106000","2026-02-24T00:00:02.107000","2026-02-24T00:00:02.108000","2026-02-24T00:00:02.109000","2026-02-24T00:00:02.110000","2026-02-24T00:00:02.112000","2026-02-24T00:00:02.113000","2026-02-24T00:00:02.114000","2026-02-24T00:00:02.115000","2026-02-24T00:00:02.116000","2026-02-24T00:00:02.117000","2026-02-24T00:00:02.119000","2026-02-24T00:00:02.120000","2026-02-24T00:00:02.121000","2026-02-24T00:00:02.122000","2026-02-24T00:00:02.123000","2026-02-24T00:00:02.124000","2026-02-24T00:00:02.126000","2026-02-24T00:00:02.127000","2026-02-24T00:00:02.128000","2026-02-24T00:00:02.129000","2026-02-24T00:00:02.131000","2026-02-24T00:00:02.132000","2026-02-24T00:00:02.133000","2026-02-24T00:00:02.134000","2026-02-24T00:00:02.136000","2026-02-24T00:00:02.137000","2026-02-24T00:00:02.138000","2026-02-24T00:00:02.139000","2026-02-24T00:00:02.141000","2026-02-24T00:00:02.142000","2026-02-24T00:00:02.143000","2026-02-24T00:00:02.144000","2026-02-24T00:00:02.145000","2026-02-24T00:00:02.146000","2026-02-24T00:00:02.147000","2026-02-24T00:00:02.148000","2026-02-24T00:00:02.149000","2026-02-24T00:00:02.151000","2026-02-24T00:00:02.152000","2026-02-24T00:00:02.153000","2026-02-24T00:00:02.154000","2026-02-24T00:00:02.155000","2026-02-24T00:00:02.156000","2026-02-24T00:00:02.158000","2026-02-24T00:00:02.159000","2026-02-24T00:00:02.161000","2026-02-24T00:00:02.162000","2026-02-24T00:00:02.163000","2026-02-24T00:00:02.165000","2026-02-24T00:00:02.166000","2026-02-24T00:00:02.167000","2026-02-24T00:00:02.168000","2026-02-24T00:00:02.169000","2026-02-24T00:00:02.170000","2026-02-24T00:00:02.171000","2026-02-24T00:00:02.172000","2026-02-24T00:00:02.173000","2026-02-24T00:00:02.175000","2026-02-24T00:00:02.176000","2026-02-24T00:00:02.177000","2026-02-24T00:00:02.178000","2026-02-24T00:00:02.179000","2026-02-24T00:00:02.180000","2026-02-24T00:00:02.182000","2026-02-24T00:00:02.183000","2026-02-24T00:00:02.184000","2026-02-24T00:00:02.185000","2026-02-24T00:00:02.186000","2026-02-24T00:00:02.187000","2026-02-24T00:00:02.188000","2026-02-24T00:00:02.189000","2026-02-24T00:00:02.191000","2026-02-24T00:00:02.192000","2026-02-24T00:00:02.193000","2026-02-24T00:00:02.194000","2026-02-24T00:00:02.195000","2026-02-24T00:00:02.196000","2026-02-24T00:00:02.198000","2026-02-24T00:00:02.199000","2026-02-24T00:00:02.200000","2026-02-24T00:00:02.201000","2026-02-24T00:00:02.202000","2026-02-24T00:00:02.203000","2026-02-24T00:00:02.205000","2026-02-24T00:00:02.206000","2026-02-24T00:00:02.207000","2026-02-24T00:00:02.210000","2026-02-24T00:00:02.212000","2026-02-24T00:00:02.213000","2026-02-24T00:00:02.215000","2026-02-24T00:00:02.216000","2026-02-24T00:00:02.220000","2026-02-24T00:00:02.221000","2026-02-24T00:00:02.223000","2026-02-24T00:00:02.224000","2026-02-24T00:00:02.229000","2026-02-24T00:00:02.230000","2026-02-24T00:00:02.231000","2026-02-24T00:00:02.232000","2026-02-24T00:00:02.233000","2026-02-24T00:00:02.234000","2026-02-24T00:00:02.236000","2026-02-24T00:00:02.237000","2026-02-24T00:00:02.238000","2026-02-24T00:00:02.239000","2026-02-24T00:00:02.240000","2026-02-24T00:00:02.242000","2026-02-24T00:00:02.243000","2026-02-24T00:00:02.244000","2026-02-24T00:00:02.245000","2026-02-24T00:00:02.246000","2026-02-24T00:00:02.247000","2026-02-24T00:00:02.248000","2026-02-24T00:00:02.249000","2026-02-24T00:00:02.250000","2026-02-24T00:00:02.251000","2026-02-24T00:00:02.253000","2026-02-24T00:00:02.254000","2026-02-24T00:00:02.255000","2026-02-24T00:00:02.256000","2026-02-24T00:00:02.257000","2026-02-24T00:00:02.258000","2026-02-24T00:00:02.259000","2026-02-24T00:00:02.260000","2026-02-24T00:00:02.261000","2026-02-24T00:00:02.263000","2026-02-24T00:00:02.264000","2026-02-24T00:00:02.265000","2026-02-24T00:00:02.266000","2026-02-24T00:00:02.267000","2026-02-24T00:00:02.268000","2026-02-24T00:00:02.269000","2026-02-24T00:00:02.270000","2026-02-24T00:00:02.271000","2026-02-24T00:00:02.273000","2026-02-24T00:00:02.274000","2026-02-24T00:00:02.275000","2026-02-24T00:00:02.276000","2026-02-24T00:00:02.277000","2026-02-24T00:00:02.278000","2026-02-24T00:00:02.279000","2026-02-24T00:00:02.280000","2026-02-24T00:00:02.281000","2026-02-24T00:00:02.283000","2026-02-24T00:00:02.284000","2026-02-24T00:00:02.285000","2026-02-24T00:00:02.286000","2026-02-24T00:00:02.287000","2026-02-24T00:00:02.288000","2026-02-24T00:00:02.289000","2026-02-24T00:00:02.290000","2026-02-24T00:00:02.292000","2026-02-24T00:00:02.293000","2026-02-24T00:00:02.294000","2026-02-24T00:00:02.295000","2026-02-24T00:00:02.296000","2026-02-24T00:00:02.297000","2026-02-24T00:00:02.298000","2026-02-24T00:00:02.299000","2026-02-24T00:00:02.300000","2026-02-24T00:00:02.302000","2026-02-24T00:00:02.303000","2026-02-24T00:00:02.304000","2026-02-24T00:00:02.305000","2026-02-24T00:00:02.306000","2026-02-24T00:00:02.307000","2026-02-24T00:00:02.308000","2026-02-24T00:00:02.309000","2026-02-24T00:00:02.310000","2026-02-24T00:00:02.312000","2026-02-24T00:00:02.313000","2026-02-24T00:00:02.314000","2026-02-24T00:00:02.315000","2026-02-24T00:00:02.316000","2026-02-24T00:00:02.317000","2026-02-24T00:00:02.318000","2026-02-24T00:00:02.319000","2026-02-24T00:00:02.321000","2026-02-24T00:00:02.322000","2026-02-24T00:00:02.323000","2026-02-24T00:00:02.324000","2026-02-24T00:00:02.325000","2026-02-24T00:00:02.326000","2026-02-24T00:00:02.327000","2026-02-24T00:00:02.328000","2026-02-24T00:00:02.329000","2026-02-24T00:00:02.331000","2026-02-24T00:00:02.332000","2026-02-24T00:00:02.333000","2026-02-24T00:00:02.334000","2026-02-24T00:00:02.335000","2026-02-24T00:00:02.336000","2026-02-24T00:00:02.337000","2026-02-24T00:00:02.338000","2026-02-24T00:00:02.339000","2026-02-24T00:00:02.341000","2026-02-24T00:00:02.342000","2026-02-24T00:00:02.343000","2026-02-24T00:00:02.344000","2026-02-24T00:00:02.345000","2026-02-24T00:00:02.346000","2026-02-24T00:00:02.347000","2026-02-24T00:00:02.348000","2026-02-24T00:00:02.349000","2026-02-24T00:00:02.351000","2026-02-24T00:00:02.352000","2026-02-24T00:00:02.353000","2026-02-24T00:00:02.354000","2026-02-24T00:00:02.355000","2026-02-24T00:00:02.356000","2026-02-24T00:00:02.357000","2026-02-24T00:00:02.358000","2026-02-24T00:00:02.360000","2026-02-24T00:00:02.361000","2026-02-24T00:00:02.362000","2026-02-24T00:00:02.363000","2026-02-24T00:00:02.364000","2026-02-24T00:00:02.365000","2026-02-24T00:00:02.366000","2026-02-24T00:00:02.368000","2026-02-24T00:00:02.369000","2026-02-24T00:00:02.370000","2026-02-24T00:00:02.371000","2026-02-24T00:00:02.372000","2026-02-24T00:00:02.373000","2026-02-24T00:00:02.375000","2026-02-24T00:00:02.376000","2026-02-24T00:00:02.377000","2026-02-24T00:00:02.378000","2026-02-24T00:00:02.379000","2026-02-24T00:00:02.380000","2026-02-24T00:00:02.381000","2026-02-24T00:00:02.383000","2026-02-24T00:00:02.384000","2026-02-24T00:00:02.385000","2026-02-24T00:00:02.386000","2026-02-24T00:00:02.387000","2026-02-24T00:00:02.389000","2026-02-24T00:00:02.390000","2026-02-24T00:00:02.391000","2026-02-24T00:00:02.392000","2026-02-24T00:00:02.393000","2026-02-24T00:00:02.394000","2026-02-24T00:00:02.395000","2026-02-24T00:00:02.230000","2026-02-24T00:00:02.231000","2026-02-24T00:00:02.232000","2026-02-24T00:00:02.233000","2026-02-24T00:00:02.234000","2026-02-24T00:00:02.236000","2026-02-24T00:00:02.237000","2026-02-24T00:00:02.238000","2026-02-24T00:00:02.239000","2026-02-24T00:00:02.240000","2026-02-24T00:00:02.242000","2026-02-24T00:00:02.243000","2026-02-24T00:00:02.244000","2026-02-24T00:00:02.245000","2026-02-24T00:00:02.246000","2026-02-24T00:00:02.247000","2026-02-24T00:00:02.248000","2026-02-24T00:00:02.249000","2026-02-24T00:00:02.250000","2026-02-24T00:00:02.251000","2026-02-24T00:00:02.252000","2026-02-24T00:00:02.254000","2026-02-24T00:00:02.255000","2026-02-24T00:00:02.256000","2026-02-24T00:00:02.257000","2026-02-24T00:00:02.258000","2026-02-24T00:00:02.259000","2026-02-24T00:00:02.260000","2026-02-24T00:00:02.261000","2026-02-24T00:00:02.262000","2026-02-24T00:00:02.264000","2026-02-24T00:00:02.265000","2026-02-24T00:00:02.266000","2026-02-24T00:00:02.267000","2026-02-24T00:00:02.268000","2026-02-24T00:00:02.269000","2026-02-24T00:00:02.270000","2026-02-24T00:00:02.271000","2026-02-24T00:00:02.273000","2026-02-24T00:00:02.274000","2026-02-24T00:00:02.275000","2026-02-24T00:00:02.276000","2026-02-24T00:00:02.277000","2026-02-24T00:00:02.278000","2026-02-24T00:00:02.279000","2026-02-24T00:00:02.280000","2026-02-24T00:00:02.281000","2026-02-24T00:00:02.283000","2026-02-24T00:00:02.284000","2026-02-24T00:00:02.285000","2026-02-24T00:00:02.286000","2026-02-24T00:00:02.287000","2026-02-24T00:00:02.288000","2026-02-24T00:00:02.289000","2026-02-24T00:00:02.290000","2026-02-24T00:00:02.291000","2026-02-24T00:00:02.293000","2026-02-24T00:00:02.294000","2026-02-24T00:00:02.295000","2026-02-24T00:00:02.296000","2026-02-24T00:00:02.297000","2026-02-24T00:00:02.298000","2026-02-24T00:00:02.299000","2026-02-24T00:00:02.300000","2026-02-24T00:00:02.301000","2026-02-24T00:00:02.303000","2026-02-24T00:00:02.304000","2026-02-24T00:00:02.305000","2026-02-24T00:00:02.306000","2026-02-24T00:00:02.307000","2026-02-24T00:00:02.308000","2026-02-24T00:00:02.309000","2026-02-24T00:00:02.310000","2026-02-24T00:00:02.312000","2026-02-24T00:00:02.313000","2026-02-24T00:00:02.314000","2026-02-24T00:00:02.315000","2026-02-24T00:00:02.316000","2026-02-24T00:00:02.317000","2026-02-24T00:00:02.318000","2026-02-24T00:00:02.319000","2026-02-24T00:00:02.320000","2026-02-24T00:00:02.322000","2026-02-24T00:00:02.323000","2026-02-24T00:00:02.324000","2026-02-24T00:00:02.325000","2026-02-24T00:00:02.326000","2026-02-24T00:00:02.327000","2026-02-24T00:00:02.328000","2026-02-24T00:00:02.329000","2026-02-24T00:00:02.330000","2026-02-24T00:00:02.332000","2026-02-24T00:00:02.333000","2026-02-24T00:00:02.334000","2026-02-24T00:00:02.335000","2026-02-24T00:00:02.336000","2026-02-24T00:00:02.337000","2026-02-24T00:00:02.338000","2026-02-24T00:00:02.339000","2026-02-24T00:00:02.340000","2026-02-24T00:00:02.342000","2026-02-24T00:00:02.343000","2026-02-24T00:00:02.344000","2026-02-24T00:00:02.345000","2026-02-24T00:00:02.346000","2026-02-24T00:00:02.347000","2026-02-24T00:00:02.348000","2026-02-24T00:00:02.349000","2026-02-24T00:00:02.351000","2026-02-24T00:00:02.352000","2026-02-24T00:00:02.353000","2026-02-24T00:00:02.354000","2026-02-24T00:00:02.355000","2026-02-24T00:00:02.356000","2026-02-24T00:00:02.357000","2026-02-24T00:00:02.358000","2026-02-24T00:00:02.359000","2026-02-24T00:00:02.361000","2026-02-24T00:00:02.362000","2026-02-24T00:00:02.363000","2026-02-24T00:00:02.364000","2026-02-24T00:00:02.365000","2026-02-24T00:00:02.366000","2026-02-24T00:00:02.367000","2026-02-24T00:00:02.369000","2026-02-24T00:00:02.370000","2026-02-24T00:00:02.371000","2026-02-24T00:00:02.372000","2026-02-24T00:00:02.373000","2026-02-24T00:00:02.375000","2026-02-24T00:00:02.376000","2026-02-24T00:00:02.377000","2026-02-24T00:00:02.378000","2026-02-24T00:00:02.379000","2026-02-24T00:00:02.380000","2026-02-24T00:00:02.381000","2026-02-24T00:00:02.383000","2026-02-24T00:00:02.384000","2026-02-24T00:00:02.385000","2026-02-24T00:00:02.386000","2026-02-24T00:00:02.387000","2026-02-24T00:00:02.389000","2026-02-24T00:00:02.390000","2026-02-24T00:00:02.391000","2026-02-24T00:00:02.392000","2026-02-24T00:00:02.393000","2026-02-24T00:00:02.394000","2026-02-24T00:00:02.395000","2026-02-24T00:00:02.397000","2026-02-24T00:00:02.398000","2026-02-24T00:00:02.399000","2026-02-24T00:00:02.400000","2026-02-24T00:00:02.401000","2026-02-24T00:00:02.402000","2026-02-24T00:00:02.403000","2026-02-24T00:00:02.404000","2026-02-24T00:00:02.405000","2026-02-24T00:00:02.406000","2026-02-24T00:00:02.407000","2026-02-24T00:00:02.409000","2026-02-24T00:00:02.410000","2026-02-24T00:00:02.412000","2026-02-24T00:00:02.413000","2026-02-24T00:00:02.415000","2026-02-24T00:00:02.417000","2026-02-24T00:00:02.418000","2026-02-24T00:00:02.419000","2026-02-24T00:00:02.420000","2026-02-24T00:00:02.421000","2026-02-24T00:00:02.422000","2026-02-24T00:00:02.423000","2026-02-24T00:00:02.424000","2026-02-24T00:00:02.425000","2026-02-24T00:00:02.426000","2026-02-24T00:00:02.427000","2026-02-24T00:00:02.428000","2026-02-24T00:00:02.429000","2026-02-24T00:00:02.430000","2026-02-24T00:00:02.431000","2026-02-24T00:00:02.432000","2026-02-24T00:00:02.433000","2026-02-24T00:00:02.435000","2026-02-24T00:00:02.436000","2026-02-24T00:00:02.437000","2026-02-24T00:00:02.438000","2026-02-24T00:00:02.439000","2026-02-24T00:00:02.440000","2026-02-24T00:00:02.441000","2026-02-24T00:00:02.443000","2026-02-24T00:00:02.443000","2026-02-24T00:00:02.444000","2026-02-24T00:00:02.445000","2026-02-24T00:00:02.447000","2026-02-24T00:00:02.450000","2026-02-24T00:00:02.451000","2026-02-24T00:00:02.452000","2026-02-24T00:00:02.454000","2026-02-24T00:00:02.455000","2026-02-24T00:00:02.457000","2026-02-24T00:00:02.458000","2026-02-24T00:00:02.460000","2026-02-24T00:00:02.461000","2026-02-24T00:00:02.462000","2026-02-24T00:00:02.463000","2026-02-24T00:00:02.464000","2026-02-24T00:00:02.466000","2026-02-24T00:00:02.468000","2026-02-24T00:00:02.469000","2026-02-24T00:00:02.470000","2026-02-24T00:00:02.471000","2026-02-24T00:00:02.472000","2026-02-24T00:00:02.474000","2026-02-24T00:00:02.475000","2026-02-24T00:00:02.476000","2026-02-24T00:00:02.477000","2026-02-24T00:00:02.478000","2026-02-24T00:00:02.480000","2026-02-24T00:00:02.481000","2026-02-24T00:00:02.482000","2026-02-24T00:00:02.483000","2026-02-24T00:00:02.484000","2026-02-24T00:00:02.486000","2026-02-24T00:00:02.487000","2026-02-24T00:00:02.488000","2026-02-24T00:00:02.489000","2026-02-24T00:00:02.490000","2026-02-24T00:00:02.492000","2026-02-24T00:00:02.493000","2026-02-24T00:00:02.494000","2026-02-24T00:00:02.495000","2026-02-24T00:00:02.497000","2026-02-24T00:00:02.498000","2026-02-24T00:00:02.500000","2026-02-24T00:00:02.501000","2026-02-24T00:00:02.503000","2026-02-24T00:00:02.504000","2026-02-24T00:00:02.506000","2026-02-24T00:00:02.507000","2026-02-24T00:00:02.507000","2026-02-24T00:00:02.508000","2026-02-24T00:00:02.510000","2026-02-24T00:00:02.511000","2026-02-24T00:00:02.512000","2026-02-24T00:00:02.513000","2026-02-24T00:00:02.514000","2026-02-24T00:00:02.515000","2026-02-24T00:00:02.516000","2026-02-24T00:00:02.517000","2026-02-24T00:00:02.518000","2026-02-24T00:00:02.520000","2026-02-24T00:00:02.521000","2026-02-24T00:00:02.522000","2026-02-24T00:00:02.523000","2026-02-24T00:00:02.524000","2026-02-24T00:00:02.525000","2026-02-24T00:00:02.526000","2026-02-24T00:00:02.528000","2026-02-24T00:00:02.529000","2026-02-24T00:00:02.530000","2026-02-24T00:00:02.531000","2026-02-24T00:00:02.532000","2026-02-24T00:00:02.533000","2026-02-24T00:00:02.534000","2026-02-24T00:00:02.536000","2026-02-24T00:00:02.537000","2026-02-24T00:00:02.538000","2026-02-24T00:00:02.539000","2026-02-24T00:00:02.540000","2026-02-24T00:00:02.541000","2026-02-24T00:00:02.542000","2026-02-24T00:00:02.543000","2026-02-24T00:00:02.544000","2026-02-24T00:00:02.545000","2026-02-24T00:00:02.546000","2026-02-24T00:00:02.548000","2026-02-24T00:00:02.549000","2026-02-24T00:00:02.550000","2026-02-24T00:00:02.551000","2026-02-24T00:00:02.552000","2026-02-24T00:00:02.553000","2026-02-24T00:00:02.554000","2026-02-24T00:00:02.556000","2026-02-24T00:00:02.557000","2026-02-24T00:00:02.558000","2026-02-24T00:00:02.559000","2026-02-24T00:00:02.560000","2026-02-24T00:00:02.561000","2026-02-24T00:00:02.562000","2026-02-24T00:00:02.563000","2026-02-24T00:00:02.565000","2026-02-24T00:00:02.566000","2026-02-24T00:00:02.567000","2026-02-24T00:00:02.568000","2026-02-24T00:00:02.569000","2026-02-24T00:00:02.570000","2026-02-24T00:00:02.571000","2026-02-24T00:00:02.572000","2026-02-24T00:00:02.573000","2026-02-24T00:00:02.575000","2026-02-24T00:00:02.576000","2026-02-24T00:00:02.577000","2026-02-24T00:00:02.578000","2026-02-24T00:00:02.580000","2026-02-24T00:00:02.581000","2026-02-24T00:00:02.582000","2026-02-24T00:00:02.583000","2026-02-24T00:00:02.584000","2026-02-24T00:00:02.585000","2026-02-24T00:00:02.586000","2026-02-24T00:00:02.587000","2026-02-24T00:00:02.588000","2026-02-24T00:00:02.589000","2026-02-24T00:00:02.590000","2026-02-24T00:00:02.591000","2026-02-24T00:00:02.593000","2026-02-24T00:00:02.594000","2026-02-24T00:00:02.595000","2026-02-24T00:00:02.596000","2026-02-24T00:00:02.597000","2026-02-24T00:00:02.598000","2026-02-24T00:00:02.599000","2026-02-24T00:00:02.600000","2026-02-24T00:00:02.602000","2026-02-24T00:00:02.603000","2026-02-24T00:00:02.604000","2026-02-24T00:00:02.605000","2026-02-24T00:00:02.606000","2026-02-24T00:00:02.607000","2026-02-24T00:00:02.608000","2026-02-24T00:00:02.610000","2026-02-24T00:00:02.611000","2026-02-24T00:00:02.612000","2026-02-24T00:00:02.451000","2026-02-24T00:00:02.452000","2026-02-24T00:00:02.454000","2026-02-24T00:00:02.455000","2026-02-24T00:00:02.457000","2026-02-24T00:00:02.458000","2026-02-24T00:00:02.460000","2026-02-24T00:00:02.461000","2026-02-24T00:00:02.462000","2026-02-24T00:00:02.463000","2026-02-24T00:00:02.464000","2026-02-24T00:00:02.466000","2026-02-24T00:00:02.468000","2026-02-24T00:00:02.469000","2026-02-24T00:00:02.470000","2026-02-24T00:00:02.472000","2026-02-24T00:00:02.473000","2026-02-24T00:00:02.474000","2026-02-24T00:00:02.475000","2026-02-24T00:00:02.476000","2026-02-24T00:00:02.477000","2026-02-24T00:00:02.478000","2026-02-24T00:00:02.480000","2026-02-24T00:00:02.481000","2026-02-24T00:00:02.482000","2026-02-24T00:00:02.483000","2026-02-24T00:00:02.484000","2026-02-24T00:00:02.486000","2026-02-24T00:00:02.487000","2026-02-24T00:00:02.488000","2026-02-24T00:00:02.489000","2026-02-24T00:00:02.490000","2026-02-24T00:00:02.492000","2026-02-24T00:00:02.493000","2026-02-24T00:00:02.494000","2026-02-24T00:00:02.495000","2026-02-24T00:00:02.874000","2026-02-24T00:00:02.874000","2026-02-24T00:00:02.875000","2026-02-24T00:00:02.876000","2026-02-24T00:00:02.878000","2026-02-24T00:00:02.879000","2026-02-24T00:00:02.880000","2026-02-24T00:00:02.881000","2026-02-24T00:00:02.882000","2026-02-24T00:00:02.884000","2026-02-24T00:00:02.885000","2026-02-24T00:00:02.886000","2026-02-24T00:00:02.888000","2026-02-24T00:00:02.889000","2026-02-24T00:00:02.890000","2026-02-24T00:00:02.892000","2026-02-24T00:00:02.893000","2026-02-24T00:00:02.894000","2026-02-24T00:00:02.895000","2026-02-24T00:00:02.897000","2026-02-24T00:00:02.898000","2026-02-24T00:00:02.899000","2026-02-24T00:00:02.900000","2026-02-24T00:00:02.902000","2026-02-24T00:00:02.903000","2026-02-24T00:00:02.904000","2026-02-24T00:00:02.906000","2026-02-24T00:00:02.907000","2026-02-24T00:00:02.908000","2026-02-24T00:00:02.909000","2026-02-24T00:00:02.911000","2026-02-24T00:00:02.912000","2026-02-24T00:00:02.913000","2026-02-24T00:00:02.915000","2026-02-24T00:00:02.916000","2026-02-24T00:00:02.917000","2026-02-24T00:00:02.918000","2026-02-24T00:00:02.920000","2026-02-24T00:00:02.921000","2026-02-24T00:00:02.922000","2026-02-24T00:00:02.924000","2026-02-24T00:00:02.925000","2026-02-24T00:00:02.926000","2026-02-24T00:00:02.927000","2026-02-24T00:00:02.929000","2026-02-24T00:00:02.930000","2026-02-24T00:00:02.931000","2026-02-24T00:00:02.933000","2026-02-24T00:00:02.934000","2026-02-24T00:00:02.935000","2026-02-24T00:00:02.936000","2026-02-24T00:00:02.938000","2026-02-24T00:00:02.939000","2026-02-24T00:00:02.940000","2026-02-24T00:00:02.942000","2026-02-24T00:00:02.943000","2026-02-24T00:00:02.944000","2026-02-24T00:00:02.945000","2026-02-24T00:00:02.947000","2026-02-24T00:00:02.948000","2026-02-24T00:00:02.949000","2026-02-24T00:00:02.950000","2026-02-24T00:00:02.952000","2026-02-24T00:00:02.953000","2026-02-24T00:00:02.954000","2026-02-24T00:00:02.956000","2026-02-24T00:00:02.957000","2026-02-24T00:00:02.958000","2026-02-24T00:00:02.959000","2026-02-24T00:00:02.961000","2026-02-24T00:00:02.962000","2026-02-24T00:00:02.963000","2026-02-24T00:00:02.965000","2026-02-24T00:00:02.966000","2026-02-24T00:00:02.968000","2026-02-24T00:00:02.969000","2026-02-24T00:00:02.970000","2026-02-24T00:00:02.971000","2026-02-24T00:00:02.973000","2026-02-24T00:00:02.974000","2026-02-24T00:00:02.975000","2026-02-24T00:00:02.976000","2026-02-24T00:00:02.977000","2026-02-24T00:00:02.979000","2026-02-24T00:00:02.980000","2026-02-24T00:00:02.981000","2026-02-24T00:00:02.983000","2026-02-24T00:00:02.984000","2026-02-24T00:00:02.985000","2026-02-24T00:00:02.986000","2026-02-24T00:00:02.988000","2026-02-24T00:00:02.989000","2026-02-24T00:00:02.990000","2026-02-24T00:00:02.991000","2026-02-24T00:00:02.993000","2026-02-24T00:00:02.994000","2026-02-24T00:00:02.995000","2026-02-24T00:00:02.997000","2026-02-24T00:00:02.998000","2026-02-24T00:00:02.999000","2026-02-24T00:00:03.000000","2026-02-24T00:00:03.002000","2026-02-24T00:00:03.003000","2026-02-24T00:00:03.004000","2026-02-24T00:00:03.006000","2026-02-24T00:00:03.007000","2026-02-24T00:00:03.008000","2026-02-24T00:00:03.009000","2026-02-24T00:00:03.011000","2026-02-24T00:00:03.012000","2026-02-24T00:00:03.013000","2026-02-24T00:00:03.015000","2026-02-24T00:00:03.016000","2026-02-24T00:00:03.017000","2026-02-24T00:00:03.019000","2026-02-24T00:00:03.020000","2026-02-24T00:00:03.021000","2026-02-24T00:00:03.023000","2026-02-24T00:00:03.024000","2026-02-24T00:00:03.025000","2026-02-24T00:00:03.027000","2026-02-24T00:00:03.028000","2026-02-24T00:00:03.029000","2026-02-24T00:00:03.031000","2026-02-24T00:00:03.032000","2026-02-24T00:00:03.033000","2026-02-24T00:00:03.035000","2026-02-24T00:00:03.036000","2026-02-24T00:00:03.037000","2026-02-24T00:00:03.039000","2026-02-24T00:00:03.040000","2026-02-24T00:00:03.048000","2026-02-24T00:00:03.049000","2026-02-24T00:00:03.050000","2026-02-24T00:00:03.051000","2026-02-24T00:00:03.052000","2026-02-24T00:00:03.054000","2026-02-24T00:00:03.055000","2026-02-24T00:00:03.057000","2026-02-24T00:00:03.058000","2026-02-24T00:00:03.059000","2026-02-24T00:00:03.061000","2026-02-24T00:00:03.062000","2026-02-24T00:00:03.064000","2026-02-24T00:00:03.065000","2026-02-24T00:00:03.066000","2026-02-24T00:00:03.068000","2026-02-24T00:00:03.069000","2026-02-24T00:00:03.070000","2026-02-24T00:00:03.072000","2026-02-24T00:00:03.073000","2026-02-24T00:00:03.074000","2026-02-24T00:00:03.076000","2026-02-24T00:00:03.077000","2026-02-24T00:00:03.079000","2026-02-24T00:00:03.080000","2026-02-24T00:00:03.081000","2026-02-24T00:00:03.083000","2026-02-24T00:00:03.084000","2026-02-24T00:00:03.086000","2026-02-24T00:00:03.087000","2026-02-24T00:00:03.089000","2026-02-24T00:00:03.090000","2026-02-24T00:00:03.091000","2026-02-24T00:00:03.092000","2026-02-24T00:00:03.094000","2026-02-24T00:00:03.095000","2026-02-24T00:00:03.096000","2026-02-24T00:00:03.098000","2026-02-24T00:00:03.100000","2026-02-24T00:00:03.101000","2026-02-24T00:00:03.103000","2026-02-24T00:00:03.104000","2026-02-24T00:00:03.105000","2026-02-24T00:00:03.106000","2026-02-24T00:00:03.112000","2026-02-24T00:00:03.113000","2026-02-24T00:00:03.114000","2026-02-24T00:00:03.116000","2026-02-24T00:00:03.117000","2026-02-24T00:00:03.119000","2026-02-24T00:00:03.120000","2026-02-24T00:00:03.121000","2026-02-24T00:00:03.123000","2026-02-24T00:00:03.124000","2026-02-24T00:00:03.126000","2026-02-24T00:00:03.127000","2026-02-24T00:00:03.128000","2026-02-24T00:00:03.130000","2026-02-24T00:00:03.131000","2026-02-24T00:00:03.132000","2026-02-24T00:00:03.134000","2026-02-24T00:00:03.135000","2026-02-24T00:00:03.137000","2026-02-24T00:00:03.138000","2026-02-24T00:00:03.139000","2026-02-24T00:00:03.141000","2026-02-24T00:00:03.142000","2026-02-24T00:00:03.144000","2026-02-24T00:00:03.145000","2026-02-24T00:00:03.146000","2026-02-24T00:00:03.148000","2026-02-24T00:00:03.149000","2026-02-24T00:00:03.150000","2026-02-24T00:00:03.152000","2026-02-24T00:00:03.153000","2026-02-24T00:00:03.155000","2026-02-24T00:00:03.156000","2026-02-24T00:00:03.157000","2026-02-24T00:00:03.159000","2026-02-24T00:00:03.160000","2026-02-24T00:00:03.162000","2026-02-24T00:00:03.163000","2026-02-24T00:00:03.164000","2026-02-24T00:00:03.166000","2026-02-24T00:00:03.167000","2026-02-24T00:00:03.169000","2026-02-24T00:00:03.170000","2026-02-24T00:00:03.171000","2026-02-24T00:00:03.173000","2026-02-24T00:00:03.174000","2026-02-24T00:00:03.176000","2026-02-24T00:00:03.177000","2026-02-24T00:00:03.178000","2026-02-24T00:00:03.180000","2026-02-24T00:00:03.181000","2026-02-24T00:00:03.183000","2026-02-24T00:00:03.184000","2026-02-24T00:00:03.185000","2026-02-24T00:00:03.187000","2026-02-24T00:00:03.188000","2026-02-24T00:00:03.189000","2026-02-24T00:00:03.191000","2026-02-24T00:00:03.192000","2026-02-24T00:00:03.194000","2026-02-24T00:00:03.195000","2026-02-24T00:00:03.196000","2026-02-24T00:00:03.198000","2026-02-24T00:00:03.199000","2026-02-24T00:00:03.201000","2026-02-24T00:00:03.202000","2026-02-24T00:00:03.203000","2026-02-24T00:00:03.205000","2026-02-24T00:00:03.206000","2026-02-24T00:00:03.208000","2026-02-24T00:00:03.209000","2026-02-24T00:00:03.211000","2026-02-24T00:00:03.212000","2026-02-24T00:00:03.214000","2026-02-24T00:00:03.215000","2026-02-24T00:00:03.216000","2026-02-24T00:00:03.218000","2026-02-24T00:00:03.219000","2026-02-24T00:00:03.221000","2026-02-24T00:00:03.222000","2026-02-24T00:00:03.224000","2026-02-24T00:00:03.225000","2026-02-24T00:00:03.227000","2026-02-24T00:00:03.228000","2026-02-24T00:00:03.229000","2026-02-24T00:00:03.231000","2026-02-24T00:00:03.232000","2026-02-24T00:00:03.234000","2026-02-24T00:00:03.235000","2026-02-24T00:00:03.236000","2026-02-24T00:00:03.238000","2026-02-24T00:00:03.239000","2026-02-24T00:00:03.241000","2026-02-24T00:00:03.242000","2026-02-24T00:00:03.244000","2026-02-24T00:00:03.245000","2026-02-24T00:00:03.247000","2026-02-24T00:00:03.248000","2026-02-24T00:00:03.250000","2026-02-24T00:00:03.251000","2026-02-24T00:00:03.252000","2026-02-24T00:00:03.254000","2026-02-24T00:00:03.255000","2026-02-24T00:00:03.257000","2026-02-24T00:00:03.258000","2026-02-24T00:00:03.260000","2026-02-24T00:00:03.261000","2026-02-24T00:00:03.263000","2026-02-24T00:00:03.264000","2026-02-24T00:00:03.265000","2026-02-24T00:00:03.267000","2026-02-24T00:00:03.268000","2026-02-24T00:00:03.270000","2026-02-24T00:00:03.271000","2026-02-24T00:00:03.273000","2026-02-24T00:00:03.279000","2026-02-24T00:00:03.281000","2026-02-24T00:00:03.283000","2026-02-24T00:00:03.283000","2026-02-24T00:00:03.285000","2026-02-24T00:00:03.286000","2026-02-24T00:00:03.288000","2026-02-24T00:00:03.289000","2026-02-24T00:00:03.290000","2026-02-24T00:00:03.292000","2026-02-24T00:00:03.293000","2026-02-24T00:00:03.295000","2026-02-24T00:00:03.296000","2026-02-24T00:00:03.298000","2026-02-24T00:00:03.299000","2026-02-24T00:00:03.301000","2026-02-24T00:00:03.302000","2026-02-24T00:00:03.303000","2026-02-24T00:00:03.305000","2026-02-24T00:00:03.306000","2026-02-24T00:00:03.308000","2026-02-24T00:00:03.309000","2026-02-24T00:00:03.311000","2026-02-24T00:00:03.312000","2026-02-24T00:00:03.314000","2026-02-24T00:00:03.315000","2026-02-24T00:00:03.317000","2026-02-24T00:00:03.319000","2026-02-24T00:00:03.320000","2026-02-24T00:00:03.321000","2026-02-24T00:00:03.322000","2026-02-24T00:00:03.324000","2026-02-24T00:00:03.325000","2026-02-24T00:00:03.327000","2026-02-24T00:00:03.328000","2026-02-24T00:00:03.329000","2026-02-24T00:00:03.331000","2026-02-24T00:00:03.332000","2026-02-24T00:00:03.334000","2026-02-24T00:00:03.335000","2026-02-24T00:00:03.337000","2026-02-24T00:00:03.338000","2026-02-24T00:00:03.340000","2026-02-24T00:00:03.347000","2026-02-24T00:00:03.348000","2026-02-24T00:00:03.349000","2026-02-24T00:00:03.350000","2026-02-24T00:00:03.352000","2026-02-24T00:00:03.353000","2026-02-24T00:00:03.355000","2026-02-24T00:00:03.356000","2026-02-24T00:00:03.358000","2026-02-24T00:00:03.359000","2026-02-24T00:00:03.360000","2026-02-24T00:00:03.362000","2026-02-24T00:00:03.363000","2026-02-24T00:00:03.365000","2026-02-24T00:00:03.366000","2026-02-24T00:00:03.368000","2026-02-24T00:00:03.369000","2026-02-24T00:00:03.371000","2026-02-24T00:00:03.372000","2026-02-24T00:00:03.373000","2026-02-24T00:00:03.375000","2026-02-24T00:00:03.376000","2026-02-24T00:00:03.378000","2026-02-24T00:00:03.379000","2026-02-24T00:00:03.381000","2026-02-24T00:00:03.382000","2026-02-24T00:00:03.384000","2026-02-24T00:00:03.385000","2026-02-24T00:00:03.386000","2026-02-24T00:00:03.388000","2026-02-24T00:00:03.389000","2026-02-24T00:00:03.391000","2026-02-24T00:00:03.393000","2026-02-24T00:00:03.394000","2026-02-24T00:00:03.396000","2026-02-24T00:00:03.397000","2026-02-24T00:00:03.405000","2026-02-24T00:00:03.406000","2026-02-24T00:00:03.407000","2026-02-24T00:00:03.408000","2026-02-24T00:00:03.410000","2026-02-24T00:00:03.411000","2026-02-24T00:00:03.413000","2026-02-24T00:00:03.415000","2026-02-24T00:00:03.416000","2026-02-24T00:00:03.417000","2026-02-24T00:00:03.418000","2026-02-24T00:00:03.420000","2026-02-24T00:00:03.422000","2026-02-24T00:00:03.423000","2026-02-24T00:00:03.425000","2026-02-24T00:00:03.426000","2026-02-24T00:00:03.428000","2026-02-24T00:00:03.429000","2026-02-24T00:00:03.430000","2026-02-24T00:00:03.432000","2026-02-24T00:00:03.433000","2026-02-24T00:00:03.435000","2026-02-24T00:00:03.436000","2026-02-24T00:00:03.438000","2026-02-24T00:00:03.440000","2026-02-24T00:00:03.441000","2026-02-24T00:00:03.442000","2026-02-24T00:00:03.444000","2026-02-24T00:00:03.446000","2026-02-24T00:00:03.447000","2026-02-24T00:00:03.448000","2026-02-24T00:00:03.450000","2026-02-24T00:00:03.451000","2026-02-24T00:00:03.453000","2026-02-24T00:00:03.454000","2026-02-24T00:00:03.456000","2026-02-24T00:00:03.457000","2026-02-24T00:00:03.459000","2026-02-24T00:00:03.460000","2026-02-24T00:00:03.462000","2026-02-24T00:00:03.463000","2026-02-24T00:00:03.465000","2026-02-24T00:00:03.466000","2026-02-24T00:00:03.468000","2026-02-24T00:00:03.469000","2026-02-24T00:00:03.477000","2026-02-24T00:00:03.478000","2026-02-24T00:00:03.479000","2026-02-24T00:00:03.480000","2026-02-24T00:00:03.482000","2026-02-24T00:00:03.483000","2026-02-24T00:00:03.485000","2026-02-24T00:00:03.486000","2026-02-24T00:00:03.488000","2026-02-24T00:00:03.490000","2026-02-24T00:00:03.491000","2026-02-24T00:00:03.492000","2026-02-24T00:00:03.494000","2026-02-24T00:00:03.495000","2026-02-24T00:00:03.497000","2026-02-24T00:00:03.498000","2026-02-24T00:00:03.500000","2026-02-24T00:00:03.501000","2026-02-24T00:00:03.503000","2026-02-24T00:00:03.504000","2026-02-24T00:00:03.506000","2026-02-24T00:00:03.507000","2026-02-24T00:00:03.509000","2026-02-24T00:00:03.510000","2026-02-24T00:00:03.512000","2026-02-24T00:00:03.513000","2026-02-24T00:00:03.515000","2026-02-24T00:00:03.516000","2026-02-24T00:00:03.518000","2026-02-24T00:00:03.519000","2026-02-24T00:00:03.521000","2026-02-24T00:00:03.522000","2026-02-24T00:00:03.524000","2026-02-24T00:00:03.525000","2026-02-24T00:00:03.527000","2026-02-24T00:00:03.528000","2026-02-24T00:00:03.530000","2026-02-24T00:00:03.531000","2026-02-24T00:00:03.533000","2026-02-24T00:00:03.534000","2026-02-24T00:00:03.536000","2026-02-24T00:00:03.537000","2026-02-24T00:00:03.539000","2026-02-24T00:00:03.540000","2026-02-24T00:00:03.542000","2026-02-24T00:00:03.543000","2026-02-24T00:00:03.544000","2026-02-24T00:00:03.546000","2026-02-24T00:00:03.548000","2026-02-24T00:00:03.549000","2026-02-24T00:00:03.551000","2026-02-24T00:00:03.552000","2026-02-24T00:00:03.554000","2026-02-24T00:00:03.555000","2026-02-24T00:00:03.557000","2026-02-24T00:00:03.558000","2026-02-24T00:00:03.560000","2026-02-24T00:00:03.561000","2026-02-24T00:00:03.562000","2026-02-24T00:00:03.564000","2026-02-24T00:00:03.565000","2026-02-24T00:00:03.567000","2026-02-24T00:00:03.568000","2026-02-24T00:00:03.570000","2026-02-24T00:00:03.571000","2026-02-24T00:00:03.573000","2026-02-24T00:00:03.574000","2026-02-24T00:00:03.576000","2026-02-24T00:00:03.577000","2026-02-24T00:00:03.579000","2026-02-24T00:00:03.580000","2026-02-24T00:00:03.582000","2026-02-24T00:00:03.583000","2026-02-24T00:00:03.585000","2026-02-24T00:00:03.586000","2026-02-24T00:00:03.588000","2026-02-24T00:00:03.590000","2026-02-24T00:00:03.591000","2026-02-24T00:00:03.593000","2026-02-24T00:00:03.594000","2026-02-24T00:00:03.596000","2026-02-24T00:00:03.597000","2026-02-24T00:00:03.599000","2026-02-24T00:00:03.600000","2026-02-24T00:00:03.602000","2026-02-24T00:00:03.603000","2026-02-24T00:00:03.605000","2026-02-24T00:00:03.606000","2026-02-24T00:00:03.608000","2026-02-24T00:00:03.612000","2026-02-24T00:00:03.613000","2026-02-24T00:00:03.614000","2026-02-24T00:00:03.616000","2026-02-24T00:00:03.617000","2026-02-24T00:00:03.619000","2026-02-24T00:00:03.620000","2026-02-24T00:00:03.622000","2026-02-24T00:00:03.623000","2026-02-24T00:00:03.625000","2026-02-24T00:00:03.627000","2026-02-24T00:00:03.628000","2026-02-24T00:00:03.630000","2026-02-24T00:00:03.631000","2026-02-24T00:00:03.633000","2026-02-24T00:00:03.634000","2026-02-24T00:00:03.636000","2026-02-24T00:00:03.637000","2026-02-24T00:00:03.639000","2026-02-24T00:00:03.640000","2026-02-24T00:00:03.642000","2026-02-24T00:00:03.643000","2026-02-24T00:00:03.645000","2026-02-24T00:00:03.646000","2026-02-24T00:00:03.648000","2026-02-24T00:00:03.650000","2026-02-24T00:00:03.651000","2026-02-24T00:00:03.653000","2026-02-24T00:00:03.654000","2026-02-24T00:00:03.656000","2026-02-24T00:00:03.657000","2026-02-24T00:00:03.659000","2026-02-24T00:00:03.660000","2026-02-24T00:00:03.662000","2026-02-24T00:00:03.663000","2026-02-24T00:00:03.665000","2026-02-24T00:00:03.666000","2026-02-24T00:00:03.668000","2026-02-24T00:00:03.670000","2026-02-24T00:00:03.671000","2026-02-24T00:00:03.673000","2026-02-24T00:00:03.674000","2026-02-24T00:00:03.676000","2026-02-24T00:00:03.677000","2026-02-24T00:00:03.679000","2026-02-24T00:00:03.680000","2026-02-24T00:00:03.682000","2026-02-24T00:00:03.683000","2026-02-24T00:00:03.685000","2026-02-24T00:00:03.686000","2026-02-24T00:00:03.688000","2026-02-24T00:00:03.690000","2026-02-24T00:00:03.691000","2026-02-24T00:00:03.693000","2026-02-24T00:00:03.694000","2026-02-24T00:00:03.696000","2026-02-24T00:00:03.697000","2026-02-24T00:00:03.699000","2026-02-24T00:00:03.700000","2026-02-24T00:00:03.702000","2026-02-24T00:00:03.703000","2026-02-24T00:00:03.705000","2026-02-24T00:00:03.706000","2026-02-24T00:00:03.708000","2026-02-24T00:00:03.709000","2026-02-24T00:00:03.711000","2026-02-24T00:00:03.712000","2026-02-24T00:00:03.714000","2026-02-24T00:00:03.716000","2026-02-24T00:00:03.717000","2026-02-24T00:00:03.719000","2026-02-24T00:00:03.720000","2026-02-24T00:00:03.722000","2026-02-24T00:00:03.724000","2026-02-24T00:00:03.725000","2026-02-24T00:00:03.726000","2026-02-24T00:00:03.049000","2026-02-24T00:00:03.050000","2026-02-24T00:00:03.051000","2026-02-24T00:00:03.052000","2026-02-24T00:00:03.054000","2026-02-24T00:00:03.055000","2026-02-24T00:00:03.057000","2026-02-24T00:00:03.058000","2026-02-24T00:00:03.059000","2026-02-24T00:00:03.061000","2026-02-24T00:00:03.062000","2026-02-24T00:00:03.063000","2026-02-24T00:00:03.065000","2026-02-24T00:00:03.066000","2026-02-24T00:00:03.068000","2026-02-24T00:00:03.069000","2026-02-24T00:00:03.070000","2026-02-24T00:00:03.072000","2026-02-24T00:00:03.073000","2026-02-24T00:00:03.074000","2026-02-24T00:00:03.076000","2026-02-24T00:00:03.077000","2026-02-24T00:00:03.079000","2026-02-24T00:00:03.080000","2026-02-24T00:00:03.081000","2026-02-24T00:00:03.083000","2026-02-24T00:00:03.084000","2026-02-24T00:00:03.086000","2026-02-24T00:00:03.087000","2026-02-24T00:00:03.089000","2026-02-24T00:00:03.090000","2026-02-24T00:00:03.091000","2026-02-24T00:00:03.092000","2026-02-24T00:00:03.094000","2026-02-24T00:00:03.095000","2026-02-24T00:00:03.096000","2026-02-24T00:00:03.098000","2026-02-24T00:00:03.099000","2026-02-24T00:00:03.101000","2026-02-24T00:00:03.102000","2026-02-24T00:00:03.104000","2026-02-24T00:00:03.105000","2026-02-24T00:00:03.106000","2026-02-24T00:00:03.112000","2026-02-24T00:00:03.113000","2026-02-24T00:00:03.114000","2026-02-24T00:00:03.116000","2026-02-24T00:00:03.117000","2026-02-24T00:00:03.119000","2026-02-24T00:00:03.120000","2026-02-24T00:00:03.121000","2026-02-24T00:00:03.123000","2026-02-24T00:00:03.124000","2026-02-24T00:00:03.126000","2026-02-24T00:00:03.127000","2026-02-24T00:00:03.128000","2026-02-24T00:00:03.130000","2026-02-24T00:00:03.131000","2026-02-24T00:00:03.132000","2026-02-24T00:00:03.134000","2026-02-24T00:00:03.135000","2026-02-24T00:00:03.137000","2026-02-24T00:00:03.138000","2026-02-24T00:00:03.139000","2026-02-24T00:00:03.141000","2026-02-24T00:00:03.142000","2026-02-24T00:00:03.144000","2026-02-24T00:00:03.145000","2026-02-24T00:00:03.146000","2026-02-24T00:00:03.148000","2026-02-24T00:00:03.149000","2026-02-24T00:00:03.150000","2026-02-24T00:00:03.152000","2026-02-24T00:00:03.153000","2026-02-24T00:00:03.155000","2026-02-24T00:00:03.156000","2026-02-24T00:00:03.157000","2026-02-24T00:00:03.159000","2026-02-24T00:00:03.160000","2026-02-24T00:00:03.162000","2026-02-24T00:00:03.163000","2026-02-24T00:00:03.164000","2026-02-24T00:00:03.166000","2026-02-24T00:00:03.167000","2026-02-24T00:00:03.168000","2026-02-24T00:00:03.170000","2026-02-24T00:00:03.171000","2026-02-24T00:00:03.173000","2026-02-24T00:00:03.174000","2026-02-24T00:00:03.175000","2026-02-24T00:00:03.177000","2026-02-24T00:00:03.178000","2026-02-24T00:00:03.180000","2026-02-24T00:00:03.181000","2026-02-24T00:00:03.182000","2026-02-24T00:00:03.184000","2026-02-24T00:00:03.185000","2026-02-24T00:00:03.186000","2026-02-24T00:00:03.188000","2026-02-24T00:00:03.189000","2026-02-24T00:00:03.191000","2026-02-24T00:00:03.192000","2026-02-24T00:00:03.194000","2026-02-24T00:00:03.195000","2026-02-24T00:00:03.196000","2026-02-24T00:00:03.198000","2026-02-24T00:00:03.199000","2026-02-24T00:00:03.201000","2026-02-24T00:00:03.202000","2026-02-24T00:00:03.203000","2026-02-24T00:00:03.205000","2026-02-24T00:00:03.206000","2026-02-24T00:00:03.208000","2026-02-24T00:00:03.209000","2026-02-24T00:00:03.211000","2026-02-24T00:00:03.212000","2026-02-24T00:00:03.214000","2026-02-24T00:00:03.215000","2026-02-24T00:00:03.216000","2026-02-24T00:00:03.218000","2026-02-24T00:00:03.219000","2026-02-24T00:00:03.221000","2026-02-24T00:00:03.222000","2026-02-24T00:00:03.224000","2026-02-24T00:00:03.225000","2026-02-24T00:00:03.226000","2026-02-24T00:00:03.228000","2026-02-24T00:00:03.229000","2026-02-24T00:00:03.231000","2026-02-24T00:00:03.232000","2026-02-24T00:00:03.234000","2026-02-24T00:00:03.235000","2026-02-24T00:00:03.236000","2026-02-24T00:00:03.238000","2026-02-24T00:00:03.239000","2026-02-24T00:00:03.241000","2026-02-24T00:00:03.242000","2026-02-24T00:00:03.244000","2026-02-24T00:00:03.245000","2026-02-24T00:00:03.247000","2026-02-24T00:00:03.248000","2026-02-24T00:00:03.249000","2026-02-24T00:00:03.251000","2026-02-24T00:00:03.252000","2026-02-24T00:00:03.254000","2026-02-24T00:00:03.255000","2026-02-24T00:00:03.257000","2026-02-24T00:00:03.258000","2026-02-24T00:00:03.260000","2026-02-24T00:00:03.261000","2026-02-24T00:00:03.262000","2026-02-24T00:00:03.264000","2026-02-24T00:00:03.265000","2026-02-24T00:00:03.267000","2026-02-24T00:00:03.268000","2026-02-24T00:00:03.270000","2026-02-24T00:00:03.271000","2026-02-24T00:00:03.273000","2026-02-24T00:00:03.279000","2026-02-24T00:00:03.281000","2026-02-24T00:00:03.283000","2026-02-24T00:00:03.283000","2026-02-24T00:00:03.285000","2026-02-24T00:00:03.286000","2026-02-24T00:00:03.288000","2026-02-24T00:00:03.289000","2026-02-24T00:00:03.290000","2026-02-24T00:00:03.292000","2026-02-24T00:00:03.293000","2026-02-24T00:00:03.295000","2026-02-24T00:00:03.296000","2026-02-24T00:00:03.298000","2026-02-24T00:00:03.299000","2026-02-24T00:00:03.301000","2026-02-24T00:00:03.302000","2026-02-24T00:00:03.303000","2026-02-24T00:00:03.305000","2026-02-24T00:00:03.306000","2026-02-24T00:00:03.308000","2026-02-24T00:00:03.309000","2026-02-24T00:00:03.311000","2026-02-24T00:00:03.312000","2026-02-24T00:00:03.314000","2026-02-24T00:00:03.315000","2026-02-24T00:00:03.317000","2026-02-24T00:00:03.318000","2026-02-24T00:00:03.320000","2026-02-24T00:00:03.321000","2026-02-24T00:00:03.322000","2026-02-24T00:00:03.324000","2026-02-24T00:00:03.325000","2026-02-24T00:00:03.327000","2026-02-24T00:00:03.328000","2026-02-24T00:00:03.329000","2026-02-24T00:00:03.331000","2026-02-24T00:00:03.332000","2026-02-24T00:00:03.334000","2026-02-24T00:00:03.335000","2026-02-24T00:00:03.337000","2026-02-24T00:00:03.338000","2026-02-24T00:00:03.340000","2026-02-24T00:00:03.347000","2026-02-24T00:00:03.348000","2026-02-24T00:00:03.349000","2026-02-24T00:00:03.350000","2026-02-24T00:00:03.352000","2026-02-24T00:00:03.353000","2026-02-24T00:00:03.355000","2026-02-24T00:00:03.356000","2026-02-24T00:00:03.358000","2026-02-24T00:00:03.359000","2026-02-24T00:00:03.360000","2026-02-24T00:00:03.362000","2026-02-24T00:00:03.363000","2026-02-24T00:00:03.365000","2026-02-24T00:00:03.366000","2026-02-24T00:00:03.368000","2026-02-24T00:00:03.369000","2026-02-24T00:00:03.371000","2026-02-24T00:00:03.372000","2026-02-24T00:00:03.373000","2026-02-24T00:00:03.375000","2026-02-24T00:00:03.376000","2026-02-24T00:00:03.378000","2026-02-24T00:00:03.379000","2026-02-24T00:00:03.381000","2026-02-24T00:00:03.382000","2026-02-24T00:00:03.384000","2026-02-24T00:00:03.385000","2026-02-24T00:00:03.386000","2026-02-24T00:00:03.388000","2026-02-24T00:00:03.389000","2026-02-24T00:00:03.391000","2026-02-24T00:00:03.393000","2026-02-24T00:00:03.394000","2026-02-24T00:00:03.396000","2026-02-24T00:00:03.397000","2026-02-24T00:00:03.404000","2026-02-24T00:00:03.406000","2026-02-24T00:00:03.407000","2026-02-24T00:00:03.408000","2026-02-24T00:00:03.410000","2026-02-24T00:00:03.411000","2026-02-24T00:00:03.413000","2026-02-24T00:00:03.415000","2026-02-24T00:00:03.416000","2026-02-24T00:00:03.417000","2026-02-24T00:00:03.418000","2026-02-24T00:00:03.420000","2026-02-24T00:00:03.422000","2026-02-24T00:00:03.423000","2026-02-24T00:00:03.425000","2026-02-24T00:00:03.426000","2026-02-24T00:00:03.428000","2026-02-24T00:00:03.429000","2026-02-24T00:00:03.430000","2026-02-24T00:00:03.432000","2026-02-24T00:00:03.433000","2026-02-24T00:00:03.435000","2026-02-24T00:00:03.436000","2026-02-24T00:00:03.438000","2026-02-24T00:00:03.439000","2026-02-24T00:00:03.441000","2026-02-24T00:00:03.442000","2026-02-24T00:00:03.444000","2026-02-24T00:00:03.445000","2026-02-24T00:00:03.447000","2026-02-24T00:00:03.448000","2026-02-24T00:00:03.450000","2026-02-24T00:00:03.451000","2026-02-24T00:00:03.453000","2026-02-24T00:00:03.454000","2026-02-24T00:00:03.456000","2026-02-24T00:00:03.457000","2026-02-24T00:00:03.459000","2026-02-24T00:00:03.460000","2026-02-24T00:00:03.462000","2026-02-24T00:00:03.463000","2026-02-24T00:00:03.465000","2026-02-24T00:00:03.466000","2026-02-24T00:00:03.468000","2026-02-24T00:00:03.469000","2026-02-24T00:00:03.477000","2026-02-24T00:00:03.478000","2026-02-24T00:00:03.479000","2026-02-24T00:00:03.480000","2026-02-24T00:00:03.482000","2026-02-24T00:00:03.483000","2026-02-24T00:00:03.485000","2026-02-24T00:00:03.486000","2026-02-24T00:00:03.488000","2026-02-24T00:00:03.489000","2026-02-24T00:00:03.491000","2026-02-24T00:00:03.492000","2026-02-24T00:00:03.494000","2026-02-24T00:00:03.495000","2026-02-24T00:00:03.497000","2026-02-24T00:00:03.498000","2026-02-24T00:00:03.500000","2026-02-24T00:00:03.501000","2026-02-24T00:00:03.503000","2026-02-24T00:00:03.504000","2026-02-24T00:00:03.506000","2026-02-24T00:00:03.507000","2026-02-24T00:00:03.509000","2026-02-24T00:00:03.510000","2026-02-24T00:00:03.512000","2026-02-24T00:00:03.513000","2026-02-24T00:00:03.515000","2026-02-24T00:00:03.516000","2026-02-24T00:00:03.518000","2026-02-24T00:00:03.519000","2026-02-24T00:00:03.521000","2026-02-24T00:00:03.522000","2026-02-24T00:00:03.523000","2026-02-24T00:00:03.525000","2026-02-24T00:00:03.526000","2026-02-24T00:00:03.528000","2026-02-24T00:00:03.530000","2026-02-24T00:00:03.531000","2026-02-24T00:00:03.533000","2026-02-24T00:00:03.534000","2026-02-24T00:00:03.536000","2026-02-24T00:00:03.537000","2026-02-24T00:00:03.539000","2026-02-24T00:00:03.540000","2026-02-24T00:00:03.541000","2026-02-24T00:00:03.543000","2026-02-24T00:00:03.544000","2026-02-24T00:00:03.546000","2026-02-24T00:00:03.547000","2026-02-24T00:00:03.549000","2026-02-24T00:00:03.550000","2026-02-24T00:00:03.552000","2026-02-24T00:00:03.554000","2026-02-24T00:00:03.555000","2026-02-24T00:00:03.556000","2026-02-24T00:00:03.558000","2026-02-24T00:00:03.559000","2026-02-24T00:00:03.561000","2026-02-24T00:00:03.562000","2026-02-24T00:00:03.564000","2026-02-24T00:00:03.565000","2026-02-24T00:00:03.567000","2026-02-24T00:00:03.568000","2026-02-24T00:00:03.570000","2026-02-24T00:00:03.571000","2026-02-24T00:00:03.573000","2026-02-24T00:00:03.574000","2026-02-24T00:00:03.576000","2026-02-24T00:00:03.577000","2026-02-24T00:00:03.579000","2026-02-24T00:00:03.580000","2026-02-24T00:00:03.582000","2026-02-24T00:00:03.583000","2026-02-24T00:00:03.585000","2026-02-24T00:00:03.586000","2026-02-24T00:00:03.588000","2026-02-24T00:00:03.590000","2026-02-24T00:00:03.591000","2026-02-24T00:00:03.593000","2026-02-24T00:00:03.594000","2026-02-24T00:00:03.596000","2026-02-24T00:00:03.597000","2026-02-24T00:00:03.599000","2026-02-24T00:00:03.600000","2026-02-24T00:00:03.113000","2026-02-24T00:00:03.114000","2026-02-24T00:00:03.116000","2026-02-24T00:00:03.117000","2026-02-24T00:00:03.119000","2026-02-24T00:00:03.120000","2026-02-24T00:00:03.121000","2026-02-24T00:00:03.123000","2026-02-24T00:00:03.124000","2026-02-24T00:00:03.126000","2026-02-24T00:00:03.127000","2026-02-24T00:00:03.128000","2026-02-24T00:00:03.130000","2026-02-24T00:00:03.131000","2026-02-24T00:00:03.133000","2026-02-24T00:00:03.134000","2026-02-24T00:00:03.135000","2026-02-24T00:00:03.137000","2026-02-24T00:00:03.138000","2026-02-24T00:00:03.140000","2026-02-24T00:00:03.141000","2026-02-24T00:00:03.142000","2026-02-24T00:00:03.144000","2026-02-24T00:00:03.145000","2026-02-24T00:00:03.146000","2026-02-24T00:00:03.148000","2026-02-24T00:00:03.149000","2026-02-24T00:00:03.151000","2026-02-24T00:00:03.152000","2026-02-24T00:00:03.153000","2026-02-24T00:00:03.155000","2026-02-24T00:00:03.156000","2026-02-24T00:00:03.157000","2026-02-24T00:00:03.159000","2026-02-24T00:00:03.160000","2026-02-24T00:00:03.162000","2026-02-24T00:00:03.163000","2026-02-24T00:00:03.164000","2026-02-24T00:00:03.166000","2026-02-24T00:00:03.167000","2026-02-24T00:00:03.169000","2026-02-24T00:00:03.170000","2026-02-24T00:00:03.171000","2026-02-24T00:00:03.173000","2026-02-24T00:00:03.174000","2026-02-24T00:00:03.176000","2026-02-24T00:00:03.177000","2026-02-24T00:00:03.178000","2026-02-24T00:00:03.180000","2026-02-24T00:00:03.181000","2026-02-24T00:00:03.183000","2026-02-24T00:00:03.184000","2026-02-24T00:00:03.185000","2026-02-24T00:00:03.187000","2026-02-24T00:00:03.188000","2026-02-24T00:00:03.189000","2026-02-24T00:00:03.191000","2026-02-24T00:00:03.192000","2026-02-24T00:00:03.194000","2026-02-24T00:00:03.195000","2026-02-24T00:00:03.196000","2026-02-24T00:00:03.198000","2026-02-24T00:00:03.199000","2026-02-24T00:00:03.201000","2026-02-24T00:00:03.202000","2026-02-24T00:00:03.203000","2026-02-24T00:00:03.205000","2026-02-24T00:00:03.206000","2026-02-24T00:00:03.208000","2026-02-24T00:00:03.209000","2026-02-24T00:00:03.211000","2026-02-24T00:00:03.212000","2026-02-24T00:00:03.214000","2026-02-24T00:00:03.215000","2026-02-24T00:00:03.216000","2026-02-24T00:00:03.218000","2026-02-24T00:00:03.219000","2026-02-24T00:00:03.221000","2026-02-24T00:00:03.222000","2026-02-24T00:00:03.224000","2026-02-24T00:00:03.225000","2026-02-24T00:00:03.227000","2026-02-24T00:00:03.228000","2026-02-24T00:00:03.229000","2026-02-24T00:00:03.231000","2026-02-24T00:00:03.232000","2026-02-24T00:00:03.234000","2026-02-24T00:00:03.235000","2026-02-24T00:00:03.237000","2026-02-24T00:00:03.238000","2026-02-24T00:00:03.239000","2026-02-24T00:00:03.241000","2026-02-24T00:00:03.242000","2026-02-24T00:00:03.244000","2026-02-24T00:00:03.245000","2026-02-24T00:00:03.247000","2026-02-24T00:00:03.248000","2026-02-24T00:00:03.250000","2026-02-24T00:00:03.251000","2026-02-24T00:00:03.252000","2026-02-24T00:00:03.254000","2026-02-24T00:00:03.255000","2026-02-24T00:00:03.257000","2026-02-24T00:00:03.258000","2026-02-24T00:00:03.260000","2026-02-24T00:00:03.261000","2026-02-24T00:00:03.263000","2026-02-24T00:00:03.264000","2026-02-24T00:00:03.265000","2026-02-24T00:00:03.267000","2026-02-24T00:00:03.268000","2026-02-24T00:00:03.270000","2026-02-24T00:00:03.271000","2026-02-24T00:00:03.273000","2026-02-24T00:00:03.280000","2026-02-24T00:00:03.282000","2026-02-24T00:00:03.283000","2026-02-24T00:00:03.283000","2026-02-24T00:00:03.285000","2026-02-24T00:00:03.286000","2026-02-24T00:00:03.288000","2026-02-24T00:00:03.289000","2026-02-24T00:00:03.290000","2026-02-24T00:00:03.292000","2026-02-24T00:00:03.293000","2026-02-24T00:00:03.295000","2026-02-24T00:00:03.296000","2026-02-24T00:00:03.298000","2026-02-24T00:00:03.299000","2026-02-24T00:00:03.301000","2026-02-24T00:00:03.302000","2026-02-24T00:00:03.303000","2026-02-24T00:00:03.305000","2026-02-24T00:00:03.306000","2026-02-24T00:00:03.308000","2026-02-24T00:00:03.309000","2026-02-24T00:00:03.311000","2026-02-24T00:00:03.312000","2026-02-24T00:00:03.314000","2026-02-24T00:00:03.316000","2026-02-24T00:00:03.317000","2026-02-24T00:00:03.319000","2026-02-24T00:00:03.320000","2026-02-24T00:00:03.321000","2026-02-24T00:00:03.322000","2026-02-24T00:00:03.324000","2026-02-24T00:00:03.325000","2026-02-24T00:00:03.327000","2026-02-24T00:00:03.328000","2026-02-24T00:00:03.329000","2026-02-24T00:00:03.331000","2026-02-24T00:00:03.332000","2026-02-24T00:00:03.334000","2026-02-24T00:00:03.335000","2026-02-24T00:00:03.337000","2026-02-24T00:00:03.338000","2026-02-24T00:00:03.340000","2026-02-24T00:00:03.281000","2026-02-24T00:00:03.283000","2026-02-24T00:00:03.283000","2026-02-24T00:00:03.285000","2026-02-24T00:00:03.286000","2026-02-24T00:00:03.287000","2026-02-24T00:00:03.289000","2026-02-24T00:00:03.290000","2026-02-24T00:00:03.292000","2026-02-24T00:00:03.293000","2026-02-24T00:00:03.295000","2026-02-24T00:00:03.296000","2026-02-24T00:00:03.298000","2026-02-24T00:00:03.299000","2026-02-24T00:00:03.300000","2026-02-24T00:00:03.302000","2026-02-24T00:00:03.303000","2026-02-24T00:00:03.305000","2026-02-24T00:00:03.306000","2026-02-24T00:00:03.308000","2026-02-24T00:00:03.309000","2026-02-24T00:00:03.311000","2026-02-24T00:00:03.312000","2026-02-24T00:00:03.314000","2026-02-24T00:00:03.315000","2026-02-24T00:00:03.317000","2026-02-24T00:00:03.318000","2026-02-24T00:00:03.320000","2026-02-24T00:00:03.321000","2026-02-24T00:00:03.322000","2026-02-24T00:00:03.324000","2026-02-24T00:00:03.325000","2026-02-24T00:00:03.326000","2026-02-24T00:00:03.328000","2026-02-24T00:00:03.329000","2026-02-24T00:00:03.331000","2026-02-24T00:00:03.332000","2026-02-24T00:00:03.334000","2026-02-24T00:00:03.335000","2026-02-24T00:00:03.337000","2026-02-24T00:00:03.338000","2026-02-24T00:00:03.339000","2026-02-24T00:00:03.347000","2026-02-24T00:00:03.348000","2026-02-24T00:00:03.349000","2026-02-24T00:00:03.350000","2026-02-24T00:00:03.352000","2026-02-24T00:00:03.353000","2026-02-24T00:00:03.355000","2026-02-24T00:00:03.356000","2026-02-24T00:00:03.358000","2026-02-24T00:00:03.359000","2026-02-24T00:00:03.361000","2026-02-24T00:00:03.362000","2026-02-24T00:00:03.363000","2026-02-24T00:00:03.365000","2026-02-24T00:00:03.366000","2026-02-24T00:00:03.368000","2026-02-24T00:00:03.369000","2026-02-24T00:00:03.371000","2026-02-24T00:00:03.372000","2026-02-24T00:00:03.374000","2026-02-24T00:00:03.375000","2026-02-24T00:00:03.376000","2026-02-24T00:00:03.378000","2026-02-24T00:00:03.379000","2026-02-24T00:00:03.381000","2026-02-24T00:00:03.382000","2026-02-24T00:00:03.384000","2026-02-24T00:00:03.385000","2026-02-24T00:00:03.386000","2026-02-24T00:00:03.388000","2026-02-24T00:00:03.389000","2026-02-24T00:00:03.391000","2026-02-24T00:00:03.393000","2026-02-24T00:00:03.394000","2026-02-24T00:00:03.396000","2026-02-24T00:00:03.397000","2026-02-24T00:00:03.405000","2026-02-24T00:00:03.406000","2026-02-24T00:00:03.407000","2026-02-24T00:00:03.408000","2026-02-24T00:00:03.410000","2026-02-24T00:00:03.411000","2026-02-24T00:00:03.413000","2026-02-24T00:00:03.415000","2026-02-24T00:00:03.416000","2026-02-24T00:00:03.417000","2026-02-24T00:00:03.419000","2026-02-24T00:00:03.420000","2026-02-24T00:00:03.422000","2026-02-24T00:00:03.423000","2026-02-24T00:00:03.425000","2026-02-24T00:00:03.426000","2026-02-24T00:00:03.428000","2026-02-24T00:00:03.429000","2026-02-24T00:00:03.430000","2026-02-24T00:00:03.432000","2026-02-24T00:00:03.433000","2026-02-24T00:00:03.435000","2026-02-24T00:00:03.437000","2026-02-24T00:00:03.438000","2026-02-24T00:00:03.440000","2026-02-24T00:00:03.441000","2026-02-24T00:00:03.443000","2026-02-24T00:00:03.444000","2026-02-24T00:00:03.446000","2026-02-24T00:00:03.447000","2026-02-24T00:00:03.448000","2026-02-24T00:00:03.450000","2026-02-24T00:00:03.451000","2026-02-24T00:00:03.453000","2026-02-24T00:00:03.454000","2026-02-24T00:00:03.456000","2026-02-24T00:00:03.457000","2026-02-24T00:00:03.459000","2026-02-24T00:00:03.460000","2026-02-24T00:00:03.462000","2026-02-24T00:00:03.463000","2026-02-24T00:00:03.465000","2026-02-24T00:00:03.466000","2026-02-24T00:00:03.468000","2026-02-24T00:00:03.469000","2026-02-24T00:00:03.477000","2026-02-24T00:00:03.478000","2026-02-24T00:00:03.479000","2026-02-24T00:00:03.480000","2026-02-24T00:00:03.482000","2026-02-24T00:00:03.483000","2026-02-24T00:00:03.485000","2026-02-24T00:00:03.486000","2026-02-24T00:00:03.348000","2026-02-24T00:00:03.349000","2026-02-24T00:00:03.350000","2026-02-24T00:00:03.352000","2026-02-24T00:00:03.353000","2026-02-24T00:00:03.355000","2026-02-24T00:00:03.356000","2026-02-24T00:00:03.358000","2026-02-24T00:00:03.359000","2026-02-24T00:00:03.360000","2026-02-24T00:00:03.362000","2026-02-24T00:00:03.363000","2026-02-24T00:00:03.365000","2026-02-24T00:00:03.366000","2026-02-24T00:00:03.368000","2026-02-24T00:00:03.369000","2026-02-24T00:00:03.371000","2026-02-24T00:00:03.372000","2026-02-24T00:00:03.373000","2026-02-24T00:00:03.375000","2026-02-24T00:00:03.376000","2026-02-24T00:00:03.378000","2026-02-24T00:00:03.379000","2026-02-24T00:00:03.381000","2026-02-24T00:00:03.382000","2026-02-24T00:00:03.384000","2026-02-24T00:00:03.385000","2026-02-24T00:00:03.386000","2026-02-24T00:00:03.388000","2026-02-24T00:00:03.389000","2026-02-24T00:00:03.391000","2026-02-24T00:00:03.393000","2026-02-24T00:00:03.394000","2026-02-24T00:00:03.406000","2026-02-24T00:00:03.407000","2026-02-24T00:00:03.408000","2026-02-24T00:00:03.410000","2026-02-24T00:00:03.411000","2026-02-24T00:00:03.413000","2026-02-24T00:00:03.415000","2026-02-24T00:00:03.416000","2026-02-24T00:00:03.417000","2026-02-24T00:00:03.418000","2026-02-24T00:00:03.420000","2026-02-24T00:00:03.421000","2026-02-24T00:00:03.423000","2026-02-24T00:00:03.424000","2026-02-24T00:00:03.426000","2026-02-24T00:00:03.427000","2026-02-24T00:00:03.429000","2026-02-24T00:00:03.430000","2026-02-24T00:00:03.432000","2026-02-24T00:00:03.433000","2026-02-24T00:00:03.435000","2026-02-24T00:00:03.436000","2026-02-24T00:00:03.438000","2026-02-24T00:00:03.439000","2026-02-24T00:00:03.441000","2026-02-24T00:00:03.442000","2026-02-24T00:00:03.444000","2026-02-24T00:00:03.445000","2026-02-24T00:00:03.447000","2026-02-24T00:00:03.448000","2026-02-24T00:00:03.450000","2026-02-24T00:00:03.451000","2026-02-24T00:00:03.453000","2026-02-24T00:00:03.454000","2026-02-24T00:00:03.456000","2026-02-24T00:00:03.457000","2026-02-24T00:00:03.459000","2026-02-24T00:00:03.460000","2026-02-24T00:00:03.462000","2026-02-24T00:00:03.463000","2026-02-24T00:00:03.465000","2026-02-24T00:00:03.466000","2026-02-24T00:00:03.468000","2026-02-24T00:00:03.469000","2026-02-24T00:00:03.478000","2026-02-24T00:00:03.479000","2026-02-24T00:00:03.480000","2026-02-24T00:00:03.482000","2026-02-24T00:00:03.483000","2026-02-24T00:00:03.485000","2026-02-24T00:00:03.486000","2026-02-24T00:00:03.488000","2026-02-24T00:00:03.490000","2026-02-24T00:00:03.491000","2026-02-24T00:00:03.492000","2026-02-24T00:00:03.494000","2026-02-24T00:00:03.495000","2026-02-24T00:00:03.497000","2026-02-24T00:00:03.498000","2026-02-24T00:00:03.500000","2026-02-24T00:00:03.501000","2026-02-24T00:00:03.503000","2026-02-24T00:00:03.504000","2026-02-24T00:00:03.506000","2026-02-24T00:00:03.507000","2026-02-24T00:00:03.509000","2026-02-24T00:00:03.510000","2026-02-24T00:00:03.512000","2026-02-24T00:00:03.513000","2026-02-24T00:00:03.515000","2026-02-24T00:00:03.516000","2026-02-24T00:00:03.518000","2026-02-24T00:00:03.519000","2026-02-24T00:00:03.521000","2026-02-24T00:00:03.522000","2026-02-24T00:00:03.524000","2026-02-24T00:00:03.525000","2026-02-24T00:00:03.527000","2026-02-24T00:00:03.528000","2026-02-24T00:00:03.530000","2026-02-24T00:00:03.531000","2026-02-24T00:00:03.533000","2026-02-24T00:00:03.534000","2026-02-24T00:00:03.536000","2026-02-24T00:00:03.537000","2026-02-24T00:00:03.539000","2026-02-24T00:00:03.540000","2026-02-24T00:00:03.542000","2026-02-24T00:00:03.543000","2026-02-24T00:00:03.545000","2026-02-24T00:00:03.546000","2026-02-24T00:00:03.548000","2026-02-24T00:00:03.549000","2026-02-24T00:00:03.551000","2026-02-24T00:00:03.552000","2026-02-24T00:00:03.554000","2026-02-24T00:00:03.555000","2026-02-24T00:00:03.557000","2026-02-24T00:00:03.558000","2026-02-24T00:00:03.560000","2026-02-24T00:00:03.561000","2026-02-24T00:00:03.562000","2026-02-24T00:00:03.564000","2026-02-24T00:00:03.565000","2026-02-24T00:00:03.567000","2026-02-24T00:00:03.568000","2026-02-24T00:00:03.570000","2026-02-24T00:00:03.571000","2026-02-24T00:00:03.573000","2026-02-24T00:00:03.574000","2026-02-24T00:00:03.576000","2026-02-24T00:00:03.577000","2026-02-24T00:00:03.579000","2026-02-24T00:00:03.580000","2026-02-24T00:00:03.582000","2026-02-24T00:00:03.583000","2026-02-24T00:00:03.585000","2026-02-24T00:00:03.586000","2026-02-24T00:00:03.588000","2026-02-24T00:00:03.590000","2026-02-24T00:00:03.591000","2026-02-24T00:00:03.593000","2026-02-24T00:00:03.594000","2026-02-24T00:00:03.596000","2026-02-24T00:00:03.597000","2026-02-24T00:00:03.599000","2026-02-24T00:00:03.600000","2026-02-24T00:00:03.602000","2026-02-24T00:00:03.603000","2026-02-24T00:00:03.605000","2026-02-24T00:00:03.606000","2026-02-24T00:00:03.608000","2026-02-24T00:00:03.612000","2026-02-24T00:00:03.613000","2026-02-24T00:00:03.614000","2026-02-24T00:00:03.616000","2026-02-24T00:00:03.617000","2026-02-24T00:00:03.619000","2026-02-24T00:00:03.621000","2026-02-24T00:00:03.622000","2026-02-24T00:00:03.624000","2026-02-24T00:00:03.625000","2026-02-24T00:00:03.627000","2026-02-24T00:00:03.628000","2026-02-24T00:00:03.630000","2026-02-24T00:00:03.631000","2026-02-24T00:00:03.633000","2026-02-24T00:00:03.634000","2026-02-24T00:00:03.636000","2026-02-24T00:00:03.637000","2026-02-24T00:00:03.639000","2026-02-24T00:00:03.640000","2026-02-24T00:00:03.642000","2026-02-24T00:00:03.643000","2026-02-24T00:00:03.645000","2026-02-24T00:00:03.646000","2026-02-24T00:00:03.648000","2026-02-24T00:00:03.650000","2026-02-24T00:00:03.651000","2026-02-24T00:00:03.653000","2026-02-24T00:00:03.654000","2026-02-24T00:00:03.656000","2026-02-24T00:00:03.657000","2026-02-24T00:00:03.659000","2026-02-24T00:00:03.660000","2026-02-24T00:00:03.662000","2026-02-24T00:00:03.663000","2026-02-24T00:00:03.665000","2026-02-24T00:00:03.667000","2026-02-24T00:00:03.668000","2026-02-24T00:00:03.670000","2026-02-24T00:00:03.671000","2026-02-24T00:00:03.673000","2026-02-24T00:00:03.674000","2026-02-24T00:00:03.676000","2026-02-24T00:00:03.677000","2026-02-24T00:00:03.679000","2026-02-24T00:00:03.680000","2026-02-24T00:00:03.682000","2026-02-24T00:00:03.683000","2026-02-24T00:00:03.685000","2026-02-24T00:00:03.686000","2026-02-24T00:00:03.688000","2026-02-24T00:00:03.690000","2026-02-24T00:00:03.691000","2026-02-24T00:00:03.693000","2026-02-24T00:00:03.694000","2026-02-24T00:00:03.696000","2026-02-24T00:00:03.697000","2026-02-24T00:00:03.699000","2026-02-24T00:00:03.700000","2026-02-24T00:00:03.702000","2026-02-24T00:00:03.703000","2026-02-24T00:00:03.705000","2026-02-24T00:00:03.707000","2026-02-24T00:00:03.708000","2026-02-24T00:00:03.709000","2026-02-24T00:00:03.711000","2026-02-24T00:00:03.713000","2026-02-24T00:00:03.714000","2026-02-24T00:00:03.716000","2026-02-24T00:00:03.717000","2026-02-24T00:00:03.719000","2026-02-24T00:00:03.720000","2026-02-24T00:00:03.722000","2026-02-24T00:00:03.724000","2026-02-24T00:00:03.725000","2026-02-24T00:00:03.727000","2026-02-24T00:00:03.728000","2026-02-24T00:00:03.734000","2026-02-24T00:00:03.735000","2026-02-24T00:00:03.736000","2026-02-24T00:00:03.737000","2026-02-24T00:00:03.738000","2026-02-24T00:00:03.740000","2026-02-24T00:00:03.741000","2026-02-24T00:00:03.742000","2026-02-24T00:00:03.743000","2026-02-24T00:00:03.745000","2026-02-24T00:00:03.746000","2026-02-24T00:00:03.747000","2026-02-24T00:00:03.748000","2026-02-24T00:00:03.750000","2026-02-24T00:00:03.751000","2026-02-24T00:00:03.752000","2026-02-24T00:00:03.754000","2026-02-24T00:00:03.755000","2026-02-24T00:00:03.756000","2026-02-24T00:00:03.757000","2026-02-24T00:00:03.759000","2026-02-24T00:00:03.760000","2026-02-24T00:00:03.761000","2026-02-24T00:00:03.762000","2026-02-24T00:00:03.764000","2026-02-24T00:00:03.765000","2026-02-24T00:00:03.766000","2026-02-24T00:00:03.767000","2026-02-24T00:00:03.769000","2026-02-24T00:00:03.770000","2026-02-24T00:00:03.771000","2026-02-24T00:00:03.773000","2026-02-24T00:00:03.774000","2026-02-24T00:00:03.775000","2026-02-24T00:00:03.776000","2026-02-24T00:00:03.778000","2026-02-24T00:00:03.779000","2026-02-24T00:00:03.780000","2026-02-24T00:00:03.781000","2026-02-24T00:00:03.783000","2026-02-24T00:00:03.784000","2026-02-24T00:00:03.785000","2026-02-24T00:00:03.787000","2026-02-24T00:00:03.788000","2026-02-24T00:00:03.789000","2026-02-24T00:00:03.790000","2026-02-24T00:00:03.792000","2026-02-24T00:00:03.793000","2026-02-24T00:00:03.794000","2026-02-24T00:00:03.795000","2026-02-24T00:00:03.797000","2026-02-24T00:00:03.798000","2026-02-24T00:00:03.799000","2026-02-24T00:00:03.800000","2026-02-24T00:00:03.802000","2026-02-24T00:00:03.803000","2026-02-24T00:00:03.805000","2026-02-24T00:00:03.806000","2026-02-24T00:00:03.813000","2026-02-24T00:00:03.814000","2026-02-24T00:00:03.815000","2026-02-24T00:00:03.816000","2026-02-24T00:00:03.818000","2026-02-24T00:00:03.819000","2026-02-24T00:00:03.820000","2026-02-24T00:00:03.822000","2026-02-24T00:00:03.823000","2026-02-24T00:00:03.824000","2026-02-24T00:00:03.825000","2026-02-24T00:00:03.827000","2026-02-24T00:00:03.828000","2026-02-24T00:00:03.829000","2026-02-24T00:00:03.831000","2026-02-24T00:00:03.832000","2026-02-24T00:00:03.833000","2026-02-24T00:00:03.834000","2026-02-24T00:00:03.836000","2026-02-24T00:00:03.837000","2026-02-24T00:00:03.838000","2026-02-24T00:00:03.840000","2026-02-24T00:00:03.841000","2026-02-24T00:00:03.842000","2026-02-24T00:00:03.843000","2026-02-24T00:00:03.845000","2026-02-24T00:00:03.846000","2026-02-24T00:00:03.847000","2026-02-24T00:00:03.849000","2026-02-24T00:00:03.850000","2026-02-24T00:00:03.851000","2026-02-24T00:00:03.852000","2026-02-24T00:00:03.854000","2026-02-24T00:00:03.855000","2026-02-24T00:00:03.856000","2026-02-24T00:00:03.857000","2026-02-24T00:00:03.859000","2026-02-24T00:00:03.860000","2026-02-24T00:00:03.861000","2026-02-24T00:00:03.863000","2026-02-24T00:00:03.864000","2026-02-24T00:00:03.865000","2026-02-24T00:00:03.867000","2026-02-24T00:00:03.874000","2026-02-24T00:00:03.876000","2026-02-24T00:00:03.876000","2026-02-24T00:00:03.878000","2026-02-24T00:00:03.879000","2026-02-24T00:00:03.880000","2026-02-24T00:00:03.881000","2026-02-24T00:00:03.883000","2026-02-24T00:00:03.884000","2026-02-24T00:00:03.885000","2026-02-24T00:00:03.886000","2026-02-24T00:00:03.888000","2026-02-24T00:00:03.889000","2026-02-24T00:00:03.890000","2026-02-24T00:00:03.892000","2026-02-24T00:00:03.893000","2026-02-24T00:00:03.894000","2026-02-24T00:00:03.896000","2026-02-24T00:00:03.897000","2026-02-24T00:00:03.898000","2026-02-24T00:00:03.899000","2026-02-24T00:00:03.905000","2026-02-24T00:00:03.906000","2026-02-24T00:00:03.907000","2026-02-24T00:00:03.908000","2026-02-24T00:00:03.909000","2026-02-24T00:00:03.911000","2026-02-24T00:00:03.912000","2026-02-24T00:00:03.913000","2026-02-24T00:00:03.914000","2026-02-24T00:00:03.916000","2026-02-24T00:00:03.917000","2026-02-24T00:00:03.918000","2026-02-24T00:00:03.920000","2026-02-24T00:00:03.921000","2026-02-24T00:00:03.922000","2026-02-24T00:00:03.924000","2026-02-24T00:00:03.925000","2026-02-24T00:00:03.926000","2026-02-24T00:00:03.928000","2026-02-24T00:00:03.929000","2026-02-24T00:00:03.930000","2026-02-24T00:00:03.932000","2026-02-24T00:00:03.933000","2026-02-24T00:00:03.935000","2026-02-24T00:00:03.936000","2026-02-24T00:00:03.937000","2026-02-24T00:00:03.938000","2026-02-24T00:00:03.940000","2026-02-24T00:00:03.941000","2026-02-24T00:00:03.942000","2026-02-24T00:00:03.944000","2026-02-24T00:00:03.945000","2026-02-24T00:00:03.946000","2026-02-24T00:00:03.948000","2026-02-24T00:00:03.949000","2026-02-24T00:00:03.950000","2026-02-24T00:00:03.952000","2026-02-24T00:00:03.953000","2026-02-24T00:00:03.954000","2026-02-24T00:00:03.955000","2026-02-24T00:00:03.957000","2026-02-24T00:00:03.958000","2026-02-24T00:00:03.959000","2026-02-24T00:00:03.961000","2026-02-24T00:00:03.962000","2026-02-24T00:00:03.963000","2026-02-24T00:00:03.965000","2026-02-24T00:00:03.966000","2026-02-24T00:00:03.967000","2026-02-24T00:00:03.969000","2026-02-24T00:00:03.970000","2026-02-24T00:00:03.971000","2026-02-24T00:00:03.973000","2026-02-24T00:00:03.974000","2026-02-24T00:00:03.975000","2026-02-24T00:00:03.977000","2026-02-24T00:00:03.978000","2026-02-24T00:00:03.979000","2026-02-24T00:00:03.981000","2026-02-24T00:00:03.982000","2026-02-24T00:00:03.983000","2026-02-24T00:00:03.985000","2026-02-24T00:00:03.986000","2026-02-24T00:00:03.988000","2026-02-24T00:00:03.989000","2026-02-24T00:00:03.990000","2026-02-24T00:00:03.991000","2026-02-24T00:00:03.993000","2026-02-24T00:00:03.994000","2026-02-24T00:00:03.995000","2026-02-24T00:00:03.997000","2026-02-24T00:00:03.998000","2026-02-24T00:00:03.999000","2026-02-24T00:00:04.001000","2026-02-24T00:00:04.002000","2026-02-24T00:00:04.003000","2026-02-24T00:00:04.005000","2026-02-24T00:00:04.006000","2026-02-24T00:00:04.007000","2026-02-24T00:00:04.009000","2026-02-24T00:00:04.010000","2026-02-24T00:00:04.011000","2026-02-24T00:00:04.013000","2026-02-24T00:00:04.014000","2026-02-24T00:00:04.015000","2026-02-24T00:00:04.017000","2026-02-24T00:00:04.018000","2026-02-24T00:00:04.019000","2026-02-24T00:00:04.021000","2026-02-24T00:00:04.022000","2026-02-24T00:00:04.023000","2026-02-24T00:00:04.025000","2026-02-24T00:00:04.026000","2026-02-24T00:00:04.027000","2026-02-24T00:00:04.028000","2026-02-24T00:00:04.030000","2026-02-24T00:00:04.031000","2026-02-24T00:00:04.032000","2026-02-24T00:00:04.034000","2026-02-24T00:00:04.035000","2026-02-24T00:00:04.037000","2026-02-24T00:00:04.038000","2026-02-24T00:00:04.039000","2026-02-24T00:00:04.040000","2026-02-24T00:00:04.042000","2026-02-24T00:00:04.043000","2026-02-24T00:00:04.044000","2026-02-24T00:00:04.046000","2026-02-24T00:00:04.047000","2026-02-24T00:00:04.048000","2026-02-24T00:00:04.050000","2026-02-24T00:00:04.051000","2026-02-24T00:00:04.053000","2026-02-24T00:00:04.054000","2026-02-24T00:00:04.055000","2026-02-24T00:00:04.057000","2026-02-24T00:00:04.058000","2026-02-24T00:00:04.059000","2026-02-24T00:00:04.060000","2026-02-24T00:00:04.062000","2026-02-24T00:00:04.066000","2026-02-24T00:00:04.067000","2026-02-24T00:00:04.068000","2026-02-24T00:00:04.069000","2026-02-24T00:00:04.071000","2026-02-24T00:00:04.072000","2026-02-24T00:00:04.073000","2026-02-24T00:00:04.075000","2026-02-24T00:00:04.076000","2026-02-24T00:00:04.077000","2026-02-24T00:00:04.079000","2026-02-24T00:00:04.080000","2026-02-24T00:00:04.081000","2026-02-24T00:00:04.083000","2026-02-24T00:00:04.084000","2026-02-24T00:00:04.085000","2026-02-24T00:00:04.087000","2026-02-24T00:00:04.088000","2026-02-24T00:00:04.090000","2026-02-24T00:00:04.091000","2026-02-24T00:00:04.092000","2026-02-24T00:00:04.094000","2026-02-24T00:00:04.095000","2026-02-24T00:00:04.097000","2026-02-24T00:00:04.098000","2026-02-24T00:00:04.099000","2026-02-24T00:00:04.101000","2026-02-24T00:00:04.102000","2026-02-24T00:00:04.103000","2026-02-24T00:00:04.105000","2026-02-24T00:00:04.106000","2026-02-24T00:00:04.108000","2026-02-24T00:00:04.109000","2026-02-24T00:00:04.110000","2026-02-24T00:00:04.112000","2026-02-24T00:00:04.113000","2026-02-24T00:00:03.735000","2026-02-24T00:00:03.736000","2026-02-24T00:00:03.737000","2026-02-24T00:00:03.738000","2026-02-24T00:00:03.739000","2026-02-24T00:00:03.741000","2026-02-24T00:00:03.742000","2026-02-24T00:00:03.743000","2026-02-24T00:00:03.745000","2026-02-24T00:00:03.746000","2026-02-24T00:00:03.747000","2026-02-24T00:00:03.748000","2026-02-24T00:00:03.750000","2026-02-24T00:00:03.751000","2026-02-24T00:00:03.752000","2026-02-24T00:00:03.753000","2026-02-24T00:00:03.755000","2026-02-24T00:00:03.756000","2026-02-24T00:00:03.757000","2026-02-24T00:00:03.759000","2026-02-24T00:00:03.760000","2026-02-24T00:00:03.761000","2026-02-24T00:00:03.762000","2026-02-24T00:00:03.764000","2026-02-24T00:00:03.765000","2026-02-24T00:00:03.766000","2026-02-24T00:00:03.767000","2026-02-24T00:00:03.769000","2026-02-24T00:00:03.770000","2026-02-24T00:00:03.771000","2026-02-24T00:00:03.773000","2026-02-24T00:00:03.774000","2026-02-24T00:00:03.775000","2026-02-24T00:00:03.776000","2026-02-24T00:00:03.778000","2026-02-24T00:00:03.779000","2026-02-24T00:00:03.780000","2026-02-24T00:00:03.781000","2026-02-24T00:00:03.783000","2026-02-24T00:00:03.784000","2026-02-24T00:00:03.785000","2026-02-24T00:00:03.786000","2026-02-24T00:00:03.788000","2026-02-24T00:00:03.789000","2026-02-24T00:00:03.790000","2026-02-24T00:00:03.792000","2026-02-24T00:00:03.793000","2026-02-24T00:00:03.794000","2026-02-24T00:00:03.795000","2026-02-24T00:00:03.797000","2026-02-24T00:00:03.798000","2026-02-24T00:00:03.799000","2026-02-24T00:00:03.800000","2026-02-24T00:00:03.802000","2026-02-24T00:00:03.803000","2026-02-24T00:00:03.805000","2026-02-24T00:00:03.806000","2026-02-24T00:00:03.813000","2026-02-24T00:00:03.814000","2026-02-24T00:00:03.815000","2026-02-24T00:00:03.816000","2026-02-24T00:00:03.818000","2026-02-24T00:00:03.819000","2026-02-24T00:00:03.820000","2026-02-24T00:00:03.822000","2026-02-24T00:00:03.823000","2026-02-24T00:00:03.824000","2026-02-24T00:00:03.825000","2026-02-24T00:00:03.827000","2026-02-24T00:00:03.828000","2026-02-24T00:00:03.829000","2026-02-24T00:00:03.831000","2026-02-24T00:00:03.832000","2026-02-24T00:00:03.833000","2026-02-24T00:00:03.834000","2026-02-24T00:00:03.836000","2026-02-24T00:00:03.837000","2026-02-24T00:00:03.838000","2026-02-24T00:00:03.840000","2026-02-24T00:00:03.841000","2026-02-24T00:00:03.842000","2026-02-24T00:00:03.843000","2026-02-24T00:00:03.845000","2026-02-24T00:00:03.846000","2026-02-24T00:00:03.847000","2026-02-24T00:00:03.849000","2026-02-24T00:00:03.850000","2026-02-24T00:00:03.851000","2026-02-24T00:00:03.852000","2026-02-24T00:00:03.854000","2026-02-24T00:00:03.855000","2026-02-24T00:00:03.856000","2026-02-24T00:00:03.857000","2026-02-24T00:00:03.859000","2026-02-24T00:00:03.860000","2026-02-24T00:00:03.861000","2026-02-24T00:00:03.863000","2026-02-24T00:00:03.864000","2026-02-24T00:00:03.865000","2026-02-24T00:00:03.866000","2026-02-24T00:00:03.874000","2026-02-24T00:00:03.876000","2026-02-24T00:00:03.876000","2026-02-24T00:00:03.878000","2026-02-24T00:00:03.879000","2026-02-24T00:00:03.880000","2026-02-24T00:00:03.881000","2026-02-24T00:00:03.883000","2026-02-24T00:00:03.884000","2026-02-24T00:00:03.885000","2026-02-24T00:00:03.886000","2026-02-24T00:00:03.888000","2026-02-24T00:00:03.889000","2026-02-24T00:00:03.890000","2026-02-24T00:00:03.892000","2026-02-24T00:00:03.814000","2026-02-24T00:00:03.815000","2026-02-24T00:00:03.816000","2026-02-24T00:00:03.818000","2026-02-24T00:00:03.819000","2026-02-24T00:00:03.820000","2026-02-24T00:00:03.821000","2026-02-24T00:00:03.823000","2026-02-24T00:00:03.824000","2026-02-24T00:00:03.825000","2026-02-24T00:00:03.827000","2026-02-24T00:00:03.828000","2026-02-24T00:00:03.829000","2026-02-24T00:00:03.831000","2026-02-24T00:00:03.832000","2026-02-24T00:00:03.833000","2026-02-24T00:00:03.834000","2026-02-24T00:00:03.836000","2026-02-24T00:00:03.837000","2026-02-24T00:00:03.838000","2026-02-24T00:00:03.839000","2026-02-24T00:00:03.841000","2026-02-24T00:00:03.842000","2026-02-24T00:00:03.843000","2026-02-24T00:00:03.845000","2026-02-24T00:00:03.846000","2026-02-24T00:00:03.847000","2026-02-24T00:00:03.848000","2026-02-24T00:00:03.850000","2026-02-24T00:00:03.851000","2026-02-24T00:00:03.852000","2026-02-24T00:00:03.854000","2026-02-24T00:00:03.855000","2026-02-24T00:00:03.856000","2026-02-24T00:00:03.857000","2026-02-24T00:00:03.859000","2026-02-24T00:00:03.860000","2026-02-24T00:00:03.861000","2026-02-24T00:00:03.863000","2026-02-24T00:00:03.864000","2026-02-24T00:00:03.865000","2026-02-24T00:00:03.866000","2026-02-24T00:00:03.874000","2026-02-24T00:00:03.876000","2026-02-24T00:00:03.876000","2026-02-24T00:00:03.877000","2026-02-24T00:00:03.879000","2026-02-24T00:00:03.880000","2026-02-24T00:00:03.881000","2026-02-24T00:00:03.883000","2026-02-24T00:00:03.884000","2026-02-24T00:00:03.885000","2026-02-24T00:00:03.886000","2026-02-24T00:00:03.888000","2026-02-24T00:00:03.889000","2026-02-24T00:00:03.890000","2026-02-24T00:00:03.892000","2026-02-24T00:00:03.893000","2026-02-24T00:00:03.894000","2026-02-24T00:00:03.896000","2026-02-24T00:00:03.897000","2026-02-24T00:00:03.898000","2026-02-24T00:00:03.899000","2026-02-24T00:00:03.905000","2026-02-24T00:00:03.906000","2026-02-24T00:00:03.907000","2026-02-24T00:00:03.908000","2026-02-24T00:00:03.909000","2026-02-24T00:00:03.911000","2026-02-24T00:00:03.912000","2026-02-24T00:00:03.913000","2026-02-24T00:00:03.914000","2026-02-24T00:00:03.916000","2026-02-24T00:00:03.917000","2026-02-24T00:00:03.918000","2026-02-24T00:00:03.920000","2026-02-24T00:00:03.921000","2026-02-24T00:00:03.922000","2026-02-24T00:00:03.924000","2026-02-24T00:00:03.925000","2026-02-24T00:00:03.926000","2026-02-24T00:00:03.928000","2026-02-24T00:00:03.929000","2026-02-24T00:00:03.930000","2026-02-24T00:00:03.932000","2026-02-24T00:00:03.933000","2026-02-24T00:00:03.934000","2026-02-24T00:00:03.936000","2026-02-24T00:00:03.937000","2026-02-24T00:00:03.938000","2026-02-24T00:00:03.940000","2026-02-24T00:00:03.941000","2026-02-24T00:00:03.942000","2026-02-24T00:00:03.943000","2026-02-24T00:00:03.945000","2026-02-24T00:00:03.946000","2026-02-24T00:00:03.948000","2026-02-24T00:00:03.949000","2026-02-24T00:00:03.950000","2026-02-24T00:00:03.951000","2026-02-24T00:00:03.953000","2026-02-24T00:00:03.954000","2026-02-24T00:00:03.955000","2026-02-24T00:00:03.957000","2026-02-24T00:00:03.958000","2026-02-24T00:00:03.959000","2026-02-24T00:00:03.961000","2026-02-24T00:00:03.962000","2026-02-24T00:00:03.963000","2026-02-24T00:00:03.965000","2026-02-24T00:00:03.966000","2026-02-24T00:00:03.967000","2026-02-24T00:00:03.969000","2026-02-24T00:00:03.970000","2026-02-24T00:00:03.971000","2026-02-24T00:00:03.973000","2026-02-24T00:00:03.974000","2026-02-24T00:00:03.975000","2026-02-24T00:00:03.977000","2026-02-24T00:00:03.978000","2026-02-24T00:00:03.979000","2026-02-24T00:00:03.981000","2026-02-24T00:00:03.982000","2026-02-24T00:00:03.983000","2026-02-24T00:00:03.985000","2026-02-24T00:00:03.986000","2026-02-24T00:00:03.987000","2026-02-24T00:00:03.989000","2026-02-24T00:00:03.990000","2026-02-24T00:00:03.991000","2026-02-24T00:00:03.993000","2026-02-24T00:00:03.994000","2026-02-24T00:00:03.995000","2026-02-24T00:00:03.997000","2026-02-24T00:00:03.998000","2026-02-24T00:00:03.999000","2026-02-24T00:00:04.001000","2026-02-24T00:00:04.002000","2026-02-24T00:00:04.003000","2026-02-24T00:00:04.005000","2026-02-24T00:00:04.006000","2026-02-24T00:00:04.007000","2026-02-24T00:00:04.009000","2026-02-24T00:00:04.010000","2026-02-24T00:00:04.011000","2026-02-24T00:00:04.013000","2026-02-24T00:00:04.014000","2026-02-24T00:00:04.015000","2026-02-24T00:00:04.017000","2026-02-24T00:00:04.018000","2026-02-24T00:00:04.019000","2026-02-24T00:00:04.021000","2026-02-24T00:00:04.022000","2026-02-24T00:00:04.023000","2026-02-24T00:00:04.025000","2026-02-24T00:00:04.026000","2026-02-24T00:00:04.027000","2026-02-24T00:00:04.028000","2026-02-24T00:00:04.030000","2026-02-24T00:00:04.031000","2026-02-24T00:00:04.032000","2026-02-24T00:00:04.034000","2026-02-24T00:00:04.035000","2026-02-24T00:00:04.036000","2026-02-24T00:00:04.038000","2026-02-24T00:00:04.039000","2026-02-24T00:00:04.040000","2026-02-24T00:00:04.042000","2026-02-24T00:00:04.043000","2026-02-24T00:00:04.044000","2026-02-24T00:00:04.046000","2026-02-24T00:00:04.047000","2026-02-24T00:00:04.048000","2026-02-24T00:00:04.050000","2026-02-24T00:00:04.051000","2026-02-24T00:00:04.052000","2026-02-24T00:00:04.054000","2026-02-24T00:00:04.055000","2026-02-24T00:00:04.056000","2026-02-24T00:00:04.058000","2026-02-24T00:00:04.059000","2026-02-24T00:00:04.060000","2026-02-24T00:00:04.062000","2026-02-24T00:00:04.066000","2026-02-24T00:00:04.067000","2026-02-24T00:00:04.068000","2026-02-24T00:00:04.069000","2026-02-24T00:00:04.070000","2026-02-24T00:00:04.072000","2026-02-24T00:00:04.073000","2026-02-24T00:00:04.074000","2026-02-24T00:00:04.076000","2026-02-24T00:00:04.077000","2026-02-24T00:00:04.078000","2026-02-24T00:00:04.080000","2026-02-24T00:00:04.081000","2026-02-24T00:00:04.083000","2026-02-24T00:00:04.084000","2026-02-24T00:00:04.085000","2026-02-24T00:00:04.087000","2026-02-24T00:00:04.088000","2026-02-24T00:00:04.089000","2026-02-24T00:00:04.091000","2026-02-24T00:00:04.092000","2026-02-24T00:00:04.094000","2026-02-24T00:00:04.095000","2026-02-24T00:00:04.096000","2026-02-24T00:00:04.098000","2026-02-24T00:00:04.099000","2026-02-24T00:00:04.101000","2026-02-24T00:00:04.102000","2026-02-24T00:00:04.103000","2026-02-24T00:00:04.105000","2026-02-24T00:00:04.106000","2026-02-24T00:00:04.108000","2026-02-24T00:00:04.109000","2026-02-24T00:00:04.110000","2026-02-24T00:00:04.112000","2026-02-24T00:00:04.113000","2026-02-24T00:00:04.115000","2026-02-24T00:00:04.116000","2026-02-24T00:00:04.117000","2026-02-24T00:00:03.876000","2026-02-24T00:00:03.876000","2026-02-24T00:00:03.878000","2026-02-24T00:00:03.906000","2026-02-24T00:00:03.907000","2026-02-24T00:00:03.908000","2026-02-24T00:00:03.909000","2026-02-24T00:00:04.067000","2026-02-24T00:00:04.068000","2026-02-24T00:00:04.069000","2026-02-24T00:00:04.070000","2026-02-24T00:00:04.072000","2026-02-24T00:00:04.073000","2026-02-24T00:00:04.075000","2026-02-24T00:00:04.076000","2026-02-24T00:00:04.077000","2026-02-24T00:00:04.079000","2026-02-24T00:00:04.080000","2026-02-24T00:00:04.081000","2026-02-24T00:00:04.083000","2026-02-24T00:00:04.084000","2026-02-24T00:00:04.085000","2026-02-24T00:00:04.087000","2026-02-24T00:00:04.088000","2026-02-24T00:00:04.090000","2026-02-24T00:00:04.091000","2026-02-24T00:00:04.092000","2026-02-24T00:00:04.094000","2026-02-24T00:00:04.095000","2026-02-24T00:00:04.097000","2026-02-24T00:00:04.098000","2026-02-24T00:00:04.099000","2026-02-24T00:00:04.101000","2026-02-24T00:00:04.102000","2026-02-24T00:00:04.103000","2026-02-24T00:00:04.105000","2026-02-24T00:00:04.106000","2026-02-24T00:00:04.108000","2026-02-24T00:00:04.109000","2026-02-24T00:00:04.110000","2026-02-24T00:00:04.112000","2026-02-24T00:00:04.113000","2026-02-24T00:00:04.115000","2026-02-24T00:00:04.116000","2026-02-24T00:00:04.117000","2026-02-24T00:00:04.118000","2026-02-24T00:00:04.119000","2026-02-24T00:00:04.120000","2026-02-24T00:00:04.121000","2026-02-24T00:00:04.122000","2026-02-24T00:00:04.123000","2026-02-24T00:00:04.124000","2026-02-24T00:00:04.125000","2026-02-24T00:00:04.126000","2026-02-24T00:00:04.127000","2026-02-24T00:00:04.128000","2026-02-24T00:00:04.129000","2026-02-24T00:00:04.134000","2026-02-24T00:00:04.135000","2026-02-24T00:00:04.136000","2026-02-24T00:00:04.137000","2026-02-24T00:00:04.138000","2026-02-24T00:00:04.139000","2026-02-24T00:00:04.140000","2026-02-24T00:00:04.141000","2026-02-24T00:00:04.142000","2026-02-24T00:00:04.143000","2026-02-24T00:00:04.144000","2026-02-24T00:00:04.145000","2026-02-24T00:00:04.146000","2026-02-24T00:00:04.148000","2026-02-24T00:00:04.149000","2026-02-24T00:00:04.150000","2026-02-24T00:00:04.151000","2026-02-24T00:00:04.152000","2026-02-24T00:00:04.153000","2026-02-24T00:00:04.154000","2026-02-24T00:00:04.155000","2026-02-24T00:00:04.156000","2026-02-24T00:00:04.157000","2026-02-24T00:00:04.158000","2026-02-24T00:00:04.159000","2026-02-24T00:00:04.160000","2026-02-24T00:00:04.161000","2026-02-24T00:00:04.163000","2026-02-24T00:00:04.164000","2026-02-24T00:00:04.165000","2026-02-24T00:00:04.166000","2026-02-24T00:00:04.167000","2026-02-24T00:00:04.168000","2026-02-24T00:00:04.169000","2026-02-24T00:00:04.170000","2026-02-24T00:00:04.171000","2026-02-24T00:00:04.172000","2026-02-24T00:00:04.173000","2026-02-24T00:00:04.174000","2026-02-24T00:00:04.175000","2026-02-24T00:00:04.176000","2026-02-24T00:00:04.177000","2026-02-24T00:00:04.178000","2026-02-24T00:00:04.180000","2026-02-24T00:00:04.181000","2026-02-24T00:00:04.182000","2026-02-24T00:00:04.183000","2026-02-24T00:00:04.184000","2026-02-24T00:00:04.185000","2026-02-24T00:00:04.186000","2026-02-24T00:00:04.187000","2026-02-24T00:00:04.188000","2026-02-24T00:00:04.189000","2026-02-24T00:00:04.190000","2026-02-24T00:00:04.191000","2026-02-24T00:00:04.193000","2026-02-24T00:00:04.194000","2026-02-24T00:00:04.195000","2026-02-24T00:00:04.196000","2026-02-24T00:00:04.197000","2026-02-24T00:00:04.198000","2026-02-24T00:00:04.199000","2026-02-24T00:00:04.200000","2026-02-24T00:00:04.201000","2026-02-24T00:00:04.202000","2026-02-24T00:00:04.203000","2026-02-24T00:00:04.204000","2026-02-24T00:00:04.205000","2026-02-24T00:00:04.206000","2026-02-24T00:00:04.207000","2026-02-24T00:00:04.208000","2026-02-24T00:00:04.209000","2026-02-24T00:00:04.210000","2026-02-24T00:00:04.211000","2026-02-24T00:00:04.212000","2026-02-24T00:00:04.213000","2026-02-24T00:00:04.215000","2026-02-24T00:00:04.216000","2026-02-24T00:00:04.217000","2026-02-24T00:00:04.218000","2026-02-24T00:00:04.219000","2026-02-24T00:00:04.220000","2026-02-24T00:00:04.221000","2026-02-24T00:00:04.222000","2026-02-24T00:00:04.223000","2026-02-24T00:00:04.224000","2026-02-24T00:00:04.230000","2026-02-24T00:00:04.232000","2026-02-24T00:00:04.232000","2026-02-24T00:00:04.233000","2026-02-24T00:00:04.235000","2026-02-24T00:00:04.236000","2026-02-24T00:00:04.238000","2026-02-24T00:00:04.135000","2026-02-24T00:00:04.136000","2026-02-24T00:00:04.137000","2026-02-24T00:00:04.138000","2026-02-24T00:00:04.139000","2026-02-24T00:00:04.140000","2026-02-24T00:00:04.141000","2026-02-24T00:00:04.142000","2026-02-24T00:00:04.143000","2026-02-24T00:00:04.144000","2026-02-24T00:00:04.145000","2026-02-24T00:00:04.147000","2026-02-24T00:00:04.148000","2026-02-24T00:00:04.149000","2026-02-24T00:00:04.150000","2026-02-24T00:00:04.151000","2026-02-24T00:00:04.152000","2026-02-24T00:00:04.153000","2026-02-24T00:00:04.154000","2026-02-24T00:00:04.155000","2026-02-24T00:00:04.156000","2026-02-24T00:00:04.157000","2026-02-24T00:00:04.158000","2026-02-24T00:00:04.159000","2026-02-24T00:00:04.160000","2026-02-24T00:00:04.161000","2026-02-24T00:00:04.163000","2026-02-24T00:00:04.164000","2026-02-24T00:00:04.165000","2026-02-24T00:00:04.166000","2026-02-24T00:00:04.167000","2026-02-24T00:00:04.168000","2026-02-24T00:00:04.169000","2026-02-24T00:00:04.170000","2026-02-24T00:00:04.171000","2026-02-24T00:00:04.172000","2026-02-24T00:00:04.173000","2026-02-24T00:00:04.174000","2026-02-24T00:00:04.175000","2026-02-24T00:00:04.176000","2026-02-24T00:00:04.177000","2026-02-24T00:00:04.178000","2026-02-24T00:00:04.180000","2026-02-24T00:00:04.181000","2026-02-24T00:00:04.182000","2026-02-24T00:00:04.183000","2026-02-24T00:00:04.184000","2026-02-24T00:00:04.185000","2026-02-24T00:00:04.186000","2026-02-24T00:00:04.187000","2026-02-24T00:00:04.188000","2026-02-24T00:00:04.189000","2026-02-24T00:00:04.190000","2026-02-24T00:00:04.192000","2026-02-24T00:00:04.193000","2026-02-24T00:00:04.194000","2026-02-24T00:00:04.195000","2026-02-24T00:00:04.196000","2026-02-24T00:00:04.232000","2026-02-24T00:00:04.232000","2026-02-24T00:00:04.233000","2026-02-24T00:00:04.235000","2026-02-24T00:00:04.236000","2026-02-24T00:00:04.237000","2026-02-24T00:00:04.239000","2026-02-24T00:00:04.240000","2026-02-24T00:00:04.242000","2026-02-24T00:00:04.243000","2026-02-24T00:00:04.232000","2026-02-24T00:00:04.232000","2026-02-24T00:00:04.233000","2026-02-24T00:00:04.235000","2026-02-24T00:00:04.236000","2026-02-24T00:00:04.238000","2026-02-24T00:00:04.239000","2026-02-24T00:00:04.240000","2026-02-24T00:00:04.242000","2026-02-24T00:00:04.243000","2026-02-24T00:00:04.245000","2026-02-24T00:00:04.246000","2026-02-24T00:00:04.247000","2026-02-24T00:00:04.248000","2026-02-24T00:00:04.249000","2026-02-24T00:00:04.250000","2026-02-24T00:00:04.251000","2026-02-24T00:00:04.252000","2026-02-24T00:00:04.253000","2026-02-24T00:00:04.254000","2026-02-24T00:00:04.255000","2026-02-24T00:00:04.257000","2026-02-24T00:00:04.258000","2026-02-24T00:00:04.259000","2026-02-24T00:00:04.260000","2026-02-24T00:00:04.261000","2026-02-24T00:00:04.262000","2026-02-24T00:00:04.263000","2026-02-24T00:00:04.264000","2026-02-24T00:00:04.266000","2026-02-24T00:00:04.267000","2026-02-24T00:00:04.268000","2026-02-24T00:00:04.269000","2026-02-24T00:00:04.270000","2026-02-24T00:00:04.271000","2026-02-24T00:00:04.272000","2026-02-24T00:00:04.273000","2026-02-24T00:00:04.275000","2026-02-24T00:00:04.276000","2026-02-24T00:00:04.277000","2026-02-24T00:00:04.278000","2026-02-24T00:00:04.279000","2026-02-24T00:00:04.280000","2026-02-24T00:00:04.281000","2026-02-24T00:00:04.282000","2026-02-24T00:00:04.284000","2026-02-24T00:00:04.285000","2026-02-24T00:00:04.286000","2026-02-24T00:00:04.287000","2026-02-24T00:00:04.288000","2026-02-24T00:00:04.289000","2026-02-24T00:00:04.290000","2026-02-24T00:00:04.292000","2026-02-24T00:00:04.293000","2026-02-24T00:00:04.294000","2026-02-24T00:00:04.295000","2026-02-24T00:00:04.296000","2026-02-24T00:00:04.297000","2026-02-24T00:00:04.298000","2026-02-24T00:00:04.299000","2026-02-24T00:00:04.300000","2026-02-24T00:00:04.302000","2026-02-24T00:00:04.303000","2026-02-24T00:00:04.304000","2026-02-24T00:00:04.305000","2026-02-24T00:00:04.306000","2026-02-24T00:00:04.307000","2026-02-24T00:00:04.308000","2026-02-24T00:00:04.310000","2026-02-24T00:00:04.311000","2026-02-24T00:00:04.312000","2026-02-24T00:00:04.313000","2026-02-24T00:00:04.314000","2026-02-24T00:00:04.316000","2026-02-24T00:00:04.317000","2026-02-24T00:00:04.318000","2026-02-24T00:00:04.319000","2026-02-24T00:00:04.320000","2026-02-24T00:00:04.321000","2026-02-24T00:00:04.323000","2026-02-24T00:00:04.324000","2026-02-24T00:00:04.325000","2026-02-24T00:00:04.326000","2026-02-24T00:00:04.332000","2026-02-24T00:00:04.332000","2026-02-24T00:00:04.333000","2026-02-24T00:00:04.334000","2026-02-24T00:00:04.336000","2026-02-24T00:00:04.337000","2026-02-24T00:00:04.338000","2026-02-24T00:00:04.339000","2026-02-24T00:00:04.341000","2026-02-24T00:00:04.342000","2026-02-24T00:00:04.343000","2026-02-24T00:00:04.344000","2026-02-24T00:00:04.345000","2026-02-24T00:00:04.347000","2026-02-24T00:00:04.348000","2026-02-24T00:00:04.349000","2026-02-24T00:00:04.350000","2026-02-24T00:00:04.352000","2026-02-24T00:00:04.353000","2026-02-24T00:00:04.354000","2026-02-24T00:00:04.355000","2026-02-24T00:00:04.357000","2026-02-24T00:00:04.358000","2026-02-24T00:00:04.359000","2026-02-24T00:00:04.360000","2026-02-24T00:00:04.361000","2026-02-24T00:00:04.363000","2026-02-24T00:00:04.364000","2026-02-24T00:00:04.365000","2026-02-24T00:00:04.366000","2026-02-24T00:00:04.367000","2026-02-24T00:00:04.369000","2026-02-24T00:00:04.370000","2026-02-24T00:00:04.371000","2026-02-24T00:00:04.372000","2026-02-24T00:00:04.374000","2026-02-24T00:00:04.375000","2026-02-24T00:00:04.376000","2026-02-24T00:00:04.377000","2026-02-24T00:00:04.378000","2026-02-24T00:00:04.380000","2026-02-24T00:00:04.381000","2026-02-24T00:00:04.382000","2026-02-24T00:00:04.383000","2026-02-24T00:00:04.384000","2026-02-24T00:00:04.386000","2026-02-24T00:00:04.387000","2026-02-24T00:00:04.388000","2026-02-24T00:00:04.389000","2026-02-24T00:00:04.391000","2026-02-24T00:00:04.392000","2026-02-24T00:00:04.393000","2026-02-24T00:00:04.394000","2026-02-24T00:00:04.396000","2026-02-24T00:00:04.397000","2026-02-24T00:00:04.398000","2026-02-24T00:00:04.399000","2026-02-24T00:00:04.400000","2026-02-24T00:00:04.401000","2026-02-24T00:00:04.403000","2026-02-24T00:00:04.404000","2026-02-24T00:00:04.405000","2026-02-24T00:00:04.406000","2026-02-24T00:00:04.408000","2026-02-24T00:00:04.409000","2026-02-24T00:00:04.410000","2026-02-24T00:00:04.411000","2026-02-24T00:00:04.412000","2026-02-24T00:00:04.414000","2026-02-24T00:00:04.415000","2026-02-24T00:00:04.417000","2026-02-24T00:00:04.418000","2026-02-24T00:00:04.419000","2026-02-24T00:00:04.421000","2026-02-24T00:00:04.422000","2026-02-24T00:00:04.424000","2026-02-24T00:00:04.425000","2026-02-24T00:00:04.426000","2026-02-24T00:00:04.427000","2026-02-24T00:00:04.428000","2026-02-24T00:00:04.429000","2026-02-24T00:00:04.431000","2026-02-24T00:00:04.440000","2026-02-24T00:00:04.442000","2026-02-24T00:00:04.443000","2026-02-24T00:00:04.444000","2026-02-24T00:00:04.445000","2026-02-24T00:00:04.447000","2026-02-24T00:00:04.448000","2026-02-24T00:00:04.449000","2026-02-24T00:00:04.450000","2026-02-24T00:00:04.452000","2026-02-24T00:00:04.453000","2026-02-24T00:00:04.454000","2026-02-24T00:00:04.455000","2026-02-24T00:00:04.457000","2026-02-24T00:00:04.458000","2026-02-24T00:00:04.459000","2026-02-24T00:00:04.461000","2026-02-24T00:00:04.462000","2026-02-24T00:00:04.463000","2026-02-24T00:00:04.465000","2026-02-24T00:00:04.466000","2026-02-24T00:00:04.467000","2026-02-24T00:00:04.468000","2026-02-24T00:00:04.470000","2026-02-24T00:00:04.471000","2026-02-24T00:00:04.472000","2026-02-24T00:00:04.474000","2026-02-24T00:00:04.475000","2026-02-24T00:00:04.476000","2026-02-24T00:00:04.478000","2026-02-24T00:00:04.479000","2026-02-24T00:00:04.480000","2026-02-24T00:00:04.482000","2026-02-24T00:00:04.483000","2026-02-24T00:00:04.484000","2026-02-24T00:00:04.486000","2026-02-24T00:00:04.487000","2026-02-24T00:00:04.488000","2026-02-24T00:00:04.490000","2026-02-24T00:00:04.491000","2026-02-24T00:00:04.492000","2026-02-24T00:00:04.493000","2026-02-24T00:00:04.495000","2026-02-24T00:00:04.496000","2026-02-24T00:00:04.497000","2026-02-24T00:00:04.499000","2026-02-24T00:00:04.500000","2026-02-24T00:00:04.501000","2026-02-24T00:00:04.503000","2026-02-24T00:00:04.504000","2026-02-24T00:00:04.505000","2026-02-24T00:00:04.507000","2026-02-24T00:00:04.508000","2026-02-24T00:00:04.509000","2026-02-24T00:00:04.511000","2026-02-24T00:00:04.512000","2026-02-24T00:00:04.513000","2026-02-24T00:00:04.515000","2026-02-24T00:00:04.516000","2026-02-24T00:00:04.517000","2026-02-24T00:00:04.519000","2026-02-24T00:00:04.520000","2026-02-24T00:00:04.521000","2026-02-24T00:00:04.523000","2026-02-24T00:00:04.524000","2026-02-24T00:00:04.525000","2026-02-24T00:00:04.527000","2026-02-24T00:00:04.528000","2026-02-24T00:00:04.530000","2026-02-24T00:00:04.531000","2026-02-24T00:00:04.532000","2026-02-24T00:00:04.534000","2026-02-24T00:00:04.540000","2026-02-24T00:00:04.541000","2026-02-24T00:00:04.542000","2026-02-24T00:00:04.543000","2026-02-24T00:00:04.544000","2026-02-24T00:00:04.546000","2026-02-24T00:00:04.547000","2026-02-24T00:00:04.548000","2026-02-24T00:00:04.550000","2026-02-24T00:00:04.551000","2026-02-24T00:00:04.553000","2026-02-24T00:00:04.554000","2026-02-24T00:00:04.555000","2026-02-24T00:00:04.557000","2026-02-24T00:00:04.558000","2026-02-24T00:00:04.559000","2026-02-24T00:00:04.561000","2026-02-24T00:00:04.562000","2026-02-24T00:00:04.563000","2026-02-24T00:00:04.564000","2026-02-24T00:00:04.566000","2026-02-24T00:00:04.567000","2026-02-24T00:00:04.568000","2026-02-24T00:00:04.570000","2026-02-24T00:00:04.571000","2026-02-24T00:00:04.572000","2026-02-24T00:00:04.574000","2026-02-24T00:00:04.575000","2026-02-24T00:00:04.576000","2026-02-24T00:00:04.578000","2026-02-24T00:00:04.579000","2026-02-24T00:00:04.581000","2026-02-24T00:00:04.582000","2026-02-24T00:00:04.583000","2026-02-24T00:00:04.585000","2026-02-24T00:00:04.586000","2026-02-24T00:00:04.587000","2026-02-24T00:00:04.588000","2026-02-24T00:00:04.590000","2026-02-24T00:00:04.591000","2026-02-24T00:00:04.592000","2026-02-24T00:00:04.594000","2026-02-24T00:00:04.595000","2026-02-24T00:00:04.596000","2026-02-24T00:00:04.598000","2026-02-24T00:00:04.599000","2026-02-24T00:00:04.600000","2026-02-24T00:00:04.602000","2026-02-24T00:00:04.603000","2026-02-24T00:00:04.604000","2026-02-24T00:00:04.606000","2026-02-24T00:00:04.607000","2026-02-24T00:00:04.609000","2026-02-24T00:00:04.610000","2026-02-24T00:00:04.611000","2026-02-24T00:00:04.613000","2026-02-24T00:00:04.614000","2026-02-24T00:00:04.615000","2026-02-24T00:00:04.616000","2026-02-24T00:00:04.618000","2026-02-24T00:00:04.619000","2026-02-24T00:00:04.620000","2026-02-24T00:00:04.622000","2026-02-24T00:00:04.623000","2026-02-24T00:00:04.624000","2026-02-24T00:00:04.626000","2026-02-24T00:00:04.627000","2026-02-24T00:00:04.628000","2026-02-24T00:00:04.630000","2026-02-24T00:00:04.631000","2026-02-24T00:00:04.633000","2026-02-24T00:00:04.634000","2026-02-24T00:00:04.635000","2026-02-24T00:00:04.637000","2026-02-24T00:00:04.638000","2026-02-24T00:00:04.639000","2026-02-24T00:00:04.640000","2026-02-24T00:00:04.642000","2026-02-24T00:00:04.643000","2026-02-24T00:00:04.644000","2026-02-24T00:00:04.646000","2026-02-24T00:00:04.647000","2026-02-24T00:00:04.649000","2026-02-24T00:00:04.650000","2026-02-24T00:00:04.651000","2026-02-24T00:00:04.653000","2026-02-24T00:00:04.654000","2026-02-24T00:00:04.656000","2026-02-24T00:00:04.657000","2026-02-24T00:00:04.658000","2026-02-24T00:00:04.660000","2026-02-24T00:00:04.661000","2026-02-24T00:00:04.662000","2026-02-24T00:00:04.664000","2026-02-24T00:00:04.665000","2026-02-24T00:00:04.667000","2026-02-24T00:00:04.668000","2026-02-24T00:00:04.669000","2026-02-24T00:00:04.671000","2026-02-24T00:00:04.672000","2026-02-24T00:00:04.674000","2026-02-24T00:00:04.675000","2026-02-24T00:00:04.676000","2026-02-24T00:00:04.678000","2026-02-24T00:00:04.679000","2026-02-24T00:00:04.681000","2026-02-24T00:00:04.682000","2026-02-24T00:00:04.683000","2026-02-24T00:00:04.685000","2026-02-24T00:00:04.686000","2026-02-24T00:00:04.687000","2026-02-24T00:00:04.689000","2026-02-24T00:00:04.690000","2026-02-24T00:00:04.692000","2026-02-24T00:00:04.693000","2026-02-24T00:00:04.694000","2026-02-24T00:00:04.696000","2026-02-24T00:00:04.697000","2026-02-24T00:00:04.698000","2026-02-24T00:00:04.700000","2026-02-24T00:00:04.701000","2026-02-24T00:00:04.703000","2026-02-24T00:00:04.704000","2026-02-24T00:00:04.705000","2026-02-24T00:00:04.707000","2026-02-24T00:00:04.708000","2026-02-24T00:00:04.710000","2026-02-24T00:00:04.711000","2026-02-24T00:00:04.712000","2026-02-24T00:00:04.714000","2026-02-24T00:00:04.715000","2026-02-24T00:00:04.716000","2026-02-24T00:00:04.718000","2026-02-24T00:00:04.719000","2026-02-24T00:00:04.721000","2026-02-24T00:00:04.722000","2026-02-24T00:00:04.723000","2026-02-24T00:00:04.725000","2026-02-24T00:00:04.726000","2026-02-24T00:00:04.727000","2026-02-24T00:00:04.729000","2026-02-24T00:00:04.730000","2026-02-24T00:00:04.731000","2026-02-24T00:00:04.733000","2026-02-24T00:00:04.734000","2026-02-24T00:00:04.736000","2026-02-24T00:00:04.737000","2026-02-24T00:00:04.738000","2026-02-24T00:00:04.740000","2026-02-24T00:00:04.741000","2026-02-24T00:00:04.742000","2026-02-24T00:00:04.744000","2026-02-24T00:00:04.745000","2026-02-24T00:00:04.746000","2026-02-24T00:00:04.748000","2026-02-24T00:00:04.749000","2026-02-24T00:00:04.751000","2026-02-24T00:00:04.752000","2026-02-24T00:00:04.753000","2026-02-24T00:00:04.755000","2026-02-24T00:00:04.756000","2026-02-24T00:00:04.758000","2026-02-24T00:00:04.759000","2026-02-24T00:00:04.760000","2026-02-24T00:00:04.762000","2026-02-24T00:00:04.763000","2026-02-24T00:00:04.765000","2026-02-24T00:00:04.766000","2026-02-24T00:00:04.768000","2026-02-24T00:00:04.769000","2026-02-24T00:00:04.771000","2026-02-24T00:00:04.772000","2026-02-24T00:00:04.773000","2026-02-24T00:00:04.775000","2026-02-24T00:00:04.776000","2026-02-24T00:00:04.778000","2026-02-24T00:00:04.780000","2026-02-24T00:00:04.781000","2026-02-24T00:00:04.782000","2026-02-24T00:00:04.783000","2026-02-24T00:00:04.784000","2026-02-24T00:00:04.786000","2026-02-24T00:00:04.787000","2026-02-24T00:00:04.789000","2026-02-24T00:00:04.790000","2026-02-24T00:00:04.791000","2026-02-24T00:00:04.793000","2026-02-24T00:00:04.794000","2026-02-24T00:00:04.795000","2026-02-24T00:00:04.797000","2026-02-24T00:00:04.798000","2026-02-24T00:00:04.800000","2026-02-24T00:00:04.801000","2026-02-24T00:00:04.802000","2026-02-24T00:00:04.804000","2026-02-24T00:00:04.805000","2026-02-24T00:00:04.807000","2026-02-24T00:00:04.808000","2026-02-24T00:00:04.809000","2026-02-24T00:00:04.811000","2026-02-24T00:00:04.812000","2026-02-24T00:00:04.814000","2026-02-24T00:00:04.815000","2026-02-24T00:00:04.816000","2026-02-24T00:00:04.817000","2026-02-24T00:00:04.819000","2026-02-24T00:00:04.820000","2026-02-24T00:00:04.822000","2026-02-24T00:00:04.823000","2026-02-24T00:00:04.824000","2026-02-24T00:00:04.826000","2026-02-24T00:00:04.827000","2026-02-24T00:00:04.829000","2026-02-24T00:00:04.830000","2026-02-24T00:00:04.831000","2026-02-24T00:00:04.833000","2026-02-24T00:00:04.834000","2026-02-24T00:00:04.836000","2026-02-24T00:00:04.837000","2026-02-24T00:00:04.839000","2026-02-24T00:00:04.840000","2026-02-24T00:00:04.841000","2026-02-24T00:00:04.843000","2026-02-24T00:00:04.844000","2026-02-24T00:00:04.846000","2026-02-24T00:00:04.847000","2026-02-24T00:00:04.849000","2026-02-24T00:00:04.858000","2026-02-24T00:00:04.859000","2026-02-24T00:00:04.860000","2026-02-24T00:00:04.862000","2026-02-24T00:00:04.863000","2026-02-24T00:00:04.865000","2026-02-24T00:00:04.866000","2026-02-24T00:00:04.867000","2026-02-24T00:00:04.869000","2026-02-24T00:00:04.870000","2026-02-24T00:00:04.871000","2026-02-24T00:00:04.873000","2026-02-24T00:00:04.874000","2026-02-24T00:00:04.876000","2026-02-24T00:00:04.877000","2026-02-24T00:00:04.879000","2026-02-24T00:00:04.880000","2026-02-24T00:00:04.881000","2026-02-24T00:00:04.883000","2026-02-24T00:00:04.884000","2026-02-24T00:00:04.886000","2026-02-24T00:00:04.887000","2026-02-24T00:00:04.889000","2026-02-24T00:00:04.890000","2026-02-24T00:00:04.891000","2026-02-24T00:00:04.893000","2026-02-24T00:00:04.894000","2026-02-24T00:00:04.896000","2026-02-24T00:00:04.897000","2026-02-24T00:00:04.899000","2026-02-24T00:00:04.900000","2026-02-24T00:00:04.901000","2026-02-24T00:00:04.903000","2026-02-24T00:00:04.904000","2026-02-24T00:00:04.906000","2026-02-24T00:00:04.907000","2026-02-24T00:00:04.908000","2026-02-24T00:00:04.910000","2026-02-24T00:00:04.911000","2026-02-24T00:00:04.913000","2026-02-24T00:00:04.914000","2026-02-24T00:00:04.916000","2026-02-24T00:00:04.917000","2026-02-24T00:00:04.919000","2026-02-24T00:00:04.920000","2026-02-24T00:00:04.921000","2026-02-24T00:00:04.923000","2026-02-24T00:00:04.924000","2026-02-24T00:00:04.926000","2026-02-24T00:00:04.927000","2026-02-24T00:00:04.928000","2026-02-24T00:00:04.930000","2026-02-24T00:00:04.931000","2026-02-24T00:00:04.933000","2026-02-24T00:00:04.934000","2026-02-24T00:00:04.936000","2026-02-24T00:00:04.937000","2026-02-24T00:00:04.938000","2026-02-24T00:00:04.940000","2026-02-24T00:00:04.941000","2026-02-24T00:00:04.943000","2026-02-24T00:00:04.944000","2026-02-24T00:00:04.946000","2026-02-24T00:00:04.947000","2026-02-24T00:00:04.948000","2026-02-24T00:00:04.950000","2026-02-24T00:00:04.951000","2026-02-24T00:00:04.953000","2026-02-24T00:00:04.954000","2026-02-24T00:00:04.956000","2026-02-24T00:00:04.957000","2026-02-24T00:00:04.958000","2026-02-24T00:00:04.960000","2026-02-24T00:00:04.961000","2026-02-24T00:00:04.963000","2026-02-24T00:00:04.964000","2026-02-24T00:00:04.965000","2026-02-24T00:00:04.967000","2026-02-24T00:00:04.969000","2026-02-24T00:00:04.970000","2026-02-24T00:00:04.971000","2026-02-24T00:00:04.980000","2026-02-24T00:00:04.981000","2026-02-24T00:00:04.982000","2026-02-24T00:00:04.983000","2026-02-24T00:00:04.985000","2026-02-24T00:00:04.986000","2026-02-24T00:00:04.988000","2026-02-24T00:00:04.989000","2026-02-24T00:00:04.990000","2026-02-24T00:00:04.992000","2026-02-24T00:00:04.993000","2026-02-24T00:00:04.995000","2026-02-24T00:00:04.996000","2026-02-24T00:00:04.998000","2026-02-24T00:00:04.999000","2026-02-24T00:00:05.001000","2026-02-24T00:00:04.332000","2026-02-24T00:00:04.333000","2026-02-24T00:00:04.334000","2026-02-24T00:00:04.336000","2026-02-24T00:00:04.337000","2026-02-24T00:00:04.338000","2026-02-24T00:00:04.339000","2026-02-24T00:00:04.341000","2026-02-24T00:00:04.342000","2026-02-24T00:00:04.343000","2026-02-24T00:00:04.344000","2026-02-24T00:00:04.345000","2026-02-24T00:00:04.347000","2026-02-24T00:00:04.348000","2026-02-24T00:00:04.349000","2026-02-24T00:00:04.350000","2026-02-24T00:00:04.352000","2026-02-24T00:00:04.353000","2026-02-24T00:00:04.354000","2026-02-24T00:00:04.355000","2026-02-24T00:00:04.357000","2026-02-24T00:00:04.358000","2026-02-24T00:00:04.359000","2026-02-24T00:00:04.360000","2026-02-24T00:00:04.361000","2026-02-24T00:00:04.363000","2026-02-24T00:00:04.364000","2026-02-24T00:00:04.365000","2026-02-24T00:00:04.366000","2026-02-24T00:00:04.368000","2026-02-24T00:00:04.369000","2026-02-24T00:00:04.370000","2026-02-24T00:00:04.371000","2026-02-24T00:00:04.372000","2026-02-24T00:00:04.374000","2026-02-24T00:00:04.375000","2026-02-24T00:00:04.376000","2026-02-24T00:00:04.377000","2026-02-24T00:00:04.378000","2026-02-24T00:00:04.380000","2026-02-24T00:00:04.381000","2026-02-24T00:00:04.382000","2026-02-24T00:00:04.383000","2026-02-24T00:00:04.384000","2026-02-24T00:00:04.386000","2026-02-24T00:00:04.387000","2026-02-24T00:00:04.388000","2026-02-24T00:00:04.389000","2026-02-24T00:00:04.391000","2026-02-24T00:00:04.392000","2026-02-24T00:00:04.393000","2026-02-24T00:00:04.394000","2026-02-24T00:00:04.396000","2026-02-24T00:00:04.397000","2026-02-24T00:00:04.398000","2026-02-24T00:00:04.399000","2026-02-24T00:00:04.400000","2026-02-24T00:00:04.402000","2026-02-24T00:00:04.403000","2026-02-24T00:00:04.404000","2026-02-24T00:00:04.405000","2026-02-24T00:00:04.406000","2026-02-24T00:00:04.408000","2026-02-24T00:00:04.409000","2026-02-24T00:00:04.410000","2026-02-24T00:00:04.411000","2026-02-24T00:00:04.413000","2026-02-24T00:00:04.414000","2026-02-24T00:00:04.415000","2026-02-24T00:00:04.417000","2026-02-24T00:00:04.418000","2026-02-24T00:00:04.419000","2026-02-24T00:00:04.421000","2026-02-24T00:00:04.422000","2026-02-24T00:00:04.424000","2026-02-24T00:00:04.425000","2026-02-24T00:00:04.426000","2026-02-24T00:00:04.427000","2026-02-24T00:00:04.428000","2026-02-24T00:00:04.429000","2026-02-24T00:00:04.431000","2026-02-24T00:00:04.441000","2026-02-24T00:00:04.442000","2026-02-24T00:00:04.443000","2026-02-24T00:00:04.444000","2026-02-24T00:00:04.445000","2026-02-24T00:00:04.447000","2026-02-24T00:00:04.448000","2026-02-24T00:00:04.449000","2026-02-24T00:00:04.450000","2026-02-24T00:00:04.452000","2026-02-24T00:00:04.453000","2026-02-24T00:00:04.454000","2026-02-24T00:00:04.455000","2026-02-24T00:00:04.457000","2026-02-24T00:00:04.458000","2026-02-24T00:00:04.459000","2026-02-24T00:00:04.461000","2026-02-24T00:00:04.462000","2026-02-24T00:00:04.463000","2026-02-24T00:00:04.465000","2026-02-24T00:00:04.466000","2026-02-24T00:00:04.467000","2026-02-24T00:00:04.468000","2026-02-24T00:00:04.470000","2026-02-24T00:00:04.471000","2026-02-24T00:00:04.472000","2026-02-24T00:00:04.474000","2026-02-24T00:00:04.475000","2026-02-24T00:00:04.476000","2026-02-24T00:00:04.478000","2026-02-24T00:00:04.479000","2026-02-24T00:00:04.480000","2026-02-24T00:00:04.482000","2026-02-24T00:00:04.483000","2026-02-24T00:00:04.484000","2026-02-24T00:00:04.486000","2026-02-24T00:00:04.487000","2026-02-24T00:00:04.488000","2026-02-24T00:00:04.490000","2026-02-24T00:00:04.491000","2026-02-24T00:00:04.492000","2026-02-24T00:00:04.493000","2026-02-24T00:00:04.495000","2026-02-24T00:00:04.496000","2026-02-24T00:00:04.497000","2026-02-24T00:00:04.499000","2026-02-24T00:00:04.500000","2026-02-24T00:00:04.501000","2026-02-24T00:00:04.503000","2026-02-24T00:00:04.504000","2026-02-24T00:00:04.505000","2026-02-24T00:00:04.507000","2026-02-24T00:00:04.508000","2026-02-24T00:00:04.509000","2026-02-24T00:00:04.511000","2026-02-24T00:00:04.512000","2026-02-24T00:00:04.513000","2026-02-24T00:00:04.515000","2026-02-24T00:00:04.516000","2026-02-24T00:00:04.517000","2026-02-24T00:00:04.519000","2026-02-24T00:00:04.520000","2026-02-24T00:00:04.521000","2026-02-24T00:00:04.523000","2026-02-24T00:00:04.524000","2026-02-24T00:00:04.525000","2026-02-24T00:00:04.527000","2026-02-24T00:00:04.528000","2026-02-24T00:00:04.530000","2026-02-24T00:00:04.531000","2026-02-24T00:00:04.532000","2026-02-24T00:00:04.534000","2026-02-24T00:00:04.540000","2026-02-24T00:00:04.541000","2026-02-24T00:00:04.542000","2026-02-24T00:00:04.543000","2026-02-24T00:00:04.544000","2026-02-24T00:00:04.546000","2026-02-24T00:00:04.547000","2026-02-24T00:00:04.549000","2026-02-24T00:00:04.550000","2026-02-24T00:00:04.551000","2026-02-24T00:00:04.553000","2026-02-24T00:00:04.554000","2026-02-24T00:00:04.555000","2026-02-24T00:00:04.557000","2026-02-24T00:00:04.558000","2026-02-24T00:00:04.559000","2026-02-24T00:00:04.561000","2026-02-24T00:00:04.562000","2026-02-24T00:00:04.563000","2026-02-24T00:00:04.564000","2026-02-24T00:00:04.566000","2026-02-24T00:00:04.567000","2026-02-24T00:00:04.569000","2026-02-24T00:00:04.570000","2026-02-24T00:00:04.571000","2026-02-24T00:00:04.573000","2026-02-24T00:00:04.574000","2026-02-24T00:00:04.575000","2026-02-24T00:00:04.577000","2026-02-24T00:00:04.578000","2026-02-24T00:00:04.579000","2026-02-24T00:00:04.581000","2026-02-24T00:00:04.582000","2026-02-24T00:00:04.583000","2026-02-24T00:00:04.585000","2026-02-24T00:00:04.586000","2026-02-24T00:00:04.442000","2026-02-24T00:00:04.443000","2026-02-24T00:00:04.444000","2026-02-24T00:00:04.445000","2026-02-24T00:00:04.446000","2026-02-24T00:00:04.448000","2026-02-24T00:00:04.449000","2026-02-24T00:00:04.450000","2026-02-24T00:00:04.451000","2026-02-24T00:00:04.453000","2026-02-24T00:00:04.454000","2026-02-24T00:00:04.455000","2026-02-24T00:00:04.457000","2026-02-24T00:00:04.458000","2026-02-24T00:00:04.459000","2026-02-24T00:00:04.461000","2026-02-24T00:00:04.462000","2026-02-24T00:00:04.463000","2026-02-24T00:00:04.465000","2026-02-24T00:00:04.466000","2026-02-24T00:00:04.467000","2026-02-24T00:00:04.468000","2026-02-24T00:00:04.469000","2026-02-24T00:00:04.471000","2026-02-24T00:00:04.472000","2026-02-24T00:00:04.473000","2026-02-24T00:00:04.475000","2026-02-24T00:00:04.476000","2026-02-24T00:00:04.477000","2026-02-24T00:00:04.479000","2026-02-24T00:00:04.480000","2026-02-24T00:00:04.481000","2026-02-24T00:00:04.483000","2026-02-24T00:00:04.484000","2026-02-24T00:00:04.486000","2026-02-24T00:00:04.487000","2026-02-24T00:00:04.488000","2026-02-24T00:00:04.490000","2026-02-24T00:00:04.491000","2026-02-24T00:00:04.492000","2026-02-24T00:00:04.493000","2026-02-24T00:00:04.495000","2026-02-24T00:00:04.496000","2026-02-24T00:00:04.497000","2026-02-24T00:00:04.499000","2026-02-24T00:00:04.500000","2026-02-24T00:00:04.501000","2026-02-24T00:00:04.503000","2026-02-24T00:00:04.504000","2026-02-24T00:00:04.505000","2026-02-24T00:00:04.507000","2026-02-24T00:00:04.508000","2026-02-24T00:00:04.509000","2026-02-24T00:00:04.511000","2026-02-24T00:00:04.512000","2026-02-24T00:00:04.513000","2026-02-24T00:00:04.515000","2026-02-24T00:00:04.516000","2026-02-24T00:00:04.517000","2026-02-24T00:00:04.519000","2026-02-24T00:00:04.520000","2026-02-24T00:00:04.521000","2026-02-24T00:00:04.523000","2026-02-24T00:00:04.524000","2026-02-24T00:00:04.525000","2026-02-24T00:00:04.527000","2026-02-24T00:00:04.528000","2026-02-24T00:00:04.529000","2026-02-24T00:00:04.531000","2026-02-24T00:00:04.532000","2026-02-24T00:00:04.533000","2026-02-24T00:00:04.540000","2026-02-24T00:00:04.541000","2026-02-24T00:00:04.542000","2026-02-24T00:00:04.543000","2026-02-24T00:00:04.544000","2026-02-24T00:00:04.546000","2026-02-24T00:00:04.547000","2026-02-24T00:00:04.548000","2026-02-24T00:00:04.550000","2026-02-24T00:00:04.551000","2026-02-24T00:00:04.553000","2026-02-24T00:00:04.554000","2026-02-24T00:00:04.555000","2026-02-24T00:00:04.557000","2026-02-24T00:00:04.558000","2026-02-24T00:00:04.559000","2026-02-24T00:00:04.561000","2026-02-24T00:00:04.562000","2026-02-24T00:00:04.563000","2026-02-24T00:00:04.564000","2026-02-24T00:00:04.566000","2026-02-24T00:00:04.567000","2026-02-24T00:00:04.568000","2026-02-24T00:00:04.570000","2026-02-24T00:00:04.571000","2026-02-24T00:00:04.572000","2026-02-24T00:00:04.574000","2026-02-24T00:00:04.575000","2026-02-24T00:00:04.576000","2026-02-24T00:00:04.578000","2026-02-24T00:00:04.579000","2026-02-24T00:00:04.581000","2026-02-24T00:00:04.582000","2026-02-24T00:00:04.583000","2026-02-24T00:00:04.585000","2026-02-24T00:00:04.586000","2026-02-24T00:00:04.587000","2026-02-24T00:00:04.588000","2026-02-24T00:00:04.590000","2026-02-24T00:00:04.591000","2026-02-24T00:00:04.592000","2026-02-24T00:00:04.594000","2026-02-24T00:00:04.595000","2026-02-24T00:00:04.596000","2026-02-24T00:00:04.598000","2026-02-24T00:00:04.599000","2026-02-24T00:00:04.600000","2026-02-24T00:00:04.602000","2026-02-24T00:00:04.603000","2026-02-24T00:00:04.604000","2026-02-24T00:00:04.606000","2026-02-24T00:00:04.607000","2026-02-24T00:00:04.609000","2026-02-24T00:00:04.610000","2026-02-24T00:00:04.611000","2026-02-24T00:00:04.613000","2026-02-24T00:00:04.614000","2026-02-24T00:00:04.615000","2026-02-24T00:00:04.616000","2026-02-24T00:00:04.618000","2026-02-24T00:00:04.619000","2026-02-24T00:00:04.620000","2026-02-24T00:00:04.622000","2026-02-24T00:00:04.623000","2026-02-24T00:00:04.624000","2026-02-24T00:00:04.626000","2026-02-24T00:00:04.627000","2026-02-24T00:00:04.628000","2026-02-24T00:00:04.630000","2026-02-24T00:00:04.631000","2026-02-24T00:00:04.632000","2026-02-24T00:00:04.634000","2026-02-24T00:00:04.635000","2026-02-24T00:00:04.636000","2026-02-24T00:00:04.638000","2026-02-24T00:00:04.639000","2026-02-24T00:00:04.640000","2026-02-24T00:00:04.642000","2026-02-24T00:00:04.643000","2026-02-24T00:00:04.644000","2026-02-24T00:00:04.646000","2026-02-24T00:00:04.647000","2026-02-24T00:00:04.649000","2026-02-24T00:00:04.650000","2026-02-24T00:00:04.651000","2026-02-24T00:00:04.653000","2026-02-24T00:00:04.654000","2026-02-24T00:00:04.656000","2026-02-24T00:00:04.657000","2026-02-24T00:00:04.658000","2026-02-24T00:00:04.660000","2026-02-24T00:00:04.661000","2026-02-24T00:00:04.662000","2026-02-24T00:00:04.664000","2026-02-24T00:00:04.665000","2026-02-24T00:00:04.666000","2026-02-24T00:00:04.668000","2026-02-24T00:00:04.669000","2026-02-24T00:00:04.671000","2026-02-24T00:00:04.672000","2026-02-24T00:00:04.674000","2026-02-24T00:00:04.675000","2026-02-24T00:00:04.676000","2026-02-24T00:00:04.678000","2026-02-24T00:00:04.679000","2026-02-24T00:00:04.681000","2026-02-24T00:00:04.682000","2026-02-24T00:00:04.683000","2026-02-24T00:00:04.684000","2026-02-24T00:00:04.686000","2026-02-24T00:00:04.687000","2026-02-24T00:00:04.689000","2026-02-24T00:00:04.690000","2026-02-24T00:00:04.691000","2026-02-24T00:00:04.693000","2026-02-24T00:00:04.694000","2026-02-24T00:00:04.696000","2026-02-24T00:00:04.697000","2026-02-24T00:00:04.698000","2026-02-24T00:00:04.700000","2026-02-24T00:00:04.701000","2026-02-24T00:00:04.702000","2026-02-24T00:00:04.704000","2026-02-24T00:00:04.705000","2026-02-24T00:00:04.707000","2026-02-24T00:00:04.708000","2026-02-24T00:00:04.710000","2026-02-24T00:00:04.711000","2026-02-24T00:00:04.712000","2026-02-24T00:00:04.714000","2026-02-24T00:00:04.715000","2026-02-24T00:00:04.716000","2026-02-24T00:00:04.718000","2026-02-24T00:00:04.719000","2026-02-24T00:00:04.720000","2026-02-24T00:00:04.722000","2026-02-24T00:00:04.723000","2026-02-24T00:00:04.725000","2026-02-24T00:00:04.726000","2026-02-24T00:00:04.727000","2026-02-24T00:00:04.729000","2026-02-24T00:00:04.730000","2026-02-24T00:00:04.731000","2026-02-24T00:00:04.733000","2026-02-24T00:00:04.734000","2026-02-24T00:00:04.735000","2026-02-24T00:00:04.737000","2026-02-24T00:00:04.738000","2026-02-24T00:00:04.740000","2026-02-24T00:00:04.741000","2026-02-24T00:00:04.742000","2026-02-24T00:00:04.744000","2026-02-24T00:00:04.745000","2026-02-24T00:00:04.746000","2026-02-24T00:00:04.748000","2026-02-24T00:00:04.749000","2026-02-24T00:00:04.751000","2026-02-24T00:00:04.752000","2026-02-24T00:00:04.753000","2026-02-24T00:00:04.755000","2026-02-24T00:00:04.756000","2026-02-24T00:00:04.758000","2026-02-24T00:00:04.759000","2026-02-24T00:00:04.760000","2026-02-24T00:00:04.762000","2026-02-24T00:00:04.763000","2026-02-24T00:00:04.765000","2026-02-24T00:00:04.766000","2026-02-24T00:00:04.768000","2026-02-24T00:00:04.769000","2026-02-24T00:00:04.771000","2026-02-24T00:00:04.772000","2026-02-24T00:00:04.773000","2026-02-24T00:00:04.775000","2026-02-24T00:00:04.776000","2026-02-24T00:00:04.778000","2026-02-24T00:00:04.779000","2026-02-24T00:00:04.781000","2026-02-24T00:00:04.782000","2026-02-24T00:00:04.783000","2026-02-24T00:00:04.784000","2026-02-24T00:00:04.786000","2026-02-24T00:00:04.787000","2026-02-24T00:00:04.789000","2026-02-24T00:00:04.790000","2026-02-24T00:00:04.791000","2026-02-24T00:00:04.792000","2026-02-24T00:00:04.794000","2026-02-24T00:00:04.795000","2026-02-24T00:00:04.797000","2026-02-24T00:00:04.798000","2026-02-24T00:00:04.799000","2026-02-24T00:00:04.801000","2026-02-24T00:00:04.802000","2026-02-24T00:00:04.804000","2026-02-24T00:00:04.805000","2026-02-24T00:00:04.807000","2026-02-24T00:00:04.808000","2026-02-24T00:00:04.809000","2026-02-24T00:00:04.811000","2026-02-24T00:00:04.812000","2026-02-24T00:00:04.813000","2026-02-24T00:00:04.814000","2026-02-24T00:00:04.816000","2026-02-24T00:00:04.817000","2026-02-24T00:00:04.819000","2026-02-24T00:00:04.820000","2026-02-24T00:00:04.822000","2026-02-24T00:00:04.823000","2026-02-24T00:00:04.824000","2026-02-24T00:00:04.826000","2026-02-24T00:00:04.827000","2026-02-24T00:00:04.829000","2026-02-24T00:00:04.830000","2026-02-24T00:00:04.831000","2026-02-24T00:00:04.833000","2026-02-24T00:00:04.834000","2026-02-24T00:00:04.836000","2026-02-24T00:00:04.837000","2026-02-24T00:00:04.839000","2026-02-24T00:00:04.840000","2026-02-24T00:00:04.841000","2026-02-24T00:00:04.843000","2026-02-24T00:00:04.844000","2026-02-24T00:00:04.846000","2026-02-24T00:00:04.847000","2026-02-24T00:00:04.849000","2026-02-24T00:00:04.858000","2026-02-24T00:00:04.859000","2026-02-24T00:00:04.860000","2026-02-24T00:00:04.862000","2026-02-24T00:00:04.863000","2026-02-24T00:00:04.865000","2026-02-24T00:00:04.866000","2026-02-24T00:00:04.867000","2026-02-24T00:00:04.869000","2026-02-24T00:00:04.870000","2026-02-24T00:00:04.871000","2026-02-24T00:00:04.873000","2026-02-24T00:00:04.874000","2026-02-24T00:00:04.876000","2026-02-24T00:00:04.877000","2026-02-24T00:00:04.879000","2026-02-24T00:00:04.880000","2026-02-24T00:00:04.881000","2026-02-24T00:00:04.883000","2026-02-24T00:00:04.884000","2026-02-24T00:00:04.886000","2026-02-24T00:00:04.887000","2026-02-24T00:00:04.889000","2026-02-24T00:00:04.890000","2026-02-24T00:00:04.891000","2026-02-24T00:00:04.893000","2026-02-24T00:00:04.894000","2026-02-24T00:00:04.896000","2026-02-24T00:00:04.897000","2026-02-24T00:00:04.899000","2026-02-24T00:00:04.900000","2026-02-24T00:00:04.901000","2026-02-24T00:00:04.903000","2026-02-24T00:00:04.904000","2026-02-24T00:00:04.906000","2026-02-24T00:00:04.907000","2026-02-24T00:00:04.908000","2026-02-24T00:00:04.910000","2026-02-24T00:00:04.911000","2026-02-24T00:00:04.913000","2026-02-24T00:00:04.914000","2026-02-24T00:00:04.915000","2026-02-24T00:00:04.917000","2026-02-24T00:00:04.918000","2026-02-24T00:00:04.920000","2026-02-24T00:00:04.921000","2026-02-24T00:00:04.923000","2026-02-24T00:00:04.924000","2026-02-24T00:00:04.925000","2026-02-24T00:00:04.927000","2026-02-24T00:00:04.928000","2026-02-24T00:00:04.930000","2026-02-24T00:00:04.931000","2026-02-24T00:00:04.933000","2026-02-24T00:00:04.934000","2026-02-24T00:00:04.935000","2026-02-24T00:00:04.937000","2026-02-24T00:00:04.938000","2026-02-24T00:00:04.940000","2026-02-24T00:00:04.941000","2026-02-24T00:00:04.943000","2026-02-24T00:00:04.944000","2026-02-24T00:00:04.945000","2026-02-24T00:00:04.947000","2026-02-24T00:00:04.948000","2026-02-24T00:00:04.950000","2026-02-24T00:00:04.951000","2026-02-24T00:00:04.953000","2026-02-24T00:00:04.954000","2026-02-24T00:00:04.955000","2026-02-24T00:00:04.957000","2026-02-24T00:00:04.958000","2026-02-24T00:00:04.960000","2026-02-24T00:00:04.961000","2026-02-24T00:00:04.963000","2026-02-24T00:00:04.964000","2026-02-24T00:00:04.965000","2026-02-24T00:00:04.967000","2026-02-24T00:00:04.968000","2026-02-24T00:00:04.970000","2026-02-24T00:00:04.971000","2026-02-24T00:00:04.980000","2026-02-24T00:00:04.981000","2026-02-24T00:00:04.982000","2026-02-24T00:00:04.983000","2026-02-24T00:00:04.985000","2026-02-24T00:00:04.986000","2026-02-24T00:00:04.988000","2026-02-24T00:00:04.989000","2026-02-24T00:00:04.990000","2026-02-24T00:00:04.992000","2026-02-24T00:00:04.993000","2026-02-24T00:00:04.995000","2026-02-24T00:00:04.996000","2026-02-24T00:00:04.998000","2026-02-24T00:00:04.999000","2026-02-24T00:00:05.000000","2026-02-24T00:00:05.002000","2026-02-24T00:00:05.003000","2026-02-24T00:00:05.005000","2026-02-24T00:00:05.006000","2026-02-24T00:00:05.008000","2026-02-24T00:00:05.009000","2026-02-24T00:00:05.010000","2026-02-24T00:00:05.012000","2026-02-24T00:00:05.013000","2026-02-24T00:00:05.015000","2026-02-24T00:00:05.016000","2026-02-24T00:00:05.018000","2026-02-24T00:00:05.019000","2026-02-24T00:00:05.021000","2026-02-24T00:00:05.022000","2026-02-24T00:00:05.024000","2026-02-24T00:00:05.025000","2026-02-24T00:00:05.027000","2026-02-24T00:00:05.028000","2026-02-24T00:00:05.030000","2026-02-24T00:00:05.031000","2026-02-24T00:00:05.033000","2026-02-24T00:00:05.034000","2026-02-24T00:00:05.036000","2026-02-24T00:00:05.037000","2026-02-24T00:00:05.039000","2026-02-24T00:00:05.040000","2026-02-24T00:00:05.041000","2026-02-24T00:00:05.043000","2026-02-24T00:00:05.044000","2026-02-24T00:00:05.046000","2026-02-24T00:00:05.047000","2026-02-24T00:00:05.049000","2026-02-24T00:00:05.050000","2026-02-24T00:00:05.052000","2026-02-24T00:00:05.053000","2026-02-24T00:00:05.055000","2026-02-24T00:00:05.056000","2026-02-24T00:00:05.058000","2026-02-24T00:00:05.059000","2026-02-24T00:00:05.061000","2026-02-24T00:00:05.062000","2026-02-24T00:00:05.064000","2026-02-24T00:00:05.065000","2026-02-24T00:00:05.067000","2026-02-24T00:00:05.068000","2026-02-24T00:00:05.070000","2026-02-24T00:00:05.071000","2026-02-24T00:00:05.073000","2026-02-24T00:00:05.074000","2026-02-24T00:00:05.076000","2026-02-24T00:00:05.077000","2026-02-24T00:00:05.079000","2026-02-24T00:00:05.080000","2026-02-24T00:00:05.082000","2026-02-24T00:00:05.083000","2026-02-24T00:00:05.085000","2026-02-24T00:00:05.095000","2026-02-24T00:00:05.097000","2026-02-24T00:00:05.098000","2026-02-24T00:00:05.100000","2026-02-24T00:00:05.101000","2026-02-24T00:00:05.103000","2026-02-24T00:00:05.104000","2026-02-24T00:00:05.105000","2026-02-24T00:00:05.107000","2026-02-24T00:00:05.108000","2026-02-24T00:00:05.110000","2026-02-24T00:00:05.111000","2026-02-24T00:00:05.113000","2026-02-24T00:00:05.114000","2026-02-24T00:00:05.116000","2026-02-24T00:00:05.117000","2026-02-24T00:00:05.118000","2026-02-24T00:00:05.120000","2026-02-24T00:00:05.122000","2026-02-24T00:00:05.123000","2026-02-24T00:00:05.124000","2026-02-24T00:00:05.126000","2026-02-24T00:00:05.127000","2026-02-24T00:00:05.129000","2026-02-24T00:00:05.130000","2026-02-24T00:00:05.132000","2026-02-24T00:00:05.133000","2026-02-24T00:00:05.135000","2026-02-24T00:00:05.136000","2026-02-24T00:00:05.138000","2026-02-24T00:00:05.139000","2026-02-24T00:00:05.141000","2026-02-24T00:00:05.142000","2026-02-24T00:00:05.144000","2026-02-24T00:00:05.145000","2026-02-24T00:00:05.147000","2026-02-24T00:00:05.148000","2026-02-24T00:00:05.150000","2026-02-24T00:00:05.151000","2026-02-24T00:00:05.153000","2026-02-24T00:00:05.154000","2026-02-24T00:00:05.156000","2026-02-24T00:00:05.157000","2026-02-24T00:00:05.159000","2026-02-24T00:00:05.160000","2026-02-24T00:00:05.162000","2026-02-24T00:00:05.163000","2026-02-24T00:00:05.164000","2026-02-24T00:00:05.166000","2026-02-24T00:00:05.167000","2026-02-24T00:00:05.169000","2026-02-24T00:00:05.170000","2026-02-24T00:00:05.172000","2026-02-24T00:00:05.174000","2026-02-24T00:00:05.175000","2026-02-24T00:00:05.176000","2026-02-24T00:00:05.178000","2026-02-24T00:00:05.179000","2026-02-24T00:00:05.181000","2026-02-24T00:00:05.182000","2026-02-24T00:00:05.184000","2026-02-24T00:00:05.185000","2026-02-24T00:00:04.541000","2026-02-24T00:00:04.542000","2026-02-24T00:00:04.543000","2026-02-24T00:00:04.544000","2026-02-24T00:00:04.546000","2026-02-24T00:00:04.547000","2026-02-24T00:00:04.549000","2026-02-24T00:00:04.550000","2026-02-24T00:00:04.551000","2026-02-24T00:00:04.553000","2026-02-24T00:00:04.554000","2026-02-24T00:00:04.555000","2026-02-24T00:00:04.557000","2026-02-24T00:00:04.558000","2026-02-24T00:00:04.560000","2026-02-24T00:00:04.561000","2026-02-24T00:00:04.562000","2026-02-24T00:00:04.563000","2026-02-24T00:00:04.565000","2026-02-24T00:00:04.566000","2026-02-24T00:00:04.567000","2026-02-24T00:00:04.569000","2026-02-24T00:00:04.570000","2026-02-24T00:00:04.571000","2026-02-24T00:00:04.573000","2026-02-24T00:00:04.574000","2026-02-24T00:00:04.575000","2026-02-24T00:00:04.577000","2026-02-24T00:00:04.578000","2026-02-24T00:00:04.579000","2026-02-24T00:00:04.581000","2026-02-24T00:00:04.582000","2026-02-24T00:00:04.583000","2026-02-24T00:00:04.585000","2026-02-24T00:00:04.586000","2026-02-24T00:00:04.588000","2026-02-24T00:00:04.588000","2026-02-24T00:00:04.590000","2026-02-24T00:00:04.591000","2026-02-24T00:00:04.592000","2026-02-24T00:00:04.594000","2026-02-24T00:00:04.595000","2026-02-24T00:00:04.596000","2026-02-24T00:00:04.598000","2026-02-24T00:00:04.599000","2026-02-24T00:00:04.600000","2026-02-24T00:00:04.602000","2026-02-24T00:00:04.603000","2026-02-24T00:00:04.605000","2026-02-24T00:00:04.606000","2026-02-24T00:00:04.607000","2026-02-24T00:00:04.609000","2026-02-24T00:00:04.610000","2026-02-24T00:00:04.611000","2026-02-24T00:00:04.613000","2026-02-24T00:00:04.614000","2026-02-24T00:00:04.615000","2026-02-24T00:00:04.616000","2026-02-24T00:00:04.618000","2026-02-24T00:00:04.619000","2026-02-24T00:00:04.620000","2026-02-24T00:00:04.622000","2026-02-24T00:00:04.623000","2026-02-24T00:00:04.624000","2026-02-24T00:00:04.626000","2026-02-24T00:00:04.627000","2026-02-24T00:00:04.628000","2026-02-24T00:00:04.630000","2026-02-24T00:00:04.631000","2026-02-24T00:00:04.633000","2026-02-24T00:00:04.634000","2026-02-24T00:00:04.635000","2026-02-24T00:00:04.637000","2026-02-24T00:00:04.638000","2026-02-24T00:00:04.639000","2026-02-24T00:00:04.640000","2026-02-24T00:00:04.642000","2026-02-24T00:00:04.643000","2026-02-24T00:00:04.644000","2026-02-24T00:00:04.646000","2026-02-24T00:00:04.647000","2026-02-24T00:00:04.649000","2026-02-24T00:00:04.650000","2026-02-24T00:00:04.651000","2026-02-24T00:00:04.653000","2026-02-24T00:00:04.654000","2026-02-24T00:00:04.656000","2026-02-24T00:00:04.657000","2026-02-24T00:00:04.658000","2026-02-24T00:00:04.660000","2026-02-24T00:00:04.661000","2026-02-24T00:00:04.662000","2026-02-24T00:00:04.664000","2026-02-24T00:00:04.665000","2026-02-24T00:00:04.667000","2026-02-24T00:00:04.668000","2026-02-24T00:00:04.669000","2026-02-24T00:00:04.671000","2026-02-24T00:00:04.672000","2026-02-24T00:00:04.674000","2026-02-24T00:00:04.675000","2026-02-24T00:00:04.676000","2026-02-24T00:00:04.678000","2026-02-24T00:00:04.679000","2026-02-24T00:00:04.681000","2026-02-24T00:00:04.682000","2026-02-24T00:00:04.683000","2026-02-24T00:00:04.685000","2026-02-24T00:00:04.686000","2026-02-24T00:00:04.687000","2026-02-24T00:00:04.689000","2026-02-24T00:00:04.690000","2026-02-24T00:00:04.692000","2026-02-24T00:00:04.693000","2026-02-24T00:00:04.694000","2026-02-24T00:00:04.696000","2026-02-24T00:00:04.697000","2026-02-24T00:00:04.698000","2026-02-24T00:00:04.700000","2026-02-24T00:00:04.701000","2026-02-24T00:00:04.703000","2026-02-24T00:00:04.704000","2026-02-24T00:00:04.705000","2026-02-24T00:00:04.707000","2026-02-24T00:00:04.708000","2026-02-24T00:00:04.710000","2026-02-24T00:00:04.711000","2026-02-24T00:00:04.712000","2026-02-24T00:00:04.714000","2026-02-24T00:00:04.859000","2026-02-24T00:00:04.861000","2026-02-24T00:00:04.862000","2026-02-24T00:00:04.863000","2026-02-24T00:00:04.865000","2026-02-24T00:00:04.866000","2026-02-24T00:00:04.867000","2026-02-24T00:00:04.869000","2026-02-24T00:00:04.870000","2026-02-24T00:00:04.871000","2026-02-24T00:00:04.873000","2026-02-24T00:00:04.874000","2026-02-24T00:00:04.981000","2026-02-24T00:00:04.982000","2026-02-24T00:00:04.983000","2026-02-24T00:00:04.985000","2026-02-24T00:00:04.986000","2026-02-24T00:00:04.988000","2026-02-24T00:00:04.989000","2026-02-24T00:00:04.990000","2026-02-24T00:00:04.992000","2026-02-24T00:00:04.993000","2026-02-24T00:00:04.995000","2026-02-24T00:00:04.996000","2026-02-24T00:00:04.998000","2026-02-24T00:00:04.999000","2026-02-24T00:00:05.001000","2026-02-24T00:00:05.002000","2026-02-24T00:00:05.003000","2026-02-24T00:00:05.005000","2026-02-24T00:00:05.006000","2026-02-24T00:00:05.008000","2026-02-24T00:00:05.009000","2026-02-24T00:00:05.010000","2026-02-24T00:00:05.012000","2026-02-24T00:00:05.013000","2026-02-24T00:00:05.015000","2026-02-24T00:00:05.016000","2026-02-24T00:00:05.018000","2026-02-24T00:00:05.019000","2026-02-24T00:00:05.021000","2026-02-24T00:00:05.022000","2026-02-24T00:00:05.024000","2026-02-24T00:00:05.025000","2026-02-24T00:00:05.027000","2026-02-24T00:00:05.028000","2026-02-24T00:00:05.030000","2026-02-24T00:00:05.031000","2026-02-24T00:00:05.033000","2026-02-24T00:00:05.034000","2026-02-24T00:00:05.036000","2026-02-24T00:00:05.037000","2026-02-24T00:00:05.039000","2026-02-24T00:00:05.040000","2026-02-24T00:00:05.041000","2026-02-24T00:00:05.043000","2026-02-24T00:00:05.044000","2026-02-24T00:00:05.046000","2026-02-24T00:00:05.047000","2026-02-24T00:00:05.049000","2026-02-24T00:00:05.050000","2026-02-24T00:00:05.052000","2026-02-24T00:00:05.053000","2026-02-24T00:00:05.055000","2026-02-24T00:00:05.056000","2026-02-24T00:00:05.058000","2026-02-24T00:00:05.059000","2026-02-24T00:00:05.061000","2026-02-24T00:00:05.062000","2026-02-24T00:00:05.064000","2026-02-24T00:00:05.065000","2026-02-24T00:00:05.067000","2026-02-24T00:00:05.068000","2026-02-24T00:00:05.070000","2026-02-24T00:00:05.071000","2026-02-24T00:00:05.073000","2026-02-24T00:00:05.074000","2026-02-24T00:00:05.076000","2026-02-24T00:00:05.077000","2026-02-24T00:00:05.079000","2026-02-24T00:00:05.080000","2026-02-24T00:00:05.082000","2026-02-24T00:00:05.083000","2026-02-24T00:00:05.085000","2026-02-24T00:00:05.096000","2026-02-24T00:00:05.097000","2026-02-24T00:00:05.098000","2026-02-24T00:00:05.100000","2026-02-24T00:00:05.101000","2026-02-24T00:00:05.103000","2026-02-24T00:00:05.104000","2026-02-24T00:00:05.105000","2026-02-24T00:00:05.107000","2026-02-24T00:00:05.108000","2026-02-24T00:00:05.110000","2026-02-24T00:00:05.111000","2026-02-24T00:00:05.113000","2026-02-24T00:00:05.114000","2026-02-24T00:00:05.116000","2026-02-24T00:00:05.117000","2026-02-24T00:00:05.118000","2026-02-24T00:00:05.120000","2026-02-24T00:00:05.122000","2026-02-24T00:00:05.123000","2026-02-24T00:00:05.125000","2026-02-24T00:00:05.126000","2026-02-24T00:00:05.128000","2026-02-24T00:00:05.129000","2026-02-24T00:00:05.130000","2026-02-24T00:00:05.132000","2026-02-24T00:00:05.133000","2026-02-24T00:00:05.135000","2026-02-24T00:00:05.136000","2026-02-24T00:00:05.138000","2026-02-24T00:00:05.139000","2026-02-24T00:00:05.141000","2026-02-24T00:00:05.142000","2026-02-24T00:00:05.144000","2026-02-24T00:00:05.145000","2026-02-24T00:00:05.147000","2026-02-24T00:00:05.148000","2026-02-24T00:00:05.150000","2026-02-24T00:00:05.151000","2026-02-24T00:00:05.153000","2026-02-24T00:00:05.154000","2026-02-24T00:00:05.156000","2026-02-24T00:00:05.157000","2026-02-24T00:00:05.159000","2026-02-24T00:00:05.160000","2026-02-24T00:00:05.162000","2026-02-24T00:00:05.163000","2026-02-24T00:00:05.165000","2026-02-24T00:00:05.166000","2026-02-24T00:00:05.168000","2026-02-24T00:00:05.169000","2026-02-24T00:00:05.170000","2026-02-24T00:00:05.172000","2026-02-24T00:00:05.174000","2026-02-24T00:00:05.175000","2026-02-24T00:00:05.177000","2026-02-24T00:00:05.178000","2026-02-24T00:00:05.180000","2026-02-24T00:00:05.181000","2026-02-24T00:00:05.182000","2026-02-24T00:00:05.184000","2026-02-24T00:00:05.185000","2026-02-24T00:00:05.187000","2026-02-24T00:00:05.188000","2026-02-24T00:00:05.189000","2026-02-24T00:00:05.190000","2026-02-24T00:00:05.192000","2026-02-24T00:00:05.193000","2026-02-24T00:00:05.194000","2026-02-24T00:00:05.196000","2026-02-24T00:00:05.197000","2026-02-24T00:00:05.197000","2026-02-24T00:00:05.198000","2026-02-24T00:00:05.200000","2026-02-24T00:00:05.201000","2026-02-24T00:00:05.202000","2026-02-24T00:00:05.203000","2026-02-24T00:00:05.204000","2026-02-24T00:00:05.205000","2026-02-24T00:00:05.206000","2026-02-24T00:00:05.207000","2026-02-24T00:00:05.209000","2026-02-24T00:00:05.210000","2026-02-24T00:00:05.211000","2026-02-24T00:00:05.212000","2026-02-24T00:00:05.213000","2026-02-24T00:00:05.214000","2026-02-24T00:00:05.215000","2026-02-24T00:00:05.217000","2026-02-24T00:00:05.226000","2026-02-24T00:00:05.227000","2026-02-24T00:00:05.228000","2026-02-24T00:00:05.229000","2026-02-24T00:00:05.230000","2026-02-24T00:00:05.232000","2026-02-24T00:00:05.233000","2026-02-24T00:00:05.234000","2026-02-24T00:00:05.236000","2026-02-24T00:00:05.237000","2026-02-24T00:00:05.238000","2026-02-24T00:00:05.239000","2026-02-24T00:00:05.241000","2026-02-24T00:00:05.242000","2026-02-24T00:00:05.243000","2026-02-24T00:00:05.245000","2026-02-24T00:00:05.246000","2026-02-24T00:00:05.247000","2026-02-24T00:00:05.249000","2026-02-24T00:00:05.250000","2026-02-24T00:00:05.251000","2026-02-24T00:00:05.252000","2026-02-24T00:00:05.253000","2026-02-24T00:00:05.255000","2026-02-24T00:00:05.256000","2026-02-24T00:00:05.257000","2026-02-24T00:00:05.259000","2026-02-24T00:00:05.260000","2026-02-24T00:00:05.261000","2026-02-24T00:00:05.262000","2026-02-24T00:00:05.264000","2026-02-24T00:00:05.265000","2026-02-24T00:00:05.266000","2026-02-24T00:00:05.268000","2026-02-24T00:00:05.269000","2026-02-24T00:00:05.271000","2026-02-24T00:00:05.272000","2026-02-24T00:00:05.273000","2026-02-24T00:00:05.274000","2026-02-24T00:00:05.275000","2026-02-24T00:00:05.276000","2026-02-24T00:00:05.277000","2026-02-24T00:00:05.279000","2026-02-24T00:00:05.280000","2026-02-24T00:00:05.281000","2026-02-24T00:00:05.282000","2026-02-24T00:00:05.283000","2026-02-24T00:00:05.284000","2026-02-24T00:00:05.285000","2026-02-24T00:00:05.286000","2026-02-24T00:00:05.288000","2026-02-24T00:00:05.289000","2026-02-24T00:00:05.290000","2026-02-24T00:00:05.291000","2026-02-24T00:00:05.292000","2026-02-24T00:00:05.293000","2026-02-24T00:00:05.294000","2026-02-24T00:00:05.024000","2026-02-24T00:00:05.025000","2026-02-24T00:00:05.027000","2026-02-24T00:00:05.028000","2026-02-24T00:00:05.030000","2026-02-24T00:00:05.031000","2026-02-24T00:00:05.033000","2026-02-24T00:00:05.034000","2026-02-24T00:00:05.036000","2026-02-24T00:00:05.037000","2026-02-24T00:00:05.039000","2026-02-24T00:00:05.040000","2026-02-24T00:00:05.041000","2026-02-24T00:00:05.043000","2026-02-24T00:00:05.044000","2026-02-24T00:00:05.046000","2026-02-24T00:00:05.047000","2026-02-24T00:00:05.049000","2026-02-24T00:00:05.050000","2026-02-24T00:00:05.052000","2026-02-24T00:00:05.053000","2026-02-24T00:00:05.055000","2026-02-24T00:00:05.056000","2026-02-24T00:00:05.058000","2026-02-24T00:00:05.059000","2026-02-24T00:00:05.061000","2026-02-24T00:00:05.062000","2026-02-24T00:00:05.064000","2026-02-24T00:00:05.065000","2026-02-24T00:00:05.067000","2026-02-24T00:00:05.068000","2026-02-24T00:00:05.070000","2026-02-24T00:00:05.071000","2026-02-24T00:00:05.073000","2026-02-24T00:00:05.074000","2026-02-24T00:00:05.076000","2026-02-24T00:00:05.077000","2026-02-24T00:00:05.079000","2026-02-24T00:00:05.080000","2026-02-24T00:00:05.082000","2026-02-24T00:00:05.083000","2026-02-24T00:00:05.085000","2026-02-24T00:00:05.096000","2026-02-24T00:00:05.097000","2026-02-24T00:00:05.098000","2026-02-24T00:00:05.100000","2026-02-24T00:00:05.101000","2026-02-24T00:00:05.103000","2026-02-24T00:00:05.104000","2026-02-24T00:00:05.105000","2026-02-24T00:00:05.107000","2026-02-24T00:00:05.108000","2026-02-24T00:00:05.110000","2026-02-24T00:00:05.111000","2026-02-24T00:00:05.113000","2026-02-24T00:00:05.114000","2026-02-24T00:00:05.116000","2026-02-24T00:00:05.117000","2026-02-24T00:00:05.119000","2026-02-24T00:00:05.120000","2026-02-24T00:00:05.122000","2026-02-24T00:00:05.123000","2026-02-24T00:00:05.125000","2026-02-24T00:00:05.126000","2026-02-24T00:00:05.128000","2026-02-24T00:00:05.129000","2026-02-24T00:00:05.130000","2026-02-24T00:00:05.132000","2026-02-24T00:00:05.133000","2026-02-24T00:00:05.135000","2026-02-24T00:00:05.136000","2026-02-24T00:00:05.138000","2026-02-24T00:00:05.139000","2026-02-24T00:00:05.141000","2026-02-24T00:00:05.142000","2026-02-24T00:00:05.144000","2026-02-24T00:00:05.145000","2026-02-24T00:00:05.147000","2026-02-24T00:00:05.148000","2026-02-24T00:00:05.150000","2026-02-24T00:00:05.151000","2026-02-24T00:00:05.153000","2026-02-24T00:00:05.154000","2026-02-24T00:00:05.156000","2026-02-24T00:00:05.157000","2026-02-24T00:00:05.159000","2026-02-24T00:00:05.160000","2026-02-24T00:00:05.162000","2026-02-24T00:00:05.163000","2026-02-24T00:00:05.165000","2026-02-24T00:00:05.166000","2026-02-24T00:00:05.168000","2026-02-24T00:00:05.169000","2026-02-24T00:00:05.171000","2026-02-24T00:00:05.172000","2026-02-24T00:00:05.174000","2026-02-24T00:00:05.175000","2026-02-24T00:00:05.177000","2026-02-24T00:00:05.178000","2026-02-24T00:00:05.180000","2026-02-24T00:00:05.181000","2026-02-24T00:00:05.182000","2026-02-24T00:00:05.184000","2026-02-24T00:00:05.185000","2026-02-24T00:00:05.187000","2026-02-24T00:00:05.188000","2026-02-24T00:00:05.189000","2026-02-24T00:00:05.190000","2026-02-24T00:00:05.192000","2026-02-24T00:00:05.193000","2026-02-24T00:00:05.194000","2026-02-24T00:00:05.196000","2026-02-24T00:00:05.197000","2026-02-24T00:00:05.197000","2026-02-24T00:00:05.199000","2026-02-24T00:00:05.200000","2026-02-24T00:00:05.201000","2026-02-24T00:00:05.202000","2026-02-24T00:00:05.203000","2026-02-24T00:00:05.204000","2026-02-24T00:00:05.205000","2026-02-24T00:00:05.206000","2026-02-24T00:00:05.207000","2026-02-24T00:00:05.209000","2026-02-24T00:00:05.210000","2026-02-24T00:00:05.211000","2026-02-24T00:00:05.212000","2026-02-24T00:00:05.213000","2026-02-24T00:00:05.214000","2026-02-24T00:00:05.216000","2026-02-24T00:00:05.217000","2026-02-24T00:00:05.226000","2026-02-24T00:00:05.227000","2026-02-24T00:00:05.228000","2026-02-24T00:00:05.229000","2026-02-24T00:00:05.231000","2026-02-24T00:00:05.232000","2026-02-24T00:00:05.233000","2026-02-24T00:00:05.234000","2026-02-24T00:00:05.236000","2026-02-24T00:00:05.237000","2026-02-24T00:00:05.238000","2026-02-24T00:00:05.239000","2026-02-24T00:00:05.241000","2026-02-24T00:00:05.242000","2026-02-24T00:00:05.243000","2026-02-24T00:00:05.245000","2026-02-24T00:00:05.246000","2026-02-24T00:00:05.248000","2026-02-24T00:00:05.249000","2026-02-24T00:00:05.250000","2026-02-24T00:00:05.251000","2026-02-24T00:00:05.252000","2026-02-24T00:00:05.254000","2026-02-24T00:00:05.255000","2026-02-24T00:00:05.256000","2026-02-24T00:00:05.257000","2026-02-24T00:00:05.259000","2026-02-24T00:00:05.260000","2026-02-24T00:00:05.261000","2026-02-24T00:00:05.263000","2026-02-24T00:00:05.264000","2026-02-24T00:00:05.265000","2026-02-24T00:00:05.266000","2026-02-24T00:00:05.270000","2026-02-24T00:00:05.271000","2026-02-24T00:00:05.272000","2026-02-24T00:00:05.273000","2026-02-24T00:00:05.275000","2026-02-24T00:00:05.275000","2026-02-24T00:00:05.276000","2026-02-24T00:00:05.278000","2026-02-24T00:00:05.279000","2026-02-24T00:00:05.280000","2026-02-24T00:00:05.281000","2026-02-24T00:00:05.282000","2026-02-24T00:00:05.283000","2026-02-24T00:00:05.284000","2026-02-24T00:00:05.285000","2026-02-24T00:00:05.287000","2026-02-24T00:00:05.288000","2026-02-24T00:00:05.289000","2026-02-24T00:00:05.096000","2026-02-24T00:00:05.097000","2026-02-24T00:00:05.098000","2026-02-24T00:00:05.100000","2026-02-24T00:00:05.101000","2026-02-24T00:00:05.103000","2026-02-24T00:00:05.104000","2026-02-24T00:00:05.105000","2026-02-24T00:00:05.107000","2026-02-24T00:00:05.108000","2026-02-24T00:00:05.110000","2026-02-24T00:00:05.111000","2026-02-24T00:00:05.113000","2026-02-24T00:00:05.114000","2026-02-24T00:00:05.116000","2026-02-24T00:00:05.226000","2026-02-24T00:00:05.227000","2026-02-24T00:00:05.228000","2026-02-24T00:00:05.229000","2026-02-24T00:00:05.231000","2026-02-24T00:00:05.232000","2026-02-24T00:00:05.233000","2026-02-24T00:00:05.234000","2026-02-24T00:00:05.236000","2026-02-24T00:00:05.237000","2026-02-24T00:00:05.238000","2026-02-24T00:00:05.239000","2026-02-24T00:00:05.241000","2026-02-24T00:00:05.242000","2026-02-24T00:00:05.243000","2026-02-24T00:00:05.245000","2026-02-24T00:00:05.246000","2026-02-24T00:00:05.248000","2026-02-24T00:00:05.249000","2026-02-24T00:00:05.250000","2026-02-24T00:00:05.251000","2026-02-24T00:00:05.252000","2026-02-24T00:00:05.254000","2026-02-24T00:00:05.255000","2026-02-24T00:00:05.256000","2026-02-24T00:00:05.257000","2026-02-24T00:00:05.259000","2026-02-24T00:00:05.260000","2026-02-24T00:00:05.261000","2026-02-24T00:00:05.262000","2026-02-24T00:00:05.264000","2026-02-24T00:00:05.265000","2026-02-24T00:00:05.266000","2026-02-24T00:00:05.268000","2026-02-24T00:00:05.227000","2026-02-24T00:00:05.228000","2026-02-24T00:00:05.229000","2026-02-24T00:00:05.231000","2026-02-24T00:00:05.232000","2026-02-24T00:00:05.233000","2026-02-24T00:00:05.234000","2026-02-24T00:00:05.236000","2026-02-24T00:00:05.237000","2026-02-24T00:00:05.238000","2026-02-24T00:00:05.239000","2026-02-24T00:00:05.241000","2026-02-24T00:00:05.242000","2026-02-24T00:00:05.243000","2026-02-24T00:00:05.245000","2026-02-24T00:00:05.246000","2026-02-24T00:00:05.248000","2026-02-24T00:00:05.249000","2026-02-24T00:00:05.250000","2026-02-24T00:00:05.251000","2026-02-24T00:00:05.252000","2026-02-24T00:00:05.254000","2026-02-24T00:00:05.255000","2026-02-24T00:00:05.256000","2026-02-24T00:00:05.257000","2026-02-24T00:00:05.259000","2026-02-24T00:00:05.260000","2026-02-24T00:00:05.261000","2026-02-24T00:00:05.262000","2026-02-24T00:00:05.264000","2026-02-24T00:00:05.265000","2026-02-24T00:00:05.266000","2026-02-24T00:00:05.270000","2026-02-24T00:00:05.271000","2026-02-24T00:00:05.272000","2026-02-24T00:00:05.273000","2026-02-24T00:00:05.275000","2026-02-24T00:00:05.275000","2026-02-24T00:00:05.276000","2026-02-24T00:00:05.277000","2026-02-24T00:00:05.279000","2026-02-24T00:00:05.280000","2026-02-24T00:00:05.281000","2026-02-24T00:00:05.282000","2026-02-24T00:00:05.283000","2026-02-24T00:00:05.284000","2026-02-24T00:00:05.285000","2026-02-24T00:00:05.286000","2026-02-24T00:00:05.288000","2026-02-24T00:00:05.289000","2026-02-24T00:00:05.290000","2026-02-24T00:00:05.291000","2026-02-24T00:00:05.292000","2026-02-24T00:00:05.293000","2026-02-24T00:00:05.294000","2026-02-24T00:00:05.296000","2026-02-24T00:00:05.296000","2026-02-24T00:00:05.297000","2026-02-24T00:00:05.298000","2026-02-24T00:00:05.299000","2026-02-24T00:00:05.300000","2026-02-24T00:00:05.301000","2026-02-24T00:00:05.302000","2026-02-24T00:00:05.303000","2026-02-24T00:00:05.305000","2026-02-24T00:00:05.306000","2026-02-24T00:00:05.307000","2026-02-24T00:00:05.308000","2026-02-24T00:00:05.309000","2026-02-24T00:00:05.310000","2026-02-24T00:00:05.311000","2026-02-24T00:00:05.312000","2026-02-24T00:00:05.313000","2026-02-24T00:00:05.314000","2026-02-24T00:00:05.315000","2026-02-24T00:00:05.316000","2026-02-24T00:00:05.317000","2026-02-24T00:00:05.318000","2026-02-24T00:00:05.319000","2026-02-24T00:00:05.320000","2026-02-24T00:00:05.321000","2026-02-24T00:00:05.322000","2026-02-24T00:00:05.323000","2026-02-24T00:00:05.324000","2026-02-24T00:00:05.325000","2026-02-24T00:00:05.326000","2026-02-24T00:00:05.327000","2026-02-24T00:00:05.328000","2026-02-24T00:00:05.329000","2026-02-24T00:00:05.330000","2026-02-24T00:00:05.331000","2026-02-24T00:00:05.332000","2026-02-24T00:00:05.333000","2026-02-24T00:00:05.334000","2026-02-24T00:00:05.335000","2026-02-24T00:00:05.337000","2026-02-24T00:00:05.337000","2026-02-24T00:00:05.338000","2026-02-24T00:00:05.339000","2026-02-24T00:00:05.343000","2026-02-24T00:00:05.344000","2026-02-24T00:00:05.344000","2026-02-24T00:00:05.345000","2026-02-24T00:00:05.346000","2026-02-24T00:00:05.347000","2026-02-24T00:00:05.349000","2026-02-24T00:00:05.350000","2026-02-24T00:00:05.351000","2026-02-24T00:00:05.352000","2026-02-24T00:00:05.353000","2026-02-24T00:00:05.354000","2026-02-24T00:00:05.355000","2026-02-24T00:00:05.356000","2026-02-24T00:00:05.357000","2026-02-24T00:00:05.358000","2026-02-24T00:00:05.359000","2026-02-24T00:00:05.360000","2026-02-24T00:00:05.361000","2026-02-24T00:00:05.363000","2026-02-24T00:00:05.364000","2026-02-24T00:00:05.365000","2026-02-24T00:00:05.366000","2026-02-24T00:00:05.367000","2026-02-24T00:00:05.368000","2026-02-24T00:00:05.369000","2026-02-24T00:00:05.370000","2026-02-24T00:00:05.371000","2026-02-24T00:00:05.373000","2026-02-24T00:00:05.374000","2026-02-24T00:00:05.375000","2026-02-24T00:00:05.376000","2026-02-24T00:00:05.377000","2026-02-24T00:00:05.378000","2026-02-24T00:00:05.379000","2026-02-24T00:00:05.380000","2026-02-24T00:00:05.381000","2026-02-24T00:00:05.383000","2026-02-24T00:00:05.384000","2026-02-24T00:00:05.385000","2026-02-24T00:00:05.386000","2026-02-24T00:00:05.387000","2026-02-24T00:00:05.388000","2026-02-24T00:00:05.389000","2026-02-24T00:00:05.390000","2026-02-24T00:00:05.391000","2026-02-24T00:00:05.393000","2026-02-24T00:00:05.394000","2026-02-24T00:00:05.395000","2026-02-24T00:00:05.396000","2026-02-24T00:00:05.397000","2026-02-24T00:00:05.398000","2026-02-24T00:00:05.399000","2026-02-24T00:00:05.400000","2026-02-24T00:00:05.402000","2026-02-24T00:00:05.403000","2026-02-24T00:00:05.404000","2026-02-24T00:00:05.405000","2026-02-24T00:00:05.406000","2026-02-24T00:00:05.407000","2026-02-24T00:00:05.408000","2026-02-24T00:00:05.409000","2026-02-24T00:00:05.410000","2026-02-24T00:00:05.412000","2026-02-24T00:00:05.413000","2026-02-24T00:00:05.414000","2026-02-24T00:00:05.415000","2026-02-24T00:00:05.416000","2026-02-24T00:00:05.417000","2026-02-24T00:00:05.418000","2026-02-24T00:00:05.419000","2026-02-24T00:00:05.421000","2026-02-24T00:00:05.422000","2026-02-24T00:00:05.423000","2026-02-24T00:00:05.424000","2026-02-24T00:00:05.425000","2026-02-24T00:00:05.426000","2026-02-24T00:00:05.427000","2026-02-24T00:00:05.428000","2026-02-24T00:00:05.429000","2026-02-24T00:00:05.431000","2026-02-24T00:00:05.432000","2026-02-24T00:00:05.433000","2026-02-24T00:00:05.434000","2026-02-24T00:00:05.435000","2026-02-24T00:00:05.436000","2026-02-24T00:00:05.437000","2026-02-24T00:00:05.438000","2026-02-24T00:00:05.439000","2026-02-24T00:00:05.441000","2026-02-24T00:00:05.442000","2026-02-24T00:00:05.443000","2026-02-24T00:00:05.444000","2026-02-24T00:00:05.445000","2026-02-24T00:00:05.446000","2026-02-24T00:00:05.447000","2026-02-24T00:00:05.448000","2026-02-24T00:00:05.450000","2026-02-24T00:00:05.451000","2026-02-24T00:00:05.452000","2026-02-24T00:00:05.453000","2026-02-24T00:00:05.454000","2026-02-24T00:00:05.455000","2026-02-24T00:00:05.456000","2026-02-24T00:00:05.458000","2026-02-24T00:00:05.461000","2026-02-24T00:00:05.470000","2026-02-24T00:00:05.471000","2026-02-24T00:00:05.472000","2026-02-24T00:00:05.473000","2026-02-24T00:00:05.474000","2026-02-24T00:00:05.475000","2026-02-24T00:00:05.477000","2026-02-24T00:00:05.478000","2026-02-24T00:00:05.479000","2026-02-24T00:00:05.480000","2026-02-24T00:00:05.481000","2026-02-24T00:00:05.483000","2026-02-24T00:00:05.483000","2026-02-24T00:00:05.485000","2026-02-24T00:00:05.486000","2026-02-24T00:00:05.491000","2026-02-24T00:00:05.492000","2026-02-24T00:00:05.493000","2026-02-24T00:00:05.494000","2026-02-24T00:00:05.495000","2026-02-24T00:00:05.497000","2026-02-24T00:00:05.498000","2026-02-24T00:00:05.499000","2026-02-24T00:00:05.500000","2026-02-24T00:00:05.501000","2026-02-24T00:00:05.503000","2026-02-24T00:00:05.504000","2026-02-24T00:00:05.505000","2026-02-24T00:00:05.506000","2026-02-24T00:00:05.507000","2026-02-24T00:00:05.508000","2026-02-24T00:00:05.510000","2026-02-24T00:00:05.511000","2026-02-24T00:00:05.512000","2026-02-24T00:00:05.513000","2026-02-24T00:00:05.514000","2026-02-24T00:00:05.515000","2026-02-24T00:00:05.517000","2026-02-24T00:00:05.518000","2026-02-24T00:00:05.519000","2026-02-24T00:00:05.520000","2026-02-24T00:00:05.521000","2026-02-24T00:00:05.523000","2026-02-24T00:00:05.524000","2026-02-24T00:00:05.525000","2026-02-24T00:00:05.526000","2026-02-24T00:00:05.527000","2026-02-24T00:00:05.528000","2026-02-24T00:00:05.529000","2026-02-24T00:00:05.531000","2026-02-24T00:00:05.532000","2026-02-24T00:00:05.533000","2026-02-24T00:00:05.534000","2026-02-24T00:00:05.535000","2026-02-24T00:00:05.537000","2026-02-24T00:00:05.538000","2026-02-24T00:00:05.540000","2026-02-24T00:00:05.540000","2026-02-24T00:00:05.541000","2026-02-24T00:00:05.543000","2026-02-24T00:00:05.544000","2026-02-24T00:00:05.545000","2026-02-24T00:00:05.546000","2026-02-24T00:00:05.547000","2026-02-24T00:00:05.344000","2026-02-24T00:00:05.344000","2026-02-24T00:00:05.345000","2026-02-24T00:00:05.346000","2026-02-24T00:00:05.348000","2026-02-24T00:00:05.349000","2026-02-24T00:00:05.350000","2026-02-24T00:00:05.351000","2026-02-24T00:00:05.352000","2026-02-24T00:00:05.353000","2026-02-24T00:00:05.354000","2026-02-24T00:00:05.355000","2026-02-24T00:00:05.356000","2026-02-24T00:00:05.357000","2026-02-24T00:00:05.358000","2026-02-24T00:00:05.359000","2026-02-24T00:00:05.360000","2026-02-24T00:00:05.361000","2026-02-24T00:00:05.363000","2026-02-24T00:00:05.364000","2026-02-24T00:00:05.365000","2026-02-24T00:00:05.366000","2026-02-24T00:00:05.367000","2026-02-24T00:00:05.368000","2026-02-24T00:00:05.369000","2026-02-24T00:00:05.370000","2026-02-24T00:00:05.371000","2026-02-24T00:00:05.373000","2026-02-24T00:00:05.374000","2026-02-24T00:00:05.375000","2026-02-24T00:00:05.376000","2026-02-24T00:00:05.377000","2026-02-24T00:00:05.378000","2026-02-24T00:00:05.379000","2026-02-24T00:00:05.380000","2026-02-24T00:00:05.381000","2026-02-24T00:00:05.383000","2026-02-24T00:00:05.384000","2026-02-24T00:00:05.385000","2026-02-24T00:00:05.386000","2026-02-24T00:00:05.387000","2026-02-24T00:00:05.388000","2026-02-24T00:00:05.389000","2026-02-24T00:00:05.390000","2026-02-24T00:00:05.392000","2026-02-24T00:00:05.393000","2026-02-24T00:00:05.394000","2026-02-24T00:00:05.395000","2026-02-24T00:00:05.396000","2026-02-24T00:00:05.397000","2026-02-24T00:00:05.398000","2026-02-24T00:00:05.399000","2026-02-24T00:00:05.400000","2026-02-24T00:00:05.402000","2026-02-24T00:00:05.403000","2026-02-24T00:00:05.404000","2026-02-24T00:00:05.405000","2026-02-24T00:00:05.406000","2026-02-24T00:00:05.407000","2026-02-24T00:00:05.408000","2026-02-24T00:00:05.409000","2026-02-24T00:00:05.411000","2026-02-24T00:00:05.412000","2026-02-24T00:00:05.413000","2026-02-24T00:00:05.414000","2026-02-24T00:00:05.415000","2026-02-24T00:00:05.416000","2026-02-24T00:00:05.417000","2026-02-24T00:00:05.418000","2026-02-24T00:00:05.419000","2026-02-24T00:00:05.421000","2026-02-24T00:00:05.422000","2026-02-24T00:00:05.423000","2026-02-24T00:00:05.424000","2026-02-24T00:00:05.425000","2026-02-24T00:00:05.426000","2026-02-24T00:00:05.427000","2026-02-24T00:00:05.428000","2026-02-24T00:00:05.430000","2026-02-24T00:00:05.431000","2026-02-24T00:00:05.432000","2026-02-24T00:00:05.433000","2026-02-24T00:00:05.434000","2026-02-24T00:00:05.435000","2026-02-24T00:00:05.436000","2026-02-24T00:00:05.437000","2026-02-24T00:00:05.438000","2026-02-24T00:00:05.440000","2026-02-24T00:00:05.441000","2026-02-24T00:00:05.442000","2026-02-24T00:00:05.443000","2026-02-24T00:00:05.444000","2026-02-24T00:00:05.445000","2026-02-24T00:00:05.446000","2026-02-24T00:00:05.447000","2026-02-24T00:00:05.448000","2026-02-24T00:00:05.450000","2026-02-24T00:00:05.451000","2026-02-24T00:00:05.452000","2026-02-24T00:00:05.453000","2026-02-24T00:00:05.454000","2026-02-24T00:00:05.455000","2026-02-24T00:00:05.456000","2026-02-24T00:00:05.458000","2026-02-24T00:00:05.461000","2026-02-24T00:00:05.470000","2026-02-24T00:00:05.471000","2026-02-24T00:00:05.472000","2026-02-24T00:00:05.473000","2026-02-24T00:00:05.474000","2026-02-24T00:00:05.475000","2026-02-24T00:00:05.477000","2026-02-24T00:00:05.478000","2026-02-24T00:00:05.479000","2026-02-24T00:00:05.480000","2026-02-24T00:00:05.481000","2026-02-24T00:00:05.483000","2026-02-24T00:00:05.484000","2026-02-24T00:00:05.485000","2026-02-24T00:00:05.486000","2026-02-24T00:00:05.491000","2026-02-24T00:00:05.492000","2026-02-24T00:00:05.493000","2026-02-24T00:00:05.494000","2026-02-24T00:00:05.496000","2026-02-24T00:00:05.497000","2026-02-24T00:00:05.498000","2026-02-24T00:00:05.499000","2026-02-24T00:00:05.500000","2026-02-24T00:00:05.501000","2026-02-24T00:00:05.503000","2026-02-24T00:00:05.504000","2026-02-24T00:00:05.505000","2026-02-24T00:00:05.506000","2026-02-24T00:00:05.507000","2026-02-24T00:00:05.508000","2026-02-24T00:00:05.510000","2026-02-24T00:00:05.511000","2026-02-24T00:00:05.512000","2026-02-24T00:00:05.513000","2026-02-24T00:00:05.514000","2026-02-24T00:00:05.515000","2026-02-24T00:00:05.517000","2026-02-24T00:00:05.518000","2026-02-24T00:00:05.519000","2026-02-24T00:00:05.520000","2026-02-24T00:00:05.521000","2026-02-24T00:00:05.523000","2026-02-24T00:00:05.524000","2026-02-24T00:00:05.525000","2026-02-24T00:00:05.526000","2026-02-24T00:00:05.527000","2026-02-24T00:00:05.528000","2026-02-24T00:00:05.529000","2026-02-24T00:00:05.531000","2026-02-24T00:00:05.532000","2026-02-24T00:00:05.533000","2026-02-24T00:00:05.534000","2026-02-24T00:00:05.535000","2026-02-24T00:00:05.537000","2026-02-24T00:00:05.538000","2026-02-24T00:00:05.540000","2026-02-24T00:00:05.540000","2026-02-24T00:00:05.541000","2026-02-24T00:00:05.543000","2026-02-24T00:00:05.544000","2026-02-24T00:00:05.545000","2026-02-24T00:00:05.546000","2026-02-24T00:00:05.547000","2026-02-24T00:00:05.549000","2026-02-24T00:00:05.551000","2026-02-24T00:00:05.552000","2026-02-24T00:00:05.552000","2026-02-24T00:00:05.553000","2026-02-24T00:00:05.555000","2026-02-24T00:00:05.556000","2026-02-24T00:00:05.557000","2026-02-24T00:00:05.558000","2026-02-24T00:00:05.559000","2026-02-24T00:00:05.561000","2026-02-24T00:00:05.562000","2026-02-24T00:00:05.563000","2026-02-24T00:00:05.564000","2026-02-24T00:00:05.565000","2026-02-24T00:00:05.566000","2026-02-24T00:00:05.568000","2026-02-24T00:00:05.569000","2026-02-24T00:00:05.570000","2026-02-24T00:00:05.571000","2026-02-24T00:00:05.572000","2026-02-24T00:00:05.573000","2026-02-24T00:00:05.575000","2026-02-24T00:00:05.576000","2026-02-24T00:00:05.577000","2026-02-24T00:00:05.578000","2026-02-24T00:00:05.579000","2026-02-24T00:00:05.581000","2026-02-24T00:00:05.471000","2026-02-24T00:00:05.472000","2026-02-24T00:00:05.473000","2026-02-24T00:00:05.474000","2026-02-24T00:00:05.475000","2026-02-24T00:00:05.476000","2026-02-24T00:00:05.478000","2026-02-24T00:00:05.479000","2026-02-24T00:00:05.480000","2026-02-24T00:00:05.481000","2026-02-24T00:00:05.483000","2026-02-24T00:00:05.483000","2026-02-24T00:00:05.485000","2026-02-24T00:00:05.486000","2026-02-24T00:00:05.491000","2026-02-24T00:00:05.492000","2026-02-24T00:00:05.493000","2026-02-24T00:00:05.494000","2026-02-24T00:00:05.495000","2026-02-24T00:00:05.497000","2026-02-24T00:00:05.498000","2026-02-24T00:00:05.499000","2026-02-24T00:00:05.500000","2026-02-24T00:00:05.501000","2026-02-24T00:00:05.503000","2026-02-24T00:00:05.504000","2026-02-24T00:00:05.505000","2026-02-24T00:00:05.506000","2026-02-24T00:00:05.507000","2026-02-24T00:00:05.508000","2026-02-24T00:00:05.509000","2026-02-24T00:00:05.511000","2026-02-24T00:00:05.512000","2026-02-24T00:00:05.513000","2026-02-24T00:00:05.514000","2026-02-24T00:00:05.515000","2026-02-24T00:00:05.517000","2026-02-24T00:00:05.518000","2026-02-24T00:00:05.519000","2026-02-24T00:00:05.520000","2026-02-24T00:00:05.521000","2026-02-24T00:00:05.522000","2026-02-24T00:00:05.524000","2026-02-24T00:00:05.525000","2026-02-24T00:00:05.526000","2026-02-24T00:00:05.527000","2026-02-24T00:00:05.528000","2026-02-24T00:00:05.529000","2026-02-24T00:00:05.531000","2026-02-24T00:00:05.532000","2026-02-24T00:00:05.533000","2026-02-24T00:00:05.534000","2026-02-24T00:00:05.535000","2026-02-24T00:00:05.536000","2026-02-24T00:00:05.538000","2026-02-24T00:00:05.539000","2026-02-24T00:00:05.540000","2026-02-24T00:00:05.541000","2026-02-24T00:00:05.542000","2026-02-24T00:00:05.544000","2026-02-24T00:00:05.545000","2026-02-24T00:00:05.546000","2026-02-24T00:00:05.547000","2026-02-24T00:00:05.550000","2026-02-24T00:00:05.551000","2026-02-24T00:00:05.552000","2026-02-24T00:00:05.553000","2026-02-24T00:00:05.554000","2026-02-24T00:00:05.556000","2026-02-24T00:00:05.557000","2026-02-24T00:00:05.558000","2026-02-24T00:00:05.559000","2026-02-24T00:00:05.560000","2026-02-24T00:00:05.562000","2026-02-24T00:00:05.563000","2026-02-24T00:00:05.564000","2026-02-24T00:00:05.565000","2026-02-24T00:00:05.566000","2026-02-24T00:00:05.568000","2026-02-24T00:00:05.569000","2026-02-24T00:00:05.570000","2026-02-24T00:00:05.571000","2026-02-24T00:00:05.572000","2026-02-24T00:00:05.573000","2026-02-24T00:00:05.575000","2026-02-24T00:00:05.576000","2026-02-24T00:00:05.577000","2026-02-24T00:00:05.578000","2026-02-24T00:00:05.579000","2026-02-24T00:00:05.580000","2026-02-24T00:00:05.582000","2026-02-24T00:00:05.583000","2026-02-24T00:00:05.584000","2026-02-24T00:00:05.585000","2026-02-24T00:00:05.586000","2026-02-24T00:00:05.588000","2026-02-24T00:00:05.589000","2026-02-24T00:00:05.590000","2026-02-24T00:00:05.591000","2026-02-24T00:00:05.595000","2026-02-24T00:00:05.597000","2026-02-24T00:00:05.599000","2026-02-24T00:00:05.601000","2026-02-24T00:00:05.602000","2026-02-24T00:00:05.603000","2026-02-24T00:00:05.604000","2026-02-24T00:00:05.606000","2026-02-24T00:00:05.607000","2026-02-24T00:00:05.608000","2026-02-24T00:00:05.609000","2026-02-24T00:00:05.610000","2026-02-24T00:00:05.611000","2026-02-24T00:00:05.613000","2026-02-24T00:00:05.614000","2026-02-24T00:00:05.615000","2026-02-24T00:00:05.616000","2026-02-24T00:00:05.620000","2026-02-24T00:00:05.622000","2026-02-24T00:00:05.622000","2026-02-24T00:00:05.624000","2026-02-24T00:00:05.625000","2026-02-24T00:00:05.626000","2026-02-24T00:00:05.628000","2026-02-24T00:00:05.629000","2026-02-24T00:00:05.630000","2026-02-24T00:00:05.632000","2026-02-24T00:00:05.633000","2026-02-24T00:00:05.634000","2026-02-24T00:00:05.636000","2026-02-24T00:00:05.637000","2026-02-24T00:00:05.639000","2026-02-24T00:00:05.640000","2026-02-24T00:00:05.641000","2026-02-24T00:00:05.643000","2026-02-24T00:00:05.644000","2026-02-24T00:00:05.646000","2026-02-24T00:00:05.647000","2026-02-24T00:00:05.648000","2026-02-24T00:00:05.650000","2026-02-24T00:00:05.651000","2026-02-24T00:00:05.652000","2026-02-24T00:00:05.654000","2026-02-24T00:00:05.655000","2026-02-24T00:00:05.657000","2026-02-24T00:00:05.658000","2026-02-24T00:00:05.659000","2026-02-24T00:00:05.661000","2026-02-24T00:00:05.662000","2026-02-24T00:00:05.664000","2026-02-24T00:00:05.665000","2026-02-24T00:00:05.666000","2026-02-24T00:00:05.668000","2026-02-24T00:00:05.669000","2026-02-24T00:00:05.671000","2026-02-24T00:00:05.673000","2026-02-24T00:00:05.674000","2026-02-24T00:00:05.675000","2026-02-24T00:00:05.677000","2026-02-24T00:00:05.678000","2026-02-24T00:00:05.680000","2026-02-24T00:00:05.681000","2026-02-24T00:00:05.682000","2026-02-24T00:00:05.683000","2026-02-24T00:00:05.685000","2026-02-24T00:00:05.687000","2026-02-24T00:00:05.688000","2026-02-24T00:00:05.690000","2026-02-24T00:00:05.691000","2026-02-24T00:00:05.692000","2026-02-24T00:00:05.693000","2026-02-24T00:00:05.695000","2026-02-24T00:00:05.696000","2026-02-24T00:00:05.697000","2026-02-24T00:00:05.699000","2026-02-24T00:00:05.700000","2026-02-24T00:00:05.701000","2026-02-24T00:00:05.703000","2026-02-24T00:00:05.704000","2026-02-24T00:00:05.706000","2026-02-24T00:00:05.707000","2026-02-24T00:00:05.708000","2026-02-24T00:00:05.710000","2026-02-24T00:00:05.711000","2026-02-24T00:00:05.712000","2026-02-24T00:00:05.714000","2026-02-24T00:00:05.715000","2026-02-24T00:00:05.717000","2026-02-24T00:00:05.718000","2026-02-24T00:00:05.719000","2026-02-24T00:00:05.721000","2026-02-24T00:00:05.729000","2026-02-24T00:00:05.730000","2026-02-24T00:00:05.732000","2026-02-24T00:00:05.733000","2026-02-24T00:00:05.735000","2026-02-24T00:00:05.736000","2026-02-24T00:00:05.738000","2026-02-24T00:00:05.739000","2026-02-24T00:00:05.740000","2026-02-24T00:00:05.741000","2026-02-24T00:00:05.743000","2026-02-24T00:00:05.744000","2026-02-24T00:00:05.745000","2026-02-24T00:00:05.747000","2026-02-24T00:00:05.748000","2026-02-24T00:00:05.749000","2026-02-24T00:00:05.751000","2026-02-24T00:00:05.758000","2026-02-24T00:00:05.759000","2026-02-24T00:00:05.760000","2026-02-24T00:00:05.762000","2026-02-24T00:00:05.763000","2026-02-24T00:00:05.764000","2026-02-24T00:00:05.766000","2026-02-24T00:00:05.767000","2026-02-24T00:00:05.769000","2026-02-24T00:00:05.770000","2026-02-24T00:00:05.771000","2026-02-24T00:00:05.773000","2026-02-24T00:00:05.774000","2026-02-24T00:00:05.776000","2026-02-24T00:00:05.777000","2026-02-24T00:00:05.778000","2026-02-24T00:00:05.780000","2026-02-24T00:00:05.781000","2026-02-24T00:00:05.782000","2026-02-24T00:00:05.784000","2026-02-24T00:00:05.785000","2026-02-24T00:00:05.787000","2026-02-24T00:00:05.788000","2026-02-24T00:00:05.789000","2026-02-24T00:00:05.791000","2026-02-24T00:00:05.792000","2026-02-24T00:00:05.793000","2026-02-24T00:00:05.795000","2026-02-24T00:00:05.796000","2026-02-24T00:00:05.798000","2026-02-24T00:00:05.799000","2026-02-24T00:00:05.800000","2026-02-24T00:00:05.802000","2026-02-24T00:00:05.803000","2026-02-24T00:00:05.805000","2026-02-24T00:00:05.806000","2026-02-24T00:00:05.807000","2026-02-24T00:00:05.809000","2026-02-24T00:00:05.810000","2026-02-24T00:00:05.812000","2026-02-24T00:00:05.813000","2026-02-24T00:00:05.815000","2026-02-24T00:00:05.816000","2026-02-24T00:00:05.817000","2026-02-24T00:00:05.819000","2026-02-24T00:00:05.820000","2026-02-24T00:00:05.822000","2026-02-24T00:00:05.823000","2026-02-24T00:00:05.825000","2026-02-24T00:00:05.826000","2026-02-24T00:00:05.827000","2026-02-24T00:00:05.829000","2026-02-24T00:00:05.830000","2026-02-24T00:00:05.832000","2026-02-24T00:00:05.833000","2026-02-24T00:00:05.835000","2026-02-24T00:00:05.836000","2026-02-24T00:00:05.837000","2026-02-24T00:00:05.839000","2026-02-24T00:00:05.840000","2026-02-24T00:00:05.842000","2026-02-24T00:00:05.843000","2026-02-24T00:00:05.845000","2026-02-24T00:00:05.846000","2026-02-24T00:00:05.847000","2026-02-24T00:00:05.849000","2026-02-24T00:00:05.850000","2026-02-24T00:00:05.852000","2026-02-24T00:00:05.853000","2026-02-24T00:00:05.855000","2026-02-24T00:00:05.856000","2026-02-24T00:00:05.858000","2026-02-24T00:00:05.859000","2026-02-24T00:00:05.860000","2026-02-24T00:00:05.862000","2026-02-24T00:00:05.863000","2026-02-24T00:00:05.865000","2026-02-24T00:00:05.866000","2026-02-24T00:00:05.867000","2026-02-24T00:00:05.869000","2026-02-24T00:00:05.870000","2026-02-24T00:00:05.872000","2026-02-24T00:00:05.873000","2026-02-24T00:00:05.875000","2026-02-24T00:00:05.876000","2026-02-24T00:00:05.878000","2026-02-24T00:00:05.879000","2026-02-24T00:00:05.880000","2026-02-24T00:00:05.882000","2026-02-24T00:00:05.883000","2026-02-24T00:00:05.885000","2026-02-24T00:00:05.886000","2026-02-24T00:00:05.888000","2026-02-24T00:00:05.889000","2026-02-24T00:00:05.890000","2026-02-24T00:00:05.892000","2026-02-24T00:00:05.893000","2026-02-24T00:00:05.895000","2026-02-24T00:00:05.896000","2026-02-24T00:00:05.898000","2026-02-24T00:00:05.899000","2026-02-24T00:00:05.900000","2026-02-24T00:00:05.902000","2026-02-24T00:00:05.903000","2026-02-24T00:00:05.905000","2026-02-24T00:00:05.906000","2026-02-24T00:00:05.908000","2026-02-24T00:00:05.909000","2026-02-24T00:00:05.911000","2026-02-24T00:00:05.912000","2026-02-24T00:00:05.913000","2026-02-24T00:00:05.915000","2026-02-24T00:00:05.916000","2026-02-24T00:00:05.918000","2026-02-24T00:00:05.919000","2026-02-24T00:00:05.921000","2026-02-24T00:00:05.922000","2026-02-24T00:00:05.923000","2026-02-24T00:00:05.925000","2026-02-24T00:00:05.926000","2026-02-24T00:00:05.928000","2026-02-24T00:00:05.929000","2026-02-24T00:00:05.931000","2026-02-24T00:00:05.932000","2026-02-24T00:00:05.933000","2026-02-24T00:00:05.935000","2026-02-24T00:00:05.936000","2026-02-24T00:00:05.938000","2026-02-24T00:00:05.939000","2026-02-24T00:00:05.941000","2026-02-24T00:00:05.942000","2026-02-24T00:00:05.944000","2026-02-24T00:00:05.945000","2026-02-24T00:00:05.946000","2026-02-24T00:00:05.948000","2026-02-24T00:00:05.949000","2026-02-24T00:00:05.951000","2026-02-24T00:00:05.952000","2026-02-24T00:00:05.954000","2026-02-24T00:00:05.955000","2026-02-24T00:00:05.956000","2026-02-24T00:00:05.957000","2026-02-24T00:00:05.959000","2026-02-24T00:00:05.960000","2026-02-24T00:00:05.961000","2026-02-24T00:00:05.962000","2026-02-24T00:00:05.964000","2026-02-24T00:00:05.965000","2026-02-24T00:00:05.966000","2026-02-24T00:00:05.968000","2026-02-24T00:00:05.969000","2026-02-24T00:00:05.970000","2026-02-24T00:00:05.972000","2026-02-24T00:00:05.973000","2026-02-24T00:00:05.974000","2026-02-24T00:00:05.976000","2026-02-24T00:00:05.977000","2026-02-24T00:00:05.978000","2026-02-24T00:00:05.980000","2026-02-24T00:00:05.981000","2026-02-24T00:00:05.982000","2026-02-24T00:00:05.983000","2026-02-24T00:00:05.985000","2026-02-24T00:00:05.986000","2026-02-24T00:00:05.987000","2026-02-24T00:00:05.989000","2026-02-24T00:00:05.990000","2026-02-24T00:00:05.991000","2026-02-24T00:00:05.993000","2026-02-24T00:00:05.994000","2026-02-24T00:00:05.995000","2026-02-24T00:00:05.997000","2026-02-24T00:00:05.998000","2026-02-24T00:00:05.999000","2026-02-24T00:00:06.001000","2026-02-24T00:00:06.002000","2026-02-24T00:00:06.003000","2026-02-24T00:00:06.005000","2026-02-24T00:00:06.006000","2026-02-24T00:00:06.007000","2026-02-24T00:00:06.009000","2026-02-24T00:00:06.010000","2026-02-24T00:00:06.011000","2026-02-24T00:00:06.013000","2026-02-24T00:00:06.014000","2026-02-24T00:00:06.015000","2026-02-24T00:00:06.017000","2026-02-24T00:00:06.018000","2026-02-24T00:00:06.019000","2026-02-24T00:00:06.020000","2026-02-24T00:00:06.022000","2026-02-24T00:00:06.023000","2026-02-24T00:00:06.024000","2026-02-24T00:00:06.026000","2026-02-24T00:00:06.027000","2026-02-24T00:00:06.028000","2026-02-24T00:00:06.030000","2026-02-24T00:00:06.031000","2026-02-24T00:00:06.032000","2026-02-24T00:00:06.034000","2026-02-24T00:00:06.035000","2026-02-24T00:00:06.036000","2026-02-24T00:00:06.038000","2026-02-24T00:00:06.039000","2026-02-24T00:00:06.040000","2026-02-24T00:00:06.041000","2026-02-24T00:00:06.043000","2026-02-24T00:00:06.044000","2026-02-24T00:00:06.045000","2026-02-24T00:00:06.047000","2026-02-24T00:00:06.048000","2026-02-24T00:00:06.049000","2026-02-24T00:00:06.050000","2026-02-24T00:00:06.052000","2026-02-24T00:00:06.053000","2026-02-24T00:00:06.054000","2026-02-24T00:00:06.055000","2026-02-24T00:00:06.057000","2026-02-24T00:00:06.058000","2026-02-24T00:00:06.059000","2026-02-24T00:00:06.061000","2026-02-24T00:00:06.062000","2026-02-24T00:00:06.063000","2026-02-24T00:00:06.065000","2026-02-24T00:00:06.066000","2026-02-24T00:00:06.067000","2026-02-24T00:00:06.068000","2026-02-24T00:00:06.070000","2026-02-24T00:00:06.071000","2026-02-24T00:00:06.072000","2026-02-24T00:00:06.073000","2026-02-24T00:00:06.075000","2026-02-24T00:00:06.076000","2026-02-24T00:00:06.077000","2026-02-24T00:00:06.079000","2026-02-24T00:00:06.080000","2026-02-24T00:00:06.081000","2026-02-24T00:00:06.082000","2026-02-24T00:00:06.084000","2026-02-24T00:00:06.085000","2026-02-24T00:00:06.086000","2026-02-24T00:00:06.088000","2026-02-24T00:00:06.089000","2026-02-24T00:00:06.090000","2026-02-24T00:00:06.092000","2026-02-24T00:00:06.093000","2026-02-24T00:00:06.094000","2026-02-24T00:00:06.095000","2026-02-24T00:00:06.097000","2026-02-24T00:00:06.098000","2026-02-24T00:00:06.099000","2026-02-24T00:00:06.100000","2026-02-24T00:00:06.102000","2026-02-24T00:00:06.103000","2026-02-24T00:00:06.104000","2026-02-24T00:00:06.106000","2026-02-24T00:00:06.107000","2026-02-24T00:00:06.108000","2026-02-24T00:00:06.110000","2026-02-24T00:00:06.111000","2026-02-24T00:00:06.112000","2026-02-24T00:00:06.113000","2026-02-24T00:00:06.115000","2026-02-24T00:00:06.116000","2026-02-24T00:00:06.117000","2026-02-24T00:00:06.118000","2026-02-24T00:00:06.120000","2026-02-24T00:00:06.121000","2026-02-24T00:00:06.122000","2026-02-24T00:00:06.124000","2026-02-24T00:00:06.125000","2026-02-24T00:00:06.126000","2026-02-24T00:00:06.128000","2026-02-24T00:00:06.130000","2026-02-24T00:00:06.132000","2026-02-24T00:00:06.133000","2026-02-24T00:00:06.135000","2026-02-24T00:00:06.136000","2026-02-24T00:00:05.492000","2026-02-24T00:00:05.493000","2026-02-24T00:00:05.494000","2026-02-24T00:00:05.495000","2026-02-24T00:00:05.497000","2026-02-24T00:00:05.498000","2026-02-24T00:00:05.499000","2026-02-24T00:00:05.500000","2026-02-24T00:00:05.501000","2026-02-24T00:00:05.503000","2026-02-24T00:00:05.504000","2026-02-24T00:00:05.505000","2026-02-24T00:00:05.506000","2026-02-24T00:00:05.507000","2026-02-24T00:00:05.508000","2026-02-24T00:00:05.509000","2026-02-24T00:00:05.511000","2026-02-24T00:00:05.512000","2026-02-24T00:00:05.513000","2026-02-24T00:00:05.514000","2026-02-24T00:00:05.515000","2026-02-24T00:00:05.517000","2026-02-24T00:00:05.518000","2026-02-24T00:00:05.519000","2026-02-24T00:00:05.520000","2026-02-24T00:00:05.521000","2026-02-24T00:00:05.522000","2026-02-24T00:00:05.524000","2026-02-24T00:00:05.525000","2026-02-24T00:00:05.526000","2026-02-24T00:00:05.527000","2026-02-24T00:00:05.528000","2026-02-24T00:00:05.529000","2026-02-24T00:00:05.531000","2026-02-24T00:00:05.532000","2026-02-24T00:00:05.533000","2026-02-24T00:00:05.534000","2026-02-24T00:00:05.535000","2026-02-24T00:00:05.537000","2026-02-24T00:00:05.538000","2026-02-24T00:00:05.540000","2026-02-24T00:00:05.540000","2026-02-24T00:00:05.541000","2026-02-24T00:00:05.542000","2026-02-24T00:00:05.544000","2026-02-24T00:00:05.545000","2026-02-24T00:00:05.546000","2026-02-24T00:00:05.547000","2026-02-24T00:00:05.549000","2026-02-24T00:00:05.550000","2026-02-24T00:00:05.551000","2026-02-24T00:00:05.552000","2026-02-24T00:00:05.553000","2026-02-24T00:00:05.555000","2026-02-24T00:00:05.556000","2026-02-24T00:00:05.557000","2026-02-24T00:00:05.558000","2026-02-24T00:00:05.559000","2026-02-24T00:00:05.560000","2026-02-24T00:00:05.562000","2026-02-24T00:00:05.563000","2026-02-24T00:00:05.564000","2026-02-24T00:00:05.565000","2026-02-24T00:00:05.566000","2026-02-24T00:00:05.568000","2026-02-24T00:00:05.569000","2026-02-24T00:00:05.570000","2026-02-24T00:00:05.571000","2026-02-24T00:00:05.572000","2026-02-24T00:00:05.573000","2026-02-24T00:00:05.575000","2026-02-24T00:00:05.576000","2026-02-24T00:00:05.577000","2026-02-24T00:00:05.578000","2026-02-24T00:00:05.579000","2026-02-24T00:00:05.581000","2026-02-24T00:00:05.582000","2026-02-24T00:00:05.583000","2026-02-24T00:00:05.584000","2026-02-24T00:00:05.585000","2026-02-24T00:00:05.587000","2026-02-24T00:00:05.588000","2026-02-24T00:00:05.589000","2026-02-24T00:00:05.590000","2026-02-24T00:00:05.592000","2026-02-24T00:00:05.596000","2026-02-24T00:00:05.597000","2026-02-24T00:00:05.599000","2026-02-24T00:00:05.601000","2026-02-24T00:00:05.602000","2026-02-24T00:00:05.603000","2026-02-24T00:00:05.604000","2026-02-24T00:00:05.606000","2026-02-24T00:00:05.607000","2026-02-24T00:00:05.608000","2026-02-24T00:00:05.609000","2026-02-24T00:00:05.610000","2026-02-24T00:00:05.612000","2026-02-24T00:00:05.613000","2026-02-24T00:00:05.614000","2026-02-24T00:00:05.615000","2026-02-24T00:00:05.616000","2026-02-24T00:00:05.618000","2026-02-24T00:00:05.620000","2026-02-24T00:00:05.622000","2026-02-24T00:00:05.622000","2026-02-24T00:00:05.624000","2026-02-24T00:00:05.625000","2026-02-24T00:00:05.626000","2026-02-24T00:00:05.628000","2026-02-24T00:00:05.629000","2026-02-24T00:00:05.630000","2026-02-24T00:00:05.632000","2026-02-24T00:00:05.633000","2026-02-24T00:00:05.634000","2026-02-24T00:00:05.636000","2026-02-24T00:00:05.637000","2026-02-24T00:00:05.639000","2026-02-24T00:00:05.640000","2026-02-24T00:00:05.642000","2026-02-24T00:00:05.643000","2026-02-24T00:00:05.644000","2026-02-24T00:00:05.646000","2026-02-24T00:00:05.647000","2026-02-24T00:00:05.648000","2026-02-24T00:00:05.650000","2026-02-24T00:00:05.651000","2026-02-24T00:00:05.652000","2026-02-24T00:00:05.654000","2026-02-24T00:00:05.655000","2026-02-24T00:00:05.657000","2026-02-24T00:00:05.658000","2026-02-24T00:00:05.659000","2026-02-24T00:00:05.661000","2026-02-24T00:00:05.662000","2026-02-24T00:00:05.664000","2026-02-24T00:00:05.665000","2026-02-24T00:00:05.666000","2026-02-24T00:00:05.668000","2026-02-24T00:00:05.669000","2026-02-24T00:00:05.671000","2026-02-24T00:00:05.673000","2026-02-24T00:00:05.674000","2026-02-24T00:00:05.676000","2026-02-24T00:00:05.677000","2026-02-24T00:00:05.678000","2026-02-24T00:00:05.680000","2026-02-24T00:00:05.682000","2026-02-24T00:00:05.682000","2026-02-24T00:00:05.683000","2026-02-24T00:00:05.685000","2026-02-24T00:00:05.687000","2026-02-24T00:00:05.688000","2026-02-24T00:00:05.690000","2026-02-24T00:00:05.691000","2026-02-24T00:00:05.692000","2026-02-24T00:00:05.694000","2026-02-24T00:00:05.695000","2026-02-24T00:00:05.696000","2026-02-24T00:00:05.697000","2026-02-24T00:00:05.699000","2026-02-24T00:00:05.700000","2026-02-24T00:00:05.702000","2026-02-24T00:00:05.703000","2026-02-24T00:00:05.704000","2026-02-24T00:00:05.706000","2026-02-24T00:00:05.707000","2026-02-24T00:00:05.708000","2026-02-24T00:00:05.710000","2026-02-24T00:00:05.711000","2026-02-24T00:00:05.713000","2026-02-24T00:00:05.714000","2026-02-24T00:00:05.715000","2026-02-24T00:00:05.717000","2026-02-24T00:00:05.718000","2026-02-24T00:00:05.719000","2026-02-24T00:00:05.721000","2026-02-24T00:00:05.729000","2026-02-24T00:00:05.731000","2026-02-24T00:00:05.732000","2026-02-24T00:00:05.733000","2026-02-24T00:00:05.735000","2026-02-24T00:00:05.736000","2026-02-24T00:00:05.738000","2026-02-24T00:00:05.739000","2026-02-24T00:00:05.740000","2026-02-24T00:00:05.742000","2026-02-24T00:00:05.743000","2026-02-24T00:00:05.744000","2026-02-24T00:00:05.745000","2026-02-24T00:00:05.747000","2026-02-24T00:00:05.748000","2026-02-24T00:00:05.750000","2026-02-24T00:00:05.751000","2026-02-24T00:00:05.758000","2026-02-24T00:00:05.760000","2026-02-24T00:00:05.760000","2026-02-24T00:00:05.762000","2026-02-24T00:00:05.763000","2026-02-24T00:00:05.765000","2026-02-24T00:00:05.766000","2026-02-24T00:00:05.767000","2026-02-24T00:00:05.769000","2026-02-24T00:00:05.770000","2026-02-24T00:00:05.772000","2026-02-24T00:00:05.773000","2026-02-24T00:00:05.774000","2026-02-24T00:00:05.776000","2026-02-24T00:00:05.777000","2026-02-24T00:00:05.778000","2026-02-24T00:00:05.780000","2026-02-24T00:00:05.781000","2026-02-24T00:00:05.783000","2026-02-24T00:00:05.784000","2026-02-24T00:00:05.785000","2026-02-24T00:00:05.787000","2026-02-24T00:00:05.788000","2026-02-24T00:00:05.789000","2026-02-24T00:00:05.791000","2026-02-24T00:00:05.792000","2026-02-24T00:00:05.794000","2026-02-24T00:00:05.795000","2026-02-24T00:00:05.796000","2026-02-24T00:00:05.798000","2026-02-24T00:00:05.799000","2026-02-24T00:00:05.801000","2026-02-24T00:00:05.802000","2026-02-24T00:00:05.803000","2026-02-24T00:00:05.805000","2026-02-24T00:00:05.806000","2026-02-24T00:00:05.808000","2026-02-24T00:00:05.809000","2026-02-24T00:00:05.810000","2026-02-24T00:00:05.812000","2026-02-24T00:00:05.813000","2026-02-24T00:00:05.552000","2026-02-24T00:00:05.552000","2026-02-24T00:00:05.553000","2026-02-24T00:00:05.555000","2026-02-24T00:00:05.556000","2026-02-24T00:00:05.557000","2026-02-24T00:00:05.558000","2026-02-24T00:00:05.559000","2026-02-24T00:00:05.561000","2026-02-24T00:00:05.562000","2026-02-24T00:00:05.563000","2026-02-24T00:00:05.564000","2026-02-24T00:00:05.565000","2026-02-24T00:00:05.566000","2026-02-24T00:00:05.568000","2026-02-24T00:00:05.569000","2026-02-24T00:00:05.570000","2026-02-24T00:00:05.571000","2026-02-24T00:00:05.572000","2026-02-24T00:00:05.573000","2026-02-24T00:00:05.575000","2026-02-24T00:00:05.576000","2026-02-24T00:00:05.577000","2026-02-24T00:00:05.578000","2026-02-24T00:00:05.579000","2026-02-24T00:00:05.581000","2026-02-24T00:00:05.582000","2026-02-24T00:00:05.583000","2026-02-24T00:00:05.585000","2026-02-24T00:00:05.585000","2026-02-24T00:00:05.587000","2026-02-24T00:00:05.588000","2026-02-24T00:00:05.589000","2026-02-24T00:00:05.590000","2026-02-24T00:00:05.592000","2026-02-24T00:00:05.596000","2026-02-24T00:00:05.597000","2026-02-24T00:00:05.599000","2026-02-24T00:00:05.601000","2026-02-24T00:00:05.602000","2026-02-24T00:00:05.603000","2026-02-24T00:00:05.604000","2026-02-24T00:00:05.606000","2026-02-24T00:00:05.607000","2026-02-24T00:00:05.608000","2026-02-24T00:00:05.609000","2026-02-24T00:00:05.610000","2026-02-24T00:00:05.612000","2026-02-24T00:00:05.613000","2026-02-24T00:00:05.614000","2026-02-24T00:00:05.615000","2026-02-24T00:00:05.616000","2026-02-24T00:00:05.618000","2026-02-24T00:00:05.620000","2026-02-24T00:00:05.622000","2026-02-24T00:00:05.622000","2026-02-24T00:00:05.624000","2026-02-24T00:00:05.625000","2026-02-24T00:00:05.626000","2026-02-24T00:00:05.628000","2026-02-24T00:00:05.629000","2026-02-24T00:00:05.630000","2026-02-24T00:00:05.632000","2026-02-24T00:00:05.633000","2026-02-24T00:00:05.634000","2026-02-24T00:00:05.636000","2026-02-24T00:00:05.637000","2026-02-24T00:00:05.639000","2026-02-24T00:00:05.640000","2026-02-24T00:00:05.642000","2026-02-24T00:00:05.643000","2026-02-24T00:00:05.644000","2026-02-24T00:00:05.646000","2026-02-24T00:00:05.647000","2026-02-24T00:00:05.648000","2026-02-24T00:00:05.650000","2026-02-24T00:00:05.651000","2026-02-24T00:00:05.653000","2026-02-24T00:00:05.654000","2026-02-24T00:00:05.655000","2026-02-24T00:00:05.657000","2026-02-24T00:00:05.658000","2026-02-24T00:00:05.659000","2026-02-24T00:00:05.661000","2026-02-24T00:00:05.662000","2026-02-24T00:00:05.664000","2026-02-24T00:00:05.665000","2026-02-24T00:00:05.666000","2026-02-24T00:00:05.668000","2026-02-24T00:00:05.669000","2026-02-24T00:00:05.671000","2026-02-24T00:00:05.673000","2026-02-24T00:00:05.674000","2026-02-24T00:00:05.676000","2026-02-24T00:00:05.677000","2026-02-24T00:00:05.678000","2026-02-24T00:00:05.680000","2026-02-24T00:00:05.682000","2026-02-24T00:00:05.682000","2026-02-24T00:00:05.683000","2026-02-24T00:00:05.685000","2026-02-24T00:00:05.687000","2026-02-24T00:00:05.688000","2026-02-24T00:00:05.690000","2026-02-24T00:00:05.691000","2026-02-24T00:00:05.692000","2026-02-24T00:00:05.694000","2026-02-24T00:00:05.695000","2026-02-24T00:00:05.696000","2026-02-24T00:00:05.697000","2026-02-24T00:00:05.699000","2026-02-24T00:00:05.700000","2026-02-24T00:00:05.702000","2026-02-24T00:00:05.703000","2026-02-24T00:00:05.704000","2026-02-24T00:00:05.706000","2026-02-24T00:00:05.707000","2026-02-24T00:00:05.709000","2026-02-24T00:00:05.710000","2026-02-24T00:00:05.711000","2026-02-24T00:00:05.713000","2026-02-24T00:00:05.714000","2026-02-24T00:00:05.715000","2026-02-24T00:00:05.717000","2026-02-24T00:00:05.718000","2026-02-24T00:00:05.719000","2026-02-24T00:00:05.721000","2026-02-24T00:00:05.584000","2026-02-24T00:00:05.585000","2026-02-24T00:00:05.587000","2026-02-24T00:00:05.588000","2026-02-24T00:00:05.589000","2026-02-24T00:00:05.590000","2026-02-24T00:00:05.592000","2026-02-24T00:00:05.596000","2026-02-24T00:00:05.597000","2026-02-24T00:00:05.599000","2026-02-24T00:00:05.601000","2026-02-24T00:00:05.602000","2026-02-24T00:00:05.603000","2026-02-24T00:00:05.604000","2026-02-24T00:00:05.606000","2026-02-24T00:00:05.607000","2026-02-24T00:00:05.608000","2026-02-24T00:00:05.609000","2026-02-24T00:00:05.610000","2026-02-24T00:00:05.612000","2026-02-24T00:00:05.613000","2026-02-24T00:00:05.614000","2026-02-24T00:00:05.615000","2026-02-24T00:00:05.616000","2026-02-24T00:00:05.622000","2026-02-24T00:00:05.622000","2026-02-24T00:00:05.624000","2026-02-24T00:00:05.625000","2026-02-24T00:00:05.626000","2026-02-24T00:00:05.628000","2026-02-24T00:00:05.629000","2026-02-24T00:00:05.630000","2026-02-24T00:00:05.632000","2026-02-24T00:00:05.633000","2026-02-24T00:00:05.634000","2026-02-24T00:00:05.636000","2026-02-24T00:00:05.637000","2026-02-24T00:00:05.639000","2026-02-24T00:00:05.640000","2026-02-24T00:00:05.642000","2026-02-24T00:00:05.643000","2026-02-24T00:00:05.644000","2026-02-24T00:00:05.646000","2026-02-24T00:00:05.647000","2026-02-24T00:00:05.648000","2026-02-24T00:00:05.650000","2026-02-24T00:00:05.651000","2026-02-24T00:00:05.652000","2026-02-24T00:00:05.654000","2026-02-24T00:00:05.655000","2026-02-24T00:00:05.657000","2026-02-24T00:00:05.658000","2026-02-24T00:00:05.659000","2026-02-24T00:00:05.661000","2026-02-24T00:00:05.662000","2026-02-24T00:00:05.664000","2026-02-24T00:00:05.665000","2026-02-24T00:00:05.666000","2026-02-24T00:00:05.668000","2026-02-24T00:00:05.669000","2026-02-24T00:00:05.671000","2026-02-24T00:00:05.673000","2026-02-24T00:00:05.674000","2026-02-24T00:00:05.675000","2026-02-24T00:00:05.677000","2026-02-24T00:00:05.678000","2026-02-24T00:00:05.680000","2026-02-24T00:00:05.682000","2026-02-24T00:00:05.682000","2026-02-24T00:00:05.683000","2026-02-24T00:00:05.685000","2026-02-24T00:00:05.687000","2026-02-24T00:00:05.688000","2026-02-24T00:00:05.690000","2026-02-24T00:00:05.691000","2026-02-24T00:00:05.692000","2026-02-24T00:00:05.694000","2026-02-24T00:00:05.695000","2026-02-24T00:00:05.696000","2026-02-24T00:00:05.697000","2026-02-24T00:00:05.699000","2026-02-24T00:00:05.700000","2026-02-24T00:00:05.701000","2026-02-24T00:00:05.703000","2026-02-24T00:00:05.704000","2026-02-24T00:00:05.706000","2026-02-24T00:00:05.707000","2026-02-24T00:00:05.708000","2026-02-24T00:00:05.710000","2026-02-24T00:00:05.711000","2026-02-24T00:00:05.713000","2026-02-24T00:00:05.714000","2026-02-24T00:00:05.715000","2026-02-24T00:00:05.717000","2026-02-24T00:00:05.718000","2026-02-24T00:00:05.719000","2026-02-24T00:00:05.721000","2026-02-24T00:00:05.729000","2026-02-24T00:00:05.731000","2026-02-24T00:00:05.732000","2026-02-24T00:00:05.733000","2026-02-24T00:00:05.735000","2026-02-24T00:00:05.736000","2026-02-24T00:00:05.738000","2026-02-24T00:00:05.739000","2026-02-24T00:00:05.740000","2026-02-24T00:00:05.742000","2026-02-24T00:00:05.743000","2026-02-24T00:00:05.744000","2026-02-24T00:00:05.745000","2026-02-24T00:00:05.747000","2026-02-24T00:00:05.748000","2026-02-24T00:00:05.750000","2026-02-24T00:00:05.751000","2026-02-24T00:00:05.758000","2026-02-24T00:00:05.760000","2026-02-24T00:00:05.760000","2026-02-24T00:00:05.762000","2026-02-24T00:00:05.763000","2026-02-24T00:00:05.765000","2026-02-24T00:00:05.766000","2026-02-24T00:00:05.767000","2026-02-24T00:00:05.769000","2026-02-24T00:00:05.770000","2026-02-24T00:00:05.772000","2026-02-24T00:00:05.773000","2026-02-24T00:00:05.774000","2026-02-24T00:00:05.776000","2026-02-24T00:00:05.777000","2026-02-24T00:00:05.778000","2026-02-24T00:00:05.780000","2026-02-24T00:00:05.781000","2026-02-24T00:00:05.783000","2026-02-24T00:00:05.784000","2026-02-24T00:00:05.785000","2026-02-24T00:00:05.787000","2026-02-24T00:00:05.788000","2026-02-24T00:00:05.789000","2026-02-24T00:00:05.791000","2026-02-24T00:00:05.792000","2026-02-24T00:00:05.794000","2026-02-24T00:00:05.795000","2026-02-24T00:00:05.796000","2026-02-24T00:00:05.798000","2026-02-24T00:00:05.799000","2026-02-24T00:00:05.801000","2026-02-24T00:00:05.802000","2026-02-24T00:00:05.803000","2026-02-24T00:00:05.805000","2026-02-24T00:00:05.806000","2026-02-24T00:00:05.808000","2026-02-24T00:00:05.809000","2026-02-24T00:00:05.810000","2026-02-24T00:00:05.812000","2026-02-24T00:00:05.813000","2026-02-24T00:00:05.815000","2026-02-24T00:00:05.816000","2026-02-24T00:00:05.817000","2026-02-24T00:00:05.819000","2026-02-24T00:00:05.820000","2026-02-24T00:00:05.822000","2026-02-24T00:00:05.823000","2026-02-24T00:00:05.825000","2026-02-24T00:00:05.826000","2026-02-24T00:00:05.827000","2026-02-24T00:00:05.829000","2026-02-24T00:00:05.830000","2026-02-24T00:00:05.832000","2026-02-24T00:00:05.833000","2026-02-24T00:00:05.835000","2026-02-24T00:00:05.836000","2026-02-24T00:00:05.838000","2026-02-24T00:00:05.839000","2026-02-24T00:00:05.840000","2026-02-24T00:00:05.842000","2026-02-24T00:00:05.843000","2026-02-24T00:00:05.845000","2026-02-24T00:00:05.846000","2026-02-24T00:00:05.847000","2026-02-24T00:00:05.849000","2026-02-24T00:00:05.850000","2026-02-24T00:00:05.852000","2026-02-24T00:00:05.853000","2026-02-24T00:00:05.855000","2026-02-24T00:00:05.856000","2026-02-24T00:00:05.858000","2026-02-24T00:00:05.859000","2026-02-24T00:00:05.860000","2026-02-24T00:00:05.862000","2026-02-24T00:00:05.863000","2026-02-24T00:00:05.865000","2026-02-24T00:00:05.866000","2026-02-24T00:00:05.868000","2026-02-24T00:00:05.869000","2026-02-24T00:00:05.870000","2026-02-24T00:00:05.872000","2026-02-24T00:00:05.873000","2026-02-24T00:00:05.875000","2026-02-24T00:00:05.876000","2026-02-24T00:00:05.878000","2026-02-24T00:00:05.879000","2026-02-24T00:00:05.880000","2026-02-24T00:00:05.882000","2026-02-24T00:00:05.883000","2026-02-24T00:00:05.885000","2026-02-24T00:00:05.886000","2026-02-24T00:00:05.888000","2026-02-24T00:00:05.889000","2026-02-24T00:00:05.891000","2026-02-24T00:00:05.892000","2026-02-24T00:00:05.893000","2026-02-24T00:00:05.895000","2026-02-24T00:00:05.896000","2026-02-24T00:00:05.898000","2026-02-24T00:00:05.899000","2026-02-24T00:00:05.900000","2026-02-24T00:00:05.902000","2026-02-24T00:00:05.903000","2026-02-24T00:00:05.905000","2026-02-24T00:00:05.906000","2026-02-24T00:00:05.908000","2026-02-24T00:00:05.909000","2026-02-24T00:00:05.911000","2026-02-24T00:00:05.912000","2026-02-24T00:00:05.913000","2026-02-24T00:00:05.915000","2026-02-24T00:00:05.916000","2026-02-24T00:00:05.918000","2026-02-24T00:00:05.919000","2026-02-24T00:00:05.921000","2026-02-24T00:00:05.922000","2026-02-24T00:00:05.924000","2026-02-24T00:00:05.925000","2026-02-24T00:00:05.926000","2026-02-24T00:00:05.928000","2026-02-24T00:00:05.929000","2026-02-24T00:00:05.931000","2026-02-24T00:00:05.932000","2026-02-24T00:00:05.934000","2026-02-24T00:00:05.935000","2026-02-24T00:00:05.936000","2026-02-24T00:00:05.938000","2026-02-24T00:00:05.939000","2026-02-24T00:00:05.941000","2026-02-24T00:00:05.942000","2026-02-24T00:00:05.944000","2026-02-24T00:00:05.945000","2026-02-24T00:00:05.946000","2026-02-24T00:00:05.948000","2026-02-24T00:00:05.949000","2026-02-24T00:00:05.951000","2026-02-24T00:00:05.952000","2026-02-24T00:00:05.731000","2026-02-24T00:00:05.732000","2026-02-24T00:00:05.733000","2026-02-24T00:00:05.735000","2026-02-24T00:00:05.736000","2026-02-24T00:00:05.738000","2026-02-24T00:00:05.739000","2026-02-24T00:00:05.740000","2026-02-24T00:00:05.742000","2026-02-24T00:00:05.743000","2026-02-24T00:00:05.744000","2026-02-24T00:00:05.745000","2026-02-24T00:00:05.747000","2026-02-24T00:00:05.748000","2026-02-24T00:00:05.750000","2026-02-24T00:00:05.751000","2026-02-24T00:00:05.760000","2026-02-24T00:00:05.760000","2026-02-24T00:00:05.762000","2026-02-24T00:00:05.763000","2026-02-24T00:00:05.765000","2026-02-24T00:00:05.766000","2026-02-24T00:00:05.767000","2026-02-24T00:00:05.769000","2026-02-24T00:00:05.770000","2026-02-24T00:00:05.772000","2026-02-24T00:00:05.773000","2026-02-24T00:00:05.774000","2026-02-24T00:00:05.776000","2026-02-24T00:00:05.777000","2026-02-24T00:00:05.778000","2026-02-24T00:00:05.780000","2026-02-24T00:00:05.781000","2026-02-24T00:00:05.782000","2026-02-24T00:00:05.784000","2026-02-24T00:00:05.785000","2026-02-24T00:00:05.787000","2026-02-24T00:00:05.788000","2026-02-24T00:00:05.789000","2026-02-24T00:00:05.791000","2026-02-24T00:00:05.792000","2026-02-24T00:00:05.793000","2026-02-24T00:00:05.795000","2026-02-24T00:00:05.796000","2026-02-24T00:00:05.798000","2026-02-24T00:00:05.799000","2026-02-24T00:00:05.800000","2026-02-24T00:00:05.802000","2026-02-24T00:00:05.803000","2026-02-24T00:00:05.805000","2026-02-24T00:00:05.806000","2026-02-24T00:00:05.808000","2026-02-24T00:00:05.809000","2026-02-24T00:00:05.810000","2026-02-24T00:00:05.812000","2026-02-24T00:00:05.813000","2026-02-24T00:00:05.815000","2026-02-24T00:00:05.816000","2026-02-24T00:00:05.817000","2026-02-24T00:00:05.819000","2026-02-24T00:00:05.820000","2026-02-24T00:00:05.822000","2026-02-24T00:00:05.823000","2026-02-24T00:00:05.825000","2026-02-24T00:00:05.826000","2026-02-24T00:00:05.827000","2026-02-24T00:00:05.829000","2026-02-24T00:00:05.830000","2026-02-24T00:00:05.832000","2026-02-24T00:00:05.833000","2026-02-24T00:00:05.835000","2026-02-24T00:00:05.836000","2026-02-24T00:00:05.838000","2026-02-24T00:00:05.839000","2026-02-24T00:00:05.840000","2026-02-24T00:00:05.842000","2026-02-24T00:00:05.843000","2026-02-24T00:00:05.845000","2026-02-24T00:00:05.846000","2026-02-24T00:00:05.847000","2026-02-24T00:00:05.849000","2026-02-24T00:00:05.850000","2026-02-24T00:00:05.852000","2026-02-24T00:00:05.853000","2026-02-24T00:00:05.855000","2026-02-24T00:00:05.856000","2026-02-24T00:00:05.858000","2026-02-24T00:00:05.859000","2026-02-24T00:00:05.860000","2026-02-24T00:00:05.862000","2026-02-24T00:00:05.863000","2026-02-24T00:00:05.865000","2026-02-24T00:00:05.866000","2026-02-24T00:00:05.868000","2026-02-24T00:00:05.869000","2026-02-24T00:00:05.870000","2026-02-24T00:00:05.872000","2026-02-24T00:00:05.873000","2026-02-24T00:00:05.875000","2026-02-24T00:00:05.876000","2026-02-24T00:00:05.878000","2026-02-24T00:00:05.879000","2026-02-24T00:00:05.880000","2026-02-24T00:00:05.882000","2026-02-24T00:00:05.883000","2026-02-24T00:00:05.885000","2026-02-24T00:00:05.886000","2026-02-24T00:00:05.888000","2026-02-24T00:00:05.889000","2026-02-24T00:00:05.891000","2026-02-24T00:00:05.892000","2026-02-24T00:00:05.893000","2026-02-24T00:00:05.895000","2026-02-24T00:00:05.896000","2026-02-24T00:00:05.898000","2026-02-24T00:00:05.899000","2026-02-24T00:00:05.900000","2026-02-24T00:00:05.902000","2026-02-24T00:00:05.903000","2026-02-24T00:00:05.905000","2026-02-24T00:00:05.906000","2026-02-24T00:00:05.908000","2026-02-24T00:00:05.909000","2026-02-24T00:00:05.911000","2026-02-24T00:00:05.912000","2026-02-24T00:00:05.913000","2026-02-24T00:00:05.915000","2026-02-24T00:00:05.916000","2026-02-24T00:00:05.918000","2026-02-24T00:00:05.919000","2026-02-24T00:00:05.921000","2026-02-24T00:00:05.922000","2026-02-24T00:00:05.924000","2026-02-24T00:00:05.925000","2026-02-24T00:00:05.926000","2026-02-24T00:00:05.928000","2026-02-24T00:00:05.929000","2026-02-24T00:00:05.931000","2026-02-24T00:00:05.932000","2026-02-24T00:00:05.934000","2026-02-24T00:00:05.935000","2026-02-24T00:00:05.936000","2026-02-24T00:00:05.938000","2026-02-24T00:00:05.939000","2026-02-24T00:00:05.941000","2026-02-24T00:00:05.942000","2026-02-24T00:00:05.944000","2026-02-24T00:00:05.945000","2026-02-24T00:00:05.946000","2026-02-24T00:00:05.948000","2026-02-24T00:00:05.949000","2026-02-24T00:00:05.951000","2026-02-24T00:00:05.952000","2026-02-24T00:00:05.954000","2026-02-24T00:00:05.955000","2026-02-24T00:00:05.956000","2026-02-24T00:00:05.957000","2026-02-24T00:00:05.959000","2026-02-24T00:00:05.960000","2026-02-24T00:00:05.961000","2026-02-24T00:00:05.963000","2026-02-24T00:00:05.964000","2026-02-24T00:00:05.965000","2026-02-24T00:00:05.966000","2026-02-24T00:00:05.968000","2026-02-24T00:00:05.969000","2026-02-24T00:00:05.970000","2026-02-24T00:00:05.972000","2026-02-24T00:00:05.973000","2026-02-24T00:00:05.974000","2026-02-24T00:00:05.976000","2026-02-24T00:00:05.977000","2026-02-24T00:00:05.978000","2026-02-24T00:00:05.980000","2026-02-24T00:00:05.981000","2026-02-24T00:00:05.982000","2026-02-24T00:00:05.984000","2026-02-24T00:00:05.985000","2026-02-24T00:00:05.986000","2026-02-24T00:00:05.987000","2026-02-24T00:00:05.989000","2026-02-24T00:00:05.990000","2026-02-24T00:00:05.991000","2026-02-24T00:00:05.993000","2026-02-24T00:00:05.994000","2026-02-24T00:00:05.995000","2026-02-24T00:00:05.997000","2026-02-24T00:00:05.998000","2026-02-24T00:00:05.999000","2026-02-24T00:00:06.001000","2026-02-24T00:00:06.002000","2026-02-24T00:00:06.003000","2026-02-24T00:00:06.005000","2026-02-24T00:00:06.006000","2026-02-24T00:00:06.007000","2026-02-24T00:00:06.009000","2026-02-24T00:00:06.010000","2026-02-24T00:00:06.011000","2026-02-24T00:00:06.013000","2026-02-24T00:00:06.014000","2026-02-24T00:00:06.015000","2026-02-24T00:00:06.017000","2026-02-24T00:00:06.018000","2026-02-24T00:00:06.019000","2026-02-24T00:00:06.020000","2026-02-24T00:00:06.022000","2026-02-24T00:00:06.023000","2026-02-24T00:00:06.024000","2026-02-24T00:00:06.026000","2026-02-24T00:00:06.027000","2026-02-24T00:00:06.028000","2026-02-24T00:00:06.030000","2026-02-24T00:00:06.031000","2026-02-24T00:00:06.032000","2026-02-24T00:00:06.034000","2026-02-24T00:00:06.035000","2026-02-24T00:00:06.036000","2026-02-24T00:00:06.038000","2026-02-24T00:00:06.039000","2026-02-24T00:00:06.243000","2026-02-24T00:00:06.244000","2026-02-24T00:00:06.245000","2026-02-24T00:00:06.250000","2026-02-24T00:00:06.250000","2026-02-24T00:00:06.251000","2026-02-24T00:00:06.252000","2026-02-24T00:00:06.254000","2026-02-24T00:00:06.255000","2026-02-24T00:00:06.250000","2026-02-24T00:00:06.251000","2026-02-24T00:00:06.252000","2026-02-24T00:00:06.254000","2026-02-24T00:00:06.255000","2026-02-24T00:00:06.256000","2026-02-24T00:00:06.257000","2026-02-24T00:00:06.258000","2026-02-24T00:00:06.260000","2026-02-24T00:00:06.261000","2026-02-24T00:00:06.263000","2026-02-24T00:00:06.265000","2026-02-24T00:00:06.267000","2026-02-24T00:00:06.268000","2026-02-24T00:00:06.269000","2026-02-24T00:00:06.274000","2026-02-24T00:00:06.275000","2026-02-24T00:00:06.276000","2026-02-24T00:00:06.276000","2026-02-24T00:00:06.277000","2026-02-24T00:00:06.278000","2026-02-24T00:00:06.280000","2026-02-24T00:00:06.281000","2026-02-24T00:00:06.282000","2026-02-24T00:00:06.283000","2026-02-24T00:00:06.284000","2026-02-24T00:00:06.285000","2026-02-24T00:00:06.286000","2026-02-24T00:00:06.287000","2026-02-24T00:00:06.288000","2026-02-24T00:00:06.289000","2026-02-24T00:00:06.295000","2026-02-24T00:00:06.296000","2026-02-24T00:00:06.296000","2026-02-24T00:00:06.297000","2026-02-24T00:00:06.298000","2026-02-24T00:00:06.300000","2026-02-24T00:00:06.301000","2026-02-24T00:00:06.302000","2026-02-24T00:00:06.303000","2026-02-24T00:00:06.304000","2026-02-24T00:00:06.305000","2026-02-24T00:00:06.306000","2026-02-24T00:00:06.307000","2026-02-24T00:00:06.309000","2026-02-24T00:00:06.310000","2026-02-24T00:00:06.311000","2026-02-24T00:00:06.312000","2026-02-24T00:00:06.313000","2026-02-24T00:00:06.314000","2026-02-24T00:00:06.315000","2026-02-24T00:00:06.316000","2026-02-24T00:00:06.318000","2026-02-24T00:00:06.319000","2026-02-24T00:00:06.320000","2026-02-24T00:00:06.321000","2026-02-24T00:00:06.322000","2026-02-24T00:00:06.323000","2026-02-24T00:00:06.324000","2026-02-24T00:00:06.326000","2026-02-24T00:00:06.327000","2026-02-24T00:00:06.328000","2026-02-24T00:00:06.329000","2026-02-24T00:00:06.330000","2026-02-24T00:00:06.331000","2026-02-24T00:00:06.332000","2026-02-24T00:00:06.333000","2026-02-24T00:00:06.335000","2026-02-24T00:00:06.336000","2026-02-24T00:00:06.337000","2026-02-24T00:00:06.338000","2026-02-24T00:00:06.339000","2026-02-24T00:00:06.340000","2026-02-24T00:00:06.342000","2026-02-24T00:00:06.343000","2026-02-24T00:00:06.344000","2026-02-24T00:00:06.345000","2026-02-24T00:00:06.346000","2026-02-24T00:00:06.348000","2026-02-24T00:00:06.349000","2026-02-24T00:00:06.350000","2026-02-24T00:00:06.351000","2026-02-24T00:00:06.352000","2026-02-24T00:00:06.353000","2026-02-24T00:00:06.354000","2026-02-24T00:00:06.356000","2026-02-24T00:00:06.357000","2026-02-24T00:00:06.358000","2026-02-24T00:00:06.359000","2026-02-24T00:00:06.360000","2026-02-24T00:00:06.361000","2026-02-24T00:00:06.363000","2026-02-24T00:00:06.364000","2026-02-24T00:00:06.365000","2026-02-24T00:00:06.366000","2026-02-24T00:00:06.367000","2026-02-24T00:00:06.368000","2026-02-24T00:00:06.370000","2026-02-24T00:00:06.371000","2026-02-24T00:00:06.372000","2026-02-24T00:00:06.373000","2026-02-24T00:00:06.374000","2026-02-24T00:00:06.375000","2026-02-24T00:00:06.377000","2026-02-24T00:00:06.378000","2026-02-24T00:00:06.379000","2026-02-24T00:00:06.380000","2026-02-24T00:00:06.381000","2026-02-24T00:00:06.382000","2026-02-24T00:00:06.384000","2026-02-24T00:00:06.385000","2026-02-24T00:00:06.386000","2026-02-24T00:00:06.387000","2026-02-24T00:00:06.388000","2026-02-24T00:00:06.390000","2026-02-24T00:00:06.391000","2026-02-24T00:00:06.392000","2026-02-24T00:00:06.393000","2026-02-24T00:00:06.394000","2026-02-24T00:00:06.395000","2026-02-24T00:00:06.397000","2026-02-24T00:00:06.398000","2026-02-24T00:00:06.399000","2026-02-24T00:00:06.400000","2026-02-24T00:00:06.401000","2026-02-24T00:00:06.402000","2026-02-24T00:00:06.403000","2026-02-24T00:00:06.405000","2026-02-24T00:00:06.406000","2026-02-24T00:00:06.409000","2026-02-24T00:00:06.411000","2026-02-24T00:00:06.411000","2026-02-24T00:00:06.413000","2026-02-24T00:00:06.414000","2026-02-24T00:00:06.416000","2026-02-24T00:00:06.417000","2026-02-24T00:00:06.418000","2026-02-24T00:00:06.420000","2026-02-24T00:00:06.421000","2026-02-24T00:00:06.423000","2026-02-24T00:00:06.424000","2026-02-24T00:00:06.425000","2026-02-24T00:00:06.426000","2026-02-24T00:00:06.428000","2026-02-24T00:00:06.429000","2026-02-24T00:00:06.431000","2026-02-24T00:00:06.432000","2026-02-24T00:00:06.433000","2026-02-24T00:00:06.435000","2026-02-24T00:00:06.436000","2026-02-24T00:00:06.438000","2026-02-24T00:00:06.439000","2026-02-24T00:00:06.441000","2026-02-24T00:00:06.442000","2026-02-24T00:00:06.443000","2026-02-24T00:00:06.445000","2026-02-24T00:00:06.446000","2026-02-24T00:00:06.447000","2026-02-24T00:00:06.449000","2026-02-24T00:00:06.450000","2026-02-24T00:00:06.451000","2026-02-24T00:00:06.453000","2026-02-24T00:00:06.454000","2026-02-24T00:00:06.456000","2026-02-24T00:00:06.457000","2026-02-24T00:00:06.458000","2026-02-24T00:00:06.460000","2026-02-24T00:00:06.461000","2026-02-24T00:00:06.462000","2026-02-24T00:00:06.464000","2026-02-24T00:00:06.465000","2026-02-24T00:00:06.467000","2026-02-24T00:00:06.468000","2026-02-24T00:00:06.469000","2026-02-24T00:00:06.471000","2026-02-24T00:00:06.472000","2026-02-24T00:00:06.474000","2026-02-24T00:00:06.475000","2026-02-24T00:00:06.476000","2026-02-24T00:00:06.478000","2026-02-24T00:00:06.479000","2026-02-24T00:00:06.480000","2026-02-24T00:00:06.482000","2026-02-24T00:00:06.483000","2026-02-24T00:00:06.485000","2026-02-24T00:00:06.486000","2026-02-24T00:00:06.487000","2026-02-24T00:00:06.489000","2026-02-24T00:00:06.490000","2026-02-24T00:00:06.492000","2026-02-24T00:00:06.493000","2026-02-24T00:00:06.494000","2026-02-24T00:00:06.496000","2026-02-24T00:00:06.497000","2026-02-24T00:00:06.499000","2026-02-24T00:00:06.500000","2026-02-24T00:00:06.501000","2026-02-24T00:00:06.503000","2026-02-24T00:00:06.511000","2026-02-24T00:00:06.512000","2026-02-24T00:00:06.513000","2026-02-24T00:00:06.514000","2026-02-24T00:00:06.275000","2026-02-24T00:00:06.276000","2026-02-24T00:00:06.276000","2026-02-24T00:00:06.277000","2026-02-24T00:00:06.278000","2026-02-24T00:00:06.280000","2026-02-24T00:00:06.281000","2026-02-24T00:00:06.282000","2026-02-24T00:00:06.283000","2026-02-24T00:00:06.284000","2026-02-24T00:00:06.285000","2026-02-24T00:00:06.286000","2026-02-24T00:00:06.287000","2026-02-24T00:00:06.288000","2026-02-24T00:00:06.289000","2026-02-24T00:00:06.294000","2026-02-24T00:00:06.296000","2026-02-24T00:00:06.296000","2026-02-24T00:00:06.297000","2026-02-24T00:00:06.298000","2026-02-24T00:00:06.300000","2026-02-24T00:00:06.301000","2026-02-24T00:00:06.302000","2026-02-24T00:00:06.303000","2026-02-24T00:00:06.304000","2026-02-24T00:00:06.305000","2026-02-24T00:00:06.306000","2026-02-24T00:00:06.307000","2026-02-24T00:00:06.308000","2026-02-24T00:00:06.310000","2026-02-24T00:00:06.311000","2026-02-24T00:00:06.312000","2026-02-24T00:00:06.313000","2026-02-24T00:00:06.314000","2026-02-24T00:00:06.315000","2026-02-24T00:00:06.316000","2026-02-24T00:00:06.318000","2026-02-24T00:00:06.319000","2026-02-24T00:00:06.320000","2026-02-24T00:00:06.321000","2026-02-24T00:00:06.322000","2026-02-24T00:00:06.323000","2026-02-24T00:00:06.324000","2026-02-24T00:00:06.325000","2026-02-24T00:00:06.327000","2026-02-24T00:00:06.328000","2026-02-24T00:00:06.329000","2026-02-24T00:00:06.330000","2026-02-24T00:00:06.331000","2026-02-24T00:00:06.332000","2026-02-24T00:00:06.333000","2026-02-24T00:00:06.334000","2026-02-24T00:00:06.336000","2026-02-24T00:00:06.337000","2026-02-24T00:00:06.338000","2026-02-24T00:00:06.339000","2026-02-24T00:00:06.340000","2026-02-24T00:00:06.342000","2026-02-24T00:00:06.343000","2026-02-24T00:00:06.344000","2026-02-24T00:00:06.345000","2026-02-24T00:00:06.346000","2026-02-24T00:00:06.348000","2026-02-24T00:00:06.349000","2026-02-24T00:00:06.350000","2026-02-24T00:00:06.351000","2026-02-24T00:00:06.352000","2026-02-24T00:00:06.353000","2026-02-24T00:00:06.354000","2026-02-24T00:00:06.356000","2026-02-24T00:00:06.357000","2026-02-24T00:00:06.358000","2026-02-24T00:00:06.359000","2026-02-24T00:00:06.360000","2026-02-24T00:00:06.361000","2026-02-24T00:00:06.363000","2026-02-24T00:00:06.296000","2026-02-24T00:00:06.296000","2026-02-24T00:00:06.297000","2026-02-24T00:00:06.298000","2026-02-24T00:00:06.300000","2026-02-24T00:00:06.301000","2026-02-24T00:00:06.302000","2026-02-24T00:00:06.303000","2026-02-24T00:00:06.304000","2026-02-24T00:00:06.305000","2026-02-24T00:00:06.306000","2026-02-24T00:00:06.307000","2026-02-24T00:00:06.309000","2026-02-24T00:00:06.310000","2026-02-24T00:00:06.311000","2026-02-24T00:00:06.312000","2026-02-24T00:00:06.313000","2026-02-24T00:00:06.314000","2026-02-24T00:00:06.315000","2026-02-24T00:00:06.316000","2026-02-24T00:00:06.318000","2026-02-24T00:00:06.319000","2026-02-24T00:00:06.320000","2026-02-24T00:00:06.321000","2026-02-24T00:00:06.322000","2026-02-24T00:00:06.323000","2026-02-24T00:00:06.324000","2026-02-24T00:00:06.325000","2026-02-24T00:00:06.327000","2026-02-24T00:00:06.328000","2026-02-24T00:00:06.329000","2026-02-24T00:00:06.330000","2026-02-24T00:00:06.331000","2026-02-24T00:00:06.332000","2026-02-24T00:00:06.333000","2026-02-24T00:00:06.335000","2026-02-24T00:00:06.336000","2026-02-24T00:00:06.337000","2026-02-24T00:00:06.338000","2026-02-24T00:00:06.339000","2026-02-24T00:00:06.340000","2026-02-24T00:00:06.342000","2026-02-24T00:00:06.343000","2026-02-24T00:00:06.344000","2026-02-24T00:00:06.345000","2026-02-24T00:00:06.346000","2026-02-24T00:00:06.348000","2026-02-24T00:00:06.349000","2026-02-24T00:00:06.350000","2026-02-24T00:00:06.351000","2026-02-24T00:00:06.352000","2026-02-24T00:00:06.353000","2026-02-24T00:00:06.354000","2026-02-24T00:00:06.356000","2026-02-24T00:00:06.357000","2026-02-24T00:00:06.358000","2026-02-24T00:00:06.359000","2026-02-24T00:00:06.360000","2026-02-24T00:00:06.361000","2026-02-24T00:00:06.363000","2026-02-24T00:00:06.364000","2026-02-24T00:00:06.365000","2026-02-24T00:00:06.366000","2026-02-24T00:00:06.367000","2026-02-24T00:00:06.368000","2026-02-24T00:00:06.370000","2026-02-24T00:00:06.371000","2026-02-24T00:00:06.372000","2026-02-24T00:00:06.373000","2026-02-24T00:00:06.374000","2026-02-24T00:00:06.375000","2026-02-24T00:00:06.377000","2026-02-24T00:00:06.378000","2026-02-24T00:00:06.379000","2026-02-24T00:00:06.380000","2026-02-24T00:00:06.381000","2026-02-24T00:00:06.382000","2026-02-24T00:00:06.384000","2026-02-24T00:00:06.385000","2026-02-24T00:00:06.386000","2026-02-24T00:00:06.387000","2026-02-24T00:00:06.388000","2026-02-24T00:00:06.390000","2026-02-24T00:00:06.391000","2026-02-24T00:00:06.392000","2026-02-24T00:00:06.393000","2026-02-24T00:00:06.394000","2026-02-24T00:00:06.395000","2026-02-24T00:00:06.396000","2026-02-24T00:00:06.398000","2026-02-24T00:00:06.399000","2026-02-24T00:00:06.400000","2026-02-24T00:00:06.401000","2026-02-24T00:00:06.402000","2026-02-24T00:00:06.403000","2026-02-24T00:00:06.405000","2026-02-24T00:00:06.406000","2026-02-24T00:00:06.409000","2026-02-24T00:00:06.411000","2026-02-24T00:00:06.411000","2026-02-24T00:00:06.413000","2026-02-24T00:00:06.414000","2026-02-24T00:00:06.416000","2026-02-24T00:00:06.417000","2026-02-24T00:00:06.418000","2026-02-24T00:00:06.420000","2026-02-24T00:00:06.421000","2026-02-24T00:00:06.423000","2026-02-24T00:00:06.424000","2026-02-24T00:00:06.425000","2026-02-24T00:00:06.426000","2026-02-24T00:00:06.428000","2026-02-24T00:00:06.429000","2026-02-24T00:00:06.431000","2026-02-24T00:00:06.432000","2026-02-24T00:00:06.433000","2026-02-24T00:00:06.435000","2026-02-24T00:00:06.436000","2026-02-24T00:00:06.438000","2026-02-24T00:00:06.439000","2026-02-24T00:00:06.441000","2026-02-24T00:00:06.442000","2026-02-24T00:00:06.443000","2026-02-24T00:00:06.445000","2026-02-24T00:00:06.446000","2026-02-24T00:00:06.447000","2026-02-24T00:00:06.449000","2026-02-24T00:00:06.450000","2026-02-24T00:00:06.451000","2026-02-24T00:00:06.453000","2026-02-24T00:00:06.454000","2026-02-24T00:00:06.456000","2026-02-24T00:00:06.457000","2026-02-24T00:00:06.458000","2026-02-24T00:00:06.460000","2026-02-24T00:00:06.461000","2026-02-24T00:00:06.462000","2026-02-24T00:00:06.464000","2026-02-24T00:00:06.465000","2026-02-24T00:00:06.467000","2026-02-24T00:00:06.468000","2026-02-24T00:00:06.469000","2026-02-24T00:00:06.471000","2026-02-24T00:00:06.472000","2026-02-24T00:00:06.474000","2026-02-24T00:00:06.475000","2026-02-24T00:00:06.476000","2026-02-24T00:00:06.478000","2026-02-24T00:00:06.479000","2026-02-24T00:00:06.480000","2026-02-24T00:00:06.482000","2026-02-24T00:00:06.483000","2026-02-24T00:00:06.485000","2026-02-24T00:00:06.486000","2026-02-24T00:00:06.487000","2026-02-24T00:00:06.489000","2026-02-24T00:00:06.490000","2026-02-24T00:00:06.492000","2026-02-24T00:00:06.493000","2026-02-24T00:00:06.494000","2026-02-24T00:00:06.496000","2026-02-24T00:00:06.497000","2026-02-24T00:00:06.499000","2026-02-24T00:00:06.500000","2026-02-24T00:00:06.501000","2026-02-24T00:00:06.503000","2026-02-24T00:00:06.511000","2026-02-24T00:00:06.512000","2026-02-24T00:00:06.513000","2026-02-24T00:00:06.514000","2026-02-24T00:00:06.516000","2026-02-24T00:00:06.517000","2026-02-24T00:00:06.518000","2026-02-24T00:00:06.520000","2026-02-24T00:00:06.521000","2026-02-24T00:00:06.522000","2026-02-24T00:00:06.524000","2026-02-24T00:00:06.525000","2026-02-24T00:00:06.527000","2026-02-24T00:00:06.528000","2026-02-24T00:00:06.530000","2026-02-24T00:00:06.531000","2026-02-24T00:00:06.533000","2026-02-24T00:00:06.534000","2026-02-24T00:00:06.536000","2026-02-24T00:00:06.537000","2026-02-24T00:00:06.540000","2026-02-24T00:00:06.541000","2026-02-24T00:00:06.542000","2026-02-24T00:00:06.543000","2026-02-24T00:00:06.545000","2026-02-24T00:00:06.546000","2026-02-24T00:00:06.548000","2026-02-24T00:00:06.549000","2026-02-24T00:00:06.551000","2026-02-24T00:00:06.552000","2026-02-24T00:00:06.557000","2026-02-24T00:00:06.558000","2026-02-24T00:00:06.560000","2026-02-24T00:00:06.561000","2026-02-24T00:00:06.563000","2026-02-24T00:00:06.564000","2026-02-24T00:00:06.565000","2026-02-24T00:00:06.567000","2026-02-24T00:00:06.568000","2026-02-24T00:00:06.570000","2026-02-24T00:00:06.571000","2026-02-24T00:00:06.573000","2026-02-24T00:00:06.574000","2026-02-24T00:00:06.575000","2026-02-24T00:00:06.577000","2026-02-24T00:00:06.578000","2026-02-24T00:00:06.580000","2026-02-24T00:00:06.581000","2026-02-24T00:00:06.583000","2026-02-24T00:00:06.584000","2026-02-24T00:00:06.585000","2026-02-24T00:00:06.587000","2026-02-24T00:00:06.588000","2026-02-24T00:00:06.590000","2026-02-24T00:00:06.591000","2026-02-24T00:00:06.593000","2026-02-24T00:00:06.594000","2026-02-24T00:00:06.596000","2026-02-24T00:00:06.597000","2026-02-24T00:00:06.598000","2026-02-24T00:00:06.600000","2026-02-24T00:00:06.601000","2026-02-24T00:00:06.603000","2026-02-24T00:00:06.604000","2026-02-24T00:00:06.606000","2026-02-24T00:00:06.607000","2026-02-24T00:00:06.608000","2026-02-24T00:00:06.610000","2026-02-24T00:00:06.611000","2026-02-24T00:00:06.613000","2026-02-24T00:00:06.614000","2026-02-24T00:00:06.616000","2026-02-24T00:00:06.617000","2026-02-24T00:00:06.618000","2026-02-24T00:00:06.620000","2026-02-24T00:00:06.621000","2026-02-24T00:00:06.623000","2026-02-24T00:00:06.624000","2026-02-24T00:00:06.626000","2026-02-24T00:00:06.627000","2026-02-24T00:00:06.629000","2026-02-24T00:00:06.630000","2026-02-24T00:00:06.631000","2026-02-24T00:00:06.633000","2026-02-24T00:00:06.634000","2026-02-24T00:00:06.636000","2026-02-24T00:00:06.637000","2026-02-24T00:00:06.639000","2026-02-24T00:00:06.640000","2026-02-24T00:00:06.641000","2026-02-24T00:00:06.643000","2026-02-24T00:00:06.644000","2026-02-24T00:00:06.646000","2026-02-24T00:00:06.647000","2026-02-24T00:00:06.649000","2026-02-24T00:00:06.650000","2026-02-24T00:00:06.651000","2026-02-24T00:00:06.653000","2026-02-24T00:00:06.654000","2026-02-24T00:00:06.656000","2026-02-24T00:00:06.657000","2026-02-24T00:00:06.659000","2026-02-24T00:00:06.660000","2026-02-24T00:00:06.662000","2026-02-24T00:00:06.663000","2026-02-24T00:00:06.664000","2026-02-24T00:00:06.666000","2026-02-24T00:00:06.667000","2026-02-24T00:00:06.669000","2026-02-24T00:00:06.670000","2026-02-24T00:00:06.672000","2026-02-24T00:00:06.673000","2026-02-24T00:00:06.675000","2026-02-24T00:00:06.676000","2026-02-24T00:00:06.678000","2026-02-24T00:00:06.679000","2026-02-24T00:00:06.680000","2026-02-24T00:00:06.682000","2026-02-24T00:00:06.683000","2026-02-24T00:00:06.685000","2026-02-24T00:00:06.686000","2026-02-24T00:00:06.694000","2026-02-24T00:00:06.695000","2026-02-24T00:00:06.696000","2026-02-24T00:00:06.697000","2026-02-24T00:00:06.698000","2026-02-24T00:00:06.705000","2026-02-24T00:00:06.706000","2026-02-24T00:00:06.707000","2026-02-24T00:00:06.709000","2026-02-24T00:00:06.710000","2026-02-24T00:00:06.712000","2026-02-24T00:00:06.713000","2026-02-24T00:00:06.715000","2026-02-24T00:00:06.716000","2026-02-24T00:00:06.720000","2026-02-24T00:00:06.720000","2026-02-24T00:00:06.722000","2026-02-24T00:00:06.724000","2026-02-24T00:00:06.725000","2026-02-24T00:00:06.726000","2026-02-24T00:00:06.728000","2026-02-24T00:00:06.729000","2026-02-24T00:00:06.731000","2026-02-24T00:00:06.732000","2026-02-24T00:00:06.733000","2026-02-24T00:00:06.735000","2026-02-24T00:00:06.736000","2026-02-24T00:00:06.738000","2026-02-24T00:00:06.739000","2026-02-24T00:00:06.741000","2026-02-24T00:00:06.742000","2026-02-24T00:00:06.744000","2026-02-24T00:00:06.745000","2026-02-24T00:00:06.746000","2026-02-24T00:00:06.748000","2026-02-24T00:00:06.749000","2026-02-24T00:00:06.751000","2026-02-24T00:00:06.753000","2026-02-24T00:00:06.754000","2026-02-24T00:00:06.755000","2026-02-24T00:00:06.757000","2026-02-24T00:00:06.759000","2026-02-24T00:00:06.760000","2026-02-24T00:00:06.761000","2026-02-24T00:00:06.763000","2026-02-24T00:00:06.411000","2026-02-24T00:00:06.411000","2026-02-24T00:00:06.413000","2026-02-24T00:00:06.414000","2026-02-24T00:00:06.416000","2026-02-24T00:00:06.417000","2026-02-24T00:00:06.418000","2026-02-24T00:00:06.420000","2026-02-24T00:00:06.421000","2026-02-24T00:00:06.422000","2026-02-24T00:00:06.424000","2026-02-24T00:00:06.425000","2026-02-24T00:00:06.426000","2026-02-24T00:00:06.428000","2026-02-24T00:00:06.429000","2026-02-24T00:00:06.431000","2026-02-24T00:00:06.432000","2026-02-24T00:00:06.433000","2026-02-24T00:00:06.435000","2026-02-24T00:00:06.436000","2026-02-24T00:00:06.438000","2026-02-24T00:00:06.439000","2026-02-24T00:00:06.441000","2026-02-24T00:00:06.442000","2026-02-24T00:00:06.443000","2026-02-24T00:00:06.445000","2026-02-24T00:00:06.446000","2026-02-24T00:00:06.447000","2026-02-24T00:00:06.449000","2026-02-24T00:00:06.450000","2026-02-24T00:00:06.451000","2026-02-24T00:00:06.453000","2026-02-24T00:00:06.454000","2026-02-24T00:00:06.456000","2026-02-24T00:00:06.457000","2026-02-24T00:00:06.458000","2026-02-24T00:00:06.460000","2026-02-24T00:00:06.461000","2026-02-24T00:00:06.462000","2026-02-24T00:00:06.464000","2026-02-24T00:00:06.465000","2026-02-24T00:00:06.467000","2026-02-24T00:00:06.468000","2026-02-24T00:00:06.469000","2026-02-24T00:00:06.471000","2026-02-24T00:00:06.472000","2026-02-24T00:00:06.474000","2026-02-24T00:00:06.475000","2026-02-24T00:00:06.476000","2026-02-24T00:00:06.478000","2026-02-24T00:00:06.479000","2026-02-24T00:00:06.480000","2026-02-24T00:00:06.482000","2026-02-24T00:00:06.483000","2026-02-24T00:00:06.484000","2026-02-24T00:00:06.486000","2026-02-24T00:00:06.487000","2026-02-24T00:00:06.489000","2026-02-24T00:00:06.490000","2026-02-24T00:00:06.491000","2026-02-24T00:00:06.493000","2026-02-24T00:00:06.494000","2026-02-24T00:00:06.496000","2026-02-24T00:00:06.497000","2026-02-24T00:00:06.499000","2026-02-24T00:00:06.500000","2026-02-24T00:00:06.501000","2026-02-24T00:00:06.503000","2026-02-24T00:00:06.511000","2026-02-24T00:00:06.512000","2026-02-24T00:00:06.513000","2026-02-24T00:00:06.514000","2026-02-24T00:00:06.516000","2026-02-24T00:00:06.517000","2026-02-24T00:00:06.518000","2026-02-24T00:00:06.520000","2026-02-24T00:00:06.521000","2026-02-24T00:00:06.522000","2026-02-24T00:00:06.524000","2026-02-24T00:00:06.525000","2026-02-24T00:00:06.527000","2026-02-24T00:00:06.528000","2026-02-24T00:00:06.530000","2026-02-24T00:00:06.531000","2026-02-24T00:00:06.532000","2026-02-24T00:00:06.534000","2026-02-24T00:00:06.536000","2026-02-24T00:00:06.537000","2026-02-24T00:00:06.538000","2026-02-24T00:00:06.540000","2026-02-24T00:00:06.541000","2026-02-24T00:00:06.542000","2026-02-24T00:00:06.543000","2026-02-24T00:00:06.545000","2026-02-24T00:00:06.546000","2026-02-24T00:00:06.548000","2026-02-24T00:00:06.549000","2026-02-24T00:00:06.550000","2026-02-24T00:00:06.552000","2026-02-24T00:00:06.557000","2026-02-24T00:00:06.558000","2026-02-24T00:00:06.560000","2026-02-24T00:00:06.561000","2026-02-24T00:00:06.563000","2026-02-24T00:00:06.564000","2026-02-24T00:00:06.565000","2026-02-24T00:00:06.567000","2026-02-24T00:00:06.568000","2026-02-24T00:00:06.570000","2026-02-24T00:00:06.571000","2026-02-24T00:00:06.573000","2026-02-24T00:00:06.574000","2026-02-24T00:00:06.575000","2026-02-24T00:00:06.577000","2026-02-24T00:00:06.578000","2026-02-24T00:00:06.580000","2026-02-24T00:00:06.581000","2026-02-24T00:00:06.582000","2026-02-24T00:00:06.584000","2026-02-24T00:00:06.585000","2026-02-24T00:00:06.587000","2026-02-24T00:00:06.588000","2026-02-24T00:00:06.590000","2026-02-24T00:00:06.591000","2026-02-24T00:00:06.593000","2026-02-24T00:00:06.594000","2026-02-24T00:00:06.596000","2026-02-24T00:00:06.597000","2026-02-24T00:00:06.598000","2026-02-24T00:00:06.600000","2026-02-24T00:00:06.601000","2026-02-24T00:00:06.603000","2026-02-24T00:00:06.604000","2026-02-24T00:00:06.606000","2026-02-24T00:00:06.607000","2026-02-24T00:00:06.608000","2026-02-24T00:00:06.610000","2026-02-24T00:00:06.611000","2026-02-24T00:00:06.613000","2026-02-24T00:00:06.614000","2026-02-24T00:00:06.616000","2026-02-24T00:00:06.617000","2026-02-24T00:00:06.618000","2026-02-24T00:00:06.620000","2026-02-24T00:00:06.621000","2026-02-24T00:00:06.623000","2026-02-24T00:00:06.624000","2026-02-24T00:00:06.626000","2026-02-24T00:00:06.627000","2026-02-24T00:00:06.629000","2026-02-24T00:00:06.630000","2026-02-24T00:00:06.631000","2026-02-24T00:00:06.633000","2026-02-24T00:00:06.634000","2026-02-24T00:00:06.636000","2026-02-24T00:00:06.637000","2026-02-24T00:00:06.639000","2026-02-24T00:00:06.640000","2026-02-24T00:00:06.641000","2026-02-24T00:00:06.643000","2026-02-24T00:00:06.644000","2026-02-24T00:00:06.646000","2026-02-24T00:00:06.647000","2026-02-24T00:00:06.649000","2026-02-24T00:00:06.650000","2026-02-24T00:00:06.651000","2026-02-24T00:00:06.653000","2026-02-24T00:00:06.654000","2026-02-24T00:00:06.656000","2026-02-24T00:00:06.657000","2026-02-24T00:00:06.659000","2026-02-24T00:00:06.660000","2026-02-24T00:00:06.662000","2026-02-24T00:00:06.663000","2026-02-24T00:00:06.664000","2026-02-24T00:00:06.666000","2026-02-24T00:00:06.667000","2026-02-24T00:00:06.669000","2026-02-24T00:00:06.670000","2026-02-24T00:00:06.672000","2026-02-24T00:00:06.673000","2026-02-24T00:00:06.675000","2026-02-24T00:00:06.676000","2026-02-24T00:00:06.678000","2026-02-24T00:00:06.679000","2026-02-24T00:00:06.680000","2026-02-24T00:00:06.682000","2026-02-24T00:00:06.683000","2026-02-24T00:00:06.685000","2026-02-24T00:00:06.686000","2026-02-24T00:00:06.694000","2026-02-24T00:00:06.695000","2026-02-24T00:00:06.696000","2026-02-24T00:00:06.697000","2026-02-24T00:00:06.698000","2026-02-24T00:00:06.705000","2026-02-24T00:00:06.706000","2026-02-24T00:00:06.707000","2026-02-24T00:00:06.709000","2026-02-24T00:00:06.710000","2026-02-24T00:00:06.712000","2026-02-24T00:00:06.713000","2026-02-24T00:00:06.715000","2026-02-24T00:00:06.716000","2026-02-24T00:00:06.720000","2026-02-24T00:00:06.720000","2026-02-24T00:00:06.722000","2026-02-24T00:00:06.723000","2026-02-24T00:00:06.725000","2026-02-24T00:00:06.726000","2026-02-24T00:00:06.728000","2026-02-24T00:00:06.729000","2026-02-24T00:00:06.731000","2026-02-24T00:00:06.732000","2026-02-24T00:00:06.733000","2026-02-24T00:00:06.735000","2026-02-24T00:00:06.736000","2026-02-24T00:00:06.738000","2026-02-24T00:00:06.739000","2026-02-24T00:00:06.741000","2026-02-24T00:00:06.742000","2026-02-24T00:00:06.744000","2026-02-24T00:00:06.745000","2026-02-24T00:00:06.746000","2026-02-24T00:00:06.748000","2026-02-24T00:00:06.749000","2026-02-24T00:00:06.751000","2026-02-24T00:00:06.752000","2026-02-24T00:00:06.754000","2026-02-24T00:00:06.755000","2026-02-24T00:00:06.757000","2026-02-24T00:00:06.758000","2026-02-24T00:00:06.760000","2026-02-24T00:00:06.761000","2026-02-24T00:00:06.763000","2026-02-24T00:00:06.769000","2026-02-24T00:00:06.770000","2026-02-24T00:00:06.771000","2026-02-24T00:00:06.772000","2026-02-24T00:00:06.774000","2026-02-24T00:00:06.775000","2026-02-24T00:00:06.777000","2026-02-24T00:00:06.778000","2026-02-24T00:00:06.780000","2026-02-24T00:00:06.781000","2026-02-24T00:00:06.783000","2026-02-24T00:00:06.784000","2026-02-24T00:00:06.786000","2026-02-24T00:00:06.787000","2026-02-24T00:00:06.789000","2026-02-24T00:00:06.790000","2026-02-24T00:00:06.792000","2026-02-24T00:00:06.793000","2026-02-24T00:00:06.795000","2026-02-24T00:00:06.796000","2026-02-24T00:00:06.798000","2026-02-24T00:00:06.799000","2026-02-24T00:00:06.801000","2026-02-24T00:00:06.802000","2026-02-24T00:00:06.804000","2026-02-24T00:00:06.805000","2026-02-24T00:00:06.811000","2026-02-24T00:00:06.812000","2026-02-24T00:00:06.813000","2026-02-24T00:00:06.814000","2026-02-24T00:00:06.816000","2026-02-24T00:00:06.817000","2026-02-24T00:00:06.819000","2026-02-24T00:00:06.820000","2026-02-24T00:00:06.822000","2026-02-24T00:00:06.824000","2026-02-24T00:00:06.825000","2026-02-24T00:00:06.826000","2026-02-24T00:00:06.828000","2026-02-24T00:00:06.829000","2026-02-24T00:00:06.831000","2026-02-24T00:00:06.832000","2026-02-24T00:00:06.834000","2026-02-24T00:00:06.835000","2026-02-24T00:00:06.837000","2026-02-24T00:00:06.838000","2026-02-24T00:00:06.840000","2026-02-24T00:00:06.841000","2026-02-24T00:00:06.843000","2026-02-24T00:00:06.844000","2026-02-24T00:00:06.846000","2026-02-24T00:00:06.847000","2026-02-24T00:00:06.849000","2026-02-24T00:00:06.850000","2026-02-24T00:00:06.852000","2026-02-24T00:00:06.853000","2026-02-24T00:00:06.855000","2026-02-24T00:00:06.856000","2026-02-24T00:00:06.858000","2026-02-24T00:00:06.859000","2026-02-24T00:00:06.861000","2026-02-24T00:00:06.862000","2026-02-24T00:00:06.864000","2026-02-24T00:00:06.865000","2026-02-24T00:00:06.867000","2026-02-24T00:00:06.868000","2026-02-24T00:00:06.870000","2026-02-24T00:00:06.871000","2026-02-24T00:00:06.873000","2026-02-24T00:00:06.874000","2026-02-24T00:00:06.876000","2026-02-24T00:00:06.877000","2026-02-24T00:00:06.879000","2026-02-24T00:00:06.880000","2026-02-24T00:00:06.882000","2026-02-24T00:00:06.883000","2026-02-24T00:00:06.884000","2026-02-24T00:00:06.886000","2026-02-24T00:00:06.888000","2026-02-24T00:00:06.889000","2026-02-24T00:00:06.891000","2026-02-24T00:00:06.892000","2026-02-24T00:00:06.893000","2026-02-24T00:00:06.895000","2026-02-24T00:00:06.896000","2026-02-24T00:00:06.898000","2026-02-24T00:00:06.899000","2026-02-24T00:00:06.901000","2026-02-24T00:00:06.902000","2026-02-24T00:00:06.904000","2026-02-24T00:00:06.906000","2026-02-24T00:00:06.907000","2026-02-24T00:00:06.908000","2026-02-24T00:00:06.910000","2026-02-24T00:00:06.911000","2026-02-24T00:00:06.913000","2026-02-24T00:00:06.914000","2026-02-24T00:00:06.916000","2026-02-24T00:00:06.917000","2026-02-24T00:00:06.919000","2026-02-24T00:00:06.920000","2026-02-24T00:00:06.922000","2026-02-24T00:00:06.923000","2026-02-24T00:00:06.925000","2026-02-24T00:00:06.926000","2026-02-24T00:00:06.928000","2026-02-24T00:00:06.929000","2026-02-24T00:00:06.931000","2026-02-24T00:00:06.932000","2026-02-24T00:00:06.934000","2026-02-24T00:00:06.935000","2026-02-24T00:00:06.937000","2026-02-24T00:00:06.938000","2026-02-24T00:00:06.940000","2026-02-24T00:00:06.942000","2026-02-24T00:00:06.943000","2026-02-24T00:00:06.945000","2026-02-24T00:00:06.946000","2026-02-24T00:00:06.948000","2026-02-24T00:00:06.949000","2026-02-24T00:00:06.951000","2026-02-24T00:00:06.952000","2026-02-24T00:00:06.956000","2026-02-24T00:00:06.957000","2026-02-24T00:00:06.959000","2026-02-24T00:00:06.960000","2026-02-24T00:00:06.962000","2026-02-24T00:00:06.963000","2026-02-24T00:00:06.964000","2026-02-24T00:00:06.969000","2026-02-24T00:00:06.969000","2026-02-24T00:00:06.971000","2026-02-24T00:00:06.972000","2026-02-24T00:00:06.974000","2026-02-24T00:00:06.975000","2026-02-24T00:00:06.977000","2026-02-24T00:00:06.978000","2026-02-24T00:00:06.980000","2026-02-24T00:00:06.981000","2026-02-24T00:00:06.983000","2026-02-24T00:00:06.984000","2026-02-24T00:00:06.986000","2026-02-24T00:00:06.988000","2026-02-24T00:00:06.989000","2026-02-24T00:00:06.991000","2026-02-24T00:00:06.992000","2026-02-24T00:00:06.994000","2026-02-24T00:00:06.995000","2026-02-24T00:00:06.997000","2026-02-24T00:00:06.998000","2026-02-24T00:00:07.000000","2026-02-24T00:00:07.001000","2026-02-24T00:00:07.003000","2026-02-24T00:00:07.005000","2026-02-24T00:00:07.006000","2026-02-24T00:00:07.008000","2026-02-24T00:00:07.009000","2026-02-24T00:00:07.011000","2026-02-24T00:00:07.012000","2026-02-24T00:00:07.014000","2026-02-24T00:00:07.015000","2026-02-24T00:00:07.017000","2026-02-24T00:00:07.018000","2026-02-24T00:00:07.020000","2026-02-24T00:00:07.021000","2026-02-24T00:00:07.023000","2026-02-24T00:00:07.025000","2026-02-24T00:00:07.026000","2026-02-24T00:00:07.028000","2026-02-24T00:00:07.029000","2026-02-24T00:00:07.031000","2026-02-24T00:00:07.032000","2026-02-24T00:00:07.034000","2026-02-24T00:00:07.035000","2026-02-24T00:00:07.037000","2026-02-24T00:00:07.038000","2026-02-24T00:00:07.040000","2026-02-24T00:00:07.042000","2026-02-24T00:00:07.043000","2026-02-24T00:00:07.045000","2026-02-24T00:00:07.046000","2026-02-24T00:00:07.048000","2026-02-24T00:00:07.049000","2026-02-24T00:00:07.051000","2026-02-24T00:00:07.052000","2026-02-24T00:00:07.054000","2026-02-24T00:00:07.055000","2026-02-24T00:00:07.057000","2026-02-24T00:00:07.058000","2026-02-24T00:00:07.060000","2026-02-24T00:00:07.062000","2026-02-24T00:00:07.063000","2026-02-24T00:00:07.065000","2026-02-24T00:00:07.066000","2026-02-24T00:00:07.068000","2026-02-24T00:00:07.069000","2026-02-24T00:00:07.071000","2026-02-24T00:00:07.072000","2026-02-24T00:00:07.074000","2026-02-24T00:00:07.075000","2026-02-24T00:00:07.077000","2026-02-24T00:00:07.079000","2026-02-24T00:00:07.080000","2026-02-24T00:00:07.082000","2026-02-24T00:00:07.083000","2026-02-24T00:00:07.085000","2026-02-24T00:00:07.086000","2026-02-24T00:00:07.088000","2026-02-24T00:00:07.089000","2026-02-24T00:00:07.091000","2026-02-24T00:00:07.092000","2026-02-24T00:00:07.094000","2026-02-24T00:00:07.096000","2026-02-24T00:00:07.097000","2026-02-24T00:00:07.099000","2026-02-24T00:00:07.100000","2026-02-24T00:00:07.102000","2026-02-24T00:00:07.103000","2026-02-24T00:00:07.105000","2026-02-24T00:00:07.106000","2026-02-24T00:00:07.108000","2026-02-24T00:00:07.109000","2026-02-24T00:00:07.111000","2026-02-24T00:00:07.113000","2026-02-24T00:00:07.114000","2026-02-24T00:00:07.116000","2026-02-24T00:00:07.117000","2026-02-24T00:00:07.119000","2026-02-24T00:00:07.120000","2026-02-24T00:00:07.122000","2026-02-24T00:00:07.123000","2026-02-24T00:00:07.125000","2026-02-24T00:00:07.126000","2026-02-24T00:00:07.128000","2026-02-24T00:00:07.130000","2026-02-24T00:00:07.131000","2026-02-24T00:00:07.133000","2026-02-24T00:00:07.134000","2026-02-24T00:00:07.136000","2026-02-24T00:00:07.137000","2026-02-24T00:00:07.139000","2026-02-24T00:00:07.141000","2026-02-24T00:00:07.142000","2026-02-24T00:00:07.144000","2026-02-24T00:00:07.145000","2026-02-24T00:00:07.147000","2026-02-24T00:00:07.149000","2026-02-24T00:00:07.150000","2026-02-24T00:00:07.152000","2026-02-24T00:00:07.153000","2026-02-24T00:00:07.155000","2026-02-24T00:00:07.156000","2026-02-24T00:00:07.158000","2026-02-24T00:00:07.160000","2026-02-24T00:00:07.161000","2026-02-24T00:00:07.163000","2026-02-24T00:00:07.165000","2026-02-24T00:00:07.166000","2026-02-24T00:00:07.168000","2026-02-24T00:00:07.169000","2026-02-24T00:00:07.171000","2026-02-24T00:00:07.172000","2026-02-24T00:00:07.177000","2026-02-24T00:00:07.178000","2026-02-24T00:00:07.179000","2026-02-24T00:00:07.181000","2026-02-24T00:00:07.182000","2026-02-24T00:00:07.184000","2026-02-24T00:00:07.185000","2026-02-24T00:00:07.189000","2026-02-24T00:00:07.192000","2026-02-24T00:00:07.193000","2026-02-24T00:00:07.195000","2026-02-24T00:00:07.196000","2026-02-24T00:00:07.198000","2026-02-24T00:00:07.200000","2026-02-24T00:00:07.201000","2026-02-24T00:00:07.203000","2026-02-24T00:00:07.204000","2026-02-24T00:00:07.206000","2026-02-24T00:00:07.207000","2026-02-24T00:00:07.209000","2026-02-24T00:00:07.211000","2026-02-24T00:00:07.212000","2026-02-24T00:00:07.214000","2026-02-24T00:00:07.215000","2026-02-24T00:00:07.217000","2026-02-24T00:00:07.219000","2026-02-24T00:00:07.220000","2026-02-24T00:00:07.222000","2026-02-24T00:00:07.223000","2026-02-24T00:00:07.225000","2026-02-24T00:00:07.226000","2026-02-24T00:00:07.228000","2026-02-24T00:00:07.230000","2026-02-24T00:00:07.232000","2026-02-24T00:00:07.233000","2026-02-24T00:00:07.235000","2026-02-24T00:00:07.236000","2026-02-24T00:00:07.238000","2026-02-24T00:00:07.239000","2026-02-24T00:00:07.241000","2026-02-24T00:00:07.242000","2026-02-24T00:00:07.244000","2026-02-24T00:00:07.246000","2026-02-24T00:00:07.247000","2026-02-24T00:00:07.249000","2026-02-24T00:00:07.251000","2026-02-24T00:00:07.252000","2026-02-24T00:00:07.254000","2026-02-24T00:00:07.255000","2026-02-24T00:00:07.257000","2026-02-24T00:00:07.259000","2026-02-24T00:00:07.260000","2026-02-24T00:00:07.262000","2026-02-24T00:00:07.263000","2026-02-24T00:00:07.265000","2026-02-24T00:00:07.267000","2026-02-24T00:00:07.268000","2026-02-24T00:00:07.270000","2026-02-24T00:00:07.271000","2026-02-24T00:00:07.273000","2026-02-24T00:00:07.274000","2026-02-24T00:00:07.276000","2026-02-24T00:00:07.278000","2026-02-24T00:00:07.279000","2026-02-24T00:00:07.281000","2026-02-24T00:00:07.283000","2026-02-24T00:00:07.284000","2026-02-24T00:00:07.286000","2026-02-24T00:00:07.287000","2026-02-24T00:00:07.289000","2026-02-24T00:00:07.291000","2026-02-24T00:00:07.292000","2026-02-24T00:00:07.294000","2026-02-24T00:00:07.295000","2026-02-24T00:00:07.297000","2026-02-24T00:00:07.299000","2026-02-24T00:00:07.300000","2026-02-24T00:00:07.302000","2026-02-24T00:00:07.303000","2026-02-24T00:00:07.305000","2026-02-24T00:00:07.306000","2026-02-24T00:00:07.308000","2026-02-24T00:00:07.310000","2026-02-24T00:00:07.311000","2026-02-24T00:00:07.313000","2026-02-24T00:00:07.315000","2026-02-24T00:00:07.316000","2026-02-24T00:00:07.318000","2026-02-24T00:00:07.319000","2026-02-24T00:00:07.321000","2026-02-24T00:00:07.323000","2026-02-24T00:00:07.324000","2026-02-24T00:00:07.326000","2026-02-24T00:00:07.327000","2026-02-24T00:00:07.329000","2026-02-24T00:00:07.330000","2026-02-24T00:00:07.332000","2026-02-24T00:00:07.334000","2026-02-24T00:00:07.335000","2026-02-24T00:00:07.337000","2026-02-24T00:00:07.339000","2026-02-24T00:00:07.340000","2026-02-24T00:00:07.342000","2026-02-24T00:00:07.344000","2026-02-24T00:00:07.345000","2026-02-24T00:00:07.347000","2026-02-24T00:00:07.348000","2026-02-24T00:00:07.350000","2026-02-24T00:00:07.352000","2026-02-24T00:00:07.353000","2026-02-24T00:00:07.355000","2026-02-24T00:00:07.357000","2026-02-24T00:00:07.358000","2026-02-24T00:00:07.360000","2026-02-24T00:00:07.362000","2026-02-24T00:00:07.363000","2026-02-24T00:00:07.365000","2026-02-24T00:00:07.367000","2026-02-24T00:00:07.368000","2026-02-24T00:00:07.370000","2026-02-24T00:00:07.371000","2026-02-24T00:00:07.373000","2026-02-24T00:00:07.375000","2026-02-24T00:00:07.376000","2026-02-24T00:00:07.378000","2026-02-24T00:00:07.380000","2026-02-24T00:00:07.381000","2026-02-24T00:00:07.383000","2026-02-24T00:00:07.385000","2026-02-24T00:00:07.386000","2026-02-24T00:00:07.388000","2026-02-24T00:00:07.390000","2026-02-24T00:00:07.391000","2026-02-24T00:00:07.393000","2026-02-24T00:00:07.394000","2026-02-24T00:00:07.396000","2026-02-24T00:00:07.398000","2026-02-24T00:00:07.399000","2026-02-24T00:00:07.401000","2026-02-24T00:00:07.403000","2026-02-24T00:00:07.404000","2026-02-24T00:00:07.406000","2026-02-24T00:00:07.408000","2026-02-24T00:00:07.409000","2026-02-24T00:00:07.411000","2026-02-24T00:00:07.413000","2026-02-24T00:00:07.414000","2026-02-24T00:00:07.416000","2026-02-24T00:00:07.417000","2026-02-24T00:00:07.419000","2026-02-24T00:00:07.426000","2026-02-24T00:00:07.428000","2026-02-24T00:00:07.428000","2026-02-24T00:00:07.430000","2026-02-24T00:00:07.432000","2026-02-24T00:00:07.433000","2026-02-24T00:00:07.435000","2026-02-24T00:00:07.437000","2026-02-24T00:00:07.438000","2026-02-24T00:00:07.440000","2026-02-24T00:00:07.442000","2026-02-24T00:00:07.443000","2026-02-24T00:00:07.445000","2026-02-24T00:00:07.446000","2026-02-24T00:00:07.448000","2026-02-24T00:00:07.450000","2026-02-24T00:00:07.452000","2026-02-24T00:00:07.453000","2026-02-24T00:00:07.455000","2026-02-24T00:00:07.457000","2026-02-24T00:00:07.458000","2026-02-24T00:00:07.460000","2026-02-24T00:00:07.461000","2026-02-24T00:00:07.469000","2026-02-24T00:00:07.470000","2026-02-24T00:00:07.471000","2026-02-24T00:00:07.473000","2026-02-24T00:00:07.475000","2026-02-24T00:00:07.476000","2026-02-24T00:00:07.478000","2026-02-24T00:00:07.480000","2026-02-24T00:00:07.481000","2026-02-24T00:00:07.483000","2026-02-24T00:00:07.484000","2026-02-24T00:00:07.486000","2026-02-24T00:00:07.488000","2026-02-24T00:00:07.489000","2026-02-24T00:00:07.491000","2026-02-24T00:00:07.493000","2026-02-24T00:00:07.494000","2026-02-24T00:00:07.496000","2026-02-24T00:00:07.498000","2026-02-24T00:00:07.499000","2026-02-24T00:00:07.501000","2026-02-24T00:00:07.503000","2026-02-24T00:00:07.504000","2026-02-24T00:00:07.506000","2026-02-24T00:00:07.507000","2026-02-24T00:00:07.509000","2026-02-24T00:00:07.511000","2026-02-24T00:00:07.512000","2026-02-24T00:00:07.514000","2026-02-24T00:00:07.516000","2026-02-24T00:00:07.517000","2026-02-24T00:00:07.519000","2026-02-24T00:00:07.521000","2026-02-24T00:00:07.522000","2026-02-24T00:00:07.524000","2026-02-24T00:00:07.530000","2026-02-24T00:00:07.531000","2026-02-24T00:00:07.532000","2026-02-24T00:00:07.534000","2026-02-24T00:00:07.536000","2026-02-24T00:00:07.543000","2026-02-24T00:00:07.545000","2026-02-24T00:00:07.546000","2026-02-24T00:00:07.547000","2026-02-24T00:00:07.549000","2026-02-24T00:00:07.550000","2026-02-24T00:00:07.552000","2026-02-24T00:00:07.553000","2026-02-24T00:00:07.555000","2026-02-24T00:00:07.557000","2026-02-24T00:00:07.559000","2026-02-24T00:00:07.560000","2026-02-24T00:00:07.562000","2026-02-24T00:00:07.564000","2026-02-24T00:00:07.565000","2026-02-24T00:00:07.567000","2026-02-24T00:00:07.569000","2026-02-24T00:00:07.570000","2026-02-24T00:00:07.572000","2026-02-24T00:00:07.574000","2026-02-24T00:00:07.576000","2026-02-24T00:00:07.577000","2026-02-24T00:00:07.579000","2026-02-24T00:00:07.581000","2026-02-24T00:00:07.584000","2026-02-24T00:00:07.586000","2026-02-24T00:00:07.587000","2026-02-24T00:00:07.595000","2026-02-24T00:00:07.597000","2026-02-24T00:00:07.598000","2026-02-24T00:00:07.599000","2026-02-24T00:00:07.601000","2026-02-24T00:00:07.603000","2026-02-24T00:00:07.604000","2026-02-24T00:00:07.606000","2026-02-24T00:00:06.442000","2026-02-24T00:00:06.443000","2026-02-24T00:00:06.444000","2026-02-24T00:00:06.446000","2026-02-24T00:00:06.447000","2026-02-24T00:00:06.449000","2026-02-24T00:00:06.450000","2026-02-24T00:00:06.451000","2026-02-24T00:00:06.512000","2026-02-24T00:00:06.513000","2026-02-24T00:00:06.514000","2026-02-24T00:00:06.516000","2026-02-24T00:00:06.517000","2026-02-24T00:00:06.518000","2026-02-24T00:00:06.520000","2026-02-24T00:00:06.521000","2026-02-24T00:00:06.523000","2026-02-24T00:00:06.524000","2026-02-24T00:00:06.525000","2026-02-24T00:00:06.527000","2026-02-24T00:00:06.528000","2026-02-24T00:00:06.530000","2026-02-24T00:00:06.531000","2026-02-24T00:00:06.533000","2026-02-24T00:00:06.534000","2026-02-24T00:00:06.536000","2026-02-24T00:00:06.537000","2026-02-24T00:00:06.540000","2026-02-24T00:00:06.541000","2026-02-24T00:00:06.542000","2026-02-24T00:00:06.543000","2026-02-24T00:00:06.545000","2026-02-24T00:00:06.546000","2026-02-24T00:00:06.548000","2026-02-24T00:00:06.549000","2026-02-24T00:00:06.551000","2026-02-24T00:00:06.552000","2026-02-24T00:00:06.558000","2026-02-24T00:00:06.558000","2026-02-24T00:00:06.560000","2026-02-24T00:00:06.561000","2026-02-24T00:00:06.563000","2026-02-24T00:00:06.564000","2026-02-24T00:00:06.565000","2026-02-24T00:00:06.567000","2026-02-24T00:00:06.568000","2026-02-24T00:00:06.570000","2026-02-24T00:00:06.571000","2026-02-24T00:00:06.573000","2026-02-24T00:00:06.574000","2026-02-24T00:00:06.575000","2026-02-24T00:00:06.577000","2026-02-24T00:00:06.578000","2026-02-24T00:00:06.580000","2026-02-24T00:00:06.581000","2026-02-24T00:00:06.583000","2026-02-24T00:00:06.584000","2026-02-24T00:00:06.585000","2026-02-24T00:00:06.587000","2026-02-24T00:00:06.588000","2026-02-24T00:00:06.590000","2026-02-24T00:00:06.591000","2026-02-24T00:00:06.593000","2026-02-24T00:00:06.594000","2026-02-24T00:00:06.596000","2026-02-24T00:00:06.597000","2026-02-24T00:00:06.598000","2026-02-24T00:00:06.600000","2026-02-24T00:00:06.601000","2026-02-24T00:00:06.603000","2026-02-24T00:00:06.604000","2026-02-24T00:00:06.606000","2026-02-24T00:00:06.607000","2026-02-24T00:00:06.608000","2026-02-24T00:00:06.610000","2026-02-24T00:00:06.611000","2026-02-24T00:00:06.613000","2026-02-24T00:00:06.614000","2026-02-24T00:00:06.616000","2026-02-24T00:00:06.617000","2026-02-24T00:00:06.618000","2026-02-24T00:00:06.620000","2026-02-24T00:00:06.621000","2026-02-24T00:00:06.623000","2026-02-24T00:00:06.624000","2026-02-24T00:00:06.626000","2026-02-24T00:00:06.627000","2026-02-24T00:00:06.629000","2026-02-24T00:00:06.630000","2026-02-24T00:00:06.631000","2026-02-24T00:00:06.633000","2026-02-24T00:00:06.634000","2026-02-24T00:00:06.636000","2026-02-24T00:00:06.637000","2026-02-24T00:00:06.639000","2026-02-24T00:00:06.640000","2026-02-24T00:00:06.642000","2026-02-24T00:00:06.643000","2026-02-24T00:00:06.644000","2026-02-24T00:00:06.646000","2026-02-24T00:00:06.647000","2026-02-24T00:00:06.649000","2026-02-24T00:00:06.650000","2026-02-24T00:00:06.652000","2026-02-24T00:00:06.653000","2026-02-24T00:00:06.654000","2026-02-24T00:00:06.656000","2026-02-24T00:00:06.657000","2026-02-24T00:00:06.659000","2026-02-24T00:00:06.660000","2026-02-24T00:00:06.662000","2026-02-24T00:00:06.663000","2026-02-24T00:00:06.664000","2026-02-24T00:00:06.666000","2026-02-24T00:00:06.667000","2026-02-24T00:00:06.669000","2026-02-24T00:00:06.670000","2026-02-24T00:00:06.672000","2026-02-24T00:00:06.673000","2026-02-24T00:00:06.675000","2026-02-24T00:00:06.676000","2026-02-24T00:00:06.678000","2026-02-24T00:00:06.679000","2026-02-24T00:00:06.680000","2026-02-24T00:00:06.682000","2026-02-24T00:00:06.683000","2026-02-24T00:00:06.685000","2026-02-24T00:00:06.686000","2026-02-24T00:00:06.540000","2026-02-24T00:00:06.541000","2026-02-24T00:00:06.542000","2026-02-24T00:00:06.543000","2026-02-24T00:00:06.545000","2026-02-24T00:00:06.558000","2026-02-24T00:00:06.559000","2026-02-24T00:00:06.561000","2026-02-24T00:00:06.563000","2026-02-24T00:00:06.564000","2026-02-24T00:00:06.565000","2026-02-24T00:00:06.567000","2026-02-24T00:00:06.568000","2026-02-24T00:00:06.570000","2026-02-24T00:00:06.571000","2026-02-24T00:00:06.572000","2026-02-24T00:00:06.574000","2026-02-24T00:00:06.575000","2026-02-24T00:00:06.577000","2026-02-24T00:00:06.578000","2026-02-24T00:00:06.580000","2026-02-24T00:00:06.581000","2026-02-24T00:00:06.582000","2026-02-24T00:00:06.584000","2026-02-24T00:00:06.585000","2026-02-24T00:00:06.587000","2026-02-24T00:00:06.588000","2026-02-24T00:00:06.590000","2026-02-24T00:00:06.591000","2026-02-24T00:00:06.593000","2026-02-24T00:00:06.594000","2026-02-24T00:00:06.595000","2026-02-24T00:00:06.597000","2026-02-24T00:00:06.598000","2026-02-24T00:00:06.600000","2026-02-24T00:00:06.601000","2026-02-24T00:00:06.603000","2026-02-24T00:00:06.604000","2026-02-24T00:00:06.605000","2026-02-24T00:00:06.607000","2026-02-24T00:00:06.608000","2026-02-24T00:00:06.610000","2026-02-24T00:00:06.611000","2026-02-24T00:00:06.613000","2026-02-24T00:00:06.614000","2026-02-24T00:00:06.615000","2026-02-24T00:00:06.617000","2026-02-24T00:00:06.618000","2026-02-24T00:00:06.620000","2026-02-24T00:00:06.621000","2026-02-24T00:00:06.623000","2026-02-24T00:00:06.624000","2026-02-24T00:00:06.626000","2026-02-24T00:00:06.627000","2026-02-24T00:00:06.628000","2026-02-24T00:00:06.630000","2026-02-24T00:00:06.631000","2026-02-24T00:00:06.633000","2026-02-24T00:00:06.634000","2026-02-24T00:00:06.636000","2026-02-24T00:00:06.637000","2026-02-24T00:00:06.638000","2026-02-24T00:00:06.640000","2026-02-24T00:00:06.641000","2026-02-24T00:00:06.643000","2026-02-24T00:00:06.644000","2026-02-24T00:00:06.646000","2026-02-24T00:00:06.647000","2026-02-24T00:00:06.648000","2026-02-24T00:00:06.650000","2026-02-24T00:00:06.651000","2026-02-24T00:00:06.653000","2026-02-24T00:00:06.654000","2026-02-24T00:00:06.656000","2026-02-24T00:00:06.657000","2026-02-24T00:00:06.659000","2026-02-24T00:00:06.660000","2026-02-24T00:00:06.662000","2026-02-24T00:00:06.663000","2026-02-24T00:00:06.664000","2026-02-24T00:00:06.666000","2026-02-24T00:00:06.667000","2026-02-24T00:00:06.669000","2026-02-24T00:00:06.670000","2026-02-24T00:00:06.672000","2026-02-24T00:00:06.673000","2026-02-24T00:00:06.675000","2026-02-24T00:00:06.676000","2026-02-24T00:00:06.678000","2026-02-24T00:00:06.679000","2026-02-24T00:00:06.680000","2026-02-24T00:00:06.682000","2026-02-24T00:00:06.683000","2026-02-24T00:00:06.685000","2026-02-24T00:00:06.686000","2026-02-24T00:00:06.694000","2026-02-24T00:00:06.695000","2026-02-24T00:00:06.695000","2026-02-24T00:00:06.697000","2026-02-24T00:00:06.698000","2026-02-24T00:00:06.705000","2026-02-24T00:00:06.706000","2026-02-24T00:00:06.707000","2026-02-24T00:00:06.709000","2026-02-24T00:00:06.710000","2026-02-24T00:00:06.712000","2026-02-24T00:00:06.713000","2026-02-24T00:00:06.714000","2026-02-24T00:00:06.716000","2026-02-24T00:00:06.720000","2026-02-24T00:00:06.720000","2026-02-24T00:00:06.722000","2026-02-24T00:00:06.723000","2026-02-24T00:00:06.725000","2026-02-24T00:00:06.726000","2026-02-24T00:00:06.728000","2026-02-24T00:00:06.729000","2026-02-24T00:00:06.730000","2026-02-24T00:00:06.732000","2026-02-24T00:00:06.733000","2026-02-24T00:00:06.735000","2026-02-24T00:00:06.736000","2026-02-24T00:00:06.738000","2026-02-24T00:00:06.739000","2026-02-24T00:00:06.740000","2026-02-24T00:00:06.742000","2026-02-24T00:00:06.743000","2026-02-24T00:00:06.745000","2026-02-24T00:00:06.746000","2026-02-24T00:00:06.748000","2026-02-24T00:00:06.749000","2026-02-24T00:00:06.751000","2026-02-24T00:00:06.752000","2026-02-24T00:00:06.754000","2026-02-24T00:00:06.755000","2026-02-24T00:00:06.757000","2026-02-24T00:00:06.758000","2026-02-24T00:00:06.760000","2026-02-24T00:00:06.761000","2026-02-24T00:00:06.763000","2026-02-24T00:00:06.769000","2026-02-24T00:00:06.770000","2026-02-24T00:00:06.771000","2026-02-24T00:00:06.772000","2026-02-24T00:00:06.774000","2026-02-24T00:00:06.775000","2026-02-24T00:00:06.777000","2026-02-24T00:00:06.778000","2026-02-24T00:00:06.780000","2026-02-24T00:00:06.781000","2026-02-24T00:00:06.783000","2026-02-24T00:00:06.784000","2026-02-24T00:00:06.786000","2026-02-24T00:00:06.787000","2026-02-24T00:00:06.789000","2026-02-24T00:00:06.790000","2026-02-24T00:00:06.791000","2026-02-24T00:00:06.793000","2026-02-24T00:00:06.795000","2026-02-24T00:00:06.796000","2026-02-24T00:00:06.798000","2026-02-24T00:00:06.799000","2026-02-24T00:00:06.801000","2026-02-24T00:00:06.802000","2026-02-24T00:00:06.804000","2026-02-24T00:00:06.805000","2026-02-24T00:00:06.811000","2026-02-24T00:00:06.812000","2026-02-24T00:00:06.813000","2026-02-24T00:00:06.814000","2026-02-24T00:00:06.816000","2026-02-24T00:00:06.817000","2026-02-24T00:00:06.819000","2026-02-24T00:00:06.820000","2026-02-24T00:00:06.822000","2026-02-24T00:00:06.824000","2026-02-24T00:00:06.825000","2026-02-24T00:00:06.826000","2026-02-24T00:00:06.828000","2026-02-24T00:00:06.829000","2026-02-24T00:00:06.831000","2026-02-24T00:00:06.832000","2026-02-24T00:00:06.834000","2026-02-24T00:00:06.835000","2026-02-24T00:00:06.837000","2026-02-24T00:00:06.838000","2026-02-24T00:00:06.840000","2026-02-24T00:00:06.841000","2026-02-24T00:00:06.843000","2026-02-24T00:00:06.844000","2026-02-24T00:00:06.846000","2026-02-24T00:00:06.847000","2026-02-24T00:00:06.849000","2026-02-24T00:00:06.850000","2026-02-24T00:00:06.852000","2026-02-24T00:00:06.853000","2026-02-24T00:00:06.855000","2026-02-24T00:00:06.856000","2026-02-24T00:00:06.858000","2026-02-24T00:00:06.859000","2026-02-24T00:00:06.861000","2026-02-24T00:00:06.862000","2026-02-24T00:00:06.864000","2026-02-24T00:00:06.865000","2026-02-24T00:00:06.867000","2026-02-24T00:00:06.868000","2026-02-24T00:00:06.870000","2026-02-24T00:00:06.871000","2026-02-24T00:00:06.873000","2026-02-24T00:00:06.874000","2026-02-24T00:00:06.876000","2026-02-24T00:00:06.877000","2026-02-24T00:00:06.879000","2026-02-24T00:00:06.880000","2026-02-24T00:00:06.881000","2026-02-24T00:00:06.883000","2026-02-24T00:00:06.884000","2026-02-24T00:00:06.886000","2026-02-24T00:00:06.887000","2026-02-24T00:00:06.889000","2026-02-24T00:00:06.890000","2026-02-24T00:00:06.892000","2026-02-24T00:00:06.893000","2026-02-24T00:00:06.895000","2026-02-24T00:00:06.896000","2026-02-24T00:00:06.898000","2026-02-24T00:00:06.899000","2026-02-24T00:00:06.901000","2026-02-24T00:00:06.902000","2026-02-24T00:00:06.904000","2026-02-24T00:00:06.905000","2026-02-24T00:00:06.907000","2026-02-24T00:00:06.908000","2026-02-24T00:00:06.910000","2026-02-24T00:00:06.911000","2026-02-24T00:00:06.913000","2026-02-24T00:00:06.914000","2026-02-24T00:00:06.916000","2026-02-24T00:00:06.917000","2026-02-24T00:00:06.919000","2026-02-24T00:00:06.920000","2026-02-24T00:00:06.922000","2026-02-24T00:00:06.923000","2026-02-24T00:00:06.925000","2026-02-24T00:00:06.926000","2026-02-24T00:00:06.928000","2026-02-24T00:00:06.929000","2026-02-24T00:00:06.931000","2026-02-24T00:00:06.932000","2026-02-24T00:00:06.934000","2026-02-24T00:00:06.935000","2026-02-24T00:00:06.937000","2026-02-24T00:00:06.938000","2026-02-24T00:00:06.940000","2026-02-24T00:00:06.942000","2026-02-24T00:00:06.943000","2026-02-24T00:00:06.945000","2026-02-24T00:00:06.946000","2026-02-24T00:00:06.947000","2026-02-24T00:00:06.949000","2026-02-24T00:00:06.951000","2026-02-24T00:00:06.952000","2026-02-24T00:00:06.956000","2026-02-24T00:00:06.957000","2026-02-24T00:00:06.958000","2026-02-24T00:00:06.960000","2026-02-24T00:00:06.962000","2026-02-24T00:00:06.963000","2026-02-24T00:00:06.964000","2026-02-24T00:00:06.968000","2026-02-24T00:00:06.969000","2026-02-24T00:00:06.971000","2026-02-24T00:00:06.972000","2026-02-24T00:00:06.974000","2026-02-24T00:00:06.975000","2026-02-24T00:00:06.977000","2026-02-24T00:00:06.978000","2026-02-24T00:00:06.980000","2026-02-24T00:00:06.981000","2026-02-24T00:00:06.983000","2026-02-24T00:00:06.984000","2026-02-24T00:00:06.986000","2026-02-24T00:00:06.987000","2026-02-24T00:00:06.989000","2026-02-24T00:00:06.991000","2026-02-24T00:00:06.992000","2026-02-24T00:00:06.994000","2026-02-24T00:00:06.995000","2026-02-24T00:00:06.997000","2026-02-24T00:00:06.998000","2026-02-24T00:00:07.000000","2026-02-24T00:00:07.001000","2026-02-24T00:00:07.003000","2026-02-24T00:00:07.004000","2026-02-24T00:00:07.006000","2026-02-24T00:00:07.008000","2026-02-24T00:00:07.009000","2026-02-24T00:00:07.011000","2026-02-24T00:00:07.012000","2026-02-24T00:00:07.014000","2026-02-24T00:00:07.015000","2026-02-24T00:00:07.017000","2026-02-24T00:00:07.018000","2026-02-24T00:00:07.020000","2026-02-24T00:00:07.021000","2026-02-24T00:00:07.023000","2026-02-24T00:00:07.025000","2026-02-24T00:00:07.026000","2026-02-24T00:00:07.028000","2026-02-24T00:00:07.029000","2026-02-24T00:00:07.031000","2026-02-24T00:00:07.032000","2026-02-24T00:00:07.034000","2026-02-24T00:00:07.035000","2026-02-24T00:00:07.037000","2026-02-24T00:00:07.038000","2026-02-24T00:00:07.040000","2026-02-24T00:00:07.042000","2026-02-24T00:00:07.043000","2026-02-24T00:00:07.045000","2026-02-24T00:00:07.046000","2026-02-24T00:00:07.048000","2026-02-24T00:00:07.049000","2026-02-24T00:00:07.051000","2026-02-24T00:00:07.052000","2026-02-24T00:00:07.054000","2026-02-24T00:00:07.055000","2026-02-24T00:00:07.057000","2026-02-24T00:00:07.058000","2026-02-24T00:00:07.060000","2026-02-24T00:00:07.062000","2026-02-24T00:00:07.063000","2026-02-24T00:00:07.065000","2026-02-24T00:00:07.066000","2026-02-24T00:00:07.068000","2026-02-24T00:00:07.069000","2026-02-24T00:00:07.071000","2026-02-24T00:00:07.072000","2026-02-24T00:00:07.074000","2026-02-24T00:00:07.075000","2026-02-24T00:00:07.077000","2026-02-24T00:00:07.079000","2026-02-24T00:00:07.080000","2026-02-24T00:00:07.082000","2026-02-24T00:00:07.083000","2026-02-24T00:00:07.085000","2026-02-24T00:00:07.086000","2026-02-24T00:00:07.088000","2026-02-24T00:00:07.089000","2026-02-24T00:00:07.091000","2026-02-24T00:00:07.092000","2026-02-24T00:00:07.094000","2026-02-24T00:00:07.095000","2026-02-24T00:00:07.097000","2026-02-24T00:00:07.099000","2026-02-24T00:00:07.100000","2026-02-24T00:00:07.102000","2026-02-24T00:00:07.103000","2026-02-24T00:00:07.105000","2026-02-24T00:00:07.106000","2026-02-24T00:00:07.108000","2026-02-24T00:00:07.109000","2026-02-24T00:00:07.111000","2026-02-24T00:00:07.113000","2026-02-24T00:00:07.114000","2026-02-24T00:00:07.116000","2026-02-24T00:00:07.117000","2026-02-24T00:00:07.119000","2026-02-24T00:00:07.120000","2026-02-24T00:00:07.122000","2026-02-24T00:00:07.123000","2026-02-24T00:00:07.125000","2026-02-24T00:00:07.126000","2026-02-24T00:00:07.128000","2026-02-24T00:00:07.129000","2026-02-24T00:00:07.131000","2026-02-24T00:00:07.133000","2026-02-24T00:00:07.134000","2026-02-24T00:00:07.136000","2026-02-24T00:00:07.137000","2026-02-24T00:00:07.139000","2026-02-24T00:00:07.140000","2026-02-24T00:00:07.142000","2026-02-24T00:00:07.144000","2026-02-24T00:00:07.145000","2026-02-24T00:00:07.147000","2026-02-24T00:00:07.149000","2026-02-24T00:00:07.150000","2026-02-24T00:00:07.152000","2026-02-24T00:00:07.153000","2026-02-24T00:00:07.155000","2026-02-24T00:00:07.156000","2026-02-24T00:00:07.158000","2026-02-24T00:00:07.160000","2026-02-24T00:00:07.161000","2026-02-24T00:00:07.163000","2026-02-24T00:00:07.165000","2026-02-24T00:00:07.166000","2026-02-24T00:00:07.168000","2026-02-24T00:00:07.169000","2026-02-24T00:00:07.171000","2026-02-24T00:00:07.172000","2026-02-24T00:00:07.177000","2026-02-24T00:00:07.178000","2026-02-24T00:00:07.179000","2026-02-24T00:00:07.180000","2026-02-24T00:00:07.182000","2026-02-24T00:00:07.184000","2026-02-24T00:00:07.185000","2026-02-24T00:00:07.189000","2026-02-24T00:00:07.192000","2026-02-24T00:00:07.193000","2026-02-24T00:00:07.195000","2026-02-24T00:00:07.196000","2026-02-24T00:00:07.198000","2026-02-24T00:00:07.199000","2026-02-24T00:00:07.201000","2026-02-24T00:00:07.203000","2026-02-24T00:00:07.204000","2026-02-24T00:00:07.206000","2026-02-24T00:00:07.207000","2026-02-24T00:00:07.209000","2026-02-24T00:00:07.211000","2026-02-24T00:00:07.212000","2026-02-24T00:00:07.214000","2026-02-24T00:00:07.215000","2026-02-24T00:00:07.217000","2026-02-24T00:00:07.219000","2026-02-24T00:00:07.220000","2026-02-24T00:00:07.222000","2026-02-24T00:00:07.223000","2026-02-24T00:00:07.225000","2026-02-24T00:00:07.226000","2026-02-24T00:00:07.228000","2026-02-24T00:00:07.230000","2026-02-24T00:00:07.231000","2026-02-24T00:00:07.233000","2026-02-24T00:00:07.235000","2026-02-24T00:00:07.236000","2026-02-24T00:00:07.238000","2026-02-24T00:00:07.239000","2026-02-24T00:00:07.241000","2026-02-24T00:00:07.242000","2026-02-24T00:00:07.244000","2026-02-24T00:00:07.246000","2026-02-24T00:00:07.247000","2026-02-24T00:00:07.249000","2026-02-24T00:00:07.251000","2026-02-24T00:00:07.252000","2026-02-24T00:00:07.254000","2026-02-24T00:00:07.255000","2026-02-24T00:00:07.257000","2026-02-24T00:00:07.258000","2026-02-24T00:00:07.260000","2026-02-24T00:00:07.262000","2026-02-24T00:00:07.263000","2026-02-24T00:00:07.265000","2026-02-24T00:00:07.267000","2026-02-24T00:00:07.268000","2026-02-24T00:00:07.270000","2026-02-24T00:00:07.271000","2026-02-24T00:00:07.273000","2026-02-24T00:00:07.274000","2026-02-24T00:00:07.276000","2026-02-24T00:00:07.278000","2026-02-24T00:00:07.279000","2026-02-24T00:00:07.281000","2026-02-24T00:00:07.282000","2026-02-24T00:00:07.284000","2026-02-24T00:00:07.286000","2026-02-24T00:00:07.287000","2026-02-24T00:00:07.289000","2026-02-24T00:00:07.291000","2026-02-24T00:00:07.292000","2026-02-24T00:00:07.294000","2026-02-24T00:00:07.295000","2026-02-24T00:00:07.297000","2026-02-24T00:00:07.298000","2026-02-24T00:00:07.300000","2026-02-24T00:00:07.302000","2026-02-24T00:00:07.303000","2026-02-24T00:00:07.305000","2026-02-24T00:00:07.306000","2026-02-24T00:00:07.308000","2026-02-24T00:00:07.310000","2026-02-24T00:00:07.311000","2026-02-24T00:00:07.313000","2026-02-24T00:00:07.314000","2026-02-24T00:00:07.316000","2026-02-24T00:00:07.318000","2026-02-24T00:00:07.319000","2026-02-24T00:00:07.321000","2026-02-24T00:00:07.322000","2026-02-24T00:00:07.324000","2026-02-24T00:00:07.326000","2026-02-24T00:00:07.327000","2026-02-24T00:00:07.329000","2026-02-24T00:00:07.330000","2026-02-24T00:00:07.332000","2026-02-24T00:00:07.334000","2026-02-24T00:00:07.335000","2026-02-24T00:00:07.337000","2026-02-24T00:00:07.339000","2026-02-24T00:00:07.340000","2026-02-24T00:00:07.342000","2026-02-24T00:00:07.343000","2026-02-24T00:00:07.345000","2026-02-24T00:00:07.347000","2026-02-24T00:00:07.348000","2026-02-24T00:00:07.350000","2026-02-24T00:00:07.352000","2026-02-24T00:00:07.353000","2026-02-24T00:00:07.355000","2026-02-24T00:00:07.357000","2026-02-24T00:00:07.358000","2026-02-24T00:00:07.360000","2026-02-24T00:00:07.362000","2026-02-24T00:00:07.363000","2026-02-24T00:00:07.365000","2026-02-24T00:00:07.366000","2026-02-24T00:00:07.368000","2026-02-24T00:00:07.370000","2026-02-24T00:00:07.371000","2026-02-24T00:00:07.373000","2026-02-24T00:00:07.375000","2026-02-24T00:00:07.376000","2026-02-24T00:00:07.378000","2026-02-24T00:00:07.380000","2026-02-24T00:00:07.381000","2026-02-24T00:00:07.383000","2026-02-24T00:00:07.385000","2026-02-24T00:00:07.386000","2026-02-24T00:00:07.388000","2026-02-24T00:00:07.390000","2026-02-24T00:00:07.391000","2026-02-24T00:00:07.393000","2026-02-24T00:00:07.394000","2026-02-24T00:00:07.396000","2026-02-24T00:00:07.398000","2026-02-24T00:00:07.399000","2026-02-24T00:00:07.401000","2026-02-24T00:00:07.403000","2026-02-24T00:00:07.404000","2026-02-24T00:00:07.406000","2026-02-24T00:00:07.408000","2026-02-24T00:00:07.409000","2026-02-24T00:00:07.411000","2026-02-24T00:00:07.412000","2026-02-24T00:00:07.414000","2026-02-24T00:00:07.416000","2026-02-24T00:00:07.417000","2026-02-24T00:00:07.419000","2026-02-24T00:00:06.695000","2026-02-24T00:00:06.696000","2026-02-24T00:00:06.697000","2026-02-24T00:00:06.698000","2026-02-24T00:00:06.706000","2026-02-24T00:00:06.707000","2026-02-24T00:00:06.709000","2026-02-24T00:00:06.710000","2026-02-24T00:00:06.712000","2026-02-24T00:00:06.713000","2026-02-24T00:00:06.715000","2026-02-24T00:00:06.716000","2026-02-24T00:00:06.720000","2026-02-24T00:00:06.720000","2026-02-24T00:00:06.722000","2026-02-24T00:00:06.724000","2026-02-24T00:00:06.725000","2026-02-24T00:00:06.726000","2026-02-24T00:00:06.728000","2026-02-24T00:00:06.729000","2026-02-24T00:00:06.731000","2026-02-24T00:00:06.732000","2026-02-24T00:00:06.733000","2026-02-24T00:00:06.735000","2026-02-24T00:00:06.736000","2026-02-24T00:00:06.738000","2026-02-24T00:00:06.739000","2026-02-24T00:00:06.741000","2026-02-24T00:00:06.742000","2026-02-24T00:00:06.744000","2026-02-24T00:00:06.745000","2026-02-24T00:00:06.747000","2026-02-24T00:00:06.748000","2026-02-24T00:00:06.749000","2026-02-24T00:00:06.751000","2026-02-24T00:00:06.753000","2026-02-24T00:00:06.754000","2026-02-24T00:00:06.755000","2026-02-24T00:00:06.757000","2026-02-24T00:00:06.759000","2026-02-24T00:00:06.760000","2026-02-24T00:00:06.761000","2026-02-24T00:00:06.763000","2026-02-24T00:00:06.769000","2026-02-24T00:00:06.770000","2026-02-24T00:00:06.771000","2026-02-24T00:00:06.772000","2026-02-24T00:00:06.774000","2026-02-24T00:00:06.775000","2026-02-24T00:00:06.777000","2026-02-24T00:00:06.778000","2026-02-24T00:00:06.780000","2026-02-24T00:00:06.781000","2026-02-24T00:00:06.783000","2026-02-24T00:00:06.784000","2026-02-24T00:00:06.786000","2026-02-24T00:00:06.787000","2026-02-24T00:00:06.789000","2026-02-24T00:00:06.790000","2026-02-24T00:00:06.792000","2026-02-24T00:00:06.793000","2026-02-24T00:00:06.795000","2026-02-24T00:00:06.796000","2026-02-24T00:00:06.798000","2026-02-24T00:00:06.799000","2026-02-24T00:00:06.801000","2026-02-24T00:00:06.802000","2026-02-24T00:00:06.804000","2026-02-24T00:00:06.805000","2026-02-24T00:00:06.811000","2026-02-24T00:00:06.812000","2026-02-24T00:00:06.813000","2026-02-24T00:00:06.814000","2026-02-24T00:00:06.816000","2026-02-24T00:00:06.817000","2026-02-24T00:00:06.819000","2026-02-24T00:00:06.820000","2026-02-24T00:00:06.822000","2026-02-24T00:00:06.824000","2026-02-24T00:00:06.825000","2026-02-24T00:00:06.827000","2026-02-24T00:00:06.828000","2026-02-24T00:00:06.829000","2026-02-24T00:00:06.831000","2026-02-24T00:00:06.832000","2026-02-24T00:00:06.834000","2026-02-24T00:00:06.835000","2026-02-24T00:00:06.837000","2026-02-24T00:00:06.838000","2026-02-24T00:00:06.840000","2026-02-24T00:00:06.841000","2026-02-24T00:00:06.843000","2026-02-24T00:00:06.844000","2026-02-24T00:00:06.846000","2026-02-24T00:00:06.847000","2026-02-24T00:00:06.849000","2026-02-24T00:00:06.850000","2026-02-24T00:00:06.852000","2026-02-24T00:00:06.853000","2026-02-24T00:00:06.855000","2026-02-24T00:00:06.856000","2026-02-24T00:00:06.858000","2026-02-24T00:00:06.859000","2026-02-24T00:00:06.861000","2026-02-24T00:00:06.862000","2026-02-24T00:00:06.864000","2026-02-24T00:00:06.865000","2026-02-24T00:00:06.867000","2026-02-24T00:00:06.868000","2026-02-24T00:00:06.870000","2026-02-24T00:00:06.871000","2026-02-24T00:00:06.873000","2026-02-24T00:00:06.874000","2026-02-24T00:00:06.876000","2026-02-24T00:00:06.877000","2026-02-24T00:00:06.879000","2026-02-24T00:00:06.880000","2026-02-24T00:00:06.882000","2026-02-24T00:00:06.883000","2026-02-24T00:00:06.884000","2026-02-24T00:00:06.886000","2026-02-24T00:00:06.888000","2026-02-24T00:00:06.889000","2026-02-24T00:00:06.891000","2026-02-24T00:00:06.892000","2026-02-24T00:00:06.893000","2026-02-24T00:00:06.895000","2026-02-24T00:00:06.896000","2026-02-24T00:00:06.898000","2026-02-24T00:00:06.899000","2026-02-24T00:00:06.901000","2026-02-24T00:00:06.902000","2026-02-24T00:00:06.904000","2026-02-24T00:00:06.906000","2026-02-24T00:00:06.907000","2026-02-24T00:00:06.908000","2026-02-24T00:00:06.910000","2026-02-24T00:00:06.911000","2026-02-24T00:00:06.913000","2026-02-24T00:00:06.914000","2026-02-24T00:00:06.916000","2026-02-24T00:00:06.917000","2026-02-24T00:00:06.919000","2026-02-24T00:00:06.920000","2026-02-24T00:00:06.922000","2026-02-24T00:00:06.923000","2026-02-24T00:00:06.925000","2026-02-24T00:00:06.926000","2026-02-24T00:00:06.928000","2026-02-24T00:00:06.929000","2026-02-24T00:00:06.931000","2026-02-24T00:00:06.932000","2026-02-24T00:00:06.934000","2026-02-24T00:00:06.935000","2026-02-24T00:00:06.937000","2026-02-24T00:00:06.938000","2026-02-24T00:00:06.940000","2026-02-24T00:00:06.942000","2026-02-24T00:00:06.943000","2026-02-24T00:00:06.945000","2026-02-24T00:00:06.946000","2026-02-24T00:00:06.948000","2026-02-24T00:00:06.949000","2026-02-24T00:00:06.951000","2026-02-24T00:00:06.952000","2026-02-24T00:00:06.956000","2026-02-24T00:00:06.957000","2026-02-24T00:00:06.959000","2026-02-24T00:00:06.960000","2026-02-24T00:00:06.962000","2026-02-24T00:00:06.963000","2026-02-24T00:00:06.964000","2026-02-24T00:00:06.969000","2026-02-24T00:00:06.969000","2026-02-24T00:00:06.971000","2026-02-24T00:00:06.972000","2026-02-24T00:00:06.974000","2026-02-24T00:00:06.976000","2026-02-24T00:00:06.977000","2026-02-24T00:00:06.978000","2026-02-24T00:00:06.980000","2026-02-24T00:00:06.981000","2026-02-24T00:00:06.983000","2026-02-24T00:00:06.984000","2026-02-24T00:00:06.986000","2026-02-24T00:00:06.988000","2026-02-24T00:00:06.989000","2026-02-24T00:00:06.991000","2026-02-24T00:00:06.992000","2026-02-24T00:00:06.994000","2026-02-24T00:00:06.995000","2026-02-24T00:00:06.997000","2026-02-24T00:00:06.998000","2026-02-24T00:00:07.000000","2026-02-24T00:00:07.001000","2026-02-24T00:00:07.003000","2026-02-24T00:00:07.005000","2026-02-24T00:00:07.006000","2026-02-24T00:00:07.008000","2026-02-24T00:00:07.009000","2026-02-24T00:00:07.011000","2026-02-24T00:00:07.012000","2026-02-24T00:00:07.014000","2026-02-24T00:00:07.015000","2026-02-24T00:00:07.017000","2026-02-24T00:00:07.018000","2026-02-24T00:00:07.020000","2026-02-24T00:00:07.021000","2026-02-24T00:00:07.023000","2026-02-24T00:00:07.025000","2026-02-24T00:00:07.026000","2026-02-24T00:00:07.028000","2026-02-24T00:00:07.029000","2026-02-24T00:00:07.031000","2026-02-24T00:00:07.032000","2026-02-24T00:00:07.034000","2026-02-24T00:00:07.035000","2026-02-24T00:00:07.037000","2026-02-24T00:00:07.038000","2026-02-24T00:00:07.040000","2026-02-24T00:00:07.042000","2026-02-24T00:00:07.043000","2026-02-24T00:00:07.045000","2026-02-24T00:00:07.046000","2026-02-24T00:00:07.048000","2026-02-24T00:00:07.049000","2026-02-24T00:00:07.051000","2026-02-24T00:00:07.052000","2026-02-24T00:00:07.054000","2026-02-24T00:00:07.055000","2026-02-24T00:00:07.057000","2026-02-24T00:00:07.058000","2026-02-24T00:00:07.060000","2026-02-24T00:00:07.062000","2026-02-24T00:00:07.063000","2026-02-24T00:00:07.065000","2026-02-24T00:00:07.066000","2026-02-24T00:00:07.068000","2026-02-24T00:00:07.069000","2026-02-24T00:00:07.071000","2026-02-24T00:00:07.072000","2026-02-24T00:00:07.074000","2026-02-24T00:00:07.075000","2026-02-24T00:00:07.077000","2026-02-24T00:00:07.079000","2026-02-24T00:00:07.080000","2026-02-24T00:00:07.082000","2026-02-24T00:00:07.083000","2026-02-24T00:00:07.085000","2026-02-24T00:00:07.086000","2026-02-24T00:00:07.088000","2026-02-24T00:00:07.089000","2026-02-24T00:00:07.091000","2026-02-24T00:00:07.092000","2026-02-24T00:00:07.094000","2026-02-24T00:00:07.096000","2026-02-24T00:00:07.097000","2026-02-24T00:00:07.099000","2026-02-24T00:00:07.100000","2026-02-24T00:00:07.102000","2026-02-24T00:00:07.103000","2026-02-24T00:00:07.105000","2026-02-24T00:00:07.106000","2026-02-24T00:00:07.108000","2026-02-24T00:00:07.109000","2026-02-24T00:00:07.111000","2026-02-24T00:00:07.113000","2026-02-24T00:00:07.114000","2026-02-24T00:00:07.116000","2026-02-24T00:00:07.117000","2026-02-24T00:00:07.119000","2026-02-24T00:00:07.120000","2026-02-24T00:00:07.122000","2026-02-24T00:00:07.123000","2026-02-24T00:00:07.125000","2026-02-24T00:00:07.126000","2026-02-24T00:00:07.128000","2026-02-24T00:00:07.130000","2026-02-24T00:00:07.131000","2026-02-24T00:00:07.133000","2026-02-24T00:00:07.134000","2026-02-24T00:00:07.136000","2026-02-24T00:00:07.137000","2026-02-24T00:00:07.139000","2026-02-24T00:00:07.141000","2026-02-24T00:00:07.142000","2026-02-24T00:00:07.144000","2026-02-24T00:00:07.145000","2026-02-24T00:00:07.147000","2026-02-24T00:00:07.149000","2026-02-24T00:00:07.150000","2026-02-24T00:00:07.152000","2026-02-24T00:00:07.153000","2026-02-24T00:00:07.155000","2026-02-24T00:00:07.157000","2026-02-24T00:00:07.158000","2026-02-24T00:00:07.160000","2026-02-24T00:00:07.161000","2026-02-24T00:00:07.163000","2026-02-24T00:00:07.165000","2026-02-24T00:00:07.166000","2026-02-24T00:00:07.168000","2026-02-24T00:00:07.169000","2026-02-24T00:00:07.171000","2026-02-24T00:00:07.172000","2026-02-24T00:00:07.177000","2026-02-24T00:00:07.178000","2026-02-24T00:00:07.179000","2026-02-24T00:00:07.181000","2026-02-24T00:00:07.182000","2026-02-24T00:00:07.184000","2026-02-24T00:00:07.185000","2026-02-24T00:00:06.770000","2026-02-24T00:00:06.771000","2026-02-24T00:00:06.772000","2026-02-24T00:00:06.774000","2026-02-24T00:00:06.775000","2026-02-24T00:00:06.777000","2026-02-24T00:00:06.778000","2026-02-24T00:00:06.780000","2026-02-24T00:00:06.781000","2026-02-24T00:00:06.783000","2026-02-24T00:00:06.784000","2026-02-24T00:00:06.786000","2026-02-24T00:00:06.787000","2026-02-24T00:00:06.789000","2026-02-24T00:00:06.790000","2026-02-24T00:00:06.792000","2026-02-24T00:00:06.793000","2026-02-24T00:00:06.795000","2026-02-24T00:00:06.796000","2026-02-24T00:00:06.798000","2026-02-24T00:00:06.799000","2026-02-24T00:00:06.801000","2026-02-24T00:00:06.802000","2026-02-24T00:00:06.804000","2026-02-24T00:00:06.805000","2026-02-24T00:00:06.812000","2026-02-24T00:00:06.813000","2026-02-24T00:00:06.814000","2026-02-24T00:00:06.816000","2026-02-24T00:00:06.817000","2026-02-24T00:00:06.819000","2026-02-24T00:00:06.820000","2026-02-24T00:00:06.822000","2026-02-24T00:00:06.824000","2026-02-24T00:00:06.825000","2026-02-24T00:00:06.827000","2026-02-24T00:00:06.828000","2026-02-24T00:00:06.829000","2026-02-24T00:00:06.831000","2026-02-24T00:00:06.832000","2026-02-24T00:00:06.834000","2026-02-24T00:00:06.835000","2026-02-24T00:00:06.837000","2026-02-24T00:00:06.838000","2026-02-24T00:00:06.840000","2026-02-24T00:00:06.841000","2026-02-24T00:00:06.843000","2026-02-24T00:00:06.844000","2026-02-24T00:00:06.846000","2026-02-24T00:00:06.847000","2026-02-24T00:00:06.849000","2026-02-24T00:00:06.850000","2026-02-24T00:00:06.852000","2026-02-24T00:00:06.853000","2026-02-24T00:00:06.855000","2026-02-24T00:00:06.856000","2026-02-24T00:00:06.858000","2026-02-24T00:00:06.859000","2026-02-24T00:00:06.861000","2026-02-24T00:00:06.862000","2026-02-24T00:00:06.864000","2026-02-24T00:00:06.865000","2026-02-24T00:00:06.867000","2026-02-24T00:00:06.868000","2026-02-24T00:00:06.870000","2026-02-24T00:00:06.871000","2026-02-24T00:00:06.873000","2026-02-24T00:00:06.874000","2026-02-24T00:00:06.876000","2026-02-24T00:00:06.877000","2026-02-24T00:00:06.879000","2026-02-24T00:00:06.880000","2026-02-24T00:00:06.882000","2026-02-24T00:00:06.883000","2026-02-24T00:00:06.885000","2026-02-24T00:00:06.886000","2026-02-24T00:00:06.888000","2026-02-24T00:00:06.889000","2026-02-24T00:00:06.891000","2026-02-24T00:00:06.892000","2026-02-24T00:00:06.894000","2026-02-24T00:00:06.895000","2026-02-24T00:00:06.896000","2026-02-24T00:00:06.898000","2026-02-24T00:00:06.899000","2026-02-24T00:00:06.901000","2026-02-24T00:00:06.902000","2026-02-24T00:00:06.904000","2026-02-24T00:00:06.906000","2026-02-24T00:00:06.907000","2026-02-24T00:00:06.909000","2026-02-24T00:00:06.910000","2026-02-24T00:00:06.912000","2026-02-24T00:00:06.913000","2026-02-24T00:00:06.914000","2026-02-24T00:00:06.916000","2026-02-24T00:00:06.917000","2026-02-24T00:00:06.919000","2026-02-24T00:00:06.920000","2026-02-24T00:00:06.922000","2026-02-24T00:00:06.923000","2026-02-24T00:00:06.925000","2026-02-24T00:00:06.926000","2026-02-24T00:00:06.928000","2026-02-24T00:00:06.929000","2026-02-24T00:00:06.931000","2026-02-24T00:00:06.932000","2026-02-24T00:00:06.934000","2026-02-24T00:00:06.935000","2026-02-24T00:00:06.937000","2026-02-24T00:00:06.938000","2026-02-24T00:00:06.940000","2026-02-24T00:00:06.942000","2026-02-24T00:00:06.943000","2026-02-24T00:00:06.945000","2026-02-24T00:00:06.946000","2026-02-24T00:00:06.948000","2026-02-24T00:00:06.949000","2026-02-24T00:00:06.951000","2026-02-24T00:00:06.952000","2026-02-24T00:00:06.956000","2026-02-24T00:00:06.957000","2026-02-24T00:00:06.959000","2026-02-24T00:00:06.960000","2026-02-24T00:00:06.962000","2026-02-24T00:00:06.963000","2026-02-24T00:00:06.967000","2026-02-24T00:00:06.969000","2026-02-24T00:00:06.969000","2026-02-24T00:00:06.971000","2026-02-24T00:00:06.972000","2026-02-24T00:00:06.974000","2026-02-24T00:00:06.976000","2026-02-24T00:00:06.977000","2026-02-24T00:00:06.978000","2026-02-24T00:00:06.980000","2026-02-24T00:00:06.981000","2026-02-24T00:00:06.983000","2026-02-24T00:00:06.984000","2026-02-24T00:00:06.986000","2026-02-24T00:00:06.988000","2026-02-24T00:00:06.989000","2026-02-24T00:00:06.991000","2026-02-24T00:00:06.992000","2026-02-24T00:00:06.994000","2026-02-24T00:00:06.996000","2026-02-24T00:00:06.997000","2026-02-24T00:00:06.998000","2026-02-24T00:00:07.000000","2026-02-24T00:00:07.001000","2026-02-24T00:00:07.003000","2026-02-24T00:00:07.005000","2026-02-24T00:00:07.006000","2026-02-24T00:00:07.008000","2026-02-24T00:00:07.009000","2026-02-24T00:00:07.011000","2026-02-24T00:00:07.012000","2026-02-24T00:00:07.014000","2026-02-24T00:00:07.015000","2026-02-24T00:00:07.017000","2026-02-24T00:00:07.018000","2026-02-24T00:00:07.020000","2026-02-24T00:00:07.022000","2026-02-24T00:00:07.023000","2026-02-24T00:00:07.025000","2026-02-24T00:00:07.026000","2026-02-24T00:00:07.028000","2026-02-24T00:00:07.029000","2026-02-24T00:00:07.031000","2026-02-24T00:00:07.033000","2026-02-24T00:00:07.034000","2026-02-24T00:00:07.035000","2026-02-24T00:00:07.037000","2026-02-24T00:00:07.038000","2026-02-24T00:00:07.040000","2026-02-24T00:00:07.042000","2026-02-24T00:00:07.043000","2026-02-24T00:00:07.045000","2026-02-24T00:00:07.046000","2026-02-24T00:00:07.048000","2026-02-24T00:00:07.049000","2026-02-24T00:00:07.051000","2026-02-24T00:00:07.052000","2026-02-24T00:00:07.054000","2026-02-24T00:00:07.055000","2026-02-24T00:00:07.057000","2026-02-24T00:00:07.059000","2026-02-24T00:00:07.060000","2026-02-24T00:00:07.062000","2026-02-24T00:00:07.063000","2026-02-24T00:00:07.065000","2026-02-24T00:00:07.066000","2026-02-24T00:00:07.068000","2026-02-24T00:00:07.069000","2026-02-24T00:00:07.071000","2026-02-24T00:00:07.072000","2026-02-24T00:00:07.074000","2026-02-24T00:00:07.075000","2026-02-24T00:00:07.077000","2026-02-24T00:00:07.079000","2026-02-24T00:00:07.080000","2026-02-24T00:00:07.082000","2026-02-24T00:00:07.083000","2026-02-24T00:00:07.085000","2026-02-24T00:00:07.086000","2026-02-24T00:00:07.088000","2026-02-24T00:00:07.089000","2026-02-24T00:00:07.091000","2026-02-24T00:00:07.092000","2026-02-24T00:00:07.094000","2026-02-24T00:00:07.096000","2026-02-24T00:00:07.097000","2026-02-24T00:00:07.099000","2026-02-24T00:00:07.100000","2026-02-24T00:00:07.102000","2026-02-24T00:00:07.103000","2026-02-24T00:00:07.105000","2026-02-24T00:00:07.106000","2026-02-24T00:00:07.108000","2026-02-24T00:00:07.110000","2026-02-24T00:00:07.111000","2026-02-24T00:00:07.113000","2026-02-24T00:00:07.114000","2026-02-24T00:00:07.116000","2026-02-24T00:00:07.117000","2026-02-24T00:00:07.119000","2026-02-24T00:00:07.120000","2026-02-24T00:00:07.122000","2026-02-24T00:00:07.123000","2026-02-24T00:00:07.125000","2026-02-24T00:00:07.127000","2026-02-24T00:00:07.128000","2026-02-24T00:00:07.130000","2026-02-24T00:00:07.131000","2026-02-24T00:00:07.133000","2026-02-24T00:00:07.134000","2026-02-24T00:00:07.136000","2026-02-24T00:00:07.137000","2026-02-24T00:00:07.139000","2026-02-24T00:00:07.141000","2026-02-24T00:00:07.142000","2026-02-24T00:00:07.144000","2026-02-24T00:00:07.145000","2026-02-24T00:00:07.147000","2026-02-24T00:00:07.149000","2026-02-24T00:00:07.150000","2026-02-24T00:00:07.152000","2026-02-24T00:00:07.153000","2026-02-24T00:00:07.155000","2026-02-24T00:00:07.157000","2026-02-24T00:00:07.158000","2026-02-24T00:00:07.160000","2026-02-24T00:00:07.161000","2026-02-24T00:00:07.163000","2026-02-24T00:00:07.165000","2026-02-24T00:00:07.166000","2026-02-24T00:00:07.168000","2026-02-24T00:00:07.169000","2026-02-24T00:00:07.171000","2026-02-24T00:00:07.173000","2026-02-24T00:00:07.177000","2026-02-24T00:00:07.178000","2026-02-24T00:00:07.179000","2026-02-24T00:00:07.181000","2026-02-24T00:00:07.182000","2026-02-24T00:00:07.184000","2026-02-24T00:00:07.185000","2026-02-24T00:00:07.189000","2026-02-24T00:00:07.192000","2026-02-24T00:00:07.193000","2026-02-24T00:00:07.195000","2026-02-24T00:00:07.196000","2026-02-24T00:00:07.198000","2026-02-24T00:00:07.200000","2026-02-24T00:00:07.201000","2026-02-24T00:00:07.203000","2026-02-24T00:00:07.204000","2026-02-24T00:00:07.206000","2026-02-24T00:00:07.207000","2026-02-24T00:00:07.209000","2026-02-24T00:00:07.211000","2026-02-24T00:00:07.212000","2026-02-24T00:00:07.214000","2026-02-24T00:00:07.216000","2026-02-24T00:00:07.217000","2026-02-24T00:00:07.219000","2026-02-24T00:00:07.220000","2026-02-24T00:00:07.222000","2026-02-24T00:00:07.223000","2026-02-24T00:00:07.225000","2026-02-24T00:00:07.227000","2026-02-24T00:00:07.228000","2026-02-24T00:00:07.230000","2026-02-24T00:00:07.232000","2026-02-24T00:00:07.233000","2026-02-24T00:00:07.235000","2026-02-24T00:00:07.236000","2026-02-24T00:00:07.238000","2026-02-24T00:00:07.239000","2026-02-24T00:00:07.241000","2026-02-24T00:00:07.243000","2026-02-24T00:00:07.244000","2026-02-24T00:00:07.246000","2026-02-24T00:00:07.247000","2026-02-24T00:00:07.249000","2026-02-24T00:00:07.251000","2026-02-24T00:00:07.252000","2026-02-24T00:00:07.254000","2026-02-24T00:00:07.255000","2026-02-24T00:00:07.257000","2026-02-24T00:00:07.259000","2026-02-24T00:00:07.260000","2026-02-24T00:00:07.262000","2026-02-24T00:00:07.263000","2026-02-24T00:00:07.265000","2026-02-24T00:00:07.267000","2026-02-24T00:00:07.268000","2026-02-24T00:00:07.270000","2026-02-24T00:00:07.271000","2026-02-24T00:00:07.273000","2026-02-24T00:00:07.275000","2026-02-24T00:00:07.276000","2026-02-24T00:00:07.278000","2026-02-24T00:00:07.279000","2026-02-24T00:00:07.281000","2026-02-24T00:00:07.283000","2026-02-24T00:00:07.284000","2026-02-24T00:00:07.286000","2026-02-24T00:00:07.287000","2026-02-24T00:00:07.289000","2026-02-24T00:00:07.291000","2026-02-24T00:00:07.292000","2026-02-24T00:00:07.294000","2026-02-24T00:00:07.295000","2026-02-24T00:00:07.297000","2026-02-24T00:00:07.299000","2026-02-24T00:00:07.300000","2026-02-24T00:00:07.302000","2026-02-24T00:00:07.303000","2026-02-24T00:00:07.305000","2026-02-24T00:00:07.307000","2026-02-24T00:00:07.308000","2026-02-24T00:00:07.310000","2026-02-24T00:00:07.311000","2026-02-24T00:00:07.313000","2026-02-24T00:00:07.315000","2026-02-24T00:00:07.316000","2026-02-24T00:00:07.318000","2026-02-24T00:00:07.319000","2026-02-24T00:00:07.321000","2026-02-24T00:00:07.323000","2026-02-24T00:00:07.324000","2026-02-24T00:00:07.326000","2026-02-24T00:00:07.327000","2026-02-24T00:00:07.329000","2026-02-24T00:00:07.331000","2026-02-24T00:00:07.332000","2026-02-24T00:00:07.334000","2026-02-24T00:00:07.335000","2026-02-24T00:00:07.337000","2026-02-24T00:00:07.339000","2026-02-24T00:00:07.340000","2026-02-24T00:00:07.342000","2026-02-24T00:00:07.344000","2026-02-24T00:00:07.345000","2026-02-24T00:00:07.347000","2026-02-24T00:00:07.349000","2026-02-24T00:00:07.350000","2026-02-24T00:00:07.352000","2026-02-24T00:00:07.354000","2026-02-24T00:00:07.355000","2026-02-24T00:00:07.357000","2026-02-24T00:00:07.358000","2026-02-24T00:00:07.360000","2026-02-24T00:00:07.362000","2026-02-24T00:00:07.363000","2026-02-24T00:00:07.365000","2026-02-24T00:00:07.367000","2026-02-24T00:00:07.368000","2026-02-24T00:00:07.370000","2026-02-24T00:00:07.372000","2026-02-24T00:00:07.373000","2026-02-24T00:00:07.375000","2026-02-24T00:00:07.376000","2026-02-24T00:00:07.378000","2026-02-24T00:00:07.380000","2026-02-24T00:00:07.381000","2026-02-24T00:00:07.383000","2026-02-24T00:00:07.385000","2026-02-24T00:00:07.386000","2026-02-24T00:00:07.388000","2026-02-24T00:00:07.390000","2026-02-24T00:00:07.391000","2026-02-24T00:00:07.393000","2026-02-24T00:00:07.394000","2026-02-24T00:00:07.396000","2026-02-24T00:00:07.398000","2026-02-24T00:00:07.399000","2026-02-24T00:00:07.401000","2026-02-24T00:00:07.403000","2026-02-24T00:00:07.404000","2026-02-24T00:00:07.406000","2026-02-24T00:00:07.408000","2026-02-24T00:00:07.409000","2026-02-24T00:00:07.411000","2026-02-24T00:00:07.413000","2026-02-24T00:00:07.414000","2026-02-24T00:00:07.416000","2026-02-24T00:00:07.418000","2026-02-24T00:00:07.419000","2026-02-24T00:00:07.426000","2026-02-24T00:00:07.428000","2026-02-24T00:00:07.429000","2026-02-24T00:00:07.430000","2026-02-24T00:00:07.432000","2026-02-24T00:00:07.433000","2026-02-24T00:00:07.435000","2026-02-24T00:00:07.437000","2026-02-24T00:00:07.439000","2026-02-24T00:00:07.440000","2026-02-24T00:00:07.442000","2026-02-24T00:00:07.443000","2026-02-24T00:00:07.445000","2026-02-24T00:00:07.447000","2026-02-24T00:00:07.448000","2026-02-24T00:00:07.450000","2026-02-24T00:00:07.452000","2026-02-24T00:00:07.453000","2026-02-24T00:00:07.455000","2026-02-24T00:00:07.457000","2026-02-24T00:00:07.459000","2026-02-24T00:00:07.460000","2026-02-24T00:00:07.461000","2026-02-24T00:00:07.192000","2026-02-24T00:00:07.193000","2026-02-24T00:00:07.195000","2026-02-24T00:00:07.196000","2026-02-24T00:00:07.198000","2026-02-24T00:00:07.200000","2026-02-24T00:00:07.201000","2026-02-24T00:00:07.203000","2026-02-24T00:00:07.204000","2026-02-24T00:00:07.206000","2026-02-24T00:00:07.207000","2026-02-24T00:00:07.209000","2026-02-24T00:00:07.211000","2026-02-24T00:00:07.212000","2026-02-24T00:00:07.214000","2026-02-24T00:00:07.215000","2026-02-24T00:00:07.217000","2026-02-24T00:00:07.219000","2026-02-24T00:00:07.220000","2026-02-24T00:00:07.222000","2026-02-24T00:00:07.223000","2026-02-24T00:00:07.225000","2026-02-24T00:00:07.227000","2026-02-24T00:00:07.228000","2026-02-24T00:00:07.230000","2026-02-24T00:00:07.232000","2026-02-24T00:00:07.233000","2026-02-24T00:00:07.235000","2026-02-24T00:00:07.236000","2026-02-24T00:00:07.238000","2026-02-24T00:00:07.239000","2026-02-24T00:00:07.241000","2026-02-24T00:00:07.243000","2026-02-24T00:00:07.244000","2026-02-24T00:00:07.246000","2026-02-24T00:00:07.247000","2026-02-24T00:00:07.249000","2026-02-24T00:00:07.251000","2026-02-24T00:00:07.252000","2026-02-24T00:00:07.254000","2026-02-24T00:00:07.255000","2026-02-24T00:00:07.257000","2026-02-24T00:00:07.259000","2026-02-24T00:00:07.260000","2026-02-24T00:00:07.262000","2026-02-24T00:00:07.263000","2026-02-24T00:00:07.265000","2026-02-24T00:00:07.267000","2026-02-24T00:00:07.268000","2026-02-24T00:00:07.270000","2026-02-24T00:00:07.271000","2026-02-24T00:00:07.273000","2026-02-24T00:00:07.275000","2026-02-24T00:00:07.276000","2026-02-24T00:00:07.278000","2026-02-24T00:00:07.279000","2026-02-24T00:00:07.281000","2026-02-24T00:00:07.283000","2026-02-24T00:00:07.284000","2026-02-24T00:00:07.286000","2026-02-24T00:00:07.287000","2026-02-24T00:00:07.289000","2026-02-24T00:00:07.291000","2026-02-24T00:00:07.292000","2026-02-24T00:00:07.294000","2026-02-24T00:00:07.295000","2026-02-24T00:00:07.297000","2026-02-24T00:00:07.299000","2026-02-24T00:00:07.300000","2026-02-24T00:00:07.302000","2026-02-24T00:00:07.303000","2026-02-24T00:00:07.305000","2026-02-24T00:00:07.306000","2026-02-24T00:00:07.308000","2026-02-24T00:00:07.310000","2026-02-24T00:00:07.311000","2026-02-24T00:00:07.313000","2026-02-24T00:00:07.315000","2026-02-24T00:00:07.316000","2026-02-24T00:00:07.318000","2026-02-24T00:00:07.319000","2026-02-24T00:00:07.321000","2026-02-24T00:00:07.323000","2026-02-24T00:00:07.324000","2026-02-24T00:00:07.326000","2026-02-24T00:00:07.327000","2026-02-24T00:00:07.329000","2026-02-24T00:00:07.331000","2026-02-24T00:00:07.332000","2026-02-24T00:00:07.334000","2026-02-24T00:00:07.335000","2026-02-24T00:00:07.337000","2026-02-24T00:00:07.339000","2026-02-24T00:00:07.340000","2026-02-24T00:00:07.342000","2026-02-24T00:00:07.344000","2026-02-24T00:00:07.345000","2026-02-24T00:00:07.347000","2026-02-24T00:00:07.349000","2026-02-24T00:00:07.350000","2026-02-24T00:00:07.352000","2026-02-24T00:00:07.353000","2026-02-24T00:00:07.355000","2026-02-24T00:00:07.357000","2026-02-24T00:00:07.358000","2026-02-24T00:00:07.360000","2026-02-24T00:00:07.362000","2026-02-24T00:00:07.363000","2026-02-24T00:00:07.365000","2026-02-24T00:00:07.367000","2026-02-24T00:00:07.368000","2026-02-24T00:00:07.370000","2026-02-24T00:00:07.372000","2026-02-24T00:00:07.373000","2026-02-24T00:00:07.375000","2026-02-24T00:00:07.376000","2026-02-24T00:00:07.378000","2026-02-24T00:00:07.380000","2026-02-24T00:00:07.381000","2026-02-24T00:00:07.383000","2026-02-24T00:00:07.385000","2026-02-24T00:00:07.386000","2026-02-24T00:00:07.388000","2026-02-24T00:00:07.390000","2026-02-24T00:00:07.391000","2026-02-24T00:00:07.393000","2026-02-24T00:00:07.394000","2026-02-24T00:00:07.396000","2026-02-24T00:00:07.398000","2026-02-24T00:00:07.399000","2026-02-24T00:00:07.401000","2026-02-24T00:00:07.403000","2026-02-24T00:00:07.404000","2026-02-24T00:00:07.406000","2026-02-24T00:00:07.408000","2026-02-24T00:00:07.409000","2026-02-24T00:00:07.411000","2026-02-24T00:00:07.413000","2026-02-24T00:00:07.414000","2026-02-24T00:00:07.416000","2026-02-24T00:00:07.418000","2026-02-24T00:00:07.419000","2026-02-24T00:00:07.426000","2026-02-24T00:00:07.428000","2026-02-24T00:00:07.428000","2026-02-24T00:00:07.430000","2026-02-24T00:00:07.432000","2026-02-24T00:00:07.433000","2026-02-24T00:00:07.435000","2026-02-24T00:00:07.437000","2026-02-24T00:00:07.439000","2026-02-24T00:00:07.440000","2026-02-24T00:00:07.442000","2026-02-24T00:00:07.443000","2026-02-24T00:00:07.445000","2026-02-24T00:00:07.447000","2026-02-24T00:00:07.448000","2026-02-24T00:00:07.450000","2026-02-24T00:00:07.452000","2026-02-24T00:00:07.453000","2026-02-24T00:00:07.455000","2026-02-24T00:00:07.457000","2026-02-24T00:00:07.459000","2026-02-24T00:00:07.460000","2026-02-24T00:00:07.461000","2026-02-24T00:00:07.469000","2026-02-24T00:00:07.470000","2026-02-24T00:00:07.472000","2026-02-24T00:00:07.473000","2026-02-24T00:00:07.475000","2026-02-24T00:00:07.476000","2026-02-24T00:00:07.478000","2026-02-24T00:00:07.480000","2026-02-24T00:00:07.481000","2026-02-24T00:00:07.483000","2026-02-24T00:00:07.485000","2026-02-24T00:00:07.486000","2026-02-24T00:00:07.488000","2026-02-24T00:00:07.490000","2026-02-24T00:00:07.491000","2026-02-24T00:00:07.493000","2026-02-24T00:00:07.495000","2026-02-24T00:00:07.496000","2026-02-24T00:00:07.498000","2026-02-24T00:00:07.499000","2026-02-24T00:00:07.501000","2026-02-24T00:00:07.503000","2026-02-24T00:00:07.504000","2026-02-24T00:00:07.506000","2026-02-24T00:00:07.508000","2026-02-24T00:00:07.509000","2026-02-24T00:00:07.511000","2026-02-24T00:00:07.513000","2026-02-24T00:00:07.514000","2026-02-24T00:00:07.516000","2026-02-24T00:00:07.518000","2026-02-24T00:00:07.519000","2026-02-24T00:00:07.521000","2026-02-24T00:00:07.522000","2026-02-24T00:00:07.524000","2026-02-24T00:00:07.428000","2026-02-24T00:00:07.429000","2026-02-24T00:00:07.430000","2026-02-24T00:00:07.432000","2026-02-24T00:00:07.433000","2026-02-24T00:00:07.435000","2026-02-24T00:00:07.437000","2026-02-24T00:00:07.439000","2026-02-24T00:00:07.440000","2026-02-24T00:00:07.442000","2026-02-24T00:00:07.443000","2026-02-24T00:00:07.445000","2026-02-24T00:00:07.447000","2026-02-24T00:00:07.448000","2026-02-24T00:00:07.450000","2026-02-24T00:00:07.452000","2026-02-24T00:00:07.453000","2026-02-24T00:00:07.455000","2026-02-24T00:00:07.457000","2026-02-24T00:00:07.459000","2026-02-24T00:00:07.460000","2026-02-24T00:00:07.461000","2026-02-24T00:00:07.469000","2026-02-24T00:00:07.470000","2026-02-24T00:00:07.472000","2026-02-24T00:00:07.473000","2026-02-24T00:00:07.475000","2026-02-24T00:00:07.476000","2026-02-24T00:00:07.478000","2026-02-24T00:00:07.480000","2026-02-24T00:00:07.481000","2026-02-24T00:00:07.483000","2026-02-24T00:00:07.485000","2026-02-24T00:00:07.486000","2026-02-24T00:00:07.488000","2026-02-24T00:00:07.490000","2026-02-24T00:00:07.491000","2026-02-24T00:00:07.493000","2026-02-24T00:00:07.495000","2026-02-24T00:00:07.496000","2026-02-24T00:00:07.498000","2026-02-24T00:00:07.499000","2026-02-24T00:00:07.501000","2026-02-24T00:00:07.503000","2026-02-24T00:00:07.504000","2026-02-24T00:00:07.506000","2026-02-24T00:00:07.508000","2026-02-24T00:00:07.509000","2026-02-24T00:00:07.511000","2026-02-24T00:00:07.513000","2026-02-24T00:00:07.514000","2026-02-24T00:00:07.516000","2026-02-24T00:00:07.518000","2026-02-24T00:00:07.519000","2026-02-24T00:00:07.521000","2026-02-24T00:00:07.522000","2026-02-24T00:00:07.524000","2026-02-24T00:00:07.528000","2026-02-24T00:00:07.530000","2026-02-24T00:00:07.531000","2026-02-24T00:00:07.532000","2026-02-24T00:00:07.534000","2026-02-24T00:00:07.536000","2026-02-24T00:00:07.543000","2026-02-24T00:00:07.545000","2026-02-24T00:00:07.546000","2026-02-24T00:00:07.547000","2026-02-24T00:00:07.549000","2026-02-24T00:00:07.550000","2026-02-24T00:00:07.552000","2026-02-24T00:00:07.554000","2026-02-24T00:00:07.555000","2026-02-24T00:00:07.557000","2026-02-24T00:00:07.559000","2026-02-24T00:00:07.560000","2026-02-24T00:00:07.562000","2026-02-24T00:00:07.564000","2026-02-24T00:00:07.565000","2026-02-24T00:00:07.567000","2026-02-24T00:00:07.569000","2026-02-24T00:00:07.571000","2026-02-24T00:00:07.573000","2026-02-24T00:00:07.574000","2026-02-24T00:00:07.576000","2026-02-24T00:00:07.577000","2026-02-24T00:00:07.579000","2026-02-24T00:00:07.581000","2026-02-24T00:00:07.583000","2026-02-24T00:00:07.584000","2026-02-24T00:00:07.586000","2026-02-24T00:00:07.587000","2026-02-24T00:00:07.595000","2026-02-24T00:00:07.597000","2026-02-24T00:00:07.598000","2026-02-24T00:00:07.600000","2026-02-24T00:00:07.601000","2026-02-24T00:00:07.603000","2026-02-24T00:00:07.604000","2026-02-24T00:00:07.606000","2026-02-24T00:00:07.610000","2026-02-24T00:00:07.611000","2026-02-24T00:00:07.612000","2026-02-24T00:00:07.614000","2026-02-24T00:00:07.615000","2026-02-24T00:00:07.616000","2026-02-24T00:00:07.617000","2026-02-24T00:00:07.618000","2026-02-24T00:00:07.619000","2026-02-24T00:00:07.620000","2026-02-24T00:00:07.621000","2026-02-24T00:00:07.623000","2026-02-24T00:00:07.624000","2026-02-24T00:00:07.625000","2026-02-24T00:00:07.626000","2026-02-24T00:00:07.627000","2026-02-24T00:00:07.628000","2026-02-24T00:00:07.630000","2026-02-24T00:00:07.631000","2026-02-24T00:00:07.632000","2026-02-24T00:00:07.633000","2026-02-24T00:00:07.634000","2026-02-24T00:00:07.636000","2026-02-24T00:00:07.637000","2026-02-24T00:00:07.638000","2026-02-24T00:00:07.639000","2026-02-24T00:00:07.640000","2026-02-24T00:00:07.641000","2026-02-24T00:00:07.642000","2026-02-24T00:00:07.644000","2026-02-24T00:00:07.645000","2026-02-24T00:00:07.646000","2026-02-24T00:00:07.647000","2026-02-24T00:00:07.648000","2026-02-24T00:00:07.649000","2026-02-24T00:00:07.651000","2026-02-24T00:00:07.652000","2026-02-24T00:00:07.653000","2026-02-24T00:00:07.654000","2026-02-24T00:00:07.656000","2026-02-24T00:00:07.657000","2026-02-24T00:00:07.658000","2026-02-24T00:00:07.666000","2026-02-24T00:00:07.668000","2026-02-24T00:00:07.470000","2026-02-24T00:00:07.472000","2026-02-24T00:00:07.473000","2026-02-24T00:00:07.475000","2026-02-24T00:00:07.476000","2026-02-24T00:00:07.478000","2026-02-24T00:00:07.480000","2026-02-24T00:00:07.481000","2026-02-24T00:00:07.483000","2026-02-24T00:00:07.485000","2026-02-24T00:00:07.486000","2026-02-24T00:00:07.488000","2026-02-24T00:00:07.490000","2026-02-24T00:00:07.491000","2026-02-24T00:00:07.493000","2026-02-24T00:00:07.495000","2026-02-24T00:00:07.496000","2026-02-24T00:00:07.498000","2026-02-24T00:00:07.499000","2026-02-24T00:00:07.501000","2026-02-24T00:00:07.503000","2026-02-24T00:00:07.504000","2026-02-24T00:00:07.506000","2026-02-24T00:00:07.508000","2026-02-24T00:00:07.509000","2026-02-24T00:00:07.511000","2026-02-24T00:00:07.513000","2026-02-24T00:00:07.514000","2026-02-24T00:00:07.516000","2026-02-24T00:00:07.518000","2026-02-24T00:00:07.519000","2026-02-24T00:00:07.521000","2026-02-24T00:00:07.522000","2026-02-24T00:00:07.524000","2026-02-24T00:00:07.528000","2026-02-24T00:00:07.530000","2026-02-24T00:00:07.531000","2026-02-24T00:00:07.532000","2026-02-24T00:00:07.534000","2026-02-24T00:00:07.536000","2026-02-24T00:00:07.531000","2026-02-24T00:00:07.532000","2026-02-24T00:00:07.534000","2026-02-24T00:00:07.536000","2026-02-24T00:00:07.543000","2026-02-24T00:00:07.545000","2026-02-24T00:00:07.546000","2026-02-24T00:00:07.547000","2026-02-24T00:00:07.549000","2026-02-24T00:00:07.550000","2026-02-24T00:00:07.552000","2026-02-24T00:00:07.554000","2026-02-24T00:00:07.555000","2026-02-24T00:00:07.557000","2026-02-24T00:00:07.559000","2026-02-24T00:00:07.560000","2026-02-24T00:00:07.562000","2026-02-24T00:00:07.564000","2026-02-24T00:00:07.565000","2026-02-24T00:00:07.567000","2026-02-24T00:00:07.569000","2026-02-24T00:00:07.571000","2026-02-24T00:00:07.572000","2026-02-24T00:00:07.545000","2026-02-24T00:00:07.546000","2026-02-24T00:00:07.547000","2026-02-24T00:00:07.549000","2026-02-24T00:00:07.550000","2026-02-24T00:00:07.552000","2026-02-24T00:00:07.554000","2026-02-24T00:00:07.555000","2026-02-24T00:00:07.557000","2026-02-24T00:00:07.559000","2026-02-24T00:00:07.560000","2026-02-24T00:00:07.562000","2026-02-24T00:00:07.564000","2026-02-24T00:00:07.565000","2026-02-24T00:00:07.567000","2026-02-24T00:00:07.569000","2026-02-24T00:00:07.571000","2026-02-24T00:00:07.597000","2026-02-24T00:00:07.598000","2026-02-24T00:00:07.600000","2026-02-24T00:00:07.601000","2026-02-24T00:00:07.603000","2026-02-24T00:00:07.604000","2026-02-24T00:00:07.606000","2026-02-24T00:00:07.610000","2026-02-24T00:00:07.611000","2026-02-24T00:00:07.612000","2026-02-24T00:00:07.613000","2026-02-24T00:00:07.615000","2026-02-24T00:00:07.616000","2026-02-24T00:00:07.617000","2026-02-24T00:00:07.618000","2026-02-24T00:00:07.619000","2026-02-24T00:00:07.620000","2026-02-24T00:00:07.621000","2026-02-24T00:00:07.623000","2026-02-24T00:00:07.624000","2026-02-24T00:00:07.625000","2026-02-24T00:00:07.626000","2026-02-24T00:00:07.627000","2026-02-24T00:00:07.628000","2026-02-24T00:00:07.630000","2026-02-24T00:00:07.631000","2026-02-24T00:00:07.632000","2026-02-24T00:00:07.633000","2026-02-24T00:00:07.634000","2026-02-24T00:00:07.635000","2026-02-24T00:00:07.637000","2026-02-24T00:00:07.638000","2026-02-24T00:00:07.639000","2026-02-24T00:00:07.640000","2026-02-24T00:00:07.641000","2026-02-24T00:00:07.642000","2026-02-24T00:00:07.644000","2026-02-24T00:00:07.645000","2026-02-24T00:00:07.646000","2026-02-24T00:00:07.647000","2026-02-24T00:00:07.648000","2026-02-24T00:00:07.649000","2026-02-24T00:00:07.651000","2026-02-24T00:00:07.652000","2026-02-24T00:00:07.653000","2026-02-24T00:00:07.654000","2026-02-24T00:00:07.656000","2026-02-24T00:00:07.657000","2026-02-24T00:00:07.658000","2026-02-24T00:00:07.666000","2026-02-24T00:00:07.668000","2026-02-24T00:00:07.669000","2026-02-24T00:00:07.670000","2026-02-24T00:00:07.671000","2026-02-24T00:00:07.672000","2026-02-24T00:00:07.673000","2026-02-24T00:00:07.674000","2026-02-24T00:00:07.675000","2026-02-24T00:00:07.677000","2026-02-24T00:00:07.678000","2026-02-24T00:00:07.679000","2026-02-24T00:00:07.680000","2026-02-24T00:00:07.681000","2026-02-24T00:00:07.682000","2026-02-24T00:00:07.684000","2026-02-24T00:00:07.685000","2026-02-24T00:00:07.686000","2026-02-24T00:00:07.687000","2026-02-24T00:00:07.688000","2026-02-24T00:00:07.690000","2026-02-24T00:00:07.691000","2026-02-24T00:00:07.692000","2026-02-24T00:00:07.693000","2026-02-24T00:00:07.693000","2026-02-24T00:00:07.694000","2026-02-24T00:00:07.695000","2026-02-24T00:00:07.696000","2026-02-24T00:00:07.697000","2026-02-24T00:00:07.699000","2026-02-24T00:00:07.700000","2026-02-24T00:00:07.701000","2026-02-24T00:00:07.702000","2026-02-24T00:00:07.703000","2026-02-24T00:00:07.704000","2026-02-24T00:00:07.705000","2026-02-24T00:00:07.706000","2026-02-24T00:00:07.707000","2026-02-24T00:00:07.708000","2026-02-24T00:00:07.709000","2026-02-24T00:00:07.710000","2026-02-24T00:00:07.711000","2026-02-24T00:00:07.712000","2026-02-24T00:00:07.713000","2026-02-24T00:00:07.714000","2026-02-24T00:00:07.715000","2026-02-24T00:00:07.716000","2026-02-24T00:00:07.718000","2026-02-24T00:00:07.719000","2026-02-24T00:00:07.720000","2026-02-24T00:00:07.721000","2026-02-24T00:00:07.722000","2026-02-24T00:00:07.723000","2026-02-24T00:00:07.724000","2026-02-24T00:00:07.725000","2026-02-24T00:00:07.726000","2026-02-24T00:00:07.727000","2026-02-24T00:00:07.728000","2026-02-24T00:00:07.729000","2026-02-24T00:00:07.730000","2026-02-24T00:00:07.731000","2026-02-24T00:00:07.733000","2026-02-24T00:00:07.734000","2026-02-24T00:00:07.735000","2026-02-24T00:00:07.736000","2026-02-24T00:00:07.737000","2026-02-24T00:00:07.738000","2026-02-24T00:00:07.739000","2026-02-24T00:00:07.740000","2026-02-24T00:00:07.741000","2026-02-24T00:00:07.742000","2026-02-24T00:00:07.743000","2026-02-24T00:00:07.744000","2026-02-24T00:00:07.745000","2026-02-24T00:00:07.746000","2026-02-24T00:00:07.747000","2026-02-24T00:00:07.749000","2026-02-24T00:00:07.750000","2026-02-24T00:00:07.751000","2026-02-24T00:00:07.752000","2026-02-24T00:00:07.753000","2026-02-24T00:00:07.754000","2026-02-24T00:00:07.755000","2026-02-24T00:00:07.756000","2026-02-24T00:00:07.757000","2026-02-24T00:00:07.758000","2026-02-24T00:00:07.759000","2026-02-24T00:00:07.760000","2026-02-24T00:00:07.761000","2026-02-24T00:00:07.763000","2026-02-24T00:00:07.764000","2026-02-24T00:00:07.765000","2026-02-24T00:00:07.766000","2026-02-24T00:00:07.767000","2026-02-24T00:00:07.768000","2026-02-24T00:00:07.769000","2026-02-24T00:00:07.770000","2026-02-24T00:00:07.771000","2026-02-24T00:00:07.772000","2026-02-24T00:00:07.773000","2026-02-24T00:00:07.774000","2026-02-24T00:00:07.775000","2026-02-24T00:00:07.777000","2026-02-24T00:00:07.778000","2026-02-24T00:00:07.779000","2026-02-24T00:00:07.780000","2026-02-24T00:00:07.781000","2026-02-24T00:00:07.668000","2026-02-24T00:00:07.669000","2026-02-24T00:00:07.670000","2026-02-24T00:00:07.671000","2026-02-24T00:00:07.672000","2026-02-24T00:00:07.673000","2026-02-24T00:00:07.674000","2026-02-24T00:00:07.675000","2026-02-24T00:00:07.676000","2026-02-24T00:00:07.678000","2026-02-24T00:00:07.679000","2026-02-24T00:00:07.680000","2026-02-24T00:00:07.681000","2026-02-24T00:00:07.682000","2026-02-24T00:00:07.684000","2026-02-24T00:00:07.685000","2026-02-24T00:00:07.686000","2026-02-24T00:00:07.687000","2026-02-24T00:00:07.688000","2026-02-24T00:00:08.073000","2026-02-24T00:00:08.074000","2026-02-24T00:00:08.075000","2026-02-24T00:00:08.077000","2026-02-24T00:00:08.078000","2026-02-24T00:00:08.080000","2026-02-24T00:00:08.081000","2026-02-24T00:00:08.082000","2026-02-24T00:00:08.082000","2026-02-24T00:00:08.083000","2026-02-24T00:00:08.084000","2026-02-24T00:00:08.085000","2026-02-24T00:00:08.087000","2026-02-24T00:00:08.088000","2026-02-24T00:00:08.089000","2026-02-24T00:00:08.090000","2026-02-24T00:00:08.091000","2026-02-24T00:00:08.092000","2026-02-24T00:00:08.093000","2026-02-24T00:00:08.094000","2026-02-24T00:00:08.095000","2026-02-24T00:00:08.096000","2026-02-24T00:00:08.097000","2026-02-24T00:00:08.098000","2026-02-24T00:00:08.175000","2026-02-24T00:00:08.176000","2026-02-24T00:00:08.177000","2026-02-24T00:00:08.178000","2026-02-24T00:00:08.179000","2026-02-24T00:00:08.181000","2026-02-24T00:00:08.182000","2026-02-24T00:00:08.183000","2026-02-24T00:00:08.184000","2026-02-24T00:00:08.185000","2026-02-24T00:00:08.187000","2026-02-24T00:00:08.188000","2026-02-24T00:00:08.189000","2026-02-24T00:00:08.190000","2026-02-24T00:00:08.192000","2026-02-24T00:00:08.193000","2026-02-24T00:00:08.194000","2026-02-24T00:00:08.196000","2026-02-24T00:00:08.197000","2026-02-24T00:00:08.199000","2026-02-24T00:00:08.201000","2026-02-24T00:00:08.202000","2026-02-24T00:00:08.204000","2026-02-24T00:00:08.204000","2026-02-24T00:00:08.206000","2026-02-24T00:00:08.207000","2026-02-24T00:00:08.208000","2026-02-24T00:00:08.209000","2026-02-24T00:00:08.210000","2026-02-24T00:00:08.212000","2026-02-24T00:00:08.213000","2026-02-24T00:00:08.214000","2026-02-24T00:00:08.215000","2026-02-24T00:00:08.217000","2026-02-24T00:00:08.218000","2026-02-24T00:00:08.219000","2026-02-24T00:00:08.220000","2026-02-24T00:00:08.221000","2026-02-24T00:00:08.223000","2026-02-24T00:00:08.224000","2026-02-24T00:00:08.225000","2026-02-24T00:00:08.226000","2026-02-24T00:00:08.228000","2026-02-24T00:00:08.229000","2026-02-24T00:00:08.230000","2026-02-24T00:00:08.231000","2026-02-24T00:00:08.232000","2026-02-24T00:00:08.234000","2026-02-24T00:00:08.235000","2026-02-24T00:00:08.236000","2026-02-24T00:00:08.237000","2026-02-24T00:00:08.239000","2026-02-24T00:00:08.240000","2026-02-24T00:00:08.241000","2026-02-24T00:00:08.243000","2026-02-24T00:00:08.244000","2026-02-24T00:00:08.245000","2026-02-24T00:00:08.246000","2026-02-24T00:00:08.247000","2026-02-24T00:00:08.248000","2026-02-24T00:00:08.250000","2026-02-24T00:00:08.251000","2026-02-24T00:00:08.252000","2026-02-24T00:00:08.253000","2026-02-24T00:00:08.255000","2026-02-24T00:00:08.256000","2026-02-24T00:00:08.257000","2026-02-24T00:00:08.258000","2026-02-24T00:00:08.260000","2026-02-24T00:00:08.261000","2026-02-24T00:00:08.262000","2026-02-24T00:00:08.263000","2026-02-24T00:00:08.264000","2026-02-24T00:00:08.266000","2026-02-24T00:00:08.267000","2026-02-24T00:00:08.268000","2026-02-24T00:00:08.269000","2026-02-24T00:00:08.271000","2026-02-24T00:00:08.272000","2026-02-24T00:00:08.273000","2026-02-24T00:00:08.274000","2026-02-24T00:00:08.275000","2026-02-24T00:00:08.277000","2026-02-24T00:00:08.278000","2026-02-24T00:00:08.279000","2026-02-24T00:00:08.281000","2026-02-24T00:00:08.282000","2026-02-24T00:00:08.284000","2026-02-24T00:00:08.285000","2026-02-24T00:00:08.286000","2026-02-24T00:00:08.288000","2026-02-24T00:00:08.289000","2026-02-24T00:00:08.290000","2026-02-24T00:00:08.291000","2026-02-24T00:00:08.292000","2026-02-24T00:00:08.293000","2026-02-24T00:00:08.295000","2026-02-24T00:00:08.296000","2026-02-24T00:00:08.297000","2026-02-24T00:00:08.298000","2026-02-24T00:00:08.300000","2026-02-24T00:00:08.301000","2026-02-24T00:00:08.302000","2026-02-24T00:00:08.303000","2026-02-24T00:00:08.304000","2026-02-24T00:00:08.306000","2026-02-24T00:00:08.307000","2026-02-24T00:00:08.309000","2026-02-24T00:00:08.310000","2026-02-24T00:00:08.311000","2026-02-24T00:00:08.312000","2026-02-24T00:00:08.314000","2026-02-24T00:00:08.315000","2026-02-24T00:00:08.316000","2026-02-24T00:00:08.317000","2026-02-24T00:00:08.327000","2026-02-24T00:00:08.327000","2026-02-24T00:00:08.328000","2026-02-24T00:00:08.329000","2026-02-24T00:00:08.331000","2026-02-24T00:00:08.332000","2026-02-24T00:00:08.334000","2026-02-24T00:00:08.335000","2026-02-24T00:00:08.336000","2026-02-24T00:00:08.338000","2026-02-24T00:00:08.339000","2026-02-24T00:00:08.340000","2026-02-24T00:00:08.342000","2026-02-24T00:00:08.343000","2026-02-24T00:00:08.344000","2026-02-24T00:00:08.346000","2026-02-24T00:00:08.347000","2026-02-24T00:00:08.349000","2026-02-24T00:00:08.351000","2026-02-24T00:00:08.352000","2026-02-24T00:00:08.353000","2026-02-24T00:00:08.355000","2026-02-24T00:00:08.356000","2026-02-24T00:00:08.357000","2026-02-24T00:00:08.359000","2026-02-24T00:00:08.360000","2026-02-24T00:00:08.361000","2026-02-24T00:00:08.365000","2026-02-24T00:00:08.366000","2026-02-24T00:00:08.367000","2026-02-24T00:00:08.369000","2026-02-24T00:00:08.370000","2026-02-24T00:00:08.378000","2026-02-24T00:00:08.379000","2026-02-24T00:00:08.380000","2026-02-24T00:00:08.381000","2026-02-24T00:00:08.383000","2026-02-24T00:00:08.384000","2026-02-24T00:00:08.385000","2026-02-24T00:00:08.387000","2026-02-24T00:00:08.388000","2026-02-24T00:00:08.389000","2026-02-24T00:00:08.391000","2026-02-24T00:00:08.392000","2026-02-24T00:00:08.394000","2026-02-24T00:00:08.395000","2026-02-24T00:00:08.396000","2026-02-24T00:00:08.398000","2026-02-24T00:00:08.399000","2026-02-24T00:00:08.403000","2026-02-24T00:00:08.403000","2026-02-24T00:00:08.405000","2026-02-24T00:00:08.406000","2026-02-24T00:00:08.407000","2026-02-24T00:00:08.409000","2026-02-24T00:00:08.410000","2026-02-24T00:00:08.412000","2026-02-24T00:00:08.413000","2026-02-24T00:00:08.415000","2026-02-24T00:00:08.416000","2026-02-24T00:00:08.417000","2026-02-24T00:00:08.419000","2026-02-24T00:00:08.420000","2026-02-24T00:00:08.421000","2026-02-24T00:00:08.423000","2026-02-24T00:00:08.424000","2026-02-24T00:00:08.425000","2026-02-24T00:00:08.427000","2026-02-24T00:00:08.428000","2026-02-24T00:00:08.430000","2026-02-24T00:00:08.431000","2026-02-24T00:00:08.432000","2026-02-24T00:00:08.434000","2026-02-24T00:00:08.435000","2026-02-24T00:00:08.437000","2026-02-24T00:00:08.438000","2026-02-24T00:00:08.439000","2026-02-24T00:00:08.441000","2026-02-24T00:00:08.442000","2026-02-24T00:00:08.443000","2026-02-24T00:00:08.445000","2026-02-24T00:00:08.446000","2026-02-24T00:00:08.447000","2026-02-24T00:00:08.449000","2026-02-24T00:00:08.451000","2026-02-24T00:00:08.452000","2026-02-24T00:00:08.453000","2026-02-24T00:00:08.454000","2026-02-24T00:00:08.456000","2026-02-24T00:00:08.457000","2026-02-24T00:00:08.459000","2026-02-24T00:00:08.460000","2026-02-24T00:00:08.461000","2026-02-24T00:00:08.463000","2026-02-24T00:00:08.464000","2026-02-24T00:00:08.466000","2026-02-24T00:00:08.467000","2026-02-24T00:00:08.468000","2026-02-24T00:00:08.472000","2026-02-24T00:00:08.473000","2026-02-24T00:00:08.474000","2026-02-24T00:00:08.476000","2026-02-24T00:00:08.477000","2026-02-24T00:00:08.478000","2026-02-24T00:00:08.480000","2026-02-24T00:00:08.481000","2026-02-24T00:00:08.483000","2026-02-24T00:00:08.484000","2026-02-24T00:00:08.485000","2026-02-24T00:00:08.487000","2026-02-24T00:00:08.488000","2026-02-24T00:00:08.489000","2026-02-24T00:00:08.491000","2026-02-24T00:00:08.492000","2026-02-24T00:00:08.494000","2026-02-24T00:00:08.495000","2026-02-24T00:00:08.497000","2026-02-24T00:00:08.498000","2026-02-24T00:00:08.499000","2026-02-24T00:00:08.501000","2026-02-24T00:00:08.502000","2026-02-24T00:00:08.503000","2026-02-24T00:00:08.505000","2026-02-24T00:00:08.506000","2026-02-24T00:00:08.508000","2026-02-24T00:00:08.509000","2026-02-24T00:00:08.510000","2026-02-24T00:00:08.512000","2026-02-24T00:00:08.513000","2026-02-24T00:00:08.515000","2026-02-24T00:00:08.517000","2026-02-24T00:00:08.517000","2026-02-24T00:00:08.519000","2026-02-24T00:00:08.520000","2026-02-24T00:00:08.524000","2026-02-24T00:00:08.526000","2026-02-24T00:00:08.527000","2026-02-24T00:00:08.528000","2026-02-24T00:00:08.529000","2026-02-24T00:00:08.530000","2026-02-24T00:00:08.532000","2026-02-24T00:00:08.533000","2026-02-24T00:00:08.535000","2026-02-24T00:00:08.536000","2026-02-24T00:00:08.327000","2026-02-24T00:00:08.327000","2026-02-24T00:00:08.328000","2026-02-24T00:00:08.330000","2026-02-24T00:00:08.331000","2026-02-24T00:00:08.332000","2026-02-24T00:00:08.334000","2026-02-24T00:00:08.335000","2026-02-24T00:00:08.336000","2026-02-24T00:00:08.338000","2026-02-24T00:00:08.339000","2026-02-24T00:00:08.340000","2026-02-24T00:00:08.342000","2026-02-24T00:00:08.343000","2026-02-24T00:00:08.344000","2026-02-24T00:00:08.346000","2026-02-24T00:00:08.347000","2026-02-24T00:00:08.349000","2026-02-24T00:00:08.351000","2026-02-24T00:00:08.352000","2026-02-24T00:00:08.353000","2026-02-24T00:00:08.355000","2026-02-24T00:00:08.356000","2026-02-24T00:00:08.357000","2026-02-24T00:00:08.359000","2026-02-24T00:00:08.360000","2026-02-24T00:00:08.361000","2026-02-24T00:00:08.365000","2026-02-24T00:00:08.366000","2026-02-24T00:00:08.367000","2026-02-24T00:00:08.369000","2026-02-24T00:00:08.370000","2026-02-24T00:00:08.378000","2026-02-24T00:00:08.379000","2026-02-24T00:00:08.380000","2026-02-24T00:00:08.381000","2026-02-24T00:00:08.383000","2026-02-24T00:00:08.384000","2026-02-24T00:00:08.385000","2026-02-24T00:00:08.387000","2026-02-24T00:00:08.388000","2026-02-24T00:00:08.389000","2026-02-24T00:00:08.391000","2026-02-24T00:00:08.392000","2026-02-24T00:00:08.394000","2026-02-24T00:00:08.395000","2026-02-24T00:00:08.396000","2026-02-24T00:00:08.398000","2026-02-24T00:00:08.399000","2026-02-24T00:00:08.403000","2026-02-24T00:00:08.403000","2026-02-24T00:00:08.405000","2026-02-24T00:00:08.406000","2026-02-24T00:00:08.407000","2026-02-24T00:00:08.409000","2026-02-24T00:00:08.410000","2026-02-24T00:00:08.412000","2026-02-24T00:00:08.413000","2026-02-24T00:00:08.415000","2026-02-24T00:00:08.416000","2026-02-24T00:00:08.417000","2026-02-24T00:00:08.419000","2026-02-24T00:00:08.420000","2026-02-24T00:00:08.421000","2026-02-24T00:00:08.423000","2026-02-24T00:00:08.424000","2026-02-24T00:00:08.425000","2026-02-24T00:00:08.427000","2026-02-24T00:00:08.428000","2026-02-24T00:00:08.430000","2026-02-24T00:00:08.431000","2026-02-24T00:00:08.432000","2026-02-24T00:00:08.434000","2026-02-24T00:00:08.435000","2026-02-24T00:00:08.437000","2026-02-24T00:00:08.438000","2026-02-24T00:00:08.439000","2026-02-24T00:00:08.441000","2026-02-24T00:00:08.442000","2026-02-24T00:00:08.443000","2026-02-24T00:00:08.445000","2026-02-24T00:00:08.446000","2026-02-24T00:00:08.448000","2026-02-24T00:00:08.449000","2026-02-24T00:00:08.451000","2026-02-24T00:00:08.452000","2026-02-24T00:00:08.453000","2026-02-24T00:00:08.454000","2026-02-24T00:00:08.456000","2026-02-24T00:00:08.457000","2026-02-24T00:00:08.459000","2026-02-24T00:00:08.460000","2026-02-24T00:00:08.461000","2026-02-24T00:00:08.463000","2026-02-24T00:00:08.464000","2026-02-24T00:00:08.466000","2026-02-24T00:00:08.467000","2026-02-24T00:00:08.468000","2026-02-24T00:00:08.472000","2026-02-24T00:00:08.473000","2026-02-24T00:00:08.474000","2026-02-24T00:00:08.476000","2026-02-24T00:00:08.477000","2026-02-24T00:00:08.478000","2026-02-24T00:00:08.480000","2026-02-24T00:00:08.481000","2026-02-24T00:00:08.483000","2026-02-24T00:00:08.484000","2026-02-24T00:00:08.485000","2026-02-24T00:00:08.487000","2026-02-24T00:00:08.488000","2026-02-24T00:00:08.489000","2026-02-24T00:00:08.491000","2026-02-24T00:00:08.492000","2026-02-24T00:00:08.494000","2026-02-24T00:00:08.495000","2026-02-24T00:00:08.497000","2026-02-24T00:00:08.498000","2026-02-24T00:00:08.499000","2026-02-24T00:00:08.501000","2026-02-24T00:00:08.502000","2026-02-24T00:00:08.503000","2026-02-24T00:00:08.505000","2026-02-24T00:00:08.506000","2026-02-24T00:00:08.508000","2026-02-24T00:00:08.509000","2026-02-24T00:00:08.510000","2026-02-24T00:00:08.512000","2026-02-24T00:00:08.513000","2026-02-24T00:00:08.515000","2026-02-24T00:00:08.517000","2026-02-24T00:00:08.517000","2026-02-24T00:00:08.519000","2026-02-24T00:00:08.520000","2026-02-24T00:00:08.327000","2026-02-24T00:00:08.328000","2026-02-24T00:00:08.330000","2026-02-24T00:00:08.331000","2026-02-24T00:00:08.332000","2026-02-24T00:00:08.334000","2026-02-24T00:00:08.335000","2026-02-24T00:00:08.336000","2026-02-24T00:00:08.338000","2026-02-24T00:00:08.339000","2026-02-24T00:00:08.340000","2026-02-24T00:00:08.358000","2026-02-24T00:00:08.360000","2026-02-24T00:00:08.361000","2026-02-24T00:00:08.363000","2026-02-24T00:00:08.365000","2026-02-24T00:00:08.366000","2026-02-24T00:00:08.367000","2026-02-24T00:00:08.368000","2026-02-24T00:00:08.370000","2026-02-24T00:00:08.378000","2026-02-24T00:00:08.379000","2026-02-24T00:00:08.380000","2026-02-24T00:00:08.381000","2026-02-24T00:00:08.383000","2026-02-24T00:00:08.384000","2026-02-24T00:00:08.385000","2026-02-24T00:00:08.387000","2026-02-24T00:00:08.388000","2026-02-24T00:00:08.389000","2026-02-24T00:00:08.391000","2026-02-24T00:00:08.392000","2026-02-24T00:00:08.394000","2026-02-24T00:00:08.395000","2026-02-24T00:00:08.396000","2026-02-24T00:00:08.398000","2026-02-24T00:00:08.399000","2026-02-24T00:00:08.403000","2026-02-24T00:00:08.403000","2026-02-24T00:00:08.405000","2026-02-24T00:00:08.406000","2026-02-24T00:00:08.407000","2026-02-24T00:00:08.409000","2026-02-24T00:00:08.410000","2026-02-24T00:00:08.412000","2026-02-24T00:00:08.413000","2026-02-24T00:00:08.415000","2026-02-24T00:00:08.416000","2026-02-24T00:00:08.417000","2026-02-24T00:00:08.419000","2026-02-24T00:00:08.420000","2026-02-24T00:00:08.421000","2026-02-24T00:00:08.422000","2026-02-24T00:00:08.424000","2026-02-24T00:00:08.425000","2026-02-24T00:00:08.427000","2026-02-24T00:00:08.428000","2026-02-24T00:00:08.429000","2026-02-24T00:00:08.431000","2026-02-24T00:00:08.432000","2026-02-24T00:00:08.434000","2026-02-24T00:00:08.435000","2026-02-24T00:00:08.437000","2026-02-24T00:00:08.438000","2026-02-24T00:00:08.439000","2026-02-24T00:00:08.441000","2026-02-24T00:00:08.442000","2026-02-24T00:00:08.443000","2026-02-24T00:00:08.445000","2026-02-24T00:00:08.446000","2026-02-24T00:00:08.447000","2026-02-24T00:00:08.449000","2026-02-24T00:00:08.451000","2026-02-24T00:00:08.452000","2026-02-24T00:00:08.453000","2026-02-24T00:00:08.454000","2026-02-24T00:00:08.456000","2026-02-24T00:00:08.457000","2026-02-24T00:00:08.459000","2026-02-24T00:00:08.460000","2026-02-24T00:00:08.461000","2026-02-24T00:00:08.463000","2026-02-24T00:00:08.464000","2026-02-24T00:00:08.466000","2026-02-24T00:00:08.467000","2026-02-24T00:00:08.468000","2026-02-24T00:00:08.472000","2026-02-24T00:00:08.473000","2026-02-24T00:00:08.474000","2026-02-24T00:00:08.475000","2026-02-24T00:00:08.477000","2026-02-24T00:00:08.478000","2026-02-24T00:00:08.480000","2026-02-24T00:00:08.481000","2026-02-24T00:00:08.482000","2026-02-24T00:00:08.484000","2026-02-24T00:00:08.485000","2026-02-24T00:00:08.487000","2026-02-24T00:00:08.488000","2026-02-24T00:00:08.489000","2026-02-24T00:00:08.491000","2026-02-24T00:00:08.492000","2026-02-24T00:00:08.494000","2026-02-24T00:00:08.495000","2026-02-24T00:00:08.496000","2026-02-24T00:00:08.498000","2026-02-24T00:00:08.499000","2026-02-24T00:00:08.500000","2026-02-24T00:00:08.502000","2026-02-24T00:00:08.503000","2026-02-24T00:00:08.505000","2026-02-24T00:00:08.506000","2026-02-24T00:00:08.507000","2026-02-24T00:00:08.509000","2026-02-24T00:00:08.510000","2026-02-24T00:00:08.512000","2026-02-24T00:00:08.513000","2026-02-24T00:00:08.515000","2026-02-24T00:00:08.517000","2026-02-24T00:00:08.517000","2026-02-24T00:00:08.519000","2026-02-24T00:00:08.520000","2026-02-24T00:00:08.524000","2026-02-24T00:00:08.526000","2026-02-24T00:00:08.527000","2026-02-24T00:00:08.528000","2026-02-24T00:00:08.529000","2026-02-24T00:00:08.530000","2026-02-24T00:00:08.532000","2026-02-24T00:00:08.533000","2026-02-24T00:00:08.535000","2026-02-24T00:00:08.536000","2026-02-24T00:00:08.543000","2026-02-24T00:00:08.544000","2026-02-24T00:00:08.545000","2026-02-24T00:00:08.546000","2026-02-24T00:00:08.547000","2026-02-24T00:00:08.548000","2026-02-24T00:00:08.549000","2026-02-24T00:00:08.550000","2026-02-24T00:00:08.552000","2026-02-24T00:00:08.553000","2026-02-24T00:00:08.554000","2026-02-24T00:00:08.555000","2026-02-24T00:00:08.556000","2026-02-24T00:00:08.557000","2026-02-24T00:00:08.559000","2026-02-24T00:00:08.560000","2026-02-24T00:00:08.561000","2026-02-24T00:00:08.562000","2026-02-24T00:00:08.563000","2026-02-24T00:00:08.565000","2026-02-24T00:00:08.566000","2026-02-24T00:00:08.567000","2026-02-24T00:00:08.568000","2026-02-24T00:00:08.569000","2026-02-24T00:00:08.570000","2026-02-24T00:00:08.571000","2026-02-24T00:00:08.573000","2026-02-24T00:00:08.574000","2026-02-24T00:00:08.575000","2026-02-24T00:00:08.576000","2026-02-24T00:00:08.577000","2026-02-24T00:00:08.579000","2026-02-24T00:00:08.580000","2026-02-24T00:00:08.581000","2026-02-24T00:00:08.582000","2026-02-24T00:00:08.583000","2026-02-24T00:00:08.584000","2026-02-24T00:00:08.586000","2026-02-24T00:00:08.587000","2026-02-24T00:00:08.588000","2026-02-24T00:00:08.589000","2026-02-24T00:00:08.590000","2026-02-24T00:00:08.591000","2026-02-24T00:00:08.593000","2026-02-24T00:00:08.594000","2026-02-24T00:00:08.595000","2026-02-24T00:00:08.596000","2026-02-24T00:00:08.598000","2026-02-24T00:00:08.599000","2026-02-24T00:00:08.600000","2026-02-24T00:00:08.601000","2026-02-24T00:00:08.602000","2026-02-24T00:00:08.603000","2026-02-24T00:00:08.604000","2026-02-24T00:00:08.605000","2026-02-24T00:00:08.607000","2026-02-24T00:00:08.608000","2026-02-24T00:00:08.609000","2026-02-24T00:00:08.610000","2026-02-24T00:00:08.611000","2026-02-24T00:00:08.612000","2026-02-24T00:00:08.614000","2026-02-24T00:00:08.615000","2026-02-24T00:00:08.616000","2026-02-24T00:00:08.617000","2026-02-24T00:00:08.619000","2026-02-24T00:00:08.620000","2026-02-24T00:00:08.621000","2026-02-24T00:00:08.622000","2026-02-24T00:00:08.623000","2026-02-24T00:00:08.625000","2026-02-24T00:00:08.625000","2026-02-24T00:00:08.627000","2026-02-24T00:00:08.628000","2026-02-24T00:00:08.629000","2026-02-24T00:00:08.630000","2026-02-24T00:00:08.631000","2026-02-24T00:00:08.633000","2026-02-24T00:00:08.634000","2026-02-24T00:00:08.635000","2026-02-24T00:00:08.636000","2026-02-24T00:00:08.637000","2026-02-24T00:00:08.638000","2026-02-24T00:00:08.640000","2026-02-24T00:00:08.641000","2026-02-24T00:00:08.642000","2026-02-24T00:00:08.643000","2026-02-24T00:00:08.644000","2026-02-24T00:00:08.646000","2026-02-24T00:00:08.647000","2026-02-24T00:00:08.648000","2026-02-24T00:00:08.649000","2026-02-24T00:00:08.651000","2026-02-24T00:00:08.652000","2026-02-24T00:00:08.653000","2026-02-24T00:00:08.654000","2026-02-24T00:00:08.655000","2026-02-24T00:00:08.656000","2026-02-24T00:00:08.657000","2026-02-24T00:00:08.659000","2026-02-24T00:00:08.660000","2026-02-24T00:00:08.661000","2026-02-24T00:00:08.662000","2026-02-24T00:00:08.663000","2026-02-24T00:00:08.664000","2026-02-24T00:00:08.666000","2026-02-24T00:00:08.667000","2026-02-24T00:00:08.668000","2026-02-24T00:00:08.669000","2026-02-24T00:00:08.670000","2026-02-24T00:00:08.672000","2026-02-24T00:00:08.673000","2026-02-24T00:00:08.674000","2026-02-24T00:00:08.675000","2026-02-24T00:00:08.676000","2026-02-24T00:00:08.677000","2026-02-24T00:00:08.679000","2026-02-24T00:00:08.680000","2026-02-24T00:00:08.681000","2026-02-24T00:00:08.683000","2026-02-24T00:00:08.684000","2026-02-24T00:00:08.685000","2026-02-24T00:00:08.686000","2026-02-24T00:00:08.687000","2026-02-24T00:00:08.688000","2026-02-24T00:00:08.689000","2026-02-24T00:00:08.690000","2026-02-24T00:00:08.692000","2026-02-24T00:00:08.693000","2026-02-24T00:00:08.694000","2026-02-24T00:00:08.695000","2026-02-24T00:00:08.696000","2026-02-24T00:00:08.698000","2026-02-24T00:00:08.699000","2026-02-24T00:00:08.700000","2026-02-24T00:00:08.701000","2026-02-24T00:00:08.702000","2026-02-24T00:00:08.704000","2026-02-24T00:00:08.705000","2026-02-24T00:00:08.706000","2026-02-24T00:00:08.707000","2026-02-24T00:00:08.708000","2026-02-24T00:00:08.709000","2026-02-24T00:00:08.711000","2026-02-24T00:00:08.712000","2026-02-24T00:00:08.713000","2026-02-24T00:00:08.714000","2026-02-24T00:00:08.716000","2026-02-24T00:00:08.717000","2026-02-24T00:00:08.718000","2026-02-24T00:00:08.719000","2026-02-24T00:00:08.720000","2026-02-24T00:00:08.722000","2026-02-24T00:00:08.723000","2026-02-24T00:00:08.724000","2026-02-24T00:00:08.725000","2026-02-24T00:00:08.727000","2026-02-24T00:00:08.728000","2026-02-24T00:00:08.729000","2026-02-24T00:00:08.730000","2026-02-24T00:00:08.732000","2026-02-24T00:00:08.733000","2026-02-24T00:00:08.734000","2026-02-24T00:00:08.735000","2026-02-24T00:00:08.737000","2026-02-24T00:00:08.738000","2026-02-24T00:00:08.739000","2026-02-24T00:00:08.740000","2026-02-24T00:00:08.744000","2026-02-24T00:00:08.744000","2026-02-24T00:00:08.745000","2026-02-24T00:00:08.746000","2026-02-24T00:00:08.748000","2026-02-24T00:00:08.749000","2026-02-24T00:00:08.750000","2026-02-24T00:00:08.751000","2026-02-24T00:00:08.752000","2026-02-24T00:00:08.754000","2026-02-24T00:00:08.755000","2026-02-24T00:00:08.756000","2026-02-24T00:00:08.757000","2026-02-24T00:00:08.759000","2026-02-24T00:00:08.760000","2026-02-24T00:00:08.761000","2026-02-24T00:00:08.762000","2026-02-24T00:00:08.763000","2026-02-24T00:00:08.765000","2026-02-24T00:00:08.766000","2026-02-24T00:00:08.767000","2026-02-24T00:00:08.768000","2026-02-24T00:00:08.770000","2026-02-24T00:00:08.771000","2026-02-24T00:00:08.772000","2026-02-24T00:00:08.773000","2026-02-24T00:00:08.775000","2026-02-24T00:00:08.776000","2026-02-24T00:00:08.777000","2026-02-24T00:00:08.778000","2026-02-24T00:00:08.779000","2026-02-24T00:00:08.781000","2026-02-24T00:00:08.782000","2026-02-24T00:00:08.783000","2026-02-24T00:00:08.784000","2026-02-24T00:00:08.786000","2026-02-24T00:00:08.787000","2026-02-24T00:00:08.788000","2026-02-24T00:00:08.789000","2026-02-24T00:00:08.791000","2026-02-24T00:00:08.792000","2026-02-24T00:00:08.793000","2026-02-24T00:00:08.794000","2026-02-24T00:00:08.795000","2026-02-24T00:00:08.797000","2026-02-24T00:00:08.798000","2026-02-24T00:00:08.799000","2026-02-24T00:00:08.801000","2026-02-24T00:00:08.806000","2026-02-24T00:00:08.807000","2026-02-24T00:00:08.807000","2026-02-24T00:00:08.809000","2026-02-24T00:00:08.810000","2026-02-24T00:00:08.811000","2026-02-24T00:00:08.812000","2026-02-24T00:00:08.814000","2026-02-24T00:00:08.815000","2026-02-24T00:00:08.816000","2026-02-24T00:00:08.817000","2026-02-24T00:00:08.818000","2026-02-24T00:00:08.820000","2026-02-24T00:00:08.821000","2026-02-24T00:00:08.822000","2026-02-24T00:00:08.823000","2026-02-24T00:00:08.825000","2026-02-24T00:00:08.826000","2026-02-24T00:00:08.827000","2026-02-24T00:00:08.828000","2026-02-24T00:00:08.830000","2026-02-24T00:00:08.831000","2026-02-24T00:00:08.832000","2026-02-24T00:00:08.833000","2026-02-24T00:00:08.834000","2026-02-24T00:00:08.836000","2026-02-24T00:00:08.837000","2026-02-24T00:00:08.838000","2026-02-24T00:00:08.839000","2026-02-24T00:00:08.841000","2026-02-24T00:00:08.842000","2026-02-24T00:00:08.843000","2026-02-24T00:00:08.845000","2026-02-24T00:00:08.846000","2026-02-24T00:00:08.847000","2026-02-24T00:00:08.848000","2026-02-24T00:00:08.849000","2026-02-24T00:00:08.851000","2026-02-24T00:00:08.852000","2026-02-24T00:00:08.853000","2026-02-24T00:00:08.854000","2026-02-24T00:00:08.855000","2026-02-24T00:00:08.857000","2026-02-24T00:00:08.858000","2026-02-24T00:00:08.859000","2026-02-24T00:00:08.860000","2026-02-24T00:00:08.861000","2026-02-24T00:00:08.863000","2026-02-24T00:00:08.864000","2026-02-24T00:00:08.865000","2026-02-24T00:00:08.866000","2026-02-24T00:00:08.868000","2026-02-24T00:00:08.869000","2026-02-24T00:00:08.870000","2026-02-24T00:00:08.871000","2026-02-24T00:00:08.873000","2026-02-24T00:00:08.874000","2026-02-24T00:00:08.875000","2026-02-24T00:00:08.876000","2026-02-24T00:00:08.878000","2026-02-24T00:00:08.879000","2026-02-24T00:00:08.880000","2026-02-24T00:00:08.881000","2026-02-24T00:00:08.883000","2026-02-24T00:00:08.884000","2026-02-24T00:00:08.885000","2026-02-24T00:00:08.887000","2026-02-24T00:00:08.888000","2026-02-24T00:00:08.889000","2026-02-24T00:00:08.890000","2026-02-24T00:00:08.892000","2026-02-24T00:00:08.893000","2026-02-24T00:00:08.894000","2026-02-24T00:00:08.896000","2026-02-24T00:00:08.897000","2026-02-24T00:00:08.898000","2026-02-24T00:00:08.899000","2026-02-24T00:00:08.901000","2026-02-24T00:00:08.906000","2026-02-24T00:00:08.907000","2026-02-24T00:00:08.908000","2026-02-24T00:00:08.909000","2026-02-24T00:00:08.910000","2026-02-24T00:00:08.912000","2026-02-24T00:00:08.913000","2026-02-24T00:00:08.914000","2026-02-24T00:00:08.915000","2026-02-24T00:00:08.917000","2026-02-24T00:00:08.918000","2026-02-24T00:00:08.919000","2026-02-24T00:00:08.921000","2026-02-24T00:00:08.922000","2026-02-24T00:00:08.923000","2026-02-24T00:00:08.924000","2026-02-24T00:00:08.926000","2026-02-24T00:00:08.927000","2026-02-24T00:00:08.928000","2026-02-24T00:00:08.930000","2026-02-24T00:00:08.931000","2026-02-24T00:00:08.932000","2026-02-24T00:00:08.933000","2026-02-24T00:00:08.935000","2026-02-24T00:00:08.936000","2026-02-24T00:00:08.937000","2026-02-24T00:00:08.939000","2026-02-24T00:00:08.940000","2026-02-24T00:00:08.941000","2026-02-24T00:00:08.942000","2026-02-24T00:00:08.944000","2026-02-24T00:00:08.945000","2026-02-24T00:00:08.946000","2026-02-24T00:00:08.947000","2026-02-24T00:00:08.949000","2026-02-24T00:00:08.950000","2026-02-24T00:00:08.951000","2026-02-24T00:00:08.953000","2026-02-24T00:00:08.954000","2026-02-24T00:00:08.955000","2026-02-24T00:00:08.956000","2026-02-24T00:00:08.958000","2026-02-24T00:00:08.959000","2026-02-24T00:00:08.960000","2026-02-24T00:00:08.962000","2026-02-24T00:00:08.963000","2026-02-24T00:00:08.965000","2026-02-24T00:00:08.966000","2026-02-24T00:00:08.967000","2026-02-24T00:00:08.968000","2026-02-24T00:00:08.969000","2026-02-24T00:00:08.971000","2026-02-24T00:00:08.972000","2026-02-24T00:00:08.973000","2026-02-24T00:00:08.974000","2026-02-24T00:00:08.976000","2026-02-24T00:00:08.977000","2026-02-24T00:00:08.978000","2026-02-24T00:00:08.980000","2026-02-24T00:00:08.981000","2026-02-24T00:00:08.982000","2026-02-24T00:00:08.983000","2026-02-24T00:00:08.985000","2026-02-24T00:00:08.986000","2026-02-24T00:00:08.987000","2026-02-24T00:00:08.988000","2026-02-24T00:00:08.990000","2026-02-24T00:00:08.991000","2026-02-24T00:00:08.992000","2026-02-24T00:00:08.994000","2026-02-24T00:00:08.995000","2026-02-24T00:00:08.996000","2026-02-24T00:00:08.998000","2026-02-24T00:00:08.999000","2026-02-24T00:00:09.000000","2026-02-24T00:00:09.001000","2026-02-24T00:00:09.003000","2026-02-24T00:00:09.004000","2026-02-24T00:00:09.005000","2026-02-24T00:00:09.006000","2026-02-24T00:00:09.008000","2026-02-24T00:00:09.009000","2026-02-24T00:00:09.010000","2026-02-24T00:00:09.012000","2026-02-24T00:00:09.013000","2026-02-24T00:00:09.014000","2026-02-24T00:00:09.016000","2026-02-24T00:00:09.017000","2026-02-24T00:00:09.018000","2026-02-24T00:00:09.019000","2026-02-24T00:00:09.021000","2026-02-24T00:00:09.022000","2026-02-24T00:00:09.023000","2026-02-24T00:00:09.024000","2026-02-24T00:00:09.026000","2026-02-24T00:00:09.027000","2026-02-24T00:00:09.028000","2026-02-24T00:00:09.030000","2026-02-24T00:00:09.031000","2026-02-24T00:00:09.032000","2026-02-24T00:00:09.033000","2026-02-24T00:00:09.035000","2026-02-24T00:00:09.036000","2026-02-24T00:00:09.037000","2026-02-24T00:00:09.039000","2026-02-24T00:00:09.040000","2026-02-24T00:00:09.041000","2026-02-24T00:00:09.043000","2026-02-24T00:00:09.044000","2026-02-24T00:00:09.045000","2026-02-24T00:00:09.047000","2026-02-24T00:00:09.048000","2026-02-24T00:00:09.049000","2026-02-24T00:00:09.051000","2026-02-24T00:00:09.052000","2026-02-24T00:00:09.053000","2026-02-24T00:00:09.054000","2026-02-24T00:00:09.056000","2026-02-24T00:00:09.057000","2026-02-24T00:00:09.059000","2026-02-24T00:00:09.060000","2026-02-24T00:00:09.061000","2026-02-24T00:00:09.063000","2026-02-24T00:00:09.064000","2026-02-24T00:00:09.065000","2026-02-24T00:00:09.066000","2026-02-24T00:00:09.068000","2026-02-24T00:00:09.069000","2026-02-24T00:00:09.070000","2026-02-24T00:00:09.072000","2026-02-24T00:00:09.073000","2026-02-24T00:00:09.074000","2026-02-24T00:00:09.076000","2026-02-24T00:00:09.077000","2026-02-24T00:00:09.079000","2026-02-24T00:00:09.080000","2026-02-24T00:00:09.081000","2026-02-24T00:00:09.083000","2026-02-24T00:00:09.084000","2026-02-24T00:00:09.085000","2026-02-24T00:00:09.086000","2026-02-24T00:00:09.088000","2026-02-24T00:00:09.089000","2026-02-24T00:00:09.090000","2026-02-24T00:00:09.092000","2026-02-24T00:00:09.099000","2026-02-24T00:00:09.100000","2026-02-24T00:00:09.101000","2026-02-24T00:00:09.102000","2026-02-24T00:00:09.104000","2026-02-24T00:00:09.105000","2026-02-24T00:00:09.106000","2026-02-24T00:00:09.108000","2026-02-24T00:00:09.109000","2026-02-24T00:00:09.110000","2026-02-24T00:00:09.112000","2026-02-24T00:00:09.113000","2026-02-24T00:00:09.114000","2026-02-24T00:00:09.116000","2026-02-24T00:00:09.117000","2026-02-24T00:00:09.118000","2026-02-24T00:00:09.120000","2026-02-24T00:00:09.121000","2026-02-24T00:00:09.122000","2026-02-24T00:00:09.124000","2026-02-24T00:00:09.125000","2026-02-24T00:00:09.126000","2026-02-24T00:00:09.128000","2026-02-24T00:00:09.129000","2026-02-24T00:00:09.130000","2026-02-24T00:00:09.132000","2026-02-24T00:00:09.133000","2026-02-24T00:00:09.134000","2026-02-24T00:00:09.136000","2026-02-24T00:00:09.137000","2026-02-24T00:00:09.138000","2026-02-24T00:00:09.140000","2026-02-24T00:00:09.141000","2026-02-24T00:00:09.142000","2026-02-24T00:00:09.144000","2026-02-24T00:00:09.145000","2026-02-24T00:00:09.146000","2026-02-24T00:00:09.148000","2026-02-24T00:00:09.149000","2026-02-24T00:00:09.150000","2026-02-24T00:00:09.152000","2026-02-24T00:00:09.153000","2026-02-24T00:00:09.154000","2026-02-24T00:00:09.156000","2026-02-24T00:00:09.157000","2026-02-24T00:00:09.158000","2026-02-24T00:00:09.160000","2026-02-24T00:00:09.161000","2026-02-24T00:00:09.162000","2026-02-24T00:00:09.164000","2026-02-24T00:00:09.165000","2026-02-24T00:00:09.166000","2026-02-24T00:00:09.168000","2026-02-24T00:00:09.169000","2026-02-24T00:00:09.170000","2026-02-24T00:00:09.172000","2026-02-24T00:00:09.173000","2026-02-24T00:00:09.174000","2026-02-24T00:00:09.176000","2026-02-24T00:00:09.177000","2026-02-24T00:00:09.178000","2026-02-24T00:00:09.180000","2026-02-24T00:00:09.181000","2026-02-24T00:00:09.182000","2026-02-24T00:00:09.184000","2026-02-24T00:00:09.185000","2026-02-24T00:00:09.186000","2026-02-24T00:00:09.188000","2026-02-24T00:00:09.189000","2026-02-24T00:00:09.190000","2026-02-24T00:00:09.192000","2026-02-24T00:00:09.193000","2026-02-24T00:00:09.194000","2026-02-24T00:00:09.196000","2026-02-24T00:00:09.197000","2026-02-24T00:00:09.198000","2026-02-24T00:00:09.200000","2026-02-24T00:00:09.201000","2026-02-24T00:00:09.202000","2026-02-24T00:00:09.204000","2026-02-24T00:00:09.205000","2026-02-24T00:00:09.206000","2026-02-24T00:00:09.208000","2026-02-24T00:00:09.209000","2026-02-24T00:00:09.210000","2026-02-24T00:00:09.212000","2026-02-24T00:00:09.213000","2026-02-24T00:00:09.215000","2026-02-24T00:00:09.216000","2026-02-24T00:00:09.217000","2026-02-24T00:00:09.219000","2026-02-24T00:00:09.220000","2026-02-24T00:00:09.222000","2026-02-24T00:00:09.223000","2026-02-24T00:00:09.224000","2026-02-24T00:00:09.226000","2026-02-24T00:00:09.227000","2026-02-24T00:00:09.228000","2026-02-24T00:00:09.230000","2026-02-24T00:00:09.231000","2026-02-24T00:00:09.233000","2026-02-24T00:00:09.234000","2026-02-24T00:00:09.235000","2026-02-24T00:00:09.237000","2026-02-24T00:00:09.238000","2026-02-24T00:00:09.239000","2026-02-24T00:00:09.241000","2026-02-24T00:00:09.242000","2026-02-24T00:00:09.244000","2026-02-24T00:00:09.245000","2026-02-24T00:00:09.246000","2026-02-24T00:00:09.248000","2026-02-24T00:00:09.250000","2026-02-24T00:00:09.251000","2026-02-24T00:00:09.252000","2026-02-24T00:00:09.253000","2026-02-24T00:00:09.255000","2026-02-24T00:00:09.256000","2026-02-24T00:00:09.257000","2026-02-24T00:00:09.259000","2026-02-24T00:00:09.260000","2026-02-24T00:00:09.262000","2026-02-24T00:00:09.263000","2026-02-24T00:00:09.264000","2026-02-24T00:00:09.266000","2026-02-24T00:00:09.270000","2026-02-24T00:00:09.272000","2026-02-24T00:00:09.272000","2026-02-24T00:00:09.274000","2026-02-24T00:00:09.275000","2026-02-24T00:00:09.276000","2026-02-24T00:00:09.278000","2026-02-24T00:00:09.279000","2026-02-24T00:00:09.281000","2026-02-24T00:00:09.282000","2026-02-24T00:00:09.286000","2026-02-24T00:00:09.288000","2026-02-24T00:00:09.289000","2026-02-24T00:00:09.290000","2026-02-24T00:00:09.292000","2026-02-24T00:00:09.293000","2026-02-24T00:00:09.294000","2026-02-24T00:00:09.296000","2026-02-24T00:00:09.297000","2026-02-24T00:00:09.298000","2026-02-24T00:00:09.300000","2026-02-24T00:00:09.307000","2026-02-24T00:00:09.308000","2026-02-24T00:00:09.309000","2026-02-24T00:00:09.310000","2026-02-24T00:00:09.312000","2026-02-24T00:00:09.313000","2026-02-24T00:00:09.314000","2026-02-24T00:00:09.316000","2026-02-24T00:00:09.317000","2026-02-24T00:00:09.319000","2026-02-24T00:00:09.320000","2026-02-24T00:00:09.321000","2026-02-24T00:00:09.323000","2026-02-24T00:00:09.324000","2026-02-24T00:00:09.329000","2026-02-24T00:00:09.330000","2026-02-24T00:00:09.331000","2026-02-24T00:00:09.333000","2026-02-24T00:00:09.334000","2026-02-24T00:00:09.335000","2026-02-24T00:00:09.337000","2026-02-24T00:00:09.338000","2026-02-24T00:00:09.339000","2026-02-24T00:00:09.341000","2026-02-24T00:00:09.342000","2026-02-24T00:00:09.344000","2026-02-24T00:00:09.345000","2026-02-24T00:00:09.346000","2026-02-24T00:00:09.348000","2026-02-24T00:00:09.349000","2026-02-24T00:00:09.351000","2026-02-24T00:00:09.352000","2026-02-24T00:00:09.353000","2026-02-24T00:00:09.355000","2026-02-24T00:00:09.356000","2026-02-24T00:00:09.357000","2026-02-24T00:00:09.359000","2026-02-24T00:00:09.360000","2026-02-24T00:00:09.362000","2026-02-24T00:00:09.363000","2026-02-24T00:00:09.365000","2026-02-24T00:00:09.366000","2026-02-24T00:00:09.367000","2026-02-24T00:00:09.369000","2026-02-24T00:00:09.370000","2026-02-24T00:00:09.371000","2026-02-24T00:00:09.373000","2026-02-24T00:00:08.379000","2026-02-24T00:00:08.380000","2026-02-24T00:00:08.381000","2026-02-24T00:00:08.382000","2026-02-24T00:00:08.384000","2026-02-24T00:00:08.385000","2026-02-24T00:00:08.387000","2026-02-24T00:00:08.388000","2026-02-24T00:00:08.389000","2026-02-24T00:00:08.391000","2026-02-24T00:00:08.392000","2026-02-24T00:00:08.393000","2026-02-24T00:00:08.395000","2026-02-24T00:00:08.396000","2026-02-24T00:00:08.398000","2026-02-24T00:00:08.399000","2026-02-24T00:00:08.403000","2026-02-24T00:00:08.403000","2026-02-24T00:00:08.405000","2026-02-24T00:00:08.406000","2026-02-24T00:00:08.407000","2026-02-24T00:00:08.409000","2026-02-24T00:00:08.410000","2026-02-24T00:00:08.411000","2026-02-24T00:00:08.413000","2026-02-24T00:00:08.414000","2026-02-24T00:00:08.416000","2026-02-24T00:00:08.417000","2026-02-24T00:00:08.418000","2026-02-24T00:00:08.420000","2026-02-24T00:00:08.421000","2026-02-24T00:00:08.422000","2026-02-24T00:00:08.424000","2026-02-24T00:00:08.425000","2026-02-24T00:00:08.427000","2026-02-24T00:00:08.428000","2026-02-24T00:00:08.429000","2026-02-24T00:00:08.431000","2026-02-24T00:00:08.432000","2026-02-24T00:00:08.434000","2026-02-24T00:00:08.435000","2026-02-24T00:00:08.436000","2026-02-24T00:00:08.438000","2026-02-24T00:00:08.439000","2026-02-24T00:00:08.440000","2026-02-24T00:00:08.442000","2026-02-24T00:00:08.443000","2026-02-24T00:00:08.445000","2026-02-24T00:00:08.446000","2026-02-24T00:00:08.447000","2026-02-24T00:00:08.449000","2026-02-24T00:00:08.450000","2026-02-24T00:00:08.452000","2026-02-24T00:00:08.453000","2026-02-24T00:00:08.454000","2026-02-24T00:00:08.456000","2026-02-24T00:00:08.457000","2026-02-24T00:00:08.459000","2026-02-24T00:00:08.460000","2026-02-24T00:00:08.461000","2026-02-24T00:00:08.463000","2026-02-24T00:00:08.464000","2026-02-24T00:00:08.465000","2026-02-24T00:00:08.467000","2026-02-24T00:00:08.468000","2026-02-24T00:00:08.473000","2026-02-24T00:00:08.474000","2026-02-24T00:00:08.476000","2026-02-24T00:00:08.477000","2026-02-24T00:00:08.478000","2026-02-24T00:00:08.480000","2026-02-24T00:00:08.481000","2026-02-24T00:00:08.483000","2026-02-24T00:00:08.484000","2026-02-24T00:00:08.485000","2026-02-24T00:00:08.487000","2026-02-24T00:00:08.488000","2026-02-24T00:00:08.489000","2026-02-24T00:00:08.491000","2026-02-24T00:00:08.492000","2026-02-24T00:00:08.494000","2026-02-24T00:00:08.495000","2026-02-24T00:00:08.497000","2026-02-24T00:00:08.498000","2026-02-24T00:00:08.499000","2026-02-24T00:00:08.501000","2026-02-24T00:00:08.502000","2026-02-24T00:00:08.503000","2026-02-24T00:00:08.505000","2026-02-24T00:00:08.506000","2026-02-24T00:00:08.508000","2026-02-24T00:00:08.509000","2026-02-24T00:00:08.510000","2026-02-24T00:00:08.512000","2026-02-24T00:00:08.513000","2026-02-24T00:00:08.515000","2026-02-24T00:00:08.517000","2026-02-24T00:00:08.517000","2026-02-24T00:00:08.519000","2026-02-24T00:00:08.520000","2026-02-24T00:00:08.524000","2026-02-24T00:00:08.526000","2026-02-24T00:00:08.527000","2026-02-24T00:00:08.528000","2026-02-24T00:00:08.529000","2026-02-24T00:00:08.530000","2026-02-24T00:00:08.532000","2026-02-24T00:00:08.533000","2026-02-24T00:00:08.535000","2026-02-24T00:00:08.536000","2026-02-24T00:00:08.538000","2026-02-24T00:00:08.543000","2026-02-24T00:00:08.544000","2026-02-24T00:00:08.545000","2026-02-24T00:00:08.546000","2026-02-24T00:00:08.547000","2026-02-24T00:00:08.548000","2026-02-24T00:00:08.549000","2026-02-24T00:00:08.551000","2026-02-24T00:00:08.552000","2026-02-24T00:00:08.553000","2026-02-24T00:00:08.554000","2026-02-24T00:00:08.555000","2026-02-24T00:00:08.556000","2026-02-24T00:00:08.557000","2026-02-24T00:00:08.559000","2026-02-24T00:00:08.560000","2026-02-24T00:00:08.561000","2026-02-24T00:00:08.562000","2026-02-24T00:00:08.563000","2026-02-24T00:00:08.565000","2026-02-24T00:00:08.566000","2026-02-24T00:00:08.567000","2026-02-24T00:00:08.568000","2026-02-24T00:00:08.569000","2026-02-24T00:00:08.570000","2026-02-24T00:00:08.571000","2026-02-24T00:00:08.573000","2026-02-24T00:00:08.574000","2026-02-24T00:00:08.575000","2026-02-24T00:00:08.576000","2026-02-24T00:00:08.578000","2026-02-24T00:00:08.579000","2026-02-24T00:00:08.580000","2026-02-24T00:00:08.581000","2026-02-24T00:00:08.582000","2026-02-24T00:00:08.583000","2026-02-24T00:00:08.584000","2026-02-24T00:00:08.586000","2026-02-24T00:00:08.587000","2026-02-24T00:00:08.588000","2026-02-24T00:00:08.589000","2026-02-24T00:00:08.590000","2026-02-24T00:00:08.592000","2026-02-24T00:00:08.593000","2026-02-24T00:00:08.594000","2026-02-24T00:00:08.595000","2026-02-24T00:00:08.596000","2026-02-24T00:00:08.598000","2026-02-24T00:00:08.599000","2026-02-24T00:00:08.600000","2026-02-24T00:00:08.601000","2026-02-24T00:00:08.602000","2026-02-24T00:00:08.603000","2026-02-24T00:00:08.604000","2026-02-24T00:00:08.606000","2026-02-24T00:00:08.607000","2026-02-24T00:00:08.608000","2026-02-24T00:00:08.609000","2026-02-24T00:00:08.610000","2026-02-24T00:00:08.611000","2026-02-24T00:00:08.613000","2026-02-24T00:00:08.614000","2026-02-24T00:00:08.615000","2026-02-24T00:00:08.616000","2026-02-24T00:00:08.617000","2026-02-24T00:00:08.619000","2026-02-24T00:00:08.620000","2026-02-24T00:00:08.621000","2026-02-24T00:00:08.622000","2026-02-24T00:00:08.624000","2026-02-24T00:00:08.625000","2026-02-24T00:00:08.626000","2026-02-24T00:00:08.627000","2026-02-24T00:00:08.628000","2026-02-24T00:00:08.629000","2026-02-24T00:00:08.630000","2026-02-24T00:00:08.631000","2026-02-24T00:00:08.633000","2026-02-24T00:00:08.634000","2026-02-24T00:00:08.635000","2026-02-24T00:00:08.636000","2026-02-24T00:00:08.637000","2026-02-24T00:00:08.638000","2026-02-24T00:00:08.640000","2026-02-24T00:00:08.641000","2026-02-24T00:00:08.642000","2026-02-24T00:00:08.643000","2026-02-24T00:00:08.645000","2026-02-24T00:00:08.646000","2026-02-24T00:00:08.647000","2026-02-24T00:00:08.648000","2026-02-24T00:00:08.650000","2026-02-24T00:00:08.651000","2026-02-24T00:00:08.652000","2026-02-24T00:00:08.653000","2026-02-24T00:00:08.654000","2026-02-24T00:00:08.655000","2026-02-24T00:00:08.656000","2026-02-24T00:00:08.658000","2026-02-24T00:00:08.659000","2026-02-24T00:00:08.660000","2026-02-24T00:00:08.661000","2026-02-24T00:00:08.662000","2026-02-24T00:00:08.663000","2026-02-24T00:00:08.665000","2026-02-24T00:00:08.666000","2026-02-24T00:00:08.667000","2026-02-24T00:00:08.668000","2026-02-24T00:00:08.669000","2026-02-24T00:00:08.670000","2026-02-24T00:00:08.672000","2026-02-24T00:00:08.673000","2026-02-24T00:00:08.674000","2026-02-24T00:00:08.675000","2026-02-24T00:00:08.676000","2026-02-24T00:00:08.678000","2026-02-24T00:00:08.679000","2026-02-24T00:00:08.680000","2026-02-24T00:00:08.681000","2026-02-24T00:00:08.683000","2026-02-24T00:00:08.684000","2026-02-24T00:00:08.685000","2026-02-24T00:00:08.686000","2026-02-24T00:00:08.687000","2026-02-24T00:00:08.688000","2026-02-24T00:00:08.689000","2026-02-24T00:00:08.691000","2026-02-24T00:00:08.692000","2026-02-24T00:00:08.693000","2026-02-24T00:00:08.694000","2026-02-24T00:00:08.695000","2026-02-24T00:00:08.696000","2026-02-24T00:00:08.698000","2026-02-24T00:00:08.699000","2026-02-24T00:00:08.700000","2026-02-24T00:00:08.701000","2026-02-24T00:00:08.702000","2026-02-24T00:00:08.704000","2026-02-24T00:00:08.705000","2026-02-24T00:00:08.706000","2026-02-24T00:00:08.707000","2026-02-24T00:00:08.708000","2026-02-24T00:00:08.710000","2026-02-24T00:00:08.711000","2026-02-24T00:00:08.712000","2026-02-24T00:00:08.713000","2026-02-24T00:00:08.714000","2026-02-24T00:00:08.716000","2026-02-24T00:00:08.717000","2026-02-24T00:00:08.718000","2026-02-24T00:00:08.719000","2026-02-24T00:00:08.721000","2026-02-24T00:00:08.722000","2026-02-24T00:00:08.723000","2026-02-24T00:00:08.724000","2026-02-24T00:00:08.725000","2026-02-24T00:00:08.727000","2026-02-24T00:00:08.728000","2026-02-24T00:00:08.729000","2026-02-24T00:00:08.730000","2026-02-24T00:00:08.732000","2026-02-24T00:00:08.733000","2026-02-24T00:00:08.734000","2026-02-24T00:00:08.735000","2026-02-24T00:00:08.737000","2026-02-24T00:00:08.738000","2026-02-24T00:00:08.739000","2026-02-24T00:00:08.740000","2026-02-24T00:00:08.742000","2026-02-24T00:00:08.744000","2026-02-24T00:00:08.744000","2026-02-24T00:00:08.745000","2026-02-24T00:00:08.747000","2026-02-24T00:00:08.748000","2026-02-24T00:00:08.749000","2026-02-24T00:00:08.750000","2026-02-24T00:00:08.751000","2026-02-24T00:00:08.752000","2026-02-24T00:00:08.754000","2026-02-24T00:00:08.755000","2026-02-24T00:00:08.756000","2026-02-24T00:00:08.757000","2026-02-24T00:00:08.759000","2026-02-24T00:00:08.760000","2026-02-24T00:00:08.761000","2026-02-24T00:00:08.762000","2026-02-24T00:00:08.764000","2026-02-24T00:00:08.765000","2026-02-24T00:00:08.766000","2026-02-24T00:00:08.767000","2026-02-24T00:00:08.769000","2026-02-24T00:00:08.770000","2026-02-24T00:00:08.771000","2026-02-24T00:00:08.772000","2026-02-24T00:00:08.773000","2026-02-24T00:00:08.775000","2026-02-24T00:00:08.776000","2026-02-24T00:00:08.777000","2026-02-24T00:00:08.778000","2026-02-24T00:00:08.779000","2026-02-24T00:00:08.781000","2026-02-24T00:00:08.782000","2026-02-24T00:00:08.783000","2026-02-24T00:00:08.784000","2026-02-24T00:00:08.786000","2026-02-24T00:00:08.787000","2026-02-24T00:00:08.788000","2026-02-24T00:00:08.789000","2026-02-24T00:00:08.791000","2026-02-24T00:00:08.792000","2026-02-24T00:00:08.793000","2026-02-24T00:00:08.794000","2026-02-24T00:00:08.795000","2026-02-24T00:00:08.797000","2026-02-24T00:00:08.798000","2026-02-24T00:00:08.799000","2026-02-24T00:00:08.801000","2026-02-24T00:00:08.806000","2026-02-24T00:00:08.807000","2026-02-24T00:00:08.807000","2026-02-24T00:00:08.809000","2026-02-24T00:00:08.810000","2026-02-24T00:00:08.811000","2026-02-24T00:00:08.812000","2026-02-24T00:00:08.814000","2026-02-24T00:00:08.815000","2026-02-24T00:00:08.816000","2026-02-24T00:00:08.817000","2026-02-24T00:00:08.819000","2026-02-24T00:00:08.820000","2026-02-24T00:00:08.821000","2026-02-24T00:00:08.822000","2026-02-24T00:00:08.824000","2026-02-24T00:00:08.825000","2026-02-24T00:00:08.826000","2026-02-24T00:00:08.827000","2026-02-24T00:00:08.828000","2026-02-24T00:00:08.830000","2026-02-24T00:00:08.831000","2026-02-24T00:00:08.832000","2026-02-24T00:00:08.833000","2026-02-24T00:00:08.835000","2026-02-24T00:00:08.836000","2026-02-24T00:00:08.837000","2026-02-24T00:00:08.838000","2026-02-24T00:00:08.839000","2026-02-24T00:00:08.841000","2026-02-24T00:00:08.842000","2026-02-24T00:00:08.843000","2026-02-24T00:00:08.845000","2026-02-24T00:00:08.846000","2026-02-24T00:00:08.847000","2026-02-24T00:00:08.848000","2026-02-24T00:00:08.850000","2026-02-24T00:00:08.851000","2026-02-24T00:00:08.852000","2026-02-24T00:00:08.853000","2026-02-24T00:00:08.854000","2026-02-24T00:00:08.856000","2026-02-24T00:00:08.857000","2026-02-24T00:00:08.858000","2026-02-24T00:00:08.859000","2026-02-24T00:00:08.860000","2026-02-24T00:00:08.862000","2026-02-24T00:00:08.863000","2026-02-24T00:00:08.864000","2026-02-24T00:00:08.865000","2026-02-24T00:00:08.867000","2026-02-24T00:00:08.868000","2026-02-24T00:00:08.869000","2026-02-24T00:00:08.870000","2026-02-24T00:00:08.872000","2026-02-24T00:00:08.873000","2026-02-24T00:00:08.874000","2026-02-24T00:00:08.875000","2026-02-24T00:00:08.876000","2026-02-24T00:00:08.878000","2026-02-24T00:00:08.879000","2026-02-24T00:00:08.880000","2026-02-24T00:00:08.882000","2026-02-24T00:00:08.883000","2026-02-24T00:00:08.884000","2026-02-24T00:00:08.885000","2026-02-24T00:00:08.887000","2026-02-24T00:00:08.888000","2026-02-24T00:00:08.889000","2026-02-24T00:00:08.891000","2026-02-24T00:00:08.892000","2026-02-24T00:00:08.893000","2026-02-24T00:00:08.894000","2026-02-24T00:00:08.896000","2026-02-24T00:00:08.897000","2026-02-24T00:00:08.898000","2026-02-24T00:00:08.899000","2026-02-24T00:00:08.901000","2026-02-24T00:00:08.906000","2026-02-24T00:00:08.907000","2026-02-24T00:00:08.908000","2026-02-24T00:00:08.909000","2026-02-24T00:00:08.910000","2026-02-24T00:00:08.912000","2026-02-24T00:00:08.913000","2026-02-24T00:00:08.914000","2026-02-24T00:00:08.915000","2026-02-24T00:00:08.917000","2026-02-24T00:00:08.918000","2026-02-24T00:00:08.919000","2026-02-24T00:00:08.921000","2026-02-24T00:00:08.922000","2026-02-24T00:00:08.923000","2026-02-24T00:00:08.924000","2026-02-24T00:00:08.926000","2026-02-24T00:00:08.927000","2026-02-24T00:00:08.928000","2026-02-24T00:00:08.930000","2026-02-24T00:00:08.931000","2026-02-24T00:00:08.932000","2026-02-24T00:00:08.933000","2026-02-24T00:00:08.935000","2026-02-24T00:00:08.936000","2026-02-24T00:00:08.937000","2026-02-24T00:00:08.939000","2026-02-24T00:00:08.940000","2026-02-24T00:00:08.941000","2026-02-24T00:00:08.942000","2026-02-24T00:00:08.944000","2026-02-24T00:00:08.945000","2026-02-24T00:00:08.946000","2026-02-24T00:00:08.948000","2026-02-24T00:00:08.949000","2026-02-24T00:00:08.950000","2026-02-24T00:00:08.951000","2026-02-24T00:00:08.953000","2026-02-24T00:00:08.954000","2026-02-24T00:00:08.955000","2026-02-24T00:00:08.957000","2026-02-24T00:00:08.958000","2026-02-24T00:00:08.959000","2026-02-24T00:00:08.961000","2026-02-24T00:00:08.962000","2026-02-24T00:00:08.963000","2026-02-24T00:00:08.965000","2026-02-24T00:00:08.966000","2026-02-24T00:00:08.967000","2026-02-24T00:00:08.968000","2026-02-24T00:00:08.969000","2026-02-24T00:00:08.971000","2026-02-24T00:00:08.972000","2026-02-24T00:00:08.973000","2026-02-24T00:00:08.975000","2026-02-24T00:00:08.976000","2026-02-24T00:00:08.977000","2026-02-24T00:00:08.978000","2026-02-24T00:00:08.980000","2026-02-24T00:00:08.981000","2026-02-24T00:00:08.982000","2026-02-24T00:00:08.983000","2026-02-24T00:00:08.985000","2026-02-24T00:00:08.986000","2026-02-24T00:00:08.987000","2026-02-24T00:00:08.988000","2026-02-24T00:00:08.990000","2026-02-24T00:00:08.991000","2026-02-24T00:00:08.993000","2026-02-24T00:00:08.994000","2026-02-24T00:00:08.995000","2026-02-24T00:00:08.996000","2026-02-24T00:00:08.998000","2026-02-24T00:00:08.999000","2026-02-24T00:00:09.000000","2026-02-24T00:00:09.001000","2026-02-24T00:00:09.003000","2026-02-24T00:00:09.004000","2026-02-24T00:00:09.005000","2026-02-24T00:00:09.006000","2026-02-24T00:00:09.008000","2026-02-24T00:00:09.009000","2026-02-24T00:00:09.010000","2026-02-24T00:00:09.012000","2026-02-24T00:00:09.013000","2026-02-24T00:00:09.014000","2026-02-24T00:00:09.016000","2026-02-24T00:00:09.017000","2026-02-24T00:00:09.018000","2026-02-24T00:00:09.019000","2026-02-24T00:00:09.021000","2026-02-24T00:00:09.022000","2026-02-24T00:00:09.023000","2026-02-24T00:00:09.024000","2026-02-24T00:00:09.026000","2026-02-24T00:00:09.027000","2026-02-24T00:00:09.028000","2026-02-24T00:00:09.030000","2026-02-24T00:00:09.031000","2026-02-24T00:00:09.032000","2026-02-24T00:00:09.034000","2026-02-24T00:00:09.035000","2026-02-24T00:00:09.036000","2026-02-24T00:00:09.037000","2026-02-24T00:00:09.039000","2026-02-24T00:00:09.040000","2026-02-24T00:00:09.041000","2026-02-24T00:00:09.043000","2026-02-24T00:00:09.044000","2026-02-24T00:00:09.045000","2026-02-24T00:00:09.047000","2026-02-24T00:00:09.048000","2026-02-24T00:00:09.049000","2026-02-24T00:00:09.051000","2026-02-24T00:00:09.052000","2026-02-24T00:00:09.053000","2026-02-24T00:00:09.055000","2026-02-24T00:00:09.056000","2026-02-24T00:00:09.057000","2026-02-24T00:00:09.059000","2026-02-24T00:00:09.060000","2026-02-24T00:00:09.061000","2026-02-24T00:00:09.063000","2026-02-24T00:00:09.064000","2026-02-24T00:00:09.065000","2026-02-24T00:00:09.066000","2026-02-24T00:00:09.068000","2026-02-24T00:00:09.069000","2026-02-24T00:00:09.070000","2026-02-24T00:00:09.072000","2026-02-24T00:00:09.073000","2026-02-24T00:00:09.074000","2026-02-24T00:00:09.076000","2026-02-24T00:00:09.077000","2026-02-24T00:00:09.079000","2026-02-24T00:00:09.080000","2026-02-24T00:00:09.081000","2026-02-24T00:00:09.083000","2026-02-24T00:00:09.084000","2026-02-24T00:00:09.085000","2026-02-24T00:00:09.086000","2026-02-24T00:00:09.088000","2026-02-24T00:00:09.089000","2026-02-24T00:00:09.090000","2026-02-24T00:00:09.092000","2026-02-24T00:00:09.099000","2026-02-24T00:00:09.100000","2026-02-24T00:00:09.101000","2026-02-24T00:00:09.102000","2026-02-24T00:00:09.104000","2026-02-24T00:00:09.105000","2026-02-24T00:00:09.107000","2026-02-24T00:00:09.108000","2026-02-24T00:00:09.109000","2026-02-24T00:00:09.110000","2026-02-24T00:00:09.112000","2026-02-24T00:00:09.113000","2026-02-24T00:00:09.114000","2026-02-24T00:00:09.116000","2026-02-24T00:00:09.117000","2026-02-24T00:00:09.118000","2026-02-24T00:00:09.120000","2026-02-24T00:00:09.121000","2026-02-24T00:00:09.122000","2026-02-24T00:00:09.124000","2026-02-24T00:00:09.125000","2026-02-24T00:00:09.126000","2026-02-24T00:00:09.128000","2026-02-24T00:00:09.129000","2026-02-24T00:00:09.130000","2026-02-24T00:00:09.132000","2026-02-24T00:00:09.133000","2026-02-24T00:00:09.134000","2026-02-24T00:00:09.136000","2026-02-24T00:00:09.137000","2026-02-24T00:00:09.138000","2026-02-24T00:00:09.140000","2026-02-24T00:00:09.141000","2026-02-24T00:00:09.142000","2026-02-24T00:00:09.144000","2026-02-24T00:00:09.145000","2026-02-24T00:00:09.146000","2026-02-24T00:00:09.148000","2026-02-24T00:00:09.149000","2026-02-24T00:00:09.151000","2026-02-24T00:00:09.152000","2026-02-24T00:00:09.153000","2026-02-24T00:00:09.154000","2026-02-24T00:00:09.156000","2026-02-24T00:00:09.157000","2026-02-24T00:00:09.158000","2026-02-24T00:00:09.160000","2026-02-24T00:00:09.161000","2026-02-24T00:00:09.162000","2026-02-24T00:00:09.164000","2026-02-24T00:00:09.165000","2026-02-24T00:00:09.166000","2026-02-24T00:00:09.168000","2026-02-24T00:00:09.169000","2026-02-24T00:00:09.170000","2026-02-24T00:00:09.172000","2026-02-24T00:00:09.173000","2026-02-24T00:00:09.174000","2026-02-24T00:00:09.176000","2026-02-24T00:00:09.177000","2026-02-24T00:00:09.178000","2026-02-24T00:00:09.180000","2026-02-24T00:00:09.181000","2026-02-24T00:00:09.182000","2026-02-24T00:00:09.184000","2026-02-24T00:00:09.185000","2026-02-24T00:00:09.186000","2026-02-24T00:00:09.188000","2026-02-24T00:00:09.189000","2026-02-24T00:00:09.190000","2026-02-24T00:00:09.192000","2026-02-24T00:00:09.193000","2026-02-24T00:00:09.194000","2026-02-24T00:00:09.196000","2026-02-24T00:00:09.197000","2026-02-24T00:00:09.198000","2026-02-24T00:00:09.200000","2026-02-24T00:00:09.201000","2026-02-24T00:00:09.202000","2026-02-24T00:00:09.204000","2026-02-24T00:00:09.205000","2026-02-24T00:00:09.206000","2026-02-24T00:00:09.208000","2026-02-24T00:00:09.209000","2026-02-24T00:00:09.211000","2026-02-24T00:00:09.212000","2026-02-24T00:00:09.213000","2026-02-24T00:00:09.215000","2026-02-24T00:00:09.216000","2026-02-24T00:00:09.217000","2026-02-24T00:00:09.219000","2026-02-24T00:00:09.220000","2026-02-24T00:00:09.222000","2026-02-24T00:00:09.223000","2026-02-24T00:00:09.224000","2026-02-24T00:00:09.226000","2026-02-24T00:00:09.227000","2026-02-24T00:00:09.228000","2026-02-24T00:00:09.230000","2026-02-24T00:00:09.231000","2026-02-24T00:00:09.233000","2026-02-24T00:00:09.234000","2026-02-24T00:00:09.235000","2026-02-24T00:00:09.237000","2026-02-24T00:00:09.238000","2026-02-24T00:00:09.240000","2026-02-24T00:00:09.241000","2026-02-24T00:00:09.242000","2026-02-24T00:00:09.244000","2026-02-24T00:00:09.245000","2026-02-24T00:00:09.247000","2026-02-24T00:00:09.248000","2026-02-24T00:00:09.250000","2026-02-24T00:00:09.251000","2026-02-24T00:00:09.252000","2026-02-24T00:00:09.253000","2026-02-24T00:00:09.255000","2026-02-24T00:00:09.256000","2026-02-24T00:00:09.257000","2026-02-24T00:00:09.259000","2026-02-24T00:00:09.260000","2026-02-24T00:00:09.262000","2026-02-24T00:00:09.263000","2026-02-24T00:00:09.264000","2026-02-24T00:00:09.266000","2026-02-24T00:00:09.270000","2026-02-24T00:00:09.272000","2026-02-24T00:00:09.273000","2026-02-24T00:00:09.274000","2026-02-24T00:00:09.275000","2026-02-24T00:00:09.277000","2026-02-24T00:00:09.278000","2026-02-24T00:00:09.279000","2026-02-24T00:00:09.281000","2026-02-24T00:00:09.282000","2026-02-24T00:00:08.526000","2026-02-24T00:00:08.527000","2026-02-24T00:00:08.528000","2026-02-24T00:00:08.529000","2026-02-24T00:00:08.530000","2026-02-24T00:00:08.532000","2026-02-24T00:00:08.533000","2026-02-24T00:00:08.534000","2026-02-24T00:00:08.536000","2026-02-24T00:00:08.543000","2026-02-24T00:00:08.544000","2026-02-24T00:00:08.545000","2026-02-24T00:00:08.546000","2026-02-24T00:00:08.547000","2026-02-24T00:00:08.548000","2026-02-24T00:00:08.549000","2026-02-24T00:00:08.551000","2026-02-24T00:00:08.552000","2026-02-24T00:00:08.553000","2026-02-24T00:00:08.554000","2026-02-24T00:00:08.555000","2026-02-24T00:00:08.556000","2026-02-24T00:00:08.557000","2026-02-24T00:00:08.559000","2026-02-24T00:00:08.560000","2026-02-24T00:00:08.561000","2026-02-24T00:00:08.562000","2026-02-24T00:00:08.563000","2026-02-24T00:00:08.565000","2026-02-24T00:00:08.566000","2026-02-24T00:00:08.567000","2026-02-24T00:00:08.568000","2026-02-24T00:00:08.569000","2026-02-24T00:00:08.570000","2026-02-24T00:00:08.571000","2026-02-24T00:00:08.573000","2026-02-24T00:00:08.574000","2026-02-24T00:00:08.575000","2026-02-24T00:00:08.576000","2026-02-24T00:00:08.578000","2026-02-24T00:00:08.579000","2026-02-24T00:00:08.580000","2026-02-24T00:00:08.581000","2026-02-24T00:00:08.582000","2026-02-24T00:00:08.583000","2026-02-24T00:00:08.584000","2026-02-24T00:00:08.586000","2026-02-24T00:00:08.587000","2026-02-24T00:00:08.588000","2026-02-24T00:00:08.589000","2026-02-24T00:00:08.590000","2026-02-24T00:00:08.592000","2026-02-24T00:00:08.593000","2026-02-24T00:00:08.594000","2026-02-24T00:00:08.595000","2026-02-24T00:00:08.596000","2026-02-24T00:00:08.598000","2026-02-24T00:00:08.599000","2026-02-24T00:00:08.600000","2026-02-24T00:00:08.601000","2026-02-24T00:00:08.602000","2026-02-24T00:00:08.603000","2026-02-24T00:00:08.604000","2026-02-24T00:00:08.606000","2026-02-24T00:00:08.607000","2026-02-24T00:00:08.608000","2026-02-24T00:00:08.609000","2026-02-24T00:00:08.610000","2026-02-24T00:00:08.611000","2026-02-24T00:00:08.613000","2026-02-24T00:00:08.614000","2026-02-24T00:00:08.615000","2026-02-24T00:00:08.616000","2026-02-24T00:00:08.617000","2026-02-24T00:00:08.619000","2026-02-24T00:00:08.620000","2026-02-24T00:00:08.621000","2026-02-24T00:00:08.622000","2026-02-24T00:00:08.624000","2026-02-24T00:00:08.625000","2026-02-24T00:00:08.626000","2026-02-24T00:00:08.627000","2026-02-24T00:00:08.628000","2026-02-24T00:00:08.629000","2026-02-24T00:00:08.630000","2026-02-24T00:00:08.631000","2026-02-24T00:00:08.633000","2026-02-24T00:00:08.634000","2026-02-24T00:00:08.635000","2026-02-24T00:00:08.636000","2026-02-24T00:00:08.637000","2026-02-24T00:00:08.638000","2026-02-24T00:00:08.640000","2026-02-24T00:00:08.641000","2026-02-24T00:00:08.642000","2026-02-24T00:00:08.643000","2026-02-24T00:00:08.645000","2026-02-24T00:00:08.646000","2026-02-24T00:00:08.647000","2026-02-24T00:00:08.648000","2026-02-24T00:00:08.650000","2026-02-24T00:00:08.651000","2026-02-24T00:00:08.652000","2026-02-24T00:00:08.653000","2026-02-24T00:00:08.654000","2026-02-24T00:00:08.655000","2026-02-24T00:00:08.656000","2026-02-24T00:00:08.658000","2026-02-24T00:00:08.659000","2026-02-24T00:00:08.660000","2026-02-24T00:00:08.661000","2026-02-24T00:00:08.662000","2026-02-24T00:00:08.663000","2026-02-24T00:00:08.665000","2026-02-24T00:00:08.666000","2026-02-24T00:00:08.667000","2026-02-24T00:00:08.668000","2026-02-24T00:00:08.669000","2026-02-24T00:00:08.670000","2026-02-24T00:00:08.672000","2026-02-24T00:00:08.673000","2026-02-24T00:00:08.674000","2026-02-24T00:00:08.675000","2026-02-24T00:00:08.676000","2026-02-24T00:00:08.678000","2026-02-24T00:00:08.679000","2026-02-24T00:00:08.680000","2026-02-24T00:00:08.681000","2026-02-24T00:00:08.683000","2026-02-24T00:00:08.684000","2026-02-24T00:00:08.685000","2026-02-24T00:00:08.686000","2026-02-24T00:00:08.687000","2026-02-24T00:00:08.688000","2026-02-24T00:00:08.689000","2026-02-24T00:00:08.691000","2026-02-24T00:00:08.692000","2026-02-24T00:00:08.693000","2026-02-24T00:00:08.694000","2026-02-24T00:00:08.695000","2026-02-24T00:00:08.696000","2026-02-24T00:00:08.698000","2026-02-24T00:00:08.699000","2026-02-24T00:00:08.700000","2026-02-24T00:00:08.701000","2026-02-24T00:00:08.702000","2026-02-24T00:00:08.704000","2026-02-24T00:00:08.705000","2026-02-24T00:00:08.706000","2026-02-24T00:00:08.707000","2026-02-24T00:00:08.708000","2026-02-24T00:00:08.710000","2026-02-24T00:00:08.711000","2026-02-24T00:00:08.712000","2026-02-24T00:00:08.713000","2026-02-24T00:00:08.714000","2026-02-24T00:00:08.716000","2026-02-24T00:00:08.717000","2026-02-24T00:00:08.718000","2026-02-24T00:00:08.719000","2026-02-24T00:00:08.721000","2026-02-24T00:00:08.722000","2026-02-24T00:00:08.723000","2026-02-24T00:00:08.724000","2026-02-24T00:00:08.725000","2026-02-24T00:00:08.727000","2026-02-24T00:00:08.728000","2026-02-24T00:00:08.729000","2026-02-24T00:00:08.730000","2026-02-24T00:00:08.732000","2026-02-24T00:00:08.733000","2026-02-24T00:00:08.734000","2026-02-24T00:00:08.735000","2026-02-24T00:00:08.737000","2026-02-24T00:00:08.738000","2026-02-24T00:00:08.739000","2026-02-24T00:00:08.740000","2026-02-24T00:00:08.742000","2026-02-24T00:00:08.744000","2026-02-24T00:00:08.744000","2026-02-24T00:00:08.745000","2026-02-24T00:00:08.747000","2026-02-24T00:00:08.748000","2026-02-24T00:00:08.749000","2026-02-24T00:00:08.750000","2026-02-24T00:00:08.751000","2026-02-24T00:00:08.752000","2026-02-24T00:00:08.754000","2026-02-24T00:00:08.755000","2026-02-24T00:00:08.756000","2026-02-24T00:00:08.757000","2026-02-24T00:00:08.759000","2026-02-24T00:00:08.760000","2026-02-24T00:00:08.761000","2026-02-24T00:00:08.762000","2026-02-24T00:00:08.764000","2026-02-24T00:00:08.765000","2026-02-24T00:00:08.766000","2026-02-24T00:00:08.767000","2026-02-24T00:00:08.769000","2026-02-24T00:00:08.770000","2026-02-24T00:00:08.771000","2026-02-24T00:00:08.772000","2026-02-24T00:00:08.773000","2026-02-24T00:00:08.775000","2026-02-24T00:00:08.776000","2026-02-24T00:00:08.777000","2026-02-24T00:00:08.778000","2026-02-24T00:00:08.779000","2026-02-24T00:00:08.781000","2026-02-24T00:00:08.782000","2026-02-24T00:00:08.783000","2026-02-24T00:00:08.784000","2026-02-24T00:00:08.786000","2026-02-24T00:00:08.787000","2026-02-24T00:00:08.788000","2026-02-24T00:00:08.789000","2026-02-24T00:00:08.791000","2026-02-24T00:00:08.792000","2026-02-24T00:00:08.793000","2026-02-24T00:00:08.794000","2026-02-24T00:00:08.795000","2026-02-24T00:00:08.797000","2026-02-24T00:00:08.798000","2026-02-24T00:00:08.799000","2026-02-24T00:00:08.801000","2026-02-24T00:00:08.544000","2026-02-24T00:00:08.545000","2026-02-24T00:00:08.546000","2026-02-24T00:00:08.547000","2026-02-24T00:00:08.548000","2026-02-24T00:00:08.549000","2026-02-24T00:00:08.551000","2026-02-24T00:00:08.552000","2026-02-24T00:00:08.553000","2026-02-24T00:00:08.554000","2026-02-24T00:00:08.555000","2026-02-24T00:00:08.556000","2026-02-24T00:00:08.557000","2026-02-24T00:00:08.559000","2026-02-24T00:00:08.560000","2026-02-24T00:00:08.561000","2026-02-24T00:00:08.562000","2026-02-24T00:00:08.563000","2026-02-24T00:00:08.565000","2026-02-24T00:00:08.566000","2026-02-24T00:00:08.567000","2026-02-24T00:00:08.568000","2026-02-24T00:00:08.569000","2026-02-24T00:00:08.570000","2026-02-24T00:00:08.571000","2026-02-24T00:00:08.573000","2026-02-24T00:00:08.574000","2026-02-24T00:00:08.575000","2026-02-24T00:00:08.576000","2026-02-24T00:00:08.578000","2026-02-24T00:00:08.579000","2026-02-24T00:00:08.580000","2026-02-24T00:00:08.581000","2026-02-24T00:00:08.582000","2026-02-24T00:00:08.583000","2026-02-24T00:00:08.584000","2026-02-24T00:00:08.586000","2026-02-24T00:00:08.587000","2026-02-24T00:00:08.588000","2026-02-24T00:00:08.589000","2026-02-24T00:00:08.590000","2026-02-24T00:00:08.592000","2026-02-24T00:00:08.593000","2026-02-24T00:00:08.594000","2026-02-24T00:00:08.595000","2026-02-24T00:00:08.596000","2026-02-24T00:00:08.598000","2026-02-24T00:00:08.599000","2026-02-24T00:00:08.600000","2026-02-24T00:00:08.601000","2026-02-24T00:00:08.602000","2026-02-24T00:00:08.603000","2026-02-24T00:00:08.604000","2026-02-24T00:00:08.606000","2026-02-24T00:00:08.607000","2026-02-24T00:00:08.608000","2026-02-24T00:00:08.609000","2026-02-24T00:00:08.610000","2026-02-24T00:00:08.611000","2026-02-24T00:00:08.613000","2026-02-24T00:00:08.614000","2026-02-24T00:00:08.615000","2026-02-24T00:00:08.616000","2026-02-24T00:00:08.617000","2026-02-24T00:00:08.619000","2026-02-24T00:00:08.620000","2026-02-24T00:00:08.621000","2026-02-24T00:00:08.622000","2026-02-24T00:00:08.624000","2026-02-24T00:00:08.625000","2026-02-24T00:00:08.626000","2026-02-24T00:00:08.627000","2026-02-24T00:00:08.628000","2026-02-24T00:00:08.629000","2026-02-24T00:00:08.630000","2026-02-24T00:00:08.631000","2026-02-24T00:00:08.633000","2026-02-24T00:00:08.634000","2026-02-24T00:00:08.635000","2026-02-24T00:00:08.636000","2026-02-24T00:00:08.637000","2026-02-24T00:00:08.639000","2026-02-24T00:00:08.640000","2026-02-24T00:00:08.641000","2026-02-24T00:00:08.642000","2026-02-24T00:00:08.643000","2026-02-24T00:00:08.645000","2026-02-24T00:00:08.646000","2026-02-24T00:00:08.647000","2026-02-24T00:00:08.648000","2026-02-24T00:00:08.650000","2026-02-24T00:00:08.651000","2026-02-24T00:00:08.652000","2026-02-24T00:00:08.653000","2026-02-24T00:00:08.654000","2026-02-24T00:00:08.655000","2026-02-24T00:00:08.656000","2026-02-24T00:00:08.658000","2026-02-24T00:00:08.659000","2026-02-24T00:00:08.660000","2026-02-24T00:00:08.661000","2026-02-24T00:00:08.662000","2026-02-24T00:00:08.663000","2026-02-24T00:00:08.665000","2026-02-24T00:00:08.666000","2026-02-24T00:00:08.667000","2026-02-24T00:00:08.668000","2026-02-24T00:00:08.669000","2026-02-24T00:00:08.670000","2026-02-24T00:00:08.672000","2026-02-24T00:00:08.673000","2026-02-24T00:00:08.674000","2026-02-24T00:00:08.675000","2026-02-24T00:00:08.676000","2026-02-24T00:00:08.678000","2026-02-24T00:00:08.679000","2026-02-24T00:00:08.680000","2026-02-24T00:00:08.682000","2026-02-24T00:00:08.683000","2026-02-24T00:00:08.684000","2026-02-24T00:00:08.685000","2026-02-24T00:00:08.686000","2026-02-24T00:00:08.687000","2026-02-24T00:00:08.688000","2026-02-24T00:00:08.689000","2026-02-24T00:00:08.691000","2026-02-24T00:00:08.692000","2026-02-24T00:00:08.693000","2026-02-24T00:00:08.694000","2026-02-24T00:00:08.695000","2026-02-24T00:00:08.696000","2026-02-24T00:00:08.698000","2026-02-24T00:00:08.699000","2026-02-24T00:00:08.700000","2026-02-24T00:00:08.701000","2026-02-24T00:00:08.702000","2026-02-24T00:00:08.704000","2026-02-24T00:00:08.705000","2026-02-24T00:00:08.706000","2026-02-24T00:00:08.707000","2026-02-24T00:00:08.709000","2026-02-24T00:00:08.710000","2026-02-24T00:00:08.711000","2026-02-24T00:00:08.712000","2026-02-24T00:00:08.713000","2026-02-24T00:00:08.714000","2026-02-24T00:00:08.716000","2026-02-24T00:00:08.717000","2026-02-24T00:00:08.718000","2026-02-24T00:00:08.719000","2026-02-24T00:00:08.721000","2026-02-24T00:00:08.722000","2026-02-24T00:00:08.723000","2026-02-24T00:00:08.724000","2026-02-24T00:00:08.725000","2026-02-24T00:00:08.727000","2026-02-24T00:00:08.728000","2026-02-24T00:00:08.729000","2026-02-24T00:00:08.731000","2026-02-24T00:00:08.732000","2026-02-24T00:00:08.733000","2026-02-24T00:00:08.734000","2026-02-24T00:00:08.735000","2026-02-24T00:00:08.737000","2026-02-24T00:00:08.738000","2026-02-24T00:00:08.739000","2026-02-24T00:00:08.740000","2026-02-24T00:00:08.742000","2026-02-24T00:00:08.744000","2026-02-24T00:00:08.744000","2026-02-24T00:00:08.745000","2026-02-24T00:00:08.747000","2026-02-24T00:00:08.748000","2026-02-24T00:00:08.749000","2026-02-24T00:00:08.750000","2026-02-24T00:00:08.751000","2026-02-24T00:00:08.752000","2026-02-24T00:00:08.754000","2026-02-24T00:00:08.755000","2026-02-24T00:00:08.756000","2026-02-24T00:00:08.757000","2026-02-24T00:00:08.759000","2026-02-24T00:00:08.760000","2026-02-24T00:00:08.761000","2026-02-24T00:00:08.762000","2026-02-24T00:00:08.764000","2026-02-24T00:00:08.765000","2026-02-24T00:00:08.766000","2026-02-24T00:00:08.767000","2026-02-24T00:00:08.769000","2026-02-24T00:00:08.770000","2026-02-24T00:00:08.771000","2026-02-24T00:00:08.772000","2026-02-24T00:00:08.773000","2026-02-24T00:00:08.775000","2026-02-24T00:00:08.776000","2026-02-24T00:00:08.777000","2026-02-24T00:00:08.778000","2026-02-24T00:00:08.780000","2026-02-24T00:00:08.781000","2026-02-24T00:00:08.782000","2026-02-24T00:00:08.783000","2026-02-24T00:00:08.784000","2026-02-24T00:00:08.786000","2026-02-24T00:00:08.787000","2026-02-24T00:00:08.788000","2026-02-24T00:00:08.789000","2026-02-24T00:00:08.791000","2026-02-24T00:00:08.792000","2026-02-24T00:00:08.793000","2026-02-24T00:00:08.794000","2026-02-24T00:00:08.795000","2026-02-24T00:00:08.797000","2026-02-24T00:00:08.798000","2026-02-24T00:00:08.799000","2026-02-24T00:00:08.801000","2026-02-24T00:00:08.806000","2026-02-24T00:00:08.807000","2026-02-24T00:00:08.808000","2026-02-24T00:00:08.809000","2026-02-24T00:00:08.810000","2026-02-24T00:00:08.811000","2026-02-24T00:00:08.812000","2026-02-24T00:00:08.814000","2026-02-24T00:00:08.815000","2026-02-24T00:00:08.816000","2026-02-24T00:00:08.817000","2026-02-24T00:00:08.819000","2026-02-24T00:00:08.820000","2026-02-24T00:00:08.821000","2026-02-24T00:00:08.822000","2026-02-24T00:00:08.824000","2026-02-24T00:00:08.825000","2026-02-24T00:00:08.826000","2026-02-24T00:00:08.827000","2026-02-24T00:00:08.828000","2026-02-24T00:00:08.830000","2026-02-24T00:00:08.831000","2026-02-24T00:00:08.832000","2026-02-24T00:00:08.833000","2026-02-24T00:00:08.835000","2026-02-24T00:00:08.836000","2026-02-24T00:00:08.837000","2026-02-24T00:00:08.838000","2026-02-24T00:00:08.839000","2026-02-24T00:00:08.841000","2026-02-24T00:00:08.842000","2026-02-24T00:00:08.843000","2026-02-24T00:00:08.845000","2026-02-24T00:00:08.846000","2026-02-24T00:00:08.847000","2026-02-24T00:00:08.848000","2026-02-24T00:00:08.850000","2026-02-24T00:00:08.851000","2026-02-24T00:00:08.852000","2026-02-24T00:00:08.853000","2026-02-24T00:00:08.854000","2026-02-24T00:00:08.856000","2026-02-24T00:00:08.857000","2026-02-24T00:00:08.858000","2026-02-24T00:00:08.859000","2026-02-24T00:00:08.860000","2026-02-24T00:00:08.862000","2026-02-24T00:00:08.863000","2026-02-24T00:00:08.864000","2026-02-24T00:00:08.865000","2026-02-24T00:00:08.867000","2026-02-24T00:00:08.868000","2026-02-24T00:00:08.869000","2026-02-24T00:00:08.870000","2026-02-24T00:00:08.872000","2026-02-24T00:00:08.873000","2026-02-24T00:00:08.874000","2026-02-24T00:00:08.875000","2026-02-24T00:00:08.877000","2026-02-24T00:00:08.878000","2026-02-24T00:00:08.879000","2026-02-24T00:00:08.880000","2026-02-24T00:00:08.882000","2026-02-24T00:00:08.883000","2026-02-24T00:00:08.884000","2026-02-24T00:00:08.885000","2026-02-24T00:00:08.887000","2026-02-24T00:00:08.888000","2026-02-24T00:00:08.889000","2026-02-24T00:00:08.891000","2026-02-24T00:00:08.892000","2026-02-24T00:00:08.893000","2026-02-24T00:00:08.894000","2026-02-24T00:00:08.896000","2026-02-24T00:00:08.897000","2026-02-24T00:00:08.898000","2026-02-24T00:00:08.899000","2026-02-24T00:00:08.901000","2026-02-24T00:00:08.807000","2026-02-24T00:00:08.807000","2026-02-24T00:00:08.809000","2026-02-24T00:00:08.810000","2026-02-24T00:00:08.811000","2026-02-24T00:00:08.812000","2026-02-24T00:00:08.814000","2026-02-24T00:00:08.815000","2026-02-24T00:00:08.816000","2026-02-24T00:00:08.817000","2026-02-24T00:00:08.818000","2026-02-24T00:00:08.820000","2026-02-24T00:00:08.821000","2026-02-24T00:00:08.822000","2026-02-24T00:00:08.824000","2026-02-24T00:00:08.825000","2026-02-24T00:00:08.826000","2026-02-24T00:00:08.827000","2026-02-24T00:00:08.828000","2026-02-24T00:00:08.830000","2026-02-24T00:00:08.831000","2026-02-24T00:00:08.832000","2026-02-24T00:00:08.833000","2026-02-24T00:00:08.834000","2026-02-24T00:00:08.836000","2026-02-24T00:00:08.837000","2026-02-24T00:00:08.838000","2026-02-24T00:00:08.839000","2026-02-24T00:00:08.841000","2026-02-24T00:00:08.842000","2026-02-24T00:00:08.843000","2026-02-24T00:00:08.845000","2026-02-24T00:00:08.846000","2026-02-24T00:00:08.847000","2026-02-24T00:00:08.848000","2026-02-24T00:00:08.850000","2026-02-24T00:00:08.851000","2026-02-24T00:00:08.852000","2026-02-24T00:00:08.853000","2026-02-24T00:00:08.854000","2026-02-24T00:00:08.856000","2026-02-24T00:00:08.857000","2026-02-24T00:00:08.858000","2026-02-24T00:00:08.859000","2026-02-24T00:00:08.860000","2026-02-24T00:00:08.862000","2026-02-24T00:00:08.863000","2026-02-24T00:00:08.864000","2026-02-24T00:00:08.865000","2026-02-24T00:00:08.867000","2026-02-24T00:00:08.868000","2026-02-24T00:00:08.869000","2026-02-24T00:00:08.870000","2026-02-24T00:00:08.872000","2026-02-24T00:00:08.873000","2026-02-24T00:00:08.874000","2026-02-24T00:00:08.875000","2026-02-24T00:00:08.876000","2026-02-24T00:00:08.878000","2026-02-24T00:00:08.879000","2026-02-24T00:00:08.880000","2026-02-24T00:00:08.882000","2026-02-24T00:00:08.883000","2026-02-24T00:00:08.884000","2026-02-24T00:00:08.885000","2026-02-24T00:00:08.887000","2026-02-24T00:00:08.888000","2026-02-24T00:00:08.889000","2026-02-24T00:00:08.891000","2026-02-24T00:00:08.892000","2026-02-24T00:00:08.893000","2026-02-24T00:00:08.894000","2026-02-24T00:00:08.896000","2026-02-24T00:00:08.897000","2026-02-24T00:00:08.898000","2026-02-24T00:00:08.899000","2026-02-24T00:00:08.901000","2026-02-24T00:00:08.906000","2026-02-24T00:00:08.907000","2026-02-24T00:00:08.908000","2026-02-24T00:00:08.909000","2026-02-24T00:00:08.910000","2026-02-24T00:00:08.912000","2026-02-24T00:00:08.913000","2026-02-24T00:00:08.914000","2026-02-24T00:00:08.915000","2026-02-24T00:00:08.917000","2026-02-24T00:00:08.918000","2026-02-24T00:00:08.919000","2026-02-24T00:00:08.921000","2026-02-24T00:00:08.922000","2026-02-24T00:00:08.923000","2026-02-24T00:00:08.924000","2026-02-24T00:00:08.926000","2026-02-24T00:00:08.927000","2026-02-24T00:00:08.928000","2026-02-24T00:00:08.930000","2026-02-24T00:00:08.931000","2026-02-24T00:00:08.932000","2026-02-24T00:00:08.933000","2026-02-24T00:00:08.935000","2026-02-24T00:00:08.936000","2026-02-24T00:00:08.937000","2026-02-24T00:00:08.939000","2026-02-24T00:00:08.940000","2026-02-24T00:00:08.941000","2026-02-24T00:00:08.942000","2026-02-24T00:00:08.944000","2026-02-24T00:00:08.945000","2026-02-24T00:00:08.946000","2026-02-24T00:00:08.948000","2026-02-24T00:00:08.949000","2026-02-24T00:00:08.950000","2026-02-24T00:00:08.951000","2026-02-24T00:00:08.953000","2026-02-24T00:00:08.954000","2026-02-24T00:00:08.955000","2026-02-24T00:00:08.957000","2026-02-24T00:00:08.958000","2026-02-24T00:00:08.959000","2026-02-24T00:00:08.961000","2026-02-24T00:00:08.962000","2026-02-24T00:00:08.963000","2026-02-24T00:00:08.965000","2026-02-24T00:00:08.966000","2026-02-24T00:00:08.967000","2026-02-24T00:00:08.968000","2026-02-24T00:00:08.969000","2026-02-24T00:00:08.971000","2026-02-24T00:00:08.972000","2026-02-24T00:00:08.973000","2026-02-24T00:00:08.974000","2026-02-24T00:00:08.976000","2026-02-24T00:00:08.977000","2026-02-24T00:00:08.978000","2026-02-24T00:00:08.980000","2026-02-24T00:00:08.981000","2026-02-24T00:00:08.982000","2026-02-24T00:00:08.983000","2026-02-24T00:00:08.985000","2026-02-24T00:00:08.986000","2026-02-24T00:00:08.987000","2026-02-24T00:00:08.988000","2026-02-24T00:00:08.990000","2026-02-24T00:00:08.991000","2026-02-24T00:00:08.993000","2026-02-24T00:00:08.994000","2026-02-24T00:00:08.995000","2026-02-24T00:00:08.996000","2026-02-24T00:00:08.998000","2026-02-24T00:00:08.999000","2026-02-24T00:00:09.000000","2026-02-24T00:00:09.001000","2026-02-24T00:00:09.003000","2026-02-24T00:00:09.004000","2026-02-24T00:00:09.005000","2026-02-24T00:00:09.006000","2026-02-24T00:00:09.008000","2026-02-24T00:00:09.009000","2026-02-24T00:00:09.010000","2026-02-24T00:00:09.012000","2026-02-24T00:00:09.013000","2026-02-24T00:00:09.014000","2026-02-24T00:00:09.016000","2026-02-24T00:00:09.017000","2026-02-24T00:00:09.018000","2026-02-24T00:00:09.019000","2026-02-24T00:00:09.021000","2026-02-24T00:00:09.022000","2026-02-24T00:00:09.023000","2026-02-24T00:00:09.024000","2026-02-24T00:00:09.026000","2026-02-24T00:00:09.027000","2026-02-24T00:00:09.028000","2026-02-24T00:00:09.030000","2026-02-24T00:00:09.031000","2026-02-24T00:00:09.032000","2026-02-24T00:00:09.033000","2026-02-24T00:00:09.035000","2026-02-24T00:00:09.036000","2026-02-24T00:00:09.037000","2026-02-24T00:00:09.039000","2026-02-24T00:00:09.040000","2026-02-24T00:00:09.041000","2026-02-24T00:00:09.043000","2026-02-24T00:00:09.044000","2026-02-24T00:00:09.045000","2026-02-24T00:00:09.047000","2026-02-24T00:00:09.048000","2026-02-24T00:00:09.049000","2026-02-24T00:00:09.051000","2026-02-24T00:00:09.052000","2026-02-24T00:00:09.053000","2026-02-24T00:00:09.055000","2026-02-24T00:00:09.056000","2026-02-24T00:00:09.057000","2026-02-24T00:00:09.059000","2026-02-24T00:00:09.060000","2026-02-24T00:00:09.061000","2026-02-24T00:00:09.063000","2026-02-24T00:00:09.064000","2026-02-24T00:00:09.065000","2026-02-24T00:00:09.066000","2026-02-24T00:00:09.068000","2026-02-24T00:00:09.069000","2026-02-24T00:00:09.070000","2026-02-24T00:00:09.072000","2026-02-24T00:00:09.073000","2026-02-24T00:00:09.074000","2026-02-24T00:00:09.076000","2026-02-24T00:00:09.077000","2026-02-24T00:00:09.079000","2026-02-24T00:00:09.080000","2026-02-24T00:00:09.081000","2026-02-24T00:00:09.083000","2026-02-24T00:00:09.084000","2026-02-24T00:00:09.085000","2026-02-24T00:00:09.086000","2026-02-24T00:00:09.088000","2026-02-24T00:00:09.089000","2026-02-24T00:00:09.090000","2026-02-24T00:00:09.092000","2026-02-24T00:00:09.099000","2026-02-24T00:00:09.100000","2026-02-24T00:00:09.101000","2026-02-24T00:00:09.102000","2026-02-24T00:00:09.104000","2026-02-24T00:00:09.105000","2026-02-24T00:00:09.106000","2026-02-24T00:00:09.108000","2026-02-24T00:00:09.109000","2026-02-24T00:00:09.110000","2026-02-24T00:00:09.112000","2026-02-24T00:00:09.113000","2026-02-24T00:00:09.114000","2026-02-24T00:00:09.116000","2026-02-24T00:00:09.117000","2026-02-24T00:00:09.118000","2026-02-24T00:00:09.120000","2026-02-24T00:00:09.121000","2026-02-24T00:00:09.122000","2026-02-24T00:00:09.124000","2026-02-24T00:00:09.125000","2026-02-24T00:00:09.126000","2026-02-24T00:00:09.128000","2026-02-24T00:00:09.129000","2026-02-24T00:00:09.130000","2026-02-24T00:00:09.132000","2026-02-24T00:00:09.133000","2026-02-24T00:00:09.134000","2026-02-24T00:00:09.136000","2026-02-24T00:00:09.137000","2026-02-24T00:00:09.138000","2026-02-24T00:00:09.140000","2026-02-24T00:00:09.141000","2026-02-24T00:00:09.142000","2026-02-24T00:00:09.144000","2026-02-24T00:00:09.145000","2026-02-24T00:00:09.146000","2026-02-24T00:00:09.148000","2026-02-24T00:00:09.149000","2026-02-24T00:00:09.150000","2026-02-24T00:00:09.152000","2026-02-24T00:00:09.153000","2026-02-24T00:00:09.154000","2026-02-24T00:00:09.156000","2026-02-24T00:00:09.157000","2026-02-24T00:00:09.158000","2026-02-24T00:00:09.160000","2026-02-24T00:00:09.161000","2026-02-24T00:00:09.162000","2026-02-24T00:00:09.164000","2026-02-24T00:00:09.165000","2026-02-24T00:00:09.166000","2026-02-24T00:00:09.168000","2026-02-24T00:00:09.169000","2026-02-24T00:00:09.170000","2026-02-24T00:00:09.172000","2026-02-24T00:00:09.173000","2026-02-24T00:00:09.174000","2026-02-24T00:00:09.176000","2026-02-24T00:00:09.177000","2026-02-24T00:00:09.178000","2026-02-24T00:00:09.180000","2026-02-24T00:00:09.181000","2026-02-24T00:00:09.182000","2026-02-24T00:00:09.184000","2026-02-24T00:00:09.185000","2026-02-24T00:00:09.186000","2026-02-24T00:00:09.188000","2026-02-24T00:00:09.189000","2026-02-24T00:00:09.190000","2026-02-24T00:00:09.192000","2026-02-24T00:00:09.193000","2026-02-24T00:00:09.194000","2026-02-24T00:00:09.196000","2026-02-24T00:00:09.197000","2026-02-24T00:00:09.198000","2026-02-24T00:00:09.200000","2026-02-24T00:00:09.201000","2026-02-24T00:00:09.202000","2026-02-24T00:00:09.204000","2026-02-24T00:00:09.205000","2026-02-24T00:00:09.206000","2026-02-24T00:00:09.208000","2026-02-24T00:00:09.209000","2026-02-24T00:00:09.210000","2026-02-24T00:00:09.212000","2026-02-24T00:00:09.213000","2026-02-24T00:00:09.215000","2026-02-24T00:00:09.216000","2026-02-24T00:00:09.217000","2026-02-24T00:00:09.219000","2026-02-24T00:00:09.220000","2026-02-24T00:00:09.222000","2026-02-24T00:00:09.223000","2026-02-24T00:00:09.224000","2026-02-24T00:00:09.226000","2026-02-24T00:00:09.227000","2026-02-24T00:00:09.228000","2026-02-24T00:00:09.230000","2026-02-24T00:00:09.231000","2026-02-24T00:00:09.233000","2026-02-24T00:00:09.234000","2026-02-24T00:00:09.235000","2026-02-24T00:00:09.237000","2026-02-24T00:00:09.238000","2026-02-24T00:00:09.239000","2026-02-24T00:00:09.241000","2026-02-24T00:00:09.242000","2026-02-24T00:00:09.244000","2026-02-24T00:00:09.245000","2026-02-24T00:00:09.247000","2026-02-24T00:00:09.248000","2026-02-24T00:00:09.250000","2026-02-24T00:00:09.251000","2026-02-24T00:00:09.252000","2026-02-24T00:00:09.253000","2026-02-24T00:00:09.255000","2026-02-24T00:00:09.256000","2026-02-24T00:00:09.257000","2026-02-24T00:00:09.259000","2026-02-24T00:00:09.260000","2026-02-24T00:00:09.262000","2026-02-24T00:00:09.263000","2026-02-24T00:00:09.264000","2026-02-24T00:00:09.266000","2026-02-24T00:00:09.270000","2026-02-24T00:00:09.272000","2026-02-24T00:00:09.273000","2026-02-24T00:00:09.274000","2026-02-24T00:00:09.275000","2026-02-24T00:00:09.277000","2026-02-24T00:00:09.278000","2026-02-24T00:00:09.279000","2026-02-24T00:00:09.281000","2026-02-24T00:00:09.282000","2026-02-24T00:00:09.286000","2026-02-24T00:00:09.288000","2026-02-24T00:00:09.289000","2026-02-24T00:00:09.290000","2026-02-24T00:00:09.292000","2026-02-24T00:00:09.293000","2026-02-24T00:00:09.294000","2026-02-24T00:00:09.296000","2026-02-24T00:00:09.297000","2026-02-24T00:00:09.298000","2026-02-24T00:00:09.300000","2026-02-24T00:00:09.307000","2026-02-24T00:00:09.308000","2026-02-24T00:00:09.309000","2026-02-24T00:00:09.310000","2026-02-24T00:00:09.312000","2026-02-24T00:00:09.313000","2026-02-24T00:00:09.315000","2026-02-24T00:00:09.316000","2026-02-24T00:00:09.317000","2026-02-24T00:00:09.319000","2026-02-24T00:00:09.320000","2026-02-24T00:00:09.321000","2026-02-24T00:00:09.323000","2026-02-24T00:00:09.324000","2026-02-24T00:00:09.329000","2026-02-24T00:00:09.330000","2026-02-24T00:00:09.331000","2026-02-24T00:00:09.333000","2026-02-24T00:00:09.334000","2026-02-24T00:00:09.336000","2026-02-24T00:00:09.337000","2026-02-24T00:00:09.338000","2026-02-24T00:00:09.340000","2026-02-24T00:00:09.341000","2026-02-24T00:00:09.342000","2026-02-24T00:00:09.344000","2026-02-24T00:00:09.345000","2026-02-24T00:00:09.347000","2026-02-24T00:00:09.348000","2026-02-24T00:00:09.349000","2026-02-24T00:00:09.351000","2026-02-24T00:00:09.352000","2026-02-24T00:00:09.353000","2026-02-24T00:00:09.355000","2026-02-24T00:00:09.356000","2026-02-24T00:00:09.358000","2026-02-24T00:00:09.359000","2026-02-24T00:00:09.360000","2026-02-24T00:00:09.362000","2026-02-24T00:00:09.363000","2026-02-24T00:00:09.365000","2026-02-24T00:00:09.366000","2026-02-24T00:00:09.367000","2026-02-24T00:00:09.369000","2026-02-24T00:00:09.370000","2026-02-24T00:00:09.371000","2026-02-24T00:00:09.373000","2026-02-24T00:00:09.375000","2026-02-24T00:00:09.375000","2026-02-24T00:00:09.377000","2026-02-24T00:00:09.378000","2026-02-24T00:00:09.379000","2026-02-24T00:00:09.381000","2026-02-24T00:00:09.382000","2026-02-24T00:00:09.383000","2026-02-24T00:00:09.385000","2026-02-24T00:00:09.386000","2026-02-24T00:00:09.388000","2026-02-24T00:00:09.389000","2026-02-24T00:00:09.391000","2026-02-24T00:00:09.392000","2026-02-24T00:00:09.397000","2026-02-24T00:00:09.398000","2026-02-24T00:00:09.399000","2026-02-24T00:00:09.400000","2026-02-24T00:00:09.401000","2026-02-24T00:00:09.402000","2026-02-24T00:00:09.403000","2026-02-24T00:00:09.405000","2026-02-24T00:00:09.406000","2026-02-24T00:00:09.407000","2026-02-24T00:00:09.408000","2026-02-24T00:00:09.410000","2026-02-24T00:00:09.411000","2026-02-24T00:00:09.412000","2026-02-24T00:00:09.413000","2026-02-24T00:00:09.415000","2026-02-24T00:00:09.416000","2026-02-24T00:00:09.417000","2026-02-24T00:00:09.418000","2026-02-24T00:00:09.419000","2026-02-24T00:00:09.421000","2026-02-24T00:00:09.422000","2026-02-24T00:00:09.423000","2026-02-24T00:00:09.424000","2026-02-24T00:00:09.426000","2026-02-24T00:00:09.427000","2026-02-24T00:00:09.428000","2026-02-24T00:00:09.429000","2026-02-24T00:00:09.431000","2026-02-24T00:00:09.432000","2026-02-24T00:00:09.433000","2026-02-24T00:00:09.434000","2026-02-24T00:00:09.435000","2026-02-24T00:00:09.437000","2026-02-24T00:00:09.438000","2026-02-24T00:00:09.439000","2026-02-24T00:00:09.440000","2026-02-24T00:00:09.442000","2026-02-24T00:00:09.443000","2026-02-24T00:00:09.444000","2026-02-24T00:00:09.445000","2026-02-24T00:00:09.447000","2026-02-24T00:00:09.448000","2026-02-24T00:00:09.449000","2026-02-24T00:00:09.450000","2026-02-24T00:00:09.452000","2026-02-24T00:00:09.453000","2026-02-24T00:00:09.455000","2026-02-24T00:00:09.456000","2026-02-24T00:00:09.458000","2026-02-24T00:00:09.459000","2026-02-24T00:00:09.460000","2026-02-24T00:00:09.461000","2026-02-24T00:00:09.462000","2026-02-24T00:00:09.464000","2026-02-24T00:00:09.465000","2026-02-24T00:00:09.466000","2026-02-24T00:00:09.467000","2026-02-24T00:00:09.469000","2026-02-24T00:00:09.470000","2026-02-24T00:00:09.471000","2026-02-24T00:00:09.473000","2026-02-24T00:00:09.474000","2026-02-24T00:00:09.475000","2026-02-24T00:00:08.907000","2026-02-24T00:00:08.908000","2026-02-24T00:00:08.909000","2026-02-24T00:00:08.910000","2026-02-24T00:00:08.912000","2026-02-24T00:00:08.913000","2026-02-24T00:00:08.914000","2026-02-24T00:00:08.915000","2026-02-24T00:00:08.917000","2026-02-24T00:00:08.918000","2026-02-24T00:00:08.919000","2026-02-24T00:00:08.921000","2026-02-24T00:00:08.922000","2026-02-24T00:00:08.923000","2026-02-24T00:00:08.924000","2026-02-24T00:00:08.926000","2026-02-24T00:00:08.927000","2026-02-24T00:00:08.928000","2026-02-24T00:00:08.929000","2026-02-24T00:00:08.931000","2026-02-24T00:00:08.932000","2026-02-24T00:00:08.933000","2026-02-24T00:00:08.935000","2026-02-24T00:00:08.936000","2026-02-24T00:00:08.937000","2026-02-24T00:00:08.938000","2026-02-24T00:00:08.940000","2026-02-24T00:00:08.941000","2026-02-24T00:00:08.942000","2026-02-24T00:00:08.944000","2026-02-24T00:00:08.945000","2026-02-24T00:00:08.946000","2026-02-24T00:00:08.947000","2026-02-24T00:00:08.949000","2026-02-24T00:00:08.950000","2026-02-24T00:00:08.951000","2026-02-24T00:00:08.952000","2026-02-24T00:00:08.954000","2026-02-24T00:00:08.955000","2026-02-24T00:00:08.956000","2026-02-24T00:00:08.958000","2026-02-24T00:00:08.959000","2026-02-24T00:00:08.960000","2026-02-24T00:00:08.962000","2026-02-24T00:00:08.963000","2026-02-24T00:00:08.965000","2026-02-24T00:00:08.965000","2026-02-24T00:00:08.967000","2026-02-24T00:00:08.968000","2026-02-24T00:00:08.969000","2026-02-24T00:00:08.970000","2026-02-24T00:00:08.972000","2026-02-24T00:00:08.973000","2026-02-24T00:00:08.974000","2026-02-24T00:00:08.976000","2026-02-24T00:00:08.977000","2026-02-24T00:00:08.978000","2026-02-24T00:00:08.980000","2026-02-24T00:00:08.981000","2026-02-24T00:00:08.982000","2026-02-24T00:00:08.983000","2026-02-24T00:00:08.985000","2026-02-24T00:00:08.986000","2026-02-24T00:00:08.987000","2026-02-24T00:00:08.988000","2026-02-24T00:00:08.990000","2026-02-24T00:00:08.991000","2026-02-24T00:00:08.992000","2026-02-24T00:00:08.994000","2026-02-24T00:00:08.995000","2026-02-24T00:00:08.996000","2026-02-24T00:00:08.997000","2026-02-24T00:00:08.999000","2026-02-24T00:00:09.000000","2026-02-24T00:00:09.001000","2026-02-24T00:00:09.003000","2026-02-24T00:00:09.004000","2026-02-24T00:00:09.005000","2026-02-24T00:00:09.006000","2026-02-24T00:00:09.008000","2026-02-24T00:00:09.009000","2026-02-24T00:00:09.010000","2026-02-24T00:00:09.012000","2026-02-24T00:00:09.013000","2026-02-24T00:00:09.014000","2026-02-24T00:00:09.015000","2026-02-24T00:00:09.017000","2026-02-24T00:00:09.018000","2026-02-24T00:00:09.019000","2026-02-24T00:00:09.020000","2026-02-24T00:00:09.022000","2026-02-24T00:00:09.023000","2026-02-24T00:00:09.024000","2026-02-24T00:00:09.026000","2026-02-24T00:00:09.027000","2026-02-24T00:00:09.028000","2026-02-24T00:00:09.030000","2026-02-24T00:00:09.031000","2026-02-24T00:00:09.032000","2026-02-24T00:00:09.033000","2026-02-24T00:00:09.035000","2026-02-24T00:00:09.036000","2026-02-24T00:00:09.037000","2026-02-24T00:00:09.038000","2026-02-24T00:00:09.040000","2026-02-24T00:00:09.041000","2026-02-24T00:00:09.043000","2026-02-24T00:00:09.044000","2026-02-24T00:00:09.045000","2026-02-24T00:00:09.047000","2026-02-24T00:00:09.048000","2026-02-24T00:00:09.049000","2026-02-24T00:00:09.050000","2026-02-24T00:00:09.052000","2026-02-24T00:00:09.053000","2026-02-24T00:00:09.054000","2026-02-24T00:00:09.056000","2026-02-24T00:00:09.057000","2026-02-24T00:00:09.058000","2026-02-24T00:00:09.060000","2026-02-24T00:00:09.061000","2026-02-24T00:00:09.062000","2026-02-24T00:00:09.064000","2026-02-24T00:00:09.065000","2026-02-24T00:00:09.066000","2026-02-24T00:00:09.068000","2026-02-24T00:00:09.069000","2026-02-24T00:00:09.070000","2026-02-24T00:00:09.072000","2026-02-24T00:00:09.073000","2026-02-24T00:00:09.074000","2026-02-24T00:00:09.076000","2026-02-24T00:00:09.077000","2026-02-24T00:00:09.078000","2026-02-24T00:00:09.080000","2026-02-24T00:00:09.081000","2026-02-24T00:00:09.083000","2026-02-24T00:00:09.084000","2026-02-24T00:00:09.085000","2026-02-24T00:00:09.086000","2026-02-24T00:00:09.088000","2026-02-24T00:00:09.089000","2026-02-24T00:00:09.090000","2026-02-24T00:00:09.092000","2026-02-24T00:00:09.100000","2026-02-24T00:00:09.101000","2026-02-24T00:00:09.102000","2026-02-24T00:00:09.104000","2026-02-24T00:00:09.105000","2026-02-24T00:00:09.106000","2026-02-24T00:00:09.108000","2026-02-24T00:00:09.109000","2026-02-24T00:00:09.110000","2026-02-24T00:00:09.112000","2026-02-24T00:00:09.113000","2026-02-24T00:00:09.114000","2026-02-24T00:00:09.116000","2026-02-24T00:00:09.117000","2026-02-24T00:00:09.118000","2026-02-24T00:00:09.120000","2026-02-24T00:00:09.121000","2026-02-24T00:00:09.122000","2026-02-24T00:00:09.124000","2026-02-24T00:00:09.125000","2026-02-24T00:00:09.126000","2026-02-24T00:00:09.128000","2026-02-24T00:00:09.129000","2026-02-24T00:00:09.130000","2026-02-24T00:00:09.132000","2026-02-24T00:00:09.133000","2026-02-24T00:00:09.134000","2026-02-24T00:00:09.136000","2026-02-24T00:00:09.137000","2026-02-24T00:00:09.138000","2026-02-24T00:00:09.140000","2026-02-24T00:00:09.141000","2026-02-24T00:00:09.142000","2026-02-24T00:00:09.144000","2026-02-24T00:00:09.145000","2026-02-24T00:00:09.146000","2026-02-24T00:00:09.148000","2026-02-24T00:00:09.149000","2026-02-24T00:00:09.150000","2026-02-24T00:00:09.152000","2026-02-24T00:00:09.153000","2026-02-24T00:00:09.154000","2026-02-24T00:00:09.156000","2026-02-24T00:00:09.157000","2026-02-24T00:00:09.158000","2026-02-24T00:00:09.160000","2026-02-24T00:00:09.161000","2026-02-24T00:00:09.162000","2026-02-24T00:00:09.164000","2026-02-24T00:00:09.165000","2026-02-24T00:00:09.166000","2026-02-24T00:00:09.168000","2026-02-24T00:00:09.169000","2026-02-24T00:00:09.170000","2026-02-24T00:00:09.172000","2026-02-24T00:00:09.173000","2026-02-24T00:00:09.174000","2026-02-24T00:00:09.176000","2026-02-24T00:00:09.177000","2026-02-24T00:00:09.178000","2026-02-24T00:00:09.180000","2026-02-24T00:00:09.181000","2026-02-24T00:00:09.182000","2026-02-24T00:00:09.184000","2026-02-24T00:00:09.185000","2026-02-24T00:00:09.186000","2026-02-24T00:00:09.188000","2026-02-24T00:00:09.189000","2026-02-24T00:00:09.190000","2026-02-24T00:00:09.192000","2026-02-24T00:00:09.193000","2026-02-24T00:00:09.194000","2026-02-24T00:00:09.196000","2026-02-24T00:00:09.197000","2026-02-24T00:00:09.198000","2026-02-24T00:00:09.200000","2026-02-24T00:00:09.201000","2026-02-24T00:00:09.202000","2026-02-24T00:00:09.204000","2026-02-24T00:00:09.205000","2026-02-24T00:00:09.206000","2026-02-24T00:00:09.208000","2026-02-24T00:00:09.209000","2026-02-24T00:00:09.210000","2026-02-24T00:00:09.212000","2026-02-24T00:00:09.213000","2026-02-24T00:00:09.215000","2026-02-24T00:00:09.216000","2026-02-24T00:00:09.217000","2026-02-24T00:00:09.219000","2026-02-24T00:00:09.220000","2026-02-24T00:00:09.221000","2026-02-24T00:00:09.223000","2026-02-24T00:00:09.224000","2026-02-24T00:00:09.226000","2026-02-24T00:00:09.227000","2026-02-24T00:00:09.228000","2026-02-24T00:00:09.230000","2026-02-24T00:00:09.231000","2026-02-24T00:00:09.233000","2026-02-24T00:00:09.234000","2026-02-24T00:00:09.235000","2026-02-24T00:00:09.237000","2026-02-24T00:00:09.238000","2026-02-24T00:00:09.239000","2026-02-24T00:00:09.241000","2026-02-24T00:00:09.242000","2026-02-24T00:00:09.244000","2026-02-24T00:00:09.245000","2026-02-24T00:00:09.246000","2026-02-24T00:00:09.248000","2026-02-24T00:00:09.250000","2026-02-24T00:00:09.251000","2026-02-24T00:00:09.252000","2026-02-24T00:00:09.253000","2026-02-24T00:00:09.255000","2026-02-24T00:00:09.256000","2026-02-24T00:00:09.257000","2026-02-24T00:00:09.259000","2026-02-24T00:00:09.260000","2026-02-24T00:00:09.261000","2026-02-24T00:00:09.263000","2026-02-24T00:00:09.264000","2026-02-24T00:00:09.266000","2026-02-24T00:00:09.272000","2026-02-24T00:00:09.273000","2026-02-24T00:00:09.274000","2026-02-24T00:00:09.275000","2026-02-24T00:00:09.277000","2026-02-24T00:00:09.278000","2026-02-24T00:00:09.279000","2026-02-24T00:00:09.281000","2026-02-24T00:00:09.282000","2026-02-24T00:00:09.286000","2026-02-24T00:00:09.288000","2026-02-24T00:00:09.289000","2026-02-24T00:00:09.290000","2026-02-24T00:00:09.292000","2026-02-24T00:00:09.293000","2026-02-24T00:00:09.294000","2026-02-24T00:00:09.296000","2026-02-24T00:00:09.297000","2026-02-24T00:00:09.298000","2026-02-24T00:00:09.300000","2026-02-24T00:00:09.288000","2026-02-24T00:00:09.289000","2026-02-24T00:00:09.290000","2026-02-24T00:00:09.292000","2026-02-24T00:00:09.293000","2026-02-24T00:00:09.294000","2026-02-24T00:00:09.296000","2026-02-24T00:00:09.297000","2026-02-24T00:00:09.298000","2026-02-24T00:00:09.300000","2026-02-24T00:00:09.307000","2026-02-24T00:00:09.308000","2026-02-24T00:00:09.309000","2026-02-24T00:00:09.310000","2026-02-24T00:00:09.312000","2026-02-24T00:00:09.313000","2026-02-24T00:00:09.314000","2026-02-24T00:00:09.316000","2026-02-24T00:00:09.317000","2026-02-24T00:00:09.319000","2026-02-24T00:00:09.320000","2026-02-24T00:00:09.321000","2026-02-24T00:00:09.323000","2026-02-24T00:00:09.324000","2026-02-24T00:00:09.308000","2026-02-24T00:00:09.309000","2026-02-24T00:00:09.310000","2026-02-24T00:00:09.312000","2026-02-24T00:00:09.313000","2026-02-24T00:00:09.314000","2026-02-24T00:00:09.316000","2026-02-24T00:00:09.317000","2026-02-24T00:00:09.319000","2026-02-24T00:00:09.320000","2026-02-24T00:00:09.321000","2026-02-24T00:00:09.323000","2026-02-24T00:00:09.324000","2026-02-24T00:00:09.329000","2026-02-24T00:00:09.330000","2026-02-24T00:00:09.331000","2026-02-24T00:00:09.333000","2026-02-24T00:00:09.334000","2026-02-24T00:00:09.336000","2026-02-24T00:00:09.337000","2026-02-24T00:00:09.338000","2026-02-24T00:00:09.340000","2026-02-24T00:00:09.341000","2026-02-24T00:00:09.342000","2026-02-24T00:00:09.344000","2026-02-24T00:00:09.345000","2026-02-24T00:00:09.347000","2026-02-24T00:00:09.348000","2026-02-24T00:00:09.349000","2026-02-24T00:00:09.351000","2026-02-24T00:00:09.352000","2026-02-24T00:00:09.353000","2026-02-24T00:00:09.355000","2026-02-24T00:00:09.356000","2026-02-24T00:00:09.358000","2026-02-24T00:00:09.359000","2026-02-24T00:00:09.360000","2026-02-24T00:00:09.362000","2026-02-24T00:00:09.363000","2026-02-24T00:00:09.365000","2026-02-24T00:00:09.366000","2026-02-24T00:00:09.367000","2026-02-24T00:00:09.369000","2026-02-24T00:00:09.370000","2026-02-24T00:00:09.371000","2026-02-24T00:00:09.373000","2026-02-24T00:00:09.375000","2026-02-24T00:00:09.375000","2026-02-24T00:00:09.377000","2026-02-24T00:00:09.378000","2026-02-24T00:00:09.379000","2026-02-24T00:00:09.381000","2026-02-24T00:00:09.382000","2026-02-24T00:00:09.383000","2026-02-24T00:00:09.385000","2026-02-24T00:00:09.386000","2026-02-24T00:00:09.388000","2026-02-24T00:00:09.389000","2026-02-24T00:00:09.391000","2026-02-24T00:00:09.392000","2026-02-24T00:00:09.397000","2026-02-24T00:00:09.398000","2026-02-24T00:00:09.398000","2026-02-24T00:00:09.400000","2026-02-24T00:00:09.401000","2026-02-24T00:00:09.402000","2026-02-24T00:00:09.403000","2026-02-24T00:00:09.405000","2026-02-24T00:00:09.406000","2026-02-24T00:00:09.407000","2026-02-24T00:00:09.408000","2026-02-24T00:00:09.409000","2026-02-24T00:00:09.411000","2026-02-24T00:00:09.412000","2026-02-24T00:00:09.413000","2026-02-24T00:00:09.415000","2026-02-24T00:00:09.416000","2026-02-24T00:00:09.417000","2026-02-24T00:00:09.418000","2026-02-24T00:00:09.419000","2026-02-24T00:00:09.421000","2026-02-24T00:00:09.422000","2026-02-24T00:00:09.423000","2026-02-24T00:00:09.424000","2026-02-24T00:00:09.426000","2026-02-24T00:00:09.427000","2026-02-24T00:00:09.428000","2026-02-24T00:00:09.429000","2026-02-24T00:00:09.431000","2026-02-24T00:00:09.432000","2026-02-24T00:00:09.433000","2026-02-24T00:00:09.434000","2026-02-24T00:00:09.435000","2026-02-24T00:00:09.437000","2026-02-24T00:00:09.438000","2026-02-24T00:00:09.439000","2026-02-24T00:00:09.440000","2026-02-24T00:00:09.441000","2026-02-24T00:00:09.443000","2026-02-24T00:00:09.444000","2026-02-24T00:00:09.445000","2026-02-24T00:00:09.446000","2026-02-24T00:00:09.448000","2026-02-24T00:00:09.449000","2026-02-24T00:00:09.450000","2026-02-24T00:00:09.452000","2026-02-24T00:00:09.453000","2026-02-24T00:00:09.455000","2026-02-24T00:00:09.456000","2026-02-24T00:00:09.458000","2026-02-24T00:00:09.459000","2026-02-24T00:00:09.460000","2026-02-24T00:00:09.461000","2026-02-24T00:00:09.462000","2026-02-24T00:00:09.463000","2026-02-24T00:00:09.465000","2026-02-24T00:00:09.466000","2026-02-24T00:00:09.467000","2026-02-24T00:00:09.469000","2026-02-24T00:00:09.470000","2026-02-24T00:00:09.471000","2026-02-24T00:00:09.472000","2026-02-24T00:00:09.474000","2026-02-24T00:00:09.475000","2026-02-24T00:00:09.479000","2026-02-24T00:00:09.481000","2026-02-24T00:00:09.482000","2026-02-24T00:00:09.483000","2026-02-24T00:00:09.485000","2026-02-24T00:00:09.486000","2026-02-24T00:00:09.487000","2026-02-24T00:00:09.489000","2026-02-24T00:00:09.491000","2026-02-24T00:00:09.492000","2026-02-24T00:00:09.493000","2026-02-24T00:00:09.494000","2026-02-24T00:00:09.495000","2026-02-24T00:00:09.496000","2026-02-24T00:00:09.497000","2026-02-24T00:00:09.499000","2026-02-24T00:00:09.500000","2026-02-24T00:00:09.501000","2026-02-24T00:00:09.502000","2026-02-24T00:00:09.503000","2026-02-24T00:00:09.504000","2026-02-24T00:00:09.506000","2026-02-24T00:00:09.507000","2026-02-24T00:00:09.508000","2026-02-24T00:00:09.509000","2026-02-24T00:00:09.510000","2026-02-24T00:00:09.512000","2026-02-24T00:00:09.513000","2026-02-24T00:00:09.514000","2026-02-24T00:00:09.515000","2026-02-24T00:00:09.516000","2026-02-24T00:00:09.517000","2026-02-24T00:00:09.519000","2026-02-24T00:00:09.520000","2026-02-24T00:00:09.521000","2026-02-24T00:00:09.522000","2026-02-24T00:00:09.524000","2026-02-24T00:00:09.525000","2026-02-24T00:00:09.526000","2026-02-24T00:00:09.527000","2026-02-24T00:00:09.528000","2026-02-24T00:00:09.529000","2026-02-24T00:00:09.530000","2026-02-24T00:00:09.532000","2026-02-24T00:00:09.533000","2026-02-24T00:00:09.534000","2026-02-24T00:00:09.535000","2026-02-24T00:00:09.536000","2026-02-24T00:00:09.537000","2026-02-24T00:00:09.539000","2026-02-24T00:00:09.540000","2026-02-24T00:00:09.541000","2026-02-24T00:00:09.542000","2026-02-24T00:00:09.543000","2026-02-24T00:00:09.545000","2026-02-24T00:00:09.546000","2026-02-24T00:00:09.547000","2026-02-24T00:00:09.548000","2026-02-24T00:00:09.549000","2026-02-24T00:00:09.551000","2026-02-24T00:00:09.552000","2026-02-24T00:00:09.553000","2026-02-24T00:00:09.554000","2026-02-24T00:00:09.555000","2026-02-24T00:00:09.556000","2026-02-24T00:00:09.558000","2026-02-24T00:00:09.559000","2026-02-24T00:00:09.560000","2026-02-24T00:00:09.561000","2026-02-24T00:00:09.562000","2026-02-24T00:00:09.564000","2026-02-24T00:00:09.565000","2026-02-24T00:00:09.566000","2026-02-24T00:00:09.567000","2026-02-24T00:00:09.569000","2026-02-24T00:00:09.570000","2026-02-24T00:00:09.571000","2026-02-24T00:00:09.572000","2026-02-24T00:00:09.573000","2026-02-24T00:00:09.575000","2026-02-24T00:00:09.576000","2026-02-24T00:00:09.577000","2026-02-24T00:00:09.579000","2026-02-24T00:00:09.580000","2026-02-24T00:00:09.582000","2026-02-24T00:00:09.583000","2026-02-24T00:00:09.585000","2026-02-24T00:00:09.586000","2026-02-24T00:00:09.587000","2026-02-24T00:00:09.588000","2026-02-24T00:00:09.589000","2026-02-24T00:00:09.590000","2026-02-24T00:00:09.592000","2026-02-24T00:00:09.593000","2026-02-24T00:00:09.594000","2026-02-24T00:00:09.595000","2026-02-24T00:00:09.597000","2026-02-24T00:00:09.598000","2026-02-24T00:00:09.599000","2026-02-24T00:00:09.600000","2026-02-24T00:00:09.602000","2026-02-24T00:00:09.603000","2026-02-24T00:00:09.605000","2026-02-24T00:00:09.606000","2026-02-24T00:00:09.608000","2026-02-24T00:00:09.609000","2026-02-24T00:00:09.610000","2026-02-24T00:00:09.611000","2026-02-24T00:00:09.612000","2026-02-24T00:00:09.330000","2026-02-24T00:00:09.331000","2026-02-24T00:00:09.333000","2026-02-24T00:00:09.334000","2026-02-24T00:00:09.336000","2026-02-24T00:00:09.337000","2026-02-24T00:00:09.338000","2026-02-24T00:00:09.340000","2026-02-24T00:00:09.341000","2026-02-24T00:00:09.342000","2026-02-24T00:00:09.344000","2026-02-24T00:00:09.345000","2026-02-24T00:00:09.347000","2026-02-24T00:00:09.348000","2026-02-24T00:00:09.349000","2026-02-24T00:00:09.351000","2026-02-24T00:00:09.352000","2026-02-24T00:00:09.354000","2026-02-24T00:00:09.355000","2026-02-24T00:00:09.356000","2026-02-24T00:00:09.358000","2026-02-24T00:00:09.359000","2026-02-24T00:00:09.360000","2026-02-24T00:00:09.362000","2026-02-24T00:00:09.363000","2026-02-24T00:00:09.365000","2026-02-24T00:00:09.366000","2026-02-24T00:00:09.367000","2026-02-24T00:00:09.369000","2026-02-24T00:00:09.370000","2026-02-24T00:00:09.371000","2026-02-24T00:00:09.373000","2026-02-24T00:00:09.375000","2026-02-24T00:00:09.376000","2026-02-24T00:00:09.377000","2026-02-24T00:00:09.378000","2026-02-24T00:00:09.379000","2026-02-24T00:00:09.381000","2026-02-24T00:00:09.382000","2026-02-24T00:00:09.383000","2026-02-24T00:00:09.385000","2026-02-24T00:00:09.386000","2026-02-24T00:00:09.388000","2026-02-24T00:00:09.389000","2026-02-24T00:00:09.391000","2026-02-24T00:00:09.392000","2026-02-24T00:00:09.397000","2026-02-24T00:00:09.398000","2026-02-24T00:00:09.399000","2026-02-24T00:00:09.400000","2026-02-24T00:00:09.401000","2026-02-24T00:00:09.402000","2026-02-24T00:00:09.403000","2026-02-24T00:00:09.405000","2026-02-24T00:00:09.406000","2026-02-24T00:00:09.407000","2026-02-24T00:00:09.408000","2026-02-24T00:00:09.410000","2026-02-24T00:00:09.411000","2026-02-24T00:00:09.412000","2026-02-24T00:00:09.413000","2026-02-24T00:00:09.415000","2026-02-24T00:00:09.416000","2026-02-24T00:00:09.417000","2026-02-24T00:00:09.418000","2026-02-24T00:00:09.420000","2026-02-24T00:00:09.421000","2026-02-24T00:00:09.422000","2026-02-24T00:00:09.423000","2026-02-24T00:00:09.424000","2026-02-24T00:00:09.426000","2026-02-24T00:00:09.427000","2026-02-24T00:00:09.428000","2026-02-24T00:00:09.429000","2026-02-24T00:00:09.431000","2026-02-24T00:00:09.432000","2026-02-24T00:00:09.433000","2026-02-24T00:00:09.434000","2026-02-24T00:00:09.435000","2026-02-24T00:00:09.437000","2026-02-24T00:00:09.438000","2026-02-24T00:00:09.439000","2026-02-24T00:00:09.440000","2026-02-24T00:00:09.442000","2026-02-24T00:00:09.443000","2026-02-24T00:00:09.444000","2026-02-24T00:00:09.445000","2026-02-24T00:00:09.447000","2026-02-24T00:00:09.448000","2026-02-24T00:00:09.449000","2026-02-24T00:00:09.450000","2026-02-24T00:00:09.452000","2026-02-24T00:00:09.453000","2026-02-24T00:00:09.455000","2026-02-24T00:00:09.456000","2026-02-24T00:00:09.458000","2026-02-24T00:00:09.459000","2026-02-24T00:00:09.460000","2026-02-24T00:00:09.461000","2026-02-24T00:00:09.462000","2026-02-24T00:00:09.464000","2026-02-24T00:00:09.465000","2026-02-24T00:00:09.466000","2026-02-24T00:00:09.467000","2026-02-24T00:00:09.469000","2026-02-24T00:00:09.470000","2026-02-24T00:00:09.471000","2026-02-24T00:00:09.473000","2026-02-24T00:00:09.474000","2026-02-24T00:00:09.475000","2026-02-24T00:00:09.479000","2026-02-24T00:00:09.481000","2026-02-24T00:00:09.482000","2026-02-24T00:00:09.483000","2026-02-24T00:00:09.485000","2026-02-24T00:00:09.486000","2026-02-24T00:00:09.488000","2026-02-24T00:00:09.489000","2026-02-24T00:00:09.491000","2026-02-24T00:00:09.492000","2026-02-24T00:00:09.493000","2026-02-24T00:00:09.494000","2026-02-24T00:00:09.495000","2026-02-24T00:00:09.496000","2026-02-24T00:00:09.497000","2026-02-24T00:00:09.499000","2026-02-24T00:00:09.500000","2026-02-24T00:00:09.501000","2026-02-24T00:00:09.502000","2026-02-24T00:00:09.503000","2026-02-24T00:00:09.505000","2026-02-24T00:00:09.506000","2026-02-24T00:00:09.507000","2026-02-24T00:00:09.508000","2026-02-24T00:00:09.509000","2026-02-24T00:00:09.510000","2026-02-24T00:00:09.512000","2026-02-24T00:00:09.513000","2026-02-24T00:00:09.514000","2026-02-24T00:00:09.515000","2026-02-24T00:00:09.516000","2026-02-24T00:00:09.517000","2026-02-24T00:00:09.519000","2026-02-24T00:00:09.520000","2026-02-24T00:00:09.521000","2026-02-24T00:00:09.522000","2026-02-24T00:00:09.524000","2026-02-24T00:00:09.525000","2026-02-24T00:00:09.526000","2026-02-24T00:00:09.527000","2026-02-24T00:00:09.528000","2026-02-24T00:00:09.529000","2026-02-24T00:00:09.530000","2026-02-24T00:00:09.532000","2026-02-24T00:00:09.533000","2026-02-24T00:00:09.534000","2026-02-24T00:00:09.535000","2026-02-24T00:00:09.536000","2026-02-24T00:00:09.537000","2026-02-24T00:00:09.539000","2026-02-24T00:00:09.540000","2026-02-24T00:00:09.541000","2026-02-24T00:00:09.542000","2026-02-24T00:00:09.543000","2026-02-24T00:00:09.545000","2026-02-24T00:00:09.546000","2026-02-24T00:00:09.547000","2026-02-24T00:00:09.548000","2026-02-24T00:00:09.549000","2026-02-24T00:00:09.551000","2026-02-24T00:00:09.552000","2026-02-24T00:00:09.553000","2026-02-24T00:00:09.554000","2026-02-24T00:00:09.555000","2026-02-24T00:00:09.556000","2026-02-24T00:00:09.558000","2026-02-24T00:00:09.559000","2026-02-24T00:00:09.560000","2026-02-24T00:00:09.561000","2026-02-24T00:00:09.562000","2026-02-24T00:00:09.564000","2026-02-24T00:00:09.565000","2026-02-24T00:00:09.566000","2026-02-24T00:00:09.567000","2026-02-24T00:00:09.569000","2026-02-24T00:00:09.570000","2026-02-24T00:00:09.571000","2026-02-24T00:00:09.572000","2026-02-24T00:00:09.573000","2026-02-24T00:00:09.575000","2026-02-24T00:00:09.576000","2026-02-24T00:00:09.577000","2026-02-24T00:00:09.579000","2026-02-24T00:00:09.580000","2026-02-24T00:00:09.582000","2026-02-24T00:00:09.583000","2026-02-24T00:00:09.585000","2026-02-24T00:00:09.586000","2026-02-24T00:00:09.587000","2026-02-24T00:00:09.588000","2026-02-24T00:00:09.589000","2026-02-24T00:00:09.590000","2026-02-24T00:00:09.592000","2026-02-24T00:00:09.593000","2026-02-24T00:00:09.594000","2026-02-24T00:00:09.595000","2026-02-24T00:00:09.597000","2026-02-24T00:00:09.598000","2026-02-24T00:00:09.599000","2026-02-24T00:00:09.600000","2026-02-24T00:00:09.602000","2026-02-24T00:00:09.603000","2026-02-24T00:00:09.605000","2026-02-24T00:00:09.606000","2026-02-24T00:00:09.608000","2026-02-24T00:00:09.609000","2026-02-24T00:00:09.610000","2026-02-24T00:00:09.611000","2026-02-24T00:00:09.612000","2026-02-24T00:00:09.614000","2026-02-24T00:00:09.616000","2026-02-24T00:00:09.617000","2026-02-24T00:00:09.617000","2026-02-24T00:00:09.619000","2026-02-24T00:00:09.620000","2026-02-24T00:00:09.621000","2026-02-24T00:00:09.623000","2026-02-24T00:00:09.624000","2026-02-24T00:00:09.625000","2026-02-24T00:00:09.626000","2026-02-24T00:00:09.628000","2026-02-24T00:00:09.629000","2026-02-24T00:00:09.630000","2026-02-24T00:00:09.631000","2026-02-24T00:00:09.632000","2026-02-24T00:00:09.634000","2026-02-24T00:00:09.635000","2026-02-24T00:00:09.636000","2026-02-24T00:00:09.637000","2026-02-24T00:00:09.638000","2026-02-24T00:00:09.640000","2026-02-24T00:00:09.641000","2026-02-24T00:00:09.642000","2026-02-24T00:00:09.643000","2026-02-24T00:00:09.644000","2026-02-24T00:00:09.646000","2026-02-24T00:00:09.647000","2026-02-24T00:00:09.648000","2026-02-24T00:00:09.649000","2026-02-24T00:00:09.651000","2026-02-24T00:00:09.652000","2026-02-24T00:00:09.653000","2026-02-24T00:00:09.654000","2026-02-24T00:00:09.655000","2026-02-24T00:00:09.657000","2026-02-24T00:00:09.658000","2026-02-24T00:00:09.659000","2026-02-24T00:00:09.660000","2026-02-24T00:00:09.662000","2026-02-24T00:00:09.663000","2026-02-24T00:00:09.664000","2026-02-24T00:00:09.665000","2026-02-24T00:00:09.667000","2026-02-24T00:00:09.668000","2026-02-24T00:00:09.669000","2026-02-24T00:00:09.670000","2026-02-24T00:00:09.671000","2026-02-24T00:00:09.673000","2026-02-24T00:00:09.674000","2026-02-24T00:00:09.675000","2026-02-24T00:00:09.676000","2026-02-24T00:00:09.678000","2026-02-24T00:00:09.679000","2026-02-24T00:00:09.680000","2026-02-24T00:00:09.681000","2026-02-24T00:00:09.682000","2026-02-24T00:00:09.684000","2026-02-24T00:00:09.685000","2026-02-24T00:00:09.686000","2026-02-24T00:00:09.688000","2026-02-24T00:00:09.689000","2026-02-24T00:00:09.690000","2026-02-24T00:00:09.691000","2026-02-24T00:00:09.692000","2026-02-24T00:00:09.693000","2026-02-24T00:00:09.695000","2026-02-24T00:00:09.696000","2026-02-24T00:00:09.697000","2026-02-24T00:00:09.698000","2026-02-24T00:00:09.700000","2026-02-24T00:00:09.701000","2026-02-24T00:00:09.702000","2026-02-24T00:00:09.703000","2026-02-24T00:00:09.705000","2026-02-24T00:00:09.706000","2026-02-24T00:00:09.707000","2026-02-24T00:00:09.708000","2026-02-24T00:00:09.710000","2026-02-24T00:00:09.711000","2026-02-24T00:00:09.712000","2026-02-24T00:00:09.713000","2026-02-24T00:00:09.714000","2026-02-24T00:00:09.716000","2026-02-24T00:00:09.717000","2026-02-24T00:00:09.718000","2026-02-24T00:00:09.720000","2026-02-24T00:00:09.721000","2026-02-24T00:00:09.723000","2026-02-24T00:00:09.730000","2026-02-24T00:00:09.732000","2026-02-24T00:00:09.733000","2026-02-24T00:00:09.735000","2026-02-24T00:00:09.736000","2026-02-24T00:00:09.737000","2026-02-24T00:00:09.738000","2026-02-24T00:00:09.739000","2026-02-24T00:00:09.741000","2026-02-24T00:00:09.742000","2026-02-24T00:00:09.743000","2026-02-24T00:00:09.745000","2026-02-24T00:00:09.746000","2026-02-24T00:00:09.747000","2026-02-24T00:00:09.748000","2026-02-24T00:00:09.750000","2026-02-24T00:00:09.751000","2026-02-24T00:00:09.752000","2026-02-24T00:00:09.753000","2026-02-24T00:00:09.755000","2026-02-24T00:00:09.756000","2026-02-24T00:00:09.758000","2026-02-24T00:00:09.759000","2026-02-24T00:00:09.760000","2026-02-24T00:00:09.761000","2026-02-24T00:00:09.762000","2026-02-24T00:00:09.764000","2026-02-24T00:00:09.765000","2026-02-24T00:00:09.766000","2026-02-24T00:00:09.768000","2026-02-24T00:00:09.769000","2026-02-24T00:00:09.770000","2026-02-24T00:00:09.771000","2026-02-24T00:00:09.773000","2026-02-24T00:00:09.774000","2026-02-24T00:00:09.775000","2026-02-24T00:00:09.776000","2026-02-24T00:00:09.778000","2026-02-24T00:00:09.377000","2026-02-24T00:00:09.378000","2026-02-24T00:00:09.379000","2026-02-24T00:00:09.380000","2026-02-24T00:00:09.382000","2026-02-24T00:00:09.383000","2026-02-24T00:00:09.385000","2026-02-24T00:00:09.386000","2026-02-24T00:00:09.388000","2026-02-24T00:00:09.389000","2026-02-24T00:00:09.391000","2026-02-24T00:00:09.392000","2026-02-24T00:00:09.398000","2026-02-24T00:00:09.398000","2026-02-24T00:00:09.400000","2026-02-24T00:00:09.401000","2026-02-24T00:00:09.402000","2026-02-24T00:00:09.403000","2026-02-24T00:00:09.405000","2026-02-24T00:00:09.406000","2026-02-24T00:00:09.407000","2026-02-24T00:00:09.408000","2026-02-24T00:00:09.410000","2026-02-24T00:00:09.411000","2026-02-24T00:00:09.412000","2026-02-24T00:00:09.413000","2026-02-24T00:00:09.415000","2026-02-24T00:00:09.416000","2026-02-24T00:00:09.417000","2026-02-24T00:00:09.418000","2026-02-24T00:00:09.419000","2026-02-24T00:00:09.421000","2026-02-24T00:00:09.422000","2026-02-24T00:00:09.423000","2026-02-24T00:00:09.424000","2026-02-24T00:00:09.426000","2026-02-24T00:00:09.427000","2026-02-24T00:00:09.428000","2026-02-24T00:00:09.429000","2026-02-24T00:00:09.431000","2026-02-24T00:00:09.432000","2026-02-24T00:00:09.433000","2026-02-24T00:00:09.434000","2026-02-24T00:00:09.435000","2026-02-24T00:00:09.437000","2026-02-24T00:00:09.438000","2026-02-24T00:00:09.439000","2026-02-24T00:00:09.440000","2026-02-24T00:00:09.441000","2026-02-24T00:00:09.443000","2026-02-24T00:00:09.444000","2026-02-24T00:00:09.445000","2026-02-24T00:00:09.447000","2026-02-24T00:00:09.448000","2026-02-24T00:00:09.449000","2026-02-24T00:00:09.450000","2026-02-24T00:00:09.452000","2026-02-24T00:00:09.453000","2026-02-24T00:00:09.455000","2026-02-24T00:00:09.456000","2026-02-24T00:00:09.458000","2026-02-24T00:00:09.459000","2026-02-24T00:00:09.460000","2026-02-24T00:00:09.461000","2026-02-24T00:00:09.462000","2026-02-24T00:00:09.464000","2026-02-24T00:00:09.465000","2026-02-24T00:00:09.466000","2026-02-24T00:00:09.467000","2026-02-24T00:00:09.469000","2026-02-24T00:00:09.470000","2026-02-24T00:00:09.471000","2026-02-24T00:00:09.473000","2026-02-24T00:00:09.474000","2026-02-24T00:00:09.475000","2026-02-24T00:00:09.479000","2026-02-24T00:00:09.481000","2026-02-24T00:00:09.482000","2026-02-24T00:00:09.483000","2026-02-24T00:00:09.484000","2026-02-24T00:00:09.486000","2026-02-24T00:00:09.487000","2026-02-24T00:00:09.489000","2026-02-24T00:00:09.491000","2026-02-24T00:00:09.492000","2026-02-24T00:00:09.493000","2026-02-24T00:00:09.494000","2026-02-24T00:00:09.495000","2026-02-24T00:00:09.496000","2026-02-24T00:00:09.497000","2026-02-24T00:00:09.499000","2026-02-24T00:00:09.500000","2026-02-24T00:00:09.501000","2026-02-24T00:00:09.502000","2026-02-24T00:00:09.503000","2026-02-24T00:00:09.504000","2026-02-24T00:00:09.506000","2026-02-24T00:00:09.507000","2026-02-24T00:00:09.508000","2026-02-24T00:00:09.509000","2026-02-24T00:00:09.510000","2026-02-24T00:00:09.512000","2026-02-24T00:00:09.513000","2026-02-24T00:00:09.514000","2026-02-24T00:00:09.515000","2026-02-24T00:00:09.516000","2026-02-24T00:00:09.517000","2026-02-24T00:00:09.519000","2026-02-24T00:00:09.520000","2026-02-24T00:00:09.521000","2026-02-24T00:00:09.522000","2026-02-24T00:00:09.524000","2026-02-24T00:00:09.525000","2026-02-24T00:00:09.526000","2026-02-24T00:00:09.527000","2026-02-24T00:00:09.528000","2026-02-24T00:00:09.529000","2026-02-24T00:00:09.530000","2026-02-24T00:00:09.532000","2026-02-24T00:00:09.533000","2026-02-24T00:00:09.534000","2026-02-24T00:00:09.535000","2026-02-24T00:00:09.536000","2026-02-24T00:00:09.537000","2026-02-24T00:00:09.539000","2026-02-24T00:00:09.540000","2026-02-24T00:00:09.541000","2026-02-24T00:00:09.542000","2026-02-24T00:00:09.543000","2026-02-24T00:00:09.545000","2026-02-24T00:00:09.546000","2026-02-24T00:00:09.547000","2026-02-24T00:00:09.548000","2026-02-24T00:00:09.549000","2026-02-24T00:00:09.551000","2026-02-24T00:00:09.552000","2026-02-24T00:00:09.553000","2026-02-24T00:00:09.554000","2026-02-24T00:00:09.555000","2026-02-24T00:00:09.556000","2026-02-24T00:00:09.558000","2026-02-24T00:00:09.559000","2026-02-24T00:00:09.560000","2026-02-24T00:00:09.561000","2026-02-24T00:00:09.562000","2026-02-24T00:00:09.564000","2026-02-24T00:00:09.565000","2026-02-24T00:00:09.566000","2026-02-24T00:00:09.567000","2026-02-24T00:00:09.569000","2026-02-24T00:00:09.570000","2026-02-24T00:00:09.571000","2026-02-24T00:00:09.572000","2026-02-24T00:00:09.573000","2026-02-24T00:00:09.575000","2026-02-24T00:00:09.576000","2026-02-24T00:00:09.577000","2026-02-24T00:00:09.579000","2026-02-24T00:00:09.580000","2026-02-24T00:00:09.582000","2026-02-24T00:00:09.583000","2026-02-24T00:00:09.585000","2026-02-24T00:00:09.586000","2026-02-24T00:00:09.587000","2026-02-24T00:00:09.588000","2026-02-24T00:00:09.589000","2026-02-24T00:00:09.590000","2026-02-24T00:00:09.592000","2026-02-24T00:00:09.593000","2026-02-24T00:00:09.594000","2026-02-24T00:00:09.595000","2026-02-24T00:00:09.597000","2026-02-24T00:00:09.598000","2026-02-24T00:00:09.599000","2026-02-24T00:00:09.600000","2026-02-24T00:00:09.601000","2026-02-24T00:00:09.603000","2026-02-24T00:00:09.605000","2026-02-24T00:00:09.606000","2026-02-24T00:00:09.608000","2026-02-24T00:00:09.609000","2026-02-24T00:00:09.610000","2026-02-24T00:00:09.611000","2026-02-24T00:00:09.612000","2026-02-24T00:00:09.614000","2026-02-24T00:00:09.616000","2026-02-24T00:00:09.617000","2026-02-24T00:00:09.617000","2026-02-24T00:00:09.619000","2026-02-24T00:00:09.620000","2026-02-24T00:00:09.621000","2026-02-24T00:00:09.623000","2026-02-24T00:00:09.624000","2026-02-24T00:00:09.625000","2026-02-24T00:00:09.626000","2026-02-24T00:00:09.628000","2026-02-24T00:00:09.629000","2026-02-24T00:00:09.630000","2026-02-24T00:00:09.631000","2026-02-24T00:00:09.632000","2026-02-24T00:00:09.634000","2026-02-24T00:00:09.635000","2026-02-24T00:00:09.636000","2026-02-24T00:00:09.637000","2026-02-24T00:00:09.638000","2026-02-24T00:00:09.640000","2026-02-24T00:00:09.641000","2026-02-24T00:00:09.642000","2026-02-24T00:00:09.643000","2026-02-24T00:00:09.644000","2026-02-24T00:00:09.646000","2026-02-24T00:00:09.647000","2026-02-24T00:00:09.648000","2026-02-24T00:00:09.649000","2026-02-24T00:00:09.651000","2026-02-24T00:00:09.652000","2026-02-24T00:00:09.653000","2026-02-24T00:00:09.654000","2026-02-24T00:00:09.655000","2026-02-24T00:00:09.657000","2026-02-24T00:00:09.658000","2026-02-24T00:00:09.659000","2026-02-24T00:00:09.660000","2026-02-24T00:00:09.662000","2026-02-24T00:00:09.663000","2026-02-24T00:00:09.664000","2026-02-24T00:00:09.665000","2026-02-24T00:00:09.666000","2026-02-24T00:00:09.668000","2026-02-24T00:00:09.669000","2026-02-24T00:00:09.670000","2026-02-24T00:00:09.671000","2026-02-24T00:00:09.673000","2026-02-24T00:00:09.674000","2026-02-24T00:00:09.675000","2026-02-24T00:00:09.676000","2026-02-24T00:00:09.678000","2026-02-24T00:00:09.679000","2026-02-24T00:00:09.680000","2026-02-24T00:00:09.681000","2026-02-24T00:00:09.682000","2026-02-24T00:00:09.684000","2026-02-24T00:00:09.685000","2026-02-24T00:00:09.686000","2026-02-24T00:00:09.688000","2026-02-24T00:00:09.689000","2026-02-24T00:00:09.690000","2026-02-24T00:00:09.691000","2026-02-24T00:00:09.692000","2026-02-24T00:00:09.693000","2026-02-24T00:00:09.695000","2026-02-24T00:00:09.696000","2026-02-24T00:00:09.697000","2026-02-24T00:00:09.698000","2026-02-24T00:00:09.700000","2026-02-24T00:00:09.701000","2026-02-24T00:00:09.702000","2026-02-24T00:00:09.703000","2026-02-24T00:00:09.705000","2026-02-24T00:00:09.706000","2026-02-24T00:00:09.707000","2026-02-24T00:00:09.708000","2026-02-24T00:00:09.710000","2026-02-24T00:00:09.711000","2026-02-24T00:00:09.712000","2026-02-24T00:00:09.713000","2026-02-24T00:00:09.714000","2026-02-24T00:00:09.716000","2026-02-24T00:00:09.717000","2026-02-24T00:00:09.718000","2026-02-24T00:00:09.720000","2026-02-24T00:00:09.721000","2026-02-24T00:00:09.722000","2026-02-24T00:00:09.730000","2026-02-24T00:00:09.732000","2026-02-24T00:00:09.733000","2026-02-24T00:00:09.735000","2026-02-24T00:00:09.736000","2026-02-24T00:00:09.737000","2026-02-24T00:00:09.738000","2026-02-24T00:00:09.739000","2026-02-24T00:00:09.741000","2026-02-24T00:00:09.742000","2026-02-24T00:00:09.743000","2026-02-24T00:00:09.744000","2026-02-24T00:00:09.746000","2026-02-24T00:00:09.747000","2026-02-24T00:00:09.748000","2026-02-24T00:00:09.750000","2026-02-24T00:00:09.751000","2026-02-24T00:00:09.752000","2026-02-24T00:00:09.753000","2026-02-24T00:00:09.755000","2026-02-24T00:00:09.756000","2026-02-24T00:00:09.758000","2026-02-24T00:00:09.759000","2026-02-24T00:00:09.760000","2026-02-24T00:00:09.761000","2026-02-24T00:00:09.762000","2026-02-24T00:00:09.764000","2026-02-24T00:00:09.765000","2026-02-24T00:00:09.766000","2026-02-24T00:00:09.768000","2026-02-24T00:00:09.769000","2026-02-24T00:00:09.770000","2026-02-24T00:00:09.771000","2026-02-24T00:00:09.773000","2026-02-24T00:00:09.774000","2026-02-24T00:00:09.775000","2026-02-24T00:00:09.776000","2026-02-24T00:00:09.778000","2026-02-24T00:00:09.786000","2026-02-24T00:00:09.787000","2026-02-24T00:00:09.788000","2026-02-24T00:00:09.789000","2026-02-24T00:00:09.790000","2026-02-24T00:00:09.791000","2026-02-24T00:00:09.793000","2026-02-24T00:00:09.794000","2026-02-24T00:00:09.795000","2026-02-24T00:00:09.797000","2026-02-24T00:00:09.798000","2026-02-24T00:00:09.799000","2026-02-24T00:00:09.801000","2026-02-24T00:00:09.802000","2026-02-24T00:00:09.803000","2026-02-24T00:00:09.804000","2026-02-24T00:00:09.805000","2026-02-24T00:00:09.807000","2026-02-24T00:00:09.808000","2026-02-24T00:00:09.809000","2026-02-24T00:00:09.811000","2026-02-24T00:00:09.812000","2026-02-24T00:00:09.813000","2026-02-24T00:00:09.815000","2026-02-24T00:00:09.816000","2026-02-24T00:00:09.817000","2026-02-24T00:00:09.819000","2026-02-24T00:00:09.820000","2026-02-24T00:00:09.821000","2026-02-24T00:00:09.822000","2026-02-24T00:00:09.823000","2026-02-24T00:00:09.825000","2026-02-24T00:00:09.826000","2026-02-24T00:00:09.827000","2026-02-24T00:00:09.828000","2026-02-24T00:00:09.830000","2026-02-24T00:00:09.831000","2026-02-24T00:00:09.832000","2026-02-24T00:00:09.833000","2026-02-24T00:00:09.835000","2026-02-24T00:00:09.836000","2026-02-24T00:00:09.837000","2026-02-24T00:00:09.839000","2026-02-24T00:00:09.840000","2026-02-24T00:00:09.841000","2026-02-24T00:00:09.843000","2026-02-24T00:00:09.844000","2026-02-24T00:00:09.845000","2026-02-24T00:00:09.846000","2026-02-24T00:00:09.848000","2026-02-24T00:00:09.849000","2026-02-24T00:00:09.850000","2026-02-24T00:00:09.851000","2026-02-24T00:00:09.853000","2026-02-24T00:00:09.854000","2026-02-24T00:00:09.855000","2026-02-24T00:00:09.857000","2026-02-24T00:00:09.858000","2026-02-24T00:00:09.860000","2026-02-24T00:00:09.861000","2026-02-24T00:00:09.862000","2026-02-24T00:00:09.863000","2026-02-24T00:00:09.864000","2026-02-24T00:00:09.866000","2026-02-24T00:00:09.867000","2026-02-24T00:00:09.868000","2026-02-24T00:00:09.869000","2026-02-24T00:00:09.871000","2026-02-24T00:00:09.872000","2026-02-24T00:00:09.873000","2026-02-24T00:00:09.874000","2026-02-24T00:00:09.876000","2026-02-24T00:00:09.877000","2026-02-24T00:00:09.878000","2026-02-24T00:00:09.880000","2026-02-24T00:00:09.881000","2026-02-24T00:00:09.882000","2026-02-24T00:00:09.884000","2026-02-24T00:00:09.885000","2026-02-24T00:00:09.886000","2026-02-24T00:00:09.887000","2026-02-24T00:00:09.888000","2026-02-24T00:00:09.890000","2026-02-24T00:00:09.891000","2026-02-24T00:00:09.892000","2026-02-24T00:00:09.894000","2026-02-24T00:00:09.895000","2026-02-24T00:00:09.896000","2026-02-24T00:00:09.897000","2026-02-24T00:00:09.899000","2026-02-24T00:00:09.900000","2026-02-24T00:00:09.901000","2026-02-24T00:00:09.903000","2026-02-24T00:00:09.904000","2026-02-24T00:00:09.905000","2026-02-24T00:00:09.907000","2026-02-24T00:00:09.908000","2026-02-24T00:00:09.910000","2026-02-24T00:00:09.913000","2026-02-24T00:00:09.914000","2026-02-24T00:00:09.915000","2026-02-24T00:00:09.916000","2026-02-24T00:00:09.917000","2026-02-24T00:00:09.921000","2026-02-24T00:00:09.922000","2026-02-24T00:00:09.923000","2026-02-24T00:00:09.924000","2026-02-24T00:00:09.925000","2026-02-24T00:00:09.927000","2026-02-24T00:00:09.928000","2026-02-24T00:00:09.929000","2026-02-24T00:00:09.931000","2026-02-24T00:00:09.932000","2026-02-24T00:00:09.933000","2026-02-24T00:00:09.935000","2026-02-24T00:00:09.936000","2026-02-24T00:00:09.937000","2026-02-24T00:00:09.939000","2026-02-24T00:00:09.940000","2026-02-24T00:00:09.941000","2026-02-24T00:00:09.943000","2026-02-24T00:00:09.944000","2026-02-24T00:00:09.945000","2026-02-24T00:00:09.947000","2026-02-24T00:00:09.948000","2026-02-24T00:00:09.949000","2026-02-24T00:00:09.951000","2026-02-24T00:00:09.952000","2026-02-24T00:00:09.953000","2026-02-24T00:00:09.955000","2026-02-24T00:00:09.956000","2026-02-24T00:00:09.957000","2026-02-24T00:00:09.958000","2026-02-24T00:00:09.960000","2026-02-24T00:00:09.961000","2026-02-24T00:00:09.963000","2026-02-24T00:00:09.964000","2026-02-24T00:00:09.965000","2026-02-24T00:00:09.966000","2026-02-24T00:00:09.968000","2026-02-24T00:00:09.969000","2026-02-24T00:00:09.970000","2026-02-24T00:00:09.972000","2026-02-24T00:00:09.973000","2026-02-24T00:00:09.974000","2026-02-24T00:00:09.976000","2026-02-24T00:00:09.977000","2026-02-24T00:00:09.978000","2026-02-24T00:00:09.980000","2026-02-24T00:00:09.981000","2026-02-24T00:00:09.982000","2026-02-24T00:00:09.984000","2026-02-24T00:00:09.985000","2026-02-24T00:00:09.986000","2026-02-24T00:00:09.988000","2026-02-24T00:00:09.989000","2026-02-24T00:00:09.990000","2026-02-24T00:00:09.992000","2026-02-24T00:00:09.993000","2026-02-24T00:00:09.994000","2026-02-24T00:00:09.996000","2026-02-24T00:00:09.997000","2026-02-24T00:00:09.998000","2026-02-24T00:00:10.000000","2026-02-24T00:00:10.001000","2026-02-24T00:00:10.002000","2026-02-24T00:00:10.004000","2026-02-24T00:00:10.005000","2026-02-24T00:00:10.012000","2026-02-24T00:00:10.014000","2026-02-24T00:00:10.014000","2026-02-24T00:00:10.016000","2026-02-24T00:00:10.017000","2026-02-24T00:00:10.018000","2026-02-24T00:00:10.020000","2026-02-24T00:00:10.021000","2026-02-24T00:00:10.022000","2026-02-24T00:00:10.024000","2026-02-24T00:00:10.025000","2026-02-24T00:00:10.026000","2026-02-24T00:00:10.028000","2026-02-24T00:00:10.029000","2026-02-24T00:00:10.030000","2026-02-24T00:00:10.032000","2026-02-24T00:00:09.481000","2026-02-24T00:00:09.482000","2026-02-24T00:00:09.483000","2026-02-24T00:00:09.484000","2026-02-24T00:00:09.486000","2026-02-24T00:00:09.487000","2026-02-24T00:00:09.489000","2026-02-24T00:00:09.491000","2026-02-24T00:00:09.492000","2026-02-24T00:00:09.493000","2026-02-24T00:00:09.494000","2026-02-24T00:00:09.495000","2026-02-24T00:00:09.496000","2026-02-24T00:00:09.497000","2026-02-24T00:00:09.498000","2026-02-24T00:00:09.500000","2026-02-24T00:00:09.501000","2026-02-24T00:00:09.502000","2026-02-24T00:00:09.503000","2026-02-24T00:00:09.504000","2026-02-24T00:00:09.506000","2026-02-24T00:00:09.507000","2026-02-24T00:00:09.508000","2026-02-24T00:00:09.509000","2026-02-24T00:00:09.510000","2026-02-24T00:00:09.511000","2026-02-24T00:00:09.513000","2026-02-24T00:00:09.514000","2026-02-24T00:00:09.515000","2026-02-24T00:00:09.516000","2026-02-24T00:00:09.517000","2026-02-24T00:00:09.519000","2026-02-24T00:00:09.520000","2026-02-24T00:00:09.521000","2026-02-24T00:00:09.522000","2026-02-24T00:00:09.523000","2026-02-24T00:00:09.525000","2026-02-24T00:00:09.526000","2026-02-24T00:00:09.527000","2026-02-24T00:00:09.528000","2026-02-24T00:00:09.529000","2026-02-24T00:00:09.530000","2026-02-24T00:00:09.532000","2026-02-24T00:00:09.533000","2026-02-24T00:00:09.534000","2026-02-24T00:00:09.535000","2026-02-24T00:00:09.536000","2026-02-24T00:00:09.537000","2026-02-24T00:00:09.539000","2026-02-24T00:00:09.540000","2026-02-24T00:00:09.541000","2026-02-24T00:00:09.542000","2026-02-24T00:00:09.543000","2026-02-24T00:00:09.545000","2026-02-24T00:00:09.546000","2026-02-24T00:00:09.547000","2026-02-24T00:00:09.548000","2026-02-24T00:00:09.549000","2026-02-24T00:00:09.550000","2026-02-24T00:00:09.552000","2026-02-24T00:00:09.553000","2026-02-24T00:00:09.554000","2026-02-24T00:00:09.555000","2026-02-24T00:00:09.556000","2026-02-24T00:00:09.557000","2026-02-24T00:00:09.559000","2026-02-24T00:00:09.560000","2026-02-24T00:00:09.561000","2026-02-24T00:00:09.562000","2026-02-24T00:00:09.564000","2026-02-24T00:00:09.565000","2026-02-24T00:00:09.566000","2026-02-24T00:00:09.567000","2026-02-24T00:00:09.569000","2026-02-24T00:00:09.570000","2026-02-24T00:00:09.571000","2026-02-24T00:00:09.572000","2026-02-24T00:00:09.573000","2026-02-24T00:00:09.574000","2026-02-24T00:00:09.576000","2026-02-24T00:00:09.577000","2026-02-24T00:00:09.579000","2026-02-24T00:00:09.580000","2026-02-24T00:00:09.581000","2026-02-24T00:00:09.583000","2026-02-24T00:00:09.585000","2026-02-24T00:00:09.586000","2026-02-24T00:00:09.587000","2026-02-24T00:00:09.588000","2026-02-24T00:00:09.589000","2026-02-24T00:00:09.590000","2026-02-24T00:00:09.591000","2026-02-24T00:00:09.593000","2026-02-24T00:00:09.594000","2026-02-24T00:00:09.595000","2026-02-24T00:00:09.597000","2026-02-24T00:00:09.598000","2026-02-24T00:00:09.599000","2026-02-24T00:00:09.600000","2026-02-24T00:00:09.601000","2026-02-24T00:00:09.603000","2026-02-24T00:00:09.605000","2026-02-24T00:00:09.606000","2026-02-24T00:00:09.608000","2026-02-24T00:00:09.609000","2026-02-24T00:00:09.610000","2026-02-24T00:00:09.611000","2026-02-24T00:00:09.612000","2026-02-24T00:00:09.614000","2026-02-24T00:00:09.616000","2026-02-24T00:00:09.617000","2026-02-24T00:00:09.617000","2026-02-24T00:00:09.619000","2026-02-24T00:00:09.620000","2026-02-24T00:00:09.621000","2026-02-24T00:00:09.623000","2026-02-24T00:00:09.624000","2026-02-24T00:00:09.625000","2026-02-24T00:00:09.626000","2026-02-24T00:00:09.628000","2026-02-24T00:00:09.629000","2026-02-24T00:00:09.630000","2026-02-24T00:00:09.631000","2026-02-24T00:00:09.632000","2026-02-24T00:00:09.634000","2026-02-24T00:00:09.635000","2026-02-24T00:00:09.636000","2026-02-24T00:00:09.637000","2026-02-24T00:00:09.638000","2026-02-24T00:00:09.640000","2026-02-24T00:00:09.641000","2026-02-24T00:00:09.642000","2026-02-24T00:00:09.643000","2026-02-24T00:00:09.644000","2026-02-24T00:00:09.646000","2026-02-24T00:00:09.647000","2026-02-24T00:00:09.648000","2026-02-24T00:00:09.649000","2026-02-24T00:00:09.651000","2026-02-24T00:00:09.652000","2026-02-24T00:00:09.653000","2026-02-24T00:00:09.654000","2026-02-24T00:00:09.656000","2026-02-24T00:00:09.657000","2026-02-24T00:00:09.658000","2026-02-24T00:00:09.659000","2026-02-24T00:00:09.660000","2026-02-24T00:00:09.662000","2026-02-24T00:00:09.663000","2026-02-24T00:00:09.664000","2026-02-24T00:00:09.665000","2026-02-24T00:00:09.667000","2026-02-24T00:00:09.668000","2026-02-24T00:00:09.669000","2026-02-24T00:00:09.670000","2026-02-24T00:00:09.671000","2026-02-24T00:00:09.673000","2026-02-24T00:00:09.674000","2026-02-24T00:00:09.675000","2026-02-24T00:00:09.676000","2026-02-24T00:00:09.678000","2026-02-24T00:00:09.679000","2026-02-24T00:00:09.680000","2026-02-24T00:00:09.681000","2026-02-24T00:00:09.682000","2026-02-24T00:00:09.684000","2026-02-24T00:00:09.685000","2026-02-24T00:00:09.686000","2026-02-24T00:00:09.688000","2026-02-24T00:00:09.689000","2026-02-24T00:00:09.690000","2026-02-24T00:00:09.691000","2026-02-24T00:00:09.692000","2026-02-24T00:00:09.694000","2026-02-24T00:00:09.695000","2026-02-24T00:00:09.696000","2026-02-24T00:00:09.697000","2026-02-24T00:00:09.698000","2026-02-24T00:00:09.700000","2026-02-24T00:00:09.701000","2026-02-24T00:00:09.702000","2026-02-24T00:00:09.703000","2026-02-24T00:00:09.705000","2026-02-24T00:00:09.706000","2026-02-24T00:00:09.707000","2026-02-24T00:00:09.708000","2026-02-24T00:00:09.710000","2026-02-24T00:00:09.711000","2026-02-24T00:00:09.712000","2026-02-24T00:00:09.713000","2026-02-24T00:00:09.714000","2026-02-24T00:00:09.716000","2026-02-24T00:00:09.717000","2026-02-24T00:00:09.718000","2026-02-24T00:00:09.720000","2026-02-24T00:00:09.721000","2026-02-24T00:00:09.723000","2026-02-24T00:00:09.730000","2026-02-24T00:00:09.732000","2026-02-24T00:00:09.733000","2026-02-24T00:00:09.735000","2026-02-24T00:00:09.736000","2026-02-24T00:00:09.737000","2026-02-24T00:00:09.738000","2026-02-24T00:00:09.739000","2026-02-24T00:00:09.741000","2026-02-24T00:00:09.742000","2026-02-24T00:00:09.743000","2026-02-24T00:00:09.745000","2026-02-24T00:00:09.746000","2026-02-24T00:00:09.747000","2026-02-24T00:00:09.748000","2026-02-24T00:00:09.750000","2026-02-24T00:00:09.751000","2026-02-24T00:00:09.752000","2026-02-24T00:00:09.753000","2026-02-24T00:00:09.755000","2026-02-24T00:00:09.756000","2026-02-24T00:00:09.758000","2026-02-24T00:00:09.759000","2026-02-24T00:00:09.760000","2026-02-24T00:00:09.761000","2026-02-24T00:00:09.762000","2026-02-24T00:00:09.764000","2026-02-24T00:00:09.765000","2026-02-24T00:00:09.766000","2026-02-24T00:00:09.768000","2026-02-24T00:00:09.769000","2026-02-24T00:00:09.770000","2026-02-24T00:00:09.771000","2026-02-24T00:00:09.773000","2026-02-24T00:00:09.774000","2026-02-24T00:00:09.775000","2026-02-24T00:00:09.776000","2026-02-24T00:00:09.778000","2026-02-24T00:00:09.786000","2026-02-24T00:00:09.787000","2026-02-24T00:00:09.788000","2026-02-24T00:00:09.789000","2026-02-24T00:00:09.790000","2026-02-24T00:00:09.791000","2026-02-24T00:00:09.793000","2026-02-24T00:00:09.794000","2026-02-24T00:00:09.795000","2026-02-24T00:00:09.797000","2026-02-24T00:00:09.798000","2026-02-24T00:00:09.799000","2026-02-24T00:00:09.801000","2026-02-24T00:00:09.802000","2026-02-24T00:00:09.803000","2026-02-24T00:00:09.804000","2026-02-24T00:00:09.805000","2026-02-24T00:00:09.807000","2026-02-24T00:00:09.808000","2026-02-24T00:00:09.809000","2026-02-24T00:00:09.811000","2026-02-24T00:00:09.812000","2026-02-24T00:00:09.813000","2026-02-24T00:00:09.815000","2026-02-24T00:00:09.816000","2026-02-24T00:00:09.817000","2026-02-24T00:00:09.819000","2026-02-24T00:00:09.820000","2026-02-24T00:00:09.821000","2026-02-24T00:00:09.822000","2026-02-24T00:00:09.823000","2026-02-24T00:00:09.825000","2026-02-24T00:00:09.826000","2026-02-24T00:00:09.827000","2026-02-24T00:00:09.828000","2026-02-24T00:00:09.830000","2026-02-24T00:00:09.831000","2026-02-24T00:00:09.832000","2026-02-24T00:00:09.833000","2026-02-24T00:00:09.835000","2026-02-24T00:00:09.836000","2026-02-24T00:00:09.837000","2026-02-24T00:00:09.839000","2026-02-24T00:00:09.840000","2026-02-24T00:00:09.841000","2026-02-24T00:00:09.843000","2026-02-24T00:00:09.844000","2026-02-24T00:00:09.845000","2026-02-24T00:00:09.846000","2026-02-24T00:00:09.848000","2026-02-24T00:00:09.849000","2026-02-24T00:00:09.850000","2026-02-24T00:00:09.851000","2026-02-24T00:00:09.853000","2026-02-24T00:00:09.854000","2026-02-24T00:00:09.855000","2026-02-24T00:00:09.857000","2026-02-24T00:00:09.858000","2026-02-24T00:00:09.860000","2026-02-24T00:00:09.861000","2026-02-24T00:00:09.862000","2026-02-24T00:00:09.863000","2026-02-24T00:00:09.864000","2026-02-24T00:00:09.866000","2026-02-24T00:00:09.867000","2026-02-24T00:00:09.868000","2026-02-24T00:00:09.869000","2026-02-24T00:00:09.871000","2026-02-24T00:00:09.872000","2026-02-24T00:00:09.873000","2026-02-24T00:00:09.874000","2026-02-24T00:00:09.876000","2026-02-24T00:00:09.877000","2026-02-24T00:00:09.878000","2026-02-24T00:00:09.880000","2026-02-24T00:00:09.881000","2026-02-24T00:00:09.882000","2026-02-24T00:00:09.884000","2026-02-24T00:00:09.885000","2026-02-24T00:00:09.886000","2026-02-24T00:00:09.887000","2026-02-24T00:00:09.888000","2026-02-24T00:00:09.890000","2026-02-24T00:00:09.891000","2026-02-24T00:00:09.892000","2026-02-24T00:00:09.894000","2026-02-24T00:00:09.895000","2026-02-24T00:00:09.896000","2026-02-24T00:00:09.898000","2026-02-24T00:00:09.899000","2026-02-24T00:00:09.900000","2026-02-24T00:00:09.901000","2026-02-24T00:00:09.903000","2026-02-24T00:00:09.904000","2026-02-24T00:00:09.905000","2026-02-24T00:00:09.907000","2026-02-24T00:00:09.908000","2026-02-24T00:00:09.910000","2026-02-24T00:00:09.913000","2026-02-24T00:00:09.914000","2026-02-24T00:00:09.915000","2026-02-24T00:00:09.916000","2026-02-24T00:00:09.917000","2026-02-24T00:00:09.921000","2026-02-24T00:00:09.922000","2026-02-24T00:00:09.923000","2026-02-24T00:00:09.924000","2026-02-24T00:00:09.925000","2026-02-24T00:00:09.927000","2026-02-24T00:00:09.928000","2026-02-24T00:00:09.929000","2026-02-24T00:00:09.931000","2026-02-24T00:00:09.932000","2026-02-24T00:00:09.933000","2026-02-24T00:00:09.935000","2026-02-24T00:00:09.936000","2026-02-24T00:00:09.937000","2026-02-24T00:00:09.939000","2026-02-24T00:00:09.940000","2026-02-24T00:00:09.941000","2026-02-24T00:00:09.943000","2026-02-24T00:00:09.944000","2026-02-24T00:00:09.945000","2026-02-24T00:00:09.947000","2026-02-24T00:00:09.948000","2026-02-24T00:00:09.949000","2026-02-24T00:00:09.951000","2026-02-24T00:00:09.952000","2026-02-24T00:00:09.953000","2026-02-24T00:00:09.955000","2026-02-24T00:00:09.956000","2026-02-24T00:00:09.957000","2026-02-24T00:00:09.958000","2026-02-24T00:00:09.960000","2026-02-24T00:00:09.961000","2026-02-24T00:00:09.963000","2026-02-24T00:00:09.964000","2026-02-24T00:00:09.965000","2026-02-24T00:00:09.966000","2026-02-24T00:00:09.968000","2026-02-24T00:00:09.969000","2026-02-24T00:00:09.970000","2026-02-24T00:00:09.972000","2026-02-24T00:00:09.973000","2026-02-24T00:00:09.975000","2026-02-24T00:00:09.976000","2026-02-24T00:00:09.977000","2026-02-24T00:00:09.978000","2026-02-24T00:00:09.980000","2026-02-24T00:00:09.981000","2026-02-24T00:00:09.982000","2026-02-24T00:00:09.984000","2026-02-24T00:00:09.985000","2026-02-24T00:00:09.986000","2026-02-24T00:00:09.988000","2026-02-24T00:00:09.989000","2026-02-24T00:00:09.991000","2026-02-24T00:00:09.992000","2026-02-24T00:00:09.993000","2026-02-24T00:00:09.994000","2026-02-24T00:00:09.996000","2026-02-24T00:00:09.997000","2026-02-24T00:00:09.998000","2026-02-24T00:00:10.000000","2026-02-24T00:00:10.001000","2026-02-24T00:00:10.002000","2026-02-24T00:00:10.004000","2026-02-24T00:00:10.005000","2026-02-24T00:00:09.617000","2026-02-24T00:00:09.617000","2026-02-24T00:00:09.618000","2026-02-24T00:00:09.620000","2026-02-24T00:00:09.621000","2026-02-24T00:00:09.623000","2026-02-24T00:00:09.623000","2026-02-24T00:00:09.625000","2026-02-24T00:00:09.626000","2026-02-24T00:00:09.627000","2026-02-24T00:00:09.629000","2026-02-24T00:00:09.630000","2026-02-24T00:00:09.631000","2026-02-24T00:00:09.632000","2026-02-24T00:00:09.633000","2026-02-24T00:00:09.635000","2026-02-24T00:00:09.636000","2026-02-24T00:00:09.637000","2026-02-24T00:00:09.638000","2026-02-24T00:00:09.639000","2026-02-24T00:00:09.641000","2026-02-24T00:00:09.642000","2026-02-24T00:00:09.643000","2026-02-24T00:00:09.644000","2026-02-24T00:00:09.646000","2026-02-24T00:00:09.647000","2026-02-24T00:00:09.648000","2026-02-24T00:00:09.649000","2026-02-24T00:00:09.650000","2026-02-24T00:00:09.652000","2026-02-24T00:00:09.653000","2026-02-24T00:00:09.654000","2026-02-24T00:00:09.655000","2026-02-24T00:00:09.657000","2026-02-24T00:00:09.658000","2026-02-24T00:00:09.659000","2026-02-24T00:00:09.660000","2026-02-24T00:00:09.661000","2026-02-24T00:00:09.663000","2026-02-24T00:00:09.664000","2026-02-24T00:00:09.665000","2026-02-24T00:00:09.666000","2026-02-24T00:00:09.668000","2026-02-24T00:00:09.669000","2026-02-24T00:00:09.670000","2026-02-24T00:00:09.671000","2026-02-24T00:00:09.672000","2026-02-24T00:00:09.674000","2026-02-24T00:00:09.675000","2026-02-24T00:00:09.676000","2026-02-24T00:00:09.677000","2026-02-24T00:00:09.679000","2026-02-24T00:00:09.680000","2026-02-24T00:00:09.681000","2026-02-24T00:00:09.682000","2026-02-24T00:00:09.684000","2026-02-24T00:00:09.685000","2026-02-24T00:00:09.689000","2026-02-24T00:00:09.690000","2026-02-24T00:00:09.691000","2026-02-24T00:00:09.692000","2026-02-24T00:00:09.693000","2026-02-24T00:00:09.695000","2026-02-24T00:00:09.696000","2026-02-24T00:00:09.697000","2026-02-24T00:00:09.698000","2026-02-24T00:00:09.700000","2026-02-24T00:00:09.701000","2026-02-24T00:00:09.702000","2026-02-24T00:00:09.703000","2026-02-24T00:00:09.705000","2026-02-24T00:00:09.706000","2026-02-24T00:00:09.707000","2026-02-24T00:00:09.708000","2026-02-24T00:00:09.709000","2026-02-24T00:00:09.711000","2026-02-24T00:00:09.712000","2026-02-24T00:00:09.713000","2026-02-24T00:00:09.714000","2026-02-24T00:00:09.716000","2026-02-24T00:00:09.717000","2026-02-24T00:00:09.718000","2026-02-24T00:00:09.719000","2026-02-24T00:00:09.721000","2026-02-24T00:00:09.722000","2026-02-24T00:00:09.732000","2026-02-24T00:00:09.733000","2026-02-24T00:00:09.735000","2026-02-24T00:00:09.736000","2026-02-24T00:00:09.737000","2026-02-24T00:00:09.738000","2026-02-24T00:00:09.739000","2026-02-24T00:00:09.741000","2026-02-24T00:00:09.742000","2026-02-24T00:00:09.743000","2026-02-24T00:00:09.744000","2026-02-24T00:00:09.746000","2026-02-24T00:00:09.747000","2026-02-24T00:00:09.748000","2026-02-24T00:00:09.749000","2026-02-24T00:00:09.751000","2026-02-24T00:00:09.752000","2026-02-24T00:00:09.753000","2026-02-24T00:00:09.755000","2026-02-24T00:00:09.756000","2026-02-24T00:00:09.757000","2026-02-24T00:00:09.759000","2026-02-24T00:00:09.760000","2026-02-24T00:00:09.761000","2026-02-24T00:00:09.762000","2026-02-24T00:00:09.763000","2026-02-24T00:00:09.765000","2026-02-24T00:00:09.766000","2026-02-24T00:00:09.767000","2026-02-24T00:00:09.769000","2026-02-24T00:00:09.770000","2026-02-24T00:00:09.771000","2026-02-24T00:00:09.772000","2026-02-24T00:00:09.774000","2026-02-24T00:00:09.775000","2026-02-24T00:00:09.776000","2026-02-24T00:00:09.778000","2026-02-24T00:00:09.786000","2026-02-24T00:00:09.787000","2026-02-24T00:00:09.787000","2026-02-24T00:00:09.789000","2026-02-24T00:00:09.790000","2026-02-24T00:00:09.791000","2026-02-24T00:00:09.793000","2026-02-24T00:00:09.794000","2026-02-24T00:00:09.795000","2026-02-24T00:00:09.797000","2026-02-24T00:00:09.798000","2026-02-24T00:00:09.799000","2026-02-24T00:00:09.801000","2026-02-24T00:00:09.802000","2026-02-24T00:00:09.803000","2026-02-24T00:00:09.804000","2026-02-24T00:00:09.805000","2026-02-24T00:00:09.807000","2026-02-24T00:00:09.808000","2026-02-24T00:00:09.809000","2026-02-24T00:00:09.810000","2026-02-24T00:00:09.812000","2026-02-24T00:00:09.813000","2026-02-24T00:00:09.815000","2026-02-24T00:00:09.816000","2026-02-24T00:00:09.817000","2026-02-24T00:00:09.819000","2026-02-24T00:00:09.820000","2026-02-24T00:00:09.821000","2026-02-24T00:00:09.822000","2026-02-24T00:00:09.823000","2026-02-24T00:00:09.825000","2026-02-24T00:00:09.826000","2026-02-24T00:00:09.827000","2026-02-24T00:00:09.828000","2026-02-24T00:00:09.830000","2026-02-24T00:00:09.831000","2026-02-24T00:00:09.832000","2026-02-24T00:00:09.833000","2026-02-24T00:00:09.835000","2026-02-24T00:00:09.836000","2026-02-24T00:00:09.837000","2026-02-24T00:00:09.839000","2026-02-24T00:00:09.840000","2026-02-24T00:00:09.841000","2026-02-24T00:00:09.843000","2026-02-24T00:00:09.844000","2026-02-24T00:00:09.845000","2026-02-24T00:00:09.846000","2026-02-24T00:00:09.848000","2026-02-24T00:00:09.849000","2026-02-24T00:00:09.850000","2026-02-24T00:00:09.851000","2026-02-24T00:00:09.853000","2026-02-24T00:00:09.854000","2026-02-24T00:00:09.855000","2026-02-24T00:00:09.857000","2026-02-24T00:00:09.858000","2026-02-24T00:00:09.859000","2026-02-24T00:00:09.861000","2026-02-24T00:00:09.862000","2026-02-24T00:00:09.863000","2026-02-24T00:00:09.864000","2026-02-24T00:00:09.866000","2026-02-24T00:00:09.867000","2026-02-24T00:00:09.868000","2026-02-24T00:00:09.869000","2026-02-24T00:00:09.870000","2026-02-24T00:00:09.872000","2026-02-24T00:00:09.873000","2026-02-24T00:00:09.874000","2026-02-24T00:00:09.876000","2026-02-24T00:00:09.877000","2026-02-24T00:00:09.878000","2026-02-24T00:00:09.880000","2026-02-24T00:00:09.881000","2026-02-24T00:00:09.882000","2026-02-24T00:00:09.884000","2026-02-24T00:00:09.885000","2026-02-24T00:00:09.886000","2026-02-24T00:00:09.887000","2026-02-24T00:00:09.888000","2026-02-24T00:00:09.890000","2026-02-24T00:00:09.891000","2026-02-24T00:00:09.892000","2026-02-24T00:00:09.894000","2026-02-24T00:00:09.895000","2026-02-24T00:00:09.896000","2026-02-24T00:00:09.897000","2026-02-24T00:00:09.899000","2026-02-24T00:00:09.900000","2026-02-24T00:00:09.901000","2026-02-24T00:00:09.903000","2026-02-24T00:00:09.904000","2026-02-24T00:00:09.905000","2026-02-24T00:00:09.907000","2026-02-24T00:00:09.908000","2026-02-24T00:00:09.909000","2026-02-24T00:00:09.913000","2026-02-24T00:00:09.914000","2026-02-24T00:00:09.915000","2026-02-24T00:00:09.916000","2026-02-24T00:00:09.917000","2026-02-24T00:00:09.921000","2026-02-24T00:00:09.922000","2026-02-24T00:00:09.923000","2026-02-24T00:00:09.924000","2026-02-24T00:00:09.925000","2026-02-24T00:00:09.927000","2026-02-24T00:00:09.928000","2026-02-24T00:00:09.929000","2026-02-24T00:00:09.931000","2026-02-24T00:00:09.932000","2026-02-24T00:00:09.933000","2026-02-24T00:00:09.935000","2026-02-24T00:00:09.936000","2026-02-24T00:00:09.937000","2026-02-24T00:00:09.939000","2026-02-24T00:00:09.940000","2026-02-24T00:00:09.941000","2026-02-24T00:00:09.943000","2026-02-24T00:00:09.944000","2026-02-24T00:00:09.945000","2026-02-24T00:00:09.947000","2026-02-24T00:00:09.948000","2026-02-24T00:00:09.949000","2026-02-24T00:00:09.951000","2026-02-24T00:00:09.952000","2026-02-24T00:00:09.953000","2026-02-24T00:00:09.954000","2026-02-24T00:00:09.956000","2026-02-24T00:00:09.957000","2026-02-24T00:00:09.958000","2026-02-24T00:00:09.960000","2026-02-24T00:00:09.961000","2026-02-24T00:00:09.963000","2026-02-24T00:00:09.964000","2026-02-24T00:00:09.965000","2026-02-24T00:00:09.966000","2026-02-24T00:00:09.968000","2026-02-24T00:00:09.969000","2026-02-24T00:00:09.970000","2026-02-24T00:00:09.972000","2026-02-24T00:00:09.973000","2026-02-24T00:00:09.974000","2026-02-24T00:00:09.976000","2026-02-24T00:00:09.977000","2026-02-24T00:00:09.978000","2026-02-24T00:00:09.980000","2026-02-24T00:00:09.981000","2026-02-24T00:00:09.982000","2026-02-24T00:00:09.984000","2026-02-24T00:00:09.985000","2026-02-24T00:00:09.986000","2026-02-24T00:00:09.988000","2026-02-24T00:00:09.989000","2026-02-24T00:00:09.990000","2026-02-24T00:00:09.992000","2026-02-24T00:00:09.993000","2026-02-24T00:00:09.994000","2026-02-24T00:00:09.996000","2026-02-24T00:00:09.997000","2026-02-24T00:00:09.998000","2026-02-24T00:00:10.000000","2026-02-24T00:00:10.001000","2026-02-24T00:00:10.002000","2026-02-24T00:00:10.004000","2026-02-24T00:00:10.005000","2026-02-24T00:00:10.012000","2026-02-24T00:00:10.014000","2026-02-24T00:00:10.014000","2026-02-24T00:00:10.016000","2026-02-24T00:00:10.017000","2026-02-24T00:00:10.018000","2026-02-24T00:00:10.020000","2026-02-24T00:00:10.021000","2026-02-24T00:00:10.022000","2026-02-24T00:00:10.024000","2026-02-24T00:00:10.025000","2026-02-24T00:00:10.026000","2026-02-24T00:00:10.028000","2026-02-24T00:00:10.029000","2026-02-24T00:00:10.030000","2026-02-24T00:00:10.032000","2026-02-24T00:00:10.039000","2026-02-24T00:00:10.040000","2026-02-24T00:00:10.041000","2026-02-24T00:00:10.042000","2026-02-24T00:00:10.043000","2026-02-24T00:00:10.044000","2026-02-24T00:00:10.046000","2026-02-24T00:00:10.047000","2026-02-24T00:00:10.048000","2026-02-24T00:00:10.049000","2026-02-24T00:00:10.050000","2026-02-24T00:00:10.051000","2026-02-24T00:00:10.053000","2026-02-24T00:00:10.054000","2026-02-24T00:00:10.055000","2026-02-24T00:00:10.056000","2026-02-24T00:00:10.063000","2026-02-24T00:00:10.064000","2026-02-24T00:00:10.065000","2026-02-24T00:00:10.066000","2026-02-24T00:00:10.067000","2026-02-24T00:00:10.068000","2026-02-24T00:00:10.070000","2026-02-24T00:00:10.071000","2026-02-24T00:00:10.072000","2026-02-24T00:00:10.073000","2026-02-24T00:00:10.074000","2026-02-24T00:00:10.075000","2026-02-24T00:00:10.077000","2026-02-24T00:00:10.078000","2026-02-24T00:00:10.079000","2026-02-24T00:00:10.080000","2026-02-24T00:00:10.081000","2026-02-24T00:00:10.083000","2026-02-24T00:00:10.084000","2026-02-24T00:00:10.085000","2026-02-24T00:00:10.086000","2026-02-24T00:00:10.088000","2026-02-24T00:00:10.089000","2026-02-24T00:00:10.090000","2026-02-24T00:00:10.091000","2026-02-24T00:00:10.092000","2026-02-24T00:00:10.093000","2026-02-24T00:00:10.094000","2026-02-24T00:00:10.095000","2026-02-24T00:00:10.097000","2026-02-24T00:00:10.098000","2026-02-24T00:00:10.099000","2026-02-24T00:00:10.100000","2026-02-24T00:00:10.101000","2026-02-24T00:00:10.102000","2026-02-24T00:00:10.104000","2026-02-24T00:00:10.105000","2026-02-24T00:00:10.106000","2026-02-24T00:00:10.107000","2026-02-24T00:00:10.108000","2026-02-24T00:00:10.109000","2026-02-24T00:00:10.111000","2026-02-24T00:00:10.112000","2026-02-24T00:00:10.113000","2026-02-24T00:00:10.114000","2026-02-24T00:00:10.115000","2026-02-24T00:00:10.117000","2026-02-24T00:00:10.118000","2026-02-24T00:00:10.119000","2026-02-24T00:00:10.120000","2026-02-24T00:00:10.121000","2026-02-24T00:00:10.122000","2026-02-24T00:00:10.124000","2026-02-24T00:00:10.125000","2026-02-24T00:00:10.126000","2026-02-24T00:00:10.127000","2026-02-24T00:00:10.128000","2026-02-24T00:00:10.129000","2026-02-24T00:00:10.131000","2026-02-24T00:00:10.132000","2026-02-24T00:00:10.133000","2026-02-24T00:00:10.134000","2026-02-24T00:00:10.135000","2026-02-24T00:00:10.136000","2026-02-24T00:00:10.138000","2026-02-24T00:00:10.139000","2026-02-24T00:00:10.140000","2026-02-24T00:00:10.141000","2026-02-24T00:00:10.142000","2026-02-24T00:00:10.144000","2026-02-24T00:00:10.145000","2026-02-24T00:00:10.146000","2026-02-24T00:00:10.147000","2026-02-24T00:00:10.148000","2026-02-24T00:00:10.149000","2026-02-24T00:00:10.151000","2026-02-24T00:00:10.152000","2026-02-24T00:00:10.153000","2026-02-24T00:00:10.154000","2026-02-24T00:00:10.155000","2026-02-24T00:00:10.156000","2026-02-24T00:00:10.158000","2026-02-24T00:00:10.159000","2026-02-24T00:00:10.160000","2026-02-24T00:00:10.161000","2026-02-24T00:00:10.162000","2026-02-24T00:00:10.163000","2026-02-24T00:00:10.164000","2026-02-24T00:00:10.166000","2026-02-24T00:00:10.167000","2026-02-24T00:00:10.168000","2026-02-24T00:00:10.169000","2026-02-24T00:00:10.170000","2026-02-24T00:00:10.171000","2026-02-24T00:00:10.173000","2026-02-24T00:00:10.174000","2026-02-24T00:00:10.175000","2026-02-24T00:00:10.176000","2026-02-24T00:00:10.177000","2026-02-24T00:00:10.179000","2026-02-24T00:00:10.180000","2026-02-24T00:00:10.181000","2026-02-24T00:00:10.182000","2026-02-24T00:00:10.183000","2026-02-24T00:00:10.184000","2026-02-24T00:00:10.186000","2026-02-24T00:00:10.187000","2026-02-24T00:00:10.188000","2026-02-24T00:00:10.189000","2026-02-24T00:00:10.190000","2026-02-24T00:00:10.191000","2026-02-24T00:00:10.193000","2026-02-24T00:00:10.194000","2026-02-24T00:00:10.195000","2026-02-24T00:00:10.196000","2026-02-24T00:00:10.197000","2026-02-24T00:00:10.199000","2026-02-24T00:00:10.200000","2026-02-24T00:00:10.201000","2026-02-24T00:00:10.202000","2026-02-24T00:00:10.203000","2026-02-24T00:00:10.205000","2026-02-24T00:00:10.206000","2026-02-24T00:00:10.207000","2026-02-24T00:00:10.208000","2026-02-24T00:00:10.210000","2026-02-24T00:00:10.211000","2026-02-24T00:00:10.212000","2026-02-24T00:00:10.213000","2026-02-24T00:00:10.214000","2026-02-24T00:00:10.216000","2026-02-24T00:00:10.217000","2026-02-24T00:00:10.218000","2026-02-24T00:00:10.219000","2026-02-24T00:00:10.221000","2026-02-24T00:00:10.222000","2026-02-24T00:00:10.223000","2026-02-24T00:00:10.224000","2026-02-24T00:00:10.226000","2026-02-24T00:00:10.227000","2026-02-24T00:00:10.228000","2026-02-24T00:00:10.229000","2026-02-24T00:00:10.230000","2026-02-24T00:00:10.232000","2026-02-24T00:00:10.233000","2026-02-24T00:00:10.234000","2026-02-24T00:00:10.235000","2026-02-24T00:00:10.236000","2026-02-24T00:00:10.238000","2026-02-24T00:00:10.239000","2026-02-24T00:00:10.240000","2026-02-24T00:00:10.241000","2026-02-24T00:00:10.243000","2026-02-24T00:00:10.244000","2026-02-24T00:00:10.245000","2026-02-24T00:00:10.246000","2026-02-24T00:00:10.247000","2026-02-24T00:00:10.249000","2026-02-24T00:00:10.250000","2026-02-24T00:00:10.251000","2026-02-24T00:00:10.252000","2026-02-24T00:00:10.254000","2026-02-24T00:00:10.255000","2026-02-24T00:00:10.256000","2026-02-24T00:00:10.257000","2026-02-24T00:00:10.258000","2026-02-24T00:00:10.260000","2026-02-24T00:00:10.261000","2026-02-24T00:00:10.262000","2026-02-24T00:00:10.263000","2026-02-24T00:00:10.265000","2026-02-24T00:00:10.266000","2026-02-24T00:00:10.267000","2026-02-24T00:00:10.268000","2026-02-24T00:00:10.270000","2026-02-24T00:00:10.271000","2026-02-24T00:00:10.272000","2026-02-24T00:00:10.273000","2026-02-24T00:00:10.275000","2026-02-24T00:00:10.276000","2026-02-24T00:00:10.277000","2026-02-24T00:00:10.278000","2026-02-24T00:00:10.279000","2026-02-24T00:00:10.280000","2026-02-24T00:00:10.282000","2026-02-24T00:00:10.283000","2026-02-24T00:00:10.284000","2026-02-24T00:00:10.285000","2026-02-24T00:00:10.287000","2026-02-24T00:00:10.288000","2026-02-24T00:00:10.289000","2026-02-24T00:00:10.290000","2026-02-24T00:00:10.291000","2026-02-24T00:00:10.293000","2026-02-24T00:00:10.294000","2026-02-24T00:00:10.295000","2026-02-24T00:00:10.296000","2026-02-24T00:00:10.298000","2026-02-24T00:00:10.299000","2026-02-24T00:00:10.300000","2026-02-24T00:00:10.301000","2026-02-24T00:00:10.302000","2026-02-24T00:00:10.304000","2026-02-24T00:00:10.305000","2026-02-24T00:00:10.306000","2026-02-24T00:00:10.307000","2026-02-24T00:00:10.308000","2026-02-24T00:00:10.310000","2026-02-24T00:00:10.315000","2026-02-24T00:00:10.316000","2026-02-24T00:00:10.317000","2026-02-24T00:00:10.318000","2026-02-24T00:00:10.319000","2026-02-24T00:00:10.320000","2026-02-24T00:00:10.322000","2026-02-24T00:00:10.323000","2026-02-24T00:00:10.324000","2026-02-24T00:00:10.325000","2026-02-24T00:00:10.327000","2026-02-24T00:00:10.328000","2026-02-24T00:00:10.329000","2026-02-24T00:00:10.330000","2026-02-24T00:00:10.331000","2026-02-24T00:00:10.333000","2026-02-24T00:00:10.334000","2026-02-24T00:00:10.335000","2026-02-24T00:00:10.336000","2026-02-24T00:00:10.337000","2026-02-24T00:00:10.339000","2026-02-24T00:00:10.340000","2026-02-24T00:00:10.341000","2026-02-24T00:00:10.342000","2026-02-24T00:00:10.344000","2026-02-24T00:00:10.345000","2026-02-24T00:00:10.346000","2026-02-24T00:00:10.347000","2026-02-24T00:00:10.348000","2026-02-24T00:00:10.350000","2026-02-24T00:00:10.351000","2026-02-24T00:00:10.352000","2026-02-24T00:00:10.353000","2026-02-24T00:00:10.355000","2026-02-24T00:00:10.356000","2026-02-24T00:00:10.357000","2026-02-24T00:00:10.358000","2026-02-24T00:00:10.360000","2026-02-24T00:00:10.361000","2026-02-24T00:00:10.362000","2026-02-24T00:00:10.364000","2026-02-24T00:00:10.365000","2026-02-24T00:00:10.366000","2026-02-24T00:00:10.367000","2026-02-24T00:00:10.369000","2026-02-24T00:00:10.370000","2026-02-24T00:00:10.371000","2026-02-24T00:00:10.372000","2026-02-24T00:00:10.374000","2026-02-24T00:00:10.375000","2026-02-24T00:00:10.376000","2026-02-24T00:00:10.378000","2026-02-24T00:00:10.379000","2026-02-24T00:00:10.380000","2026-02-24T00:00:10.381000","2026-02-24T00:00:10.383000","2026-02-24T00:00:10.384000","2026-02-24T00:00:10.385000","2026-02-24T00:00:10.386000","2026-02-24T00:00:09.786000","2026-02-24T00:00:09.787000","2026-02-24T00:00:09.789000","2026-02-24T00:00:09.790000","2026-02-24T00:00:09.791000","2026-02-24T00:00:09.793000","2026-02-24T00:00:09.794000","2026-02-24T00:00:09.795000","2026-02-24T00:00:09.797000","2026-02-24T00:00:09.798000","2026-02-24T00:00:09.799000","2026-02-24T00:00:09.801000","2026-02-24T00:00:09.802000","2026-02-24T00:00:09.803000","2026-02-24T00:00:09.804000","2026-02-24T00:00:09.805000","2026-02-24T00:00:09.807000","2026-02-24T00:00:09.808000","2026-02-24T00:00:09.809000","2026-02-24T00:00:09.810000","2026-02-24T00:00:09.812000","2026-02-24T00:00:09.813000","2026-02-24T00:00:09.814000","2026-02-24T00:00:09.816000","2026-02-24T00:00:09.817000","2026-02-24T00:00:09.819000","2026-02-24T00:00:09.820000","2026-02-24T00:00:09.821000","2026-02-24T00:00:09.822000","2026-02-24T00:00:09.823000","2026-02-24T00:00:09.824000","2026-02-24T00:00:09.826000","2026-02-24T00:00:09.827000","2026-02-24T00:00:09.828000","2026-02-24T00:00:09.830000","2026-02-24T00:00:09.831000","2026-02-24T00:00:09.832000","2026-02-24T00:00:09.833000","2026-02-24T00:00:09.835000","2026-02-24T00:00:09.836000","2026-02-24T00:00:09.837000","2026-02-24T00:00:09.839000","2026-02-24T00:00:09.840000","2026-02-24T00:00:09.841000","2026-02-24T00:00:09.843000","2026-02-24T00:00:09.844000","2026-02-24T00:00:09.845000","2026-02-24T00:00:09.846000","2026-02-24T00:00:09.847000","2026-02-24T00:00:09.849000","2026-02-24T00:00:09.850000","2026-02-24T00:00:09.851000","2026-02-24T00:00:09.853000","2026-02-24T00:00:09.854000","2026-02-24T00:00:09.855000","2026-02-24T00:00:09.857000","2026-02-24T00:00:09.858000","2026-02-24T00:00:09.859000","2026-02-24T00:00:09.861000","2026-02-24T00:00:09.862000","2026-02-24T00:00:09.863000","2026-02-24T00:00:09.864000","2026-02-24T00:00:09.865000","2026-02-24T00:00:09.867000","2026-02-24T00:00:09.868000","2026-02-24T00:00:09.869000","2026-02-24T00:00:09.870000","2026-02-24T00:00:09.872000","2026-02-24T00:00:09.873000","2026-02-24T00:00:09.874000","2026-02-24T00:00:09.876000","2026-02-24T00:00:09.877000","2026-02-24T00:00:09.878000","2026-02-24T00:00:09.880000","2026-02-24T00:00:09.881000","2026-02-24T00:00:09.882000","2026-02-24T00:00:09.883000","2026-02-24T00:00:09.885000","2026-02-24T00:00:09.886000","2026-02-24T00:00:09.887000","2026-02-24T00:00:09.888000","2026-02-24T00:00:09.890000","2026-02-24T00:00:09.891000","2026-02-24T00:00:09.892000","2026-02-24T00:00:09.894000","2026-02-24T00:00:09.895000","2026-02-24T00:00:09.896000","2026-02-24T00:00:09.897000","2026-02-24T00:00:09.899000","2026-02-24T00:00:09.900000","2026-02-24T00:00:09.901000","2026-02-24T00:00:09.903000","2026-02-24T00:00:09.904000","2026-02-24T00:00:09.905000","2026-02-24T00:00:09.907000","2026-02-24T00:00:09.908000","2026-02-24T00:00:09.909000","2026-02-24T00:00:09.911000","2026-02-24T00:00:09.912000","2026-02-24T00:00:09.914000","2026-02-24T00:00:09.915000","2026-02-24T00:00:09.916000","2026-02-24T00:00:09.917000","2026-02-24T00:00:09.921000","2026-02-24T00:00:09.922000","2026-02-24T00:00:09.923000","2026-02-24T00:00:09.924000","2026-02-24T00:00:09.925000","2026-02-24T00:00:09.927000","2026-02-24T00:00:09.928000","2026-02-24T00:00:09.929000","2026-02-24T00:00:09.931000","2026-02-24T00:00:09.932000","2026-02-24T00:00:09.933000","2026-02-24T00:00:09.935000","2026-02-24T00:00:09.936000","2026-02-24T00:00:09.937000","2026-02-24T00:00:09.938000","2026-02-24T00:00:09.940000","2026-02-24T00:00:09.941000","2026-02-24T00:00:09.942000","2026-02-24T00:00:09.944000","2026-02-24T00:00:09.945000","2026-02-24T00:00:09.947000","2026-02-24T00:00:09.948000","2026-02-24T00:00:09.949000","2026-02-24T00:00:09.951000","2026-02-24T00:00:09.952000","2026-02-24T00:00:09.953000","2026-02-24T00:00:09.954000","2026-02-24T00:00:09.956000","2026-02-24T00:00:09.957000","2026-02-24T00:00:09.958000","2026-02-24T00:00:09.960000","2026-02-24T00:00:09.961000","2026-02-24T00:00:09.962000","2026-02-24T00:00:09.964000","2026-02-24T00:00:09.965000","2026-02-24T00:00:09.966000","2026-02-24T00:00:09.968000","2026-02-24T00:00:09.969000","2026-02-24T00:00:09.970000","2026-02-24T00:00:09.972000","2026-02-24T00:00:09.973000","2026-02-24T00:00:09.974000","2026-02-24T00:00:09.976000","2026-02-24T00:00:09.977000","2026-02-24T00:00:09.978000","2026-02-24T00:00:09.980000","2026-02-24T00:00:09.981000","2026-02-24T00:00:09.982000","2026-02-24T00:00:09.984000","2026-02-24T00:00:09.985000","2026-02-24T00:00:09.986000","2026-02-24T00:00:09.988000","2026-02-24T00:00:09.989000","2026-02-24T00:00:09.990000","2026-02-24T00:00:09.992000","2026-02-24T00:00:09.993000","2026-02-24T00:00:09.994000","2026-02-24T00:00:09.996000","2026-02-24T00:00:09.997000","2026-02-24T00:00:09.998000","2026-02-24T00:00:10.000000","2026-02-24T00:00:10.001000","2026-02-24T00:00:10.002000","2026-02-24T00:00:10.004000","2026-02-24T00:00:10.005000","2026-02-24T00:00:10.012000","2026-02-24T00:00:10.014000","2026-02-24T00:00:10.014000","2026-02-24T00:00:10.016000","2026-02-24T00:00:10.017000","2026-02-24T00:00:10.018000","2026-02-24T00:00:10.020000","2026-02-24T00:00:10.021000","2026-02-24T00:00:10.022000","2026-02-24T00:00:10.024000","2026-02-24T00:00:10.025000","2026-02-24T00:00:10.026000","2026-02-24T00:00:10.028000","2026-02-24T00:00:10.029000","2026-02-24T00:00:10.030000","2026-02-24T00:00:10.032000","2026-02-24T00:00:10.039000","2026-02-24T00:00:10.040000","2026-02-24T00:00:10.041000","2026-02-24T00:00:10.042000","2026-02-24T00:00:10.043000","2026-02-24T00:00:10.044000","2026-02-24T00:00:10.045000","2026-02-24T00:00:10.047000","2026-02-24T00:00:10.048000","2026-02-24T00:00:10.049000","2026-02-24T00:00:10.050000","2026-02-24T00:00:10.051000","2026-02-24T00:00:10.052000","2026-02-24T00:00:10.054000","2026-02-24T00:00:10.055000","2026-02-24T00:00:10.056000","2026-02-24T00:00:10.063000","2026-02-24T00:00:10.064000","2026-02-24T00:00:10.065000","2026-02-24T00:00:10.066000","2026-02-24T00:00:10.067000","2026-02-24T00:00:10.068000","2026-02-24T00:00:10.070000","2026-02-24T00:00:10.071000","2026-02-24T00:00:10.072000","2026-02-24T00:00:10.073000","2026-02-24T00:00:10.074000","2026-02-24T00:00:10.075000","2026-02-24T00:00:10.077000","2026-02-24T00:00:10.078000","2026-02-24T00:00:10.079000","2026-02-24T00:00:10.080000","2026-02-24T00:00:10.081000","2026-02-24T00:00:10.083000","2026-02-24T00:00:10.084000","2026-02-24T00:00:10.085000","2026-02-24T00:00:10.086000","2026-02-24T00:00:10.087000","2026-02-24T00:00:10.089000","2026-02-24T00:00:10.090000","2026-02-24T00:00:10.091000","2026-02-24T00:00:10.092000","2026-02-24T00:00:10.093000","2026-02-24T00:00:10.094000","2026-02-24T00:00:10.095000","2026-02-24T00:00:10.096000","2026-02-24T00:00:10.098000","2026-02-24T00:00:10.099000","2026-02-24T00:00:10.100000","2026-02-24T00:00:10.101000","2026-02-24T00:00:10.102000","2026-02-24T00:00:10.104000","2026-02-24T00:00:10.105000","2026-02-24T00:00:10.106000","2026-02-24T00:00:10.107000","2026-02-24T00:00:10.108000","2026-02-24T00:00:10.109000","2026-02-24T00:00:10.111000","2026-02-24T00:00:10.112000","2026-02-24T00:00:10.113000","2026-02-24T00:00:10.114000","2026-02-24T00:00:10.115000","2026-02-24T00:00:10.116000","2026-02-24T00:00:10.118000","2026-02-24T00:00:10.119000","2026-02-24T00:00:10.120000","2026-02-24T00:00:10.121000","2026-02-24T00:00:10.122000","2026-02-24T00:00:10.123000","2026-02-24T00:00:10.125000","2026-02-24T00:00:10.126000","2026-02-24T00:00:10.127000","2026-02-24T00:00:10.128000","2026-02-24T00:00:10.129000","2026-02-24T00:00:10.130000","2026-02-24T00:00:10.132000","2026-02-24T00:00:10.133000","2026-02-24T00:00:10.134000","2026-02-24T00:00:10.135000","2026-02-24T00:00:10.136000","2026-02-24T00:00:10.137000","2026-02-24T00:00:10.139000","2026-02-24T00:00:10.140000","2026-02-24T00:00:10.141000","2026-02-24T00:00:10.142000","2026-02-24T00:00:10.144000","2026-02-24T00:00:10.145000","2026-02-24T00:00:10.146000","2026-02-24T00:00:10.147000","2026-02-24T00:00:10.148000","2026-02-24T00:00:10.149000","2026-02-24T00:00:10.150000","2026-02-24T00:00:10.152000","2026-02-24T00:00:10.153000","2026-02-24T00:00:10.154000","2026-02-24T00:00:10.155000","2026-02-24T00:00:10.156000","2026-02-24T00:00:10.157000","2026-02-24T00:00:10.158000","2026-02-24T00:00:10.160000","2026-02-24T00:00:10.161000","2026-02-24T00:00:10.162000","2026-02-24T00:00:10.163000","2026-02-24T00:00:10.164000","2026-02-24T00:00:10.166000","2026-02-24T00:00:10.167000","2026-02-24T00:00:10.168000","2026-02-24T00:00:10.169000","2026-02-24T00:00:10.170000","2026-02-24T00:00:10.171000","2026-02-24T00:00:10.173000","2026-02-24T00:00:10.174000","2026-02-24T00:00:10.175000","2026-02-24T00:00:10.176000","2026-02-24T00:00:10.177000","2026-02-24T00:00:10.179000","2026-02-24T00:00:10.180000","2026-02-24T00:00:10.181000","2026-02-24T00:00:10.182000","2026-02-24T00:00:10.183000","2026-02-24T00:00:10.184000","2026-02-24T00:00:10.185000","2026-02-24T00:00:10.187000","2026-02-24T00:00:10.188000","2026-02-24T00:00:10.189000","2026-02-24T00:00:10.190000","2026-02-24T00:00:10.191000","2026-02-24T00:00:10.193000","2026-02-24T00:00:10.194000","2026-02-24T00:00:10.195000","2026-02-24T00:00:10.196000","2026-02-24T00:00:10.197000","2026-02-24T00:00:10.199000","2026-02-24T00:00:10.200000","2026-02-24T00:00:10.201000","2026-02-24T00:00:10.202000","2026-02-24T00:00:10.203000","2026-02-24T00:00:10.205000","2026-02-24T00:00:10.206000","2026-02-24T00:00:10.207000","2026-02-24T00:00:10.208000","2026-02-24T00:00:10.209000","2026-02-24T00:00:10.211000","2026-02-24T00:00:10.212000","2026-02-24T00:00:10.213000","2026-02-24T00:00:10.214000","2026-02-24T00:00:10.216000","2026-02-24T00:00:10.217000","2026-02-24T00:00:10.218000","2026-02-24T00:00:10.219000","2026-02-24T00:00:10.220000","2026-02-24T00:00:10.222000","2026-02-24T00:00:10.223000","2026-02-24T00:00:10.224000","2026-02-24T00:00:10.225000","2026-02-24T00:00:10.227000","2026-02-24T00:00:10.228000","2026-02-24T00:00:10.229000","2026-02-24T00:00:10.230000","2026-02-24T00:00:10.232000","2026-02-24T00:00:10.233000","2026-02-24T00:00:10.234000","2026-02-24T00:00:10.235000","2026-02-24T00:00:10.236000","2026-02-24T00:00:10.238000","2026-02-24T00:00:10.239000","2026-02-24T00:00:10.240000","2026-02-24T00:00:10.241000","2026-02-24T00:00:10.243000","2026-02-24T00:00:10.244000","2026-02-24T00:00:10.245000","2026-02-24T00:00:10.246000","2026-02-24T00:00:10.247000","2026-02-24T00:00:10.249000","2026-02-24T00:00:10.250000","2026-02-24T00:00:10.251000","2026-02-24T00:00:10.252000","2026-02-24T00:00:10.254000","2026-02-24T00:00:10.255000","2026-02-24T00:00:10.256000","2026-02-24T00:00:10.257000","2026-02-24T00:00:10.258000","2026-02-24T00:00:10.260000","2026-02-24T00:00:10.261000","2026-02-24T00:00:10.262000","2026-02-24T00:00:10.263000","2026-02-24T00:00:10.264000","2026-02-24T00:00:10.266000","2026-02-24T00:00:10.267000","2026-02-24T00:00:10.268000","2026-02-24T00:00:10.269000","2026-02-24T00:00:10.271000","2026-02-24T00:00:10.272000","2026-02-24T00:00:10.273000","2026-02-24T00:00:10.274000","2026-02-24T00:00:10.276000","2026-02-24T00:00:10.277000","2026-02-24T00:00:10.278000","2026-02-24T00:00:10.279000","2026-02-24T00:00:10.280000","2026-02-24T00:00:10.282000","2026-02-24T00:00:10.283000","2026-02-24T00:00:10.284000","2026-02-24T00:00:10.285000","2026-02-24T00:00:10.286000","2026-02-24T00:00:10.288000","2026-02-24T00:00:10.289000","2026-02-24T00:00:10.290000","2026-02-24T00:00:10.291000","2026-02-24T00:00:10.293000","2026-02-24T00:00:10.294000","2026-02-24T00:00:10.295000","2026-02-24T00:00:10.296000","2026-02-24T00:00:10.298000","2026-02-24T00:00:10.299000","2026-02-24T00:00:10.300000","2026-02-24T00:00:10.301000","2026-02-24T00:00:10.302000","2026-02-24T00:00:10.304000","2026-02-24T00:00:10.305000","2026-02-24T00:00:10.306000","2026-02-24T00:00:10.307000","2026-02-24T00:00:10.308000","2026-02-24T00:00:10.310000","2026-02-24T00:00:10.014000","2026-02-24T00:00:10.014000","2026-02-24T00:00:10.016000","2026-02-24T00:00:10.017000","2026-02-24T00:00:10.018000","2026-02-24T00:00:10.020000","2026-02-24T00:00:10.021000","2026-02-24T00:00:10.022000","2026-02-24T00:00:10.024000","2026-02-24T00:00:10.025000","2026-02-24T00:00:10.026000","2026-02-24T00:00:10.028000","2026-02-24T00:00:10.029000","2026-02-24T00:00:10.030000","2026-02-24T00:00:10.032000","2026-02-24T00:00:10.039000","2026-02-24T00:00:10.040000","2026-02-24T00:00:10.041000","2026-02-24T00:00:10.042000","2026-02-24T00:00:10.043000","2026-02-24T00:00:10.044000","2026-02-24T00:00:10.046000","2026-02-24T00:00:10.047000","2026-02-24T00:00:10.048000","2026-02-24T00:00:10.049000","2026-02-24T00:00:10.050000","2026-02-24T00:00:10.051000","2026-02-24T00:00:10.053000","2026-02-24T00:00:10.054000","2026-02-24T00:00:10.055000","2026-02-24T00:00:10.056000","2026-02-24T00:00:10.063000","2026-02-24T00:00:10.065000","2026-02-24T00:00:10.065000","2026-02-24T00:00:10.066000","2026-02-24T00:00:10.067000","2026-02-24T00:00:10.069000","2026-02-24T00:00:10.070000","2026-02-24T00:00:10.071000","2026-02-24T00:00:10.072000","2026-02-24T00:00:10.073000","2026-02-24T00:00:10.074000","2026-02-24T00:00:10.075000","2026-02-24T00:00:10.077000","2026-02-24T00:00:10.078000","2026-02-24T00:00:10.079000","2026-02-24T00:00:10.080000","2026-02-24T00:00:10.081000","2026-02-24T00:00:10.083000","2026-02-24T00:00:10.084000","2026-02-24T00:00:10.085000","2026-02-24T00:00:10.086000","2026-02-24T00:00:10.088000","2026-02-24T00:00:10.089000","2026-02-24T00:00:10.090000","2026-02-24T00:00:10.091000","2026-02-24T00:00:10.092000","2026-02-24T00:00:10.093000","2026-02-24T00:00:10.094000","2026-02-24T00:00:10.095000","2026-02-24T00:00:10.097000","2026-02-24T00:00:10.098000","2026-02-24T00:00:10.099000","2026-02-24T00:00:10.100000","2026-02-24T00:00:10.101000","2026-02-24T00:00:10.102000","2026-02-24T00:00:10.104000","2026-02-24T00:00:10.105000","2026-02-24T00:00:10.106000","2026-02-24T00:00:10.107000","2026-02-24T00:00:10.109000","2026-02-24T00:00:10.109000","2026-02-24T00:00:10.111000","2026-02-24T00:00:10.112000","2026-02-24T00:00:10.113000","2026-02-24T00:00:10.114000","2026-02-24T00:00:10.115000","2026-02-24T00:00:10.117000","2026-02-24T00:00:10.118000","2026-02-24T00:00:10.119000","2026-02-24T00:00:10.120000","2026-02-24T00:00:10.121000","2026-02-24T00:00:10.122000","2026-02-24T00:00:10.124000","2026-02-24T00:00:10.125000","2026-02-24T00:00:10.126000","2026-02-24T00:00:10.127000","2026-02-24T00:00:10.128000","2026-02-24T00:00:10.129000","2026-02-24T00:00:10.131000","2026-02-24T00:00:10.132000","2026-02-24T00:00:10.133000","2026-02-24T00:00:10.134000","2026-02-24T00:00:10.135000","2026-02-24T00:00:10.136000","2026-02-24T00:00:10.138000","2026-02-24T00:00:10.139000","2026-02-24T00:00:10.140000","2026-02-24T00:00:10.141000","2026-02-24T00:00:10.142000","2026-02-24T00:00:10.144000","2026-02-24T00:00:10.145000","2026-02-24T00:00:10.146000","2026-02-24T00:00:10.147000","2026-02-24T00:00:10.148000","2026-02-24T00:00:10.149000","2026-02-24T00:00:10.151000","2026-02-24T00:00:10.152000","2026-02-24T00:00:10.153000","2026-02-24T00:00:10.154000","2026-02-24T00:00:10.155000","2026-02-24T00:00:10.156000","2026-02-24T00:00:10.158000","2026-02-24T00:00:10.159000","2026-02-24T00:00:10.160000","2026-02-24T00:00:10.161000","2026-02-24T00:00:10.162000","2026-02-24T00:00:10.163000","2026-02-24T00:00:10.164000","2026-02-24T00:00:10.166000","2026-02-24T00:00:10.167000","2026-02-24T00:00:10.168000","2026-02-24T00:00:10.169000","2026-02-24T00:00:10.170000","2026-02-24T00:00:10.171000","2026-02-24T00:00:10.173000","2026-02-24T00:00:10.174000","2026-02-24T00:00:10.175000","2026-02-24T00:00:10.176000","2026-02-24T00:00:10.177000","2026-02-24T00:00:10.179000","2026-02-24T00:00:10.180000","2026-02-24T00:00:10.181000","2026-02-24T00:00:10.182000","2026-02-24T00:00:10.183000","2026-02-24T00:00:10.184000","2026-02-24T00:00:10.186000","2026-02-24T00:00:10.187000","2026-02-24T00:00:10.188000","2026-02-24T00:00:10.189000","2026-02-24T00:00:10.190000","2026-02-24T00:00:10.191000","2026-02-24T00:00:10.193000","2026-02-24T00:00:10.194000","2026-02-24T00:00:10.195000","2026-02-24T00:00:10.196000","2026-02-24T00:00:10.197000","2026-02-24T00:00:10.199000","2026-02-24T00:00:10.200000","2026-02-24T00:00:10.201000","2026-02-24T00:00:10.202000","2026-02-24T00:00:10.204000","2026-02-24T00:00:10.205000","2026-02-24T00:00:10.206000","2026-02-24T00:00:10.207000","2026-02-24T00:00:10.208000","2026-02-24T00:00:10.210000","2026-02-24T00:00:10.211000","2026-02-24T00:00:10.212000","2026-02-24T00:00:10.213000","2026-02-24T00:00:10.214000","2026-02-24T00:00:10.216000","2026-02-24T00:00:10.217000","2026-02-24T00:00:10.218000","2026-02-24T00:00:10.219000","2026-02-24T00:00:10.221000","2026-02-24T00:00:10.222000","2026-02-24T00:00:10.223000","2026-02-24T00:00:10.224000","2026-02-24T00:00:10.226000","2026-02-24T00:00:10.227000","2026-02-24T00:00:10.228000","2026-02-24T00:00:10.229000","2026-02-24T00:00:10.230000","2026-02-24T00:00:10.232000","2026-02-24T00:00:10.233000","2026-02-24T00:00:10.234000","2026-02-24T00:00:10.235000","2026-02-24T00:00:10.236000","2026-02-24T00:00:10.238000","2026-02-24T00:00:10.239000","2026-02-24T00:00:10.240000","2026-02-24T00:00:10.241000","2026-02-24T00:00:10.243000","2026-02-24T00:00:10.244000","2026-02-24T00:00:10.245000","2026-02-24T00:00:10.246000","2026-02-24T00:00:10.247000","2026-02-24T00:00:10.249000","2026-02-24T00:00:10.250000","2026-02-24T00:00:10.251000","2026-02-24T00:00:10.252000","2026-02-24T00:00:10.254000","2026-02-24T00:00:10.255000","2026-02-24T00:00:10.256000","2026-02-24T00:00:10.257000","2026-02-24T00:00:10.258000","2026-02-24T00:00:10.260000","2026-02-24T00:00:10.261000","2026-02-24T00:00:10.262000","2026-02-24T00:00:10.263000","2026-02-24T00:00:10.265000","2026-02-24T00:00:10.266000","2026-02-24T00:00:10.267000","2026-02-24T00:00:10.268000","2026-02-24T00:00:10.270000","2026-02-24T00:00:10.271000","2026-02-24T00:00:10.272000","2026-02-24T00:00:10.273000","2026-02-24T00:00:10.275000","2026-02-24T00:00:10.276000","2026-02-24T00:00:10.277000","2026-02-24T00:00:10.278000","2026-02-24T00:00:10.279000","2026-02-24T00:00:10.280000","2026-02-24T00:00:10.282000","2026-02-24T00:00:10.283000","2026-02-24T00:00:10.284000","2026-02-24T00:00:10.285000","2026-02-24T00:00:10.287000","2026-02-24T00:00:10.288000","2026-02-24T00:00:10.289000","2026-02-24T00:00:10.290000","2026-02-24T00:00:10.291000","2026-02-24T00:00:10.293000","2026-02-24T00:00:10.294000","2026-02-24T00:00:10.295000","2026-02-24T00:00:10.296000","2026-02-24T00:00:10.298000","2026-02-24T00:00:10.299000","2026-02-24T00:00:10.300000","2026-02-24T00:00:10.301000","2026-02-24T00:00:10.302000","2026-02-24T00:00:10.304000","2026-02-24T00:00:10.305000","2026-02-24T00:00:10.306000","2026-02-24T00:00:10.307000","2026-02-24T00:00:10.309000","2026-02-24T00:00:10.310000","2026-02-24T00:00:10.315000","2026-02-24T00:00:10.316000","2026-02-24T00:00:10.317000","2026-02-24T00:00:10.318000","2026-02-24T00:00:10.319000","2026-02-24T00:00:10.320000","2026-02-24T00:00:10.322000","2026-02-24T00:00:10.323000","2026-02-24T00:00:10.324000","2026-02-24T00:00:10.325000","2026-02-24T00:00:10.327000","2026-02-24T00:00:10.328000","2026-02-24T00:00:10.329000","2026-02-24T00:00:10.330000","2026-02-24T00:00:10.331000","2026-02-24T00:00:10.333000","2026-02-24T00:00:10.334000","2026-02-24T00:00:10.335000","2026-02-24T00:00:10.336000","2026-02-24T00:00:10.338000","2026-02-24T00:00:10.339000","2026-02-24T00:00:10.340000","2026-02-24T00:00:10.341000","2026-02-24T00:00:10.342000","2026-02-24T00:00:10.344000","2026-02-24T00:00:10.345000","2026-02-24T00:00:10.346000","2026-02-24T00:00:10.347000","2026-02-24T00:00:10.349000","2026-02-24T00:00:10.350000","2026-02-24T00:00:10.351000","2026-02-24T00:00:10.352000","2026-02-24T00:00:10.353000","2026-02-24T00:00:10.355000","2026-02-24T00:00:10.356000","2026-02-24T00:00:10.357000","2026-02-24T00:00:10.358000","2026-02-24T00:00:10.360000","2026-02-24T00:00:10.361000","2026-02-24T00:00:10.362000","2026-02-24T00:00:10.364000","2026-02-24T00:00:10.365000","2026-02-24T00:00:10.366000","2026-02-24T00:00:10.367000","2026-02-24T00:00:10.369000","2026-02-24T00:00:10.370000","2026-02-24T00:00:10.371000","2026-02-24T00:00:10.372000","2026-02-24T00:00:10.374000","2026-02-24T00:00:10.375000","2026-02-24T00:00:10.376000","2026-02-24T00:00:10.378000","2026-02-24T00:00:10.379000","2026-02-24T00:00:10.380000","2026-02-24T00:00:10.381000","2026-02-24T00:00:10.383000","2026-02-24T00:00:10.384000","2026-02-24T00:00:10.385000","2026-02-24T00:00:10.387000","2026-02-24T00:00:10.388000","2026-02-24T00:00:10.389000","2026-02-24T00:00:10.390000","2026-02-24T00:00:10.392000","2026-02-24T00:00:10.393000","2026-02-24T00:00:10.394000","2026-02-24T00:00:10.395000","2026-02-24T00:00:10.396000","2026-02-24T00:00:10.398000","2026-02-24T00:00:10.399000","2026-02-24T00:00:10.400000","2026-02-24T00:00:10.401000","2026-02-24T00:00:10.402000","2026-02-24T00:00:10.404000","2026-02-24T00:00:10.405000","2026-02-24T00:00:10.406000","2026-02-24T00:00:10.407000","2026-02-24T00:00:10.409000","2026-02-24T00:00:10.410000","2026-02-24T00:00:10.411000","2026-02-24T00:00:10.412000","2026-02-24T00:00:10.413000","2026-02-24T00:00:10.415000","2026-02-24T00:00:10.416000","2026-02-24T00:00:10.417000","2026-02-24T00:00:10.418000","2026-02-24T00:00:10.420000","2026-02-24T00:00:10.421000","2026-02-24T00:00:10.422000","2026-02-24T00:00:10.423000","2026-02-24T00:00:10.425000","2026-02-24T00:00:10.426000","2026-02-24T00:00:10.427000","2026-02-24T00:00:10.428000","2026-02-24T00:00:10.429000","2026-02-24T00:00:10.431000","2026-02-24T00:00:10.432000","2026-02-24T00:00:10.433000","2026-02-24T00:00:10.434000","2026-02-24T00:00:10.436000","2026-02-24T00:00:10.437000","2026-02-24T00:00:10.438000","2026-02-24T00:00:10.439000","2026-02-24T00:00:10.440000","2026-02-24T00:00:10.442000","2026-02-24T00:00:10.443000","2026-02-24T00:00:10.444000","2026-02-24T00:00:10.445000","2026-02-24T00:00:10.447000","2026-02-24T00:00:10.448000","2026-02-24T00:00:10.449000","2026-02-24T00:00:10.450000","2026-02-24T00:00:10.452000","2026-02-24T00:00:10.453000","2026-02-24T00:00:10.454000","2026-02-24T00:00:10.456000","2026-02-24T00:00:10.457000","2026-02-24T00:00:10.458000","2026-02-24T00:00:10.459000","2026-02-24T00:00:10.461000","2026-02-24T00:00:10.462000","2026-02-24T00:00:10.463000","2026-02-24T00:00:10.465000","2026-02-24T00:00:10.466000","2026-02-24T00:00:10.467000","2026-02-24T00:00:10.468000","2026-02-24T00:00:10.470000","2026-02-24T00:00:10.471000","2026-02-24T00:00:10.472000","2026-02-24T00:00:10.473000","2026-02-24T00:00:10.475000","2026-02-24T00:00:10.476000","2026-02-24T00:00:10.477000","2026-02-24T00:00:10.478000","2026-02-24T00:00:10.480000","2026-02-24T00:00:10.481000","2026-02-24T00:00:10.482000","2026-02-24T00:00:10.484000","2026-02-24T00:00:10.485000","2026-02-24T00:00:10.486000","2026-02-24T00:00:10.487000","2026-02-24T00:00:10.489000","2026-02-24T00:00:10.490000","2026-02-24T00:00:10.491000","2026-02-24T00:00:10.492000","2026-02-24T00:00:10.494000","2026-02-24T00:00:10.495000","2026-02-24T00:00:10.496000","2026-02-24T00:00:10.498000","2026-02-24T00:00:10.499000","2026-02-24T00:00:10.500000","2026-02-24T00:00:10.501000","2026-02-24T00:00:10.503000","2026-02-24T00:00:10.504000","2026-02-24T00:00:10.505000","2026-02-24T00:00:10.506000","2026-02-24T00:00:10.508000","2026-02-24T00:00:10.509000","2026-02-24T00:00:10.510000","2026-02-24T00:00:10.511000","2026-02-24T00:00:10.513000","2026-02-24T00:00:10.514000","2026-02-24T00:00:10.515000","2026-02-24T00:00:10.517000","2026-02-24T00:00:10.518000","2026-02-24T00:00:10.519000","2026-02-24T00:00:10.520000","2026-02-24T00:00:10.522000","2026-02-24T00:00:10.523000","2026-02-24T00:00:10.524000","2026-02-24T00:00:10.525000","2026-02-24T00:00:10.527000","2026-02-24T00:00:10.528000","2026-02-24T00:00:10.529000","2026-02-24T00:00:10.530000","2026-02-24T00:00:10.532000","2026-02-24T00:00:10.533000","2026-02-24T00:00:10.534000","2026-02-24T00:00:10.536000","2026-02-24T00:00:10.537000","2026-02-24T00:00:10.538000","2026-02-24T00:00:10.540000","2026-02-24T00:00:10.541000","2026-02-24T00:00:10.542000","2026-02-24T00:00:10.543000","2026-02-24T00:00:10.545000","2026-02-24T00:00:10.546000","2026-02-24T00:00:10.547000","2026-02-24T00:00:10.548000","2026-02-24T00:00:10.550000","2026-02-24T00:00:10.551000","2026-02-24T00:00:10.552000","2026-02-24T00:00:10.553000","2026-02-24T00:00:10.555000","2026-02-24T00:00:10.556000","2026-02-24T00:00:10.557000","2026-02-24T00:00:10.559000","2026-02-24T00:00:10.560000","2026-02-24T00:00:10.561000","2026-02-24T00:00:10.562000","2026-02-24T00:00:10.564000","2026-02-24T00:00:10.565000","2026-02-24T00:00:10.566000","2026-02-24T00:00:10.567000","2026-02-24T00:00:10.569000","2026-02-24T00:00:10.570000","2026-02-24T00:00:10.571000","2026-02-24T00:00:10.573000","2026-02-24T00:00:10.574000","2026-02-24T00:00:10.575000","2026-02-24T00:00:10.576000","2026-02-24T00:00:10.577000","2026-02-24T00:00:10.579000","2026-02-24T00:00:10.580000","2026-02-24T00:00:10.581000","2026-02-24T00:00:10.582000","2026-02-24T00:00:10.584000","2026-02-24T00:00:10.585000","2026-02-24T00:00:10.586000","2026-02-24T00:00:10.588000","2026-02-24T00:00:10.589000","2026-02-24T00:00:10.590000","2026-02-24T00:00:10.591000","2026-02-24T00:00:10.592000","2026-02-24T00:00:10.594000","2026-02-24T00:00:10.595000","2026-02-24T00:00:10.596000","2026-02-24T00:00:10.598000","2026-02-24T00:00:10.599000","2026-02-24T00:00:10.600000","2026-02-24T00:00:10.601000","2026-02-24T00:00:10.603000","2026-02-24T00:00:10.604000","2026-02-24T00:00:10.605000","2026-02-24T00:00:10.606000","2026-02-24T00:00:10.608000","2026-02-24T00:00:10.609000","2026-02-24T00:00:10.610000","2026-02-24T00:00:10.612000","2026-02-24T00:00:10.613000","2026-02-24T00:00:10.614000","2026-02-24T00:00:10.615000","2026-02-24T00:00:10.617000","2026-02-24T00:00:10.618000","2026-02-24T00:00:10.619000","2026-02-24T00:00:10.620000","2026-02-24T00:00:10.622000","2026-02-24T00:00:10.623000","2026-02-24T00:00:10.624000","2026-02-24T00:00:10.625000","2026-02-24T00:00:10.627000","2026-02-24T00:00:10.628000","2026-02-24T00:00:10.629000","2026-02-24T00:00:10.630000","2026-02-24T00:00:10.632000","2026-02-24T00:00:10.633000","2026-02-24T00:00:10.634000","2026-02-24T00:00:10.636000","2026-02-24T00:00:10.637000","2026-02-24T00:00:10.638000","2026-02-24T00:00:10.639000","2026-02-24T00:00:10.641000","2026-02-24T00:00:10.642000","2026-02-24T00:00:10.643000","2026-02-24T00:00:10.645000","2026-02-24T00:00:10.646000","2026-02-24T00:00:10.647000","2026-02-24T00:00:10.648000","2026-02-24T00:00:10.650000","2026-02-24T00:00:10.651000","2026-02-24T00:00:10.652000","2026-02-24T00:00:10.654000","2026-02-24T00:00:10.655000","2026-02-24T00:00:10.656000","2026-02-24T00:00:10.657000","2026-02-24T00:00:10.658000","2026-02-24T00:00:10.660000","2026-02-24T00:00:10.661000","2026-02-24T00:00:10.662000","2026-02-24T00:00:10.663000","2026-02-24T00:00:10.665000","2026-02-24T00:00:10.666000","2026-02-24T00:00:10.667000","2026-02-24T00:00:10.669000","2026-02-24T00:00:10.040000","2026-02-24T00:00:10.041000","2026-02-24T00:00:10.042000","2026-02-24T00:00:10.043000","2026-02-24T00:00:10.044000","2026-02-24T00:00:10.046000","2026-02-24T00:00:10.047000","2026-02-24T00:00:10.048000","2026-02-24T00:00:10.049000","2026-02-24T00:00:10.050000","2026-02-24T00:00:10.051000","2026-02-24T00:00:10.053000","2026-02-24T00:00:10.054000","2026-02-24T00:00:10.055000","2026-02-24T00:00:10.056000","2026-02-24T00:00:10.064000","2026-02-24T00:00:10.065000","2026-02-24T00:00:10.066000","2026-02-24T00:00:10.067000","2026-02-24T00:00:10.068000","2026-02-24T00:00:10.070000","2026-02-24T00:00:10.071000","2026-02-24T00:00:10.072000","2026-02-24T00:00:10.073000","2026-02-24T00:00:10.074000","2026-02-24T00:00:10.075000","2026-02-24T00:00:10.077000","2026-02-24T00:00:10.078000","2026-02-24T00:00:10.079000","2026-02-24T00:00:10.080000","2026-02-24T00:00:10.081000","2026-02-24T00:00:10.083000","2026-02-24T00:00:10.084000","2026-02-24T00:00:10.085000","2026-02-24T00:00:10.086000","2026-02-24T00:00:10.088000","2026-02-24T00:00:10.089000","2026-02-24T00:00:10.090000","2026-02-24T00:00:10.091000","2026-02-24T00:00:10.092000","2026-02-24T00:00:10.093000","2026-02-24T00:00:10.094000","2026-02-24T00:00:10.095000","2026-02-24T00:00:10.097000","2026-02-24T00:00:10.098000","2026-02-24T00:00:10.099000","2026-02-24T00:00:10.100000","2026-02-24T00:00:10.101000","2026-02-24T00:00:10.102000","2026-02-24T00:00:10.104000","2026-02-24T00:00:10.105000","2026-02-24T00:00:10.106000","2026-02-24T00:00:10.107000","2026-02-24T00:00:10.108000","2026-02-24T00:00:10.109000","2026-02-24T00:00:10.111000","2026-02-24T00:00:10.112000","2026-02-24T00:00:10.113000","2026-02-24T00:00:10.114000","2026-02-24T00:00:10.115000","2026-02-24T00:00:10.117000","2026-02-24T00:00:10.118000","2026-02-24T00:00:10.119000","2026-02-24T00:00:10.120000","2026-02-24T00:00:10.121000","2026-02-24T00:00:10.122000","2026-02-24T00:00:10.124000","2026-02-24T00:00:10.125000","2026-02-24T00:00:10.126000","2026-02-24T00:00:10.127000","2026-02-24T00:00:10.128000","2026-02-24T00:00:10.129000","2026-02-24T00:00:10.131000","2026-02-24T00:00:10.132000","2026-02-24T00:00:10.133000","2026-02-24T00:00:10.134000","2026-02-24T00:00:10.135000","2026-02-24T00:00:10.136000","2026-02-24T00:00:10.138000","2026-02-24T00:00:10.139000","2026-02-24T00:00:10.140000","2026-02-24T00:00:10.141000","2026-02-24T00:00:10.142000","2026-02-24T00:00:10.144000","2026-02-24T00:00:10.145000","2026-02-24T00:00:10.146000","2026-02-24T00:00:10.147000","2026-02-24T00:00:10.148000","2026-02-24T00:00:10.149000","2026-02-24T00:00:10.151000","2026-02-24T00:00:10.152000","2026-02-24T00:00:10.153000","2026-02-24T00:00:10.154000","2026-02-24T00:00:10.155000","2026-02-24T00:00:10.156000","2026-02-24T00:00:10.158000","2026-02-24T00:00:10.159000","2026-02-24T00:00:10.160000","2026-02-24T00:00:10.161000","2026-02-24T00:00:10.162000","2026-02-24T00:00:10.163000","2026-02-24T00:00:10.164000","2026-02-24T00:00:10.166000","2026-02-24T00:00:10.167000","2026-02-24T00:00:10.168000","2026-02-24T00:00:10.169000","2026-02-24T00:00:10.170000","2026-02-24T00:00:10.171000","2026-02-24T00:00:10.173000","2026-02-24T00:00:10.174000","2026-02-24T00:00:10.175000","2026-02-24T00:00:10.176000","2026-02-24T00:00:10.177000","2026-02-24T00:00:10.179000","2026-02-24T00:00:10.180000","2026-02-24T00:00:10.181000","2026-02-24T00:00:10.182000","2026-02-24T00:00:10.183000","2026-02-24T00:00:10.184000","2026-02-24T00:00:10.186000","2026-02-24T00:00:10.187000","2026-02-24T00:00:10.188000","2026-02-24T00:00:10.189000","2026-02-24T00:00:10.190000","2026-02-24T00:00:10.191000","2026-02-24T00:00:10.193000","2026-02-24T00:00:10.194000","2026-02-24T00:00:10.195000","2026-02-24T00:00:10.196000","2026-02-24T00:00:10.197000","2026-02-24T00:00:10.199000","2026-02-24T00:00:10.200000","2026-02-24T00:00:10.201000","2026-02-24T00:00:10.202000","2026-02-24T00:00:10.204000","2026-02-24T00:00:10.205000","2026-02-24T00:00:10.206000","2026-02-24T00:00:10.207000","2026-02-24T00:00:10.208000","2026-02-24T00:00:10.210000","2026-02-24T00:00:10.211000","2026-02-24T00:00:10.212000","2026-02-24T00:00:10.213000","2026-02-24T00:00:10.214000","2026-02-24T00:00:10.216000","2026-02-24T00:00:10.217000","2026-02-24T00:00:10.218000","2026-02-24T00:00:10.219000","2026-02-24T00:00:10.221000","2026-02-24T00:00:10.222000","2026-02-24T00:00:10.223000","2026-02-24T00:00:10.224000","2026-02-24T00:00:10.226000","2026-02-24T00:00:10.227000","2026-02-24T00:00:10.228000","2026-02-24T00:00:10.229000","2026-02-24T00:00:10.230000","2026-02-24T00:00:10.232000","2026-02-24T00:00:10.233000","2026-02-24T00:00:10.234000","2026-02-24T00:00:10.235000","2026-02-24T00:00:10.236000","2026-02-24T00:00:10.238000","2026-02-24T00:00:10.239000","2026-02-24T00:00:10.240000","2026-02-24T00:00:10.241000","2026-02-24T00:00:10.243000","2026-02-24T00:00:10.244000","2026-02-24T00:00:10.245000","2026-02-24T00:00:10.246000","2026-02-24T00:00:10.247000","2026-02-24T00:00:10.249000","2026-02-24T00:00:10.250000","2026-02-24T00:00:10.251000","2026-02-24T00:00:10.252000","2026-02-24T00:00:10.254000","2026-02-24T00:00:10.255000","2026-02-24T00:00:10.256000","2026-02-24T00:00:10.257000","2026-02-24T00:00:10.258000","2026-02-24T00:00:10.260000","2026-02-24T00:00:10.261000","2026-02-24T00:00:10.262000","2026-02-24T00:00:10.263000","2026-02-24T00:00:10.265000","2026-02-24T00:00:10.266000","2026-02-24T00:00:10.267000","2026-02-24T00:00:10.268000","2026-02-24T00:00:10.270000","2026-02-24T00:00:10.271000","2026-02-24T00:00:10.272000","2026-02-24T00:00:10.273000","2026-02-24T00:00:10.275000","2026-02-24T00:00:10.276000","2026-02-24T00:00:10.277000","2026-02-24T00:00:10.278000","2026-02-24T00:00:10.279000","2026-02-24T00:00:10.280000","2026-02-24T00:00:10.282000","2026-02-24T00:00:10.283000","2026-02-24T00:00:10.284000","2026-02-24T00:00:10.285000","2026-02-24T00:00:10.287000","2026-02-24T00:00:10.288000","2026-02-24T00:00:10.289000","2026-02-24T00:00:10.290000","2026-02-24T00:00:10.291000","2026-02-24T00:00:10.293000","2026-02-24T00:00:10.294000","2026-02-24T00:00:10.295000","2026-02-24T00:00:10.296000","2026-02-24T00:00:10.298000","2026-02-24T00:00:10.299000","2026-02-24T00:00:10.300000","2026-02-24T00:00:10.301000","2026-02-24T00:00:10.302000","2026-02-24T00:00:10.304000","2026-02-24T00:00:10.305000","2026-02-24T00:00:10.306000","2026-02-24T00:00:10.307000","2026-02-24T00:00:10.308000","2026-02-24T00:00:10.310000","2026-02-24T00:00:10.315000","2026-02-24T00:00:10.316000","2026-02-24T00:00:10.317000","2026-02-24T00:00:10.318000","2026-02-24T00:00:10.319000","2026-02-24T00:00:10.320000","2026-02-24T00:00:10.322000","2026-02-24T00:00:10.323000","2026-02-24T00:00:10.324000","2026-02-24T00:00:10.325000","2026-02-24T00:00:10.327000","2026-02-24T00:00:10.328000","2026-02-24T00:00:10.329000","2026-02-24T00:00:10.330000","2026-02-24T00:00:10.331000","2026-02-24T00:00:10.333000","2026-02-24T00:00:10.334000","2026-02-24T00:00:10.335000","2026-02-24T00:00:10.336000","2026-02-24T00:00:10.338000","2026-02-24T00:00:10.339000","2026-02-24T00:00:10.340000","2026-02-24T00:00:10.341000","2026-02-24T00:00:10.342000","2026-02-24T00:00:10.344000","2026-02-24T00:00:10.345000","2026-02-24T00:00:10.346000","2026-02-24T00:00:10.347000","2026-02-24T00:00:10.348000","2026-02-24T00:00:10.350000","2026-02-24T00:00:10.351000","2026-02-24T00:00:10.352000","2026-02-24T00:00:10.353000","2026-02-24T00:00:10.355000","2026-02-24T00:00:10.356000","2026-02-24T00:00:10.357000","2026-02-24T00:00:10.358000","2026-02-24T00:00:10.360000","2026-02-24T00:00:10.361000","2026-02-24T00:00:10.362000","2026-02-24T00:00:10.364000","2026-02-24T00:00:10.365000","2026-02-24T00:00:10.366000","2026-02-24T00:00:10.367000","2026-02-24T00:00:10.369000","2026-02-24T00:00:10.370000","2026-02-24T00:00:10.371000","2026-02-24T00:00:10.372000","2026-02-24T00:00:10.374000","2026-02-24T00:00:10.375000","2026-02-24T00:00:10.376000","2026-02-24T00:00:10.378000","2026-02-24T00:00:10.379000","2026-02-24T00:00:10.380000","2026-02-24T00:00:10.381000","2026-02-24T00:00:10.383000","2026-02-24T00:00:10.384000","2026-02-24T00:00:10.385000","2026-02-24T00:00:10.387000","2026-02-24T00:00:10.388000","2026-02-24T00:00:10.389000","2026-02-24T00:00:10.390000","2026-02-24T00:00:10.392000","2026-02-24T00:00:10.393000","2026-02-24T00:00:10.394000","2026-02-24T00:00:10.395000","2026-02-24T00:00:10.396000","2026-02-24T00:00:10.397000","2026-02-24T00:00:10.399000","2026-02-24T00:00:10.400000","2026-02-24T00:00:10.401000","2026-02-24T00:00:10.402000","2026-02-24T00:00:10.404000","2026-02-24T00:00:10.405000","2026-02-24T00:00:10.406000","2026-02-24T00:00:10.407000","2026-02-24T00:00:10.408000","2026-02-24T00:00:10.410000","2026-02-24T00:00:10.411000","2026-02-24T00:00:10.412000","2026-02-24T00:00:10.413000","2026-02-24T00:00:10.415000","2026-02-24T00:00:10.416000","2026-02-24T00:00:10.417000","2026-02-24T00:00:10.418000","2026-02-24T00:00:10.419000","2026-02-24T00:00:10.421000","2026-02-24T00:00:10.422000","2026-02-24T00:00:10.423000","2026-02-24T00:00:10.425000","2026-02-24T00:00:10.426000","2026-02-24T00:00:10.427000","2026-02-24T00:00:10.428000","2026-02-24T00:00:10.429000","2026-02-24T00:00:10.431000","2026-02-24T00:00:10.432000","2026-02-24T00:00:10.433000","2026-02-24T00:00:10.434000","2026-02-24T00:00:10.435000","2026-02-24T00:00:10.437000","2026-02-24T00:00:10.438000","2026-02-24T00:00:10.439000","2026-02-24T00:00:10.440000","2026-02-24T00:00:10.442000","2026-02-24T00:00:10.443000","2026-02-24T00:00:10.444000","2026-02-24T00:00:10.445000","2026-02-24T00:00:10.447000","2026-02-24T00:00:10.448000","2026-02-24T00:00:10.449000","2026-02-24T00:00:10.450000","2026-02-24T00:00:10.452000","2026-02-24T00:00:10.453000","2026-02-24T00:00:10.454000","2026-02-24T00:00:10.456000","2026-02-24T00:00:10.457000","2026-02-24T00:00:10.458000","2026-02-24T00:00:10.459000","2026-02-24T00:00:10.461000","2026-02-24T00:00:10.462000","2026-02-24T00:00:10.463000","2026-02-24T00:00:10.465000","2026-02-24T00:00:10.466000","2026-02-24T00:00:10.467000","2026-02-24T00:00:10.468000","2026-02-24T00:00:10.470000","2026-02-24T00:00:10.471000","2026-02-24T00:00:10.472000","2026-02-24T00:00:10.473000","2026-02-24T00:00:10.475000","2026-02-24T00:00:10.476000","2026-02-24T00:00:10.477000","2026-02-24T00:00:10.478000","2026-02-24T00:00:10.480000","2026-02-24T00:00:10.481000","2026-02-24T00:00:10.482000","2026-02-24T00:00:10.484000","2026-02-24T00:00:10.485000","2026-02-24T00:00:10.486000","2026-02-24T00:00:10.487000","2026-02-24T00:00:10.489000","2026-02-24T00:00:10.490000","2026-02-24T00:00:10.491000","2026-02-24T00:00:10.492000","2026-02-24T00:00:10.494000","2026-02-24T00:00:10.495000","2026-02-24T00:00:10.496000","2026-02-24T00:00:10.498000","2026-02-24T00:00:10.499000","2026-02-24T00:00:10.500000","2026-02-24T00:00:10.501000","2026-02-24T00:00:10.503000","2026-02-24T00:00:10.504000","2026-02-24T00:00:10.505000","2026-02-24T00:00:10.506000","2026-02-24T00:00:10.508000","2026-02-24T00:00:10.509000","2026-02-24T00:00:10.510000","2026-02-24T00:00:10.511000","2026-02-24T00:00:10.513000","2026-02-24T00:00:10.514000","2026-02-24T00:00:10.515000","2026-02-24T00:00:10.516000","2026-02-24T00:00:10.518000","2026-02-24T00:00:10.519000","2026-02-24T00:00:10.520000","2026-02-24T00:00:10.522000","2026-02-24T00:00:10.523000","2026-02-24T00:00:10.524000","2026-02-24T00:00:10.525000","2026-02-24T00:00:10.527000","2026-02-24T00:00:10.528000","2026-02-24T00:00:10.529000","2026-02-24T00:00:10.530000","2026-02-24T00:00:10.532000","2026-02-24T00:00:10.533000","2026-02-24T00:00:10.534000","2026-02-24T00:00:10.536000","2026-02-24T00:00:10.537000","2026-02-24T00:00:10.538000","2026-02-24T00:00:10.540000","2026-02-24T00:00:10.541000","2026-02-24T00:00:10.542000","2026-02-24T00:00:10.543000","2026-02-24T00:00:10.545000","2026-02-24T00:00:10.546000","2026-02-24T00:00:10.547000","2026-02-24T00:00:10.548000","2026-02-24T00:00:10.550000","2026-02-24T00:00:10.551000","2026-02-24T00:00:10.552000","2026-02-24T00:00:10.553000","2026-02-24T00:00:10.555000","2026-02-24T00:00:10.556000","2026-02-24T00:00:10.557000","2026-02-24T00:00:10.559000","2026-02-24T00:00:10.560000","2026-02-24T00:00:10.561000","2026-02-24T00:00:10.562000","2026-02-24T00:00:10.564000","2026-02-24T00:00:10.565000","2026-02-24T00:00:10.566000","2026-02-24T00:00:10.567000","2026-02-24T00:00:10.569000","2026-02-24T00:00:10.570000","2026-02-24T00:00:10.571000","2026-02-24T00:00:10.573000","2026-02-24T00:00:10.316000","2026-02-24T00:00:10.317000","2026-02-24T00:00:10.318000","2026-02-24T00:00:10.319000","2026-02-24T00:00:10.320000","2026-02-24T00:00:10.322000","2026-02-24T00:00:10.323000","2026-02-24T00:00:10.324000","2026-02-24T00:00:10.325000","2026-02-24T00:00:10.326000","2026-02-24T00:00:10.328000","2026-02-24T00:00:10.329000","2026-02-24T00:00:10.330000","2026-02-24T00:00:10.331000","2026-02-24T00:00:10.333000","2026-02-24T00:00:10.334000","2026-02-24T00:00:10.335000","2026-02-24T00:00:10.336000","2026-02-24T00:00:10.337000","2026-02-24T00:00:10.339000","2026-02-24T00:00:10.340000","2026-02-24T00:00:10.341000","2026-02-24T00:00:10.342000","2026-02-24T00:00:10.344000","2026-02-24T00:00:10.345000","2026-02-24T00:00:10.346000","2026-02-24T00:00:10.347000","2026-02-24T00:00:10.348000","2026-02-24T00:00:10.350000","2026-02-24T00:00:10.351000","2026-02-24T00:00:10.352000","2026-02-24T00:00:10.353000","2026-02-24T00:00:10.355000","2026-02-24T00:00:10.356000","2026-02-24T00:00:10.357000","2026-02-24T00:00:10.358000","2026-02-24T00:00:10.360000","2026-02-24T00:00:10.361000","2026-02-24T00:00:10.362000","2026-02-24T00:00:10.363000","2026-02-24T00:00:10.365000","2026-02-24T00:00:10.366000","2026-02-24T00:00:10.367000","2026-02-24T00:00:10.369000","2026-02-24T00:00:10.370000","2026-02-24T00:00:10.371000","2026-02-24T00:00:10.372000","2026-02-24T00:00:10.374000","2026-02-24T00:00:10.375000","2026-02-24T00:00:10.376000","2026-02-24T00:00:10.377000","2026-02-24T00:00:10.379000","2026-02-24T00:00:10.380000","2026-02-24T00:00:10.381000","2026-02-24T00:00:10.383000","2026-02-24T00:00:10.384000","2026-02-24T00:00:10.385000","2026-02-24T00:00:10.386000","2026-02-24T00:00:10.388000","2026-02-24T00:00:10.389000","2026-02-24T00:00:10.390000","2026-02-24T00:00:10.391000","2026-02-24T00:00:10.393000","2026-02-24T00:00:10.394000","2026-02-24T00:00:10.395000","2026-02-24T00:00:10.396000","2026-02-24T00:00:10.397000","2026-02-24T00:00:10.399000","2026-02-24T00:00:10.400000","2026-02-24T00:00:10.401000","2026-02-24T00:00:10.402000","2026-02-24T00:00:10.404000","2026-02-24T00:00:10.405000","2026-02-24T00:00:10.406000","2026-02-24T00:00:10.407000","2026-02-24T00:00:10.408000","2026-02-24T00:00:10.410000","2026-02-24T00:00:10.411000","2026-02-24T00:00:10.412000","2026-02-24T00:00:10.413000","2026-02-24T00:00:10.415000","2026-02-24T00:00:10.416000","2026-02-24T00:00:10.417000","2026-02-24T00:00:10.418000","2026-02-24T00:00:10.419000","2026-02-24T00:00:10.421000","2026-02-24T00:00:10.422000","2026-02-24T00:00:10.423000","2026-02-24T00:00:10.424000","2026-02-24T00:00:10.426000","2026-02-24T00:00:10.427000","2026-02-24T00:00:10.428000","2026-02-24T00:00:10.429000","2026-02-24T00:00:10.430000","2026-02-24T00:00:10.432000","2026-02-24T00:00:10.433000","2026-02-24T00:00:10.434000","2026-02-24T00:00:10.435000","2026-02-24T00:00:10.437000","2026-02-24T00:00:10.438000","2026-02-24T00:00:10.439000","2026-02-24T00:00:10.440000","2026-02-24T00:00:10.442000","2026-02-24T00:00:10.443000","2026-02-24T00:00:10.444000","2026-02-24T00:00:10.445000","2026-02-24T00:00:10.447000","2026-02-24T00:00:10.448000","2026-02-24T00:00:10.449000","2026-02-24T00:00:10.450000","2026-02-24T00:00:10.452000","2026-02-24T00:00:10.453000","2026-02-24T00:00:10.454000","2026-02-24T00:00:10.455000","2026-02-24T00:00:10.457000","2026-02-24T00:00:10.458000","2026-02-24T00:00:10.459000","2026-02-24T00:00:10.461000","2026-02-24T00:00:10.462000","2026-02-24T00:00:10.463000","2026-02-24T00:00:10.464000","2026-02-24T00:00:10.466000","2026-02-24T00:00:10.467000","2026-02-24T00:00:10.468000","2026-02-24T00:00:10.469000","2026-02-24T00:00:10.471000","2026-02-24T00:00:10.472000","2026-02-24T00:00:10.473000","2026-02-24T00:00:10.475000","2026-02-24T00:00:10.476000","2026-02-24T00:00:10.477000","2026-02-24T00:00:10.478000","2026-02-24T00:00:10.480000","2026-02-24T00:00:10.481000","2026-02-24T00:00:10.482000","2026-02-24T00:00:10.484000","2026-02-24T00:00:10.485000","2026-02-24T00:00:10.486000","2026-02-24T00:00:10.487000","2026-02-24T00:00:10.489000","2026-02-24T00:00:10.490000","2026-02-24T00:00:10.491000","2026-02-24T00:00:10.492000","2026-02-24T00:00:10.494000","2026-02-24T00:00:10.495000","2026-02-24T00:00:10.496000","2026-02-24T00:00:10.498000","2026-02-24T00:00:10.499000","2026-02-24T00:00:10.500000","2026-02-24T00:00:10.501000","2026-02-24T00:00:10.502000","2026-02-24T00:00:10.504000","2026-02-24T00:00:10.505000","2026-02-24T00:00:10.506000","2026-02-24T00:00:10.508000","2026-02-24T00:00:10.509000","2026-02-24T00:00:10.510000","2026-02-24T00:00:10.511000","2026-02-24T00:00:10.513000","2026-02-24T00:00:10.514000","2026-02-24T00:00:10.515000","2026-02-24T00:00:10.516000","2026-02-24T00:00:10.518000","2026-02-24T00:00:10.519000","2026-02-24T00:00:10.520000","2026-02-24T00:00:10.522000","2026-02-24T00:00:10.523000","2026-02-24T00:00:10.524000","2026-02-24T00:00:10.525000","2026-02-24T00:00:10.527000","2026-02-24T00:00:10.528000","2026-02-24T00:00:10.529000","2026-02-24T00:00:10.530000","2026-02-24T00:00:10.532000","2026-02-24T00:00:10.533000","2026-02-24T00:00:10.534000","2026-02-24T00:00:10.536000","2026-02-24T00:00:10.537000","2026-02-24T00:00:10.538000","2026-02-24T00:00:10.539000","2026-02-24T00:00:10.541000","2026-02-24T00:00:10.542000","2026-02-24T00:00:10.543000","2026-02-24T00:00:10.544000","2026-02-24T00:00:10.546000","2026-02-24T00:00:10.547000","2026-02-24T00:00:10.548000","2026-02-24T00:00:10.550000","2026-02-24T00:00:10.551000","2026-02-24T00:00:10.552000","2026-02-24T00:00:10.553000","2026-02-24T00:00:10.555000","2026-02-24T00:00:10.556000","2026-02-24T00:00:10.557000","2026-02-24T00:00:10.558000","2026-02-24T00:00:10.560000","2026-02-24T00:00:10.561000","2026-02-24T00:00:10.562000","2026-02-24T00:00:10.564000","2026-02-24T00:00:10.565000","2026-02-24T00:00:10.566000","2026-02-24T00:00:10.567000","2026-02-24T00:00:10.569000","2026-02-24T00:00:10.570000","2026-02-24T00:00:10.571000","2026-02-24T00:00:10.572000","2026-02-24T00:00:10.574000","2026-02-24T00:00:10.575000","2026-02-24T00:00:10.576000","2026-02-24T00:00:10.577000","2026-02-24T00:00:10.579000","2026-02-24T00:00:10.580000","2026-02-24T00:00:10.581000","2026-02-24T00:00:10.582000","2026-02-24T00:00:10.584000","2026-02-24T00:00:10.585000","2026-02-24T00:00:10.586000","2026-02-24T00:00:10.588000","2026-02-24T00:00:10.589000","2026-02-24T00:00:10.590000","2026-02-24T00:00:10.591000","2026-02-24T00:00:10.592000","2026-02-24T00:00:10.594000","2026-02-24T00:00:10.595000","2026-02-24T00:00:10.596000","2026-02-24T00:00:10.598000","2026-02-24T00:00:10.599000","2026-02-24T00:00:10.600000","2026-02-24T00:00:10.601000","2026-02-24T00:00:10.603000","2026-02-24T00:00:10.604000","2026-02-24T00:00:10.605000","2026-02-24T00:00:10.606000","2026-02-24T00:00:10.608000","2026-02-24T00:00:10.609000","2026-02-24T00:00:10.610000","2026-02-24T00:00:10.612000","2026-02-24T00:00:10.613000","2026-02-24T00:00:10.614000","2026-02-24T00:00:10.615000","2026-02-24T00:00:10.617000","2026-02-24T00:00:10.618000","2026-02-24T00:00:10.619000","2026-02-24T00:00:10.620000","2026-02-24T00:00:10.622000","2026-02-24T00:00:10.623000","2026-02-24T00:00:10.624000","2026-02-24T00:00:10.625000","2026-02-24T00:00:10.627000","2026-02-24T00:00:10.628000","2026-02-24T00:00:10.629000","2026-02-24T00:00:10.630000","2026-02-24T00:00:10.632000","2026-02-24T00:00:10.633000","2026-02-24T00:00:10.634000","2026-02-24T00:00:10.636000","2026-02-24T00:00:10.637000","2026-02-24T00:00:10.638000","2026-02-24T00:00:10.639000","2026-02-24T00:00:10.641000","2026-02-24T00:00:10.642000","2026-02-24T00:00:10.643000","2026-02-24T00:00:10.645000","2026-02-24T00:00:10.646000","2026-02-24T00:00:10.647000","2026-02-24T00:00:10.648000","2026-02-24T00:00:10.650000","2026-02-24T00:00:10.651000","2026-02-24T00:00:10.652000","2026-02-24T00:00:10.654000","2026-02-24T00:00:10.655000","2026-02-24T00:00:10.656000","2026-02-24T00:00:10.657000","2026-02-24T00:00:10.658000","2026-02-24T00:00:10.660000","2026-02-24T00:00:10.661000","2026-02-24T00:00:10.662000","2026-02-24T00:00:10.663000","2026-02-24T00:00:10.665000","2026-02-24T00:00:10.666000","2026-02-24T00:00:10.667000","2026-02-24T00:00:10.669000","2026-02-24T00:00:10.670000","2026-02-24T00:00:10.671000","2026-02-24T00:00:10.672000","2026-02-24T00:00:10.673000","2026-02-24T00:00:10.674000","2026-02-24T00:00:10.676000","2026-02-24T00:00:10.677000","2026-02-24T00:00:10.678000","2026-02-24T00:00:10.679000","2026-02-24T00:00:10.680000","2026-02-24T00:00:10.681000","2026-02-24T00:00:10.683000","2026-02-24T00:00:10.684000","2026-02-24T00:00:10.685000","2026-02-24T00:00:10.686000","2026-02-24T00:00:10.687000","2026-02-24T00:00:10.688000","2026-02-24T00:00:10.690000","2026-02-24T00:00:10.691000","2026-02-24T00:00:10.692000","2026-02-24T00:00:10.693000","2026-02-24T00:00:10.694000","2026-02-24T00:00:10.695000","2026-02-24T00:00:10.697000","2026-02-24T00:00:10.698000","2026-02-24T00:00:10.699000","2026-02-24T00:00:10.700000","2026-02-24T00:00:10.701000","2026-02-24T00:00:10.703000","2026-02-24T00:00:10.704000","2026-02-24T00:00:10.705000","2026-02-24T00:00:10.706000","2026-02-24T00:00:10.707000","2026-02-24T00:00:10.708000","2026-02-24T00:00:10.710000","2026-02-24T00:00:10.711000","2026-02-24T00:00:10.712000","2026-02-24T00:00:10.713000","2026-02-24T00:00:10.714000","2026-02-24T00:00:10.715000","2026-02-24T00:00:10.717000","2026-02-24T00:00:10.718000","2026-02-24T00:00:10.719000","2026-02-24T00:00:10.720000","2026-02-24T00:00:10.721000","2026-02-24T00:00:10.722000","2026-02-24T00:00:10.724000","2026-02-24T00:00:10.725000","2026-02-24T00:00:10.726000","2026-02-24T00:00:10.727000","2026-02-24T00:00:10.728000","2026-02-24T00:00:10.729000","2026-02-24T00:00:10.731000","2026-02-24T00:00:10.732000","2026-02-24T00:00:10.733000","2026-02-24T00:00:10.734000","2026-02-24T00:00:10.735000","2026-02-24T00:00:10.737000","2026-02-24T00:00:10.738000","2026-02-24T00:00:10.739000","2026-02-24T00:00:10.740000","2026-02-24T00:00:10.741000","2026-02-24T00:00:10.742000","2026-02-24T00:00:10.743000","2026-02-24T00:00:10.745000","2026-02-24T00:00:10.746000","2026-02-24T00:00:10.747000","2026-02-24T00:00:10.748000","2026-02-24T00:00:10.749000","2026-02-24T00:00:10.751000","2026-02-24T00:00:10.752000","2026-02-24T00:00:10.753000","2026-02-24T00:00:10.754000","2026-02-24T00:00:10.755000","2026-02-24T00:00:10.756000","2026-02-24T00:00:10.758000","2026-02-24T00:00:10.759000","2026-02-24T00:00:10.760000","2026-02-24T00:00:10.761000","2026-02-24T00:00:10.762000","2026-02-24T00:00:10.763000","2026-02-24T00:00:10.765000","2026-02-24T00:00:10.766000","2026-02-24T00:00:10.767000","2026-02-24T00:00:10.768000","2026-02-24T00:00:10.769000","2026-02-24T00:00:10.770000","2026-02-24T00:00:10.772000","2026-02-24T00:00:10.773000","2026-02-24T00:00:10.774000","2026-02-24T00:00:10.775000","2026-02-24T00:00:10.776000","2026-02-24T00:00:10.778000","2026-02-24T00:00:10.779000","2026-02-24T00:00:10.780000","2026-02-24T00:00:10.781000","2026-02-24T00:00:10.783000","2026-02-24T00:00:10.784000","2026-02-24T00:00:10.785000","2026-02-24T00:00:10.786000","2026-02-24T00:00:10.787000","2026-02-24T00:00:10.789000","2026-02-24T00:00:10.790000","2026-02-24T00:00:10.791000","2026-02-24T00:00:10.792000","2026-02-24T00:00:10.794000","2026-02-24T00:00:10.795000","2026-02-24T00:00:10.796000","2026-02-24T00:00:10.797000","2026-02-24T00:00:10.798000","2026-02-24T00:00:10.800000","2026-02-24T00:00:10.801000","2026-02-24T00:00:10.802000","2026-02-24T00:00:10.803000","2026-02-24T00:00:10.804000","2026-02-24T00:00:10.806000","2026-02-24T00:00:10.807000","2026-02-24T00:00:10.808000","2026-02-24T00:00:10.809000","2026-02-24T00:00:10.811000","2026-02-24T00:00:10.812000","2026-02-24T00:00:10.813000","2026-02-24T00:00:10.814000","2026-02-24T00:00:10.815000","2026-02-24T00:00:10.817000","2026-02-24T00:00:10.818000","2026-02-24T00:00:10.819000","2026-02-24T00:00:10.820000","2026-02-24T00:00:10.822000","2026-02-24T00:00:10.823000","2026-02-24T00:00:10.824000","2026-02-24T00:00:10.825000","2026-02-24T00:00:10.826000","2026-02-24T00:00:10.828000","2026-02-24T00:00:10.829000","2026-02-24T00:00:10.830000","2026-02-24T00:00:10.831000","2026-02-24T00:00:10.833000","2026-02-24T00:00:10.834000","2026-02-24T00:00:10.835000","2026-02-24T00:00:10.836000","2026-02-24T00:00:10.837000","2026-02-24T00:00:10.839000","2026-02-24T00:00:10.840000","2026-02-24T00:00:10.841000","2026-02-24T00:00:10.842000","2026-02-24T00:00:10.844000","2026-02-24T00:00:10.845000","2026-02-24T00:00:10.846000","2026-02-24T00:00:10.847000","2026-02-24T00:00:10.848000"],"customdata":[[13,120,"00:00:00.000","00:00:00.192","00:00:00.051","00:00:00.053","0.002s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.053","00:00:00.055","0.002s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.055","00:00:00.056","0.002s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.056","00:00:00.058","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.058","00:00:00.059","0.002s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.059","00:00:00.061","0.002s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.061","00:00:00.062","0.002s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.062","00:00:00.064","0.002s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.064","00:00:00.065","0.002s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.065","00:00:00.067","0.002s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.067","00:00:00.068","0.002s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.068","00:00:00.070","0.002s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.070","00:00:00.071","0.002s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.071","00:00:00.073","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.073","00:00:00.074","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.074","00:00:00.075","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.075","00:00:00.076","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.076","00:00:00.078","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.078","00:00:00.079","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.079","00:00:00.080","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.080","00:00:00.081","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.081","00:00:00.083","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.083","00:00:00.084","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.084","00:00:00.085","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.085","00:00:00.086","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.086","00:00:00.088","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.088","00:00:00.089","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.089","00:00:00.090","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.090","00:00:00.091","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.091","00:00:00.093","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.093","00:00:00.094","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.094","00:00:00.095","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.095","00:00:00.096","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.096","00:00:00.097","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.097","00:00:00.098","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.098","00:00:00.099","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.099","00:00:00.100","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.100","00:00:00.102","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.102","00:00:00.103","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.103","00:00:00.104","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.104","00:00:00.105","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.105","00:00:00.106","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.106","00:00:00.107","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.107","00:00:00.109","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.109","00:00:00.110","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.110","00:00:00.111","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.111","00:00:00.112","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.112","00:00:00.113","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.113","00:00:00.114","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.114","00:00:00.115","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.115","00:00:00.116","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.116","00:00:00.118","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.118","00:00:00.119","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.119","00:00:00.120","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.120","00:00:00.121","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.121","00:00:00.122","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.122","00:00:00.123","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.123","00:00:00.124","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.124","00:00:00.125","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.125","00:00:00.126","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.126","00:00:00.127","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.127","00:00:00.129","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.129","00:00:00.130","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.130","00:00:00.131","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.131","00:00:00.132","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.132","00:00:00.133","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.133","00:00:00.134","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.134","00:00:00.135","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.135","00:00:00.137","0.002s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.137","00:00:00.137","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.137","00:00:00.138","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.138","00:00:00.140","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.140","00:00:00.141","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.145","00:00:00.146","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.146","00:00:00.147","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.147","00:00:00.148","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.148","00:00:00.149","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.149","00:00:00.150","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.150","00:00:00.151","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.151","00:00:00.152","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.152","00:00:00.153","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.153","00:00:00.155","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.155","00:00:00.156","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.156","00:00:00.157","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.157","00:00:00.158","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.158","00:00:00.159","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.159","00:00:00.160","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.160","00:00:00.161","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.161","00:00:00.162","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.162","00:00:00.164","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.164","00:00:00.165","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.165","00:00:00.166","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.166","00:00:00.167","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.167","00:00:00.168","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.168","00:00:00.169","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.169","00:00:00.170","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.170","00:00:00.172","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.172","00:00:00.173","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.173","00:00:00.174","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.174","00:00:00.175","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.175","00:00:00.176","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.176","00:00:00.177","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.177","00:00:00.178","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.178","00:00:00.180","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.180","00:00:00.181","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.181","00:00:00.182","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.182","00:00:00.183","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.183","00:00:00.184","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.184","00:00:00.185","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.185","00:00:00.186","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.186","00:00:00.188","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.188","00:00:00.189","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.189","00:00:00.190","0.001s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.190","00:00:00.192","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.146","00:00:00.147","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.147","00:00:00.148","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.148","00:00:00.149","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.149","00:00:00.150","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.150","00:00:00.151","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.151","00:00:00.152","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.152","00:00:00.153","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.153","00:00:00.154","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.154","00:00:00.156","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.156","00:00:00.157","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.157","00:00:00.158","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.158","00:00:00.159","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.159","00:00:00.160","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.160","00:00:00.161","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.161","00:00:00.162","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.162","00:00:00.164","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.164","00:00:00.165","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.165","00:00:00.166","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.166","00:00:00.167","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.167","00:00:00.168","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.168","00:00:00.169","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.169","00:00:00.170","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.170","00:00:00.172","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.172","00:00:00.173","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.173","00:00:00.174","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.174","00:00:00.175","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.175","00:00:00.176","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.176","00:00:00.177","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.177","00:00:00.178","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.178","00:00:00.180","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.180","00:00:00.181","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.181","00:00:00.182","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.182","00:00:00.183","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.183","00:00:00.184","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.184","00:00:00.185","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.185","00:00:00.186","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.186","00:00:00.188","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.188","00:00:00.189","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.189","00:00:00.190","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.190","00:00:00.192","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.192","00:00:00.192","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.192","00:00:00.193","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.193","00:00:00.194","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.194","00:00:00.196","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.196","00:00:00.197","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.197","00:00:00.198","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.198","00:00:00.199","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.199","00:00:00.200","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.200","00:00:00.201","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.201","00:00:00.202","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.202","00:00:00.203","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.203","00:00:00.204","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.204","00:00:00.205","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.205","00:00:00.206","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.206","00:00:00.208","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.208","00:00:00.209","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.209","00:00:00.210","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.210","00:00:00.211","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.211","00:00:00.212","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.212","00:00:00.213","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.213","00:00:00.214","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.214","00:00:00.215","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.215","00:00:00.216","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.216","00:00:00.217","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.217","00:00:00.218","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.218","00:00:00.220","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.220","00:00:00.221","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.221","00:00:00.222","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.222","00:00:00.223","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.223","00:00:00.224","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.224","00:00:00.225","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.225","00:00:00.226","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.226","00:00:00.227","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.227","00:00:00.228","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.228","00:00:00.229","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.229","00:00:00.230","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.230","00:00:00.232","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.232","00:00:00.233","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.233","00:00:00.234","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.234","00:00:00.235","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.235","00:00:00.236","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.236","00:00:00.238","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.238","00:00:00.239","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.239","00:00:00.240","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.240","00:00:00.242","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.249","00:00:00.250","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.250","00:00:00.250","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.250","00:00:00.252","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.252","00:00:00.253","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.253","00:00:00.254","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.254","00:00:00.255","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.255","00:00:00.256","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.256","00:00:00.257","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.257","00:00:00.258","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.258","00:00:00.259","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.259","00:00:00.261","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.261","00:00:00.262","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.262","00:00:00.263","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.263","00:00:00.264","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.264","00:00:00.265","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.265","00:00:00.266","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.266","00:00:00.267","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.267","00:00:00.269","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.269","00:00:00.270","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.270","00:00:00.271","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.271","00:00:00.272","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.272","00:00:00.273","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.273","00:00:00.275","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.275","00:00:00.276","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.276","00:00:00.277","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.277","00:00:00.279","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.279","00:00:00.280","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.280","00:00:00.281","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.281","00:00:00.282","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.282","00:00:00.283","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.283","00:00:00.284","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.284","00:00:00.285","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.285","00:00:00.287","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.287","00:00:00.288","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.288","00:00:00.289","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.289","00:00:00.290","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.290","00:00:00.291","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.291","00:00:00.293","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.293","00:00:00.294","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.294","00:00:00.295","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.295","00:00:00.296","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.296","00:00:00.297","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.297","00:00:00.298","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.298","00:00:00.300","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.300","00:00:00.301","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.301","00:00:00.302","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.302","00:00:00.303","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.303","00:00:00.304","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.304","00:00:00.306","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.306","00:00:00.307","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.307","00:00:00.309","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.309","00:00:00.310","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.310","00:00:00.312","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.312","00:00:00.313","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.313","00:00:00.315","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.315","00:00:00.316","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.316","00:00:00.317","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.317","00:00:00.318","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.318","00:00:00.319","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.319","00:00:00.320","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.320","00:00:00.321","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.321","00:00:00.322","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.322","00:00:00.323","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.323","00:00:00.325","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.325","00:00:00.325","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.325","00:00:00.327","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.327","00:00:00.328","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.328","00:00:00.329","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.333","00:00:00.334","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.334","00:00:00.335","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.335","00:00:00.336","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.336","00:00:00.337","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.337","00:00:00.338","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.338","00:00:00.339","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.339","00:00:00.340","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.340","00:00:00.341","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.341","00:00:00.342","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.342","00:00:00.343","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.343","00:00:00.345","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.345","00:00:00.346","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.346","00:00:00.347","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.347","00:00:00.348","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.348","00:00:00.349","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.349","00:00:00.350","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.350","00:00:00.351","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.351","00:00:00.352","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.352","00:00:00.354","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.354","00:00:00.355","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.355","00:00:00.356","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.356","00:00:00.357","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.357","00:00:00.358","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.358","00:00:00.359","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.359","00:00:00.360","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.360","00:00:00.361","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.361","00:00:00.362","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.362","00:00:00.364","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.364","00:00:00.365","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.365","00:00:00.366","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.366","00:00:00.367","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.367","00:00:00.368","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.368","00:00:00.369","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.369","00:00:00.370","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.370","00:00:00.371","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.371","00:00:00.372","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.372","00:00:00.374","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.374","00:00:00.375","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.375","00:00:00.376","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.376","00:00:00.377","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.377","00:00:00.378","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.378","00:00:00.379","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.379","00:00:00.380","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.380","00:00:00.381","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.381","00:00:00.383","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.383","00:00:00.384","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.384","00:00:00.385","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.385","00:00:00.386","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.386","00:00:00.387","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.387","00:00:00.388","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.388","00:00:00.389","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.393","00:00:00.394","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.394","00:00:00.395","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.395","00:00:00.396","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.396","00:00:00.397","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.397","00:00:00.399","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.399","00:00:00.400","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.400","00:00:00.401","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.401","00:00:00.402","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.402","00:00:00.403","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.403","00:00:00.404","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.404","00:00:00.406","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.406","00:00:00.407","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.407","00:00:00.408","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.408","00:00:00.409","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.409","00:00:00.410","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.410","00:00:00.411","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.411","00:00:00.413","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.413","00:00:00.414","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.414","00:00:00.415","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.415","00:00:00.416","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.416","00:00:00.417","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.417","00:00:00.418","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.418","00:00:00.420","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.420","00:00:00.421","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.421","00:00:00.422","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.422","00:00:00.423","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.423","00:00:00.424","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.424","00:00:00.426","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.426","00:00:00.427","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.427","00:00:00.428","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.428","00:00:00.429","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.429","00:00:00.430","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.430","00:00:00.431","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.431","00:00:00.433","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.433","00:00:00.434","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.434","00:00:00.435","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.435","00:00:00.436","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.436","00:00:00.438","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.438","00:00:00.439","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.439","00:00:00.441","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.441","00:00:00.442","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.442","00:00:00.443","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.443","00:00:00.444","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.444","00:00:00.445","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.445","00:00:00.446","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.446","00:00:00.447","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.447","00:00:00.448","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.448","00:00:00.449","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.449","00:00:00.451","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.451","00:00:00.452","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.452","00:00:00.453","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.453","00:00:00.454","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.454","00:00:00.455","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.455","00:00:00.457","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.457","00:00:00.458","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.458","00:00:00.459","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.459","00:00:00.460","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.460","00:00:00.462","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.462","00:00:00.463","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.463","00:00:00.464","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.464","00:00:00.466","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.466","00:00:00.467","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.467","00:00:00.468","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.468","00:00:00.469","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.469","00:00:00.470","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.470","00:00:00.471","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.471","00:00:00.473","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.473","00:00:00.474","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.474","00:00:00.475","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.475","00:00:00.476","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.476","00:00:00.477","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.477","00:00:00.479","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.479","00:00:00.480","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.480","00:00:00.481","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.481","00:00:00.482","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.482","00:00:00.484","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.484","00:00:00.485","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.485","00:00:00.486","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.486","00:00:00.487","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.487","00:00:00.489","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.489","00:00:00.490","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.490","00:00:00.492","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.492","00:00:00.493","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.493","00:00:00.493","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.493","00:00:00.495","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.495","00:00:00.496","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.496","00:00:00.497","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.497","00:00:00.498","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.498","00:00:00.500","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.500","00:00:00.501","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.501","00:00:00.502","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.502","00:00:00.503","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.503","00:00:00.504","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.504","00:00:00.506","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.506","00:00:00.507","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.507","00:00:00.508","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.508","00:00:00.509","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.509","00:00:00.511","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.511","00:00:00.512","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.512","00:00:00.513","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.513","00:00:00.514","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.518","00:00:00.519","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.519","00:00:00.520","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.520","00:00:00.521","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.521","00:00:00.522","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.522","00:00:00.523","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.523","00:00:00.525","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.525","00:00:00.526","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.526","00:00:00.527","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.527","00:00:00.528","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.528","00:00:00.529","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.529","00:00:00.530","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.530","00:00:00.532","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.532","00:00:00.533","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.533","00:00:00.534","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.534","00:00:00.535","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.535","00:00:00.536","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.536","00:00:00.538","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.538","00:00:00.539","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.539","00:00:00.540","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.540","00:00:00.541","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.541","00:00:00.542","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.542","00:00:00.543","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.543","00:00:00.545","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.545","00:00:00.546","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.546","00:00:00.547","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.547","00:00:00.548","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.548","00:00:00.549","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.549","00:00:00.550","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.550","00:00:00.552","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.552","00:00:00.553","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.553","00:00:00.554","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.554","00:00:00.555","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.555","00:00:00.556","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.556","00:00:00.557","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.557","00:00:00.559","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.559","00:00:00.560","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.560","00:00:00.561","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.561","00:00:00.562","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.562","00:00:00.563","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.563","00:00:00.565","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.565","00:00:00.566","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.570","00:00:00.571","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.571","00:00:00.571","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.571","00:00:00.573","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.573","00:00:00.574","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.574","00:00:00.575","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.575","00:00:00.576","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.576","00:00:00.577","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.577","00:00:00.579","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.579","00:00:00.580","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.580","00:00:00.581","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.581","00:00:00.582","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.582","00:00:00.583","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.583","00:00:00.585","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.585","00:00:00.586","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.586","00:00:00.587","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.587","00:00:00.589","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.589","00:00:00.590","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.590","00:00:00.591","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.591","00:00:00.592","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.592","00:00:00.594","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.594","00:00:00.595","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.595","00:00:00.596","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.596","00:00:00.597","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.597","00:00:00.598","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.598","00:00:00.599","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.599","00:00:00.601","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.601","00:00:00.602","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.602","00:00:00.603","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.603","00:00:00.604","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.604","00:00:00.605","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.605","00:00:00.607","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.607","00:00:00.608","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.608","00:00:00.609","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.609","00:00:00.610","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.610","00:00:00.611","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.611","00:00:00.613","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.613","00:00:00.614","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.614","00:00:00.615","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.615","00:00:00.617","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.617","00:00:00.618","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.618","00:00:00.619","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.619","00:00:00.620","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.620","00:00:00.621","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.621","00:00:00.622","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.622","00:00:00.624","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.624","00:00:00.625","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.625","00:00:00.626","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.626","00:00:00.627","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.627","00:00:00.629","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.629","00:00:00.630","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.630","00:00:00.631","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.631","00:00:00.632","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.632","00:00:00.633","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.633","00:00:00.635","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.635","00:00:00.636","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.636","00:00:00.637","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.637","00:00:00.638","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.638","00:00:00.640","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.640","00:00:00.641","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.641","00:00:00.642","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.642","00:00:00.644","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.644","00:00:00.645","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.645","00:00:00.646","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.646","00:00:00.647","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.647","00:00:00.648","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.648","00:00:00.649","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.649","00:00:00.650","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.650","00:00:00.652","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.652","00:00:00.653","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.653","00:00:00.654","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.654","00:00:00.655","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.655","00:00:00.656","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.656","00:00:00.658","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.658","00:00:00.659","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.659","00:00:00.660","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.660","00:00:00.661","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.661","00:00:00.663","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.663","00:00:00.664","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.664","00:00:00.665","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.665","00:00:00.666","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.666","00:00:00.668","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.668","00:00:00.669","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.669","00:00:00.670","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.670","00:00:00.672","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.672","00:00:00.672","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.672","00:00:00.673","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.673","00:00:00.675","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.675","00:00:00.676","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.676","00:00:00.677","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.677","00:00:00.678","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.678","00:00:00.680","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.680","00:00:00.681","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.681","00:00:00.682","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.682","00:00:00.683","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.683","00:00:00.684","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.684","00:00:00.686","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.686","00:00:00.687","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.687","00:00:00.688","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.688","00:00:00.689","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.689","00:00:00.691","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.691","00:00:00.692","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.692","00:00:00.693","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.693","00:00:00.695","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.695","00:00:00.696","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.696","00:00:00.697","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.697","00:00:00.698","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.698","00:00:00.699","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.699","00:00:00.700","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.700","00:00:00.701","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.701","00:00:00.703","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.703","00:00:00.704","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.704","00:00:00.705","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.705","00:00:00.706","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.706","00:00:00.708","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.708","00:00:00.709","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.709","00:00:00.710","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.710","00:00:00.711","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.711","00:00:00.712","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.712","00:00:00.714","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.714","00:00:00.715","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.715","00:00:00.716","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.716","00:00:00.718","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.718","00:00:00.719","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.719","00:00:00.720","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.720","00:00:00.721","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.721","00:00:00.722","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.722","00:00:00.723","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.723","00:00:00.725","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.725","00:00:00.726","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.726","00:00:00.727","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.727","00:00:00.728","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.728","00:00:00.729","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.729","00:00:00.731","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.731","00:00:00.732","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.732","00:00:00.733","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.733","00:00:00.734","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.734","00:00:00.736","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.736","00:00:00.737","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.737","00:00:00.738","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.738","00:00:00.739","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.739","00:00:00.741","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.741","00:00:00.742","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.742","00:00:00.743","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.743","00:00:00.745","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.745","00:00:00.746","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.746","00:00:00.747","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.747","00:00:00.748","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.748","00:00:00.750","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.750","00:00:00.751","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.751","00:00:00.752","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.752","00:00:00.753","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.753","00:00:00.755","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.755","00:00:00.756","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.756","00:00:00.757","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.757","00:00:00.758","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.758","00:00:00.760","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.760","00:00:00.761","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.761","00:00:00.762","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.762","00:00:00.763","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.763","00:00:00.765","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.765","00:00:00.766","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.766","00:00:00.767","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.767","00:00:00.769","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.769","00:00:00.770","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.770","00:00:00.771","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.771","00:00:00.772","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.772","00:00:00.774","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.774","00:00:00.775","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.775","00:00:00.776","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.776","00:00:00.777","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.777","00:00:00.779","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.779","00:00:00.780","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.780","00:00:00.781","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.781","00:00:00.783","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.783","00:00:00.784","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.784","00:00:00.786","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.786","00:00:00.787","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.787","00:00:00.788","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.788","00:00:00.789","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.789","00:00:00.790","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.790","00:00:00.791","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.791","00:00:00.793","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.793","00:00:00.794","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.794","00:00:00.795","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.795","00:00:00.796","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.796","00:00:00.798","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.798","00:00:00.799","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.799","00:00:00.801","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.801","00:00:00.802","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.802","00:00:00.803","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.803","00:00:00.804","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.804","00:00:00.806","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.806","00:00:00.807","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.807","00:00:00.808","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.812","00:00:00.813","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.813","00:00:00.814","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.814","00:00:00.815","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.815","00:00:00.816","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.816","00:00:00.817","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.817","00:00:00.819","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.819","00:00:00.820","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.820","00:00:00.821","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.821","00:00:00.823","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.823","00:00:00.824","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.824","00:00:00.825","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.825","00:00:00.827","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.827","00:00:00.828","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.828","00:00:00.829","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.829","00:00:00.831","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.831","00:00:00.832","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.832","00:00:00.833","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.833","00:00:00.834","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.834","00:00:00.835","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.835","00:00:00.837","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.837","00:00:00.838","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.838","00:00:00.839","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.839","00:00:00.840","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.840","00:00:00.842","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.842","00:00:00.843","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.843","00:00:00.844","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.844","00:00:00.845","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.845","00:00:00.847","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.847","00:00:00.848","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.848","00:00:00.849","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.849","00:00:00.851","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.851","00:00:00.852","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.852","00:00:00.853","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.853","00:00:00.854","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.854","00:00:00.856","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.856","00:00:00.857","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.857","00:00:00.858","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.858","00:00:00.860","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.860","00:00:00.861","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.861","00:00:00.862","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.862","00:00:00.863","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.863","00:00:00.865","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.865","00:00:00.866","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.866","00:00:00.867","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.867","00:00:00.868","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.868","00:00:00.870","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.870","00:00:00.871","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.871","00:00:00.872","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.872","00:00:00.874","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.874","00:00:00.875","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.875","00:00:00.876","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.876","00:00:00.877","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.877","00:00:00.879","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.879","00:00:00.880","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.880","00:00:00.881","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.881","00:00:00.882","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.882","00:00:00.884","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.884","00:00:00.885","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.885","00:00:00.886","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.886","00:00:00.888","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.888","00:00:00.889","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.889","00:00:00.890","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.890","00:00:00.891","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.891","00:00:00.893","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.893","00:00:00.894","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.894","00:00:00.895","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.895","00:00:00.896","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.896","00:00:00.898","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.898","00:00:00.899","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.899","00:00:00.900","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.900","00:00:00.902","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.902","00:00:00.903","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.903","00:00:00.904","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.904","00:00:00.905","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.905","00:00:00.907","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.907","00:00:00.908","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.908","00:00:00.909","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.909","00:00:00.911","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.911","00:00:00.912","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.912","00:00:00.913","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.913","00:00:00.915","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.915","00:00:00.916","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.916","00:00:00.917","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.917","00:00:00.919","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.919","00:00:00.920","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.920","00:00:00.921","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.921","00:00:00.923","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.923","00:00:00.924","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.924","00:00:00.925","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.925","00:00:00.926","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.926","00:00:00.928","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.928","00:00:00.929","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.929","00:00:00.930","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.930","00:00:00.932","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.932","00:00:00.933","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.933","00:00:00.934","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.934","00:00:00.936","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.936","00:00:00.937","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.937","00:00:00.938","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.938","00:00:00.940","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.940","00:00:00.941","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.941","00:00:00.943","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.943","00:00:00.944","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.944","00:00:00.945","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.945","00:00:00.947","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.947","00:00:00.948","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.948","00:00:00.950","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.950","00:00:00.951","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.951","00:00:00.952","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.952","00:00:00.953","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.953","00:00:00.954","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.954","00:00:00.956","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.956","00:00:00.957","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.957","00:00:00.958","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.958","00:00:00.960","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.960","00:00:00.961","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.961","00:00:00.962","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.962","00:00:00.964","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.964","00:00:00.965","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.965","00:00:00.966","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.966","00:00:00.968","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.968","00:00:00.969","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.969","00:00:00.970","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.970","00:00:00.971","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.971","00:00:00.973","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.973","00:00:00.974","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.974","00:00:00.975","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.975","00:00:00.977","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.977","00:00:00.978","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.978","00:00:00.979","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.979","00:00:00.981","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.981","00:00:00.982","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.982","00:00:00.983","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.983","00:00:00.985","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.985","00:00:00.986","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.986","00:00:00.987","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.987","00:00:00.989","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.989","00:00:00.990","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.990","00:00:00.991","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.991","00:00:00.993","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.993","00:00:00.994","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.994","00:00:00.995","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.995","00:00:00.997","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.997","00:00:00.998","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.998","00:00:00.999","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.999","00:00:01.001","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.001","00:00:01.002","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.002","00:00:01.003","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.003","00:00:01.004","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.004","00:00:01.006","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.006","00:00:01.007","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.007","00:00:01.008","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.008","00:00:01.010","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.010","00:00:01.011","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.011","00:00:01.013","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.013","00:00:01.014","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.014","00:00:01.015","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.015","00:00:01.017","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.017","00:00:01.018","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.018","00:00:01.019","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.019","00:00:01.020","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.020","00:00:01.022","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.022","00:00:01.023","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.023","00:00:01.024","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.024","00:00:01.026","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.026","00:00:01.027","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.027","00:00:01.028","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.028","00:00:01.030","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.030","00:00:01.031","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.031","00:00:01.032","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.032","00:00:01.034","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.034","00:00:01.035","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.035","00:00:01.036","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.036","00:00:01.037","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.037","00:00:01.039","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.039","00:00:01.040","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.040","00:00:01.041","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.041","00:00:01.043","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.043","00:00:01.044","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.044","00:00:01.046","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.046","00:00:01.047","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.047","00:00:01.048","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.048","00:00:01.049","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.049","00:00:01.051","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.051","00:00:01.052","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.052","00:00:01.053","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.053","00:00:01.055","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.055","00:00:01.056","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.056","00:00:01.057","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.057","00:00:01.059","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.059","00:00:01.060","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.060","00:00:01.061","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.061","00:00:01.063","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.063","00:00:01.064","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.064","00:00:01.065","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.065","00:00:01.067","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.067","00:00:01.068","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.068","00:00:01.069","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.069","00:00:01.071","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.071","00:00:01.072","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.072","00:00:01.073","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.073","00:00:01.075","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.075","00:00:01.076","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.076","00:00:01.078","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.078","00:00:01.079","0.002s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.079","00:00:01.080","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.080","00:00:01.082","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.082","00:00:01.083","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.083","00:00:01.084","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.084","00:00:01.086","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.086","00:00:01.087","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.087","00:00:01.089","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.089","00:00:01.090","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.090","00:00:01.091","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.091","00:00:01.093","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.093","00:00:01.094","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.094","00:00:01.095","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.095","00:00:01.097","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.097","00:00:01.098","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.098","00:00:01.099","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.099","00:00:01.101","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.101","00:00:01.102","0.001s"],[25,770,"00:00:00.135","00:00:01.104","00:00:01.102","00:00:01.104","0.002s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.250","00:00:00.250","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.250","00:00:00.252","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.252","00:00:00.253","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.253","00:00:00.254","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.254","00:00:00.255","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.255","00:00:00.256","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.256","00:00:00.257","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.257","00:00:00.258","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.258","00:00:00.259","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.259","00:00:00.261","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.261","00:00:00.262","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.262","00:00:00.263","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.263","00:00:00.264","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.264","00:00:00.265","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.265","00:00:00.266","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.266","00:00:00.267","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.267","00:00:00.269","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.269","00:00:00.270","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.270","00:00:00.271","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.271","00:00:00.272","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.272","00:00:00.273","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.273","00:00:00.275","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.275","00:00:00.276","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.276","00:00:00.277","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.277","00:00:00.279","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.279","00:00:00.280","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.280","00:00:00.281","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.281","00:00:00.282","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.282","00:00:00.283","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.283","00:00:00.284","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.284","00:00:00.285","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.285","00:00:00.287","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.287","00:00:00.288","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.288","00:00:00.289","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.289","00:00:00.290","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.290","00:00:00.292","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.292","00:00:00.293","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.293","00:00:00.294","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.294","00:00:00.295","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.295","00:00:00.296","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.296","00:00:00.297","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.297","00:00:00.298","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.298","00:00:00.300","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.300","00:00:00.301","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.301","00:00:00.302","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.302","00:00:00.303","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.303","00:00:00.305","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.305","00:00:00.306","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.306","00:00:00.307","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.307","00:00:00.309","0.002s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.309","00:00:00.310","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.310","00:00:00.312","0.002s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.312","00:00:00.313","0.002s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.313","00:00:00.315","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.315","00:00:00.316","0.002s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.316","00:00:00.317","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.317","00:00:00.318","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.318","00:00:00.319","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.319","00:00:00.320","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.320","00:00:00.321","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.321","00:00:00.322","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.322","00:00:00.324","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.324","00:00:00.325","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.325","00:00:00.326","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.326","00:00:00.327","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.327","00:00:00.328","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.328","00:00:00.329","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.333","00:00:00.334","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.334","00:00:00.335","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.335","00:00:00.336","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.336","00:00:00.337","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.337","00:00:00.338","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.338","00:00:00.339","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.339","00:00:00.340","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.340","00:00:00.341","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.341","00:00:00.342","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.342","00:00:00.344","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.344","00:00:00.345","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.345","00:00:00.346","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.346","00:00:00.347","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.347","00:00:00.348","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.348","00:00:00.349","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.349","00:00:00.350","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.350","00:00:00.351","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.351","00:00:00.352","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.352","00:00:00.354","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.354","00:00:00.355","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.355","00:00:00.356","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.356","00:00:00.357","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.357","00:00:00.358","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.358","00:00:00.359","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.359","00:00:00.360","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.360","00:00:00.361","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.361","00:00:00.363","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.363","00:00:00.364","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.364","00:00:00.365","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.365","00:00:00.366","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.366","00:00:00.367","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.367","00:00:00.368","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.368","00:00:00.369","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.369","00:00:00.370","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.370","00:00:00.371","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.371","00:00:00.373","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.373","00:00:00.374","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.374","00:00:00.375","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.375","00:00:00.376","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.376","00:00:00.377","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.377","00:00:00.378","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.378","00:00:00.379","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.379","00:00:00.380","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.380","00:00:00.382","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.382","00:00:00.383","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.383","00:00:00.384","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.384","00:00:00.385","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.385","00:00:00.386","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.386","00:00:00.387","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.387","00:00:00.388","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.388","00:00:00.389","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.393","00:00:00.395","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.395","00:00:00.395","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.395","00:00:00.396","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.396","00:00:00.398","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.398","00:00:00.399","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.399","00:00:00.400","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.400","00:00:00.401","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.401","00:00:00.402","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.402","00:00:00.403","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.403","00:00:00.405","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.405","00:00:00.406","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.406","00:00:00.407","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.407","00:00:00.408","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.408","00:00:00.409","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.409","00:00:00.410","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.410","00:00:00.412","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.412","00:00:00.413","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.413","00:00:00.414","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.414","00:00:00.415","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.415","00:00:00.416","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.416","00:00:00.417","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.417","00:00:00.419","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.419","00:00:00.420","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.420","00:00:00.421","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.421","00:00:00.422","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.422","00:00:00.423","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.423","00:00:00.424","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.424","00:00:00.426","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.426","00:00:00.427","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.427","00:00:00.428","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.428","00:00:00.429","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.429","00:00:00.430","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.430","00:00:00.431","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.431","00:00:00.433","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.433","00:00:00.434","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.434","00:00:00.435","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.435","00:00:00.437","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.437","00:00:00.438","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.438","00:00:00.439","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.439","00:00:00.441","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.441","00:00:00.442","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.442","00:00:00.443","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.443","00:00:00.444","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.444","00:00:00.445","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.445","00:00:00.446","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.446","00:00:00.447","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.447","00:00:00.448","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.448","00:00:00.450","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.450","00:00:00.451","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.451","00:00:00.452","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.452","00:00:00.453","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.453","00:00:00.454","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.454","00:00:00.456","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.456","00:00:00.457","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.457","00:00:00.458","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.458","00:00:00.459","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.459","00:00:00.460","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.460","00:00:00.462","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.462","00:00:00.463","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.463","00:00:00.464","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.464","00:00:00.466","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.466","00:00:00.467","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.467","00:00:00.468","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.468","00:00:00.469","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.469","00:00:00.470","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.470","00:00:00.472","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.472","00:00:00.473","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.473","00:00:00.474","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.474","00:00:00.475","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.475","00:00:00.476","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.476","00:00:00.478","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.478","00:00:00.479","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.479","00:00:00.480","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.480","00:00:00.481","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.481","00:00:00.482","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.482","00:00:00.484","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.484","00:00:00.485","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.485","00:00:00.486","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.486","00:00:00.488","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.488","00:00:00.489","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.489","00:00:00.490","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.490","00:00:00.492","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.492","00:00:00.493","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.493","00:00:00.494","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.494","00:00:00.495","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.495","00:00:00.496","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.496","00:00:00.497","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.497","00:00:00.499","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.499","00:00:00.500","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.500","00:00:00.501","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.501","00:00:00.502","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.502","00:00:00.503","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.503","00:00:00.505","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.505","00:00:00.506","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.506","00:00:00.507","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.507","00:00:00.508","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.508","00:00:00.509","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.509","00:00:00.511","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.511","00:00:00.512","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.512","00:00:00.513","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.513","00:00:00.515","0.002s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.518","00:00:00.519","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.519","00:00:00.520","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.520","00:00:00.521","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.521","00:00:00.522","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.522","00:00:00.524","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.524","00:00:00.525","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.525","00:00:00.526","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.526","00:00:00.527","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.527","00:00:00.528","0.001s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.528","00:00:00.530","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.334","00:00:00.335","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.335","00:00:00.336","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.336","00:00:00.337","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.337","00:00:00.338","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.338","00:00:00.339","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.339","00:00:00.340","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.340","00:00:00.341","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.341","00:00:00.342","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.342","00:00:00.343","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.343","00:00:00.345","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.345","00:00:00.346","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.346","00:00:00.347","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.347","00:00:00.348","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.348","00:00:00.349","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.349","00:00:00.350","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.350","00:00:00.351","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.351","00:00:00.352","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.352","00:00:00.354","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.354","00:00:00.355","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.355","00:00:00.356","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.356","00:00:00.357","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.357","00:00:00.358","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.358","00:00:00.359","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.359","00:00:00.360","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.360","00:00:00.361","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.361","00:00:00.363","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.363","00:00:00.364","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.364","00:00:00.365","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.365","00:00:00.366","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.366","00:00:00.367","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.367","00:00:00.368","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.368","00:00:00.369","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.369","00:00:00.370","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.370","00:00:00.371","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.371","00:00:00.373","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.373","00:00:00.374","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.374","00:00:00.375","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.375","00:00:00.376","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.376","00:00:00.377","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.377","00:00:00.378","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.378","00:00:00.379","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.379","00:00:00.380","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.380","00:00:00.382","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.382","00:00:00.383","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.383","00:00:00.384","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.384","00:00:00.385","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.385","00:00:00.386","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.386","00:00:00.387","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.387","00:00:00.388","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.388","00:00:00.389","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.393","00:00:00.395","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.395","00:00:00.395","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.395","00:00:00.396","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.396","00:00:00.397","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.397","00:00:00.399","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.399","00:00:00.400","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.400","00:00:00.401","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.401","00:00:00.402","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.402","00:00:00.403","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.403","00:00:00.404","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.404","00:00:00.406","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.406","00:00:00.407","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.407","00:00:00.408","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.408","00:00:00.409","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.409","00:00:00.410","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.410","00:00:00.412","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.412","00:00:00.413","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.413","00:00:00.414","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.414","00:00:00.415","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.415","00:00:00.416","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.416","00:00:00.417","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.417","00:00:00.418","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.418","00:00:00.420","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.420","00:00:00.421","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.421","00:00:00.422","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.422","00:00:00.423","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.423","00:00:00.424","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.424","00:00:00.426","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.426","00:00:00.427","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.427","00:00:00.428","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.428","00:00:00.429","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.429","00:00:00.430","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.430","00:00:00.431","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.431","00:00:00.433","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.433","00:00:00.434","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.434","00:00:00.435","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.435","00:00:00.437","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.437","00:00:00.438","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.438","00:00:00.439","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.439","00:00:00.441","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.441","00:00:00.442","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.442","00:00:00.443","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.443","00:00:00.444","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.444","00:00:00.445","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.445","00:00:00.446","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.446","00:00:00.447","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.447","00:00:00.448","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.448","00:00:00.450","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.450","00:00:00.451","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.451","00:00:00.452","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.452","00:00:00.453","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.453","00:00:00.454","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.454","00:00:00.455","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.455","00:00:00.457","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.457","00:00:00.458","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.458","00:00:00.459","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.459","00:00:00.460","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.460","00:00:00.462","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.462","00:00:00.463","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.463","00:00:00.464","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.464","00:00:00.466","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.466","00:00:00.467","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.467","00:00:00.468","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.468","00:00:00.469","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.469","00:00:00.470","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.470","00:00:00.471","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.471","00:00:00.473","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.473","00:00:00.474","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.474","00:00:00.475","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.475","00:00:00.476","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.476","00:00:00.478","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.478","00:00:00.479","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.479","00:00:00.480","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.480","00:00:00.481","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.481","00:00:00.482","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.482","00:00:00.484","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.484","00:00:00.485","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.485","00:00:00.486","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.486","00:00:00.487","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.487","00:00:00.489","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.489","00:00:00.490","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.490","00:00:00.492","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.492","00:00:00.493","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.493","00:00:00.494","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.494","00:00:00.495","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.495","00:00:00.496","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.496","00:00:00.497","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.497","00:00:00.499","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.499","00:00:00.500","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.500","00:00:00.501","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.501","00:00:00.502","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.502","00:00:00.503","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.503","00:00:00.504","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.504","00:00:00.506","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.506","00:00:00.507","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.507","00:00:00.508","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.508","00:00:00.509","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.509","00:00:00.511","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.511","00:00:00.512","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.512","00:00:00.513","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.513","00:00:00.515","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.518","00:00:00.519","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.519","00:00:00.520","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.520","00:00:00.521","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.521","00:00:00.522","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.522","00:00:00.524","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.524","00:00:00.525","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.525","00:00:00.526","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.526","00:00:00.527","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.527","00:00:00.528","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.528","00:00:00.530","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.530","00:00:00.530","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.530","00:00:00.532","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.532","00:00:00.533","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.533","00:00:00.534","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.534","00:00:00.535","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.535","00:00:00.536","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.536","00:00:00.538","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.538","00:00:00.539","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.539","00:00:00.540","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.540","00:00:00.541","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.541","00:00:00.542","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.542","00:00:00.543","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.543","00:00:00.545","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.545","00:00:00.546","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.546","00:00:00.547","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.547","00:00:00.548","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.548","00:00:00.549","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.549","00:00:00.550","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.550","00:00:00.552","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.552","00:00:00.553","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.553","00:00:00.554","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.554","00:00:00.555","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.555","00:00:00.556","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.556","00:00:00.558","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.558","00:00:00.559","0.002s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.559","00:00:00.560","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.560","00:00:00.561","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.561","00:00:00.562","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.562","00:00:00.563","0.001s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.563","00:00:00.565","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.395","00:00:00.395","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.395","00:00:00.396","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.396","00:00:00.398","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.398","00:00:00.399","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.399","00:00:00.400","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.400","00:00:00.401","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.401","00:00:00.402","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.402","00:00:00.403","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.403","00:00:00.404","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.404","00:00:00.406","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.406","00:00:00.407","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.407","00:00:00.408","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.408","00:00:00.409","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.409","00:00:00.410","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.410","00:00:00.412","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.412","00:00:00.413","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.413","00:00:00.414","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.414","00:00:00.415","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.415","00:00:00.416","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.416","00:00:00.417","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.417","00:00:00.419","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.419","00:00:00.420","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.420","00:00:00.421","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.421","00:00:00.422","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.422","00:00:00.423","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.423","00:00:00.424","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.424","00:00:00.426","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.426","00:00:00.427","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.427","00:00:00.428","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.428","00:00:00.429","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.429","00:00:00.430","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.430","00:00:00.431","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.431","00:00:00.433","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.433","00:00:00.434","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.434","00:00:00.435","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.435","00:00:00.437","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.437","00:00:00.438","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.438","00:00:00.439","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.439","00:00:00.441","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.441","00:00:00.442","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.442","00:00:00.443","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.443","00:00:00.444","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.444","00:00:00.445","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.445","00:00:00.446","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.446","00:00:00.447","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.447","00:00:00.448","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.448","00:00:00.450","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.450","00:00:00.451","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.451","00:00:00.452","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.452","00:00:00.453","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.453","00:00:00.454","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.454","00:00:00.456","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.456","00:00:00.457","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.457","00:00:00.458","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.458","00:00:00.459","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.459","00:00:00.460","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.460","00:00:00.462","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.462","00:00:00.463","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.463","00:00:00.464","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.464","00:00:00.466","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.466","00:00:00.467","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.467","00:00:00.468","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.468","00:00:00.469","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.469","00:00:00.470","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.470","00:00:00.471","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.471","00:00:00.473","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.473","00:00:00.474","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.474","00:00:00.475","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.475","00:00:00.476","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.476","00:00:00.478","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.478","00:00:00.479","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.479","00:00:00.480","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.480","00:00:00.481","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.481","00:00:00.482","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.482","00:00:00.484","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.484","00:00:00.485","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.485","00:00:00.486","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.486","00:00:00.488","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.488","00:00:00.489","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.489","00:00:00.490","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.490","00:00:00.492","0.002s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.492","00:00:00.493","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.493","00:00:00.494","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.494","00:00:00.495","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.495","00:00:00.496","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.496","00:00:00.497","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.497","00:00:00.499","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.499","00:00:00.500","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.500","00:00:00.501","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.501","00:00:00.502","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.502","00:00:00.503","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.503","00:00:00.505","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.505","00:00:00.506","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.506","00:00:00.507","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.507","00:00:00.508","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.508","00:00:00.509","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.509","00:00:00.511","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.511","00:00:00.512","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.512","00:00:00.513","0.001s"],[329,101,"00:00:00.382","00:00:00.515","00:00:00.513","00:00:00.515","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.519","00:00:00.520","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.520","00:00:00.521","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.521","00:00:00.522","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.522","00:00:00.524","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.524","00:00:00.525","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.525","00:00:00.526","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.526","00:00:00.527","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.527","00:00:00.528","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.528","00:00:00.530","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.530","00:00:00.530","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.530","00:00:00.532","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.532","00:00:00.533","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.533","00:00:00.534","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.534","00:00:00.535","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.535","00:00:00.536","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.536","00:00:00.538","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.538","00:00:00.539","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.539","00:00:00.540","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.540","00:00:00.541","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.541","00:00:00.542","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.542","00:00:00.543","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.543","00:00:00.545","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.545","00:00:00.546","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.546","00:00:00.547","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.547","00:00:00.548","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.548","00:00:00.549","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.549","00:00:00.550","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.550","00:00:00.552","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.552","00:00:00.553","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.553","00:00:00.554","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.554","00:00:00.555","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.555","00:00:00.556","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.556","00:00:00.558","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.558","00:00:00.559","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.559","00:00:00.560","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.560","00:00:00.561","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.561","00:00:00.562","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.562","00:00:00.563","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.563","00:00:00.565","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.565","00:00:00.566","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.570","00:00:00.571","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.571","00:00:00.571","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.571","00:00:00.573","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.573","00:00:00.574","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.574","00:00:00.575","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.575","00:00:00.576","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.576","00:00:00.578","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.578","00:00:00.579","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.579","00:00:00.580","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.580","00:00:00.581","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.581","00:00:00.582","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.582","00:00:00.584","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.584","00:00:00.585","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.585","00:00:00.586","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.586","00:00:00.587","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.587","00:00:00.589","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.589","00:00:00.590","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.590","00:00:00.591","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.591","00:00:00.592","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.592","00:00:00.594","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.594","00:00:00.595","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.595","00:00:00.596","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.596","00:00:00.597","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.597","00:00:00.598","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.598","00:00:00.599","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.599","00:00:00.601","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.601","00:00:00.602","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.602","00:00:00.603","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.603","00:00:00.604","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.604","00:00:00.605","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.605","00:00:00.607","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.607","00:00:00.608","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.608","00:00:00.609","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.609","00:00:00.610","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.610","00:00:00.612","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.612","00:00:00.613","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.613","00:00:00.614","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.614","00:00:00.615","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.615","00:00:00.617","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.617","00:00:00.618","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.618","00:00:00.619","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.619","00:00:00.620","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.620","00:00:00.621","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.621","00:00:00.622","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.622","00:00:00.624","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.624","00:00:00.625","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.625","00:00:00.626","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.626","00:00:00.627","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.627","00:00:00.629","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.629","00:00:00.630","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.630","00:00:00.631","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.631","00:00:00.632","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.632","00:00:00.633","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.633","00:00:00.635","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.635","00:00:00.636","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.636","00:00:00.637","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.637","00:00:00.638","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.638","00:00:00.640","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.640","00:00:00.641","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.641","00:00:00.642","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.642","00:00:00.644","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.644","00:00:00.645","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.645","00:00:00.646","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.646","00:00:00.647","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.647","00:00:00.648","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.648","00:00:00.649","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.649","00:00:00.650","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.650","00:00:00.652","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.652","00:00:00.653","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.653","00:00:00.654","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.654","00:00:00.655","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.655","00:00:00.657","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.657","00:00:00.658","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.658","00:00:00.659","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.659","00:00:00.660","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.660","00:00:00.661","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.661","00:00:00.663","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.663","00:00:00.664","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.664","00:00:00.665","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.665","00:00:00.666","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.666","00:00:00.668","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.668","00:00:00.669","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.669","00:00:00.670","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.670","00:00:00.672","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.672","00:00:00.673","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.673","00:00:00.674","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.674","00:00:00.675","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.675","00:00:00.676","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.676","00:00:00.677","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.677","00:00:00.678","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.678","00:00:00.680","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.680","00:00:00.681","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.681","00:00:00.682","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.682","00:00:00.683","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.683","00:00:00.684","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.684","00:00:00.686","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.686","00:00:00.687","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.687","00:00:00.688","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.688","00:00:00.689","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.689","00:00:00.691","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.691","00:00:00.692","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.692","00:00:00.693","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.693","00:00:00.695","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.695","00:00:00.696","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.696","00:00:00.697","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.697","00:00:00.698","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.698","00:00:00.699","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.699","00:00:00.700","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.700","00:00:00.702","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.702","00:00:00.703","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.703","00:00:00.704","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.704","00:00:00.705","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.705","00:00:00.706","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.706","00:00:00.708","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.708","00:00:00.709","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.709","00:00:00.710","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.710","00:00:00.711","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.711","00:00:00.712","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.712","00:00:00.714","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.714","00:00:00.715","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.715","00:00:00.716","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.716","00:00:00.718","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.718","00:00:00.719","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.719","00:00:00.720","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.720","00:00:00.721","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.721","00:00:00.722","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.722","00:00:00.723","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.723","00:00:00.725","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.725","00:00:00.726","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.726","00:00:00.727","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.727","00:00:00.728","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.728","00:00:00.729","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.729","00:00:00.731","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.731","00:00:00.732","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.732","00:00:00.733","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.733","00:00:00.734","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.734","00:00:00.736","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.736","00:00:00.737","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.737","00:00:00.738","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.738","00:00:00.739","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.739","00:00:00.741","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.741","00:00:00.742","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.742","00:00:00.743","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.743","00:00:00.745","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.745","00:00:00.746","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.746","00:00:00.747","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.747","00:00:00.748","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.748","00:00:00.750","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.750","00:00:00.751","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.751","00:00:00.752","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.752","00:00:00.753","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.753","00:00:00.755","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.755","00:00:00.756","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.756","00:00:00.757","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.757","00:00:00.758","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.758","00:00:00.760","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.760","00:00:00.761","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.761","00:00:00.762","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.762","00:00:00.763","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.763","00:00:00.765","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.765","00:00:00.766","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.766","00:00:00.767","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.767","00:00:00.769","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.769","00:00:00.770","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.770","00:00:00.771","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.771","00:00:00.772","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.772","00:00:00.774","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.774","00:00:00.775","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.775","00:00:00.776","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.776","00:00:00.777","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.777","00:00:00.779","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.779","00:00:00.780","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.780","00:00:00.781","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.781","00:00:00.783","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.783","00:00:00.784","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.784","00:00:00.786","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.786","00:00:00.787","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.787","00:00:00.788","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.788","00:00:00.789","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.789","00:00:00.790","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.790","00:00:00.791","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.791","00:00:00.793","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.793","00:00:00.794","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.794","00:00:00.795","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.795","00:00:00.797","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.797","00:00:00.798","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.798","00:00:00.799","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.799","00:00:00.801","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.801","00:00:00.802","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.802","00:00:00.803","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.803","00:00:00.804","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.804","00:00:00.806","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.806","00:00:00.807","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.807","00:00:00.808","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.812","00:00:00.813","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.813","00:00:00.814","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.814","00:00:00.815","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.815","00:00:00.816","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.816","00:00:00.818","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.818","00:00:00.819","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.819","00:00:00.820","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.820","00:00:00.821","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.821","00:00:00.823","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.823","00:00:00.824","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.824","00:00:00.825","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.825","00:00:00.827","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.827","00:00:00.828","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.828","00:00:00.829","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.829","00:00:00.831","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.831","00:00:00.832","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.832","00:00:00.833","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.833","00:00:00.834","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.834","00:00:00.835","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.835","00:00:00.837","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.837","00:00:00.838","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.838","00:00:00.839","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.839","00:00:00.840","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.840","00:00:00.842","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.842","00:00:00.843","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.843","00:00:00.844","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.844","00:00:00.846","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.846","00:00:00.847","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.847","00:00:00.848","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.848","00:00:00.849","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.849","00:00:00.851","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.851","00:00:00.852","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.852","00:00:00.853","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.853","00:00:00.855","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.855","00:00:00.856","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.856","00:00:00.857","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.857","00:00:00.859","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.859","00:00:00.860","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.860","00:00:00.861","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.861","00:00:00.862","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.862","00:00:00.863","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.863","00:00:00.865","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.865","00:00:00.866","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.866","00:00:00.867","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.867","00:00:00.868","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.868","00:00:00.870","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.870","00:00:00.871","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.871","00:00:00.872","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.872","00:00:00.874","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.874","00:00:00.875","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.875","00:00:00.876","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.876","00:00:00.877","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.877","00:00:00.879","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.879","00:00:00.880","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.880","00:00:00.881","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.881","00:00:00.882","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.882","00:00:00.884","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.884","00:00:00.885","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.885","00:00:00.886","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.886","00:00:00.888","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.888","00:00:00.889","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.889","00:00:00.890","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.890","00:00:00.891","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.891","00:00:00.893","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.893","00:00:00.894","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.894","00:00:00.895","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.895","00:00:00.896","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.896","00:00:00.898","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.898","00:00:00.899","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.899","00:00:00.900","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.900","00:00:00.902","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.902","00:00:00.903","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.903","00:00:00.904","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.904","00:00:00.906","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.906","00:00:00.907","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.907","00:00:00.908","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.908","00:00:00.909","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.909","00:00:00.911","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.911","00:00:00.912","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.912","00:00:00.913","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.913","00:00:00.915","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.915","00:00:00.916","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.916","00:00:00.917","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.917","00:00:00.919","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.919","00:00:00.920","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.920","00:00:00.921","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.921","00:00:00.923","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.923","00:00:00.924","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.924","00:00:00.925","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.925","00:00:00.927","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.927","00:00:00.928","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.928","00:00:00.929","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.929","00:00:00.930","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.930","00:00:00.932","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.932","00:00:00.933","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.933","00:00:00.935","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.935","00:00:00.936","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.936","00:00:00.937","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.937","00:00:00.939","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.939","00:00:00.940","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.940","00:00:00.941","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.941","00:00:00.943","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.943","00:00:00.944","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.944","00:00:00.946","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.946","00:00:00.947","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.947","00:00:00.948","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.948","00:00:00.950","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.950","00:00:00.951","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.951","00:00:00.952","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.952","00:00:00.953","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.953","00:00:00.955","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.955","00:00:00.956","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.956","00:00:00.957","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.957","00:00:00.958","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.958","00:00:00.960","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.960","00:00:00.961","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.961","00:00:00.962","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.962","00:00:00.964","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.964","00:00:00.965","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.965","00:00:00.966","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.966","00:00:00.968","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.968","00:00:00.969","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.969","00:00:00.970","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.970","00:00:00.971","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.971","00:00:00.973","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.973","00:00:00.974","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.974","00:00:00.975","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.975","00:00:00.977","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.977","00:00:00.978","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.978","00:00:00.980","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.980","00:00:00.981","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.981","00:00:00.982","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.982","00:00:00.983","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.983","00:00:00.985","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.985","00:00:00.986","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.986","00:00:00.987","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.987","00:00:00.989","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.989","00:00:00.990","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.990","00:00:00.991","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.991","00:00:00.993","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.993","00:00:00.994","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.994","00:00:00.996","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.996","00:00:00.997","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.997","00:00:00.998","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.998","00:00:00.999","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.999","00:00:01.001","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.001","00:00:01.002","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.002","00:00:01.003","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.003","00:00:01.004","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.004","00:00:01.006","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.006","00:00:01.007","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.007","00:00:01.008","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.008","00:00:01.010","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.010","00:00:01.011","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.011","00:00:01.013","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.013","00:00:01.014","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.014","00:00:01.015","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.015","00:00:01.017","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.017","00:00:01.018","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.018","00:00:01.019","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.019","00:00:01.020","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.020","00:00:01.022","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.022","00:00:01.023","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.023","00:00:01.025","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.025","00:00:01.026","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.026","00:00:01.027","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.027","00:00:01.028","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.028","00:00:01.030","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.030","00:00:01.031","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.031","00:00:01.032","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.032","00:00:01.034","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.034","00:00:01.035","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.035","00:00:01.036","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.036","00:00:01.038","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.038","00:00:01.039","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.039","00:00:01.040","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.040","00:00:01.041","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.041","00:00:01.043","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.043","00:00:01.044","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.044","00:00:01.046","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.046","00:00:01.047","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.047","00:00:01.048","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.048","00:00:01.049","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.049","00:00:01.051","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.051","00:00:01.052","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.052","00:00:01.054","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.054","00:00:01.055","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.055","00:00:01.056","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.056","00:00:01.057","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.057","00:00:01.059","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.059","00:00:01.060","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.060","00:00:01.061","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.061","00:00:01.063","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.063","00:00:01.064","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.064","00:00:01.065","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.065","00:00:01.067","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.067","00:00:01.068","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.068","00:00:01.069","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.069","00:00:01.071","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.071","00:00:01.072","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.072","00:00:01.073","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.073","00:00:01.075","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.075","00:00:01.076","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.076","00:00:01.078","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.078","00:00:01.079","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.079","00:00:01.080","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.080","00:00:01.082","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.082","00:00:01.083","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.083","00:00:01.084","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.084","00:00:01.086","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.086","00:00:01.087","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.087","00:00:01.089","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.089","00:00:01.090","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.090","00:00:01.091","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.091","00:00:01.093","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.093","00:00:01.094","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.094","00:00:01.095","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.095","00:00:01.097","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.097","00:00:01.098","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.098","00:00:01.100","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.100","00:00:01.101","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.101","00:00:01.102","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.102","00:00:01.104","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.104","00:00:01.105","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.105","00:00:01.106","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.106","00:00:01.107","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.107","00:00:01.108","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.108","00:00:01.109","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.109","00:00:01.110","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.110","00:00:01.112","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.112","00:00:01.113","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.113","00:00:01.114","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.114","00:00:01.115","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.115","00:00:01.116","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.116","00:00:01.118","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.118","00:00:01.119","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.119","00:00:01.120","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.120","00:00:01.121","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.121","00:00:01.122","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.122","00:00:01.124","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.124","00:00:01.125","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.125","00:00:01.126","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.126","00:00:01.127","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.127","00:00:01.128","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.128","00:00:01.129","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.129","00:00:01.130","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.130","00:00:01.132","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.132","00:00:01.133","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.133","00:00:01.134","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.134","00:00:01.135","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.135","00:00:01.136","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.136","00:00:01.138","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.138","00:00:01.139","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.139","00:00:01.140","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.140","00:00:01.141","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.141","00:00:01.143","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.143","00:00:01.144","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.144","00:00:01.145","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.145","00:00:01.146","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.146","00:00:01.148","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.148","00:00:01.149","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.149","00:00:01.150","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.150","00:00:01.152","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.152","00:00:01.153","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.153","00:00:01.154","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.154","00:00:01.155","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.155","00:00:01.156","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.156","00:00:01.157","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.157","00:00:01.158","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.158","00:00:01.160","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.160","00:00:01.161","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.161","00:00:01.163","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.163","00:00:01.163","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.163","00:00:01.165","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.165","00:00:01.166","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.166","00:00:01.167","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.167","00:00:01.169","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.169","00:00:01.170","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.170","00:00:01.171","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.171","00:00:01.172","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.172","00:00:01.173","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.173","00:00:01.175","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.175","00:00:01.176","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.176","00:00:01.177","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.177","00:00:01.178","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.178","00:00:01.180","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.180","00:00:01.181","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.181","00:00:01.182","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.182","00:00:01.183","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.183","00:00:01.184","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.184","00:00:01.186","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.186","00:00:01.187","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.187","00:00:01.188","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.188","00:00:01.189","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.189","00:00:01.191","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.191","00:00:01.192","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.192","00:00:01.193","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.193","00:00:01.195","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.195","00:00:01.196","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.196","00:00:01.197","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.197","00:00:01.199","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.199","00:00:01.200","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.200","00:00:01.202","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.202","00:00:01.203","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.203","00:00:01.204","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.204","00:00:01.205","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.205","00:00:01.207","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.207","00:00:01.208","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.208","00:00:01.209","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.209","00:00:01.210","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.210","00:00:01.211","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.211","00:00:01.212","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.212","00:00:01.214","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.214","00:00:01.215","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.215","00:00:01.216","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.216","00:00:01.217","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.217","00:00:01.219","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.219","00:00:01.220","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.223","00:00:01.224","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.224","00:00:01.225","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.225","00:00:01.227","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.227","00:00:01.228","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.228","00:00:01.229","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.229","00:00:01.230","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.230","00:00:01.232","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.232","00:00:01.233","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.233","00:00:01.234","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.234","00:00:01.236","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.236","00:00:01.237","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.237","00:00:01.238","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.238","00:00:01.239","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.239","00:00:01.241","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.241","00:00:01.242","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.242","00:00:01.243","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.243","00:00:01.244","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.244","00:00:01.246","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.246","00:00:01.247","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.247","00:00:01.248","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.248","00:00:01.249","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.249","00:00:01.251","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.251","00:00:01.252","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.252","00:00:01.253","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.253","00:00:01.254","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.254","00:00:01.256","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.256","00:00:01.257","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.257","00:00:01.258","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.258","00:00:01.260","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.260","00:00:01.261","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.261","00:00:01.262","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.262","00:00:01.263","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.263","00:00:01.265","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.265","00:00:01.266","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.266","00:00:01.267","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.267","00:00:01.269","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.269","00:00:01.270","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.270","00:00:01.271","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.271","00:00:01.272","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.272","00:00:01.274","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.274","00:00:01.275","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.275","00:00:01.276","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.276","00:00:01.277","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.277","00:00:01.279","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.279","00:00:01.280","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.280","00:00:01.281","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.281","00:00:01.282","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.282","00:00:01.284","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.284","00:00:01.285","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.285","00:00:01.286","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.286","00:00:01.288","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.288","00:00:01.289","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.289","00:00:01.290","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.290","00:00:01.291","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.291","00:00:01.293","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.293","00:00:01.294","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.294","00:00:01.295","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.295","00:00:01.296","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.296","00:00:01.298","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.298","00:00:01.299","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.299","00:00:01.300","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.300","00:00:01.302","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.302","00:00:01.303","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.303","00:00:01.304","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.304","00:00:01.306","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.306","00:00:01.307","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.307","00:00:01.308","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.308","00:00:01.310","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.310","00:00:01.311","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.311","00:00:01.312","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.312","00:00:01.314","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.314","00:00:01.315","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.315","00:00:01.316","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.316","00:00:01.318","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.318","00:00:01.319","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.319","00:00:01.320","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.320","00:00:01.322","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.322","00:00:01.323","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.323","00:00:01.324","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.324","00:00:01.326","0.002s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.326","00:00:01.327","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.327","00:00:01.328","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.328","00:00:01.329","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.329","00:00:01.331","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.331","00:00:01.332","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.332","00:00:01.333","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.333","00:00:01.334","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.334","00:00:01.336","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.336","00:00:01.337","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.337","00:00:01.338","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.338","00:00:01.339","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.339","00:00:01.341","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.341","00:00:01.342","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.342","00:00:01.343","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.343","00:00:01.344","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.344","00:00:01.346","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.346","00:00:01.347","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.347","00:00:01.348","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.348","00:00:01.350","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.350","00:00:01.351","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.351","00:00:01.352","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.352","00:00:01.353","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.353","00:00:01.355","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.355","00:00:01.356","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.356","00:00:01.357","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.357","00:00:01.359","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.359","00:00:01.360","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.360","00:00:01.361","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.361","00:00:01.362","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.362","00:00:01.364","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.364","00:00:01.365","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.365","00:00:01.366","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.366","00:00:01.367","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.367","00:00:01.369","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.369","00:00:01.370","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.370","00:00:01.371","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.371","00:00:01.373","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.373","00:00:01.374","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.374","00:00:01.375","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.375","00:00:01.376","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.376","00:00:01.378","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.378","00:00:01.379","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.379","00:00:01.380","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.380","00:00:01.382","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.382","00:00:01.383","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.383","00:00:01.384","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.384","00:00:01.385","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.385","00:00:01.387","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.387","00:00:01.388","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.388","00:00:01.389","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.395","00:00:01.396","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.396","00:00:01.397","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.397","00:00:01.398","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.398","00:00:01.400","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.400","00:00:01.401","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.401","00:00:01.402","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.402","00:00:01.404","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.404","00:00:01.405","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.405","00:00:01.406","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.406","00:00:01.408","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.408","00:00:01.409","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.409","00:00:01.410","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.410","00:00:01.412","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.412","00:00:01.413","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.413","00:00:01.414","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.414","00:00:01.416","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.416","00:00:01.417","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.417","00:00:01.418","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.418","00:00:01.420","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.420","00:00:01.421","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.421","00:00:01.422","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.422","00:00:01.424","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.424","00:00:01.425","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.425","00:00:01.426","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.426","00:00:01.428","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.428","00:00:01.429","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.429","00:00:01.430","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.430","00:00:01.432","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.432","00:00:01.433","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.433","00:00:01.434","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.434","00:00:01.436","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.436","00:00:01.437","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.437","00:00:01.438","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.438","00:00:01.440","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.440","00:00:01.441","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.441","00:00:01.442","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.442","00:00:01.444","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.444","00:00:01.445","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.445","00:00:01.446","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.446","00:00:01.447","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.447","00:00:01.449","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.449","00:00:01.450","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.450","00:00:01.452","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.452","00:00:01.453","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.453","00:00:01.454","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.454","00:00:01.455","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.455","00:00:01.457","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.457","00:00:01.458","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.458","00:00:01.460","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.460","00:00:01.461","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.461","00:00:01.462","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.462","00:00:01.464","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.464","00:00:01.465","0.001s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.465","00:00:01.467","0.002s"],[399,9,"00:00:00.558","00:00:00.580","00:00:00.571","00:00:00.571","0.001s"],[399,9,"00:00:00.558","00:00:00.580","00:00:00.571","00:00:00.573","0.001s"],[399,9,"00:00:00.558","00:00:00.580","00:00:00.573","00:00:00.574","0.001s"],[399,9,"00:00:00.558","00:00:00.580","00:00:00.574","00:00:00.575","0.001s"],[399,9,"00:00:00.558","00:00:00.580","00:00:00.575","00:00:00.576","0.001s"],[399,9,"00:00:00.558","00:00:00.580","00:00:00.576","00:00:00.578","0.001s"],[399,9,"00:00:00.558","00:00:00.580","00:00:00.578","00:00:00.579","0.001s"],[399,9,"00:00:00.558","00:00:00.580","00:00:00.579","00:00:00.580","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.813","00:00:00.814","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.814","00:00:00.815","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.815","00:00:00.816","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.816","00:00:00.818","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.818","00:00:00.819","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.819","00:00:00.820","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.820","00:00:00.821","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.821","00:00:00.823","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.823","00:00:00.824","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.824","00:00:00.825","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.825","00:00:00.827","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.827","00:00:00.828","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.828","00:00:00.829","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.829","00:00:00.831","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.831","00:00:00.832","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.832","00:00:00.833","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.833","00:00:00.834","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.834","00:00:00.835","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.835","00:00:00.837","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.837","00:00:00.838","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.838","00:00:00.839","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.839","00:00:00.840","0.001s"],[328,24,"00:00:00.799","00:00:00.842","00:00:00.840","00:00:00.842","0.002s"],[4,6,"00:00:01.161","00:00:01.176","00:00:01.170","00:00:01.171","0.001s"],[4,6,"00:00:01.161","00:00:01.176","00:00:01.171","00:00:01.172","0.001s"],[4,6,"00:00:01.161","00:00:01.176","00:00:01.172","00:00:01.173","0.001s"],[4,6,"00:00:01.161","00:00:01.176","00:00:01.173","00:00:01.175","0.001s"],[4,6,"00:00:01.161","00:00:01.176","00:00:01.175","00:00:01.176","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.224","00:00:01.225","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.225","00:00:01.227","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.227","00:00:01.228","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.228","00:00:01.229","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.229","00:00:01.230","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.230","00:00:01.232","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.232","00:00:01.233","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.233","00:00:01.234","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.234","00:00:01.235","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.235","00:00:01.237","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.237","00:00:01.238","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.238","00:00:01.239","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.239","00:00:01.241","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.241","00:00:01.242","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.242","00:00:01.243","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.243","00:00:01.244","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.244","00:00:01.245","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.245","00:00:01.247","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.247","00:00:01.248","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.248","00:00:01.249","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.249","00:00:01.251","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.251","00:00:01.252","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.252","00:00:01.253","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.253","00:00:01.254","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.254","00:00:01.256","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.256","00:00:01.257","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.257","00:00:01.258","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.258","00:00:01.260","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.260","00:00:01.261","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.261","00:00:01.262","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.262","00:00:01.263","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.263","00:00:01.265","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.265","00:00:01.266","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.266","00:00:01.267","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.267","00:00:01.268","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.268","00:00:01.270","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.270","00:00:01.271","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.271","00:00:01.272","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.272","00:00:01.274","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.274","00:00:01.275","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.275","00:00:01.276","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.276","00:00:01.277","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.277","00:00:01.279","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.279","00:00:01.280","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.280","00:00:01.281","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.281","00:00:01.282","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.282","00:00:01.284","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.284","00:00:01.285","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.285","00:00:01.286","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.286","00:00:01.288","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.288","00:00:01.289","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.289","00:00:01.290","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.290","00:00:01.291","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.291","00:00:01.293","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.293","00:00:01.294","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.294","00:00:01.295","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.295","00:00:01.296","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.296","00:00:01.298","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.298","00:00:01.299","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.299","00:00:01.300","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.300","00:00:01.302","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.302","00:00:01.303","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.303","00:00:01.304","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.304","00:00:01.306","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.306","00:00:01.307","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.307","00:00:01.308","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.308","00:00:01.310","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.310","00:00:01.311","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.311","00:00:01.312","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.312","00:00:01.314","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.314","00:00:01.315","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.315","00:00:01.316","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.316","00:00:01.317","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.317","00:00:01.319","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.319","00:00:01.320","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.320","00:00:01.322","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.322","00:00:01.323","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.323","00:00:01.324","0.001s"],[205,80,"00:00:01.213","00:00:01.326","00:00:01.324","00:00:01.326","0.002s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.396","00:00:01.397","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.397","00:00:01.398","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.398","00:00:01.400","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.400","00:00:01.401","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.401","00:00:01.402","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.402","00:00:01.404","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.404","00:00:01.405","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.405","00:00:01.406","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.406","00:00:01.408","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.408","00:00:01.409","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.409","00:00:01.410","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.410","00:00:01.412","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.412","00:00:01.413","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.413","00:00:01.414","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.414","00:00:01.416","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.416","00:00:01.417","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.417","00:00:01.418","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.418","00:00:01.420","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.420","00:00:01.421","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.421","00:00:01.422","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.422","00:00:01.424","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.424","00:00:01.425","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.425","00:00:01.426","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.426","00:00:01.428","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.428","00:00:01.429","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.429","00:00:01.430","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.430","00:00:01.432","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.432","00:00:01.433","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.433","00:00:01.434","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.434","00:00:01.436","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.436","00:00:01.437","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.437","00:00:01.438","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.438","00:00:01.440","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.440","00:00:01.441","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.441","00:00:01.442","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.442","00:00:01.444","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.444","00:00:01.445","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.445","00:00:01.446","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.446","00:00:01.448","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.448","00:00:01.449","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.449","00:00:01.450","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.450","00:00:01.452","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.452","00:00:01.453","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.453","00:00:01.454","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.454","00:00:01.455","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.455","00:00:01.457","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.457","00:00:01.458","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.458","00:00:01.460","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.460","00:00:01.461","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.461","00:00:01.462","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.462","00:00:01.464","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.464","00:00:01.465","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.467","00:00:01.467","0.000s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.467","00:00:01.468","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.468","00:00:01.469","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.469","00:00:01.470","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.470","00:00:01.471","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.471","00:00:01.472","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.472","00:00:01.474","0.002s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.474","00:00:01.475","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.475","00:00:01.476","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.476","00:00:01.477","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.477","00:00:01.478","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.478","00:00:01.480","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.480","00:00:01.481","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.481","00:00:01.482","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.482","00:00:01.483","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.483","00:00:01.484","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.484","00:00:01.485","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.485","00:00:01.487","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.487","00:00:01.488","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.488","00:00:01.489","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.489","00:00:01.490","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.490","00:00:01.491","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.491","00:00:01.493","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.493","00:00:01.494","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.494","00:00:01.495","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.495","00:00:01.496","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.496","00:00:01.497","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.497","00:00:01.499","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.499","00:00:01.500","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.500","00:00:01.501","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.501","00:00:01.502","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.502","00:00:01.503","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.503","00:00:01.505","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.505","00:00:01.506","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.506","00:00:01.507","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.507","00:00:01.508","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.508","00:00:01.510","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.510","00:00:01.511","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.511","00:00:01.512","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.512","00:00:01.513","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.513","00:00:01.514","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.514","00:00:01.516","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.516","00:00:01.517","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.517","00:00:01.518","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.518","00:00:01.519","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.519","00:00:01.521","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.521","00:00:01.522","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.522","00:00:01.523","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.523","00:00:01.524","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.524","00:00:01.525","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.525","00:00:01.527","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.527","00:00:01.528","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.528","00:00:01.529","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.529","00:00:01.530","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.530","00:00:01.531","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.531","00:00:01.533","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.533","00:00:01.534","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.534","00:00:01.535","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.535","00:00:01.536","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.536","00:00:01.538","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.538","00:00:01.539","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.539","00:00:01.540","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.540","00:00:01.541","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.541","00:00:01.542","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.542","00:00:01.544","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.544","00:00:01.545","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.545","00:00:01.546","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.546","00:00:01.547","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.547","00:00:01.548","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.548","00:00:01.550","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.550","00:00:01.551","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.551","00:00:01.552","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.552","00:00:01.553","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.553","00:00:01.555","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.555","00:00:01.556","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.556","00:00:01.557","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.557","00:00:01.559","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.559","00:00:01.560","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.560","00:00:01.561","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.561","00:00:01.562","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.562","00:00:01.563","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.563","00:00:01.565","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.568","00:00:01.570","0.002s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.570","00:00:01.572","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.572","00:00:01.573","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.573","00:00:01.574","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.574","00:00:01.575","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.575","00:00:01.576","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.576","00:00:01.578","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.578","00:00:01.579","0.002s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.579","00:00:01.580","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.580","00:00:01.581","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.581","00:00:01.582","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.582","00:00:01.584","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.584","00:00:01.585","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.585","00:00:01.586","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.586","00:00:01.587","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.587","00:00:01.589","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.589","00:00:01.590","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.590","00:00:01.591","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.591","00:00:01.592","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.592","00:00:01.593","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.593","00:00:01.595","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.595","00:00:01.596","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.596","00:00:01.598","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.598","00:00:01.599","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.599","00:00:01.600","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.600","00:00:01.601","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.601","00:00:01.602","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.602","00:00:01.603","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.603","00:00:01.604","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.604","00:00:01.606","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.606","00:00:01.607","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.607","00:00:01.608","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.608","00:00:01.609","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.609","00:00:01.610","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.610","00:00:01.612","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.612","00:00:01.613","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.613","00:00:01.614","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.614","00:00:01.615","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.615","00:00:01.617","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.617","00:00:01.618","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.618","00:00:01.619","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.619","00:00:01.621","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.621","00:00:01.622","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.622","00:00:01.623","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.623","00:00:01.624","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.624","00:00:01.626","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.626","00:00:01.626","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.626","00:00:01.628","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.628","00:00:01.629","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.629","00:00:01.630","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.630","00:00:01.631","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.631","00:00:01.632","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.632","00:00:01.634","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.634","00:00:01.635","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.635","00:00:01.636","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.636","00:00:01.637","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.637","00:00:01.639","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.639","00:00:01.640","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.640","00:00:01.641","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.641","00:00:01.642","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.642","00:00:01.644","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.644","00:00:01.645","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.645","00:00:01.646","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.646","00:00:01.648","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.648","00:00:01.649","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.649","00:00:01.650","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.650","00:00:01.651","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.651","00:00:01.652","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.652","00:00:01.653","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.653","00:00:01.655","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.655","00:00:01.656","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.656","00:00:01.657","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.657","00:00:01.658","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.658","00:00:01.659","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.659","00:00:01.661","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.661","00:00:01.662","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.662","00:00:01.663","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.663","00:00:01.664","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.664","00:00:01.666","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.666","00:00:01.667","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.667","00:00:01.668","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.668","00:00:01.669","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.669","00:00:01.671","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.671","00:00:01.672","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.672","00:00:01.673","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.673","00:00:01.675","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.675","00:00:01.676","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.676","00:00:01.677","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.677","00:00:01.678","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.678","00:00:01.679","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.679","00:00:01.680","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.680","00:00:01.681","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.681","00:00:01.683","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.683","00:00:01.684","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.684","00:00:01.685","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.685","00:00:01.686","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.686","00:00:01.688","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.688","00:00:01.689","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.689","00:00:01.690","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.690","00:00:01.691","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.691","00:00:01.692","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.692","00:00:01.694","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.694","00:00:01.695","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.695","00:00:01.696","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.696","00:00:01.698","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.698","00:00:01.699","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.699","00:00:01.700","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.700","00:00:01.702","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.702","00:00:01.703","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.703","00:00:01.703","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.703","00:00:01.705","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.705","00:00:01.706","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.706","00:00:01.707","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.707","00:00:01.708","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.708","00:00:01.710","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.710","00:00:01.711","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.711","00:00:01.712","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.712","00:00:01.714","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.714","00:00:01.715","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.715","00:00:01.716","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.716","00:00:01.717","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.717","00:00:01.719","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.719","00:00:01.720","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.720","00:00:01.721","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.721","00:00:01.723","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.723","00:00:01.724","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.724","00:00:01.725","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.725","00:00:01.727","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.727","00:00:01.728","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.728","00:00:01.729","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.729","00:00:01.730","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.730","00:00:01.731","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.731","00:00:01.733","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.733","00:00:01.734","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.734","00:00:01.735","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.735","00:00:01.736","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.736","00:00:01.738","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.738","00:00:01.739","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.739","00:00:01.740","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.740","00:00:01.742","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.742","00:00:01.743","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.743","00:00:01.744","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.744","00:00:01.745","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.745","00:00:01.746","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.746","00:00:01.748","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.748","00:00:01.749","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.749","00:00:01.750","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.750","00:00:01.752","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.752","00:00:01.753","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.753","00:00:01.754","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.754","00:00:01.755","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.755","00:00:01.757","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.757","00:00:01.758","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.758","00:00:01.759","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.759","00:00:01.760","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.760","00:00:01.762","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.762","00:00:01.763","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.763","00:00:01.764","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.764","00:00:01.766","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.766","00:00:01.767","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.767","00:00:01.768","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.768","00:00:01.769","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.769","00:00:01.771","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.771","00:00:01.772","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.772","00:00:01.773","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.773","00:00:01.774","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.774","00:00:01.776","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.776","00:00:01.777","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.777","00:00:01.778","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.778","00:00:01.779","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.779","00:00:01.781","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.781","00:00:01.782","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.782","00:00:01.784","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.784","00:00:01.785","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.785","00:00:01.786","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.786","00:00:01.787","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.787","00:00:01.788","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.788","00:00:01.790","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.790","00:00:01.791","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.791","00:00:01.792","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.792","00:00:01.793","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.793","00:00:01.795","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.795","00:00:01.796","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.796","00:00:01.797","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.797","00:00:01.799","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.799","00:00:01.800","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.800","00:00:01.801","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.801","00:00:01.803","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.803","00:00:01.804","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.804","00:00:01.805","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.805","00:00:01.806","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.806","00:00:01.807","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.807","00:00:01.809","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.809","00:00:01.810","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.810","00:00:01.811","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.811","00:00:01.812","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.812","00:00:01.814","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.814","00:00:01.815","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.815","00:00:01.817","0.002s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.817","00:00:01.818","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.818","00:00:01.819","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.819","00:00:01.820","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.820","00:00:01.821","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.821","00:00:01.823","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.823","00:00:01.824","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.824","00:00:01.825","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.825","00:00:01.827","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.827","00:00:01.828","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.828","00:00:01.829","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.829","00:00:01.830","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.830","00:00:01.831","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.831","00:00:01.833","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.833","00:00:01.834","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.834","00:00:01.835","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.835","00:00:01.836","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.836","00:00:01.838","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.838","00:00:01.839","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.839","00:00:01.841","0.002s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.841","00:00:01.842","0.002s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.842","00:00:01.843","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.843","00:00:01.844","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.844","00:00:01.846","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.846","00:00:01.847","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.851","00:00:01.852","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.852","00:00:01.853","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.857","00:00:01.857","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.857","00:00:01.858","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.858","00:00:01.860","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.860","00:00:01.862","0.002s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.862","00:00:01.862","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.862","00:00:01.864","0.002s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.864","00:00:01.865","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.865","00:00:01.866","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.866","00:00:01.867","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.867","00:00:01.869","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.869","00:00:01.870","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.870","00:00:01.871","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.871","00:00:01.873","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.873","00:00:01.874","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.874","00:00:01.875","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.875","00:00:01.877","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.877","00:00:01.878","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.878","00:00:01.879","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.879","00:00:01.881","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.881","00:00:01.882","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.882","00:00:01.883","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.885","00:00:01.886","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.886","00:00:01.887","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.887","00:00:01.889","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.889","00:00:01.890","0.001s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.890","00:00:01.891","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.475","00:00:01.476","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.476","00:00:01.477","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.477","00:00:01.478","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.478","00:00:01.480","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.480","00:00:01.481","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.481","00:00:01.482","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.482","00:00:01.483","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.483","00:00:01.484","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.484","00:00:01.485","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.485","00:00:01.487","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.487","00:00:01.488","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.488","00:00:01.489","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.489","00:00:01.490","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.490","00:00:01.491","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.491","00:00:01.493","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.493","00:00:01.494","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.494","00:00:01.495","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.495","00:00:01.496","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.496","00:00:01.497","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.497","00:00:01.499","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.499","00:00:01.500","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.500","00:00:01.501","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.501","00:00:01.502","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.502","00:00:01.503","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.503","00:00:01.505","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.505","00:00:01.506","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.506","00:00:01.507","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.507","00:00:01.508","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.508","00:00:01.510","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.510","00:00:01.511","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.511","00:00:01.512","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.512","00:00:01.513","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.513","00:00:01.514","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.514","00:00:01.516","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.516","00:00:01.517","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.517","00:00:01.518","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.518","00:00:01.519","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.519","00:00:01.521","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.521","00:00:01.522","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.522","00:00:01.523","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.523","00:00:01.524","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.524","00:00:01.525","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.525","00:00:01.527","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.527","00:00:01.528","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.528","00:00:01.529","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.529","00:00:01.530","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.530","00:00:01.531","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.531","00:00:01.533","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.533","00:00:01.534","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.534","00:00:01.535","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.535","00:00:01.536","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.536","00:00:01.538","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.538","00:00:01.539","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.539","00:00:01.540","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.540","00:00:01.541","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.541","00:00:01.542","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.542","00:00:01.544","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.544","00:00:01.545","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.545","00:00:01.546","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.546","00:00:01.547","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.547","00:00:01.548","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.548","00:00:01.550","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.550","00:00:01.551","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.551","00:00:01.552","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.552","00:00:01.553","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.553","00:00:01.555","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.555","00:00:01.556","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.556","00:00:01.557","0.002s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.557","00:00:01.559","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.559","00:00:01.559","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.559","00:00:01.561","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.561","00:00:01.562","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.562","00:00:01.563","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.563","00:00:01.565","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.568","00:00:01.570","0.002s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.570","00:00:01.571","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.571","00:00:01.573","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.573","00:00:01.574","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.574","00:00:01.575","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.575","00:00:01.576","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.576","00:00:01.578","0.001s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.578","00:00:01.579","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.570","00:00:01.572","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.572","00:00:01.573","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.573","00:00:01.574","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.574","00:00:01.575","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.575","00:00:01.576","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.576","00:00:01.578","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.578","00:00:01.579","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.579","00:00:01.580","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.580","00:00:01.581","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.581","00:00:01.582","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.582","00:00:01.584","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.584","00:00:01.585","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.585","00:00:01.586","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.586","00:00:01.587","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.587","00:00:01.588","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.588","00:00:01.590","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.590","00:00:01.591","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.591","00:00:01.592","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.592","00:00:01.593","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.593","00:00:01.595","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.595","00:00:01.596","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.596","00:00:01.597","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.597","00:00:01.599","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.599","00:00:01.600","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.600","00:00:01.601","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.601","00:00:01.602","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.602","00:00:01.603","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.603","00:00:01.604","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.604","00:00:01.606","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.606","00:00:01.607","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.607","00:00:01.608","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.608","00:00:01.609","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.609","00:00:01.610","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.610","00:00:01.612","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.612","00:00:01.613","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.613","00:00:01.614","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.614","00:00:01.615","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.615","00:00:01.617","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.617","00:00:01.618","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.618","00:00:01.619","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.619","00:00:01.621","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.621","00:00:01.622","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.622","00:00:01.623","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.623","00:00:01.624","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.624","00:00:01.625","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.625","00:00:01.626","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.626","00:00:01.628","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.628","00:00:01.629","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.629","00:00:01.630","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.630","00:00:01.631","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.631","00:00:01.632","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.632","00:00:01.634","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.634","00:00:01.635","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.635","00:00:01.636","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.636","00:00:01.637","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.637","00:00:01.639","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.639","00:00:01.640","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.640","00:00:01.641","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.641","00:00:01.642","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.642","00:00:01.644","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.644","00:00:01.645","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.645","00:00:01.646","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.646","00:00:01.648","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.648","00:00:01.649","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.649","00:00:01.650","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.650","00:00:01.651","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.651","00:00:01.652","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.652","00:00:01.653","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.653","00:00:01.654","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.654","00:00:01.656","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.656","00:00:01.657","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.657","00:00:01.658","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.658","00:00:01.659","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.659","00:00:01.661","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.661","00:00:01.662","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.662","00:00:01.663","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.663","00:00:01.664","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.664","00:00:01.665","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.665","00:00:01.667","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.667","00:00:01.668","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.668","00:00:01.669","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.669","00:00:01.671","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.671","00:00:01.672","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.672","00:00:01.673","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.673","00:00:01.675","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.675","00:00:01.676","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.676","00:00:01.677","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.677","00:00:01.678","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.678","00:00:01.679","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.679","00:00:01.680","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.680","00:00:01.681","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.681","00:00:01.683","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.683","00:00:01.684","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.684","00:00:01.685","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.685","00:00:01.686","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.686","00:00:01.687","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.687","00:00:01.689","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.689","00:00:01.690","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.690","00:00:01.691","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.691","00:00:01.692","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.692","00:00:01.694","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.694","00:00:01.695","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.695","00:00:01.696","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.696","00:00:01.697","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.697","00:00:01.699","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.699","00:00:01.700","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.700","00:00:01.701","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.701","00:00:01.702","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.702","00:00:01.703","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.703","00:00:01.705","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.705","00:00:01.706","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.706","00:00:01.707","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.707","00:00:01.708","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.708","00:00:01.710","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.710","00:00:01.711","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.711","00:00:01.712","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.712","00:00:01.713","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.713","00:00:01.715","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.715","00:00:01.716","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.716","00:00:01.717","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.717","00:00:01.719","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.719","00:00:01.720","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.720","00:00:01.721","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.721","00:00:01.723","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.723","00:00:01.724","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.724","00:00:01.725","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.725","00:00:01.727","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.727","00:00:01.728","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.728","00:00:01.729","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.729","00:00:01.730","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.730","00:00:01.731","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.731","00:00:01.732","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.732","00:00:01.734","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.734","00:00:01.735","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.735","00:00:01.736","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.736","00:00:01.738","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.738","00:00:01.739","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.739","00:00:01.740","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.740","00:00:01.742","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.742","00:00:01.743","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.743","00:00:01.744","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.744","00:00:01.745","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.745","00:00:01.746","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.746","00:00:01.748","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.748","00:00:01.749","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.749","00:00:01.750","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.750","00:00:01.751","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.751","00:00:01.753","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.753","00:00:01.754","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.754","00:00:01.755","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.755","00:00:01.757","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.757","00:00:01.758","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.758","00:00:01.759","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.759","00:00:01.760","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.760","00:00:01.762","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.762","00:00:01.763","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.763","00:00:01.764","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.764","00:00:01.766","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.766","00:00:01.767","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.767","00:00:01.768","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.768","00:00:01.769","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.769","00:00:01.770","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.770","00:00:01.772","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.772","00:00:01.773","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.773","00:00:01.774","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.774","00:00:01.776","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.776","00:00:01.777","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.777","00:00:01.778","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.778","00:00:01.779","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.779","00:00:01.781","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.781","00:00:01.782","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.782","00:00:01.783","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.783","00:00:01.785","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.785","00:00:01.786","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.786","00:00:01.787","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.787","00:00:01.788","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.788","00:00:01.790","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.790","00:00:01.791","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.791","00:00:01.792","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.792","00:00:01.793","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.793","00:00:01.795","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.795","00:00:01.796","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.796","00:00:01.797","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.797","00:00:01.799","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.799","00:00:01.800","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.800","00:00:01.801","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.801","00:00:01.803","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.803","00:00:01.804","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.804","00:00:01.805","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.805","00:00:01.806","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.806","00:00:01.807","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.807","00:00:01.809","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.809","00:00:01.810","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.810","00:00:01.811","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.811","00:00:01.812","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.812","00:00:01.814","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.814","00:00:01.815","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.815","00:00:01.817","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.817","00:00:01.818","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.818","00:00:01.819","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.819","00:00:01.820","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.820","00:00:01.821","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.821","00:00:01.822","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.822","00:00:01.824","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.824","00:00:01.825","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.825","00:00:01.826","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.826","00:00:01.828","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.828","00:00:01.829","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.829","00:00:01.830","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.830","00:00:01.831","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.831","00:00:01.833","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.833","00:00:01.834","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.834","00:00:01.835","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.835","00:00:01.836","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.836","00:00:01.838","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.838","00:00:01.839","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.839","00:00:01.841","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.841","00:00:01.842","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.842","00:00:01.843","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.843","00:00:01.844","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.844","00:00:01.846","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.846","00:00:01.847","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.851","00:00:01.852","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.852","00:00:01.853","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.857","00:00:01.857","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.857","00:00:01.858","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.858","00:00:01.860","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.860","00:00:01.862","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.862","00:00:01.862","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.862","00:00:01.864","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.864","00:00:01.865","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.865","00:00:01.866","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.866","00:00:01.867","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.867","00:00:01.869","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.869","00:00:01.870","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.870","00:00:01.871","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.871","00:00:01.873","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.873","00:00:01.874","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.874","00:00:01.875","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.875","00:00:01.877","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.877","00:00:01.878","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.878","00:00:01.879","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.879","00:00:01.881","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.881","00:00:01.882","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.882","00:00:01.883","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.885","00:00:01.886","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.886","00:00:01.887","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.887","00:00:01.889","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.889","00:00:01.890","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.890","00:00:01.891","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.896","00:00:01.898","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.898","00:00:01.899","0.000s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.899","00:00:01.900","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.900","00:00:01.901","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.901","00:00:01.902","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.902","00:00:01.904","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.906","00:00:01.908","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.913","00:00:01.914","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.914","00:00:01.915","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.915","00:00:01.916","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.916","00:00:01.917","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.917","00:00:01.919","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.919","00:00:01.920","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.920","00:00:01.921","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.921","00:00:01.923","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.923","00:00:01.924","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.924","00:00:01.925","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.925","00:00:01.927","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.927","00:00:01.928","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.928","00:00:01.929","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.929","00:00:01.931","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.931","00:00:01.932","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.932","00:00:01.933","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.933","00:00:01.935","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.935","00:00:01.936","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.936","00:00:01.937","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.937","00:00:01.939","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.939","00:00:01.940","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.940","00:00:01.941","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.941","00:00:01.942","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.942","00:00:01.944","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.944","00:00:01.945","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.945","00:00:01.946","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.946","00:00:01.948","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.948","00:00:01.949","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.949","00:00:01.950","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.950","00:00:01.952","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.952","00:00:01.953","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.953","00:00:01.955","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.955","00:00:01.956","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.956","00:00:01.957","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.957","00:00:01.959","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.959","00:00:01.960","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.960","00:00:01.961","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.961","00:00:01.962","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.962","00:00:01.964","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.964","00:00:01.965","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.965","00:00:01.966","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.966","00:00:01.968","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.968","00:00:01.969","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.969","00:00:01.971","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.971","00:00:01.972","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.972","00:00:01.973","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.973","00:00:01.974","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.974","00:00:01.976","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.976","00:00:01.977","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.977","00:00:01.978","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.978","00:00:01.980","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.980","00:00:01.981","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.981","00:00:01.982","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.982","00:00:01.984","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.984","00:00:01.985","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.985","00:00:01.986","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.986","00:00:01.988","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.988","00:00:01.989","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.989","00:00:01.990","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.990","00:00:01.992","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.992","00:00:01.993","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.993","00:00:01.994","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.994","00:00:01.996","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.996","00:00:01.997","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.997","00:00:01.998","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.998","00:00:02.000","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.000","00:00:02.001","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.001","00:00:02.002","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.002","00:00:02.004","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.004","00:00:02.005","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.005","00:00:02.007","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.007","00:00:02.008","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.008","00:00:02.009","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.009","00:00:02.011","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.011","00:00:02.012","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.012","00:00:02.013","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.013","00:00:02.014","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.014","00:00:02.016","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.016","00:00:02.017","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.017","00:00:02.018","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.018","00:00:02.020","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.020","00:00:02.021","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.021","00:00:02.023","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.023","00:00:02.024","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.024","00:00:02.025","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.025","00:00:02.027","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.027","00:00:02.028","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.028","00:00:02.029","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.029","00:00:02.031","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.031","00:00:02.032","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.032","00:00:02.033","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.033","00:00:02.035","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.035","00:00:02.036","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.036","00:00:02.037","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.037","00:00:02.038","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.038","00:00:02.040","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.040","00:00:02.041","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.041","00:00:02.042","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.042","00:00:02.044","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.044","00:00:02.045","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.045","00:00:02.046","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.046","00:00:02.048","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.048","00:00:02.049","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.049","00:00:02.050","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.050","00:00:02.052","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.052","00:00:02.053","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.053","00:00:02.055","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.057","00:00:02.057","0.000s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.057","00:00:02.059","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.059","00:00:02.060","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.060","00:00:02.062","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.068","00:00:02.069","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.069","00:00:02.071","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.071","00:00:02.072","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.072","00:00:02.073","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.073","00:00:02.074","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.074","00:00:02.076","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.076","00:00:02.077","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.077","00:00:02.078","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.078","00:00:02.080","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.080","00:00:02.081","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.081","00:00:02.082","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.082","00:00:02.084","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.084","00:00:02.085","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.085","00:00:02.087","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.087","00:00:02.088","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.088","00:00:02.089","0.001s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.089","00:00:02.091","0.002s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.852","00:00:01.853","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.856","00:00:01.857","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.857","00:00:01.858","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.858","00:00:01.860","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.860","00:00:01.861","0.002s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.861","00:00:01.862","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.862","00:00:01.864","0.002s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.864","00:00:01.865","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.865","00:00:01.866","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.866","00:00:01.867","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.867","00:00:01.869","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.869","00:00:01.870","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.870","00:00:01.871","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.871","00:00:01.872","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.872","00:00:01.874","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.874","00:00:01.875","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.875","00:00:01.877","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.877","00:00:01.878","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.878","00:00:01.879","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.879","00:00:01.880","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.880","00:00:01.882","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.882","00:00:01.883","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.885","00:00:01.886","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.886","00:00:01.887","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.887","00:00:01.889","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.889","00:00:01.890","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.890","00:00:01.891","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.896","00:00:01.898","0.002s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.898","00:00:01.899","0.000s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.899","00:00:01.900","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.900","00:00:01.901","0.002s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.901","00:00:01.902","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.902","00:00:01.904","0.002s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.906","00:00:01.908","0.002s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.913","00:00:01.914","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.914","00:00:01.915","0.001s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.915","00:00:01.916","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.857","00:00:01.858","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.858","00:00:01.860","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.860","00:00:01.862","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.862","00:00:01.862","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.862","00:00:01.864","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.864","00:00:01.865","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.865","00:00:01.866","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.866","00:00:01.867","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.867","00:00:01.869","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.869","00:00:01.870","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.870","00:00:01.871","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.871","00:00:01.873","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.873","00:00:01.874","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.874","00:00:01.875","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.875","00:00:01.877","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.877","00:00:01.878","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.878","00:00:01.879","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.879","00:00:01.881","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.881","00:00:01.882","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.882","00:00:01.883","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.885","00:00:01.886","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.886","00:00:01.887","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.887","00:00:01.889","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.889","00:00:01.890","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.890","00:00:01.891","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.896","00:00:01.898","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.898","00:00:01.899","0.000s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.899","00:00:01.900","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.900","00:00:01.901","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.901","00:00:01.902","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.902","00:00:01.904","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.906","00:00:01.908","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.913","00:00:01.914","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.914","00:00:01.915","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.915","00:00:01.916","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.916","00:00:01.917","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.917","00:00:01.919","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.919","00:00:01.920","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.920","00:00:01.921","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.921","00:00:01.923","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.923","00:00:01.924","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.924","00:00:01.925","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.925","00:00:01.927","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.927","00:00:01.928","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.928","00:00:01.929","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.929","00:00:01.931","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.931","00:00:01.932","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.932","00:00:01.933","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.933","00:00:01.935","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.935","00:00:01.936","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.936","00:00:01.937","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.937","00:00:01.939","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.939","00:00:01.940","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.940","00:00:01.941","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.941","00:00:01.942","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.942","00:00:01.944","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.944","00:00:01.945","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.945","00:00:01.947","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.947","00:00:01.948","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.948","00:00:01.949","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.949","00:00:01.951","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.951","00:00:01.952","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.952","00:00:01.953","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.953","00:00:01.955","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.955","00:00:01.956","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.956","00:00:01.957","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.957","00:00:01.959","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.959","00:00:01.960","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.960","00:00:01.961","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.961","00:00:01.963","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.963","00:00:01.964","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.964","00:00:01.965","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.965","00:00:01.967","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.967","00:00:01.968","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.968","00:00:01.969","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.969","00:00:01.971","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.971","00:00:01.972","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.972","00:00:01.973","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.973","00:00:01.974","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.974","00:00:01.976","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.976","00:00:01.977","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.977","00:00:01.978","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.978","00:00:01.980","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.980","00:00:01.981","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.981","00:00:01.982","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.982","00:00:01.984","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.984","00:00:01.985","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.985","00:00:01.986","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.986","00:00:01.988","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.988","00:00:01.989","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.989","00:00:01.991","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.991","00:00:01.992","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.992","00:00:01.993","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.993","00:00:01.994","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.994","00:00:01.996","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.996","00:00:01.997","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.997","00:00:01.998","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.998","00:00:02.000","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.000","00:00:02.001","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.001","00:00:02.002","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.002","00:00:02.004","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.004","00:00:02.005","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.005","00:00:02.007","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.007","00:00:02.008","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.008","00:00:02.009","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.009","00:00:02.011","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.011","00:00:02.012","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.012","00:00:02.013","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.013","00:00:02.014","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.014","00:00:02.016","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.016","00:00:02.017","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.017","00:00:02.018","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.018","00:00:02.020","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.020","00:00:02.021","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.021","00:00:02.023","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.023","00:00:02.024","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.024","00:00:02.025","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.025","00:00:02.027","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.027","00:00:02.028","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.028","00:00:02.029","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.029","00:00:02.031","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.031","00:00:02.032","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.032","00:00:02.033","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.033","00:00:02.035","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.035","00:00:02.036","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.036","00:00:02.037","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.037","00:00:02.038","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.038","00:00:02.040","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.040","00:00:02.041","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.041","00:00:02.042","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.042","00:00:02.044","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.044","00:00:02.045","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.045","00:00:02.046","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.046","00:00:02.048","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.048","00:00:02.049","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.049","00:00:02.050","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.050","00:00:02.052","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.052","00:00:02.053","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.053","00:00:02.055","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.057","00:00:02.058","0.000s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.058","00:00:02.059","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.059","00:00:02.060","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.060","00:00:02.062","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.068","00:00:02.069","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.069","00:00:02.071","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.071","00:00:02.072","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.072","00:00:02.073","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.073","00:00:02.074","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.074","00:00:02.076","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.076","00:00:02.077","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.077","00:00:02.078","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.078","00:00:02.080","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.080","00:00:02.081","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.081","00:00:02.082","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.082","00:00:02.084","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.084","00:00:02.085","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.085","00:00:02.087","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.087","00:00:02.088","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.088","00:00:02.089","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.092","00:00:02.092","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.092","00:00:02.094","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.094","00:00:02.095","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.095","00:00:02.097","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.097","00:00:02.098","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.098","00:00:02.099","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.099","00:00:02.100","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.100","00:00:02.101","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.101","00:00:02.102","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.102","00:00:02.103","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.103","00:00:02.105","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.105","00:00:02.106","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.106","00:00:02.107","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.107","00:00:02.108","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.108","00:00:02.109","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.109","00:00:02.110","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.110","00:00:02.111","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.111","00:00:02.113","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.113","00:00:02.114","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.114","00:00:02.115","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.115","00:00:02.116","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.116","00:00:02.117","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.117","00:00:02.119","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.119","00:00:02.120","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.120","00:00:02.121","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.121","00:00:02.122","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.122","00:00:02.123","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.123","00:00:02.124","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.124","00:00:02.126","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.126","00:00:02.127","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.127","00:00:02.128","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.128","00:00:02.129","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.129","00:00:02.131","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.131","00:00:02.132","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.132","00:00:02.133","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.133","00:00:02.134","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.134","00:00:02.136","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.136","00:00:02.137","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.137","00:00:02.138","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.138","00:00:02.139","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.139","00:00:02.141","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.141","00:00:02.142","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.142","00:00:02.143","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.143","00:00:02.144","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.144","00:00:02.145","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.145","00:00:02.146","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.146","00:00:02.147","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.147","00:00:02.148","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.148","00:00:02.149","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.149","00:00:02.151","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.151","00:00:02.152","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.152","00:00:02.153","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.153","00:00:02.154","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.154","00:00:02.155","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.155","00:00:02.156","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.156","00:00:02.158","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.158","00:00:02.159","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.159","00:00:02.161","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.161","00:00:02.162","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.162","00:00:02.163","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.163","00:00:02.165","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.165","00:00:02.166","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.166","00:00:02.167","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.167","00:00:02.168","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.168","00:00:02.169","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.169","00:00:02.170","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.170","00:00:02.171","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.171","00:00:02.172","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.172","00:00:02.173","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.173","00:00:02.175","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.175","00:00:02.176","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.176","00:00:02.177","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.177","00:00:02.178","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.178","00:00:02.179","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.179","00:00:02.180","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.180","00:00:02.181","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.181","00:00:02.183","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.183","00:00:02.184","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.184","00:00:02.185","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.185","00:00:02.186","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.186","00:00:02.187","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.187","00:00:02.188","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.188","00:00:02.189","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.189","00:00:02.191","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.191","00:00:02.192","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.192","00:00:02.193","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.193","00:00:02.194","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.194","00:00:02.195","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.195","00:00:02.196","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.196","00:00:02.198","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.198","00:00:02.199","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.199","00:00:02.200","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.200","00:00:02.201","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.201","00:00:02.202","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.202","00:00:02.203","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.203","00:00:02.205","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.205","00:00:02.206","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.206","00:00:02.207","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.207","00:00:02.208","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.210","00:00:02.212","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.212","00:00:02.213","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.213","00:00:02.215","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.215","00:00:02.216","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.216","00:00:02.218","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.220","00:00:02.221","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.221","00:00:02.223","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.223","00:00:02.224","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.224","00:00:02.225","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.229","00:00:02.230","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.230","00:00:02.231","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.231","00:00:02.232","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.232","00:00:02.233","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.233","00:00:02.234","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.234","00:00:02.236","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.236","00:00:02.237","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.237","00:00:02.238","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.238","00:00:02.239","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.239","00:00:02.240","0.001s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.240","00:00:02.242","0.001s"],[770,9,"00:00:01.858","00:00:01.910","00:00:01.898","00:00:01.899","0.000s"],[770,9,"00:00:01.858","00:00:01.910","00:00:01.899","00:00:01.900","0.001s"],[770,9,"00:00:01.858","00:00:01.910","00:00:01.900","00:00:01.901","0.002s"],[770,9,"00:00:01.858","00:00:01.910","00:00:01.901","00:00:01.902","0.001s"],[770,9,"00:00:01.858","00:00:01.910","00:00:01.902","00:00:01.904","0.002s"],[770,9,"00:00:01.858","00:00:01.910","00:00:01.906","00:00:01.908","0.002s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.914","00:00:01.915","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.915","00:00:01.916","0.002s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.916","00:00:01.917","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.917","00:00:01.919","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.919","00:00:01.920","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.920","00:00:01.921","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.921","00:00:01.923","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.923","00:00:01.924","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.924","00:00:01.925","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.925","00:00:01.926","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.926","00:00:01.928","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.928","00:00:01.929","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.929","00:00:01.931","0.002s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.931","00:00:01.932","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.932","00:00:01.933","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.933","00:00:01.935","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.935","00:00:01.936","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.936","00:00:01.937","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.937","00:00:01.939","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.939","00:00:01.940","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.940","00:00:01.941","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.941","00:00:01.942","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.942","00:00:01.944","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.944","00:00:01.945","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.945","00:00:01.946","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.946","00:00:01.948","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.948","00:00:01.949","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.949","00:00:01.950","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.950","00:00:01.952","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.952","00:00:01.953","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.953","00:00:01.955","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.955","00:00:01.956","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.956","00:00:01.957","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.957","00:00:01.958","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.958","00:00:01.960","0.002s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.960","00:00:01.961","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.961","00:00:01.962","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.962","00:00:01.964","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.964","00:00:01.965","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.965","00:00:01.966","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.966","00:00:01.968","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.968","00:00:01.969","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.969","00:00:01.970","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.970","00:00:01.972","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.972","00:00:01.973","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.973","00:00:01.974","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.974","00:00:01.976","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.976","00:00:01.977","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.977","00:00:01.978","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.978","00:00:01.980","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.980","00:00:01.981","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.981","00:00:01.982","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.982","00:00:01.984","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.984","00:00:01.985","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.985","00:00:01.986","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.986","00:00:01.988","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.988","00:00:01.989","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.989","00:00:01.990","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.990","00:00:01.992","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.992","00:00:01.993","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.993","00:00:01.994","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.994","00:00:01.996","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.996","00:00:01.997","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.997","00:00:01.998","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:01.998","00:00:02.000","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.000","00:00:02.001","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.001","00:00:02.002","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.002","00:00:02.004","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.004","00:00:02.005","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.005","00:00:02.006","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.006","00:00:02.008","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.008","00:00:02.009","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.009","00:00:02.010","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.010","00:00:02.012","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.012","00:00:02.013","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.013","00:00:02.014","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.014","00:00:02.016","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.016","00:00:02.017","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.017","00:00:02.018","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.018","00:00:02.020","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.020","00:00:02.021","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.021","00:00:02.022","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.022","00:00:02.024","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.024","00:00:02.025","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.025","00:00:02.026","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.026","00:00:02.028","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.028","00:00:02.029","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.029","00:00:02.031","0.002s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.031","00:00:02.032","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.032","00:00:02.033","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.033","00:00:02.035","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.035","00:00:02.036","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.036","00:00:02.037","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.037","00:00:02.038","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.038","00:00:02.040","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.040","00:00:02.041","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.041","00:00:02.042","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.042","00:00:02.044","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.044","00:00:02.045","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.045","00:00:02.046","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.046","00:00:02.048","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.048","00:00:02.049","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.049","00:00:02.050","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.050","00:00:02.052","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.052","00:00:02.053","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.053","00:00:02.055","0.002s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.057","00:00:02.057","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.057","00:00:02.059","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.059","00:00:02.060","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.060","00:00:02.062","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.068","00:00:02.069","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.069","00:00:02.071","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.071","00:00:02.072","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.072","00:00:02.073","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.073","00:00:02.074","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.074","00:00:02.076","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.076","00:00:02.077","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.077","00:00:02.078","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.078","00:00:02.080","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.080","00:00:02.081","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.081","00:00:02.082","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.082","00:00:02.084","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.084","00:00:02.085","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.085","00:00:02.087","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.087","00:00:02.088","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.088","00:00:02.089","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.089","00:00:02.091","0.002s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.091","00:00:02.092","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.092","00:00:02.094","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.094","00:00:02.095","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.095","00:00:02.096","0.002s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.096","00:00:02.098","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.098","00:00:02.099","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.099","00:00:02.100","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.100","00:00:02.101","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.101","00:00:02.102","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.102","00:00:02.103","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.103","00:00:02.104","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.104","00:00:02.106","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.106","00:00:02.107","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.107","00:00:02.108","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.108","00:00:02.109","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.109","00:00:02.110","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.110","00:00:02.111","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.111","00:00:02.113","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.113","00:00:02.114","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.114","00:00:02.115","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.115","00:00:02.116","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.116","00:00:02.117","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.117","00:00:02.119","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.119","00:00:02.120","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.120","00:00:02.121","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.121","00:00:02.122","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.122","00:00:02.123","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.123","00:00:02.124","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.124","00:00:02.126","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.126","00:00:02.127","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.127","00:00:02.128","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.128","00:00:02.129","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.129","00:00:02.130","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.130","00:00:02.132","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.132","00:00:02.133","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.133","00:00:02.134","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.134","00:00:02.136","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.136","00:00:02.137","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.137","00:00:02.138","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.138","00:00:02.139","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.139","00:00:02.140","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.140","00:00:02.141","0.001s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.141","00:00:02.143","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.069","00:00:02.070","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.070","00:00:02.072","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.072","00:00:02.073","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.073","00:00:02.074","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.074","00:00:02.075","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.075","00:00:02.077","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.077","00:00:02.078","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.078","00:00:02.080","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.080","00:00:02.081","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.081","00:00:02.082","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.082","00:00:02.084","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.084","00:00:02.085","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.085","00:00:02.086","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.086","00:00:02.088","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.088","00:00:02.089","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.089","00:00:02.091","0.002s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.091","00:00:02.092","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.092","00:00:02.094","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.094","00:00:02.095","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.095","00:00:02.097","0.002s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.097","00:00:02.098","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.098","00:00:02.099","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.099","00:00:02.100","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.100","00:00:02.101","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.101","00:00:02.102","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.102","00:00:02.103","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.103","00:00:02.105","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.105","00:00:02.106","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.106","00:00:02.107","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.107","00:00:02.108","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.108","00:00:02.109","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.109","00:00:02.110","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.110","00:00:02.112","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.112","00:00:02.113","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.113","00:00:02.114","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.114","00:00:02.115","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.115","00:00:02.116","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.116","00:00:02.117","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.117","00:00:02.119","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.119","00:00:02.120","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.120","00:00:02.121","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.121","00:00:02.122","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.122","00:00:02.123","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.123","00:00:02.124","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.124","00:00:02.126","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.126","00:00:02.127","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.127","00:00:02.128","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.128","00:00:02.129","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.129","00:00:02.131","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.131","00:00:02.132","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.132","00:00:02.133","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.133","00:00:02.134","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.134","00:00:02.136","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.136","00:00:02.137","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.137","00:00:02.138","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.138","00:00:02.139","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.139","00:00:02.141","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.141","00:00:02.142","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.142","00:00:02.143","0.002s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.143","00:00:02.144","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.144","00:00:02.145","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.145","00:00:02.146","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.146","00:00:02.147","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.147","00:00:02.148","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.148","00:00:02.149","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.149","00:00:02.151","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.151","00:00:02.152","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.152","00:00:02.153","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.153","00:00:02.154","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.154","00:00:02.155","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.155","00:00:02.156","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.156","00:00:02.158","0.002s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.158","00:00:02.159","0.002s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.159","00:00:02.161","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.161","00:00:02.162","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.162","00:00:02.163","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.163","00:00:02.165","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.165","00:00:02.166","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.166","00:00:02.167","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.167","00:00:02.168","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.168","00:00:02.169","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.169","00:00:02.170","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.170","00:00:02.171","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.171","00:00:02.172","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.172","00:00:02.173","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.173","00:00:02.175","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.175","00:00:02.176","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.176","00:00:02.177","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.177","00:00:02.178","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.178","00:00:02.179","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.179","00:00:02.180","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.180","00:00:02.182","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.182","00:00:02.183","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.183","00:00:02.184","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.184","00:00:02.185","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.185","00:00:02.186","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.186","00:00:02.187","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.187","00:00:02.188","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.188","00:00:02.189","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.189","00:00:02.191","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.191","00:00:02.192","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.192","00:00:02.193","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.193","00:00:02.194","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.194","00:00:02.195","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.195","00:00:02.196","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.196","00:00:02.198","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.198","00:00:02.199","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.199","00:00:02.200","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.200","00:00:02.201","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.201","00:00:02.202","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.202","00:00:02.203","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.203","00:00:02.205","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.205","00:00:02.206","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.206","00:00:02.207","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.207","00:00:02.208","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.210","00:00:02.212","0.002s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.212","00:00:02.213","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.213","00:00:02.215","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.215","00:00:02.216","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.216","00:00:02.218","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.220","00:00:02.221","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.221","00:00:02.223","0.002s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.223","00:00:02.224","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.224","00:00:02.225","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.229","00:00:02.230","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.230","00:00:02.231","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.231","00:00:02.232","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.232","00:00:02.233","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.233","00:00:02.234","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.234","00:00:02.236","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.236","00:00:02.237","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.237","00:00:02.238","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.238","00:00:02.239","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.239","00:00:02.240","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.240","00:00:02.242","0.002s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.242","00:00:02.243","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.243","00:00:02.244","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.244","00:00:02.245","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.245","00:00:02.246","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.246","00:00:02.247","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.247","00:00:02.248","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.248","00:00:02.249","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.249","00:00:02.250","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.250","00:00:02.251","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.251","00:00:02.253","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.253","00:00:02.254","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.254","00:00:02.255","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.255","00:00:02.256","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.256","00:00:02.257","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.257","00:00:02.258","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.258","00:00:02.259","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.259","00:00:02.260","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.260","00:00:02.261","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.261","00:00:02.263","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.263","00:00:02.264","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.264","00:00:02.265","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.265","00:00:02.266","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.266","00:00:02.267","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.267","00:00:02.268","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.268","00:00:02.269","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.269","00:00:02.270","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.270","00:00:02.271","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.271","00:00:02.273","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.273","00:00:02.274","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.274","00:00:02.275","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.275","00:00:02.276","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.276","00:00:02.277","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.277","00:00:02.278","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.278","00:00:02.279","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.279","00:00:02.280","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.280","00:00:02.281","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.281","00:00:02.283","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.283","00:00:02.284","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.284","00:00:02.285","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.285","00:00:02.286","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.286","00:00:02.287","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.287","00:00:02.288","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.288","00:00:02.289","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.289","00:00:02.290","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.290","00:00:02.292","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.292","00:00:02.293","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.293","00:00:02.294","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.294","00:00:02.295","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.295","00:00:02.296","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.296","00:00:02.297","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.297","00:00:02.298","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.298","00:00:02.299","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.299","00:00:02.300","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.300","00:00:02.302","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.302","00:00:02.303","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.303","00:00:02.304","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.304","00:00:02.305","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.305","00:00:02.306","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.306","00:00:02.307","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.307","00:00:02.308","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.308","00:00:02.309","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.309","00:00:02.310","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.310","00:00:02.312","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.312","00:00:02.313","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.313","00:00:02.314","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.314","00:00:02.315","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.315","00:00:02.316","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.316","00:00:02.317","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.317","00:00:02.318","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.318","00:00:02.319","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.319","00:00:02.321","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.321","00:00:02.322","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.322","00:00:02.323","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.323","00:00:02.324","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.324","00:00:02.325","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.325","00:00:02.326","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.326","00:00:02.327","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.327","00:00:02.328","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.328","00:00:02.329","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.329","00:00:02.331","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.331","00:00:02.332","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.332","00:00:02.333","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.333","00:00:02.334","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.334","00:00:02.335","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.335","00:00:02.336","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.336","00:00:02.337","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.337","00:00:02.338","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.338","00:00:02.339","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.339","00:00:02.341","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.341","00:00:02.342","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.342","00:00:02.343","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.343","00:00:02.344","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.344","00:00:02.345","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.345","00:00:02.346","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.346","00:00:02.347","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.347","00:00:02.348","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.348","00:00:02.349","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.349","00:00:02.351","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.351","00:00:02.352","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.352","00:00:02.353","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.353","00:00:02.354","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.354","00:00:02.355","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.355","00:00:02.356","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.356","00:00:02.357","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.357","00:00:02.358","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.358","00:00:02.360","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.360","00:00:02.361","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.361","00:00:02.362","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.362","00:00:02.363","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.363","00:00:02.364","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.364","00:00:02.365","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.365","00:00:02.366","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.366","00:00:02.368","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.368","00:00:02.369","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.369","00:00:02.370","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.370","00:00:02.371","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.371","00:00:02.372","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.372","00:00:02.373","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.373","00:00:02.375","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.375","00:00:02.376","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.376","00:00:02.377","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.377","00:00:02.378","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.378","00:00:02.379","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.379","00:00:02.380","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.380","00:00:02.381","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.381","00:00:02.383","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.383","00:00:02.384","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.384","00:00:02.385","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.385","00:00:02.386","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.386","00:00:02.387","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.387","00:00:02.389","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.389","00:00:02.390","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.390","00:00:02.391","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.391","00:00:02.392","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.392","00:00:02.393","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.393","00:00:02.394","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.394","00:00:02.395","0.001s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.395","00:00:02.397","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.230","00:00:02.231","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.231","00:00:02.232","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.232","00:00:02.233","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.233","00:00:02.234","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.234","00:00:02.236","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.236","00:00:02.237","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.237","00:00:02.238","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.238","00:00:02.239","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.239","00:00:02.240","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.240","00:00:02.242","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.242","00:00:02.243","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.243","00:00:02.244","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.244","00:00:02.245","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.245","00:00:02.246","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.246","00:00:02.247","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.247","00:00:02.248","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.248","00:00:02.249","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.249","00:00:02.250","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.250","00:00:02.251","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.251","00:00:02.252","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.252","00:00:02.254","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.254","00:00:02.255","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.255","00:00:02.256","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.256","00:00:02.257","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.257","00:00:02.258","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.258","00:00:02.259","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.259","00:00:02.260","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.260","00:00:02.261","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.261","00:00:02.262","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.262","00:00:02.264","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.264","00:00:02.265","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.265","00:00:02.266","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.266","00:00:02.267","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.267","00:00:02.268","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.268","00:00:02.269","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.269","00:00:02.270","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.270","00:00:02.271","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.271","00:00:02.273","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.273","00:00:02.274","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.274","00:00:02.275","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.275","00:00:02.276","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.276","00:00:02.277","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.277","00:00:02.278","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.278","00:00:02.279","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.279","00:00:02.280","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.280","00:00:02.281","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.281","00:00:02.283","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.283","00:00:02.284","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.284","00:00:02.285","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.285","00:00:02.286","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.286","00:00:02.287","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.287","00:00:02.288","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.288","00:00:02.289","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.289","00:00:02.290","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.290","00:00:02.291","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.291","00:00:02.293","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.293","00:00:02.294","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.294","00:00:02.295","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.295","00:00:02.296","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.296","00:00:02.297","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.297","00:00:02.298","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.298","00:00:02.299","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.299","00:00:02.300","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.300","00:00:02.301","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.301","00:00:02.303","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.303","00:00:02.304","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.304","00:00:02.305","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.305","00:00:02.306","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.306","00:00:02.307","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.307","00:00:02.308","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.308","00:00:02.309","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.309","00:00:02.310","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.310","00:00:02.312","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.312","00:00:02.313","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.313","00:00:02.314","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.314","00:00:02.315","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.315","00:00:02.316","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.316","00:00:02.317","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.317","00:00:02.318","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.318","00:00:02.319","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.319","00:00:02.320","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.320","00:00:02.322","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.322","00:00:02.323","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.323","00:00:02.324","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.324","00:00:02.325","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.325","00:00:02.326","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.326","00:00:02.327","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.327","00:00:02.328","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.328","00:00:02.329","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.329","00:00:02.330","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.330","00:00:02.332","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.332","00:00:02.333","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.333","00:00:02.334","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.334","00:00:02.335","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.335","00:00:02.336","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.336","00:00:02.337","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.337","00:00:02.338","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.338","00:00:02.339","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.339","00:00:02.340","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.340","00:00:02.342","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.342","00:00:02.343","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.343","00:00:02.344","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.344","00:00:02.345","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.345","00:00:02.346","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.346","00:00:02.347","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.347","00:00:02.348","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.348","00:00:02.349","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.349","00:00:02.351","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.351","00:00:02.352","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.352","00:00:02.353","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.353","00:00:02.354","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.354","00:00:02.355","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.355","00:00:02.356","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.356","00:00:02.357","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.357","00:00:02.358","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.358","00:00:02.359","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.359","00:00:02.361","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.361","00:00:02.362","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.362","00:00:02.363","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.363","00:00:02.364","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.364","00:00:02.365","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.365","00:00:02.366","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.366","00:00:02.367","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.367","00:00:02.369","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.369","00:00:02.370","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.370","00:00:02.371","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.371","00:00:02.372","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.372","00:00:02.373","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.373","00:00:02.375","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.375","00:00:02.376","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.376","00:00:02.377","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.377","00:00:02.378","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.378","00:00:02.379","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.379","00:00:02.380","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.380","00:00:02.381","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.381","00:00:02.383","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.383","00:00:02.384","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.384","00:00:02.385","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.385","00:00:02.386","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.386","00:00:02.387","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.387","00:00:02.389","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.389","00:00:02.390","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.390","00:00:02.391","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.391","00:00:02.392","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.392","00:00:02.393","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.393","00:00:02.394","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.394","00:00:02.395","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.395","00:00:02.397","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.397","00:00:02.398","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.398","00:00:02.399","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.399","00:00:02.400","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.400","00:00:02.401","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.401","00:00:02.402","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.402","00:00:02.403","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.403","00:00:02.404","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.404","00:00:02.405","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.405","00:00:02.406","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.406","00:00:02.407","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.407","00:00:02.409","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.409","00:00:02.410","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.410","00:00:02.412","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.412","00:00:02.413","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.413","00:00:02.415","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.415","00:00:02.417","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.417","00:00:02.418","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.418","00:00:02.419","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.419","00:00:02.420","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.420","00:00:02.421","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.421","00:00:02.422","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.422","00:00:02.423","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.423","00:00:02.424","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.424","00:00:02.425","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.425","00:00:02.426","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.426","00:00:02.427","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.427","00:00:02.428","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.428","00:00:02.429","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.429","00:00:02.430","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.430","00:00:02.431","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.431","00:00:02.432","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.432","00:00:02.433","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.433","00:00:02.435","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.435","00:00:02.436","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.436","00:00:02.437","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.437","00:00:02.438","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.438","00:00:02.439","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.439","00:00:02.440","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.440","00:00:02.441","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.441","00:00:02.443","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.443","00:00:02.443","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.443","00:00:02.444","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.444","00:00:02.445","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.445","00:00:02.447","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.447","00:00:02.448","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.450","00:00:02.451","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.451","00:00:02.452","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.452","00:00:02.454","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.454","00:00:02.455","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.455","00:00:02.457","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.457","00:00:02.458","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.458","00:00:02.460","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.460","00:00:02.461","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.461","00:00:02.462","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.462","00:00:02.463","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.463","00:00:02.464","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.464","00:00:02.466","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.466","00:00:02.468","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.468","00:00:02.469","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.469","00:00:02.470","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.470","00:00:02.471","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.471","00:00:02.472","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.472","00:00:02.474","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.474","00:00:02.475","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.475","00:00:02.476","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.476","00:00:02.477","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.477","00:00:02.478","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.478","00:00:02.480","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.480","00:00:02.481","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.481","00:00:02.482","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.482","00:00:02.483","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.483","00:00:02.484","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.484","00:00:02.486","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.486","00:00:02.487","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.487","00:00:02.488","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.488","00:00:02.489","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.489","00:00:02.490","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.490","00:00:02.492","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.492","00:00:02.493","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.493","00:00:02.494","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.494","00:00:02.495","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.495","00:00:02.497","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.497","00:00:02.498","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.498","00:00:02.500","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.500","00:00:02.501","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.501","00:00:02.503","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.503","00:00:02.504","0.002s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.504","00:00:02.506","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.506","00:00:02.507","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.507","00:00:02.507","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.507","00:00:02.508","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.508","00:00:02.510","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.510","00:00:02.511","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.511","00:00:02.512","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.512","00:00:02.513","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.513","00:00:02.514","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.514","00:00:02.515","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.515","00:00:02.516","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.516","00:00:02.517","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.517","00:00:02.518","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.518","00:00:02.520","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.520","00:00:02.521","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.521","00:00:02.522","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.522","00:00:02.523","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.523","00:00:02.524","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.524","00:00:02.525","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.525","00:00:02.526","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.526","00:00:02.528","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.528","00:00:02.529","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.529","00:00:02.530","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.530","00:00:02.531","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.531","00:00:02.532","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.532","00:00:02.533","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.533","00:00:02.534","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.534","00:00:02.536","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.536","00:00:02.537","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.537","00:00:02.538","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.538","00:00:02.539","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.539","00:00:02.540","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.540","00:00:02.541","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.541","00:00:02.542","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.542","00:00:02.543","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.543","00:00:02.544","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.544","00:00:02.545","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.545","00:00:02.546","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.546","00:00:02.548","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.548","00:00:02.549","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.549","00:00:02.550","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.550","00:00:02.551","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.551","00:00:02.552","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.552","00:00:02.553","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.553","00:00:02.554","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.554","00:00:02.556","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.556","00:00:02.557","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.557","00:00:02.558","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.558","00:00:02.559","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.559","00:00:02.560","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.560","00:00:02.561","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.561","00:00:02.562","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.562","00:00:02.563","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.563","00:00:02.565","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.565","00:00:02.566","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.566","00:00:02.567","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.567","00:00:02.568","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.568","00:00:02.569","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.569","00:00:02.570","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.570","00:00:02.571","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.571","00:00:02.572","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.572","00:00:02.573","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.573","00:00:02.575","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.575","00:00:02.576","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.576","00:00:02.577","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.577","00:00:02.578","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.578","00:00:02.580","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.580","00:00:02.581","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.581","00:00:02.582","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.582","00:00:02.583","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.583","00:00:02.584","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.584","00:00:02.585","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.585","00:00:02.586","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.586","00:00:02.587","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.587","00:00:02.588","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.588","00:00:02.589","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.589","00:00:02.590","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.590","00:00:02.591","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.591","00:00:02.593","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.593","00:00:02.594","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.594","00:00:02.595","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.595","00:00:02.596","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.596","00:00:02.597","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.597","00:00:02.598","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.598","00:00:02.599","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.599","00:00:02.600","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.600","00:00:02.602","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.602","00:00:02.603","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.603","00:00:02.604","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.604","00:00:02.605","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.605","00:00:02.606","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.606","00:00:02.607","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.607","00:00:02.608","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.608","00:00:02.610","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.610","00:00:02.611","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.611","00:00:02.612","0.001s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.612","00:00:02.613","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.451","00:00:02.452","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.452","00:00:02.454","0.002s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.454","00:00:02.455","0.002s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.455","00:00:02.457","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.457","00:00:02.458","0.002s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.458","00:00:02.460","0.002s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.460","00:00:02.461","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.461","00:00:02.462","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.462","00:00:02.463","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.463","00:00:02.464","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.464","00:00:02.466","0.002s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.466","00:00:02.468","0.002s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.468","00:00:02.469","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.469","00:00:02.470","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.470","00:00:02.472","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.472","00:00:02.473","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.473","00:00:02.474","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.474","00:00:02.475","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.475","00:00:02.476","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.476","00:00:02.477","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.477","00:00:02.478","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.478","00:00:02.480","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.480","00:00:02.481","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.481","00:00:02.482","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.482","00:00:02.483","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.483","00:00:02.484","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.484","00:00:02.486","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.486","00:00:02.487","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.487","00:00:02.488","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.488","00:00:02.489","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.489","00:00:02.490","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.490","00:00:02.492","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.492","00:00:02.493","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.493","00:00:02.494","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.494","00:00:02.495","0.001s"],[390,37,"00:00:02.441","00:00:02.497","00:00:02.495","00:00:02.497","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.874","00:00:02.874","0.000s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.874","00:00:02.875","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.875","00:00:02.876","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.876","00:00:02.878","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.878","00:00:02.879","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.879","00:00:02.880","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.880","00:00:02.881","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.881","00:00:02.882","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.882","00:00:02.884","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.884","00:00:02.885","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.885","00:00:02.886","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.886","00:00:02.888","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.888","00:00:02.889","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.889","00:00:02.890","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.890","00:00:02.892","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.892","00:00:02.893","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.893","00:00:02.894","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.894","00:00:02.895","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.895","00:00:02.897","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.897","00:00:02.898","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.898","00:00:02.899","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.899","00:00:02.900","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.900","00:00:02.902","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.902","00:00:02.903","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.903","00:00:02.904","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.904","00:00:02.906","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.906","00:00:02.907","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.907","00:00:02.908","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.908","00:00:02.909","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.909","00:00:02.911","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.911","00:00:02.912","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.912","00:00:02.913","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.913","00:00:02.915","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.915","00:00:02.916","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.916","00:00:02.917","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.917","00:00:02.918","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.918","00:00:02.920","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.920","00:00:02.921","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.921","00:00:02.922","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.922","00:00:02.924","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.924","00:00:02.925","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.925","00:00:02.926","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.926","00:00:02.927","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.927","00:00:02.929","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.929","00:00:02.930","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.930","00:00:02.931","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.931","00:00:02.933","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.933","00:00:02.934","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.934","00:00:02.935","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.935","00:00:02.936","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.936","00:00:02.938","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.938","00:00:02.939","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.939","00:00:02.940","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.940","00:00:02.942","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.942","00:00:02.943","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.943","00:00:02.944","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.944","00:00:02.945","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.945","00:00:02.947","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.947","00:00:02.948","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.948","00:00:02.949","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.949","00:00:02.950","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.950","00:00:02.952","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.952","00:00:02.953","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.953","00:00:02.954","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.954","00:00:02.956","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.956","00:00:02.957","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.957","00:00:02.958","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.958","00:00:02.959","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.959","00:00:02.961","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.961","00:00:02.962","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.962","00:00:02.963","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.963","00:00:02.965","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.965","00:00:02.966","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.966","00:00:02.968","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.968","00:00:02.969","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.969","00:00:02.970","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.970","00:00:02.971","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.971","00:00:02.973","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.973","00:00:02.974","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.974","00:00:02.975","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.975","00:00:02.976","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.976","00:00:02.977","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.977","00:00:02.979","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.979","00:00:02.980","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.980","00:00:02.981","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.981","00:00:02.983","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.983","00:00:02.984","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.984","00:00:02.985","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.985","00:00:02.986","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.986","00:00:02.988","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.988","00:00:02.989","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.989","00:00:02.990","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.990","00:00:02.991","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.991","00:00:02.993","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.993","00:00:02.994","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.994","00:00:02.995","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.995","00:00:02.997","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.997","00:00:02.998","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.998","00:00:02.999","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:02.999","00:00:03.000","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.000","00:00:03.002","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.002","00:00:03.003","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.003","00:00:03.004","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.004","00:00:03.006","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.006","00:00:03.007","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.007","00:00:03.008","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.008","00:00:03.009","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.009","00:00:03.011","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.011","00:00:03.012","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.012","00:00:03.013","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.013","00:00:03.015","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.015","00:00:03.016","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.016","00:00:03.017","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.017","00:00:03.019","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.019","00:00:03.020","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.020","00:00:03.021","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.021","00:00:03.023","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.023","00:00:03.024","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.024","00:00:03.025","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.025","00:00:03.027","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.027","00:00:03.028","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.028","00:00:03.029","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.029","00:00:03.031","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.031","00:00:03.032","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.032","00:00:03.033","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.033","00:00:03.035","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.035","00:00:03.036","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.036","00:00:03.037","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.037","00:00:03.039","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.039","00:00:03.040","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.040","00:00:03.041","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.048","00:00:03.049","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.049","00:00:03.050","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.050","00:00:03.051","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.051","00:00:03.052","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.052","00:00:03.054","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.054","00:00:03.055","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.055","00:00:03.057","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.057","00:00:03.058","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.058","00:00:03.059","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.059","00:00:03.061","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.061","00:00:03.062","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.062","00:00:03.064","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.064","00:00:03.065","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.065","00:00:03.066","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.066","00:00:03.068","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.068","00:00:03.069","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.069","00:00:03.070","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.070","00:00:03.072","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.072","00:00:03.073","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.073","00:00:03.074","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.074","00:00:03.076","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.076","00:00:03.077","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.077","00:00:03.079","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.079","00:00:03.080","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.080","00:00:03.081","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.081","00:00:03.083","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.083","00:00:03.084","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.084","00:00:03.086","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.086","00:00:03.087","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.087","00:00:03.089","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.089","00:00:03.090","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.090","00:00:03.091","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.091","00:00:03.092","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.092","00:00:03.094","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.094","00:00:03.095","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.095","00:00:03.096","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.096","00:00:03.098","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.098","00:00:03.100","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.100","00:00:03.101","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.101","00:00:03.103","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.103","00:00:03.104","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.104","00:00:03.105","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.105","00:00:03.106","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.106","00:00:03.108","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.112","00:00:03.113","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.113","00:00:03.114","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.114","00:00:03.116","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.116","00:00:03.117","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.117","00:00:03.119","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.119","00:00:03.120","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.120","00:00:03.121","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.121","00:00:03.123","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.123","00:00:03.124","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.124","00:00:03.126","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.126","00:00:03.127","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.127","00:00:03.128","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.128","00:00:03.130","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.130","00:00:03.131","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.131","00:00:03.132","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.132","00:00:03.134","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.134","00:00:03.135","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.135","00:00:03.137","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.137","00:00:03.138","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.138","00:00:03.139","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.139","00:00:03.141","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.141","00:00:03.142","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.142","00:00:03.144","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.144","00:00:03.145","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.145","00:00:03.146","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.146","00:00:03.148","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.148","00:00:03.149","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.149","00:00:03.150","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.150","00:00:03.152","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.152","00:00:03.153","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.153","00:00:03.155","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.155","00:00:03.156","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.156","00:00:03.157","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.157","00:00:03.159","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.159","00:00:03.160","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.160","00:00:03.162","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.162","00:00:03.163","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.163","00:00:03.164","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.164","00:00:03.166","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.166","00:00:03.167","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.167","00:00:03.169","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.169","00:00:03.170","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.170","00:00:03.171","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.171","00:00:03.173","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.173","00:00:03.174","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.174","00:00:03.176","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.176","00:00:03.177","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.177","00:00:03.178","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.178","00:00:03.180","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.180","00:00:03.181","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.181","00:00:03.183","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.183","00:00:03.184","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.184","00:00:03.185","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.185","00:00:03.187","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.187","00:00:03.188","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.188","00:00:03.189","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.189","00:00:03.191","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.191","00:00:03.192","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.192","00:00:03.194","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.194","00:00:03.195","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.195","00:00:03.196","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.196","00:00:03.198","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.198","00:00:03.199","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.199","00:00:03.201","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.201","00:00:03.202","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.202","00:00:03.203","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.203","00:00:03.205","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.205","00:00:03.206","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.206","00:00:03.208","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.208","00:00:03.209","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.209","00:00:03.211","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.211","00:00:03.212","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.212","00:00:03.214","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.214","00:00:03.215","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.215","00:00:03.216","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.216","00:00:03.218","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.218","00:00:03.219","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.219","00:00:03.221","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.221","00:00:03.222","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.222","00:00:03.224","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.224","00:00:03.225","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.225","00:00:03.227","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.227","00:00:03.228","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.228","00:00:03.229","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.229","00:00:03.231","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.231","00:00:03.232","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.232","00:00:03.234","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.234","00:00:03.235","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.235","00:00:03.236","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.236","00:00:03.238","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.238","00:00:03.239","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.239","00:00:03.241","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.241","00:00:03.242","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.242","00:00:03.244","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.244","00:00:03.245","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.245","00:00:03.247","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.247","00:00:03.248","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.248","00:00:03.250","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.250","00:00:03.251","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.251","00:00:03.252","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.252","00:00:03.254","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.254","00:00:03.255","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.255","00:00:03.257","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.257","00:00:03.258","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.258","00:00:03.260","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.260","00:00:03.261","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.261","00:00:03.263","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.263","00:00:03.264","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.264","00:00:03.265","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.265","00:00:03.267","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.267","00:00:03.268","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.268","00:00:03.270","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.270","00:00:03.271","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.271","00:00:03.273","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.273","00:00:03.274","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.279","00:00:03.281","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.281","00:00:03.283","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.283","00:00:03.283","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.283","00:00:03.285","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.285","00:00:03.286","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.286","00:00:03.288","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.288","00:00:03.289","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.289","00:00:03.290","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.290","00:00:03.292","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.292","00:00:03.293","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.293","00:00:03.295","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.295","00:00:03.296","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.296","00:00:03.298","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.298","00:00:03.299","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.299","00:00:03.301","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.301","00:00:03.302","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.302","00:00:03.303","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.303","00:00:03.305","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.305","00:00:03.306","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.306","00:00:03.308","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.308","00:00:03.309","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.309","00:00:03.311","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.311","00:00:03.312","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.312","00:00:03.314","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.314","00:00:03.315","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.315","00:00:03.317","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.317","00:00:03.319","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.319","00:00:03.320","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.320","00:00:03.321","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.321","00:00:03.322","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.322","00:00:03.324","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.324","00:00:03.325","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.325","00:00:03.327","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.327","00:00:03.328","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.328","00:00:03.329","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.329","00:00:03.331","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.331","00:00:03.332","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.332","00:00:03.334","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.334","00:00:03.335","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.335","00:00:03.337","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.337","00:00:03.338","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.338","00:00:03.340","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.340","00:00:03.341","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.347","00:00:03.348","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.348","00:00:03.349","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.349","00:00:03.350","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.350","00:00:03.352","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.352","00:00:03.353","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.353","00:00:03.355","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.355","00:00:03.356","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.356","00:00:03.358","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.358","00:00:03.359","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.359","00:00:03.360","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.360","00:00:03.362","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.362","00:00:03.363","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.363","00:00:03.365","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.365","00:00:03.366","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.366","00:00:03.368","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.368","00:00:03.369","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.369","00:00:03.371","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.371","00:00:03.372","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.372","00:00:03.373","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.373","00:00:03.375","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.375","00:00:03.376","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.376","00:00:03.378","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.378","00:00:03.379","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.379","00:00:03.381","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.381","00:00:03.382","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.382","00:00:03.384","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.384","00:00:03.385","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.385","00:00:03.386","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.386","00:00:03.388","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.388","00:00:03.389","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.389","00:00:03.391","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.391","00:00:03.393","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.393","00:00:03.394","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.394","00:00:03.396","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.396","00:00:03.397","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.397","00:00:03.398","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.405","00:00:03.406","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.406","00:00:03.407","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.407","00:00:03.408","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.408","00:00:03.410","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.410","00:00:03.411","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.411","00:00:03.413","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.413","00:00:03.415","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.415","00:00:03.416","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.416","00:00:03.417","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.417","00:00:03.418","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.418","00:00:03.420","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.420","00:00:03.422","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.422","00:00:03.423","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.423","00:00:03.425","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.425","00:00:03.426","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.426","00:00:03.428","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.428","00:00:03.429","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.429","00:00:03.430","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.430","00:00:03.432","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.432","00:00:03.433","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.433","00:00:03.435","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.435","00:00:03.436","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.436","00:00:03.438","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.438","00:00:03.440","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.440","00:00:03.441","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.441","00:00:03.442","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.442","00:00:03.444","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.444","00:00:03.446","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.446","00:00:03.447","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.447","00:00:03.448","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.448","00:00:03.450","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.450","00:00:03.451","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.451","00:00:03.453","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.453","00:00:03.454","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.454","00:00:03.456","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.456","00:00:03.457","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.457","00:00:03.459","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.459","00:00:03.460","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.460","00:00:03.462","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.462","00:00:03.463","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.463","00:00:03.465","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.465","00:00:03.466","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.466","00:00:03.468","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.468","00:00:03.469","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.469","00:00:03.471","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.477","00:00:03.478","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.478","00:00:03.479","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.479","00:00:03.480","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.480","00:00:03.482","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.482","00:00:03.483","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.483","00:00:03.485","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.485","00:00:03.486","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.486","00:00:03.488","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.488","00:00:03.490","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.490","00:00:03.491","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.491","00:00:03.492","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.492","00:00:03.494","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.494","00:00:03.495","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.495","00:00:03.497","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.497","00:00:03.498","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.498","00:00:03.500","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.500","00:00:03.501","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.501","00:00:03.503","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.503","00:00:03.504","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.504","00:00:03.506","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.506","00:00:03.507","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.507","00:00:03.509","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.509","00:00:03.510","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.510","00:00:03.512","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.512","00:00:03.513","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.513","00:00:03.515","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.515","00:00:03.516","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.516","00:00:03.518","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.518","00:00:03.519","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.519","00:00:03.521","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.521","00:00:03.522","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.522","00:00:03.524","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.524","00:00:03.525","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.525","00:00:03.527","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.527","00:00:03.528","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.528","00:00:03.530","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.530","00:00:03.531","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.531","00:00:03.533","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.533","00:00:03.534","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.534","00:00:03.536","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.536","00:00:03.537","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.537","00:00:03.539","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.539","00:00:03.540","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.540","00:00:03.542","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.542","00:00:03.543","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.543","00:00:03.544","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.544","00:00:03.546","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.546","00:00:03.548","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.548","00:00:03.549","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.549","00:00:03.551","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.551","00:00:03.552","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.552","00:00:03.554","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.554","00:00:03.555","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.555","00:00:03.557","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.557","00:00:03.558","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.558","00:00:03.560","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.560","00:00:03.561","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.561","00:00:03.562","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.562","00:00:03.564","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.564","00:00:03.565","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.565","00:00:03.567","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.567","00:00:03.568","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.568","00:00:03.570","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.570","00:00:03.571","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.571","00:00:03.573","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.573","00:00:03.574","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.574","00:00:03.576","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.576","00:00:03.577","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.577","00:00:03.579","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.579","00:00:03.580","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.580","00:00:03.582","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.582","00:00:03.583","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.583","00:00:03.585","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.585","00:00:03.586","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.586","00:00:03.588","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.588","00:00:03.590","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.590","00:00:03.591","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.591","00:00:03.593","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.593","00:00:03.594","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.594","00:00:03.596","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.596","00:00:03.597","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.597","00:00:03.599","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.599","00:00:03.600","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.600","00:00:03.602","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.602","00:00:03.603","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.603","00:00:03.605","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.605","00:00:03.606","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.606","00:00:03.608","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.608","00:00:03.609","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.612","00:00:03.613","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.613","00:00:03.614","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.614","00:00:03.616","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.616","00:00:03.617","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.617","00:00:03.619","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.619","00:00:03.620","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.620","00:00:03.622","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.622","00:00:03.623","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.623","00:00:03.625","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.625","00:00:03.627","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.627","00:00:03.628","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.628","00:00:03.630","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.630","00:00:03.631","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.631","00:00:03.633","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.633","00:00:03.634","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.634","00:00:03.636","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.636","00:00:03.637","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.637","00:00:03.639","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.639","00:00:03.640","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.640","00:00:03.642","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.642","00:00:03.643","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.643","00:00:03.645","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.645","00:00:03.646","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.646","00:00:03.648","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.648","00:00:03.650","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.650","00:00:03.651","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.651","00:00:03.653","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.653","00:00:03.654","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.654","00:00:03.656","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.656","00:00:03.657","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.657","00:00:03.659","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.659","00:00:03.660","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.660","00:00:03.662","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.662","00:00:03.663","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.663","00:00:03.665","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.665","00:00:03.666","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.666","00:00:03.668","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.668","00:00:03.670","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.670","00:00:03.671","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.671","00:00:03.673","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.673","00:00:03.674","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.674","00:00:03.676","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.676","00:00:03.677","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.677","00:00:03.679","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.679","00:00:03.680","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.680","00:00:03.682","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.682","00:00:03.683","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.683","00:00:03.685","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.685","00:00:03.686","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.686","00:00:03.688","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.688","00:00:03.690","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.690","00:00:03.691","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.691","00:00:03.693","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.693","00:00:03.694","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.694","00:00:03.696","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.696","00:00:03.697","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.697","00:00:03.699","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.699","00:00:03.700","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.700","00:00:03.702","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.702","00:00:03.703","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.703","00:00:03.705","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.705","00:00:03.706","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.706","00:00:03.708","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.708","00:00:03.709","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.709","00:00:03.711","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.711","00:00:03.712","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.712","00:00:03.714","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.714","00:00:03.716","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.716","00:00:03.717","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.717","00:00:03.719","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.719","00:00:03.720","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.720","00:00:03.722","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.722","00:00:03.724","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.724","00:00:03.725","0.001s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.725","00:00:03.726","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.726","00:00:03.728","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.049","00:00:03.050","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.050","00:00:03.051","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.051","00:00:03.052","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.052","00:00:03.054","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.054","00:00:03.055","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.055","00:00:03.057","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.057","00:00:03.058","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.058","00:00:03.059","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.059","00:00:03.061","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.061","00:00:03.062","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.062","00:00:03.063","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.063","00:00:03.065","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.065","00:00:03.066","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.066","00:00:03.068","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.068","00:00:03.069","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.069","00:00:03.070","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.070","00:00:03.072","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.072","00:00:03.073","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.073","00:00:03.074","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.074","00:00:03.076","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.076","00:00:03.077","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.077","00:00:03.079","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.079","00:00:03.080","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.080","00:00:03.081","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.081","00:00:03.083","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.083","00:00:03.084","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.084","00:00:03.086","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.086","00:00:03.087","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.087","00:00:03.089","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.089","00:00:03.090","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.090","00:00:03.091","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.091","00:00:03.092","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.092","00:00:03.094","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.094","00:00:03.095","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.095","00:00:03.096","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.096","00:00:03.098","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.098","00:00:03.099","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.099","00:00:03.101","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.101","00:00:03.102","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.102","00:00:03.104","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.104","00:00:03.105","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.105","00:00:03.106","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.106","00:00:03.108","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.112","00:00:03.113","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.113","00:00:03.114","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.114","00:00:03.116","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.116","00:00:03.117","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.117","00:00:03.119","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.119","00:00:03.120","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.120","00:00:03.121","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.121","00:00:03.123","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.123","00:00:03.124","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.124","00:00:03.126","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.126","00:00:03.127","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.127","00:00:03.128","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.128","00:00:03.130","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.130","00:00:03.131","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.131","00:00:03.132","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.132","00:00:03.134","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.134","00:00:03.135","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.135","00:00:03.137","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.137","00:00:03.138","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.138","00:00:03.139","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.139","00:00:03.141","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.141","00:00:03.142","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.142","00:00:03.144","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.144","00:00:03.145","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.145","00:00:03.146","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.146","00:00:03.148","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.148","00:00:03.149","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.149","00:00:03.150","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.150","00:00:03.152","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.152","00:00:03.153","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.153","00:00:03.155","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.155","00:00:03.156","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.156","00:00:03.157","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.157","00:00:03.159","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.159","00:00:03.160","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.160","00:00:03.162","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.162","00:00:03.163","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.163","00:00:03.164","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.164","00:00:03.166","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.166","00:00:03.167","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.167","00:00:03.168","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.168","00:00:03.170","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.170","00:00:03.171","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.171","00:00:03.173","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.173","00:00:03.174","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.174","00:00:03.175","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.175","00:00:03.177","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.177","00:00:03.178","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.178","00:00:03.180","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.180","00:00:03.181","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.181","00:00:03.182","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.182","00:00:03.184","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.184","00:00:03.185","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.185","00:00:03.186","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.186","00:00:03.188","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.188","00:00:03.189","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.189","00:00:03.191","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.191","00:00:03.192","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.192","00:00:03.194","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.194","00:00:03.195","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.195","00:00:03.196","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.196","00:00:03.198","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.198","00:00:03.199","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.199","00:00:03.201","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.201","00:00:03.202","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.202","00:00:03.203","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.203","00:00:03.205","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.205","00:00:03.206","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.206","00:00:03.208","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.208","00:00:03.209","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.209","00:00:03.211","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.211","00:00:03.212","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.212","00:00:03.214","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.214","00:00:03.215","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.215","00:00:03.216","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.216","00:00:03.218","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.218","00:00:03.219","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.219","00:00:03.221","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.221","00:00:03.222","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.222","00:00:03.224","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.224","00:00:03.225","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.225","00:00:03.226","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.226","00:00:03.228","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.228","00:00:03.229","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.229","00:00:03.231","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.231","00:00:03.232","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.232","00:00:03.234","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.234","00:00:03.235","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.235","00:00:03.236","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.236","00:00:03.238","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.238","00:00:03.239","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.239","00:00:03.241","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.241","00:00:03.242","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.242","00:00:03.244","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.244","00:00:03.245","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.245","00:00:03.247","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.247","00:00:03.248","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.248","00:00:03.249","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.249","00:00:03.251","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.251","00:00:03.252","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.252","00:00:03.254","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.254","00:00:03.255","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.255","00:00:03.257","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.257","00:00:03.258","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.258","00:00:03.260","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.260","00:00:03.261","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.261","00:00:03.262","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.262","00:00:03.264","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.264","00:00:03.265","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.265","00:00:03.267","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.267","00:00:03.268","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.268","00:00:03.270","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.270","00:00:03.271","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.271","00:00:03.273","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.273","00:00:03.274","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.279","00:00:03.281","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.281","00:00:03.283","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.283","00:00:03.283","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.283","00:00:03.285","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.285","00:00:03.286","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.286","00:00:03.288","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.288","00:00:03.289","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.289","00:00:03.290","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.290","00:00:03.292","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.292","00:00:03.293","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.293","00:00:03.295","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.295","00:00:03.296","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.296","00:00:03.298","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.298","00:00:03.299","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.299","00:00:03.301","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.301","00:00:03.302","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.302","00:00:03.303","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.303","00:00:03.305","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.305","00:00:03.306","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.306","00:00:03.308","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.308","00:00:03.309","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.309","00:00:03.311","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.311","00:00:03.312","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.312","00:00:03.314","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.314","00:00:03.315","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.315","00:00:03.317","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.317","00:00:03.318","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.318","00:00:03.320","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.320","00:00:03.321","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.321","00:00:03.322","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.322","00:00:03.324","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.324","00:00:03.325","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.325","00:00:03.327","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.327","00:00:03.328","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.328","00:00:03.329","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.329","00:00:03.331","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.331","00:00:03.332","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.332","00:00:03.334","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.334","00:00:03.335","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.335","00:00:03.337","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.337","00:00:03.338","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.338","00:00:03.340","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.340","00:00:03.341","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.347","00:00:03.348","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.348","00:00:03.349","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.349","00:00:03.350","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.350","00:00:03.352","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.352","00:00:03.353","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.353","00:00:03.355","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.355","00:00:03.356","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.356","00:00:03.358","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.358","00:00:03.359","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.359","00:00:03.360","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.360","00:00:03.362","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.362","00:00:03.363","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.363","00:00:03.365","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.365","00:00:03.366","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.366","00:00:03.368","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.368","00:00:03.369","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.369","00:00:03.371","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.371","00:00:03.372","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.372","00:00:03.373","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.373","00:00:03.375","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.375","00:00:03.376","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.376","00:00:03.378","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.378","00:00:03.379","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.379","00:00:03.381","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.381","00:00:03.382","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.382","00:00:03.384","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.384","00:00:03.385","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.385","00:00:03.386","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.386","00:00:03.388","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.388","00:00:03.389","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.389","00:00:03.391","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.391","00:00:03.393","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.393","00:00:03.394","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.394","00:00:03.396","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.396","00:00:03.397","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.397","00:00:03.398","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.404","00:00:03.406","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.406","00:00:03.407","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.407","00:00:03.408","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.408","00:00:03.410","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.410","00:00:03.411","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.411","00:00:03.413","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.413","00:00:03.415","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.415","00:00:03.416","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.416","00:00:03.417","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.417","00:00:03.418","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.418","00:00:03.420","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.420","00:00:03.422","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.422","00:00:03.423","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.423","00:00:03.425","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.425","00:00:03.426","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.426","00:00:03.428","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.428","00:00:03.429","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.429","00:00:03.430","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.430","00:00:03.432","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.432","00:00:03.433","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.433","00:00:03.435","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.435","00:00:03.436","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.436","00:00:03.438","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.438","00:00:03.439","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.439","00:00:03.441","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.441","00:00:03.442","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.442","00:00:03.444","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.444","00:00:03.445","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.445","00:00:03.447","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.447","00:00:03.448","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.448","00:00:03.450","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.450","00:00:03.451","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.451","00:00:03.453","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.453","00:00:03.454","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.454","00:00:03.456","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.456","00:00:03.457","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.457","00:00:03.459","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.459","00:00:03.460","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.460","00:00:03.462","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.462","00:00:03.463","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.463","00:00:03.465","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.465","00:00:03.466","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.466","00:00:03.468","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.468","00:00:03.469","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.469","00:00:03.471","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.477","00:00:03.478","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.478","00:00:03.479","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.479","00:00:03.480","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.480","00:00:03.482","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.482","00:00:03.483","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.483","00:00:03.485","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.485","00:00:03.486","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.486","00:00:03.488","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.488","00:00:03.489","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.489","00:00:03.491","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.491","00:00:03.492","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.492","00:00:03.494","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.494","00:00:03.495","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.495","00:00:03.497","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.497","00:00:03.498","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.498","00:00:03.500","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.500","00:00:03.501","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.501","00:00:03.503","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.503","00:00:03.504","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.504","00:00:03.506","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.506","00:00:03.507","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.507","00:00:03.509","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.509","00:00:03.510","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.510","00:00:03.512","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.512","00:00:03.513","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.513","00:00:03.515","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.515","00:00:03.516","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.516","00:00:03.518","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.518","00:00:03.519","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.519","00:00:03.521","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.521","00:00:03.522","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.522","00:00:03.523","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.523","00:00:03.525","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.525","00:00:03.526","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.526","00:00:03.528","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.528","00:00:03.530","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.530","00:00:03.531","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.531","00:00:03.533","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.533","00:00:03.534","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.534","00:00:03.536","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.536","00:00:03.537","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.537","00:00:03.539","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.539","00:00:03.540","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.540","00:00:03.541","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.541","00:00:03.543","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.543","00:00:03.544","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.544","00:00:03.546","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.546","00:00:03.547","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.547","00:00:03.549","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.549","00:00:03.550","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.550","00:00:03.552","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.552","00:00:03.554","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.554","00:00:03.555","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.555","00:00:03.556","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.556","00:00:03.558","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.558","00:00:03.559","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.559","00:00:03.561","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.561","00:00:03.562","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.562","00:00:03.564","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.564","00:00:03.565","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.565","00:00:03.567","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.567","00:00:03.568","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.568","00:00:03.570","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.570","00:00:03.571","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.571","00:00:03.573","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.573","00:00:03.574","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.574","00:00:03.576","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.576","00:00:03.577","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.577","00:00:03.579","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.579","00:00:03.580","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.580","00:00:03.582","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.582","00:00:03.583","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.583","00:00:03.585","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.585","00:00:03.586","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.586","00:00:03.588","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.588","00:00:03.590","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.590","00:00:03.591","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.591","00:00:03.593","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.593","00:00:03.594","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.594","00:00:03.596","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.596","00:00:03.597","0.001s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.597","00:00:03.599","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.599","00:00:03.600","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.600","00:00:03.602","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.113","00:00:03.114","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.114","00:00:03.116","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.116","00:00:03.117","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.117","00:00:03.119","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.119","00:00:03.120","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.120","00:00:03.121","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.121","00:00:03.123","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.123","00:00:03.124","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.124","00:00:03.126","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.126","00:00:03.127","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.127","00:00:03.128","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.128","00:00:03.130","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.130","00:00:03.131","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.131","00:00:03.133","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.133","00:00:03.134","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.134","00:00:03.135","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.135","00:00:03.137","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.137","00:00:03.138","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.138","00:00:03.140","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.140","00:00:03.141","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.141","00:00:03.142","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.142","00:00:03.144","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.144","00:00:03.145","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.145","00:00:03.146","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.146","00:00:03.148","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.148","00:00:03.149","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.149","00:00:03.151","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.151","00:00:03.152","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.152","00:00:03.153","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.153","00:00:03.155","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.155","00:00:03.156","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.156","00:00:03.157","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.157","00:00:03.159","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.159","00:00:03.160","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.160","00:00:03.162","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.162","00:00:03.163","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.163","00:00:03.164","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.164","00:00:03.166","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.166","00:00:03.167","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.167","00:00:03.169","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.169","00:00:03.170","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.170","00:00:03.171","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.171","00:00:03.173","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.173","00:00:03.174","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.174","00:00:03.176","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.176","00:00:03.177","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.177","00:00:03.178","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.178","00:00:03.180","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.180","00:00:03.181","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.181","00:00:03.183","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.183","00:00:03.184","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.184","00:00:03.185","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.185","00:00:03.187","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.187","00:00:03.188","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.188","00:00:03.189","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.189","00:00:03.191","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.191","00:00:03.192","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.192","00:00:03.194","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.194","00:00:03.195","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.195","00:00:03.196","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.196","00:00:03.198","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.198","00:00:03.199","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.199","00:00:03.201","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.201","00:00:03.202","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.202","00:00:03.203","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.203","00:00:03.205","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.205","00:00:03.206","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.206","00:00:03.208","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.208","00:00:03.209","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.209","00:00:03.211","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.211","00:00:03.212","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.212","00:00:03.214","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.214","00:00:03.215","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.215","00:00:03.216","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.216","00:00:03.218","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.218","00:00:03.219","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.219","00:00:03.221","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.221","00:00:03.222","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.222","00:00:03.224","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.224","00:00:03.225","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.225","00:00:03.227","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.227","00:00:03.228","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.228","00:00:03.229","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.229","00:00:03.231","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.231","00:00:03.232","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.232","00:00:03.234","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.234","00:00:03.235","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.235","00:00:03.237","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.237","00:00:03.238","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.238","00:00:03.239","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.239","00:00:03.241","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.241","00:00:03.242","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.242","00:00:03.244","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.244","00:00:03.245","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.245","00:00:03.247","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.247","00:00:03.248","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.248","00:00:03.250","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.250","00:00:03.251","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.251","00:00:03.252","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.252","00:00:03.254","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.254","00:00:03.255","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.255","00:00:03.257","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.257","00:00:03.258","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.258","00:00:03.260","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.260","00:00:03.261","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.261","00:00:03.263","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.263","00:00:03.264","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.264","00:00:03.265","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.265","00:00:03.267","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.267","00:00:03.268","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.268","00:00:03.270","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.270","00:00:03.271","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.271","00:00:03.273","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.273","00:00:03.274","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.280","00:00:03.282","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.282","00:00:03.283","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.283","00:00:03.283","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.283","00:00:03.285","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.285","00:00:03.286","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.286","00:00:03.288","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.288","00:00:03.289","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.289","00:00:03.290","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.290","00:00:03.292","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.292","00:00:03.293","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.293","00:00:03.295","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.295","00:00:03.296","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.296","00:00:03.298","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.298","00:00:03.299","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.299","00:00:03.301","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.301","00:00:03.302","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.302","00:00:03.303","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.303","00:00:03.305","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.305","00:00:03.306","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.306","00:00:03.308","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.308","00:00:03.309","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.309","00:00:03.311","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.311","00:00:03.312","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.312","00:00:03.314","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.314","00:00:03.316","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.316","00:00:03.317","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.317","00:00:03.319","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.319","00:00:03.320","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.320","00:00:03.321","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.321","00:00:03.322","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.322","00:00:03.324","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.324","00:00:03.325","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.325","00:00:03.327","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.327","00:00:03.328","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.328","00:00:03.329","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.329","00:00:03.331","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.331","00:00:03.332","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.332","00:00:03.334","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.334","00:00:03.335","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.335","00:00:03.337","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.337","00:00:03.338","0.001s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.338","00:00:03.340","0.002s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.340","00:00:03.341","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.281","00:00:03.283","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.283","00:00:03.283","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.283","00:00:03.285","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.285","00:00:03.286","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.286","00:00:03.287","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.287","00:00:03.289","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.289","00:00:03.290","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.290","00:00:03.292","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.292","00:00:03.293","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.293","00:00:03.295","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.295","00:00:03.296","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.296","00:00:03.298","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.298","00:00:03.299","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.299","00:00:03.300","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.300","00:00:03.302","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.302","00:00:03.303","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.303","00:00:03.305","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.305","00:00:03.306","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.306","00:00:03.308","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.308","00:00:03.309","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.309","00:00:03.311","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.311","00:00:03.312","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.312","00:00:03.314","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.314","00:00:03.315","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.315","00:00:03.317","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.317","00:00:03.318","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.318","00:00:03.320","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.320","00:00:03.321","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.321","00:00:03.322","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.322","00:00:03.324","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.324","00:00:03.325","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.325","00:00:03.326","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.326","00:00:03.328","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.328","00:00:03.329","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.329","00:00:03.331","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.331","00:00:03.332","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.332","00:00:03.334","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.334","00:00:03.335","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.335","00:00:03.337","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.337","00:00:03.338","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.338","00:00:03.339","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.339","00:00:03.341","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.347","00:00:03.348","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.348","00:00:03.349","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.349","00:00:03.350","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.350","00:00:03.352","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.352","00:00:03.353","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.353","00:00:03.355","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.355","00:00:03.356","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.356","00:00:03.358","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.358","00:00:03.359","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.359","00:00:03.361","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.361","00:00:03.362","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.362","00:00:03.363","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.363","00:00:03.365","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.365","00:00:03.366","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.366","00:00:03.368","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.368","00:00:03.369","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.369","00:00:03.371","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.371","00:00:03.372","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.372","00:00:03.374","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.374","00:00:03.375","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.375","00:00:03.376","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.376","00:00:03.378","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.378","00:00:03.379","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.379","00:00:03.381","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.381","00:00:03.382","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.382","00:00:03.384","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.384","00:00:03.385","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.385","00:00:03.386","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.386","00:00:03.388","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.388","00:00:03.389","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.389","00:00:03.391","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.391","00:00:03.393","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.393","00:00:03.394","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.394","00:00:03.396","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.396","00:00:03.397","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.397","00:00:03.398","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.405","00:00:03.406","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.406","00:00:03.407","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.407","00:00:03.408","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.408","00:00:03.410","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.410","00:00:03.411","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.411","00:00:03.413","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.413","00:00:03.415","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.415","00:00:03.416","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.416","00:00:03.417","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.417","00:00:03.419","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.419","00:00:03.420","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.420","00:00:03.422","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.422","00:00:03.423","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.423","00:00:03.425","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.425","00:00:03.426","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.426","00:00:03.428","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.428","00:00:03.429","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.429","00:00:03.430","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.430","00:00:03.432","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.432","00:00:03.433","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.433","00:00:03.435","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.435","00:00:03.437","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.437","00:00:03.438","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.438","00:00:03.440","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.440","00:00:03.441","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.441","00:00:03.443","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.443","00:00:03.444","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.444","00:00:03.446","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.446","00:00:03.447","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.447","00:00:03.448","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.448","00:00:03.450","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.450","00:00:03.451","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.451","00:00:03.453","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.453","00:00:03.454","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.454","00:00:03.456","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.456","00:00:03.457","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.457","00:00:03.459","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.459","00:00:03.460","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.460","00:00:03.462","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.462","00:00:03.463","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.463","00:00:03.465","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.465","00:00:03.466","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.466","00:00:03.468","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.468","00:00:03.469","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.469","00:00:03.471","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.477","00:00:03.478","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.478","00:00:03.479","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.479","00:00:03.480","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.480","00:00:03.482","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.482","00:00:03.483","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.483","00:00:03.485","0.002s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.485","00:00:03.486","0.001s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.486","00:00:03.488","0.002s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.348","00:00:03.349","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.349","00:00:03.350","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.350","00:00:03.352","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.352","00:00:03.353","0.002s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.353","00:00:03.355","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.355","00:00:03.356","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.356","00:00:03.358","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.358","00:00:03.359","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.359","00:00:03.360","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.360","00:00:03.362","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.362","00:00:03.363","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.363","00:00:03.365","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.365","00:00:03.366","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.366","00:00:03.368","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.368","00:00:03.369","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.369","00:00:03.371","0.002s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.371","00:00:03.372","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.372","00:00:03.373","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.373","00:00:03.375","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.375","00:00:03.376","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.376","00:00:03.378","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.378","00:00:03.379","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.379","00:00:03.381","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.381","00:00:03.382","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.382","00:00:03.384","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.384","00:00:03.385","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.385","00:00:03.386","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.386","00:00:03.388","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.388","00:00:03.389","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.389","00:00:03.391","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.391","00:00:03.393","0.002s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.393","00:00:03.394","0.001s"],[619,34,"00:00:03.281","00:00:03.396","00:00:03.394","00:00:03.396","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.406","00:00:03.407","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.407","00:00:03.408","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.408","00:00:03.410","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.410","00:00:03.411","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.411","00:00:03.413","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.413","00:00:03.415","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.415","00:00:03.416","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.416","00:00:03.417","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.417","00:00:03.418","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.418","00:00:03.420","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.420","00:00:03.421","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.421","00:00:03.423","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.423","00:00:03.424","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.424","00:00:03.426","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.426","00:00:03.427","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.427","00:00:03.429","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.429","00:00:03.430","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.430","00:00:03.432","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.432","00:00:03.433","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.433","00:00:03.435","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.435","00:00:03.436","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.436","00:00:03.438","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.438","00:00:03.439","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.439","00:00:03.441","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.441","00:00:03.442","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.442","00:00:03.444","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.444","00:00:03.445","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.445","00:00:03.447","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.447","00:00:03.448","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.448","00:00:03.450","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.450","00:00:03.451","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.451","00:00:03.453","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.453","00:00:03.454","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.454","00:00:03.456","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.456","00:00:03.457","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.457","00:00:03.459","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.459","00:00:03.460","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.460","00:00:03.462","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.462","00:00:03.463","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.463","00:00:03.465","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.465","00:00:03.466","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.466","00:00:03.468","0.001s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.468","00:00:03.469","0.002s"],[279,45,"00:00:03.391","00:00:03.471","00:00:03.469","00:00:03.471","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.478","00:00:03.479","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.479","00:00:03.480","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.480","00:00:03.482","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.482","00:00:03.483","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.483","00:00:03.485","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.485","00:00:03.486","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.486","00:00:03.488","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.488","00:00:03.490","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.490","00:00:03.491","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.491","00:00:03.492","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.492","00:00:03.494","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.494","00:00:03.495","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.495","00:00:03.497","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.497","00:00:03.498","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.498","00:00:03.500","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.500","00:00:03.501","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.501","00:00:03.503","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.503","00:00:03.504","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.504","00:00:03.506","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.506","00:00:03.507","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.507","00:00:03.509","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.509","00:00:03.510","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.510","00:00:03.512","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.512","00:00:03.513","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.513","00:00:03.515","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.515","00:00:03.516","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.516","00:00:03.518","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.518","00:00:03.519","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.519","00:00:03.521","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.521","00:00:03.522","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.522","00:00:03.524","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.524","00:00:03.525","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.525","00:00:03.527","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.527","00:00:03.528","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.528","00:00:03.530","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.530","00:00:03.531","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.531","00:00:03.533","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.533","00:00:03.534","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.534","00:00:03.536","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.536","00:00:03.537","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.537","00:00:03.539","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.539","00:00:03.540","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.540","00:00:03.542","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.542","00:00:03.543","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.543","00:00:03.545","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.545","00:00:03.546","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.546","00:00:03.548","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.548","00:00:03.549","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.549","00:00:03.551","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.551","00:00:03.552","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.552","00:00:03.554","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.554","00:00:03.555","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.555","00:00:03.557","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.557","00:00:03.558","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.558","00:00:03.560","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.560","00:00:03.561","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.561","00:00:03.562","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.562","00:00:03.564","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.564","00:00:03.565","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.565","00:00:03.567","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.567","00:00:03.568","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.568","00:00:03.570","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.570","00:00:03.571","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.571","00:00:03.573","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.573","00:00:03.574","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.574","00:00:03.576","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.576","00:00:03.577","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.577","00:00:03.579","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.579","00:00:03.580","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.580","00:00:03.582","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.582","00:00:03.583","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.583","00:00:03.585","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.585","00:00:03.586","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.586","00:00:03.588","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.588","00:00:03.590","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.590","00:00:03.591","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.591","00:00:03.593","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.593","00:00:03.594","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.594","00:00:03.596","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.596","00:00:03.597","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.597","00:00:03.599","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.599","00:00:03.600","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.600","00:00:03.602","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.602","00:00:03.603","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.603","00:00:03.605","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.605","00:00:03.606","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.606","00:00:03.608","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.608","00:00:03.610","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.612","00:00:03.613","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.613","00:00:03.614","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.614","00:00:03.616","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.616","00:00:03.617","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.617","00:00:03.619","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.619","00:00:03.621","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.621","00:00:03.622","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.622","00:00:03.624","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.624","00:00:03.625","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.625","00:00:03.627","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.627","00:00:03.628","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.628","00:00:03.630","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.630","00:00:03.631","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.631","00:00:03.633","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.633","00:00:03.634","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.634","00:00:03.636","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.636","00:00:03.637","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.637","00:00:03.639","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.639","00:00:03.640","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.640","00:00:03.642","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.642","00:00:03.643","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.643","00:00:03.645","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.645","00:00:03.646","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.646","00:00:03.648","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.648","00:00:03.650","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.650","00:00:03.651","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.651","00:00:03.653","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.653","00:00:03.654","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.654","00:00:03.656","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.656","00:00:03.657","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.657","00:00:03.659","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.659","00:00:03.660","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.660","00:00:03.662","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.662","00:00:03.663","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.663","00:00:03.665","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.665","00:00:03.667","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.667","00:00:03.668","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.668","00:00:03.670","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.670","00:00:03.671","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.671","00:00:03.673","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.673","00:00:03.674","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.674","00:00:03.676","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.676","00:00:03.677","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.677","00:00:03.679","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.679","00:00:03.680","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.680","00:00:03.682","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.682","00:00:03.683","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.683","00:00:03.685","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.685","00:00:03.686","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.686","00:00:03.688","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.688","00:00:03.690","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.690","00:00:03.691","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.691","00:00:03.693","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.693","00:00:03.694","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.694","00:00:03.696","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.696","00:00:03.697","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.697","00:00:03.699","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.699","00:00:03.700","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.700","00:00:03.702","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.702","00:00:03.703","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.703","00:00:03.705","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.705","00:00:03.707","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.707","00:00:03.708","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.708","00:00:03.709","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.709","00:00:03.711","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.711","00:00:03.713","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.713","00:00:03.714","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.714","00:00:03.716","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.716","00:00:03.717","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.717","00:00:03.719","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.719","00:00:03.720","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.720","00:00:03.722","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.722","00:00:03.724","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.724","00:00:03.725","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.725","00:00:03.727","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.727","00:00:03.728","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.728","00:00:03.729","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.734","00:00:03.735","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.735","00:00:03.736","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.736","00:00:03.737","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.737","00:00:03.738","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.738","00:00:03.740","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.740","00:00:03.741","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.741","00:00:03.742","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.742","00:00:03.743","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.743","00:00:03.745","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.745","00:00:03.746","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.746","00:00:03.747","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.747","00:00:03.748","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.748","00:00:03.750","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.750","00:00:03.751","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.751","00:00:03.752","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.752","00:00:03.754","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.754","00:00:03.755","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.755","00:00:03.756","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.756","00:00:03.757","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.757","00:00:03.759","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.759","00:00:03.760","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.760","00:00:03.761","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.761","00:00:03.762","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.762","00:00:03.764","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.764","00:00:03.765","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.765","00:00:03.766","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.766","00:00:03.767","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.767","00:00:03.769","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.769","00:00:03.770","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.770","00:00:03.771","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.771","00:00:03.773","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.773","00:00:03.774","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.774","00:00:03.775","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.775","00:00:03.776","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.776","00:00:03.778","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.778","00:00:03.779","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.779","00:00:03.780","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.780","00:00:03.781","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.781","00:00:03.783","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.783","00:00:03.784","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.784","00:00:03.785","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.785","00:00:03.787","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.787","00:00:03.788","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.788","00:00:03.789","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.789","00:00:03.790","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.790","00:00:03.792","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.792","00:00:03.793","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.793","00:00:03.794","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.794","00:00:03.795","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.795","00:00:03.797","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.797","00:00:03.798","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.798","00:00:03.799","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.799","00:00:03.800","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.800","00:00:03.802","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.802","00:00:03.803","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.803","00:00:03.805","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.805","00:00:03.806","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.806","00:00:03.808","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.813","00:00:03.814","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.814","00:00:03.815","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.815","00:00:03.816","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.816","00:00:03.818","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.818","00:00:03.819","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.819","00:00:03.820","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.820","00:00:03.822","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.822","00:00:03.823","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.823","00:00:03.824","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.824","00:00:03.825","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.825","00:00:03.827","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.827","00:00:03.828","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.828","00:00:03.829","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.829","00:00:03.831","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.831","00:00:03.832","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.832","00:00:03.833","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.833","00:00:03.834","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.834","00:00:03.836","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.836","00:00:03.837","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.837","00:00:03.838","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.838","00:00:03.840","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.840","00:00:03.841","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.841","00:00:03.842","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.842","00:00:03.843","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.843","00:00:03.845","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.845","00:00:03.846","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.846","00:00:03.847","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.847","00:00:03.849","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.849","00:00:03.850","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.850","00:00:03.851","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.851","00:00:03.852","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.852","00:00:03.854","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.854","00:00:03.855","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.855","00:00:03.856","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.856","00:00:03.857","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.857","00:00:03.859","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.859","00:00:03.860","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.860","00:00:03.861","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.861","00:00:03.863","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.863","00:00:03.864","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.864","00:00:03.865","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.865","00:00:03.867","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.867","00:00:03.868","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.874","00:00:03.876","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.876","00:00:03.876","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.876","00:00:03.878","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.878","00:00:03.879","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.879","00:00:03.880","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.880","00:00:03.881","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.881","00:00:03.883","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.883","00:00:03.884","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.884","00:00:03.885","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.885","00:00:03.886","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.886","00:00:03.888","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.888","00:00:03.889","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.889","00:00:03.890","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.890","00:00:03.892","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.892","00:00:03.893","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.893","00:00:03.894","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.894","00:00:03.896","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.896","00:00:03.897","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.897","00:00:03.898","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.898","00:00:03.899","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.899","00:00:03.901","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.905","00:00:03.906","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.906","00:00:03.907","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.907","00:00:03.908","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.908","00:00:03.909","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.909","00:00:03.911","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.911","00:00:03.912","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.912","00:00:03.913","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.913","00:00:03.914","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.914","00:00:03.916","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.916","00:00:03.917","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.917","00:00:03.918","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.918","00:00:03.920","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.920","00:00:03.921","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.921","00:00:03.922","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.922","00:00:03.924","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.924","00:00:03.925","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.925","00:00:03.926","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.926","00:00:03.928","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.928","00:00:03.929","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.929","00:00:03.930","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.930","00:00:03.932","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.932","00:00:03.933","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.933","00:00:03.935","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.935","00:00:03.936","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.936","00:00:03.937","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.937","00:00:03.938","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.938","00:00:03.940","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.940","00:00:03.941","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.941","00:00:03.942","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.942","00:00:03.944","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.944","00:00:03.945","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.945","00:00:03.946","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.946","00:00:03.948","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.948","00:00:03.949","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.949","00:00:03.950","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.950","00:00:03.952","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.952","00:00:03.953","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.953","00:00:03.954","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.954","00:00:03.955","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.955","00:00:03.957","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.957","00:00:03.958","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.958","00:00:03.959","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.959","00:00:03.961","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.961","00:00:03.962","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.962","00:00:03.963","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.963","00:00:03.965","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.965","00:00:03.966","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.966","00:00:03.967","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.967","00:00:03.969","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.969","00:00:03.970","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.970","00:00:03.971","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.971","00:00:03.973","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.973","00:00:03.974","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.974","00:00:03.975","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.975","00:00:03.977","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.977","00:00:03.978","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.978","00:00:03.979","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.979","00:00:03.981","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.981","00:00:03.982","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.982","00:00:03.983","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.983","00:00:03.985","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.985","00:00:03.986","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.986","00:00:03.988","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.988","00:00:03.989","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.989","00:00:03.990","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.990","00:00:03.991","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.991","00:00:03.993","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.993","00:00:03.994","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.994","00:00:03.995","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.995","00:00:03.997","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.997","00:00:03.998","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.998","00:00:03.999","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.999","00:00:04.001","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.001","00:00:04.002","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.002","00:00:04.003","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.003","00:00:04.005","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.005","00:00:04.006","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.006","00:00:04.007","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.007","00:00:04.009","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.009","00:00:04.010","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.010","00:00:04.011","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.011","00:00:04.013","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.013","00:00:04.014","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.014","00:00:04.015","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.015","00:00:04.017","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.017","00:00:04.018","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.018","00:00:04.019","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.019","00:00:04.021","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.021","00:00:04.022","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.022","00:00:04.023","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.023","00:00:04.025","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.025","00:00:04.026","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.026","00:00:04.027","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.027","00:00:04.028","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.028","00:00:04.030","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.030","00:00:04.031","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.031","00:00:04.032","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.032","00:00:04.034","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.034","00:00:04.035","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.035","00:00:04.037","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.037","00:00:04.038","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.038","00:00:04.039","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.039","00:00:04.040","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.040","00:00:04.042","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.042","00:00:04.043","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.043","00:00:04.044","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.044","00:00:04.046","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.046","00:00:04.047","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.047","00:00:04.048","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.048","00:00:04.050","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.050","00:00:04.051","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.051","00:00:04.053","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.053","00:00:04.054","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.054","00:00:04.055","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.055","00:00:04.057","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.057","00:00:04.058","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.058","00:00:04.059","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.059","00:00:04.060","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.060","00:00:04.062","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.062","00:00:04.063","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.066","00:00:04.067","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.067","00:00:04.068","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.068","00:00:04.069","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.069","00:00:04.071","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.071","00:00:04.072","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.072","00:00:04.073","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.073","00:00:04.075","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.075","00:00:04.076","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.076","00:00:04.077","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.077","00:00:04.079","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.079","00:00:04.080","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.080","00:00:04.081","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.081","00:00:04.083","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.083","00:00:04.084","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.084","00:00:04.085","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.085","00:00:04.087","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.087","00:00:04.088","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.088","00:00:04.090","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.090","00:00:04.091","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.091","00:00:04.092","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.092","00:00:04.094","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.094","00:00:04.095","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.095","00:00:04.097","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.097","00:00:04.098","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.098","00:00:04.099","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.099","00:00:04.101","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.101","00:00:04.102","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.102","00:00:04.103","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.103","00:00:04.105","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.105","00:00:04.106","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.106","00:00:04.108","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.108","00:00:04.109","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.109","00:00:04.110","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.110","00:00:04.112","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.112","00:00:04.113","0.001s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.113","00:00:04.115","0.002s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.735","00:00:03.736","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.736","00:00:03.737","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.737","00:00:03.738","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.738","00:00:03.739","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.739","00:00:03.741","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.741","00:00:03.742","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.742","00:00:03.743","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.743","00:00:03.745","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.745","00:00:03.746","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.746","00:00:03.747","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.747","00:00:03.748","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.748","00:00:03.750","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.750","00:00:03.751","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.751","00:00:03.752","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.752","00:00:03.753","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.753","00:00:03.755","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.755","00:00:03.756","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.756","00:00:03.757","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.757","00:00:03.759","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.759","00:00:03.760","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.760","00:00:03.761","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.761","00:00:03.762","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.762","00:00:03.764","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.764","00:00:03.765","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.765","00:00:03.766","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.766","00:00:03.767","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.767","00:00:03.769","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.769","00:00:03.770","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.770","00:00:03.771","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.771","00:00:03.773","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.773","00:00:03.774","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.774","00:00:03.775","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.775","00:00:03.776","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.776","00:00:03.778","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.778","00:00:03.779","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.779","00:00:03.780","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.780","00:00:03.781","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.781","00:00:03.783","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.783","00:00:03.784","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.784","00:00:03.785","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.785","00:00:03.786","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.786","00:00:03.788","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.788","00:00:03.789","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.789","00:00:03.790","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.790","00:00:03.792","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.792","00:00:03.793","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.793","00:00:03.794","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.794","00:00:03.795","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.795","00:00:03.797","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.797","00:00:03.798","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.798","00:00:03.799","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.799","00:00:03.800","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.800","00:00:03.802","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.802","00:00:03.803","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.803","00:00:03.805","0.002s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.805","00:00:03.806","0.002s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.806","00:00:03.808","0.002s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.813","00:00:03.814","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.814","00:00:03.815","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.815","00:00:03.816","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.816","00:00:03.818","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.818","00:00:03.819","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.819","00:00:03.820","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.820","00:00:03.822","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.822","00:00:03.823","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.823","00:00:03.824","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.824","00:00:03.825","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.825","00:00:03.827","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.827","00:00:03.828","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.828","00:00:03.829","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.829","00:00:03.831","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.831","00:00:03.832","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.832","00:00:03.833","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.833","00:00:03.834","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.834","00:00:03.836","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.836","00:00:03.837","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.837","00:00:03.838","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.838","00:00:03.840","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.840","00:00:03.841","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.841","00:00:03.842","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.842","00:00:03.843","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.843","00:00:03.845","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.845","00:00:03.846","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.846","00:00:03.847","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.847","00:00:03.849","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.849","00:00:03.850","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.850","00:00:03.851","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.851","00:00:03.852","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.852","00:00:03.854","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.854","00:00:03.855","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.855","00:00:03.856","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.856","00:00:03.857","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.857","00:00:03.859","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.859","00:00:03.860","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.860","00:00:03.861","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.861","00:00:03.863","0.002s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.863","00:00:03.864","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.864","00:00:03.865","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.865","00:00:03.866","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.866","00:00:03.868","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.874","00:00:03.876","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.876","00:00:03.876","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.876","00:00:03.878","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.878","00:00:03.879","0.002s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.879","00:00:03.880","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.880","00:00:03.881","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.881","00:00:03.883","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.883","00:00:03.884","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.884","00:00:03.885","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.885","00:00:03.886","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.886","00:00:03.888","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.888","00:00:03.889","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.889","00:00:03.890","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.890","00:00:03.892","0.001s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.892","00:00:03.893","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.814","00:00:03.815","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.815","00:00:03.816","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.816","00:00:03.818","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.818","00:00:03.819","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.819","00:00:03.820","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.820","00:00:03.821","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.821","00:00:03.823","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.823","00:00:03.824","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.824","00:00:03.825","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.825","00:00:03.827","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.827","00:00:03.828","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.828","00:00:03.829","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.829","00:00:03.831","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.831","00:00:03.832","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.832","00:00:03.833","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.833","00:00:03.834","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.834","00:00:03.836","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.836","00:00:03.837","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.837","00:00:03.838","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.838","00:00:03.839","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.839","00:00:03.841","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.841","00:00:03.842","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.842","00:00:03.843","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.843","00:00:03.845","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.845","00:00:03.846","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.846","00:00:03.847","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.847","00:00:03.848","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.848","00:00:03.850","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.850","00:00:03.851","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.851","00:00:03.852","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.852","00:00:03.854","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.854","00:00:03.855","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.855","00:00:03.856","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.856","00:00:03.857","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.857","00:00:03.859","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.859","00:00:03.860","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.860","00:00:03.861","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.861","00:00:03.863","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.863","00:00:03.864","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.864","00:00:03.865","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.865","00:00:03.866","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.866","00:00:03.868","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.874","00:00:03.876","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.876","00:00:03.876","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.876","00:00:03.877","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.877","00:00:03.879","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.879","00:00:03.880","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.880","00:00:03.881","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.881","00:00:03.883","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.883","00:00:03.884","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.884","00:00:03.885","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.885","00:00:03.886","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.886","00:00:03.888","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.888","00:00:03.889","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.889","00:00:03.890","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.890","00:00:03.892","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.892","00:00:03.893","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.893","00:00:03.894","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.894","00:00:03.896","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.896","00:00:03.897","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.897","00:00:03.898","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.898","00:00:03.899","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.899","00:00:03.901","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.905","00:00:03.906","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.906","00:00:03.907","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.907","00:00:03.908","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.908","00:00:03.909","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.909","00:00:03.911","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.911","00:00:03.912","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.912","00:00:03.913","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.913","00:00:03.914","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.914","00:00:03.916","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.916","00:00:03.917","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.917","00:00:03.918","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.918","00:00:03.920","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.920","00:00:03.921","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.921","00:00:03.922","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.922","00:00:03.924","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.924","00:00:03.925","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.925","00:00:03.926","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.926","00:00:03.928","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.928","00:00:03.929","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.929","00:00:03.930","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.930","00:00:03.932","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.932","00:00:03.933","0.002s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.933","00:00:03.934","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.934","00:00:03.936","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.936","00:00:03.937","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.937","00:00:03.938","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.938","00:00:03.940","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.940","00:00:03.941","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.941","00:00:03.942","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.942","00:00:03.943","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.943","00:00:03.945","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.945","00:00:03.946","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.946","00:00:03.948","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.948","00:00:03.949","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.949","00:00:03.950","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.950","00:00:03.951","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.951","00:00:03.953","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.953","00:00:03.954","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.954","00:00:03.955","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.955","00:00:03.957","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.957","00:00:03.958","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.958","00:00:03.959","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.959","00:00:03.961","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.961","00:00:03.962","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.962","00:00:03.963","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.963","00:00:03.965","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.965","00:00:03.966","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.966","00:00:03.967","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.967","00:00:03.969","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.969","00:00:03.970","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.970","00:00:03.971","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.971","00:00:03.973","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.973","00:00:03.974","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.974","00:00:03.975","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.975","00:00:03.977","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.977","00:00:03.978","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.978","00:00:03.979","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.979","00:00:03.981","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.981","00:00:03.982","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.982","00:00:03.983","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.983","00:00:03.985","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.985","00:00:03.986","0.002s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.986","00:00:03.987","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.987","00:00:03.989","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.989","00:00:03.990","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.990","00:00:03.991","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.991","00:00:03.993","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.993","00:00:03.994","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.994","00:00:03.995","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.995","00:00:03.997","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.997","00:00:03.998","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.998","00:00:03.999","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.999","00:00:04.001","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.001","00:00:04.002","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.002","00:00:04.003","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.003","00:00:04.005","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.005","00:00:04.006","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.006","00:00:04.007","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.007","00:00:04.009","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.009","00:00:04.010","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.010","00:00:04.011","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.011","00:00:04.013","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.013","00:00:04.014","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.014","00:00:04.015","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.015","00:00:04.017","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.017","00:00:04.018","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.018","00:00:04.019","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.019","00:00:04.021","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.021","00:00:04.022","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.022","00:00:04.023","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.023","00:00:04.025","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.025","00:00:04.026","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.026","00:00:04.027","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.027","00:00:04.028","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.028","00:00:04.030","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.030","00:00:04.031","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.031","00:00:04.032","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.032","00:00:04.034","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.034","00:00:04.035","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.035","00:00:04.036","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.036","00:00:04.038","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.038","00:00:04.039","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.039","00:00:04.040","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.040","00:00:04.042","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.042","00:00:04.043","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.043","00:00:04.044","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.044","00:00:04.046","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.046","00:00:04.047","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.047","00:00:04.048","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.048","00:00:04.050","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.050","00:00:04.051","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.051","00:00:04.052","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.052","00:00:04.054","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.054","00:00:04.055","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.055","00:00:04.056","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.056","00:00:04.058","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.058","00:00:04.059","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.059","00:00:04.060","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.060","00:00:04.062","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.062","00:00:04.063","0.002s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.066","00:00:04.067","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.067","00:00:04.068","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.068","00:00:04.069","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.069","00:00:04.070","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.070","00:00:04.072","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.072","00:00:04.073","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.073","00:00:04.074","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.074","00:00:04.076","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.076","00:00:04.077","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.077","00:00:04.078","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.078","00:00:04.080","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.080","00:00:04.081","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.081","00:00:04.083","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.083","00:00:04.084","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.084","00:00:04.085","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.085","00:00:04.087","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.087","00:00:04.088","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.088","00:00:04.089","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.089","00:00:04.091","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.091","00:00:04.092","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.092","00:00:04.094","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.094","00:00:04.095","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.095","00:00:04.096","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.096","00:00:04.098","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.098","00:00:04.099","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.099","00:00:04.101","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.101","00:00:04.102","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.102","00:00:04.103","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.103","00:00:04.105","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.105","00:00:04.106","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.106","00:00:04.108","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.108","00:00:04.109","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.109","00:00:04.110","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.110","00:00:04.112","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.112","00:00:04.113","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.113","00:00:04.115","0.002s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.115","00:00:04.116","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.116","00:00:04.117","0.001s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.117","00:00:04.118","0.001s"],[45,4,"00:00:03.862","00:00:03.879","00:00:03.876","00:00:03.876","0.001s"],[45,4,"00:00:03.862","00:00:03.879","00:00:03.876","00:00:03.878","0.001s"],[45,4,"00:00:03.862","00:00:03.879","00:00:03.878","00:00:03.879","0.002s"],[11,5,"00:00:03.895","00:00:03.911","00:00:03.906","00:00:03.907","0.001s"],[11,5,"00:00:03.895","00:00:03.911","00:00:03.907","00:00:03.908","0.001s"],[11,5,"00:00:03.895","00:00:03.911","00:00:03.908","00:00:03.909","0.001s"],[11,5,"00:00:03.895","00:00:03.911","00:00:03.909","00:00:03.911","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.067","00:00:04.068","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.068","00:00:04.069","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.069","00:00:04.070","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.070","00:00:04.072","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.072","00:00:04.073","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.073","00:00:04.075","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.075","00:00:04.076","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.076","00:00:04.077","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.077","00:00:04.079","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.079","00:00:04.080","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.080","00:00:04.081","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.081","00:00:04.083","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.083","00:00:04.084","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.084","00:00:04.085","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.085","00:00:04.087","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.087","00:00:04.088","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.088","00:00:04.090","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.090","00:00:04.091","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.091","00:00:04.092","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.092","00:00:04.094","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.094","00:00:04.095","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.095","00:00:04.097","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.097","00:00:04.098","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.098","00:00:04.099","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.099","00:00:04.101","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.101","00:00:04.102","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.102","00:00:04.103","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.103","00:00:04.105","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.105","00:00:04.106","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.106","00:00:04.108","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.108","00:00:04.109","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.109","00:00:04.110","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.110","00:00:04.112","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.112","00:00:04.113","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.113","00:00:04.115","0.002s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.115","00:00:04.116","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.116","00:00:04.117","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.117","00:00:04.118","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.118","00:00:04.119","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.119","00:00:04.120","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.120","00:00:04.121","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.121","00:00:04.122","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.122","00:00:04.123","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.123","00:00:04.124","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.124","00:00:04.125","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.125","00:00:04.126","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.126","00:00:04.127","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.127","00:00:04.128","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.128","00:00:04.129","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.129","00:00:04.130","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.134","00:00:04.135","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.135","00:00:04.136","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.136","00:00:04.137","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.137","00:00:04.138","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.138","00:00:04.139","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.139","00:00:04.140","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.140","00:00:04.141","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.141","00:00:04.142","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.142","00:00:04.143","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.143","00:00:04.144","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.144","00:00:04.145","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.145","00:00:04.146","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.146","00:00:04.148","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.148","00:00:04.149","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.149","00:00:04.150","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.150","00:00:04.151","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.151","00:00:04.152","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.152","00:00:04.153","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.153","00:00:04.154","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.154","00:00:04.155","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.155","00:00:04.156","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.156","00:00:04.157","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.157","00:00:04.158","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.158","00:00:04.159","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.159","00:00:04.160","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.160","00:00:04.161","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.161","00:00:04.163","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.163","00:00:04.164","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.164","00:00:04.165","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.165","00:00:04.166","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.166","00:00:04.167","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.167","00:00:04.168","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.168","00:00:04.169","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.169","00:00:04.170","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.170","00:00:04.171","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.171","00:00:04.172","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.172","00:00:04.173","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.173","00:00:04.174","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.174","00:00:04.175","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.175","00:00:04.176","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.176","00:00:04.177","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.177","00:00:04.178","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.178","00:00:04.180","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.180","00:00:04.181","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.181","00:00:04.182","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.182","00:00:04.183","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.183","00:00:04.184","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.184","00:00:04.185","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.185","00:00:04.186","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.186","00:00:04.187","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.187","00:00:04.188","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.188","00:00:04.189","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.189","00:00:04.190","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.190","00:00:04.191","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.191","00:00:04.193","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.193","00:00:04.194","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.194","00:00:04.195","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.195","00:00:04.196","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.196","00:00:04.197","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.197","00:00:04.198","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.198","00:00:04.199","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.199","00:00:04.200","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.200","00:00:04.201","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.201","00:00:04.202","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.202","00:00:04.203","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.203","00:00:04.204","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.204","00:00:04.205","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.205","00:00:04.206","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.206","00:00:04.207","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.207","00:00:04.208","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.208","00:00:04.209","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.209","00:00:04.210","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.210","00:00:04.211","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.211","00:00:04.212","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.212","00:00:04.213","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.213","00:00:04.215","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.215","00:00:04.216","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.216","00:00:04.217","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.217","00:00:04.218","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.218","00:00:04.219","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.219","00:00:04.220","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.220","00:00:04.221","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.221","00:00:04.222","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.222","00:00:04.223","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.223","00:00:04.224","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.224","00:00:04.226","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.230","00:00:04.232","0.002s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.232","00:00:04.232","0.000s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.232","00:00:04.233","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.233","00:00:04.235","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.235","00:00:04.236","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.236","00:00:04.238","0.001s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.238","00:00:04.239","0.002s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.135","00:00:04.136","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.136","00:00:04.137","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.137","00:00:04.138","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.138","00:00:04.139","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.139","00:00:04.140","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.140","00:00:04.141","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.141","00:00:04.142","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.142","00:00:04.143","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.143","00:00:04.144","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.144","00:00:04.145","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.145","00:00:04.147","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.147","00:00:04.148","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.148","00:00:04.149","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.149","00:00:04.150","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.150","00:00:04.151","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.151","00:00:04.152","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.152","00:00:04.153","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.153","00:00:04.154","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.154","00:00:04.155","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.155","00:00:04.156","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.156","00:00:04.157","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.157","00:00:04.158","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.158","00:00:04.159","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.159","00:00:04.160","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.160","00:00:04.161","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.161","00:00:04.163","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.163","00:00:04.164","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.164","00:00:04.165","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.165","00:00:04.166","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.166","00:00:04.167","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.167","00:00:04.168","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.168","00:00:04.169","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.169","00:00:04.170","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.170","00:00:04.171","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.171","00:00:04.172","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.172","00:00:04.173","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.173","00:00:04.174","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.174","00:00:04.175","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.175","00:00:04.176","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.176","00:00:04.177","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.177","00:00:04.178","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.178","00:00:04.180","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.180","00:00:04.181","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.181","00:00:04.182","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.182","00:00:04.183","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.183","00:00:04.184","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.184","00:00:04.185","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.185","00:00:04.186","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.186","00:00:04.187","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.187","00:00:04.188","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.188","00:00:04.189","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.189","00:00:04.190","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.190","00:00:04.192","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.192","00:00:04.193","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.193","00:00:04.194","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.194","00:00:04.195","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.195","00:00:04.196","0.001s"],[80,59,"00:00:04.124","00:00:04.197","00:00:04.196","00:00:04.197","0.001s"],[768,11,"00:00:04.216","00:00:04.245","00:00:04.232","00:00:04.232","0.000s"],[768,11,"00:00:04.216","00:00:04.245","00:00:04.232","00:00:04.233","0.001s"],[768,11,"00:00:04.216","00:00:04.245","00:00:04.233","00:00:04.235","0.001s"],[768,11,"00:00:04.216","00:00:04.245","00:00:04.235","00:00:04.236","0.001s"],[768,11,"00:00:04.216","00:00:04.245","00:00:04.236","00:00:04.237","0.001s"],[768,11,"00:00:04.216","00:00:04.245","00:00:04.237","00:00:04.239","0.002s"],[768,11,"00:00:04.216","00:00:04.245","00:00:04.239","00:00:04.240","0.001s"],[768,11,"00:00:04.216","00:00:04.245","00:00:04.240","00:00:04.242","0.001s"],[768,11,"00:00:04.216","00:00:04.245","00:00:04.242","00:00:04.243","0.001s"],[768,11,"00:00:04.216","00:00:04.245","00:00:04.243","00:00:04.245","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.232","00:00:04.232","0.000s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.232","00:00:04.233","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.233","00:00:04.235","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.235","00:00:04.236","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.236","00:00:04.238","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.238","00:00:04.239","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.239","00:00:04.240","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.240","00:00:04.242","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.242","00:00:04.243","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.243","00:00:04.245","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.245","00:00:04.246","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.246","00:00:04.247","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.247","00:00:04.248","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.248","00:00:04.249","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.249","00:00:04.250","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.250","00:00:04.251","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.251","00:00:04.252","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.252","00:00:04.253","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.253","00:00:04.254","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.254","00:00:04.255","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.255","00:00:04.257","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.257","00:00:04.258","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.258","00:00:04.259","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.259","00:00:04.260","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.260","00:00:04.261","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.261","00:00:04.262","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.262","00:00:04.263","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.263","00:00:04.264","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.264","00:00:04.266","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.266","00:00:04.267","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.267","00:00:04.268","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.268","00:00:04.269","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.269","00:00:04.270","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.270","00:00:04.271","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.271","00:00:04.272","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.272","00:00:04.273","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.273","00:00:04.275","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.275","00:00:04.276","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.276","00:00:04.277","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.277","00:00:04.278","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.278","00:00:04.279","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.279","00:00:04.280","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.280","00:00:04.281","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.281","00:00:04.282","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.282","00:00:04.284","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.284","00:00:04.285","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.285","00:00:04.286","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.286","00:00:04.287","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.287","00:00:04.288","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.288","00:00:04.289","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.289","00:00:04.290","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.290","00:00:04.292","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.292","00:00:04.293","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.293","00:00:04.294","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.294","00:00:04.295","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.295","00:00:04.296","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.296","00:00:04.297","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.297","00:00:04.298","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.298","00:00:04.299","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.299","00:00:04.300","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.300","00:00:04.302","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.302","00:00:04.303","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.303","00:00:04.304","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.304","00:00:04.305","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.305","00:00:04.306","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.306","00:00:04.307","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.307","00:00:04.308","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.308","00:00:04.310","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.310","00:00:04.311","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.311","00:00:04.312","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.312","00:00:04.313","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.313","00:00:04.314","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.314","00:00:04.316","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.316","00:00:04.317","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.317","00:00:04.318","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.318","00:00:04.319","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.319","00:00:04.320","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.320","00:00:04.321","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.321","00:00:04.323","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.323","00:00:04.324","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.324","00:00:04.325","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.325","00:00:04.326","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.326","00:00:04.327","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.332","00:00:04.332","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.332","00:00:04.333","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.333","00:00:04.334","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.334","00:00:04.336","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.336","00:00:04.337","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.337","00:00:04.338","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.338","00:00:04.339","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.339","00:00:04.341","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.341","00:00:04.342","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.342","00:00:04.343","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.343","00:00:04.344","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.344","00:00:04.345","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.345","00:00:04.347","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.347","00:00:04.348","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.348","00:00:04.349","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.349","00:00:04.350","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.350","00:00:04.352","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.352","00:00:04.353","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.353","00:00:04.354","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.354","00:00:04.355","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.355","00:00:04.357","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.357","00:00:04.358","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.358","00:00:04.359","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.359","00:00:04.360","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.360","00:00:04.361","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.361","00:00:04.363","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.363","00:00:04.364","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.364","00:00:04.365","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.365","00:00:04.366","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.366","00:00:04.367","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.367","00:00:04.369","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.369","00:00:04.370","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.370","00:00:04.371","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.371","00:00:04.372","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.372","00:00:04.374","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.374","00:00:04.375","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.375","00:00:04.376","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.376","00:00:04.377","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.377","00:00:04.378","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.378","00:00:04.380","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.380","00:00:04.381","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.381","00:00:04.382","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.382","00:00:04.383","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.383","00:00:04.384","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.384","00:00:04.386","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.386","00:00:04.387","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.387","00:00:04.388","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.388","00:00:04.389","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.389","00:00:04.391","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.391","00:00:04.392","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.392","00:00:04.393","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.393","00:00:04.394","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.394","00:00:04.396","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.396","00:00:04.397","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.397","00:00:04.398","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.398","00:00:04.399","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.399","00:00:04.400","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.400","00:00:04.401","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.401","00:00:04.403","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.403","00:00:04.404","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.404","00:00:04.405","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.405","00:00:04.406","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.406","00:00:04.408","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.408","00:00:04.409","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.409","00:00:04.410","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.410","00:00:04.411","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.411","00:00:04.412","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.412","00:00:04.414","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.414","00:00:04.415","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.415","00:00:04.417","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.417","00:00:04.418","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.418","00:00:04.419","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.419","00:00:04.421","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.421","00:00:04.422","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.422","00:00:04.424","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.424","00:00:04.425","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.425","00:00:04.426","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.426","00:00:04.427","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.427","00:00:04.428","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.428","00:00:04.429","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.429","00:00:04.431","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.431","00:00:04.433","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.440","00:00:04.442","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.442","00:00:04.443","0.000s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.443","00:00:04.444","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.444","00:00:04.445","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.445","00:00:04.447","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.447","00:00:04.448","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.448","00:00:04.449","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.449","00:00:04.450","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.450","00:00:04.452","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.452","00:00:04.453","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.453","00:00:04.454","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.454","00:00:04.455","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.455","00:00:04.457","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.457","00:00:04.458","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.458","00:00:04.459","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.459","00:00:04.461","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.461","00:00:04.462","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.462","00:00:04.463","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.463","00:00:04.465","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.465","00:00:04.466","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.466","00:00:04.467","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.467","00:00:04.468","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.468","00:00:04.470","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.470","00:00:04.471","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.471","00:00:04.472","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.472","00:00:04.474","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.474","00:00:04.475","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.475","00:00:04.476","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.476","00:00:04.478","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.478","00:00:04.479","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.479","00:00:04.480","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.480","00:00:04.482","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.482","00:00:04.483","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.483","00:00:04.484","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.484","00:00:04.486","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.486","00:00:04.487","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.487","00:00:04.488","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.488","00:00:04.490","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.490","00:00:04.491","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.491","00:00:04.492","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.492","00:00:04.493","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.493","00:00:04.495","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.495","00:00:04.496","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.496","00:00:04.497","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.497","00:00:04.499","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.499","00:00:04.500","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.500","00:00:04.501","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.501","00:00:04.503","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.503","00:00:04.504","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.504","00:00:04.505","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.505","00:00:04.507","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.507","00:00:04.508","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.508","00:00:04.509","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.509","00:00:04.511","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.511","00:00:04.512","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.512","00:00:04.513","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.513","00:00:04.515","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.515","00:00:04.516","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.516","00:00:04.517","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.517","00:00:04.519","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.519","00:00:04.520","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.520","00:00:04.521","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.521","00:00:04.523","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.523","00:00:04.524","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.524","00:00:04.525","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.525","00:00:04.527","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.527","00:00:04.528","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.528","00:00:04.530","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.530","00:00:04.531","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.531","00:00:04.532","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.532","00:00:04.534","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.534","00:00:04.535","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.540","00:00:04.541","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.541","00:00:04.542","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.542","00:00:04.543","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.543","00:00:04.544","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.544","00:00:04.546","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.546","00:00:04.547","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.547","00:00:04.548","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.548","00:00:04.550","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.550","00:00:04.551","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.551","00:00:04.553","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.553","00:00:04.554","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.554","00:00:04.555","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.555","00:00:04.557","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.557","00:00:04.558","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.558","00:00:04.559","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.559","00:00:04.561","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.561","00:00:04.562","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.562","00:00:04.563","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.563","00:00:04.564","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.564","00:00:04.566","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.566","00:00:04.567","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.567","00:00:04.568","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.568","00:00:04.570","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.570","00:00:04.571","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.571","00:00:04.572","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.572","00:00:04.574","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.574","00:00:04.575","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.575","00:00:04.576","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.576","00:00:04.578","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.578","00:00:04.579","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.579","00:00:04.581","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.581","00:00:04.582","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.582","00:00:04.583","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.583","00:00:04.585","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.585","00:00:04.586","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.586","00:00:04.587","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.587","00:00:04.588","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.588","00:00:04.590","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.590","00:00:04.591","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.591","00:00:04.592","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.592","00:00:04.594","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.594","00:00:04.595","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.595","00:00:04.596","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.596","00:00:04.598","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.598","00:00:04.599","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.599","00:00:04.600","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.600","00:00:04.602","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.602","00:00:04.603","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.603","00:00:04.604","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.604","00:00:04.606","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.606","00:00:04.607","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.607","00:00:04.609","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.609","00:00:04.610","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.610","00:00:04.611","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.611","00:00:04.613","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.613","00:00:04.614","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.614","00:00:04.615","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.615","00:00:04.616","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.616","00:00:04.618","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.618","00:00:04.619","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.619","00:00:04.620","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.620","00:00:04.622","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.622","00:00:04.623","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.623","00:00:04.624","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.624","00:00:04.626","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.626","00:00:04.627","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.627","00:00:04.628","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.628","00:00:04.630","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.630","00:00:04.631","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.631","00:00:04.633","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.633","00:00:04.634","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.634","00:00:04.635","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.635","00:00:04.637","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.637","00:00:04.638","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.638","00:00:04.639","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.639","00:00:04.640","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.640","00:00:04.642","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.642","00:00:04.643","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.643","00:00:04.644","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.644","00:00:04.646","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.646","00:00:04.647","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.647","00:00:04.649","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.649","00:00:04.650","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.650","00:00:04.651","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.651","00:00:04.653","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.653","00:00:04.654","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.654","00:00:04.656","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.656","00:00:04.657","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.657","00:00:04.658","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.658","00:00:04.660","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.660","00:00:04.661","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.661","00:00:04.662","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.662","00:00:04.664","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.664","00:00:04.665","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.665","00:00:04.667","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.667","00:00:04.668","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.668","00:00:04.669","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.669","00:00:04.671","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.671","00:00:04.672","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.672","00:00:04.674","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.674","00:00:04.675","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.675","00:00:04.676","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.676","00:00:04.678","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.678","00:00:04.679","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.679","00:00:04.681","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.681","00:00:04.682","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.682","00:00:04.683","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.683","00:00:04.685","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.685","00:00:04.686","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.686","00:00:04.687","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.687","00:00:04.689","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.689","00:00:04.690","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.690","00:00:04.692","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.692","00:00:04.693","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.693","00:00:04.694","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.694","00:00:04.696","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.696","00:00:04.697","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.697","00:00:04.698","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.698","00:00:04.700","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.700","00:00:04.701","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.701","00:00:04.703","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.703","00:00:04.704","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.704","00:00:04.705","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.705","00:00:04.707","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.707","00:00:04.708","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.708","00:00:04.710","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.710","00:00:04.711","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.711","00:00:04.712","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.712","00:00:04.714","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.714","00:00:04.715","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.715","00:00:04.716","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.716","00:00:04.718","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.718","00:00:04.719","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.719","00:00:04.721","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.721","00:00:04.722","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.722","00:00:04.723","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.723","00:00:04.725","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.725","00:00:04.726","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.726","00:00:04.727","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.727","00:00:04.729","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.729","00:00:04.730","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.730","00:00:04.731","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.731","00:00:04.733","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.733","00:00:04.734","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.734","00:00:04.736","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.736","00:00:04.737","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.737","00:00:04.738","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.738","00:00:04.740","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.740","00:00:04.741","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.741","00:00:04.742","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.742","00:00:04.744","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.744","00:00:04.745","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.745","00:00:04.746","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.746","00:00:04.748","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.748","00:00:04.749","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.749","00:00:04.751","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.751","00:00:04.752","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.752","00:00:04.753","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.753","00:00:04.755","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.755","00:00:04.756","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.756","00:00:04.758","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.758","00:00:04.759","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.759","00:00:04.760","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.760","00:00:04.762","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.762","00:00:04.763","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.763","00:00:04.765","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.765","00:00:04.766","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.766","00:00:04.768","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.768","00:00:04.769","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.769","00:00:04.771","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.771","00:00:04.772","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.772","00:00:04.773","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.773","00:00:04.775","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.775","00:00:04.776","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.776","00:00:04.778","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.778","00:00:04.780","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.780","00:00:04.781","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.781","00:00:04.782","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.782","00:00:04.783","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.783","00:00:04.784","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.784","00:00:04.786","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.786","00:00:04.787","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.787","00:00:04.789","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.789","00:00:04.790","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.790","00:00:04.791","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.791","00:00:04.793","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.793","00:00:04.794","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.794","00:00:04.795","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.795","00:00:04.797","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.797","00:00:04.798","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.798","00:00:04.800","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.800","00:00:04.801","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.801","00:00:04.802","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.802","00:00:04.804","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.804","00:00:04.805","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.805","00:00:04.807","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.807","00:00:04.808","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.808","00:00:04.809","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.809","00:00:04.811","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.811","00:00:04.812","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.812","00:00:04.814","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.814","00:00:04.815","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.815","00:00:04.816","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.816","00:00:04.817","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.817","00:00:04.819","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.819","00:00:04.820","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.820","00:00:04.822","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.822","00:00:04.823","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.823","00:00:04.824","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.824","00:00:04.826","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.826","00:00:04.827","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.827","00:00:04.829","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.829","00:00:04.830","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.830","00:00:04.831","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.831","00:00:04.833","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.833","00:00:04.834","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.834","00:00:04.836","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.836","00:00:04.837","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.837","00:00:04.839","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.839","00:00:04.840","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.840","00:00:04.841","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.841","00:00:04.843","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.843","00:00:04.844","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.844","00:00:04.846","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.846","00:00:04.847","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.847","00:00:04.849","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.849","00:00:04.850","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.858","00:00:04.859","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.859","00:00:04.860","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.860","00:00:04.862","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.862","00:00:04.863","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.863","00:00:04.865","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.865","00:00:04.866","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.866","00:00:04.867","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.867","00:00:04.869","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.869","00:00:04.870","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.870","00:00:04.871","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.871","00:00:04.873","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.873","00:00:04.874","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.874","00:00:04.876","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.876","00:00:04.877","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.877","00:00:04.879","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.879","00:00:04.880","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.880","00:00:04.881","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.881","00:00:04.883","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.883","00:00:04.884","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.884","00:00:04.886","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.886","00:00:04.887","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.887","00:00:04.889","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.889","00:00:04.890","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.890","00:00:04.891","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.891","00:00:04.893","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.893","00:00:04.894","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.894","00:00:04.896","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.896","00:00:04.897","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.897","00:00:04.899","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.899","00:00:04.900","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.900","00:00:04.901","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.901","00:00:04.903","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.903","00:00:04.904","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.904","00:00:04.906","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.906","00:00:04.907","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.907","00:00:04.908","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.908","00:00:04.910","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.910","00:00:04.911","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.911","00:00:04.913","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.913","00:00:04.914","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.914","00:00:04.916","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.916","00:00:04.917","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.917","00:00:04.919","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.919","00:00:04.920","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.920","00:00:04.921","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.921","00:00:04.923","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.923","00:00:04.924","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.924","00:00:04.926","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.926","00:00:04.927","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.927","00:00:04.928","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.928","00:00:04.930","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.930","00:00:04.931","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.931","00:00:04.933","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.933","00:00:04.934","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.934","00:00:04.936","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.936","00:00:04.937","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.937","00:00:04.938","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.938","00:00:04.940","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.940","00:00:04.941","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.941","00:00:04.943","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.943","00:00:04.944","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.944","00:00:04.946","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.946","00:00:04.947","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.947","00:00:04.948","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.948","00:00:04.950","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.950","00:00:04.951","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.951","00:00:04.953","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.953","00:00:04.954","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.954","00:00:04.956","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.956","00:00:04.957","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.957","00:00:04.958","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.958","00:00:04.960","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.960","00:00:04.961","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.961","00:00:04.963","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.963","00:00:04.964","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.964","00:00:04.965","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.965","00:00:04.967","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.967","00:00:04.969","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.969","00:00:04.970","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.970","00:00:04.971","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.971","00:00:04.973","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.980","00:00:04.981","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.981","00:00:04.982","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.982","00:00:04.983","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.983","00:00:04.985","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.985","00:00:04.986","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.986","00:00:04.988","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.988","00:00:04.989","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.989","00:00:04.990","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.990","00:00:04.992","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.992","00:00:04.993","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.993","00:00:04.995","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.995","00:00:04.996","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.996","00:00:04.998","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.998","00:00:04.999","0.001s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.999","00:00:05.001","0.002s"],[317,567,"00:00:04.217","00:00:05.002","00:00:05.001","00:00:05.002","0.002s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.332","00:00:04.333","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.333","00:00:04.334","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.334","00:00:04.336","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.336","00:00:04.337","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.337","00:00:04.338","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.338","00:00:04.339","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.339","00:00:04.341","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.341","00:00:04.342","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.342","00:00:04.343","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.343","00:00:04.344","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.344","00:00:04.345","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.345","00:00:04.347","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.347","00:00:04.348","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.348","00:00:04.349","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.349","00:00:04.350","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.350","00:00:04.352","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.352","00:00:04.353","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.353","00:00:04.354","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.354","00:00:04.355","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.355","00:00:04.357","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.357","00:00:04.358","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.358","00:00:04.359","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.359","00:00:04.360","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.360","00:00:04.361","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.361","00:00:04.363","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.363","00:00:04.364","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.364","00:00:04.365","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.365","00:00:04.366","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.366","00:00:04.368","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.368","00:00:04.369","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.369","00:00:04.370","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.370","00:00:04.371","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.371","00:00:04.372","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.372","00:00:04.374","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.374","00:00:04.375","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.375","00:00:04.376","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.376","00:00:04.377","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.377","00:00:04.378","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.378","00:00:04.380","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.380","00:00:04.381","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.381","00:00:04.382","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.382","00:00:04.383","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.383","00:00:04.384","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.384","00:00:04.386","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.386","00:00:04.387","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.387","00:00:04.388","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.388","00:00:04.389","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.389","00:00:04.391","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.391","00:00:04.392","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.392","00:00:04.393","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.393","00:00:04.394","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.394","00:00:04.396","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.396","00:00:04.397","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.397","00:00:04.398","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.398","00:00:04.399","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.399","00:00:04.400","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.400","00:00:04.402","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.402","00:00:04.403","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.403","00:00:04.404","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.404","00:00:04.405","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.405","00:00:04.406","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.406","00:00:04.408","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.408","00:00:04.409","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.409","00:00:04.410","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.410","00:00:04.411","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.411","00:00:04.413","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.413","00:00:04.414","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.414","00:00:04.415","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.415","00:00:04.417","0.002s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.417","00:00:04.418","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.418","00:00:04.419","0.002s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.419","00:00:04.421","0.002s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.421","00:00:04.422","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.422","00:00:04.424","0.002s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.424","00:00:04.425","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.425","00:00:04.426","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.426","00:00:04.427","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.427","00:00:04.428","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.428","00:00:04.429","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.429","00:00:04.431","0.002s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.431","00:00:04.433","0.002s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.441","00:00:04.442","0.002s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.442","00:00:04.443","0.000s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.443","00:00:04.444","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.444","00:00:04.445","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.445","00:00:04.447","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.447","00:00:04.448","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.448","00:00:04.449","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.449","00:00:04.450","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.450","00:00:04.452","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.452","00:00:04.453","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.453","00:00:04.454","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.454","00:00:04.455","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.455","00:00:04.457","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.457","00:00:04.458","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.458","00:00:04.459","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.459","00:00:04.461","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.461","00:00:04.462","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.462","00:00:04.463","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.463","00:00:04.465","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.465","00:00:04.466","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.466","00:00:04.467","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.467","00:00:04.468","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.468","00:00:04.470","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.470","00:00:04.471","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.471","00:00:04.472","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.472","00:00:04.474","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.474","00:00:04.475","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.475","00:00:04.476","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.476","00:00:04.478","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.478","00:00:04.479","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.479","00:00:04.480","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.480","00:00:04.482","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.482","00:00:04.483","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.483","00:00:04.484","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.484","00:00:04.486","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.486","00:00:04.487","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.487","00:00:04.488","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.488","00:00:04.490","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.490","00:00:04.491","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.491","00:00:04.492","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.492","00:00:04.493","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.493","00:00:04.495","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.495","00:00:04.496","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.496","00:00:04.497","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.497","00:00:04.499","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.499","00:00:04.500","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.500","00:00:04.501","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.501","00:00:04.503","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.503","00:00:04.504","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.504","00:00:04.505","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.505","00:00:04.507","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.507","00:00:04.508","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.508","00:00:04.509","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.509","00:00:04.511","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.511","00:00:04.512","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.512","00:00:04.513","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.513","00:00:04.515","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.515","00:00:04.516","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.516","00:00:04.517","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.517","00:00:04.519","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.519","00:00:04.520","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.520","00:00:04.521","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.521","00:00:04.523","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.523","00:00:04.524","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.524","00:00:04.525","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.525","00:00:04.527","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.527","00:00:04.528","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.528","00:00:04.530","0.002s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.530","00:00:04.531","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.531","00:00:04.532","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.532","00:00:04.534","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.534","00:00:04.535","0.002s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.540","00:00:04.541","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.541","00:00:04.542","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.542","00:00:04.543","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.543","00:00:04.544","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.544","00:00:04.546","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.546","00:00:04.547","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.547","00:00:04.549","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.549","00:00:04.550","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.550","00:00:04.551","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.551","00:00:04.553","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.553","00:00:04.554","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.554","00:00:04.555","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.555","00:00:04.557","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.557","00:00:04.558","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.558","00:00:04.559","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.559","00:00:04.561","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.561","00:00:04.562","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.562","00:00:04.563","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.563","00:00:04.564","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.564","00:00:04.566","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.566","00:00:04.567","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.567","00:00:04.569","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.569","00:00:04.570","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.570","00:00:04.571","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.571","00:00:04.573","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.573","00:00:04.574","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.574","00:00:04.575","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.575","00:00:04.577","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.577","00:00:04.578","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.578","00:00:04.579","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.579","00:00:04.581","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.581","00:00:04.582","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.582","00:00:04.583","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.583","00:00:04.585","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.585","00:00:04.586","0.001s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.586","00:00:04.587","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.442","00:00:04.443","0.000s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.443","00:00:04.444","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.444","00:00:04.445","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.445","00:00:04.446","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.446","00:00:04.448","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.448","00:00:04.449","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.449","00:00:04.450","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.450","00:00:04.451","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.451","00:00:04.453","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.453","00:00:04.454","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.454","00:00:04.455","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.455","00:00:04.457","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.457","00:00:04.458","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.458","00:00:04.459","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.459","00:00:04.461","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.461","00:00:04.462","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.462","00:00:04.463","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.463","00:00:04.465","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.465","00:00:04.466","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.466","00:00:04.467","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.467","00:00:04.468","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.468","00:00:04.469","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.469","00:00:04.471","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.471","00:00:04.472","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.472","00:00:04.473","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.473","00:00:04.475","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.475","00:00:04.476","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.476","00:00:04.477","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.477","00:00:04.479","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.479","00:00:04.480","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.480","00:00:04.481","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.481","00:00:04.483","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.483","00:00:04.484","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.484","00:00:04.486","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.486","00:00:04.487","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.487","00:00:04.488","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.488","00:00:04.490","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.490","00:00:04.491","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.491","00:00:04.492","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.492","00:00:04.493","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.493","00:00:04.495","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.495","00:00:04.496","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.496","00:00:04.497","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.497","00:00:04.499","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.499","00:00:04.500","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.500","00:00:04.501","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.501","00:00:04.503","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.503","00:00:04.504","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.504","00:00:04.505","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.505","00:00:04.507","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.507","00:00:04.508","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.508","00:00:04.509","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.509","00:00:04.511","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.511","00:00:04.512","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.512","00:00:04.513","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.513","00:00:04.515","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.515","00:00:04.516","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.516","00:00:04.517","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.517","00:00:04.519","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.519","00:00:04.520","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.520","00:00:04.521","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.521","00:00:04.523","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.523","00:00:04.524","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.524","00:00:04.525","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.525","00:00:04.527","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.527","00:00:04.528","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.528","00:00:04.529","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.529","00:00:04.531","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.531","00:00:04.532","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.532","00:00:04.533","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.533","00:00:04.535","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.540","00:00:04.541","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.541","00:00:04.542","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.542","00:00:04.543","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.543","00:00:04.544","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.544","00:00:04.546","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.546","00:00:04.547","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.547","00:00:04.548","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.548","00:00:04.550","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.550","00:00:04.551","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.551","00:00:04.553","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.553","00:00:04.554","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.554","00:00:04.555","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.555","00:00:04.557","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.557","00:00:04.558","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.558","00:00:04.559","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.559","00:00:04.561","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.561","00:00:04.562","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.562","00:00:04.563","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.563","00:00:04.564","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.564","00:00:04.566","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.566","00:00:04.567","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.567","00:00:04.568","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.568","00:00:04.570","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.570","00:00:04.571","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.571","00:00:04.572","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.572","00:00:04.574","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.574","00:00:04.575","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.575","00:00:04.576","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.576","00:00:04.578","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.578","00:00:04.579","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.579","00:00:04.581","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.581","00:00:04.582","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.582","00:00:04.583","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.583","00:00:04.585","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.585","00:00:04.586","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.586","00:00:04.587","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.587","00:00:04.588","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.588","00:00:04.590","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.590","00:00:04.591","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.591","00:00:04.592","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.592","00:00:04.594","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.594","00:00:04.595","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.595","00:00:04.596","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.596","00:00:04.598","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.598","00:00:04.599","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.599","00:00:04.600","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.600","00:00:04.602","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.602","00:00:04.603","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.603","00:00:04.604","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.604","00:00:04.606","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.606","00:00:04.607","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.607","00:00:04.609","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.609","00:00:04.610","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.610","00:00:04.611","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.611","00:00:04.613","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.613","00:00:04.614","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.614","00:00:04.615","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.615","00:00:04.616","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.616","00:00:04.618","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.618","00:00:04.619","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.619","00:00:04.620","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.620","00:00:04.622","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.622","00:00:04.623","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.623","00:00:04.624","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.624","00:00:04.626","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.626","00:00:04.627","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.627","00:00:04.628","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.628","00:00:04.630","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.630","00:00:04.631","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.631","00:00:04.632","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.632","00:00:04.634","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.634","00:00:04.635","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.635","00:00:04.636","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.636","00:00:04.638","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.638","00:00:04.639","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.639","00:00:04.640","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.640","00:00:04.642","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.642","00:00:04.643","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.643","00:00:04.644","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.644","00:00:04.646","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.646","00:00:04.647","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.647","00:00:04.649","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.649","00:00:04.650","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.650","00:00:04.651","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.651","00:00:04.653","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.653","00:00:04.654","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.654","00:00:04.656","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.656","00:00:04.657","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.657","00:00:04.658","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.658","00:00:04.660","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.660","00:00:04.661","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.661","00:00:04.662","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.662","00:00:04.664","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.664","00:00:04.665","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.665","00:00:04.666","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.666","00:00:04.668","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.668","00:00:04.669","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.669","00:00:04.671","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.671","00:00:04.672","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.672","00:00:04.674","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.674","00:00:04.675","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.675","00:00:04.676","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.676","00:00:04.678","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.678","00:00:04.679","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.679","00:00:04.681","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.681","00:00:04.682","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.682","00:00:04.683","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.683","00:00:04.684","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.684","00:00:04.686","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.686","00:00:04.687","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.687","00:00:04.689","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.689","00:00:04.690","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.690","00:00:04.691","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.691","00:00:04.693","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.693","00:00:04.694","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.694","00:00:04.696","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.696","00:00:04.697","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.697","00:00:04.698","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.698","00:00:04.700","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.700","00:00:04.701","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.701","00:00:04.702","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.702","00:00:04.704","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.704","00:00:04.705","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.705","00:00:04.707","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.707","00:00:04.708","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.708","00:00:04.710","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.710","00:00:04.711","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.711","00:00:04.712","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.712","00:00:04.714","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.714","00:00:04.715","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.715","00:00:04.716","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.716","00:00:04.718","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.718","00:00:04.719","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.719","00:00:04.720","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.720","00:00:04.722","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.722","00:00:04.723","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.723","00:00:04.725","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.725","00:00:04.726","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.726","00:00:04.727","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.727","00:00:04.729","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.729","00:00:04.730","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.730","00:00:04.731","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.731","00:00:04.733","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.733","00:00:04.734","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.734","00:00:04.735","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.735","00:00:04.737","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.737","00:00:04.738","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.738","00:00:04.740","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.740","00:00:04.741","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.741","00:00:04.742","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.742","00:00:04.744","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.744","00:00:04.745","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.745","00:00:04.746","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.746","00:00:04.748","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.748","00:00:04.749","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.749","00:00:04.751","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.751","00:00:04.752","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.752","00:00:04.753","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.753","00:00:04.755","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.755","00:00:04.756","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.756","00:00:04.758","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.758","00:00:04.759","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.759","00:00:04.760","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.760","00:00:04.762","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.762","00:00:04.763","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.763","00:00:04.765","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.765","00:00:04.766","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.766","00:00:04.768","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.768","00:00:04.769","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.769","00:00:04.771","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.771","00:00:04.772","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.772","00:00:04.773","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.773","00:00:04.775","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.775","00:00:04.776","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.776","00:00:04.778","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.778","00:00:04.779","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.779","00:00:04.781","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.781","00:00:04.782","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.782","00:00:04.783","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.783","00:00:04.784","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.784","00:00:04.786","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.786","00:00:04.787","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.787","00:00:04.789","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.789","00:00:04.790","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.790","00:00:04.791","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.791","00:00:04.792","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.792","00:00:04.794","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.794","00:00:04.795","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.795","00:00:04.797","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.797","00:00:04.798","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.798","00:00:04.799","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.799","00:00:04.801","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.801","00:00:04.802","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.802","00:00:04.804","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.804","00:00:04.805","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.805","00:00:04.807","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.807","00:00:04.808","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.808","00:00:04.809","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.809","00:00:04.811","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.811","00:00:04.812","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.812","00:00:04.813","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.813","00:00:04.814","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.814","00:00:04.816","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.816","00:00:04.817","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.817","00:00:04.819","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.819","00:00:04.820","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.820","00:00:04.822","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.822","00:00:04.823","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.823","00:00:04.824","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.824","00:00:04.826","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.826","00:00:04.827","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.827","00:00:04.829","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.829","00:00:04.830","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.830","00:00:04.831","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.831","00:00:04.833","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.833","00:00:04.834","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.834","00:00:04.836","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.836","00:00:04.837","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.837","00:00:04.839","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.839","00:00:04.840","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.840","00:00:04.841","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.841","00:00:04.843","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.843","00:00:04.844","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.844","00:00:04.846","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.846","00:00:04.847","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.847","00:00:04.849","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.849","00:00:04.850","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.858","00:00:04.859","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.859","00:00:04.860","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.860","00:00:04.862","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.862","00:00:04.863","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.863","00:00:04.865","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.865","00:00:04.866","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.866","00:00:04.867","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.867","00:00:04.869","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.869","00:00:04.870","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.870","00:00:04.871","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.871","00:00:04.873","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.873","00:00:04.874","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.874","00:00:04.876","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.876","00:00:04.877","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.877","00:00:04.879","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.879","00:00:04.880","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.880","00:00:04.881","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.881","00:00:04.883","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.883","00:00:04.884","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.884","00:00:04.886","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.886","00:00:04.887","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.887","00:00:04.889","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.889","00:00:04.890","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.890","00:00:04.891","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.891","00:00:04.893","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.893","00:00:04.894","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.894","00:00:04.896","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.896","00:00:04.897","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.897","00:00:04.899","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.899","00:00:04.900","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.900","00:00:04.901","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.901","00:00:04.903","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.903","00:00:04.904","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.904","00:00:04.906","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.906","00:00:04.907","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.907","00:00:04.908","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.908","00:00:04.910","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.910","00:00:04.911","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.911","00:00:04.913","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.913","00:00:04.914","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.914","00:00:04.915","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.915","00:00:04.917","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.917","00:00:04.918","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.918","00:00:04.920","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.920","00:00:04.921","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.921","00:00:04.923","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.923","00:00:04.924","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.924","00:00:04.925","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.925","00:00:04.927","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.927","00:00:04.928","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.928","00:00:04.930","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.930","00:00:04.931","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.931","00:00:04.933","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.933","00:00:04.934","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.934","00:00:04.935","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.935","00:00:04.937","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.937","00:00:04.938","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.938","00:00:04.940","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.940","00:00:04.941","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.941","00:00:04.943","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.943","00:00:04.944","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.944","00:00:04.945","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.945","00:00:04.947","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.947","00:00:04.948","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.948","00:00:04.950","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.950","00:00:04.951","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.951","00:00:04.953","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.953","00:00:04.954","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.954","00:00:04.955","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.955","00:00:04.957","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.957","00:00:04.958","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.958","00:00:04.960","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.960","00:00:04.961","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.961","00:00:04.963","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.963","00:00:04.964","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.964","00:00:04.965","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.965","00:00:04.967","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.967","00:00:04.968","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.968","00:00:04.970","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.970","00:00:04.971","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.971","00:00:04.973","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.980","00:00:04.981","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.981","00:00:04.982","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.982","00:00:04.983","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.983","00:00:04.985","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.985","00:00:04.986","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.986","00:00:04.988","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.988","00:00:04.989","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.989","00:00:04.990","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.990","00:00:04.992","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.992","00:00:04.993","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.993","00:00:04.995","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.995","00:00:04.996","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.996","00:00:04.998","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.998","00:00:04.999","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.999","00:00:05.000","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.000","00:00:05.002","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.002","00:00:05.003","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.003","00:00:05.005","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.005","00:00:05.006","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.006","00:00:05.008","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.008","00:00:05.009","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.009","00:00:05.010","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.010","00:00:05.012","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.012","00:00:05.013","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.013","00:00:05.015","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.015","00:00:05.016","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.016","00:00:05.018","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.018","00:00:05.019","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.019","00:00:05.021","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.021","00:00:05.022","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.022","00:00:05.024","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.024","00:00:05.025","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.025","00:00:05.027","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.027","00:00:05.028","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.028","00:00:05.030","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.030","00:00:05.031","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.031","00:00:05.033","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.033","00:00:05.034","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.034","00:00:05.036","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.036","00:00:05.037","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.037","00:00:05.039","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.039","00:00:05.040","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.040","00:00:05.041","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.041","00:00:05.043","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.043","00:00:05.044","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.044","00:00:05.046","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.046","00:00:05.047","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.047","00:00:05.049","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.049","00:00:05.050","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.050","00:00:05.052","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.052","00:00:05.053","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.053","00:00:05.055","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.055","00:00:05.056","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.056","00:00:05.058","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.058","00:00:05.059","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.059","00:00:05.061","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.061","00:00:05.062","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.062","00:00:05.064","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.064","00:00:05.065","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.065","00:00:05.067","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.067","00:00:05.068","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.068","00:00:05.070","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.070","00:00:05.071","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.071","00:00:05.073","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.073","00:00:05.074","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.074","00:00:05.076","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.076","00:00:05.077","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.077","00:00:05.079","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.079","00:00:05.080","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.080","00:00:05.082","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.082","00:00:05.083","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.083","00:00:05.085","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.085","00:00:05.086","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.095","00:00:05.097","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.097","00:00:05.098","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.098","00:00:05.100","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.100","00:00:05.101","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.101","00:00:05.103","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.103","00:00:05.104","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.104","00:00:05.105","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.105","00:00:05.107","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.107","00:00:05.108","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.108","00:00:05.110","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.110","00:00:05.111","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.111","00:00:05.113","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.113","00:00:05.114","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.114","00:00:05.116","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.116","00:00:05.117","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.117","00:00:05.118","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.118","00:00:05.120","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.120","00:00:05.122","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.122","00:00:05.123","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.123","00:00:05.124","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.124","00:00:05.126","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.126","00:00:05.127","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.127","00:00:05.129","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.129","00:00:05.130","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.130","00:00:05.132","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.132","00:00:05.133","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.133","00:00:05.135","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.135","00:00:05.136","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.136","00:00:05.138","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.138","00:00:05.139","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.139","00:00:05.141","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.141","00:00:05.142","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.142","00:00:05.144","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.144","00:00:05.145","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.145","00:00:05.147","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.147","00:00:05.148","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.148","00:00:05.150","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.150","00:00:05.151","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.151","00:00:05.153","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.153","00:00:05.154","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.154","00:00:05.156","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.156","00:00:05.157","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.157","00:00:05.159","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.159","00:00:05.160","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.160","00:00:05.162","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.162","00:00:05.163","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.163","00:00:05.164","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.164","00:00:05.166","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.166","00:00:05.167","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.167","00:00:05.169","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.169","00:00:05.170","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.170","00:00:05.172","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.172","00:00:05.174","0.002s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.174","00:00:05.175","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.175","00:00:05.176","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.176","00:00:05.178","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.178","00:00:05.179","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.179","00:00:05.181","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.181","00:00:05.182","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.182","00:00:05.184","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.184","00:00:05.185","0.001s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.185","00:00:05.187","0.002s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.541","00:00:04.542","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.542","00:00:04.543","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.543","00:00:04.544","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.544","00:00:04.546","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.546","00:00:04.547","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.547","00:00:04.549","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.549","00:00:04.550","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.550","00:00:04.551","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.551","00:00:04.553","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.553","00:00:04.554","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.554","00:00:04.555","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.555","00:00:04.557","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.557","00:00:04.558","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.558","00:00:04.560","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.560","00:00:04.561","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.561","00:00:04.562","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.562","00:00:04.563","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.563","00:00:04.565","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.565","00:00:04.566","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.566","00:00:04.567","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.567","00:00:04.569","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.569","00:00:04.570","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.570","00:00:04.571","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.571","00:00:04.573","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.573","00:00:04.574","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.574","00:00:04.575","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.575","00:00:04.577","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.577","00:00:04.578","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.578","00:00:04.579","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.579","00:00:04.581","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.581","00:00:04.582","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.582","00:00:04.583","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.583","00:00:04.585","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.585","00:00:04.586","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.586","00:00:04.588","0.002s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.588","00:00:04.588","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.588","00:00:04.590","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.590","00:00:04.591","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.591","00:00:04.592","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.592","00:00:04.594","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.594","00:00:04.595","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.595","00:00:04.596","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.596","00:00:04.598","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.598","00:00:04.599","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.599","00:00:04.600","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.600","00:00:04.602","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.602","00:00:04.603","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.603","00:00:04.605","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.605","00:00:04.606","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.606","00:00:04.607","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.607","00:00:04.609","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.609","00:00:04.610","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.610","00:00:04.611","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.611","00:00:04.613","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.613","00:00:04.614","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.614","00:00:04.615","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.615","00:00:04.616","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.616","00:00:04.618","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.618","00:00:04.619","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.619","00:00:04.620","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.620","00:00:04.622","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.622","00:00:04.623","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.623","00:00:04.624","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.624","00:00:04.626","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.626","00:00:04.627","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.627","00:00:04.628","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.628","00:00:04.630","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.630","00:00:04.631","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.631","00:00:04.633","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.633","00:00:04.634","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.634","00:00:04.635","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.635","00:00:04.637","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.637","00:00:04.638","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.638","00:00:04.639","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.639","00:00:04.640","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.640","00:00:04.642","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.642","00:00:04.643","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.643","00:00:04.644","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.644","00:00:04.646","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.646","00:00:04.647","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.647","00:00:04.649","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.649","00:00:04.650","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.650","00:00:04.651","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.651","00:00:04.653","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.653","00:00:04.654","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.654","00:00:04.656","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.656","00:00:04.657","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.657","00:00:04.658","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.658","00:00:04.660","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.660","00:00:04.661","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.661","00:00:04.662","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.662","00:00:04.664","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.664","00:00:04.665","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.665","00:00:04.667","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.667","00:00:04.668","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.668","00:00:04.669","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.669","00:00:04.671","0.002s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.671","00:00:04.672","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.672","00:00:04.674","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.674","00:00:04.675","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.675","00:00:04.676","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.676","00:00:04.678","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.678","00:00:04.679","0.002s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.679","00:00:04.681","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.681","00:00:04.682","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.682","00:00:04.683","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.683","00:00:04.685","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.685","00:00:04.686","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.686","00:00:04.687","0.002s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.687","00:00:04.689","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.689","00:00:04.690","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.690","00:00:04.692","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.692","00:00:04.693","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.693","00:00:04.694","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.694","00:00:04.696","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.696","00:00:04.697","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.697","00:00:04.698","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.698","00:00:04.700","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.700","00:00:04.701","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.701","00:00:04.703","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.703","00:00:04.704","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.704","00:00:04.705","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.705","00:00:04.707","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.707","00:00:04.708","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.708","00:00:04.710","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.710","00:00:04.711","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.711","00:00:04.712","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.712","00:00:04.714","0.001s"],[8,130,"00:00:04.529","00:00:04.715","00:00:04.714","00:00:04.715","0.002s"],[298,13,"00:00:04.843","00:00:04.876","00:00:04.859","00:00:04.861","0.001s"],[298,13,"00:00:04.843","00:00:04.876","00:00:04.861","00:00:04.862","0.002s"],[298,13,"00:00:04.843","00:00:04.876","00:00:04.862","00:00:04.863","0.001s"],[298,13,"00:00:04.843","00:00:04.876","00:00:04.863","00:00:04.865","0.001s"],[298,13,"00:00:04.843","00:00:04.876","00:00:04.865","00:00:04.866","0.001s"],[298,13,"00:00:04.843","00:00:04.876","00:00:04.866","00:00:04.867","0.001s"],[298,13,"00:00:04.843","00:00:04.876","00:00:04.867","00:00:04.869","0.001s"],[298,13,"00:00:04.843","00:00:04.876","00:00:04.869","00:00:04.870","0.001s"],[298,13,"00:00:04.843","00:00:04.876","00:00:04.870","00:00:04.871","0.001s"],[298,13,"00:00:04.843","00:00:04.876","00:00:04.871","00:00:04.873","0.001s"],[298,13,"00:00:04.843","00:00:04.876","00:00:04.873","00:00:04.874","0.001s"],[298,13,"00:00:04.843","00:00:04.876","00:00:04.874","00:00:04.876","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.981","00:00:04.982","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.982","00:00:04.983","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.983","00:00:04.985","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.985","00:00:04.986","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.986","00:00:04.988","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.988","00:00:04.989","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.989","00:00:04.990","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.990","00:00:04.992","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.992","00:00:04.993","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.993","00:00:04.995","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.995","00:00:04.996","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.996","00:00:04.998","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.998","00:00:04.999","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:04.999","00:00:05.001","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.001","00:00:05.002","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.002","00:00:05.003","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.003","00:00:05.005","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.005","00:00:05.006","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.006","00:00:05.008","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.008","00:00:05.009","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.009","00:00:05.010","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.010","00:00:05.012","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.012","00:00:05.013","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.013","00:00:05.015","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.015","00:00:05.016","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.016","00:00:05.018","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.018","00:00:05.019","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.019","00:00:05.021","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.021","00:00:05.022","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.022","00:00:05.024","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.024","00:00:05.025","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.025","00:00:05.027","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.027","00:00:05.028","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.028","00:00:05.030","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.030","00:00:05.031","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.031","00:00:05.033","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.033","00:00:05.034","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.034","00:00:05.036","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.036","00:00:05.037","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.037","00:00:05.039","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.039","00:00:05.040","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.040","00:00:05.041","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.041","00:00:05.043","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.043","00:00:05.044","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.044","00:00:05.046","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.046","00:00:05.047","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.047","00:00:05.049","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.049","00:00:05.050","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.050","00:00:05.052","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.052","00:00:05.053","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.053","00:00:05.055","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.055","00:00:05.056","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.056","00:00:05.058","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.058","00:00:05.059","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.059","00:00:05.061","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.061","00:00:05.062","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.062","00:00:05.064","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.064","00:00:05.065","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.065","00:00:05.067","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.067","00:00:05.068","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.068","00:00:05.070","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.070","00:00:05.071","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.071","00:00:05.073","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.073","00:00:05.074","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.074","00:00:05.076","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.076","00:00:05.077","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.077","00:00:05.079","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.079","00:00:05.080","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.080","00:00:05.082","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.082","00:00:05.083","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.083","00:00:05.085","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.085","00:00:05.086","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.096","00:00:05.097","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.097","00:00:05.098","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.098","00:00:05.100","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.100","00:00:05.101","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.101","00:00:05.103","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.103","00:00:05.104","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.104","00:00:05.105","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.105","00:00:05.107","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.107","00:00:05.108","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.108","00:00:05.110","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.110","00:00:05.111","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.111","00:00:05.113","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.113","00:00:05.114","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.114","00:00:05.116","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.116","00:00:05.117","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.117","00:00:05.118","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.118","00:00:05.120","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.120","00:00:05.122","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.122","00:00:05.123","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.123","00:00:05.125","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.125","00:00:05.126","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.126","00:00:05.128","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.128","00:00:05.129","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.129","00:00:05.130","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.130","00:00:05.132","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.132","00:00:05.133","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.133","00:00:05.135","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.135","00:00:05.136","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.136","00:00:05.138","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.138","00:00:05.139","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.139","00:00:05.141","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.141","00:00:05.142","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.142","00:00:05.144","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.144","00:00:05.145","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.145","00:00:05.147","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.147","00:00:05.148","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.148","00:00:05.150","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.150","00:00:05.151","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.151","00:00:05.153","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.153","00:00:05.154","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.154","00:00:05.156","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.156","00:00:05.157","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.157","00:00:05.159","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.159","00:00:05.160","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.160","00:00:05.162","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.162","00:00:05.163","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.163","00:00:05.165","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.165","00:00:05.166","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.166","00:00:05.168","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.168","00:00:05.169","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.169","00:00:05.170","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.170","00:00:05.172","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.172","00:00:05.174","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.174","00:00:05.175","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.175","00:00:05.177","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.177","00:00:05.178","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.178","00:00:05.180","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.180","00:00:05.181","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.181","00:00:05.182","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.182","00:00:05.184","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.184","00:00:05.185","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.185","00:00:05.187","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.187","00:00:05.188","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.188","00:00:05.189","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.189","00:00:05.190","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.190","00:00:05.192","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.192","00:00:05.193","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.193","00:00:05.194","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.194","00:00:05.196","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.196","00:00:05.197","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.197","00:00:05.197","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.197","00:00:05.198","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.198","00:00:05.200","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.200","00:00:05.201","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.201","00:00:05.202","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.202","00:00:05.203","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.203","00:00:05.204","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.204","00:00:05.205","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.205","00:00:05.206","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.206","00:00:05.207","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.207","00:00:05.209","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.209","00:00:05.210","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.210","00:00:05.211","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.211","00:00:05.212","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.212","00:00:05.213","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.213","00:00:05.214","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.214","00:00:05.215","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.215","00:00:05.217","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.217","00:00:05.218","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.226","00:00:05.227","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.227","00:00:05.228","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.228","00:00:05.229","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.229","00:00:05.230","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.230","00:00:05.232","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.232","00:00:05.233","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.233","00:00:05.234","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.234","00:00:05.236","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.236","00:00:05.237","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.237","00:00:05.238","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.238","00:00:05.239","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.239","00:00:05.241","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.241","00:00:05.242","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.242","00:00:05.243","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.243","00:00:05.245","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.245","00:00:05.246","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.246","00:00:05.247","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.247","00:00:05.249","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.249","00:00:05.250","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.250","00:00:05.251","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.251","00:00:05.252","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.252","00:00:05.253","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.253","00:00:05.255","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.255","00:00:05.256","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.256","00:00:05.257","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.257","00:00:05.259","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.259","00:00:05.260","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.260","00:00:05.261","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.261","00:00:05.262","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.262","00:00:05.264","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.264","00:00:05.265","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.265","00:00:05.266","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.266","00:00:05.268","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.268","00:00:05.269","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.269","00:00:05.271","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.271","00:00:05.272","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.272","00:00:05.273","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.273","00:00:05.274","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.274","00:00:05.275","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.275","00:00:05.276","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.276","00:00:05.277","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.277","00:00:05.279","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.279","00:00:05.280","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.280","00:00:05.281","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.281","00:00:05.282","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.282","00:00:05.283","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.283","00:00:05.284","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.284","00:00:05.285","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.285","00:00:05.286","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.286","00:00:05.288","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.288","00:00:05.289","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.289","00:00:05.290","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.290","00:00:05.291","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.291","00:00:05.292","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.292","00:00:05.293","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.293","00:00:05.294","0.001s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.294","00:00:05.296","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.024","00:00:05.025","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.025","00:00:05.027","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.027","00:00:05.028","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.028","00:00:05.030","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.030","00:00:05.031","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.031","00:00:05.033","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.033","00:00:05.034","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.034","00:00:05.036","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.036","00:00:05.037","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.037","00:00:05.039","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.039","00:00:05.040","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.040","00:00:05.041","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.041","00:00:05.043","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.043","00:00:05.044","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.044","00:00:05.046","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.046","00:00:05.047","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.047","00:00:05.049","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.049","00:00:05.050","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.050","00:00:05.052","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.052","00:00:05.053","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.053","00:00:05.055","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.055","00:00:05.056","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.056","00:00:05.058","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.058","00:00:05.059","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.059","00:00:05.061","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.061","00:00:05.062","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.062","00:00:05.064","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.064","00:00:05.065","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.065","00:00:05.067","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.067","00:00:05.068","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.068","00:00:05.070","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.070","00:00:05.071","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.071","00:00:05.073","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.073","00:00:05.074","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.074","00:00:05.076","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.076","00:00:05.077","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.077","00:00:05.079","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.079","00:00:05.080","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.080","00:00:05.082","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.082","00:00:05.083","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.083","00:00:05.085","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.085","00:00:05.086","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.096","00:00:05.097","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.097","00:00:05.098","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.098","00:00:05.100","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.100","00:00:05.101","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.101","00:00:05.103","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.103","00:00:05.104","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.104","00:00:05.105","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.105","00:00:05.107","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.107","00:00:05.108","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.108","00:00:05.110","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.110","00:00:05.111","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.111","00:00:05.113","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.113","00:00:05.114","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.114","00:00:05.116","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.116","00:00:05.117","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.117","00:00:05.119","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.119","00:00:05.120","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.120","00:00:05.122","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.122","00:00:05.123","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.123","00:00:05.125","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.125","00:00:05.126","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.126","00:00:05.128","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.128","00:00:05.129","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.129","00:00:05.130","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.130","00:00:05.132","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.132","00:00:05.133","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.133","00:00:05.135","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.135","00:00:05.136","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.136","00:00:05.138","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.138","00:00:05.139","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.139","00:00:05.141","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.141","00:00:05.142","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.142","00:00:05.144","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.144","00:00:05.145","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.145","00:00:05.147","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.147","00:00:05.148","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.148","00:00:05.150","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.150","00:00:05.151","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.151","00:00:05.153","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.153","00:00:05.154","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.154","00:00:05.156","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.156","00:00:05.157","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.157","00:00:05.159","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.159","00:00:05.160","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.160","00:00:05.162","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.162","00:00:05.163","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.163","00:00:05.165","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.165","00:00:05.166","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.166","00:00:05.168","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.168","00:00:05.169","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.169","00:00:05.171","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.171","00:00:05.172","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.172","00:00:05.174","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.174","00:00:05.175","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.175","00:00:05.177","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.177","00:00:05.178","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.178","00:00:05.180","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.180","00:00:05.181","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.181","00:00:05.182","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.182","00:00:05.184","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.184","00:00:05.185","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.185","00:00:05.187","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.187","00:00:05.188","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.188","00:00:05.189","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.189","00:00:05.190","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.190","00:00:05.192","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.192","00:00:05.193","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.193","00:00:05.194","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.194","00:00:05.196","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.196","00:00:05.197","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.197","00:00:05.197","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.197","00:00:05.199","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.199","00:00:05.200","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.200","00:00:05.201","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.201","00:00:05.202","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.202","00:00:05.203","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.203","00:00:05.204","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.204","00:00:05.205","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.205","00:00:05.206","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.206","00:00:05.207","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.207","00:00:05.209","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.209","00:00:05.210","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.210","00:00:05.211","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.211","00:00:05.212","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.212","00:00:05.213","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.213","00:00:05.214","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.214","00:00:05.216","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.216","00:00:05.217","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.217","00:00:05.218","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.226","00:00:05.227","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.227","00:00:05.228","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.228","00:00:05.229","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.229","00:00:05.231","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.231","00:00:05.232","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.232","00:00:05.233","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.233","00:00:05.234","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.234","00:00:05.236","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.236","00:00:05.237","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.237","00:00:05.238","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.238","00:00:05.239","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.239","00:00:05.241","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.241","00:00:05.242","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.242","00:00:05.243","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.243","00:00:05.245","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.245","00:00:05.246","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.246","00:00:05.248","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.248","00:00:05.249","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.249","00:00:05.250","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.250","00:00:05.251","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.251","00:00:05.252","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.252","00:00:05.254","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.254","00:00:05.255","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.255","00:00:05.256","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.256","00:00:05.257","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.257","00:00:05.259","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.259","00:00:05.260","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.260","00:00:05.261","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.261","00:00:05.263","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.263","00:00:05.264","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.264","00:00:05.265","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.265","00:00:05.266","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.266","00:00:05.268","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.270","00:00:05.271","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.271","00:00:05.272","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.272","00:00:05.273","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.273","00:00:05.275","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.275","00:00:05.275","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.275","00:00:05.276","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.276","00:00:05.278","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.278","00:00:05.279","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.279","00:00:05.280","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.280","00:00:05.281","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.281","00:00:05.282","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.282","00:00:05.283","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.283","00:00:05.284","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.284","00:00:05.285","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.285","00:00:05.287","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.287","00:00:05.288","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.288","00:00:05.289","0.001s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.289","00:00:05.290","0.001s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.096","00:00:05.097","0.001s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.097","00:00:05.098","0.002s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.098","00:00:05.100","0.002s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.100","00:00:05.101","0.001s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.101","00:00:05.103","0.002s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.103","00:00:05.104","0.001s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.104","00:00:05.105","0.001s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.105","00:00:05.107","0.002s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.107","00:00:05.108","0.001s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.108","00:00:05.110","0.001s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.110","00:00:05.111","0.002s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.111","00:00:05.113","0.001s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.113","00:00:05.114","0.001s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.114","00:00:05.116","0.002s"],[696,16,"00:00:05.078","00:00:05.117","00:00:05.116","00:00:05.117","0.002s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.226","00:00:05.227","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.227","00:00:05.228","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.228","00:00:05.229","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.229","00:00:05.231","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.231","00:00:05.232","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.232","00:00:05.233","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.233","00:00:05.234","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.234","00:00:05.236","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.236","00:00:05.237","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.237","00:00:05.238","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.238","00:00:05.239","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.239","00:00:05.241","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.241","00:00:05.242","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.242","00:00:05.243","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.243","00:00:05.245","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.245","00:00:05.246","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.246","00:00:05.248","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.248","00:00:05.249","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.249","00:00:05.250","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.250","00:00:05.251","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.251","00:00:05.252","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.252","00:00:05.254","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.254","00:00:05.255","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.255","00:00:05.256","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.256","00:00:05.257","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.257","00:00:05.259","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.259","00:00:05.260","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.260","00:00:05.261","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.261","00:00:05.262","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.262","00:00:05.264","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.264","00:00:05.265","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.265","00:00:05.266","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.266","00:00:05.268","0.001s"],[575,35,"00:00:05.209","00:00:05.270","00:00:05.268","00:00:05.270","0.002s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.227","00:00:05.228","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.228","00:00:05.229","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.229","00:00:05.231","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.231","00:00:05.232","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.232","00:00:05.233","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.233","00:00:05.234","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.234","00:00:05.236","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.236","00:00:05.237","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.237","00:00:05.238","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.238","00:00:05.239","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.239","00:00:05.241","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.241","00:00:05.242","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.242","00:00:05.243","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.243","00:00:05.245","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.245","00:00:05.246","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.246","00:00:05.248","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.248","00:00:05.249","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.249","00:00:05.250","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.250","00:00:05.251","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.251","00:00:05.252","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.252","00:00:05.254","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.254","00:00:05.255","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.255","00:00:05.256","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.256","00:00:05.257","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.257","00:00:05.259","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.259","00:00:05.260","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.260","00:00:05.261","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.261","00:00:05.262","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.262","00:00:05.264","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.264","00:00:05.265","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.265","00:00:05.266","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.266","00:00:05.268","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.270","00:00:05.271","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.271","00:00:05.272","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.272","00:00:05.273","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.273","00:00:05.275","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.275","00:00:05.275","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.275","00:00:05.276","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.276","00:00:05.277","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.277","00:00:05.279","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.279","00:00:05.280","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.280","00:00:05.281","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.281","00:00:05.282","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.282","00:00:05.283","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.283","00:00:05.284","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.284","00:00:05.285","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.285","00:00:05.286","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.286","00:00:05.288","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.288","00:00:05.289","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.289","00:00:05.290","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.290","00:00:05.291","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.291","00:00:05.292","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.292","00:00:05.293","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.293","00:00:05.294","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.294","00:00:05.296","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.296","00:00:05.296","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.296","00:00:05.297","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.297","00:00:05.298","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.298","00:00:05.299","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.299","00:00:05.300","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.300","00:00:05.301","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.301","00:00:05.302","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.302","00:00:05.303","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.303","00:00:05.305","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.305","00:00:05.306","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.306","00:00:05.307","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.307","00:00:05.308","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.308","00:00:05.309","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.309","00:00:05.310","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.310","00:00:05.311","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.311","00:00:05.312","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.312","00:00:05.313","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.313","00:00:05.314","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.314","00:00:05.315","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.315","00:00:05.316","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.316","00:00:05.317","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.317","00:00:05.318","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.318","00:00:05.319","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.319","00:00:05.320","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.320","00:00:05.321","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.321","00:00:05.322","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.322","00:00:05.323","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.323","00:00:05.324","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.324","00:00:05.325","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.325","00:00:05.326","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.326","00:00:05.327","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.327","00:00:05.328","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.328","00:00:05.329","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.329","00:00:05.330","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.330","00:00:05.331","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.331","00:00:05.332","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.332","00:00:05.333","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.333","00:00:05.334","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.334","00:00:05.335","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.335","00:00:05.337","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.337","00:00:05.337","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.337","00:00:05.338","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.338","00:00:05.339","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.339","00:00:05.340","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.343","00:00:05.344","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.344","00:00:05.344","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.344","00:00:05.345","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.345","00:00:05.346","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.346","00:00:05.347","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.347","00:00:05.349","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.349","00:00:05.350","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.350","00:00:05.351","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.351","00:00:05.352","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.352","00:00:05.353","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.353","00:00:05.354","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.354","00:00:05.355","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.355","00:00:05.356","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.356","00:00:05.357","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.357","00:00:05.358","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.358","00:00:05.359","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.359","00:00:05.360","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.360","00:00:05.361","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.361","00:00:05.363","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.363","00:00:05.364","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.364","00:00:05.365","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.365","00:00:05.366","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.366","00:00:05.367","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.367","00:00:05.368","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.368","00:00:05.369","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.369","00:00:05.370","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.370","00:00:05.371","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.371","00:00:05.373","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.373","00:00:05.374","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.374","00:00:05.375","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.375","00:00:05.376","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.376","00:00:05.377","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.377","00:00:05.378","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.378","00:00:05.379","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.379","00:00:05.380","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.380","00:00:05.381","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.381","00:00:05.383","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.383","00:00:05.384","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.384","00:00:05.385","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.385","00:00:05.386","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.386","00:00:05.387","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.387","00:00:05.388","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.388","00:00:05.389","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.389","00:00:05.390","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.390","00:00:05.391","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.391","00:00:05.393","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.393","00:00:05.394","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.394","00:00:05.395","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.395","00:00:05.396","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.396","00:00:05.397","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.397","00:00:05.398","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.398","00:00:05.399","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.399","00:00:05.400","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.400","00:00:05.402","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.402","00:00:05.403","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.403","00:00:05.404","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.404","00:00:05.405","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.405","00:00:05.406","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.406","00:00:05.407","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.407","00:00:05.408","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.408","00:00:05.409","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.409","00:00:05.410","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.410","00:00:05.412","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.412","00:00:05.413","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.413","00:00:05.414","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.414","00:00:05.415","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.415","00:00:05.416","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.416","00:00:05.417","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.417","00:00:05.418","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.418","00:00:05.419","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.419","00:00:05.421","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.421","00:00:05.422","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.422","00:00:05.423","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.423","00:00:05.424","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.424","00:00:05.425","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.425","00:00:05.426","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.426","00:00:05.427","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.427","00:00:05.428","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.428","00:00:05.429","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.429","00:00:05.431","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.431","00:00:05.432","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.432","00:00:05.433","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.433","00:00:05.434","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.434","00:00:05.435","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.435","00:00:05.436","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.436","00:00:05.437","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.437","00:00:05.438","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.438","00:00:05.439","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.439","00:00:05.441","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.441","00:00:05.442","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.442","00:00:05.443","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.443","00:00:05.444","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.444","00:00:05.445","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.445","00:00:05.446","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.446","00:00:05.447","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.447","00:00:05.448","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.448","00:00:05.450","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.450","00:00:05.451","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.451","00:00:05.452","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.452","00:00:05.453","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.453","00:00:05.454","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.454","00:00:05.455","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.455","00:00:05.456","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.456","00:00:05.458","0.002s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.458","00:00:05.459","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.461","00:00:05.463","0.002s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.470","00:00:05.471","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.471","00:00:05.472","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.472","00:00:05.473","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.473","00:00:05.474","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.474","00:00:05.475","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.475","00:00:05.477","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.477","00:00:05.478","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.478","00:00:05.479","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.479","00:00:05.480","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.480","00:00:05.481","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.481","00:00:05.483","0.002s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.483","00:00:05.483","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.483","00:00:05.485","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.485","00:00:05.486","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.486","00:00:05.487","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.491","00:00:05.492","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.492","00:00:05.493","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.493","00:00:05.494","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.494","00:00:05.495","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.495","00:00:05.497","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.497","00:00:05.498","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.498","00:00:05.499","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.499","00:00:05.500","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.500","00:00:05.501","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.501","00:00:05.503","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.503","00:00:05.504","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.504","00:00:05.505","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.505","00:00:05.506","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.506","00:00:05.507","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.507","00:00:05.508","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.508","00:00:05.510","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.510","00:00:05.511","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.511","00:00:05.512","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.512","00:00:05.513","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.513","00:00:05.514","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.514","00:00:05.515","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.515","00:00:05.517","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.517","00:00:05.518","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.518","00:00:05.519","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.519","00:00:05.520","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.520","00:00:05.521","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.521","00:00:05.523","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.523","00:00:05.524","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.524","00:00:05.525","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.525","00:00:05.526","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.526","00:00:05.527","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.527","00:00:05.528","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.528","00:00:05.529","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.529","00:00:05.531","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.531","00:00:05.532","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.532","00:00:05.533","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.533","00:00:05.534","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.534","00:00:05.535","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.535","00:00:05.537","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.537","00:00:05.538","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.538","00:00:05.540","0.002s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.540","00:00:05.540","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.540","00:00:05.541","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.541","00:00:05.543","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.543","00:00:05.544","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.544","00:00:05.545","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.545","00:00:05.546","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.546","00:00:05.547","0.001s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.547","00:00:05.549","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.344","00:00:05.344","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.344","00:00:05.345","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.345","00:00:05.346","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.346","00:00:05.348","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.348","00:00:05.349","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.349","00:00:05.350","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.350","00:00:05.351","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.351","00:00:05.352","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.352","00:00:05.353","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.353","00:00:05.354","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.354","00:00:05.355","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.355","00:00:05.356","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.356","00:00:05.357","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.357","00:00:05.358","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.358","00:00:05.359","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.359","00:00:05.360","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.360","00:00:05.361","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.361","00:00:05.363","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.363","00:00:05.364","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.364","00:00:05.365","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.365","00:00:05.366","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.366","00:00:05.367","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.367","00:00:05.368","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.368","00:00:05.369","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.369","00:00:05.370","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.370","00:00:05.371","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.371","00:00:05.373","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.373","00:00:05.374","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.374","00:00:05.375","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.375","00:00:05.376","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.376","00:00:05.377","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.377","00:00:05.378","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.378","00:00:05.379","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.379","00:00:05.380","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.380","00:00:05.381","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.381","00:00:05.383","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.383","00:00:05.384","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.384","00:00:05.385","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.385","00:00:05.386","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.386","00:00:05.387","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.387","00:00:05.388","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.388","00:00:05.389","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.389","00:00:05.390","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.390","00:00:05.392","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.392","00:00:05.393","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.393","00:00:05.394","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.394","00:00:05.395","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.395","00:00:05.396","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.396","00:00:05.397","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.397","00:00:05.398","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.398","00:00:05.399","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.399","00:00:05.400","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.400","00:00:05.402","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.402","00:00:05.403","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.403","00:00:05.404","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.404","00:00:05.405","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.405","00:00:05.406","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.406","00:00:05.407","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.407","00:00:05.408","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.408","00:00:05.409","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.409","00:00:05.411","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.411","00:00:05.412","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.412","00:00:05.413","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.413","00:00:05.414","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.414","00:00:05.415","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.415","00:00:05.416","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.416","00:00:05.417","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.417","00:00:05.418","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.418","00:00:05.419","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.419","00:00:05.421","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.421","00:00:05.422","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.422","00:00:05.423","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.423","00:00:05.424","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.424","00:00:05.425","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.425","00:00:05.426","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.426","00:00:05.427","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.427","00:00:05.428","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.428","00:00:05.430","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.430","00:00:05.431","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.431","00:00:05.432","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.432","00:00:05.433","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.433","00:00:05.434","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.434","00:00:05.435","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.435","00:00:05.436","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.436","00:00:05.437","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.437","00:00:05.438","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.438","00:00:05.440","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.440","00:00:05.441","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.441","00:00:05.442","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.442","00:00:05.443","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.443","00:00:05.444","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.444","00:00:05.445","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.445","00:00:05.446","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.446","00:00:05.447","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.447","00:00:05.448","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.448","00:00:05.450","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.450","00:00:05.451","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.451","00:00:05.452","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.452","00:00:05.453","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.453","00:00:05.454","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.454","00:00:05.455","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.455","00:00:05.456","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.456","00:00:05.458","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.458","00:00:05.459","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.461","00:00:05.463","0.002s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.470","00:00:05.471","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.471","00:00:05.472","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.472","00:00:05.473","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.473","00:00:05.474","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.474","00:00:05.475","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.475","00:00:05.477","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.477","00:00:05.478","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.478","00:00:05.479","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.479","00:00:05.480","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.480","00:00:05.481","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.481","00:00:05.483","0.002s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.483","00:00:05.484","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.484","00:00:05.485","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.485","00:00:05.486","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.486","00:00:05.487","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.491","00:00:05.492","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.492","00:00:05.493","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.493","00:00:05.494","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.494","00:00:05.496","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.496","00:00:05.497","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.497","00:00:05.498","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.498","00:00:05.499","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.499","00:00:05.500","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.500","00:00:05.501","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.501","00:00:05.503","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.503","00:00:05.504","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.504","00:00:05.505","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.505","00:00:05.506","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.506","00:00:05.507","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.507","00:00:05.508","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.508","00:00:05.510","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.510","00:00:05.511","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.511","00:00:05.512","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.512","00:00:05.513","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.513","00:00:05.514","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.514","00:00:05.515","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.515","00:00:05.517","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.517","00:00:05.518","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.518","00:00:05.519","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.519","00:00:05.520","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.520","00:00:05.521","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.521","00:00:05.523","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.523","00:00:05.524","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.524","00:00:05.525","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.525","00:00:05.526","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.526","00:00:05.527","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.527","00:00:05.528","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.528","00:00:05.529","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.529","00:00:05.531","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.531","00:00:05.532","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.532","00:00:05.533","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.533","00:00:05.534","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.534","00:00:05.535","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.535","00:00:05.537","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.537","00:00:05.538","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.538","00:00:05.540","0.002s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.540","00:00:05.540","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.540","00:00:05.541","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.541","00:00:05.543","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.543","00:00:05.544","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.544","00:00:05.545","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.545","00:00:05.546","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.546","00:00:05.547","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.547","00:00:05.549","0.002s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.549","00:00:05.551","0.002s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.551","00:00:05.552","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.552","00:00:05.552","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.552","00:00:05.553","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.553","00:00:05.555","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.555","00:00:05.556","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.556","00:00:05.557","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.557","00:00:05.558","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.558","00:00:05.559","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.559","00:00:05.561","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.561","00:00:05.562","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.562","00:00:05.563","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.563","00:00:05.564","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.564","00:00:05.565","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.565","00:00:05.566","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.566","00:00:05.568","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.568","00:00:05.569","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.569","00:00:05.570","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.570","00:00:05.571","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.571","00:00:05.572","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.572","00:00:05.573","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.573","00:00:05.575","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.575","00:00:05.576","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.576","00:00:05.577","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.577","00:00:05.578","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.578","00:00:05.579","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.579","00:00:05.581","0.001s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.581","00:00:05.582","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.471","00:00:05.472","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.472","00:00:05.473","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.473","00:00:05.474","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.474","00:00:05.475","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.475","00:00:05.476","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.476","00:00:05.478","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.478","00:00:05.479","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.479","00:00:05.480","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.480","00:00:05.481","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.481","00:00:05.483","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.483","00:00:05.483","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.483","00:00:05.485","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.485","00:00:05.486","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.486","00:00:05.487","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.491","00:00:05.492","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.492","00:00:05.493","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.493","00:00:05.494","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.494","00:00:05.495","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.495","00:00:05.497","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.497","00:00:05.498","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.498","00:00:05.499","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.499","00:00:05.500","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.500","00:00:05.501","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.501","00:00:05.503","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.503","00:00:05.504","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.504","00:00:05.505","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.505","00:00:05.506","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.506","00:00:05.507","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.507","00:00:05.508","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.508","00:00:05.509","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.509","00:00:05.511","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.511","00:00:05.512","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.512","00:00:05.513","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.513","00:00:05.514","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.514","00:00:05.515","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.515","00:00:05.517","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.517","00:00:05.518","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.518","00:00:05.519","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.519","00:00:05.520","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.520","00:00:05.521","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.521","00:00:05.522","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.522","00:00:05.524","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.524","00:00:05.525","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.525","00:00:05.526","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.526","00:00:05.527","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.527","00:00:05.528","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.528","00:00:05.529","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.529","00:00:05.531","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.531","00:00:05.532","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.532","00:00:05.533","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.533","00:00:05.534","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.534","00:00:05.535","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.535","00:00:05.536","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.536","00:00:05.538","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.538","00:00:05.539","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.539","00:00:05.540","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.540","00:00:05.541","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.541","00:00:05.542","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.542","00:00:05.544","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.544","00:00:05.545","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.545","00:00:05.546","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.546","00:00:05.547","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.547","00:00:05.548","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.550","00:00:05.551","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.551","00:00:05.552","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.552","00:00:05.553","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.553","00:00:05.554","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.554","00:00:05.556","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.556","00:00:05.557","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.557","00:00:05.558","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.558","00:00:05.559","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.559","00:00:05.560","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.560","00:00:05.562","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.562","00:00:05.563","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.563","00:00:05.564","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.564","00:00:05.565","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.565","00:00:05.566","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.566","00:00:05.568","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.568","00:00:05.569","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.569","00:00:05.570","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.570","00:00:05.571","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.571","00:00:05.572","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.572","00:00:05.573","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.573","00:00:05.575","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.575","00:00:05.576","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.576","00:00:05.577","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.577","00:00:05.578","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.578","00:00:05.579","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.579","00:00:05.580","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.580","00:00:05.582","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.582","00:00:05.583","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.583","00:00:05.584","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.584","00:00:05.585","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.585","00:00:05.586","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.586","00:00:05.588","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.588","00:00:05.589","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.589","00:00:05.590","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.590","00:00:05.591","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.591","00:00:05.593","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.595","00:00:05.597","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.597","00:00:05.599","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.599","00:00:05.601","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.601","00:00:05.602","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.602","00:00:05.603","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.603","00:00:05.604","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.604","00:00:05.606","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.606","00:00:05.607","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.607","00:00:05.608","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.608","00:00:05.609","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.609","00:00:05.610","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.610","00:00:05.611","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.611","00:00:05.613","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.613","00:00:05.614","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.614","00:00:05.615","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.615","00:00:05.616","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.616","00:00:05.618","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.620","00:00:05.622","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.622","00:00:05.622","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.622","00:00:05.624","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.624","00:00:05.625","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.625","00:00:05.626","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.626","00:00:05.628","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.628","00:00:05.629","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.629","00:00:05.630","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.630","00:00:05.632","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.632","00:00:05.633","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.633","00:00:05.634","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.634","00:00:05.636","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.636","00:00:05.637","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.637","00:00:05.639","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.639","00:00:05.640","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.640","00:00:05.641","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.641","00:00:05.643","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.643","00:00:05.644","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.644","00:00:05.646","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.646","00:00:05.647","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.647","00:00:05.648","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.648","00:00:05.650","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.650","00:00:05.651","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.651","00:00:05.652","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.652","00:00:05.654","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.654","00:00:05.655","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.655","00:00:05.657","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.657","00:00:05.658","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.658","00:00:05.659","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.659","00:00:05.661","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.661","00:00:05.662","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.662","00:00:05.664","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.664","00:00:05.665","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.665","00:00:05.666","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.666","00:00:05.668","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.668","00:00:05.669","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.669","00:00:05.671","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.671","00:00:05.673","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.673","00:00:05.674","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.674","00:00:05.675","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.675","00:00:05.677","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.677","00:00:05.678","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.678","00:00:05.680","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.680","00:00:05.681","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.681","00:00:05.682","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.682","00:00:05.683","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.683","00:00:05.685","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.685","00:00:05.687","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.687","00:00:05.688","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.688","00:00:05.690","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.690","00:00:05.691","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.691","00:00:05.692","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.692","00:00:05.693","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.693","00:00:05.695","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.695","00:00:05.696","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.696","00:00:05.697","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.697","00:00:05.699","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.699","00:00:05.700","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.700","00:00:05.701","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.701","00:00:05.703","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.703","00:00:05.704","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.704","00:00:05.706","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.706","00:00:05.707","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.707","00:00:05.708","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.708","00:00:05.710","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.710","00:00:05.711","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.711","00:00:05.712","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.712","00:00:05.714","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.714","00:00:05.715","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.715","00:00:05.717","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.717","00:00:05.718","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.718","00:00:05.719","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.719","00:00:05.721","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.721","00:00:05.723","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.729","00:00:05.730","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.730","00:00:05.732","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.732","00:00:05.733","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.733","00:00:05.735","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.735","00:00:05.736","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.736","00:00:05.738","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.738","00:00:05.739","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.739","00:00:05.740","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.740","00:00:05.741","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.741","00:00:05.743","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.743","00:00:05.744","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.744","00:00:05.745","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.745","00:00:05.747","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.747","00:00:05.748","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.748","00:00:05.749","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.749","00:00:05.751","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.751","00:00:05.753","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.758","00:00:05.759","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.759","00:00:05.760","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.760","00:00:05.762","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.762","00:00:05.763","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.763","00:00:05.764","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.764","00:00:05.766","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.766","00:00:05.767","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.767","00:00:05.769","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.769","00:00:05.770","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.770","00:00:05.771","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.771","00:00:05.773","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.773","00:00:05.774","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.774","00:00:05.776","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.776","00:00:05.777","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.777","00:00:05.778","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.778","00:00:05.780","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.780","00:00:05.781","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.781","00:00:05.782","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.782","00:00:05.784","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.784","00:00:05.785","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.785","00:00:05.787","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.787","00:00:05.788","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.788","00:00:05.789","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.789","00:00:05.791","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.791","00:00:05.792","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.792","00:00:05.793","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.793","00:00:05.795","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.795","00:00:05.796","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.796","00:00:05.798","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.798","00:00:05.799","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.799","00:00:05.800","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.800","00:00:05.802","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.802","00:00:05.803","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.803","00:00:05.805","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.805","00:00:05.806","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.806","00:00:05.807","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.807","00:00:05.809","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.809","00:00:05.810","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.810","00:00:05.812","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.812","00:00:05.813","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.813","00:00:05.815","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.815","00:00:05.816","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.816","00:00:05.817","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.817","00:00:05.819","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.819","00:00:05.820","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.820","00:00:05.822","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.822","00:00:05.823","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.823","00:00:05.825","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.825","00:00:05.826","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.826","00:00:05.827","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.827","00:00:05.829","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.829","00:00:05.830","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.830","00:00:05.832","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.832","00:00:05.833","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.833","00:00:05.835","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.835","00:00:05.836","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.836","00:00:05.837","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.837","00:00:05.839","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.839","00:00:05.840","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.840","00:00:05.842","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.842","00:00:05.843","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.843","00:00:05.845","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.845","00:00:05.846","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.846","00:00:05.847","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.847","00:00:05.849","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.849","00:00:05.850","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.850","00:00:05.852","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.852","00:00:05.853","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.853","00:00:05.855","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.855","00:00:05.856","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.856","00:00:05.858","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.858","00:00:05.859","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.859","00:00:05.860","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.860","00:00:05.862","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.862","00:00:05.863","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.863","00:00:05.865","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.865","00:00:05.866","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.866","00:00:05.867","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.867","00:00:05.869","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.869","00:00:05.870","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.870","00:00:05.872","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.872","00:00:05.873","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.873","00:00:05.875","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.875","00:00:05.876","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.876","00:00:05.878","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.878","00:00:05.879","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.879","00:00:05.880","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.880","00:00:05.882","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.882","00:00:05.883","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.883","00:00:05.885","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.885","00:00:05.886","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.886","00:00:05.888","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.888","00:00:05.889","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.889","00:00:05.890","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.890","00:00:05.892","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.892","00:00:05.893","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.893","00:00:05.895","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.895","00:00:05.896","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.896","00:00:05.898","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.898","00:00:05.899","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.899","00:00:05.900","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.900","00:00:05.902","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.902","00:00:05.903","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.903","00:00:05.905","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.905","00:00:05.906","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.906","00:00:05.908","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.908","00:00:05.909","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.909","00:00:05.911","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.911","00:00:05.912","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.912","00:00:05.913","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.913","00:00:05.915","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.915","00:00:05.916","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.916","00:00:05.918","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.918","00:00:05.919","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.919","00:00:05.921","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.921","00:00:05.922","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.922","00:00:05.923","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.923","00:00:05.925","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.925","00:00:05.926","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.926","00:00:05.928","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.928","00:00:05.929","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.929","00:00:05.931","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.931","00:00:05.932","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.932","00:00:05.933","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.933","00:00:05.935","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.935","00:00:05.936","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.936","00:00:05.938","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.938","00:00:05.939","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.939","00:00:05.941","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.941","00:00:05.942","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.942","00:00:05.944","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.944","00:00:05.945","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.945","00:00:05.946","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.946","00:00:05.948","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.948","00:00:05.949","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.949","00:00:05.951","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.951","00:00:05.952","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.952","00:00:05.954","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.954","00:00:05.955","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.955","00:00:05.956","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.956","00:00:05.957","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.957","00:00:05.959","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.959","00:00:05.960","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.960","00:00:05.961","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.961","00:00:05.962","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.962","00:00:05.964","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.964","00:00:05.965","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.965","00:00:05.966","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.966","00:00:05.968","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.968","00:00:05.969","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.969","00:00:05.970","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.970","00:00:05.972","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.972","00:00:05.973","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.973","00:00:05.974","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.974","00:00:05.976","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.976","00:00:05.977","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.977","00:00:05.978","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.978","00:00:05.980","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.980","00:00:05.981","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.981","00:00:05.982","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.982","00:00:05.983","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.983","00:00:05.985","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.985","00:00:05.986","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.986","00:00:05.987","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.987","00:00:05.989","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.989","00:00:05.990","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.990","00:00:05.991","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.991","00:00:05.993","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.993","00:00:05.994","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.994","00:00:05.995","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.995","00:00:05.997","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.997","00:00:05.998","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.998","00:00:05.999","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.999","00:00:06.001","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.001","00:00:06.002","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.002","00:00:06.003","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.003","00:00:06.005","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.005","00:00:06.006","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.006","00:00:06.007","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.007","00:00:06.009","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.009","00:00:06.010","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.010","00:00:06.011","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.011","00:00:06.013","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.013","00:00:06.014","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.014","00:00:06.015","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.015","00:00:06.017","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.017","00:00:06.018","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.018","00:00:06.019","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.019","00:00:06.020","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.020","00:00:06.022","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.022","00:00:06.023","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.023","00:00:06.024","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.024","00:00:06.026","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.026","00:00:06.027","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.027","00:00:06.028","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.028","00:00:06.030","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.030","00:00:06.031","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.031","00:00:06.032","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.032","00:00:06.034","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.034","00:00:06.035","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.035","00:00:06.036","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.036","00:00:06.038","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.038","00:00:06.039","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.039","00:00:06.040","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.040","00:00:06.041","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.041","00:00:06.043","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.043","00:00:06.044","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.044","00:00:06.045","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.045","00:00:06.047","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.047","00:00:06.048","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.048","00:00:06.049","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.049","00:00:06.050","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.050","00:00:06.052","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.052","00:00:06.053","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.053","00:00:06.054","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.054","00:00:06.055","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.055","00:00:06.057","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.057","00:00:06.058","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.058","00:00:06.059","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.059","00:00:06.061","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.061","00:00:06.062","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.062","00:00:06.063","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.063","00:00:06.065","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.065","00:00:06.066","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.066","00:00:06.067","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.067","00:00:06.068","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.068","00:00:06.070","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.070","00:00:06.071","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.071","00:00:06.072","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.072","00:00:06.073","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.073","00:00:06.075","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.075","00:00:06.076","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.076","00:00:06.077","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.077","00:00:06.079","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.079","00:00:06.080","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.080","00:00:06.081","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.081","00:00:06.082","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.082","00:00:06.084","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.084","00:00:06.085","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.085","00:00:06.086","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.086","00:00:06.088","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.088","00:00:06.089","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.089","00:00:06.090","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.090","00:00:06.092","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.092","00:00:06.093","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.093","00:00:06.094","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.094","00:00:06.095","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.095","00:00:06.097","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.097","00:00:06.098","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.098","00:00:06.099","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.099","00:00:06.100","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.100","00:00:06.102","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.102","00:00:06.103","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.103","00:00:06.104","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.104","00:00:06.106","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.106","00:00:06.107","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.107","00:00:06.108","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.108","00:00:06.110","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.110","00:00:06.111","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.111","00:00:06.112","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.112","00:00:06.113","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.113","00:00:06.115","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.115","00:00:06.116","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.116","00:00:06.117","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.117","00:00:06.118","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.118","00:00:06.120","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.120","00:00:06.121","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.121","00:00:06.122","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.122","00:00:06.124","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.124","00:00:06.125","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.125","00:00:06.126","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.126","00:00:06.128","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.128","00:00:06.130","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.130","00:00:06.132","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.132","00:00:06.133","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.133","00:00:06.135","0.001s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.135","00:00:06.136","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:06.136","00:00:06.137","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.492","00:00:05.493","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.493","00:00:05.494","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.494","00:00:05.495","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.495","00:00:05.497","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.497","00:00:05.498","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.498","00:00:05.499","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.499","00:00:05.500","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.500","00:00:05.501","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.501","00:00:05.503","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.503","00:00:05.504","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.504","00:00:05.505","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.505","00:00:05.506","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.506","00:00:05.507","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.507","00:00:05.508","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.508","00:00:05.509","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.509","00:00:05.511","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.511","00:00:05.512","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.512","00:00:05.513","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.513","00:00:05.514","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.514","00:00:05.515","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.515","00:00:05.517","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.517","00:00:05.518","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.518","00:00:05.519","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.519","00:00:05.520","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.520","00:00:05.521","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.521","00:00:05.522","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.522","00:00:05.524","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.524","00:00:05.525","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.525","00:00:05.526","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.526","00:00:05.527","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.527","00:00:05.528","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.528","00:00:05.529","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.529","00:00:05.531","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.531","00:00:05.532","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.532","00:00:05.533","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.533","00:00:05.534","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.534","00:00:05.535","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.535","00:00:05.537","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.537","00:00:05.538","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.538","00:00:05.540","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.540","00:00:05.540","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.540","00:00:05.541","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.541","00:00:05.542","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.542","00:00:05.544","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.544","00:00:05.545","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.545","00:00:05.546","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.546","00:00:05.547","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.547","00:00:05.549","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.549","00:00:05.550","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.550","00:00:05.551","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.551","00:00:05.552","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.552","00:00:05.553","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.553","00:00:05.555","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.555","00:00:05.556","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.556","00:00:05.557","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.557","00:00:05.558","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.558","00:00:05.559","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.559","00:00:05.560","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.560","00:00:05.562","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.562","00:00:05.563","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.563","00:00:05.564","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.564","00:00:05.565","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.565","00:00:05.566","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.566","00:00:05.568","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.568","00:00:05.569","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.569","00:00:05.570","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.570","00:00:05.571","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.571","00:00:05.572","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.572","00:00:05.573","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.573","00:00:05.575","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.575","00:00:05.576","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.576","00:00:05.577","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.577","00:00:05.578","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.578","00:00:05.579","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.579","00:00:05.581","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.581","00:00:05.582","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.582","00:00:05.583","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.583","00:00:05.584","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.584","00:00:05.585","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.585","00:00:05.587","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.587","00:00:05.588","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.588","00:00:05.589","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.589","00:00:05.590","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.590","00:00:05.592","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.592","00:00:05.593","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.596","00:00:05.597","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.597","00:00:05.599","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.599","00:00:05.601","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.601","00:00:05.602","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.602","00:00:05.603","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.603","00:00:05.604","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.604","00:00:05.606","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.606","00:00:05.607","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.607","00:00:05.608","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.608","00:00:05.609","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.609","00:00:05.610","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.610","00:00:05.612","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.612","00:00:05.613","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.613","00:00:05.614","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.614","00:00:05.615","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.615","00:00:05.616","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.616","00:00:05.618","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.618","00:00:05.620","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.620","00:00:05.622","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.622","00:00:05.622","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.622","00:00:05.624","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.624","00:00:05.625","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.625","00:00:05.626","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.626","00:00:05.628","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.628","00:00:05.629","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.629","00:00:05.630","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.630","00:00:05.632","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.632","00:00:05.633","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.633","00:00:05.634","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.634","00:00:05.636","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.636","00:00:05.637","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.637","00:00:05.639","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.639","00:00:05.640","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.640","00:00:05.642","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.642","00:00:05.643","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.643","00:00:05.644","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.644","00:00:05.646","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.646","00:00:05.647","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.647","00:00:05.648","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.648","00:00:05.650","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.650","00:00:05.651","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.651","00:00:05.652","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.652","00:00:05.654","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.654","00:00:05.655","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.655","00:00:05.657","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.657","00:00:05.658","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.658","00:00:05.659","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.659","00:00:05.661","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.661","00:00:05.662","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.662","00:00:05.664","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.664","00:00:05.665","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.665","00:00:05.666","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.666","00:00:05.668","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.668","00:00:05.669","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.669","00:00:05.671","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.671","00:00:05.673","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.673","00:00:05.674","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.674","00:00:05.676","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.676","00:00:05.677","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.677","00:00:05.678","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.678","00:00:05.680","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.680","00:00:05.682","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.682","00:00:05.682","0.000s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.682","00:00:05.683","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.683","00:00:05.685","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.685","00:00:05.687","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.687","00:00:05.688","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.688","00:00:05.690","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.690","00:00:05.691","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.691","00:00:05.692","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.692","00:00:05.694","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.694","00:00:05.695","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.695","00:00:05.696","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.696","00:00:05.697","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.697","00:00:05.699","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.699","00:00:05.700","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.700","00:00:05.702","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.702","00:00:05.703","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.703","00:00:05.704","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.704","00:00:05.706","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.706","00:00:05.707","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.707","00:00:05.708","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.708","00:00:05.710","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.710","00:00:05.711","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.711","00:00:05.713","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.713","00:00:05.714","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.714","00:00:05.715","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.715","00:00:05.717","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.717","00:00:05.718","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.718","00:00:05.719","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.719","00:00:05.721","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.721","00:00:05.723","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.729","00:00:05.731","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.731","00:00:05.732","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.732","00:00:05.733","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.733","00:00:05.735","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.735","00:00:05.736","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.736","00:00:05.738","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.738","00:00:05.739","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.739","00:00:05.740","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.740","00:00:05.742","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.742","00:00:05.743","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.743","00:00:05.744","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.744","00:00:05.745","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.745","00:00:05.747","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.747","00:00:05.748","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.748","00:00:05.750","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.750","00:00:05.751","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.751","00:00:05.753","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.758","00:00:05.760","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.760","00:00:05.760","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.760","00:00:05.762","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.762","00:00:05.763","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.763","00:00:05.765","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.765","00:00:05.766","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.766","00:00:05.767","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.767","00:00:05.769","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.769","00:00:05.770","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.770","00:00:05.772","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.772","00:00:05.773","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.773","00:00:05.774","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.774","00:00:05.776","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.776","00:00:05.777","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.777","00:00:05.778","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.778","00:00:05.780","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.780","00:00:05.781","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.781","00:00:05.783","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.783","00:00:05.784","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.784","00:00:05.785","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.785","00:00:05.787","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.787","00:00:05.788","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.788","00:00:05.789","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.789","00:00:05.791","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.791","00:00:05.792","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.792","00:00:05.794","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.794","00:00:05.795","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.795","00:00:05.796","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.796","00:00:05.798","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.798","00:00:05.799","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.799","00:00:05.801","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.801","00:00:05.802","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.802","00:00:05.803","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.803","00:00:05.805","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.805","00:00:05.806","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.806","00:00:05.808","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.808","00:00:05.809","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.809","00:00:05.810","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.810","00:00:05.812","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.812","00:00:05.813","0.001s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.813","00:00:05.815","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.552","00:00:05.552","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.552","00:00:05.553","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.553","00:00:05.555","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.555","00:00:05.556","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.556","00:00:05.557","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.557","00:00:05.558","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.558","00:00:05.559","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.559","00:00:05.561","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.561","00:00:05.562","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.562","00:00:05.563","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.563","00:00:05.564","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.564","00:00:05.565","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.565","00:00:05.566","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.566","00:00:05.568","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.568","00:00:05.569","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.569","00:00:05.570","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.570","00:00:05.571","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.571","00:00:05.572","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.572","00:00:05.573","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.573","00:00:05.575","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.575","00:00:05.576","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.576","00:00:05.577","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.577","00:00:05.578","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.578","00:00:05.579","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.579","00:00:05.581","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.581","00:00:05.582","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.582","00:00:05.583","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.583","00:00:05.585","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.585","00:00:05.585","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.585","00:00:05.587","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.587","00:00:05.588","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.588","00:00:05.589","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.589","00:00:05.590","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.590","00:00:05.592","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.592","00:00:05.593","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.596","00:00:05.597","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.597","00:00:05.599","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.599","00:00:05.601","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.601","00:00:05.602","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.602","00:00:05.603","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.603","00:00:05.604","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.604","00:00:05.606","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.606","00:00:05.607","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.607","00:00:05.608","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.608","00:00:05.609","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.609","00:00:05.610","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.610","00:00:05.612","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.612","00:00:05.613","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.613","00:00:05.614","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.614","00:00:05.615","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.615","00:00:05.616","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.616","00:00:05.618","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.618","00:00:05.620","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.620","00:00:05.622","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.622","00:00:05.622","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.622","00:00:05.624","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.624","00:00:05.625","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.625","00:00:05.626","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.626","00:00:05.628","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.628","00:00:05.629","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.629","00:00:05.630","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.630","00:00:05.632","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.632","00:00:05.633","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.633","00:00:05.634","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.634","00:00:05.636","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.636","00:00:05.637","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.637","00:00:05.639","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.639","00:00:05.640","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.640","00:00:05.642","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.642","00:00:05.643","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.643","00:00:05.644","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.644","00:00:05.646","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.646","00:00:05.647","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.647","00:00:05.648","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.648","00:00:05.650","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.650","00:00:05.651","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.651","00:00:05.653","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.653","00:00:05.654","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.654","00:00:05.655","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.655","00:00:05.657","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.657","00:00:05.658","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.658","00:00:05.659","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.659","00:00:05.661","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.661","00:00:05.662","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.662","00:00:05.664","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.664","00:00:05.665","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.665","00:00:05.666","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.666","00:00:05.668","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.668","00:00:05.669","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.669","00:00:05.671","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.671","00:00:05.673","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.673","00:00:05.674","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.674","00:00:05.676","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.676","00:00:05.677","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.677","00:00:05.678","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.678","00:00:05.680","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.680","00:00:05.682","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.682","00:00:05.682","0.000s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.682","00:00:05.683","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.683","00:00:05.685","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.685","00:00:05.687","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.687","00:00:05.688","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.688","00:00:05.690","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.690","00:00:05.691","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.691","00:00:05.692","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.692","00:00:05.694","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.694","00:00:05.695","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.695","00:00:05.696","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.696","00:00:05.697","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.697","00:00:05.699","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.699","00:00:05.700","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.700","00:00:05.702","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.702","00:00:05.703","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.703","00:00:05.704","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.704","00:00:05.706","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.706","00:00:05.707","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.707","00:00:05.709","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.709","00:00:05.710","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.710","00:00:05.711","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.711","00:00:05.713","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.713","00:00:05.714","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.714","00:00:05.715","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.715","00:00:05.717","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.717","00:00:05.718","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.718","00:00:05.719","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.719","00:00:05.721","0.001s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.721","00:00:05.723","0.002s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.584","00:00:05.585","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.585","00:00:05.587","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.587","00:00:05.588","0.002s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.588","00:00:05.589","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.589","00:00:05.590","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.590","00:00:05.592","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.592","00:00:05.593","0.002s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.596","00:00:05.597","0.002s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.597","00:00:05.599","0.002s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.599","00:00:05.601","0.002s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.601","00:00:05.602","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.602","00:00:05.603","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.603","00:00:05.604","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.604","00:00:05.606","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.606","00:00:05.607","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.607","00:00:05.608","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.608","00:00:05.609","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.609","00:00:05.610","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.610","00:00:05.612","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.612","00:00:05.613","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.613","00:00:05.614","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.614","00:00:05.615","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.615","00:00:05.616","0.001s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.616","00:00:05.618","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.622","00:00:05.622","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.622","00:00:05.624","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.624","00:00:05.625","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.625","00:00:05.626","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.626","00:00:05.628","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.628","00:00:05.629","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.629","00:00:05.630","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.630","00:00:05.632","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.632","00:00:05.633","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.633","00:00:05.634","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.634","00:00:05.636","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.636","00:00:05.637","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.637","00:00:05.639","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.639","00:00:05.640","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.640","00:00:05.642","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.642","00:00:05.643","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.643","00:00:05.644","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.644","00:00:05.646","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.646","00:00:05.647","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.647","00:00:05.648","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.648","00:00:05.650","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.650","00:00:05.651","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.651","00:00:05.652","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.652","00:00:05.654","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.654","00:00:05.655","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.655","00:00:05.657","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.657","00:00:05.658","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.658","00:00:05.659","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.659","00:00:05.661","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.661","00:00:05.662","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.662","00:00:05.664","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.664","00:00:05.665","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.665","00:00:05.666","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.666","00:00:05.668","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.668","00:00:05.669","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.669","00:00:05.671","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.671","00:00:05.673","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.673","00:00:05.674","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.674","00:00:05.675","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.675","00:00:05.677","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.677","00:00:05.678","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.678","00:00:05.680","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.680","00:00:05.682","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.682","00:00:05.682","0.000s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.682","00:00:05.683","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.683","00:00:05.685","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.685","00:00:05.687","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.687","00:00:05.688","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.688","00:00:05.690","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.690","00:00:05.691","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.691","00:00:05.692","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.692","00:00:05.694","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.694","00:00:05.695","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.695","00:00:05.696","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.696","00:00:05.697","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.697","00:00:05.699","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.699","00:00:05.700","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.700","00:00:05.701","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.701","00:00:05.703","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.703","00:00:05.704","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.704","00:00:05.706","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.706","00:00:05.707","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.707","00:00:05.708","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.708","00:00:05.710","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.710","00:00:05.711","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.711","00:00:05.713","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.713","00:00:05.714","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.714","00:00:05.715","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.715","00:00:05.717","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.717","00:00:05.718","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.718","00:00:05.719","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.719","00:00:05.721","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.721","00:00:05.723","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.729","00:00:05.731","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.731","00:00:05.732","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.732","00:00:05.733","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.733","00:00:05.735","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.735","00:00:05.736","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.736","00:00:05.738","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.738","00:00:05.739","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.739","00:00:05.740","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.740","00:00:05.742","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.742","00:00:05.743","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.743","00:00:05.744","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.744","00:00:05.745","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.745","00:00:05.747","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.747","00:00:05.748","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.748","00:00:05.750","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.750","00:00:05.751","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.751","00:00:05.753","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.758","00:00:05.760","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.760","00:00:05.760","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.760","00:00:05.762","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.762","00:00:05.763","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.763","00:00:05.765","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.765","00:00:05.766","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.766","00:00:05.767","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.767","00:00:05.769","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.769","00:00:05.770","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.770","00:00:05.772","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.772","00:00:05.773","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.773","00:00:05.774","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.774","00:00:05.776","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.776","00:00:05.777","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.777","00:00:05.778","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.778","00:00:05.780","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.780","00:00:05.781","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.781","00:00:05.783","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.783","00:00:05.784","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.784","00:00:05.785","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.785","00:00:05.787","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.787","00:00:05.788","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.788","00:00:05.789","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.789","00:00:05.791","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.791","00:00:05.792","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.792","00:00:05.794","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.794","00:00:05.795","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.795","00:00:05.796","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.796","00:00:05.798","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.798","00:00:05.799","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.799","00:00:05.801","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.801","00:00:05.802","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.802","00:00:05.803","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.803","00:00:05.805","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.805","00:00:05.806","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.806","00:00:05.808","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.808","00:00:05.809","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.809","00:00:05.810","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.810","00:00:05.812","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.812","00:00:05.813","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.813","00:00:05.815","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.815","00:00:05.816","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.816","00:00:05.817","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.817","00:00:05.819","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.819","00:00:05.820","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.820","00:00:05.822","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.822","00:00:05.823","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.823","00:00:05.825","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.825","00:00:05.826","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.826","00:00:05.827","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.827","00:00:05.829","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.829","00:00:05.830","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.830","00:00:05.832","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.832","00:00:05.833","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.833","00:00:05.835","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.835","00:00:05.836","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.836","00:00:05.838","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.838","00:00:05.839","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.839","00:00:05.840","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.840","00:00:05.842","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.842","00:00:05.843","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.843","00:00:05.845","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.845","00:00:05.846","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.846","00:00:05.847","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.847","00:00:05.849","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.849","00:00:05.850","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.850","00:00:05.852","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.852","00:00:05.853","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.853","00:00:05.855","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.855","00:00:05.856","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.856","00:00:05.858","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.858","00:00:05.859","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.859","00:00:05.860","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.860","00:00:05.862","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.862","00:00:05.863","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.863","00:00:05.865","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.865","00:00:05.866","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.866","00:00:05.868","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.868","00:00:05.869","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.869","00:00:05.870","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.870","00:00:05.872","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.872","00:00:05.873","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.873","00:00:05.875","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.875","00:00:05.876","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.876","00:00:05.878","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.878","00:00:05.879","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.879","00:00:05.880","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.880","00:00:05.882","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.882","00:00:05.883","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.883","00:00:05.885","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.885","00:00:05.886","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.886","00:00:05.888","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.888","00:00:05.889","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.889","00:00:05.891","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.891","00:00:05.892","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.892","00:00:05.893","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.893","00:00:05.895","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.895","00:00:05.896","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.896","00:00:05.898","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.898","00:00:05.899","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.899","00:00:05.900","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.900","00:00:05.902","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.902","00:00:05.903","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.903","00:00:05.905","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.905","00:00:05.906","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.906","00:00:05.908","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.908","00:00:05.909","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.909","00:00:05.911","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.911","00:00:05.912","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.912","00:00:05.913","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.913","00:00:05.915","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.915","00:00:05.916","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.916","00:00:05.918","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.918","00:00:05.919","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.919","00:00:05.921","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.921","00:00:05.922","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.922","00:00:05.924","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.924","00:00:05.925","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.925","00:00:05.926","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.926","00:00:05.928","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.928","00:00:05.929","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.929","00:00:05.931","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.931","00:00:05.932","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.932","00:00:05.934","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.934","00:00:05.935","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.935","00:00:05.936","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.936","00:00:05.938","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.938","00:00:05.939","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.939","00:00:05.941","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.941","00:00:05.942","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.942","00:00:05.944","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.944","00:00:05.945","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.945","00:00:05.946","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.946","00:00:05.948","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.948","00:00:05.949","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.949","00:00:05.951","0.001s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.951","00:00:05.952","0.002s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.952","00:00:05.954","0.002s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.731","00:00:05.732","0.001s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.732","00:00:05.733","0.002s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.733","00:00:05.735","0.002s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.735","00:00:05.736","0.001s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.736","00:00:05.738","0.001s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.738","00:00:05.739","0.001s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.739","00:00:05.740","0.001s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.740","00:00:05.742","0.001s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.742","00:00:05.743","0.001s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.743","00:00:05.744","0.001s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.744","00:00:05.745","0.001s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.745","00:00:05.747","0.002s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.747","00:00:05.748","0.001s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.748","00:00:05.750","0.001s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.750","00:00:05.751","0.001s"],[336,17,"00:00:05.679","00:00:05.753","00:00:05.751","00:00:05.753","0.002s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.760","00:00:05.760","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.760","00:00:05.762","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.762","00:00:05.763","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.763","00:00:05.765","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.765","00:00:05.766","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.766","00:00:05.767","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.767","00:00:05.769","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.769","00:00:05.770","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.770","00:00:05.772","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.772","00:00:05.773","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.773","00:00:05.774","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.774","00:00:05.776","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.776","00:00:05.777","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.777","00:00:05.778","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.778","00:00:05.780","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.780","00:00:05.781","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.781","00:00:05.782","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.782","00:00:05.784","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.784","00:00:05.785","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.785","00:00:05.787","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.787","00:00:05.788","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.788","00:00:05.789","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.789","00:00:05.791","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.791","00:00:05.792","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.792","00:00:05.793","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.793","00:00:05.795","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.795","00:00:05.796","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.796","00:00:05.798","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.798","00:00:05.799","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.799","00:00:05.800","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.800","00:00:05.802","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.802","00:00:05.803","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.803","00:00:05.805","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.805","00:00:05.806","0.002s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.806","00:00:05.808","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.808","00:00:05.809","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.809","00:00:05.810","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.810","00:00:05.812","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.812","00:00:05.813","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.813","00:00:05.815","0.002s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.815","00:00:05.816","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.816","00:00:05.817","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.817","00:00:05.819","0.002s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.819","00:00:05.820","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.820","00:00:05.822","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.822","00:00:05.823","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.823","00:00:05.825","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.825","00:00:05.826","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.826","00:00:05.827","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.827","00:00:05.829","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.829","00:00:05.830","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.830","00:00:05.832","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.832","00:00:05.833","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.833","00:00:05.835","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.835","00:00:05.836","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.836","00:00:05.838","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.838","00:00:05.839","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.839","00:00:05.840","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.840","00:00:05.842","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.842","00:00:05.843","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.843","00:00:05.845","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.845","00:00:05.846","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.846","00:00:05.847","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.847","00:00:05.849","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.849","00:00:05.850","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.850","00:00:05.852","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.852","00:00:05.853","0.002s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.853","00:00:05.855","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.855","00:00:05.856","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.856","00:00:05.858","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.858","00:00:05.859","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.859","00:00:05.860","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.860","00:00:05.862","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.862","00:00:05.863","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.863","00:00:05.865","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.865","00:00:05.866","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.866","00:00:05.868","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.868","00:00:05.869","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.869","00:00:05.870","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.870","00:00:05.872","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.872","00:00:05.873","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.873","00:00:05.875","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.875","00:00:05.876","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.876","00:00:05.878","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.878","00:00:05.879","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.879","00:00:05.880","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.880","00:00:05.882","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.882","00:00:05.883","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.883","00:00:05.885","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.885","00:00:05.886","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.886","00:00:05.888","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.888","00:00:05.889","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.889","00:00:05.891","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.891","00:00:05.892","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.892","00:00:05.893","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.893","00:00:05.895","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.895","00:00:05.896","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.896","00:00:05.898","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.898","00:00:05.899","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.899","00:00:05.900","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.900","00:00:05.902","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.902","00:00:05.903","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.903","00:00:05.905","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.905","00:00:05.906","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.906","00:00:05.908","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.908","00:00:05.909","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.909","00:00:05.911","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.911","00:00:05.912","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.912","00:00:05.913","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.913","00:00:05.915","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.915","00:00:05.916","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.916","00:00:05.918","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.918","00:00:05.919","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.919","00:00:05.921","0.002s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.921","00:00:05.922","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.922","00:00:05.924","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.924","00:00:05.925","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.925","00:00:05.926","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.926","00:00:05.928","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.928","00:00:05.929","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.929","00:00:05.931","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.931","00:00:05.932","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.932","00:00:05.934","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.934","00:00:05.935","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.935","00:00:05.936","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.936","00:00:05.938","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.938","00:00:05.939","0.002s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.939","00:00:05.941","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.941","00:00:05.942","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.942","00:00:05.944","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.944","00:00:05.945","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.945","00:00:05.946","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.946","00:00:05.948","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.948","00:00:05.949","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.949","00:00:05.951","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.951","00:00:05.952","0.002s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.952","00:00:05.954","0.002s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.954","00:00:05.955","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.955","00:00:05.956","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.956","00:00:05.957","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.957","00:00:05.959","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.959","00:00:05.960","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.960","00:00:05.961","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.961","00:00:05.963","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.963","00:00:05.964","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.964","00:00:05.965","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.965","00:00:05.966","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.966","00:00:05.968","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.968","00:00:05.969","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.969","00:00:05.970","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.970","00:00:05.972","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.972","00:00:05.973","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.973","00:00:05.974","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.974","00:00:05.976","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.976","00:00:05.977","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.977","00:00:05.978","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.978","00:00:05.980","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.980","00:00:05.981","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.981","00:00:05.982","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.982","00:00:05.984","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.984","00:00:05.985","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.985","00:00:05.986","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.986","00:00:05.987","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.987","00:00:05.989","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.989","00:00:05.990","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.990","00:00:05.991","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.991","00:00:05.993","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.993","00:00:05.994","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.994","00:00:05.995","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.995","00:00:05.997","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.997","00:00:05.998","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.998","00:00:05.999","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:05.999","00:00:06.001","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.001","00:00:06.002","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.002","00:00:06.003","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.003","00:00:06.005","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.005","00:00:06.006","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.006","00:00:06.007","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.007","00:00:06.009","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.009","00:00:06.010","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.010","00:00:06.011","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.011","00:00:06.013","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.013","00:00:06.014","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.014","00:00:06.015","0.002s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.015","00:00:06.017","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.017","00:00:06.018","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.018","00:00:06.019","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.019","00:00:06.020","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.020","00:00:06.022","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.022","00:00:06.023","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.023","00:00:06.024","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.024","00:00:06.026","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.026","00:00:06.027","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.027","00:00:06.028","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.028","00:00:06.030","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.030","00:00:06.031","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.031","00:00:06.032","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.032","00:00:06.034","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.034","00:00:06.035","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.035","00:00:06.036","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.036","00:00:06.038","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.038","00:00:06.039","0.001s"],[6,204,"00:00:05.742","00:00:06.040","00:00:06.039","00:00:06.040","0.001s"],[331,11,"00:00:06.227","00:00:06.256","00:00:06.243","00:00:06.244","0.000s"],[331,11,"00:00:06.227","00:00:06.256","00:00:06.244","00:00:06.245","0.001s"],[331,11,"00:00:06.227","00:00:06.256","00:00:06.245","00:00:06.246","0.001s"],[331,11,"00:00:06.227","00:00:06.256","00:00:06.250","00:00:06.250","0.001s"],[331,11,"00:00:06.227","00:00:06.256","00:00:06.250","00:00:06.251","0.001s"],[331,11,"00:00:06.227","00:00:06.256","00:00:06.251","00:00:06.252","0.001s"],[331,11,"00:00:06.227","00:00:06.256","00:00:06.252","00:00:06.254","0.001s"],[331,11,"00:00:06.227","00:00:06.256","00:00:06.254","00:00:06.255","0.001s"],[331,11,"00:00:06.227","00:00:06.256","00:00:06.255","00:00:06.256","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.250","00:00:06.251","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.251","00:00:06.252","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.252","00:00:06.254","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.254","00:00:06.255","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.255","00:00:06.256","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.256","00:00:06.257","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.257","00:00:06.258","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.258","00:00:06.260","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.260","00:00:06.261","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.261","00:00:06.263","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.263","00:00:06.265","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.265","00:00:06.267","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.267","00:00:06.268","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.268","00:00:06.269","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.269","00:00:06.270","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.274","00:00:06.275","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.275","00:00:06.276","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.276","00:00:06.276","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.276","00:00:06.277","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.277","00:00:06.278","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.278","00:00:06.280","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.280","00:00:06.281","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.281","00:00:06.282","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.282","00:00:06.283","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.283","00:00:06.284","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.284","00:00:06.285","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.285","00:00:06.286","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.286","00:00:06.287","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.287","00:00:06.288","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.288","00:00:06.289","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.289","00:00:06.290","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.295","00:00:06.296","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.296","00:00:06.296","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.296","00:00:06.297","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.297","00:00:06.298","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.298","00:00:06.300","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.300","00:00:06.301","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.301","00:00:06.302","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.302","00:00:06.303","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.303","00:00:06.304","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.304","00:00:06.305","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.305","00:00:06.306","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.306","00:00:06.307","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.307","00:00:06.309","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.309","00:00:06.310","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.310","00:00:06.311","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.311","00:00:06.312","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.312","00:00:06.313","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.313","00:00:06.314","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.314","00:00:06.315","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.315","00:00:06.316","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.316","00:00:06.318","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.318","00:00:06.319","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.319","00:00:06.320","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.320","00:00:06.321","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.321","00:00:06.322","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.322","00:00:06.323","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.323","00:00:06.324","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.324","00:00:06.326","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.326","00:00:06.327","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.327","00:00:06.328","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.328","00:00:06.329","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.329","00:00:06.330","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.330","00:00:06.331","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.331","00:00:06.332","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.332","00:00:06.333","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.333","00:00:06.335","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.335","00:00:06.336","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.336","00:00:06.337","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.337","00:00:06.338","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.338","00:00:06.339","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.339","00:00:06.340","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.340","00:00:06.342","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.342","00:00:06.343","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.343","00:00:06.344","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.344","00:00:06.345","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.345","00:00:06.346","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.346","00:00:06.348","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.348","00:00:06.349","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.349","00:00:06.350","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.350","00:00:06.351","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.351","00:00:06.352","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.352","00:00:06.353","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.353","00:00:06.354","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.354","00:00:06.356","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.356","00:00:06.357","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.357","00:00:06.358","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.358","00:00:06.359","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.359","00:00:06.360","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.360","00:00:06.361","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.361","00:00:06.363","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.363","00:00:06.364","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.364","00:00:06.365","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.365","00:00:06.366","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.366","00:00:06.367","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.367","00:00:06.368","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.368","00:00:06.370","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.370","00:00:06.371","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.371","00:00:06.372","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.372","00:00:06.373","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.373","00:00:06.374","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.374","00:00:06.375","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.375","00:00:06.377","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.377","00:00:06.378","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.378","00:00:06.379","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.379","00:00:06.380","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.380","00:00:06.381","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.381","00:00:06.382","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.382","00:00:06.384","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.384","00:00:06.385","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.385","00:00:06.386","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.386","00:00:06.387","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.387","00:00:06.388","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.388","00:00:06.390","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.390","00:00:06.391","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.391","00:00:06.392","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.392","00:00:06.393","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.393","00:00:06.394","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.394","00:00:06.395","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.395","00:00:06.397","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.397","00:00:06.398","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.398","00:00:06.399","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.399","00:00:06.400","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.400","00:00:06.401","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.401","00:00:06.402","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.402","00:00:06.403","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.403","00:00:06.405","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.405","00:00:06.406","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.406","00:00:06.407","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.409","00:00:06.411","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.411","00:00:06.411","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.411","00:00:06.413","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.413","00:00:06.414","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.414","00:00:06.416","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.416","00:00:06.417","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.417","00:00:06.418","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.418","00:00:06.420","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.420","00:00:06.421","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.421","00:00:06.423","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.423","00:00:06.424","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.424","00:00:06.425","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.425","00:00:06.426","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.426","00:00:06.428","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.428","00:00:06.429","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.429","00:00:06.431","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.431","00:00:06.432","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.432","00:00:06.433","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.433","00:00:06.435","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.435","00:00:06.436","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.436","00:00:06.438","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.438","00:00:06.439","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.439","00:00:06.441","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.441","00:00:06.442","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.442","00:00:06.443","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.443","00:00:06.445","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.445","00:00:06.446","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.446","00:00:06.447","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.447","00:00:06.449","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.449","00:00:06.450","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.450","00:00:06.451","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.451","00:00:06.453","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.453","00:00:06.454","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.454","00:00:06.456","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.456","00:00:06.457","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.457","00:00:06.458","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.458","00:00:06.460","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.460","00:00:06.461","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.461","00:00:06.462","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.462","00:00:06.464","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.464","00:00:06.465","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.465","00:00:06.467","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.467","00:00:06.468","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.468","00:00:06.469","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.469","00:00:06.471","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.471","00:00:06.472","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.472","00:00:06.474","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.474","00:00:06.475","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.475","00:00:06.476","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.476","00:00:06.478","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.478","00:00:06.479","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.479","00:00:06.480","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.480","00:00:06.482","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.482","00:00:06.483","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.483","00:00:06.485","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.485","00:00:06.486","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.486","00:00:06.487","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.487","00:00:06.489","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.489","00:00:06.490","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.490","00:00:06.492","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.492","00:00:06.493","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.493","00:00:06.494","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.494","00:00:06.496","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.496","00:00:06.497","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.497","00:00:06.499","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.499","00:00:06.500","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.500","00:00:06.501","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.501","00:00:06.503","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.503","00:00:06.504","0.002s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.511","00:00:06.512","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.512","00:00:06.513","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.513","00:00:06.514","0.001s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.514","00:00:06.516","0.002s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.275","00:00:06.276","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.276","00:00:06.276","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.276","00:00:06.277","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.277","00:00:06.278","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.278","00:00:06.280","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.280","00:00:06.281","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.281","00:00:06.282","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.282","00:00:06.283","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.283","00:00:06.284","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.284","00:00:06.285","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.285","00:00:06.286","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.286","00:00:06.287","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.287","00:00:06.288","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.288","00:00:06.289","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.289","00:00:06.290","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.294","00:00:06.296","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.296","00:00:06.296","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.296","00:00:06.297","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.297","00:00:06.298","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.298","00:00:06.300","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.300","00:00:06.301","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.301","00:00:06.302","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.302","00:00:06.303","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.303","00:00:06.304","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.304","00:00:06.305","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.305","00:00:06.306","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.306","00:00:06.307","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.307","00:00:06.308","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.308","00:00:06.310","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.310","00:00:06.311","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.311","00:00:06.312","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.312","00:00:06.313","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.313","00:00:06.314","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.314","00:00:06.315","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.315","00:00:06.316","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.316","00:00:06.318","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.318","00:00:06.319","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.319","00:00:06.320","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.320","00:00:06.321","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.321","00:00:06.322","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.322","00:00:06.323","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.323","00:00:06.324","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.324","00:00:06.325","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.325","00:00:06.327","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.327","00:00:06.328","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.328","00:00:06.329","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.329","00:00:06.330","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.330","00:00:06.331","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.331","00:00:06.332","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.332","00:00:06.333","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.333","00:00:06.334","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.334","00:00:06.336","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.336","00:00:06.337","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.337","00:00:06.338","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.338","00:00:06.339","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.339","00:00:06.340","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.340","00:00:06.342","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.342","00:00:06.343","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.343","00:00:06.344","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.344","00:00:06.345","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.345","00:00:06.346","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.346","00:00:06.348","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.348","00:00:06.349","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.349","00:00:06.350","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.350","00:00:06.351","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.351","00:00:06.352","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.352","00:00:06.353","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.353","00:00:06.354","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.354","00:00:06.356","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.356","00:00:06.357","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.357","00:00:06.358","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.358","00:00:06.359","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.359","00:00:06.360","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.360","00:00:06.361","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.361","00:00:06.363","0.001s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.363","00:00:06.364","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.296","00:00:06.296","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.296","00:00:06.297","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.297","00:00:06.298","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.298","00:00:06.300","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.300","00:00:06.301","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.301","00:00:06.302","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.302","00:00:06.303","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.303","00:00:06.304","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.304","00:00:06.305","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.305","00:00:06.306","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.306","00:00:06.307","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.307","00:00:06.309","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.309","00:00:06.310","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.310","00:00:06.311","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.311","00:00:06.312","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.312","00:00:06.313","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.313","00:00:06.314","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.314","00:00:06.315","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.315","00:00:06.316","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.316","00:00:06.318","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.318","00:00:06.319","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.319","00:00:06.320","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.320","00:00:06.321","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.321","00:00:06.322","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.322","00:00:06.323","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.323","00:00:06.324","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.324","00:00:06.325","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.325","00:00:06.327","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.327","00:00:06.328","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.328","00:00:06.329","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.329","00:00:06.330","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.330","00:00:06.331","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.331","00:00:06.332","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.332","00:00:06.333","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.333","00:00:06.335","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.335","00:00:06.336","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.336","00:00:06.337","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.337","00:00:06.338","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.338","00:00:06.339","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.339","00:00:06.340","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.340","00:00:06.342","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.342","00:00:06.343","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.343","00:00:06.344","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.344","00:00:06.345","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.345","00:00:06.346","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.346","00:00:06.348","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.348","00:00:06.349","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.349","00:00:06.350","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.350","00:00:06.351","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.351","00:00:06.352","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.352","00:00:06.353","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.353","00:00:06.354","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.354","00:00:06.356","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.356","00:00:06.357","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.357","00:00:06.358","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.358","00:00:06.359","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.359","00:00:06.360","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.360","00:00:06.361","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.361","00:00:06.363","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.363","00:00:06.364","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.364","00:00:06.365","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.365","00:00:06.366","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.366","00:00:06.367","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.367","00:00:06.368","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.368","00:00:06.370","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.370","00:00:06.371","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.371","00:00:06.372","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.372","00:00:06.373","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.373","00:00:06.374","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.374","00:00:06.375","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.375","00:00:06.377","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.377","00:00:06.378","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.378","00:00:06.379","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.379","00:00:06.380","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.380","00:00:06.381","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.381","00:00:06.382","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.382","00:00:06.384","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.384","00:00:06.385","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.385","00:00:06.386","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.386","00:00:06.387","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.387","00:00:06.388","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.388","00:00:06.390","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.390","00:00:06.391","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.391","00:00:06.392","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.392","00:00:06.393","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.393","00:00:06.394","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.394","00:00:06.395","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.395","00:00:06.396","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.396","00:00:06.398","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.398","00:00:06.399","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.399","00:00:06.400","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.400","00:00:06.401","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.401","00:00:06.402","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.402","00:00:06.403","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.403","00:00:06.405","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.405","00:00:06.406","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.406","00:00:06.407","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.409","00:00:06.411","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.411","00:00:06.411","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.411","00:00:06.413","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.413","00:00:06.414","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.414","00:00:06.416","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.416","00:00:06.417","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.417","00:00:06.418","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.418","00:00:06.420","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.420","00:00:06.421","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.421","00:00:06.423","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.423","00:00:06.424","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.424","00:00:06.425","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.425","00:00:06.426","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.426","00:00:06.428","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.428","00:00:06.429","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.429","00:00:06.431","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.431","00:00:06.432","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.432","00:00:06.433","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.433","00:00:06.435","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.435","00:00:06.436","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.436","00:00:06.438","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.438","00:00:06.439","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.439","00:00:06.441","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.441","00:00:06.442","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.442","00:00:06.443","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.443","00:00:06.445","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.445","00:00:06.446","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.446","00:00:06.447","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.447","00:00:06.449","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.449","00:00:06.450","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.450","00:00:06.451","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.451","00:00:06.453","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.453","00:00:06.454","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.454","00:00:06.456","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.456","00:00:06.457","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.457","00:00:06.458","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.458","00:00:06.460","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.460","00:00:06.461","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.461","00:00:06.462","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.462","00:00:06.464","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.464","00:00:06.465","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.465","00:00:06.467","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.467","00:00:06.468","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.468","00:00:06.469","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.469","00:00:06.471","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.471","00:00:06.472","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.472","00:00:06.474","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.474","00:00:06.475","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.475","00:00:06.476","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.476","00:00:06.478","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.478","00:00:06.479","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.479","00:00:06.480","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.480","00:00:06.482","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.482","00:00:06.483","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.483","00:00:06.485","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.485","00:00:06.486","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.486","00:00:06.487","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.487","00:00:06.489","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.489","00:00:06.490","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.490","00:00:06.492","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.492","00:00:06.493","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.493","00:00:06.494","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.494","00:00:06.496","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.496","00:00:06.497","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.497","00:00:06.499","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.499","00:00:06.500","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.500","00:00:06.501","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.501","00:00:06.503","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.503","00:00:06.504","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.511","00:00:06.512","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.512","00:00:06.513","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.513","00:00:06.514","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.514","00:00:06.516","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.516","00:00:06.517","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.517","00:00:06.518","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.518","00:00:06.520","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.520","00:00:06.521","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.521","00:00:06.522","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.522","00:00:06.524","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.524","00:00:06.525","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.525","00:00:06.527","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.527","00:00:06.528","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.528","00:00:06.530","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.530","00:00:06.531","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.531","00:00:06.533","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.533","00:00:06.534","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.534","00:00:06.536","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.536","00:00:06.537","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.537","00:00:06.538","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.540","00:00:06.541","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.541","00:00:06.542","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.542","00:00:06.543","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.543","00:00:06.545","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.545","00:00:06.546","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.546","00:00:06.548","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.548","00:00:06.549","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.549","00:00:06.551","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.551","00:00:06.552","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.552","00:00:06.553","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.557","00:00:06.558","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.558","00:00:06.560","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.560","00:00:06.561","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.561","00:00:06.563","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.563","00:00:06.564","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.564","00:00:06.565","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.565","00:00:06.567","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.567","00:00:06.568","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.568","00:00:06.570","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.570","00:00:06.571","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.571","00:00:06.573","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.573","00:00:06.574","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.574","00:00:06.575","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.575","00:00:06.577","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.577","00:00:06.578","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.578","00:00:06.580","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.580","00:00:06.581","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.581","00:00:06.583","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.583","00:00:06.584","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.584","00:00:06.585","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.585","00:00:06.587","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.587","00:00:06.588","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.588","00:00:06.590","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.590","00:00:06.591","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.591","00:00:06.593","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.593","00:00:06.594","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.594","00:00:06.596","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.596","00:00:06.597","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.597","00:00:06.598","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.598","00:00:06.600","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.600","00:00:06.601","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.601","00:00:06.603","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.603","00:00:06.604","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.604","00:00:06.606","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.606","00:00:06.607","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.607","00:00:06.608","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.608","00:00:06.610","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.610","00:00:06.611","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.611","00:00:06.613","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.613","00:00:06.614","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.614","00:00:06.616","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.616","00:00:06.617","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.617","00:00:06.618","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.618","00:00:06.620","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.620","00:00:06.621","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.621","00:00:06.623","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.623","00:00:06.624","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.624","00:00:06.626","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.626","00:00:06.627","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.627","00:00:06.629","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.629","00:00:06.630","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.630","00:00:06.631","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.631","00:00:06.633","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.633","00:00:06.634","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.634","00:00:06.636","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.636","00:00:06.637","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.637","00:00:06.639","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.639","00:00:06.640","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.640","00:00:06.641","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.641","00:00:06.643","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.643","00:00:06.644","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.644","00:00:06.646","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.646","00:00:06.647","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.647","00:00:06.649","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.649","00:00:06.650","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.650","00:00:06.651","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.651","00:00:06.653","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.653","00:00:06.654","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.654","00:00:06.656","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.656","00:00:06.657","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.657","00:00:06.659","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.659","00:00:06.660","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.660","00:00:06.662","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.662","00:00:06.663","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.663","00:00:06.664","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.664","00:00:06.666","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.666","00:00:06.667","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.667","00:00:06.669","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.669","00:00:06.670","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.670","00:00:06.672","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.672","00:00:06.673","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.673","00:00:06.675","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.675","00:00:06.676","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.676","00:00:06.678","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.678","00:00:06.679","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.679","00:00:06.680","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.680","00:00:06.682","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.682","00:00:06.683","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.683","00:00:06.685","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.685","00:00:06.686","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.686","00:00:06.688","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.694","00:00:06.695","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.695","00:00:06.696","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.696","00:00:06.697","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.697","00:00:06.698","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.698","00:00:06.700","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.705","00:00:06.706","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.706","00:00:06.707","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.707","00:00:06.709","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.709","00:00:06.710","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.710","00:00:06.712","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.712","00:00:06.713","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.713","00:00:06.715","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.715","00:00:06.716","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.716","00:00:06.718","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.720","00:00:06.720","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.720","00:00:06.722","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.722","00:00:06.724","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.724","00:00:06.725","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.725","00:00:06.726","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.726","00:00:06.728","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.728","00:00:06.729","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.729","00:00:06.731","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.731","00:00:06.732","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.732","00:00:06.733","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.733","00:00:06.735","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.735","00:00:06.736","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.736","00:00:06.738","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.738","00:00:06.739","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.739","00:00:06.741","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.741","00:00:06.742","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.742","00:00:06.744","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.744","00:00:06.745","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.745","00:00:06.746","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.746","00:00:06.748","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.748","00:00:06.749","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.749","00:00:06.751","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.751","00:00:06.753","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.753","00:00:06.754","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.754","00:00:06.755","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.755","00:00:06.757","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.757","00:00:06.759","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.759","00:00:06.760","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.760","00:00:06.761","0.001s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.761","00:00:06.763","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.763","00:00:06.765","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.411","00:00:06.411","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.411","00:00:06.413","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.413","00:00:06.414","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.414","00:00:06.416","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.416","00:00:06.417","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.417","00:00:06.418","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.418","00:00:06.420","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.420","00:00:06.421","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.421","00:00:06.422","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.422","00:00:06.424","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.424","00:00:06.425","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.425","00:00:06.426","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.426","00:00:06.428","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.428","00:00:06.429","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.429","00:00:06.431","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.431","00:00:06.432","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.432","00:00:06.433","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.433","00:00:06.435","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.435","00:00:06.436","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.436","00:00:06.438","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.438","00:00:06.439","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.439","00:00:06.441","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.441","00:00:06.442","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.442","00:00:06.443","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.443","00:00:06.445","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.445","00:00:06.446","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.446","00:00:06.447","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.447","00:00:06.449","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.449","00:00:06.450","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.450","00:00:06.451","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.451","00:00:06.453","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.453","00:00:06.454","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.454","00:00:06.456","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.456","00:00:06.457","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.457","00:00:06.458","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.458","00:00:06.460","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.460","00:00:06.461","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.461","00:00:06.462","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.462","00:00:06.464","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.464","00:00:06.465","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.465","00:00:06.467","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.467","00:00:06.468","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.468","00:00:06.469","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.469","00:00:06.471","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.471","00:00:06.472","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.472","00:00:06.474","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.474","00:00:06.475","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.475","00:00:06.476","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.476","00:00:06.478","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.478","00:00:06.479","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.479","00:00:06.480","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.480","00:00:06.482","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.482","00:00:06.483","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.483","00:00:06.484","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.484","00:00:06.486","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.486","00:00:06.487","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.487","00:00:06.489","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.489","00:00:06.490","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.490","00:00:06.491","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.491","00:00:06.493","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.493","00:00:06.494","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.494","00:00:06.496","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.496","00:00:06.497","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.497","00:00:06.499","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.499","00:00:06.500","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.500","00:00:06.501","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.501","00:00:06.503","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.503","00:00:06.504","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.511","00:00:06.512","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.512","00:00:06.513","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.513","00:00:06.514","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.514","00:00:06.516","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.516","00:00:06.517","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.517","00:00:06.518","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.518","00:00:06.520","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.520","00:00:06.521","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.521","00:00:06.522","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.522","00:00:06.524","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.524","00:00:06.525","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.525","00:00:06.527","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.527","00:00:06.528","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.528","00:00:06.530","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.530","00:00:06.531","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.531","00:00:06.532","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.532","00:00:06.534","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.534","00:00:06.536","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.536","00:00:06.537","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.537","00:00:06.538","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.538","00:00:06.540","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.540","00:00:06.541","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.541","00:00:06.542","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.542","00:00:06.543","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.543","00:00:06.545","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.545","00:00:06.546","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.546","00:00:06.548","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.548","00:00:06.549","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.549","00:00:06.550","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.550","00:00:06.552","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.552","00:00:06.553","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.557","00:00:06.558","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.558","00:00:06.560","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.560","00:00:06.561","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.561","00:00:06.563","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.563","00:00:06.564","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.564","00:00:06.565","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.565","00:00:06.567","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.567","00:00:06.568","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.568","00:00:06.570","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.570","00:00:06.571","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.571","00:00:06.573","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.573","00:00:06.574","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.574","00:00:06.575","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.575","00:00:06.577","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.577","00:00:06.578","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.578","00:00:06.580","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.580","00:00:06.581","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.581","00:00:06.582","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.582","00:00:06.584","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.584","00:00:06.585","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.585","00:00:06.587","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.587","00:00:06.588","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.588","00:00:06.590","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.590","00:00:06.591","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.591","00:00:06.593","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.593","00:00:06.594","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.594","00:00:06.596","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.596","00:00:06.597","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.597","00:00:06.598","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.598","00:00:06.600","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.600","00:00:06.601","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.601","00:00:06.603","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.603","00:00:06.604","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.604","00:00:06.606","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.606","00:00:06.607","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.607","00:00:06.608","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.608","00:00:06.610","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.610","00:00:06.611","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.611","00:00:06.613","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.613","00:00:06.614","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.614","00:00:06.616","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.616","00:00:06.617","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.617","00:00:06.618","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.618","00:00:06.620","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.620","00:00:06.621","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.621","00:00:06.623","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.623","00:00:06.624","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.624","00:00:06.626","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.626","00:00:06.627","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.627","00:00:06.629","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.629","00:00:06.630","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.630","00:00:06.631","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.631","00:00:06.633","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.633","00:00:06.634","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.634","00:00:06.636","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.636","00:00:06.637","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.637","00:00:06.639","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.639","00:00:06.640","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.640","00:00:06.641","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.641","00:00:06.643","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.643","00:00:06.644","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.644","00:00:06.646","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.646","00:00:06.647","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.647","00:00:06.649","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.649","00:00:06.650","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.650","00:00:06.651","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.651","00:00:06.653","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.653","00:00:06.654","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.654","00:00:06.656","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.656","00:00:06.657","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.657","00:00:06.659","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.659","00:00:06.660","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.660","00:00:06.662","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.662","00:00:06.663","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.663","00:00:06.664","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.664","00:00:06.666","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.666","00:00:06.667","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.667","00:00:06.669","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.669","00:00:06.670","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.670","00:00:06.672","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.672","00:00:06.673","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.673","00:00:06.675","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.675","00:00:06.676","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.676","00:00:06.678","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.678","00:00:06.679","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.679","00:00:06.680","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.680","00:00:06.682","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.682","00:00:06.683","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.683","00:00:06.685","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.685","00:00:06.686","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.686","00:00:06.688","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.694","00:00:06.695","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.695","00:00:06.696","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.696","00:00:06.697","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.697","00:00:06.698","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.698","00:00:06.700","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.705","00:00:06.706","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.706","00:00:06.707","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.707","00:00:06.709","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.709","00:00:06.710","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.710","00:00:06.712","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.712","00:00:06.713","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.713","00:00:06.715","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.715","00:00:06.716","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.716","00:00:06.718","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.720","00:00:06.720","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.720","00:00:06.722","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.722","00:00:06.723","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.723","00:00:06.725","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.725","00:00:06.726","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.726","00:00:06.728","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.728","00:00:06.729","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.729","00:00:06.731","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.731","00:00:06.732","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.732","00:00:06.733","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.733","00:00:06.735","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.735","00:00:06.736","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.736","00:00:06.738","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.738","00:00:06.739","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.739","00:00:06.741","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.741","00:00:06.742","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.742","00:00:06.744","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.744","00:00:06.745","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.745","00:00:06.746","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.746","00:00:06.748","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.748","00:00:06.749","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.749","00:00:06.751","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.751","00:00:06.752","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.752","00:00:06.754","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.754","00:00:06.755","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.755","00:00:06.757","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.757","00:00:06.758","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.758","00:00:06.760","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.760","00:00:06.761","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.761","00:00:06.763","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.763","00:00:06.765","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.769","00:00:06.770","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.770","00:00:06.771","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.771","00:00:06.772","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.772","00:00:06.774","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.774","00:00:06.775","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.775","00:00:06.777","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.777","00:00:06.778","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.778","00:00:06.780","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.780","00:00:06.781","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.781","00:00:06.783","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.783","00:00:06.784","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.784","00:00:06.786","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.786","00:00:06.787","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.787","00:00:06.789","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.789","00:00:06.790","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.790","00:00:06.792","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.792","00:00:06.793","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.793","00:00:06.795","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.795","00:00:06.796","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.796","00:00:06.798","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.798","00:00:06.799","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.799","00:00:06.801","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.801","00:00:06.802","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.802","00:00:06.804","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.804","00:00:06.805","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.805","00:00:06.807","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.811","00:00:06.812","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.812","00:00:06.813","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.813","00:00:06.814","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.814","00:00:06.816","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.816","00:00:06.817","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.817","00:00:06.819","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.819","00:00:06.820","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.820","00:00:06.822","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.822","00:00:06.824","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.824","00:00:06.825","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.825","00:00:06.826","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.826","00:00:06.828","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.828","00:00:06.829","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.829","00:00:06.831","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.831","00:00:06.832","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.832","00:00:06.834","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.834","00:00:06.835","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.835","00:00:06.837","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.837","00:00:06.838","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.838","00:00:06.840","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.840","00:00:06.841","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.841","00:00:06.843","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.843","00:00:06.844","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.844","00:00:06.846","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.846","00:00:06.847","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.847","00:00:06.849","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.849","00:00:06.850","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.850","00:00:06.852","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.852","00:00:06.853","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.853","00:00:06.855","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.855","00:00:06.856","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.856","00:00:06.858","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.858","00:00:06.859","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.859","00:00:06.861","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.861","00:00:06.862","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.862","00:00:06.864","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.864","00:00:06.865","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.865","00:00:06.867","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.867","00:00:06.868","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.868","00:00:06.870","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.870","00:00:06.871","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.871","00:00:06.873","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.873","00:00:06.874","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.874","00:00:06.876","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.876","00:00:06.877","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.877","00:00:06.879","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.879","00:00:06.880","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.880","00:00:06.882","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.882","00:00:06.883","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.883","00:00:06.884","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.884","00:00:06.886","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.886","00:00:06.888","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.888","00:00:06.889","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.889","00:00:06.891","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.891","00:00:06.892","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.892","00:00:06.893","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.893","00:00:06.895","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.895","00:00:06.896","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.896","00:00:06.898","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.898","00:00:06.899","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.899","00:00:06.901","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.901","00:00:06.902","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.902","00:00:06.904","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.904","00:00:06.906","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.906","00:00:06.907","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.907","00:00:06.908","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.908","00:00:06.910","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.910","00:00:06.911","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.911","00:00:06.913","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.913","00:00:06.914","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.914","00:00:06.916","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.916","00:00:06.917","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.917","00:00:06.919","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.919","00:00:06.920","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.920","00:00:06.922","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.922","00:00:06.923","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.923","00:00:06.925","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.925","00:00:06.926","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.926","00:00:06.928","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.928","00:00:06.929","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.929","00:00:06.931","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.931","00:00:06.932","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.932","00:00:06.934","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.934","00:00:06.935","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.935","00:00:06.937","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.937","00:00:06.938","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.938","00:00:06.940","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.940","00:00:06.942","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.942","00:00:06.943","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.943","00:00:06.945","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.945","00:00:06.946","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.946","00:00:06.948","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.948","00:00:06.949","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.949","00:00:06.951","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.951","00:00:06.952","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.952","00:00:06.954","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.956","00:00:06.957","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.957","00:00:06.959","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.959","00:00:06.960","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.960","00:00:06.962","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.962","00:00:06.963","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.963","00:00:06.964","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.964","00:00:06.966","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.969","00:00:06.969","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.969","00:00:06.971","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.971","00:00:06.972","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.972","00:00:06.974","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.974","00:00:06.975","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.975","00:00:06.977","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.977","00:00:06.978","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.978","00:00:06.980","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.980","00:00:06.981","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.981","00:00:06.983","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.983","00:00:06.984","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.984","00:00:06.986","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.986","00:00:06.988","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.988","00:00:06.989","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.989","00:00:06.991","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.991","00:00:06.992","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.992","00:00:06.994","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.994","00:00:06.995","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.995","00:00:06.997","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.997","00:00:06.998","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.998","00:00:07.000","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.000","00:00:07.001","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.001","00:00:07.003","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.003","00:00:07.005","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.005","00:00:07.006","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.006","00:00:07.008","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.008","00:00:07.009","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.009","00:00:07.011","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.011","00:00:07.012","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.012","00:00:07.014","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.014","00:00:07.015","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.015","00:00:07.017","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.017","00:00:07.018","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.018","00:00:07.020","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.020","00:00:07.021","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.021","00:00:07.023","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.023","00:00:07.025","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.025","00:00:07.026","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.026","00:00:07.028","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.028","00:00:07.029","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.029","00:00:07.031","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.031","00:00:07.032","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.032","00:00:07.034","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.034","00:00:07.035","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.035","00:00:07.037","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.037","00:00:07.038","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.038","00:00:07.040","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.040","00:00:07.042","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.042","00:00:07.043","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.043","00:00:07.045","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.045","00:00:07.046","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.046","00:00:07.048","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.048","00:00:07.049","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.049","00:00:07.051","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.051","00:00:07.052","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.052","00:00:07.054","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.054","00:00:07.055","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.055","00:00:07.057","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.057","00:00:07.058","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.058","00:00:07.060","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.060","00:00:07.062","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.062","00:00:07.063","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.063","00:00:07.065","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.065","00:00:07.066","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.066","00:00:07.068","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.068","00:00:07.069","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.069","00:00:07.071","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.071","00:00:07.072","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.072","00:00:07.074","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.074","00:00:07.075","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.075","00:00:07.077","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.077","00:00:07.079","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.079","00:00:07.080","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.080","00:00:07.082","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.082","00:00:07.083","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.083","00:00:07.085","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.085","00:00:07.086","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.086","00:00:07.088","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.088","00:00:07.089","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.089","00:00:07.091","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.091","00:00:07.092","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.092","00:00:07.094","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.094","00:00:07.096","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.096","00:00:07.097","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.097","00:00:07.099","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.099","00:00:07.100","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.100","00:00:07.102","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.102","00:00:07.103","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.103","00:00:07.105","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.105","00:00:07.106","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.106","00:00:07.108","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.108","00:00:07.109","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.109","00:00:07.111","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.111","00:00:07.113","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.113","00:00:07.114","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.114","00:00:07.116","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.116","00:00:07.117","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.117","00:00:07.119","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.119","00:00:07.120","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.120","00:00:07.122","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.122","00:00:07.123","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.123","00:00:07.125","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.125","00:00:07.126","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.126","00:00:07.128","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.128","00:00:07.130","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.130","00:00:07.131","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.131","00:00:07.133","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.133","00:00:07.134","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.134","00:00:07.136","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.136","00:00:07.137","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.137","00:00:07.139","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.139","00:00:07.141","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.141","00:00:07.142","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.142","00:00:07.144","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.144","00:00:07.145","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.145","00:00:07.147","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.147","00:00:07.149","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.149","00:00:07.150","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.150","00:00:07.152","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.152","00:00:07.153","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.153","00:00:07.155","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.155","00:00:07.156","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.156","00:00:07.158","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.158","00:00:07.160","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.160","00:00:07.161","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.161","00:00:07.163","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.163","00:00:07.165","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.165","00:00:07.166","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.166","00:00:07.168","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.168","00:00:07.169","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.169","00:00:07.171","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.171","00:00:07.172","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.172","00:00:07.174","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.177","00:00:07.178","0.000s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.178","00:00:07.179","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.179","00:00:07.181","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.181","00:00:07.182","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.182","00:00:07.184","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.184","00:00:07.185","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.185","00:00:07.187","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.189","00:00:07.190","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.192","00:00:07.193","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.193","00:00:07.195","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.195","00:00:07.196","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.196","00:00:07.198","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.198","00:00:07.200","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.200","00:00:07.201","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.201","00:00:07.203","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.203","00:00:07.204","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.204","00:00:07.206","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.206","00:00:07.207","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.207","00:00:07.209","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.209","00:00:07.211","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.211","00:00:07.212","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.212","00:00:07.214","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.214","00:00:07.215","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.215","00:00:07.217","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.217","00:00:07.219","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.219","00:00:07.220","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.220","00:00:07.222","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.222","00:00:07.223","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.223","00:00:07.225","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.225","00:00:07.226","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.226","00:00:07.228","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.228","00:00:07.230","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.230","00:00:07.232","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.232","00:00:07.233","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.233","00:00:07.235","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.235","00:00:07.236","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.236","00:00:07.238","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.238","00:00:07.239","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.239","00:00:07.241","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.241","00:00:07.242","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.242","00:00:07.244","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.244","00:00:07.246","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.246","00:00:07.247","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.247","00:00:07.249","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.249","00:00:07.251","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.251","00:00:07.252","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.252","00:00:07.254","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.254","00:00:07.255","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.255","00:00:07.257","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.257","00:00:07.259","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.259","00:00:07.260","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.260","00:00:07.262","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.262","00:00:07.263","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.263","00:00:07.265","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.265","00:00:07.267","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.267","00:00:07.268","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.268","00:00:07.270","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.270","00:00:07.271","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.271","00:00:07.273","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.273","00:00:07.274","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.274","00:00:07.276","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.276","00:00:07.278","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.278","00:00:07.279","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.279","00:00:07.281","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.281","00:00:07.283","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.283","00:00:07.284","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.284","00:00:07.286","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.286","00:00:07.287","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.287","00:00:07.289","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.289","00:00:07.291","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.291","00:00:07.292","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.292","00:00:07.294","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.294","00:00:07.295","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.295","00:00:07.297","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.297","00:00:07.299","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.299","00:00:07.300","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.300","00:00:07.302","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.302","00:00:07.303","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.303","00:00:07.305","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.305","00:00:07.306","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.306","00:00:07.308","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.308","00:00:07.310","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.310","00:00:07.311","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.311","00:00:07.313","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.313","00:00:07.315","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.315","00:00:07.316","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.316","00:00:07.318","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.318","00:00:07.319","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.319","00:00:07.321","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.321","00:00:07.323","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.323","00:00:07.324","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.324","00:00:07.326","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.326","00:00:07.327","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.327","00:00:07.329","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.329","00:00:07.330","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.330","00:00:07.332","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.332","00:00:07.334","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.334","00:00:07.335","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.335","00:00:07.337","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.337","00:00:07.339","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.339","00:00:07.340","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.340","00:00:07.342","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.342","00:00:07.344","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.344","00:00:07.345","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.345","00:00:07.347","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.347","00:00:07.348","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.348","00:00:07.350","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.350","00:00:07.352","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.352","00:00:07.353","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.353","00:00:07.355","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.355","00:00:07.357","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.357","00:00:07.358","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.358","00:00:07.360","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.360","00:00:07.362","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.362","00:00:07.363","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.363","00:00:07.365","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.365","00:00:07.367","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.367","00:00:07.368","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.368","00:00:07.370","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.370","00:00:07.371","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.371","00:00:07.373","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.373","00:00:07.375","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.375","00:00:07.376","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.376","00:00:07.378","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.378","00:00:07.380","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.380","00:00:07.381","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.381","00:00:07.383","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.383","00:00:07.385","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.385","00:00:07.386","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.386","00:00:07.388","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.388","00:00:07.390","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.390","00:00:07.391","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.391","00:00:07.393","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.393","00:00:07.394","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.394","00:00:07.396","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.396","00:00:07.398","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.398","00:00:07.399","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.399","00:00:07.401","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.401","00:00:07.403","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.403","00:00:07.404","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.404","00:00:07.406","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.406","00:00:07.408","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.408","00:00:07.409","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.409","00:00:07.411","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.411","00:00:07.413","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.413","00:00:07.414","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.414","00:00:07.416","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.416","00:00:07.417","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.417","00:00:07.419","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.419","00:00:07.421","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.426","00:00:07.428","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.428","00:00:07.428","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.428","00:00:07.430","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.430","00:00:07.432","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.432","00:00:07.433","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.433","00:00:07.435","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.435","00:00:07.437","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.437","00:00:07.438","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.438","00:00:07.440","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.440","00:00:07.442","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.442","00:00:07.443","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.443","00:00:07.445","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.445","00:00:07.446","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.446","00:00:07.448","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.448","00:00:07.450","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.450","00:00:07.452","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.452","00:00:07.453","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.453","00:00:07.455","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.455","00:00:07.457","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.457","00:00:07.458","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.458","00:00:07.460","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.460","00:00:07.461","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.461","00:00:07.463","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.469","00:00:07.470","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.470","00:00:07.471","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.471","00:00:07.473","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.473","00:00:07.475","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.475","00:00:07.476","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.476","00:00:07.478","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.478","00:00:07.480","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.480","00:00:07.481","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.481","00:00:07.483","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.483","00:00:07.484","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.484","00:00:07.486","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.486","00:00:07.488","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.488","00:00:07.489","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.489","00:00:07.491","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.491","00:00:07.493","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.493","00:00:07.494","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.494","00:00:07.496","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.496","00:00:07.498","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.498","00:00:07.499","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.499","00:00:07.501","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.501","00:00:07.503","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.503","00:00:07.504","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.504","00:00:07.506","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.506","00:00:07.507","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.507","00:00:07.509","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.509","00:00:07.511","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.511","00:00:07.512","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.512","00:00:07.514","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.514","00:00:07.516","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.516","00:00:07.517","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.517","00:00:07.519","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.519","00:00:07.521","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.521","00:00:07.522","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.522","00:00:07.524","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.524","00:00:07.526","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.530","00:00:07.531","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.531","00:00:07.532","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.532","00:00:07.534","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.534","00:00:07.536","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.536","00:00:07.537","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.543","00:00:07.545","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.545","00:00:07.546","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.546","00:00:07.547","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.547","00:00:07.549","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.549","00:00:07.550","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.550","00:00:07.552","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.552","00:00:07.553","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.553","00:00:07.555","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.555","00:00:07.557","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.557","00:00:07.559","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.559","00:00:07.560","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.560","00:00:07.562","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.562","00:00:07.564","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.564","00:00:07.565","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.565","00:00:07.567","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.567","00:00:07.569","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.569","00:00:07.570","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.570","00:00:07.572","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.572","00:00:07.574","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.574","00:00:07.576","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.576","00:00:07.577","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.577","00:00:07.579","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.579","00:00:07.581","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.581","00:00:07.582","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.584","00:00:07.586","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.586","00:00:07.587","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.587","00:00:07.589","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.595","00:00:07.597","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.597","00:00:07.598","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.598","00:00:07.599","0.001s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.599","00:00:07.601","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.601","00:00:07.603","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.603","00:00:07.604","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.604","00:00:07.606","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.606","00:00:07.608","0.002s"],[481,9,"00:00:06.430","00:00:06.453","00:00:06.442","00:00:06.443","0.001s"],[481,9,"00:00:06.430","00:00:06.453","00:00:06.443","00:00:06.444","0.001s"],[481,9,"00:00:06.430","00:00:06.453","00:00:06.444","00:00:06.446","0.001s"],[481,9,"00:00:06.430","00:00:06.453","00:00:06.446","00:00:06.447","0.001s"],[481,9,"00:00:06.430","00:00:06.453","00:00:06.447","00:00:06.449","0.001s"],[481,9,"00:00:06.430","00:00:06.453","00:00:06.449","00:00:06.450","0.001s"],[481,9,"00:00:06.430","00:00:06.453","00:00:06.450","00:00:06.451","0.001s"],[481,9,"00:00:06.430","00:00:06.453","00:00:06.451","00:00:06.453","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.512","00:00:06.513","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.513","00:00:06.514","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.514","00:00:06.516","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.516","00:00:06.517","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.517","00:00:06.518","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.518","00:00:06.520","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.520","00:00:06.521","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.521","00:00:06.523","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.523","00:00:06.524","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.524","00:00:06.525","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.525","00:00:06.527","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.527","00:00:06.528","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.528","00:00:06.530","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.530","00:00:06.531","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.531","00:00:06.533","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.533","00:00:06.534","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.534","00:00:06.536","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.536","00:00:06.537","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.537","00:00:06.538","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.540","00:00:06.541","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.541","00:00:06.542","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.542","00:00:06.543","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.543","00:00:06.545","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.545","00:00:06.546","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.546","00:00:06.548","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.548","00:00:06.549","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.549","00:00:06.551","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.551","00:00:06.552","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.552","00:00:06.553","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.558","00:00:06.558","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.558","00:00:06.560","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.560","00:00:06.561","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.561","00:00:06.563","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.563","00:00:06.564","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.564","00:00:06.565","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.565","00:00:06.567","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.567","00:00:06.568","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.568","00:00:06.570","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.570","00:00:06.571","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.571","00:00:06.573","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.573","00:00:06.574","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.574","00:00:06.575","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.575","00:00:06.577","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.577","00:00:06.578","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.578","00:00:06.580","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.580","00:00:06.581","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.581","00:00:06.583","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.583","00:00:06.584","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.584","00:00:06.585","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.585","00:00:06.587","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.587","00:00:06.588","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.588","00:00:06.590","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.590","00:00:06.591","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.591","00:00:06.593","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.593","00:00:06.594","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.594","00:00:06.596","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.596","00:00:06.597","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.597","00:00:06.598","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.598","00:00:06.600","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.600","00:00:06.601","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.601","00:00:06.603","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.603","00:00:06.604","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.604","00:00:06.606","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.606","00:00:06.607","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.607","00:00:06.608","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.608","00:00:06.610","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.610","00:00:06.611","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.611","00:00:06.613","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.613","00:00:06.614","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.614","00:00:06.616","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.616","00:00:06.617","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.617","00:00:06.618","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.618","00:00:06.620","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.620","00:00:06.621","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.621","00:00:06.623","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.623","00:00:06.624","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.624","00:00:06.626","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.626","00:00:06.627","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.627","00:00:06.629","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.629","00:00:06.630","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.630","00:00:06.631","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.631","00:00:06.633","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.633","00:00:06.634","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.634","00:00:06.636","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.636","00:00:06.637","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.637","00:00:06.639","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.639","00:00:06.640","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.640","00:00:06.642","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.642","00:00:06.643","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.643","00:00:06.644","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.644","00:00:06.646","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.646","00:00:06.647","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.647","00:00:06.649","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.649","00:00:06.650","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.650","00:00:06.652","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.652","00:00:06.653","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.653","00:00:06.654","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.654","00:00:06.656","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.656","00:00:06.657","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.657","00:00:06.659","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.659","00:00:06.660","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.660","00:00:06.662","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.662","00:00:06.663","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.663","00:00:06.664","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.664","00:00:06.666","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.666","00:00:06.667","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.667","00:00:06.669","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.669","00:00:06.670","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.670","00:00:06.672","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.672","00:00:06.673","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.673","00:00:06.675","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.675","00:00:06.676","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.676","00:00:06.678","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.678","00:00:06.679","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.679","00:00:06.680","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.680","00:00:06.682","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.682","00:00:06.683","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.683","00:00:06.685","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.685","00:00:06.686","0.001s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.686","00:00:06.688","0.002s"],[641,6,"00:00:06.529","00:00:06.546","00:00:06.540","00:00:06.541","0.001s"],[641,6,"00:00:06.529","00:00:06.546","00:00:06.541","00:00:06.542","0.001s"],[641,6,"00:00:06.529","00:00:06.546","00:00:06.542","00:00:06.543","0.001s"],[641,6,"00:00:06.529","00:00:06.546","00:00:06.543","00:00:06.545","0.001s"],[641,6,"00:00:06.529","00:00:06.546","00:00:06.545","00:00:06.546","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.558","00:00:06.559","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.559","00:00:06.561","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.561","00:00:06.563","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.563","00:00:06.564","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.564","00:00:06.565","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.565","00:00:06.567","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.567","00:00:06.568","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.568","00:00:06.570","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.570","00:00:06.571","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.571","00:00:06.572","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.572","00:00:06.574","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.574","00:00:06.575","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.575","00:00:06.577","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.577","00:00:06.578","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.578","00:00:06.580","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.580","00:00:06.581","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.581","00:00:06.582","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.582","00:00:06.584","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.584","00:00:06.585","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.585","00:00:06.587","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.587","00:00:06.588","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.588","00:00:06.590","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.590","00:00:06.591","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.591","00:00:06.593","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.593","00:00:06.594","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.594","00:00:06.595","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.595","00:00:06.597","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.597","00:00:06.598","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.598","00:00:06.600","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.600","00:00:06.601","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.601","00:00:06.603","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.603","00:00:06.604","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.604","00:00:06.605","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.605","00:00:06.607","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.607","00:00:06.608","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.608","00:00:06.610","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.610","00:00:06.611","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.611","00:00:06.613","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.613","00:00:06.614","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.614","00:00:06.615","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.615","00:00:06.617","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.617","00:00:06.618","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.618","00:00:06.620","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.620","00:00:06.621","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.621","00:00:06.623","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.623","00:00:06.624","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.624","00:00:06.626","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.626","00:00:06.627","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.627","00:00:06.628","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.628","00:00:06.630","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.630","00:00:06.631","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.631","00:00:06.633","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.633","00:00:06.634","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.634","00:00:06.636","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.636","00:00:06.637","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.637","00:00:06.638","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.638","00:00:06.640","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.640","00:00:06.641","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.641","00:00:06.643","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.643","00:00:06.644","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.644","00:00:06.646","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.646","00:00:06.647","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.647","00:00:06.648","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.648","00:00:06.650","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.650","00:00:06.651","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.651","00:00:06.653","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.653","00:00:06.654","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.654","00:00:06.656","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.656","00:00:06.657","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.657","00:00:06.659","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.659","00:00:06.660","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.660","00:00:06.662","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.662","00:00:06.663","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.663","00:00:06.664","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.664","00:00:06.666","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.666","00:00:06.667","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.667","00:00:06.669","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.669","00:00:06.670","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.670","00:00:06.672","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.672","00:00:06.673","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.673","00:00:06.675","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.675","00:00:06.676","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.676","00:00:06.678","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.678","00:00:06.679","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.679","00:00:06.680","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.680","00:00:06.682","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.682","00:00:06.683","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.683","00:00:06.685","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.685","00:00:06.686","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.686","00:00:06.688","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.694","00:00:06.695","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.695","00:00:06.695","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.695","00:00:06.697","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.697","00:00:06.698","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.698","00:00:06.700","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.705","00:00:06.706","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.706","00:00:06.707","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.707","00:00:06.709","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.709","00:00:06.710","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.710","00:00:06.712","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.712","00:00:06.713","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.713","00:00:06.714","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.714","00:00:06.716","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.716","00:00:06.718","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.720","00:00:06.720","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.720","00:00:06.722","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.722","00:00:06.723","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.723","00:00:06.725","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.725","00:00:06.726","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.726","00:00:06.728","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.728","00:00:06.729","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.729","00:00:06.730","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.730","00:00:06.732","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.732","00:00:06.733","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.733","00:00:06.735","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.735","00:00:06.736","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.736","00:00:06.738","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.738","00:00:06.739","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.739","00:00:06.740","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.740","00:00:06.742","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.742","00:00:06.743","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.743","00:00:06.745","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.745","00:00:06.746","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.746","00:00:06.748","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.748","00:00:06.749","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.749","00:00:06.751","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.751","00:00:06.752","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.752","00:00:06.754","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.754","00:00:06.755","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.755","00:00:06.757","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.757","00:00:06.758","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.758","00:00:06.760","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.760","00:00:06.761","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.761","00:00:06.763","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.763","00:00:06.764","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.769","00:00:06.770","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.770","00:00:06.771","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.771","00:00:06.772","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.772","00:00:06.774","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.774","00:00:06.775","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.775","00:00:06.777","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.777","00:00:06.778","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.778","00:00:06.780","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.780","00:00:06.781","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.781","00:00:06.783","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.783","00:00:06.784","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.784","00:00:06.786","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.786","00:00:06.787","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.787","00:00:06.789","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.789","00:00:06.790","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.790","00:00:06.791","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.791","00:00:06.793","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.793","00:00:06.795","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.795","00:00:06.796","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.796","00:00:06.798","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.798","00:00:06.799","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.799","00:00:06.801","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.801","00:00:06.802","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.802","00:00:06.804","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.804","00:00:06.805","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.805","00:00:06.807","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.811","00:00:06.812","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.812","00:00:06.813","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.813","00:00:06.814","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.814","00:00:06.816","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.816","00:00:06.817","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.817","00:00:06.819","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.819","00:00:06.820","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.820","00:00:06.822","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.822","00:00:06.824","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.824","00:00:06.825","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.825","00:00:06.826","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.826","00:00:06.828","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.828","00:00:06.829","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.829","00:00:06.831","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.831","00:00:06.832","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.832","00:00:06.834","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.834","00:00:06.835","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.835","00:00:06.837","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.837","00:00:06.838","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.838","00:00:06.840","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.840","00:00:06.841","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.841","00:00:06.843","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.843","00:00:06.844","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.844","00:00:06.846","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.846","00:00:06.847","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.847","00:00:06.849","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.849","00:00:06.850","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.850","00:00:06.852","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.852","00:00:06.853","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.853","00:00:06.855","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.855","00:00:06.856","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.856","00:00:06.858","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.858","00:00:06.859","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.859","00:00:06.861","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.861","00:00:06.862","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.862","00:00:06.864","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.864","00:00:06.865","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.865","00:00:06.867","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.867","00:00:06.868","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.868","00:00:06.870","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.870","00:00:06.871","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.871","00:00:06.873","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.873","00:00:06.874","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.874","00:00:06.876","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.876","00:00:06.877","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.877","00:00:06.879","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.879","00:00:06.880","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.880","00:00:06.881","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.881","00:00:06.883","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.883","00:00:06.884","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.884","00:00:06.886","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.886","00:00:06.887","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.887","00:00:06.889","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.889","00:00:06.890","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.890","00:00:06.892","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.892","00:00:06.893","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.893","00:00:06.895","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.895","00:00:06.896","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.896","00:00:06.898","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.898","00:00:06.899","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.899","00:00:06.901","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.901","00:00:06.902","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.902","00:00:06.904","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.904","00:00:06.905","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.905","00:00:06.907","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.907","00:00:06.908","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.908","00:00:06.910","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.910","00:00:06.911","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.911","00:00:06.913","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.913","00:00:06.914","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.914","00:00:06.916","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.916","00:00:06.917","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.917","00:00:06.919","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.919","00:00:06.920","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.920","00:00:06.922","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.922","00:00:06.923","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.923","00:00:06.925","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.925","00:00:06.926","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.926","00:00:06.928","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.928","00:00:06.929","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.929","00:00:06.931","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.931","00:00:06.932","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.932","00:00:06.934","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.934","00:00:06.935","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.935","00:00:06.937","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.937","00:00:06.938","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.938","00:00:06.940","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.940","00:00:06.942","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.942","00:00:06.943","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.943","00:00:06.945","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.945","00:00:06.946","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.946","00:00:06.947","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.947","00:00:06.949","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.949","00:00:06.951","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.951","00:00:06.952","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.952","00:00:06.954","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.956","00:00:06.957","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.957","00:00:06.958","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.958","00:00:06.960","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.960","00:00:06.962","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.962","00:00:06.963","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.963","00:00:06.964","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.964","00:00:06.966","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.968","00:00:06.969","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.969","00:00:06.971","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.971","00:00:06.972","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.972","00:00:06.974","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.974","00:00:06.975","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.975","00:00:06.977","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.977","00:00:06.978","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.978","00:00:06.980","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.980","00:00:06.981","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.981","00:00:06.983","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.983","00:00:06.984","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.984","00:00:06.986","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.986","00:00:06.987","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.987","00:00:06.989","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.989","00:00:06.991","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.991","00:00:06.992","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.992","00:00:06.994","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.994","00:00:06.995","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.995","00:00:06.997","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.997","00:00:06.998","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.998","00:00:07.000","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.000","00:00:07.001","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.001","00:00:07.003","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.003","00:00:07.004","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.004","00:00:07.006","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.006","00:00:07.008","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.008","00:00:07.009","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.009","00:00:07.011","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.011","00:00:07.012","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.012","00:00:07.014","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.014","00:00:07.015","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.015","00:00:07.017","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.017","00:00:07.018","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.018","00:00:07.020","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.020","00:00:07.021","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.021","00:00:07.023","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.023","00:00:07.025","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.025","00:00:07.026","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.026","00:00:07.028","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.028","00:00:07.029","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.029","00:00:07.031","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.031","00:00:07.032","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.032","00:00:07.034","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.034","00:00:07.035","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.035","00:00:07.037","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.037","00:00:07.038","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.038","00:00:07.040","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.040","00:00:07.042","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.042","00:00:07.043","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.043","00:00:07.045","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.045","00:00:07.046","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.046","00:00:07.048","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.048","00:00:07.049","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.049","00:00:07.051","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.051","00:00:07.052","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.052","00:00:07.054","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.054","00:00:07.055","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.055","00:00:07.057","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.057","00:00:07.058","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.058","00:00:07.060","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.060","00:00:07.062","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.062","00:00:07.063","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.063","00:00:07.065","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.065","00:00:07.066","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.066","00:00:07.068","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.068","00:00:07.069","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.069","00:00:07.071","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.071","00:00:07.072","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.072","00:00:07.074","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.074","00:00:07.075","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.075","00:00:07.077","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.077","00:00:07.079","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.079","00:00:07.080","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.080","00:00:07.082","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.082","00:00:07.083","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.083","00:00:07.085","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.085","00:00:07.086","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.086","00:00:07.088","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.088","00:00:07.089","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.089","00:00:07.091","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.091","00:00:07.092","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.092","00:00:07.094","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.094","00:00:07.095","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.095","00:00:07.097","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.097","00:00:07.099","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.099","00:00:07.100","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.100","00:00:07.102","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.102","00:00:07.103","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.103","00:00:07.105","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.105","00:00:07.106","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.106","00:00:07.108","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.108","00:00:07.109","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.109","00:00:07.111","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.111","00:00:07.113","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.113","00:00:07.114","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.114","00:00:07.116","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.116","00:00:07.117","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.117","00:00:07.119","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.119","00:00:07.120","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.120","00:00:07.122","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.122","00:00:07.123","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.123","00:00:07.125","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.125","00:00:07.126","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.126","00:00:07.128","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.128","00:00:07.129","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.129","00:00:07.131","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.131","00:00:07.133","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.133","00:00:07.134","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.134","00:00:07.136","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.136","00:00:07.137","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.137","00:00:07.139","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.139","00:00:07.140","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.140","00:00:07.142","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.142","00:00:07.144","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.144","00:00:07.145","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.145","00:00:07.147","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.147","00:00:07.149","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.149","00:00:07.150","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.150","00:00:07.152","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.152","00:00:07.153","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.153","00:00:07.155","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.155","00:00:07.156","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.156","00:00:07.158","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.158","00:00:07.160","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.160","00:00:07.161","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.161","00:00:07.163","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.163","00:00:07.165","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.165","00:00:07.166","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.166","00:00:07.168","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.168","00:00:07.169","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.169","00:00:07.171","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.171","00:00:07.172","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.172","00:00:07.174","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.177","00:00:07.178","0.000s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.178","00:00:07.179","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.179","00:00:07.180","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.180","00:00:07.182","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.182","00:00:07.184","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.184","00:00:07.185","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.185","00:00:07.187","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.189","00:00:07.190","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.192","00:00:07.193","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.193","00:00:07.195","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.195","00:00:07.196","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.196","00:00:07.198","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.198","00:00:07.199","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.199","00:00:07.201","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.201","00:00:07.203","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.203","00:00:07.204","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.204","00:00:07.206","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.206","00:00:07.207","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.207","00:00:07.209","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.209","00:00:07.211","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.211","00:00:07.212","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.212","00:00:07.214","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.214","00:00:07.215","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.215","00:00:07.217","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.217","00:00:07.219","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.219","00:00:07.220","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.220","00:00:07.222","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.222","00:00:07.223","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.223","00:00:07.225","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.225","00:00:07.226","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.226","00:00:07.228","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.228","00:00:07.230","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.230","00:00:07.231","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.231","00:00:07.233","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.233","00:00:07.235","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.235","00:00:07.236","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.236","00:00:07.238","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.238","00:00:07.239","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.239","00:00:07.241","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.241","00:00:07.242","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.242","00:00:07.244","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.244","00:00:07.246","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.246","00:00:07.247","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.247","00:00:07.249","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.249","00:00:07.251","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.251","00:00:07.252","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.252","00:00:07.254","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.254","00:00:07.255","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.255","00:00:07.257","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.257","00:00:07.258","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.258","00:00:07.260","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.260","00:00:07.262","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.262","00:00:07.263","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.263","00:00:07.265","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.265","00:00:07.267","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.267","00:00:07.268","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.268","00:00:07.270","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.270","00:00:07.271","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.271","00:00:07.273","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.273","00:00:07.274","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.274","00:00:07.276","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.276","00:00:07.278","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.278","00:00:07.279","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.279","00:00:07.281","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.281","00:00:07.282","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.282","00:00:07.284","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.284","00:00:07.286","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.286","00:00:07.287","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.287","00:00:07.289","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.289","00:00:07.291","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.291","00:00:07.292","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.292","00:00:07.294","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.294","00:00:07.295","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.295","00:00:07.297","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.297","00:00:07.298","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.298","00:00:07.300","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.300","00:00:07.302","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.302","00:00:07.303","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.303","00:00:07.305","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.305","00:00:07.306","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.306","00:00:07.308","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.308","00:00:07.310","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.310","00:00:07.311","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.311","00:00:07.313","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.313","00:00:07.314","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.314","00:00:07.316","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.316","00:00:07.318","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.318","00:00:07.319","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.319","00:00:07.321","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.321","00:00:07.322","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.322","00:00:07.324","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.324","00:00:07.326","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.326","00:00:07.327","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.327","00:00:07.329","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.329","00:00:07.330","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.330","00:00:07.332","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.332","00:00:07.334","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.334","00:00:07.335","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.335","00:00:07.337","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.337","00:00:07.339","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.339","00:00:07.340","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.340","00:00:07.342","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.342","00:00:07.343","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.343","00:00:07.345","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.345","00:00:07.347","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.347","00:00:07.348","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.348","00:00:07.350","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.350","00:00:07.352","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.352","00:00:07.353","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.353","00:00:07.355","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.355","00:00:07.357","0.001s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.357","00:00:07.358","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.358","00:00:07.360","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.360","00:00:07.362","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.362","00:00:07.363","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.363","00:00:07.365","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.365","00:00:07.366","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.366","00:00:07.368","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.368","00:00:07.370","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.370","00:00:07.371","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.371","00:00:07.373","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.373","00:00:07.375","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.375","00:00:07.376","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.376","00:00:07.378","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.378","00:00:07.380","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.380","00:00:07.381","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.381","00:00:07.383","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.383","00:00:07.385","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.385","00:00:07.386","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.386","00:00:07.388","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.388","00:00:07.390","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.390","00:00:07.391","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.391","00:00:07.393","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.393","00:00:07.394","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.394","00:00:07.396","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.396","00:00:07.398","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.398","00:00:07.399","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.399","00:00:07.401","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.401","00:00:07.403","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.403","00:00:07.404","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.404","00:00:07.406","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.406","00:00:07.408","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.408","00:00:07.409","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.409","00:00:07.411","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.411","00:00:07.412","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.412","00:00:07.414","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.414","00:00:07.416","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.416","00:00:07.417","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.417","00:00:07.419","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.419","00:00:07.421","0.002s"],[15,5,"00:00:06.561","00:00:06.700","00:00:06.695","00:00:06.696","0.001s"],[15,5,"00:00:06.561","00:00:06.700","00:00:06.696","00:00:06.697","0.001s"],[15,5,"00:00:06.561","00:00:06.700","00:00:06.697","00:00:06.698","0.001s"],[15,5,"00:00:06.561","00:00:06.700","00:00:06.698","00:00:06.700","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.706","00:00:06.707","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.707","00:00:06.709","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.709","00:00:06.710","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.710","00:00:06.712","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.712","00:00:06.713","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.713","00:00:06.715","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.715","00:00:06.716","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.716","00:00:06.718","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.720","00:00:06.720","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.720","00:00:06.722","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.722","00:00:06.724","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.724","00:00:06.725","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.725","00:00:06.726","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.726","00:00:06.728","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.728","00:00:06.729","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.729","00:00:06.731","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.731","00:00:06.732","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.732","00:00:06.733","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.733","00:00:06.735","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.735","00:00:06.736","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.736","00:00:06.738","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.738","00:00:06.739","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.739","00:00:06.741","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.741","00:00:06.742","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.742","00:00:06.744","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.744","00:00:06.745","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.745","00:00:06.747","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.747","00:00:06.748","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.748","00:00:06.749","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.749","00:00:06.751","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.751","00:00:06.753","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.753","00:00:06.754","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.754","00:00:06.755","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.755","00:00:06.757","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.757","00:00:06.759","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.759","00:00:06.760","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.760","00:00:06.761","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.761","00:00:06.763","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.763","00:00:06.765","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.769","00:00:06.770","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.770","00:00:06.771","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.771","00:00:06.772","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.772","00:00:06.774","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.774","00:00:06.775","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.775","00:00:06.777","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.777","00:00:06.778","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.778","00:00:06.780","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.780","00:00:06.781","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.781","00:00:06.783","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.783","00:00:06.784","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.784","00:00:06.786","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.786","00:00:06.787","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.787","00:00:06.789","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.789","00:00:06.790","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.790","00:00:06.792","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.792","00:00:06.793","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.793","00:00:06.795","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.795","00:00:06.796","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.796","00:00:06.798","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.798","00:00:06.799","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.799","00:00:06.801","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.801","00:00:06.802","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.802","00:00:06.804","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.804","00:00:06.805","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.805","00:00:06.807","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.811","00:00:06.812","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.812","00:00:06.813","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.813","00:00:06.814","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.814","00:00:06.816","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.816","00:00:06.817","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.817","00:00:06.819","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.819","00:00:06.820","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.820","00:00:06.822","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.822","00:00:06.824","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.824","00:00:06.825","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.825","00:00:06.827","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.827","00:00:06.828","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.828","00:00:06.829","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.829","00:00:06.831","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.831","00:00:06.832","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.832","00:00:06.834","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.834","00:00:06.835","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.835","00:00:06.837","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.837","00:00:06.838","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.838","00:00:06.840","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.840","00:00:06.841","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.841","00:00:06.843","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.843","00:00:06.844","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.844","00:00:06.846","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.846","00:00:06.847","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.847","00:00:06.849","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.849","00:00:06.850","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.850","00:00:06.852","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.852","00:00:06.853","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.853","00:00:06.855","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.855","00:00:06.856","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.856","00:00:06.858","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.858","00:00:06.859","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.859","00:00:06.861","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.861","00:00:06.862","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.862","00:00:06.864","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.864","00:00:06.865","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.865","00:00:06.867","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.867","00:00:06.868","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.868","00:00:06.870","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.870","00:00:06.871","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.871","00:00:06.873","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.873","00:00:06.874","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.874","00:00:06.876","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.876","00:00:06.877","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.877","00:00:06.879","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.879","00:00:06.880","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.880","00:00:06.882","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.882","00:00:06.883","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.883","00:00:06.884","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.884","00:00:06.886","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.886","00:00:06.888","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.888","00:00:06.889","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.889","00:00:06.891","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.891","00:00:06.892","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.892","00:00:06.893","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.893","00:00:06.895","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.895","00:00:06.896","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.896","00:00:06.898","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.898","00:00:06.899","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.899","00:00:06.901","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.901","00:00:06.902","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.902","00:00:06.904","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.904","00:00:06.906","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.906","00:00:06.907","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.907","00:00:06.908","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.908","00:00:06.910","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.910","00:00:06.911","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.911","00:00:06.913","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.913","00:00:06.914","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.914","00:00:06.916","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.916","00:00:06.917","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.917","00:00:06.919","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.919","00:00:06.920","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.920","00:00:06.922","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.922","00:00:06.923","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.923","00:00:06.925","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.925","00:00:06.926","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.926","00:00:06.928","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.928","00:00:06.929","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.929","00:00:06.931","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.931","00:00:06.932","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.932","00:00:06.934","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.934","00:00:06.935","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.935","00:00:06.937","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.937","00:00:06.938","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.938","00:00:06.940","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.940","00:00:06.942","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.942","00:00:06.943","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.943","00:00:06.945","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.945","00:00:06.946","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.946","00:00:06.948","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.948","00:00:06.949","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.949","00:00:06.951","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.951","00:00:06.952","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.952","00:00:06.954","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.956","00:00:06.957","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.957","00:00:06.959","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.959","00:00:06.960","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.960","00:00:06.962","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.962","00:00:06.963","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.963","00:00:06.964","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.964","00:00:06.966","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.969","00:00:06.969","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.969","00:00:06.971","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.971","00:00:06.972","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.972","00:00:06.974","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.974","00:00:06.976","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.976","00:00:06.977","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.977","00:00:06.978","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.978","00:00:06.980","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.980","00:00:06.981","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.981","00:00:06.983","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.983","00:00:06.984","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.984","00:00:06.986","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.986","00:00:06.988","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.988","00:00:06.989","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.989","00:00:06.991","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.991","00:00:06.992","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.992","00:00:06.994","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.994","00:00:06.995","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.995","00:00:06.997","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.997","00:00:06.998","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.998","00:00:07.000","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.000","00:00:07.001","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.001","00:00:07.003","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.003","00:00:07.005","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.005","00:00:07.006","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.006","00:00:07.008","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.008","00:00:07.009","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.009","00:00:07.011","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.011","00:00:07.012","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.012","00:00:07.014","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.014","00:00:07.015","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.015","00:00:07.017","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.017","00:00:07.018","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.018","00:00:07.020","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.020","00:00:07.021","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.021","00:00:07.023","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.023","00:00:07.025","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.025","00:00:07.026","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.026","00:00:07.028","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.028","00:00:07.029","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.029","00:00:07.031","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.031","00:00:07.032","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.032","00:00:07.034","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.034","00:00:07.035","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.035","00:00:07.037","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.037","00:00:07.038","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.038","00:00:07.040","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.040","00:00:07.042","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.042","00:00:07.043","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.043","00:00:07.045","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.045","00:00:07.046","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.046","00:00:07.048","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.048","00:00:07.049","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.049","00:00:07.051","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.051","00:00:07.052","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.052","00:00:07.054","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.054","00:00:07.055","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.055","00:00:07.057","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.057","00:00:07.058","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.058","00:00:07.060","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.060","00:00:07.062","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.062","00:00:07.063","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.063","00:00:07.065","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.065","00:00:07.066","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.066","00:00:07.068","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.068","00:00:07.069","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.069","00:00:07.071","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.071","00:00:07.072","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.072","00:00:07.074","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.074","00:00:07.075","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.075","00:00:07.077","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.077","00:00:07.079","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.079","00:00:07.080","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.080","00:00:07.082","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.082","00:00:07.083","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.083","00:00:07.085","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.085","00:00:07.086","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.086","00:00:07.088","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.088","00:00:07.089","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.089","00:00:07.091","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.091","00:00:07.092","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.092","00:00:07.094","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.094","00:00:07.096","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.096","00:00:07.097","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.097","00:00:07.099","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.099","00:00:07.100","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.100","00:00:07.102","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.102","00:00:07.103","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.103","00:00:07.105","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.105","00:00:07.106","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.106","00:00:07.108","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.108","00:00:07.109","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.109","00:00:07.111","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.111","00:00:07.113","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.113","00:00:07.114","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.114","00:00:07.116","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.116","00:00:07.117","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.117","00:00:07.119","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.119","00:00:07.120","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.120","00:00:07.122","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.122","00:00:07.123","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.123","00:00:07.125","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.125","00:00:07.126","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.126","00:00:07.128","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.128","00:00:07.130","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.130","00:00:07.131","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.131","00:00:07.133","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.133","00:00:07.134","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.134","00:00:07.136","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.136","00:00:07.137","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.137","00:00:07.139","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.139","00:00:07.141","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.141","00:00:07.142","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.142","00:00:07.144","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.144","00:00:07.145","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.145","00:00:07.147","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.147","00:00:07.149","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.149","00:00:07.150","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.150","00:00:07.152","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.152","00:00:07.153","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.153","00:00:07.155","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.155","00:00:07.157","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.157","00:00:07.158","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.158","00:00:07.160","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.160","00:00:07.161","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.161","00:00:07.163","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.163","00:00:07.165","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.165","00:00:07.166","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.166","00:00:07.168","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.168","00:00:07.169","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.169","00:00:07.171","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.171","00:00:07.172","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.172","00:00:07.174","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.177","00:00:07.178","0.000s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.178","00:00:07.179","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.179","00:00:07.181","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.181","00:00:07.182","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.182","00:00:07.184","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.184","00:00:07.185","0.001s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.185","00:00:07.187","0.001s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.770","00:00:06.771","0.001s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.771","00:00:06.772","0.001s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.772","00:00:06.774","0.002s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.774","00:00:06.775","0.001s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.775","00:00:06.777","0.002s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.777","00:00:06.778","0.002s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.778","00:00:06.780","0.002s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.780","00:00:06.781","0.001s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.781","00:00:06.783","0.002s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.783","00:00:06.784","0.001s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.784","00:00:06.786","0.002s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.786","00:00:06.787","0.001s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.787","00:00:06.789","0.002s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.789","00:00:06.790","0.001s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.790","00:00:06.792","0.002s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.792","00:00:06.793","0.002s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.793","00:00:06.795","0.002s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.795","00:00:06.796","0.002s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.796","00:00:06.798","0.002s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.798","00:00:06.799","0.001s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.799","00:00:06.801","0.002s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.801","00:00:06.802","0.001s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.802","00:00:06.804","0.002s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.804","00:00:06.805","0.001s"],[77,26,"00:00:06.693","00:00:06.807","00:00:06.805","00:00:06.807","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.812","00:00:06.813","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.813","00:00:06.814","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.814","00:00:06.816","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.816","00:00:06.817","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.817","00:00:06.819","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.819","00:00:06.820","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.820","00:00:06.822","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.822","00:00:06.824","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.824","00:00:06.825","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.825","00:00:06.827","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.827","00:00:06.828","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.828","00:00:06.829","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.829","00:00:06.831","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.831","00:00:06.832","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.832","00:00:06.834","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.834","00:00:06.835","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.835","00:00:06.837","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.837","00:00:06.838","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.838","00:00:06.840","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.840","00:00:06.841","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.841","00:00:06.843","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.843","00:00:06.844","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.844","00:00:06.846","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.846","00:00:06.847","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.847","00:00:06.849","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.849","00:00:06.850","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.850","00:00:06.852","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.852","00:00:06.853","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.853","00:00:06.855","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.855","00:00:06.856","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.856","00:00:06.858","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.858","00:00:06.859","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.859","00:00:06.861","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.861","00:00:06.862","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.862","00:00:06.864","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.864","00:00:06.865","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.865","00:00:06.867","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.867","00:00:06.868","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.868","00:00:06.870","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.870","00:00:06.871","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.871","00:00:06.873","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.873","00:00:06.874","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.874","00:00:06.876","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.876","00:00:06.877","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.877","00:00:06.879","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.879","00:00:06.880","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.880","00:00:06.882","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.882","00:00:06.883","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.883","00:00:06.885","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.885","00:00:06.886","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.886","00:00:06.888","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.888","00:00:06.889","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.889","00:00:06.891","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.891","00:00:06.892","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.892","00:00:06.894","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.894","00:00:06.895","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.895","00:00:06.896","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.896","00:00:06.898","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.898","00:00:06.899","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.899","00:00:06.901","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.901","00:00:06.902","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.902","00:00:06.904","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.904","00:00:06.906","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.906","00:00:06.907","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.907","00:00:06.909","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.909","00:00:06.910","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.910","00:00:06.912","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.912","00:00:06.913","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.913","00:00:06.914","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.914","00:00:06.916","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.916","00:00:06.917","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.917","00:00:06.919","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.919","00:00:06.920","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.920","00:00:06.922","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.922","00:00:06.923","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.923","00:00:06.925","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.925","00:00:06.926","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.926","00:00:06.928","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.928","00:00:06.929","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.929","00:00:06.931","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.931","00:00:06.932","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.932","00:00:06.934","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.934","00:00:06.935","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.935","00:00:06.937","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.937","00:00:06.938","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.938","00:00:06.940","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.940","00:00:06.942","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.942","00:00:06.943","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.943","00:00:06.945","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.945","00:00:06.946","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.946","00:00:06.948","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.948","00:00:06.949","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.949","00:00:06.951","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.951","00:00:06.952","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.952","00:00:06.954","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.956","00:00:06.957","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.957","00:00:06.959","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.959","00:00:06.960","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.960","00:00:06.962","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.962","00:00:06.963","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.963","00:00:06.965","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.967","00:00:06.969","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.969","00:00:06.969","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.969","00:00:06.971","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.971","00:00:06.972","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.972","00:00:06.974","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.974","00:00:06.976","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.976","00:00:06.977","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.977","00:00:06.978","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.978","00:00:06.980","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.980","00:00:06.981","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.981","00:00:06.983","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.983","00:00:06.984","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.984","00:00:06.986","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.986","00:00:06.988","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.988","00:00:06.989","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.989","00:00:06.991","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.991","00:00:06.992","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.992","00:00:06.994","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.994","00:00:06.996","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.996","00:00:06.997","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.997","00:00:06.998","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.998","00:00:07.000","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.000","00:00:07.001","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.001","00:00:07.003","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.003","00:00:07.005","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.005","00:00:07.006","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.006","00:00:07.008","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.008","00:00:07.009","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.009","00:00:07.011","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.011","00:00:07.012","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.012","00:00:07.014","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.014","00:00:07.015","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.015","00:00:07.017","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.017","00:00:07.018","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.018","00:00:07.020","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.020","00:00:07.022","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.022","00:00:07.023","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.023","00:00:07.025","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.025","00:00:07.026","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.026","00:00:07.028","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.028","00:00:07.029","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.029","00:00:07.031","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.031","00:00:07.033","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.033","00:00:07.034","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.034","00:00:07.035","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.035","00:00:07.037","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.037","00:00:07.038","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.038","00:00:07.040","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.040","00:00:07.042","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.042","00:00:07.043","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.043","00:00:07.045","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.045","00:00:07.046","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.046","00:00:07.048","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.048","00:00:07.049","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.049","00:00:07.051","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.051","00:00:07.052","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.052","00:00:07.054","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.054","00:00:07.055","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.055","00:00:07.057","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.057","00:00:07.059","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.059","00:00:07.060","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.060","00:00:07.062","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.062","00:00:07.063","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.063","00:00:07.065","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.065","00:00:07.066","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.066","00:00:07.068","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.068","00:00:07.069","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.069","00:00:07.071","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.071","00:00:07.072","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.072","00:00:07.074","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.074","00:00:07.075","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.075","00:00:07.077","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.077","00:00:07.079","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.079","00:00:07.080","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.080","00:00:07.082","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.082","00:00:07.083","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.083","00:00:07.085","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.085","00:00:07.086","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.086","00:00:07.088","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.088","00:00:07.089","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.089","00:00:07.091","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.091","00:00:07.092","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.092","00:00:07.094","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.094","00:00:07.096","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.096","00:00:07.097","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.097","00:00:07.099","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.099","00:00:07.100","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.100","00:00:07.102","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.102","00:00:07.103","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.103","00:00:07.105","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.105","00:00:07.106","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.106","00:00:07.108","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.108","00:00:07.110","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.110","00:00:07.111","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.111","00:00:07.113","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.113","00:00:07.114","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.114","00:00:07.116","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.116","00:00:07.117","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.117","00:00:07.119","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.119","00:00:07.120","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.120","00:00:07.122","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.122","00:00:07.123","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.123","00:00:07.125","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.125","00:00:07.127","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.127","00:00:07.128","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.128","00:00:07.130","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.130","00:00:07.131","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.131","00:00:07.133","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.133","00:00:07.134","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.134","00:00:07.136","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.136","00:00:07.137","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.137","00:00:07.139","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.139","00:00:07.141","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.141","00:00:07.142","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.142","00:00:07.144","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.144","00:00:07.145","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.145","00:00:07.147","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.147","00:00:07.149","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.149","00:00:07.150","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.150","00:00:07.152","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.152","00:00:07.153","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.153","00:00:07.155","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.155","00:00:07.157","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.157","00:00:07.158","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.158","00:00:07.160","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.160","00:00:07.161","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.161","00:00:07.163","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.163","00:00:07.165","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.165","00:00:07.166","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.166","00:00:07.168","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.168","00:00:07.169","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.169","00:00:07.171","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.171","00:00:07.173","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.173","00:00:07.174","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.177","00:00:07.178","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.178","00:00:07.179","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.179","00:00:07.181","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.181","00:00:07.182","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.182","00:00:07.184","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.184","00:00:07.185","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.185","00:00:07.187","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.189","00:00:07.190","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.192","00:00:07.193","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.193","00:00:07.195","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.195","00:00:07.196","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.196","00:00:07.198","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.198","00:00:07.200","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.200","00:00:07.201","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.201","00:00:07.203","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.203","00:00:07.204","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.204","00:00:07.206","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.206","00:00:07.207","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.207","00:00:07.209","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.209","00:00:07.211","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.211","00:00:07.212","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.212","00:00:07.214","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.214","00:00:07.216","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.216","00:00:07.217","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.217","00:00:07.219","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.219","00:00:07.220","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.220","00:00:07.222","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.222","00:00:07.223","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.223","00:00:07.225","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.225","00:00:07.227","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.227","00:00:07.228","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.228","00:00:07.230","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.230","00:00:07.232","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.232","00:00:07.233","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.233","00:00:07.235","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.235","00:00:07.236","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.236","00:00:07.238","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.238","00:00:07.239","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.239","00:00:07.241","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.241","00:00:07.243","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.243","00:00:07.244","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.244","00:00:07.246","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.246","00:00:07.247","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.247","00:00:07.249","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.249","00:00:07.251","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.251","00:00:07.252","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.252","00:00:07.254","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.254","00:00:07.255","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.255","00:00:07.257","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.257","00:00:07.259","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.259","00:00:07.260","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.260","00:00:07.262","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.262","00:00:07.263","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.263","00:00:07.265","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.265","00:00:07.267","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.267","00:00:07.268","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.268","00:00:07.270","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.270","00:00:07.271","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.271","00:00:07.273","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.273","00:00:07.275","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.275","00:00:07.276","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.276","00:00:07.278","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.278","00:00:07.279","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.279","00:00:07.281","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.281","00:00:07.283","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.283","00:00:07.284","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.284","00:00:07.286","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.286","00:00:07.287","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.287","00:00:07.289","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.289","00:00:07.291","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.291","00:00:07.292","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.292","00:00:07.294","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.294","00:00:07.295","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.295","00:00:07.297","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.297","00:00:07.299","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.299","00:00:07.300","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.300","00:00:07.302","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.302","00:00:07.303","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.303","00:00:07.305","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.305","00:00:07.307","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.307","00:00:07.308","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.308","00:00:07.310","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.310","00:00:07.311","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.311","00:00:07.313","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.313","00:00:07.315","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.315","00:00:07.316","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.316","00:00:07.318","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.318","00:00:07.319","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.319","00:00:07.321","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.321","00:00:07.323","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.323","00:00:07.324","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.324","00:00:07.326","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.326","00:00:07.327","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.327","00:00:07.329","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.329","00:00:07.331","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.331","00:00:07.332","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.332","00:00:07.334","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.334","00:00:07.335","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.335","00:00:07.337","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.337","00:00:07.339","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.339","00:00:07.340","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.340","00:00:07.342","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.342","00:00:07.344","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.344","00:00:07.345","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.345","00:00:07.347","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.347","00:00:07.349","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.349","00:00:07.350","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.350","00:00:07.352","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.352","00:00:07.354","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.354","00:00:07.355","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.355","00:00:07.357","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.357","00:00:07.358","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.358","00:00:07.360","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.360","00:00:07.362","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.362","00:00:07.363","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.363","00:00:07.365","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.365","00:00:07.367","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.367","00:00:07.368","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.368","00:00:07.370","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.370","00:00:07.372","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.372","00:00:07.373","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.373","00:00:07.375","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.375","00:00:07.376","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.376","00:00:07.378","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.378","00:00:07.380","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.380","00:00:07.381","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.381","00:00:07.383","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.383","00:00:07.385","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.385","00:00:07.386","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.386","00:00:07.388","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.388","00:00:07.390","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.390","00:00:07.391","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.391","00:00:07.393","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.393","00:00:07.394","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.394","00:00:07.396","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.396","00:00:07.398","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.398","00:00:07.399","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.399","00:00:07.401","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.401","00:00:07.403","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.403","00:00:07.404","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.404","00:00:07.406","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.406","00:00:07.408","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.408","00:00:07.409","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.409","00:00:07.411","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.411","00:00:07.413","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.413","00:00:07.414","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.414","00:00:07.416","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.416","00:00:07.418","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.418","00:00:07.419","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.419","00:00:07.421","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.426","00:00:07.428","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.428","00:00:07.429","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.429","00:00:07.430","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.430","00:00:07.432","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.432","00:00:07.433","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.433","00:00:07.435","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.435","00:00:07.437","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.437","00:00:07.439","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.439","00:00:07.440","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.440","00:00:07.442","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.442","00:00:07.443","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.443","00:00:07.445","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.445","00:00:07.447","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.447","00:00:07.448","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.448","00:00:07.450","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.450","00:00:07.452","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.452","00:00:07.453","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.453","00:00:07.455","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.455","00:00:07.457","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.457","00:00:07.459","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.459","00:00:07.460","0.001s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.460","00:00:07.461","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.461","00:00:07.463","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.192","00:00:07.193","0.001s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.193","00:00:07.195","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.195","00:00:07.196","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.196","00:00:07.198","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.198","00:00:07.200","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.200","00:00:07.201","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.201","00:00:07.203","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.203","00:00:07.204","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.204","00:00:07.206","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.206","00:00:07.207","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.207","00:00:07.209","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.209","00:00:07.211","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.211","00:00:07.212","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.212","00:00:07.214","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.214","00:00:07.215","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.215","00:00:07.217","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.217","00:00:07.219","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.219","00:00:07.220","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.220","00:00:07.222","0.001s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.222","00:00:07.223","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.223","00:00:07.225","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.225","00:00:07.227","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.227","00:00:07.228","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.228","00:00:07.230","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.230","00:00:07.232","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.232","00:00:07.233","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.233","00:00:07.235","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.235","00:00:07.236","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.236","00:00:07.238","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.238","00:00:07.239","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.239","00:00:07.241","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.241","00:00:07.243","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.243","00:00:07.244","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.244","00:00:07.246","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.246","00:00:07.247","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.247","00:00:07.249","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.249","00:00:07.251","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.251","00:00:07.252","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.252","00:00:07.254","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.254","00:00:07.255","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.255","00:00:07.257","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.257","00:00:07.259","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.259","00:00:07.260","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.260","00:00:07.262","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.262","00:00:07.263","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.263","00:00:07.265","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.265","00:00:07.267","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.267","00:00:07.268","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.268","00:00:07.270","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.270","00:00:07.271","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.271","00:00:07.273","0.001s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.273","00:00:07.275","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.275","00:00:07.276","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.276","00:00:07.278","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.278","00:00:07.279","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.279","00:00:07.281","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.281","00:00:07.283","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.283","00:00:07.284","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.284","00:00:07.286","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.286","00:00:07.287","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.287","00:00:07.289","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.289","00:00:07.291","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.291","00:00:07.292","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.292","00:00:07.294","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.294","00:00:07.295","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.295","00:00:07.297","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.297","00:00:07.299","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.299","00:00:07.300","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.300","00:00:07.302","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.302","00:00:07.303","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.303","00:00:07.305","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.305","00:00:07.306","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.306","00:00:07.308","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.308","00:00:07.310","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.310","00:00:07.311","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.311","00:00:07.313","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.313","00:00:07.315","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.315","00:00:07.316","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.316","00:00:07.318","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.318","00:00:07.319","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.319","00:00:07.321","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.321","00:00:07.323","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.323","00:00:07.324","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.324","00:00:07.326","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.326","00:00:07.327","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.327","00:00:07.329","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.329","00:00:07.331","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.331","00:00:07.332","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.332","00:00:07.334","0.001s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.334","00:00:07.335","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.335","00:00:07.337","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.337","00:00:07.339","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.339","00:00:07.340","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.340","00:00:07.342","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.342","00:00:07.344","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.344","00:00:07.345","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.345","00:00:07.347","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.347","00:00:07.349","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.349","00:00:07.350","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.350","00:00:07.352","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.352","00:00:07.353","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.353","00:00:07.355","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.355","00:00:07.357","0.001s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.357","00:00:07.358","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.358","00:00:07.360","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.360","00:00:07.362","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.362","00:00:07.363","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.363","00:00:07.365","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.365","00:00:07.367","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.367","00:00:07.368","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.368","00:00:07.370","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.370","00:00:07.372","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.372","00:00:07.373","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.373","00:00:07.375","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.375","00:00:07.376","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.376","00:00:07.378","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.378","00:00:07.380","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.380","00:00:07.381","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.381","00:00:07.383","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.383","00:00:07.385","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.385","00:00:07.386","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.386","00:00:07.388","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.388","00:00:07.390","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.390","00:00:07.391","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.391","00:00:07.393","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.393","00:00:07.394","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.394","00:00:07.396","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.396","00:00:07.398","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.398","00:00:07.399","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.399","00:00:07.401","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.401","00:00:07.403","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.403","00:00:07.404","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.404","00:00:07.406","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.406","00:00:07.408","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.408","00:00:07.409","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.409","00:00:07.411","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.411","00:00:07.413","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.413","00:00:07.414","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.414","00:00:07.416","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.416","00:00:07.418","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.418","00:00:07.419","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.419","00:00:07.421","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.426","00:00:07.428","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.428","00:00:07.428","0.001s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.428","00:00:07.430","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.430","00:00:07.432","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.432","00:00:07.433","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.433","00:00:07.435","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.435","00:00:07.437","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.437","00:00:07.439","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.439","00:00:07.440","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.440","00:00:07.442","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.442","00:00:07.443","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.443","00:00:07.445","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.445","00:00:07.447","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.447","00:00:07.448","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.448","00:00:07.450","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.450","00:00:07.452","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.452","00:00:07.453","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.453","00:00:07.455","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.455","00:00:07.457","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.457","00:00:07.459","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.459","00:00:07.460","0.001s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.460","00:00:07.461","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.461","00:00:07.463","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.469","00:00:07.470","0.001s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.470","00:00:07.472","0.001s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.472","00:00:07.473","0.001s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.473","00:00:07.475","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.475","00:00:07.476","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.476","00:00:07.478","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.478","00:00:07.480","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.480","00:00:07.481","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.481","00:00:07.483","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.483","00:00:07.485","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.485","00:00:07.486","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.486","00:00:07.488","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.488","00:00:07.490","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.490","00:00:07.491","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.491","00:00:07.493","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.493","00:00:07.495","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.495","00:00:07.496","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.496","00:00:07.498","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.498","00:00:07.499","0.001s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.499","00:00:07.501","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.501","00:00:07.503","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.503","00:00:07.504","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.504","00:00:07.506","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.506","00:00:07.508","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.508","00:00:07.509","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.509","00:00:07.511","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.511","00:00:07.513","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.513","00:00:07.514","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.514","00:00:07.516","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.516","00:00:07.518","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.518","00:00:07.519","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.519","00:00:07.521","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.521","00:00:07.522","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.522","00:00:07.524","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.524","00:00:07.526","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.428","00:00:07.429","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.429","00:00:07.430","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.430","00:00:07.432","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.432","00:00:07.433","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.433","00:00:07.435","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.435","00:00:07.437","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.437","00:00:07.439","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.439","00:00:07.440","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.440","00:00:07.442","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.442","00:00:07.443","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.443","00:00:07.445","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.445","00:00:07.447","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.447","00:00:07.448","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.448","00:00:07.450","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.450","00:00:07.452","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.452","00:00:07.453","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.453","00:00:07.455","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.455","00:00:07.457","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.457","00:00:07.459","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.459","00:00:07.460","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.460","00:00:07.461","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.461","00:00:07.463","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.469","00:00:07.470","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.470","00:00:07.472","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.472","00:00:07.473","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.473","00:00:07.475","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.475","00:00:07.476","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.476","00:00:07.478","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.478","00:00:07.480","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.480","00:00:07.481","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.481","00:00:07.483","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.483","00:00:07.485","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.485","00:00:07.486","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.486","00:00:07.488","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.488","00:00:07.490","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.490","00:00:07.491","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.491","00:00:07.493","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.493","00:00:07.495","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.495","00:00:07.496","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.496","00:00:07.498","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.498","00:00:07.499","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.499","00:00:07.501","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.501","00:00:07.503","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.503","00:00:07.504","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.504","00:00:07.506","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.506","00:00:07.508","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.508","00:00:07.509","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.509","00:00:07.511","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.511","00:00:07.513","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.513","00:00:07.514","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.514","00:00:07.516","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.516","00:00:07.518","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.518","00:00:07.519","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.519","00:00:07.521","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.521","00:00:07.522","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.522","00:00:07.524","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.524","00:00:07.526","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.528","00:00:07.530","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.530","00:00:07.531","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.531","00:00:07.532","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.532","00:00:07.534","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.534","00:00:07.536","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.536","00:00:07.538","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.543","00:00:07.545","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.545","00:00:07.546","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.546","00:00:07.547","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.547","00:00:07.549","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.549","00:00:07.550","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.550","00:00:07.552","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.552","00:00:07.554","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.554","00:00:07.555","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.555","00:00:07.557","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.557","00:00:07.559","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.559","00:00:07.560","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.560","00:00:07.562","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.562","00:00:07.564","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.564","00:00:07.565","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.565","00:00:07.567","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.567","00:00:07.569","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.569","00:00:07.571","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.571","00:00:07.573","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.573","00:00:07.574","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.574","00:00:07.576","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.576","00:00:07.577","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.577","00:00:07.579","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.579","00:00:07.581","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.581","00:00:07.583","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.583","00:00:07.584","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.584","00:00:07.586","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.586","00:00:07.587","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.587","00:00:07.589","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.595","00:00:07.597","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.597","00:00:07.598","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.598","00:00:07.600","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.600","00:00:07.601","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.601","00:00:07.603","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.603","00:00:07.604","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.604","00:00:07.606","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.606","00:00:07.608","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.610","00:00:07.611","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.611","00:00:07.612","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.612","00:00:07.614","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.614","00:00:07.615","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.615","00:00:07.616","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.616","00:00:07.617","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.617","00:00:07.618","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.618","00:00:07.619","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.619","00:00:07.620","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.620","00:00:07.621","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.621","00:00:07.623","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.623","00:00:07.624","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.624","00:00:07.625","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.625","00:00:07.626","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.626","00:00:07.627","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.627","00:00:07.628","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.628","00:00:07.630","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.630","00:00:07.631","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.631","00:00:07.632","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.632","00:00:07.633","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.633","00:00:07.634","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.634","00:00:07.636","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.636","00:00:07.637","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.637","00:00:07.638","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.638","00:00:07.639","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.639","00:00:07.640","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.640","00:00:07.641","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.641","00:00:07.642","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.642","00:00:07.644","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.644","00:00:07.645","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.645","00:00:07.646","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.646","00:00:07.647","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.647","00:00:07.648","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.648","00:00:07.649","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.649","00:00:07.651","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.651","00:00:07.652","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.652","00:00:07.653","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.653","00:00:07.654","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.654","00:00:07.656","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.656","00:00:07.657","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.657","00:00:07.658","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.658","00:00:07.659","0.001s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.666","00:00:07.668","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.668","00:00:07.669","0.001s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.470","00:00:07.472","0.001s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.472","00:00:07.473","0.001s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.473","00:00:07.475","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.475","00:00:07.476","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.476","00:00:07.478","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.478","00:00:07.480","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.480","00:00:07.481","0.001s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.481","00:00:07.483","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.483","00:00:07.485","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.485","00:00:07.486","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.486","00:00:07.488","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.488","00:00:07.490","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.490","00:00:07.491","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.491","00:00:07.493","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.493","00:00:07.495","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.495","00:00:07.496","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.496","00:00:07.498","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.498","00:00:07.499","0.001s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.499","00:00:07.501","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.501","00:00:07.503","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.503","00:00:07.504","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.504","00:00:07.506","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.506","00:00:07.508","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.508","00:00:07.509","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.509","00:00:07.511","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.511","00:00:07.513","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.513","00:00:07.514","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.514","00:00:07.516","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.516","00:00:07.518","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.518","00:00:07.519","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.519","00:00:07.521","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.521","00:00:07.522","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.522","00:00:07.524","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.524","00:00:07.526","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.528","00:00:07.530","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.530","00:00:07.531","0.001s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.531","00:00:07.532","0.001s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.532","00:00:07.534","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.534","00:00:07.536","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.536","00:00:07.538","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.531","00:00:07.532","0.001s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.532","00:00:07.534","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.534","00:00:07.536","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.536","00:00:07.538","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.543","00:00:07.545","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.545","00:00:07.546","0.001s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.546","00:00:07.547","0.001s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.547","00:00:07.549","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.549","00:00:07.550","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.550","00:00:07.552","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.552","00:00:07.554","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.554","00:00:07.555","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.555","00:00:07.557","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.557","00:00:07.559","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.559","00:00:07.560","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.560","00:00:07.562","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.562","00:00:07.564","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.564","00:00:07.565","0.001s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.565","00:00:07.567","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.567","00:00:07.569","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.569","00:00:07.571","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.571","00:00:07.572","0.002s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.572","00:00:07.574","0.002s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.545","00:00:07.546","0.001s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.546","00:00:07.547","0.001s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.547","00:00:07.549","0.002s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.549","00:00:07.550","0.002s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.550","00:00:07.552","0.002s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.552","00:00:07.554","0.002s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.554","00:00:07.555","0.002s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.555","00:00:07.557","0.002s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.557","00:00:07.559","0.002s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.559","00:00:07.560","0.002s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.560","00:00:07.562","0.002s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.562","00:00:07.564","0.002s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.564","00:00:07.565","0.001s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.565","00:00:07.567","0.002s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.567","00:00:07.569","0.002s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.569","00:00:07.571","0.002s"],[480,18,"00:00:07.174","00:00:07.573","00:00:07.571","00:00:07.573","0.002s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.597","00:00:07.598","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.598","00:00:07.600","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.600","00:00:07.601","0.002s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.601","00:00:07.603","0.002s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.603","00:00:07.604","0.002s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.604","00:00:07.606","0.002s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.606","00:00:07.608","0.002s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.610","00:00:07.611","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.611","00:00:07.612","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.612","00:00:07.613","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.613","00:00:07.615","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.615","00:00:07.616","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.616","00:00:07.617","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.617","00:00:07.618","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.618","00:00:07.619","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.619","00:00:07.620","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.620","00:00:07.621","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.621","00:00:07.623","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.623","00:00:07.624","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.624","00:00:07.625","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.625","00:00:07.626","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.626","00:00:07.627","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.627","00:00:07.628","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.628","00:00:07.630","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.630","00:00:07.631","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.631","00:00:07.632","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.632","00:00:07.633","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.633","00:00:07.634","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.634","00:00:07.635","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.635","00:00:07.637","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.637","00:00:07.638","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.638","00:00:07.639","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.639","00:00:07.640","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.640","00:00:07.641","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.641","00:00:07.642","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.642","00:00:07.644","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.644","00:00:07.645","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.645","00:00:07.646","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.646","00:00:07.647","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.647","00:00:07.648","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.648","00:00:07.649","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.649","00:00:07.651","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.651","00:00:07.652","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.652","00:00:07.653","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.653","00:00:07.654","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.654","00:00:07.656","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.656","00:00:07.657","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.657","00:00:07.658","0.002s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.658","00:00:07.659","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.666","00:00:07.668","0.002s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.668","00:00:07.669","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.669","00:00:07.670","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.670","00:00:07.671","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.671","00:00:07.672","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.672","00:00:07.673","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.673","00:00:07.674","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.674","00:00:07.675","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.675","00:00:07.677","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.677","00:00:07.678","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.678","00:00:07.679","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.679","00:00:07.680","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.680","00:00:07.681","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.681","00:00:07.682","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.682","00:00:07.684","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.684","00:00:07.685","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.685","00:00:07.686","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.686","00:00:07.687","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.687","00:00:07.688","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.688","00:00:07.690","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.690","00:00:07.691","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.691","00:00:07.692","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.692","00:00:07.693","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.693","00:00:07.693","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.693","00:00:07.694","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.694","00:00:07.695","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.695","00:00:07.696","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.696","00:00:07.697","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.697","00:00:07.699","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.699","00:00:07.700","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.700","00:00:07.701","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.701","00:00:07.702","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.702","00:00:07.703","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.703","00:00:07.704","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.704","00:00:07.705","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.705","00:00:07.706","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.706","00:00:07.707","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.707","00:00:07.708","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.708","00:00:07.709","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.709","00:00:07.710","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.710","00:00:07.711","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.711","00:00:07.712","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.712","00:00:07.713","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.713","00:00:07.714","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.714","00:00:07.715","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.715","00:00:07.716","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.716","00:00:07.718","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.718","00:00:07.719","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.719","00:00:07.720","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.720","00:00:07.721","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.721","00:00:07.722","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.722","00:00:07.723","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.723","00:00:07.724","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.724","00:00:07.725","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.725","00:00:07.726","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.726","00:00:07.727","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.727","00:00:07.728","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.728","00:00:07.729","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.729","00:00:07.730","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.730","00:00:07.731","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.731","00:00:07.733","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.733","00:00:07.734","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.734","00:00:07.735","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.735","00:00:07.736","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.736","00:00:07.737","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.737","00:00:07.738","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.738","00:00:07.739","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.739","00:00:07.740","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.740","00:00:07.741","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.741","00:00:07.742","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.742","00:00:07.743","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.743","00:00:07.744","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.744","00:00:07.745","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.745","00:00:07.746","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.746","00:00:07.747","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.747","00:00:07.749","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.749","00:00:07.750","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.750","00:00:07.751","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.751","00:00:07.752","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.752","00:00:07.753","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.753","00:00:07.754","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.754","00:00:07.755","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.755","00:00:07.756","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.756","00:00:07.757","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.757","00:00:07.758","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.758","00:00:07.759","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.759","00:00:07.760","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.760","00:00:07.761","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.761","00:00:07.763","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.763","00:00:07.764","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.764","00:00:07.765","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.765","00:00:07.766","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.766","00:00:07.767","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.767","00:00:07.768","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.768","00:00:07.769","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.769","00:00:07.770","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.770","00:00:07.771","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.771","00:00:07.772","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.772","00:00:07.773","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.773","00:00:07.774","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.774","00:00:07.775","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.775","00:00:07.777","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.777","00:00:07.778","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.778","00:00:07.779","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.779","00:00:07.780","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.780","00:00:07.781","0.001s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.781","00:00:07.782","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.668","00:00:07.669","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.669","00:00:07.670","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.670","00:00:07.671","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.671","00:00:07.672","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.672","00:00:07.673","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.673","00:00:07.674","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.674","00:00:07.675","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.675","00:00:07.676","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.676","00:00:07.678","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.678","00:00:07.679","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.679","00:00:07.680","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.680","00:00:07.681","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.681","00:00:07.682","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.682","00:00:07.684","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.684","00:00:07.685","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.685","00:00:07.686","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.686","00:00:07.687","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.687","00:00:07.688","0.001s"],[336,20,"00:00:07.652","00:00:07.690","00:00:07.688","00:00:07.690","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.073","00:00:08.074","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.074","00:00:08.075","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.075","00:00:08.077","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.077","00:00:08.078","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.078","00:00:08.080","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.080","00:00:08.081","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.081","00:00:08.082","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.082","00:00:08.082","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.082","00:00:08.083","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.083","00:00:08.084","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.084","00:00:08.085","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.085","00:00:08.087","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.087","00:00:08.088","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.088","00:00:08.089","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.089","00:00:08.090","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.090","00:00:08.091","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.091","00:00:08.092","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.092","00:00:08.093","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.093","00:00:08.094","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.094","00:00:08.095","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.095","00:00:08.096","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.096","00:00:08.097","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.097","00:00:08.098","0.001s"],[24,25,"00:00:08.062","00:00:08.099","00:00:08.098","00:00:08.099","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.175","00:00:08.176","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.176","00:00:08.177","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.177","00:00:08.178","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.178","00:00:08.179","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.179","00:00:08.181","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.181","00:00:08.182","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.182","00:00:08.183","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.183","00:00:08.184","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.184","00:00:08.185","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.185","00:00:08.187","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.187","00:00:08.188","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.188","00:00:08.189","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.189","00:00:08.190","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.190","00:00:08.192","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.192","00:00:08.193","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.193","00:00:08.194","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.194","00:00:08.196","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.196","00:00:08.197","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.197","00:00:08.199","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.199","00:00:08.201","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.201","00:00:08.202","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.202","00:00:08.204","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.204","00:00:08.204","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.204","00:00:08.206","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.206","00:00:08.207","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.207","00:00:08.208","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.208","00:00:08.209","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.209","00:00:08.210","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.210","00:00:08.212","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.212","00:00:08.213","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.213","00:00:08.214","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.214","00:00:08.215","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.215","00:00:08.217","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.217","00:00:08.218","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.218","00:00:08.219","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.219","00:00:08.220","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.220","00:00:08.221","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.221","00:00:08.223","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.223","00:00:08.224","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.224","00:00:08.225","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.225","00:00:08.226","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.226","00:00:08.228","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.228","00:00:08.229","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.229","00:00:08.230","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.230","00:00:08.231","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.231","00:00:08.232","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.232","00:00:08.234","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.234","00:00:08.235","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.235","00:00:08.236","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.236","00:00:08.237","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.237","00:00:08.239","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.239","00:00:08.240","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.240","00:00:08.241","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.241","00:00:08.243","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.243","00:00:08.244","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.244","00:00:08.245","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.245","00:00:08.246","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.246","00:00:08.247","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.247","00:00:08.248","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.248","00:00:08.250","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.250","00:00:08.251","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.251","00:00:08.252","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.252","00:00:08.253","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.253","00:00:08.255","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.255","00:00:08.256","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.256","00:00:08.257","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.257","00:00:08.258","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.258","00:00:08.260","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.260","00:00:08.261","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.261","00:00:08.262","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.262","00:00:08.263","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.263","00:00:08.264","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.264","00:00:08.266","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.266","00:00:08.267","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.267","00:00:08.268","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.268","00:00:08.269","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.269","00:00:08.271","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.271","00:00:08.272","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.272","00:00:08.273","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.273","00:00:08.274","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.274","00:00:08.275","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.275","00:00:08.277","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.277","00:00:08.278","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.278","00:00:08.279","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.279","00:00:08.281","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.281","00:00:08.282","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.282","00:00:08.284","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.284","00:00:08.285","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.285","00:00:08.286","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.286","00:00:08.288","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.288","00:00:08.289","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.289","00:00:08.290","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.290","00:00:08.291","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.291","00:00:08.292","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.292","00:00:08.293","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.293","00:00:08.295","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.295","00:00:08.296","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.296","00:00:08.297","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.297","00:00:08.298","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.298","00:00:08.300","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.300","00:00:08.301","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.301","00:00:08.302","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.302","00:00:08.303","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.303","00:00:08.304","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.304","00:00:08.306","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.306","00:00:08.307","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.307","00:00:08.309","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.309","00:00:08.310","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.310","00:00:08.311","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.311","00:00:08.312","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.312","00:00:08.314","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.314","00:00:08.315","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.315","00:00:08.316","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.316","00:00:08.317","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.317","00:00:08.318","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.327","00:00:08.327","0.000s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.327","00:00:08.328","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.328","00:00:08.329","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.329","00:00:08.331","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.331","00:00:08.332","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.332","00:00:08.334","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.334","00:00:08.335","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.335","00:00:08.336","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.336","00:00:08.338","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.338","00:00:08.339","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.339","00:00:08.340","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.340","00:00:08.342","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.342","00:00:08.343","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.343","00:00:08.344","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.344","00:00:08.346","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.346","00:00:08.347","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.347","00:00:08.349","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.349","00:00:08.351","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.351","00:00:08.352","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.352","00:00:08.353","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.353","00:00:08.355","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.355","00:00:08.356","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.356","00:00:08.357","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.357","00:00:08.359","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.359","00:00:08.360","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.360","00:00:08.361","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.361","00:00:08.363","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.365","00:00:08.366","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.366","00:00:08.367","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.367","00:00:08.369","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.369","00:00:08.370","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.370","00:00:08.372","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.378","00:00:08.379","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.379","00:00:08.380","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.380","00:00:08.381","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.381","00:00:08.383","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.383","00:00:08.384","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.384","00:00:08.385","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.385","00:00:08.387","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.387","00:00:08.388","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.388","00:00:08.389","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.389","00:00:08.391","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.391","00:00:08.392","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.392","00:00:08.394","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.394","00:00:08.395","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.395","00:00:08.396","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.396","00:00:08.398","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.398","00:00:08.399","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.399","00:00:08.401","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.403","00:00:08.403","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.403","00:00:08.405","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.405","00:00:08.406","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.406","00:00:08.407","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.407","00:00:08.409","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.409","00:00:08.410","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.410","00:00:08.412","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.412","00:00:08.413","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.413","00:00:08.415","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.415","00:00:08.416","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.416","00:00:08.417","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.417","00:00:08.419","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.419","00:00:08.420","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.420","00:00:08.421","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.421","00:00:08.423","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.423","00:00:08.424","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.424","00:00:08.425","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.425","00:00:08.427","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.427","00:00:08.428","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.428","00:00:08.430","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.430","00:00:08.431","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.431","00:00:08.432","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.432","00:00:08.434","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.434","00:00:08.435","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.435","00:00:08.437","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.437","00:00:08.438","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.438","00:00:08.439","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.439","00:00:08.441","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.441","00:00:08.442","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.442","00:00:08.443","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.443","00:00:08.445","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.445","00:00:08.446","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.446","00:00:08.447","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.447","00:00:08.449","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.449","00:00:08.451","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.451","00:00:08.452","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.452","00:00:08.453","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.453","00:00:08.454","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.454","00:00:08.456","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.456","00:00:08.457","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.457","00:00:08.459","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.459","00:00:08.460","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.460","00:00:08.461","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.461","00:00:08.463","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.463","00:00:08.464","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.464","00:00:08.466","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.466","00:00:08.467","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.467","00:00:08.468","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.468","00:00:08.470","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.472","00:00:08.473","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.473","00:00:08.474","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.474","00:00:08.476","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.476","00:00:08.477","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.477","00:00:08.478","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.478","00:00:08.480","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.480","00:00:08.481","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.481","00:00:08.483","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.483","00:00:08.484","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.484","00:00:08.485","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.485","00:00:08.487","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.487","00:00:08.488","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.488","00:00:08.489","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.489","00:00:08.491","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.491","00:00:08.492","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.492","00:00:08.494","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.494","00:00:08.495","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.495","00:00:08.497","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.497","00:00:08.498","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.498","00:00:08.499","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.499","00:00:08.501","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.501","00:00:08.502","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.502","00:00:08.503","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.503","00:00:08.505","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.505","00:00:08.506","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.506","00:00:08.508","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.508","00:00:08.509","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.509","00:00:08.510","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.510","00:00:08.512","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.512","00:00:08.513","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.513","00:00:08.515","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.515","00:00:08.517","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.517","00:00:08.517","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.517","00:00:08.519","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.519","00:00:08.520","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.520","00:00:08.522","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.524","00:00:08.526","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.526","00:00:08.527","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.527","00:00:08.528","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.528","00:00:08.529","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.529","00:00:08.530","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.530","00:00:08.532","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.532","00:00:08.533","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.533","00:00:08.535","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.535","00:00:08.536","0.001s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.536","00:00:08.538","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.327","00:00:08.327","0.000s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.327","00:00:08.328","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.328","00:00:08.330","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.330","00:00:08.331","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.331","00:00:08.332","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.332","00:00:08.334","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.334","00:00:08.335","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.335","00:00:08.336","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.336","00:00:08.338","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.338","00:00:08.339","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.339","00:00:08.340","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.340","00:00:08.342","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.342","00:00:08.343","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.343","00:00:08.344","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.344","00:00:08.346","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.346","00:00:08.347","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.347","00:00:08.349","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.349","00:00:08.351","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.351","00:00:08.352","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.352","00:00:08.353","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.353","00:00:08.355","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.355","00:00:08.356","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.356","00:00:08.357","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.357","00:00:08.359","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.359","00:00:08.360","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.360","00:00:08.361","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.361","00:00:08.363","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.365","00:00:08.366","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.366","00:00:08.367","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.367","00:00:08.369","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.369","00:00:08.370","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.370","00:00:08.372","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.378","00:00:08.379","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.379","00:00:08.380","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.380","00:00:08.381","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.381","00:00:08.383","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.383","00:00:08.384","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.384","00:00:08.385","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.385","00:00:08.387","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.387","00:00:08.388","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.388","00:00:08.389","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.389","00:00:08.391","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.391","00:00:08.392","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.392","00:00:08.394","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.394","00:00:08.395","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.395","00:00:08.396","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.396","00:00:08.398","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.398","00:00:08.399","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.399","00:00:08.401","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.403","00:00:08.403","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.403","00:00:08.405","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.405","00:00:08.406","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.406","00:00:08.407","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.407","00:00:08.409","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.409","00:00:08.410","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.410","00:00:08.412","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.412","00:00:08.413","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.413","00:00:08.415","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.415","00:00:08.416","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.416","00:00:08.417","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.417","00:00:08.419","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.419","00:00:08.420","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.420","00:00:08.421","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.421","00:00:08.423","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.423","00:00:08.424","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.424","00:00:08.425","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.425","00:00:08.427","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.427","00:00:08.428","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.428","00:00:08.430","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.430","00:00:08.431","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.431","00:00:08.432","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.432","00:00:08.434","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.434","00:00:08.435","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.435","00:00:08.437","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.437","00:00:08.438","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.438","00:00:08.439","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.439","00:00:08.441","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.441","00:00:08.442","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.442","00:00:08.443","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.443","00:00:08.445","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.445","00:00:08.446","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.446","00:00:08.448","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.448","00:00:08.449","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.449","00:00:08.451","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.451","00:00:08.452","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.452","00:00:08.453","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.453","00:00:08.454","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.454","00:00:08.456","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.456","00:00:08.457","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.457","00:00:08.459","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.459","00:00:08.460","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.460","00:00:08.461","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.461","00:00:08.463","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.463","00:00:08.464","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.464","00:00:08.466","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.466","00:00:08.467","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.467","00:00:08.468","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.468","00:00:08.470","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.472","00:00:08.473","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.473","00:00:08.474","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.474","00:00:08.476","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.476","00:00:08.477","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.477","00:00:08.478","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.478","00:00:08.480","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.480","00:00:08.481","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.481","00:00:08.483","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.483","00:00:08.484","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.484","00:00:08.485","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.485","00:00:08.487","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.487","00:00:08.488","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.488","00:00:08.489","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.489","00:00:08.491","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.491","00:00:08.492","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.492","00:00:08.494","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.494","00:00:08.495","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.495","00:00:08.497","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.497","00:00:08.498","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.498","00:00:08.499","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.499","00:00:08.501","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.501","00:00:08.502","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.502","00:00:08.503","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.503","00:00:08.505","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.505","00:00:08.506","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.506","00:00:08.508","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.508","00:00:08.509","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.509","00:00:08.510","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.510","00:00:08.512","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.512","00:00:08.513","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.513","00:00:08.515","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.515","00:00:08.517","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.517","00:00:08.517","0.001s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.517","00:00:08.519","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.519","00:00:08.520","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.520","00:00:08.522","0.002s"],[112,12,"00:00:08.313","00:00:08.342","00:00:08.327","00:00:08.328","0.001s"],[112,12,"00:00:08.313","00:00:08.342","00:00:08.328","00:00:08.330","0.001s"],[112,12,"00:00:08.313","00:00:08.342","00:00:08.330","00:00:08.331","0.001s"],[112,12,"00:00:08.313","00:00:08.342","00:00:08.331","00:00:08.332","0.001s"],[112,12,"00:00:08.313","00:00:08.342","00:00:08.332","00:00:08.334","0.001s"],[112,12,"00:00:08.313","00:00:08.342","00:00:08.334","00:00:08.335","0.001s"],[112,12,"00:00:08.313","00:00:08.342","00:00:08.335","00:00:08.336","0.001s"],[112,12,"00:00:08.313","00:00:08.342","00:00:08.336","00:00:08.338","0.001s"],[112,12,"00:00:08.313","00:00:08.342","00:00:08.338","00:00:08.339","0.001s"],[112,12,"00:00:08.313","00:00:08.342","00:00:08.339","00:00:08.340","0.001s"],[112,12,"00:00:08.313","00:00:08.342","00:00:08.340","00:00:08.342","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.358","00:00:08.360","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.360","00:00:08.361","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.361","00:00:08.363","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.363","00:00:08.365","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.365","00:00:08.366","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.366","00:00:08.367","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.367","00:00:08.368","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.368","00:00:08.370","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.370","00:00:08.372","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.378","00:00:08.379","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.379","00:00:08.380","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.380","00:00:08.381","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.381","00:00:08.383","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.383","00:00:08.384","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.384","00:00:08.385","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.385","00:00:08.387","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.387","00:00:08.388","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.388","00:00:08.389","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.389","00:00:08.391","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.391","00:00:08.392","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.392","00:00:08.394","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.394","00:00:08.395","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.395","00:00:08.396","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.396","00:00:08.398","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.398","00:00:08.399","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.399","00:00:08.401","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.403","00:00:08.403","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.403","00:00:08.405","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.405","00:00:08.406","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.406","00:00:08.407","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.407","00:00:08.409","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.409","00:00:08.410","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.410","00:00:08.412","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.412","00:00:08.413","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.413","00:00:08.415","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.415","00:00:08.416","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.416","00:00:08.417","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.417","00:00:08.419","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.419","00:00:08.420","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.420","00:00:08.421","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.421","00:00:08.422","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.422","00:00:08.424","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.424","00:00:08.425","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.425","00:00:08.427","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.427","00:00:08.428","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.428","00:00:08.429","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.429","00:00:08.431","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.431","00:00:08.432","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.432","00:00:08.434","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.434","00:00:08.435","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.435","00:00:08.437","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.437","00:00:08.438","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.438","00:00:08.439","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.439","00:00:08.441","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.441","00:00:08.442","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.442","00:00:08.443","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.443","00:00:08.445","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.445","00:00:08.446","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.446","00:00:08.447","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.447","00:00:08.449","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.449","00:00:08.451","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.451","00:00:08.452","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.452","00:00:08.453","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.453","00:00:08.454","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.454","00:00:08.456","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.456","00:00:08.457","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.457","00:00:08.459","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.459","00:00:08.460","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.460","00:00:08.461","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.461","00:00:08.463","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.463","00:00:08.464","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.464","00:00:08.466","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.466","00:00:08.467","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.467","00:00:08.468","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.468","00:00:08.470","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.472","00:00:08.473","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.473","00:00:08.474","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.474","00:00:08.475","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.475","00:00:08.477","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.477","00:00:08.478","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.478","00:00:08.480","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.480","00:00:08.481","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.481","00:00:08.482","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.482","00:00:08.484","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.484","00:00:08.485","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.485","00:00:08.487","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.487","00:00:08.488","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.488","00:00:08.489","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.489","00:00:08.491","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.491","00:00:08.492","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.492","00:00:08.494","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.494","00:00:08.495","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.495","00:00:08.496","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.496","00:00:08.498","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.498","00:00:08.499","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.499","00:00:08.500","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.500","00:00:08.502","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.502","00:00:08.503","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.503","00:00:08.505","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.505","00:00:08.506","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.506","00:00:08.507","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.507","00:00:08.509","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.509","00:00:08.510","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.510","00:00:08.512","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.512","00:00:08.513","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.513","00:00:08.515","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.515","00:00:08.517","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.517","00:00:08.517","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.517","00:00:08.519","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.519","00:00:08.520","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.520","00:00:08.522","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.524","00:00:08.526","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.526","00:00:08.527","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.527","00:00:08.528","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.528","00:00:08.529","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.529","00:00:08.530","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.530","00:00:08.532","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.532","00:00:08.533","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.533","00:00:08.535","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.535","00:00:08.536","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.536","00:00:08.538","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.543","00:00:08.544","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.544","00:00:08.545","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.545","00:00:08.546","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.546","00:00:08.547","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.547","00:00:08.548","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.548","00:00:08.549","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.549","00:00:08.550","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.550","00:00:08.552","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.552","00:00:08.553","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.553","00:00:08.554","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.554","00:00:08.555","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.555","00:00:08.556","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.556","00:00:08.557","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.557","00:00:08.559","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.559","00:00:08.560","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.560","00:00:08.561","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.561","00:00:08.562","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.562","00:00:08.563","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.563","00:00:08.565","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.565","00:00:08.566","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.566","00:00:08.567","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.567","00:00:08.568","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.568","00:00:08.569","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.569","00:00:08.570","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.570","00:00:08.571","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.571","00:00:08.573","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.573","00:00:08.574","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.574","00:00:08.575","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.575","00:00:08.576","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.576","00:00:08.577","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.577","00:00:08.579","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.579","00:00:08.580","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.580","00:00:08.581","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.581","00:00:08.582","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.582","00:00:08.583","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.583","00:00:08.584","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.584","00:00:08.586","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.586","00:00:08.587","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.587","00:00:08.588","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.588","00:00:08.589","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.589","00:00:08.590","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.590","00:00:08.591","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.591","00:00:08.593","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.593","00:00:08.594","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.594","00:00:08.595","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.595","00:00:08.596","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.596","00:00:08.598","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.598","00:00:08.599","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.599","00:00:08.600","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.600","00:00:08.601","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.601","00:00:08.602","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.602","00:00:08.603","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.603","00:00:08.604","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.604","00:00:08.605","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.605","00:00:08.607","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.607","00:00:08.608","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.608","00:00:08.609","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.609","00:00:08.610","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.610","00:00:08.611","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.611","00:00:08.612","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.612","00:00:08.614","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.614","00:00:08.615","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.615","00:00:08.616","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.616","00:00:08.617","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.617","00:00:08.619","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.619","00:00:08.620","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.620","00:00:08.621","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.621","00:00:08.622","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.622","00:00:08.623","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.623","00:00:08.625","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.625","00:00:08.625","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.625","00:00:08.627","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.627","00:00:08.628","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.628","00:00:08.629","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.629","00:00:08.630","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.630","00:00:08.631","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.631","00:00:08.633","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.633","00:00:08.634","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.634","00:00:08.635","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.635","00:00:08.636","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.636","00:00:08.637","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.637","00:00:08.638","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.638","00:00:08.640","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.640","00:00:08.641","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.641","00:00:08.642","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.642","00:00:08.643","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.643","00:00:08.644","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.644","00:00:08.646","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.646","00:00:08.647","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.647","00:00:08.648","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.648","00:00:08.649","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.649","00:00:08.651","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.651","00:00:08.652","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.652","00:00:08.653","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.653","00:00:08.654","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.654","00:00:08.655","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.655","00:00:08.656","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.656","00:00:08.657","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.657","00:00:08.659","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.659","00:00:08.660","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.660","00:00:08.661","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.661","00:00:08.662","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.662","00:00:08.663","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.663","00:00:08.664","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.664","00:00:08.666","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.666","00:00:08.667","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.667","00:00:08.668","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.668","00:00:08.669","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.669","00:00:08.670","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.670","00:00:08.672","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.672","00:00:08.673","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.673","00:00:08.674","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.674","00:00:08.675","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.675","00:00:08.676","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.676","00:00:08.677","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.677","00:00:08.679","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.679","00:00:08.680","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.680","00:00:08.681","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.681","00:00:08.683","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.683","00:00:08.684","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.684","00:00:08.685","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.685","00:00:08.686","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.686","00:00:08.687","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.687","00:00:08.688","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.688","00:00:08.689","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.689","00:00:08.690","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.690","00:00:08.692","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.692","00:00:08.693","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.693","00:00:08.694","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.694","00:00:08.695","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.695","00:00:08.696","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.696","00:00:08.698","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.698","00:00:08.699","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.699","00:00:08.700","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.700","00:00:08.701","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.701","00:00:08.702","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.702","00:00:08.704","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.704","00:00:08.705","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.705","00:00:08.706","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.706","00:00:08.707","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.707","00:00:08.708","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.708","00:00:08.709","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.709","00:00:08.711","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.711","00:00:08.712","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.712","00:00:08.713","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.713","00:00:08.714","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.714","00:00:08.716","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.716","00:00:08.717","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.717","00:00:08.718","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.718","00:00:08.719","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.719","00:00:08.720","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.720","00:00:08.722","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.722","00:00:08.723","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.723","00:00:08.724","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.724","00:00:08.725","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.725","00:00:08.727","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.727","00:00:08.728","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.728","00:00:08.729","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.729","00:00:08.730","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.730","00:00:08.732","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.732","00:00:08.733","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.733","00:00:08.734","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.734","00:00:08.735","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.735","00:00:08.737","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.737","00:00:08.738","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.738","00:00:08.739","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.739","00:00:08.740","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.740","00:00:08.741","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.744","00:00:08.744","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.744","00:00:08.745","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.745","00:00:08.746","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.746","00:00:08.748","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.748","00:00:08.749","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.749","00:00:08.750","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.750","00:00:08.751","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.751","00:00:08.752","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.752","00:00:08.754","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.754","00:00:08.755","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.755","00:00:08.756","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.756","00:00:08.757","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.757","00:00:08.759","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.759","00:00:08.760","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.760","00:00:08.761","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.761","00:00:08.762","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.762","00:00:08.763","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.763","00:00:08.765","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.765","00:00:08.766","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.766","00:00:08.767","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.767","00:00:08.768","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.768","00:00:08.770","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.770","00:00:08.771","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.771","00:00:08.772","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.772","00:00:08.773","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.773","00:00:08.775","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.775","00:00:08.776","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.776","00:00:08.777","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.777","00:00:08.778","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.778","00:00:08.779","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.779","00:00:08.781","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.781","00:00:08.782","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.782","00:00:08.783","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.783","00:00:08.784","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.784","00:00:08.786","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.786","00:00:08.787","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.787","00:00:08.788","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.788","00:00:08.789","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.789","00:00:08.791","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.791","00:00:08.792","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.792","00:00:08.793","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.793","00:00:08.794","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.794","00:00:08.795","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.795","00:00:08.797","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.797","00:00:08.798","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.798","00:00:08.799","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.799","00:00:08.801","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.801","00:00:08.802","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.806","00:00:08.807","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.807","00:00:08.807","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.807","00:00:08.809","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.809","00:00:08.810","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.810","00:00:08.811","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.811","00:00:08.812","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.812","00:00:08.814","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.814","00:00:08.815","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.815","00:00:08.816","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.816","00:00:08.817","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.817","00:00:08.818","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.818","00:00:08.820","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.820","00:00:08.821","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.821","00:00:08.822","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.822","00:00:08.823","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.823","00:00:08.825","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.825","00:00:08.826","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.826","00:00:08.827","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.827","00:00:08.828","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.828","00:00:08.830","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.830","00:00:08.831","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.831","00:00:08.832","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.832","00:00:08.833","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.833","00:00:08.834","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.834","00:00:08.836","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.836","00:00:08.837","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.837","00:00:08.838","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.838","00:00:08.839","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.839","00:00:08.841","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.841","00:00:08.842","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.842","00:00:08.843","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.843","00:00:08.845","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.845","00:00:08.846","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.846","00:00:08.847","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.847","00:00:08.848","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.848","00:00:08.849","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.849","00:00:08.851","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.851","00:00:08.852","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.852","00:00:08.853","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.853","00:00:08.854","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.854","00:00:08.855","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.855","00:00:08.857","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.857","00:00:08.858","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.858","00:00:08.859","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.859","00:00:08.860","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.860","00:00:08.861","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.861","00:00:08.863","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.863","00:00:08.864","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.864","00:00:08.865","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.865","00:00:08.866","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.866","00:00:08.868","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.868","00:00:08.869","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.869","00:00:08.870","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.870","00:00:08.871","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.871","00:00:08.873","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.873","00:00:08.874","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.874","00:00:08.875","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.875","00:00:08.876","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.876","00:00:08.878","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.878","00:00:08.879","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.879","00:00:08.880","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.880","00:00:08.881","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.881","00:00:08.883","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.883","00:00:08.884","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.884","00:00:08.885","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.885","00:00:08.887","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.887","00:00:08.888","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.888","00:00:08.889","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.889","00:00:08.890","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.890","00:00:08.892","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.892","00:00:08.893","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.893","00:00:08.894","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.894","00:00:08.896","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.896","00:00:08.897","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.897","00:00:08.898","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.898","00:00:08.899","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.899","00:00:08.901","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.901","00:00:08.902","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.906","00:00:08.907","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.907","00:00:08.908","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.908","00:00:08.909","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.909","00:00:08.910","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.910","00:00:08.912","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.912","00:00:08.913","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.913","00:00:08.914","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.914","00:00:08.915","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.915","00:00:08.917","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.917","00:00:08.918","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.918","00:00:08.919","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.919","00:00:08.921","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.921","00:00:08.922","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.922","00:00:08.923","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.923","00:00:08.924","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.924","00:00:08.926","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.926","00:00:08.927","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.927","00:00:08.928","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.928","00:00:08.930","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.930","00:00:08.931","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.931","00:00:08.932","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.932","00:00:08.933","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.933","00:00:08.935","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.935","00:00:08.936","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.936","00:00:08.937","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.937","00:00:08.939","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.939","00:00:08.940","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.940","00:00:08.941","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.941","00:00:08.942","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.942","00:00:08.944","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.944","00:00:08.945","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.945","00:00:08.946","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.946","00:00:08.947","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.947","00:00:08.949","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.949","00:00:08.950","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.950","00:00:08.951","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.951","00:00:08.953","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.953","00:00:08.954","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.954","00:00:08.955","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.955","00:00:08.956","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.956","00:00:08.958","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.958","00:00:08.959","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.959","00:00:08.960","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.960","00:00:08.962","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.962","00:00:08.963","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.963","00:00:08.965","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.965","00:00:08.966","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.966","00:00:08.967","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.967","00:00:08.968","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.968","00:00:08.969","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.969","00:00:08.971","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.971","00:00:08.972","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.972","00:00:08.973","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.973","00:00:08.974","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.974","00:00:08.976","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.976","00:00:08.977","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.977","00:00:08.978","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.978","00:00:08.980","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.980","00:00:08.981","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.981","00:00:08.982","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.982","00:00:08.983","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.983","00:00:08.985","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.985","00:00:08.986","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.986","00:00:08.987","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.987","00:00:08.988","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.988","00:00:08.990","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.990","00:00:08.991","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.991","00:00:08.992","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.992","00:00:08.994","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.994","00:00:08.995","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.995","00:00:08.996","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.996","00:00:08.998","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.998","00:00:08.999","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.999","00:00:09.000","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.000","00:00:09.001","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.001","00:00:09.003","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.003","00:00:09.004","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.004","00:00:09.005","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.005","00:00:09.006","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.006","00:00:09.008","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.008","00:00:09.009","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.009","00:00:09.010","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.010","00:00:09.012","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.012","00:00:09.013","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.013","00:00:09.014","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.014","00:00:09.016","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.016","00:00:09.017","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.017","00:00:09.018","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.018","00:00:09.019","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.019","00:00:09.021","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.021","00:00:09.022","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.022","00:00:09.023","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.023","00:00:09.024","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.024","00:00:09.026","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.026","00:00:09.027","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.027","00:00:09.028","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.028","00:00:09.030","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.030","00:00:09.031","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.031","00:00:09.032","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.032","00:00:09.033","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.033","00:00:09.035","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.035","00:00:09.036","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.036","00:00:09.037","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.037","00:00:09.039","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.039","00:00:09.040","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.040","00:00:09.041","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.041","00:00:09.043","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.043","00:00:09.044","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.044","00:00:09.045","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.045","00:00:09.047","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.047","00:00:09.048","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.048","00:00:09.049","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.049","00:00:09.051","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.051","00:00:09.052","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.052","00:00:09.053","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.053","00:00:09.054","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.054","00:00:09.056","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.056","00:00:09.057","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.057","00:00:09.059","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.059","00:00:09.060","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.060","00:00:09.061","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.061","00:00:09.063","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.063","00:00:09.064","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.064","00:00:09.065","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.065","00:00:09.066","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.066","00:00:09.068","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.068","00:00:09.069","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.069","00:00:09.070","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.070","00:00:09.072","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.072","00:00:09.073","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.073","00:00:09.074","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.074","00:00:09.076","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.076","00:00:09.077","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.077","00:00:09.079","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.079","00:00:09.080","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.080","00:00:09.081","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.081","00:00:09.083","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.083","00:00:09.084","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.084","00:00:09.085","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.085","00:00:09.086","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.086","00:00:09.088","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.088","00:00:09.089","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.089","00:00:09.090","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.090","00:00:09.092","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.092","00:00:09.094","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.099","00:00:09.100","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.100","00:00:09.101","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.101","00:00:09.102","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.102","00:00:09.104","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.104","00:00:09.105","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.105","00:00:09.106","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.106","00:00:09.108","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.108","00:00:09.109","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.109","00:00:09.110","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.110","00:00:09.112","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.112","00:00:09.113","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.113","00:00:09.114","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.114","00:00:09.116","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.116","00:00:09.117","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.117","00:00:09.118","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.118","00:00:09.120","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.120","00:00:09.121","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.121","00:00:09.122","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.122","00:00:09.124","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.124","00:00:09.125","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.125","00:00:09.126","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.126","00:00:09.128","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.128","00:00:09.129","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.129","00:00:09.130","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.130","00:00:09.132","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.132","00:00:09.133","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.133","00:00:09.134","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.134","00:00:09.136","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.136","00:00:09.137","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.137","00:00:09.138","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.138","00:00:09.140","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.140","00:00:09.141","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.141","00:00:09.142","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.142","00:00:09.144","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.144","00:00:09.145","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.145","00:00:09.146","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.146","00:00:09.148","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.148","00:00:09.149","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.149","00:00:09.150","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.150","00:00:09.152","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.152","00:00:09.153","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.153","00:00:09.154","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.154","00:00:09.156","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.156","00:00:09.157","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.157","00:00:09.158","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.158","00:00:09.160","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.160","00:00:09.161","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.161","00:00:09.162","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.162","00:00:09.164","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.164","00:00:09.165","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.165","00:00:09.166","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.166","00:00:09.168","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.168","00:00:09.169","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.169","00:00:09.170","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.170","00:00:09.172","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.172","00:00:09.173","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.173","00:00:09.174","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.174","00:00:09.176","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.176","00:00:09.177","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.177","00:00:09.178","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.178","00:00:09.180","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.180","00:00:09.181","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.181","00:00:09.182","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.182","00:00:09.184","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.184","00:00:09.185","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.185","00:00:09.186","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.186","00:00:09.188","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.188","00:00:09.189","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.189","00:00:09.190","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.190","00:00:09.192","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.192","00:00:09.193","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.193","00:00:09.194","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.194","00:00:09.196","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.196","00:00:09.197","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.197","00:00:09.198","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.198","00:00:09.200","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.200","00:00:09.201","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.201","00:00:09.202","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.202","00:00:09.204","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.204","00:00:09.205","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.205","00:00:09.206","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.206","00:00:09.208","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.208","00:00:09.209","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.209","00:00:09.210","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.210","00:00:09.212","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.212","00:00:09.213","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.213","00:00:09.215","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.215","00:00:09.216","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.216","00:00:09.217","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.217","00:00:09.219","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.219","00:00:09.220","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.220","00:00:09.222","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.222","00:00:09.223","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.223","00:00:09.224","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.224","00:00:09.226","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.226","00:00:09.227","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.227","00:00:09.228","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.228","00:00:09.230","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.230","00:00:09.231","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.231","00:00:09.233","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.233","00:00:09.234","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.234","00:00:09.235","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.235","00:00:09.237","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.237","00:00:09.238","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.238","00:00:09.239","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.239","00:00:09.241","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.241","00:00:09.242","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.242","00:00:09.244","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.244","00:00:09.245","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.245","00:00:09.246","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.246","00:00:09.248","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.248","00:00:09.250","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.250","00:00:09.251","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.251","00:00:09.252","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.252","00:00:09.253","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.253","00:00:09.255","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.255","00:00:09.256","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.256","00:00:09.257","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.257","00:00:09.259","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.259","00:00:09.260","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.260","00:00:09.262","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.262","00:00:09.263","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.263","00:00:09.264","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.264","00:00:09.266","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.266","00:00:09.268","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.270","00:00:09.272","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.272","00:00:09.272","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.272","00:00:09.274","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.274","00:00:09.275","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.275","00:00:09.276","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.276","00:00:09.278","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.278","00:00:09.279","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.279","00:00:09.281","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.281","00:00:09.282","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.282","00:00:09.284","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.286","00:00:09.288","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.288","00:00:09.289","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.289","00:00:09.290","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.290","00:00:09.292","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.292","00:00:09.293","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.293","00:00:09.294","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.294","00:00:09.296","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.296","00:00:09.297","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.297","00:00:09.298","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.298","00:00:09.300","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.300","00:00:09.302","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.307","00:00:09.308","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.308","00:00:09.309","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.309","00:00:09.310","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.310","00:00:09.312","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.312","00:00:09.313","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.313","00:00:09.314","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.314","00:00:09.316","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.316","00:00:09.317","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.317","00:00:09.319","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.319","00:00:09.320","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.320","00:00:09.321","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.321","00:00:09.323","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.323","00:00:09.324","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.324","00:00:09.326","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.329","00:00:09.330","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.330","00:00:09.331","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.331","00:00:09.333","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.333","00:00:09.334","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.334","00:00:09.335","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.335","00:00:09.337","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.337","00:00:09.338","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.338","00:00:09.339","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.339","00:00:09.341","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.341","00:00:09.342","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.342","00:00:09.344","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.344","00:00:09.345","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.345","00:00:09.346","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.346","00:00:09.348","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.348","00:00:09.349","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.349","00:00:09.351","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.351","00:00:09.352","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.352","00:00:09.353","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.353","00:00:09.355","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.355","00:00:09.356","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.356","00:00:09.357","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.357","00:00:09.359","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.359","00:00:09.360","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.360","00:00:09.362","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.362","00:00:09.363","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.363","00:00:09.365","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.365","00:00:09.366","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.366","00:00:09.367","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.367","00:00:09.369","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.369","00:00:09.370","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.370","00:00:09.371","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.371","00:00:09.373","0.001s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.373","00:00:09.374","0.002s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.379","00:00:08.380","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.380","00:00:08.381","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.381","00:00:08.382","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.382","00:00:08.384","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.384","00:00:08.385","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.385","00:00:08.387","0.002s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.387","00:00:08.388","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.388","00:00:08.389","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.389","00:00:08.391","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.391","00:00:08.392","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.392","00:00:08.393","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.393","00:00:08.395","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.395","00:00:08.396","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.396","00:00:08.398","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.398","00:00:08.399","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.399","00:00:08.400","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.403","00:00:08.403","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.403","00:00:08.405","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.405","00:00:08.406","0.002s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.406","00:00:08.407","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.407","00:00:08.409","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.409","00:00:08.410","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.410","00:00:08.411","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.411","00:00:08.413","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.413","00:00:08.414","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.414","00:00:08.416","0.002s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.416","00:00:08.417","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.417","00:00:08.418","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.418","00:00:08.420","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.420","00:00:08.421","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.421","00:00:08.422","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.422","00:00:08.424","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.424","00:00:08.425","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.425","00:00:08.427","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.427","00:00:08.428","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.428","00:00:08.429","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.429","00:00:08.431","0.002s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.431","00:00:08.432","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.432","00:00:08.434","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.434","00:00:08.435","0.002s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.435","00:00:08.436","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.436","00:00:08.438","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.438","00:00:08.439","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.439","00:00:08.440","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.440","00:00:08.442","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.442","00:00:08.443","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.443","00:00:08.445","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.445","00:00:08.446","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.446","00:00:08.447","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.447","00:00:08.449","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.449","00:00:08.450","0.002s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.450","00:00:08.452","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.452","00:00:08.453","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.453","00:00:08.454","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.454","00:00:08.456","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.456","00:00:08.457","0.002s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.457","00:00:08.459","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.459","00:00:08.460","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.460","00:00:08.461","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.461","00:00:08.463","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.463","00:00:08.464","0.002s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.464","00:00:08.465","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.465","00:00:08.467","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.467","00:00:08.468","0.001s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.468","00:00:08.470","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.473","00:00:08.474","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.474","00:00:08.476","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.476","00:00:08.477","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.477","00:00:08.478","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.478","00:00:08.480","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.480","00:00:08.481","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.481","00:00:08.483","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.483","00:00:08.484","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.484","00:00:08.485","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.485","00:00:08.487","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.487","00:00:08.488","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.488","00:00:08.489","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.489","00:00:08.491","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.491","00:00:08.492","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.492","00:00:08.494","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.494","00:00:08.495","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.495","00:00:08.497","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.497","00:00:08.498","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.498","00:00:08.499","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.499","00:00:08.501","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.501","00:00:08.502","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.502","00:00:08.503","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.503","00:00:08.505","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.505","00:00:08.506","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.506","00:00:08.508","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.508","00:00:08.509","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.509","00:00:08.510","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.510","00:00:08.512","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.512","00:00:08.513","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.513","00:00:08.515","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.515","00:00:08.517","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.517","00:00:08.517","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.517","00:00:08.519","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.519","00:00:08.520","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.520","00:00:08.522","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.524","00:00:08.526","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.526","00:00:08.527","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.527","00:00:08.528","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.528","00:00:08.529","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.529","00:00:08.530","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.530","00:00:08.532","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.532","00:00:08.533","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.533","00:00:08.535","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.535","00:00:08.536","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.536","00:00:08.538","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.538","00:00:08.540","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.543","00:00:08.544","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.544","00:00:08.545","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.545","00:00:08.546","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.546","00:00:08.547","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.547","00:00:08.548","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.548","00:00:08.549","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.549","00:00:08.551","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.551","00:00:08.552","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.552","00:00:08.553","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.553","00:00:08.554","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.554","00:00:08.555","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.555","00:00:08.556","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.556","00:00:08.557","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.557","00:00:08.559","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.559","00:00:08.560","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.560","00:00:08.561","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.561","00:00:08.562","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.562","00:00:08.563","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.563","00:00:08.565","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.565","00:00:08.566","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.566","00:00:08.567","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.567","00:00:08.568","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.568","00:00:08.569","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.569","00:00:08.570","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.570","00:00:08.571","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.571","00:00:08.573","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.573","00:00:08.574","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.574","00:00:08.575","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.575","00:00:08.576","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.576","00:00:08.578","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.578","00:00:08.579","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.579","00:00:08.580","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.580","00:00:08.581","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.581","00:00:08.582","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.582","00:00:08.583","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.583","00:00:08.584","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.584","00:00:08.586","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.586","00:00:08.587","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.587","00:00:08.588","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.588","00:00:08.589","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.589","00:00:08.590","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.590","00:00:08.592","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.592","00:00:08.593","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.593","00:00:08.594","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.594","00:00:08.595","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.595","00:00:08.596","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.596","00:00:08.598","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.598","00:00:08.599","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.599","00:00:08.600","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.600","00:00:08.601","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.601","00:00:08.602","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.602","00:00:08.603","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.603","00:00:08.604","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.604","00:00:08.606","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.606","00:00:08.607","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.607","00:00:08.608","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.608","00:00:08.609","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.609","00:00:08.610","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.610","00:00:08.611","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.611","00:00:08.613","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.613","00:00:08.614","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.614","00:00:08.615","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.615","00:00:08.616","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.616","00:00:08.617","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.617","00:00:08.619","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.619","00:00:08.620","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.620","00:00:08.621","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.621","00:00:08.622","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.622","00:00:08.624","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.624","00:00:08.625","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.625","00:00:08.626","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.626","00:00:08.627","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.627","00:00:08.628","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.628","00:00:08.629","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.629","00:00:08.630","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.630","00:00:08.631","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.631","00:00:08.633","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.633","00:00:08.634","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.634","00:00:08.635","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.635","00:00:08.636","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.636","00:00:08.637","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.637","00:00:08.638","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.638","00:00:08.640","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.640","00:00:08.641","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.641","00:00:08.642","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.642","00:00:08.643","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.643","00:00:08.645","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.645","00:00:08.646","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.646","00:00:08.647","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.647","00:00:08.648","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.648","00:00:08.650","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.650","00:00:08.651","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.651","00:00:08.652","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.652","00:00:08.653","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.653","00:00:08.654","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.654","00:00:08.655","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.655","00:00:08.656","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.656","00:00:08.658","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.658","00:00:08.659","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.659","00:00:08.660","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.660","00:00:08.661","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.661","00:00:08.662","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.662","00:00:08.663","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.663","00:00:08.665","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.665","00:00:08.666","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.666","00:00:08.667","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.667","00:00:08.668","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.668","00:00:08.669","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.669","00:00:08.670","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.670","00:00:08.672","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.672","00:00:08.673","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.673","00:00:08.674","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.674","00:00:08.675","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.675","00:00:08.676","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.676","00:00:08.678","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.678","00:00:08.679","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.679","00:00:08.680","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.680","00:00:08.681","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.681","00:00:08.683","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.683","00:00:08.684","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.684","00:00:08.685","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.685","00:00:08.686","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.686","00:00:08.687","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.687","00:00:08.688","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.688","00:00:08.689","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.689","00:00:08.691","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.691","00:00:08.692","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.692","00:00:08.693","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.693","00:00:08.694","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.694","00:00:08.695","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.695","00:00:08.696","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.696","00:00:08.698","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.698","00:00:08.699","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.699","00:00:08.700","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.700","00:00:08.701","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.701","00:00:08.702","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.702","00:00:08.704","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.704","00:00:08.705","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.705","00:00:08.706","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.706","00:00:08.707","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.707","00:00:08.708","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.708","00:00:08.710","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.710","00:00:08.711","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.711","00:00:08.712","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.712","00:00:08.713","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.713","00:00:08.714","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.714","00:00:08.716","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.716","00:00:08.717","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.717","00:00:08.718","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.718","00:00:08.719","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.719","00:00:08.721","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.721","00:00:08.722","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.722","00:00:08.723","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.723","00:00:08.724","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.724","00:00:08.725","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.725","00:00:08.727","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.727","00:00:08.728","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.728","00:00:08.729","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.729","00:00:08.730","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.730","00:00:08.732","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.732","00:00:08.733","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.733","00:00:08.734","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.734","00:00:08.735","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.735","00:00:08.737","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.737","00:00:08.738","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.738","00:00:08.739","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.739","00:00:08.740","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.740","00:00:08.742","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.742","00:00:08.744","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.744","00:00:08.744","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.744","00:00:08.745","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.745","00:00:08.747","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.747","00:00:08.748","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.748","00:00:08.749","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.749","00:00:08.750","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.750","00:00:08.751","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.751","00:00:08.752","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.752","00:00:08.754","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.754","00:00:08.755","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.755","00:00:08.756","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.756","00:00:08.757","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.757","00:00:08.759","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.759","00:00:08.760","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.760","00:00:08.761","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.761","00:00:08.762","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.762","00:00:08.764","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.764","00:00:08.765","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.765","00:00:08.766","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.766","00:00:08.767","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.767","00:00:08.769","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.769","00:00:08.770","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.770","00:00:08.771","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.771","00:00:08.772","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.772","00:00:08.773","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.773","00:00:08.775","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.775","00:00:08.776","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.776","00:00:08.777","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.777","00:00:08.778","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.778","00:00:08.779","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.779","00:00:08.781","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.781","00:00:08.782","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.782","00:00:08.783","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.783","00:00:08.784","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.784","00:00:08.786","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.786","00:00:08.787","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.787","00:00:08.788","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.788","00:00:08.789","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.789","00:00:08.791","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.791","00:00:08.792","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.792","00:00:08.793","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.793","00:00:08.794","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.794","00:00:08.795","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.795","00:00:08.797","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.797","00:00:08.798","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.798","00:00:08.799","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.799","00:00:08.801","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.801","00:00:08.802","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.806","00:00:08.807","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.807","00:00:08.807","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.807","00:00:08.809","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.809","00:00:08.810","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.810","00:00:08.811","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.811","00:00:08.812","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.812","00:00:08.814","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.814","00:00:08.815","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.815","00:00:08.816","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.816","00:00:08.817","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.817","00:00:08.819","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.819","00:00:08.820","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.820","00:00:08.821","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.821","00:00:08.822","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.822","00:00:08.824","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.824","00:00:08.825","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.825","00:00:08.826","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.826","00:00:08.827","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.827","00:00:08.828","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.828","00:00:08.830","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.830","00:00:08.831","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.831","00:00:08.832","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.832","00:00:08.833","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.833","00:00:08.835","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.835","00:00:08.836","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.836","00:00:08.837","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.837","00:00:08.838","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.838","00:00:08.839","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.839","00:00:08.841","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.841","00:00:08.842","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.842","00:00:08.843","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.843","00:00:08.845","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.845","00:00:08.846","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.846","00:00:08.847","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.847","00:00:08.848","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.848","00:00:08.850","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.850","00:00:08.851","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.851","00:00:08.852","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.852","00:00:08.853","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.853","00:00:08.854","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.854","00:00:08.856","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.856","00:00:08.857","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.857","00:00:08.858","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.858","00:00:08.859","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.859","00:00:08.860","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.860","00:00:08.862","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.862","00:00:08.863","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.863","00:00:08.864","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.864","00:00:08.865","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.865","00:00:08.867","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.867","00:00:08.868","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.868","00:00:08.869","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.869","00:00:08.870","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.870","00:00:08.872","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.872","00:00:08.873","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.873","00:00:08.874","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.874","00:00:08.875","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.875","00:00:08.876","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.876","00:00:08.878","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.878","00:00:08.879","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.879","00:00:08.880","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.880","00:00:08.882","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.882","00:00:08.883","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.883","00:00:08.884","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.884","00:00:08.885","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.885","00:00:08.887","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.887","00:00:08.888","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.888","00:00:08.889","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.889","00:00:08.891","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.891","00:00:08.892","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.892","00:00:08.893","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.893","00:00:08.894","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.894","00:00:08.896","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.896","00:00:08.897","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.897","00:00:08.898","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.898","00:00:08.899","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.899","00:00:08.901","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.901","00:00:08.902","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.906","00:00:08.907","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.907","00:00:08.908","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.908","00:00:08.909","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.909","00:00:08.910","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.910","00:00:08.912","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.912","00:00:08.913","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.913","00:00:08.914","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.914","00:00:08.915","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.915","00:00:08.917","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.917","00:00:08.918","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.918","00:00:08.919","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.919","00:00:08.921","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.921","00:00:08.922","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.922","00:00:08.923","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.923","00:00:08.924","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.924","00:00:08.926","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.926","00:00:08.927","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.927","00:00:08.928","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.928","00:00:08.930","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.930","00:00:08.931","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.931","00:00:08.932","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.932","00:00:08.933","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.933","00:00:08.935","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.935","00:00:08.936","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.936","00:00:08.937","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.937","00:00:08.939","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.939","00:00:08.940","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.940","00:00:08.941","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.941","00:00:08.942","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.942","00:00:08.944","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.944","00:00:08.945","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.945","00:00:08.946","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.946","00:00:08.948","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.948","00:00:08.949","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.949","00:00:08.950","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.950","00:00:08.951","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.951","00:00:08.953","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.953","00:00:08.954","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.954","00:00:08.955","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.955","00:00:08.957","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.957","00:00:08.958","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.958","00:00:08.959","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.959","00:00:08.961","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.961","00:00:08.962","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.962","00:00:08.963","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.963","00:00:08.965","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.965","00:00:08.966","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.966","00:00:08.967","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.967","00:00:08.968","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.968","00:00:08.969","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.969","00:00:08.971","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.971","00:00:08.972","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.972","00:00:08.973","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.973","00:00:08.975","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.975","00:00:08.976","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.976","00:00:08.977","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.977","00:00:08.978","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.978","00:00:08.980","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.980","00:00:08.981","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.981","00:00:08.982","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.982","00:00:08.983","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.983","00:00:08.985","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.985","00:00:08.986","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.986","00:00:08.987","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.987","00:00:08.988","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.988","00:00:08.990","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.990","00:00:08.991","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.991","00:00:08.993","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.993","00:00:08.994","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.994","00:00:08.995","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.995","00:00:08.996","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.996","00:00:08.998","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.998","00:00:08.999","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.999","00:00:09.000","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.000","00:00:09.001","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.001","00:00:09.003","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.003","00:00:09.004","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.004","00:00:09.005","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.005","00:00:09.006","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.006","00:00:09.008","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.008","00:00:09.009","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.009","00:00:09.010","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.010","00:00:09.012","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.012","00:00:09.013","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.013","00:00:09.014","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.014","00:00:09.016","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.016","00:00:09.017","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.017","00:00:09.018","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.018","00:00:09.019","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.019","00:00:09.021","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.021","00:00:09.022","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.022","00:00:09.023","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.023","00:00:09.024","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.024","00:00:09.026","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.026","00:00:09.027","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.027","00:00:09.028","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.028","00:00:09.030","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.030","00:00:09.031","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.031","00:00:09.032","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.032","00:00:09.034","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.034","00:00:09.035","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.035","00:00:09.036","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.036","00:00:09.037","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.037","00:00:09.039","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.039","00:00:09.040","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.040","00:00:09.041","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.041","00:00:09.043","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.043","00:00:09.044","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.044","00:00:09.045","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.045","00:00:09.047","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.047","00:00:09.048","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.048","00:00:09.049","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.049","00:00:09.051","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.051","00:00:09.052","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.052","00:00:09.053","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.053","00:00:09.055","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.055","00:00:09.056","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.056","00:00:09.057","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.057","00:00:09.059","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.059","00:00:09.060","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.060","00:00:09.061","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.061","00:00:09.063","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.063","00:00:09.064","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.064","00:00:09.065","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.065","00:00:09.066","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.066","00:00:09.068","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.068","00:00:09.069","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.069","00:00:09.070","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.070","00:00:09.072","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.072","00:00:09.073","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.073","00:00:09.074","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.074","00:00:09.076","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.076","00:00:09.077","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.077","00:00:09.079","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.079","00:00:09.080","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.080","00:00:09.081","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.081","00:00:09.083","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.083","00:00:09.084","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.084","00:00:09.085","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.085","00:00:09.086","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.086","00:00:09.088","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.088","00:00:09.089","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.089","00:00:09.090","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.090","00:00:09.092","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.092","00:00:09.094","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.099","00:00:09.100","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.100","00:00:09.101","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.101","00:00:09.102","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.102","00:00:09.104","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.104","00:00:09.105","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.105","00:00:09.107","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.107","00:00:09.108","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.108","00:00:09.109","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.109","00:00:09.110","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.110","00:00:09.112","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.112","00:00:09.113","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.113","00:00:09.114","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.114","00:00:09.116","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.116","00:00:09.117","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.117","00:00:09.118","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.118","00:00:09.120","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.120","00:00:09.121","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.121","00:00:09.122","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.122","00:00:09.124","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.124","00:00:09.125","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.125","00:00:09.126","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.126","00:00:09.128","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.128","00:00:09.129","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.129","00:00:09.130","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.130","00:00:09.132","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.132","00:00:09.133","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.133","00:00:09.134","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.134","00:00:09.136","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.136","00:00:09.137","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.137","00:00:09.138","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.138","00:00:09.140","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.140","00:00:09.141","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.141","00:00:09.142","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.142","00:00:09.144","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.144","00:00:09.145","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.145","00:00:09.146","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.146","00:00:09.148","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.148","00:00:09.149","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.149","00:00:09.151","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.151","00:00:09.152","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.152","00:00:09.153","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.153","00:00:09.154","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.154","00:00:09.156","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.156","00:00:09.157","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.157","00:00:09.158","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.158","00:00:09.160","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.160","00:00:09.161","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.161","00:00:09.162","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.162","00:00:09.164","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.164","00:00:09.165","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.165","00:00:09.166","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.166","00:00:09.168","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.168","00:00:09.169","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.169","00:00:09.170","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.170","00:00:09.172","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.172","00:00:09.173","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.173","00:00:09.174","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.174","00:00:09.176","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.176","00:00:09.177","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.177","00:00:09.178","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.178","00:00:09.180","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.180","00:00:09.181","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.181","00:00:09.182","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.182","00:00:09.184","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.184","00:00:09.185","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.185","00:00:09.186","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.186","00:00:09.188","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.188","00:00:09.189","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.189","00:00:09.190","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.190","00:00:09.192","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.192","00:00:09.193","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.193","00:00:09.194","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.194","00:00:09.196","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.196","00:00:09.197","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.197","00:00:09.198","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.198","00:00:09.200","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.200","00:00:09.201","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.201","00:00:09.202","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.202","00:00:09.204","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.204","00:00:09.205","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.205","00:00:09.206","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.206","00:00:09.208","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.208","00:00:09.209","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.209","00:00:09.211","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.211","00:00:09.212","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.212","00:00:09.213","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.213","00:00:09.215","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.215","00:00:09.216","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.216","00:00:09.217","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.217","00:00:09.219","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.219","00:00:09.220","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.220","00:00:09.222","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.222","00:00:09.223","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.223","00:00:09.224","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.224","00:00:09.226","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.226","00:00:09.227","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.227","00:00:09.228","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.228","00:00:09.230","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.230","00:00:09.231","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.231","00:00:09.233","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.233","00:00:09.234","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.234","00:00:09.235","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.235","00:00:09.237","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.237","00:00:09.238","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.238","00:00:09.240","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.240","00:00:09.241","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.241","00:00:09.242","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.242","00:00:09.244","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.244","00:00:09.245","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.245","00:00:09.247","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.247","00:00:09.248","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.248","00:00:09.250","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.250","00:00:09.251","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.251","00:00:09.252","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.252","00:00:09.253","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.253","00:00:09.255","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.255","00:00:09.256","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.256","00:00:09.257","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.257","00:00:09.259","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.259","00:00:09.260","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.260","00:00:09.262","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.262","00:00:09.263","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.263","00:00:09.264","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.264","00:00:09.266","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.266","00:00:09.268","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.270","00:00:09.272","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.272","00:00:09.273","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.273","00:00:09.274","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.274","00:00:09.275","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.275","00:00:09.277","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.277","00:00:09.278","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.278","00:00:09.279","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.279","00:00:09.281","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.281","00:00:09.282","0.001s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.282","00:00:09.284","0.002s"],[467,11,"00:00:08.414","00:00:08.539","00:00:08.526","00:00:08.527","0.001s"],[467,11,"00:00:08.414","00:00:08.539","00:00:08.527","00:00:08.528","0.001s"],[467,11,"00:00:08.414","00:00:08.539","00:00:08.528","00:00:08.529","0.001s"],[467,11,"00:00:08.414","00:00:08.539","00:00:08.529","00:00:08.530","0.001s"],[467,11,"00:00:08.414","00:00:08.539","00:00:08.530","00:00:08.532","0.001s"],[467,11,"00:00:08.414","00:00:08.539","00:00:08.532","00:00:08.533","0.001s"],[467,11,"00:00:08.414","00:00:08.539","00:00:08.533","00:00:08.534","0.001s"],[467,11,"00:00:08.414","00:00:08.539","00:00:08.534","00:00:08.536","0.001s"],[467,11,"00:00:08.414","00:00:08.539","00:00:08.536","00:00:08.537","0.002s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.543","00:00:08.544","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.544","00:00:08.545","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.545","00:00:08.546","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.546","00:00:08.547","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.547","00:00:08.548","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.548","00:00:08.549","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.549","00:00:08.551","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.551","00:00:08.552","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.552","00:00:08.553","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.553","00:00:08.554","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.554","00:00:08.555","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.555","00:00:08.556","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.556","00:00:08.557","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.557","00:00:08.559","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.559","00:00:08.560","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.560","00:00:08.561","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.561","00:00:08.562","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.562","00:00:08.563","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.563","00:00:08.565","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.565","00:00:08.566","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.566","00:00:08.567","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.567","00:00:08.568","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.568","00:00:08.569","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.569","00:00:08.570","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.570","00:00:08.571","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.571","00:00:08.573","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.573","00:00:08.574","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.574","00:00:08.575","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.575","00:00:08.576","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.576","00:00:08.578","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.578","00:00:08.579","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.579","00:00:08.580","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.580","00:00:08.581","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.581","00:00:08.582","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.582","00:00:08.583","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.583","00:00:08.584","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.584","00:00:08.586","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.586","00:00:08.587","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.587","00:00:08.588","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.588","00:00:08.589","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.589","00:00:08.590","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.590","00:00:08.592","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.592","00:00:08.593","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.593","00:00:08.594","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.594","00:00:08.595","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.595","00:00:08.596","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.596","00:00:08.598","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.598","00:00:08.599","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.599","00:00:08.600","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.600","00:00:08.601","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.601","00:00:08.602","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.602","00:00:08.603","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.603","00:00:08.604","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.604","00:00:08.606","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.606","00:00:08.607","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.607","00:00:08.608","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.608","00:00:08.609","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.609","00:00:08.610","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.610","00:00:08.611","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.611","00:00:08.613","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.613","00:00:08.614","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.614","00:00:08.615","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.615","00:00:08.616","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.616","00:00:08.617","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.617","00:00:08.619","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.619","00:00:08.620","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.620","00:00:08.621","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.621","00:00:08.622","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.622","00:00:08.624","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.624","00:00:08.625","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.625","00:00:08.626","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.626","00:00:08.627","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.627","00:00:08.628","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.628","00:00:08.629","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.629","00:00:08.630","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.630","00:00:08.631","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.631","00:00:08.633","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.633","00:00:08.634","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.634","00:00:08.635","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.635","00:00:08.636","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.636","00:00:08.637","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.637","00:00:08.638","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.638","00:00:08.640","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.640","00:00:08.641","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.641","00:00:08.642","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.642","00:00:08.643","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.643","00:00:08.645","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.645","00:00:08.646","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.646","00:00:08.647","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.647","00:00:08.648","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.648","00:00:08.650","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.650","00:00:08.651","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.651","00:00:08.652","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.652","00:00:08.653","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.653","00:00:08.654","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.654","00:00:08.655","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.655","00:00:08.656","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.656","00:00:08.658","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.658","00:00:08.659","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.659","00:00:08.660","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.660","00:00:08.661","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.661","00:00:08.662","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.662","00:00:08.663","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.663","00:00:08.665","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.665","00:00:08.666","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.666","00:00:08.667","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.667","00:00:08.668","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.668","00:00:08.669","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.669","00:00:08.670","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.670","00:00:08.672","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.672","00:00:08.673","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.673","00:00:08.674","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.674","00:00:08.675","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.675","00:00:08.676","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.676","00:00:08.678","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.678","00:00:08.679","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.679","00:00:08.680","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.680","00:00:08.681","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.681","00:00:08.683","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.683","00:00:08.684","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.684","00:00:08.685","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.685","00:00:08.686","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.686","00:00:08.687","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.687","00:00:08.688","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.688","00:00:08.689","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.689","00:00:08.691","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.691","00:00:08.692","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.692","00:00:08.693","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.693","00:00:08.694","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.694","00:00:08.695","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.695","00:00:08.696","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.696","00:00:08.698","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.698","00:00:08.699","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.699","00:00:08.700","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.700","00:00:08.701","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.701","00:00:08.702","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.702","00:00:08.704","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.704","00:00:08.705","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.705","00:00:08.706","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.706","00:00:08.707","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.707","00:00:08.708","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.708","00:00:08.710","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.710","00:00:08.711","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.711","00:00:08.712","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.712","00:00:08.713","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.713","00:00:08.714","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.714","00:00:08.716","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.716","00:00:08.717","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.717","00:00:08.718","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.718","00:00:08.719","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.719","00:00:08.721","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.721","00:00:08.722","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.722","00:00:08.723","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.723","00:00:08.724","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.724","00:00:08.725","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.725","00:00:08.727","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.727","00:00:08.728","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.728","00:00:08.729","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.729","00:00:08.730","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.730","00:00:08.732","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.732","00:00:08.733","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.733","00:00:08.734","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.734","00:00:08.735","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.735","00:00:08.737","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.737","00:00:08.738","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.738","00:00:08.739","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.739","00:00:08.740","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.740","00:00:08.742","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.742","00:00:08.744","0.002s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.744","00:00:08.744","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.744","00:00:08.745","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.745","00:00:08.747","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.747","00:00:08.748","0.002s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.748","00:00:08.749","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.749","00:00:08.750","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.750","00:00:08.751","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.751","00:00:08.752","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.752","00:00:08.754","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.754","00:00:08.755","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.755","00:00:08.756","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.756","00:00:08.757","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.757","00:00:08.759","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.759","00:00:08.760","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.760","00:00:08.761","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.761","00:00:08.762","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.762","00:00:08.764","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.764","00:00:08.765","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.765","00:00:08.766","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.766","00:00:08.767","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.767","00:00:08.769","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.769","00:00:08.770","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.770","00:00:08.771","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.771","00:00:08.772","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.772","00:00:08.773","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.773","00:00:08.775","0.002s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.775","00:00:08.776","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.776","00:00:08.777","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.777","00:00:08.778","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.778","00:00:08.779","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.779","00:00:08.781","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.781","00:00:08.782","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.782","00:00:08.783","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.783","00:00:08.784","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.784","00:00:08.786","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.786","00:00:08.787","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.787","00:00:08.788","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.788","00:00:08.789","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.789","00:00:08.791","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.791","00:00:08.792","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.792","00:00:08.793","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.793","00:00:08.794","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.794","00:00:08.795","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.795","00:00:08.797","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.797","00:00:08.798","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.798","00:00:08.799","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.799","00:00:08.801","0.001s"],[245,218,"00:00:08.457","00:00:08.802","00:00:08.801","00:00:08.802","0.002s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.544","00:00:08.545","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.545","00:00:08.546","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.546","00:00:08.547","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.547","00:00:08.548","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.548","00:00:08.549","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.549","00:00:08.551","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.551","00:00:08.552","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.552","00:00:08.553","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.553","00:00:08.554","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.554","00:00:08.555","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.555","00:00:08.556","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.556","00:00:08.557","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.557","00:00:08.559","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.559","00:00:08.560","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.560","00:00:08.561","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.561","00:00:08.562","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.562","00:00:08.563","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.563","00:00:08.565","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.565","00:00:08.566","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.566","00:00:08.567","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.567","00:00:08.568","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.568","00:00:08.569","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.569","00:00:08.570","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.570","00:00:08.571","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.571","00:00:08.573","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.573","00:00:08.574","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.574","00:00:08.575","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.575","00:00:08.576","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.576","00:00:08.578","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.578","00:00:08.579","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.579","00:00:08.580","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.580","00:00:08.581","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.581","00:00:08.582","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.582","00:00:08.583","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.583","00:00:08.584","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.584","00:00:08.586","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.586","00:00:08.587","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.587","00:00:08.588","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.588","00:00:08.589","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.589","00:00:08.590","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.590","00:00:08.592","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.592","00:00:08.593","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.593","00:00:08.594","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.594","00:00:08.595","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.595","00:00:08.596","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.596","00:00:08.598","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.598","00:00:08.599","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.599","00:00:08.600","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.600","00:00:08.601","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.601","00:00:08.602","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.602","00:00:08.603","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.603","00:00:08.604","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.604","00:00:08.606","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.606","00:00:08.607","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.607","00:00:08.608","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.608","00:00:08.609","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.609","00:00:08.610","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.610","00:00:08.611","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.611","00:00:08.613","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.613","00:00:08.614","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.614","00:00:08.615","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.615","00:00:08.616","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.616","00:00:08.617","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.617","00:00:08.619","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.619","00:00:08.620","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.620","00:00:08.621","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.621","00:00:08.622","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.622","00:00:08.624","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.624","00:00:08.625","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.625","00:00:08.626","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.626","00:00:08.627","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.627","00:00:08.628","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.628","00:00:08.629","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.629","00:00:08.630","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.630","00:00:08.631","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.631","00:00:08.633","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.633","00:00:08.634","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.634","00:00:08.635","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.635","00:00:08.636","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.636","00:00:08.637","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.637","00:00:08.639","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.639","00:00:08.640","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.640","00:00:08.641","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.641","00:00:08.642","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.642","00:00:08.643","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.643","00:00:08.645","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.645","00:00:08.646","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.646","00:00:08.647","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.647","00:00:08.648","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.648","00:00:08.650","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.650","00:00:08.651","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.651","00:00:08.652","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.652","00:00:08.653","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.653","00:00:08.654","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.654","00:00:08.655","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.655","00:00:08.656","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.656","00:00:08.658","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.658","00:00:08.659","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.659","00:00:08.660","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.660","00:00:08.661","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.661","00:00:08.662","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.662","00:00:08.663","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.663","00:00:08.665","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.665","00:00:08.666","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.666","00:00:08.667","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.667","00:00:08.668","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.668","00:00:08.669","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.669","00:00:08.670","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.670","00:00:08.672","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.672","00:00:08.673","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.673","00:00:08.674","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.674","00:00:08.675","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.675","00:00:08.676","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.676","00:00:08.678","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.678","00:00:08.679","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.679","00:00:08.680","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.680","00:00:08.682","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.682","00:00:08.683","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.683","00:00:08.684","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.684","00:00:08.685","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.685","00:00:08.686","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.686","00:00:08.687","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.687","00:00:08.688","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.688","00:00:08.689","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.689","00:00:08.691","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.691","00:00:08.692","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.692","00:00:08.693","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.693","00:00:08.694","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.694","00:00:08.695","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.695","00:00:08.696","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.696","00:00:08.698","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.698","00:00:08.699","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.699","00:00:08.700","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.700","00:00:08.701","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.701","00:00:08.702","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.702","00:00:08.704","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.704","00:00:08.705","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.705","00:00:08.706","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.706","00:00:08.707","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.707","00:00:08.709","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.709","00:00:08.710","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.710","00:00:08.711","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.711","00:00:08.712","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.712","00:00:08.713","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.713","00:00:08.714","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.714","00:00:08.716","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.716","00:00:08.717","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.717","00:00:08.718","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.718","00:00:08.719","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.719","00:00:08.721","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.721","00:00:08.722","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.722","00:00:08.723","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.723","00:00:08.724","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.724","00:00:08.725","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.725","00:00:08.727","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.727","00:00:08.728","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.728","00:00:08.729","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.729","00:00:08.731","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.731","00:00:08.732","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.732","00:00:08.733","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.733","00:00:08.734","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.734","00:00:08.735","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.735","00:00:08.737","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.737","00:00:08.738","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.738","00:00:08.739","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.739","00:00:08.740","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.740","00:00:08.742","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.742","00:00:08.744","0.002s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.744","00:00:08.744","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.744","00:00:08.745","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.745","00:00:08.747","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.747","00:00:08.748","0.002s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.748","00:00:08.749","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.749","00:00:08.750","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.750","00:00:08.751","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.751","00:00:08.752","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.752","00:00:08.754","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.754","00:00:08.755","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.755","00:00:08.756","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.756","00:00:08.757","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.757","00:00:08.759","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.759","00:00:08.760","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.760","00:00:08.761","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.761","00:00:08.762","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.762","00:00:08.764","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.764","00:00:08.765","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.765","00:00:08.766","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.766","00:00:08.767","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.767","00:00:08.769","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.769","00:00:08.770","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.770","00:00:08.771","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.771","00:00:08.772","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.772","00:00:08.773","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.773","00:00:08.775","0.002s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.775","00:00:08.776","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.776","00:00:08.777","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.777","00:00:08.778","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.778","00:00:08.780","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.780","00:00:08.781","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.781","00:00:08.782","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.782","00:00:08.783","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.783","00:00:08.784","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.784","00:00:08.786","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.786","00:00:08.787","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.787","00:00:08.788","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.788","00:00:08.789","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.789","00:00:08.791","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.791","00:00:08.792","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.792","00:00:08.793","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.793","00:00:08.794","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.794","00:00:08.795","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.795","00:00:08.797","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.797","00:00:08.798","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.798","00:00:08.799","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.799","00:00:08.801","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.801","00:00:08.802","0.002s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.806","00:00:08.807","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.807","00:00:08.808","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.808","00:00:08.809","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.809","00:00:08.810","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.810","00:00:08.811","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.811","00:00:08.812","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.812","00:00:08.814","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.814","00:00:08.815","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.815","00:00:08.816","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.816","00:00:08.817","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.817","00:00:08.819","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.819","00:00:08.820","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.820","00:00:08.821","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.821","00:00:08.822","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.822","00:00:08.824","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.824","00:00:08.825","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.825","00:00:08.826","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.826","00:00:08.827","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.827","00:00:08.828","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.828","00:00:08.830","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.830","00:00:08.831","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.831","00:00:08.832","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.832","00:00:08.833","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.833","00:00:08.835","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.835","00:00:08.836","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.836","00:00:08.837","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.837","00:00:08.838","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.838","00:00:08.839","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.839","00:00:08.841","0.002s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.841","00:00:08.842","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.842","00:00:08.843","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.843","00:00:08.845","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.845","00:00:08.846","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.846","00:00:08.847","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.847","00:00:08.848","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.848","00:00:08.850","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.850","00:00:08.851","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.851","00:00:08.852","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.852","00:00:08.853","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.853","00:00:08.854","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.854","00:00:08.856","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.856","00:00:08.857","0.002s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.857","00:00:08.858","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.858","00:00:08.859","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.859","00:00:08.860","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.860","00:00:08.862","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.862","00:00:08.863","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.863","00:00:08.864","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.864","00:00:08.865","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.865","00:00:08.867","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.867","00:00:08.868","0.002s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.868","00:00:08.869","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.869","00:00:08.870","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.870","00:00:08.872","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.872","00:00:08.873","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.873","00:00:08.874","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.874","00:00:08.875","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.875","00:00:08.877","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.877","00:00:08.878","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.878","00:00:08.879","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.879","00:00:08.880","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.880","00:00:08.882","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.882","00:00:08.883","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.883","00:00:08.884","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.884","00:00:08.885","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.885","00:00:08.887","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.887","00:00:08.888","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.888","00:00:08.889","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.889","00:00:08.891","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.891","00:00:08.892","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.892","00:00:08.893","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.893","00:00:08.894","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.894","00:00:08.896","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.896","00:00:08.897","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.897","00:00:08.898","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.898","00:00:08.899","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.899","00:00:08.901","0.001s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.901","00:00:08.902","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.807","00:00:08.807","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.807","00:00:08.809","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.809","00:00:08.810","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.810","00:00:08.811","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.811","00:00:08.812","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.812","00:00:08.814","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.814","00:00:08.815","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.815","00:00:08.816","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.816","00:00:08.817","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.817","00:00:08.818","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.818","00:00:08.820","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.820","00:00:08.821","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.821","00:00:08.822","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.822","00:00:08.824","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.824","00:00:08.825","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.825","00:00:08.826","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.826","00:00:08.827","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.827","00:00:08.828","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.828","00:00:08.830","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.830","00:00:08.831","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.831","00:00:08.832","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.832","00:00:08.833","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.833","00:00:08.834","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.834","00:00:08.836","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.836","00:00:08.837","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.837","00:00:08.838","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.838","00:00:08.839","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.839","00:00:08.841","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.841","00:00:08.842","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.842","00:00:08.843","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.843","00:00:08.845","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.845","00:00:08.846","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.846","00:00:08.847","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.847","00:00:08.848","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.848","00:00:08.850","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.850","00:00:08.851","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.851","00:00:08.852","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.852","00:00:08.853","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.853","00:00:08.854","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.854","00:00:08.856","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.856","00:00:08.857","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.857","00:00:08.858","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.858","00:00:08.859","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.859","00:00:08.860","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.860","00:00:08.862","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.862","00:00:08.863","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.863","00:00:08.864","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.864","00:00:08.865","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.865","00:00:08.867","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.867","00:00:08.868","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.868","00:00:08.869","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.869","00:00:08.870","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.870","00:00:08.872","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.872","00:00:08.873","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.873","00:00:08.874","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.874","00:00:08.875","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.875","00:00:08.876","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.876","00:00:08.878","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.878","00:00:08.879","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.879","00:00:08.880","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.880","00:00:08.882","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.882","00:00:08.883","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.883","00:00:08.884","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.884","00:00:08.885","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.885","00:00:08.887","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.887","00:00:08.888","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.888","00:00:08.889","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.889","00:00:08.891","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.891","00:00:08.892","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.892","00:00:08.893","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.893","00:00:08.894","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.894","00:00:08.896","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.896","00:00:08.897","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.897","00:00:08.898","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.898","00:00:08.899","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.899","00:00:08.901","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.901","00:00:08.902","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.906","00:00:08.907","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.907","00:00:08.908","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.908","00:00:08.909","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.909","00:00:08.910","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.910","00:00:08.912","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.912","00:00:08.913","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.913","00:00:08.914","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.914","00:00:08.915","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.915","00:00:08.917","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.917","00:00:08.918","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.918","00:00:08.919","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.919","00:00:08.921","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.921","00:00:08.922","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.922","00:00:08.923","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.923","00:00:08.924","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.924","00:00:08.926","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.926","00:00:08.927","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.927","00:00:08.928","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.928","00:00:08.930","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.930","00:00:08.931","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.931","00:00:08.932","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.932","00:00:08.933","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.933","00:00:08.935","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.935","00:00:08.936","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.936","00:00:08.937","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.937","00:00:08.939","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.939","00:00:08.940","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.940","00:00:08.941","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.941","00:00:08.942","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.942","00:00:08.944","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.944","00:00:08.945","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.945","00:00:08.946","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.946","00:00:08.948","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.948","00:00:08.949","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.949","00:00:08.950","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.950","00:00:08.951","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.951","00:00:08.953","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.953","00:00:08.954","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.954","00:00:08.955","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.955","00:00:08.957","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.957","00:00:08.958","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.958","00:00:08.959","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.959","00:00:08.961","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.961","00:00:08.962","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.962","00:00:08.963","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.963","00:00:08.965","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.965","00:00:08.966","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.966","00:00:08.967","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.967","00:00:08.968","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.968","00:00:08.969","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.969","00:00:08.971","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.971","00:00:08.972","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.972","00:00:08.973","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.973","00:00:08.974","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.974","00:00:08.976","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.976","00:00:08.977","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.977","00:00:08.978","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.978","00:00:08.980","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.980","00:00:08.981","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.981","00:00:08.982","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.982","00:00:08.983","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.983","00:00:08.985","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.985","00:00:08.986","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.986","00:00:08.987","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.987","00:00:08.988","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.988","00:00:08.990","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.990","00:00:08.991","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.991","00:00:08.993","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.993","00:00:08.994","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.994","00:00:08.995","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.995","00:00:08.996","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.996","00:00:08.998","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.998","00:00:08.999","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.999","00:00:09.000","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.000","00:00:09.001","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.001","00:00:09.003","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.003","00:00:09.004","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.004","00:00:09.005","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.005","00:00:09.006","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.006","00:00:09.008","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.008","00:00:09.009","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.009","00:00:09.010","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.010","00:00:09.012","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.012","00:00:09.013","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.013","00:00:09.014","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.014","00:00:09.016","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.016","00:00:09.017","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.017","00:00:09.018","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.018","00:00:09.019","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.019","00:00:09.021","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.021","00:00:09.022","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.022","00:00:09.023","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.023","00:00:09.024","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.024","00:00:09.026","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.026","00:00:09.027","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.027","00:00:09.028","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.028","00:00:09.030","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.030","00:00:09.031","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.031","00:00:09.032","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.032","00:00:09.033","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.033","00:00:09.035","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.035","00:00:09.036","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.036","00:00:09.037","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.037","00:00:09.039","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.039","00:00:09.040","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.040","00:00:09.041","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.041","00:00:09.043","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.043","00:00:09.044","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.044","00:00:09.045","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.045","00:00:09.047","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.047","00:00:09.048","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.048","00:00:09.049","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.049","00:00:09.051","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.051","00:00:09.052","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.052","00:00:09.053","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.053","00:00:09.055","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.055","00:00:09.056","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.056","00:00:09.057","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.057","00:00:09.059","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.059","00:00:09.060","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.060","00:00:09.061","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.061","00:00:09.063","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.063","00:00:09.064","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.064","00:00:09.065","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.065","00:00:09.066","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.066","00:00:09.068","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.068","00:00:09.069","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.069","00:00:09.070","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.070","00:00:09.072","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.072","00:00:09.073","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.073","00:00:09.074","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.074","00:00:09.076","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.076","00:00:09.077","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.077","00:00:09.079","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.079","00:00:09.080","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.080","00:00:09.081","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.081","00:00:09.083","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.083","00:00:09.084","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.084","00:00:09.085","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.085","00:00:09.086","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.086","00:00:09.088","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.088","00:00:09.089","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.089","00:00:09.090","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.090","00:00:09.092","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.092","00:00:09.094","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.099","00:00:09.100","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.100","00:00:09.101","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.101","00:00:09.102","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.102","00:00:09.104","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.104","00:00:09.105","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.105","00:00:09.106","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.106","00:00:09.108","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.108","00:00:09.109","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.109","00:00:09.110","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.110","00:00:09.112","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.112","00:00:09.113","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.113","00:00:09.114","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.114","00:00:09.116","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.116","00:00:09.117","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.117","00:00:09.118","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.118","00:00:09.120","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.120","00:00:09.121","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.121","00:00:09.122","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.122","00:00:09.124","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.124","00:00:09.125","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.125","00:00:09.126","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.126","00:00:09.128","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.128","00:00:09.129","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.129","00:00:09.130","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.130","00:00:09.132","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.132","00:00:09.133","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.133","00:00:09.134","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.134","00:00:09.136","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.136","00:00:09.137","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.137","00:00:09.138","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.138","00:00:09.140","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.140","00:00:09.141","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.141","00:00:09.142","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.142","00:00:09.144","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.144","00:00:09.145","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.145","00:00:09.146","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.146","00:00:09.148","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.148","00:00:09.149","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.149","00:00:09.150","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.150","00:00:09.152","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.152","00:00:09.153","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.153","00:00:09.154","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.154","00:00:09.156","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.156","00:00:09.157","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.157","00:00:09.158","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.158","00:00:09.160","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.160","00:00:09.161","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.161","00:00:09.162","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.162","00:00:09.164","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.164","00:00:09.165","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.165","00:00:09.166","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.166","00:00:09.168","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.168","00:00:09.169","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.169","00:00:09.170","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.170","00:00:09.172","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.172","00:00:09.173","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.173","00:00:09.174","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.174","00:00:09.176","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.176","00:00:09.177","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.177","00:00:09.178","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.178","00:00:09.180","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.180","00:00:09.181","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.181","00:00:09.182","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.182","00:00:09.184","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.184","00:00:09.185","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.185","00:00:09.186","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.186","00:00:09.188","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.188","00:00:09.189","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.189","00:00:09.190","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.190","00:00:09.192","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.192","00:00:09.193","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.193","00:00:09.194","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.194","00:00:09.196","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.196","00:00:09.197","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.197","00:00:09.198","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.198","00:00:09.200","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.200","00:00:09.201","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.201","00:00:09.202","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.202","00:00:09.204","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.204","00:00:09.205","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.205","00:00:09.206","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.206","00:00:09.208","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.208","00:00:09.209","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.209","00:00:09.210","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.210","00:00:09.212","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.212","00:00:09.213","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.213","00:00:09.215","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.215","00:00:09.216","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.216","00:00:09.217","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.217","00:00:09.219","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.219","00:00:09.220","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.220","00:00:09.222","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.222","00:00:09.223","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.223","00:00:09.224","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.224","00:00:09.226","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.226","00:00:09.227","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.227","00:00:09.228","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.228","00:00:09.230","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.230","00:00:09.231","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.231","00:00:09.233","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.233","00:00:09.234","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.234","00:00:09.235","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.235","00:00:09.237","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.237","00:00:09.238","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.238","00:00:09.239","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.239","00:00:09.241","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.241","00:00:09.242","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.242","00:00:09.244","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.244","00:00:09.245","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.245","00:00:09.247","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.247","00:00:09.248","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.248","00:00:09.250","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.250","00:00:09.251","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.251","00:00:09.252","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.252","00:00:09.253","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.253","00:00:09.255","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.255","00:00:09.256","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.256","00:00:09.257","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.257","00:00:09.259","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.259","00:00:09.260","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.260","00:00:09.262","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.262","00:00:09.263","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.263","00:00:09.264","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.264","00:00:09.266","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.266","00:00:09.268","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.270","00:00:09.272","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.272","00:00:09.273","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.273","00:00:09.274","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.274","00:00:09.275","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.275","00:00:09.277","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.277","00:00:09.278","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.278","00:00:09.279","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.279","00:00:09.281","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.281","00:00:09.282","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.282","00:00:09.284","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.286","00:00:09.288","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.288","00:00:09.289","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.289","00:00:09.290","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.290","00:00:09.292","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.292","00:00:09.293","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.293","00:00:09.294","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.294","00:00:09.296","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.296","00:00:09.297","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.297","00:00:09.298","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.298","00:00:09.300","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.300","00:00:09.302","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.307","00:00:09.308","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.308","00:00:09.309","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.309","00:00:09.310","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.310","00:00:09.312","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.312","00:00:09.313","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.313","00:00:09.315","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.315","00:00:09.316","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.316","00:00:09.317","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.317","00:00:09.319","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.319","00:00:09.320","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.320","00:00:09.321","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.321","00:00:09.323","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.323","00:00:09.324","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.324","00:00:09.326","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.329","00:00:09.330","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.330","00:00:09.331","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.331","00:00:09.333","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.333","00:00:09.334","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.334","00:00:09.336","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.336","00:00:09.337","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.337","00:00:09.338","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.338","00:00:09.340","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.340","00:00:09.341","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.341","00:00:09.342","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.342","00:00:09.344","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.344","00:00:09.345","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.345","00:00:09.347","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.347","00:00:09.348","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.348","00:00:09.349","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.349","00:00:09.351","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.351","00:00:09.352","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.352","00:00:09.353","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.353","00:00:09.355","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.355","00:00:09.356","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.356","00:00:09.358","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.358","00:00:09.359","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.359","00:00:09.360","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.360","00:00:09.362","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.362","00:00:09.363","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.363","00:00:09.365","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.365","00:00:09.366","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.366","00:00:09.367","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.367","00:00:09.369","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.369","00:00:09.370","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.370","00:00:09.371","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.371","00:00:09.373","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.373","00:00:09.375","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.375","00:00:09.375","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.375","00:00:09.377","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.377","00:00:09.378","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.378","00:00:09.379","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.379","00:00:09.381","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.381","00:00:09.382","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.382","00:00:09.383","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.383","00:00:09.385","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.385","00:00:09.386","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.386","00:00:09.388","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.388","00:00:09.389","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.389","00:00:09.391","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.391","00:00:09.392","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.392","00:00:09.393","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.397","00:00:09.398","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.398","00:00:09.399","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.399","00:00:09.400","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.400","00:00:09.401","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.401","00:00:09.402","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.402","00:00:09.403","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.403","00:00:09.405","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.405","00:00:09.406","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.406","00:00:09.407","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.407","00:00:09.408","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.408","00:00:09.410","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.410","00:00:09.411","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.411","00:00:09.412","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.412","00:00:09.413","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.413","00:00:09.415","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.415","00:00:09.416","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.416","00:00:09.417","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.417","00:00:09.418","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.418","00:00:09.419","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.419","00:00:09.421","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.421","00:00:09.422","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.422","00:00:09.423","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.423","00:00:09.424","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.424","00:00:09.426","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.426","00:00:09.427","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.427","00:00:09.428","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.428","00:00:09.429","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.429","00:00:09.431","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.431","00:00:09.432","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.432","00:00:09.433","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.433","00:00:09.434","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.434","00:00:09.435","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.435","00:00:09.437","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.437","00:00:09.438","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.438","00:00:09.439","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.439","00:00:09.440","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.440","00:00:09.442","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.442","00:00:09.443","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.443","00:00:09.444","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.444","00:00:09.445","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.445","00:00:09.447","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.447","00:00:09.448","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.448","00:00:09.449","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.449","00:00:09.450","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.450","00:00:09.452","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.452","00:00:09.453","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.453","00:00:09.455","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.455","00:00:09.456","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.456","00:00:09.458","0.002s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.458","00:00:09.459","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.459","00:00:09.460","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.460","00:00:09.461","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.461","00:00:09.462","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.462","00:00:09.464","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.464","00:00:09.465","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.465","00:00:09.466","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.466","00:00:09.467","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.467","00:00:09.469","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.469","00:00:09.470","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.470","00:00:09.471","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.471","00:00:09.473","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.473","00:00:09.474","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.474","00:00:09.475","0.001s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.475","00:00:09.477","0.002s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.907","00:00:08.908","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.908","00:00:08.909","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.909","00:00:08.910","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.910","00:00:08.912","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.912","00:00:08.913","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.913","00:00:08.914","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.914","00:00:08.915","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.915","00:00:08.917","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.917","00:00:08.918","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.918","00:00:08.919","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.919","00:00:08.921","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.921","00:00:08.922","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.922","00:00:08.923","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.923","00:00:08.924","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.924","00:00:08.926","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.926","00:00:08.927","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.927","00:00:08.928","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.928","00:00:08.929","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.929","00:00:08.931","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.931","00:00:08.932","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.932","00:00:08.933","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.933","00:00:08.935","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.935","00:00:08.936","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.936","00:00:08.937","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.937","00:00:08.938","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.938","00:00:08.940","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.940","00:00:08.941","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.941","00:00:08.942","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.942","00:00:08.944","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.944","00:00:08.945","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.945","00:00:08.946","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.946","00:00:08.947","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.947","00:00:08.949","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.949","00:00:08.950","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.950","00:00:08.951","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.951","00:00:08.952","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.952","00:00:08.954","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.954","00:00:08.955","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.955","00:00:08.956","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.956","00:00:08.958","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.958","00:00:08.959","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.959","00:00:08.960","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.960","00:00:08.962","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.962","00:00:08.963","0.002s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.963","00:00:08.965","0.002s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.965","00:00:08.965","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.965","00:00:08.967","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.967","00:00:08.968","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.968","00:00:08.969","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.969","00:00:08.970","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.970","00:00:08.972","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.972","00:00:08.973","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.973","00:00:08.974","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.974","00:00:08.976","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.976","00:00:08.977","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.977","00:00:08.978","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.978","00:00:08.980","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.980","00:00:08.981","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.981","00:00:08.982","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.982","00:00:08.983","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.983","00:00:08.985","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.985","00:00:08.986","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.986","00:00:08.987","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.987","00:00:08.988","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.988","00:00:08.990","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.990","00:00:08.991","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.991","00:00:08.992","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.992","00:00:08.994","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.994","00:00:08.995","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.995","00:00:08.996","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.996","00:00:08.997","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.997","00:00:08.999","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:08.999","00:00:09.000","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.000","00:00:09.001","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.001","00:00:09.003","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.003","00:00:09.004","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.004","00:00:09.005","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.005","00:00:09.006","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.006","00:00:09.008","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.008","00:00:09.009","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.009","00:00:09.010","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.010","00:00:09.012","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.012","00:00:09.013","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.013","00:00:09.014","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.014","00:00:09.015","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.015","00:00:09.017","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.017","00:00:09.018","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.018","00:00:09.019","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.019","00:00:09.020","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.020","00:00:09.022","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.022","00:00:09.023","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.023","00:00:09.024","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.024","00:00:09.026","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.026","00:00:09.027","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.027","00:00:09.028","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.028","00:00:09.030","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.030","00:00:09.031","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.031","00:00:09.032","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.032","00:00:09.033","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.033","00:00:09.035","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.035","00:00:09.036","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.036","00:00:09.037","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.037","00:00:09.038","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.038","00:00:09.040","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.040","00:00:09.041","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.041","00:00:09.043","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.043","00:00:09.044","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.044","00:00:09.045","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.045","00:00:09.047","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.047","00:00:09.048","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.048","00:00:09.049","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.049","00:00:09.050","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.050","00:00:09.052","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.052","00:00:09.053","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.053","00:00:09.054","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.054","00:00:09.056","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.056","00:00:09.057","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.057","00:00:09.058","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.058","00:00:09.060","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.060","00:00:09.061","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.061","00:00:09.062","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.062","00:00:09.064","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.064","00:00:09.065","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.065","00:00:09.066","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.066","00:00:09.068","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.068","00:00:09.069","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.069","00:00:09.070","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.070","00:00:09.072","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.072","00:00:09.073","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.073","00:00:09.074","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.074","00:00:09.076","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.076","00:00:09.077","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.077","00:00:09.078","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.078","00:00:09.080","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.080","00:00:09.081","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.081","00:00:09.083","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.083","00:00:09.084","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.084","00:00:09.085","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.085","00:00:09.086","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.086","00:00:09.088","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.088","00:00:09.089","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.089","00:00:09.090","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.090","00:00:09.092","0.001s"],[343,145,"00:00:08.651","00:00:09.093","00:00:09.092","00:00:09.093","0.002s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.100","00:00:09.101","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.101","00:00:09.102","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.102","00:00:09.104","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.104","00:00:09.105","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.105","00:00:09.106","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.106","00:00:09.108","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.108","00:00:09.109","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.109","00:00:09.110","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.110","00:00:09.112","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.112","00:00:09.113","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.113","00:00:09.114","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.114","00:00:09.116","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.116","00:00:09.117","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.117","00:00:09.118","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.118","00:00:09.120","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.120","00:00:09.121","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.121","00:00:09.122","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.122","00:00:09.124","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.124","00:00:09.125","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.125","00:00:09.126","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.126","00:00:09.128","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.128","00:00:09.129","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.129","00:00:09.130","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.130","00:00:09.132","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.132","00:00:09.133","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.133","00:00:09.134","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.134","00:00:09.136","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.136","00:00:09.137","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.137","00:00:09.138","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.138","00:00:09.140","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.140","00:00:09.141","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.141","00:00:09.142","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.142","00:00:09.144","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.144","00:00:09.145","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.145","00:00:09.146","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.146","00:00:09.148","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.148","00:00:09.149","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.149","00:00:09.150","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.150","00:00:09.152","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.152","00:00:09.153","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.153","00:00:09.154","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.154","00:00:09.156","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.156","00:00:09.157","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.157","00:00:09.158","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.158","00:00:09.160","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.160","00:00:09.161","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.161","00:00:09.162","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.162","00:00:09.164","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.164","00:00:09.165","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.165","00:00:09.166","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.166","00:00:09.168","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.168","00:00:09.169","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.169","00:00:09.170","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.170","00:00:09.172","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.172","00:00:09.173","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.173","00:00:09.174","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.174","00:00:09.176","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.176","00:00:09.177","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.177","00:00:09.178","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.178","00:00:09.180","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.180","00:00:09.181","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.181","00:00:09.182","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.182","00:00:09.184","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.184","00:00:09.185","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.185","00:00:09.186","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.186","00:00:09.188","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.188","00:00:09.189","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.189","00:00:09.190","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.190","00:00:09.192","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.192","00:00:09.193","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.193","00:00:09.194","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.194","00:00:09.196","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.196","00:00:09.197","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.197","00:00:09.198","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.198","00:00:09.200","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.200","00:00:09.201","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.201","00:00:09.202","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.202","00:00:09.204","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.204","00:00:09.205","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.205","00:00:09.206","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.206","00:00:09.208","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.208","00:00:09.209","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.209","00:00:09.210","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.210","00:00:09.212","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.212","00:00:09.213","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.213","00:00:09.215","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.215","00:00:09.216","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.216","00:00:09.217","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.217","00:00:09.219","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.219","00:00:09.220","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.220","00:00:09.221","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.221","00:00:09.223","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.223","00:00:09.224","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.224","00:00:09.226","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.226","00:00:09.227","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.227","00:00:09.228","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.228","00:00:09.230","0.002s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.230","00:00:09.231","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.231","00:00:09.233","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.233","00:00:09.234","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.234","00:00:09.235","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.235","00:00:09.237","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.237","00:00:09.238","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.238","00:00:09.239","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.239","00:00:09.241","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.241","00:00:09.242","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.242","00:00:09.244","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.244","00:00:09.245","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.245","00:00:09.246","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.246","00:00:09.248","0.002s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.248","00:00:09.250","0.002s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.250","00:00:09.251","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.251","00:00:09.252","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.252","00:00:09.253","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.253","00:00:09.255","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.255","00:00:09.256","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.256","00:00:09.257","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.257","00:00:09.259","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.259","00:00:09.260","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.260","00:00:09.261","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.261","00:00:09.263","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.263","00:00:09.264","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.264","00:00:09.266","0.001s"],[13,125,"00:00:08.741","00:00:09.267","00:00:09.266","00:00:09.267","0.002s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.272","00:00:09.273","0.001s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.273","00:00:09.274","0.001s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.274","00:00:09.275","0.001s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.275","00:00:09.277","0.001s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.277","00:00:09.278","0.001s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.278","00:00:09.279","0.001s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.279","00:00:09.281","0.001s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.281","00:00:09.282","0.001s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.282","00:00:09.284","0.002s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.286","00:00:09.288","0.001s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.288","00:00:09.289","0.001s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.289","00:00:09.290","0.001s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.290","00:00:09.292","0.002s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.292","00:00:09.293","0.001s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.293","00:00:09.294","0.002s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.294","00:00:09.296","0.002s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.296","00:00:09.297","0.001s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.297","00:00:09.298","0.001s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.298","00:00:09.300","0.002s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.300","00:00:09.302","0.002s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.288","00:00:09.289","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.289","00:00:09.290","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.290","00:00:09.292","0.002s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.292","00:00:09.293","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.293","00:00:09.294","0.002s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.294","00:00:09.296","0.002s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.296","00:00:09.297","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.297","00:00:09.298","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.298","00:00:09.300","0.002s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.300","00:00:09.301","0.002s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.307","00:00:09.308","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.308","00:00:09.309","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.309","00:00:09.310","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.310","00:00:09.312","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.312","00:00:09.313","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.313","00:00:09.314","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.314","00:00:09.316","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.316","00:00:09.317","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.317","00:00:09.319","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.319","00:00:09.320","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.320","00:00:09.321","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.321","00:00:09.323","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.323","00:00:09.324","0.001s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.324","00:00:09.326","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.308","00:00:09.309","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.309","00:00:09.310","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.310","00:00:09.312","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.312","00:00:09.313","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.313","00:00:09.314","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.314","00:00:09.316","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.316","00:00:09.317","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.317","00:00:09.319","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.319","00:00:09.320","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.320","00:00:09.321","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.321","00:00:09.323","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.323","00:00:09.324","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.324","00:00:09.326","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.329","00:00:09.330","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.330","00:00:09.331","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.331","00:00:09.333","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.333","00:00:09.334","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.334","00:00:09.336","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.336","00:00:09.337","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.337","00:00:09.338","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.338","00:00:09.340","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.340","00:00:09.341","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.341","00:00:09.342","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.342","00:00:09.344","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.344","00:00:09.345","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.345","00:00:09.347","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.347","00:00:09.348","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.348","00:00:09.349","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.349","00:00:09.351","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.351","00:00:09.352","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.352","00:00:09.353","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.353","00:00:09.355","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.355","00:00:09.356","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.356","00:00:09.358","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.358","00:00:09.359","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.359","00:00:09.360","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.360","00:00:09.362","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.362","00:00:09.363","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.363","00:00:09.365","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.365","00:00:09.366","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.366","00:00:09.367","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.367","00:00:09.369","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.369","00:00:09.370","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.370","00:00:09.371","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.371","00:00:09.373","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.373","00:00:09.375","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.375","00:00:09.375","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.375","00:00:09.377","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.377","00:00:09.378","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.378","00:00:09.379","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.379","00:00:09.381","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.381","00:00:09.382","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.382","00:00:09.383","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.383","00:00:09.385","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.385","00:00:09.386","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.386","00:00:09.388","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.388","00:00:09.389","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.389","00:00:09.391","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.391","00:00:09.392","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.392","00:00:09.393","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.397","00:00:09.398","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.398","00:00:09.398","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.398","00:00:09.400","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.400","00:00:09.401","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.401","00:00:09.402","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.402","00:00:09.403","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.403","00:00:09.405","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.405","00:00:09.406","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.406","00:00:09.407","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.407","00:00:09.408","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.408","00:00:09.409","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.409","00:00:09.411","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.411","00:00:09.412","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.412","00:00:09.413","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.413","00:00:09.415","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.415","00:00:09.416","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.416","00:00:09.417","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.417","00:00:09.418","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.418","00:00:09.419","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.419","00:00:09.421","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.421","00:00:09.422","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.422","00:00:09.423","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.423","00:00:09.424","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.424","00:00:09.426","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.426","00:00:09.427","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.427","00:00:09.428","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.428","00:00:09.429","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.429","00:00:09.431","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.431","00:00:09.432","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.432","00:00:09.433","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.433","00:00:09.434","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.434","00:00:09.435","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.435","00:00:09.437","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.437","00:00:09.438","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.438","00:00:09.439","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.439","00:00:09.440","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.440","00:00:09.441","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.441","00:00:09.443","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.443","00:00:09.444","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.444","00:00:09.445","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.445","00:00:09.446","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.446","00:00:09.448","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.448","00:00:09.449","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.449","00:00:09.450","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.450","00:00:09.452","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.452","00:00:09.453","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.453","00:00:09.455","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.455","00:00:09.456","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.456","00:00:09.458","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.458","00:00:09.459","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.459","00:00:09.460","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.460","00:00:09.461","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.461","00:00:09.462","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.462","00:00:09.463","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.463","00:00:09.465","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.465","00:00:09.466","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.466","00:00:09.467","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.467","00:00:09.469","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.469","00:00:09.470","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.470","00:00:09.471","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.471","00:00:09.472","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.472","00:00:09.474","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.474","00:00:09.475","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.475","00:00:09.477","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.479","00:00:09.481","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.481","00:00:09.482","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.482","00:00:09.483","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.483","00:00:09.485","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.485","00:00:09.486","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.486","00:00:09.487","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.487","00:00:09.489","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.489","00:00:09.491","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.491","00:00:09.492","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.492","00:00:09.493","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.493","00:00:09.494","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.494","00:00:09.495","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.495","00:00:09.496","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.496","00:00:09.497","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.497","00:00:09.499","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.499","00:00:09.500","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.500","00:00:09.501","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.501","00:00:09.502","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.502","00:00:09.503","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.503","00:00:09.504","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.504","00:00:09.506","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.506","00:00:09.507","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.507","00:00:09.508","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.508","00:00:09.509","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.509","00:00:09.510","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.510","00:00:09.512","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.512","00:00:09.513","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.513","00:00:09.514","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.514","00:00:09.515","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.515","00:00:09.516","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.516","00:00:09.517","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.517","00:00:09.519","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.519","00:00:09.520","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.520","00:00:09.521","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.521","00:00:09.522","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.522","00:00:09.524","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.524","00:00:09.525","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.525","00:00:09.526","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.526","00:00:09.527","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.527","00:00:09.528","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.528","00:00:09.529","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.529","00:00:09.530","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.530","00:00:09.532","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.532","00:00:09.533","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.533","00:00:09.534","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.534","00:00:09.535","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.535","00:00:09.536","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.536","00:00:09.537","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.537","00:00:09.539","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.539","00:00:09.540","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.540","00:00:09.541","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.541","00:00:09.542","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.542","00:00:09.543","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.543","00:00:09.545","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.545","00:00:09.546","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.546","00:00:09.547","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.547","00:00:09.548","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.548","00:00:09.549","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.549","00:00:09.551","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.551","00:00:09.552","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.552","00:00:09.553","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.553","00:00:09.554","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.554","00:00:09.555","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.555","00:00:09.556","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.556","00:00:09.558","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.558","00:00:09.559","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.559","00:00:09.560","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.560","00:00:09.561","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.561","00:00:09.562","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.562","00:00:09.564","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.564","00:00:09.565","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.565","00:00:09.566","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.566","00:00:09.567","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.567","00:00:09.569","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.569","00:00:09.570","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.570","00:00:09.571","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.571","00:00:09.572","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.572","00:00:09.573","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.573","00:00:09.575","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.575","00:00:09.576","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.576","00:00:09.577","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.577","00:00:09.579","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.579","00:00:09.580","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.580","00:00:09.582","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.582","00:00:09.583","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.583","00:00:09.585","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.585","00:00:09.586","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.586","00:00:09.587","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.587","00:00:09.588","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.588","00:00:09.589","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.589","00:00:09.590","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.590","00:00:09.592","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.592","00:00:09.593","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.593","00:00:09.594","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.594","00:00:09.595","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.595","00:00:09.597","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.597","00:00:09.598","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.598","00:00:09.599","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.599","00:00:09.600","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.600","00:00:09.602","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.602","00:00:09.603","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.603","00:00:09.605","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.605","00:00:09.606","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.606","00:00:09.608","0.002s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.608","00:00:09.609","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.609","00:00:09.610","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.610","00:00:09.611","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.611","00:00:09.612","0.001s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.612","00:00:09.614","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.330","00:00:09.331","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.331","00:00:09.333","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.333","00:00:09.334","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.334","00:00:09.336","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.336","00:00:09.337","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.337","00:00:09.338","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.338","00:00:09.340","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.340","00:00:09.341","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.341","00:00:09.342","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.342","00:00:09.344","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.344","00:00:09.345","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.345","00:00:09.347","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.347","00:00:09.348","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.348","00:00:09.349","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.349","00:00:09.351","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.351","00:00:09.352","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.352","00:00:09.354","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.354","00:00:09.355","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.355","00:00:09.356","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.356","00:00:09.358","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.358","00:00:09.359","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.359","00:00:09.360","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.360","00:00:09.362","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.362","00:00:09.363","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.363","00:00:09.365","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.365","00:00:09.366","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.366","00:00:09.367","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.367","00:00:09.369","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.369","00:00:09.370","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.370","00:00:09.371","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.371","00:00:09.373","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.373","00:00:09.375","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.375","00:00:09.376","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.376","00:00:09.377","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.377","00:00:09.378","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.378","00:00:09.379","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.379","00:00:09.381","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.381","00:00:09.382","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.382","00:00:09.383","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.383","00:00:09.385","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.385","00:00:09.386","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.386","00:00:09.388","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.388","00:00:09.389","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.389","00:00:09.391","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.391","00:00:09.392","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.392","00:00:09.393","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.397","00:00:09.398","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.398","00:00:09.399","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.399","00:00:09.400","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.400","00:00:09.401","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.401","00:00:09.402","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.402","00:00:09.403","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.403","00:00:09.405","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.405","00:00:09.406","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.406","00:00:09.407","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.407","00:00:09.408","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.408","00:00:09.410","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.410","00:00:09.411","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.411","00:00:09.412","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.412","00:00:09.413","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.413","00:00:09.415","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.415","00:00:09.416","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.416","00:00:09.417","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.417","00:00:09.418","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.418","00:00:09.420","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.420","00:00:09.421","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.421","00:00:09.422","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.422","00:00:09.423","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.423","00:00:09.424","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.424","00:00:09.426","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.426","00:00:09.427","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.427","00:00:09.428","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.428","00:00:09.429","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.429","00:00:09.431","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.431","00:00:09.432","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.432","00:00:09.433","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.433","00:00:09.434","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.434","00:00:09.435","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.435","00:00:09.437","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.437","00:00:09.438","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.438","00:00:09.439","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.439","00:00:09.440","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.440","00:00:09.442","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.442","00:00:09.443","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.443","00:00:09.444","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.444","00:00:09.445","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.445","00:00:09.447","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.447","00:00:09.448","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.448","00:00:09.449","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.449","00:00:09.450","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.450","00:00:09.452","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.452","00:00:09.453","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.453","00:00:09.455","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.455","00:00:09.456","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.456","00:00:09.458","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.458","00:00:09.459","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.459","00:00:09.460","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.460","00:00:09.461","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.461","00:00:09.462","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.462","00:00:09.464","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.464","00:00:09.465","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.465","00:00:09.466","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.466","00:00:09.467","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.467","00:00:09.469","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.469","00:00:09.470","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.470","00:00:09.471","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.471","00:00:09.473","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.473","00:00:09.474","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.474","00:00:09.475","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.475","00:00:09.477","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.479","00:00:09.481","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.481","00:00:09.482","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.482","00:00:09.483","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.483","00:00:09.485","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.485","00:00:09.486","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.486","00:00:09.488","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.488","00:00:09.489","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.489","00:00:09.491","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.491","00:00:09.492","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.492","00:00:09.493","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.493","00:00:09.494","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.494","00:00:09.495","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.495","00:00:09.496","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.496","00:00:09.497","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.497","00:00:09.499","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.499","00:00:09.500","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.500","00:00:09.501","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.501","00:00:09.502","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.502","00:00:09.503","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.503","00:00:09.505","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.505","00:00:09.506","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.506","00:00:09.507","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.507","00:00:09.508","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.508","00:00:09.509","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.509","00:00:09.510","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.510","00:00:09.512","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.512","00:00:09.513","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.513","00:00:09.514","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.514","00:00:09.515","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.515","00:00:09.516","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.516","00:00:09.517","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.517","00:00:09.519","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.519","00:00:09.520","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.520","00:00:09.521","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.521","00:00:09.522","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.522","00:00:09.524","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.524","00:00:09.525","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.525","00:00:09.526","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.526","00:00:09.527","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.527","00:00:09.528","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.528","00:00:09.529","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.529","00:00:09.530","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.530","00:00:09.532","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.532","00:00:09.533","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.533","00:00:09.534","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.534","00:00:09.535","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.535","00:00:09.536","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.536","00:00:09.537","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.537","00:00:09.539","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.539","00:00:09.540","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.540","00:00:09.541","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.541","00:00:09.542","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.542","00:00:09.543","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.543","00:00:09.545","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.545","00:00:09.546","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.546","00:00:09.547","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.547","00:00:09.548","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.548","00:00:09.549","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.549","00:00:09.551","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.551","00:00:09.552","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.552","00:00:09.553","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.553","00:00:09.554","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.554","00:00:09.555","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.555","00:00:09.556","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.556","00:00:09.558","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.558","00:00:09.559","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.559","00:00:09.560","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.560","00:00:09.561","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.561","00:00:09.562","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.562","00:00:09.564","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.564","00:00:09.565","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.565","00:00:09.566","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.566","00:00:09.567","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.567","00:00:09.569","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.569","00:00:09.570","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.570","00:00:09.571","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.571","00:00:09.572","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.572","00:00:09.573","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.573","00:00:09.575","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.575","00:00:09.576","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.576","00:00:09.577","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.577","00:00:09.579","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.579","00:00:09.580","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.580","00:00:09.582","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.582","00:00:09.583","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.583","00:00:09.585","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.585","00:00:09.586","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.586","00:00:09.587","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.587","00:00:09.588","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.588","00:00:09.589","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.589","00:00:09.590","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.590","00:00:09.592","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.592","00:00:09.593","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.593","00:00:09.594","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.594","00:00:09.595","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.595","00:00:09.597","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.597","00:00:09.598","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.598","00:00:09.599","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.599","00:00:09.600","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.600","00:00:09.602","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.602","00:00:09.603","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.603","00:00:09.605","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.605","00:00:09.606","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.606","00:00:09.608","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.608","00:00:09.609","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.609","00:00:09.610","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.610","00:00:09.611","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.611","00:00:09.612","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.612","00:00:09.614","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.614","00:00:09.616","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.616","00:00:09.617","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.617","00:00:09.617","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.617","00:00:09.619","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.619","00:00:09.620","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.620","00:00:09.621","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.621","00:00:09.623","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.623","00:00:09.624","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.624","00:00:09.625","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.625","00:00:09.626","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.626","00:00:09.628","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.628","00:00:09.629","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.629","00:00:09.630","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.630","00:00:09.631","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.631","00:00:09.632","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.632","00:00:09.634","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.634","00:00:09.635","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.635","00:00:09.636","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.636","00:00:09.637","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.637","00:00:09.638","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.638","00:00:09.640","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.640","00:00:09.641","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.641","00:00:09.642","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.642","00:00:09.643","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.643","00:00:09.644","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.644","00:00:09.646","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.646","00:00:09.647","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.647","00:00:09.648","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.648","00:00:09.649","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.649","00:00:09.651","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.651","00:00:09.652","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.652","00:00:09.653","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.653","00:00:09.654","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.654","00:00:09.655","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.655","00:00:09.657","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.657","00:00:09.658","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.658","00:00:09.659","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.659","00:00:09.660","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.660","00:00:09.662","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.662","00:00:09.663","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.663","00:00:09.664","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.664","00:00:09.665","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.665","00:00:09.667","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.667","00:00:09.668","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.668","00:00:09.669","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.669","00:00:09.670","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.670","00:00:09.671","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.671","00:00:09.673","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.673","00:00:09.674","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.674","00:00:09.675","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.675","00:00:09.676","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.676","00:00:09.678","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.678","00:00:09.679","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.679","00:00:09.680","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.680","00:00:09.681","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.681","00:00:09.682","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.682","00:00:09.684","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.684","00:00:09.685","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.685","00:00:09.686","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.686","00:00:09.688","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.688","00:00:09.689","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.689","00:00:09.690","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.690","00:00:09.691","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.691","00:00:09.692","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.692","00:00:09.693","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.693","00:00:09.695","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.695","00:00:09.696","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.696","00:00:09.697","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.697","00:00:09.698","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.698","00:00:09.700","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.700","00:00:09.701","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.701","00:00:09.702","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.702","00:00:09.703","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.703","00:00:09.705","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.705","00:00:09.706","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.706","00:00:09.707","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.707","00:00:09.708","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.708","00:00:09.710","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.710","00:00:09.711","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.711","00:00:09.712","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.712","00:00:09.713","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.713","00:00:09.714","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.714","00:00:09.716","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.716","00:00:09.717","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.717","00:00:09.718","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.718","00:00:09.720","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.720","00:00:09.721","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.721","00:00:09.723","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.723","00:00:09.724","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.730","00:00:09.732","0.002s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.732","00:00:09.733","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.733","00:00:09.735","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.735","00:00:09.736","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.736","00:00:09.737","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.737","00:00:09.738","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.738","00:00:09.739","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.739","00:00:09.741","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.741","00:00:09.742","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.742","00:00:09.743","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.743","00:00:09.745","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.745","00:00:09.746","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.746","00:00:09.747","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.747","00:00:09.748","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.748","00:00:09.750","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.750","00:00:09.751","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.751","00:00:09.752","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.752","00:00:09.753","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.753","00:00:09.755","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.755","00:00:09.756","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.756","00:00:09.758","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.758","00:00:09.759","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.759","00:00:09.760","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.760","00:00:09.761","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.761","00:00:09.762","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.762","00:00:09.764","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.764","00:00:09.765","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.765","00:00:09.766","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.766","00:00:09.768","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.768","00:00:09.769","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.769","00:00:09.770","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.770","00:00:09.771","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.771","00:00:09.773","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.773","00:00:09.774","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.774","00:00:09.775","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.775","00:00:09.776","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.776","00:00:09.778","0.001s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.778","00:00:09.780","0.002s"],[225,13,"00:00:09.248","00:00:09.393","00:00:09.377","00:00:09.378","0.001s"],[225,13,"00:00:09.248","00:00:09.393","00:00:09.378","00:00:09.379","0.001s"],[225,13,"00:00:09.248","00:00:09.393","00:00:09.379","00:00:09.380","0.001s"],[225,13,"00:00:09.248","00:00:09.393","00:00:09.380","00:00:09.382","0.001s"],[225,13,"00:00:09.248","00:00:09.393","00:00:09.382","00:00:09.383","0.001s"],[225,13,"00:00:09.248","00:00:09.393","00:00:09.383","00:00:09.385","0.002s"],[225,13,"00:00:09.248","00:00:09.393","00:00:09.385","00:00:09.386","0.002s"],[225,13,"00:00:09.248","00:00:09.393","00:00:09.386","00:00:09.388","0.001s"],[225,13,"00:00:09.248","00:00:09.393","00:00:09.388","00:00:09.389","0.001s"],[225,13,"00:00:09.248","00:00:09.393","00:00:09.389","00:00:09.391","0.002s"],[225,13,"00:00:09.248","00:00:09.393","00:00:09.391","00:00:09.392","0.001s"],[225,13,"00:00:09.248","00:00:09.393","00:00:09.392","00:00:09.393","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.398","00:00:09.398","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.398","00:00:09.400","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.400","00:00:09.401","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.401","00:00:09.402","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.402","00:00:09.403","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.403","00:00:09.405","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.405","00:00:09.406","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.406","00:00:09.407","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.407","00:00:09.408","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.408","00:00:09.410","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.410","00:00:09.411","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.411","00:00:09.412","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.412","00:00:09.413","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.413","00:00:09.415","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.415","00:00:09.416","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.416","00:00:09.417","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.417","00:00:09.418","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.418","00:00:09.419","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.419","00:00:09.421","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.421","00:00:09.422","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.422","00:00:09.423","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.423","00:00:09.424","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.424","00:00:09.426","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.426","00:00:09.427","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.427","00:00:09.428","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.428","00:00:09.429","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.429","00:00:09.431","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.431","00:00:09.432","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.432","00:00:09.433","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.433","00:00:09.434","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.434","00:00:09.435","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.435","00:00:09.437","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.437","00:00:09.438","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.438","00:00:09.439","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.439","00:00:09.440","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.440","00:00:09.441","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.441","00:00:09.443","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.443","00:00:09.444","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.444","00:00:09.445","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.445","00:00:09.447","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.447","00:00:09.448","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.448","00:00:09.449","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.449","00:00:09.450","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.450","00:00:09.452","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.452","00:00:09.453","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.453","00:00:09.455","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.455","00:00:09.456","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.456","00:00:09.458","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.458","00:00:09.459","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.459","00:00:09.460","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.460","00:00:09.461","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.461","00:00:09.462","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.462","00:00:09.464","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.464","00:00:09.465","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.465","00:00:09.466","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.466","00:00:09.467","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.467","00:00:09.469","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.469","00:00:09.470","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.470","00:00:09.471","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.471","00:00:09.473","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.473","00:00:09.474","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.474","00:00:09.475","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.475","00:00:09.477","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.479","00:00:09.481","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.481","00:00:09.482","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.482","00:00:09.483","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.483","00:00:09.484","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.484","00:00:09.486","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.486","00:00:09.487","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.487","00:00:09.489","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.489","00:00:09.491","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.491","00:00:09.492","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.492","00:00:09.493","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.493","00:00:09.494","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.494","00:00:09.495","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.495","00:00:09.496","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.496","00:00:09.497","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.497","00:00:09.499","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.499","00:00:09.500","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.500","00:00:09.501","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.501","00:00:09.502","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.502","00:00:09.503","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.503","00:00:09.504","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.504","00:00:09.506","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.506","00:00:09.507","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.507","00:00:09.508","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.508","00:00:09.509","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.509","00:00:09.510","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.510","00:00:09.512","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.512","00:00:09.513","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.513","00:00:09.514","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.514","00:00:09.515","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.515","00:00:09.516","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.516","00:00:09.517","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.517","00:00:09.519","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.519","00:00:09.520","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.520","00:00:09.521","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.521","00:00:09.522","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.522","00:00:09.524","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.524","00:00:09.525","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.525","00:00:09.526","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.526","00:00:09.527","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.527","00:00:09.528","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.528","00:00:09.529","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.529","00:00:09.530","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.530","00:00:09.532","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.532","00:00:09.533","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.533","00:00:09.534","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.534","00:00:09.535","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.535","00:00:09.536","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.536","00:00:09.537","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.537","00:00:09.539","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.539","00:00:09.540","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.540","00:00:09.541","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.541","00:00:09.542","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.542","00:00:09.543","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.543","00:00:09.545","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.545","00:00:09.546","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.546","00:00:09.547","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.547","00:00:09.548","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.548","00:00:09.549","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.549","00:00:09.551","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.551","00:00:09.552","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.552","00:00:09.553","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.553","00:00:09.554","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.554","00:00:09.555","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.555","00:00:09.556","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.556","00:00:09.558","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.558","00:00:09.559","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.559","00:00:09.560","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.560","00:00:09.561","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.561","00:00:09.562","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.562","00:00:09.564","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.564","00:00:09.565","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.565","00:00:09.566","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.566","00:00:09.567","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.567","00:00:09.569","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.569","00:00:09.570","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.570","00:00:09.571","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.571","00:00:09.572","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.572","00:00:09.573","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.573","00:00:09.575","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.575","00:00:09.576","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.576","00:00:09.577","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.577","00:00:09.579","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.579","00:00:09.580","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.580","00:00:09.582","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.582","00:00:09.583","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.583","00:00:09.585","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.585","00:00:09.586","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.586","00:00:09.587","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.587","00:00:09.588","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.588","00:00:09.589","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.589","00:00:09.590","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.590","00:00:09.592","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.592","00:00:09.593","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.593","00:00:09.594","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.594","00:00:09.595","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.595","00:00:09.597","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.597","00:00:09.598","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.598","00:00:09.599","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.599","00:00:09.600","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.600","00:00:09.601","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.601","00:00:09.603","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.603","00:00:09.605","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.605","00:00:09.606","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.606","00:00:09.608","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.608","00:00:09.609","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.609","00:00:09.610","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.610","00:00:09.611","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.611","00:00:09.612","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.612","00:00:09.614","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.614","00:00:09.616","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.616","00:00:09.617","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.617","00:00:09.617","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.617","00:00:09.619","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.619","00:00:09.620","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.620","00:00:09.621","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.621","00:00:09.623","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.623","00:00:09.624","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.624","00:00:09.625","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.625","00:00:09.626","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.626","00:00:09.628","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.628","00:00:09.629","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.629","00:00:09.630","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.630","00:00:09.631","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.631","00:00:09.632","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.632","00:00:09.634","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.634","00:00:09.635","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.635","00:00:09.636","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.636","00:00:09.637","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.637","00:00:09.638","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.638","00:00:09.640","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.640","00:00:09.641","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.641","00:00:09.642","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.642","00:00:09.643","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.643","00:00:09.644","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.644","00:00:09.646","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.646","00:00:09.647","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.647","00:00:09.648","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.648","00:00:09.649","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.649","00:00:09.651","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.651","00:00:09.652","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.652","00:00:09.653","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.653","00:00:09.654","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.654","00:00:09.655","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.655","00:00:09.657","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.657","00:00:09.658","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.658","00:00:09.659","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.659","00:00:09.660","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.660","00:00:09.662","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.662","00:00:09.663","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.663","00:00:09.664","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.664","00:00:09.665","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.665","00:00:09.666","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.666","00:00:09.668","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.668","00:00:09.669","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.669","00:00:09.670","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.670","00:00:09.671","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.671","00:00:09.673","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.673","00:00:09.674","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.674","00:00:09.675","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.675","00:00:09.676","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.676","00:00:09.678","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.678","00:00:09.679","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.679","00:00:09.680","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.680","00:00:09.681","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.681","00:00:09.682","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.682","00:00:09.684","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.684","00:00:09.685","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.685","00:00:09.686","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.686","00:00:09.688","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.688","00:00:09.689","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.689","00:00:09.690","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.690","00:00:09.691","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.691","00:00:09.692","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.692","00:00:09.693","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.693","00:00:09.695","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.695","00:00:09.696","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.696","00:00:09.697","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.697","00:00:09.698","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.698","00:00:09.700","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.700","00:00:09.701","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.701","00:00:09.702","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.702","00:00:09.703","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.703","00:00:09.705","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.705","00:00:09.706","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.706","00:00:09.707","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.707","00:00:09.708","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.708","00:00:09.710","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.710","00:00:09.711","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.711","00:00:09.712","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.712","00:00:09.713","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.713","00:00:09.714","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.714","00:00:09.716","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.716","00:00:09.717","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.717","00:00:09.718","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.718","00:00:09.720","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.720","00:00:09.721","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.721","00:00:09.722","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.722","00:00:09.724","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.730","00:00:09.732","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.732","00:00:09.733","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.733","00:00:09.735","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.735","00:00:09.736","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.736","00:00:09.737","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.737","00:00:09.738","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.738","00:00:09.739","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.739","00:00:09.741","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.741","00:00:09.742","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.742","00:00:09.743","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.743","00:00:09.744","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.744","00:00:09.746","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.746","00:00:09.747","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.747","00:00:09.748","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.748","00:00:09.750","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.750","00:00:09.751","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.751","00:00:09.752","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.752","00:00:09.753","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.753","00:00:09.755","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.755","00:00:09.756","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.756","00:00:09.758","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.758","00:00:09.759","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.759","00:00:09.760","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.760","00:00:09.761","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.761","00:00:09.762","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.762","00:00:09.764","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.764","00:00:09.765","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.765","00:00:09.766","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.766","00:00:09.768","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.768","00:00:09.769","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.769","00:00:09.770","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.770","00:00:09.771","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.771","00:00:09.773","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.773","00:00:09.774","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.774","00:00:09.775","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.775","00:00:09.776","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.776","00:00:09.778","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.778","00:00:09.780","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.786","00:00:09.787","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.787","00:00:09.788","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.788","00:00:09.789","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.789","00:00:09.790","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.790","00:00:09.791","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.791","00:00:09.793","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.793","00:00:09.794","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.794","00:00:09.795","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.795","00:00:09.797","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.797","00:00:09.798","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.798","00:00:09.799","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.799","00:00:09.801","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.801","00:00:09.802","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.802","00:00:09.803","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.803","00:00:09.804","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.804","00:00:09.805","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.805","00:00:09.807","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.807","00:00:09.808","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.808","00:00:09.809","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.809","00:00:09.811","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.811","00:00:09.812","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.812","00:00:09.813","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.813","00:00:09.815","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.815","00:00:09.816","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.816","00:00:09.817","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.817","00:00:09.819","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.819","00:00:09.820","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.820","00:00:09.821","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.821","00:00:09.822","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.822","00:00:09.823","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.823","00:00:09.825","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.825","00:00:09.826","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.826","00:00:09.827","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.827","00:00:09.828","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.828","00:00:09.830","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.830","00:00:09.831","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.831","00:00:09.832","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.832","00:00:09.833","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.833","00:00:09.835","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.835","00:00:09.836","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.836","00:00:09.837","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.837","00:00:09.839","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.839","00:00:09.840","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.840","00:00:09.841","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.841","00:00:09.843","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.843","00:00:09.844","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.844","00:00:09.845","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.845","00:00:09.846","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.846","00:00:09.848","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.848","00:00:09.849","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.849","00:00:09.850","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.850","00:00:09.851","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.851","00:00:09.853","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.853","00:00:09.854","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.854","00:00:09.855","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.855","00:00:09.857","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.857","00:00:09.858","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.858","00:00:09.860","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.860","00:00:09.861","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.861","00:00:09.862","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.862","00:00:09.863","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.863","00:00:09.864","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.864","00:00:09.866","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.866","00:00:09.867","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.867","00:00:09.868","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.868","00:00:09.869","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.869","00:00:09.871","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.871","00:00:09.872","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.872","00:00:09.873","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.873","00:00:09.874","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.874","00:00:09.876","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.876","00:00:09.877","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.877","00:00:09.878","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.878","00:00:09.880","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.880","00:00:09.881","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.881","00:00:09.882","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.882","00:00:09.884","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.884","00:00:09.885","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.885","00:00:09.886","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.886","00:00:09.887","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.887","00:00:09.888","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.888","00:00:09.890","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.890","00:00:09.891","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.891","00:00:09.892","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.892","00:00:09.894","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.894","00:00:09.895","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.895","00:00:09.896","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.896","00:00:09.897","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.897","00:00:09.899","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.899","00:00:09.900","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.900","00:00:09.901","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.901","00:00:09.903","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.903","00:00:09.904","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.904","00:00:09.905","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.905","00:00:09.907","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.907","00:00:09.908","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.908","00:00:09.910","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.910","00:00:09.911","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.913","00:00:09.914","0.000s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.914","00:00:09.915","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.915","00:00:09.916","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.916","00:00:09.917","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.917","00:00:09.919","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.921","00:00:09.922","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.922","00:00:09.923","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.923","00:00:09.924","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.924","00:00:09.925","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.925","00:00:09.927","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.927","00:00:09.928","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.928","00:00:09.929","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.929","00:00:09.931","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.931","00:00:09.932","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.932","00:00:09.933","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.933","00:00:09.935","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.935","00:00:09.936","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.936","00:00:09.937","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.937","00:00:09.939","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.939","00:00:09.940","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.940","00:00:09.941","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.941","00:00:09.943","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.943","00:00:09.944","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.944","00:00:09.945","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.945","00:00:09.947","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.947","00:00:09.948","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.948","00:00:09.949","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.949","00:00:09.951","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.951","00:00:09.952","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.952","00:00:09.953","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.953","00:00:09.955","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.955","00:00:09.956","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.956","00:00:09.957","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.957","00:00:09.958","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.958","00:00:09.960","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.960","00:00:09.961","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.961","00:00:09.963","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.963","00:00:09.964","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.964","00:00:09.965","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.965","00:00:09.966","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.966","00:00:09.968","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.968","00:00:09.969","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.969","00:00:09.970","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.970","00:00:09.972","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.972","00:00:09.973","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.973","00:00:09.974","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.974","00:00:09.976","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.976","00:00:09.977","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.977","00:00:09.978","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.978","00:00:09.980","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.980","00:00:09.981","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.981","00:00:09.982","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.982","00:00:09.984","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.984","00:00:09.985","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.985","00:00:09.986","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.986","00:00:09.988","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.988","00:00:09.989","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.989","00:00:09.990","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.990","00:00:09.992","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.992","00:00:09.993","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.993","00:00:09.994","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.994","00:00:09.996","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.996","00:00:09.997","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.997","00:00:09.998","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.998","00:00:10.000","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.000","00:00:10.001","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.001","00:00:10.002","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.002","00:00:10.004","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.004","00:00:10.005","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.005","00:00:10.007","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.012","00:00:10.014","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.014","00:00:10.014","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.014","00:00:10.016","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.016","00:00:10.017","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.017","00:00:10.018","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.018","00:00:10.020","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.020","00:00:10.021","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.021","00:00:10.022","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.022","00:00:10.024","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.024","00:00:10.025","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.025","00:00:10.026","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.026","00:00:10.028","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.028","00:00:10.029","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.029","00:00:10.030","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.030","00:00:10.032","0.001s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.032","00:00:10.033","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.481","00:00:09.482","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.482","00:00:09.483","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.483","00:00:09.484","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.484","00:00:09.486","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.486","00:00:09.487","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.487","00:00:09.489","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.489","00:00:09.491","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.491","00:00:09.492","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.492","00:00:09.493","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.493","00:00:09.494","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.494","00:00:09.495","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.495","00:00:09.496","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.496","00:00:09.497","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.497","00:00:09.498","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.498","00:00:09.500","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.500","00:00:09.501","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.501","00:00:09.502","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.502","00:00:09.503","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.503","00:00:09.504","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.504","00:00:09.506","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.506","00:00:09.507","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.507","00:00:09.508","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.508","00:00:09.509","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.509","00:00:09.510","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.510","00:00:09.511","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.511","00:00:09.513","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.513","00:00:09.514","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.514","00:00:09.515","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.515","00:00:09.516","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.516","00:00:09.517","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.517","00:00:09.519","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.519","00:00:09.520","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.520","00:00:09.521","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.521","00:00:09.522","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.522","00:00:09.523","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.523","00:00:09.525","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.525","00:00:09.526","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.526","00:00:09.527","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.527","00:00:09.528","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.528","00:00:09.529","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.529","00:00:09.530","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.530","00:00:09.532","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.532","00:00:09.533","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.533","00:00:09.534","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.534","00:00:09.535","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.535","00:00:09.536","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.536","00:00:09.537","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.537","00:00:09.539","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.539","00:00:09.540","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.540","00:00:09.541","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.541","00:00:09.542","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.542","00:00:09.543","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.543","00:00:09.545","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.545","00:00:09.546","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.546","00:00:09.547","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.547","00:00:09.548","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.548","00:00:09.549","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.549","00:00:09.550","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.550","00:00:09.552","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.552","00:00:09.553","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.553","00:00:09.554","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.554","00:00:09.555","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.555","00:00:09.556","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.556","00:00:09.557","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.557","00:00:09.559","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.559","00:00:09.560","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.560","00:00:09.561","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.561","00:00:09.562","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.562","00:00:09.564","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.564","00:00:09.565","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.565","00:00:09.566","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.566","00:00:09.567","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.567","00:00:09.569","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.569","00:00:09.570","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.570","00:00:09.571","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.571","00:00:09.572","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.572","00:00:09.573","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.573","00:00:09.574","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.574","00:00:09.576","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.576","00:00:09.577","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.577","00:00:09.579","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.579","00:00:09.580","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.580","00:00:09.581","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.581","00:00:09.583","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.583","00:00:09.585","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.585","00:00:09.586","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.586","00:00:09.587","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.587","00:00:09.588","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.588","00:00:09.589","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.589","00:00:09.590","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.590","00:00:09.591","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.591","00:00:09.593","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.593","00:00:09.594","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.594","00:00:09.595","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.595","00:00:09.597","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.597","00:00:09.598","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.598","00:00:09.599","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.599","00:00:09.600","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.600","00:00:09.601","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.601","00:00:09.603","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.603","00:00:09.605","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.605","00:00:09.606","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.606","00:00:09.608","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.608","00:00:09.609","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.609","00:00:09.610","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.610","00:00:09.611","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.611","00:00:09.612","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.612","00:00:09.614","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.614","00:00:09.616","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.616","00:00:09.617","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.617","00:00:09.617","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.617","00:00:09.619","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.619","00:00:09.620","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.620","00:00:09.621","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.621","00:00:09.623","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.623","00:00:09.624","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.624","00:00:09.625","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.625","00:00:09.626","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.626","00:00:09.628","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.628","00:00:09.629","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.629","00:00:09.630","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.630","00:00:09.631","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.631","00:00:09.632","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.632","00:00:09.634","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.634","00:00:09.635","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.635","00:00:09.636","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.636","00:00:09.637","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.637","00:00:09.638","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.638","00:00:09.640","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.640","00:00:09.641","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.641","00:00:09.642","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.642","00:00:09.643","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.643","00:00:09.644","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.644","00:00:09.646","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.646","00:00:09.647","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.647","00:00:09.648","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.648","00:00:09.649","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.649","00:00:09.651","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.651","00:00:09.652","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.652","00:00:09.653","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.653","00:00:09.654","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.654","00:00:09.656","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.656","00:00:09.657","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.657","00:00:09.658","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.658","00:00:09.659","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.659","00:00:09.660","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.660","00:00:09.662","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.662","00:00:09.663","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.663","00:00:09.664","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.664","00:00:09.665","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.665","00:00:09.667","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.667","00:00:09.668","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.668","00:00:09.669","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.669","00:00:09.670","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.670","00:00:09.671","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.671","00:00:09.673","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.673","00:00:09.674","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.674","00:00:09.675","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.675","00:00:09.676","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.676","00:00:09.678","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.678","00:00:09.679","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.679","00:00:09.680","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.680","00:00:09.681","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.681","00:00:09.682","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.682","00:00:09.684","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.684","00:00:09.685","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.685","00:00:09.686","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.686","00:00:09.688","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.688","00:00:09.689","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.689","00:00:09.690","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.690","00:00:09.691","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.691","00:00:09.692","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.692","00:00:09.694","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.694","00:00:09.695","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.695","00:00:09.696","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.696","00:00:09.697","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.697","00:00:09.698","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.698","00:00:09.700","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.700","00:00:09.701","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.701","00:00:09.702","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.702","00:00:09.703","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.703","00:00:09.705","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.705","00:00:09.706","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.706","00:00:09.707","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.707","00:00:09.708","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.708","00:00:09.710","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.710","00:00:09.711","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.711","00:00:09.712","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.712","00:00:09.713","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.713","00:00:09.714","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.714","00:00:09.716","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.716","00:00:09.717","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.717","00:00:09.718","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.718","00:00:09.720","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.720","00:00:09.721","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.721","00:00:09.723","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.723","00:00:09.724","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.730","00:00:09.732","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.732","00:00:09.733","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.733","00:00:09.735","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.735","00:00:09.736","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.736","00:00:09.737","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.737","00:00:09.738","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.738","00:00:09.739","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.739","00:00:09.741","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.741","00:00:09.742","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.742","00:00:09.743","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.743","00:00:09.745","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.745","00:00:09.746","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.746","00:00:09.747","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.747","00:00:09.748","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.748","00:00:09.750","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.750","00:00:09.751","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.751","00:00:09.752","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.752","00:00:09.753","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.753","00:00:09.755","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.755","00:00:09.756","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.756","00:00:09.758","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.758","00:00:09.759","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.759","00:00:09.760","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.760","00:00:09.761","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.761","00:00:09.762","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.762","00:00:09.764","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.764","00:00:09.765","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.765","00:00:09.766","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.766","00:00:09.768","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.768","00:00:09.769","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.769","00:00:09.770","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.770","00:00:09.771","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.771","00:00:09.773","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.773","00:00:09.774","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.774","00:00:09.775","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.775","00:00:09.776","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.776","00:00:09.778","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.778","00:00:09.780","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.786","00:00:09.787","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.787","00:00:09.788","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.788","00:00:09.789","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.789","00:00:09.790","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.790","00:00:09.791","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.791","00:00:09.793","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.793","00:00:09.794","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.794","00:00:09.795","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.795","00:00:09.797","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.797","00:00:09.798","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.798","00:00:09.799","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.799","00:00:09.801","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.801","00:00:09.802","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.802","00:00:09.803","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.803","00:00:09.804","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.804","00:00:09.805","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.805","00:00:09.807","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.807","00:00:09.808","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.808","00:00:09.809","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.809","00:00:09.811","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.811","00:00:09.812","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.812","00:00:09.813","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.813","00:00:09.815","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.815","00:00:09.816","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.816","00:00:09.817","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.817","00:00:09.819","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.819","00:00:09.820","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.820","00:00:09.821","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.821","00:00:09.822","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.822","00:00:09.823","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.823","00:00:09.825","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.825","00:00:09.826","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.826","00:00:09.827","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.827","00:00:09.828","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.828","00:00:09.830","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.830","00:00:09.831","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.831","00:00:09.832","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.832","00:00:09.833","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.833","00:00:09.835","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.835","00:00:09.836","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.836","00:00:09.837","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.837","00:00:09.839","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.839","00:00:09.840","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.840","00:00:09.841","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.841","00:00:09.843","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.843","00:00:09.844","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.844","00:00:09.845","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.845","00:00:09.846","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.846","00:00:09.848","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.848","00:00:09.849","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.849","00:00:09.850","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.850","00:00:09.851","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.851","00:00:09.853","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.853","00:00:09.854","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.854","00:00:09.855","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.855","00:00:09.857","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.857","00:00:09.858","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.858","00:00:09.860","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.860","00:00:09.861","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.861","00:00:09.862","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.862","00:00:09.863","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.863","00:00:09.864","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.864","00:00:09.866","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.866","00:00:09.867","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.867","00:00:09.868","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.868","00:00:09.869","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.869","00:00:09.871","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.871","00:00:09.872","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.872","00:00:09.873","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.873","00:00:09.874","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.874","00:00:09.876","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.876","00:00:09.877","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.877","00:00:09.878","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.878","00:00:09.880","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.880","00:00:09.881","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.881","00:00:09.882","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.882","00:00:09.884","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.884","00:00:09.885","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.885","00:00:09.886","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.886","00:00:09.887","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.887","00:00:09.888","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.888","00:00:09.890","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.890","00:00:09.891","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.891","00:00:09.892","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.892","00:00:09.894","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.894","00:00:09.895","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.895","00:00:09.896","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.896","00:00:09.898","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.898","00:00:09.899","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.899","00:00:09.900","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.900","00:00:09.901","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.901","00:00:09.903","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.903","00:00:09.904","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.904","00:00:09.905","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.905","00:00:09.907","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.907","00:00:09.908","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.908","00:00:09.910","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.910","00:00:09.911","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.913","00:00:09.914","0.000s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.914","00:00:09.915","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.915","00:00:09.916","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.916","00:00:09.917","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.917","00:00:09.919","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.921","00:00:09.922","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.922","00:00:09.923","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.923","00:00:09.924","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.924","00:00:09.925","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.925","00:00:09.927","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.927","00:00:09.928","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.928","00:00:09.929","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.929","00:00:09.931","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.931","00:00:09.932","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.932","00:00:09.933","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.933","00:00:09.935","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.935","00:00:09.936","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.936","00:00:09.937","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.937","00:00:09.939","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.939","00:00:09.940","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.940","00:00:09.941","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.941","00:00:09.943","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.943","00:00:09.944","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.944","00:00:09.945","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.945","00:00:09.947","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.947","00:00:09.948","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.948","00:00:09.949","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.949","00:00:09.951","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.951","00:00:09.952","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.952","00:00:09.953","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.953","00:00:09.955","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.955","00:00:09.956","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.956","00:00:09.957","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.957","00:00:09.958","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.958","00:00:09.960","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.960","00:00:09.961","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.961","00:00:09.963","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.963","00:00:09.964","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.964","00:00:09.965","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.965","00:00:09.966","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.966","00:00:09.968","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.968","00:00:09.969","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.969","00:00:09.970","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.970","00:00:09.972","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.972","00:00:09.973","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.973","00:00:09.975","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.975","00:00:09.976","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.976","00:00:09.977","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.977","00:00:09.978","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.978","00:00:09.980","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.980","00:00:09.981","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.981","00:00:09.982","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.982","00:00:09.984","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.984","00:00:09.985","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.985","00:00:09.986","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.986","00:00:09.988","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.988","00:00:09.989","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.989","00:00:09.991","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.991","00:00:09.992","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.992","00:00:09.993","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.993","00:00:09.994","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.994","00:00:09.996","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.996","00:00:09.997","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.997","00:00:09.998","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.998","00:00:10.000","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:10.000","00:00:10.001","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:10.001","00:00:10.002","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:10.002","00:00:10.004","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:10.004","00:00:10.005","0.001s"],[35,408,"00:00:09.410","00:00:10.007","00:00:10.005","00:00:10.007","0.002s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.617","00:00:09.617","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.617","00:00:09.618","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.618","00:00:09.620","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.620","00:00:09.621","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.621","00:00:09.623","0.002s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.623","00:00:09.623","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.623","00:00:09.625","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.625","00:00:09.626","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.626","00:00:09.627","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.627","00:00:09.629","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.629","00:00:09.630","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.630","00:00:09.631","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.631","00:00:09.632","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.632","00:00:09.633","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.633","00:00:09.635","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.635","00:00:09.636","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.636","00:00:09.637","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.637","00:00:09.638","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.638","00:00:09.639","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.639","00:00:09.641","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.641","00:00:09.642","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.642","00:00:09.643","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.643","00:00:09.644","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.644","00:00:09.646","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.646","00:00:09.647","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.647","00:00:09.648","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.648","00:00:09.649","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.649","00:00:09.650","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.650","00:00:09.652","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.652","00:00:09.653","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.653","00:00:09.654","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.654","00:00:09.655","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.655","00:00:09.657","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.657","00:00:09.658","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.658","00:00:09.659","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.659","00:00:09.660","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.660","00:00:09.661","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.661","00:00:09.663","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.663","00:00:09.664","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.664","00:00:09.665","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.665","00:00:09.666","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.666","00:00:09.668","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.668","00:00:09.669","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.669","00:00:09.670","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.670","00:00:09.671","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.671","00:00:09.672","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.672","00:00:09.674","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.674","00:00:09.675","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.675","00:00:09.676","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.676","00:00:09.677","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.677","00:00:09.679","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.679","00:00:09.680","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.680","00:00:09.681","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.681","00:00:09.682","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.682","00:00:09.684","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.684","00:00:09.685","0.001s"],[133,58,"00:00:09.424","00:00:09.686","00:00:09.685","00:00:09.686","0.002s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.689","00:00:09.690","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.690","00:00:09.691","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.691","00:00:09.692","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.692","00:00:09.693","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.693","00:00:09.695","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.695","00:00:09.696","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.696","00:00:09.697","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.697","00:00:09.698","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.698","00:00:09.700","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.700","00:00:09.701","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.701","00:00:09.702","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.702","00:00:09.703","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.703","00:00:09.705","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.705","00:00:09.706","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.706","00:00:09.707","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.707","00:00:09.708","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.708","00:00:09.709","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.709","00:00:09.711","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.711","00:00:09.712","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.712","00:00:09.713","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.713","00:00:09.714","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.714","00:00:09.716","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.716","00:00:09.717","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.717","00:00:09.718","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.718","00:00:09.719","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.719","00:00:09.721","0.002s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.721","00:00:09.722","0.001s"],[98,29,"00:00:09.562","00:00:09.724","00:00:09.722","00:00:09.724","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.732","00:00:09.733","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.733","00:00:09.735","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.735","00:00:09.736","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.736","00:00:09.737","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.737","00:00:09.738","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.738","00:00:09.739","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.739","00:00:09.741","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.741","00:00:09.742","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.742","00:00:09.743","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.743","00:00:09.744","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.744","00:00:09.746","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.746","00:00:09.747","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.747","00:00:09.748","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.748","00:00:09.749","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.749","00:00:09.751","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.751","00:00:09.752","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.752","00:00:09.753","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.753","00:00:09.755","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.755","00:00:09.756","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.756","00:00:09.757","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.757","00:00:09.759","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.759","00:00:09.760","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.760","00:00:09.761","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.761","00:00:09.762","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.762","00:00:09.763","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.763","00:00:09.765","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.765","00:00:09.766","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.766","00:00:09.767","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.767","00:00:09.769","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.769","00:00:09.770","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.770","00:00:09.771","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.771","00:00:09.772","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.772","00:00:09.774","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.774","00:00:09.775","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.775","00:00:09.776","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.776","00:00:09.778","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.778","00:00:09.779","0.002s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.786","00:00:09.787","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.787","00:00:09.787","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.787","00:00:09.789","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.789","00:00:09.790","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.790","00:00:09.791","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.791","00:00:09.793","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.793","00:00:09.794","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.794","00:00:09.795","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.795","00:00:09.797","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.797","00:00:09.798","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.798","00:00:09.799","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.799","00:00:09.801","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.801","00:00:09.802","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.802","00:00:09.803","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.803","00:00:09.804","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.804","00:00:09.805","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.805","00:00:09.807","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.807","00:00:09.808","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.808","00:00:09.809","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.809","00:00:09.810","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.810","00:00:09.812","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.812","00:00:09.813","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.813","00:00:09.815","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.815","00:00:09.816","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.816","00:00:09.817","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.817","00:00:09.819","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.819","00:00:09.820","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.820","00:00:09.821","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.821","00:00:09.822","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.822","00:00:09.823","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.823","00:00:09.825","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.825","00:00:09.826","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.826","00:00:09.827","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.827","00:00:09.828","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.828","00:00:09.830","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.830","00:00:09.831","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.831","00:00:09.832","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.832","00:00:09.833","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.833","00:00:09.835","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.835","00:00:09.836","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.836","00:00:09.837","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.837","00:00:09.839","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.839","00:00:09.840","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.840","00:00:09.841","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.841","00:00:09.843","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.843","00:00:09.844","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.844","00:00:09.845","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.845","00:00:09.846","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.846","00:00:09.848","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.848","00:00:09.849","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.849","00:00:09.850","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.850","00:00:09.851","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.851","00:00:09.853","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.853","00:00:09.854","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.854","00:00:09.855","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.855","00:00:09.857","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.857","00:00:09.858","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.858","00:00:09.859","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.859","00:00:09.861","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.861","00:00:09.862","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.862","00:00:09.863","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.863","00:00:09.864","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.864","00:00:09.866","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.866","00:00:09.867","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.867","00:00:09.868","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.868","00:00:09.869","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.869","00:00:09.870","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.870","00:00:09.872","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.872","00:00:09.873","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.873","00:00:09.874","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.874","00:00:09.876","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.876","00:00:09.877","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.877","00:00:09.878","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.878","00:00:09.880","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.880","00:00:09.881","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.881","00:00:09.882","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.882","00:00:09.884","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.884","00:00:09.885","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.885","00:00:09.886","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.886","00:00:09.887","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.887","00:00:09.888","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.888","00:00:09.890","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.890","00:00:09.891","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.891","00:00:09.892","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.892","00:00:09.894","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.894","00:00:09.895","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.895","00:00:09.896","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.896","00:00:09.897","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.897","00:00:09.899","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.899","00:00:09.900","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.900","00:00:09.901","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.901","00:00:09.903","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.903","00:00:09.904","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.904","00:00:09.905","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.905","00:00:09.907","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.907","00:00:09.908","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.908","00:00:09.909","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.909","00:00:09.911","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.913","00:00:09.914","0.000s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.914","00:00:09.915","0.002s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.915","00:00:09.916","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.916","00:00:09.917","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.917","00:00:09.919","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.921","00:00:09.922","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.922","00:00:09.923","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.923","00:00:09.924","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.924","00:00:09.925","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.925","00:00:09.927","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.927","00:00:09.928","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.928","00:00:09.929","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.929","00:00:09.931","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.931","00:00:09.932","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.932","00:00:09.933","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.933","00:00:09.935","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.935","00:00:09.936","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.936","00:00:09.937","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.937","00:00:09.939","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.939","00:00:09.940","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.940","00:00:09.941","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.941","00:00:09.943","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.943","00:00:09.944","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.944","00:00:09.945","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.945","00:00:09.947","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.947","00:00:09.948","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.948","00:00:09.949","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.949","00:00:09.951","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.951","00:00:09.952","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.952","00:00:09.953","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.953","00:00:09.954","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.954","00:00:09.956","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.956","00:00:09.957","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.957","00:00:09.958","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.958","00:00:09.960","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.960","00:00:09.961","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.961","00:00:09.963","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.963","00:00:09.964","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.964","00:00:09.965","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.965","00:00:09.966","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.966","00:00:09.968","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.968","00:00:09.969","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.969","00:00:09.970","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.970","00:00:09.972","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.972","00:00:09.973","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.973","00:00:09.974","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.974","00:00:09.976","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.976","00:00:09.977","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.977","00:00:09.978","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.978","00:00:09.980","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.980","00:00:09.981","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.981","00:00:09.982","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.982","00:00:09.984","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.984","00:00:09.985","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.985","00:00:09.986","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.986","00:00:09.988","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.988","00:00:09.989","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.989","00:00:09.990","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.990","00:00:09.992","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.992","00:00:09.993","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.993","00:00:09.994","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.994","00:00:09.996","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.996","00:00:09.997","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.997","00:00:09.998","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.998","00:00:10.000","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.000","00:00:10.001","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.001","00:00:10.002","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.002","00:00:10.004","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.004","00:00:10.005","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.005","00:00:10.007","0.002s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.012","00:00:10.014","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.014","00:00:10.014","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.014","00:00:10.016","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.016","00:00:10.017","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.017","00:00:10.018","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.018","00:00:10.020","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.020","00:00:10.021","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.021","00:00:10.022","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.022","00:00:10.024","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.024","00:00:10.025","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.025","00:00:10.026","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.026","00:00:10.028","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.028","00:00:10.029","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.029","00:00:10.030","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.030","00:00:10.032","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.032","00:00:10.033","0.002s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.039","00:00:10.040","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.040","00:00:10.041","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.041","00:00:10.042","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.042","00:00:10.043","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.043","00:00:10.044","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.044","00:00:10.046","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.046","00:00:10.047","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.047","00:00:10.048","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.048","00:00:10.049","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.049","00:00:10.050","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.050","00:00:10.051","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.051","00:00:10.053","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.053","00:00:10.054","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.054","00:00:10.055","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.055","00:00:10.056","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.056","00:00:10.058","0.002s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.063","00:00:10.064","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.064","00:00:10.065","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.065","00:00:10.066","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.066","00:00:10.067","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.067","00:00:10.068","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.068","00:00:10.070","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.070","00:00:10.071","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.071","00:00:10.072","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.072","00:00:10.073","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.073","00:00:10.074","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.074","00:00:10.075","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.075","00:00:10.077","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.077","00:00:10.078","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.078","00:00:10.079","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.079","00:00:10.080","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.080","00:00:10.081","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.081","00:00:10.083","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.083","00:00:10.084","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.084","00:00:10.085","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.085","00:00:10.086","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.086","00:00:10.088","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.088","00:00:10.089","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.089","00:00:10.090","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.090","00:00:10.091","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.091","00:00:10.092","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.092","00:00:10.093","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.093","00:00:10.094","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.094","00:00:10.095","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.095","00:00:10.097","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.097","00:00:10.098","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.098","00:00:10.099","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.099","00:00:10.100","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.100","00:00:10.101","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.101","00:00:10.102","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.102","00:00:10.104","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.104","00:00:10.105","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.105","00:00:10.106","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.106","00:00:10.107","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.107","00:00:10.108","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.108","00:00:10.109","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.109","00:00:10.111","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.111","00:00:10.112","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.112","00:00:10.113","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.113","00:00:10.114","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.114","00:00:10.115","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.115","00:00:10.117","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.117","00:00:10.118","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.118","00:00:10.119","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.119","00:00:10.120","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.120","00:00:10.121","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.121","00:00:10.122","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.122","00:00:10.124","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.124","00:00:10.125","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.125","00:00:10.126","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.126","00:00:10.127","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.127","00:00:10.128","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.128","00:00:10.129","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.129","00:00:10.131","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.131","00:00:10.132","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.132","00:00:10.133","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.133","00:00:10.134","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.134","00:00:10.135","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.135","00:00:10.136","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.136","00:00:10.138","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.138","00:00:10.139","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.139","00:00:10.140","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.140","00:00:10.141","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.141","00:00:10.142","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.142","00:00:10.144","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.144","00:00:10.145","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.145","00:00:10.146","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.146","00:00:10.147","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.147","00:00:10.148","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.148","00:00:10.149","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.149","00:00:10.151","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.151","00:00:10.152","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.152","00:00:10.153","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.153","00:00:10.154","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.154","00:00:10.155","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.155","00:00:10.156","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.156","00:00:10.158","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.158","00:00:10.159","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.159","00:00:10.160","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.160","00:00:10.161","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.161","00:00:10.162","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.162","00:00:10.163","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.163","00:00:10.164","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.164","00:00:10.166","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.166","00:00:10.167","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.167","00:00:10.168","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.168","00:00:10.169","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.169","00:00:10.170","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.170","00:00:10.171","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.171","00:00:10.173","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.173","00:00:10.174","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.174","00:00:10.175","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.175","00:00:10.176","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.176","00:00:10.177","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.177","00:00:10.179","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.179","00:00:10.180","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.180","00:00:10.181","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.181","00:00:10.182","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.182","00:00:10.183","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.183","00:00:10.184","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.184","00:00:10.186","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.186","00:00:10.187","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.187","00:00:10.188","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.188","00:00:10.189","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.189","00:00:10.190","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.190","00:00:10.191","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.191","00:00:10.193","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.193","00:00:10.194","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.194","00:00:10.195","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.195","00:00:10.196","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.196","00:00:10.197","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.197","00:00:10.199","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.199","00:00:10.200","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.200","00:00:10.201","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.201","00:00:10.202","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.202","00:00:10.203","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.203","00:00:10.205","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.205","00:00:10.206","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.206","00:00:10.207","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.207","00:00:10.208","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.208","00:00:10.210","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.210","00:00:10.211","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.211","00:00:10.212","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.212","00:00:10.213","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.213","00:00:10.214","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.214","00:00:10.216","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.216","00:00:10.217","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.217","00:00:10.218","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.218","00:00:10.219","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.219","00:00:10.221","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.221","00:00:10.222","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.222","00:00:10.223","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.223","00:00:10.224","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.224","00:00:10.226","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.226","00:00:10.227","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.227","00:00:10.228","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.228","00:00:10.229","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.229","00:00:10.230","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.230","00:00:10.232","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.232","00:00:10.233","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.233","00:00:10.234","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.234","00:00:10.235","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.235","00:00:10.236","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.236","00:00:10.238","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.238","00:00:10.239","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.239","00:00:10.240","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.240","00:00:10.241","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.241","00:00:10.243","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.243","00:00:10.244","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.244","00:00:10.245","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.245","00:00:10.246","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.246","00:00:10.247","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.247","00:00:10.249","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.249","00:00:10.250","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.250","00:00:10.251","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.251","00:00:10.252","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.252","00:00:10.254","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.254","00:00:10.255","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.255","00:00:10.256","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.256","00:00:10.257","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.257","00:00:10.258","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.258","00:00:10.260","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.260","00:00:10.261","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.261","00:00:10.262","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.262","00:00:10.263","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.263","00:00:10.265","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.265","00:00:10.266","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.266","00:00:10.267","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.267","00:00:10.268","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.268","00:00:10.270","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.270","00:00:10.271","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.271","00:00:10.272","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.272","00:00:10.273","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.273","00:00:10.275","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.275","00:00:10.276","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.276","00:00:10.277","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.277","00:00:10.278","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.278","00:00:10.279","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.279","00:00:10.280","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.280","00:00:10.282","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.282","00:00:10.283","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.283","00:00:10.284","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.284","00:00:10.285","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.285","00:00:10.287","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.287","00:00:10.288","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.288","00:00:10.289","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.289","00:00:10.290","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.290","00:00:10.291","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.291","00:00:10.293","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.293","00:00:10.294","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.294","00:00:10.295","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.295","00:00:10.296","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.296","00:00:10.298","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.298","00:00:10.299","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.299","00:00:10.300","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.300","00:00:10.301","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.301","00:00:10.302","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.302","00:00:10.304","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.304","00:00:10.305","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.305","00:00:10.306","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.306","00:00:10.307","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.307","00:00:10.308","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.308","00:00:10.310","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.310","00:00:10.311","0.002s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.315","00:00:10.316","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.316","00:00:10.317","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.317","00:00:10.318","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.318","00:00:10.319","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.319","00:00:10.320","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.320","00:00:10.322","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.322","00:00:10.323","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.323","00:00:10.324","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.324","00:00:10.325","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.325","00:00:10.327","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.327","00:00:10.328","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.328","00:00:10.329","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.329","00:00:10.330","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.330","00:00:10.331","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.331","00:00:10.333","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.333","00:00:10.334","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.334","00:00:10.335","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.335","00:00:10.336","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.336","00:00:10.337","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.337","00:00:10.339","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.339","00:00:10.340","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.340","00:00:10.341","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.341","00:00:10.342","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.342","00:00:10.344","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.344","00:00:10.345","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.345","00:00:10.346","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.346","00:00:10.347","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.347","00:00:10.348","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.348","00:00:10.350","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.350","00:00:10.351","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.351","00:00:10.352","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.352","00:00:10.353","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.353","00:00:10.355","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.355","00:00:10.356","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.356","00:00:10.357","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.357","00:00:10.358","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.358","00:00:10.360","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.360","00:00:10.361","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.361","00:00:10.362","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.362","00:00:10.364","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.364","00:00:10.365","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.365","00:00:10.366","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.366","00:00:10.367","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.367","00:00:10.369","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.369","00:00:10.370","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.370","00:00:10.371","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.371","00:00:10.372","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.372","00:00:10.374","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.374","00:00:10.375","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.375","00:00:10.376","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.376","00:00:10.378","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.378","00:00:10.379","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.379","00:00:10.380","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.380","00:00:10.381","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.381","00:00:10.383","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.383","00:00:10.384","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.384","00:00:10.385","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.385","00:00:10.386","0.001s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.386","00:00:10.388","0.002s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.786","00:00:09.787","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.787","00:00:09.789","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.789","00:00:09.790","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.790","00:00:09.791","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.791","00:00:09.793","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.793","00:00:09.794","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.794","00:00:09.795","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.795","00:00:09.797","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.797","00:00:09.798","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.798","00:00:09.799","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.799","00:00:09.801","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.801","00:00:09.802","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.802","00:00:09.803","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.803","00:00:09.804","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.804","00:00:09.805","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.805","00:00:09.807","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.807","00:00:09.808","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.808","00:00:09.809","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.809","00:00:09.810","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.810","00:00:09.812","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.812","00:00:09.813","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.813","00:00:09.814","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.814","00:00:09.816","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.816","00:00:09.817","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.817","00:00:09.819","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.819","00:00:09.820","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.820","00:00:09.821","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.821","00:00:09.822","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.822","00:00:09.823","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.823","00:00:09.824","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.824","00:00:09.826","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.826","00:00:09.827","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.827","00:00:09.828","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.828","00:00:09.830","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.830","00:00:09.831","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.831","00:00:09.832","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.832","00:00:09.833","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.833","00:00:09.835","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.835","00:00:09.836","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.836","00:00:09.837","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.837","00:00:09.839","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.839","00:00:09.840","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.840","00:00:09.841","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.841","00:00:09.843","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.843","00:00:09.844","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.844","00:00:09.845","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.845","00:00:09.846","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.846","00:00:09.847","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.847","00:00:09.849","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.849","00:00:09.850","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.850","00:00:09.851","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.851","00:00:09.853","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.853","00:00:09.854","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.854","00:00:09.855","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.855","00:00:09.857","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.857","00:00:09.858","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.858","00:00:09.859","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.859","00:00:09.861","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.861","00:00:09.862","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.862","00:00:09.863","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.863","00:00:09.864","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.864","00:00:09.865","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.865","00:00:09.867","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.867","00:00:09.868","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.868","00:00:09.869","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.869","00:00:09.870","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.870","00:00:09.872","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.872","00:00:09.873","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.873","00:00:09.874","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.874","00:00:09.876","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.876","00:00:09.877","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.877","00:00:09.878","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.878","00:00:09.880","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.880","00:00:09.881","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.881","00:00:09.882","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.882","00:00:09.883","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.883","00:00:09.885","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.885","00:00:09.886","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.886","00:00:09.887","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.887","00:00:09.888","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.888","00:00:09.890","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.890","00:00:09.891","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.891","00:00:09.892","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.892","00:00:09.894","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.894","00:00:09.895","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.895","00:00:09.896","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.896","00:00:09.897","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.897","00:00:09.899","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.899","00:00:09.900","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.900","00:00:09.901","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.901","00:00:09.903","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.903","00:00:09.904","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.904","00:00:09.905","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.905","00:00:09.907","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.907","00:00:09.908","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.908","00:00:09.909","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.909","00:00:09.911","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.911","00:00:09.912","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.912","00:00:09.914","0.002s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.914","00:00:09.915","0.002s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.915","00:00:09.916","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.916","00:00:09.917","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.917","00:00:09.919","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.921","00:00:09.922","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.922","00:00:09.923","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.923","00:00:09.924","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.924","00:00:09.925","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.925","00:00:09.927","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.927","00:00:09.928","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.928","00:00:09.929","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.929","00:00:09.931","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.931","00:00:09.932","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.932","00:00:09.933","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.933","00:00:09.935","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.935","00:00:09.936","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.936","00:00:09.937","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.937","00:00:09.938","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.938","00:00:09.940","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.940","00:00:09.941","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.941","00:00:09.942","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.942","00:00:09.944","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.944","00:00:09.945","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.945","00:00:09.947","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.947","00:00:09.948","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.948","00:00:09.949","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.949","00:00:09.951","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.951","00:00:09.952","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.952","00:00:09.953","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.953","00:00:09.954","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.954","00:00:09.956","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.956","00:00:09.957","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.957","00:00:09.958","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.958","00:00:09.960","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.960","00:00:09.961","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.961","00:00:09.962","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.962","00:00:09.964","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.964","00:00:09.965","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.965","00:00:09.966","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.966","00:00:09.968","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.968","00:00:09.969","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.969","00:00:09.970","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.970","00:00:09.972","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.972","00:00:09.973","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.973","00:00:09.974","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.974","00:00:09.976","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.976","00:00:09.977","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.977","00:00:09.978","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.978","00:00:09.980","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.980","00:00:09.981","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.981","00:00:09.982","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.982","00:00:09.984","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.984","00:00:09.985","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.985","00:00:09.986","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.986","00:00:09.988","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.988","00:00:09.989","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.989","00:00:09.990","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.990","00:00:09.992","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.992","00:00:09.993","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.993","00:00:09.994","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.994","00:00:09.996","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.996","00:00:09.997","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.997","00:00:09.998","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.998","00:00:10.000","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.000","00:00:10.001","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.001","00:00:10.002","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.002","00:00:10.004","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.004","00:00:10.005","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.005","00:00:10.007","0.002s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.012","00:00:10.014","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.014","00:00:10.014","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.014","00:00:10.016","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.016","00:00:10.017","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.017","00:00:10.018","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.018","00:00:10.020","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.020","00:00:10.021","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.021","00:00:10.022","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.022","00:00:10.024","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.024","00:00:10.025","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.025","00:00:10.026","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.026","00:00:10.028","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.028","00:00:10.029","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.029","00:00:10.030","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.030","00:00:10.032","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.032","00:00:10.033","0.002s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.039","00:00:10.040","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.040","00:00:10.041","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.041","00:00:10.042","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.042","00:00:10.043","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.043","00:00:10.044","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.044","00:00:10.045","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.045","00:00:10.047","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.047","00:00:10.048","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.048","00:00:10.049","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.049","00:00:10.050","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.050","00:00:10.051","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.051","00:00:10.052","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.052","00:00:10.054","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.054","00:00:10.055","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.055","00:00:10.056","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.056","00:00:10.057","0.002s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.063","00:00:10.064","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.064","00:00:10.065","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.065","00:00:10.066","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.066","00:00:10.067","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.067","00:00:10.068","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.068","00:00:10.070","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.070","00:00:10.071","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.071","00:00:10.072","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.072","00:00:10.073","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.073","00:00:10.074","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.074","00:00:10.075","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.075","00:00:10.077","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.077","00:00:10.078","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.078","00:00:10.079","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.079","00:00:10.080","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.080","00:00:10.081","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.081","00:00:10.083","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.083","00:00:10.084","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.084","00:00:10.085","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.085","00:00:10.086","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.086","00:00:10.087","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.087","00:00:10.089","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.089","00:00:10.090","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.090","00:00:10.091","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.091","00:00:10.092","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.092","00:00:10.093","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.093","00:00:10.094","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.094","00:00:10.095","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.095","00:00:10.096","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.096","00:00:10.098","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.098","00:00:10.099","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.099","00:00:10.100","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.100","00:00:10.101","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.101","00:00:10.102","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.102","00:00:10.104","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.104","00:00:10.105","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.105","00:00:10.106","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.106","00:00:10.107","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.107","00:00:10.108","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.108","00:00:10.109","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.109","00:00:10.111","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.111","00:00:10.112","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.112","00:00:10.113","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.113","00:00:10.114","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.114","00:00:10.115","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.115","00:00:10.116","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.116","00:00:10.118","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.118","00:00:10.119","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.119","00:00:10.120","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.120","00:00:10.121","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.121","00:00:10.122","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.122","00:00:10.123","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.123","00:00:10.125","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.125","00:00:10.126","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.126","00:00:10.127","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.127","00:00:10.128","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.128","00:00:10.129","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.129","00:00:10.130","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.130","00:00:10.132","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.132","00:00:10.133","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.133","00:00:10.134","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.134","00:00:10.135","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.135","00:00:10.136","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.136","00:00:10.137","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.137","00:00:10.139","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.139","00:00:10.140","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.140","00:00:10.141","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.141","00:00:10.142","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.142","00:00:10.144","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.144","00:00:10.145","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.145","00:00:10.146","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.146","00:00:10.147","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.147","00:00:10.148","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.148","00:00:10.149","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.149","00:00:10.150","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.150","00:00:10.152","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.152","00:00:10.153","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.153","00:00:10.154","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.154","00:00:10.155","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.155","00:00:10.156","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.156","00:00:10.157","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.157","00:00:10.158","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.158","00:00:10.160","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.160","00:00:10.161","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.161","00:00:10.162","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.162","00:00:10.163","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.163","00:00:10.164","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.164","00:00:10.166","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.166","00:00:10.167","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.167","00:00:10.168","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.168","00:00:10.169","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.169","00:00:10.170","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.170","00:00:10.171","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.171","00:00:10.173","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.173","00:00:10.174","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.174","00:00:10.175","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.175","00:00:10.176","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.176","00:00:10.177","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.177","00:00:10.179","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.179","00:00:10.180","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.180","00:00:10.181","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.181","00:00:10.182","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.182","00:00:10.183","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.183","00:00:10.184","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.184","00:00:10.185","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.185","00:00:10.187","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.187","00:00:10.188","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.188","00:00:10.189","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.189","00:00:10.190","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.190","00:00:10.191","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.191","00:00:10.193","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.193","00:00:10.194","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.194","00:00:10.195","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.195","00:00:10.196","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.196","00:00:10.197","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.197","00:00:10.199","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.199","00:00:10.200","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.200","00:00:10.201","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.201","00:00:10.202","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.202","00:00:10.203","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.203","00:00:10.205","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.205","00:00:10.206","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.206","00:00:10.207","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.207","00:00:10.208","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.208","00:00:10.209","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.209","00:00:10.211","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.211","00:00:10.212","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.212","00:00:10.213","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.213","00:00:10.214","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.214","00:00:10.216","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.216","00:00:10.217","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.217","00:00:10.218","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.218","00:00:10.219","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.219","00:00:10.220","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.220","00:00:10.222","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.222","00:00:10.223","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.223","00:00:10.224","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.224","00:00:10.225","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.225","00:00:10.227","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.227","00:00:10.228","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.228","00:00:10.229","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.229","00:00:10.230","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.230","00:00:10.232","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.232","00:00:10.233","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.233","00:00:10.234","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.234","00:00:10.235","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.235","00:00:10.236","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.236","00:00:10.238","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.238","00:00:10.239","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.239","00:00:10.240","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.240","00:00:10.241","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.241","00:00:10.243","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.243","00:00:10.244","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.244","00:00:10.245","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.245","00:00:10.246","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.246","00:00:10.247","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.247","00:00:10.249","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.249","00:00:10.250","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.250","00:00:10.251","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.251","00:00:10.252","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.252","00:00:10.254","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.254","00:00:10.255","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.255","00:00:10.256","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.256","00:00:10.257","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.257","00:00:10.258","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.258","00:00:10.260","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.260","00:00:10.261","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.261","00:00:10.262","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.262","00:00:10.263","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.263","00:00:10.264","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.264","00:00:10.266","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.266","00:00:10.267","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.267","00:00:10.268","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.268","00:00:10.269","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.269","00:00:10.271","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.271","00:00:10.272","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.272","00:00:10.273","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.273","00:00:10.274","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.274","00:00:10.276","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.276","00:00:10.277","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.277","00:00:10.278","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.278","00:00:10.279","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.279","00:00:10.280","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.280","00:00:10.282","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.282","00:00:10.283","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.283","00:00:10.284","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.284","00:00:10.285","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.285","00:00:10.286","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.286","00:00:10.288","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.288","00:00:10.289","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.289","00:00:10.290","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.290","00:00:10.291","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.291","00:00:10.293","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.293","00:00:10.294","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.294","00:00:10.295","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.295","00:00:10.296","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.296","00:00:10.298","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.298","00:00:10.299","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.299","00:00:10.300","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.300","00:00:10.301","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.301","00:00:10.302","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.302","00:00:10.304","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.304","00:00:10.305","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.305","00:00:10.306","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.306","00:00:10.307","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.307","00:00:10.308","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.308","00:00:10.310","0.001s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.310","00:00:10.311","0.002s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.014","00:00:10.014","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.014","00:00:10.016","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.016","00:00:10.017","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.017","00:00:10.018","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.018","00:00:10.020","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.020","00:00:10.021","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.021","00:00:10.022","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.022","00:00:10.024","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.024","00:00:10.025","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.025","00:00:10.026","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.026","00:00:10.028","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.028","00:00:10.029","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.029","00:00:10.030","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.030","00:00:10.032","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.032","00:00:10.033","0.002s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.039","00:00:10.040","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.040","00:00:10.041","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.041","00:00:10.042","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.042","00:00:10.043","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.043","00:00:10.044","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.044","00:00:10.046","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.046","00:00:10.047","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.047","00:00:10.048","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.048","00:00:10.049","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.049","00:00:10.050","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.050","00:00:10.051","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.051","00:00:10.053","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.053","00:00:10.054","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.054","00:00:10.055","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.055","00:00:10.056","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.056","00:00:10.058","0.002s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.063","00:00:10.065","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.065","00:00:10.065","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.065","00:00:10.066","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.066","00:00:10.067","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.067","00:00:10.069","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.069","00:00:10.070","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.070","00:00:10.071","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.071","00:00:10.072","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.072","00:00:10.073","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.073","00:00:10.074","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.074","00:00:10.075","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.075","00:00:10.077","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.077","00:00:10.078","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.078","00:00:10.079","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.079","00:00:10.080","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.080","00:00:10.081","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.081","00:00:10.083","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.083","00:00:10.084","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.084","00:00:10.085","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.085","00:00:10.086","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.086","00:00:10.088","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.088","00:00:10.089","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.089","00:00:10.090","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.090","00:00:10.091","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.091","00:00:10.092","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.092","00:00:10.093","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.093","00:00:10.094","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.094","00:00:10.095","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.095","00:00:10.097","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.097","00:00:10.098","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.098","00:00:10.099","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.099","00:00:10.100","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.100","00:00:10.101","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.101","00:00:10.102","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.102","00:00:10.104","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.104","00:00:10.105","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.105","00:00:10.106","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.106","00:00:10.107","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.107","00:00:10.109","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.109","00:00:10.109","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.109","00:00:10.111","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.111","00:00:10.112","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.112","00:00:10.113","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.113","00:00:10.114","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.114","00:00:10.115","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.115","00:00:10.117","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.117","00:00:10.118","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.118","00:00:10.119","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.119","00:00:10.120","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.120","00:00:10.121","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.121","00:00:10.122","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.122","00:00:10.124","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.124","00:00:10.125","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.125","00:00:10.126","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.126","00:00:10.127","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.127","00:00:10.128","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.128","00:00:10.129","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.129","00:00:10.131","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.131","00:00:10.132","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.132","00:00:10.133","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.133","00:00:10.134","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.134","00:00:10.135","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.135","00:00:10.136","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.136","00:00:10.138","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.138","00:00:10.139","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.139","00:00:10.140","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.140","00:00:10.141","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.141","00:00:10.142","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.142","00:00:10.144","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.144","00:00:10.145","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.145","00:00:10.146","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.146","00:00:10.147","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.147","00:00:10.148","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.148","00:00:10.149","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.149","00:00:10.151","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.151","00:00:10.152","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.152","00:00:10.153","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.153","00:00:10.154","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.154","00:00:10.155","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.155","00:00:10.156","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.156","00:00:10.158","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.158","00:00:10.159","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.159","00:00:10.160","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.160","00:00:10.161","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.161","00:00:10.162","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.162","00:00:10.163","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.163","00:00:10.164","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.164","00:00:10.166","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.166","00:00:10.167","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.167","00:00:10.168","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.168","00:00:10.169","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.169","00:00:10.170","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.170","00:00:10.171","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.171","00:00:10.173","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.173","00:00:10.174","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.174","00:00:10.175","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.175","00:00:10.176","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.176","00:00:10.177","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.177","00:00:10.179","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.179","00:00:10.180","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.180","00:00:10.181","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.181","00:00:10.182","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.182","00:00:10.183","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.183","00:00:10.184","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.184","00:00:10.186","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.186","00:00:10.187","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.187","00:00:10.188","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.188","00:00:10.189","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.189","00:00:10.190","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.190","00:00:10.191","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.191","00:00:10.193","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.193","00:00:10.194","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.194","00:00:10.195","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.195","00:00:10.196","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.196","00:00:10.197","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.197","00:00:10.199","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.199","00:00:10.200","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.200","00:00:10.201","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.201","00:00:10.202","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.202","00:00:10.204","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.204","00:00:10.205","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.205","00:00:10.206","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.206","00:00:10.207","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.207","00:00:10.208","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.208","00:00:10.210","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.210","00:00:10.211","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.211","00:00:10.212","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.212","00:00:10.213","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.213","00:00:10.214","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.214","00:00:10.216","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.216","00:00:10.217","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.217","00:00:10.218","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.218","00:00:10.219","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.219","00:00:10.221","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.221","00:00:10.222","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.222","00:00:10.223","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.223","00:00:10.224","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.224","00:00:10.226","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.226","00:00:10.227","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.227","00:00:10.228","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.228","00:00:10.229","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.229","00:00:10.230","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.230","00:00:10.232","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.232","00:00:10.233","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.233","00:00:10.234","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.234","00:00:10.235","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.235","00:00:10.236","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.236","00:00:10.238","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.238","00:00:10.239","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.239","00:00:10.240","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.240","00:00:10.241","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.241","00:00:10.243","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.243","00:00:10.244","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.244","00:00:10.245","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.245","00:00:10.246","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.246","00:00:10.247","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.247","00:00:10.249","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.249","00:00:10.250","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.250","00:00:10.251","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.251","00:00:10.252","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.252","00:00:10.254","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.254","00:00:10.255","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.255","00:00:10.256","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.256","00:00:10.257","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.257","00:00:10.258","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.258","00:00:10.260","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.260","00:00:10.261","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.261","00:00:10.262","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.262","00:00:10.263","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.263","00:00:10.265","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.265","00:00:10.266","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.266","00:00:10.267","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.267","00:00:10.268","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.268","00:00:10.270","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.270","00:00:10.271","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.271","00:00:10.272","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.272","00:00:10.273","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.273","00:00:10.275","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.275","00:00:10.276","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.276","00:00:10.277","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.277","00:00:10.278","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.278","00:00:10.279","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.279","00:00:10.280","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.280","00:00:10.282","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.282","00:00:10.283","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.283","00:00:10.284","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.284","00:00:10.285","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.285","00:00:10.287","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.287","00:00:10.288","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.288","00:00:10.289","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.289","00:00:10.290","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.290","00:00:10.291","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.291","00:00:10.293","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.293","00:00:10.294","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.294","00:00:10.295","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.295","00:00:10.296","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.296","00:00:10.298","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.298","00:00:10.299","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.299","00:00:10.300","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.300","00:00:10.301","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.301","00:00:10.302","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.302","00:00:10.304","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.304","00:00:10.305","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.305","00:00:10.306","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.306","00:00:10.307","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.307","00:00:10.309","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.309","00:00:10.310","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.310","00:00:10.311","0.002s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.315","00:00:10.316","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.316","00:00:10.317","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.317","00:00:10.318","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.318","00:00:10.319","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.319","00:00:10.320","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.320","00:00:10.322","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.322","00:00:10.323","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.323","00:00:10.324","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.324","00:00:10.325","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.325","00:00:10.327","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.327","00:00:10.328","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.328","00:00:10.329","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.329","00:00:10.330","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.330","00:00:10.331","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.331","00:00:10.333","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.333","00:00:10.334","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.334","00:00:10.335","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.335","00:00:10.336","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.336","00:00:10.338","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.338","00:00:10.339","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.339","00:00:10.340","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.340","00:00:10.341","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.341","00:00:10.342","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.342","00:00:10.344","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.344","00:00:10.345","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.345","00:00:10.346","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.346","00:00:10.347","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.347","00:00:10.349","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.349","00:00:10.350","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.350","00:00:10.351","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.351","00:00:10.352","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.352","00:00:10.353","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.353","00:00:10.355","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.355","00:00:10.356","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.356","00:00:10.357","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.357","00:00:10.358","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.358","00:00:10.360","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.360","00:00:10.361","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.361","00:00:10.362","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.362","00:00:10.364","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.364","00:00:10.365","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.365","00:00:10.366","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.366","00:00:10.367","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.367","00:00:10.369","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.369","00:00:10.370","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.370","00:00:10.371","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.371","00:00:10.372","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.372","00:00:10.374","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.374","00:00:10.375","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.375","00:00:10.376","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.376","00:00:10.378","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.378","00:00:10.379","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.379","00:00:10.380","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.380","00:00:10.381","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.381","00:00:10.383","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.383","00:00:10.384","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.384","00:00:10.385","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.385","00:00:10.387","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.387","00:00:10.388","0.002s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.388","00:00:10.389","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.389","00:00:10.390","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.390","00:00:10.392","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.392","00:00:10.393","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.393","00:00:10.394","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.394","00:00:10.395","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.395","00:00:10.396","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.396","00:00:10.398","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.398","00:00:10.399","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.399","00:00:10.400","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.400","00:00:10.401","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.401","00:00:10.402","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.402","00:00:10.404","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.404","00:00:10.405","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.405","00:00:10.406","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.406","00:00:10.407","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.407","00:00:10.409","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.409","00:00:10.410","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.410","00:00:10.411","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.411","00:00:10.412","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.412","00:00:10.413","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.413","00:00:10.415","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.415","00:00:10.416","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.416","00:00:10.417","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.417","00:00:10.418","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.418","00:00:10.420","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.420","00:00:10.421","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.421","00:00:10.422","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.422","00:00:10.423","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.423","00:00:10.425","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.425","00:00:10.426","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.426","00:00:10.427","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.427","00:00:10.428","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.428","00:00:10.429","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.429","00:00:10.431","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.431","00:00:10.432","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.432","00:00:10.433","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.433","00:00:10.434","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.434","00:00:10.436","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.436","00:00:10.437","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.437","00:00:10.438","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.438","00:00:10.439","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.439","00:00:10.440","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.440","00:00:10.442","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.442","00:00:10.443","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.443","00:00:10.444","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.444","00:00:10.445","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.445","00:00:10.447","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.447","00:00:10.448","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.448","00:00:10.449","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.449","00:00:10.450","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.450","00:00:10.452","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.452","00:00:10.453","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.453","00:00:10.454","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.454","00:00:10.456","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.456","00:00:10.457","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.457","00:00:10.458","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.458","00:00:10.459","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.459","00:00:10.461","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.461","00:00:10.462","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.462","00:00:10.463","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.463","00:00:10.465","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.465","00:00:10.466","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.466","00:00:10.467","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.467","00:00:10.468","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.468","00:00:10.470","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.470","00:00:10.471","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.471","00:00:10.472","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.472","00:00:10.473","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.473","00:00:10.475","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.475","00:00:10.476","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.476","00:00:10.477","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.477","00:00:10.478","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.478","00:00:10.480","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.480","00:00:10.481","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.481","00:00:10.482","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.482","00:00:10.484","0.002s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.484","00:00:10.485","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.485","00:00:10.486","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.486","00:00:10.487","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.487","00:00:10.489","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.489","00:00:10.490","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.490","00:00:10.491","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.491","00:00:10.492","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.492","00:00:10.494","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.494","00:00:10.495","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.495","00:00:10.496","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.496","00:00:10.498","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.498","00:00:10.499","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.499","00:00:10.500","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.500","00:00:10.501","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.501","00:00:10.503","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.503","00:00:10.504","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.504","00:00:10.505","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.505","00:00:10.506","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.506","00:00:10.508","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.508","00:00:10.509","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.509","00:00:10.510","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.510","00:00:10.511","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.511","00:00:10.513","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.513","00:00:10.514","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.514","00:00:10.515","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.515","00:00:10.517","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.517","00:00:10.518","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.518","00:00:10.519","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.519","00:00:10.520","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.520","00:00:10.522","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.522","00:00:10.523","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.523","00:00:10.524","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.524","00:00:10.525","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.525","00:00:10.527","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.527","00:00:10.528","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.528","00:00:10.529","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.529","00:00:10.530","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.530","00:00:10.532","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.532","00:00:10.533","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.533","00:00:10.534","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.534","00:00:10.536","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.536","00:00:10.537","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.537","00:00:10.538","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.538","00:00:10.540","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.540","00:00:10.541","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.541","00:00:10.542","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.542","00:00:10.543","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.543","00:00:10.545","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.545","00:00:10.546","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.546","00:00:10.547","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.547","00:00:10.548","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.548","00:00:10.550","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.550","00:00:10.551","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.551","00:00:10.552","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.552","00:00:10.553","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.553","00:00:10.555","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.555","00:00:10.556","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.556","00:00:10.557","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.557","00:00:10.559","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.559","00:00:10.560","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.560","00:00:10.561","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.561","00:00:10.562","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.562","00:00:10.564","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.564","00:00:10.565","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.565","00:00:10.566","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.566","00:00:10.567","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.567","00:00:10.569","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.569","00:00:10.570","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.570","00:00:10.571","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.571","00:00:10.573","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.573","00:00:10.574","0.002s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.574","00:00:10.575","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.575","00:00:10.576","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.576","00:00:10.577","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.577","00:00:10.579","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.579","00:00:10.580","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.580","00:00:10.581","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.581","00:00:10.582","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.582","00:00:10.584","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.584","00:00:10.585","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.585","00:00:10.586","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.586","00:00:10.588","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.588","00:00:10.589","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.589","00:00:10.590","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.590","00:00:10.591","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.591","00:00:10.592","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.592","00:00:10.594","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.594","00:00:10.595","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.595","00:00:10.596","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.596","00:00:10.598","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.598","00:00:10.599","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.599","00:00:10.600","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.600","00:00:10.601","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.601","00:00:10.603","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.603","00:00:10.604","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.604","00:00:10.605","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.605","00:00:10.606","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.606","00:00:10.608","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.608","00:00:10.609","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.609","00:00:10.610","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.610","00:00:10.612","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.612","00:00:10.613","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.613","00:00:10.614","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.614","00:00:10.615","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.615","00:00:10.617","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.617","00:00:10.618","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.618","00:00:10.619","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.619","00:00:10.620","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.620","00:00:10.622","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.622","00:00:10.623","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.623","00:00:10.624","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.624","00:00:10.625","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.625","00:00:10.627","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.627","00:00:10.628","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.628","00:00:10.629","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.629","00:00:10.630","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.630","00:00:10.632","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.632","00:00:10.633","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.633","00:00:10.634","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.634","00:00:10.636","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.636","00:00:10.637","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.637","00:00:10.638","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.638","00:00:10.639","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.639","00:00:10.641","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.641","00:00:10.642","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.642","00:00:10.643","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.643","00:00:10.645","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.645","00:00:10.646","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.646","00:00:10.647","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.647","00:00:10.648","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.648","00:00:10.650","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.650","00:00:10.651","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.651","00:00:10.652","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.652","00:00:10.654","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.654","00:00:10.655","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.655","00:00:10.656","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.656","00:00:10.657","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.657","00:00:10.658","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.658","00:00:10.660","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.660","00:00:10.661","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.661","00:00:10.662","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.662","00:00:10.663","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.663","00:00:10.665","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.665","00:00:10.666","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.666","00:00:10.667","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.667","00:00:10.669","0.001s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.669","00:00:10.670","0.002s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.040","00:00:10.041","0.001s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.041","00:00:10.042","0.001s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.042","00:00:10.043","0.001s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.043","00:00:10.044","0.001s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.044","00:00:10.046","0.001s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.046","00:00:10.047","0.001s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.047","00:00:10.048","0.001s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.048","00:00:10.049","0.001s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.049","00:00:10.050","0.001s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.050","00:00:10.051","0.001s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.051","00:00:10.053","0.001s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.053","00:00:10.054","0.001s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.054","00:00:10.055","0.001s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.055","00:00:10.056","0.001s"],[363,16,"00:00:09.720","00:00:10.058","00:00:10.056","00:00:10.058","0.002s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.064","00:00:10.065","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.065","00:00:10.066","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.066","00:00:10.067","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.067","00:00:10.068","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.068","00:00:10.070","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.070","00:00:10.071","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.071","00:00:10.072","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.072","00:00:10.073","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.073","00:00:10.074","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.074","00:00:10.075","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.075","00:00:10.077","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.077","00:00:10.078","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.078","00:00:10.079","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.079","00:00:10.080","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.080","00:00:10.081","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.081","00:00:10.083","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.083","00:00:10.084","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.084","00:00:10.085","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.085","00:00:10.086","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.086","00:00:10.088","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.088","00:00:10.089","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.089","00:00:10.090","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.090","00:00:10.091","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.091","00:00:10.092","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.092","00:00:10.093","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.093","00:00:10.094","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.094","00:00:10.095","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.095","00:00:10.097","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.097","00:00:10.098","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.098","00:00:10.099","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.099","00:00:10.100","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.100","00:00:10.101","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.101","00:00:10.102","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.102","00:00:10.104","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.104","00:00:10.105","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.105","00:00:10.106","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.106","00:00:10.107","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.107","00:00:10.108","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.108","00:00:10.109","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.109","00:00:10.111","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.111","00:00:10.112","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.112","00:00:10.113","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.113","00:00:10.114","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.114","00:00:10.115","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.115","00:00:10.117","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.117","00:00:10.118","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.118","00:00:10.119","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.119","00:00:10.120","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.120","00:00:10.121","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.121","00:00:10.122","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.122","00:00:10.124","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.124","00:00:10.125","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.125","00:00:10.126","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.126","00:00:10.127","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.127","00:00:10.128","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.128","00:00:10.129","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.129","00:00:10.131","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.131","00:00:10.132","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.132","00:00:10.133","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.133","00:00:10.134","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.134","00:00:10.135","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.135","00:00:10.136","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.136","00:00:10.138","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.138","00:00:10.139","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.139","00:00:10.140","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.140","00:00:10.141","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.141","00:00:10.142","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.142","00:00:10.144","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.144","00:00:10.145","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.145","00:00:10.146","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.146","00:00:10.147","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.147","00:00:10.148","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.148","00:00:10.149","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.149","00:00:10.151","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.151","00:00:10.152","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.152","00:00:10.153","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.153","00:00:10.154","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.154","00:00:10.155","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.155","00:00:10.156","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.156","00:00:10.158","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.158","00:00:10.159","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.159","00:00:10.160","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.160","00:00:10.161","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.161","00:00:10.162","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.162","00:00:10.163","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.163","00:00:10.164","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.164","00:00:10.166","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.166","00:00:10.167","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.167","00:00:10.168","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.168","00:00:10.169","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.169","00:00:10.170","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.170","00:00:10.171","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.171","00:00:10.173","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.173","00:00:10.174","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.174","00:00:10.175","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.175","00:00:10.176","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.176","00:00:10.177","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.177","00:00:10.179","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.179","00:00:10.180","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.180","00:00:10.181","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.181","00:00:10.182","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.182","00:00:10.183","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.183","00:00:10.184","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.184","00:00:10.186","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.186","00:00:10.187","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.187","00:00:10.188","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.188","00:00:10.189","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.189","00:00:10.190","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.190","00:00:10.191","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.191","00:00:10.193","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.193","00:00:10.194","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.194","00:00:10.195","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.195","00:00:10.196","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.196","00:00:10.197","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.197","00:00:10.199","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.199","00:00:10.200","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.200","00:00:10.201","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.201","00:00:10.202","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.202","00:00:10.204","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.204","00:00:10.205","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.205","00:00:10.206","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.206","00:00:10.207","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.207","00:00:10.208","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.208","00:00:10.210","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.210","00:00:10.211","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.211","00:00:10.212","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.212","00:00:10.213","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.213","00:00:10.214","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.214","00:00:10.216","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.216","00:00:10.217","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.217","00:00:10.218","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.218","00:00:10.219","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.219","00:00:10.221","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.221","00:00:10.222","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.222","00:00:10.223","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.223","00:00:10.224","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.224","00:00:10.226","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.226","00:00:10.227","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.227","00:00:10.228","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.228","00:00:10.229","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.229","00:00:10.230","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.230","00:00:10.232","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.232","00:00:10.233","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.233","00:00:10.234","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.234","00:00:10.235","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.235","00:00:10.236","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.236","00:00:10.238","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.238","00:00:10.239","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.239","00:00:10.240","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.240","00:00:10.241","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.241","00:00:10.243","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.243","00:00:10.244","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.244","00:00:10.245","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.245","00:00:10.246","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.246","00:00:10.247","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.247","00:00:10.249","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.249","00:00:10.250","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.250","00:00:10.251","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.251","00:00:10.252","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.252","00:00:10.254","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.254","00:00:10.255","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.255","00:00:10.256","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.256","00:00:10.257","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.257","00:00:10.258","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.258","00:00:10.260","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.260","00:00:10.261","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.261","00:00:10.262","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.262","00:00:10.263","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.263","00:00:10.265","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.265","00:00:10.266","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.266","00:00:10.267","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.267","00:00:10.268","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.268","00:00:10.270","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.270","00:00:10.271","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.271","00:00:10.272","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.272","00:00:10.273","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.273","00:00:10.275","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.275","00:00:10.276","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.276","00:00:10.277","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.277","00:00:10.278","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.278","00:00:10.279","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.279","00:00:10.280","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.280","00:00:10.282","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.282","00:00:10.283","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.283","00:00:10.284","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.284","00:00:10.285","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.285","00:00:10.287","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.287","00:00:10.288","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.288","00:00:10.289","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.289","00:00:10.290","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.290","00:00:10.291","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.291","00:00:10.293","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.293","00:00:10.294","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.294","00:00:10.295","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.295","00:00:10.296","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.296","00:00:10.298","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.298","00:00:10.299","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.299","00:00:10.300","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.300","00:00:10.301","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.301","00:00:10.302","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.302","00:00:10.304","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.304","00:00:10.305","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.305","00:00:10.306","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.306","00:00:10.307","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.307","00:00:10.308","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.308","00:00:10.310","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.310","00:00:10.311","0.002s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.315","00:00:10.316","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.316","00:00:10.317","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.317","00:00:10.318","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.318","00:00:10.319","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.319","00:00:10.320","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.320","00:00:10.322","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.322","00:00:10.323","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.323","00:00:10.324","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.324","00:00:10.325","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.325","00:00:10.327","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.327","00:00:10.328","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.328","00:00:10.329","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.329","00:00:10.330","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.330","00:00:10.331","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.331","00:00:10.333","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.333","00:00:10.334","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.334","00:00:10.335","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.335","00:00:10.336","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.336","00:00:10.338","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.338","00:00:10.339","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.339","00:00:10.340","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.340","00:00:10.341","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.341","00:00:10.342","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.342","00:00:10.344","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.344","00:00:10.345","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.345","00:00:10.346","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.346","00:00:10.347","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.347","00:00:10.348","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.348","00:00:10.350","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.350","00:00:10.351","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.351","00:00:10.352","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.352","00:00:10.353","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.353","00:00:10.355","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.355","00:00:10.356","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.356","00:00:10.357","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.357","00:00:10.358","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.358","00:00:10.360","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.360","00:00:10.361","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.361","00:00:10.362","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.362","00:00:10.364","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.364","00:00:10.365","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.365","00:00:10.366","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.366","00:00:10.367","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.367","00:00:10.369","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.369","00:00:10.370","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.370","00:00:10.371","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.371","00:00:10.372","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.372","00:00:10.374","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.374","00:00:10.375","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.375","00:00:10.376","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.376","00:00:10.378","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.378","00:00:10.379","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.379","00:00:10.380","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.380","00:00:10.381","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.381","00:00:10.383","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.383","00:00:10.384","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.384","00:00:10.385","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.385","00:00:10.387","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.387","00:00:10.388","0.002s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.388","00:00:10.389","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.389","00:00:10.390","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.390","00:00:10.392","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.392","00:00:10.393","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.393","00:00:10.394","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.394","00:00:10.395","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.395","00:00:10.396","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.396","00:00:10.397","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.397","00:00:10.399","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.399","00:00:10.400","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.400","00:00:10.401","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.401","00:00:10.402","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.402","00:00:10.404","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.404","00:00:10.405","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.405","00:00:10.406","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.406","00:00:10.407","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.407","00:00:10.408","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.408","00:00:10.410","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.410","00:00:10.411","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.411","00:00:10.412","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.412","00:00:10.413","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.413","00:00:10.415","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.415","00:00:10.416","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.416","00:00:10.417","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.417","00:00:10.418","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.418","00:00:10.419","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.419","00:00:10.421","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.421","00:00:10.422","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.422","00:00:10.423","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.423","00:00:10.425","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.425","00:00:10.426","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.426","00:00:10.427","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.427","00:00:10.428","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.428","00:00:10.429","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.429","00:00:10.431","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.431","00:00:10.432","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.432","00:00:10.433","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.433","00:00:10.434","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.434","00:00:10.435","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.435","00:00:10.437","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.437","00:00:10.438","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.438","00:00:10.439","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.439","00:00:10.440","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.440","00:00:10.442","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.442","00:00:10.443","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.443","00:00:10.444","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.444","00:00:10.445","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.445","00:00:10.447","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.447","00:00:10.448","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.448","00:00:10.449","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.449","00:00:10.450","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.450","00:00:10.452","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.452","00:00:10.453","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.453","00:00:10.454","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.454","00:00:10.456","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.456","00:00:10.457","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.457","00:00:10.458","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.458","00:00:10.459","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.459","00:00:10.461","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.461","00:00:10.462","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.462","00:00:10.463","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.463","00:00:10.465","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.465","00:00:10.466","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.466","00:00:10.467","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.467","00:00:10.468","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.468","00:00:10.470","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.470","00:00:10.471","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.471","00:00:10.472","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.472","00:00:10.473","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.473","00:00:10.475","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.475","00:00:10.476","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.476","00:00:10.477","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.477","00:00:10.478","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.478","00:00:10.480","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.480","00:00:10.481","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.481","00:00:10.482","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.482","00:00:10.484","0.002s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.484","00:00:10.485","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.485","00:00:10.486","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.486","00:00:10.487","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.487","00:00:10.489","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.489","00:00:10.490","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.490","00:00:10.491","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.491","00:00:10.492","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.492","00:00:10.494","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.494","00:00:10.495","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.495","00:00:10.496","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.496","00:00:10.498","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.498","00:00:10.499","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.499","00:00:10.500","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.500","00:00:10.501","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.501","00:00:10.503","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.503","00:00:10.504","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.504","00:00:10.505","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.505","00:00:10.506","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.506","00:00:10.508","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.508","00:00:10.509","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.509","00:00:10.510","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.510","00:00:10.511","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.511","00:00:10.513","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.513","00:00:10.514","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.514","00:00:10.515","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.515","00:00:10.516","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.516","00:00:10.518","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.518","00:00:10.519","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.519","00:00:10.520","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.520","00:00:10.522","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.522","00:00:10.523","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.523","00:00:10.524","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.524","00:00:10.525","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.525","00:00:10.527","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.527","00:00:10.528","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.528","00:00:10.529","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.529","00:00:10.530","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.530","00:00:10.532","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.532","00:00:10.533","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.533","00:00:10.534","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.534","00:00:10.536","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.536","00:00:10.537","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.537","00:00:10.538","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.538","00:00:10.540","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.540","00:00:10.541","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.541","00:00:10.542","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.542","00:00:10.543","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.543","00:00:10.545","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.545","00:00:10.546","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.546","00:00:10.547","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.547","00:00:10.548","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.548","00:00:10.550","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.550","00:00:10.551","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.551","00:00:10.552","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.552","00:00:10.553","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.553","00:00:10.555","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.555","00:00:10.556","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.556","00:00:10.557","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.557","00:00:10.559","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.559","00:00:10.560","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.560","00:00:10.561","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.561","00:00:10.562","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.562","00:00:10.564","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.564","00:00:10.565","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.565","00:00:10.566","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.566","00:00:10.567","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.567","00:00:10.569","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.569","00:00:10.570","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.570","00:00:10.571","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.571","00:00:10.573","0.001s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.573","00:00:10.574","0.002s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.316","00:00:10.317","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.317","00:00:10.318","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.318","00:00:10.319","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.319","00:00:10.320","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.320","00:00:10.322","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.322","00:00:10.323","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.323","00:00:10.324","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.324","00:00:10.325","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.325","00:00:10.326","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.326","00:00:10.328","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.328","00:00:10.329","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.329","00:00:10.330","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.330","00:00:10.331","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.331","00:00:10.333","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.333","00:00:10.334","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.334","00:00:10.335","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.335","00:00:10.336","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.336","00:00:10.337","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.337","00:00:10.339","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.339","00:00:10.340","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.340","00:00:10.341","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.341","00:00:10.342","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.342","00:00:10.344","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.344","00:00:10.345","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.345","00:00:10.346","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.346","00:00:10.347","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.347","00:00:10.348","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.348","00:00:10.350","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.350","00:00:10.351","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.351","00:00:10.352","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.352","00:00:10.353","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.353","00:00:10.355","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.355","00:00:10.356","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.356","00:00:10.357","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.357","00:00:10.358","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.358","00:00:10.360","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.360","00:00:10.361","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.361","00:00:10.362","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.362","00:00:10.363","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.363","00:00:10.365","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.365","00:00:10.366","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.366","00:00:10.367","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.367","00:00:10.369","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.369","00:00:10.370","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.370","00:00:10.371","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.371","00:00:10.372","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.372","00:00:10.374","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.374","00:00:10.375","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.375","00:00:10.376","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.376","00:00:10.377","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.377","00:00:10.379","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.379","00:00:10.380","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.380","00:00:10.381","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.381","00:00:10.383","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.383","00:00:10.384","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.384","00:00:10.385","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.385","00:00:10.386","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.386","00:00:10.388","0.002s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.388","00:00:10.389","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.389","00:00:10.390","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.390","00:00:10.391","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.391","00:00:10.393","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.393","00:00:10.394","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.394","00:00:10.395","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.395","00:00:10.396","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.396","00:00:10.397","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.397","00:00:10.399","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.399","00:00:10.400","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.400","00:00:10.401","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.401","00:00:10.402","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.402","00:00:10.404","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.404","00:00:10.405","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.405","00:00:10.406","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.406","00:00:10.407","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.407","00:00:10.408","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.408","00:00:10.410","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.410","00:00:10.411","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.411","00:00:10.412","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.412","00:00:10.413","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.413","00:00:10.415","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.415","00:00:10.416","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.416","00:00:10.417","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.417","00:00:10.418","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.418","00:00:10.419","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.419","00:00:10.421","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.421","00:00:10.422","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.422","00:00:10.423","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.423","00:00:10.424","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.424","00:00:10.426","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.426","00:00:10.427","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.427","00:00:10.428","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.428","00:00:10.429","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.429","00:00:10.430","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.430","00:00:10.432","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.432","00:00:10.433","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.433","00:00:10.434","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.434","00:00:10.435","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.435","00:00:10.437","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.437","00:00:10.438","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.438","00:00:10.439","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.439","00:00:10.440","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.440","00:00:10.442","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.442","00:00:10.443","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.443","00:00:10.444","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.444","00:00:10.445","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.445","00:00:10.447","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.447","00:00:10.448","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.448","00:00:10.449","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.449","00:00:10.450","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.450","00:00:10.452","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.452","00:00:10.453","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.453","00:00:10.454","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.454","00:00:10.455","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.455","00:00:10.457","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.457","00:00:10.458","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.458","00:00:10.459","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.459","00:00:10.461","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.461","00:00:10.462","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.462","00:00:10.463","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.463","00:00:10.464","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.464","00:00:10.466","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.466","00:00:10.467","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.467","00:00:10.468","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.468","00:00:10.469","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.469","00:00:10.471","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.471","00:00:10.472","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.472","00:00:10.473","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.473","00:00:10.475","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.475","00:00:10.476","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.476","00:00:10.477","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.477","00:00:10.478","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.478","00:00:10.480","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.480","00:00:10.481","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.481","00:00:10.482","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.482","00:00:10.484","0.002s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.484","00:00:10.485","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.485","00:00:10.486","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.486","00:00:10.487","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.487","00:00:10.489","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.489","00:00:10.490","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.490","00:00:10.491","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.491","00:00:10.492","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.492","00:00:10.494","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.494","00:00:10.495","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.495","00:00:10.496","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.496","00:00:10.498","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.498","00:00:10.499","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.499","00:00:10.500","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.500","00:00:10.501","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.501","00:00:10.502","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.502","00:00:10.504","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.504","00:00:10.505","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.505","00:00:10.506","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.506","00:00:10.508","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.508","00:00:10.509","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.509","00:00:10.510","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.510","00:00:10.511","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.511","00:00:10.513","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.513","00:00:10.514","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.514","00:00:10.515","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.515","00:00:10.516","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.516","00:00:10.518","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.518","00:00:10.519","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.519","00:00:10.520","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.520","00:00:10.522","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.522","00:00:10.523","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.523","00:00:10.524","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.524","00:00:10.525","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.525","00:00:10.527","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.527","00:00:10.528","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.528","00:00:10.529","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.529","00:00:10.530","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.530","00:00:10.532","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.532","00:00:10.533","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.533","00:00:10.534","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.534","00:00:10.536","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.536","00:00:10.537","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.537","00:00:10.538","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.538","00:00:10.539","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.539","00:00:10.541","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.541","00:00:10.542","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.542","00:00:10.543","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.543","00:00:10.544","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.544","00:00:10.546","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.546","00:00:10.547","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.547","00:00:10.548","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.548","00:00:10.550","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.550","00:00:10.551","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.551","00:00:10.552","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.552","00:00:10.553","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.553","00:00:10.555","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.555","00:00:10.556","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.556","00:00:10.557","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.557","00:00:10.558","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.558","00:00:10.560","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.560","00:00:10.561","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.561","00:00:10.562","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.562","00:00:10.564","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.564","00:00:10.565","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.565","00:00:10.566","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.566","00:00:10.567","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.567","00:00:10.569","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.569","00:00:10.570","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.570","00:00:10.571","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.571","00:00:10.572","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.572","00:00:10.574","0.002s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.574","00:00:10.575","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.575","00:00:10.576","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.576","00:00:10.577","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.577","00:00:10.579","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.579","00:00:10.580","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.580","00:00:10.581","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.581","00:00:10.582","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.582","00:00:10.584","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.584","00:00:10.585","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.585","00:00:10.586","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.586","00:00:10.588","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.588","00:00:10.589","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.589","00:00:10.590","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.590","00:00:10.591","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.591","00:00:10.592","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.592","00:00:10.594","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.594","00:00:10.595","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.595","00:00:10.596","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.596","00:00:10.598","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.598","00:00:10.599","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.599","00:00:10.600","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.600","00:00:10.601","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.601","00:00:10.603","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.603","00:00:10.604","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.604","00:00:10.605","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.605","00:00:10.606","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.606","00:00:10.608","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.608","00:00:10.609","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.609","00:00:10.610","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.610","00:00:10.612","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.612","00:00:10.613","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.613","00:00:10.614","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.614","00:00:10.615","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.615","00:00:10.617","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.617","00:00:10.618","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.618","00:00:10.619","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.619","00:00:10.620","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.620","00:00:10.622","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.622","00:00:10.623","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.623","00:00:10.624","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.624","00:00:10.625","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.625","00:00:10.627","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.627","00:00:10.628","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.628","00:00:10.629","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.629","00:00:10.630","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.630","00:00:10.632","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.632","00:00:10.633","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.633","00:00:10.634","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.634","00:00:10.636","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.636","00:00:10.637","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.637","00:00:10.638","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.638","00:00:10.639","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.639","00:00:10.641","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.641","00:00:10.642","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.642","00:00:10.643","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.643","00:00:10.645","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.645","00:00:10.646","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.646","00:00:10.647","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.647","00:00:10.648","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.648","00:00:10.650","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.650","00:00:10.651","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.651","00:00:10.652","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.652","00:00:10.654","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.654","00:00:10.655","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.655","00:00:10.656","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.656","00:00:10.657","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.657","00:00:10.658","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.658","00:00:10.660","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.660","00:00:10.661","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.661","00:00:10.662","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.662","00:00:10.663","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.663","00:00:10.665","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.665","00:00:10.666","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.666","00:00:10.667","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.667","00:00:10.669","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.669","00:00:10.670","0.002s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.670","00:00:10.671","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.671","00:00:10.672","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.672","00:00:10.673","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.673","00:00:10.674","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.674","00:00:10.676","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.676","00:00:10.677","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.677","00:00:10.678","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.678","00:00:10.679","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.679","00:00:10.680","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.680","00:00:10.681","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.681","00:00:10.683","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.683","00:00:10.684","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.684","00:00:10.685","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.685","00:00:10.686","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.686","00:00:10.687","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.687","00:00:10.688","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.688","00:00:10.690","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.690","00:00:10.691","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.691","00:00:10.692","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.692","00:00:10.693","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.693","00:00:10.694","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.694","00:00:10.695","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.695","00:00:10.697","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.697","00:00:10.698","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.698","00:00:10.699","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.699","00:00:10.700","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.700","00:00:10.701","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.701","00:00:10.703","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.703","00:00:10.704","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.704","00:00:10.705","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.705","00:00:10.706","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.706","00:00:10.707","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.707","00:00:10.708","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.708","00:00:10.710","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.710","00:00:10.711","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.711","00:00:10.712","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.712","00:00:10.713","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.713","00:00:10.714","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.714","00:00:10.715","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.715","00:00:10.717","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.717","00:00:10.718","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.718","00:00:10.719","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.719","00:00:10.720","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.720","00:00:10.721","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.721","00:00:10.722","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.722","00:00:10.724","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.724","00:00:10.725","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.725","00:00:10.726","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.726","00:00:10.727","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.727","00:00:10.728","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.728","00:00:10.729","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.729","00:00:10.731","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.731","00:00:10.732","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.732","00:00:10.733","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.733","00:00:10.734","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.734","00:00:10.735","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.735","00:00:10.737","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.737","00:00:10.738","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.738","00:00:10.739","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.739","00:00:10.740","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.740","00:00:10.741","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.741","00:00:10.742","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.742","00:00:10.743","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.743","00:00:10.745","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.745","00:00:10.746","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.746","00:00:10.747","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.747","00:00:10.748","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.748","00:00:10.749","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.749","00:00:10.751","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.751","00:00:10.752","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.752","00:00:10.753","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.753","00:00:10.754","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.754","00:00:10.755","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.755","00:00:10.756","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.756","00:00:10.758","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.758","00:00:10.759","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.759","00:00:10.760","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.760","00:00:10.761","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.761","00:00:10.762","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.762","00:00:10.763","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.763","00:00:10.765","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.765","00:00:10.766","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.766","00:00:10.767","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.767","00:00:10.768","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.768","00:00:10.769","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.769","00:00:10.770","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.770","00:00:10.772","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.772","00:00:10.773","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.773","00:00:10.774","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.774","00:00:10.775","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.775","00:00:10.776","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.776","00:00:10.778","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.778","00:00:10.779","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.779","00:00:10.780","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.780","00:00:10.781","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.781","00:00:10.783","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.783","00:00:10.784","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.784","00:00:10.785","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.785","00:00:10.786","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.786","00:00:10.787","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.787","00:00:10.789","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.789","00:00:10.790","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.790","00:00:10.791","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.791","00:00:10.792","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.792","00:00:10.794","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.794","00:00:10.795","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.795","00:00:10.796","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.796","00:00:10.797","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.797","00:00:10.798","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.798","00:00:10.800","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.800","00:00:10.801","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.801","00:00:10.802","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.802","00:00:10.803","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.803","00:00:10.804","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.804","00:00:10.806","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.806","00:00:10.807","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.807","00:00:10.808","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.808","00:00:10.809","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.809","00:00:10.811","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.811","00:00:10.812","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.812","00:00:10.813","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.813","00:00:10.814","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.814","00:00:10.815","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.815","00:00:10.817","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.817","00:00:10.818","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.818","00:00:10.819","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.819","00:00:10.820","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.820","00:00:10.822","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.822","00:00:10.823","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.823","00:00:10.824","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.824","00:00:10.825","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.825","00:00:10.826","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.826","00:00:10.828","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.828","00:00:10.829","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.829","00:00:10.830","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.830","00:00:10.831","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.831","00:00:10.833","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.833","00:00:10.834","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.834","00:00:10.835","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.835","00:00:10.836","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.836","00:00:10.837","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.837","00:00:10.839","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.839","00:00:10.840","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.840","00:00:10.841","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.841","00:00:10.842","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.842","00:00:10.844","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.844","00:00:10.845","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.845","00:00:10.846","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.846","00:00:10.847","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.847","00:00:10.848","0.001s"],[140,434,"00:00:09.913","00:00:10.850","00:00:10.848","00:00:10.850","0.002s"]],"hovertemplate":"\u003cb\u003e%{y}\u003c\u002fb\u003e\u003cbr\u003eType: %{fullData.name}\u003cbr\u003eStart: %{customdata[4]}\u003cbr\u003eEnd: %{customdata[5]}\u003cbr\u003eDuration: %{customdata[6]}\u003cbr\u003ePrompt Tokens: %{customdata[0]}\u003cbr\u003eOutput Tokens: %{customdata[1]}\u003cbr\u003eRequest Start Time: %{customdata[2]}\u003cbr\u003eRequest End Time: %{customdata[3]}\u003cbr\u003e\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"ITL \u003c 2ms","marker":{"color":"#109618","pattern":{"shape":""}},"name":"ITL \u003c 2ms","orientation":"h","showlegend":true,"textposition":"auto","x":{"dtype":"i1","bdata":"AgIBAgECAQIBAgECAQIBAQECAQEBAgEBAQIBAQECAQEBAQEBAQIBAQEBAQIBAQEBAQEBAgEBAQEBAQEBAQIBAQEBAQECAAECAQEBAQEBAQEBAgEBAQEBAQECAQEBAQEBAgEBAQEBAQIBAQEBAQECAQECAQEBAQEBAQECAQEBAQEBAgEBAQEBAQIBAQEBAQECAQEBAQEBAgEBAgABAQIBAQEBAQEBAQEBAgEBAQEBAQEBAQECAQEBAQEBAQEBAQIBAQEBAgEBAgEAAgEBAQEBAQECAQEBAQEBAgEBAQECAQECAQEBAQEBAgEBAQECAQEBAQECAQEBAQIBAgECAQIBAQEBAQEBAQIAAgEBAQEBAQEBAQEBAQIBAQEBAQEBAgEBAQEBAQEBAgEBAQEBAQEBAgEBAQEBAQECAQEBAQEBAQEBAQIBAQEBAQIBAQEBAQIBAQEBAQIBAQEBAgEBAQEBAgEBAQIBAgEBAQEBAQEBAgEBAQECAQEBAgEBAgEBAQEBAgEBAQECAQEBAgEBAQIBAgEAAgEBAQIBAQEBAgEBAQIBAQEBAQEBAQIBAQEBAQIBAQEBAgEBAQEBAgEBAQEBAgEBAQEBAgEBAQECAQEAAgEBAQECAQEBAQIBAQIBAQECAQEBAQECAQEBAQIBAQEBAgEBAgEBAQEBAgEBAQIBAQEBAgEBAQIBAQIBAQEBAQECAQEBAQIBAQECAQEBAgEBAgABAgEBAQIBAQEBAgEBAQIBAQIBAQEBAQECAQEBAgEBAQECAQECAQEBAQECAQEBAQIBAQECAQEBAgEBAgEBAQIBAQECAQEBAgEBAQIBAQIBAQECAQEBAgEBAgECAQEBAQECAQEBAgECAQEBAgEBAQEBAQECAQECAQECAQECAQEBAQIBAQECAQEBAgEBAgEBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQECAQECAQEBAgEBAQIBAQIBAQECAQECAQECAQECAQECAQEBAgEBAgEBAgEBAgECAQECAQIBAQEBAgEBAgEBAgEBAgEBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQECAQECAQIBAQIBAQECAQECAQECAQECAQEBAgEBAgECAQEBAgEBAgEBAgEBAgEBAgEBAgEBAgECAQECAQECAQIBAQIBAQIBAQIBAgACAQEBAQEBAQIBAQEBAQECAQEBAQIBAQIBAQEBAQECAQEBAgEBAQEBAQIBAQECAQECAQIBAgEBAQEBAQECAQEBAQEBAQEBAQEBAQECAQEBAQEBAQECAQEBAQEBAQIBAQEBAQEBAQIBAQEBAQEBAgEBAQEBAQECAAECAQEBAQECAQEBAQECAQEBAQECAQEBAQECAQEBAQECAQECAQECAQEBAQEBAQIBAQEBAgEBAQECAQECAQEBAQIBAQEBAgEBAQECAQECAQECAQEBAQECAQEBAQIBAQEBAgEBAgEBAQECAQEBAQIBAQEBAQEBAQECAQEBAQEBAQIBAQEBAQEBAgEBAQEBAQEBAgEBAQEBAQECAQEBAQEBAQIAAQECAQEBAQECAQEBAQIBAQEBAQECAQEBAQIBAQEBAQIBAQIBAQIBAQEBAQEBAgEBAQEBAgEBAQIBAQIBAQEBAQIBAQECAQEBAQIBAQECAQIBAQEBAQIBAQEBAQIBAQECAQECAQEBAQIBAQEBAgACAQEBAQIBAQEBAQIBAQEBAQIBAQEBAgEBAQEBAgABAgEBAQEBAQIBAQEBAgEBAQEBAgEBAQEBAgEBAQEBAgEBAgEBAgEBAQEBAQECAQEBAQIBAQEBAgEBAgEBAQEBAgEBAQIBAQEBAgEBAgEBAgEBAQEBAgEBAQECAQEBAQIBAQIBAQECAQEBAQIAAgEBAQECAQEBAQECAQEBAQECAQEBAQIBAQEBAQIBAQACAQEBAgEBAQECAQEBAgEBAQIBAQEBAQIBAQEBAgEBAQIBAQECAQEBAQECAQEBAgEBAQECAQEBAgEBAgEBAQEBAQIBAQECAQEBAQIBAQECAQECAQEBAQEBAgEBAQECAQEBAgEBAgEBAQEBAgEBAQECAQEBAQIBAQIBAQEBAQIBAQEBAgEBAQIBAQECAQECAQEBAgEBAQIBAQECAQEBAgEBAgEBAQIBAQECAQECAQIBAQEBAQIBAQIBAQIBAQECAQEBAQEBAgEBAQIBAQIBAQIBAQEBAgEBAQIBAQIBAQECAQECAQECAQEBAQIBAQECAQECAQEBAgEBAQIBAQIBAQECAQEBAgEBAgEBAgEBAQIBAQIBAQIBAQIBAQIBAQECAQIBAQIBAQIBAgEBAgEBAQIBAQECAQECAQECAQEBAgEBAgECAQEBAgEBAgEBAgECAQEBAgEBAQIBAQIBAgEBAgEBAQIBAgEBAQIBAQIBAQIBAQECAQIBAQECAQIBAQECAQECAQECAQECAQECAQIBAQIBAQIBAgEBAgEBAgECAQECAQEBAQEBAgEBAQECAQEBAQIBAQEBAQECAQEBAQIBAQECAQEBAgEBAgEBAQEBAQIBAgACAQECAQEBAQIBAQECAQEBAQIBAQECAQECAQECAQIBAQECAQEBAQECAQEBAgEBAQIBAQECAQECAQEBAgEBAQIBAQECAQEBAgEBAgEBAQIBAQIBAQECAQEBAgEBAQIBAQIBAQECAQEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAQIBAQECAQEBAgEBAQIBAQIBAQECAQECAQEBAgEBAQIBAQIBAQECAQECAQEBAgEBAQEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAQIBAgEBAQIBAgEBAgECAAIBAQECAQEBAQECAQEBAgEBAgEBAgEBAQECAQEBAgEBAQIBAQIBAQECAQEBAgEBAgEBAQECAQECAQEBAgEBAgEBAQIBAQECAQECAQEBAgEBAQIBAQIBAQECAQEBAgEBAgEBAgEBAgEBAgEBAQIBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAQIBAgEBAgEAAQEBAQECAQEBAQIBAQEBAQIBAQEBAgEBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQEBAgEBAQIBAQEBAgEBAQECAQEBAgEBAgEBAQECAgIBAQEBAgEBAQECAQEBAgEBAQECAQIBAQEBAQECAQEBAQIBAQECAQECAQEBAgACAQEBAQIBAQECAQEBAgEBAgEBAQEBAgEBAQECAQEBAgEBAQIBAQIBAQEBAQECAQEBAgEBAQECAQECAQECAQACAQEBAgEBAgEBAQIBAQIBAQIBAQEBAgEBAQIBAQIBAQEBAgEBAgEBAQIBAQECAQECAQEBAgEBAQIBAQECAQIBAQEBAgEBAQIBAQIBAQIBAQEBAgEBAQIBAgEBAQECAQECAQEBAQIBAQECAQIBAQECAQEBAAECAgACAQEBAgEBAgEBAgEBAgEBAQECAQEBAQECAQEBAQECAQEBAQIBAQEBAgEBAQECAQEBAgEBAQECAQEBAgEBAQECAQEBAQIBAQECAQEBAQIBAQEBAgEBAQIBAQIAAgEBAgIBAgEBAQIBAgEBAQECAQEBAQIBAQEBAgEBAQIBAQIBAQEBAQIBAQEBAgEBAQIBAQIBAQEBAQIBAQEBAgEBAQIBAQECAQECAQEBAQEBAgEBAQIBAQEBAgEBAgEBAgEBAQEBAQIBAQEBAgEBAQIBAQECAQEBAQIBAQECAQEBAgEBAgEBAgEBAgEBAQEBAgEBAgEBAgEBAQECAQEBAgEBAgEBAQIBAQIBAQEBAgEBAgEBAQIBAQIBAQECAQEBAgEBAgEBAgEBAQECAQEBAgECAQEBAQECAQECAQEBAgEBAQIBAgEBAQIBAQEAAQICAAIBAQECAQECAQECAQECAQEBAQIBAQIBAQEBAgIBAQEBAgEBAgEBAgEBAgEBAgEBAgEBAQIBAQIBAQIBAgEBAgEBAQIBAQIBAgEBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAgEBAgEBAQIBAQIBAgEBAgEBAgEBAgEBAQIBAQIBAQIBAQIBAgACAQIBAgEBAQIBAQIBAQIBAgEBAgEBAQIBAQIBAQECAQEBAgECAQEBAgEBAQIBAQIBAQEBAgIBAQEBAgIAAgEBAQIBAQIBAQIBAQIBAQEBAgEBAgEBAQECAgEBAQECAQECAQECAQECAQECAQECAQEBAgECAQECAQECAQECAQECAQECAQECAQEBAgEBAgEBAgEBAgECAQEBAgEBAgEBAgECAQECAQEBAgEBAgECAQECAQECAQECAQEBAgEBAgEBAgEBAgECAQEBAgECAQEBAgEBAgEBAgECAQEAAgECAQEBAQEBAgEBAQEBAQIBAQEBAgEBAQEBAgEBAQIBAQECAQEBAgEBAQEBAQEBAgEBAQEBAgECAQECAQEBAQEBAQECAQEBAQEBAgEBAQEBAQIBAQEBAQIBAQEBAQIBAQECAQIBAgECAQEBAQEBAQIBAQEBAgEBAQECAgEBAQIBAQIBAQECAQIBAQIBAQIBAQECAQECAQECAQIBAQECAQECAQECAQECAQECAQECAQECAQECAQECAQECAQECAQECAQECAQECAQECAQECAQECAQECAQIBAQIBAQECAQECAQECAQECAQIAAgECAQIBAQECAQECAQECAQIBAQIBAgEBAgEBAQEBAQIBAQEBAQIBAQEBAgEBAQEBAgEBAQECAQECAQEBAQECAQIBAQECAQIBAQIBAQIBAgECAQIBAQEBAQECAQEBAQECAQEBAQECAQEBAQECAQEBAgEBAQIBAQECAQEBAQEBAQECAQEBAQECAQIBAQIBAQEBAQEBAQIBAQEBAQIBAQEBAQEBAgEBAQEBAgEBAQEBAgEBAQIBAgECAQIBAQEBAQEBAgEBAQECAQEBAQEBAQEBAgEBAQEBAQEBAgEBAQEBAQEBAgEBAQEBAQEBAgEBAQEBAQECAQEBAQEBAQECAQEBAQEBAQECAQEBAQEBAQIBAQEBAQEBAQIBAQEBAQEBAQIBAQEBAQEBAQIBAQEBAQEBAgEBAQEBAQIBAQEBAQIBAQEBAQECAQEBAQIBAQEBAQECAQEBAQIBAQEBAgEBAQEBAQEBAQECAQEBAQEBAQECAQEBAQEBAQIBAQEBAQEBAQIBAQEBAQEBAQIBAQEBAQEBAQIBAQEBAQEBAgEBAQEBAQEBAgEBAQEBAQEBAgEBAQEBAQEBAgEBAQEBAQECAQEBAQEBAQECAQEBAQEBAgEBAQECAQEBAQEBAgEBAQECAQEBAQEBAgEBAQEBAQEBAQECAQIBAgIBAQEBAQEBAQEBAQEBAQEBAgEBAQEBAQIAAQECAQEBAgECAQIBAQEBAgIBAQEBAgEBAQECAQEBAQIBAQEBAgEBAQIBAgECAQIBAAECAQEBAQEBAQECAQEBAQEBAgEBAQEBAQIBAQEBAQEBAQEBAgEBAQEBAQIBAQEBAQEBAgEBAQEBAQEBAgEBAQIBAQEBAQEBAQEBAQIBAQEBAQEBAgEBAQEBAQIBAQEBAgECAQIBAQEBAgIBAQIBAQEBAQECAQEBAQIBAQEBAgEBAQIAAQECAQEBAQIBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQIBAQECAQECAQEBAgEBAgEBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQIBAgEBAQIBAQEBAgEBAgEBAQIBAQECAQECAQEBAgEBAgEBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQEBAQECAQIBAQIBAgEBAgEBAgEBAgECAQECAQIBAgEBAQIBAQICAQIBAQECAQECAQIBAQIBAgEBAgEBAgECAQECAQIBAQIBAQIBAgEBAgECAQECAQIBAQIBAgEBAgECAQECAQECAQIBAQIBAgEBAgECAQIBAgEBAgECAQIBAgEBAgECAQECAQIBAgECAQIBAQIBAgECAQIBAQIBAgECAQICAAIBAgEBAgECAQIBAgEBAgECAQIBAgECAgEBAQIBAgEBAgECAQIBAgEBAQECAQIBAgEBAgECAQIBAgEBAgECAQIBAgEBAgECAgECAQEBAQECAQICAQEBAgIBAgECAQECAQIBAgIBAQICAQECAQIBAgECAQIBAgECAQIBAQECAQIBAgIBAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgEBAgIBAgECAQIBAgEBAgECAQIBAgECAQIBAgECAQICAQIBAgECAQIBAgECAQEBAgECAQIBAgIBAgECAQIBAgECAQIBAgIBAgECAQIBAgECAQICAQIBAgECAQIBAgECAgECAQIBAgECAQIBAgECAQICAQIBAgIBAQIBAQECAQIBAQIBAQIBAgEBAgEBAgECAQECAQIBAgEBAQIBAQIBAgECAQECAQECAQIBAQIBAgEBAgEBAgECAQECAQIBAQIBAQIBAgEBAgECAQECAQECAQIBAQIBAgEBAgEBAgECAQIBAQIBAgEBAgECAQIBAgEBAgECAQIBAQIBAgECAQECAQIBAgECAQECAQIBAgECAQECAQIBAgECAQICAAIBAgEBAgECAQIBAgEBAgECAQIBAgECAQIBAQIBAgEBAgECAQIBAgEBAQECAQIBAgEBAgECAQIBAgEBAgECAQIBAgEBAgECAgECAQECAQECAQICAQEBAgIBAgECAQECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAQECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAQIBAgIBAgECAQIBAQIBAgECAQICAQECAQIBAgECAQIBAgECAQIBAgECAQICAQIBAgECAQIBAgECAQECAQIBAQIBAgEBAgECAQECAQECAQIBAQIBAQIBAgEBAgECAQECAQIBAQIBAgEBAgEBAgECAQECAQIBAQIBAgECAQIBAQIBAgECAQIBAQIBAgECAQECAQIBAgECAQECAQIBAgECAQECAQIBAgECAQACAQIBAQIBAgECAQIBAQIBAgECAQICAQIBAQECAQIBAQIBAgECAQIBAgACAQECAQIBAgECAQECAQIBAgECAQIBAgECAQECAQECAQIBAgECAQECAQEBAgECAQIBAgEBAgECAQIBAgEBAgECAQIBAQIBAgIBAgEBAQEBAgECAgEBAgECAQIBAgEBAgECAgECAQIBAgEBAgECAQIBAgECAQIBAgECAQEBAgECAQIBAQIBAgECAQECAQIBAgECAQECAQIBAgECAQECAQICAQIBAQIBAgIBAQECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgEBAgECAQICAQECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAQIBAgECAQIBAgECAQIBAgECAgECAQIBAgECAQIBAgIBAQIBAgIBAgECAQIBAgECAQIBAgECAQICAQIBAgECAQIBAgIBAgECAQIBAgECAQIBAgIBAgECAQIBAgECAgEBAgIBAgECAQICAQIBAQEBAQECAQEBAgEBAQIBAQIBAQECAQEBAgEBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQECAQEBAgECAQIBAQECAQECAQEBAgEBAgEBAQIBAQIBAQECAQECAQEBAgEBAQIBAQIBAQIBAgACAQEBAgEBAQIBAQIBAQIBAQECAQEBAQIBAQECAQECAQECAQECAQECAQIBAQECAQECAQECAQECAQEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgECAQEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAQIBAQIBAgEBAQIBAQIBAQIBAgEBAgEBAQIBAQEBAgEBAgEBAgEBAgEBAgECAQECAQIBAQIBAQIBAgEBAgECAQEBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQECAQECAQEBAgEBAQIBAQECAQECAQEBAgEBAQIBAgECAQEBAgEBAgEBAQIBAQIBAQECAQECAQEBAgEBAgEBAQIBAQECAQECAQEBAgIAAgEBAQIBAQECAQECAQEBAgEBAQIBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQIBAQECAQECAQEBAgIAAQIBAQIBAQECAQECAQECAQEBAgEBAQECAQEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAQIBAgEBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQECAQECAQECAQECAQECAQECAQECAQECAQECAQEBAQECAQECAQECAQIBAQIBAQIBAgEBAgECAQECAQIBAQIBAgEBAQACAQEBAQIBAQECAQIBAQIBAQIBAQIBAgEBAgECAQECAQECAQIBAQIBAgEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQIBAQEBAQEBAQEBAQEBAgEBAQEBAQEBAQEBAQEBAQIBAQEBAQEBAQEBAQIBAQEBAQEBAQEBAQEBAQEBAQEBAQIBAQEBAQEBAQECAgABAgECAQEBAQEBAQEBAQECAQEBAQEBAQEBAQEBAQECAQEBAQEBAQEBAQEBAQEBAgEBAQEBAQEBAQECAQEBAQEAAQIBAQIBAgECAAECAQIBAQIBAgEBAQEBAQEBAQECAQEBAQEBAQIBAQEBAQEBAgEBAQEBAQECAQEBAQEBAgEBAQEBAQEBAgEBAQEBAQIBAQEBAgEBAQEBAgEBAQEAAQECAQEBAgEBAQECAQEBAgEBAQIBAQEBAgEBAQECAQEBAgEBAQECAQEBAQIBAQECAQEBAgEBAQEBAgEBAQIBAQEBAgECAQECAQIBAQEBAQICAgEBAQIBAQECAQEBAgEBAgEBAgEBAQIBAQIBAQIBAQIBAQIBAQIBAQECAQECAQECAQECAQECAQECAQECAQECAQECAQIBAQIBAQEBAQIBAQIBAgEBAgEBAgEBAQIBAQIBAQIBAQIBAgEBAgEBAQIBAQIBAQIBAQIBAQIBAgEBAgEBAQIBAQIBAQIBAQIBAgEBAgEBAQIBAQIBAgEBAgECAQECAQECAQIBAQIBAgEBAgECAQECAQECAQIBAQIBAQIBAgEBAgECAQECAQECAQIBAQIBAQIBAQIBAgEBAgEBAgEBAgECAQECAQIBAQIBAgECAQIBAQIBAgIBAQEBAgECAQECAQECAQIBAQIBAgEBAgECAQEBAgECAQECAQIBAQIBAgECAQECAQIBAgEBAQIBAgEBAgEBAgECAQIBAQIBAgECAQECAQIBAgEBAgECAQECAQIBAgECAQECAQIBAQIBAgECAQECAQIBAgEBAgECAQIBAQIBAgEBAgIBAQIBAQECAQIBAQIBAgECAQIBAQECAQEBAgEBAQECAQEBAgEBAQIBAQEBAgEBAQIBAQEBAgEBAQECAQEBAQIBAQECAQEBAgEBAQECAQEBAQIBAQECAQECAQECAQIBAQEBAQICAQEBAQIBAQECAQEBAgEBAgEBAgEBAQIBAQIBAQIBAQIBAQIBAQIBAQECAQECAQECAQECAQECAQECAQECAQECAQECAQIBAQIBAQEBAQIBAgEBAgEBAgEBAgEBAQIBAgEBAgEBAgEBAgEBAgEBAQEBAQIBAQECAQECAQECAQECAQEBAQIBAQIBAQIBAQIBAgEBAgEBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQEBAgEBAgECAQECAQECAQEBAgEBAgEBAgEBAgECAQECAQEBAgEBAgEBAgEBAgEBAgECAQECAQEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgECAQECAQIBAQIBAQIBAQIBAgECAQECAQIBAQECAQIBAQIBAgEBAgEBAgECAQIBAQIBAQIBAQIBAgEBAgEBAgEBAgECAQECAQECAQIBAQIBAgEBAgECAQIBAgEBAgECAQIBAQECAQIBAQECAQIBAQIBAgECAQECAQEBAgECAQIBAQIBAgEBAgECAQIBAQIBAgECAQEBAgECAQECAQECAQIBAgEBAgECAQIBAQIBAgECAQECAQIBAQIBAgEBAgECAQIBAQIBAgECAQECAQIBAgEBAgECAQIBAQIBAgECAQECAQIBAgEBAQIBAgEBAgECAQIBAQIBAgECAQECAQIBAgECAQIBAgECAQIBAgECAQECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgEBAgECAQIBAgEBAgIBAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAQIBAgECAgEBAgECAQIBAgEBAQIBAgEBAgEBAgECAQEBAgEBAgEBAgEBAgEBAgEBAgECAAIBAQIBAQIBAQIBAgEBAgEBAgEBAQIBAQIBAQIBAQIBAgEBAgEBAQIBAQIBAgEBAgECAQECAQECAQIBAQIBAgEBAgECAQECAQECAQIBAQIBAQIBAgEBAgECAQECAQIBAQIBAQIBAQIBAgEBAgECAQECAQIBAgECAQECAQIBAQIBAgECAQIBAgECAQIBAgECAQIBAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQEBAgECAQECAQIBAgECAQECAgECAQIBAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAQICAQIBAgEBAgECAQEBAgEBAgEAAQIBAQEBAQEBAgEBAQEBAQIBAQEBAQIBAQIBAQECAQECAQECAQEBAQIBAQIBAQECAQECAQIBAQEBAQECAQEBAQEBAQIBAQEBAQECAQIBAgECAQIBAgEBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAQECAQIBAQIBAgECAQIBAgECAQIBAgEBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQECAQIBAQECAQECAQACAQEBAQEBAQECAQEBAQECAQEBAQECAQEBAgEBAQIBAQIBAgEBAQECAQEBAgEBAgEBAQIBAQECAAECAQEBAQEBAQIBAQEBAQIBAgEBAgECAQIBAgEBAQECAQEBAgEBAQIBAQIBAgEBAQECAQEBAgEBAQIBAQICAQECAQEBAgEBAQIBAQIBAgEBAQECAQEBAgEBAQIBAQIBAQECAAEBAgEBAQEBAQECAQEBAQEBAgABAQEBAQEBAgEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQIAAQEBAQABAQECAQEBAQEBAQEBAQEBAgEBAQEBAQEBAgEBAQEBAQEBAgEBAQEBAQEBAgEBAQEBAQECAQEBAQEBAQECAQEBAQEBAQIBAQEBAQEBAQIBAQEBAQEBAQIBAQEBAQEBAgEBAQEBAQIBAgEBAQEBAgEBAQECAAIBAQEBAQECAQEBAQIBAQEBAQIBAQEBAQIBAQEBAgEBAQEBAQIBAQEBAgECAAECAQEBAQIAAQECAQEBAQEBAQEBAQEBAQIBAQEBAQEBAQIBAQEBAQEBAQIBAQEBAQEBAgEBAQEBAQEBAgEBAQEBAQECAQEBAQEBAQECAQEBAQEBAQIBAQEBAQEBAQIBAQEBAQEBAQIBAQEBAQECAQIBAQEBAQIBAQEBAgEBAQEBAQECAQEBAQECAQEBAQECAQEBAQECAQEBAQIBAQEBAQECAQEBAQIBAgABAgEBAQECAgEAAQIBAQEBAgEBAQEBAgEBAQEBAgEBAQECAQEBAQEBAgEBAQIAAgEBAQEBAQIBAQEBAgEBAQEBAQIBAQEBAgEBAQEBAgEBAQEBAgEBAQEBAgEBAQECAQEBAQEBAQECAQEBAQIBAQEBAgEBAQEBAgEBAQEBAgEBAQECAQEBAgICAgEBAQIBAQEBAQIBAQECAgACAQECAQECAQECAQIBAQIBAgEBAgEBAgECAQECAQIBAQIBAgIBAQIBAgEBAQICAQIBAQECAQECAQECAQIBAQIBAQIBAgEBAgIBAgECAQIBAQECAQECAQECAgEBAgEBAgECAQECAQIBAQIBAQIBAgEBAgEBAgECAQECAQIBAQIBAgECAQECAQIBAgEBAgECAQIBAQIBAgECAQECAQIBAgECAQECAQIBAQIBAgECAQIBAQIBAgECAQECAQIBAgEBAgECAQIBAgEBAgECAQIBAQIBAgECAQECAQIBAgECAQECAQIBAgEBAQIBAQECAQECAQECAQECAQECAQEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAQIBAQIBAQIBAQIBAQIBAQECAQECAQEBAgEBAQIBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQIBAQIBAQECAQEBAgEBAgEBAgEBAQIBAQECAQECAQECAgIBAgEBAQEBAgEBAQECAQEBAQEBAgEBAQECAQEBAQECAQEBAQECAQEBAQIBAgABAQIBAQECAQEBAQIBAQEBAQIBAQEBAgEBAQEBAgEBAQECAQEBAQIBAQECAQECAgEBAQIBAQEBAgEBAQECAgIAAgEBAgEBAgEBAgECAQIBAQIBAQIBAQIBAgEBAgECAQECAQICAQIBAQICAAECAgECAQECAQEBAgECAQECAQECAQIBAQIBAQICAgEBAgECAQECAQEBAgECAQICAAIBAgEBAgECAQECAQECAQIBAQIBAQIBAgEBAgECAQECAQIBAQIBAgABAgEBAQECAQEBAQECAQEBAQECAQEBAQIBAQIAAgEBAQIBAQICAQEBAgEBAQECAQEBAQICAgACAQECAQECAQECAQIBAgEBAgEBAgECAQECAQECAQIBAQIBAgIBAgEBAgIAAQICAQIBAQIBAQECAQIBAQIBAgEBAgEBAgEBAgIBAgEBAQIBAQICAQEBAgEBAQECAQEBAQIAAgEBAgEBAgEBAgECAQIBAQIBAQIBAQIBAgEBAgECAQECAQICAQECAQICAAECAgECAQECAQEBAgEBAgECAQECAQIBAQIBAQICAgEBAgECAQECAQEBAgECAQICAAIBAgEBAgECAQECAQECAQIBAQIBAQIBAgEBAgECAQECAQIBAQIBAgEBAgECAQIBAQIBAgECAQIBAQIBAgEBAgECAQIBAgEBAgECAQIBAQIBAgECAQECAQIBAgECAQECAQIBAQIBAgECAQIBAQIBAgECAQIBAQIBAgECAQECAQIBAgEBAgECAQIBAQIBAgEBAgEBAQIBAgECAAIBAgEBAgECAQECAQECAQECAQIBAQIBAQIBAgEBAgECAQIBAQIBAgEBAgECAQIBAQIBAgECAQIBAQIBAgEBAgECAQIBAgEBAgECAQIBAQIBAgECAQECAQIBAgECAQECAQIBAQIBAgECAQIBAQIBAgECAQIBAQIBAgECAQECAQIBAgEBAgECAQIBAQECAQECAQEBAgEBAgEBAgEBAgEBAgEBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQECAQECAQECAQECAQECAQEBAQEAAQECAQEBAQIBAQEBAgECAgIBAQEBAQABAQIBAQEBAQEBAQEBAQABAQIBAQEBAQEBAgEBAQEBAQECAQEBAQEBAgEBAQEBAQECAQEBAQECAQEBAQIBAQEBAQECAQEBAQECAQEBAQECAQEBAQECAQEBAQECAQEBAQIBAQEBAQIBAQEBAQECAQECAAIBAgEBAgECAQEBAgECAQECAQIBAgEBAgEBAgEBAgECAQECAQECAQIBAQIBAgEBAgEBAgECAQECAQIBAQIBAgEBAgEBAQECAQABAQIBAQEBAQEBAQEBAgABAQIBAQEBAQEBAQIBAQEBAQECAQEBAQEBAQIBAQEBAQEBAgEBAQECAQEBAQIBAQEBAQECAQEBAQECAQABAQIBAQEBAQEBAgEBAQEBAQECAQEBAQEBAQIBAQEBAQECAQEBAQECAQEBAQIBAQEBAQECAQEBAQECAQEBAQECAQEBAQECAQEBAQECAQEBAQIBAQEBAQECAQEBAQECAQECAAIBAgEBAgECAQEBAgECAQECAQIBAgEBAgEBAgEBAgECAQECAQECAQIBAQIBAgEBAgEBAgECAQECAQIBAQIBAgEBAgEBAQECAQECAQECAQIBAgECAQIBAQEBAQIBAgECAQEBAgECAQECAQIBAgEBAgECAQIBAQIBAgECAQIBAQIBAgECAQECAQIBAgEBAgECAQIBAgEBAgECAQIBAQIBAgECAQECAQIBAgECAQECAQIBAgECAQIBAQIBAgECAQEBAQIBAQIBAgECAQIAAgIBAQIBAgEBAgECAQIBAgEBAgECAgEBAgIBAQICAAIBAgEBAgEBAgEBAgECAQECAQIBAgEBAgEBAgEBAgECAQECAQECAQIBAQIBAgEBAgEBAgEBAgECAQECAQIBAgEBAgEBAQECAQECAQECAQIBAgEBAgIBAQIBAQECAQIBAQIBAQIBAgEBAgECAQIBAQIBAgEBAgECAQIBAgECAQECAQIBAgEBAgECAQIBAQIBAgECAQIBAQIBAgECAQECAQIBAgEBAgECAQIBAgEBAgECAQIBAgECAQECAQIBAgEBAQECAQECAQIBAgECAAIBAgECAQIBAQIBAgECAQIBAQIBAgECAQIBAgECAgEBAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQEBAgECAQICAQECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQECAgECAQECAQIBAgECAgEBAgECAQIBAgECAQIBAgECAQIBAgECAgECAQIBAgECAQIBAgEBAgACAQIBAgECAQIBAgIBAgECAQIBAgECAgECAQIBAgECAQIBAgIBAgECAQIBAgECAgECAQIBAgECAQIBAgIBAgECAQIBAgECAgECAQIBAgECAQICAQIBAgECAQIBAgIBAgECAQIBAgECAgECAQIBAgIBAgECAgECAQIBAgIBAgIBAgECAQIBAQIBAgECAQECAQICAQIBAgECAgECAQICAQIBAgECAgIBAgECAQIBAgIBAgIBAgECAgECAQICAQIBAgECAgECAgECAQICAQIBAgIBAgECAQICAQICAQIBAgIBAgECAQICAQICAQICAQIBAgIBAgIBAgIBAgIBAgECAgECAgECAgECAgECAQICAQICAQICAQICAQIBAgICAAICAQICAQICAQIBAgICAQICAQIBAgEBAgIBAgIBAgECAgECAgECAgECAgECAQICAQICAQICAQICAQECAgECAQECAQIBAgICAQICAQICAQICAgECAgECAQICAQECAgECAgEBAgECAQECAQECAQECAQIBAQIBAgECAQIBAQEBAQIBAgECAQEAAgECAQECAQIBAgEBAgECAQIBAQIBAgECAQIBAQIBAgECAQECAQIBAgEBAgECAQIBAgEBAgECAQIBAgEBAgECAQIBAQIBAgECAQECAQIBAgECAQIBAQIBAgECAQEBAgEBAgIBAQIBAgEBAgECAQIBAQIBAgECAQIBAQIBAgECAQECAQIBAgEBAgECAQIBAgEBAgECAQIBAQIBAgECAQECAQIBAgECAQIBAQIBAgECAQIBAgEBAgECAQIBAAIBAgEBAgECAQECAgACAQIBAgEBAgECAQIBAQIBAgECAQIBAgECAQIBAgEBAQECAQIBAgECAQIBAgEBAgIBAgECAQIBAgEBAQIBAgECAgEBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgIBAgEBAgIBAgEBAgIBAQIBAgECAQIBAgECAQIBAgIBAgECAQIBAgECAgECAQIBAgECAQICAQIBAgECAQIBAgIBAgECAQIBAgECAQICAQIBAgECAQIBAgIBAgECAQIBAgECAQICAQIBAgECAQICAQIBAgECAQIBAgECAgECAQIBAgIBAgIBAgECAQICAQICAQIBAgECAQEBAgIBAgEBAgECAQICAQIBAgIBAgECAgECAQIBAgIBAgIBAgECAQICAQICAQIBAgECAgECAgECAQIBAgIBAgECAgECAgECAQIBAgIBAgECAgECAQICAQIBAgIBAgECAgECAgECAQICAQICAQICAQICAQIBAgIBAgIBAgIBAgIBAgIBAgECAgECAgECAgECAQICAQICAQEBAgECAQIBAgECAAICAQECAQIBAQIBAgECAQIBAgEBAgIBAQICAQECAgEBAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQEBAgECAQICAQIBAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQECAgECAQECAQIBAgECAgEBAgECAQIBAgECAQIBAgECAQIBAgECAgECAQIBAgECAQIBAgEBAgACAQICAQECAQIBAgIBAgECAQIBAgECAgECAQIBAgECAQIBAgIBAgECAQIBAgECAgECAQIBAgECAQIBAgIBAgECAQIBAgECAgECAQIBAgECAQICAQIBAgECAQIBAgIBAgECAQIBAgECAgECAQIBAgIBAgECAgECAQICAQIBAgIBAgECAQIBAQIBAgECAQECAQIBAgECAQIBAgECAQIBAgECAQIBAgEBAgECAQICAQIBAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAQIBAgECAgECAQIBAQIBAgECAQIBAgECAQIBAgECAgECAQIBAgECAQIBAgECAgACAQICAQECAQIBAgIBAgECAgEBAgECAgECAQIBAgECAQICAQIBAgECAgEBAgECAgECAQIBAgECAQICAQIBAgECAQIBAgECAgECAQIBAgECAQICAQIBAgECAQICAQIBAgECAQIBAgIBAgECAQIBAgIBAgECAgECAQICAQIBAgIBAgECAgEBAQIBAgECAQECAQICAQIBAgECAgECAgECAQIBAgIBAgIBAgECAQICAQIBAgIBAgECAgECAQICAQIBAgIBAgECAgECAQICAQIBAgIBAgECAgECAQICAQIBAgIBAgECAgECAQICAQICAQICAQICAQIBAgIBAgIBAgIBAgECAgECAgECAgECAQICAQICAQICAQICAQICAQICAQECAQICAgECAQICAQICAQICAgEBAgECAQICAQIBAgECAgECAQICAQIBAgIBAgIBAgECAQICAQIBAgIBAgECAgECAQICAQIBAgIBAgECAgECAQICAQIBAgIBAgECAQICAQICAQIBAgIBAgECAgECAQICAQICAQICAQIBAgIBAgIBAgIBAgIBAgECAgECAgECAgECAQICAQICAQICAQICAQICAQICAAICAQICAgECAQICAQICAQICAgEBAgECAQIBAgIBAgIBAgIBAgIBAgECAgECAgECAgECAgECAQICAQECAQICAgECAQICAQICAQICAgEBAgECAQIBAgIBAgIBAgIBAgIBAgECAgECAgECAgECAgECAQICAgEBAgICAgEBAgECAgECAgECAgECAgICAQIBAgICAQIBAgIBAgECAQICAQECAQEBAQEBAQIBAQEBAQIBAQEBAgEBAQEBAQIBAQEBAQIBAQECAQEBAgECAQIBAgIBAgIBAgIBAgIBAgECAgECAgECAgECAgECAQICAgEBAgICAQICAgIBAQIBAgIBAgIBAgIBAgICAQIBAQIBAgIBAgIBAgIBAgICAgECAQIBAgIBAQECAQEBAQEBAgEBAQEBAgEBAQEBAgEBAQEBAgEBAQEBAgEBAQIBAQECAQEBAQEBAQIBAQEBAQIBAQEBAgEBAQABAQEBAgEBAQEBAQEBAQEBAQEBAQEBAgEBAQEBAQEBAQEBAQECAQEBAQEBAQEBAQEBAQECAQEBAQEBAQEBAQEBAgEBAQEBAQEBAQEBAQIBAQEBAQEBAQEBAQEBAgEBAQECAQEBAQIBAQIBAgEBAAEBAQIBAQEBAQEBAQEBAQEBAQEBAgEBAQECAQEBAgEBAgECAgECAAIBAQEBAgEBAQIBAQEBAgEBAQIBAQEBAgEBAQIBAQIBAQEBAQIBAQECAQEBAgEBAQECAQEBAgEBAQECAQECAQIBAQIBAQEBAQIBAQECAQEBAQIBAgEBAQIBAQEBAAEBAgECAQECAQECAQECAQICAQECAQECAQECAQECAQIBAQECAQECAQECAQIBAQIBAgACAQECAQIBAgEBAgEBAgEBAgECAQECAQIBAQIBAQIBAQICAQEBAgECAQECAQIBAQIBAQIBAQIBAgEBAgEBAgECAQIBAQIBAQIBAgEBAgECAgACAQICAQEBAQIBAgECAAECAQECAQECAQECAQECAQICAQECAQECAQECAQECAQIBAQECAQECAQECAQIBAQIBAgACAQECAQIBAgEBAgEBAgEBAgECAQECAQIBAQIBAQIBAgECAQEBAgECAQECAQIBAQIBAQIBAQIBAgEBAgEBAgECAQIBAQIBAQIBAgEBAgECAgACAQIBAgEBAgEBAgEBAgIBAgIBAQECAgEBAQIBAQIBAQIBAgEBAgECAAIBAQIBAgECAQECAQEBAgECAQECAQIBAgEBAgEBAgEBAgIBAQECAQIBAQIBAgEBAgEBAQIBAgEBAgECAQECAQIBAQIBAQIBAgEBAgECAQICAAIBAgIBAQEBAgECAQIBAQEBAQEBAgEBAQEBAgEBAQECAQEBAQEBAgEBAQECAQEBAQECAQEBAQECAQEBAgEBAQEBAQECAQEBAQECAQEBAgEBAQECAAIBAQEBAgEBAQEBAgEBAQECAQEBAgEBAQEBAQIBAQEBAQIBAQEBAgEBAQEBAgEBAgEBAQEBAQECAQEBAQIBAQEBAgEBAQEBAgEBAQIBAQEBAgEBAQIBAQECAQEBAgEBAQEAAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAgEBAQECAQEBAgEBAQIBAQEBAgEBAgEBAAIBAQECAQEBAQIBAQECAQEBAgEBAQECAQEBAgEBAgEBAQECAQEBAQIBAQEBAgEBAQIBAQECAQEBAgEBAQIBAQIBAQECAQECAQEBAgEBAQEBAgEBAQIBAQIBAQECAQECAQEBAgEBAgEBAQIBAQECAQECAQEBAgEBAgECAQEBAQIBAQECAQECAQEBAgEBAQIBAQIBAQIBAQECAQEBAgEBAgEBAgEBAQIBAQECAQECAQEBAgEBAgEBAgEBAgEBAgEBAQIBAgEBAgEBAQIBAQIBAQIBAgEBAgEBAQIBAQICAQEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgECAQECAQIBAQIBAQIBAgEBAgEBAgECAQECAgEBAQIBAQIBAgEBAgICAAIBAQIBAgECAgEBAgEBAgEBAgIBAQECAQECAQIBAQIBAgEBAgEBAgEBAgECAQECAQIBAQIBAQIBAgECAQECAQECAQEBAQIBAgEBAgEBAgECAQEAAgEBAgEBAgECAQECAQECAQIBAQIBAgEBAgEBAgECAQECAQIBAQIBAgEBAgEBAgECAQIBAQIBAgEBAgEBAgECAQIBAQIBAQIBAgEBAgECAgACAQICAQEBAQIBAgECAgEBAQEBAQIBAQEBAQECAQEBAQIBAQEBAQECAQEBAgEBAQEBAQIBAQEBAgEBAQECAQEBAQEBAgEBAQEBAgEBAQECAQEBAgEBAQEBAQECAQEBAQECAQEBAgEBAQIBAQEBAQECAQEBAQECAQEBAQECAQEBAQIBAQECAQEBAQEBAgEBAQEBAgEBAQECAQEBAQIBAQEBAgEBAQIBAQEBAgEBAQIBAQECAQEBAgIAAQIBAQEBAQIBAQECAQEBAgEBAQIBAQEBAgEBAQECAQEBAgEBAQIBAQEBAgEBAgEBAAIBAQECAQEBAgEBAQIBAQEBAgEBAQIBAQEBAgEBAgEBAQIBAQEBAgEBAQECAQEBAgEBAQIBAQEBAgEBAgEBAQIBAQIBAQECAQEBAgEBAQEBAgEBAQIBAQIBAQECAQECAQEBAgEBAgEBAQIBAQIBAQECAQECAQECAQECAQEBAQIBAQIBAQECAQEBAgEBAQIBAgEBAQIBAQECAQEBAgEBAgEBAgEBAQIBAQECAQECAQECAQEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAQIBAQIBAQIBAgEBAgEBAQIBAQICAQEBAgECAQEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgECAQEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgECAQECAQECAQIBAQIBAQIBAgEBAgECAQECAQIBAgEBAQIBAQIBAgEBAgICAQEBAgEBAgECAQEBAQIBAQIBAQEBAQEBAgEBAQEBAQIBAQEBAgEBAQEBAQIBAQECAQEBAQEBAgEBAQECAQEBAQIBAQEBAQECAQEBAQECAQEBAQIBAQECAQEBAQEBAQIBAQEBAQIBAQECAQEBAgEBAQEBAQIBAQEBAQIBAQEBAQIBAQEBAgEBAQIBAQEBAQECAQEBAQECAQEBAQIBAQEBAgEBAQECAQEBAgEBAQECAQEBAgEBAQIBAQECAgABAgEBAQEBAgEBAQIBAQECAQEBAgEBAQECAQEBAQIBAQECAQEBAgEBAQECAQECAQEBAQEBAgEBAQEBAQIBAQEBAgEBAQEBAQIBAQECAQEBAQEBAgEBAQECAQEBAQIBAQEBAQECAQEBAQECAQEBAQIBAQECAQEBAQEBAQIBAQEBAgEBAQECAQEBAgEBAQEBAQIBAQEBAQIBAQEBAQIBAQEBAgEBAgEBAQEBAQECAQEBAQECAQEBAQIBAQECAQEBAQECAQEBAgEBAQECAQECAQEBAQIBAQECAgABAgEBAQEBAgEBAQIBAQECAQEBAgEBAQECAQEBAgEBAQECAQEBAgEBAQECAQECAQEBAQEBAQIBAQECAQEBAgEBAQECAQEBAgEBAQECAQECAQEBAgEBAQECAQEBAQIBAQECAQEBAgEBAQIBAQECAQEBAgEBAgEBAQIBAQECAQACAQEBAgEBAQECAQECAQEBAQIBAQEBAgEBAQIBAQIBAQECAQEBAQIBAQEBAgEBAQIBAQECAQEBAQIBAQIBAQECAQECAQEBAgEBAQIBAQEBAQIBAQECAQECAQEBAgEBAgEBAQIBAQIBAQECAQECAQEBAgEBAgEBAgEBAgEBAQECAQEBAgEBAgEBAQIBAQECAQIBAQECAQEBAgEBAQIBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQECAQECAQECAQIBAQIBAQECAQECAgEBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAgEBAgECAQECAQECAQIBAQIBAQIBAgECAQIBAQECAQECAQIBAQICAgEBAQIBAQIBAgIBAQIBAQIBAQICAQEBAgECAQECAQECAQIBAQIBAgEBAgEBAgECAQECAQECAQIBAQIBAgEBAgEBAgIAAgEBAgEBAgECAQIBAQEBAQEBAQIBAQECAQEBAgEBAQECAQEBAgEBAQIBAQEBAgEBAQIBAQECAQEBAgECAQIBAQEBAgEBAQIBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQECAQECAQEBAgEBAgEBAQIBAQECAQECAQECAQIAAgEBAQIBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQIBAQECAQECAQEBAgEBAQIBAQIBAQIBAQECAQEBAgECAQECAQEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgECAQEBAgEBAgEBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAgEBAgEBAgECAQECAQIBAQIBAQIBAgEBAgIBAQECAQECAQECAQIBAQEBAgEBAgECAgEBAgEBAgEBAgIBAQIBAQIBAQIBAQEBAgEBAgECAQECAQIBAQIBAQIBAgEBAgECAQECAQIBAQIBAQIBAgEBAgEBAgECAQECAQIBAQIBAQICAAIBAQIBAQIBAgECAQEBAAIBAQECAQEBAQIBAQIBAQEBAgEBAQIBAQECAQEBAQIBAQEBAgEBAQIBAQIBAgECAQEBAQECAQECAQEBAgECAgEBAgEBAgIBAQEBAQECAQEBAQECAQEBAQIBAQEBAQIBAQECAQEBAQEBAgEBAQEBAgEBAQECAQEBAQIBAQEBAQIBAQEBAgEBAQIBAQEBAgEBAgECAQIBAQEBAQIBAQECAQEBAgECAQIBAQEBAgECAQIBAQIBAQIBAgEBAgECAQECAQECAQIBAQIBAQICAQEBAQIBAQIBAgECAQEBAQEBAQECAQEBAgEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAgEBAQIBAgECAQEBAQIBAQECAQECAQECAgEBAgECAQIBAQEBAQECAQEBAQIBAQEBAQIBAQEBAQIBAQECAQEBAQEBAgEBAQEBAgEBAQECAQEBAQIBAQEBAQIBAQEBAgEBAQIBAQEBAgEBAgECAQIBAQEBAQIBAQECAQEBAgECAQIBAQEBAgIBAAIBAQIBAQECAQEBAQIBAQEBAgEBAQECAQEBAgEBAQECAQEBAgEBAQIBAQEBAgEBAQIBAQEBAgEBAgEBAQEBAgEBAQIBAQECAQEBAgEBAQECAQECAQIBAgECAQEBAQIBAQIBAQECAQEBAgECAQEBAQIBAQIBAQECAQEBAgIBAQECAQIBAgECAQEAAgEBAQIBAQECAQEBAgEBAQECAQEBAgEBAQIBAQEBAgEBAQECAQECAQEBAgECAQIBAQEBAgEBAQIBAQIBAQICAQEBAgECAgEBAQEBAQIBAQEBAQIBAQEBAgEBAQEBAgEBAQIBAQEBAQECAQEBAQECAQEBAQIBAQEBAgEBAQEBAgEBAQECAQEBAgEBAQECAQECAQIBAgEBAQEBAgEBAQIBAQEBAgIBAgEBAQECAgEAAgEBAgEBAQIBAQEBAgEBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAgEBAQECAQECAQEBAQECAQEBAgEBAQIBAQECAQEBAQIBAQIBAQICAQIBAQEBAgEBAQIBAQIBAQECAQIBAQEBAgEBAgEBAQIBAQECAgEBAQEBAgEBAgEBAgEBAQECAQECAQECAQECAQEBAQIBAQECAQEBAgEBAgEBAgEBAQIBAQECAQECAQIBAQEBAgEBAQIBAQECAQECAQECAQEBAQIBAQIBAQECAQECAQECAQIBAQEBAQIBAQEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAgEBAQIBAgEBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAgIAAgEBAgEBAgEBAgEBAgEBAQECAQICAQEBAQEBAQIBAQEBAgEBAQEBAgEBAQECAQEBAQIBAQEBAQIBAQEBAQIBAQEBAgEBAQEBAgEBAQEBAgEBAQIBAQECAQEBAQECAQIBAQICAQEBAQEBAgEBAgEBAQECAgECAQEBAQICAQACAQECAQEBAgEBAQECAQEBAQIBAQEBAgEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQIBAQEBAgEBAQECAQEBAgEBAQIBAQEBAgEBAgECAQIBAgEBAQECAQECAQEBAgEBAQIBAgEBAQECAQECAQEBAgEBAQICAQEBAQECAQECAQECAQEBAQIBAQIBAQIBAQIBAQEBAgEBAQIBAQECAQECAQECAQEBAgEBAQIBAQIBAgEBAQECAQEBAgEBAQIBAQIBAQIBAQEBAgEBAgEBAgEBAQIBAQIBAgEBAQEBAgEBAQECAQECAQECAQECAQECAQECAQECAQECAQEBAgECAQEBAgEBAgECAQEBAgEBAgEBAgECAQEBAgEBAgEBAgECAAECAQIAAgEBAgEBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAgEBAQEBAQIBAQECAQEBAgEBAQECAQEBAgEBAQIBAgECAQEBAQIBAQECAQEBAgEBAgEBAgEBAQECAQECAQEBAgEBAgEBAAIBAQIBAQIBAQIBAQEBAgEBAQIBAgEBAgEBAQECAQEBAgEBAQIBAQIBAQIBAQECAQEBAgEBAgEBAgEBAQIBAQEBAgEBAgEBAgEBAgEBAQECAQECAQEBAgEBAgEBAgEBAgEBAQECAQEBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQECAQECAQIBAQECAQECAQECAQECAQECAQECAQECAQECAQECAQECAQICAAIBAQIBAQIBAQIBAQIBAQEBAQECAQEBAQECAQEBAgEBAQEBAgEBAQEBAgEBAQECAQEBAgEBAQEBAQECAQEBAQECAQEBAQECAQEBAQIBAQEBAQIBAQEBAQIBAQEBAQIBAQEBAgEBAQEBAgEBAQEBAgEBAQEBAQIBAQEBAQIBAQEBAgEBAQEBAgEBAQEBAgEBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAgEBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAgEBAQIBAQEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQEBAgEBAQEBAQIBAQECAQEBAQIBAQEBAgEBAQIBAQEBAgEBAQIBAQECAQECAQEBAgEBAQIBAQIBAQECAQEBAgECAQECAQECAQECAQEBAQIBAQECAQECAQIBAQEBAQIBAQIBAQECAQECAQECAQEBAQIBAQIBAQIBAQIBAQEBAgEBAQIBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQIBAQIBAQIBAgEBAQIBAQEBAgEBAgEBAgEBAQIBAQIBAgEBAgEBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAQIBAgIAAgEBAgEBAgEBAgEBAgEBAQEBAQECAQEBAQECAQEBAQEBAQECAQEBAQECAQEBAQIBAQEBAgEBAQEBAQECAQEBAQIBAQEBAQIBAQEBAQIBAQEBAQIBAQEBAQIBAQEBAQIBAQECAQEBAQEBAgEBAQEBAQIBAQEBAgEBAQEBAgEBAQECAQEBAQEBAgEBAQECAQEBAQIBAQEBAgEBAQECAQEBAgEBAQECAQEBAgEBAQIBAQEBAgEBAQIBAQEBAgEBAQIBAQEBAgEBAQECAQEBAgEBAQIBAQEBAgEBAQECAQEBAgEBAQIBAQEBAgEBAQECAQACAQECAQECAQECAQECAQEBAQEBAgEBAQEBAgEBAQICAAEBAgEBAQEBAQIBAQEBAgEBAQIBAQEBAQEBAgEBAQEBAgEBAQIAAgEBAQECAQEBAQECAQEBAQECAQEBAQECAQEBAQIBAQEBAQIBAQEBAQIBAQEBAQECAQEBAQECAQEBAQIBAQEBAQIBAQEBAQIBAQEBAgEBAQIBAQEBAgEBAQECAQEBAgEBAQIBAQEBAgEBAQECAQEBAgEBAQECAQEBAgEBAQECAQEBAgEBAQIBAQECAQEBAQECAQEBAgEBAQECAQEBAgEBAQECAQEBAgEBAQEBAQECAQEBAgEBAQECAQEBAgEBAQECAQEBAgEBAQECAQEBAgEBAgEBAQIBAQECAQECAQEBAgEBAgEBAQIBAQEBAgEBAQECAQEBAgEBAQECAQEBAgEBAQIBAQEBAgEBAQIBAQEBAgEBAQIBAQECAQECAQEBAgEBAgEBAQIBAQECAQEBAgEBAgEBAQIBAQECAQECAQEBAgEBAQIBAQECAQECAQEBAgEBAQIBAQECAQECAQECAQEBAgEBAQIBAQECAQECAQEBAgEBAQIBAQIBAQEBAgEBAQIBAQIBAQEBAgEBAgEBAQIBAQECAQECAQEBAgEBAQIBAQECAQEBAgEBAgEBAQIBAQIBAQECAQECAQEBAQIBAQECAQECAQEBAQECAQEBAQECAQEBAgEBAQECAQEBAQECAQEBAQIBAQECAQEBAQEBAQIBAQEBAQIBAQEBAQIBAQEBAgEBAQEBAgEBAQEBAgEBAQEBAgEBAQECAQEBAQECAQEBAQECAQEBAQEBAgEBAQEBAgEBAQECAQEBAQECAQEBAQECAQEBAQIBAQECAQEBAQIBAQEBAgEBAQIBAQECAQEBAQIBAQEBAgEBAQIBAQEBAgEBAQIBAQEBAgEBAQIBAQECAQEBAgEBAQEBAgEBAQIBAQEBAgEBAQIBAQEBAgEBAQECAQEBAQEBAgEBAQIBAQEBAgEBAQIBAQEBAgEBAQECAQEBAgEBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQIBAQECAQEBAQECAQEBAgEBAQECAQEBAgEBAQECAQECAQEBAQIBAQEBAgEBAQIBAQECAQEBAgEBAgEBAQIBAQIBAQECAQEBAgEBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQECAQEBAgEBAQIBAQIBAQECAQEBAgEBAgEBAgEBAQIBAQECAQEBAgEBAgEBAQIBAQECAQECAQEBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAgEBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAgEBAQIBAQECAQEBAgEBAgEBAQIBAQECAQECAQEBAgEBAgEBAQIBAQECAQECAQEBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQECAQECAQEBAgEBAQIBAQIBAQECAQEBAgEBAgEBAQIBAQECAQEBAgEBAQIBAQIBAQEBAgEBAgEBAQIBAQECAQECAQEBAgEBAQIBAQECAQEBAgEBAgEBAQIBAQIBAQECAQECAQEBAQIBAQECAQECAQEBAQECAQEBAQECAQEBAQECAQEBAQECAQEBAQIBAQEBAQIBAQEBAQIBAQEBAQIBAQEBAQIBAQEBAgEBAQEBAQIBAQEBAgEBAQEBAgEBAQEBAgEBAQEBAgEBAQECAQEBAgEBAQECAQEBAgEBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAQIBAQECAQEBAQI="},"xaxis":"x","y":["Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 0","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 1","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 2","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 3","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 4","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 5","Req 6","Req 6","Req 6","Req 6","Req 6","Req 6","Req 6","Req 6","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 7","Req 8","Req 8","Req 8","Req 8","Req 8","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 9","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 11","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 15","Req 15","Req 15","Req 15","Req 15","Req 15","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 16","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 17","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 18","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 19","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 20","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 21","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 22","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 23","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 24","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 25","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 26","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 27","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 28","Req 29","Req 29","Req 29","Req 30","Req 30","Req 30","Req 30","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 31","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 32","Req 33","Req 33","Req 33","Req 33","Req 33","Req 33","Req 33","Req 33","Req 33","Req 33","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 34","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 35","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 36","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 37","Req 38","Req 38","Req 38","Req 38","Req 38","Req 38","Req 38","Req 38","Req 38","Req 38","Req 38","Req 38","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 39","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 40","Req 41","Req 41","Req 41","Req 41","Req 41","Req 41","Req 41","Req 41","Req 41","Req 41","Req 41","Req 41","Req 41","Req 41","Req 41","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 42","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 43","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 44","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 45","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 46","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 47","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 48","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 49","Req 50","Req 50","Req 50","Req 50","Req 50","Req 50","Req 50","Req 50","Req 50","Req 50","Req 50","Req 50","Req 50","Req 50","Req 50","Req 50","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 51","Req 52","Req 52","Req 52","Req 52","Req 52","Req 52","Req 52","Req 52","Req 52","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 53","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 54","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 55","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 57","Req 57","Req 57","Req 57","Req 57","Req 57","Req 57","Req 57","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 58","Req 59","Req 59","Req 59","Req 59","Req 59","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 61","Req 61","Req 61","Req 61","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 63","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 65","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 66","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 67","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 68","Req 69","Req 69","Req 69","Req 69","Req 69","Req 69","Req 69","Req 69","Req 69","Req 69","Req 69","Req 69","Req 69","Req 69","Req 69","Req 69","Req 69","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 70","Req 71","Req 71","Req 71","Req 71","Req 71","Req 71","Req 71","Req 71","Req 71","Req 71","Req 71","Req 71","Req 71","Req 71","Req 71","Req 71","Req 71","Req 71","Req 71","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 72","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 74","Req 75","Req 75","Req 75","Req 75","Req 75","Req 75","Req 75","Req 75","Req 75","Req 75","Req 75","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 77","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 78","Req 79","Req 79","Req 79","Req 79","Req 79","Req 79","Req 79","Req 79","Req 79","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 80","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 81","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 82","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 83","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 84","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 85","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 86","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 87","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 88","Req 89","Req 89","Req 89","Req 89","Req 89","Req 89","Req 89","Req 89","Req 89","Req 89","Req 89","Req 89","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 90","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 91","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 92","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 93","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 94","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 95","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 96","Req 97","Req 97","Req 97","Req 97","Req 97","Req 97","Req 97","Req 97","Req 97","Req 97","Req 97","Req 97","Req 97","Req 97","Req 97","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 98","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99","Req 99"],"yaxis":"y","type":"bar"},{"base":["2026-02-24T00:00:00.034000","2026-02-24T00:00:00.038000","2026-02-24T00:00:00.043000","2026-02-24T00:00:00.047000","2026-02-24T00:00:00.141000","2026-02-24T00:00:00.329000","2026-02-24T00:00:00.389000","2026-02-24T00:00:00.514000","2026-02-24T00:00:00.566000","2026-02-24T00:00:00.808000","2026-02-24T00:00:00.329000","2026-02-24T00:00:00.389000","2026-02-24T00:00:00.515000","2026-02-24T00:00:00.389000","2026-02-24T00:00:00.515000","2026-02-24T00:00:00.566000","2026-02-24T00:00:00.808000","2026-02-24T00:00:01.220000","2026-02-24T00:00:01.465000","2026-02-24T00:00:01.565000","2026-02-24T00:00:01.847000","2026-02-24T00:00:01.853000","2026-02-24T00:00:01.883000","2026-02-24T00:00:01.891000","2026-02-24T00:00:01.565000","2026-02-24T00:00:01.847000","2026-02-24T00:00:01.853000","2026-02-24T00:00:01.883000","2026-02-24T00:00:01.891000","2026-02-24T00:00:01.894000","2026-02-24T00:00:01.904000","2026-02-24T00:00:01.908000","2026-02-24T00:00:01.910000","2026-02-24T00:00:02.055000","2026-02-24T00:00:01.853000","2026-02-24T00:00:01.883000","2026-02-24T00:00:01.891000","2026-02-24T00:00:01.893000","2026-02-24T00:00:01.904000","2026-02-24T00:00:01.908000","2026-02-24T00:00:01.910000","2026-02-24T00:00:01.883000","2026-02-24T00:00:01.891000","2026-02-24T00:00:01.894000","2026-02-24T00:00:01.904000","2026-02-24T00:00:01.908000","2026-02-24T00:00:01.910000","2026-02-24T00:00:02.055000","2026-02-24T00:00:02.089000","2026-02-24T00:00:02.208000","2026-02-24T00:00:02.218000","2026-02-24T00:00:02.225000","2026-02-24T00:00:01.904000","2026-02-24T00:00:01.908000","2026-02-24T00:00:02.055000","2026-02-24T00:00:02.208000","2026-02-24T00:00:02.218000","2026-02-24T00:00:02.225000","2026-02-24T00:00:02.448000","2026-02-24T00:00:03.108000","2026-02-24T00:00:03.609000","2026-02-24T00:00:03.108000","2026-02-24T00:00:03.610000","2026-02-24T00:00:03.729000","2026-02-24T00:00:03.901000","2026-02-24T00:00:04.063000","2026-02-24T00:00:03.901000","2026-02-24T00:00:04.063000","2026-02-24T00:00:04.130000","2026-02-24T00:00:04.226000","2026-02-24T00:00:04.327000","2026-02-24T00:00:04.535000","2026-02-24T00:00:04.535000","2026-02-24T00:00:04.535000","2026-02-24T00:00:05.093000","2026-02-24T00:00:05.093000","2026-02-24T00:00:05.218000","2026-02-24T00:00:05.222000","2026-02-24T00:00:05.093000","2026-02-24T00:00:05.218000","2026-02-24T00:00:05.222000","2026-02-24T00:00:05.268000","2026-02-24T00:00:05.268000","2026-02-24T00:00:05.340000","2026-02-24T00:00:05.459000","2026-02-24T00:00:05.463000","2026-02-24T00:00:05.487000","2026-02-24T00:00:05.459000","2026-02-24T00:00:05.463000","2026-02-24T00:00:05.487000","2026-02-24T00:00:05.487000","2026-02-24T00:00:05.548000","2026-02-24T00:00:05.593000","2026-02-24T00:00:05.618000","2026-02-24T00:00:05.593000","2026-02-24T00:00:05.593000","2026-02-24T00:00:05.593000","2026-02-24T00:00:06.246000","2026-02-24T00:00:06.270000","2026-02-24T00:00:06.290000","2026-02-24T00:00:06.407000","2026-02-24T00:00:06.290000","2026-02-24T00:00:06.407000","2026-02-24T00:00:06.538000","2026-02-24T00:00:06.553000","2026-02-24T00:00:06.718000","2026-02-24T00:00:06.553000","2026-02-24T00:00:06.718000","2026-02-24T00:00:06.765000","2026-02-24T00:00:06.807000","2026-02-24T00:00:06.954000","2026-02-24T00:00:06.966000","2026-02-24T00:00:07.174000","2026-02-24T00:00:07.187000","2026-02-24T00:00:07.190000","2026-02-24T00:00:07.421000","2026-02-24T00:00:07.423000","2026-02-24T00:00:07.463000","2026-02-24T00:00:07.465000","2026-02-24T00:00:07.526000","2026-02-24T00:00:07.528000","2026-02-24T00:00:07.582000","2026-02-24T00:00:07.608000","2026-02-24T00:00:06.538000","2026-02-24T00:00:06.553000","2026-02-24T00:00:06.718000","2026-02-24T00:00:06.764000","2026-02-24T00:00:06.807000","2026-02-24T00:00:06.954000","2026-02-24T00:00:06.966000","2026-02-24T00:00:07.174000","2026-02-24T00:00:07.187000","2026-02-24T00:00:07.190000","2026-02-24T00:00:07.421000","2026-02-24T00:00:06.718000","2026-02-24T00:00:06.765000","2026-02-24T00:00:06.807000","2026-02-24T00:00:06.954000","2026-02-24T00:00:06.966000","2026-02-24T00:00:07.174000","2026-02-24T00:00:07.187000","2026-02-24T00:00:06.954000","2026-02-24T00:00:06.965000","2026-02-24T00:00:07.174000","2026-02-24T00:00:07.187000","2026-02-24T00:00:07.190000","2026-02-24T00:00:07.421000","2026-02-24T00:00:07.423000","2026-02-24T00:00:07.463000","2026-02-24T00:00:07.421000","2026-02-24T00:00:07.423000","2026-02-24T00:00:07.463000","2026-02-24T00:00:07.465000","2026-02-24T00:00:07.526000","2026-02-24T00:00:07.463000","2026-02-24T00:00:07.465000","2026-02-24T00:00:07.526000","2026-02-24T00:00:07.608000","2026-02-24T00:00:07.526000","2026-02-24T00:00:07.608000","2026-02-24T00:00:08.318000","2026-02-24T00:00:08.322000","2026-02-24T00:00:08.363000","2026-02-24T00:00:08.401000","2026-02-24T00:00:08.470000","2026-02-24T00:00:08.522000","2026-02-24T00:00:08.363000","2026-02-24T00:00:08.401000","2026-02-24T00:00:08.470000","2026-02-24T00:00:08.401000","2026-02-24T00:00:08.470000","2026-02-24T00:00:08.522000","2026-02-24T00:00:08.538000","2026-02-24T00:00:08.540000","2026-02-24T00:00:08.741000","2026-02-24T00:00:08.802000","2026-02-24T00:00:08.902000","2026-02-24T00:00:09.268000","2026-02-24T00:00:09.284000","2026-02-24T00:00:09.326000","2026-02-24T00:00:08.400000","2026-02-24T00:00:08.522000","2026-02-24T00:00:08.540000","2026-02-24T00:00:08.802000","2026-02-24T00:00:08.902000","2026-02-24T00:00:09.268000","2026-02-24T00:00:08.537000","2026-02-24T00:00:08.802000","2026-02-24T00:00:08.902000","2026-02-24T00:00:09.268000","2026-02-24T00:00:09.284000","2026-02-24T00:00:09.326000","2026-02-24T00:00:09.393000","2026-02-24T00:00:09.284000","2026-02-24T00:00:09.326000","2026-02-24T00:00:09.393000","2026-02-24T00:00:09.477000","2026-02-24T00:00:09.393000","2026-02-24T00:00:09.477000","2026-02-24T00:00:09.477000","2026-02-24T00:00:09.911000","2026-02-24T00:00:09.919000","2026-02-24T00:00:09.911000","2026-02-24T00:00:09.919000","2026-02-24T00:00:09.911000","2026-02-24T00:00:09.919000","2026-02-24T00:00:10.311000","2026-02-24T00:00:09.919000","2026-02-24T00:00:10.311000","2026-02-24T00:00:10.311000"],"customdata":[[13,120,"00:00:00.000","00:00:00.192","00:00:00.034","00:00:00.038","0.004s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.038","00:00:00.043","0.004s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.043","00:00:00.047","0.004s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.047","00:00:00.051","0.004s"],[13,120,"00:00:00.000","00:00:00.192","00:00:00.141","00:00:00.145","0.004s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.329","00:00:00.333","0.004s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.389","00:00:00.393","0.004s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.514","00:00:00.518","0.004s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.566","00:00:00.570","0.004s"],[25,770,"00:00:00.135","00:00:01.104","00:00:00.808","00:00:00.812","0.004s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.329","00:00:00.333","0.004s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.389","00:00:00.393","0.004s"],[27,233,"00:00:00.236","00:00:00.530","00:00:00.515","00:00:00.518","0.003s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.389","00:00:00.393","0.004s"],[10,194,"00:00:00.322","00:00:00.565","00:00:00.515","00:00:00.518","0.004s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.566","00:00:00.570","0.004s"],[29,741,"00:00:00.495","00:00:01.467","00:00:00.808","00:00:00.812","0.004s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.220","00:00:01.223","0.004s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.465","00:00:01.467","0.002s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.565","00:00:01.568","0.004s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.847","00:00:01.851","0.004s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.853","00:00:01.857","0.004s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.883","00:00:01.885","0.002s"],[16,392,"00:00:01.384","00:00:01.894","00:00:01.891","00:00:01.894","0.002s"],[372,84,"00:00:01.466","00:00:01.579","00:00:01.565","00:00:01.568","0.004s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.847","00:00:01.851","0.004s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.853","00:00:01.857","0.004s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.883","00:00:01.885","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.891","00:00:01.894","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.894","00:00:01.896","0.003s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.904","00:00:01.906","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.908","00:00:01.910","0.002s"],[403,396,"00:00:01.557","00:00:02.091","00:00:01.910","00:00:01.913","0.003s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.055","00:00:02.057","0.002s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.853","00:00:01.856","0.004s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.883","00:00:01.885","0.002s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.891","00:00:01.893","0.002s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.893","00:00:01.896","0.003s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.904","00:00:01.906","0.002s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.908","00:00:01.910","0.002s"],[9,45,"00:00:01.840","00:00:01.916","00:00:01.910","00:00:01.913","0.003s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.883","00:00:01.885","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.891","00:00:01.894","0.003s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.894","00:00:01.896","0.003s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.904","00:00:01.906","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.908","00:00:01.910","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:01.910","00:00:01.913","0.003s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.055","00:00:02.057","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.089","00:00:02.092","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.208","00:00:02.210","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.218","00:00:02.220","0.002s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.225","00:00:02.229","0.004s"],[770,9,"00:00:01.858","00:00:01.910","00:00:01.904","00:00:01.906","0.002s"],[770,9,"00:00:01.858","00:00:01.910","00:00:01.908","00:00:01.910","0.002s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.055","00:00:02.057","0.002s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.208","00:00:02.210","0.002s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.218","00:00:02.220","0.002s"],[8,277,"00:00:02.054","00:00:02.397","00:00:02.225","00:00:02.229","0.004s"],[14,334,"00:00:02.219","00:00:02.613","00:00:02.448","00:00:02.450","0.002s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.108","00:00:03.112","0.005s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.609","00:00:03.612","0.002s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.108","00:00:03.112","0.005s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.610","00:00:03.612","0.002s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.729","00:00:03.734","0.005s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.901","00:00:03.905","0.004s"],[344,450,"00:00:03.408","00:00:04.115","00:00:04.063","00:00:04.066","0.003s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.901","00:00:03.905","0.004s"],[10,226,"00:00:03.803","00:00:04.118","00:00:04.063","00:00:04.066","0.003s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.130","00:00:04.134","0.004s"],[4,146,"00:00:04.058","00:00:04.239","00:00:04.226","00:00:04.230","0.004s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.327","00:00:04.332","0.004s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.535","00:00:04.540","0.005s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.535","00:00:04.540","0.005s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.535","00:00:04.540","0.005s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.093","00:00:05.095","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.093","00:00:05.096","0.002s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.218","00:00:05.222","0.004s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.222","00:00:05.226","0.004s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.093","00:00:05.096","0.002s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.218","00:00:05.222","0.004s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.222","00:00:05.226","0.004s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.268","00:00:05.270","0.002s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.268","00:00:05.270","0.002s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.340","00:00:05.343","0.002s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.459","00:00:05.461","0.002s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.463","00:00:05.465","0.002s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.487","00:00:05.491","0.004s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.459","00:00:05.461","0.002s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.463","00:00:05.465","0.002s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.487","00:00:05.491","0.004s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.487","00:00:05.491","0.004s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.548","00:00:05.550","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.593","00:00:05.595","0.002s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.618","00:00:05.620","0.002s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.593","00:00:05.596","0.002s"],[147,129,"00:00:05.497","00:00:05.723","00:00:05.593","00:00:05.596","0.002s"],[481,26,"00:00:05.538","00:00:05.618","00:00:05.593","00:00:05.596","0.002s"],[331,11,"00:00:06.227","00:00:06.256","00:00:06.246","00:00:06.250","0.004s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.270","00:00:06.274","0.004s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.290","00:00:06.295","0.004s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.407","00:00:06.409","0.002s"],[45,78,"00:00:06.264","00:00:06.364","00:00:06.290","00:00:06.294","0.004s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.407","00:00:06.409","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.538","00:00:06.540","0.002s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.553","00:00:06.557","0.004s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.718","00:00:06.720","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.553","00:00:06.557","0.004s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.718","00:00:06.720","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.765","00:00:06.769","0.004s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.807","00:00:06.811","0.004s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.954","00:00:06.956","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.966","00:00:06.969","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.174","00:00:07.177","0.003s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.187","00:00:07.189","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.190","00:00:07.192","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.421","00:00:07.423","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.423","00:00:07.426","0.003s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.463","00:00:07.465","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.465","00:00:07.469","0.004s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.526","00:00:07.528","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.528","00:00:07.530","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.582","00:00:07.584","0.002s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.608","00:00:07.610","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.538","00:00:06.540","0.002s"],[93,123,"00:00:06.498","00:00:06.688","00:00:06.553","00:00:06.558","0.004s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.718","00:00:06.720","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.764","00:00:06.769","0.004s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.807","00:00:06.811","0.004s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.954","00:00:06.956","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.966","00:00:06.968","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.174","00:00:07.177","0.003s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.187","00:00:07.189","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.190","00:00:07.192","0.002s"],[22,559,"00:00:06.548","00:00:07.423","00:00:07.421","00:00:07.423","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.718","00:00:06.720","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.765","00:00:06.769","0.004s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.807","00:00:06.811","0.004s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.954","00:00:06.956","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:06.966","00:00:06.969","0.002s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.174","00:00:07.177","0.003s"],[16,316,"00:00:06.677","00:00:07.189","00:00:07.187","00:00:07.189","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.954","00:00:06.956","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:06.965","00:00:06.967","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.174","00:00:07.177","0.003s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.187","00:00:07.189","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.190","00:00:07.192","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.421","00:00:07.423","0.002s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.423","00:00:07.426","0.003s"],[32,417,"00:00:06.717","00:00:07.465","00:00:07.463","00:00:07.465","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.421","00:00:07.423","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.423","00:00:07.426","0.003s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.463","00:00:07.465","0.002s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.465","00:00:07.469","0.004s"],[689,206,"00:00:06.767","00:00:07.528","00:00:07.526","00:00:07.528","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.463","00:00:07.465","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.465","00:00:07.469","0.004s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.526","00:00:07.528","0.002s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.608","00:00:07.610","0.002s"],[9,42,"00:00:06.966","00:00:07.538","00:00:07.526","00:00:07.528","0.002s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.608","00:00:07.610","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.318","00:00:08.322","0.004s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.322","00:00:08.327","0.004s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.363","00:00:08.365","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.401","00:00:08.403","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.470","00:00:08.472","0.002s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.522","00:00:08.524","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.363","00:00:08.365","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.401","00:00:08.403","0.002s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.470","00:00:08.472","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.401","00:00:08.403","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.470","00:00:08.472","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.522","00:00:08.524","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.538","00:00:08.540","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.540","00:00:08.543","0.004s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.741","00:00:08.744","0.002s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.802","00:00:08.806","0.004s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.902","00:00:08.906","0.004s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.268","00:00:09.270","0.003s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.284","00:00:09.286","0.003s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.326","00:00:09.329","0.004s"],[348,67,"00:00:08.362","00:00:08.470","00:00:08.400","00:00:08.403","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.522","00:00:08.524","0.002s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.540","00:00:08.543","0.004s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.802","00:00:08.806","0.003s"],[82,628,"00:00:08.401","00:00:09.284","00:00:08.902","00:00:08.906","0.004s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.268","00:00:09.270","0.003s"],[467,11,"00:00:08.414","00:00:08.539","00:00:08.537","00:00:08.539","0.002s"],[11,296,"00:00:08.514","00:00:08.902","00:00:08.802","00:00:08.806","0.003s"],[24,501,"00:00:08.522","00:00:09.477","00:00:08.902","00:00:08.906","0.004s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.268","00:00:09.270","0.003s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.284","00:00:09.286","0.003s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.326","00:00:09.329","0.004s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.393","00:00:09.397","0.003s"],[387,22,"00:00:08.774","00:00:09.302","00:00:09.284","00:00:09.286","0.003s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.326","00:00:09.329","0.004s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.393","00:00:09.397","0.003s"],[32,237,"00:00:08.866","00:00:09.614","00:00:09.477","00:00:09.479","0.003s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.393","00:00:09.397","0.003s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.477","00:00:09.479","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.477","00:00:09.479","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.911","00:00:09.913","0.002s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.919","00:00:09.921","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.911","00:00:09.913","0.002s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.919","00:00:09.921","0.002s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.911","00:00:09.913","0.002s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.919","00:00:09.921","0.002s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.311","00:00:10.315","0.003s"],[23,413,"00:00:09.621","00:00:10.311","00:00:09.919","00:00:09.921","0.002s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.311","00:00:10.315","0.003s"],[203,416,"00:00:09.912","00:00:10.574","00:00:10.311","00:00:10.315","0.003s"]],"hovertemplate":"\u003cb\u003e%{y}\u003c\u002fb\u003e\u003cbr\u003eType: %{fullData.name}\u003cbr\u003eStart: %{customdata[4]}\u003cbr\u003eEnd: %{customdata[5]}\u003cbr\u003eDuration: %{customdata[6]}\u003cbr\u003ePrompt Tokens: %{customdata[0]}\u003cbr\u003eOutput Tokens: %{customdata[1]}\u003cbr\u003eRequest Start Time: %{customdata[2]}\u003cbr\u003eRequest End Time: %{customdata[3]}\u003cbr\u003e\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"2ms \u2264 ITL \u003c 5ms","marker":{"color":"#FF7F0E","pattern":{"shape":""}},"name":"2ms \u2264 ITL \u003c 5ms","orientation":"h","showlegend":true,"textposition":"auto","x":{"dtype":"i1","bdata":"BAUEBAQEBAQEBAQEAwQDBAQDAgMEBAIDAwQEAgMCAgIDAgMCAgMCAgMCAwICAgMCAwICBAICAgICBAIEAwQCBQQDBAMEBAUFBQUCAwQEAwQEAgIDAgIEAgIEBAICAgMDAwQEBQIEAgIEAgQCBAQCAwMCAgIDAgQCAgICAgUCBQQCAgMCAgICBAQCAwMCAgIDAgICAwICAwIEAgIEAgICAgQFAgICAgICAgICAgIDAwQEAgIDAwIDBAQCAgQEAgIDBAIDBAIEAgICAgICAgIEAgQE"},"xaxis":"x","y":["Req 0","Req 0","Req 0","Req 0","Req 0","Req 1","Req 1","Req 1","Req 1","Req 1","Req 2","Req 2","Req 2","Req 3","Req 3","Req 5","Req 5","Req 5","Req 10","Req 10","Req 10","Req 10","Req 10","Req 10","Req 11","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 12","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 13","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 14","Req 15","Req 15","Req 16","Req 17","Req 17","Req 17","Req 18","Req 20","Req 20","Req 21","Req 26","Req 26","Req 26","Req 26","Req 28","Req 28","Req 31","Req 31","Req 34","Req 34","Req 35","Req 36","Req 36","Req 39","Req 39","Req 39","Req 40","Req 40","Req 40","Req 40","Req 43","Req 43","Req 43","Req 43","Req 43","Req 44","Req 44","Req 44","Req 45","Req 45","Req 45","Req 45","Req 46","Req 47","Req 48","Req 52","Req 53","Req 53","Req 53","Req 54","Req 55","Req 55","Req 55","Req 55","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 56","Req 58","Req 58","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 60","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 62","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 64","Req 65","Req 65","Req 65","Req 65","Req 65","Req 66","Req 66","Req 66","Req 66","Req 67","Req 70","Req 73","Req 73","Req 73","Req 73","Req 73","Req 73","Req 74","Req 74","Req 74","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 76","Req 77","Req 78","Req 78","Req 78","Req 78","Req 78","Req 79","Req 81","Req 82","Req 82","Req 82","Req 82","Req 82","Req 85","Req 87","Req 87","Req 87","Req 88","Req 88","Req 90","Req 90","Req 90","Req 91","Req 91","Req 94","Req 94","Req 94","Req 95","Req 96","Req 98"],"yaxis":"y","type":"bar"},{"base":["2026-02-24T00:00:00.242000","2026-02-24T00:00:01.389000","2026-02-24T00:00:02.062000","2026-02-24T00:00:02.062000","2026-02-24T00:00:02.062000","2026-02-24T00:00:03.041000","2026-02-24T00:00:03.274000","2026-02-24T00:00:03.341000","2026-02-24T00:00:03.398000","2026-02-24T00:00:03.471000","2026-02-24T00:00:03.274000","2026-02-24T00:00:03.341000","2026-02-24T00:00:03.398000","2026-02-24T00:00:03.471000","2026-02-24T00:00:03.274000","2026-02-24T00:00:03.341000","2026-02-24T00:00:03.398000","2026-02-24T00:00:03.471000","2026-02-24T00:00:03.808000","2026-02-24T00:00:03.868000","2026-02-24T00:00:03.808000","2026-02-24T00:00:03.868000","2026-02-24T00:00:03.868000","2026-02-24T00:00:04.433000","2026-02-24T00:00:04.850000","2026-02-24T00:00:04.973000","2026-02-24T00:00:04.433000","2026-02-24T00:00:04.850000","2026-02-24T00:00:04.973000","2026-02-24T00:00:05.086000","2026-02-24T00:00:05.086000","2026-02-24T00:00:05.086000","2026-02-24T00:00:05.465000","2026-02-24T00:00:05.465000","2026-02-24T00:00:05.723000","2026-02-24T00:00:05.753000","2026-02-24T00:00:05.723000","2026-02-24T00:00:05.753000","2026-02-24T00:00:05.723000","2026-02-24T00:00:05.753000","2026-02-24T00:00:06.504000","2026-02-24T00:00:06.504000","2026-02-24T00:00:06.688000","2026-02-24T00:00:06.700000","2026-02-24T00:00:06.504000","2026-02-24T00:00:06.688000","2026-02-24T00:00:06.700000","2026-02-24T00:00:07.537000","2026-02-24T00:00:07.589000","2026-02-24T00:00:06.688000","2026-02-24T00:00:06.700000","2026-02-24T00:00:07.538000","2026-02-24T00:00:07.589000","2026-02-24T00:00:07.659000","2026-02-24T00:00:07.538000","2026-02-24T00:00:07.659000","2026-02-24T00:00:08.372000","2026-02-24T00:00:08.372000","2026-02-24T00:00:08.372000","2026-02-24T00:00:09.094000","2026-02-24T00:00:09.302000","2026-02-24T00:00:09.094000","2026-02-24T00:00:09.094000","2026-02-24T00:00:09.302000","2026-02-24T00:00:09.301000","2026-02-24T00:00:09.724000","2026-02-24T00:00:09.724000","2026-02-24T00:00:09.780000","2026-02-24T00:00:10.007000","2026-02-24T00:00:09.724000","2026-02-24T00:00:09.780000","2026-02-24T00:00:09.779000","2026-02-24T00:00:10.007000","2026-02-24T00:00:10.033000","2026-02-24T00:00:10.058000","2026-02-24T00:00:10.007000","2026-02-24T00:00:10.033000","2026-02-24T00:00:10.057000","2026-02-24T00:00:10.033000","2026-02-24T00:00:10.058000"],"customdata":[[25,770,"00:00:00.135","00:00:01.104","00:00:00.242","00:00:00.249","0.007s"],[29,741,"00:00:00.495","00:00:01.467","00:00:01.389","00:00:01.395","0.006s"],[403,396,"00:00:01.557","00:00:02.091","00:00:02.062","00:00:02.068","0.006s"],[12,291,"00:00:01.848","00:00:02.242","00:00:02.062","00:00:02.068","0.006s"],[227,173,"00:00:01.860","00:00:02.143","00:00:02.062","00:00:02.068","0.006s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.041","00:00:03.048","0.006s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.274","00:00:03.279","0.005s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.341","00:00:03.347","0.005s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.398","00:00:03.405","0.006s"],[659,587,"00:00:02.860","00:00:03.728","00:00:03.471","00:00:03.477","0.005s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.274","00:00:03.279","0.005s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.341","00:00:03.347","0.005s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.398","00:00:03.404","0.006s"],[59,372,"00:00:03.034","00:00:03.602","00:00:03.471","00:00:03.477","0.005s"],[76,159,"00:00:03.102","00:00:03.341","00:00:03.274","00:00:03.280","0.005s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.341","00:00:03.347","0.006s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.398","00:00:03.405","0.006s"],[744,135,"00:00:03.266","00:00:03.488","00:00:03.471","00:00:03.477","0.005s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.808","00:00:03.813","0.006s"],[344,450,"00:00:03.408","00:00:04.115","00:00:03.868","00:00:03.874","0.006s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.808","00:00:03.813","0.006s"],[9,118,"00:00:03.723","00:00:03.893","00:00:03.868","00:00:03.874","0.006s"],[10,226,"00:00:03.803","00:00:04.118","00:00:03.868","00:00:03.874","0.006s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.433","00:00:04.440","0.008s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.850","00:00:04.858","0.008s"],[317,567,"00:00:04.217","00:00:05.002","00:00:04.973","00:00:04.980","0.007s"],[40,192,"00:00:04.321","00:00:04.587","00:00:04.433","00:00:04.441","0.008s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.850","00:00:04.858","0.008s"],[619,520,"00:00:04.425","00:00:05.187","00:00:04.973","00:00:04.980","0.007s"],[619,520,"00:00:04.425","00:00:05.187","00:00:05.086","00:00:05.093","0.007s"],[16,223,"00:00:04.967","00:00:05.296","00:00:05.086","00:00:05.093","0.007s"],[17,188,"00:00:05.016","00:00:05.290","00:00:05.086","00:00:05.093","0.007s"],[11,276,"00:00:05.215","00:00:05.549","00:00:05.465","00:00:05.470","0.005s"],[7,202,"00:00:05.335","00:00:05.582","00:00:05.465","00:00:05.470","0.005s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.723","00:00:05.729","0.006s"],[281,492,"00:00:05.456","00:00:06.137","00:00:05.753","00:00:05.758","0.006s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.723","00:00:05.729","0.006s"],[12,239,"00:00:05.481","00:00:05.815","00:00:05.753","00:00:05.758","0.006s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.723","00:00:05.729","0.006s"],[770,231,"00:00:05.587","00:00:05.954","00:00:05.753","00:00:05.758","0.006s"],[9,207,"00:00:06.239","00:00:06.516","00:00:06.504","00:00:06.511","0.007s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.504","00:00:06.511","0.007s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.688","00:00:06.694","0.006s"],[222,340,"00:00:06.283","00:00:06.765","00:00:06.700","00:00:06.705","0.005s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.504","00:00:06.511","0.007s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.688","00:00:06.694","0.006s"],[802,768,"00:00:06.399","00:00:07.610","00:00:06.700","00:00:06.705","0.005s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.537","00:00:07.543","0.005s"],[802,768,"00:00:06.399","00:00:07.610","00:00:07.589","00:00:07.595","0.006s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.688","00:00:06.694","0.006s"],[22,559,"00:00:06.548","00:00:07.423","00:00:06.700","00:00:06.705","0.005s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.538","00:00:07.543","0.005s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.589","00:00:07.595","0.006s"],[178,151,"00:00:06.954","00:00:07.669","00:00:07.659","00:00:07.666","0.007s"],[628,25,"00:00:07.163","00:00:07.574","00:00:07.538","00:00:07.543","0.005s"],[42,159,"00:00:07.582","00:00:07.782","00:00:07.659","00:00:07.666","0.007s"],[515,267,"00:00:08.162","00:00:08.538","00:00:08.372","00:00:08.378","0.006s"],[761,139,"00:00:08.308","00:00:08.522","00:00:08.372","00:00:08.378","0.006s"],[113,768,"00:00:08.348","00:00:09.374","00:00:08.372","00:00:08.378","0.006s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.094","00:00:09.099","0.005s"],[113,768,"00:00:08.348","00:00:09.374","00:00:09.302","00:00:09.307","0.005s"],[82,628,"00:00:08.401","00:00:09.284","00:00:09.094","00:00:09.099","0.005s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.094","00:00:09.099","0.005s"],[24,501,"00:00:08.522","00:00:09.477","00:00:09.302","00:00:09.307","0.005s"],[258,26,"00:00:08.855","00:00:09.326","00:00:09.301","00:00:09.307","0.005s"],[28,350,"00:00:08.959","00:00:09.780","00:00:09.724","00:00:09.730","0.006s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.724","00:00:09.730","0.006s"],[248,490,"00:00:09.290","00:00:10.033","00:00:09.780","00:00:09.786","0.006s"],[248,490,"00:00:09.290","00:00:10.033","00:00:10.007","00:00:10.012","0.005s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.724","00:00:09.730","0.006s"],[35,408,"00:00:09.410","00:00:10.007","00:00:09.780","00:00:09.786","0.006s"],[27,512,"00:00:09.599","00:00:10.388","00:00:09.779","00:00:09.786","0.006s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.007","00:00:10.012","0.006s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.033","00:00:10.039","0.005s"],[27,512,"00:00:09.599","00:00:10.388","00:00:10.058","00:00:10.063","0.005s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.007","00:00:10.012","0.005s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.033","00:00:10.039","0.006s"],[23,413,"00:00:09.621","00:00:10.311","00:00:10.057","00:00:10.063","0.006s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.033","00:00:10.039","0.005s"],[58,526,"00:00:09.717","00:00:10.670","00:00:10.058","00:00:10.063","0.005s"]],"hovertemplate":"\u003cb\u003e%{y}\u003c\u002fb\u003e\u003cbr\u003eType: %{fullData.name}\u003cbr\u003eStart: %{customdata[4]}\u003cbr\u003eEnd: %{customdata[5]}\u003cbr\u003eDuration: %{customdata[6]}\u003cbr\u003ePrompt Tokens: %{customdata[0]}\u003cbr\u003eOutput Tokens: %{customdata[1]}\u003cbr\u003eRequest Start Time: %{customdata[2]}\u003cbr\u003eRequest End Time: %{customdata[3]}\u003cbr\u003e\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"ITL \u2265 5ms","marker":{"color":"#D62728","pattern":{"shape":""}},"name":"ITL \u2265 5ms","orientation":"h","showlegend":true,"textposition":"auto","x":{"dtype":"i1","bdata":"BwYGBgYHBQYHBgUGBgYGBgcGBQYFBgYHCAcICAcHBwcFBQYFBgUGBQcHBgUHBgUGBgYFBQYHBQcGBgYFBQUFBQYGBgYFBgYHBQYFBQYGBgU="},"xaxis":"x","y":["Req 1","Req 5","Req 12","Req 14","Req 16","Req 20","Req 20","Req 20","Req 20","Req 20","Req 21","Req 21","Req 21","Req 21","Req 22","Req 23","Req 23","Req 23","Req 26","Req 26","Req 27","Req 27","Req 28","Req 34","Req 34","Req 34","Req 35","Req 36","Req 36","Req 36","Req 39","Req 40","Req 43","Req 44","Req 45","Req 45","Req 46","Req 46","Req 49","Req 49","Req 53","Req 55","Req 55","Req 55","Req 56","Req 56","Req 56","Req 56","Req 56","Req 60","Req 60","Req 66","Req 66","Req 66","Req 68","Req 70","Req 73","Req 74","Req 76","Req 76","Req 76","Req 78","Req 82","Req 82","Req 86","Req 88","Req 90","Req 90","Req 90","Req 91","Req 91","Req 94","Req 94","Req 94","Req 94","Req 95","Req 95","Req 95","Req 96","Req 96"],"yaxis":"y","type":"bar"}],                        {"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermap":[{"type":"scattermap","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,1.0],"type":"date","title":{"text":"Time"}},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Request ID"},"autorange":"reversed"},"legend":{"title":{"text":"type"},"tracegroupgap":0},"margin":{"t":60},"barmode":"overlay","showlegend":true},                        {"responsive": true}                    )                };            </script>        </div>
+</body>
+</html>
\ No newline at end of file
diff --git a/docs/assets/models/pooling_models/cheat_sheet.svg b/docs/assets/models/pooling_models/cheat_sheet.svg
new file mode 100644
index 000000000000..f8fc17569c70
--- /dev/null
+++ b/docs/assets/models/pooling_models/cheat_sheet.svg
@@ -0,0 +1,796 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="1920px" height="588px" viewBox="0 0 1920 588" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+    <title>cheat_sheet</title>
+    <defs>
+        <path d="M0,0 L1920,0 L1920,588 L0,588 L0,0 Z" id="path-1"></path>
+        <path d="M0,0 L214,0 L214,52 L0,52 L0,0 Z" id="path-2"></path>
+        <path d="M0,0 L214,0 L214,40 L0,40 L0,0 Z" id="path-3"></path>
+        <path d="M0,0 L214,0 L214,40 L0,40 L0,0 Z" id="path-4"></path>
+        <path d="M0,0 L190,0 L190,52 L0,52 L0,0 Z" id="path-5"></path>
+        <path d="M0,0 L190,0 L190,40 L0,40 L0,0 Z" id="path-6"></path>
+        <path d="M0,0 L190,0 L190,40 L0,40 L0,0 Z" id="path-7"></path>
+        <path d="M0,0 L190,0 L190,40 L0,40 L0,0 Z" id="path-8"></path>
+        <path d="M0,0 L190,0 L190,52 L0,52 L0,0 Z" id="path-9"></path>
+        <path d="M0,0 L190,0 L190,40 L0,40 L0,0 Z" id="path-10"></path>
+        <path d="M0,0 L190,0 L190,40 L0,40 L0,0 Z" id="path-11"></path>
+        <path d="M0,0 L190,0 L190,40 L0,40 L0,0 Z" id="path-12"></path>
+        <path d="M0,0 L190,0 L190,40 L0,40 L0,0 Z" id="path-13"></path>
+        <path d="M0,0 L190,0 L190,40 L0,40 L0,0 Z" id="path-14"></path>
+        <path d="M0,0 L170,0 L170,52 L0,52 L0,0 Z" id="path-15"></path>
+        <path d="M0,0 L170,0 L170,40 L0,40 L0,0 Z" id="path-16"></path>
+        <path d="M0,0 L170,0 L170,40 L0,40 L0,0 Z" id="path-17"></path>
+        <path d="M0,0 L170,0 L170,40 L0,40 L0,0 Z" id="path-18"></path>
+        <path d="M0,0 L170,0 L170,40 L0,40 L0,0 Z" id="path-19"></path>
+        <path d="M0,0 L170,0 L170,40 L0,40 L0,0 Z" id="path-20"></path>
+        <path d="M0,0 L336,0 L336,52 L0,52 L0,0 Z" id="path-21"></path>
+        <path d="M0,0 L336,0 L336,40 L0,40 L0,0 Z" id="path-22"></path>
+        <path d="M0,0 L336,0 L336,40 L0,40 L0,0 Z" id="path-23"></path>
+        <path d="M0,0 L336,0 L336,40 L0,40 L0,0 Z" id="path-24"></path>
+        <path d="M0,0 L336,0 L336,40 L0,40 L0,0 Z" id="path-25"></path>
+        <path d="M0,0 L336,0 L336,40 L0,40 L0,0 Z" id="path-26"></path>
+        <path d="M0,0 L336,0 L336,40 L0,40 L0,0 Z" id="path-27"></path>
+        <path d="M0,0 L336,0 L336,40 L0,40 L0,0 Z" id="path-28"></path>
+        <path d="M0,0 L600,0 L600,52 L0,52 L0,0 Z" id="path-29"></path>
+        <path d="M0,0 L600,0 L600,40 L0,40 L0,0 Z" id="path-30"></path>
+        <path d="M0,0 L600,0 L600,40 L0,40 L0,0 Z" id="path-31"></path>
+        <path d="M0,0 L600,0 L600,40 L0,40 L0,0 Z" id="path-32"></path>
+        <path d="M0,0 L600,0 L600,40 L0,40 L0,0 Z" id="path-33"></path>
+        <path d="M0,0 L600,0 L600,40 L0,40 L0,0 Z" id="path-34"></path>
+        <path d="M0,0 L600,0 L600,40 L0,40 L0,0 Z" id="path-35"></path>
+    </defs>
+    <g id="cheat_sheet-@字体放大版_outlined" stroke="none" fill="none" xlink:href="#path-1" fill-rule="evenodd">
+        <use fill="#FFFFFF" xlink:href="#path-1"></use>
+        <g id="List-@Pooling-Granularity" stroke-width="1" transform="translate(6, 39.6)">
+            <g id="header-bg" xlink:href="#path-2" fill="#9172E2">
+                <g id="bg">
+                    <path d="M20,0 L194,0 C205.045695,-3.55271368e-15 214,8.954305 214,20 L214,52 L214,52 L0,52 L0,20 C0,8.954305 8.954305,0 20,0 Z" id="header-bg" fill-opacity="0.1"></path>
+                    <path d="M194,0 C205.045695,0 214,8.954305 214,20 L214,52 L0,52 L0,20 C0,8.954305 8.954305,0 20,0 L194,0 Z M194,1.6 L20.0000001,1.6 L18.118707,1.69499723 C8.84039506,2.63726101 1.6,10.4730881 1.6,20 L1.6,50.4 L212.4,50.4 L212.4,20 C212.4,9.8379606 204.162039,1.6 194,1.6 Z" id="header-bg" fill-rule="nonzero"></path>
+                </g>
+                <g id="Title" transform="translate(0, 12)">
+                    <path d="M11.3167969,22.0117188 C11.7777344,22.0117188 12.1742188,21.8515625 12.50625,21.53125 C12.8382813,21.2109375 13.0042969,20.7890625 13.0042969,20.265625 L13.0042969,15.2851563 L16.2738281,15.2851563 C20.5472656,15.2851563 22.6839844,13.5820313 22.6839844,10.1757813 C22.6839844,9.24609375 22.5394531,8.44726562 22.2503906,7.77929687 C21.9613281,7.11132813 21.5375,6.58398438 20.9789063,6.19726562 C20.4203125,5.81054688 19.7757813,5.52929688 19.0453125,5.35351562 C18.3148438,5.17773438 17.4652344,5.08984375 16.4964844,5.08984375 L11.4574219,5.08984375 C10.8792969,5.08984375 10.4300781,5.28515625 10.1097656,5.67578125 C9.78945313,6.06640625 9.62929688,6.55859375 9.62929688,7.15234375 L9.62929688,20.265625 C9.62929688,20.7890625 9.79726563,21.2109375 10.1332031,21.53125 C10.4691406,21.8515625 10.8636719,22.0117188 11.3167969,22.0117188 Z M13.0042969,12.8476562 L13.0042969,7.65625 L16.1332031,7.65625 C16.6800781,7.65625 17.1351563,7.6875 17.4984375,7.75 C17.8617188,7.8125 18.19375,7.9296875 18.4945313,8.1015625 C18.7953125,8.2734375 19.0160156,8.53125 19.1566406,8.875 C19.2972656,9.21875 19.3675781,9.65234375 19.3675781,10.1757813 C19.3675781,10.7148438 19.2972656,11.1601562 19.1566406,11.5117188 C19.0160156,11.8632813 18.7953125,12.1347656 18.4945313,12.3261719 C18.19375,12.5175781 17.8539063,12.6523438 17.475,12.7304688 C17.0960938,12.8085938 16.6214844,12.8476562 16.0511719,12.8476562 L13.0042969,12.8476562 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M29.3839844,20.1367188 C28.3761719,20.1367188 27.5910156,19.7851563 27.0285156,19.0820312 C26.4660156,18.3789062 26.1847656,17.3945313 26.1847656,16.1289062 C26.1847656,14.8476562 26.4640625,13.8535156 27.0226563,13.1464844 C27.58125,12.4394531 28.3683594,12.0859375 29.3839844,12.0859375 C30.3996094,12.0859375 31.1886719,12.4414062 31.7511719,13.1523438 C32.3136719,13.8632813 32.5949219,14.8554688 32.5949219,16.1289062 C32.5949219,17.3945313 32.3136719,18.3789062 31.7511719,19.0820312 C31.1886719,19.7851563 30.3996094,20.1367188 29.3839844,20.1367188 Z M29.3839844,22.140625 C30.2121094,22.140625 30.9699219,22.0273437 31.6574219,21.8007813 C32.3449219,21.5742188 32.925,21.2714844 33.3976563,20.8925781 C33.8703125,20.5136719 34.26875,20.0644531 34.5929688,19.5449219 C34.9171875,19.0253906 35.1554688,18.4804688 35.3078125,17.9101562 C35.4601563,17.3398438 35.5363281,16.7460938 35.5363281,16.1289062 C35.5363281,15.4804688 35.45625,14.859375 35.2960938,14.265625 C35.1359375,13.671875 34.8878906,13.1191406 34.5519531,12.6074219 C34.2160156,12.0957031 33.8097656,11.6542969 33.3332031,11.2832031 C32.8566406,10.9121094 32.2804688,10.6191406 31.6046875,10.4042969 C30.9289063,10.1894531 30.1886719,10.0820312 29.3839844,10.0820312 C28.5636719,10.0820312 27.8117188,10.1933594 27.128125,10.4160156 C26.4445313,10.6386719 25.8664063,10.9414062 25.39375,11.3242188 C24.9210938,11.7070312 24.5207031,12.15625 24.1925781,12.671875 C23.8644531,13.1875 23.6242188,13.7363281 23.471875,14.3183594 C23.3195313,14.9003906 23.2433594,15.5039062 23.2433594,16.1289062 C23.2433594,16.9179687 23.3683594,17.6640625 23.6183594,18.3671875 C23.8683594,19.0703125 24.2335938,19.7070312 24.7140625,20.2773438 C25.1945313,20.8476563 25.8371094,21.3007813 26.6417969,21.6367188 C27.4464844,21.9726562 28.3605469,22.140625 29.3839844,22.140625 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M42.7519531,20.1367188 C41.7441406,20.1367188 40.9589844,19.7851563 40.3964844,19.0820312 C39.8339844,18.3789062 39.5527344,17.3945313 39.5527344,16.1289062 C39.5527344,14.8476562 39.8320313,13.8535156 40.390625,13.1464844 C40.9492188,12.4394531 41.7363281,12.0859375 42.7519531,12.0859375 C43.7675781,12.0859375 44.5566406,12.4414062 45.1191406,13.1523438 C45.6816406,13.8632813 45.9628906,14.8554688 45.9628906,16.1289062 C45.9628906,17.3945313 45.6816406,18.3789062 45.1191406,19.0820312 C44.5566406,19.7851563 43.7675781,20.1367188 42.7519531,20.1367188 Z M42.7519531,22.140625 C43.5800781,22.140625 44.3378906,22.0273437 45.0253906,21.8007813 C45.7128906,21.5742188 46.2929688,21.2714844 46.765625,20.8925781 C47.2382813,20.5136719 47.6367188,20.0644531 47.9609375,19.5449219 C48.2851563,19.0253906 48.5234375,18.4804688 48.6757813,17.9101562 C48.828125,17.3398438 48.9042969,16.7460938 48.9042969,16.1289062 C48.9042969,15.4804688 48.8242188,14.859375 48.6640625,14.265625 C48.5039063,13.671875 48.2558594,13.1191406 47.9199219,12.6074219 C47.5839844,12.0957031 47.1777344,11.6542969 46.7011719,11.2832031 C46.2246094,10.9121094 45.6484375,10.6191406 44.9726563,10.4042969 C44.296875,10.1894531 43.5566406,10.0820312 42.7519531,10.0820312 C41.9316406,10.0820312 41.1796875,10.1933594 40.4960938,10.4160156 C39.8125,10.6386719 39.234375,10.9414062 38.7617188,11.3242188 C38.2890625,11.7070312 37.8886719,12.15625 37.5605469,12.671875 C37.2324219,13.1875 36.9921875,13.7363281 36.8398438,14.3183594 C36.6875,14.9003906 36.6113281,15.5039062 36.6113281,16.1289062 C36.6113281,16.9179687 36.7363281,17.6640625 36.9863281,18.3671875 C37.2363281,19.0703125 37.6015625,19.7070312 38.0820313,20.2773438 C38.5625,20.8476563 39.2050781,21.3007813 40.0097656,21.6367188 C40.8144531,21.9726562 41.7285156,22.140625 42.7519531,22.140625 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M52.1003906,22 C52.5378906,22 52.8992188,21.8554688 53.184375,21.5664062 C53.4695313,21.2773438 53.6121094,20.875 53.6121094,20.359375 L53.6121094,6.75390625 C53.6121094,6.23828125 53.4714844,5.8359375 53.1902344,5.546875 C52.9089844,5.2578125 52.5535156,5.11328125 52.1238281,5.11328125 C51.6941406,5.11328125 51.3425781,5.2578125 51.0691406,5.546875 C50.7957031,5.8359375 50.6589844,6.23828125 50.6589844,6.75390625 L50.6589844,20.359375 C50.6589844,20.8828125 50.79375,21.2871094 51.0632813,21.5722656 C51.3328125,21.8574219 51.6785156,22 52.1003906,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M57.4644531,22 C57.8941406,22 58.2476563,21.8554688 58.525,21.5664062 C58.8023438,21.2773437 58.9410156,20.875 58.9410156,20.359375 L58.9410156,11.8984375 C58.9410156,11.375 58.8023438,10.96875 58.525,10.6796875 C58.2476563,10.390625 57.8941406,10.2460938 57.4644531,10.2460938 C57.0347656,10.2460938 56.6832031,10.390625 56.4097656,10.6796875 C56.1363281,10.96875 55.9996094,11.375 55.9996094,11.8984375 L55.9996094,20.359375 C55.9996094,20.8828125 56.1363281,21.2871094 56.4097656,21.5722656 C56.6832031,21.8574219 57.0347656,22 57.4644531,22 Z M57.4644531,7.90234375 C57.9722656,7.90234375 58.384375,7.75195312 58.7007813,7.45117188 C59.0171875,7.15039063 59.1753906,6.7578125 59.1753906,6.2734375 C59.1753906,5.7890625 59.0191406,5.3984375 58.7066406,5.1015625 C58.3941406,4.8046875 57.9839844,4.65625 57.4761719,4.65625 C56.9605469,4.65625 56.5445313,4.8046875 56.228125,5.1015625 C55.9117188,5.3984375 55.7535156,5.7890625 55.7535156,6.2734375 C55.7535156,6.7578125 55.9117188,7.15039063 56.228125,7.45117188 C56.5445313,7.75195312 56.9566406,7.90234375 57.4644531,7.90234375 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M62.5824219,22 C63.0121094,22 63.365625,21.8574219 63.6429688,21.5722656 C63.9203125,21.2871094 64.0589844,20.890625 64.0589844,20.3828125 L64.0589844,15.4257812 C64.0589844,14.4335938 64.3441406,13.6289063 64.9144531,13.0117187 C65.4847656,12.3945312 66.1605469,12.0859375 66.9417969,12.0859375 C67.5902344,12.0859375 68.1234375,12.2988281 68.5414063,12.7246094 C68.959375,13.1503906 69.1683594,13.765625 69.1683594,14.5703125 L69.1683594,20.3828125 C69.1683594,20.890625 69.3050781,21.2871094 69.5785156,21.5722656 C69.8519531,21.8574219 70.1957031,22 70.6097656,22 C71.0472656,22 71.4066406,21.8574219 71.6878906,21.5722656 C71.9691406,21.2871094 72.1097656,20.890625 72.1097656,20.3828125 L72.1097656,14.5820312 C72.1097656,13.8320313 71.9964844,13.1640625 71.7699219,12.578125 C71.5433594,11.9921875 71.2347656,11.5214844 70.8441406,11.1660156 C70.4535156,10.8105469 70.0101563,10.5410156 69.5140625,10.3574219 C69.0179688,10.1738281 68.4886719,10.0820312 67.9261719,10.0820312 C67.0121094,10.0820312 66.225,10.2539062 65.5648438,10.5976562 C64.9046875,10.9414062 64.4027344,11.4453125 64.0589844,12.109375 L64.0589844,11.6992188 C64.0589844,11.2382812 63.9222656,10.8808594 63.6488281,10.6269531 C63.3753906,10.3730469 63.0277344,10.2460938 62.6058594,10.2460938 C62.1761719,10.2460938 61.8207031,10.375 61.5394531,10.6328125 C61.2582031,10.890625 61.1175781,11.2539062 61.1175781,11.7226563 L61.1175781,20.3828125 C61.1175781,20.890625 61.2542969,21.2871094 61.5277344,21.5722656 C61.8011719,21.8574219 62.1527344,22 62.5824219,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M82.7472656,21.8476563 C82.7472656,22.9570313 82.4425781,23.7832031 81.8332031,24.3261719 C81.2238281,24.8691406 80.3722656,25.140625 79.2785156,25.140625 C78.9660156,25.140625 78.6476563,25.1132812 78.3234375,25.0585938 C77.9992188,25.0039062 77.7550781,24.953125 77.5910156,24.90625 C77.4269531,24.859375 77.1691406,24.7753906 76.8175781,24.6542969 C76.4660156,24.5332031 76.2550781,24.4609375 76.1847656,24.4375 C76.0597656,24.390625 75.9347656,24.3671875 75.8097656,24.3671875 C75.5363281,24.3671875 75.3097656,24.46875 75.1300781,24.671875 C74.9503906,24.875 74.8605469,25.109375 74.8605469,25.375 C74.8605469,25.75 75.0402344,26.046875 75.3996094,26.265625 C75.8214844,26.5234375 76.4132813,26.7402344 77.175,26.9160156 C77.9367188,27.0917969 78.7355469,27.1796875 79.5714844,27.1796875 C81.4152344,27.1796875 82.8742188,26.6953125 83.9484375,25.7265625 C85.0226563,24.7578125 85.5597656,23.3476563 85.5597656,21.4960938 L85.5597656,11.8867188 C85.5597656,11.3710938 85.4289063,10.96875 85.1671875,10.6796875 C84.9054688,10.390625 84.5714844,10.2460938 84.1652344,10.2460938 C83.8058594,10.2460938 83.4992188,10.3535156 83.2453125,10.5683594 C82.9914063,10.7832031 82.8449219,11.0898438 82.8058594,11.4882813 L82.8058594,12.0273438 C82.4152344,11.4023438 81.9503906,10.9257812 81.4113281,10.5976562 C80.8722656,10.2695312 80.1300781,10.1054688 79.1847656,10.1054688 C77.4894531,10.1054688 76.1476563,10.6699219 75.159375,11.7988281 C74.1710938,12.9277344 73.6769531,14.3945312 73.6769531,16.1992188 C73.6769531,17.9882812 74.1847656,19.4042969 75.2003906,20.4472656 C76.2160156,21.4902344 77.5675781,22.0117188 79.2550781,22.0117188 C80.9191406,22.0117188 82.0832031,21.3828125 82.7472656,20.125 L82.7472656,21.8476563 Z M79.7824219,20.0546875 C78.8917969,20.0390625 78.1515625,19.7011719 77.5617188,19.0410156 C76.971875,18.3808594 76.6769531,17.40625 76.6769531,16.1171875 C76.6769531,15.671875 76.7121094,15.2558594 76.7824219,14.8691406 C76.8527344,14.4824219 76.9660156,14.1152344 77.1222656,13.7675781 C77.2785156,13.4199219 77.4738281,13.1230469 77.7082031,12.8769531 C77.9425781,12.6308594 78.2355469,12.4355469 78.5871094,12.2910156 C78.9386719,12.1464844 79.3332031,12.0742188 79.7707031,12.0742188 C81.7550781,12.0742188 82.7472656,13.4296875 82.7472656,16.140625 C82.7472656,17.4765625 82.4777344,18.4589844 81.9386719,19.0878906 C81.3996094,19.7167969 80.6808594,20.0390625 79.7824219,20.0546875 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M101.959766,22.3046875 C103.139453,22.3046875 104.776172,22.0742188 106.869922,21.6132813 C107.455859,21.4726563 107.871875,21.2382812 108.117969,20.9101563 C108.364063,20.5820313 108.487109,20.0625 108.487109,19.3515625 L108.487109,14.4296875 C108.487109,13.375 108.061328,12.8476563 107.209766,12.8476563 L102.674609,12.8476563 C102.330859,12.8476563 102.063281,12.9707031 101.871875,13.2167969 C101.680469,13.4628906 101.584766,13.7421875 101.584766,14.0546875 C101.584766,14.390625 101.686328,14.6738281 101.889453,14.9042969 C102.092578,15.1347656 102.369922,15.25 102.721484,15.25 L105.170703,15.25 L105.170703,19.2460938 C104.139453,19.4882813 103.123828,19.609375 102.123828,19.609375 C98.6785156,19.609375 96.9558594,17.5585938 96.9558594,13.4570312 C96.9558594,12.5117188 97.0535156,11.671875 97.2488281,10.9375 C97.4441406,10.203125 97.7488281,9.56054688 98.1628906,9.00976563 C98.5769531,8.45898438 99.1277344,8.03710937 99.8152344,7.74414062 C100.502734,7.45117188 101.311328,7.3046875 102.241016,7.3046875 C103.342578,7.3046875 104.705859,7.5546875 106.330859,8.0546875 C106.448047,8.1171875 106.580859,8.1484375 106.729297,8.1484375 C107.073047,8.1484375 107.354297,8.00976562 107.573047,7.73242188 C107.791797,7.45507813 107.901172,7.140625 107.901172,6.7890625 C107.901172,6.5703125 107.838672,6.35742188 107.713672,6.15039062 C107.588672,5.94335938 107.405078,5.77734375 107.162891,5.65234375 C105.725391,4.96484375 104.041797,4.62109375 102.112109,4.62109375 C100.682422,4.62109375 99.4050781,4.8515625 98.2800781,5.3125 C97.1550781,5.7734375 96.2410156,6.41015625 95.5378906,7.22265625 C94.8347656,8.03515625 94.3035156,8.96875 93.9441406,10.0234375 C93.5847656,11.078125 93.4050781,12.2265625 93.4050781,13.46875 C93.4050781,15.3671875 93.7605469,16.984375 94.4714844,18.3203125 C95.1824219,19.65625 96.1746094,20.6542969 97.4480469,21.3144531 C98.7214844,21.9746094 100.225391,22.3046875 101.959766,22.3046875 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M111.929297,22 C112.366797,22 112.724219,21.8632812 113.001563,21.5898438 C113.278906,21.3164063 113.417578,20.9257813 113.417578,20.4179688 L113.417578,15.4726562 C113.417578,14.9179687 113.507422,14.4492188 113.687109,14.0664062 C113.866797,13.6835937 114.120703,13.3984375 114.448828,13.2109375 C114.776953,13.0234375 115.11875,12.8925781 115.474219,12.8183594 C115.829688,12.7441406 116.237891,12.7070313 116.698828,12.7070313 C117.003516,12.7070313 117.245703,12.5820313 117.425391,12.3320312 C117.605078,12.0820312 117.694922,11.7929687 117.694922,11.4648437 C117.694922,11.1210938 117.597266,10.828125 117.401953,10.5859375 C117.206641,10.34375 116.929297,10.2226562 116.569922,10.2226562 C115.835547,10.2226562 115.173438,10.4746094 114.583594,10.9785156 C113.99375,11.4824219 113.593359,12.0859375 113.382422,12.7890625 L113.417578,11.5820312 C113.425391,11.1679688 113.282813,10.8417969 112.989844,10.6035156 C112.696875,10.3652344 112.351172,10.2460938 111.952734,10.2460938 C111.546484,10.2460938 111.196875,10.3691406 110.903906,10.6152344 C110.610938,10.8613281 110.464453,11.2070312 110.464453,11.6523438 L110.464453,20.3945312 C110.464453,20.9101562 110.603125,21.3066406 110.880469,21.5839844 C111.157813,21.8613281 111.507422,22 111.929297,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M122.074609,22.140625 C122.980859,22.140625 123.713281,21.9550781 124.271875,21.5839844 C124.830469,21.2128906 125.234766,20.7304688 125.484766,20.1367188 L125.484766,20.5234375 C125.484766,20.9921875 125.623437,21.3554688 125.900781,21.6132812 C126.178125,21.8710938 126.508203,22 126.891016,22 C127.273828,22 127.601953,21.8710938 127.875391,21.6132812 C128.148828,21.3554688 128.285547,20.9921875 128.285547,20.5234375 L128.285547,14.5703125 C128.285547,13.7734375 128.166406,13.0820312 127.928125,12.4960937 C127.689844,11.9101563 127.351953,11.4453125 126.914453,11.1015625 C126.476953,10.7578125 125.973047,10.5039062 125.402734,10.3398438 C124.832422,10.1757812 124.187891,10.09375 123.469141,10.09375 C121.883203,10.09375 120.441797,10.3984375 119.144922,11.0078125 C118.801172,11.171875 118.629297,11.4453125 118.629297,11.828125 C118.629297,12.1171875 118.725,12.3769531 118.916406,12.6074219 C119.107812,12.8378906 119.340234,12.953125 119.613672,12.953125 C119.746484,12.953125 119.855859,12.9335938 119.941797,12.8945313 C119.949609,12.8945313 120.035547,12.8613281 120.199609,12.7949219 C120.363672,12.7285156 120.469141,12.6875 120.516016,12.671875 C120.562891,12.65625 120.668359,12.6191406 120.832422,12.5605469 C120.996484,12.5019531 121.121484,12.4609375 121.207422,12.4375 C121.293359,12.4140625 121.418359,12.3789062 121.582422,12.3320313 C121.746484,12.2851562 121.887109,12.2519531 122.004297,12.2324219 C122.121484,12.2128906 122.258203,12.1894531 122.414453,12.1621094 C122.570703,12.1347656 122.725,12.1152344 122.877344,12.1035156 C123.029687,12.0917969 123.180078,12.0859375 123.328516,12.0859375 C124.016016,12.0859375 124.533594,12.2558594 124.88125,12.5957031 C125.228906,12.9355469 125.402734,13.4726562 125.402734,14.2070312 L125.402734,15.109375 C124.605859,15.109375 123.926172,15.1191406 123.363672,15.1386719 C122.801172,15.1582031 122.244531,15.1992187 121.69375,15.2617188 C121.142969,15.3242188 120.687891,15.4121094 120.328516,15.5253906 C119.969141,15.6386719 119.629297,15.7851562 119.308984,15.9648438 C118.988672,16.1445313 118.740625,16.3632812 118.564844,16.6210938 C118.389062,16.8789063 118.252344,17.1855469 118.154687,17.5410156 C118.057031,17.8964844 118.008203,18.3085938 118.008203,18.7773438 C118.008203,19.8320313 118.389062,20.6542969 119.150781,21.2441406 C119.9125,21.8339844 120.887109,22.1328125 122.074609,22.140625 Z M122.437891,20.2890625 C121.914453,20.2890625 121.494531,20.1464844 121.178125,19.8613281 C120.861719,19.5761719 120.703516,19.1601562 120.703516,18.6132812 C120.703516,18.2539062 120.758203,17.9589844 120.867578,17.7285156 C120.976953,17.4980469 121.129297,17.3105469 121.324609,17.1660156 C121.519922,17.0214844 121.822656,16.9160156 122.232812,16.8496094 C122.642969,16.7832031 123.076562,16.7382812 123.533594,16.7148438 C123.990625,16.6914062 124.609766,16.6796875 125.391016,16.6796875 L125.391016,17.0195312 C125.391016,17.9648438 125.090234,18.7460938 124.488672,19.3632812 C123.887109,19.9804688 123.203516,20.2890625 122.437891,20.2890625 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M132.032422,22 C132.462109,22 132.815625,21.8574219 133.092969,21.5722656 C133.370313,21.2871094 133.508984,20.890625 133.508984,20.3828125 L133.508984,15.4257812 C133.508984,14.4335938 133.794141,13.6289063 134.364453,13.0117187 C134.934766,12.3945312 135.610547,12.0859375 136.391797,12.0859375 C137.040234,12.0859375 137.573438,12.2988281 137.991406,12.7246094 C138.409375,13.1503906 138.618359,13.765625 138.618359,14.5703125 L138.618359,20.3828125 C138.618359,20.890625 138.755078,21.2871094 139.028516,21.5722656 C139.301953,21.8574219 139.645703,22 140.059766,22 C140.497266,22 140.856641,21.8574219 141.137891,21.5722656 C141.419141,21.2871094 141.559766,20.890625 141.559766,20.3828125 L141.559766,14.5820312 C141.559766,13.8320313 141.446484,13.1640625 141.219922,12.578125 C140.993359,11.9921875 140.684766,11.5214844 140.294141,11.1660156 C139.903516,10.8105469 139.460156,10.5410156 138.964063,10.3574219 C138.467969,10.1738281 137.938672,10.0820312 137.376172,10.0820312 C136.462109,10.0820312 135.675,10.2539062 135.014844,10.5976562 C134.354688,10.9414062 133.852734,11.4453125 133.508984,12.109375 L133.508984,11.6992188 C133.508984,11.2382812 133.372266,10.8808594 133.098828,10.6269531 C132.825391,10.3730469 132.477734,10.2460938 132.055859,10.2460938 C131.626172,10.2460938 131.270703,10.375 130.989453,10.6328125 C130.708203,10.890625 130.567578,11.2539062 130.567578,11.7226563 L130.567578,20.3828125 C130.567578,20.890625 130.704297,21.2871094 130.977734,21.5722656 C131.251172,21.8574219 131.602734,22 132.032422,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M147.884766,22.140625 C149.736328,22.140625 151.013672,21.46875 151.716797,20.125 L151.716797,20.5585938 C151.716797,21.0195313 151.853516,21.375 152.126953,21.625 C152.400391,21.875 152.748047,22 153.169922,22 C153.599609,22 153.955078,21.8710938 154.236328,21.6132812 C154.517578,21.3554687 154.658203,20.9921875 154.658203,20.5234375 L154.658203,11.875 C154.658203,11.359375 154.521484,10.9589844 154.248047,10.6738281 C153.974609,10.3886719 153.623047,10.2460938 153.193359,10.2460938 C152.763672,10.2460938 152.410156,10.3886719 152.132812,10.6738281 C151.855469,10.9589844 151.716797,11.359375 151.716797,11.875 L151.716797,16.796875 C151.716797,17.7890625 151.429688,18.59375 150.855469,19.2109375 C150.28125,19.828125 149.599609,20.1367188 148.810547,20.1367188 C148.169922,20.1367188 147.642578,19.9238281 147.228516,19.4980469 C146.814453,19.0722656 146.607422,18.4570312 146.607422,17.6523438 L146.607422,11.875 C146.607422,11.359375 146.470703,10.9589844 146.197266,10.6738281 C145.923828,10.3886719 145.580078,10.2460938 145.166016,10.2460938 C144.728516,10.2460938 144.369141,10.3886719 144.087891,10.6738281 C143.806641,10.9589844 143.666016,11.359375 143.666016,11.875 L143.666016,17.6523438 C143.666016,18.3945312 143.78125,19.0585938 144.011719,19.6445313 C144.242188,20.2304688 144.556641,20.7011719 144.955078,21.0566406 C145.353516,21.4121094 145.800781,21.6816406 146.296875,21.8652344 C146.792969,22.0488281 147.322266,22.140625 147.884766,22.140625 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M158.463672,22 C158.901172,22 159.2625,21.8554688 159.547656,21.5664062 C159.832812,21.2773438 159.975391,20.875 159.975391,20.359375 L159.975391,6.75390625 C159.975391,6.23828125 159.834766,5.8359375 159.553516,5.546875 C159.272266,5.2578125 158.916797,5.11328125 158.487109,5.11328125 C158.057422,5.11328125 157.705859,5.2578125 157.432422,5.546875 C157.158984,5.8359375 157.022266,6.23828125 157.022266,6.75390625 L157.022266,20.359375 C157.022266,20.8828125 157.157031,21.2871094 157.426562,21.5722656 C157.696094,21.8574219 158.041797,22 158.463672,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M165.632422,22.140625 C166.538672,22.140625 167.271094,21.9550781 167.829688,21.5839844 C168.388281,21.2128906 168.792578,20.7304688 169.042578,20.1367188 L169.042578,20.5234375 C169.042578,20.9921875 169.18125,21.3554688 169.458594,21.6132812 C169.735938,21.8710938 170.066016,22 170.448828,22 C170.831641,22 171.159766,21.8710938 171.433203,21.6132812 C171.706641,21.3554688 171.843359,20.9921875 171.843359,20.5234375 L171.843359,14.5703125 C171.843359,13.7734375 171.724219,13.0820312 171.485938,12.4960937 C171.247656,11.9101563 170.909766,11.4453125 170.472266,11.1015625 C170.034766,10.7578125 169.530859,10.5039062 168.960547,10.3398438 C168.390234,10.1757812 167.745703,10.09375 167.026953,10.09375 C165.441016,10.09375 163.999609,10.3984375 162.702734,11.0078125 C162.358984,11.171875 162.187109,11.4453125 162.187109,11.828125 C162.187109,12.1171875 162.282813,12.3769531 162.474219,12.6074219 C162.665625,12.8378906 162.898047,12.953125 163.171484,12.953125 C163.304297,12.953125 163.413672,12.9335938 163.499609,12.8945313 C163.507422,12.8945313 163.593359,12.8613281 163.757422,12.7949219 C163.921484,12.7285156 164.026953,12.6875 164.073828,12.671875 C164.120703,12.65625 164.226172,12.6191406 164.390234,12.5605469 C164.554297,12.5019531 164.679297,12.4609375 164.765234,12.4375 C164.851172,12.4140625 164.976172,12.3789062 165.140234,12.3320313 C165.304297,12.2851562 165.444922,12.2519531 165.562109,12.2324219 C165.679297,12.2128906 165.816016,12.1894531 165.972266,12.1621094 C166.128516,12.1347656 166.282813,12.1152344 166.435156,12.1035156 C166.5875,12.0917969 166.737891,12.0859375 166.886328,12.0859375 C167.573828,12.0859375 168.091406,12.2558594 168.439063,12.5957031 C168.786719,12.9355469 168.960547,13.4726562 168.960547,14.2070312 L168.960547,15.109375 C168.163672,15.109375 167.483984,15.1191406 166.921484,15.1386719 C166.358984,15.1582031 165.802344,15.1992187 165.251563,15.2617188 C164.700781,15.3242188 164.245703,15.4121094 163.886328,15.5253906 C163.526953,15.6386719 163.187109,15.7851562 162.866797,15.9648438 C162.546484,16.1445313 162.298438,16.3632812 162.122656,16.6210938 C161.946875,16.8789063 161.810156,17.1855469 161.7125,17.5410156 C161.614844,17.8964844 161.566016,18.3085938 161.566016,18.7773438 C161.566016,19.8320313 161.946875,20.6542969 162.708594,21.2441406 C163.470313,21.8339844 164.444922,22.1328125 165.632422,22.140625 Z M165.995703,20.2890625 C165.472266,20.2890625 165.052344,20.1464844 164.735938,19.8613281 C164.419531,19.5761719 164.261328,19.1601562 164.261328,18.6132812 C164.261328,18.2539062 164.316016,17.9589844 164.425391,17.7285156 C164.534766,17.4980469 164.687109,17.3105469 164.882422,17.1660156 C165.077734,17.0214844 165.380469,16.9160156 165.790625,16.8496094 C166.200781,16.7832031 166.634375,16.7382812 167.091406,16.7148438 C167.548438,16.6914062 168.167578,16.6796875 168.948828,16.6796875 L168.948828,17.0195312 C168.948828,17.9648438 168.648047,18.7460938 168.046484,19.3632812 C167.444922,19.9804688 166.761328,20.2890625 165.995703,20.2890625 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M175.601953,22 C176.039453,22 176.396875,21.8632812 176.674219,21.5898438 C176.951562,21.3164063 177.090234,20.9257813 177.090234,20.4179688 L177.090234,15.4726562 C177.090234,14.9179687 177.180078,14.4492188 177.359766,14.0664062 C177.539453,13.6835937 177.793359,13.3984375 178.121484,13.2109375 C178.449609,13.0234375 178.791406,12.8925781 179.146875,12.8183594 C179.502344,12.7441406 179.910547,12.7070313 180.371484,12.7070313 C180.676172,12.7070313 180.918359,12.5820313 181.098047,12.3320312 C181.277734,12.0820312 181.367578,11.7929687 181.367578,11.4648437 C181.367578,11.1210938 181.269922,10.828125 181.074609,10.5859375 C180.879297,10.34375 180.601953,10.2226562 180.242578,10.2226562 C179.508203,10.2226562 178.846094,10.4746094 178.25625,10.9785156 C177.666406,11.4824219 177.266016,12.0859375 177.055078,12.7890625 L177.090234,11.5820312 C177.098047,11.1679688 176.955469,10.8417969 176.6625,10.6035156 C176.369531,10.3652344 176.023828,10.2460938 175.625391,10.2460938 C175.219141,10.2460938 174.869531,10.3691406 174.576562,10.6152344 C174.283594,10.8613281 174.137109,11.2070312 174.137109,11.6523438 L174.137109,20.3945312 C174.137109,20.9101562 174.275781,21.3066406 174.553125,21.5839844 C174.830469,21.8613281 175.180078,22 175.601953,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M183.942578,22 C184.372266,22 184.725781,21.8554688 185.003125,21.5664062 C185.280469,21.2773437 185.419141,20.875 185.419141,20.359375 L185.419141,11.8984375 C185.419141,11.375 185.280469,10.96875 185.003125,10.6796875 C184.725781,10.390625 184.372266,10.2460938 183.942578,10.2460938 C183.512891,10.2460938 183.161328,10.390625 182.887891,10.6796875 C182.614453,10.96875 182.477734,11.375 182.477734,11.8984375 L182.477734,20.359375 C182.477734,20.8828125 182.614453,21.2871094 182.887891,21.5722656 C183.161328,21.8574219 183.512891,22 183.942578,22 Z M183.942578,7.90234375 C184.450391,7.90234375 184.8625,7.75195312 185.178906,7.45117188 C185.495313,7.15039063 185.653516,6.7578125 185.653516,6.2734375 C185.653516,5.7890625 185.497266,5.3984375 185.184766,5.1015625 C184.872266,4.8046875 184.462109,4.65625 183.954297,4.65625 C183.438672,4.65625 183.022656,4.8046875 182.70625,5.1015625 C182.389844,5.3984375 182.231641,5.7890625 182.231641,6.2734375 C182.231641,6.7578125 182.389844,7.15039063 182.70625,7.45117188 C183.022656,7.75195312 183.434766,7.90234375 183.942578,7.90234375 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M191.884766,22.1054688 L192.142578,22.1054688 C192.423828,22.1054688 192.650391,22.1015625 192.822266,22.09375 C192.994141,22.0859375 193.199219,22.0722656 193.4375,22.0527344 C193.675781,22.0332031 193.865234,21.9960938 194.005859,21.9414062 C194.146484,21.8867188 194.285156,21.8164062 194.421875,21.7304688 C194.558594,21.6445312 194.658203,21.53125 194.720703,21.390625 C194.783203,21.25 194.814453,21.0859375 194.814453,20.8984375 C194.814453,20.6015625 194.720703,20.34375 194.533203,20.125 C194.345703,19.90625 194.091797,19.796875 193.771484,19.796875 L193.689453,19.796875 L192.857422,19.84375 L192.728516,19.84375 C192.251953,19.84375 191.919922,19.6601562 191.732422,19.2929688 C191.544922,18.9257812 191.451172,18.3671875 191.451172,17.6171875 L191.451172,12.2851562 L193.326172,12.2851562 C193.662109,12.2851562 193.916016,12.1992188 194.087891,12.0273438 C194.259766,11.8554688 194.345703,11.6210938 194.345703,11.3242188 C194.345703,11.0195313 194.259766,10.7753906 194.087891,10.5917969 C193.916016,10.4082031 193.666016,10.3164062 193.337891,10.3164062 L191.451172,10.3164062 L191.451172,7.328125 C191.451172,6.875 191.355469,6.53515625 191.164062,6.30859375 C190.972656,6.08203125 190.705078,5.96875 190.361328,5.96875 C189.931641,5.96875 189.59375,6.10351562 189.347656,6.37304688 C189.101562,6.64257812 188.955078,6.98828125 188.908203,7.41015625 L188.556641,10.3164062 L187.326172,10.3164062 C186.990234,10.3164062 186.736328,10.4042969 186.564453,10.5800781 C186.392578,10.7558594 186.306641,10.9882813 186.306641,11.2773438 C186.306641,11.5820313 186.398438,11.8261719 186.582031,12.0097656 C186.765625,12.1933594 187.025391,12.2851562 187.361328,12.2851562 L188.498047,12.2851562 L188.498047,18.34375 C188.498047,19.5546875 188.796875,20.4824219 189.394531,21.1269531 C189.992188,21.7714844 190.822266,22.0976562 191.884766,22.1054688 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M199.627734,26.171875 L206.623828,12.1679688 C206.733203,11.9492187 206.787891,11.7226562 206.787891,11.4882812 C206.787891,11.1210938 206.653125,10.8105469 206.383594,10.5566406 C206.114062,10.3027344 205.799609,10.1757812 205.440234,10.1757812 C204.869922,10.1757812 204.436328,10.4804688 204.139453,11.0898438 L201.010547,17.3359375 L197.893359,11.1953125 C197.604297,10.6328125 197.182422,10.3515625 196.627734,10.3515625 C196.252734,10.3515625 195.920703,10.4804688 195.631641,10.7382812 C195.342578,10.9960938 195.198047,11.3085938 195.198047,11.6757812 C195.198047,11.9023437 195.252734,12.1210938 195.362109,12.3320312 L199.545703,20.2773438 L197.201953,24.953125 C197.100391,25.15625 197.049609,25.3671875 197.049609,25.5859375 C197.049609,25.96875 197.194141,26.2949219 197.483203,26.5644531 C197.772266,26.8339844 198.100391,26.96875 198.467578,26.96875 C198.975391,26.96875 199.362109,26.703125 199.627734,26.171875 Z" id="Path" fill-rule="nonzero"></path>
+                </g>
+            </g>
+            <g id="List" transform="translate(0, 50.4)">
+                <g id="bg">
+                    <path d="M0,0 L214,0 L214,136 C214,140.418278 210.418278,144 206,144 L8,144 C3.581722,144 0,140.418278 0,136 L0,0 L0,0 Z" fill="#FFFFFF"></path>
+                    <path d="M214,0 L214,136 C214,140.418278 210.418278,144 206,144 L8,144 C3.581722,144 0,140.418278 0,136 L0,0 L214,0 Z M212.4,1.60000004 L1.6,1.60000004 L1.6,136 C1.6,139.534622 4.4653776,142.4 8,142.4 L206,142.4 C209.534622,142.4 212.4,139.534622 212.4,136 L212.4,1.60000004 Z" fill="#9172E2" fill-rule="nonzero"></path>
+                </g>
+                <g id="List-Item-/-01_@text_#333" transform="translate(0, 12)" xlink:href="#path-3" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M8.07421875,25.2792969 C10.1731771,25.2792969 11.8341471,24.8180339 13.0571289,23.8955078 C14.2801107,22.9729818 14.8916016,21.6822917 14.8916016,20.0234375 C14.8916016,19.3378906 14.7879232,18.7327474 14.5805664,18.2080078 C14.3732096,17.6832682 14.0600586,17.2325846 13.6411133,16.855957 C13.222168,16.4793294 12.7566732,16.1598307 12.2446289,15.8974609 C11.7325846,15.6350911 11.1083984,15.3854167 10.3720703,15.1484375 L6.71582031,13.9677734 C5.85253906,13.680013 5.22412109,13.3393555 4.83056641,12.9458008 C4.43701172,12.5522461 4.24023438,12.0253906 4.24023438,11.3652344 C4.24023438,10.485026 4.62109375,9.81217448 5.3828125,9.34667969 C6.14453125,8.8811849 7.16015625,8.6484375 8.4296875,8.6484375 C9.31835938,8.6484375 10.1541341,8.79020182 10.9370117,9.07373047 C11.7198893,9.35725911 12.3694661,9.66829427 12.8857422,10.0068359 C13.0634766,10.1253255 13.2496745,10.1845703 13.4443359,10.1845703 C13.7151693,10.1845703 13.9479167,10.078776 14.1425781,9.8671875 C14.3372396,9.65559896 14.4345703,9.41861979 14.4345703,9.15625 C14.4345703,8.90234375 14.3372396,8.69075521 14.1425781,8.52148437 C13.5670573,8.00520833 12.7355143,7.55452474 11.6479492,7.16943359 C10.5603841,6.78434245 9.41992188,6.59179688 8.2265625,6.59179688 C7.0078125,6.59179688 5.91389974,6.78222656 4.94482422,7.16308594 C3.9757487,7.54394531 3.20556641,8.10677083 2.63427734,8.8515625 C2.06298828,9.59635417 1.77734375,10.4596354 1.77734375,11.4414062 C1.77734375,12.6601562 2.10742188,13.6165365 2.76757813,14.3105469 C3.42773437,15.0045573 4.43066406,15.5800781 5.77636719,16.0371094 L9.41992188,17.2558594 C9.96158854,17.4420573 10.4016927,17.6155599 10.7402344,17.7763672 C11.078776,17.9371745 11.3813477,18.1360677 11.6479492,18.3730469 C11.9145508,18.610026 12.1049805,18.887207 12.2192383,19.2045898 C12.3334961,19.5219727 12.390625,19.9049479 12.390625,20.3535156 C12.390625,21.2845052 11.9991862,21.9954427 11.2163086,22.4863281 C10.433431,22.9772135 9.42415365,23.2226562 8.18847656,23.2226562 C6.29264323,23.2226562 4.62532552,22.6640625 3.18652344,21.546875 C3.09342448,21.4791667 2.98763021,21.4453125 2.86914063,21.4453125 C2.60677083,21.4453125 2.34651693,21.5891927 2.08837891,21.8769531 C1.83024089,22.1647135 1.70117188,22.4440104 1.70117188,22.7148438 C1.70117188,22.875651 1.75195312,23.0026042 1.85351563,23.0957031 C3.41927083,24.5514323 5.49283854,25.2792969 8.07421875,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M18.8173828,17.7382812 C18.8597005,17.2135417 18.9718424,16.7163086 19.1538086,16.246582 C19.3357747,15.7768555 19.5812174,15.3494466 19.8901367,14.9643555 C20.199056,14.5792643 20.5947266,14.2724609 21.0771484,14.0439453 C21.5595703,13.8154297 22.0970052,13.7011719 22.6894531,13.7011719 C23.8320312,13.7011719 24.6995443,14.0777995 25.2919922,14.8310547 C25.8844401,15.5843099 26.21875,16.5533854 26.2949219,17.7382812 L18.8173828,17.7382812 Z M22.7275391,25.2285156 C24.6233724,25.2285156 26.2399089,24.6276042 27.5771484,23.4257812 C27.8141276,23.2141927 27.9326172,22.9729818 27.9326172,22.7021484 C27.9326172,22.4905599 27.862793,22.3064779 27.7231445,22.1499023 C27.5834961,21.9933268 27.4121094,21.9150391 27.2089844,21.9150391 C27.0397135,21.9150391 26.8789063,21.9742839 26.7265625,22.0927734 C26.1171875,22.5582682 25.5120443,22.9264323 24.9111328,23.1972656 C24.3102214,23.468099 23.633138,23.6035156 22.8798828,23.6035156 C21.6865234,23.5865885 20.7089844,23.2036133 19.9472656,22.4545898 C19.1855469,21.7055664 18.796224,20.6031901 18.7792969,19.1474609 L27.4882812,19.1474609 C27.7591146,19.1474609 27.9622396,19.0670573 28.0976562,18.90625 C28.2330729,18.7454427 28.3007812,18.5423177 28.3007812,18.296875 C28.2753906,17.4420573 28.1526693,16.6570638 27.9326172,15.9418945 C27.7125651,15.2267253 27.382487,14.5792643 26.9423828,13.9995117 C26.5022786,13.4197591 25.9098307,12.9648438 25.1650391,12.6347656 C24.4202474,12.3046875 23.5527344,12.1396484 22.5625,12.1396484 C21.3098958,12.1396484 20.2138672,12.4443359 19.2744141,13.0537109 C18.3349609,13.6630859 17.632487,14.4544271 17.1669922,15.4277344 C16.7014974,16.4010417 16.46875,17.4759115 16.46875,18.6523437 C16.46875,19.9895833 16.745931,21.159668 17.300293,22.1625977 C17.8546549,23.1655273 18.6015625,23.9251302 19.5410156,24.4414063 C20.4804687,24.9576823 21.5426432,25.2200521 22.7275391,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M35.578125,23.6542969 C34.9179688,23.6542969 34.340332,23.5083008 33.8452148,23.2163086 C33.3500977,22.9243164 32.9671224,22.5307617 32.6962891,22.0356445 C32.4254557,21.5405273 32.2244466,21.0136719 32.0932617,20.4550781 C31.9620768,19.8964844 31.8964844,19.3125 31.8964844,18.703125 C31.8964844,18.2376302 31.9303385,17.7848307 31.9980469,17.3447266 C32.0657552,16.9046224 32.1863607,16.4602865 32.3598633,16.0117188 C32.5333659,15.563151 32.7513021,15.1738281 33.0136719,14.84375 C33.2760417,14.5136719 33.6166992,14.2428385 34.0356445,14.03125 C34.4545898,13.8196615 34.9306641,13.7096354 35.4638672,13.7011719 C38.0875651,13.7011719 39.3994141,15.3811849 39.3994141,18.7412109 C39.3994141,19.3844401 39.327474,19.9874674 39.1835938,20.550293 C39.0397135,21.1131185 38.8217773,21.6315104 38.5297852,22.1054688 C38.237793,22.5794271 37.8400065,22.9560547 37.3364258,23.2353516 C36.8328451,23.5146484 36.2467448,23.6542969 35.578125,23.6542969 Z M40.5673828,29.8242188 C40.8720703,29.8242188 41.1386719,29.7290039 41.3671875,29.5385742 C41.5957031,29.3481445 41.7099609,29.0878906 41.7099609,28.7578125 L41.7099609,13.4599609 C41.7099609,13.1214193 41.597819,12.8590495 41.3735352,12.6728516 C41.1492513,12.4866536 40.8805339,12.3935547 40.5673828,12.3935547 C40.1865234,12.3935547 39.898763,12.4887695 39.7041016,12.6791992 C39.5094401,12.8696289 39.4121094,13.1298828 39.4121094,13.4599609 L39.4121094,14.5898438 C39.0058594,13.7773438 38.4239909,13.1658529 37.6665039,12.7553711 C36.9090169,12.3448893 36.039388,12.1396484 35.0576172,12.1396484 C34.1774089,12.1396484 33.3839518,12.3194987 32.6772461,12.6791992 C31.9705404,13.0388997 31.3971354,13.5255534 30.9570312,14.1391602 C30.5169271,14.7527669 30.1805013,15.453125 29.9477539,16.2402344 C29.7150065,17.0273438 29.5986328,17.8652344 29.5986328,18.7539062 C29.5986328,19.6595052 29.7255859,20.5037435 29.9794922,21.2866211 C30.2333984,22.0694987 30.5952148,22.7550456 31.0649414,23.3432617 C31.534668,23.9314779 32.1313477,24.3927409 32.8549805,24.7270508 C33.5786133,25.0613607 34.3805339,25.2285156 35.2607422,25.2285156 C36.2001953,25.2285156 37.0317383,25.0232747 37.7553711,24.612793 C38.4790039,24.2023112 39.03125,23.6162109 39.4121094,22.8544922 L39.4121094,28.7578125 C39.4121094,29.0878906 39.5263672,29.3481445 39.7548828,29.5385742 C39.9833984,29.7290039 40.2542318,29.8242188 40.5673828,29.8242188 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M49.0126953,25.2285156 C49.9860026,25.2285156 50.8408203,25.0211589 51.5771484,24.6064453 C52.3134766,24.1917318 52.8424479,23.5992839 53.1640625,22.8291016 L53.1640625,23.9589844 C53.1640625,24.2890625 53.2740885,24.5450846 53.4941406,24.7270508 C53.7141927,24.9090169 53.9807943,25 54.2939453,25 C54.6070964,25 54.8779297,24.9047852 55.1064453,24.7143555 C55.3349609,24.5239258 55.4492188,24.2594401 55.4492188,23.9208984 L55.4492188,13.4726562 C55.4492188,13.125651 55.3391927,12.8569336 55.1191406,12.6665039 C54.8990885,12.4760742 54.632487,12.3808594 54.3193359,12.3808594 C54.0061849,12.3808594 53.7353516,12.4781901 53.5068359,12.6728516 C53.2783203,12.867513 53.1640625,13.1341146 53.1640625,13.4726562 L53.1640625,19.7822266 C53.1048177,20.9840495 52.7366536,21.9298503 52.0595703,22.6196289 C51.382487,23.3094076 50.5530599,23.6542969 49.5712891,23.6542969 C48.6572266,23.6542969 47.9547526,23.3728841 47.4638672,22.8100586 C46.9729818,22.2472331 46.7275391,21.4241536 46.7275391,20.3408203 L46.7275391,13.4726562 C46.7275391,13.125651 46.6153971,12.8569336 46.3911133,12.6665039 C46.1668294,12.4760742 45.898112,12.3808594 45.5849609,12.3808594 C45.2718099,12.3808594 45.0009766,12.4760742 44.7724609,12.6665039 C44.5439453,12.8569336 44.4296875,13.125651 44.4296875,13.4726562 L44.4296875,20.3154297 C44.4296875,21.9573568 44.8465169,23.1866862 45.6801758,24.003418 C46.5138346,24.8201497 47.6246745,25.2285156 49.0126953,25.2285156 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M59.8701172,17.7382812 C59.9124349,17.2135417 60.0245768,16.7163086 60.206543,16.246582 C60.3885091,15.7768555 60.6339518,15.3494466 60.9428711,14.9643555 C61.2517904,14.5792643 61.6474609,14.2724609 62.1298828,14.0439453 C62.6123047,13.8154297 63.1497396,13.7011719 63.7421875,13.7011719 C64.8847656,13.7011719 65.7522786,14.0777995 66.3447266,14.8310547 C66.9371745,15.5843099 67.2714844,16.5533854 67.3476562,17.7382812 L59.8701172,17.7382812 Z M63.7802734,25.2285156 C65.6761068,25.2285156 67.2926432,24.6276042 68.6298828,23.4257812 C68.866862,23.2141927 68.9853516,22.9729818 68.9853516,22.7021484 C68.9853516,22.4905599 68.9155273,22.3064779 68.7758789,22.1499023 C68.6362305,21.9933268 68.4648438,21.9150391 68.2617188,21.9150391 C68.0924479,21.9150391 67.9316406,21.9742839 67.7792969,22.0927734 C67.1699219,22.5582682 66.5647786,22.9264323 65.9638672,23.1972656 C65.3629557,23.468099 64.6858724,23.6035156 63.9326172,23.6035156 C62.7392578,23.5865885 61.7617187,23.2036133 61,22.4545898 C60.2382812,21.7055664 59.8489583,20.6031901 59.8320313,19.1474609 L68.5410156,19.1474609 C68.811849,19.1474609 69.014974,19.0670573 69.1503906,18.90625 C69.2858073,18.7454427 69.3535156,18.5423177 69.3535156,18.296875 C69.328125,17.4420573 69.2054036,16.6570638 68.9853516,15.9418945 C68.7652995,15.2267253 68.4352214,14.5792643 67.9951172,13.9995117 C67.555013,13.4197591 66.9625651,12.9648438 66.2177734,12.6347656 C65.4729818,12.3046875 64.6054688,12.1396484 63.6152344,12.1396484 C62.3626302,12.1396484 61.2666016,12.4443359 60.3271484,13.0537109 C59.3876953,13.6630859 58.6852214,14.4544271 58.2197266,15.4277344 C57.7542318,16.4010417 57.5214844,17.4759115 57.5214844,18.6523437 C57.5214844,19.9895833 57.7986654,21.159668 58.3530273,22.1625977 C58.9073893,23.1655273 59.6542969,23.9251302 60.59375,24.4414063 C61.5332031,24.9576823 62.5953776,25.2200521 63.7802734,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M72.6445312,25 C72.9576823,25 73.2306315,24.9026693 73.4633789,24.7080078 C73.6961263,24.5133464 73.8125,24.2467448 73.8125,23.9082031 L73.8125,17.5732422 C73.8717448,16.3798828 74.2356771,15.4361979 74.9042969,14.7421875 C75.5729167,14.0481771 76.3938802,13.7011719 77.3671875,13.7011719 C78.28125,13.7011719 78.9900716,13.9847005 79.4936523,14.5517578 C79.9972331,15.1188151 80.2490234,15.9440104 80.2490234,17.0273438 L80.2490234,23.9082031 C80.2490234,24.2552083 80.3611654,24.5239258 80.5854492,24.7143555 C80.8097331,24.9047852 81.0784505,25 81.3916016,25 C81.7047526,25 81.9734701,24.9047852 82.1977539,24.7143555 C82.4220378,24.5239258 82.5341797,24.2552083 82.5341797,23.9082031 L82.5341797,17.0527344 C82.5341797,15.4023437 82.1194661,14.1708984 81.2900391,13.3583984 C80.460612,12.5458984 79.3476562,12.1396484 77.9511719,12.1396484 C76.9778646,12.1396484 76.1251628,12.3470052 75.3930664,12.7617188 C74.6609701,13.1764323 74.1341146,13.7688802 73.8125,14.5390625 L73.8125,13.4345703 C73.8125,13.1044922 73.7003581,12.8484701 73.4760742,12.6665039 C73.2517904,12.4845378 72.9830729,12.3935547 72.6699219,12.3935547 C72.3567708,12.3935547 72.0859375,12.4887695 71.8574219,12.6791992 C71.6289062,12.8696289 71.5146484,13.1341146 71.5146484,13.4726563 L71.5146484,23.9082031 C71.5146484,24.2552083 71.6246745,24.5239258 71.8447266,24.7143555 C72.0647786,24.9047852 72.3313802,25 72.6445312,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M90.3955078,25.2412109 C92.054362,25.2412109 93.3408203,24.8730469 94.2548828,24.1367188 C94.5426432,23.8997396 94.6865234,23.6416016 94.6865234,23.3623047 C94.6865234,23.1676432 94.6188151,23.0047201 94.4833984,22.8735352 C94.3479818,22.7423503 94.1829427,22.6767578 93.9882812,22.6767578 C93.8105469,22.6767578 93.641276,22.7317708 93.4804688,22.8417969 C92.7018229,23.3834635 91.7327474,23.6542969 90.5732422,23.6542969 C89.9130859,23.6542969 89.3291016,23.5167643 88.8212891,23.2416992 C88.3134766,22.9666341 87.9072266,22.5942383 87.6025391,22.1245117 C87.2978516,21.6547852 87.0693359,21.1321615 86.9169922,20.5566406 C86.7646484,19.9811198 86.6884766,19.3717448 86.6884766,18.7285156 C86.6884766,17.188151 87.0651042,15.9630534 87.8183594,15.0532227 C88.5716146,14.1433919 89.5533854,13.6884766 90.7636719,13.6884766 C91.6946615,13.6884766 92.5452474,13.938151 93.3154297,14.4375 C93.4847005,14.547526 93.6624349,14.6025391 93.8486328,14.6025391 C94.0517578,14.6025391 94.2231445,14.5390625 94.362793,14.4121094 C94.5024414,14.2851563 94.5722656,14.1285807 94.5722656,13.9423828 C94.5722656,13.680013 94.4283854,13.4345703 94.140625,13.2060547 C93.7766927,12.9013672 93.2858073,12.6474609 92.6679688,12.4443359 C92.0501302,12.2412109 91.3772786,12.1396484 90.6494141,12.1396484 C89.4052734,12.1396484 88.305013,12.4316406 87.3486328,13.015625 C86.3922526,13.5996094 85.6601562,14.3867188 85.1523438,15.3769531 C84.6445312,16.3671875 84.390625,17.4716797 84.390625,18.6904297 C84.390625,20.5777995 84.9322917,22.1414388 86.015625,23.3813477 C87.0989583,24.6212565 88.5589193,25.2412109 90.3955078,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M97.9902344,17.7382812 C98.0325521,17.2135417 98.144694,16.7163086 98.3266602,16.246582 C98.5086263,15.7768555 98.754069,15.3494466 99.0629883,14.9643555 C99.3719076,14.5792643 99.7675781,14.2724609 100.25,14.0439453 C100.732422,13.8154297 101.269857,13.7011719 101.862305,13.7011719 C103.004883,13.7011719 103.872396,14.0777995 104.464844,14.8310547 C105.057292,15.5843099 105.391602,16.5533854 105.467773,17.7382812 L97.9902344,17.7382812 Z M101.900391,25.2285156 C103.796224,25.2285156 105.41276,24.6276042 106.75,23.4257812 C106.986979,23.2141927 107.105469,22.9729818 107.105469,22.7021484 C107.105469,22.4905599 107.035645,22.3064779 106.895996,22.1499023 C106.756348,21.9933268 106.584961,21.9150391 106.381836,21.9150391 C106.212565,21.9150391 106.051758,21.9742839 105.899414,22.0927734 C105.290039,22.5582682 104.684896,22.9264323 104.083984,23.1972656 C103.483073,23.468099 102.80599,23.6035156 102.052734,23.6035156 C100.859375,23.5865885 99.8818359,23.2036133 99.1201172,22.4545898 C98.3583984,21.7055664 97.9690755,20.6031901 97.9521484,19.1474609 L106.661133,19.1474609 C106.931966,19.1474609 107.135091,19.0670573 107.270508,18.90625 C107.405924,18.7454427 107.473633,18.5423177 107.473633,18.296875 C107.448242,17.4420573 107.325521,16.6570638 107.105469,15.9418945 C106.885417,15.2267253 106.555339,14.5792643 106.115234,13.9995117 C105.67513,13.4197591 105.082682,12.9648438 104.337891,12.6347656 C103.593099,12.3046875 102.725586,12.1396484 101.735352,12.1396484 C100.482747,12.1396484 99.3867188,12.4443359 98.4472656,13.0537109 C97.5078125,13.6630859 96.8053385,14.4544271 96.3398438,15.4277344 C95.874349,16.4010417 95.6416016,17.4759115 95.6416016,18.6523437 C95.6416016,19.9895833 95.9187826,21.159668 96.4731445,22.1625977 C97.0275065,23.1655273 97.7744141,23.9251302 98.7138672,24.4414063 C99.6533203,24.9576823 100.715495,25.2200521 101.900391,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M109.21582,19.1728516 L114.865234,19.1728516 C115.161458,19.1728516 115.404785,19.0924479 115.595215,18.9316406 C115.785645,18.7708333 115.880859,18.5634766 115.880859,18.3095703 C115.880859,18.038737 115.78776,17.8208008 115.601562,17.6557617 C115.415365,17.4907227 115.169922,17.4082031 114.865234,17.4082031 L109.21582,17.4082031 C108.911133,17.4082031 108.66569,17.4907227 108.479492,17.6557617 C108.293294,17.8208008 108.200195,18.038737 108.200195,18.3095703 C108.200195,18.5634766 108.29541,18.7708333 108.48584,18.9316406 C108.67627,19.0924479 108.919596,19.1728516 109.21582,19.1728516 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M131.130859,25 C131.841797,25 132.341146,24.5768229 132.628906,23.7304688 L136.094727,13.8027344 C136.145508,13.6673177 136.170898,13.531901 136.170898,13.3964844 C136.170898,13.1002604 136.062988,12.8611654 135.847168,12.6791992 C135.631348,12.4972331 135.383789,12.40625 135.104492,12.40625 C134.59668,12.40625 134.26237,12.6559245 134.101562,13.1552734 L130.953125,22.5625 L127.804688,13.6503906 C127.508464,12.8040365 126.996419,12.3808594 126.268555,12.3808594 C125.549154,12.3808594 125.041341,12.8040365 124.745117,13.6503906 L121.59668,22.5625 L118.448242,13.1552734 C118.287435,12.6559245 117.953125,12.40625 117.445312,12.40625 C117.166016,12.40625 116.918457,12.4972331 116.702637,12.6791992 C116.486816,12.8611654 116.378906,13.1002604 116.378906,13.3964844 C116.378906,13.531901 116.404297,13.6673177 116.455078,13.8027344 L119.920898,23.7304688 C120.208659,24.5768229 120.708008,25 121.418945,25 C122.163737,25 122.705404,24.5345052 123.043945,23.6035156 L126.268555,14.5644531 L129.505859,23.6035156 C129.835938,24.5345052 130.377604,25 131.130859,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M139.169922,9.44824219 C139.61849,9.44824219 139.97819,9.31917318 140.249023,9.06103516 C140.519857,8.80289714 140.655273,8.46647135 140.655273,8.05175781 C140.655273,7.62858073 140.519857,7.28792318 140.249023,7.02978516 C139.97819,6.77164714 139.622721,6.64257812 139.182617,6.64257812 C138.734049,6.64257812 138.372233,6.77376302 138.097168,7.03613281 C137.822103,7.2985026 137.68457,7.63704427 137.68457,8.05175781 C137.68457,8.46647135 137.819987,8.80289714 138.09082,9.06103516 C138.361654,9.31917318 138.721354,9.44824219 139.169922,9.44824219 Z M139.157227,25 C139.470378,25 139.743327,24.8963216 139.976074,24.6889648 C140.208822,24.4816081 140.325195,24.2001953 140.325195,23.8447266 L140.325195,13.5234375 C140.325195,13.1679688 140.213053,12.8886719 139.98877,12.6855469 C139.764486,12.4824219 139.5,12.3808594 139.195312,12.3808594 C138.882161,12.3808594 138.609212,12.4824219 138.376465,12.6855469 C138.143717,12.8886719 138.027344,13.1679688 138.027344,13.5234375 L138.027344,23.8447266 C138.027344,24.2171224 138.13737,24.5027669 138.357422,24.7016602 C138.577474,24.9005534 138.844076,25 139.157227,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M143.032227,23.8574219 C143.540039,24.2298177 144.219238,24.5535482 145.069824,24.8286133 C145.92041,25.1036784 146.798503,25.2412109 147.704102,25.2412109 C148.677409,25.2412109 149.532227,25.1079102 150.268555,24.8413086 C151.004883,24.574707 151.599447,24.1451823 152.052246,23.5527344 C152.505046,22.9602865 152.731445,22.2324219 152.731445,21.3691406 C152.731445,20.4550781 152.458496,19.7039388 151.912598,19.1157227 C151.366699,18.5275065 150.454753,18.0768229 149.176758,17.7636719 L146.980469,17.2177734 C146.057943,16.9892578 145.457031,16.7713216 145.177734,16.5639648 C144.898438,16.3566081 144.758789,15.9990234 144.758789,15.4912109 C144.758789,14.8649089 145.029622,14.4057617 145.571289,14.1137695 C146.112956,13.8217773 146.84082,13.6757813 147.754883,13.6757813 C148.042643,13.6757813 148.326172,13.6948242 148.605469,13.7329102 C148.884766,13.7709961 149.159831,13.8260091 149.430664,13.8979492 C149.701497,13.9698893 149.915202,14.03125 150.071777,14.0820313 C150.228353,14.1328125 150.431478,14.2068685 150.681152,14.3041992 C150.930827,14.4015299 151.068359,14.4544271 151.09375,14.4628906 C151.229167,14.5136719 151.360352,14.5390625 151.487305,14.5390625 C151.707357,14.5390625 151.882975,14.4692383 152.01416,14.3295898 C152.145345,14.1899414 152.210938,14.0227865 152.210938,13.828125 C152.210938,13.4980469 152.045898,13.2483724 151.71582,13.0791016 C151.258789,12.8336589 150.668457,12.6114909 149.944824,12.4125977 C149.221191,12.2137044 148.448893,12.1142578 147.62793,12.1142578 C146.942383,12.1142578 146.307617,12.1798503 145.723633,12.3110352 C145.139648,12.4422201 144.612793,12.6411133 144.143066,12.9077148 C143.67334,13.1743164 143.30306,13.5361328 143.032227,13.9931641 C142.761393,14.4501953 142.625977,14.9791667 142.625977,15.5800781 C142.625977,15.9609375 142.664062,16.2994792 142.740234,16.5957031 C142.816406,16.8919271 142.939128,17.1500651 143.108398,17.3701172 C143.277669,17.5901693 143.459635,17.7784831 143.654297,17.9350586 C143.848958,18.0916341 144.109212,18.2376302 144.435059,18.3730469 C144.760905,18.5084635 145.065592,18.6184896 145.349121,18.703125 C145.63265,18.7877604 146.00293,18.8893229 146.459961,19.0078125 L148.707031,19.5664062 C149.451823,19.7526042 149.985026,19.9980469 150.306641,20.3027344 C150.628255,20.6074219 150.789062,21.0136719 150.789062,21.5214844 C150.789062,22.2324219 150.503418,22.7719727 149.932129,23.1401367 C149.36084,23.5083008 148.609701,23.6923828 147.678711,23.6923828 C146.383789,23.6839193 145.156576,23.319987 143.99707,22.6005859 C143.810872,22.4820964 143.616211,22.4228516 143.413086,22.4228516 C143.193034,22.4228516 143.011068,22.4969076 142.867188,22.6450195 C142.723307,22.7931315 142.651367,22.96875 142.651367,23.171875 C142.651367,23.4511719 142.77832,23.6796875 143.032227,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M156.454102,17.7382812 C156.496419,17.2135417 156.608561,16.7163086 156.790527,16.246582 C156.972493,15.7768555 157.217936,15.3494466 157.526855,14.9643555 C157.835775,14.5792643 158.231445,14.2724609 158.713867,14.0439453 C159.196289,13.8154297 159.733724,13.7011719 160.326172,13.7011719 C161.46875,13.7011719 162.336263,14.0777995 162.928711,14.8310547 C163.521159,15.5843099 163.855469,16.5533854 163.931641,17.7382812 L156.454102,17.7382812 Z M160.364258,25.2285156 C162.260091,25.2285156 163.876628,24.6276042 165.213867,23.4257812 C165.450846,23.2141927 165.569336,22.9729818 165.569336,22.7021484 C165.569336,22.4905599 165.499512,22.3064779 165.359863,22.1499023 C165.220215,21.9933268 165.048828,21.9150391 164.845703,21.9150391 C164.676432,21.9150391 164.515625,21.9742839 164.363281,22.0927734 C163.753906,22.5582682 163.148763,22.9264323 162.547852,23.1972656 C161.94694,23.468099 161.269857,23.6035156 160.516602,23.6035156 C159.323242,23.5865885 158.345703,23.2036133 157.583984,22.4545898 C156.822266,21.7055664 156.432943,20.6031901 156.416016,19.1474609 L165.125,19.1474609 C165.395833,19.1474609 165.598958,19.0670573 165.734375,18.90625 C165.869792,18.7454427 165.9375,18.5423177 165.9375,18.296875 C165.912109,17.4420573 165.789388,16.6570638 165.569336,15.9418945 C165.349284,15.2267253 165.019206,14.5792643 164.579102,13.9995117 C164.138997,13.4197591 163.546549,12.9648438 162.801758,12.6347656 C162.056966,12.3046875 161.189453,12.1396484 160.199219,12.1396484 C158.946615,12.1396484 157.850586,12.4443359 156.911133,13.0537109 C155.97168,13.6630859 155.269206,14.4544271 154.803711,15.4277344 C154.338216,16.4010417 154.105469,17.4759115 154.105469,18.6523437 C154.105469,19.9895833 154.38265,21.159668 154.937012,22.1625977 C155.491374,23.1655273 156.238281,23.9251302 157.177734,24.4414063 C158.117188,24.9576823 159.179362,25.2200521 160.364258,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-2" transform="translate(0, 92)" xlink:href="#path-4" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M7.45214844,25 C7.79915365,25 8.09960938,24.889974 8.35351563,24.6699219 C8.60742188,24.4498698 8.734375,24.1494141 8.734375,23.7685547 L8.734375,8.90234375 L13.4570313,8.90234375 C13.7871094,8.90234375 14.0452474,8.80078125 14.2314453,8.59765625 C14.4176432,8.39453125 14.5107422,8.15332031 14.5107422,7.87402344 C14.5107422,7.59472656 14.4197591,7.35563151 14.237793,7.15673828 C14.0558268,6.95784505 13.7955729,6.85839844 13.4570313,6.85839844 L1.38378906,6.85839844 C1.05371094,6.85839844 0.795572917,6.95996094 0.609375,7.16308594 C0.423177083,7.36621094 0.330078125,7.6031901 0.330078125,7.87402344 C0.330078125,8.15332031 0.423177083,8.39453125 0.609375,8.59765625 C0.795572917,8.80078125 1.05371094,8.90234375 1.38378906,8.90234375 L6.16992188,8.90234375 L6.16992188,23.7685547 C6.16992188,24.1494141 6.296875,24.4498698 6.55078125,24.6699219 C6.8046875,24.889974 7.10514323,25 7.45214844,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M21.2675781,25.2285156 C23.2226562,25.2285156 24.7714844,24.6149089 25.9140625,23.3876953 C27.0566406,22.1604818 27.6279297,20.5947266 27.6279297,18.6904297 C27.6279297,16.7692057 27.0545247,15.1971029 25.9077148,13.9741211 C24.7609049,12.7511393 23.2141927,12.1396484 21.2675781,12.1396484 C19.3209635,12.1396484 17.7742513,12.7532552 16.6274414,13.9804688 C15.4806315,15.2076823 14.9072266,16.7776693 14.9072266,18.6904297 C14.9072266,20.5947266 15.4806315,22.1604818 16.6274414,23.3876953 C17.7742513,24.6149089 19.3209635,25.2285156 21.2675781,25.2285156 Z M21.2421875,23.6542969 C19.9726562,23.6542969 18.980306,23.2078451 18.2651367,22.3149414 C17.5499674,21.4220378 17.1923828,20.2138672 17.1923828,18.6904297 C17.1923828,17.1500651 17.5520833,15.933431 18.2714844,15.0405273 C18.9908854,14.1476237 19.9895833,13.7011719 21.2675781,13.7011719 C22.5371094,13.7011719 23.5336914,14.1497396 24.2573242,15.046875 C24.980957,15.9440104 25.3427734,17.1585286 25.3427734,18.6904297 C25.3427734,20.2307943 24.9851888,21.4431966 24.2700195,22.3276367 C23.5548503,23.2120768 22.5455729,23.6542969 21.2421875,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M30.6015625,25 C30.9147135,25 31.1876628,24.8984375 31.4204102,24.6953125 C31.6531576,24.4921875 31.7695313,24.2086589 31.7695313,23.8447266 L31.7695313,18.3095703 L37.7363281,24.6572266 C37.9479167,24.8942057 38.2018229,25.0126953 38.4980469,25.0126953 C38.7773438,25.0126953 39.0249023,24.906901 39.2407227,24.6953125 C39.456543,24.483724 39.5644531,24.242513 39.5644531,23.9716797 C39.5644531,23.726237 39.4713542,23.5061849 39.2851562,23.3115234 L34.2451172,18.0048828 L38.8535156,13.8789062 C39.0566406,13.6842448 39.1582031,13.4684245 39.1582031,13.2314453 C39.1582031,12.9690755 39.0545247,12.7320964 38.847168,12.5205078 C38.6398112,12.3089193 38.4007161,12.203125 38.1298828,12.203125 C37.9182943,12.203125 37.719401,12.2835286 37.5332031,12.4443359 L31.7695313,17.7255859 L31.7695313,8.01367188 C31.7695313,7.65820312 31.6573893,7.37890625 31.4331055,7.17578125 C31.2088216,6.97265625 30.9443359,6.87109375 30.6396484,6.87109375 C30.3264974,6.87109375 30.0535482,6.97265625 29.8208008,7.17578125 C29.5880534,7.37890625 29.4716797,7.65820312 29.4716797,8.01367188 L29.4716797,23.8447266 C29.4716797,24.2171224 29.5817057,24.5027669 29.8017578,24.7016602 C30.0218099,24.9005534 30.2884115,25 30.6015625,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M42.7158203,17.7382812 C42.758138,17.2135417 42.8702799,16.7163086 43.0522461,16.246582 C43.2342122,15.7768555 43.4796549,15.3494466 43.7885742,14.9643555 C44.0974935,14.5792643 44.4931641,14.2724609 44.9755859,14.0439453 C45.4580078,13.8154297 45.9954427,13.7011719 46.5878906,13.7011719 C47.7304688,13.7011719 48.5979818,14.0777995 49.1904297,14.8310547 C49.7828776,15.5843099 50.1171875,16.5533854 50.1933594,17.7382812 L42.7158203,17.7382812 Z M46.6259766,25.2285156 C48.5218099,25.2285156 50.1383464,24.6276042 51.4755859,23.4257812 C51.7125651,23.2141927 51.8310547,22.9729818 51.8310547,22.7021484 C51.8310547,22.4905599 51.7612305,22.3064779 51.621582,22.1499023 C51.4819336,21.9933268 51.3105469,21.9150391 51.1074219,21.9150391 C50.938151,21.9150391 50.7773438,21.9742839 50.625,22.0927734 C50.015625,22.5582682 49.4104818,22.9264323 48.8095703,23.1972656 C48.2086589,23.468099 47.5315755,23.6035156 46.7783203,23.6035156 C45.5849609,23.5865885 44.6074219,23.2036133 43.8457031,22.4545898 C43.0839844,21.7055664 42.6946615,20.6031901 42.6777344,19.1474609 L51.3867188,19.1474609 C51.6575521,19.1474609 51.8606771,19.0670573 51.9960938,18.90625 C52.1315104,18.7454427 52.1992188,18.5423177 52.1992188,18.296875 C52.1738281,17.4420573 52.0511068,16.6570638 51.8310547,15.9418945 C51.6110026,15.2267253 51.2809245,14.5792643 50.8408203,13.9995117 C50.4007161,13.4197591 49.8082682,12.9648438 49.0634766,12.6347656 C48.3186849,12.3046875 47.4511719,12.1396484 46.4609375,12.1396484 C45.2083333,12.1396484 44.1123047,12.4443359 43.1728516,13.0537109 C42.2333984,13.6630859 41.5309245,14.4544271 41.0654297,15.4277344 C40.5999349,16.4010417 40.3671875,17.4759115 40.3671875,18.6523437 C40.3671875,19.9895833 40.6443685,21.159668 41.1987305,22.1625977 C41.7530924,23.1655273 42.5,23.9251302 43.4394531,24.4414063 C44.3789062,24.9576823 45.4410807,25.2200521 46.6259766,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M55.4902344,25 C55.8033854,25 56.0763346,24.9026693 56.309082,24.7080078 C56.5418294,24.5133464 56.6582031,24.2467448 56.6582031,23.9082031 L56.6582031,17.5732422 C56.7174479,16.3798828 57.0813802,15.4361979 57.75,14.7421875 C58.4186198,14.0481771 59.2395833,13.7011719 60.2128906,13.7011719 C61.1269531,13.7011719 61.8357747,13.9847005 62.3393555,14.5517578 C62.8429362,15.1188151 63.0947266,15.9440104 63.0947266,17.0273438 L63.0947266,23.9082031 C63.0947266,24.2552083 63.2068685,24.5239258 63.4311523,24.7143555 C63.6554362,24.9047852 63.9241536,25 64.2373047,25 C64.5504557,25 64.8191732,24.9047852 65.043457,24.7143555 C65.2677409,24.5239258 65.3798828,24.2552083 65.3798828,23.9082031 L65.3798828,17.0527344 C65.3798828,15.4023437 64.9651693,14.1708984 64.1357422,13.3583984 C63.3063151,12.5458984 62.1933594,12.1396484 60.796875,12.1396484 C59.8235677,12.1396484 58.9708659,12.3470052 58.2387695,12.7617188 C57.5066732,13.1764323 56.9798177,13.7688802 56.6582031,14.5390625 L56.6582031,13.4345703 C56.6582031,13.1044922 56.5460612,12.8484701 56.3217773,12.6665039 C56.0974935,12.4845378 55.828776,12.3935547 55.515625,12.3935547 C55.202474,12.3935547 54.9316406,12.4887695 54.703125,12.6791992 C54.4746094,12.8696289 54.3603516,13.1341146 54.3603516,13.4726563 L54.3603516,23.9082031 C54.3603516,24.2552083 54.4703776,24.5239258 54.6904297,24.7143555 C54.9104818,24.9047852 55.1770833,25 55.4902344,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M67.6806641,19.1728516 L73.3300781,19.1728516 C73.6263021,19.1728516 73.8696289,19.0924479 74.0600586,18.9316406 C74.2504883,18.7708333 74.3457031,18.5634766 74.3457031,18.3095703 C74.3457031,18.038737 74.2526042,17.8208008 74.0664062,17.6557617 C73.8802083,17.4907227 73.6347656,17.4082031 73.3300781,17.4082031 L67.6806641,17.4082031 C67.3759766,17.4082031 67.1305339,17.4907227 66.9443359,17.6557617 C66.758138,17.8208008 66.6650391,18.038737 66.6650391,18.3095703 C66.6650391,18.5634766 66.7602539,18.7708333 66.9506836,18.9316406 C67.1411133,19.0924479 67.3844401,19.1728516 67.6806641,19.1728516 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M89.5957031,25 C90.3066406,25 90.8059896,24.5768229 91.09375,23.7304688 L94.5595703,13.8027344 C94.6103516,13.6673177 94.6357422,13.531901 94.6357422,13.3964844 C94.6357422,13.1002604 94.527832,12.8611654 94.3120117,12.6791992 C94.0961914,12.4972331 93.8486328,12.40625 93.5693359,12.40625 C93.0615234,12.40625 92.7272135,12.6559245 92.5664062,13.1552734 L89.4179688,22.5625 L86.2695312,13.6503906 C85.9733073,12.8040365 85.461263,12.3808594 84.7333984,12.3808594 C84.0139974,12.3808594 83.5061849,12.8040365 83.2099609,13.6503906 L80.0615234,22.5625 L76.9130859,13.1552734 C76.7522786,12.6559245 76.4179688,12.40625 75.9101562,12.40625 C75.6308594,12.40625 75.3833008,12.4972331 75.1674805,12.6791992 C74.9516602,12.8611654 74.84375,13.1002604 74.84375,13.3964844 C74.84375,13.531901 74.8691406,13.6673177 74.9199219,13.8027344 L78.3857422,23.7304688 C78.6735026,24.5768229 79.1728516,25 79.8837891,25 C80.6285807,25 81.1702474,24.5345052 81.5087891,23.6035156 L84.7333984,14.5644531 L87.9707031,23.6035156 C88.3007812,24.5345052 88.8424479,25 89.5957031,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M97.6347656,9.44824219 C98.0833333,9.44824219 98.4430339,9.31917318 98.7138672,9.06103516 C98.9847005,8.80289714 99.1201172,8.46647135 99.1201172,8.05175781 C99.1201172,7.62858073 98.9847005,7.28792318 98.7138672,7.02978516 C98.4430339,6.77164714 98.0875651,6.64257812 97.6474609,6.64257812 C97.1988932,6.64257812 96.8370768,6.77376302 96.5620117,7.03613281 C96.2869466,7.2985026 96.1494141,7.63704427 96.1494141,8.05175781 C96.1494141,8.46647135 96.2848307,8.80289714 96.5556641,9.06103516 C96.8264974,9.31917318 97.1861979,9.44824219 97.6347656,9.44824219 Z M97.6220703,25 C97.9352214,25 98.2081706,24.8963216 98.440918,24.6889648 C98.6736654,24.4816081 98.7900391,24.2001953 98.7900391,23.8447266 L98.7900391,13.5234375 C98.7900391,13.1679688 98.6778971,12.8886719 98.4536133,12.6855469 C98.2293294,12.4824219 97.9648438,12.3808594 97.6601562,12.3808594 C97.3470052,12.3808594 97.074056,12.4824219 96.8413086,12.6855469 C96.6085612,12.8886719 96.4921875,13.1679688 96.4921875,13.5234375 L96.4921875,23.8447266 C96.4921875,24.2171224 96.6022135,24.5027669 96.8222656,24.7016602 C97.0423177,24.9005534 97.3089193,25 97.6220703,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M101.49707,23.8574219 C102.004883,24.2298177 102.684082,24.5535482 103.534668,24.8286133 C104.385254,25.1036784 105.263346,25.2412109 106.168945,25.2412109 C107.142253,25.2412109 107.99707,25.1079102 108.733398,24.8413086 C109.469727,24.574707 110.06429,24.1451823 110.51709,23.5527344 C110.969889,22.9602865 111.196289,22.2324219 111.196289,21.3691406 C111.196289,20.4550781 110.92334,19.7039388 110.377441,19.1157227 C109.831543,18.5275065 108.919596,18.0768229 107.641602,17.7636719 L105.445312,17.2177734 C104.522786,16.9892578 103.921875,16.7713216 103.642578,16.5639648 C103.363281,16.3566081 103.223633,15.9990234 103.223633,15.4912109 C103.223633,14.8649089 103.494466,14.4057617 104.036133,14.1137695 C104.577799,13.8217773 105.305664,13.6757813 106.219727,13.6757813 C106.507487,13.6757813 106.791016,13.6948242 107.070312,13.7329102 C107.349609,13.7709961 107.624674,13.8260091 107.895508,13.8979492 C108.166341,13.9698893 108.380046,14.03125 108.536621,14.0820313 C108.693197,14.1328125 108.896322,14.2068685 109.145996,14.3041992 C109.395671,14.4015299 109.533203,14.4544271 109.558594,14.4628906 C109.69401,14.5136719 109.825195,14.5390625 109.952148,14.5390625 C110.172201,14.5390625 110.347819,14.4692383 110.479004,14.3295898 C110.610189,14.1899414 110.675781,14.0227865 110.675781,13.828125 C110.675781,13.4980469 110.510742,13.2483724 110.180664,13.0791016 C109.723633,12.8336589 109.133301,12.6114909 108.409668,12.4125977 C107.686035,12.2137044 106.913737,12.1142578 106.092773,12.1142578 C105.407227,12.1142578 104.772461,12.1798503 104.188477,12.3110352 C103.604492,12.4422201 103.077637,12.6411133 102.60791,12.9077148 C102.138184,13.1743164 101.767904,13.5361328 101.49707,13.9931641 C101.226237,14.4501953 101.09082,14.9791667 101.09082,15.5800781 C101.09082,15.9609375 101.128906,16.2994792 101.205078,16.5957031 C101.28125,16.8919271 101.403971,17.1500651 101.573242,17.3701172 C101.742513,17.5901693 101.924479,17.7784831 102.119141,17.9350586 C102.313802,18.0916341 102.574056,18.2376302 102.899902,18.3730469 C103.225749,18.5084635 103.530436,18.6184896 103.813965,18.703125 C104.097493,18.7877604 104.467773,18.8893229 104.924805,19.0078125 L107.171875,19.5664062 C107.916667,19.7526042 108.44987,19.9980469 108.771484,20.3027344 C109.093099,20.6074219 109.253906,21.0136719 109.253906,21.5214844 C109.253906,22.2324219 108.968262,22.7719727 108.396973,23.1401367 C107.825684,23.5083008 107.074544,23.6923828 106.143555,23.6923828 C104.848633,23.6839193 103.621419,23.319987 102.461914,22.6005859 C102.275716,22.4820964 102.081055,22.4228516 101.87793,22.4228516 C101.657878,22.4228516 101.475911,22.4969076 101.332031,22.6450195 C101.188151,22.7931315 101.116211,22.96875 101.116211,23.171875 C101.116211,23.4511719 101.243164,23.6796875 101.49707,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M114.918945,17.7382812 C114.961263,17.2135417 115.073405,16.7163086 115.255371,16.246582 C115.437337,15.7768555 115.68278,15.3494466 115.991699,14.9643555 C116.300618,14.5792643 116.696289,14.2724609 117.178711,14.0439453 C117.661133,13.8154297 118.198568,13.7011719 118.791016,13.7011719 C119.933594,13.7011719 120.801107,14.0777995 121.393555,14.8310547 C121.986003,15.5843099 122.320312,16.5533854 122.396484,17.7382812 L114.918945,17.7382812 Z M118.829102,25.2285156 C120.724935,25.2285156 122.341471,24.6276042 123.678711,23.4257812 C123.91569,23.2141927 124.03418,22.9729818 124.03418,22.7021484 C124.03418,22.4905599 123.964355,22.3064779 123.824707,22.1499023 C123.685059,21.9933268 123.513672,21.9150391 123.310547,21.9150391 C123.141276,21.9150391 122.980469,21.9742839 122.828125,22.0927734 C122.21875,22.5582682 121.613607,22.9264323 121.012695,23.1972656 C120.411784,23.468099 119.734701,23.6035156 118.981445,23.6035156 C117.788086,23.5865885 116.810547,23.2036133 116.048828,22.4545898 C115.287109,21.7055664 114.897786,20.6031901 114.880859,19.1474609 L123.589844,19.1474609 C123.860677,19.1474609 124.063802,19.0670573 124.199219,18.90625 C124.334635,18.7454427 124.402344,18.5423177 124.402344,18.296875 C124.376953,17.4420573 124.254232,16.6570638 124.03418,15.9418945 C123.814128,15.2267253 123.484049,14.5792643 123.043945,13.9995117 C122.603841,13.4197591 122.011393,12.9648438 121.266602,12.6347656 C120.52181,12.3046875 119.654297,12.1396484 118.664062,12.1396484 C117.411458,12.1396484 116.31543,12.4443359 115.375977,13.0537109 C114.436523,13.6630859 113.734049,14.4544271 113.268555,15.4277344 C112.80306,16.4010417 112.570312,17.4759115 112.570312,18.6523437 C112.570312,19.9895833 112.847493,21.159668 113.401855,22.1625977 C113.956217,23.1655273 114.703125,23.9251302 115.642578,24.4414063 C116.582031,24.9576823 117.644206,25.2200521 118.829102,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+            </g>
+        </g>
+        <g id="List-@Score-Types" stroke-width="1" transform="translate(504, 348)">
+            <g id="header-/-02_green" xlink:href="#path-5">
+                <g id="bg">
+                    <path d="M20,0 L170,0 C181.045695,-3.55271368e-15 190,8.954305 190,20 L190,52 L190,52 L0,52 L0,20 C0,8.954305 8.954305,0 20,0 Z" id="header-bg" fill-opacity="0.1" fill="#1CBB8B"></path>
+                    <path d="M170,0 C181.045695,0 190,8.954305 190,20 L190,52 L0,52 L0,20 C0,8.954305 8.954305,0 20,0 L170,0 Z M170,1.6 L20,1.6 L18.118707,1.69499719 C8.84039506,2.63726101 1.6,10.4730881 1.6,20 L1.6,50.4 L188.4,50.4 L188.4,20 C188.4,9.8379606 180.162039,1.6 170,1.6 Z" id="header-bg" fill="#12BE8B" fill-rule="nonzero"></path>
+                </g>
+                <g id="Title" transform="translate(0, 12)" fill="#1CBB8B">
+                    <path d="M36.5855469,22.3046875 C38.6324219,22.3046875 40.2632813,21.8457031 41.478125,20.9277344 C42.6929688,20.0097656 43.3003906,18.7617188 43.3003906,17.1835938 C43.3003906,16.5039063 43.1988281,15.9042969 42.9957031,15.3847656 C42.7925781,14.8652344 42.478125,14.4140625 42.0523438,14.03125 C41.6265625,13.6484375 41.1460938,13.3242187 40.6109375,13.0585938 C40.0757813,12.7929687 39.4175781,12.5351563 38.6363281,12.2851563 C38.5972656,12.2695312 38.0269531,12.1015625 36.9253906,11.78125 C35.8238281,11.4609375 35.2535156,11.2929687 35.2144531,11.2773438 C34.6285156,11.0820312 34.1929688,10.8300781 33.9078125,10.5214844 C33.6226563,10.2128906 33.4800781,9.8125 33.4800781,9.3203125 C33.4800781,8.9375 33.5738281,8.609375 33.7613281,8.3359375 C33.9488281,8.0625 34.2105469,7.8515625 34.5464844,7.703125 C34.8824219,7.5546875 35.2476563,7.44726562 35.6421875,7.38085937 C36.0367188,7.31445313 36.4761719,7.28125 36.9605469,7.28125 C38.4214844,7.28125 39.7808594,7.640625 41.0386719,8.359375 C41.2417969,8.46875 41.4292969,8.5234375 41.6011719,8.5234375 C41.9292969,8.5234375 42.2105469,8.38085937 42.4449219,8.09570313 C42.6792969,7.81054687 42.7964844,7.4921875 42.7964844,7.140625 C42.7964844,6.796875 42.6753906,6.515625 42.4332031,6.296875 C41.8707031,5.8125 41.0621094,5.41210938 40.0074219,5.09570312 C38.9527344,4.77929688 37.8667969,4.62109375 36.7496094,4.62109375 C34.8355469,4.62109375 33.275,5.05859375 32.0679688,5.93359375 C30.8609375,6.80859375 30.2574219,7.9921875 30.2574219,9.484375 C30.2574219,10.640625 30.5523438,11.5566406 31.1421875,12.2324219 C31.7320313,12.9082031 32.6949219,13.4765625 34.0308594,13.9375 L37.8980469,15.2265625 C38.6246094,15.4609375 39.1597656,15.7460938 39.5035156,16.0820312 C39.8472656,16.4179688 40.0191406,16.8828125 40.0191406,17.4765625 C40.0191406,18.1953125 39.7007813,18.7382813 39.0640625,19.1054688 C38.4273438,19.4726562 37.5972656,19.65625 36.5738281,19.65625 C34.9722656,19.65625 33.4410156,19.21875 31.9800781,18.34375 C31.8472656,18.265625 31.7105469,18.2265625 31.5699219,18.2265625 C31.2339844,18.2265625 30.9273438,18.4121094 30.65,18.7832031 C30.3726563,19.1542969 30.2339844,19.53125 30.2339844,19.9140625 C30.2339844,20.203125 30.3238281,20.4140625 30.5035156,20.546875 C32.0113281,21.71875 34.0386719,22.3046875 36.5855469,22.3046875 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M47.9457031,16.0820312 C47.9457031,14.9023438 48.2328125,13.9609375 48.8070313,13.2578125 C49.38125,12.5546875 50.1761719,12.203125 51.1917969,12.203125 C51.8636719,12.203125 52.5511719,12.3789062 53.2542969,12.7304688 C53.4339844,12.8320313 53.6175781,12.8828125 53.8050781,12.8828125 C54.0707031,12.8828125 54.2953125,12.7890625 54.4789063,12.6015625 C54.6625,12.4140625 54.7542969,12.1914063 54.7542969,11.9335938 C54.7542969,11.5742188 54.5785156,11.2695312 54.2269531,11.0195312 C53.3675781,10.3945312 52.2542969,10.0820312 50.8871094,10.0820312 C49.7386719,10.0820312 48.7113281,10.3535156 47.8050781,10.8964844 C46.8988281,11.4394531 46.2015625,12.1679688 45.7132813,13.0820312 C45.225,13.9960937 44.9808594,15.0039062 44.9808594,16.1054688 C44.9808594,17.8710937 45.5277344,19.3183594 46.6214844,20.4472656 C47.7152344,21.5761719 49.1253906,22.140625 50.8519531,22.140625 C51.7113281,22.140625 52.4535156,22.0410156 53.0785156,21.8417969 C53.7035156,21.6425781 54.1800781,21.3945312 54.5082031,21.0976562 C54.8207031,20.8320312 54.9769531,20.5195312 54.9769531,20.1601562 C54.9769531,19.8945313 54.8910156,19.6660156 54.7191406,19.4746094 C54.5472656,19.2832031 54.3363281,19.1875 54.0863281,19.1875 C53.8832031,19.1875 53.6839844,19.2539063 53.4886719,19.3867188 C52.8792969,19.7929688 52.0941406,19.9960937 51.1332031,19.9960937 C50.1410156,19.9960937 49.3617188,19.6386719 48.7953125,18.9238281 C48.2289063,18.2089844 47.9457031,17.2617188 47.9457031,16.0820312 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M62.3175781,20.1367188 C61.3097656,20.1367188 60.5246094,19.7851563 59.9621094,19.0820312 C59.3996094,18.3789062 59.1183594,17.3945313 59.1183594,16.1289062 C59.1183594,14.8476562 59.3976563,13.8535156 59.95625,13.1464844 C60.5148438,12.4394531 61.3019531,12.0859375 62.3175781,12.0859375 C63.3332031,12.0859375 64.1222656,12.4414062 64.6847656,13.1523438 C65.2472656,13.8632813 65.5285156,14.8554688 65.5285156,16.1289062 C65.5285156,17.3945313 65.2472656,18.3789062 64.6847656,19.0820312 C64.1222656,19.7851563 63.3332031,20.1367188 62.3175781,20.1367188 Z M62.3175781,22.140625 C63.1457031,22.140625 63.9035156,22.0273437 64.5910156,21.8007813 C65.2785156,21.5742188 65.8585938,21.2714844 66.33125,20.8925781 C66.8039063,20.5136719 67.2023438,20.0644531 67.5265625,19.5449219 C67.8507813,19.0253906 68.0890625,18.4804688 68.2414063,17.9101562 C68.39375,17.3398438 68.4699219,16.7460938 68.4699219,16.1289062 C68.4699219,15.4804688 68.3898438,14.859375 68.2296875,14.265625 C68.0695313,13.671875 67.8214844,13.1191406 67.4855469,12.6074219 C67.1496094,12.0957031 66.7433594,11.6542969 66.2667969,11.2832031 C65.7902344,10.9121094 65.2140625,10.6191406 64.5382813,10.4042969 C63.8625,10.1894531 63.1222656,10.0820312 62.3175781,10.0820312 C61.4972656,10.0820312 60.7453125,10.1933594 60.0617188,10.4160156 C59.378125,10.6386719 58.8,10.9414062 58.3273438,11.3242188 C57.8546875,11.7070312 57.4542969,12.15625 57.1261719,12.671875 C56.7980469,13.1875 56.5578125,13.7363281 56.4054688,14.3183594 C56.253125,14.9003906 56.1769531,15.5039062 56.1769531,16.1289062 C56.1769531,16.9179687 56.3019531,17.6640625 56.5519531,18.3671875 C56.8019531,19.0703125 57.1671875,19.7070312 57.6476563,20.2773438 C58.128125,20.8476563 58.7707031,21.3007813 59.5753906,21.6367188 C60.3800781,21.9726562 61.2941406,22.140625 62.3175781,22.140625 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M72.1308594,22 C72.5683594,22 72.9257813,21.8632812 73.203125,21.5898438 C73.4804688,21.3164063 73.6191406,20.9257813 73.6191406,20.4179688 L73.6191406,15.4726562 C73.6191406,14.9179687 73.7089844,14.4492188 73.8886719,14.0664062 C74.0683594,13.6835937 74.3222656,13.3984375 74.6503906,13.2109375 C74.9785156,13.0234375 75.3203125,12.8925781 75.6757813,12.8183594 C76.03125,12.7441406 76.4394531,12.7070313 76.9003906,12.7070313 C77.2050781,12.7070313 77.4472656,12.5820313 77.6269531,12.3320312 C77.8066406,12.0820312 77.8964844,11.7929687 77.8964844,11.4648437 C77.8964844,11.1210938 77.7988281,10.828125 77.6035156,10.5859375 C77.4082031,10.34375 77.1308594,10.2226562 76.7714844,10.2226562 C76.0371094,10.2226562 75.375,10.4746094 74.7851563,10.9785156 C74.1953125,11.4824219 73.7949219,12.0859375 73.5839844,12.7890625 L73.6191406,11.5820312 C73.6269531,11.1679688 73.484375,10.8417969 73.1914063,10.6035156 C72.8984375,10.3652344 72.5527344,10.2460938 72.1542969,10.2460938 C71.7480469,10.2460938 71.3984375,10.3691406 71.1054688,10.6152344 C70.8125,10.8613281 70.6660156,11.2070312 70.6660156,11.6523438 L70.6660156,20.3945312 C70.6660156,20.9101562 70.8046875,21.3066406 71.0820313,21.5839844 C71.359375,21.8613281 71.7089844,22 72.1308594,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M78.8269531,16.0117187 C78.8269531,17.9414062 79.3835938,19.4453125 80.496875,20.5234375 C81.6101563,21.6015625 83.0925781,22.140625 84.9441406,22.140625 C86.5925781,22.140625 88.0339844,21.6210938 89.2683594,20.5820312 C89.5183594,20.3632813 89.6433594,20.0976562 89.6433594,19.7851562 C89.6433594,19.5117187 89.5496094,19.265625 89.3621094,19.046875 C89.1746094,18.828125 88.9597656,18.71875 88.7175781,18.71875 C88.5535156,18.71875 88.4011719,18.7734375 88.2605469,18.8828125 C87.1589844,19.671875 86.1238281,20.0664063 85.1550781,20.0664063 C83.0222656,20.0664063 81.8933594,18.9648438 81.7683594,16.7617188 L89.1746094,16.7617188 C89.4558594,16.7617188 89.6667969,16.6679688 89.8074219,16.4804687 C89.9480469,16.2929688 90.0183594,16.046875 90.0183594,15.7421875 C90.0105469,14.96875 89.8972656,14.25 89.6785156,13.5859375 C89.4597656,12.921875 89.1375,12.3242187 88.7117188,11.7929688 C88.2859375,11.2617188 87.7234375,10.8457031 87.0242188,10.5449219 C86.325,10.2441406 85.5222656,10.09375 84.6160156,10.09375 C83.4363281,10.09375 82.4011719,10.3671875 81.5105469,10.9140625 C80.6199219,11.4609375 79.95,12.1777344 79.5007813,13.0644531 C79.0515625,13.9511719 78.8269531,14.9335938 78.8269531,16.0117187 Z M81.7917969,15.109375 C81.8621094,14.2578125 82.1453125,13.53125 82.6414063,12.9296875 C83.1375,12.328125 83.8230469,12.0273438 84.6980469,12.0273438 C85.5964844,12.0273438 86.2742188,12.3164062 86.73125,12.8945312 C87.1882813,13.4726562 87.4480469,14.2109375 87.5105469,15.109375 L81.7917969,15.109375 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M103.926172,22.0117188 C104.387109,22.0117188 104.783594,21.8496094 105.115625,21.5253906 C105.447656,21.2011719 105.613672,20.7773438 105.613672,20.2539063 L105.613672,7.76171875 L109.422266,7.76171875 C109.812891,7.76171875 110.125391,7.62890625 110.359766,7.36328125 C110.594141,7.09765625 110.711328,6.78515625 110.711328,6.42578125 C110.711328,6.07421875 110.592188,5.76367188 110.353906,5.49414063 C110.115625,5.22460938 109.805078,5.08984375 109.422266,5.08984375 L98.4300781,5.08984375 C98.0394531,5.08984375 97.725,5.22460938 97.4867188,5.49414063 C97.2484375,5.76367188 97.1292969,6.078125 97.1292969,6.4375 C97.1292969,6.796875 97.2464844,7.10742188 97.4808594,7.36914063 C97.7152344,7.63085938 98.0316406,7.76171875 98.4300781,7.76171875 L102.250391,7.76171875 L102.250391,20.2539063 C102.250391,20.7773438 102.416406,21.2011719 102.748438,21.5253906 C103.080469,21.8496094 103.473047,22.0117188 103.926172,22.0117188 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M115.837109,26.171875 L122.833203,12.1679688 C122.942578,11.9492187 122.997266,11.7226562 122.997266,11.4882812 C122.997266,11.1210938 122.8625,10.8105469 122.592969,10.5566406 C122.323437,10.3027344 122.008984,10.1757812 121.649609,10.1757812 C121.079297,10.1757812 120.645703,10.4804688 120.348828,11.0898438 L117.219922,17.3359375 L114.102734,11.1953125 C113.813672,10.6328125 113.391797,10.3515625 112.837109,10.3515625 C112.462109,10.3515625 112.130078,10.4804688 111.841016,10.7382812 C111.551953,10.9960938 111.407422,11.3085938 111.407422,11.6757812 C111.407422,11.9023437 111.462109,12.1210938 111.571484,12.3320312 L115.755078,20.2773438 L113.411328,24.953125 C113.309766,25.15625 113.258984,25.3671875 113.258984,25.5859375 C113.258984,25.96875 113.403516,26.2949219 113.692578,26.5644531 C113.981641,26.8339844 114.309766,26.96875 114.676953,26.96875 C115.184766,26.96875 115.571484,26.703125 115.837109,26.171875 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M126.048828,26.4414062 C126.486328,26.4414062 126.84375,26.3027344 127.121094,26.0253906 C127.398438,25.7480469 127.537109,25.3554688 127.537109,24.8476563 L127.537109,20.2539062 C128.302734,21.5117188 129.537109,22.140625 131.240234,22.140625 C132.826172,22.140625 134.095703,21.6074219 135.048828,20.5410156 C136.001953,19.4746094 136.478516,18.0234375 136.478516,16.1875 C136.478516,15.03125 136.28125,13.9960938 135.886719,13.0820312 C135.492188,12.1679687 134.90625,11.4394531 134.128906,10.8964844 C133.351562,10.3535156 132.443359,10.0820312 131.404297,10.0820312 C129.576172,10.0820312 128.287109,10.7617188 127.537109,12.1210937 L127.537109,11.7109375 C127.537109,11.234375 127.40625,10.8710938 127.144531,10.6210938 C126.882812,10.3710938 126.533203,10.2460938 126.095703,10.2460938 C125.673828,10.2460938 125.318359,10.3984375 125.029297,10.703125 C124.740234,11.0078125 124.595703,11.4140625 124.595703,11.921875 L124.595703,24.8476563 C124.595703,25.3476562 124.732422,25.7382812 125.005859,26.0195312 C125.279297,26.3007812 125.626953,26.4414062 126.048828,26.4414062 Z M130.619141,20.1367188 C129.916016,20.1367188 129.328125,19.9511719 128.855469,19.5800781 C128.382812,19.2089844 128.046875,18.7324219 127.847656,18.1503906 C127.648438,17.5683594 127.548828,16.8984375 127.548828,16.140625 C127.548828,14.796875 127.785156,13.7851563 128.257812,13.1054688 C128.730469,12.4257812 129.529297,12.0859375 130.654297,12.0859375 C131.091797,12.0859375 131.482422,12.171875 131.826172,12.34375 C132.169922,12.515625 132.449219,12.7382812 132.664062,13.0117188 C132.878906,13.2851563 133.056641,13.6054688 133.197266,13.9726562 C133.337891,14.3398438 133.435547,14.703125 133.490234,15.0625 C133.544922,15.421875 133.572266,15.7890625 133.572266,16.1640625 C133.572266,16.578125 133.541016,16.96875 133.478516,17.3359375 C133.416016,17.703125 133.3125,18.0605469 133.167969,18.4082031 C133.023438,18.7558594 132.841797,19.0546875 132.623047,19.3046875 C132.404297,19.5546875 132.123047,19.7558594 131.779297,19.9082031 C131.435547,20.0605469 131.048828,20.1367188 130.619141,20.1367188 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M138.053516,16.0117187 C138.053516,17.9414062 138.610156,19.4453125 139.723437,20.5234375 C140.836719,21.6015625 142.319141,22.140625 144.170703,22.140625 C145.819141,22.140625 147.260547,21.6210938 148.494922,20.5820312 C148.744922,20.3632813 148.869922,20.0976562 148.869922,19.7851562 C148.869922,19.5117187 148.776172,19.265625 148.588672,19.046875 C148.401172,18.828125 148.186328,18.71875 147.944141,18.71875 C147.780078,18.71875 147.627734,18.7734375 147.487109,18.8828125 C146.385547,19.671875 145.350391,20.0664063 144.381641,20.0664063 C142.248828,20.0664063 141.119922,18.9648438 140.994922,16.7617188 L148.401172,16.7617188 C148.682422,16.7617188 148.893359,16.6679688 149.033984,16.4804687 C149.174609,16.2929688 149.244922,16.046875 149.244922,15.7421875 C149.237109,14.96875 149.123828,14.25 148.905078,13.5859375 C148.686328,12.921875 148.364062,12.3242187 147.938281,11.7929688 C147.5125,11.2617188 146.95,10.8457031 146.250781,10.5449219 C145.551562,10.2441406 144.748828,10.09375 143.842578,10.09375 C142.662891,10.09375 141.627734,10.3671875 140.737109,10.9140625 C139.846484,11.4609375 139.176562,12.1777344 138.727344,13.0644531 C138.278125,13.9511719 138.053516,14.9335938 138.053516,16.0117187 Z M141.018359,15.109375 C141.088672,14.2578125 141.371875,13.53125 141.867969,12.9296875 C142.364062,12.328125 143.049609,12.0273438 143.924609,12.0273438 C144.823047,12.0273438 145.500781,12.3164062 145.957812,12.8945312 C146.414844,13.4726562 146.674609,14.2109375 146.737109,15.109375 L141.018359,15.109375 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M151.265234,20.8164062 C152.507422,21.7226563 154.038672,22.1757812 155.858984,22.1757812 C157.265234,22.1757812 158.407813,21.859375 159.286719,21.2265625 C160.165625,20.59375 160.605078,19.6796875 160.605078,18.484375 C160.605078,17.53125 160.314063,16.7851563 159.732031,16.2460937 C159.15,15.7070312 158.230078,15.28125 156.972266,14.96875 L155.167578,14.5117188 C154.519141,14.3632813 154.079688,14.2011719 153.849219,14.0253906 C153.61875,13.8496094 153.503516,13.578125 153.503516,13.2109375 C153.503516,12.796875 153.714453,12.4902344 154.136328,12.2910156 C154.558203,12.0917969 155.069922,11.9921875 155.671484,11.9921875 C155.851172,11.9921875 156.028906,12 156.204688,12.015625 C156.380469,12.03125 156.558203,12.0566406 156.737891,12.0917969 C156.917578,12.1269531 157.071875,12.1582031 157.200781,12.1855469 C157.329688,12.2128906 157.483984,12.2558594 157.663672,12.3144531 C157.843359,12.3730469 157.966406,12.4121094 158.032813,12.4316406 C158.099219,12.4511719 158.214453,12.4921875 158.378516,12.5546875 C158.542578,12.6171875 158.628516,12.6484375 158.636328,12.6484375 C158.808203,12.7109375 158.960547,12.7421875 159.093359,12.7421875 C159.358984,12.7421875 159.571875,12.6542969 159.732031,12.4785156 C159.892188,12.3027344 159.972266,12.0898438 159.972266,11.8398438 C159.972266,11.4179688 159.761328,11.1054688 159.339453,10.9023438 C158.128516,10.3164062 156.847266,10.0234375 155.495703,10.0234375 C154.097266,10.0234375 152.968359,10.3105469 152.108984,10.8847656 C151.249609,11.4589844 150.819922,12.28125 150.819922,13.3515625 C150.819922,13.796875 150.878516,14.1855469 150.995703,14.5175781 C151.112891,14.8496094 151.265234,15.1269531 151.452734,15.3496094 C151.640234,15.5722656 151.909766,15.7753906 152.261328,15.9589844 C152.612891,16.1425781 152.960547,16.2929687 153.304297,16.4101562 C153.648047,16.5273438 154.097266,16.6601563 154.651953,16.8085938 L156.480078,17.2773438 C157.566016,17.5429688 158.108984,18.0078125 158.108984,18.671875 C158.108984,19.046875 157.982031,19.3535156 157.728125,19.5917969 C157.474219,19.8300781 157.169531,19.9941406 156.814063,20.0839844 C156.458594,20.1738281 156.054297,20.21875 155.601172,20.21875 C155.140234,20.21875 154.679297,20.1542969 154.218359,20.0253906 C153.757422,19.8964844 153.423438,19.78125 153.216406,19.6796875 C153.009375,19.578125 152.733984,19.4296875 152.390234,19.234375 C152.179297,19.1171875 151.976172,19.0585937 151.780859,19.0585937 C151.507422,19.0585937 151.282813,19.15625 151.107031,19.3515625 C150.93125,19.546875 150.843359,19.7734375 150.843359,20.03125 C150.843359,20.3515625 150.983984,20.6132813 151.265234,20.8164062 Z" id="Path" fill-rule="nonzero"></path>
+                </g>
+            </g>
+            <g id="List" transform="translate(0, 50)">
+                <g id="bg">
+                    <path d="M0,0 L190,0 L190,136 C190,140.418278 186.418278,144 182,144 L8,144 C3.581722,144 0,140.418278 0,136 L0,0 L0,0 Z" fill="#FFFFFF"></path>
+                    <path d="M190,0 L190,136 C190,140.418278 186.418278,144 182,144 L8,144 C3.581722,144 0,140.418278 0,136 L0,0 L190,0 Z M188.4,1.60000015 L1.6,1.60000015 L1.6,136 C1.6,139.534622 4.4653776,142.4 8,142.4 L182,142.4 C185.534622,142.4 188.4,139.534622 188.4,136 L188.4,1.60000015 Z" fill="#1CBB8B" fill-rule="nonzero"></path>
+                </g>
+                <g id="List-Item-/-01_@text_#333" transform="translate(0, 12)" xlink:href="#path-6" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M7.07128906,25.2412109 C8.73014323,25.2412109 10.0166016,24.8730469 10.9306641,24.1367188 C11.2184245,23.8997396 11.3623047,23.6416016 11.3623047,23.3623047 C11.3623047,23.1676432 11.2945964,23.0047201 11.1591797,22.8735352 C11.023763,22.7423503 10.858724,22.6767578 10.6640625,22.6767578 C10.4863281,22.6767578 10.3170573,22.7317708 10.15625,22.8417969 C9.37760417,23.3834635 8.40852865,23.6542969 7.24902344,23.6542969 C6.58886719,23.6542969 6.00488281,23.5167643 5.49707031,23.2416992 C4.98925781,22.9666341 4.58300781,22.5942383 4.27832031,22.1245117 C3.97363281,21.6547852 3.74511719,21.1321615 3.59277344,20.5566406 C3.44042969,19.9811198 3.36425781,19.3717448 3.36425781,18.7285156 C3.36425781,17.188151 3.74088542,15.9630534 4.49414063,15.0532227 C5.24739583,14.1433919 6.22916667,13.6884766 7.43945313,13.6884766 C8.37044271,13.6884766 9.22102865,13.938151 9.99121094,14.4375 C10.1604818,14.547526 10.3382161,14.6025391 10.5244141,14.6025391 C10.7275391,14.6025391 10.8989258,14.5390625 11.0385742,14.4121094 C11.1782227,14.2851563 11.2480469,14.1285807 11.2480469,13.9423828 C11.2480469,13.680013 11.1041667,13.4345703 10.8164062,13.2060547 C10.452474,12.9013672 9.96158854,12.6474609 9.34375,12.4443359 C8.72591146,12.2412109 8.0530599,12.1396484 7.32519531,12.1396484 C6.08105469,12.1396484 4.98079427,12.4316406 4.02441406,13.015625 C3.06803385,13.5996094 2.3359375,14.3867188 1.828125,15.3769531 C1.3203125,16.3671875 1.06640625,17.4716797 1.06640625,18.6904297 C1.06640625,20.5777995 1.60807292,22.1414388 2.69140625,23.3813477 C3.77473958,24.6212565 5.23470052,25.2412109 7.07128906,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M14.2597656,25 C14.5729167,25 14.8458659,24.906901 15.0786133,24.7207031 C15.3113607,24.5345052 15.4277344,24.2721354 15.4277344,23.9335938 L15.4277344,17.3955078 C15.4277344,16.4052734 15.7430013,15.6139323 16.3735352,15.0214844 C17.004069,14.4290365 17.890625,14.1328125 19.0332031,14.1328125 C19.3040365,14.1328125 19.5113932,14.0418294 19.6552734,13.8598633 C19.7991536,13.6778971 19.8710938,13.4599609 19.8710938,13.2060547 C19.8710938,12.9352214 19.792806,12.6982422 19.6362305,12.4951172 C19.4796549,12.2919922 19.2659505,12.1904297 18.9951172,12.1904297 C18.0979818,12.1904297 17.3299154,12.4549154 16.690918,12.9838867 C16.0519206,13.5128581 15.6266276,14.1708984 15.4150391,14.9580078 L15.4277344,13.4345703 C15.4277344,13.1129557 15.3155924,12.8611654 15.0913086,12.6791992 C14.8670247,12.4972331 14.5983073,12.40625 14.2851563,12.40625 C13.9720052,12.40625 13.7011719,12.4972331 13.4726562,12.6791992 C13.2441406,12.8611654 13.1298828,13.1214193 13.1298828,13.4599609 L13.1298828,23.9335938 C13.1298828,24.2721354 13.2399089,24.5345052 13.4599609,24.7207031 C13.680013,24.906901 13.9466146,25 14.2597656,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M26.84375,25.2285156 C28.7988281,25.2285156 30.3476562,24.6149089 31.4902344,23.3876953 C32.6328125,22.1604818 33.2041016,20.5947266 33.2041016,18.6904297 C33.2041016,16.7692057 32.6306966,15.1971029 31.4838867,13.9741211 C30.3370768,12.7511393 28.7903646,12.1396484 26.84375,12.1396484 C24.8971354,12.1396484 23.3504232,12.7532552 22.2036133,13.9804688 C21.0568034,15.2076823 20.4833984,16.7776693 20.4833984,18.6904297 C20.4833984,20.5947266 21.0568034,22.1604818 22.2036133,23.3876953 C23.3504232,24.6149089 24.8971354,25.2285156 26.84375,25.2285156 Z M26.8183594,23.6542969 C25.5488281,23.6542969 24.5564779,23.2078451 23.8413086,22.3149414 C23.1261393,21.4220378 22.7685547,20.2138672 22.7685547,18.6904297 C22.7685547,17.1500651 23.1282552,15.933431 23.8476562,15.0405273 C24.5670573,14.1476237 25.5657552,13.7011719 26.84375,13.7011719 C28.1132813,13.7011719 29.1098633,14.1497396 29.8334961,15.046875 C30.5571289,15.9440104 30.9189453,17.1585286 30.9189453,18.6904297 C30.9189453,20.2307943 30.5613607,21.4431966 29.8461914,22.3276367 C29.1310221,23.2120768 28.1217448,23.6542969 26.8183594,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M34.9589844,23.8574219 C35.4667969,24.2298177 36.1459961,24.5535482 36.996582,24.8286133 C37.847168,25.1036784 38.7252604,25.2412109 39.6308594,25.2412109 C40.6041667,25.2412109 41.4589844,25.1079102 42.1953125,24.8413086 C42.9316406,24.574707 43.5262044,24.1451823 43.9790039,23.5527344 C44.4318034,22.9602865 44.6582031,22.2324219 44.6582031,21.3691406 C44.6582031,20.4550781 44.3852539,19.7039388 43.8393555,19.1157227 C43.293457,18.5275065 42.3815104,18.0768229 41.1035156,17.7636719 L38.9072266,17.2177734 C37.9847005,16.9892578 37.3837891,16.7713216 37.1044922,16.5639648 C36.8251953,16.3566081 36.6855469,15.9990234 36.6855469,15.4912109 C36.6855469,14.8649089 36.9563802,14.4057617 37.4980469,14.1137695 C38.0397135,13.8217773 38.7675781,13.6757813 39.6816406,13.6757813 C39.969401,13.6757813 40.2529297,13.6948242 40.5322266,13.7329102 C40.8115234,13.7709961 41.0865885,13.8260091 41.3574219,13.8979492 C41.6282552,13.9698893 41.8419596,14.03125 41.9985352,14.0820313 C42.1551107,14.1328125 42.3582357,14.2068685 42.6079102,14.3041992 C42.8575846,14.4015299 42.9951172,14.4544271 43.0205078,14.4628906 C43.1559245,14.5136719 43.2871094,14.5390625 43.4140625,14.5390625 C43.6341146,14.5390625 43.8097331,14.4692383 43.940918,14.3295898 C44.0721029,14.1899414 44.1376953,14.0227865 44.1376953,13.828125 C44.1376953,13.4980469 43.9726563,13.2483724 43.6425781,13.0791016 C43.1855469,12.8336589 42.5952148,12.6114909 41.871582,12.4125977 C41.1479492,12.2137044 40.375651,12.1142578 39.5546875,12.1142578 C38.8691406,12.1142578 38.234375,12.1798503 37.6503906,12.3110352 C37.0664062,12.4422201 36.5395508,12.6411133 36.0698242,12.9077148 C35.6000977,13.1743164 35.2298177,13.5361328 34.9589844,13.9931641 C34.688151,14.4501953 34.5527344,14.9791667 34.5527344,15.5800781 C34.5527344,15.9609375 34.5908203,16.2994792 34.6669922,16.5957031 C34.7431641,16.8919271 34.8658854,17.1500651 35.0351562,17.3701172 C35.2044271,17.5901693 35.3863932,17.7784831 35.5810547,17.9350586 C35.7757161,18.0916341 36.0359701,18.2376302 36.3618164,18.3730469 C36.6876628,18.5084635 36.9923503,18.6184896 37.2758789,18.703125 C37.5594076,18.7877604 37.9296875,18.8893229 38.3867188,19.0078125 L40.6337891,19.5664062 C41.3785807,19.7526042 41.9117839,19.9980469 42.2333984,20.3027344 C42.555013,20.6074219 42.7158203,21.0136719 42.7158203,21.5214844 C42.7158203,22.2324219 42.4301758,22.7719727 41.8588867,23.1401367 C41.2875977,23.5083008 40.5364583,23.6923828 39.6054688,23.6923828 C38.3105469,23.6839193 37.0833333,23.319987 35.9238281,22.6005859 C35.7376302,22.4820964 35.5429688,22.4228516 35.3398438,22.4228516 C35.1197917,22.4228516 34.9378255,22.4969076 34.7939453,22.6450195 C34.6500651,22.7931315 34.578125,22.96875 34.578125,23.171875 C34.578125,23.4511719 34.7050781,23.6796875 34.9589844,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M46.7304688,23.8574219 C47.2382812,24.2298177 47.9174805,24.5535482 48.7680664,24.8286133 C49.6186523,25.1036784 50.4967448,25.2412109 51.4023438,25.2412109 C52.375651,25.2412109 53.2304688,25.1079102 53.9667969,24.8413086 C54.703125,24.574707 55.2976888,24.1451823 55.7504883,23.5527344 C56.2032878,22.9602865 56.4296875,22.2324219 56.4296875,21.3691406 C56.4296875,20.4550781 56.1567383,19.7039388 55.6108398,19.1157227 C55.0649414,18.5275065 54.1529948,18.0768229 52.875,17.7636719 L50.6787109,17.2177734 C49.7561849,16.9892578 49.1552734,16.7713216 48.8759766,16.5639648 C48.5966797,16.3566081 48.4570313,15.9990234 48.4570313,15.4912109 C48.4570313,14.8649089 48.7278646,14.4057617 49.2695312,14.1137695 C49.8111979,13.8217773 50.5390625,13.6757813 51.453125,13.6757813 C51.7408854,13.6757813 52.0244141,13.6948242 52.3037109,13.7329102 C52.5830078,13.7709961 52.8580729,13.8260091 53.1289062,13.8979492 C53.3997396,13.9698893 53.613444,14.03125 53.7700195,14.0820313 C53.9265951,14.1328125 54.1297201,14.2068685 54.3793945,14.3041992 C54.629069,14.4015299 54.7666016,14.4544271 54.7919922,14.4628906 C54.9274089,14.5136719 55.0585938,14.5390625 55.1855469,14.5390625 C55.405599,14.5390625 55.5812174,14.4692383 55.7124023,14.3295898 C55.8435872,14.1899414 55.9091797,14.0227865 55.9091797,13.828125 C55.9091797,13.4980469 55.7441406,13.2483724 55.4140625,13.0791016 C54.9570312,12.8336589 54.3666992,12.6114909 53.6430664,12.4125977 C52.9194336,12.2137044 52.1471354,12.1142578 51.3261719,12.1142578 C50.640625,12.1142578 50.0058594,12.1798503 49.421875,12.3110352 C48.8378906,12.4422201 48.3110352,12.6411133 47.8413086,12.9077148 C47.371582,13.1743164 47.0013021,13.5361328 46.7304688,13.9931641 C46.4596354,14.4501953 46.3242188,14.9791667 46.3242188,15.5800781 C46.3242188,15.9609375 46.3623047,16.2994792 46.4384766,16.5957031 C46.5146484,16.8919271 46.6373698,17.1500651 46.8066406,17.3701172 C46.9759115,17.5901693 47.1578776,17.7784831 47.3525391,17.9350586 C47.5472005,18.0916341 47.8074544,18.2376302 48.1333008,18.3730469 C48.4591471,18.5084635 48.7638346,18.6184896 49.0473633,18.703125 C49.3308919,18.7877604 49.7011719,18.8893229 50.1582031,19.0078125 L52.4052734,19.5664062 C53.1500651,19.7526042 53.6832682,19.9980469 54.0048828,20.3027344 C54.3264974,20.6074219 54.4873047,21.0136719 54.4873047,21.5214844 C54.4873047,22.2324219 54.2016602,22.7719727 53.6303711,23.1401367 C53.059082,23.5083008 52.3079427,23.6923828 51.3769531,23.6923828 C50.0820312,23.6839193 48.8548177,23.319987 47.6953125,22.6005859 C47.5091146,22.4820964 47.3144531,22.4228516 47.1113281,22.4228516 C46.891276,22.4228516 46.7093099,22.4969076 46.5654297,22.6450195 C46.4215495,22.7931315 46.3496094,22.96875 46.3496094,23.171875 C46.3496094,23.4511719 46.4765625,23.6796875 46.7304688,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M58.171875,19.1728516 L63.8212891,19.1728516 C64.117513,19.1728516 64.3608398,19.0924479 64.5512695,18.9316406 C64.7416992,18.7708333 64.8369141,18.5634766 64.8369141,18.3095703 C64.8369141,18.038737 64.7438151,17.8208008 64.5576172,17.6557617 C64.3714193,17.4907227 64.1259766,17.4082031 63.8212891,17.4082031 L58.171875,17.4082031 C57.8671875,17.4082031 57.6217448,17.4907227 57.4355469,17.6557617 C57.249349,17.8208008 57.15625,18.038737 57.15625,18.3095703 C57.15625,18.5634766 57.2514648,18.7708333 57.4418945,18.9316406 C57.6323242,19.0924479 57.875651,19.1728516 58.171875,19.1728516 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M67.8232422,17.7382812 C67.8655599,17.2135417 67.9777018,16.7163086 68.159668,16.246582 C68.3416341,15.7768555 68.5870768,15.3494466 68.8959961,14.9643555 C69.2049154,14.5792643 69.6005859,14.2724609 70.0830078,14.0439453 C70.5654297,13.8154297 71.1028646,13.7011719 71.6953125,13.7011719 C72.8378906,13.7011719 73.7054036,14.0777995 74.2978516,14.8310547 C74.8902995,15.5843099 75.2246094,16.5533854 75.3007812,17.7382812 L67.8232422,17.7382812 Z M71.7333984,25.2285156 C73.6292318,25.2285156 75.2457682,24.6276042 76.5830078,23.4257812 C76.819987,23.2141927 76.9384766,22.9729818 76.9384766,22.7021484 C76.9384766,22.4905599 76.8686523,22.3064779 76.7290039,22.1499023 C76.5893555,21.9933268 76.4179688,21.9150391 76.2148438,21.9150391 C76.0455729,21.9150391 75.8847656,21.9742839 75.7324219,22.0927734 C75.1230469,22.5582682 74.5179036,22.9264323 73.9169922,23.1972656 C73.3160807,23.468099 72.6389974,23.6035156 71.8857422,23.6035156 C70.6923828,23.5865885 69.7148438,23.2036133 68.953125,22.4545898 C68.1914062,21.7055664 67.8020833,20.6031901 67.7851562,19.1474609 L76.4941406,19.1474609 C76.764974,19.1474609 76.968099,19.0670573 77.1035156,18.90625 C77.2389323,18.7454427 77.3066406,18.5423177 77.3066406,18.296875 C77.28125,17.4420573 77.1585286,16.6570638 76.9384766,15.9418945 C76.7184245,15.2267253 76.3883464,14.5792643 75.9482422,13.9995117 C75.508138,13.4197591 74.9156901,12.9648438 74.1708984,12.6347656 C73.4261068,12.3046875 72.5585938,12.1396484 71.5683594,12.1396484 C70.3157552,12.1396484 69.2197266,12.4443359 68.2802734,13.0537109 C67.3408203,13.6630859 66.6383464,14.4544271 66.1728516,15.4277344 C65.7073568,16.4010417 65.4746094,17.4759115 65.4746094,18.6523437 C65.4746094,19.9895833 65.7517904,21.159668 66.3061523,22.1625977 C66.8605143,23.1655273 67.6074219,23.9251302 68.546875,24.4414063 C69.4863281,24.9576823 70.5485026,25.2200521 71.7333984,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M80.5976562,25 C80.9108073,25 81.1837565,24.9026693 81.4165039,24.7080078 C81.6492513,24.5133464 81.765625,24.2467448 81.765625,23.9082031 L81.765625,17.5732422 C81.8248698,16.3798828 82.1888021,15.4361979 82.8574219,14.7421875 C83.5260417,14.0481771 84.3470052,13.7011719 85.3203125,13.7011719 C86.234375,13.7011719 86.9431966,13.9847005 87.4467773,14.5517578 C87.9503581,15.1188151 88.2021484,15.9440104 88.2021484,17.0273438 L88.2021484,23.9082031 C88.2021484,24.2552083 88.3142904,24.5239258 88.5385742,24.7143555 C88.7628581,24.9047852 89.0315755,25 89.3447266,25 C89.6578776,25 89.9265951,24.9047852 90.1508789,24.7143555 C90.3751628,24.5239258 90.4873047,24.2552083 90.4873047,23.9082031 L90.4873047,17.0527344 C90.4873047,15.4023437 90.0725911,14.1708984 89.2431641,13.3583984 C88.413737,12.5458984 87.3007812,12.1396484 85.9042969,12.1396484 C84.9309896,12.1396484 84.0782878,12.3470052 83.3461914,12.7617188 C82.6140951,13.1764323 82.0872396,13.7688802 81.765625,14.5390625 L81.765625,13.4345703 C81.765625,13.1044922 81.6534831,12.8484701 81.4291992,12.6665039 C81.2049154,12.4845378 80.9361979,12.3935547 80.6230469,12.3935547 C80.3098958,12.3935547 80.0390625,12.4887695 79.8105469,12.6791992 C79.5820312,12.8696289 79.4677734,13.1341146 79.4677734,13.4726563 L79.4677734,23.9082031 C79.4677734,24.2552083 79.5777995,24.5239258 79.7978516,24.7143555 C80.0179036,24.9047852 80.2845052,25 80.5976562,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M98.3486328,25.2412109 C100.007487,25.2412109 101.293945,24.8730469 102.208008,24.1367188 C102.495768,23.8997396 102.639648,23.6416016 102.639648,23.3623047 C102.639648,23.1676432 102.57194,23.0047201 102.436523,22.8735352 C102.301107,22.7423503 102.136068,22.6767578 101.941406,22.6767578 C101.763672,22.6767578 101.594401,22.7317708 101.433594,22.8417969 C100.654948,23.3834635 99.6858724,23.6542969 98.5263672,23.6542969 C97.8662109,23.6542969 97.2822266,23.5167643 96.7744141,23.2416992 C96.2666016,22.9666341 95.8603516,22.5942383 95.5556641,22.1245117 C95.2509766,21.6547852 95.0224609,21.1321615 94.8701172,20.5566406 C94.7177734,19.9811198 94.6416016,19.3717448 94.6416016,18.7285156 C94.6416016,17.188151 95.0182292,15.9630534 95.7714844,15.0532227 C96.5247396,14.1433919 97.5065104,13.6884766 98.7167969,13.6884766 C99.6477865,13.6884766 100.498372,13.938151 101.268555,14.4375 C101.437826,14.547526 101.61556,14.6025391 101.801758,14.6025391 C102.004883,14.6025391 102.17627,14.5390625 102.315918,14.4121094 C102.455566,14.2851563 102.525391,14.1285807 102.525391,13.9423828 C102.525391,13.680013 102.38151,13.4345703 102.09375,13.2060547 C101.729818,12.9013672 101.238932,12.6474609 100.621094,12.4443359 C100.003255,12.2412109 99.3304036,12.1396484 98.6025391,12.1396484 C97.3583984,12.1396484 96.258138,12.4316406 95.3017578,13.015625 C94.3453776,13.5996094 93.6132812,14.3867188 93.1054688,15.3769531 C92.5976562,16.3671875 92.34375,17.4716797 92.34375,18.6904297 C92.34375,20.5777995 92.8854167,22.1414388 93.96875,23.3813477 C95.0520833,24.6212565 96.5120443,25.2412109 98.3486328,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M109.878906,25.2285156 C111.833984,25.2285156 113.382812,24.6149089 114.525391,23.3876953 C115.667969,22.1604818 116.239258,20.5947266 116.239258,18.6904297 C116.239258,16.7692057 115.665853,15.1971029 114.519043,13.9741211 C113.372233,12.7511393 111.825521,12.1396484 109.878906,12.1396484 C107.932292,12.1396484 106.385579,12.7532552 105.23877,13.9804688 C104.09196,15.2076823 103.518555,16.7776693 103.518555,18.6904297 C103.518555,20.5947266 104.09196,22.1604818 105.23877,23.3876953 C106.385579,24.6149089 107.932292,25.2285156 109.878906,25.2285156 Z M109.853516,23.6542969 C108.583984,23.6542969 107.591634,23.2078451 106.876465,22.3149414 C106.161296,21.4220378 105.803711,20.2138672 105.803711,18.6904297 C105.803711,17.1500651 106.163411,15.933431 106.882812,15.0405273 C107.602214,14.1476237 108.600911,13.7011719 109.878906,13.7011719 C111.148438,13.7011719 112.14502,14.1497396 112.868652,15.046875 C113.592285,15.9440104 113.954102,17.1585286 113.954102,18.6904297 C113.954102,20.2307943 113.596517,21.4431966 112.881348,22.3276367 C112.166178,23.2120768 111.156901,23.6542969 109.853516,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M123.084961,23.6542969 C122.424805,23.6458333 121.8514,23.4871419 121.364746,23.1782227 C120.878092,22.8693034 120.507812,22.4609375 120.253906,21.953125 C120,21.4453125 119.813802,20.918457 119.695312,20.3725586 C119.576823,19.8266602 119.517578,19.257487 119.517578,18.6650391 C119.517578,18.0556641 119.583171,17.4716797 119.714355,16.9130859 C119.84554,16.3544922 120.046549,15.8276367 120.317383,15.3325195 C120.588216,14.8374023 120.971191,14.4417318 121.466309,14.1455078 C121.961426,13.8492839 122.539062,13.7011719 123.199219,13.7011719 C123.867839,13.7011719 124.453939,13.8408203 124.95752,14.1201172 C125.4611,14.3994141 125.858887,14.7760417 126.150879,15.25 C126.442871,15.7239583 126.660807,16.2444661 126.804688,16.8115234 C126.948568,17.3785807 127.020508,17.983724 127.020508,18.6269531 C127.020508,21.9785156 125.708659,23.6542969 123.084961,23.6542969 Z M122.678711,25.2285156 C123.660482,25.2285156 124.530111,25.0232747 125.287598,24.612793 C126.045085,24.2023112 126.626953,23.5908203 127.033203,22.7783203 L127.033203,23.9082031 C127.033203,24.2382813 127.130534,24.4985352 127.325195,24.6889648 C127.519857,24.8793945 127.807617,24.9746094 128.188477,24.9746094 C128.493164,24.9746094 128.759766,24.8793945 128.988281,24.6889648 C129.216797,24.4985352 129.331055,24.2340495 129.331055,23.8955078 L129.331055,7.92480469 C129.331055,7.59472656 129.216797,7.33447266 128.988281,7.14404297 C128.759766,6.95361328 128.493164,6.85839844 128.188477,6.85839844 C127.875326,6.85839844 127.604492,6.95361328 127.375977,7.14404297 C127.147461,7.33447266 127.033203,7.59472656 127.033203,7.92480469 L127.033203,14.5136719 C126.652344,13.7434896 126.100098,13.1552734 125.376465,12.7490234 C124.652832,12.3427734 123.821289,12.1396484 122.881836,12.1396484 C122.001628,12.1396484 121.199707,12.3068034 120.476074,12.6411133 C119.752441,12.9754232 119.155762,13.4366862 118.686035,14.0249023 C118.216309,14.6131185 117.854492,15.2986654 117.600586,16.081543 C117.34668,16.8644206 117.219727,17.7086589 117.219727,18.6142578 C117.219727,19.5029297 117.3361,20.3408203 117.568848,21.1279297 C117.801595,21.9150391 118.138021,22.6153971 118.578125,23.2290039 C119.018229,23.8426107 119.591634,24.3292643 120.29834,24.6889648 C121.005046,25.0486654 121.798503,25.2285156 122.678711,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M133.751953,17.7382812 C133.794271,17.2135417 133.906413,16.7163086 134.088379,16.246582 C134.270345,15.7768555 134.515788,15.3494466 134.824707,14.9643555 C135.133626,14.5792643 135.529297,14.2724609 136.011719,14.0439453 C136.494141,13.8154297 137.031576,13.7011719 137.624023,13.7011719 C138.766602,13.7011719 139.634115,14.0777995 140.226562,14.8310547 C140.81901,15.5843099 141.15332,16.5533854 141.229492,17.7382812 L133.751953,17.7382812 Z M137.662109,25.2285156 C139.557943,25.2285156 141.174479,24.6276042 142.511719,23.4257812 C142.748698,23.2141927 142.867188,22.9729818 142.867188,22.7021484 C142.867188,22.4905599 142.797363,22.3064779 142.657715,22.1499023 C142.518066,21.9933268 142.34668,21.9150391 142.143555,21.9150391 C141.974284,21.9150391 141.813477,21.9742839 141.661133,22.0927734 C141.051758,22.5582682 140.446615,22.9264323 139.845703,23.1972656 C139.244792,23.468099 138.567708,23.6035156 137.814453,23.6035156 C136.621094,23.5865885 135.643555,23.2036133 134.881836,22.4545898 C134.120117,21.7055664 133.730794,20.6031901 133.713867,19.1474609 L142.422852,19.1474609 C142.693685,19.1474609 142.89681,19.0670573 143.032227,18.90625 C143.167643,18.7454427 143.235352,18.5423177 143.235352,18.296875 C143.209961,17.4420573 143.08724,16.6570638 142.867188,15.9418945 C142.647135,15.2267253 142.317057,14.5792643 141.876953,13.9995117 C141.436849,13.4197591 140.844401,12.9648438 140.099609,12.6347656 C139.354818,12.3046875 138.487305,12.1396484 137.49707,12.1396484 C136.244466,12.1396484 135.148438,12.4443359 134.208984,13.0537109 C133.269531,13.6630859 132.567057,14.4544271 132.101562,15.4277344 C131.636068,16.4010417 131.40332,17.4759115 131.40332,18.6523437 C131.40332,19.9895833 131.680501,21.159668 132.234863,22.1625977 C132.789225,23.1655273 133.536133,23.9251302 134.475586,24.4414063 C135.415039,24.9576823 136.477214,25.2200521 137.662109,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M146.551758,25 C146.864909,25 147.137858,24.906901 147.370605,24.7207031 C147.603353,24.5345052 147.719727,24.2721354 147.719727,23.9335938 L147.719727,17.3955078 C147.719727,16.4052734 148.034993,15.6139323 148.665527,15.0214844 C149.296061,14.4290365 150.182617,14.1328125 151.325195,14.1328125 C151.596029,14.1328125 151.803385,14.0418294 151.947266,13.8598633 C152.091146,13.6778971 152.163086,13.4599609 152.163086,13.2060547 C152.163086,12.9352214 152.084798,12.6982422 151.928223,12.4951172 C151.771647,12.2919922 151.557943,12.1904297 151.287109,12.1904297 C150.389974,12.1904297 149.621908,12.4549154 148.98291,12.9838867 C148.343913,13.5128581 147.91862,14.1708984 147.707031,14.9580078 L147.719727,13.4345703 C147.719727,13.1129557 147.607585,12.8611654 147.383301,12.6791992 C147.159017,12.4972331 146.890299,12.40625 146.577148,12.40625 C146.263997,12.40625 145.993164,12.4972331 145.764648,12.6791992 C145.536133,12.8611654 145.421875,13.1214193 145.421875,13.4599609 L145.421875,23.9335938 C145.421875,24.2721354 145.531901,24.5345052 145.751953,24.7207031 C145.972005,24.906901 146.238607,25 146.551758,25 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-6" transform="translate(0, 52)" xlink:href="#path-7" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M8.17578125,23.6542969 C5.55208333,23.6542969 4.24023438,21.9785156 4.24023438,18.6269531 C4.24023438,17.983724 4.31217448,17.3785807 4.45605469,16.8115234 C4.5999349,16.2444661 4.81787109,15.7239583 5.10986328,15.25 C5.40185547,14.7760417 5.79964193,14.3994141 6.30322266,14.1201172 C6.80680339,13.8408203 7.39290365,13.7011719 8.06152344,13.7011719 C8.72167969,13.7011719 9.29931641,13.8492839 9.79443359,14.1455078 C10.2895508,14.4417318 10.672526,14.8374023 10.9433594,15.3325195 C11.2141927,15.8276367 11.4152018,16.3544922 11.5463867,16.9130859 C11.6775716,17.4716797 11.7431641,18.0556641 11.7431641,18.6650391 C11.7431641,19.257487 11.6839193,19.8266602 11.5654297,20.3725586 C11.4469401,20.918457 11.2607422,21.4453125 11.0068359,21.953125 C10.7529297,22.4609375 10.3826497,22.8693034 9.89599609,23.1782227 C9.40934245,23.4871419 8.8359375,23.6458333 8.17578125,23.6542969 Z M8.58203125,25.2285156 C9.46223958,25.2285156 10.2556966,25.0486654 10.9624023,24.6889648 C11.6691081,24.3292643 12.242513,23.8426107 12.6826172,23.2290039 C13.1227214,22.6153971 13.4591471,21.9150391 13.6918945,21.1279297 C13.9246419,20.3408203 14.0410156,19.5029297 14.0410156,18.6142578 C14.0410156,17.7086589 13.9140625,16.8644206 13.6601563,16.081543 C13.40625,15.2986654 13.0444336,14.6131185 12.574707,14.0249023 C12.1049805,13.4366862 11.5083008,12.9754232 10.784668,12.6411133 C10.0610352,12.3068034 9.25911458,12.1396484 8.37890625,12.1396484 C7.43945313,12.1396484 6.60791016,12.3427734 5.88427734,12.7490234 C5.16064453,13.1552734 4.60839844,13.7434896 4.22753906,14.5136719 L4.22753906,7.92480469 C4.22753906,7.59472656 4.11328125,7.33447266 3.88476562,7.14404297 C3.65625,6.95361328 3.38541667,6.85839844 3.07226563,6.85839844 C2.76757812,6.85839844 2.50097656,6.95361328 2.27246094,7.14404297 C2.04394531,7.33447266 1.9296875,7.59472656 1.9296875,7.92480469 L1.9296875,23.8955078 C1.9296875,24.2340495 2.04394531,24.4985352 2.27246094,24.6889648 C2.50097656,24.8793945 2.76757812,24.9746094 3.07226563,24.9746094 C3.453125,24.9746094 3.74088542,24.8793945 3.93554688,24.6889648 C4.13020833,24.4985352 4.22753906,24.2382813 4.22753906,23.9082031 L4.22753906,22.7783203 C4.63378906,23.5908203 5.21565755,24.2023112 5.97314453,24.612793 C6.73063151,25.0232747 7.60026042,25.2285156 8.58203125,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M17.3193359,9.44824219 C17.7679036,9.44824219 18.1276042,9.31917318 18.3984375,9.06103516 C18.6692708,8.80289714 18.8046875,8.46647135 18.8046875,8.05175781 C18.8046875,7.62858073 18.6692708,7.28792318 18.3984375,7.02978516 C18.1276042,6.77164714 17.7721354,6.64257812 17.3320312,6.64257812 C16.8834635,6.64257812 16.5216471,6.77376302 16.246582,7.03613281 C15.9715169,7.2985026 15.8339844,7.63704427 15.8339844,8.05175781 C15.8339844,8.46647135 15.969401,8.80289714 16.2402344,9.06103516 C16.5110677,9.31917318 16.8707682,9.44824219 17.3193359,9.44824219 Z M17.3066406,25 C17.6197917,25 17.8927409,24.8963216 18.1254883,24.6889648 C18.3582357,24.4816081 18.4746094,24.2001953 18.4746094,23.8447266 L18.4746094,13.5234375 C18.4746094,13.1679688 18.3624674,12.8886719 18.1381836,12.6855469 C17.9138997,12.4824219 17.6494141,12.3808594 17.3447266,12.3808594 C17.0315755,12.3808594 16.7586263,12.4824219 16.5258789,12.6855469 C16.2931315,12.8886719 16.1767578,13.1679688 16.1767578,13.5234375 L16.1767578,23.8447266 C16.1767578,24.2171224 16.2867839,24.5027669 16.5068359,24.7016602 C16.726888,24.9005534 16.9934896,25 17.3066406,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M20.8515625,19.1728516 L26.5009766,19.1728516 C26.7972005,19.1728516 27.0405273,19.0924479 27.230957,18.9316406 C27.4213867,18.7708333 27.5166016,18.5634766 27.5166016,18.3095703 C27.5166016,18.038737 27.4235026,17.8208008 27.2373047,17.6557617 C27.0511068,17.4907227 26.8056641,17.4082031 26.5009766,17.4082031 L20.8515625,17.4082031 C20.546875,17.4082031 20.3014323,17.4907227 20.1152344,17.6557617 C19.9290365,17.8208008 19.8359375,18.038737 19.8359375,18.3095703 C19.8359375,18.5634766 19.9311523,18.7708333 20.121582,18.9316406 C20.3120117,19.0924479 20.5553385,19.1728516 20.8515625,19.1728516 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M30.5029297,17.7382812 C30.5452474,17.2135417 30.6573893,16.7163086 30.8393555,16.246582 C31.0213216,15.7768555 31.2667643,15.3494466 31.5756836,14.9643555 C31.8846029,14.5792643 32.2802734,14.2724609 32.7626953,14.0439453 C33.2451172,13.8154297 33.7825521,13.7011719 34.375,13.7011719 C35.5175781,13.7011719 36.3850911,14.0777995 36.9775391,14.8310547 C37.569987,15.5843099 37.9042969,16.5533854 37.9804688,17.7382812 L30.5029297,17.7382812 Z M34.4130859,25.2285156 C36.3089193,25.2285156 37.9254557,24.6276042 39.2626953,23.4257812 C39.4996745,23.2141927 39.6181641,22.9729818 39.6181641,22.7021484 C39.6181641,22.4905599 39.5483398,22.3064779 39.4086914,22.1499023 C39.269043,21.9933268 39.0976562,21.9150391 38.8945312,21.9150391 C38.7252604,21.9150391 38.5644531,21.9742839 38.4121094,22.0927734 C37.8027344,22.5582682 37.1975911,22.9264323 36.5966797,23.1972656 C35.9957682,23.468099 35.3186849,23.6035156 34.5654297,23.6035156 C33.3720703,23.5865885 32.3945312,23.2036133 31.6328125,22.4545898 C30.8710937,21.7055664 30.4817708,20.6031901 30.4648438,19.1474609 L39.1738281,19.1474609 C39.4446615,19.1474609 39.6477865,19.0670573 39.7832031,18.90625 C39.9186198,18.7454427 39.9863281,18.5423177 39.9863281,18.296875 C39.9609375,17.4420573 39.8382161,16.6570638 39.6181641,15.9418945 C39.398112,15.2267253 39.0680339,14.5792643 38.6279297,13.9995117 C38.1878255,13.4197591 37.5953776,12.9648438 36.8505859,12.6347656 C36.1057943,12.3046875 35.2382812,12.1396484 34.2480469,12.1396484 C32.9954427,12.1396484 31.8994141,12.4443359 30.9599609,13.0537109 C30.0205078,13.6630859 29.3180339,14.4544271 28.8525391,15.4277344 C28.3870443,16.4010417 28.1542969,17.4759115 28.1542969,18.6523437 C28.1542969,19.9895833 28.4314779,21.159668 28.9858398,22.1625977 C29.5402018,23.1655273 30.2871094,23.9251302 31.2265625,24.4414063 C32.1660156,24.9576823 33.2281901,25.2200521 34.4130859,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M43.2773438,25 C43.5904948,25 43.863444,24.9026693 44.0961914,24.7080078 C44.3289388,24.5133464 44.4453125,24.2467448 44.4453125,23.9082031 L44.4453125,17.5732422 C44.5045573,16.3798828 44.8684896,15.4361979 45.5371094,14.7421875 C46.2057292,14.0481771 47.0266927,13.7011719 48,13.7011719 C48.9140625,13.7011719 49.6228841,13.9847005 50.1264648,14.5517578 C50.6300456,15.1188151 50.8818359,15.9440104 50.8818359,17.0273438 L50.8818359,23.9082031 C50.8818359,24.2552083 50.9939779,24.5239258 51.2182617,24.7143555 C51.4425456,24.9047852 51.711263,25 52.0244141,25 C52.3375651,25 52.6062826,24.9047852 52.8305664,24.7143555 C53.0548503,24.5239258 53.1669922,24.2552083 53.1669922,23.9082031 L53.1669922,17.0527344 C53.1669922,15.4023437 52.7522786,14.1708984 51.9228516,13.3583984 C51.0934245,12.5458984 49.9804688,12.1396484 48.5839844,12.1396484 C47.6106771,12.1396484 46.7579753,12.3470052 46.0258789,12.7617188 C45.2937826,13.1764323 44.7669271,13.7688802 44.4453125,14.5390625 L44.4453125,13.4345703 C44.4453125,13.1044922 44.3331706,12.8484701 44.1088867,12.6665039 C43.8846029,12.4845378 43.6158854,12.3935547 43.3027344,12.3935547 C42.9895833,12.3935547 42.71875,12.4887695 42.4902344,12.6791992 C42.2617188,12.8696289 42.1474609,13.1341146 42.1474609,13.4726563 L42.1474609,23.9082031 C42.1474609,24.2552083 42.257487,24.5239258 42.4775391,24.7143555 C42.6975911,24.9047852 42.9641927,25 43.2773438,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M61.0283203,25.2412109 C62.6871745,25.2412109 63.9736328,24.8730469 64.8876953,24.1367188 C65.1754557,23.8997396 65.3193359,23.6416016 65.3193359,23.3623047 C65.3193359,23.1676432 65.2516276,23.0047201 65.1162109,22.8735352 C64.9807943,22.7423503 64.8157552,22.6767578 64.6210938,22.6767578 C64.4433594,22.6767578 64.2740885,22.7317708 64.1132812,22.8417969 C63.3346354,23.3834635 62.3655599,23.6542969 61.2060547,23.6542969 C60.5458984,23.6542969 59.9619141,23.5167643 59.4541016,23.2416992 C58.9462891,22.9666341 58.5400391,22.5942383 58.2353516,22.1245117 C57.9306641,21.6547852 57.7021484,21.1321615 57.5498047,20.5566406 C57.3974609,19.9811198 57.3212891,19.3717448 57.3212891,18.7285156 C57.3212891,17.188151 57.6979167,15.9630534 58.4511719,15.0532227 C59.2044271,14.1433919 60.1861979,13.6884766 61.3964844,13.6884766 C62.327474,13.6884766 63.1780599,13.938151 63.9482422,14.4375 C64.117513,14.547526 64.2952474,14.6025391 64.4814453,14.6025391 C64.6845703,14.6025391 64.855957,14.5390625 64.9956055,14.4121094 C65.1352539,14.2851563 65.2050781,14.1285807 65.2050781,13.9423828 C65.2050781,13.680013 65.0611979,13.4345703 64.7734375,13.2060547 C64.4095052,12.9013672 63.9186198,12.6474609 63.3007812,12.4443359 C62.6829427,12.2412109 62.0100911,12.1396484 61.2822266,12.1396484 C60.0380859,12.1396484 58.9378255,12.4316406 57.9814453,13.015625 C57.0250651,13.5996094 56.2929688,14.3867188 55.7851562,15.3769531 C55.2773438,16.3671875 55.0234375,17.4716797 55.0234375,18.6904297 C55.0234375,20.5777995 55.5651042,22.1414388 56.6484375,23.3813477 C57.7317708,24.6212565 59.1917318,25.2412109 61.0283203,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M72.5585938,25.2285156 C74.5136719,25.2285156 76.0625,24.6149089 77.2050781,23.3876953 C78.3476562,22.1604818 78.9189453,20.5947266 78.9189453,18.6904297 C78.9189453,16.7692057 78.3455404,15.1971029 77.1987305,13.9741211 C76.0519206,12.7511393 74.5052083,12.1396484 72.5585938,12.1396484 C70.6119792,12.1396484 69.0652669,12.7532552 67.918457,13.9804688 C66.7716471,15.2076823 66.1982422,16.7776693 66.1982422,18.6904297 C66.1982422,20.5947266 66.7716471,22.1604818 67.918457,23.3876953 C69.0652669,24.6149089 70.6119792,25.2285156 72.5585938,25.2285156 Z M72.5332031,23.6542969 C71.2636719,23.6542969 70.2713216,23.2078451 69.5561523,22.3149414 C68.8409831,21.4220378 68.4833984,20.2138672 68.4833984,18.6904297 C68.4833984,17.1500651 68.843099,15.933431 69.5625,15.0405273 C70.281901,14.1476237 71.280599,13.7011719 72.5585938,13.7011719 C73.828125,13.7011719 74.824707,14.1497396 75.5483398,15.046875 C76.2719727,15.9440104 76.6337891,17.1585286 76.6337891,18.6904297 C76.6337891,20.2307943 76.2762044,21.4431966 75.5610352,22.3276367 C74.8458659,23.2120768 73.8365885,23.6542969 72.5332031,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M85.7646484,23.6542969 C85.1044922,23.6458333 84.5310872,23.4871419 84.0444336,23.1782227 C83.5577799,22.8693034 83.1875,22.4609375 82.9335938,21.953125 C82.6796875,21.4453125 82.4934896,20.918457 82.375,20.3725586 C82.2565104,19.8266602 82.1972656,19.257487 82.1972656,18.6650391 C82.1972656,18.0556641 82.2628581,17.4716797 82.394043,16.9130859 C82.5252279,16.3544922 82.726237,15.8276367 82.9970703,15.3325195 C83.2679036,14.8374023 83.6508789,14.4417318 84.1459961,14.1455078 C84.6411133,13.8492839 85.21875,13.7011719 85.8789062,13.7011719 C86.547526,13.7011719 87.1336263,13.8408203 87.637207,14.1201172 C88.1407878,14.3994141 88.5385742,14.7760417 88.8305664,15.25 C89.1225586,15.7239583 89.3404948,16.2444661 89.484375,16.8115234 C89.6282552,17.3785807 89.7001953,17.983724 89.7001953,18.6269531 C89.7001953,21.9785156 88.3883464,23.6542969 85.7646484,23.6542969 Z M85.3583984,25.2285156 C86.3401693,25.2285156 87.2097982,25.0232747 87.9672852,24.612793 C88.7247721,24.2023112 89.3066406,23.5908203 89.7128906,22.7783203 L89.7128906,23.9082031 C89.7128906,24.2382813 89.8102214,24.4985352 90.0048828,24.6889648 C90.1995443,24.8793945 90.4873047,24.9746094 90.8681641,24.9746094 C91.1728516,24.9746094 91.4394531,24.8793945 91.6679688,24.6889648 C91.8964844,24.4985352 92.0107422,24.2340495 92.0107422,23.8955078 L92.0107422,7.92480469 C92.0107422,7.59472656 91.8964844,7.33447266 91.6679688,7.14404297 C91.4394531,6.95361328 91.1728516,6.85839844 90.8681641,6.85839844 C90.555013,6.85839844 90.2841797,6.95361328 90.0556641,7.14404297 C89.8271484,7.33447266 89.7128906,7.59472656 89.7128906,7.92480469 L89.7128906,14.5136719 C89.3320312,13.7434896 88.7797852,13.1552734 88.0561523,12.7490234 C87.3325195,12.3427734 86.5009766,12.1396484 85.5615234,12.1396484 C84.6813151,12.1396484 83.8793945,12.3068034 83.1557617,12.6411133 C82.4321289,12.9754232 81.8354492,13.4366862 81.3657227,14.0249023 C80.8959961,14.6131185 80.5341797,15.2986654 80.2802734,16.081543 C80.0263672,16.8644206 79.8994141,17.7086589 79.8994141,18.6142578 C79.8994141,19.5029297 80.0157878,20.3408203 80.2485352,21.1279297 C80.4812826,21.9150391 80.8177083,22.6153971 81.2578125,23.2290039 C81.6979167,23.8426107 82.2713216,24.3292643 82.9780273,24.6889648 C83.6847331,25.0486654 84.4781901,25.2285156 85.3583984,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M96.4316406,17.7382812 C96.4739583,17.2135417 96.5861003,16.7163086 96.7680664,16.246582 C96.9500326,15.7768555 97.1954753,15.3494466 97.5043945,14.9643555 C97.8133138,14.5792643 98.2089844,14.2724609 98.6914062,14.0439453 C99.1738281,13.8154297 99.711263,13.7011719 100.303711,13.7011719 C101.446289,13.7011719 102.313802,14.0777995 102.90625,14.8310547 C103.498698,15.5843099 103.833008,16.5533854 103.90918,17.7382812 L96.4316406,17.7382812 Z M100.341797,25.2285156 C102.23763,25.2285156 103.854167,24.6276042 105.191406,23.4257812 C105.428385,23.2141927 105.546875,22.9729818 105.546875,22.7021484 C105.546875,22.4905599 105.477051,22.3064779 105.337402,22.1499023 C105.197754,21.9933268 105.026367,21.9150391 104.823242,21.9150391 C104.653971,21.9150391 104.493164,21.9742839 104.34082,22.0927734 C103.731445,22.5582682 103.126302,22.9264323 102.525391,23.1972656 C101.924479,23.468099 101.247396,23.6035156 100.494141,23.6035156 C99.3007812,23.5865885 98.3232422,23.2036133 97.5615234,22.4545898 C96.7998047,21.7055664 96.4104818,20.6031901 96.3935547,19.1474609 L105.102539,19.1474609 C105.373372,19.1474609 105.576497,19.0670573 105.711914,18.90625 C105.847331,18.7454427 105.915039,18.5423177 105.915039,18.296875 C105.889648,17.4420573 105.766927,16.6570638 105.546875,15.9418945 C105.326823,15.2267253 104.996745,14.5792643 104.556641,13.9995117 C104.116536,13.4197591 103.524089,12.9648438 102.779297,12.6347656 C102.034505,12.3046875 101.166992,12.1396484 100.176758,12.1396484 C98.9241536,12.1396484 97.828125,12.4443359 96.8886719,13.0537109 C95.9492188,13.6630859 95.2467448,14.4544271 94.78125,15.4277344 C94.3157552,16.4010417 94.0830078,17.4759115 94.0830078,18.6523437 C94.0830078,19.9895833 94.3601888,21.159668 94.9145508,22.1625977 C95.4689128,23.1655273 96.2158203,23.9251302 97.1552734,24.4414063 C98.0947266,24.9576823 99.156901,25.2200521 100.341797,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M109.231445,25 C109.544596,25 109.817546,24.906901 110.050293,24.7207031 C110.28304,24.5345052 110.399414,24.2721354 110.399414,23.9335938 L110.399414,17.3955078 C110.399414,16.4052734 110.714681,15.6139323 111.345215,15.0214844 C111.975749,14.4290365 112.862305,14.1328125 114.004883,14.1328125 C114.275716,14.1328125 114.483073,14.0418294 114.626953,13.8598633 C114.770833,13.6778971 114.842773,13.4599609 114.842773,13.2060547 C114.842773,12.9352214 114.764486,12.6982422 114.60791,12.4951172 C114.451335,12.2919922 114.23763,12.1904297 113.966797,12.1904297 C113.069661,12.1904297 112.301595,12.4549154 111.662598,12.9838867 C111.0236,13.5128581 110.598307,14.1708984 110.386719,14.9580078 L110.399414,13.4345703 C110.399414,13.1129557 110.287272,12.8611654 110.062988,12.6791992 C109.838704,12.4972331 109.569987,12.40625 109.256836,12.40625 C108.943685,12.40625 108.672852,12.4972331 108.444336,12.6791992 C108.21582,12.8611654 108.101562,13.1214193 108.101562,13.4599609 L108.101562,23.9335938 C108.101562,24.2721354 108.211589,24.5345052 108.431641,24.7207031 C108.651693,24.906901 108.918294,25 109.231445,25 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-7" transform="translate(0, 92)" xlink:href="#path-8" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M3.22460937,25 C3.53776042,25 3.81070964,24.8963216 4.04345703,24.6889648 C4.27620443,24.4816081 4.39257812,24.2001953 4.39257812,23.8447266 L4.39257812,8.01367188 C4.39257812,7.65820312 4.2804362,7.37890625 4.05615234,7.17578125 C3.83186849,6.97265625 3.56738281,6.87109375 3.26269531,6.87109375 C2.94954427,6.87109375 2.67659505,6.97265625 2.44384766,7.17578125 C2.21110026,7.37890625 2.09472656,7.65820312 2.09472656,8.01367188 L2.09472656,23.8447266 C2.09472656,24.2171224 2.2047526,24.5027669 2.42480469,24.7016602 C2.64485677,24.9005534 2.91145833,25 3.22460937,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M10.9082031,23.7050781 C10.1210937,23.7050781 9.49902344,23.5252279 9.04199219,23.1655273 C8.58496094,22.8058268 8.35644531,22.2408854 8.35644531,21.4707031 C8.35644531,21.047526 8.41780599,20.7005208 8.54052734,20.4296875 C8.6632487,20.1588542 8.8515625,19.9282227 9.10546875,19.737793 C9.359375,19.5473633 9.74023438,19.4034831 10.2480469,19.3061523 C10.7558594,19.2088216 11.3398437,19.1411133 12,19.1030273 C12.6601562,19.0649414 13.514974,19.0458984 14.5644531,19.0458984 L14.5644531,19.4267578 C14.5644531,20.6708984 14.1984049,21.694987 13.4663086,22.4990234 C12.7342122,23.3030599 11.8815104,23.7050781 10.9082031,23.7050781 Z M10.6669922,25.2285156 C12.4866536,25.2285156 13.7900391,24.4033203 14.5771484,22.7529297 L14.5771484,23.9716797 C14.5771484,24.3017578 14.6850586,24.5577799 14.9008789,24.7397461 C15.1166992,24.9217122 15.3769531,25.0126953 15.6816406,25.0126953 C15.9863281,25.0126953 16.2550456,24.9174805 16.487793,24.7270508 C16.7205404,24.5366211 16.8369141,24.2763672 16.8369141,23.9462891 L16.8369141,16.5449219 C16.8369141,15.046875 16.3989258,13.938151 15.5229492,13.21875 C14.6469727,12.499349 13.4345703,12.1396484 11.8857422,12.1396484 C10.0745443,12.1396484 8.51302083,12.5078125 7.20117188,13.2441406 C6.96419271,13.3795573 6.84570312,13.5742187 6.84570312,13.828125 C6.84570312,14.0481771 6.92610677,14.2491862 7.08691406,14.4311523 C7.24772135,14.6131185 7.43815104,14.7041016 7.65820312,14.7041016 C7.77669271,14.7041016 7.87825521,14.6829427 7.96289062,14.640625 C8.39453125,14.4459635 8.75846354,14.2936198 9.0546875,14.1835937 C9.35091146,14.0735677 9.75716146,13.9635417 10.2734375,13.8535156 C10.7897135,13.7434896 11.3059896,13.6884766 11.8222656,13.6884766 C12.6855469,13.6884766 13.3583984,13.9042969 13.8408203,14.3359375 C14.3232422,14.7675781 14.5644531,15.4361979 14.5644531,16.3417969 L14.5644531,17.7255859 C13.625,17.7255859 12.8230794,17.7382812 12.1586914,17.7636719 C11.4943034,17.7890625 10.8489583,17.8334961 10.2226562,17.8969727 C9.59635417,17.9604492 9.08007812,18.0535482 8.67382812,18.1762695 C8.26757813,18.2989909 7.89518229,18.4555664 7.55664063,18.6459961 C7.21809896,18.8364258 6.95572917,19.0691732 6.76953125,19.3442383 C6.58333333,19.6193034 6.43945312,19.9388021 6.33789062,20.3027344 C6.23632812,20.6666667 6.18554688,21.0898438 6.18554688,21.5722656 C6.18554688,22.7402344 6.60026042,23.6416016 7.4296875,24.2763672 C8.25911458,24.9111328 9.33821615,25.2285156 10.6669922,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M23.6572266,25.1777344 L24.40625,25.1777344 C24.7109375,25.1777344 24.9584961,25.1692708 25.1489258,25.1523438 C25.3393555,25.1354167 25.5297852,25.0973307 25.7202148,25.0380859 C25.9106445,24.9788411 26.0524089,24.8836263 26.1455078,24.7524414 C26.2386068,24.6212565 26.2851562,24.4498698 26.2851562,24.2382813 C26.2851562,24.0013021 26.2047526,23.7939453 26.0439453,23.6162109 C25.883138,23.4384766 25.6630859,23.3496094 25.3837891,23.3496094 L25.3076172,23.3496094 L24.4443359,23.3876953 L24.3046875,23.3876953 C23.7714844,23.3876953 23.3821615,23.1951497 23.1367188,22.8100586 C22.891276,22.4249674 22.7685547,21.7965495 22.7685547,20.9248047 L22.7685547,14.0058594 L25.1044922,14.0058594 C25.6630859,14.0058594 25.9423828,13.7646484 25.9423828,13.2822266 C25.9423828,13.0537109 25.8725586,12.867513 25.7329102,12.7236328 C25.5932617,12.5797526 25.3837891,12.5078125 25.1044922,12.5078125 L22.7685547,12.5078125 L22.7685547,8.82617188 C22.7685547,8.52994792 22.6839193,8.30354818 22.5146484,8.14697266 C22.3453776,7.99039714 22.1337891,7.91210938 21.8798828,7.91210938 C21.6090495,7.91210938 21.3614909,8.0094401 21.137207,8.20410156 C20.9129232,8.39876302 20.7923177,8.63151042 20.7753906,8.90234375 L20.4707031,12.5078125 L18.9091797,12.5078125 C18.6298828,12.5078125 18.4182943,12.5776367 18.2744141,12.7172852 C18.1305339,12.8569336 18.0585938,13.0367839 18.0585938,13.2568359 C18.0585938,13.4853516 18.1326497,13.6673177 18.2807617,13.8027344 C18.4288737,13.938151 18.6425781,14.0058594 18.921875,14.0058594 L20.4707031,14.0058594 L20.4707031,21.3183594 C20.4707031,22.6302083 20.7584635,23.6013997 21.3339844,24.2319336 C21.9095052,24.8624674 22.6839193,25.1777344 23.6572266,25.1777344 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M29.5126953,17.7382812 C29.555013,17.2135417 29.6671549,16.7163086 29.8491211,16.246582 C30.0310872,15.7768555 30.2765299,15.3494466 30.5854492,14.9643555 C30.8943685,14.5792643 31.2900391,14.2724609 31.7724609,14.0439453 C32.2548828,13.8154297 32.7923177,13.7011719 33.3847656,13.7011719 C34.5273438,13.7011719 35.3948568,14.0777995 35.9873047,14.8310547 C36.5797526,15.5843099 36.9140625,16.5533854 36.9902344,17.7382812 L29.5126953,17.7382812 Z M33.4228516,25.2285156 C35.3186849,25.2285156 36.9352214,24.6276042 38.2724609,23.4257812 C38.5094401,23.2141927 38.6279297,22.9729818 38.6279297,22.7021484 C38.6279297,22.4905599 38.5581055,22.3064779 38.418457,22.1499023 C38.2788086,21.9933268 38.1074219,21.9150391 37.9042969,21.9150391 C37.735026,21.9150391 37.5742188,21.9742839 37.421875,22.0927734 C36.8125,22.5582682 36.2073568,22.9264323 35.6064453,23.1972656 C35.0055339,23.468099 34.3284505,23.6035156 33.5751953,23.6035156 C32.3818359,23.5865885 31.4042969,23.2036133 30.6425781,22.4545898 C29.8808594,21.7055664 29.4915365,20.6031901 29.4746094,19.1474609 L38.1835938,19.1474609 C38.4544271,19.1474609 38.6575521,19.0670573 38.7929688,18.90625 C38.9283854,18.7454427 38.9960938,18.5423177 38.9960938,18.296875 C38.9707031,17.4420573 38.8479818,16.6570638 38.6279297,15.9418945 C38.4078776,15.2267253 38.0777995,14.5792643 37.6376953,13.9995117 C37.1975911,13.4197591 36.6051432,12.9648438 35.8603516,12.6347656 C35.1155599,12.3046875 34.2480469,12.1396484 33.2578125,12.1396484 C32.0052083,12.1396484 30.9091797,12.4443359 29.9697266,13.0537109 C29.0302734,13.6630859 28.3277995,14.4544271 27.8623047,15.4277344 C27.3968099,16.4010417 27.1640625,17.4759115 27.1640625,18.6523437 C27.1640625,19.9895833 27.4412435,21.159668 27.9956055,22.1625977 C28.5499674,23.1655273 29.296875,23.9251302 30.2363281,24.4414063 C31.1757812,24.9576823 32.2379557,25.2200521 33.4228516,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M40.7382812,19.1728516 L46.3876953,19.1728516 C46.6839193,19.1728516 46.9272461,19.0924479 47.1176758,18.9316406 C47.3081055,18.7708333 47.4033203,18.5634766 47.4033203,18.3095703 C47.4033203,18.038737 47.3102214,17.8208008 47.1240234,17.6557617 C46.9378255,17.4907227 46.6923828,17.4082031 46.3876953,17.4082031 L40.7382812,17.4082031 C40.4335938,17.4082031 40.188151,17.4907227 40.0019531,17.6557617 C39.8157552,17.8208008 39.7226562,18.038737 39.7226562,18.3095703 C39.7226562,18.5634766 39.8178711,18.7708333 40.0083008,18.9316406 C40.1987305,19.0924479 40.4420573,19.1728516 40.7382812,19.1728516 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M50.1103516,9.44824219 C50.5589193,9.44824219 50.9186198,9.31917318 51.1894531,9.06103516 C51.4602865,8.80289714 51.5957031,8.46647135 51.5957031,8.05175781 C51.5957031,7.62858073 51.4602865,7.28792318 51.1894531,7.02978516 C50.9186198,6.77164714 50.563151,6.64257812 50.1230469,6.64257812 C49.6744792,6.64257812 49.3126628,6.77376302 49.0375977,7.03613281 C48.7625326,7.2985026 48.625,7.63704427 48.625,8.05175781 C48.625,8.46647135 48.7604167,8.80289714 49.03125,9.06103516 C49.3020833,9.31917318 49.6617839,9.44824219 50.1103516,9.44824219 Z M50.0976562,25 C50.4108073,25 50.6837565,24.8963216 50.9165039,24.6889648 C51.1492513,24.4816081 51.265625,24.2001953 51.265625,23.8447266 L51.265625,13.5234375 C51.265625,13.1679688 51.1534831,12.8886719 50.9291992,12.6855469 C50.7049154,12.4824219 50.4404297,12.3808594 50.1357422,12.3808594 C49.8225911,12.3808594 49.5496419,12.4824219 49.3168945,12.6855469 C49.0841471,12.8886719 48.9677734,13.1679688 48.9677734,13.5234375 L48.9677734,23.8447266 C48.9677734,24.2171224 49.0777995,24.5027669 49.2978516,24.7016602 C49.5179036,24.9005534 49.7845052,25 50.0976562,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M55.1914063,25 C55.5045573,25 55.7775065,24.9026693 56.0102539,24.7080078 C56.2430013,24.5133464 56.359375,24.2467448 56.359375,23.9082031 L56.359375,17.5732422 C56.4186198,16.3798828 56.7825521,15.4361979 57.4511719,14.7421875 C58.1197917,14.0481771 58.9407552,13.7011719 59.9140625,13.7011719 C60.828125,13.7011719 61.5369466,13.9847005 62.0405273,14.5517578 C62.5441081,15.1188151 62.7958984,15.9440104 62.7958984,17.0273438 L62.7958984,23.9082031 C62.7958984,24.2552083 62.9080404,24.5239258 63.1323242,24.7143555 C63.3566081,24.9047852 63.6253255,25 63.9384766,25 C64.2516276,25 64.5203451,24.9047852 64.7446289,24.7143555 C64.9689128,24.5239258 65.0810547,24.2552083 65.0810547,23.9082031 L65.0810547,17.0527344 C65.0810547,15.4023437 64.6663411,14.1708984 63.8369141,13.3583984 C63.007487,12.5458984 61.8945313,12.1396484 60.4980469,12.1396484 C59.5247396,12.1396484 58.6720378,12.3470052 57.9399414,12.7617188 C57.2078451,13.1764323 56.6809896,13.7688802 56.359375,14.5390625 L56.359375,13.4345703 C56.359375,13.1044922 56.2472331,12.8484701 56.0229492,12.6665039 C55.7986654,12.4845378 55.5299479,12.3935547 55.2167969,12.3935547 C54.9036458,12.3935547 54.6328125,12.4887695 54.4042969,12.6791992 C54.1757812,12.8696289 54.0615234,13.1341146 54.0615234,13.4726563 L54.0615234,23.9082031 C54.0615234,24.2552083 54.1715495,24.5239258 54.3916016,24.7143555 C54.6116536,24.9047852 54.8782552,25 55.1914063,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M71.9013672,25.1777344 L72.6503906,25.1777344 C72.9550781,25.1777344 73.2026367,25.1692708 73.3930664,25.1523438 C73.5834961,25.1354167 73.7739258,25.0973307 73.9643555,25.0380859 C74.1547852,24.9788411 74.2965495,24.8836263 74.3896484,24.7524414 C74.4827474,24.6212565 74.5292969,24.4498698 74.5292969,24.2382813 C74.5292969,24.0013021 74.4488932,23.7939453 74.2880859,23.6162109 C74.1272786,23.4384766 73.9072266,23.3496094 73.6279297,23.3496094 L73.5517578,23.3496094 L72.6884766,23.3876953 L72.5488281,23.3876953 C72.015625,23.3876953 71.6263021,23.1951497 71.3808594,22.8100586 C71.1354167,22.4249674 71.0126953,21.7965495 71.0126953,20.9248047 L71.0126953,14.0058594 L73.3486328,14.0058594 C73.9072266,14.0058594 74.1865234,13.7646484 74.1865234,13.2822266 C74.1865234,13.0537109 74.1166992,12.867513 73.9770508,12.7236328 C73.8374023,12.5797526 73.6279297,12.5078125 73.3486328,12.5078125 L71.0126953,12.5078125 L71.0126953,8.82617188 C71.0126953,8.52994792 70.9280599,8.30354818 70.7587891,8.14697266 C70.5895182,7.99039714 70.3779297,7.91210938 70.1240234,7.91210938 C69.8531901,7.91210938 69.6056315,8.0094401 69.3813477,8.20410156 C69.1570638,8.39876302 69.0364583,8.63151042 69.0195312,8.90234375 L68.7148438,12.5078125 L67.1533203,12.5078125 C66.8740234,12.5078125 66.6624349,12.5776367 66.5185547,12.7172852 C66.3746745,12.8569336 66.3027344,13.0367839 66.3027344,13.2568359 C66.3027344,13.4853516 66.3767904,13.6673177 66.5249023,13.8027344 C66.6730143,13.938151 66.8867188,14.0058594 67.1660156,14.0058594 L68.7148438,14.0058594 L68.7148438,21.3183594 C68.7148438,22.6302083 69.0026042,23.6013997 69.578125,24.2319336 C70.1536458,24.8624674 70.9280599,25.1777344 71.9013672,25.1777344 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M77.7568359,17.7382812 C77.7991536,17.2135417 77.9112956,16.7163086 78.0932617,16.246582 C78.2752279,15.7768555 78.5206706,15.3494466 78.8295898,14.9643555 C79.1385091,14.5792643 79.5341797,14.2724609 80.0166016,14.0439453 C80.4990234,13.8154297 81.0364583,13.7011719 81.6289062,13.7011719 C82.7714844,13.7011719 83.6389974,14.0777995 84.2314453,14.8310547 C84.8238932,15.5843099 85.1582031,16.5533854 85.234375,17.7382812 L77.7568359,17.7382812 Z M81.6669922,25.2285156 C83.5628255,25.2285156 85.179362,24.6276042 86.5166016,23.4257812 C86.7535807,23.2141927 86.8720703,22.9729818 86.8720703,22.7021484 C86.8720703,22.4905599 86.8022461,22.3064779 86.6625977,22.1499023 C86.5229492,21.9933268 86.3515625,21.9150391 86.1484375,21.9150391 C85.9791667,21.9150391 85.8183594,21.9742839 85.6660156,22.0927734 C85.0566406,22.5582682 84.4514974,22.9264323 83.8505859,23.1972656 C83.2496745,23.468099 82.5725911,23.6035156 81.8193359,23.6035156 C80.6259766,23.5865885 79.6484375,23.2036133 78.8867188,22.4545898 C78.125,21.7055664 77.7356771,20.6031901 77.71875,19.1474609 L86.4277344,19.1474609 C86.6985677,19.1474609 86.9016927,19.0670573 87.0371094,18.90625 C87.172526,18.7454427 87.2402344,18.5423177 87.2402344,18.296875 C87.2148438,17.4420573 87.0921224,16.6570638 86.8720703,15.9418945 C86.6520182,15.2267253 86.3219401,14.5792643 85.8818359,13.9995117 C85.4417318,13.4197591 84.8492839,12.9648438 84.1044922,12.6347656 C83.3597005,12.3046875 82.4921875,12.1396484 81.5019531,12.1396484 C80.249349,12.1396484 79.1533203,12.4443359 78.2138672,13.0537109 C77.2744141,13.6630859 76.5719401,14.4544271 76.1064453,15.4277344 C75.6409505,16.4010417 75.4082031,17.4759115 75.4082031,18.6523437 C75.4082031,19.9895833 75.6853841,21.159668 76.2397461,22.1625977 C76.7941081,23.1655273 77.5410156,23.9251302 78.4804688,24.4414063 C79.4199219,24.9576823 80.4820964,25.2200521 81.6669922,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M90.5566406,25 C90.8697917,25 91.1427409,24.906901 91.3754883,24.7207031 C91.6082357,24.5345052 91.7246094,24.2721354 91.7246094,23.9335938 L91.7246094,17.3955078 C91.7246094,16.4052734 92.0398763,15.6139323 92.6704102,15.0214844 C93.300944,14.4290365 94.1875,14.1328125 95.3300781,14.1328125 C95.6009115,14.1328125 95.8082682,14.0418294 95.9521484,13.8598633 C96.0960286,13.6778971 96.1679688,13.4599609 96.1679688,13.2060547 C96.1679688,12.9352214 96.089681,12.6982422 95.9331055,12.4951172 C95.7765299,12.2919922 95.5628255,12.1904297 95.2919922,12.1904297 C94.3948568,12.1904297 93.6267904,12.4549154 92.987793,12.9838867 C92.3487956,13.5128581 91.9235026,14.1708984 91.7119141,14.9580078 L91.7246094,13.4345703 C91.7246094,13.1129557 91.6124674,12.8611654 91.3881836,12.6791992 C91.1638997,12.4972331 90.8951823,12.40625 90.5820312,12.40625 C90.2688802,12.40625 89.9980469,12.4972331 89.7695312,12.6791992 C89.5410156,12.8611654 89.4267578,13.1214193 89.4267578,13.4599609 L89.4267578,23.9335938 C89.4267578,24.2721354 89.5367839,24.5345052 89.7568359,24.7207031 C89.976888,24.906901 90.2434896,25 90.5566406,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M101.299805,23.7050781 C100.512695,23.7050781 99.890625,23.5252279 99.4335938,23.1655273 C98.9765625,22.8058268 98.7480469,22.2408854 98.7480469,21.4707031 C98.7480469,21.047526 98.8094076,20.7005208 98.9321289,20.4296875 C99.0548503,20.1588542 99.2431641,19.9282227 99.4970703,19.737793 C99.7509766,19.5473633 100.131836,19.4034831 100.639648,19.3061523 C101.147461,19.2088216 101.731445,19.1411133 102.391602,19.1030273 C103.051758,19.0649414 103.906576,19.0458984 104.956055,19.0458984 L104.956055,19.4267578 C104.956055,20.6708984 104.590007,21.694987 103.85791,22.4990234 C103.125814,23.3030599 102.273112,23.7050781 101.299805,23.7050781 Z M101.058594,25.2285156 C102.878255,25.2285156 104.181641,24.4033203 104.96875,22.7529297 L104.96875,23.9716797 C104.96875,24.3017578 105.07666,24.5577799 105.29248,24.7397461 C105.508301,24.9217122 105.768555,25.0126953 106.073242,25.0126953 C106.37793,25.0126953 106.646647,24.9174805 106.879395,24.7270508 C107.112142,24.5366211 107.228516,24.2763672 107.228516,23.9462891 L107.228516,16.5449219 C107.228516,15.046875 106.790527,13.938151 105.914551,13.21875 C105.038574,12.499349 103.826172,12.1396484 102.277344,12.1396484 C100.466146,12.1396484 98.9046224,12.5078125 97.5927734,13.2441406 C97.3557943,13.3795573 97.2373047,13.5742187 97.2373047,13.828125 C97.2373047,14.0481771 97.3177083,14.2491862 97.4785156,14.4311523 C97.6393229,14.6131185 97.8297526,14.7041016 98.0498047,14.7041016 C98.1682943,14.7041016 98.2698568,14.6829427 98.3544922,14.640625 C98.7861328,14.4459635 99.1500651,14.2936198 99.4462891,14.1835937 C99.742513,14.0735677 100.148763,13.9635417 100.665039,13.8535156 C101.181315,13.7434896 101.697591,13.6884766 102.213867,13.6884766 C103.077148,13.6884766 103.75,13.9042969 104.232422,14.3359375 C104.714844,14.7675781 104.956055,15.4361979 104.956055,16.3417969 L104.956055,17.7255859 C104.016602,17.7255859 103.214681,17.7382812 102.550293,17.7636719 C101.885905,17.7890625 101.24056,17.8334961 100.614258,17.8969727 C99.9879557,17.9604492 99.4716797,18.0535482 99.0654297,18.1762695 C98.6591797,18.2989909 98.2867839,18.4555664 97.9482422,18.6459961 C97.6097005,18.8364258 97.3473307,19.0691732 97.1611328,19.3442383 C96.9749349,19.6193034 96.8310547,19.9388021 96.7294922,20.3027344 C96.6279297,20.6666667 96.5771484,21.0898438 96.5771484,21.5722656 C96.5771484,22.7402344 96.991862,23.6416016 97.8212891,24.2763672 C98.6507161,24.9111328 99.7298177,25.2285156 101.058594,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M115.089844,25.2412109 C116.748698,25.2412109 118.035156,24.8730469 118.949219,24.1367188 C119.236979,23.8997396 119.380859,23.6416016 119.380859,23.3623047 C119.380859,23.1676432 119.313151,23.0047201 119.177734,22.8735352 C119.042318,22.7423503 118.877279,22.6767578 118.682617,22.6767578 C118.504883,22.6767578 118.335612,22.7317708 118.174805,22.8417969 C117.396159,23.3834635 116.427083,23.6542969 115.267578,23.6542969 C114.607422,23.6542969 114.023438,23.5167643 113.515625,23.2416992 C113.007812,22.9666341 112.601562,22.5942383 112.296875,22.1245117 C111.992188,21.6547852 111.763672,21.1321615 111.611328,20.5566406 C111.458984,19.9811198 111.382812,19.3717448 111.382812,18.7285156 C111.382812,17.188151 111.75944,15.9630534 112.512695,15.0532227 C113.265951,14.1433919 114.247721,13.6884766 115.458008,13.6884766 C116.388997,13.6884766 117.239583,13.938151 118.009766,14.4375 C118.179036,14.547526 118.356771,14.6025391 118.542969,14.6025391 C118.746094,14.6025391 118.91748,14.5390625 119.057129,14.4121094 C119.196777,14.2851563 119.266602,14.1285807 119.266602,13.9423828 C119.266602,13.680013 119.122721,13.4345703 118.834961,13.2060547 C118.471029,12.9013672 117.980143,12.6474609 117.362305,12.4443359 C116.744466,12.2412109 116.071615,12.1396484 115.34375,12.1396484 C114.099609,12.1396484 112.999349,12.4316406 112.042969,13.015625 C111.086589,13.5996094 110.354492,14.3867188 109.84668,15.3769531 C109.338867,16.3671875 109.084961,17.4716797 109.084961,18.6904297 C109.084961,20.5777995 109.626628,22.1414388 110.709961,23.3813477 C111.793294,24.6212565 113.253255,25.2412109 115.089844,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M125.223633,25.1777344 L125.972656,25.1777344 C126.277344,25.1777344 126.524902,25.1692708 126.715332,25.1523438 C126.905762,25.1354167 127.096191,25.0973307 127.286621,25.0380859 C127.477051,24.9788411 127.618815,24.8836263 127.711914,24.7524414 C127.805013,24.6212565 127.851562,24.4498698 127.851562,24.2382813 C127.851562,24.0013021 127.771159,23.7939453 127.610352,23.6162109 C127.449544,23.4384766 127.229492,23.3496094 126.950195,23.3496094 L126.874023,23.3496094 L126.010742,23.3876953 L125.871094,23.3876953 C125.337891,23.3876953 124.948568,23.1951497 124.703125,22.8100586 C124.457682,22.4249674 124.334961,21.7965495 124.334961,20.9248047 L124.334961,14.0058594 L126.670898,14.0058594 C127.229492,14.0058594 127.508789,13.7646484 127.508789,13.2822266 C127.508789,13.0537109 127.438965,12.867513 127.299316,12.7236328 C127.159668,12.5797526 126.950195,12.5078125 126.670898,12.5078125 L124.334961,12.5078125 L124.334961,8.82617188 C124.334961,8.52994792 124.250326,8.30354818 124.081055,8.14697266 C123.911784,7.99039714 123.700195,7.91210938 123.446289,7.91210938 C123.175456,7.91210938 122.927897,8.0094401 122.703613,8.20410156 C122.479329,8.39876302 122.358724,8.63151042 122.341797,8.90234375 L122.037109,12.5078125 L120.475586,12.5078125 C120.196289,12.5078125 119.984701,12.5776367 119.84082,12.7172852 C119.69694,12.8569336 119.625,13.0367839 119.625,13.2568359 C119.625,13.4853516 119.699056,13.6673177 119.847168,13.8027344 C119.99528,13.938151 120.208984,14.0058594 120.488281,14.0058594 L122.037109,14.0058594 L122.037109,21.3183594 C122.037109,22.6302083 122.32487,23.6013997 122.900391,24.2319336 C123.475911,24.8624674 124.250326,25.1777344 125.223633,25.1777344 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M130.799805,9.44824219 C131.248372,9.44824219 131.608073,9.31917318 131.878906,9.06103516 C132.14974,8.80289714 132.285156,8.46647135 132.285156,8.05175781 C132.285156,7.62858073 132.14974,7.28792318 131.878906,7.02978516 C131.608073,6.77164714 131.252604,6.64257812 130.8125,6.64257812 C130.363932,6.64257812 130.002116,6.77376302 129.727051,7.03613281 C129.451986,7.2985026 129.314453,7.63704427 129.314453,8.05175781 C129.314453,8.46647135 129.44987,8.80289714 129.720703,9.06103516 C129.991536,9.31917318 130.351237,9.44824219 130.799805,9.44824219 Z M130.787109,25 C131.10026,25 131.37321,24.8963216 131.605957,24.6889648 C131.838704,24.4816081 131.955078,24.2001953 131.955078,23.8447266 L131.955078,13.5234375 C131.955078,13.1679688 131.842936,12.8886719 131.618652,12.6855469 C131.394368,12.4824219 131.129883,12.3808594 130.825195,12.3808594 C130.512044,12.3808594 130.239095,12.4824219 130.006348,12.6855469 C129.7736,12.8886719 129.657227,13.1679688 129.657227,13.5234375 L129.657227,23.8447266 C129.657227,24.2171224 129.767253,24.5027669 129.987305,24.7016602 C130.207357,24.9005534 130.473958,25 130.787109,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M140.248047,25.2285156 C142.203125,25.2285156 143.751953,24.6149089 144.894531,23.3876953 C146.037109,22.1604818 146.608398,20.5947266 146.608398,18.6904297 C146.608398,16.7692057 146.034993,15.1971029 144.888184,13.9741211 C143.741374,12.7511393 142.194661,12.1396484 140.248047,12.1396484 C138.301432,12.1396484 136.75472,12.7532552 135.60791,13.9804688 C134.4611,15.2076823 133.887695,16.7776693 133.887695,18.6904297 C133.887695,20.5947266 134.4611,22.1604818 135.60791,23.3876953 C136.75472,24.6149089 138.301432,25.2285156 140.248047,25.2285156 Z M140.222656,23.6542969 C138.953125,23.6542969 137.960775,23.2078451 137.245605,22.3149414 C136.530436,21.4220378 136.172852,20.2138672 136.172852,18.6904297 C136.172852,17.1500651 136.532552,15.933431 137.251953,15.0405273 C137.971354,14.1476237 138.970052,13.7011719 140.248047,13.7011719 C141.517578,13.7011719 142.51416,14.1497396 143.237793,15.046875 C143.961426,15.9440104 144.323242,17.1585286 144.323242,18.6904297 C144.323242,20.2307943 143.965658,21.4431966 143.250488,22.3276367 C142.535319,23.2120768 141.526042,23.6542969 140.222656,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M149.582031,25 C149.895182,25 150.168132,24.9026693 150.400879,24.7080078 C150.633626,24.5133464 150.75,24.2467448 150.75,23.9082031 L150.75,17.5732422 C150.809245,16.3798828 151.173177,15.4361979 151.841797,14.7421875 C152.510417,14.0481771 153.33138,13.7011719 154.304688,13.7011719 C155.21875,13.7011719 155.927572,13.9847005 156.431152,14.5517578 C156.934733,15.1188151 157.186523,15.9440104 157.186523,17.0273438 L157.186523,23.9082031 C157.186523,24.2552083 157.298665,24.5239258 157.522949,24.7143555 C157.747233,24.9047852 158.015951,25 158.329102,25 C158.642253,25 158.91097,24.9047852 159.135254,24.7143555 C159.359538,24.5239258 159.47168,24.2552083 159.47168,23.9082031 L159.47168,17.0527344 C159.47168,15.4023437 159.056966,14.1708984 158.227539,13.3583984 C157.398112,12.5458984 156.285156,12.1396484 154.888672,12.1396484 C153.915365,12.1396484 153.062663,12.3470052 152.330566,12.7617188 C151.59847,13.1764323 151.071615,13.7688802 150.75,14.5390625 L150.75,13.4345703 C150.75,13.1044922 150.637858,12.8484701 150.413574,12.6665039 C150.18929,12.4845378 149.920573,12.3935547 149.607422,12.3935547 C149.294271,12.3935547 149.023438,12.4887695 148.794922,12.6791992 C148.566406,12.8696289 148.452148,13.1341146 148.452148,13.4726563 L148.452148,23.9082031 C148.452148,24.2552083 148.562174,24.5239258 148.782227,24.7143555 C149.002279,24.9047852 149.26888,25 149.582031,25 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+            </g>
+        </g>
+        <g id="List-@Pooling-Tasks" stroke-width="1" transform="translate(258, 40)">
+            <g id="header-@2_standard" xlink:href="#path-9" fill="#9172E2">
+                <g id="bg">
+                    <path d="M20,0 L170,0 C181.045695,-3.55271368e-15 190,8.954305 190,20 L190,52 L190,52 L0,52 L0,20 C0,8.954305 8.954305,0 20,0 Z" id="header-bg" fill-opacity="0.1"></path>
+                    <path d="M170,0 C181.045695,0 190,8.954305 190,20 L190,52 L0,52 L0,20 C0,8.954305 8.954305,0 20,0 L170,0 Z M170,1.6 L20,1.6 L18.118707,1.69499719 C8.84039506,2.63726101 1.6,10.4730881 1.6,20 L1.6,50.4 L188.4,50.4 L188.4,20 C188.4,9.8379606 180.162039,1.6 170,1.6 Z" id="header-bg" fill-rule="nonzero"></path>
+                </g>
+                <g id="Title" transform="translate(0, 12)">
+                    <path d="M21.9277344,22.0117188 C22.3886719,22.0117188 22.7851562,21.8515625 23.1171875,21.53125 C23.4492188,21.2109375 23.6152344,20.7890625 23.6152344,20.265625 L23.6152344,15.2851563 L26.8847656,15.2851563 C31.1582031,15.2851563 33.2949219,13.5820313 33.2949219,10.1757813 C33.2949219,9.24609375 33.1503906,8.44726562 32.8613281,7.77929687 C32.5722656,7.11132813 32.1484375,6.58398438 31.5898438,6.19726562 C31.03125,5.81054688 30.3867188,5.52929688 29.65625,5.35351562 C28.9257813,5.17773438 28.0761719,5.08984375 27.1074219,5.08984375 L22.0683594,5.08984375 C21.4902344,5.08984375 21.0410156,5.28515625 20.7207031,5.67578125 C20.4003906,6.06640625 20.2402344,6.55859375 20.2402344,7.15234375 L20.2402344,20.265625 C20.2402344,20.7890625 20.4082031,21.2109375 20.7441406,21.53125 C21.0800781,21.8515625 21.4746094,22.0117188 21.9277344,22.0117188 Z M23.6152344,12.8476562 L23.6152344,7.65625 L26.7441406,7.65625 C27.2910156,7.65625 27.7460938,7.6875 28.109375,7.75 C28.4726562,7.8125 28.8046875,7.9296875 29.1054688,8.1015625 C29.40625,8.2734375 29.6269531,8.53125 29.7675781,8.875 C29.9082031,9.21875 29.9785156,9.65234375 29.9785156,10.1757813 C29.9785156,10.7148438 29.9082031,11.1601562 29.7675781,11.5117188 C29.6269531,11.8632813 29.40625,12.1347656 29.1054688,12.3261719 C28.8046875,12.5175781 28.4648438,12.6523438 28.0859375,12.7304688 C27.7070312,12.8085938 27.2324219,12.8476562 26.6621094,12.8476562 L23.6152344,12.8476562 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M40.7949219,20.1367188 C39.7871094,20.1367188 39.0019531,19.7851563 38.4394531,19.0820312 C37.8769531,18.3789062 37.5957031,17.3945313 37.5957031,16.1289062 C37.5957031,14.8476562 37.875,13.8535156 38.4335938,13.1464844 C38.9921875,12.4394531 39.7792969,12.0859375 40.7949219,12.0859375 C41.8105469,12.0859375 42.5996094,12.4414062 43.1621094,13.1523438 C43.7246094,13.8632813 44.0058594,14.8554688 44.0058594,16.1289062 C44.0058594,17.3945313 43.7246094,18.3789062 43.1621094,19.0820312 C42.5996094,19.7851563 41.8105469,20.1367188 40.7949219,20.1367188 Z M40.7949219,22.140625 C41.6230469,22.140625 42.3808594,22.0273437 43.0683594,21.8007813 C43.7558594,21.5742188 44.3359375,21.2714844 44.8085938,20.8925781 C45.28125,20.5136719 45.6796875,20.0644531 46.0039062,19.5449219 C46.328125,19.0253906 46.5664062,18.4804688 46.71875,17.9101562 C46.8710938,17.3398438 46.9472656,16.7460938 46.9472656,16.1289062 C46.9472656,15.4804688 46.8671875,14.859375 46.7070312,14.265625 C46.546875,13.671875 46.2988281,13.1191406 45.9628906,12.6074219 C45.6269531,12.0957031 45.2207031,11.6542969 44.7441406,11.2832031 C44.2675781,10.9121094 43.6914062,10.6191406 43.015625,10.4042969 C42.3398438,10.1894531 41.5996094,10.0820312 40.7949219,10.0820312 C39.9746094,10.0820312 39.2226562,10.1933594 38.5390625,10.4160156 C37.8554687,10.6386719 37.2773438,10.9414062 36.8046875,11.3242188 C36.3320312,11.7070312 35.9316406,12.15625 35.6035156,12.671875 C35.2753906,13.1875 35.0351562,13.7363281 34.8828125,14.3183594 C34.7304688,14.9003906 34.6542969,15.5039062 34.6542969,16.1289062 C34.6542969,16.9179687 34.7792969,17.6640625 35.0292969,18.3671875 C35.2792969,19.0703125 35.6445312,19.7070312 36.125,20.2773438 C36.6054688,20.8476563 37.2480469,21.3007813 38.0527344,21.6367188 C38.8574219,21.9726562 39.7714844,22.140625 40.7949219,22.140625 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M54.9628906,20.1367188 C53.9550781,20.1367188 53.1699219,19.7851563 52.6074219,19.0820312 C52.0449219,18.3789062 51.7636719,17.3945313 51.7636719,16.1289062 C51.7636719,14.8476562 52.0429688,13.8535156 52.6015625,13.1464844 C53.1601562,12.4394531 53.9472656,12.0859375 54.9628906,12.0859375 C55.9785156,12.0859375 56.7675781,12.4414062 57.3300781,13.1523438 C57.8925781,13.8632813 58.1738281,14.8554688 58.1738281,16.1289062 C58.1738281,17.3945313 57.8925781,18.3789062 57.3300781,19.0820312 C56.7675781,19.7851563 55.9785156,20.1367188 54.9628906,20.1367188 Z M54.9628906,22.140625 C55.7910156,22.140625 56.5488281,22.0273437 57.2363281,21.8007813 C57.9238281,21.5742188 58.5039063,21.2714844 58.9765625,20.8925781 C59.4492188,20.5136719 59.8476563,20.0644531 60.171875,19.5449219 C60.4960938,19.0253906 60.734375,18.4804688 60.8867188,17.9101562 C61.0390625,17.3398438 61.1152344,16.7460938 61.1152344,16.1289062 C61.1152344,15.4804688 61.0351563,14.859375 60.875,14.265625 C60.7148438,13.671875 60.4667969,13.1191406 60.1308594,12.6074219 C59.7949219,12.0957031 59.3886719,11.6542969 58.9121094,11.2832031 C58.4355469,10.9121094 57.859375,10.6191406 57.1835938,10.4042969 C56.5078125,10.1894531 55.7675781,10.0820312 54.9628906,10.0820312 C54.1425781,10.0820312 53.390625,10.1933594 52.7070312,10.4160156 C52.0234375,10.6386719 51.4453125,10.9414062 50.9726562,11.3242188 C50.5,11.7070312 50.0996094,12.15625 49.7714844,12.671875 C49.4433594,13.1875 49.203125,13.7363281 49.0507812,14.3183594 C48.8984375,14.9003906 48.8222656,15.5039062 48.8222656,16.1289062 C48.8222656,16.9179687 48.9472656,17.6640625 49.1972656,18.3671875 C49.4472656,19.0703125 49.8125,19.7070312 50.2929688,20.2773438 C50.7734375,20.8476563 51.4160156,21.3007813 52.2207031,21.6367188 C53.0253906,21.9726562 53.9394531,22.140625 54.9628906,22.140625 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M65.1113281,22 C65.5488281,22 65.9101562,21.8554688 66.1953125,21.5664062 C66.4804688,21.2773438 66.6230469,20.875 66.6230469,20.359375 L66.6230469,6.75390625 C66.6230469,6.23828125 66.4824219,5.8359375 66.2011719,5.546875 C65.9199219,5.2578125 65.5644531,5.11328125 65.1347656,5.11328125 C64.7050781,5.11328125 64.3535156,5.2578125 64.0800781,5.546875 C63.8066406,5.8359375 63.6699219,6.23828125 63.6699219,6.75390625 L63.6699219,20.359375 C63.6699219,20.8828125 63.8046875,21.2871094 64.0742188,21.5722656 C64.34375,21.8574219 64.6894531,22 65.1113281,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M71.2753906,22 C71.7050781,22 72.0585938,21.8554688 72.3359375,21.5664062 C72.6132812,21.2773437 72.7519531,20.875 72.7519531,20.359375 L72.7519531,11.8984375 C72.7519531,11.375 72.6132812,10.96875 72.3359375,10.6796875 C72.0585938,10.390625 71.7050781,10.2460938 71.2753906,10.2460938 C70.8457031,10.2460938 70.4941406,10.390625 70.2207031,10.6796875 C69.9472656,10.96875 69.8105469,11.375 69.8105469,11.8984375 L69.8105469,20.359375 C69.8105469,20.8828125 69.9472656,21.2871094 70.2207031,21.5722656 C70.4941406,21.8574219 70.8457031,22 71.2753906,22 Z M71.2753906,7.90234375 C71.7832031,7.90234375 72.1953125,7.75195312 72.5117188,7.45117188 C72.828125,7.15039063 72.9863281,6.7578125 72.9863281,6.2734375 C72.9863281,5.7890625 72.8300781,5.3984375 72.5175781,5.1015625 C72.2050781,4.8046875 71.7949219,4.65625 71.2871094,4.65625 C70.7714844,4.65625 70.3554688,4.8046875 70.0390625,5.1015625 C69.7226562,5.3984375 69.5644531,5.7890625 69.5644531,6.2734375 C69.5644531,6.7578125 69.7226562,7.15039063 70.0390625,7.45117188 C70.3554688,7.75195312 70.7675781,7.90234375 71.2753906,7.90234375 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M77.1933594,22 C77.6230469,22 77.9765625,21.8574219 78.2539062,21.5722656 C78.53125,21.2871094 78.6699219,20.890625 78.6699219,20.3828125 L78.6699219,15.4257812 C78.6699219,14.4335938 78.9550781,13.6289063 79.5253906,13.0117187 C80.0957031,12.3945312 80.7714844,12.0859375 81.5527344,12.0859375 C82.2011719,12.0859375 82.734375,12.2988281 83.1523438,12.7246094 C83.5703125,13.1503906 83.7792969,13.765625 83.7792969,14.5703125 L83.7792969,20.3828125 C83.7792969,20.890625 83.9160156,21.2871094 84.1894531,21.5722656 C84.4628906,21.8574219 84.8066406,22 85.2207031,22 C85.6582031,22 86.0175781,21.8574219 86.2988281,21.5722656 C86.5800781,21.2871094 86.7207031,20.890625 86.7207031,20.3828125 L86.7207031,14.5820312 C86.7207031,13.8320313 86.6074219,13.1640625 86.3808594,12.578125 C86.1542969,11.9921875 85.8457031,11.5214844 85.4550781,11.1660156 C85.0644531,10.8105469 84.6210938,10.5410156 84.125,10.3574219 C83.6289062,10.1738281 83.0996094,10.0820312 82.5371094,10.0820312 C81.6230469,10.0820312 80.8359375,10.2539062 80.1757812,10.5976562 C79.515625,10.9414062 79.0136719,11.4453125 78.6699219,12.109375 L78.6699219,11.6992188 C78.6699219,11.2382812 78.5332031,10.8808594 78.2597656,10.6269531 C77.9863281,10.3730469 77.6386719,10.2460938 77.2167969,10.2460938 C76.7871094,10.2460938 76.4316406,10.375 76.1503906,10.6328125 C75.8691406,10.890625 75.7285156,11.2539062 75.7285156,11.7226563 L75.7285156,20.3828125 C75.7285156,20.890625 75.8652344,21.2871094 76.1386719,21.5722656 C76.4121094,21.8574219 76.7636719,22 77.1933594,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M98.1582031,21.8476563 C98.1582031,22.9570313 97.8535156,23.7832031 97.2441406,24.3261719 C96.6347656,24.8691406 95.7832031,25.140625 94.6894531,25.140625 C94.3769531,25.140625 94.0585938,25.1132812 93.734375,25.0585938 C93.4101562,25.0039062 93.1660156,24.953125 93.0019531,24.90625 C92.8378906,24.859375 92.5800781,24.7753906 92.2285156,24.6542969 C91.8769531,24.5332031 91.6660156,24.4609375 91.5957031,24.4375 C91.4707031,24.390625 91.3457031,24.3671875 91.2207031,24.3671875 C90.9472656,24.3671875 90.7207031,24.46875 90.5410156,24.671875 C90.3613281,24.875 90.2714844,25.109375 90.2714844,25.375 C90.2714844,25.75 90.4511719,26.046875 90.8105469,26.265625 C91.2324219,26.5234375 91.8242188,26.7402344 92.5859375,26.9160156 C93.3476562,27.0917969 94.1464844,27.1796875 94.9824219,27.1796875 C96.8261719,27.1796875 98.2851562,26.6953125 99.359375,25.7265625 C100.433594,24.7578125 100.970703,23.3476563 100.970703,21.4960938 L100.970703,11.8867188 C100.970703,11.3710938 100.839844,10.96875 100.578125,10.6796875 C100.316406,10.390625 99.9824219,10.2460938 99.5761719,10.2460938 C99.2167969,10.2460938 98.9101562,10.3535156 98.65625,10.5683594 C98.4023438,10.7832031 98.2558594,11.0898438 98.2167969,11.4882813 L98.2167969,12.0273438 C97.8261719,11.4023438 97.3613281,10.9257812 96.8222656,10.5976562 C96.2832031,10.2695312 95.5410156,10.1054688 94.5957031,10.1054688 C92.9003906,10.1054688 91.5585938,10.6699219 90.5703125,11.7988281 C89.5820312,12.9277344 89.0878906,14.3945312 89.0878906,16.1992188 C89.0878906,17.9882812 89.5957031,19.4042969 90.6113281,20.4472656 C91.6269531,21.4902344 92.9785156,22.0117188 94.6660156,22.0117188 C96.3300781,22.0117188 97.4941406,21.3828125 98.1582031,20.125 L98.1582031,21.8476563 Z M95.1933594,20.0546875 C94.3027344,20.0390625 93.5625,19.7011719 92.9726562,19.0410156 C92.3828125,18.3808594 92.0878906,17.40625 92.0878906,16.1171875 C92.0878906,15.671875 92.1230469,15.2558594 92.1933594,14.8691406 C92.2636719,14.4824219 92.3769531,14.1152344 92.5332031,13.7675781 C92.6894531,13.4199219 92.8847656,13.1230469 93.1191406,12.8769531 C93.3535156,12.6308594 93.6464844,12.4355469 93.9980469,12.2910156 C94.3496094,12.1464844 94.7441406,12.0742188 95.1816406,12.0742188 C97.1660156,12.0742188 98.1582031,13.4296875 98.1582031,16.140625 C98.1582031,17.4765625 97.8886719,18.4589844 97.3496094,19.0878906 C96.8105469,19.7167969 96.0917969,20.0390625 95.1933594,20.0546875 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M116.205078,22.0117188 C116.666016,22.0117188 117.0625,21.8496094 117.394531,21.5253906 C117.726562,21.2011719 117.892578,20.7773438 117.892578,20.2539063 L117.892578,7.76171875 L121.701172,7.76171875 C122.091797,7.76171875 122.404297,7.62890625 122.638672,7.36328125 C122.873047,7.09765625 122.990234,6.78515625 122.990234,6.42578125 C122.990234,6.07421875 122.871094,5.76367188 122.632812,5.49414063 C122.394531,5.22460938 122.083984,5.08984375 121.701172,5.08984375 L110.708984,5.08984375 C110.318359,5.08984375 110.003906,5.22460938 109.765625,5.49414063 C109.527344,5.76367188 109.408203,6.078125 109.408203,6.4375 C109.408203,6.796875 109.525391,7.10742188 109.759766,7.36914063 C109.994141,7.63085938 110.310547,7.76171875 110.708984,7.76171875 L114.529297,7.76171875 L114.529297,20.2539063 C114.529297,20.7773438 114.695312,21.2011719 115.027344,21.5253906 C115.359375,21.8496094 115.751953,22.0117188 116.205078,22.0117188 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M128.134766,22.140625 C129.041016,22.140625 129.773438,21.9550781 130.332031,21.5839844 C130.890625,21.2128906 131.294922,20.7304688 131.544922,20.1367188 L131.544922,20.5234375 C131.544922,20.9921875 131.683594,21.3554688 131.960938,21.6132812 C132.238281,21.8710938 132.568359,22 132.951172,22 C133.333984,22 133.662109,21.8710938 133.935547,21.6132812 C134.208984,21.3554688 134.345703,20.9921875 134.345703,20.5234375 L134.345703,14.5703125 C134.345703,13.7734375 134.226562,13.0820312 133.988281,12.4960937 C133.75,11.9101563 133.412109,11.4453125 132.974609,11.1015625 C132.537109,10.7578125 132.033203,10.5039062 131.462891,10.3398438 C130.892578,10.1757812 130.248047,10.09375 129.529297,10.09375 C127.943359,10.09375 126.501953,10.3984375 125.205078,11.0078125 C124.861328,11.171875 124.689453,11.4453125 124.689453,11.828125 C124.689453,12.1171875 124.785156,12.3769531 124.976562,12.6074219 C125.167969,12.8378906 125.400391,12.953125 125.673828,12.953125 C125.806641,12.953125 125.916016,12.9335938 126.001953,12.8945313 C126.009766,12.8945313 126.095703,12.8613281 126.259766,12.7949219 C126.423828,12.7285156 126.529297,12.6875 126.576172,12.671875 C126.623047,12.65625 126.728516,12.6191406 126.892578,12.5605469 C127.056641,12.5019531 127.181641,12.4609375 127.267578,12.4375 C127.353516,12.4140625 127.478516,12.3789062 127.642578,12.3320313 C127.806641,12.2851562 127.947266,12.2519531 128.064453,12.2324219 C128.181641,12.2128906 128.318359,12.1894531 128.474609,12.1621094 C128.630859,12.1347656 128.785156,12.1152344 128.9375,12.1035156 C129.089844,12.0917969 129.240234,12.0859375 129.388672,12.0859375 C130.076172,12.0859375 130.59375,12.2558594 130.941406,12.5957031 C131.289062,12.9355469 131.462891,13.4726562 131.462891,14.2070312 L131.462891,15.109375 C130.666016,15.109375 129.986328,15.1191406 129.423828,15.1386719 C128.861328,15.1582031 128.304688,15.1992187 127.753906,15.2617188 C127.203125,15.3242188 126.748047,15.4121094 126.388672,15.5253906 C126.029297,15.6386719 125.689453,15.7851562 125.369141,15.9648438 C125.048828,16.1445313 124.800781,16.3632812 124.625,16.6210938 C124.449219,16.8789063 124.3125,17.1855469 124.214844,17.5410156 C124.117188,17.8964844 124.068359,18.3085938 124.068359,18.7773438 C124.068359,19.8320313 124.449219,20.6542969 125.210938,21.2441406 C125.972656,21.8339844 126.947266,22.1328125 128.134766,22.140625 Z M128.498047,20.2890625 C127.974609,20.2890625 127.554688,20.1464844 127.238281,19.8613281 C126.921875,19.5761719 126.763672,19.1601562 126.763672,18.6132812 C126.763672,18.2539062 126.818359,17.9589844 126.927734,17.7285156 C127.037109,17.4980469 127.189453,17.3105469 127.384766,17.1660156 C127.580078,17.0214844 127.882812,16.9160156 128.292969,16.8496094 C128.703125,16.7832031 129.136719,16.7382812 129.59375,16.7148438 C130.050781,16.6914062 130.669922,16.6796875 131.451172,16.6796875 L131.451172,17.0195312 C131.451172,17.9648438 131.150391,18.7460938 130.548828,19.3632812 C129.947266,19.9804688 129.263672,20.2890625 128.498047,20.2890625 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M137.451172,20.8164062 C138.693359,21.7226563 140.224609,22.1757812 142.044922,22.1757812 C143.451172,22.1757812 144.59375,21.859375 145.472656,21.2265625 C146.351562,20.59375 146.791016,19.6796875 146.791016,18.484375 C146.791016,17.53125 146.5,16.7851563 145.917969,16.2460937 C145.335938,15.7070312 144.416016,15.28125 143.158203,14.96875 L141.353516,14.5117188 C140.705078,14.3632813 140.265625,14.2011719 140.035156,14.0253906 C139.804688,13.8496094 139.689453,13.578125 139.689453,13.2109375 C139.689453,12.796875 139.900391,12.4902344 140.322266,12.2910156 C140.744141,12.0917969 141.255859,11.9921875 141.857422,11.9921875 C142.037109,11.9921875 142.214844,12 142.390625,12.015625 C142.566406,12.03125 142.744141,12.0566406 142.923828,12.0917969 C143.103516,12.1269531 143.257812,12.1582031 143.386719,12.1855469 C143.515625,12.2128906 143.669922,12.2558594 143.849609,12.3144531 C144.029297,12.3730469 144.152344,12.4121094 144.21875,12.4316406 C144.285156,12.4511719 144.400391,12.4921875 144.564453,12.5546875 C144.728516,12.6171875 144.814453,12.6484375 144.822266,12.6484375 C144.994141,12.7109375 145.146484,12.7421875 145.279297,12.7421875 C145.544922,12.7421875 145.757812,12.6542969 145.917969,12.4785156 C146.078125,12.3027344 146.158203,12.0898438 146.158203,11.8398438 C146.158203,11.4179688 145.947266,11.1054688 145.525391,10.9023438 C144.314453,10.3164062 143.033203,10.0234375 141.681641,10.0234375 C140.283203,10.0234375 139.154297,10.3105469 138.294922,10.8847656 C137.435547,11.4589844 137.005859,12.28125 137.005859,13.3515625 C137.005859,13.796875 137.064453,14.1855469 137.181641,14.5175781 C137.298828,14.8496094 137.451172,15.1269531 137.638672,15.3496094 C137.826172,15.5722656 138.095703,15.7753906 138.447266,15.9589844 C138.798828,16.1425781 139.146484,16.2929687 139.490234,16.4101562 C139.833984,16.5273438 140.283203,16.6601563 140.837891,16.8085938 L142.666016,17.2773438 C143.751953,17.5429688 144.294922,18.0078125 144.294922,18.671875 C144.294922,19.046875 144.167969,19.3535156 143.914062,19.5917969 C143.660156,19.8300781 143.355469,19.9941406 143,20.0839844 C142.644531,20.1738281 142.240234,20.21875 141.787109,20.21875 C141.326172,20.21875 140.865234,20.1542969 140.404297,20.0253906 C139.943359,19.8964844 139.609375,19.78125 139.402344,19.6796875 C139.195312,19.578125 138.919922,19.4296875 138.576172,19.234375 C138.365234,19.1171875 138.162109,19.0585937 137.966797,19.0585937 C137.693359,19.0585937 137.46875,19.15625 137.292969,19.3515625 C137.117188,19.546875 137.029297,19.7734375 137.029297,20.03125 C137.029297,20.3515625 137.169922,20.6132813 137.451172,20.8164062 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M150.623047,22 C151.060547,22 151.419922,21.8554688 151.701172,21.5664062 C151.982422,21.2773438 152.123047,20.8710937 152.123047,20.3476563 L152.123047,16.1054688 L157.232422,21.5429687 C157.513672,21.8710937 157.853516,22.0351562 158.251953,22.0351562 C158.611328,22.0351562 158.929688,21.9042969 159.207031,21.6425781 C159.484375,21.3808594 159.623047,21.0742187 159.623047,20.7226562 C159.623047,20.4179688 159.505859,20.1367187 159.271484,19.8789063 L155.228516,15.6601563 L158.767578,12.2617188 C158.986328,12.0351563 159.095703,11.765625 159.095703,11.453125 C159.095703,11.1015625 158.966797,10.7871094 158.708984,10.5097656 C158.451172,10.2324219 158.158203,10.09375 157.830078,10.09375 C157.509766,10.09375 157.224609,10.2304688 156.974609,10.5039062 L152.123047,15.3320312 L152.123047,6.75390625 C152.123047,6.23046875 151.984375,5.82421875 151.707031,5.53515625 C151.429688,5.24609375 151.076172,5.1015625 150.646484,5.1015625 C150.216797,5.1015625 149.865234,5.24609375 149.591797,5.53515625 C149.318359,5.82421875 149.181641,6.23046875 149.181641,6.75390625 L149.181641,20.3476563 C149.181641,20.8710937 149.316406,21.2773438 149.585938,21.5664062 C149.855469,21.8554688 150.201172,22 150.623047,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M161.568359,20.8164062 C162.810547,21.7226563 164.341797,22.1757812 166.162109,22.1757812 C167.568359,22.1757812 168.710938,21.859375 169.589844,21.2265625 C170.46875,20.59375 170.908203,19.6796875 170.908203,18.484375 C170.908203,17.53125 170.617188,16.7851563 170.035156,16.2460937 C169.453125,15.7070312 168.533203,15.28125 167.275391,14.96875 L165.470703,14.5117188 C164.822266,14.3632813 164.382812,14.2011719 164.152344,14.0253906 C163.921875,13.8496094 163.806641,13.578125 163.806641,13.2109375 C163.806641,12.796875 164.017578,12.4902344 164.439453,12.2910156 C164.861328,12.0917969 165.373047,11.9921875 165.974609,11.9921875 C166.154297,11.9921875 166.332031,12 166.507812,12.015625 C166.683594,12.03125 166.861328,12.0566406 167.041016,12.0917969 C167.220703,12.1269531 167.375,12.1582031 167.503906,12.1855469 C167.632812,12.2128906 167.787109,12.2558594 167.966797,12.3144531 C168.146484,12.3730469 168.269531,12.4121094 168.335938,12.4316406 C168.402344,12.4511719 168.517578,12.4921875 168.681641,12.5546875 C168.845703,12.6171875 168.931641,12.6484375 168.939453,12.6484375 C169.111328,12.7109375 169.263672,12.7421875 169.396484,12.7421875 C169.662109,12.7421875 169.875,12.6542969 170.035156,12.4785156 C170.195312,12.3027344 170.275391,12.0898438 170.275391,11.8398438 C170.275391,11.4179688 170.064453,11.1054688 169.642578,10.9023438 C168.431641,10.3164062 167.150391,10.0234375 165.798828,10.0234375 C164.400391,10.0234375 163.271484,10.3105469 162.412109,10.8847656 C161.552734,11.4589844 161.123047,12.28125 161.123047,13.3515625 C161.123047,13.796875 161.181641,14.1855469 161.298828,14.5175781 C161.416016,14.8496094 161.568359,15.1269531 161.755859,15.3496094 C161.943359,15.5722656 162.212891,15.7753906 162.564453,15.9589844 C162.916016,16.1425781 163.263672,16.2929687 163.607422,16.4101562 C163.951172,16.5273438 164.400391,16.6601563 164.955078,16.8085938 L166.783203,17.2773438 C167.869141,17.5429688 168.412109,18.0078125 168.412109,18.671875 C168.412109,19.046875 168.285156,19.3535156 168.03125,19.5917969 C167.777344,19.8300781 167.472656,19.9941406 167.117188,20.0839844 C166.761719,20.1738281 166.357422,20.21875 165.904297,20.21875 C165.443359,20.21875 164.982422,20.1542969 164.521484,20.0253906 C164.060547,19.8964844 163.726562,19.78125 163.519531,19.6796875 C163.3125,19.578125 163.037109,19.4296875 162.693359,19.234375 C162.482422,19.1171875 162.279297,19.0585937 162.083984,19.0585937 C161.810547,19.0585937 161.585938,19.15625 161.410156,19.3515625 C161.234375,19.546875 161.146484,19.7734375 161.146484,20.03125 C161.146484,20.3515625 161.287109,20.6132813 161.568359,20.8164062 Z" id="Path" fill-rule="nonzero"></path>
+                </g>
+            </g>
+            <g id="List" transform="translate(0, 50)">
+                <g id="bg">
+                    <path d="M0,0 L190,0 L190,216 C190,220.418278 186.418278,224 182,224 L8,224 C3.581722,224 0,220.418278 0,216 L0,0 L0,0 Z" fill="#FFFFFF"></path>
+                    <path d="M190,0 L190,216 C190,220.418278 186.418278,224 182,224 L8,224 C3.581722,224 0,220.418278 0,216 L0,0 L190,0 Z M188.4,1.60000008 L1.6,1.60000008 L1.6,216 C1.6,219.534622 4.4653776,222.4 8,222.4 L182,222.4 C185.534622,222.4 188.4,219.534622 188.4,216 L188.4,1.60000008 Z" fill="#9172E2" fill-rule="nonzero"></path>
+                </g>
+                <g id="List-Item-/-01_@text_#333" transform="translate(0, 12)" xlink:href="#path-10" fill="#333333">
+                    <g id="Text" transform="translate(17.0664, 10.6426)">
+                        <path d="M6.00488281,18.5986328 C7.66373698,18.5986328 8.95019531,18.2304688 9.86425781,17.4941406 C10.1520182,17.2571615 10.2958984,16.9990234 10.2958984,16.7197266 C10.2958984,16.5250651 10.2281901,16.3621419 10.0927734,16.230957 C9.95735677,16.0997721 9.79231771,16.0341797 9.59765625,16.0341797 C9.41992187,16.0341797 9.25065104,16.0891927 9.08984375,16.1992188 C8.31119792,16.7408854 7.3421224,17.0117188 6.18261719,17.0117188 C5.52246094,17.0117188 4.93847656,16.8741862 4.43066406,16.5991211 C3.92285156,16.324056 3.51660156,15.9516602 3.21191406,15.4819336 C2.90722656,15.012207 2.67871094,14.4895833 2.52636719,13.9140625 C2.37402344,13.3385417 2.29785156,12.7291667 2.29785156,12.0859375 C2.29785156,10.5455729 2.67447917,9.32047526 3.42773438,8.41064453 C4.18098958,7.5008138 5.16276042,7.04589844 6.37304688,7.04589844 C7.30403646,7.04589844 8.1546224,7.29557292 8.92480469,7.79492188 C9.09407552,7.90494792 9.2718099,7.95996094 9.45800781,7.95996094 C9.66113281,7.95996094 9.83251953,7.89648438 9.97216797,7.76953125 C10.1118164,7.64257813 10.1816406,7.4860026 10.1816406,7.29980469 C10.1816406,7.0374349 10.0377604,6.79199219 9.75,6.56347656 C9.38606771,6.25878906 8.89518229,6.00488281 8.27734375,5.80175781 C7.65950521,5.59863281 6.98665365,5.49707031 6.25878906,5.49707031 C5.01464844,5.49707031 3.91438802,5.7890625 2.95800781,6.37304688 C2.0016276,6.95703125 1.26953125,7.74414063 0.76171875,8.734375 C0.25390625,9.72460937 0,10.8291016 0,12.0478516 C0,13.9352214 0.541666667,15.4988607 1.625,16.7387695 C2.70833333,17.9786784 4.16829427,18.5986328 6.00488281,18.5986328 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M13.3330078,18.3574219 C13.6461589,18.3574219 13.9191081,18.2537435 14.1518555,18.0463867 C14.3846029,17.8390299 14.5009766,17.5576172 14.5009766,17.2021484 L14.5009766,1.37109375 C14.5009766,1.015625 14.3888346,0.736328125 14.1645508,0.533203125 C13.9402669,0.330078125 13.6757813,0.228515625 13.3710938,0.228515625 C13.0579427,0.228515625 12.7849935,0.330078125 12.5522461,0.533203125 C12.3194987,0.736328125 12.203125,1.015625 12.203125,1.37109375 L12.203125,17.2021484 C12.203125,17.5745443 12.313151,17.8601888 12.5332031,18.059082 C12.7532552,18.2579753 13.0198568,18.3574219 13.3330078,18.3574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M21.0166016,17.0625 C20.2294922,17.0625 19.6074219,16.8826497 19.1503906,16.5229492 C18.6933594,16.1632487 18.4648438,15.5983073 18.4648438,14.828125 C18.4648438,14.4049479 18.5262044,14.0579427 18.6489258,13.7871094 C18.7716471,13.516276 18.9599609,13.2856445 19.2138672,13.0952148 C19.4677734,12.9047852 19.8486328,12.7609049 20.3564453,12.6635742 C20.8642578,12.5662435 21.4482422,12.4985352 22.1083984,12.4604492 C22.7685547,12.4223633 23.6233724,12.4033203 24.6728516,12.4033203 L24.6728516,12.7841797 C24.6728516,14.0283203 24.3068034,15.0524089 23.574707,15.8564453 C22.8426107,16.6604818 21.9899089,17.0625 21.0166016,17.0625 Z M20.7753906,18.5859375 C22.5950521,18.5859375 23.8984375,17.7607422 24.6855469,16.1103516 L24.6855469,17.3291016 C24.6855469,17.6591797 24.793457,17.9152018 25.0092773,18.097168 C25.2250977,18.2791341 25.4853516,18.3701172 25.7900391,18.3701172 C26.0947266,18.3701172 26.363444,18.2749023 26.5961914,18.0844727 C26.8289388,17.894043 26.9453125,17.6337891 26.9453125,17.3037109 L26.9453125,9.90234375 C26.9453125,8.40429688 26.5073242,7.29557292 25.6313477,6.57617188 C24.7553711,5.85677083 23.5429688,5.49707031 21.9941406,5.49707031 C20.1829427,5.49707031 18.6214193,5.86523438 17.3095703,6.6015625 C17.0725911,6.73697917 16.9541016,6.93164062 16.9541016,7.18554688 C16.9541016,7.40559896 17.0345052,7.60660807 17.1953125,7.78857422 C17.3561198,7.97054036 17.5465495,8.06152344 17.7666016,8.06152344 C17.8850911,8.06152344 17.9866536,8.04036458 18.0712891,7.99804687 C18.5029297,7.80338542 18.866862,7.65104167 19.1630859,7.54101562 C19.4593099,7.43098958 19.8655599,7.32096354 20.3818359,7.2109375 C20.898112,7.10091146 21.414388,7.04589844 21.9306641,7.04589844 C22.7939453,7.04589844 23.4667969,7.26171875 23.9492188,7.69335937 C24.4316406,8.125 24.6728516,8.79361979 24.6728516,9.69921875 L24.6728516,11.0830078 C23.7333984,11.0830078 22.9314779,11.0957031 22.2670898,11.1210938 C21.6027018,11.1464844 20.9573568,11.190918 20.3310547,11.2543945 C19.7047526,11.3178711 19.1884766,11.4109701 18.7822266,11.5336914 C18.3759766,11.6564128 18.0035807,11.8129883 17.6650391,12.003418 C17.3264974,12.1938477 17.0641276,12.4265951 16.8779297,12.7016602 C16.6917318,12.9767253 16.5478516,13.296224 16.4462891,13.6601562 C16.3447266,14.0240885 16.2939453,14.4472656 16.2939453,14.9296875 C16.2939453,16.0976562 16.7086589,16.9990234 17.5380859,17.6337891 C18.367513,18.2685547 19.4466146,18.5859375 20.7753906,18.5859375 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M29.5761719,17.2148437 C30.0839844,17.5872396 30.7631836,17.9109701 31.6137695,18.1860352 C32.4643555,18.4611003 33.3424479,18.5986328 34.2480469,18.5986328 C35.2213542,18.5986328 36.0761719,18.465332 36.8125,18.1987305 C37.5488281,17.9321289 38.1433919,17.5026042 38.5961914,16.9101562 C39.0489909,16.3177083 39.2753906,15.5898438 39.2753906,14.7265625 C39.2753906,13.8125 39.0024414,13.0613607 38.456543,12.4731445 C37.9106445,11.8849284 36.9986979,11.4342448 35.7207031,11.1210938 L33.5244141,10.5751953 C32.601888,10.3466797 32.0009766,10.1287435 31.7216797,9.92138672 C31.4423828,9.71402995 31.3027344,9.35644531 31.3027344,8.84863281 C31.3027344,8.22233073 31.5735677,7.76318359 32.1152344,7.47119141 C32.656901,7.17919922 33.3847656,7.03320313 34.2988281,7.03320313 C34.5865885,7.03320313 34.8701172,7.05224609 35.1494141,7.09033203 C35.4287109,7.12841797 35.703776,7.18343099 35.9746094,7.25537109 C36.2454427,7.3273112 36.4591471,7.38867187 36.6157227,7.43945313 C36.7722982,7.49023438 36.9754232,7.56429036 37.2250977,7.66162109 C37.4747721,7.75895182 37.6123047,7.81184896 37.6376953,7.8203125 C37.773112,7.87109375 37.9042969,7.89648438 38.03125,7.89648438 C38.2513021,7.89648438 38.4269206,7.82666016 38.5581055,7.68701172 C38.6892904,7.54736328 38.7548828,7.38020833 38.7548828,7.18554688 C38.7548828,6.85546875 38.5898438,6.60579427 38.2597656,6.43652344 C37.8027344,6.19108073 37.2124023,5.96891276 36.4887695,5.77001953 C35.7651367,5.5711263 34.9928385,5.47167969 34.171875,5.47167969 C33.4863281,5.47167969 32.8515625,5.53727214 32.2675781,5.66845703 C31.6835938,5.79964193 31.1567383,5.99853516 30.6870117,6.26513672 C30.2172852,6.53173828 29.8470052,6.89355469 29.5761719,7.35058594 C29.3053385,7.80761719 29.1699219,8.33658854 29.1699219,8.9375 C29.1699219,9.31835938 29.2080078,9.65690104 29.2841797,9.953125 C29.3603516,10.249349 29.4830729,10.507487 29.6523438,10.7275391 C29.8216146,10.9475911 30.0035807,11.1359049 30.1982422,11.2924805 C30.3929036,11.449056 30.6531576,11.5950521 30.9790039,11.7304688 C31.3048503,11.8658854 31.6095378,11.9759115 31.8930664,12.0605469 C32.1765951,12.1451823 32.546875,12.2467448 33.0039062,12.3652344 L35.2509766,12.9238281 C35.9957682,13.110026 36.5289714,13.3554688 36.8505859,13.6601563 C37.1722005,13.9648438 37.3330078,14.3710938 37.3330078,14.8789063 C37.3330078,15.5898438 37.0473633,16.1293945 36.4760742,16.4975586 C35.9047852,16.8657227 35.1536458,17.0498047 34.2226562,17.0498047 C32.9277344,17.0413411 31.7005208,16.6774089 30.5410156,15.9580078 C30.3548177,15.8395182 30.1601562,15.7802734 29.9570312,15.7802734 C29.7369792,15.7802734 29.555013,15.8543294 29.4111328,16.0024414 C29.2672526,16.1505534 29.1953125,16.3261719 29.1953125,16.5292969 C29.1953125,16.8085938 29.3222656,17.0371094 29.5761719,17.2148437 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M41.3476562,17.2148437 C41.8554688,17.5872396 42.534668,17.9109701 43.3852539,18.1860352 C44.2358398,18.4611003 45.1139323,18.5986328 46.0195312,18.5986328 C46.9928385,18.5986328 47.8476562,18.465332 48.5839844,18.1987305 C49.3203125,17.9321289 49.9148763,17.5026042 50.3676758,16.9101562 C50.8204753,16.3177083 51.046875,15.5898438 51.046875,14.7265625 C51.046875,13.8125 50.7739258,13.0613607 50.2280273,12.4731445 C49.6821289,11.8849284 48.7701823,11.4342448 47.4921875,11.1210938 L45.2958984,10.5751953 C44.3733724,10.3466797 43.7724609,10.1287435 43.4931641,9.92138672 C43.2138672,9.71402995 43.0742188,9.35644531 43.0742188,8.84863281 C43.0742188,8.22233073 43.3450521,7.76318359 43.8867188,7.47119141 C44.4283854,7.17919922 45.15625,7.03320313 46.0703125,7.03320313 C46.3580729,7.03320313 46.6416016,7.05224609 46.9208984,7.09033203 C47.2001953,7.12841797 47.4752604,7.18343099 47.7460938,7.25537109 C48.0169271,7.3273112 48.2306315,7.38867187 48.387207,7.43945313 C48.5437826,7.49023438 48.7469076,7.56429036 48.996582,7.66162109 C49.2462565,7.75895182 49.3837891,7.81184896 49.4091797,7.8203125 C49.5445964,7.87109375 49.6757812,7.89648438 49.8027344,7.89648438 C50.0227865,7.89648438 50.1984049,7.82666016 50.3295898,7.68701172 C50.4607747,7.54736328 50.5263672,7.38020833 50.5263672,7.18554688 C50.5263672,6.85546875 50.3613281,6.60579427 50.03125,6.43652344 C49.5742188,6.19108073 48.9838867,5.96891276 48.2602539,5.77001953 C47.5366211,5.5711263 46.7643229,5.47167969 45.9433594,5.47167969 C45.2578125,5.47167969 44.6230469,5.53727214 44.0390625,5.66845703 C43.4550781,5.79964193 42.9282227,5.99853516 42.4584961,6.26513672 C41.9887695,6.53173828 41.6184896,6.89355469 41.3476562,7.35058594 C41.0768229,7.80761719 40.9414062,8.33658854 40.9414062,8.9375 C40.9414062,9.31835938 40.9794922,9.65690104 41.0556641,9.953125 C41.1318359,10.249349 41.2545573,10.507487 41.4238281,10.7275391 C41.593099,10.9475911 41.7750651,11.1359049 41.9697266,11.2924805 C42.164388,11.449056 42.4246419,11.5950521 42.7504883,11.7304688 C43.0763346,11.8658854 43.3810221,11.9759115 43.6645508,12.0605469 C43.9480794,12.1451823 44.3183594,12.2467448 44.7753906,12.3652344 L47.0224609,12.9238281 C47.7672526,13.110026 48.3004557,13.3554688 48.6220703,13.6601563 C48.9436849,13.9648438 49.1044922,14.3710938 49.1044922,14.8789063 C49.1044922,15.5898438 48.8188477,16.1293945 48.2475586,16.4975586 C47.6762695,16.8657227 46.9251302,17.0498047 45.9941406,17.0498047 C44.6992188,17.0413411 43.4720052,16.6774089 42.3125,15.9580078 C42.1263021,15.8395182 41.9316406,15.7802734 41.7285156,15.7802734 C41.5084635,15.7802734 41.3264974,15.8543294 41.1826172,16.0024414 C41.038737,16.1505534 40.9667969,16.3261719 40.9667969,16.5292969 C40.9667969,16.8085938 41.09375,17.0371094 41.3476562,17.2148437 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M54.4902344,2.80566406 C54.9388021,2.80566406 55.2985026,2.67659505 55.5693359,2.41845703 C55.8401693,2.16031901 55.9755859,1.82389323 55.9755859,1.40917969 C55.9755859,0.986002604 55.8401693,0.645345052 55.5693359,0.387207031 C55.2985026,0.12906901 54.9430339,0 54.5029297,0 C54.054362,0 53.6925456,0.131184896 53.4174805,0.393554688 C53.1424154,0.655924479 53.0048828,0.994466146 53.0048828,1.40917969 C53.0048828,1.82389323 53.1402995,2.16031901 53.4111328,2.41845703 C53.6819661,2.67659505 54.0416667,2.80566406 54.4902344,2.80566406 Z M54.4775391,18.3574219 C54.7906901,18.3574219 55.0636393,18.2537435 55.2963867,18.0463867 C55.5291341,17.8390299 55.6455078,17.5576172 55.6455078,17.2021484 L55.6455078,6.88085938 C55.6455078,6.52539062 55.5333659,6.24609375 55.309082,6.04296875 C55.0847982,5.83984375 54.8203125,5.73828125 54.515625,5.73828125 C54.202474,5.73828125 53.9295247,5.83984375 53.6967773,6.04296875 C53.4640299,6.24609375 53.3476562,6.52539062 53.3476562,6.88085938 L53.3476562,17.2021484 C53.3476562,17.5745443 53.4576823,17.8601888 53.6777344,18.059082 C53.8977865,18.2579753 54.164388,18.3574219 54.4775391,18.3574219 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M60.6503906,18.3574219 C60.9635417,18.3574219 61.234375,18.2579753 61.4628906,18.059082 C61.6914062,17.8601888 61.8056641,17.5830078 61.8056641,17.2275391 L61.8056641,7.36328125 L63.9511719,7.36328125 C64.4928385,7.36328125 64.7636719,7.11783854 64.7636719,6.62695312 C64.7636719,6.40690104 64.6980794,6.2249349 64.5668945,6.08105469 C64.4357096,5.93717448 64.2304688,5.86523438 63.9511719,5.86523438 L61.8056641,5.86523438 L61.8056641,4.875 C61.8056641,4.18945312 61.8331706,3.64567057 61.8881836,3.24365234 C61.9431966,2.84163411 62.0489909,2.53271484 62.2055664,2.31689453 C62.3621419,2.10107422 62.5419922,1.96142578 62.7451172,1.89794922 C62.9482422,1.83447266 63.2317708,1.80273438 63.5957031,1.80273438 L64.6748047,1.80273438 C64.9287109,1.80273438 65.1254883,1.71809896 65.2651367,1.54882813 C65.4047852,1.37955729 65.4746094,1.17643229 65.4746094,0.939453125 C65.4746094,0.702473958 65.4047852,0.497233073 65.2651367,0.323730469 C65.1254883,0.150227865 64.9329427,0.0634765625 64.6875,0.0634765625 L63.1386719,0.0634765625 C62.0045573,0.0634765625 61.1158854,0.38297526 60.4726562,1.02197266 C59.8294271,1.66097005 59.5078125,2.73795573 59.5078125,4.25292969 L59.5078125,5.86523438 L57.8447266,5.86523438 C57.5654297,5.86523438 57.3580729,5.93717448 57.2226562,6.08105469 C57.0872396,6.2249349 57.0195312,6.40690104 57.0195312,6.62695312 C57.0195312,6.83854167 57.0872396,7.01416016 57.2226562,7.15380859 C57.3580729,7.29345703 57.5654297,7.36328125 57.8447266,7.36328125 L59.5078125,7.36328125 L59.5078125,17.2275391 C59.5078125,17.5830078 59.6199544,17.8601888 59.8442383,18.059082 C60.0685221,18.2579753 60.3372396,18.3574219 60.6503906,18.3574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M70.0097656,22.6865234 L77.3476562,7.2109375 C77.406901,7.06705729 77.4365234,6.92317708 77.4365234,6.77929688 C77.4365234,6.49153646 77.3243815,6.24397786 77.1000977,6.03662109 C76.8758138,5.82926432 76.6155599,5.72558594 76.3193359,5.72558594 C75.8792318,5.72558594 75.5449219,5.95833333 75.3164062,6.42382813 L71.4316406,14.6503906 L67.5722656,6.46191406 C67.3606771,6.01334635 67.030599,5.7890625 66.5820312,5.7890625 C66.2858073,5.7890625 66.0213216,5.890625 65.7885742,6.09375 C65.5558268,6.296875 65.4394531,6.55078125 65.4394531,6.85546875 C65.4394531,7.04166667 65.4775391,7.2109375 65.5537109,7.36328125 L70.2763672,17.1005859 L68.0039062,21.9121094 C67.9277344,22.0813802 67.8896484,22.2421875 67.8896484,22.3945313 C67.8896484,22.6822917 67.9996745,22.9213867 68.2197266,23.1118164 C68.4397786,23.3022461 68.6936849,23.3974609 68.9814453,23.3974609 C69.4384766,23.3974609 69.78125,23.1604818 70.0097656,22.6865234 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy" transform="translate(0, 52)" xlink:href="#path-11" fill="#333333">
+                    <g id="Text" transform="translate(17.1426, 10.8584)">
+                        <path d="M2.34863281,10.8798828 C2.39095052,10.3551432 2.50309245,9.85791016 2.68505859,9.38818359 C2.86702474,8.91845703 3.11246745,8.49104818 3.42138672,8.10595703 C3.73030599,7.72086589 4.12597656,7.4140625 4.60839844,7.18554688 C5.09082031,6.95703125 5.62825521,6.84277344 6.22070313,6.84277344 C7.36328125,6.84277344 8.23079427,7.21940104 8.82324219,7.97265625 C9.4156901,8.72591146 9.75,9.69498698 9.82617188,10.8798828 L2.34863281,10.8798828 Z M6.25878906,18.3701172 C8.1546224,18.3701172 9.77115885,17.7692057 11.1083984,16.5673828 C11.3453776,16.3557943 11.4638672,16.1145833 11.4638672,15.84375 C11.4638672,15.6321615 11.394043,15.4480794 11.2543945,15.2915039 C11.1147461,15.1349284 10.9433594,15.0566406 10.7402344,15.0566406 C10.5709635,15.0566406 10.4101563,15.1158854 10.2578125,15.234375 C9.6484375,15.6998698 9.04329427,16.0680339 8.44238281,16.3388672 C7.84147135,16.6097005 7.16438802,16.7451172 6.41113281,16.7451172 C5.21777344,16.7281901 4.24023437,16.3452148 3.47851563,15.5961914 C2.71679687,14.847168 2.32747396,13.7447917 2.31054688,12.2890625 L11.0195312,12.2890625 C11.2903646,12.2890625 11.4934896,12.2086589 11.6289062,12.0478516 C11.7643229,11.8870443 11.8320312,11.6839193 11.8320312,11.4384766 C11.8066406,10.5836589 11.6839193,9.79866536 11.4638672,9.08349609 C11.2438151,8.36832682 10.913737,7.72086589 10.4736328,7.14111328 C10.0335286,6.56136068 9.44108073,6.10644531 8.69628906,5.77636719 C7.9514974,5.44628906 7.08398437,5.28125 6.09375,5.28125 C4.84114583,5.28125 3.74511719,5.5859375 2.80566406,6.1953125 C1.86621094,6.8046875 1.16373698,7.59602865 0.698242188,8.56933594 C0.232747396,9.54264323 0,10.617513 0,11.7939453 C0,13.1311849 0.27718099,14.3012695 0.831542969,15.3041992 C1.38590495,16.3071289 2.1328125,17.0667318 3.07226562,17.5830078 C4.01171875,18.0992839 5.07389323,18.3616536 6.25878906,18.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M15.1230469,18.1416016 C15.4446615,18.1416016 15.7218424,18.0463867 15.9545898,17.855957 C16.1873372,17.6655273 16.3037109,17.4010417 16.3037109,17.0625 L16.3037109,10.8671875 C16.3037109,9.59765625 16.6337891,8.60953776 17.2939453,7.90283203 C17.9541016,7.1961263 18.8343099,6.84277344 19.9345703,6.84277344 C21.7796224,6.84277344 22.7021484,8.01074219 22.7021484,10.3466797 L22.7021484,17.0498047 C22.7021484,17.3883464 22.8142904,17.6549479 23.0385742,17.8496094 C23.2628581,18.0442708 23.5273438,18.1416016 23.8320313,18.1416016 C24.1451823,18.1416016 24.4160156,18.0442708 24.6445313,17.8496094 C24.8730469,17.6549479 24.9873047,17.3883464 24.9873047,17.0498047 L24.9873047,10.9306641 C24.9873047,9.59342448 25.3300781,8.57779948 26.015625,7.88378906 C26.7011719,7.18977865 27.5729167,6.84277344 28.6308594,6.84277344 C29.5195312,6.84277344 30.2050781,7.11149089 30.6875,7.64892578 C31.1699219,8.18636068 31.4111328,8.97981771 31.4111328,10.0292969 L31.4111328,17.0625 C31.4111328,17.4010417 31.5211589,17.6655273 31.7412109,17.855957 C31.961263,18.0463867 32.2278646,18.1416016 32.5410156,18.1416016 C32.8541667,18.1416016 33.1271159,18.0463867 33.3598633,17.855957 C33.5926107,17.6655273 33.7089844,17.4010417 33.7089844,17.0625 L33.7089844,10.1181641 C33.7089844,9.28873698 33.5883789,8.55664063 33.347168,7.921875 C33.105957,7.28710938 32.7716471,6.78141276 32.3442383,6.40478516 C31.9168294,6.02815755 31.4322917,5.74674479 30.890625,5.56054688 C30.3489583,5.37434896 29.7565104,5.28125 29.1132813,5.28125 C27.9960937,5.28125 27.0481771,5.52880859 26.2695312,6.02392578 C25.4908854,6.51904297 24.9238281,7.25748698 24.5683594,8.23925781 C24.3059896,7.29134115 23.8066406,6.56136068 23.0703125,6.04931641 C22.3339844,5.53727214 21.4410807,5.28125 20.3916016,5.28125 C19.4690755,5.28125 18.648112,5.49072266 17.9287109,5.90966797 C17.2093099,6.32861328 16.6676432,6.92740885 16.3037109,7.70605469 L16.3037109,6.52539063 C16.3037109,6.21223958 16.1873372,5.96679688 15.9545898,5.7890625 C15.7218424,5.61132812 15.453125,5.52246094 15.1484375,5.52246094 C14.84375,5.52246094 14.5750326,5.61767578 14.3422852,5.80810547 C14.1095378,5.99853516 13.9931641,6.25878906 13.9931641,6.58886719 L13.9931641,17.0625 C13.9931641,17.4010417 14.1031901,17.6655273 14.3232422,17.855957 C14.5432943,18.0463867 14.8098958,18.1416016 15.1230469,18.1416016 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M42.6748047,16.7958984 C40.0511068,16.7958984 38.7392578,15.1201172 38.7392578,11.7685547 C38.7392578,11.1253255 38.8111979,10.5201823 38.9550781,9.953125 C39.0989583,9.38606771 39.3168945,8.8655599 39.6088867,8.39160156 C39.9008789,7.91764323 40.2986654,7.54101562 40.8022461,7.26171875 C41.3058268,6.98242188 41.8919271,6.84277344 42.5605469,6.84277344 C43.2207031,6.84277344 43.7983398,6.99088542 44.293457,7.28710938 C44.7885742,7.58333333 45.1715495,7.97900391 45.4423828,8.47412109 C45.7132161,8.96923828 45.9142253,9.49609375 46.0454102,10.0546875 C46.1765951,10.6132813 46.2421875,11.1972656 46.2421875,11.8066406 C46.2421875,12.3990885 46.1829427,12.9682617 46.0644531,13.5141602 C45.9459635,14.0600586 45.7597656,14.5869141 45.5058594,15.0947266 C45.2519531,15.6025391 44.8816732,16.0109049 44.3950195,16.3198242 C43.9083659,16.6287435 43.3349609,16.7874349 42.6748047,16.7958984 Z M43.0810547,18.3701172 C43.961263,18.3701172 44.7547201,18.1902669 45.4614258,17.8305664 C46.1681315,17.4708659 46.7415365,16.9842122 47.1816406,16.3706055 C47.6217448,15.7569987 47.9581706,15.0566406 48.190918,14.2695313 C48.4236654,13.4824219 48.5400391,12.6445312 48.5400391,11.7558594 C48.5400391,10.8502604 48.4130859,10.0060221 48.1591797,9.22314453 C47.9052734,8.44026693 47.543457,7.75472005 47.0737305,7.16650391 C46.6040039,6.57828776 46.0073242,6.11702474 45.2836914,5.78271484 C44.5600586,5.44840495 43.758138,5.28125 42.8779297,5.28125 C41.9384766,5.28125 41.1069336,5.484375 40.3833008,5.890625 C39.659668,6.296875 39.1074219,6.88509115 38.7265625,7.65527344 L38.7265625,1.06640625 C38.7265625,0.736328125 38.6123047,0.476074219 38.3837891,0.285644531 C38.1552734,0.0952148438 37.8844401,0 37.5712891,0 C37.2666016,0 37,0.0952148438 36.7714844,0.285644531 C36.5429688,0.476074219 36.4287109,0.736328125 36.4287109,1.06640625 L36.4287109,17.0371094 C36.4287109,17.375651 36.5429688,17.6401367 36.7714844,17.8305664 C37,18.0209961 37.2666016,18.1162109 37.5712891,18.1162109 C37.9521484,18.1162109 38.2399089,18.0209961 38.4345703,17.8305664 C38.6292318,17.6401367 38.7265625,17.3798828 38.7265625,17.0498047 L38.7265625,15.9199219 C39.1328125,16.7324219 39.714681,17.3439128 40.472168,17.7543945 C41.2296549,18.1648763 42.0992839,18.3701172 43.0810547,18.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M52.0976562,10.8798828 C52.139974,10.3551432 52.2521159,9.85791016 52.434082,9.38818359 C52.6160482,8.91845703 52.8614909,8.49104818 53.1704102,8.10595703 C53.4793294,7.72086589 53.875,7.4140625 54.3574219,7.18554688 C54.8398438,6.95703125 55.3772786,6.84277344 55.9697266,6.84277344 C57.1123047,6.84277344 57.9798177,7.21940104 58.5722656,7.97265625 C59.1647135,8.72591146 59.4990234,9.69498698 59.5751953,10.8798828 L52.0976562,10.8798828 Z M56.0078125,18.3701172 C57.9036458,18.3701172 59.5201823,17.7692057 60.8574219,16.5673828 C61.094401,16.3557943 61.2128906,16.1145833 61.2128906,15.84375 C61.2128906,15.6321615 61.1430664,15.4480794 61.003418,15.2915039 C60.8637695,15.1349284 60.6923828,15.0566406 60.4892578,15.0566406 C60.319987,15.0566406 60.1591797,15.1158854 60.0068359,15.234375 C59.3974609,15.6998698 58.7923177,16.0680339 58.1914062,16.3388672 C57.5904948,16.6097005 56.9134115,16.7451172 56.1601562,16.7451172 C54.9667969,16.7281901 53.9892578,16.3452148 53.2275391,15.5961914 C52.4658203,14.847168 52.0764974,13.7447917 52.0595703,12.2890625 L60.7685547,12.2890625 C61.039388,12.2890625 61.242513,12.2086589 61.3779297,12.0478516 C61.5133464,11.8870443 61.5810547,11.6839193 61.5810547,11.4384766 C61.5556641,10.5836589 61.4329427,9.79866536 61.2128906,9.08349609 C60.9928385,8.36832682 60.6627604,7.72086589 60.2226562,7.14111328 C59.7825521,6.56136068 59.1901042,6.10644531 58.4453125,5.77636719 C57.7005208,5.44628906 56.8330078,5.28125 55.8427734,5.28125 C54.5901693,5.28125 53.4941406,5.5859375 52.5546875,6.1953125 C51.6152344,6.8046875 50.9127604,7.59602865 50.4472656,8.56933594 C49.9817708,9.54264323 49.7490234,10.617513 49.7490234,11.7939453 C49.7490234,13.1311849 50.0262044,14.3012695 50.5805664,15.3041992 C51.1349284,16.3071289 51.8818359,17.0667318 52.8212891,17.5830078 C53.7607422,18.0992839 54.8229167,18.3616536 56.0078125,18.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M68.7441406,16.7958984 C68.0839844,16.7874349 67.5105794,16.6287435 67.0239258,16.3198242 C66.5372721,16.0109049 66.1669922,15.6025391 65.9130859,15.0947266 C65.6591797,14.5869141 65.4729818,14.0600586 65.3544922,13.5141602 C65.2360026,12.9682617 65.1767578,12.3990885 65.1767578,11.8066406 C65.1767578,11.1972656 65.2423503,10.6132813 65.3735352,10.0546875 C65.5047201,9.49609375 65.7057292,8.96923828 65.9765625,8.47412109 C66.2473958,7.97900391 66.6303711,7.58333333 67.1254883,7.28710938 C67.6206055,6.99088542 68.1982422,6.84277344 68.8583984,6.84277344 C69.5270182,6.84277344 70.1131185,6.98242188 70.6166992,7.26171875 C71.1202799,7.54101562 71.5180664,7.91764323 71.8100586,8.39160156 C72.1020508,8.8655599 72.319987,9.38606771 72.4638672,9.953125 C72.6077474,10.5201823 72.6796875,11.1253255 72.6796875,11.7685547 C72.6796875,15.1201172 71.3678385,16.7958984 68.7441406,16.7958984 Z M68.3378906,18.3701172 C69.3196615,18.3701172 70.1892904,18.1648763 70.9467773,17.7543945 C71.7042643,17.3439128 72.2861328,16.7324219 72.6923828,15.9199219 L72.6923828,17.0498047 C72.6923828,17.3798828 72.7897135,17.6401367 72.984375,17.8305664 C73.1790365,18.0209961 73.4667969,18.1162109 73.8476562,18.1162109 C74.1523438,18.1162109 74.4189453,18.0209961 74.6474609,17.8305664 C74.8759766,17.6401367 74.9902344,17.375651 74.9902344,17.0371094 L74.9902344,1.06640625 C74.9902344,0.736328125 74.8759766,0.476074219 74.6474609,0.285644531 C74.4189453,0.0952148438 74.1523438,0 73.8476562,0 C73.5345052,0 73.2636719,0.0952148438 73.0351562,0.285644531 C72.8066406,0.476074219 72.6923828,0.736328125 72.6923828,1.06640625 L72.6923828,7.65527344 C72.3115234,6.88509115 71.7592773,6.296875 71.0356445,5.890625 C70.3120117,5.484375 69.4804688,5.28125 68.5410156,5.28125 C67.6608073,5.28125 66.8588867,5.44840495 66.1352539,5.78271484 C65.4116211,6.11702474 64.8149414,6.57828776 64.3452148,7.16650391 C63.8754883,7.75472005 63.5136719,8.44026693 63.2597656,9.22314453 C63.0058594,10.0060221 62.8789062,10.8502604 62.8789062,11.7558594 C62.8789062,12.6445312 62.9952799,13.4824219 63.2280273,14.2695313 C63.4607747,15.0566406 63.7972005,15.7569987 64.2373047,16.3706055 C64.6774089,16.9842122 65.2508138,17.4708659 65.9575195,17.8305664 C66.6642253,18.1902669 67.4576823,18.3701172 68.3378906,18.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-4" transform="translate(0, 92)" xlink:href="#path-12" fill="#333333">
+                    <g id="Text" transform="translate(16.4316, 10.6426)">
+                        <path d="M5.59863281,18.5351562 L6.34765625,18.5351562 C6.65234375,18.5351562 6.89990234,18.5266927 7.09033203,18.5097656 C7.28076172,18.4928385 7.47119141,18.4547526 7.66162109,18.3955078 C7.85205078,18.336263 7.9938151,18.2410482 8.08691406,18.1098633 C8.18001302,17.9786784 8.2265625,17.8072917 8.2265625,17.5957031 C8.2265625,17.358724 8.14615885,17.1513672 7.98535156,16.9736328 C7.82454427,16.7958984 7.60449219,16.7070313 7.32519531,16.7070313 L7.24902344,16.7070313 L6.38574219,16.7451172 L6.24609375,16.7451172 C5.71289063,16.7451172 5.32356771,16.5525716 5.078125,16.1674805 C4.83268229,15.7823893 4.70996094,15.1539714 4.70996094,14.2822266 L4.70996094,7.36328125 L7.04589844,7.36328125 C7.60449219,7.36328125 7.88378906,7.12207031 7.88378906,6.63964844 C7.88378906,6.41113281 7.81396484,6.2249349 7.67431641,6.08105469 C7.53466797,5.93717448 7.32519531,5.86523437 7.04589844,5.86523437 L4.70996094,5.86523437 L4.70996094,2.18359375 C4.70996094,1.88736979 4.62532552,1.66097005 4.45605469,1.50439453 C4.28678385,1.34781901 4.07519531,1.26953125 3.82128906,1.26953125 C3.55045573,1.26953125 3.30289714,1.36686198 3.07861328,1.56152344 C2.85432943,1.7561849 2.73372396,1.98893229 2.71679688,2.25976562 L2.41210938,5.86523437 L0.850585937,5.86523437 C0.571289063,5.86523437 0.359700521,5.93505859 0.215820313,6.07470703 C0.0719401042,6.21435547 0,6.39420573 0,6.61425781 C0,6.84277344 0.0740559896,7.02473958 0.222167969,7.16015625 C0.370279948,7.29557292 0.583984375,7.36328125 0.86328125,7.36328125 L2.41210938,7.36328125 L2.41210938,14.6757812 C2.41210938,15.9876302 2.69986979,16.9588216 3.27539062,17.5893555 C3.85091146,18.2198893 4.62532552,18.5351562 5.59863281,18.5351562 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M15.3896484,18.5859375 C17.3447266,18.5859375 18.8935547,17.9723307 20.0361328,16.7451172 C21.1787109,15.5179036 21.75,13.9521484 21.75,12.0478516 C21.75,10.1266276 21.1765951,8.55452474 20.0297852,7.33154297 C18.8829753,6.1085612 17.336263,5.49707031 15.3896484,5.49707031 C13.4430339,5.49707031 11.8963216,6.11067708 10.7495117,7.33789063 C9.60270182,8.56510417 9.02929688,10.1350911 9.02929688,12.0478516 C9.02929688,13.9521484 9.60270182,15.5179036 10.7495117,16.7451172 C11.8963216,17.9723307 13.4430339,18.5859375 15.3896484,18.5859375 Z M15.3642578,17.0117188 C14.0947266,17.0117188 13.1023763,16.5652669 12.387207,15.6723633 C11.6720378,14.7794596 11.3144531,13.5712891 11.3144531,12.0478516 C11.3144531,10.507487 11.6741536,9.29085286 12.3935547,8.39794922 C13.1129557,7.50504557 14.1116536,7.05859375 15.3896484,7.05859375 C16.6591797,7.05859375 17.6557617,7.50716146 18.3793945,8.40429688 C19.1030273,9.30143229 19.4648438,10.5159505 19.4648438,12.0478516 C19.4648438,13.5882161 19.1072591,14.8006185 18.3920898,15.6850586 C17.6769206,16.5694987 16.6676432,17.0117188 15.3642578,17.0117188 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M24.7236328,18.3574219 C25.0367839,18.3574219 25.3097331,18.2558594 25.5424805,18.0527344 C25.7752279,17.8496094 25.8916016,17.5660807 25.8916016,17.2021484 L25.8916016,11.6669922 L31.8583984,18.0146484 C32.069987,18.2516276 32.3238932,18.3701172 32.6201172,18.3701172 C32.8994141,18.3701172 33.1469727,18.2643229 33.362793,18.0527344 C33.5786133,17.8411458 33.6865234,17.5999349 33.6865234,17.3291016 C33.6865234,17.0836589 33.5934245,16.8636068 33.4072266,16.6689453 L28.3671875,11.3623047 L32.9755859,7.23632812 C33.1787109,7.04166667 33.2802734,6.82584635 33.2802734,6.58886719 C33.2802734,6.3264974 33.1765951,6.08951823 32.9692383,5.87792969 C32.7618815,5.66634115 32.5227865,5.56054687 32.2519531,5.56054687 C32.0403646,5.56054687 31.8414714,5.64095052 31.6552734,5.80175781 L25.8916016,11.0830078 L25.8916016,1.37109375 C25.8916016,1.015625 25.7794596,0.736328125 25.5551758,0.533203125 C25.3308919,0.330078125 25.0664063,0.228515625 24.7617188,0.228515625 C24.4485677,0.228515625 24.1756185,0.330078125 23.9428711,0.533203125 C23.7101237,0.736328125 23.59375,1.015625 23.59375,1.37109375 L23.59375,17.2021484 C23.59375,17.5745443 23.703776,17.8601888 23.9238281,18.059082 C24.1438802,18.2579753 24.4104818,18.3574219 24.7236328,18.3574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M36.8378906,11.0957031 C36.8802083,10.5709635 36.9923503,10.0737305 37.1743164,9.60400391 C37.3562826,9.13427734 37.6017253,8.70686849 37.9106445,8.32177734 C38.2195638,7.9366862 38.6152344,7.62988281 39.0976562,7.40136719 C39.5800781,7.17285156 40.117513,7.05859375 40.7099609,7.05859375 C41.8525391,7.05859375 42.7200521,7.43522135 43.3125,8.18847656 C43.9049479,8.94173177 44.2392578,9.91080729 44.3154297,11.0957031 L36.8378906,11.0957031 Z M40.7480469,18.5859375 C42.6438802,18.5859375 44.2604167,17.985026 45.5976562,16.7832031 C45.8346354,16.5716146 45.953125,16.3304036 45.953125,16.0595703 C45.953125,15.8479818 45.8833008,15.6638997 45.7436523,15.5073242 C45.6040039,15.3507487 45.4326172,15.2724609 45.2294922,15.2724609 C45.0602214,15.2724609 44.8994141,15.3317057 44.7470703,15.4501953 C44.1376953,15.9156901 43.5325521,16.2838542 42.9316406,16.5546875 C42.3307292,16.8255208 41.6536458,16.9609375 40.9003906,16.9609375 C39.7070312,16.9440104 38.7294922,16.5610352 37.9677734,15.8120117 C37.2060547,15.0629883 36.8167318,13.960612 36.7998047,12.5048828 L45.5087891,12.5048828 C45.7796224,12.5048828 45.9827474,12.4244792 46.1181641,12.2636719 C46.2535807,12.1028646 46.3212891,11.8997396 46.3212891,11.6542969 C46.2958984,10.7994792 46.1731771,10.0144857 45.953125,9.29931641 C45.7330729,8.58414714 45.4029948,7.9366862 44.9628906,7.35693359 C44.5227865,6.77718099 43.9303385,6.32226562 43.1855469,5.9921875 C42.4407552,5.66210938 41.5732422,5.49707031 40.5830078,5.49707031 C39.3304036,5.49707031 38.234375,5.80175781 37.2949219,6.41113281 C36.3554688,7.02050781 35.6529948,7.81184896 35.1875,8.78515625 C34.7220052,9.75846354 34.4892578,10.8333333 34.4892578,12.0097656 C34.4892578,13.3470052 34.7664388,14.5170898 35.3208008,15.5200195 C35.8751628,16.5229492 36.6220703,17.2825521 37.5615234,17.7988281 C38.5009766,18.3151042 39.563151,18.577474 40.7480469,18.5859375 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M49.6123047,18.3574219 C49.9254557,18.3574219 50.1984049,18.2600911 50.4311523,18.0654297 C50.6638997,17.8707682 50.7802734,17.6041667 50.7802734,17.265625 L50.7802734,10.9306641 C50.8395182,9.73730469 51.2034505,8.79361979 51.8720703,8.09960938 C52.5406901,7.40559896 53.3616536,7.05859375 54.3349609,7.05859375 C55.2490234,7.05859375 55.9578451,7.3421224 56.4614258,7.90917969 C56.9650065,8.47623698 57.2167969,9.30143229 57.2167969,10.3847656 L57.2167969,17.265625 C57.2167969,17.6126302 57.3289388,17.8813477 57.5532227,18.0717773 C57.7775065,18.262207 58.046224,18.3574219 58.359375,18.3574219 C58.672526,18.3574219 58.9412435,18.262207 59.1655273,18.0717773 C59.3898112,17.8813477 59.5019531,17.6126302 59.5019531,17.265625 L59.5019531,10.4101563 C59.5019531,8.75976562 59.0872396,7.52832031 58.2578125,6.71582031 C57.4283854,5.90332031 56.3154297,5.49707031 54.9189453,5.49707031 C53.945638,5.49707031 53.0929362,5.70442708 52.3608398,6.11914062 C51.6287435,6.53385417 51.101888,7.12630208 50.7802734,7.89648437 L50.7802734,6.79199219 C50.7802734,6.46191406 50.6681315,6.20589193 50.4438477,6.02392578 C50.2195638,5.84195964 49.9508464,5.75097656 49.6376953,5.75097656 C49.3245443,5.75097656 49.0537109,5.84619141 48.8251953,6.03662109 C48.5966797,6.22705078 48.4824219,6.49153646 48.4824219,6.83007813 L48.4824219,17.265625 C48.4824219,17.6126302 48.5924479,17.8813477 48.8125,18.0717773 C49.0325521,18.262207 49.2991536,18.3574219 49.6123047,18.3574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M61.0029297,21.5820312 L72.5302734,21.5820312 C72.7672526,21.5820312 72.9513346,21.5058594 73.0825195,21.3535156 C73.2137044,21.2011719 73.2792969,21.0234375 73.2792969,20.8203125 C73.2792969,20.608724 73.2137044,20.4267578 73.0825195,20.2744141 C72.9513346,20.1220703 72.7672526,20.0458984 72.5302734,20.0458984 L61.0029297,20.0458984 C60.7828776,20.0458984 60.6072591,20.1220703 60.4760742,20.2744141 C60.3448893,20.4267578 60.2792969,20.608724 60.2792969,20.8203125 C60.2792969,21.0234375 60.3448893,21.2011719 60.4760742,21.3535156 C60.6072591,21.5058594 60.7828776,21.5820312 61.0029297,21.5820312 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M79.3632812,18.5986328 C81.0221354,18.5986328 82.3085938,18.2304688 83.2226562,17.4941406 C83.5104167,17.2571615 83.6542969,16.9990234 83.6542969,16.7197266 C83.6542969,16.5250651 83.5865885,16.3621419 83.4511719,16.230957 C83.3157552,16.0997721 83.1507161,16.0341797 82.9560547,16.0341797 C82.7783203,16.0341797 82.6090495,16.0891927 82.4482422,16.1992188 C81.6695964,16.7408854 80.7005208,17.0117188 79.5410156,17.0117188 C78.8808594,17.0117188 78.296875,16.8741862 77.7890625,16.5991211 C77.28125,16.324056 76.875,15.9516602 76.5703125,15.4819336 C76.265625,15.012207 76.0371094,14.4895833 75.8847656,13.9140625 C75.7324219,13.3385417 75.65625,12.7291667 75.65625,12.0859375 C75.65625,10.5455729 76.0328776,9.32047526 76.7861328,8.41064453 C77.539388,7.5008138 78.5211589,7.04589844 79.7314453,7.04589844 C80.6624349,7.04589844 81.5130208,7.29557292 82.2832031,7.79492188 C82.452474,7.90494792 82.6302083,7.95996094 82.8164062,7.95996094 C83.0195312,7.95996094 83.190918,7.89648438 83.3305664,7.76953125 C83.4702148,7.64257813 83.5400391,7.4860026 83.5400391,7.29980469 C83.5400391,7.0374349 83.3961589,6.79199219 83.1083984,6.56347656 C82.7444661,6.25878906 82.2535807,6.00488281 81.6357422,5.80175781 C81.0179036,5.59863281 80.3450521,5.49707031 79.6171875,5.49707031 C78.3730469,5.49707031 77.2727865,5.7890625 76.3164062,6.37304688 C75.360026,6.95703125 74.6279297,7.74414063 74.1201172,8.734375 C73.6123047,9.72460937 73.3583984,10.8291016 73.3583984,12.0478516 C73.3583984,13.9352214 73.9000651,15.4988607 74.9833984,16.7387695 C76.0667318,17.9786784 77.5266927,18.5986328 79.3632812,18.5986328 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M86.6914062,18.3574219 C87.0045573,18.3574219 87.2775065,18.2537435 87.5102539,18.0463867 C87.7430013,17.8390299 87.859375,17.5576172 87.859375,17.2021484 L87.859375,1.37109375 C87.859375,1.015625 87.7472331,0.736328125 87.5229492,0.533203125 C87.2986654,0.330078125 87.0341797,0.228515625 86.7294922,0.228515625 C86.4163411,0.228515625 86.1433919,0.330078125 85.9106445,0.533203125 C85.6778971,0.736328125 85.5615234,1.015625 85.5615234,1.37109375 L85.5615234,17.2021484 C85.5615234,17.5745443 85.6715495,17.8601888 85.8916016,18.059082 C86.1116536,18.2579753 86.3782552,18.3574219 86.6914062,18.3574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M94.375,17.0625 C93.5878906,17.0625 92.9658203,16.8826497 92.5087891,16.5229492 C92.0517578,16.1632487 91.8232422,15.5983073 91.8232422,14.828125 C91.8232422,14.4049479 91.8846029,14.0579427 92.0073242,13.7871094 C92.1300456,13.516276 92.3183594,13.2856445 92.5722656,13.0952148 C92.8261719,12.9047852 93.2070312,12.7609049 93.7148438,12.6635742 C94.2226562,12.5662435 94.8066406,12.4985352 95.4667969,12.4604492 C96.1269531,12.4223633 96.9817708,12.4033203 98.03125,12.4033203 L98.03125,12.7841797 C98.03125,14.0283203 97.6652018,15.0524089 96.9331055,15.8564453 C96.2010091,16.6604818 95.3483073,17.0625 94.375,17.0625 Z M94.1337891,18.5859375 C95.9534505,18.5859375 97.2568359,17.7607422 98.0439453,16.1103516 L98.0439453,17.3291016 C98.0439453,17.6591797 98.1518555,17.9152018 98.3676758,18.097168 C98.5834961,18.2791341 98.84375,18.3701172 99.1484375,18.3701172 C99.453125,18.3701172 99.7218424,18.2749023 99.9545898,18.0844727 C100.187337,17.894043 100.303711,17.6337891 100.303711,17.3037109 L100.303711,9.90234375 C100.303711,8.40429688 99.8657227,7.29557292 98.9897461,6.57617188 C98.1137695,5.85677083 96.9013672,5.49707031 95.3525391,5.49707031 C93.5413411,5.49707031 91.9798177,5.86523438 90.6679688,6.6015625 C90.4309896,6.73697917 90.3125,6.93164062 90.3125,7.18554688 C90.3125,7.40559896 90.3929036,7.60660807 90.5537109,7.78857422 C90.7145182,7.97054036 90.9049479,8.06152344 91.125,8.06152344 C91.2434896,8.06152344 91.3450521,8.04036458 91.4296875,7.99804687 C91.8613281,7.80338542 92.2252604,7.65104167 92.5214844,7.54101562 C92.8177083,7.43098958 93.2239583,7.32096354 93.7402344,7.2109375 C94.2565104,7.10091146 94.7727865,7.04589844 95.2890625,7.04589844 C96.1523438,7.04589844 96.8251953,7.26171875 97.3076172,7.69335937 C97.7900391,8.125 98.03125,8.79361979 98.03125,9.69921875 L98.03125,11.0830078 C97.0917969,11.0830078 96.2898763,11.0957031 95.6254883,11.1210938 C94.9611003,11.1464844 94.3157552,11.190918 93.6894531,11.2543945 C93.063151,11.3178711 92.546875,11.4109701 92.140625,11.5336914 C91.734375,11.6564128 91.3619792,11.8129883 91.0234375,12.003418 C90.6848958,12.1938477 90.422526,12.4265951 90.2363281,12.7016602 C90.0501302,12.9767253 89.90625,13.296224 89.8046875,13.6601562 C89.703125,14.0240885 89.6523438,14.4472656 89.6523438,14.9296875 C89.6523438,16.0976562 90.0670573,16.9990234 90.8964844,17.6337891 C91.7259115,18.2685547 92.805013,18.5859375 94.1337891,18.5859375 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M102.93457,17.2148437 C103.442383,17.5872396 104.121582,17.9109701 104.972168,18.1860352 C105.822754,18.4611003 106.700846,18.5986328 107.606445,18.5986328 C108.579753,18.5986328 109.43457,18.465332 110.170898,18.1987305 C110.907227,17.9321289 111.50179,17.5026042 111.95459,16.9101562 C112.407389,16.3177083 112.633789,15.5898438 112.633789,14.7265625 C112.633789,13.8125 112.36084,13.0613607 111.814941,12.4731445 C111.269043,11.8849284 110.357096,11.4342448 109.079102,11.1210938 L106.882812,10.5751953 C105.960286,10.3466797 105.359375,10.1287435 105.080078,9.92138672 C104.800781,9.71402995 104.661133,9.35644531 104.661133,8.84863281 C104.661133,8.22233073 104.931966,7.76318359 105.473633,7.47119141 C106.015299,7.17919922 106.743164,7.03320313 107.657227,7.03320313 C107.944987,7.03320313 108.228516,7.05224609 108.507812,7.09033203 C108.787109,7.12841797 109.062174,7.18343099 109.333008,7.25537109 C109.603841,7.3273112 109.817546,7.38867187 109.974121,7.43945313 C110.130697,7.49023438 110.333822,7.56429036 110.583496,7.66162109 C110.833171,7.75895182 110.970703,7.81184896 110.996094,7.8203125 C111.13151,7.87109375 111.262695,7.89648438 111.389648,7.89648438 C111.609701,7.89648438 111.785319,7.82666016 111.916504,7.68701172 C112.047689,7.54736328 112.113281,7.38020833 112.113281,7.18554688 C112.113281,6.85546875 111.948242,6.60579427 111.618164,6.43652344 C111.161133,6.19108073 110.570801,5.96891276 109.847168,5.77001953 C109.123535,5.5711263 108.351237,5.47167969 107.530273,5.47167969 C106.844727,5.47167969 106.209961,5.53727214 105.625977,5.66845703 C105.041992,5.79964193 104.515137,5.99853516 104.04541,6.26513672 C103.575684,6.53173828 103.205404,6.89355469 102.93457,7.35058594 C102.663737,7.80761719 102.52832,8.33658854 102.52832,8.9375 C102.52832,9.31835938 102.566406,9.65690104 102.642578,9.953125 C102.71875,10.249349 102.841471,10.507487 103.010742,10.7275391 C103.180013,10.9475911 103.361979,11.1359049 103.556641,11.2924805 C103.751302,11.449056 104.011556,11.5950521 104.337402,11.7304688 C104.663249,11.8658854 104.967936,11.9759115 105.251465,12.0605469 C105.534993,12.1451823 105.905273,12.2467448 106.362305,12.3652344 L108.609375,12.9238281 C109.354167,13.110026 109.88737,13.3554688 110.208984,13.6601563 C110.530599,13.9648438 110.691406,14.3710938 110.691406,14.8789063 C110.691406,15.5898438 110.405762,16.1293945 109.834473,16.4975586 C109.263184,16.8657227 108.512044,17.0498047 107.581055,17.0498047 C106.286133,17.0413411 105.058919,16.6774089 103.899414,15.9580078 C103.713216,15.8395182 103.518555,15.7802734 103.31543,15.7802734 C103.095378,15.7802734 102.913411,15.8543294 102.769531,16.0024414 C102.625651,16.1505534 102.553711,16.3261719 102.553711,16.5292969 C102.553711,16.8085938 102.680664,17.0371094 102.93457,17.2148437 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M114.706055,17.2148437 C115.213867,17.5872396 115.893066,17.9109701 116.743652,18.1860352 C117.594238,18.4611003 118.472331,18.5986328 119.37793,18.5986328 C120.351237,18.5986328 121.206055,18.465332 121.942383,18.1987305 C122.678711,17.9321289 123.273275,17.5026042 123.726074,16.9101562 C124.178874,16.3177083 124.405273,15.5898438 124.405273,14.7265625 C124.405273,13.8125 124.132324,13.0613607 123.586426,12.4731445 C123.040527,11.8849284 122.128581,11.4342448 120.850586,11.1210938 L118.654297,10.5751953 C117.731771,10.3466797 117.130859,10.1287435 116.851562,9.92138672 C116.572266,9.71402995 116.432617,9.35644531 116.432617,8.84863281 C116.432617,8.22233073 116.703451,7.76318359 117.245117,7.47119141 C117.786784,7.17919922 118.514648,7.03320313 119.428711,7.03320313 C119.716471,7.03320313 120,7.05224609 120.279297,7.09033203 C120.558594,7.12841797 120.833659,7.18343099 121.104492,7.25537109 C121.375326,7.3273112 121.58903,7.38867187 121.745605,7.43945313 C121.902181,7.49023438 122.105306,7.56429036 122.35498,7.66162109 C122.604655,7.75895182 122.742188,7.81184896 122.767578,7.8203125 C122.902995,7.87109375 123.03418,7.89648438 123.161133,7.89648438 C123.381185,7.89648438 123.556803,7.82666016 123.687988,7.68701172 C123.819173,7.54736328 123.884766,7.38020833 123.884766,7.18554688 C123.884766,6.85546875 123.719727,6.60579427 123.389648,6.43652344 C122.932617,6.19108073 122.342285,5.96891276 121.618652,5.77001953 C120.89502,5.5711263 120.122721,5.47167969 119.301758,5.47167969 C118.616211,5.47167969 117.981445,5.53727214 117.397461,5.66845703 C116.813477,5.79964193 116.286621,5.99853516 115.816895,6.26513672 C115.347168,6.53173828 114.976888,6.89355469 114.706055,7.35058594 C114.435221,7.80761719 114.299805,8.33658854 114.299805,8.9375 C114.299805,9.31835938 114.337891,9.65690104 114.414062,9.953125 C114.490234,10.249349 114.612956,10.507487 114.782227,10.7275391 C114.951497,10.9475911 115.133464,11.1359049 115.328125,11.2924805 C115.522786,11.449056 115.78304,11.5950521 116.108887,11.7304688 C116.434733,11.8658854 116.739421,11.9759115 117.022949,12.0605469 C117.306478,12.1451823 117.676758,12.2467448 118.133789,12.3652344 L120.380859,12.9238281 C121.125651,13.110026 121.658854,13.3554688 121.980469,13.6601563 C122.302083,13.9648438 122.462891,14.3710938 122.462891,14.8789063 C122.462891,15.5898438 122.177246,16.1293945 121.605957,16.4975586 C121.034668,16.8657227 120.283529,17.0498047 119.352539,17.0498047 C118.057617,17.0413411 116.830404,16.6774089 115.670898,15.9580078 C115.484701,15.8395182 115.290039,15.7802734 115.086914,15.7802734 C114.866862,15.7802734 114.684896,15.8543294 114.541016,16.0024414 C114.397135,16.1505534 114.325195,16.3261719 114.325195,16.5292969 C114.325195,16.8085938 114.452148,17.0371094 114.706055,17.2148437 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M127.848633,2.80566406 C128.297201,2.80566406 128.656901,2.67659505 128.927734,2.41845703 C129.198568,2.16031901 129.333984,1.82389323 129.333984,1.40917969 C129.333984,0.986002604 129.198568,0.645345052 128.927734,0.387207031 C128.656901,0.12906901 128.301432,0 127.861328,0 C127.41276,0 127.050944,0.131184896 126.775879,0.393554688 C126.500814,0.655924479 126.363281,0.994466146 126.363281,1.40917969 C126.363281,1.82389323 126.498698,2.16031901 126.769531,2.41845703 C127.040365,2.67659505 127.400065,2.80566406 127.848633,2.80566406 Z M127.835938,18.3574219 C128.149089,18.3574219 128.422038,18.2537435 128.654785,18.0463867 C128.887533,17.8390299 129.003906,17.5576172 129.003906,17.2021484 L129.003906,6.88085938 C129.003906,6.52539062 128.891764,6.24609375 128.66748,6.04296875 C128.443197,5.83984375 128.178711,5.73828125 127.874023,5.73828125 C127.560872,5.73828125 127.287923,5.83984375 127.055176,6.04296875 C126.822428,6.24609375 126.706055,6.52539062 126.706055,6.88085938 L126.706055,17.2021484 C126.706055,17.5745443 126.816081,17.8601888 127.036133,18.059082 C127.256185,18.2579753 127.522786,18.3574219 127.835938,18.3574219 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M134.008789,18.3574219 C134.32194,18.3574219 134.592773,18.2579753 134.821289,18.059082 C135.049805,17.8601888 135.164062,17.5830078 135.164062,17.2275391 L135.164062,7.36328125 L137.30957,7.36328125 C137.851237,7.36328125 138.12207,7.11783854 138.12207,6.62695312 C138.12207,6.40690104 138.056478,6.2249349 137.925293,6.08105469 C137.794108,5.93717448 137.588867,5.86523438 137.30957,5.86523438 L135.164062,5.86523438 L135.164062,4.875 C135.164062,4.18945312 135.191569,3.64567057 135.246582,3.24365234 C135.301595,2.84163411 135.407389,2.53271484 135.563965,2.31689453 C135.72054,2.10107422 135.900391,1.96142578 136.103516,1.89794922 C136.306641,1.83447266 136.590169,1.80273438 136.954102,1.80273438 L138.033203,1.80273438 C138.287109,1.80273438 138.483887,1.71809896 138.623535,1.54882813 C138.763184,1.37955729 138.833008,1.17643229 138.833008,0.939453125 C138.833008,0.702473958 138.763184,0.497233073 138.623535,0.323730469 C138.483887,0.150227865 138.291341,0.0634765625 138.045898,0.0634765625 L136.49707,0.0634765625 C135.362956,0.0634765625 134.474284,0.38297526 133.831055,1.02197266 C133.187826,1.66097005 132.866211,2.73795573 132.866211,4.25292969 L132.866211,5.86523438 L131.203125,5.86523438 C130.923828,5.86523438 130.716471,5.93717448 130.581055,6.08105469 C130.445638,6.2249349 130.37793,6.40690104 130.37793,6.62695312 C130.37793,6.83854167 130.445638,7.01416016 130.581055,7.15380859 C130.716471,7.29345703 130.923828,7.36328125 131.203125,7.36328125 L132.866211,7.36328125 L132.866211,17.2275391 C132.866211,17.5830078 132.978353,17.8601888 133.202637,18.059082 C133.426921,18.2579753 133.695638,18.3574219 134.008789,18.3574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M143.368164,22.6865234 L150.706055,7.2109375 C150.765299,7.06705729 150.794922,6.92317708 150.794922,6.77929688 C150.794922,6.49153646 150.68278,6.24397786 150.458496,6.03662109 C150.234212,5.82926432 149.973958,5.72558594 149.677734,5.72558594 C149.23763,5.72558594 148.90332,5.95833333 148.674805,6.42382813 L144.790039,14.6503906 L140.930664,6.46191406 C140.719076,6.01334635 140.388997,5.7890625 139.94043,5.7890625 C139.644206,5.7890625 139.37972,5.890625 139.146973,6.09375 C138.914225,6.296875 138.797852,6.55078125 138.797852,6.85546875 C138.797852,7.04166667 138.835938,7.2109375 138.912109,7.36328125 L143.634766,17.1005859 L141.362305,21.9121094 C141.286133,22.0813802 141.248047,22.2421875 141.248047,22.3945313 C141.248047,22.6822917 141.358073,22.9213867 141.578125,23.1118164 C141.798177,23.3022461 142.052083,23.3974609 142.339844,23.3974609 C142.796875,23.3974609 143.139648,23.1604818 143.368164,22.6865234 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-3" transform="translate(0, 132)" xlink:href="#path-13" fill="#333333">
+                    <g id="Text" transform="translate(16.4316, 10.8584)">
+                        <path d="M5.59863281,18.3193359 L6.34765625,18.3193359 C6.65234375,18.3193359 6.89990234,18.3108724 7.09033203,18.2939453 C7.28076172,18.2770182 7.47119141,18.2389323 7.66162109,18.1796875 C7.85205078,18.1204427 7.9938151,18.0252279 8.08691406,17.894043 C8.18001302,17.7628581 8.2265625,17.5914714 8.2265625,17.3798828 C8.2265625,17.1429036 8.14615885,16.9355469 7.98535156,16.7578125 C7.82454427,16.5800781 7.60449219,16.4912109 7.32519531,16.4912109 L7.24902344,16.4912109 L6.38574219,16.5292969 L6.24609375,16.5292969 C5.71289063,16.5292969 5.32356771,16.3367513 5.078125,15.9516602 C4.83268229,15.566569 4.70996094,14.938151 4.70996094,14.0664062 L4.70996094,7.14746094 L7.04589844,7.14746094 C7.60449219,7.14746094 7.88378906,6.90625 7.88378906,6.42382813 C7.88378906,6.1953125 7.81396484,6.00911458 7.67431641,5.86523437 C7.53466797,5.72135417 7.32519531,5.64941406 7.04589844,5.64941406 L4.70996094,5.64941406 L4.70996094,1.96777344 C4.70996094,1.67154948 4.62532552,1.44514974 4.45605469,1.28857422 C4.28678385,1.1319987 4.07519531,1.05371094 3.82128906,1.05371094 C3.55045573,1.05371094 3.30289714,1.15104167 3.07861328,1.34570313 C2.85432943,1.54036458 2.73372396,1.77311198 2.71679688,2.04394531 L2.41210938,5.64941406 L0.850585937,5.64941406 C0.571289063,5.64941406 0.359700521,5.71923828 0.215820313,5.85888672 C0.0719401042,5.99853516 0,6.17838542 0,6.3984375 C0,6.62695313 0.0740559896,6.80891927 0.222167969,6.94433594 C0.370279948,7.0797526 0.583984375,7.14746094 0.86328125,7.14746094 L2.41210938,7.14746094 L2.41210938,14.4599609 C2.41210938,15.7718099 2.69986979,16.7430013 3.27539062,17.3735352 C3.85091146,18.004069 4.62532552,18.3193359 5.59863281,18.3193359 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M15.3896484,18.3701172 C17.3447266,18.3701172 18.8935547,17.7565104 20.0361328,16.5292969 C21.1787109,15.3020833 21.75,13.7363281 21.75,11.8320312 C21.75,9.91080729 21.1765951,8.33870443 20.0297852,7.11572266 C18.8829753,5.89274089 17.336263,5.28125 15.3896484,5.28125 C13.4430339,5.28125 11.8963216,5.89485677 10.7495117,7.12207031 C9.60270182,8.34928385 9.02929688,9.91927083 9.02929688,11.8320312 C9.02929688,13.7363281 9.60270182,15.3020833 10.7495117,16.5292969 C11.8963216,17.7565104 13.4430339,18.3701172 15.3896484,18.3701172 Z M15.3642578,16.7958984 C14.0947266,16.7958984 13.1023763,16.3494466 12.387207,15.456543 C11.6720378,14.5636393 11.3144531,13.3554688 11.3144531,11.8320313 C11.3144531,10.2916667 11.6741536,9.07503255 12.3935547,8.18212891 C13.1129557,7.28922526 14.1116536,6.84277344 15.3896484,6.84277344 C16.6591797,6.84277344 17.6557617,7.29134115 18.3793945,8.18847656 C19.1030273,9.08561198 19.4648438,10.3001302 19.4648438,11.8320313 C19.4648438,13.3723958 19.1072591,14.5847982 18.3920898,15.4692383 C17.6769206,16.3536784 16.6676432,16.7958984 15.3642578,16.7958984 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M24.7236328,18.1416016 C25.0367839,18.1416016 25.3097331,18.0400391 25.5424805,17.8369141 C25.7752279,17.6337891 25.8916016,17.3502604 25.8916016,16.9863281 L25.8916016,11.4511719 L31.8583984,17.7988281 C32.069987,18.0358073 32.3238932,18.1542969 32.6201172,18.1542969 C32.8994141,18.1542969 33.1469727,18.0485026 33.362793,17.8369141 C33.5786133,17.6253255 33.6865234,17.3841146 33.6865234,17.1132813 C33.6865234,16.8678385 33.5934245,16.6477865 33.4072266,16.453125 L28.3671875,11.1464844 L32.9755859,7.02050781 C33.1787109,6.82584635 33.2802734,6.61002604 33.2802734,6.37304688 C33.2802734,6.11067708 33.1765951,5.87369792 32.9692383,5.66210937 C32.7618815,5.45052083 32.5227865,5.34472656 32.2519531,5.34472656 C32.0403646,5.34472656 31.8414714,5.42513021 31.6552734,5.5859375 L25.8916016,10.8671875 L25.8916016,1.15527344 C25.8916016,0.799804687 25.7794596,0.520507812 25.5551758,0.317382812 C25.3308919,0.114257813 25.0664063,0.0126953125 24.7617188,0.0126953125 C24.4485677,0.0126953125 24.1756185,0.114257813 23.9428711,0.317382812 C23.7101237,0.520507812 23.59375,0.799804687 23.59375,1.15527344 L23.59375,16.9863281 C23.59375,17.358724 23.703776,17.6443685 23.9238281,17.8432617 C24.1438802,18.0421549 24.4104818,18.1416016 24.7236328,18.1416016 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M36.8378906,10.8798828 C36.8802083,10.3551432 36.9923503,9.85791016 37.1743164,9.38818359 C37.3562826,8.91845703 37.6017253,8.49104818 37.9106445,8.10595703 C38.2195638,7.72086589 38.6152344,7.4140625 39.0976562,7.18554688 C39.5800781,6.95703125 40.117513,6.84277344 40.7099609,6.84277344 C41.8525391,6.84277344 42.7200521,7.21940104 43.3125,7.97265625 C43.9049479,8.72591146 44.2392578,9.69498698 44.3154297,10.8798828 L36.8378906,10.8798828 Z M40.7480469,18.3701172 C42.6438802,18.3701172 44.2604167,17.7692057 45.5976562,16.5673828 C45.8346354,16.3557943 45.953125,16.1145833 45.953125,15.84375 C45.953125,15.6321615 45.8833008,15.4480794 45.7436523,15.2915039 C45.6040039,15.1349284 45.4326172,15.0566406 45.2294922,15.0566406 C45.0602214,15.0566406 44.8994141,15.1158854 44.7470703,15.234375 C44.1376953,15.6998698 43.5325521,16.0680339 42.9316406,16.3388672 C42.3307292,16.6097005 41.6536458,16.7451172 40.9003906,16.7451172 C39.7070312,16.7281901 38.7294922,16.3452148 37.9677734,15.5961914 C37.2060547,14.847168 36.8167318,13.7447917 36.7998047,12.2890625 L45.5087891,12.2890625 C45.7796224,12.2890625 45.9827474,12.2086589 46.1181641,12.0478516 C46.2535807,11.8870443 46.3212891,11.6839193 46.3212891,11.4384766 C46.2958984,10.5836589 46.1731771,9.79866536 45.953125,9.08349609 C45.7330729,8.36832682 45.4029948,7.72086589 44.9628906,7.14111328 C44.5227865,6.56136068 43.9303385,6.10644531 43.1855469,5.77636719 C42.4407552,5.44628906 41.5732422,5.28125 40.5830078,5.28125 C39.3304036,5.28125 38.234375,5.5859375 37.2949219,6.1953125 C36.3554688,6.8046875 35.6529948,7.59602865 35.1875,8.56933594 C34.7220052,9.54264323 34.4892578,10.617513 34.4892578,11.7939453 C34.4892578,13.1311849 34.7664388,14.3012695 35.3208008,15.3041992 C35.8751628,16.3071289 36.6220703,17.0667318 37.5615234,17.5830078 C38.5009766,18.0992839 39.563151,18.3616536 40.7480469,18.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M49.6123047,18.1416016 C49.9254557,18.1416016 50.1984049,18.0442708 50.4311523,17.8496094 C50.6638997,17.6549479 50.7802734,17.3883464 50.7802734,17.0498047 L50.7802734,10.7148438 C50.8395182,9.52148438 51.2034505,8.57779948 51.8720703,7.88378906 C52.5406901,7.18977865 53.3616536,6.84277344 54.3349609,6.84277344 C55.2490234,6.84277344 55.9578451,7.12630208 56.4614258,7.69335938 C56.9650065,8.26041667 57.2167969,9.08561198 57.2167969,10.1689453 L57.2167969,17.0498047 C57.2167969,17.3968099 57.3289388,17.6655273 57.5532227,17.855957 C57.7775065,18.0463867 58.046224,18.1416016 58.359375,18.1416016 C58.672526,18.1416016 58.9412435,18.0463867 59.1655273,17.855957 C59.3898112,17.6655273 59.5019531,17.3968099 59.5019531,17.0498047 L59.5019531,10.1943359 C59.5019531,8.54394531 59.0872396,7.3125 58.2578125,6.5 C57.4283854,5.6875 56.3154297,5.28125 54.9189453,5.28125 C53.945638,5.28125 53.0929362,5.48860677 52.3608398,5.90332031 C51.6287435,6.31803385 51.101888,6.91048177 50.7802734,7.68066406 L50.7802734,6.57617187 C50.7802734,6.24609375 50.6681315,5.99007161 50.4438477,5.80810547 C50.2195638,5.62613932 49.9508464,5.53515625 49.6376953,5.53515625 C49.3245443,5.53515625 49.0537109,5.63037109 48.8251953,5.82080078 C48.5966797,6.01123047 48.4824219,6.27571615 48.4824219,6.61425781 L48.4824219,17.0498047 C48.4824219,17.3968099 48.5924479,17.6655273 48.8125,17.855957 C49.0325521,18.0463867 49.2991536,18.1416016 49.6123047,18.1416016 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M61.0029297,21.3662109 L72.5302734,21.3662109 C72.7672526,21.3662109 72.9513346,21.2900391 73.0825195,21.1376953 C73.2137044,20.9853516 73.2792969,20.8076172 73.2792969,20.6044922 C73.2792969,20.3929036 73.2137044,20.2109375 73.0825195,20.0585938 C72.9513346,19.90625 72.7672526,19.8300781 72.5302734,19.8300781 L61.0029297,19.8300781 C60.7828776,19.8300781 60.6072591,19.90625 60.4760742,20.0585938 C60.3448893,20.2109375 60.2792969,20.3929036 60.2792969,20.6044922 C60.2792969,20.8076172 60.3448893,20.9853516 60.4760742,21.1376953 C60.6072591,21.2900391 60.7828776,21.3662109 61.0029297,21.3662109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M75.7832031,10.8798828 C75.8255208,10.3551432 75.9376628,9.85791016 76.1196289,9.38818359 C76.3015951,8.91845703 76.5470378,8.49104818 76.855957,8.10595703 C77.1648763,7.72086589 77.5605469,7.4140625 78.0429688,7.18554688 C78.5253906,6.95703125 79.0628255,6.84277344 79.6552734,6.84277344 C80.7978516,6.84277344 81.6653646,7.21940104 82.2578125,7.97265625 C82.8502604,8.72591146 83.1845703,9.69498698 83.2607422,10.8798828 L75.7832031,10.8798828 Z M79.6933594,18.3701172 C81.5891927,18.3701172 83.2057292,17.7692057 84.5429688,16.5673828 C84.7799479,16.3557943 84.8984375,16.1145833 84.8984375,15.84375 C84.8984375,15.6321615 84.8286133,15.4480794 84.6889648,15.2915039 C84.5493164,15.1349284 84.3779297,15.0566406 84.1748047,15.0566406 C84.0055339,15.0566406 83.8447266,15.1158854 83.6923828,15.234375 C83.0830078,15.6998698 82.4778646,16.0680339 81.8769531,16.3388672 C81.2760417,16.6097005 80.5989583,16.7451172 79.8457031,16.7451172 C78.6523438,16.7281901 77.6748047,16.3452148 76.9130859,15.5961914 C76.1513672,14.847168 75.7620443,13.7447917 75.7451172,12.2890625 L84.4541016,12.2890625 C84.7249349,12.2890625 84.9280599,12.2086589 85.0634766,12.0478516 C85.1988932,11.8870443 85.2666016,11.6839193 85.2666016,11.4384766 C85.2412109,10.5836589 85.1184896,9.79866536 84.8984375,9.08349609 C84.6783854,8.36832682 84.3483073,7.72086589 83.9082031,7.14111328 C83.468099,6.56136068 82.875651,6.10644531 82.1308594,5.77636719 C81.3860677,5.44628906 80.5185547,5.28125 79.5283203,5.28125 C78.2757161,5.28125 77.1796875,5.5859375 76.2402344,6.1953125 C75.3007812,6.8046875 74.5983073,7.59602865 74.1328125,8.56933594 C73.6673177,9.54264323 73.4345703,10.617513 73.4345703,11.7939453 C73.4345703,13.1311849 73.7117513,14.3012695 74.2661133,15.3041992 C74.8204753,16.3071289 75.5673828,17.0667318 76.5068359,17.5830078 C77.4462891,18.0992839 78.5084635,18.3616536 79.6933594,18.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M88.5576172,18.1416016 C88.8792318,18.1416016 89.1564128,18.0463867 89.3891602,17.855957 C89.6219076,17.6655273 89.7382812,17.4010417 89.7382812,17.0625 L89.7382812,10.8671875 C89.7382812,9.59765625 90.0683594,8.60953776 90.7285156,7.90283203 C91.3886719,7.1961263 92.2688802,6.84277344 93.3691406,6.84277344 C95.2141927,6.84277344 96.1367187,8.01074219 96.1367187,10.3466797 L96.1367187,17.0498047 C96.1367187,17.3883464 96.2488607,17.6549479 96.4731445,17.8496094 C96.6974284,18.0442708 96.9619141,18.1416016 97.2666016,18.1416016 C97.5797526,18.1416016 97.8505859,18.0442708 98.0791016,17.8496094 C98.3076172,17.6549479 98.421875,17.3883464 98.421875,17.0498047 L98.421875,10.9306641 C98.421875,9.59342448 98.7646484,8.57779948 99.4501953,7.88378906 C100.135742,7.18977865 101.007487,6.84277344 102.06543,6.84277344 C102.954102,6.84277344 103.639648,7.11149089 104.12207,7.64892578 C104.604492,8.18636068 104.845703,8.97981771 104.845703,10.0292969 L104.845703,17.0625 C104.845703,17.4010417 104.955729,17.6655273 105.175781,17.855957 C105.395833,18.0463867 105.662435,18.1416016 105.975586,18.1416016 C106.288737,18.1416016 106.561686,18.0463867 106.794434,17.855957 C107.027181,17.6655273 107.143555,17.4010417 107.143555,17.0625 L107.143555,10.1181641 C107.143555,9.28873698 107.022949,8.55664063 106.781738,7.921875 C106.540527,7.28710938 106.206217,6.78141276 105.778809,6.40478516 C105.3514,6.02815755 104.866862,5.74674479 104.325195,5.56054688 C103.783529,5.37434896 103.191081,5.28125 102.547852,5.28125 C101.430664,5.28125 100.482747,5.52880859 99.7041016,6.02392578 C98.9254557,6.51904297 98.3583984,7.25748698 98.0029297,8.23925781 C97.7405599,7.29134115 97.2412109,6.56136068 96.5048828,6.04931641 C95.7685547,5.53727214 94.875651,5.28125 93.8261719,5.28125 C92.9036458,5.28125 92.0826823,5.49072266 91.3632812,5.90966797 C90.6438802,6.32861328 90.1022135,6.92740885 89.7382812,7.70605469 L89.7382812,6.52539063 C89.7382812,6.21223958 89.6219076,5.96679688 89.3891602,5.7890625 C89.1564128,5.61132812 88.8876953,5.52246094 88.5830078,5.52246094 C88.2783203,5.52246094 88.0096029,5.61767578 87.7768555,5.80810547 C87.5441081,5.99853516 87.4277344,6.25878906 87.4277344,6.58886719 L87.4277344,17.0625 C87.4277344,17.4010417 87.5377604,17.6655273 87.7578125,17.855957 C87.9778646,18.0463867 88.2444661,18.1416016 88.5576172,18.1416016 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M116.109375,16.7958984 C113.485677,16.7958984 112.173828,15.1201172 112.173828,11.7685547 C112.173828,11.1253255 112.245768,10.5201823 112.389648,9.953125 C112.533529,9.38606771 112.751465,8.8655599 113.043457,8.39160156 C113.335449,7.91764323 113.733236,7.54101562 114.236816,7.26171875 C114.740397,6.98242188 115.326497,6.84277344 115.995117,6.84277344 C116.655273,6.84277344 117.23291,6.99088542 117.728027,7.28710938 C118.223145,7.58333333 118.60612,7.97900391 118.876953,8.47412109 C119.147786,8.96923828 119.348796,9.49609375 119.47998,10.0546875 C119.611165,10.6132813 119.676758,11.1972656 119.676758,11.8066406 C119.676758,12.3990885 119.617513,12.9682617 119.499023,13.5141602 C119.380534,14.0600586 119.194336,14.5869141 118.94043,15.0947266 C118.686523,15.6025391 118.316243,16.0109049 117.82959,16.3198242 C117.342936,16.6287435 116.769531,16.7874349 116.109375,16.7958984 Z M116.515625,18.3701172 C117.395833,18.3701172 118.18929,18.1902669 118.895996,17.8305664 C119.602702,17.4708659 120.176107,16.9842122 120.616211,16.3706055 C121.056315,15.7569987 121.392741,15.0566406 121.625488,14.2695313 C121.858236,13.4824219 121.974609,12.6445312 121.974609,11.7558594 C121.974609,10.8502604 121.847656,10.0060221 121.59375,9.22314453 C121.339844,8.44026693 120.978027,7.75472005 120.508301,7.16650391 C120.038574,6.57828776 119.441895,6.11702474 118.718262,5.78271484 C117.994629,5.44840495 117.192708,5.28125 116.3125,5.28125 C115.373047,5.28125 114.541504,5.484375 113.817871,5.890625 C113.094238,6.296875 112.541992,6.88509115 112.161133,7.65527344 L112.161133,1.06640625 C112.161133,0.736328125 112.046875,0.476074219 111.818359,0.285644531 C111.589844,0.0952148438 111.31901,0 111.005859,0 C110.701172,0 110.43457,0.0952148438 110.206055,0.285644531 C109.977539,0.476074219 109.863281,0.736328125 109.863281,1.06640625 L109.863281,17.0371094 C109.863281,17.375651 109.977539,17.6401367 110.206055,17.8305664 C110.43457,18.0209961 110.701172,18.1162109 111.005859,18.1162109 C111.386719,18.1162109 111.674479,18.0209961 111.869141,17.8305664 C112.063802,17.6401367 112.161133,17.3798828 112.161133,17.0498047 L112.161133,15.9199219 C112.567383,16.7324219 113.149251,17.3439128 113.906738,17.7543945 C114.664225,18.1648763 115.533854,18.3701172 116.515625,18.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M125.532227,10.8798828 C125.574544,10.3551432 125.686686,9.85791016 125.868652,9.38818359 C126.050618,8.91845703 126.296061,8.49104818 126.60498,8.10595703 C126.9139,7.72086589 127.30957,7.4140625 127.791992,7.18554688 C128.274414,6.95703125 128.811849,6.84277344 129.404297,6.84277344 C130.546875,6.84277344 131.414388,7.21940104 132.006836,7.97265625 C132.599284,8.72591146 132.933594,9.69498698 133.009766,10.8798828 L125.532227,10.8798828 Z M129.442383,18.3701172 C131.338216,18.3701172 132.954753,17.7692057 134.291992,16.5673828 C134.528971,16.3557943 134.647461,16.1145833 134.647461,15.84375 C134.647461,15.6321615 134.577637,15.4480794 134.437988,15.2915039 C134.29834,15.1349284 134.126953,15.0566406 133.923828,15.0566406 C133.754557,15.0566406 133.59375,15.1158854 133.441406,15.234375 C132.832031,15.6998698 132.226888,16.0680339 131.625977,16.3388672 C131.025065,16.6097005 130.347982,16.7451172 129.594727,16.7451172 C128.401367,16.7281901 127.423828,16.3452148 126.662109,15.5961914 C125.900391,14.847168 125.511068,13.7447917 125.494141,12.2890625 L134.203125,12.2890625 C134.473958,12.2890625 134.677083,12.2086589 134.8125,12.0478516 C134.947917,11.8870443 135.015625,11.6839193 135.015625,11.4384766 C134.990234,10.5836589 134.867513,9.79866536 134.647461,9.08349609 C134.427409,8.36832682 134.097331,7.72086589 133.657227,7.14111328 C133.217122,6.56136068 132.624674,6.10644531 131.879883,5.77636719 C131.135091,5.44628906 130.267578,5.28125 129.277344,5.28125 C128.02474,5.28125 126.928711,5.5859375 125.989258,6.1953125 C125.049805,6.8046875 124.347331,7.59602865 123.881836,8.56933594 C123.416341,9.54264323 123.183594,10.617513 123.183594,11.7939453 C123.183594,13.1311849 123.460775,14.3012695 124.015137,15.3041992 C124.569499,16.3071289 125.316406,17.0667318 126.255859,17.5830078 C127.195312,18.0992839 128.257487,18.3616536 129.442383,18.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M142.178711,16.7958984 C141.518555,16.7874349 140.94515,16.6287435 140.458496,16.3198242 C139.971842,16.0109049 139.601562,15.6025391 139.347656,15.0947266 C139.09375,14.5869141 138.907552,14.0600586 138.789062,13.5141602 C138.670573,12.9682617 138.611328,12.3990885 138.611328,11.8066406 C138.611328,11.1972656 138.676921,10.6132813 138.808105,10.0546875 C138.93929,9.49609375 139.140299,8.96923828 139.411133,8.47412109 C139.681966,7.97900391 140.064941,7.58333333 140.560059,7.28710938 C141.055176,6.99088542 141.632812,6.84277344 142.292969,6.84277344 C142.961589,6.84277344 143.547689,6.98242188 144.05127,7.26171875 C144.55485,7.54101562 144.952637,7.91764323 145.244629,8.39160156 C145.536621,8.8655599 145.754557,9.38606771 145.898438,9.953125 C146.042318,10.5201823 146.114258,11.1253255 146.114258,11.7685547 C146.114258,15.1201172 144.802409,16.7958984 142.178711,16.7958984 Z M141.772461,18.3701172 C142.754232,18.3701172 143.623861,18.1648763 144.381348,17.7543945 C145.138835,17.3439128 145.720703,16.7324219 146.126953,15.9199219 L146.126953,17.0498047 C146.126953,17.3798828 146.224284,17.6401367 146.418945,17.8305664 C146.613607,18.0209961 146.901367,18.1162109 147.282227,18.1162109 C147.586914,18.1162109 147.853516,18.0209961 148.082031,17.8305664 C148.310547,17.6401367 148.424805,17.375651 148.424805,17.0371094 L148.424805,1.06640625 C148.424805,0.736328125 148.310547,0.476074219 148.082031,0.285644531 C147.853516,0.0952148438 147.586914,0 147.282227,0 C146.969076,0 146.698242,0.0952148438 146.469727,0.285644531 C146.241211,0.476074219 146.126953,0.736328125 146.126953,1.06640625 L146.126953,7.65527344 C145.746094,6.88509115 145.193848,6.296875 144.470215,5.890625 C143.746582,5.484375 142.915039,5.28125 141.975586,5.28125 C141.095378,5.28125 140.293457,5.44840495 139.569824,5.78271484 C138.846191,6.11702474 138.249512,6.57828776 137.779785,7.16650391 C137.310059,7.75472005 136.948242,8.44026693 136.694336,9.22314453 C136.44043,10.0060221 136.313477,10.8502604 136.313477,11.7558594 C136.313477,12.6445312 136.42985,13.4824219 136.662598,14.2695313 C136.895345,15.0566406 137.231771,15.7569987 137.671875,16.3706055 C138.111979,16.9842122 138.685384,17.4708659 139.39209,17.8305664 C140.098796,18.1902669 140.892253,18.3701172 141.772461,18.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-5" transform="translate(0, 172)" xlink:href="#path-14" fill="#333333">
+                    <g id="Text" transform="translate(17.9297, 10.6426)">
+                        <path d="M6.13183594,17.0117188 C5.46321615,17.0117188 4.87711589,16.8720703 4.37353516,16.5927734 C3.86995443,16.3134766 3.47216797,15.936849 3.18017578,15.4628906 C2.88818359,14.9889323 2.6702474,14.4705404 2.52636719,13.9077148 C2.38248698,13.3448893 2.31054688,12.741862 2.31054688,12.0986328 C2.31054688,8.73860677 3.62239583,7.05859375 6.24609375,7.05859375 C6.77929688,7.06705729 7.25537109,7.17708333 7.67431641,7.38867188 C8.09326172,7.60026042 8.43391927,7.87109375 8.69628906,8.20117188 C8.95865885,8.53125 9.17659505,8.92057292 9.35009766,9.36914062 C9.52360026,9.81770833 9.64420573,10.2620443 9.71191406,10.7021484 C9.7796224,11.1422526 9.81347656,11.5950521 9.81347656,12.0605469 C9.81347656,12.6699219 9.74788411,13.2539063 9.61669922,13.8125 C9.48551432,14.3710938 9.28450521,14.8979492 9.01367188,15.3930664 C8.74283854,15.8881836 8.35986328,16.2817383 7.86474609,16.5737305 C7.36962891,16.8657227 6.79199219,17.0117188 6.13183594,17.0117188 Z M1.14257813,23.1816406 C1.45572917,23.1816406 1.7265625,23.0864258 1.95507812,22.8959961 C2.18359375,22.7055664 2.29785156,22.4453125 2.29785156,22.1152344 L2.29785156,16.2119141 C2.67871094,16.9736328 3.23095703,17.5597331 3.95458984,17.9702148 C4.67822266,18.3806966 5.50976563,18.5859375 6.44921875,18.5859375 C7.32942708,18.5859375 8.13134766,18.4187826 8.85498047,18.0844727 C9.57861328,17.7501628 10.175293,17.2888997 10.6450195,16.7006836 C11.1147461,16.1124674 11.4765625,15.4269206 11.7304688,14.644043 C11.984375,13.8611654 12.1113281,13.0169271 12.1113281,12.1113281 C12.1113281,11.2226562 11.9949544,10.3847656 11.762207,9.59765625 C11.5294596,8.81054688 11.1930339,8.1101888 10.7529297,7.49658203 C10.3128255,6.88297526 9.73942057,6.39632161 9.03271484,6.03662109 C8.32600911,5.67692057 7.53255208,5.49707031 6.65234375,5.49707031 C5.67057292,5.49707031 4.80094401,5.7023112 4.04345703,6.11279297 C3.28597005,6.52327474 2.70410156,7.13476562 2.29785156,7.94726563 L2.29785156,6.81738281 C2.29785156,6.48730469 2.20052083,6.22705078 2.00585938,6.03662109 C1.81119792,5.84619141 1.5234375,5.75097656 1.14257813,5.75097656 C0.829427083,5.75097656 0.560709635,5.84407552 0.336425781,6.03027344 C0.112141927,6.21647135 0,6.47884115 0,6.81738281 L0,22.1152344 C0,22.4453125 0.114257813,22.7055664 0.342773437,22.8959961 C0.571289062,23.0864258 0.837890625,23.1816406 1.14257813,23.1816406 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M15.4023438,18.3574219 C15.7154948,18.3574219 15.988444,18.2537435 16.2211914,18.0463867 C16.4539388,17.8390299 16.5703125,17.5576172 16.5703125,17.2021484 L16.5703125,1.37109375 C16.5703125,1.015625 16.4581706,0.736328125 16.2338867,0.533203125 C16.0096029,0.330078125 15.7451172,0.228515625 15.4404297,0.228515625 C15.1272786,0.228515625 14.8543294,0.330078125 14.621582,0.533203125 C14.3888346,0.736328125 14.2724609,1.015625 14.2724609,1.37109375 L14.2724609,17.2021484 C14.2724609,17.5745443 14.382487,17.8601888 14.6025391,18.059082 C14.8225911,18.2579753 15.0891927,18.3574219 15.4023438,18.3574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M23.8730469,18.5859375 C24.8463542,18.5859375 25.7011719,18.3785807 26.4375,17.9638672 C27.1738281,17.5491536 27.7027995,16.9567057 28.0244141,16.1865234 L28.0244141,17.3164062 C28.0244141,17.6464844 28.1344401,17.9025065 28.3544922,18.0844727 C28.5745443,18.2664388 28.8411458,18.3574219 29.1542969,18.3574219 C29.4674479,18.3574219 29.7382813,18.262207 29.9667969,18.0717773 C30.1953125,17.8813477 30.3095703,17.616862 30.3095703,17.2783203 L30.3095703,6.83007812 C30.3095703,6.48307292 30.1995443,6.21435547 29.9794922,6.02392578 C29.7594401,5.83349609 29.4928385,5.73828125 29.1796875,5.73828125 C28.8665365,5.73828125 28.5957031,5.83561198 28.3671875,6.03027344 C28.1386719,6.2249349 28.0244141,6.49153646 28.0244141,6.83007812 L28.0244141,13.1396484 C27.9651693,14.3414714 27.5970052,15.2872721 26.9199219,15.9770508 C26.2428385,16.6668294 25.4134115,17.0117187 24.4316406,17.0117187 C23.5175781,17.0117187 22.8151042,16.730306 22.3242188,16.1674805 C21.8333333,15.6046549 21.5878906,14.7815755 21.5878906,13.6982422 L21.5878906,6.83007812 C21.5878906,6.48307292 21.4757487,6.21435547 21.2514648,6.02392578 C21.027181,5.83349609 20.7584635,5.73828125 20.4453125,5.73828125 C20.1321615,5.73828125 19.8613281,5.83349609 19.6328125,6.02392578 C19.4042969,6.21435547 19.2900391,6.48307292 19.2900391,6.83007812 L19.2900391,13.6728516 C19.2900391,15.3147786 19.7068685,16.5441081 20.5405273,17.3608398 C21.3741862,18.1775716 22.485026,18.5859375 23.8730469,18.5859375 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M38.4628906,16.8339844 C37.3457031,16.8339844 36.4231771,16.4277344 35.6953125,15.6152344 C34.9674479,14.8027344 34.6035156,13.609375 34.6035156,12.0351562 C34.6035156,11.0957031 34.7347005,10.2620443 34.9970703,9.53417969 C35.2594401,8.8063151 35.686849,8.2117513 36.2792969,7.75048828 C36.8717448,7.28922526 37.5996094,7.05859375 38.4628906,7.05859375 C39.0045573,7.05859375 39.4912109,7.14957682 39.9228516,7.33154297 C40.3544922,7.51350911 40.7078451,7.75895182 40.9829102,8.06787109 C41.2579753,8.37679036 41.4864909,8.74495443 41.668457,9.17236328 C41.8504232,9.59977214 41.9794922,10.0441081 42.0556641,10.5053711 C42.1318359,10.9666341 42.1699219,11.4596354 42.1699219,11.984375 C42.1699219,13.516276 41.8229167,14.7075195 41.1289062,15.5581055 C40.4348958,16.4086914 39.546224,16.8339844 38.4628906,16.8339844 Z M38.3613281,23.7275391 C40.2317708,23.7275391 41.706543,23.2705078 42.7856445,22.3564453 C43.8647461,21.4423828 44.4042969,20.0839844 44.4042969,18.28125 L44.4042969,6.81738281 C44.4042969,6.47884115 44.2985026,6.21435547 44.0869141,6.02392578 C43.8753255,5.83349609 43.6214193,5.73828125 43.3251953,5.73828125 C43.054362,5.73828125 42.8152669,5.82080078 42.6079102,5.98583984 C42.4005534,6.15087891 42.2841797,6.37727865 42.2587891,6.66503906 L42.2587891,7.94726562 C41.4208984,6.31380208 40.0159505,5.49707031 38.0439453,5.49707031 C36.8505859,5.49707031 35.8138021,5.79752604 34.9335938,6.3984375 C34.0533854,6.99934896 33.3953451,7.79280599 32.9594727,8.77880859 C32.5236003,9.7648112 32.3056641,10.875651 32.3056641,12.1113281 C32.3056641,12.9830729 32.4347331,13.7955729 32.6928711,14.5488281 C32.9510091,15.3020833 33.3191732,15.9622396 33.7973633,16.5292969 C34.2755534,17.0963542 34.8849284,17.542806 35.6254883,17.8686523 C36.3660482,18.1944987 37.1933594,18.3574219 38.1074219,18.3574219 C39.0807292,18.3574219 39.9165039,18.1310221 40.6147461,17.6782227 C41.3129883,17.2254232 41.835612,16.6139323 42.1826172,15.84375 L42.1826172,18.3574219 C42.1826172,19.5761719 41.8483073,20.5092773 41.1796875,21.1567383 C40.5110677,21.8041992 39.5208333,22.1279297 38.2089844,22.1279297 C38.0058594,22.1279297 37.8048503,22.1194661 37.605957,22.1025391 C37.4070638,22.085612 37.2293294,22.066569 37.0727539,22.0454102 C36.9161784,22.0242513 36.7426758,21.992513 36.5522461,21.9501953 C36.3618164,21.9078776 36.2094727,21.8740234 36.0952148,21.8486328 C35.980957,21.8232422 35.8328451,21.7809245 35.6508789,21.7216797 C35.4689128,21.6624349 35.3440755,21.6201172 35.2763672,21.5947266 C35.2086589,21.5693359 35.0880534,21.5227865 34.9145508,21.4550781 C34.7410482,21.3873698 34.6373698,21.3492839 34.6035156,21.3408203 C34.5104167,21.3069661 34.4130859,21.2900391 34.3115234,21.2900391 C34.0999349,21.2900391 33.9200846,21.3683268 33.7719727,21.5249023 C33.6238607,21.6814779 33.5498047,21.8655599 33.5498047,22.0771484 C33.5498047,22.3902995 33.710612,22.6357422 34.0322266,22.8134766 C35.1748047,23.4228516 36.6178385,23.7275391 38.3613281,23.7275391 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M48.40625,2.80566406 C48.8548177,2.80566406 49.2145182,2.67659505 49.4853516,2.41845703 C49.7561849,2.16031901 49.8916016,1.82389323 49.8916016,1.40917969 C49.8916016,0.986002604 49.7561849,0.645345052 49.4853516,0.387207031 C49.2145182,0.12906901 48.8590495,0 48.4189453,0 C47.9703776,0 47.6085612,0.131184896 47.3334961,0.393554688 C47.058431,0.655924479 46.9208984,0.994466146 46.9208984,1.40917969 C46.9208984,1.82389323 47.0563151,2.16031901 47.3271484,2.41845703 C47.5979818,2.67659505 47.9576823,2.80566406 48.40625,2.80566406 Z M48.3935547,18.3574219 C48.7067057,18.3574219 48.9796549,18.2537435 49.2124023,18.0463867 C49.4451497,17.8390299 49.5615234,17.5576172 49.5615234,17.2021484 L49.5615234,6.88085938 C49.5615234,6.52539062 49.4493815,6.24609375 49.2250977,6.04296875 C49.0008138,5.83984375 48.7363281,5.73828125 48.4316406,5.73828125 C48.1184896,5.73828125 47.8455404,5.83984375 47.612793,6.04296875 C47.3800456,6.24609375 47.2636719,6.52539062 47.2636719,6.88085938 L47.2636719,17.2021484 C47.2636719,17.5745443 47.3736979,17.8601888 47.59375,18.059082 C47.8138021,18.2579753 48.0804036,18.3574219 48.3935547,18.3574219 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M53.4873047,18.3574219 C53.8004557,18.3574219 54.0734049,18.2600911 54.3061523,18.0654297 C54.5388997,17.8707682 54.6552734,17.6041667 54.6552734,17.265625 L54.6552734,10.9306641 C54.7145182,9.73730469 55.0784505,8.79361979 55.7470703,8.09960938 C56.4156901,7.40559896 57.2366536,7.05859375 58.2099609,7.05859375 C59.1240234,7.05859375 59.8328451,7.3421224 60.3364258,7.90917969 C60.8400065,8.47623698 61.0917969,9.30143229 61.0917969,10.3847656 L61.0917969,17.265625 C61.0917969,17.6126302 61.2039388,17.8813477 61.4282227,18.0717773 C61.6525065,18.262207 61.921224,18.3574219 62.234375,18.3574219 C62.547526,18.3574219 62.8162435,18.262207 63.0405273,18.0717773 C63.2648112,17.8813477 63.3769531,17.6126302 63.3769531,17.265625 L63.3769531,10.4101563 C63.3769531,8.75976562 62.9622396,7.52832031 62.1328125,6.71582031 C61.3033854,5.90332031 60.1904297,5.49707031 58.7939453,5.49707031 C57.820638,5.49707031 56.9679362,5.70442708 56.2358398,6.11914062 C55.5037435,6.53385417 54.976888,7.12630208 54.6552734,7.89648437 L54.6552734,6.79199219 C54.6552734,6.46191406 54.5431315,6.20589193 54.3188477,6.02392578 C54.0945638,5.84195964 53.8258464,5.75097656 53.5126953,5.75097656 C53.1995443,5.75097656 52.9287109,5.84619141 52.7001953,6.03662109 C52.4716797,6.22705078 52.3574219,6.49153646 52.3574219,6.83007813 L52.3574219,17.265625 C52.3574219,17.6126302 52.4674479,17.8813477 52.6875,18.0717773 C52.9075521,18.262207 53.1741536,18.3574219 53.4873047,18.3574219 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+            </g>
+        </g>
+        <g id="List-@Offline-APIs" stroke-width="1" transform="translate(1106, 40)">
+            <g id="header-@2_standard" xlink:href="#path-15" fill="#30A2FF">
+                <g id="bg">
+                    <path d="M20,0 L150,0 C161.045695,-3.55271368e-15 170,8.954305 170,20 L170,52 L170,52 L0,52 L0,20 C0,8.954305 8.954305,0 20,0 Z" id="header-bg" fill-opacity="0.1"></path>
+                    <path d="M150,0 C161.045695,0 170,8.954305 170,20 L170,52 L0,52 L0,20 C0,8.954305 8.954305,0 20,0 L150,0 Z M150,1.6 L20,1.6 L18.1187069,1.69499726 C8.84039506,2.63726101 1.6,10.4730881 1.6,20 L1.6,50.3999999 L168.4,50.3999999 L168.4,20 C168.4,9.8379606 160.162039,1.6 150,1.6 Z" id="header-bg" fill-rule="nonzero"></path>
+                </g>
+                <g id="Title" transform="translate(0, 12)">
+                    <path d="M28.0703125,19.65625 C26.4921875,19.65625 25.3183594,19.1230469 24.5488281,18.0566406 C23.7792969,16.9902344 23.3945312,15.4609375 23.3945312,13.46875 C23.3945312,11.4921875 23.78125,9.96679688 24.5546875,8.89257812 C25.328125,7.81835938 26.5,7.28125 28.0703125,7.28125 C29.6484375,7.28125 30.8222656,7.81640625 31.5917969,8.88671875 C32.3613281,9.95703125 32.7460938,11.484375 32.7460938,13.46875 C32.7460938,15.4609375 32.359375,16.9902344 31.5859375,18.0566406 C30.8125,19.1230469 29.640625,19.65625 28.0703125,19.65625 Z M28.0703125,22.3046875 C30.703125,22.3046875 32.7304688,21.5195313 34.1523438,19.9492188 C35.5742187,18.3789062 36.2851562,16.21875 36.2851562,13.46875 C36.2851562,10.71875 35.5742187,8.55664062 34.1523438,6.98242188 C32.7304688,5.40820312 30.703125,4.62109375 28.0703125,4.62109375 C25.453125,4.62109375 23.4296875,5.40820312 22,6.98242188 C20.5703125,8.55664062 19.8554688,10.71875 19.8554688,13.46875 C19.8554688,16.21875 20.5703125,18.3789062 22,19.9492188 C23.4296875,21.5195313 25.453125,22.3046875 28.0703125,22.3046875 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M41.5117188,22 C41.9414062,22 42.2949219,21.8574219 42.5722656,21.5722656 C42.8496094,21.2871094 42.9882812,20.8828125 42.9882812,20.359375 L42.9882812,12.2148438 L44.546875,12.2148438 C44.890625,12.2148438 45.1523438,12.1289063 45.3320312,11.9570312 C45.5117188,11.7851563 45.6015625,11.5546875 45.6015625,11.265625 C45.6015625,10.9921875 45.5117188,10.765625 45.3320312,10.5859375 C45.1523438,10.40625 44.9023438,10.3164063 44.5820312,10.3164063 L42.9882812,10.3164063 L42.9882812,9.4609375 C42.9882812,8.8125 43.0429688,8.32421875 43.1523438,7.99609375 C43.2617188,7.66796875 43.4140625,7.45703125 43.609375,7.36328125 C43.8046875,7.26953125 44.0976562,7.22265625 44.4882812,7.22265625 L45.4257812,7.22265625 C45.7382812,7.22265625 45.9785156,7.11523437 46.1464844,6.90039063 C46.3144531,6.68554687 46.3984375,6.42578125 46.3984375,6.12109375 C46.3984375,5.80859375 46.3105469,5.53515625 46.1347656,5.30078125 C45.9589844,5.06640625 45.7226562,4.94921875 45.4257812,4.94921875 L43.984375,4.94921875 C41.3515625,4.94921875 40.0351562,6.34765625 40.0351562,9.14453125 L40.0351562,10.3164063 L38.8515625,10.3164063 C38.515625,10.3164063 38.2617188,10.4082031 38.0898438,10.5917969 C37.9179688,10.7753906 37.8320312,11 37.8320312,11.265625 C37.8320312,11.5625 37.921875,11.7929687 38.1015625,11.9570312 C38.28125,12.1210938 38.5429688,12.2070313 38.8867188,12.2148438 L40.0351562,12.2148438 L40.0351562,20.359375 C40.0351562,20.875 40.1757812,21.2773438 40.4570313,21.5664062 C40.7382812,21.8554688 41.0898438,22 41.5117188,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M49.1640625,22 C49.59375,22 49.9472656,21.8574219 50.2246094,21.5722656 C50.5019531,21.2871094 50.640625,20.8828125 50.640625,20.359375 L50.640625,12.2148438 L52.1992188,12.2148438 C52.5429688,12.2148438 52.8046875,12.1289063 52.984375,11.9570312 C53.1640625,11.7851563 53.2539063,11.5546875 53.2539063,11.265625 C53.2539063,10.9921875 53.1640625,10.765625 52.984375,10.5859375 C52.8046875,10.40625 52.5546875,10.3164063 52.234375,10.3164063 L50.640625,10.3164063 L50.640625,9.4609375 C50.640625,8.8125 50.6953125,8.32421875 50.8046875,7.99609375 C50.9140625,7.66796875 51.0664062,7.45703125 51.2617188,7.36328125 C51.4570312,7.26953125 51.75,7.22265625 52.140625,7.22265625 L53.078125,7.22265625 C53.390625,7.22265625 53.6308594,7.11523437 53.7988281,6.90039063 C53.9667969,6.68554687 54.0507812,6.42578125 54.0507812,6.12109375 C54.0507812,5.80859375 53.9628906,5.53515625 53.7871094,5.30078125 C53.6113281,5.06640625 53.375,4.94921875 53.078125,4.94921875 L51.6367188,4.94921875 C49.0039063,4.94921875 47.6875,6.34765625 47.6875,9.14453125 L47.6875,10.3164063 L46.5039062,10.3164063 C46.1679688,10.3164063 45.9140625,10.4082031 45.7421875,10.5917969 C45.5703125,10.7753906 45.484375,11 45.484375,11.265625 C45.484375,11.5625 45.5742188,11.7929687 45.7539062,11.9570312 C45.9335938,12.1210938 46.1953125,12.2070313 46.5390625,12.2148438 L47.6875,12.2148438 L47.6875,20.359375 C47.6875,20.875 47.828125,21.2773438 48.109375,21.5664062 C48.390625,21.8554688 48.7421875,22 49.1640625,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M56.7578125,22 C57.1953125,22 57.5566406,21.8554688 57.8417969,21.5664062 C58.1269531,21.2773438 58.2695312,20.875 58.2695312,20.359375 L58.2695312,6.75390625 C58.2695312,6.23828125 58.1289062,5.8359375 57.8476562,5.546875 C57.5664062,5.2578125 57.2109375,5.11328125 56.78125,5.11328125 C56.3515625,5.11328125 56,5.2578125 55.7265625,5.546875 C55.453125,5.8359375 55.3164062,6.23828125 55.3164062,6.75390625 L55.3164062,20.359375 C55.3164062,20.8828125 55.4511719,21.2871094 55.7207031,21.5722656 C55.9902344,21.8574219 56.3359375,22 56.7578125,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M62.921875,22 C63.3515625,22 63.7050781,21.8554688 63.9824219,21.5664062 C64.2597656,21.2773437 64.3984375,20.875 64.3984375,20.359375 L64.3984375,11.8984375 C64.3984375,11.375 64.2597656,10.96875 63.9824219,10.6796875 C63.7050781,10.390625 63.3515625,10.2460938 62.921875,10.2460938 C62.4921875,10.2460938 62.140625,10.390625 61.8671875,10.6796875 C61.59375,10.96875 61.4570312,11.375 61.4570312,11.8984375 L61.4570312,20.359375 C61.4570312,20.8828125 61.59375,21.2871094 61.8671875,21.5722656 C62.140625,21.8574219 62.4921875,22 62.921875,22 Z M62.921875,7.90234375 C63.4296875,7.90234375 63.8417969,7.75195312 64.1582031,7.45117188 C64.4746094,7.15039063 64.6328125,6.7578125 64.6328125,6.2734375 C64.6328125,5.7890625 64.4765625,5.3984375 64.1640625,5.1015625 C63.8515625,4.8046875 63.4414062,4.65625 62.9335938,4.65625 C62.4179688,4.65625 62.0019531,4.8046875 61.6855469,5.1015625 C61.3691406,5.3984375 61.2109375,5.7890625 61.2109375,6.2734375 C61.2109375,6.7578125 61.3691406,7.15039063 61.6855469,7.45117188 C62.0019531,7.75195312 62.4140625,7.90234375 62.921875,7.90234375 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M68.8398438,22 C69.2695312,22 69.6230469,21.8574219 69.9003906,21.5722656 C70.1777344,21.2871094 70.3164062,20.890625 70.3164062,20.3828125 L70.3164062,15.4257812 C70.3164062,14.4335938 70.6015625,13.6289063 71.171875,13.0117187 C71.7421875,12.3945312 72.4179688,12.0859375 73.1992188,12.0859375 C73.8476562,12.0859375 74.3808594,12.2988281 74.7988281,12.7246094 C75.2167969,13.1503906 75.4257812,13.765625 75.4257812,14.5703125 L75.4257812,20.3828125 C75.4257812,20.890625 75.5625,21.2871094 75.8359375,21.5722656 C76.109375,21.8574219 76.453125,22 76.8671875,22 C77.3046875,22 77.6640625,21.8574219 77.9453125,21.5722656 C78.2265625,21.2871094 78.3671875,20.890625 78.3671875,20.3828125 L78.3671875,14.5820312 C78.3671875,13.8320313 78.2539062,13.1640625 78.0273438,12.578125 C77.8007812,11.9921875 77.4921875,11.5214844 77.1015625,11.1660156 C76.7109375,10.8105469 76.2675781,10.5410156 75.7714844,10.3574219 C75.2753906,10.1738281 74.7460938,10.0820312 74.1835938,10.0820312 C73.2695312,10.0820312 72.4824219,10.2539062 71.8222656,10.5976562 C71.1621094,10.9414062 70.6601562,11.4453125 70.3164062,12.109375 L70.3164062,11.6992188 C70.3164062,11.2382812 70.1796875,10.8808594 69.90625,10.6269531 C69.6328125,10.3730469 69.2851562,10.2460938 68.8632812,10.2460938 C68.4335938,10.2460938 68.078125,10.375 67.796875,10.6328125 C67.515625,10.890625 67.375,11.2539062 67.375,11.7226563 L67.375,20.3828125 C67.375,20.890625 67.5117188,21.2871094 67.7851562,21.5722656 C68.0585938,21.8574219 68.4101562,22 68.8398438,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M80.7578125,16.0117187 C80.7578125,17.9414062 81.3144531,19.4453125 82.4277344,20.5234375 C83.5410156,21.6015625 85.0234375,22.140625 86.875,22.140625 C88.5234375,22.140625 89.9648438,21.6210938 91.1992188,20.5820312 C91.4492188,20.3632813 91.5742188,20.0976562 91.5742188,19.7851562 C91.5742188,19.5117187 91.4804688,19.265625 91.2929688,19.046875 C91.1054688,18.828125 90.890625,18.71875 90.6484375,18.71875 C90.484375,18.71875 90.3320312,18.7734375 90.1914062,18.8828125 C89.0898438,19.671875 88.0546875,20.0664063 87.0859375,20.0664063 C84.953125,20.0664063 83.8242188,18.9648438 83.6992188,16.7617188 L91.1054688,16.7617188 C91.3867188,16.7617188 91.5976562,16.6679688 91.7382812,16.4804687 C91.8789062,16.2929688 91.9492188,16.046875 91.9492188,15.7421875 C91.9414062,14.96875 91.828125,14.25 91.609375,13.5859375 C91.390625,12.921875 91.0683594,12.3242187 90.6425781,11.7929688 C90.2167969,11.2617188 89.6542969,10.8457031 88.9550781,10.5449219 C88.2558594,10.2441406 87.453125,10.09375 86.546875,10.09375 C85.3671875,10.09375 84.3320312,10.3671875 83.4414062,10.9140625 C82.5507812,11.4609375 81.8808594,12.1777344 81.4316406,13.0644531 C80.9824219,13.9511719 80.7578125,14.9335938 80.7578125,16.0117187 Z M83.7226562,15.109375 C83.7929688,14.2578125 84.0761719,13.53125 84.5722656,12.9296875 C85.0683594,12.328125 85.7539062,12.0273438 86.6289062,12.0273438 C87.5273438,12.0273438 88.2050781,12.3164062 88.6621094,12.8945312 C89.1191406,13.4726562 89.3789062,14.2109375 89.4414062,15.109375 L83.7226562,15.109375 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M110.921875,15.8476562 L105.132812,15.8476562 L108.027344,8.125 L110.921875,15.8476562 Z M112.820312,20.921875 C112.953125,21.3046875 113.15625,21.5917969 113.429688,21.7832031 C113.703125,21.9746094 114.007812,22.0703125 114.34375,22.0703125 C114.765625,22.0703125 115.140625,21.9296875 115.46875,21.6484375 C115.796875,21.3671875 115.960938,21.0117188 115.960938,20.5820313 C115.960938,20.3867188 115.921875,20.1875 115.84375,19.984375 L110.699219,7.10546875 C110.371094,6.23828125 109.994141,5.65625 109.568359,5.359375 C109.142578,5.0625 108.628906,4.9140625 108.027344,4.9140625 C107.425781,4.9140625 106.912109,5.0625 106.486328,5.359375 C106.060547,5.65625 105.683594,6.23828125 105.355469,7.10546875 L100.210938,19.984375 C100.132812,20.1875 100.09375,20.3867188 100.09375,20.5820313 C100.09375,21.0117188 100.257812,21.3671875 100.585938,21.6484375 C100.914062,21.9296875 101.289062,22.0703125 101.710938,22.0703125 C102.046875,22.0703125 102.351562,21.9746094 102.625,21.7832031 C102.898438,21.5917969 103.101562,21.3046875 103.234375,20.921875 L104.289062,18.109375 L111.765625,18.109375 L112.820312,20.921875 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M120.261719,22.0117188 C120.722656,22.0117188 121.119141,21.8515625 121.451172,21.53125 C121.783203,21.2109375 121.949219,20.7890625 121.949219,20.265625 L121.949219,15.2851563 L125.21875,15.2851563 C129.492188,15.2851563 131.628906,13.5820313 131.628906,10.1757813 C131.628906,9.24609375 131.484375,8.44726562 131.195312,7.77929687 C130.90625,7.11132813 130.482422,6.58398438 129.923828,6.19726562 C129.365234,5.81054688 128.720703,5.52929688 127.990234,5.35351562 C127.259766,5.17773438 126.410156,5.08984375 125.441406,5.08984375 L120.402344,5.08984375 C119.824219,5.08984375 119.375,5.28515625 119.054688,5.67578125 C118.734375,6.06640625 118.574219,6.55859375 118.574219,7.15234375 L118.574219,20.265625 C118.574219,20.7890625 118.742188,21.2109375 119.078125,21.53125 C119.414062,21.8515625 119.808594,22.0117188 120.261719,22.0117188 Z M121.949219,12.8476562 L121.949219,7.65625 L125.078125,7.65625 C125.625,7.65625 126.080078,7.6875 126.443359,7.75 C126.806641,7.8125 127.138672,7.9296875 127.439453,8.1015625 C127.740234,8.2734375 127.960938,8.53125 128.101562,8.875 C128.242188,9.21875 128.3125,9.65234375 128.3125,10.1757813 C128.3125,10.7148438 128.242188,11.1601562 128.101562,11.5117188 C127.960938,11.8632813 127.740234,12.1347656 127.439453,12.3261719 C127.138672,12.5175781 126.798828,12.6523438 126.419922,12.7304688 C126.041016,12.8085938 125.566406,12.8476562 124.996094,12.8476562 L121.949219,12.8476562 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M135.835938,21.9882812 C136.296875,21.9882812 136.695312,21.828125 137.03125,21.5078125 C137.367188,21.1875 137.535156,20.765625 137.535156,20.2421875 L137.535156,6.82421875 C137.535156,6.30078125 137.367188,5.87890625 137.03125,5.55859375 C136.695312,5.23828125 136.300781,5.078125 135.847656,5.078125 C135.394531,5.078125 135.001953,5.23828125 134.669922,5.55859375 C134.337891,5.87890625 134.171875,6.30078125 134.171875,6.82421875 L134.171875,20.2421875 C134.171875,20.765625 134.335938,21.1875 134.664062,21.5078125 C134.992188,21.828125 135.382812,21.9882812 135.835938,21.9882812 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M141.203125,20.8164062 C142.445312,21.7226563 143.976562,22.1757812 145.796875,22.1757812 C147.203125,22.1757812 148.345703,21.859375 149.224609,21.2265625 C150.103516,20.59375 150.542969,19.6796875 150.542969,18.484375 C150.542969,17.53125 150.251953,16.7851563 149.669922,16.2460937 C149.087891,15.7070312 148.167969,15.28125 146.910156,14.96875 L145.105469,14.5117188 C144.457031,14.3632813 144.017578,14.2011719 143.787109,14.0253906 C143.556641,13.8496094 143.441406,13.578125 143.441406,13.2109375 C143.441406,12.796875 143.652344,12.4902344 144.074219,12.2910156 C144.496094,12.0917969 145.007812,11.9921875 145.609375,11.9921875 C145.789062,11.9921875 145.966797,12 146.142578,12.015625 C146.318359,12.03125 146.496094,12.0566406 146.675781,12.0917969 C146.855469,12.1269531 147.009766,12.1582031 147.138672,12.1855469 C147.267578,12.2128906 147.421875,12.2558594 147.601562,12.3144531 C147.78125,12.3730469 147.904297,12.4121094 147.970703,12.4316406 C148.037109,12.4511719 148.152344,12.4921875 148.316406,12.5546875 C148.480469,12.6171875 148.566406,12.6484375 148.574219,12.6484375 C148.746094,12.7109375 148.898438,12.7421875 149.03125,12.7421875 C149.296875,12.7421875 149.509766,12.6542969 149.669922,12.4785156 C149.830078,12.3027344 149.910156,12.0898438 149.910156,11.8398438 C149.910156,11.4179688 149.699219,11.1054688 149.277344,10.9023438 C148.066406,10.3164062 146.785156,10.0234375 145.433594,10.0234375 C144.035156,10.0234375 142.90625,10.3105469 142.046875,10.8847656 C141.1875,11.4589844 140.757812,12.28125 140.757812,13.3515625 C140.757812,13.796875 140.816406,14.1855469 140.933594,14.5175781 C141.050781,14.8496094 141.203125,15.1269531 141.390625,15.3496094 C141.578125,15.5722656 141.847656,15.7753906 142.199219,15.9589844 C142.550781,16.1425781 142.898438,16.2929687 143.242188,16.4101562 C143.585938,16.5273438 144.035156,16.6601563 144.589844,16.8085938 L146.417969,17.2773438 C147.503906,17.5429688 148.046875,18.0078125 148.046875,18.671875 C148.046875,19.046875 147.919922,19.3535156 147.666016,19.5917969 C147.412109,19.8300781 147.107422,19.9941406 146.751953,20.0839844 C146.396484,20.1738281 145.992188,20.21875 145.539062,20.21875 C145.078125,20.21875 144.617188,20.1542969 144.15625,20.0253906 C143.695312,19.8964844 143.361328,19.78125 143.154297,19.6796875 C142.947266,19.578125 142.671875,19.4296875 142.328125,19.234375 C142.117188,19.1171875 141.914062,19.0585937 141.71875,19.0585937 C141.445312,19.0585937 141.220703,19.15625 141.044922,19.3515625 C140.869141,19.546875 140.78125,19.7734375 140.78125,20.03125 C140.78125,20.3515625 140.921875,20.6132813 141.203125,20.8164062 Z" id="Path" fill-rule="nonzero"></path>
+                </g>
+            </g>
+            <g id="List" transform="translate(0, 50)">
+                <g id="bg">
+                    <path d="M0,0 L170,0 L170,216 C170,220.418278 166.418278,224 162,224 L8,224 C3.581722,224 0,220.418278 0,216 L0,0 L0,0 Z" fill="#FFFFFF"></path>
+                    <path d="M170,0 L170,216 C170,220.418278 166.418278,224 162,224 L8,224 C3.581722,224 0,220.418278 0,216 L0,0 L170,0 Z M168.4,1.60000008 L1.6,1.60000008 L1.6,216 C1.6,219.534622 4.4653776,222.4 8,222.4 L162,222.4 C165.534622,222.4 168.4,219.534622 168.4,216 L168.4,1.60000008 Z" fill="#30A2FF" fill-rule="nonzero"></path>
+                </g>
+                <g id="List-Item-/-01_@text_#333" transform="translate(0, 12)" xlink:href="#path-16" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M3.54199219,25 L12.796875,25 C13.1184896,25 13.3702799,24.8984375 13.5522461,24.6953125 C13.7342122,24.4921875 13.8251953,24.2552083 13.8251953,23.984375 C13.8251953,23.7050781 13.7363281,23.4638672 13.5585937,23.2607422 C13.3808594,23.0576172 13.1269531,22.9560547 12.796875,22.9560547 L4.81152344,22.9560547 L4.81152344,8.08984375 C4.81152344,7.70052083 4.6866862,7.39794922 4.43701172,7.18212891 C4.18733724,6.96630859 3.8889974,6.85839844 3.54199219,6.85839844 C3.18652344,6.85839844 2.88183594,6.96630859 2.62792969,7.18212891 C2.37402344,7.39794922 2.24707031,7.70052083 2.24707031,8.08984375 L2.24707031,23.7304688 C2.24707031,24.0859375 2.37402344,24.3863932 2.62792969,24.6318359 C2.88183594,24.8772786 3.18652344,25 3.54199219,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M16.5703125,25 L25.8251953,25 C26.1468099,25 26.3986003,24.8984375 26.5805664,24.6953125 C26.7625326,24.4921875 26.8535156,24.2552083 26.8535156,23.984375 C26.8535156,23.7050781 26.7646484,23.4638672 26.5869141,23.2607422 C26.4091797,23.0576172 26.1552734,22.9560547 25.8251953,22.9560547 L17.8398437,22.9560547 L17.8398437,8.08984375 C17.8398437,7.70052083 17.7150065,7.39794922 17.465332,7.18212891 C17.2156576,6.96630859 16.9173177,6.85839844 16.5703125,6.85839844 C16.2148438,6.85839844 15.9101562,6.96630859 15.65625,7.18212891 C15.4023438,7.39794922 15.2753906,7.70052083 15.2753906,8.08984375 L15.2753906,23.7304688 C15.2753906,24.0859375 15.4023438,24.3863932 15.65625,24.6318359 C15.9101562,24.8772786 16.2148438,25 16.5703125,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M38.1298828,24.5302734 C38.4261068,24.5302734 38.7138672,24.4329427 38.9931641,24.2382813 C39.2724609,24.0436198 39.492513,23.7643229 39.6533203,23.4003906 L45.5693359,10.1210938 L45.5693359,23.9462891 C45.5693359,24.2340495 45.6857096,24.4583333 45.918457,24.6191406 C46.1512044,24.7799479 46.4283854,24.8603516 46.75,24.8603516 C47.063151,24.8603516 47.3382161,24.7799479 47.5751953,24.6191406 C47.8121745,24.4583333 47.9306641,24.2340495 47.9306641,23.9462891 L47.9306641,8.91503906 C47.9306641,8.42415365 47.7360026,8.01367188 47.3466797,7.68359375 C46.9573568,7.35351562 46.5045573,7.18847656 45.9882812,7.18847656 C45.5651042,7.18847656 45.1652018,7.3133138 44.7885742,7.56298828 C44.4119466,7.81266276 44.1136068,8.19140625 43.8935547,8.69921875 L38.1298828,21.4960937 L32.3662109,8.69921875 C32.1376953,8.19140625 31.8351237,7.81266276 31.4584961,7.56298828 C31.0818685,7.3133138 30.6819661,7.18847656 30.2587891,7.18847656 C29.742513,7.18847656 29.2918294,7.35351562 28.9067383,7.68359375 C28.5216471,8.01367188 28.3291016,8.42415365 28.3291016,8.91503906 L28.3291016,23.9462891 C28.3291016,24.2340495 28.4475911,24.4583333 28.6845703,24.6191406 C28.9215495,24.7799479 29.1966146,24.8603516 29.5097656,24.8603516 C29.8313802,24.8603516 30.1085612,24.7799479 30.3413086,24.6191406 C30.574056,24.4583333 30.6904297,24.2340495 30.6904297,23.9462891 L30.6904297,10.1210938 L36.6064453,23.4003906 C36.7672526,23.7643229 36.9873047,24.0436198 37.2666016,24.2382813 C37.5458984,24.4329427 37.8336589,24.5302734 38.1298828,24.5302734 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M53.0117188,25.0253906 C53.4518229,25.0253906 53.8072917,24.889974 54.078125,24.6191406 C54.3489583,24.3483073 54.484375,24.0266927 54.484375,23.6542969 C54.484375,23.2903646 54.3468424,22.9750977 54.0717773,22.7084961 C53.7967122,22.4418945 53.4433594,22.3085938 53.0117188,22.3085938 C52.5716146,22.3085938 52.2161458,22.4440104 51.9453125,22.7148438 C51.6744792,22.9856771 51.5390625,23.2988281 51.5390625,23.6542969 C51.5390625,24.0266927 51.6744792,24.3483073 51.9453125,24.6191406 C52.2161458,24.889974 52.5716146,25.0253906 53.0117188,25.0253906 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M62.8535156,25.2412109 C64.5123698,25.2412109 65.7988281,24.8730469 66.7128906,24.1367188 C67.000651,23.8997396 67.1445312,23.6416016 67.1445312,23.3623047 C67.1445312,23.1676432 67.0768229,23.0047201 66.9414062,22.8735352 C66.8059896,22.7423503 66.6409505,22.6767578 66.4462891,22.6767578 C66.2685547,22.6767578 66.0992839,22.7317708 65.9384766,22.8417969 C65.1598307,23.3834635 64.1907552,23.6542969 63.03125,23.6542969 C62.3710938,23.6542969 61.7871094,23.5167643 61.2792969,23.2416992 C60.7714844,22.9666341 60.3652344,22.5942383 60.0605469,22.1245117 C59.7558594,21.6547852 59.5273438,21.1321615 59.375,20.5566406 C59.2226562,19.9811198 59.1464844,19.3717448 59.1464844,18.7285156 C59.1464844,17.188151 59.523112,15.9630534 60.2763672,15.0532227 C61.0296224,14.1433919 62.0113932,13.6884766 63.2216797,13.6884766 C64.1526693,13.6884766 65.0032552,13.938151 65.7734375,14.4375 C65.9427083,14.547526 66.1204427,14.6025391 66.3066406,14.6025391 C66.5097656,14.6025391 66.6811523,14.5390625 66.8208008,14.4121094 C66.9604492,14.2851563 67.0302734,14.1285807 67.0302734,13.9423828 C67.0302734,13.680013 66.8863932,13.4345703 66.5986328,13.2060547 C66.2347005,12.9013672 65.7438151,12.6474609 65.1259766,12.4443359 C64.508138,12.2412109 63.8352865,12.1396484 63.1074219,12.1396484 C61.8632813,12.1396484 60.7630208,12.4316406 59.8066406,13.015625 C58.8502604,13.5996094 58.1181641,14.3867188 57.6103516,15.3769531 C57.1025391,16.3671875 56.8486328,17.4716797 56.8486328,18.6904297 C56.8486328,20.5777995 57.3902995,22.1414388 58.4736328,23.3813477 C59.5569661,24.6212565 61.0169271,25.2412109 62.8535156,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M70.1816406,25 C70.4947917,25 70.7677409,24.8963216 71.0004883,24.6889648 C71.2332357,24.4816081 71.3496094,24.2001953 71.3496094,23.8447266 L71.3496094,8.01367188 C71.3496094,7.65820312 71.2374674,7.37890625 71.0131836,7.17578125 C70.7888997,6.97265625 70.5244141,6.87109375 70.2197266,6.87109375 C69.9065755,6.87109375 69.6336263,6.97265625 69.4008789,7.17578125 C69.1681315,7.37890625 69.0517578,7.65820312 69.0517578,8.01367188 L69.0517578,23.8447266 C69.0517578,24.2171224 69.1617839,24.5027669 69.3818359,24.7016602 C69.601888,24.9005534 69.8684896,25 70.1816406,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M77.8652344,23.7050781 C77.078125,23.7050781 76.4560547,23.5252279 75.9990234,23.1655273 C75.5419922,22.8058268 75.3134766,22.2408854 75.3134766,21.4707031 C75.3134766,21.047526 75.3748372,20.7005208 75.4975586,20.4296875 C75.6202799,20.1588542 75.8085938,19.9282227 76.0625,19.737793 C76.3164062,19.5473633 76.6972656,19.4034831 77.2050781,19.3061523 C77.7128906,19.2088216 78.296875,19.1411133 78.9570312,19.1030273 C79.6171875,19.0649414 80.4720052,19.0458984 81.5214844,19.0458984 L81.5214844,19.4267578 C81.5214844,20.6708984 81.1554362,21.694987 80.4233398,22.4990234 C79.6912435,23.3030599 78.8385417,23.7050781 77.8652344,23.7050781 Z M77.6240234,25.2285156 C79.4436849,25.2285156 80.7470703,24.4033203 81.5341797,22.7529297 L81.5341797,23.9716797 C81.5341797,24.3017578 81.6420898,24.5577799 81.8579102,24.7397461 C82.0737305,24.9217122 82.3339844,25.0126953 82.6386719,25.0126953 C82.9433594,25.0126953 83.2120768,24.9174805 83.4448242,24.7270508 C83.6775716,24.5366211 83.7939453,24.2763672 83.7939453,23.9462891 L83.7939453,16.5449219 C83.7939453,15.046875 83.355957,13.938151 82.4799805,13.21875 C81.6040039,12.499349 80.3916016,12.1396484 78.8427734,12.1396484 C77.0315755,12.1396484 75.4700521,12.5078125 74.1582031,13.2441406 C73.921224,13.3795573 73.8027344,13.5742187 73.8027344,13.828125 C73.8027344,14.0481771 73.883138,14.2491862 74.0439453,14.4311523 C74.2047526,14.6131185 74.3951823,14.7041016 74.6152344,14.7041016 C74.733724,14.7041016 74.8352865,14.6829427 74.9199219,14.640625 C75.3515625,14.4459635 75.7154948,14.2936198 76.0117188,14.1835937 C76.3079427,14.0735677 76.7141927,13.9635417 77.2304688,13.8535156 C77.7467448,13.7434896 78.2630208,13.6884766 78.7792969,13.6884766 C79.6425781,13.6884766 80.3154297,13.9042969 80.7978516,14.3359375 C81.2802734,14.7675781 81.5214844,15.4361979 81.5214844,16.3417969 L81.5214844,17.7255859 C80.5820312,17.7255859 79.7801107,17.7382812 79.1157227,17.7636719 C78.4513346,17.7890625 77.8059896,17.8334961 77.1796875,17.8969727 C76.5533854,17.9604492 76.0371094,18.0535482 75.6308594,18.1762695 C75.2246094,18.2989909 74.8522135,18.4555664 74.5136719,18.6459961 C74.1751302,18.8364258 73.9127604,19.0691732 73.7265625,19.3442383 C73.5403646,19.6193034 73.3964844,19.9388021 73.2949219,20.3027344 C73.1933594,20.6666667 73.1425781,21.0898438 73.1425781,21.5722656 C73.1425781,22.7402344 73.5572917,23.6416016 74.3867188,24.2763672 C75.2161458,24.9111328 76.2952474,25.2285156 77.6240234,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M86.4248047,23.8574219 C86.9326172,24.2298177 87.6118164,24.5535482 88.4624023,24.8286133 C89.3129883,25.1036784 90.1910807,25.2412109 91.0966797,25.2412109 C92.069987,25.2412109 92.9248047,25.1079102 93.6611328,24.8413086 C94.3974609,24.574707 94.9920247,24.1451823 95.4448242,23.5527344 C95.8976237,22.9602865 96.1240234,22.2324219 96.1240234,21.3691406 C96.1240234,20.4550781 95.8510742,19.7039388 95.3051758,19.1157227 C94.7592773,18.5275065 93.8473307,18.0768229 92.5693359,17.7636719 L90.3730469,17.2177734 C89.4505208,16.9892578 88.8496094,16.7713216 88.5703125,16.5639648 C88.2910156,16.3566081 88.1513672,15.9990234 88.1513672,15.4912109 C88.1513672,14.8649089 88.4222005,14.4057617 88.9638672,14.1137695 C89.5055339,13.8217773 90.2333984,13.6757813 91.1474609,13.6757813 C91.4352214,13.6757813 91.71875,13.6948242 91.9980469,13.7329102 C92.2773438,13.7709961 92.5524089,13.8260091 92.8232422,13.8979492 C93.0940755,13.9698893 93.3077799,14.03125 93.4643555,14.0820313 C93.620931,14.1328125 93.824056,14.2068685 94.0737305,14.3041992 C94.3234049,14.4015299 94.4609375,14.4544271 94.4863281,14.4628906 C94.6217448,14.5136719 94.7529297,14.5390625 94.8798828,14.5390625 C95.0999349,14.5390625 95.2755534,14.4692383 95.4067383,14.3295898 C95.5379232,14.1899414 95.6035156,14.0227865 95.6035156,13.828125 C95.6035156,13.4980469 95.4384766,13.2483724 95.1083984,13.0791016 C94.6513672,12.8336589 94.0610352,12.6114909 93.3374023,12.4125977 C92.6137695,12.2137044 91.8414714,12.1142578 91.0205078,12.1142578 C90.3349609,12.1142578 89.7001953,12.1798503 89.1162109,12.3110352 C88.5322266,12.4422201 88.0053711,12.6411133 87.5356445,12.9077148 C87.065918,13.1743164 86.695638,13.5361328 86.4248047,13.9931641 C86.1539714,14.4501953 86.0185547,14.9791667 86.0185547,15.5800781 C86.0185547,15.9609375 86.0566406,16.2994792 86.1328125,16.5957031 C86.2089844,16.8919271 86.3317057,17.1500651 86.5009766,17.3701172 C86.6702474,17.5901693 86.8522135,17.7784831 87.046875,17.9350586 C87.2415365,18.0916341 87.5017904,18.2376302 87.8276367,18.3730469 C88.1534831,18.5084635 88.4581706,18.6184896 88.7416992,18.703125 C89.0252279,18.7877604 89.3955078,18.8893229 89.8525391,19.0078125 L92.0996094,19.5664062 C92.844401,19.7526042 93.3776042,19.9980469 93.6992188,20.3027344 C94.0208333,20.6074219 94.1816406,21.0136719 94.1816406,21.5214844 C94.1816406,22.2324219 93.8959961,22.7719727 93.324707,23.1401367 C92.753418,23.5083008 92.0022786,23.6923828 91.0712891,23.6923828 C89.7763672,23.6839193 88.5491536,23.319987 87.3896484,22.6005859 C87.2034505,22.4820964 87.0087891,22.4228516 86.8056641,22.4228516 C86.585612,22.4228516 86.4036458,22.4969076 86.2597656,22.6450195 C86.1158854,22.7931315 86.0439453,22.96875 86.0439453,23.171875 C86.0439453,23.4511719 86.1708984,23.6796875 86.4248047,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M98.1962891,23.8574219 C98.7041016,24.2298177 99.3833008,24.5535482 100.233887,24.8286133 C101.084473,25.1036784 101.962565,25.2412109 102.868164,25.2412109 C103.841471,25.2412109 104.696289,25.1079102 105.432617,24.8413086 C106.168945,24.574707 106.763509,24.1451823 107.216309,23.5527344 C107.669108,22.9602865 107.895508,22.2324219 107.895508,21.3691406 C107.895508,20.4550781 107.622559,19.7039388 107.07666,19.1157227 C106.530762,18.5275065 105.618815,18.0768229 104.34082,17.7636719 L102.144531,17.2177734 C101.222005,16.9892578 100.621094,16.7713216 100.341797,16.5639648 C100.0625,16.3566081 99.9228516,15.9990234 99.9228516,15.4912109 C99.9228516,14.8649089 100.193685,14.4057617 100.735352,14.1137695 C101.277018,13.8217773 102.004883,13.6757813 102.918945,13.6757813 C103.206706,13.6757813 103.490234,13.6948242 103.769531,13.7329102 C104.048828,13.7709961 104.323893,13.8260091 104.594727,13.8979492 C104.86556,13.9698893 105.079264,14.03125 105.23584,14.0820313 C105.392415,14.1328125 105.59554,14.2068685 105.845215,14.3041992 C106.094889,14.4015299 106.232422,14.4544271 106.257812,14.4628906 C106.393229,14.5136719 106.524414,14.5390625 106.651367,14.5390625 C106.871419,14.5390625 107.047038,14.4692383 107.178223,14.3295898 C107.309408,14.1899414 107.375,14.0227865 107.375,13.828125 C107.375,13.4980469 107.209961,13.2483724 106.879883,13.0791016 C106.422852,12.8336589 105.83252,12.6114909 105.108887,12.4125977 C104.385254,12.2137044 103.612956,12.1142578 102.791992,12.1142578 C102.106445,12.1142578 101.47168,12.1798503 100.887695,12.3110352 C100.303711,12.4422201 99.7768555,12.6411133 99.3071289,12.9077148 C98.8374023,13.1743164 98.4671224,13.5361328 98.1962891,13.9931641 C97.9254557,14.4501953 97.7900391,14.9791667 97.7900391,15.5800781 C97.7900391,15.9609375 97.828125,16.2994792 97.9042969,16.5957031 C97.9804688,16.8919271 98.1031901,17.1500651 98.2724609,17.3701172 C98.4417318,17.5901693 98.6236979,17.7784831 98.8183594,17.9350586 C99.0130208,18.0916341 99.2732747,18.2376302 99.5991211,18.3730469 C99.9249674,18.5084635 100.229655,18.6184896 100.513184,18.703125 C100.796712,18.7877604 101.166992,18.8893229 101.624023,19.0078125 L103.871094,19.5664062 C104.615885,19.7526042 105.149089,19.9980469 105.470703,20.3027344 C105.792318,20.6074219 105.953125,21.0136719 105.953125,21.5214844 C105.953125,22.2324219 105.66748,22.7719727 105.096191,23.1401367 C104.524902,23.5083008 103.773763,23.6923828 102.842773,23.6923828 C101.547852,23.6839193 100.320638,23.319987 99.1611328,22.6005859 C98.9749349,22.4820964 98.7802734,22.4228516 98.5771484,22.4228516 C98.3570964,22.4228516 98.1751302,22.4969076 98.03125,22.6450195 C97.8873698,22.7931315 97.8154297,22.96875 97.8154297,23.171875 C97.8154297,23.4511719 97.9423828,23.6796875 98.1962891,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M111.338867,9.44824219 C111.787435,9.44824219 112.147135,9.31917318 112.417969,9.06103516 C112.688802,8.80289714 112.824219,8.46647135 112.824219,8.05175781 C112.824219,7.62858073 112.688802,7.28792318 112.417969,7.02978516 C112.147135,6.77164714 111.791667,6.64257812 111.351562,6.64257812 C110.902995,6.64257812 110.541178,6.77376302 110.266113,7.03613281 C109.991048,7.2985026 109.853516,7.63704427 109.853516,8.05175781 C109.853516,8.46647135 109.988932,8.80289714 110.259766,9.06103516 C110.530599,9.31917318 110.890299,9.44824219 111.338867,9.44824219 Z M111.326172,25 C111.639323,25 111.912272,24.8963216 112.14502,24.6889648 C112.377767,24.4816081 112.494141,24.2001953 112.494141,23.8447266 L112.494141,13.5234375 C112.494141,13.1679688 112.381999,12.8886719 112.157715,12.6855469 C111.933431,12.4824219 111.668945,12.3808594 111.364258,12.3808594 C111.051107,12.3808594 110.778158,12.4824219 110.54541,12.6855469 C110.312663,12.8886719 110.196289,13.1679688 110.196289,13.5234375 L110.196289,23.8447266 C110.196289,24.2171224 110.306315,24.5027669 110.526367,24.7016602 C110.746419,24.9005534 111.013021,25 111.326172,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M117.499023,25 C117.812174,25 118.083008,24.9005534 118.311523,24.7016602 C118.540039,24.5027669 118.654297,24.2255859 118.654297,23.8701172 L118.654297,14.0058594 L120.799805,14.0058594 C121.341471,14.0058594 121.612305,13.7604167 121.612305,13.2695312 C121.612305,13.0494792 121.546712,12.867513 121.415527,12.7236328 C121.284342,12.5797526 121.079102,12.5078125 120.799805,12.5078125 L118.654297,12.5078125 L118.654297,11.5175781 C118.654297,10.8320312 118.681803,10.2882487 118.736816,9.88623047 C118.791829,9.48421224 118.897624,9.17529297 119.054199,8.95947266 C119.210775,8.74365234 119.390625,8.60400391 119.59375,8.54052734 C119.796875,8.47705078 120.080404,8.4453125 120.444336,8.4453125 L121.523438,8.4453125 C121.777344,8.4453125 121.974121,8.36067708 122.11377,8.19140625 C122.253418,8.02213542 122.323242,7.81901042 122.323242,7.58203125 C122.323242,7.34505208 122.253418,7.1398112 122.11377,6.96630859 C121.974121,6.79280599 121.781576,6.70605469 121.536133,6.70605469 L119.987305,6.70605469 C118.85319,6.70605469 117.964518,7.02555339 117.321289,7.66455078 C116.67806,8.30354818 116.356445,9.38053385 116.356445,10.8955078 L116.356445,12.5078125 L114.693359,12.5078125 C114.414062,12.5078125 114.206706,12.5797526 114.071289,12.7236328 C113.935872,12.867513 113.868164,13.0494792 113.868164,13.2695312 C113.868164,13.4811198 113.935872,13.6567383 114.071289,13.7963867 C114.206706,13.9360352 114.414062,14.0058594 114.693359,14.0058594 L116.356445,14.0058594 L116.356445,23.8701172 C116.356445,24.2255859 116.468587,24.5027669 116.692871,24.7016602 C116.917155,24.9005534 117.185872,25 117.499023,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M126.858398,29.3291016 L134.196289,13.8535156 C134.255534,13.7096354 134.285156,13.5657552 134.285156,13.421875 C134.285156,13.1341146 134.173014,12.886556 133.94873,12.6791992 C133.724447,12.4718424 133.464193,12.3681641 133.167969,12.3681641 C132.727865,12.3681641 132.393555,12.6009115 132.165039,13.0664062 L128.280273,21.2929688 L124.420898,13.1044922 C124.20931,12.6559245 123.879232,12.4316406 123.430664,12.4316406 C123.13444,12.4316406 122.869954,12.5332031 122.637207,12.7363281 C122.40446,12.9394531 122.288086,13.1933594 122.288086,13.4980469 C122.288086,13.6842448 122.326172,13.8535156 122.402344,14.0058594 L127.125,23.7431641 L124.852539,28.5546875 C124.776367,28.7239583 124.738281,28.8847656 124.738281,29.0371094 C124.738281,29.3248698 124.848307,29.5639648 125.068359,29.7543945 C125.288411,29.9448242 125.542318,30.0400391 125.830078,30.0400391 C126.287109,30.0400391 126.629883,29.8030599 126.858398,29.3291016 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy" transform="translate(0, 52)" xlink:href="#path-17" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M3.54199219,25 L12.796875,25 C13.1184896,25 13.3702799,24.8984375 13.5522461,24.6953125 C13.7342122,24.4921875 13.8251953,24.2552083 13.8251953,23.984375 C13.8251953,23.7050781 13.7363281,23.4638672 13.5585937,23.2607422 C13.3808594,23.0576172 13.1269531,22.9560547 12.796875,22.9560547 L4.81152344,22.9560547 L4.81152344,8.08984375 C4.81152344,7.70052083 4.6866862,7.39794922 4.43701172,7.18212891 C4.18733724,6.96630859 3.8889974,6.85839844 3.54199219,6.85839844 C3.18652344,6.85839844 2.88183594,6.96630859 2.62792969,7.18212891 C2.37402344,7.39794922 2.24707031,7.70052083 2.24707031,8.08984375 L2.24707031,23.7304688 C2.24707031,24.0859375 2.37402344,24.3863932 2.62792969,24.6318359 C2.88183594,24.8772786 3.18652344,25 3.54199219,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M16.5703125,25 L25.8251953,25 C26.1468099,25 26.3986003,24.8984375 26.5805664,24.6953125 C26.7625326,24.4921875 26.8535156,24.2552083 26.8535156,23.984375 C26.8535156,23.7050781 26.7646484,23.4638672 26.5869141,23.2607422 C26.4091797,23.0576172 26.1552734,22.9560547 25.8251953,22.9560547 L17.8398437,22.9560547 L17.8398437,8.08984375 C17.8398437,7.70052083 17.7150065,7.39794922 17.465332,7.18212891 C17.2156576,6.96630859 16.9173177,6.85839844 16.5703125,6.85839844 C16.2148438,6.85839844 15.9101562,6.96630859 15.65625,7.18212891 C15.4023438,7.39794922 15.2753906,7.70052083 15.2753906,8.08984375 L15.2753906,23.7304688 C15.2753906,24.0859375 15.4023438,24.3863932 15.65625,24.6318359 C15.9101562,24.8772786 16.2148438,25 16.5703125,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M38.1298828,24.5302734 C38.4261068,24.5302734 38.7138672,24.4329427 38.9931641,24.2382813 C39.2724609,24.0436198 39.492513,23.7643229 39.6533203,23.4003906 L45.5693359,10.1210938 L45.5693359,23.9462891 C45.5693359,24.2340495 45.6857096,24.4583333 45.918457,24.6191406 C46.1512044,24.7799479 46.4283854,24.8603516 46.75,24.8603516 C47.063151,24.8603516 47.3382161,24.7799479 47.5751953,24.6191406 C47.8121745,24.4583333 47.9306641,24.2340495 47.9306641,23.9462891 L47.9306641,8.91503906 C47.9306641,8.42415365 47.7360026,8.01367188 47.3466797,7.68359375 C46.9573568,7.35351562 46.5045573,7.18847656 45.9882812,7.18847656 C45.5651042,7.18847656 45.1652018,7.3133138 44.7885742,7.56298828 C44.4119466,7.81266276 44.1136068,8.19140625 43.8935547,8.69921875 L38.1298828,21.4960937 L32.3662109,8.69921875 C32.1376953,8.19140625 31.8351237,7.81266276 31.4584961,7.56298828 C31.0818685,7.3133138 30.6819661,7.18847656 30.2587891,7.18847656 C29.742513,7.18847656 29.2918294,7.35351562 28.9067383,7.68359375 C28.5216471,8.01367188 28.3291016,8.42415365 28.3291016,8.91503906 L28.3291016,23.9462891 C28.3291016,24.2340495 28.4475911,24.4583333 28.6845703,24.6191406 C28.9215495,24.7799479 29.1966146,24.8603516 29.5097656,24.8603516 C29.8313802,24.8603516 30.1085612,24.7799479 30.3413086,24.6191406 C30.574056,24.4583333 30.6904297,24.2340495 30.6904297,23.9462891 L30.6904297,10.1210938 L36.6064453,23.4003906 C36.7672526,23.7643229 36.9873047,24.0436198 37.2666016,24.2382813 C37.5458984,24.4329427 37.8336589,24.5302734 38.1298828,24.5302734 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M53.0117188,25.0253906 C53.4518229,25.0253906 53.8072917,24.889974 54.078125,24.6191406 C54.3489583,24.3483073 54.484375,24.0266927 54.484375,23.6542969 C54.484375,23.2903646 54.3468424,22.9750977 54.0717773,22.7084961 C53.7967122,22.4418945 53.4433594,22.3085938 53.0117188,22.3085938 C52.5716146,22.3085938 52.2161458,22.4440104 51.9453125,22.7148438 C51.6744792,22.9856771 51.5390625,23.2988281 51.5390625,23.6542969 C51.5390625,24.0266927 51.6744792,24.3483073 51.9453125,24.6191406 C52.2161458,24.889974 52.5716146,25.0253906 53.0117188,25.0253906 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M59.2734375,17.7382812 C59.3157552,17.2135417 59.4278971,16.7163086 59.6098633,16.246582 C59.7918294,15.7768555 60.0372721,15.3494466 60.3461914,14.9643555 C60.6551107,14.5792643 61.0507812,14.2724609 61.5332031,14.0439453 C62.015625,13.8154297 62.5530599,13.7011719 63.1455078,13.7011719 C64.2880859,13.7011719 65.155599,14.0777995 65.7480469,14.8310547 C66.3404948,15.5843099 66.6748047,16.5533854 66.7509766,17.7382812 L59.2734375,17.7382812 Z M63.1835938,25.2285156 C65.0794271,25.2285156 66.6959635,24.6276042 68.0332031,23.4257812 C68.2701823,23.2141927 68.3886719,22.9729818 68.3886719,22.7021484 C68.3886719,22.4905599 68.3188477,22.3064779 68.1791992,22.1499023 C68.0395508,21.9933268 67.8681641,21.9150391 67.6650391,21.9150391 C67.4957682,21.9150391 67.3349609,21.9742839 67.1826172,22.0927734 C66.5732422,22.5582682 65.968099,22.9264323 65.3671875,23.1972656 C64.766276,23.468099 64.0891927,23.6035156 63.3359375,23.6035156 C62.1425781,23.5865885 61.1650391,23.2036133 60.4033203,22.4545898 C59.6416016,21.7055664 59.2522786,20.6031901 59.2353516,19.1474609 L67.9443359,19.1474609 C68.2151693,19.1474609 68.4182943,19.0670573 68.5537109,18.90625 C68.6891276,18.7454427 68.7568359,18.5423177 68.7568359,18.296875 C68.7314453,17.4420573 68.608724,16.6570638 68.3886719,15.9418945 C68.1686198,15.2267253 67.8385417,14.5792643 67.3984375,13.9995117 C66.9583333,13.4197591 66.3658854,12.9648438 65.6210938,12.6347656 C64.8763021,12.3046875 64.0087891,12.1396484 63.0185547,12.1396484 C61.7659505,12.1396484 60.6699219,12.4443359 59.7304688,13.0537109 C58.7910156,13.6630859 58.0885417,14.4544271 57.6230469,15.4277344 C57.1575521,16.4010417 56.9248047,17.4759115 56.9248047,18.6523437 C56.9248047,19.9895833 57.2019857,21.159668 57.7563477,22.1625977 C58.3107096,23.1655273 59.0576172,23.9251302 59.9970703,24.4414063 C60.9365234,24.9576823 61.9986979,25.2200521 63.1835938,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M72.0478516,25 C72.3694661,25 72.6466471,24.9047852 72.8793945,24.7143555 C73.1121419,24.5239258 73.2285156,24.2594401 73.2285156,23.9208984 L73.2285156,17.7255859 C73.2285156,16.4560547 73.5585938,15.4679362 74.21875,14.7612305 C74.8789062,14.0545247 75.7591146,13.7011719 76.859375,13.7011719 C78.7044271,13.7011719 79.6269531,14.8691406 79.6269531,17.2050781 L79.6269531,23.9082031 C79.6269531,24.2467448 79.7390951,24.5133464 79.9633789,24.7080078 C80.1876628,24.9026693 80.4521484,25 80.7568359,25 C81.069987,25 81.3408203,24.9026693 81.5693359,24.7080078 C81.7978516,24.5133464 81.9121094,24.2467448 81.9121094,23.9082031 L81.9121094,17.7890625 C81.9121094,16.4518229 82.2548828,15.4361979 82.9404297,14.7421875 C83.6259766,14.0481771 84.4977214,13.7011719 85.5556641,13.7011719 C86.4443359,13.7011719 87.1298828,13.9698893 87.6123047,14.5073242 C88.0947266,15.0447591 88.3359375,15.8382161 88.3359375,16.8876953 L88.3359375,23.9208984 C88.3359375,24.2594401 88.4459635,24.5239258 88.6660156,24.7143555 C88.8860677,24.9047852 89.1526693,25 89.4658203,25 C89.7789714,25 90.0519206,24.9047852 90.284668,24.7143555 C90.5174154,24.5239258 90.6337891,24.2594401 90.6337891,23.9208984 L90.6337891,16.9765625 C90.6337891,16.1471354 90.5131836,15.4150391 90.2719727,14.7802734 C90.0307617,14.1455078 89.6964518,13.6398112 89.269043,13.2631836 C88.8416341,12.886556 88.3570964,12.6051432 87.8154297,12.4189453 C87.273763,12.2327474 86.6813151,12.1396484 86.0380859,12.1396484 C84.9208984,12.1396484 83.9729818,12.387207 83.1943359,12.8823242 C82.4156901,13.3774414 81.8486328,14.1158854 81.4931641,15.0976562 C81.2307943,14.1497396 80.7314453,13.4197591 79.9951172,12.9077148 C79.2587891,12.3956706 78.3658854,12.1396484 77.3164062,12.1396484 C76.3938802,12.1396484 75.5729167,12.3491211 74.8535156,12.7680664 C74.1341146,13.1870117 73.5924479,13.7858073 73.2285156,14.5644531 L73.2285156,13.3837891 C73.2285156,13.070638 73.1121419,12.8251953 72.8793945,12.6474609 C72.6466471,12.4697266 72.3779297,12.3808594 72.0732422,12.3808594 C71.7685547,12.3808594 71.4998372,12.4760742 71.2670898,12.6665039 C71.0343424,12.8569336 70.9179688,13.1171875 70.9179688,13.4472656 L70.9179688,23.9208984 C70.9179688,24.2594401 71.0279948,24.5239258 71.2480469,24.7143555 C71.468099,24.9047852 71.7347005,25 72.0478516,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M99.5996094,23.6542969 C96.9759115,23.6542969 95.6640625,21.9785156 95.6640625,18.6269531 C95.6640625,17.983724 95.7360026,17.3785807 95.8798828,16.8115234 C96.023763,16.2444661 96.2416992,15.7239583 96.5336914,15.25 C96.8256836,14.7760417 97.2234701,14.3994141 97.7270508,14.1201172 C98.2306315,13.8408203 98.8167318,13.7011719 99.4853516,13.7011719 C100.145508,13.7011719 100.723145,13.8492839 101.218262,14.1455078 C101.713379,14.4417318 102.096354,14.8374023 102.367188,15.3325195 C102.638021,15.8276367 102.83903,16.3544922 102.970215,16.9130859 C103.1014,17.4716797 103.166992,18.0556641 103.166992,18.6650391 C103.166992,19.257487 103.107747,19.8266602 102.989258,20.3725586 C102.870768,20.918457 102.68457,21.4453125 102.430664,21.953125 C102.176758,22.4609375 101.806478,22.8693034 101.319824,23.1782227 C100.833171,23.4871419 100.259766,23.6458333 99.5996094,23.6542969 Z M100.005859,25.2285156 C100.886068,25.2285156 101.679525,25.0486654 102.38623,24.6889648 C103.092936,24.3292643 103.666341,23.8426107 104.106445,23.2290039 C104.546549,22.6153971 104.882975,21.9150391 105.115723,21.1279297 C105.34847,20.3408203 105.464844,19.5029297 105.464844,18.6142578 C105.464844,17.7086589 105.337891,16.8644206 105.083984,16.081543 C104.830078,15.2986654 104.468262,14.6131185 103.998535,14.0249023 C103.528809,13.4366862 102.932129,12.9754232 102.208496,12.6411133 C101.484863,12.3068034 100.682943,12.1396484 99.8027344,12.1396484 C98.8632812,12.1396484 98.0317383,12.3427734 97.3081055,12.7490234 C96.5844727,13.1552734 96.0322266,13.7434896 95.6513672,14.5136719 L95.6513672,7.92480469 C95.6513672,7.59472656 95.5371094,7.33447266 95.3085938,7.14404297 C95.0800781,6.95361328 94.8092448,6.85839844 94.4960938,6.85839844 C94.1914062,6.85839844 93.9248047,6.95361328 93.6962891,7.14404297 C93.4677734,7.33447266 93.3535156,7.59472656 93.3535156,7.92480469 L93.3535156,23.8955078 C93.3535156,24.2340495 93.4677734,24.4985352 93.6962891,24.6889648 C93.9248047,24.8793945 94.1914062,24.9746094 94.4960938,24.9746094 C94.8769531,24.9746094 95.1647135,24.8793945 95.359375,24.6889648 C95.5540365,24.4985352 95.6513672,24.2382813 95.6513672,23.9082031 L95.6513672,22.7783203 C96.0576172,23.5908203 96.6394857,24.2023112 97.3969727,24.612793 C98.1544596,25.0232747 99.0240885,25.2285156 100.005859,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M109.022461,17.7382812 C109.064779,17.2135417 109.176921,16.7163086 109.358887,16.246582 C109.540853,15.7768555 109.786296,15.3494466 110.095215,14.9643555 C110.404134,14.5792643 110.799805,14.2724609 111.282227,14.0439453 C111.764648,13.8154297 112.302083,13.7011719 112.894531,13.7011719 C114.037109,13.7011719 114.904622,14.0777995 115.49707,14.8310547 C116.089518,15.5843099 116.423828,16.5533854 116.5,17.7382812 L109.022461,17.7382812 Z M112.932617,25.2285156 C114.828451,25.2285156 116.444987,24.6276042 117.782227,23.4257812 C118.019206,23.2141927 118.137695,22.9729818 118.137695,22.7021484 C118.137695,22.4905599 118.067871,22.3064779 117.928223,22.1499023 C117.788574,21.9933268 117.617188,21.9150391 117.414062,21.9150391 C117.244792,21.9150391 117.083984,21.9742839 116.931641,22.0927734 C116.322266,22.5582682 115.717122,22.9264323 115.116211,23.1972656 C114.515299,23.468099 113.838216,23.6035156 113.084961,23.6035156 C111.891602,23.5865885 110.914062,23.2036133 110.152344,22.4545898 C109.390625,21.7055664 109.001302,20.6031901 108.984375,19.1474609 L117.693359,19.1474609 C117.964193,19.1474609 118.167318,19.0670573 118.302734,18.90625 C118.438151,18.7454427 118.505859,18.5423177 118.505859,18.296875 C118.480469,17.4420573 118.357747,16.6570638 118.137695,15.9418945 C117.917643,15.2267253 117.587565,14.5792643 117.147461,13.9995117 C116.707357,13.4197591 116.114909,12.9648438 115.370117,12.6347656 C114.625326,12.3046875 113.757812,12.1396484 112.767578,12.1396484 C111.514974,12.1396484 110.418945,12.4443359 109.479492,13.0537109 C108.540039,13.6630859 107.837565,14.4544271 107.37207,15.4277344 C106.906576,16.4010417 106.673828,17.4759115 106.673828,18.6523437 C106.673828,19.9895833 106.951009,21.159668 107.505371,22.1625977 C108.059733,23.1655273 108.806641,23.9251302 109.746094,24.4414063 C110.685547,24.9576823 111.747721,25.2200521 112.932617,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M125.668945,23.6542969 C125.008789,23.6458333 124.435384,23.4871419 123.94873,23.1782227 C123.462077,22.8693034 123.091797,22.4609375 122.837891,21.953125 C122.583984,21.4453125 122.397786,20.918457 122.279297,20.3725586 C122.160807,19.8266602 122.101562,19.257487 122.101562,18.6650391 C122.101562,18.0556641 122.167155,17.4716797 122.29834,16.9130859 C122.429525,16.3544922 122.630534,15.8276367 122.901367,15.3325195 C123.172201,14.8374023 123.555176,14.4417318 124.050293,14.1455078 C124.54541,13.8492839 125.123047,13.7011719 125.783203,13.7011719 C126.451823,13.7011719 127.037923,13.8408203 127.541504,14.1201172 C128.045085,14.3994141 128.442871,14.7760417 128.734863,15.25 C129.026855,15.7239583 129.244792,16.2444661 129.388672,16.8115234 C129.532552,17.3785807 129.604492,17.983724 129.604492,18.6269531 C129.604492,21.9785156 128.292643,23.6542969 125.668945,23.6542969 Z M125.262695,25.2285156 C126.244466,25.2285156 127.114095,25.0232747 127.871582,24.612793 C128.629069,24.2023112 129.210938,23.5908203 129.617188,22.7783203 L129.617188,23.9082031 C129.617188,24.2382813 129.714518,24.4985352 129.90918,24.6889648 C130.103841,24.8793945 130.391602,24.9746094 130.772461,24.9746094 C131.077148,24.9746094 131.34375,24.8793945 131.572266,24.6889648 C131.800781,24.4985352 131.915039,24.2340495 131.915039,23.8955078 L131.915039,7.92480469 C131.915039,7.59472656 131.800781,7.33447266 131.572266,7.14404297 C131.34375,6.95361328 131.077148,6.85839844 130.772461,6.85839844 C130.45931,6.85839844 130.188477,6.95361328 129.959961,7.14404297 C129.731445,7.33447266 129.617188,7.59472656 129.617188,7.92480469 L129.617188,14.5136719 C129.236328,13.7434896 128.684082,13.1552734 127.960449,12.7490234 C127.236816,12.3427734 126.405273,12.1396484 125.46582,12.1396484 C124.585612,12.1396484 123.783691,12.3068034 123.060059,12.6411133 C122.336426,12.9754232 121.739746,13.4366862 121.27002,14.0249023 C120.800293,14.6131185 120.438477,15.2986654 120.18457,16.081543 C119.930664,16.8644206 119.803711,17.7086589 119.803711,18.6142578 C119.803711,19.5029297 119.920085,20.3408203 120.152832,21.1279297 C120.385579,21.9150391 120.722005,22.6153971 121.162109,23.2290039 C121.602214,23.8426107 122.175618,24.3292643 122.882324,24.6889648 C123.58903,25.0486654 124.382487,25.2285156 125.262695,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-4" transform="translate(0, 92)" xlink:href="#path-18" font-family="Nunito-Regular, Nunito" font-size="26" font-weight="normal" letter-spacing="-1" line-spacing="32">
+                    <text id="Text"></text>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-3" transform="translate(0, 132)" xlink:href="#path-19" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M3.54199219,25 L12.796875,25 C13.1184896,25 13.3702799,24.8984375 13.5522461,24.6953125 C13.7342122,24.4921875 13.8251953,24.2552083 13.8251953,23.984375 C13.8251953,23.7050781 13.7363281,23.4638672 13.5585937,23.2607422 C13.3808594,23.0576172 13.1269531,22.9560547 12.796875,22.9560547 L4.81152344,22.9560547 L4.81152344,8.08984375 C4.81152344,7.70052083 4.6866862,7.39794922 4.43701172,7.18212891 C4.18733724,6.96630859 3.8889974,6.85839844 3.54199219,6.85839844 C3.18652344,6.85839844 2.88183594,6.96630859 2.62792969,7.18212891 C2.37402344,7.39794922 2.24707031,7.70052083 2.24707031,8.08984375 L2.24707031,23.7304688 C2.24707031,24.0859375 2.37402344,24.3863932 2.62792969,24.6318359 C2.88183594,24.8772786 3.18652344,25 3.54199219,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M16.5703125,25 L25.8251953,25 C26.1468099,25 26.3986003,24.8984375 26.5805664,24.6953125 C26.7625326,24.4921875 26.8535156,24.2552083 26.8535156,23.984375 C26.8535156,23.7050781 26.7646484,23.4638672 26.5869141,23.2607422 C26.4091797,23.0576172 26.1552734,22.9560547 25.8251953,22.9560547 L17.8398437,22.9560547 L17.8398437,8.08984375 C17.8398437,7.70052083 17.7150065,7.39794922 17.465332,7.18212891 C17.2156576,6.96630859 16.9173177,6.85839844 16.5703125,6.85839844 C16.2148438,6.85839844 15.9101562,6.96630859 15.65625,7.18212891 C15.4023438,7.39794922 15.2753906,7.70052083 15.2753906,8.08984375 L15.2753906,23.7304688 C15.2753906,24.0859375 15.4023438,24.3863932 15.65625,24.6318359 C15.9101562,24.8772786 16.2148438,25 16.5703125,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M38.1298828,24.5302734 C38.4261068,24.5302734 38.7138672,24.4329427 38.9931641,24.2382813 C39.2724609,24.0436198 39.492513,23.7643229 39.6533203,23.4003906 L45.5693359,10.1210938 L45.5693359,23.9462891 C45.5693359,24.2340495 45.6857096,24.4583333 45.918457,24.6191406 C46.1512044,24.7799479 46.4283854,24.8603516 46.75,24.8603516 C47.063151,24.8603516 47.3382161,24.7799479 47.5751953,24.6191406 C47.8121745,24.4583333 47.9306641,24.2340495 47.9306641,23.9462891 L47.9306641,8.91503906 C47.9306641,8.42415365 47.7360026,8.01367188 47.3466797,7.68359375 C46.9573568,7.35351562 46.5045573,7.18847656 45.9882812,7.18847656 C45.5651042,7.18847656 45.1652018,7.3133138 44.7885742,7.56298828 C44.4119466,7.81266276 44.1136068,8.19140625 43.8935547,8.69921875 L38.1298828,21.4960937 L32.3662109,8.69921875 C32.1376953,8.19140625 31.8351237,7.81266276 31.4584961,7.56298828 C31.0818685,7.3133138 30.6819661,7.18847656 30.2587891,7.18847656 C29.742513,7.18847656 29.2918294,7.35351562 28.9067383,7.68359375 C28.5216471,8.01367188 28.3291016,8.42415365 28.3291016,8.91503906 L28.3291016,23.9462891 C28.3291016,24.2340495 28.4475911,24.4583333 28.6845703,24.6191406 C28.9215495,24.7799479 29.1966146,24.8603516 29.5097656,24.8603516 C29.8313802,24.8603516 30.1085612,24.7799479 30.3413086,24.6191406 C30.574056,24.4583333 30.6904297,24.2340495 30.6904297,23.9462891 L30.6904297,10.1210938 L36.6064453,23.4003906 C36.7672526,23.7643229 36.9873047,24.0436198 37.2666016,24.2382813 C37.5458984,24.4329427 37.8336589,24.5302734 38.1298828,24.5302734 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M53.0117188,25.0253906 C53.4518229,25.0253906 53.8072917,24.889974 54.078125,24.6191406 C54.3489583,24.3483073 54.484375,24.0266927 54.484375,23.6542969 C54.484375,23.2903646 54.3468424,22.9750977 54.0717773,22.7084961 C53.7967122,22.4418945 53.4433594,22.3085938 53.0117188,22.3085938 C52.5716146,22.3085938 52.2161458,22.4440104 51.9453125,22.7148438 C51.6744792,22.9856771 51.5390625,23.2988281 51.5390625,23.6542969 C51.5390625,24.0266927 51.6744792,24.3483073 51.9453125,24.6191406 C52.2161458,24.889974 52.5716146,25.0253906 53.0117188,25.0253906 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M59.2734375,17.7382812 C59.3157552,17.2135417 59.4278971,16.7163086 59.6098633,16.246582 C59.7918294,15.7768555 60.0372721,15.3494466 60.3461914,14.9643555 C60.6551107,14.5792643 61.0507812,14.2724609 61.5332031,14.0439453 C62.015625,13.8154297 62.5530599,13.7011719 63.1455078,13.7011719 C64.2880859,13.7011719 65.155599,14.0777995 65.7480469,14.8310547 C66.3404948,15.5843099 66.6748047,16.5533854 66.7509766,17.7382812 L59.2734375,17.7382812 Z M63.1835938,25.2285156 C65.0794271,25.2285156 66.6959635,24.6276042 68.0332031,23.4257812 C68.2701823,23.2141927 68.3886719,22.9729818 68.3886719,22.7021484 C68.3886719,22.4905599 68.3188477,22.3064779 68.1791992,22.1499023 C68.0395508,21.9933268 67.8681641,21.9150391 67.6650391,21.9150391 C67.4957682,21.9150391 67.3349609,21.9742839 67.1826172,22.0927734 C66.5732422,22.5582682 65.968099,22.9264323 65.3671875,23.1972656 C64.766276,23.468099 64.0891927,23.6035156 63.3359375,23.6035156 C62.1425781,23.5865885 61.1650391,23.2036133 60.4033203,22.4545898 C59.6416016,21.7055664 59.2522786,20.6031901 59.2353516,19.1474609 L67.9443359,19.1474609 C68.2151693,19.1474609 68.4182943,19.0670573 68.5537109,18.90625 C68.6891276,18.7454427 68.7568359,18.5423177 68.7568359,18.296875 C68.7314453,17.4420573 68.608724,16.6570638 68.3886719,15.9418945 C68.1686198,15.2267253 67.8385417,14.5792643 67.3984375,13.9995117 C66.9583333,13.4197591 66.3658854,12.9648438 65.6210938,12.6347656 C64.8763021,12.3046875 64.0087891,12.1396484 63.0185547,12.1396484 C61.7659505,12.1396484 60.6699219,12.4443359 59.7304688,13.0537109 C58.7910156,13.6630859 58.0885417,14.4544271 57.6230469,15.4277344 C57.1575521,16.4010417 56.9248047,17.4759115 56.9248047,18.6523437 C56.9248047,19.9895833 57.2019857,21.159668 57.7563477,22.1625977 C58.3107096,23.1655273 59.0576172,23.9251302 59.9970703,24.4414063 C60.9365234,24.9576823 61.9986979,25.2200521 63.1835938,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M72.0478516,25 C72.3610026,25 72.6339518,24.9026693 72.8666992,24.7080078 C73.0994466,24.5133464 73.2158203,24.2467448 73.2158203,23.9082031 L73.2158203,17.5732422 C73.2750651,16.3798828 73.6389974,15.4361979 74.3076172,14.7421875 C74.976237,14.0481771 75.7972005,13.7011719 76.7705078,13.7011719 C77.6845703,13.7011719 78.3933919,13.9847005 78.8969727,14.5517578 C79.4005534,15.1188151 79.6523438,15.9440104 79.6523438,17.0273438 L79.6523438,23.9082031 C79.6523438,24.2552083 79.7644857,24.5239258 79.9887695,24.7143555 C80.2130534,24.9047852 80.4817708,25 80.7949219,25 C81.1080729,25 81.3767904,24.9047852 81.6010742,24.7143555 C81.8253581,24.5239258 81.9375,24.2552083 81.9375,23.9082031 L81.9375,17.0527344 C81.9375,15.4023437 81.5227865,14.1708984 80.6933594,13.3583984 C79.8639323,12.5458984 78.7509766,12.1396484 77.3544922,12.1396484 C76.3811849,12.1396484 75.5284831,12.3470052 74.7963867,12.7617188 C74.0642904,13.1764323 73.5374349,13.7688802 73.2158203,14.5390625 L73.2158203,13.4345703 C73.2158203,13.1044922 73.1036784,12.8484701 72.8793945,12.6665039 C72.6551107,12.4845378 72.3863932,12.3935547 72.0732422,12.3935547 C71.7600911,12.3935547 71.4892578,12.4887695 71.2607422,12.6791992 C71.0322266,12.8696289 70.9179688,13.1341146 70.9179688,13.4726563 L70.9179688,23.9082031 C70.9179688,24.2552083 71.0279948,24.5239258 71.2480469,24.7143555 C71.468099,24.9047852 71.7347005,25 72.0478516,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M89.7988281,25.2412109 C91.4576823,25.2412109 92.7441406,24.8730469 93.6582031,24.1367188 C93.9459635,23.8997396 94.0898438,23.6416016 94.0898438,23.3623047 C94.0898438,23.1676432 94.0221354,23.0047201 93.8867188,22.8735352 C93.7513021,22.7423503 93.586263,22.6767578 93.3916016,22.6767578 C93.2138672,22.6767578 93.0445964,22.7317708 92.8837891,22.8417969 C92.1051432,23.3834635 91.1360677,23.6542969 89.9765625,23.6542969 C89.3164062,23.6542969 88.7324219,23.5167643 88.2246094,23.2416992 C87.7167969,22.9666341 87.3105469,22.5942383 87.0058594,22.1245117 C86.7011719,21.6547852 86.4726562,21.1321615 86.3203125,20.5566406 C86.1679688,19.9811198 86.0917969,19.3717448 86.0917969,18.7285156 C86.0917969,17.188151 86.4684245,15.9630534 87.2216797,15.0532227 C87.9749349,14.1433919 88.9567057,13.6884766 90.1669922,13.6884766 C91.0979818,13.6884766 91.9485677,13.938151 92.71875,14.4375 C92.8880208,14.547526 93.0657552,14.6025391 93.2519531,14.6025391 C93.4550781,14.6025391 93.6264648,14.5390625 93.7661133,14.4121094 C93.9057617,14.2851563 93.9755859,14.1285807 93.9755859,13.9423828 C93.9755859,13.680013 93.8317057,13.4345703 93.5439453,13.2060547 C93.180013,12.9013672 92.6891276,12.6474609 92.0712891,12.4443359 C91.4534505,12.2412109 90.780599,12.1396484 90.0527344,12.1396484 C88.8085938,12.1396484 87.7083333,12.4316406 86.7519531,13.015625 C85.7955729,13.5996094 85.0634766,14.3867188 84.5556641,15.3769531 C84.0478516,16.3671875 83.7939453,17.4716797 83.7939453,18.6904297 C83.7939453,20.5777995 84.335612,22.1414388 85.4189453,23.3813477 C86.5022786,24.6212565 87.9622396,25.2412109 89.7988281,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M101.329102,25.2285156 C103.28418,25.2285156 104.833008,24.6149089 105.975586,23.3876953 C107.118164,22.1604818 107.689453,20.5947266 107.689453,18.6904297 C107.689453,16.7692057 107.116048,15.1971029 105.969238,13.9741211 C104.822428,12.7511393 103.275716,12.1396484 101.329102,12.1396484 C99.382487,12.1396484 97.8357747,12.7532552 96.6889648,13.9804688 C95.5421549,15.2076823 94.96875,16.7776693 94.96875,18.6904297 C94.96875,20.5947266 95.5421549,22.1604818 96.6889648,23.3876953 C97.8357747,24.6149089 99.382487,25.2285156 101.329102,25.2285156 Z M101.303711,23.6542969 C100.03418,23.6542969 99.0418294,23.2078451 98.3266602,22.3149414 C97.6114909,21.4220378 97.2539062,20.2138672 97.2539062,18.6904297 C97.2539062,17.1500651 97.6136068,15.933431 98.3330078,15.0405273 C99.0524089,14.1476237 100.051107,13.7011719 101.329102,13.7011719 C102.598633,13.7011719 103.595215,14.1497396 104.318848,15.046875 C105.04248,15.9440104 105.404297,17.1585286 105.404297,18.6904297 C105.404297,20.2307943 105.046712,21.4431966 104.331543,22.3276367 C103.616374,23.2120768 102.607096,23.6542969 101.303711,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M114.535156,23.6542969 C113.875,23.6458333 113.301595,23.4871419 112.814941,23.1782227 C112.328288,22.8693034 111.958008,22.4609375 111.704102,21.953125 C111.450195,21.4453125 111.263997,20.918457 111.145508,20.3725586 C111.027018,19.8266602 110.967773,19.257487 110.967773,18.6650391 C110.967773,18.0556641 111.033366,17.4716797 111.164551,16.9130859 C111.295736,16.3544922 111.496745,15.8276367 111.767578,15.3325195 C112.038411,14.8374023 112.421387,14.4417318 112.916504,14.1455078 C113.411621,13.8492839 113.989258,13.7011719 114.649414,13.7011719 C115.318034,13.7011719 115.904134,13.8408203 116.407715,14.1201172 C116.911296,14.3994141 117.309082,14.7760417 117.601074,15.25 C117.893066,15.7239583 118.111003,16.2444661 118.254883,16.8115234 C118.398763,17.3785807 118.470703,17.983724 118.470703,18.6269531 C118.470703,21.9785156 117.158854,23.6542969 114.535156,23.6542969 Z M114.128906,25.2285156 C115.110677,25.2285156 115.980306,25.0232747 116.737793,24.612793 C117.49528,24.2023112 118.077148,23.5908203 118.483398,22.7783203 L118.483398,23.9082031 C118.483398,24.2382813 118.580729,24.4985352 118.775391,24.6889648 C118.970052,24.8793945 119.257812,24.9746094 119.638672,24.9746094 C119.943359,24.9746094 120.209961,24.8793945 120.438477,24.6889648 C120.666992,24.4985352 120.78125,24.2340495 120.78125,23.8955078 L120.78125,7.92480469 C120.78125,7.59472656 120.666992,7.33447266 120.438477,7.14404297 C120.209961,6.95361328 119.943359,6.85839844 119.638672,6.85839844 C119.325521,6.85839844 119.054688,6.95361328 118.826172,7.14404297 C118.597656,7.33447266 118.483398,7.59472656 118.483398,7.92480469 L118.483398,14.5136719 C118.102539,13.7434896 117.550293,13.1552734 116.82666,12.7490234 C116.103027,12.3427734 115.271484,12.1396484 114.332031,12.1396484 C113.451823,12.1396484 112.649902,12.3068034 111.92627,12.6411133 C111.202637,12.9754232 110.605957,13.4366862 110.13623,14.0249023 C109.666504,14.6131185 109.304688,15.2986654 109.050781,16.081543 C108.796875,16.8644206 108.669922,17.7086589 108.669922,18.6142578 C108.669922,19.5029297 108.786296,20.3408203 109.019043,21.1279297 C109.25179,21.9150391 109.588216,22.6153971 110.02832,23.2290039 C110.468424,23.8426107 111.041829,24.3292643 111.748535,24.6889648 C112.455241,25.0486654 113.248698,25.2285156 114.128906,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M125.202148,17.7382812 C125.244466,17.2135417 125.356608,16.7163086 125.538574,16.246582 C125.72054,15.7768555 125.965983,15.3494466 126.274902,14.9643555 C126.583822,14.5792643 126.979492,14.2724609 127.461914,14.0439453 C127.944336,13.8154297 128.481771,13.7011719 129.074219,13.7011719 C130.216797,13.7011719 131.08431,14.0777995 131.676758,14.8310547 C132.269206,15.5843099 132.603516,16.5533854 132.679688,17.7382812 L125.202148,17.7382812 Z M129.112305,25.2285156 C131.008138,25.2285156 132.624674,24.6276042 133.961914,23.4257812 C134.198893,23.2141927 134.317383,22.9729818 134.317383,22.7021484 C134.317383,22.4905599 134.247559,22.3064779 134.10791,22.1499023 C133.968262,21.9933268 133.796875,21.9150391 133.59375,21.9150391 C133.424479,21.9150391 133.263672,21.9742839 133.111328,22.0927734 C132.501953,22.5582682 131.89681,22.9264323 131.295898,23.1972656 C130.694987,23.468099 130.017904,23.6035156 129.264648,23.6035156 C128.071289,23.5865885 127.09375,23.2036133 126.332031,22.4545898 C125.570312,21.7055664 125.18099,20.6031901 125.164062,19.1474609 L133.873047,19.1474609 C134.14388,19.1474609 134.347005,19.0670573 134.482422,18.90625 C134.617839,18.7454427 134.685547,18.5423177 134.685547,18.296875 C134.660156,17.4420573 134.537435,16.6570638 134.317383,15.9418945 C134.097331,15.2267253 133.767253,14.5792643 133.327148,13.9995117 C132.887044,13.4197591 132.294596,12.9648438 131.549805,12.6347656 C130.805013,12.3046875 129.9375,12.1396484 128.947266,12.1396484 C127.694661,12.1396484 126.598633,12.4443359 125.65918,13.0537109 C124.719727,13.6630859 124.017253,14.4544271 123.551758,15.4277344 C123.086263,16.4010417 122.853516,17.4759115 122.853516,18.6523437 C122.853516,19.9895833 123.130697,21.159668 123.685059,22.1625977 C124.239421,23.1655273 124.986328,23.9251302 125.925781,24.4414063 C126.865234,24.9576823 127.927409,25.2200521 129.112305,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-5" transform="translate(0, 172)" xlink:href="#path-20" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M3.54199219,25 L12.796875,25 C13.1184896,25 13.3702799,24.8984375 13.5522461,24.6953125 C13.7342122,24.4921875 13.8251953,24.2552083 13.8251953,23.984375 C13.8251953,23.7050781 13.7363281,23.4638672 13.5585937,23.2607422 C13.3808594,23.0576172 13.1269531,22.9560547 12.796875,22.9560547 L4.81152344,22.9560547 L4.81152344,8.08984375 C4.81152344,7.70052083 4.6866862,7.39794922 4.43701172,7.18212891 C4.18733724,6.96630859 3.8889974,6.85839844 3.54199219,6.85839844 C3.18652344,6.85839844 2.88183594,6.96630859 2.62792969,7.18212891 C2.37402344,7.39794922 2.24707031,7.70052083 2.24707031,8.08984375 L2.24707031,23.7304688 C2.24707031,24.0859375 2.37402344,24.3863932 2.62792969,24.6318359 C2.88183594,24.8772786 3.18652344,25 3.54199219,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M16.5703125,25 L25.8251953,25 C26.1468099,25 26.3986003,24.8984375 26.5805664,24.6953125 C26.7625326,24.4921875 26.8535156,24.2552083 26.8535156,23.984375 C26.8535156,23.7050781 26.7646484,23.4638672 26.5869141,23.2607422 C26.4091797,23.0576172 26.1552734,22.9560547 25.8251953,22.9560547 L17.8398437,22.9560547 L17.8398437,8.08984375 C17.8398437,7.70052083 17.7150065,7.39794922 17.465332,7.18212891 C17.2156576,6.96630859 16.9173177,6.85839844 16.5703125,6.85839844 C16.2148438,6.85839844 15.9101562,6.96630859 15.65625,7.18212891 C15.4023438,7.39794922 15.2753906,7.70052083 15.2753906,8.08984375 L15.2753906,23.7304688 C15.2753906,24.0859375 15.4023438,24.3863932 15.65625,24.6318359 C15.9101562,24.8772786 16.2148438,25 16.5703125,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M38.1298828,24.5302734 C38.4261068,24.5302734 38.7138672,24.4329427 38.9931641,24.2382813 C39.2724609,24.0436198 39.492513,23.7643229 39.6533203,23.4003906 L45.5693359,10.1210938 L45.5693359,23.9462891 C45.5693359,24.2340495 45.6857096,24.4583333 45.918457,24.6191406 C46.1512044,24.7799479 46.4283854,24.8603516 46.75,24.8603516 C47.063151,24.8603516 47.3382161,24.7799479 47.5751953,24.6191406 C47.8121745,24.4583333 47.9306641,24.2340495 47.9306641,23.9462891 L47.9306641,8.91503906 C47.9306641,8.42415365 47.7360026,8.01367188 47.3466797,7.68359375 C46.9573568,7.35351562 46.5045573,7.18847656 45.9882812,7.18847656 C45.5651042,7.18847656 45.1652018,7.3133138 44.7885742,7.56298828 C44.4119466,7.81266276 44.1136068,8.19140625 43.8935547,8.69921875 L38.1298828,21.4960937 L32.3662109,8.69921875 C32.1376953,8.19140625 31.8351237,7.81266276 31.4584961,7.56298828 C31.0818685,7.3133138 30.6819661,7.18847656 30.2587891,7.18847656 C29.742513,7.18847656 29.2918294,7.35351562 28.9067383,7.68359375 C28.5216471,8.01367188 28.3291016,8.42415365 28.3291016,8.91503906 L28.3291016,23.9462891 C28.3291016,24.2340495 28.4475911,24.4583333 28.6845703,24.6191406 C28.9215495,24.7799479 29.1966146,24.8603516 29.5097656,24.8603516 C29.8313802,24.8603516 30.1085612,24.7799479 30.3413086,24.6191406 C30.574056,24.4583333 30.6904297,24.2340495 30.6904297,23.9462891 L30.6904297,10.1210938 L36.6064453,23.4003906 C36.7672526,23.7643229 36.9873047,24.0436198 37.2666016,24.2382813 C37.5458984,24.4329427 37.8336589,24.5302734 38.1298828,24.5302734 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M53.0117188,25.0253906 C53.4518229,25.0253906 53.8072917,24.889974 54.078125,24.6191406 C54.3489583,24.3483073 54.484375,24.0266927 54.484375,23.6542969 C54.484375,23.2903646 54.3468424,22.9750977 54.0717773,22.7084961 C53.7967122,22.4418945 53.4433594,22.3085938 53.0117188,22.3085938 C52.5716146,22.3085938 52.2161458,22.4440104 51.9453125,22.7148438 C51.6744792,22.9856771 51.5390625,23.2988281 51.5390625,23.6542969 C51.5390625,24.0266927 51.6744792,24.3483073 51.9453125,24.6191406 C52.2161458,24.889974 52.5716146,25.0253906 53.0117188,25.0253906 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M57.6230469,23.8574219 C58.1308594,24.2298177 58.8100586,24.5535482 59.6606445,24.8286133 C60.5112305,25.1036784 61.3893229,25.2412109 62.2949219,25.2412109 C63.2682292,25.2412109 64.1230469,25.1079102 64.859375,24.8413086 C65.5957031,24.574707 66.1902669,24.1451823 66.6430664,23.5527344 C67.0958659,22.9602865 67.3222656,22.2324219 67.3222656,21.3691406 C67.3222656,20.4550781 67.0493164,19.7039388 66.503418,19.1157227 C65.9575195,18.5275065 65.0455729,18.0768229 63.7675781,17.7636719 L61.5712891,17.2177734 C60.648763,16.9892578 60.0478516,16.7713216 59.7685547,16.5639648 C59.4892578,16.3566081 59.3496094,15.9990234 59.3496094,15.4912109 C59.3496094,14.8649089 59.6204427,14.4057617 60.1621094,14.1137695 C60.703776,13.8217773 61.4316406,13.6757813 62.3457031,13.6757813 C62.6334635,13.6757813 62.9169922,13.6948242 63.1962891,13.7329102 C63.4755859,13.7709961 63.750651,13.8260091 64.0214844,13.8979492 C64.2923177,13.9698893 64.5060221,14.03125 64.6625977,14.0820313 C64.8191732,14.1328125 65.0222982,14.2068685 65.2719727,14.3041992 C65.5216471,14.4015299 65.6591797,14.4544271 65.6845703,14.4628906 C65.819987,14.5136719 65.9511719,14.5390625 66.078125,14.5390625 C66.2981771,14.5390625 66.4737956,14.4692383 66.6049805,14.3295898 C66.7361654,14.1899414 66.8017578,14.0227865 66.8017578,13.828125 C66.8017578,13.4980469 66.6367188,13.2483724 66.3066406,13.0791016 C65.8496094,12.8336589 65.2592773,12.6114909 64.5356445,12.4125977 C63.8120117,12.2137044 63.0397135,12.1142578 62.21875,12.1142578 C61.5332031,12.1142578 60.8984375,12.1798503 60.3144531,12.3110352 C59.7304688,12.4422201 59.2036133,12.6411133 58.7338867,12.9077148 C58.2641602,13.1743164 57.8938802,13.5361328 57.6230469,13.9931641 C57.3522135,14.4501953 57.2167969,14.9791667 57.2167969,15.5800781 C57.2167969,15.9609375 57.2548828,16.2994792 57.3310547,16.5957031 C57.4072266,16.8919271 57.5299479,17.1500651 57.6992188,17.3701172 C57.8684896,17.5901693 58.0504557,17.7784831 58.2451172,17.9350586 C58.4397786,18.0916341 58.7000326,18.2376302 59.0258789,18.3730469 C59.3517253,18.5084635 59.6564128,18.6184896 59.9399414,18.703125 C60.2234701,18.7877604 60.59375,18.8893229 61.0507812,19.0078125 L63.2978516,19.5664062 C64.0426432,19.7526042 64.5758464,19.9980469 64.8974609,20.3027344 C65.2190755,20.6074219 65.3798828,21.0136719 65.3798828,21.5214844 C65.3798828,22.2324219 65.0942383,22.7719727 64.5229492,23.1401367 C63.9516602,23.5083008 63.2005208,23.6923828 62.2695312,23.6923828 C60.9746094,23.6839193 59.7473958,23.319987 58.5878906,22.6005859 C58.4016927,22.4820964 58.2070312,22.4228516 58.0039062,22.4228516 C57.7838542,22.4228516 57.601888,22.4969076 57.4580078,22.6450195 C57.3141276,22.7931315 57.2421875,22.96875 57.2421875,23.171875 C57.2421875,23.4511719 57.3691406,23.6796875 57.6230469,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M74.625,25.2412109 C76.2838542,25.2412109 77.5703125,24.8730469 78.484375,24.1367188 C78.7721354,23.8997396 78.9160156,23.6416016 78.9160156,23.3623047 C78.9160156,23.1676432 78.8483073,23.0047201 78.7128906,22.8735352 C78.577474,22.7423503 78.4124349,22.6767578 78.2177734,22.6767578 C78.0400391,22.6767578 77.8707682,22.7317708 77.7099609,22.8417969 C76.9313151,23.3834635 75.9622396,23.6542969 74.8027344,23.6542969 C74.1425781,23.6542969 73.5585938,23.5167643 73.0507812,23.2416992 C72.5429688,22.9666341 72.1367188,22.5942383 71.8320312,22.1245117 C71.5273438,21.6547852 71.2988281,21.1321615 71.1464844,20.5566406 C70.9941406,19.9811198 70.9179688,19.3717448 70.9179688,18.7285156 C70.9179688,17.188151 71.2945964,15.9630534 72.0478516,15.0532227 C72.8011068,14.1433919 73.7828776,13.6884766 74.9931641,13.6884766 C75.9241536,13.6884766 76.7747396,13.938151 77.5449219,14.4375 C77.7141927,14.547526 77.8919271,14.6025391 78.078125,14.6025391 C78.28125,14.6025391 78.4526367,14.5390625 78.5922852,14.4121094 C78.7319336,14.2851563 78.8017578,14.1285807 78.8017578,13.9423828 C78.8017578,13.680013 78.6578776,13.4345703 78.3701172,13.2060547 C78.0061849,12.9013672 77.5152995,12.6474609 76.8974609,12.4443359 C76.2796224,12.2412109 75.6067708,12.1396484 74.8789062,12.1396484 C73.6347656,12.1396484 72.5345052,12.4316406 71.578125,13.015625 C70.6217448,13.5996094 69.8896484,14.3867188 69.3818359,15.3769531 C68.8740234,16.3671875 68.6201172,17.4716797 68.6201172,18.6904297 C68.6201172,20.5777995 69.1617839,22.1414388 70.2451172,23.3813477 C71.3284505,24.6212565 72.7884115,25.2412109 74.625,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M86.1552734,25.2285156 C88.1103516,25.2285156 89.6591797,24.6149089 90.8017578,23.3876953 C91.9443359,22.1604818 92.515625,20.5947266 92.515625,18.6904297 C92.515625,16.7692057 91.9422201,15.1971029 90.7954102,13.9741211 C89.6486003,12.7511393 88.101888,12.1396484 86.1552734,12.1396484 C84.2086589,12.1396484 82.6619466,12.7532552 81.5151367,13.9804688 C80.3683268,15.2076823 79.7949219,16.7776693 79.7949219,18.6904297 C79.7949219,20.5947266 80.3683268,22.1604818 81.5151367,23.3876953 C82.6619466,24.6149089 84.2086589,25.2285156 86.1552734,25.2285156 Z M86.1298828,23.6542969 C84.8603516,23.6542969 83.8680013,23.2078451 83.152832,22.3149414 C82.4376628,21.4220378 82.0800781,20.2138672 82.0800781,18.6904297 C82.0800781,17.1500651 82.4397786,15.933431 83.1591797,15.0405273 C83.8785807,14.1476237 84.8772786,13.7011719 86.1552734,13.7011719 C87.4248047,13.7011719 88.4213867,14.1497396 89.1450195,15.046875 C89.8686523,15.9440104 90.2304688,17.1585286 90.2304688,18.6904297 C90.2304688,20.2307943 89.8728841,21.4431966 89.1577148,22.3276367 C88.4425456,23.2120768 87.4332682,23.6542969 86.1298828,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M95.5146484,25 C95.8277995,25 96.1007487,24.906901 96.3334961,24.7207031 C96.5662435,24.5345052 96.6826172,24.2721354 96.6826172,23.9335938 L96.6826172,17.3955078 C96.6826172,16.4052734 96.9978841,15.6139323 97.628418,15.0214844 C98.2589518,14.4290365 99.1455078,14.1328125 100.288086,14.1328125 C100.558919,14.1328125 100.766276,14.0418294 100.910156,13.8598633 C101.054036,13.6778971 101.125977,13.4599609 101.125977,13.2060547 C101.125977,12.9352214 101.047689,12.6982422 100.891113,12.4951172 C100.734538,12.2919922 100.520833,12.1904297 100.25,12.1904297 C99.3528646,12.1904297 98.5847982,12.4549154 97.9458008,12.9838867 C97.3068034,13.5128581 96.8815104,14.1708984 96.6699219,14.9580078 L96.6826172,13.4345703 C96.6826172,13.1129557 96.5704753,12.8611654 96.3461914,12.6791992 C96.1219076,12.4972331 95.8531901,12.40625 95.5400391,12.40625 C95.226888,12.40625 94.9560547,12.4972331 94.7275391,12.6791992 C94.4990234,12.8611654 94.3847656,13.1214193 94.3847656,13.4599609 L94.3847656,23.9335938 C94.3847656,24.2721354 94.4947917,24.5345052 94.7148438,24.7207031 C94.9348958,24.906901 95.2014974,25 95.5146484,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M104.163086,17.7382812 C104.205404,17.2135417 104.317546,16.7163086 104.499512,16.246582 C104.681478,15.7768555 104.926921,15.3494466 105.23584,14.9643555 C105.544759,14.5792643 105.94043,14.2724609 106.422852,14.0439453 C106.905273,13.8154297 107.442708,13.7011719 108.035156,13.7011719 C109.177734,13.7011719 110.045247,14.0777995 110.637695,14.8310547 C111.230143,15.5843099 111.564453,16.5533854 111.640625,17.7382812 L104.163086,17.7382812 Z M108.073242,25.2285156 C109.969076,25.2285156 111.585612,24.6276042 112.922852,23.4257812 C113.159831,23.2141927 113.27832,22.9729818 113.27832,22.7021484 C113.27832,22.4905599 113.208496,22.3064779 113.068848,22.1499023 C112.929199,21.9933268 112.757812,21.9150391 112.554688,21.9150391 C112.385417,21.9150391 112.224609,21.9742839 112.072266,22.0927734 C111.462891,22.5582682 110.857747,22.9264323 110.256836,23.1972656 C109.655924,23.468099 108.978841,23.6035156 108.225586,23.6035156 C107.032227,23.5865885 106.054688,23.2036133 105.292969,22.4545898 C104.53125,21.7055664 104.141927,20.6031901 104.125,19.1474609 L112.833984,19.1474609 C113.104818,19.1474609 113.307943,19.0670573 113.443359,18.90625 C113.578776,18.7454427 113.646484,18.5423177 113.646484,18.296875 C113.621094,17.4420573 113.498372,16.6570638 113.27832,15.9418945 C113.058268,15.2267253 112.72819,14.5792643 112.288086,13.9995117 C111.847982,13.4197591 111.255534,12.9648438 110.510742,12.6347656 C109.765951,12.3046875 108.898438,12.1396484 107.908203,12.1396484 C106.655599,12.1396484 105.55957,12.4443359 104.620117,13.0537109 C103.680664,13.6630859 102.97819,14.4544271 102.512695,15.4277344 C102.047201,16.4010417 101.814453,17.4759115 101.814453,18.6523437 C101.814453,19.9895833 102.091634,21.159668 102.645996,22.1625977 C103.200358,23.1655273 103.947266,23.9251302 104.886719,24.4414063 C105.826172,24.9576823 106.888346,25.2200521 108.073242,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+            </g>
+        </g>
+        <g id="List-@Pooling-Usages" stroke-width="1" transform="translate(732, 40)">
+            <g id="header-/-01_purple-@2_standard" xlink:href="#path-21" fill="#9172E2">
+                <g id="bg">
+                    <path d="M20,0 L316,0 C327.045695,-3.55271368e-15 336,8.954305 336,20 L336,52 L336,52 L0,52 L0,20 C0,8.954305 8.954305,0 20,0 Z" id="header-bg" fill-opacity="0.1"></path>
+                    <path d="M316,0 C327.045695,0 336,8.954305 336,20 L336,52 L0,52 L0,20 C0,8.954305 8.954305,0 20,0 L316,0 Z M316,1.6 L20.0000001,1.6 L18.1187071,1.69499727 C8.84039506,2.63726101 1.6,10.4730881 1.6,20 L1.6,50.3999998 L334.4,50.3999998 L334.4,20 C334.4,9.8379606 326.162039,1.6 316,1.6 Z" id="header-bg" fill-rule="nonzero"></path>
+                </g>
+                <g id="Title" transform="translate(0, 12)">
+                    <path d="M91.5761719,22.0117188 C92.0371094,22.0117188 92.4335938,21.8515625 92.765625,21.53125 C93.0976562,21.2109375 93.2636719,20.7890625 93.2636719,20.265625 L93.2636719,15.2851563 L96.5332031,15.2851563 C100.806641,15.2851563 102.943359,13.5820313 102.943359,10.1757813 C102.943359,9.24609375 102.798828,8.44726562 102.509766,7.77929687 C102.220703,7.11132813 101.796875,6.58398438 101.238281,6.19726562 C100.679688,5.81054688 100.035156,5.52929688 99.3046875,5.35351562 C98.5742188,5.17773438 97.7246094,5.08984375 96.7558594,5.08984375 L91.7167969,5.08984375 C91.1386719,5.08984375 90.6894531,5.28515625 90.3691406,5.67578125 C90.0488281,6.06640625 89.8886719,6.55859375 89.8886719,7.15234375 L89.8886719,20.265625 C89.8886719,20.7890625 90.0566406,21.2109375 90.3925781,21.53125 C90.7285156,21.8515625 91.1230469,22.0117188 91.5761719,22.0117188 Z M93.2636719,12.8476562 L93.2636719,7.65625 L96.3925781,7.65625 C96.9394531,7.65625 97.3945312,7.6875 97.7578125,7.75 C98.1210938,7.8125 98.453125,7.9296875 98.7539062,8.1015625 C99.0546875,8.2734375 99.2753906,8.53125 99.4160156,8.875 C99.5566406,9.21875 99.6269531,9.65234375 99.6269531,10.1757813 C99.6269531,10.7148438 99.5566406,11.1601562 99.4160156,11.5117188 C99.2753906,11.8632813 99.0546875,12.1347656 98.7539062,12.3261719 C98.453125,12.5175781 98.1132812,12.6523438 97.734375,12.7304688 C97.3554688,12.8085938 96.8808594,12.8476562 96.3105469,12.8476562 L93.2636719,12.8476562 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M110.443359,20.1367188 C109.435547,20.1367188 108.650391,19.7851563 108.087891,19.0820312 C107.525391,18.3789062 107.244141,17.3945313 107.244141,16.1289062 C107.244141,14.8476562 107.523438,13.8535156 108.082031,13.1464844 C108.640625,12.4394531 109.427734,12.0859375 110.443359,12.0859375 C111.458984,12.0859375 112.248047,12.4414062 112.810547,13.1523438 C113.373047,13.8632813 113.654297,14.8554688 113.654297,16.1289062 C113.654297,17.3945313 113.373047,18.3789062 112.810547,19.0820312 C112.248047,19.7851563 111.458984,20.1367188 110.443359,20.1367188 Z M110.443359,22.140625 C111.271484,22.140625 112.029297,22.0273437 112.716797,21.8007813 C113.404297,21.5742188 113.984375,21.2714844 114.457031,20.8925781 C114.929688,20.5136719 115.328125,20.0644531 115.652344,19.5449219 C115.976562,19.0253906 116.214844,18.4804688 116.367188,17.9101562 C116.519531,17.3398438 116.595703,16.7460938 116.595703,16.1289062 C116.595703,15.4804688 116.515625,14.859375 116.355469,14.265625 C116.195312,13.671875 115.947266,13.1191406 115.611328,12.6074219 C115.275391,12.0957031 114.869141,11.6542969 114.392578,11.2832031 C113.916016,10.9121094 113.339844,10.6191406 112.664062,10.4042969 C111.988281,10.1894531 111.248047,10.0820312 110.443359,10.0820312 C109.623047,10.0820312 108.871094,10.1933594 108.1875,10.4160156 C107.503906,10.6386719 106.925781,10.9414062 106.453125,11.3242188 C105.980469,11.7070312 105.580078,12.15625 105.251953,12.671875 C104.923828,13.1875 104.683594,13.7363281 104.53125,14.3183594 C104.378906,14.9003906 104.302734,15.5039062 104.302734,16.1289062 C104.302734,16.9179687 104.427734,17.6640625 104.677734,18.3671875 C104.927734,19.0703125 105.292969,19.7070312 105.773438,20.2773438 C106.253906,20.8476563 106.896484,21.3007813 107.701172,21.6367188 C108.505859,21.9726562 109.419922,22.140625 110.443359,22.140625 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M124.611328,20.1367188 C123.603516,20.1367188 122.818359,19.7851563 122.255859,19.0820312 C121.693359,18.3789062 121.412109,17.3945313 121.412109,16.1289062 C121.412109,14.8476562 121.691406,13.8535156 122.25,13.1464844 C122.808594,12.4394531 123.595703,12.0859375 124.611328,12.0859375 C125.626953,12.0859375 126.416016,12.4414062 126.978516,13.1523438 C127.541016,13.8632813 127.822266,14.8554688 127.822266,16.1289062 C127.822266,17.3945313 127.541016,18.3789062 126.978516,19.0820312 C126.416016,19.7851563 125.626953,20.1367188 124.611328,20.1367188 Z M124.611328,22.140625 C125.439453,22.140625 126.197266,22.0273437 126.884766,21.8007813 C127.572266,21.5742188 128.152344,21.2714844 128.625,20.8925781 C129.097656,20.5136719 129.496094,20.0644531 129.820312,19.5449219 C130.144531,19.0253906 130.382812,18.4804688 130.535156,17.9101562 C130.6875,17.3398438 130.763672,16.7460938 130.763672,16.1289062 C130.763672,15.4804688 130.683594,14.859375 130.523438,14.265625 C130.363281,13.671875 130.115234,13.1191406 129.779297,12.6074219 C129.443359,12.0957031 129.037109,11.6542969 128.560547,11.2832031 C128.083984,10.9121094 127.507812,10.6191406 126.832031,10.4042969 C126.15625,10.1894531 125.416016,10.0820312 124.611328,10.0820312 C123.791016,10.0820312 123.039062,10.1933594 122.355469,10.4160156 C121.671875,10.6386719 121.09375,10.9414062 120.621094,11.3242188 C120.148438,11.7070312 119.748047,12.15625 119.419922,12.671875 C119.091797,13.1875 118.851562,13.7363281 118.699219,14.3183594 C118.546875,14.9003906 118.470703,15.5039062 118.470703,16.1289062 C118.470703,16.9179687 118.595703,17.6640625 118.845703,18.3671875 C119.095703,19.0703125 119.460938,19.7070312 119.941406,20.2773438 C120.421875,20.8476563 121.064453,21.3007813 121.869141,21.6367188 C122.673828,21.9726562 123.587891,22.140625 124.611328,22.140625 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M134.759766,22 C135.197266,22 135.558594,21.8554688 135.84375,21.5664062 C136.128906,21.2773438 136.271484,20.875 136.271484,20.359375 L136.271484,6.75390625 C136.271484,6.23828125 136.130859,5.8359375 135.849609,5.546875 C135.568359,5.2578125 135.212891,5.11328125 134.783203,5.11328125 C134.353516,5.11328125 134.001953,5.2578125 133.728516,5.546875 C133.455078,5.8359375 133.318359,6.23828125 133.318359,6.75390625 L133.318359,20.359375 C133.318359,20.8828125 133.453125,21.2871094 133.722656,21.5722656 C133.992188,21.8574219 134.337891,22 134.759766,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M140.923828,22 C141.353516,22 141.707031,21.8554688 141.984375,21.5664062 C142.261719,21.2773437 142.400391,20.875 142.400391,20.359375 L142.400391,11.8984375 C142.400391,11.375 142.261719,10.96875 141.984375,10.6796875 C141.707031,10.390625 141.353516,10.2460938 140.923828,10.2460938 C140.494141,10.2460938 140.142578,10.390625 139.869141,10.6796875 C139.595703,10.96875 139.458984,11.375 139.458984,11.8984375 L139.458984,20.359375 C139.458984,20.8828125 139.595703,21.2871094 139.869141,21.5722656 C140.142578,21.8574219 140.494141,22 140.923828,22 Z M140.923828,7.90234375 C141.431641,7.90234375 141.84375,7.75195312 142.160156,7.45117188 C142.476562,7.15039063 142.634766,6.7578125 142.634766,6.2734375 C142.634766,5.7890625 142.478516,5.3984375 142.166016,5.1015625 C141.853516,4.8046875 141.443359,4.65625 140.935547,4.65625 C140.419922,4.65625 140.003906,4.8046875 139.6875,5.1015625 C139.371094,5.3984375 139.212891,5.7890625 139.212891,6.2734375 C139.212891,6.7578125 139.371094,7.15039063 139.6875,7.45117188 C140.003906,7.75195312 140.416016,7.90234375 140.923828,7.90234375 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M146.841797,22 C147.271484,22 147.625,21.8574219 147.902344,21.5722656 C148.179688,21.2871094 148.318359,20.890625 148.318359,20.3828125 L148.318359,15.4257812 C148.318359,14.4335938 148.603516,13.6289063 149.173828,13.0117187 C149.744141,12.3945312 150.419922,12.0859375 151.201172,12.0859375 C151.849609,12.0859375 152.382812,12.2988281 152.800781,12.7246094 C153.21875,13.1503906 153.427734,13.765625 153.427734,14.5703125 L153.427734,20.3828125 C153.427734,20.890625 153.564453,21.2871094 153.837891,21.5722656 C154.111328,21.8574219 154.455078,22 154.869141,22 C155.306641,22 155.666016,21.8574219 155.947266,21.5722656 C156.228516,21.2871094 156.369141,20.890625 156.369141,20.3828125 L156.369141,14.5820312 C156.369141,13.8320313 156.255859,13.1640625 156.029297,12.578125 C155.802734,11.9921875 155.494141,11.5214844 155.103516,11.1660156 C154.712891,10.8105469 154.269531,10.5410156 153.773438,10.3574219 C153.277344,10.1738281 152.748047,10.0820312 152.185547,10.0820312 C151.271484,10.0820312 150.484375,10.2539062 149.824219,10.5976562 C149.164062,10.9414062 148.662109,11.4453125 148.318359,12.109375 L148.318359,11.6992188 C148.318359,11.2382812 148.181641,10.8808594 147.908203,10.6269531 C147.634766,10.3730469 147.287109,10.2460938 146.865234,10.2460938 C146.435547,10.2460938 146.080078,10.375 145.798828,10.6328125 C145.517578,10.890625 145.376953,11.2539062 145.376953,11.7226563 L145.376953,20.3828125 C145.376953,20.890625 145.513672,21.2871094 145.787109,21.5722656 C146.060547,21.8574219 146.412109,22 146.841797,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M167.806641,21.8476563 C167.806641,22.9570313 167.501953,23.7832031 166.892578,24.3261719 C166.283203,24.8691406 165.431641,25.140625 164.337891,25.140625 C164.025391,25.140625 163.707031,25.1132812 163.382812,25.0585938 C163.058594,25.0039062 162.814453,24.953125 162.650391,24.90625 C162.486328,24.859375 162.228516,24.7753906 161.876953,24.6542969 C161.525391,24.5332031 161.314453,24.4609375 161.244141,24.4375 C161.119141,24.390625 160.994141,24.3671875 160.869141,24.3671875 C160.595703,24.3671875 160.369141,24.46875 160.189453,24.671875 C160.009766,24.875 159.919922,25.109375 159.919922,25.375 C159.919922,25.75 160.099609,26.046875 160.458984,26.265625 C160.880859,26.5234375 161.472656,26.7402344 162.234375,26.9160156 C162.996094,27.0917969 163.794922,27.1796875 164.630859,27.1796875 C166.474609,27.1796875 167.933594,26.6953125 169.007812,25.7265625 C170.082031,24.7578125 170.619141,23.3476563 170.619141,21.4960938 L170.619141,11.8867188 C170.619141,11.3710938 170.488281,10.96875 170.226562,10.6796875 C169.964844,10.390625 169.630859,10.2460938 169.224609,10.2460938 C168.865234,10.2460938 168.558594,10.3535156 168.304688,10.5683594 C168.050781,10.7832031 167.904297,11.0898438 167.865234,11.4882813 L167.865234,12.0273438 C167.474609,11.4023438 167.009766,10.9257812 166.470703,10.5976562 C165.931641,10.2695312 165.189453,10.1054688 164.244141,10.1054688 C162.548828,10.1054688 161.207031,10.6699219 160.21875,11.7988281 C159.230469,12.9277344 158.736328,14.3945312 158.736328,16.1992188 C158.736328,17.9882812 159.244141,19.4042969 160.259766,20.4472656 C161.275391,21.4902344 162.626953,22.0117188 164.314453,22.0117188 C165.978516,22.0117188 167.142578,21.3828125 167.806641,20.125 L167.806641,21.8476563 Z M164.841797,20.0546875 C163.951172,20.0390625 163.210938,19.7011719 162.621094,19.0410156 C162.03125,18.3808594 161.736328,17.40625 161.736328,16.1171875 C161.736328,15.671875 161.771484,15.2558594 161.841797,14.8691406 C161.912109,14.4824219 162.025391,14.1152344 162.181641,13.7675781 C162.337891,13.4199219 162.533203,13.1230469 162.767578,12.8769531 C163.001953,12.6308594 163.294922,12.4355469 163.646484,12.2910156 C163.998047,12.1464844 164.392578,12.0742188 164.830078,12.0742188 C166.814453,12.0742188 167.806641,13.4296875 167.806641,16.140625 C167.806641,17.4765625 167.537109,18.4589844 166.998047,19.0878906 C166.458984,19.7167969 165.740234,20.0390625 164.841797,20.0546875 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M187.669922,22.3046875 C188.654297,22.3046875 189.566406,22.1757813 190.40625,21.9179688 C191.246094,21.6601562 191.986328,21.2832031 192.626953,20.7871094 C193.267578,20.2910156 193.769531,19.6484375 194.132812,18.859375 C194.496094,18.0703125 194.677734,17.171875 194.677734,16.1640625 L194.677734,6.77734375 C194.677734,6.25390625 194.511719,5.83203125 194.179688,5.51171875 C193.847656,5.19140625 193.455078,5.03125 193.001953,5.03125 C192.541016,5.03125 192.144531,5.19140625 191.8125,5.51171875 C191.480469,5.83203125 191.314453,6.25390625 191.314453,6.77734375 L191.314453,16.0820312 C191.314453,16.6914062 191.210938,17.2324219 191.003906,17.7050781 C190.796875,18.1777344 190.517578,18.5507813 190.166016,18.8242188 C189.814453,19.0976563 189.427734,19.3046875 189.005859,19.4453125 C188.583984,19.5859375 188.138672,19.65625 187.669922,19.65625 C187.201172,19.65625 186.757812,19.5859375 186.339844,19.4453125 C185.921875,19.3046875 185.541016,19.0976563 185.197266,18.8242188 C184.853516,18.5507813 184.578125,18.1777344 184.371094,17.7050781 C184.164062,17.2324219 184.060547,16.6914062 184.060547,16.0820312 L184.060547,6.77734375 C184.060547,6.25390625 183.894531,5.83203125 183.5625,5.51171875 C183.230469,5.19140625 182.833984,5.03125 182.373047,5.03125 C181.912109,5.03125 181.517578,5.19140625 181.189453,5.51171875 C180.861328,5.83203125 180.697266,6.25390625 180.697266,6.77734375 L180.697266,16.1640625 C180.697266,17.171875 180.876953,18.0722656 181.236328,18.8652344 C181.595703,19.6582031 182.095703,20.3027344 182.736328,20.7988281 C183.376953,21.2949219 184.113281,21.6699219 184.945312,21.9238281 C185.777344,22.1777344 186.685547,22.3046875 187.669922,22.3046875 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M198.134766,20.8164062 C199.376953,21.7226563 200.908203,22.1757812 202.728516,22.1757812 C204.134766,22.1757812 205.277344,21.859375 206.15625,21.2265625 C207.035156,20.59375 207.474609,19.6796875 207.474609,18.484375 C207.474609,17.53125 207.183594,16.7851563 206.601562,16.2460937 C206.019531,15.7070312 205.099609,15.28125 203.841797,14.96875 L202.037109,14.5117188 C201.388672,14.3632813 200.949219,14.2011719 200.71875,14.0253906 C200.488281,13.8496094 200.373047,13.578125 200.373047,13.2109375 C200.373047,12.796875 200.583984,12.4902344 201.005859,12.2910156 C201.427734,12.0917969 201.939453,11.9921875 202.541016,11.9921875 C202.720703,11.9921875 202.898438,12 203.074219,12.015625 C203.25,12.03125 203.427734,12.0566406 203.607422,12.0917969 C203.787109,12.1269531 203.941406,12.1582031 204.070312,12.1855469 C204.199219,12.2128906 204.353516,12.2558594 204.533203,12.3144531 C204.712891,12.3730469 204.835938,12.4121094 204.902344,12.4316406 C204.96875,12.4511719 205.083984,12.4921875 205.248047,12.5546875 C205.412109,12.6171875 205.498047,12.6484375 205.505859,12.6484375 C205.677734,12.7109375 205.830078,12.7421875 205.962891,12.7421875 C206.228516,12.7421875 206.441406,12.6542969 206.601562,12.4785156 C206.761719,12.3027344 206.841797,12.0898438 206.841797,11.8398438 C206.841797,11.4179688 206.630859,11.1054688 206.208984,10.9023438 C204.998047,10.3164062 203.716797,10.0234375 202.365234,10.0234375 C200.966797,10.0234375 199.837891,10.3105469 198.978516,10.8847656 C198.119141,11.4589844 197.689453,12.28125 197.689453,13.3515625 C197.689453,13.796875 197.748047,14.1855469 197.865234,14.5175781 C197.982422,14.8496094 198.134766,15.1269531 198.322266,15.3496094 C198.509766,15.5722656 198.779297,15.7753906 199.130859,15.9589844 C199.482422,16.1425781 199.830078,16.2929687 200.173828,16.4101562 C200.517578,16.5273438 200.966797,16.6601563 201.521484,16.8085938 L203.349609,17.2773438 C204.435547,17.5429688 204.978516,18.0078125 204.978516,18.671875 C204.978516,19.046875 204.851562,19.3535156 204.597656,19.5917969 C204.34375,19.8300781 204.039062,19.9941406 203.683594,20.0839844 C203.328125,20.1738281 202.923828,20.21875 202.470703,20.21875 C202.009766,20.21875 201.548828,20.1542969 201.087891,20.0253906 C200.626953,19.8964844 200.292969,19.78125 200.085938,19.6796875 C199.878906,19.578125 199.603516,19.4296875 199.259766,19.234375 C199.048828,19.1171875 198.845703,19.0585937 198.650391,19.0585937 C198.376953,19.0585937 198.152344,19.15625 197.976562,19.3515625 C197.800781,19.546875 197.712891,19.7734375 197.712891,20.03125 C197.712891,20.3515625 197.853516,20.6132813 198.134766,20.8164062 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M213.181641,22.140625 C214.087891,22.140625 214.820312,21.9550781 215.378906,21.5839844 C215.9375,21.2128906 216.341797,20.7304688 216.591797,20.1367188 L216.591797,20.5234375 C216.591797,20.9921875 216.730469,21.3554688 217.007812,21.6132812 C217.285156,21.8710938 217.615234,22 217.998047,22 C218.380859,22 218.708984,21.8710938 218.982422,21.6132812 C219.255859,21.3554688 219.392578,20.9921875 219.392578,20.5234375 L219.392578,14.5703125 C219.392578,13.7734375 219.273438,13.0820312 219.035156,12.4960937 C218.796875,11.9101563 218.458984,11.4453125 218.021484,11.1015625 C217.583984,10.7578125 217.080078,10.5039062 216.509766,10.3398438 C215.939453,10.1757812 215.294922,10.09375 214.576172,10.09375 C212.990234,10.09375 211.548828,10.3984375 210.251953,11.0078125 C209.908203,11.171875 209.736328,11.4453125 209.736328,11.828125 C209.736328,12.1171875 209.832031,12.3769531 210.023438,12.6074219 C210.214844,12.8378906 210.447266,12.953125 210.720703,12.953125 C210.853516,12.953125 210.962891,12.9335938 211.048828,12.8945313 C211.056641,12.8945313 211.142578,12.8613281 211.306641,12.7949219 C211.470703,12.7285156 211.576172,12.6875 211.623047,12.671875 C211.669922,12.65625 211.775391,12.6191406 211.939453,12.5605469 C212.103516,12.5019531 212.228516,12.4609375 212.314453,12.4375 C212.400391,12.4140625 212.525391,12.3789062 212.689453,12.3320313 C212.853516,12.2851562 212.994141,12.2519531 213.111328,12.2324219 C213.228516,12.2128906 213.365234,12.1894531 213.521484,12.1621094 C213.677734,12.1347656 213.832031,12.1152344 213.984375,12.1035156 C214.136719,12.0917969 214.287109,12.0859375 214.435547,12.0859375 C215.123047,12.0859375 215.640625,12.2558594 215.988281,12.5957031 C216.335938,12.9355469 216.509766,13.4726562 216.509766,14.2070312 L216.509766,15.109375 C215.712891,15.109375 215.033203,15.1191406 214.470703,15.1386719 C213.908203,15.1582031 213.351562,15.1992187 212.800781,15.2617188 C212.25,15.3242188 211.794922,15.4121094 211.435547,15.5253906 C211.076172,15.6386719 210.736328,15.7851562 210.416016,15.9648438 C210.095703,16.1445313 209.847656,16.3632812 209.671875,16.6210938 C209.496094,16.8789063 209.359375,17.1855469 209.261719,17.5410156 C209.164062,17.8964844 209.115234,18.3085938 209.115234,18.7773438 C209.115234,19.8320313 209.496094,20.6542969 210.257812,21.2441406 C211.019531,21.8339844 211.994141,22.1328125 213.181641,22.140625 Z M213.544922,20.2890625 C213.021484,20.2890625 212.601562,20.1464844 212.285156,19.8613281 C211.96875,19.5761719 211.810547,19.1601562 211.810547,18.6132812 C211.810547,18.2539062 211.865234,17.9589844 211.974609,17.7285156 C212.083984,17.4980469 212.236328,17.3105469 212.431641,17.1660156 C212.626953,17.0214844 212.929688,16.9160156 213.339844,16.8496094 C213.75,16.7832031 214.183594,16.7382812 214.640625,16.7148438 C215.097656,16.6914062 215.716797,16.6796875 216.498047,16.6796875 L216.498047,17.0195312 C216.498047,17.9648438 216.197266,18.7460938 215.595703,19.3632812 C214.994141,19.9804688 214.310547,20.2890625 213.544922,20.2890625 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M230.912109,21.8476563 C230.912109,22.9570313 230.607422,23.7832031 229.998047,24.3261719 C229.388672,24.8691406 228.537109,25.140625 227.443359,25.140625 C227.130859,25.140625 226.8125,25.1132812 226.488281,25.0585938 C226.164062,25.0039062 225.919922,24.953125 225.755859,24.90625 C225.591797,24.859375 225.333984,24.7753906 224.982422,24.6542969 C224.630859,24.5332031 224.419922,24.4609375 224.349609,24.4375 C224.224609,24.390625 224.099609,24.3671875 223.974609,24.3671875 C223.701172,24.3671875 223.474609,24.46875 223.294922,24.671875 C223.115234,24.875 223.025391,25.109375 223.025391,25.375 C223.025391,25.75 223.205078,26.046875 223.564453,26.265625 C223.986328,26.5234375 224.578125,26.7402344 225.339844,26.9160156 C226.101562,27.0917969 226.900391,27.1796875 227.736328,27.1796875 C229.580078,27.1796875 231.039062,26.6953125 232.113281,25.7265625 C233.1875,24.7578125 233.724609,23.3476563 233.724609,21.4960938 L233.724609,11.8867188 C233.724609,11.3710938 233.59375,10.96875 233.332031,10.6796875 C233.070312,10.390625 232.736328,10.2460938 232.330078,10.2460938 C231.970703,10.2460938 231.664062,10.3535156 231.410156,10.5683594 C231.15625,10.7832031 231.009766,11.0898438 230.970703,11.4882813 L230.970703,12.0273438 C230.580078,11.4023438 230.115234,10.9257812 229.576172,10.5976562 C229.037109,10.2695312 228.294922,10.1054688 227.349609,10.1054688 C225.654297,10.1054688 224.3125,10.6699219 223.324219,11.7988281 C222.335938,12.9277344 221.841797,14.3945312 221.841797,16.1992188 C221.841797,17.9882812 222.349609,19.4042969 223.365234,20.4472656 C224.380859,21.4902344 225.732422,22.0117188 227.419922,22.0117188 C229.083984,22.0117188 230.248047,21.3828125 230.912109,20.125 L230.912109,21.8476563 Z M227.947266,20.0546875 C227.056641,20.0390625 226.316406,19.7011719 225.726562,19.0410156 C225.136719,18.3808594 224.841797,17.40625 224.841797,16.1171875 C224.841797,15.671875 224.876953,15.2558594 224.947266,14.8691406 C225.017578,14.4824219 225.130859,14.1152344 225.287109,13.7675781 C225.443359,13.4199219 225.638672,13.1230469 225.873047,12.8769531 C226.107422,12.6308594 226.400391,12.4355469 226.751953,12.2910156 C227.103516,12.1464844 227.498047,12.0742188 227.935547,12.0742188 C229.919922,12.0742188 230.912109,13.4296875 230.912109,16.140625 C230.912109,17.4765625 230.642578,18.4589844 230.103516,19.0878906 C229.564453,19.7167969 228.845703,20.0390625 227.947266,20.0546875 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M236.138672,16.0117187 C236.138672,17.9414062 236.695312,19.4453125 237.808594,20.5234375 C238.921875,21.6015625 240.404297,22.140625 242.255859,22.140625 C243.904297,22.140625 245.345703,21.6210938 246.580078,20.5820312 C246.830078,20.3632813 246.955078,20.0976562 246.955078,19.7851562 C246.955078,19.5117187 246.861328,19.265625 246.673828,19.046875 C246.486328,18.828125 246.271484,18.71875 246.029297,18.71875 C245.865234,18.71875 245.712891,18.7734375 245.572266,18.8828125 C244.470703,19.671875 243.435547,20.0664063 242.466797,20.0664063 C240.333984,20.0664063 239.205078,18.9648438 239.080078,16.7617188 L246.486328,16.7617188 C246.767578,16.7617188 246.978516,16.6679688 247.119141,16.4804687 C247.259766,16.2929688 247.330078,16.046875 247.330078,15.7421875 C247.322266,14.96875 247.208984,14.25 246.990234,13.5859375 C246.771484,12.921875 246.449219,12.3242187 246.023438,11.7929688 C245.597656,11.2617188 245.035156,10.8457031 244.335938,10.5449219 C243.636719,10.2441406 242.833984,10.09375 241.927734,10.09375 C240.748047,10.09375 239.712891,10.3671875 238.822266,10.9140625 C237.931641,11.4609375 237.261719,12.1777344 236.8125,13.0644531 C236.363281,13.9511719 236.138672,14.9335938 236.138672,16.0117187 Z M239.103516,15.109375 C239.173828,14.2578125 239.457031,13.53125 239.953125,12.9296875 C240.449219,12.328125 241.134766,12.0273438 242.009766,12.0273438 C242.908203,12.0273438 243.585938,12.3164062 244.042969,12.8945312 C244.5,13.4726562 244.759766,14.2109375 244.822266,15.109375 L239.103516,15.109375 Z" id="Shape" fill-rule="nonzero"></path>
+                </g>
+            </g>
+            <g id="List" transform="translate(0, 50)">
+                <g id="bg">
+                    <path d="M0,0 L336,0 L336,296 C336,300.418278 332.418278,304 328,304 L8,304 C3.581722,304 0,300.418278 0,296 L0,0 L0,0 Z" fill="#FFFFFF"></path>
+                    <path d="M336,0 L336,296 C336,300.418278 332.418278,304 328,304 L8,304 C3.581722,304 0,300.418278 0,296 L0,0 L336,0 Z M334.4,1.60000004 L1.6,1.60000004 L1.6,296 C1.6,299.534622 4.4653776,302.4 8,302.4 L328,302.4 C331.534622,302.4 334.4,299.534622 334.4,296 L334.4,1.60000004 Z" fill="#9172E2" fill-rule="nonzero"></path>
+                </g>
+                <g id="List-Item-/-01_@text_#333" transform="translate(0, 12)" xlink:href="#path-22" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M9.953125,25.2792969 C11.874349,25.2792969 13.5416667,24.8180339 14.9550781,23.8955078 C15.3782552,23.6246745 15.5898438,23.2945964 15.5898438,22.9052734 C15.5898438,22.6429036 15.4967448,22.414388 15.3105469,22.2197266 C15.124349,22.0250651 14.9000651,21.9277344 14.6376953,21.9277344 C14.4853516,21.9277344 14.3245443,21.9742839 14.1552734,22.0673828 C13.4951172,22.4397786 12.8815104,22.7254232 12.3144531,22.9243164 C11.7473958,23.1232096 11.0872396,23.2226562 10.3339844,23.2226562 C9.49609375,23.2226562 8.73649089,23.0957031 8.05517578,22.8417969 C7.37386068,22.5878906 6.80257161,22.2408854 6.34130859,21.8007812 C5.88004557,21.3606771 5.49283854,20.8338216 5.1796875,20.2202148 C4.86653646,19.6066081 4.63802083,18.9443359 4.49414062,18.2333984 C4.35026042,17.5224609 4.27832031,16.764974 4.27832031,15.9609375 C4.27832031,15.156901 4.35026042,14.3972982 4.49414062,13.6821289 C4.63802083,12.9669596 4.86653646,12.3004557 5.1796875,11.6826172 C5.49283854,11.0647786 5.87792969,10.5315755 6.33496094,10.0830078 C6.79199219,9.6344401 7.35481771,9.28320313 8.0234375,9.02929688 C8.69205729,8.77539063 9.43684896,8.6484375 10.2578125,8.6484375 C10.5963542,8.6484375 10.9243164,8.66536458 11.2416992,8.69921875 C11.559082,8.73307292 11.8235677,8.77327474 12.0351562,8.81982422 C12.2467448,8.8663737 12.4667969,8.92985026 12.6953125,9.01025391 C12.9238281,9.09065755 13.093099,9.15413411 13.203125,9.20068359 C13.313151,9.24723307 13.4549154,9.31494141 13.628418,9.40380859 C13.8019206,9.49267578 13.905599,9.54557292 13.9394531,9.5625 C14.0917969,9.63020833 14.2314453,9.6640625 14.3583984,9.6640625 C14.6207682,9.6640625 14.8450521,9.5625 15.03125,9.359375 C15.2174479,9.15625 15.3105469,8.91927083 15.3105469,8.6484375 C15.3105469,8.26757812 15.124349,7.97558594 14.7519531,7.77246094 C13.3385417,6.98535156 11.8108724,6.59179688 10.1689453,6.59179688 C8.84016927,6.59179688 7.62988281,6.83723958 6.53808594,7.328125 C5.44628906,7.81901042 4.5406901,8.48974609 3.82128906,9.34033203 C3.10188802,10.190918 2.54541016,11.1875 2.15185547,12.3300781 C1.75830078,13.4726562 1.56152344,14.695638 1.56152344,15.9990234 C1.56152344,17.7509766 1.88313802,19.3209635 2.52636719,20.7089844 C3.16959635,22.0970052 4.1344401,23.2057292 5.42089844,24.0351562 C6.70735677,24.8645833 8.21809896,25.2792969 9.953125,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M19.4140625,25 C19.7272135,25 20.0001628,24.8963216 20.2329102,24.6889648 C20.4656576,24.4816081 20.5820312,24.2001953 20.5820312,23.8447266 L20.5820312,8.01367188 C20.5820312,7.65820312 20.4698893,7.37890625 20.2456055,7.17578125 C20.0213216,6.97265625 19.7568359,6.87109375 19.4521484,6.87109375 C19.1389974,6.87109375 18.8660482,6.97265625 18.6333008,7.17578125 C18.4005534,7.37890625 18.2841797,7.65820312 18.2841797,8.01367188 L18.2841797,23.8447266 C18.2841797,24.2171224 18.3942057,24.5027669 18.6142578,24.7016602 C18.8343099,24.9005534 19.1009115,25 19.4140625,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M27.0976562,23.7050781 C26.3105469,23.7050781 25.6884766,23.5252279 25.2314453,23.1655273 C24.7744141,22.8058268 24.5458984,22.2408854 24.5458984,21.4707031 C24.5458984,21.047526 24.6072591,20.7005208 24.7299805,20.4296875 C24.8527018,20.1588542 25.0410156,19.9282227 25.2949219,19.737793 C25.5488281,19.5473633 25.9296875,19.4034831 26.4375,19.3061523 C26.9453125,19.2088216 27.5292969,19.1411133 28.1894531,19.1030273 C28.8496094,19.0649414 29.7044271,19.0458984 30.7539062,19.0458984 L30.7539062,19.4267578 C30.7539062,20.6708984 30.3878581,21.694987 29.6557617,22.4990234 C28.9236654,23.3030599 28.0709635,23.7050781 27.0976562,23.7050781 Z M26.8564453,25.2285156 C28.6761068,25.2285156 29.9794922,24.4033203 30.7666016,22.7529297 L30.7666016,23.9716797 C30.7666016,24.3017578 30.8745117,24.5577799 31.090332,24.7397461 C31.3061523,24.9217122 31.5664062,25.0126953 31.8710938,25.0126953 C32.1757812,25.0126953 32.4444987,24.9174805 32.6772461,24.7270508 C32.9099935,24.5366211 33.0263672,24.2763672 33.0263672,23.9462891 L33.0263672,16.5449219 C33.0263672,15.046875 32.5883789,13.938151 31.7124023,13.21875 C30.8364258,12.499349 29.6240234,12.1396484 28.0751953,12.1396484 C26.2639974,12.1396484 24.702474,12.5078125 23.390625,13.2441406 C23.1536458,13.3795573 23.0351562,13.5742187 23.0351562,13.828125 C23.0351562,14.0481771 23.1155599,14.2491862 23.2763672,14.4311523 C23.4371745,14.6131185 23.6276042,14.7041016 23.8476562,14.7041016 C23.9661458,14.7041016 24.0677083,14.6829427 24.1523438,14.640625 C24.5839844,14.4459635 24.9479167,14.2936198 25.2441406,14.1835937 C25.5403646,14.0735677 25.9466146,13.9635417 26.4628906,13.8535156 C26.9791667,13.7434896 27.4954427,13.6884766 28.0117188,13.6884766 C28.875,13.6884766 29.5478516,13.9042969 30.0302734,14.3359375 C30.5126953,14.7675781 30.7539063,15.4361979 30.7539063,16.3417969 L30.7539063,17.7255859 C29.8144531,17.7255859 29.0125326,17.7382812 28.3481445,17.7636719 C27.6837565,17.7890625 27.0384115,17.8334961 26.4121094,17.8969727 C25.7858073,17.9604492 25.2695312,18.0535482 24.8632812,18.1762695 C24.4570312,18.2989909 24.0846354,18.4555664 23.7460938,18.6459961 C23.4075521,18.8364258 23.1451823,19.0691732 22.9589844,19.3442383 C22.7727865,19.6193034 22.6289062,19.9388021 22.5273438,20.3027344 C22.4257812,20.6666667 22.375,21.0898438 22.375,21.5722656 C22.375,22.7402344 22.7897135,23.6416016 23.6191406,24.2763672 C24.4485677,24.9111328 25.5276693,25.2285156 26.8564453,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M35.6572266,23.8574219 C36.1650391,24.2298177 36.8442383,24.5535482 37.6948242,24.8286133 C38.5454102,25.1036784 39.4235026,25.2412109 40.3291016,25.2412109 C41.3024089,25.2412109 42.1572266,25.1079102 42.8935547,24.8413086 C43.6298828,24.574707 44.2244466,24.1451823 44.6772461,23.5527344 C45.1300456,22.9602865 45.3564453,22.2324219 45.3564453,21.3691406 C45.3564453,20.4550781 45.0834961,19.7039388 44.5375977,19.1157227 C43.9916992,18.5275065 43.0797526,18.0768229 41.8017578,17.7636719 L39.6054688,17.2177734 C38.6829427,16.9892578 38.0820312,16.7713216 37.8027344,16.5639648 C37.5234375,16.3566081 37.3837891,15.9990234 37.3837891,15.4912109 C37.3837891,14.8649089 37.6546224,14.4057617 38.1962891,14.1137695 C38.7379557,13.8217773 39.4658203,13.6757813 40.3798828,13.6757813 C40.6676432,13.6757813 40.9511719,13.6948242 41.2304688,13.7329102 C41.5097656,13.7709961 41.7848307,13.8260091 42.0556641,13.8979492 C42.3264974,13.9698893 42.5402018,14.03125 42.6967773,14.0820313 C42.8533529,14.1328125 43.0564779,14.2068685 43.3061523,14.3041992 C43.5558268,14.4015299 43.6933594,14.4544271 43.71875,14.4628906 C43.8541667,14.5136719 43.9853516,14.5390625 44.1123047,14.5390625 C44.3323568,14.5390625 44.5079753,14.4692383 44.6391602,14.3295898 C44.7703451,14.1899414 44.8359375,14.0227865 44.8359375,13.828125 C44.8359375,13.4980469 44.6708984,13.2483724 44.3408203,13.0791016 C43.8837891,12.8336589 43.293457,12.6114909 42.5698242,12.4125977 C41.8461914,12.2137044 41.0738932,12.1142578 40.2529297,12.1142578 C39.5673828,12.1142578 38.9326172,12.1798503 38.3486328,12.3110352 C37.7646484,12.4422201 37.237793,12.6411133 36.7680664,12.9077148 C36.2983398,13.1743164 35.9280599,13.5361328 35.6572266,13.9931641 C35.3863932,14.4501953 35.2509766,14.9791667 35.2509766,15.5800781 C35.2509766,15.9609375 35.2890625,16.2994792 35.3652344,16.5957031 C35.4414062,16.8919271 35.5641276,17.1500651 35.7333984,17.3701172 C35.9026693,17.5901693 36.0846354,17.7784831 36.2792969,17.9350586 C36.4739583,18.0916341 36.7342122,18.2376302 37.0600586,18.3730469 C37.3859049,18.5084635 37.6905924,18.6184896 37.9741211,18.703125 C38.2576497,18.7877604 38.6279297,18.8893229 39.0849609,19.0078125 L41.3320312,19.5664062 C42.0768229,19.7526042 42.610026,19.9980469 42.9316406,20.3027344 C43.2532552,20.6074219 43.4140625,21.0136719 43.4140625,21.5214844 C43.4140625,22.2324219 43.128418,22.7719727 42.5571289,23.1401367 C41.9858398,23.5083008 41.2347005,23.6923828 40.3037109,23.6923828 C39.0087891,23.6839193 37.7815755,23.319987 36.6220703,22.6005859 C36.4358724,22.4820964 36.2412109,22.4228516 36.0380859,22.4228516 C35.8180339,22.4228516 35.6360677,22.4969076 35.4921875,22.6450195 C35.3483073,22.7931315 35.2763672,22.96875 35.2763672,23.171875 C35.2763672,23.4511719 35.4033203,23.6796875 35.6572266,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M47.4287109,23.8574219 C47.9365234,24.2298177 48.6157227,24.5535482 49.4663086,24.8286133 C50.3168945,25.1036784 51.194987,25.2412109 52.1005859,25.2412109 C53.0738932,25.2412109 53.9287109,25.1079102 54.6650391,24.8413086 C55.4013672,24.574707 55.995931,24.1451823 56.4487305,23.5527344 C56.9015299,22.9602865 57.1279297,22.2324219 57.1279297,21.3691406 C57.1279297,20.4550781 56.8549805,19.7039388 56.309082,19.1157227 C55.7631836,18.5275065 54.851237,18.0768229 53.5732422,17.7636719 L51.3769531,17.2177734 C50.4544271,16.9892578 49.8535156,16.7713216 49.5742187,16.5639648 C49.2949219,16.3566081 49.1552734,15.9990234 49.1552734,15.4912109 C49.1552734,14.8649089 49.4261068,14.4057617 49.9677734,14.1137695 C50.5094401,13.8217773 51.2373047,13.6757813 52.1513672,13.6757813 C52.4391276,13.6757813 52.7226562,13.6948242 53.0019531,13.7329102 C53.28125,13.7709961 53.5563151,13.8260091 53.8271484,13.8979492 C54.0979818,13.9698893 54.3116862,14.03125 54.4682617,14.0820313 C54.6248372,14.1328125 54.8279622,14.2068685 55.0776367,14.3041992 C55.3273112,14.4015299 55.4648438,14.4544271 55.4902344,14.4628906 C55.625651,14.5136719 55.7568359,14.5390625 55.8837891,14.5390625 C56.1038411,14.5390625 56.2794596,14.4692383 56.4106445,14.3295898 C56.5418294,14.1899414 56.6074219,14.0227865 56.6074219,13.828125 C56.6074219,13.4980469 56.4423828,13.2483724 56.1123047,13.0791016 C55.6552734,12.8336589 55.0649414,12.6114909 54.3413086,12.4125977 C53.6176758,12.2137044 52.8453776,12.1142578 52.0244141,12.1142578 C51.3388672,12.1142578 50.7041016,12.1798503 50.1201172,12.3110352 C49.5361328,12.4422201 49.0092773,12.6411133 48.5395508,12.9077148 C48.0698242,13.1743164 47.6995443,13.5361328 47.4287109,13.9931641 C47.1578776,14.4501953 47.0224609,14.9791667 47.0224609,15.5800781 C47.0224609,15.9609375 47.0605469,16.2994792 47.1367188,16.5957031 C47.2128906,16.8919271 47.335612,17.1500651 47.5048828,17.3701172 C47.6741536,17.5901693 47.8561198,17.7784831 48.0507812,17.9350586 C48.2454427,18.0916341 48.5056966,18.2376302 48.831543,18.3730469 C49.1573893,18.5084635 49.4620768,18.6184896 49.7456055,18.703125 C50.0291341,18.7877604 50.3994141,18.8893229 50.8564453,19.0078125 L53.1035156,19.5664062 C53.8483073,19.7526042 54.3815104,19.9980469 54.703125,20.3027344 C55.0247396,20.6074219 55.1855469,21.0136719 55.1855469,21.5214844 C55.1855469,22.2324219 54.8999023,22.7719727 54.3286133,23.1401367 C53.7573242,23.5083008 53.0061849,23.6923828 52.0751953,23.6923828 C50.7802734,23.6839193 49.5530599,23.319987 48.3935547,22.6005859 C48.2073568,22.4820964 48.0126953,22.4228516 47.8095703,22.4228516 C47.5895182,22.4228516 47.4075521,22.4969076 47.2636719,22.6450195 C47.1197917,22.7931315 47.0478516,22.96875 47.0478516,23.171875 C47.0478516,23.4511719 47.1748047,23.6796875 47.4287109,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M60.5712891,9.44824219 C61.0198568,9.44824219 61.3795573,9.31917318 61.6503906,9.06103516 C61.921224,8.80289714 62.0566406,8.46647135 62.0566406,8.05175781 C62.0566406,7.62858073 61.921224,7.28792318 61.6503906,7.02978516 C61.3795573,6.77164714 61.0240885,6.64257812 60.5839844,6.64257812 C60.1354167,6.64257812 59.7736003,6.77376302 59.4985352,7.03613281 C59.2234701,7.2985026 59.0859375,7.63704427 59.0859375,8.05175781 C59.0859375,8.46647135 59.2213542,8.80289714 59.4921875,9.06103516 C59.7630208,9.31917318 60.1227214,9.44824219 60.5712891,9.44824219 Z M60.5585938,25 C60.8717448,25 61.144694,24.8963216 61.3774414,24.6889648 C61.6101888,24.4816081 61.7265625,24.2001953 61.7265625,23.8447266 L61.7265625,13.5234375 C61.7265625,13.1679688 61.6144206,12.8886719 61.3901367,12.6855469 C61.1658529,12.4824219 60.9013672,12.3808594 60.5966797,12.3808594 C60.2835286,12.3808594 60.0105794,12.4824219 59.777832,12.6855469 C59.5450846,12.8886719 59.4287109,13.1679688 59.4287109,13.5234375 L59.4287109,23.8447266 C59.4287109,24.2171224 59.538737,24.5027669 59.7587891,24.7016602 C59.9788411,24.9005534 60.2454427,25 60.5585938,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M66.7314453,25 C67.0445964,25 67.3154297,24.9005534 67.5439453,24.7016602 C67.7724609,24.5027669 67.8867188,24.2255859 67.8867188,23.8701172 L67.8867188,14.0058594 L70.0322266,14.0058594 C70.5738932,14.0058594 70.8447266,13.7604167 70.8447266,13.2695312 C70.8447266,13.0494792 70.7791341,12.867513 70.6479492,12.7236328 C70.5167643,12.5797526 70.3115234,12.5078125 70.0322266,12.5078125 L67.8867188,12.5078125 L67.8867188,11.5175781 C67.8867188,10.8320312 67.9142253,10.2882487 67.9692383,9.88623047 C68.0242513,9.48421224 68.1300456,9.17529297 68.2866211,8.95947266 C68.4431966,8.74365234 68.6230469,8.60400391 68.8261719,8.54052734 C69.0292969,8.47705078 69.3128255,8.4453125 69.6767578,8.4453125 L70.7558594,8.4453125 C71.0097656,8.4453125 71.206543,8.36067708 71.3461914,8.19140625 C71.4858398,8.02213542 71.5556641,7.81901042 71.5556641,7.58203125 C71.5556641,7.34505208 71.4858398,7.1398112 71.3461914,6.96630859 C71.206543,6.79280599 71.0139974,6.70605469 70.7685547,6.70605469 L69.2197266,6.70605469 C68.085612,6.70605469 67.1969401,7.02555339 66.5537109,7.66455078 C65.9104818,8.30354818 65.5888672,9.38053385 65.5888672,10.8955078 L65.5888672,12.5078125 L63.9257812,12.5078125 C63.6464844,12.5078125 63.4391276,12.5797526 63.3037109,12.7236328 C63.1682943,12.867513 63.1005859,13.0494792 63.1005859,13.2695312 C63.1005859,13.4811198 63.1682943,13.6567383 63.3037109,13.7963867 C63.4391276,13.9360352 63.6464844,14.0058594 63.9257812,14.0058594 L65.5888672,14.0058594 L65.5888672,23.8701172 C65.5888672,24.2255859 65.7010091,24.5027669 65.925293,24.7016602 C66.1495768,24.9005534 66.4182943,25 66.7314453,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M73.7421875,9.44824219 C74.1907552,9.44824219 74.5504557,9.31917318 74.8212891,9.06103516 C75.0921224,8.80289714 75.2275391,8.46647135 75.2275391,8.05175781 C75.2275391,7.62858073 75.0921224,7.28792318 74.8212891,7.02978516 C74.5504557,6.77164714 74.194987,6.64257812 73.7548828,6.64257812 C73.3063151,6.64257812 72.9444987,6.77376302 72.6694336,7.03613281 C72.3943685,7.2985026 72.2568359,7.63704427 72.2568359,8.05175781 C72.2568359,8.46647135 72.3922526,8.80289714 72.6630859,9.06103516 C72.9339193,9.31917318 73.2936198,9.44824219 73.7421875,9.44824219 Z M73.7294922,25 C74.0426432,25 74.3155924,24.8963216 74.5483398,24.6889648 C74.7810872,24.4816081 74.8974609,24.2001953 74.8974609,23.8447266 L74.8974609,13.5234375 C74.8974609,13.1679688 74.785319,12.8886719 74.5610352,12.6855469 C74.3367513,12.4824219 74.0722656,12.3808594 73.7675781,12.3808594 C73.4544271,12.3808594 73.1814779,12.4824219 72.9487305,12.6855469 C72.7159831,12.8886719 72.5996094,13.1679688 72.5996094,13.5234375 L72.5996094,23.8447266 C72.5996094,24.2171224 72.7096354,24.5027669 72.9296875,24.7016602 C73.1497396,24.9005534 73.4163411,25 73.7294922,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M82.8349609,25.2412109 C84.4938151,25.2412109 85.7802734,24.8730469 86.6943359,24.1367188 C86.9820964,23.8997396 87.1259766,23.6416016 87.1259766,23.3623047 C87.1259766,23.1676432 87.0582682,23.0047201 86.9228516,22.8735352 C86.7874349,22.7423503 86.6223958,22.6767578 86.4277344,22.6767578 C86.25,22.6767578 86.0807292,22.7317708 85.9199219,22.8417969 C85.141276,23.3834635 84.1722005,23.6542969 83.0126953,23.6542969 C82.3525391,23.6542969 81.7685547,23.5167643 81.2607422,23.2416992 C80.7529297,22.9666341 80.3466797,22.5942383 80.0419922,22.1245117 C79.7373047,21.6547852 79.5087891,21.1321615 79.3564453,20.5566406 C79.2041016,19.9811198 79.1279297,19.3717448 79.1279297,18.7285156 C79.1279297,17.188151 79.5045573,15.9630534 80.2578125,15.0532227 C81.0110677,14.1433919 81.9928385,13.6884766 83.203125,13.6884766 C84.1341146,13.6884766 84.9847005,13.938151 85.7548828,14.4375 C85.9241536,14.547526 86.101888,14.6025391 86.2880859,14.6025391 C86.4912109,14.6025391 86.6625977,14.5390625 86.8022461,14.4121094 C86.9418945,14.2851563 87.0117188,14.1285807 87.0117188,13.9423828 C87.0117188,13.680013 86.8678385,13.4345703 86.5800781,13.2060547 C86.2161458,12.9013672 85.7252604,12.6474609 85.1074219,12.4443359 C84.4895833,12.2412109 83.8167318,12.1396484 83.0888672,12.1396484 C81.8447266,12.1396484 80.7444661,12.4316406 79.7880859,13.015625 C78.8317057,13.5996094 78.0996094,14.3867188 77.5917969,15.3769531 C77.0839844,16.3671875 76.8300781,17.4716797 76.8300781,18.6904297 C76.8300781,20.5777995 77.3717448,22.1414388 78.4550781,23.3813477 C79.5384115,24.6212565 80.9983724,25.2412109 82.8349609,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M92.5244141,23.7050781 C91.7373047,23.7050781 91.1152344,23.5252279 90.6582031,23.1655273 C90.2011719,22.8058268 89.9726562,22.2408854 89.9726562,21.4707031 C89.9726562,21.047526 90.0340169,20.7005208 90.1567383,20.4296875 C90.2794596,20.1588542 90.4677734,19.9282227 90.7216797,19.737793 C90.9755859,19.5473633 91.3564453,19.4034831 91.8642578,19.3061523 C92.3720703,19.2088216 92.9560547,19.1411133 93.6162109,19.1030273 C94.2763672,19.0649414 95.1311849,19.0458984 96.1806641,19.0458984 L96.1806641,19.4267578 C96.1806641,20.6708984 95.8146159,21.694987 95.0825195,22.4990234 C94.3504232,23.3030599 93.4977214,23.7050781 92.5244141,23.7050781 Z M92.2832031,25.2285156 C94.1028646,25.2285156 95.40625,24.4033203 96.1933594,22.7529297 L96.1933594,23.9716797 C96.1933594,24.3017578 96.3012695,24.5577799 96.5170898,24.7397461 C96.7329102,24.9217122 96.9931641,25.0126953 97.2978516,25.0126953 C97.6025391,25.0126953 97.8712565,24.9174805 98.1040039,24.7270508 C98.3367513,24.5366211 98.453125,24.2763672 98.453125,23.9462891 L98.453125,16.5449219 C98.453125,15.046875 98.0151367,13.938151 97.1391602,13.21875 C96.2631836,12.499349 95.0507812,12.1396484 93.5019531,12.1396484 C91.6907552,12.1396484 90.1292318,12.5078125 88.8173828,13.2441406 C88.5804036,13.3795573 88.4619141,13.5742187 88.4619141,13.828125 C88.4619141,14.0481771 88.5423177,14.2491862 88.703125,14.4311523 C88.8639323,14.6131185 89.054362,14.7041016 89.2744141,14.7041016 C89.3929036,14.7041016 89.4944661,14.6829427 89.5791016,14.640625 C90.0107422,14.4459635 90.3746745,14.2936198 90.6708984,14.1835937 C90.9671224,14.0735677 91.3733724,13.9635417 91.8896484,13.8535156 C92.4059245,13.7434896 92.9222005,13.6884766 93.4384766,13.6884766 C94.3017578,13.6884766 94.9746094,13.9042969 95.4570312,14.3359375 C95.9394531,14.7675781 96.1806641,15.4361979 96.1806641,16.3417969 L96.1806641,17.7255859 C95.2412109,17.7255859 94.4392904,17.7382812 93.7749023,17.7636719 C93.1105143,17.7890625 92.4651693,17.8334961 91.8388672,17.8969727 C91.2125651,17.9604492 90.6962891,18.0535482 90.2900391,18.1762695 C89.8837891,18.2989909 89.5113932,18.4555664 89.1728516,18.6459961 C88.8343099,18.8364258 88.5719401,19.0691732 88.3857422,19.3442383 C88.1995443,19.6193034 88.0556641,19.9388021 87.9541016,20.3027344 C87.8525391,20.6666667 87.8017578,21.0898438 87.8017578,21.5722656 C87.8017578,22.7402344 88.2164714,23.6416016 89.0458984,24.2763672 C89.8753255,24.9111328 90.9544271,25.2285156 92.2832031,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M105.273438,25.1777344 L106.022461,25.1777344 C106.327148,25.1777344 106.574707,25.1692708 106.765137,25.1523438 C106.955566,25.1354167 107.145996,25.0973307 107.336426,25.0380859 C107.526855,24.9788411 107.66862,24.8836263 107.761719,24.7524414 C107.854818,24.6212565 107.901367,24.4498698 107.901367,24.2382813 C107.901367,24.0013021 107.820964,23.7939453 107.660156,23.6162109 C107.499349,23.4384766 107.279297,23.3496094 107,23.3496094 L106.923828,23.3496094 L106.060547,23.3876953 L105.920898,23.3876953 C105.387695,23.3876953 104.998372,23.1951497 104.75293,22.8100586 C104.507487,22.4249674 104.384766,21.7965495 104.384766,20.9248047 L104.384766,14.0058594 L106.720703,14.0058594 C107.279297,14.0058594 107.558594,13.7646484 107.558594,13.2822266 C107.558594,13.0537109 107.48877,12.867513 107.349121,12.7236328 C107.209473,12.5797526 107,12.5078125 106.720703,12.5078125 L104.384766,12.5078125 L104.384766,8.82617188 C104.384766,8.52994792 104.30013,8.30354818 104.130859,8.14697266 C103.961589,7.99039714 103.75,7.91210938 103.496094,7.91210938 C103.22526,7.91210938 102.977702,8.0094401 102.753418,8.20410156 C102.529134,8.39876302 102.408529,8.63151042 102.391602,8.90234375 L102.086914,12.5078125 L100.525391,12.5078125 C100.246094,12.5078125 100.034505,12.5776367 99.890625,12.7172852 C99.7467448,12.8569336 99.6748047,13.0367839 99.6748047,13.2568359 C99.6748047,13.4853516 99.7488607,13.6673177 99.8969727,13.8027344 C100.045085,13.938151 100.258789,14.0058594 100.538086,14.0058594 L102.086914,14.0058594 L102.086914,21.3183594 C102.086914,22.6302083 102.374674,23.6013997 102.950195,24.2319336 C103.525716,24.8624674 104.30013,25.1777344 105.273438,25.1777344 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M110.849609,9.44824219 C111.298177,9.44824219 111.657878,9.31917318 111.928711,9.06103516 C112.199544,8.80289714 112.334961,8.46647135 112.334961,8.05175781 C112.334961,7.62858073 112.199544,7.28792318 111.928711,7.02978516 C111.657878,6.77164714 111.302409,6.64257812 110.862305,6.64257812 C110.413737,6.64257812 110.051921,6.77376302 109.776855,7.03613281 C109.50179,7.2985026 109.364258,7.63704427 109.364258,8.05175781 C109.364258,8.46647135 109.499674,8.80289714 109.770508,9.06103516 C110.041341,9.31917318 110.401042,9.44824219 110.849609,9.44824219 Z M110.836914,25 C111.150065,25 111.423014,24.8963216 111.655762,24.6889648 C111.888509,24.4816081 112.004883,24.2001953 112.004883,23.8447266 L112.004883,13.5234375 C112.004883,13.1679688 111.892741,12.8886719 111.668457,12.6855469 C111.444173,12.4824219 111.179688,12.3808594 110.875,12.3808594 C110.561849,12.3808594 110.2889,12.4824219 110.056152,12.6855469 C109.823405,12.8886719 109.707031,13.1679688 109.707031,13.5234375 L109.707031,23.8447266 C109.707031,24.2171224 109.817057,24.5027669 110.037109,24.7016602 C110.257161,24.9005534 110.523763,25 110.836914,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M120.297852,25.2285156 C122.25293,25.2285156 123.801758,24.6149089 124.944336,23.3876953 C126.086914,22.1604818 126.658203,20.5947266 126.658203,18.6904297 C126.658203,16.7692057 126.084798,15.1971029 124.937988,13.9741211 C123.791178,12.7511393 122.244466,12.1396484 120.297852,12.1396484 C118.351237,12.1396484 116.804525,12.7532552 115.657715,13.9804688 C114.510905,15.2076823 113.9375,16.7776693 113.9375,18.6904297 C113.9375,20.5947266 114.510905,22.1604818 115.657715,23.3876953 C116.804525,24.6149089 118.351237,25.2285156 120.297852,25.2285156 Z M120.272461,23.6542969 C119.00293,23.6542969 118.010579,23.2078451 117.29541,22.3149414 C116.580241,21.4220378 116.222656,20.2138672 116.222656,18.6904297 C116.222656,17.1500651 116.582357,15.933431 117.301758,15.0405273 C118.021159,14.1476237 119.019857,13.7011719 120.297852,13.7011719 C121.567383,13.7011719 122.563965,14.1497396 123.287598,15.046875 C124.01123,15.9440104 124.373047,17.1585286 124.373047,18.6904297 C124.373047,20.2307943 124.015462,21.4431966 123.300293,22.3276367 C122.585124,23.2120768 121.575846,23.6542969 120.272461,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M129.631836,25 C129.944987,25 130.217936,24.9026693 130.450684,24.7080078 C130.683431,24.5133464 130.799805,24.2467448 130.799805,23.9082031 L130.799805,17.5732422 C130.859049,16.3798828 131.222982,15.4361979 131.891602,14.7421875 C132.560221,14.0481771 133.381185,13.7011719 134.354492,13.7011719 C135.268555,13.7011719 135.977376,13.9847005 136.480957,14.5517578 C136.984538,15.1188151 137.236328,15.9440104 137.236328,17.0273438 L137.236328,23.9082031 C137.236328,24.2552083 137.34847,24.5239258 137.572754,24.7143555 C137.797038,24.9047852 138.065755,25 138.378906,25 C138.692057,25 138.960775,24.9047852 139.185059,24.7143555 C139.409342,24.5239258 139.521484,24.2552083 139.521484,23.9082031 L139.521484,17.0527344 C139.521484,15.4023437 139.106771,14.1708984 138.277344,13.3583984 C137.447917,12.5458984 136.334961,12.1396484 134.938477,12.1396484 C133.965169,12.1396484 133.112467,12.3470052 132.380371,12.7617188 C131.648275,13.1764323 131.121419,13.7688802 130.799805,14.5390625 L130.799805,13.4345703 C130.799805,13.1044922 130.687663,12.8484701 130.463379,12.6665039 C130.239095,12.4845378 129.970378,12.3935547 129.657227,12.3935547 C129.344076,12.3935547 129.073242,12.4887695 128.844727,12.6791992 C128.616211,12.8696289 128.501953,13.1341146 128.501953,13.4726563 L128.501953,23.9082031 C128.501953,24.2552083 128.611979,24.5239258 128.832031,24.7143555 C129.052083,24.9047852 129.318685,25 129.631836,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M156.183594,25.2792969 C156.953776,25.2792969 157.687988,25.1946615 158.38623,25.0253906 C159.084473,24.8561198 159.738281,24.59375 160.347656,24.2382813 C160.957031,23.8828125 161.483887,23.449056 161.928223,22.9370117 C162.372559,22.4249674 162.723796,21.8071289 162.981934,21.0834961 C163.240072,20.3598633 163.369141,19.5621745 163.369141,18.6904297 L163.369141,8.06445312 C163.369141,7.67513021 163.244303,7.37044271 162.994629,7.15039063 C162.744954,6.93033854 162.446615,6.8203125 162.099609,6.8203125 C161.744141,6.8203125 161.439453,6.93033854 161.185547,7.15039063 C160.931641,7.37044271 160.804688,7.67513021 160.804688,8.06445312 L160.804688,18.6269531 C160.804688,19.3886719 160.673503,20.069987 160.411133,20.6708984 C160.148763,21.2718099 159.797526,21.7542318 159.357422,22.1181641 C158.917318,22.4820964 158.426432,22.7571615 157.884766,22.9433594 C157.343099,23.1295573 156.776042,23.2226562 156.183594,23.2226562 C155.591146,23.2226562 155.026204,23.1295573 154.48877,22.9433594 C153.951335,22.7571615 153.464681,22.4820964 153.028809,22.1181641 C152.592936,21.7542318 152.243815,21.2718099 151.981445,20.6708984 C151.719076,20.069987 151.587891,19.3886719 151.587891,18.6269531 L151.587891,8.06445312 C151.587891,7.67513021 151.460938,7.37044271 151.207031,7.15039063 C150.953125,6.93033854 150.652669,6.8203125 150.305664,6.8203125 C149.950195,6.8203125 149.645508,6.93033854 149.391602,7.15039063 C149.137695,7.37044271 149.010742,7.67513021 149.010742,8.06445312 L149.010742,18.6904297 C149.010742,19.7652995 149.205404,20.7301432 149.594727,21.5849609 C149.984049,22.4397786 150.513021,23.1316732 151.181641,23.6606445 C151.85026,24.1896159 152.609863,24.5916341 153.460449,24.8666992 C154.311035,25.1417643 155.21875,25.2792969 156.183594,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M166.520508,23.8574219 C167.02832,24.2298177 167.70752,24.5535482 168.558105,24.8286133 C169.408691,25.1036784 170.286784,25.2412109 171.192383,25.2412109 C172.16569,25.2412109 173.020508,25.1079102 173.756836,24.8413086 C174.493164,24.574707 175.087728,24.1451823 175.540527,23.5527344 C175.993327,22.9602865 176.219727,22.2324219 176.219727,21.3691406 C176.219727,20.4550781 175.946777,19.7039388 175.400879,19.1157227 C174.85498,18.5275065 173.943034,18.0768229 172.665039,17.7636719 L170.46875,17.2177734 C169.546224,16.9892578 168.945312,16.7713216 168.666016,16.5639648 C168.386719,16.3566081 168.24707,15.9990234 168.24707,15.4912109 C168.24707,14.8649089 168.517904,14.4057617 169.05957,14.1137695 C169.601237,13.8217773 170.329102,13.6757813 171.243164,13.6757813 C171.530924,13.6757813 171.814453,13.6948242 172.09375,13.7329102 C172.373047,13.7709961 172.648112,13.8260091 172.918945,13.8979492 C173.189779,13.9698893 173.403483,14.03125 173.560059,14.0820313 C173.716634,14.1328125 173.919759,14.2068685 174.169434,14.3041992 C174.419108,14.4015299 174.556641,14.4544271 174.582031,14.4628906 C174.717448,14.5136719 174.848633,14.5390625 174.975586,14.5390625 C175.195638,14.5390625 175.371257,14.4692383 175.502441,14.3295898 C175.633626,14.1899414 175.699219,14.0227865 175.699219,13.828125 C175.699219,13.4980469 175.53418,13.2483724 175.204102,13.0791016 C174.74707,12.8336589 174.156738,12.6114909 173.433105,12.4125977 C172.709473,12.2137044 171.937174,12.1142578 171.116211,12.1142578 C170.430664,12.1142578 169.795898,12.1798503 169.211914,12.3110352 C168.62793,12.4422201 168.101074,12.6411133 167.631348,12.9077148 C167.161621,13.1743164 166.791341,13.5361328 166.520508,13.9931641 C166.249674,14.4501953 166.114258,14.9791667 166.114258,15.5800781 C166.114258,15.9609375 166.152344,16.2994792 166.228516,16.5957031 C166.304688,16.8919271 166.427409,17.1500651 166.59668,17.3701172 C166.765951,17.5901693 166.947917,17.7784831 167.142578,17.9350586 C167.33724,18.0916341 167.597493,18.2376302 167.92334,18.3730469 C168.249186,18.5084635 168.553874,18.6184896 168.837402,18.703125 C169.120931,18.7877604 169.491211,18.8893229 169.948242,19.0078125 L172.195312,19.5664062 C172.940104,19.7526042 173.473307,19.9980469 173.794922,20.3027344 C174.116536,20.6074219 174.277344,21.0136719 174.277344,21.5214844 C174.277344,22.2324219 173.991699,22.7719727 173.42041,23.1401367 C172.849121,23.5083008 172.097982,23.6923828 171.166992,23.6923828 C169.87207,23.6839193 168.644857,23.319987 167.485352,22.6005859 C167.299154,22.4820964 167.104492,22.4228516 166.901367,22.4228516 C166.681315,22.4228516 166.499349,22.4969076 166.355469,22.6450195 C166.211589,22.7931315 166.139648,22.96875 166.139648,23.171875 C166.139648,23.4511719 166.266602,23.6796875 166.520508,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M182.037109,23.7050781 C181.25,23.7050781 180.62793,23.5252279 180.170898,23.1655273 C179.713867,22.8058268 179.485352,22.2408854 179.485352,21.4707031 C179.485352,21.047526 179.546712,20.7005208 179.669434,20.4296875 C179.792155,20.1588542 179.980469,19.9282227 180.234375,19.737793 C180.488281,19.5473633 180.869141,19.4034831 181.376953,19.3061523 C181.884766,19.2088216 182.46875,19.1411133 183.128906,19.1030273 C183.789062,19.0649414 184.64388,19.0458984 185.693359,19.0458984 L185.693359,19.4267578 C185.693359,20.6708984 185.327311,21.694987 184.595215,22.4990234 C183.863118,23.3030599 183.010417,23.7050781 182.037109,23.7050781 Z M181.795898,25.2285156 C183.61556,25.2285156 184.918945,24.4033203 185.706055,22.7529297 L185.706055,23.9716797 C185.706055,24.3017578 185.813965,24.5577799 186.029785,24.7397461 C186.245605,24.9217122 186.505859,25.0126953 186.810547,25.0126953 C187.115234,25.0126953 187.383952,24.9174805 187.616699,24.7270508 C187.849447,24.5366211 187.96582,24.2763672 187.96582,23.9462891 L187.96582,16.5449219 C187.96582,15.046875 187.527832,13.938151 186.651855,13.21875 C185.775879,12.499349 184.563477,12.1396484 183.014648,12.1396484 C181.203451,12.1396484 179.641927,12.5078125 178.330078,13.2441406 C178.093099,13.3795573 177.974609,13.5742187 177.974609,13.828125 C177.974609,14.0481771 178.055013,14.2491862 178.21582,14.4311523 C178.376628,14.6131185 178.567057,14.7041016 178.787109,14.7041016 C178.905599,14.7041016 179.007161,14.6829427 179.091797,14.640625 C179.523438,14.4459635 179.88737,14.2936198 180.183594,14.1835937 C180.479818,14.0735677 180.886068,13.9635417 181.402344,13.8535156 C181.91862,13.7434896 182.434896,13.6884766 182.951172,13.6884766 C183.814453,13.6884766 184.487305,13.9042969 184.969727,14.3359375 C185.452148,14.7675781 185.693359,15.4361979 185.693359,16.3417969 L185.693359,17.7255859 C184.753906,17.7255859 183.951986,17.7382812 183.287598,17.7636719 C182.62321,17.7890625 181.977865,17.8334961 181.351562,17.8969727 C180.72526,17.9604492 180.208984,18.0535482 179.802734,18.1762695 C179.396484,18.2989909 179.024089,18.4555664 178.685547,18.6459961 C178.347005,18.8364258 178.084635,19.0691732 177.898438,19.3442383 C177.71224,19.6193034 177.568359,19.9388021 177.466797,20.3027344 C177.365234,20.6666667 177.314453,21.0898438 177.314453,21.5722656 C177.314453,22.7402344 177.729167,23.6416016 178.558594,24.2763672 C179.388021,24.9111328 180.467122,25.2285156 181.795898,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M195.979492,23.4765625 C194.862305,23.4765625 193.939779,23.0703125 193.211914,22.2578125 C192.484049,21.4453125 192.120117,20.2519531 192.120117,18.6777344 C192.120117,17.7382812 192.251302,16.9046224 192.513672,16.1767578 C192.776042,15.4488932 193.203451,14.8543294 193.795898,14.3930664 C194.388346,13.9318034 195.116211,13.7011719 195.979492,13.7011719 C196.521159,13.7011719 197.007812,13.7921549 197.439453,13.9741211 C197.871094,14.1560872 198.224447,14.4015299 198.499512,14.7104492 C198.774577,15.0193685 199.003092,15.3875326 199.185059,15.8149414 C199.367025,16.2423503 199.496094,16.6866862 199.572266,17.1479492 C199.648438,17.6092122 199.686523,18.1022135 199.686523,18.6269531 C199.686523,20.1588542 199.339518,21.3500977 198.645508,22.2006836 C197.951497,23.0512695 197.062826,23.4765625 195.979492,23.4765625 Z M195.87793,30.3701172 C197.748372,30.3701172 199.223145,29.9130859 200.302246,28.9990234 C201.381348,28.0849609 201.920898,26.7265625 201.920898,24.9238281 L201.920898,13.4599609 C201.920898,13.1214193 201.815104,12.8569336 201.603516,12.6665039 C201.391927,12.4760742 201.138021,12.3808594 200.841797,12.3808594 C200.570964,12.3808594 200.331868,12.4633789 200.124512,12.628418 C199.917155,12.793457 199.800781,13.0198568 199.775391,13.3076172 L199.775391,14.5898437 C198.9375,12.9563802 197.532552,12.1396484 195.560547,12.1396484 C194.367188,12.1396484 193.330404,12.4401042 192.450195,13.0410156 C191.569987,13.6419271 190.911947,14.4353841 190.476074,15.4213867 C190.040202,16.4073893 189.822266,17.5182292 189.822266,18.7539063 C189.822266,19.625651 189.951335,20.438151 190.209473,21.1914062 C190.467611,21.9446615 190.835775,22.6048177 191.313965,23.171875 C191.792155,23.7389323 192.40153,24.1853841 193.14209,24.5112305 C193.88265,24.8370768 194.709961,25 195.624023,25 C196.597331,25 197.433105,24.7736003 198.131348,24.3208008 C198.82959,23.8680013 199.352214,23.2565104 199.699219,22.4863281 L199.699219,25 C199.699219,26.21875 199.364909,27.1518555 198.696289,27.7993164 C198.027669,28.4467773 197.037435,28.7705078 195.725586,28.7705078 C195.522461,28.7705078 195.321452,28.7620443 195.122559,28.7451172 C194.923665,28.7281901 194.745931,28.7091471 194.589355,28.6879883 C194.43278,28.6668294 194.259277,28.6350911 194.068848,28.5927734 C193.878418,28.5504557 193.726074,28.5166016 193.611816,28.4912109 C193.497559,28.4658203 193.349447,28.4235026 193.16748,28.3642578 C192.985514,28.305013 192.860677,28.2626953 192.792969,28.2373047 C192.72526,28.2119141 192.604655,28.1653646 192.431152,28.0976563 C192.25765,28.0299479 192.153971,27.991862 192.120117,27.9833984 C192.027018,27.9495443 191.929688,27.9326172 191.828125,27.9326172 C191.616536,27.9326172 191.436686,28.0109049 191.288574,28.1674805 C191.140462,28.324056 191.066406,28.508138 191.066406,28.7197266 C191.066406,29.0328776 191.227214,29.2783203 191.548828,29.4560547 C192.691406,30.0654297 194.13444,30.3701172 195.87793,30.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M206.202148,17.7382812 C206.244466,17.2135417 206.356608,16.7163086 206.538574,16.246582 C206.72054,15.7768555 206.965983,15.3494466 207.274902,14.9643555 C207.583822,14.5792643 207.979492,14.2724609 208.461914,14.0439453 C208.944336,13.8154297 209.481771,13.7011719 210.074219,13.7011719 C211.216797,13.7011719 212.08431,14.0777995 212.676758,14.8310547 C213.269206,15.5843099 213.603516,16.5533854 213.679688,17.7382812 L206.202148,17.7382812 Z M210.112305,25.2285156 C212.008138,25.2285156 213.624674,24.6276042 214.961914,23.4257812 C215.198893,23.2141927 215.317383,22.9729818 215.317383,22.7021484 C215.317383,22.4905599 215.247559,22.3064779 215.10791,22.1499023 C214.968262,21.9933268 214.796875,21.9150391 214.59375,21.9150391 C214.424479,21.9150391 214.263672,21.9742839 214.111328,22.0927734 C213.501953,22.5582682 212.89681,22.9264323 212.295898,23.1972656 C211.694987,23.468099 211.017904,23.6035156 210.264648,23.6035156 C209.071289,23.5865885 208.09375,23.2036133 207.332031,22.4545898 C206.570312,21.7055664 206.18099,20.6031901 206.164062,19.1474609 L214.873047,19.1474609 C215.14388,19.1474609 215.347005,19.0670573 215.482422,18.90625 C215.617839,18.7454427 215.685547,18.5423177 215.685547,18.296875 C215.660156,17.4420573 215.537435,16.6570638 215.317383,15.9418945 C215.097331,15.2267253 214.767253,14.5792643 214.327148,13.9995117 C213.887044,13.4197591 213.294596,12.9648438 212.549805,12.6347656 C211.805013,12.3046875 210.9375,12.1396484 209.947266,12.1396484 C208.694661,12.1396484 207.598633,12.4443359 206.65918,13.0537109 C205.719727,13.6630859 205.017253,14.4544271 204.551758,15.4277344 C204.086263,16.4010417 203.853516,17.4759115 203.853516,18.6523437 C203.853516,19.9895833 204.130697,21.159668 204.685059,22.1625977 C205.239421,23.1655273 205.986328,23.9251302 206.925781,24.4414063 C207.865234,24.9576823 208.927409,25.2200521 210.112305,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M217.757812,23.8574219 C218.265625,24.2298177 218.944824,24.5535482 219.79541,24.8286133 C220.645996,25.1036784 221.524089,25.2412109 222.429688,25.2412109 C223.402995,25.2412109 224.257812,25.1079102 224.994141,24.8413086 C225.730469,24.574707 226.325033,24.1451823 226.777832,23.5527344 C227.230632,22.9602865 227.457031,22.2324219 227.457031,21.3691406 C227.457031,20.4550781 227.184082,19.7039388 226.638184,19.1157227 C226.092285,18.5275065 225.180339,18.0768229 223.902344,17.7636719 L221.706055,17.2177734 C220.783529,16.9892578 220.182617,16.7713216 219.90332,16.5639648 C219.624023,16.3566081 219.484375,15.9990234 219.484375,15.4912109 C219.484375,14.8649089 219.755208,14.4057617 220.296875,14.1137695 C220.838542,13.8217773 221.566406,13.6757813 222.480469,13.6757813 C222.768229,13.6757813 223.051758,13.6948242 223.331055,13.7329102 C223.610352,13.7709961 223.885417,13.8260091 224.15625,13.8979492 C224.427083,13.9698893 224.640788,14.03125 224.797363,14.0820313 C224.953939,14.1328125 225.157064,14.2068685 225.406738,14.3041992 C225.656413,14.4015299 225.793945,14.4544271 225.819336,14.4628906 C225.954753,14.5136719 226.085938,14.5390625 226.212891,14.5390625 C226.432943,14.5390625 226.608561,14.4692383 226.739746,14.3295898 C226.870931,14.1899414 226.936523,14.0227865 226.936523,13.828125 C226.936523,13.4980469 226.771484,13.2483724 226.441406,13.0791016 C225.984375,12.8336589 225.394043,12.6114909 224.67041,12.4125977 C223.946777,12.2137044 223.174479,12.1142578 222.353516,12.1142578 C221.667969,12.1142578 221.033203,12.1798503 220.449219,12.3110352 C219.865234,12.4422201 219.338379,12.6411133 218.868652,12.9077148 C218.398926,13.1743164 218.028646,13.5361328 217.757812,13.9931641 C217.486979,14.4501953 217.351562,14.9791667 217.351562,15.5800781 C217.351562,15.9609375 217.389648,16.2994792 217.46582,16.5957031 C217.541992,16.8919271 217.664714,17.1500651 217.833984,17.3701172 C218.003255,17.5901693 218.185221,17.7784831 218.379883,17.9350586 C218.574544,18.0916341 218.834798,18.2376302 219.160645,18.3730469 C219.486491,18.5084635 219.791178,18.6184896 220.074707,18.703125 C220.358236,18.7877604 220.728516,18.8893229 221.185547,19.0078125 L223.432617,19.5664062 C224.177409,19.7526042 224.710612,19.9980469 225.032227,20.3027344 C225.353841,20.6074219 225.514648,21.0136719 225.514648,21.5214844 C225.514648,22.2324219 225.229004,22.7719727 224.657715,23.1401367 C224.086426,23.5083008 223.335286,23.6923828 222.404297,23.6923828 C221.109375,23.6839193 219.882161,23.319987 218.722656,22.6005859 C218.536458,22.4820964 218.341797,22.4228516 218.138672,22.4228516 C217.91862,22.4228516 217.736654,22.4969076 217.592773,22.6450195 C217.448893,22.7931315 217.376953,22.96875 217.376953,23.171875 C217.376953,23.4511719 217.503906,23.6796875 217.757812,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-8" transform="translate(0, 52)" xlink:href="#path-23" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M3.51660156,25 L13.1904297,25 C13.5120443,25 13.7617187,24.9005534 13.9394531,24.7016602 C14.1171875,24.5027669 14.2060547,24.2636719 14.2060547,23.984375 C14.2060547,23.7050781 14.1171875,23.4638672 13.9394531,23.2607422 C13.7617187,23.0576172 13.5120443,22.9560547 13.1904297,22.9560547 L4.81152344,22.9560547 L4.81152344,16.671875 L12.4287109,16.671875 C12.7503255,16.671875 13.0021159,16.5724284 13.184082,16.3735352 C13.3660482,16.1746419 13.4570313,15.9397786 13.4570313,15.6689453 C13.4570313,15.398112 13.3681641,15.1653646 13.1904297,14.9707031 C13.0126953,14.7760417 12.7587891,14.6787109 12.4287109,14.6787109 L4.81152344,14.6787109 L4.81152344,8.90234375 L12.8603516,8.90234375 C13.1819661,8.90234375 13.4316406,8.80078125 13.609375,8.59765625 C13.7871094,8.39453125 13.8759766,8.15332031 13.8759766,7.87402344 C13.8759766,7.59472656 13.7871094,7.35563151 13.609375,7.15673828 C13.4316406,6.95784505 13.1819661,6.85839844 12.8603516,6.85839844 L3.453125,6.85839844 C3.11458333,6.85839844 2.8289388,6.98323568 2.59619141,7.23291016 C2.36344401,7.48258464 2.24707031,7.77246094 2.24707031,8.10253906 L2.24707031,23.7558594 C2.24707031,24.0859375 2.37613932,24.3758138 2.63427734,24.6254883 C2.89241536,24.8751628 3.18652344,25 3.51660156,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M17.8144531,25 C18.1360677,25 18.4132487,24.9047852 18.6459961,24.7143555 C18.8787435,24.5239258 18.9951172,24.2594401 18.9951172,23.9208984 L18.9951172,17.7255859 C18.9951172,16.4560547 19.3251953,15.4679362 19.9853516,14.7612305 C20.6455078,14.0545247 21.5257161,13.7011719 22.6259766,13.7011719 C24.4710286,13.7011719 25.3935547,14.8691406 25.3935547,17.2050781 L25.3935547,23.9082031 C25.3935547,24.2467448 25.5056966,24.5133464 25.7299805,24.7080078 C25.9542643,24.9026693 26.21875,25 26.5234375,25 C26.8365885,25 27.1074219,24.9026693 27.3359375,24.7080078 C27.5644531,24.5133464 27.6787109,24.2467448 27.6787109,23.9082031 L27.6787109,17.7890625 C27.6787109,16.4518229 28.0214844,15.4361979 28.7070313,14.7421875 C29.3925781,14.0481771 30.2643229,13.7011719 31.3222656,13.7011719 C32.2109375,13.7011719 32.8964844,13.9698893 33.3789062,14.5073242 C33.8613281,15.0447591 34.1025391,15.8382161 34.1025391,16.8876953 L34.1025391,23.9208984 C34.1025391,24.2594401 34.2125651,24.5239258 34.4326172,24.7143555 C34.6526693,24.9047852 34.9192708,25 35.2324219,25 C35.5455729,25 35.8185221,24.9047852 36.0512695,24.7143555 C36.2840169,24.5239258 36.4003906,24.2594401 36.4003906,23.9208984 L36.4003906,16.9765625 C36.4003906,16.1471354 36.2797852,15.4150391 36.0385742,14.7802734 C35.7973633,14.1455078 35.4630534,13.6398112 35.0356445,13.2631836 C34.6082357,12.886556 34.1236979,12.6051432 33.5820312,12.4189453 C33.0403646,12.2327474 32.4479167,12.1396484 31.8046875,12.1396484 C30.6875,12.1396484 29.7395833,12.387207 28.9609375,12.8823242 C28.1822917,13.3774414 27.6152344,14.1158854 27.2597656,15.0976562 C26.9973958,14.1497396 26.4980469,13.4197591 25.7617188,12.9077148 C25.0253906,12.3956706 24.132487,12.1396484 23.0830078,12.1396484 C22.1604818,12.1396484 21.3395182,12.3491211 20.6201172,12.7680664 C19.9007161,13.1870117 19.3590495,13.7858073 18.9951172,14.5644531 L18.9951172,13.3837891 C18.9951172,13.070638 18.8787435,12.8251953 18.6459961,12.6474609 C18.4132487,12.4697266 18.1445312,12.3808594 17.8398438,12.3808594 C17.5351562,12.3808594 17.2664388,12.4760742 17.0336914,12.6665039 C16.800944,12.8569336 16.6845703,13.1171875 16.6845703,13.4472656 L16.6845703,23.9208984 C16.6845703,24.2594401 16.7945964,24.5239258 17.0146484,24.7143555 C17.2347005,24.9047852 17.5013021,25 17.8144531,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M45.3662109,23.6542969 C42.742513,23.6542969 41.4306641,21.9785156 41.4306641,18.6269531 C41.4306641,17.983724 41.5026042,17.3785807 41.6464844,16.8115234 C41.7903646,16.2444661 42.0083008,15.7239583 42.300293,15.25 C42.5922852,14.7760417 42.9900716,14.3994141 43.4936523,14.1201172 C43.9972331,13.8408203 44.5833333,13.7011719 45.2519531,13.7011719 C45.9121094,13.7011719 46.4897461,13.8492839 46.9848633,14.1455078 C47.4799805,14.4417318 47.8629557,14.8374023 48.1337891,15.3325195 C48.4046224,15.8276367 48.6056315,16.3544922 48.7368164,16.9130859 C48.8680013,17.4716797 48.9335938,18.0556641 48.9335938,18.6650391 C48.9335938,19.257487 48.874349,19.8266602 48.7558594,20.3725586 C48.6373698,20.918457 48.4511719,21.4453125 48.1972656,21.953125 C47.9433594,22.4609375 47.5730794,22.8693034 47.0864258,23.1782227 C46.5997721,23.4871419 46.0263672,23.6458333 45.3662109,23.6542969 Z M45.7724609,25.2285156 C46.6526693,25.2285156 47.4461263,25.0486654 48.152832,24.6889648 C48.8595378,24.3292643 49.4329427,23.8426107 49.8730469,23.2290039 C50.313151,22.6153971 50.6495768,21.9150391 50.8823242,21.1279297 C51.1150716,20.3408203 51.2314453,19.5029297 51.2314453,18.6142578 C51.2314453,17.7086589 51.1044922,16.8644206 50.8505859,16.081543 C50.5966797,15.2986654 50.2348633,14.6131185 49.7651367,14.0249023 C49.2954102,13.4366862 48.6987305,12.9754232 47.9750977,12.6411133 C47.2514648,12.3068034 46.4495443,12.1396484 45.5693359,12.1396484 C44.6298828,12.1396484 43.7983398,12.3427734 43.074707,12.7490234 C42.3510742,13.1552734 41.7988281,13.7434896 41.4179688,14.5136719 L41.4179688,7.92480469 C41.4179688,7.59472656 41.3037109,7.33447266 41.0751953,7.14404297 C40.8466797,6.95361328 40.5758464,6.85839844 40.2626953,6.85839844 C39.9580078,6.85839844 39.6914062,6.95361328 39.4628906,7.14404297 C39.234375,7.33447266 39.1201172,7.59472656 39.1201172,7.92480469 L39.1201172,23.8955078 C39.1201172,24.2340495 39.234375,24.4985352 39.4628906,24.6889648 C39.6914062,24.8793945 39.9580078,24.9746094 40.2626953,24.9746094 C40.6435547,24.9746094 40.9313151,24.8793945 41.1259766,24.6889648 C41.320638,24.4985352 41.4179688,24.2382813 41.4179688,23.9082031 L41.4179688,22.7783203 C41.8242187,23.5908203 42.4060872,24.2023112 43.1635742,24.612793 C43.9210612,25.0232747 44.7906901,25.2285156 45.7724609,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M54.7890625,17.7382812 C54.8313802,17.2135417 54.9435221,16.7163086 55.1254883,16.246582 C55.3074544,15.7768555 55.5528971,15.3494466 55.8618164,14.9643555 C56.1707357,14.5792643 56.5664062,14.2724609 57.0488281,14.0439453 C57.53125,13.8154297 58.0686849,13.7011719 58.6611328,13.7011719 C59.8037109,13.7011719 60.671224,14.0777995 61.2636719,14.8310547 C61.8561198,15.5843099 62.1904297,16.5533854 62.2666016,17.7382812 L54.7890625,17.7382812 Z M58.6992188,25.2285156 C60.5950521,25.2285156 62.2115885,24.6276042 63.5488281,23.4257812 C63.7858073,23.2141927 63.9042969,22.9729818 63.9042969,22.7021484 C63.9042969,22.4905599 63.8344727,22.3064779 63.6948242,22.1499023 C63.5551758,21.9933268 63.3837891,21.9150391 63.1806641,21.9150391 C63.0113932,21.9150391 62.8505859,21.9742839 62.6982422,22.0927734 C62.0888672,22.5582682 61.483724,22.9264323 60.8828125,23.1972656 C60.281901,23.468099 59.6048177,23.6035156 58.8515625,23.6035156 C57.6582031,23.5865885 56.6806641,23.2036133 55.9189453,22.4545898 C55.1572266,21.7055664 54.7679036,20.6031901 54.7509766,19.1474609 L63.4599609,19.1474609 C63.7307943,19.1474609 63.9339193,19.0670573 64.0693359,18.90625 C64.2047526,18.7454427 64.2724609,18.5423177 64.2724609,18.296875 C64.2470703,17.4420573 64.124349,16.6570638 63.9042969,15.9418945 C63.6842448,15.2267253 63.3541667,14.5792643 62.9140625,13.9995117 C62.4739583,13.4197591 61.8815104,12.9648438 61.1367188,12.6347656 C60.3919271,12.3046875 59.5244141,12.1396484 58.5341797,12.1396484 C57.2815755,12.1396484 56.1855469,12.4443359 55.2460938,13.0537109 C54.3066406,13.6630859 53.6041667,14.4544271 53.1386719,15.4277344 C52.6731771,16.4010417 52.4404297,17.4759115 52.4404297,18.6523437 C52.4404297,19.9895833 52.7176107,21.159668 53.2719727,22.1625977 C53.8263346,23.1655273 54.5732422,23.9251302 55.5126953,24.4414063 C56.4521484,24.9576823 57.5143229,25.2200521 58.6992188,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M71.4355469,23.6542969 C70.7753906,23.6458333 70.2019857,23.4871419 69.715332,23.1782227 C69.2286784,22.8693034 68.8583984,22.4609375 68.6044922,21.953125 C68.3505859,21.4453125 68.164388,20.918457 68.0458984,20.3725586 C67.9274089,19.8266602 67.8681641,19.257487 67.8681641,18.6650391 C67.8681641,18.0556641 67.9337565,17.4716797 68.0649414,16.9130859 C68.1961263,16.3544922 68.3971354,15.8276367 68.6679688,15.3325195 C68.9388021,14.8374023 69.3217773,14.4417318 69.8168945,14.1455078 C70.3120117,13.8492839 70.8896484,13.7011719 71.5498047,13.7011719 C72.2184245,13.7011719 72.8045247,13.8408203 73.3081055,14.1201172 C73.8116862,14.3994141 74.2094727,14.7760417 74.5014648,15.25 C74.793457,15.7239583 75.0113932,16.2444661 75.1552734,16.8115234 C75.2991536,17.3785807 75.3710938,17.983724 75.3710938,18.6269531 C75.3710938,21.9785156 74.0592448,23.6542969 71.4355469,23.6542969 Z M71.0292969,25.2285156 C72.0110677,25.2285156 72.8806966,25.0232747 73.6381836,24.612793 C74.3956706,24.2023112 74.9775391,23.5908203 75.3837891,22.7783203 L75.3837891,23.9082031 C75.3837891,24.2382813 75.4811198,24.4985352 75.6757812,24.6889648 C75.8704427,24.8793945 76.1582031,24.9746094 76.5390625,24.9746094 C76.84375,24.9746094 77.1103516,24.8793945 77.3388672,24.6889648 C77.5673828,24.4985352 77.6816406,24.2340495 77.6816406,23.8955078 L77.6816406,7.92480469 C77.6816406,7.59472656 77.5673828,7.33447266 77.3388672,7.14404297 C77.1103516,6.95361328 76.84375,6.85839844 76.5390625,6.85839844 C76.2259115,6.85839844 75.9550781,6.95361328 75.7265625,7.14404297 C75.4980469,7.33447266 75.3837891,7.59472656 75.3837891,7.92480469 L75.3837891,14.5136719 C75.0029297,13.7434896 74.4506836,13.1552734 73.7270508,12.7490234 C73.003418,12.3427734 72.171875,12.1396484 71.2324219,12.1396484 C70.3522135,12.1396484 69.550293,12.3068034 68.8266602,12.6411133 C68.1030273,12.9754232 67.5063477,13.4366862 67.0366211,14.0249023 C66.5668945,14.6131185 66.2050781,15.2986654 65.9511719,16.081543 C65.6972656,16.8644206 65.5703125,17.7086589 65.5703125,18.6142578 C65.5703125,19.5029297 65.6866862,20.3408203 65.9194336,21.1279297 C66.152181,21.9150391 66.4886068,22.6153971 66.9287109,23.2290039 C67.3688151,23.8426107 67.9422201,24.3292643 68.6489258,24.6889648 C69.3556315,25.0486654 70.1490885,25.2285156 71.0292969,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M85.5429688,23.6542969 C84.8828125,23.6458333 84.3094076,23.4871419 83.8227539,23.1782227 C83.3361003,22.8693034 82.9658203,22.4609375 82.7119141,21.953125 C82.4580078,21.4453125 82.2718099,20.918457 82.1533203,20.3725586 C82.0348307,19.8266602 81.9755859,19.257487 81.9755859,18.6650391 C81.9755859,18.0556641 82.0411784,17.4716797 82.1723633,16.9130859 C82.3035482,16.3544922 82.5045573,15.8276367 82.7753906,15.3325195 C83.046224,14.8374023 83.4291992,14.4417318 83.9243164,14.1455078 C84.4194336,13.8492839 84.9970703,13.7011719 85.6572266,13.7011719 C86.3258464,13.7011719 86.9119466,13.8408203 87.4155273,14.1201172 C87.9191081,14.3994141 88.3168945,14.7760417 88.6088867,15.25 C88.9008789,15.7239583 89.1188151,16.2444661 89.2626953,16.8115234 C89.4065755,17.3785807 89.4785156,17.983724 89.4785156,18.6269531 C89.4785156,21.9785156 88.1666667,23.6542969 85.5429688,23.6542969 Z M85.1367188,25.2285156 C86.1184896,25.2285156 86.9881185,25.0232747 87.7456055,24.612793 C88.5030924,24.2023112 89.0849609,23.5908203 89.4912109,22.7783203 L89.4912109,23.9082031 C89.4912109,24.2382813 89.5885417,24.4985352 89.7832031,24.6889648 C89.9778646,24.8793945 90.265625,24.9746094 90.6464844,24.9746094 C90.9511719,24.9746094 91.2177734,24.8793945 91.4462891,24.6889648 C91.6748047,24.4985352 91.7890625,24.2340495 91.7890625,23.8955078 L91.7890625,7.92480469 C91.7890625,7.59472656 91.6748047,7.33447266 91.4462891,7.14404297 C91.2177734,6.95361328 90.9511719,6.85839844 90.6464844,6.85839844 C90.3333333,6.85839844 90.0625,6.95361328 89.8339844,7.14404297 C89.6054688,7.33447266 89.4912109,7.59472656 89.4912109,7.92480469 L89.4912109,14.5136719 C89.1103516,13.7434896 88.5581055,13.1552734 87.8344727,12.7490234 C87.1108398,12.3427734 86.2792969,12.1396484 85.3398438,12.1396484 C84.4596354,12.1396484 83.6577148,12.3068034 82.934082,12.6411133 C82.2104492,12.9754232 81.6137695,13.4366862 81.144043,14.0249023 C80.6743164,14.6131185 80.3125,15.2986654 80.0585938,16.081543 C79.8046875,16.8644206 79.6777344,17.7086589 79.6777344,18.6142578 C79.6777344,19.5029297 79.7941081,20.3408203 80.0268555,21.1279297 C80.2596029,21.9150391 80.5960286,22.6153971 81.0361328,23.2290039 C81.476237,23.8426107 82.0496419,24.3292643 82.7563477,24.6889648 C83.4630534,25.0486654 84.2565104,25.2285156 85.1367188,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M95.9306641,9.44824219 C96.3792318,9.44824219 96.7389323,9.31917318 97.0097656,9.06103516 C97.280599,8.80289714 97.4160156,8.46647135 97.4160156,8.05175781 C97.4160156,7.62858073 97.280599,7.28792318 97.0097656,7.02978516 C96.7389323,6.77164714 96.3834635,6.64257812 95.9433594,6.64257812 C95.4947917,6.64257812 95.1329753,6.77376302 94.8579102,7.03613281 C94.5828451,7.2985026 94.4453125,7.63704427 94.4453125,8.05175781 C94.4453125,8.46647135 94.5807292,8.80289714 94.8515625,9.06103516 C95.1223958,9.31917318 95.4820964,9.44824219 95.9306641,9.44824219 Z M95.9179688,25 C96.2311198,25 96.504069,24.8963216 96.7368164,24.6889648 C96.9695638,24.4816081 97.0859375,24.2001953 97.0859375,23.8447266 L97.0859375,13.5234375 C97.0859375,13.1679688 96.9737956,12.8886719 96.7495117,12.6855469 C96.5252279,12.4824219 96.2607422,12.3808594 95.9560547,12.3808594 C95.6429036,12.3808594 95.3699544,12.4824219 95.137207,12.6855469 C94.9044596,12.8886719 94.7880859,13.1679688 94.7880859,13.5234375 L94.7880859,23.8447266 C94.7880859,24.2171224 94.898112,24.5027669 95.1181641,24.7016602 C95.3382161,24.9005534 95.6048177,25 95.9179688,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M101.011719,25 C101.32487,25 101.597819,24.9026693 101.830566,24.7080078 C102.063314,24.5133464 102.179688,24.2467448 102.179688,23.9082031 L102.179688,17.5732422 C102.238932,16.3798828 102.602865,15.4361979 103.271484,14.7421875 C103.940104,14.0481771 104.761068,13.7011719 105.734375,13.7011719 C106.648438,13.7011719 107.357259,13.9847005 107.86084,14.5517578 C108.364421,15.1188151 108.616211,15.9440104 108.616211,17.0273438 L108.616211,23.9082031 C108.616211,24.2552083 108.728353,24.5239258 108.952637,24.7143555 C109.176921,24.9047852 109.445638,25 109.758789,25 C110.07194,25 110.340658,24.9047852 110.564941,24.7143555 C110.789225,24.5239258 110.901367,24.2552083 110.901367,23.9082031 L110.901367,17.0527344 C110.901367,15.4023437 110.486654,14.1708984 109.657227,13.3583984 C108.827799,12.5458984 107.714844,12.1396484 106.318359,12.1396484 C105.345052,12.1396484 104.49235,12.3470052 103.760254,12.7617188 C103.028158,13.1764323 102.501302,13.7688802 102.179688,14.5390625 L102.179688,13.4345703 C102.179688,13.1044922 102.067546,12.8484701 101.843262,12.6665039 C101.618978,12.4845378 101.35026,12.3935547 101.037109,12.3935547 C100.723958,12.3935547 100.453125,12.4887695 100.224609,12.6791992 C99.9960938,12.8696289 99.8818359,13.1341146 99.8818359,13.4726563 L99.8818359,23.9082031 C99.8818359,24.2552083 99.991862,24.5239258 100.211914,24.7143555 C100.431966,24.9047852 100.698568,25 101.011719,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M118.915039,23.4765625 C117.797852,23.4765625 116.875326,23.0703125 116.147461,22.2578125 C115.419596,21.4453125 115.055664,20.2519531 115.055664,18.6777344 C115.055664,17.7382812 115.186849,16.9046224 115.449219,16.1767578 C115.711589,15.4488932 116.138997,14.8543294 116.731445,14.3930664 C117.323893,13.9318034 118.051758,13.7011719 118.915039,13.7011719 C119.456706,13.7011719 119.943359,13.7921549 120.375,13.9741211 C120.806641,14.1560872 121.159993,14.4015299 121.435059,14.7104492 C121.710124,15.0193685 121.938639,15.3875326 122.120605,15.8149414 C122.302572,16.2423503 122.431641,16.6866862 122.507812,17.1479492 C122.583984,17.6092122 122.62207,18.1022135 122.62207,18.6269531 C122.62207,20.1588542 122.275065,21.3500977 121.581055,22.2006836 C120.887044,23.0512695 119.998372,23.4765625 118.915039,23.4765625 Z M118.813477,30.3701172 C120.683919,30.3701172 122.158691,29.9130859 123.237793,28.9990234 C124.316895,28.0849609 124.856445,26.7265625 124.856445,24.9238281 L124.856445,13.4599609 C124.856445,13.1214193 124.750651,12.8569336 124.539062,12.6665039 C124.327474,12.4760742 124.073568,12.3808594 123.777344,12.3808594 C123.50651,12.3808594 123.267415,12.4633789 123.060059,12.628418 C122.852702,12.793457 122.736328,13.0198568 122.710938,13.3076172 L122.710938,14.5898437 C121.873047,12.9563802 120.468099,12.1396484 118.496094,12.1396484 C117.302734,12.1396484 116.265951,12.4401042 115.385742,13.0410156 C114.505534,13.6419271 113.847493,14.4353841 113.411621,15.4213867 C112.975749,16.4073893 112.757812,17.5182292 112.757812,18.7539063 C112.757812,19.625651 112.886882,20.438151 113.14502,21.1914062 C113.403158,21.9446615 113.771322,22.6048177 114.249512,23.171875 C114.727702,23.7389323 115.337077,24.1853841 116.077637,24.5112305 C116.818197,24.8370768 117.645508,25 118.55957,25 C119.532878,25 120.368652,24.7736003 121.066895,24.3208008 C121.765137,23.8680013 122.28776,23.2565104 122.634766,22.4863281 L122.634766,25 C122.634766,26.21875 122.300456,27.1518555 121.631836,27.7993164 C120.963216,28.4467773 119.972982,28.7705078 118.661133,28.7705078 C118.458008,28.7705078 118.256999,28.7620443 118.058105,28.7451172 C117.859212,28.7281901 117.681478,28.7091471 117.524902,28.6879883 C117.368327,28.6668294 117.194824,28.6350911 117.004395,28.5927734 C116.813965,28.5504557 116.661621,28.5166016 116.547363,28.4912109 C116.433105,28.4658203 116.284993,28.4235026 116.103027,28.3642578 C115.921061,28.305013 115.796224,28.2626953 115.728516,28.2373047 C115.660807,28.2119141 115.540202,28.1653646 115.366699,28.0976563 C115.193197,28.0299479 115.089518,27.991862 115.055664,27.9833984 C114.962565,27.9495443 114.865234,27.9326172 114.763672,27.9326172 C114.552083,27.9326172 114.372233,28.0109049 114.224121,28.1674805 C114.076009,28.324056 114.001953,28.508138 114.001953,28.7197266 C114.001953,29.0328776 114.16276,29.2783203 114.484375,29.4560547 C115.626953,30.0654297 117.069987,30.3701172 118.813477,30.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M141.518555,25.2792969 C142.288737,25.2792969 143.022949,25.1946615 143.721191,25.0253906 C144.419434,24.8561198 145.073242,24.59375 145.682617,24.2382813 C146.291992,23.8828125 146.818848,23.449056 147.263184,22.9370117 C147.70752,22.4249674 148.058757,21.8071289 148.316895,21.0834961 C148.575033,20.3598633 148.704102,19.5621745 148.704102,18.6904297 L148.704102,8.06445312 C148.704102,7.67513021 148.579264,7.37044271 148.32959,7.15039063 C148.079915,6.93033854 147.781576,6.8203125 147.43457,6.8203125 C147.079102,6.8203125 146.774414,6.93033854 146.520508,7.15039063 C146.266602,7.37044271 146.139648,7.67513021 146.139648,8.06445312 L146.139648,18.6269531 C146.139648,19.3886719 146.008464,20.069987 145.746094,20.6708984 C145.483724,21.2718099 145.132487,21.7542318 144.692383,22.1181641 C144.252279,22.4820964 143.761393,22.7571615 143.219727,22.9433594 C142.67806,23.1295573 142.111003,23.2226562 141.518555,23.2226562 C140.926107,23.2226562 140.361165,23.1295573 139.82373,22.9433594 C139.286296,22.7571615 138.799642,22.4820964 138.36377,22.1181641 C137.927897,21.7542318 137.578776,21.2718099 137.316406,20.6708984 C137.054036,20.069987 136.922852,19.3886719 136.922852,18.6269531 L136.922852,8.06445312 C136.922852,7.67513021 136.795898,7.37044271 136.541992,7.15039063 C136.288086,6.93033854 135.98763,6.8203125 135.640625,6.8203125 C135.285156,6.8203125 134.980469,6.93033854 134.726562,7.15039063 C134.472656,7.37044271 134.345703,7.67513021 134.345703,8.06445312 L134.345703,18.6904297 C134.345703,19.7652995 134.540365,20.7301432 134.929688,21.5849609 C135.31901,22.4397786 135.847982,23.1316732 136.516602,23.6606445 C137.185221,24.1896159 137.944824,24.5916341 138.79541,24.8666992 C139.645996,25.1417643 140.553711,25.2792969 141.518555,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M151.855469,23.8574219 C152.363281,24.2298177 153.04248,24.5535482 153.893066,24.8286133 C154.743652,25.1036784 155.621745,25.2412109 156.527344,25.2412109 C157.500651,25.2412109 158.355469,25.1079102 159.091797,24.8413086 C159.828125,24.574707 160.422689,24.1451823 160.875488,23.5527344 C161.328288,22.9602865 161.554688,22.2324219 161.554688,21.3691406 C161.554688,20.4550781 161.281738,19.7039388 160.73584,19.1157227 C160.189941,18.5275065 159.277995,18.0768229 158,17.7636719 L155.803711,17.2177734 C154.881185,16.9892578 154.280273,16.7713216 154.000977,16.5639648 C153.72168,16.3566081 153.582031,15.9990234 153.582031,15.4912109 C153.582031,14.8649089 153.852865,14.4057617 154.394531,14.1137695 C154.936198,13.8217773 155.664062,13.6757813 156.578125,13.6757813 C156.865885,13.6757813 157.149414,13.6948242 157.428711,13.7329102 C157.708008,13.7709961 157.983073,13.8260091 158.253906,13.8979492 C158.52474,13.9698893 158.738444,14.03125 158.89502,14.0820313 C159.051595,14.1328125 159.25472,14.2068685 159.504395,14.3041992 C159.754069,14.4015299 159.891602,14.4544271 159.916992,14.4628906 C160.052409,14.5136719 160.183594,14.5390625 160.310547,14.5390625 C160.530599,14.5390625 160.706217,14.4692383 160.837402,14.3295898 C160.968587,14.1899414 161.03418,14.0227865 161.03418,13.828125 C161.03418,13.4980469 160.869141,13.2483724 160.539062,13.0791016 C160.082031,12.8336589 159.491699,12.6114909 158.768066,12.4125977 C158.044434,12.2137044 157.272135,12.1142578 156.451172,12.1142578 C155.765625,12.1142578 155.130859,12.1798503 154.546875,12.3110352 C153.962891,12.4422201 153.436035,12.6411133 152.966309,12.9077148 C152.496582,13.1743164 152.126302,13.5361328 151.855469,13.9931641 C151.584635,14.4501953 151.449219,14.9791667 151.449219,15.5800781 C151.449219,15.9609375 151.487305,16.2994792 151.563477,16.5957031 C151.639648,16.8919271 151.76237,17.1500651 151.931641,17.3701172 C152.100911,17.5901693 152.282878,17.7784831 152.477539,17.9350586 C152.672201,18.0916341 152.932454,18.2376302 153.258301,18.3730469 C153.584147,18.5084635 153.888835,18.6184896 154.172363,18.703125 C154.455892,18.7877604 154.826172,18.8893229 155.283203,19.0078125 L157.530273,19.5664062 C158.275065,19.7526042 158.808268,19.9980469 159.129883,20.3027344 C159.451497,20.6074219 159.612305,21.0136719 159.612305,21.5214844 C159.612305,22.2324219 159.32666,22.7719727 158.755371,23.1401367 C158.184082,23.5083008 157.432943,23.6923828 156.501953,23.6923828 C155.207031,23.6839193 153.979818,23.319987 152.820312,22.6005859 C152.634115,22.4820964 152.439453,22.4228516 152.236328,22.4228516 C152.016276,22.4228516 151.83431,22.4969076 151.69043,22.6450195 C151.546549,22.7931315 151.474609,22.96875 151.474609,23.171875 C151.474609,23.4511719 151.601562,23.6796875 151.855469,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M167.37207,23.7050781 C166.584961,23.7050781 165.962891,23.5252279 165.505859,23.1655273 C165.048828,22.8058268 164.820312,22.2408854 164.820312,21.4707031 C164.820312,21.047526 164.881673,20.7005208 165.004395,20.4296875 C165.127116,20.1588542 165.31543,19.9282227 165.569336,19.737793 C165.823242,19.5473633 166.204102,19.4034831 166.711914,19.3061523 C167.219727,19.2088216 167.803711,19.1411133 168.463867,19.1030273 C169.124023,19.0649414 169.978841,19.0458984 171.02832,19.0458984 L171.02832,19.4267578 C171.02832,20.6708984 170.662272,21.694987 169.930176,22.4990234 C169.198079,23.3030599 168.345378,23.7050781 167.37207,23.7050781 Z M167.130859,25.2285156 C168.950521,25.2285156 170.253906,24.4033203 171.041016,22.7529297 L171.041016,23.9716797 C171.041016,24.3017578 171.148926,24.5577799 171.364746,24.7397461 C171.580566,24.9217122 171.84082,25.0126953 172.145508,25.0126953 C172.450195,25.0126953 172.718913,24.9174805 172.95166,24.7270508 C173.184408,24.5366211 173.300781,24.2763672 173.300781,23.9462891 L173.300781,16.5449219 C173.300781,15.046875 172.862793,13.938151 171.986816,13.21875 C171.11084,12.499349 169.898438,12.1396484 168.349609,12.1396484 C166.538411,12.1396484 164.976888,12.5078125 163.665039,13.2441406 C163.42806,13.3795573 163.30957,13.5742187 163.30957,13.828125 C163.30957,14.0481771 163.389974,14.2491862 163.550781,14.4311523 C163.711589,14.6131185 163.902018,14.7041016 164.12207,14.7041016 C164.24056,14.7041016 164.342122,14.6829427 164.426758,14.640625 C164.858398,14.4459635 165.222331,14.2936198 165.518555,14.1835937 C165.814779,14.0735677 166.221029,13.9635417 166.737305,13.8535156 C167.253581,13.7434896 167.769857,13.6884766 168.286133,13.6884766 C169.149414,13.6884766 169.822266,13.9042969 170.304688,14.3359375 C170.787109,14.7675781 171.02832,15.4361979 171.02832,16.3417969 L171.02832,17.7255859 C170.088867,17.7255859 169.286947,17.7382812 168.622559,17.7636719 C167.958171,17.7890625 167.312826,17.8334961 166.686523,17.8969727 C166.060221,17.9604492 165.543945,18.0535482 165.137695,18.1762695 C164.731445,18.2989909 164.359049,18.4555664 164.020508,18.6459961 C163.681966,18.8364258 163.419596,19.0691732 163.233398,19.3442383 C163.047201,19.6193034 162.90332,19.9388021 162.801758,20.3027344 C162.700195,20.6666667 162.649414,21.0898438 162.649414,21.5722656 C162.649414,22.7402344 163.064128,23.6416016 163.893555,24.2763672 C164.722982,24.9111328 165.802083,25.2285156 167.130859,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M181.314453,23.4765625 C180.197266,23.4765625 179.27474,23.0703125 178.546875,22.2578125 C177.81901,21.4453125 177.455078,20.2519531 177.455078,18.6777344 C177.455078,17.7382812 177.586263,16.9046224 177.848633,16.1767578 C178.111003,15.4488932 178.538411,14.8543294 179.130859,14.3930664 C179.723307,13.9318034 180.451172,13.7011719 181.314453,13.7011719 C181.85612,13.7011719 182.342773,13.7921549 182.774414,13.9741211 C183.206055,14.1560872 183.559408,14.4015299 183.834473,14.7104492 C184.109538,15.0193685 184.338053,15.3875326 184.52002,15.8149414 C184.701986,16.2423503 184.831055,16.6866862 184.907227,17.1479492 C184.983398,17.6092122 185.021484,18.1022135 185.021484,18.6269531 C185.021484,20.1588542 184.674479,21.3500977 183.980469,22.2006836 C183.286458,23.0512695 182.397786,23.4765625 181.314453,23.4765625 Z M181.212891,30.3701172 C183.083333,30.3701172 184.558105,29.9130859 185.637207,28.9990234 C186.716309,28.0849609 187.255859,26.7265625 187.255859,24.9238281 L187.255859,13.4599609 C187.255859,13.1214193 187.150065,12.8569336 186.938477,12.6665039 C186.726888,12.4760742 186.472982,12.3808594 186.176758,12.3808594 C185.905924,12.3808594 185.666829,12.4633789 185.459473,12.628418 C185.252116,12.793457 185.135742,13.0198568 185.110352,13.3076172 L185.110352,14.5898437 C184.272461,12.9563802 182.867513,12.1396484 180.895508,12.1396484 C179.702148,12.1396484 178.665365,12.4401042 177.785156,13.0410156 C176.904948,13.6419271 176.246908,14.4353841 175.811035,15.4213867 C175.375163,16.4073893 175.157227,17.5182292 175.157227,18.7539063 C175.157227,19.625651 175.286296,20.438151 175.544434,21.1914062 C175.802572,21.9446615 176.170736,22.6048177 176.648926,23.171875 C177.127116,23.7389323 177.736491,24.1853841 178.477051,24.5112305 C179.217611,24.8370768 180.044922,25 180.958984,25 C181.932292,25 182.768066,24.7736003 183.466309,24.3208008 C184.164551,23.8680013 184.687174,23.2565104 185.03418,22.4863281 L185.03418,25 C185.03418,26.21875 184.69987,27.1518555 184.03125,27.7993164 C183.36263,28.4467773 182.372396,28.7705078 181.060547,28.7705078 C180.857422,28.7705078 180.656413,28.7620443 180.45752,28.7451172 C180.258626,28.7281901 180.080892,28.7091471 179.924316,28.6879883 C179.767741,28.6668294 179.594238,28.6350911 179.403809,28.5927734 C179.213379,28.5504557 179.061035,28.5166016 178.946777,28.4912109 C178.83252,28.4658203 178.684408,28.4235026 178.502441,28.3642578 C178.320475,28.305013 178.195638,28.2626953 178.12793,28.2373047 C178.060221,28.2119141 177.939616,28.1653646 177.766113,28.0976563 C177.592611,28.0299479 177.488932,27.991862 177.455078,27.9833984 C177.361979,27.9495443 177.264648,27.9326172 177.163086,27.9326172 C176.951497,27.9326172 176.771647,28.0109049 176.623535,28.1674805 C176.475423,28.324056 176.401367,28.508138 176.401367,28.7197266 C176.401367,29.0328776 176.562174,29.2783203 176.883789,29.4560547 C178.026367,30.0654297 179.469401,30.3701172 181.212891,30.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M191.537109,17.7382812 C191.579427,17.2135417 191.691569,16.7163086 191.873535,16.246582 C192.055501,15.7768555 192.300944,15.3494466 192.609863,14.9643555 C192.918783,14.5792643 193.314453,14.2724609 193.796875,14.0439453 C194.279297,13.8154297 194.816732,13.7011719 195.40918,13.7011719 C196.551758,13.7011719 197.419271,14.0777995 198.011719,14.8310547 C198.604167,15.5843099 198.938477,16.5533854 199.014648,17.7382812 L191.537109,17.7382812 Z M195.447266,25.2285156 C197.343099,25.2285156 198.959635,24.6276042 200.296875,23.4257812 C200.533854,23.2141927 200.652344,22.9729818 200.652344,22.7021484 C200.652344,22.4905599 200.58252,22.3064779 200.442871,22.1499023 C200.303223,21.9933268 200.131836,21.9150391 199.928711,21.9150391 C199.75944,21.9150391 199.598633,21.9742839 199.446289,22.0927734 C198.836914,22.5582682 198.231771,22.9264323 197.630859,23.1972656 C197.029948,23.468099 196.352865,23.6035156 195.599609,23.6035156 C194.40625,23.5865885 193.428711,23.2036133 192.666992,22.4545898 C191.905273,21.7055664 191.515951,20.6031901 191.499023,19.1474609 L200.208008,19.1474609 C200.478841,19.1474609 200.681966,19.0670573 200.817383,18.90625 C200.952799,18.7454427 201.020508,18.5423177 201.020508,18.296875 C200.995117,17.4420573 200.872396,16.6570638 200.652344,15.9418945 C200.432292,15.2267253 200.102214,14.5792643 199.662109,13.9995117 C199.222005,13.4197591 198.629557,12.9648438 197.884766,12.6347656 C197.139974,12.3046875 196.272461,12.1396484 195.282227,12.1396484 C194.029622,12.1396484 192.933594,12.4443359 191.994141,13.0537109 C191.054688,13.6630859 190.352214,14.4544271 189.886719,15.4277344 C189.421224,16.4010417 189.188477,17.4759115 189.188477,18.6523437 C189.188477,19.9895833 189.465658,21.159668 190.02002,22.1625977 C190.574382,23.1655273 191.321289,23.9251302 192.260742,24.4414063 C193.200195,24.9576823 194.26237,25.2200521 195.447266,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M203.092773,23.8574219 C203.600586,24.2298177 204.279785,24.5535482 205.130371,24.8286133 C205.980957,25.1036784 206.859049,25.2412109 207.764648,25.2412109 C208.737956,25.2412109 209.592773,25.1079102 210.329102,24.8413086 C211.06543,24.574707 211.659993,24.1451823 212.112793,23.5527344 C212.565592,22.9602865 212.791992,22.2324219 212.791992,21.3691406 C212.791992,20.4550781 212.519043,19.7039388 211.973145,19.1157227 C211.427246,18.5275065 210.515299,18.0768229 209.237305,17.7636719 L207.041016,17.2177734 C206.11849,16.9892578 205.517578,16.7713216 205.238281,16.5639648 C204.958984,16.3566081 204.819336,15.9990234 204.819336,15.4912109 C204.819336,14.8649089 205.090169,14.4057617 205.631836,14.1137695 C206.173503,13.8217773 206.901367,13.6757813 207.81543,13.6757813 C208.10319,13.6757813 208.386719,13.6948242 208.666016,13.7329102 C208.945312,13.7709961 209.220378,13.8260091 209.491211,13.8979492 C209.762044,13.9698893 209.975749,14.03125 210.132324,14.0820313 C210.2889,14.1328125 210.492025,14.2068685 210.741699,14.3041992 C210.991374,14.4015299 211.128906,14.4544271 211.154297,14.4628906 C211.289714,14.5136719 211.420898,14.5390625 211.547852,14.5390625 C211.767904,14.5390625 211.943522,14.4692383 212.074707,14.3295898 C212.205892,14.1899414 212.271484,14.0227865 212.271484,13.828125 C212.271484,13.4980469 212.106445,13.2483724 211.776367,13.0791016 C211.319336,12.8336589 210.729004,12.6114909 210.005371,12.4125977 C209.281738,12.2137044 208.50944,12.1142578 207.688477,12.1142578 C207.00293,12.1142578 206.368164,12.1798503 205.78418,12.3110352 C205.200195,12.4422201 204.67334,12.6411133 204.203613,12.9077148 C203.733887,13.1743164 203.363607,13.5361328 203.092773,13.9931641 C202.82194,14.4501953 202.686523,14.9791667 202.686523,15.5800781 C202.686523,15.9609375 202.724609,16.2994792 202.800781,16.5957031 C202.876953,16.8919271 202.999674,17.1500651 203.168945,17.3701172 C203.338216,17.5901693 203.520182,17.7784831 203.714844,17.9350586 C203.909505,18.0916341 204.169759,18.2376302 204.495605,18.3730469 C204.821452,18.5084635 205.126139,18.6184896 205.409668,18.703125 C205.693197,18.7877604 206.063477,18.8893229 206.520508,19.0078125 L208.767578,19.5664062 C209.51237,19.7526042 210.045573,19.9980469 210.367188,20.3027344 C210.688802,20.6074219 210.849609,21.0136719 210.849609,21.5214844 C210.849609,22.2324219 210.563965,22.7719727 209.992676,23.1401367 C209.421387,23.5083008 208.670247,23.6923828 207.739258,23.6923828 C206.444336,23.6839193 205.217122,23.319987 204.057617,22.6005859 C203.871419,22.4820964 203.676758,22.4228516 203.473633,22.4228516 C203.253581,22.4228516 203.071615,22.4969076 202.927734,22.6450195 C202.783854,22.7931315 202.711914,22.96875 202.711914,23.171875 C202.711914,23.4511719 202.838867,23.6796875 203.092773,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-10" transform="translate(0, 92)" xlink:href="#path-24" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M7.45214844,25 C7.79915365,25 8.09960938,24.889974 8.35351563,24.6699219 C8.60742188,24.4498698 8.734375,24.1494141 8.734375,23.7685547 L8.734375,8.90234375 L13.4570313,8.90234375 C13.7871094,8.90234375 14.0452474,8.80078125 14.2314453,8.59765625 C14.4176432,8.39453125 14.5107422,8.15332031 14.5107422,7.87402344 C14.5107422,7.59472656 14.4197591,7.35563151 14.237793,7.15673828 C14.0558268,6.95784505 13.7955729,6.85839844 13.4570313,6.85839844 L1.38378906,6.85839844 C1.05371094,6.85839844 0.795572917,6.95996094 0.609375,7.16308594 C0.423177083,7.36621094 0.330078125,7.6031901 0.330078125,7.87402344 C0.330078125,8.15332031 0.423177083,8.39453125 0.609375,8.59765625 C0.795572917,8.80078125 1.05371094,8.90234375 1.38378906,8.90234375 L6.16992188,8.90234375 L6.16992188,23.7685547 C6.16992188,24.1494141 6.296875,24.4498698 6.55078125,24.6699219 C6.8046875,24.889974 7.10514323,25 7.45214844,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M21.2675781,25.2285156 C23.2226562,25.2285156 24.7714844,24.6149089 25.9140625,23.3876953 C27.0566406,22.1604818 27.6279297,20.5947266 27.6279297,18.6904297 C27.6279297,16.7692057 27.0545247,15.1971029 25.9077148,13.9741211 C24.7609049,12.7511393 23.2141927,12.1396484 21.2675781,12.1396484 C19.3209635,12.1396484 17.7742513,12.7532552 16.6274414,13.9804688 C15.4806315,15.2076823 14.9072266,16.7776693 14.9072266,18.6904297 C14.9072266,20.5947266 15.4806315,22.1604818 16.6274414,23.3876953 C17.7742513,24.6149089 19.3209635,25.2285156 21.2675781,25.2285156 Z M21.2421875,23.6542969 C19.9726562,23.6542969 18.980306,23.2078451 18.2651367,22.3149414 C17.5499674,21.4220378 17.1923828,20.2138672 17.1923828,18.6904297 C17.1923828,17.1500651 17.5520833,15.933431 18.2714844,15.0405273 C18.9908854,14.1476237 19.9895833,13.7011719 21.2675781,13.7011719 C22.5371094,13.7011719 23.5336914,14.1497396 24.2573242,15.046875 C24.980957,15.9440104 25.3427734,17.1585286 25.3427734,18.6904297 C25.3427734,20.2307943 24.9851888,21.4431966 24.2700195,22.3276367 C23.5548503,23.2120768 22.5455729,23.6542969 21.2421875,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M30.6015625,25 C30.9147135,25 31.1876628,24.8984375 31.4204102,24.6953125 C31.6531576,24.4921875 31.7695313,24.2086589 31.7695313,23.8447266 L31.7695313,18.3095703 L37.7363281,24.6572266 C37.9479167,24.8942057 38.2018229,25.0126953 38.4980469,25.0126953 C38.7773438,25.0126953 39.0249023,24.906901 39.2407227,24.6953125 C39.456543,24.483724 39.5644531,24.242513 39.5644531,23.9716797 C39.5644531,23.726237 39.4713542,23.5061849 39.2851562,23.3115234 L34.2451172,18.0048828 L38.8535156,13.8789062 C39.0566406,13.6842448 39.1582031,13.4684245 39.1582031,13.2314453 C39.1582031,12.9690755 39.0545247,12.7320964 38.847168,12.5205078 C38.6398112,12.3089193 38.4007161,12.203125 38.1298828,12.203125 C37.9182943,12.203125 37.719401,12.2835286 37.5332031,12.4443359 L31.7695313,17.7255859 L31.7695313,8.01367188 C31.7695313,7.65820312 31.6573893,7.37890625 31.4331055,7.17578125 C31.2088216,6.97265625 30.9443359,6.87109375 30.6396484,6.87109375 C30.3264974,6.87109375 30.0535482,6.97265625 29.8208008,7.17578125 C29.5880534,7.37890625 29.4716797,7.65820312 29.4716797,8.01367188 L29.4716797,23.8447266 C29.4716797,24.2171224 29.5817057,24.5027669 29.8017578,24.7016602 C30.0218099,24.9005534 30.2884115,25 30.6015625,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M42.7158203,17.7382812 C42.758138,17.2135417 42.8702799,16.7163086 43.0522461,16.246582 C43.2342122,15.7768555 43.4796549,15.3494466 43.7885742,14.9643555 C44.0974935,14.5792643 44.4931641,14.2724609 44.9755859,14.0439453 C45.4580078,13.8154297 45.9954427,13.7011719 46.5878906,13.7011719 C47.7304688,13.7011719 48.5979818,14.0777995 49.1904297,14.8310547 C49.7828776,15.5843099 50.1171875,16.5533854 50.1933594,17.7382812 L42.7158203,17.7382812 Z M46.6259766,25.2285156 C48.5218099,25.2285156 50.1383464,24.6276042 51.4755859,23.4257812 C51.7125651,23.2141927 51.8310547,22.9729818 51.8310547,22.7021484 C51.8310547,22.4905599 51.7612305,22.3064779 51.621582,22.1499023 C51.4819336,21.9933268 51.3105469,21.9150391 51.1074219,21.9150391 C50.938151,21.9150391 50.7773438,21.9742839 50.625,22.0927734 C50.015625,22.5582682 49.4104818,22.9264323 48.8095703,23.1972656 C48.2086589,23.468099 47.5315755,23.6035156 46.7783203,23.6035156 C45.5849609,23.5865885 44.6074219,23.2036133 43.8457031,22.4545898 C43.0839844,21.7055664 42.6946615,20.6031901 42.6777344,19.1474609 L51.3867188,19.1474609 C51.6575521,19.1474609 51.8606771,19.0670573 51.9960938,18.90625 C52.1315104,18.7454427 52.1992188,18.5423177 52.1992188,18.296875 C52.1738281,17.4420573 52.0511068,16.6570638 51.8310547,15.9418945 C51.6110026,15.2267253 51.2809245,14.5792643 50.8408203,13.9995117 C50.4007161,13.4197591 49.8082682,12.9648438 49.0634766,12.6347656 C48.3186849,12.3046875 47.4511719,12.1396484 46.4609375,12.1396484 C45.2083333,12.1396484 44.1123047,12.4443359 43.1728516,13.0537109 C42.2333984,13.6630859 41.5309245,14.4544271 41.0654297,15.4277344 C40.5999349,16.4010417 40.3671875,17.4759115 40.3671875,18.6523437 C40.3671875,19.9895833 40.6443685,21.159668 41.1987305,22.1625977 C41.7530924,23.1655273 42.5,23.9251302 43.4394531,24.4414063 C44.3789062,24.9576823 45.4410807,25.2200521 46.6259766,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M55.4902344,25 C55.8033854,25 56.0763346,24.9026693 56.309082,24.7080078 C56.5418294,24.5133464 56.6582031,24.2467448 56.6582031,23.9082031 L56.6582031,17.5732422 C56.7174479,16.3798828 57.0813802,15.4361979 57.75,14.7421875 C58.4186198,14.0481771 59.2395833,13.7011719 60.2128906,13.7011719 C61.1269531,13.7011719 61.8357747,13.9847005 62.3393555,14.5517578 C62.8429362,15.1188151 63.0947266,15.9440104 63.0947266,17.0273438 L63.0947266,23.9082031 C63.0947266,24.2552083 63.2068685,24.5239258 63.4311523,24.7143555 C63.6554362,24.9047852 63.9241536,25 64.2373047,25 C64.5504557,25 64.8191732,24.9047852 65.043457,24.7143555 C65.2677409,24.5239258 65.3798828,24.2552083 65.3798828,23.9082031 L65.3798828,17.0527344 C65.3798828,15.4023437 64.9651693,14.1708984 64.1357422,13.3583984 C63.3063151,12.5458984 62.1933594,12.1396484 60.796875,12.1396484 C59.8235677,12.1396484 58.9708659,12.3470052 58.2387695,12.7617188 C57.5066732,13.1764323 56.9798177,13.7688802 56.6582031,14.5390625 L56.6582031,13.4345703 C56.6582031,13.1044922 56.5460612,12.8484701 56.3217773,12.6665039 C56.0974935,12.4845378 55.828776,12.3935547 55.515625,12.3935547 C55.202474,12.3935547 54.9316406,12.4887695 54.703125,12.6791992 C54.4746094,12.8696289 54.3603516,13.1341146 54.3603516,13.4726563 L54.3603516,23.9082031 C54.3603516,24.2552083 54.4703776,24.5239258 54.6904297,24.7143555 C54.9104818,24.9047852 55.1770833,25 55.4902344,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M82.4990234,25.2792969 C84.4202474,25.2792969 86.0875651,24.8180339 87.5009766,23.8955078 C87.9241536,23.6246745 88.1357422,23.2945964 88.1357422,22.9052734 C88.1357422,22.6429036 88.0426432,22.414388 87.8564453,22.2197266 C87.6702474,22.0250651 87.4459635,21.9277344 87.1835938,21.9277344 C87.03125,21.9277344 86.8704427,21.9742839 86.7011719,22.0673828 C86.0410156,22.4397786 85.4274089,22.7254232 84.8603516,22.9243164 C84.2932943,23.1232096 83.633138,23.2226562 82.8798828,23.2226562 C82.0419922,23.2226562 81.2823893,23.0957031 80.6010742,22.8417969 C79.9197591,22.5878906 79.3484701,22.2408854 78.887207,21.8007812 C78.425944,21.3606771 78.038737,20.8338216 77.7255859,20.2202148 C77.4124349,19.6066081 77.1839193,18.9443359 77.0400391,18.2333984 C76.8961589,17.5224609 76.8242188,16.764974 76.8242188,15.9609375 C76.8242188,15.156901 76.8961589,14.3972982 77.0400391,13.6821289 C77.1839193,12.9669596 77.4124349,12.3004557 77.7255859,11.6826172 C78.038737,11.0647786 78.4238281,10.5315755 78.8808594,10.0830078 C79.3378906,9.6344401 79.9007161,9.28320313 80.5693359,9.02929688 C81.2379557,8.77539063 81.9827474,8.6484375 82.8037109,8.6484375 C83.1422526,8.6484375 83.4702148,8.66536458 83.7875977,8.69921875 C84.1049805,8.73307292 84.3694661,8.77327474 84.5810547,8.81982422 C84.7926432,8.8663737 85.0126953,8.92985026 85.2412109,9.01025391 C85.4697266,9.09065755 85.6389974,9.15413411 85.7490234,9.20068359 C85.8590495,9.24723307 86.0008138,9.31494141 86.1743164,9.40380859 C86.347819,9.49267578 86.4514974,9.54557292 86.4853516,9.5625 C86.6376953,9.63020833 86.7773438,9.6640625 86.9042969,9.6640625 C87.1666667,9.6640625 87.3909505,9.5625 87.5771484,9.359375 C87.7633464,9.15625 87.8564453,8.91927083 87.8564453,8.6484375 C87.8564453,8.26757812 87.6702474,7.97558594 87.2978516,7.77246094 C85.8844401,6.98535156 84.3567708,6.59179688 82.7148438,6.59179688 C81.3860677,6.59179688 80.1757812,6.83723958 79.0839844,7.328125 C77.9921875,7.81901042 77.0865885,8.48974609 76.3671875,9.34033203 C75.6477865,10.190918 75.0913086,11.1875 74.6977539,12.3300781 C74.3041992,13.4726562 74.1074219,14.695638 74.1074219,15.9990234 C74.1074219,17.7509766 74.4290365,19.3209635 75.0722656,20.7089844 C75.7154948,22.0970052 76.6803385,23.2057292 77.9667969,24.0351562 C79.2532552,24.8645833 80.7639974,25.2792969 82.4990234,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M91.9599609,25 C92.273112,25 92.5460612,24.8963216 92.7788086,24.6889648 C93.011556,24.4816081 93.1279297,24.2001953 93.1279297,23.8447266 L93.1279297,8.01367188 C93.1279297,7.65820312 93.0157878,7.37890625 92.7915039,7.17578125 C92.5672201,6.97265625 92.3027344,6.87109375 91.9980469,6.87109375 C91.6848958,6.87109375 91.4119466,6.97265625 91.1791992,7.17578125 C90.9464518,7.37890625 90.8300781,7.65820312 90.8300781,8.01367188 L90.8300781,23.8447266 C90.8300781,24.2171224 90.9401042,24.5027669 91.1601562,24.7016602 C91.3802083,24.9005534 91.6468099,25 91.9599609,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M99.6435547,23.7050781 C98.8564453,23.7050781 98.234375,23.5252279 97.7773438,23.1655273 C97.3203125,22.8058268 97.0917969,22.2408854 97.0917969,21.4707031 C97.0917969,21.047526 97.1531576,20.7005208 97.2758789,20.4296875 C97.3986003,20.1588542 97.5869141,19.9282227 97.8408203,19.737793 C98.0947266,19.5473633 98.4755859,19.4034831 98.9833984,19.3061523 C99.4912109,19.2088216 100.075195,19.1411133 100.735352,19.1030273 C101.395508,19.0649414 102.250326,19.0458984 103.299805,19.0458984 L103.299805,19.4267578 C103.299805,20.6708984 102.933757,21.694987 102.20166,22.4990234 C101.469564,23.3030599 100.616862,23.7050781 99.6435547,23.7050781 Z M99.4023438,25.2285156 C101.222005,25.2285156 102.525391,24.4033203 103.3125,22.7529297 L103.3125,23.9716797 C103.3125,24.3017578 103.42041,24.5577799 103.63623,24.7397461 C103.852051,24.9217122 104.112305,25.0126953 104.416992,25.0126953 C104.72168,25.0126953 104.990397,24.9174805 105.223145,24.7270508 C105.455892,24.5366211 105.572266,24.2763672 105.572266,23.9462891 L105.572266,16.5449219 C105.572266,15.046875 105.134277,13.938151 104.258301,13.21875 C103.382324,12.499349 102.169922,12.1396484 100.621094,12.1396484 C98.8098958,12.1396484 97.2483724,12.5078125 95.9365234,13.2441406 C95.6995443,13.3795573 95.5810547,13.5742187 95.5810547,13.828125 C95.5810547,14.0481771 95.6614583,14.2491862 95.8222656,14.4311523 C95.9830729,14.6131185 96.1735026,14.7041016 96.3935547,14.7041016 C96.5120443,14.7041016 96.6136068,14.6829427 96.6982422,14.640625 C97.1298828,14.4459635 97.4938151,14.2936198 97.7900391,14.1835937 C98.086263,14.0735677 98.492513,13.9635417 99.0087891,13.8535156 C99.5250651,13.7434896 100.041341,13.6884766 100.557617,13.6884766 C101.420898,13.6884766 102.09375,13.9042969 102.576172,14.3359375 C103.058594,14.7675781 103.299805,15.4361979 103.299805,16.3417969 L103.299805,17.7255859 C102.360352,17.7255859 101.558431,17.7382812 100.894043,17.7636719 C100.229655,17.7890625 99.5843099,17.8334961 98.9580078,17.8969727 C98.3317057,17.9604492 97.8154297,18.0535482 97.4091797,18.1762695 C97.0029297,18.2989909 96.6305339,18.4555664 96.2919922,18.6459961 C95.9534505,18.8364258 95.6910807,19.0691732 95.5048828,19.3442383 C95.3186849,19.6193034 95.1748047,19.9388021 95.0732422,20.3027344 C94.9716797,20.6666667 94.9208984,21.0898438 94.9208984,21.5722656 C94.9208984,22.7402344 95.335612,23.6416016 96.1650391,24.2763672 C96.9944661,24.9111328 98.0735677,25.2285156 99.4023438,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M108.203125,23.8574219 C108.710938,24.2298177 109.390137,24.5535482 110.240723,24.8286133 C111.091309,25.1036784 111.969401,25.2412109 112.875,25.2412109 C113.848307,25.2412109 114.703125,25.1079102 115.439453,24.8413086 C116.175781,24.574707 116.770345,24.1451823 117.223145,23.5527344 C117.675944,22.9602865 117.902344,22.2324219 117.902344,21.3691406 C117.902344,20.4550781 117.629395,19.7039388 117.083496,19.1157227 C116.537598,18.5275065 115.625651,18.0768229 114.347656,17.7636719 L112.151367,17.2177734 C111.228841,16.9892578 110.62793,16.7713216 110.348633,16.5639648 C110.069336,16.3566081 109.929688,15.9990234 109.929688,15.4912109 C109.929688,14.8649089 110.200521,14.4057617 110.742188,14.1137695 C111.283854,13.8217773 112.011719,13.6757813 112.925781,13.6757813 C113.213542,13.6757813 113.49707,13.6948242 113.776367,13.7329102 C114.055664,13.7709961 114.330729,13.8260091 114.601562,13.8979492 C114.872396,13.9698893 115.0861,14.03125 115.242676,14.0820313 C115.399251,14.1328125 115.602376,14.2068685 115.852051,14.3041992 C116.101725,14.4015299 116.239258,14.4544271 116.264648,14.4628906 C116.400065,14.5136719 116.53125,14.5390625 116.658203,14.5390625 C116.878255,14.5390625 117.053874,14.4692383 117.185059,14.3295898 C117.316243,14.1899414 117.381836,14.0227865 117.381836,13.828125 C117.381836,13.4980469 117.216797,13.2483724 116.886719,13.0791016 C116.429688,12.8336589 115.839355,12.6114909 115.115723,12.4125977 C114.39209,12.2137044 113.619792,12.1142578 112.798828,12.1142578 C112.113281,12.1142578 111.478516,12.1798503 110.894531,12.3110352 C110.310547,12.4422201 109.783691,12.6411133 109.313965,12.9077148 C108.844238,13.1743164 108.473958,13.5361328 108.203125,13.9931641 C107.932292,14.4501953 107.796875,14.9791667 107.796875,15.5800781 C107.796875,15.9609375 107.834961,16.2994792 107.911133,16.5957031 C107.987305,16.8919271 108.110026,17.1500651 108.279297,17.3701172 C108.448568,17.5901693 108.630534,17.7784831 108.825195,17.9350586 C109.019857,18.0916341 109.280111,18.2376302 109.605957,18.3730469 C109.931803,18.5084635 110.236491,18.6184896 110.52002,18.703125 C110.803548,18.7877604 111.173828,18.8893229 111.630859,19.0078125 L113.87793,19.5664062 C114.622721,19.7526042 115.155924,19.9980469 115.477539,20.3027344 C115.799154,20.6074219 115.959961,21.0136719 115.959961,21.5214844 C115.959961,22.2324219 115.674316,22.7719727 115.103027,23.1401367 C114.531738,23.5083008 113.780599,23.6923828 112.849609,23.6923828 C111.554688,23.6839193 110.327474,23.319987 109.167969,22.6005859 C108.981771,22.4820964 108.787109,22.4228516 108.583984,22.4228516 C108.363932,22.4228516 108.181966,22.4969076 108.038086,22.6450195 C107.894206,22.7931315 107.822266,22.96875 107.822266,23.171875 C107.822266,23.4511719 107.949219,23.6796875 108.203125,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M119.974609,23.8574219 C120.482422,24.2298177 121.161621,24.5535482 122.012207,24.8286133 C122.862793,25.1036784 123.740885,25.2412109 124.646484,25.2412109 C125.619792,25.2412109 126.474609,25.1079102 127.210938,24.8413086 C127.947266,24.574707 128.541829,24.1451823 128.994629,23.5527344 C129.447428,22.9602865 129.673828,22.2324219 129.673828,21.3691406 C129.673828,20.4550781 129.400879,19.7039388 128.85498,19.1157227 C128.309082,18.5275065 127.397135,18.0768229 126.119141,17.7636719 L123.922852,17.2177734 C123.000326,16.9892578 122.399414,16.7713216 122.120117,16.5639648 C121.84082,16.3566081 121.701172,15.9990234 121.701172,15.4912109 C121.701172,14.8649089 121.972005,14.4057617 122.513672,14.1137695 C123.055339,13.8217773 123.783203,13.6757813 124.697266,13.6757813 C124.985026,13.6757813 125.268555,13.6948242 125.547852,13.7329102 C125.827148,13.7709961 126.102214,13.8260091 126.373047,13.8979492 C126.64388,13.9698893 126.857585,14.03125 127.01416,14.0820313 C127.170736,14.1328125 127.373861,14.2068685 127.623535,14.3041992 C127.87321,14.4015299 128.010742,14.4544271 128.036133,14.4628906 C128.171549,14.5136719 128.302734,14.5390625 128.429688,14.5390625 C128.64974,14.5390625 128.825358,14.4692383 128.956543,14.3295898 C129.087728,14.1899414 129.15332,14.0227865 129.15332,13.828125 C129.15332,13.4980469 128.988281,13.2483724 128.658203,13.0791016 C128.201172,12.8336589 127.61084,12.6114909 126.887207,12.4125977 C126.163574,12.2137044 125.391276,12.1142578 124.570312,12.1142578 C123.884766,12.1142578 123.25,12.1798503 122.666016,12.3110352 C122.082031,12.4422201 121.555176,12.6411133 121.085449,12.9077148 C120.615723,13.1743164 120.245443,13.5361328 119.974609,13.9931641 C119.703776,14.4501953 119.568359,14.9791667 119.568359,15.5800781 C119.568359,15.9609375 119.606445,16.2994792 119.682617,16.5957031 C119.758789,16.8919271 119.88151,17.1500651 120.050781,17.3701172 C120.220052,17.5901693 120.402018,17.7784831 120.59668,17.9350586 C120.791341,18.0916341 121.051595,18.2376302 121.377441,18.3730469 C121.703288,18.5084635 122.007975,18.6184896 122.291504,18.703125 C122.575033,18.7877604 122.945312,18.8893229 123.402344,19.0078125 L125.649414,19.5664062 C126.394206,19.7526042 126.927409,19.9980469 127.249023,20.3027344 C127.570638,20.6074219 127.731445,21.0136719 127.731445,21.5214844 C127.731445,22.2324219 127.445801,22.7719727 126.874512,23.1401367 C126.303223,23.5083008 125.552083,23.6923828 124.621094,23.6923828 C123.326172,23.6839193 122.098958,23.319987 120.939453,22.6005859 C120.753255,22.4820964 120.558594,22.4228516 120.355469,22.4228516 C120.135417,22.4228516 119.953451,22.4969076 119.80957,22.6450195 C119.66569,22.7931315 119.59375,22.96875 119.59375,23.171875 C119.59375,23.4511719 119.720703,23.6796875 119.974609,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M133.117188,9.44824219 C133.565755,9.44824219 133.925456,9.31917318 134.196289,9.06103516 C134.467122,8.80289714 134.602539,8.46647135 134.602539,8.05175781 C134.602539,7.62858073 134.467122,7.28792318 134.196289,7.02978516 C133.925456,6.77164714 133.569987,6.64257812 133.129883,6.64257812 C132.681315,6.64257812 132.319499,6.77376302 132.044434,7.03613281 C131.769368,7.2985026 131.631836,7.63704427 131.631836,8.05175781 C131.631836,8.46647135 131.767253,8.80289714 132.038086,9.06103516 C132.308919,9.31917318 132.66862,9.44824219 133.117188,9.44824219 Z M133.104492,25 C133.417643,25 133.690592,24.8963216 133.92334,24.6889648 C134.156087,24.4816081 134.272461,24.2001953 134.272461,23.8447266 L134.272461,13.5234375 C134.272461,13.1679688 134.160319,12.8886719 133.936035,12.6855469 C133.711751,12.4824219 133.447266,12.3808594 133.142578,12.3808594 C132.829427,12.3808594 132.556478,12.4824219 132.32373,12.6855469 C132.090983,12.8886719 131.974609,13.1679688 131.974609,13.5234375 L131.974609,23.8447266 C131.974609,24.2171224 132.084635,24.5027669 132.304688,24.7016602 C132.52474,24.9005534 132.791341,25 133.104492,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M139.277344,25 C139.590495,25 139.861328,24.9005534 140.089844,24.7016602 C140.318359,24.5027669 140.432617,24.2255859 140.432617,23.8701172 L140.432617,14.0058594 L142.578125,14.0058594 C143.119792,14.0058594 143.390625,13.7604167 143.390625,13.2695312 C143.390625,13.0494792 143.325033,12.867513 143.193848,12.7236328 C143.062663,12.5797526 142.857422,12.5078125 142.578125,12.5078125 L140.432617,12.5078125 L140.432617,11.5175781 C140.432617,10.8320312 140.460124,10.2882487 140.515137,9.88623047 C140.57015,9.48421224 140.675944,9.17529297 140.83252,8.95947266 C140.989095,8.74365234 141.168945,8.60400391 141.37207,8.54052734 C141.575195,8.47705078 141.858724,8.4453125 142.222656,8.4453125 L143.301758,8.4453125 C143.555664,8.4453125 143.752441,8.36067708 143.89209,8.19140625 C144.031738,8.02213542 144.101562,7.81901042 144.101562,7.58203125 C144.101562,7.34505208 144.031738,7.1398112 143.89209,6.96630859 C143.752441,6.79280599 143.559896,6.70605469 143.314453,6.70605469 L141.765625,6.70605469 C140.63151,6.70605469 139.742839,7.02555339 139.099609,7.66455078 C138.45638,8.30354818 138.134766,9.38053385 138.134766,10.8955078 L138.134766,12.5078125 L136.47168,12.5078125 C136.192383,12.5078125 135.985026,12.5797526 135.849609,12.7236328 C135.714193,12.867513 135.646484,13.0494792 135.646484,13.2695312 C135.646484,13.4811198 135.714193,13.6567383 135.849609,13.7963867 C135.985026,13.9360352 136.192383,14.0058594 136.47168,14.0058594 L138.134766,14.0058594 L138.134766,23.8701172 C138.134766,24.2255859 138.246908,24.5027669 138.471191,24.7016602 C138.695475,24.9005534 138.964193,25 139.277344,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M146.288086,9.44824219 C146.736654,9.44824219 147.096354,9.31917318 147.367188,9.06103516 C147.638021,8.80289714 147.773438,8.46647135 147.773438,8.05175781 C147.773438,7.62858073 147.638021,7.28792318 147.367188,7.02978516 C147.096354,6.77164714 146.740885,6.64257812 146.300781,6.64257812 C145.852214,6.64257812 145.490397,6.77376302 145.215332,7.03613281 C144.940267,7.2985026 144.802734,7.63704427 144.802734,8.05175781 C144.802734,8.46647135 144.938151,8.80289714 145.208984,9.06103516 C145.479818,9.31917318 145.839518,9.44824219 146.288086,9.44824219 Z M146.275391,25 C146.588542,25 146.861491,24.8963216 147.094238,24.6889648 C147.326986,24.4816081 147.443359,24.2001953 147.443359,23.8447266 L147.443359,13.5234375 C147.443359,13.1679688 147.331217,12.8886719 147.106934,12.6855469 C146.88265,12.4824219 146.618164,12.3808594 146.313477,12.3808594 C146.000326,12.3808594 145.727376,12.4824219 145.494629,12.6855469 C145.261882,12.8886719 145.145508,13.1679688 145.145508,13.5234375 L145.145508,23.8447266 C145.145508,24.2171224 145.255534,24.5027669 145.475586,24.7016602 C145.695638,24.9005534 145.96224,25 146.275391,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M155.380859,25.2412109 C157.039714,25.2412109 158.326172,24.8730469 159.240234,24.1367188 C159.527995,23.8997396 159.671875,23.6416016 159.671875,23.3623047 C159.671875,23.1676432 159.604167,23.0047201 159.46875,22.8735352 C159.333333,22.7423503 159.168294,22.6767578 158.973633,22.6767578 C158.795898,22.6767578 158.626628,22.7317708 158.46582,22.8417969 C157.687174,23.3834635 156.718099,23.6542969 155.558594,23.6542969 C154.898438,23.6542969 154.314453,23.5167643 153.806641,23.2416992 C153.298828,22.9666341 152.892578,22.5942383 152.587891,22.1245117 C152.283203,21.6547852 152.054688,21.1321615 151.902344,20.5566406 C151.75,19.9811198 151.673828,19.3717448 151.673828,18.7285156 C151.673828,17.188151 152.050456,15.9630534 152.803711,15.0532227 C153.556966,14.1433919 154.538737,13.6884766 155.749023,13.6884766 C156.680013,13.6884766 157.530599,13.938151 158.300781,14.4375 C158.470052,14.547526 158.647786,14.6025391 158.833984,14.6025391 C159.037109,14.6025391 159.208496,14.5390625 159.348145,14.4121094 C159.487793,14.2851563 159.557617,14.1285807 159.557617,13.9423828 C159.557617,13.680013 159.413737,13.4345703 159.125977,13.2060547 C158.762044,12.9013672 158.271159,12.6474609 157.65332,12.4443359 C157.035482,12.2412109 156.36263,12.1396484 155.634766,12.1396484 C154.390625,12.1396484 153.290365,12.4316406 152.333984,13.015625 C151.377604,13.5996094 150.645508,14.3867188 150.137695,15.3769531 C149.629883,16.3671875 149.375977,17.4716797 149.375977,18.6904297 C149.375977,20.5777995 149.917643,22.1414388 151.000977,23.3813477 C152.08431,24.6212565 153.544271,25.2412109 155.380859,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M165.070312,23.7050781 C164.283203,23.7050781 163.661133,23.5252279 163.204102,23.1655273 C162.74707,22.8058268 162.518555,22.2408854 162.518555,21.4707031 C162.518555,21.047526 162.579915,20.7005208 162.702637,20.4296875 C162.825358,20.1588542 163.013672,19.9282227 163.267578,19.737793 C163.521484,19.5473633 163.902344,19.4034831 164.410156,19.3061523 C164.917969,19.2088216 165.501953,19.1411133 166.162109,19.1030273 C166.822266,19.0649414 167.677083,19.0458984 168.726562,19.0458984 L168.726562,19.4267578 C168.726562,20.6708984 168.360514,21.694987 167.628418,22.4990234 C166.896322,23.3030599 166.04362,23.7050781 165.070312,23.7050781 Z M164.829102,25.2285156 C166.648763,25.2285156 167.952148,24.4033203 168.739258,22.7529297 L168.739258,23.9716797 C168.739258,24.3017578 168.847168,24.5577799 169.062988,24.7397461 C169.278809,24.9217122 169.539062,25.0126953 169.84375,25.0126953 C170.148438,25.0126953 170.417155,24.9174805 170.649902,24.7270508 C170.88265,24.5366211 170.999023,24.2763672 170.999023,23.9462891 L170.999023,16.5449219 C170.999023,15.046875 170.561035,13.938151 169.685059,13.21875 C168.809082,12.499349 167.59668,12.1396484 166.047852,12.1396484 C164.236654,12.1396484 162.67513,12.5078125 161.363281,13.2441406 C161.126302,13.3795573 161.007812,13.5742187 161.007812,13.828125 C161.007812,14.0481771 161.088216,14.2491862 161.249023,14.4311523 C161.409831,14.6131185 161.60026,14.7041016 161.820312,14.7041016 C161.938802,14.7041016 162.040365,14.6829427 162.125,14.640625 C162.556641,14.4459635 162.920573,14.2936198 163.216797,14.1835937 C163.513021,14.0735677 163.919271,13.9635417 164.435547,13.8535156 C164.951823,13.7434896 165.468099,13.6884766 165.984375,13.6884766 C166.847656,13.6884766 167.520508,13.9042969 168.00293,14.3359375 C168.485352,14.7675781 168.726562,15.4361979 168.726562,16.3417969 L168.726562,17.7255859 C167.787109,17.7255859 166.985189,17.7382812 166.320801,17.7636719 C165.656413,17.7890625 165.011068,17.8334961 164.384766,17.8969727 C163.758464,17.9604492 163.242188,18.0535482 162.835938,18.1762695 C162.429688,18.2989909 162.057292,18.4555664 161.71875,18.6459961 C161.380208,18.8364258 161.117839,19.0691732 160.931641,19.3442383 C160.745443,19.6193034 160.601562,19.9388021 160.5,20.3027344 C160.398438,20.6666667 160.347656,21.0898438 160.347656,21.5722656 C160.347656,22.7402344 160.76237,23.6416016 161.591797,24.2763672 C162.421224,24.9111328 163.500326,25.2285156 164.829102,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M177.819336,25.1777344 L178.568359,25.1777344 C178.873047,25.1777344 179.120605,25.1692708 179.311035,25.1523438 C179.501465,25.1354167 179.691895,25.0973307 179.882324,25.0380859 C180.072754,24.9788411 180.214518,24.8836263 180.307617,24.7524414 C180.400716,24.6212565 180.447266,24.4498698 180.447266,24.2382813 C180.447266,24.0013021 180.366862,23.7939453 180.206055,23.6162109 C180.045247,23.4384766 179.825195,23.3496094 179.545898,23.3496094 L179.469727,23.3496094 L178.606445,23.3876953 L178.466797,23.3876953 C177.933594,23.3876953 177.544271,23.1951497 177.298828,22.8100586 C177.053385,22.4249674 176.930664,21.7965495 176.930664,20.9248047 L176.930664,14.0058594 L179.266602,14.0058594 C179.825195,14.0058594 180.104492,13.7646484 180.104492,13.2822266 C180.104492,13.0537109 180.034668,12.867513 179.89502,12.7236328 C179.755371,12.5797526 179.545898,12.5078125 179.266602,12.5078125 L176.930664,12.5078125 L176.930664,8.82617188 C176.930664,8.52994792 176.846029,8.30354818 176.676758,8.14697266 C176.507487,7.99039714 176.295898,7.91210938 176.041992,7.91210938 C175.771159,7.91210938 175.5236,8.0094401 175.299316,8.20410156 C175.075033,8.39876302 174.954427,8.63151042 174.9375,8.90234375 L174.632812,12.5078125 L173.071289,12.5078125 C172.791992,12.5078125 172.580404,12.5776367 172.436523,12.7172852 C172.292643,12.8569336 172.220703,13.0367839 172.220703,13.2568359 C172.220703,13.4853516 172.294759,13.6673177 172.442871,13.8027344 C172.590983,13.938151 172.804688,14.0058594 173.083984,14.0058594 L174.632812,14.0058594 L174.632812,21.3183594 C174.632812,22.6302083 174.920573,23.6013997 175.496094,24.2319336 C176.071615,24.8624674 176.846029,25.1777344 177.819336,25.1777344 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M183.395508,9.44824219 C183.844076,9.44824219 184.203776,9.31917318 184.474609,9.06103516 C184.745443,8.80289714 184.880859,8.46647135 184.880859,8.05175781 C184.880859,7.62858073 184.745443,7.28792318 184.474609,7.02978516 C184.203776,6.77164714 183.848307,6.64257812 183.408203,6.64257812 C182.959635,6.64257812 182.597819,6.77376302 182.322754,7.03613281 C182.047689,7.2985026 181.910156,7.63704427 181.910156,8.05175781 C181.910156,8.46647135 182.045573,8.80289714 182.316406,9.06103516 C182.58724,9.31917318 182.94694,9.44824219 183.395508,9.44824219 Z M183.382812,25 C183.695964,25 183.968913,24.8963216 184.20166,24.6889648 C184.434408,24.4816081 184.550781,24.2001953 184.550781,23.8447266 L184.550781,13.5234375 C184.550781,13.1679688 184.438639,12.8886719 184.214355,12.6855469 C183.990072,12.4824219 183.725586,12.3808594 183.420898,12.3808594 C183.107747,12.3808594 182.834798,12.4824219 182.602051,12.6855469 C182.369303,12.8886719 182.25293,13.1679688 182.25293,13.5234375 L182.25293,23.8447266 C182.25293,24.2171224 182.362956,24.5027669 182.583008,24.7016602 C182.80306,24.9005534 183.069661,25 183.382812,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M192.84375,25.2285156 C194.798828,25.2285156 196.347656,24.6149089 197.490234,23.3876953 C198.632812,22.1604818 199.204102,20.5947266 199.204102,18.6904297 C199.204102,16.7692057 198.630697,15.1971029 197.483887,13.9741211 C196.337077,12.7511393 194.790365,12.1396484 192.84375,12.1396484 C190.897135,12.1396484 189.350423,12.7532552 188.203613,13.9804688 C187.056803,15.2076823 186.483398,16.7776693 186.483398,18.6904297 C186.483398,20.5947266 187.056803,22.1604818 188.203613,23.3876953 C189.350423,24.6149089 190.897135,25.2285156 192.84375,25.2285156 Z M192.818359,23.6542969 C191.548828,23.6542969 190.556478,23.2078451 189.841309,22.3149414 C189.126139,21.4220378 188.768555,20.2138672 188.768555,18.6904297 C188.768555,17.1500651 189.128255,15.933431 189.847656,15.0405273 C190.567057,14.1476237 191.565755,13.7011719 192.84375,13.7011719 C194.113281,13.7011719 195.109863,14.1497396 195.833496,15.046875 C196.557129,15.9440104 196.918945,17.1585286 196.918945,18.6904297 C196.918945,20.2307943 196.561361,21.4431966 195.846191,22.3276367 C195.131022,23.2120768 194.121745,23.6542969 192.818359,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M202.177734,25 C202.490885,25 202.763835,24.9026693 202.996582,24.7080078 C203.229329,24.5133464 203.345703,24.2467448 203.345703,23.9082031 L203.345703,17.5732422 C203.404948,16.3798828 203.76888,15.4361979 204.4375,14.7421875 C205.10612,14.0481771 205.927083,13.7011719 206.900391,13.7011719 C207.814453,13.7011719 208.523275,13.9847005 209.026855,14.5517578 C209.530436,15.1188151 209.782227,15.9440104 209.782227,17.0273438 L209.782227,23.9082031 C209.782227,24.2552083 209.894368,24.5239258 210.118652,24.7143555 C210.342936,24.9047852 210.611654,25 210.924805,25 C211.237956,25 211.506673,24.9047852 211.730957,24.7143555 C211.955241,24.5239258 212.067383,24.2552083 212.067383,23.9082031 L212.067383,17.0527344 C212.067383,15.4023437 211.652669,14.1708984 210.823242,13.3583984 C209.993815,12.5458984 208.880859,12.1396484 207.484375,12.1396484 C206.511068,12.1396484 205.658366,12.3470052 204.92627,12.7617188 C204.194173,13.1764323 203.667318,13.7688802 203.345703,14.5390625 L203.345703,13.4345703 C203.345703,13.1044922 203.233561,12.8484701 203.009277,12.6665039 C202.784993,12.4845378 202.516276,12.3935547 202.203125,12.3935547 C201.889974,12.3935547 201.619141,12.4887695 201.390625,12.6791992 C201.162109,12.8696289 201.047852,13.1341146 201.047852,13.4726563 L201.047852,23.9082031 C201.047852,24.2552083 201.157878,24.5239258 201.37793,24.7143555 C201.597982,24.9047852 201.864583,25 202.177734,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M228.729492,25.2792969 C229.499674,25.2792969 230.233887,25.1946615 230.932129,25.0253906 C231.630371,24.8561198 232.28418,24.59375 232.893555,24.2382813 C233.50293,23.8828125 234.029785,23.449056 234.474121,22.9370117 C234.918457,22.4249674 235.269694,21.8071289 235.527832,21.0834961 C235.78597,20.3598633 235.915039,19.5621745 235.915039,18.6904297 L235.915039,8.06445312 C235.915039,7.67513021 235.790202,7.37044271 235.540527,7.15039063 C235.290853,6.93033854 234.992513,6.8203125 234.645508,6.8203125 C234.290039,6.8203125 233.985352,6.93033854 233.731445,7.15039063 C233.477539,7.37044271 233.350586,7.67513021 233.350586,8.06445312 L233.350586,18.6269531 C233.350586,19.3886719 233.219401,20.069987 232.957031,20.6708984 C232.694661,21.2718099 232.343424,21.7542318 231.90332,22.1181641 C231.463216,22.4820964 230.972331,22.7571615 230.430664,22.9433594 C229.888997,23.1295573 229.32194,23.2226562 228.729492,23.2226562 C228.137044,23.2226562 227.572103,23.1295573 227.034668,22.9433594 C226.497233,22.7571615 226.010579,22.4820964 225.574707,22.1181641 C225.138835,21.7542318 224.789714,21.2718099 224.527344,20.6708984 C224.264974,20.069987 224.133789,19.3886719 224.133789,18.6269531 L224.133789,8.06445312 C224.133789,7.67513021 224.006836,7.37044271 223.75293,7.15039063 C223.499023,6.93033854 223.198568,6.8203125 222.851562,6.8203125 C222.496094,6.8203125 222.191406,6.93033854 221.9375,7.15039063 C221.683594,7.37044271 221.556641,7.67513021 221.556641,8.06445312 L221.556641,18.6904297 C221.556641,19.7652995 221.751302,20.7301432 222.140625,21.5849609 C222.529948,22.4397786 223.058919,23.1316732 223.727539,23.6606445 C224.396159,24.1896159 225.155762,24.5916341 226.006348,24.8666992 C226.856934,25.1417643 227.764648,25.2792969 228.729492,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M239.066406,23.8574219 C239.574219,24.2298177 240.253418,24.5535482 241.104004,24.8286133 C241.95459,25.1036784 242.832682,25.2412109 243.738281,25.2412109 C244.711589,25.2412109 245.566406,25.1079102 246.302734,24.8413086 C247.039062,24.574707 247.633626,24.1451823 248.086426,23.5527344 C248.539225,22.9602865 248.765625,22.2324219 248.765625,21.3691406 C248.765625,20.4550781 248.492676,19.7039388 247.946777,19.1157227 C247.400879,18.5275065 246.488932,18.0768229 245.210938,17.7636719 L243.014648,17.2177734 C242.092122,16.9892578 241.491211,16.7713216 241.211914,16.5639648 C240.932617,16.3566081 240.792969,15.9990234 240.792969,15.4912109 C240.792969,14.8649089 241.063802,14.4057617 241.605469,14.1137695 C242.147135,13.8217773 242.875,13.6757813 243.789062,13.6757813 C244.076823,13.6757813 244.360352,13.6948242 244.639648,13.7329102 C244.918945,13.7709961 245.19401,13.8260091 245.464844,13.8979492 C245.735677,13.9698893 245.949382,14.03125 246.105957,14.0820313 C246.262533,14.1328125 246.465658,14.2068685 246.715332,14.3041992 C246.965007,14.4015299 247.102539,14.4544271 247.12793,14.4628906 C247.263346,14.5136719 247.394531,14.5390625 247.521484,14.5390625 C247.741536,14.5390625 247.917155,14.4692383 248.04834,14.3295898 C248.179525,14.1899414 248.245117,14.0227865 248.245117,13.828125 C248.245117,13.4980469 248.080078,13.2483724 247.75,13.0791016 C247.292969,12.8336589 246.702637,12.6114909 245.979004,12.4125977 C245.255371,12.2137044 244.483073,12.1142578 243.662109,12.1142578 C242.976562,12.1142578 242.341797,12.1798503 241.757812,12.3110352 C241.173828,12.4422201 240.646973,12.6411133 240.177246,12.9077148 C239.70752,13.1743164 239.33724,13.5361328 239.066406,13.9931641 C238.795573,14.4501953 238.660156,14.9791667 238.660156,15.5800781 C238.660156,15.9609375 238.698242,16.2994792 238.774414,16.5957031 C238.850586,16.8919271 238.973307,17.1500651 239.142578,17.3701172 C239.311849,17.5901693 239.493815,17.7784831 239.688477,17.9350586 C239.883138,18.0916341 240.143392,18.2376302 240.469238,18.3730469 C240.795085,18.5084635 241.099772,18.6184896 241.383301,18.703125 C241.666829,18.7877604 242.037109,18.8893229 242.494141,19.0078125 L244.741211,19.5664062 C245.486003,19.7526042 246.019206,19.9980469 246.34082,20.3027344 C246.662435,20.6074219 246.823242,21.0136719 246.823242,21.5214844 C246.823242,22.2324219 246.537598,22.7719727 245.966309,23.1401367 C245.39502,23.5083008 244.64388,23.6923828 243.712891,23.6923828 C242.417969,23.6839193 241.190755,23.319987 240.03125,22.6005859 C239.845052,22.4820964 239.650391,22.4228516 239.447266,22.4228516 C239.227214,22.4228516 239.045247,22.4969076 238.901367,22.6450195 C238.757487,22.7931315 238.685547,22.96875 238.685547,23.171875 C238.685547,23.4511719 238.8125,23.6796875 239.066406,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M254.583008,23.7050781 C253.795898,23.7050781 253.173828,23.5252279 252.716797,23.1655273 C252.259766,22.8058268 252.03125,22.2408854 252.03125,21.4707031 C252.03125,21.047526 252.092611,20.7005208 252.215332,20.4296875 C252.338053,20.1588542 252.526367,19.9282227 252.780273,19.737793 C253.03418,19.5473633 253.415039,19.4034831 253.922852,19.3061523 C254.430664,19.2088216 255.014648,19.1411133 255.674805,19.1030273 C256.334961,19.0649414 257.189779,19.0458984 258.239258,19.0458984 L258.239258,19.4267578 C258.239258,20.6708984 257.87321,21.694987 257.141113,22.4990234 C256.409017,23.3030599 255.556315,23.7050781 254.583008,23.7050781 Z M254.341797,25.2285156 C256.161458,25.2285156 257.464844,24.4033203 258.251953,22.7529297 L258.251953,23.9716797 C258.251953,24.3017578 258.359863,24.5577799 258.575684,24.7397461 C258.791504,24.9217122 259.051758,25.0126953 259.356445,25.0126953 C259.661133,25.0126953 259.92985,24.9174805 260.162598,24.7270508 C260.395345,24.5366211 260.511719,24.2763672 260.511719,23.9462891 L260.511719,16.5449219 C260.511719,15.046875 260.07373,13.938151 259.197754,13.21875 C258.321777,12.499349 257.109375,12.1396484 255.560547,12.1396484 C253.749349,12.1396484 252.187826,12.5078125 250.875977,13.2441406 C250.638997,13.3795573 250.520508,13.5742187 250.520508,13.828125 C250.520508,14.0481771 250.600911,14.2491862 250.761719,14.4311523 C250.922526,14.6131185 251.112956,14.7041016 251.333008,14.7041016 C251.451497,14.7041016 251.55306,14.6829427 251.637695,14.640625 C252.069336,14.4459635 252.433268,14.2936198 252.729492,14.1835937 C253.025716,14.0735677 253.431966,13.9635417 253.948242,13.8535156 C254.464518,13.7434896 254.980794,13.6884766 255.49707,13.6884766 C256.360352,13.6884766 257.033203,13.9042969 257.515625,14.3359375 C257.998047,14.7675781 258.239258,15.4361979 258.239258,16.3417969 L258.239258,17.7255859 C257.299805,17.7255859 256.497884,17.7382812 255.833496,17.7636719 C255.169108,17.7890625 254.523763,17.8334961 253.897461,17.8969727 C253.271159,17.9604492 252.754883,18.0535482 252.348633,18.1762695 C251.942383,18.2989909 251.569987,18.4555664 251.231445,18.6459961 C250.892904,18.8364258 250.630534,19.0691732 250.444336,19.3442383 C250.258138,19.6193034 250.114258,19.9388021 250.012695,20.3027344 C249.911133,20.6666667 249.860352,21.0898438 249.860352,21.5722656 C249.860352,22.7402344 250.275065,23.6416016 251.104492,24.2763672 C251.933919,24.9111328 253.013021,25.2285156 254.341797,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M268.525391,23.4765625 C267.408203,23.4765625 266.485677,23.0703125 265.757812,22.2578125 C265.029948,21.4453125 264.666016,20.2519531 264.666016,18.6777344 C264.666016,17.7382812 264.797201,16.9046224 265.05957,16.1767578 C265.32194,15.4488932 265.749349,14.8543294 266.341797,14.3930664 C266.934245,13.9318034 267.662109,13.7011719 268.525391,13.7011719 C269.067057,13.7011719 269.553711,13.7921549 269.985352,13.9741211 C270.416992,14.1560872 270.770345,14.4015299 271.04541,14.7104492 C271.320475,15.0193685 271.548991,15.3875326 271.730957,15.8149414 C271.912923,16.2423503 272.041992,16.6866862 272.118164,17.1479492 C272.194336,17.6092122 272.232422,18.1022135 272.232422,18.6269531 C272.232422,20.1588542 271.885417,21.3500977 271.191406,22.2006836 C270.497396,23.0512695 269.608724,23.4765625 268.525391,23.4765625 Z M268.423828,30.3701172 C270.294271,30.3701172 271.769043,29.9130859 272.848145,28.9990234 C273.927246,28.0849609 274.466797,26.7265625 274.466797,24.9238281 L274.466797,13.4599609 C274.466797,13.1214193 274.361003,12.8569336 274.149414,12.6665039 C273.937826,12.4760742 273.683919,12.3808594 273.387695,12.3808594 C273.116862,12.3808594 272.877767,12.4633789 272.67041,12.628418 C272.463053,12.793457 272.34668,13.0198568 272.321289,13.3076172 L272.321289,14.5898437 C271.483398,12.9563802 270.078451,12.1396484 268.106445,12.1396484 C266.913086,12.1396484 265.876302,12.4401042 264.996094,13.0410156 C264.115885,13.6419271 263.457845,14.4353841 263.021973,15.4213867 C262.5861,16.4073893 262.368164,17.5182292 262.368164,18.7539063 C262.368164,19.625651 262.497233,20.438151 262.755371,21.1914062 C263.013509,21.9446615 263.381673,22.6048177 263.859863,23.171875 C264.338053,23.7389323 264.947428,24.1853841 265.687988,24.5112305 C266.428548,24.8370768 267.255859,25 268.169922,25 C269.143229,25 269.979004,24.7736003 270.677246,24.3208008 C271.375488,23.8680013 271.898112,23.2565104 272.245117,22.4863281 L272.245117,25 C272.245117,26.21875 271.910807,27.1518555 271.242188,27.7993164 C270.573568,28.4467773 269.583333,28.7705078 268.271484,28.7705078 C268.068359,28.7705078 267.86735,28.7620443 267.668457,28.7451172 C267.469564,28.7281901 267.291829,28.7091471 267.135254,28.6879883 C266.978678,28.6668294 266.805176,28.6350911 266.614746,28.5927734 C266.424316,28.5504557 266.271973,28.5166016 266.157715,28.4912109 C266.043457,28.4658203 265.895345,28.4235026 265.713379,28.3642578 C265.531413,28.305013 265.406576,28.2626953 265.338867,28.2373047 C265.271159,28.2119141 265.150553,28.1653646 264.977051,28.0976563 C264.803548,28.0299479 264.69987,27.991862 264.666016,27.9833984 C264.572917,27.9495443 264.475586,27.9326172 264.374023,27.9326172 C264.162435,27.9326172 263.982585,28.0109049 263.834473,28.1674805 C263.686361,28.324056 263.612305,28.508138 263.612305,28.7197266 C263.612305,29.0328776 263.773112,29.2783203 264.094727,29.4560547 C265.237305,30.0654297 266.680339,30.3701172 268.423828,30.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M278.748047,17.7382812 C278.790365,17.2135417 278.902507,16.7163086 279.084473,16.246582 C279.266439,15.7768555 279.511882,15.3494466 279.820801,14.9643555 C280.12972,14.5792643 280.525391,14.2724609 281.007812,14.0439453 C281.490234,13.8154297 282.027669,13.7011719 282.620117,13.7011719 C283.762695,13.7011719 284.630208,14.0777995 285.222656,14.8310547 C285.815104,15.5843099 286.149414,16.5533854 286.225586,17.7382812 L278.748047,17.7382812 Z M282.658203,25.2285156 C284.554036,25.2285156 286.170573,24.6276042 287.507812,23.4257812 C287.744792,23.2141927 287.863281,22.9729818 287.863281,22.7021484 C287.863281,22.4905599 287.793457,22.3064779 287.653809,22.1499023 C287.51416,21.9933268 287.342773,21.9150391 287.139648,21.9150391 C286.970378,21.9150391 286.80957,21.9742839 286.657227,22.0927734 C286.047852,22.5582682 285.442708,22.9264323 284.841797,23.1972656 C284.240885,23.468099 283.563802,23.6035156 282.810547,23.6035156 C281.617188,23.5865885 280.639648,23.2036133 279.87793,22.4545898 C279.116211,21.7055664 278.726888,20.6031901 278.709961,19.1474609 L287.418945,19.1474609 C287.689779,19.1474609 287.892904,19.0670573 288.02832,18.90625 C288.163737,18.7454427 288.231445,18.5423177 288.231445,18.296875 C288.206055,17.4420573 288.083333,16.6570638 287.863281,15.9418945 C287.643229,15.2267253 287.313151,14.5792643 286.873047,13.9995117 C286.432943,13.4197591 285.840495,12.9648438 285.095703,12.6347656 C284.350911,12.3046875 283.483398,12.1396484 282.493164,12.1396484 C281.24056,12.1396484 280.144531,12.4443359 279.205078,13.0537109 C278.265625,13.6630859 277.563151,14.4544271 277.097656,15.4277344 C276.632161,16.4010417 276.399414,17.4759115 276.399414,18.6523437 C276.399414,19.9895833 276.676595,21.159668 277.230957,22.1625977 C277.785319,23.1655273 278.532227,23.9251302 279.47168,24.4414063 C280.411133,24.9576823 281.473307,25.2200521 282.658203,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M290.303711,23.8574219 C290.811523,24.2298177 291.490723,24.5535482 292.341309,24.8286133 C293.191895,25.1036784 294.069987,25.2412109 294.975586,25.2412109 C295.948893,25.2412109 296.803711,25.1079102 297.540039,24.8413086 C298.276367,24.574707 298.870931,24.1451823 299.32373,23.5527344 C299.77653,22.9602865 300.00293,22.2324219 300.00293,21.3691406 C300.00293,20.4550781 299.72998,19.7039388 299.184082,19.1157227 C298.638184,18.5275065 297.726237,18.0768229 296.448242,17.7636719 L294.251953,17.2177734 C293.329427,16.9892578 292.728516,16.7713216 292.449219,16.5639648 C292.169922,16.3566081 292.030273,15.9990234 292.030273,15.4912109 C292.030273,14.8649089 292.301107,14.4057617 292.842773,14.1137695 C293.38444,13.8217773 294.112305,13.6757813 295.026367,13.6757813 C295.314128,13.6757813 295.597656,13.6948242 295.876953,13.7329102 C296.15625,13.7709961 296.431315,13.8260091 296.702148,13.8979492 C296.972982,13.9698893 297.186686,14.03125 297.343262,14.0820313 C297.499837,14.1328125 297.702962,14.2068685 297.952637,14.3041992 C298.202311,14.4015299 298.339844,14.4544271 298.365234,14.4628906 C298.500651,14.5136719 298.631836,14.5390625 298.758789,14.5390625 C298.978841,14.5390625 299.15446,14.4692383 299.285645,14.3295898 C299.416829,14.1899414 299.482422,14.0227865 299.482422,13.828125 C299.482422,13.4980469 299.317383,13.2483724 298.987305,13.0791016 C298.530273,12.8336589 297.939941,12.6114909 297.216309,12.4125977 C296.492676,12.2137044 295.720378,12.1142578 294.899414,12.1142578 C294.213867,12.1142578 293.579102,12.1798503 292.995117,12.3110352 C292.411133,12.4422201 291.884277,12.6411133 291.414551,12.9077148 C290.944824,13.1743164 290.574544,13.5361328 290.303711,13.9931641 C290.032878,14.4501953 289.897461,14.9791667 289.897461,15.5800781 C289.897461,15.9609375 289.935547,16.2994792 290.011719,16.5957031 C290.087891,16.8919271 290.210612,17.1500651 290.379883,17.3701172 C290.549154,17.5901693 290.73112,17.7784831 290.925781,17.9350586 C291.120443,18.0916341 291.380697,18.2376302 291.706543,18.3730469 C292.032389,18.5084635 292.337077,18.6184896 292.620605,18.703125 C292.904134,18.7877604 293.274414,18.8893229 293.731445,19.0078125 L295.978516,19.5664062 C296.723307,19.7526042 297.25651,19.9980469 297.578125,20.3027344 C297.89974,20.6074219 298.060547,21.0136719 298.060547,21.5214844 C298.060547,22.2324219 297.774902,22.7719727 297.203613,23.1401367 C296.632324,23.5083008 295.881185,23.6923828 294.950195,23.6923828 C293.655273,23.6839193 292.42806,23.319987 291.268555,22.6005859 C291.082357,22.4820964 290.887695,22.4228516 290.68457,22.4228516 C290.464518,22.4228516 290.282552,22.4969076 290.138672,22.6450195 C289.994792,22.7931315 289.922852,22.96875 289.922852,23.171875 C289.922852,23.4511719 290.049805,23.6796875 290.303711,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-9" transform="translate(0, 132)" xlink:href="#path-25" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M7.45214844,25 C7.79915365,25 8.09960938,24.889974 8.35351563,24.6699219 C8.60742188,24.4498698 8.734375,24.1494141 8.734375,23.7685547 L8.734375,8.90234375 L13.4570313,8.90234375 C13.7871094,8.90234375 14.0452474,8.80078125 14.2314453,8.59765625 C14.4176432,8.39453125 14.5107422,8.15332031 14.5107422,7.87402344 C14.5107422,7.59472656 14.4197591,7.35563151 14.237793,7.15673828 C14.0558268,6.95784505 13.7955729,6.85839844 13.4570313,6.85839844 L1.38378906,6.85839844 C1.05371094,6.85839844 0.795572917,6.95996094 0.609375,7.16308594 C0.423177083,7.36621094 0.330078125,7.6031901 0.330078125,7.87402344 C0.330078125,8.15332031 0.423177083,8.39453125 0.609375,8.59765625 C0.795572917,8.80078125 1.05371094,8.90234375 1.38378906,8.90234375 L6.16992188,8.90234375 L6.16992188,23.7685547 C6.16992188,24.1494141 6.296875,24.4498698 6.55078125,24.6699219 C6.8046875,24.889974 7.10514323,25 7.45214844,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M21.2675781,25.2285156 C23.2226562,25.2285156 24.7714844,24.6149089 25.9140625,23.3876953 C27.0566406,22.1604818 27.6279297,20.5947266 27.6279297,18.6904297 C27.6279297,16.7692057 27.0545247,15.1971029 25.9077148,13.9741211 C24.7609049,12.7511393 23.2141927,12.1396484 21.2675781,12.1396484 C19.3209635,12.1396484 17.7742513,12.7532552 16.6274414,13.9804688 C15.4806315,15.2076823 14.9072266,16.7776693 14.9072266,18.6904297 C14.9072266,20.5947266 15.4806315,22.1604818 16.6274414,23.3876953 C17.7742513,24.6149089 19.3209635,25.2285156 21.2675781,25.2285156 Z M21.2421875,23.6542969 C19.9726562,23.6542969 18.980306,23.2078451 18.2651367,22.3149414 C17.5499674,21.4220378 17.1923828,20.2138672 17.1923828,18.6904297 C17.1923828,17.1500651 17.5520833,15.933431 18.2714844,15.0405273 C18.9908854,14.1476237 19.9895833,13.7011719 21.2675781,13.7011719 C22.5371094,13.7011719 23.5336914,14.1497396 24.2573242,15.046875 C24.980957,15.9440104 25.3427734,17.1585286 25.3427734,18.6904297 C25.3427734,20.2307943 24.9851888,21.4431966 24.2700195,22.3276367 C23.5548503,23.2120768 22.5455729,23.6542969 21.2421875,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M30.6015625,25 C30.9147135,25 31.1876628,24.8984375 31.4204102,24.6953125 C31.6531576,24.4921875 31.7695313,24.2086589 31.7695313,23.8447266 L31.7695313,18.3095703 L37.7363281,24.6572266 C37.9479167,24.8942057 38.2018229,25.0126953 38.4980469,25.0126953 C38.7773438,25.0126953 39.0249023,24.906901 39.2407227,24.6953125 C39.456543,24.483724 39.5644531,24.242513 39.5644531,23.9716797 C39.5644531,23.726237 39.4713542,23.5061849 39.2851562,23.3115234 L34.2451172,18.0048828 L38.8535156,13.8789062 C39.0566406,13.6842448 39.1582031,13.4684245 39.1582031,13.2314453 C39.1582031,12.9690755 39.0545247,12.7320964 38.847168,12.5205078 C38.6398112,12.3089193 38.4007161,12.203125 38.1298828,12.203125 C37.9182943,12.203125 37.719401,12.2835286 37.5332031,12.4443359 L31.7695313,17.7255859 L31.7695313,8.01367188 C31.7695313,7.65820312 31.6573893,7.37890625 31.4331055,7.17578125 C31.2088216,6.97265625 30.9443359,6.87109375 30.6396484,6.87109375 C30.3264974,6.87109375 30.0535482,6.97265625 29.8208008,7.17578125 C29.5880534,7.37890625 29.4716797,7.65820312 29.4716797,8.01367188 L29.4716797,23.8447266 C29.4716797,24.2171224 29.5817057,24.5027669 29.8017578,24.7016602 C30.0218099,24.9005534 30.2884115,25 30.6015625,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M42.7158203,17.7382812 C42.758138,17.2135417 42.8702799,16.7163086 43.0522461,16.246582 C43.2342122,15.7768555 43.4796549,15.3494466 43.7885742,14.9643555 C44.0974935,14.5792643 44.4931641,14.2724609 44.9755859,14.0439453 C45.4580078,13.8154297 45.9954427,13.7011719 46.5878906,13.7011719 C47.7304688,13.7011719 48.5979818,14.0777995 49.1904297,14.8310547 C49.7828776,15.5843099 50.1171875,16.5533854 50.1933594,17.7382812 L42.7158203,17.7382812 Z M46.6259766,25.2285156 C48.5218099,25.2285156 50.1383464,24.6276042 51.4755859,23.4257812 C51.7125651,23.2141927 51.8310547,22.9729818 51.8310547,22.7021484 C51.8310547,22.4905599 51.7612305,22.3064779 51.621582,22.1499023 C51.4819336,21.9933268 51.3105469,21.9150391 51.1074219,21.9150391 C50.938151,21.9150391 50.7773438,21.9742839 50.625,22.0927734 C50.015625,22.5582682 49.4104818,22.9264323 48.8095703,23.1972656 C48.2086589,23.468099 47.5315755,23.6035156 46.7783203,23.6035156 C45.5849609,23.5865885 44.6074219,23.2036133 43.8457031,22.4545898 C43.0839844,21.7055664 42.6946615,20.6031901 42.6777344,19.1474609 L51.3867188,19.1474609 C51.6575521,19.1474609 51.8606771,19.0670573 51.9960938,18.90625 C52.1315104,18.7454427 52.1992188,18.5423177 52.1992188,18.296875 C52.1738281,17.4420573 52.0511068,16.6570638 51.8310547,15.9418945 C51.6110026,15.2267253 51.2809245,14.5792643 50.8408203,13.9995117 C50.4007161,13.4197591 49.8082682,12.9648438 49.0634766,12.6347656 C48.3186849,12.3046875 47.4511719,12.1396484 46.4609375,12.1396484 C45.2083333,12.1396484 44.1123047,12.4443359 43.1728516,13.0537109 C42.2333984,13.6630859 41.5309245,14.4544271 41.0654297,15.4277344 C40.5999349,16.4010417 40.3671875,17.4759115 40.3671875,18.6523437 C40.3671875,19.9895833 40.6443685,21.159668 41.1987305,22.1625977 C41.7530924,23.1655273 42.5,23.9251302 43.4394531,24.4414063 C44.3789062,24.9576823 45.4410807,25.2200521 46.6259766,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M55.4902344,25 C55.8033854,25 56.0763346,24.9026693 56.309082,24.7080078 C56.5418294,24.5133464 56.6582031,24.2467448 56.6582031,23.9082031 L56.6582031,17.5732422 C56.7174479,16.3798828 57.0813802,15.4361979 57.75,14.7421875 C58.4186198,14.0481771 59.2395833,13.7011719 60.2128906,13.7011719 C61.1269531,13.7011719 61.8357747,13.9847005 62.3393555,14.5517578 C62.8429362,15.1188151 63.0947266,15.9440104 63.0947266,17.0273438 L63.0947266,23.9082031 C63.0947266,24.2552083 63.2068685,24.5239258 63.4311523,24.7143555 C63.6554362,24.9047852 63.9241536,25 64.2373047,25 C64.5504557,25 64.8191732,24.9047852 65.043457,24.7143555 C65.2677409,24.5239258 65.3798828,24.2552083 65.3798828,23.9082031 L65.3798828,17.0527344 C65.3798828,15.4023437 64.9651693,14.1708984 64.1357422,13.3583984 C63.3063151,12.5458984 62.1933594,12.1396484 60.796875,12.1396484 C59.8235677,12.1396484 58.9708659,12.3470052 58.2387695,12.7617188 C57.5066732,13.1764323 56.9798177,13.7688802 56.6582031,14.5390625 L56.6582031,13.4345703 C56.6582031,13.1044922 56.5460612,12.8484701 56.3217773,12.6665039 C56.0974935,12.4845378 55.828776,12.3935547 55.515625,12.3935547 C55.202474,12.3935547 54.9316406,12.4887695 54.703125,12.6791992 C54.4746094,12.8696289 54.3603516,13.1341146 54.3603516,13.4726563 L54.3603516,23.9082031 C54.3603516,24.2552083 54.4703776,24.5239258 54.6904297,24.7143555 C54.9104818,24.9047852 55.1770833,25 55.4902344,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M76.0625,25 L85.7363281,25 C86.0579427,25 86.3076172,24.9005534 86.4853516,24.7016602 C86.6630859,24.5027669 86.7519531,24.2636719 86.7519531,23.984375 C86.7519531,23.7050781 86.6630859,23.4638672 86.4853516,23.2607422 C86.3076172,23.0576172 86.0579427,22.9560547 85.7363281,22.9560547 L77.3574219,22.9560547 L77.3574219,16.671875 L84.9746094,16.671875 C85.296224,16.671875 85.5480143,16.5724284 85.7299805,16.3735352 C85.9119466,16.1746419 86.0029297,15.9397786 86.0029297,15.6689453 C86.0029297,15.398112 85.9140625,15.1653646 85.7363281,14.9707031 C85.5585938,14.7760417 85.3046875,14.6787109 84.9746094,14.6787109 L77.3574219,14.6787109 L77.3574219,8.90234375 L85.40625,8.90234375 C85.7278646,8.90234375 85.9775391,8.80078125 86.1552734,8.59765625 C86.3330078,8.39453125 86.421875,8.15332031 86.421875,7.87402344 C86.421875,7.59472656 86.3330078,7.35563151 86.1552734,7.15673828 C85.9775391,6.95784505 85.7278646,6.85839844 85.40625,6.85839844 L75.9990234,6.85839844 C75.6604818,6.85839844 75.3748372,6.98323568 75.1420898,7.23291016 C74.9093424,7.48258464 74.7929688,7.77246094 74.7929688,8.10253906 L74.7929688,23.7558594 C74.7929688,24.0859375 74.9220378,24.3758138 75.1801758,24.6254883 C75.4383138,24.8751628 75.7324219,25 76.0625,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M90.3603516,25 C90.6819661,25 90.9591471,24.9047852 91.1918945,24.7143555 C91.4246419,24.5239258 91.5410156,24.2594401 91.5410156,23.9208984 L91.5410156,17.7255859 C91.5410156,16.4560547 91.8710938,15.4679362 92.53125,14.7612305 C93.1914062,14.0545247 94.0716146,13.7011719 95.171875,13.7011719 C97.0169271,13.7011719 97.9394531,14.8691406 97.9394531,17.2050781 L97.9394531,23.9082031 C97.9394531,24.2467448 98.0515951,24.5133464 98.2758789,24.7080078 C98.5001628,24.9026693 98.7646484,25 99.0693359,25 C99.382487,25 99.6533203,24.9026693 99.8818359,24.7080078 C100.110352,24.5133464 100.224609,24.2467448 100.224609,23.9082031 L100.224609,17.7890625 C100.224609,16.4518229 100.567383,15.4361979 101.25293,14.7421875 C101.938477,14.0481771 102.810221,13.7011719 103.868164,13.7011719 C104.756836,13.7011719 105.442383,13.9698893 105.924805,14.5073242 C106.407227,15.0447591 106.648438,15.8382161 106.648438,16.8876953 L106.648438,23.9208984 C106.648438,24.2594401 106.758464,24.5239258 106.978516,24.7143555 C107.198568,24.9047852 107.465169,25 107.77832,25 C108.091471,25 108.364421,24.9047852 108.597168,24.7143555 C108.829915,24.5239258 108.946289,24.2594401 108.946289,23.9208984 L108.946289,16.9765625 C108.946289,16.1471354 108.825684,15.4150391 108.584473,14.7802734 C108.343262,14.1455078 108.008952,13.6398112 107.581543,13.2631836 C107.154134,12.886556 106.669596,12.6051432 106.12793,12.4189453 C105.586263,12.2327474 104.993815,12.1396484 104.350586,12.1396484 C103.233398,12.1396484 102.285482,12.387207 101.506836,12.8823242 C100.72819,13.3774414 100.161133,14.1158854 99.8056641,15.0976562 C99.5432943,14.1497396 99.0439453,13.4197591 98.3076172,12.9077148 C97.5712891,12.3956706 96.6783854,12.1396484 95.6289062,12.1396484 C94.7063802,12.1396484 93.8854167,12.3491211 93.1660156,12.7680664 C92.4466146,13.1870117 91.9049479,13.7858073 91.5410156,14.5644531 L91.5410156,13.3837891 C91.5410156,13.070638 91.4246419,12.8251953 91.1918945,12.6474609 C90.9591471,12.4697266 90.6904297,12.3808594 90.3857422,12.3808594 C90.0810547,12.3808594 89.8123372,12.4760742 89.5795898,12.6665039 C89.3468424,12.8569336 89.2304688,13.1171875 89.2304688,13.4472656 L89.2304688,23.9208984 C89.2304688,24.2594401 89.3404948,24.5239258 89.5605469,24.7143555 C89.780599,24.9047852 90.0472005,25 90.3603516,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M117.912109,23.6542969 C115.288411,23.6542969 113.976562,21.9785156 113.976562,18.6269531 C113.976562,17.983724 114.048503,17.3785807 114.192383,16.8115234 C114.336263,16.2444661 114.554199,15.7239583 114.846191,15.25 C115.138184,14.7760417 115.53597,14.3994141 116.039551,14.1201172 C116.543132,13.8408203 117.129232,13.7011719 117.797852,13.7011719 C118.458008,13.7011719 119.035645,13.8492839 119.530762,14.1455078 C120.025879,14.4417318 120.408854,14.8374023 120.679688,15.3325195 C120.950521,15.8276367 121.15153,16.3544922 121.282715,16.9130859 C121.4139,17.4716797 121.479492,18.0556641 121.479492,18.6650391 C121.479492,19.257487 121.420247,19.8266602 121.301758,20.3725586 C121.183268,20.918457 120.99707,21.4453125 120.743164,21.953125 C120.489258,22.4609375 120.118978,22.8693034 119.632324,23.1782227 C119.145671,23.4871419 118.572266,23.6458333 117.912109,23.6542969 Z M118.318359,25.2285156 C119.198568,25.2285156 119.992025,25.0486654 120.69873,24.6889648 C121.405436,24.3292643 121.978841,23.8426107 122.418945,23.2290039 C122.859049,22.6153971 123.195475,21.9150391 123.428223,21.1279297 C123.66097,20.3408203 123.777344,19.5029297 123.777344,18.6142578 C123.777344,17.7086589 123.650391,16.8644206 123.396484,16.081543 C123.142578,15.2986654 122.780762,14.6131185 122.311035,14.0249023 C121.841309,13.4366862 121.244629,12.9754232 120.520996,12.6411133 C119.797363,12.3068034 118.995443,12.1396484 118.115234,12.1396484 C117.175781,12.1396484 116.344238,12.3427734 115.620605,12.7490234 C114.896973,13.1552734 114.344727,13.7434896 113.963867,14.5136719 L113.963867,7.92480469 C113.963867,7.59472656 113.849609,7.33447266 113.621094,7.14404297 C113.392578,6.95361328 113.121745,6.85839844 112.808594,6.85839844 C112.503906,6.85839844 112.237305,6.95361328 112.008789,7.14404297 C111.780273,7.33447266 111.666016,7.59472656 111.666016,7.92480469 L111.666016,23.8955078 C111.666016,24.2340495 111.780273,24.4985352 112.008789,24.6889648 C112.237305,24.8793945 112.503906,24.9746094 112.808594,24.9746094 C113.189453,24.9746094 113.477214,24.8793945 113.671875,24.6889648 C113.866536,24.4985352 113.963867,24.2382813 113.963867,23.9082031 L113.963867,22.7783203 C114.370117,23.5908203 114.951986,24.2023112 115.709473,24.612793 C116.46696,25.0232747 117.336589,25.2285156 118.318359,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M127.334961,17.7382812 C127.377279,17.2135417 127.489421,16.7163086 127.671387,16.246582 C127.853353,15.7768555 128.098796,15.3494466 128.407715,14.9643555 C128.716634,14.5792643 129.112305,14.2724609 129.594727,14.0439453 C130.077148,13.8154297 130.614583,13.7011719 131.207031,13.7011719 C132.349609,13.7011719 133.217122,14.0777995 133.80957,14.8310547 C134.402018,15.5843099 134.736328,16.5533854 134.8125,17.7382812 L127.334961,17.7382812 Z M131.245117,25.2285156 C133.140951,25.2285156 134.757487,24.6276042 136.094727,23.4257812 C136.331706,23.2141927 136.450195,22.9729818 136.450195,22.7021484 C136.450195,22.4905599 136.380371,22.3064779 136.240723,22.1499023 C136.101074,21.9933268 135.929688,21.9150391 135.726562,21.9150391 C135.557292,21.9150391 135.396484,21.9742839 135.244141,22.0927734 C134.634766,22.5582682 134.029622,22.9264323 133.428711,23.1972656 C132.827799,23.468099 132.150716,23.6035156 131.397461,23.6035156 C130.204102,23.5865885 129.226562,23.2036133 128.464844,22.4545898 C127.703125,21.7055664 127.313802,20.6031901 127.296875,19.1474609 L136.005859,19.1474609 C136.276693,19.1474609 136.479818,19.0670573 136.615234,18.90625 C136.750651,18.7454427 136.818359,18.5423177 136.818359,18.296875 C136.792969,17.4420573 136.670247,16.6570638 136.450195,15.9418945 C136.230143,15.2267253 135.900065,14.5792643 135.459961,13.9995117 C135.019857,13.4197591 134.427409,12.9648438 133.682617,12.6347656 C132.937826,12.3046875 132.070312,12.1396484 131.080078,12.1396484 C129.827474,12.1396484 128.731445,12.4443359 127.791992,13.0537109 C126.852539,13.6630859 126.150065,14.4544271 125.68457,15.4277344 C125.219076,16.4010417 124.986328,17.4759115 124.986328,18.6523437 C124.986328,19.9895833 125.263509,21.159668 125.817871,22.1625977 C126.372233,23.1655273 127.119141,23.9251302 128.058594,24.4414063 C128.998047,24.9576823 130.060221,25.2200521 131.245117,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M143.981445,23.6542969 C143.321289,23.6458333 142.747884,23.4871419 142.26123,23.1782227 C141.774577,22.8693034 141.404297,22.4609375 141.150391,21.953125 C140.896484,21.4453125 140.710286,20.918457 140.591797,20.3725586 C140.473307,19.8266602 140.414062,19.257487 140.414062,18.6650391 C140.414062,18.0556641 140.479655,17.4716797 140.61084,16.9130859 C140.742025,16.3544922 140.943034,15.8276367 141.213867,15.3325195 C141.484701,14.8374023 141.867676,14.4417318 142.362793,14.1455078 C142.85791,13.8492839 143.435547,13.7011719 144.095703,13.7011719 C144.764323,13.7011719 145.350423,13.8408203 145.854004,14.1201172 C146.357585,14.3994141 146.755371,14.7760417 147.047363,15.25 C147.339355,15.7239583 147.557292,16.2444661 147.701172,16.8115234 C147.845052,17.3785807 147.916992,17.983724 147.916992,18.6269531 C147.916992,21.9785156 146.605143,23.6542969 143.981445,23.6542969 Z M143.575195,25.2285156 C144.556966,25.2285156 145.426595,25.0232747 146.184082,24.612793 C146.941569,24.2023112 147.523438,23.5908203 147.929688,22.7783203 L147.929688,23.9082031 C147.929688,24.2382813 148.027018,24.4985352 148.22168,24.6889648 C148.416341,24.8793945 148.704102,24.9746094 149.084961,24.9746094 C149.389648,24.9746094 149.65625,24.8793945 149.884766,24.6889648 C150.113281,24.4985352 150.227539,24.2340495 150.227539,23.8955078 L150.227539,7.92480469 C150.227539,7.59472656 150.113281,7.33447266 149.884766,7.14404297 C149.65625,6.95361328 149.389648,6.85839844 149.084961,6.85839844 C148.77181,6.85839844 148.500977,6.95361328 148.272461,7.14404297 C148.043945,7.33447266 147.929688,7.59472656 147.929688,7.92480469 L147.929688,14.5136719 C147.548828,13.7434896 146.996582,13.1552734 146.272949,12.7490234 C145.549316,12.3427734 144.717773,12.1396484 143.77832,12.1396484 C142.898112,12.1396484 142.096191,12.3068034 141.372559,12.6411133 C140.648926,12.9754232 140.052246,13.4366862 139.58252,14.0249023 C139.112793,14.6131185 138.750977,15.2986654 138.49707,16.081543 C138.243164,16.8644206 138.116211,17.7086589 138.116211,18.6142578 C138.116211,19.5029297 138.232585,20.3408203 138.465332,21.1279297 C138.698079,21.9150391 139.034505,22.6153971 139.474609,23.2290039 C139.914714,23.8426107 140.488118,24.3292643 141.194824,24.6889648 C141.90153,25.0486654 142.694987,25.2285156 143.575195,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M158.088867,23.6542969 C157.428711,23.6458333 156.855306,23.4871419 156.368652,23.1782227 C155.881999,22.8693034 155.511719,22.4609375 155.257812,21.953125 C155.003906,21.4453125 154.817708,20.918457 154.699219,20.3725586 C154.580729,19.8266602 154.521484,19.257487 154.521484,18.6650391 C154.521484,18.0556641 154.587077,17.4716797 154.718262,16.9130859 C154.849447,16.3544922 155.050456,15.8276367 155.321289,15.3325195 C155.592122,14.8374023 155.975098,14.4417318 156.470215,14.1455078 C156.965332,13.8492839 157.542969,13.7011719 158.203125,13.7011719 C158.871745,13.7011719 159.457845,13.8408203 159.961426,14.1201172 C160.465007,14.3994141 160.862793,14.7760417 161.154785,15.25 C161.446777,15.7239583 161.664714,16.2444661 161.808594,16.8115234 C161.952474,17.3785807 162.024414,17.983724 162.024414,18.6269531 C162.024414,21.9785156 160.712565,23.6542969 158.088867,23.6542969 Z M157.682617,25.2285156 C158.664388,25.2285156 159.534017,25.0232747 160.291504,24.612793 C161.048991,24.2023112 161.630859,23.5908203 162.037109,22.7783203 L162.037109,23.9082031 C162.037109,24.2382813 162.13444,24.4985352 162.329102,24.6889648 C162.523763,24.8793945 162.811523,24.9746094 163.192383,24.9746094 C163.49707,24.9746094 163.763672,24.8793945 163.992188,24.6889648 C164.220703,24.4985352 164.334961,24.2340495 164.334961,23.8955078 L164.334961,7.92480469 C164.334961,7.59472656 164.220703,7.33447266 163.992188,7.14404297 C163.763672,6.95361328 163.49707,6.85839844 163.192383,6.85839844 C162.879232,6.85839844 162.608398,6.95361328 162.379883,7.14404297 C162.151367,7.33447266 162.037109,7.59472656 162.037109,7.92480469 L162.037109,14.5136719 C161.65625,13.7434896 161.104004,13.1552734 160.380371,12.7490234 C159.656738,12.3427734 158.825195,12.1396484 157.885742,12.1396484 C157.005534,12.1396484 156.203613,12.3068034 155.47998,12.6411133 C154.756348,12.9754232 154.159668,13.4366862 153.689941,14.0249023 C153.220215,14.6131185 152.858398,15.2986654 152.604492,16.081543 C152.350586,16.8644206 152.223633,17.7086589 152.223633,18.6142578 C152.223633,19.5029297 152.340007,20.3408203 152.572754,21.1279297 C152.805501,21.9150391 153.141927,22.6153971 153.582031,23.2290039 C154.022135,23.8426107 154.59554,24.3292643 155.302246,24.6889648 C156.008952,25.0486654 156.802409,25.2285156 157.682617,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M168.476562,9.44824219 C168.92513,9.44824219 169.284831,9.31917318 169.555664,9.06103516 C169.826497,8.80289714 169.961914,8.46647135 169.961914,8.05175781 C169.961914,7.62858073 169.826497,7.28792318 169.555664,7.02978516 C169.284831,6.77164714 168.929362,6.64257812 168.489258,6.64257812 C168.04069,6.64257812 167.678874,6.77376302 167.403809,7.03613281 C167.128743,7.2985026 166.991211,7.63704427 166.991211,8.05175781 C166.991211,8.46647135 167.126628,8.80289714 167.397461,9.06103516 C167.668294,9.31917318 168.027995,9.44824219 168.476562,9.44824219 Z M168.463867,25 C168.777018,25 169.049967,24.8963216 169.282715,24.6889648 C169.515462,24.4816081 169.631836,24.2001953 169.631836,23.8447266 L169.631836,13.5234375 C169.631836,13.1679688 169.519694,12.8886719 169.29541,12.6855469 C169.071126,12.4824219 168.806641,12.3808594 168.501953,12.3808594 C168.188802,12.3808594 167.915853,12.4824219 167.683105,12.6855469 C167.450358,12.8886719 167.333984,13.1679688 167.333984,13.5234375 L167.333984,23.8447266 C167.333984,24.2171224 167.44401,24.5027669 167.664062,24.7016602 C167.884115,24.9005534 168.150716,25 168.463867,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M173.557617,25 C173.870768,25 174.143717,24.9026693 174.376465,24.7080078 C174.609212,24.5133464 174.725586,24.2467448 174.725586,23.9082031 L174.725586,17.5732422 C174.784831,16.3798828 175.148763,15.4361979 175.817383,14.7421875 C176.486003,14.0481771 177.306966,13.7011719 178.280273,13.7011719 C179.194336,13.7011719 179.903158,13.9847005 180.406738,14.5517578 C180.910319,15.1188151 181.162109,15.9440104 181.162109,17.0273438 L181.162109,23.9082031 C181.162109,24.2552083 181.274251,24.5239258 181.498535,24.7143555 C181.722819,24.9047852 181.991536,25 182.304688,25 C182.617839,25 182.886556,24.9047852 183.11084,24.7143555 C183.335124,24.5239258 183.447266,24.2552083 183.447266,23.9082031 L183.447266,17.0527344 C183.447266,15.4023437 183.032552,14.1708984 182.203125,13.3583984 C181.373698,12.5458984 180.260742,12.1396484 178.864258,12.1396484 C177.890951,12.1396484 177.038249,12.3470052 176.306152,12.7617188 C175.574056,13.1764323 175.047201,13.7688802 174.725586,14.5390625 L174.725586,13.4345703 C174.725586,13.1044922 174.613444,12.8484701 174.38916,12.6665039 C174.164876,12.4845378 173.896159,12.3935547 173.583008,12.3935547 C173.269857,12.3935547 172.999023,12.4887695 172.770508,12.6791992 C172.541992,12.8696289 172.427734,13.1341146 172.427734,13.4726563 L172.427734,23.9082031 C172.427734,24.2552083 172.53776,24.5239258 172.757812,24.7143555 C172.977865,24.9047852 173.244466,25 173.557617,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M191.460938,23.4765625 C190.34375,23.4765625 189.421224,23.0703125 188.693359,22.2578125 C187.965495,21.4453125 187.601562,20.2519531 187.601562,18.6777344 C187.601562,17.7382812 187.732747,16.9046224 187.995117,16.1767578 C188.257487,15.4488932 188.684896,14.8543294 189.277344,14.3930664 C189.869792,13.9318034 190.597656,13.7011719 191.460938,13.7011719 C192.002604,13.7011719 192.489258,13.7921549 192.920898,13.9741211 C193.352539,14.1560872 193.705892,14.4015299 193.980957,14.7104492 C194.256022,15.0193685 194.484538,15.3875326 194.666504,15.8149414 C194.84847,16.2423503 194.977539,16.6866862 195.053711,17.1479492 C195.129883,17.6092122 195.167969,18.1022135 195.167969,18.6269531 C195.167969,20.1588542 194.820964,21.3500977 194.126953,22.2006836 C193.432943,23.0512695 192.544271,23.4765625 191.460938,23.4765625 Z M191.359375,30.3701172 C193.229818,30.3701172 194.70459,29.9130859 195.783691,28.9990234 C196.862793,28.0849609 197.402344,26.7265625 197.402344,24.9238281 L197.402344,13.4599609 C197.402344,13.1214193 197.296549,12.8569336 197.084961,12.6665039 C196.873372,12.4760742 196.619466,12.3808594 196.323242,12.3808594 C196.052409,12.3808594 195.813314,12.4633789 195.605957,12.628418 C195.3986,12.793457 195.282227,13.0198568 195.256836,13.3076172 L195.256836,14.5898437 C194.418945,12.9563802 193.013997,12.1396484 191.041992,12.1396484 C189.848633,12.1396484 188.811849,12.4401042 187.931641,13.0410156 C187.051432,13.6419271 186.393392,14.4353841 185.95752,15.4213867 C185.521647,16.4073893 185.303711,17.5182292 185.303711,18.7539063 C185.303711,19.625651 185.43278,20.438151 185.690918,21.1914062 C185.949056,21.9446615 186.31722,22.6048177 186.79541,23.171875 C187.2736,23.7389323 187.882975,24.1853841 188.623535,24.5112305 C189.364095,24.8370768 190.191406,25 191.105469,25 C192.078776,25 192.914551,24.7736003 193.612793,24.3208008 C194.311035,23.8680013 194.833659,23.2565104 195.180664,22.4863281 L195.180664,25 C195.180664,26.21875 194.846354,27.1518555 194.177734,27.7993164 C193.509115,28.4467773 192.51888,28.7705078 191.207031,28.7705078 C191.003906,28.7705078 190.802897,28.7620443 190.604004,28.7451172 C190.405111,28.7281901 190.227376,28.7091471 190.070801,28.6879883 C189.914225,28.6668294 189.740723,28.6350911 189.550293,28.5927734 C189.359863,28.5504557 189.20752,28.5166016 189.093262,28.4912109 C188.979004,28.4658203 188.830892,28.4235026 188.648926,28.3642578 C188.46696,28.305013 188.342122,28.2626953 188.274414,28.2373047 C188.206706,28.2119141 188.0861,28.1653646 187.912598,28.0976563 C187.739095,28.0299479 187.635417,27.991862 187.601562,27.9833984 C187.508464,27.9495443 187.411133,27.9326172 187.30957,27.9326172 C187.097982,27.9326172 186.918132,28.0109049 186.77002,28.1674805 C186.621908,28.324056 186.547852,28.508138 186.547852,28.7197266 C186.547852,29.0328776 186.708659,29.2783203 187.030273,29.4560547 C188.172852,30.0654297 189.615885,30.3701172 191.359375,30.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M214.064453,25.2792969 C214.834635,25.2792969 215.568848,25.1946615 216.26709,25.0253906 C216.965332,24.8561198 217.619141,24.59375 218.228516,24.2382813 C218.837891,23.8828125 219.364746,23.449056 219.809082,22.9370117 C220.253418,22.4249674 220.604655,21.8071289 220.862793,21.0834961 C221.120931,20.3598633 221.25,19.5621745 221.25,18.6904297 L221.25,8.06445312 C221.25,7.67513021 221.125163,7.37044271 220.875488,7.15039063 C220.625814,6.93033854 220.327474,6.8203125 219.980469,6.8203125 C219.625,6.8203125 219.320312,6.93033854 219.066406,7.15039063 C218.8125,7.37044271 218.685547,7.67513021 218.685547,8.06445312 L218.685547,18.6269531 C218.685547,19.3886719 218.554362,20.069987 218.291992,20.6708984 C218.029622,21.2718099 217.678385,21.7542318 217.238281,22.1181641 C216.798177,22.4820964 216.307292,22.7571615 215.765625,22.9433594 C215.223958,23.1295573 214.656901,23.2226562 214.064453,23.2226562 C213.472005,23.2226562 212.907064,23.1295573 212.369629,22.9433594 C211.832194,22.7571615 211.34554,22.4820964 210.909668,22.1181641 C210.473796,21.7542318 210.124674,21.2718099 209.862305,20.6708984 C209.599935,20.069987 209.46875,19.3886719 209.46875,18.6269531 L209.46875,8.06445312 C209.46875,7.67513021 209.341797,7.37044271 209.087891,7.15039063 C208.833984,6.93033854 208.533529,6.8203125 208.186523,6.8203125 C207.831055,6.8203125 207.526367,6.93033854 207.272461,7.15039063 C207.018555,7.37044271 206.891602,7.67513021 206.891602,8.06445312 L206.891602,18.6904297 C206.891602,19.7652995 207.086263,20.7301432 207.475586,21.5849609 C207.864909,22.4397786 208.39388,23.1316732 209.0625,23.6606445 C209.73112,24.1896159 210.490723,24.5916341 211.341309,24.8666992 C212.191895,25.1417643 213.099609,25.2792969 214.064453,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M224.401367,23.8574219 C224.90918,24.2298177 225.588379,24.5535482 226.438965,24.8286133 C227.289551,25.1036784 228.167643,25.2412109 229.073242,25.2412109 C230.046549,25.2412109 230.901367,25.1079102 231.637695,24.8413086 C232.374023,24.574707 232.968587,24.1451823 233.421387,23.5527344 C233.874186,22.9602865 234.100586,22.2324219 234.100586,21.3691406 C234.100586,20.4550781 233.827637,19.7039388 233.281738,19.1157227 C232.73584,18.5275065 231.823893,18.0768229 230.545898,17.7636719 L228.349609,17.2177734 C227.427083,16.9892578 226.826172,16.7713216 226.546875,16.5639648 C226.267578,16.3566081 226.12793,15.9990234 226.12793,15.4912109 C226.12793,14.8649089 226.398763,14.4057617 226.94043,14.1137695 C227.482096,13.8217773 228.209961,13.6757813 229.124023,13.6757813 C229.411784,13.6757813 229.695312,13.6948242 229.974609,13.7329102 C230.253906,13.7709961 230.528971,13.8260091 230.799805,13.8979492 C231.070638,13.9698893 231.284342,14.03125 231.440918,14.0820313 C231.597493,14.1328125 231.800618,14.2068685 232.050293,14.3041992 C232.299967,14.4015299 232.4375,14.4544271 232.462891,14.4628906 C232.598307,14.5136719 232.729492,14.5390625 232.856445,14.5390625 C233.076497,14.5390625 233.252116,14.4692383 233.383301,14.3295898 C233.514486,14.1899414 233.580078,14.0227865 233.580078,13.828125 C233.580078,13.4980469 233.415039,13.2483724 233.084961,13.0791016 C232.62793,12.8336589 232.037598,12.6114909 231.313965,12.4125977 C230.590332,12.2137044 229.818034,12.1142578 228.99707,12.1142578 C228.311523,12.1142578 227.676758,12.1798503 227.092773,12.3110352 C226.508789,12.4422201 225.981934,12.6411133 225.512207,12.9077148 C225.04248,13.1743164 224.672201,13.5361328 224.401367,13.9931641 C224.130534,14.4501953 223.995117,14.9791667 223.995117,15.5800781 C223.995117,15.9609375 224.033203,16.2994792 224.109375,16.5957031 C224.185547,16.8919271 224.308268,17.1500651 224.477539,17.3701172 C224.64681,17.5901693 224.828776,17.7784831 225.023438,17.9350586 C225.218099,18.0916341 225.478353,18.2376302 225.804199,18.3730469 C226.130046,18.5084635 226.434733,18.6184896 226.718262,18.703125 C227.00179,18.7877604 227.37207,18.8893229 227.829102,19.0078125 L230.076172,19.5664062 C230.820964,19.7526042 231.354167,19.9980469 231.675781,20.3027344 C231.997396,20.6074219 232.158203,21.0136719 232.158203,21.5214844 C232.158203,22.2324219 231.872559,22.7719727 231.30127,23.1401367 C230.72998,23.5083008 229.978841,23.6923828 229.047852,23.6923828 C227.75293,23.6839193 226.525716,23.319987 225.366211,22.6005859 C225.180013,22.4820964 224.985352,22.4228516 224.782227,22.4228516 C224.562174,22.4228516 224.380208,22.4969076 224.236328,22.6450195 C224.092448,22.7931315 224.020508,22.96875 224.020508,23.171875 C224.020508,23.4511719 224.147461,23.6796875 224.401367,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M239.917969,23.7050781 C239.130859,23.7050781 238.508789,23.5252279 238.051758,23.1655273 C237.594727,22.8058268 237.366211,22.2408854 237.366211,21.4707031 C237.366211,21.047526 237.427572,20.7005208 237.550293,20.4296875 C237.673014,20.1588542 237.861328,19.9282227 238.115234,19.737793 C238.369141,19.5473633 238.75,19.4034831 239.257812,19.3061523 C239.765625,19.2088216 240.349609,19.1411133 241.009766,19.1030273 C241.669922,19.0649414 242.52474,19.0458984 243.574219,19.0458984 L243.574219,19.4267578 C243.574219,20.6708984 243.208171,21.694987 242.476074,22.4990234 C241.743978,23.3030599 240.891276,23.7050781 239.917969,23.7050781 Z M239.676758,25.2285156 C241.496419,25.2285156 242.799805,24.4033203 243.586914,22.7529297 L243.586914,23.9716797 C243.586914,24.3017578 243.694824,24.5577799 243.910645,24.7397461 C244.126465,24.9217122 244.386719,25.0126953 244.691406,25.0126953 C244.996094,25.0126953 245.264811,24.9174805 245.497559,24.7270508 C245.730306,24.5366211 245.84668,24.2763672 245.84668,23.9462891 L245.84668,16.5449219 C245.84668,15.046875 245.408691,13.938151 244.532715,13.21875 C243.656738,12.499349 242.444336,12.1396484 240.895508,12.1396484 C239.08431,12.1396484 237.522786,12.5078125 236.210938,13.2441406 C235.973958,13.3795573 235.855469,13.5742187 235.855469,13.828125 C235.855469,14.0481771 235.935872,14.2491862 236.09668,14.4311523 C236.257487,14.6131185 236.447917,14.7041016 236.667969,14.7041016 C236.786458,14.7041016 236.888021,14.6829427 236.972656,14.640625 C237.404297,14.4459635 237.768229,14.2936198 238.064453,14.1835937 C238.360677,14.0735677 238.766927,13.9635417 239.283203,13.8535156 C239.799479,13.7434896 240.315755,13.6884766 240.832031,13.6884766 C241.695312,13.6884766 242.368164,13.9042969 242.850586,14.3359375 C243.333008,14.7675781 243.574219,15.4361979 243.574219,16.3417969 L243.574219,17.7255859 C242.634766,17.7255859 241.832845,17.7382812 241.168457,17.7636719 C240.504069,17.7890625 239.858724,17.8334961 239.232422,17.8969727 C238.60612,17.9604492 238.089844,18.0535482 237.683594,18.1762695 C237.277344,18.2989909 236.904948,18.4555664 236.566406,18.6459961 C236.227865,18.8364258 235.965495,19.0691732 235.779297,19.3442383 C235.593099,19.6193034 235.449219,19.9388021 235.347656,20.3027344 C235.246094,20.6666667 235.195312,21.0898438 235.195312,21.5722656 C235.195312,22.7402344 235.610026,23.6416016 236.439453,24.2763672 C237.26888,24.9111328 238.347982,25.2285156 239.676758,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M253.860352,23.4765625 C252.743164,23.4765625 251.820638,23.0703125 251.092773,22.2578125 C250.364909,21.4453125 250.000977,20.2519531 250.000977,18.6777344 C250.000977,17.7382812 250.132161,16.9046224 250.394531,16.1767578 C250.656901,15.4488932 251.08431,14.8543294 251.676758,14.3930664 C252.269206,13.9318034 252.99707,13.7011719 253.860352,13.7011719 C254.402018,13.7011719 254.888672,13.7921549 255.320312,13.9741211 C255.751953,14.1560872 256.105306,14.4015299 256.380371,14.7104492 C256.655436,15.0193685 256.883952,15.3875326 257.065918,15.8149414 C257.247884,16.2423503 257.376953,16.6866862 257.453125,17.1479492 C257.529297,17.6092122 257.567383,18.1022135 257.567383,18.6269531 C257.567383,20.1588542 257.220378,21.3500977 256.526367,22.2006836 C255.832357,23.0512695 254.943685,23.4765625 253.860352,23.4765625 Z M253.758789,30.3701172 C255.629232,30.3701172 257.104004,29.9130859 258.183105,28.9990234 C259.262207,28.0849609 259.801758,26.7265625 259.801758,24.9238281 L259.801758,13.4599609 C259.801758,13.1214193 259.695964,12.8569336 259.484375,12.6665039 C259.272786,12.4760742 259.01888,12.3808594 258.722656,12.3808594 C258.451823,12.3808594 258.212728,12.4633789 258.005371,12.628418 C257.798014,12.793457 257.681641,13.0198568 257.65625,13.3076172 L257.65625,14.5898437 C256.818359,12.9563802 255.413411,12.1396484 253.441406,12.1396484 C252.248047,12.1396484 251.211263,12.4401042 250.331055,13.0410156 C249.450846,13.6419271 248.792806,14.4353841 248.356934,15.4213867 C247.921061,16.4073893 247.703125,17.5182292 247.703125,18.7539063 C247.703125,19.625651 247.832194,20.438151 248.090332,21.1914062 C248.34847,21.9446615 248.716634,22.6048177 249.194824,23.171875 C249.673014,23.7389323 250.282389,24.1853841 251.022949,24.5112305 C251.763509,24.8370768 252.59082,25 253.504883,25 C254.47819,25 255.313965,24.7736003 256.012207,24.3208008 C256.710449,23.8680013 257.233073,23.2565104 257.580078,22.4863281 L257.580078,25 C257.580078,26.21875 257.245768,27.1518555 256.577148,27.7993164 C255.908529,28.4467773 254.918294,28.7705078 253.606445,28.7705078 C253.40332,28.7705078 253.202311,28.7620443 253.003418,28.7451172 C252.804525,28.7281901 252.62679,28.7091471 252.470215,28.6879883 C252.313639,28.6668294 252.140137,28.6350911 251.949707,28.5927734 C251.759277,28.5504557 251.606934,28.5166016 251.492676,28.4912109 C251.378418,28.4658203 251.230306,28.4235026 251.04834,28.3642578 C250.866374,28.305013 250.741536,28.2626953 250.673828,28.2373047 C250.60612,28.2119141 250.485514,28.1653646 250.312012,28.0976563 C250.138509,28.0299479 250.034831,27.991862 250.000977,27.9833984 C249.907878,27.9495443 249.810547,27.9326172 249.708984,27.9326172 C249.497396,27.9326172 249.317546,28.0109049 249.169434,28.1674805 C249.021322,28.324056 248.947266,28.508138 248.947266,28.7197266 C248.947266,29.0328776 249.108073,29.2783203 249.429688,29.4560547 C250.572266,30.0654297 252.015299,30.3701172 253.758789,30.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M264.083008,17.7382812 C264.125326,17.2135417 264.237467,16.7163086 264.419434,16.246582 C264.6014,15.7768555 264.846842,15.3494466 265.155762,14.9643555 C265.464681,14.5792643 265.860352,14.2724609 266.342773,14.0439453 C266.825195,13.8154297 267.36263,13.7011719 267.955078,13.7011719 C269.097656,13.7011719 269.965169,14.0777995 270.557617,14.8310547 C271.150065,15.5843099 271.484375,16.5533854 271.560547,17.7382812 L264.083008,17.7382812 Z M267.993164,25.2285156 C269.888997,25.2285156 271.505534,24.6276042 272.842773,23.4257812 C273.079753,23.2141927 273.198242,22.9729818 273.198242,22.7021484 C273.198242,22.4905599 273.128418,22.3064779 272.98877,22.1499023 C272.849121,21.9933268 272.677734,21.9150391 272.474609,21.9150391 C272.305339,21.9150391 272.144531,21.9742839 271.992188,22.0927734 C271.382812,22.5582682 270.777669,22.9264323 270.176758,23.1972656 C269.575846,23.468099 268.898763,23.6035156 268.145508,23.6035156 C266.952148,23.5865885 265.974609,23.2036133 265.212891,22.4545898 C264.451172,21.7055664 264.061849,20.6031901 264.044922,19.1474609 L272.753906,19.1474609 C273.02474,19.1474609 273.227865,19.0670573 273.363281,18.90625 C273.498698,18.7454427 273.566406,18.5423177 273.566406,18.296875 C273.541016,17.4420573 273.418294,16.6570638 273.198242,15.9418945 C272.97819,15.2267253 272.648112,14.5792643 272.208008,13.9995117 C271.767904,13.4197591 271.175456,12.9648438 270.430664,12.6347656 C269.685872,12.3046875 268.818359,12.1396484 267.828125,12.1396484 C266.575521,12.1396484 265.479492,12.4443359 264.540039,13.0537109 C263.600586,13.6630859 262.898112,14.4544271 262.432617,15.4277344 C261.967122,16.4010417 261.734375,17.4759115 261.734375,18.6523437 C261.734375,19.9895833 262.011556,21.159668 262.565918,22.1625977 C263.12028,23.1655273 263.867188,23.9251302 264.806641,24.4414063 C265.746094,24.9576823 266.808268,25.2200521 267.993164,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M275.638672,23.8574219 C276.146484,24.2298177 276.825684,24.5535482 277.67627,24.8286133 C278.526855,25.1036784 279.404948,25.2412109 280.310547,25.2412109 C281.283854,25.2412109 282.138672,25.1079102 282.875,24.8413086 C283.611328,24.574707 284.205892,24.1451823 284.658691,23.5527344 C285.111491,22.9602865 285.337891,22.2324219 285.337891,21.3691406 C285.337891,20.4550781 285.064941,19.7039388 284.519043,19.1157227 C283.973145,18.5275065 283.061198,18.0768229 281.783203,17.7636719 L279.586914,17.2177734 C278.664388,16.9892578 278.063477,16.7713216 277.78418,16.5639648 C277.504883,16.3566081 277.365234,15.9990234 277.365234,15.4912109 C277.365234,14.8649089 277.636068,14.4057617 278.177734,14.1137695 C278.719401,13.8217773 279.447266,13.6757813 280.361328,13.6757813 C280.649089,13.6757813 280.932617,13.6948242 281.211914,13.7329102 C281.491211,13.7709961 281.766276,13.8260091 282.037109,13.8979492 C282.307943,13.9698893 282.521647,14.03125 282.678223,14.0820313 C282.834798,14.1328125 283.037923,14.2068685 283.287598,14.3041992 C283.537272,14.4015299 283.674805,14.4544271 283.700195,14.4628906 C283.835612,14.5136719 283.966797,14.5390625 284.09375,14.5390625 C284.313802,14.5390625 284.489421,14.4692383 284.620605,14.3295898 C284.75179,14.1899414 284.817383,14.0227865 284.817383,13.828125 C284.817383,13.4980469 284.652344,13.2483724 284.322266,13.0791016 C283.865234,12.8336589 283.274902,12.6114909 282.55127,12.4125977 C281.827637,12.2137044 281.055339,12.1142578 280.234375,12.1142578 C279.548828,12.1142578 278.914062,12.1798503 278.330078,12.3110352 C277.746094,12.4422201 277.219238,12.6411133 276.749512,12.9077148 C276.279785,13.1743164 275.909505,13.5361328 275.638672,13.9931641 C275.367839,14.4501953 275.232422,14.9791667 275.232422,15.5800781 C275.232422,15.9609375 275.270508,16.2994792 275.34668,16.5957031 C275.422852,16.8919271 275.545573,17.1500651 275.714844,17.3701172 C275.884115,17.5901693 276.066081,17.7784831 276.260742,17.9350586 C276.455404,18.0916341 276.715658,18.2376302 277.041504,18.3730469 C277.36735,18.5084635 277.672038,18.6184896 277.955566,18.703125 C278.239095,18.7877604 278.609375,18.8893229 279.066406,19.0078125 L281.313477,19.5664062 C282.058268,19.7526042 282.591471,19.9980469 282.913086,20.3027344 C283.234701,20.6074219 283.395508,21.0136719 283.395508,21.5214844 C283.395508,22.2324219 283.109863,22.7719727 282.538574,23.1401367 C281.967285,23.5083008 281.216146,23.6923828 280.285156,23.6923828 C278.990234,23.6839193 277.763021,23.319987 276.603516,22.6005859 C276.417318,22.4820964 276.222656,22.4228516 276.019531,22.4228516 C275.799479,22.4228516 275.617513,22.4969076 275.473633,22.6450195 C275.329753,22.7931315 275.257812,22.96875 275.257812,23.171875 C275.257812,23.4511719 275.384766,23.6796875 275.638672,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-13" transform="translate(0, 172)" xlink:href="#path-26" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M4.81152344,14.7294922 L4.81152344,8.88964844 L8.84863281,8.88964844 C9.2125651,8.88964844 9.52571615,8.89811198 9.78808594,8.91503906 C10.0504557,8.93196615 10.3551432,8.9679362 10.7021484,9.02294922 C11.0491536,9.07796224 11.3432617,9.16682943 11.5844727,9.28955078 C11.8256836,9.41227214 12.0563151,9.57096354 12.2763672,9.765625 C12.4964193,9.96028646 12.6635742,10.2184245 12.777832,10.5400391 C12.8920898,10.8616536 12.9492188,11.2340495 12.9492188,11.6572266 C12.9492188,12.1735026 12.8645833,12.6199544 12.6953125,12.996582 C12.5260417,13.3732096 12.3081055,13.6715495 12.0415039,13.8916016 C11.7749023,14.1116536 11.4384766,14.2851562 11.0322266,14.4121094 C10.6259766,14.5390625 10.2281901,14.6236979 9.83886719,14.6660156 C9.44954427,14.7083333 9.00520833,14.7294922 8.50585938,14.7294922 L4.81152344,14.7294922 Z M3.51660156,24.9873047 C3.86360677,24.9873047 4.16617839,24.8815104 4.42431641,24.6699219 C4.68245443,24.4583333 4.81152344,24.1578776 4.81152344,23.7685547 L4.81152344,16.6972656 L9.05175781,16.6972656 C9.72884115,16.6972656 10.266276,16.745931 10.6640625,16.8432617 C11.061849,16.9405924 11.394043,17.133138 11.6606445,17.4208984 C11.9272461,17.7086589 12.1197917,18.0683594 12.2382813,18.5 C12.3567708,18.9316406 12.4583333,19.5325521 12.5429688,20.3027344 L12.9365234,23.8447266 C12.9703776,24.2086589 13.1248372,24.4900716 13.3999023,24.6889648 C13.6749674,24.8878581 13.9817708,24.9873047 14.3203125,24.9873047 C14.6503906,24.9873047 14.9339193,24.8815104 15.1708984,24.6699219 C15.4078776,24.4583333 15.5263672,24.1832682 15.5263672,23.8447266 C15.5263672,23.7685547 15.5221354,23.7093099 15.5136719,23.6669922 L15.0566406,19.6806641 C14.7942708,17.3955078 13.6686198,16.0371094 11.6796875,15.6054688 L11.6796875,15.5546875 C12.9492187,15.3177083 13.905599,14.8310547 14.5488281,14.0947266 C15.1920573,13.3583984 15.5136719,12.4654948 15.5136719,11.4160156 C15.5136719,10.688151 15.3888346,10.0470378 15.1391602,9.49267578 C14.8894857,8.9383138 14.5657552,8.49820964 14.1679688,8.17236328 C13.7701823,7.84651693 13.2792969,7.58203125 12.6953125,7.37890625 C12.1113281,7.17578125 11.5294596,7.0382487 10.949707,6.96630859 C10.3699544,6.89436849 9.73307292,6.85839844 9.0390625,6.85839844 L3.52929688,6.85839844 C3.17382813,6.85839844 2.87125651,6.97900391 2.62158203,7.22021484 C2.37190755,7.46142578 2.24707031,7.75553385 2.24707031,8.10253906 L2.24707031,23.7685547 C2.24707031,24.1578776 2.37190755,24.4583333 2.62158203,24.6699219 C2.87125651,24.8815104 3.16959635,24.9873047 3.51660156,24.9873047 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M19.8203125,17.7382812 C19.8626302,17.2135417 19.9747721,16.7163086 20.1567383,16.246582 C20.3387044,15.7768555 20.5841471,15.3494466 20.8930664,14.9643555 C21.2019857,14.5792643 21.5976562,14.2724609 22.0800781,14.0439453 C22.5625,13.8154297 23.0999349,13.7011719 23.6923828,13.7011719 C24.8349609,13.7011719 25.702474,14.0777995 26.2949219,14.8310547 C26.8873698,15.5843099 27.2216797,16.5533854 27.2978516,17.7382812 L19.8203125,17.7382812 Z M23.7304688,25.2285156 C25.6263021,25.2285156 27.2428385,24.6276042 28.5800781,23.4257812 C28.8170573,23.2141927 28.9355469,22.9729818 28.9355469,22.7021484 C28.9355469,22.4905599 28.8657227,22.3064779 28.7260742,22.1499023 C28.5864258,21.9933268 28.4150391,21.9150391 28.2119141,21.9150391 C28.0426432,21.9150391 27.8818359,21.9742839 27.7294922,22.0927734 C27.1201172,22.5582682 26.514974,22.9264323 25.9140625,23.1972656 C25.313151,23.468099 24.6360677,23.6035156 23.8828125,23.6035156 C22.6894531,23.5865885 21.7119141,23.2036133 20.9501953,22.4545898 C20.1884766,21.7055664 19.7991536,20.6031901 19.7822266,19.1474609 L28.4912109,19.1474609 C28.7620443,19.1474609 28.9651693,19.0670573 29.1005859,18.90625 C29.2360026,18.7454427 29.3037109,18.5423177 29.3037109,18.296875 C29.2783203,17.4420573 29.155599,16.6570638 28.9355469,15.9418945 C28.7154948,15.2267253 28.3854167,14.5792643 27.9453125,13.9995117 C27.5052083,13.4197591 26.9127604,12.9648438 26.1679688,12.6347656 C25.4231771,12.3046875 24.5556641,12.1396484 23.5654297,12.1396484 C22.3128255,12.1396484 21.2167969,12.4443359 20.2773437,13.0537109 C19.3378906,13.6630859 18.6354167,14.4544271 18.1699219,15.4277344 C17.7044271,16.4010417 17.4716797,17.4759115 17.4716797,18.6523437 C17.4716797,19.9895833 17.7488607,21.159668 18.3032227,22.1625977 C18.8575846,23.1655273 19.6044922,23.9251302 20.5439453,24.4414063 C21.4833984,24.9576823 22.5455729,25.2200521 23.7304688,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M45.2900391,25 C46.0009766,25 46.5003255,24.5768229 46.7880859,23.7304688 L50.2539062,13.8027344 C50.3046875,13.6673177 50.3300781,13.531901 50.3300781,13.3964844 C50.3300781,13.1002604 50.222168,12.8611654 50.0063477,12.6791992 C49.7905273,12.4972331 49.5429687,12.40625 49.2636719,12.40625 C48.7558594,12.40625 48.4215495,12.6559245 48.2607422,13.1552734 L45.1123047,22.5625 L41.9638672,13.6503906 C41.6676432,12.8040365 41.155599,12.3808594 40.4277344,12.3808594 C39.7083333,12.3808594 39.2005208,12.8040365 38.9042969,13.6503906 L35.7558594,22.5625 L32.6074219,13.1552734 C32.4466146,12.6559245 32.1123047,12.40625 31.6044922,12.40625 C31.3251953,12.40625 31.0776367,12.4972331 30.8618164,12.6791992 C30.6459961,12.8611654 30.5380859,13.1002604 30.5380859,13.3964844 C30.5380859,13.531901 30.5634766,13.6673177 30.6142578,13.8027344 L34.0800781,23.7304688 C34.3678385,24.5768229 34.8671875,25 35.578125,25 C36.3229167,25 36.8645833,24.5345052 37.203125,23.6035156 L40.4277344,14.5644531 L43.6650391,23.6035156 C43.9951172,24.5345052 44.5367839,25 45.2900391,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M55.703125,23.7050781 C54.9160156,23.7050781 54.2939453,23.5252279 53.8369141,23.1655273 C53.3798828,22.8058268 53.1513672,22.2408854 53.1513672,21.4707031 C53.1513672,21.047526 53.2127279,20.7005208 53.3354492,20.4296875 C53.4581706,20.1588542 53.6464844,19.9282227 53.9003906,19.737793 C54.1542969,19.5473633 54.5351562,19.4034831 55.0429688,19.3061523 C55.5507812,19.2088216 56.1347656,19.1411133 56.7949219,19.1030273 C57.4550781,19.0649414 58.3098958,19.0458984 59.359375,19.0458984 L59.359375,19.4267578 C59.359375,20.6708984 58.9933268,21.694987 58.2612305,22.4990234 C57.5291341,23.3030599 56.6764323,23.7050781 55.703125,23.7050781 Z M55.4619141,25.2285156 C57.2815755,25.2285156 58.5849609,24.4033203 59.3720703,22.7529297 L59.3720703,23.9716797 C59.3720703,24.3017578 59.4799805,24.5577799 59.6958008,24.7397461 C59.9116211,24.9217122 60.171875,25.0126953 60.4765625,25.0126953 C60.78125,25.0126953 61.0499674,24.9174805 61.2827148,24.7270508 C61.5154622,24.5366211 61.6318359,24.2763672 61.6318359,23.9462891 L61.6318359,16.5449219 C61.6318359,15.046875 61.1938477,13.938151 60.3178711,13.21875 C59.4418945,12.499349 58.2294922,12.1396484 56.6806641,12.1396484 C54.8694661,12.1396484 53.3079427,12.5078125 51.9960938,13.2441406 C51.7591146,13.3795573 51.640625,13.5742187 51.640625,13.828125 C51.640625,14.0481771 51.7210286,14.2491862 51.8818359,14.4311523 C52.0426432,14.6131185 52.2330729,14.7041016 52.453125,14.7041016 C52.5716146,14.7041016 52.6731771,14.6829427 52.7578125,14.640625 C53.1894531,14.4459635 53.5533854,14.2936198 53.8496094,14.1835937 C54.1458333,14.0735677 54.5520833,13.9635417 55.0683594,13.8535156 C55.5846354,13.7434896 56.1009115,13.6884766 56.6171875,13.6884766 C57.4804688,13.6884766 58.1533203,13.9042969 58.6357422,14.3359375 C59.1181641,14.7675781 59.359375,15.4361979 59.359375,16.3417969 L59.359375,17.7255859 C58.4199219,17.7255859 57.6180013,17.7382812 56.9536133,17.7636719 C56.2892253,17.7890625 55.6438802,17.8334961 55.0175781,17.8969727 C54.391276,17.9604492 53.875,18.0535482 53.46875,18.1762695 C53.0625,18.2989909 52.6901042,18.4555664 52.3515625,18.6459961 C52.0130208,18.8364258 51.750651,19.0691732 51.5644531,19.3442383 C51.3782552,19.6193034 51.234375,19.9388021 51.1328125,20.3027344 C51.03125,20.6666667 50.9804688,21.0898438 50.9804688,21.5722656 C50.9804688,22.7402344 51.3951823,23.6416016 52.2246094,24.2763672 C53.0540365,24.9111328 54.133138,25.2285156 55.4619141,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M65.5068359,25 C65.819987,25 66.0929362,24.906901 66.3256836,24.7207031 C66.558431,24.5345052 66.6748047,24.2721354 66.6748047,23.9335938 L66.6748047,17.3955078 C66.6748047,16.4052734 66.9900716,15.6139323 67.6206055,15.0214844 C68.2511393,14.4290365 69.1376953,14.1328125 70.2802734,14.1328125 C70.5511068,14.1328125 70.7584635,14.0418294 70.9023438,13.8598633 C71.046224,13.6778971 71.1181641,13.4599609 71.1181641,13.2060547 C71.1181641,12.9352214 71.0398763,12.6982422 70.8833008,12.4951172 C70.7267253,12.2919922 70.5130208,12.1904297 70.2421875,12.1904297 C69.3450521,12.1904297 68.5769857,12.4549154 67.9379883,12.9838867 C67.2989909,13.5128581 66.8736979,14.1708984 66.6621094,14.9580078 L66.6748047,13.4345703 C66.6748047,13.1129557 66.5626628,12.8611654 66.3383789,12.6791992 C66.1140951,12.4972331 65.8453776,12.40625 65.5322266,12.40625 C65.2190755,12.40625 64.9482422,12.4972331 64.7197266,12.6791992 C64.4912109,12.8611654 64.3769531,13.1214193 64.3769531,13.4599609 L64.3769531,23.9335938 C64.3769531,24.2721354 64.4869792,24.5345052 64.7070312,24.7207031 C64.9270833,24.906901 65.1936849,25 65.5068359,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M77.5957031,23.6542969 C76.9355469,23.6458333 76.3621419,23.4871419 75.8754883,23.1782227 C75.3888346,22.8693034 75.0185547,22.4609375 74.7646484,21.953125 C74.5107422,21.4453125 74.3245443,20.918457 74.2060547,20.3725586 C74.0875651,19.8266602 74.0283203,19.257487 74.0283203,18.6650391 C74.0283203,18.0556641 74.0939128,17.4716797 74.2250977,16.9130859 C74.3562826,16.3544922 74.5572917,15.8276367 74.828125,15.3325195 C75.0989583,14.8374023 75.4819336,14.4417318 75.9770508,14.1455078 C76.472168,13.8492839 77.0498047,13.7011719 77.7099609,13.7011719 C78.3785807,13.7011719 78.964681,13.8408203 79.4682617,14.1201172 C79.9718424,14.3994141 80.3696289,14.7760417 80.6616211,15.25 C80.9536133,15.7239583 81.1715495,16.2444661 81.3154297,16.8115234 C81.4593099,17.3785807 81.53125,17.983724 81.53125,18.6269531 C81.53125,21.9785156 80.219401,23.6542969 77.5957031,23.6542969 Z M77.1894531,25.2285156 C78.171224,25.2285156 79.0408529,25.0232747 79.7983398,24.612793 C80.5558268,24.2023112 81.1376953,23.5908203 81.5439453,22.7783203 L81.5439453,23.9082031 C81.5439453,24.2382813 81.641276,24.4985352 81.8359375,24.6889648 C82.030599,24.8793945 82.3183594,24.9746094 82.6992188,24.9746094 C83.0039062,24.9746094 83.2705078,24.8793945 83.4990234,24.6889648 C83.7275391,24.4985352 83.8417969,24.2340495 83.8417969,23.8955078 L83.8417969,7.92480469 C83.8417969,7.59472656 83.7275391,7.33447266 83.4990234,7.14404297 C83.2705078,6.95361328 83.0039062,6.85839844 82.6992188,6.85839844 C82.3860677,6.85839844 82.1152344,6.95361328 81.8867188,7.14404297 C81.6582031,7.33447266 81.5439453,7.59472656 81.5439453,7.92480469 L81.5439453,14.5136719 C81.1630859,13.7434896 80.6108398,13.1552734 79.887207,12.7490234 C79.1635742,12.3427734 78.3320312,12.1396484 77.3925781,12.1396484 C76.5123698,12.1396484 75.7104492,12.3068034 74.9868164,12.6411133 C74.2631836,12.9754232 73.6665039,13.4366862 73.1967773,14.0249023 C72.7270508,14.6131185 72.3652344,15.2986654 72.1113281,16.081543 C71.8574219,16.8644206 71.7304688,17.7086589 71.7304688,18.6142578 C71.7304688,19.5029297 71.8468424,20.3408203 72.0795898,21.1279297 C72.3123372,21.9150391 72.648763,22.6153971 73.0888672,23.2290039 C73.5289714,23.8426107 74.1023763,24.3292643 74.809082,24.6889648 C75.5157878,25.0486654 76.3092448,25.2285156 77.1894531,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M100.643555,25.2792969 C101.413737,25.2792969 102.147949,25.1946615 102.846191,25.0253906 C103.544434,24.8561198 104.198242,24.59375 104.807617,24.2382813 C105.416992,23.8828125 105.943848,23.449056 106.388184,22.9370117 C106.83252,22.4249674 107.183757,21.8071289 107.441895,21.0834961 C107.700033,20.3598633 107.829102,19.5621745 107.829102,18.6904297 L107.829102,8.06445312 C107.829102,7.67513021 107.704264,7.37044271 107.45459,7.15039063 C107.204915,6.93033854 106.906576,6.8203125 106.55957,6.8203125 C106.204102,6.8203125 105.899414,6.93033854 105.645508,7.15039063 C105.391602,7.37044271 105.264648,7.67513021 105.264648,8.06445312 L105.264648,18.6269531 C105.264648,19.3886719 105.133464,20.069987 104.871094,20.6708984 C104.608724,21.2718099 104.257487,21.7542318 103.817383,22.1181641 C103.377279,22.4820964 102.886393,22.7571615 102.344727,22.9433594 C101.80306,23.1295573 101.236003,23.2226562 100.643555,23.2226562 C100.051107,23.2226562 99.4861654,23.1295573 98.9487305,22.9433594 C98.4112956,22.7571615 97.9246419,22.4820964 97.4887695,22.1181641 C97.0528971,21.7542318 96.703776,21.2718099 96.4414062,20.6708984 C96.1790365,20.069987 96.0478516,19.3886719 96.0478516,18.6269531 L96.0478516,8.06445312 C96.0478516,7.67513021 95.9208984,7.37044271 95.6669922,7.15039063 C95.4130859,6.93033854 95.1126302,6.8203125 94.765625,6.8203125 C94.4101562,6.8203125 94.1054688,6.93033854 93.8515625,7.15039063 C93.5976562,7.37044271 93.4707031,7.67513021 93.4707031,8.06445312 L93.4707031,18.6904297 C93.4707031,19.7652995 93.6653646,20.7301432 94.0546875,21.5849609 C94.4440104,22.4397786 94.9729818,23.1316732 95.6416016,23.6606445 C96.3102214,24.1896159 97.0698242,24.5916341 97.9204102,24.8666992 C98.7709961,25.1417643 99.6787109,25.2792969 100.643555,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M110.980469,23.8574219 C111.488281,24.2298177 112.16748,24.5535482 113.018066,24.8286133 C113.868652,25.1036784 114.746745,25.2412109 115.652344,25.2412109 C116.625651,25.2412109 117.480469,25.1079102 118.216797,24.8413086 C118.953125,24.574707 119.547689,24.1451823 120.000488,23.5527344 C120.453288,22.9602865 120.679688,22.2324219 120.679688,21.3691406 C120.679688,20.4550781 120.406738,19.7039388 119.86084,19.1157227 C119.314941,18.5275065 118.402995,18.0768229 117.125,17.7636719 L114.928711,17.2177734 C114.006185,16.9892578 113.405273,16.7713216 113.125977,16.5639648 C112.84668,16.3566081 112.707031,15.9990234 112.707031,15.4912109 C112.707031,14.8649089 112.977865,14.4057617 113.519531,14.1137695 C114.061198,13.8217773 114.789062,13.6757813 115.703125,13.6757813 C115.990885,13.6757813 116.274414,13.6948242 116.553711,13.7329102 C116.833008,13.7709961 117.108073,13.8260091 117.378906,13.8979492 C117.64974,13.9698893 117.863444,14.03125 118.02002,14.0820313 C118.176595,14.1328125 118.37972,14.2068685 118.629395,14.3041992 C118.879069,14.4015299 119.016602,14.4544271 119.041992,14.4628906 C119.177409,14.5136719 119.308594,14.5390625 119.435547,14.5390625 C119.655599,14.5390625 119.831217,14.4692383 119.962402,14.3295898 C120.093587,14.1899414 120.15918,14.0227865 120.15918,13.828125 C120.15918,13.4980469 119.994141,13.2483724 119.664062,13.0791016 C119.207031,12.8336589 118.616699,12.6114909 117.893066,12.4125977 C117.169434,12.2137044 116.397135,12.1142578 115.576172,12.1142578 C114.890625,12.1142578 114.255859,12.1798503 113.671875,12.3110352 C113.087891,12.4422201 112.561035,12.6411133 112.091309,12.9077148 C111.621582,13.1743164 111.251302,13.5361328 110.980469,13.9931641 C110.709635,14.4501953 110.574219,14.9791667 110.574219,15.5800781 C110.574219,15.9609375 110.612305,16.2994792 110.688477,16.5957031 C110.764648,16.8919271 110.88737,17.1500651 111.056641,17.3701172 C111.225911,17.5901693 111.407878,17.7784831 111.602539,17.9350586 C111.797201,18.0916341 112.057454,18.2376302 112.383301,18.3730469 C112.709147,18.5084635 113.013835,18.6184896 113.297363,18.703125 C113.580892,18.7877604 113.951172,18.8893229 114.408203,19.0078125 L116.655273,19.5664062 C117.400065,19.7526042 117.933268,19.9980469 118.254883,20.3027344 C118.576497,20.6074219 118.737305,21.0136719 118.737305,21.5214844 C118.737305,22.2324219 118.45166,22.7719727 117.880371,23.1401367 C117.309082,23.5083008 116.557943,23.6923828 115.626953,23.6923828 C114.332031,23.6839193 113.104818,23.319987 111.945312,22.6005859 C111.759115,22.4820964 111.564453,22.4228516 111.361328,22.4228516 C111.141276,22.4228516 110.95931,22.4969076 110.81543,22.6450195 C110.671549,22.7931315 110.599609,22.96875 110.599609,23.171875 C110.599609,23.4511719 110.726562,23.6796875 110.980469,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M126.49707,23.7050781 C125.709961,23.7050781 125.087891,23.5252279 124.630859,23.1655273 C124.173828,22.8058268 123.945312,22.2408854 123.945312,21.4707031 C123.945312,21.047526 124.006673,20.7005208 124.129395,20.4296875 C124.252116,20.1588542 124.44043,19.9282227 124.694336,19.737793 C124.948242,19.5473633 125.329102,19.4034831 125.836914,19.3061523 C126.344727,19.2088216 126.928711,19.1411133 127.588867,19.1030273 C128.249023,19.0649414 129.103841,19.0458984 130.15332,19.0458984 L130.15332,19.4267578 C130.15332,20.6708984 129.787272,21.694987 129.055176,22.4990234 C128.323079,23.3030599 127.470378,23.7050781 126.49707,23.7050781 Z M126.255859,25.2285156 C128.075521,25.2285156 129.378906,24.4033203 130.166016,22.7529297 L130.166016,23.9716797 C130.166016,24.3017578 130.273926,24.5577799 130.489746,24.7397461 C130.705566,24.9217122 130.96582,25.0126953 131.270508,25.0126953 C131.575195,25.0126953 131.843913,24.9174805 132.07666,24.7270508 C132.309408,24.5366211 132.425781,24.2763672 132.425781,23.9462891 L132.425781,16.5449219 C132.425781,15.046875 131.987793,13.938151 131.111816,13.21875 C130.23584,12.499349 129.023438,12.1396484 127.474609,12.1396484 C125.663411,12.1396484 124.101888,12.5078125 122.790039,13.2441406 C122.55306,13.3795573 122.43457,13.5742187 122.43457,13.828125 C122.43457,14.0481771 122.514974,14.2491862 122.675781,14.4311523 C122.836589,14.6131185 123.027018,14.7041016 123.24707,14.7041016 C123.36556,14.7041016 123.467122,14.6829427 123.551758,14.640625 C123.983398,14.4459635 124.347331,14.2936198 124.643555,14.1835937 C124.939779,14.0735677 125.346029,13.9635417 125.862305,13.8535156 C126.378581,13.7434896 126.894857,13.6884766 127.411133,13.6884766 C128.274414,13.6884766 128.947266,13.9042969 129.429688,14.3359375 C129.912109,14.7675781 130.15332,15.4361979 130.15332,16.3417969 L130.15332,17.7255859 C129.213867,17.7255859 128.411947,17.7382812 127.747559,17.7636719 C127.083171,17.7890625 126.437826,17.8334961 125.811523,17.8969727 C125.185221,17.9604492 124.668945,18.0535482 124.262695,18.1762695 C123.856445,18.2989909 123.484049,18.4555664 123.145508,18.6459961 C122.806966,18.8364258 122.544596,19.0691732 122.358398,19.3442383 C122.172201,19.6193034 122.02832,19.9388021 121.926758,20.3027344 C121.825195,20.6666667 121.774414,21.0898438 121.774414,21.5722656 C121.774414,22.7402344 122.189128,23.6416016 123.018555,24.2763672 C123.847982,24.9111328 124.927083,25.2285156 126.255859,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M140.439453,23.4765625 C139.322266,23.4765625 138.39974,23.0703125 137.671875,22.2578125 C136.94401,21.4453125 136.580078,20.2519531 136.580078,18.6777344 C136.580078,17.7382812 136.711263,16.9046224 136.973633,16.1767578 C137.236003,15.4488932 137.663411,14.8543294 138.255859,14.3930664 C138.848307,13.9318034 139.576172,13.7011719 140.439453,13.7011719 C140.98112,13.7011719 141.467773,13.7921549 141.899414,13.9741211 C142.331055,14.1560872 142.684408,14.4015299 142.959473,14.7104492 C143.234538,15.0193685 143.463053,15.3875326 143.64502,15.8149414 C143.826986,16.2423503 143.956055,16.6866862 144.032227,17.1479492 C144.108398,17.6092122 144.146484,18.1022135 144.146484,18.6269531 C144.146484,20.1588542 143.799479,21.3500977 143.105469,22.2006836 C142.411458,23.0512695 141.522786,23.4765625 140.439453,23.4765625 Z M140.337891,30.3701172 C142.208333,30.3701172 143.683105,29.9130859 144.762207,28.9990234 C145.841309,28.0849609 146.380859,26.7265625 146.380859,24.9238281 L146.380859,13.4599609 C146.380859,13.1214193 146.275065,12.8569336 146.063477,12.6665039 C145.851888,12.4760742 145.597982,12.3808594 145.301758,12.3808594 C145.030924,12.3808594 144.791829,12.4633789 144.584473,12.628418 C144.377116,12.793457 144.260742,13.0198568 144.235352,13.3076172 L144.235352,14.5898437 C143.397461,12.9563802 141.992513,12.1396484 140.020508,12.1396484 C138.827148,12.1396484 137.790365,12.4401042 136.910156,13.0410156 C136.029948,13.6419271 135.371908,14.4353841 134.936035,15.4213867 C134.500163,16.4073893 134.282227,17.5182292 134.282227,18.7539063 C134.282227,19.625651 134.411296,20.438151 134.669434,21.1914062 C134.927572,21.9446615 135.295736,22.6048177 135.773926,23.171875 C136.252116,23.7389323 136.861491,24.1853841 137.602051,24.5112305 C138.342611,24.8370768 139.169922,25 140.083984,25 C141.057292,25 141.893066,24.7736003 142.591309,24.3208008 C143.289551,23.8680013 143.812174,23.2565104 144.15918,22.4863281 L144.15918,25 C144.15918,26.21875 143.82487,27.1518555 143.15625,27.7993164 C142.48763,28.4467773 141.497396,28.7705078 140.185547,28.7705078 C139.982422,28.7705078 139.781413,28.7620443 139.58252,28.7451172 C139.383626,28.7281901 139.205892,28.7091471 139.049316,28.6879883 C138.892741,28.6668294 138.719238,28.6350911 138.528809,28.5927734 C138.338379,28.5504557 138.186035,28.5166016 138.071777,28.4912109 C137.95752,28.4658203 137.809408,28.4235026 137.627441,28.3642578 C137.445475,28.305013 137.320638,28.2626953 137.25293,28.2373047 C137.185221,28.2119141 137.064616,28.1653646 136.891113,28.0976563 C136.717611,28.0299479 136.613932,27.991862 136.580078,27.9833984 C136.486979,27.9495443 136.389648,27.9326172 136.288086,27.9326172 C136.076497,27.9326172 135.896647,28.0109049 135.748535,28.1674805 C135.600423,28.324056 135.526367,28.508138 135.526367,28.7197266 C135.526367,29.0328776 135.687174,29.2783203 136.008789,29.4560547 C137.151367,30.0654297 138.594401,30.3701172 140.337891,30.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M150.662109,17.7382812 C150.704427,17.2135417 150.816569,16.7163086 150.998535,16.246582 C151.180501,15.7768555 151.425944,15.3494466 151.734863,14.9643555 C152.043783,14.5792643 152.439453,14.2724609 152.921875,14.0439453 C153.404297,13.8154297 153.941732,13.7011719 154.53418,13.7011719 C155.676758,13.7011719 156.544271,14.0777995 157.136719,14.8310547 C157.729167,15.5843099 158.063477,16.5533854 158.139648,17.7382812 L150.662109,17.7382812 Z M154.572266,25.2285156 C156.468099,25.2285156 158.084635,24.6276042 159.421875,23.4257812 C159.658854,23.2141927 159.777344,22.9729818 159.777344,22.7021484 C159.777344,22.4905599 159.70752,22.3064779 159.567871,22.1499023 C159.428223,21.9933268 159.256836,21.9150391 159.053711,21.9150391 C158.88444,21.9150391 158.723633,21.9742839 158.571289,22.0927734 C157.961914,22.5582682 157.356771,22.9264323 156.755859,23.1972656 C156.154948,23.468099 155.477865,23.6035156 154.724609,23.6035156 C153.53125,23.5865885 152.553711,23.2036133 151.791992,22.4545898 C151.030273,21.7055664 150.640951,20.6031901 150.624023,19.1474609 L159.333008,19.1474609 C159.603841,19.1474609 159.806966,19.0670573 159.942383,18.90625 C160.077799,18.7454427 160.145508,18.5423177 160.145508,18.296875 C160.120117,17.4420573 159.997396,16.6570638 159.777344,15.9418945 C159.557292,15.2267253 159.227214,14.5792643 158.787109,13.9995117 C158.347005,13.4197591 157.754557,12.9648438 157.009766,12.6347656 C156.264974,12.3046875 155.397461,12.1396484 154.407227,12.1396484 C153.154622,12.1396484 152.058594,12.4443359 151.119141,13.0537109 C150.179688,13.6630859 149.477214,14.4544271 149.011719,15.4277344 C148.546224,16.4010417 148.313477,17.4759115 148.313477,18.6523437 C148.313477,19.9895833 148.590658,21.159668 149.14502,22.1625977 C149.699382,23.1655273 150.446289,23.9251302 151.385742,24.4414063 C152.325195,24.9576823 153.38737,25.2200521 154.572266,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M162.217773,23.8574219 C162.725586,24.2298177 163.404785,24.5535482 164.255371,24.8286133 C165.105957,25.1036784 165.984049,25.2412109 166.889648,25.2412109 C167.862956,25.2412109 168.717773,25.1079102 169.454102,24.8413086 C170.19043,24.574707 170.784993,24.1451823 171.237793,23.5527344 C171.690592,22.9602865 171.916992,22.2324219 171.916992,21.3691406 C171.916992,20.4550781 171.644043,19.7039388 171.098145,19.1157227 C170.552246,18.5275065 169.640299,18.0768229 168.362305,17.7636719 L166.166016,17.2177734 C165.24349,16.9892578 164.642578,16.7713216 164.363281,16.5639648 C164.083984,16.3566081 163.944336,15.9990234 163.944336,15.4912109 C163.944336,14.8649089 164.215169,14.4057617 164.756836,14.1137695 C165.298503,13.8217773 166.026367,13.6757813 166.94043,13.6757813 C167.22819,13.6757813 167.511719,13.6948242 167.791016,13.7329102 C168.070312,13.7709961 168.345378,13.8260091 168.616211,13.8979492 C168.887044,13.9698893 169.100749,14.03125 169.257324,14.0820313 C169.4139,14.1328125 169.617025,14.2068685 169.866699,14.3041992 C170.116374,14.4015299 170.253906,14.4544271 170.279297,14.4628906 C170.414714,14.5136719 170.545898,14.5390625 170.672852,14.5390625 C170.892904,14.5390625 171.068522,14.4692383 171.199707,14.3295898 C171.330892,14.1899414 171.396484,14.0227865 171.396484,13.828125 C171.396484,13.4980469 171.231445,13.2483724 170.901367,13.0791016 C170.444336,12.8336589 169.854004,12.6114909 169.130371,12.4125977 C168.406738,12.2137044 167.63444,12.1142578 166.813477,12.1142578 C166.12793,12.1142578 165.493164,12.1798503 164.90918,12.3110352 C164.325195,12.4422201 163.79834,12.6411133 163.328613,12.9077148 C162.858887,13.1743164 162.488607,13.5361328 162.217773,13.9931641 C161.94694,14.4501953 161.811523,14.9791667 161.811523,15.5800781 C161.811523,15.9609375 161.849609,16.2994792 161.925781,16.5957031 C162.001953,16.8919271 162.124674,17.1500651 162.293945,17.3701172 C162.463216,17.5901693 162.645182,17.7784831 162.839844,17.9350586 C163.034505,18.0916341 163.294759,18.2376302 163.620605,18.3730469 C163.946452,18.5084635 164.251139,18.6184896 164.534668,18.703125 C164.818197,18.7877604 165.188477,18.8893229 165.645508,19.0078125 L167.892578,19.5664062 C168.63737,19.7526042 169.170573,19.9980469 169.492188,20.3027344 C169.813802,20.6074219 169.974609,21.0136719 169.974609,21.5214844 C169.974609,22.2324219 169.688965,22.7719727 169.117676,23.1401367 C168.546387,23.5083008 167.795247,23.6923828 166.864258,23.6923828 C165.569336,23.6839193 164.342122,23.319987 163.182617,22.6005859 C162.996419,22.4820964 162.801758,22.4228516 162.598633,22.4228516 C162.378581,22.4228516 162.196615,22.4969076 162.052734,22.6450195 C161.908854,22.7931315 161.836914,22.96875 161.836914,23.171875 C161.836914,23.4511719 161.963867,23.6796875 162.217773,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-12" transform="translate(0, 212)" xlink:href="#path-27" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M4.81152344,15.2373047 L4.81152344,8.88964844 L8.81054688,8.88964844 C10.2070312,8.88964844 11.2184245,9.1266276 11.8447266,9.60058594 C12.4710286,10.0745443 12.7841797,10.8828125 12.7841797,12.0253906 C12.7841797,13.1933594 12.4519857,14.0206706 11.7875977,14.5073242 C11.1232096,14.9939779 10.0843099,15.2373047 8.67089844,15.2373047 L4.81152344,15.2373047 Z M3.52929688,25 C3.88476562,25 4.18733724,24.8942057 4.43701172,24.6826172 C4.6866862,24.4710286 4.81152344,24.1705729 4.81152344,23.78125 L4.81152344,17.1923828 L8.68359375,17.1923828 C9.69075521,17.1923828 10.5836589,17.1035156 11.3623047,16.9257813 C12.1409505,16.7480469 12.8328451,16.4666341 13.4379883,16.081543 C14.0431315,15.6964518 14.5065104,15.1632487 14.828125,14.4819336 C15.1497396,13.8006185 15.3105469,12.9817708 15.3105469,12.0253906 C15.3105469,10.1888021 14.7646484,8.86848958 13.6728516,8.06445313 C12.5810547,7.26041667 10.9941406,6.85839844 8.91210938,6.85839844 L3.54199219,6.85839844 C3.1780599,6.85839844 2.87125651,6.98111979 2.62158203,7.2265625 C2.37190755,7.47200521 2.24707031,7.77246094 2.24707031,8.12792969 L2.24707031,23.78125 C2.24707031,24.1621094 2.37402344,24.4604492 2.62792969,24.6762695 C2.88183594,24.8920898 3.18229167,25 3.52929688,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M17.890625,25 C18.203776,25 18.4767253,24.8963216 18.7094727,24.6889648 C18.9422201,24.4816081 19.0585938,24.2001953 19.0585938,23.8447266 L19.0585938,8.01367188 C19.0585938,7.65820312 18.9464518,7.37890625 18.722168,7.17578125 C18.4978841,6.97265625 18.2333984,6.87109375 17.9287109,6.87109375 C17.6155599,6.87109375 17.3426107,6.97265625 17.1098633,7.17578125 C16.8771159,7.37890625 16.7607422,7.65820312 16.7607422,8.01367188 L16.7607422,23.8447266 C16.7607422,24.2171224 16.8707682,24.5027669 17.0908203,24.7016602 C17.3108724,24.9005534 17.577474,25 17.890625,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M26.3613281,25.2285156 C27.3346354,25.2285156 28.1894531,25.0211589 28.9257812,24.6064453 C29.6621094,24.1917318 30.1910807,23.5992839 30.5126953,22.8291016 L30.5126953,23.9589844 C30.5126953,24.2890625 30.6227214,24.5450846 30.8427734,24.7270508 C31.0628255,24.9090169 31.3294271,25 31.6425781,25 C31.9557292,25 32.2265625,24.9047852 32.4550781,24.7143555 C32.6835938,24.5239258 32.7978516,24.2594401 32.7978516,23.9208984 L32.7978516,13.4726562 C32.7978516,13.125651 32.6878255,12.8569336 32.4677734,12.6665039 C32.2477214,12.4760742 31.9811198,12.3808594 31.6679688,12.3808594 C31.3548177,12.3808594 31.0839844,12.4781901 30.8554688,12.6728516 C30.6269531,12.867513 30.5126953,13.1341146 30.5126953,13.4726562 L30.5126953,19.7822266 C30.4534505,20.9840495 30.0852865,21.9298503 29.4082031,22.6196289 C28.7311198,23.3094076 27.9016927,23.6542969 26.9199219,23.6542969 C26.0058594,23.6542969 25.3033854,23.3728841 24.8125,22.8100586 C24.3216146,22.2472331 24.0761719,21.4241536 24.0761719,20.3408203 L24.0761719,13.4726562 C24.0761719,13.125651 23.9640299,12.8569336 23.7397461,12.6665039 C23.5154622,12.4760742 23.2467448,12.3808594 22.9335937,12.3808594 C22.6204427,12.3808594 22.3496094,12.4760742 22.1210938,12.6665039 C21.8925781,12.8569336 21.7783203,13.125651 21.7783203,13.4726562 L21.7783203,20.3154297 C21.7783203,21.9573568 22.1951497,23.1866862 23.0288086,24.003418 C23.8624674,24.8201497 24.9733073,25.2285156 26.3613281,25.2285156 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M40.9511719,23.4765625 C39.8339844,23.4765625 38.9114583,23.0703125 38.1835938,22.2578125 C37.4557292,21.4453125 37.0917969,20.2519531 37.0917969,18.6777344 C37.0917969,17.7382812 37.2229818,16.9046224 37.4853516,16.1767578 C37.7477214,15.4488932 38.1751302,14.8543294 38.7675781,14.3930664 C39.360026,13.9318034 40.0878906,13.7011719 40.9511719,13.7011719 C41.4928385,13.7011719 41.9794922,13.7921549 42.4111328,13.9741211 C42.8427734,14.1560872 43.1961263,14.4015299 43.4711914,14.7104492 C43.7462565,15.0193685 43.9747721,15.3875326 44.1567383,15.8149414 C44.3387044,16.2423503 44.4677734,16.6866862 44.5439453,17.1479492 C44.6201172,17.6092122 44.6582031,18.1022135 44.6582031,18.6269531 C44.6582031,20.1588542 44.3111979,21.3500977 43.6171875,22.2006836 C42.9231771,23.0512695 42.0345052,23.4765625 40.9511719,23.4765625 Z M40.8496094,30.3701172 C42.7200521,30.3701172 44.1948242,29.9130859 45.2739258,28.9990234 C46.3530273,28.0849609 46.8925781,26.7265625 46.8925781,24.9238281 L46.8925781,13.4599609 C46.8925781,13.1214193 46.7867839,12.8569336 46.5751953,12.6665039 C46.3636068,12.4760742 46.1097005,12.3808594 45.8134766,12.3808594 C45.5426432,12.3808594 45.3035482,12.4633789 45.0961914,12.628418 C44.8888346,12.793457 44.7724609,13.0198568 44.7470703,13.3076172 L44.7470703,14.5898437 C43.9091797,12.9563802 42.5042318,12.1396484 40.5322266,12.1396484 C39.3388672,12.1396484 38.3020833,12.4401042 37.421875,13.0410156 C36.5416667,13.6419271 35.8836263,14.4353841 35.4477539,15.4213867 C35.0118815,16.4073893 34.7939453,17.5182292 34.7939453,18.7539063 C34.7939453,19.625651 34.9230143,20.438151 35.1811523,21.1914062 C35.4392904,21.9446615 35.8074544,22.6048177 36.2856445,23.171875 C36.7638346,23.7389323 37.3732096,24.1853841 38.1137695,24.5112305 C38.8543294,24.8370768 39.6816406,25 40.5957031,25 C41.5690104,25 42.4047852,24.7736003 43.1030273,24.3208008 C43.8012695,23.8680013 44.3238932,23.2565104 44.6708984,22.4863281 L44.6708984,25 C44.6708984,26.21875 44.3365885,27.1518555 43.6679688,27.7993164 C42.999349,28.4467773 42.0091146,28.7705078 40.6972656,28.7705078 C40.4941406,28.7705078 40.2931315,28.7620443 40.0942383,28.7451172 C39.8953451,28.7281901 39.7176107,28.7091471 39.5610352,28.6879883 C39.4044596,28.6668294 39.230957,28.6350911 39.0405273,28.5927734 C38.8500977,28.5504557 38.6977539,28.5166016 38.5834961,28.4912109 C38.4692383,28.4658203 38.3211263,28.4235026 38.1391602,28.3642578 C37.957194,28.305013 37.8323568,28.2626953 37.7646484,28.2373047 C37.6969401,28.2119141 37.5763346,28.1653646 37.402832,28.0976563 C37.2293294,28.0299479 37.125651,27.991862 37.0917969,27.9833984 C36.9986979,27.9495443 36.9013672,27.9326172 36.7998047,27.9326172 C36.5882161,27.9326172 36.4083659,28.0109049 36.2602539,28.1674805 C36.1121419,28.324056 36.0380859,28.508138 36.0380859,28.7197266 C36.0380859,29.0328776 36.1988932,29.2783203 36.5205078,29.4560547 C37.6630859,30.0654297 39.1061198,30.3701172 40.8496094,30.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M50.8945312,9.44824219 C51.343099,9.44824219 51.7027995,9.31917318 51.9736328,9.06103516 C52.2444661,8.80289714 52.3798828,8.46647135 52.3798828,8.05175781 C52.3798828,7.62858073 52.2444661,7.28792318 51.9736328,7.02978516 C51.7027995,6.77164714 51.3473307,6.64257812 50.9072266,6.64257812 C50.4586589,6.64257812 50.0968424,6.77376302 49.8217773,7.03613281 C49.5467122,7.2985026 49.4091797,7.63704427 49.4091797,8.05175781 C49.4091797,8.46647135 49.5445964,8.80289714 49.8154297,9.06103516 C50.086263,9.31917318 50.4459635,9.44824219 50.8945312,9.44824219 Z M50.8818359,25 C51.194987,25 51.4679362,24.8963216 51.7006836,24.6889648 C51.933431,24.4816081 52.0498047,24.2001953 52.0498047,23.8447266 L52.0498047,13.5234375 C52.0498047,13.1679688 51.9376628,12.8886719 51.7133789,12.6855469 C51.4890951,12.4824219 51.2246094,12.3808594 50.9199219,12.3808594 C50.6067708,12.3808594 50.3338216,12.4824219 50.1010742,12.6855469 C49.8683268,12.8886719 49.7519531,13.1679688 49.7519531,13.5234375 L49.7519531,23.8447266 C49.7519531,24.2171224 49.8619792,24.5027669 50.0820312,24.7016602 C50.3020833,24.9005534 50.5686849,25 50.8818359,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M55.9755859,25 C56.288737,25 56.5616862,24.9026693 56.7944336,24.7080078 C57.027181,24.5133464 57.1435547,24.2467448 57.1435547,23.9082031 L57.1435547,17.5732422 C57.2027995,16.3798828 57.5667318,15.4361979 58.2353516,14.7421875 C58.9039714,14.0481771 59.7249349,13.7011719 60.6982422,13.7011719 C61.6123047,13.7011719 62.3211263,13.9847005 62.824707,14.5517578 C63.3282878,15.1188151 63.5800781,15.9440104 63.5800781,17.0273438 L63.5800781,23.9082031 C63.5800781,24.2552083 63.6922201,24.5239258 63.9165039,24.7143555 C64.1407878,24.9047852 64.4095052,25 64.7226562,25 C65.0358073,25 65.3045247,24.9047852 65.5288086,24.7143555 C65.7530924,24.5239258 65.8652344,24.2552083 65.8652344,23.9082031 L65.8652344,17.0527344 C65.8652344,15.4023437 65.4505208,14.1708984 64.6210938,13.3583984 C63.7916667,12.5458984 62.6787109,12.1396484 61.2822266,12.1396484 C60.3089193,12.1396484 59.4562174,12.3470052 58.7241211,12.7617188 C57.9920247,13.1764323 57.4651693,13.7688802 57.1435547,14.5390625 L57.1435547,13.4345703 C57.1435547,13.1044922 57.0314128,12.8484701 56.8071289,12.6665039 C56.5828451,12.4845378 56.3141276,12.3935547 56.0009766,12.3935547 C55.6878255,12.3935547 55.4169922,12.4887695 55.1884766,12.6791992 C54.9599609,12.8696289 54.8457031,13.1341146 54.8457031,13.4726563 L54.8457031,23.9082031 C54.8457031,24.2552083 54.9557292,24.5239258 55.1757812,24.7143555 C55.3958333,24.9047852 55.6624349,25 55.9755859,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M68.4960938,23.8574219 C69.0039062,24.2298177 69.6831055,24.5535482 70.5336914,24.8286133 C71.3842773,25.1036784 72.2623698,25.2412109 73.1679688,25.2412109 C74.141276,25.2412109 74.9960938,25.1079102 75.7324219,24.8413086 C76.46875,24.574707 77.0633138,24.1451823 77.5161133,23.5527344 C77.9689128,22.9602865 78.1953125,22.2324219 78.1953125,21.3691406 C78.1953125,20.4550781 77.9223633,19.7039388 77.3764648,19.1157227 C76.8305664,18.5275065 75.9186198,18.0768229 74.640625,17.7636719 L72.4443359,17.2177734 C71.5218099,16.9892578 70.9208984,16.7713216 70.6416016,16.5639648 C70.3623047,16.3566081 70.2226562,15.9990234 70.2226562,15.4912109 C70.2226562,14.8649089 70.4934896,14.4057617 71.0351562,14.1137695 C71.5768229,13.8217773 72.3046875,13.6757813 73.21875,13.6757813 C73.5065104,13.6757813 73.7900391,13.6948242 74.0693359,13.7329102 C74.3486328,13.7709961 74.6236979,13.8260091 74.8945312,13.8979492 C75.1653646,13.9698893 75.379069,14.03125 75.5356445,14.0820313 C75.6922201,14.1328125 75.8953451,14.2068685 76.1450195,14.3041992 C76.394694,14.4015299 76.5322266,14.4544271 76.5576172,14.4628906 C76.6930339,14.5136719 76.8242188,14.5390625 76.9511719,14.5390625 C77.171224,14.5390625 77.3468424,14.4692383 77.4780273,14.3295898 C77.6092122,14.1899414 77.6748047,14.0227865 77.6748047,13.828125 C77.6748047,13.4980469 77.5097656,13.2483724 77.1796875,13.0791016 C76.7226562,12.8336589 76.1323242,12.6114909 75.4086914,12.4125977 C74.6850586,12.2137044 73.9127604,12.1142578 73.0917969,12.1142578 C72.40625,12.1142578 71.7714844,12.1798503 71.1875,12.3110352 C70.6035156,12.4422201 70.0766602,12.6411133 69.6069336,12.9077148 C69.137207,13.1743164 68.7669271,13.5361328 68.4960938,13.9931641 C68.2252604,14.4501953 68.0898438,14.9791667 68.0898438,15.5800781 C68.0898438,15.9609375 68.1279297,16.2994792 68.2041016,16.5957031 C68.2802734,16.8919271 68.4029948,17.1500651 68.5722656,17.3701172 C68.7415365,17.5901693 68.9235026,17.7784831 69.1181641,17.9350586 C69.3128255,18.0916341 69.5730794,18.2376302 69.8989258,18.3730469 C70.2247721,18.5084635 70.5294596,18.6184896 70.8129883,18.703125 C71.0965169,18.7877604 71.4667969,18.8893229 71.9238281,19.0078125 L74.1708984,19.5664062 C74.9156901,19.7526042 75.4488932,19.9980469 75.7705078,20.3027344 C76.0921224,20.6074219 76.2529297,21.0136719 76.2529297,21.5214844 C76.2529297,22.2324219 75.9672852,22.7719727 75.3959961,23.1401367 C74.824707,23.5083008 74.0735677,23.6923828 73.1425781,23.6923828 C71.8476562,23.6839193 70.6204427,23.319987 69.4609375,22.6005859 C69.2747396,22.4820964 69.0800781,22.4228516 68.8769531,22.4228516 C68.656901,22.4228516 68.4749349,22.4969076 68.3310547,22.6450195 C68.1871745,22.7931315 68.1152344,22.96875 68.1152344,23.171875 C68.1152344,23.4511719 68.2421875,23.6796875 68.4960938,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M94.2988281,25.2792969 C95.0690104,25.2792969 95.8032227,25.1946615 96.5014648,25.0253906 C97.199707,24.8561198 97.8535156,24.59375 98.4628906,24.2382813 C99.0722656,23.8828125 99.5991211,23.449056 100.043457,22.9370117 C100.487793,22.4249674 100.83903,21.8071289 101.097168,21.0834961 C101.355306,20.3598633 101.484375,19.5621745 101.484375,18.6904297 L101.484375,8.06445312 C101.484375,7.67513021 101.359538,7.37044271 101.109863,7.15039063 C100.860189,6.93033854 100.561849,6.8203125 100.214844,6.8203125 C99.859375,6.8203125 99.5546875,6.93033854 99.3007812,7.15039063 C99.046875,7.37044271 98.9199219,7.67513021 98.9199219,8.06445312 L98.9199219,18.6269531 C98.9199219,19.3886719 98.788737,20.069987 98.5263672,20.6708984 C98.2639974,21.2718099 97.9127604,21.7542318 97.4726562,22.1181641 C97.0325521,22.4820964 96.5416667,22.7571615 96,22.9433594 C95.4583333,23.1295573 94.891276,23.2226562 94.2988281,23.2226562 C93.7063802,23.2226562 93.1414388,23.1295573 92.6040039,22.9433594 C92.066569,22.7571615 91.5799154,22.4820964 91.144043,22.1181641 C90.7081706,21.7542318 90.3590495,21.2718099 90.0966797,20.6708984 C89.8343099,20.069987 89.703125,19.3886719 89.703125,18.6269531 L89.703125,8.06445312 C89.703125,7.67513021 89.5761719,7.37044271 89.3222656,7.15039063 C89.0683594,6.93033854 88.7679036,6.8203125 88.4208984,6.8203125 C88.0654297,6.8203125 87.7607422,6.93033854 87.5068359,7.15039063 C87.2529297,7.37044271 87.1259766,7.67513021 87.1259766,8.06445312 L87.1259766,18.6904297 C87.1259766,19.7652995 87.320638,20.7301432 87.7099609,21.5849609 C88.0992839,22.4397786 88.6282552,23.1316732 89.296875,23.6606445 C89.9654948,24.1896159 90.7250977,24.5916341 91.5756836,24.8666992 C92.4262695,25.1417643 93.3339844,25.2792969 94.2988281,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M104.635742,23.8574219 C105.143555,24.2298177 105.822754,24.5535482 106.67334,24.8286133 C107.523926,25.1036784 108.402018,25.2412109 109.307617,25.2412109 C110.280924,25.2412109 111.135742,25.1079102 111.87207,24.8413086 C112.608398,24.574707 113.202962,24.1451823 113.655762,23.5527344 C114.108561,22.9602865 114.334961,22.2324219 114.334961,21.3691406 C114.334961,20.4550781 114.062012,19.7039388 113.516113,19.1157227 C112.970215,18.5275065 112.058268,18.0768229 110.780273,17.7636719 L108.583984,17.2177734 C107.661458,16.9892578 107.060547,16.7713216 106.78125,16.5639648 C106.501953,16.3566081 106.362305,15.9990234 106.362305,15.4912109 C106.362305,14.8649089 106.633138,14.4057617 107.174805,14.1137695 C107.716471,13.8217773 108.444336,13.6757813 109.358398,13.6757813 C109.646159,13.6757813 109.929688,13.6948242 110.208984,13.7329102 C110.488281,13.7709961 110.763346,13.8260091 111.03418,13.8979492 C111.305013,13.9698893 111.518717,14.03125 111.675293,14.0820313 C111.831868,14.1328125 112.034993,14.2068685 112.284668,14.3041992 C112.534342,14.4015299 112.671875,14.4544271 112.697266,14.4628906 C112.832682,14.5136719 112.963867,14.5390625 113.09082,14.5390625 C113.310872,14.5390625 113.486491,14.4692383 113.617676,14.3295898 C113.748861,14.1899414 113.814453,14.0227865 113.814453,13.828125 C113.814453,13.4980469 113.649414,13.2483724 113.319336,13.0791016 C112.862305,12.8336589 112.271973,12.6114909 111.54834,12.4125977 C110.824707,12.2137044 110.052409,12.1142578 109.231445,12.1142578 C108.545898,12.1142578 107.911133,12.1798503 107.327148,12.3110352 C106.743164,12.4422201 106.216309,12.6411133 105.746582,12.9077148 C105.276855,13.1743164 104.906576,13.5361328 104.635742,13.9931641 C104.364909,14.4501953 104.229492,14.9791667 104.229492,15.5800781 C104.229492,15.9609375 104.267578,16.2994792 104.34375,16.5957031 C104.419922,16.8919271 104.542643,17.1500651 104.711914,17.3701172 C104.881185,17.5901693 105.063151,17.7784831 105.257812,17.9350586 C105.452474,18.0916341 105.712728,18.2376302 106.038574,18.3730469 C106.364421,18.5084635 106.669108,18.6184896 106.952637,18.703125 C107.236165,18.7877604 107.606445,18.8893229 108.063477,19.0078125 L110.310547,19.5664062 C111.055339,19.7526042 111.588542,19.9980469 111.910156,20.3027344 C112.231771,20.6074219 112.392578,21.0136719 112.392578,21.5214844 C112.392578,22.2324219 112.106934,22.7719727 111.535645,23.1401367 C110.964355,23.5083008 110.213216,23.6923828 109.282227,23.6923828 C107.987305,23.6839193 106.760091,23.319987 105.600586,22.6005859 C105.414388,22.4820964 105.219727,22.4228516 105.016602,22.4228516 C104.796549,22.4228516 104.614583,22.4969076 104.470703,22.6450195 C104.326823,22.7931315 104.254883,22.96875 104.254883,23.171875 C104.254883,23.4511719 104.381836,23.6796875 104.635742,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M120.152344,23.7050781 C119.365234,23.7050781 118.743164,23.5252279 118.286133,23.1655273 C117.829102,22.8058268 117.600586,22.2408854 117.600586,21.4707031 C117.600586,21.047526 117.661947,20.7005208 117.784668,20.4296875 C117.907389,20.1588542 118.095703,19.9282227 118.349609,19.737793 C118.603516,19.5473633 118.984375,19.4034831 119.492188,19.3061523 C120,19.2088216 120.583984,19.1411133 121.244141,19.1030273 C121.904297,19.0649414 122.759115,19.0458984 123.808594,19.0458984 L123.808594,19.4267578 C123.808594,20.6708984 123.442546,21.694987 122.710449,22.4990234 C121.978353,23.3030599 121.125651,23.7050781 120.152344,23.7050781 Z M119.911133,25.2285156 C121.730794,25.2285156 123.03418,24.4033203 123.821289,22.7529297 L123.821289,23.9716797 C123.821289,24.3017578 123.929199,24.5577799 124.14502,24.7397461 C124.36084,24.9217122 124.621094,25.0126953 124.925781,25.0126953 C125.230469,25.0126953 125.499186,24.9174805 125.731934,24.7270508 C125.964681,24.5366211 126.081055,24.2763672 126.081055,23.9462891 L126.081055,16.5449219 C126.081055,15.046875 125.643066,13.938151 124.76709,13.21875 C123.891113,12.499349 122.678711,12.1396484 121.129883,12.1396484 C119.318685,12.1396484 117.757161,12.5078125 116.445312,13.2441406 C116.208333,13.3795573 116.089844,13.5742187 116.089844,13.828125 C116.089844,14.0481771 116.170247,14.2491862 116.331055,14.4311523 C116.491862,14.6131185 116.682292,14.7041016 116.902344,14.7041016 C117.020833,14.7041016 117.122396,14.6829427 117.207031,14.640625 C117.638672,14.4459635 118.002604,14.2936198 118.298828,14.1835937 C118.595052,14.0735677 119.001302,13.9635417 119.517578,13.8535156 C120.033854,13.7434896 120.55013,13.6884766 121.066406,13.6884766 C121.929688,13.6884766 122.602539,13.9042969 123.084961,14.3359375 C123.567383,14.7675781 123.808594,15.4361979 123.808594,16.3417969 L123.808594,17.7255859 C122.869141,17.7255859 122.06722,17.7382812 121.402832,17.7636719 C120.738444,17.7890625 120.093099,17.8334961 119.466797,17.8969727 C118.840495,17.9604492 118.324219,18.0535482 117.917969,18.1762695 C117.511719,18.2989909 117.139323,18.4555664 116.800781,18.6459961 C116.46224,18.8364258 116.19987,19.0691732 116.013672,19.3442383 C115.827474,19.6193034 115.683594,19.9388021 115.582031,20.3027344 C115.480469,20.6666667 115.429688,21.0898438 115.429688,21.5722656 C115.429688,22.7402344 115.844401,23.6416016 116.673828,24.2763672 C117.503255,24.9111328 118.582357,25.2285156 119.911133,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M134.094727,23.4765625 C132.977539,23.4765625 132.055013,23.0703125 131.327148,22.2578125 C130.599284,21.4453125 130.235352,20.2519531 130.235352,18.6777344 C130.235352,17.7382812 130.366536,16.9046224 130.628906,16.1767578 C130.891276,15.4488932 131.318685,14.8543294 131.911133,14.3930664 C132.503581,13.9318034 133.231445,13.7011719 134.094727,13.7011719 C134.636393,13.7011719 135.123047,13.7921549 135.554688,13.9741211 C135.986328,14.1560872 136.339681,14.4015299 136.614746,14.7104492 C136.889811,15.0193685 137.118327,15.3875326 137.300293,15.8149414 C137.482259,16.2423503 137.611328,16.6866862 137.6875,17.1479492 C137.763672,17.6092122 137.801758,18.1022135 137.801758,18.6269531 C137.801758,20.1588542 137.454753,21.3500977 136.760742,22.2006836 C136.066732,23.0512695 135.17806,23.4765625 134.094727,23.4765625 Z M133.993164,30.3701172 C135.863607,30.3701172 137.338379,29.9130859 138.41748,28.9990234 C139.496582,28.0849609 140.036133,26.7265625 140.036133,24.9238281 L140.036133,13.4599609 C140.036133,13.1214193 139.930339,12.8569336 139.71875,12.6665039 C139.507161,12.4760742 139.253255,12.3808594 138.957031,12.3808594 C138.686198,12.3808594 138.447103,12.4633789 138.239746,12.628418 C138.032389,12.793457 137.916016,13.0198568 137.890625,13.3076172 L137.890625,14.5898437 C137.052734,12.9563802 135.647786,12.1396484 133.675781,12.1396484 C132.482422,12.1396484 131.445638,12.4401042 130.56543,13.0410156 C129.685221,13.6419271 129.027181,14.4353841 128.591309,15.4213867 C128.155436,16.4073893 127.9375,17.5182292 127.9375,18.7539063 C127.9375,19.625651 128.066569,20.438151 128.324707,21.1914062 C128.582845,21.9446615 128.951009,22.6048177 129.429199,23.171875 C129.907389,23.7389323 130.516764,24.1853841 131.257324,24.5112305 C131.997884,24.8370768 132.825195,25 133.739258,25 C134.712565,25 135.54834,24.7736003 136.246582,24.3208008 C136.944824,23.8680013 137.467448,23.2565104 137.814453,22.4863281 L137.814453,25 C137.814453,26.21875 137.480143,27.1518555 136.811523,27.7993164 C136.142904,28.4467773 135.152669,28.7705078 133.84082,28.7705078 C133.637695,28.7705078 133.436686,28.7620443 133.237793,28.7451172 C133.0389,28.7281901 132.861165,28.7091471 132.70459,28.6879883 C132.548014,28.6668294 132.374512,28.6350911 132.184082,28.5927734 C131.993652,28.5504557 131.841309,28.5166016 131.727051,28.4912109 C131.612793,28.4658203 131.464681,28.4235026 131.282715,28.3642578 C131.100749,28.305013 130.975911,28.2626953 130.908203,28.2373047 C130.840495,28.2119141 130.719889,28.1653646 130.546387,28.0976563 C130.372884,28.0299479 130.269206,27.991862 130.235352,27.9833984 C130.142253,27.9495443 130.044922,27.9326172 129.943359,27.9326172 C129.731771,27.9326172 129.551921,28.0109049 129.403809,28.1674805 C129.255697,28.324056 129.181641,28.508138 129.181641,28.7197266 C129.181641,29.0328776 129.342448,29.2783203 129.664062,29.4560547 C130.806641,30.0654297 132.249674,30.3701172 133.993164,30.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M144.317383,17.7382812 C144.359701,17.2135417 144.471842,16.7163086 144.653809,16.246582 C144.835775,15.7768555 145.081217,15.3494466 145.390137,14.9643555 C145.699056,14.5792643 146.094727,14.2724609 146.577148,14.0439453 C147.05957,13.8154297 147.597005,13.7011719 148.189453,13.7011719 C149.332031,13.7011719 150.199544,14.0777995 150.791992,14.8310547 C151.38444,15.5843099 151.71875,16.5533854 151.794922,17.7382812 L144.317383,17.7382812 Z M148.227539,25.2285156 C150.123372,25.2285156 151.739909,24.6276042 153.077148,23.4257812 C153.314128,23.2141927 153.432617,22.9729818 153.432617,22.7021484 C153.432617,22.4905599 153.362793,22.3064779 153.223145,22.1499023 C153.083496,21.9933268 152.912109,21.9150391 152.708984,21.9150391 C152.539714,21.9150391 152.378906,21.9742839 152.226562,22.0927734 C151.617188,22.5582682 151.012044,22.9264323 150.411133,23.1972656 C149.810221,23.468099 149.133138,23.6035156 148.379883,23.6035156 C147.186523,23.5865885 146.208984,23.2036133 145.447266,22.4545898 C144.685547,21.7055664 144.296224,20.6031901 144.279297,19.1474609 L152.988281,19.1474609 C153.259115,19.1474609 153.46224,19.0670573 153.597656,18.90625 C153.733073,18.7454427 153.800781,18.5423177 153.800781,18.296875 C153.775391,17.4420573 153.652669,16.6570638 153.432617,15.9418945 C153.212565,15.2267253 152.882487,14.5792643 152.442383,13.9995117 C152.002279,13.4197591 151.409831,12.9648438 150.665039,12.6347656 C149.920247,12.3046875 149.052734,12.1396484 148.0625,12.1396484 C146.809896,12.1396484 145.713867,12.4443359 144.774414,13.0537109 C143.834961,13.6630859 143.132487,14.4544271 142.666992,15.4277344 C142.201497,16.4010417 141.96875,17.4759115 141.96875,18.6523437 C141.96875,19.9895833 142.245931,21.159668 142.800293,22.1625977 C143.354655,23.1655273 144.101562,23.9251302 145.041016,24.4414063 C145.980469,24.9576823 147.042643,25.2200521 148.227539,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M155.873047,23.8574219 C156.380859,24.2298177 157.060059,24.5535482 157.910645,24.8286133 C158.76123,25.1036784 159.639323,25.2412109 160.544922,25.2412109 C161.518229,25.2412109 162.373047,25.1079102 163.109375,24.8413086 C163.845703,24.574707 164.440267,24.1451823 164.893066,23.5527344 C165.345866,22.9602865 165.572266,22.2324219 165.572266,21.3691406 C165.572266,20.4550781 165.299316,19.7039388 164.753418,19.1157227 C164.20752,18.5275065 163.295573,18.0768229 162.017578,17.7636719 L159.821289,17.2177734 C158.898763,16.9892578 158.297852,16.7713216 158.018555,16.5639648 C157.739258,16.3566081 157.599609,15.9990234 157.599609,15.4912109 C157.599609,14.8649089 157.870443,14.4057617 158.412109,14.1137695 C158.953776,13.8217773 159.681641,13.6757813 160.595703,13.6757813 C160.883464,13.6757813 161.166992,13.6948242 161.446289,13.7329102 C161.725586,13.7709961 162.000651,13.8260091 162.271484,13.8979492 C162.542318,13.9698893 162.756022,14.03125 162.912598,14.0820313 C163.069173,14.1328125 163.272298,14.2068685 163.521973,14.3041992 C163.771647,14.4015299 163.90918,14.4544271 163.93457,14.4628906 C164.069987,14.5136719 164.201172,14.5390625 164.328125,14.5390625 C164.548177,14.5390625 164.723796,14.4692383 164.85498,14.3295898 C164.986165,14.1899414 165.051758,14.0227865 165.051758,13.828125 C165.051758,13.4980469 164.886719,13.2483724 164.556641,13.0791016 C164.099609,12.8336589 163.509277,12.6114909 162.785645,12.4125977 C162.062012,12.2137044 161.289714,12.1142578 160.46875,12.1142578 C159.783203,12.1142578 159.148438,12.1798503 158.564453,12.3110352 C157.980469,12.4422201 157.453613,12.6411133 156.983887,12.9077148 C156.51416,13.1743164 156.14388,13.5361328 155.873047,13.9931641 C155.602214,14.4501953 155.466797,14.9791667 155.466797,15.5800781 C155.466797,15.9609375 155.504883,16.2994792 155.581055,16.5957031 C155.657227,16.8919271 155.779948,17.1500651 155.949219,17.3701172 C156.11849,17.5901693 156.300456,17.7784831 156.495117,17.9350586 C156.689779,18.0916341 156.950033,18.2376302 157.275879,18.3730469 C157.601725,18.5084635 157.906413,18.6184896 158.189941,18.703125 C158.47347,18.7877604 158.84375,18.8893229 159.300781,19.0078125 L161.547852,19.5664062 C162.292643,19.7526042 162.825846,19.9980469 163.147461,20.3027344 C163.469076,20.6074219 163.629883,21.0136719 163.629883,21.5214844 C163.629883,22.2324219 163.344238,22.7719727 162.772949,23.1401367 C162.20166,23.5083008 161.450521,23.6923828 160.519531,23.6923828 C159.224609,23.6839193 157.997396,23.319987 156.837891,22.6005859 C156.651693,22.4820964 156.457031,22.4228516 156.253906,22.4228516 C156.033854,22.4228516 155.851888,22.4969076 155.708008,22.6450195 C155.564128,22.7931315 155.492188,22.96875 155.492188,23.171875 C155.492188,23.4511719 155.619141,23.6796875 155.873047,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-11" transform="translate(0, 252)" xlink:href="#path-28" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M8.07421875,25.2792969 C10.1731771,25.2792969 11.8341471,24.8180339 13.0571289,23.8955078 C14.2801107,22.9729818 14.8916016,21.6822917 14.8916016,20.0234375 C14.8916016,19.3378906 14.7879232,18.7327474 14.5805664,18.2080078 C14.3732096,17.6832682 14.0600586,17.2325846 13.6411133,16.855957 C13.222168,16.4793294 12.7566732,16.1598307 12.2446289,15.8974609 C11.7325846,15.6350911 11.1083984,15.3854167 10.3720703,15.1484375 L6.71582031,13.9677734 C5.85253906,13.680013 5.22412109,13.3393555 4.83056641,12.9458008 C4.43701172,12.5522461 4.24023438,12.0253906 4.24023438,11.3652344 C4.24023438,10.485026 4.62109375,9.81217448 5.3828125,9.34667969 C6.14453125,8.8811849 7.16015625,8.6484375 8.4296875,8.6484375 C9.31835938,8.6484375 10.1541341,8.79020182 10.9370117,9.07373047 C11.7198893,9.35725911 12.3694661,9.66829427 12.8857422,10.0068359 C13.0634766,10.1253255 13.2496745,10.1845703 13.4443359,10.1845703 C13.7151693,10.1845703 13.9479167,10.078776 14.1425781,9.8671875 C14.3372396,9.65559896 14.4345703,9.41861979 14.4345703,9.15625 C14.4345703,8.90234375 14.3372396,8.69075521 14.1425781,8.52148437 C13.5670573,8.00520833 12.7355143,7.55452474 11.6479492,7.16943359 C10.5603841,6.78434245 9.41992188,6.59179688 8.2265625,6.59179688 C7.0078125,6.59179688 5.91389974,6.78222656 4.94482422,7.16308594 C3.9757487,7.54394531 3.20556641,8.10677083 2.63427734,8.8515625 C2.06298828,9.59635417 1.77734375,10.4596354 1.77734375,11.4414062 C1.77734375,12.6601562 2.10742188,13.6165365 2.76757813,14.3105469 C3.42773437,15.0045573 4.43066406,15.5800781 5.77636719,16.0371094 L9.41992188,17.2558594 C9.96158854,17.4420573 10.4016927,17.6155599 10.7402344,17.7763672 C11.078776,17.9371745 11.3813477,18.1360677 11.6479492,18.3730469 C11.9145508,18.610026 12.1049805,18.887207 12.2192383,19.2045898 C12.3334961,19.5219727 12.390625,19.9049479 12.390625,20.3535156 C12.390625,21.2845052 11.9991862,21.9954427 11.2163086,22.4863281 C10.433431,22.9772135 9.42415365,23.2226562 8.18847656,23.2226562 C6.29264323,23.2226562 4.62532552,22.6640625 3.18652344,21.546875 C3.09342448,21.4791667 2.98763021,21.4453125 2.86914063,21.4453125 C2.60677083,21.4453125 2.34651693,21.5891927 2.08837891,21.8769531 C1.83024089,22.1647135 1.70117188,22.4440104 1.70117188,22.7148438 C1.70117188,22.875651 1.75195312,23.0026042 1.85351563,23.0957031 C3.41927083,24.5514323 5.49283854,25.2792969 8.07421875,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M22.3974609,25.2412109 C24.0563151,25.2412109 25.3427734,24.8730469 26.2568359,24.1367188 C26.5445964,23.8997396 26.6884766,23.6416016 26.6884766,23.3623047 C26.6884766,23.1676432 26.6207682,23.0047201 26.4853516,22.8735352 C26.3499349,22.7423503 26.1848958,22.6767578 25.9902344,22.6767578 C25.8125,22.6767578 25.6432292,22.7317708 25.4824219,22.8417969 C24.703776,23.3834635 23.7347005,23.6542969 22.5751953,23.6542969 C21.9150391,23.6542969 21.3310547,23.5167643 20.8232422,23.2416992 C20.3154297,22.9666341 19.9091797,22.5942383 19.6044922,22.1245117 C19.2998047,21.6547852 19.0712891,21.1321615 18.9189453,20.5566406 C18.7666016,19.9811198 18.6904297,19.3717448 18.6904297,18.7285156 C18.6904297,17.188151 19.0670573,15.9630534 19.8203125,15.0532227 C20.5735677,14.1433919 21.5553385,13.6884766 22.765625,13.6884766 C23.6966146,13.6884766 24.5472005,13.938151 25.3173828,14.4375 C25.4866536,14.547526 25.664388,14.6025391 25.8505859,14.6025391 C26.0537109,14.6025391 26.2250977,14.5390625 26.3647461,14.4121094 C26.5043945,14.2851563 26.5742188,14.1285807 26.5742188,13.9423828 C26.5742188,13.680013 26.4303385,13.4345703 26.1425781,13.2060547 C25.7786458,12.9013672 25.2877604,12.6474609 24.6699219,12.4443359 C24.0520833,12.2412109 23.3792318,12.1396484 22.6513672,12.1396484 C21.4072266,12.1396484 20.3069661,12.4316406 19.3505859,13.015625 C18.3942057,13.5996094 17.6621094,14.3867188 17.1542969,15.3769531 C16.6464844,16.3671875 16.3925781,17.4716797 16.3925781,18.6904297 C16.3925781,20.5777995 16.9342448,22.1414388 18.0175781,23.3813477 C19.1009115,24.6212565 20.5608724,25.2412109 22.3974609,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M33.9277344,25.2285156 C35.8828125,25.2285156 37.4316406,24.6149089 38.5742188,23.3876953 C39.7167969,22.1604818 40.2880859,20.5947266 40.2880859,18.6904297 C40.2880859,16.7692057 39.714681,15.1971029 38.5678711,13.9741211 C37.4210612,12.7511393 35.874349,12.1396484 33.9277344,12.1396484 C31.9811198,12.1396484 30.4344076,12.7532552 29.2875977,13.9804688 C28.1407878,15.2076823 27.5673828,16.7776693 27.5673828,18.6904297 C27.5673828,20.5947266 28.1407878,22.1604818 29.2875977,23.3876953 C30.4344076,24.6149089 31.9811198,25.2285156 33.9277344,25.2285156 Z M33.9023438,23.6542969 C32.6328125,23.6542969 31.6404622,23.2078451 30.925293,22.3149414 C30.2101237,21.4220378 29.8525391,20.2138672 29.8525391,18.6904297 C29.8525391,17.1500651 30.2122396,15.933431 30.9316406,15.0405273 C31.6510417,14.1476237 32.6497396,13.7011719 33.9277344,13.7011719 C35.1972656,13.7011719 36.1938477,14.1497396 36.9174805,15.046875 C37.6411133,15.9440104 38.0029297,17.1585286 38.0029297,18.6904297 C38.0029297,20.2307943 37.6453451,21.4431966 36.9301758,22.3276367 C36.2150065,23.2120768 35.2057292,23.6542969 33.9023438,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M43.2871094,25 C43.6002604,25 43.8732096,24.906901 44.105957,24.7207031 C44.3387044,24.5345052 44.4550781,24.2721354 44.4550781,23.9335938 L44.4550781,17.3955078 C44.4550781,16.4052734 44.7703451,15.6139323 45.4008789,15.0214844 C46.0314128,14.4290365 46.9179688,14.1328125 48.0605469,14.1328125 C48.3313802,14.1328125 48.538737,14.0418294 48.6826172,13.8598633 C48.8264974,13.6778971 48.8984375,13.4599609 48.8984375,13.2060547 C48.8984375,12.9352214 48.8201497,12.6982422 48.6635742,12.4951172 C48.5069987,12.2919922 48.2932943,12.1904297 48.0224609,12.1904297 C47.1253255,12.1904297 46.3572591,12.4549154 45.7182617,12.9838867 C45.0792643,13.5128581 44.6539714,14.1708984 44.4423828,14.9580078 L44.4550781,13.4345703 C44.4550781,13.1129557 44.3429362,12.8611654 44.1186523,12.6791992 C43.8943685,12.4972331 43.625651,12.40625 43.3125,12.40625 C42.999349,12.40625 42.7285156,12.4972331 42.5,12.6791992 C42.2714844,12.8611654 42.1572266,13.1214193 42.1572266,13.4599609 L42.1572266,23.9335938 C42.1572266,24.2721354 42.2672526,24.5345052 42.4873047,24.7207031 C42.7073568,24.906901 42.9739583,25 43.2871094,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M51.65625,9.44824219 C52.1048177,9.44824219 52.4645182,9.31917318 52.7353516,9.06103516 C53.0061849,8.80289714 53.1416016,8.46647135 53.1416016,8.05175781 C53.1416016,7.62858073 53.0061849,7.28792318 52.7353516,7.02978516 C52.4645182,6.77164714 52.1090495,6.64257812 51.6689453,6.64257812 C51.2203776,6.64257812 50.8585612,6.77376302 50.5834961,7.03613281 C50.308431,7.2985026 50.1708984,7.63704427 50.1708984,8.05175781 C50.1708984,8.46647135 50.3063151,8.80289714 50.5771484,9.06103516 C50.8479818,9.31917318 51.2076823,9.44824219 51.65625,9.44824219 Z M51.6435547,25 C51.9567057,25 52.2296549,24.8963216 52.4624023,24.6889648 C52.6951497,24.4816081 52.8115234,24.2001953 52.8115234,23.8447266 L52.8115234,13.5234375 C52.8115234,13.1679688 52.6993815,12.8886719 52.4750977,12.6855469 C52.2508138,12.4824219 51.9863281,12.3808594 51.6816406,12.3808594 C51.3684896,12.3808594 51.0955404,12.4824219 50.862793,12.6855469 C50.6300456,12.8886719 50.5136719,13.1679688 50.5136719,13.5234375 L50.5136719,23.8447266 C50.5136719,24.2171224 50.6236979,24.5027669 50.84375,24.7016602 C51.0638021,24.9005534 51.3304036,25 51.6435547,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M56.7373047,25 C57.0504557,25 57.3234049,24.9026693 57.5561523,24.7080078 C57.7888997,24.5133464 57.9052734,24.2467448 57.9052734,23.9082031 L57.9052734,17.5732422 C57.9645182,16.3798828 58.3284505,15.4361979 58.9970703,14.7421875 C59.6656901,14.0481771 60.4866536,13.7011719 61.4599609,13.7011719 C62.3740234,13.7011719 63.0828451,13.9847005 63.5864258,14.5517578 C64.0900065,15.1188151 64.3417969,15.9440104 64.3417969,17.0273438 L64.3417969,23.9082031 C64.3417969,24.2552083 64.4539388,24.5239258 64.6782227,24.7143555 C64.9025065,24.9047852 65.171224,25 65.484375,25 C65.797526,25 66.0662435,24.9047852 66.2905273,24.7143555 C66.5148112,24.5239258 66.6269531,24.2552083 66.6269531,23.9082031 L66.6269531,17.0527344 C66.6269531,15.4023437 66.2122396,14.1708984 65.3828125,13.3583984 C64.5533854,12.5458984 63.4404297,12.1396484 62.0439453,12.1396484 C61.070638,12.1396484 60.2179362,12.3470052 59.4858398,12.7617188 C58.7537435,13.1764323 58.226888,13.7688802 57.9052734,14.5390625 L57.9052734,13.4345703 C57.9052734,13.1044922 57.7931315,12.8484701 57.5688477,12.6665039 C57.3445638,12.4845378 57.0758464,12.3935547 56.7626953,12.3935547 C56.4495443,12.3935547 56.1787109,12.4887695 55.9501953,12.6791992 C55.7216797,12.8696289 55.6074219,13.1341146 55.6074219,13.4726563 L55.6074219,23.9082031 C55.6074219,24.2552083 55.7174479,24.5239258 55.9375,24.7143555 C56.1575521,24.9047852 56.4241536,25 56.7373047,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M74.640625,23.4765625 C73.5234375,23.4765625 72.6009115,23.0703125 71.8730469,22.2578125 C71.1451823,21.4453125 70.78125,20.2519531 70.78125,18.6777344 C70.78125,17.7382812 70.9124349,16.9046224 71.1748047,16.1767578 C71.4371745,15.4488932 71.8645833,14.8543294 72.4570312,14.3930664 C73.0494792,13.9318034 73.7773438,13.7011719 74.640625,13.7011719 C75.1822917,13.7011719 75.6689453,13.7921549 76.1005859,13.9741211 C76.5322266,14.1560872 76.8855794,14.4015299 77.1606445,14.7104492 C77.4357096,15.0193685 77.6642253,15.3875326 77.8461914,15.8149414 C78.0281576,16.2423503 78.1572266,16.6866862 78.2333984,17.1479492 C78.3095703,17.6092122 78.3476562,18.1022135 78.3476562,18.6269531 C78.3476562,20.1588542 78.000651,21.3500977 77.3066406,22.2006836 C76.6126302,23.0512695 75.7239583,23.4765625 74.640625,23.4765625 Z M74.5390625,30.3701172 C76.4095052,30.3701172 77.8842773,29.9130859 78.9633789,28.9990234 C80.0424805,28.0849609 80.5820312,26.7265625 80.5820312,24.9238281 L80.5820312,13.4599609 C80.5820312,13.1214193 80.476237,12.8569336 80.2646484,12.6665039 C80.0530599,12.4760742 79.7991536,12.3808594 79.5029297,12.3808594 C79.2320964,12.3808594 78.9930013,12.4633789 78.7856445,12.628418 C78.5782878,12.793457 78.4619141,13.0198568 78.4365234,13.3076172 L78.4365234,14.5898437 C77.5986328,12.9563802 76.1936849,12.1396484 74.2216797,12.1396484 C73.0283203,12.1396484 71.9915365,12.4401042 71.1113281,13.0410156 C70.2311198,13.6419271 69.5730794,14.4353841 69.137207,15.4213867 C68.7013346,16.4073893 68.4833984,17.5182292 68.4833984,18.7539063 C68.4833984,19.625651 68.6124674,20.438151 68.8706055,21.1914062 C69.1287435,21.9446615 69.4969076,22.6048177 69.9750977,23.171875 C70.4532878,23.7389323 71.0626628,24.1853841 71.8032227,24.5112305 C72.5437826,24.8370768 73.3710938,25 74.2851562,25 C75.2584635,25 76.0942383,24.7736003 76.7924805,24.3208008 C77.4907227,23.8680013 78.0133464,23.2565104 78.3603516,22.4863281 L78.3603516,25 C78.3603516,26.21875 78.0260417,27.1518555 77.3574219,27.7993164 C76.6888021,28.4467773 75.6985677,28.7705078 74.3867188,28.7705078 C74.1835938,28.7705078 73.9825846,28.7620443 73.7836914,28.7451172 C73.5847982,28.7281901 73.4070638,28.7091471 73.2504883,28.6879883 C73.0939128,28.6668294 72.9204102,28.6350911 72.7299805,28.5927734 C72.5395508,28.5504557 72.387207,28.5166016 72.2729492,28.4912109 C72.1586914,28.4658203 72.0105794,28.4235026 71.8286133,28.3642578 C71.6466471,28.305013 71.5218099,28.2626953 71.4541016,28.2373047 C71.3863932,28.2119141 71.2657878,28.1653646 71.0922852,28.0976563 C70.9187826,28.0299479 70.8151042,27.991862 70.78125,27.9833984 C70.688151,27.9495443 70.5908203,27.9326172 70.4892578,27.9326172 C70.2776693,27.9326172 70.097819,28.0109049 69.949707,28.1674805 C69.8015951,28.324056 69.7275391,28.508138 69.7275391,28.7197266 C69.7275391,29.0328776 69.8883464,29.2783203 70.2099609,29.4560547 C71.3525391,30.0654297 72.7955729,30.3701172 74.5390625,30.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M97.2441406,25.2792969 C98.0143229,25.2792969 98.7485352,25.1946615 99.4467773,25.0253906 C100.14502,24.8561198 100.798828,24.59375 101.408203,24.2382813 C102.017578,23.8828125 102.544434,23.449056 102.98877,22.9370117 C103.433105,22.4249674 103.784342,21.8071289 104.04248,21.0834961 C104.300618,20.3598633 104.429688,19.5621745 104.429688,18.6904297 L104.429688,8.06445312 C104.429688,7.67513021 104.30485,7.37044271 104.055176,7.15039063 C103.805501,6.93033854 103.507161,6.8203125 103.160156,6.8203125 C102.804688,6.8203125 102.5,6.93033854 102.246094,7.15039063 C101.992188,7.37044271 101.865234,7.67513021 101.865234,8.06445312 L101.865234,18.6269531 C101.865234,19.3886719 101.734049,20.069987 101.47168,20.6708984 C101.20931,21.2718099 100.858073,21.7542318 100.417969,22.1181641 C99.9778646,22.4820964 99.4869792,22.7571615 98.9453125,22.9433594 C98.4036458,23.1295573 97.8365885,23.2226562 97.2441406,23.2226562 C96.6516927,23.2226562 96.0867513,23.1295573 95.5493164,22.9433594 C95.0118815,22.7571615 94.5252279,22.4820964 94.0893555,22.1181641 C93.6534831,21.7542318 93.304362,21.2718099 93.0419922,20.6708984 C92.7796224,20.069987 92.6484375,19.3886719 92.6484375,18.6269531 L92.6484375,8.06445312 C92.6484375,7.67513021 92.5214844,7.37044271 92.2675781,7.15039063 C92.0136719,6.93033854 91.7132161,6.8203125 91.3662109,6.8203125 C91.0107422,6.8203125 90.7060547,6.93033854 90.4521484,7.15039063 C90.1982422,7.37044271 90.0712891,7.67513021 90.0712891,8.06445312 L90.0712891,18.6904297 C90.0712891,19.7652995 90.2659505,20.7301432 90.6552734,21.5849609 C91.0445964,22.4397786 91.5735677,23.1316732 92.2421875,23.6606445 C92.9108073,24.1896159 93.6704102,24.5916341 94.5209961,24.8666992 C95.371582,25.1417643 96.2792969,25.2792969 97.2441406,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M107.581055,23.8574219 C108.088867,24.2298177 108.768066,24.5535482 109.618652,24.8286133 C110.469238,25.1036784 111.347331,25.2412109 112.25293,25.2412109 C113.226237,25.2412109 114.081055,25.1079102 114.817383,24.8413086 C115.553711,24.574707 116.148275,24.1451823 116.601074,23.5527344 C117.053874,22.9602865 117.280273,22.2324219 117.280273,21.3691406 C117.280273,20.4550781 117.007324,19.7039388 116.461426,19.1157227 C115.915527,18.5275065 115.003581,18.0768229 113.725586,17.7636719 L111.529297,17.2177734 C110.606771,16.9892578 110.005859,16.7713216 109.726562,16.5639648 C109.447266,16.3566081 109.307617,15.9990234 109.307617,15.4912109 C109.307617,14.8649089 109.578451,14.4057617 110.120117,14.1137695 C110.661784,13.8217773 111.389648,13.6757813 112.303711,13.6757813 C112.591471,13.6757813 112.875,13.6948242 113.154297,13.7329102 C113.433594,13.7709961 113.708659,13.8260091 113.979492,13.8979492 C114.250326,13.9698893 114.46403,14.03125 114.620605,14.0820313 C114.777181,14.1328125 114.980306,14.2068685 115.22998,14.3041992 C115.479655,14.4015299 115.617188,14.4544271 115.642578,14.4628906 C115.777995,14.5136719 115.90918,14.5390625 116.036133,14.5390625 C116.256185,14.5390625 116.431803,14.4692383 116.562988,14.3295898 C116.694173,14.1899414 116.759766,14.0227865 116.759766,13.828125 C116.759766,13.4980469 116.594727,13.2483724 116.264648,13.0791016 C115.807617,12.8336589 115.217285,12.6114909 114.493652,12.4125977 C113.77002,12.2137044 112.997721,12.1142578 112.176758,12.1142578 C111.491211,12.1142578 110.856445,12.1798503 110.272461,12.3110352 C109.688477,12.4422201 109.161621,12.6411133 108.691895,12.9077148 C108.222168,13.1743164 107.851888,13.5361328 107.581055,13.9931641 C107.310221,14.4501953 107.174805,14.9791667 107.174805,15.5800781 C107.174805,15.9609375 107.212891,16.2994792 107.289062,16.5957031 C107.365234,16.8919271 107.487956,17.1500651 107.657227,17.3701172 C107.826497,17.5901693 108.008464,17.7784831 108.203125,17.9350586 C108.397786,18.0916341 108.65804,18.2376302 108.983887,18.3730469 C109.309733,18.5084635 109.614421,18.6184896 109.897949,18.703125 C110.181478,18.7877604 110.551758,18.8893229 111.008789,19.0078125 L113.255859,19.5664062 C114.000651,19.7526042 114.533854,19.9980469 114.855469,20.3027344 C115.177083,20.6074219 115.337891,21.0136719 115.337891,21.5214844 C115.337891,22.2324219 115.052246,22.7719727 114.480957,23.1401367 C113.909668,23.5083008 113.158529,23.6923828 112.227539,23.6923828 C110.932617,23.6839193 109.705404,23.319987 108.545898,22.6005859 C108.359701,22.4820964 108.165039,22.4228516 107.961914,22.4228516 C107.741862,22.4228516 107.559896,22.4969076 107.416016,22.6450195 C107.272135,22.7931315 107.200195,22.96875 107.200195,23.171875 C107.200195,23.4511719 107.327148,23.6796875 107.581055,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M123.097656,23.7050781 C122.310547,23.7050781 121.688477,23.5252279 121.231445,23.1655273 C120.774414,22.8058268 120.545898,22.2408854 120.545898,21.4707031 C120.545898,21.047526 120.607259,20.7005208 120.72998,20.4296875 C120.852702,20.1588542 121.041016,19.9282227 121.294922,19.737793 C121.548828,19.5473633 121.929688,19.4034831 122.4375,19.3061523 C122.945312,19.2088216 123.529297,19.1411133 124.189453,19.1030273 C124.849609,19.0649414 125.704427,19.0458984 126.753906,19.0458984 L126.753906,19.4267578 C126.753906,20.6708984 126.387858,21.694987 125.655762,22.4990234 C124.923665,23.3030599 124.070964,23.7050781 123.097656,23.7050781 Z M122.856445,25.2285156 C124.676107,25.2285156 125.979492,24.4033203 126.766602,22.7529297 L126.766602,23.9716797 C126.766602,24.3017578 126.874512,24.5577799 127.090332,24.7397461 C127.306152,24.9217122 127.566406,25.0126953 127.871094,25.0126953 C128.175781,25.0126953 128.444499,24.9174805 128.677246,24.7270508 C128.909993,24.5366211 129.026367,24.2763672 129.026367,23.9462891 L129.026367,16.5449219 C129.026367,15.046875 128.588379,13.938151 127.712402,13.21875 C126.836426,12.499349 125.624023,12.1396484 124.075195,12.1396484 C122.263997,12.1396484 120.702474,12.5078125 119.390625,13.2441406 C119.153646,13.3795573 119.035156,13.5742187 119.035156,13.828125 C119.035156,14.0481771 119.11556,14.2491862 119.276367,14.4311523 C119.437174,14.6131185 119.627604,14.7041016 119.847656,14.7041016 C119.966146,14.7041016 120.067708,14.6829427 120.152344,14.640625 C120.583984,14.4459635 120.947917,14.2936198 121.244141,14.1835937 C121.540365,14.0735677 121.946615,13.9635417 122.462891,13.8535156 C122.979167,13.7434896 123.495443,13.6884766 124.011719,13.6884766 C124.875,13.6884766 125.547852,13.9042969 126.030273,14.3359375 C126.512695,14.7675781 126.753906,15.4361979 126.753906,16.3417969 L126.753906,17.7255859 C125.814453,17.7255859 125.012533,17.7382812 124.348145,17.7636719 C123.683757,17.7890625 123.038411,17.8334961 122.412109,17.8969727 C121.785807,17.9604492 121.269531,18.0535482 120.863281,18.1762695 C120.457031,18.2989909 120.084635,18.4555664 119.746094,18.6459961 C119.407552,18.8364258 119.145182,19.0691732 118.958984,19.3442383 C118.772786,19.6193034 118.628906,19.9388021 118.527344,20.3027344 C118.425781,20.6666667 118.375,21.0898438 118.375,21.5722656 C118.375,22.7402344 118.789714,23.6416016 119.619141,24.2763672 C120.448568,24.9111328 121.527669,25.2285156 122.856445,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M137.040039,23.4765625 C135.922852,23.4765625 135.000326,23.0703125 134.272461,22.2578125 C133.544596,21.4453125 133.180664,20.2519531 133.180664,18.6777344 C133.180664,17.7382812 133.311849,16.9046224 133.574219,16.1767578 C133.836589,15.4488932 134.263997,14.8543294 134.856445,14.3930664 C135.448893,13.9318034 136.176758,13.7011719 137.040039,13.7011719 C137.581706,13.7011719 138.068359,13.7921549 138.5,13.9741211 C138.931641,14.1560872 139.284993,14.4015299 139.560059,14.7104492 C139.835124,15.0193685 140.063639,15.3875326 140.245605,15.8149414 C140.427572,16.2423503 140.556641,16.6866862 140.632812,17.1479492 C140.708984,17.6092122 140.74707,18.1022135 140.74707,18.6269531 C140.74707,20.1588542 140.400065,21.3500977 139.706055,22.2006836 C139.012044,23.0512695 138.123372,23.4765625 137.040039,23.4765625 Z M136.938477,30.3701172 C138.808919,30.3701172 140.283691,29.9130859 141.362793,28.9990234 C142.441895,28.0849609 142.981445,26.7265625 142.981445,24.9238281 L142.981445,13.4599609 C142.981445,13.1214193 142.875651,12.8569336 142.664062,12.6665039 C142.452474,12.4760742 142.198568,12.3808594 141.902344,12.3808594 C141.63151,12.3808594 141.392415,12.4633789 141.185059,12.628418 C140.977702,12.793457 140.861328,13.0198568 140.835938,13.3076172 L140.835938,14.5898437 C139.998047,12.9563802 138.593099,12.1396484 136.621094,12.1396484 C135.427734,12.1396484 134.390951,12.4401042 133.510742,13.0410156 C132.630534,13.6419271 131.972493,14.4353841 131.536621,15.4213867 C131.100749,16.4073893 130.882812,17.5182292 130.882812,18.7539063 C130.882812,19.625651 131.011882,20.438151 131.27002,21.1914062 C131.528158,21.9446615 131.896322,22.6048177 132.374512,23.171875 C132.852702,23.7389323 133.462077,24.1853841 134.202637,24.5112305 C134.943197,24.8370768 135.770508,25 136.68457,25 C137.657878,25 138.493652,24.7736003 139.191895,24.3208008 C139.890137,23.8680013 140.41276,23.2565104 140.759766,22.4863281 L140.759766,25 C140.759766,26.21875 140.425456,27.1518555 139.756836,27.7993164 C139.088216,28.4467773 138.097982,28.7705078 136.786133,28.7705078 C136.583008,28.7705078 136.381999,28.7620443 136.183105,28.7451172 C135.984212,28.7281901 135.806478,28.7091471 135.649902,28.6879883 C135.493327,28.6668294 135.319824,28.6350911 135.129395,28.5927734 C134.938965,28.5504557 134.786621,28.5166016 134.672363,28.4912109 C134.558105,28.4658203 134.409993,28.4235026 134.228027,28.3642578 C134.046061,28.305013 133.921224,28.2626953 133.853516,28.2373047 C133.785807,28.2119141 133.665202,28.1653646 133.491699,28.0976563 C133.318197,28.0299479 133.214518,27.991862 133.180664,27.9833984 C133.087565,27.9495443 132.990234,27.9326172 132.888672,27.9326172 C132.677083,27.9326172 132.497233,28.0109049 132.349121,28.1674805 C132.201009,28.324056 132.126953,28.508138 132.126953,28.7197266 C132.126953,29.0328776 132.28776,29.2783203 132.609375,29.4560547 C133.751953,30.0654297 135.194987,30.3701172 136.938477,30.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M147.262695,17.7382812 C147.305013,17.2135417 147.417155,16.7163086 147.599121,16.246582 C147.781087,15.7768555 148.02653,15.3494466 148.335449,14.9643555 C148.644368,14.5792643 149.040039,14.2724609 149.522461,14.0439453 C150.004883,13.8154297 150.542318,13.7011719 151.134766,13.7011719 C152.277344,13.7011719 153.144857,14.0777995 153.737305,14.8310547 C154.329753,15.5843099 154.664062,16.5533854 154.740234,17.7382812 L147.262695,17.7382812 Z M151.172852,25.2285156 C153.068685,25.2285156 154.685221,24.6276042 156.022461,23.4257812 C156.25944,23.2141927 156.37793,22.9729818 156.37793,22.7021484 C156.37793,22.4905599 156.308105,22.3064779 156.168457,22.1499023 C156.028809,21.9933268 155.857422,21.9150391 155.654297,21.9150391 C155.485026,21.9150391 155.324219,21.9742839 155.171875,22.0927734 C154.5625,22.5582682 153.957357,22.9264323 153.356445,23.1972656 C152.755534,23.468099 152.078451,23.6035156 151.325195,23.6035156 C150.131836,23.5865885 149.154297,23.2036133 148.392578,22.4545898 C147.630859,21.7055664 147.241536,20.6031901 147.224609,19.1474609 L155.933594,19.1474609 C156.204427,19.1474609 156.407552,19.0670573 156.542969,18.90625 C156.678385,18.7454427 156.746094,18.5423177 156.746094,18.296875 C156.720703,17.4420573 156.597982,16.6570638 156.37793,15.9418945 C156.157878,15.2267253 155.827799,14.5792643 155.387695,13.9995117 C154.947591,13.4197591 154.355143,12.9648438 153.610352,12.6347656 C152.86556,12.3046875 151.998047,12.1396484 151.007812,12.1396484 C149.755208,12.1396484 148.65918,12.4443359 147.719727,13.0537109 C146.780273,13.6630859 146.077799,14.4544271 145.612305,15.4277344 C145.14681,16.4010417 144.914062,17.4759115 144.914062,18.6523437 C144.914062,19.9895833 145.191243,21.159668 145.745605,22.1625977 C146.299967,23.1655273 147.046875,23.9251302 147.986328,24.4414063 C148.925781,24.9576823 149.987956,25.2200521 151.172852,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M158.818359,23.8574219 C159.326172,24.2298177 160.005371,24.5535482 160.855957,24.8286133 C161.706543,25.1036784 162.584635,25.2412109 163.490234,25.2412109 C164.463542,25.2412109 165.318359,25.1079102 166.054688,24.8413086 C166.791016,24.574707 167.385579,24.1451823 167.838379,23.5527344 C168.291178,22.9602865 168.517578,22.2324219 168.517578,21.3691406 C168.517578,20.4550781 168.244629,19.7039388 167.69873,19.1157227 C167.152832,18.5275065 166.240885,18.0768229 164.962891,17.7636719 L162.766602,17.2177734 C161.844076,16.9892578 161.243164,16.7713216 160.963867,16.5639648 C160.68457,16.3566081 160.544922,15.9990234 160.544922,15.4912109 C160.544922,14.8649089 160.815755,14.4057617 161.357422,14.1137695 C161.899089,13.8217773 162.626953,13.6757813 163.541016,13.6757813 C163.828776,13.6757813 164.112305,13.6948242 164.391602,13.7329102 C164.670898,13.7709961 164.945964,13.8260091 165.216797,13.8979492 C165.48763,13.9698893 165.701335,14.03125 165.85791,14.0820313 C166.014486,14.1328125 166.217611,14.2068685 166.467285,14.3041992 C166.71696,14.4015299 166.854492,14.4544271 166.879883,14.4628906 C167.015299,14.5136719 167.146484,14.5390625 167.273438,14.5390625 C167.49349,14.5390625 167.669108,14.4692383 167.800293,14.3295898 C167.931478,14.1899414 167.99707,14.0227865 167.99707,13.828125 C167.99707,13.4980469 167.832031,13.2483724 167.501953,13.0791016 C167.044922,12.8336589 166.45459,12.6114909 165.730957,12.4125977 C165.007324,12.2137044 164.235026,12.1142578 163.414062,12.1142578 C162.728516,12.1142578 162.09375,12.1798503 161.509766,12.3110352 C160.925781,12.4422201 160.398926,12.6411133 159.929199,12.9077148 C159.459473,13.1743164 159.089193,13.5361328 158.818359,13.9931641 C158.547526,14.4501953 158.412109,14.9791667 158.412109,15.5800781 C158.412109,15.9609375 158.450195,16.2994792 158.526367,16.5957031 C158.602539,16.8919271 158.72526,17.1500651 158.894531,17.3701172 C159.063802,17.5901693 159.245768,17.7784831 159.44043,17.9350586 C159.635091,18.0916341 159.895345,18.2376302 160.221191,18.3730469 C160.547038,18.5084635 160.851725,18.6184896 161.135254,18.703125 C161.418783,18.7877604 161.789062,18.8893229 162.246094,19.0078125 L164.493164,19.5664062 C165.237956,19.7526042 165.771159,19.9980469 166.092773,20.3027344 C166.414388,20.6074219 166.575195,21.0136719 166.575195,21.5214844 C166.575195,22.2324219 166.289551,22.7719727 165.718262,23.1401367 C165.146973,23.5083008 164.395833,23.6923828 163.464844,23.6923828 C162.169922,23.6839193 160.942708,23.319987 159.783203,22.6005859 C159.597005,22.4820964 159.402344,22.4228516 159.199219,22.4228516 C158.979167,22.4228516 158.797201,22.4969076 158.65332,22.6450195 C158.50944,22.7931315 158.4375,22.96875 158.4375,23.171875 C158.4375,23.4511719 158.564453,23.6796875 158.818359,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+            </g>
+        </g>
+        <g id="List-@Online-Serving" stroke-width="1" transform="translate(1314, 40)">
+            <g id="header-/-01_purple-@2_standard" xlink:href="#path-29">
+                <g id="bg">
+                    <path d="M20,0 L580,0 C591.045695,-3.55271368e-15 600,8.954305 600,20 L600,52 L600,52 L0,52 L0,20 C0,8.954305 8.954305,0 20,0 Z" id="header-bg" fill-opacity="0.1" fill="#FDB515"></path>
+                    <path d="M580,0 C591.045695,0 600,8.954305 600,20 L600,52 L0,52 L0,20 C0,8.954305 8.954305,0 20,0 L580,0 Z M580,1.6 L20.0000001,1.6 L18.1187074,1.6949974 C8.84039506,2.63726101 1.6,10.4730881 1.6,20 L1.6,50.3999998 L598.4,50.3999998 L598.4,20 C598.4,9.8379606 590.162039,1.6 580,1.6 Z" id="header-bg" fill="#EDA709" fill-rule="nonzero"></path>
+                </g>
+                <g id="Title" transform="translate(0, 12)" fill="#EDA709">
+                    <path d="M228.029297,19.65625 C226.451172,19.65625 225.277344,19.1230469 224.507812,18.0566406 C223.738281,16.9902344 223.353516,15.4609375 223.353516,13.46875 C223.353516,11.4921875 223.740234,9.96679688 224.513672,8.89257812 C225.287109,7.81835938 226.458984,7.28125 228.029297,7.28125 C229.607422,7.28125 230.78125,7.81640625 231.550781,8.88671875 C232.320312,9.95703125 232.705078,11.484375 232.705078,13.46875 C232.705078,15.4609375 232.318359,16.9902344 231.544922,18.0566406 C230.771484,19.1230469 229.599609,19.65625 228.029297,19.65625 Z M228.029297,22.3046875 C230.662109,22.3046875 232.689453,21.5195312 234.111328,19.9492188 C235.533203,18.3789062 236.244141,16.21875 236.244141,13.46875 C236.244141,10.71875 235.533203,8.55664062 234.111328,6.98242188 C232.689453,5.40820312 230.662109,4.62109375 228.029297,4.62109375 C225.412109,4.62109375 223.388672,5.40820312 221.958984,6.98242188 C220.529297,8.55664062 219.814453,10.71875 219.814453,13.46875 C219.814453,16.21875 220.529297,18.3789062 221.958984,19.9492188 C223.388672,21.5195312 225.412109,22.3046875 228.029297,22.3046875 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M240.474609,22 C240.904297,22 241.257812,21.8574219 241.535156,21.5722656 C241.8125,21.2871094 241.951172,20.890625 241.951172,20.3828125 L241.951172,15.4257812 C241.951172,14.4335938 242.236328,13.6289062 242.806641,13.0117188 C243.376953,12.3945312 244.052734,12.0859375 244.833984,12.0859375 C245.482422,12.0859375 246.015625,12.2988281 246.433594,12.7246094 C246.851562,13.1503906 247.060547,13.765625 247.060547,14.5703125 L247.060547,20.3828125 C247.060547,20.890625 247.197266,21.2871094 247.470703,21.5722656 C247.744141,21.8574219 248.087891,22 248.501953,22 C248.939453,22 249.298828,21.8574219 249.580078,21.5722656 C249.861328,21.2871094 250.001953,20.890625 250.001953,20.3828125 L250.001953,14.5820312 C250.001953,13.8320312 249.888672,13.1640625 249.662109,12.578125 C249.435547,11.9921875 249.126953,11.5214844 248.736328,11.1660156 C248.345703,10.8105469 247.902344,10.5410156 247.40625,10.3574219 C246.910156,10.1738281 246.380859,10.0820312 245.818359,10.0820312 C244.904297,10.0820312 244.117188,10.2539062 243.457031,10.5976562 C242.796875,10.9414062 242.294922,11.4453125 241.951172,12.109375 L241.951172,11.6992188 C241.951172,11.2382812 241.814453,10.8808594 241.541016,10.6269531 C241.267578,10.3730469 240.919922,10.2460938 240.498047,10.2460938 C240.068359,10.2460938 239.712891,10.375 239.431641,10.6328125 C239.150391,10.890625 239.009766,11.2539062 239.009766,11.7226562 L239.009766,20.3828125 C239.009766,20.890625 239.146484,21.2871094 239.419922,21.5722656 C239.693359,21.8574219 240.044922,22 240.474609,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M254.513672,22 C254.951172,22 255.3125,21.8554688 255.597656,21.5664062 C255.882812,21.2773438 256.025391,20.875 256.025391,20.359375 L256.025391,6.75390625 C256.025391,6.23828125 255.884766,5.8359375 255.603516,5.546875 C255.322266,5.2578125 254.966797,5.11328125 254.537109,5.11328125 C254.107422,5.11328125 253.755859,5.2578125 253.482422,5.546875 C253.208984,5.8359375 253.072266,6.23828125 253.072266,6.75390625 L253.072266,20.359375 C253.072266,20.8828125 253.207031,21.2871094 253.476562,21.5722656 C253.746094,21.8574219 254.091797,22 254.513672,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M260.677734,22 C261.107422,22 261.460938,21.8554688 261.738281,21.5664062 C262.015625,21.2773438 262.154297,20.875 262.154297,20.359375 L262.154297,11.8984375 C262.154297,11.375 262.015625,10.96875 261.738281,10.6796875 C261.460938,10.390625 261.107422,10.2460938 260.677734,10.2460938 C260.248047,10.2460938 259.896484,10.390625 259.623047,10.6796875 C259.349609,10.96875 259.212891,11.375 259.212891,11.8984375 L259.212891,20.359375 C259.212891,20.8828125 259.349609,21.2871094 259.623047,21.5722656 C259.896484,21.8574219 260.248047,22 260.677734,22 Z M260.677734,7.90234375 C261.185547,7.90234375 261.597656,7.75195312 261.914062,7.45117188 C262.230469,7.15039062 262.388672,6.7578125 262.388672,6.2734375 C262.388672,5.7890625 262.232422,5.3984375 261.919922,5.1015625 C261.607422,4.8046875 261.197266,4.65625 260.689453,4.65625 C260.173828,4.65625 259.757812,4.8046875 259.441406,5.1015625 C259.125,5.3984375 258.966797,5.7890625 258.966797,6.2734375 C258.966797,6.7578125 259.125,7.15039062 259.441406,7.45117188 C259.757812,7.75195312 260.169922,7.90234375 260.677734,7.90234375 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M266.595703,22 C267.025391,22 267.378906,21.8574219 267.65625,21.5722656 C267.933594,21.2871094 268.072266,20.890625 268.072266,20.3828125 L268.072266,15.4257812 C268.072266,14.4335938 268.357422,13.6289062 268.927734,13.0117188 C269.498047,12.3945312 270.173828,12.0859375 270.955078,12.0859375 C271.603516,12.0859375 272.136719,12.2988281 272.554688,12.7246094 C272.972656,13.1503906 273.181641,13.765625 273.181641,14.5703125 L273.181641,20.3828125 C273.181641,20.890625 273.318359,21.2871094 273.591797,21.5722656 C273.865234,21.8574219 274.208984,22 274.623047,22 C275.060547,22 275.419922,21.8574219 275.701172,21.5722656 C275.982422,21.2871094 276.123047,20.890625 276.123047,20.3828125 L276.123047,14.5820312 C276.123047,13.8320312 276.009766,13.1640625 275.783203,12.578125 C275.556641,11.9921875 275.248047,11.5214844 274.857422,11.1660156 C274.466797,10.8105469 274.023438,10.5410156 273.527344,10.3574219 C273.03125,10.1738281 272.501953,10.0820312 271.939453,10.0820312 C271.025391,10.0820312 270.238281,10.2539062 269.578125,10.5976562 C268.917969,10.9414062 268.416016,11.4453125 268.072266,12.109375 L268.072266,11.6992188 C268.072266,11.2382812 267.935547,10.8808594 267.662109,10.6269531 C267.388672,10.3730469 267.041016,10.2460938 266.619141,10.2460938 C266.189453,10.2460938 265.833984,10.375 265.552734,10.6328125 C265.271484,10.890625 265.130859,11.2539062 265.130859,11.7226562 L265.130859,20.3828125 C265.130859,20.890625 265.267578,21.2871094 265.541016,21.5722656 C265.814453,21.8574219 266.166016,22 266.595703,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M278.513672,16.0117188 C278.513672,17.9414062 279.070312,19.4453125 280.183594,20.5234375 C281.296875,21.6015625 282.779297,22.140625 284.630859,22.140625 C286.279297,22.140625 287.720703,21.6210938 288.955078,20.5820312 C289.205078,20.3632812 289.330078,20.0976562 289.330078,19.7851562 C289.330078,19.5117188 289.236328,19.265625 289.048828,19.046875 C288.861328,18.828125 288.646484,18.71875 288.404297,18.71875 C288.240234,18.71875 288.087891,18.7734375 287.947266,18.8828125 C286.845703,19.671875 285.810547,20.0664062 284.841797,20.0664062 C282.708984,20.0664062 281.580078,18.9648438 281.455078,16.7617188 L288.861328,16.7617188 C289.142578,16.7617188 289.353516,16.6679688 289.494141,16.4804688 C289.634766,16.2929688 289.705078,16.046875 289.705078,15.7421875 C289.697266,14.96875 289.583984,14.25 289.365234,13.5859375 C289.146484,12.921875 288.824219,12.3242188 288.398438,11.7929688 C287.972656,11.2617188 287.410156,10.8457031 286.710938,10.5449219 C286.011719,10.2441406 285.208984,10.09375 284.302734,10.09375 C283.123047,10.09375 282.087891,10.3671875 281.197266,10.9140625 C280.306641,11.4609375 279.636719,12.1777344 279.1875,13.0644531 C278.738281,13.9511719 278.513672,14.9335938 278.513672,16.0117188 Z M281.478516,15.109375 C281.548828,14.2578125 281.832031,13.53125 282.328125,12.9296875 C282.824219,12.328125 283.509766,12.0273438 284.384766,12.0273438 C285.283203,12.0273438 285.960938,12.3164062 286.417969,12.8945312 C286.875,13.4726562 287.134766,14.2109375 287.197266,15.109375 L281.478516,15.109375 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M304.869141,22.3046875 C306.916016,22.3046875 308.546875,21.8457031 309.761719,20.9277344 C310.976562,20.0097656 311.583984,18.7617188 311.583984,17.1835938 C311.583984,16.5039062 311.482422,15.9042969 311.279297,15.3847656 C311.076172,14.8652344 310.761719,14.4140625 310.335938,14.03125 C309.910156,13.6484375 309.429688,13.3242188 308.894531,13.0585938 C308.359375,12.7929688 307.701172,12.5351562 306.919922,12.2851562 C306.880859,12.2695312 306.310547,12.1015625 305.208984,11.78125 C304.107422,11.4609375 303.537109,11.2929688 303.498047,11.2773438 C302.912109,11.0820312 302.476562,10.8300781 302.191406,10.5214844 C301.90625,10.2128906 301.763672,9.8125 301.763672,9.3203125 C301.763672,8.9375 301.857422,8.609375 302.044922,8.3359375 C302.232422,8.0625 302.494141,7.8515625 302.830078,7.703125 C303.166016,7.5546875 303.53125,7.44726562 303.925781,7.38085938 C304.320312,7.31445312 304.759766,7.28125 305.244141,7.28125 C306.705078,7.28125 308.064453,7.640625 309.322266,8.359375 C309.525391,8.46875 309.712891,8.5234375 309.884766,8.5234375 C310.212891,8.5234375 310.494141,8.38085938 310.728516,8.09570312 C310.962891,7.81054688 311.080078,7.4921875 311.080078,7.140625 C311.080078,6.796875 310.958984,6.515625 310.716797,6.296875 C310.154297,5.8125 309.345703,5.41210938 308.291016,5.09570312 C307.236328,4.77929688 306.150391,4.62109375 305.033203,4.62109375 C303.119141,4.62109375 301.558594,5.05859375 300.351562,5.93359375 C299.144531,6.80859375 298.541016,7.9921875 298.541016,9.484375 C298.541016,10.640625 298.835938,11.5566406 299.425781,12.2324219 C300.015625,12.9082031 300.978516,13.4765625 302.314453,13.9375 L306.181641,15.2265625 C306.908203,15.4609375 307.443359,15.7460938 307.787109,16.0820312 C308.130859,16.4179688 308.302734,16.8828125 308.302734,17.4765625 C308.302734,18.1953125 307.984375,18.7382812 307.347656,19.1054688 C306.710938,19.4726562 305.880859,19.65625 304.857422,19.65625 C303.255859,19.65625 301.724609,19.21875 300.263672,18.34375 C300.130859,18.265625 299.994141,18.2265625 299.853516,18.2265625 C299.517578,18.2265625 299.210938,18.4121094 298.933594,18.7832031 C298.65625,19.1542969 298.517578,19.53125 298.517578,19.9140625 C298.517578,20.203125 298.607422,20.4140625 298.787109,20.546875 C300.294922,21.71875 302.322266,22.3046875 304.869141,22.3046875 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M313.564453,16.0117188 C313.564453,17.9414062 314.121094,19.4453125 315.234375,20.5234375 C316.347656,21.6015625 317.830078,22.140625 319.681641,22.140625 C321.330078,22.140625 322.771484,21.6210938 324.005859,20.5820312 C324.255859,20.3632812 324.380859,20.0976562 324.380859,19.7851562 C324.380859,19.5117188 324.287109,19.265625 324.099609,19.046875 C323.912109,18.828125 323.697266,18.71875 323.455078,18.71875 C323.291016,18.71875 323.138672,18.7734375 322.998047,18.8828125 C321.896484,19.671875 320.861328,20.0664062 319.892578,20.0664062 C317.759766,20.0664062 316.630859,18.9648438 316.505859,16.7617188 L323.912109,16.7617188 C324.193359,16.7617188 324.404297,16.6679688 324.544922,16.4804688 C324.685547,16.2929688 324.755859,16.046875 324.755859,15.7421875 C324.748047,14.96875 324.634766,14.25 324.416016,13.5859375 C324.197266,12.921875 323.875,12.3242188 323.449219,11.7929688 C323.023438,11.2617188 322.460938,10.8457031 321.761719,10.5449219 C321.0625,10.2441406 320.259766,10.09375 319.353516,10.09375 C318.173828,10.09375 317.138672,10.3671875 316.248047,10.9140625 C315.357422,11.4609375 314.6875,12.1777344 314.238281,13.0644531 C313.789062,13.9511719 313.564453,14.9335938 313.564453,16.0117188 Z M316.529297,15.109375 C316.599609,14.2578125 316.882812,13.53125 317.378906,12.9296875 C317.875,12.328125 318.560547,12.0273438 319.435547,12.0273438 C320.333984,12.0273438 321.011719,12.3164062 321.46875,12.8945312 C321.925781,13.4726562 322.185547,14.2109375 322.248047,15.109375 L316.529297,15.109375 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M328.529297,22 C328.966797,22 329.324219,21.8632812 329.601562,21.5898438 C329.878906,21.3164062 330.017578,20.9257812 330.017578,20.4179688 L330.017578,15.4726562 C330.017578,14.9179688 330.107422,14.4492188 330.287109,14.0664062 C330.466797,13.6835938 330.720703,13.3984375 331.048828,13.2109375 C331.376953,13.0234375 331.71875,12.8925781 332.074219,12.8183594 C332.429688,12.7441406 332.837891,12.7070312 333.298828,12.7070312 C333.603516,12.7070312 333.845703,12.5820312 334.025391,12.3320312 C334.205078,12.0820312 334.294922,11.7929688 334.294922,11.4648438 C334.294922,11.1210938 334.197266,10.828125 334.001953,10.5859375 C333.806641,10.34375 333.529297,10.2226562 333.169922,10.2226562 C332.435547,10.2226562 331.773438,10.4746094 331.183594,10.9785156 C330.59375,11.4824219 330.193359,12.0859375 329.982422,12.7890625 L330.017578,11.5820312 C330.025391,11.1679688 329.882812,10.8417969 329.589844,10.6035156 C329.296875,10.3652344 328.951172,10.2460938 328.552734,10.2460938 C328.146484,10.2460938 327.796875,10.3691406 327.503906,10.6152344 C327.210938,10.8613281 327.064453,11.2070312 327.064453,11.6523438 L327.064453,20.3945312 C327.064453,20.9101562 327.203125,21.3066406 327.480469,21.5839844 C327.757812,21.8613281 328.107422,22 328.529297,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M340.904297,22.0703125 C341.537109,22.0703125 342.027344,21.9003906 342.375,21.5605469 C342.722656,21.2207031 343.060547,20.609375 343.388672,19.7265625 L346.517578,12.1445312 C346.595703,11.9570312 346.634766,11.7617188 346.634766,11.5585938 C346.634766,11.1757812 346.494141,10.8554688 346.212891,10.5976562 C345.931641,10.3398438 345.611328,10.2109375 345.251953,10.2109375 C344.978516,10.2109375 344.728516,10.2929688 344.501953,10.4570312 C344.275391,10.6210938 344.099609,10.8710938 343.974609,11.2070312 L340.904297,19.0234375 L337.833984,11.2070312 C337.708984,10.8710938 337.533203,10.6210938 337.306641,10.4570312 C337.080078,10.2929688 336.830078,10.2109375 336.556641,10.2109375 C336.197266,10.2109375 335.876953,10.3398438 335.595703,10.5976562 C335.314453,10.8554688 335.173828,11.1757812 335.173828,11.5585938 C335.173828,11.7617188 335.212891,11.9570312 335.291016,12.1445312 L338.408203,19.7265625 C338.736328,20.609375 339.076172,21.2207031 339.427734,21.5605469 C339.779297,21.9003906 340.271484,22.0703125 340.904297,22.0703125 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M350.232422,22 C350.662109,22 351.015625,21.8554688 351.292969,21.5664062 C351.570312,21.2773438 351.708984,20.875 351.708984,20.359375 L351.708984,11.8984375 C351.708984,11.375 351.570312,10.96875 351.292969,10.6796875 C351.015625,10.390625 350.662109,10.2460938 350.232422,10.2460938 C349.802734,10.2460938 349.451172,10.390625 349.177734,10.6796875 C348.904297,10.96875 348.767578,11.375 348.767578,11.8984375 L348.767578,20.359375 C348.767578,20.8828125 348.904297,21.2871094 349.177734,21.5722656 C349.451172,21.8574219 349.802734,22 350.232422,22 Z M350.232422,7.90234375 C350.740234,7.90234375 351.152344,7.75195312 351.46875,7.45117188 C351.785156,7.15039062 351.943359,6.7578125 351.943359,6.2734375 C351.943359,5.7890625 351.787109,5.3984375 351.474609,5.1015625 C351.162109,4.8046875 350.751953,4.65625 350.244141,4.65625 C349.728516,4.65625 349.3125,4.8046875 348.996094,5.1015625 C348.679688,5.3984375 348.521484,5.7890625 348.521484,6.2734375 C348.521484,6.7578125 348.679688,7.15039062 348.996094,7.45117188 C349.3125,7.75195312 349.724609,7.90234375 350.232422,7.90234375 Z" id="Shape" fill-rule="nonzero"></path>
+                    <path d="M356.150391,22 C356.580078,22 356.933594,21.8574219 357.210938,21.5722656 C357.488281,21.2871094 357.626953,20.890625 357.626953,20.3828125 L357.626953,15.4257812 C357.626953,14.4335938 357.912109,13.6289062 358.482422,13.0117188 C359.052734,12.3945312 359.728516,12.0859375 360.509766,12.0859375 C361.158203,12.0859375 361.691406,12.2988281 362.109375,12.7246094 C362.527344,13.1503906 362.736328,13.765625 362.736328,14.5703125 L362.736328,20.3828125 C362.736328,20.890625 362.873047,21.2871094 363.146484,21.5722656 C363.419922,21.8574219 363.763672,22 364.177734,22 C364.615234,22 364.974609,21.8574219 365.255859,21.5722656 C365.537109,21.2871094 365.677734,20.890625 365.677734,20.3828125 L365.677734,14.5820312 C365.677734,13.8320312 365.564453,13.1640625 365.337891,12.578125 C365.111328,11.9921875 364.802734,11.5214844 364.412109,11.1660156 C364.021484,10.8105469 363.578125,10.5410156 363.082031,10.3574219 C362.585938,10.1738281 362.056641,10.0820312 361.494141,10.0820312 C360.580078,10.0820312 359.792969,10.2539062 359.132812,10.5976562 C358.472656,10.9414062 357.970703,11.4453125 357.626953,12.109375 L357.626953,11.6992188 C357.626953,11.2382812 357.490234,10.8808594 357.216797,10.6269531 C356.943359,10.3730469 356.595703,10.2460938 356.173828,10.2460938 C355.744141,10.2460938 355.388672,10.375 355.107422,10.6328125 C354.826172,10.890625 354.685547,11.2539062 354.685547,11.7226562 L354.685547,20.3828125 C354.685547,20.890625 354.822266,21.2871094 355.095703,21.5722656 C355.369141,21.8574219 355.720703,22 356.150391,22 Z" id="Path" fill-rule="nonzero"></path>
+                    <path d="M377.115234,21.8476562 C377.115234,22.9570312 376.810547,23.7832031 376.201172,24.3261719 C375.591797,24.8691406 374.740234,25.140625 373.646484,25.140625 C373.333984,25.140625 373.015625,25.1132812 372.691406,25.0585938 C372.367188,25.0039062 372.123047,24.953125 371.958984,24.90625 C371.794922,24.859375 371.537109,24.7753906 371.185547,24.6542969 C370.833984,24.5332031 370.623047,24.4609375 370.552734,24.4375 C370.427734,24.390625 370.302734,24.3671875 370.177734,24.3671875 C369.904297,24.3671875 369.677734,24.46875 369.498047,24.671875 C369.318359,24.875 369.228516,25.109375 369.228516,25.375 C369.228516,25.75 369.408203,26.046875 369.767578,26.265625 C370.189453,26.5234375 370.78125,26.7402344 371.542969,26.9160156 C372.304688,27.0917969 373.103516,27.1796875 373.939453,27.1796875 C375.783203,27.1796875 377.242188,26.6953125 378.316406,25.7265625 C379.390625,24.7578125 379.927734,23.3476562 379.927734,21.4960938 L379.927734,11.8867188 C379.927734,11.3710938 379.796875,10.96875 379.535156,10.6796875 C379.273438,10.390625 378.939453,10.2460938 378.533203,10.2460938 C378.173828,10.2460938 377.867188,10.3535156 377.613281,10.5683594 C377.359375,10.7832031 377.212891,11.0898438 377.173828,11.4882812 L377.173828,12.0273438 C376.783203,11.4023438 376.318359,10.9257812 375.779297,10.5976562 C375.240234,10.2695312 374.498047,10.1054688 373.552734,10.1054688 C371.857422,10.1054688 370.515625,10.6699219 369.527344,11.7988281 C368.539062,12.9277344 368.044922,14.3945312 368.044922,16.1992188 C368.044922,17.9882812 368.552734,19.4042969 369.568359,20.4472656 C370.583984,21.4902344 371.935547,22.0117188 373.623047,22.0117188 C375.287109,22.0117188 376.451172,21.3828125 377.115234,20.125 L377.115234,21.8476562 Z M374.150391,20.0546875 C373.259766,20.0390625 372.519531,19.7011719 371.929688,19.0410156 C371.339844,18.3808594 371.044922,17.40625 371.044922,16.1171875 C371.044922,15.671875 371.080078,15.2558594 371.150391,14.8691406 C371.220703,14.4824219 371.333984,14.1152344 371.490234,13.7675781 C371.646484,13.4199219 371.841797,13.1230469 372.076172,12.8769531 C372.310547,12.6308594 372.603516,12.4355469 372.955078,12.2910156 C373.306641,12.1464844 373.701172,12.0742188 374.138672,12.0742188 C376.123047,12.0742188 377.115234,13.4296875 377.115234,16.140625 C377.115234,17.4765625 376.845703,18.4589844 376.306641,19.0878906 C375.767578,19.7167969 375.048828,20.0390625 374.150391,20.0546875 Z" id="Shape" fill-rule="nonzero"></path>
+                </g>
+            </g>
+            <g id="List" transform="translate(0, 50)">
+                <g id="bg">
+                    <path d="M0,0 L600,0 L600,256 C600,260.418278 596.418278,264 592,264 L8,264 C3.581722,264 0,260.418278 0,256 L0,0 L0,0 Z" fill="#FFFFFF"></path>
+                    <path d="M600,0 L600,256 C600,260.418278 596.418278,264 592,264 L8,264 C3.581722,264 0,260.418278 0,256 L0,0 L600,0 Z M598.4,1.60000053 L1.6,1.60000053 L1.6,256 C1.6,259.534622 4.4653776,262.4 8,262.4 L592,262.4 C595.534622,262.4 598.4,259.534622 598.4,256 L598.4,1.60000053 Z" fill="#FDB515" fill-rule="nonzero"></path>
+                </g>
+                <g id="List-Item-/-01_@text_#333" transform="translate(0, 12)" xlink:href="#path-30" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M9.953125,25.2792969 C11.874349,25.2792969 13.5416667,24.8180339 14.9550781,23.8955078 C15.3782552,23.6246745 15.5898438,23.2945964 15.5898438,22.9052734 C15.5898438,22.6429036 15.4967448,22.414388 15.3105469,22.2197266 C15.124349,22.0250651 14.9000651,21.9277344 14.6376953,21.9277344 C14.4853516,21.9277344 14.3245443,21.9742839 14.1552734,22.0673828 C13.4951172,22.4397786 12.8815104,22.7254232 12.3144531,22.9243164 C11.7473958,23.1232096 11.0872396,23.2226562 10.3339844,23.2226562 C9.49609375,23.2226562 8.73649089,23.0957031 8.05517578,22.8417969 C7.37386068,22.5878906 6.80257161,22.2408854 6.34130859,21.8007812 C5.88004557,21.3606771 5.49283854,20.8338216 5.1796875,20.2202148 C4.86653646,19.6066081 4.63802083,18.9443359 4.49414062,18.2333984 C4.35026042,17.5224609 4.27832031,16.764974 4.27832031,15.9609375 C4.27832031,15.156901 4.35026042,14.3972982 4.49414062,13.6821289 C4.63802083,12.9669596 4.86653646,12.3004557 5.1796875,11.6826172 C5.49283854,11.0647786 5.87792969,10.5315755 6.33496094,10.0830078 C6.79199219,9.6344401 7.35481771,9.28320312 8.0234375,9.02929688 C8.69205729,8.77539062 9.43684896,8.6484375 10.2578125,8.6484375 C10.5963542,8.6484375 10.9243164,8.66536458 11.2416992,8.69921875 C11.559082,8.73307292 11.8235677,8.77327474 12.0351562,8.81982422 C12.2467448,8.8663737 12.4667969,8.92985026 12.6953125,9.01025391 C12.9238281,9.09065755 13.093099,9.15413411 13.203125,9.20068359 C13.313151,9.24723307 13.4549154,9.31494141 13.628418,9.40380859 C13.8019206,9.49267578 13.905599,9.54557292 13.9394531,9.5625 C14.0917969,9.63020833 14.2314453,9.6640625 14.3583984,9.6640625 C14.6207682,9.6640625 14.8450521,9.5625 15.03125,9.359375 C15.2174479,9.15625 15.3105469,8.91927083 15.3105469,8.6484375 C15.3105469,8.26757812 15.124349,7.97558594 14.7519531,7.77246094 C13.3385417,6.98535156 11.8108724,6.59179688 10.1689453,6.59179688 C8.84016927,6.59179688 7.62988281,6.83723958 6.53808594,7.328125 C5.44628906,7.81901042 4.5406901,8.48974609 3.82128906,9.34033203 C3.10188802,10.190918 2.54541016,11.1875 2.15185547,12.3300781 C1.75830078,13.4726562 1.56152344,14.695638 1.56152344,15.9990234 C1.56152344,17.7509766 1.88313802,19.3209635 2.52636719,20.7089844 C3.16959635,22.0970052 4.1344401,23.2057292 5.42089844,24.0351562 C6.70735677,24.8645833 8.21809896,25.2792969 9.953125,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M19.4140625,25 C19.7272135,25 20.0001628,24.8963216 20.2329102,24.6889648 C20.4656576,24.4816081 20.5820312,24.2001953 20.5820312,23.8447266 L20.5820312,8.01367188 C20.5820312,7.65820312 20.4698893,7.37890625 20.2456055,7.17578125 C20.0213216,6.97265625 19.7568359,6.87109375 19.4521484,6.87109375 C19.1389974,6.87109375 18.8660482,6.97265625 18.6333008,7.17578125 C18.4005534,7.37890625 18.2841797,7.65820312 18.2841797,8.01367188 L18.2841797,23.8447266 C18.2841797,24.2171224 18.3942057,24.5027669 18.6142578,24.7016602 C18.8343099,24.9005534 19.1009115,25 19.4140625,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M27.0976562,23.7050781 C26.3105469,23.7050781 25.6884766,23.5252279 25.2314453,23.1655273 C24.7744141,22.8058268 24.5458984,22.2408854 24.5458984,21.4707031 C24.5458984,21.047526 24.6072591,20.7005208 24.7299805,20.4296875 C24.8527018,20.1588542 25.0410156,19.9282227 25.2949219,19.737793 C25.5488281,19.5473633 25.9296875,19.4034831 26.4375,19.3061523 C26.9453125,19.2088216 27.5292969,19.1411133 28.1894531,19.1030273 C28.8496094,19.0649414 29.7044271,19.0458984 30.7539062,19.0458984 L30.7539062,19.4267578 C30.7539062,20.6708984 30.3878581,21.694987 29.6557617,22.4990234 C28.9236654,23.3030599 28.0709635,23.7050781 27.0976562,23.7050781 Z M26.8564453,25.2285156 C28.6761068,25.2285156 29.9794922,24.4033203 30.7666016,22.7529297 L30.7666016,23.9716797 C30.7666016,24.3017578 30.8745117,24.5577799 31.090332,24.7397461 C31.3061523,24.9217122 31.5664062,25.0126953 31.8710938,25.0126953 C32.1757812,25.0126953 32.4444987,24.9174805 32.6772461,24.7270508 C32.9099935,24.5366211 33.0263672,24.2763672 33.0263672,23.9462891 L33.0263672,16.5449219 C33.0263672,15.046875 32.5883789,13.938151 31.7124023,13.21875 C30.8364258,12.499349 29.6240234,12.1396484 28.0751953,12.1396484 C26.2639974,12.1396484 24.702474,12.5078125 23.390625,13.2441406 C23.1536458,13.3795573 23.0351562,13.5742188 23.0351562,13.828125 C23.0351562,14.0481771 23.1155599,14.2491862 23.2763672,14.4311523 C23.4371745,14.6131185 23.6276042,14.7041016 23.8476562,14.7041016 C23.9661458,14.7041016 24.0677083,14.6829427 24.1523438,14.640625 C24.5839844,14.4459635 24.9479167,14.2936198 25.2441406,14.1835938 C25.5403646,14.0735677 25.9466146,13.9635417 26.4628906,13.8535156 C26.9791667,13.7434896 27.4954427,13.6884766 28.0117188,13.6884766 C28.875,13.6884766 29.5478516,13.9042969 30.0302734,14.3359375 C30.5126953,14.7675781 30.7539062,15.4361979 30.7539062,16.3417969 L30.7539062,17.7255859 C29.8144531,17.7255859 29.0125326,17.7382812 28.3481445,17.7636719 C27.6837565,17.7890625 27.0384115,17.8334961 26.4121094,17.8969727 C25.7858073,17.9604492 25.2695312,18.0535482 24.8632812,18.1762695 C24.4570312,18.2989909 24.0846354,18.4555664 23.7460938,18.6459961 C23.4075521,18.8364258 23.1451823,19.0691732 22.9589844,19.3442383 C22.7727865,19.6193034 22.6289062,19.9388021 22.5273438,20.3027344 C22.4257812,20.6666667 22.375,21.0898438 22.375,21.5722656 C22.375,22.7402344 22.7897135,23.6416016 23.6191406,24.2763672 C24.4485677,24.9111328 25.5276693,25.2285156 26.8564453,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M35.6572266,23.8574219 C36.1650391,24.2298177 36.8442383,24.5535482 37.6948242,24.8286133 C38.5454102,25.1036784 39.4235026,25.2412109 40.3291016,25.2412109 C41.3024089,25.2412109 42.1572266,25.1079102 42.8935547,24.8413086 C43.6298828,24.574707 44.2244466,24.1451823 44.6772461,23.5527344 C45.1300456,22.9602865 45.3564453,22.2324219 45.3564453,21.3691406 C45.3564453,20.4550781 45.0834961,19.7039388 44.5375977,19.1157227 C43.9916992,18.5275065 43.0797526,18.0768229 41.8017578,17.7636719 L39.6054688,17.2177734 C38.6829427,16.9892578 38.0820312,16.7713216 37.8027344,16.5639648 C37.5234375,16.3566081 37.3837891,15.9990234 37.3837891,15.4912109 C37.3837891,14.8649089 37.6546224,14.4057617 38.1962891,14.1137695 C38.7379557,13.8217773 39.4658203,13.6757812 40.3798828,13.6757812 C40.6676432,13.6757812 40.9511719,13.6948242 41.2304688,13.7329102 C41.5097656,13.7709961 41.7848307,13.8260091 42.0556641,13.8979492 C42.3264974,13.9698893 42.5402018,14.03125 42.6967773,14.0820312 C42.8533529,14.1328125 43.0564779,14.2068685 43.3061523,14.3041992 C43.5558268,14.4015299 43.6933594,14.4544271 43.71875,14.4628906 C43.8541667,14.5136719 43.9853516,14.5390625 44.1123047,14.5390625 C44.3323568,14.5390625 44.5079753,14.4692383 44.6391602,14.3295898 C44.7703451,14.1899414 44.8359375,14.0227865 44.8359375,13.828125 C44.8359375,13.4980469 44.6708984,13.2483724 44.3408203,13.0791016 C43.8837891,12.8336589 43.293457,12.6114909 42.5698242,12.4125977 C41.8461914,12.2137044 41.0738932,12.1142578 40.2529297,12.1142578 C39.5673828,12.1142578 38.9326172,12.1798503 38.3486328,12.3110352 C37.7646484,12.4422201 37.237793,12.6411133 36.7680664,12.9077148 C36.2983398,13.1743164 35.9280599,13.5361328 35.6572266,13.9931641 C35.3863932,14.4501953 35.2509766,14.9791667 35.2509766,15.5800781 C35.2509766,15.9609375 35.2890625,16.2994792 35.3652344,16.5957031 C35.4414062,16.8919271 35.5641276,17.1500651 35.7333984,17.3701172 C35.9026693,17.5901693 36.0846354,17.7784831 36.2792969,17.9350586 C36.4739583,18.0916341 36.7342122,18.2376302 37.0600586,18.3730469 C37.3859049,18.5084635 37.6905924,18.6184896 37.9741211,18.703125 C38.2576497,18.7877604 38.6279297,18.8893229 39.0849609,19.0078125 L41.3320312,19.5664062 C42.0768229,19.7526042 42.610026,19.9980469 42.9316406,20.3027344 C43.2532552,20.6074219 43.4140625,21.0136719 43.4140625,21.5214844 C43.4140625,22.2324219 43.128418,22.7719727 42.5571289,23.1401367 C41.9858398,23.5083008 41.2347005,23.6923828 40.3037109,23.6923828 C39.0087891,23.6839193 37.7815755,23.319987 36.6220703,22.6005859 C36.4358724,22.4820964 36.2412109,22.4228516 36.0380859,22.4228516 C35.8180339,22.4228516 35.6360677,22.4969076 35.4921875,22.6450195 C35.3483073,22.7931315 35.2763672,22.96875 35.2763672,23.171875 C35.2763672,23.4511719 35.4033203,23.6796875 35.6572266,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M47.4287109,23.8574219 C47.9365234,24.2298177 48.6157227,24.5535482 49.4663086,24.8286133 C50.3168945,25.1036784 51.194987,25.2412109 52.1005859,25.2412109 C53.0738932,25.2412109 53.9287109,25.1079102 54.6650391,24.8413086 C55.4013672,24.574707 55.995931,24.1451823 56.4487305,23.5527344 C56.9015299,22.9602865 57.1279297,22.2324219 57.1279297,21.3691406 C57.1279297,20.4550781 56.8549805,19.7039388 56.309082,19.1157227 C55.7631836,18.5275065 54.851237,18.0768229 53.5732422,17.7636719 L51.3769531,17.2177734 C50.4544271,16.9892578 49.8535156,16.7713216 49.5742188,16.5639648 C49.2949219,16.3566081 49.1552734,15.9990234 49.1552734,15.4912109 C49.1552734,14.8649089 49.4261068,14.4057617 49.9677734,14.1137695 C50.5094401,13.8217773 51.2373047,13.6757812 52.1513672,13.6757812 C52.4391276,13.6757812 52.7226562,13.6948242 53.0019531,13.7329102 C53.28125,13.7709961 53.5563151,13.8260091 53.8271484,13.8979492 C54.0979818,13.9698893 54.3116862,14.03125 54.4682617,14.0820312 C54.6248372,14.1328125 54.8279622,14.2068685 55.0776367,14.3041992 C55.3273112,14.4015299 55.4648438,14.4544271 55.4902344,14.4628906 C55.625651,14.5136719 55.7568359,14.5390625 55.8837891,14.5390625 C56.1038411,14.5390625 56.2794596,14.4692383 56.4106445,14.3295898 C56.5418294,14.1899414 56.6074219,14.0227865 56.6074219,13.828125 C56.6074219,13.4980469 56.4423828,13.2483724 56.1123047,13.0791016 C55.6552734,12.8336589 55.0649414,12.6114909 54.3413086,12.4125977 C53.6176758,12.2137044 52.8453776,12.1142578 52.0244141,12.1142578 C51.3388672,12.1142578 50.7041016,12.1798503 50.1201172,12.3110352 C49.5361328,12.4422201 49.0092773,12.6411133 48.5395508,12.9077148 C48.0698242,13.1743164 47.6995443,13.5361328 47.4287109,13.9931641 C47.1578776,14.4501953 47.0224609,14.9791667 47.0224609,15.5800781 C47.0224609,15.9609375 47.0605469,16.2994792 47.1367188,16.5957031 C47.2128906,16.8919271 47.335612,17.1500651 47.5048828,17.3701172 C47.6741536,17.5901693 47.8561198,17.7784831 48.0507812,17.9350586 C48.2454427,18.0916341 48.5056966,18.2376302 48.831543,18.3730469 C49.1573893,18.5084635 49.4620768,18.6184896 49.7456055,18.703125 C50.0291341,18.7877604 50.3994141,18.8893229 50.8564453,19.0078125 L53.1035156,19.5664062 C53.8483073,19.7526042 54.3815104,19.9980469 54.703125,20.3027344 C55.0247396,20.6074219 55.1855469,21.0136719 55.1855469,21.5214844 C55.1855469,22.2324219 54.8999023,22.7719727 54.3286133,23.1401367 C53.7573242,23.5083008 53.0061849,23.6923828 52.0751953,23.6923828 C50.7802734,23.6839193 49.5530599,23.319987 48.3935547,22.6005859 C48.2073568,22.4820964 48.0126953,22.4228516 47.8095703,22.4228516 C47.5895182,22.4228516 47.4075521,22.4969076 47.2636719,22.6450195 C47.1197917,22.7931315 47.0478516,22.96875 47.0478516,23.171875 C47.0478516,23.4511719 47.1748047,23.6796875 47.4287109,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M60.5712891,9.44824219 C61.0198568,9.44824219 61.3795573,9.31917318 61.6503906,9.06103516 C61.921224,8.80289714 62.0566406,8.46647135 62.0566406,8.05175781 C62.0566406,7.62858073 61.921224,7.28792318 61.6503906,7.02978516 C61.3795573,6.77164714 61.0240885,6.64257812 60.5839844,6.64257812 C60.1354167,6.64257812 59.7736003,6.77376302 59.4985352,7.03613281 C59.2234701,7.2985026 59.0859375,7.63704427 59.0859375,8.05175781 C59.0859375,8.46647135 59.2213542,8.80289714 59.4921875,9.06103516 C59.7630208,9.31917318 60.1227214,9.44824219 60.5712891,9.44824219 Z M60.5585938,25 C60.8717448,25 61.144694,24.8963216 61.3774414,24.6889648 C61.6101888,24.4816081 61.7265625,24.2001953 61.7265625,23.8447266 L61.7265625,13.5234375 C61.7265625,13.1679688 61.6144206,12.8886719 61.3901367,12.6855469 C61.1658529,12.4824219 60.9013672,12.3808594 60.5966797,12.3808594 C60.2835286,12.3808594 60.0105794,12.4824219 59.777832,12.6855469 C59.5450846,12.8886719 59.4287109,13.1679688 59.4287109,13.5234375 L59.4287109,23.8447266 C59.4287109,24.2171224 59.538737,24.5027669 59.7587891,24.7016602 C59.9788411,24.9005534 60.2454427,25 60.5585938,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M66.7314453,25 C67.0445964,25 67.3154297,24.9005534 67.5439453,24.7016602 C67.7724609,24.5027669 67.8867188,24.2255859 67.8867188,23.8701172 L67.8867188,14.0058594 L70.0322266,14.0058594 C70.5738932,14.0058594 70.8447266,13.7604167 70.8447266,13.2695312 C70.8447266,13.0494792 70.7791341,12.867513 70.6479492,12.7236328 C70.5167643,12.5797526 70.3115234,12.5078125 70.0322266,12.5078125 L67.8867188,12.5078125 L67.8867188,11.5175781 C67.8867188,10.8320312 67.9142253,10.2882487 67.9692383,9.88623047 C68.0242513,9.48421224 68.1300456,9.17529297 68.2866211,8.95947266 C68.4431966,8.74365234 68.6230469,8.60400391 68.8261719,8.54052734 C69.0292969,8.47705078 69.3128255,8.4453125 69.6767578,8.4453125 L70.7558594,8.4453125 C71.0097656,8.4453125 71.206543,8.36067708 71.3461914,8.19140625 C71.4858398,8.02213542 71.5556641,7.81901042 71.5556641,7.58203125 C71.5556641,7.34505208 71.4858398,7.1398112 71.3461914,6.96630859 C71.206543,6.79280599 71.0139974,6.70605469 70.7685547,6.70605469 L69.2197266,6.70605469 C68.085612,6.70605469 67.1969401,7.02555339 66.5537109,7.66455078 C65.9104818,8.30354818 65.5888672,9.38053385 65.5888672,10.8955078 L65.5888672,12.5078125 L63.9257812,12.5078125 C63.6464844,12.5078125 63.4391276,12.5797526 63.3037109,12.7236328 C63.1682943,12.867513 63.1005859,13.0494792 63.1005859,13.2695312 C63.1005859,13.4811198 63.1682943,13.6567383 63.3037109,13.7963867 C63.4391276,13.9360352 63.6464844,14.0058594 63.9257812,14.0058594 L65.5888672,14.0058594 L65.5888672,23.8701172 C65.5888672,24.2255859 65.7010091,24.5027669 65.925293,24.7016602 C66.1495768,24.9005534 66.4182943,25 66.7314453,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M73.7421875,9.44824219 C74.1907552,9.44824219 74.5504557,9.31917318 74.8212891,9.06103516 C75.0921224,8.80289714 75.2275391,8.46647135 75.2275391,8.05175781 C75.2275391,7.62858073 75.0921224,7.28792318 74.8212891,7.02978516 C74.5504557,6.77164714 74.194987,6.64257812 73.7548828,6.64257812 C73.3063151,6.64257812 72.9444987,6.77376302 72.6694336,7.03613281 C72.3943685,7.2985026 72.2568359,7.63704427 72.2568359,8.05175781 C72.2568359,8.46647135 72.3922526,8.80289714 72.6630859,9.06103516 C72.9339193,9.31917318 73.2936198,9.44824219 73.7421875,9.44824219 Z M73.7294922,25 C74.0426432,25 74.3155924,24.8963216 74.5483398,24.6889648 C74.7810872,24.4816081 74.8974609,24.2001953 74.8974609,23.8447266 L74.8974609,13.5234375 C74.8974609,13.1679688 74.785319,12.8886719 74.5610352,12.6855469 C74.3367513,12.4824219 74.0722656,12.3808594 73.7675781,12.3808594 C73.4544271,12.3808594 73.1814779,12.4824219 72.9487305,12.6855469 C72.7159831,12.8886719 72.5996094,13.1679688 72.5996094,13.5234375 L72.5996094,23.8447266 C72.5996094,24.2171224 72.7096354,24.5027669 72.9296875,24.7016602 C73.1497396,24.9005534 73.4163411,25 73.7294922,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M82.8349609,25.2412109 C84.4938151,25.2412109 85.7802734,24.8730469 86.6943359,24.1367188 C86.9820964,23.8997396 87.1259766,23.6416016 87.1259766,23.3623047 C87.1259766,23.1676432 87.0582682,23.0047201 86.9228516,22.8735352 C86.7874349,22.7423503 86.6223958,22.6767578 86.4277344,22.6767578 C86.25,22.6767578 86.0807292,22.7317708 85.9199219,22.8417969 C85.141276,23.3834635 84.1722005,23.6542969 83.0126953,23.6542969 C82.3525391,23.6542969 81.7685547,23.5167643 81.2607422,23.2416992 C80.7529297,22.9666341 80.3466797,22.5942383 80.0419922,22.1245117 C79.7373047,21.6547852 79.5087891,21.1321615 79.3564453,20.5566406 C79.2041016,19.9811198 79.1279297,19.3717448 79.1279297,18.7285156 C79.1279297,17.188151 79.5045573,15.9630534 80.2578125,15.0532227 C81.0110677,14.1433919 81.9928385,13.6884766 83.203125,13.6884766 C84.1341146,13.6884766 84.9847005,13.938151 85.7548828,14.4375 C85.9241536,14.547526 86.101888,14.6025391 86.2880859,14.6025391 C86.4912109,14.6025391 86.6625977,14.5390625 86.8022461,14.4121094 C86.9418945,14.2851562 87.0117188,14.1285807 87.0117188,13.9423828 C87.0117188,13.680013 86.8678385,13.4345703 86.5800781,13.2060547 C86.2161458,12.9013672 85.7252604,12.6474609 85.1074219,12.4443359 C84.4895833,12.2412109 83.8167318,12.1396484 83.0888672,12.1396484 C81.8447266,12.1396484 80.7444661,12.4316406 79.7880859,13.015625 C78.8317057,13.5996094 78.0996094,14.3867188 77.5917969,15.3769531 C77.0839844,16.3671875 76.8300781,17.4716797 76.8300781,18.6904297 C76.8300781,20.5777995 77.3717448,22.1414388 78.4550781,23.3813477 C79.5384115,24.6212565 80.9983724,25.2412109 82.8349609,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M92.5244141,23.7050781 C91.7373047,23.7050781 91.1152344,23.5252279 90.6582031,23.1655273 C90.2011719,22.8058268 89.9726562,22.2408854 89.9726562,21.4707031 C89.9726562,21.047526 90.0340169,20.7005208 90.1567383,20.4296875 C90.2794596,20.1588542 90.4677734,19.9282227 90.7216797,19.737793 C90.9755859,19.5473633 91.3564453,19.4034831 91.8642578,19.3061523 C92.3720703,19.2088216 92.9560547,19.1411133 93.6162109,19.1030273 C94.2763672,19.0649414 95.1311849,19.0458984 96.1806641,19.0458984 L96.1806641,19.4267578 C96.1806641,20.6708984 95.8146159,21.694987 95.0825195,22.4990234 C94.3504232,23.3030599 93.4977214,23.7050781 92.5244141,23.7050781 Z M92.2832031,25.2285156 C94.1028646,25.2285156 95.40625,24.4033203 96.1933594,22.7529297 L96.1933594,23.9716797 C96.1933594,24.3017578 96.3012695,24.5577799 96.5170898,24.7397461 C96.7329102,24.9217122 96.9931641,25.0126953 97.2978516,25.0126953 C97.6025391,25.0126953 97.8712565,24.9174805 98.1040039,24.7270508 C98.3367513,24.5366211 98.453125,24.2763672 98.453125,23.9462891 L98.453125,16.5449219 C98.453125,15.046875 98.0151367,13.938151 97.1391602,13.21875 C96.2631836,12.499349 95.0507812,12.1396484 93.5019531,12.1396484 C91.6907552,12.1396484 90.1292318,12.5078125 88.8173828,13.2441406 C88.5804036,13.3795573 88.4619141,13.5742188 88.4619141,13.828125 C88.4619141,14.0481771 88.5423177,14.2491862 88.703125,14.4311523 C88.8639323,14.6131185 89.054362,14.7041016 89.2744141,14.7041016 C89.3929036,14.7041016 89.4944661,14.6829427 89.5791016,14.640625 C90.0107422,14.4459635 90.3746745,14.2936198 90.6708984,14.1835938 C90.9671224,14.0735677 91.3733724,13.9635417 91.8896484,13.8535156 C92.4059245,13.7434896 92.9222005,13.6884766 93.4384766,13.6884766 C94.3017578,13.6884766 94.9746094,13.9042969 95.4570312,14.3359375 C95.9394531,14.7675781 96.1806641,15.4361979 96.1806641,16.3417969 L96.1806641,17.7255859 C95.2412109,17.7255859 94.4392904,17.7382812 93.7749023,17.7636719 C93.1105143,17.7890625 92.4651693,17.8334961 91.8388672,17.8969727 C91.2125651,17.9604492 90.6962891,18.0535482 90.2900391,18.1762695 C89.8837891,18.2989909 89.5113932,18.4555664 89.1728516,18.6459961 C88.8343099,18.8364258 88.5719401,19.0691732 88.3857422,19.3442383 C88.1995443,19.6193034 88.0556641,19.9388021 87.9541016,20.3027344 C87.8525391,20.6666667 87.8017578,21.0898438 87.8017578,21.5722656 C87.8017578,22.7402344 88.2164714,23.6416016 89.0458984,24.2763672 C89.8753255,24.9111328 90.9544271,25.2285156 92.2832031,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M105.273438,25.1777344 L106.022461,25.1777344 C106.327148,25.1777344 106.574707,25.1692708 106.765137,25.1523438 C106.955566,25.1354167 107.145996,25.0973307 107.336426,25.0380859 C107.526855,24.9788411 107.66862,24.8836263 107.761719,24.7524414 C107.854818,24.6212565 107.901367,24.4498698 107.901367,24.2382812 C107.901367,24.0013021 107.820964,23.7939453 107.660156,23.6162109 C107.499349,23.4384766 107.279297,23.3496094 107,23.3496094 L106.923828,23.3496094 L106.060547,23.3876953 L105.920898,23.3876953 C105.387695,23.3876953 104.998372,23.1951497 104.75293,22.8100586 C104.507487,22.4249674 104.384766,21.7965495 104.384766,20.9248047 L104.384766,14.0058594 L106.720703,14.0058594 C107.279297,14.0058594 107.558594,13.7646484 107.558594,13.2822266 C107.558594,13.0537109 107.48877,12.867513 107.349121,12.7236328 C107.209473,12.5797526 107,12.5078125 106.720703,12.5078125 L104.384766,12.5078125 L104.384766,8.82617188 C104.384766,8.52994792 104.30013,8.30354818 104.130859,8.14697266 C103.961589,7.99039714 103.75,7.91210938 103.496094,7.91210938 C103.22526,7.91210938 102.977702,8.0094401 102.753418,8.20410156 C102.529134,8.39876302 102.408529,8.63151042 102.391602,8.90234375 L102.086914,12.5078125 L100.525391,12.5078125 C100.246094,12.5078125 100.034505,12.5776367 99.890625,12.7172852 C99.7467448,12.8569336 99.6748047,13.0367839 99.6748047,13.2568359 C99.6748047,13.4853516 99.7488607,13.6673177 99.8969727,13.8027344 C100.045085,13.938151 100.258789,14.0058594 100.538086,14.0058594 L102.086914,14.0058594 L102.086914,21.3183594 C102.086914,22.6302083 102.374674,23.6013997 102.950195,24.2319336 C103.525716,24.8624674 104.30013,25.1777344 105.273438,25.1777344 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M110.849609,9.44824219 C111.298177,9.44824219 111.657878,9.31917318 111.928711,9.06103516 C112.199544,8.80289714 112.334961,8.46647135 112.334961,8.05175781 C112.334961,7.62858073 112.199544,7.28792318 111.928711,7.02978516 C111.657878,6.77164714 111.302409,6.64257812 110.862305,6.64257812 C110.413737,6.64257812 110.051921,6.77376302 109.776855,7.03613281 C109.50179,7.2985026 109.364258,7.63704427 109.364258,8.05175781 C109.364258,8.46647135 109.499674,8.80289714 109.770508,9.06103516 C110.041341,9.31917318 110.401042,9.44824219 110.849609,9.44824219 Z M110.836914,25 C111.150065,25 111.423014,24.8963216 111.655762,24.6889648 C111.888509,24.4816081 112.004883,24.2001953 112.004883,23.8447266 L112.004883,13.5234375 C112.004883,13.1679688 111.892741,12.8886719 111.668457,12.6855469 C111.444173,12.4824219 111.179688,12.3808594 110.875,12.3808594 C110.561849,12.3808594 110.2889,12.4824219 110.056152,12.6855469 C109.823405,12.8886719 109.707031,13.1679688 109.707031,13.5234375 L109.707031,23.8447266 C109.707031,24.2171224 109.817057,24.5027669 110.037109,24.7016602 C110.257161,24.9005534 110.523763,25 110.836914,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M120.297852,25.2285156 C122.25293,25.2285156 123.801758,24.6149089 124.944336,23.3876953 C126.086914,22.1604818 126.658203,20.5947266 126.658203,18.6904297 C126.658203,16.7692057 126.084798,15.1971029 124.937988,13.9741211 C123.791178,12.7511393 122.244466,12.1396484 120.297852,12.1396484 C118.351237,12.1396484 116.804525,12.7532552 115.657715,13.9804688 C114.510905,15.2076823 113.9375,16.7776693 113.9375,18.6904297 C113.9375,20.5947266 114.510905,22.1604818 115.657715,23.3876953 C116.804525,24.6149089 118.351237,25.2285156 120.297852,25.2285156 Z M120.272461,23.6542969 C119.00293,23.6542969 118.010579,23.2078451 117.29541,22.3149414 C116.580241,21.4220378 116.222656,20.2138672 116.222656,18.6904297 C116.222656,17.1500651 116.582357,15.933431 117.301758,15.0405273 C118.021159,14.1476237 119.019857,13.7011719 120.297852,13.7011719 C121.567383,13.7011719 122.563965,14.1497396 123.287598,15.046875 C124.01123,15.9440104 124.373047,17.1585286 124.373047,18.6904297 C124.373047,20.2307943 124.015462,21.4431966 123.300293,22.3276367 C122.585124,23.2120768 121.575846,23.6542969 120.272461,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M129.631836,25 C129.944987,25 130.217936,24.9026693 130.450684,24.7080078 C130.683431,24.5133464 130.799805,24.2467448 130.799805,23.9082031 L130.799805,17.5732422 C130.859049,16.3798828 131.222982,15.4361979 131.891602,14.7421875 C132.560221,14.0481771 133.381185,13.7011719 134.354492,13.7011719 C135.268555,13.7011719 135.977376,13.9847005 136.480957,14.5517578 C136.984538,15.1188151 137.236328,15.9440104 137.236328,17.0273438 L137.236328,23.9082031 C137.236328,24.2552083 137.34847,24.5239258 137.572754,24.7143555 C137.797038,24.9047852 138.065755,25 138.378906,25 C138.692057,25 138.960775,24.9047852 139.185059,24.7143555 C139.409342,24.5239258 139.521484,24.2552083 139.521484,23.9082031 L139.521484,17.0527344 C139.521484,15.4023438 139.106771,14.1708984 138.277344,13.3583984 C137.447917,12.5458984 136.334961,12.1396484 134.938477,12.1396484 C133.965169,12.1396484 133.112467,12.3470052 132.380371,12.7617188 C131.648275,13.1764323 131.121419,13.7688802 130.799805,14.5390625 L130.799805,13.4345703 C130.799805,13.1044922 130.687663,12.8484701 130.463379,12.6665039 C130.239095,12.4845378 129.970378,12.3935547 129.657227,12.3935547 C129.344076,12.3935547 129.073242,12.4887695 128.844727,12.6791992 C128.616211,12.8696289 128.501953,13.1341146 128.501953,13.4726562 L128.501953,23.9082031 C128.501953,24.2552083 128.611979,24.5239258 128.832031,24.7143555 C129.052083,24.9047852 129.318685,25 129.631836,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M161.617188,24.2382812 C161.828776,24.796875 162.218099,25.0761719 162.785156,25.0761719 C163.115234,25.0761719 163.411458,24.9703776 163.673828,24.7587891 C163.936198,24.5472005 164.067383,24.280599 164.067383,23.9589844 C164.067383,23.8235677 164.033529,23.6669922 163.96582,23.4892578 L157.935547,8.52148438 C157.647786,7.80208333 157.336751,7.32600911 157.002441,7.09326172 C156.668132,6.86051432 156.251302,6.74414062 155.751953,6.74414062 C155.252604,6.74414062 154.835775,6.86051432 154.501465,7.09326172 C154.167155,7.32600911 153.85612,7.80208333 153.568359,8.52148438 L147.538086,23.4892578 C147.470378,23.6669922 147.436523,23.8235677 147.436523,23.9589844 C147.436523,24.280599 147.565592,24.5472005 147.82373,24.7587891 C148.081868,24.9703776 148.375977,25.0761719 148.706055,25.0761719 C149.281576,25.0761719 149.67513,24.796875 149.886719,24.2382812 L151.384766,20.3789062 L160.119141,20.3789062 L161.617188,24.2382812 Z M159.395508,18.5380859 L152.108398,18.5380859 L155.751953,9.15625 L159.395508,18.5380859 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M168.62793,15.2373047 L168.62793,8.88964844 L172.626953,8.88964844 C174.023438,8.88964844 175.034831,9.1266276 175.661133,9.60058594 C176.287435,10.0745443 176.600586,10.8828125 176.600586,12.0253906 C176.600586,13.1933594 176.268392,14.0206706 175.604004,14.5073242 C174.939616,14.9939779 173.900716,15.2373047 172.487305,15.2373047 L168.62793,15.2373047 Z M167.345703,25 C167.701172,25 168.003743,24.8942057 168.253418,24.6826172 C168.503092,24.4710286 168.62793,24.1705729 168.62793,23.78125 L168.62793,17.1923828 L172.5,17.1923828 C173.507161,17.1923828 174.400065,17.1035156 175.178711,16.9257812 C175.957357,16.7480469 176.649251,16.4666341 177.254395,16.081543 C177.859538,15.6964518 178.322917,15.1632487 178.644531,14.4819336 C178.966146,13.8006185 179.126953,12.9817708 179.126953,12.0253906 C179.126953,10.1888021 178.581055,8.86848958 177.489258,8.06445312 C176.397461,7.26041667 174.810547,6.85839844 172.728516,6.85839844 L167.358398,6.85839844 C166.994466,6.85839844 166.687663,6.98111979 166.437988,7.2265625 C166.188314,7.47200521 166.063477,7.77246094 166.063477,8.12792969 L166.063477,23.78125 C166.063477,24.1621094 166.19043,24.4604492 166.444336,24.6762695 C166.698242,24.8920898 166.998698,25 167.345703,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M182.316406,24.9873047 C182.671875,24.9873047 182.976562,24.8815104 183.230469,24.6699219 C183.484375,24.4583333 183.611328,24.1578776 183.611328,23.7685547 L183.611328,8.07714844 C183.611328,7.68782552 183.486491,7.38525391 183.236816,7.16943359 C182.987142,6.95361328 182.688802,6.84570312 182.341797,6.84570312 C181.994792,6.84570312 181.69222,6.95572917 181.434082,7.17578125 C181.175944,7.39583333 181.046875,7.69628906 181.046875,8.07714844 L181.046875,23.7685547 C181.046875,24.1494141 181.171712,24.4477539 181.421387,24.6635742 C181.671061,24.8793945 181.969401,24.9873047 182.316406,24.9873047 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M196.998047,27.8564453 C197.24349,27.8564453 197.452962,27.7760417 197.626465,27.6152344 C197.799967,27.4544271 197.886719,27.2470703 197.886719,26.9931641 C197.886719,26.8746745 197.852865,26.7223307 197.785156,26.5361328 C196.541016,23.0999349 195.918945,19.921875 195.918945,17.0019531 C195.918945,14.0481771 196.549479,10.8404948 197.810547,7.37890625 C197.869792,7.23502604 197.899414,7.08691406 197.899414,6.93457031 C197.899414,6.68066406 197.810547,6.46907552 197.632812,6.29980469 C197.455078,6.13053385 197.24349,6.04589844 196.998047,6.04589844 C196.566406,6.04589844 196.244792,6.3125 196.033203,6.84570312 C195.229167,8.79231771 194.60498,10.5802409 194.160645,12.2094727 C193.716309,13.8387044 193.494141,15.4404297 193.494141,17.0146484 C193.494141,18.6142578 193.72054,20.2286784 194.17334,21.8579102 C194.626139,23.4871419 195.263021,25.2708333 196.083984,27.2089844 C196.278646,27.640625 196.583333,27.8564453 196.998047,27.8564453 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M199.895508,27.1074219 C200.327148,27.1074219 200.623372,26.8873698 200.78418,26.4472656 L206.928711,8.02636719 C206.971029,7.87402344 206.992188,7.75130208 206.992188,7.65820312 C206.992188,7.39583333 206.896973,7.18424479 206.706543,7.0234375 C206.516113,6.86263021 206.293945,6.78222656 206.040039,6.78222656 C205.566081,6.78222656 205.257161,7.00227865 205.113281,7.44238281 L199.019531,25.8759766 C198.977214,26.0283203 198.956055,26.1468099 198.956055,26.2314453 C198.956055,26.4938151 199.049154,26.7054036 199.235352,26.8662109 C199.421549,27.0270182 199.641602,27.1074219 199.895508,27.1074219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M213.787109,25.2412109 C215.445964,25.2412109 216.732422,24.8730469 217.646484,24.1367188 C217.934245,23.8997396 218.078125,23.6416016 218.078125,23.3623047 C218.078125,23.1676432 218.010417,23.0047201 217.875,22.8735352 C217.739583,22.7423503 217.574544,22.6767578 217.379883,22.6767578 C217.202148,22.6767578 217.032878,22.7317708 216.87207,22.8417969 C216.093424,23.3834635 215.124349,23.6542969 213.964844,23.6542969 C213.304688,23.6542969 212.720703,23.5167643 212.212891,23.2416992 C211.705078,22.9666341 211.298828,22.5942383 210.994141,22.1245117 C210.689453,21.6547852 210.460938,21.1321615 210.308594,20.5566406 C210.15625,19.9811198 210.080078,19.3717448 210.080078,18.7285156 C210.080078,17.188151 210.456706,15.9630534 211.209961,15.0532227 C211.963216,14.1433919 212.944987,13.6884766 214.155273,13.6884766 C215.086263,13.6884766 215.936849,13.938151 216.707031,14.4375 C216.876302,14.547526 217.054036,14.6025391 217.240234,14.6025391 C217.443359,14.6025391 217.614746,14.5390625 217.754395,14.4121094 C217.894043,14.2851562 217.963867,14.1285807 217.963867,13.9423828 C217.963867,13.680013 217.819987,13.4345703 217.532227,13.2060547 C217.168294,12.9013672 216.677409,12.6474609 216.05957,12.4443359 C215.441732,12.2412109 214.76888,12.1396484 214.041016,12.1396484 C212.796875,12.1396484 211.696615,12.4316406 210.740234,13.015625 C209.783854,13.5996094 209.051758,14.3867188 208.543945,15.3769531 C208.036133,16.3671875 207.782227,17.4716797 207.782227,18.6904297 C207.782227,20.5777995 208.323893,22.1414388 209.407227,23.3813477 C210.49056,24.6212565 211.950521,25.2412109 213.787109,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M221.115234,25 C221.428385,25 221.701335,24.8963216 221.934082,24.6889648 C222.166829,24.4816081 222.283203,24.2001953 222.283203,23.8447266 L222.283203,8.01367188 C222.283203,7.65820312 222.171061,7.37890625 221.946777,7.17578125 C221.722493,6.97265625 221.458008,6.87109375 221.15332,6.87109375 C220.840169,6.87109375 220.56722,6.97265625 220.334473,7.17578125 C220.101725,7.37890625 219.985352,7.65820312 219.985352,8.01367188 L219.985352,23.8447266 C219.985352,24.2171224 220.095378,24.5027669 220.31543,24.7016602 C220.535482,24.9005534 220.802083,25 221.115234,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M228.798828,23.7050781 C228.011719,23.7050781 227.389648,23.5252279 226.932617,23.1655273 C226.475586,22.8058268 226.24707,22.2408854 226.24707,21.4707031 C226.24707,21.047526 226.308431,20.7005208 226.431152,20.4296875 C226.553874,20.1588542 226.742188,19.9282227 226.996094,19.737793 C227.25,19.5473633 227.630859,19.4034831 228.138672,19.3061523 C228.646484,19.2088216 229.230469,19.1411133 229.890625,19.1030273 C230.550781,19.0649414 231.405599,19.0458984 232.455078,19.0458984 L232.455078,19.4267578 C232.455078,20.6708984 232.08903,21.694987 231.356934,22.4990234 C230.624837,23.3030599 229.772135,23.7050781 228.798828,23.7050781 Z M228.557617,25.2285156 C230.377279,25.2285156 231.680664,24.4033203 232.467773,22.7529297 L232.467773,23.9716797 C232.467773,24.3017578 232.575684,24.5577799 232.791504,24.7397461 C233.007324,24.9217122 233.267578,25.0126953 233.572266,25.0126953 C233.876953,25.0126953 234.145671,24.9174805 234.378418,24.7270508 C234.611165,24.5366211 234.727539,24.2763672 234.727539,23.9462891 L234.727539,16.5449219 C234.727539,15.046875 234.289551,13.938151 233.413574,13.21875 C232.537598,12.499349 231.325195,12.1396484 229.776367,12.1396484 C227.965169,12.1396484 226.403646,12.5078125 225.091797,13.2441406 C224.854818,13.3795573 224.736328,13.5742188 224.736328,13.828125 C224.736328,14.0481771 224.816732,14.2491862 224.977539,14.4311523 C225.138346,14.6131185 225.328776,14.7041016 225.548828,14.7041016 C225.667318,14.7041016 225.76888,14.6829427 225.853516,14.640625 C226.285156,14.4459635 226.649089,14.2936198 226.945312,14.1835938 C227.241536,14.0735677 227.647786,13.9635417 228.164062,13.8535156 C228.680339,13.7434896 229.196615,13.6884766 229.712891,13.6884766 C230.576172,13.6884766 231.249023,13.9042969 231.731445,14.3359375 C232.213867,14.7675781 232.455078,15.4361979 232.455078,16.3417969 L232.455078,17.7255859 C231.515625,17.7255859 230.713704,17.7382812 230.049316,17.7636719 C229.384928,17.7890625 228.739583,17.8334961 228.113281,17.8969727 C227.486979,17.9604492 226.970703,18.0535482 226.564453,18.1762695 C226.158203,18.2989909 225.785807,18.4555664 225.447266,18.6459961 C225.108724,18.8364258 224.846354,19.0691732 224.660156,19.3442383 C224.473958,19.6193034 224.330078,19.9388021 224.228516,20.3027344 C224.126953,20.6666667 224.076172,21.0898438 224.076172,21.5722656 C224.076172,22.7402344 224.490885,23.6416016 225.320312,24.2763672 C226.14974,24.9111328 227.228841,25.2285156 228.557617,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M237.358398,23.8574219 C237.866211,24.2298177 238.54541,24.5535482 239.395996,24.8286133 C240.246582,25.1036784 241.124674,25.2412109 242.030273,25.2412109 C243.003581,25.2412109 243.858398,25.1079102 244.594727,24.8413086 C245.331055,24.574707 245.925618,24.1451823 246.378418,23.5527344 C246.831217,22.9602865 247.057617,22.2324219 247.057617,21.3691406 C247.057617,20.4550781 246.784668,19.7039388 246.23877,19.1157227 C245.692871,18.5275065 244.780924,18.0768229 243.50293,17.7636719 L241.306641,17.2177734 C240.384115,16.9892578 239.783203,16.7713216 239.503906,16.5639648 C239.224609,16.3566081 239.084961,15.9990234 239.084961,15.4912109 C239.084961,14.8649089 239.355794,14.4057617 239.897461,14.1137695 C240.439128,13.8217773 241.166992,13.6757812 242.081055,13.6757812 C242.368815,13.6757812 242.652344,13.6948242 242.931641,13.7329102 C243.210938,13.7709961 243.486003,13.8260091 243.756836,13.8979492 C244.027669,13.9698893 244.241374,14.03125 244.397949,14.0820312 C244.554525,14.1328125 244.75765,14.2068685 245.007324,14.3041992 C245.256999,14.4015299 245.394531,14.4544271 245.419922,14.4628906 C245.555339,14.5136719 245.686523,14.5390625 245.813477,14.5390625 C246.033529,14.5390625 246.209147,14.4692383 246.340332,14.3295898 C246.471517,14.1899414 246.537109,14.0227865 246.537109,13.828125 C246.537109,13.4980469 246.37207,13.2483724 246.041992,13.0791016 C245.584961,12.8336589 244.994629,12.6114909 244.270996,12.4125977 C243.547363,12.2137044 242.775065,12.1142578 241.954102,12.1142578 C241.268555,12.1142578 240.633789,12.1798503 240.049805,12.3110352 C239.46582,12.4422201 238.938965,12.6411133 238.469238,12.9077148 C237.999512,13.1743164 237.629232,13.5361328 237.358398,13.9931641 C237.087565,14.4501953 236.952148,14.9791667 236.952148,15.5800781 C236.952148,15.9609375 236.990234,16.2994792 237.066406,16.5957031 C237.142578,16.8919271 237.265299,17.1500651 237.43457,17.3701172 C237.603841,17.5901693 237.785807,17.7784831 237.980469,17.9350586 C238.17513,18.0916341 238.435384,18.2376302 238.76123,18.3730469 C239.087077,18.5084635 239.391764,18.6184896 239.675293,18.703125 C239.958822,18.7877604 240.329102,18.8893229 240.786133,19.0078125 L243.033203,19.5664062 C243.777995,19.7526042 244.311198,19.9980469 244.632812,20.3027344 C244.954427,20.6074219 245.115234,21.0136719 245.115234,21.5214844 C245.115234,22.2324219 244.82959,22.7719727 244.258301,23.1401367 C243.687012,23.5083008 242.935872,23.6923828 242.004883,23.6923828 C240.709961,23.6839193 239.482747,23.319987 238.323242,22.6005859 C238.137044,22.4820964 237.942383,22.4228516 237.739258,22.4228516 C237.519206,22.4228516 237.33724,22.4969076 237.193359,22.6450195 C237.049479,22.7931315 236.977539,22.96875 236.977539,23.171875 C236.977539,23.4511719 237.104492,23.6796875 237.358398,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M249.129883,23.8574219 C249.637695,24.2298177 250.316895,24.5535482 251.16748,24.8286133 C252.018066,25.1036784 252.896159,25.2412109 253.801758,25.2412109 C254.775065,25.2412109 255.629883,25.1079102 256.366211,24.8413086 C257.102539,24.574707 257.697103,24.1451823 258.149902,23.5527344 C258.602702,22.9602865 258.829102,22.2324219 258.829102,21.3691406 C258.829102,20.4550781 258.556152,19.7039388 258.010254,19.1157227 C257.464355,18.5275065 256.552409,18.0768229 255.274414,17.7636719 L253.078125,17.2177734 C252.155599,16.9892578 251.554688,16.7713216 251.275391,16.5639648 C250.996094,16.3566081 250.856445,15.9990234 250.856445,15.4912109 C250.856445,14.8649089 251.127279,14.4057617 251.668945,14.1137695 C252.210612,13.8217773 252.938477,13.6757812 253.852539,13.6757812 C254.140299,13.6757812 254.423828,13.6948242 254.703125,13.7329102 C254.982422,13.7709961 255.257487,13.8260091 255.52832,13.8979492 C255.799154,13.9698893 256.012858,14.03125 256.169434,14.0820312 C256.326009,14.1328125 256.529134,14.2068685 256.778809,14.3041992 C257.028483,14.4015299 257.166016,14.4544271 257.191406,14.4628906 C257.326823,14.5136719 257.458008,14.5390625 257.584961,14.5390625 C257.805013,14.5390625 257.980632,14.4692383 258.111816,14.3295898 C258.243001,14.1899414 258.308594,14.0227865 258.308594,13.828125 C258.308594,13.4980469 258.143555,13.2483724 257.813477,13.0791016 C257.356445,12.8336589 256.766113,12.6114909 256.04248,12.4125977 C255.318848,12.2137044 254.546549,12.1142578 253.725586,12.1142578 C253.040039,12.1142578 252.405273,12.1798503 251.821289,12.3110352 C251.237305,12.4422201 250.710449,12.6411133 250.240723,12.9077148 C249.770996,13.1743164 249.400716,13.5361328 249.129883,13.9931641 C248.859049,14.4501953 248.723633,14.9791667 248.723633,15.5800781 C248.723633,15.9609375 248.761719,16.2994792 248.837891,16.5957031 C248.914062,16.8919271 249.036784,17.1500651 249.206055,17.3701172 C249.375326,17.5901693 249.557292,17.7784831 249.751953,17.9350586 C249.946615,18.0916341 250.206868,18.2376302 250.532715,18.3730469 C250.858561,18.5084635 251.163249,18.6184896 251.446777,18.703125 C251.730306,18.7877604 252.100586,18.8893229 252.557617,19.0078125 L254.804688,19.5664062 C255.549479,19.7526042 256.082682,19.9980469 256.404297,20.3027344 C256.725911,20.6074219 256.886719,21.0136719 256.886719,21.5214844 C256.886719,22.2324219 256.601074,22.7719727 256.029785,23.1401367 C255.458496,23.5083008 254.707357,23.6923828 253.776367,23.6923828 C252.481445,23.6839193 251.254232,23.319987 250.094727,22.6005859 C249.908529,22.4820964 249.713867,22.4228516 249.510742,22.4228516 C249.29069,22.4228516 249.108724,22.4969076 248.964844,22.6450195 C248.820964,22.7931315 248.749023,22.96875 248.749023,23.171875 C248.749023,23.4511719 248.875977,23.6796875 249.129883,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M262.272461,9.44824219 C262.721029,9.44824219 263.080729,9.31917318 263.351562,9.06103516 C263.622396,8.80289714 263.757812,8.46647135 263.757812,8.05175781 C263.757812,7.62858073 263.622396,7.28792318 263.351562,7.02978516 C263.080729,6.77164714 262.72526,6.64257812 262.285156,6.64257812 C261.836589,6.64257812 261.474772,6.77376302 261.199707,7.03613281 C260.924642,7.2985026 260.787109,7.63704427 260.787109,8.05175781 C260.787109,8.46647135 260.922526,8.80289714 261.193359,9.06103516 C261.464193,9.31917318 261.823893,9.44824219 262.272461,9.44824219 Z M262.259766,25 C262.572917,25 262.845866,24.8963216 263.078613,24.6889648 C263.311361,24.4816081 263.427734,24.2001953 263.427734,23.8447266 L263.427734,13.5234375 C263.427734,13.1679688 263.315592,12.8886719 263.091309,12.6855469 C262.867025,12.4824219 262.602539,12.3808594 262.297852,12.3808594 C261.984701,12.3808594 261.711751,12.4824219 261.479004,12.6855469 C261.246257,12.8886719 261.129883,13.1679688 261.129883,13.5234375 L261.129883,23.8447266 C261.129883,24.2171224 261.239909,24.5027669 261.459961,24.7016602 C261.680013,24.9005534 261.946615,25 262.259766,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M268.432617,25 C268.745768,25 269.016602,24.9005534 269.245117,24.7016602 C269.473633,24.5027669 269.587891,24.2255859 269.587891,23.8701172 L269.587891,14.0058594 L271.733398,14.0058594 C272.275065,14.0058594 272.545898,13.7604167 272.545898,13.2695312 C272.545898,13.0494792 272.480306,12.867513 272.349121,12.7236328 C272.217936,12.5797526 272.012695,12.5078125 271.733398,12.5078125 L269.587891,12.5078125 L269.587891,11.5175781 C269.587891,10.8320312 269.615397,10.2882487 269.67041,9.88623047 C269.725423,9.48421224 269.831217,9.17529297 269.987793,8.95947266 C270.144368,8.74365234 270.324219,8.60400391 270.527344,8.54052734 C270.730469,8.47705078 271.013997,8.4453125 271.37793,8.4453125 L272.457031,8.4453125 C272.710938,8.4453125 272.907715,8.36067708 273.047363,8.19140625 C273.187012,8.02213542 273.256836,7.81901042 273.256836,7.58203125 C273.256836,7.34505208 273.187012,7.1398112 273.047363,6.96630859 C272.907715,6.79280599 272.715169,6.70605469 272.469727,6.70605469 L270.920898,6.70605469 C269.786784,6.70605469 268.898112,7.02555339 268.254883,7.66455078 C267.611654,8.30354818 267.290039,9.38053385 267.290039,10.8955078 L267.290039,12.5078125 L265.626953,12.5078125 C265.347656,12.5078125 265.140299,12.5797526 265.004883,12.7236328 C264.869466,12.867513 264.801758,13.0494792 264.801758,13.2695312 C264.801758,13.4811198 264.869466,13.6567383 265.004883,13.7963867 C265.140299,13.9360352 265.347656,14.0058594 265.626953,14.0058594 L267.290039,14.0058594 L267.290039,23.8701172 C267.290039,24.2255859 267.402181,24.5027669 267.626465,24.7016602 C267.850749,24.9005534 268.119466,25 268.432617,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M277.791992,29.3291016 L285.129883,13.8535156 C285.189128,13.7096354 285.21875,13.5657552 285.21875,13.421875 C285.21875,13.1341146 285.106608,12.886556 284.882324,12.6791992 C284.65804,12.4718424 284.397786,12.3681641 284.101562,12.3681641 C283.661458,12.3681641 283.327148,12.6009115 283.098633,13.0664062 L279.213867,21.2929688 L275.354492,13.1044922 C275.142904,12.6559245 274.812826,12.4316406 274.364258,12.4316406 C274.068034,12.4316406 273.803548,12.5332031 273.570801,12.7363281 C273.338053,12.9394531 273.22168,13.1933594 273.22168,13.4980469 C273.22168,13.6842448 273.259766,13.8535156 273.335938,14.0058594 L278.058594,23.7431641 L275.786133,28.5546875 C275.709961,28.7239583 275.671875,28.8847656 275.671875,29.0371094 C275.671875,29.3248698 275.781901,29.5639648 276.001953,29.7543945 C276.222005,29.9448242 276.475911,30.0400391 276.763672,30.0400391 C277.220703,30.0400391 277.563477,29.8030599 277.791992,29.3291016 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M286.707031,27.8691406 C287.121745,27.8691406 287.430664,27.6490885 287.633789,27.2089844 C288.454753,25.2792969 289.091634,23.4977214 289.544434,21.8642578 C289.997233,20.2307943 290.223633,18.6184896 290.223633,17.0273438 C290.223633,15.4446615 290.001465,13.8387044 289.557129,12.2094727 C289.112793,10.5802409 288.488607,8.79231771 287.68457,6.84570312 C287.447591,6.32096354 287.130208,6.05859375 286.732422,6.05859375 C286.486979,6.05859375 286.273275,6.14322917 286.091309,6.3125 C285.909342,6.48177083 285.818359,6.6891276 285.818359,6.93457031 C285.818359,7.06998698 285.852214,7.21809896 285.919922,7.37890625 C287.172526,10.8658854 287.798828,14.0735677 287.798828,17.0019531 C287.798828,19.9726562 287.176758,23.1507161 285.932617,26.5361328 C285.864909,26.7138672 285.831055,26.8704427 285.831055,27.0058594 C285.831055,27.2513021 285.917806,27.456543 286.091309,27.621582 C286.264811,27.7866211 286.470052,27.8691406 286.707031,27.8691406 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-8" transform="translate(0, 52)" xlink:href="#path-31" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M9.953125,25.2792969 C11.874349,25.2792969 13.5416667,24.8180339 14.9550781,23.8955078 C15.3782552,23.6246745 15.5898438,23.2945964 15.5898438,22.9052734 C15.5898438,22.6429036 15.4967448,22.414388 15.3105469,22.2197266 C15.124349,22.0250651 14.9000651,21.9277344 14.6376953,21.9277344 C14.4853516,21.9277344 14.3245443,21.9742839 14.1552734,22.0673828 C13.4951172,22.4397786 12.8815104,22.7254232 12.3144531,22.9243164 C11.7473958,23.1232096 11.0872396,23.2226562 10.3339844,23.2226562 C9.49609375,23.2226562 8.73649089,23.0957031 8.05517578,22.8417969 C7.37386068,22.5878906 6.80257161,22.2408854 6.34130859,21.8007812 C5.88004557,21.3606771 5.49283854,20.8338216 5.1796875,20.2202148 C4.86653646,19.6066081 4.63802083,18.9443359 4.49414062,18.2333984 C4.35026042,17.5224609 4.27832031,16.764974 4.27832031,15.9609375 C4.27832031,15.156901 4.35026042,14.3972982 4.49414062,13.6821289 C4.63802083,12.9669596 4.86653646,12.3004557 5.1796875,11.6826172 C5.49283854,11.0647786 5.87792969,10.5315755 6.33496094,10.0830078 C6.79199219,9.6344401 7.35481771,9.28320312 8.0234375,9.02929688 C8.69205729,8.77539062 9.43684896,8.6484375 10.2578125,8.6484375 C10.5963542,8.6484375 10.9243164,8.66536458 11.2416992,8.69921875 C11.559082,8.73307292 11.8235677,8.77327474 12.0351562,8.81982422 C12.2467448,8.8663737 12.4667969,8.92985026 12.6953125,9.01025391 C12.9238281,9.09065755 13.093099,9.15413411 13.203125,9.20068359 C13.313151,9.24723307 13.4549154,9.31494141 13.628418,9.40380859 C13.8019206,9.49267578 13.905599,9.54557292 13.9394531,9.5625 C14.0917969,9.63020833 14.2314453,9.6640625 14.3583984,9.6640625 C14.6207682,9.6640625 14.8450521,9.5625 15.03125,9.359375 C15.2174479,9.15625 15.3105469,8.91927083 15.3105469,8.6484375 C15.3105469,8.26757812 15.124349,7.97558594 14.7519531,7.77246094 C13.3385417,6.98535156 11.8108724,6.59179688 10.1689453,6.59179688 C8.84016927,6.59179688 7.62988281,6.83723958 6.53808594,7.328125 C5.44628906,7.81901042 4.5406901,8.48974609 3.82128906,9.34033203 C3.10188802,10.190918 2.54541016,11.1875 2.15185547,12.3300781 C1.75830078,13.4726562 1.56152344,14.695638 1.56152344,15.9990234 C1.56152344,17.7509766 1.88313802,19.3209635 2.52636719,20.7089844 C3.16959635,22.0970052 4.1344401,23.2057292 5.42089844,24.0351562 C6.70735677,24.8645833 8.21809896,25.2792969 9.953125,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M23.6162109,25.2285156 C25.5712891,25.2285156 27.1201172,24.6149089 28.2626953,23.3876953 C29.4052734,22.1604818 29.9765625,20.5947266 29.9765625,18.6904297 C29.9765625,16.7692057 29.4031576,15.1971029 28.2563477,13.9741211 C27.1095378,12.7511393 25.5628255,12.1396484 23.6162109,12.1396484 C21.6695964,12.1396484 20.1228841,12.7532552 18.9760742,13.9804688 C17.8292643,15.2076823 17.2558594,16.7776693 17.2558594,18.6904297 C17.2558594,20.5947266 17.8292643,22.1604818 18.9760742,23.3876953 C20.1228841,24.6149089 21.6695964,25.2285156 23.6162109,25.2285156 Z M23.5908203,23.6542969 C22.3212891,23.6542969 21.3289388,23.2078451 20.6137695,22.3149414 C19.8986003,21.4220378 19.5410156,20.2138672 19.5410156,18.6904297 C19.5410156,17.1500651 19.9007161,15.933431 20.6201172,15.0405273 C21.3395182,14.1476237 22.3382161,13.7011719 23.6162109,13.7011719 C24.8857422,13.7011719 25.8823242,14.1497396 26.605957,15.046875 C27.3295898,15.9440104 27.6914062,17.1585286 27.6914062,18.6904297 C27.6914062,20.2307943 27.3338216,21.4431966 26.6186523,22.3276367 C25.9034831,23.2120768 24.8942057,23.6542969 23.5908203,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M32.9501953,25 C33.2633464,25 33.5362956,24.9026693 33.769043,24.7080078 C34.0017904,24.5133464 34.1181641,24.2467448 34.1181641,23.9082031 L34.1181641,17.7509766 C34.1181641,16.5068359 34.4736328,15.5441081 35.1845703,14.862793 C35.8955078,14.1814779 36.7714844,13.8408203 37.8125,13.8408203 C38.6673177,13.8408203 39.3380534,14.0904948 39.824707,14.5898438 C40.3113607,15.0891927 40.5546875,15.8424479 40.5546875,16.8496094 L40.5546875,23.9082031 C40.5546875,24.2467448 40.6668294,24.5133464 40.8911133,24.7080078 C41.1153971,24.9026693 41.3798828,25 41.6845703,25 C41.9977214,25 42.2685547,24.9026693 42.4970703,24.7080078 C42.7255859,24.5133464 42.8398438,24.2467448 42.8398438,23.9082031 L42.8398438,16.9765625 C42.8398438,16.1386719 42.7192383,15.4023438 42.4780273,14.7675781 C42.2368164,14.1328125 41.9025065,13.6271159 41.4750977,13.2504883 C41.0476888,12.8738607 40.5694987,12.5945638 40.0405273,12.4125977 C39.511556,12.2306315 38.9339193,12.1396484 38.3076172,12.1396484 C37.3766276,12.1396484 36.5302734,12.3364258 35.7685547,12.7299805 C35.0068359,13.1235352 34.4567057,13.6673177 34.1181641,14.3613281 L34.1181641,7.92480469 C34.1181641,7.58626302 34.0060221,7.32600911 33.7817383,7.14404297 C33.5574544,6.96207682 33.288737,6.87109375 32.9755859,6.87109375 C32.6624349,6.87109375 32.3916016,6.96419271 32.1630859,7.15039062 C31.9345703,7.33658854 31.8203125,7.59472656 31.8203125,7.92480469 L31.8203125,23.9082031 C31.8203125,24.2467448 31.9324544,24.5133464 32.1567383,24.7080078 C32.3810221,24.9026693 32.6455078,25 32.9501953,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M47.0830078,17.7382812 C47.1253255,17.2135417 47.2374674,16.7163086 47.4194336,16.246582 C47.6013997,15.7768555 47.8468424,15.3494466 48.1557617,14.9643555 C48.464681,14.5792643 48.8603516,14.2724609 49.3427734,14.0439453 C49.8251953,13.8154297 50.3626302,13.7011719 50.9550781,13.7011719 C52.0976562,13.7011719 52.9651693,14.0777995 53.5576172,14.8310547 C54.1500651,15.5843099 54.484375,16.5533854 54.5605469,17.7382812 L47.0830078,17.7382812 Z M50.9931641,25.2285156 C52.8889974,25.2285156 54.5055339,24.6276042 55.8427734,23.4257812 C56.0797526,23.2141927 56.1982422,22.9729818 56.1982422,22.7021484 C56.1982422,22.4905599 56.128418,22.3064779 55.9887695,22.1499023 C55.8491211,21.9933268 55.6777344,21.9150391 55.4746094,21.9150391 C55.3053385,21.9150391 55.1445312,21.9742839 54.9921875,22.0927734 C54.3828125,22.5582682 53.7776693,22.9264323 53.1767578,23.1972656 C52.5758464,23.468099 51.898763,23.6035156 51.1455078,23.6035156 C49.9521484,23.5865885 48.9746094,23.2036133 48.2128906,22.4545898 C47.4511719,21.7055664 47.061849,20.6031901 47.0449219,19.1474609 L55.7539062,19.1474609 C56.0247396,19.1474609 56.2278646,19.0670573 56.3632812,18.90625 C56.4986979,18.7454427 56.5664062,18.5423177 56.5664062,18.296875 C56.5410156,17.4420573 56.4182943,16.6570638 56.1982422,15.9418945 C55.9781901,15.2267253 55.648112,14.5792643 55.2080078,13.9995117 C54.7679036,13.4197591 54.1754557,12.9648438 53.4306641,12.6347656 C52.6858724,12.3046875 51.8183594,12.1396484 50.828125,12.1396484 C49.5755208,12.1396484 48.4794922,12.4443359 47.5400391,13.0537109 C46.6005859,13.6630859 45.898112,14.4544271 45.4326172,15.4277344 C44.9671224,16.4010417 44.734375,17.4759115 44.734375,18.6523438 C44.734375,19.9895833 45.011556,21.159668 45.565918,22.1625977 C46.1202799,23.1655273 46.8671875,23.9251302 47.8066406,24.4414062 C48.7460938,24.9576823 49.8082682,25.2200521 50.9931641,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M59.8828125,25 C60.1959635,25 60.4689128,24.906901 60.7016602,24.7207031 C60.9344076,24.5345052 61.0507812,24.2721354 61.0507812,23.9335938 L61.0507812,17.3955078 C61.0507812,16.4052734 61.3660482,15.6139323 61.996582,15.0214844 C62.6271159,14.4290365 63.5136719,14.1328125 64.65625,14.1328125 C64.9270833,14.1328125 65.1344401,14.0418294 65.2783203,13.8598633 C65.4222005,13.6778971 65.4941406,13.4599609 65.4941406,13.2060547 C65.4941406,12.9352214 65.4158529,12.6982422 65.2592773,12.4951172 C65.1027018,12.2919922 64.8889974,12.1904297 64.6181641,12.1904297 C63.7210286,12.1904297 62.9529622,12.4549154 62.3139648,12.9838867 C61.6749674,13.5128581 61.2496745,14.1708984 61.0380859,14.9580078 L61.0507812,13.4345703 C61.0507812,13.1129557 60.9386393,12.8611654 60.7143555,12.6791992 C60.4900716,12.4972331 60.2213542,12.40625 59.9082031,12.40625 C59.5950521,12.40625 59.3242188,12.4972331 59.0957031,12.6791992 C58.8671875,12.8611654 58.7529297,13.1214193 58.7529297,13.4599609 L58.7529297,23.9335938 C58.7529297,24.2721354 58.8629557,24.5345052 59.0830078,24.7207031 C59.3030599,24.906901 59.5696615,25 59.8828125,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M68.53125,17.7382812 C68.5735677,17.2135417 68.6857096,16.7163086 68.8676758,16.246582 C69.0496419,15.7768555 69.2950846,15.3494466 69.6040039,14.9643555 C69.9129232,14.5792643 70.3085938,14.2724609 70.7910156,14.0439453 C71.2734375,13.8154297 71.8108724,13.7011719 72.4033203,13.7011719 C73.5458984,13.7011719 74.4134115,14.0777995 75.0058594,14.8310547 C75.5983073,15.5843099 75.9326172,16.5533854 76.0087891,17.7382812 L68.53125,17.7382812 Z M72.4414062,25.2285156 C74.3372396,25.2285156 75.953776,24.6276042 77.2910156,23.4257812 C77.5279948,23.2141927 77.6464844,22.9729818 77.6464844,22.7021484 C77.6464844,22.4905599 77.5766602,22.3064779 77.4370117,22.1499023 C77.2973633,21.9933268 77.1259766,21.9150391 76.9228516,21.9150391 C76.7535807,21.9150391 76.5927734,21.9742839 76.4404297,22.0927734 C75.8310547,22.5582682 75.2259115,22.9264323 74.625,23.1972656 C74.0240885,23.468099 73.3470052,23.6035156 72.59375,23.6035156 C71.4003906,23.5865885 70.4228516,23.2036133 69.6611328,22.4545898 C68.8994141,21.7055664 68.5100911,20.6031901 68.4931641,19.1474609 L77.2021484,19.1474609 C77.4729818,19.1474609 77.6761068,19.0670573 77.8115234,18.90625 C77.9469401,18.7454427 78.0146484,18.5423177 78.0146484,18.296875 C77.9892578,17.4420573 77.8665365,16.6570638 77.6464844,15.9418945 C77.4264323,15.2267253 77.0963542,14.5792643 76.65625,13.9995117 C76.2161458,13.4197591 75.6236979,12.9648438 74.8789062,12.6347656 C74.1341146,12.3046875 73.2666016,12.1396484 72.2763672,12.1396484 C71.023763,12.1396484 69.9277344,12.4443359 68.9882812,13.0537109 C68.0488281,13.6630859 67.3463542,14.4544271 66.8808594,15.4277344 C66.4153646,16.4010417 66.1826172,17.4759115 66.1826172,18.6523438 C66.1826172,19.9895833 66.4597982,21.159668 67.0141602,22.1625977 C67.5685221,23.1655273 68.3154297,23.9251302 69.2548828,24.4414062 C70.1943359,24.9576823 71.2565104,25.2200521 72.4414062,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M88.1386719,25 L97.8125,25 C98.1341146,25 98.3837891,24.9005534 98.5615234,24.7016602 C98.7392578,24.5027669 98.828125,24.2636719 98.828125,23.984375 C98.828125,23.7050781 98.7392578,23.4638672 98.5615234,23.2607422 C98.3837891,23.0576172 98.1341146,22.9560547 97.8125,22.9560547 L89.4335938,22.9560547 L89.4335938,16.671875 L97.0507812,16.671875 C97.3723958,16.671875 97.6241862,16.5724284 97.8061523,16.3735352 C97.9881185,16.1746419 98.0791016,15.9397786 98.0791016,15.6689453 C98.0791016,15.398112 97.9902344,15.1653646 97.8125,14.9707031 C97.6347656,14.7760417 97.3808594,14.6787109 97.0507812,14.6787109 L89.4335938,14.6787109 L89.4335938,8.90234375 L97.4824219,8.90234375 C97.8040365,8.90234375 98.0537109,8.80078125 98.2314453,8.59765625 C98.4091797,8.39453125 98.4980469,8.15332031 98.4980469,7.87402344 C98.4980469,7.59472656 98.4091797,7.35563151 98.2314453,7.15673828 C98.0537109,6.95784505 97.8040365,6.85839844 97.4824219,6.85839844 L88.0751953,6.85839844 C87.7366536,6.85839844 87.4510091,6.98323568 87.2182617,7.23291016 C86.9855143,7.48258464 86.8691406,7.77246094 86.8691406,8.10253906 L86.8691406,23.7558594 C86.8691406,24.0859375 86.9982096,24.3758138 87.2563477,24.6254883 C87.5144857,24.8751628 87.8085938,25 88.1386719,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M102.436523,25 C102.758138,25 103.035319,24.9047852 103.268066,24.7143555 C103.500814,24.5239258 103.617188,24.2594401 103.617188,23.9208984 L103.617188,17.7255859 C103.617188,16.4560547 103.947266,15.4679362 104.607422,14.7612305 C105.267578,14.0545247 106.147786,13.7011719 107.248047,13.7011719 C109.093099,13.7011719 110.015625,14.8691406 110.015625,17.2050781 L110.015625,23.9082031 C110.015625,24.2467448 110.127767,24.5133464 110.352051,24.7080078 C110.576335,24.9026693 110.84082,25 111.145508,25 C111.458659,25 111.729492,24.9026693 111.958008,24.7080078 C112.186523,24.5133464 112.300781,24.2467448 112.300781,23.9082031 L112.300781,17.7890625 C112.300781,16.4518229 112.643555,15.4361979 113.329102,14.7421875 C114.014648,14.0481771 114.886393,13.7011719 115.944336,13.7011719 C116.833008,13.7011719 117.518555,13.9698893 118.000977,14.5073242 C118.483398,15.0447591 118.724609,15.8382161 118.724609,16.8876953 L118.724609,23.9208984 C118.724609,24.2594401 118.834635,24.5239258 119.054688,24.7143555 C119.27474,24.9047852 119.541341,25 119.854492,25 C120.167643,25 120.440592,24.9047852 120.67334,24.7143555 C120.906087,24.5239258 121.022461,24.2594401 121.022461,23.9208984 L121.022461,16.9765625 C121.022461,16.1471354 120.901855,15.4150391 120.660645,14.7802734 C120.419434,14.1455078 120.085124,13.6398112 119.657715,13.2631836 C119.230306,12.886556 118.745768,12.6051432 118.204102,12.4189453 C117.662435,12.2327474 117.069987,12.1396484 116.426758,12.1396484 C115.30957,12.1396484 114.361654,12.387207 113.583008,12.8823242 C112.804362,13.3774414 112.237305,14.1158854 111.881836,15.0976562 C111.619466,14.1497396 111.120117,13.4197591 110.383789,12.9077148 C109.647461,12.3956706 108.754557,12.1396484 107.705078,12.1396484 C106.782552,12.1396484 105.961589,12.3491211 105.242188,12.7680664 C104.522786,13.1870117 103.98112,13.7858073 103.617188,14.5644531 L103.617188,13.3837891 C103.617188,13.070638 103.500814,12.8251953 103.268066,12.6474609 C103.035319,12.4697266 102.766602,12.3808594 102.461914,12.3808594 C102.157227,12.3808594 101.888509,12.4760742 101.655762,12.6665039 C101.423014,12.8569336 101.306641,13.1171875 101.306641,13.4472656 L101.306641,23.9208984 C101.306641,24.2594401 101.416667,24.5239258 101.636719,24.7143555 C101.856771,24.9047852 102.123372,25 102.436523,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M129.988281,23.6542969 C127.364583,23.6542969 126.052734,21.9785156 126.052734,18.6269531 C126.052734,17.983724 126.124674,17.3785807 126.268555,16.8115234 C126.412435,16.2444661 126.630371,15.7239583 126.922363,15.25 C127.214355,14.7760417 127.612142,14.3994141 128.115723,14.1201172 C128.619303,13.8408203 129.205404,13.7011719 129.874023,13.7011719 C130.53418,13.7011719 131.111816,13.8492839 131.606934,14.1455078 C132.102051,14.4417318 132.485026,14.8374023 132.755859,15.3325195 C133.026693,15.8276367 133.227702,16.3544922 133.358887,16.9130859 C133.490072,17.4716797 133.555664,18.0556641 133.555664,18.6650391 C133.555664,19.257487 133.496419,19.8266602 133.37793,20.3725586 C133.25944,20.918457 133.073242,21.4453125 132.819336,21.953125 C132.56543,22.4609375 132.19515,22.8693034 131.708496,23.1782227 C131.221842,23.4871419 130.648438,23.6458333 129.988281,23.6542969 Z M130.394531,25.2285156 C131.27474,25.2285156 132.068197,25.0486654 132.774902,24.6889648 C133.481608,24.3292643 134.055013,23.8426107 134.495117,23.2290039 C134.935221,22.6153971 135.271647,21.9150391 135.504395,21.1279297 C135.737142,20.3408203 135.853516,19.5029297 135.853516,18.6142578 C135.853516,17.7086589 135.726562,16.8644206 135.472656,16.081543 C135.21875,15.2986654 134.856934,14.6131185 134.387207,14.0249023 C133.91748,13.4366862 133.320801,12.9754232 132.597168,12.6411133 C131.873535,12.3068034 131.071615,12.1396484 130.191406,12.1396484 C129.251953,12.1396484 128.42041,12.3427734 127.696777,12.7490234 C126.973145,13.1552734 126.420898,13.7434896 126.040039,14.5136719 L126.040039,7.92480469 C126.040039,7.59472656 125.925781,7.33447266 125.697266,7.14404297 C125.46875,6.95361328 125.197917,6.85839844 124.884766,6.85839844 C124.580078,6.85839844 124.313477,6.95361328 124.084961,7.14404297 C123.856445,7.33447266 123.742188,7.59472656 123.742188,7.92480469 L123.742188,23.8955078 C123.742188,24.2340495 123.856445,24.4985352 124.084961,24.6889648 C124.313477,24.8793945 124.580078,24.9746094 124.884766,24.9746094 C125.265625,24.9746094 125.553385,24.8793945 125.748047,24.6889648 C125.942708,24.4985352 126.040039,24.2382812 126.040039,23.9082031 L126.040039,22.7783203 C126.446289,23.5908203 127.028158,24.2023112 127.785645,24.612793 C128.543132,25.0232747 129.41276,25.2285156 130.394531,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M139.411133,17.7382812 C139.453451,17.2135417 139.565592,16.7163086 139.747559,16.246582 C139.929525,15.7768555 140.174967,15.3494466 140.483887,14.9643555 C140.792806,14.5792643 141.188477,14.2724609 141.670898,14.0439453 C142.15332,13.8154297 142.690755,13.7011719 143.283203,13.7011719 C144.425781,13.7011719 145.293294,14.0777995 145.885742,14.8310547 C146.47819,15.5843099 146.8125,16.5533854 146.888672,17.7382812 L139.411133,17.7382812 Z M143.321289,25.2285156 C145.217122,25.2285156 146.833659,24.6276042 148.170898,23.4257812 C148.407878,23.2141927 148.526367,22.9729818 148.526367,22.7021484 C148.526367,22.4905599 148.456543,22.3064779 148.316895,22.1499023 C148.177246,21.9933268 148.005859,21.9150391 147.802734,21.9150391 C147.633464,21.9150391 147.472656,21.9742839 147.320312,22.0927734 C146.710938,22.5582682 146.105794,22.9264323 145.504883,23.1972656 C144.903971,23.468099 144.226888,23.6035156 143.473633,23.6035156 C142.280273,23.5865885 141.302734,23.2036133 140.541016,22.4545898 C139.779297,21.7055664 139.389974,20.6031901 139.373047,19.1474609 L148.082031,19.1474609 C148.352865,19.1474609 148.55599,19.0670573 148.691406,18.90625 C148.826823,18.7454427 148.894531,18.5423177 148.894531,18.296875 C148.869141,17.4420573 148.746419,16.6570638 148.526367,15.9418945 C148.306315,15.2267253 147.976237,14.5792643 147.536133,13.9995117 C147.096029,13.4197591 146.503581,12.9648438 145.758789,12.6347656 C145.013997,12.3046875 144.146484,12.1396484 143.15625,12.1396484 C141.903646,12.1396484 140.807617,12.4443359 139.868164,13.0537109 C138.928711,13.6630859 138.226237,14.4544271 137.760742,15.4277344 C137.295247,16.4010417 137.0625,17.4759115 137.0625,18.6523438 C137.0625,19.9895833 137.339681,21.159668 137.894043,22.1625977 C138.448405,23.1655273 139.195312,23.9251302 140.134766,24.4414062 C141.074219,24.9576823 142.136393,25.2200521 143.321289,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M156.057617,23.6542969 C155.397461,23.6458333 154.824056,23.4871419 154.337402,23.1782227 C153.850749,22.8693034 153.480469,22.4609375 153.226562,21.953125 C152.972656,21.4453125 152.786458,20.918457 152.667969,20.3725586 C152.549479,19.8266602 152.490234,19.257487 152.490234,18.6650391 C152.490234,18.0556641 152.555827,17.4716797 152.687012,16.9130859 C152.818197,16.3544922 153.019206,15.8276367 153.290039,15.3325195 C153.560872,14.8374023 153.943848,14.4417318 154.438965,14.1455078 C154.934082,13.8492839 155.511719,13.7011719 156.171875,13.7011719 C156.840495,13.7011719 157.426595,13.8408203 157.930176,14.1201172 C158.433757,14.3994141 158.831543,14.7760417 159.123535,15.25 C159.415527,15.7239583 159.633464,16.2444661 159.777344,16.8115234 C159.921224,17.3785807 159.993164,17.983724 159.993164,18.6269531 C159.993164,21.9785156 158.681315,23.6542969 156.057617,23.6542969 Z M155.651367,25.2285156 C156.633138,25.2285156 157.502767,25.0232747 158.260254,24.612793 C159.017741,24.2023112 159.599609,23.5908203 160.005859,22.7783203 L160.005859,23.9082031 C160.005859,24.2382812 160.10319,24.4985352 160.297852,24.6889648 C160.492513,24.8793945 160.780273,24.9746094 161.161133,24.9746094 C161.46582,24.9746094 161.732422,24.8793945 161.960938,24.6889648 C162.189453,24.4985352 162.303711,24.2340495 162.303711,23.8955078 L162.303711,7.92480469 C162.303711,7.59472656 162.189453,7.33447266 161.960938,7.14404297 C161.732422,6.95361328 161.46582,6.85839844 161.161133,6.85839844 C160.847982,6.85839844 160.577148,6.95361328 160.348633,7.14404297 C160.120117,7.33447266 160.005859,7.59472656 160.005859,7.92480469 L160.005859,14.5136719 C159.625,13.7434896 159.072754,13.1552734 158.349121,12.7490234 C157.625488,12.3427734 156.793945,12.1396484 155.854492,12.1396484 C154.974284,12.1396484 154.172363,12.3068034 153.44873,12.6411133 C152.725098,12.9754232 152.128418,13.4366862 151.658691,14.0249023 C151.188965,14.6131185 150.827148,15.2986654 150.573242,16.081543 C150.319336,16.8644206 150.192383,17.7086589 150.192383,18.6142578 C150.192383,19.5029297 150.308757,20.3408203 150.541504,21.1279297 C150.774251,21.9150391 151.110677,22.6153971 151.550781,23.2290039 C151.990885,23.8426107 152.56429,24.3292643 153.270996,24.6889648 C153.977702,25.0486654 154.771159,25.2285156 155.651367,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M184.539062,24.2382812 C184.750651,24.796875 185.139974,25.0761719 185.707031,25.0761719 C186.037109,25.0761719 186.333333,24.9703776 186.595703,24.7587891 C186.858073,24.5472005 186.989258,24.280599 186.989258,23.9589844 C186.989258,23.8235677 186.955404,23.6669922 186.887695,23.4892578 L180.857422,8.52148438 C180.569661,7.80208333 180.258626,7.32600911 179.924316,7.09326172 C179.590007,6.86051432 179.173177,6.74414062 178.673828,6.74414062 C178.174479,6.74414062 177.75765,6.86051432 177.42334,7.09326172 C177.08903,7.32600911 176.777995,7.80208333 176.490234,8.52148438 L170.459961,23.4892578 C170.392253,23.6669922 170.358398,23.8235677 170.358398,23.9589844 C170.358398,24.280599 170.487467,24.5472005 170.745605,24.7587891 C171.003743,24.9703776 171.297852,25.0761719 171.62793,25.0761719 C172.203451,25.0761719 172.597005,24.796875 172.808594,24.2382812 L174.306641,20.3789062 L183.041016,20.3789062 L184.539062,24.2382812 Z M182.317383,18.5380859 L175.030273,18.5380859 L178.673828,9.15625 L182.317383,18.5380859 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M191.549805,15.2373047 L191.549805,8.88964844 L195.548828,8.88964844 C196.945312,8.88964844 197.956706,9.1266276 198.583008,9.60058594 C199.20931,10.0745443 199.522461,10.8828125 199.522461,12.0253906 C199.522461,13.1933594 199.190267,14.0206706 198.525879,14.5073242 C197.861491,14.9939779 196.822591,15.2373047 195.40918,15.2373047 L191.549805,15.2373047 Z M190.267578,25 C190.623047,25 190.925618,24.8942057 191.175293,24.6826172 C191.424967,24.4710286 191.549805,24.1705729 191.549805,23.78125 L191.549805,17.1923828 L195.421875,17.1923828 C196.429036,17.1923828 197.32194,17.1035156 198.100586,16.9257812 C198.879232,16.7480469 199.571126,16.4666341 200.17627,16.081543 C200.781413,15.6964518 201.244792,15.1632487 201.566406,14.4819336 C201.888021,13.8006185 202.048828,12.9817708 202.048828,12.0253906 C202.048828,10.1888021 201.50293,8.86848958 200.411133,8.06445312 C199.319336,7.26041667 197.732422,6.85839844 195.650391,6.85839844 L190.280273,6.85839844 C189.916341,6.85839844 189.609538,6.98111979 189.359863,7.2265625 C189.110189,7.47200521 188.985352,7.77246094 188.985352,8.12792969 L188.985352,23.78125 C188.985352,24.1621094 189.112305,24.4604492 189.366211,24.6762695 C189.620117,24.8920898 189.920573,25 190.267578,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M205.238281,24.9873047 C205.59375,24.9873047 205.898438,24.8815104 206.152344,24.6699219 C206.40625,24.4583333 206.533203,24.1578776 206.533203,23.7685547 L206.533203,8.07714844 C206.533203,7.68782552 206.408366,7.38525391 206.158691,7.16943359 C205.909017,6.95361328 205.610677,6.84570312 205.263672,6.84570312 C204.916667,6.84570312 204.614095,6.95572917 204.355957,7.17578125 C204.097819,7.39583333 203.96875,7.69628906 203.96875,8.07714844 L203.96875,23.7685547 C203.96875,24.1494141 204.093587,24.4477539 204.343262,24.6635742 C204.592936,24.8793945 204.891276,24.9873047 205.238281,24.9873047 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M219.919922,27.8564453 C220.165365,27.8564453 220.374837,27.7760417 220.54834,27.6152344 C220.721842,27.4544271 220.808594,27.2470703 220.808594,26.9931641 C220.808594,26.8746745 220.77474,26.7223307 220.707031,26.5361328 C219.462891,23.0999349 218.84082,19.921875 218.84082,17.0019531 C218.84082,14.0481771 219.471354,10.8404948 220.732422,7.37890625 C220.791667,7.23502604 220.821289,7.08691406 220.821289,6.93457031 C220.821289,6.68066406 220.732422,6.46907552 220.554688,6.29980469 C220.376953,6.13053385 220.165365,6.04589844 219.919922,6.04589844 C219.488281,6.04589844 219.166667,6.3125 218.955078,6.84570312 C218.151042,8.79231771 217.526855,10.5802409 217.08252,12.2094727 C216.638184,13.8387044 216.416016,15.4404297 216.416016,17.0146484 C216.416016,18.6142578 216.642415,20.2286784 217.095215,21.8579102 C217.548014,23.4871419 218.184896,25.2708333 219.005859,27.2089844 C219.200521,27.640625 219.505208,27.8564453 219.919922,27.8564453 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M222.817383,27.1074219 C223.249023,27.1074219 223.545247,26.8873698 223.706055,26.4472656 L229.850586,8.02636719 C229.892904,7.87402344 229.914062,7.75130208 229.914062,7.65820312 C229.914062,7.39583333 229.818848,7.18424479 229.628418,7.0234375 C229.437988,6.86263021 229.21582,6.78222656 228.961914,6.78222656 C228.487956,6.78222656 228.179036,7.00227865 228.035156,7.44238281 L221.941406,25.8759766 C221.899089,26.0283203 221.87793,26.1468099 221.87793,26.2314453 C221.87793,26.4938151 221.971029,26.7054036 222.157227,26.8662109 C222.343424,27.0270182 222.563477,27.1074219 222.817383,27.1074219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M236.290039,25.0126953 C236.772461,25.0126953 237.151204,24.8836263 237.42627,24.6254883 C237.701335,24.3673503 237.970052,23.9251302 238.232422,23.2988281 L242.193359,13.8154297 C242.244141,13.7054036 242.269531,13.5869141 242.269531,13.4599609 C242.269531,13.1722005 242.148926,12.9204102 241.907715,12.7045898 C241.666504,12.4887695 241.397786,12.3808594 241.101562,12.3808594 C240.661458,12.3808594 240.348307,12.6220703 240.162109,13.1044922 L236.290039,22.6640625 L232.405273,13.1044922 C232.219076,12.6220703 231.905924,12.3808594 231.46582,12.3808594 C231.169596,12.3808594 230.900879,12.4887695 230.659668,12.7045898 C230.418457,12.9204102 230.297852,13.1722005 230.297852,13.4599609 C230.297852,13.5869141 230.323242,13.7054036 230.374023,13.8154297 L234.334961,23.2988281 C234.597331,23.9251302 234.866048,24.3673503 235.141113,24.6254883 C235.416178,24.8836263 235.799154,25.0126953 236.290039,25.0126953 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M255.069336,11.8222656 C255.069336,10.4003906 254.54248,9.19433594 253.48877,8.20410156 C252.435059,7.21386719 251.11263,6.71875 249.521484,6.71875 C248.404297,6.71875 247.462728,6.85205078 246.696777,7.11865234 C245.930827,7.38525391 245.128906,7.77246094 244.291016,8.28027344 C243.960938,8.48339844 243.795898,8.74576823 243.795898,9.06738281 C243.795898,9.33821615 243.899577,9.58154297 244.106934,9.79736328 C244.31429,10.0131836 244.549154,10.1210938 244.811523,10.1210938 C244.938477,10.1210938 245.061198,10.0914714 245.179688,10.0322266 C245.255859,9.98990885 245.490723,9.8523763 245.884277,9.61962891 C246.277832,9.38688151 246.574056,9.2281901 246.772949,9.14355469 C246.971842,9.05891927 247.310384,8.96582031 247.788574,8.86425781 C248.266764,8.76269531 248.785156,8.71191406 249.34375,8.71191406 C250.283203,8.71191406 251.07666,9.01871745 251.724121,9.63232422 C252.371582,10.245931 252.695312,11.0520833 252.695312,12.0507812 C252.695312,12.6432292 252.534505,13.2335612 252.212891,13.8217773 C251.891276,14.4099935 251.387695,15.0553385 250.702148,15.7578125 L244.100586,22.4482422 C243.702799,22.8460286 243.503906,23.2607422 243.503906,23.6923828 C243.503906,24.0647786 243.65625,24.3758138 243.960938,24.6254883 C244.265625,24.8751628 244.667643,25 245.166992,25 L253.901367,25 C254.214518,25 254.459961,24.9047852 254.637695,24.7143555 C254.81543,24.5239258 254.904297,24.297526 254.904297,24.0351562 C254.904297,23.7727865 254.81543,23.5463867 254.637695,23.355957 C254.459961,23.1655273 254.214518,23.0703125 253.901367,23.0703125 L246.258789,23.0703125 L252.060547,17.4208984 C253.135417,16.4306641 253.905599,15.4954427 254.371094,14.6152344 C254.836589,13.735026 255.069336,12.8040365 255.069336,11.8222656 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M257.458984,27.1074219 C257.890625,27.1074219 258.186849,26.8873698 258.347656,26.4472656 L264.492188,8.02636719 C264.534505,7.87402344 264.555664,7.75130208 264.555664,7.65820312 C264.555664,7.39583333 264.460449,7.18424479 264.27002,7.0234375 C264.07959,6.86263021 263.857422,6.78222656 263.603516,6.78222656 C263.129557,6.78222656 262.820638,7.00227865 262.676758,7.44238281 L256.583008,25.8759766 C256.54069,26.0283203 256.519531,26.1468099 256.519531,26.2314453 C256.519531,26.4938151 256.61263,26.7054036 256.798828,26.8662109 C256.985026,27.0270182 257.205078,27.1074219 257.458984,27.1074219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M267.770508,17.7382812 C267.812826,17.2135417 267.924967,16.7163086 268.106934,16.246582 C268.2889,15.7768555 268.534342,15.3494466 268.843262,14.9643555 C269.152181,14.5792643 269.547852,14.2724609 270.030273,14.0439453 C270.512695,13.8154297 271.05013,13.7011719 271.642578,13.7011719 C272.785156,13.7011719 273.652669,14.0777995 274.245117,14.8310547 C274.837565,15.5843099 275.171875,16.5533854 275.248047,17.7382812 L267.770508,17.7382812 Z M271.680664,25.2285156 C273.576497,25.2285156 275.193034,24.6276042 276.530273,23.4257812 C276.767253,23.2141927 276.885742,22.9729818 276.885742,22.7021484 C276.885742,22.4905599 276.815918,22.3064779 276.67627,22.1499023 C276.536621,21.9933268 276.365234,21.9150391 276.162109,21.9150391 C275.992839,21.9150391 275.832031,21.9742839 275.679688,22.0927734 C275.070312,22.5582682 274.465169,22.9264323 273.864258,23.1972656 C273.263346,23.468099 272.586263,23.6035156 271.833008,23.6035156 C270.639648,23.5865885 269.662109,23.2036133 268.900391,22.4545898 C268.138672,21.7055664 267.749349,20.6031901 267.732422,19.1474609 L276.441406,19.1474609 C276.71224,19.1474609 276.915365,19.0670573 277.050781,18.90625 C277.186198,18.7454427 277.253906,18.5423177 277.253906,18.296875 C277.228516,17.4420573 277.105794,16.6570638 276.885742,15.9418945 C276.66569,15.2267253 276.335612,14.5792643 275.895508,13.9995117 C275.455404,13.4197591 274.862956,12.9648438 274.118164,12.6347656 C273.373372,12.3046875 272.505859,12.1396484 271.515625,12.1396484 C270.263021,12.1396484 269.166992,12.4443359 268.227539,13.0537109 C267.288086,13.6630859 266.585612,14.4544271 266.120117,15.4277344 C265.654622,16.4010417 265.421875,17.4759115 265.421875,18.6523438 C265.421875,19.9895833 265.699056,21.159668 266.253418,22.1625977 C266.80778,23.1655273 267.554688,23.9251302 268.494141,24.4414062 C269.433594,24.9576823 270.495768,25.2200521 271.680664,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M280.544922,25 C280.866536,25 281.143717,24.9047852 281.376465,24.7143555 C281.609212,24.5239258 281.725586,24.2594401 281.725586,23.9208984 L281.725586,17.7255859 C281.725586,16.4560547 282.055664,15.4679362 282.71582,14.7612305 C283.375977,14.0545247 284.256185,13.7011719 285.356445,13.7011719 C287.201497,13.7011719 288.124023,14.8691406 288.124023,17.2050781 L288.124023,23.9082031 C288.124023,24.2467448 288.236165,24.5133464 288.460449,24.7080078 C288.684733,24.9026693 288.949219,25 289.253906,25 C289.567057,25 289.837891,24.9026693 290.066406,24.7080078 C290.294922,24.5133464 290.40918,24.2467448 290.40918,23.9082031 L290.40918,17.7890625 C290.40918,16.4518229 290.751953,15.4361979 291.4375,14.7421875 C292.123047,14.0481771 292.994792,13.7011719 294.052734,13.7011719 C294.941406,13.7011719 295.626953,13.9698893 296.109375,14.5073242 C296.591797,15.0447591 296.833008,15.8382161 296.833008,16.8876953 L296.833008,23.9208984 C296.833008,24.2594401 296.943034,24.5239258 297.163086,24.7143555 C297.383138,24.9047852 297.64974,25 297.962891,25 C298.276042,25 298.548991,24.9047852 298.781738,24.7143555 C299.014486,24.5239258 299.130859,24.2594401 299.130859,23.9208984 L299.130859,16.9765625 C299.130859,16.1471354 299.010254,15.4150391 298.769043,14.7802734 C298.527832,14.1455078 298.193522,13.6398112 297.766113,13.2631836 C297.338704,12.886556 296.854167,12.6051432 296.3125,12.4189453 C295.770833,12.2327474 295.178385,12.1396484 294.535156,12.1396484 C293.417969,12.1396484 292.470052,12.387207 291.691406,12.8823242 C290.91276,13.3774414 290.345703,14.1158854 289.990234,15.0976562 C289.727865,14.1497396 289.228516,13.4197591 288.492188,12.9077148 C287.755859,12.3956706 286.862956,12.1396484 285.813477,12.1396484 C284.890951,12.1396484 284.069987,12.3491211 283.350586,12.7680664 C282.631185,13.1870117 282.089518,13.7858073 281.725586,14.5644531 L281.725586,13.3837891 C281.725586,13.070638 281.609212,12.8251953 281.376465,12.6474609 C281.143717,12.4697266 280.875,12.3808594 280.570312,12.3808594 C280.265625,12.3808594 279.996908,12.4760742 279.76416,12.6665039 C279.531413,12.8569336 279.415039,13.1171875 279.415039,13.4472656 L279.415039,23.9208984 C279.415039,24.2594401 279.525065,24.5239258 279.745117,24.7143555 C279.965169,24.9047852 280.231771,25 280.544922,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M308.09668,23.6542969 C305.472982,23.6542969 304.161133,21.9785156 304.161133,18.6269531 C304.161133,17.983724 304.233073,17.3785807 304.376953,16.8115234 C304.520833,16.2444661 304.73877,15.7239583 305.030762,15.25 C305.322754,14.7760417 305.72054,14.3994141 306.224121,14.1201172 C306.727702,13.8408203 307.313802,13.7011719 307.982422,13.7011719 C308.642578,13.7011719 309.220215,13.8492839 309.715332,14.1455078 C310.210449,14.4417318 310.593424,14.8374023 310.864258,15.3325195 C311.135091,15.8276367 311.3361,16.3544922 311.467285,16.9130859 C311.59847,17.4716797 311.664062,18.0556641 311.664062,18.6650391 C311.664062,19.257487 311.604818,19.8266602 311.486328,20.3725586 C311.367839,20.918457 311.181641,21.4453125 310.927734,21.953125 C310.673828,22.4609375 310.303548,22.8693034 309.816895,23.1782227 C309.330241,23.4871419 308.756836,23.6458333 308.09668,23.6542969 Z M308.50293,25.2285156 C309.383138,25.2285156 310.176595,25.0486654 310.883301,24.6889648 C311.590007,24.3292643 312.163411,23.8426107 312.603516,23.2290039 C313.04362,22.6153971 313.380046,21.9150391 313.612793,21.1279297 C313.84554,20.3408203 313.961914,19.5029297 313.961914,18.6142578 C313.961914,17.7086589 313.834961,16.8644206 313.581055,16.081543 C313.327148,15.2986654 312.965332,14.6131185 312.495605,14.0249023 C312.025879,13.4366862 311.429199,12.9754232 310.705566,12.6411133 C309.981934,12.3068034 309.180013,12.1396484 308.299805,12.1396484 C307.360352,12.1396484 306.528809,12.3427734 305.805176,12.7490234 C305.081543,13.1552734 304.529297,13.7434896 304.148438,14.5136719 L304.148438,7.92480469 C304.148438,7.59472656 304.03418,7.33447266 303.805664,7.14404297 C303.577148,6.95361328 303.306315,6.85839844 302.993164,6.85839844 C302.688477,6.85839844 302.421875,6.95361328 302.193359,7.14404297 C301.964844,7.33447266 301.850586,7.59472656 301.850586,7.92480469 L301.850586,23.8955078 C301.850586,24.2340495 301.964844,24.4985352 302.193359,24.6889648 C302.421875,24.8793945 302.688477,24.9746094 302.993164,24.9746094 C303.374023,24.9746094 303.661784,24.8793945 303.856445,24.6889648 C304.051107,24.4985352 304.148438,24.2382812 304.148438,23.9082031 L304.148438,22.7783203 C304.554688,23.5908203 305.136556,24.2023112 305.894043,24.612793 C306.65153,25.0232747 307.521159,25.2285156 308.50293,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M317.519531,17.7382812 C317.561849,17.2135417 317.673991,16.7163086 317.855957,16.246582 C318.037923,15.7768555 318.283366,15.3494466 318.592285,14.9643555 C318.901204,14.5792643 319.296875,14.2724609 319.779297,14.0439453 C320.261719,13.8154297 320.799154,13.7011719 321.391602,13.7011719 C322.53418,13.7011719 323.401693,14.0777995 323.994141,14.8310547 C324.586589,15.5843099 324.920898,16.5533854 324.99707,17.7382812 L317.519531,17.7382812 Z M321.429688,25.2285156 C323.325521,25.2285156 324.942057,24.6276042 326.279297,23.4257812 C326.516276,23.2141927 326.634766,22.9729818 326.634766,22.7021484 C326.634766,22.4905599 326.564941,22.3064779 326.425293,22.1499023 C326.285645,21.9933268 326.114258,21.9150391 325.911133,21.9150391 C325.741862,21.9150391 325.581055,21.9742839 325.428711,22.0927734 C324.819336,22.5582682 324.214193,22.9264323 323.613281,23.1972656 C323.01237,23.468099 322.335286,23.6035156 321.582031,23.6035156 C320.388672,23.5865885 319.411133,23.2036133 318.649414,22.4545898 C317.887695,21.7055664 317.498372,20.6031901 317.481445,19.1474609 L326.19043,19.1474609 C326.461263,19.1474609 326.664388,19.0670573 326.799805,18.90625 C326.935221,18.7454427 327.00293,18.5423177 327.00293,18.296875 C326.977539,17.4420573 326.854818,16.6570638 326.634766,15.9418945 C326.414714,15.2267253 326.084635,14.5792643 325.644531,13.9995117 C325.204427,13.4197591 324.611979,12.9648438 323.867188,12.6347656 C323.122396,12.3046875 322.254883,12.1396484 321.264648,12.1396484 C320.012044,12.1396484 318.916016,12.4443359 317.976562,13.0537109 C317.037109,13.6630859 316.334635,14.4544271 315.869141,15.4277344 C315.403646,16.4010417 315.170898,17.4759115 315.170898,18.6523438 C315.170898,19.9895833 315.448079,21.159668 316.002441,22.1625977 C316.556803,23.1655273 317.303711,23.9251302 318.243164,24.4414062 C319.182617,24.9576823 320.244792,25.2200521 321.429688,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M334.166016,23.6542969 C333.505859,23.6458333 332.932454,23.4871419 332.445801,23.1782227 C331.959147,22.8693034 331.588867,22.4609375 331.334961,21.953125 C331.081055,21.4453125 330.894857,20.918457 330.776367,20.3725586 C330.657878,19.8266602 330.598633,19.257487 330.598633,18.6650391 C330.598633,18.0556641 330.664225,17.4716797 330.79541,16.9130859 C330.926595,16.3544922 331.127604,15.8276367 331.398438,15.3325195 C331.669271,14.8374023 332.052246,14.4417318 332.547363,14.1455078 C333.04248,13.8492839 333.620117,13.7011719 334.280273,13.7011719 C334.948893,13.7011719 335.534993,13.8408203 336.038574,14.1201172 C336.542155,14.3994141 336.939941,14.7760417 337.231934,15.25 C337.523926,15.7239583 337.741862,16.2444661 337.885742,16.8115234 C338.029622,17.3785807 338.101562,17.983724 338.101562,18.6269531 C338.101562,21.9785156 336.789714,23.6542969 334.166016,23.6542969 Z M333.759766,25.2285156 C334.741536,25.2285156 335.611165,25.0232747 336.368652,24.612793 C337.126139,24.2023112 337.708008,23.5908203 338.114258,22.7783203 L338.114258,23.9082031 C338.114258,24.2382812 338.211589,24.4985352 338.40625,24.6889648 C338.600911,24.8793945 338.888672,24.9746094 339.269531,24.9746094 C339.574219,24.9746094 339.84082,24.8793945 340.069336,24.6889648 C340.297852,24.4985352 340.412109,24.2340495 340.412109,23.8955078 L340.412109,7.92480469 C340.412109,7.59472656 340.297852,7.33447266 340.069336,7.14404297 C339.84082,6.95361328 339.574219,6.85839844 339.269531,6.85839844 C338.95638,6.85839844 338.685547,6.95361328 338.457031,7.14404297 C338.228516,7.33447266 338.114258,7.59472656 338.114258,7.92480469 L338.114258,14.5136719 C337.733398,13.7434896 337.181152,13.1552734 336.45752,12.7490234 C335.733887,12.3427734 334.902344,12.1396484 333.962891,12.1396484 C333.082682,12.1396484 332.280762,12.3068034 331.557129,12.6411133 C330.833496,12.9754232 330.236816,13.4366862 329.76709,14.0249023 C329.297363,14.6131185 328.935547,15.2986654 328.681641,16.081543 C328.427734,16.8644206 328.300781,17.7086589 328.300781,18.6142578 C328.300781,19.5029297 328.417155,20.3408203 328.649902,21.1279297 C328.88265,21.9150391 329.219076,22.6153971 329.65918,23.2290039 C330.099284,23.8426107 330.672689,24.3292643 331.379395,24.6889648 C332.0861,25.0486654 332.879557,25.2285156 333.759766,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M343.588867,27.8691406 C344.003581,27.8691406 344.3125,27.6490885 344.515625,27.2089844 C345.336589,25.2792969 345.97347,23.4977214 346.42627,21.8642578 C346.879069,20.2307943 347.105469,18.6184896 347.105469,17.0273438 C347.105469,15.4446615 346.883301,13.8387044 346.438965,12.2094727 C345.994629,10.5802409 345.370443,8.79231771 344.566406,6.84570312 C344.329427,6.32096354 344.012044,6.05859375 343.614258,6.05859375 C343.368815,6.05859375 343.155111,6.14322917 342.973145,6.3125 C342.791178,6.48177083 342.700195,6.6891276 342.700195,6.93457031 C342.700195,7.06998698 342.734049,7.21809896 342.801758,7.37890625 C344.054362,10.8658854 344.680664,14.0735677 344.680664,17.0019531 C344.680664,19.9726562 344.058594,23.1507161 342.814453,26.5361328 C342.746745,26.7138672 342.712891,26.8704427 342.712891,27.0058594 C342.712891,27.2513021 342.799642,27.456543 342.973145,27.621582 C343.146647,27.7866211 343.351888,27.8691406 343.588867,27.8691406 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-10" transform="translate(0, 92)" xlink:href="#path-32" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M9.765625,25.2685547 C12.3291016,25.2685547 14.3452962,24.4588216 15.814209,22.8393555 C17.2831217,21.2198893 18.0175781,19.0348307 18.0175781,16.2841797 C18.0175781,13.5335286 17.2831217,11.3484701 15.814209,9.72900391 C14.3452962,8.10953776 12.3291016,7.29980469 9.765625,7.29980469 C7.20214844,7.29980469 5.18391927,8.10953776 3.7109375,9.72900391 C2.23795573,11.3484701 1.50146484,13.5335286 1.50146484,16.2841797 C1.50146484,19.0348307 2.23795573,21.2198893 3.7109375,22.8393555 C5.18391927,24.4588216 7.20214844,25.2685547 9.765625,25.2685547 Z M9.765625,23.2910156 C7.98339844,23.2910156 6.59586589,22.6664225 5.60302734,21.4172363 C4.6101888,20.1680501 4.11376953,18.4570312 4.11376953,16.2841797 C4.11376953,14.1113281 4.6081543,12.4003092 5.59692383,11.151123 C6.58569336,9.90193685 7.97526042,9.27734375 9.765625,9.27734375 C11.5478516,9.27734375 12.9333496,9.90193685 13.9221191,11.151123 C14.9108887,12.4003092 15.4052734,14.1113281 15.4052734,16.2841797 C15.4052734,18.4570312 14.9108887,20.1680501 13.9221191,21.4172363 C12.9333496,22.6664225 11.5478516,23.2910156 9.765625,23.2910156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M26.2705078,23.7060547 C25.6276042,23.7060547 25.0640462,23.5717773 24.579834,23.3032227 C24.0956217,23.034668 23.7131348,22.672526 23.432373,22.2167969 C23.1516113,21.7610677 22.9420573,21.2626139 22.8037109,20.7214355 C22.6653646,20.1802572 22.5961914,19.6004232 22.5961914,18.9819336 C22.5961914,15.7511393 23.8575846,14.1357422 26.3803711,14.1357422 C26.8930664,14.1438802 27.3508301,14.2496745 27.7536621,14.453125 C28.1564941,14.6565755 28.4840495,14.9169922 28.7363281,15.234375 C28.9886068,15.5517578 29.1981608,15.9261068 29.3649902,16.3574219 C29.5318197,16.788737 29.6477865,17.2159831 29.7128906,17.6391602 C29.7779948,18.0623372 29.8105469,18.4977214 29.8105469,18.9453125 C29.8105469,19.53125 29.7474772,20.0927734 29.6213379,20.6298828 C29.4951986,21.1669922 29.3019206,21.673584 29.0415039,22.1496582 C28.7810872,22.6257324 28.4128418,23.0041504 27.9367676,23.2849121 C27.4606934,23.5656738 26.9052734,23.7060547 26.2705078,23.7060547 Z M21.4731445,29.6386719 C21.7742513,29.6386719 22.034668,29.5471191 22.2543945,29.3640137 C22.4741211,29.1809082 22.5839844,28.9306641 22.5839844,28.6132812 L22.5839844,22.9370117 C22.9501953,23.6694336 23.4812012,24.2329915 24.177002,24.6276855 C24.8728027,25.0223796 25.6723633,25.2197266 26.5756836,25.2197266 C27.4220378,25.2197266 28.1931152,25.0590007 28.888916,24.7375488 C29.5847168,24.416097 30.1584473,23.9725749 30.6101074,23.4069824 C31.0617676,22.84139 31.409668,22.1822103 31.6538086,21.4294434 C31.8979492,20.6766764 32.0200195,19.8649089 32.0200195,18.9941406 C32.0200195,18.1396484 31.9081217,17.3339844 31.6843262,16.5771484 C31.4605306,15.8203125 31.1370443,15.1468913 30.7138672,14.5568848 C30.2906901,13.9668783 29.7393392,13.4989421 29.0598145,13.1530762 C28.3802897,12.8072103 27.6173503,12.6342773 26.7709961,12.6342773 C25.8269857,12.6342773 24.990804,12.8316243 24.2624512,13.2263184 C23.5340983,13.6210124 22.9746094,14.2089844 22.5839844,14.9902344 L22.5839844,13.9038086 C22.5839844,13.5864258 22.4903971,13.3361816 22.3032227,13.1530762 C22.1160482,12.9699707 21.8393555,12.878418 21.4731445,12.878418 C21.1720378,12.878418 20.9136556,12.9679362 20.697998,13.1469727 C20.4823405,13.3260091 20.3745117,13.5782878 20.3745117,13.9038086 L20.3745117,28.6132812 C20.3745117,28.9306641 20.484375,29.1809082 20.7041016,29.3640137 C20.9238281,29.5471191 21.1801758,29.6386719 21.4731445,29.6386719 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M35.4023438,18.0175781 C35.4430339,17.5130208 35.5508626,17.0349121 35.7258301,16.583252 C35.9007975,16.1315918 36.1368001,15.7206217 36.4338379,15.3503418 C36.7308757,14.9800618 37.1113281,14.6850586 37.5751953,14.465332 C38.0390625,14.2456055 38.5558268,14.1357422 39.1254883,14.1357422 C40.2241211,14.1357422 41.0582682,14.4978841 41.6279297,15.222168 C42.1975911,15.9464518 42.519043,16.8782552 42.5922852,18.0175781 L35.4023438,18.0175781 Z M39.1621094,25.2197266 C40.985026,25.2197266 42.539388,24.6419271 43.8251953,23.4863281 C44.0530599,23.2828776 44.1669922,23.050944 44.1669922,22.7905273 C44.1669922,22.5870768 44.0998535,22.4100749 43.9655762,22.2595215 C43.8312988,22.1089681 43.6665039,22.0336914 43.4711914,22.0336914 C43.308431,22.0336914 43.1538086,22.0906576 43.0073242,22.2045898 C42.4213867,22.652181 41.8395182,23.0061849 41.2617188,23.2666016 C40.6839193,23.5270182 40.0328776,23.6572266 39.3085938,23.6572266 C38.1611328,23.6409505 37.2211914,23.2727051 36.4887695,22.5524902 C35.7563477,21.8322754 35.3819987,20.7722982 35.3657227,19.3725586 L43.7397461,19.3725586 C44.0001628,19.3725586 44.1954753,19.2952474 44.3256836,19.140625 C44.4558919,18.9860026 44.5209961,18.7906901 44.5209961,18.5546875 C44.496582,17.7327474 44.3785807,16.977946 44.1669922,16.2902832 C43.9554036,15.6026204 43.6380208,14.9800618 43.2148438,14.4226074 C42.7916667,13.865153 42.2220052,13.4277344 41.5058594,13.1103516 C40.7897135,12.7929688 39.9555664,12.6342773 39.003418,12.6342773 C37.7989909,12.6342773 36.7451172,12.9272461 35.8417969,13.5131836 C34.9384766,14.0991211 34.2630208,14.860026 33.8154297,15.7958984 C33.3678385,16.7317708 33.144043,17.7652995 33.144043,18.8964844 C33.144043,20.1822917 33.4105632,21.307373 33.9436035,22.2717285 C34.4766439,23.236084 35.1948242,23.9664714 36.0981445,24.4628906 C37.0014648,24.9593099 38.0227865,25.2115885 39.1621094,25.2197266 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M47.6469727,25 C47.9480794,25 48.2105306,24.9064128 48.4343262,24.7192383 C48.6581217,24.5320638 48.7700195,24.2757161 48.7700195,23.9501953 L48.7700195,17.8588867 C48.8269857,16.7114258 49.1769206,15.8040365 49.8198242,15.1367188 C50.4627279,14.469401 51.2521159,14.1357422 52.1879883,14.1357422 C53.0668945,14.1357422 53.7484538,14.4083659 54.232666,14.9536133 C54.7168783,15.4988607 54.9589844,16.2923177 54.9589844,17.3339844 L54.9589844,23.9501953 C54.9589844,24.2838542 55.0668132,24.5422363 55.2824707,24.7253418 C55.4981283,24.9084473 55.7565104,25 56.0576172,25 C56.358724,25 56.6171061,24.9084473 56.8327637,24.7253418 C57.0484212,24.5422363 57.15625,24.2838542 57.15625,23.9501953 L57.15625,17.3583984 C57.15625,15.7714844 56.757487,14.5874023 55.9599609,13.8061523 C55.1624349,13.0249023 54.0922852,12.6342773 52.7495117,12.6342773 C51.8136393,12.6342773 50.9937337,12.8336589 50.2897949,13.2324219 C49.5858561,13.6311849 49.0792643,14.2008464 48.7700195,14.9414062 L48.7700195,13.8793945 C48.7700195,13.5620117 48.6621908,13.3158366 48.4465332,13.1408691 C48.2308757,12.9659017 47.9724935,12.878418 47.6713867,12.878418 C47.3702799,12.878418 47.1098633,12.9699707 46.8901367,13.1530762 C46.6704102,13.3361816 46.5605469,13.5904948 46.5605469,13.9160156 L46.5605469,23.9501953 C46.5605469,24.2838542 46.6663411,24.5422363 46.8779297,24.7253418 C47.0895182,24.9084473 47.3458659,25 47.6469727,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M72.2329102,24.2675781 C72.4363607,24.8046875 72.8107096,25.0732422 73.355957,25.0732422 C73.6733398,25.0732422 73.9581706,24.9715169 74.2104492,24.7680664 C74.4627279,24.5646159 74.5888672,24.3082682 74.5888672,23.9990234 C74.5888672,23.8688151 74.5563151,23.7182617 74.4912109,23.5473633 L68.6928711,9.15527344 C68.4161784,8.46354167 68.1171061,8.00577799 67.7956543,7.78198242 C67.4742025,7.55818685 67.0734049,7.44628906 66.5932617,7.44628906 C66.1131185,7.44628906 65.712321,7.55818685 65.3908691,7.78198242 C65.0694173,8.00577799 64.7703451,8.46354167 64.4936523,9.15527344 L58.6953125,23.5473633 C58.6302083,23.7182617 58.5976562,23.8688151 58.5976562,23.9990234 C58.5976562,24.3082682 58.7217611,24.5646159 58.9699707,24.7680664 C59.2181803,24.9715169 59.5009766,25.0732422 59.8183594,25.0732422 C60.3717448,25.0732422 60.7501628,24.8046875 60.9536133,24.2675781 L62.394043,20.5566406 L70.7924805,20.5566406 L72.2329102,24.2675781 Z M70.0966797,18.7866211 L63.0898438,18.7866211 L66.5932617,9.765625 L70.0966797,18.7866211 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M77.9956055,24.987793 C78.3374023,24.987793 78.6303711,24.8860677 78.8745117,24.6826172 C79.1186523,24.4791667 79.2407227,24.1902669 79.2407227,23.815918 L79.2407227,8.72802734 C79.2407227,8.35367839 79.1206868,8.06274414 78.8806152,7.85522461 C78.6405436,7.64770508 78.3536784,7.54394531 78.0200195,7.54394531 C77.6863607,7.54394531 77.3954264,7.64973958 77.1472168,7.86132812 C76.8990072,8.07291667 76.7749023,8.36181641 76.7749023,8.72802734 L76.7749023,23.815918 C76.7749023,24.1821289 76.8949382,24.4689941 77.1350098,24.6765137 C77.3750814,24.8840332 77.6619466,24.987793 77.9956055,24.987793 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M82.1469727,19.3969727 L87.5791016,19.3969727 C87.8639323,19.3969727 88.0979004,19.3196615 88.2810059,19.1650391 C88.4641113,19.0104167 88.5556641,18.8110352 88.5556641,18.5668945 C88.5556641,18.3064779 88.4661458,18.0969238 88.2871094,17.9382324 C88.1080729,17.779541 87.8720703,17.7001953 87.5791016,17.7001953 L82.1469727,17.7001953 C81.8540039,17.7001953 81.6180013,17.779541 81.4389648,17.9382324 C81.2599284,18.0969238 81.1704102,18.3064779 81.1704102,18.5668945 C81.1704102,18.8110352 81.2619629,19.0104167 81.4450684,19.1650391 C81.6281738,19.3196615 81.8621419,19.3969727 82.1469727,19.3969727 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M94.8310547,25.2319336 C96.4261068,25.2319336 97.6630859,24.8779297 98.5419922,24.1699219 C98.8186849,23.9420573 98.9570312,23.6938477 98.9570312,23.425293 C98.9570312,23.2381185 98.8919271,23.0814616 98.7617188,22.9553223 C98.6315104,22.8291829 98.472819,22.7661133 98.2856445,22.7661133 C98.1147461,22.7661133 97.9519857,22.8190104 97.7973633,22.9248047 C97.0486654,23.445638 96.116862,23.7060547 95.0019531,23.7060547 C94.3671875,23.7060547 93.8056641,23.5738118 93.3173828,23.3093262 C92.8291016,23.0448405 92.4384766,22.6867676 92.1455078,22.2351074 C91.8525391,21.7834473 91.6328125,21.2809245 91.4863281,20.7275391 C91.3398438,20.1741536 91.2666016,19.5882161 91.2666016,18.9697266 C91.2666016,17.4886068 91.6287435,16.3106283 92.3530273,15.435791 C93.0773112,14.5609538 94.0213216,14.1235352 95.1850586,14.1235352 C96.0802409,14.1235352 96.898112,14.3636068 97.6386719,14.84375 C97.8014323,14.9495443 97.9723307,15.0024414 98.1513672,15.0024414 C98.3466797,15.0024414 98.5114746,14.9414062 98.645752,14.8193359 C98.7800293,14.6972656 98.847168,14.5467122 98.847168,14.3676758 C98.847168,14.1153971 98.7088216,13.8793945 98.4321289,13.659668 C98.082194,13.3666992 97.6101888,13.1225586 97.0161133,12.9272461 C96.4220378,12.7319336 95.7750651,12.6342773 95.0751953,12.6342773 C93.8789062,12.6342773 92.8209635,12.9150391 91.9013672,13.4765625 C90.9817708,14.0380859 90.277832,14.7949219 89.7895508,15.7470703 C89.3012695,16.6992188 89.0571289,17.7612305 89.0571289,18.9331055 C89.0571289,20.7478841 89.5779622,22.2513835 90.6196289,23.4436035 C91.6612956,24.6358236 93.0651042,25.2319336 94.8310547,25.2319336 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M105.879395,25.2197266 C107.759277,25.2197266 109.248535,24.6297201 110.347168,23.449707 C111.445801,22.269694 111.995117,20.7641602 111.995117,18.9331055 C111.995117,17.0857747 111.443766,15.5741374 110.341064,14.3981934 C109.238363,13.2222493 107.751139,12.6342773 105.879395,12.6342773 C104.00765,12.6342773 102.520426,13.2242839 101.417725,14.4042969 C100.315023,15.5843099 99.7636719,17.0939128 99.7636719,18.9331055 C99.7636719,20.7641602 100.315023,22.269694 101.417725,23.449707 C102.520426,24.6297201 104.00765,25.2197266 105.879395,25.2197266 Z M105.85498,23.7060547 C104.634277,23.7060547 103.680094,23.2767741 102.992432,22.4182129 C102.304769,21.5596517 101.960938,20.3979492 101.960938,18.9331055 C101.960938,17.4519857 102.306803,16.2821452 102.998535,15.423584 C103.690267,14.5650228 104.650553,14.1357422 105.879395,14.1357422 C107.100098,14.1357422 108.05835,14.5670573 108.75415,15.4296875 C109.449951,16.2923177 109.797852,17.4601237 109.797852,18.9331055 C109.797852,20.4142253 109.45402,21.5799967 108.766357,22.4304199 C108.078695,23.2808431 107.108236,23.7060547 105.85498,23.7060547 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M114.815918,25 C115.125163,25 115.391683,24.9084473 115.615479,24.7253418 C115.839274,24.5422363 115.951172,24.2879232 115.951172,23.9624023 L115.951172,18.0053711 C115.951172,16.784668 116.268555,15.834554 116.90332,15.1550293 C117.538086,14.4755046 118.38444,14.1357422 119.442383,14.1357422 C121.216471,14.1357422 122.103516,15.2587891 122.103516,17.5048828 L122.103516,23.9501953 C122.103516,24.2757161 122.211344,24.5320638 122.427002,24.7192383 C122.64266,24.9064128 122.896973,25 123.189941,25 C123.491048,25 123.751465,24.9064128 123.971191,24.7192383 C124.190918,24.5320638 124.300781,24.2757161 124.300781,23.9501953 L124.300781,18.0664062 C124.300781,16.780599 124.630371,15.8040365 125.289551,15.1367188 C125.94873,14.469401 126.786947,14.1357422 127.804199,14.1357422 C128.658691,14.1357422 129.317871,14.3941243 129.781738,14.9108887 C130.245605,15.427653 130.477539,16.1905924 130.477539,17.199707 L130.477539,23.9624023 C130.477539,24.2879232 130.583333,24.5422363 130.794922,24.7253418 C131.00651,24.9084473 131.262858,25 131.563965,25 C131.865072,25 132.127523,24.9084473 132.351318,24.7253418 C132.575114,24.5422363 132.687012,24.2879232 132.687012,23.9624023 L132.687012,17.2851562 C132.687012,16.4876302 132.571045,15.7836914 132.339111,15.1733398 C132.107178,14.5629883 131.785726,14.0767415 131.374756,13.7145996 C130.963786,13.3524577 130.497884,13.0818685 129.977051,12.902832 C129.456217,12.7237956 128.886556,12.6342773 128.268066,12.6342773 C127.193848,12.6342773 126.282389,12.8723145 125.533691,13.3483887 C124.784993,13.8244629 124.239746,14.5345052 123.897949,15.4785156 C123.645671,14.5670573 123.165527,13.865153 122.45752,13.3728027 C121.749512,12.8804525 120.890951,12.6342773 119.881836,12.6342773 C118.994792,12.6342773 118.205404,12.8356934 117.513672,13.2385254 C116.82194,13.6413574 116.301107,14.2171224 115.951172,14.9658203 L115.951172,13.8305664 C115.951172,13.5294596 115.839274,13.293457 115.615479,13.1225586 C115.391683,12.9516602 115.133301,12.8662109 114.840332,12.8662109 C114.547363,12.8662109 114.288981,12.9577637 114.065186,13.1408691 C113.84139,13.3239746 113.729492,13.5742188 113.729492,13.8916016 L113.729492,23.9624023 C113.729492,24.2879232 113.835286,24.5422363 114.046875,24.7253418 C114.258464,24.9084473 114.514811,25 114.815918,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M141.159668,23.7060547 C140.516764,23.7060547 139.953206,23.5717773 139.468994,23.3032227 C138.984782,23.034668 138.602295,22.672526 138.321533,22.2167969 C138.040771,21.7610677 137.831217,21.2626139 137.692871,20.7214355 C137.554525,20.1802572 137.485352,19.6004232 137.485352,18.9819336 C137.485352,15.7511393 138.746745,14.1357422 141.269531,14.1357422 C141.782227,14.1438802 142.23999,14.2496745 142.642822,14.453125 C143.045654,14.6565755 143.37321,14.9169922 143.625488,15.234375 C143.877767,15.5517578 144.087321,15.9261068 144.25415,16.3574219 C144.42098,16.788737 144.536947,17.2159831 144.602051,17.6391602 C144.667155,18.0623372 144.699707,18.4977214 144.699707,18.9453125 C144.699707,19.53125 144.636637,20.0927734 144.510498,20.6298828 C144.384359,21.1669922 144.191081,21.673584 143.930664,22.1496582 C143.670247,22.6257324 143.302002,23.0041504 142.825928,23.2849121 C142.349854,23.5656738 141.794434,23.7060547 141.159668,23.7060547 Z M136.362305,29.6386719 C136.663411,29.6386719 136.923828,29.5471191 137.143555,29.3640137 C137.363281,29.1809082 137.473145,28.9306641 137.473145,28.6132812 L137.473145,22.9370117 C137.839355,23.6694336 138.370361,24.2329915 139.066162,24.6276855 C139.761963,25.0223796 140.561523,25.2197266 141.464844,25.2197266 C142.311198,25.2197266 143.082275,25.0590007 143.778076,24.7375488 C144.473877,24.416097 145.047607,23.9725749 145.499268,23.4069824 C145.950928,22.84139 146.298828,22.1822103 146.542969,21.4294434 C146.787109,20.6766764 146.90918,19.8649089 146.90918,18.9941406 C146.90918,18.1396484 146.797282,17.3339844 146.573486,16.5771484 C146.349691,15.8203125 146.026204,15.1468913 145.603027,14.5568848 C145.17985,13.9668783 144.628499,13.4989421 143.948975,13.1530762 C143.26945,12.8072103 142.50651,12.6342773 141.660156,12.6342773 C140.716146,12.6342773 139.879964,12.8316243 139.151611,13.2263184 C138.423258,13.6210124 137.86377,14.2089844 137.473145,14.9902344 L137.473145,13.9038086 C137.473145,13.5864258 137.379557,13.3361816 137.192383,13.1530762 C137.005208,12.9699707 136.728516,12.878418 136.362305,12.878418 C136.061198,12.878418 135.802816,12.9679362 135.587158,13.1469727 C135.371501,13.3260091 135.263672,13.5782878 135.263672,13.9038086 L135.263672,28.6132812 C135.263672,28.9306641 135.373535,29.1809082 135.593262,29.3640137 C135.812988,29.5471191 136.069336,29.6386719 136.362305,29.6386719 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M152.305664,23.7548828 C151.548828,23.7548828 150.950684,23.5819499 150.51123,23.236084 C150.071777,22.8902181 149.852051,22.3470052 149.852051,21.6064453 C149.852051,21.1995443 149.911051,20.8658854 150.029053,20.6054688 C150.147054,20.3450521 150.328125,20.123291 150.572266,19.9401855 C150.816406,19.7570801 151.182617,19.6187337 151.670898,19.5251465 C152.15918,19.4315592 152.720703,19.3664551 153.355469,19.329834 C153.990234,19.2932129 154.812174,19.2749023 155.821289,19.2749023 L155.821289,19.6411133 C155.821289,20.8374023 155.46932,21.8221029 154.765381,22.5952148 C154.061442,23.3683268 153.241536,23.7548828 152.305664,23.7548828 Z M152.07373,25.2197266 C153.823405,25.2197266 155.07666,24.4262695 155.833496,22.8393555 L155.833496,24.0112305 C155.833496,24.3286133 155.937256,24.5747884 156.144775,24.7497559 C156.352295,24.9247233 156.602539,25.012207 156.895508,25.012207 C157.188477,25.012207 157.446859,24.9206543 157.670654,24.7375488 C157.89445,24.5544434 158.006348,24.3041992 158.006348,23.9868164 L158.006348,16.8701172 C158.006348,15.4296875 157.585205,14.3636068 156.74292,13.671875 C155.900635,12.9801432 154.734863,12.6342773 153.245605,12.6342773 C151.504069,12.6342773 150.002604,12.9882812 148.741211,13.6962891 C148.513346,13.8264974 148.399414,14.0136719 148.399414,14.2578125 C148.399414,14.469401 148.476725,14.662679 148.631348,14.8376465 C148.78597,15.0126139 148.969076,15.1000977 149.180664,15.1000977 C149.294596,15.1000977 149.392253,15.0797526 149.473633,15.0390625 C149.888672,14.851888 150.238607,14.7054036 150.523438,14.5996094 C150.808268,14.4938151 151.198893,14.3880208 151.695312,14.2822266 C152.191732,14.1764323 152.688151,14.1235352 153.18457,14.1235352 C154.014648,14.1235352 154.661621,14.3310547 155.125488,14.7460938 C155.589355,15.1611328 155.821289,15.8040365 155.821289,16.6748047 L155.821289,18.0053711 C154.917969,18.0053711 154.146891,18.0175781 153.508057,18.0419922 C152.869222,18.0664062 152.248698,18.1091309 151.646484,18.170166 C151.044271,18.2312012 150.547852,18.3207194 150.157227,18.4387207 C149.766602,18.556722 149.408529,18.7072754 149.083008,18.8903809 C148.757487,19.0734863 148.505208,19.2972819 148.326172,19.5617676 C148.147135,19.8262533 148.008789,20.1334635 147.911133,20.4833984 C147.813477,20.8333333 147.764648,21.2402344 147.764648,21.7041016 C147.764648,22.8271484 148.163411,23.6938477 148.960938,24.3041992 C149.758464,24.9145508 150.796061,25.2197266 152.07373,25.2197266 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M164.525879,25.1708984 L165.246094,25.1708984 C165.539062,25.1708984 165.7771,25.1627604 165.960205,25.1464844 C166.143311,25.1302083 166.326416,25.0935872 166.509521,25.0366211 C166.692627,24.9796549 166.828939,24.8881022 166.918457,24.7619629 C167.007975,24.6358236 167.052734,24.4710286 167.052734,24.2675781 C167.052734,24.0397135 166.975423,23.840332 166.820801,23.6694336 C166.666178,23.4985352 166.45459,23.4130859 166.186035,23.4130859 L166.112793,23.4130859 L165.282715,23.449707 L165.148438,23.449707 C164.635742,23.449707 164.261393,23.2645671 164.025391,22.8942871 C163.789388,22.5240072 163.671387,21.9197591 163.671387,21.081543 L163.671387,14.4287109 L165.91748,14.4287109 C166.45459,14.4287109 166.723145,14.1967773 166.723145,13.7329102 C166.723145,13.5131836 166.656006,13.3341471 166.521729,13.1958008 C166.387451,13.0574544 166.186035,12.9882812 165.91748,12.9882812 L163.671387,12.9882812 L163.671387,9.44824219 C163.671387,9.16341146 163.590007,8.9457194 163.427246,8.79516602 C163.264486,8.64461263 163.061035,8.56933594 162.816895,8.56933594 C162.556478,8.56933594 162.318441,8.66292318 162.102783,8.85009766 C161.887126,9.03727214 161.771159,9.26106771 161.754883,9.52148438 L161.461914,12.9882812 L159.960449,12.9882812 C159.691895,12.9882812 159.488444,13.0554199 159.350098,13.1896973 C159.211751,13.3239746 159.142578,13.4969076 159.142578,13.7084961 C159.142578,13.9282227 159.213786,14.1031901 159.356201,14.2333984 C159.498617,14.3636068 159.704102,14.4287109 159.972656,14.4287109 L161.461914,14.4287109 L161.461914,21.4599609 C161.461914,22.7213542 161.738607,23.6551921 162.291992,24.2614746 C162.845378,24.8677572 163.590007,25.1708984 164.525879,25.1708984 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M169.849121,10.0463867 C170.280436,10.0463867 170.626302,9.9222819 170.886719,9.67407227 C171.147135,9.42586263 171.277344,9.1023763 171.277344,8.70361328 C171.277344,8.29671224 171.147135,7.9691569 170.886719,7.72094727 C170.626302,7.47273763 170.284505,7.34863281 169.861328,7.34863281 C169.430013,7.34863281 169.082113,7.47477214 168.817627,7.72705078 C168.553141,7.97932943 168.420898,8.30485026 168.420898,8.70361328 C168.420898,9.1023763 168.551107,9.42586263 168.811523,9.67407227 C169.07194,9.9222819 169.417806,10.0463867 169.849121,10.0463867 Z M169.836914,25 C170.138021,25 170.400472,24.9003092 170.624268,24.7009277 C170.848063,24.5015462 170.959961,24.230957 170.959961,23.8891602 L170.959961,13.9648438 C170.959961,13.6230469 170.852132,13.3544922 170.636475,13.1591797 C170.420817,12.9638672 170.166504,12.8662109 169.873535,12.8662109 C169.572428,12.8662109 169.309977,12.9638672 169.086182,13.1591797 C168.862386,13.3544922 168.750488,13.6230469 168.750488,13.9648438 L168.750488,23.8891602 C168.750488,24.2472331 168.856283,24.5218913 169.067871,24.7131348 C169.27946,24.9043783 169.535807,25 169.836914,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M179.615723,23.7060547 C177.092936,23.7060547 175.831543,22.0947266 175.831543,18.8720703 C175.831543,18.2535807 175.900716,17.6717122 176.039062,17.1264648 C176.177409,16.5812174 176.386963,16.0807292 176.667725,15.625 C176.948486,15.1692708 177.330973,14.8071289 177.815186,14.5385742 C178.299398,14.2700195 178.862956,14.1357422 179.505859,14.1357422 C180.140625,14.1357422 180.696045,14.2781576 181.172119,14.5629883 C181.648193,14.847819 182.016439,15.2282715 182.276855,15.7043457 C182.537272,16.1804199 182.73055,16.6870117 182.856689,17.2241211 C182.982829,17.7612305 183.045898,18.3227539 183.045898,18.9086914 C183.045898,19.4783529 182.988932,20.0256348 182.875,20.5505371 C182.761068,21.0754395 182.582031,21.5820312 182.337891,22.0703125 C182.09375,22.5585938 181.737712,22.9512533 181.269775,23.248291 C180.801839,23.5453288 180.250488,23.6979167 179.615723,23.7060547 Z M180.006348,25.2197266 C180.852702,25.2197266 181.615641,25.0467936 182.295166,24.7009277 C182.974691,24.3550618 183.526042,23.8871257 183.949219,23.2971191 C184.372396,22.7071126 184.695882,22.0336914 184.919678,21.2768555 C185.143473,20.5200195 185.255371,19.7143555 185.255371,18.8598633 C185.255371,17.9890951 185.133301,17.1773275 184.88916,16.4245605 C184.64502,15.6717936 184.297119,15.0126139 183.845459,14.4470215 C183.393799,13.881429 182.820068,13.4379069 182.124268,13.1164551 C181.428467,12.7950033 180.657389,12.6342773 179.811035,12.6342773 C178.907715,12.6342773 178.108154,12.8295898 177.412354,13.2202148 C176.716553,13.6108398 176.185547,14.1764323 175.819336,14.9169922 L175.819336,8.58154297 C175.819336,8.26416016 175.709473,8.01391602 175.489746,7.83081055 C175.27002,7.64770508 175.009603,7.55615234 174.708496,7.55615234 C174.415527,7.55615234 174.15918,7.64770508 173.939453,7.83081055 C173.719727,8.01391602 173.609863,8.26416016 173.609863,8.58154297 L173.609863,23.9379883 C173.609863,24.2635091 173.719727,24.5178223 173.939453,24.7009277 C174.15918,24.8840332 174.415527,24.9755859 174.708496,24.9755859 C175.074707,24.9755859 175.3514,24.8840332 175.538574,24.7009277 C175.725749,24.5178223 175.819336,24.2675781 175.819336,23.9501953 L175.819336,22.8637695 C176.209961,23.6450195 176.76945,24.2329915 177.497803,24.6276855 C178.226156,25.0223796 179.062337,25.2197266 180.006348,25.2197266 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M188.381348,25 C188.682454,25 188.944906,24.9003092 189.168701,24.7009277 C189.392497,24.5015462 189.504395,24.230957 189.504395,23.8891602 L189.504395,8.66699219 C189.504395,8.32519531 189.396566,8.05664062 189.180908,7.86132812 C188.965251,7.66601562 188.710938,7.56835938 188.417969,7.56835938 C188.116862,7.56835938 187.854411,7.66601562 187.630615,7.86132812 C187.40682,8.05664062 187.294922,8.32519531 187.294922,8.66699219 L187.294922,23.8891602 C187.294922,24.2472331 187.400716,24.5218913 187.612305,24.7131348 C187.823893,24.9043783 188.080241,25 188.381348,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M193.716797,18.0175781 C193.757487,17.5130208 193.865316,17.0349121 194.040283,16.583252 C194.215251,16.1315918 194.451253,15.7206217 194.748291,15.3503418 C195.045329,14.9800618 195.425781,14.6850586 195.889648,14.465332 C196.353516,14.2456055 196.87028,14.1357422 197.439941,14.1357422 C198.538574,14.1357422 199.372721,14.4978841 199.942383,15.222168 C200.512044,15.9464518 200.833496,16.8782552 200.906738,18.0175781 L193.716797,18.0175781 Z M197.476562,25.2197266 C199.299479,25.2197266 200.853841,24.6419271 202.139648,23.4863281 C202.367513,23.2828776 202.481445,23.050944 202.481445,22.7905273 C202.481445,22.5870768 202.414307,22.4100749 202.280029,22.2595215 C202.145752,22.1089681 201.980957,22.0336914 201.785645,22.0336914 C201.622884,22.0336914 201.468262,22.0906576 201.321777,22.2045898 C200.73584,22.652181 200.153971,23.0061849 199.576172,23.2666016 C198.998372,23.5270182 198.347331,23.6572266 197.623047,23.6572266 C196.475586,23.6409505 195.535645,23.2727051 194.803223,22.5524902 C194.070801,21.8322754 193.696452,20.7722982 193.680176,19.3725586 L202.054199,19.3725586 C202.314616,19.3725586 202.509928,19.2952474 202.640137,19.140625 C202.770345,18.9860026 202.835449,18.7906901 202.835449,18.5546875 C202.811035,17.7327474 202.693034,16.977946 202.481445,16.2902832 C202.269857,15.6026204 201.952474,14.9800618 201.529297,14.4226074 C201.10612,13.865153 200.536458,13.4277344 199.820312,13.1103516 C199.104167,12.7929688 198.27002,12.6342773 197.317871,12.6342773 C196.113444,12.6342773 195.05957,12.9272461 194.15625,13.5131836 C193.25293,14.0991211 192.577474,14.860026 192.129883,15.7958984 C191.682292,16.7317708 191.458496,17.7652995 191.458496,18.8964844 C191.458496,20.1822917 191.725016,21.307373 192.258057,22.2717285 C192.791097,23.236084 193.509277,23.9664714 194.412598,24.4628906 C195.315918,24.9593099 196.33724,25.2115885 197.476562,25.2197266 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M212.493164,25 L221.794922,25 C222.104167,25 222.344238,24.9043783 222.515137,24.7131348 C222.686035,24.5218913 222.771484,24.2919922 222.771484,24.0234375 C222.771484,23.7548828 222.686035,23.5229492 222.515137,23.3276367 C222.344238,23.1323242 222.104167,23.034668 221.794922,23.034668 L213.738281,23.034668 L213.738281,16.9921875 L221.0625,16.9921875 C221.371745,16.9921875 221.613851,16.8965658 221.788818,16.7053223 C221.963786,16.5140788 222.05127,16.2882487 222.05127,16.027832 C222.05127,15.7674154 221.96582,15.5436198 221.794922,15.3564453 C221.624023,15.1692708 221.379883,15.0756836 221.0625,15.0756836 L213.738281,15.0756836 L213.738281,9.52148438 L221.477539,9.52148438 C221.786784,9.52148438 222.026855,9.42382812 222.197754,9.22851562 C222.368652,9.03320312 222.454102,8.80126953 222.454102,8.53271484 C222.454102,8.26416016 222.368652,8.03426107 222.197754,7.84301758 C222.026855,7.65177409 221.786784,7.55615234 221.477539,7.55615234 L212.432129,7.55615234 C212.106608,7.55615234 211.83195,7.67618815 211.608154,7.91625977 C211.384359,8.15633138 211.272461,8.43505859 211.272461,8.75244141 L211.272461,23.8037109 C211.272461,24.1210938 211.396566,24.399821 211.644775,24.6398926 C211.892985,24.8799642 212.175781,25 212.493164,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M226.202637,25 C226.511882,25 226.778402,24.9084473 227.002197,24.7253418 C227.225993,24.5422363 227.337891,24.2879232 227.337891,23.9624023 L227.337891,18.0053711 C227.337891,16.784668 227.655273,15.834554 228.290039,15.1550293 C228.924805,14.4755046 229.771159,14.1357422 230.829102,14.1357422 C232.60319,14.1357422 233.490234,15.2587891 233.490234,17.5048828 L233.490234,23.9501953 C233.490234,24.2757161 233.598063,24.5320638 233.813721,24.7192383 C234.029378,24.9064128 234.283691,25 234.57666,25 C234.877767,25 235.138184,24.9064128 235.35791,24.7192383 C235.577637,24.5320638 235.6875,24.2757161 235.6875,23.9501953 L235.6875,18.0664062 C235.6875,16.780599 236.01709,15.8040365 236.67627,15.1367188 C237.335449,14.469401 238.173665,14.1357422 239.190918,14.1357422 C240.04541,14.1357422 240.70459,14.3941243 241.168457,14.9108887 C241.632324,15.427653 241.864258,16.1905924 241.864258,17.199707 L241.864258,23.9624023 C241.864258,24.2879232 241.970052,24.5422363 242.181641,24.7253418 C242.393229,24.9084473 242.649577,25 242.950684,25 C243.25179,25 243.514242,24.9084473 243.738037,24.7253418 C243.961833,24.5422363 244.07373,24.2879232 244.07373,23.9624023 L244.07373,17.2851562 C244.07373,16.4876302 243.957764,15.7836914 243.72583,15.1733398 C243.493896,14.5629883 243.172445,14.0767415 242.761475,13.7145996 C242.350505,13.3524577 241.884603,13.0818685 241.36377,12.902832 C240.842936,12.7237956 240.273275,12.6342773 239.654785,12.6342773 C238.580566,12.6342773 237.669108,12.8723145 236.92041,13.3483887 C236.171712,13.8244629 235.626465,14.5345052 235.284668,15.4785156 C235.032389,14.5670573 234.552246,13.865153 233.844238,13.3728027 C233.13623,12.8804525 232.277669,12.6342773 231.268555,12.6342773 C230.38151,12.6342773 229.592122,12.8356934 228.900391,13.2385254 C228.208659,13.6413574 227.687826,14.2171224 227.337891,14.9658203 L227.337891,13.8305664 C227.337891,13.5294596 227.225993,13.293457 227.002197,13.1225586 C226.778402,12.9516602 226.52002,12.8662109 226.227051,12.8662109 C225.934082,12.8662109 225.6757,12.9577637 225.451904,13.1408691 C225.228109,13.3239746 225.116211,13.5742188 225.116211,13.8916016 L225.116211,23.9624023 C225.116211,24.2879232 225.222005,24.5422363 225.433594,24.7253418 C225.645182,24.9084473 225.90153,25 226.202637,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M252.65625,23.7060547 C250.133464,23.7060547 248.87207,22.0947266 248.87207,18.8720703 C248.87207,18.2535807 248.941243,17.6717122 249.07959,17.1264648 C249.217936,16.5812174 249.42749,16.0807292 249.708252,15.625 C249.989014,15.1692708 250.371501,14.8071289 250.855713,14.5385742 C251.339925,14.2700195 251.903483,14.1357422 252.546387,14.1357422 C253.181152,14.1357422 253.736572,14.2781576 254.212646,14.5629883 C254.688721,14.847819 255.056966,15.2282715 255.317383,15.7043457 C255.577799,16.1804199 255.771077,16.6870117 255.897217,17.2241211 C256.023356,17.7612305 256.086426,18.3227539 256.086426,18.9086914 C256.086426,19.4783529 256.02946,20.0256348 255.915527,20.5505371 C255.801595,21.0754395 255.622559,21.5820312 255.378418,22.0703125 C255.134277,22.5585938 254.778239,22.9512533 254.310303,23.248291 C253.842367,23.5453288 253.291016,23.6979167 252.65625,23.7060547 Z M253.046875,25.2197266 C253.893229,25.2197266 254.656169,25.0467936 255.335693,24.7009277 C256.015218,24.3550618 256.566569,23.8871257 256.989746,23.2971191 C257.412923,22.7071126 257.73641,22.0336914 257.960205,21.2768555 C258.184001,20.5200195 258.295898,19.7143555 258.295898,18.8598633 C258.295898,17.9890951 258.173828,17.1773275 257.929688,16.4245605 C257.685547,15.6717936 257.337646,15.0126139 256.885986,14.4470215 C256.434326,13.881429 255.860596,13.4379069 255.164795,13.1164551 C254.468994,12.7950033 253.697917,12.6342773 252.851562,12.6342773 C251.948242,12.6342773 251.148682,12.8295898 250.452881,13.2202148 C249.75708,13.6108398 249.226074,14.1764323 248.859863,14.9169922 L248.859863,8.58154297 C248.859863,8.26416016 248.75,8.01391602 248.530273,7.83081055 C248.310547,7.64770508 248.05013,7.55615234 247.749023,7.55615234 C247.456055,7.55615234 247.199707,7.64770508 246.97998,7.83081055 C246.760254,8.01391602 246.650391,8.26416016 246.650391,8.58154297 L246.650391,23.9379883 C246.650391,24.2635091 246.760254,24.5178223 246.97998,24.7009277 C247.199707,24.8840332 247.456055,24.9755859 247.749023,24.9755859 C248.115234,24.9755859 248.391927,24.8840332 248.579102,24.7009277 C248.766276,24.5178223 248.859863,24.2675781 248.859863,23.9501953 L248.859863,22.8637695 C249.250488,23.6450195 249.809977,24.2329915 250.53833,24.6276855 C251.266683,25.0223796 252.102865,25.2197266 253.046875,25.2197266 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M261.678223,18.0175781 C261.718913,17.5130208 261.826742,17.0349121 262.001709,16.583252 C262.176676,16.1315918 262.412679,15.7206217 262.709717,15.3503418 C263.006755,14.9800618 263.387207,14.6850586 263.851074,14.465332 C264.314941,14.2456055 264.831706,14.1357422 265.401367,14.1357422 C266.5,14.1357422 267.334147,14.4978841 267.903809,15.222168 C268.47347,15.9464518 268.794922,16.8782552 268.868164,18.0175781 L261.678223,18.0175781 Z M265.437988,25.2197266 C267.260905,25.2197266 268.815267,24.6419271 270.101074,23.4863281 C270.328939,23.2828776 270.442871,23.050944 270.442871,22.7905273 C270.442871,22.5870768 270.375732,22.4100749 270.241455,22.2595215 C270.107178,22.1089681 269.942383,22.0336914 269.74707,22.0336914 C269.58431,22.0336914 269.429688,22.0906576 269.283203,22.2045898 C268.697266,22.652181 268.115397,23.0061849 267.537598,23.2666016 C266.959798,23.5270182 266.308757,23.6572266 265.584473,23.6572266 C264.437012,23.6409505 263.49707,23.2727051 262.764648,22.5524902 C262.032227,21.8322754 261.657878,20.7722982 261.641602,19.3725586 L270.015625,19.3725586 C270.276042,19.3725586 270.471354,19.2952474 270.601562,19.140625 C270.731771,18.9860026 270.796875,18.7906901 270.796875,18.5546875 C270.772461,17.7327474 270.65446,16.977946 270.442871,16.2902832 C270.231283,15.6026204 269.9139,14.9800618 269.490723,14.4226074 C269.067546,13.865153 268.497884,13.4277344 267.781738,13.1103516 C267.065592,12.7929688 266.231445,12.6342773 265.279297,12.6342773 C264.07487,12.6342773 263.020996,12.9272461 262.117676,13.5131836 C261.214355,14.0991211 260.5389,14.860026 260.091309,15.7958984 C259.643717,16.7317708 259.419922,17.7652995 259.419922,18.8964844 C259.419922,20.1822917 259.686442,21.307373 260.219482,22.2717285 C260.752523,23.236084 261.470703,23.9664714 262.374023,24.4628906 C263.277344,24.9593099 264.298665,25.2115885 265.437988,25.2197266 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M277.645996,23.7060547 C277.01123,23.6979167 276.45988,23.5453288 275.991943,23.248291 C275.524007,22.9512533 275.167969,22.5585938 274.923828,22.0703125 C274.679688,21.5820312 274.500651,21.0754395 274.386719,20.5505371 C274.272786,20.0256348 274.21582,19.4783529 274.21582,18.9086914 C274.21582,18.3227539 274.27889,17.7612305 274.405029,17.2241211 C274.531169,16.6870117 274.724447,16.1804199 274.984863,15.7043457 C275.24528,15.2282715 275.613525,14.847819 276.0896,14.5629883 C276.565674,14.2781576 277.121094,14.1357422 277.755859,14.1357422 C278.398763,14.1357422 278.962321,14.2700195 279.446533,14.5385742 C279.930745,14.8071289 280.313232,15.1692708 280.593994,15.625 C280.874756,16.0807292 281.08431,16.5812174 281.222656,17.1264648 C281.361003,17.6717122 281.430176,18.2535807 281.430176,18.8720703 C281.430176,22.0947266 280.168783,23.7060547 277.645996,23.7060547 Z M277.255371,25.2197266 C278.199382,25.2197266 279.035563,25.0223796 279.763916,24.6276855 C280.492269,24.2329915 281.051758,23.6450195 281.442383,22.8637695 L281.442383,23.9501953 C281.442383,24.2675781 281.53597,24.5178223 281.723145,24.7009277 C281.910319,24.8840332 282.187012,24.9755859 282.553223,24.9755859 C282.846191,24.9755859 283.102539,24.8840332 283.322266,24.7009277 C283.541992,24.5178223 283.651855,24.2635091 283.651855,23.9379883 L283.651855,8.58154297 C283.651855,8.26416016 283.541992,8.01391602 283.322266,7.83081055 C283.102539,7.64770508 282.846191,7.55615234 282.553223,7.55615234 C282.252116,7.55615234 281.991699,7.64770508 281.771973,7.83081055 C281.552246,8.01391602 281.442383,8.26416016 281.442383,8.58154297 L281.442383,14.9169922 C281.076172,14.1764323 280.545166,13.6108398 279.849365,13.2202148 C279.153564,12.8295898 278.354004,12.6342773 277.450684,12.6342773 C276.604329,12.6342773 275.833252,12.7950033 275.137451,13.1164551 C274.44165,13.4379069 273.86792,13.881429 273.41626,14.4470215 C272.9646,15.0126139 272.616699,15.6717936 272.372559,16.4245605 C272.128418,17.1773275 272.006348,17.9890951 272.006348,18.8598633 C272.006348,19.7143555 272.118245,20.5200195 272.342041,21.2768555 C272.565837,22.0336914 272.889323,22.7071126 273.3125,23.2971191 C273.735677,23.8871257 274.287028,24.3550618 274.966553,24.7009277 C275.646077,25.0467936 276.409017,25.2197266 277.255371,25.2197266 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M291.172363,23.7060547 C290.537598,23.6979167 289.986247,23.5453288 289.518311,23.248291 C289.050374,22.9512533 288.694336,22.5585938 288.450195,22.0703125 C288.206055,21.5820312 288.027018,21.0754395 287.913086,20.5505371 C287.799154,20.0256348 287.742188,19.4783529 287.742188,18.9086914 C287.742188,18.3227539 287.805257,17.7612305 287.931396,17.2241211 C288.057536,16.6870117 288.250814,16.1804199 288.51123,15.7043457 C288.771647,15.2282715 289.139893,14.847819 289.615967,14.5629883 C290.092041,14.2781576 290.647461,14.1357422 291.282227,14.1357422 C291.92513,14.1357422 292.488688,14.2700195 292.9729,14.5385742 C293.457113,14.8071289 293.8396,15.1692708 294.120361,15.625 C294.401123,16.0807292 294.610677,16.5812174 294.749023,17.1264648 C294.88737,17.6717122 294.956543,18.2535807 294.956543,18.8720703 C294.956543,22.0947266 293.69515,23.7060547 291.172363,23.7060547 Z M290.781738,25.2197266 C291.725749,25.2197266 292.56193,25.0223796 293.290283,24.6276855 C294.018636,24.2329915 294.578125,23.6450195 294.96875,22.8637695 L294.96875,23.9501953 C294.96875,24.2675781 295.062337,24.5178223 295.249512,24.7009277 C295.436686,24.8840332 295.713379,24.9755859 296.07959,24.9755859 C296.372559,24.9755859 296.628906,24.8840332 296.848633,24.7009277 C297.068359,24.5178223 297.178223,24.2635091 297.178223,23.9379883 L297.178223,8.58154297 C297.178223,8.26416016 297.068359,8.01391602 296.848633,7.83081055 C296.628906,7.64770508 296.372559,7.55615234 296.07959,7.55615234 C295.778483,7.55615234 295.518066,7.64770508 295.29834,7.83081055 C295.078613,8.01391602 294.96875,8.26416016 294.96875,8.58154297 L294.96875,14.9169922 C294.602539,14.1764323 294.071533,13.6108398 293.375732,13.2202148 C292.679932,12.8295898 291.880371,12.6342773 290.977051,12.6342773 C290.130697,12.6342773 289.359619,12.7950033 288.663818,13.1164551 C287.968018,13.4379069 287.394287,13.881429 286.942627,14.4470215 C286.490967,15.0126139 286.143066,15.6717936 285.898926,16.4245605 C285.654785,17.1773275 285.532715,17.9890951 285.532715,18.8598633 C285.532715,19.7143555 285.644613,20.5200195 285.868408,21.2768555 C286.092204,22.0336914 286.41569,22.7071126 286.838867,23.2971191 C287.262044,23.8871257 287.813395,24.3550618 288.49292,24.7009277 C289.172445,25.0467936 289.935384,25.2197266 290.781738,25.2197266 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M301.12207,10.0463867 C301.553385,10.0463867 301.899251,9.9222819 302.159668,9.67407227 C302.420085,9.42586263 302.550293,9.1023763 302.550293,8.70361328 C302.550293,8.29671224 302.420085,7.9691569 302.159668,7.72094727 C301.899251,7.47273763 301.557454,7.34863281 301.134277,7.34863281 C300.702962,7.34863281 300.355062,7.47477214 300.090576,7.72705078 C299.82609,7.97932943 299.693848,8.30485026 299.693848,8.70361328 C299.693848,9.1023763 299.824056,9.42586263 300.084473,9.67407227 C300.344889,9.9222819 300.690755,10.0463867 301.12207,10.0463867 Z M301.109863,25 C301.41097,25 301.673421,24.9003092 301.897217,24.7009277 C302.121012,24.5015462 302.23291,24.230957 302.23291,23.8891602 L302.23291,13.9648438 C302.23291,13.6230469 302.125081,13.3544922 301.909424,13.1591797 C301.693766,12.9638672 301.439453,12.8662109 301.146484,12.8662109 C300.845378,12.8662109 300.582926,12.9638672 300.359131,13.1591797 C300.135335,13.3544922 300.023438,13.6230469 300.023438,13.9648438 L300.023438,23.8891602 C300.023438,24.2472331 300.129232,24.5218913 300.34082,24.7131348 C300.552409,24.9043783 300.808757,25 301.109863,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M305.969238,25 C306.270345,25 306.532796,24.9064128 306.756592,24.7192383 C306.980387,24.5320638 307.092285,24.2757161 307.092285,23.9501953 L307.092285,17.8588867 C307.149251,16.7114258 307.499186,15.8040365 308.14209,15.1367188 C308.784993,14.469401 309.574382,14.1357422 310.510254,14.1357422 C311.38916,14.1357422 312.070719,14.4083659 312.554932,14.9536133 C313.039144,15.4988607 313.28125,16.2923177 313.28125,17.3339844 L313.28125,23.9501953 C313.28125,24.2838542 313.389079,24.5422363 313.604736,24.7253418 C313.820394,24.9084473 314.078776,25 314.379883,25 C314.68099,25 314.939372,24.9084473 315.155029,24.7253418 C315.370687,24.5422363 315.478516,24.2838542 315.478516,23.9501953 L315.478516,17.3583984 C315.478516,15.7714844 315.079753,14.5874023 314.282227,13.8061523 C313.484701,13.0249023 312.414551,12.6342773 311.071777,12.6342773 C310.135905,12.6342773 309.315999,12.8336589 308.612061,13.2324219 C307.908122,13.6311849 307.40153,14.2008464 307.092285,14.9414062 L307.092285,13.8793945 C307.092285,13.5620117 306.984456,13.3158366 306.768799,13.1408691 C306.553141,12.9659017 306.294759,12.878418 305.993652,12.878418 C305.692546,12.878418 305.432129,12.9699707 305.212402,13.1530762 C304.992676,13.3361816 304.882812,13.5904948 304.882812,13.9160156 L304.882812,23.9501953 C304.882812,24.2838542 304.988607,24.5422363 305.200195,24.7253418 C305.411784,24.9084473 305.668132,25 305.969238,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M323.145508,23.5351562 C322.071289,23.5351562 321.184245,23.1445312 320.484375,22.3632812 C319.784505,21.5820312 319.43457,20.4345703 319.43457,18.9208984 C319.43457,18.0175781 319.56071,17.2159831 319.812988,16.5161133 C320.065267,15.8162435 320.476237,15.2445475 321.045898,14.8010254 C321.61556,14.3575033 322.31543,14.1357422 323.145508,14.1357422 C323.666341,14.1357422 324.134277,14.2232259 324.549316,14.3981934 C324.964355,14.5731608 325.304118,14.8091634 325.568604,15.1062012 C325.833089,15.4032389 326.052816,15.7572428 326.227783,16.1682129 C326.402751,16.5791829 326.526855,17.006429 326.600098,17.4499512 C326.67334,17.8934733 326.709961,18.367513 326.709961,18.8720703 C326.709961,20.3450521 326.376302,21.4904785 325.708984,22.3083496 C325.041667,23.1262207 324.187174,23.5351562 323.145508,23.5351562 Z M323.047852,30.1635742 C324.846354,30.1635742 326.264404,29.7241211 327.302002,28.8452148 C328.3396,27.9663086 328.858398,26.6601562 328.858398,24.9267578 L328.858398,13.9038086 C328.858398,13.5782878 328.756673,13.3239746 328.553223,13.1408691 C328.349772,12.9577637 328.105632,12.8662109 327.820801,12.8662109 C327.560384,12.8662109 327.330485,12.9455566 327.131104,13.104248 C326.931722,13.2629395 326.819824,13.4806315 326.79541,13.7573242 L326.79541,14.9902344 C325.989746,13.4195964 324.638835,12.6342773 322.742676,12.6342773 C321.595215,12.6342773 320.598307,12.9231771 319.751953,13.5009766 C318.905599,14.078776 318.272868,14.8417155 317.85376,15.7897949 C317.434652,16.7378743 317.225098,17.8059896 317.225098,18.9941406 C317.225098,19.8323568 317.349202,20.6136068 317.597412,21.3378906 C317.845622,22.0621745 318.199626,22.6969401 318.659424,23.2421875 C319.119222,23.7874349 319.70516,24.2167155 320.417236,24.5300293 C321.129313,24.8433431 321.924805,25 322.803711,25 C323.739583,25 324.543213,24.7823079 325.2146,24.3469238 C325.885986,23.9115397 326.388509,23.3235677 326.722168,22.5830078 L326.722168,25 C326.722168,26.171875 326.400716,27.0690918 325.757812,27.6916504 C325.114909,28.314209 324.16276,28.6254883 322.901367,28.6254883 C322.706055,28.6254883 322.512777,28.6173503 322.321533,28.6010742 C322.13029,28.5847982 321.959391,28.5664876 321.808838,28.5461426 C321.658285,28.5257975 321.491455,28.4952799 321.30835,28.4545898 C321.125244,28.4138997 320.97876,28.3813477 320.868896,28.3569336 C320.759033,28.3325195 320.616618,28.2918294 320.44165,28.2348633 C320.266683,28.1778971 320.146647,28.137207 320.081543,28.112793 C320.016439,28.0883789 319.900472,28.0436198 319.733643,27.9785156 C319.566813,27.9134115 319.467122,27.8767904 319.43457,27.8686523 C319.345052,27.8361003 319.251465,27.8198242 319.153809,27.8198242 C318.950358,27.8198242 318.777425,27.8951009 318.63501,28.0456543 C318.492594,28.1962077 318.421387,28.3732096 318.421387,28.5766602 C318.421387,28.8777669 318.576009,29.1137695 318.885254,29.284668 C319.983887,29.8706055 321.371419,30.1635742 323.047852,30.1635742 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M331.349609,23.9013672 C331.837891,24.2594401 332.490967,24.5707194 333.308838,24.8352051 C334.126709,25.0996908 334.971029,25.2319336 335.841797,25.2319336 C336.777669,25.2319336 337.599609,25.1037598 338.307617,24.8474121 C339.015625,24.5910645 339.587321,24.1780599 340.022705,23.6083984 C340.458089,23.038737 340.675781,22.3388672 340.675781,21.5087891 C340.675781,20.6298828 340.41333,19.9076335 339.888428,19.342041 C339.363525,18.7764486 338.486654,18.343099 337.257812,18.0419922 L335.145996,17.5170898 C334.258952,17.2973633 333.681152,17.0878092 333.412598,16.8884277 C333.144043,16.6890462 333.009766,16.3452148 333.009766,15.8569336 C333.009766,15.2547201 333.270182,14.8132324 333.791016,14.5324707 C334.311849,14.251709 335.011719,14.1113281 335.890625,14.1113281 C336.167318,14.1113281 336.439941,14.1296387 336.708496,14.1662598 C336.977051,14.2028809 337.241536,14.255778 337.501953,14.3249512 C337.76237,14.3941243 337.967855,14.453125 338.118408,14.5019531 C338.268962,14.5507812 338.464274,14.6219889 338.704346,14.7155762 C338.944417,14.8091634 339.07666,14.860026 339.101074,14.8681641 C339.231283,14.9169922 339.357422,14.9414062 339.479492,14.9414062 C339.691081,14.9414062 339.859945,14.8742676 339.986084,14.7399902 C340.112223,14.6057129 340.175293,14.444987 340.175293,14.2578125 C340.175293,13.9404297 340.016602,13.7003581 339.699219,13.5375977 C339.259766,13.3015951 338.692139,13.087972 337.996338,12.8967285 C337.300537,12.705485 336.557943,12.6098633 335.768555,12.6098633 C335.109375,12.6098633 334.499023,12.6729329 333.9375,12.7990723 C333.375977,12.9252116 332.869385,13.1164551 332.417725,13.3728027 C331.966064,13.6291504 331.610026,13.9770508 331.349609,14.4165039 C331.089193,14.855957 330.958984,15.3645833 330.958984,15.9423828 C330.958984,16.3085938 330.995605,16.6341146 331.068848,16.9189453 C331.14209,17.203776 331.260091,17.4519857 331.422852,17.6635742 C331.585612,17.8751628 331.760579,18.0562337 331.947754,18.2067871 C332.134928,18.3573405 332.385173,18.4977214 332.698486,18.6279297 C333.0118,18.758138 333.304769,18.8639323 333.577393,18.9453125 C333.850016,19.0266927 334.206055,19.124349 334.645508,19.2382812 L336.806152,19.7753906 C337.522298,19.9544271 338.034993,20.1904297 338.344238,20.4833984 C338.653483,20.7763672 338.808105,21.1669922 338.808105,21.6552734 C338.808105,22.3388672 338.533447,22.857666 337.984131,23.2116699 C337.434814,23.5656738 336.712565,23.7426758 335.817383,23.7426758 C334.572266,23.7345378 333.392253,23.3846029 332.277344,22.6928711 C332.098307,22.5789388 331.911133,22.5219727 331.71582,22.5219727 C331.504232,22.5219727 331.329264,22.5931803 331.190918,22.7355957 C331.052572,22.8780111 330.983398,23.046875 330.983398,23.2421875 C330.983398,23.5107422 331.105469,23.7304688 331.349609,23.9013672 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M361.307617,24.2675781 C361.511068,24.8046875 361.885417,25.0732422 362.430664,25.0732422 C362.748047,25.0732422 363.032878,24.9715169 363.285156,24.7680664 C363.537435,24.5646159 363.663574,24.3082682 363.663574,23.9990234 C363.663574,23.8688151 363.631022,23.7182617 363.565918,23.5473633 L357.767578,9.15527344 C357.490885,8.46354167 357.191813,8.00577799 356.870361,7.78198242 C356.54891,7.55818685 356.148112,7.44628906 355.667969,7.44628906 C355.187826,7.44628906 354.787028,7.55818685 354.465576,7.78198242 C354.144124,8.00577799 353.845052,8.46354167 353.568359,9.15527344 L347.77002,23.5473633 C347.704915,23.7182617 347.672363,23.8688151 347.672363,23.9990234 C347.672363,24.3082682 347.796468,24.5646159 348.044678,24.7680664 C348.292887,24.9715169 348.575684,25.0732422 348.893066,25.0732422 C349.446452,25.0732422 349.82487,24.8046875 350.02832,24.2675781 L351.46875,20.5566406 L359.867188,20.5566406 L361.307617,24.2675781 Z M359.171387,18.7866211 L352.164551,18.7866211 L355.667969,9.765625 L359.171387,18.7866211 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M368.010254,15.612793 L368.010254,9.50927734 L371.855469,9.50927734 C373.198242,9.50927734 374.170736,9.73714193 374.772949,10.1928711 C375.375163,10.6486003 375.67627,11.4257812 375.67627,12.5244141 C375.67627,13.6474609 375.356852,14.4429525 374.718018,14.9108887 C374.079183,15.3788249 373.080241,15.612793 371.721191,15.612793 L368.010254,15.612793 Z M366.777344,25 C367.119141,25 367.410075,24.8982747 367.650146,24.6948242 C367.890218,24.4913737 368.010254,24.202474 368.010254,23.828125 L368.010254,17.4926758 L371.733398,17.4926758 C372.701823,17.4926758 373.560384,17.4072266 374.309082,17.2363281 C375.05778,17.0654297 375.723063,16.7948405 376.304932,16.4245605 C376.8868,16.0542806 377.332357,15.5415853 377.641602,14.8864746 C377.950846,14.2313639 378.105469,13.4440104 378.105469,12.5244141 C378.105469,10.7584635 377.580566,9.48893229 376.530762,8.71582031 C375.480957,7.94270833 373.955078,7.55615234 371.953125,7.55615234 L366.789551,7.55615234 C366.439616,7.55615234 366.144613,7.67415365 365.904541,7.91015625 C365.664469,8.14615885 365.544434,8.43505859 365.544434,8.77685547 L365.544434,23.828125 C365.544434,24.1943359 365.666504,24.4812012 365.910645,24.6887207 C366.154785,24.8962402 366.443685,25 366.777344,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M381.133789,24.987793 C381.475586,24.987793 381.768555,24.8860677 382.012695,24.6826172 C382.256836,24.4791667 382.378906,24.1902669 382.378906,23.815918 L382.378906,8.72802734 C382.378906,8.35367839 382.25887,8.06274414 382.018799,7.85522461 C381.778727,7.64770508 381.491862,7.54394531 381.158203,7.54394531 C380.824544,7.54394531 380.53361,7.64973958 380.2854,7.86132812 C380.037191,8.07291667 379.913086,8.36181641 379.913086,8.72802734 L379.913086,23.815918 C379.913086,24.1821289 380.033122,24.4689941 380.273193,24.6765137 C380.513265,24.8840332 380.80013,24.987793 381.133789,24.987793 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M395.173828,27.746582 C395.409831,27.746582 395.611247,27.6692708 395.778076,27.5146484 C395.944906,27.360026 396.02832,27.1606445 396.02832,26.9165039 C396.02832,26.8025716 395.995768,26.6560872 395.930664,26.4770508 C394.734375,23.1730143 394.13623,20.1171875 394.13623,17.3095703 C394.13623,14.469401 394.742513,11.3850911 395.955078,8.05664062 C396.012044,7.91829427 396.040527,7.77587891 396.040527,7.62939453 C396.040527,7.38525391 395.955078,7.18180339 395.78418,7.01904297 C395.613281,6.85628255 395.409831,6.77490234 395.173828,6.77490234 C394.758789,6.77490234 394.449544,7.03125 394.246094,7.54394531 C393.472982,9.4156901 392.872803,11.134847 392.445557,12.701416 C392.018311,14.267985 391.804688,15.8081055 391.804688,17.3217773 C391.804688,18.8598633 392.02238,20.4121908 392.457764,21.9787598 C392.893148,23.5453288 393.505534,25.2604167 394.294922,27.1240234 C394.482096,27.5390625 394.775065,27.746582 395.173828,27.746582 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M397.921387,27.0263672 C398.336426,27.0263672 398.621257,26.8147786 398.775879,26.3916016 L404.684082,8.67919922 C404.724772,8.53271484 404.745117,8.41471354 404.745117,8.32519531 C404.745117,8.07291667 404.653564,7.86946615 404.470459,7.71484375 C404.287354,7.56022135 404.07373,7.48291016 403.82959,7.48291016 C403.373861,7.48291016 403.076823,7.6944987 402.938477,8.11767578 L397.079102,25.8422852 C397.038411,25.9887695 397.018066,26.1027018 397.018066,26.184082 C397.018066,26.4363607 397.107585,26.6398112 397.286621,26.7944336 C397.465658,26.949056 397.677246,27.0263672 397.921387,27.0263672 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M410.837402,25.012207 C411.30127,25.012207 411.665446,24.8881022 411.929932,24.6398926 C412.194417,24.3916829 412.452799,23.9664714 412.705078,23.3642578 L416.513672,14.2456055 C416.5625,14.1398112 416.586914,14.0258789 416.586914,13.9038086 C416.586914,13.6271159 416.470947,13.3850098 416.239014,13.1774902 C416.00708,12.9699707 415.748698,12.8662109 415.463867,12.8662109 C415.04069,12.8662109 414.739583,13.0981445 414.560547,13.5620117 L410.837402,22.7539062 L407.102051,13.5620117 C406.923014,13.0981445 406.621908,12.8662109 406.19873,12.8662109 C405.9139,12.8662109 405.655518,12.9699707 405.423584,13.1774902 C405.19165,13.3850098 405.075684,13.6271159 405.075684,13.9038086 C405.075684,14.0258789 405.100098,14.1398112 405.148926,14.2456055 L408.95752,23.3642578 C409.209798,23.9664714 409.46818,24.3916829 409.732666,24.6398926 C409.997152,24.8881022 410.365397,25.012207 410.837402,25.012207 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M424.15625,25 C424.489909,25 424.778809,24.8982747 425.022949,24.6948242 C425.26709,24.4913737 425.38916,24.206543 425.38916,23.840332 L425.376953,8.75244141 C425.376953,8.36995443 425.265055,8.07495117 425.04126,7.86743164 C424.817464,7.65991211 424.542806,7.55615234 424.217285,7.55615234 C423.94873,7.55615234 423.724935,7.6171875 423.545898,7.73925781 L419.358887,10.3515625 C419.082194,10.538737 418.943848,10.7747396 418.943848,11.0595703 C418.943848,11.3037109 419.031331,11.5152995 419.206299,11.6943359 C419.381266,11.8733724 419.59082,11.9628906 419.834961,11.9628906 C419.997721,11.9628906 420.156413,11.9181315 420.311035,11.8286133 L422.947754,10.1318359 L422.947754,23.840332 C422.947754,24.206543 423.06779,24.4913737 423.307861,24.6948242 C423.547933,24.8982747 423.830729,25 424.15625,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M431.066406,27.0263672 C431.481445,27.0263672 431.766276,26.8147786 431.920898,26.3916016 L437.829102,8.67919922 C437.869792,8.53271484 437.890137,8.41471354 437.890137,8.32519531 C437.890137,8.07291667 437.798584,7.86946615 437.615479,7.71484375 C437.432373,7.56022135 437.21875,7.48291016 436.974609,7.48291016 C436.51888,7.48291016 436.221842,7.6944987 436.083496,8.11767578 L430.224121,25.8422852 C430.183431,25.9887695 430.163086,26.1027018 430.163086,26.184082 C430.163086,26.4363607 430.252604,26.6398112 430.431641,26.7944336 C430.610677,26.949056 430.822266,27.0263672 431.066406,27.0263672 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M440.942871,18.0175781 C440.983561,17.5130208 441.09139,17.0349121 441.266357,16.583252 C441.441325,16.1315918 441.677327,15.7206217 441.974365,15.3503418 C442.271403,14.9800618 442.651855,14.6850586 443.115723,14.465332 C443.57959,14.2456055 444.096354,14.1357422 444.666016,14.1357422 C445.764648,14.1357422 446.598796,14.4978841 447.168457,15.222168 C447.738118,15.9464518 448.05957,16.8782552 448.132812,18.0175781 L440.942871,18.0175781 Z M444.702637,25.2197266 C446.525553,25.2197266 448.079915,24.6419271 449.365723,23.4863281 C449.593587,23.2828776 449.70752,23.050944 449.70752,22.7905273 C449.70752,22.5870768 449.640381,22.4100749 449.506104,22.2595215 C449.371826,22.1089681 449.207031,22.0336914 449.011719,22.0336914 C448.848958,22.0336914 448.694336,22.0906576 448.547852,22.2045898 C447.961914,22.652181 447.380046,23.0061849 446.802246,23.2666016 C446.224447,23.5270182 445.573405,23.6572266 444.849121,23.6572266 C443.70166,23.6409505 442.761719,23.2727051 442.029297,22.5524902 C441.296875,21.8322754 440.922526,20.7722982 440.90625,19.3725586 L449.280273,19.3725586 C449.54069,19.3725586 449.736003,19.2952474 449.866211,19.140625 C449.996419,18.9860026 450.061523,18.7906901 450.061523,18.5546875 C450.037109,17.7327474 449.919108,16.977946 449.70752,16.2902832 C449.495931,15.6026204 449.178548,14.9800618 448.755371,14.4226074 C448.332194,13.865153 447.762533,13.4277344 447.046387,13.1103516 C446.330241,12.7929688 445.496094,12.6342773 444.543945,12.6342773 C443.339518,12.6342773 442.285645,12.9272461 441.382324,13.5131836 C440.479004,14.0991211 439.803548,14.860026 439.355957,15.7958984 C438.908366,16.7317708 438.68457,17.7652995 438.68457,18.8964844 C438.68457,20.1822917 438.95109,21.307373 439.484131,22.2717285 C440.017171,23.236084 440.735352,23.9664714 441.638672,24.4628906 C442.541992,24.9593099 443.563314,25.2115885 444.702637,25.2197266 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M453.1875,25 C453.496745,25 453.763265,24.9084473 453.987061,24.7253418 C454.210856,24.5422363 454.322754,24.2879232 454.322754,23.9624023 L454.322754,18.0053711 C454.322754,16.784668 454.640137,15.834554 455.274902,15.1550293 C455.909668,14.4755046 456.756022,14.1357422 457.813965,14.1357422 C459.588053,14.1357422 460.475098,15.2587891 460.475098,17.5048828 L460.475098,23.9501953 C460.475098,24.2757161 460.582926,24.5320638 460.798584,24.7192383 C461.014242,24.9064128 461.268555,25 461.561523,25 C461.86263,25 462.123047,24.9064128 462.342773,24.7192383 C462.5625,24.5320638 462.672363,24.2757161 462.672363,23.9501953 L462.672363,18.0664062 C462.672363,16.780599 463.001953,15.8040365 463.661133,15.1367188 C464.320312,14.469401 465.158529,14.1357422 466.175781,14.1357422 C467.030273,14.1357422 467.689453,14.3941243 468.15332,14.9108887 C468.617188,15.427653 468.849121,16.1905924 468.849121,17.199707 L468.849121,23.9624023 C468.849121,24.2879232 468.954915,24.5422363 469.166504,24.7253418 C469.378092,24.9084473 469.63444,25 469.935547,25 C470.236654,25 470.499105,24.9084473 470.7229,24.7253418 C470.946696,24.5422363 471.058594,24.2879232 471.058594,23.9624023 L471.058594,17.2851562 C471.058594,16.4876302 470.942627,15.7836914 470.710693,15.1733398 C470.47876,14.5629883 470.157308,14.0767415 469.746338,13.7145996 C469.335368,13.3524577 468.869466,13.0818685 468.348633,12.902832 C467.827799,12.7237956 467.258138,12.6342773 466.639648,12.6342773 C465.56543,12.6342773 464.653971,12.8723145 463.905273,13.3483887 C463.156576,13.8244629 462.611328,14.5345052 462.269531,15.4785156 C462.017253,14.5670573 461.537109,13.865153 460.829102,13.3728027 C460.121094,12.8804525 459.262533,12.6342773 458.253418,12.6342773 C457.366374,12.6342773 456.576986,12.8356934 455.885254,13.2385254 C455.193522,13.6413574 454.672689,14.2171224 454.322754,14.9658203 L454.322754,13.8305664 C454.322754,13.5294596 454.210856,13.293457 453.987061,13.1225586 C453.763265,12.9516602 453.504883,12.8662109 453.211914,12.8662109 C452.918945,12.8662109 452.660563,12.9577637 452.436768,13.1408691 C452.212972,13.3239746 452.101074,13.5742188 452.101074,13.8916016 L452.101074,23.9624023 C452.101074,24.2879232 452.206868,24.5422363 452.418457,24.7253418 C452.630046,24.9084473 452.886393,25 453.1875,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M479.641113,23.7060547 C477.118327,23.7060547 475.856934,22.0947266 475.856934,18.8720703 C475.856934,18.2535807 475.926107,17.6717122 476.064453,17.1264648 C476.202799,16.5812174 476.412354,16.0807292 476.693115,15.625 C476.973877,15.1692708 477.356364,14.8071289 477.840576,14.5385742 C478.324788,14.2700195 478.888346,14.1357422 479.53125,14.1357422 C480.166016,14.1357422 480.721436,14.2781576 481.19751,14.5629883 C481.673584,14.847819 482.041829,15.2282715 482.302246,15.7043457 C482.562663,16.1804199 482.755941,16.6870117 482.88208,17.2241211 C483.008219,17.7612305 483.071289,18.3227539 483.071289,18.9086914 C483.071289,19.4783529 483.014323,20.0256348 482.900391,20.5505371 C482.786458,21.0754395 482.607422,21.5820312 482.363281,22.0703125 C482.119141,22.5585938 481.763102,22.9512533 481.295166,23.248291 C480.82723,23.5453288 480.275879,23.6979167 479.641113,23.7060547 Z M480.031738,25.2197266 C480.878092,25.2197266 481.641032,25.0467936 482.320557,24.7009277 C483.000081,24.3550618 483.551432,23.8871257 483.974609,23.2971191 C484.397786,22.7071126 484.721273,22.0336914 484.945068,21.2768555 C485.168864,20.5200195 485.280762,19.7143555 485.280762,18.8598633 C485.280762,17.9890951 485.158691,17.1773275 484.914551,16.4245605 C484.67041,15.6717936 484.32251,15.0126139 483.87085,14.4470215 C483.419189,13.881429 482.845459,13.4379069 482.149658,13.1164551 C481.453857,12.7950033 480.68278,12.6342773 479.836426,12.6342773 C478.933105,12.6342773 478.133545,12.8295898 477.437744,13.2202148 C476.741943,13.6108398 476.210938,14.1764323 475.844727,14.9169922 L475.844727,8.58154297 C475.844727,8.26416016 475.734863,8.01391602 475.515137,7.83081055 C475.29541,7.64770508 475.034993,7.55615234 474.733887,7.55615234 C474.440918,7.55615234 474.18457,7.64770508 473.964844,7.83081055 C473.745117,8.01391602 473.635254,8.26416016 473.635254,8.58154297 L473.635254,23.9379883 C473.635254,24.2635091 473.745117,24.5178223 473.964844,24.7009277 C474.18457,24.8840332 474.440918,24.9755859 474.733887,24.9755859 C475.100098,24.9755859 475.37679,24.8840332 475.563965,24.7009277 C475.751139,24.5178223 475.844727,24.2675781 475.844727,23.9501953 L475.844727,22.8637695 C476.235352,23.6450195 476.79484,24.2329915 477.523193,24.6276855 C478.251546,25.0223796 479.087728,25.2197266 480.031738,25.2197266 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M488.663086,18.0175781 C488.703776,17.5130208 488.811605,17.0349121 488.986572,16.583252 C489.16154,16.1315918 489.397542,15.7206217 489.69458,15.3503418 C489.991618,14.9800618 490.37207,14.6850586 490.835938,14.465332 C491.299805,14.2456055 491.816569,14.1357422 492.38623,14.1357422 C493.484863,14.1357422 494.31901,14.4978841 494.888672,15.222168 C495.458333,15.9464518 495.779785,16.8782552 495.853027,18.0175781 L488.663086,18.0175781 Z M492.422852,25.2197266 C494.245768,25.2197266 495.80013,24.6419271 497.085938,23.4863281 C497.313802,23.2828776 497.427734,23.050944 497.427734,22.7905273 C497.427734,22.5870768 497.360596,22.4100749 497.226318,22.2595215 C497.092041,22.1089681 496.927246,22.0336914 496.731934,22.0336914 C496.569173,22.0336914 496.414551,22.0906576 496.268066,22.2045898 C495.682129,22.652181 495.10026,23.0061849 494.522461,23.2666016 C493.944661,23.5270182 493.29362,23.6572266 492.569336,23.6572266 C491.421875,23.6409505 490.481934,23.2727051 489.749512,22.5524902 C489.01709,21.8322754 488.642741,20.7722982 488.626465,19.3725586 L497.000488,19.3725586 C497.260905,19.3725586 497.456217,19.2952474 497.586426,19.140625 C497.716634,18.9860026 497.781738,18.7906901 497.781738,18.5546875 C497.757324,17.7327474 497.639323,16.977946 497.427734,16.2902832 C497.216146,15.6026204 496.898763,14.9800618 496.475586,14.4226074 C496.052409,13.865153 495.482747,13.4277344 494.766602,13.1103516 C494.050456,12.7929688 493.216309,12.6342773 492.26416,12.6342773 C491.059733,12.6342773 490.005859,12.9272461 489.102539,13.5131836 C488.199219,14.0991211 487.523763,14.860026 487.076172,15.7958984 C486.628581,16.7317708 486.404785,17.7652995 486.404785,18.8964844 C486.404785,20.1822917 486.671305,21.307373 487.204346,22.2717285 C487.737386,23.236084 488.455566,23.9664714 489.358887,24.4628906 C490.262207,24.9593099 491.283529,25.2115885 492.422852,25.2197266 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M504.630859,23.7060547 C503.996094,23.6979167 503.444743,23.5453288 502.976807,23.248291 C502.50887,22.9512533 502.152832,22.5585938 501.908691,22.0703125 C501.664551,21.5820312 501.485514,21.0754395 501.371582,20.5505371 C501.25765,20.0256348 501.200684,19.4783529 501.200684,18.9086914 C501.200684,18.3227539 501.263753,17.7612305 501.389893,17.2241211 C501.516032,16.6870117 501.70931,16.1804199 501.969727,15.7043457 C502.230143,15.2282715 502.598389,14.847819 503.074463,14.5629883 C503.550537,14.2781576 504.105957,14.1357422 504.740723,14.1357422 C505.383626,14.1357422 505.947184,14.2700195 506.431396,14.5385742 C506.915609,14.8071289 507.298096,15.1692708 507.578857,15.625 C507.859619,16.0807292 508.069173,16.5812174 508.20752,17.1264648 C508.345866,17.6717122 508.415039,18.2535807 508.415039,18.8720703 C508.415039,22.0947266 507.153646,23.7060547 504.630859,23.7060547 Z M504.240234,25.2197266 C505.184245,25.2197266 506.020426,25.0223796 506.748779,24.6276855 C507.477132,24.2329915 508.036621,23.6450195 508.427246,22.8637695 L508.427246,23.9501953 C508.427246,24.2675781 508.520833,24.5178223 508.708008,24.7009277 C508.895182,24.8840332 509.171875,24.9755859 509.538086,24.9755859 C509.831055,24.9755859 510.087402,24.8840332 510.307129,24.7009277 C510.526855,24.5178223 510.636719,24.2635091 510.636719,23.9379883 L510.636719,8.58154297 C510.636719,8.26416016 510.526855,8.01391602 510.307129,7.83081055 C510.087402,7.64770508 509.831055,7.55615234 509.538086,7.55615234 C509.236979,7.55615234 508.976562,7.64770508 508.756836,7.83081055 C508.537109,8.01391602 508.427246,8.26416016 508.427246,8.58154297 L508.427246,14.9169922 C508.061035,14.1764323 507.530029,13.6108398 506.834229,13.2202148 C506.138428,12.8295898 505.338867,12.6342773 504.435547,12.6342773 C503.589193,12.6342773 502.818115,12.7950033 502.122314,13.1164551 C501.426514,13.4379069 500.852783,13.881429 500.401123,14.4470215 C499.949463,15.0126139 499.601562,15.6717936 499.357422,16.4245605 C499.113281,17.1773275 498.991211,17.9890951 498.991211,18.8598633 C498.991211,19.7143555 499.103109,20.5200195 499.326904,21.2768555 C499.5507,22.0336914 499.874186,22.7071126 500.297363,23.2971191 C500.72054,23.8871257 501.271891,24.3550618 501.951416,24.7009277 C502.630941,25.0467936 503.39388,25.2197266 504.240234,25.2197266 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M518.157227,23.7060547 C517.522461,23.6979167 516.97111,23.5453288 516.503174,23.248291 C516.035238,22.9512533 515.679199,22.5585938 515.435059,22.0703125 C515.190918,21.5820312 515.011882,21.0754395 514.897949,20.5505371 C514.784017,20.0256348 514.727051,19.4783529 514.727051,18.9086914 C514.727051,18.3227539 514.79012,17.7612305 514.91626,17.2241211 C515.042399,16.6870117 515.235677,16.1804199 515.496094,15.7043457 C515.75651,15.2282715 516.124756,14.847819 516.60083,14.5629883 C517.076904,14.2781576 517.632324,14.1357422 518.26709,14.1357422 C518.909993,14.1357422 519.473551,14.2700195 519.957764,14.5385742 C520.441976,14.8071289 520.824463,15.1692708 521.105225,15.625 C521.385986,16.0807292 521.59554,16.5812174 521.733887,17.1264648 C521.872233,17.6717122 521.941406,18.2535807 521.941406,18.8720703 C521.941406,22.0947266 520.680013,23.7060547 518.157227,23.7060547 Z M517.766602,25.2197266 C518.710612,25.2197266 519.546794,25.0223796 520.275146,24.6276855 C521.003499,24.2329915 521.562988,23.6450195 521.953613,22.8637695 L521.953613,23.9501953 C521.953613,24.2675781 522.047201,24.5178223 522.234375,24.7009277 C522.421549,24.8840332 522.698242,24.9755859 523.064453,24.9755859 C523.357422,24.9755859 523.61377,24.8840332 523.833496,24.7009277 C524.053223,24.5178223 524.163086,24.2635091 524.163086,23.9379883 L524.163086,8.58154297 C524.163086,8.26416016 524.053223,8.01391602 523.833496,7.83081055 C523.61377,7.64770508 523.357422,7.55615234 523.064453,7.55615234 C522.763346,7.55615234 522.50293,7.64770508 522.283203,7.83081055 C522.063477,8.01391602 521.953613,8.26416016 521.953613,8.58154297 L521.953613,14.9169922 C521.587402,14.1764323 521.056396,13.6108398 520.360596,13.2202148 C519.664795,12.8295898 518.865234,12.6342773 517.961914,12.6342773 C517.11556,12.6342773 516.344482,12.7950033 515.648682,13.1164551 C514.952881,13.4379069 514.37915,13.881429 513.92749,14.4470215 C513.47583,15.0126139 513.12793,15.6717936 512.883789,16.4245605 C512.639648,17.1773275 512.517578,17.9890951 512.517578,18.8598633 C512.517578,19.7143555 512.629476,20.5200195 512.853271,21.2768555 C513.077067,22.0336914 513.400553,22.7071126 513.82373,23.2971191 C514.246908,23.8871257 514.798258,24.3550618 515.477783,24.7009277 C516.157308,25.0467936 516.920247,25.2197266 517.766602,25.2197266 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M528.106934,10.0463867 C528.538249,10.0463867 528.884115,9.9222819 529.144531,9.67407227 C529.404948,9.42586263 529.535156,9.1023763 529.535156,8.70361328 C529.535156,8.29671224 529.404948,7.9691569 529.144531,7.72094727 C528.884115,7.47273763 528.542318,7.34863281 528.119141,7.34863281 C527.687826,7.34863281 527.339925,7.47477214 527.075439,7.72705078 C526.810954,7.97932943 526.678711,8.30485026 526.678711,8.70361328 C526.678711,9.1023763 526.808919,9.42586263 527.069336,9.67407227 C527.329753,9.9222819 527.675618,10.0463867 528.106934,10.0463867 Z M528.094727,25 C528.395833,25 528.658285,24.9003092 528.88208,24.7009277 C529.105876,24.5015462 529.217773,24.230957 529.217773,23.8891602 L529.217773,13.9648438 C529.217773,13.6230469 529.109945,13.3544922 528.894287,13.1591797 C528.67863,12.9638672 528.424316,12.8662109 528.131348,12.8662109 C527.830241,12.8662109 527.56779,12.9638672 527.343994,13.1591797 C527.120199,13.3544922 527.008301,13.6230469 527.008301,13.9648438 L527.008301,23.8891602 C527.008301,24.2472331 527.114095,24.5218913 527.325684,24.7131348 C527.537272,24.9043783 527.79362,25 528.094727,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M532.954102,25 C533.255208,25 533.51766,24.9064128 533.741455,24.7192383 C533.965251,24.5320638 534.077148,24.2757161 534.077148,23.9501953 L534.077148,17.8588867 C534.134115,16.7114258 534.484049,15.8040365 535.126953,15.1367188 C535.769857,14.469401 536.559245,14.1357422 537.495117,14.1357422 C538.374023,14.1357422 539.055583,14.4083659 539.539795,14.9536133 C540.024007,15.4988607 540.266113,16.2923177 540.266113,17.3339844 L540.266113,23.9501953 C540.266113,24.2838542 540.373942,24.5422363 540.5896,24.7253418 C540.805257,24.9084473 541.063639,25 541.364746,25 C541.665853,25 541.924235,24.9084473 542.139893,24.7253418 C542.35555,24.5422363 542.463379,24.2838542 542.463379,23.9501953 L542.463379,17.3583984 C542.463379,15.7714844 542.064616,14.5874023 541.26709,13.8061523 C540.469564,13.0249023 539.399414,12.6342773 538.056641,12.6342773 C537.120768,12.6342773 536.300863,12.8336589 535.596924,13.2324219 C534.892985,13.6311849 534.386393,14.2008464 534.077148,14.9414062 L534.077148,13.8793945 C534.077148,13.5620117 533.96932,13.3158366 533.753662,13.1408691 C533.538005,12.9659017 533.279622,12.878418 532.978516,12.878418 C532.677409,12.878418 532.416992,12.9699707 532.197266,13.1530762 C531.977539,13.3361816 531.867676,13.5904948 531.867676,13.9160156 L531.867676,23.9501953 C531.867676,24.2838542 531.97347,24.5422363 532.185059,24.7253418 C532.396647,24.9084473 532.652995,25 532.954102,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M550.130371,23.5351562 C549.056152,23.5351562 548.169108,23.1445312 547.469238,22.3632812 C546.769368,21.5820312 546.419434,20.4345703 546.419434,18.9208984 C546.419434,18.0175781 546.545573,17.2159831 546.797852,16.5161133 C547.05013,15.8162435 547.4611,15.2445475 548.030762,14.8010254 C548.600423,14.3575033 549.300293,14.1357422 550.130371,14.1357422 C550.651204,14.1357422 551.119141,14.2232259 551.53418,14.3981934 C551.949219,14.5731608 552.288981,14.8091634 552.553467,15.1062012 C552.817952,15.4032389 553.037679,15.7572428 553.212646,16.1682129 C553.387614,16.5791829 553.511719,17.006429 553.584961,17.4499512 C553.658203,17.8934733 553.694824,18.367513 553.694824,18.8720703 C553.694824,20.3450521 553.361165,21.4904785 552.693848,22.3083496 C552.02653,23.1262207 551.172038,23.5351562 550.130371,23.5351562 Z M550.032715,30.1635742 C551.831217,30.1635742 553.249268,29.7241211 554.286865,28.8452148 C555.324463,27.9663086 555.843262,26.6601562 555.843262,24.9267578 L555.843262,13.9038086 C555.843262,13.5782878 555.741536,13.3239746 555.538086,13.1408691 C555.334635,12.9577637 555.090495,12.8662109 554.805664,12.8662109 C554.545247,12.8662109 554.315348,12.9455566 554.115967,13.104248 C553.916585,13.2629395 553.804688,13.4806315 553.780273,13.7573242 L553.780273,14.9902344 C552.974609,13.4195964 551.623698,12.6342773 549.727539,12.6342773 C548.580078,12.6342773 547.583171,12.9231771 546.736816,13.5009766 C545.890462,14.078776 545.257731,14.8417155 544.838623,15.7897949 C544.419515,16.7378743 544.209961,17.8059896 544.209961,18.9941406 C544.209961,19.8323568 544.334066,20.6136068 544.582275,21.3378906 C544.830485,22.0621745 545.184489,22.6969401 545.644287,23.2421875 C546.104085,23.7874349 546.690023,24.2167155 547.4021,24.5300293 C548.114176,24.8433431 548.909668,25 549.788574,25 C550.724447,25 551.528076,24.7823079 552.199463,24.3469238 C552.87085,23.9115397 553.373372,23.3235677 553.707031,22.5830078 L553.707031,25 C553.707031,26.171875 553.385579,27.0690918 552.742676,27.6916504 C552.099772,28.314209 551.147624,28.6254883 549.88623,28.6254883 C549.690918,28.6254883 549.49764,28.6173503 549.306396,28.6010742 C549.115153,28.5847982 548.944255,28.5664876 548.793701,28.5461426 C548.643148,28.5257975 548.476318,28.4952799 548.293213,28.4545898 C548.110107,28.4138997 547.963623,28.3813477 547.85376,28.3569336 C547.743896,28.3325195 547.601481,28.2918294 547.426514,28.2348633 C547.251546,28.1778971 547.13151,28.137207 547.066406,28.112793 C547.001302,28.0883789 546.885335,28.0436198 546.718506,27.9785156 C546.551676,27.9134115 546.451986,27.8767904 546.419434,27.8686523 C546.329915,27.8361003 546.236328,27.8198242 546.138672,27.8198242 C545.935221,27.8198242 545.762288,27.8951009 545.619873,28.0456543 C545.477458,28.1962077 545.40625,28.3732096 545.40625,28.5766602 C545.40625,28.8777669 545.560872,29.1137695 545.870117,29.284668 C546.96875,29.8706055 548.356283,30.1635742 550.032715,30.1635742 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M558.334473,23.9013672 C558.822754,24.2594401 559.47583,24.5707194 560.293701,24.8352051 C561.111572,25.0996908 561.955892,25.2319336 562.82666,25.2319336 C563.762533,25.2319336 564.584473,25.1037598 565.29248,24.8474121 C566.000488,24.5910645 566.572184,24.1780599 567.007568,23.6083984 C567.442952,23.038737 567.660645,22.3388672 567.660645,21.5087891 C567.660645,20.6298828 567.398193,19.9076335 566.873291,19.342041 C566.348389,18.7764486 565.471517,18.343099 564.242676,18.0419922 L562.130859,17.5170898 C561.243815,17.2973633 560.666016,17.0878092 560.397461,16.8884277 C560.128906,16.6890462 559.994629,16.3452148 559.994629,15.8569336 C559.994629,15.2547201 560.255046,14.8132324 560.775879,14.5324707 C561.296712,14.251709 561.996582,14.1113281 562.875488,14.1113281 C563.152181,14.1113281 563.424805,14.1296387 563.693359,14.1662598 C563.961914,14.2028809 564.2264,14.255778 564.486816,14.3249512 C564.747233,14.3941243 564.952718,14.453125 565.103271,14.5019531 C565.253825,14.5507812 565.449137,14.6219889 565.689209,14.7155762 C565.929281,14.8091634 566.061523,14.860026 566.085938,14.8681641 C566.216146,14.9169922 566.342285,14.9414062 566.464355,14.9414062 C566.675944,14.9414062 566.844808,14.8742676 566.970947,14.7399902 C567.097087,14.6057129 567.160156,14.444987 567.160156,14.2578125 C567.160156,13.9404297 567.001465,13.7003581 566.684082,13.5375977 C566.244629,13.3015951 565.677002,13.087972 564.981201,12.8967285 C564.2854,12.705485 563.542806,12.6098633 562.753418,12.6098633 C562.094238,12.6098633 561.483887,12.6729329 560.922363,12.7990723 C560.36084,12.9252116 559.854248,13.1164551 559.402588,13.3728027 C558.950928,13.6291504 558.594889,13.9770508 558.334473,14.4165039 C558.074056,14.855957 557.943848,15.3645833 557.943848,15.9423828 C557.943848,16.3085938 557.980469,16.6341146 558.053711,16.9189453 C558.126953,17.203776 558.244954,17.4519857 558.407715,17.6635742 C558.570475,17.8751628 558.745443,18.0562337 558.932617,18.2067871 C559.119792,18.3573405 559.370036,18.4977214 559.68335,18.6279297 C559.996663,18.758138 560.289632,18.8639323 560.562256,18.9453125 C560.83488,19.0266927 561.190918,19.124349 561.630371,19.2382812 L563.791016,19.7753906 C564.507161,19.9544271 565.019857,20.1904297 565.329102,20.4833984 C565.638346,20.7763672 565.792969,21.1669922 565.792969,21.6552734 C565.792969,22.3388672 565.518311,22.857666 564.968994,23.2116699 C564.419678,23.5656738 563.697428,23.7426758 562.802246,23.7426758 C561.557129,23.7345378 560.377116,23.3846029 559.262207,22.6928711 C559.083171,22.5789388 558.895996,22.5219727 558.700684,22.5219727 C558.489095,22.5219727 558.314128,22.5931803 558.175781,22.7355957 C558.037435,22.8780111 557.968262,23.046875 557.968262,23.2421875 C557.968262,23.5107422 558.090332,23.7304688 558.334473,23.9013672 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M570.005371,27.7587891 C570.404134,27.7587891 570.701172,27.5472005 570.896484,27.1240234 C571.685872,25.2685547 572.298258,23.5555013 572.733643,21.9848633 C573.169027,20.4142253 573.386719,18.8639323 573.386719,17.3339844 C573.386719,15.8121745 573.173096,14.267985 572.74585,12.701416 C572.318604,11.134847 571.718424,9.4156901 570.945312,7.54394531 C570.717448,7.03938802 570.412272,6.78710938 570.029785,6.78710938 C569.793783,6.78710938 569.588298,6.86848958 569.41333,7.03125 C569.238363,7.19401042 569.150879,7.39339193 569.150879,7.62939453 C569.150879,7.75960286 569.183431,7.90201823 569.248535,8.05664062 C570.452962,11.4095052 571.055176,14.4938151 571.055176,17.3095703 C571.055176,20.1660156 570.457031,23.2218424 569.260742,26.4770508 C569.195638,26.6479492 569.163086,26.7985026 569.163086,26.9287109 C569.163086,27.1647135 569.246501,27.3620605 569.41333,27.520752 C569.58016,27.6794434 569.777507,27.7587891 570.005371,27.7587891 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-9" transform="translate(0, 132)" xlink:href="#path-33" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M4.81152344,15.2373047 L4.81152344,8.88964844 L8.81054688,8.88964844 C10.2070312,8.88964844 11.2184245,9.1266276 11.8447266,9.60058594 C12.4710286,10.0745443 12.7841797,10.8828125 12.7841797,12.0253906 C12.7841797,13.1933594 12.4519857,14.0206706 11.7875977,14.5073242 C11.1232096,14.9939779 10.0843099,15.2373047 8.67089844,15.2373047 L4.81152344,15.2373047 Z M3.52929688,25 C3.88476562,25 4.18733724,24.8942057 4.43701172,24.6826172 C4.6866862,24.4710286 4.81152344,24.1705729 4.81152344,23.78125 L4.81152344,17.1923828 L8.68359375,17.1923828 C9.69075521,17.1923828 10.5836589,17.1035156 11.3623047,16.9257812 C12.1409505,16.7480469 12.8328451,16.4666341 13.4379883,16.081543 C14.0431315,15.6964518 14.5065104,15.1632487 14.828125,14.4819336 C15.1497396,13.8006185 15.3105469,12.9817708 15.3105469,12.0253906 C15.3105469,10.1888021 14.7646484,8.86848958 13.6728516,8.06445312 C12.5810547,7.26041667 10.9941406,6.85839844 8.91210938,6.85839844 L3.54199219,6.85839844 C3.1780599,6.85839844 2.87125651,6.98111979 2.62158203,7.2265625 C2.37190755,7.47200521 2.24707031,7.77246094 2.24707031,8.12792969 L2.24707031,23.78125 C2.24707031,24.1621094 2.37402344,24.4604492 2.62792969,24.6762695 C2.88183594,24.8920898 3.18229167,25 3.52929688,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M22.0927734,25.2285156 C24.0478516,25.2285156 25.5966797,24.6149089 26.7392578,23.3876953 C27.8818359,22.1604818 28.453125,20.5947266 28.453125,18.6904297 C28.453125,16.7692057 27.8797201,15.1971029 26.7329102,13.9741211 C25.5861003,12.7511393 24.039388,12.1396484 22.0927734,12.1396484 C20.1461589,12.1396484 18.5994466,12.7532552 17.4526367,13.9804688 C16.3058268,15.2076823 15.7324219,16.7776693 15.7324219,18.6904297 C15.7324219,20.5947266 16.3058268,22.1604818 17.4526367,23.3876953 C18.5994466,24.6149089 20.1461589,25.2285156 22.0927734,25.2285156 Z M22.0673828,23.6542969 C20.7978516,23.6542969 19.8055013,23.2078451 19.090332,22.3149414 C18.3751628,21.4220378 18.0175781,20.2138672 18.0175781,18.6904297 C18.0175781,17.1500651 18.3772786,15.933431 19.0966797,15.0405273 C19.8160807,14.1476237 20.8147786,13.7011719 22.0927734,13.7011719 C23.3623047,13.7011719 24.3588867,14.1497396 25.0825195,15.046875 C25.8061523,15.9440104 26.1679688,17.1585286 26.1679688,18.6904297 C26.1679688,20.2307943 25.8103841,21.4431966 25.0952148,22.3276367 C24.3800456,23.2120768 23.3707682,23.6542969 22.0673828,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M35.7939453,25.2285156 C37.7490234,25.2285156 39.2978516,24.6149089 40.4404297,23.3876953 C41.5830078,22.1604818 42.1542969,20.5947266 42.1542969,18.6904297 C42.1542969,16.7692057 41.5808919,15.1971029 40.434082,13.9741211 C39.2872721,12.7511393 37.7405599,12.1396484 35.7939453,12.1396484 C33.8473307,12.1396484 32.3006185,12.7532552 31.1538086,13.9804688 C30.0069987,15.2076823 29.4335938,16.7776693 29.4335938,18.6904297 C29.4335938,20.5947266 30.0069987,22.1604818 31.1538086,23.3876953 C32.3006185,24.6149089 33.8473307,25.2285156 35.7939453,25.2285156 Z M35.7685547,23.6542969 C34.4990234,23.6542969 33.5066732,23.2078451 32.7915039,22.3149414 C32.0763346,21.4220378 31.71875,20.2138672 31.71875,18.6904297 C31.71875,17.1500651 32.0784505,15.933431 32.7978516,15.0405273 C33.5172526,14.1476237 34.5159505,13.7011719 35.7939453,13.7011719 C37.0634766,13.7011719 38.0600586,14.1497396 38.7836914,15.046875 C39.5073242,15.9440104 39.8691406,17.1585286 39.8691406,18.6904297 C39.8691406,20.2307943 39.511556,21.4431966 38.7963867,22.3276367 C38.0812174,23.2120768 37.0719401,23.6542969 35.7685547,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M45.2929688,25 C45.6061198,25 45.879069,24.8963216 46.1118164,24.6889648 C46.3445638,24.4816081 46.4609375,24.2001953 46.4609375,23.8447266 L46.4609375,8.01367188 C46.4609375,7.65820312 46.3487956,7.37890625 46.1245117,7.17578125 C45.9002279,6.97265625 45.6357422,6.87109375 45.3310547,6.87109375 C45.0179036,6.87109375 44.7449544,6.97265625 44.512207,7.17578125 C44.2794596,7.37890625 44.1630859,7.65820312 44.1630859,8.01367188 L44.1630859,23.8447266 C44.1630859,24.2171224 44.273112,24.5027669 44.4931641,24.7016602 C44.7132161,24.9005534 44.9798177,25 45.2929688,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M50.6025391,9.44824219 C51.0511068,9.44824219 51.4108073,9.31917318 51.6816406,9.06103516 C51.952474,8.80289714 52.0878906,8.46647135 52.0878906,8.05175781 C52.0878906,7.62858073 51.952474,7.28792318 51.6816406,7.02978516 C51.4108073,6.77164714 51.0553385,6.64257812 50.6152344,6.64257812 C50.1666667,6.64257812 49.8048503,6.77376302 49.5297852,7.03613281 C49.2547201,7.2985026 49.1171875,7.63704427 49.1171875,8.05175781 C49.1171875,8.46647135 49.2526042,8.80289714 49.5234375,9.06103516 C49.7942708,9.31917318 50.1539714,9.44824219 50.6025391,9.44824219 Z M50.5898438,25 C50.9029948,25 51.175944,24.8963216 51.4086914,24.6889648 C51.6414388,24.4816081 51.7578125,24.2001953 51.7578125,23.8447266 L51.7578125,13.5234375 C51.7578125,13.1679688 51.6456706,12.8886719 51.4213867,12.6855469 C51.1971029,12.4824219 50.9326172,12.3808594 50.6279297,12.3808594 C50.3147786,12.3808594 50.0418294,12.4824219 49.809082,12.6855469 C49.5763346,12.8886719 49.4599609,13.1679688 49.4599609,13.5234375 L49.4599609,23.8447266 C49.4599609,24.2171224 49.569987,24.5027669 49.7900391,24.7016602 C50.0100911,24.9005534 50.2766927,25 50.5898438,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M55.6835938,25 C55.9967448,25 56.269694,24.9026693 56.5024414,24.7080078 C56.7351888,24.5133464 56.8515625,24.2467448 56.8515625,23.9082031 L56.8515625,17.5732422 C56.9108073,16.3798828 57.2747396,15.4361979 57.9433594,14.7421875 C58.6119792,14.0481771 59.4329427,13.7011719 60.40625,13.7011719 C61.3203125,13.7011719 62.0291341,13.9847005 62.5327148,14.5517578 C63.0362956,15.1188151 63.2880859,15.9440104 63.2880859,17.0273438 L63.2880859,23.9082031 C63.2880859,24.2552083 63.4002279,24.5239258 63.6245117,24.7143555 C63.8487956,24.9047852 64.117513,25 64.4306641,25 C64.7438151,25 65.0125326,24.9047852 65.2368164,24.7143555 C65.4611003,24.5239258 65.5732422,24.2552083 65.5732422,23.9082031 L65.5732422,17.0527344 C65.5732422,15.4023438 65.1585286,14.1708984 64.3291016,13.3583984 C63.4996745,12.5458984 62.3867188,12.1396484 60.9902344,12.1396484 C60.0169271,12.1396484 59.1642253,12.3470052 58.4321289,12.7617188 C57.7000326,13.1764323 57.1731771,13.7688802 56.8515625,14.5390625 L56.8515625,13.4345703 C56.8515625,13.1044922 56.7394206,12.8484701 56.5151367,12.6665039 C56.2908529,12.4845378 56.0221354,12.3935547 55.7089844,12.3935547 C55.3958333,12.3935547 55.125,12.4887695 54.8964844,12.6791992 C54.6679688,12.8696289 54.5537109,13.1341146 54.5537109,13.4726562 L54.5537109,23.9082031 C54.5537109,24.2552083 54.663737,24.5239258 54.8837891,24.7143555 C55.1038411,24.9047852 55.3704427,25 55.6835938,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M73.5869141,23.4765625 C72.4697266,23.4765625 71.5472005,23.0703125 70.8193359,22.2578125 C70.0914714,21.4453125 69.7275391,20.2519531 69.7275391,18.6777344 C69.7275391,17.7382812 69.858724,16.9046224 70.1210938,16.1767578 C70.3834635,15.4488932 70.8108724,14.8543294 71.4033203,14.3930664 C71.9957682,13.9318034 72.7236328,13.7011719 73.5869141,13.7011719 C74.1285807,13.7011719 74.6152344,13.7921549 75.046875,13.9741211 C75.4785156,14.1560872 75.8318685,14.4015299 76.1069336,14.7104492 C76.3819987,15.0193685 76.6105143,15.3875326 76.7924805,15.8149414 C76.9744466,16.2423503 77.1035156,16.6866862 77.1796875,17.1479492 C77.2558594,17.6092122 77.2939453,18.1022135 77.2939453,18.6269531 C77.2939453,20.1588542 76.9469401,21.3500977 76.2529297,22.2006836 C75.5589193,23.0512695 74.6702474,23.4765625 73.5869141,23.4765625 Z M73.4853516,30.3701172 C75.3557943,30.3701172 76.8305664,29.9130859 77.909668,28.9990234 C78.9887695,28.0849609 79.5283203,26.7265625 79.5283203,24.9238281 L79.5283203,13.4599609 C79.5283203,13.1214193 79.422526,12.8569336 79.2109375,12.6665039 C78.999349,12.4760742 78.7454427,12.3808594 78.4492188,12.3808594 C78.1783854,12.3808594 77.9392904,12.4633789 77.7319336,12.628418 C77.5245768,12.793457 77.4082031,13.0198568 77.3828125,13.3076172 L77.3828125,14.5898438 C76.5449219,12.9563802 75.139974,12.1396484 73.1679688,12.1396484 C71.9746094,12.1396484 70.9378255,12.4401042 70.0576172,13.0410156 C69.1774089,13.6419271 68.5193685,14.4353841 68.0834961,15.4213867 C67.6476237,16.4073893 67.4296875,17.5182292 67.4296875,18.7539062 C67.4296875,19.625651 67.5587565,20.438151 67.8168945,21.1914062 C68.0750326,21.9446615 68.4431966,22.6048177 68.9213867,23.171875 C69.3995768,23.7389323 70.0089518,24.1853841 70.7495117,24.5112305 C71.4900716,24.8370768 72.3173828,25 73.2314453,25 C74.2047526,25 75.0405273,24.7736003 75.7387695,24.3208008 C76.4370117,23.8680013 76.9596354,23.2565104 77.3066406,22.4863281 L77.3066406,25 C77.3066406,26.21875 76.9723307,27.1518555 76.3037109,27.7993164 C75.6350911,28.4467773 74.6448568,28.7705078 73.3330078,28.7705078 C73.1298828,28.7705078 72.9288737,28.7620443 72.7299805,28.7451172 C72.5310872,28.7281901 72.3533529,28.7091471 72.1967773,28.6879883 C72.0402018,28.6668294 71.8666992,28.6350911 71.6762695,28.5927734 C71.4858398,28.5504557 71.3334961,28.5166016 71.2192383,28.4912109 C71.1049805,28.4658203 70.9568685,28.4235026 70.7749023,28.3642578 C70.5929362,28.305013 70.468099,28.2626953 70.4003906,28.2373047 C70.3326823,28.2119141 70.2120768,28.1653646 70.0385742,28.0976562 C69.8650716,28.0299479 69.7613932,27.991862 69.7275391,27.9833984 C69.6344401,27.9495443 69.5371094,27.9326172 69.4355469,27.9326172 C69.2239583,27.9326172 69.0441081,28.0109049 68.8959961,28.1674805 C68.7478841,28.324056 68.6738281,28.508138 68.6738281,28.7197266 C68.6738281,29.0328776 68.8346354,29.2783203 69.15625,29.4560547 C70.2988281,30.0654297 71.741862,30.3701172 73.4853516,30.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M101.624023,24.2382812 C101.835612,24.796875 102.224935,25.0761719 102.791992,25.0761719 C103.12207,25.0761719 103.418294,24.9703776 103.680664,24.7587891 C103.943034,24.5472005 104.074219,24.280599 104.074219,23.9589844 C104.074219,23.8235677 104.040365,23.6669922 103.972656,23.4892578 L97.9423828,8.52148438 C97.6546224,7.80208333 97.3435872,7.32600911 97.0092773,7.09326172 C96.6749674,6.86051432 96.258138,6.74414062 95.7587891,6.74414062 C95.2594401,6.74414062 94.8426107,6.86051432 94.5083008,7.09326172 C94.1739909,7.32600911 93.8629557,7.80208333 93.5751953,8.52148438 L87.5449219,23.4892578 C87.4772135,23.6669922 87.4433594,23.8235677 87.4433594,23.9589844 C87.4433594,24.280599 87.5724284,24.5472005 87.8305664,24.7587891 C88.0887044,24.9703776 88.3828125,25.0761719 88.7128906,25.0761719 C89.2884115,25.0761719 89.6819661,24.796875 89.8935547,24.2382812 L91.3916016,20.3789062 L100.125977,20.3789062 L101.624023,24.2382812 Z M99.4023438,18.5380859 L92.1152344,18.5380859 L95.7587891,9.15625 L99.4023438,18.5380859 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M108.634766,15.2373047 L108.634766,8.88964844 L112.633789,8.88964844 C114.030273,8.88964844 115.041667,9.1266276 115.667969,9.60058594 C116.294271,10.0745443 116.607422,10.8828125 116.607422,12.0253906 C116.607422,13.1933594 116.275228,14.0206706 115.61084,14.5073242 C114.946452,14.9939779 113.907552,15.2373047 112.494141,15.2373047 L108.634766,15.2373047 Z M107.352539,25 C107.708008,25 108.010579,24.8942057 108.260254,24.6826172 C108.509928,24.4710286 108.634766,24.1705729 108.634766,23.78125 L108.634766,17.1923828 L112.506836,17.1923828 C113.513997,17.1923828 114.406901,17.1035156 115.185547,16.9257812 C115.964193,16.7480469 116.656087,16.4666341 117.26123,16.081543 C117.866374,15.6964518 118.329753,15.1632487 118.651367,14.4819336 C118.972982,13.8006185 119.133789,12.9817708 119.133789,12.0253906 C119.133789,10.1888021 118.587891,8.86848958 117.496094,8.06445312 C116.404297,7.26041667 114.817383,6.85839844 112.735352,6.85839844 L107.365234,6.85839844 C107.001302,6.85839844 106.694499,6.98111979 106.444824,7.2265625 C106.19515,7.47200521 106.070312,7.77246094 106.070312,8.12792969 L106.070312,23.78125 C106.070312,24.1621094 106.197266,24.4604492 106.451172,24.6762695 C106.705078,24.8920898 107.005534,25 107.352539,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M122.323242,24.9873047 C122.678711,24.9873047 122.983398,24.8815104 123.237305,24.6699219 C123.491211,24.4583333 123.618164,24.1578776 123.618164,23.7685547 L123.618164,8.07714844 C123.618164,7.68782552 123.493327,7.38525391 123.243652,7.16943359 C122.993978,6.95361328 122.695638,6.84570312 122.348633,6.84570312 C122.001628,6.84570312 121.699056,6.95572917 121.440918,7.17578125 C121.18278,7.39583333 121.053711,7.69628906 121.053711,8.07714844 L121.053711,23.7685547 C121.053711,24.1494141 121.178548,24.4477539 121.428223,24.6635742 C121.677897,24.8793945 121.976237,24.9873047 122.323242,24.9873047 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M137.004883,27.8564453 C137.250326,27.8564453 137.459798,27.7760417 137.633301,27.6152344 C137.806803,27.4544271 137.893555,27.2470703 137.893555,26.9931641 C137.893555,26.8746745 137.859701,26.7223307 137.791992,26.5361328 C136.547852,23.0999349 135.925781,19.921875 135.925781,17.0019531 C135.925781,14.0481771 136.556315,10.8404948 137.817383,7.37890625 C137.876628,7.23502604 137.90625,7.08691406 137.90625,6.93457031 C137.90625,6.68066406 137.817383,6.46907552 137.639648,6.29980469 C137.461914,6.13053385 137.250326,6.04589844 137.004883,6.04589844 C136.573242,6.04589844 136.251628,6.3125 136.040039,6.84570312 C135.236003,8.79231771 134.611816,10.5802409 134.16748,12.2094727 C133.723145,13.8387044 133.500977,15.4404297 133.500977,17.0146484 C133.500977,18.6142578 133.727376,20.2286784 134.180176,21.8579102 C134.632975,23.4871419 135.269857,25.2708333 136.09082,27.2089844 C136.285482,27.640625 136.590169,27.8564453 137.004883,27.8564453 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M139.902344,27.1074219 C140.333984,27.1074219 140.630208,26.8873698 140.791016,26.4472656 L146.935547,8.02636719 C146.977865,7.87402344 146.999023,7.75130208 146.999023,7.65820312 C146.999023,7.39583333 146.903809,7.18424479 146.713379,7.0234375 C146.522949,6.86263021 146.300781,6.78222656 146.046875,6.78222656 C145.572917,6.78222656 145.263997,7.00227865 145.120117,7.44238281 L139.026367,25.8759766 C138.984049,26.0283203 138.962891,26.1468099 138.962891,26.2314453 C138.962891,26.4938151 139.05599,26.7054036 139.242188,26.8662109 C139.428385,27.0270182 139.648438,27.1074219 139.902344,27.1074219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M154.78418,23.6542969 C154.11556,23.6542969 153.52946,23.5146484 153.025879,23.2353516 C152.522298,22.9560547 152.124512,22.5794271 151.83252,22.1054688 C151.540527,21.6315104 151.322591,21.1131185 151.178711,20.550293 C151.034831,19.9874674 150.962891,19.3844401 150.962891,18.7412109 C150.962891,15.3811849 152.27474,13.7011719 154.898438,13.7011719 C155.431641,13.7096354 155.907715,13.8196615 156.32666,14.03125 C156.745605,14.2428385 157.086263,14.5136719 157.348633,14.84375 C157.611003,15.1738281 157.828939,15.563151 158.002441,16.0117188 C158.175944,16.4602865 158.296549,16.9046224 158.364258,17.3447266 C158.431966,17.7848307 158.46582,18.2376302 158.46582,18.703125 C158.46582,19.3125 158.400228,19.8964844 158.269043,20.4550781 C158.137858,21.0136719 157.936849,21.5405273 157.666016,22.0356445 C157.395182,22.5307617 157.012207,22.9243164 156.51709,23.2163086 C156.021973,23.5083008 155.444336,23.6542969 154.78418,23.6542969 Z M149.794922,29.8242188 C150.108073,29.8242188 150.378906,29.7290039 150.607422,29.5385742 C150.835938,29.3481445 150.950195,29.0878906 150.950195,28.7578125 L150.950195,22.8544922 C151.331055,23.6162109 151.883301,24.2023112 152.606934,24.612793 C153.330566,25.0232747 154.162109,25.2285156 155.101562,25.2285156 C155.981771,25.2285156 156.783691,25.0613607 157.507324,24.7270508 C158.230957,24.3927409 158.827637,23.9314779 159.297363,23.3432617 C159.76709,22.7550456 160.128906,22.0694987 160.382812,21.2866211 C160.636719,20.5037435 160.763672,19.6595052 160.763672,18.7539062 C160.763672,17.8652344 160.647298,17.0273438 160.414551,16.2402344 C160.181803,15.453125 159.845378,14.7527669 159.405273,14.1391602 C158.965169,13.5255534 158.391764,13.0388997 157.685059,12.6791992 C156.978353,12.3194987 156.184896,12.1396484 155.304688,12.1396484 C154.322917,12.1396484 153.453288,12.3448893 152.695801,12.7553711 C151.938314,13.1658529 151.356445,13.7773438 150.950195,14.5898438 L150.950195,13.4599609 C150.950195,13.1298828 150.852865,12.8696289 150.658203,12.6791992 C150.463542,12.4887695 150.175781,12.3935547 149.794922,12.3935547 C149.481771,12.3935547 149.213053,12.4866536 148.98877,12.6728516 C148.764486,12.8590495 148.652344,13.1214193 148.652344,13.4599609 L148.652344,28.7578125 C148.652344,29.0878906 148.766602,29.3481445 148.995117,29.5385742 C149.223633,29.7290039 149.490234,29.8242188 149.794922,29.8242188 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M168.256836,25.2285156 C170.211914,25.2285156 171.760742,24.6149089 172.90332,23.3876953 C174.045898,22.1604818 174.617188,20.5947266 174.617188,18.6904297 C174.617188,16.7692057 174.043783,15.1971029 172.896973,13.9741211 C171.750163,12.7511393 170.203451,12.1396484 168.256836,12.1396484 C166.310221,12.1396484 164.763509,12.7532552 163.616699,13.9804688 C162.469889,15.2076823 161.896484,16.7776693 161.896484,18.6904297 C161.896484,20.5947266 162.469889,22.1604818 163.616699,23.3876953 C164.763509,24.6149089 166.310221,25.2285156 168.256836,25.2285156 Z M168.231445,23.6542969 C166.961914,23.6542969 165.969564,23.2078451 165.254395,22.3149414 C164.539225,21.4220378 164.181641,20.2138672 164.181641,18.6904297 C164.181641,17.1500651 164.541341,15.933431 165.260742,15.0405273 C165.980143,14.1476237 166.978841,13.7011719 168.256836,13.7011719 C169.526367,13.7011719 170.522949,14.1497396 171.246582,15.046875 C171.970215,15.9440104 172.332031,17.1585286 172.332031,18.6904297 C172.332031,20.2307943 171.974447,21.4431966 171.259277,22.3276367 C170.544108,23.2120768 169.534831,23.6542969 168.231445,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M181.958008,25.2285156 C183.913086,25.2285156 185.461914,24.6149089 186.604492,23.3876953 C187.74707,22.1604818 188.318359,20.5947266 188.318359,18.6904297 C188.318359,16.7692057 187.744954,15.1971029 186.598145,13.9741211 C185.451335,12.7511393 183.904622,12.1396484 181.958008,12.1396484 C180.011393,12.1396484 178.464681,12.7532552 177.317871,13.9804688 C176.171061,15.2076823 175.597656,16.7776693 175.597656,18.6904297 C175.597656,20.5947266 176.171061,22.1604818 177.317871,23.3876953 C178.464681,24.6149089 180.011393,25.2285156 181.958008,25.2285156 Z M181.932617,23.6542969 C180.663086,23.6542969 179.670736,23.2078451 178.955566,22.3149414 C178.240397,21.4220378 177.882812,20.2138672 177.882812,18.6904297 C177.882812,17.1500651 178.242513,15.933431 178.961914,15.0405273 C179.681315,14.1476237 180.680013,13.7011719 181.958008,13.7011719 C183.227539,13.7011719 184.224121,14.1497396 184.947754,15.046875 C185.671387,15.9440104 186.033203,17.1585286 186.033203,18.6904297 C186.033203,20.2307943 185.675618,21.4431966 184.960449,22.3276367 C184.24528,23.2120768 183.236003,23.6542969 181.932617,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M191.457031,25 C191.770182,25 192.043132,24.8963216 192.275879,24.6889648 C192.508626,24.4816081 192.625,24.2001953 192.625,23.8447266 L192.625,8.01367188 C192.625,7.65820312 192.512858,7.37890625 192.288574,7.17578125 C192.06429,6.97265625 191.799805,6.87109375 191.495117,6.87109375 C191.181966,6.87109375 190.909017,6.97265625 190.67627,7.17578125 C190.443522,7.37890625 190.327148,7.65820312 190.327148,8.01367188 L190.327148,23.8447266 C190.327148,24.2171224 190.437174,24.5027669 190.657227,24.7016602 C190.877279,24.9005534 191.14388,25 191.457031,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M196.766602,9.44824219 C197.215169,9.44824219 197.57487,9.31917318 197.845703,9.06103516 C198.116536,8.80289714 198.251953,8.46647135 198.251953,8.05175781 C198.251953,7.62858073 198.116536,7.28792318 197.845703,7.02978516 C197.57487,6.77164714 197.219401,6.64257812 196.779297,6.64257812 C196.330729,6.64257812 195.968913,6.77376302 195.693848,7.03613281 C195.418783,7.2985026 195.28125,7.63704427 195.28125,8.05175781 C195.28125,8.46647135 195.416667,8.80289714 195.6875,9.06103516 C195.958333,9.31917318 196.318034,9.44824219 196.766602,9.44824219 Z M196.753906,25 C197.067057,25 197.340007,24.8963216 197.572754,24.6889648 C197.805501,24.4816081 197.921875,24.2001953 197.921875,23.8447266 L197.921875,13.5234375 C197.921875,13.1679688 197.809733,12.8886719 197.585449,12.6855469 C197.361165,12.4824219 197.09668,12.3808594 196.791992,12.3808594 C196.478841,12.3808594 196.205892,12.4824219 195.973145,12.6855469 C195.740397,12.8886719 195.624023,13.1679688 195.624023,13.5234375 L195.624023,23.8447266 C195.624023,24.2171224 195.734049,24.5027669 195.954102,24.7016602 C196.174154,24.9005534 196.440755,25 196.753906,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M201.847656,25 C202.160807,25 202.433757,24.9026693 202.666504,24.7080078 C202.899251,24.5133464 203.015625,24.2467448 203.015625,23.9082031 L203.015625,17.5732422 C203.07487,16.3798828 203.438802,15.4361979 204.107422,14.7421875 C204.776042,14.0481771 205.597005,13.7011719 206.570312,13.7011719 C207.484375,13.7011719 208.193197,13.9847005 208.696777,14.5517578 C209.200358,15.1188151 209.452148,15.9440104 209.452148,17.0273438 L209.452148,23.9082031 C209.452148,24.2552083 209.56429,24.5239258 209.788574,24.7143555 C210.012858,24.9047852 210.281576,25 210.594727,25 C210.907878,25 211.176595,24.9047852 211.400879,24.7143555 C211.625163,24.5239258 211.737305,24.2552083 211.737305,23.9082031 L211.737305,17.0527344 C211.737305,15.4023438 211.322591,14.1708984 210.493164,13.3583984 C209.663737,12.5458984 208.550781,12.1396484 207.154297,12.1396484 C206.18099,12.1396484 205.328288,12.3470052 204.596191,12.7617188 C203.864095,13.1764323 203.33724,13.7688802 203.015625,14.5390625 L203.015625,13.4345703 C203.015625,13.1044922 202.903483,12.8484701 202.679199,12.6665039 C202.454915,12.4845378 202.186198,12.3935547 201.873047,12.3935547 C201.559896,12.3935547 201.289062,12.4887695 201.060547,12.6791992 C200.832031,12.8696289 200.717773,13.1341146 200.717773,13.4726562 L200.717773,23.9082031 C200.717773,24.2552083 200.827799,24.5239258 201.047852,24.7143555 C201.267904,24.9047852 201.534505,25 201.847656,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M219.750977,23.4765625 C218.633789,23.4765625 217.711263,23.0703125 216.983398,22.2578125 C216.255534,21.4453125 215.891602,20.2519531 215.891602,18.6777344 C215.891602,17.7382812 216.022786,16.9046224 216.285156,16.1767578 C216.547526,15.4488932 216.974935,14.8543294 217.567383,14.3930664 C218.159831,13.9318034 218.887695,13.7011719 219.750977,13.7011719 C220.292643,13.7011719 220.779297,13.7921549 221.210938,13.9741211 C221.642578,14.1560872 221.995931,14.4015299 222.270996,14.7104492 C222.546061,15.0193685 222.774577,15.3875326 222.956543,15.8149414 C223.138509,16.2423503 223.267578,16.6866862 223.34375,17.1479492 C223.419922,17.6092122 223.458008,18.1022135 223.458008,18.6269531 C223.458008,20.1588542 223.111003,21.3500977 222.416992,22.2006836 C221.722982,23.0512695 220.83431,23.4765625 219.750977,23.4765625 Z M219.649414,30.3701172 C221.519857,30.3701172 222.994629,29.9130859 224.07373,28.9990234 C225.152832,28.0849609 225.692383,26.7265625 225.692383,24.9238281 L225.692383,13.4599609 C225.692383,13.1214193 225.586589,12.8569336 225.375,12.6665039 C225.163411,12.4760742 224.909505,12.3808594 224.613281,12.3808594 C224.342448,12.3808594 224.103353,12.4633789 223.895996,12.628418 C223.688639,12.793457 223.572266,13.0198568 223.546875,13.3076172 L223.546875,14.5898438 C222.708984,12.9563802 221.304036,12.1396484 219.332031,12.1396484 C218.138672,12.1396484 217.101888,12.4401042 216.22168,13.0410156 C215.341471,13.6419271 214.683431,14.4353841 214.247559,15.4213867 C213.811686,16.4073893 213.59375,17.5182292 213.59375,18.7539062 C213.59375,19.625651 213.722819,20.438151 213.980957,21.1914062 C214.239095,21.9446615 214.607259,22.6048177 215.085449,23.171875 C215.563639,23.7389323 216.173014,24.1853841 216.913574,24.5112305 C217.654134,24.8370768 218.481445,25 219.395508,25 C220.368815,25 221.20459,24.7736003 221.902832,24.3208008 C222.601074,23.8680013 223.123698,23.2565104 223.470703,22.4863281 L223.470703,25 C223.470703,26.21875 223.136393,27.1518555 222.467773,27.7993164 C221.799154,28.4467773 220.808919,28.7705078 219.49707,28.7705078 C219.293945,28.7705078 219.092936,28.7620443 218.894043,28.7451172 C218.69515,28.7281901 218.517415,28.7091471 218.36084,28.6879883 C218.204264,28.6668294 218.030762,28.6350911 217.840332,28.5927734 C217.649902,28.5504557 217.497559,28.5166016 217.383301,28.4912109 C217.269043,28.4658203 217.120931,28.4235026 216.938965,28.3642578 C216.756999,28.305013 216.632161,28.2626953 216.564453,28.2373047 C216.496745,28.2119141 216.376139,28.1653646 216.202637,28.0976562 C216.029134,28.0299479 215.925456,27.991862 215.891602,27.9833984 C215.798503,27.9495443 215.701172,27.9326172 215.599609,27.9326172 C215.388021,27.9326172 215.208171,28.0109049 215.060059,28.1674805 C214.911947,28.324056 214.837891,28.508138 214.837891,28.7197266 C214.837891,29.0328776 214.998698,29.2783203 215.320312,29.4560547 C216.462891,30.0654297 217.905924,30.3701172 219.649414,30.3701172 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M228.729492,27.8691406 C229.144206,27.8691406 229.453125,27.6490885 229.65625,27.2089844 C230.477214,25.2792969 231.114095,23.4977214 231.566895,21.8642578 C232.019694,20.2307943 232.246094,18.6184896 232.246094,17.0273438 C232.246094,15.4446615 232.023926,13.8387044 231.57959,12.2094727 C231.135254,10.5802409 230.511068,8.79231771 229.707031,6.84570312 C229.470052,6.32096354 229.152669,6.05859375 228.754883,6.05859375 C228.50944,6.05859375 228.295736,6.14322917 228.11377,6.3125 C227.931803,6.48177083 227.84082,6.6891276 227.84082,6.93457031 C227.84082,7.06998698 227.874674,7.21809896 227.942383,7.37890625 C229.194987,10.8658854 229.821289,14.0735677 229.821289,17.0019531 C229.821289,19.9726562 229.199219,23.1507161 227.955078,26.5361328 C227.88737,26.7138672 227.853516,26.8704427 227.853516,27.0058594 C227.853516,27.2513021 227.940267,27.456543 228.11377,27.621582 C228.287272,27.7866211 228.492513,27.8691406 228.729492,27.8691406 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-13" transform="translate(0, 172)" xlink:href="#path-34" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M8.07421875,25.2792969 C10.1731771,25.2792969 11.8341471,24.8180339 13.0571289,23.8955078 C14.2801107,22.9729818 14.8916016,21.6822917 14.8916016,20.0234375 C14.8916016,19.3378906 14.7879232,18.7327474 14.5805664,18.2080078 C14.3732096,17.6832682 14.0600586,17.2325846 13.6411133,16.855957 C13.222168,16.4793294 12.7566732,16.1598307 12.2446289,15.8974609 C11.7325846,15.6350911 11.1083984,15.3854167 10.3720703,15.1484375 L6.71582031,13.9677734 C5.85253906,13.680013 5.22412109,13.3393555 4.83056641,12.9458008 C4.43701172,12.5522461 4.24023438,12.0253906 4.24023438,11.3652344 C4.24023438,10.485026 4.62109375,9.81217448 5.3828125,9.34667969 C6.14453125,8.8811849 7.16015625,8.6484375 8.4296875,8.6484375 C9.31835938,8.6484375 10.1541341,8.79020182 10.9370117,9.07373047 C11.7198893,9.35725911 12.3694661,9.66829427 12.8857422,10.0068359 C13.0634766,10.1253255 13.2496745,10.1845703 13.4443359,10.1845703 C13.7151693,10.1845703 13.9479167,10.078776 14.1425781,9.8671875 C14.3372396,9.65559896 14.4345703,9.41861979 14.4345703,9.15625 C14.4345703,8.90234375 14.3372396,8.69075521 14.1425781,8.52148438 C13.5670573,8.00520833 12.7355143,7.55452474 11.6479492,7.16943359 C10.5603841,6.78434245 9.41992188,6.59179688 8.2265625,6.59179688 C7.0078125,6.59179688 5.91389974,6.78222656 4.94482422,7.16308594 C3.9757487,7.54394531 3.20556641,8.10677083 2.63427734,8.8515625 C2.06298828,9.59635417 1.77734375,10.4596354 1.77734375,11.4414062 C1.77734375,12.6601562 2.10742188,13.6165365 2.76757812,14.3105469 C3.42773438,15.0045573 4.43066406,15.5800781 5.77636719,16.0371094 L9.41992188,17.2558594 C9.96158854,17.4420573 10.4016927,17.6155599 10.7402344,17.7763672 C11.078776,17.9371745 11.3813477,18.1360677 11.6479492,18.3730469 C11.9145508,18.610026 12.1049805,18.887207 12.2192383,19.2045898 C12.3334961,19.5219727 12.390625,19.9049479 12.390625,20.3535156 C12.390625,21.2845052 11.9991862,21.9954427 11.2163086,22.4863281 C10.433431,22.9772135 9.42415365,23.2226562 8.18847656,23.2226562 C6.29264323,23.2226562 4.62532552,22.6640625 3.18652344,21.546875 C3.09342448,21.4791667 2.98763021,21.4453125 2.86914062,21.4453125 C2.60677083,21.4453125 2.34651693,21.5891927 2.08837891,21.8769531 C1.83024089,22.1647135 1.70117188,22.4440104 1.70117188,22.7148438 C1.70117188,22.875651 1.75195312,23.0026042 1.85351562,23.0957031 C3.41927083,24.5514323 5.49283854,25.2792969 8.07421875,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M22.3974609,25.2412109 C24.0563151,25.2412109 25.3427734,24.8730469 26.2568359,24.1367188 C26.5445964,23.8997396 26.6884766,23.6416016 26.6884766,23.3623047 C26.6884766,23.1676432 26.6207682,23.0047201 26.4853516,22.8735352 C26.3499349,22.7423503 26.1848958,22.6767578 25.9902344,22.6767578 C25.8125,22.6767578 25.6432292,22.7317708 25.4824219,22.8417969 C24.703776,23.3834635 23.7347005,23.6542969 22.5751953,23.6542969 C21.9150391,23.6542969 21.3310547,23.5167643 20.8232422,23.2416992 C20.3154297,22.9666341 19.9091797,22.5942383 19.6044922,22.1245117 C19.2998047,21.6547852 19.0712891,21.1321615 18.9189453,20.5566406 C18.7666016,19.9811198 18.6904297,19.3717448 18.6904297,18.7285156 C18.6904297,17.188151 19.0670573,15.9630534 19.8203125,15.0532227 C20.5735677,14.1433919 21.5553385,13.6884766 22.765625,13.6884766 C23.6966146,13.6884766 24.5472005,13.938151 25.3173828,14.4375 C25.4866536,14.547526 25.664388,14.6025391 25.8505859,14.6025391 C26.0537109,14.6025391 26.2250977,14.5390625 26.3647461,14.4121094 C26.5043945,14.2851562 26.5742188,14.1285807 26.5742188,13.9423828 C26.5742188,13.680013 26.4303385,13.4345703 26.1425781,13.2060547 C25.7786458,12.9013672 25.2877604,12.6474609 24.6699219,12.4443359 C24.0520833,12.2412109 23.3792318,12.1396484 22.6513672,12.1396484 C21.4072266,12.1396484 20.3069661,12.4316406 19.3505859,13.015625 C18.3942057,13.5996094 17.6621094,14.3867188 17.1542969,15.3769531 C16.6464844,16.3671875 16.3925781,17.4716797 16.3925781,18.6904297 C16.3925781,20.5777995 16.9342448,22.1414388 18.0175781,23.3813477 C19.1009115,24.6212565 20.5608724,25.2412109 22.3974609,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M33.9277344,25.2285156 C35.8828125,25.2285156 37.4316406,24.6149089 38.5742188,23.3876953 C39.7167969,22.1604818 40.2880859,20.5947266 40.2880859,18.6904297 C40.2880859,16.7692057 39.714681,15.1971029 38.5678711,13.9741211 C37.4210612,12.7511393 35.874349,12.1396484 33.9277344,12.1396484 C31.9811198,12.1396484 30.4344076,12.7532552 29.2875977,13.9804688 C28.1407878,15.2076823 27.5673828,16.7776693 27.5673828,18.6904297 C27.5673828,20.5947266 28.1407878,22.1604818 29.2875977,23.3876953 C30.4344076,24.6149089 31.9811198,25.2285156 33.9277344,25.2285156 Z M33.9023438,23.6542969 C32.6328125,23.6542969 31.6404622,23.2078451 30.925293,22.3149414 C30.2101237,21.4220378 29.8525391,20.2138672 29.8525391,18.6904297 C29.8525391,17.1500651 30.2122396,15.933431 30.9316406,15.0405273 C31.6510417,14.1476237 32.6497396,13.7011719 33.9277344,13.7011719 C35.1972656,13.7011719 36.1938477,14.1497396 36.9174805,15.046875 C37.6411133,15.9440104 38.0029297,17.1585286 38.0029297,18.6904297 C38.0029297,20.2307943 37.6453451,21.4431966 36.9301758,22.3276367 C36.2150065,23.2120768 35.2057292,23.6542969 33.9023438,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M43.2871094,25 C43.6002604,25 43.8732096,24.906901 44.105957,24.7207031 C44.3387044,24.5345052 44.4550781,24.2721354 44.4550781,23.9335938 L44.4550781,17.3955078 C44.4550781,16.4052734 44.7703451,15.6139323 45.4008789,15.0214844 C46.0314128,14.4290365 46.9179688,14.1328125 48.0605469,14.1328125 C48.3313802,14.1328125 48.538737,14.0418294 48.6826172,13.8598633 C48.8264974,13.6778971 48.8984375,13.4599609 48.8984375,13.2060547 C48.8984375,12.9352214 48.8201497,12.6982422 48.6635742,12.4951172 C48.5069987,12.2919922 48.2932943,12.1904297 48.0224609,12.1904297 C47.1253255,12.1904297 46.3572591,12.4549154 45.7182617,12.9838867 C45.0792643,13.5128581 44.6539714,14.1708984 44.4423828,14.9580078 L44.4550781,13.4345703 C44.4550781,13.1129557 44.3429362,12.8611654 44.1186523,12.6791992 C43.8943685,12.4972331 43.625651,12.40625 43.3125,12.40625 C42.999349,12.40625 42.7285156,12.4972331 42.5,12.6791992 C42.2714844,12.8611654 42.1572266,13.1214193 42.1572266,13.4599609 L42.1572266,23.9335938 C42.1572266,24.2721354 42.2672526,24.5345052 42.4873047,24.7207031 C42.7073568,24.906901 42.9739583,25 43.2871094,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M51.9355469,17.7382812 C51.9778646,17.2135417 52.0900065,16.7163086 52.2719727,16.246582 C52.4539388,15.7768555 52.6993815,15.3494466 53.0083008,14.9643555 C53.3172201,14.5792643 53.7128906,14.2724609 54.1953125,14.0439453 C54.6777344,13.8154297 55.2151693,13.7011719 55.8076172,13.7011719 C56.9501953,13.7011719 57.8177083,14.0777995 58.4101562,14.8310547 C59.0026042,15.5843099 59.3369141,16.5533854 59.4130859,17.7382812 L51.9355469,17.7382812 Z M55.8457031,25.2285156 C57.7415365,25.2285156 59.3580729,24.6276042 60.6953125,23.4257812 C60.9322917,23.2141927 61.0507812,22.9729818 61.0507812,22.7021484 C61.0507812,22.4905599 60.980957,22.3064779 60.8413086,22.1499023 C60.7016602,21.9933268 60.5302734,21.9150391 60.3271484,21.9150391 C60.1578776,21.9150391 59.9970703,21.9742839 59.8447266,22.0927734 C59.2353516,22.5582682 58.6302083,22.9264323 58.0292969,23.1972656 C57.4283854,23.468099 56.7513021,23.6035156 55.9980469,23.6035156 C54.8046875,23.5865885 53.8271484,23.2036133 53.0654297,22.4545898 C52.3037109,21.7055664 51.914388,20.6031901 51.8974609,19.1474609 L60.6064453,19.1474609 C60.8772786,19.1474609 61.0804036,19.0670573 61.2158203,18.90625 C61.351237,18.7454427 61.4189453,18.5423177 61.4189453,18.296875 C61.3935547,17.4420573 61.2708333,16.6570638 61.0507812,15.9418945 C60.8307292,15.2267253 60.500651,14.5792643 60.0605469,13.9995117 C59.6204427,13.4197591 59.0279948,12.9648438 58.2832031,12.6347656 C57.5384115,12.3046875 56.6708984,12.1396484 55.6806641,12.1396484 C54.4280599,12.1396484 53.3320312,12.4443359 52.3925781,13.0537109 C51.453125,13.6630859 50.750651,14.4544271 50.2851562,15.4277344 C49.8196615,16.4010417 49.5869141,17.4759115 49.5869141,18.6523438 C49.5869141,19.9895833 49.8640951,21.159668 50.418457,22.1625977 C50.972819,23.1655273 51.7197266,23.9251302 52.6591797,24.4414062 C53.5986328,24.9576823 54.6608073,25.2200521 55.8457031,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M82.9560547,24.2382812 C83.1676432,24.796875 83.5569661,25.0761719 84.1240234,25.0761719 C84.4541016,25.0761719 84.7503255,24.9703776 85.0126953,24.7587891 C85.2750651,24.5472005 85.40625,24.280599 85.40625,23.9589844 C85.40625,23.8235677 85.3723958,23.6669922 85.3046875,23.4892578 L79.2744141,8.52148438 C78.9866536,7.80208333 78.6756185,7.32600911 78.3413086,7.09326172 C78.0069987,6.86051432 77.5901693,6.74414062 77.0908203,6.74414062 C76.5914714,6.74414062 76.1746419,6.86051432 75.840332,7.09326172 C75.5060221,7.32600911 75.194987,7.80208333 74.9072266,8.52148438 L68.8769531,23.4892578 C68.8092448,23.6669922 68.7753906,23.8235677 68.7753906,23.9589844 C68.7753906,24.280599 68.9044596,24.5472005 69.1625977,24.7587891 C69.4207357,24.9703776 69.7148438,25.0761719 70.0449219,25.0761719 C70.6204427,25.0761719 71.0139974,24.796875 71.2255859,24.2382812 L72.7236328,20.3789062 L81.4580078,20.3789062 L82.9560547,24.2382812 Z M80.734375,18.5380859 L73.4472656,18.5380859 L77.0908203,9.15625 L80.734375,18.5380859 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M89.9667969,15.2373047 L89.9667969,8.88964844 L93.9658203,8.88964844 C95.3623047,8.88964844 96.3736979,9.1266276 97,9.60058594 C97.6263021,10.0745443 97.9394531,10.8828125 97.9394531,12.0253906 C97.9394531,13.1933594 97.6072591,14.0206706 96.9428711,14.5073242 C96.2784831,14.9939779 95.2395833,15.2373047 93.8261719,15.2373047 L89.9667969,15.2373047 Z M88.6845703,25 C89.0400391,25 89.3426107,24.8942057 89.5922852,24.6826172 C89.8419596,24.4710286 89.9667969,24.1705729 89.9667969,23.78125 L89.9667969,17.1923828 L93.8388672,17.1923828 C94.8460286,17.1923828 95.7389323,17.1035156 96.5175781,16.9257812 C97.296224,16.7480469 97.9881185,16.4666341 98.5932617,16.081543 C99.1984049,15.6964518 99.6617839,15.1632487 99.9833984,14.4819336 C100.305013,13.8006185 100.46582,12.9817708 100.46582,12.0253906 C100.46582,10.1888021 99.9199219,8.86848958 98.828125,8.06445312 C97.7363281,7.26041667 96.1494141,6.85839844 94.0673828,6.85839844 L88.6972656,6.85839844 C88.3333333,6.85839844 88.0265299,6.98111979 87.7768555,7.2265625 C87.527181,7.47200521 87.4023438,7.77246094 87.4023438,8.12792969 L87.4023438,23.78125 C87.4023438,24.1621094 87.5292969,24.4604492 87.7832031,24.6762695 C88.0371094,24.8920898 88.3375651,25 88.6845703,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M103.655273,24.9873047 C104.010742,24.9873047 104.31543,24.8815104 104.569336,24.6699219 C104.823242,24.4583333 104.950195,24.1578776 104.950195,23.7685547 L104.950195,8.07714844 C104.950195,7.68782552 104.825358,7.38525391 104.575684,7.16943359 C104.326009,6.95361328 104.027669,6.84570312 103.680664,6.84570312 C103.333659,6.84570312 103.031087,6.95572917 102.772949,7.17578125 C102.514811,7.39583333 102.385742,7.69628906 102.385742,8.07714844 L102.385742,23.7685547 C102.385742,24.1494141 102.510579,24.4477539 102.760254,24.6635742 C103.009928,24.8793945 103.308268,24.9873047 103.655273,24.9873047 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M118.336914,27.8564453 C118.582357,27.8564453 118.791829,27.7760417 118.965332,27.6152344 C119.138835,27.4544271 119.225586,27.2470703 119.225586,26.9931641 C119.225586,26.8746745 119.191732,26.7223307 119.124023,26.5361328 C117.879883,23.0999349 117.257812,19.921875 117.257812,17.0019531 C117.257812,14.0481771 117.888346,10.8404948 119.149414,7.37890625 C119.208659,7.23502604 119.238281,7.08691406 119.238281,6.93457031 C119.238281,6.68066406 119.149414,6.46907552 118.97168,6.29980469 C118.793945,6.13053385 118.582357,6.04589844 118.336914,6.04589844 C117.905273,6.04589844 117.583659,6.3125 117.37207,6.84570312 C116.568034,8.79231771 115.943848,10.5802409 115.499512,12.2094727 C115.055176,13.8387044 114.833008,15.4404297 114.833008,17.0146484 C114.833008,18.6142578 115.059408,20.2286784 115.512207,21.8579102 C115.965007,23.4871419 116.601888,25.2708333 117.422852,27.2089844 C117.617513,27.640625 117.922201,27.8564453 118.336914,27.8564453 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M121.234375,27.1074219 C121.666016,27.1074219 121.96224,26.8873698 122.123047,26.4472656 L128.267578,8.02636719 C128.309896,7.87402344 128.331055,7.75130208 128.331055,7.65820312 C128.331055,7.39583333 128.23584,7.18424479 128.04541,7.0234375 C127.85498,6.86263021 127.632812,6.78222656 127.378906,6.78222656 C126.904948,6.78222656 126.596029,7.00227865 126.452148,7.44238281 L120.358398,25.8759766 C120.316081,26.0283203 120.294922,26.1468099 120.294922,26.2314453 C120.294922,26.4938151 120.388021,26.7054036 120.574219,26.8662109 C120.760417,27.0270182 120.980469,27.1074219 121.234375,27.1074219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M129.895508,23.8574219 C130.40332,24.2298177 131.08252,24.5535482 131.933105,24.8286133 C132.783691,25.1036784 133.661784,25.2412109 134.567383,25.2412109 C135.54069,25.2412109 136.395508,25.1079102 137.131836,24.8413086 C137.868164,24.574707 138.462728,24.1451823 138.915527,23.5527344 C139.368327,22.9602865 139.594727,22.2324219 139.594727,21.3691406 C139.594727,20.4550781 139.321777,19.7039388 138.775879,19.1157227 C138.22998,18.5275065 137.318034,18.0768229 136.040039,17.7636719 L133.84375,17.2177734 C132.921224,16.9892578 132.320312,16.7713216 132.041016,16.5639648 C131.761719,16.3566081 131.62207,15.9990234 131.62207,15.4912109 C131.62207,14.8649089 131.892904,14.4057617 132.43457,14.1137695 C132.976237,13.8217773 133.704102,13.6757812 134.618164,13.6757812 C134.905924,13.6757812 135.189453,13.6948242 135.46875,13.7329102 C135.748047,13.7709961 136.023112,13.8260091 136.293945,13.8979492 C136.564779,13.9698893 136.778483,14.03125 136.935059,14.0820312 C137.091634,14.1328125 137.294759,14.2068685 137.544434,14.3041992 C137.794108,14.4015299 137.931641,14.4544271 137.957031,14.4628906 C138.092448,14.5136719 138.223633,14.5390625 138.350586,14.5390625 C138.570638,14.5390625 138.746257,14.4692383 138.877441,14.3295898 C139.008626,14.1899414 139.074219,14.0227865 139.074219,13.828125 C139.074219,13.4980469 138.90918,13.2483724 138.579102,13.0791016 C138.12207,12.8336589 137.531738,12.6114909 136.808105,12.4125977 C136.084473,12.2137044 135.312174,12.1142578 134.491211,12.1142578 C133.805664,12.1142578 133.170898,12.1798503 132.586914,12.3110352 C132.00293,12.4422201 131.476074,12.6411133 131.006348,12.9077148 C130.536621,13.1743164 130.166341,13.5361328 129.895508,13.9931641 C129.624674,14.4501953 129.489258,14.9791667 129.489258,15.5800781 C129.489258,15.9609375 129.527344,16.2994792 129.603516,16.5957031 C129.679688,16.8919271 129.802409,17.1500651 129.97168,17.3701172 C130.140951,17.5901693 130.322917,17.7784831 130.517578,17.9350586 C130.71224,18.0916341 130.972493,18.2376302 131.29834,18.3730469 C131.624186,18.5084635 131.928874,18.6184896 132.212402,18.703125 C132.495931,18.7877604 132.866211,18.8893229 133.323242,19.0078125 L135.570312,19.5664062 C136.315104,19.7526042 136.848307,19.9980469 137.169922,20.3027344 C137.491536,20.6074219 137.652344,21.0136719 137.652344,21.5214844 C137.652344,22.2324219 137.366699,22.7719727 136.79541,23.1401367 C136.224121,23.5083008 135.472982,23.6923828 134.541992,23.6923828 C133.24707,23.6839193 132.019857,23.319987 130.860352,22.6005859 C130.674154,22.4820964 130.479492,22.4228516 130.276367,22.4228516 C130.056315,22.4228516 129.874349,22.4969076 129.730469,22.6450195 C129.586589,22.7931315 129.514648,22.96875 129.514648,23.171875 C129.514648,23.4511719 129.641602,23.6796875 129.895508,23.8574219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M146.897461,25.2412109 C148.556315,25.2412109 149.842773,24.8730469 150.756836,24.1367188 C151.044596,23.8997396 151.188477,23.6416016 151.188477,23.3623047 C151.188477,23.1676432 151.120768,23.0047201 150.985352,22.8735352 C150.849935,22.7423503 150.684896,22.6767578 150.490234,22.6767578 C150.3125,22.6767578 150.143229,22.7317708 149.982422,22.8417969 C149.203776,23.3834635 148.234701,23.6542969 147.075195,23.6542969 C146.415039,23.6542969 145.831055,23.5167643 145.323242,23.2416992 C144.81543,22.9666341 144.40918,22.5942383 144.104492,22.1245117 C143.799805,21.6547852 143.571289,21.1321615 143.418945,20.5566406 C143.266602,19.9811198 143.19043,19.3717448 143.19043,18.7285156 C143.19043,17.188151 143.567057,15.9630534 144.320312,15.0532227 C145.073568,14.1433919 146.055339,13.6884766 147.265625,13.6884766 C148.196615,13.6884766 149.047201,13.938151 149.817383,14.4375 C149.986654,14.547526 150.164388,14.6025391 150.350586,14.6025391 C150.553711,14.6025391 150.725098,14.5390625 150.864746,14.4121094 C151.004395,14.2851562 151.074219,14.1285807 151.074219,13.9423828 C151.074219,13.680013 150.930339,13.4345703 150.642578,13.2060547 C150.278646,12.9013672 149.78776,12.6474609 149.169922,12.4443359 C148.552083,12.2412109 147.879232,12.1396484 147.151367,12.1396484 C145.907227,12.1396484 144.806966,12.4316406 143.850586,13.015625 C142.894206,13.5996094 142.162109,14.3867188 141.654297,15.3769531 C141.146484,16.3671875 140.892578,17.4716797 140.892578,18.6904297 C140.892578,20.5777995 141.434245,22.1414388 142.517578,23.3813477 C143.600911,24.6212565 145.060872,25.2412109 146.897461,25.2412109 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M158.427734,25.2285156 C160.382812,25.2285156 161.931641,24.6149089 163.074219,23.3876953 C164.216797,22.1604818 164.788086,20.5947266 164.788086,18.6904297 C164.788086,16.7692057 164.214681,15.1971029 163.067871,13.9741211 C161.921061,12.7511393 160.374349,12.1396484 158.427734,12.1396484 C156.48112,12.1396484 154.934408,12.7532552 153.787598,13.9804688 C152.640788,15.2076823 152.067383,16.7776693 152.067383,18.6904297 C152.067383,20.5947266 152.640788,22.1604818 153.787598,23.3876953 C154.934408,24.6149089 156.48112,25.2285156 158.427734,25.2285156 Z M158.402344,23.6542969 C157.132812,23.6542969 156.140462,23.2078451 155.425293,22.3149414 C154.710124,21.4220378 154.352539,20.2138672 154.352539,18.6904297 C154.352539,17.1500651 154.71224,15.933431 155.431641,15.0405273 C156.151042,14.1476237 157.14974,13.7011719 158.427734,13.7011719 C159.697266,13.7011719 160.693848,14.1497396 161.41748,15.046875 C162.141113,15.9440104 162.50293,17.1585286 162.50293,18.6904297 C162.50293,20.2307943 162.145345,21.4431966 161.430176,22.3276367 C160.715007,23.2120768 159.705729,23.6542969 158.402344,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M167.787109,25 C168.10026,25 168.37321,24.906901 168.605957,24.7207031 C168.838704,24.5345052 168.955078,24.2721354 168.955078,23.9335938 L168.955078,17.3955078 C168.955078,16.4052734 169.270345,15.6139323 169.900879,15.0214844 C170.531413,14.4290365 171.417969,14.1328125 172.560547,14.1328125 C172.83138,14.1328125 173.038737,14.0418294 173.182617,13.8598633 C173.326497,13.6778971 173.398438,13.4599609 173.398438,13.2060547 C173.398438,12.9352214 173.32015,12.6982422 173.163574,12.4951172 C173.006999,12.2919922 172.793294,12.1904297 172.522461,12.1904297 C171.625326,12.1904297 170.857259,12.4549154 170.218262,12.9838867 C169.579264,13.5128581 169.153971,14.1708984 168.942383,14.9580078 L168.955078,13.4345703 C168.955078,13.1129557 168.842936,12.8611654 168.618652,12.6791992 C168.394368,12.4972331 168.125651,12.40625 167.8125,12.40625 C167.499349,12.40625 167.228516,12.4972331 167,12.6791992 C166.771484,12.8611654 166.657227,13.1214193 166.657227,13.4599609 L166.657227,23.9335938 C166.657227,24.2721354 166.767253,24.5345052 166.987305,24.7207031 C167.207357,24.906901 167.473958,25 167.787109,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M176.435547,17.7382812 C176.477865,17.2135417 176.590007,16.7163086 176.771973,16.246582 C176.953939,15.7768555 177.199382,15.3494466 177.508301,14.9643555 C177.81722,14.5792643 178.212891,14.2724609 178.695312,14.0439453 C179.177734,13.8154297 179.715169,13.7011719 180.307617,13.7011719 C181.450195,13.7011719 182.317708,14.0777995 182.910156,14.8310547 C183.502604,15.5843099 183.836914,16.5533854 183.913086,17.7382812 L176.435547,17.7382812 Z M180.345703,25.2285156 C182.241536,25.2285156 183.858073,24.6276042 185.195312,23.4257812 C185.432292,23.2141927 185.550781,22.9729818 185.550781,22.7021484 C185.550781,22.4905599 185.480957,22.3064779 185.341309,22.1499023 C185.20166,21.9933268 185.030273,21.9150391 184.827148,21.9150391 C184.657878,21.9150391 184.49707,21.9742839 184.344727,22.0927734 C183.735352,22.5582682 183.130208,22.9264323 182.529297,23.1972656 C181.928385,23.468099 181.251302,23.6035156 180.498047,23.6035156 C179.304688,23.5865885 178.327148,23.2036133 177.56543,22.4545898 C176.803711,21.7055664 176.414388,20.6031901 176.397461,19.1474609 L185.106445,19.1474609 C185.377279,19.1474609 185.580404,19.0670573 185.71582,18.90625 C185.851237,18.7454427 185.918945,18.5423177 185.918945,18.296875 C185.893555,17.4420573 185.770833,16.6570638 185.550781,15.9418945 C185.330729,15.2267253 185.000651,14.5792643 184.560547,13.9995117 C184.120443,13.4197591 183.527995,12.9648438 182.783203,12.6347656 C182.038411,12.3046875 181.170898,12.1396484 180.180664,12.1396484 C178.92806,12.1396484 177.832031,12.4443359 176.892578,13.0537109 C175.953125,13.6630859 175.250651,14.4544271 174.785156,15.4277344 C174.319661,16.4010417 174.086914,17.4759115 174.086914,18.6523438 C174.086914,19.9895833 174.364095,21.159668 174.918457,22.1625977 C175.472819,23.1655273 176.219727,23.9251302 177.15918,24.4414062 C178.098633,24.9576823 179.160807,25.2200521 180.345703,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M188.397461,27.8691406 C188.812174,27.8691406 189.121094,27.6490885 189.324219,27.2089844 C190.145182,25.2792969 190.782064,23.4977214 191.234863,21.8642578 C191.687663,20.2307943 191.914062,18.6184896 191.914062,17.0273438 C191.914062,15.4446615 191.691895,13.8387044 191.247559,12.2094727 C190.803223,10.5802409 190.179036,8.79231771 189.375,6.84570312 C189.138021,6.32096354 188.820638,6.05859375 188.422852,6.05859375 C188.177409,6.05859375 187.963704,6.14322917 187.781738,6.3125 C187.599772,6.48177083 187.508789,6.6891276 187.508789,6.93457031 C187.508789,7.06998698 187.542643,7.21809896 187.610352,7.37890625 C188.862956,10.8658854 189.489258,14.0735677 189.489258,17.0019531 C189.489258,19.9726562 188.867188,23.1507161 187.623047,26.5361328 C187.555339,26.7138672 187.521484,26.8704427 187.521484,27.0058594 C187.521484,27.2513021 187.608236,27.456543 187.781738,27.621582 C187.955241,27.7866211 188.160482,27.8691406 188.397461,27.8691406 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="List-Item-/-01_@text_#333-Copy-12" transform="translate(0, 212)" xlink:href="#path-35" fill="#333333">
+                    <g id="Text" transform="translate(16, 4)">
+                        <path d="M9.953125,25.2792969 C11.874349,25.2792969 13.5416667,24.8180339 14.9550781,23.8955078 C15.3782552,23.6246745 15.5898438,23.2945964 15.5898438,22.9052734 C15.5898438,22.6429036 15.4967448,22.414388 15.3105469,22.2197266 C15.124349,22.0250651 14.9000651,21.9277344 14.6376953,21.9277344 C14.4853516,21.9277344 14.3245443,21.9742839 14.1552734,22.0673828 C13.4951172,22.4397786 12.8815104,22.7254232 12.3144531,22.9243164 C11.7473958,23.1232096 11.0872396,23.2226562 10.3339844,23.2226562 C9.49609375,23.2226562 8.73649089,23.0957031 8.05517578,22.8417969 C7.37386068,22.5878906 6.80257161,22.2408854 6.34130859,21.8007812 C5.88004557,21.3606771 5.49283854,20.8338216 5.1796875,20.2202148 C4.86653646,19.6066081 4.63802083,18.9443359 4.49414062,18.2333984 C4.35026042,17.5224609 4.27832031,16.764974 4.27832031,15.9609375 C4.27832031,15.156901 4.35026042,14.3972982 4.49414062,13.6821289 C4.63802083,12.9669596 4.86653646,12.3004557 5.1796875,11.6826172 C5.49283854,11.0647786 5.87792969,10.5315755 6.33496094,10.0830078 C6.79199219,9.6344401 7.35481771,9.28320312 8.0234375,9.02929688 C8.69205729,8.77539062 9.43684896,8.6484375 10.2578125,8.6484375 C10.5963542,8.6484375 10.9243164,8.66536458 11.2416992,8.69921875 C11.559082,8.73307292 11.8235677,8.77327474 12.0351562,8.81982422 C12.2467448,8.8663737 12.4667969,8.92985026 12.6953125,9.01025391 C12.9238281,9.09065755 13.093099,9.15413411 13.203125,9.20068359 C13.313151,9.24723307 13.4549154,9.31494141 13.628418,9.40380859 C13.8019206,9.49267578 13.905599,9.54557292 13.9394531,9.5625 C14.0917969,9.63020833 14.2314453,9.6640625 14.3583984,9.6640625 C14.6207682,9.6640625 14.8450521,9.5625 15.03125,9.359375 C15.2174479,9.15625 15.3105469,8.91927083 15.3105469,8.6484375 C15.3105469,8.26757812 15.124349,7.97558594 14.7519531,7.77246094 C13.3385417,6.98535156 11.8108724,6.59179688 10.1689453,6.59179688 C8.84016927,6.59179688 7.62988281,6.83723958 6.53808594,7.328125 C5.44628906,7.81901042 4.5406901,8.48974609 3.82128906,9.34033203 C3.10188802,10.190918 2.54541016,11.1875 2.15185547,12.3300781 C1.75830078,13.4726562 1.56152344,14.695638 1.56152344,15.9990234 C1.56152344,17.7509766 1.88313802,19.3209635 2.52636719,20.7089844 C3.16959635,22.0970052 4.1344401,23.2057292 5.42089844,24.0351562 C6.70735677,24.8645833 8.21809896,25.2792969 9.953125,25.2792969 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M23.6162109,25.2285156 C25.5712891,25.2285156 27.1201172,24.6149089 28.2626953,23.3876953 C29.4052734,22.1604818 29.9765625,20.5947266 29.9765625,18.6904297 C29.9765625,16.7692057 29.4031576,15.1971029 28.2563477,13.9741211 C27.1095378,12.7511393 25.5628255,12.1396484 23.6162109,12.1396484 C21.6695964,12.1396484 20.1228841,12.7532552 18.9760742,13.9804688 C17.8292643,15.2076823 17.2558594,16.7776693 17.2558594,18.6904297 C17.2558594,20.5947266 17.8292643,22.1604818 18.9760742,23.3876953 C20.1228841,24.6149089 21.6695964,25.2285156 23.6162109,25.2285156 Z M23.5908203,23.6542969 C22.3212891,23.6542969 21.3289388,23.2078451 20.6137695,22.3149414 C19.8986003,21.4220378 19.5410156,20.2138672 19.5410156,18.6904297 C19.5410156,17.1500651 19.9007161,15.933431 20.6201172,15.0405273 C21.3395182,14.1476237 22.3382161,13.7011719 23.6162109,13.7011719 C24.8857422,13.7011719 25.8823242,14.1497396 26.605957,15.046875 C27.3295898,15.9440104 27.6914062,17.1585286 27.6914062,18.6904297 C27.6914062,20.2307943 27.3338216,21.4431966 26.6186523,22.3276367 C25.9034831,23.2120768 24.8942057,23.6542969 23.5908203,23.6542969 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M32.9501953,25 C33.2633464,25 33.5362956,24.9026693 33.769043,24.7080078 C34.0017904,24.5133464 34.1181641,24.2467448 34.1181641,23.9082031 L34.1181641,17.7509766 C34.1181641,16.5068359 34.4736328,15.5441081 35.1845703,14.862793 C35.8955078,14.1814779 36.7714844,13.8408203 37.8125,13.8408203 C38.6673177,13.8408203 39.3380534,14.0904948 39.824707,14.5898438 C40.3113607,15.0891927 40.5546875,15.8424479 40.5546875,16.8496094 L40.5546875,23.9082031 C40.5546875,24.2467448 40.6668294,24.5133464 40.8911133,24.7080078 C41.1153971,24.9026693 41.3798828,25 41.6845703,25 C41.9977214,25 42.2685547,24.9026693 42.4970703,24.7080078 C42.7255859,24.5133464 42.8398438,24.2467448 42.8398438,23.9082031 L42.8398438,16.9765625 C42.8398438,16.1386719 42.7192383,15.4023438 42.4780273,14.7675781 C42.2368164,14.1328125 41.9025065,13.6271159 41.4750977,13.2504883 C41.0476888,12.8738607 40.5694987,12.5945638 40.0405273,12.4125977 C39.511556,12.2306315 38.9339193,12.1396484 38.3076172,12.1396484 C37.3766276,12.1396484 36.5302734,12.3364258 35.7685547,12.7299805 C35.0068359,13.1235352 34.4567057,13.6673177 34.1181641,14.3613281 L34.1181641,7.92480469 C34.1181641,7.58626302 34.0060221,7.32600911 33.7817383,7.14404297 C33.5574544,6.96207682 33.288737,6.87109375 32.9755859,6.87109375 C32.6624349,6.87109375 32.3916016,6.96419271 32.1630859,7.15039062 C31.9345703,7.33658854 31.8203125,7.59472656 31.8203125,7.92480469 L31.8203125,23.9082031 C31.8203125,24.2467448 31.9324544,24.5133464 32.1567383,24.7080078 C32.3810221,24.9026693 32.6455078,25 32.9501953,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M47.0830078,17.7382812 C47.1253255,17.2135417 47.2374674,16.7163086 47.4194336,16.246582 C47.6013997,15.7768555 47.8468424,15.3494466 48.1557617,14.9643555 C48.464681,14.5792643 48.8603516,14.2724609 49.3427734,14.0439453 C49.8251953,13.8154297 50.3626302,13.7011719 50.9550781,13.7011719 C52.0976562,13.7011719 52.9651693,14.0777995 53.5576172,14.8310547 C54.1500651,15.5843099 54.484375,16.5533854 54.5605469,17.7382812 L47.0830078,17.7382812 Z M50.9931641,25.2285156 C52.8889974,25.2285156 54.5055339,24.6276042 55.8427734,23.4257812 C56.0797526,23.2141927 56.1982422,22.9729818 56.1982422,22.7021484 C56.1982422,22.4905599 56.128418,22.3064779 55.9887695,22.1499023 C55.8491211,21.9933268 55.6777344,21.9150391 55.4746094,21.9150391 C55.3053385,21.9150391 55.1445312,21.9742839 54.9921875,22.0927734 C54.3828125,22.5582682 53.7776693,22.9264323 53.1767578,23.1972656 C52.5758464,23.468099 51.898763,23.6035156 51.1455078,23.6035156 C49.9521484,23.5865885 48.9746094,23.2036133 48.2128906,22.4545898 C47.4511719,21.7055664 47.061849,20.6031901 47.0449219,19.1474609 L55.7539062,19.1474609 C56.0247396,19.1474609 56.2278646,19.0670573 56.3632812,18.90625 C56.4986979,18.7454427 56.5664062,18.5423177 56.5664062,18.296875 C56.5410156,17.4420573 56.4182943,16.6570638 56.1982422,15.9418945 C55.9781901,15.2267253 55.648112,14.5792643 55.2080078,13.9995117 C54.7679036,13.4197591 54.1754557,12.9648438 53.4306641,12.6347656 C52.6858724,12.3046875 51.8183594,12.1396484 50.828125,12.1396484 C49.5755208,12.1396484 48.4794922,12.4443359 47.5400391,13.0537109 C46.6005859,13.6630859 45.898112,14.4544271 45.4326172,15.4277344 C44.9671224,16.4010417 44.734375,17.4759115 44.734375,18.6523438 C44.734375,19.9895833 45.011556,21.159668 45.565918,22.1625977 C46.1202799,23.1655273 46.8671875,23.9251302 47.8066406,24.4414062 C48.7460938,24.9576823 49.8082682,25.2200521 50.9931641,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M59.8828125,25 C60.1959635,25 60.4689128,24.906901 60.7016602,24.7207031 C60.9344076,24.5345052 61.0507812,24.2721354 61.0507812,23.9335938 L61.0507812,17.3955078 C61.0507812,16.4052734 61.3660482,15.6139323 61.996582,15.0214844 C62.6271159,14.4290365 63.5136719,14.1328125 64.65625,14.1328125 C64.9270833,14.1328125 65.1344401,14.0418294 65.2783203,13.8598633 C65.4222005,13.6778971 65.4941406,13.4599609 65.4941406,13.2060547 C65.4941406,12.9352214 65.4158529,12.6982422 65.2592773,12.4951172 C65.1027018,12.2919922 64.8889974,12.1904297 64.6181641,12.1904297 C63.7210286,12.1904297 62.9529622,12.4549154 62.3139648,12.9838867 C61.6749674,13.5128581 61.2496745,14.1708984 61.0380859,14.9580078 L61.0507812,13.4345703 C61.0507812,13.1129557 60.9386393,12.8611654 60.7143555,12.6791992 C60.4900716,12.4972331 60.2213542,12.40625 59.9082031,12.40625 C59.5950521,12.40625 59.3242188,12.4972331 59.0957031,12.6791992 C58.8671875,12.8611654 58.7529297,13.1214193 58.7529297,13.4599609 L58.7529297,23.9335938 C58.7529297,24.2721354 58.8629557,24.5345052 59.0830078,24.7207031 C59.3030599,24.906901 59.5696615,25 59.8828125,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M68.53125,17.7382812 C68.5735677,17.2135417 68.6857096,16.7163086 68.8676758,16.246582 C69.0496419,15.7768555 69.2950846,15.3494466 69.6040039,14.9643555 C69.9129232,14.5792643 70.3085938,14.2724609 70.7910156,14.0439453 C71.2734375,13.8154297 71.8108724,13.7011719 72.4033203,13.7011719 C73.5458984,13.7011719 74.4134115,14.0777995 75.0058594,14.8310547 C75.5983073,15.5843099 75.9326172,16.5533854 76.0087891,17.7382812 L68.53125,17.7382812 Z M72.4414062,25.2285156 C74.3372396,25.2285156 75.953776,24.6276042 77.2910156,23.4257812 C77.5279948,23.2141927 77.6464844,22.9729818 77.6464844,22.7021484 C77.6464844,22.4905599 77.5766602,22.3064779 77.4370117,22.1499023 C77.2973633,21.9933268 77.1259766,21.9150391 76.9228516,21.9150391 C76.7535807,21.9150391 76.5927734,21.9742839 76.4404297,22.0927734 C75.8310547,22.5582682 75.2259115,22.9264323 74.625,23.1972656 C74.0240885,23.468099 73.3470052,23.6035156 72.59375,23.6035156 C71.4003906,23.5865885 70.4228516,23.2036133 69.6611328,22.4545898 C68.8994141,21.7055664 68.5100911,20.6031901 68.4931641,19.1474609 L77.2021484,19.1474609 C77.4729818,19.1474609 77.6761068,19.0670573 77.8115234,18.90625 C77.9469401,18.7454427 78.0146484,18.5423177 78.0146484,18.296875 C77.9892578,17.4420573 77.8665365,16.6570638 77.6464844,15.9418945 C77.4264323,15.2267253 77.0963542,14.5792643 76.65625,13.9995117 C76.2161458,13.4197591 75.6236979,12.9648438 74.8789062,12.6347656 C74.1341146,12.3046875 73.2666016,12.1396484 72.2763672,12.1396484 C71.023763,12.1396484 69.9277344,12.4443359 68.9882812,13.0537109 C68.0488281,13.6630859 67.3463542,14.4544271 66.8808594,15.4277344 C66.4153646,16.4010417 66.1826172,17.4759115 66.1826172,18.6523438 C66.1826172,19.9895833 66.4597982,21.159668 67.0141602,22.1625977 C67.5685221,23.1655273 68.3154297,23.9251302 69.2548828,24.4414062 C70.1943359,24.9576823 71.2565104,25.2200521 72.4414062,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M89.4335938,14.7294922 L89.4335938,8.88964844 L93.4707031,8.88964844 C93.8346354,8.88964844 94.1477865,8.89811198 94.4101562,8.91503906 C94.672526,8.93196615 94.9772135,8.9679362 95.3242188,9.02294922 C95.671224,9.07796224 95.965332,9.16682943 96.206543,9.28955078 C96.4477539,9.41227214 96.6783854,9.57096354 96.8984375,9.765625 C97.1184896,9.96028646 97.2856445,10.2184245 97.3999023,10.5400391 C97.5141602,10.8616536 97.5712891,11.2340495 97.5712891,11.6572266 C97.5712891,12.1735026 97.4866536,12.6199544 97.3173828,12.996582 C97.148112,13.3732096 96.9301758,13.6715495 96.6635742,13.8916016 C96.3969727,14.1116536 96.0605469,14.2851562 95.6542969,14.4121094 C95.2480469,14.5390625 94.8502604,14.6236979 94.4609375,14.6660156 C94.0716146,14.7083333 93.6272786,14.7294922 93.1279297,14.7294922 L89.4335938,14.7294922 Z M88.1386719,24.9873047 C88.4856771,24.9873047 88.7882487,24.8815104 89.0463867,24.6699219 C89.3045247,24.4583333 89.4335938,24.1578776 89.4335938,23.7685547 L89.4335938,16.6972656 L93.6738281,16.6972656 C94.3509115,16.6972656 94.8883464,16.745931 95.2861328,16.8432617 C95.6839193,16.9405924 96.0161133,17.133138 96.2827148,17.4208984 C96.5493164,17.7086589 96.741862,18.0683594 96.8603516,18.5 C96.9788411,18.9316406 97.0804036,19.5325521 97.1650391,20.3027344 L97.5585938,23.8447266 C97.5924479,24.2086589 97.7469076,24.4900716 98.0219727,24.6889648 C98.2970378,24.8878581 98.6038411,24.9873047 98.9423828,24.9873047 C99.2724609,24.9873047 99.5559896,24.8815104 99.7929688,24.6699219 C100.029948,24.4583333 100.148438,24.1832682 100.148438,23.8447266 C100.148438,23.7685547 100.144206,23.7093099 100.135742,23.6669922 L99.6787109,19.6806641 C99.4163411,17.3955078 98.2906901,16.0371094 96.3017578,15.6054688 L96.3017578,15.5546875 C97.5712891,15.3177083 98.5276693,14.8310547 99.1708984,14.0947266 C99.8141276,13.3583984 100.135742,12.4654948 100.135742,11.4160156 C100.135742,10.688151 100.010905,10.0470378 99.7612305,9.49267578 C99.511556,8.9383138 99.1878255,8.49820964 98.7900391,8.17236328 C98.3922526,7.84651693 97.9013672,7.58203125 97.3173828,7.37890625 C96.7333984,7.17578125 96.1515299,7.0382487 95.5717773,6.96630859 C94.9920247,6.89436849 94.3551432,6.85839844 93.6611328,6.85839844 L88.1513672,6.85839844 C87.7958984,6.85839844 87.4933268,6.97900391 87.2436523,7.22021484 C86.9939779,7.46142578 86.8691406,7.75553385 86.8691406,8.10253906 L86.8691406,23.7685547 C86.8691406,24.1578776 86.9939779,24.4583333 87.2436523,24.6699219 C87.4933268,24.8815104 87.7916667,24.9873047 88.1386719,24.9873047 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M104.442383,17.7382812 C104.484701,17.2135417 104.596842,16.7163086 104.778809,16.246582 C104.960775,15.7768555 105.206217,15.3494466 105.515137,14.9643555 C105.824056,14.5792643 106.219727,14.2724609 106.702148,14.0439453 C107.18457,13.8154297 107.722005,13.7011719 108.314453,13.7011719 C109.457031,13.7011719 110.324544,14.0777995 110.916992,14.8310547 C111.50944,15.5843099 111.84375,16.5533854 111.919922,17.7382812 L104.442383,17.7382812 Z M108.352539,25.2285156 C110.248372,25.2285156 111.864909,24.6276042 113.202148,23.4257812 C113.439128,23.2141927 113.557617,22.9729818 113.557617,22.7021484 C113.557617,22.4905599 113.487793,22.3064779 113.348145,22.1499023 C113.208496,21.9933268 113.037109,21.9150391 112.833984,21.9150391 C112.664714,21.9150391 112.503906,21.9742839 112.351562,22.0927734 C111.742188,22.5582682 111.137044,22.9264323 110.536133,23.1972656 C109.935221,23.468099 109.258138,23.6035156 108.504883,23.6035156 C107.311523,23.5865885 106.333984,23.2036133 105.572266,22.4545898 C104.810547,21.7055664 104.421224,20.6031901 104.404297,19.1474609 L113.113281,19.1474609 C113.384115,19.1474609 113.58724,19.0670573 113.722656,18.90625 C113.858073,18.7454427 113.925781,18.5423177 113.925781,18.296875 C113.900391,17.4420573 113.777669,16.6570638 113.557617,15.9418945 C113.337565,15.2267253 113.007487,14.5792643 112.567383,13.9995117 C112.127279,13.4197591 111.534831,12.9648438 110.790039,12.6347656 C110.045247,12.3046875 109.177734,12.1396484 108.1875,12.1396484 C106.934896,12.1396484 105.838867,12.4443359 104.899414,13.0537109 C103.959961,13.6630859 103.257487,14.4544271 102.791992,15.4277344 C102.326497,16.4010417 102.09375,17.4759115 102.09375,18.6523438 C102.09375,19.9895833 102.370931,21.159668 102.925293,22.1625977 C103.479655,23.1655273 104.226562,23.9251302 105.166016,24.4414062 C106.105469,24.9576823 107.167643,25.2200521 108.352539,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M117.242188,25 C117.555339,25 117.828288,24.906901 118.061035,24.7207031 C118.293783,24.5345052 118.410156,24.2721354 118.410156,23.9335938 L118.410156,17.3955078 C118.410156,16.4052734 118.725423,15.6139323 119.355957,15.0214844 C119.986491,14.4290365 120.873047,14.1328125 122.015625,14.1328125 C122.286458,14.1328125 122.493815,14.0418294 122.637695,13.8598633 C122.781576,13.6778971 122.853516,13.4599609 122.853516,13.2060547 C122.853516,12.9352214 122.775228,12.6982422 122.618652,12.4951172 C122.462077,12.2919922 122.248372,12.1904297 121.977539,12.1904297 C121.080404,12.1904297 120.312337,12.4549154 119.67334,12.9838867 C119.034342,13.5128581 118.609049,14.1708984 118.397461,14.9580078 L118.410156,13.4345703 C118.410156,13.1129557 118.298014,12.8611654 118.07373,12.6791992 C117.849447,12.4972331 117.580729,12.40625 117.267578,12.40625 C116.954427,12.40625 116.683594,12.4972331 116.455078,12.6791992 C116.226562,12.8611654 116.112305,13.1214193 116.112305,13.4599609 L116.112305,23.9335938 C116.112305,24.2721354 116.222331,24.5345052 116.442383,24.7207031 C116.662435,24.906901 116.929036,25 117.242188,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M127.985352,23.7050781 C127.198242,23.7050781 126.576172,23.5252279 126.119141,23.1655273 C125.662109,22.8058268 125.433594,22.2408854 125.433594,21.4707031 C125.433594,21.047526 125.494954,20.7005208 125.617676,20.4296875 C125.740397,20.1588542 125.928711,19.9282227 126.182617,19.737793 C126.436523,19.5473633 126.817383,19.4034831 127.325195,19.3061523 C127.833008,19.2088216 128.416992,19.1411133 129.077148,19.1030273 C129.737305,19.0649414 130.592122,19.0458984 131.641602,19.0458984 L131.641602,19.4267578 C131.641602,20.6708984 131.275553,21.694987 130.543457,22.4990234 C129.811361,23.3030599 128.958659,23.7050781 127.985352,23.7050781 Z M127.744141,25.2285156 C129.563802,25.2285156 130.867188,24.4033203 131.654297,22.7529297 L131.654297,23.9716797 C131.654297,24.3017578 131.762207,24.5577799 131.978027,24.7397461 C132.193848,24.9217122 132.454102,25.0126953 132.758789,25.0126953 C133.063477,25.0126953 133.332194,24.9174805 133.564941,24.7270508 C133.797689,24.5366211 133.914062,24.2763672 133.914062,23.9462891 L133.914062,16.5449219 C133.914062,15.046875 133.476074,13.938151 132.600098,13.21875 C131.724121,12.499349 130.511719,12.1396484 128.962891,12.1396484 C127.151693,12.1396484 125.590169,12.5078125 124.27832,13.2441406 C124.041341,13.3795573 123.922852,13.5742188 123.922852,13.828125 C123.922852,14.0481771 124.003255,14.2491862 124.164062,14.4311523 C124.32487,14.6131185 124.515299,14.7041016 124.735352,14.7041016 C124.853841,14.7041016 124.955404,14.6829427 125.040039,14.640625 C125.47168,14.4459635 125.835612,14.2936198 126.131836,14.1835938 C126.42806,14.0735677 126.83431,13.9635417 127.350586,13.8535156 C127.866862,13.7434896 128.383138,13.6884766 128.899414,13.6884766 C129.762695,13.6884766 130.435547,13.9042969 130.917969,14.3359375 C131.400391,14.7675781 131.641602,15.4361979 131.641602,16.3417969 L131.641602,17.7255859 C130.702148,17.7255859 129.900228,17.7382812 129.23584,17.7636719 C128.571452,17.7890625 127.926107,17.8334961 127.299805,17.8969727 C126.673503,17.9604492 126.157227,18.0535482 125.750977,18.1762695 C125.344727,18.2989909 124.972331,18.4555664 124.633789,18.6459961 C124.295247,18.8364258 124.032878,19.0691732 123.84668,19.3442383 C123.660482,19.6193034 123.516602,19.9388021 123.415039,20.3027344 C123.313477,20.6666667 123.262695,21.0898438 123.262695,21.5722656 C123.262695,22.7402344 123.677409,23.6416016 124.506836,24.2763672 C125.336263,24.9111328 126.415365,25.2285156 127.744141,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M137.763672,25 C138.076823,25 138.349772,24.9026693 138.58252,24.7080078 C138.815267,24.5133464 138.931641,24.2467448 138.931641,23.9082031 L138.931641,17.5732422 C138.990885,16.3798828 139.354818,15.4361979 140.023438,14.7421875 C140.692057,14.0481771 141.513021,13.7011719 142.486328,13.7011719 C143.400391,13.7011719 144.109212,13.9847005 144.612793,14.5517578 C145.116374,15.1188151 145.368164,15.9440104 145.368164,17.0273438 L145.368164,23.9082031 C145.368164,24.2552083 145.480306,24.5239258 145.70459,24.7143555 C145.928874,24.9047852 146.197591,25 146.510742,25 C146.823893,25 147.092611,24.9047852 147.316895,24.7143555 C147.541178,24.5239258 147.65332,24.2552083 147.65332,23.9082031 L147.65332,17.0527344 C147.65332,15.4023438 147.238607,14.1708984 146.40918,13.3583984 C145.579753,12.5458984 144.466797,12.1396484 143.070312,12.1396484 C142.097005,12.1396484 141.244303,12.3470052 140.512207,12.7617188 C139.780111,13.1764323 139.253255,13.7688802 138.931641,14.5390625 L138.931641,13.4345703 C138.931641,13.1044922 138.819499,12.8484701 138.595215,12.6665039 C138.370931,12.4845378 138.102214,12.3935547 137.789062,12.3935547 C137.475911,12.3935547 137.205078,12.4887695 136.976562,12.6791992 C136.748047,12.8696289 136.633789,13.1341146 136.633789,13.4726562 L136.633789,23.9082031 C136.633789,24.2552083 136.743815,24.5239258 136.963867,24.7143555 C137.183919,24.9047852 137.450521,25 137.763672,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M151.50293,25 C151.816081,25 152.08903,24.8984375 152.321777,24.6953125 C152.554525,24.4921875 152.670898,24.2086589 152.670898,23.8447266 L152.670898,18.3095703 L158.637695,24.6572266 C158.849284,24.8942057 159.10319,25.0126953 159.399414,25.0126953 C159.678711,25.0126953 159.92627,24.906901 160.14209,24.6953125 C160.35791,24.483724 160.46582,24.242513 160.46582,23.9716797 C160.46582,23.726237 160.372721,23.5061849 160.186523,23.3115234 L155.146484,18.0048828 L159.754883,13.8789062 C159.958008,13.6842448 160.05957,13.4684245 160.05957,13.2314453 C160.05957,12.9690755 159.955892,12.7320964 159.748535,12.5205078 C159.541178,12.3089193 159.302083,12.203125 159.03125,12.203125 C158.819661,12.203125 158.620768,12.2835286 158.43457,12.4443359 L152.670898,17.7255859 L152.670898,8.01367188 C152.670898,7.65820312 152.558757,7.37890625 152.334473,7.17578125 C152.110189,6.97265625 151.845703,6.87109375 151.541016,6.87109375 C151.227865,6.87109375 150.954915,6.97265625 150.722168,7.17578125 C150.489421,7.37890625 150.373047,7.65820312 150.373047,8.01367188 L150.373047,23.8447266 C150.373047,24.2171224 150.483073,24.5027669 150.703125,24.7016602 C150.923177,24.9005534 151.189779,25 151.50293,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M181.431641,24.2382812 C181.643229,24.796875 182.032552,25.0761719 182.599609,25.0761719 C182.929688,25.0761719 183.225911,24.9703776 183.488281,24.7587891 C183.750651,24.5472005 183.881836,24.280599 183.881836,23.9589844 C183.881836,23.8235677 183.847982,23.6669922 183.780273,23.4892578 L177.75,8.52148438 C177.46224,7.80208333 177.151204,7.32600911 176.816895,7.09326172 C176.482585,6.86051432 176.065755,6.74414062 175.566406,6.74414062 C175.067057,6.74414062 174.650228,6.86051432 174.315918,7.09326172 C173.981608,7.32600911 173.670573,7.80208333 173.382812,8.52148438 L167.352539,23.4892578 C167.284831,23.6669922 167.250977,23.8235677 167.250977,23.9589844 C167.250977,24.280599 167.380046,24.5472005 167.638184,24.7587891 C167.896322,24.9703776 168.19043,25.0761719 168.520508,25.0761719 C169.096029,25.0761719 169.489583,24.796875 169.701172,24.2382812 L171.199219,20.3789062 L179.933594,20.3789062 L181.431641,24.2382812 Z M179.209961,18.5380859 L171.922852,18.5380859 L175.566406,9.15625 L179.209961,18.5380859 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M188.442383,15.2373047 L188.442383,8.88964844 L192.441406,8.88964844 C193.837891,8.88964844 194.849284,9.1266276 195.475586,9.60058594 C196.101888,10.0745443 196.415039,10.8828125 196.415039,12.0253906 C196.415039,13.1933594 196.082845,14.0206706 195.418457,14.5073242 C194.754069,14.9939779 193.715169,15.2373047 192.301758,15.2373047 L188.442383,15.2373047 Z M187.160156,25 C187.515625,25 187.818197,24.8942057 188.067871,24.6826172 C188.317546,24.4710286 188.442383,24.1705729 188.442383,23.78125 L188.442383,17.1923828 L192.314453,17.1923828 C193.321615,17.1923828 194.214518,17.1035156 194.993164,16.9257812 C195.77181,16.7480469 196.463704,16.4666341 197.068848,16.081543 C197.673991,15.6964518 198.13737,15.1632487 198.458984,14.4819336 C198.780599,13.8006185 198.941406,12.9817708 198.941406,12.0253906 C198.941406,10.1888021 198.395508,8.86848958 197.303711,8.06445312 C196.211914,7.26041667 194.625,6.85839844 192.542969,6.85839844 L187.172852,6.85839844 C186.808919,6.85839844 186.502116,6.98111979 186.252441,7.2265625 C186.002767,7.47200521 185.87793,7.77246094 185.87793,8.12792969 L185.87793,23.78125 C185.87793,24.1621094 186.004883,24.4604492 186.258789,24.6762695 C186.512695,24.8920898 186.813151,25 187.160156,25 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M202.130859,24.9873047 C202.486328,24.9873047 202.791016,24.8815104 203.044922,24.6699219 C203.298828,24.4583333 203.425781,24.1578776 203.425781,23.7685547 L203.425781,8.07714844 C203.425781,7.68782552 203.300944,7.38525391 203.05127,7.16943359 C202.801595,6.95361328 202.503255,6.84570312 202.15625,6.84570312 C201.809245,6.84570312 201.506673,6.95572917 201.248535,7.17578125 C200.990397,7.39583333 200.861328,7.69628906 200.861328,8.07714844 L200.861328,23.7685547 C200.861328,24.1494141 200.986165,24.4477539 201.23584,24.6635742 C201.485514,24.8793945 201.783854,24.9873047 202.130859,24.9873047 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M216.8125,27.8564453 C217.057943,27.8564453 217.267415,27.7760417 217.440918,27.6152344 C217.614421,27.4544271 217.701172,27.2470703 217.701172,26.9931641 C217.701172,26.8746745 217.667318,26.7223307 217.599609,26.5361328 C216.355469,23.0999349 215.733398,19.921875 215.733398,17.0019531 C215.733398,14.0481771 216.363932,10.8404948 217.625,7.37890625 C217.684245,7.23502604 217.713867,7.08691406 217.713867,6.93457031 C217.713867,6.68066406 217.625,6.46907552 217.447266,6.29980469 C217.269531,6.13053385 217.057943,6.04589844 216.8125,6.04589844 C216.380859,6.04589844 216.059245,6.3125 215.847656,6.84570312 C215.04362,8.79231771 214.419434,10.5802409 213.975098,12.2094727 C213.530762,13.8387044 213.308594,15.4404297 213.308594,17.0146484 C213.308594,18.6142578 213.534993,20.2286784 213.987793,21.8579102 C214.440592,23.4871419 215.077474,25.2708333 215.898438,27.2089844 C216.093099,27.640625 216.397786,27.8564453 216.8125,27.8564453 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M219.709961,27.1074219 C220.141602,27.1074219 220.437826,26.8873698 220.598633,26.4472656 L226.743164,8.02636719 C226.785482,7.87402344 226.806641,7.75130208 226.806641,7.65820312 C226.806641,7.39583333 226.711426,7.18424479 226.520996,7.0234375 C226.330566,6.86263021 226.108398,6.78222656 225.854492,6.78222656 C225.380534,6.78222656 225.071615,7.00227865 224.927734,7.44238281 L218.833984,25.8759766 C218.791667,26.0283203 218.770508,26.1468099 218.770508,26.2314453 C218.770508,26.4938151 218.863607,26.7054036 219.049805,26.8662109 C219.236003,27.0270182 219.456055,27.1074219 219.709961,27.1074219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M229.615234,25 C229.928385,25 230.201335,24.906901 230.434082,24.7207031 C230.666829,24.5345052 230.783203,24.2721354 230.783203,23.9335938 L230.783203,17.3955078 C230.783203,16.4052734 231.09847,15.6139323 231.729004,15.0214844 C232.359538,14.4290365 233.246094,14.1328125 234.388672,14.1328125 C234.659505,14.1328125 234.866862,14.0418294 235.010742,13.8598633 C235.154622,13.6778971 235.226562,13.4599609 235.226562,13.2060547 C235.226562,12.9352214 235.148275,12.6982422 234.991699,12.4951172 C234.835124,12.2919922 234.621419,12.1904297 234.350586,12.1904297 C233.453451,12.1904297 232.685384,12.4549154 232.046387,12.9838867 C231.407389,13.5128581 230.982096,14.1708984 230.770508,14.9580078 L230.783203,13.4345703 C230.783203,13.1129557 230.671061,12.8611654 230.446777,12.6791992 C230.222493,12.4972331 229.953776,12.40625 229.640625,12.40625 C229.327474,12.40625 229.056641,12.4972331 228.828125,12.6791992 C228.599609,12.8611654 228.485352,13.1214193 228.485352,13.4599609 L228.485352,23.9335938 C228.485352,24.2721354 228.595378,24.5345052 228.81543,24.7207031 C229.035482,24.906901 229.302083,25 229.615234,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M238.263672,17.7382812 C238.30599,17.2135417 238.418132,16.7163086 238.600098,16.246582 C238.782064,15.7768555 239.027507,15.3494466 239.336426,14.9643555 C239.645345,14.5792643 240.041016,14.2724609 240.523438,14.0439453 C241.005859,13.8154297 241.543294,13.7011719 242.135742,13.7011719 C243.27832,13.7011719 244.145833,14.0777995 244.738281,14.8310547 C245.330729,15.5843099 245.665039,16.5533854 245.741211,17.7382812 L238.263672,17.7382812 Z M242.173828,25.2285156 C244.069661,25.2285156 245.686198,24.6276042 247.023438,23.4257812 C247.260417,23.2141927 247.378906,22.9729818 247.378906,22.7021484 C247.378906,22.4905599 247.309082,22.3064779 247.169434,22.1499023 C247.029785,21.9933268 246.858398,21.9150391 246.655273,21.9150391 C246.486003,21.9150391 246.325195,21.9742839 246.172852,22.0927734 C245.563477,22.5582682 244.958333,22.9264323 244.357422,23.1972656 C243.75651,23.468099 243.079427,23.6035156 242.326172,23.6035156 C241.132812,23.5865885 240.155273,23.2036133 239.393555,22.4545898 C238.631836,21.7055664 238.242513,20.6031901 238.225586,19.1474609 L246.93457,19.1474609 C247.205404,19.1474609 247.408529,19.0670573 247.543945,18.90625 C247.679362,18.7454427 247.74707,18.5423177 247.74707,18.296875 C247.72168,17.4420573 247.598958,16.6570638 247.378906,15.9418945 C247.158854,15.2267253 246.828776,14.5792643 246.388672,13.9995117 C245.948568,13.4197591 245.35612,12.9648438 244.611328,12.6347656 C243.866536,12.3046875 242.999023,12.1396484 242.008789,12.1396484 C240.756185,12.1396484 239.660156,12.4443359 238.720703,13.0537109 C237.78125,13.6630859 237.078776,14.4544271 236.613281,15.4277344 C236.147786,16.4010417 235.915039,17.4759115 235.915039,18.6523438 C235.915039,19.9895833 236.19222,21.159668 236.746582,22.1625977 C237.300944,23.1655273 238.047852,23.9251302 238.987305,24.4414062 C239.926758,24.9576823 240.988932,25.2200521 242.173828,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M251.063477,25 C251.376628,25 251.649577,24.906901 251.882324,24.7207031 C252.115072,24.5345052 252.231445,24.2721354 252.231445,23.9335938 L252.231445,17.3955078 C252.231445,16.4052734 252.546712,15.6139323 253.177246,15.0214844 C253.80778,14.4290365 254.694336,14.1328125 255.836914,14.1328125 C256.107747,14.1328125 256.315104,14.0418294 256.458984,13.8598633 C256.602865,13.6778971 256.674805,13.4599609 256.674805,13.2060547 C256.674805,12.9352214 256.596517,12.6982422 256.439941,12.4951172 C256.283366,12.2919922 256.069661,12.1904297 255.798828,12.1904297 C254.901693,12.1904297 254.133626,12.4549154 253.494629,12.9838867 C252.855632,13.5128581 252.430339,14.1708984 252.21875,14.9580078 L252.231445,13.4345703 C252.231445,13.1129557 252.119303,12.8611654 251.89502,12.6791992 C251.670736,12.4972331 251.402018,12.40625 251.088867,12.40625 C250.775716,12.40625 250.504883,12.4972331 250.276367,12.6791992 C250.047852,12.8611654 249.933594,13.1214193 249.933594,13.4599609 L249.933594,23.9335938 C249.933594,24.2721354 250.04362,24.5345052 250.263672,24.7207031 C250.483724,24.906901 250.750326,25 251.063477,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M261.806641,23.7050781 C261.019531,23.7050781 260.397461,23.5252279 259.94043,23.1655273 C259.483398,22.8058268 259.254883,22.2408854 259.254883,21.4707031 C259.254883,21.047526 259.316243,20.7005208 259.438965,20.4296875 C259.561686,20.1588542 259.75,19.9282227 260.003906,19.737793 C260.257812,19.5473633 260.638672,19.4034831 261.146484,19.3061523 C261.654297,19.2088216 262.238281,19.1411133 262.898438,19.1030273 C263.558594,19.0649414 264.413411,19.0458984 265.462891,19.0458984 L265.462891,19.4267578 C265.462891,20.6708984 265.096842,21.694987 264.364746,22.4990234 C263.63265,23.3030599 262.779948,23.7050781 261.806641,23.7050781 Z M261.56543,25.2285156 C263.385091,25.2285156 264.688477,24.4033203 265.475586,22.7529297 L265.475586,23.9716797 C265.475586,24.3017578 265.583496,24.5577799 265.799316,24.7397461 C266.015137,24.9217122 266.275391,25.0126953 266.580078,25.0126953 C266.884766,25.0126953 267.153483,24.9174805 267.38623,24.7270508 C267.618978,24.5366211 267.735352,24.2763672 267.735352,23.9462891 L267.735352,16.5449219 C267.735352,15.046875 267.297363,13.938151 266.421387,13.21875 C265.54541,12.499349 264.333008,12.1396484 262.78418,12.1396484 C260.972982,12.1396484 259.411458,12.5078125 258.099609,13.2441406 C257.86263,13.3795573 257.744141,13.5742188 257.744141,13.828125 C257.744141,14.0481771 257.824544,14.2491862 257.985352,14.4311523 C258.146159,14.6131185 258.336589,14.7041016 258.556641,14.7041016 C258.67513,14.7041016 258.776693,14.6829427 258.861328,14.640625 C259.292969,14.4459635 259.656901,14.2936198 259.953125,14.1835938 C260.249349,14.0735677 260.655599,13.9635417 261.171875,13.8535156 C261.688151,13.7434896 262.204427,13.6884766 262.720703,13.6884766 C263.583984,13.6884766 264.256836,13.9042969 264.739258,14.3359375 C265.22168,14.7675781 265.462891,15.4361979 265.462891,16.3417969 L265.462891,17.7255859 C264.523438,17.7255859 263.721517,17.7382812 263.057129,17.7636719 C262.392741,17.7890625 261.747396,17.8334961 261.121094,17.8969727 C260.494792,17.9604492 259.978516,18.0535482 259.572266,18.1762695 C259.166016,18.2989909 258.79362,18.4555664 258.455078,18.6459961 C258.116536,18.8364258 257.854167,19.0691732 257.667969,19.3442383 C257.481771,19.6193034 257.337891,19.9388021 257.236328,20.3027344 C257.134766,20.6666667 257.083984,21.0898438 257.083984,21.5722656 C257.083984,22.7402344 257.498698,23.6416016 258.328125,24.2763672 C259.157552,24.9111328 260.236654,25.2285156 261.56543,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M271.584961,25 C271.898112,25 272.171061,24.9026693 272.403809,24.7080078 C272.636556,24.5133464 272.75293,24.2467448 272.75293,23.9082031 L272.75293,17.5732422 C272.812174,16.3798828 273.176107,15.4361979 273.844727,14.7421875 C274.513346,14.0481771 275.33431,13.7011719 276.307617,13.7011719 C277.22168,13.7011719 277.930501,13.9847005 278.434082,14.5517578 C278.937663,15.1188151 279.189453,15.9440104 279.189453,17.0273438 L279.189453,23.9082031 C279.189453,24.2552083 279.301595,24.5239258 279.525879,24.7143555 C279.750163,24.9047852 280.01888,25 280.332031,25 C280.645182,25 280.9139,24.9047852 281.138184,24.7143555 C281.362467,24.5239258 281.474609,24.2552083 281.474609,23.9082031 L281.474609,17.0527344 C281.474609,15.4023438 281.059896,14.1708984 280.230469,13.3583984 C279.401042,12.5458984 278.288086,12.1396484 276.891602,12.1396484 C275.918294,12.1396484 275.065592,12.3470052 274.333496,12.7617188 C273.6014,13.1764323 273.074544,13.7688802 272.75293,14.5390625 L272.75293,13.4345703 C272.75293,13.1044922 272.640788,12.8484701 272.416504,12.6665039 C272.19222,12.4845378 271.923503,12.3935547 271.610352,12.3935547 C271.297201,12.3935547 271.026367,12.4887695 270.797852,12.6791992 C270.569336,12.8696289 270.455078,13.1341146 270.455078,13.4726562 L270.455078,23.9082031 C270.455078,24.2552083 270.565104,24.5239258 270.785156,24.7143555 C271.005208,24.9047852 271.27181,25 271.584961,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M285.324219,25 C285.63737,25 285.910319,24.8984375 286.143066,24.6953125 C286.375814,24.4921875 286.492188,24.2086589 286.492188,23.8447266 L286.492188,18.3095703 L292.458984,24.6572266 C292.670573,24.8942057 292.924479,25.0126953 293.220703,25.0126953 C293.5,25.0126953 293.747559,24.906901 293.963379,24.6953125 C294.179199,24.483724 294.287109,24.242513 294.287109,23.9716797 C294.287109,23.726237 294.19401,23.5061849 294.007812,23.3115234 L288.967773,18.0048828 L293.576172,13.8789062 C293.779297,13.6842448 293.880859,13.4684245 293.880859,13.2314453 C293.880859,12.9690755 293.777181,12.7320964 293.569824,12.5205078 C293.362467,12.3089193 293.123372,12.203125 292.852539,12.203125 C292.640951,12.203125 292.442057,12.2835286 292.255859,12.4443359 L286.492188,17.7255859 L286.492188,8.01367188 C286.492188,7.65820312 286.380046,7.37890625 286.155762,7.17578125 C285.931478,6.97265625 285.666992,6.87109375 285.362305,6.87109375 C285.049154,6.87109375 284.776204,6.97265625 284.543457,7.17578125 C284.31071,7.37890625 284.194336,7.65820312 284.194336,8.01367188 L284.194336,23.8447266 C284.194336,24.2171224 284.304362,24.5027669 284.524414,24.7016602 C284.744466,24.9005534 285.011068,25 285.324219,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M295.902344,29.2529297 L296.613281,29.2529297 C296.934896,29.2529297 297.15918,29.0709635 297.286133,28.7070312 L299.368164,22.6767578 C299.393555,22.5751953 299.40625,22.4990234 299.40625,22.4482422 C299.40625,22.1520182 299.249674,21.9912109 298.936523,21.9658203 L297.59082,21.9658203 C297.049154,21.9827474 296.740234,22.2197266 296.664062,22.6767578 L295.597656,28.7070312 C295.589193,28.7324219 295.584961,28.7832031 295.584961,28.859375 C295.584961,29.1132812 295.690755,29.2444661 295.902344,29.2529297 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M308.514648,27.1074219 C308.946289,27.1074219 309.242513,26.8873698 309.40332,26.4472656 L315.547852,8.02636719 C315.590169,7.87402344 315.611328,7.75130208 315.611328,7.65820312 C315.611328,7.39583333 315.516113,7.18424479 315.325684,7.0234375 C315.135254,6.86263021 314.913086,6.78222656 314.65918,6.78222656 C314.185221,6.78222656 313.876302,7.00227865 313.732422,7.44238281 L307.638672,25.8759766 C307.596354,26.0283203 307.575195,26.1468099 307.575195,26.2314453 C307.575195,26.4938151 307.668294,26.7054036 307.854492,26.8662109 C308.04069,27.0270182 308.260742,27.1074219 308.514648,27.1074219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M321.987305,25.0126953 C322.469727,25.0126953 322.84847,24.8836263 323.123535,24.6254883 C323.3986,24.3673503 323.667318,23.9251302 323.929688,23.2988281 L327.890625,13.8154297 C327.941406,13.7054036 327.966797,13.5869141 327.966797,13.4599609 C327.966797,13.1722005 327.846191,12.9204102 327.60498,12.7045898 C327.36377,12.4887695 327.095052,12.3808594 326.798828,12.3808594 C326.358724,12.3808594 326.045573,12.6220703 325.859375,13.1044922 L321.987305,22.6640625 L318.102539,13.1044922 C317.916341,12.6220703 317.60319,12.3808594 317.163086,12.3808594 C316.866862,12.3808594 316.598145,12.4887695 316.356934,12.7045898 C316.115723,12.9204102 315.995117,13.1722005 315.995117,13.4599609 C315.995117,13.5869141 316.020508,13.7054036 316.071289,13.8154297 L320.032227,23.2988281 C320.294596,23.9251302 320.563314,24.3673503 320.838379,24.6254883 C321.113444,24.8836263 321.496419,25.0126953 321.987305,25.0126953 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M335.878906,25 C336.225911,25 336.526367,24.8942057 336.780273,24.6826172 C337.03418,24.4710286 337.161133,24.1748047 337.161133,23.7939453 L337.148438,8.10253906 C337.148438,7.7047526 337.032064,7.39794922 336.799316,7.18212891 C336.566569,6.96630859 336.280924,6.85839844 335.942383,6.85839844 C335.663086,6.85839844 335.430339,6.921875 335.244141,7.04882812 L330.889648,9.765625 C330.601888,9.96028646 330.458008,10.2057292 330.458008,10.5019531 C330.458008,10.7558594 330.548991,10.9759115 330.730957,11.1621094 C330.912923,11.3483073 331.130859,11.4414062 331.384766,11.4414062 C331.554036,11.4414062 331.719076,11.3948568 331.879883,11.3017578 L334.62207,9.53710938 L334.62207,23.7939453 C334.62207,24.1748047 334.746908,24.4710286 334.996582,24.6826172 C335.246257,24.8942057 335.540365,25 335.878906,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M343.105469,27.1074219 C343.537109,27.1074219 343.833333,26.8873698 343.994141,26.4472656 L350.138672,8.02636719 C350.18099,7.87402344 350.202148,7.75130208 350.202148,7.65820312 C350.202148,7.39583333 350.106934,7.18424479 349.916504,7.0234375 C349.726074,6.86263021 349.503906,6.78222656 349.25,6.78222656 C348.776042,6.78222656 348.467122,7.00227865 348.323242,7.44238281 L342.229492,25.8759766 C342.187174,26.0283203 342.166016,26.1468099 342.166016,26.2314453 C342.166016,26.4938151 342.259115,26.7054036 342.445312,26.8662109 C342.63151,27.0270182 342.851562,27.1074219 343.105469,27.1074219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M353.010742,25 C353.323893,25 353.596842,24.906901 353.82959,24.7207031 C354.062337,24.5345052 354.178711,24.2721354 354.178711,23.9335938 L354.178711,17.3955078 C354.178711,16.4052734 354.493978,15.6139323 355.124512,15.0214844 C355.755046,14.4290365 356.641602,14.1328125 357.78418,14.1328125 C358.055013,14.1328125 358.26237,14.0418294 358.40625,13.8598633 C358.55013,13.6778971 358.62207,13.4599609 358.62207,13.2060547 C358.62207,12.9352214 358.543783,12.6982422 358.387207,12.4951172 C358.230632,12.2919922 358.016927,12.1904297 357.746094,12.1904297 C356.848958,12.1904297 356.080892,12.4549154 355.441895,12.9838867 C354.802897,13.5128581 354.377604,14.1708984 354.166016,14.9580078 L354.178711,13.4345703 C354.178711,13.1129557 354.066569,12.8611654 353.842285,12.6791992 C353.618001,12.4972331 353.349284,12.40625 353.036133,12.40625 C352.722982,12.40625 352.452148,12.4972331 352.223633,12.6791992 C351.995117,12.8611654 351.880859,13.1214193 351.880859,13.4599609 L351.880859,23.9335938 C351.880859,24.2721354 351.990885,24.5345052 352.210938,24.7207031 C352.43099,24.906901 352.697591,25 353.010742,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M361.65918,17.7382812 C361.701497,17.2135417 361.813639,16.7163086 361.995605,16.246582 C362.177572,15.7768555 362.423014,15.3494466 362.731934,14.9643555 C363.040853,14.5792643 363.436523,14.2724609 363.918945,14.0439453 C364.401367,13.8154297 364.938802,13.7011719 365.53125,13.7011719 C366.673828,13.7011719 367.541341,14.0777995 368.133789,14.8310547 C368.726237,15.5843099 369.060547,16.5533854 369.136719,17.7382812 L361.65918,17.7382812 Z M365.569336,25.2285156 C367.465169,25.2285156 369.081706,24.6276042 370.418945,23.4257812 C370.655924,23.2141927 370.774414,22.9729818 370.774414,22.7021484 C370.774414,22.4905599 370.70459,22.3064779 370.564941,22.1499023 C370.425293,21.9933268 370.253906,21.9150391 370.050781,21.9150391 C369.88151,21.9150391 369.720703,21.9742839 369.568359,22.0927734 C368.958984,22.5582682 368.353841,22.9264323 367.75293,23.1972656 C367.152018,23.468099 366.474935,23.6035156 365.72168,23.6035156 C364.52832,23.5865885 363.550781,23.2036133 362.789062,22.4545898 C362.027344,21.7055664 361.638021,20.6031901 361.621094,19.1474609 L370.330078,19.1474609 C370.600911,19.1474609 370.804036,19.0670573 370.939453,18.90625 C371.07487,18.7454427 371.142578,18.5423177 371.142578,18.296875 C371.117188,17.4420573 370.994466,16.6570638 370.774414,15.9418945 C370.554362,15.2267253 370.224284,14.5792643 369.78418,13.9995117 C369.344076,13.4197591 368.751628,12.9648438 368.006836,12.6347656 C367.262044,12.3046875 366.394531,12.1396484 365.404297,12.1396484 C364.151693,12.1396484 363.055664,12.4443359 362.116211,13.0537109 C361.176758,13.6630859 360.474284,14.4544271 360.008789,15.4277344 C359.543294,16.4010417 359.310547,17.4759115 359.310547,18.6523438 C359.310547,19.9895833 359.587728,21.159668 360.14209,22.1625977 C360.696452,23.1655273 361.443359,23.9251302 362.382812,24.4414062 C363.322266,24.9576823 364.38444,25.2200521 365.569336,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M374.458984,25 C374.772135,25 375.045085,24.906901 375.277832,24.7207031 C375.510579,24.5345052 375.626953,24.2721354 375.626953,23.9335938 L375.626953,17.3955078 C375.626953,16.4052734 375.94222,15.6139323 376.572754,15.0214844 C377.203288,14.4290365 378.089844,14.1328125 379.232422,14.1328125 C379.503255,14.1328125 379.710612,14.0418294 379.854492,13.8598633 C379.998372,13.6778971 380.070312,13.4599609 380.070312,13.2060547 C380.070312,12.9352214 379.992025,12.6982422 379.835449,12.4951172 C379.678874,12.2919922 379.465169,12.1904297 379.194336,12.1904297 C378.297201,12.1904297 377.529134,12.4549154 376.890137,12.9838867 C376.251139,13.5128581 375.825846,14.1708984 375.614258,14.9580078 L375.626953,13.4345703 C375.626953,13.1129557 375.514811,12.8611654 375.290527,12.6791992 C375.066243,12.4972331 374.797526,12.40625 374.484375,12.40625 C374.171224,12.40625 373.900391,12.4972331 373.671875,12.6791992 C373.443359,12.8611654 373.329102,13.1214193 373.329102,13.4599609 L373.329102,23.9335938 C373.329102,24.2721354 373.439128,24.5345052 373.65918,24.7207031 C373.879232,24.906901 374.145833,25 374.458984,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M385.202148,23.7050781 C384.415039,23.7050781 383.792969,23.5252279 383.335938,23.1655273 C382.878906,22.8058268 382.650391,22.2408854 382.650391,21.4707031 C382.650391,21.047526 382.711751,20.7005208 382.834473,20.4296875 C382.957194,20.1588542 383.145508,19.9282227 383.399414,19.737793 C383.65332,19.5473633 384.03418,19.4034831 384.541992,19.3061523 C385.049805,19.2088216 385.633789,19.1411133 386.293945,19.1030273 C386.954102,19.0649414 387.808919,19.0458984 388.858398,19.0458984 L388.858398,19.4267578 C388.858398,20.6708984 388.49235,21.694987 387.760254,22.4990234 C387.028158,23.3030599 386.175456,23.7050781 385.202148,23.7050781 Z M384.960938,25.2285156 C386.780599,25.2285156 388.083984,24.4033203 388.871094,22.7529297 L388.871094,23.9716797 C388.871094,24.3017578 388.979004,24.5577799 389.194824,24.7397461 C389.410645,24.9217122 389.670898,25.0126953 389.975586,25.0126953 C390.280273,25.0126953 390.548991,24.9174805 390.781738,24.7270508 C391.014486,24.5366211 391.130859,24.2763672 391.130859,23.9462891 L391.130859,16.5449219 C391.130859,15.046875 390.692871,13.938151 389.816895,13.21875 C388.940918,12.499349 387.728516,12.1396484 386.179688,12.1396484 C384.36849,12.1396484 382.806966,12.5078125 381.495117,13.2441406 C381.258138,13.3795573 381.139648,13.5742188 381.139648,13.828125 C381.139648,14.0481771 381.220052,14.2491862 381.380859,14.4311523 C381.541667,14.6131185 381.732096,14.7041016 381.952148,14.7041016 C382.070638,14.7041016 382.172201,14.6829427 382.256836,14.640625 C382.688477,14.4459635 383.052409,14.2936198 383.348633,14.1835938 C383.644857,14.0735677 384.051107,13.9635417 384.567383,13.8535156 C385.083659,13.7434896 385.599935,13.6884766 386.116211,13.6884766 C386.979492,13.6884766 387.652344,13.9042969 388.134766,14.3359375 C388.617188,14.7675781 388.858398,15.4361979 388.858398,16.3417969 L388.858398,17.7255859 C387.918945,17.7255859 387.117025,17.7382812 386.452637,17.7636719 C385.788249,17.7890625 385.142904,17.8334961 384.516602,17.8969727 C383.890299,17.9604492 383.374023,18.0535482 382.967773,18.1762695 C382.561523,18.2989909 382.189128,18.4555664 381.850586,18.6459961 C381.512044,18.8364258 381.249674,19.0691732 381.063477,19.3442383 C380.877279,19.6193034 380.733398,19.9388021 380.631836,20.3027344 C380.530273,20.6666667 380.479492,21.0898438 380.479492,21.5722656 C380.479492,22.7402344 380.894206,23.6416016 381.723633,24.2763672 C382.55306,24.9111328 383.632161,25.2285156 384.960938,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M394.980469,25 C395.29362,25 395.566569,24.9026693 395.799316,24.7080078 C396.032064,24.5133464 396.148438,24.2467448 396.148438,23.9082031 L396.148438,17.5732422 C396.207682,16.3798828 396.571615,15.4361979 397.240234,14.7421875 C397.908854,14.0481771 398.729818,13.7011719 399.703125,13.7011719 C400.617188,13.7011719 401.326009,13.9847005 401.82959,14.5517578 C402.333171,15.1188151 402.584961,15.9440104 402.584961,17.0273438 L402.584961,23.9082031 C402.584961,24.2552083 402.697103,24.5239258 402.921387,24.7143555 C403.145671,24.9047852 403.414388,25 403.727539,25 C404.04069,25 404.309408,24.9047852 404.533691,24.7143555 C404.757975,24.5239258 404.870117,24.2552083 404.870117,23.9082031 L404.870117,17.0527344 C404.870117,15.4023438 404.455404,14.1708984 403.625977,13.3583984 C402.796549,12.5458984 401.683594,12.1396484 400.287109,12.1396484 C399.313802,12.1396484 398.4611,12.3470052 397.729004,12.7617188 C396.996908,13.1764323 396.470052,13.7688802 396.148438,14.5390625 L396.148438,13.4345703 C396.148438,13.1044922 396.036296,12.8484701 395.812012,12.6665039 C395.587728,12.4845378 395.31901,12.3935547 395.005859,12.3935547 C394.692708,12.3935547 394.421875,12.4887695 394.193359,12.6791992 C393.964844,12.8696289 393.850586,13.1341146 393.850586,13.4726562 L393.850586,23.9082031 C393.850586,24.2552083 393.960612,24.5239258 394.180664,24.7143555 C394.400716,24.9047852 394.667318,25 394.980469,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M408.719727,25 C409.032878,25 409.305827,24.8984375 409.538574,24.6953125 C409.771322,24.4921875 409.887695,24.2086589 409.887695,23.8447266 L409.887695,18.3095703 L415.854492,24.6572266 C416.066081,24.8942057 416.319987,25.0126953 416.616211,25.0126953 C416.895508,25.0126953 417.143066,24.906901 417.358887,24.6953125 C417.574707,24.483724 417.682617,24.242513 417.682617,23.9716797 C417.682617,23.726237 417.589518,23.5061849 417.40332,23.3115234 L412.363281,18.0048828 L416.97168,13.8789062 C417.174805,13.6842448 417.276367,13.4684245 417.276367,13.2314453 C417.276367,12.9690755 417.172689,12.7320964 416.965332,12.5205078 C416.757975,12.3089193 416.51888,12.203125 416.248047,12.203125 C416.036458,12.203125 415.837565,12.2835286 415.651367,12.4443359 L409.887695,17.7255859 L409.887695,8.01367188 C409.887695,7.65820312 409.775553,7.37890625 409.55127,7.17578125 C409.326986,6.97265625 409.0625,6.87109375 408.757812,6.87109375 C408.444661,6.87109375 408.171712,6.97265625 407.938965,7.17578125 C407.706217,7.37890625 407.589844,7.65820312 407.589844,8.01367188 L407.589844,23.8447266 C407.589844,24.2171224 407.69987,24.5027669 407.919922,24.7016602 C408.139974,24.9005534 408.406576,25 408.719727,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M419.297852,29.2529297 L420.008789,29.2529297 C420.330404,29.2529297 420.554688,29.0709635 420.681641,28.7070312 L422.763672,22.6767578 C422.789062,22.5751953 422.801758,22.4990234 422.801758,22.4482422 C422.801758,22.1520182 422.645182,21.9912109 422.332031,21.9658203 L420.986328,21.9658203 C420.444661,21.9827474 420.135742,22.2197266 420.05957,22.6767578 L418.993164,28.7070312 C418.984701,28.7324219 418.980469,28.7832031 418.980469,28.859375 C418.980469,29.1132812 419.086263,29.2444661 419.297852,29.2529297 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M431.910156,27.1074219 C432.341797,27.1074219 432.638021,26.8873698 432.798828,26.4472656 L438.943359,8.02636719 C438.985677,7.87402344 439.006836,7.75130208 439.006836,7.65820312 C439.006836,7.39583333 438.911621,7.18424479 438.721191,7.0234375 C438.530762,6.86263021 438.308594,6.78222656 438.054688,6.78222656 C437.580729,6.78222656 437.27181,7.00227865 437.12793,7.44238281 L431.03418,25.8759766 C430.991862,26.0283203 430.970703,26.1468099 430.970703,26.2314453 C430.970703,26.4938151 431.063802,26.7054036 431.25,26.8662109 C431.436198,27.0270182 431.65625,27.1074219 431.910156,27.1074219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M445.382812,25.0126953 C445.865234,25.0126953 446.243978,24.8836263 446.519043,24.6254883 C446.794108,24.3673503 447.062826,23.9251302 447.325195,23.2988281 L451.286133,13.8154297 C451.336914,13.7054036 451.362305,13.5869141 451.362305,13.4599609 C451.362305,13.1722005 451.241699,12.9204102 451.000488,12.7045898 C450.759277,12.4887695 450.49056,12.3808594 450.194336,12.3808594 C449.754232,12.3808594 449.441081,12.6220703 449.254883,13.1044922 L445.382812,22.6640625 L441.498047,13.1044922 C441.311849,12.6220703 440.998698,12.3808594 440.558594,12.3808594 C440.26237,12.3808594 439.993652,12.4887695 439.752441,12.7045898 C439.51123,12.9204102 439.390625,13.1722005 439.390625,13.4599609 C439.390625,13.5869141 439.416016,13.7054036 439.466797,13.8154297 L443.427734,23.2988281 C443.690104,23.9251302 443.958822,24.3673503 444.233887,24.6254883 C444.508952,24.8836263 444.891927,25.0126953 445.382812,25.0126953 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M464.162109,11.8222656 C464.162109,10.4003906 463.635254,9.19433594 462.581543,8.20410156 C461.527832,7.21386719 460.205404,6.71875 458.614258,6.71875 C457.49707,6.71875 456.555501,6.85205078 455.789551,7.11865234 C455.0236,7.38525391 454.22168,7.77246094 453.383789,8.28027344 C453.053711,8.48339844 452.888672,8.74576823 452.888672,9.06738281 C452.888672,9.33821615 452.99235,9.58154297 453.199707,9.79736328 C453.407064,10.0131836 453.641927,10.1210938 453.904297,10.1210938 C454.03125,10.1210938 454.153971,10.0914714 454.272461,10.0322266 C454.348633,9.98990885 454.583496,9.8523763 454.977051,9.61962891 C455.370605,9.38688151 455.666829,9.2281901 455.865723,9.14355469 C456.064616,9.05891927 456.403158,8.96582031 456.881348,8.86425781 C457.359538,8.76269531 457.87793,8.71191406 458.436523,8.71191406 C459.375977,8.71191406 460.169434,9.01871745 460.816895,9.63232422 C461.464355,10.245931 461.788086,11.0520833 461.788086,12.0507812 C461.788086,12.6432292 461.627279,13.2335612 461.305664,13.8217773 C460.984049,14.4099935 460.480469,15.0553385 459.794922,15.7578125 L453.193359,22.4482422 C452.795573,22.8460286 452.59668,23.2607422 452.59668,23.6923828 C452.59668,24.0647786 452.749023,24.3758138 453.053711,24.6254883 C453.358398,24.8751628 453.760417,25 454.259766,25 L462.994141,25 C463.307292,25 463.552734,24.9047852 463.730469,24.7143555 C463.908203,24.5239258 463.99707,24.297526 463.99707,24.0351562 C463.99707,23.7727865 463.908203,23.5463867 463.730469,23.355957 C463.552734,23.1655273 463.307292,23.0703125 462.994141,23.0703125 L455.351562,23.0703125 L461.15332,17.4208984 C462.22819,16.4306641 462.998372,15.4954427 463.463867,14.6152344 C463.929362,13.735026 464.162109,12.8040365 464.162109,11.8222656 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M466.551758,27.1074219 C466.983398,27.1074219 467.279622,26.8873698 467.44043,26.4472656 L473.584961,8.02636719 C473.627279,7.87402344 473.648438,7.75130208 473.648438,7.65820312 C473.648438,7.39583333 473.553223,7.18424479 473.362793,7.0234375 C473.172363,6.86263021 472.950195,6.78222656 472.696289,6.78222656 C472.222331,6.78222656 471.913411,7.00227865 471.769531,7.44238281 L465.675781,25.8759766 C465.633464,26.0283203 465.612305,26.1468099 465.612305,26.2314453 C465.612305,26.4938151 465.705404,26.7054036 465.891602,26.8662109 C466.077799,27.0270182 466.297852,27.1074219 466.551758,27.1074219 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M476.457031,25 C476.770182,25 477.043132,24.906901 477.275879,24.7207031 C477.508626,24.5345052 477.625,24.2721354 477.625,23.9335938 L477.625,17.3955078 C477.625,16.4052734 477.940267,15.6139323 478.570801,15.0214844 C479.201335,14.4290365 480.087891,14.1328125 481.230469,14.1328125 C481.501302,14.1328125 481.708659,14.0418294 481.852539,13.8598633 C481.996419,13.6778971 482.068359,13.4599609 482.068359,13.2060547 C482.068359,12.9352214 481.990072,12.6982422 481.833496,12.4951172 C481.676921,12.2919922 481.463216,12.1904297 481.192383,12.1904297 C480.295247,12.1904297 479.527181,12.4549154 478.888184,12.9838867 C478.249186,13.5128581 477.823893,14.1708984 477.612305,14.9580078 L477.625,13.4345703 C477.625,13.1129557 477.512858,12.8611654 477.288574,12.6791992 C477.06429,12.4972331 476.795573,12.40625 476.482422,12.40625 C476.169271,12.40625 475.898438,12.4972331 475.669922,12.6791992 C475.441406,12.8611654 475.327148,13.1214193 475.327148,13.4599609 L475.327148,23.9335938 C475.327148,24.2721354 475.437174,24.5345052 475.657227,24.7207031 C475.877279,24.906901 476.14388,25 476.457031,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M485.105469,17.7382812 C485.147786,17.2135417 485.259928,16.7163086 485.441895,16.246582 C485.623861,15.7768555 485.869303,15.3494466 486.178223,14.9643555 C486.487142,14.5792643 486.882812,14.2724609 487.365234,14.0439453 C487.847656,13.8154297 488.385091,13.7011719 488.977539,13.7011719 C490.120117,13.7011719 490.98763,14.0777995 491.580078,14.8310547 C492.172526,15.5843099 492.506836,16.5533854 492.583008,17.7382812 L485.105469,17.7382812 Z M489.015625,25.2285156 C490.911458,25.2285156 492.527995,24.6276042 493.865234,23.4257812 C494.102214,23.2141927 494.220703,22.9729818 494.220703,22.7021484 C494.220703,22.4905599 494.150879,22.3064779 494.01123,22.1499023 C493.871582,21.9933268 493.700195,21.9150391 493.49707,21.9150391 C493.327799,21.9150391 493.166992,21.9742839 493.014648,22.0927734 C492.405273,22.5582682 491.80013,22.9264323 491.199219,23.1972656 C490.598307,23.468099 489.921224,23.6035156 489.167969,23.6035156 C487.974609,23.5865885 486.99707,23.2036133 486.235352,22.4545898 C485.473633,21.7055664 485.08431,20.6031901 485.067383,19.1474609 L493.776367,19.1474609 C494.047201,19.1474609 494.250326,19.0670573 494.385742,18.90625 C494.521159,18.7454427 494.588867,18.5423177 494.588867,18.296875 C494.563477,17.4420573 494.440755,16.6570638 494.220703,15.9418945 C494.000651,15.2267253 493.670573,14.5792643 493.230469,13.9995117 C492.790365,13.4197591 492.197917,12.9648438 491.453125,12.6347656 C490.708333,12.3046875 489.84082,12.1396484 488.850586,12.1396484 C487.597982,12.1396484 486.501953,12.4443359 485.5625,13.0537109 C484.623047,13.6630859 483.920573,14.4544271 483.455078,15.4277344 C482.989583,16.4010417 482.756836,17.4759115 482.756836,18.6523438 C482.756836,19.9895833 483.034017,21.159668 483.588379,22.1625977 C484.142741,23.1655273 484.889648,23.9251302 485.829102,24.4414062 C486.768555,24.9576823 487.830729,25.2200521 489.015625,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M497.905273,25 C498.218424,25 498.491374,24.906901 498.724121,24.7207031 C498.956868,24.5345052 499.073242,24.2721354 499.073242,23.9335938 L499.073242,17.3955078 C499.073242,16.4052734 499.388509,15.6139323 500.019043,15.0214844 C500.649577,14.4290365 501.536133,14.1328125 502.678711,14.1328125 C502.949544,14.1328125 503.156901,14.0418294 503.300781,13.8598633 C503.444661,13.6778971 503.516602,13.4599609 503.516602,13.2060547 C503.516602,12.9352214 503.438314,12.6982422 503.281738,12.4951172 C503.125163,12.2919922 502.911458,12.1904297 502.640625,12.1904297 C501.74349,12.1904297 500.975423,12.4549154 500.336426,12.9838867 C499.697428,13.5128581 499.272135,14.1708984 499.060547,14.9580078 L499.073242,13.4345703 C499.073242,13.1129557 498.9611,12.8611654 498.736816,12.6791992 C498.512533,12.4972331 498.243815,12.40625 497.930664,12.40625 C497.617513,12.40625 497.34668,12.4972331 497.118164,12.6791992 C496.889648,12.8611654 496.775391,13.1214193 496.775391,13.4599609 L496.775391,23.9335938 C496.775391,24.2721354 496.885417,24.5345052 497.105469,24.7207031 C497.325521,24.906901 497.592122,25 497.905273,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M508.648438,23.7050781 C507.861328,23.7050781 507.239258,23.5252279 506.782227,23.1655273 C506.325195,22.8058268 506.09668,22.2408854 506.09668,21.4707031 C506.09668,21.047526 506.15804,20.7005208 506.280762,20.4296875 C506.403483,20.1588542 506.591797,19.9282227 506.845703,19.737793 C507.099609,19.5473633 507.480469,19.4034831 507.988281,19.3061523 C508.496094,19.2088216 509.080078,19.1411133 509.740234,19.1030273 C510.400391,19.0649414 511.255208,19.0458984 512.304688,19.0458984 L512.304688,19.4267578 C512.304688,20.6708984 511.938639,21.694987 511.206543,22.4990234 C510.474447,23.3030599 509.621745,23.7050781 508.648438,23.7050781 Z M508.407227,25.2285156 C510.226888,25.2285156 511.530273,24.4033203 512.317383,22.7529297 L512.317383,23.9716797 C512.317383,24.3017578 512.425293,24.5577799 512.641113,24.7397461 C512.856934,24.9217122 513.117188,25.0126953 513.421875,25.0126953 C513.726562,25.0126953 513.99528,24.9174805 514.228027,24.7270508 C514.460775,24.5366211 514.577148,24.2763672 514.577148,23.9462891 L514.577148,16.5449219 C514.577148,15.046875 514.13916,13.938151 513.263184,13.21875 C512.387207,12.499349 511.174805,12.1396484 509.625977,12.1396484 C507.814779,12.1396484 506.253255,12.5078125 504.941406,13.2441406 C504.704427,13.3795573 504.585938,13.5742188 504.585938,13.828125 C504.585938,14.0481771 504.666341,14.2491862 504.827148,14.4311523 C504.987956,14.6131185 505.178385,14.7041016 505.398438,14.7041016 C505.516927,14.7041016 505.61849,14.6829427 505.703125,14.640625 C506.134766,14.4459635 506.498698,14.2936198 506.794922,14.1835938 C507.091146,14.0735677 507.497396,13.9635417 508.013672,13.8535156 C508.529948,13.7434896 509.046224,13.6884766 509.5625,13.6884766 C510.425781,13.6884766 511.098633,13.9042969 511.581055,14.3359375 C512.063477,14.7675781 512.304688,15.4361979 512.304688,16.3417969 L512.304688,17.7255859 C511.365234,17.7255859 510.563314,17.7382812 509.898926,17.7636719 C509.234538,17.7890625 508.589193,17.8334961 507.962891,17.8969727 C507.336589,17.9604492 506.820312,18.0535482 506.414062,18.1762695 C506.007812,18.2989909 505.635417,18.4555664 505.296875,18.6459961 C504.958333,18.8364258 504.695964,19.0691732 504.509766,19.3442383 C504.323568,19.6193034 504.179688,19.9388021 504.078125,20.3027344 C503.976562,20.6666667 503.925781,21.0898438 503.925781,21.5722656 C503.925781,22.7402344 504.340495,23.6416016 505.169922,24.2763672 C505.999349,24.9111328 507.078451,25.2285156 508.407227,25.2285156 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M518.426758,25 C518.739909,25 519.012858,24.9026693 519.245605,24.7080078 C519.478353,24.5133464 519.594727,24.2467448 519.594727,23.9082031 L519.594727,17.5732422 C519.653971,16.3798828 520.017904,15.4361979 520.686523,14.7421875 C521.355143,14.0481771 522.176107,13.7011719 523.149414,13.7011719 C524.063477,13.7011719 524.772298,13.9847005 525.275879,14.5517578 C525.77946,15.1188151 526.03125,15.9440104 526.03125,17.0273438 L526.03125,23.9082031 C526.03125,24.2552083 526.143392,24.5239258 526.367676,24.7143555 C526.59196,24.9047852 526.860677,25 527.173828,25 C527.486979,25 527.755697,24.9047852 527.97998,24.7143555 C528.204264,24.5239258 528.316406,24.2552083 528.316406,23.9082031 L528.316406,17.0527344 C528.316406,15.4023438 527.901693,14.1708984 527.072266,13.3583984 C526.242839,12.5458984 525.129883,12.1396484 523.733398,12.1396484 C522.760091,12.1396484 521.907389,12.3470052 521.175293,12.7617188 C520.443197,13.1764323 519.916341,13.7688802 519.594727,14.5390625 L519.594727,13.4345703 C519.594727,13.1044922 519.482585,12.8484701 519.258301,12.6665039 C519.034017,12.4845378 518.765299,12.3935547 518.452148,12.3935547 C518.138997,12.3935547 517.868164,12.4887695 517.639648,12.6791992 C517.411133,12.8696289 517.296875,13.1341146 517.296875,13.4726562 L517.296875,23.9082031 C517.296875,24.2552083 517.406901,24.5239258 517.626953,24.7143555 C517.847005,24.9047852 518.113607,25 518.426758,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M532.166016,25 C532.479167,25 532.752116,24.8984375 532.984863,24.6953125 C533.217611,24.4921875 533.333984,24.2086589 533.333984,23.8447266 L533.333984,18.3095703 L539.300781,24.6572266 C539.51237,24.8942057 539.766276,25.0126953 540.0625,25.0126953 C540.341797,25.0126953 540.589355,24.906901 540.805176,24.6953125 C541.020996,24.483724 541.128906,24.242513 541.128906,23.9716797 C541.128906,23.726237 541.035807,23.5061849 540.849609,23.3115234 L535.80957,18.0048828 L540.417969,13.8789062 C540.621094,13.6842448 540.722656,13.4684245 540.722656,13.2314453 C540.722656,12.9690755 540.618978,12.7320964 540.411621,12.5205078 C540.204264,12.3089193 539.965169,12.203125 539.694336,12.203125 C539.482747,12.203125 539.283854,12.2835286 539.097656,12.4443359 L533.333984,17.7255859 L533.333984,8.01367188 C533.333984,7.65820312 533.221842,7.37890625 532.997559,7.17578125 C532.773275,6.97265625 532.508789,6.87109375 532.204102,6.87109375 C531.890951,6.87109375 531.618001,6.97265625 531.385254,7.17578125 C531.152507,7.37890625 531.036133,7.65820312 531.036133,8.01367188 L531.036133,23.8447266 C531.036133,24.2171224 531.146159,24.5027669 531.366211,24.7016602 C531.586263,24.9005534 531.852865,25 532.166016,25 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M543.036133,27.8691406 C543.450846,27.8691406 543.759766,27.6490885 543.962891,27.2089844 C544.783854,25.2792969 545.420736,23.4977214 545.873535,21.8642578 C546.326335,20.2307943 546.552734,18.6184896 546.552734,17.0273438 C546.552734,15.4446615 546.330566,13.8387044 545.88623,12.2094727 C545.441895,10.5802409 544.817708,8.79231771 544.013672,6.84570312 C543.776693,6.32096354 543.45931,6.05859375 543.061523,6.05859375 C542.816081,6.05859375 542.602376,6.14322917 542.42041,6.3125 C542.238444,6.48177083 542.147461,6.6891276 542.147461,6.93457031 C542.147461,7.06998698 542.181315,7.21809896 542.249023,7.37890625 C543.501628,10.8658854 544.12793,14.0735677 544.12793,17.0019531 C544.12793,19.9726562 543.505859,23.1507161 542.261719,26.5361328 C542.19401,26.7138672 542.160156,26.8704427 542.160156,27.0058594 C542.160156,27.2513021 542.246908,27.456543 542.42041,27.621582 C542.593913,27.7866211 542.799154,27.8691406 543.036133,27.8691406 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+            </g>
+        </g>
+        <g id="lines-@lotus-purple" stroke-width="1" transform="translate(179, 122.5)" fill="#D85EAD" fill-opacity="0.7">
+            <path d="M34.1196441,5.87282542 L34.1047822,5.88037382 C34.0673007,5.89886895 34.0296556,5.91695518 33.9918539,5.93463184 C33.942479,5.95782698 33.8926954,5.98036241 33.842555,6.00223256 C33.8294027,6.00768506 33.8160659,6.01342442 33.8027115,6.01911386 C33.7439571,6.04455648 33.684941,6.06867708 33.6254638,6.09187613 C33.606479,6.09868128 33.5874532,6.10596603 33.5683959,6.11315168 C33.4348389,6.16432492 33.2988132,6.21027967 33.1607032,6.25139539 C33.1410635,6.25622811 33.121664,6.26188641 33.1022399,6.26744583 C32.7186609,6.37843257 32.318977,6.45119627 31.9077023,6.48249671 C31.8816972,6.48422038 31.8564145,6.48601174 31.8311205,6.48764386 C31.7720811,6.49158261 31.7122183,6.4944951 31.6521316,6.49652409 C31.6378113,6.49702608 31.6231584,6.49748276 31.6085032,6.49788605 C31.5543177,6.49927962 31.5002213,6.5 31.4459566,6.5 L31.3975554,6.50000116 C31.3695518,6.49979215 31.3415458,6.49938802 31.3135397,6.49878856 L31.4459566,6.5 C31.3601757,6.5 31.2748153,6.49819986 31.189911,6.49463531 C31.1470641,6.49307933 31.1036168,6.49078946 31.0601851,6.48802764 C31.022531,6.48540679 30.9852455,6.48269095 30.9480561,6.47963595 C30.9157993,6.47718663 30.883786,6.47428991 30.8517884,6.4711355 C30.8046864,6.4663237 30.7573703,6.46110597 30.7102232,6.45534136 C30.6748693,6.45114976 30.6395668,6.44650344 30.6042938,6.44154096 C30.5589901,6.43507112 30.5139745,6.42822516 30.4691262,6.42088194 C30.4386915,6.41595885 30.4083362,6.41073944 30.37801,6.40528369 C30.3143357,6.39380412 30.2506382,6.38128515 30.1873108,6.36776697 C30.1752599,6.3651961 30.1636113,6.36267034 30.1519681,6.36010925 C29.9945001,6.32551048 29.8388862,6.28458273 29.6858745,6.23770232 C29.6574484,6.22897316 29.6291057,6.22005863 29.6008121,6.21092613 C29.5597765,6.19769192 29.5191494,6.18408659 29.4787188,6.17006103 C29.4461187,6.15875248 29.4131045,6.14697905 29.3801672,6.13490357 C29.3246099,6.11451977 29.2700079,6.09356991 29.2157965,6.0718518 C29.2020926,6.06638064 29.1884146,6.06084288 29.1747516,6.05525192 C29.1083931,6.02807557 29.0424402,5.99967997 28.9771107,5.97014843 C28.9669765,5.96559273 28.9570057,5.96105183 28.9470439,5.95648181 C28.9035172,5.93649211 28.8600565,5.91591748 28.8168863,5.89483994 C28.7498155,5.86210325 28.6830597,5.82793792 28.6167869,5.79241947 C28.5823084,5.77396797 28.5483451,5.75534456 28.5145836,5.73640411 C28.5007116,5.72856725 28.4866714,5.72061183 28.4726546,5.71259441 C28.411516,5.67770783 28.3512324,5.64182362 28.2916428,5.60492517 C28.2781728,5.59644098 28.2645061,5.58789683 28.2508642,5.57929154 C28.1999473,5.5473482 28.1498668,5.51471929 28.100311,5.48137043 C28.0770093,5.46546007 28.0530503,5.44907945 28.029177,5.43250294 C27.8963051,5.34052084 27.7679335,5.24354252 27.6437241,5.14167647 C27.6394536,5.13794864 27.6351748,5.1344334 27.6308995,5.13091125 L27.6006118,5.1059835 C27.5523512,5.06564935 27.5047341,5.02457089 27.4577782,4.98276576 L27.6308995,5.13091125 C27.5594586,5.07205656 27.4897283,5.01189543 27.4217147,4.95049102 C27.3782963,4.91111011 27.3355903,4.87132536 27.2934724,4.83093223 C27.274515,4.81316787 27.2559297,4.79515501 27.2374837,4.77704624 C27.2099897,4.74948175 27.1824721,4.72187345 27.1552239,4.6940007 C26.7771352,4.30886214 26.4606861,3.88215254 26.2068652,3.4276686 C26.1953154,3.40595412 26.1833962,3.38435926 26.1716067,3.36268364 C26.1488816,3.32170662 26.1272119,3.28060119 26.1060448,3.23928822 C26.0896309,3.20678293 26.0729166,3.17351738 26.0565045,3.14007673 C26.0430036,3.11273557 26.0301767,3.08599799 26.0175563,3.05918103 C25.7638351,2.52209415 25.5873782,1.9406422 25.5028733,1.32981251 C25.5005469,1.30803251 25.4976434,1.28633482 25.494858,1.26462267 C25.4777735,1.135361 25.4654296,1.00462788 25.4573461,0.872742988 C25.4561424,0.85059139 25.4549083,0.828925364 25.4537909,0.807252338 C25.450778,0.74961703 25.4486762,0.691514882 25.4474061,0.63320779 C25.4467517,0.607448636 25.4463351,0.581820945 25.4460818,0.556188992 C25.4460425,0.537468344 25.4459566,0.51874421 25.4459566,0.5 L25.4460133,0.444419592 C25.4463538,0.405074597 25.4470795,0.365725324 25.448191,0.326377848 L25.4459566,0.5 C25.4459566,0.409491044 25.4479606,0.319450158 25.4519268,0.229919294 C25.4536272,0.186127454 25.4560587,0.14211793 25.4589746,0.0981253712 C25.4616621,0.060938722 25.4644506,0.0239658292 25.4675727,-0.0129120316 C25.4707322,-0.0525782003 25.4745307,-0.0925912378 25.4787322,-0.132577855 C25.4817463,-0.159685459 25.4847621,-0.186694827 25.4879565,-0.213649422 C25.4935976,-0.262363351 25.4999551,-0.310805202 25.5069079,-0.359190212 C25.5106843,-0.384816827 25.5145885,-0.410835424 25.5186593,-0.436798789 C25.5259347,-0.483606381 25.5337732,-0.529932214 25.5421618,-0.576189805 C25.6086706,-0.943444018 25.7088045,-1.2996869 25.8392034,-1.64097165 C25.8511766,-1.67234436 25.8634595,-1.70370071 25.87602,-1.73498016 C25.892326,-1.77553061 25.9089719,-1.81569399 25.9260386,-1.8556346 C25.9407188,-1.89005203 25.955795,-1.92443638 25.9712142,-1.9587155 C26.0183928,-2.06353754 26.068386,-2.16660483 26.1211891,-2.26797516 C26.133149,-2.29091859 26.1455206,-2.31428537 26.1580597,-2.33759215 C26.2159904,-2.44535076 26.2769829,-2.55087797 26.3410314,-2.65431376 C26.3607046,-2.68584604 26.3808739,-2.71772865 26.4013777,-2.74947252 C26.4216926,-2.78121698 26.4419911,-2.81202804 26.4625643,-2.84263915 C26.5493023,-2.97132436 26.6419802,-3.09832455 26.740356,-3.22258052 C26.8107372,-3.31178657 26.8839756,-3.39904616 26.9595983,-3.48413957 L26.8150453,-3.31505714 C26.8844007,-3.3992442 26.9555703,-3.48105588 27.0284507,-3.56048219 C27.0438545,-3.57705025 27.0593368,-3.59369605 27.0749122,-3.61025335 C27.1126602,-3.65076867 27.1509623,-3.69047231 27.1896971,-3.72954296 C27.2023164,-3.74167903 27.2150797,-3.75441006 27.2278997,-3.76708374 C27.6220514,-4.15830614 28.0603707,-4.48447715 28.5279355,-4.74446762 C28.5439053,-4.75277895 28.5601954,-4.76176514 28.5765315,-4.7706776 C28.6152316,-4.79224222 28.6538964,-4.81272154 28.6927462,-4.83275554 C29.2193136,-5.10484529 29.7912644,-5.30152957 30.3943066,-5.40814346 C30.4131401,-5.41057876 30.4317853,-5.41379788 30.4504441,-5.41692904 C30.5095134,-5.42765954 30.5691798,-5.43670499 30.6291277,-5.44486501 C30.6379065,-5.4453736 30.6465625,-5.44654156 30.6552207,-5.44769071 C30.7178454,-5.45660415 30.7808746,-5.46389406 30.8441937,-5.47019956 C30.8659938,-5.47194501 30.8879626,-5.47403791 30.9099424,-5.47601032 C30.9634606,-5.48112049 31.0170749,-5.48514837 31.0708811,-5.4884671 C31.0926989,-5.4896581 31.1146977,-5.49092037 31.1367037,-5.49206219 C31.1836786,-5.49455187 31.2306768,-5.4963924 31.2778108,-5.49768934 C31.3152189,-5.49879342 31.352621,-5.4995094 31.3900322,-5.49987737 C31.4084883,-5.49991405 31.4272124,-5.5 31.4459566,-5.5 L31.5062975,-5.49990023 C31.5406422,-5.49957587 31.57499,-5.49895799 31.6093368,-5.49804619 L31.4459566,-5.5 C31.5446712,-5.5 31.6428289,-5.49761611 31.7403755,-5.49290275 C31.7784296,-5.49130219 31.8167688,-5.48908301 31.8550947,-5.48649611 C31.8991824,-5.48329623 31.9425043,-5.47990323 31.9856946,-5.47605258 C32.0119417,-5.47390681 32.038618,-5.47134391 32.065283,-5.46860191 C32.1138549,-5.4634424 32.1620885,-5.45790207 32.2101447,-5.45179329 C32.2471842,-5.44720665 32.28419,-5.44213992 32.3211618,-5.43672531 C32.3554545,-5.43161337 32.3894381,-5.42634546 32.4233261,-5.42079364 C32.4772545,-5.41201389 32.5312721,-5.4023885 32.5851904,-5.39201298 C32.6012877,-5.38889184 32.6175909,-5.3856882 32.6338702,-5.38241868 C32.6873551,-5.37168079 32.7404461,-5.360252 32.79342,-5.3480895 C32.8285201,-5.34004416 32.8635867,-5.33166151 32.8985323,-5.32297251 C32.9213597,-5.31727559 32.9440624,-5.31148669 32.9667402,-5.30556161 C33.0230119,-5.29088517 33.0790134,-5.27537425 33.1346795,-5.25907818 C33.1560592,-5.25279583 33.1773628,-5.2464302 33.1986404,-5.23994235 C33.2428221,-5.2264899 33.2867789,-5.21252275 33.3305129,-5.19806615 C33.3742588,-5.1835993 33.4182227,-5.16850152 33.4620548,-5.15287165 C33.4846052,-5.14482713 33.506584,-5.13683903 33.5285024,-5.12872656 C33.5722402,-5.11255172 33.6162903,-5.09565974 33.6601907,-5.07822218 C33.6964508,-5.06379948 33.7324766,-5.04907562 33.7683252,-5.0340136 C33.7999233,-5.0207614 33.8312915,-5.00726425 33.8625741,-4.99348306 C33.9072789,-4.9737629 33.9517428,-4.95351797 33.9959122,-4.93275025 C34.020562,-4.92118367 34.0455453,-4.90922451 34.0704675,-4.89707986 C34.1101419,-4.87773031 34.1491056,-4.85821218 34.1878244,-4.83828518 C34.2185999,-4.82244094 34.2495412,-4.80616792 34.2803776,-4.7896017 C34.3257293,-4.76527281 34.3708514,-4.74028712 34.4156128,-4.71474173 C34.4365996,-4.7026953 34.4578739,-4.69037415 34.4790929,-4.67790943 C34.5183125,-4.65496836 34.5567658,-4.63180464 34.5949349,-4.60822507 C34.6258996,-4.58895047 34.6571983,-4.56920401 34.6883638,-4.54913548 C34.7270078,-4.52444527 34.7651879,-4.49925365 34.8030597,-4.47364086 C34.824825,-4.45868373 34.8467286,-4.44365083 34.8685602,-4.42845392 L34.8885347,-4.41475247 C35.0103469,-4.3292701 35.1288363,-4.23937412 35.2437704,-4.1452968 L35.2610137,-4.13091125 C35.3564468,-4.05229115 35.4488275,-3.97133976 35.5381413,-3.88820765 C35.5599077,-3.86766386 35.5815204,-3.84719818 35.60298,-3.82657437 C35.631847,-3.79928363 35.6602982,-3.77146048 35.688419,-3.74341234 C35.7164361,-3.7148103 35.74463,-3.68605521 35.7725344,-3.65701986 C35.8041461,-3.62500678 35.8349578,-3.59228928 35.8653402,-3.55929515 C35.881781,-3.54036825 35.8986919,-3.5217351 35.9154866,-3.50299573 C36.1930032,-3.19506082 36.4330491,-2.86330885 36.6356443,-2.51441401 C36.6510154,-2.48672602 36.666696,-2.45929108 36.6821651,-2.43172153 C36.7017002,-2.39791553 36.720471,-2.36358139 36.7388869,-2.32909551 C36.757957,-2.29265094 36.7769967,-2.25620725 36.7956707,-2.21954606 C36.8131662,-2.18561356 36.8301852,-2.15127762 36.8468604,-2.11680637 C36.8607821,-2.087935 36.8746001,-2.05887504 36.8881902,-2.02968755 C36.906118,-1.99093889 36.9237155,-1.95191832 36.9408803,-1.91274132 C36.9599405,-1.86994284 36.9783871,-1.82661312 36.9963352,-1.78302557 C37.0072219,-1.75543761 37.0183261,-1.7277898 37.0292191,-1.70007244 C37.0812124,-1.56960959 37.1282168,-1.43691197 37.170595,-1.30216424 C37.1747766,-1.28580209 37.1797329,-1.26977125 37.1846211,-1.25372255 C37.2019115,-1.20028987 37.2175568,-1.14631126 37.2324586,-1.092025 C37.2375126,-1.06976542 37.2434241,-1.04771518 37.249209,-1.02563588 C37.3121588,-0.789585344 37.3600657,-0.547582216 37.3929765,-0.300836892 L74.0529801,-0.300836892 C74.44446,-3.23591796 76.9578277,-5.5 80,-5.5 C83.3137085,-5.5 86,-2.8137085 86,0.5 C86,3.8137085 83.3137085,6.5 80,6.5 C76.9578277,6.5 74.44446,4.23591796 74.0529801,1.30083689 L37.3929765,1.30083689 L37.3879666,1.33832233 C37.3857226,1.35433739 37.3834135,1.3703464 37.3810392,1.38634895 L37.3929765,1.30083689 C37.3808137,1.39202652 37.3666026,1.48256845 37.3503912,1.5724149 C37.3458007,1.59806317 37.3409885,1.62373696 37.3360065,1.64938804 C37.311302,1.77656168 37.2826828,1.90201245 37.2502022,2.02589977 C37.2404844,2.06288434 37.230291,2.10028476 37.2197251,2.13761148 C37.2073511,2.18140318 37.1945657,2.22467229 37.1813086,2.2677347 C37.1741884,2.29079981 37.1669503,2.31381744 37.1595684,2.33680284 C37.1207187,2.45781146 37.0780379,2.57734132 37.0317436,2.69505412 C36.959108,2.87983822 36.8767281,3.06216749 36.7846152,3.24113965 C36.7626462,3.28383909 36.7402826,3.32605024 36.7174339,3.3679588 C36.7105259,3.3805674 36.7033914,3.39353396 36.6962047,3.40648129 L36.7174339,3.3679588 C36.6646359,3.46479942 36.6092471,3.56002425 36.5513607,3.65354017 C36.5489966,3.65713353 36.5469404,3.66044547 36.5448805,3.66375597 L75.9187521,36.1009328 C78.0871369,34.0841188 81.4668954,33.9346161 83.8150571,35.8690887 C86.3726388,37.9760866 86.7379091,41.7574755 84.6309113,44.3150571 C82.5239134,46.8726388 78.7425245,47.2379091 76.1849429,45.1309113 C73.8367864,43.1964429 73.3366099,39.8505687 74.9010761,37.336244 L35.5272045,4.89906718 C35.4415352,4.97874816 35.3539751,5.05551443 35.2646624,5.12935264 C35.2498345,5.14036343 35.2354779,5.15210906 35.221066,5.16378923 C35.1789198,5.19924185 35.1355865,5.23340673 35.0918732,5.26690122 C35.0712202,5.28142396 35.0513194,5.2964713 35.0313224,5.3113969 C34.8266242,5.46548919 34.6130022,5.60428947 34.3927928,5.72834023 C34.3608622,5.74573452 34.3290432,5.76332733 34.2970491,5.78063786 C34.291159,5.78430228 34.2847767,5.78773066 34.2783893,5.79114686 L34.1196441,5.87282542 Z M-0.209608455,86.4978981 L-1.66977543e-13,86.5 C-0.0786887566,86.5 -0.157023717,86.4984852 -0.234977312,86.4954832 C-0.445473131,86.4888636 -0.656788715,86.4695207 -0.867676992,86.4386719 C-0.889108914,86.4346172 -0.910467371,86.4314051 -0.931788651,86.4280807 C-0.983630097,86.420777 -1.03559978,86.4118902 -1.08752521,86.402297 C-1.13674549,86.392677 -1.18582955,86.3830498 -1.23469517,86.3728282 C-1.25433078,86.3690438 -1.27399659,86.3647917 -1.29365356,86.360437 C-2.00851711,86.2031092 -2.67573208,85.9184811 -3.27010115,85.5313854 C-3.97645595,85.073506 -4.55965071,84.4847447 -5.00309838,83.8138737 C-5.02499167,83.7801415 -5.04681257,83.7465376 -5.06830401,83.7127039 C-5.08942444,83.6796571 -5.11009061,83.6462638 -5.13041873,83.6126856 C-5.5505622,82.9217992 -5.83506521,82.139131 -5.94688645,81.3018368 C-6.03028618,80.6861534 -6.01893927,80.0547214 -5.90529839,79.4294986 C-5.90074063,79.4070739 -5.89658722,79.384745 -5.89231052,79.36246 C-5.88577747,79.3266268 -5.87852345,79.2909208 -5.87093257,79.255242 C-5.86076466,79.2080794 -5.85024257,79.1610427 -5.83917182,79.1142179 C-5.83503488,79.0970057 -5.83082366,79.079674 -5.82653192,79.0623504 C-5.64662719,78.3303334 -5.33237985,77.6513407 -4.91148319,77.0527679 C-4.54790393,76.5328886 -4.11103797,76.0846795 -3.62254802,75.7154214 C-3.61076227,75.7076425 -3.59947952,75.699149 -3.58816594,75.6906944 C-3.49325112,75.6189332 -3.39605678,75.550958 -3.29709155,75.4860146 C-3.2688946,75.4677133 -3.24023054,75.449171 -3.21139969,75.4308678 C-3.18393661,75.4134559 -3.15628171,75.3962865 -3.12849918,75.379349 C-2.53816519,75.0178103 -1.88096272,74.7557522 -1.17967658,74.615933 C-1.17540986,74.6160763 -1.17106659,74.6152072 -1.16672225,74.6143428 C-1.11097304,74.6023791 -1.05502401,74.5921782 -0.998810048,74.5827589 C-0.97732661,74.5797583 -0.955982977,74.5762562 -0.93461744,74.5728688 C-0.889971478,74.5653767 -0.844983564,74.5588607 -0.799836952,74.5528468 C-0.774572205,74.5496399 -0.749246616,74.5463745 -0.723894933,74.5432699 C-0.677889653,74.5376923 -0.6319299,74.5327022 -0.585817387,74.5282339 C-0.551026958,74.524532 -0.516028512,74.5213742 -0.480990247,74.5185232 C-0.46124868,74.5174333 -0.441595888,74.5159714 -0.421916962,74.5146044 C-0.050802879,74.4874939 0.324619511,74.4954049 0.699417011,74.5393598 C0.704226941,74.5408884 0.709555155,74.5415137 0.714881229,74.5421461 C0.781445679,74.549183 0.847446899,74.5582264 0.913400202,74.5683979 C0.925340113,74.570905 0.937317295,74.5727716 0.949282692,74.5746735 C1.12908879,74.6029014 1.3084161,74.6401378 1.48681731,74.6859454 C2.06549983,74.8332765 2.60977363,75.0645453 3.10678694,75.3659502 C3.12303404,75.3765224 3.13977089,75.386736 3.15644193,75.3970211 C3.20164512,75.4244163 3.24591122,75.4526702 3.28977687,75.4814837 C3.32957981,75.5077356 3.36903487,75.5342052 3.40809109,75.5610829 C3.42964965,75.5761295 3.45127461,75.591291 3.4727943,75.6065909 C3.50903691,75.631895 3.54466805,75.6577126 3.57994907,75.6838731 C3.61165371,75.7081683 3.64362021,75.7324677 3.67533325,75.7570781 C3.70889032,75.7820378 3.7414879,75.8077927 3.77376532,75.8338454 C3.79059812,75.8487522 3.80798448,75.8629926 3.82528903,75.8773283 C4.32109322,76.285427 4.73901407,76.7652567 5.07206317,77.2931036 C5.09006383,77.3216308 5.10795808,77.3504074 5.12561415,77.3793452 C5.14161024,77.405921 5.15748113,77.4325056 5.17314075,77.4592027 C5.57013408,78.1325002 5.8391414,78.890355 5.9470199,79.6991631 L74.0529801,79.6991631 C74.44446,76.764082 76.9578277,74.5 80,74.5 C83.3137085,74.5 86,77.1862915 86,80.5 C86,83.8137085 83.3137085,86.5 80,86.5 C76.9578277,86.5 74.44446,84.235918 74.0529801,81.3008369 L5.9470199,81.3008369 C5.89439695,81.6953721 5.80343385,82.0777832 5.67799993,82.4442007 L75.039029,117.124033 C76.7018045,114.673961 79.9623079,113.772953 82.6832762,115.133434 C85.6471487,116.615367 86.8484984,120.219404 85.3665658,123.183276 C83.8846333,126.147149 80.2805963,127.348498 77.3167238,125.866566 C74.595553,124.505984 73.3600692,121.356612 74.3228547,118.55619 L4.96129431,83.8755907 C4.893093,83.9761049 4.82220284,84.0740125 4.74877136,84.1692643 C4.73879381,84.1806792 4.7292336,84.1929374 4.71962694,84.2051574 C4.30839358,84.7316267 3.81888195,85.1748402 3.27681467,85.5274331 C2.62797663,85.9507782 1.89228515,86.2522822 1.10221985,86.3989786 C1.07227553,86.403944 1.04233345,86.4093278 1.01234569,86.4144855 C0.977832559,86.420822 0.943147775,86.426403 0.908364904,86.4316854 C0.85973539,86.4389261 0.810772646,86.4458532 0.761709028,86.4521776 C0.729201179,86.4562235 0.697151025,86.4600196 0.665024472,86.4635621 C0.617668227,86.4691756 0.569609355,86.4739998 0.521472227,86.4782449 C0.498283811,86.4796474 0.475587937,86.4814704 0.452856777,86.4831666 C0.387497734,86.4889134 0.321520799,86.4928811 0.255434199,86.4957567 C0.245769947,86.4950652 0.236436068,86.4954341 0.227096732,86.4957816 C0.160257402,86.499518 0.0929206751,86.5009981 0.0255028708,86.5013383 C0.0167443543,86.4999828 0.00837418062,86.5 -1.66977543e-13,86.5 L-0.0987377828,86.5006738 C-0.135677161,86.5000921 -0.172635601,86.4991674 -0.209608455,86.4978981 Z" id="Combined-Shape" fill-rule="nonzero"></path>
+        </g>
+        <g id="sky-blue" stroke-width="1" transform="translate(999.5, 121.4)" fill="#4FB0FF" fill-opacity="0.85">
+            <path d="M8.84370961,-0.171662898 L101.056701,0.272010602 C101.461663,-2.66183982 103.986195,-4.91454373 107.028816,-4.8999308 C110.342486,-4.88401609 113.015846,-2.18485415 113.000001,1.12881613 C112.984016,4.44248641 110.284854,7.11584551 106.971184,7.09999851 C103.928838,7.0853192 101.426239,4.80888175 101.049125,1.8716629 L8.83607789,1.42839005 C8.4309414,4.3620449 5.90650125,6.61454307 2.86401853,6.5999308 C-0.449651748,6.58401609 -3.12301085,3.88485415 -3.10716683,0.571183872 C-3.09118143,-2.74248641 -0.392019492,-5.41584551 2.92165079,-5.40000149 C5.96399626,-5.3853192 8.46659558,-3.10888175 8.84370961,-0.171662898 Z M113.500001,41.282101 C113.509859,44.5957947 110.831593,47.2900879 107.517899,47.2999733 C104.475395,47.3090497 101.955057,45.0519619 101.555259,42.1175748 L12.8857204,42.3830923 C12.502998,45.3193282 9.99639574,47.590898 6.95423694,47.6000006 C3.64054319,47.6098587 0.946249969,44.9315928 0.936337331,41.617899 C0.926479247,38.3042053 3.60474513,35.6099121 6.91843888,35.6000267 C9.96094314,35.5909503 12.4812806,37.8480381 12.8810786,40.7824252 L101.550618,40.5169077 C101.93334,37.5806718 104.439942,35.309102 107.482101,35.2999994 C110.795795,35.2901413 113.490088,37.9684072 113.500001,41.282101 Z M322,1.1 C322,4.4137085 319.313708,7.1 316,7.1 C312.957482,7.1 310.443889,4.83540371 310.052847,1.89983695 L280.947153,1.89983695 C280.556111,4.83540371 278.042518,7.1 275,7.1 C271.686292,7.1 269,4.4137085 269,1.1 C269,-2.2137085 271.686292,-4.9 275,-4.9 C278.042172,-4.9 280.55554,-2.63591796 280.94702,0.299163108 L310.05298,0.299163108 C310.44446,-2.63591796 312.957828,-4.9 316,-4.9 C319.313708,-4.9 322,-2.2137085 322,1.1 Z M316,34.1 C319.313708,34.1 322,36.7862915 322,40.1 C322,43.4137085 319.313708,46.1 316,46.1 C312.957482,46.1 310.443889,43.8354037 310.052847,40.899837 L280.970288,40.8992833 C280.868147,41.9241833 280.50259,42.9292398 279.870548,43.8051565 L312.227699,74.3948359 C314.52857,72.5301437 317.910997,72.6113041 320.12195,74.7015472 C322.529907,76.9780376 322.636482,80.7755323 320.359991,83.183489 C318.083501,85.5914457 314.286006,85.6980202 311.87805,83.4215298 C309.667182,81.3313682 309.396382,77.9589987 311.128872,75.5571867 L278.772301,44.9667026 C276.47143,46.8313948 273.089003,46.7502343 270.87805,44.6599913 C269.598912,43.4506904 268.969195,41.8121819 269.001142,40.1807599 C269.000177,40.1537526 269,40.126897 269,40.1 C269,36.7862915 271.686292,34.1 275,34.1 C278.042172,34.1 280.55554,36.364082 280.94702,39.2991631 L310.05298,39.2991631 C310.44446,36.364082 312.957828,34.1 316,34.1 Z" id="Combined-Shape" fill-rule="nonzero"></path>
+        </g>
+        <g id="lines-@vivid-green" stroke-width="1" transform="translate(422.3282, 123.0001)" fill="#21B959" fill-opacity="0.7">
+            <path d="M18.1995088,120.164405 C18.9731516,123.10638 17.4229386,126.112733 14.6842209,127.237946 L81.4255421,381.044875 C84.3640374,380.676484 87.1934325,382.531496 87.9672079,385.473975 C88.8099528,388.678729 86.8951664,391.959875 83.6904129,392.80262 C80.4856594,393.645364 77.2045137,391.730578 76.3617688,388.525825 C75.5880321,385.583492 77.1387153,382.576767 79.8780539,381.451874 L13.1353379,127.645405 C10.196985,128.013579 7.36781026,126.158601 6.59406972,123.216254 C5.75132485,120.011501 7.66611119,116.730355 10.8708647,115.88761 C14.0756182,115.044865 17.356764,116.959652 18.1995088,120.164405 Z M5.79600741,38.4486753 C6.58251497,41.3873879 5.04524746,44.4006231 2.31121938,45.5376479 L81.39957,341.048155 C84.3364302,340.666948 87.173891,342.509599 87.9604958,345.448675 C88.8172135,348.649722 86.9167596,351.93919 83.7157131,352.795907 C80.5146666,353.652625 77.2251987,351.752171 76.3684809,348.551125 C75.5819155,345.612196 77.119467,342.598734 79.8538721,341.461901 L0.764520917,45.9516967 C-2.17219786,46.3326854 -5.0094381,44.4900681 -5.79600741,41.5511247 C-6.65272516,38.3500782 -4.75227121,35.0606103 -1.5512247,34.2038926 C1.6498218,33.3471748 4.93928967,35.2476288 5.79600741,38.4486753 Z M6.8006994,-1.53358616 C7.57841533,1.4080786 6.03123436,4.41722292 3.29276235,5.54541265 L81.4186795,301.045729 C84.356328,300.674005 87.1876155,302.525193 87.9651877,305.466314 C88.812164,308.669952 86.9017123,311.953623 83.6980745,312.800599 C80.4944367,313.647576 77.2107652,311.737124 76.363789,308.533486 C75.5861674,305.592178 77.1328786,302.583407 79.8707293,301.454898 L1.74680969,5.95404446 C-1.19119657,6.32631533 -4.02303886,4.47504136 -4.8006994,1.53358616 C-5.64767565,-1.67005165 -3.73722398,-4.95372315 -0.533586163,-5.8006994 C2.67005165,-6.64767565 5.95372315,-4.73722398 6.8006994,-1.53358616 Z M304.906393,238.966487 L304.906605,238.892826 C304.908834,238.76804 304.914994,238.64292 304.925161,238.517635 C304.935383,238.391628 304.949518,238.265273 304.967749,238.139149 C304.972054,238.112028 304.976236,238.084698 304.980612,238.057369 C305.061525,237.544922 305.210689,237.037255 305.431028,236.546962 C305.448537,236.508512 305.465977,236.470544 305.483772,236.432837 C305.49315,236.413047 305.502631,236.393353 305.512229,236.37369 C305.540386,236.315362 305.569546,236.257671 305.59954,236.200625 C305.607529,236.186698 305.614996,236.1727 305.622524,236.15872 C305.988289,235.474857 306.475125,234.885181 307.045402,234.406611 L307.063159,234.390961 C307.431859,234.083467 307.83326,233.824306 308.257228,233.616105 C308.281913,233.604694 308.305978,233.593054 308.330119,233.581574 C308.375031,233.5596 308.420992,233.538492 308.467192,233.517974 C308.484827,233.510629 308.501831,233.503154 308.51887,233.495758 C308.554146,233.480036 308.590147,233.464861 308.626282,233.450041 C308.729233,233.408019 308.833124,233.368606 308.938078,233.332075 C308.977357,233.318498 309.017058,233.305186 309.056888,233.292289 C309.084868,233.283008 309.113149,233.274048 309.141499,233.265296 C309.176923,233.254683 309.211957,233.244257 309.24708,233.234149 C309.28322,233.223315 309.320221,233.213006 309.357328,233.20305 C309.385881,233.195923 309.413534,233.188745 309.441236,233.181763 C309.4928,233.168136 309.545502,233.155553 309.598393,233.143679 C309.611441,233.141466 309.624089,233.13868 309.636746,233.135934 C309.686624,233.124343 309.737,233.114052 309.787533,233.104404 C309.815093,233.099986 309.842476,233.09497 309.869896,233.090143 C310.06394,233.055017 310.260538,233.030211 310.458635,233.015237 C310.473084,233.015067 310.486754,233.014078 310.50043,233.013137 C310.70252,232.998351 310.906833,232.994735 311.111927,233.001763 C311.131769,233.002907 311.151773,233.003672 311.171778,233.004537 C311.220551,233.00628 311.269181,233.009035 311.317841,233.012393 C311.35052,233.014861 311.383202,233.017346 311.41588,233.020102 C311.446694,233.022609 311.477736,233.025511 311.508784,233.028661 C311.552572,233.033053 311.59623,233.03791 311.639868,233.043255 C311.667003,233.046787 311.693716,233.050282 311.72043,233.053961 C311.772163,233.060726 311.824499,233.068564 311.876785,233.07711 C311.893035,233.080289 311.908694,233.082936 311.924352,233.085647 C311.978665,233.094408 312.033422,233.104599 312.088105,233.115575 C312.111176,233.121015 312.134141,233.125795 312.1571,233.130714 C312.214063,233.141987 312.270754,233.154936 312.32734,233.168739 C312.333672,233.171339 312.340665,233.173064 312.347657,233.174802 C312.559845,233.226309 312.769878,233.290681 312.977474,233.367252 C312.981845,233.36996 312.986508,233.371681 312.991171,233.373407 L313.012534,233.380308 C313.071379,233.402433 313.130024,233.425541 313.188446,233.449638 L312.991171,233.373407 C313.071007,233.402976 313.149839,233.434064 313.227647,233.466627 C313.283842,233.489612 313.340116,233.514306 313.396157,233.539934 C313.411433,233.547111 313.426781,233.554166 313.442087,233.561279 C313.48472,233.581105 313.527056,233.601502 313.569247,233.622442 C314.118423,233.893219 314.610444,234.240199 315.03725,234.645378 C315.164015,234.765689 315.28474,234.890554 315.399292,235.019786 C315.435651,235.061554 315.472136,235.103981 315.507994,235.146878 C315.912785,235.629332 316.233914,236.168329 316.466092,236.740839 C316.47695,236.768255 316.487863,236.79566 316.498575,236.823146 C316.514773,236.864332 316.530408,236.905923 316.545578,236.947673 C316.554459,236.972166 316.563156,236.996512 316.571697,237.020915 C316.584869,237.058784 316.597804,237.097094 316.610351,237.135525 C316.781761,237.658341 316.881904,238.205629 316.904163,238.762743 C316.904116,238.78228 316.904805,238.801633 316.905401,238.820993 C316.910029,238.947689 316.909819,239.074964 316.90549,239.202542 C316.904435,239.219718 316.903828,239.236053 316.903154,239.25239 C316.902512,239.277919 316.901224,239.304285 316.89976,239.330661 L316.894471,239.411453 C316.893406,239.427127 316.892279,239.442801 316.89109,239.458475 L316.887909,239.498173 C316.944985,240.177876 316.886253,240.878943 316.696775,241.575272 C315.898017,244.51071 313.053455,246.341435 310.118563,245.948539 L273.241568,381.471834 C275.970832,382.620246 277.495526,385.639863 276.696775,388.575272 C275.826722,391.772719 272.52936,393.659445 269.331913,392.789391 C266.134465,391.919337 264.24774,388.621976 265.117794,385.424528 C265.916552,382.48909 268.761114,380.658365 271.696006,381.051261 L306.438512,253.371524 L273.723565,341.701132 C276.340096,343.087586 277.590393,346.230924 276.533777,349.083786 C275.382879,352.191212 271.930824,353.777291 268.823398,352.626393 C265.715972,351.475494 264.129893,348.02344 265.280792,344.916014 C266.337404,342.06316 269.333578,340.492595 272.221969,341.144989 L307.022512,247.181524 L274.612879,302.280053 C276.944184,304.105639 277.621318,307.419894 276.07889,310.042021 C274.398773,312.898219 270.721363,313.851622 267.865164,312.171505 C265.008965,310.491388 264.055562,306.813978 265.735679,303.957779 C267.278239,301.335427 270.504431,300.317049 273.232898,301.468183 L306.811292,244.385935 C305.494805,243.154396 304.771567,241.362214 304.926476,239.50729 C304.913522,239.345386 304.906259,239.182991 304.905691,239.019926 L304.906393,238.966487 Z" id="Combined-Shape" fill-rule="nonzero"></path>
+        </g>
+        <g id="blue" stroke-width="1" transform="translate(422.3282, 118.2904)" fill="#526AE4" fill-opacity="0.8">
+            <path d="M310.233171,157.712771 L310.246013,157.713287 C310.847695,157.737552 311.452256,157.853069 312.039436,158.066097 C312.05667,158.07285 312.073571,158.079025 312.09043,158.085271 C312.120103,158.095929 312.149819,158.107284 312.179485,158.118892 C312.746827,158.339552 313.263742,158.639103 313.721587,159.001271 C313.877157,159.124632 314.025345,159.254048 314.166025,159.389584 C314.19556,159.41861 314.225221,159.447866 314.254557,159.477411 C314.695547,159.919685 315.058726,160.42398 315.337559,160.96799 C315.484982,161.255916 315.609881,161.555905 315.710032,161.864947 C315.718622,161.893521 315.727681,161.922302 315.736525,161.95115 C315.967797,162.700817 316.053372,163.501609 315.970765,164.311593 C315.922515,164.797857 315.813196,165.283904 315.640107,165.758516 C315.63253,165.778007 315.625298,165.797661 315.617971,165.817255 C315.60653,165.848609 315.594448,165.879899 315.582085,165.911133 C315.336868,166.534564 314.99607,167.096774 314.581624,167.586042 C314.153364,168.092837 313.65109,168.516074 313.099687,168.847992 C313.087381,168.854396 313.075759,168.86135 313.064114,168.868264 C312.958168,168.932039 312.849726,168.991853 312.739661,169.048216 C312.717432,169.059075 312.695134,169.07035 312.672766,169.081487 C312.631739,169.102317 312.590389,169.12226 312.54883,169.141722 C312.513249,169.158149 312.47793,169.174322 312.442453,169.190155 C312.422203,169.199295 312.401451,169.208389 312.380651,169.217364 C312.334547,169.237276 312.28866,169.256467 312.242529,169.275092 C312.216253,169.285516 312.189537,169.296046 312.16275,169.306385 C312.124077,169.321618 312.08547,169.336114 312.046707,169.350215 C312.022347,169.358642 311.99761,169.367429 311.972818,169.376054 C311.425922,169.567427 310.849311,169.680205 310.259513,169.705654 C310.12263,169.71156 309.985037,169.712763 309.846945,169.709144 C309.79599,169.707756 309.744677,169.705829 309.693347,169.703236 C309.673954,169.702127 309.654317,169.701009 309.634672,169.699792 C308.992572,169.661177 308.348996,169.518181 307.728231,169.263396 C307.689176,169.247558 307.650876,169.231476 307.612816,169.21503 C307.606819,169.212011 307.600472,169.209238 307.594128,169.206453 L307.612816,169.21503 C307.163014,169.020664 306.746771,168.775442 306.369023,168.488275 C306.359152,168.479928 306.348749,168.47199 306.338377,168.464021 C306.286098,168.424533 306.235028,168.383817 306.184706,168.342331 C306.162314,168.323495 306.13982,168.304729 306.117488,168.285819 C306.079971,168.254215 306.042939,168.221963 306.006342,168.189289 C305.987614,168.172655 305.968594,168.155506 305.949701,168.138251 C305.915592,168.106819 305.882025,168.075347 305.848848,168.043515 C305.817488,168.013969 305.786476,167.98364 305.755837,167.953015 C305.727728,167.924142 305.699665,167.89545 305.671908,167.866495 C305.649328,167.843918 305.627262,167.820576 305.605404,167.797076 C305.585456,167.7745 305.565325,167.752485 305.545366,167.73033 C305.0932,167.230561 304.733988,166.661612 304.475943,166.05208 C304.451095,165.993618 304.427039,165.934395 304.403922,165.874779 C304.370897,165.788631 304.339827,165.702407 304.310738,165.615529 L304.305476,165.601518 C304.063807,164.876267 303.958786,164.100005 304.01196,163.311643 L304.013169,163.29023 C304.023669,163.139163 304.039953,162.988038 304.062119,162.837173 L28.093751,83.5660842 C26.90726,86.279208 23.8663839,87.7615447 20.9423365,86.921624 C17.7574182,86.0067691 15.9171681,82.6832463 16.832023,79.498328 C17.7468778,76.3134097 21.0704006,74.4731597 24.255319,75.3880145 C27.1793862,76.2279409 28.9700272,79.0981432 28.5357494,82.0273674 L302.781795,160.802795 L4.88480726,3.48526083 C3.16801898,5.89786712 -0.11176375,6.72624584 -2.80187053,5.30561226 C-5.73207928,3.7581819 -6.85304263,0.128338207 -5.30561226,-2.80187053 C-3.7581819,-5.73207928 -0.128338207,-6.85304263 2.80187053,-5.30561226 C5.49209415,-3.88491699 6.65729223,-0.708941028 5.63259343,2.06936474 L305.114794,160.224938 L305.220175,160.080226 C305.22894,160.068688 305.237745,160.057186 305.246588,160.045719 L305.114794,160.224938 C305.174526,160.140976 305.236151,160.058932 305.299579,159.978834 C305.329779,159.940011 305.360678,159.902085 305.392008,159.86458 C305.417614,159.834257 305.443549,159.803741 305.469751,159.773531 C305.493286,159.746339 305.517111,159.719449 305.541161,159.692792 C305.56493,159.666265 305.58878,159.640224 305.612832,159.614426 C305.650692,159.574258 305.68907,159.534336 305.727961,159.494985 C305.752942,159.469005 305.778654,159.443394 305.804571,159.418046 C305.820112,159.403693 305.835155,159.389168 305.85027,159.374726 C305.89588,159.330151 305.942607,159.286827 305.989942,159.244327 C305.998348,159.237911 306.006686,159.230478 306.015044,159.223069 C306.179543,159.075882 306.351421,158.939621 306.529557,158.813568 C306.539387,158.807921 306.54837,158.801599 306.55737,158.795302 C306.673263,158.712947 306.792754,158.635596 306.914627,158.562696 C306.925064,158.557441 306.935055,158.551494 306.945062,158.545577 C307.065997,158.473221 307.189832,158.405895 307.315732,158.343055 C307.332173,158.335305 307.348078,158.327433 307.364019,158.319631 C307.415658,158.29402 307.467878,158.269457 307.520423,158.245657 C307.537467,158.238093 307.555088,158.230186 307.572748,158.222363 C307.626775,158.198404 307.680751,158.175512 307.735036,158.153415 C308.43943,157.86563 309.201105,157.710136 309.981638,157.70811 C310.065282,157.707892 310.149142,157.709438 310.233171,157.712771 Z" id="Combined-Shape" fill-rule="nonzero"></path>
+        </g>
+        <g id="lines-@yellow" stroke-width="1" transform="translate(417.3282, 121)" fill="#FFCA00" fill-opacity="0.85">
+            <path d="M5.94811401,-0.192663861 L308.312459,0.192196768 C308.707411,-2.74267671 311.223794,-5.00386759 314.266172,-4.99999514 C317.579878,-4.99577733 320.262748,-2.3060688 320.258536,1.00763702 C320.254313,4.32134283 317.564604,7.00421295 314.250898,7.00000011 C311.20852,6.99612269 308.697901,4.72853325 308.310421,1.79266386 L5.94607687,1.40780323 C5.55112472,4.34267671 3.03474108,6.60386759 -0.00763701789,6.59999989 C-3.32134283,6.59577733 -6.00421295,3.9060688 -6.00000011,0.592362982 C-5.99577733,-2.72134283 -3.3060688,-5.40421295 0.00763701789,-5.40000011 C3.05001511,-5.39612269 5.56063435,-3.12853325 5.94811401,-0.192663861 Z M5.95162971,39.1649395 L308.198232,40.9647905 C308.606734,38.0316025 311.133758,35.7820026 314.176222,35.8001062 C317.489871,35.8198234 320.160131,38.5220513 320.140523,41.8357012 C320.120697,45.149351 317.418469,47.819611 314.104819,47.8000023 C311.062701,47.7817923 308.562849,45.5027953 308.188841,42.5654368 L5.94228872,40.7659788 C5.53378606,43.6991668 3.00676276,45.9487666 -0.0357011762,45.930663 C-3.34935101,45.9109458 -6.019611,43.2087179 -6.00000228,39.8950681 C-5.98017657,36.5814182 -3.27794866,33.9111582 0.0357011762,33.930767 C3.077684,33.9489762 5.57744812,36.2277707 5.95162971,39.1649395 Z M33.5518941,81.2454292 L308.32045,83.3545463 C308.73401,80.422067 311.264908,78.1768275 314.307336,78.2001767 C317.620947,78.2256071 320.286544,80.9324349 320.261294,84.2460458 C320.235683,87.5596567 317.528855,90.2252537 314.215244,90.1998233 C311.172954,90.1764752 308.676902,87.8928605 308.308224,84.9545708 L33.5396676,82.8454537 C33.1261083,85.777933 30.5952101,88.0231725 27.5527819,87.9998233 C24.239171,87.9743929 21.573574,85.2675651 21.598824,81.9539542 C21.6244348,78.6403433 24.3312626,75.9747463 27.6448736,76.0001767 C30.6871645,76.0235248 33.1832163,78.3071395 33.5518941,81.2454292 Z M23.3459999,120.986365 L308.195683,121.724615 C308.595051,118.790857 311.114153,116.533623 314.156105,116.541524 C317.469802,116.550131 320.149107,119.243391 320.140521,122.557088 C320.131893,125.870785 317.438634,128.550091 314.124936,128.541504 C311.082429,128.533581 308.574726,126.262464 308.19131,123.325891 L23.3417091,122.587033 C22.942607,125.521087 20.4233672,127.778633 17.3812051,127.770732 C14.0675078,127.762125 11.3882026,125.068865 11.3967888,121.755168 C11.4054164,118.44147 14.0986761,115.762165 17.4123734,115.770752 C20.454881,115.778675 22.9625835,118.049792 23.3459999,120.986365 Z" id="Combined-Shape" fill-rule="nonzero"></path>
+        </g>
+        <g id="line-@purple" stroke-width="1" transform="translate(346.9995, 276.9996)" fill="#AA63CC">
+            <path d="M6.62820848,1.03341274 C9.65375791,1.35169357 11.9164992,3.86641222 11.9986599,6.82643444 L380.4055,45.5813429 C381.102066,42.7035531 383.83839,40.715055 386.86372,41.0333127 C390.159244,41.3799942 392.549753,44.3325849 392.203071,47.6281085 C391.85639,50.9236321 388.903799,53.314141 385.608275,52.9674595 C382.582726,52.6491786 380.319985,50.13446 380.237824,47.1744378 L11.8312259,8.41853047 C11.1350513,11.296859 8.39844361,13.285854 5.37276373,12.9675595 C2.07724011,12.620878 -0.313268758,9.66828736 0.0334127362,6.37276373 C0.380094231,3.07724011 3.33268486,0.686731242 6.62820848,1.03341274 Z" fill-rule="nonzero"></path>
+        </g>
+        <g id="lines-@orange" stroke-width="1" transform="translate(934.4237, 122.2454)" fill="#F07813" fill-opacity="0.8">
+            <path d="M174.382355,125.477786 L174.328303,125.494788 C174.321925,125.496732 174.315542,125.498666 174.309155,125.50059 L174.243015,125.519422 C174.231341,125.522813 174.219662,125.526166 174.207978,125.529483 C174.200578,125.531323 174.195898,125.532632 174.191218,125.533935 C174.172893,125.539572 174.154209,125.544679 174.135506,125.549694 C174.080421,125.564807 174.0254,125.578733 173.970082,125.591893 C173.952002,125.595832 173.9333,125.600187 173.914582,125.604451 L173.970082,125.591893 C173.91896,125.604054 173.867584,125.615562 173.815963,125.626406 C173.772831,125.635706 173.729435,125.644402 173.686011,125.652602 C173.677365,125.653818 173.669059,125.655353 173.660746,125.65687 C173.360655,125.712661 173.05898,125.744403 172.758455,125.753371 L172.670434,125.755348 C172.631774,125.755931 172.593138,125.756138 172.55453,125.755971 L172.514118,125.7558 C172.473254,125.75541 172.432451,125.754599 172.391713,125.753371 C172.380139,125.752105 172.367813,125.7517 172.355486,125.751258 C172.329315,125.751218 172.303989,125.750135 172.278691,125.748891 L172.14404,125.739935 C172.066992,125.735132 171.99565,125.728635 171.924577,125.720871 L171.902543,125.717388 L171.886958,125.715701 C171.879972,125.714887 171.872988,125.71406 171.866006,125.713221 C171.860313,125.713499 171.856987,125.713098 171.853662,125.712695 C171.85004,125.711282 171.84604,125.710789 171.842042,125.710292 C171.815629,125.708012 171.789602,125.704624 171.763614,125.701067 C171.716085,125.694136 171.668407,125.687074 171.620781,125.67943 C171.593256,125.6753 171.565926,125.670685 171.538645,125.665883 C171.508728,125.660427 171.478751,125.65495 171.448801,125.64924 C171.428993,125.645068 171.417747,125.642879 171.406513,125.640659 L171.367946,125.633266 C171.35054,125.629676 171.333155,125.62601 171.315793,125.622268 C171.265849,125.611609 171.215581,125.600156 171.165412,125.588037 C171.14125,125.58195 171.117921,125.576135 171.094637,125.570182 C171.063707,125.562612 171.032041,125.554278 171.00042,125.545676 L170.975763,125.538789 C170.970127,125.537233 170.964495,125.535669 170.958866,125.534097 L170.926045,125.524446 C170.919758,125.522642 170.913474,125.520827 170.907194,125.519003 L170.889813,125.513735 C170.874764,125.509343 170.859728,125.504892 170.844708,125.50038 C170.827282,125.495779 170.809607,125.490361 170.791949,125.484858 C170.783942,125.482389 170.775974,125.479894 170.768011,125.477381 L170.751977,125.472254 C170.74801,125.470989 170.744044,125.46972 170.74008,125.468447 L170.678772,125.447903 C170.662775,125.442586 170.646796,125.437201 170.630836,125.431747 C170.615645,125.426908 170.600371,125.421598 170.585112,125.416222 L170.572609,125.411915 C170.524004,125.3947 170.475688,125.376877 170.427669,125.358454 C170.288711,125.305303 170.150919,125.246478 170.014912,125.182192 C169.996542,125.173547 169.978207,125.164786 169.959909,125.155927 C169.93827,125.145348 169.916686,125.134712 169.895148,125.123936 L169.825627,125.088812 C169.808014,125.079729 169.790437,125.070554 169.772897,125.061286 L169.7238,125.034705 C169.695371,125.01931 169.667033,125.003665 169.638789,124.987771 C169.591079,124.961414 169.543731,124.933957 169.496694,124.905803 C169.479035,124.894625 169.461008,124.883689 169.443023,124.872648 C169.436379,124.868282 169.429479,124.864048 169.422586,124.859799 L169.39051,124.840731 C169.361702,124.822659 169.333016,124.80432 169.304457,124.785715 C169.294028,124.778148 169.28311,124.770977 169.272209,124.763767 L169.251829,124.750515 C169.242549,124.744353 169.233285,124.738164 169.224037,124.731948 L169.1968,124.713903 C169.156212,124.686188 169.115895,124.657923 169.07586,124.629106 C169.060577,124.618129 169.045415,124.607153 169.030299,124.596103 L168.989122,124.565481 C168.955534,124.540375 168.922154,124.514874 168.888989,124.488978 C168.859468,124.466595 168.830806,124.44388 168.802338,124.420889 C168.794899,124.413339 168.786671,124.406641 168.778462,124.399921 L168.761277,124.387426 C168.688625,124.327681 168.617259,124.266115 168.547254,124.202755 C168.540997,124.195815 168.533746,124.189228 168.526511,124.182622 L168.50344,124.162701 C168.411585,124.077886 168.32214,123.989941 168.235281,123.89893 C168.225013,123.887325 168.214068,123.875791 168.203166,123.864212 L168.186241,123.846923 C168.150225,123.808266 168.114669,123.769071 168.079587,123.729343 L168.048512,123.693483 C168.041663,123.685624 168.034834,123.677746 168.028025,123.669849 C167.988376,123.624044 167.950009,123.578145 167.912408,123.531767 C167.901241,123.517835 167.890025,123.503878 167.878868,123.489867 C167.856125,123.461434 167.833965,123.433062 167.812086,123.404521 L167.775902,123.356746 C167.762947,123.339501 167.75008,123.322178 167.737302,123.304777 C167.713942,123.272984 167.691103,123.241206 167.668602,123.209238 C167.665727,123.204935 167.662652,123.200561 167.659583,123.196183 L167.64096,123.169681 C167.621769,123.141962 167.602798,123.114059 167.584051,123.085973 C167.573484,123.070002 167.563473,123.054838 167.553536,123.039635 L167.546597,123.029607 L5.71485133,199.924639 C6.62155868,202.743959 5.3231166,205.868202 2.57501393,207.173969 C-0.418007823,208.59611 -3.99720436,207.32266 -5.41934528,204.329638 C-6.8414862,201.336616 -5.56803568,197.75742 -2.57501393,196.335279 C0.172908812,195.029597 3.41493334,195.996099 5.0280781,198.479443 L165.475597,122.241607 L5.97253558,159.673952 C6.26284965,162.62144 4.33307352,165.400753 1.37082707,166.095928 C-1.85523608,166.853015 -5.08421717,164.851514 -5.84130406,161.625451 C-6.59839095,158.399388 -4.59689022,155.170407 -1.37082707,154.41332 C1.59114309,153.71821 4.55557301,155.348525 5.60703534,158.116978 L165.661597,120.554607 L117.530016,120.555461 C117.138537,123.490542 114.625169,125.754624 111.582997,125.754624 C108.269288,125.754624 105.582997,123.068332 105.582997,119.754624 C105.582997,116.440915 108.269288,113.754624 111.582997,113.754624 C114.625169,113.754624 117.138537,116.018706 117.530016,118.953787 L166.629255,118.953787 L166.641444,118.863856 C166.645831,118.834468 166.65044,118.805097 166.65527,118.775748 L166.629255,118.953787 C166.641464,118.862257 166.655735,118.77138 166.672022,118.681203 C166.677976,118.646077 166.684575,118.611416 166.691485,118.576792 C166.695859,118.557077 166.699955,118.53705 166.704154,118.517039 L166.708469,118.496162 C166.712557,118.47701 166.716736,118.457891 166.721006,118.438808 C166.724895,118.420217 166.729164,118.401599 166.733526,118.382994 L166.745338,118.335006 C166.75629,118.290183 166.767752,118.245521 166.779719,118.201028 C166.787547,118.169917 166.79621,118.138599 166.805137,118.107328 C166.807539,118.099622 166.809638,118.09231 166.81175,118.085 L166.821556,118.051082 C166.843149,117.978025 166.866096,117.905549 166.890367,117.833683 L166.913875,117.766623 C166.916316,117.759653 166.918769,117.752687 166.921236,117.745725 L166.936321,117.703062 C166.942518,117.686058 166.948796,117.669073 166.955155,117.652107 C166.969765,117.612661 166.984848,117.573593 167.00036,117.534646 C167.011719,117.5065 167.023379,117.4778 167.03527,117.449176 C167.043561,117.429027 167.051842,117.409438 167.060233,117.389883 L167.078769,117.347146 C167.088799,117.324213 167.098979,117.301332 167.109308,117.278503 L167.14777,117.195555 C167.152785,117.184934 167.157833,117.174325 167.162915,117.163726 L167.173744,117.140796 C167.194299,117.09829 167.215381,117.055982 167.236992,117.01388 C167.242197,117.004575 167.247214,116.994886 167.252257,116.985211 L124.72714,87.0339345 C122.716975,89.2080389 119.358489,89.6117865 116.871338,87.860071 C114.162135,85.951962 113.512719,82.2088906 115.420828,79.499687 C116.766536,77.589001 119.024933,76.702829 121.18967,77.0157703 L83.1758128,44.6212179 C80.9741305,46.6014825 77.5924672,46.6946111 75.2769145,44.7213452 C72.754784,42.5720379 72.452551,38.7850893 74.6018583,36.2629588 C76.7511656,33.7408283 80.5381141,33.4385954 83.0602446,35.5879027 C85.3756835,37.5610716 85.8201625,40.9144668 84.2144497,43.4023279 L162.009597,109.698607 L63.5703598,5.1331222 C61.1649041,6.86054944 57.7931124,6.58264705 55.7076121,4.36738224 C53.4361984,1.95463613 53.5507709,-1.84262567 55.9635171,-4.11403937 C58.3762632,-6.38545308 62.173525,-6.27088052 64.4449387,-3.85813441 C66.5305204,-1.6427832 66.6045565,1.73980705 64.7350224,4.03674542 L169.083184,114.875413 L169.112989,114.853066 C169.126154,114.843764 169.139353,114.834519 169.152583,114.825332 C169.161036,114.819746 169.169441,114.813932 169.177858,114.808142 L169.195609,114.795844 C169.250906,114.758128 169.306925,114.721285 169.363653,114.685338 C169.389596,114.668932 169.415193,114.652974 169.440888,114.637224 C169.484845,114.610327 169.529257,114.583932 169.57396,114.558143 C169.607419,114.538688 169.641414,114.519489 169.675563,114.500643 C169.709275,114.482261 169.743248,114.463963 169.777373,114.446007 C169.796115,114.435869 169.814305,114.426409 169.832535,114.417047 L169.884314,114.390979 C169.887282,114.389492 169.890251,114.388008 169.893222,114.386527 L169.917101,114.374301 C169.949674,114.358188 169.982361,114.34239 170.015155,114.326907 C170.029818,114.320258 170.044574,114.313379 170.059353,114.306563 L170.088857,114.292909 C170.094555,114.29031 170.100258,114.28772 170.105965,114.285138 L170.13473,114.272062 C170.178222,114.252693 170.221892,114.233869 170.265731,114.215591 C170.30113,114.200876 170.336436,114.186528 170.371909,114.172508 C170.405987,114.159069 170.440051,114.145991 170.474205,114.133236 C170.518701,114.116512 170.563987,114.100187 170.609522,114.084395 C170.642143,114.07326 170.674483,114.062376 170.706893,114.051776 C170.738974,114.041055 170.771255,114.030783 170.803654,114.020779 C170.835655,114.011707 170.866951,114.002273 170.89828,113.993107 C170.940708,113.980076 170.984042,113.968002 171.027571,113.956406 C171.04017,113.95347 171.052484,113.950209 171.064803,113.946988 L171.103334,113.937105 C171.143551,113.926962 171.183849,113.917242 171.224222,113.907946 C171.236421,113.905859 171.239434,113.905172 171.242448,113.904488 C171.25956,113.899539 171.270249,113.897135 171.28094,113.894762 L171.329941,113.884605 C171.3366,113.883198 171.343261,113.881802 171.349924,113.880418 C171.368878,113.876121 171.387679,113.872311 171.406513,113.868589 L171.464734,113.856987 C171.516064,113.847277 171.567435,113.838259 171.618835,113.829929 C171.624315,113.829721 171.629669,113.82887 171.635025,113.828026 C171.709965,113.815502 171.776105,113.806214 171.842262,113.798052 C171.8499,113.798333 171.857241,113.797433 171.864581,113.796547 L171.881954,113.794559 C171.887292,113.793942 171.892631,113.793331 171.897971,113.792728 C171.955331,113.78492 172.012271,113.779276 172.069201,113.774458 C172.075844,113.774826 172.082166,113.774292 172.088487,113.773768 C172.15222,113.767667 172.216266,113.763496 172.280273,113.760362 C172.287135,113.760636 172.29443,113.76028 172.301724,113.759938 L172.316327,113.758705 C172.377077,113.756099 172.437788,113.754425 172.498441,113.753676 C172.508778,113.753808 172.519457,113.753687 172.530132,113.753596 C172.5794,113.753025 172.628331,113.753285 172.67721,113.754145 C172.70616,113.754611 172.734848,113.755281 172.763504,113.756157 C172.801372,113.757496 172.839763,113.759093 172.878114,113.761058 C172.920371,113.762879 172.962028,113.765401 173.003603,113.768356 C173.013744,113.769543 173.024084,113.770317 173.034421,113.771119 C173.09843,113.775512 173.162064,113.781405 173.225482,113.788305 C173.232105,113.790051 173.2394,113.790848 173.24669,113.791659 C173.26685,113.793319 173.286515,113.795612 173.306175,113.798004 C173.329885,113.800551 173.353176,113.803549 173.376436,113.806683 L173.422476,113.813937 C173.43148,113.815228 173.440479,113.816539 173.449475,113.817871 C173.464973,113.819461 173.480935,113.821861 173.496891,113.824325 L173.586874,113.838871 C173.604333,113.841861 173.621773,113.844928 173.639192,113.848072 C173.671902,113.85393 173.704843,113.860121 173.737751,113.866593 L173.780432,113.875323 C173.785424,113.876349 173.790414,113.877381 173.795403,113.878419 L173.842897,113.888565 C173.870634,113.894526 173.8983,113.900678 173.925894,113.907021 C173.937927,113.909979 173.949857,113.912761 173.961781,113.91558 L173.987956,113.921829 C174.005684,113.926134 174.023387,113.93052 174.041065,113.934985 C174.078296,113.944043 174.115333,113.953717 174.152229,113.963734 C174.171006,113.969021 174.189655,113.974181 174.208286,113.979433 L174.274324,113.998284 C174.285949,114.001708 174.297559,114.005166 174.309155,114.008658 L174.326566,114.014524 C174.348972,114.021334 174.371345,114.028276 174.393684,114.035353 C174.400222,114.037714 174.407537,114.040061 174.414847,114.042421 C174.42549,114.045138 174.436086,114.048576 174.446675,114.052044 L174.513316,114.074178 L174.56067,114.090962 C174.582001,114.098416 174.603299,114.105994 174.62456,114.113695 C174.652576,114.123606 174.681204,114.134242 174.709771,114.145105 L174.781045,114.172867 C174.806386,114.182857 174.831671,114.193025 174.856898,114.203371 C174.896509,114.21959 174.935858,114.236261 174.975071,114.253377 C174.98851,114.259219 175.00186,114.265107 175.015184,114.271042 C175.036285,114.280348 175.056944,114.289669 175.077559,114.299112 C175.094674,114.307113 175.112344,114.315331 175.129967,114.323632 C175.152075,114.334048 175.174144,114.344607 175.196165,114.35531 C175.201007,114.357666 175.205681,114.359945 175.21035,114.36223 C175.236631,114.374763 175.262741,114.38775 175.288774,114.400937 C175.314587,114.414469 175.333102,114.424023 175.351559,114.43367 C175.37155,114.444119 175.391559,114.454723 175.411523,114.46545 C175.435432,114.478289 175.459005,114.491158 175.482481,114.50418 C175.523226,114.526226 175.55602,114.54486 175.58867,114.563826 C175.595552,114.568553 175.602818,114.572801 175.610076,114.577066 L175.624744,114.58495 C175.687245,114.621845 175.749209,114.659964 175.810601,114.699307 C175.827283,114.710695 175.843939,114.721447 175.860545,114.732284 C175.907125,114.762177 175.953395,114.79342 175.999311,114.825373 C176.011132,114.833817 176.022804,114.841969 176.034449,114.850164 L176.048704,114.861193 L176.065647,114.872268 C176.302013,115.040873 176.527091,115.227358 176.738614,115.430918 C176.747209,115.440259 176.756056,115.448808 176.764872,115.45738 L176.788765,115.479752 C176.865253,115.555113 176.939908,115.632744 177.01262,115.712606 C177.025122,115.727149 177.0383,115.741727 177.051409,115.756373 C177.063478,115.769461 177.075133,115.782601 177.086723,115.795784 L177.10569,115.817278 C177.121041,115.834956 177.136298,115.852738 177.151462,115.870623 C177.163789,115.886101 177.176629,115.901341 177.189383,115.916633 C177.205391,115.934939 177.220885,115.953773 177.236252,115.972687 L177.254314,115.99533 C177.266113,116.010037 177.27785,116.024811 177.289524,116.039649 C177.306734,116.061368 177.323756,116.083244 177.340633,116.105251 C177.353632,116.121911 177.365924,116.138125 177.378126,116.154392 L177.404108,116.189549 C177.416647,116.206513 177.429102,116.22355 177.441471,116.24066 C177.45382,116.258022 177.466192,116.275317 177.478463,116.292665 C177.489917,116.308447 177.501151,116.324524 177.512301,116.340648 L177.527394,116.362707 C177.539249,116.380002 177.551019,116.397369 177.562702,116.414806 C177.579127,116.439466 177.595402,116.464132 177.611479,116.488897 C177.620611,116.502852 177.629704,116.517013 177.638734,116.531206 L177.655023,116.556923 C177.657994,116.561643 177.66096,116.566368 177.663919,116.571098 L177.678522,116.594587 C177.699911,116.629103 177.720923,116.663799 177.741556,116.698668 C177.75632,116.723855 177.771026,116.749168 177.785538,116.774574 C177.791581,116.784819 177.797297,116.794882 177.802982,116.804958 C177.806502,116.811551 177.810282,116.818267 177.814049,116.824989 L177.802982,116.804958 C177.819482,116.834201 177.835721,116.863558 177.851698,116.893027 C177.857012,116.903344 177.862623,116.913779 177.868201,116.924229 L177.873676,116.934216 C177.881065,116.948116 177.888402,116.962051 177.895688,116.976023 C177.905596,116.995317 177.915796,117.015068 177.92589,117.034882 L177.939688,117.06169 C177.947675,117.077614 177.955588,117.093568 177.963426,117.109551 L178.01862,117.225175 C178.024615,117.238052 178.030566,117.250953 178.036472,117.26388 C178.042398,117.276463 178.047956,117.288756 178.05347,117.301063 C178.070774,117.339986 178.087977,117.379461 178.104766,117.419153 L178.127517,117.473583 C178.133666,117.488556 178.139751,117.503548 178.145774,117.518559 C178.151064,117.533982 178.15686,117.54864 178.162601,117.56333 C178.177545,117.598996 178.191544,117.635708 178.205191,117.67259 C178.211129,117.690402 178.217555,117.708079 178.223896,117.725782 C178.269381,117.850471 178.310172,117.9771 178.346794,118.105487 C178.349503,118.11839 178.353074,118.131087 178.356602,118.143794 L178.36047,118.155389 C178.36469,118.170703 178.368848,118.186031 178.372944,118.201371 C178.37911,118.224708 178.385335,118.248408 178.391408,118.272124 C178.405757,118.327322 178.419092,118.383131 178.431641,118.439237 L178.436138,118.459289 C178.537453,118.91568 178.583409,119.376471 178.577109,119.832935 C178.573105,120.163574 178.541606,120.489887 178.484411,120.809388 C178.464147,120.922659 178.440637,121.035807 178.413773,121.148469 C178.407264,121.174484 178.400836,121.200541 178.394234,121.226544 C178.386354,121.259162 178.377813,121.291787 178.368987,121.324365 C178.358915,121.360893 178.348617,121.397607 178.33796,121.434246 C178.331468,121.456717 178.324944,121.478657 178.318298,121.500544 C178.312005,121.521152 178.305528,121.542084 178.298933,121.562989 L178.28119,121.618335 C178.269418,121.654392 178.257313,121.6903 178.244881,121.726053 C178.239851,121.740222 178.234797,121.754556 178.229686,121.768877 C178.227894,121.774305 178.225996,121.779612 178.22409,121.784917 L178.209255,121.825643 C178.198049,121.856113 178.186602,121.886467 178.174918,121.916702 C178.160885,121.952427 178.146379,121.988849 178.131494,122.025174 C178.118475,122.057675 178.105094,122.089574 178.091448,122.121333 C178.079964,122.147219 178.068701,122.17289 178.057245,122.198508 C178.054518,122.205784 178.050963,122.213705 178.047389,122.221621 L178.040553,122.236415 C178.03331,122.252338 178.025999,122.268224 178.01862,122.284073 L178.00058,122.322933 C177.997451,122.329548 177.994308,122.336157 177.991153,122.342762 L177.983068,122.358036 C177.973558,122.377726 177.963931,122.397381 177.954188,122.417001 L177.92589,122.474366 C177.920757,122.484441 177.915598,122.494499 177.91041,122.504541 C177.893174,122.538816 177.875015,122.573218 177.8565,122.607474 C177.846334,122.625255 177.83657,122.64307 177.826719,122.660829 L177.791379,122.724778 C177.776314,122.751209 177.761032,122.777549 177.745535,122.803794 C177.739171,122.812722 177.733506,122.822285 177.727814,122.831833 L177.721254,122.843318 C177.709528,122.862808 177.697695,122.882226 177.685754,122.901571 C177.666209,122.934514 177.6457,122.96706 177.62485,122.999446 C177.605699,123.027915 177.586838,123.056656 177.567737,123.085225 C177.506832,123.177206 177.442891,123.267718 177.376363,123.356532 C177.365438,123.370662 177.354821,123.384646 177.344134,123.398594 C177.328367,123.418081 177.317582,123.432046 177.306735,123.445965 C177.304252,123.44897 177.301928,123.451937 177.299602,123.454903 L177.280831,123.480648 C177.131066,123.670038 176.969199,123.85114 176.795689,124.022664 L176.758755,124.057944 C176.750917,124.065576 176.743057,124.073188 176.735176,124.080779 C176.725974,124.089797 176.716483,124.098882 176.706968,124.10793 L176.694581,124.120283 C176.675509,124.138266 176.656304,124.156137 176.636968,124.173893 L176.605189,124.202313 C176.597,124.209744 176.588789,124.217154 176.580557,124.224542 C176.56548,124.238239 176.550326,124.251698 176.535115,124.265071 L176.51967,124.27878 C176.503888,124.292521 176.488024,124.306187 176.472077,124.319779 L176.451978,124.3362 C176.436843,124.349034 176.421658,124.361783 176.406421,124.374446 C176.390577,124.388164 176.374424,124.401429 176.358212,124.4146 L176.34062,124.428607 C176.32392,124.442034 176.307134,124.455381 176.290265,124.468647 C176.25916,124.49311 176.22793,124.517269 176.196507,124.541083 C176.1811,124.553066 176.165448,124.56479 176.149747,124.57643 L176.124082,124.594691 C176.089546,124.620404 176.061523,124.640549 176.033358,124.660427 C176.019257,124.670497 176.004773,124.680607 175.990251,124.690649 L175.96425,124.708611 C175.944033,124.722444 175.923719,124.736161 175.903309,124.749759 C175.884721,124.762049 175.866413,124.77411 175.84805,124.786061 C175.832092,124.79636 175.815914,124.806763 175.799693,124.817083 L175.779132,124.830285 C175.756343,124.844664 175.73344,124.858898 175.710425,124.872987 C175.692379,124.884037 175.674397,124.894922 175.656368,124.905705 C175.639311,124.915635 175.622218,124.925727 175.605083,124.93573 L175.596314,124.941097 C175.563586,124.960139 175.530641,124.97889 175.49748,124.997345 C175.490651,125.001247 175.483608,125.005152 175.476558,125.009042 C175.416524,125.041917 175.364115,125.069696 175.311195,125.096737 C175.303502,125.100426 175.295796,125.104358 175.288081,125.108274 C175.282094,125.11171 175.275704,125.114929 175.269309,125.118137 L175.25614,125.12451 C175.217445,125.143781 175.178482,125.162658 175.139256,125.181138 C175.127956,125.186577 175.116849,125.191789 175.105724,125.196969 L175.092894,125.203126 C175.043926,125.225752 174.994722,125.247679 174.945297,125.268908 C174.93393,125.273692 174.922847,125.278419 174.911746,125.283114 L174.893363,125.290924 C174.843949,125.311597 174.794319,125.331576 174.74449,125.350862 C174.719181,125.360703 174.693903,125.370306 174.668543,125.379743 C174.65491,125.384315 174.641528,125.389224 174.62813,125.394084 C174.610861,125.400773 174.596548,125.405901 174.58222,125.410974 C174.528532,125.430054 174.484194,125.445109 174.439626,125.459656 L174.382355,125.477786 Z M125.648935,85.725483 L158.845597,109.104607 L126.050852,81.156589 C126.514775,82.6374002 126.402102,84.2831319 125.648935,85.725483 Z M386.076275,120.254624 C386.076275,123.568332 383.389984,126.254624 380.076275,126.254624 C377.034103,126.254624 374.520735,123.990542 374.129255,121.055461 L347.023295,121.055461 C346.631815,123.990542 344.118448,126.254624 341.076275,126.254624 C337.762567,126.254624 335.076275,123.568332 335.076275,120.254624 C335.076275,116.940915 337.762567,114.254624 341.076275,114.254624 C344.118448,114.254624 346.631815,116.518706 347.023295,119.453787 L374.129255,119.453787 C374.520735,116.518706 377.034103,114.254624 380.076275,114.254624 C383.389984,114.254624 386.076275,116.940915 386.076275,120.254624 Z" id="Combined-Shape" fill-rule="nonzero"></path>
+        </g>
+        <g id="lines-@grass-green" stroke-width="1" transform="translate(928.4223, 275.4986)" fill="#8BBF1D" fill-opacity="0.8">
+            <path d="M183.524511,4.48500433 C184.914303,7.49318351 183.602341,11.0584437 180.594162,12.4482357 C177.832483,13.7241437 174.60128,12.7229373 173.014902,10.2226647 L11.7358419,84.7336574 C12.6118028,87.5624597 11.2797498,90.6722427 8.51788667,91.9482357 C5.50970749,93.3380278 1.94444735,92.0260659 0.554655269,89.0178867 C-0.835136812,86.0097075 0.476825153,82.4444474 3.48500433,81.0546553 C6.2466835,79.7787473 9.4778862,80.7799537 11.0642641,83.2802263 L172.343325,8.7692336 C171.467364,5.94043134 172.799417,2.83064826 175.56128,1.55465527 C178.569459,0.164863188 182.134719,1.47682515 183.524511,4.48500433 Z M326.745036,2.66373281 L326.854093,2.69091521 C326.892809,2.70089241 326.931374,2.71124636 326.969783,2.72197292 L327.188033,2.78763586 C327.279938,2.81721456 327.371398,2.84911727 327.462332,2.88336045 C327.811635,3.01422828 328.144849,3.17678853 328.459163,3.36739867 C328.469082,3.37442789 328.479799,3.38096793 328.490503,3.38754555 C328.615009,3.46311471 328.73572,3.54354606 328.853248,3.62812388 L328.671928,3.503493 C328.754355,3.55823824 328.83499,3.61466926 328.913819,3.67271006 C328.955669,3.70320669 328.996928,3.73451878 329.037764,3.76634754 C329.062673,3.78579055 329.087024,3.80500845 329.11119,3.82438364 C329.145446,3.85206065 329.179928,3.88043969 329.214085,3.90919282 C329.245607,3.93520829 329.27621,3.96141302 329.306494,3.98787546 C329.321491,4.00173816 329.336869,4.01535953 329.352176,4.02905833 C329.840806,4.46411127 330.243925,4.96720928 330.557234,5.51404646 C330.572017,5.54025174 330.586718,5.56625905 330.601229,5.59238606 C330.618342,5.62311884 330.6353,5.65442623 330.651968,5.68586567 C330.672235,5.72374837 330.691954,5.76176843 330.711277,5.80002353 C330.726654,5.83128469 330.742085,5.86264382 330.757234,5.89412159 C330.777113,5.93414685 330.795921,5.97420122 330.814297,6.01449447 C330.825162,6.04009423 330.836773,6.0661418 330.848195,6.09226292 C331.070225,6.59611446 331.224076,7.13594896 331.299391,7.70061849 L379.630569,7.70169868 C380.021616,4.76613256 382.535213,2.5014455 385.577731,2.5014455 C388.891439,2.5014455 391.577721,5.18784697 391.577721,8.50155547 C391.577721,11.815264 388.891419,14.5015455 385.577711,14.5015455 C382.535539,14.5015455 380.022175,12.2374542 379.6307,9.30237252 L331.299521,9.30129233 C331.268649,9.53304755 331.224548,9.76062139 331.168001,9.98322961 C331.137252,10.1045217 331.10342,10.221903 331.065842,10.3385662 C331.056046,10.3692218 331.04605,10.3994905 331.035821,10.4296516 C331.02417,10.4635438 331.012245,10.497607 330.999995,10.5316004 C330.929425,10.7284601 330.848799,10.9202191 330.758819,11.1066155 C330.755196,11.1126945 330.752136,11.1189973 330.749063,11.1252967 L381.066605,44.5443998 C383.016535,42.3157814 386.363008,41.8202044 388.89728,43.503393 C391.657628,45.3367347 392.409115,49.0606562 390.575773,51.8210044 C388.742432,54.5813526 385.01851,55.3328398 382.258162,53.499498 C379.723813,51.8162581 378.882846,48.5393417 380.181027,45.8776943 L329.862922,12.4592355 L329.809864,12.5178678 C329.798687,12.5302637 329.78746,12.5426129 329.776182,12.5549148 L329.862922,12.4592355 C329.452491,12.9281911 328.980214,13.3203854 328.46668,13.6316638 C328.438327,13.6482433 328.409994,13.6651631 328.381511,13.6818547 C328.347845,13.7019793 328.313858,13.721373 328.279706,13.7404197 C328.237145,13.7640187 328.194182,13.7873937 328.150906,13.8102551 C328.132849,13.819694 328.115146,13.8288917 328.097403,13.8379982 C328.048653,13.8633072 327.999046,13.8880168 327.949054,13.912054 C327.919526,13.9256996 327.890501,13.939308 327.861379,13.9526783 C327.301794,14.2113985 326.694824,14.3859552 326.057314,14.4605689 C325.996267,14.4673704 325.934445,14.4737551 325.872519,14.4791837 C325.856647,14.4806739 325.840911,14.4819628 325.825157,14.4831908 C325.534051,14.506362 325.240632,14.5084474 324.947493,14.4888982 C324.92707,14.486739 324.906784,14.485284 324.886527,14.4837284 C324.830455,14.4802057 324.774371,14.4750955 324.718321,14.4691878 C324.700087,14.4665272 324.68145,14.4644827 324.662838,14.4623531 C324.610126,14.4570188 324.558072,14.450341 324.506066,14.4429702 C324.483176,14.4390924 324.459808,14.4356542 324.436485,14.4320817 C324.380684,14.424106 324.325341,14.4147935 324.270076,14.4046884 C324.252622,14.4010057 324.235293,14.3977691 324.217991,14.3944582 C324.163313,14.384425 324.108652,14.3731321 324.05409,14.3610557 C324.031362,14.3556956 324.007983,14.350393 323.984656,14.3449543 C323.936042,14.3338754 323.888075,14.3220209 323.840202,14.3095535 C323.696722,14.2721268 323.554867,14.2295518 323.415325,14.1819794 L323.396493,14.1753492 C323.3767,14.1685109 323.356929,14.1615634 323.337182,14.1545067 C322.879445,13.9915246 322.447544,13.7744278 322.048926,13.5110278 C322.044055,13.5070528 322.038431,13.5033307 322.032811,13.499598 L321.998709,13.4774814 C321.953646,13.4470501 321.90902,13.4160228 321.86484,13.3844109 L322.032811,13.499598 C321.948566,13.4436449 321.866192,13.3859308 321.785705,13.3265369 C321.757478,13.3059123 321.729164,13.284561 321.701046,13.2629663 C321.666905,13.2367814 321.633604,13.2107313 321.600647,13.1843892 C321.574207,13.1629462 321.547517,13.1411595 321.52102,13.119149 C321.491054,13.0948211 321.461871,13.0701918 321.432975,13.0453297 C321.414073,13.0282615 321.394632,13.0112594 321.375302,12.9941345 C320.861089,12.5409738 320.440518,12.0129342 320.118481,11.4378034 C320.060098,11.3334854 320.004442,11.2271746 319.951938,11.1190501 C319.941583,11.0967455 319.930931,11.0743647 319.920421,11.0519254 C319.643716,10.4647778 319.459944,9.82548573 319.387325,9.15288184 C319.383596,9.11564688 319.379916,9.07898432 319.376572,9.04228423 C319.373204,9.00637948 319.370275,8.96989277 319.367673,8.93331543 C319.321119,8.29458424 319.376212,7.64615392 319.538102,7.01362096 C319.544872,6.98776643 319.551598,6.9621236 319.558489,6.93654836 C319.570828,6.89048339 319.583858,6.84447004 319.597461,6.79855713 C319.606615,6.76756334 319.615971,6.73681794 319.625566,6.70617826 C319.633333,6.68169734 319.64136,6.65670817 319.649558,6.63175265 C319.665454,6.5828093 319.681879,6.53478632 319.698892,6.48704221 C319.708119,6.46196336 319.717488,6.43628443 319.727043,6.41064685 C319.886449,5.98110806 320.093699,5.57524165 320.342191,5.19901003 C320.346438,5.19387206 320.350371,5.18792711 320.354317,5.18198662 L320.376402,5.14778477 C320.406832,5.10272236 320.437859,5.05809555 320.46947,5.01391564 L320.354317,5.18198662 C320.41027,5.09774154 320.467984,5.01536762 320.527378,4.93488124 C320.551245,4.90212489 320.575957,4.86944658 320.600994,4.83703154 C320.623805,4.80756446 320.646554,4.77856313 320.669526,4.74982294 C320.687743,4.72725516 320.706282,4.70448502 320.724983,4.68185439 C321.160975,4.15199378 321.673623,3.71390532 322.235769,3.37281611 C322.26117,3.35789339 322.286104,3.34295874 322.311153,3.32820084 C322.345747,3.3075264 322.380841,3.28740642 322.416112,3.26765701 C322.460617,3.24276088 322.505612,3.21821505 322.550951,3.1942332 C322.562602,3.18827489 322.57398,3.18232931 322.585375,3.17642145 C322.64138,3.14700506 322.698135,3.11858866 322.755394,3.09105445 C322.766602,3.08629705 322.778301,3.08073946 322.790018,3.07522067 C323.382044,2.79459969 324.026522,2.60876934 324.704833,2.53598096 C324.725638,2.53416791 324.746634,2.53198813 324.767643,2.52991847 C324.888814,2.51785303 325.011133,2.50973289 325.134399,2.50533117 C325.192828,2.50288357 325.251042,2.50156602 325.309294,2.50109489 C325.323596,2.50149618 325.337977,2.5014455 325.352369,2.5014455 L325.395124,2.50101353 C325.846349,2.50380809 326.299302,2.55744616 326.745036,2.66373281 Z" id="Combined-Shape" fill-rule="nonzero"></path>
+        </g>
+    </g>
+</svg>
\ No newline at end of file
diff --git a/docs/assets/models/pooling_models/pooling_types.svg b/docs/assets/models/pooling_models/pooling_types.svg
new file mode 100644
index 000000000000..678b793f4346
--- /dev/null
+++ b/docs/assets/models/pooling_models/pooling_types.svg
@@ -0,0 +1,633 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="1020px" height="360px" viewBox="0 0 1020 360" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+    <title>Pooling</title>
+    <defs>
+        <path d="M0,0 L1020,0 L1020,360 L0,360 L0,0 Z" id="path-1"></path>
+        <path d="M0,0 L118,0 L118,128 L0,128 L0,0 Z" id="path-2"></path>
+        <path d="M0,0 L24,0 L24,12 L0,12 L0,0 Z" id="path-3"></path>
+        <path d="M0,0 L24,0 L24,12 L0,12 L0,0 Z" id="path-4"></path>
+        <path d="M0,0 L118,0 L118,128 L0,128 L0,0 Z" id="path-5"></path>
+        <path d="M0,0 L118,0 L118,128 L0,128 L0,0 Z" id="path-6"></path>
+        <path d="M0,0 L24,0 L24,12 L0,12 L0,0 Z" id="path-7"></path>
+        <path d="M0,0 L118,0 L118,128 L0,128 L0,0 Z" id="path-8"></path>
+        <path d="M0,0 L24,0 L24,12 L0,12 L0,0 Z" id="path-9"></path>
+        <path d="M0,0 L118,0 L118,128 L0,128 L0,0 Z" id="path-10"></path>
+    </defs>
+    <g id="Pooling" stroke="none" fill="none" xlink:href="#path-1" fill-rule="evenodd">
+        <use fill="#FFFFFF" xlink:href="#path-1"></use>
+        <g id="dash-box-@pooling" stroke-width="1" transform="translate(30, 58)" fill="#9172E2">
+            <g id="dash-box-Pooling">
+                <path d="M8,0 L952,0 C956.418278,0 960,3.581722 960,8 L960,66 C960,70.418278 956.418278,74 952,74 L8,74 C3.581722,74 0,70.418278 0,66 L0,8 C0,3.581722 3.581722,8.8817842e-16 8,0 Z" id="bg" fill-opacity="0.1"></path>
+                <path d="M10.9746435,72.9999998 L10.974,73.9999998 L8.974,73.9999998 L8.97464335,72.9999998 L10.9746435,72.9999998 Z M14.9746434,72.9999998 L14.974,73.9999998 L12.974,73.9999998 L12.9746433,72.9999998 L14.9746434,72.9999998 Z M18.9746442,72.9999998 L18.974,73.9999998 L16.974,73.9999998 L16.9746433,72.9999998 L18.9746442,72.9999998 Z M22.974644,72.9999998 L22.974,73.9999998 L20.974,73.9999998 L20.9746433,72.9999998 L22.974644,72.9999998 Z M26.9746439,72.9999998 L26.974,73.9999998 L24.974,73.9999998 L24.9746433,72.9999998 L26.9746439,72.9999998 Z M30.9746438,72.9999998 L30.974,73.9999998 L28.974,73.9999998 L28.9746433,72.9999998 L30.9746438,72.9999998 Z M34.9746437,72.9999998 L34.974,73.9999998 L32.974,73.9999998 L32.9746433,72.9999998 L34.9746437,72.9999998 Z M38.9746436,72.9999998 L38.974,73.9999998 L36.974,73.9999998 L36.9746433,72.9999998 L38.9746436,72.9999998 Z M42.9746434,72.9999998 L42.974,73.9999998 L40.974,73.9999998 L40.9746433,72.9999998 L42.9746434,72.9999998 Z M46.9746442,72.9999998 L46.974,73.9999998 L44.974,73.9999998 L44.9746433,72.9999998 L46.9746442,72.9999998 Z M50.9746441,72.9999998 L50.974,73.9999998 L48.974,73.9999998 L48.9746433,72.9999998 L50.9746441,72.9999998 Z M54.974644,72.9999998 L54.974,73.9999998 L52.974,73.9999998 L52.9746433,72.9999998 L54.974644,72.9999998 Z M58.9746439,72.9999998 L58.974,73.9999998 L56.974,73.9999998 L56.9746433,72.9999998 L58.9746439,72.9999998 Z M62.9746437,72.9999998 L62.974,73.9999998 L60.974,73.9999998 L60.9746433,72.9999998 L62.9746437,72.9999998 Z M66.9746436,72.9999998 L66.974,73.9999998 L64.974,73.9999998 L64.9746433,72.9999998 L66.9746436,72.9999998 Z M70.9746435,72.9999998 L70.974,73.9999998 L68.974,73.9999998 L68.9746433,72.9999998 L70.9746435,72.9999998 Z M74.9746434,72.9999998 L74.974,73.9999998 L72.974,73.9999998 L72.9746433,72.9999998 L74.9746434,72.9999998 Z M78.9746441,72.9999998 L78.974,73.9999998 L76.974,73.9999998 L76.9746433,72.9999998 L78.9746441,72.9999998 Z M82.974644,72.9999998 L82.974,73.9999998 L80.974,73.9999998 L80.9746433,72.9999998 L82.974644,72.9999998 Z M86.9746439,72.9999998 L86.974,73.9999998 L84.974,73.9999998 L84.9746433,72.9999998 L86.9746439,72.9999998 Z M90.9746438,72.9999998 L90.974,73.9999998 L88.974,73.9999998 L88.9746433,72.9999998 L90.9746438,72.9999998 Z M94.9746437,72.9999998 L94.974,73.9999998 L92.974,73.9999998 L92.9746433,72.9999998 L94.9746437,72.9999998 Z M98.9746435,72.9999998 L98.974,73.9999998 L96.974,73.9999998 L96.9746433,72.9999998 L98.9746435,72.9999998 Z M102.974643,72.9999998 L102.974,73.9999998 L100.974,73.9999998 L100.974643,72.9999998 L102.974643,72.9999998 Z M106.974644,72.9999998 L106.974,73.9999998 L104.974,73.9999998 L104.974643,72.9999998 L106.974644,72.9999998 Z M110.974644,72.9999998 L110.974,73.9999998 L108.974,73.9999998 L108.974643,72.9999998 L110.974644,72.9999998 Z M114.974644,72.9999998 L114.974,73.9999998 L112.974,73.9999998 L112.974643,72.9999998 L114.974644,72.9999998 Z M118.974644,72.9999998 L118.974,73.9999998 L116.974,73.9999998 L116.974643,72.9999998 L118.974644,72.9999998 Z M122.974644,72.9999998 L122.974,73.9999998 L120.974,73.9999998 L120.974643,72.9999998 L122.974644,72.9999998 Z M126.974644,72.9999998 L126.974,73.9999998 L124.974,73.9999998 L124.974643,72.9999998 L126.974644,72.9999998 Z M130.974643,72.9999998 L130.974,73.9999998 L128.974,73.9999998 L128.974643,72.9999998 L130.974643,72.9999998 Z M134.974644,72.9999998 L134.974,73.9999998 L132.974,73.9999998 L132.974643,72.9999998 L134.974644,72.9999998 Z M138.974644,72.9999998 L138.974,73.9999998 L136.974,73.9999998 L136.974643,72.9999998 L138.974644,72.9999998 Z M142.974644,72.9999998 L142.974,73.9999998 L140.974,73.9999998 L140.974643,72.9999998 L142.974644,72.9999998 Z M146.974644,72.9999998 L146.974,73.9999998 L144.974,73.9999998 L144.974643,72.9999998 L146.974644,72.9999998 Z M150.974644,72.9999998 L150.974,73.9999998 L148.974,73.9999998 L148.974643,72.9999998 L150.974644,72.9999998 Z M154.974644,72.9999998 L154.974,73.9999998 L152.974,73.9999998 L152.974643,72.9999998 L154.974644,72.9999998 Z M158.974644,72.9999998 L158.974,73.9999998 L156.974,73.9999998 L156.974643,72.9999998 L158.974644,72.9999998 Z M162.974643,72.9999998 L162.974,73.9999998 L160.974,73.9999998 L160.974643,72.9999998 L162.974643,72.9999998 Z M166.974644,72.9999998 L166.974,73.9999998 L164.974,73.9999998 L164.974643,72.9999998 L166.974644,72.9999998 Z M170.974644,72.9999998 L170.974,73.9999998 L168.974,73.9999998 L168.974643,72.9999998 L170.974644,72.9999998 Z M174.974644,72.9999998 L174.974,73.9999998 L172.974,73.9999998 L172.974643,72.9999998 L174.974644,72.9999998 Z M178.974644,72.9999998 L178.974,73.9999998 L176.974,73.9999998 L176.974643,72.9999998 L178.974644,72.9999998 Z M182.974644,72.9999998 L182.974,73.9999998 L180.974,73.9999998 L180.974643,72.9999998 L182.974644,72.9999998 Z M186.974644,72.9999998 L186.974,73.9999998 L184.974,73.9999998 L184.974643,72.9999998 L186.974644,72.9999998 Z M190.974643,72.9999998 L190.974,73.9999998 L188.974,73.9999998 L188.974643,72.9999998 L190.974643,72.9999998 Z M194.974644,72.9999998 L194.974,73.9999998 L192.974,73.9999998 L192.974643,72.9999998 L194.974644,72.9999998 Z M198.974644,72.9999998 L198.974,73.9999998 L196.974,73.9999998 L196.974643,72.9999998 L198.974644,72.9999998 Z M202.974644,72.9999998 L202.974,73.9999998 L200.974,73.9999998 L200.974643,72.9999998 L202.974644,72.9999998 Z M206.974644,72.9999998 L206.974,73.9999998 L204.974,73.9999998 L204.974643,72.9999998 L206.974644,72.9999998 Z M210.974644,72.9999998 L210.974,73.9999998 L208.974,73.9999998 L208.974643,72.9999998 L210.974644,72.9999998 Z M214.974644,72.9999998 L214.974,73.9999998 L212.974,73.9999998 L212.974643,72.9999998 L214.974644,72.9999998 Z M218.974643,72.9999998 L218.974,73.9999998 L216.974,73.9999998 L216.974643,72.9999998 L218.974643,72.9999998 Z M222.974643,72.9999998 L222.974,73.9999998 L220.974,73.9999998 L220.974643,72.9999998 L222.974643,72.9999998 Z M226.974644,72.9999998 L226.974,73.9999998 L224.974,73.9999998 L224.974643,72.9999998 L226.974644,72.9999998 Z M230.974644,72.9999998 L230.974,73.9999998 L228.974,73.9999998 L228.974643,72.9999998 L230.974644,72.9999998 Z M234.974644,72.9999998 L234.974,73.9999998 L232.974,73.9999998 L232.974643,72.9999998 L234.974644,72.9999998 Z M238.974644,72.9999998 L238.974,73.9999998 L236.974,73.9999998 L236.974643,72.9999998 L238.974644,72.9999998 Z M242.974644,72.9999998 L242.974,73.9999998 L240.974,73.9999998 L240.974643,72.9999998 L242.974644,72.9999998 Z M246.974644,72.9999998 L246.974,73.9999998 L244.974,73.9999998 L244.974643,72.9999998 L246.974644,72.9999998 Z M250.974643,72.9999998 L250.974,73.9999998 L248.974,73.9999998 L248.974643,72.9999998 L250.974643,72.9999998 Z M254.974644,72.9999998 L254.974,73.9999998 L252.974,73.9999998 L252.974643,72.9999998 L254.974644,72.9999998 Z M258.974644,72.9999998 L258.974,73.9999998 L256.974,73.9999998 L256.974643,72.9999998 L258.974644,72.9999998 Z M262.974644,72.9999998 L262.974,73.9999998 L260.974,73.9999998 L260.974643,72.9999998 L262.974644,72.9999998 Z M266.974644,72.9999998 L266.974,73.9999998 L264.974,73.9999998 L264.974643,72.9999998 L266.974644,72.9999998 Z M270.974644,72.9999998 L270.974,73.9999998 L268.974,73.9999998 L268.974643,72.9999998 L270.974644,72.9999998 Z M274.974644,72.9999998 L274.974,73.9999998 L272.974,73.9999998 L272.974643,72.9999998 L274.974644,72.9999998 Z M278.974643,72.9999998 L278.974,73.9999998 L276.974,73.9999998 L276.974643,72.9999998 L278.974643,72.9999998 Z M282.974644,72.9999998 L282.974,73.9999998 L280.974,73.9999998 L280.974643,72.9999998 L282.974644,72.9999998 Z M286.974644,72.9999998 L286.974,73.9999998 L284.974,73.9999998 L284.974643,72.9999998 L286.974644,72.9999998 Z M290.974644,72.9999998 L290.974,73.9999998 L288.974,73.9999998 L288.974643,72.9999998 L290.974644,72.9999998 Z M294.974644,72.9999998 L294.974,73.9999998 L292.974,73.9999998 L292.974643,72.9999998 L294.974644,72.9999998 Z M298.974644,72.9999998 L298.974,73.9999998 L296.974,73.9999998 L296.974643,72.9999998 L298.974644,72.9999998 Z M302.974644,72.9999998 L302.974,73.9999998 L300.974,73.9999998 L300.974643,72.9999998 L302.974644,72.9999998 Z M306.974644,72.9999998 L306.974,73.9999998 L304.974,73.9999998 L304.974643,72.9999998 L306.974644,72.9999998 Z M310.974643,72.9999998 L310.974,73.9999998 L308.974,73.9999998 L308.974643,72.9999998 L310.974643,72.9999998 Z M314.974644,72.9999998 L314.974,73.9999998 L312.974,73.9999998 L312.974643,72.9999998 L314.974644,72.9999998 Z M318.974644,72.9999998 L318.974,73.9999998 L316.974,73.9999998 L316.974643,72.9999998 L318.974644,72.9999998 Z M322.974644,72.9999998 L322.974,73.9999998 L320.974,73.9999998 L320.974643,72.9999998 L322.974644,72.9999998 Z M326.974644,72.9999998 L326.974,73.9999998 L324.974,73.9999998 L324.974643,72.9999998 L326.974644,72.9999998 Z M330.974644,72.9999998 L330.974,73.9999998 L328.974,73.9999998 L328.974643,72.9999998 L330.974644,72.9999998 Z M334.974644,72.9999998 L334.974,73.9999998 L332.974,73.9999998 L332.974643,72.9999998 L334.974644,72.9999998 Z M338.974643,72.9999998 L338.974,73.9999998 L336.974,73.9999998 L336.974643,72.9999998 L338.974643,72.9999998 Z M342.974644,72.9999998 L342.974,73.9999998 L340.974,73.9999998 L340.974643,72.9999998 L342.974644,72.9999998 Z M346.974644,72.9999998 L346.974,73.9999998 L344.974,73.9999998 L344.974643,72.9999998 L346.974644,72.9999998 Z M350.974644,72.9999998 L350.974,73.9999998 L348.974,73.9999998 L348.974643,72.9999998 L350.974644,72.9999998 Z M354.974644,72.9999998 L354.974,73.9999998 L352.974,73.9999998 L352.974643,72.9999998 L354.974644,72.9999998 Z M358.974644,72.9999998 L358.974,73.9999998 L356.974,73.9999998 L356.974643,72.9999998 L358.974644,72.9999998 Z M362.974644,72.9999998 L362.974,73.9999998 L360.974,73.9999998 L360.974643,72.9999998 L362.974644,72.9999998 Z M366.974643,72.9999998 L366.974,73.9999998 L364.974,73.9999998 L364.974643,72.9999998 L366.974643,72.9999998 Z M370.974643,72.9999998 L370.974,73.9999998 L368.974,73.9999998 L368.974643,72.9999998 L370.974643,72.9999998 Z M374.974644,72.9999998 L374.974,73.9999998 L372.974,73.9999998 L372.974643,72.9999998 L374.974644,72.9999998 Z M378.974644,72.9999998 L378.974,73.9999998 L376.974,73.9999998 L376.974643,72.9999998 L378.974644,72.9999998 Z M382.974644,72.9999998 L382.974,73.9999998 L380.974,73.9999998 L380.974643,72.9999998 L382.974644,72.9999998 Z M386.974644,72.9999998 L386.974,73.9999998 L384.974,73.9999998 L384.974643,72.9999998 L386.974644,72.9999998 Z M390.974644,72.9999998 L390.974,73.9999998 L388.974,73.9999998 L388.974643,72.9999998 L390.974644,72.9999998 Z M394.974644,72.9999998 L394.974,73.9999998 L392.974,73.9999998 L392.974643,72.9999998 L394.974644,72.9999998 Z M398.974643,72.9999998 L398.974,73.9999998 L396.974,73.9999998 L396.974643,72.9999998 L398.974643,72.9999998 Z M402.974644,72.9999998 L402.974,73.9999998 L400.974,73.9999998 L400.974643,72.9999998 L402.974644,72.9999998 Z M406.974644,72.9999998 L406.974,73.9999998 L404.974,73.9999998 L404.974643,72.9999998 L406.974644,72.9999998 Z M410.974644,72.9999998 L410.974,73.9999998 L408.974,73.9999998 L408.974643,72.9999998 L410.974644,72.9999998 Z M414.974644,72.9999998 L414.974,73.9999998 L412.974,73.9999998 L412.974643,72.9999998 L414.974644,72.9999998 Z M418.974644,72.9999998 L418.974,73.9999998 L416.974,73.9999998 L416.974643,72.9999998 L418.974644,72.9999998 Z M422.974644,72.9999998 L422.974,73.9999998 L420.974,73.9999998 L420.974643,72.9999998 L422.974644,72.9999998 Z M426.974643,72.9999998 L426.974,73.9999998 L424.974,73.9999998 L424.974643,72.9999998 L426.974643,72.9999998 Z M430.974644,72.9999998 L430.974,73.9999998 L428.974,73.9999998 L428.974643,72.9999998 L430.974644,72.9999998 Z M434.974644,72.9999998 L434.974,73.9999998 L432.974,73.9999998 L432.974643,72.9999998 L434.974644,72.9999998 Z M438.974644,72.9999998 L438.974,73.9999998 L436.974,73.9999998 L436.974643,72.9999998 L438.974644,72.9999998 Z M442.974644,72.9999998 L442.974,73.9999998 L440.974,73.9999998 L440.974643,72.9999998 L442.974644,72.9999998 Z M446.974644,72.9999998 L446.974,73.9999998 L444.974,73.9999998 L444.974643,72.9999998 L446.974644,72.9999998 Z M450.974644,72.9999998 L450.974,73.9999998 L448.974,73.9999998 L448.974643,72.9999998 L450.974644,72.9999998 Z M454.974644,72.9999998 L454.974,73.9999998 L452.974,73.9999998 L452.974643,72.9999998 L454.974644,72.9999998 Z M458.974643,72.9999998 L458.974,73.9999998 L456.974,73.9999998 L456.974643,72.9999998 L458.974643,72.9999998 Z M462.974644,72.9999998 L462.974,73.9999998 L460.974,73.9999998 L460.974643,72.9999998 L462.974644,72.9999998 Z M466.974644,72.9999998 L466.974,73.9999998 L464.974,73.9999998 L464.974643,72.9999998 L466.974644,72.9999998 Z M470.974644,72.9999998 L470.974,73.9999998 L468.974,73.9999998 L468.974643,72.9999998 L470.974644,72.9999998 Z M474.974644,72.9999998 L474.974,73.9999998 L472.974,73.9999998 L472.974643,72.9999998 L474.974644,72.9999998 Z M478.974644,72.9999998 L478.974,73.9999998 L476.974,73.9999998 L476.974643,72.9999998 L478.974644,72.9999998 Z M482.974643,72.9999998 L482.974,73.9999998 L480.974,73.9999998 L480.974643,72.9999998 L482.974643,72.9999998 Z M486.974643,72.9999998 L486.974,73.9999998 L484.974,73.9999998 L484.974643,72.9999998 L486.974643,72.9999998 Z M490.974643,72.9999998 L490.974,73.9999998 L488.974,73.9999998 L488.974643,72.9999998 L490.974643,72.9999998 Z M494.974643,72.9999998 L494.974,73.9999998 L492.974,73.9999998 L492.974643,72.9999998 L494.974643,72.9999998 Z M498.974643,72.9999998 L498.974,73.9999998 L496.974,73.9999998 L496.974643,72.9999998 L498.974643,72.9999998 Z M502.974643,72.9999998 L502.974,73.9999998 L500.974,73.9999998 L500.974643,72.9999998 L502.974643,72.9999998 Z M506.974643,72.9999998 L506.974,73.9999998 L504.974,73.9999998 L504.974643,72.9999998 L506.974643,72.9999998 Z M510.974643,72.9999998 L510.974,73.9999998 L508.974,73.9999998 L508.974643,72.9999998 L510.974643,72.9999998 Z M514.974643,72.9999998 L514.974,73.9999998 L512.974,73.9999998 L512.974643,72.9999998 L514.974643,72.9999998 Z M518.974642,72.9999998 L518.974,73.9999998 L516.974,73.9999998 L516.974643,72.9999998 L518.974642,72.9999998 Z M522.974643,72.9999998 L522.974,73.9999998 L520.974,73.9999998 L520.974643,72.9999998 L522.974643,72.9999998 Z M526.974643,72.9999998 L526.974,73.9999998 L524.974,73.9999998 L524.974643,72.9999998 L526.974643,72.9999998 Z M530.974643,72.9999998 L530.974,73.9999998 L528.974,73.9999998 L528.974643,72.9999998 L530.974643,72.9999998 Z M534.974643,72.9999998 L534.974,73.9999998 L532.974,73.9999998 L532.974643,72.9999998 L534.974643,72.9999998 Z M538.974643,72.9999998 L538.974,73.9999998 L536.974,73.9999998 L536.974643,72.9999998 L538.974643,72.9999998 Z M542.974643,72.9999998 L542.974,73.9999998 L540.974,73.9999998 L540.974643,72.9999998 L542.974643,72.9999998 Z M546.974643,72.9999998 L546.974,73.9999998 L544.974,73.9999998 L544.974643,72.9999998 L546.974643,72.9999998 Z M550.974643,72.9999998 L550.974,73.9999998 L548.974,73.9999998 L548.974643,72.9999998 L550.974643,72.9999998 Z M554.974643,72.9999998 L554.974,73.9999998 L552.974,73.9999998 L552.974643,72.9999998 L554.974643,72.9999998 Z M558.974643,72.9999998 L558.974,73.9999998 L556.974,73.9999998 L556.974643,72.9999998 L558.974643,72.9999998 Z M562.974643,72.9999998 L562.974,73.9999998 L560.974,73.9999998 L560.974643,72.9999998 L562.974643,72.9999998 Z M566.974643,72.9999998 L566.974,73.9999998 L564.974,73.9999998 L564.974643,72.9999998 L566.974643,72.9999998 Z M570.974643,72.9999998 L570.974,73.9999998 L568.974,73.9999998 L568.974643,72.9999998 L570.974643,72.9999998 Z M574.974643,72.9999998 L574.974,73.9999998 L572.974,73.9999998 L572.974643,72.9999998 L574.974643,72.9999998 Z M578.974643,72.9999998 L578.974,73.9999998 L576.974,73.9999998 L576.974643,72.9999998 L578.974643,72.9999998 Z M582.974643,72.9999998 L582.974,73.9999998 L580.974,73.9999998 L580.974643,72.9999998 L582.974643,72.9999998 Z M586.974643,72.9999998 L586.974,73.9999998 L584.974,73.9999998 L584.974643,72.9999998 L586.974643,72.9999998 Z M590.974643,72.9999998 L590.974,73.9999998 L588.974,73.9999998 L588.974643,72.9999998 L590.974643,72.9999998 Z M594.974643,72.9999998 L594.974,73.9999998 L592.974,73.9999998 L592.974643,72.9999998 L594.974643,72.9999998 Z M598.974643,72.9999998 L598.974,73.9999998 L596.974,73.9999998 L596.974643,72.9999998 L598.974643,72.9999998 Z M602.974643,72.9999998 L602.974,73.9999998 L600.974,73.9999998 L600.974643,72.9999998 L602.974643,72.9999998 Z M606.974642,72.9999998 L606.974,73.9999998 L604.974,73.9999998 L604.974643,72.9999998 L606.974642,72.9999998 Z M610.974643,72.9999998 L610.974,73.9999998 L608.974,73.9999998 L608.974643,72.9999998 L610.974643,72.9999998 Z M614.974643,72.9999998 L614.974,73.9999998 L612.974,73.9999998 L612.974643,72.9999998 L614.974643,72.9999998 Z M618.974643,72.9999998 L618.974,73.9999998 L616.974,73.9999998 L616.974643,72.9999998 L618.974643,72.9999998 Z M622.974643,72.9999998 L622.974,73.9999998 L620.974,73.9999998 L620.974643,72.9999998 L622.974643,72.9999998 Z M626.974643,72.9999998 L626.974,73.9999998 L624.974,73.9999998 L624.974643,72.9999998 L626.974643,72.9999998 Z M630.974643,72.9999998 L630.974,73.9999998 L628.974,73.9999998 L628.974643,72.9999998 L630.974643,72.9999998 Z M634.974643,72.9999998 L634.974,73.9999998 L632.974,73.9999998 L632.974643,72.9999998 L634.974643,72.9999998 Z M638.974643,72.9999998 L638.974,73.9999998 L636.974,73.9999998 L636.974643,72.9999998 L638.974643,72.9999998 Z M642.974643,72.9999998 L642.974,73.9999998 L640.974,73.9999998 L640.974643,72.9999998 L642.974643,72.9999998 Z M646.974643,72.9999998 L646.974,73.9999998 L644.974,73.9999998 L644.974643,72.9999998 L646.974643,72.9999998 Z M650.974643,72.9999998 L650.974,73.9999998 L648.974,73.9999998 L648.974643,72.9999998 L650.974643,72.9999998 Z M654.974643,72.9999998 L654.974,73.9999998 L652.974,73.9999998 L652.974643,72.9999998 L654.974643,72.9999998 Z M658.974643,72.9999998 L658.974,73.9999998 L656.974,73.9999998 L656.974643,72.9999998 L658.974643,72.9999998 Z M662.974643,72.9999998 L662.974,73.9999998 L660.974,73.9999998 L660.974643,72.9999998 L662.974643,72.9999998 Z M666.974642,72.9999998 L666.974,73.9999998 L664.974,73.9999998 L664.974643,72.9999998 L666.974642,72.9999998 Z M670.974643,72.9999998 L670.974,73.9999998 L668.974,73.9999998 L668.974643,72.9999998 L670.974643,72.9999998 Z M674.974643,72.9999998 L674.974,73.9999998 L672.974,73.9999998 L672.974643,72.9999998 L674.974643,72.9999998 Z M678.974643,72.9999998 L678.974,73.9999998 L676.974,73.9999998 L676.974643,72.9999998 L678.974643,72.9999998 Z M682.974643,72.9999998 L682.974,73.9999998 L680.974,73.9999998 L680.974643,72.9999998 L682.974643,72.9999998 Z M686.974643,72.9999998 L686.974,73.9999998 L684.974,73.9999998 L684.974643,72.9999998 L686.974643,72.9999998 Z M690.974643,72.9999998 L690.974,73.9999998 L688.974,73.9999998 L688.974643,72.9999998 L690.974643,72.9999998 Z M694.974643,72.9999998 L694.974,73.9999998 L692.974,73.9999998 L692.974643,72.9999998 L694.974643,72.9999998 Z M698.974643,72.9999998 L698.974,73.9999998 L696.974,73.9999998 L696.974643,72.9999998 L698.974643,72.9999998 Z M702.974643,72.9999998 L702.974,73.9999998 L700.974,73.9999998 L700.974643,72.9999998 L702.974643,72.9999998 Z M706.974643,72.9999998 L706.974,73.9999998 L704.974,73.9999998 L704.974643,72.9999998 L706.974643,72.9999998 Z M710.974643,72.9999998 L710.974,73.9999998 L708.974,73.9999998 L708.974643,72.9999998 L710.974643,72.9999998 Z M714.974643,72.9999998 L714.974,73.9999998 L712.974,73.9999998 L712.974643,72.9999998 L714.974643,72.9999998 Z M718.974643,72.9999998 L718.974,73.9999998 L716.974,73.9999998 L716.974643,72.9999998 L718.974643,72.9999998 Z M722.974643,72.9999998 L722.974,73.9999998 L720.974,73.9999998 L720.974643,72.9999998 L722.974643,72.9999998 Z M726.974643,72.9999998 L726.974,73.9999998 L724.974,73.9999998 L724.974643,72.9999998 L726.974643,72.9999998 Z M730.974643,72.9999998 L730.974,73.9999998 L728.974,73.9999998 L728.974643,72.9999998 L730.974643,72.9999998 Z M734.974643,72.9999998 L734.974,73.9999998 L732.974,73.9999998 L732.974643,72.9999998 L734.974643,72.9999998 Z M738.974643,72.9999998 L738.974,73.9999998 L736.974,73.9999998 L736.974643,72.9999998 L738.974643,72.9999998 Z M742.974643,72.9999998 L742.974,73.9999998 L740.974,73.9999998 L740.974643,72.9999998 L742.974643,72.9999998 Z M746.974643,72.9999998 L746.974,73.9999998 L744.974,73.9999998 L744.974643,72.9999998 L746.974643,72.9999998 Z M750.974643,72.9999998 L750.974,73.9999998 L748.974,73.9999998 L748.974643,72.9999998 L750.974643,72.9999998 Z M754.974642,72.9999998 L754.974,73.9999998 L752.974,73.9999998 L752.974643,72.9999998 L754.974642,72.9999998 Z M758.974643,72.9999998 L758.974,73.9999998 L756.974,73.9999998 L756.974643,72.9999998 L758.974643,72.9999998 Z M762.974643,72.9999998 L762.974,73.9999998 L760.974,73.9999998 L760.974643,72.9999998 L762.974643,72.9999998 Z M766.974643,72.9999998 L766.974,73.9999998 L764.974,73.9999998 L764.974643,72.9999998 L766.974643,72.9999998 Z M770.974643,72.9999998 L770.974,73.9999998 L768.974,73.9999998 L768.974643,72.9999998 L770.974643,72.9999998 Z M774.974643,72.9999998 L774.974,73.9999998 L772.974,73.9999998 L772.974643,72.9999998 L774.974643,72.9999998 Z M778.974643,72.9999998 L778.974,73.9999998 L776.974,73.9999998 L776.974643,72.9999998 L778.974643,72.9999998 Z M782.974643,72.9999998 L782.974,73.9999998 L780.974,73.9999998 L780.974643,72.9999998 L782.974643,72.9999998 Z M786.974643,72.9999998 L786.974,73.9999998 L784.974,73.9999998 L784.974643,72.9999998 L786.974643,72.9999998 Z M790.974643,72.9999998 L790.974,73.9999998 L788.974,73.9999998 L788.974643,72.9999998 L790.974643,72.9999998 Z M794.974643,72.9999998 L794.974,73.9999998 L792.974,73.9999998 L792.974643,72.9999998 L794.974643,72.9999998 Z M798.974643,72.9999998 L798.974,73.9999998 L796.974,73.9999998 L796.974643,72.9999998 L798.974643,72.9999998 Z M802.974643,72.9999998 L802.974,73.9999998 L800.974,73.9999998 L800.974643,72.9999998 L802.974643,72.9999998 Z M806.974643,72.9999998 L806.974,73.9999998 L804.974,73.9999998 L804.974643,72.9999998 L806.974643,72.9999998 Z M810.974643,72.9999998 L810.974,73.9999998 L808.974,73.9999998 L808.974643,72.9999998 L810.974643,72.9999998 Z M814.974642,72.9999998 L814.974,73.9999998 L812.974,73.9999998 L812.974643,72.9999998 L814.974642,72.9999998 Z M818.974643,72.9999998 L818.974,73.9999998 L816.974,73.9999998 L816.974643,72.9999998 L818.974643,72.9999998 Z M822.974643,72.9999998 L822.974,73.9999998 L820.974,73.9999998 L820.974643,72.9999998 L822.974643,72.9999998 Z M826.974643,72.9999998 L826.974,73.9999998 L824.974,73.9999998 L824.974643,72.9999998 L826.974643,72.9999998 Z M830.974643,72.9999998 L830.974,73.9999998 L828.974,73.9999998 L828.974643,72.9999998 L830.974643,72.9999998 Z M834.974643,72.9999998 L834.974,73.9999998 L832.974,73.9999998 L832.974643,72.9999998 L834.974643,72.9999998 Z M838.974643,72.9999998 L838.974,73.9999998 L836.974,73.9999998 L836.974643,72.9999998 L838.974643,72.9999998 Z M842.974643,72.9999998 L842.974,73.9999998 L840.974,73.9999998 L840.974643,72.9999998 L842.974643,72.9999998 Z M846.974643,72.9999998 L846.974,73.9999998 L844.974,73.9999998 L844.974643,72.9999998 L846.974643,72.9999998 Z M850.974643,72.9999998 L850.974,73.9999998 L848.974,73.9999998 L848.974643,72.9999998 L850.974643,72.9999998 Z M854.974643,72.9999998 L854.974,73.9999998 L852.974,73.9999998 L852.974643,72.9999998 L854.974643,72.9999998 Z M858.974643,72.9999998 L858.974,73.9999998 L856.974,73.9999998 L856.974643,72.9999998 L858.974643,72.9999998 Z M862.974643,72.9999998 L862.974,73.9999998 L860.974,73.9999998 L860.974643,72.9999998 L862.974643,72.9999998 Z M866.974643,72.9999998 L866.974,73.9999998 L864.974,73.9999998 L864.974643,72.9999998 L866.974643,72.9999998 Z M870.974643,72.9999998 L870.974,73.9999998 L868.974,73.9999998 L868.974643,72.9999998 L870.974643,72.9999998 Z M874.974643,72.9999998 L874.974,73.9999998 L872.974,73.9999998 L872.974643,72.9999998 L874.974643,72.9999998 Z M878.974643,72.9999998 L878.974,73.9999998 L876.974,73.9999998 L876.974643,72.9999998 L878.974643,72.9999998 Z M882.974643,72.9999998 L882.974,73.9999998 L880.974,73.9999998 L880.974643,72.9999998 L882.974643,72.9999998 Z M886.974643,72.9999998 L886.974,73.9999998 L884.974,73.9999998 L884.974643,72.9999998 L886.974643,72.9999998 Z M890.974643,72.9999998 L890.974,73.9999998 L888.974,73.9999998 L888.974643,72.9999998 L890.974643,72.9999998 Z M894.974643,72.9999998 L894.974,73.9999998 L892.974,73.9999998 L892.974643,72.9999998 L894.974643,72.9999998 Z M898.974643,72.9999998 L898.974,73.9999998 L896.974,73.9999998 L896.974643,72.9999998 L898.974643,72.9999998 Z M902.974642,72.9999998 L902.974,73.9999998 L900.974,73.9999998 L900.974643,72.9999998 L902.974642,72.9999998 Z M906.974643,72.9999998 L906.974,73.9999998 L904.974,73.9999998 L904.974643,72.9999998 L906.974643,72.9999998 Z M910.974643,72.9999998 L910.974,73.9999998 L908.974,73.9999998 L908.974643,72.9999998 L910.974643,72.9999998 Z M914.974643,72.9999998 L914.974,73.9999998 L912.974,73.9999998 L912.974643,72.9999998 L914.974643,72.9999998 Z M918.974643,72.9999998 L918.974,73.9999998 L916.974,73.9999998 L916.974643,72.9999998 L918.974643,72.9999998 Z M922.974643,72.9999998 L922.974,73.9999998 L920.974,73.9999998 L920.974643,72.9999998 L922.974643,72.9999998 Z M926.974643,72.9999998 L926.974,73.9999998 L924.974,73.9999998 L924.974643,72.9999998 L926.974643,72.9999998 Z M930.974643,72.9999998 L930.974,73.9999998 L928.974,73.9999998 L928.974643,72.9999998 L930.974643,72.9999998 Z M934.974643,72.9999998 L934.974,73.9999998 L932.974,73.9999998 L932.974643,72.9999998 L934.974643,72.9999998 Z M938.974643,72.9999998 L938.974,73.9999998 L936.974,73.9999998 L936.974643,72.9999998 L938.974643,72.9999998 Z M942.974643,72.9999998 L942.974,73.9999998 L940.974,73.9999998 L940.974643,72.9999998 L942.974643,72.9999998 Z M946.974643,72.9999998 L946.974,73.9999998 L944.974,73.9999998 L944.974643,72.9999998 L946.974643,72.9999998 Z M950.974643,72.9999998 L950.974,73.9999998 L948.974,73.9999998 L948.974643,72.9999998 L950.974643,72.9999998 Z M954.489743,72.544574 L954.845614,73.4791091 C954.230519,73.7132701 953.579078,73.8737323 952.902082,73.9497049 L952.79022,72.9559811 C953.375654,72.8903126 953.945502,72.7517531 954.489743,72.544574 Z M5.41029967,72.5056361 C5.95157753,72.7213183 6.51934132,72.8686603 7.10343083,72.9432289 L6.97697115,73.9352007 C6.30134705,73.8489616 5.65205616,73.6784493 5.03997173,73.4345371 L5.41029967,72.5056361 Z M957.388302,70.4686843 L958.157467,71.1077344 C957.730317,71.6220994 957.239943,72.0820804 956.698265,72.4757556 L956.11069,71.6665857 C956.584521,71.3221774 957.013992,70.9194515 957.388302,70.4686843 Z M2.56157145,70.4076246 C2.93082185,70.8625901 3.35581002,71.2701375 3.82581503,71.6198877 L3.22929764,72.422488 C2.69189028,72.0226419 2.20638657,71.5569385 1.78473163,71.0373231 L2.56157145,70.4076246 Z M958.876065,67.3192015 L959.858348,67.506607 C959.73129,68.1733384 959.521483,68.8107286 959.240021,69.407685 L958.335201,68.9818892 C958.584106,68.4541311 958.766097,67.896352 958.876065,67.3192015 Z M1.10775661,67.2312252 C1.21033515,67.8099081 1.38509885,68.3698753 1.62708778,68.9005768 L0.717340098,69.3157385 C0.443469481,68.7151789 0.241701076,68.07482 0.123099669,67.4057265 L1.10775661,67.2312252 Z M960,63.512 L960,65.512 L959,65.5126783 L959,63.5126783 L960,63.512 Z M1,63.461965 L1,65.461965 L0,65.461 L0,63.461 L1,63.461965 Z M960,59.512 L960,61.512 L959,61.5126783 L959,59.5126783 L960,59.512 Z M1,59.461965 L1,61.461965 L0,61.461 L0,59.461 L1,59.461965 Z M960,55.512 L960,57.512 L959,57.5126783 L959,55.5126783 L960,55.512 Z M1,55.461965 L1,57.461965 L0,57.461 L0,55.461 L1,55.461965 Z M960,51.512 L960,53.512 L959,53.5126783 L959,51.5126783 L960,51.512 Z M1,51.461965 L1,53.461965 L0,53.461 L0,51.461 L1,51.461965 Z M960,47.512 L960,49.512 L959,49.5126783 L959,47.5126783 L960,47.512 Z M1,47.461965 L1,49.461965 L0,49.461 L0,47.461 L1,47.461965 Z M960,43.512 L960,45.512 L959,45.5126783 L959,43.5126783 L960,43.512 Z M1,43.461965 L1,45.461965 L0,45.461 L0,43.461 L1,43.461965 Z M960,39.512 L960,41.512 L959,41.5126783 L959,39.5126783 L960,39.512 Z M1,39.461965 L1,41.461965 L0,41.461 L0,39.461 L1,39.461965 Z M960,35.512 L960,37.512 L959,37.5126783 L959,35.5126783 L960,35.512 Z M1,35.461965 L1,37.461965 L0,37.461 L0,35.461 L1,35.461965 Z M960,31.512 L960,33.512 L959,33.5126783 L959,31.5126783 L960,31.512 Z M1,31.461965 L1,33.461965 L0,33.461 L0,31.461 L1,31.461965 Z M960,27.512 L960,29.512 L959,29.5126783 L959,27.5126783 L960,27.512 Z M1,27.461965 L1,29.461965 L0,29.461 L0,27.461 L1,27.461965 Z M960,23.512 L960,25.512 L959,25.5126783 L959,23.5126783 L960,23.512 Z M1,23.461965 L1,25.461965 L0,25.461 L0,23.461 L1,23.461965 Z M960,19.512 L960,21.512 L959,21.5126783 L959,19.5126783 L960,19.512 Z M1,19.461965 L1,21.461965 L0,21.461 L0,19.461 L1,19.461965 Z M960,15.512 L960,17.512 L959,17.5126783 L959,15.5126783 L960,15.512 Z M1,15.461965 L1,17.461965 L0,17.461 L0,15.461 L1,15.461965 Z M960,11.512 L960,13.512 L959,13.5126783 L959,11.5126783 L960,11.512 Z M1,11.461965 L1,13.461965 L0,13.461 L0,11.461 L1,11.461965 Z M960,8 L960,9.512 L959,9.51268141 L958.999996,7.99230721 C958.999843,7.84931676 958.995414,7.70690426 958.986748,7.5651841 L959.984907,7.50453602 C959.99492,7.6684065 960,7.83361003 960,8 Z M0.0177810396,7.46235014 L1.01560984,7.52821226 C1.00521986,7.68466746 1,7.84198159 1,8 L1,9.46196502 L0,9.461 L0,8 C0,7.81932186 0.00598958881,7.64004267 0.0177810396,7.46235014 Z M958.669225,5.86656128 C958.490852,5.30842466 958.242774,4.77610859 957.93156,4.28094453 L958.777667,3.74792999 C959.130817,4.30965578 959.416068,4.91838179 959.621909,5.56259574 L958.669225,5.86656128 Z M1.26026912,3.68817589 L2.10224493,4.22769122 C1.78656347,4.72006919 1.53367638,5.25013249 1.3502652,5.80651827 L0.400554683,5.49338901 C0.612012081,4.85193918 0.902417359,4.24636767 1.26026912,3.68817589 Z M957.488127,2.17927796 L956.801538,2.90632361 C956.375276,2.5043409 955.900412,2.15691661 955.388372,1.8730526 L955.873899,0.998830409 C956.461218,1.32450134 957.00324,1.72192978 957.488127,2.17927796 Z M4.18303226,0.967564053 L4.66099377,1.84594486 C4.14669322,2.12566396 3.66909236,2.46925191 3.23964084,2.86780054 L2.55894601,2.13523322 C3.04750167,1.68176376 3.5928096,1.28859486 4.18303226,0.967564053 Z M952,0 C952.685315,0 953.350503,0.0861719866 953.985321,0.248271715 L953.737382,1.21704772 C953.174749,1.07342708 952.592644,1 952,1 L952,0 Z M10,0 L10,1 L8.00000086,1 L7.59669512,1.01139575 C7.19522811,1.0341476 6.79959301,1.09086155 6.41261147,1.18053077 L6.18734244,0.206234069 C6.76976317,0.0713155963 7.37655326,0 8,0 L10,0 Z M482,0 L482,1 L480,1 L480,0 L482,0 Z M942,0 L942,1 L940,1 L940,0 L942,0 Z M938,0 L938,1 L936,1 L936,0 L938,0 Z M934,0 L934,1 L932,1 L932,0 L934,0 Z M930,0 L930,1 L928,1 L928,0 L930,0 Z M926,0 L926,1 L924,1 L924,0 L926,0 Z M922,0 L922,1 L920,1 L920,0 L922,0 Z M918,0 L918,1 L916,1 L916,0 L918,0 Z M914,0 L914,1 L912,1 L912,0 L914,0 Z M910,0 L910,1 L908,1 L908,0 L910,0 Z M906,0 L906,1 L904,1 L904,0 L906,0 Z M902,0 L902,1 L900,1 L900,0 L902,0 Z M898,0 L898,1 L896,1 L896,0 L898,0 Z M894,0 L894,1 L892,1 L892,0 L894,0 Z M890,0 L890,1 L888,1 L888,0 L890,0 Z M886,0 L886,1 L884,1 L884,0 L886,0 Z M882,0 L882,1 L880,1 L880,0 L882,0 Z M878,0 L878,1 L876,1 L876,0 L878,0 Z M874,0 L874,1 L872,1 L872,0 L874,0 Z M870,0 L870,1 L868,1 L868,0 L870,0 Z M866,0 L866,1 L864,1 L864,0 L866,0 Z M862,0 L862,1 L860,1 L860,0 L862,0 Z M858,0 L858,1 L856,1 L856,0 L858,0 Z M854,0 L854,1 L852,1 L852,0 L854,0 Z M850,0 L850,1 L848,1 L848,0 L850,0 Z M846,0 L846,1 L844,1 L844,0 L846,0 Z M842,0 L842,1 L840,1 L840,0 L842,0 Z M838,0 L838,1 L836,1 L836,0 L838,0 Z M834,0 L834,1 L832,1 L832,0 L834,0 Z M830,0 L830,1 L828,1 L828,0 L830,0 Z M826,0 L826,1 L824,1 L824,0 L826,0 Z M822,0 L822,1 L820,1 L820,0 L822,0 Z M818,0 L818,1 L816,1 L816,0 L818,0 Z M814,0 L814,1 L812,1 L812,0 L814,0 Z M810,0 L810,1 L808,1 L808,0 L810,0 Z M806,0 L806,1 L804,1 L804,0 L806,0 Z M802,0 L802,1 L800,1 L800,0 L802,0 Z M798,0 L798,1 L796,1 L796,0 L798,0 Z M794,0 L794,1 L792,1 L792,0 L794,0 Z M790,0 L790,1 L788,1 L788,0 L790,0 Z M786,0 L786,1 L784,1 L784,0 L786,0 Z M782,0 L782,1 L780,1 L780,0 L782,0 Z M778,0 L778,1 L776,1 L776,0 L778,0 Z M774,0 L774,1 L772,1 L772,0 L774,0 Z M770,0 L770,1 L768,1 L768,0 L770,0 Z M766,0 L766,1 L764,1 L764,0 L766,0 Z M762,0 L762,1 L760,1 L760,0 L762,0 Z M758,0 L758,1 L756,1 L756,0 L758,0 Z M754,0 L754,1 L752,1 L752,0 L754,0 Z M750,0 L750,1 L748,1 L748,0 L750,0 Z M746,0 L746,1 L744,1 L744,0 L746,0 Z M742,0 L742,1 L740,1 L740,0 L742,0 Z M738,0 L738,1 L736,1 L736,0 L738,0 Z M734,0 L734,1 L732,1 L732,0 L734,0 Z M730,0 L730,1 L728,1 L728,0 L730,0 Z M726,0 L726,1 L724,1 L724,0 L726,0 Z M722,0 L722,1 L720,1 L720,0 L722,0 Z M718,0 L718,1 L716,1 L716,0 L718,0 Z M714,0 L714,1 L712,1 L712,0 L714,0 Z M710,0 L710,1 L708,1 L708,0 L710,0 Z M706,0 L706,1 L704,1 L704,0 L706,0 Z M702,0 L702,1 L700,1 L700,0 L702,0 Z M698,0 L698,1 L696,1 L696,0 L698,0 Z M694,0 L694,1 L692,1 L692,0 L694,0 Z M690,0 L690,1 L688,1 L688,0 L690,0 Z M686,0 L686,1 L684,1 L684,0 L686,0 Z M682,0 L682,1 L680,1 L680,0 L682,0 Z M678,0 L678,1 L676,1 L676,0 L678,0 Z M674,0 L674,1 L672,1 L672,0 L674,0 Z M670,0 L670,1 L668,1 L668,0 L670,0 Z M666,0 L666,1 L664,1 L664,0 L666,0 Z M662,0 L662,1 L660,1 L660,0 L662,0 Z M658,0 L658,1 L656,1 L656,0 L658,0 Z M654,0 L654,1 L652,1 L652,0 L654,0 Z M650,0 L650,1 L648,1 L648,0 L650,0 Z M646,0 L646,1 L644,1 L644,0 L646,0 Z M642,0 L642,1 L640,1 L640,0 L642,0 Z M638,0 L638,1 L636,1 L636,0 L638,0 Z M634,0 L634,1 L632,1 L632,0 L634,0 Z M630,0 L630,1 L628,1 L628,0 L630,0 Z M626,0 L626,1 L624,1 L624,0 L626,0 Z M622,0 L622,1 L620,1 L620,0 L622,0 Z M618,0 L618,1 L616,1 L616,0 L618,0 Z M614,0 L614,1 L612,1 L612,0 L614,0 Z M610,0 L610,1 L608,1 L608,0 L610,0 Z M606,0 L606,1 L604,1 L604,0 L606,0 Z M602,0 L602,1 L600,1 L600,0 L602,0 Z M598,0 L598,1 L596,1 L596,0 L598,0 Z M594,0 L594,1 L592,1 L592,0 L594,0 Z M590,0 L590,1 L588,1 L588,0 L590,0 Z M586,0 L586,1 L584,1 L584,0 L586,0 Z M582,0 L582,1 L580,1 L580,0 L582,0 Z M578,0 L578,1 L576,1 L576,0 L578,0 Z M574,0 L574,1 L572,1 L572,0 L574,0 Z M570,0 L570,1 L568,1 L568,0 L570,0 Z M566,0 L566,1 L564,1 L564,0 L566,0 Z M562,0 L562,1 L560,1 L560,0 L562,0 Z M558,0 L558,1 L556,1 L556,0 L558,0 Z M554,0 L554,1 L552,1 L552,0 L554,0 Z M550,0 L550,1 L548,1 L548,0 L550,0 Z M546,0 L546,1 L544,1 L544,0 L546,0 Z M542,0 L542,1 L540,1 L540,0 L542,0 Z M538,0 L538,1 L536,1 L536,0 L538,0 Z M534,0 L534,1 L532,1 L532,0 L534,0 Z M530,0 L530,1 L528,1 L528,0 L530,0 Z M526,0 L526,1 L524,1 L524,0 L526,0 Z M522,0 L522,1 L520,1 L520,0 L522,0 Z M518,0 L518,1 L516,1 L516,0 L518,0 Z M514,0 L514,1 L512,1 L512,0 L514,0 Z M510,0 L510,1 L508,1 L508,0 L510,0 Z M506,0 L506,1 L504,1 L504,0 L506,0 Z M502,0 L502,1 L500,1 L500,0 L502,0 Z M497.999,0 L498,1 L496,1 L495.999,0 L497.999,0 Z M493.999,0 L494,1 L492,1 L491.999,0 L493.999,0 Z M490,0 L490,1 L488,1 L488,0 L490,0 Z M486,0 L486,1 L484,1 L484,0 L486,0 Z M950,0 L950,1 L948,1 L948,0 L950,0 Z M946,0 L946,1 L944,1 L944,0 L946,0 Z M474,0 L474,1 L472,1 L472,0 L474,0 Z M470,0 L470,1 L468,1 L468,0 L470,0 Z M466,0 L466,1 L464,1 L464,0 L466,0 Z M462,0 L462,1 L460,1 L460,0 L462,0 Z M458,0 L458,1 L456,1 L456,0 L458,0 Z M454,0 L454,1 L452,1 L452,0 L454,0 Z M450,0 L450,1 L448,1 L448,0 L450,0 Z M446,0 L446,1 L444,1 L444,0 L446,0 Z M442,0 L442,1 L440,1 L440,0 L442,0 Z M438,0 L438,1 L436,1 L436,0 L438,0 Z M434,0 L434,1 L432,1 L432,0 L434,0 Z M430,0 L430,1 L428,1 L428,0 L430,0 Z M426,0 L426,1 L424,1 L424,0 L426,0 Z M422,0 L422,1 L420,1 L420,0 L422,0 Z M418,0 L418,1 L416,1 L416,0 L418,0 Z M414,0 L414,1 L412,1 L412,0 L414,0 Z M410,0 L410,1 L408,1 L408,0 L410,0 Z M406,0 L406,1 L404,1 L404,0 L406,0 Z M402,0 L402,1 L400,1 L400,0 L402,0 Z M398,0 L398,1 L396,1 L396,0 L398,0 Z M394,0 L394,1 L392,1 L392,0 L394,0 Z M390,0 L390,1 L388,1 L388,0 L390,0 Z M386,0 L386,1 L384,1 L384,0 L386,0 Z M382,0 L382,1 L380,1 L380,0 L382,0 Z M378,0 L378,1 L376,1 L376,0 L378,0 Z M374,0 L374,1 L372,1 L372,0 L374,0 Z M370,0 L370,1 L368,1 L368,0 L370,0 Z M366,0 L366,1 L364,1 L364,0 L366,0 Z M362,0 L362,1 L360,1 L360,0 L362,0 Z M358,0 L358,1 L356,1 L356,0 L358,0 Z M354,0 L354,1 L352,1 L352,0 L354,0 Z M350,0 L350,1 L348,1 L348,0 L350,0 Z M346,0 L346,1 L344,1 L344,0 L346,0 Z M342,0 L342,1 L340,1 L340,0 L342,0 Z M338,0 L338,1 L336,1 L336,0 L338,0 Z M334,0 L334,1 L332,1 L332,0 L334,0 Z M330,0 L330,1 L328,1 L328,0 L330,0 Z M326,0 L326,1 L324,1 L324,0 L326,0 Z M322,0 L322,1 L320,1 L320,0 L322,0 Z M318,0 L318,1 L316,1 L316,0 L318,0 Z M314,0 L314,1 L312,1 L312,0 L314,0 Z M310,0 L310,1 L308,1 L308,0 L310,0 Z M306,0 L306,1 L304,1 L304,0 L306,0 Z M302,0 L302,1 L300,1 L300,0 L302,0 Z M298,0 L298,1 L296,1 L296,0 L298,0 Z M294,0 L294,1 L292,1 L292,0 L294,0 Z M290,0 L290,1 L288,1 L288,0 L290,0 Z M286,0 L286,1 L284,1 L284,0 L286,0 Z M282,0 L282,1 L280,1 L280,0 L282,0 Z M278,0 L278,1 L276,1 L276,0 L278,0 Z M274,0 L274,1 L272,1 L272,0 L274,0 Z M270,0 L270,1 L268,1 L268,0 L270,0 Z M266,0 L266,1 L264,1 L264,0 L266,0 Z M262,0 L262,1 L260,1 L260,0 L262,0 Z M258,0 L258,1 L256,1 L256,0 L258,0 Z M253.999,0 L254,1 L252,1 L251.999,0 L253.999,0 Z M250,0 L250,1 L248,1 L248,0 L250,0 Z M246,0 L246,1 L244,1 L244,0 L246,0 Z M242,0 L242,1 L240,1 L240,0 L242,0 Z M238,0 L238,1 L236,1 L236,0 L238,0 Z M234,0 L234,1 L232,1 L232,0 L234,0 Z M230,0 L230,1 L228,1 L228,0 L230,0 Z M226,0 L226,1 L224,1 L224,0 L226,0 Z M222,0 L222,1 L220,1 L220,0 L222,0 Z M218,0 L218,1 L216,1 L216,0 L218,0 Z M214,0 L214,1 L212,1 L212,0 L214,0 Z M210,0 L210,1 L208,1 L208,0 L210,0 Z M206,0 L206,1 L204,1 L204,0 L206,0 Z M202,0 L202,1 L200,1 L200,0 L202,0 Z M198,0 L198,1 L196,1 L196,0 L198,0 Z M194,0 L194,1 L192,1 L192,0 L194,0 Z M190,0 L190,1 L188,1 L188,0 L190,0 Z M186,0 L186,1 L184,1 L184,0 L186,0 Z M182,0 L182,1 L180,1 L180,0 L182,0 Z M178,0 L178,1 L176,1 L176,0 L178,0 Z M174,0 L174,1 L172,1 L172,0 L174,0 Z M170,0 L170,1 L168,1 L168,0 L170,0 Z M166,0 L166,1 L164,1 L164,0 L166,0 Z M162,0 L162,1 L160,1 L160,0 L162,0 Z M158,0 L158,1 L156,1 L156,0 L158,0 Z M154,0 L154,1 L152,1 L152,0 L154,0 Z M150,0 L150,1 L148,1 L148,0 L150,0 Z M146,0 L146,1 L144,1 L144,0 L146,0 Z M142,0 L142,1 L140,1 L140,0 L142,0 Z M138,0 L138,1 L136,1 L136,0 L138,0 Z M134,0 L134,1 L132,1 L132,0 L134,0 Z M130,0 L130,1 L128,1 L128,0 L130,0 Z M126,0 L126,1 L124,1 L124,0 L126,0 Z M122,0 L122,1 L120,1 L120,0 L122,0 Z M118,0 L118,1 L116,1 L116,0 L118,0 Z M114,0 L114,1 L112,1 L112,0 L114,0 Z M110,0 L110,1 L108,1 L108,0 L110,0 Z M106,0 L106,1 L104,1 L104,0 L106,0 Z M102,0 L102,1 L100,1 L100,0 L102,0 Z M98,0 L98,1 L96,1 L96,0 L98,0 Z M94,0 L94,1 L92,1 L92,0 L94,0 Z M90,0 L90,1 L88,1 L88,0 L90,0 Z M86,0 L86,1 L84,1 L84,0 L86,0 Z M82,0 L82,1 L80,1 L80,0 L82,0 Z M78,0 L78,1 L76,1 L76,0 L78,0 Z M74,0 L74,1 L72,1 L72,0 L74,0 Z M70,0 L70,1 L68,1 L68,0 L70,0 Z M66,0 L66,1 L64,1 L64,0 L66,0 Z M62,0 L62,1 L60,1 L60,0 L62,0 Z M58,0 L58,1 L56,1 L56,0 L58,0 Z M54,0 L54,1 L52,1 L52,0 L54,0 Z M50,0 L50,1 L48,1 L48,0 L50,0 Z M46,0 L46,1 L44,1 L44,0 L46,0 Z M42,0 L42,1 L40,1 L40,0 L42,0 Z M38,0 L38,1 L36,1 L36,0 L38,0 Z M34,0 L34,1 L32,1 L32,0 L34,0 Z M30,0 L30,1 L28,1 L28,0 L30,0 Z M26,0 L26,1 L24,1 L24,0 L26,0 Z M22,0 L22,1 L20,1 L20,0 L22,0 Z M18,0 L18,1 L16,1 L16,0 L18,0 Z M14,0 L14,1 L12,1 L12,0 L14,0 Z M478,0 L478,1 L476,1 L476,0 L478,0 Z" id="dash" fill-rule="nonzero"></path>
+            </g>
+            <g id="Pooling" transform="translate(12, 9)" fill-rule="nonzero">
+                <path d="M1.98046875,13.0063477 C2.23014323,13.0063477 2.4449056,12.9195964 2.62475586,12.7460938 C2.80460612,12.5725911 2.89453125,12.3440755 2.89453125,12.0605469 L2.89453125,9.36279297 L4.66552734,9.36279297 C6.98030599,9.36279297 8.13769531,8.44026693 8.13769531,6.59521484 C8.13769531,6.09163411 8.05940755,5.65893555 7.90283203,5.29711914 C7.74625651,4.93530273 7.51668294,4.6496582 7.21411133,4.44018555 C6.91153971,4.23071289 6.56241862,4.07836914 6.16674805,3.9831543 C5.77107747,3.88793945 5.3108724,3.84033203 4.78613281,3.84033203 L2.05664063,3.84033203 C1.74348958,3.84033203 1.50016276,3.9461263 1.32666016,4.15771484 C1.15315755,4.36930339 1.06640625,4.63590495 1.06640625,4.95751953 L1.06640625,12.0605469 C1.06640625,12.3440755 1.15738932,12.5725911 1.33935547,12.7460938 C1.52132161,12.9195964 1.73502604,13.0063477 1.98046875,13.0063477 Z M2.89453125,8.04248047 L2.89453125,5.23046875 L4.58935547,5.23046875 C4.88557943,5.23046875 5.13208008,5.24739583 5.32885742,5.28125 C5.52563477,5.31510417 5.70548503,5.37858073 5.8684082,5.47167969 C6.03133138,5.56477865 6.15087891,5.70442708 6.22705078,5.890625 C6.30322266,6.07682292 6.34130859,6.3116862 6.34130859,6.59521484 C6.34130859,6.88720703 6.30322266,7.12841797 6.22705078,7.31884766 C6.15087891,7.50927734 6.03133138,7.65633138 5.8684082,7.76000977 C5.70548503,7.86368815 5.52140299,7.9366862 5.31616211,7.97900391 C5.11092122,8.02132161 4.85384115,8.04248047 4.54492187,8.04248047 L2.89453125,8.04248047 Z" id="Shape"></path>
+                <path d="M12.2001953,11.9907227 C11.6542969,11.9907227 11.2290039,11.800293 10.9243164,11.4194336 C10.6196289,11.0385742 10.4672852,10.5053711 10.4672852,9.81982422 C10.4672852,9.1258138 10.618571,8.58732096 10.9211426,8.2043457 C11.2237142,7.82137044 11.6500651,7.62988281 12.2001953,7.62988281 C12.7503255,7.62988281 13.1777344,7.82242839 13.4824219,8.20751953 C13.7871094,8.59261068 13.9394531,9.13004557 13.9394531,9.81982422 C13.9394531,10.5053711 13.7871094,11.0385742 13.4824219,11.4194336 C13.1777344,11.800293 12.7503255,11.9907227 12.2001953,11.9907227 Z M12.2001953,13.0761719 C12.648763,13.0761719 13.0592448,13.0148112 13.4316406,12.8920898 C13.8040365,12.7693685 14.1182454,12.6053874 14.3742676,12.4001465 C14.6302897,12.1949056 14.84611,11.9515788 15.0217285,11.670166 C15.197347,11.3887533 15.326416,11.0935872 15.4089355,10.784668 C15.4914551,10.4757487 15.5327148,10.1541341 15.5327148,9.81982422 C15.5327148,9.46858724 15.4893392,9.13216146 15.4025879,8.81054688 C15.3158366,8.48893229 15.1814779,8.18953451 14.9995117,7.91235352 C14.8175456,7.63517253 14.5974935,7.39607747 14.3393555,7.19506836 C14.0812174,6.99405924 13.7691243,6.83536784 13.4030762,6.71899414 C13.037028,6.60262044 12.6360677,6.54443359 12.2001953,6.54443359 C11.7558594,6.54443359 11.3485514,6.60473633 10.9782715,6.7253418 C10.6079915,6.84594727 10.2948405,7.00992839 10.0388184,7.21728516 C9.78279622,7.42464193 9.56591797,7.66796875 9.38818359,7.94726562 C9.21044922,8.2265625 9.08032227,8.5238444 8.99780273,8.83911133 C8.9152832,9.15437826 8.87402344,9.48128255 8.87402344,9.81982422 C8.87402344,10.2472331 8.94173177,10.6513672 9.07714844,11.0322266 C9.2125651,11.4130859 9.41040039,11.7579753 9.6706543,12.0668945 C9.9309082,12.3758138 10.2789714,12.6212565 10.7148438,12.8032227 C11.1507161,12.9851888 11.6458333,13.0761719 12.2001953,13.0761719 Z" id="Shape"></path>
+                <path d="M19.8745117,11.9907227 C19.3286133,11.9907227 18.9033203,11.800293 18.5986328,11.4194336 C18.2939453,11.0385742 18.1416016,10.5053711 18.1416016,9.81982422 C18.1416016,9.1258138 18.2928874,8.58732096 18.595459,8.2043457 C18.8980306,7.82137044 19.3243815,7.62988281 19.8745117,7.62988281 C20.4246419,7.62988281 20.8520508,7.82242839 21.1567383,8.20751953 C21.4614258,8.59261068 21.6137695,9.13004557 21.6137695,9.81982422 C21.6137695,10.5053711 21.4614258,11.0385742 21.1567383,11.4194336 C20.8520508,11.800293 20.4246419,11.9907227 19.8745117,11.9907227 Z M19.8745117,13.0761719 C20.3230794,13.0761719 20.7335612,13.0148112 21.105957,12.8920898 C21.4783529,12.7693685 21.7925618,12.6053874 22.048584,12.4001465 C22.3046061,12.1949056 22.5204264,11.9515788 22.6960449,11.670166 C22.8716634,11.3887533 23.0007324,11.0935872 23.083252,10.784668 C23.1657715,10.4757487 23.2070312,10.1541341 23.2070312,9.81982422 C23.2070312,9.46858724 23.1636556,9.13216146 23.0769043,8.81054688 C22.990153,8.48893229 22.8557943,8.18953451 22.6738281,7.91235352 C22.491862,7.63517253 22.2718099,7.39607747 22.0136719,7.19506836 C21.7555339,6.99405924 21.4434408,6.83536784 21.0773926,6.71899414 C20.7113444,6.60262044 20.3103841,6.54443359 19.8745117,6.54443359 C19.4301758,6.54443359 19.0228678,6.60473633 18.6525879,6.7253418 C18.2823079,6.84594727 17.9691569,7.00992839 17.7131348,7.21728516 C17.4571126,7.42464193 17.2402344,7.66796875 17.0625,7.94726562 C16.8847656,8.2265625 16.7546387,8.5238444 16.6721191,8.83911133 C16.5895996,9.15437826 16.5483398,9.48128255 16.5483398,9.81982422 C16.5483398,10.2472331 16.6160482,10.6513672 16.7514648,11.0322266 C16.8868815,11.4130859 17.0847168,11.7579753 17.3449707,12.0668945 C17.6052246,12.3758138 17.9532878,12.6212565 18.3891602,12.8032227 C18.8250326,12.9851888 19.3201497,13.0761719 19.8745117,13.0761719 Z" id="Shape"></path>
+                <path d="M25.371582,13 C25.6085612,13 25.8042806,12.9217122 25.9587402,12.7651367 C26.1131999,12.6085612 26.1904297,12.390625 26.1904297,12.1113281 L26.1904297,4.74169922 C26.1904297,4.46240234 26.1142578,4.24446615 25.9619141,4.08789062 C25.8095703,3.9313151 25.6170247,3.85302734 25.3842773,3.85302734 C25.1515299,3.85302734 24.9611003,3.9313151 24.8129883,4.08789062 C24.6648763,4.24446615 24.5908203,4.46240234 24.5908203,4.74169922 L24.5908203,12.1113281 C24.5908203,12.3948568 24.6638184,12.6138509 24.8098145,12.7683105 C24.9558105,12.9227702 25.1430664,13 25.371582,13 Z" id="Path"></path>
+                <path d="M28.7104492,13 C28.9431966,13 29.1346842,12.9217122 29.2849121,12.7651367 C29.43514,12.6085612 29.5102539,12.390625 29.5102539,12.1113281 L29.5102539,7.52832031 C29.5102539,7.24479167 29.43514,7.02473958 29.2849121,6.86816406 C29.1346842,6.71158854 28.9431966,6.63330078 28.7104492,6.63330078 C28.4777018,6.63330078 28.2872721,6.71158854 28.1391602,6.86816406 C27.9910482,7.02473958 27.9169922,7.24479167 27.9169922,7.52832031 L27.9169922,12.1113281 C27.9169922,12.3948568 27.9910482,12.6138509 28.1391602,12.7683105 C28.2872721,12.9227702 28.4777018,13 28.7104492,13 Z M28.7104492,5.36376953 C28.9855143,5.36376953 29.2087402,5.28230794 29.380127,5.11938477 C29.5515137,4.95646159 29.637207,4.7438151 29.637207,4.48144531 C29.637207,4.21907552 29.5525716,4.00748698 29.3833008,3.84667969 C29.2140299,3.6858724 28.991862,3.60546875 28.7167969,3.60546875 C28.4375,3.60546875 28.2121582,3.6858724 28.0407715,3.84667969 C27.8693848,4.00748698 27.7836914,4.21907552 27.7836914,4.48144531 C27.7836914,4.7438151 27.8693848,4.95646159 28.0407715,5.11938477 C28.2121582,5.28230794 28.4353841,5.36376953 28.7104492,5.36376953 Z" id="Shape"></path>
+                <path d="M31.9160156,13 C32.148763,13 32.3402507,12.9227702 32.4904785,12.7683105 C32.6407064,12.6138509 32.7158203,12.3990885 32.7158203,12.1240234 L32.7158203,9.43896484 C32.7158203,8.90152995 32.8702799,8.46565755 33.1791992,8.13134766 C33.4881185,7.79703776 33.8541667,7.62988281 34.2773438,7.62988281 C34.6285807,7.62988281 34.9173991,7.74519857 35.1437988,7.97583008 C35.3701986,8.20646159 35.4833984,8.53971354 35.4833984,8.97558594 L35.4833984,12.1240234 C35.4833984,12.3990885 35.5574544,12.6138509 35.7055664,12.7683105 C35.8536784,12.9227702 36.0398763,13 36.2641602,13 C36.5011393,13 36.6958008,12.9227702 36.8481445,12.7683105 C37.0004883,12.6138509 37.0766602,12.3990885 37.0766602,12.1240234 L37.0766602,8.98193359 C37.0766602,8.57568359 37.0152995,8.21386719 36.8925781,7.89648438 C36.7698568,7.57910156 36.6027018,7.32413737 36.3911133,7.1315918 C36.1795247,6.93904622 35.9393717,6.79305013 35.6706543,6.69360352 C35.4019368,6.5941569 35.1152344,6.54443359 34.8105469,6.54443359 C34.3154297,6.54443359 33.8890788,6.63753255 33.5314941,6.82373047 C33.1739095,7.00992839 32.9020182,7.2828776 32.7158203,7.64257813 L32.7158203,7.42041016 C32.7158203,7.17073568 32.6417643,6.97713216 32.4936523,6.83959961 C32.3455404,6.70206706 32.1572266,6.63330078 31.9287109,6.63330078 C31.6959635,6.63330078 31.503418,6.703125 31.3510742,6.84277344 C31.1987305,6.98242188 31.1225586,7.17919922 31.1225586,7.43310547 L31.1225586,12.1240234 C31.1225586,12.3990885 31.1966146,12.6138509 31.3447266,12.7683105 C31.4928385,12.9227702 31.6832682,13 31.9160156,13 Z" id="Path"></path>
+                <path d="M43.2719727,12.9174805 C43.2719727,13.5183919 43.1069336,13.9659017 42.7768555,14.2600098 C42.4467773,14.5541178 41.9855143,14.7011719 41.3930664,14.7011719 C41.2237956,14.7011719 41.0513509,14.6863607 40.8757324,14.6567383 C40.7001139,14.6271159 40.5678711,14.5996094 40.4790039,14.5742188 C40.3901367,14.5488281 40.2504883,14.5033366 40.0600586,14.4377441 C39.8696289,14.3721517 39.7553711,14.3330078 39.7172852,14.3203125 C39.6495768,14.2949219 39.5818685,14.2822266 39.5141602,14.2822266 C39.3660482,14.2822266 39.2433268,14.3372396 39.1459961,14.4472656 C39.0486654,14.5572917 39,14.6842448 39,14.828125 C39,15.03125 39.0973307,15.1920573 39.2919922,15.3105469 C39.5205078,15.4501953 39.8410645,15.567627 40.2536621,15.6628418 C40.6662598,15.7580566 41.0989583,15.8056641 41.5517578,15.8056641 C42.5504557,15.8056641 43.3407389,15.5432943 43.9226074,15.0185547 C44.5044759,14.4938151 44.7954102,13.7299805 44.7954102,12.7270508 L44.7954102,7.52197266 C44.7954102,7.24267578 44.724528,7.02473958 44.5827637,6.86816406 C44.4409993,6.71158854 44.2600911,6.63330078 44.0400391,6.63330078 C43.8453776,6.63330078 43.6792806,6.69148763 43.541748,6.80786133 C43.4042155,6.92423503 43.3248698,7.09033203 43.3037109,7.30615234 L43.3037109,7.59814453 C43.0921224,7.25960286 42.840332,7.00146484 42.5483398,6.82373047 C42.2563477,6.64599609 41.8543294,6.55712891 41.3422852,6.55712891 C40.4239909,6.55712891 39.6971842,6.86287435 39.1618652,7.47436523 C38.6265462,8.08585612 38.3588867,8.88037109 38.3588867,9.85791016 C38.3588867,10.8269857 38.6339518,11.5939941 39.184082,12.1589355 C39.7342122,12.723877 40.4663086,13.0063477 41.3803711,13.0063477 C42.2817383,13.0063477 42.9122721,12.6656901 43.2719727,11.984375 L43.2719727,12.9174805 Z M41.6660156,11.9462891 C41.1835938,11.9378255 40.7826335,11.7548014 40.4631348,11.3972168 C40.1436361,11.0396322 39.9838867,10.5117188 39.9838867,9.81347656 C39.9838867,9.57226562 40.0029297,9.34692383 40.0410156,9.13745117 C40.0791016,8.92797852 40.1404622,8.72908529 40.2250977,8.54077148 C40.3097331,8.35245768 40.4155273,8.19165039 40.5424805,8.05834961 C40.6694336,7.92504883 40.828125,7.81925456 41.0185547,7.7409668 C41.2089844,7.66267904 41.4226888,7.62353516 41.659668,7.62353516 C42.7345378,7.62353516 43.2719727,8.3577474 43.2719727,9.82617188 C43.2719727,10.5498047 43.1259766,11.0819499 42.8339844,11.4226074 C42.5419922,11.763265 42.1526693,11.9378255 41.6660156,11.9462891 Z" id="Shape"></path>
+            </g>
+        </g>
+        <g id="dash-box-@sequence" stroke-width="1" transform="translate(108, 26)" fill="#9172E2">
+            <path d="M7.10343083,306.943229 C7.39891719,306.980953 7.69814426,307 8,307 L8.97464335,307 L8.974,308 L8,308 C7.65340057,308 7.31194921,307.977958 6.97697115,307.935201 L7.10343083,306.943229 Z M12.9746437,307 L12.974,308 L10.974,308 L10.9746433,307 L12.9746437,307 Z M16.9746435,307 L16.974,308 L14.974,308 L14.9746433,307 L16.9746435,307 Z M20.9746438,307 L20.974,308 L18.974,308 L18.9746433,307 L20.9746438,307 Z M24.9746436,307 L24.974,308 L22.974,308 L22.9746433,307 L24.9746436,307 Z M28.9746435,307 L28.974,308 L26.974,308 L26.9746433,307 L28.9746435,307 Z M32.9746437,307 L32.974,308 L30.974,308 L30.9746433,307 L32.9746437,307 Z M36.9746436,307 L36.974,308 L34.974,308 L34.9746433,307 L36.9746436,307 Z M40.9746434,307 L40.974,308 L38.974,308 L38.9746433,307 L40.9746434,307 Z M44.9746437,307 L44.974,308 L42.974,308 L42.9746433,307 L44.9746437,307 Z M48.9746435,307 L48.974,308 L46.974,308 L46.9746433,307 L48.9746435,307 Z M52.9746438,307 L52.974,308 L50.974,308 L50.9746433,307 L52.9746438,307 Z M56.9746436,307 L56.974,308 L54.974,308 L54.9746433,307 L56.9746436,307 Z M60.9746435,307 L60.974,308 L58.974,308 L58.9746433,307 L60.9746435,307 Z M64.9746438,307 L64.974,308 L62.974,308 L62.9746433,307 L64.9746438,307 Z M68.9746436,307 L68.974,308 L66.974,308 L66.9746433,307 L68.9746436,307 Z M72.9746434,307 L72.974,308 L70.974,308 L70.9746433,307 L72.9746434,307 Z M76.9746437,307 L76.974,308 L74.974,308 L74.9746433,307 L76.9746437,307 Z M80.9746435,307 L80.974,308 L78.974,308 L78.9746433,307 L80.9746435,307 Z M84.9746434,307 L84.974,308 L82.974,308 L82.9746433,307 L84.9746434,307 Z M88.9746437,307 L88.974,308 L86.974,308 L86.9746433,307 L88.9746437,307 Z M92.9746435,307 L92.974,308 L90.974,308 L90.9746433,307 L92.9746435,307 Z M96.9746438,307 L96.974,308 L94.974,308 L94.9746433,307 L96.9746438,307 Z M100.974644,307 L100.974,308 L98.974,308 L98.9746433,307 L100.974644,307 Z M104.974643,307 L104.974,308 L102.974,308 L102.974643,307 L104.974643,307 Z M108.974644,307 L108.974,308 L106.974,308 L106.974643,307 L108.974644,307 Z M112.974644,307 L112.974,308 L110.974,308 L110.974643,307 L112.974644,307 Z M116.974643,307 L116.974,308 L114.974,308 L114.974643,307 L116.974643,307 Z M120.974644,307 L120.974,308 L118.974,308 L118.974643,307 L120.974644,307 Z M124.974643,307 L124.974,308 L122.974,308 L122.974643,307 L124.974643,307 Z M128.974644,307 L128.974,308 L126.974,308 L126.974643,307 L128.974644,307 Z M132.974644,307 L132.974,308 L130.974,308 L130.974643,307 L132.974644,307 Z M136.974643,307 L136.974,308 L134.974,308 L134.974643,307 L136.974643,307 Z M140.974644,307 L140.974,308 L138.974,308 L138.974643,307 L140.974644,307 Z M144.974644,307 L144.974,308 L142.974,308 L142.974643,307 L144.974644,307 Z M148.974643,307 L148.974,308 L146.974,308 L146.974643,307 L148.974643,307 Z M152.974644,307 L152.974,308 L150.974,308 L150.974643,307 L152.974644,307 Z M156.974644,307 L156.974,308 L154.974,308 L154.974643,307 L156.974644,307 Z M160.974644,307 L160.974,308 L158.974,308 L158.974643,307 L160.974644,307 Z M164.974644,307 L164.974,308 L162.974,308 L162.974643,307 L164.974644,307 Z M168.974643,307 L168.974,308 L166.974,308 L166.974643,307 L168.974643,307 Z M172.974644,307 L172.974,308 L170.974,308 L170.974643,307 L172.974644,307 Z M176.974644,307 L176.974,308 L174.974,308 L174.974643,307 L176.974644,307 Z M180.974643,307 L180.974,308 L178.974,308 L178.974643,307 L180.974643,307 Z M184.974644,307 L184.974,308 L182.974,308 L182.974643,307 L184.974644,307 Z M188.974644,307 L188.974,308 L186.974,308 L186.974643,307 L188.974644,307 Z M192.974643,307 L192.974,308 L190.974,308 L190.974643,307 L192.974643,307 Z M196.974644,307 L196.974,308 L194.974,308 L194.974643,307 L196.974644,307 Z M200.974643,307 L200.974,308 L198.974,308 L198.974643,307 L200.974643,307 Z M204.974644,307 L204.974,308 L202.974,308 L202.974643,307 L204.974644,307 Z M208.974644,307 L208.974,308 L206.974,308 L206.974643,307 L208.974644,307 Z M212.974643,307 L212.974,308 L210.974,308 L210.974643,307 L212.974643,307 Z M216.974644,307 L216.974,308 L214.974,308 L214.974643,307 L216.974644,307 Z M220.974644,307 L220.974,308 L218.974,308 L218.974643,307 L220.974644,307 Z M224.974643,307 L224.974,308 L222.974,308 L222.974643,307 L224.974643,307 Z M228.974644,307 L228.974,308 L226.974,308 L226.974643,307 L228.974644,307 Z M232.974643,307 L232.974,308 L230.974,308 L230.974643,307 L232.974643,307 Z M236.974644,307 L236.974,308 L234.974,308 L234.974643,307 L236.974644,307 Z M240.974644,307 L240.974,308 L238.974,308 L238.974643,307 L240.974644,307 Z M244.974643,307 L244.974,308 L242.974,308 L242.974643,307 L244.974643,307 Z M248.974643,307 L248.974,308 L246.974,308 L246.974643,307 L248.974643,307 Z M252.974643,307 L252.974,308 L250.974,308 L250.974643,307 L252.974643,307 Z M256.974643,307 L256.974,308 L254.974,308 L254.974643,307 L256.974643,307 Z M260.974643,307 L260.974,308 L258.974,308 L258.974643,307 L260.974643,307 Z M264.974643,307 L264.974,308 L262.974,308 L262.974643,307 L264.974643,307 Z M268.974643,307 L268.974,308 L266.974,308 L266.974643,307 L268.974643,307 Z M272.974643,307 L272.974,308 L270.974,308 L270.974643,307 L272.974643,307 Z M276.974643,307 L276.974,308 L274.974,308 L274.974643,307 L276.974643,307 Z M280.974643,307 L280.974,308 L278.974,308 L278.974643,307 L280.974643,307 Z M284.974643,307 L284.974,308 L282.974,308 L282.974643,307 L284.974643,307 Z M288.974643,307 L288.974,308 L286.974,308 L286.974643,307 L288.974643,307 Z M292.974643,307 L292.974,308 L290.974,308 L290.974643,307 L292.974643,307 Z M296.974643,307 L296.974,308 L294.974,308 L294.974643,307 L296.974643,307 Z M300.974643,307 L300.974,308 L298.974,308 L298.974643,307 L300.974643,307 Z M304.974643,307 L304.974,308 L302.974,308 L302.974643,307 L304.974643,307 Z M308.974643,307 L308.974,308 L306.974,308 L306.974643,307 L308.974643,307 Z M312.974643,307 L312.974,308 L310.974,308 L310.974643,307 L312.974643,307 Z M316.974643,307 L316.974,308 L314.974,308 L314.974643,307 L316.974643,307 Z M320.974643,307 L320.974,308 L318.974,308 L318.974643,307 L320.974643,307 Z M324.974643,307 L324.974,308 L322.974,308 L322.974643,307 L324.974643,307 Z M328.974643,307 L328.974,308 L326.974,308 L326.974643,307 L328.974643,307 Z M332.974643,307 L332.974,308 L330.974,308 L330.974643,307 L332.974643,307 Z M336.974643,307 L336.974,308 L334.974,308 L334.974643,307 L336.974643,307 Z M340.974643,307 L340.974,308 L338.974,308 L338.974643,307 L340.974643,307 Z M344.974643,307 L344.974,308 L342.974,308 L342.974643,307 L344.974643,307 Z M348.974643,307 L348.974,308 L346.974,308 L346.974643,307 L348.974643,307 Z M352.974643,307 L352.974,308 L350.974,308 L350.974643,307 L352.974643,307 Z M356.974643,307 L356.974,308 L354.974,308 L354.974643,307 L356.974643,307 Z M360.974643,307 L360.974,308 L358.974,308 L358.974643,307 L360.974643,307 Z M364.974643,307 L364.974,308 L362.974,308 L362.974643,307 L364.974643,307 Z M368.974643,307 L368.974,308 L366.974,308 L366.974643,307 L368.974643,307 Z M372.974643,307 L372.974,308 L370.974,308 L370.974643,307 L372.974643,307 Z M376.974643,307 L376.974,308 L374.974,308 L374.974643,307 L376.974643,307 Z M380.974643,307 L380.974,308 L378.974,308 L378.974643,307 L380.974643,307 Z M384.974643,307 L384.974,308 L382.974,308 L382.974643,307 L384.974643,307 Z M388.974643,307 L388.974,308 L386.974,308 L386.974643,307 L388.974643,307 Z M392.974643,307 L392.974,308 L390.974,308 L390.974643,307 L392.974643,307 Z M396.974643,307 L396.974,308 L394.974,308 L394.974643,307 L396.974643,307 Z M400.974643,307 L400.974,308 L398.974,308 L398.974643,307 L400.974643,307 Z M404.974643,307 L404.974,308 L402.974,308 L402.974643,307 L404.974643,307 Z M408.974643,307 L408.974,308 L406.974,308 L406.974643,307 L408.974643,307 Z M412.974643,307 L412.974,308 L410.974,308 L410.974643,307 L412.974643,307 Z M416.974643,307 L416.974,308 L414.974,308 L414.974643,307 L416.974643,307 Z M420.974643,307 L420.974,308 L418.974,308 L418.974643,307 L420.974643,307 Z M424.974643,307 L424.974,308 L422.974,308 L422.974643,307 L424.974643,307 Z M428.974643,307 L428.974,308 L426.974,308 L426.974643,307 L428.974643,307 Z M432.974643,307 L432.974,308 L430.974,308 L430.974643,307 L432.974643,307 Z M436.974643,307 L436.974,308 L434.974,308 L434.974643,307 L436.974643,307 Z M440.974643,307 L440.974,308 L438.974,308 L438.974643,307 L440.974643,307 Z M444.974643,307 L444.974,308 L442.974,308 L442.974643,307 L444.974643,307 Z M448.974643,307 L448.974,308 L446.974,308 L446.974643,307 L448.974643,307 Z M452.974643,307 L452.974,308 L450.974,308 L450.974643,307 L452.974643,307 Z M456.974643,307 L456.974,308 L454.974,308 L454.974643,307 L456.974643,307 Z M460.974643,307 L460.974,308 L458.974,308 L458.974643,307 L460.974643,307 Z M464.974643,307 L464.974,308 L462.974,308 L462.974643,307 L464.974643,307 Z M468.974643,307 L468.974,308 L466.974,308 L466.974643,307 L468.974643,307 Z M472.974643,307 L472.974,308 L470.974,308 L470.974643,307 L472.974643,307 Z M476.974643,307 L476.974,308 L474.974,308 L474.974643,307 L476.974643,307 Z M480.974643,307 L480.974,308 L478.974,308 L478.974643,307 L480.974643,307 Z M484.974643,307 L484.974,308 L482.974,308 L482.974643,307 L484.974643,307 Z M488.974643,307 L488.974,308 L486.974,308 L486.974643,307 L488.974643,307 Z M492.489743,306.544574 L492.845614,307.479109 C492.230519,307.71327 491.579078,307.873732 490.902082,307.949705 L490.79022,306.955981 C491.375654,306.890313 491.945502,306.751753 492.489743,306.544574 Z M3.87046676,305.652842 C4.34287209,305.998616 4.85665464,306.284039 5.40029041,306.501639 L5.02897703,307.430146 C4.4032811,307.179729 3.81658971,306.852567 3.28054319,306.460301 L3.87046676,305.652842 Z M495.388302,304.468684 L496.157467,305.107734 C495.730317,305.622099 495.239943,306.08208 494.698265,306.475756 L494.11069,305.666586 C494.584521,305.322177 495.013992,304.919451 495.388302,304.468684 Z M1.6495736,302.94939 C1.8965782,303.480194 2.20969373,303.977971 2.58076411,304.431168 L1.80668123,305.064253 C1.38539268,304.54965 1.0269759,303.981583 0.743159818,303.371781 L1.6495736,302.94939 Z M496.876065,301.319201 L497.858348,301.506607 C497.73129,302.173338 497.521483,302.810729 497.240021,303.407685 L496.335201,302.981889 C496.584106,302.454131 496.766097,301.896352 496.876065,301.319201 Z M1,299.461907 L1.00002881,300.020331 C1.00125212,300.451897 1.04142314,300.877924 1.11937861,301.295058 L0.136531747,301.479483 C0.0468808413,301.000012 1.77635684e-15,300.505481 1.77635684e-15,300 L1.77635684e-15,299.462 L1,299.461907 Z M498,297.512 L498,299.512 L497,299.512678 L497,297.512678 L498,297.512 Z M1,295.461965 L1,297.461965 L1.77635684e-15,297.461 L1.77635684e-15,295.461 L1,295.461965 Z M498,293.512 L498,295.512 L497,295.512678 L497,293.512678 L498,293.512 Z M1,291.461965 L1,293.461965 L1.77635684e-15,293.461 L1.77635684e-15,291.461 L1,291.461965 Z M498,289.512 L498,291.512 L497,291.512678 L497,289.512678 L498,289.512 Z M1,287.461965 L1,289.461965 L1.77635684e-15,289.461 L1.77635684e-15,287.461 L1,287.461965 Z M498,285.512 L498,287.512 L497,287.512678 L497,285.512678 L498,285.512 Z M1,283.461965 L1,285.461965 L1.77635684e-15,285.461 L1.77635684e-15,283.461 L1,283.461965 Z M498,281.512 L498,283.512 L497,283.512678 L497,281.512678 L498,281.512 Z M1,279.461965 L1,281.461965 L1.77635684e-15,281.461 L1.77635684e-15,279.461 L1,279.461965 Z M498,277.512 L498,279.512 L497,279.512678 L497,277.512678 L498,277.512 Z M1,275.461965 L1,277.461965 L1.77635684e-15,277.461 L1.77635684e-15,275.461 L1,275.461965 Z M498,273.512 L498,275.512 L497,275.512678 L497,273.512678 L498,273.512 Z M1,271.461965 L1,273.461965 L1.77635684e-15,273.461 L1.77635684e-15,271.461 L1,271.461965 Z M498,269.512 L498,271.512 L497,271.512678 L497,269.512678 L498,269.512 Z M1,267.461965 L1,269.461965 L1.66533454e-15,269.461 L1.66533454e-15,267.461 L1,267.461965 Z M498,265.512 L498,267.512 L497,267.512678 L497,265.512678 L498,265.512 Z M1,263.461965 L1,265.461965 L1.66533454e-15,265.461 L1.66533454e-15,263.461 L1,263.461965 Z M498,261.512 L498,263.512 L497,263.512678 L497,261.512678 L498,261.512 Z M1,259.461965 L1,261.461965 L1.66533454e-15,261.461 L1.66533454e-15,259.461 L1,259.461965 Z M498,257.512 L498,259.512 L497,259.512678 L497,257.512678 L498,257.512 Z M1,255.461965 L1,257.461965 L1.66533454e-15,257.461 L1.66533454e-15,255.461 L1,255.461965 Z M498,253.512 L498,255.512 L497,255.512678 L497,253.512678 L498,253.512 Z M1,251.461965 L1,253.461965 L1.55431223e-15,253.461 L1.55431223e-15,251.461 L1,251.461965 Z M498,249.512 L498,251.512 L497,251.512678 L497,249.512678 L498,249.512 Z M1,247.461965 L1,249.461965 L1.55431223e-15,249.461 L1.55431223e-15,247.461 L1,247.461965 Z M498,245.512 L498,247.512 L497,247.512678 L497,245.512678 L498,245.512 Z M1,243.461965 L1,245.461965 L1.55431223e-15,245.461 L1.55431223e-15,243.461 L1,243.461965 Z M498,241.512 L498,243.512 L497,243.512678 L497,241.512678 L498,241.512 Z M1,239.461965 L1,241.461965 L1.55431223e-15,241.461 L1.55431223e-15,239.461 L1,239.461965 Z M498,237.512 L498,239.512 L497,239.512678 L497,237.512678 L498,237.512 Z M1,235.461965 L1,237.461965 L1.55431223e-15,237.461 L1.55431223e-15,235.461 L1,235.461965 Z M498,233.512 L498,235.512 L497,235.512678 L497,233.512678 L498,233.512 Z M1,231.461965 L1,233.461965 L1.55431223e-15,233.461 L1.55431223e-15,231.461 L1,231.461965 Z M498,229.512 L498,231.512 L497,231.512678 L497,229.512678 L498,229.512 Z M1,227.461965 L1,229.461965 L1.55431223e-15,229.461 L1.55431223e-15,227.461 L1,227.461965 Z M498,225.512 L498,227.512 L497,227.512678 L497,225.512678 L498,225.512 Z M1,223.461965 L1,225.461965 L1.55431223e-15,225.461 L1.55431223e-15,223.461 L1,223.461965 Z M498,221.512 L498,223.512 L497,223.512678 L497,221.512678 L498,221.512 Z M1,219.461965 L1,221.461965 L1.55431223e-15,221.461 L1.55431223e-15,219.461 L1,219.461965 Z M498,217.512 L498,219.512 L497,219.512678 L497,217.512678 L498,217.512 Z M1,215.461965 L1,217.461965 L1.55431223e-15,217.461 L1.55431223e-15,215.461 L1,215.461965 Z M498,213.512 L498,215.512 L497,215.512678 L497,213.512678 L498,213.512 Z M1,211.461965 L1,213.461965 L1.55431223e-15,213.461 L1.55431223e-15,211.461 L1,211.461965 Z M498,209.512 L498,211.512 L497,211.512678 L497,209.512678 L498,209.512 Z M1,207.461965 L1,209.461965 L1.55431223e-15,209.461 L1.55431223e-15,207.461 L1,207.461965 Z M498,205.512 L498,207.512 L497,207.512678 L497,205.512678 L498,205.512 Z M1,203.461965 L1,205.461965 L1.55431223e-15,205.461 L1.55431223e-15,203.461 L1,203.461965 Z M498,201.512 L498,203.512 L497,203.512678 L497,201.512678 L498,201.512 Z M1,199.461965 L1,201.461965 L1.55431223e-15,201.461 L1.55431223e-15,199.461 L1,199.461965 Z M498,197.512 L498,199.512 L497,199.512678 L497,197.512678 L498,197.512 Z M1,195.461965 L1,197.461965 L1.44328993e-15,197.461 L1.44328993e-15,195.461 L1,195.461965 Z M498,193.512 L498,195.512 L497,195.512678 L497,193.512678 L498,193.512 Z M1,191.461965 L1,193.461965 L1.44328993e-15,193.461 L1.44328993e-15,191.461 L1,191.461965 Z M498,189.512 L498,191.512 L497,191.512678 L497,189.512678 L498,189.512 Z M1,187.461965 L1,189.461965 L1.44328993e-15,189.461 L1.44328993e-15,187.461 L1,187.461965 Z M498,185.512 L498,187.512 L497,187.512678 L497,185.512678 L498,185.512 Z M1,183.461965 L1,185.461965 L1.44328993e-15,185.461 L1.44328993e-15,183.461 L1,183.461965 Z M498,181.512 L498,183.512 L497,183.512678 L497,181.512678 L498,181.512 Z M1,179.461965 L1,181.461965 L1.44328993e-15,181.461 L1.44328993e-15,179.461 L1,179.461965 Z M498,177.512 L498,179.512 L497,179.512678 L497,177.512678 L498,177.512 Z M1,175.461965 L1,177.461965 L1.33226763e-15,177.461 L1.33226763e-15,175.461 L1,175.461965 Z M498,173.512 L498,175.512 L497,175.512678 L497,173.512678 L498,173.512 Z M1,171.461965 L1,173.461965 L1.33226763e-15,173.461 L1.33226763e-15,171.461 L1,171.461965 Z M498,169.512 L498,171.512 L497,171.512678 L497,169.512678 L498,169.512 Z M1,167.461965 L1,169.461965 L1.33226763e-15,169.461 L1.33226763e-15,167.461 L1,167.461965 Z M498,165.512 L498,167.512 L497,167.512678 L497,165.512678 L498,165.512 Z M1,163.461965 L1,165.461965 L1.33226763e-15,165.461 L1.33226763e-15,163.461 L1,163.461965 Z M498,161.512 L498,163.512 L497,163.512678 L497,161.512678 L498,161.512 Z M1,159.461965 L1,161.461965 L1.33226763e-15,161.461 L1.33226763e-15,159.461 L1,159.461965 Z M498,157.512 L498,159.512 L497,159.512678 L497,157.512678 L498,157.512 Z M1,155.461965 L1,157.461965 L1.33226763e-15,157.461 L1.33226763e-15,155.461 L1,155.461965 Z M498,153.512 L498,155.512 L497,155.512678 L497,153.512678 L498,153.512 Z M1,151.461965 L1,153.461965 L1.33226763e-15,153.461 L1.33226763e-15,151.461 L1,151.461965 Z M498,149.512 L498,151.512 L497,151.512678 L497,149.512678 L498,149.512 Z M1,147.461965 L1,149.461965 L1.33226763e-15,149.461 L1.33226763e-15,147.461 L1,147.461965 Z M498,145.512 L498,147.512 L497,147.512678 L497,145.512678 L498,145.512 Z M1,143.461965 L1,145.461965 L1.33226763e-15,145.461 L1.33226763e-15,143.461 L1,143.461965 Z M498,141.512 L498,143.512 L497,143.512678 L497,141.512678 L498,141.512 Z M1,139.461965 L1,141.461965 L1.33226763e-15,141.461 L1.33226763e-15,139.461 L1,139.461965 Z M498,137.512 L498,139.512 L497,139.512678 L497,137.512678 L498,137.512 Z M1,135.461965 L1,137.461965 L1.33226763e-15,137.461 L1.33226763e-15,135.461 L1,135.461965 Z M498,133.512 L498,135.512 L497,135.512678 L497,133.512678 L498,133.512 Z M1,131.461965 L1,133.461965 L1.33226763e-15,133.461 L1.33226763e-15,131.461 L1,131.461965 Z M498,129.512 L498,131.512 L497,131.512678 L497,129.512678 L498,129.512 Z M1,127.461965 L1,129.461965 L1.33226763e-15,129.461 L1.33226763e-15,127.461 L1,127.461965 Z M498,125.512 L498,127.512 L497,127.512678 L497,125.512678 L498,125.512 Z M1,123.461965 L1,125.461965 L1.22124533e-15,125.461 L1.22124533e-15,123.461 L1,123.461965 Z M498,121.512 L498,123.512 L497,123.512678 L497,121.512678 L498,121.512 Z M1,119.461965 L1,121.461965 L1.22124533e-15,121.461 L1.22124533e-15,119.461 L1,119.461965 Z M498,117.512 L498,119.512 L497,119.512678 L497,117.512678 L498,117.512 Z M1,115.461965 L1,117.461965 L1.22124533e-15,117.461 L1.22124533e-15,115.461 L1,115.461965 Z M498,113.512 L498,115.512 L497,115.512678 L497,113.512678 L498,113.512 Z M1,111.461965 L1,113.461965 L1.22124533e-15,113.461 L1.22124533e-15,111.461 L1,111.461965 Z M498,109.512 L498,111.512 L497,111.512678 L497,109.512678 L498,109.512 Z M1,107.461965 L1,109.461965 L1.22124533e-15,109.461 L1.22124533e-15,107.461 L1,107.461965 Z M498,105.512 L498,107.512 L497,107.512678 L497,105.512678 L498,105.512 Z M1,103.461965 L1,105.461965 L1.11022302e-15,105.461 L1.11022302e-15,103.461 L1,103.461965 Z M498,101.512 L498,103.512 L497,103.512678 L497,101.512678 L498,101.512 Z M1,99.461965 L1,101.461965 L1.11022302e-15,101.461 L1.11022302e-15,99.461 L1,99.461965 Z M498,97.512 L498,99.512 L497,99.5126783 L497,97.5126783 L498,97.512 Z M1,95.461965 L1,97.461965 L1.11022302e-15,97.461 L1.11022302e-15,95.461 L1,95.461965 Z M498,93.512 L498,95.512 L497,95.5126783 L497,93.5126783 L498,93.512 Z M1,91.461965 L1,93.461965 L1.11022302e-15,93.461 L1.11022302e-15,91.461 L1,91.461965 Z M498,89.512 L498,91.512 L497,91.5126783 L497,89.5126783 L498,89.512 Z M1,87.461965 L1,89.461965 L1.11022302e-15,89.461 L1.11022302e-15,87.461 L1,87.461965 Z M498,85.512 L498,87.512 L497,87.5126783 L497,85.5126783 L498,85.512 Z M1,83.461965 L1,85.461965 L1.11022302e-15,85.461 L1.11022302e-15,83.461 L1,83.461965 Z M498,81.512 L498,83.512 L497,83.5126783 L497,81.5126783 L498,81.512 Z M1,79.461965 L1,81.461965 L1.11022302e-15,81.461 L1.11022302e-15,79.461 L1,79.461965 Z M498,77.512 L498,79.512 L497,79.5126783 L497,77.5126783 L498,77.512 Z M1,75.461965 L1,77.461965 L1.11022302e-15,77.461 L1.11022302e-15,75.461 L1,75.461965 Z M498,73.512 L498,75.512 L497,75.5126783 L497,73.5126783 L498,73.512 Z M1,71.461965 L1,73.461965 L1.11022302e-15,73.461 L1.11022302e-15,71.461 L1,71.461965 Z M498,69.512 L498,71.512 L497,71.5126783 L497,69.5126783 L498,69.512 Z M1,67.461965 L1,69.461965 L1.11022302e-15,69.461 L1.11022302e-15,67.461 L1,67.461965 Z M498,65.512 L498,67.512 L497,67.5126783 L497,65.5126783 L498,65.512 Z M1,63.461965 L1,65.461965 L1.11022302e-15,65.461 L1.11022302e-15,63.461 L1,63.461965 Z M498,61.512 L498,63.512 L497,63.5126783 L497,61.5126783 L498,61.512 Z M1,59.461965 L1,61.461965 L1.11022302e-15,61.461 L1.11022302e-15,59.461 L1,59.461965 Z M498,57.512 L498,59.512 L497,59.5126783 L497,57.5126783 L498,57.512 Z M1,55.461965 L1,57.461965 L1.11022302e-15,57.461 L1.11022302e-15,55.461 L1,55.461965 Z M498,53.512 L498,55.512 L497,55.5126783 L497,53.5126783 L498,53.512 Z M1,51.461965 L1,53.461965 L9.99200722e-16,53.461 L9.99200722e-16,51.461 L1,51.461965 Z M498,49.512 L498,51.512 L497,51.5126783 L497,49.5126783 L498,49.512 Z M1,47.461965 L1,49.461965 L9.99200722e-16,49.461 L9.99200722e-16,47.461 L1,47.461965 Z M498,45.512 L498,47.512 L497,47.5126783 L497,45.5126783 L498,45.512 Z M1,43.461965 L1,45.461965 L9.99200722e-16,45.461 L9.99200722e-16,43.461 L1,43.461965 Z M498,41.512 L498,43.512 L497,43.5126783 L497,41.5126783 L498,41.512 Z M1,39.461965 L1,41.461965 L9.99200722e-16,41.461 L9.99200722e-16,39.461 L1,39.461965 Z M498,37.512 L498,39.512 L497,39.5126783 L497,37.5126783 L498,37.512 Z M1,35.461965 L1,37.461965 L9.99200722e-16,37.461 L9.99200722e-16,35.461 L1,35.461965 Z M498,33.512 L498,35.512 L497,35.5126783 L497,33.5126783 L498,33.512 Z M1,31.461965 L1,33.461965 L8.8817842e-16,33.461 L8.8817842e-16,31.461 L1,31.461965 Z M498,29.512 L498,31.512 L497,31.5126783 L497,29.5126783 L498,29.512 Z M1,27.461965 L1,29.461965 L8.8817842e-16,29.461 L8.8817842e-16,27.461 L1,27.461965 Z M498,25.512 L498,27.512 L497,27.5126783 L497,25.5126783 L498,25.512 Z M1,23.461965 L1,25.461965 L8.8817842e-16,25.461 L8.8817842e-16,23.461 L1,23.461965 Z M498,21.512 L498,23.512 L497,23.5126783 L497,21.5126783 L498,21.512 Z M1,19.461965 L1,21.461965 L8.8817842e-16,21.461 L8.8817842e-16,19.461 L1,19.461965 Z M498,17.512 L498,19.512 L497,19.5126783 L497,17.5126783 L498,17.512 Z M1,15.461965 L1,17.461965 L8.8817842e-16,17.461 L8.8817842e-16,15.461 L1,15.461965 Z M498,13.512 L498,15.512 L497,15.5126783 L497,13.5126783 L498,13.512 Z M1,11.461965 L1,13.461965 L8.8817842e-16,13.461 L8.8817842e-16,11.461 L1,11.461965 Z M498,9.512 L498,11.512 L497,11.5126783 L497,9.51267833 L498,9.512 Z M0.0177810396,7.46235014 L1.01560984,7.52821226 C1.00521986,7.68466746 1,7.84198159 1,8 L1,9.46196502 L0,9.461 L0,8 C0,7.81932186 0.00598958881,7.64004267 0.0177810396,7.46235014 Z M497.990584,7.60841969 L496.991738,7.65643888 C496.96328,7.06621553 496.861526,6.4886371 496.69013,5.93312344 L497.645809,5.63871559 C497.839269,6.26585924 497.957689,6.92592236 497.990584,7.60841969 Z M1.26026912,3.68817589 L2.10224493,4.22769122 C1.78656347,4.72006919 1.53367638,5.25013249 1.3502652,5.80651827 L0.400554683,5.49338901 C0.612012081,4.85193918 0.902417359,4.24636767 1.26026912,3.68817589 Z M495.910283,4.24730362 C495.595874,3.75330722 495.220381,3.30005668 494.793473,2.8987301 L495.478141,2.16987536 C495.964541,2.62708633 496.393716,3.14447851 496.753775,3.71016071 L495.910283,4.24730362 Z M4.18303226,0.967564053 L4.66099377,1.84594486 C4.14669322,2.12566396 3.66909236,2.46925191 3.23964084,2.86780054 L2.55894601,2.13523322 C3.04750167,1.68176376 3.5928096,1.28859486 4.18303226,0.967564053 Z M493.363686,1.85943199 C492.851542,1.57820747 492.305758,1.36213399 491.737382,1.21704772 L491.985321,0.248271715 C492.641414,0.415804123 493.265066,0.664438906 493.844969,0.982866923 L493.363686,1.85943199 Z M10,0 L10,1 L8.00000003,1 L7.59669462,1.01139522 C7.19522811,1.0341476 6.79959301,1.09086155 6.41261147,1.18053077 L6.18734244,0.206234069 C6.76976317,0.0713155963 7.37655326,0 8,0 L10,0 Z M254,0 L254,1 L252,1 L252,0 L254,0 Z M482,0 L482,1 L480,1 L480,0 L482,0 Z M478,0 L478,1 L476,1 L476,0 L478,0 Z M474,0 L474,1 L472,1 L472,0 L474,0 Z M470,0 L470,1 L468,1 L468,0 L470,0 Z M466,0 L466,1 L464,1 L464,0 L466,0 Z M462,0 L462,1 L460,1 L460,0 L462,0 Z M458,0 L458,1 L456,1 L456,0 L458,0 Z M454,0 L454,1 L452,1 L452,0 L454,0 Z M450,0 L450,1 L448,1 L448,0 L450,0 Z M446,0 L446,1 L444,1 L444,0 L446,0 Z M442,0 L442,1 L440,1 L440,0 L442,0 Z M438,0 L438,1 L436,1 L436,0 L438,0 Z M434,0 L434,1 L432,1 L432,0 L434,0 Z M430,0 L430,1 L428,1 L428,0 L430,0 Z M426,0 L426,1 L424,1 L424,0 L426,0 Z M422,0 L422,1 L420,1 L420,0 L422,0 Z M418,0 L418,1 L416,1 L416,0 L418,0 Z M414,0 L414,1 L412,1 L412,0 L414,0 Z M410,0 L410,1 L408,1 L408,0 L410,0 Z M406,0 L406,1 L404,1 L404,0 L406,0 Z M402,0 L402,1 L400,1 L400,0 L402,0 Z M398,0 L398,1 L396,1 L396,0 L398,0 Z M394,0 L394,1 L392,1 L392,0 L394,0 Z M390,0 L390,1 L388,1 L388,0 L390,0 Z M386,0 L386,1 L384,1 L384,0 L386,0 Z M382,0 L382,1 L380,1 L380,0 L382,0 Z M378,0 L378,1 L376,1 L376,0 L378,0 Z M374,0 L374,1 L372,1 L372,0 L374,0 Z M370,0 L370,1 L368,1 L368,0 L370,0 Z M366,0 L366,1 L364,1 L364,0 L366,0 Z M362,0 L362,1 L360,1 L360,0 L362,0 Z M358,0 L358,1 L356,1 L356,0 L358,0 Z M354,0 L354,1 L352,1 L352,0 L354,0 Z M350,0 L350,1 L348,1 L348,0 L350,0 Z M346,0 L346,1 L344,1 L344,0 L346,0 Z M342,0 L342,1 L340,1 L340,0 L342,0 Z M338,0 L338,1 L336,1 L336,0 L338,0 Z M334,0 L334,1 L332,1 L332,0 L334,0 Z M330,0 L330,1 L328,1 L328,0 L330,0 Z M326,0 L326,1 L324,1 L324,0 L326,0 Z M322,0 L322,1 L320,1 L320,0 L322,0 Z M318,0 L318,1 L316,1 L316,0 L318,0 Z M314,0 L314,1 L312,1 L312,0 L314,0 Z M310,0 L310,1 L308,1 L308,0 L310,0 Z M306,0 L306,1 L304,1 L304,0 L306,0 Z M302,0 L302,1 L300,1 L300,0 L302,0 Z M298,0 L298,1 L296,1 L296,0 L298,0 Z M294,0 L294,1 L292,1 L292,0 L294,0 Z M290,0 L290,1 L288,1 L288,0 L290,0 Z M286,0 L286,1 L284,1 L284,0 L286,0 Z M282,0 L282,1 L280,1 L280,0 L282,0 Z M278,0 L278,1 L276,1 L276,0 L278,0 Z M274,0 L274,1 L272,1 L272,0 L274,0 Z M270,0 L270,1 L268,1 L268,0 L270,0 Z M266,0 L266,1 L264,1 L264,0 L266,0 Z M262,0 L262,1 L260,1 L260,0 L262,0 Z M258,0 L258,1 L256,1 L255.999,0 L258,0 Z M490,0 L490,1 L488,1 L488,0 L490,0 Z M250,0 L250,1 L248,1 L248,0 L250,0 Z M246,0 L246,1 L244,1 L244,0 L246,0 Z M242,0 L242,1 L240,1 L240,0 L242,0 Z M238,0 L238,1 L236,1 L236,0 L238,0 Z M234,0 L234,1 L232,1 L232,0 L234,0 Z M230,0 L230,1 L228,1 L228,0 L230,0 Z M226,0 L226,1 L224,1 L224,0 L226,0 Z M222,0 L222,1 L220,1 L220,0 L222,0 Z M218,0 L218,1 L216,1 L216,0 L218,0 Z M214,0 L214,1 L212,1 L212,0 L214,0 Z M210,0 L210,1 L208,1 L208,0 L210,0 Z M206,0 L206,1 L204,1 L204,0 L206,0 Z M202,0 L202,1 L200,1 L200,0 L202,0 Z M198,0 L198,1 L196,1 L196,0 L198,0 Z M194,0 L194,1 L192,1 L192,0 L194,0 Z M190,0 L190,1 L188,1 L188,0 L190,0 Z M186,0 L186,1 L184,1 L184,0 L186,0 Z M182,0 L182,1 L180,1 L180,0 L182,0 Z M178,0 L178,1 L176,1 L176,0 L178,0 Z M174,0 L174,1 L172,1 L172,0 L174,0 Z M170,0 L170,1 L168,1 L168,0 L170,0 Z M166,0 L166,1 L164,1 L164,0 L166,0 Z M162,0 L162,1 L160,1 L160,0 L162,0 Z M158,0 L158,1 L156,1 L156,0 L158,0 Z M154,0 L154,1 L152,1 L152,0 L154,0 Z M150,0 L150,1 L148,1 L148,0 L150,0 Z M146,0 L146,1 L144,1 L144,0 L146,0 Z M142,0 L142,1 L140,1 L140,0 L142,0 Z M138,0 L138,1 L136,1 L136,0 L138,0 Z M133.999,0 L134,1 L132,1 L131.999,0 L133.999,0 Z M130,0 L130,1 L128,1 L128,0 L130,0 Z M126,0 L126,1 L124,1 L124,0 L126,0 Z M122,0 L122,1 L120,1 L120,0 L122,0 Z M118,0 L118,1 L116,1 L116,0 L118,0 Z M114,0 L114,1 L112,1 L112,0 L114,0 Z M110,0 L110,1 L108,1 L108,0 L110,0 Z M106,0 L106,1 L104,1 L104,0 L106,0 Z M102,0 L102,1 L100,1 L100,0 L102,0 Z M98,0 L98,1 L96,1 L96,0 L98,0 Z M94,0 L94,1 L92,1 L92,0 L94,0 Z M90,0 L90,1 L88,1 L88,0 L90,0 Z M86,0 L86,1 L84,1 L84,0 L86,0 Z M82,0 L82,1 L80,1 L80,0 L82,0 Z M78,0 L78,1 L76,1 L76,0 L78,0 Z M74,0 L74,1 L72,1 L72,0 L74,0 Z M70,0 L70,1 L68,1 L68,0 L70,0 Z M66,0 L66,1 L64,1 L64,0 L66,0 Z M62,0 L62,1 L60,1 L60,0 L62,0 Z M58,0 L58,1 L56,1 L56,0 L58,0 Z M54,0 L54,1 L52,1 L52,0 L54,0 Z M50,0 L50,1 L48,1 L48,0 L50,0 Z M46,0 L46,1 L44,1 L44,0 L46,0 Z M42,0 L42,1 L40,1 L40,0 L42,0 Z M38,0 L38,1 L36,1 L36,0 L38,0 Z M34,0 L34,1 L32,1 L32,0 L34,0 Z M30,0 L30,1 L28,1 L28,0 L30,0 Z M26,0 L26,1 L24,1 L24,0 L26,0 Z M22,0 L22,1 L20,1 L20,0 L22,0 Z M18,0 L18,1 L16,1 L16,0 L18,0 Z M14,0 L14,1 L12,1 L12,0 L14,0 Z M486,0 L486,1 L484,1 L484,0 L486,0 Z" id="dash-box-1" fill-rule="nonzero"></path>
+            <g id="Sequence-Pooling-Typ" transform="translate(0, 6)" fill-rule="nonzero">
+                <path d="M182.879639,15.1650391 C183.988363,15.1650391 184.871745,14.9164225 185.529785,14.4191895 C186.187826,13.9219564 186.516846,13.245931 186.516846,12.3911133 C186.516846,12.0229492 186.461833,11.6981608 186.351807,11.416748 C186.241781,11.1353353 186.071452,10.8909505 185.84082,10.6835938 C185.610189,10.476237 185.349935,10.3006185 185.060059,10.1567383 C184.770182,10.0128581 184.413656,9.87320964 183.990479,9.73779297 C183.96932,9.72932943 183.6604,9.63834635 183.063721,9.46484375 C182.467041,9.29134115 182.158122,9.20035807 182.136963,9.19189453 C181.81958,9.08610026 181.583659,8.94962565 181.429199,8.7824707 C181.27474,8.61531576 181.19751,8.3984375 181.19751,8.13183594 C181.19751,7.92447917 181.248291,7.74674479 181.349854,7.59863281 C181.451416,7.45052083 181.59318,7.33626302 181.775146,7.25585937 C181.957113,7.17545573 182.154948,7.11726888 182.368652,7.08129883 C182.582357,7.04532878 182.820394,7.02734375 183.082764,7.02734375 C183.874105,7.02734375 184.610433,7.22200521 185.291748,7.61132813 C185.401774,7.67057292 185.503337,7.70019531 185.596436,7.70019531 C185.77417,7.70019531 185.926514,7.62296549 186.053467,7.46850586 C186.18042,7.31404622 186.243896,7.14160156 186.243896,6.95117187 C186.243896,6.76497396 186.178304,6.61263021 186.047119,6.49414063 C185.742432,6.23177083 185.304443,6.01489258 184.733154,5.84350586 C184.161865,5.67211914 183.573649,5.58642578 182.968506,5.58642578 C181.931722,5.58642578 181.086426,5.82340495 180.432617,6.29736328 C179.778809,6.77132161 179.451904,7.4124349 179.451904,8.22070312 C179.451904,8.84700521 179.611654,9.34318034 179.931152,9.70922852 C180.250651,10.0752767 180.772217,10.383138 181.49585,10.6328125 L183.590576,11.3310547 C183.984131,11.4580078 184.274007,11.6124674 184.460205,11.7944336 C184.646403,11.9763997 184.739502,12.2281901 184.739502,12.5498047 C184.739502,12.9391276 184.567057,13.2332357 184.222168,13.4321289 C183.877279,13.6310221 183.427653,13.7304688 182.873291,13.7304688 C182.005778,13.7304688 181.176351,13.4934896 180.38501,13.0195313 C180.31307,12.9772135 180.239014,12.9560547 180.162842,12.9560547 C179.980876,12.9560547 179.814779,13.0565592 179.664551,13.2575684 C179.514323,13.4585775 179.439209,13.6627604 179.439209,13.8701172 C179.439209,14.0266927 179.487874,14.1409505 179.585205,14.2128906 C180.401937,14.8476562 181.500081,15.1650391 182.879639,15.1650391 Z" id="Path"></path>
+                <path d="M187.5896,11.7563477 C187.5896,12.8015951 187.891113,13.6162109 188.494141,14.2001953 C189.097168,14.7841797 189.900146,15.0761719 190.903076,15.0761719 C191.79598,15.0761719 192.576742,14.7947591 193.245361,14.2319336 C193.380778,14.113444 193.448486,13.9695638 193.448486,13.800293 C193.448486,13.652181 193.397705,13.5188802 193.296143,13.4003906 C193.19458,13.281901 193.078206,13.2226562 192.947021,13.2226562 C192.858154,13.2226562 192.775635,13.2522786 192.699463,13.3115234 C192.102783,13.7389323 191.542074,13.9526367 191.017334,13.9526367 C189.862061,13.9526367 189.25057,13.355957 189.182861,12.1625977 L193.19458,12.1625977 C193.346924,12.1625977 193.461182,12.1118164 193.537354,12.0102539 C193.613525,11.9086914 193.651611,11.7753906 193.651611,11.6103516 C193.64738,11.1914062 193.586019,10.8020833 193.467529,10.4423828 C193.34904,10.0826823 193.174479,9.75895182 192.943848,9.47119141 C192.713216,9.18343099 192.408529,8.95808919 192.029785,8.79516602 C191.651042,8.63224284 191.216227,8.55078125 190.725342,8.55078125 C190.086344,8.55078125 189.525635,8.69889323 189.043213,8.99511719 C188.560791,9.29134115 188.197917,9.67960612 187.95459,10.1599121 C187.711263,10.6402181 187.5896,11.1723633 187.5896,11.7563477 Z M189.195557,11.2675781 C189.233643,10.8063151 189.387044,10.4127604 189.655762,10.0869141 C189.924479,9.76106771 190.295817,9.59814453 190.769775,9.59814453 C191.256429,9.59814453 191.623535,9.75472005 191.871094,10.0678711 C192.118652,10.3810221 192.259359,10.7809245 192.293213,11.2675781 L189.195557,11.2675781 Z" id="Shape"></path>
+                <path d="M200.215088,17.4057617 C200.447835,17.4057617 200.637207,17.3306478 200.783203,17.1804199 C200.929199,17.0301921 201.002197,16.8175456 201.002197,16.5424805 L201.002197,9.54101563 C201.002197,9.26595052 200.924967,9.04589844 200.770508,8.88085938 C200.616048,8.71582031 200.424561,8.63330078 200.196045,8.63330078 C199.954834,8.63330078 199.764404,8.70100911 199.624756,8.83642578 C199.485107,8.97184245 199.415283,9.16861979 199.415283,9.42675781 L199.415283,9.64892578 C199.004801,8.91259766 198.304443,8.54443359 197.314209,8.54443359 C196.751383,8.54443359 196.25944,8.69148763 195.838379,8.9855957 C195.417318,9.27970378 195.099935,9.67431641 194.88623,10.1694336 C194.672526,10.6645508 194.565674,11.2252604 194.565674,11.8515625 C194.565674,12.8460286 194.823812,13.6320801 195.340088,14.2097168 C195.856364,14.7873535 196.544027,15.0761719 197.403076,15.0761719 C198.334066,15.0761719 199.002686,14.7355143 199.408936,14.0541992 L199.408936,16.5424805 C199.408936,16.8175456 199.484049,17.0301921 199.634277,17.1804199 C199.784505,17.3306478 199.978109,17.4057617 200.215088,17.4057617 Z M197.739502,13.9970703 C197.451742,13.9970703 197.201009,13.9357096 196.987305,13.8129883 C196.7736,13.6902669 196.607503,13.5241699 196.489014,13.3146973 C196.370524,13.1052246 196.282715,12.8777669 196.225586,12.6323242 C196.168457,12.3868815 196.139893,12.1223958 196.139893,11.8388672 C196.139893,11.5807292 196.166341,11.3299967 196.219238,11.0866699 C196.272135,10.8433431 196.355713,10.6074219 196.469971,10.3789062 C196.584229,10.1503906 196.749268,9.96630859 196.965088,9.82666016 C197.180908,9.68701172 197.434814,9.6171875 197.726807,9.6171875 C198.33195,9.6171875 198.762533,9.80338542 199.018555,10.1757812 C199.274577,10.5481771 199.402588,11.0983073 199.402588,11.8261719 C199.402588,12.2366536 199.348633,12.599528 199.240723,12.9147949 C199.132812,13.2300618 198.950846,13.4892578 198.694824,13.6923828 C198.438802,13.8955078 198.120361,13.9970703 197.739502,13.9970703 Z" id="Shape"></path>
+                <path d="M204.912354,15.0761719 C205.915283,15.0761719 206.607178,14.7122396 206.988037,13.984375 L206.988037,14.2192383 C206.988037,14.4689128 207.062093,14.6614583 207.210205,14.796875 C207.358317,14.9322917 207.546631,15 207.775146,15 C208.007894,15 208.200439,14.9301758 208.352783,14.7905273 C208.505127,14.6508789 208.581299,14.4541016 208.581299,14.2001953 L208.581299,9.515625 C208.581299,9.23632812 208.507243,9.01944987 208.359131,8.86499023 C208.211019,8.7105306 208.020589,8.63330078 207.787842,8.63330078 C207.555094,8.63330078 207.363607,8.7105306 207.213379,8.86499023 C207.063151,9.01944987 206.988037,9.23632812 206.988037,9.515625 L206.988037,12.1816406 C206.988037,12.7190755 206.83252,13.1549479 206.521484,13.4892578 C206.210449,13.8235677 205.841227,13.9907227 205.413818,13.9907227 C205.066813,13.9907227 204.781169,13.8754069 204.556885,13.6447754 C204.332601,13.4141439 204.220459,13.0808919 204.220459,12.6450195 L204.220459,9.515625 C204.220459,9.23632812 204.146403,9.01944987 203.998291,8.86499023 C203.850179,8.7105306 203.663981,8.63330078 203.439697,8.63330078 C203.202718,8.63330078 203.008057,8.7105306 202.855713,8.86499023 C202.703369,9.01944987 202.627197,9.23632812 202.627197,9.515625 L202.627197,12.6450195 C202.627197,13.0470378 202.689616,13.4067383 202.814453,13.7241211 C202.93929,14.0415039 203.109619,14.2964681 203.325439,14.4890137 C203.54126,14.6815592 203.783529,14.8275553 204.052246,14.927002 C204.320964,15.0264486 204.607666,15.0761719 204.912354,15.0761719 Z" id="Path"></path>
+                <path d="M209.927002,11.7563477 C209.927002,12.8015951 210.228516,13.6162109 210.831543,14.2001953 C211.43457,14.7841797 212.237549,15.0761719 213.240479,15.0761719 C214.133382,15.0761719 214.914144,14.7947591 215.582764,14.2319336 C215.71818,14.113444 215.785889,13.9695638 215.785889,13.800293 C215.785889,13.652181 215.735107,13.5188802 215.633545,13.4003906 C215.531982,13.281901 215.415609,13.2226562 215.284424,13.2226562 C215.195557,13.2226562 215.113037,13.2522786 215.036865,13.3115234 C214.440186,13.7389323 213.879476,13.9526367 213.354736,13.9526367 C212.199463,13.9526367 211.587972,13.355957 211.520264,12.1625977 L215.531982,12.1625977 C215.684326,12.1625977 215.798584,12.1118164 215.874756,12.0102539 C215.950928,11.9086914 215.989014,11.7753906 215.989014,11.6103516 C215.984782,11.1914062 215.923421,10.8020833 215.804932,10.4423828 C215.686442,10.0826823 215.511882,9.75895182 215.28125,9.47119141 C215.050618,9.18343099 214.745931,8.95808919 214.367188,8.79516602 C213.988444,8.63224284 213.55363,8.55078125 213.062744,8.55078125 C212.423747,8.55078125 211.863037,8.69889323 211.380615,8.99511719 C210.898193,9.29134115 210.535319,9.67960612 210.291992,10.1599121 C210.048665,10.6402181 209.927002,11.1723633 209.927002,11.7563477 Z M211.532959,11.2675781 C211.571045,10.8063151 211.724447,10.4127604 211.993164,10.0869141 C212.261882,9.76106771 212.633219,9.59814453 213.107178,9.59814453 C213.593831,9.59814453 213.960938,9.75472005 214.208496,10.0678711 C214.456055,10.3810221 214.596761,10.7809245 214.630615,11.2675781 L211.532959,11.2675781 Z" id="Shape"></path>
+                <path d="M218.026611,15 C218.259359,15 218.450846,14.9227702 218.601074,14.7683105 C218.751302,14.6138509 218.826416,14.3990885 218.826416,14.1240234 L218.826416,11.4389648 C218.826416,10.9015299 218.980876,10.4656576 219.289795,10.1313477 C219.598714,9.79703776 219.964762,9.62988281 220.387939,9.62988281 C220.739176,9.62988281 221.027995,9.74519857 221.254395,9.97583008 C221.480794,10.2064616 221.593994,10.5397135 221.593994,10.9755859 L221.593994,14.1240234 C221.593994,14.3990885 221.66805,14.6138509 221.816162,14.7683105 C221.964274,14.9227702 222.150472,15 222.374756,15 C222.611735,15 222.806396,14.9227702 222.95874,14.7683105 C223.111084,14.6138509 223.187256,14.3990885 223.187256,14.1240234 L223.187256,10.9819336 C223.187256,10.5756836 223.125895,10.2138672 223.003174,9.89648438 C222.880452,9.57910156 222.713298,9.32413737 222.501709,9.1315918 C222.29012,8.93904622 222.049967,8.79305013 221.78125,8.69360352 C221.512533,8.5941569 221.22583,8.54443359 220.921143,8.54443359 C220.426025,8.54443359 219.999674,8.63753255 219.64209,8.82373047 C219.284505,9.00992839 219.012614,9.2828776 218.826416,9.64257813 L218.826416,9.42041016 C218.826416,9.17073568 218.75236,8.97713216 218.604248,8.83959961 C218.456136,8.70206706 218.267822,8.63330078 218.039307,8.63330078 C217.806559,8.63330078 217.614014,8.703125 217.46167,8.84277344 C217.309326,8.98242188 217.233154,9.17919922 217.233154,9.43310547 L217.233154,14.1240234 C217.233154,14.3990885 217.30721,14.6138509 217.455322,14.7683105 C217.603434,14.9227702 217.793864,15 218.026611,15 Z" id="Path"></path>
+                <path d="M226.088135,11.7944336 C226.088135,11.1554362 226.243652,10.6455078 226.554688,10.2646484 C226.865723,9.88378906 227.296305,9.69335938 227.846436,9.69335938 C228.210368,9.69335938 228.582764,9.78857422 228.963623,9.97900391 C229.060954,10.0340169 229.1604,10.0615234 229.261963,10.0615234 C229.405843,10.0615234 229.527507,10.0107422 229.626953,9.90917969 C229.7264,9.80761719 229.776123,9.68701172 229.776123,9.54736328 C229.776123,9.35270182 229.680908,9.18766276 229.490479,9.05224609 C229.024984,8.71370443 228.421956,8.54443359 227.681396,8.54443359 C227.059326,8.54443359 226.502848,8.69148763 226.011963,8.9855957 C225.521077,9.27970378 225.143392,9.67431641 224.878906,10.1694336 C224.614421,10.6645508 224.482178,11.2104492 224.482178,11.8071289 C224.482178,12.7635091 224.778402,13.5474447 225.37085,14.1589355 C225.963298,14.7704264 226.727132,15.0761719 227.662354,15.0761719 C228.127848,15.0761719 228.529867,15.0222168 228.868408,14.9143066 C229.20695,14.8063965 229.465088,14.6720378 229.642822,14.5112305 C229.812093,14.3673503 229.896729,14.1980794 229.896729,14.003418 C229.896729,13.8595378 229.850179,13.7357585 229.75708,13.6320801 C229.663981,13.5284017 229.549723,13.4765625 229.414307,13.4765625 C229.304281,13.4765625 229.19637,13.5125326 229.090576,13.5844727 C228.760498,13.8045247 228.335205,13.9145508 227.814697,13.9145508 C227.277262,13.9145508 226.855143,13.7209473 226.54834,13.3337402 C226.241536,12.9465332 226.088135,12.433431 226.088135,11.7944336 Z" id="Path"></path>
+                <path d="M230.709229,11.7563477 C230.709229,12.8015951 231.010742,13.6162109 231.61377,14.2001953 C232.216797,14.7841797 233.019775,15.0761719 234.022705,15.0761719 C234.915609,15.0761719 235.69637,14.7947591 236.36499,14.2319336 C236.500407,14.113444 236.568115,13.9695638 236.568115,13.800293 C236.568115,13.652181 236.517334,13.5188802 236.415771,13.4003906 C236.314209,13.281901 236.197835,13.2226562 236.06665,13.2226562 C235.977783,13.2226562 235.895264,13.2522786 235.819092,13.3115234 C235.222412,13.7389323 234.661702,13.9526367 234.136963,13.9526367 C232.981689,13.9526367 232.370199,13.355957 232.30249,12.1625977 L236.314209,12.1625977 C236.466553,12.1625977 236.580811,12.1118164 236.656982,12.0102539 C236.733154,11.9086914 236.77124,11.7753906 236.77124,11.6103516 C236.767008,11.1914062 236.705648,10.8020833 236.587158,10.4423828 C236.468669,10.0826823 236.294108,9.75895182 236.063477,9.47119141 C235.832845,9.18343099 235.528158,8.95808919 235.149414,8.79516602 C234.770671,8.63224284 234.335856,8.55078125 233.844971,8.55078125 C233.205973,8.55078125 232.645264,8.69889323 232.162842,8.99511719 C231.68042,9.29134115 231.317546,9.67960612 231.074219,10.1599121 C230.830892,10.6402181 230.709229,11.1723633 230.709229,11.7563477 Z M232.315186,11.2675781 C232.353271,10.8063151 232.506673,10.4127604 232.775391,10.0869141 C233.044108,9.76106771 233.415446,9.59814453 233.889404,9.59814453 C234.376058,9.59814453 234.743164,9.75472005 234.990723,10.0678711 C235.238281,10.3810221 235.378988,10.7809245 235.412842,11.2675781 L232.315186,11.2675781 Z" id="Shape"></path>
+                <path d="M242.788818,15.0063477 C243.038493,15.0063477 243.253255,14.9195964 243.433105,14.7460938 C243.612956,14.5725911 243.702881,14.3440755 243.702881,14.0605469 L243.702881,11.362793 L245.473877,11.362793 C247.788656,11.362793 248.946045,10.4402669 248.946045,8.59521484 C248.946045,8.09163411 248.867757,7.65893555 248.711182,7.29711914 C248.554606,6.93530273 248.325033,6.6496582 248.022461,6.44018555 C247.719889,6.23071289 247.370768,6.07836914 246.975098,5.9831543 C246.579427,5.88793945 246.119222,5.84033203 245.594482,5.84033203 L242.86499,5.84033203 C242.551839,5.84033203 242.308512,5.9461263 242.13501,6.15771484 C241.961507,6.36930339 241.874756,6.63590495 241.874756,6.95751953 L241.874756,14.0605469 C241.874756,14.3440755 241.965739,14.5725911 242.147705,14.7460938 C242.329671,14.9195964 242.543376,15.0063477 242.788818,15.0063477 Z M243.702881,10.0424805 L243.702881,7.23046875 L245.397705,7.23046875 C245.693929,7.23046875 245.94043,7.24739583 246.137207,7.28125 C246.333984,7.31510417 246.513835,7.37858073 246.676758,7.47167969 C246.839681,7.56477865 246.959229,7.70442708 247.0354,7.890625 C247.111572,8.07682292 247.149658,8.3116862 247.149658,8.59521484 C247.149658,8.88720703 247.111572,9.12841797 247.0354,9.31884766 C246.959229,9.50927734 246.839681,9.65633138 246.676758,9.76000977 C246.513835,9.86368815 246.329753,9.9366862 246.124512,9.97900391 C245.919271,10.0213216 245.662191,10.0424805 245.353271,10.0424805 L243.702881,10.0424805 Z" id="Shape"></path>
+                <path d="M253.008545,13.9907227 C252.462646,13.9907227 252.037354,13.800293 251.732666,13.4194336 C251.427979,13.0385742 251.275635,12.5053711 251.275635,11.8198242 C251.275635,11.1258138 251.426921,10.587321 251.729492,10.2043457 C252.032064,9.82137044 252.458415,9.62988281 253.008545,9.62988281 C253.558675,9.62988281 253.986084,9.82242839 254.290771,10.2075195 C254.595459,10.5926107 254.747803,11.1300456 254.747803,11.8198242 C254.747803,12.5053711 254.595459,13.0385742 254.290771,13.4194336 C253.986084,13.800293 253.558675,13.9907227 253.008545,13.9907227 Z M253.008545,15.0761719 C253.457113,15.0761719 253.867594,15.0148112 254.23999,14.8920898 C254.612386,14.7693685 254.926595,14.6053874 255.182617,14.4001465 C255.438639,14.1949056 255.65446,13.9515788 255.830078,13.670166 C256.005697,13.3887533 256.134766,13.0935872 256.217285,12.784668 C256.299805,12.4757487 256.341064,12.1541341 256.341064,11.8198242 C256.341064,11.4685872 256.297689,11.1321615 256.210938,10.8105469 C256.124186,10.4889323 255.989827,10.1895345 255.807861,9.91235352 C255.625895,9.63517253 255.405843,9.39607747 255.147705,9.19506836 C254.889567,8.99405924 254.577474,8.83536784 254.211426,8.71899414 C253.845378,8.60262044 253.444417,8.54443359 253.008545,8.54443359 C252.564209,8.54443359 252.156901,8.60473633 251.786621,8.7253418 C251.416341,8.84594727 251.10319,9.00992839 250.847168,9.21728516 C250.591146,9.42464193 250.374268,9.66796875 250.196533,9.94726562 C250.018799,10.2265625 249.888672,10.5238444 249.806152,10.8391113 C249.723633,11.1543783 249.682373,11.4812826 249.682373,11.8198242 C249.682373,12.2472331 249.750081,12.6513672 249.885498,13.0322266 C250.020915,13.4130859 250.21875,13.7579753 250.479004,14.0668945 C250.739258,14.3758138 251.087321,14.6212565 251.523193,14.8032227 C251.959066,14.9851888 252.454183,15.0761719 253.008545,15.0761719 Z" id="Shape"></path>
+                <path d="M260.682861,13.9907227 C260.136963,13.9907227 259.71167,13.800293 259.406982,13.4194336 C259.102295,13.0385742 258.949951,12.5053711 258.949951,11.8198242 C258.949951,11.1258138 259.101237,10.587321 259.403809,10.2043457 C259.70638,9.82137044 260.132731,9.62988281 260.682861,9.62988281 C261.232992,9.62988281 261.6604,9.82242839 261.965088,10.2075195 C262.269775,10.5926107 262.422119,11.1300456 262.422119,11.8198242 C262.422119,12.5053711 262.269775,13.0385742 261.965088,13.4194336 C261.6604,13.800293 261.232992,13.9907227 260.682861,13.9907227 Z M260.682861,15.0761719 C261.131429,15.0761719 261.541911,15.0148112 261.914307,14.8920898 C262.286702,14.7693685 262.600911,14.6053874 262.856934,14.4001465 C263.112956,14.1949056 263.328776,13.9515788 263.504395,13.670166 C263.680013,13.3887533 263.809082,13.0935872 263.891602,12.784668 C263.974121,12.4757487 264.015381,12.1541341 264.015381,11.8198242 C264.015381,11.4685872 263.972005,11.1321615 263.885254,10.8105469 C263.798503,10.4889323 263.664144,10.1895345 263.482178,9.91235352 C263.300212,9.63517253 263.08016,9.39607747 262.822021,9.19506836 C262.563883,8.99405924 262.25179,8.83536784 261.885742,8.71899414 C261.519694,8.60262044 261.118734,8.54443359 260.682861,8.54443359 C260.238525,8.54443359 259.831217,8.60473633 259.460938,8.7253418 C259.090658,8.84594727 258.777507,9.00992839 258.521484,9.21728516 C258.265462,9.42464193 258.048584,9.66796875 257.87085,9.94726562 C257.693115,10.2265625 257.562988,10.5238444 257.480469,10.8391113 C257.397949,11.1543783 257.356689,11.4812826 257.356689,11.8198242 C257.356689,12.2472331 257.424398,12.6513672 257.559814,13.0322266 C257.695231,13.4130859 257.893066,13.7579753 258.15332,14.0668945 C258.413574,14.3758138 258.761637,14.6212565 259.19751,14.8032227 C259.633382,14.9851888 260.128499,15.0761719 260.682861,15.0761719 Z" id="Shape"></path>
+                <path d="M266.179932,15 C266.416911,15 266.61263,14.9217122 266.76709,14.7651367 C266.921549,14.6085612 266.998779,14.390625 266.998779,14.1113281 L266.998779,6.74169922 C266.998779,6.46240234 266.922607,6.24446615 266.770264,6.08789062 C266.61792,5.9313151 266.425374,5.85302734 266.192627,5.85302734 C265.95988,5.85302734 265.76945,5.9313151 265.621338,6.08789062 C265.473226,6.24446615 265.39917,6.46240234 265.39917,6.74169922 L265.39917,14.1113281 C265.39917,14.3948568 265.472168,14.6138509 265.618164,14.7683105 C265.76416,14.9227702 265.951416,15 266.179932,15 Z" id="Path"></path>
+                <path d="M269.518799,15 C269.751546,15 269.943034,14.9217122 270.093262,14.7651367 C270.24349,14.6085612 270.318604,14.390625 270.318604,14.1113281 L270.318604,9.52832031 C270.318604,9.24479167 270.24349,9.02473958 270.093262,8.86816406 C269.943034,8.71158854 269.751546,8.63330078 269.518799,8.63330078 C269.286051,8.63330078 269.095622,8.71158854 268.94751,8.86816406 C268.799398,9.02473958 268.725342,9.24479167 268.725342,9.52832031 L268.725342,14.1113281 C268.725342,14.3948568 268.799398,14.6138509 268.94751,14.7683105 C269.095622,14.9227702 269.286051,15 269.518799,15 Z M269.518799,7.36376953 C269.793864,7.36376953 270.01709,7.28230794 270.188477,7.11938477 C270.359863,6.95646159 270.445557,6.7438151 270.445557,6.48144531 C270.445557,6.21907552 270.360921,6.00748698 270.19165,5.84667969 C270.02238,5.6858724 269.800212,5.60546875 269.525146,5.60546875 C269.24585,5.60546875 269.020508,5.6858724 268.849121,5.84667969 C268.677734,6.00748698 268.592041,6.21907552 268.592041,6.48144531 C268.592041,6.7438151 268.677734,6.95646159 268.849121,7.11938477 C269.020508,7.28230794 269.243734,7.36376953 269.518799,7.36376953 Z" id="Shape"></path>
+                <path d="M272.724365,15 C272.957113,15 273.1486,14.9227702 273.298828,14.7683105 C273.449056,14.6138509 273.52417,14.3990885 273.52417,14.1240234 L273.52417,11.4389648 C273.52417,10.9015299 273.67863,10.4656576 273.987549,10.1313477 C274.296468,9.79703776 274.662516,9.62988281 275.085693,9.62988281 C275.43693,9.62988281 275.725749,9.74519857 275.952148,9.97583008 C276.178548,10.2064616 276.291748,10.5397135 276.291748,10.9755859 L276.291748,14.1240234 C276.291748,14.3990885 276.365804,14.6138509 276.513916,14.7683105 C276.662028,14.9227702 276.848226,15 277.07251,15 C277.309489,15 277.50415,14.9227702 277.656494,14.7683105 C277.808838,14.6138509 277.88501,14.3990885 277.88501,14.1240234 L277.88501,10.9819336 C277.88501,10.5756836 277.823649,10.2138672 277.700928,9.89648438 C277.578206,9.57910156 277.411051,9.32413737 277.199463,9.1315918 C276.987874,8.93904622 276.747721,8.79305013 276.479004,8.69360352 C276.210286,8.5941569 275.923584,8.54443359 275.618896,8.54443359 C275.123779,8.54443359 274.697428,8.63753255 274.339844,8.82373047 C273.982259,9.00992839 273.710368,9.2828776 273.52417,9.64257813 L273.52417,9.42041016 C273.52417,9.17073568 273.450114,8.97713216 273.302002,8.83959961 C273.15389,8.70206706 272.965576,8.63330078 272.737061,8.63330078 C272.504313,8.63330078 272.311768,8.703125 272.159424,8.84277344 C272.00708,8.98242188 271.930908,9.17919922 271.930908,9.43310547 L271.930908,14.1240234 C271.930908,14.3990885 272.004964,14.6138509 272.153076,14.7683105 C272.301188,14.9227702 272.491618,15 272.724365,15 Z" id="Path"></path>
+                <path d="M284.080322,14.9174805 C284.080322,15.5183919 283.915283,15.9659017 283.585205,16.2600098 C283.255127,16.5541178 282.793864,16.7011719 282.201416,16.7011719 C282.032145,16.7011719 281.859701,16.6863607 281.684082,16.6567383 C281.508464,16.6271159 281.376221,16.5996094 281.287354,16.5742188 C281.198486,16.5488281 281.058838,16.5033366 280.868408,16.4377441 C280.677979,16.3721517 280.563721,16.3330078 280.525635,16.3203125 C280.457926,16.2949219 280.390218,16.2822266 280.32251,16.2822266 C280.174398,16.2822266 280.051676,16.3372396 279.954346,16.4472656 C279.857015,16.5572917 279.80835,16.6842448 279.80835,16.828125 C279.80835,17.03125 279.90568,17.1920573 280.100342,17.3105469 C280.328857,17.4501953 280.649414,17.567627 281.062012,17.6628418 C281.474609,17.7580566 281.907308,17.8056641 282.360107,17.8056641 C283.358805,17.8056641 284.149089,17.5432943 284.730957,17.0185547 C285.312826,16.4938151 285.60376,15.7299805 285.60376,14.7270508 L285.60376,9.52197266 C285.60376,9.24267578 285.532878,9.02473958 285.391113,8.86816406 C285.249349,8.71158854 285.068441,8.63330078 284.848389,8.63330078 C284.653727,8.63330078 284.48763,8.69148763 284.350098,8.80786133 C284.212565,8.92423503 284.133219,9.09033203 284.112061,9.30615234 L284.112061,9.59814453 C283.900472,9.25960286 283.648682,9.00146484 283.356689,8.82373047 C283.064697,8.64599609 282.662679,8.55712891 282.150635,8.55712891 C281.23234,8.55712891 280.505534,8.86287435 279.970215,9.47436523 C279.434896,10.0858561 279.167236,10.8803711 279.167236,11.8579102 C279.167236,12.8269857 279.442301,13.5939941 279.992432,14.1589355 C280.542562,14.723877 281.274658,15.0063477 282.188721,15.0063477 C283.090088,15.0063477 283.720622,14.6656901 284.080322,13.984375 L284.080322,14.9174805 Z M282.474365,13.9462891 C281.991943,13.9378255 281.590983,13.7548014 281.271484,13.3972168 C280.951986,13.0396322 280.792236,12.5117188 280.792236,11.8134766 C280.792236,11.5722656 280.811279,11.3469238 280.849365,11.1374512 C280.887451,10.9279785 280.948812,10.7290853 281.033447,10.5407715 C281.118083,10.3524577 281.223877,10.1916504 281.35083,10.0583496 C281.477783,9.92504883 281.636475,9.81925456 281.826904,9.7409668 C282.017334,9.66267904 282.231038,9.62353516 282.468018,9.62353516 C283.542887,9.62353516 284.080322,10.3577474 284.080322,11.8261719 C284.080322,12.5498047 283.934326,13.0819499 283.642334,13.4226074 C283.350342,13.763265 282.961019,13.9378255 282.474365,13.9462891 Z" id="Shape"></path>
+                <path d="M293.855713,15.0063477 C294.105387,15.0063477 294.32015,14.9185384 294.5,14.7429199 C294.67985,14.5673014 294.769775,14.3377279 294.769775,14.0541992 L294.769775,7.28759766 L296.832764,7.28759766 C297.044352,7.28759766 297.213623,7.21565755 297.340576,7.07177734 C297.467529,6.92789714 297.531006,6.7586263 297.531006,6.56396484 C297.531006,6.37353516 297.466471,6.20532227 297.337402,6.05932617 C297.208333,5.91333008 297.04012,5.84033203 296.832764,5.84033203 L290.878662,5.84033203 C290.667074,5.84033203 290.496745,5.91333008 290.367676,6.05932617 C290.238607,6.20532227 290.174072,6.37565104 290.174072,6.5703125 C290.174072,6.76497396 290.237549,6.93318685 290.364502,7.07495117 C290.491455,7.21671549 290.662842,7.28759766 290.878662,7.28759766 L292.947998,7.28759766 L292.947998,14.0541992 C292.947998,14.3377279 293.037923,14.5673014 293.217773,14.7429199 C293.397624,14.9185384 293.61027,15.0063477 293.855713,15.0063477 Z" id="Path"></path>
+                <path d="M300.469971,17.2597656 L304.259521,9.67431641 C304.318766,9.55582682 304.348389,9.43310547 304.348389,9.30615234 C304.348389,9.10725911 304.275391,8.93904622 304.129395,8.80151367 C303.983398,8.66398112 303.81307,8.59521484 303.618408,8.59521484 C303.309489,8.59521484 303.074626,8.76025391 302.913818,9.09033203 L301.218994,12.4736328 L299.530518,9.14746094 C299.373942,8.84277344 299.145426,8.69042969 298.844971,8.69042969 C298.641846,8.69042969 298.461995,8.76025391 298.30542,8.89990234 C298.148844,9.03955078 298.070557,9.20882161 298.070557,9.40771484 C298.070557,9.5304362 298.100179,9.64892578 298.159424,9.76318359 L300.425537,14.0668945 L299.156006,16.5996094 C299.100993,16.7096354 299.073486,16.8238932 299.073486,16.9423828 C299.073486,17.1497396 299.151774,17.326416 299.30835,17.4724121 C299.464925,17.6184082 299.64266,17.6914062 299.841553,17.6914062 C300.116618,17.6914062 300.32609,17.547526 300.469971,17.2597656 Z" id="Path"></path>
+                <path d="M306.163818,17.4057617 C306.400798,17.4057617 306.594401,17.3306478 306.744629,17.1804199 C306.894857,17.0301921 306.969971,16.8175456 306.969971,16.5424805 L306.969971,14.0541992 C307.384684,14.7355143 308.053304,15.0761719 308.97583,15.0761719 C309.83488,15.0761719 310.522542,14.7873535 311.038818,14.2097168 C311.555094,13.6320801 311.813232,12.8460286 311.813232,11.8515625 C311.813232,11.2252604 311.70638,10.6645508 311.492676,10.1694336 C311.278971,9.67431641 310.961589,9.27970378 310.540527,8.9855957 C310.119466,8.69148763 309.627523,8.54443359 309.064697,8.54443359 C308.074463,8.54443359 307.376221,8.91259766 306.969971,9.64892578 L306.969971,9.42675781 C306.969971,9.16861979 306.899089,8.97184245 306.757324,8.83642578 C306.61556,8.70100911 306.426188,8.63330078 306.189209,8.63330078 C305.960693,8.63330078 305.768148,8.71582031 305.611572,8.88085938 C305.454997,9.04589844 305.376709,9.26595052 305.376709,9.54101563 L305.376709,16.5424805 C305.376709,16.8133138 305.450765,17.0249023 305.598877,17.1772461 C305.746989,17.3295898 305.935303,17.4057617 306.163818,17.4057617 Z M308.639404,13.9907227 C308.258545,13.9907227 307.940104,13.8902181 307.684082,13.689209 C307.42806,13.4881999 307.246094,13.2300618 307.138184,12.9147949 C307.030273,12.599528 306.976318,12.2366536 306.976318,11.8261719 C306.976318,11.0983073 307.104329,10.550293 307.360352,10.1821289 C307.616374,9.81396484 308.049072,9.62988281 308.658447,9.62988281 C308.895426,9.62988281 309.107015,9.67643229 309.293213,9.76953125 C309.479411,9.86263021 309.630697,9.98323568 309.74707,10.1313477 C309.863444,10.2794596 309.959717,10.4529622 310.035889,10.6518555 C310.112061,10.8507487 310.164958,11.047526 310.19458,11.2421875 C310.224202,11.436849 310.239014,11.6357422 310.239014,11.8388672 C310.239014,12.063151 310.222087,12.2747396 310.188232,12.4736328 C310.154378,12.672526 310.098307,12.8661296 310.02002,13.0544434 C309.941732,13.2427572 309.843343,13.4046224 309.724854,13.5400391 C309.606364,13.6754557 309.45402,13.7844238 309.267822,13.8669434 C309.081624,13.9494629 308.872152,13.9907227 308.639404,13.9907227 Z" id="Shape"></path>
+                <path d="M312.828857,11.7563477 C312.828857,12.8015951 313.130371,13.6162109 313.733398,14.2001953 C314.336426,14.7841797 315.139404,15.0761719 316.142334,15.0761719 C317.035238,15.0761719 317.815999,14.7947591 318.484619,14.2319336 C318.620036,14.113444 318.687744,13.9695638 318.687744,13.800293 C318.687744,13.652181 318.636963,13.5188802 318.5354,13.4003906 C318.433838,13.281901 318.317464,13.2226562 318.186279,13.2226562 C318.097412,13.2226562 318.014893,13.2522786 317.938721,13.3115234 C317.342041,13.7389323 316.781331,13.9526367 316.256592,13.9526367 C315.101318,13.9526367 314.489827,13.355957 314.422119,12.1625977 L318.433838,12.1625977 C318.586182,12.1625977 318.700439,12.1118164 318.776611,12.0102539 C318.852783,11.9086914 318.890869,11.7753906 318.890869,11.6103516 C318.886637,11.1914062 318.825277,10.8020833 318.706787,10.4423828 C318.588298,10.0826823 318.413737,9.75895182 318.183105,9.47119141 C317.952474,9.18343099 317.647786,8.95808919 317.269043,8.79516602 C316.890299,8.63224284 316.455485,8.55078125 315.9646,8.55078125 C315.325602,8.55078125 314.764893,8.69889323 314.282471,8.99511719 C313.800049,9.29134115 313.437174,9.67960612 313.193848,10.1599121 C312.950521,10.6402181 312.828857,11.1723633 312.828857,11.7563477 Z M314.434814,11.2675781 C314.4729,10.8063151 314.626302,10.4127604 314.89502,10.0869141 C315.163737,9.76106771 315.535075,9.59814453 316.009033,9.59814453 C316.495687,9.59814453 316.862793,9.75472005 317.110352,10.0678711 C317.35791,10.3810221 317.498617,10.7809245 317.532471,11.2675781 L314.434814,11.2675781 Z" id="Shape"></path>
+            </g>
+        </g>
+        <g id="dash-box-@token" stroke-width="1" transform="translate(634, 26)" fill="#9172E2">
+            <path d="M10.9746435,309 L10.974,310 L8.974,310 L8.97464335,309 L10.9746435,309 Z M14.9746435,309 L14.974,310 L12.974,310 L12.9746433,309 L14.9746435,309 Z M18.9746435,309 L18.974,310 L16.974,310 L16.9746433,309 L18.9746435,309 Z M22.9746435,309 L22.974,310 L20.974,310 L20.9746433,309 L22.9746435,309 Z M26.9746435,309 L26.974,310 L24.974,310 L24.9746433,309 L26.9746435,309 Z M30.9746435,309 L30.974,310 L28.974,310 L28.9746433,309 L30.9746435,309 Z M34.9746435,309 L34.974,310 L32.974,310 L32.9746433,309 L34.9746435,309 Z M38.9746435,309 L38.974,310 L36.974,310 L36.9746433,309 L38.9746435,309 Z M42.9746435,309 L42.974,310 L40.974,310 L40.9746433,309 L42.9746435,309 Z M46.9746435,309 L46.974,310 L44.974,310 L44.9746433,309 L46.9746435,309 Z M50.9746435,309 L50.974,310 L48.974,310 L48.9746433,309 L50.9746435,309 Z M54.9746435,309 L54.974,310 L52.974,310 L52.9746433,309 L54.9746435,309 Z M58.9746434,309 L58.974,310 L56.974,310 L56.9746433,309 L58.9746434,309 Z M62.9746434,309 L62.974,310 L60.974,310 L60.9746433,309 L62.9746434,309 Z M66.9746434,309 L66.974,310 L64.974,310 L64.9746433,309 L66.9746434,309 Z M70.9746434,309 L70.974,310 L68.974,310 L68.9746433,309 L70.9746434,309 Z M74.9746434,309 L74.974,310 L72.974,310 L72.9746433,309 L74.9746434,309 Z M78.9746434,309 L78.974,310 L76.974,310 L76.9746433,309 L78.9746434,309 Z M82.9746434,309 L82.974,310 L80.974,310 L80.9746433,309 L82.9746434,309 Z M86.9746434,309 L86.974,310 L84.974,310 L84.9746433,309 L86.9746434,309 Z M90.9746434,309 L90.974,310 L88.974,310 L88.9746433,309 L90.9746434,309 Z M94.9746434,309 L94.974,310 L92.974,310 L92.9746433,309 L94.9746434,309 Z M98.9746434,309 L98.974,310 L96.974,310 L96.9746433,309 L98.9746434,309 Z M102.974643,309 L102.974,310 L100.974,310 L100.974643,309 L102.974643,309 Z M106.974643,309 L106.974,310 L104.974,310 L104.974643,309 L106.974643,309 Z M110.974644,309 L110.974,310 L108.974,310 L108.974643,309 L110.974644,309 Z M114.974644,309 L114.974,310 L112.974,310 L112.974643,309 L114.974644,309 Z M118.974644,309 L118.974,310 L116.974,310 L116.974643,309 L118.974644,309 Z M122.974644,309 L122.974,310 L120.974,310 L120.974643,309 L122.974644,309 Z M126.974644,309 L126.974,310 L124.974,310 L124.974643,309 L126.974644,309 Z M130.974644,309 L130.974,310 L128.974,310 L128.974643,309 L130.974644,309 Z M134.974644,309 L134.974,310 L132.974,310 L132.974643,309 L134.974644,309 Z M138.974644,309 L138.974,310 L136.974,310 L136.974643,309 L138.974644,309 Z M142.974644,309 L142.974,310 L140.974,310 L140.974643,309 L142.974644,309 Z M146.974644,309 L146.974,310 L144.974,310 L144.974643,309 L146.974644,309 Z M150.974644,309 L150.974,310 L148.974,310 L148.974643,309 L150.974644,309 Z M154.974644,309 L154.974,310 L152.974,310 L152.974643,309 L154.974644,309 Z M158.974644,309 L158.974,310 L156.974,310 L156.974643,309 L158.974644,309 Z M162.974644,309 L162.974,310 L160.974,310 L160.974643,309 L162.974644,309 Z M166.974643,309 L166.974,310 L164.974,310 L164.974643,309 L166.974643,309 Z M170.974643,309 L170.974,310 L168.974,310 L168.974643,309 L170.974643,309 Z M174.974643,309 L174.974,310 L172.974,310 L172.974643,309 L174.974643,309 Z M178.974643,309 L178.974,310 L176.974,310 L176.974643,309 L178.974643,309 Z M182.974643,309 L182.974,310 L180.974,310 L180.974643,309 L182.974643,309 Z M186.974643,309 L186.974,310 L184.974,310 L184.974643,309 L186.974643,309 Z M190.974643,309 L190.974,310 L188.974,310 L188.974643,309 L190.974643,309 Z M194.974643,309 L194.974,310 L192.974,310 L192.974643,309 L194.974643,309 Z M198.974643,309 L198.974,310 L196.974,310 L196.974643,309 L198.974643,309 Z M202.974643,309 L202.974,310 L200.974,310 L200.974643,309 L202.974643,309 Z M206.974643,309 L206.974,310 L204.974,310 L204.974643,309 L206.974643,309 Z M210.974643,309 L210.974,310 L208.974,310 L208.974643,309 L210.974643,309 Z M214.974643,309 L214.974,310 L212.974,310 L212.974643,309 L214.974643,309 Z M218.974643,309 L218.974,310 L216.974,310 L216.974643,309 L218.974643,309 Z M222.974643,309 L222.974,310 L220.974,310 L220.974643,309 L222.974643,309 Z M226.974643,309 L226.974,310 L224.974,310 L224.974643,309 L226.974643,309 Z M230.974643,309 L230.974,310 L228.974,310 L228.974643,309 L230.974643,309 Z M234.974643,309 L234.974,310 L232.974,310 L232.974643,309 L234.974643,309 Z M238.974643,309 L238.974,310 L236.974,310 L236.974643,309 L238.974643,309 Z M242.974643,309 L242.974,310 L240.974,310 L240.974643,309 L242.974643,309 Z M246.974643,309 L246.974,310 L244.974,310 L244.974643,309 L246.974643,309 Z M250.974643,309 L250.974,310 L248.974,310 L248.974643,309 L250.974643,309 Z M254.974643,309 L254.974,310 L252.974,310 L252.974643,309 L254.974643,309 Z M258.974643,309 L258.974,310 L256.974,310 L256.974643,309 L258.974643,309 Z M262.974643,309 L262.974,310 L260.974,310 L260.974643,309 L262.974643,309 Z M266.974643,309 L266.974,310 L264.974,310 L264.974643,309 L266.974643,309 Z M270.974643,309 L270.974,310 L268.974,310 L268.974643,309 L270.974643,309 Z M274.974643,309 L274.974,310 L272.974,310 L272.974643,309 L274.974643,309 Z M278.974643,309 L278.974,310 L276.974,310 L276.974643,309 L278.974643,309 Z M282.974643,309 L282.974,310 L280.974,310 L280.974643,309 L282.974643,309 Z M286.974643,309 L286.974,310 L284.974,310 L284.974643,309 L286.974643,309 Z M290.974643,309 L290.974,310 L288.974,310 L288.974643,309 L290.974643,309 Z M294.974643,309 L294.974,310 L292.974,310 L292.974643,309 L294.974643,309 Z M298.974643,309 L298.974,310 L296.974,310 L296.974643,309 L298.974643,309 Z M302.974643,309 L302.974,310 L300.974,310 L300.974643,309 L302.974643,309 Z M306.974643,309 L306.974,310 L304.974,310 L304.974643,309 L306.974643,309 Z M310.974643,309 L310.974,310 L308.974,310 L308.974643,309 L310.974643,309 Z M314.974643,309 L314.974,310 L312.974,310 L312.974643,309 L314.974643,309 Z M318.974643,309 L318.974,310 L316.974,310 L316.974643,309 L318.974643,309 Z M322.867128,308.946924 L322.989911,309.939358 C322.665554,309.979386 322.335184,310 322,310 L320.974,310 L320.974627,309 L322.014777,308.999985 C322.301601,308.999394 322.586036,308.981601 322.867128,308.946924 Z M5.41029967,308.505636 C5.95157753,308.721318 6.51934132,308.86866 7.10343083,308.943229 L6.97697115,309.935201 C6.30134705,309.848962 5.65205616,309.678449 5.03997173,309.434537 L5.41029967,308.505636 Z M326.083941,307.685933 L326.667418,308.498063 C326.128578,308.885784 325.539554,309.208137 324.911958,309.453512 L324.547268,308.522383 C325.092723,308.309177 325.608785,308.027904 326.083941,307.685933 Z M2.56157145,306.407625 C2.93082185,306.86259 3.35581002,307.270137 3.82581503,307.619888 L3.22929764,308.422488 C2.69189028,308.022642 2.20638657,307.556939 1.78473163,307.037323 L2.56157145,306.407625 Z M328.327269,304.99865 L329.230602,305.42759 C328.941958,306.035413 328.57893,306.601062 328.153278,307.112774 L327.384567,306.473178 C327.759145,306.022849 328.076129,305.527491 328.327269,304.99865 Z M1.10775661,303.231225 C1.21033515,303.809908 1.38509885,304.369875 1.62708778,304.900577 L0.717340098,305.315739 C0.443469481,304.715179 0.241701076,304.07482 0.123099669,303.405726 L1.10775661,303.231225 Z M330,301.512 L330,302 C330,302.515018 329.951333,303.01867 329.858348,303.506607 L328.876065,303.319201 C328.958258,302.887827 329,302.446854 329,302 L329,301.512678 L330,301.512 Z M1,299.461965 L1,301.461965 L0,301.461 L0,299.461 L1,299.461965 Z M330,297.512 L330,299.512 L329,299.512678 L329,297.512678 L330,297.512 Z M1,295.461965 L1,297.461965 L0,297.461 L0,295.461 L1,295.461965 Z M330,293.512 L330,295.512 L329,295.512678 L329,293.512678 L330,293.512 Z M1,291.461965 L1,293.461965 L0,293.461 L0,291.461 L1,291.461965 Z M330,289.512 L330,291.512 L329,291.512678 L329,289.512678 L330,289.512 Z M1,287.461965 L1,289.461965 L0,289.461 L0,287.461 L1,287.461965 Z M330,285.512 L330,287.512 L329,287.512678 L329,285.512678 L330,285.512 Z M1,283.461965 L1,285.461965 L0,285.461 L0,283.461 L1,283.461965 Z M330,281.512 L330,283.512 L329,283.512678 L329,281.512678 L330,281.512 Z M1,279.461965 L1,281.461965 L0,281.461 L0,279.461 L1,279.461965 Z M330,277.512 L330,279.512 L329,279.512678 L329,277.512678 L330,277.512 Z M1,275.461965 L1,277.461965 L0,277.461 L0,275.461 L1,275.461965 Z M330,273.512 L330,275.512 L329,275.512678 L329,273.512678 L330,273.512 Z M1,271.461965 L1,273.461965 L0,273.461 L0,271.461 L1,271.461965 Z M330,269.512 L330,271.512 L329,271.512678 L329,269.512678 L330,269.512 Z M1,267.461965 L1,269.461965 L0,269.461 L0,267.461 L1,267.461965 Z M330,265.512 L330,267.512 L329,267.512678 L329,265.512678 L330,265.512 Z M1,263.461965 L1,265.461965 L0,265.461 L0,263.461 L1,263.461965 Z M330,261.512 L330,263.512 L329,263.512678 L329,261.512678 L330,261.512 Z M1,259.461965 L1,261.461965 L0,261.461 L0,259.461 L1,259.461965 Z M330,257.512 L330,259.512 L329,259.512678 L329,257.512678 L330,257.512 Z M1,255.461965 L1,257.461965 L0,257.461 L0,255.461 L1,255.461965 Z M330,253.512 L330,255.512 L329,255.512678 L329,253.512678 L330,253.512 Z M1,251.461965 L1,253.461965 L0,253.461 L0,251.461 L1,251.461965 Z M330,249.512 L330,251.512 L329,251.512678 L329,249.512678 L330,249.512 Z M1,247.461965 L1,249.461965 L0,249.461 L0,247.461 L1,247.461965 Z M330,245.512 L330,247.512 L329,247.512678 L329,245.512678 L330,245.512 Z M1,243.461965 L1,245.461965 L0,245.461 L0,243.461 L1,243.461965 Z M330,241.512 L330,243.512 L329,243.512678 L329,241.512678 L330,241.512 Z M1,239.461965 L1,241.461965 L0,241.461 L0,239.461 L1,239.461965 Z M330,237.512 L330,239.512 L329,239.512678 L329,237.512678 L330,237.512 Z M1,235.461965 L1,237.461965 L0,237.461 L0,235.461 L1,235.461965 Z M330,233.512 L330,235.512 L329,235.512678 L329,233.512678 L330,233.512 Z M1,231.461965 L1,233.461965 L0,233.461 L0,231.461 L1,231.461965 Z M330,229.512 L330,231.512 L329,231.512678 L329,229.512678 L330,229.512 Z M1,227.461965 L1,229.461965 L0,229.461 L0,227.461 L1,227.461965 Z M330,225.512 L330,227.512 L329,227.512678 L329,225.512678 L330,225.512 Z M1,223.461965 L1,225.461965 L0,225.461 L0,223.461 L1,223.461965 Z M330,221.512 L330,223.512 L329,223.512678 L329,221.512678 L330,221.512 Z M1,219.461965 L1,221.461965 L0,221.461 L0,219.461 L1,219.461965 Z M330,217.512 L330,219.512 L329,219.512678 L329,217.512678 L330,217.512 Z M1,215.461965 L1,217.461965 L0,217.461 L0,215.461 L1,215.461965 Z M330,213.512 L330,215.512 L329,215.512678 L329,213.512678 L330,213.512 Z M1,211.461965 L1,213.461965 L0,213.461 L0,211.461 L1,211.461965 Z M330,209.512 L330,211.512 L329,211.512678 L329,209.512678 L330,209.512 Z M1,207.461965 L1,209.461965 L0,209.461 L0,207.461 L1,207.461965 Z M330,205.512 L330,207.512 L329,207.512678 L329,205.512678 L330,205.512 Z M1,203.461965 L1,205.461965 L0,205.461 L0,203.461 L1,203.461965 Z M330,201.512 L330,203.512 L329,203.512678 L329,201.512678 L330,201.512 Z M1,199.461965 L1,201.461965 L0,201.461 L0,199.461 L1,199.461965 Z M330,197.512 L330,199.512 L329,199.512678 L329,197.512678 L330,197.512 Z M1,195.461965 L1,197.461965 L0,197.461 L0,195.461 L1,195.461965 Z M330,193.512 L330,195.512 L329,195.512678 L329,193.512678 L330,193.512 Z M1,191.461965 L1,193.461965 L0,193.461 L0,191.461 L1,191.461965 Z M330,189.512 L330,191.512 L329,191.512678 L329,189.512678 L330,189.512 Z M1,187.461965 L1,189.461965 L0,189.461 L0,187.461 L1,187.461965 Z M330,185.512 L330,187.512 L329,187.512678 L329,185.512678 L330,185.512 Z M1,183.461965 L1,185.461965 L0,185.461 L0,183.461 L1,183.461965 Z M330,181.512 L330,183.512 L329,183.512678 L329,181.512678 L330,181.512 Z M1,179.461965 L1,181.461965 L0,181.461 L0,179.461 L1,179.461965 Z M330,177.512 L330,179.512 L329,179.512678 L329,177.512678 L330,177.512 Z M1,175.461965 L1,177.461965 L0,177.461 L0,175.461 L1,175.461965 Z M330,173.512 L330,175.512 L329,175.512678 L329,173.512678 L330,173.512 Z M1,171.461965 L1,173.461965 L0,173.461 L0,171.461 L1,171.461965 Z M330,169.512 L330,171.512 L329,171.512678 L329,169.512678 L330,169.512 Z M1,167.461965 L1,169.461965 L0,169.461 L0,167.461 L1,167.461965 Z M330,165.512 L330,167.512 L329,167.512678 L329,165.512678 L330,165.512 Z M1,163.461965 L1,165.461965 L0,165.461 L0,163.461 L1,163.461965 Z M330,161.512 L330,163.512 L329,163.512678 L329,161.512678 L330,161.512 Z M1,159.461965 L1,161.461965 L0,161.461 L0,159.461 L1,159.461965 Z M330,157.512 L330,159.512 L329,159.512678 L329,157.512678 L330,157.512 Z M1,155.461965 L1,157.461965 L0,157.461 L0,155.461 L1,155.461965 Z M330,153.512 L330,155.512 L329,155.512678 L329,153.512678 L330,153.512 Z M1,151.461965 L1,153.461965 L0,153.461 L0,151.461 L1,151.461965 Z M330,149.512 L330,151.512 L329,151.512678 L329,149.512678 L330,149.512 Z M1,147.461965 L1,149.461965 L0,149.461 L0,147.461 L1,147.461965 Z M330,145.512 L330,147.512 L329,147.512678 L329,145.512678 L330,145.512 Z M1,143.461965 L1,145.461965 L0,145.461 L0,143.461 L1,143.461965 Z M330,141.512 L330,143.512 L329,143.512678 L329,141.512678 L330,141.512 Z M1,139.461965 L1,141.461965 L0,141.461 L0,139.461 L1,139.461965 Z M330,137.512 L330,139.512 L329,139.512678 L329,137.512678 L330,137.512 Z M1,135.461965 L1,137.461965 L0,137.461 L0,135.461 L1,135.461965 Z M330,133.512 L330,135.512 L329,135.512678 L329,133.512678 L330,133.512 Z M1,131.461965 L1,133.461965 L0,133.461 L0,131.461 L1,131.461965 Z M330,129.512 L330,131.512 L329,131.512678 L329,129.512678 L330,129.512 Z M1,127.461965 L1,129.461965 L0,129.461 L0,127.461 L1,127.461965 Z M330,125.512 L330,127.512 L329,127.512678 L329,125.512678 L330,125.512 Z M1,123.461965 L1,125.461965 L0,125.461 L0,123.461 L1,123.461965 Z M330,121.512 L330,123.512 L329,123.512678 L329,121.512678 L330,121.512 Z M1,119.461965 L1,121.461965 L0,121.461 L0,119.461 L1,119.461965 Z M330,117.512 L330,119.512 L329,119.512678 L329,117.512678 L330,117.512 Z M1,115.461965 L1,117.461965 L0,117.461 L0,115.461 L1,115.461965 Z M330,113.512 L330,115.512 L329,115.512678 L329,113.512678 L330,113.512 Z M1,111.461965 L1,113.461965 L0,113.461 L0,111.461 L1,111.461965 Z M330,109.512 L330,111.512 L329,111.512678 L329,109.512678 L330,109.512 Z M1,107.461965 L1,109.461965 L0,109.461 L0,107.461 L1,107.461965 Z M330,105.512 L330,107.512 L329,107.512678 L329,105.512678 L330,105.512 Z M1,103.461965 L1,105.461965 L0,105.461 L0,103.461 L1,103.461965 Z M330,101.512 L330,103.512 L329,103.512678 L329,101.512678 L330,101.512 Z M1,99.461965 L1,101.461965 L0,101.461 L0,99.461 L1,99.461965 Z M330,97.512 L330,99.512 L329,99.5126783 L329,97.5126783 L330,97.512 Z M1,95.461965 L1,97.461965 L0,97.461 L0,95.461 L1,95.461965 Z M330,93.512 L330,95.512 L329,95.5126783 L329,93.5126783 L330,93.512 Z M1,91.461965 L1,93.461965 L0,93.461 L0,91.461 L1,91.461965 Z M330,89.512 L330,91.512 L329,91.5126783 L329,89.5126783 L330,89.512 Z M1,87.461965 L1,89.461965 L0,89.461 L0,87.461 L1,87.461965 Z M330,85.512 L330,87.512 L329,87.5126783 L329,85.5126783 L330,85.512 Z M1,83.461965 L1,85.461965 L0,85.461 L0,83.461 L1,83.461965 Z M330,81.512 L330,83.512 L329,83.5126783 L329,81.5126783 L330,81.512 Z M1,79.461965 L1,81.461965 L0,81.461 L0,79.461 L1,79.461965 Z M330,77.512 L330,79.512 L329,79.5126783 L329,77.5126783 L330,77.512 Z M1,75.461965 L1,77.461965 L0,77.461 L0,75.461 L1,75.461965 Z M330,73.512 L330,75.512 L329,75.5126783 L329,73.5126783 L330,73.512 Z M1,71.461965 L1,73.461965 L0,73.461 L0,71.461 L1,71.461965 Z M330,69.512 L330,71.512 L329,71.5126783 L329,69.5126783 L330,69.512 Z M1,67.461965 L1,69.461965 L0,69.461 L0,67.461 L1,67.461965 Z M330,65.512 L330,67.512 L329,67.5126783 L329,65.5126783 L330,65.512 Z M1,63.461965 L1,65.461965 L0,65.461 L0,63.461 L1,63.461965 Z M330,61.512 L330,63.512 L329,63.5126783 L329,61.5126783 L330,61.512 Z M1,59.461965 L1,61.461965 L0,61.461 L0,59.461 L1,59.461965 Z M330,57.512 L330,59.512 L329,59.5126783 L329,57.5126783 L330,57.512 Z M1,55.461965 L1,57.461965 L0,57.461 L0,55.461 L1,55.461965 Z M330,53.512 L330,55.512 L329,55.5126783 L329,53.5126783 L330,53.512 Z M1,51.461965 L1,53.461965 L0,53.461 L0,51.461 L1,51.461965 Z M330,49.512 L330,51.512 L329,51.5126783 L329,49.5126783 L330,49.512 Z M1,47.461965 L1,49.461965 L0,49.461 L0,47.461 L1,47.461965 Z M330,45.512 L330,47.512 L329,47.5126783 L329,45.5126783 L330,45.512 Z M1,43.461965 L1,45.461965 L0,45.461 L0,43.461 L1,43.461965 Z M330,41.512 L330,43.512 L329,43.5126783 L329,41.5126783 L330,41.512 Z M1,39.461965 L1,41.461965 L0,41.461 L0,39.461 L1,39.461965 Z M330,37.512 L330,39.512 L329,39.5126783 L329,37.5126783 L330,37.512 Z M1,35.461965 L1,37.461965 L0,37.461 L0,35.461 L1,35.461965 Z M330,33.512 L330,35.512 L329,35.5126783 L329,33.5126783 L330,33.512 Z M1,31.461965 L1,33.461965 L0,33.461 L0,31.461 L1,31.461965 Z M330,29.512 L330,31.512 L329,31.5126783 L329,29.5126783 L330,29.512 Z M1,27.461965 L1,29.461965 L0,29.461 L0,27.461 L1,27.461965 Z M330,25.512 L330,27.512 L329,27.5126783 L329,25.5126783 L330,25.512 Z M1,23.461965 L1,25.461965 L0,25.461 L0,23.461 L1,23.461965 Z M330,21.512 L330,23.512 L329,23.5126783 L329,21.5126783 L330,21.512 Z M1,19.461965 L1,21.461965 L0,21.461 L0,19.461 L1,19.461965 Z M330,17.512 L330,19.512 L329,19.5126783 L329,17.5126783 L330,17.512 Z M1,15.461965 L1,17.461965 L0,17.461 L0,15.461 L1,15.461965 Z M330,13.512 L330,15.512 L329,15.5126783 L329,13.5126783 L330,13.512 Z M1,11.461965 L1,13.461965 L0,13.461 L0,11.461 L1,11.461965 Z M330,9.512 L330,11.512 L329,11.5126783 L329,9.51267833 L330,9.512 Z M0.0177810396,7.46235014 L1.01560984,7.52821226 C1.00521986,7.68466746 1,7.84198159 1,8 L1,9.46196502 L0,9.461 L0,8 C0,7.81932186 0.00598958881,7.64004267 0.0177810396,7.46235014 Z M328.991738,7.65643888 C328.96328,7.06621553 328.861526,6.4886371 328.69013,5.93312344 L329.645809,5.63871559 C329.839269,6.26585924 329.957689,6.92592236 329.990584,7.60841969 L328.991738,7.65643888 Z M1.26026912,3.68817589 L2.10224493,4.22769122 C1.78656347,4.72006919 1.53367638,5.25013249 1.3502652,5.80651827 L0.400554683,5.49338901 C0.612012081,4.85193918 0.902417359,4.24636767 1.26026912,3.68817589 Z M327.910283,4.24730362 C327.595874,3.75330722 327.220381,3.30005668 326.793473,2.8987301 L327.478141,2.16987536 C327.964541,2.62708633 328.393716,3.14447851 328.753775,3.71016071 L327.910283,4.24730362 Z M4.18303226,0.967564053 L4.66099377,1.84594486 C4.14669322,2.12566396 3.66909236,2.46925191 3.23964084,2.86780054 L2.55894601,2.13523322 C3.04750167,1.68176376 3.5928096,1.28859486 4.18303226,0.967564053 Z M325.844969,0.982866923 L325.363686,1.85943199 C324.851542,1.57820747 324.305758,1.36213399 323.737382,1.21704772 L323.985321,0.248271715 C324.641414,0.415804123 325.265066,0.664438906 325.844969,0.982866923 Z M10,0 L10,1 L8.00000014,1 L7.59669449,1.01139517 C7.19522811,1.0341476 6.79959301,1.09086155 6.41261147,1.18053077 L6.18734244,0.206234069 C6.76976317,0.0713155963 7.37655326,0 8,0 L10,0 Z M170,0 L170,1 L168,1 L168,0 L170,0 Z M314,0 L314,1 L312,1 L312,0 L314,0 Z M310,0 L310,1 L308,1 L308,0 L310,0 Z M306,0 L306,1 L304,1 L304,0 L306,0 Z M302,0 L302,1 L300,1 L300,0 L302,0 Z M298,0 L298,1 L296,1 L296,0 L298,0 Z M294,0 L294,1 L292,1 L292,0 L294,0 Z M290,0 L290,1 L288,1 L288,0 L290,0 Z M286,0 L286,1 L284,1 L284,0 L286,0 Z M282,0 L282,1 L280,1 L280,0 L282,0 Z M278,0 L278,1 L276,1 L276,0 L278,0 Z M274,0 L274,1 L272,1 L272,0 L274,0 Z M270,0 L270,1 L268,1 L268,0 L270,0 Z M266,0 L266,1 L264,1 L264,0 L266,0 Z M262,0 L262,1 L260,1 L260,0 L262,0 Z M258,0 L258,1 L256,1 L256,0 L258,0 Z M254,0 L254,1 L252,1 L252,0 L254,0 Z M250,0 L250,1 L248,1 L248,0 L250,0 Z M246,0 L246,1 L244,1 L244,0 L246,0 Z M241.999,0 L242,1 L240,1 L239.999,0 L241.999,0 Z M237.999,0 L238,1 L236,1 L235.999,0 L237.999,0 Z M233.999,0 L234,1 L232,1 L231.999,0 L233.999,0 Z M230,0 L230,1 L228,1 L228,0 L230,0 Z M226,0 L226,1 L224,1 L224,0 L226,0 Z M222,0 L222,1 L220,1 L220,0 L222,0 Z M218,0 L218,1 L216,1 L216,0 L218,0 Z M214,0 L214,1 L212,1 L212,0 L214,0 Z M210,0 L210,1 L208,1 L208,0 L210,0 Z M206,0 L206,1 L204,1 L204,0 L206,0 Z M202,0 L202,1 L200,1 L200,0 L202,0 Z M198,0 L198,1 L196,1 L196,0 L198,0 Z M194,0 L194,1 L192,1 L192,0 L194,0 Z M190,0 L190,1 L188,1 L188,0 L190,0 Z M186,0 L186,1 L184,1 L184,0 L186,0 Z M182,0 L182,1 L180,1 L180,0 L182,0 Z M178,0 L178,1 L176,1 L176,0 L178,0 Z M174,0 L174,1 L172,1 L172,0 L174,0 Z M322,0 L322,1 L320,1 L320,0 L322,0 Z M166,0 L166,1 L164,1 L164,0 L166,0 Z M162,0 L162,1 L160,1 L160,0 L162,0 Z M158,0 L158,1 L156,1 L156,0 L158,0 Z M154,0 L154,1 L152,1 L152,0 L154,0 Z M150,0 L150,1 L148,1 L148,0 L150,0 Z M146,0 L146,1 L144,1 L144,0 L146,0 Z M142,0 L142,1 L140,1 L140,0 L142,0 Z M138,0 L138,1 L136,1 L136,0 L138,0 Z M134,0 L134,1 L132,1 L132,0 L134,0 Z M130,0 L130,1 L128,1 L128,0 L130,0 Z M125.999,0 L126,1 L124,1 L123.999,0 L125.999,0 Z M121.999,0 L122,1 L120,1 L119.999,0 L121.999,0 Z M118,0 L118,1 L116,1 L116,0 L118,0 Z M114,0 L114,1 L112,1 L112,0 L114,0 Z M110,0 L110,1 L108,1 L108,0 L110,0 Z M106,0 L106,1 L104,1 L104,0 L106,0 Z M102,0 L102,1 L100,1 L100,0 L102,0 Z M98,0 L98,1 L96,1 L96,0 L98,0 Z M94,0 L94,1 L92,1 L92,0 L94,0 Z M90,0 L90,1 L88,1 L88,0 L90,0 Z M86,0 L86,1 L84,1 L84,0 L86,0 Z M82,0 L82,1 L80,1 L80,0 L82,0 Z M78,0 L78,1 L76,1 L76,0 L78,0 Z M74,0 L74,1 L72,1 L72,0 L74,0 Z M70,0 L70,1 L68,1 L68,0 L70,0 Z M66,0 L66,1 L64,1 L63.999,0 L66,0 Z M62,0 L62,1 L60,1 L60,0 L62,0 Z M58,0 L58,1 L56,1 L56,0 L58,0 Z M54,0 L54,1 L52,1 L52,0 L54,0 Z M50,0 L50,1 L48,1 L48,0 L50,0 Z M46,0 L46,1 L44,1 L44,0 L46,0 Z M42,0 L42,1 L40,1 L40,0 L42,0 Z M38,0 L38,1 L36,1 L36,0 L38,0 Z M34,0 L34,1 L32,1 L32,0 L34,0 Z M30,0 L30,1 L28,1 L28,0 L30,0 Z M26,0 L26,1 L24,1 L24,0 L26,0 Z M22,0 L22,1 L20,1 L20,0 L22,0 Z M18,0 L18,1 L16,1 L16,0 L18,0 Z M14,0 L14,1 L12,1 L12,0 L14,0 Z M318,0 L318,1 L316,1 L316,0 L318,0 Z" id="dash-box-2" fill-rule="nonzero"></path>
+            <g id="Token-Pooling-Type" transform="translate(0, 6)" fill-rule="nonzero">
+                <path d="M109.473877,15.0063477 C109.723551,15.0063477 109.938314,14.9185384 110.118164,14.7429199 C110.298014,14.5673014 110.387939,14.3377279 110.387939,14.0541992 L110.387939,7.28759766 L112.450928,7.28759766 C112.662516,7.28759766 112.831787,7.21565755 112.95874,7.07177734 C113.085693,6.92789714 113.14917,6.7586263 113.14917,6.56396484 C113.14917,6.37353516 113.084635,6.20532227 112.955566,6.05932617 C112.826497,5.91333008 112.658285,5.84033203 112.450928,5.84033203 L106.496826,5.84033203 C106.285238,5.84033203 106.114909,5.91333008 105.98584,6.05932617 C105.856771,6.20532227 105.792236,6.37565104 105.792236,6.5703125 C105.792236,6.76497396 105.855713,6.93318685 105.982666,7.07495117 C106.109619,7.21671549 106.281006,7.28759766 106.496826,7.28759766 L108.566162,7.28759766 L108.566162,14.0541992 C108.566162,14.3377279 108.656087,14.5673014 108.835938,14.7429199 C109.015788,14.9185384 109.228434,15.0063477 109.473877,15.0063477 Z" id="Path"></path>
+                <path d="M117.122803,13.9907227 C116.576904,13.9907227 116.151611,13.800293 115.846924,13.4194336 C115.542236,13.0385742 115.389893,12.5053711 115.389893,11.8198242 C115.389893,11.1258138 115.541178,10.587321 115.84375,10.2043457 C116.146322,9.82137044 116.572673,9.62988281 117.122803,9.62988281 C117.672933,9.62988281 118.100342,9.82242839 118.405029,10.2075195 C118.709717,10.5926107 118.862061,11.1300456 118.862061,11.8198242 C118.862061,12.5053711 118.709717,13.0385742 118.405029,13.4194336 C118.100342,13.800293 117.672933,13.9907227 117.122803,13.9907227 Z M117.122803,15.0761719 C117.57137,15.0761719 117.981852,15.0148112 118.354248,14.8920898 C118.726644,14.7693685 119.040853,14.6053874 119.296875,14.4001465 C119.552897,14.1949056 119.768717,13.9515788 119.944336,13.670166 C120.119954,13.3887533 120.249023,13.0935872 120.331543,12.784668 C120.414062,12.4757487 120.455322,12.1541341 120.455322,11.8198242 C120.455322,11.4685872 120.411947,11.1321615 120.325195,10.8105469 C120.238444,10.4889323 120.104085,10.1895345 119.922119,9.91235352 C119.740153,9.63517253 119.520101,9.39607747 119.261963,9.19506836 C119.003825,8.99405924 118.691732,8.83536784 118.325684,8.71899414 C117.959635,8.60262044 117.558675,8.54443359 117.122803,8.54443359 C116.678467,8.54443359 116.271159,8.60473633 115.900879,8.7253418 C115.530599,8.84594727 115.217448,9.00992839 114.961426,9.21728516 C114.705404,9.42464193 114.488525,9.66796875 114.310791,9.94726562 C114.133057,10.2265625 114.00293,10.5238444 113.92041,10.8391113 C113.837891,11.1543783 113.796631,11.4812826 113.796631,11.8198242 C113.796631,12.2472331 113.864339,12.6513672 113.999756,13.0322266 C114.135173,13.4130859 114.333008,13.7579753 114.593262,14.0668945 C114.853516,14.3758138 115.201579,14.6212565 115.637451,14.8032227 C116.073324,14.9851888 116.568441,15.0761719 117.122803,15.0761719 Z" id="Shape"></path>
+                <path d="M122.594482,15 C122.831462,15 123.026123,14.9217122 123.178467,14.7651367 C123.330811,14.6085612 123.406982,14.3885091 123.406982,14.1049805 L123.406982,11.8071289 L126.174561,14.7524414 C126.326904,14.9301758 126.510986,15.019043 126.726807,15.019043 C126.921468,15.019043 127.093913,14.9481608 127.244141,14.8063965 C127.394368,14.6646322 127.469482,14.4985352 127.469482,14.3081055 C127.469482,14.1430664 127.406006,13.9907227 127.279053,13.8510742 L125.089111,11.565918 L127.006104,9.72509766 C127.124593,9.6023763 127.183838,9.45638021 127.183838,9.28710937 C127.183838,9.09667969 127.114014,8.92635091 126.974365,8.77612305 C126.834717,8.62589518 126.676025,8.55078125 126.498291,8.55078125 C126.324788,8.55078125 126.170329,8.62483724 126.034912,8.77294922 L123.406982,11.3881836 L123.406982,6.74169922 C123.406982,6.45817057 123.331868,6.23811849 123.181641,6.08154297 C123.031413,5.92496745 122.839925,5.84667969 122.607178,5.84667969 C122.37443,5.84667969 122.184001,5.92496745 122.035889,6.08154297 C121.887777,6.23811849 121.813721,6.45817057 121.813721,6.74169922 L121.813721,14.1049805 C121.813721,14.3885091 121.886719,14.6085612 122.032715,14.7651367 C122.178711,14.9217122 122.365967,15 122.594482,15 Z" id="Path"></path>
+                <path d="M128.18042,11.7563477 C128.18042,12.8015951 128.481934,13.6162109 129.084961,14.2001953 C129.687988,14.7841797 130.490967,15.0761719 131.493896,15.0761719 C132.3868,15.0761719 133.167562,14.7947591 133.836182,14.2319336 C133.971598,14.113444 134.039307,13.9695638 134.039307,13.800293 C134.039307,13.652181 133.988525,13.5188802 133.886963,13.4003906 C133.7854,13.281901 133.669027,13.2226562 133.537842,13.2226562 C133.448975,13.2226562 133.366455,13.2522786 133.290283,13.3115234 C132.693604,13.7389323 132.132894,13.9526367 131.608154,13.9526367 C130.452881,13.9526367 129.84139,13.355957 129.773682,12.1625977 L133.7854,12.1625977 C133.937744,12.1625977 134.052002,12.1118164 134.128174,12.0102539 C134.204346,11.9086914 134.242432,11.7753906 134.242432,11.6103516 C134.2382,11.1914062 134.176839,10.8020833 134.05835,10.4423828 C133.93986,10.0826823 133.765299,9.75895182 133.534668,9.47119141 C133.304036,9.18343099 132.999349,8.95808919 132.620605,8.79516602 C132.241862,8.63224284 131.807048,8.55078125 131.316162,8.55078125 C130.677165,8.55078125 130.116455,8.69889323 129.634033,8.99511719 C129.151611,9.29134115 128.788737,9.67960612 128.54541,10.1599121 C128.302083,10.6402181 128.18042,11.1723633 128.18042,11.7563477 Z M129.786377,11.2675781 C129.824463,10.8063151 129.977865,10.4127604 130.246582,10.0869141 C130.515299,9.76106771 130.886637,9.59814453 131.360596,9.59814453 C131.847249,9.59814453 132.214355,9.75472005 132.461914,10.0678711 C132.709473,10.3810221 132.850179,10.7809245 132.884033,11.2675781 L129.786377,11.2675781 Z" id="Shape"></path>
+                <path d="M136.280029,15 C136.512777,15 136.704264,14.9227702 136.854492,14.7683105 C137.00472,14.6138509 137.079834,14.3990885 137.079834,14.1240234 L137.079834,11.4389648 C137.079834,10.9015299 137.234294,10.4656576 137.543213,10.1313477 C137.852132,9.79703776 138.21818,9.62988281 138.641357,9.62988281 C138.992594,9.62988281 139.281413,9.74519857 139.507812,9.97583008 C139.734212,10.2064616 139.847412,10.5397135 139.847412,10.9755859 L139.847412,14.1240234 C139.847412,14.3990885 139.921468,14.6138509 140.06958,14.7683105 C140.217692,14.9227702 140.40389,15 140.628174,15 C140.865153,15 141.059814,14.9227702 141.212158,14.7683105 C141.364502,14.6138509 141.440674,14.3990885 141.440674,14.1240234 L141.440674,10.9819336 C141.440674,10.5756836 141.379313,10.2138672 141.256592,9.89648438 C141.13387,9.57910156 140.966715,9.32413737 140.755127,9.1315918 C140.543538,8.93904622 140.303385,8.79305013 140.034668,8.69360352 C139.765951,8.5941569 139.479248,8.54443359 139.174561,8.54443359 C138.679443,8.54443359 138.253092,8.63753255 137.895508,8.82373047 C137.537923,9.00992839 137.266032,9.2828776 137.079834,9.64257813 L137.079834,9.42041016 C137.079834,9.17073568 137.005778,8.97713216 136.857666,8.83959961 C136.709554,8.70206706 136.52124,8.63330078 136.292725,8.63330078 C136.059977,8.63330078 135.867432,8.703125 135.715088,8.84277344 C135.562744,8.98242188 135.486572,9.17919922 135.486572,9.43310547 L135.486572,14.1240234 C135.486572,14.3990885 135.560628,14.6138509 135.70874,14.7683105 C135.856852,14.9227702 136.047282,15 136.280029,15 Z" id="Path"></path>
+                <path d="M147.839111,15.0063477 C148.088786,15.0063477 148.303548,14.9195964 148.483398,14.7460938 C148.663249,14.5725911 148.753174,14.3440755 148.753174,14.0605469 L148.753174,11.362793 L150.52417,11.362793 C152.838949,11.362793 153.996338,10.4402669 153.996338,8.59521484 C153.996338,8.09163411 153.91805,7.65893555 153.761475,7.29711914 C153.604899,6.93530273 153.375326,6.6496582 153.072754,6.44018555 C152.770182,6.23071289 152.421061,6.07836914 152.025391,5.9831543 C151.62972,5.88793945 151.169515,5.84033203 150.644775,5.84033203 L147.915283,5.84033203 C147.602132,5.84033203 147.358805,5.9461263 147.185303,6.15771484 C147.0118,6.36930339 146.925049,6.63590495 146.925049,6.95751953 L146.925049,14.0605469 C146.925049,14.3440755 147.016032,14.5725911 147.197998,14.7460938 C147.379964,14.9195964 147.593669,15.0063477 147.839111,15.0063477 Z M148.753174,10.0424805 L148.753174,7.23046875 L150.447998,7.23046875 C150.744222,7.23046875 150.990723,7.24739583 151.1875,7.28125 C151.384277,7.31510417 151.564128,7.37858073 151.727051,7.47167969 C151.889974,7.56477865 152.009521,7.70442708 152.085693,7.890625 C152.161865,8.07682292 152.199951,8.3116862 152.199951,8.59521484 C152.199951,8.88720703 152.161865,9.12841797 152.085693,9.31884766 C152.009521,9.50927734 151.889974,9.65633138 151.727051,9.76000977 C151.564128,9.86368815 151.380046,9.9366862 151.174805,9.97900391 C150.969564,10.0213216 150.712484,10.0424805 150.403564,10.0424805 L148.753174,10.0424805 Z" id="Shape"></path>
+                <path d="M158.058838,13.9907227 C157.512939,13.9907227 157.087646,13.800293 156.782959,13.4194336 C156.478271,13.0385742 156.325928,12.5053711 156.325928,11.8198242 C156.325928,11.1258138 156.477214,10.587321 156.779785,10.2043457 C157.082357,9.82137044 157.508708,9.62988281 158.058838,9.62988281 C158.608968,9.62988281 159.036377,9.82242839 159.341064,10.2075195 C159.645752,10.5926107 159.798096,11.1300456 159.798096,11.8198242 C159.798096,12.5053711 159.645752,13.0385742 159.341064,13.4194336 C159.036377,13.800293 158.608968,13.9907227 158.058838,13.9907227 Z M158.058838,15.0761719 C158.507406,15.0761719 158.917887,15.0148112 159.290283,14.8920898 C159.662679,14.7693685 159.976888,14.6053874 160.23291,14.4001465 C160.488932,14.1949056 160.704753,13.9515788 160.880371,13.670166 C161.05599,13.3887533 161.185059,13.0935872 161.267578,12.784668 C161.350098,12.4757487 161.391357,12.1541341 161.391357,11.8198242 C161.391357,11.4685872 161.347982,11.1321615 161.26123,10.8105469 C161.174479,10.4889323 161.04012,10.1895345 160.858154,9.91235352 C160.676188,9.63517253 160.456136,9.39607747 160.197998,9.19506836 C159.93986,8.99405924 159.627767,8.83536784 159.261719,8.71899414 C158.895671,8.60262044 158.49471,8.54443359 158.058838,8.54443359 C157.614502,8.54443359 157.207194,8.60473633 156.836914,8.7253418 C156.466634,8.84594727 156.153483,9.00992839 155.897461,9.21728516 C155.641439,9.42464193 155.424561,9.66796875 155.246826,9.94726562 C155.069092,10.2265625 154.938965,10.5238444 154.856445,10.8391113 C154.773926,11.1543783 154.732666,11.4812826 154.732666,11.8198242 C154.732666,12.2472331 154.800374,12.6513672 154.935791,13.0322266 C155.071208,13.4130859 155.269043,13.7579753 155.529297,14.0668945 C155.789551,14.3758138 156.137614,14.6212565 156.573486,14.8032227 C157.009359,14.9851888 157.504476,15.0761719 158.058838,15.0761719 Z" id="Shape"></path>
+                <path d="M165.733154,13.9907227 C165.187256,13.9907227 164.761963,13.800293 164.457275,13.4194336 C164.152588,13.0385742 164.000244,12.5053711 164.000244,11.8198242 C164.000244,11.1258138 164.15153,10.587321 164.454102,10.2043457 C164.756673,9.82137044 165.183024,9.62988281 165.733154,9.62988281 C166.283285,9.62988281 166.710693,9.82242839 167.015381,10.2075195 C167.320068,10.5926107 167.472412,11.1300456 167.472412,11.8198242 C167.472412,12.5053711 167.320068,13.0385742 167.015381,13.4194336 C166.710693,13.800293 166.283285,13.9907227 165.733154,13.9907227 Z M165.733154,15.0761719 C166.181722,15.0761719 166.592204,15.0148112 166.9646,14.8920898 C167.336995,14.7693685 167.651204,14.6053874 167.907227,14.4001465 C168.163249,14.1949056 168.379069,13.9515788 168.554688,13.670166 C168.730306,13.3887533 168.859375,13.0935872 168.941895,12.784668 C169.024414,12.4757487 169.065674,12.1541341 169.065674,11.8198242 C169.065674,11.4685872 169.022298,11.1321615 168.935547,10.8105469 C168.848796,10.4889323 168.714437,10.1895345 168.532471,9.91235352 C168.350505,9.63517253 168.130452,9.39607747 167.872314,9.19506836 C167.614176,8.99405924 167.302083,8.83536784 166.936035,8.71899414 C166.569987,8.60262044 166.169027,8.54443359 165.733154,8.54443359 C165.288818,8.54443359 164.88151,8.60473633 164.51123,8.7253418 C164.140951,8.84594727 163.827799,9.00992839 163.571777,9.21728516 C163.315755,9.42464193 163.098877,9.66796875 162.921143,9.94726562 C162.743408,10.2265625 162.613281,10.5238444 162.530762,10.8391113 C162.448242,11.1543783 162.406982,11.4812826 162.406982,11.8198242 C162.406982,12.2472331 162.474691,12.6513672 162.610107,13.0322266 C162.745524,13.4130859 162.943359,13.7579753 163.203613,14.0668945 C163.463867,14.3758138 163.81193,14.6212565 164.247803,14.8032227 C164.683675,14.9851888 165.178792,15.0761719 165.733154,15.0761719 Z" id="Shape"></path>
+                <path d="M171.230225,15 C171.467204,15 171.662923,14.9217122 171.817383,14.7651367 C171.971842,14.6085612 172.049072,14.390625 172.049072,14.1113281 L172.049072,6.74169922 C172.049072,6.46240234 171.9729,6.24446615 171.820557,6.08789062 C171.668213,5.9313151 171.475667,5.85302734 171.24292,5.85302734 C171.010173,5.85302734 170.819743,5.9313151 170.671631,6.08789062 C170.523519,6.24446615 170.449463,6.46240234 170.449463,6.74169922 L170.449463,14.1113281 C170.449463,14.3948568 170.522461,14.6138509 170.668457,14.7683105 C170.814453,14.9227702 171.001709,15 171.230225,15 Z" id="Path"></path>
+                <path d="M174.569092,15 C174.801839,15 174.993327,14.9217122 175.143555,14.7651367 C175.293783,14.6085612 175.368896,14.390625 175.368896,14.1113281 L175.368896,9.52832031 C175.368896,9.24479167 175.293783,9.02473958 175.143555,8.86816406 C174.993327,8.71158854 174.801839,8.63330078 174.569092,8.63330078 C174.336344,8.63330078 174.145915,8.71158854 173.997803,8.86816406 C173.849691,9.02473958 173.775635,9.24479167 173.775635,9.52832031 L173.775635,14.1113281 C173.775635,14.3948568 173.849691,14.6138509 173.997803,14.7683105 C174.145915,14.9227702 174.336344,15 174.569092,15 Z M174.569092,7.36376953 C174.844157,7.36376953 175.067383,7.28230794 175.23877,7.11938477 C175.410156,6.95646159 175.49585,6.7438151 175.49585,6.48144531 C175.49585,6.21907552 175.411214,6.00748698 175.241943,5.84667969 C175.072673,5.6858724 174.850505,5.60546875 174.575439,5.60546875 C174.296143,5.60546875 174.070801,5.6858724 173.899414,5.84667969 C173.728027,6.00748698 173.642334,6.21907552 173.642334,6.48144531 C173.642334,6.7438151 173.728027,6.95646159 173.899414,7.11938477 C174.070801,7.28230794 174.294027,7.36376953 174.569092,7.36376953 Z" id="Shape"></path>
+                <path d="M177.774658,15 C178.007406,15 178.198893,14.9227702 178.349121,14.7683105 C178.499349,14.6138509 178.574463,14.3990885 178.574463,14.1240234 L178.574463,11.4389648 C178.574463,10.9015299 178.728923,10.4656576 179.037842,10.1313477 C179.346761,9.79703776 179.712809,9.62988281 180.135986,9.62988281 C180.487223,9.62988281 180.776042,9.74519857 181.002441,9.97583008 C181.228841,10.2064616 181.342041,10.5397135 181.342041,10.9755859 L181.342041,14.1240234 C181.342041,14.3990885 181.416097,14.6138509 181.564209,14.7683105 C181.712321,14.9227702 181.898519,15 182.122803,15 C182.359782,15 182.554443,14.9227702 182.706787,14.7683105 C182.859131,14.6138509 182.935303,14.3990885 182.935303,14.1240234 L182.935303,10.9819336 C182.935303,10.5756836 182.873942,10.2138672 182.751221,9.89648438 C182.628499,9.57910156 182.461344,9.32413737 182.249756,9.1315918 C182.038167,8.93904622 181.798014,8.79305013 181.529297,8.69360352 C181.260579,8.5941569 180.973877,8.54443359 180.669189,8.54443359 C180.174072,8.54443359 179.747721,8.63753255 179.390137,8.82373047 C179.032552,9.00992839 178.760661,9.2828776 178.574463,9.64257813 L178.574463,9.42041016 C178.574463,9.17073568 178.500407,8.97713216 178.352295,8.83959961 C178.204183,8.70206706 178.015869,8.63330078 177.787354,8.63330078 C177.554606,8.63330078 177.362061,8.703125 177.209717,8.84277344 C177.057373,8.98242188 176.981201,9.17919922 176.981201,9.43310547 L176.981201,14.1240234 C176.981201,14.3990885 177.055257,14.6138509 177.203369,14.7683105 C177.351481,14.9227702 177.541911,15 177.774658,15 Z" id="Path"></path>
+                <path d="M189.130615,14.9174805 C189.130615,15.5183919 188.965576,15.9659017 188.635498,16.2600098 C188.30542,16.5541178 187.844157,16.7011719 187.251709,16.7011719 C187.082438,16.7011719 186.909993,16.6863607 186.734375,16.6567383 C186.558757,16.6271159 186.426514,16.5996094 186.337646,16.5742188 C186.248779,16.5488281 186.109131,16.5033366 185.918701,16.4377441 C185.728271,16.3721517 185.614014,16.3330078 185.575928,16.3203125 C185.508219,16.2949219 185.440511,16.2822266 185.372803,16.2822266 C185.224691,16.2822266 185.101969,16.3372396 185.004639,16.4472656 C184.907308,16.5572917 184.858643,16.6842448 184.858643,16.828125 C184.858643,17.03125 184.955973,17.1920573 185.150635,17.3105469 C185.37915,17.4501953 185.699707,17.567627 186.112305,17.6628418 C186.524902,17.7580566 186.957601,17.8056641 187.4104,17.8056641 C188.409098,17.8056641 189.199382,17.5432943 189.78125,17.0185547 C190.363118,16.4938151 190.654053,15.7299805 190.654053,14.7270508 L190.654053,9.52197266 C190.654053,9.24267578 190.583171,9.02473958 190.441406,8.86816406 C190.299642,8.71158854 190.118734,8.63330078 189.898682,8.63330078 C189.70402,8.63330078 189.537923,8.69148763 189.400391,8.80786133 C189.262858,8.92423503 189.183512,9.09033203 189.162354,9.30615234 L189.162354,9.59814453 C188.950765,9.25960286 188.698975,9.00146484 188.406982,8.82373047 C188.11499,8.64599609 187.712972,8.55712891 187.200928,8.55712891 C186.282633,8.55712891 185.555827,8.86287435 185.020508,9.47436523 C184.485189,10.0858561 184.217529,10.8803711 184.217529,11.8579102 C184.217529,12.8269857 184.492594,13.5939941 185.042725,14.1589355 C185.592855,14.723877 186.324951,15.0063477 187.239014,15.0063477 C188.140381,15.0063477 188.770915,14.6656901 189.130615,13.984375 L189.130615,14.9174805 Z M187.524658,13.9462891 C187.042236,13.9378255 186.641276,13.7548014 186.321777,13.3972168 C186.002279,13.0396322 185.842529,12.5117188 185.842529,11.8134766 C185.842529,11.5722656 185.861572,11.3469238 185.899658,11.1374512 C185.937744,10.9279785 185.999105,10.7290853 186.08374,10.5407715 C186.168376,10.3524577 186.27417,10.1916504 186.401123,10.0583496 C186.528076,9.92504883 186.686768,9.81925456 186.877197,9.7409668 C187.067627,9.66267904 187.281331,9.62353516 187.518311,9.62353516 C188.59318,9.62353516 189.130615,10.3577474 189.130615,11.8261719 C189.130615,12.5498047 188.984619,13.0819499 188.692627,13.4226074 C188.400635,13.763265 188.011312,13.9378255 187.524658,13.9462891 Z" id="Shape"></path>
+                <path d="M198.906006,15.0063477 C199.15568,15.0063477 199.370443,14.9185384 199.550293,14.7429199 C199.730143,14.5673014 199.820068,14.3377279 199.820068,14.0541992 L199.820068,7.28759766 L201.883057,7.28759766 C202.094645,7.28759766 202.263916,7.21565755 202.390869,7.07177734 C202.517822,6.92789714 202.581299,6.7586263 202.581299,6.56396484 C202.581299,6.37353516 202.516764,6.20532227 202.387695,6.05932617 C202.258626,5.91333008 202.090413,5.84033203 201.883057,5.84033203 L195.928955,5.84033203 C195.717367,5.84033203 195.547038,5.91333008 195.417969,6.05932617 C195.2889,6.20532227 195.224365,6.37565104 195.224365,6.5703125 C195.224365,6.76497396 195.287842,6.93318685 195.414795,7.07495117 C195.541748,7.21671549 195.713135,7.28759766 195.928955,7.28759766 L197.998291,7.28759766 L197.998291,14.0541992 C197.998291,14.3377279 198.088216,14.5673014 198.268066,14.7429199 C198.447917,14.9185384 198.660563,15.0063477 198.906006,15.0063477 Z" id="Path"></path>
+                <path d="M205.520264,17.2597656 L209.309814,9.67431641 C209.369059,9.55582682 209.398682,9.43310547 209.398682,9.30615234 C209.398682,9.10725911 209.325684,8.93904622 209.179688,8.80151367 C209.033691,8.66398112 208.863363,8.59521484 208.668701,8.59521484 C208.359782,8.59521484 208.124919,8.76025391 207.964111,9.09033203 L206.269287,12.4736328 L204.580811,9.14746094 C204.424235,8.84277344 204.195719,8.69042969 203.895264,8.69042969 C203.692139,8.69042969 203.512288,8.76025391 203.355713,8.89990234 C203.199137,9.03955078 203.12085,9.20882161 203.12085,9.40771484 C203.12085,9.5304362 203.150472,9.64892578 203.209717,9.76318359 L205.47583,14.0668945 L204.206299,16.5996094 C204.151286,16.7096354 204.123779,16.8238932 204.123779,16.9423828 C204.123779,17.1497396 204.202067,17.326416 204.358643,17.4724121 C204.515218,17.6184082 204.692952,17.6914062 204.891846,17.6914062 C205.166911,17.6914062 205.376383,17.547526 205.520264,17.2597656 Z" id="Path"></path>
+                <path d="M211.214111,17.4057617 C211.45109,17.4057617 211.644694,17.3306478 211.794922,17.1804199 C211.94515,17.0301921 212.020264,16.8175456 212.020264,16.5424805 L212.020264,14.0541992 C212.434977,14.7355143 213.103597,15.0761719 214.026123,15.0761719 C214.885173,15.0761719 215.572835,14.7873535 216.089111,14.2097168 C216.605387,13.6320801 216.863525,12.8460286 216.863525,11.8515625 C216.863525,11.2252604 216.756673,10.6645508 216.542969,10.1694336 C216.329264,9.67431641 216.011882,9.27970378 215.59082,8.9855957 C215.169759,8.69148763 214.677816,8.54443359 214.11499,8.54443359 C213.124756,8.54443359 212.426514,8.91259766 212.020264,9.64892578 L212.020264,9.42675781 C212.020264,9.16861979 211.949382,8.97184245 211.807617,8.83642578 C211.665853,8.70100911 211.476481,8.63330078 211.239502,8.63330078 C211.010986,8.63330078 210.818441,8.71582031 210.661865,8.88085938 C210.50529,9.04589844 210.427002,9.26595052 210.427002,9.54101563 L210.427002,16.5424805 C210.427002,16.8133138 210.501058,17.0249023 210.64917,17.1772461 C210.797282,17.3295898 210.985596,17.4057617 211.214111,17.4057617 Z M213.689697,13.9907227 C213.308838,13.9907227 212.990397,13.8902181 212.734375,13.689209 C212.478353,13.4881999 212.296387,13.2300618 212.188477,12.9147949 C212.080566,12.599528 212.026611,12.2366536 212.026611,11.8261719 C212.026611,11.0983073 212.154622,10.550293 212.410645,10.1821289 C212.666667,9.81396484 213.099365,9.62988281 213.70874,9.62988281 C213.945719,9.62988281 214.157308,9.67643229 214.343506,9.76953125 C214.529704,9.86263021 214.68099,9.98323568 214.797363,10.1313477 C214.913737,10.2794596 215.01001,10.4529622 215.086182,10.6518555 C215.162354,10.8507487 215.215251,11.047526 215.244873,11.2421875 C215.274495,11.436849 215.289307,11.6357422 215.289307,11.8388672 C215.289307,12.063151 215.27238,12.2747396 215.238525,12.4736328 C215.204671,12.672526 215.1486,12.8661296 215.070312,13.0544434 C214.992025,13.2427572 214.893636,13.4046224 214.775146,13.5400391 C214.656657,13.6754557 214.504313,13.7844238 214.318115,13.8669434 C214.131917,13.9494629 213.922445,13.9907227 213.689697,13.9907227 Z" id="Shape"></path>
+                <path d="M217.87915,11.7563477 C217.87915,12.8015951 218.180664,13.6162109 218.783691,14.2001953 C219.386719,14.7841797 220.189697,15.0761719 221.192627,15.0761719 C222.085531,15.0761719 222.866292,14.7947591 223.534912,14.2319336 C223.670329,14.113444 223.738037,13.9695638 223.738037,13.800293 C223.738037,13.652181 223.687256,13.5188802 223.585693,13.4003906 C223.484131,13.281901 223.367757,13.2226562 223.236572,13.2226562 C223.147705,13.2226562 223.065186,13.2522786 222.989014,13.3115234 C222.392334,13.7389323 221.831624,13.9526367 221.306885,13.9526367 C220.151611,13.9526367 219.54012,13.355957 219.472412,12.1625977 L223.484131,12.1625977 C223.636475,12.1625977 223.750732,12.1118164 223.826904,12.0102539 C223.903076,11.9086914 223.941162,11.7753906 223.941162,11.6103516 C223.93693,11.1914062 223.87557,10.8020833 223.75708,10.4423828 C223.63859,10.0826823 223.46403,9.75895182 223.233398,9.47119141 C223.002767,9.18343099 222.698079,8.95808919 222.319336,8.79516602 C221.940592,8.63224284 221.505778,8.55078125 221.014893,8.55078125 C220.375895,8.55078125 219.815186,8.69889323 219.332764,8.99511719 C218.850342,9.29134115 218.487467,9.67960612 218.244141,10.1599121 C218.000814,10.6402181 217.87915,11.1723633 217.87915,11.7563477 Z M219.485107,11.2675781 C219.523193,10.8063151 219.676595,10.4127604 219.945312,10.0869141 C220.21403,9.76106771 220.585368,9.59814453 221.059326,9.59814453 C221.54598,9.59814453 221.913086,9.75472005 222.160645,10.0678711 C222.408203,10.3810221 222.54891,10.7809245 222.582764,11.2675781 L219.485107,11.2675781 Z" id="Shape"></path>
+            </g>
+        </g>
+        <g id="STEP-Pooling" stroke-width="1" transform="translate(824, 70)">
+            <g id="Body" transform="translate(0, 50)">
+                <g id="STEP-Pooling" transform="translate(0, 186)" fill="#7D7D7D" fill-rule="nonzero">
+                    <path d="M28.0195312,12.1396484 C28.9576823,12.1396484 29.7051595,11.9292806 30.2619629,11.5085449 C30.8187663,11.0878092 31.097168,10.5157878 31.097168,9.79248047 C31.097168,9.48095703 31.0506185,9.20613607 30.9575195,8.96801758 C30.8644206,8.72989909 30.7202962,8.52311198 30.5251465,8.34765625 C30.3299967,8.17220052 30.1097819,8.02360026 29.864502,7.90185547 C29.619222,7.78011068 29.3175456,7.66194661 28.9594727,7.54736328 C28.941569,7.54020182 28.6801758,7.46321615 28.175293,7.31640625 C27.6704102,7.16959635 27.4090169,7.09261068 27.3911133,7.08544922 C27.1225586,6.99593099 26.9229329,6.88045247 26.7922363,6.73901367 C26.6615397,6.59757487 26.5961914,6.4140625 26.5961914,6.18847656 C26.5961914,6.01302083 26.6391602,5.86263021 26.7250977,5.73730469 C26.8110352,5.61197917 26.9309896,5.51529948 27.0849609,5.44726562 C27.2389323,5.37923177 27.4063314,5.32999674 27.5871582,5.29956055 C27.767985,5.26912435 27.969401,5.25390625 28.1914062,5.25390625 C28.8610026,5.25390625 29.4840495,5.41861979 30.0605469,5.74804688 C30.1536458,5.79817708 30.2395833,5.82324219 30.3183594,5.82324219 C30.46875,5.82324219 30.5976562,5.75789388 30.7050781,5.62719727 C30.8125,5.49650065 30.8662109,5.35058594 30.8662109,5.18945312 C30.8662109,5.03190104 30.8107096,4.90299479 30.699707,4.80273438 C30.4418945,4.58072917 30.0712891,4.3972168 29.5878906,4.25219727 C29.1044922,4.10717773 28.6067708,4.03466797 28.0947266,4.03466797 C27.2174479,4.03466797 26.5021973,4.2351888 25.9489746,4.63623047 C25.395752,5.03727214 25.1191406,5.5797526 25.1191406,6.26367187 C25.1191406,6.79361979 25.2543132,7.21346029 25.5246582,7.52319336 C25.7950033,7.83292643 26.2363281,8.09342448 26.8486328,8.3046875 L28.6210938,8.89550781 C28.9541016,9.00292969 29.1993815,9.1336263 29.3569336,9.28759766 C29.5144857,9.44156901 29.5932617,9.6546224 29.5932617,9.92675781 C29.5932617,10.2561849 29.447347,10.5050456 29.1555176,10.6733398 C28.8636882,10.8416341 28.4832357,10.9257812 28.0141602,10.9257812 C27.2801107,10.9257812 26.5782878,10.7252604 25.9086914,10.3242188 C25.847819,10.2884115 25.7851562,10.2705078 25.7207031,10.2705078 C25.5667318,10.2705078 25.4261882,10.3555501 25.2990723,10.5256348 C25.1719564,10.6957194 25.1083984,10.8684896 25.1083984,11.0439453 C25.1083984,11.1764323 25.1495768,11.273112 25.2319336,11.3339844 C25.9230143,11.8710937 26.8522135,12.1396484 28.0195312,12.1396484 Z" id="Path"></path>
+                    <path d="M34.8085938,12.0053711 C35.0198568,12.0053711 35.2015788,11.931071 35.3537598,11.7824707 C35.5059408,11.6338704 35.5820312,11.4396159 35.5820312,11.199707 L35.5820312,5.47412109 L37.3276367,5.47412109 C37.5066732,5.47412109 37.6499023,5.4132487 37.7573242,5.29150391 C37.8647461,5.16975911 37.918457,5.02652995 37.918457,4.86181641 C37.918457,4.70068359 37.8638509,4.55834961 37.7546387,4.43481445 C37.6454264,4.3112793 37.5030924,4.24951172 37.3276367,4.24951172 L32.2895508,4.24951172 C32.1105143,4.24951172 31.96639,4.3112793 31.8571777,4.43481445 C31.7479655,4.55834961 31.6933594,4.70247396 31.6933594,4.8671875 C31.6933594,5.03190104 31.7470703,5.17423503 31.8544922,5.29418945 C31.9619141,5.41414388 32.1069336,5.47412109 32.2895508,5.47412109 L34.0405273,5.47412109 L34.0405273,11.199707 C34.0405273,11.4396159 34.1166178,11.6338704 34.2687988,11.7824707 C34.4209798,11.931071 34.6009115,12.0053711 34.8085938,12.0053711 Z" id="Path"></path>
+                    <path d="M39.6962891,12 L43.6762695,12 C43.8481445,12 43.9851074,11.9400228 44.0871582,11.8200684 C44.189209,11.7001139 44.2402344,11.5595703 44.2402344,11.3984375 C44.2402344,11.2373047 44.189209,11.0958659 44.0871582,10.9741211 C43.9851074,10.8523763 43.8481445,10.7915039 43.6762695,10.7915039 L40.4697266,10.7915039 L40.4697266,8.63769531 L43.3432617,8.63769531 C43.5187174,8.63769531 43.6592611,8.57861328 43.7648926,8.46044922 C43.8705241,8.34228516 43.9233398,8.20263672 43.9233398,8.04150391 C43.9233398,7.88037109 43.8714193,7.74072266 43.7675781,7.62255859 C43.663737,7.50439453 43.5222982,7.4453125 43.3432617,7.4453125 L40.4697266,7.4453125 L40.4697266,5.46875 L43.5527344,5.46875 C43.7281901,5.46875 43.8678385,5.4078776 43.9716797,5.28613281 C44.0755208,5.16438802 44.1274414,5.02294922 44.1274414,4.86181641 C44.1274414,4.70068359 44.0755208,4.55924479 43.9716797,4.4375 C43.8678385,4.31575521 43.7281901,4.25488281 43.5527344,4.25488281 L39.7338867,4.25488281 C39.4796549,4.25488281 39.28361,4.33992513 39.145752,4.51000977 C39.0078939,4.6800944 38.9389648,4.8976237 38.9389648,5.16259766 L38.9389648,11.1352539 C38.9389648,11.3608398 39.0123698,11.5613607 39.1591797,11.7368164 C39.3059896,11.9122721 39.485026,12 39.6962891,12 Z" id="Path"></path>
+                    <path d="M46.4584961,12.0053711 C46.6697591,12.0053711 46.8514811,11.9319661 47.0036621,11.7851562 C47.1558431,11.6383464 47.2319336,11.444987 47.2319336,11.2050781 L47.2319336,8.92236328 L48.7304688,8.92236328 C50.6891276,8.92236328 51.668457,8.14176432 51.668457,6.58056641 C51.668457,6.15445964 51.6022135,5.78833008 51.4697266,5.48217773 C51.3372396,5.17602539 51.142985,4.93432617 50.8869629,4.75708008 C50.6309408,4.57983398 50.3355306,4.45092773 50.0007324,4.37036133 C49.6659342,4.28979492 49.2765299,4.24951172 48.8325195,4.24951172 L46.5229492,4.24951172 C46.2579753,4.24951172 46.0520833,4.33902995 45.9052734,4.51806641 C45.7584635,4.69710286 45.6850586,4.9226888 45.6850586,5.19482422 L45.6850586,11.2050781 C45.6850586,11.444987 45.7620443,11.6383464 45.9160156,11.7851562 C46.069987,11.9319661 46.2508138,12.0053711 46.4584961,12.0053711 Z M47.2319336,7.80517578 L47.2319336,5.42578125 L48.6660156,5.42578125 C48.9166667,5.42578125 49.1252441,5.44010417 49.291748,5.46875 C49.458252,5.49739583 49.6104329,5.55110677 49.748291,5.62988281 C49.8861491,5.70865885 49.9873047,5.82682292 50.0517578,5.984375 C50.1162109,6.14192708 50.1484375,6.34065755 50.1484375,6.58056641 C50.1484375,6.82763672 50.1162109,7.03173828 50.0517578,7.19287109 C49.9873047,7.35400391 49.8861491,7.47843424 49.748291,7.56616211 C49.6104329,7.65388997 49.4546712,7.71565755 49.2810059,7.75146484 C49.1073405,7.78727214 48.8898112,7.80517578 48.628418,7.80517578 L47.2319336,7.80517578 Z" id="Shape"></path>
+                    <path d="M56.6098633,12.0053711 C56.8211263,12.0053711 57.0028483,11.9319661 57.1550293,11.7851562 C57.3072103,11.6383464 57.3833008,11.444987 57.3833008,11.2050781 L57.3833008,8.92236328 L58.8818359,8.92236328 C60.8404948,8.92236328 61.8198242,8.14176432 61.8198242,6.58056641 C61.8198242,6.15445964 61.7535807,5.78833008 61.6210938,5.48217773 C61.4886068,5.17602539 61.2943522,4.93432617 61.0383301,4.75708008 C60.7823079,4.57983398 60.4868978,4.45092773 60.1520996,4.37036133 C59.8173014,4.28979492 59.4278971,4.24951172 58.9838867,4.24951172 L56.6743164,4.24951172 C56.4093424,4.24951172 56.2034505,4.33902995 56.0566406,4.51806641 C55.9098307,4.69710286 55.8364258,4.9226888 55.8364258,5.19482422 L55.8364258,11.2050781 C55.8364258,11.444987 55.9134115,11.6383464 56.0673828,11.7851562 C56.2213542,11.9319661 56.402181,12.0053711 56.6098633,12.0053711 Z M57.3833008,7.80517578 L57.3833008,5.42578125 L58.8173828,5.42578125 C59.0680339,5.42578125 59.2766113,5.44010417 59.4431152,5.46875 C59.6096191,5.49739583 59.7618001,5.55110677 59.8996582,5.62988281 C60.0375163,5.70865885 60.1386719,5.82682292 60.203125,5.984375 C60.2675781,6.14192708 60.2998047,6.34065755 60.2998047,6.58056641 C60.2998047,6.82763672 60.2675781,7.03173828 60.203125,7.19287109 C60.1386719,7.35400391 60.0375163,7.47843424 59.8996582,7.56616211 C59.7618001,7.65388997 59.6060384,7.71565755 59.432373,7.75146484 C59.2587077,7.78727214 59.0411784,7.80517578 58.7797852,7.80517578 L57.3833008,7.80517578 Z" id="Shape"></path>
+                    <path d="M65.2573242,11.1459961 C64.7954102,11.1459961 64.4355469,10.9848633 64.1777344,10.6625977 C63.9199219,10.340332 63.7910156,9.88916016 63.7910156,9.30908203 C63.7910156,8.72184245 63.9190267,8.26619466 64.1750488,7.94213867 C64.431071,7.61808268 64.7918294,7.45605469 65.2573242,7.45605469 C65.722819,7.45605469 66.0844727,7.61897786 66.3422852,7.94482422 C66.6000977,8.27067057 66.7290039,8.72542318 66.7290039,9.30908203 C66.7290039,9.88916016 66.6000977,10.340332 66.3422852,10.6625977 C66.0844727,10.9848633 65.722819,11.1459961 65.2573242,11.1459961 Z M65.2573242,12.0644531 C65.6368815,12.0644531 65.9842122,12.0125326 66.2993164,11.9086914 C66.6144206,11.8048503 66.8802897,11.666097 67.0969238,11.4924316 C67.3135579,11.3187663 67.4961751,11.1128743 67.6447754,10.8747559 C67.7933757,10.6366374 67.9025879,10.3868815 67.9724121,10.1254883 C68.0422363,9.86409505 68.0771484,9.59195964 68.0771484,9.30908203 C68.0771484,9.01188151 68.040446,8.72721354 67.967041,8.45507812 C67.8936361,8.18294271 67.7799479,7.92960612 67.6259766,7.69506836 C67.4720052,7.4605306 67.2858073,7.2582194 67.0673828,7.08813477 C66.8489583,6.91805013 66.5848796,6.78377279 66.2751465,6.68530273 C65.9654134,6.58683268 65.6261393,6.53759766 65.2573242,6.53759766 C64.8813477,6.53759766 64.5367025,6.58862305 64.2233887,6.69067383 C63.9100749,6.79272461 63.6451009,6.93147786 63.4284668,7.10693359 C63.2118327,7.28238932 63.0283203,7.48828125 62.8779297,7.72460937 C62.7275391,7.9609375 62.6174316,8.21248372 62.5476074,8.47924805 C62.4777832,8.74601237 62.4428711,9.0226237 62.4428711,9.30908203 C62.4428711,9.67073568 62.5001628,10.0126953 62.6147461,10.3349609 C62.7293294,10.6572266 62.8967285,10.949056 63.1169434,11.2104492 C63.3371582,11.4718424 63.6316732,11.6795247 64.0004883,11.8334961 C64.3693034,11.9874674 64.7882487,12.0644531 65.2573242,12.0644531 Z" id="Shape"></path>
+                    <path d="M71.7509766,11.1459961 C71.2890625,11.1459961 70.9291992,10.9848633 70.6713867,10.6625977 C70.4135742,10.340332 70.284668,9.88916016 70.284668,9.30908203 C70.284668,8.72184245 70.412679,8.26619466 70.6687012,7.94213867 C70.9247233,7.61808268 71.2854818,7.45605469 71.7509766,7.45605469 C72.2164714,7.45605469 72.578125,7.61897786 72.8359375,7.94482422 C73.09375,8.27067057 73.2226562,8.72542318 73.2226562,9.30908203 C73.2226562,9.88916016 73.09375,10.340332 72.8359375,10.6625977 C72.578125,10.9848633 72.2164714,11.1459961 71.7509766,11.1459961 Z M71.7509766,12.0644531 C72.1305339,12.0644531 72.4778646,12.0125326 72.7929688,11.9086914 C73.1080729,11.8048503 73.3739421,11.666097 73.5905762,11.4924316 C73.8072103,11.3187663 73.9898275,11.1128743 74.1384277,10.8747559 C74.287028,10.6366374 74.3962402,10.3868815 74.4660645,10.1254883 C74.5358887,9.86409505 74.5708008,9.59195964 74.5708008,9.30908203 C74.5708008,9.01188151 74.5340983,8.72721354 74.4606934,8.45507812 C74.3872884,8.18294271 74.2736003,7.92960612 74.1196289,7.69506836 C73.9656576,7.4605306 73.7794596,7.2582194 73.5610352,7.08813477 C73.3426107,6.91805013 73.0785319,6.78377279 72.7687988,6.68530273 C72.4590658,6.58683268 72.1197917,6.53759766 71.7509766,6.53759766 C71.375,6.53759766 71.0303548,6.58862305 70.717041,6.69067383 C70.4037272,6.79272461 70.1387533,6.93147786 69.9221191,7.10693359 C69.705485,7.28238932 69.5219727,7.48828125 69.371582,7.72460937 C69.2211914,7.9609375 69.111084,8.21248372 69.0412598,8.47924805 C68.9714355,8.74601237 68.9365234,9.0226237 68.9365234,9.30908203 C68.9365234,9.67073568 68.9938151,10.0126953 69.1083984,10.3349609 C69.2229818,10.6572266 69.3903809,10.949056 69.6105957,11.2104492 C69.8308105,11.4718424 70.1253255,11.6795247 70.4941406,11.8334961 C70.8629557,11.9874674 71.281901,12.0644531 71.7509766,12.0644531 Z" id="Shape"></path>
+                    <path d="M76.4023438,12 C76.6028646,12 76.7684733,11.9337565 76.8991699,11.8012695 C77.0298665,11.6687826 77.0952148,11.484375 77.0952148,11.2480469 L77.0952148,5.01220703 C77.0952148,4.77587891 77.0307617,4.59147135 76.9018555,4.45898438 C76.7729492,4.3264974 76.610026,4.26025391 76.4130859,4.26025391 C76.2161458,4.26025391 76.055013,4.3264974 75.9296875,4.45898438 C75.804362,4.59147135 75.7416992,4.77587891 75.7416992,5.01220703 L75.7416992,11.2480469 C75.7416992,11.4879557 75.8034668,11.6732585 75.927002,11.8039551 C76.0505371,11.9346517 76.2089844,12 76.4023438,12 Z" id="Path"></path>
+                    <path d="M79.2275391,12 C79.4244792,12 79.5865072,11.9337565 79.713623,11.8012695 C79.8407389,11.6687826 79.9042969,11.484375 79.9042969,11.2480469 L79.9042969,7.37011719 C79.9042969,7.13020833 79.8407389,6.94401042 79.713623,6.81152344 C79.5865072,6.67903646 79.4244792,6.61279297 79.2275391,6.61279297 C79.030599,6.61279297 78.8694661,6.67903646 78.7441406,6.81152344 C78.6188151,6.94401042 78.5561523,7.13020833 78.5561523,7.37011719 L78.5561523,11.2480469 C78.5561523,11.4879557 78.6188151,11.6732585 78.7441406,11.8039551 C78.8694661,11.9346517 79.030599,12 79.2275391,12 Z M79.2275391,5.53857422 C79.4602865,5.53857422 79.6491699,5.46964518 79.7941895,5.33178711 C79.939209,5.19392904 80.0117188,5.0139974 80.0117188,4.79199219 C80.0117188,4.56998698 79.9401042,4.39095052 79.796875,4.25488281 C79.6536458,4.1188151 79.4656576,4.05078125 79.2329102,4.05078125 C78.996582,4.05078125 78.8059082,4.1188151 78.6608887,4.25488281 C78.5158691,4.39095052 78.4433594,4.56998698 78.4433594,4.79199219 C78.4433594,5.0139974 78.5158691,5.19392904 78.6608887,5.33178711 C78.8059082,5.46964518 78.9947917,5.53857422 79.2275391,5.53857422 Z" id="Shape"></path>
+                    <path d="M81.9399414,12 C82.1368815,12 82.2989095,11.9346517 82.4260254,11.8039551 C82.5531413,11.6732585 82.6166992,11.4915365 82.6166992,11.2587891 L82.6166992,8.98681641 C82.6166992,8.5320638 82.7473958,8.1632487 83.0087891,7.88037109 C83.2701823,7.59749349 83.5799154,7.45605469 83.9379883,7.45605469 C84.2351888,7.45605469 84.4795736,7.55362956 84.6711426,7.7487793 C84.8627116,7.94392904 84.9584961,8.22591146 84.9584961,8.59472656 L84.9584961,11.2587891 C84.9584961,11.4915365 85.0211589,11.6732585 85.1464844,11.8039551 C85.2718099,11.9346517 85.429362,12 85.6191406,12 C85.8196615,12 85.984375,11.9346517 86.1132812,11.8039551 C86.2421875,11.6732585 86.3066406,11.4915365 86.3066406,11.2587891 L86.3066406,8.60009766 C86.3066406,8.25634766 86.2547201,7.95019531 86.1508789,7.68164063 C86.0470378,7.41308594 85.905599,7.19734701 85.7265625,7.03442383 C85.547526,6.87150065 85.3443197,6.74796549 85.1169434,6.66381836 C84.8895671,6.57967122 84.6469727,6.53759766 84.3891602,6.53759766 C83.9702148,6.53759766 83.6094564,6.6163737 83.3068848,6.77392578 C83.0043132,6.93147786 82.7742513,7.1624349 82.6166992,7.46679688 L82.6166992,7.27880859 C82.6166992,7.06754557 82.5540365,6.90372721 82.4287109,6.78735352 C82.3033854,6.67097982 82.144043,6.61279297 81.9506836,6.61279297 C81.7537435,6.61279297 81.5908203,6.671875 81.4619141,6.79003906 C81.3330078,6.90820312 81.2685547,7.07470703 81.2685547,7.28955078 L81.2685547,11.2587891 C81.2685547,11.4915365 81.3312174,11.6732585 81.456543,11.8039551 C81.5818685,11.9346517 81.7430013,12 81.9399414,12 Z" id="Path"></path>
+                    <path d="M91.5488281,11.9301758 C91.5488281,12.4386393 91.4091797,12.8173014 91.1298828,13.0661621 C90.8505859,13.3150228 90.4602865,13.4394531 89.9589844,13.4394531 C89.8157552,13.4394531 89.6698405,13.4269206 89.5212402,13.4018555 C89.37264,13.3767904 89.2607422,13.3535156 89.1855469,13.3320313 C89.1103516,13.3105469 88.9921875,13.272054 88.8310547,13.2165527 C88.6699219,13.1610514 88.5732422,13.1279297 88.5410156,13.1171875 C88.483724,13.0957031 88.4264323,13.0849609 88.3691406,13.0849609 C88.2438151,13.0849609 88.139974,13.1315104 88.0576172,13.2246094 C87.9752604,13.3177083 87.934082,13.4251302 87.934082,13.546875 C87.934082,13.71875 88.0164388,13.8548177 88.1811523,13.9550781 C88.3745117,14.0732422 88.645752,14.1726074 88.994873,14.2531738 C89.3439941,14.3337402 89.7101237,14.3740234 90.0932617,14.3740234 C90.9383138,14.3740234 91.607015,14.1520182 92.0993652,13.7080078 C92.5917155,13.2639974 92.8378906,12.6176758 92.8378906,11.769043 L92.8378906,7.36474609 C92.8378906,7.12841797 92.7779134,6.94401042 92.657959,6.81152344 C92.5380046,6.67903646 92.3849284,6.61279297 92.1987305,6.61279297 C92.0340169,6.61279297 91.8934733,6.66202799 91.7770996,6.76049805 C91.6607259,6.8589681 91.5935872,6.99951172 91.5756836,7.18212891 L91.5756836,7.42919922 C91.3966471,7.14274089 91.1835938,6.92431641 90.9365234,6.77392578 C90.6894531,6.62353516 90.3492839,6.54833984 89.9160156,6.54833984 C89.1389974,6.54833984 88.5240072,6.80704753 88.0710449,7.32446289 C87.6180827,7.84187826 87.3916016,8.51416016 87.3916016,9.34130859 C87.3916016,10.1612956 87.624349,10.8103027 88.0898438,11.2883301 C88.5553385,11.7663574 89.1748047,12.0053711 89.9482422,12.0053711 C90.7109375,12.0053711 91.2444661,11.7171224 91.5488281,11.140625 L91.5488281,11.9301758 Z M90.1899414,11.1083984 C89.7817383,11.101237 89.4424642,10.9463704 89.1721191,10.6437988 C88.9017741,10.3412272 88.7666016,9.89453125 88.7666016,9.30371094 C88.7666016,9.09960938 88.7827148,8.90893555 88.8149414,8.73168945 C88.847168,8.55444336 88.8990885,8.38614909 88.9707031,8.22680664 C89.0423177,8.06746419 89.1318359,7.93139648 89.2392578,7.81860352 C89.3466797,7.70581055 89.480957,7.61629232 89.6420898,7.55004883 C89.8032227,7.48380534 89.9840495,7.45068359 90.1845703,7.45068359 C91.0940755,7.45068359 91.5488281,8.0719401 91.5488281,9.31445312 C91.5488281,9.92675781 91.425293,10.3770345 91.1782227,10.6652832 C90.9311523,10.9535319 90.6017253,11.101237 90.1899414,11.1083984 Z" id="Shape"></path>
+                </g>
+                <g id="vLLM-dash-box-Copy" transform="translate(0, 22)" xlink:href="#path-2">
+                    <path d="M7.60126829,127.489595 C7.73368411,127.496523 7.86662278,127.5 8,127.5 L8.77464335,127.5 C9.05078572,127.5 9.27464335,127.723858 9.27464335,128 C9.27464335,128.276142 9.05078572,128.5 8.77464335,128.5 L8,128.5 C7.8491935,128.5 7.69883087,128.496068 7.54901422,128.488228 C7.27324911,128.473799 7.06139478,128.23855 7.07582434,127.962785 C7.09025391,127.687019 7.32550317,127.475165 7.60126829,127.489595 Z M101.674643,128 C101.674643,128.276142 101.450786,128.5 101.174643,128.5 L99.9746433,128.5 C99.698501,128.5 99.4746433,128.276142 99.4746433,128 C99.4746433,127.723858 99.698501,127.5 99.9746433,127.5 L101.174643,127.5 C101.450786,127.5 101.674643,127.723858 101.674643,128 Z M17.6746433,128 C17.6746433,128.276142 17.4507857,128.5 17.1746433,128.5 L15.9746433,128.5 C15.698501,128.5 15.4746433,128.276142 15.4746433,128 C15.4746433,127.723858 15.698501,127.5 15.9746433,127.5 L17.1746434,127.5 C17.4507857,127.5 17.6746433,127.723858 17.6746433,128 Z M21.8746433,128 C21.8746433,128.276142 21.6507857,128.5 21.3746433,128.5 L20.1746433,128.5 C19.898501,128.5 19.6746433,128.276142 19.6746433,128 C19.6746433,127.723858 19.898501,127.5 20.1746433,127.5 L21.3746434,127.5 C21.6507857,127.5 21.8746433,127.723858 21.8746433,128 Z M26.0746433,128 C26.0746433,128.276142 25.8507857,128.5 25.5746433,128.5 L24.3746433,128.5 C24.098501,128.5 23.8746433,128.276142 23.8746433,128 C23.8746433,127.723858 24.098501,127.5 24.3746433,127.5 L25.5746435,127.5 C25.8507857,127.5 26.0746433,127.723858 26.0746433,128 Z M30.2746433,128 C30.2746433,128.276142 30.0507857,128.5 29.7746433,128.5 L28.5746433,128.5 C28.298501,128.5 28.0746433,128.276142 28.0746433,128 C28.0746433,127.723858 28.298501,127.5 28.5746433,127.5 L29.7746434,127.5 C30.0507857,127.5 30.2746433,127.723858 30.2746433,128 Z M34.4746433,128 C34.4746433,128.276142 34.2507857,128.5 33.9746433,128.5 L32.7746433,128.5 C32.498501,128.5 32.2746433,128.276142 32.2746433,128 C32.2746433,127.723858 32.498501,127.5 32.7746433,127.5 L33.9746434,127.5 C34.2507857,127.5 34.4746433,127.723858 34.4746433,128 Z M38.6746433,128 C38.6746433,128.276142 38.4507857,128.5 38.1746433,128.5 L36.9746433,128.5 C36.698501,128.5 36.4746433,128.276142 36.4746433,128 C36.4746433,127.723858 36.698501,127.5 36.9746433,127.5 L38.1746434,127.5 C38.4507857,127.5 38.6746433,127.723858 38.6746433,128 Z M42.8746433,128 C42.8746433,128.276142 42.6507857,128.5 42.3746433,128.5 L41.1746433,128.5 C40.898501,128.5 40.6746433,128.276142 40.6746433,128 C40.6746433,127.723858 40.898501,127.5 41.1746433,127.5 L42.3746434,127.5 C42.6507857,127.5 42.8746433,127.723858 42.8746433,128 Z M47.0746433,128 C47.0746433,128.276142 46.8507857,128.5 46.5746433,128.5 L45.3746433,128.5 C45.098501,128.5 44.8746433,128.276142 44.8746433,128 C44.8746433,127.723858 45.098501,127.5 45.3746433,127.5 L46.5746434,127.5 C46.8507857,127.5 47.0746433,127.723858 47.0746433,128 Z M51.2746433,128 C51.2746433,128.276142 51.0507857,128.5 50.7746433,128.5 L49.5746433,128.5 C49.298501,128.5 49.0746433,128.276142 49.0746433,128 C49.0746433,127.723858 49.298501,127.5 49.5746433,127.5 L50.7746434,127.5 C51.0507857,127.5 51.2746433,127.723858 51.2746433,128 Z M55.4746433,128 C55.4746433,128.276142 55.2507857,128.5 54.9746433,128.5 L53.7746433,128.5 C53.498501,128.5 53.2746433,128.276142 53.2746433,128 C53.2746433,127.723858 53.498501,127.5 53.7746433,127.5 L54.9746434,127.5 C55.2507857,127.5 55.4746433,127.723858 55.4746433,128 Z M59.6746433,128 C59.6746433,128.276142 59.4507857,128.5 59.1746433,128.5 L57.9746433,128.5 C57.698501,128.5 57.4746433,128.276142 57.4746433,128 C57.4746433,127.723858 57.698501,127.5 57.9746433,127.5 L59.1746433,127.5 C59.4507857,127.5 59.6746433,127.723858 59.6746433,128 Z M105.874643,128 C105.874643,128.276142 105.650786,128.5 105.374643,128.5 L104.174643,128.5 C103.898501,128.5 103.674643,128.276142 103.674643,128 C103.674643,127.723858 103.898501,127.5 104.174643,127.5 L105.374643,127.5 C105.650786,127.5 105.874643,127.723858 105.874643,128 Z M110.074643,128 C110.074643,128.276142 109.850786,128.5 109.574643,128.5 L108.374643,128.5 C108.098501,128.5 107.874643,128.276142 107.874643,128 C107.874643,127.723858 108.098501,127.5 108.374643,127.5 L109.574643,127.5 C109.850786,127.5 110.074643,127.723858 110.074643,128 Z M13.4746433,128 C13.4746433,128.276142 13.2507857,128.5 12.9746433,128.5 L11.7746433,128.5 C11.498501,128.5 11.2746433,128.276142 11.2746433,128 C11.2746433,127.723858 11.498501,127.5 11.7746433,127.5 L12.9746434,127.5 C13.2507857,127.5 13.4746433,127.723858 13.4746433,128 Z M63.8746433,128 C63.8746433,128.276142 63.6507857,128.5 63.3746433,128.5 L62.1746433,128.5 C61.898501,128.5 61.6746433,128.276142 61.6746433,128 C61.6746433,127.723858 61.898501,127.5 62.1746433,127.5 L63.3746432,127.5 C63.6507857,127.5 63.8746433,127.723858 63.8746433,128 Z M68.0746433,128 C68.0746433,128.276142 67.8507857,128.5 67.5746433,128.5 L66.3746433,128.5 C66.098501,128.5 65.8746433,128.276142 65.8746433,128 C65.8746433,127.723858 66.098501,127.5 66.3746433,127.5 L67.5746433,127.5 C67.8507857,127.5 68.0746433,127.723858 68.0746433,128 Z M72.2746433,128 C72.2746433,128.276142 72.0507857,128.5 71.7746433,128.5 L70.5746433,128.5 C70.298501,128.5 70.0746433,128.276142 70.0746433,128 C70.0746433,127.723858 70.298501,127.5 70.5746433,127.5 L71.7746433,127.5 C72.0507857,127.5 72.2746433,127.723858 72.2746433,128 Z M76.4746433,128 C76.4746433,128.276142 76.2507857,128.5 75.9746433,128.5 L74.7746433,128.5 C74.498501,128.5 74.2746433,128.276142 74.2746433,128 C74.2746433,127.723858 74.498501,127.5 74.7746433,127.5 L75.9746433,127.5 C76.2507857,127.5 76.4746433,127.723858 76.4746433,128 Z M80.6746433,128 C80.6746433,128.276142 80.4507857,128.5 80.1746433,128.5 L78.9746433,128.5 C78.698501,128.5 78.4746433,128.276142 78.4746433,128 C78.4746433,127.723858 78.698501,127.5 78.9746433,127.5 L80.1746433,127.5 C80.4507857,127.5 80.6746433,127.723858 80.6746433,128 Z M84.8746433,128 C84.8746433,128.276142 84.6507857,128.5 84.3746433,128.5 L83.1746433,128.5 C82.898501,128.5 82.6746433,128.276142 82.6746433,128 C82.6746433,127.723858 82.898501,127.5 83.1746433,127.5 L84.3746433,127.5 C84.6507857,127.5 84.8746433,127.723858 84.8746433,128 Z M89.0746433,128 C89.0746433,128.276142 88.8507857,128.5 88.5746433,128.5 L87.3746433,128.5 C87.098501,128.5 86.8746433,128.276142 86.8746433,128 C86.8746433,127.723858 87.098501,127.5 87.3746433,127.5 L88.5746433,127.5 C88.8507857,127.5 89.0746433,127.723858 89.0746433,128 Z M93.2746433,128 C93.2746433,128.276142 93.0507857,128.5 92.7746433,128.5 L91.5746433,128.5 C91.298501,128.5 91.0746433,128.276142 91.0746433,128 C91.0746433,127.723858 91.298501,127.5 91.5746433,127.5 L92.7746433,127.5 C93.0507857,127.5 93.2746433,127.723858 93.2746433,128 Z M97.4746433,128 C97.4746433,128.276142 97.2507857,128.5 96.9746433,128.5 L95.7746433,128.5 C95.498501,128.5 95.2746433,128.276142 95.2746433,128 C95.2746433,127.723858 95.498501,127.5 95.7746433,127.5 L96.9746433,127.5 C97.2507857,127.5 97.4746433,127.723858 97.4746433,128 Z M114.055358,126.915582 C114.180112,127.161937 114.081535,127.462781 113.835179,127.587535 C113.455748,127.779678 113.062077,127.943136 112.657126,128.076292 C112.394802,128.16255 112.11222,128.01982 112.025961,127.757496 C111.939703,127.495171 112.082433,127.212589 112.344758,127.126331 C112.701744,127.008946 113.048821,126.864836 113.383405,126.695403 C113.62976,126.570649 113.930604,126.669226 114.055358,126.915582 Z M3.87987036,126.26801 C4.19335033,126.474514 4.52190256,126.657254 4.86287326,126.814501 C5.11363396,126.930146 5.22316717,127.227176 5.10752253,127.477937 C4.9918779,127.728697 4.69484768,127.838231 4.44408698,127.722586 C4.05748525,127.544295 3.68504135,127.337142 3.32975792,127.103101 C3.09915402,126.951191 3.03535949,126.641103 3.18726884,126.410499 C3.3391782,126.179895 3.64926646,126.116101 3.87987036,126.26801 Z M116.999048,123.909773 C117.231117,124.059434 117.297922,124.368887 117.148261,124.600957 C116.917678,124.958505 116.660543,125.298348 116.37924,125.617559 C116.196668,125.824735 115.880714,125.844681 115.673538,125.662108 C115.466362,125.479535 115.446417,125.163582 115.62899,124.956406 C115.877349,124.674578 116.104345,124.374568 116.307864,124.058986 C116.457525,123.826916 116.766979,123.760111 116.999048,123.909773 Z M1.27256369,123.319388 C1.43898255,123.655985 1.63055181,123.979556 1.84546492,124.287446 C2.00352124,124.513881 1.94808936,124.825572 1.72165435,124.983629 C1.49521934,125.141685 1.18352754,125.086253 1.02547122,124.859818 C0.781933905,124.510921 0.564805969,124.14418 0.376143395,123.762593 C0.25375572,123.515054 0.355211426,123.215168 0.602751055,123.092781 C0.850290685,122.970393 1.15017602,123.071849 1.27256369,123.319388 Z M118.493626,120.332066 C118.477178,120.759793 118.428966,121.182879 118.349775,121.598809 C118.298127,121.870079 118.036351,122.048117 117.765081,121.996469 C117.493812,121.944821 117.315774,121.683044 117.367422,121.411775 C117.437286,121.044832 117.479837,120.671419 117.494364,120.29364 C117.504975,120.017702 117.73727,119.802611 118.013208,119.813222 C118.289146,119.823833 118.504237,120.056128 118.493626,120.332066 Z M0.5,119.461965 L0.5,120.011642 C0.500324493,120.219706 0.509101847,120.42668 0.526243834,120.632238 C0.549192342,120.907426 0.344712508,121.149112 0.0695253402,121.172061 C-0.205661827,121.195009 -0.447348546,120.990529 -0.470297054,120.715342 C-0.489701403,120.482655 -0.49963303,120.248463 -0.5,120.01241 L-0.5,119.461965 C-0.5,119.185831 -0.276150919,118.961965 7.30095984e-11,118.961965 C0.27613383,118.961965 0.5,119.185814 0.5,119.461965 Z M118.5,116.112678 L118.5,117.312678 C118.5,117.588821 118.276142,117.812678 118,117.812678 C117.723858,117.812678 117.5,117.588821 117.5,117.312678 L117.5,116.112678 C117.5,115.836536 117.723858,115.612678 118,115.612678 C118.276142,115.612678 118.5,115.836536 118.5,116.112678 Z M0.5,115.261965 L0.5,116.461965 C0.5,116.738107 0.276142375,116.961965 0,116.961965 C-0.276142375,116.961965 -0.5,116.738107 -0.5,116.461965 L-0.5,115.261965 C-0.5,114.985823 -0.276142375,114.761965 0,114.761965 C0.276142375,114.761965 0.5,114.985823 0.5,115.261965 Z M118.5,111.912678 L118.5,113.112678 C118.5,113.388821 118.276142,113.612678 118,113.612678 C117.723858,113.612678 117.5,113.388821 117.5,113.112678 L117.5,111.912678 C117.5,111.636536 117.723858,111.412678 118,111.412678 C118.276142,111.412678 118.5,111.636536 118.5,111.912678 Z M0.5,111.061965 L0.5,112.261965 C0.5,112.538107 0.276142375,112.761965 0,112.761965 C-0.276142375,112.761965 -0.5,112.538107 -0.5,112.261965 L-0.5,111.061965 C-0.5,110.785823 -0.276142375,110.561965 0,110.561965 C0.276142375,110.561965 0.5,110.785823 0.5,111.061965 Z M118.5,107.712678 L118.5,108.912678 C118.5,109.188821 118.276142,109.412678 118,109.412678 C117.723858,109.412678 117.5,109.188821 117.5,108.912678 L117.5,107.712678 C117.5,107.436536 117.723858,107.212678 118,107.212678 C118.276142,107.212678 118.5,107.436536 118.5,107.712678 Z M0.5,106.861965 L0.5,108.061965 C0.5,108.338107 0.276142375,108.561965 0,108.561965 C-0.276142375,108.561965 -0.5,108.338107 -0.5,108.061965 L-0.5,106.861965 C-0.5,106.585823 -0.276142375,106.361965 0,106.361965 C0.276142375,106.361965 0.5,106.585823 0.5,106.861965 Z M118.5,103.512678 L118.5,104.712678 C118.5,104.988821 118.276142,105.212678 118,105.212678 C117.723858,105.212678 117.5,104.988821 117.5,104.712678 L117.5,103.512678 C117.5,103.236536 117.723858,103.012678 118,103.012678 C118.276142,103.012678 118.5,103.236536 118.5,103.512678 Z M0.5,102.661965 L0.5,103.861965 C0.5,104.138107 0.276142375,104.361965 0,104.361965 C-0.276142375,104.361965 -0.5,104.138107 -0.5,103.861965 L-0.5,102.661965 C-0.5,102.385823 -0.276142375,102.161965 0,102.161965 C0.276142375,102.161965 0.5,102.385823 0.5,102.661965 Z M118.5,99.3126783 L118.5,100.512678 C118.5,100.788821 118.276142,101.012678 118,101.012678 C117.723858,101.012678 117.5,100.788821 117.5,100.512678 L117.5,99.3126783 C117.5,99.036536 117.723858,98.8126783 118,98.8126783 C118.276142,98.8126783 118.5,99.036536 118.5,99.3126783 Z M0.5,98.461965 L0.5,99.661965 C0.5,99.9381074 0.276142375,100.161965 0,100.161965 C-0.276142375,100.161965 -0.5,99.9381074 -0.5,99.661965 L-0.5,98.461965 C-0.5,98.1858226 -0.276142375,97.961965 0,97.961965 C0.276142375,97.961965 0.5,98.1858226 0.5,98.461965 Z M118.5,95.1126783 L118.5,96.3126783 C118.5,96.5888207 118.276142,96.8126783 118,96.8126783 C117.723858,96.8126783 117.5,96.5888207 117.5,96.3126783 L117.5,95.1126783 C117.5,94.836536 117.723858,94.6126783 118,94.6126783 C118.276142,94.6126783 118.5,94.836536 118.5,95.1126783 Z M0.5,94.261965 L0.5,95.461965 C0.5,95.7381074 0.276142375,95.961965 0,95.961965 C-0.276142375,95.961965 -0.5,95.7381074 -0.5,95.461965 L-0.5,94.261965 C-0.5,93.9858226 -0.276142375,93.761965 0,93.761965 C0.276142375,93.761965 0.5,93.9858226 0.5,94.261965 Z M118.5,90.9126783 L118.5,92.1126783 C118.5,92.3888207 118.276142,92.6126783 118,92.6126783 C117.723858,92.6126783 117.5,92.3888207 117.5,92.1126783 L117.5,90.9126783 C117.5,90.636536 117.723858,90.4126783 118,90.4126783 C118.276142,90.4126783 118.5,90.636536 118.5,90.9126783 Z M0.5,90.061965 L0.5,91.261965 C0.5,91.5381074 0.276142375,91.761965 0,91.761965 C-0.276142375,91.761965 -0.5,91.5381074 -0.5,91.261965 L-0.5,90.061965 C-0.5,89.7858226 -0.276142375,89.561965 0,89.561965 C0.276142375,89.561965 0.5,89.7858226 0.5,90.061965 Z M118.5,86.7126783 L118.5,87.9126783 C118.5,88.1888207 118.276142,88.4126783 118,88.4126783 C117.723858,88.4126783 117.5,88.1888207 117.5,87.9126783 L117.5,86.7126783 C117.5,86.436536 117.723858,86.2126783 118,86.2126783 C118.276142,86.2126783 118.5,86.436536 118.5,86.7126783 Z M0.5,85.861965 L0.5,87.061965 C0.5,87.3381074 0.276142375,87.561965 0,87.561965 C-0.276142375,87.561965 -0.5,87.3381074 -0.5,87.061965 L-0.5,85.861965 C-0.5,85.5858226 -0.276142375,85.361965 0,85.361965 C0.276142375,85.361965 0.5,85.5858226 0.5,85.861965 Z M118.5,82.5126783 L118.5,83.7126783 C118.5,83.9888207 118.276142,84.2126783 118,84.2126783 C117.723858,84.2126783 117.5,83.9888207 117.5,83.7126783 L117.5,82.5126783 C117.5,82.236536 117.723858,82.0126783 118,82.0126783 C118.276142,82.0126783 118.5,82.236536 118.5,82.5126783 Z M0.5,81.661965 L0.5,82.861965 C0.5,83.1381074 0.276142375,83.361965 0,83.361965 C-0.276142375,83.361965 -0.5,83.1381074 -0.5,82.861965 L-0.5,81.661965 C-0.5,81.3858226 -0.276142375,81.161965 0,81.161965 C0.276142375,81.161965 0.5,81.3858226 0.5,81.661965 Z M118.5,78.3126783 L118.5,79.5126783 C118.5,79.7888207 118.276142,80.0126783 118,80.0126783 C117.723858,80.0126783 117.5,79.7888207 117.5,79.5126783 L117.5,78.3126783 C117.5,78.036536 117.723858,77.8126783 118,77.8126783 C118.276142,77.8126783 118.5,78.036536 118.5,78.3126783 Z M0.5,77.461965 L0.5,78.661965 C0.5,78.9381074 0.276142375,79.161965 0,79.161965 C-0.276142375,79.161965 -0.5,78.9381074 -0.5,78.661965 L-0.5,77.461965 C-0.5,77.1858226 -0.276142375,76.961965 0,76.961965 C0.276142375,76.961965 0.5,77.1858226 0.5,77.461965 Z M118.5,74.1126783 L118.5,75.3126783 C118.5,75.5888207 118.276142,75.8126783 118,75.8126783 C117.723858,75.8126783 117.5,75.5888207 117.5,75.3126783 L117.5,74.1126783 C117.5,73.836536 117.723858,73.6126783 118,73.6126783 C118.276142,73.6126783 118.5,73.836536 118.5,74.1126783 Z M0.5,73.261965 L0.5,74.461965 C0.5,74.7381074 0.276142375,74.961965 0,74.961965 C-0.276142375,74.961965 -0.5,74.7381074 -0.5,74.461965 L-0.5,73.261965 C-0.5,72.9858226 -0.276142375,72.761965 0,72.761965 C0.276142375,72.761965 0.5,72.9858226 0.5,73.261965 Z M118.5,69.9126783 L118.5,71.1126783 C118.5,71.3888207 118.276142,71.6126783 118,71.6126783 C117.723858,71.6126783 117.5,71.3888207 117.5,71.1126783 L117.5,69.9126783 C117.5,69.636536 117.723858,69.4126783 118,69.4126783 C118.276142,69.4126783 118.5,69.636536 118.5,69.9126783 Z M0.5,69.061965 L0.5,70.261965 C0.5,70.5381074 0.276142375,70.761965 0,70.761965 C-0.276142375,70.761965 -0.5,70.5381074 -0.5,70.261965 L-0.5,69.061965 C-0.5,68.7858226 -0.276142375,68.561965 0,68.561965 C0.276142375,68.561965 0.5,68.7858226 0.5,69.061965 Z M118.5,65.7126783 L118.5,66.9126783 C118.5,67.1888207 118.276142,67.4126783 118,67.4126783 C117.723858,67.4126783 117.5,67.1888207 117.5,66.9126783 L117.5,65.7126783 C117.5,65.436536 117.723858,65.2126783 118,65.2126783 C118.276142,65.2126783 118.5,65.436536 118.5,65.7126783 Z M0.5,64.861965 L0.5,66.061965 C0.5,66.3381074 0.276142375,66.561965 0,66.561965 C-0.276142375,66.561965 -0.5,66.3381074 -0.5,66.061965 L-0.5,64.861965 C-0.5,64.5858226 -0.276142375,64.361965 0,64.361965 C0.276142375,64.361965 0.5,64.5858226 0.5,64.861965 Z M118.5,61.5126783 L118.5,62.7126783 C118.5,62.9888207 118.276142,63.2126783 118,63.2126783 C117.723858,63.2126783 117.5,62.9888207 117.5,62.7126783 L117.5,61.5126783 C117.5,61.236536 117.723858,61.0126783 118,61.0126783 C118.276142,61.0126783 118.5,61.236536 118.5,61.5126783 Z M0.5,60.661965 L0.5,61.861965 C0.5,62.1381074 0.276142375,62.361965 0,62.361965 C-0.276142375,62.361965 -0.5,62.1381074 -0.5,61.861965 L-0.5,60.661965 C-0.5,60.3858226 -0.276142375,60.161965 0,60.161965 C0.276142375,60.161965 0.5,60.3858226 0.5,60.661965 Z M118.5,57.3126783 L118.5,58.5126783 C118.5,58.7888207 118.276142,59.0126783 118,59.0126783 C117.723858,59.0126783 117.5,58.7888207 117.5,58.5126783 L117.5,57.3126783 C117.5,57.036536 117.723858,56.8126783 118,56.8126783 C118.276142,56.8126783 118.5,57.036536 118.5,57.3126783 Z M0.5,56.461965 L0.5,57.661965 C0.5,57.9381074 0.276142375,58.161965 0,58.161965 C-0.276142375,58.161965 -0.5,57.9381074 -0.5,57.661965 L-0.5,56.461965 C-0.5,56.1858226 -0.276142375,55.961965 0,55.961965 C0.276142375,55.961965 0.5,56.1858226 0.5,56.461965 Z M118.5,53.1126783 L118.5,54.3126783 C118.5,54.5888207 118.276142,54.8126783 118,54.8126783 C117.723858,54.8126783 117.5,54.5888207 117.5,54.3126783 L117.5,53.1126783 C117.5,52.836536 117.723858,52.6126783 118,52.6126783 C118.276142,52.6126783 118.5,52.836536 118.5,53.1126783 Z M0.5,52.261965 L0.5,53.461965 C0.5,53.7381074 0.276142375,53.961965 0,53.961965 C-0.276142375,53.961965 -0.5,53.7381074 -0.5,53.461965 L-0.5,52.261965 C-0.5,51.9858226 -0.276142375,51.761965 0,51.761965 C0.276142375,51.761965 0.5,51.9858226 0.5,52.261965 Z M118.5,48.9126783 L118.5,50.1126783 C118.5,50.3888207 118.276142,50.6126783 118,50.6126783 C117.723858,50.6126783 117.5,50.3888207 117.5,50.1126783 L117.5,48.9126783 C117.5,48.636536 117.723858,48.4126783 118,48.4126783 C118.276142,48.4126783 118.5,48.636536 118.5,48.9126783 Z M0.5,48.061965 L0.5,49.261965 C0.5,49.5381074 0.276142375,49.761965 0,49.761965 C-0.276142375,49.761965 -0.5,49.5381074 -0.5,49.261965 L-0.5,48.061965 C-0.5,47.7858226 -0.276142375,47.561965 0,47.561965 C0.276142375,47.561965 0.5,47.7858226 0.5,48.061965 Z M118.5,44.7126783 L118.5,45.9126783 C118.5,46.1888207 118.276142,46.4126783 118,46.4126783 C117.723858,46.4126783 117.5,46.1888207 117.5,45.9126783 L117.5,44.7126783 C117.5,44.436536 117.723858,44.2126783 118,44.2126783 C118.276142,44.2126783 118.5,44.436536 118.5,44.7126783 Z M0.5,43.861965 L0.5,45.061965 C0.5,45.3381074 0.276142375,45.561965 0,45.561965 C-0.276142375,45.561965 -0.5,45.3381074 -0.5,45.061965 L-0.5,43.861965 C-0.5,43.5858226 -0.276142375,43.361965 0,43.361965 C0.276142375,43.361965 0.5,43.5858226 0.5,43.861965 Z M118.5,40.5126783 L118.5,41.7126783 C118.5,41.9888207 118.276142,42.2126783 118,42.2126783 C117.723858,42.2126783 117.5,41.9888207 117.5,41.7126783 L117.5,40.5126783 C117.5,40.236536 117.723858,40.0126783 118,40.0126783 C118.276142,40.0126783 118.5,40.236536 118.5,40.5126783 Z M0.5,39.661965 L0.5,40.861965 C0.5,41.1381074 0.276142375,41.361965 0,41.361965 C-0.276142375,41.361965 -0.5,41.1381074 -0.5,40.861965 L-0.5,39.661965 C-0.5,39.3858226 -0.276142375,39.161965 0,39.161965 C0.276142375,39.161965 0.5,39.3858226 0.5,39.661965 Z M118.5,36.3126783 L118.5,37.5126783 C118.5,37.7888207 118.276142,38.0126783 118,38.0126783 C117.723858,38.0126783 117.5,37.7888207 117.5,37.5126783 L117.5,36.3126783 C117.5,36.036536 117.723858,35.8126783 118,35.8126783 C118.276142,35.8126783 118.5,36.036536 118.5,36.3126783 Z M0.5,35.461965 L0.5,36.661965 C0.5,36.9381074 0.276142375,37.161965 0,37.161965 C-0.276142375,37.161965 -0.5,36.9381074 -0.5,36.661965 L-0.5,35.461965 C-0.5,35.1858226 -0.276142375,34.961965 0,34.961965 C0.276142375,34.961965 0.5,35.1858226 0.5,35.461965 Z M118.5,32.1126783 L118.5,33.3126783 C118.5,33.5888207 118.276142,33.8126783 118,33.8126783 C117.723858,33.8126783 117.5,33.5888207 117.5,33.3126783 L117.5,32.1126783 C117.5,31.836536 117.723858,31.6126783 118,31.6126783 C118.276142,31.6126783 118.5,31.836536 118.5,32.1126783 Z M0.5,31.261965 L0.5,32.461965 C0.5,32.7381074 0.276142375,32.961965 0,32.961965 C-0.276142375,32.961965 -0.5,32.7381074 -0.5,32.461965 L-0.5,31.261965 C-0.5,30.9858226 -0.276142375,30.761965 0,30.761965 C0.276142375,30.761965 0.5,30.9858226 0.5,31.261965 Z M118.5,27.9126783 L118.5,29.1126783 C118.5,29.3888207 118.276142,29.6126783 118,29.6126783 C117.723858,29.6126783 117.5,29.3888207 117.5,29.1126783 L117.5,27.9126783 C117.5,27.636536 117.723858,27.4126783 118,27.4126783 C118.276142,27.4126783 118.5,27.636536 118.5,27.9126783 Z M0.5,27.061965 L0.5,28.261965 C0.5,28.5381074 0.276142375,28.761965 0,28.761965 C-0.276142375,28.761965 -0.5,28.5381074 -0.5,28.261965 L-0.5,27.061965 C-0.5,26.7858226 -0.276142375,26.561965 0,26.561965 C0.276142375,26.561965 0.5,26.7858226 0.5,27.061965 Z M118.5,23.7126783 L118.5,24.9126783 C118.5,25.1888207 118.276142,25.4126783 118,25.4126783 C117.723858,25.4126783 117.5,25.1888207 117.5,24.9126783 L117.5,23.7126783 C117.5,23.436536 117.723858,23.2126783 118,23.2126783 C118.276142,23.2126783 118.5,23.436536 118.5,23.7126783 Z M0.5,22.861965 L0.5,24.061965 C0.5,24.3381074 0.276142375,24.561965 0,24.561965 C-0.276142375,24.561965 -0.5,24.3381074 -0.5,24.061965 L-0.5,22.861965 C-0.5,22.5858226 -0.276142375,22.361965 0,22.361965 C0.276142375,22.361965 0.5,22.5858226 0.5,22.861965 Z M118.5,19.5126783 L118.5,20.7126783 C118.5,20.9888207 118.276142,21.2126783 118,21.2126783 C117.723858,21.2126783 117.5,20.9888207 117.5,20.7126783 L117.5,19.5126783 C117.5,19.236536 117.723858,19.0126783 118,19.0126783 C118.276142,19.0126783 118.5,19.236536 118.5,19.5126783 Z M0.5,18.661965 L0.5,19.861965 C0.5,20.1381074 0.276142375,20.361965 0,20.361965 C-0.276142375,20.361965 -0.5,20.1381074 -0.5,19.861965 L-0.5,18.661965 C-0.5,18.3858226 -0.276142375,18.161965 0,18.161965 C0.276142375,18.161965 0.5,18.3858226 0.5,18.661965 Z M118.5,15.3126783 L118.5,16.5126783 C118.5,16.7888207 118.276142,17.0126783 118,17.0126783 C117.723858,17.0126783 117.5,16.7888207 117.5,16.5126783 L117.5,15.3126783 C117.5,15.036536 117.723858,14.8126783 118,14.8126783 C118.276142,14.8126783 118.5,15.036536 118.5,15.3126783 Z M0.5,14.461965 L0.5,15.661965 C0.5,15.9381074 0.276142375,16.161965 0,16.161965 C-0.276142375,16.161965 -0.5,15.9381074 -0.5,15.661965 L-0.5,14.461965 C-0.5,14.1858226 -0.276142375,13.961965 0,13.961965 C0.276142375,13.961965 0.5,14.1858226 0.5,14.461965 Z M118.5,11.1126783 L118.5,12.3126783 C118.5,12.5888207 118.276142,12.8126783 118,12.8126783 C117.723858,12.8126783 117.5,12.5888207 117.5,12.3126783 L117.5,11.1126783 C117.5,10.836536 117.723858,10.6126783 118,10.6126783 C118.276142,10.6126783 118.5,10.836536 118.5,11.1126783 Z M0.5,10.261965 L0.5,11.461965 C0.5,11.7381074 0.276142375,11.961965 0,11.961965 C-0.276142375,11.961965 -0.5,11.7381074 -0.5,11.461965 L-0.5,10.261965 C-0.5,9.98582265 -0.276142375,9.76196502 0,9.76196502 C0.276142375,9.76196502 0.5,9.98582265 0.5,10.261965 Z M118.419805,6.82803952 C118.47219,7.20768546 118.499081,7.59225728 118.499978,7.98103351 L118.5,8.11259543 C118.500046,8.3887378 118.276225,8.61263254 118.000083,8.61267833 C117.723941,8.6127241 117.500046,8.3889036 117.5,8.11276123 L117.49998,7.98227163 C117.499188,7.63955287 117.475436,7.29987535 117.429191,6.96472849 C117.391445,6.69117799 117.582603,6.43882262 117.856153,6.401077 C118.129704,6.36333138 118.382059,6.55448901 118.419805,6.82803952 Z M0.351560452,5.59425596 C0.619685241,5.66031433 0.783492437,5.93122331 0.717434064,6.1993481 C0.628029624,6.5622324 0.565753874,6.93265807 0.531429382,7.30840907 C0.506308636,7.58340645 0.263014824,7.78597148 -0.0119825569,7.76085074 C-0.286979937,7.73572999 -0.489544973,7.49243618 -0.464424227,7.2174388 C-0.425527861,6.79163939 -0.354921179,6.37166007 -0.25353169,5.96012957 C-0.187473317,5.69200478 0.0834356631,5.52819758 0.351560452,5.59425596 Z M116.698898,2.76733419 C116.96108,3.1025346 117.197883,3.45694423 117.407088,3.8275859 C117.542824,4.06806518 117.457913,4.37304812 117.217434,4.50878435 C116.976955,4.64452059 116.671972,4.55960957 116.536235,4.31913028 C116.35164,3.9920895 116.14265,3.67930561 115.911223,3.38342515 C115.741094,3.1659147 115.779505,2.85167098 115.997015,2.68154216 C116.214525,2.51141334 116.528769,2.54982374 116.698898,2.76733419 Z M2.65018329,2.03098857 C2.84694705,2.22473746 2.84939109,2.54131052 2.6556422,2.73807428 C2.39207973,3.00573797 2.14880502,3.29275366 1.92806459,3.59657449 C1.76575137,3.81997777 1.45306636,3.86950105 1.22966307,3.70718783 C1.00625978,3.54487461 0.956736501,3.23218959 1.11904973,3.00878631 C1.36908653,2.66464279 1.64461146,2.33957825 1.94309758,2.03644748 C2.13684647,1.83968372 2.45341952,1.83723968 2.65018329,2.03098857 Z M113.131516,0.0955761984 C113.527572,0.252607282 113.910844,0.439252343 114.278336,0.653707596 C114.516838,0.792888857 114.597354,1.09906161 114.458172,1.33756357 C114.318991,1.57606554 114.012818,1.65658088 113.774317,1.51739962 C113.450161,1.32823426 113.112159,1.1636344 112.762943,1.0251747 C112.506241,0.923395871 112.380651,0.63279011 112.48243,0.376088571 C112.584209,0.119387032 112.874815,-0.00620263532 113.131516,0.0955761984 Z M6.40653195,0.142669081 C6.47802057,0.409397386 6.31974754,0.683576376 6.05301923,0.755064996 C5.68981674,0.852410663 5.33533565,0.976934286 4.9920952,1.12738697 C4.73918251,1.23824627 4.44428705,1.12308924 4.33342776,0.870176548 C4.22256847,0.617263859 4.3377255,0.322368401 4.59063819,0.211509108 C4.97997232,0.0408521629 5.38210756,-0.100411613 5.79413604,-0.21084364 C6.06086434,-0.282332259 6.33504333,-0.124059223 6.40653195,0.142669081 Z M110.500405,2.47493888e-05 C110.500405,0.276167124 110.276522,0.500013673 110.00038,0.5 L108.8,0.5 C108.523858,0.5 108.3,0.276142375 108.3,0 C108.3,-0.276142375 108.523858,-0.5 108.8,-0.5 L110,-0.5 C110.276572,-0.499986326 110.500405,-0.276117625 110.500405,2.47493888e-05 Z M106.3,0 C106.3,0.276142375 106.076142,0.5 105.8,0.5 L104.6,0.5 C104.323858,0.5 104.1,0.276142375 104.1,0 C104.1,-0.276142375 104.323858,-0.5 104.6,-0.5 L105.8,-0.5 C106.076142,-0.5 106.3,-0.276142375 106.3,0 Z M102.1,0 C102.1,0.276142375 101.876142,0.5 101.6,0.5 L100.4,0.5 C100.123858,0.5 99.9,0.276142375 99.9,0 C99.9,-0.276142375 100.123858,-0.5 100.4,-0.5 L101.6,-0.5 C101.876142,-0.5 102.1,-0.276142375 102.1,0 Z M97.9,0 C97.9,0.276142375 97.6761424,0.5 97.4,0.5 L96.2,0.5 C95.9238576,0.5 95.7,0.276142375 95.7,0 C95.7,-0.276142375 95.9238576,-0.5 96.2,-0.5 L97.4,-0.5 C97.6761424,-0.5 97.9,-0.276142375 97.9,0 Z M93.7,0 C93.7,0.276142375 93.4761424,0.5 93.2,0.5 L92,0.5 C91.7238576,0.5 91.5,0.276142375 91.5,0 C91.5,-0.276142375 91.7238576,-0.5 92,-0.5 L93.2,-0.5 C93.4761424,-0.5 93.7,-0.276142375 93.7,0 Z M89.5,0 C89.5,0.276142375 89.2761424,0.5 89,0.5 L87.8,0.5 C87.5238576,0.5 87.3,0.276142375 87.3,0 C87.3,-0.276142375 87.5238576,-0.5 87.8,-0.5 L89,-0.5 C89.2761424,-0.5 89.5,-0.276142375 89.5,0 Z M85.3,0 C85.3,0.276142375 85.0761424,0.5 84.8,0.5 L83.6,0.5 C83.3238576,0.5 83.1,0.276142375 83.1,0 C83.1,-0.276142375 83.3238576,-0.5 83.6,-0.5 L84.8,-0.5 C85.0761424,-0.5 85.3,-0.276142375 85.3,0 Z M81.1,0 C81.1,0.276142375 80.8761424,0.5 80.6,0.5 L79.4,0.5 C79.1238576,0.5 78.9,0.276142375 78.9,0 C78.9,-0.276142375 79.1238576,-0.5 79.4,-0.5 L80.6,-0.5 C80.8761424,-0.5 81.1,-0.276142375 81.1,0 Z M76.9,0 C76.9,0.276142375 76.6761424,0.5 76.4,0.5 L75.2,0.5 C74.9238576,0.5 74.7,0.276142375 74.7,0 C74.7,-0.276142375 74.9238576,-0.5 75.2,-0.5 L76.4,-0.5 C76.6761424,-0.5 76.9,-0.276142375 76.9,0 Z M72.7,0 C72.7,0.276142375 72.4761424,0.5 72.2,0.5 L71,0.5 C70.7238576,0.5 70.5,0.276142375 70.5,0 C70.5,-0.276142375 70.7238576,-0.5 71,-0.5 L72.2,-0.5 C72.4761424,-0.5 72.7,-0.276142375 72.7,0 Z M68.5,0 C68.5,0.276142375 68.2761424,0.5 68,0.5 L66.8,0.5 C66.5238576,0.5 66.3,0.276142375 66.3,0 C66.3,-0.276142375 66.5238576,-0.5 66.8,-0.5 L68,-0.5 C68.2761424,-0.5 68.5,-0.276142375 68.5,0 Z M64.3,0 C64.3,0.276142375 64.0761424,0.5 63.8,0.5 L62.6,0.5 C62.3238576,0.5 62.1,0.276142375 62.1,0 C62.1,-0.276142375 62.3238576,-0.5 62.6,-0.5 L63.8,-0.5 C64.0761424,-0.5 64.3,-0.276142375 64.3,0 Z M60.1,0 C60.1,0.276142375 59.8761424,0.5 59.6,0.5 L58.4,0.5 C58.1238576,0.5 57.9,0.276142375 57.9,0 C57.9,-0.276142375 58.1238576,-0.5 58.4,-0.5 L59.6,-0.5 C59.8761424,-0.5 60.1,-0.276142375 60.1,0 Z M55.9,0 C55.9,0.276142375 55.6761424,0.5 55.4,0.5 L54.2,0.5 C53.9238576,0.5 53.7,0.276142375 53.7,0 C53.7,-0.276142375 53.9238576,-0.5 54.2,-0.5 L55.4,-0.5 C55.6761424,-0.5 55.9,-0.276142375 55.9,0 Z M51.7,0 C51.7,0.276142375 51.4761424,0.5 51.2,0.5 L50,0.5 C49.7238576,0.5 49.5,0.276142375 49.5,0 C49.5,-0.276142375 49.7238576,-0.5 50,-0.5 L51.2,-0.5 C51.4761424,-0.5 51.7,-0.276142375 51.7,0 Z M47.5,0 C47.5,0.276142375 47.2761424,0.5 47,0.5 L45.8,0.5 C45.5238576,0.5 45.3,0.276142375 45.3,0 C45.3,-0.276142375 45.5238576,-0.5 45.8,-0.5 L47,-0.5 C47.2761424,-0.5 47.5,-0.276142375 47.5,0 Z M43.3,0 C43.3,0.276142375 43.0761424,0.5 42.8,0.5 L41.6,0.5 C41.3238576,0.5 41.1,0.276142375 41.1,0 C41.1,-0.276142375 41.3238576,-0.5 41.6,-0.5 L42.8,-0.5 C43.0761424,-0.5 43.3,-0.276142375 43.3,0 Z M39.1,0 C39.1,0.276142375 38.8761424,0.5 38.6,0.5 L37.4,0.5 C37.1238576,0.5 36.9,0.276142375 36.9,0 C36.9,-0.276142375 37.1238576,-0.5 37.4,-0.5 L38.6,-0.5 C38.8761424,-0.5 39.1,-0.276142375 39.1,0 Z M34.9,0 C34.9,0.276142375 34.6761424,0.5 34.4,0.5 L33.2,0.5 C32.9238576,0.5 32.7,0.276142375 32.7,0 C32.7,-0.276142375 32.9238576,-0.5 33.2,-0.5 L34.4,-0.5 C34.6761424,-0.5 34.9,-0.276142375 34.9,0 Z M30.7,0 C30.7,0.276142375 30.4761424,0.5 30.2,0.5 L29,0.5 C28.7238576,0.5 28.5,0.276142375 28.5,0 C28.5,-0.276142375 28.7238576,-0.5 29,-0.5 L30.2,-0.5 C30.4761424,-0.5 30.7,-0.276142375 30.7,0 Z M26.5,0 C26.5,0.276142375 26.2761424,0.5 26,0.5 L24.8,0.5 C24.5238576,0.5 24.3,0.276142375 24.3,0 C24.3,-0.276142375 24.5238576,-0.5 24.8,-0.5 L26,-0.5 C26.2761424,-0.5 26.5,-0.276142375 26.5,0 Z M22.3,0 C22.3,0.276142375 22.0761424,0.5 21.8,0.5 L20.6,0.5 C20.3238576,0.5 20.1,0.276142375 20.1,0 C20.1,-0.276142375 20.3238576,-0.5 20.6,-0.5 L21.8,-0.5 C22.0761424,-0.5 22.3,-0.276142375 22.3,0 Z M18.1,0 C18.1,0.276142375 17.8761424,0.5 17.6,0.5 L16.4,0.5 C16.1238576,0.5 15.9,0.276142375 15.9,0 C15.9,-0.276142375 16.1238576,-0.5 16.4,-0.5 L17.6,-0.5 C17.8761424,-0.5 18.1,-0.276142375 18.1,0 Z M13.9,0 C13.9,0.276142375 13.6761424,0.5 13.4,0.5 L12.2,0.5 C11.9238576,0.5 11.7,0.276142375 11.7,0 C11.7,-0.276142375 11.9238576,-0.5 12.2,-0.5 L13.4,-0.5 C13.6761424,-0.5 13.9,-0.276142375 13.9,0 Z M9.7,0 C9.7,0.276142375 9.47614237,0.5 9.2,0.5 L8,0.5 C7.72385763,0.5 7.5,0.276142375 7.5,0 C7.5,-0.276142375 7.72385763,-0.5 8,-0.5 L9.2,-0.5 C9.47614237,-0.5 9.7,-0.276142375 9.7,0 Z" id="vLLM-dash-box" fill="#999999" fill-rule="nonzero"></path>
+                    <g id="vLLM" stroke-width="1" fill-rule="evenodd" transform="translate(6.5, 4)" fill="#7D7D7D">
+                        <path d="M1.80126953,8 L5.30224609,8 C5.44873047,8 5.56266276,7.95198568 5.64404297,7.85595703 C5.72542318,7.75992839 5.76611328,7.6484375 5.76611328,7.52148437 C5.76611328,7.39453125 5.72460938,7.28222656 5.64160156,7.18457031 C5.55859375,7.08691406 5.44547526,7.03808594 5.30224609,7.03808594 L2.39697266,7.03808594 L2.39697266,1.58398437 C2.39697266,1.3984375 2.33837891,1.25439453 2.22119141,1.15185547 C2.10400391,1.04931641 1.96240234,0.998046875 1.79638672,0.998046875 C1.63037109,0.998046875 1.48714193,1.04931641 1.36669922,1.15185547 C1.24625651,1.25439453 1.18603516,1.3984375 1.18603516,1.58398437 L1.18603516,7.39453125 C1.18603516,7.56054687 1.24707031,7.70296224 1.36914062,7.82177734 C1.49121094,7.94059245 1.63525391,8 1.80126953,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M7.27978516,8 L10.7807617,8 C10.9272461,8 11.0411784,7.95198568 11.1225586,7.85595703 C11.2039388,7.75992839 11.2446289,7.6484375 11.2446289,7.52148437 C11.2446289,7.39453125 11.203125,7.28222656 11.1201172,7.18457031 C11.0371094,7.08691406 10.9239909,7.03808594 10.7807617,7.03808594 L7.87548828,7.03808594 L7.87548828,1.58398437 C7.87548828,1.3984375 7.81689453,1.25439453 7.69970703,1.15185547 C7.58251953,1.04931641 7.44091797,0.998046875 7.27490234,0.998046875 C7.10888672,0.998046875 6.96565755,1.04931641 6.84521484,1.15185547 C6.72477214,1.25439453 6.66455078,1.3984375 6.66455078,1.58398437 L6.66455078,7.39453125 C6.66455078,7.56054687 6.72558594,7.70296224 6.84765625,7.82177734 C6.96972656,7.94059245 7.11376953,8 7.27978516,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M15.9418945,7.78027344 L15.9418945,6.31054688 L14.0473633,1.88183594 C13.9529622,1.65722656 13.8162435,1.48876953 13.637207,1.37646484 C13.4581706,1.26416016 13.2661133,1.20800781 13.0610352,1.20800781 C12.8136393,1.20800781 12.5996094,1.2796224 12.4189453,1.42285156 C12.2382812,1.56608073 12.1479492,1.75 12.1479492,1.97460938 L12.1479492,7.47753906 C12.1479492,7.60449219 12.2032878,7.70377604 12.3139648,7.77539063 C12.4246419,7.84700521 12.5556641,7.8828125 12.7070312,7.8828125 C12.8583984,7.8828125 12.9894206,7.84700521 13.1000977,7.77539063 C13.2107747,7.70377604 13.2661133,7.60449219 13.2661133,7.47753906 L13.2661133,2.5703125 L15.2436523,7.25292969 C15.3152669,7.42220052 15.4145508,7.55240885 15.5415039,7.64355469 C15.668457,7.73470052 15.8019206,7.78027344 15.9418945,7.78027344 Z M15.9467773,7.78027344 C16.0867513,7.78027344 16.2210286,7.73388672 16.3496094,7.64111328 C16.4781901,7.54833984 16.5766602,7.41894531 16.6450195,7.25292969 L18.6274414,2.5703125 L18.6274414,7.47753906 C18.6274414,7.60449219 18.6827799,7.70377604 18.793457,7.77539063 C18.9041341,7.84700521 19.0351562,7.8828125 19.1865234,7.8828125 C19.3378906,7.8828125 19.4689128,7.84700521 19.5795898,7.77539063 C19.6902669,7.70377604 19.7456055,7.60449219 19.7456055,7.47753906 L19.7456055,1.97460938 C19.7456055,1.82486979 19.702474,1.69059245 19.6162109,1.57177734 C19.5299479,1.45296224 19.4176432,1.36263021 19.2792969,1.30078125 C19.1409505,1.23893229 18.9920247,1.20800781 18.8325195,1.20800781 C18.6274414,1.20800781 18.4353841,1.26416016 18.2563477,1.37646484 C18.0773112,1.48876953 17.9389648,1.65722656 17.8413086,1.88183594 L15.9467773,6.31054688 L15.9467773,7.78027344 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="Lines" transform="translate(20, 0)" fill="#8E8E8E" fill-rule="nonzero">
+                    <g id="L5" transform="translate(0, 138)">
+                        <path d="M77.5,-0.277777778 L80,4.72222222 L77.999,4.72222222 L78,20 C78,20.2761424 77.7761424,20.5 77.5,20.5 C77.2238576,20.5 77,20.2761424 77,20 L76.999,4.72222222 L75,4.72222222 L77.5,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M31.5,-0.277777778 L34,4.72222222 L31.999,4.72222222 L32,20 C32,20.2761424 31.7761424,20.5 31.5,20.5 C31.2238576,20.5 31,20.2761424 31,20 L30.999,4.72222222 L29,4.72222222 L31.5,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M0.5,-0.277777778 L3,4.72222222 L0.999,4.72222222 L1,20 C1,20.2761424 0.776142375,20.5 0.5,20.5 C0.223857625,20.5 1.33226763e-15,20.2761424 1.33226763e-15,20 L-0.001,4.72222222 L-2,4.72222222 L0.5,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L4" transform="translate(0.0001, 110)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L3" transform="translate(0, 82)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253504e-06,16.2761424 -1.49253504e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L2" transform="translate(0.0001, 54)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L1" transform="translate(39, 0)">
+                        <path d="M0.5,0.722222222 L3,5.72222222 L0.999,5.72222222 L1,42 C1,42.2761424 0.776142375,42.5 0.5,42.5 C0.223857625,42.5 2.22044605e-15,42.2761424 2.22044605e-15,42 L-0.001,5.72222222 L-2,5.72222222 L0.5,0.722222222 Z" id="Combined-Shape"></path>
+                    </g>
+                </g>
+                <g id="Rectangle-@blue" transform="translate(8, 42)">
+                    <g id="L5" transform="translate(0, 116)">
+                        <g id="1" fill="#30A2FF">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-rule="nonzero"></path>
+                        </g>
+                        <g id="2" transform="translate(32, 0)" fill="#7E88FD">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-rule="nonzero"></path>
+                        </g>
+                        <g id="..." transform="translate(62.7, 5)" fill="#8E8E8E">
+                            <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                            <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                            <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                        </g>
+                        <g id="3" transform="translate(78, 0)" fill="#7E88FD">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-rule="nonzero"></path>
+                        </g>
+                    </g>
+                    <g id="L1" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                        <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="L2" transform="translate(0, 28)" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                        <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="L3" transform="translate(0, 56)" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                        <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="L4" transform="translate(0, 84)" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                        <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+            </g>
+            <g id="Pooling" transform="translate(-1.4, 0)">
+                <g id="dash-box" transform="translate(0, 27.6)">
+                    <path d="M4.40000021,0.4 L116.4,0.4 C118.056854,0.4 119.4,1.74314575 119.4,3.4 L119.4,19.4 C119.4,21.0568542 118.056854,22.4 116.4,22.4 L4.40000021,22.4 C2.74314596,22.4 1.40000021,21.0568542 1.40000021,19.4 L1.40000021,3.4 C1.40000021,1.74314575 2.74314596,0.4 4.40000021,0.4 Z" id="vLLM-dash-box" fill="#F7F7F7"></path>
+                    <path d="M97.1844026,22.4 C97.1844026,22.6209139 97.0053165,22.8 96.7844026,22.8 L95.7844026,22.8 C95.5634887,22.8 95.3844026,22.6209139 95.3844026,22.4 C95.3844026,22.1790861 95.5634887,22 95.7844026,22 L96.7844025,22 C97.0053165,22 97.1844026,22.1790861 97.1844026,22.4 Z M7.58440258,22.4 C7.58440258,22.6209139 7.40531648,22.8 7.18440258,22.8 L6.18440258,22.8 C5.96348868,22.8 5.78440258,22.6209139 5.78440258,22.4 C5.78440258,22.1790861 5.96348868,22 6.18440258,22 L7.18440261,22 C7.40531648,22 7.58440258,22.1790861 7.58440258,22.4 Z M10.7844026,22.4 C10.7844026,22.6209139 10.6053165,22.8 10.3844026,22.8 L9.38440258,22.8 C9.16348868,22.8 8.98440258,22.6209139 8.98440258,22.4 C8.98440258,22.1790861 9.16348868,22 9.38440258,22 L10.3844026,22 C10.6053165,22 10.7844026,22.1790861 10.7844026,22.4 Z M100.384403,22.4 C100.384403,22.6209139 100.205316,22.8 99.9844026,22.8 L98.9844026,22.8 C98.7634887,22.8 98.5844026,22.6209139 98.5844026,22.4 C98.5844026,22.1790861 98.7634887,22 98.9844026,22 L99.9844025,22 C100.205316,22 100.384403,22.1790861 100.384403,22.4 Z M17.1844026,22.4 C17.1844026,22.6209139 17.0053165,22.8 16.7844026,22.8 L15.7844026,22.8 C15.5634887,22.8 15.3844026,22.6209139 15.3844026,22.4 C15.3844026,22.1790861 15.5634887,22 15.7844026,22 L16.7844026,22 C17.0053165,22 17.1844026,22.1790861 17.1844026,22.4 Z M20.3844026,22.4 C20.3844026,22.6209139 20.2053165,22.8 19.9844026,22.8 L18.9844026,22.8 C18.7634887,22.8 18.5844026,22.6209139 18.5844026,22.4 C18.5844026,22.1790861 18.7634887,22 18.9844026,22 L19.9844026,22 C20.2053165,22 20.3844026,22.1790861 20.3844026,22.4 Z M23.5844026,22.4 C23.5844026,22.6209139 23.4053165,22.8 23.1844026,22.8 L22.1844026,22.8 C21.9634887,22.8 21.7844026,22.6209139 21.7844026,22.4 C21.7844026,22.1790861 21.9634887,22 22.1844026,22 L23.1844026,22 C23.4053165,22 23.5844026,22.1790861 23.5844026,22.4 Z M26.7844026,22.4 C26.7844026,22.6209139 26.6053165,22.8 26.3844026,22.8 L25.3844026,22.8 C25.1634887,22.8 24.9844026,22.6209139 24.9844026,22.4 C24.9844026,22.1790861 25.1634887,22 25.3844026,22 L26.3844026,22 C26.6053165,22 26.7844026,22.1790861 26.7844026,22.4 Z M29.9844026,22.4 C29.9844026,22.6209139 29.8053165,22.8 29.5844026,22.8 L28.5844026,22.8 C28.3634887,22.8 28.1844026,22.6209139 28.1844026,22.4 C28.1844026,22.1790861 28.3634887,22 28.5844026,22 L29.5844027,22 C29.8053165,22 29.9844026,22.1790861 29.9844026,22.4 Z M33.1844026,22.4 C33.1844026,22.6209139 33.0053165,22.8 32.7844026,22.8 L31.7844026,22.8 C31.5634887,22.8 31.3844026,22.6209139 31.3844026,22.4 C31.3844026,22.1790861 31.5634887,22 31.7844026,22 L32.7844027,22 C33.0053165,22 33.1844026,22.1790861 33.1844026,22.4 Z M36.3844026,22.4 C36.3844026,22.6209139 36.2053165,22.8 35.9844026,22.8 L34.9844026,22.8 C34.7634887,22.8 34.5844026,22.6209139 34.5844026,22.4 C34.5844026,22.1790861 34.7634887,22 34.9844026,22 L35.9844027,22 C36.2053165,22 36.3844026,22.1790861 36.3844026,22.4 Z M39.5844026,22.4 C39.5844026,22.6209139 39.4053165,22.8 39.1844026,22.8 L38.1844026,22.8 C37.9634887,22.8 37.7844026,22.6209139 37.7844026,22.4 C37.7844026,22.1790861 37.9634887,22 38.1844026,22 L39.1844027,22 C39.4053165,22 39.5844026,22.1790861 39.5844026,22.4 Z M42.7844026,22.4 C42.7844026,22.6209139 42.6053165,22.8 42.3844026,22.8 L41.3844026,22.8 C41.1634887,22.8 40.9844026,22.6209139 40.9844026,22.4 C40.9844026,22.1790861 41.1634887,22 41.3844026,22 L42.3844027,22 C42.6053165,22 42.7844026,22.1790861 42.7844026,22.4 Z M45.9844026,22.4 C45.9844026,22.6209139 45.8053165,22.8 45.5844026,22.8 L44.5844026,22.8 C44.3634887,22.8 44.1844026,22.6209139 44.1844026,22.4 C44.1844026,22.1790861 44.3634887,22 44.5844026,22 L45.5844027,22 C45.8053165,22 45.9844026,22.1790861 45.9844026,22.4 Z M49.1844026,22.4 C49.1844026,22.6209139 49.0053165,22.8 48.7844026,22.8 L47.7844026,22.8 C47.5634887,22.8 47.3844026,22.6209139 47.3844026,22.4 C47.3844026,22.1790861 47.5634887,22 47.7844026,22 L48.7844027,22 C49.0053165,22 49.1844026,22.1790861 49.1844026,22.4 Z M52.3844026,22.4 C52.3844026,22.6209139 52.2053165,22.8 51.9844026,22.8 L50.9844026,22.8 C50.7634887,22.8 50.5844026,22.6209139 50.5844026,22.4 C50.5844026,22.1790861 50.7634887,22 50.9844026,22 L51.9844027,22 C52.2053165,22 52.3844026,22.1790861 52.3844026,22.4 Z M55.5844026,22.4 C55.5844026,22.6209139 55.4053165,22.8 55.1844026,22.8 L54.1844026,22.8 C53.9634887,22.8 53.7844026,22.6209139 53.7844026,22.4 C53.7844026,22.1790861 53.9634887,22 54.1844026,22 L55.1844027,22 C55.4053165,22 55.5844026,22.1790861 55.5844026,22.4 Z M58.7844026,22.4 C58.7844026,22.6209139 58.6053165,22.8 58.3844026,22.8 L57.3844026,22.8 C57.1634887,22.8 56.9844026,22.6209139 56.9844026,22.4 C56.9844026,22.1790861 57.1634887,22 57.3844026,22 L58.3844027,22 C58.6053165,22 58.7844026,22.1790861 58.7844026,22.4 Z M61.9844026,22.4 C61.9844026,22.6209139 61.8053165,22.8 61.5844026,22.8 L60.5844026,22.8 C60.3634887,22.8 60.1844026,22.6209139 60.1844026,22.4 C60.1844026,22.1790861 60.3634887,22 60.5844026,22 L61.5844025,22 C61.8053165,22 61.9844026,22.1790861 61.9844026,22.4 Z M65.1844026,22.4 C65.1844026,22.6209139 65.0053165,22.8 64.7844026,22.8 L63.7844026,22.8 C63.5634887,22.8 63.3844026,22.6209139 63.3844026,22.4 C63.3844026,22.1790861 63.5634887,22 63.7844026,22 L64.7844025,22 C65.0053165,22 65.1844026,22.1790861 65.1844026,22.4 Z M68.3844026,22.4 C68.3844026,22.6209139 68.2053165,22.8 67.9844026,22.8 L66.9844026,22.8 C66.7634887,22.8 66.5844026,22.6209139 66.5844026,22.4 C66.5844026,22.1790861 66.7634887,22 66.9844026,22 L67.9844025,22 C68.2053165,22 68.3844026,22.1790861 68.3844026,22.4 Z M13.9844026,22.4 C13.9844026,22.6209139 13.8053165,22.8 13.5844026,22.8 L12.5844026,22.8 C12.3634887,22.8 12.1844026,22.6209139 12.1844026,22.4 C12.1844026,22.1790861 12.3634887,22 12.5844026,22 L13.5844026,22 C13.8053165,22 13.9844026,22.1790861 13.9844026,22.4 Z M71.5844026,22.4 C71.5844026,22.6209139 71.4053165,22.8 71.1844026,22.8 L70.1844026,22.8 C69.9634887,22.8 69.7844026,22.6209139 69.7844026,22.4 C69.7844026,22.1790861 69.9634887,22 70.1844026,22 L71.1844025,22 C71.4053165,22 71.5844026,22.1790861 71.5844026,22.4 Z M74.7844026,22.4 C74.7844026,22.6209139 74.6053165,22.8 74.3844026,22.8 L73.3844026,22.8 C73.1634887,22.8 72.9844026,22.6209139 72.9844026,22.4 C72.9844026,22.1790861 73.1634887,22 73.3844026,22 L74.3844025,22 C74.6053165,22 74.7844026,22.1790861 74.7844026,22.4 Z M77.9844026,22.4 C77.9844026,22.6209139 77.8053165,22.8 77.5844026,22.8 L76.5844026,22.8 C76.3634887,22.8 76.1844026,22.6209139 76.1844026,22.4 C76.1844026,22.1790861 76.3634887,22 76.5844026,22 L77.5844025,22 C77.8053165,22 77.9844026,22.1790861 77.9844026,22.4 Z M81.1844026,22.4 C81.1844026,22.6209139 81.0053165,22.8 80.7844026,22.8 L79.7844026,22.8 C79.5634887,22.8 79.3844026,22.6209139 79.3844026,22.4 C79.3844026,22.1790861 79.5634887,22 79.7844026,22 L80.7844025,22 C81.0053165,22 81.1844026,22.1790861 81.1844026,22.4 Z M84.3844026,22.4 C84.3844026,22.6209139 84.2053165,22.8 83.9844026,22.8 L82.9844026,22.8 C82.7634887,22.8 82.5844026,22.6209139 82.5844026,22.4 C82.5844026,22.1790861 82.7634887,22 82.9844026,22 L83.9844025,22 C84.2053165,22 84.3844026,22.1790861 84.3844026,22.4 Z M87.5844026,22.4 C87.5844026,22.6209139 87.4053165,22.8 87.1844026,22.8 L86.1844026,22.8 C85.9634887,22.8 85.7844026,22.6209139 85.7844026,22.4 C85.7844026,22.1790861 85.9634887,22 86.1844026,22 L87.1844025,22 C87.4053165,22 87.5844026,22.1790861 87.5844026,22.4 Z M90.7844026,22.4 C90.7844026,22.6209139 90.6053165,22.8 90.3844026,22.8 L89.3844026,22.8 C89.1634887,22.8 88.9844026,22.6209139 88.9844026,22.4 C88.9844026,22.1790861 89.1634887,22 89.3844026,22 L90.3844025,22 C90.6053165,22 90.7844026,22.1790861 90.7844026,22.4 Z M93.9844026,22.4 C93.9844026,22.6209139 93.8053165,22.8 93.5844026,22.8 L92.5844026,22.8 C92.3634887,22.8 92.1844026,22.6209139 92.1844026,22.4 C92.1844026,22.1790861 92.3634887,22 92.5844026,22 L93.5844025,22 C93.8053165,22 93.9844026,22.1790861 93.9844026,22.4 Z M116.384403,22.4 C116.384403,22.6209139 116.205316,22.8 115.984403,22.8 L114.984403,22.8 C114.763489,22.8 114.584403,22.6209139 114.584403,22.4 C114.584403,22.1790861 114.763489,22 114.984403,22 L115.984403,22 C116.205316,22 116.384403,22.1790861 116.384403,22.4 Z M113.184403,22.4 C113.184403,22.6209139 113.005316,22.8 112.784403,22.8 L111.784403,22.8 C111.563489,22.8 111.384403,22.6209139 111.384403,22.4 C111.384403,22.1790861 111.563489,22 111.784403,22 L112.784403,22 C113.005316,22 113.184403,22.1790861 113.184403,22.4 Z M109.984403,22.4 C109.984403,22.6209139 109.805316,22.8 109.584403,22.8 L108.584403,22.8 C108.363489,22.8 108.184403,22.6209139 108.184403,22.4 C108.184403,22.1790861 108.363489,22 108.584403,22 L109.584403,22 C109.805316,22 109.984403,22.1790861 109.984403,22.4 Z M106.784403,22.4 C106.784403,22.6209139 106.605316,22.8 106.384403,22.8 L105.384403,22.8 C105.163489,22.8 104.984403,22.6209139 104.984403,22.4 C104.984403,22.1790861 105.163489,22 105.384403,22 L106.384402,22 C106.605316,22 106.784403,22.1790861 106.784403,22.4 Z M103.584403,22.4 C103.584403,22.6209139 103.405316,22.8 103.184403,22.8 L102.184403,22.8 C101.963489,22.8 101.784403,22.6209139 101.784403,22.4 C101.784403,22.1790861 101.963489,22 102.184403,22 L103.184402,22 C103.405316,22 103.584403,22.1790861 103.584403,22.4 Z M3.21445335,21.7146857 C3.47201296,21.8469101 3.7503283,21.9352706 4.04005994,21.9753362 C4.25889142,22.0055973 4.41175792,22.2075267 4.38149683,22.4263582 C4.35123574,22.6451896 4.14930631,22.7980562 3.93047483,22.7677951 C3.55112172,22.7153362 3.1863813,22.5995372 2.84908821,22.4263797 C2.65255935,22.3254869 2.575031,22.084379 2.6759238,21.8878502 C2.77681659,21.6913213 3.01792449,21.6137929 3.21445335,21.7146857 Z M119.085667,20.7967074 C119.266363,20.923797 119.30982,21.1733068 119.182731,21.3540032 C118.964109,21.6648408 118.694714,21.9373837 118.386455,22.1596216 C118.207257,22.2888142 117.957256,22.2482762 117.828063,22.0690774 C117.698871,21.8898786 117.739409,21.6398779 117.918607,21.5106853 C118.154633,21.3405235 118.360988,21.1317568 118.528371,20.8937713 C118.655461,20.7130749 118.904971,20.6696179 119.085667,20.7967074 Z M1.79999053,19.3738218 L1.80050042,19.4515966 C1.80607818,19.7390086 1.85825979,20.0190186 1.95396115,20.2838222 C2.02904737,20.4915841 1.92149248,20.7208778 1.71373058,20.795964 C1.50596867,20.8710502 1.27667495,20.7634953 1.20158873,20.5557334 C1.07625093,20.2089265 1.00793261,19.8423259 1.00058539,19.4621397 L1.00000988,19.3793853 C0.99847357,19.1584767 1.17630991,18.9781495 1.39721847,18.9766034 C1.61812703,18.9750769 1.79845422,19.1529133 1.79999053,19.3738218 Z M119.8,18.0077988 L119.8,19.0077988 C119.8,19.2287127 119.620914,19.4077988 119.4,19.4077988 C119.179086,19.4077988 119,19.2287127 119,19.0077988 L119,18.0077988 C119,17.7868849 119.179086,17.6077988 119.4,17.6077988 C119.620914,17.6077988 119.8,17.7868849 119.8,18.0077988 Z M1.80000021,16.1766036 L1.80000021,17.1766036 C1.80000021,17.3975175 1.62091411,17.5766036 1.40000021,17.5766036 C1.17908631,17.5766036 0.900000207,17.3975175 0.900000207,17.1766036 L0.900000207,16.1766036 C0.900000207,15.9556897 1.17908631,15.7766036 1.40000021,15.7766036 C1.62091411,15.7766036 1.80000021,15.9556897 1.80000021,16.1766036 Z M119.8,14.8077988 L119.8,15.8077988 C119.8,16.0287127 119.620914,16.2077988 119.4,16.2077988 C119.179086,16.2077988 119,16.0287127 119,15.8077988 L119,14.8077988 C119,14.5868849 119.179086,14.4077988 119.4,14.4077988 C119.620914,14.4077988 119.8,14.5868849 119.8,14.8077988 Z M1.80000021,12.9766036 L1.80000021,13.9766036 C1.80000021,14.1975175 1.62091411,14.3766036 1.40000021,14.3766036 C1.17908631,14.3766036 0.900000207,14.1975175 0.900000207,13.9766036 L0.900000207,12.9766036 C0.900000207,12.7556897 1.17908631,12.5766036 1.40000021,12.5766036 C1.62091411,12.5766036 1.80000021,12.7556897 1.80000021,12.9766036 Z M119.8,11.6077988 L119.8,12.6077988 C119.8,12.8287127 119.620914,13.0077988 119.4,13.0077988 C119.179086,13.0077988 119,12.8287127 119,12.6077988 L119,11.6077988 C119,11.3868849 119.179086,11.2077988 119.4,11.2077988 C119.620914,11.2077988 119.8,11.3868849 119.8,11.6077988 Z M1.80000021,9.77660357 L1.80000021,10.7766036 C1.80000021,10.9975175 1.62091411,11.1766036 1.40000021,11.1766036 C1.17908631,11.1766036 0.900000207,10.9975175 0.900000207,10.7766036 L0.900000207,9.77660357 C0.900000207,9.55568967 1.17908631,9.37660357 1.40000021,9.37660357 C1.62091411,9.37660357 1.80000021,9.55568967 1.80000021,9.77660357 Z M119.8,8.40779881 L119.8,9.40779881 C119.8,9.62871271 119.620914,9.80779881 119.4,9.80779881 C119.179086,9.80779881 119,9.62871271 119,9.40779881 L119,8.40779881 C119,8.18688491 119.179086,8.00779881 119.4,8.00779881 C119.620914,8.00779881 119.8,8.18688491 119.8,8.40779881 Z M1.80000021,6.57660357 L1.80000021,7.57660357 C1.80000021,7.79751746 1.62091411,7.97660357 1.40000021,7.97660357 C1.17908631,7.97660357 0.900000207,7.79751746 0.900000207,7.57660357 L0.900000207,6.57660357 C0.900000207,6.35568967 1.17908631,6.17660357 1.40000021,6.17660357 C1.62091411,6.17660357 1.80000021,6.35568967 1.80000021,6.57660357 Z M119.8,5.20779881 L119.8,6.20779881 C119.8,6.42871271 119.620914,6.60779881 119.4,6.60779881 C119.179086,6.60779881 119,6.42871271 119,6.20779881 L119,5.20779881 C119,4.98688491 119.179086,4.80779881 119.4,4.80779881 C119.620914,4.80779881 119.8,4.98688491 119.8,5.20779881 Z M1.80007849,3.37957686 C1.80001327,3.3897814 1.80001327,3.3897814 1.80000021,3.4 L1.80000021,4.37660357 C1.80000021,4.59751746 1.62091411,4.77660357 1.40000021,4.77660357 C1.17908631,4.77660357 1.00000021,4.59751746 1.00000021,4.37660357 L1.00000021,3.4 C1.00001719,3.38671374 1.00001719,3.38671374 1.00010202,3.37344172 C1.00179619,3.15253431 1.18225042,2.97482688 1.40315783,2.97650904 C1.62406523,2.97821522 1.80177267,3.15866946 1.80007849,3.37957686 Z M119.421116,1.83887624 C119.595442,2.17563306 119.712485,2.54002136 119.766215,2.9191864 C119.79721,3.13791515 119.645022,3.34035621 119.426293,3.37135118 C119.207564,3.40234614 119.005123,3.25015784 118.974128,3.0314291 C118.933094,2.74185398 118.843784,2.46380655 118.710664,2.20665099 C118.609106,2.01046515 118.685816,1.76909592 118.882002,1.66753772 C119.078188,1.56597953 119.319557,1.6426904 119.421116,1.83887624 Z M3.37834391,0.550095989 C3.47987763,0.746294496 3.40313666,0.987654157 3.20693815,1.08918788 C2.94867982,1.22283788 2.71432455,1.39926119 2.51394916,1.61035064 C2.36185852,1.77057357 2.10867834,1.77716609 1.94845541,1.62507544 C1.78823248,1.4729848 1.78163997,1.21980462 1.93373061,1.05958169 C2.19539698,0.783924034 2.50154333,0.553456192 2.83925202,0.378690235 C3.03545052,0.277156512 3.27681019,0.353897482 3.37834391,0.550095989 Z M117.517808,0.188136945 C117.72645,0.260742276 117.836729,0.488738155 117.764124,0.697379994 C117.691519,0.906021834 117.463523,1.01630121 117.254881,0.943695883 C116.98269,0.84897599 116.695024,0.800014594 116.400132,0.800000003 C116.179218,0.799989073 116.000152,0.620894112 116.000152,0.399980212 C116.000152,0.179066313 116.179258,-1.09263043e-05 116.400172,4.24230608e-09 C116.784867,1.90385059e-05 117.161375,0.064101627 117.517808,0.188136945 Z M60.2000002,0.5 C60.2000002,0.7209139 60.0209141,0.9 59.8000002,0.9 L58.8000002,0.9 C58.5790863,0.9 58.4000002,0.7209139 58.4000002,0.5 C58.4000002,0.2790861 58.5790863,0 58.8000002,0 L59.8000002,0 C60.0209141,0 60.2000002,0.2790861 60.2000002,0.5 Z M5.80000021,0.5 C5.80000021,0.7209139 5.62091411,0.9 5.40000021,0.9 L4.40000021,0.9 C4.17908631,0.9 4.00000021,0.7209139 4.00000021,0.5 C4.00000021,0.2790861 4.17908631,0 4.40000021,0 L5.40000021,0 C5.62091411,0 5.80000021,0.2790861 5.80000021,0.5 Z M108.2,0.5 C108.2,0.7209139 108.020914,0.9 107.8,0.9 L106.8,0.9 C106.579086,0.9 106.4,0.7209139 106.4,0.5 C106.4,0.2790861 106.579086,0 106.8,0 L107.8,0 C108.020914,0 108.2,0.2790861 108.2,0.5 Z M105,0.5 C105,0.7209139 104.820914,0.9 104.6,0.9 L103.6,0.9 C103.379086,0.9 103.2,0.7209139 103.2,0.5 C103.2,0.2790861 103.379086,0 103.6,0 L104.6,0 C104.820914,0 105,0.2790861 105,0.5 Z M101.8,0.5 C101.8,0.7209139 101.620914,0.9 101.4,0.9 L100.4,0.9 C100.179086,0.9 100,0.7209139 100,0.5 C100,0.2790861 100.179086,0 100.4,0 L101.4,0 C101.620914,0 101.8,0.2790861 101.8,0.5 Z M98.6000002,0.5 C98.6000002,0.7209139 98.4209141,0.9 98.2000002,0.9 L97.2000002,0.9 C96.9790863,0.9 96.8000002,0.7209139 96.8000002,0.5 C96.8000002,0.2790861 96.9790863,0 97.2000002,0 L98.2000002,0 C98.4209141,0 98.6000002,0.2790861 98.6000002,0.5 Z M95.4000002,0.5 C95.4000002,0.7209139 95.2209141,0.9 95.0000002,0.9 L94.0000002,0.9 C93.7790863,0.9 93.6000002,0.7209139 93.6000002,0.5 C93.6000002,0.2790861 93.7790863,0 94.0000002,0 L95.0000002,0 C95.2209141,0 95.4000002,0.2790861 95.4000002,0.5 Z M92.2000002,0.5 C92.2000002,0.7209139 92.0209141,0.9 91.8000002,0.9 L90.8000002,0.9 C90.5790863,0.9 90.4000002,0.7209139 90.4000002,0.5 C90.4000002,0.2790861 90.5790863,0 90.8000002,0 L91.8000002,0 C92.0209141,0 92.2000002,0.2790861 92.2000002,0.5 Z M89.0000002,0.5 C89.0000002,0.7209139 88.8209141,0.9 88.6000002,0.9 L87.6000002,0.9 C87.3790863,0.9 87.2000002,0.7209139 87.2000002,0.5 C87.2000002,0.2790861 87.3790863,0 87.6000002,0 L88.6000002,0 C88.8209141,0 89.0000002,0.2790861 89.0000002,0.5 Z M85.8000002,0.5 C85.8000002,0.7209139 85.6209141,0.9 85.4000002,0.9 L84.4000002,0.9 C84.1790863,0.9 84.0000002,0.7209139 84.0000002,0.5 C84.0000002,0.2790861 84.1790863,0 84.4000002,0 L85.4000002,0 C85.6209141,0 85.8000002,0.2790861 85.8000002,0.5 Z M82.6000002,0.5 C82.6000002,0.7209139 82.4209141,0.9 82.2000002,0.9 L81.2000002,0.9 C80.9790863,0.9 80.8000002,0.7209139 80.8000002,0.5 C80.8000002,0.2790861 80.9790863,0 81.2000002,0 L82.2000002,0 C82.4209141,0 82.6000002,0.2790861 82.6000002,0.5 Z M79.4000002,0.5 C79.4000002,0.7209139 79.2209141,0.9 79.0000002,0.9 L78.0000002,0.9 C77.7790863,0.9 77.6000002,0.7209139 77.6000002,0.5 C77.6000002,0.2790861 77.7790863,0 78.0000002,0 L79.0000002,0 C79.2209141,0 79.4000002,0.2790861 79.4000002,0.5 Z M76.2000002,0.5 C76.2000002,0.7209139 76.0209141,0.9 75.8000002,0.9 L74.8000002,0.9 C74.5790863,0.9 74.4000002,0.7209139 74.4000002,0.5 C74.4000002,0.2790861 74.5790863,0 74.8000002,0 L75.8000002,0 C76.0209141,0 76.2000002,0.2790861 76.2000002,0.5 Z M73.0000002,0.5 C73.0000002,0.7209139 72.8209141,0.9 72.6000002,0.9 L71.6000002,0.9 C71.3790863,0.9 71.2000002,0.7209139 71.2000002,0.5 C71.2000002,0.2790861 71.3790863,0 71.6000002,0 L72.6000002,0 C72.8209141,0 73.0000002,0.2790861 73.0000002,0.5 Z M69.8000002,0.5 C69.8000002,0.7209139 69.6209141,0.9 69.4000002,0.9 L68.4000002,0.9 C68.1790863,0.9 68.0000002,0.7209139 68.0000002,0.5 C68.0000002,0.2790861 68.1790863,0 68.4000002,0 L69.4000002,0 C69.6209141,0 69.8000002,0.2790861 69.8000002,0.5 Z M66.6000002,0.5 C66.6000002,0.7209139 66.4209141,0.9 66.2000002,0.9 L65.2000002,0.9 C64.9790863,0.9 64.8000002,0.7209139 64.8000002,0.5 C64.8000002,0.2790861 64.9790863,0 65.2000002,0 L66.2000002,0 C66.4209141,0 66.6000002,0.2790861 66.6000002,0.5 Z M63.4000002,0.5 C63.4000002,0.7209139 63.2209141,0.9 63.0000002,0.9 L62.0000002,0.9 C61.7790863,0.9 61.6000002,0.7209139 61.6000002,0.5 C61.6000002,0.2790861 61.7790863,0 62.0000002,0 L63.0000002,0 C63.2209141,0 63.4000002,0.2790861 63.4000002,0.5 Z M114.6,0.5 C114.6,0.7209139 114.420914,0.9 114.2,0.9 L113.2,0.9 C112.979086,0.9 112.8,0.7209139 112.8,0.5 C112.8,0.2790861 112.979086,0 113.2,0 L114.2,0 C114.420914,0 114.6,0.2790861 114.6,0.5 Z M57.0000002,0.5 C57.0000002,0.7209139 56.8209141,0.9 56.6000002,0.9 L55.6000002,0.9 C55.3790863,0.9 55.2000002,0.7209139 55.2000002,0.5 C55.2000002,0.2790861 55.3790863,0 55.6000002,0 L56.6000002,0 C56.8209141,0 57.0000002,0.2790861 57.0000002,0.5 Z M53.8000002,0.5 C53.8000002,0.7209139 53.6209141,0.9 53.4000002,0.9 L52.4000002,0.9 C52.1790863,0.9 52.0000002,0.7209139 52.0000002,0.5 C52.0000002,0.2790861 52.1790863,0 52.4000002,0 L53.4000002,0 C53.6209141,0 53.8000002,0.2790861 53.8000002,0.5 Z M50.6000002,0.5 C50.6000002,0.7209139 50.4209141,0.9 50.2000002,0.9 L49.2000002,0.9 C48.9790863,0.9 48.8000002,0.7209139 48.8000002,0.5 C48.8000002,0.2790861 48.9790863,0 49.2000002,0 L50.2000002,0 C50.4209141,0 50.6000002,0.2790861 50.6000002,0.5 Z M47.4000002,0.5 C47.4000002,0.7209139 47.2209141,0.9 47.0000002,0.9 L46.0000002,0.9 C45.7790863,0.9 45.6000002,0.7209139 45.6000002,0.5 C45.6000002,0.2790861 45.7790863,0 46.0000002,0 L47.0000002,0 C47.2209141,0 47.4000002,0.2790861 47.4000002,0.5 Z M44.2000002,0.5 C44.2000002,0.7209139 44.0209141,0.9 43.8000002,0.9 L42.8000002,0.9 C42.5790863,0.9 42.4000002,0.7209139 42.4000002,0.5 C42.4000002,0.2790861 42.5790863,0 42.8000002,0 L43.8000002,0 C44.0209141,0 44.2000002,0.2790861 44.2000002,0.5 Z M41.0000002,0.5 C41.0000002,0.7209139 40.8209141,0.9 40.6000002,0.9 L39.6000002,0.9 C39.3790863,0.9 39.2000002,0.7209139 39.2000002,0.5 C39.2000002,0.2790861 39.3790863,0 39.6000002,0 L40.6000002,0 C40.8209141,0 41.0000002,0.2790861 41.0000002,0.5 Z M37.8000002,0.5 C37.8000002,0.7209139 37.6209141,0.9 37.4000002,0.9 L36.4000002,0.9 C36.1790863,0.9 36.0000002,0.7209139 36.0000002,0.5 C36.0000002,0.2790861 36.1790863,0 36.4000002,0 L37.4000002,0 C37.6209141,0 37.8000002,0.2790861 37.8000002,0.5 Z M34.6000002,0.5 C34.6000002,0.7209139 34.4209141,0.9 34.2000002,0.9 L33.2000002,0.9 C32.9790863,0.9 32.8000002,0.7209139 32.8000002,0.5 C32.8000002,0.2790861 32.9790863,0 33.2000002,0 L34.2000002,0 C34.4209141,0 34.6000002,0.2790861 34.6000002,0.5 Z M31.4000002,0.5 C31.4000002,0.7209139 31.2209141,0.9 31.0000002,0.9 L30.0000002,0.9 C29.7790863,0.9 29.6000002,0.7209139 29.6000002,0.5 C29.6000002,0.2790861 29.7790863,0 30.0000002,0 L31.0000002,0 C31.2209141,0 31.4000002,0.2790861 31.4000002,0.5 Z M28.2000002,0.5 C28.2000002,0.7209139 28.0209141,0.9 27.8000002,0.9 L26.8000002,0.9 C26.5790863,0.9 26.4000002,0.7209139 26.4000002,0.5 C26.4000002,0.2790861 26.5790863,0 26.8000002,0 L27.8000002,0 C28.0209141,0 28.2000002,0.2790861 28.2000002,0.5 Z M25.0000002,0.5 C25.0000002,0.7209139 24.8209141,0.9 24.6000002,0.9 L23.6000002,0.9 C23.3790863,0.9 23.2000002,0.7209139 23.2000002,0.5 C23.2000002,0.2790861 23.3790863,0 23.6000002,0 L24.6000002,0 C24.8209141,0 25.0000002,0.2790861 25.0000002,0.5 Z M21.8000002,0.5 C21.8000002,0.7209139 21.6209141,0.9 21.4000002,0.9 L20.4000002,0.9 C20.1790863,0.9 20.0000002,0.7209139 20.0000002,0.5 C20.0000002,0.2790861 20.1790863,0 20.4000002,0 L21.4000002,0 C21.6209141,0 21.8000002,0.2790861 21.8000002,0.5 Z M18.6000002,0.5 C18.6000002,0.7209139 18.4209141,0.9 18.2000002,0.9 L17.2000002,0.9 C16.9790863,0.9 16.8000002,0.7209139 16.8000002,0.5 C16.8000002,0.2790861 16.9790863,0 17.2000002,0 L18.2000002,0 C18.4209141,0 18.6000002,0.2790861 18.6000002,0.5 Z M15.4000002,0.5 C15.4000002,0.7209139 15.2209141,0.9 15.0000002,0.9 L14.0000002,0.9 C13.7790863,0.9 13.6000002,0.7209139 13.6000002,0.5 C13.6000002,0.2790861 13.7790863,0 14.0000002,0 L15.0000002,0 C15.2209141,0 15.4000002,0.2790861 15.4000002,0.5 Z M12.2000002,0.5 C12.2000002,0.7209139 12.0209141,0.9 11.8000002,0.9 L10.8000002,0.9 C10.5790863,0.9 10.4000002,0.7209139 10.4000002,0.5 C10.4000002,0.2790861 10.5790863,0 10.8000002,0 L11.8000002,0 C12.0209141,0 12.2000002,0.2790861 12.2000002,0.5 Z M9.00000021,0.5 C9.00000021,0.7209139 8.82091411,0.9 8.60000021,0.9 L7.60000021,0.9 C7.37908631,0.9 7.20000021,0.7209139 7.20000021,0.5 C7.20000021,0.2790861 7.37908631,0 7.60000021,0 L8.60000021,0 C8.82091411,0 9.00000021,0.2790861 9.00000021,0.5 Z M111.4,0.5 C111.4,0.7209139 111.220914,0.9 111,0.9 L110,0.9 C109.779086,0.9 109.6,0.7209139 109.6,0.5 C109.6,0.2790861 109.779086,0 110,0 L111,0 C111.220914,0 111.4,0.2790861 111.4,0.5 Z" id="vLLM-dash-box" fill="#8E8E8E" fill-rule="nonzero"></path>
+                </g>
+                <g id="row-@blue" transform="translate(9.4, 33)">
+                    <g id="..." transform="translate(62.7, 6)" fill="#8E8E8E">
+                        <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                    </g>
+                    <g id="1" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="2" transform="translate(32, 0)" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="3" transform="translate(78, 0)" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <path d="M52.9000002,12.2222222 L55.4000002,17.2222222 L53.3990002,17.2222222 L53.4000002,32.9992391 C53.4000002,33.2753815 53.1761426,33.4992391 52.9000002,33.4992391 C52.6238578,33.4992391 52.4000002,33.2753815 52.4000002,32.9992391 L52.3990002,17.2222222 L50.4000002,17.2222222 L52.9000002,12.2222222 Z" id="Combined-Shape" fill="#8E8E8E" fill-rule="nonzero"></path>
+                <path d="M98.9000002,12.2222222 L101.4,17.2222222 L99.3990002,17.2222222 L99.4000002,32.9992391 C99.4000002,33.2753815 99.1761426,33.4992391 98.9000002,33.4992391 C98.6238578,33.4992391 98.4000002,33.2753815 98.4000002,32.9992391 L98.3990002,17.2222222 L96.4000002,17.2222222 L98.9000002,12.2222222 Z" id="Combined-Shape" fill="#8E8E8E" fill-rule="nonzero"></path>
+                <g id="row-@purple" transform="translate(9.4, 0)" fill="#7E88FD" fill-opacity="0.6" stroke="#7E88FD">
+                    <g id="Rectangle-@01_yellow-Copy-16" transform="translate(32, 0)" xlink:href="#path-3">
+                        <rect id="Rectangle" x="0.5" y="0.5" width="23" height="11" rx="3"></rect>
+                    </g>
+                    <g id="Rectangle-@01_yellow-Copy-18" transform="translate(78, 0)" xlink:href="#path-4">
+                        <rect id="Rectangle" x="0.5" y="0.5" width="23" height="11" rx="3"></rect>
+                    </g>
+                </g>
+            </g>
+        </g>
+        <g id="ALL-Pooling" stroke-width="1" transform="translate(656, 70)">
+            <g id="Body" transform="translate(0, 50)">
+                <g id="ALL-Pooling" transform="translate(0, 186)" fill="#7D7D7D" fill-rule="nonzero">
+                    <path d="M33.3825684,9.18017578 L30.729248,9.18017578 L32.0559082,5.640625 L33.3825684,9.18017578 Z M34.2526855,11.5058594 C34.3135579,11.6813151 34.4066569,11.8129069 34.5319824,11.9006348 C34.6573079,11.9883626 34.7969564,12.0322266 34.9509277,12.0322266 C35.1442871,12.0322266 35.3161621,11.9677734 35.4665527,11.8388672 C35.6169434,11.7099609 35.6921387,11.5470378 35.6921387,11.3500977 C35.6921387,11.2605794 35.674235,11.1692708 35.6384277,11.0761719 L33.2805176,5.17333984 C33.130127,4.77587891 32.9573568,4.50911458 32.762207,4.37304688 C32.5670573,4.23697917 32.3316243,4.16894531 32.0559082,4.16894531 C31.7801921,4.16894531 31.5447591,4.23697917 31.3496094,4.37304688 C31.1544596,4.50911458 30.9816895,4.77587891 30.8312988,5.17333984 L28.4733887,11.0761719 C28.4375814,11.1692708 28.4196777,11.2605794 28.4196777,11.3500977 C28.4196777,11.5470378 28.494873,11.7099609 28.6452637,11.8388672 C28.7956543,11.9677734 28.9675293,12.0322266 29.1608887,12.0322266 C29.31486,12.0322266 29.4545085,11.9883626 29.579834,11.9006348 C29.7051595,11.8129069 29.7982585,11.6813151 29.8591309,11.5058594 L30.3425293,10.2167969 L33.7692871,10.2167969 L34.2526855,11.5058594 Z" id="Shape"></path>
+                    <path d="M37.6740723,12 L41.4660645,12 C41.6415202,12 41.7820638,11.9391276 41.8876953,11.8173828 C41.9933268,11.695638 42.0461426,11.5541992 42.0461426,11.3930664 C42.0461426,11.2283529 41.9933268,11.0842285 41.8876953,10.9606934 C41.7820638,10.8371582 41.6415202,10.7753906 41.4660645,10.7753906 L38.4367676,10.7753906 L38.4313965,5.06054688 C38.4313965,4.82063802 38.3562012,4.62727865 38.2058105,4.48046875 C38.0554199,4.33365885 37.8763835,4.26025391 37.6687012,4.26025391 C37.4574382,4.26025391 37.274821,4.33365885 37.1208496,4.48046875 C36.9668783,4.62727865 36.8898926,4.82063802 36.8898926,5.06054688 L36.8898926,11.0600586 C36.8898926,11.3178711 36.9632975,11.5389811 37.1101074,11.7233887 C37.2569173,11.9077962 37.4449056,12 37.6740723,12 Z" id="Path"></path>
+                    <path d="M43.8024902,12 L47.5944824,12 C47.7699382,12 47.9104818,11.9391276 48.0161133,11.8173828 C48.1217448,11.695638 48.1745605,11.5541992 48.1745605,11.3930664 C48.1745605,11.2283529 48.1217448,11.0842285 48.0161133,10.9606934 C47.9104818,10.8371582 47.7699382,10.7753906 47.5944824,10.7753906 L44.5651855,10.7753906 L44.5598145,5.06054688 C44.5598145,4.82063802 44.4846191,4.62727865 44.3342285,4.48046875 C44.1838379,4.33365885 44.0048014,4.26025391 43.7971191,4.26025391 C43.5858561,4.26025391 43.4032389,4.33365885 43.2492676,4.48046875 C43.0952962,4.62727865 43.0183105,4.82063802 43.0183105,5.06054688 L43.0183105,11.0600586 C43.0183105,11.3178711 43.0917155,11.5389811 43.2385254,11.7233887 C43.3853353,11.9077962 43.5733236,12 43.8024902,12 Z" id="Path"></path>
+                    <path d="M52.9924316,12.0053711 C53.2036947,12.0053711 53.3854167,11.9319661 53.5375977,11.7851562 C53.6897786,11.6383464 53.7658691,11.444987 53.7658691,11.2050781 L53.7658691,8.92236328 L55.2644043,8.92236328 C57.2230632,8.92236328 58.2023926,8.14176432 58.2023926,6.58056641 C58.2023926,6.15445964 58.1361491,5.78833008 58.0036621,5.48217773 C57.8711751,5.17602539 57.6769206,4.93432617 57.4208984,4.75708008 C57.1648763,4.57983398 56.8694661,4.45092773 56.534668,4.37036133 C56.1998698,4.28979492 55.8104655,4.24951172 55.3664551,4.24951172 L53.0568848,4.24951172 C52.7919108,4.24951172 52.5860189,4.33902995 52.439209,4.51806641 C52.2923991,4.69710286 52.2189941,4.9226888 52.2189941,5.19482422 L52.2189941,11.2050781 C52.2189941,11.444987 52.2959798,11.6383464 52.4499512,11.7851562 C52.6039225,11.9319661 52.7847493,12.0053711 52.9924316,12.0053711 Z M53.7658691,7.80517578 L53.7658691,5.42578125 L55.1999512,5.42578125 C55.4506022,5.42578125 55.6591797,5.44010417 55.8256836,5.46875 C55.9921875,5.49739583 56.1443685,5.55110677 56.2822266,5.62988281 C56.4200846,5.70865885 56.5212402,5.82682292 56.5856934,5.984375 C56.6501465,6.14192708 56.682373,6.34065755 56.682373,6.58056641 C56.682373,6.82763672 56.6501465,7.03173828 56.5856934,7.19287109 C56.5212402,7.35400391 56.4200846,7.47843424 56.2822266,7.56616211 C56.1443685,7.65388997 55.9886068,7.71565755 55.8149414,7.75146484 C55.641276,7.78727214 55.4237467,7.80517578 55.1623535,7.80517578 L53.7658691,7.80517578 Z" id="Shape"></path>
+                    <path d="M61.6398926,11.1459961 C61.1779785,11.1459961 60.8181152,10.9848633 60.5603027,10.6625977 C60.3024902,10.340332 60.173584,9.88916016 60.173584,9.30908203 C60.173584,8.72184245 60.3015951,8.26619466 60.5576172,7.94213867 C60.8136393,7.61808268 61.1743978,7.45605469 61.6398926,7.45605469 C62.1053874,7.45605469 62.467041,7.61897786 62.7248535,7.94482422 C62.982666,8.27067057 63.1115723,8.72542318 63.1115723,9.30908203 C63.1115723,9.88916016 62.982666,10.340332 62.7248535,10.6625977 C62.467041,10.9848633 62.1053874,11.1459961 61.6398926,11.1459961 Z M61.6398926,12.0644531 C62.0194499,12.0644531 62.3667806,12.0125326 62.6818848,11.9086914 C62.9969889,11.8048503 63.2628581,11.666097 63.4794922,11.4924316 C63.6961263,11.3187663 63.8787435,11.1128743 64.0273438,10.8747559 C64.175944,10.6366374 64.2851562,10.3868815 64.3549805,10.1254883 C64.4248047,9.86409505 64.4597168,9.59195964 64.4597168,9.30908203 C64.4597168,9.01188151 64.4230143,8.72721354 64.3496094,8.45507812 C64.2762044,8.18294271 64.1625163,7.92960612 64.0085449,7.69506836 C63.8545736,7.4605306 63.6683757,7.2582194 63.4499512,7.08813477 C63.2315267,6.91805013 62.9674479,6.78377279 62.6577148,6.68530273 C62.3479818,6.58683268 62.0087077,6.53759766 61.6398926,6.53759766 C61.263916,6.53759766 60.9192708,6.58862305 60.605957,6.69067383 C60.2926432,6.79272461 60.0276693,6.93147786 59.8110352,7.10693359 C59.594401,7.28238932 59.4108887,7.48828125 59.260498,7.72460937 C59.1101074,7.9609375 59,8.21248372 58.9301758,8.47924805 C58.8603516,8.74601237 58.8254395,9.0226237 58.8254395,9.30908203 C58.8254395,9.67073568 58.8827311,10.0126953 58.9973145,10.3349609 C59.1118978,10.6572266 59.2792969,10.949056 59.4995117,11.2104492 C59.7197266,11.4718424 60.0142415,11.6795247 60.3830566,11.8334961 C60.7518717,11.9874674 61.1708171,12.0644531 61.6398926,12.0644531 Z" id="Shape"></path>
+                    <path d="M68.1335449,11.1459961 C67.6716309,11.1459961 67.3117676,10.9848633 67.0539551,10.6625977 C66.7961426,10.340332 66.6672363,9.88916016 66.6672363,9.30908203 C66.6672363,8.72184245 66.7952474,8.26619466 67.0512695,7.94213867 C67.3072917,7.61808268 67.6680501,7.45605469 68.1335449,7.45605469 C68.5990397,7.45605469 68.9606934,7.61897786 69.2185059,7.94482422 C69.4763184,8.27067057 69.6052246,8.72542318 69.6052246,9.30908203 C69.6052246,9.88916016 69.4763184,10.340332 69.2185059,10.6625977 C68.9606934,10.9848633 68.5990397,11.1459961 68.1335449,11.1459961 Z M68.1335449,12.0644531 C68.5131022,12.0644531 68.8604329,12.0125326 69.1755371,11.9086914 C69.4906413,11.8048503 69.7565104,11.666097 69.9731445,11.4924316 C70.1897786,11.3187663 70.3723958,11.1128743 70.5209961,10.8747559 C70.6695964,10.6366374 70.7788086,10.3868815 70.8486328,10.1254883 C70.918457,9.86409505 70.9533691,9.59195964 70.9533691,9.30908203 C70.9533691,9.01188151 70.9166667,8.72721354 70.8432617,8.45507812 C70.7698568,8.18294271 70.6561686,7.92960612 70.5021973,7.69506836 C70.3482259,7.4605306 70.162028,7.2582194 69.9436035,7.08813477 C69.725179,6.91805013 69.4611003,6.78377279 69.1513672,6.68530273 C68.8416341,6.58683268 68.50236,6.53759766 68.1335449,6.53759766 C67.7575684,6.53759766 67.4129232,6.58862305 67.0996094,6.69067383 C66.7862956,6.79272461 66.5213216,6.93147786 66.3046875,7.10693359 C66.0880534,7.28238932 65.904541,7.48828125 65.7541504,7.72460937 C65.6037598,7.9609375 65.4936523,8.21248372 65.4238281,8.47924805 C65.3540039,8.74601237 65.3190918,9.0226237 65.3190918,9.30908203 C65.3190918,9.67073568 65.3763835,10.0126953 65.4909668,10.3349609 C65.6055501,10.6572266 65.7729492,10.949056 65.9931641,11.2104492 C66.2133789,11.4718424 66.5078939,11.6795247 66.876709,11.8334961 C67.2455241,11.9874674 67.6644694,12.0644531 68.1335449,12.0644531 Z" id="Shape"></path>
+                    <path d="M72.7849121,12 C72.9854329,12 73.1510417,11.9337565 73.2817383,11.8012695 C73.4124349,11.6687826 73.4777832,11.484375 73.4777832,11.2480469 L73.4777832,5.01220703 C73.4777832,4.77587891 73.4133301,4.59147135 73.2844238,4.45898438 C73.1555176,4.3264974 72.9925944,4.26025391 72.7956543,4.26025391 C72.5987142,4.26025391 72.4375814,4.3264974 72.3122559,4.45898438 C72.1869303,4.59147135 72.1242676,4.77587891 72.1242676,5.01220703 L72.1242676,11.2480469 C72.1242676,11.4879557 72.1860352,11.6732585 72.3095703,11.8039551 C72.4331055,11.9346517 72.5915527,12 72.7849121,12 Z" id="Path"></path>
+                    <path d="M75.6101074,12 C75.8070475,12 75.9690755,11.9337565 76.0961914,11.8012695 C76.2233073,11.6687826 76.2868652,11.484375 76.2868652,11.2480469 L76.2868652,7.37011719 C76.2868652,7.13020833 76.2233073,6.94401042 76.0961914,6.81152344 C75.9690755,6.67903646 75.8070475,6.61279297 75.6101074,6.61279297 C75.4131673,6.61279297 75.2520345,6.67903646 75.126709,6.81152344 C75.0013835,6.94401042 74.9387207,7.13020833 74.9387207,7.37011719 L74.9387207,11.2480469 C74.9387207,11.4879557 75.0013835,11.6732585 75.126709,11.8039551 C75.2520345,11.9346517 75.4131673,12 75.6101074,12 Z M75.6101074,5.53857422 C75.8428548,5.53857422 76.0317383,5.46964518 76.1767578,5.33178711 C76.3217773,5.19392904 76.3942871,5.0139974 76.3942871,4.79199219 C76.3942871,4.56998698 76.3226725,4.39095052 76.1794434,4.25488281 C76.0362142,4.1188151 75.8482259,4.05078125 75.6154785,4.05078125 C75.3791504,4.05078125 75.1884766,4.1188151 75.043457,4.25488281 C74.8984375,4.39095052 74.8259277,4.56998698 74.8259277,4.79199219 C74.8259277,5.0139974 74.8984375,5.19392904 75.043457,5.33178711 C75.1884766,5.46964518 75.37736,5.53857422 75.6101074,5.53857422 Z" id="Shape"></path>
+                    <path d="M78.3225098,12 C78.5194499,12 78.6814779,11.9346517 78.8085938,11.8039551 C78.9357096,11.6732585 78.9992676,11.4915365 78.9992676,11.2587891 L78.9992676,8.98681641 C78.9992676,8.5320638 79.1299642,8.1632487 79.3913574,7.88037109 C79.6527507,7.59749349 79.9624837,7.45605469 80.3205566,7.45605469 C80.6177572,7.45605469 80.8621419,7.55362956 81.0537109,7.7487793 C81.2452799,7.94392904 81.3410645,8.22591146 81.3410645,8.59472656 L81.3410645,11.2587891 C81.3410645,11.4915365 81.4037272,11.6732585 81.5290527,11.8039551 C81.6543783,11.9346517 81.8119303,12 82.001709,12 C82.2022298,12 82.3669434,11.9346517 82.4958496,11.8039551 C82.6247559,11.6732585 82.689209,11.4915365 82.689209,11.2587891 L82.689209,8.60009766 C82.689209,8.25634766 82.6372884,7.95019531 82.5334473,7.68164063 C82.4296061,7.41308594 82.2881673,7.19734701 82.1091309,7.03442383 C81.9300944,6.87150065 81.726888,6.74796549 81.4995117,6.66381836 C81.2721354,6.57967122 81.029541,6.53759766 80.7717285,6.53759766 C80.3527832,6.53759766 79.9920247,6.6163737 79.6894531,6.77392578 C79.3868815,6.93147786 79.1568197,7.1624349 78.9992676,7.46679688 L78.9992676,7.27880859 C78.9992676,7.06754557 78.9366048,6.90372721 78.8112793,6.78735352 C78.6859538,6.67097982 78.5266113,6.61279297 78.333252,6.61279297 C78.1363118,6.61279297 77.9733887,6.671875 77.8444824,6.79003906 C77.7155762,6.90820312 77.651123,7.07470703 77.651123,7.28955078 L77.651123,11.2587891 C77.651123,11.4915365 77.7137858,11.6732585 77.8391113,11.8039551 C77.9644368,11.9346517 78.1255697,12 78.3225098,12 Z" id="Path"></path>
+                    <path d="M87.9313965,11.9301758 C87.9313965,12.4386393 87.791748,12.8173014 87.5124512,13.0661621 C87.2331543,13.3150228 86.8428548,13.4394531 86.3415527,13.4394531 C86.1983236,13.4394531 86.0524089,13.4269206 85.9038086,13.4018555 C85.7552083,13.3767904 85.6433105,13.3535156 85.5681152,13.3320313 C85.4929199,13.3105469 85.3747559,13.272054 85.213623,13.2165527 C85.0524902,13.1610514 84.9558105,13.1279297 84.923584,13.1171875 C84.8662923,13.0957031 84.8090007,13.0849609 84.751709,13.0849609 C84.6263835,13.0849609 84.5225423,13.1315104 84.4401855,13.2246094 C84.3578288,13.3177083 84.3166504,13.4251302 84.3166504,13.546875 C84.3166504,13.71875 84.3990072,13.8548177 84.5637207,13.9550781 C84.7570801,14.0732422 85.0283203,14.1726074 85.3774414,14.2531738 C85.7265625,14.3337402 86.0926921,14.3740234 86.4758301,14.3740234 C87.3208822,14.3740234 87.9895833,14.1520182 88.4819336,13.7080078 C88.9742839,13.2639974 89.220459,12.6176758 89.220459,11.769043 L89.220459,7.36474609 C89.220459,7.12841797 89.1604818,6.94401042 89.0405273,6.81152344 C88.9205729,6.67903646 88.7674967,6.61279297 88.5812988,6.61279297 C88.4165853,6.61279297 88.2760417,6.66202799 88.159668,6.76049805 C88.0432943,6.8589681 87.9761556,6.99951172 87.958252,7.18212891 L87.958252,7.42919922 C87.7792155,7.14274089 87.5661621,6.92431641 87.3190918,6.77392578 C87.0720215,6.62353516 86.7318522,6.54833984 86.298584,6.54833984 C85.5215658,6.54833984 84.9065755,6.80704753 84.4536133,7.32446289 C84.000651,7.84187826 83.7741699,8.51416016 83.7741699,9.34130859 C83.7741699,10.1612956 84.0069173,10.8103027 84.4724121,11.2883301 C84.9379069,11.7663574 85.557373,12.0053711 86.3308105,12.0053711 C87.0935059,12.0053711 87.6270345,11.7171224 87.9313965,11.140625 L87.9313965,11.9301758 Z M86.5725098,11.1083984 C86.1643066,11.101237 85.8250326,10.9463704 85.5546875,10.6437988 C85.2843424,10.3412272 85.1491699,9.89453125 85.1491699,9.30371094 C85.1491699,9.09960938 85.1652832,8.90893555 85.1975098,8.73168945 C85.2297363,8.55444336 85.2816569,8.38614909 85.3532715,8.22680664 C85.4248861,8.06746419 85.5144043,7.93139648 85.6218262,7.81860352 C85.729248,7.70581055 85.8635254,7.61629232 86.0246582,7.55004883 C86.185791,7.48380534 86.3666178,7.45068359 86.5671387,7.45068359 C87.4766439,7.45068359 87.9313965,8.0719401 87.9313965,9.31445312 C87.9313965,9.92675781 87.8078613,10.3770345 87.560791,10.6652832 C87.3137207,10.9535319 86.9842936,11.101237 86.5725098,11.1083984 Z" id="Shape"></path>
+                </g>
+                <g id="vLLM-dash-box-Copy" transform="translate(0, 22)" xlink:href="#path-5">
+                    <path d="M7.60126829,127.489595 C7.73368411,127.496523 7.86662278,127.5 8,127.5 L8.77464335,127.5 C9.05078572,127.5 9.27464335,127.723858 9.27464335,128 C9.27464335,128.276142 9.05078572,128.5 8.77464335,128.5 L8,128.5 C7.8491935,128.5 7.69883087,128.496068 7.54901422,128.488228 C7.27324911,128.473799 7.06139478,128.23855 7.07582434,127.962785 C7.09025391,127.687019 7.32550317,127.475165 7.60126829,127.489595 Z M101.674643,128 C101.674643,128.276142 101.450786,128.5 101.174643,128.5 L99.9746433,128.5 C99.698501,128.5 99.4746433,128.276142 99.4746433,128 C99.4746433,127.723858 99.698501,127.5 99.9746433,127.5 L101.174643,127.5 C101.450786,127.5 101.674643,127.723858 101.674643,128 Z M17.6746433,128 C17.6746433,128.276142 17.4507857,128.5 17.1746433,128.5 L15.9746433,128.5 C15.698501,128.5 15.4746433,128.276142 15.4746433,128 C15.4746433,127.723858 15.698501,127.5 15.9746433,127.5 L17.1746434,127.5 C17.4507857,127.5 17.6746433,127.723858 17.6746433,128 Z M21.8746433,128 C21.8746433,128.276142 21.6507857,128.5 21.3746433,128.5 L20.1746433,128.5 C19.898501,128.5 19.6746433,128.276142 19.6746433,128 C19.6746433,127.723858 19.898501,127.5 20.1746433,127.5 L21.3746434,127.5 C21.6507857,127.5 21.8746433,127.723858 21.8746433,128 Z M26.0746433,128 C26.0746433,128.276142 25.8507857,128.5 25.5746433,128.5 L24.3746433,128.5 C24.098501,128.5 23.8746433,128.276142 23.8746433,128 C23.8746433,127.723858 24.098501,127.5 24.3746433,127.5 L25.5746435,127.5 C25.8507857,127.5 26.0746433,127.723858 26.0746433,128 Z M30.2746433,128 C30.2746433,128.276142 30.0507857,128.5 29.7746433,128.5 L28.5746433,128.5 C28.298501,128.5 28.0746433,128.276142 28.0746433,128 C28.0746433,127.723858 28.298501,127.5 28.5746433,127.5 L29.7746434,127.5 C30.0507857,127.5 30.2746433,127.723858 30.2746433,128 Z M34.4746433,128 C34.4746433,128.276142 34.2507857,128.5 33.9746433,128.5 L32.7746433,128.5 C32.498501,128.5 32.2746433,128.276142 32.2746433,128 C32.2746433,127.723858 32.498501,127.5 32.7746433,127.5 L33.9746434,127.5 C34.2507857,127.5 34.4746433,127.723858 34.4746433,128 Z M38.6746433,128 C38.6746433,128.276142 38.4507857,128.5 38.1746433,128.5 L36.9746433,128.5 C36.698501,128.5 36.4746433,128.276142 36.4746433,128 C36.4746433,127.723858 36.698501,127.5 36.9746433,127.5 L38.1746434,127.5 C38.4507857,127.5 38.6746433,127.723858 38.6746433,128 Z M42.8746433,128 C42.8746433,128.276142 42.6507857,128.5 42.3746433,128.5 L41.1746433,128.5 C40.898501,128.5 40.6746433,128.276142 40.6746433,128 C40.6746433,127.723858 40.898501,127.5 41.1746433,127.5 L42.3746434,127.5 C42.6507857,127.5 42.8746433,127.723858 42.8746433,128 Z M47.0746433,128 C47.0746433,128.276142 46.8507857,128.5 46.5746433,128.5 L45.3746433,128.5 C45.098501,128.5 44.8746433,128.276142 44.8746433,128 C44.8746433,127.723858 45.098501,127.5 45.3746433,127.5 L46.5746434,127.5 C46.8507857,127.5 47.0746433,127.723858 47.0746433,128 Z M51.2746433,128 C51.2746433,128.276142 51.0507857,128.5 50.7746433,128.5 L49.5746433,128.5 C49.298501,128.5 49.0746433,128.276142 49.0746433,128 C49.0746433,127.723858 49.298501,127.5 49.5746433,127.5 L50.7746434,127.5 C51.0507857,127.5 51.2746433,127.723858 51.2746433,128 Z M55.4746433,128 C55.4746433,128.276142 55.2507857,128.5 54.9746433,128.5 L53.7746433,128.5 C53.498501,128.5 53.2746433,128.276142 53.2746433,128 C53.2746433,127.723858 53.498501,127.5 53.7746433,127.5 L54.9746434,127.5 C55.2507857,127.5 55.4746433,127.723858 55.4746433,128 Z M59.6746433,128 C59.6746433,128.276142 59.4507857,128.5 59.1746433,128.5 L57.9746433,128.5 C57.698501,128.5 57.4746433,128.276142 57.4746433,128 C57.4746433,127.723858 57.698501,127.5 57.9746433,127.5 L59.1746433,127.5 C59.4507857,127.5 59.6746433,127.723858 59.6746433,128 Z M105.874643,128 C105.874643,128.276142 105.650786,128.5 105.374643,128.5 L104.174643,128.5 C103.898501,128.5 103.674643,128.276142 103.674643,128 C103.674643,127.723858 103.898501,127.5 104.174643,127.5 L105.374643,127.5 C105.650786,127.5 105.874643,127.723858 105.874643,128 Z M110.074643,128 C110.074643,128.276142 109.850786,128.5 109.574643,128.5 L108.374643,128.5 C108.098501,128.5 107.874643,128.276142 107.874643,128 C107.874643,127.723858 108.098501,127.5 108.374643,127.5 L109.574643,127.5 C109.850786,127.5 110.074643,127.723858 110.074643,128 Z M13.4746433,128 C13.4746433,128.276142 13.2507857,128.5 12.9746433,128.5 L11.7746433,128.5 C11.498501,128.5 11.2746433,128.276142 11.2746433,128 C11.2746433,127.723858 11.498501,127.5 11.7746433,127.5 L12.9746434,127.5 C13.2507857,127.5 13.4746433,127.723858 13.4746433,128 Z M63.8746433,128 C63.8746433,128.276142 63.6507857,128.5 63.3746433,128.5 L62.1746433,128.5 C61.898501,128.5 61.6746433,128.276142 61.6746433,128 C61.6746433,127.723858 61.898501,127.5 62.1746433,127.5 L63.3746432,127.5 C63.6507857,127.5 63.8746433,127.723858 63.8746433,128 Z M68.0746433,128 C68.0746433,128.276142 67.8507857,128.5 67.5746433,128.5 L66.3746433,128.5 C66.098501,128.5 65.8746433,128.276142 65.8746433,128 C65.8746433,127.723858 66.098501,127.5 66.3746433,127.5 L67.5746433,127.5 C67.8507857,127.5 68.0746433,127.723858 68.0746433,128 Z M72.2746433,128 C72.2746433,128.276142 72.0507857,128.5 71.7746433,128.5 L70.5746433,128.5 C70.298501,128.5 70.0746433,128.276142 70.0746433,128 C70.0746433,127.723858 70.298501,127.5 70.5746433,127.5 L71.7746433,127.5 C72.0507857,127.5 72.2746433,127.723858 72.2746433,128 Z M76.4746433,128 C76.4746433,128.276142 76.2507857,128.5 75.9746433,128.5 L74.7746433,128.5 C74.498501,128.5 74.2746433,128.276142 74.2746433,128 C74.2746433,127.723858 74.498501,127.5 74.7746433,127.5 L75.9746433,127.5 C76.2507857,127.5 76.4746433,127.723858 76.4746433,128 Z M80.6746433,128 C80.6746433,128.276142 80.4507857,128.5 80.1746433,128.5 L78.9746433,128.5 C78.698501,128.5 78.4746433,128.276142 78.4746433,128 C78.4746433,127.723858 78.698501,127.5 78.9746433,127.5 L80.1746433,127.5 C80.4507857,127.5 80.6746433,127.723858 80.6746433,128 Z M84.8746433,128 C84.8746433,128.276142 84.6507857,128.5 84.3746433,128.5 L83.1746433,128.5 C82.898501,128.5 82.6746433,128.276142 82.6746433,128 C82.6746433,127.723858 82.898501,127.5 83.1746433,127.5 L84.3746433,127.5 C84.6507857,127.5 84.8746433,127.723858 84.8746433,128 Z M89.0746433,128 C89.0746433,128.276142 88.8507857,128.5 88.5746433,128.5 L87.3746433,128.5 C87.098501,128.5 86.8746433,128.276142 86.8746433,128 C86.8746433,127.723858 87.098501,127.5 87.3746433,127.5 L88.5746433,127.5 C88.8507857,127.5 89.0746433,127.723858 89.0746433,128 Z M93.2746433,128 C93.2746433,128.276142 93.0507857,128.5 92.7746433,128.5 L91.5746433,128.5 C91.298501,128.5 91.0746433,128.276142 91.0746433,128 C91.0746433,127.723858 91.298501,127.5 91.5746433,127.5 L92.7746433,127.5 C93.0507857,127.5 93.2746433,127.723858 93.2746433,128 Z M97.4746433,128 C97.4746433,128.276142 97.2507857,128.5 96.9746433,128.5 L95.7746433,128.5 C95.498501,128.5 95.2746433,128.276142 95.2746433,128 C95.2746433,127.723858 95.498501,127.5 95.7746433,127.5 L96.9746433,127.5 C97.2507857,127.5 97.4746433,127.723858 97.4746433,128 Z M114.055358,126.915582 C114.180112,127.161937 114.081535,127.462781 113.835179,127.587535 C113.455748,127.779678 113.062077,127.943136 112.657126,128.076292 C112.394802,128.16255 112.11222,128.01982 112.025961,127.757496 C111.939703,127.495171 112.082433,127.212589 112.344758,127.126331 C112.701744,127.008946 113.048821,126.864836 113.383405,126.695403 C113.62976,126.570649 113.930604,126.669226 114.055358,126.915582 Z M3.87987036,126.26801 C4.19335033,126.474514 4.52190256,126.657254 4.86287326,126.814501 C5.11363396,126.930146 5.22316717,127.227176 5.10752253,127.477937 C4.9918779,127.728697 4.69484768,127.838231 4.44408698,127.722586 C4.05748525,127.544295 3.68504135,127.337142 3.32975792,127.103101 C3.09915402,126.951191 3.03535949,126.641103 3.18726884,126.410499 C3.3391782,126.179895 3.64926646,126.116101 3.87987036,126.26801 Z M116.999048,123.909773 C117.231117,124.059434 117.297922,124.368887 117.148261,124.600957 C116.917678,124.958505 116.660543,125.298348 116.37924,125.617559 C116.196668,125.824735 115.880714,125.844681 115.673538,125.662108 C115.466362,125.479535 115.446417,125.163582 115.62899,124.956406 C115.877349,124.674578 116.104345,124.374568 116.307864,124.058986 C116.457525,123.826916 116.766979,123.760111 116.999048,123.909773 Z M1.27256369,123.319388 C1.43898255,123.655985 1.63055181,123.979556 1.84546492,124.287446 C2.00352124,124.513881 1.94808936,124.825572 1.72165435,124.983629 C1.49521934,125.141685 1.18352754,125.086253 1.02547122,124.859818 C0.781933905,124.510921 0.564805969,124.14418 0.376143395,123.762593 C0.25375572,123.515054 0.355211426,123.215168 0.602751055,123.092781 C0.850290685,122.970393 1.15017602,123.071849 1.27256369,123.319388 Z M118.493626,120.332066 C118.477178,120.759793 118.428966,121.182879 118.349775,121.598809 C118.298127,121.870079 118.036351,122.048117 117.765081,121.996469 C117.493812,121.944821 117.315774,121.683044 117.367422,121.411775 C117.437286,121.044832 117.479837,120.671419 117.494364,120.29364 C117.504975,120.017702 117.73727,119.802611 118.013208,119.813222 C118.289146,119.823833 118.504237,120.056128 118.493626,120.332066 Z M0.5,119.461965 L0.5,120.011642 C0.500324493,120.219706 0.509101847,120.42668 0.526243834,120.632238 C0.549192342,120.907426 0.344712508,121.149112 0.0695253402,121.172061 C-0.205661827,121.195009 -0.447348546,120.990529 -0.470297054,120.715342 C-0.489701403,120.482655 -0.49963303,120.248463 -0.5,120.01241 L-0.5,119.461965 C-0.5,119.185831 -0.276150919,118.961965 7.30095984e-11,118.961965 C0.27613383,118.961965 0.5,119.185814 0.5,119.461965 Z M118.5,116.112678 L118.5,117.312678 C118.5,117.588821 118.276142,117.812678 118,117.812678 C117.723858,117.812678 117.5,117.588821 117.5,117.312678 L117.5,116.112678 C117.5,115.836536 117.723858,115.612678 118,115.612678 C118.276142,115.612678 118.5,115.836536 118.5,116.112678 Z M0.5,115.261965 L0.5,116.461965 C0.5,116.738107 0.276142375,116.961965 0,116.961965 C-0.276142375,116.961965 -0.5,116.738107 -0.5,116.461965 L-0.5,115.261965 C-0.5,114.985823 -0.276142375,114.761965 0,114.761965 C0.276142375,114.761965 0.5,114.985823 0.5,115.261965 Z M118.5,111.912678 L118.5,113.112678 C118.5,113.388821 118.276142,113.612678 118,113.612678 C117.723858,113.612678 117.5,113.388821 117.5,113.112678 L117.5,111.912678 C117.5,111.636536 117.723858,111.412678 118,111.412678 C118.276142,111.412678 118.5,111.636536 118.5,111.912678 Z M0.5,111.061965 L0.5,112.261965 C0.5,112.538107 0.276142375,112.761965 0,112.761965 C-0.276142375,112.761965 -0.5,112.538107 -0.5,112.261965 L-0.5,111.061965 C-0.5,110.785823 -0.276142375,110.561965 0,110.561965 C0.276142375,110.561965 0.5,110.785823 0.5,111.061965 Z M118.5,107.712678 L118.5,108.912678 C118.5,109.188821 118.276142,109.412678 118,109.412678 C117.723858,109.412678 117.5,109.188821 117.5,108.912678 L117.5,107.712678 C117.5,107.436536 117.723858,107.212678 118,107.212678 C118.276142,107.212678 118.5,107.436536 118.5,107.712678 Z M0.5,106.861965 L0.5,108.061965 C0.5,108.338107 0.276142375,108.561965 0,108.561965 C-0.276142375,108.561965 -0.5,108.338107 -0.5,108.061965 L-0.5,106.861965 C-0.5,106.585823 -0.276142375,106.361965 0,106.361965 C0.276142375,106.361965 0.5,106.585823 0.5,106.861965 Z M118.5,103.512678 L118.5,104.712678 C118.5,104.988821 118.276142,105.212678 118,105.212678 C117.723858,105.212678 117.5,104.988821 117.5,104.712678 L117.5,103.512678 C117.5,103.236536 117.723858,103.012678 118,103.012678 C118.276142,103.012678 118.5,103.236536 118.5,103.512678 Z M0.5,102.661965 L0.5,103.861965 C0.5,104.138107 0.276142375,104.361965 0,104.361965 C-0.276142375,104.361965 -0.5,104.138107 -0.5,103.861965 L-0.5,102.661965 C-0.5,102.385823 -0.276142375,102.161965 0,102.161965 C0.276142375,102.161965 0.5,102.385823 0.5,102.661965 Z M118.5,99.3126783 L118.5,100.512678 C118.5,100.788821 118.276142,101.012678 118,101.012678 C117.723858,101.012678 117.5,100.788821 117.5,100.512678 L117.5,99.3126783 C117.5,99.036536 117.723858,98.8126783 118,98.8126783 C118.276142,98.8126783 118.5,99.036536 118.5,99.3126783 Z M0.5,98.461965 L0.5,99.661965 C0.5,99.9381074 0.276142375,100.161965 0,100.161965 C-0.276142375,100.161965 -0.5,99.9381074 -0.5,99.661965 L-0.5,98.461965 C-0.5,98.1858226 -0.276142375,97.961965 0,97.961965 C0.276142375,97.961965 0.5,98.1858226 0.5,98.461965 Z M118.5,95.1126783 L118.5,96.3126783 C118.5,96.5888207 118.276142,96.8126783 118,96.8126783 C117.723858,96.8126783 117.5,96.5888207 117.5,96.3126783 L117.5,95.1126783 C117.5,94.836536 117.723858,94.6126783 118,94.6126783 C118.276142,94.6126783 118.5,94.836536 118.5,95.1126783 Z M0.5,94.261965 L0.5,95.461965 C0.5,95.7381074 0.276142375,95.961965 0,95.961965 C-0.276142375,95.961965 -0.5,95.7381074 -0.5,95.461965 L-0.5,94.261965 C-0.5,93.9858226 -0.276142375,93.761965 0,93.761965 C0.276142375,93.761965 0.5,93.9858226 0.5,94.261965 Z M118.5,90.9126783 L118.5,92.1126783 C118.5,92.3888207 118.276142,92.6126783 118,92.6126783 C117.723858,92.6126783 117.5,92.3888207 117.5,92.1126783 L117.5,90.9126783 C117.5,90.636536 117.723858,90.4126783 118,90.4126783 C118.276142,90.4126783 118.5,90.636536 118.5,90.9126783 Z M0.5,90.061965 L0.5,91.261965 C0.5,91.5381074 0.276142375,91.761965 0,91.761965 C-0.276142375,91.761965 -0.5,91.5381074 -0.5,91.261965 L-0.5,90.061965 C-0.5,89.7858226 -0.276142375,89.561965 0,89.561965 C0.276142375,89.561965 0.5,89.7858226 0.5,90.061965 Z M118.5,86.7126783 L118.5,87.9126783 C118.5,88.1888207 118.276142,88.4126783 118,88.4126783 C117.723858,88.4126783 117.5,88.1888207 117.5,87.9126783 L117.5,86.7126783 C117.5,86.436536 117.723858,86.2126783 118,86.2126783 C118.276142,86.2126783 118.5,86.436536 118.5,86.7126783 Z M0.5,85.861965 L0.5,87.061965 C0.5,87.3381074 0.276142375,87.561965 0,87.561965 C-0.276142375,87.561965 -0.5,87.3381074 -0.5,87.061965 L-0.5,85.861965 C-0.5,85.5858226 -0.276142375,85.361965 0,85.361965 C0.276142375,85.361965 0.5,85.5858226 0.5,85.861965 Z M118.5,82.5126783 L118.5,83.7126783 C118.5,83.9888207 118.276142,84.2126783 118,84.2126783 C117.723858,84.2126783 117.5,83.9888207 117.5,83.7126783 L117.5,82.5126783 C117.5,82.236536 117.723858,82.0126783 118,82.0126783 C118.276142,82.0126783 118.5,82.236536 118.5,82.5126783 Z M0.5,81.661965 L0.5,82.861965 C0.5,83.1381074 0.276142375,83.361965 0,83.361965 C-0.276142375,83.361965 -0.5,83.1381074 -0.5,82.861965 L-0.5,81.661965 C-0.5,81.3858226 -0.276142375,81.161965 0,81.161965 C0.276142375,81.161965 0.5,81.3858226 0.5,81.661965 Z M118.5,78.3126783 L118.5,79.5126783 C118.5,79.7888207 118.276142,80.0126783 118,80.0126783 C117.723858,80.0126783 117.5,79.7888207 117.5,79.5126783 L117.5,78.3126783 C117.5,78.036536 117.723858,77.8126783 118,77.8126783 C118.276142,77.8126783 118.5,78.036536 118.5,78.3126783 Z M0.5,77.461965 L0.5,78.661965 C0.5,78.9381074 0.276142375,79.161965 0,79.161965 C-0.276142375,79.161965 -0.5,78.9381074 -0.5,78.661965 L-0.5,77.461965 C-0.5,77.1858226 -0.276142375,76.961965 0,76.961965 C0.276142375,76.961965 0.5,77.1858226 0.5,77.461965 Z M118.5,74.1126783 L118.5,75.3126783 C118.5,75.5888207 118.276142,75.8126783 118,75.8126783 C117.723858,75.8126783 117.5,75.5888207 117.5,75.3126783 L117.5,74.1126783 C117.5,73.836536 117.723858,73.6126783 118,73.6126783 C118.276142,73.6126783 118.5,73.836536 118.5,74.1126783 Z M0.5,73.261965 L0.5,74.461965 C0.5,74.7381074 0.276142375,74.961965 0,74.961965 C-0.276142375,74.961965 -0.5,74.7381074 -0.5,74.461965 L-0.5,73.261965 C-0.5,72.9858226 -0.276142375,72.761965 0,72.761965 C0.276142375,72.761965 0.5,72.9858226 0.5,73.261965 Z M118.5,69.9126783 L118.5,71.1126783 C118.5,71.3888207 118.276142,71.6126783 118,71.6126783 C117.723858,71.6126783 117.5,71.3888207 117.5,71.1126783 L117.5,69.9126783 C117.5,69.636536 117.723858,69.4126783 118,69.4126783 C118.276142,69.4126783 118.5,69.636536 118.5,69.9126783 Z M0.5,69.061965 L0.5,70.261965 C0.5,70.5381074 0.276142375,70.761965 0,70.761965 C-0.276142375,70.761965 -0.5,70.5381074 -0.5,70.261965 L-0.5,69.061965 C-0.5,68.7858226 -0.276142375,68.561965 0,68.561965 C0.276142375,68.561965 0.5,68.7858226 0.5,69.061965 Z M118.5,65.7126783 L118.5,66.9126783 C118.5,67.1888207 118.276142,67.4126783 118,67.4126783 C117.723858,67.4126783 117.5,67.1888207 117.5,66.9126783 L117.5,65.7126783 C117.5,65.436536 117.723858,65.2126783 118,65.2126783 C118.276142,65.2126783 118.5,65.436536 118.5,65.7126783 Z M0.5,64.861965 L0.5,66.061965 C0.5,66.3381074 0.276142375,66.561965 0,66.561965 C-0.276142375,66.561965 -0.5,66.3381074 -0.5,66.061965 L-0.5,64.861965 C-0.5,64.5858226 -0.276142375,64.361965 0,64.361965 C0.276142375,64.361965 0.5,64.5858226 0.5,64.861965 Z M118.5,61.5126783 L118.5,62.7126783 C118.5,62.9888207 118.276142,63.2126783 118,63.2126783 C117.723858,63.2126783 117.5,62.9888207 117.5,62.7126783 L117.5,61.5126783 C117.5,61.236536 117.723858,61.0126783 118,61.0126783 C118.276142,61.0126783 118.5,61.236536 118.5,61.5126783 Z M0.5,60.661965 L0.5,61.861965 C0.5,62.1381074 0.276142375,62.361965 0,62.361965 C-0.276142375,62.361965 -0.5,62.1381074 -0.5,61.861965 L-0.5,60.661965 C-0.5,60.3858226 -0.276142375,60.161965 0,60.161965 C0.276142375,60.161965 0.5,60.3858226 0.5,60.661965 Z M118.5,57.3126783 L118.5,58.5126783 C118.5,58.7888207 118.276142,59.0126783 118,59.0126783 C117.723858,59.0126783 117.5,58.7888207 117.5,58.5126783 L117.5,57.3126783 C117.5,57.036536 117.723858,56.8126783 118,56.8126783 C118.276142,56.8126783 118.5,57.036536 118.5,57.3126783 Z M0.5,56.461965 L0.5,57.661965 C0.5,57.9381074 0.276142375,58.161965 0,58.161965 C-0.276142375,58.161965 -0.5,57.9381074 -0.5,57.661965 L-0.5,56.461965 C-0.5,56.1858226 -0.276142375,55.961965 0,55.961965 C0.276142375,55.961965 0.5,56.1858226 0.5,56.461965 Z M118.5,53.1126783 L118.5,54.3126783 C118.5,54.5888207 118.276142,54.8126783 118,54.8126783 C117.723858,54.8126783 117.5,54.5888207 117.5,54.3126783 L117.5,53.1126783 C117.5,52.836536 117.723858,52.6126783 118,52.6126783 C118.276142,52.6126783 118.5,52.836536 118.5,53.1126783 Z M0.5,52.261965 L0.5,53.461965 C0.5,53.7381074 0.276142375,53.961965 0,53.961965 C-0.276142375,53.961965 -0.5,53.7381074 -0.5,53.461965 L-0.5,52.261965 C-0.5,51.9858226 -0.276142375,51.761965 0,51.761965 C0.276142375,51.761965 0.5,51.9858226 0.5,52.261965 Z M118.5,48.9126783 L118.5,50.1126783 C118.5,50.3888207 118.276142,50.6126783 118,50.6126783 C117.723858,50.6126783 117.5,50.3888207 117.5,50.1126783 L117.5,48.9126783 C117.5,48.636536 117.723858,48.4126783 118,48.4126783 C118.276142,48.4126783 118.5,48.636536 118.5,48.9126783 Z M0.5,48.061965 L0.5,49.261965 C0.5,49.5381074 0.276142375,49.761965 0,49.761965 C-0.276142375,49.761965 -0.5,49.5381074 -0.5,49.261965 L-0.5,48.061965 C-0.5,47.7858226 -0.276142375,47.561965 0,47.561965 C0.276142375,47.561965 0.5,47.7858226 0.5,48.061965 Z M118.5,44.7126783 L118.5,45.9126783 C118.5,46.1888207 118.276142,46.4126783 118,46.4126783 C117.723858,46.4126783 117.5,46.1888207 117.5,45.9126783 L117.5,44.7126783 C117.5,44.436536 117.723858,44.2126783 118,44.2126783 C118.276142,44.2126783 118.5,44.436536 118.5,44.7126783 Z M0.5,43.861965 L0.5,45.061965 C0.5,45.3381074 0.276142375,45.561965 0,45.561965 C-0.276142375,45.561965 -0.5,45.3381074 -0.5,45.061965 L-0.5,43.861965 C-0.5,43.5858226 -0.276142375,43.361965 0,43.361965 C0.276142375,43.361965 0.5,43.5858226 0.5,43.861965 Z M118.5,40.5126783 L118.5,41.7126783 C118.5,41.9888207 118.276142,42.2126783 118,42.2126783 C117.723858,42.2126783 117.5,41.9888207 117.5,41.7126783 L117.5,40.5126783 C117.5,40.236536 117.723858,40.0126783 118,40.0126783 C118.276142,40.0126783 118.5,40.236536 118.5,40.5126783 Z M0.5,39.661965 L0.5,40.861965 C0.5,41.1381074 0.276142375,41.361965 0,41.361965 C-0.276142375,41.361965 -0.5,41.1381074 -0.5,40.861965 L-0.5,39.661965 C-0.5,39.3858226 -0.276142375,39.161965 0,39.161965 C0.276142375,39.161965 0.5,39.3858226 0.5,39.661965 Z M118.5,36.3126783 L118.5,37.5126783 C118.5,37.7888207 118.276142,38.0126783 118,38.0126783 C117.723858,38.0126783 117.5,37.7888207 117.5,37.5126783 L117.5,36.3126783 C117.5,36.036536 117.723858,35.8126783 118,35.8126783 C118.276142,35.8126783 118.5,36.036536 118.5,36.3126783 Z M0.5,35.461965 L0.5,36.661965 C0.5,36.9381074 0.276142375,37.161965 0,37.161965 C-0.276142375,37.161965 -0.5,36.9381074 -0.5,36.661965 L-0.5,35.461965 C-0.5,35.1858226 -0.276142375,34.961965 0,34.961965 C0.276142375,34.961965 0.5,35.1858226 0.5,35.461965 Z M118.5,32.1126783 L118.5,33.3126783 C118.5,33.5888207 118.276142,33.8126783 118,33.8126783 C117.723858,33.8126783 117.5,33.5888207 117.5,33.3126783 L117.5,32.1126783 C117.5,31.836536 117.723858,31.6126783 118,31.6126783 C118.276142,31.6126783 118.5,31.836536 118.5,32.1126783 Z M0.5,31.261965 L0.5,32.461965 C0.5,32.7381074 0.276142375,32.961965 0,32.961965 C-0.276142375,32.961965 -0.5,32.7381074 -0.5,32.461965 L-0.5,31.261965 C-0.5,30.9858226 -0.276142375,30.761965 0,30.761965 C0.276142375,30.761965 0.5,30.9858226 0.5,31.261965 Z M118.5,27.9126783 L118.5,29.1126783 C118.5,29.3888207 118.276142,29.6126783 118,29.6126783 C117.723858,29.6126783 117.5,29.3888207 117.5,29.1126783 L117.5,27.9126783 C117.5,27.636536 117.723858,27.4126783 118,27.4126783 C118.276142,27.4126783 118.5,27.636536 118.5,27.9126783 Z M0.5,27.061965 L0.5,28.261965 C0.5,28.5381074 0.276142375,28.761965 0,28.761965 C-0.276142375,28.761965 -0.5,28.5381074 -0.5,28.261965 L-0.5,27.061965 C-0.5,26.7858226 -0.276142375,26.561965 0,26.561965 C0.276142375,26.561965 0.5,26.7858226 0.5,27.061965 Z M118.5,23.7126783 L118.5,24.9126783 C118.5,25.1888207 118.276142,25.4126783 118,25.4126783 C117.723858,25.4126783 117.5,25.1888207 117.5,24.9126783 L117.5,23.7126783 C117.5,23.436536 117.723858,23.2126783 118,23.2126783 C118.276142,23.2126783 118.5,23.436536 118.5,23.7126783 Z M0.5,22.861965 L0.5,24.061965 C0.5,24.3381074 0.276142375,24.561965 0,24.561965 C-0.276142375,24.561965 -0.5,24.3381074 -0.5,24.061965 L-0.5,22.861965 C-0.5,22.5858226 -0.276142375,22.361965 0,22.361965 C0.276142375,22.361965 0.5,22.5858226 0.5,22.861965 Z M118.5,19.5126783 L118.5,20.7126783 C118.5,20.9888207 118.276142,21.2126783 118,21.2126783 C117.723858,21.2126783 117.5,20.9888207 117.5,20.7126783 L117.5,19.5126783 C117.5,19.236536 117.723858,19.0126783 118,19.0126783 C118.276142,19.0126783 118.5,19.236536 118.5,19.5126783 Z M0.5,18.661965 L0.5,19.861965 C0.5,20.1381074 0.276142375,20.361965 0,20.361965 C-0.276142375,20.361965 -0.5,20.1381074 -0.5,19.861965 L-0.5,18.661965 C-0.5,18.3858226 -0.276142375,18.161965 0,18.161965 C0.276142375,18.161965 0.5,18.3858226 0.5,18.661965 Z M118.5,15.3126783 L118.5,16.5126783 C118.5,16.7888207 118.276142,17.0126783 118,17.0126783 C117.723858,17.0126783 117.5,16.7888207 117.5,16.5126783 L117.5,15.3126783 C117.5,15.036536 117.723858,14.8126783 118,14.8126783 C118.276142,14.8126783 118.5,15.036536 118.5,15.3126783 Z M0.5,14.461965 L0.5,15.661965 C0.5,15.9381074 0.276142375,16.161965 0,16.161965 C-0.276142375,16.161965 -0.5,15.9381074 -0.5,15.661965 L-0.5,14.461965 C-0.5,14.1858226 -0.276142375,13.961965 0,13.961965 C0.276142375,13.961965 0.5,14.1858226 0.5,14.461965 Z M118.5,11.1126783 L118.5,12.3126783 C118.5,12.5888207 118.276142,12.8126783 118,12.8126783 C117.723858,12.8126783 117.5,12.5888207 117.5,12.3126783 L117.5,11.1126783 C117.5,10.836536 117.723858,10.6126783 118,10.6126783 C118.276142,10.6126783 118.5,10.836536 118.5,11.1126783 Z M0.5,10.261965 L0.5,11.461965 C0.5,11.7381074 0.276142375,11.961965 0,11.961965 C-0.276142375,11.961965 -0.5,11.7381074 -0.5,11.461965 L-0.5,10.261965 C-0.5,9.98582265 -0.276142375,9.76196502 0,9.76196502 C0.276142375,9.76196502 0.5,9.98582265 0.5,10.261965 Z M118.419805,6.82803952 C118.47219,7.20768546 118.499081,7.59225728 118.499978,7.98103351 L118.5,8.11259543 C118.500046,8.3887378 118.276225,8.61263254 118.000083,8.61267833 C117.723941,8.6127241 117.500046,8.3889036 117.5,8.11276123 L117.49998,7.98227163 C117.499188,7.63955287 117.475436,7.29987535 117.429191,6.96472849 C117.391445,6.69117799 117.582603,6.43882262 117.856153,6.401077 C118.129704,6.36333138 118.382059,6.55448901 118.419805,6.82803952 Z M0.351560452,5.59425596 C0.619685241,5.66031433 0.783492437,5.93122331 0.717434064,6.1993481 C0.628029624,6.5622324 0.565753874,6.93265807 0.531429382,7.30840907 C0.506308636,7.58340645 0.263014824,7.78597148 -0.0119825569,7.76085074 C-0.286979937,7.73572999 -0.489544973,7.49243618 -0.464424227,7.2174388 C-0.425527861,6.79163939 -0.354921179,6.37166007 -0.25353169,5.96012957 C-0.187473317,5.69200478 0.0834356631,5.52819758 0.351560452,5.59425596 Z M116.698898,2.76733419 C116.96108,3.1025346 117.197883,3.45694423 117.407088,3.8275859 C117.542824,4.06806518 117.457913,4.37304812 117.217434,4.50878435 C116.976955,4.64452059 116.671972,4.55960957 116.536235,4.31913028 C116.35164,3.9920895 116.14265,3.67930561 115.911223,3.38342515 C115.741094,3.1659147 115.779505,2.85167098 115.997015,2.68154216 C116.214525,2.51141334 116.528769,2.54982374 116.698898,2.76733419 Z M2.65018329,2.03098857 C2.84694705,2.22473746 2.84939109,2.54131052 2.6556422,2.73807428 C2.39207973,3.00573797 2.14880502,3.29275366 1.92806459,3.59657449 C1.76575137,3.81997777 1.45306636,3.86950105 1.22966307,3.70718783 C1.00625978,3.54487461 0.956736501,3.23218959 1.11904973,3.00878631 C1.36908653,2.66464279 1.64461146,2.33957825 1.94309758,2.03644748 C2.13684647,1.83968372 2.45341952,1.83723968 2.65018329,2.03098857 Z M113.131516,0.0955761984 C113.527572,0.252607282 113.910844,0.439252343 114.278336,0.653707596 C114.516838,0.792888857 114.597354,1.09906161 114.458172,1.33756357 C114.318991,1.57606554 114.012818,1.65658088 113.774317,1.51739962 C113.450161,1.32823426 113.112159,1.1636344 112.762943,1.0251747 C112.506241,0.923395871 112.380651,0.63279011 112.48243,0.376088571 C112.584209,0.119387032 112.874815,-0.00620263532 113.131516,0.0955761984 Z M6.40653195,0.142669081 C6.47802057,0.409397386 6.31974754,0.683576376 6.05301923,0.755064996 C5.68981674,0.852410663 5.33533565,0.976934286 4.9920952,1.12738697 C4.73918251,1.23824627 4.44428705,1.12308924 4.33342776,0.870176548 C4.22256847,0.617263859 4.3377255,0.322368401 4.59063819,0.211509108 C4.97997232,0.0408521629 5.38210756,-0.100411613 5.79413604,-0.21084364 C6.06086434,-0.282332259 6.33504333,-0.124059223 6.40653195,0.142669081 Z M110.500405,2.47493888e-05 C110.500405,0.276167124 110.276522,0.500013673 110.00038,0.5 L108.8,0.5 C108.523858,0.5 108.3,0.276142375 108.3,0 C108.3,-0.276142375 108.523858,-0.5 108.8,-0.5 L110,-0.5 C110.276572,-0.499986326 110.500405,-0.276117625 110.500405,2.47493888e-05 Z M106.3,0 C106.3,0.276142375 106.076142,0.5 105.8,0.5 L104.6,0.5 C104.323858,0.5 104.1,0.276142375 104.1,0 C104.1,-0.276142375 104.323858,-0.5 104.6,-0.5 L105.8,-0.5 C106.076142,-0.5 106.3,-0.276142375 106.3,0 Z M102.1,0 C102.1,0.276142375 101.876142,0.5 101.6,0.5 L100.4,0.5 C100.123858,0.5 99.9,0.276142375 99.9,0 C99.9,-0.276142375 100.123858,-0.5 100.4,-0.5 L101.6,-0.5 C101.876142,-0.5 102.1,-0.276142375 102.1,0 Z M97.9,0 C97.9,0.276142375 97.6761424,0.5 97.4,0.5 L96.2,0.5 C95.9238576,0.5 95.7,0.276142375 95.7,0 C95.7,-0.276142375 95.9238576,-0.5 96.2,-0.5 L97.4,-0.5 C97.6761424,-0.5 97.9,-0.276142375 97.9,0 Z M93.7,0 C93.7,0.276142375 93.4761424,0.5 93.2,0.5 L92,0.5 C91.7238576,0.5 91.5,0.276142375 91.5,0 C91.5,-0.276142375 91.7238576,-0.5 92,-0.5 L93.2,-0.5 C93.4761424,-0.5 93.7,-0.276142375 93.7,0 Z M89.5,0 C89.5,0.276142375 89.2761424,0.5 89,0.5 L87.8,0.5 C87.5238576,0.5 87.3,0.276142375 87.3,0 C87.3,-0.276142375 87.5238576,-0.5 87.8,-0.5 L89,-0.5 C89.2761424,-0.5 89.5,-0.276142375 89.5,0 Z M85.3,0 C85.3,0.276142375 85.0761424,0.5 84.8,0.5 L83.6,0.5 C83.3238576,0.5 83.1,0.276142375 83.1,0 C83.1,-0.276142375 83.3238576,-0.5 83.6,-0.5 L84.8,-0.5 C85.0761424,-0.5 85.3,-0.276142375 85.3,0 Z M81.1,0 C81.1,0.276142375 80.8761424,0.5 80.6,0.5 L79.4,0.5 C79.1238576,0.5 78.9,0.276142375 78.9,0 C78.9,-0.276142375 79.1238576,-0.5 79.4,-0.5 L80.6,-0.5 C80.8761424,-0.5 81.1,-0.276142375 81.1,0 Z M76.9,0 C76.9,0.276142375 76.6761424,0.5 76.4,0.5 L75.2,0.5 C74.9238576,0.5 74.7,0.276142375 74.7,0 C74.7,-0.276142375 74.9238576,-0.5 75.2,-0.5 L76.4,-0.5 C76.6761424,-0.5 76.9,-0.276142375 76.9,0 Z M72.7,0 C72.7,0.276142375 72.4761424,0.5 72.2,0.5 L71,0.5 C70.7238576,0.5 70.5,0.276142375 70.5,0 C70.5,-0.276142375 70.7238576,-0.5 71,-0.5 L72.2,-0.5 C72.4761424,-0.5 72.7,-0.276142375 72.7,0 Z M68.5,0 C68.5,0.276142375 68.2761424,0.5 68,0.5 L66.8,0.5 C66.5238576,0.5 66.3,0.276142375 66.3,0 C66.3,-0.276142375 66.5238576,-0.5 66.8,-0.5 L68,-0.5 C68.2761424,-0.5 68.5,-0.276142375 68.5,0 Z M64.3,0 C64.3,0.276142375 64.0761424,0.5 63.8,0.5 L62.6,0.5 C62.3238576,0.5 62.1,0.276142375 62.1,0 C62.1,-0.276142375 62.3238576,-0.5 62.6,-0.5 L63.8,-0.5 C64.0761424,-0.5 64.3,-0.276142375 64.3,0 Z M60.1,0 C60.1,0.276142375 59.8761424,0.5 59.6,0.5 L58.4,0.5 C58.1238576,0.5 57.9,0.276142375 57.9,0 C57.9,-0.276142375 58.1238576,-0.5 58.4,-0.5 L59.6,-0.5 C59.8761424,-0.5 60.1,-0.276142375 60.1,0 Z M55.9,0 C55.9,0.276142375 55.6761424,0.5 55.4,0.5 L54.2,0.5 C53.9238576,0.5 53.7,0.276142375 53.7,0 C53.7,-0.276142375 53.9238576,-0.5 54.2,-0.5 L55.4,-0.5 C55.6761424,-0.5 55.9,-0.276142375 55.9,0 Z M51.7,0 C51.7,0.276142375 51.4761424,0.5 51.2,0.5 L50,0.5 C49.7238576,0.5 49.5,0.276142375 49.5,0 C49.5,-0.276142375 49.7238576,-0.5 50,-0.5 L51.2,-0.5 C51.4761424,-0.5 51.7,-0.276142375 51.7,0 Z M47.5,0 C47.5,0.276142375 47.2761424,0.5 47,0.5 L45.8,0.5 C45.5238576,0.5 45.3,0.276142375 45.3,0 C45.3,-0.276142375 45.5238576,-0.5 45.8,-0.5 L47,-0.5 C47.2761424,-0.5 47.5,-0.276142375 47.5,0 Z M43.3,0 C43.3,0.276142375 43.0761424,0.5 42.8,0.5 L41.6,0.5 C41.3238576,0.5 41.1,0.276142375 41.1,0 C41.1,-0.276142375 41.3238576,-0.5 41.6,-0.5 L42.8,-0.5 C43.0761424,-0.5 43.3,-0.276142375 43.3,0 Z M39.1,0 C39.1,0.276142375 38.8761424,0.5 38.6,0.5 L37.4,0.5 C37.1238576,0.5 36.9,0.276142375 36.9,0 C36.9,-0.276142375 37.1238576,-0.5 37.4,-0.5 L38.6,-0.5 C38.8761424,-0.5 39.1,-0.276142375 39.1,0 Z M34.9,0 C34.9,0.276142375 34.6761424,0.5 34.4,0.5 L33.2,0.5 C32.9238576,0.5 32.7,0.276142375 32.7,0 C32.7,-0.276142375 32.9238576,-0.5 33.2,-0.5 L34.4,-0.5 C34.6761424,-0.5 34.9,-0.276142375 34.9,0 Z M30.7,0 C30.7,0.276142375 30.4761424,0.5 30.2,0.5 L29,0.5 C28.7238576,0.5 28.5,0.276142375 28.5,0 C28.5,-0.276142375 28.7238576,-0.5 29,-0.5 L30.2,-0.5 C30.4761424,-0.5 30.7,-0.276142375 30.7,0 Z M26.5,0 C26.5,0.276142375 26.2761424,0.5 26,0.5 L24.8,0.5 C24.5238576,0.5 24.3,0.276142375 24.3,0 C24.3,-0.276142375 24.5238576,-0.5 24.8,-0.5 L26,-0.5 C26.2761424,-0.5 26.5,-0.276142375 26.5,0 Z M22.3,0 C22.3,0.276142375 22.0761424,0.5 21.8,0.5 L20.6,0.5 C20.3238576,0.5 20.1,0.276142375 20.1,0 C20.1,-0.276142375 20.3238576,-0.5 20.6,-0.5 L21.8,-0.5 C22.0761424,-0.5 22.3,-0.276142375 22.3,0 Z M18.1,0 C18.1,0.276142375 17.8761424,0.5 17.6,0.5 L16.4,0.5 C16.1238576,0.5 15.9,0.276142375 15.9,0 C15.9,-0.276142375 16.1238576,-0.5 16.4,-0.5 L17.6,-0.5 C17.8761424,-0.5 18.1,-0.276142375 18.1,0 Z M13.9,0 C13.9,0.276142375 13.6761424,0.5 13.4,0.5 L12.2,0.5 C11.9238576,0.5 11.7,0.276142375 11.7,0 C11.7,-0.276142375 11.9238576,-0.5 12.2,-0.5 L13.4,-0.5 C13.6761424,-0.5 13.9,-0.276142375 13.9,0 Z M9.7,0 C9.7,0.276142375 9.47614237,0.5 9.2,0.5 L8,0.5 C7.72385763,0.5 7.5,0.276142375 7.5,0 C7.5,-0.276142375 7.72385763,-0.5 8,-0.5 L9.2,-0.5 C9.47614237,-0.5 9.7,-0.276142375 9.7,0 Z" id="vLLM-dash-box" fill="#999999" fill-rule="nonzero"></path>
+                    <g id="vLLM" stroke-width="1" fill-rule="evenodd" transform="translate(6.5, 4)" fill="#7D7D7D">
+                        <path d="M1.80126953,8 L5.30224609,8 C5.44873047,8 5.56266276,7.95198568 5.64404297,7.85595703 C5.72542318,7.75992839 5.76611328,7.6484375 5.76611328,7.52148437 C5.76611328,7.39453125 5.72460938,7.28222656 5.64160156,7.18457031 C5.55859375,7.08691406 5.44547526,7.03808594 5.30224609,7.03808594 L2.39697266,7.03808594 L2.39697266,1.58398437 C2.39697266,1.3984375 2.33837891,1.25439453 2.22119141,1.15185547 C2.10400391,1.04931641 1.96240234,0.998046875 1.79638672,0.998046875 C1.63037109,0.998046875 1.48714193,1.04931641 1.36669922,1.15185547 C1.24625651,1.25439453 1.18603516,1.3984375 1.18603516,1.58398437 L1.18603516,7.39453125 C1.18603516,7.56054687 1.24707031,7.70296224 1.36914062,7.82177734 C1.49121094,7.94059245 1.63525391,8 1.80126953,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M7.27978516,8 L10.7807617,8 C10.9272461,8 11.0411784,7.95198568 11.1225586,7.85595703 C11.2039388,7.75992839 11.2446289,7.6484375 11.2446289,7.52148437 C11.2446289,7.39453125 11.203125,7.28222656 11.1201172,7.18457031 C11.0371094,7.08691406 10.9239909,7.03808594 10.7807617,7.03808594 L7.87548828,7.03808594 L7.87548828,1.58398437 C7.87548828,1.3984375 7.81689453,1.25439453 7.69970703,1.15185547 C7.58251953,1.04931641 7.44091797,0.998046875 7.27490234,0.998046875 C7.10888672,0.998046875 6.96565755,1.04931641 6.84521484,1.15185547 C6.72477214,1.25439453 6.66455078,1.3984375 6.66455078,1.58398437 L6.66455078,7.39453125 C6.66455078,7.56054687 6.72558594,7.70296224 6.84765625,7.82177734 C6.96972656,7.94059245 7.11376953,8 7.27978516,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M15.9418945,7.78027344 L15.9418945,6.31054688 L14.0473633,1.88183594 C13.9529622,1.65722656 13.8162435,1.48876953 13.637207,1.37646484 C13.4581706,1.26416016 13.2661133,1.20800781 13.0610352,1.20800781 C12.8136393,1.20800781 12.5996094,1.2796224 12.4189453,1.42285156 C12.2382812,1.56608073 12.1479492,1.75 12.1479492,1.97460938 L12.1479492,7.47753906 C12.1479492,7.60449219 12.2032878,7.70377604 12.3139648,7.77539063 C12.4246419,7.84700521 12.5556641,7.8828125 12.7070312,7.8828125 C12.8583984,7.8828125 12.9894206,7.84700521 13.1000977,7.77539063 C13.2107747,7.70377604 13.2661133,7.60449219 13.2661133,7.47753906 L13.2661133,2.5703125 L15.2436523,7.25292969 C15.3152669,7.42220052 15.4145508,7.55240885 15.5415039,7.64355469 C15.668457,7.73470052 15.8019206,7.78027344 15.9418945,7.78027344 Z M15.9467773,7.78027344 C16.0867513,7.78027344 16.2210286,7.73388672 16.3496094,7.64111328 C16.4781901,7.54833984 16.5766602,7.41894531 16.6450195,7.25292969 L18.6274414,2.5703125 L18.6274414,7.47753906 C18.6274414,7.60449219 18.6827799,7.70377604 18.793457,7.77539063 C18.9041341,7.84700521 19.0351562,7.8828125 19.1865234,7.8828125 C19.3378906,7.8828125 19.4689128,7.84700521 19.5795898,7.77539063 C19.6902669,7.70377604 19.7456055,7.60449219 19.7456055,7.47753906 L19.7456055,1.97460938 C19.7456055,1.82486979 19.702474,1.69059245 19.6162109,1.57177734 C19.5299479,1.45296224 19.4176432,1.36263021 19.2792969,1.30078125 C19.1409505,1.23893229 18.9920247,1.20800781 18.8325195,1.20800781 C18.6274414,1.20800781 18.4353841,1.26416016 18.2563477,1.37646484 C18.0773112,1.48876953 17.9389648,1.65722656 17.8413086,1.88183594 L15.9467773,6.31054688 L15.9467773,7.78027344 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="Lines-@blue-w-arrow" transform="translate(20, 0)" fill="#8E8E8E" fill-rule="nonzero">
+                    <g id="L5" transform="translate(0, 138)">
+                        <path d="M77.5,-0.277777778 L80,4.72222222 L77.999,4.72222222 L78,20 C78,20.2761424 77.7761424,20.5 77.5,20.5 C77.2238576,20.5 77,20.2761424 77,20 L76.999,4.72222222 L75,4.72222222 L77.5,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M32.5,-0.277777778 L35,4.72222222 L32.999,4.72222222 L33,20 C33,20.2761424 32.7761424,20.5 32.5,20.5 C32.2238576,20.5 32,20.2761424 32,20 L31.999,4.72222222 L30,4.72222222 L32.5,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M0.5,-0.277777778 L3,4.72222222 L0.999,4.72222222 L1,20 C1,20.2761424 0.776142375,20.5 0.5,20.5 C0.223857625,20.5 1.33226763e-15,20.2761424 1.33226763e-15,20 L-0.001,4.72222222 L-2,4.72222222 L0.5,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L4" transform="translate(0.0001, 110)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L3" transform="translate(0, 82)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253504e-06,16.2761424 -1.49253504e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L2" transform="translate(0.0001, 54)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L1" transform="translate(39, 0)">
+                        <path d="M0.5,0.722222222 L3,5.72222222 L0.999,5.72222222 L1,42 C1,42.2761424 0.776142375,42.5 0.5,42.5 C0.223857625,42.5 2.22044605e-15,42.2761424 2.22044605e-15,42 L-0.001,5.72222222 L-2,5.72222222 L0.5,0.722222222 Z" id="Combined-Shape"></path>
+                    </g>
+                </g>
+                <g id="L5" transform="translate(8, 158)">
+                    <g id="1" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="2" transform="translate(32, 0)" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="..." transform="translate(62.7, 5)" fill="#8E8E8E">
+                        <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                    </g>
+                    <g id="3" transform="translate(78, 0)" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="L4" transform="translate(8, 126)" fill="#30A2FF">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-rule="nonzero"></path>
+                </g>
+                <g id="L3" transform="translate(8, 98)" fill="#30A2FF">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-rule="nonzero"></path>
+                </g>
+                <g id="L2" transform="translate(8, 70)" fill="#30A2FF">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-rule="nonzero"></path>
+                </g>
+                <g id="L1" transform="translate(8, 42)" fill="#30A2FF">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-rule="nonzero"></path>
+                </g>
+            </g>
+            <g id="Pooling" transform="translate(-1.4, 0)">
+                <g id="dash-box" transform="translate(1, 27.6)">
+                    <path d="M3.40000021,0.4 L115.4,0.4 C117.056854,0.4 118.4,1.74314575 118.4,3.4 L118.4,19.4 C118.4,21.0568542 117.056854,22.4 115.4,22.4 L3.40000021,22.4 C1.74314596,22.4 0.400000207,21.0568542 0.400000207,19.4 L0.400000207,3.4 C0.400000207,1.74314575 1.74314596,0.4 3.40000021,0.4 Z" id="vLLM-dash-box" fill="#F7F7F7"></path>
+                    <path d="M96.1844026,22.4 C96.1844026,22.6209139 96.0053165,22.8 95.7844026,22.8 L94.7844026,22.8 C94.5634887,22.8 94.3844026,22.6209139 94.3844026,22.4 C94.3844026,22.1790861 94.5634887,22 94.7844026,22 L95.7844025,22 C96.0053165,22 96.1844026,22.1790861 96.1844026,22.4 Z M6.58440258,22.4 C6.58440258,22.6209139 6.40531648,22.8 6.18440258,22.8 L5.18440258,22.8 C4.96348868,22.8 4.78440258,22.6209139 4.78440258,22.4 C4.78440258,22.1790861 4.96348868,22 5.18440258,22 L6.18440261,22 C6.40531648,22 6.58440258,22.1790861 6.58440258,22.4 Z M9.78440258,22.4 C9.78440258,22.6209139 9.60531648,22.8 9.38440258,22.8 L8.38440258,22.8 C8.16348868,22.8 7.98440258,22.6209139 7.98440258,22.4 C7.98440258,22.1790861 8.16348868,22 8.38440258,22 L9.38440261,22 C9.60531648,22 9.78440258,22.1790861 9.78440258,22.4 Z M99.3844026,22.4 C99.3844026,22.6209139 99.2053165,22.8 98.9844026,22.8 L97.9844026,22.8 C97.7634887,22.8 97.5844026,22.6209139 97.5844026,22.4 C97.5844026,22.1790861 97.7634887,22 97.9844026,22 L98.9844025,22 C99.2053165,22 99.3844026,22.1790861 99.3844026,22.4 Z M16.1844026,22.4 C16.1844026,22.6209139 16.0053165,22.8 15.7844026,22.8 L14.7844026,22.8 C14.5634887,22.8 14.3844026,22.6209139 14.3844026,22.4 C14.3844026,22.1790861 14.5634887,22 14.7844026,22 L15.7844026,22 C16.0053165,22 16.1844026,22.1790861 16.1844026,22.4 Z M19.3844026,22.4 C19.3844026,22.6209139 19.2053165,22.8 18.9844026,22.8 L17.9844026,22.8 C17.7634887,22.8 17.5844026,22.6209139 17.5844026,22.4 C17.5844026,22.1790861 17.7634887,22 17.9844026,22 L18.9844026,22 C19.2053165,22 19.3844026,22.1790861 19.3844026,22.4 Z M22.5844026,22.4 C22.5844026,22.6209139 22.4053165,22.8 22.1844026,22.8 L21.1844026,22.8 C20.9634887,22.8 20.7844026,22.6209139 20.7844026,22.4 C20.7844026,22.1790861 20.9634887,22 21.1844026,22 L22.1844026,22 C22.4053165,22 22.5844026,22.1790861 22.5844026,22.4 Z M25.7844026,22.4 C25.7844026,22.6209139 25.6053165,22.8 25.3844026,22.8 L24.3844026,22.8 C24.1634887,22.8 23.9844026,22.6209139 23.9844026,22.4 C23.9844026,22.1790861 24.1634887,22 24.3844026,22 L25.3844026,22 C25.6053165,22 25.7844026,22.1790861 25.7844026,22.4 Z M28.9844026,22.4 C28.9844026,22.6209139 28.8053165,22.8 28.5844026,22.8 L27.5844026,22.8 C27.3634887,22.8 27.1844026,22.6209139 27.1844026,22.4 C27.1844026,22.1790861 27.3634887,22 27.5844026,22 L28.5844027,22 C28.8053165,22 28.9844026,22.1790861 28.9844026,22.4 Z M32.1844026,22.4 C32.1844026,22.6209139 32.0053165,22.8 31.7844026,22.8 L30.7844026,22.8 C30.5634887,22.8 30.3844026,22.6209139 30.3844026,22.4 C30.3844026,22.1790861 30.5634887,22 30.7844026,22 L31.7844027,22 C32.0053165,22 32.1844026,22.1790861 32.1844026,22.4 Z M35.3844026,22.4 C35.3844026,22.6209139 35.2053165,22.8 34.9844026,22.8 L33.9844026,22.8 C33.7634887,22.8 33.5844026,22.6209139 33.5844026,22.4 C33.5844026,22.1790861 33.7634887,22 33.9844026,22 L34.9844027,22 C35.2053165,22 35.3844026,22.1790861 35.3844026,22.4 Z M38.5844026,22.4 C38.5844026,22.6209139 38.4053165,22.8 38.1844026,22.8 L37.1844026,22.8 C36.9634887,22.8 36.7844026,22.6209139 36.7844026,22.4 C36.7844026,22.1790861 36.9634887,22 37.1844026,22 L38.1844027,22 C38.4053165,22 38.5844026,22.1790861 38.5844026,22.4 Z M41.7844026,22.4 C41.7844026,22.6209139 41.6053165,22.8 41.3844026,22.8 L40.3844026,22.8 C40.1634887,22.8 39.9844026,22.6209139 39.9844026,22.4 C39.9844026,22.1790861 40.1634887,22 40.3844026,22 L41.3844027,22 C41.6053165,22 41.7844026,22.1790861 41.7844026,22.4 Z M44.9844026,22.4 C44.9844026,22.6209139 44.8053165,22.8 44.5844026,22.8 L43.5844026,22.8 C43.3634887,22.8 43.1844026,22.6209139 43.1844026,22.4 C43.1844026,22.1790861 43.3634887,22 43.5844026,22 L44.5844027,22 C44.8053165,22 44.9844026,22.1790861 44.9844026,22.4 Z M48.1844026,22.4 C48.1844026,22.6209139 48.0053165,22.8 47.7844026,22.8 L46.7844026,22.8 C46.5634887,22.8 46.3844026,22.6209139 46.3844026,22.4 C46.3844026,22.1790861 46.5634887,22 46.7844026,22 L47.7844027,22 C48.0053165,22 48.1844026,22.1790861 48.1844026,22.4 Z M51.3844026,22.4 C51.3844026,22.6209139 51.2053165,22.8 50.9844026,22.8 L49.9844026,22.8 C49.7634887,22.8 49.5844026,22.6209139 49.5844026,22.4 C49.5844026,22.1790861 49.7634887,22 49.9844026,22 L50.9844027,22 C51.2053165,22 51.3844026,22.1790861 51.3844026,22.4 Z M54.5844026,22.4 C54.5844026,22.6209139 54.4053165,22.8 54.1844026,22.8 L53.1844026,22.8 C52.9634887,22.8 52.7844026,22.6209139 52.7844026,22.4 C52.7844026,22.1790861 52.9634887,22 53.1844026,22 L54.1844027,22 C54.4053165,22 54.5844026,22.1790861 54.5844026,22.4 Z M57.7844026,22.4 C57.7844026,22.6209139 57.6053165,22.8 57.3844026,22.8 L56.3844026,22.8 C56.1634887,22.8 55.9844026,22.6209139 55.9844026,22.4 C55.9844026,22.1790861 56.1634887,22 56.3844026,22 L57.3844027,22 C57.6053165,22 57.7844026,22.1790861 57.7844026,22.4 Z M60.9844026,22.4 C60.9844026,22.6209139 60.8053165,22.8 60.5844026,22.8 L59.5844026,22.8 C59.3634887,22.8 59.1844026,22.6209139 59.1844026,22.4 C59.1844026,22.1790861 59.3634887,22 59.5844026,22 L60.5844025,22 C60.8053165,22 60.9844026,22.1790861 60.9844026,22.4 Z M64.1844026,22.4 C64.1844026,22.6209139 64.0053165,22.8 63.7844026,22.8 L62.7844026,22.8 C62.5634887,22.8 62.3844026,22.6209139 62.3844026,22.4 C62.3844026,22.1790861 62.5634887,22 62.7844026,22 L63.7844025,22 C64.0053165,22 64.1844026,22.1790861 64.1844026,22.4 Z M67.3844026,22.4 C67.3844026,22.6209139 67.2053165,22.8 66.9844026,22.8 L65.9844026,22.8 C65.7634887,22.8 65.5844026,22.6209139 65.5844026,22.4 C65.5844026,22.1790861 65.7634887,22 65.9844026,22 L66.9844025,22 C67.2053165,22 67.3844026,22.1790861 67.3844026,22.4 Z M12.9844026,22.4 C12.9844026,22.6209139 12.8053165,22.8 12.5844026,22.8 L11.5844026,22.8 C11.3634887,22.8 11.1844026,22.6209139 11.1844026,22.4 C11.1844026,22.1790861 11.3634887,22 11.5844026,22 L12.5844026,22 C12.8053165,22 12.9844026,22.1790861 12.9844026,22.4 Z M70.5844026,22.4 C70.5844026,22.6209139 70.4053165,22.8 70.1844026,22.8 L69.1844026,22.8 C68.9634887,22.8 68.7844026,22.6209139 68.7844026,22.4 C68.7844026,22.1790861 68.9634887,22 69.1844026,22 L70.1844025,22 C70.4053165,22 70.5844026,22.1790861 70.5844026,22.4 Z M73.7844026,22.4 C73.7844026,22.6209139 73.6053165,22.8 73.3844026,22.8 L72.3844026,22.8 C72.1634887,22.8 71.9844026,22.6209139 71.9844026,22.4 C71.9844026,22.1790861 72.1634887,22 72.3844026,22 L73.3844025,22 C73.6053165,22 73.7844026,22.1790861 73.7844026,22.4 Z M76.9844026,22.4 C76.9844026,22.6209139 76.8053165,22.8 76.5844026,22.8 L75.5844026,22.8 C75.3634887,22.8 75.1844026,22.6209139 75.1844026,22.4 C75.1844026,22.1790861 75.3634887,22 75.5844026,22 L76.5844025,22 C76.8053165,22 76.9844026,22.1790861 76.9844026,22.4 Z M80.1844026,22.4 C80.1844026,22.6209139 80.0053165,22.8 79.7844026,22.8 L78.7844026,22.8 C78.5634887,22.8 78.3844026,22.6209139 78.3844026,22.4 C78.3844026,22.1790861 78.5634887,22 78.7844026,22 L79.7844025,22 C80.0053165,22 80.1844026,22.1790861 80.1844026,22.4 Z M83.3844026,22.4 C83.3844026,22.6209139 83.2053165,22.8 82.9844026,22.8 L81.9844026,22.8 C81.7634887,22.8 81.5844026,22.6209139 81.5844026,22.4 C81.5844026,22.1790861 81.7634887,22 81.9844026,22 L82.9844025,22 C83.2053165,22 83.3844026,22.1790861 83.3844026,22.4 Z M86.5844026,22.4 C86.5844026,22.6209139 86.4053165,22.8 86.1844026,22.8 L85.1844026,22.8 C84.9634887,22.8 84.7844026,22.6209139 84.7844026,22.4 C84.7844026,22.1790861 84.9634887,22 85.1844026,22 L86.1844025,22 C86.4053165,22 86.5844026,22.1790861 86.5844026,22.4 Z M89.7844026,22.4 C89.7844026,22.6209139 89.6053165,22.8 89.3844026,22.8 L88.3844026,22.8 C88.1634887,22.8 87.9844026,22.6209139 87.9844026,22.4 C87.9844026,22.1790861 88.1634887,22 88.3844026,22 L89.3844025,22 C89.6053165,22 89.7844026,22.1790861 89.7844026,22.4 Z M92.9844026,22.4 C92.9844026,22.6209139 92.8053165,22.8 92.5844026,22.8 L91.5844026,22.8 C91.3634887,22.8 91.1844026,22.6209139 91.1844026,22.4 C91.1844026,22.1790861 91.3634887,22 91.5844026,22 L92.5844025,22 C92.8053165,22 92.9844026,22.1790861 92.9844026,22.4 Z M115.384403,22.4 C115.384403,22.6209139 115.205316,22.8 114.984403,22.8 L113.984403,22.8 C113.763489,22.8 113.584403,22.6209139 113.584403,22.4 C113.584403,22.1790861 113.763489,22 113.984403,22 L114.984403,22 C115.205316,22 115.384403,22.1790861 115.384403,22.4 Z M112.184403,22.4 C112.184403,22.6209139 112.005316,22.8 111.784403,22.8 L110.784403,22.8 C110.563489,22.8 110.384403,22.6209139 110.384403,22.4 C110.384403,22.1790861 110.563489,22 110.784403,22 L111.784403,22 C112.005316,22 112.184403,22.1790861 112.184403,22.4 Z M108.984403,22.4 C108.984403,22.6209139 108.805316,22.8 108.584403,22.8 L107.584403,22.8 C107.363489,22.8 107.184403,22.6209139 107.184403,22.4 C107.184403,22.1790861 107.363489,22 107.584403,22 L108.584403,22 C108.805316,22 108.984403,22.1790861 108.984403,22.4 Z M105.784403,22.4 C105.784403,22.6209139 105.605316,22.8 105.384403,22.8 L104.384403,22.8 C104.163489,22.8 103.984403,22.6209139 103.984403,22.4 C103.984403,22.1790861 104.163489,22 104.384403,22 L105.384402,22 C105.605316,22 105.784403,22.1790861 105.784403,22.4 Z M102.584403,22.4 C102.584403,22.6209139 102.405316,22.8 102.184403,22.8 L101.184403,22.8 C100.963489,22.8 100.784403,22.6209139 100.784403,22.4 C100.784403,22.1790861 100.963489,22 101.184403,22 L102.184402,22 C102.405316,22 102.584403,22.1790861 102.584403,22.4 Z M2.21445335,21.7146857 C2.47201296,21.8469101 2.7503283,21.9352706 3.04005994,21.9753362 C3.25889142,22.0055973 3.41175792,22.2075267 3.38149683,22.4263582 C3.35123574,22.6451896 3.14930631,22.7980562 2.93047483,22.7677951 C2.55112172,22.7153362 2.1863813,22.5995372 1.84908821,22.4263797 C1.65255935,22.3254869 1.575031,22.084379 1.6759238,21.8878502 C1.77681659,21.6913213 2.01792449,21.6137929 2.21445335,21.7146857 Z M118.085667,20.7967074 C118.266363,20.923797 118.30982,21.1733068 118.182731,21.3540032 C117.964109,21.6648408 117.694714,21.9373837 117.386455,22.1596216 C117.207257,22.2888142 116.957256,22.2482762 116.828063,22.0690774 C116.698871,21.8898786 116.739409,21.6398779 116.918607,21.5106853 C117.154633,21.3405235 117.360988,21.1317568 117.528371,20.8937713 C117.655461,20.7130749 117.904971,20.6696179 118.085667,20.7967074 Z M0.799990534,19.3738218 L0.800500418,19.4515966 C0.806078183,19.7390086 0.858259793,20.0190186 0.953961149,20.2838222 C1.02904737,20.4915841 0.921492483,20.7208778 0.713730576,20.795964 C0.505968669,20.8710502 0.276674954,20.7634953 0.201588729,20.5557334 C0.0762509298,20.2089265 0.00793261037,19.8423259 0.000585394996,19.4621397 L9.8794887e-06,19.3793853 C-0.00152643006,19.1584767 0.176309914,18.9781495 0.397218472,18.9766034 C0.61812703,18.9750769 0.798454225,19.1529133 0.799990534,19.3738218 Z M118.8,18.0077988 L118.8,19.0077988 C118.8,19.2287127 118.620914,19.4077988 118.4,19.4077988 C118.179086,19.4077988 118,19.2287127 118,19.0077988 L118,18.0077988 C118,17.7868849 118.179086,17.6077988 118.4,17.6077988 C118.620914,17.6077988 118.8,17.7868849 118.8,18.0077988 Z M0.800000207,16.1766036 L0.800000207,17.1766036 C0.800000207,17.3975175 0.620914107,17.5766036 0.400000207,17.5766036 C0.179086307,17.5766036 -0.0999997932,17.3975175 -0.0999997932,17.1766036 L-0.0999997932,16.1766036 C-0.0999997932,15.9556897 0.179086307,15.7766036 0.400000207,15.7766036 C0.620914107,15.7766036 0.800000207,15.9556897 0.800000207,16.1766036 Z M118.8,14.8077988 L118.8,15.8077988 C118.8,16.0287127 118.620914,16.2077988 118.4,16.2077988 C118.179086,16.2077988 118,16.0287127 118,15.8077988 L118,14.8077988 C118,14.5868849 118.179086,14.4077988 118.4,14.4077988 C118.620914,14.4077988 118.8,14.5868849 118.8,14.8077988 Z M0.800000207,12.9766036 L0.800000207,13.9766036 C0.800000207,14.1975175 0.620914107,14.3766036 0.400000207,14.3766036 C0.179086307,14.3766036 -0.0999997932,14.1975175 -0.0999997932,13.9766036 L-0.0999997932,12.9766036 C-0.0999997932,12.7556897 0.179086307,12.5766036 0.400000207,12.5766036 C0.620914107,12.5766036 0.800000207,12.7556897 0.800000207,12.9766036 Z M118.8,11.6077988 L118.8,12.6077988 C118.8,12.8287127 118.620914,13.0077988 118.4,13.0077988 C118.179086,13.0077988 118,12.8287127 118,12.6077988 L118,11.6077988 C118,11.3868849 118.179086,11.2077988 118.4,11.2077988 C118.620914,11.2077988 118.8,11.3868849 118.8,11.6077988 Z M0.800000207,9.77660357 L0.800000207,10.7766036 C0.800000207,10.9975175 0.620914107,11.1766036 0.400000207,11.1766036 C0.179086307,11.1766036 -0.0999997932,10.9975175 -0.0999997932,10.7766036 L-0.0999997932,9.77660357 C-0.0999997932,9.55568967 0.179086307,9.37660357 0.400000207,9.37660357 C0.620914107,9.37660357 0.800000207,9.55568967 0.800000207,9.77660357 Z M118.8,8.40779881 L118.8,9.40779881 C118.8,9.62871271 118.620914,9.80779881 118.4,9.80779881 C118.179086,9.80779881 118,9.62871271 118,9.40779881 L118,8.40779881 C118,8.18688491 118.179086,8.00779881 118.4,8.00779881 C118.620914,8.00779881 118.8,8.18688491 118.8,8.40779881 Z M0.800000207,6.57660357 L0.800000207,7.57660357 C0.800000207,7.79751746 0.620914107,7.97660357 0.400000207,7.97660357 C0.179086307,7.97660357 -0.0999997932,7.79751746 -0.0999997932,7.57660357 L-0.0999997932,6.57660357 C-0.0999997932,6.35568967 0.179086307,6.17660357 0.400000207,6.17660357 C0.620914107,6.17660357 0.800000207,6.35568967 0.800000207,6.57660357 Z M118.8,5.20779881 L118.8,6.20779881 C118.8,6.42871271 118.620914,6.60779881 118.4,6.60779881 C118.179086,6.60779881 118,6.42871271 118,6.20779881 L118,5.20779881 C118,4.98688491 118.179086,4.80779881 118.4,4.80779881 C118.620914,4.80779881 118.8,4.98688491 118.8,5.20779881 Z M0.800078494,3.37957686 C0.800013273,3.3897814 0.800013273,3.3897814 0.800000207,3.4 L0.800000207,4.37660357 C0.800000207,4.59751746 0.620914107,4.77660357 0.400000207,4.77660357 C0.179086307,4.77660357 2.06813115e-07,4.59751746 2.06813115e-07,4.37660357 L2.06813115e-07,3.4 C1.7193848e-05,3.38671374 1.7193848e-05,3.38671374 0.000102019275,3.37344172 C0.00179619156,3.15253431 0.182250423,2.97482688 0.403157827,2.97650904 C0.62406523,2.97821522 0.801772666,3.15866946 0.800078494,3.37957686 Z M118.421116,1.83887624 C118.595442,2.17563306 118.712485,2.54002136 118.766215,2.9191864 C118.79721,3.13791515 118.645022,3.34035621 118.426293,3.37135118 C118.207564,3.40234614 118.005123,3.25015784 117.974128,3.0314291 C117.933094,2.74185398 117.843784,2.46380655 117.710664,2.20665099 C117.609106,2.01046515 117.685816,1.76909592 117.882002,1.66753772 C118.078188,1.56597953 118.319557,1.6426904 118.421116,1.83887624 Z M2.37834391,0.550095989 C2.47987763,0.746294496 2.40313666,0.987654157 2.20693815,1.08918788 C1.94867982,1.22283788 1.71432455,1.39926119 1.51394916,1.61035064 C1.36185852,1.77057357 1.10867834,1.77716609 0.948455413,1.62507544 C0.788232485,1.4729848 0.781639967,1.21980462 0.933730612,1.05958169 C1.19539698,0.783924034 1.50154333,0.553456192 1.83925202,0.378690235 C2.03545052,0.277156512 2.27681019,0.353897482 2.37834391,0.550095989 Z M116.517808,0.188136945 C116.72645,0.260742276 116.836729,0.488738155 116.764124,0.697379994 C116.691519,0.906021834 116.463523,1.01630121 116.254881,0.943695883 C115.98269,0.84897599 115.695024,0.800014594 115.400132,0.800000003 C115.179218,0.799989073 115.000152,0.620894112 115.000152,0.399980212 C115.000152,0.179066313 115.179258,-1.09263043e-05 115.400172,4.24230608e-09 C115.784867,1.90385059e-05 116.161375,0.064101627 116.517808,0.188136945 Z M59.2000002,0.5 C59.2000002,0.7209139 59.0209141,0.9 58.8000002,0.9 L57.8000002,0.9 C57.5790863,0.9 57.4000002,0.7209139 57.4000002,0.5 C57.4000002,0.2790861 57.5790863,0 57.8000002,0 L58.8000002,0 C59.0209141,0 59.2000002,0.2790861 59.2000002,0.5 Z M4.80000021,0.5 C4.80000021,0.7209139 4.62091411,0.9 4.40000021,0.9 L3.40000021,0.9 C3.17908631,0.9 3.00000021,0.7209139 3.00000021,0.5 C3.00000021,0.2790861 3.17908631,0 3.40000021,0 L4.40000021,0 C4.62091411,0 4.80000021,0.2790861 4.80000021,0.5 Z M107.2,0.5 C107.2,0.7209139 107.020914,0.9 106.8,0.9 L105.8,0.9 C105.579086,0.9 105.4,0.7209139 105.4,0.5 C105.4,0.2790861 105.579086,0 105.8,0 L106.8,0 C107.020914,0 107.2,0.2790861 107.2,0.5 Z M104,0.5 C104,0.7209139 103.820914,0.9 103.6,0.9 L102.6,0.9 C102.379086,0.9 102.2,0.7209139 102.2,0.5 C102.2,0.2790861 102.379086,0 102.6,0 L103.6,0 C103.820914,0 104,0.2790861 104,0.5 Z M100.8,0.5 C100.8,0.7209139 100.620914,0.9 100.4,0.9 L99.4000002,0.9 C99.1790863,0.9 99.0000002,0.7209139 99.0000002,0.5 C99.0000002,0.2790861 99.1790863,0 99.4000002,0 L100.4,0 C100.620914,0 100.8,0.2790861 100.8,0.5 Z M97.6000002,0.5 C97.6000002,0.7209139 97.4209141,0.9 97.2000002,0.9 L96.2000002,0.9 C95.9790863,0.9 95.8000002,0.7209139 95.8000002,0.5 C95.8000002,0.2790861 95.9790863,0 96.2000002,0 L97.2000002,0 C97.4209141,0 97.6000002,0.2790861 97.6000002,0.5 Z M94.4000002,0.5 C94.4000002,0.7209139 94.2209141,0.9 94.0000002,0.9 L93.0000002,0.9 C92.7790863,0.9 92.6000002,0.7209139 92.6000002,0.5 C92.6000002,0.2790861 92.7790863,0 93.0000002,0 L94.0000002,0 C94.2209141,0 94.4000002,0.2790861 94.4000002,0.5 Z M91.2000002,0.5 C91.2000002,0.7209139 91.0209141,0.9 90.8000002,0.9 L89.8000002,0.9 C89.5790863,0.9 89.4000002,0.7209139 89.4000002,0.5 C89.4000002,0.2790861 89.5790863,0 89.8000002,0 L90.8000002,0 C91.0209141,0 91.2000002,0.2790861 91.2000002,0.5 Z M88.0000002,0.5 C88.0000002,0.7209139 87.8209141,0.9 87.6000002,0.9 L86.6000002,0.9 C86.3790863,0.9 86.2000002,0.7209139 86.2000002,0.5 C86.2000002,0.2790861 86.3790863,0 86.6000002,0 L87.6000002,0 C87.8209141,0 88.0000002,0.2790861 88.0000002,0.5 Z M84.8000002,0.5 C84.8000002,0.7209139 84.6209141,0.9 84.4000002,0.9 L83.4000002,0.9 C83.1790863,0.9 83.0000002,0.7209139 83.0000002,0.5 C83.0000002,0.2790861 83.1790863,0 83.4000002,0 L84.4000002,0 C84.6209141,0 84.8000002,0.2790861 84.8000002,0.5 Z M81.6000002,0.5 C81.6000002,0.7209139 81.4209141,0.9 81.2000002,0.9 L80.2000002,0.9 C79.9790863,0.9 79.8000002,0.7209139 79.8000002,0.5 C79.8000002,0.2790861 79.9790863,0 80.2000002,0 L81.2000002,0 C81.4209141,0 81.6000002,0.2790861 81.6000002,0.5 Z M78.4000002,0.5 C78.4000002,0.7209139 78.2209141,0.9 78.0000002,0.9 L77.0000002,0.9 C76.7790863,0.9 76.6000002,0.7209139 76.6000002,0.5 C76.6000002,0.2790861 76.7790863,0 77.0000002,0 L78.0000002,0 C78.2209141,0 78.4000002,0.2790861 78.4000002,0.5 Z M75.2000002,0.5 C75.2000002,0.7209139 75.0209141,0.9 74.8000002,0.9 L73.8000002,0.9 C73.5790863,0.9 73.4000002,0.7209139 73.4000002,0.5 C73.4000002,0.2790861 73.5790863,0 73.8000002,0 L74.8000002,0 C75.0209141,0 75.2000002,0.2790861 75.2000002,0.5 Z M72.0000002,0.5 C72.0000002,0.7209139 71.8209141,0.9 71.6000002,0.9 L70.6000002,0.9 C70.3790863,0.9 70.2000002,0.7209139 70.2000002,0.5 C70.2000002,0.2790861 70.3790863,0 70.6000002,0 L71.6000002,0 C71.8209141,0 72.0000002,0.2790861 72.0000002,0.5 Z M68.8000002,0.5 C68.8000002,0.7209139 68.6209141,0.9 68.4000002,0.9 L67.4000002,0.9 C67.1790863,0.9 67.0000002,0.7209139 67.0000002,0.5 C67.0000002,0.2790861 67.1790863,0 67.4000002,0 L68.4000002,0 C68.6209141,0 68.8000002,0.2790861 68.8000002,0.5 Z M65.6000002,0.5 C65.6000002,0.7209139 65.4209141,0.9 65.2000002,0.9 L64.2000002,0.9 C63.9790863,0.9 63.8000002,0.7209139 63.8000002,0.5 C63.8000002,0.2790861 63.9790863,0 64.2000002,0 L65.2000002,0 C65.4209141,0 65.6000002,0.2790861 65.6000002,0.5 Z M62.4000002,0.5 C62.4000002,0.7209139 62.2209141,0.9 62.0000002,0.9 L61.0000002,0.9 C60.7790863,0.9 60.6000002,0.7209139 60.6000002,0.5 C60.6000002,0.2790861 60.7790863,0 61.0000002,0 L62.0000002,0 C62.2209141,0 62.4000002,0.2790861 62.4000002,0.5 Z M113.6,0.5 C113.6,0.7209139 113.420914,0.9 113.2,0.9 L112.2,0.9 C111.979086,0.9 111.8,0.7209139 111.8,0.5 C111.8,0.2790861 111.979086,0 112.2,0 L113.2,0 C113.420914,0 113.6,0.2790861 113.6,0.5 Z M56.0000002,0.5 C56.0000002,0.7209139 55.8209141,0.9 55.6000002,0.9 L54.6000002,0.9 C54.3790863,0.9 54.2000002,0.7209139 54.2000002,0.5 C54.2000002,0.2790861 54.3790863,0 54.6000002,0 L55.6000002,0 C55.8209141,0 56.0000002,0.2790861 56.0000002,0.5 Z M52.8000002,0.5 C52.8000002,0.7209139 52.6209141,0.9 52.4000002,0.9 L51.4000002,0.9 C51.1790863,0.9 51.0000002,0.7209139 51.0000002,0.5 C51.0000002,0.2790861 51.1790863,0 51.4000002,0 L52.4000002,0 C52.6209141,0 52.8000002,0.2790861 52.8000002,0.5 Z M49.6000002,0.5 C49.6000002,0.7209139 49.4209141,0.9 49.2000002,0.9 L48.2000002,0.9 C47.9790863,0.9 47.8000002,0.7209139 47.8000002,0.5 C47.8000002,0.2790861 47.9790863,0 48.2000002,0 L49.2000002,0 C49.4209141,0 49.6000002,0.2790861 49.6000002,0.5 Z M46.4000002,0.5 C46.4000002,0.7209139 46.2209141,0.9 46.0000002,0.9 L45.0000002,0.9 C44.7790863,0.9 44.6000002,0.7209139 44.6000002,0.5 C44.6000002,0.2790861 44.7790863,0 45.0000002,0 L46.0000002,0 C46.2209141,0 46.4000002,0.2790861 46.4000002,0.5 Z M43.2000002,0.5 C43.2000002,0.7209139 43.0209141,0.9 42.8000002,0.9 L41.8000002,0.9 C41.5790863,0.9 41.4000002,0.7209139 41.4000002,0.5 C41.4000002,0.2790861 41.5790863,0 41.8000002,0 L42.8000002,0 C43.0209141,0 43.2000002,0.2790861 43.2000002,0.5 Z M40.0000002,0.5 C40.0000002,0.7209139 39.8209141,0.9 39.6000002,0.9 L38.6000002,0.9 C38.3790863,0.9 38.2000002,0.7209139 38.2000002,0.5 C38.2000002,0.2790861 38.3790863,0 38.6000002,0 L39.6000002,0 C39.8209141,0 40.0000002,0.2790861 40.0000002,0.5 Z M36.8000002,0.5 C36.8000002,0.7209139 36.6209141,0.9 36.4000002,0.9 L35.4000002,0.9 C35.1790863,0.9 35.0000002,0.7209139 35.0000002,0.5 C35.0000002,0.2790861 35.1790863,0 35.4000002,0 L36.4000002,0 C36.6209141,0 36.8000002,0.2790861 36.8000002,0.5 Z M33.6000002,0.5 C33.6000002,0.7209139 33.4209141,0.9 33.2000002,0.9 L32.2000002,0.9 C31.9790863,0.9 31.8000002,0.7209139 31.8000002,0.5 C31.8000002,0.2790861 31.9790863,0 32.2000002,0 L33.2000002,0 C33.4209141,0 33.6000002,0.2790861 33.6000002,0.5 Z M30.4000002,0.5 C30.4000002,0.7209139 30.2209141,0.9 30.0000002,0.9 L29.0000002,0.9 C28.7790863,0.9 28.6000002,0.7209139 28.6000002,0.5 C28.6000002,0.2790861 28.7790863,0 29.0000002,0 L30.0000002,0 C30.2209141,0 30.4000002,0.2790861 30.4000002,0.5 Z M27.2000002,0.5 C27.2000002,0.7209139 27.0209141,0.9 26.8000002,0.9 L25.8000002,0.9 C25.5790863,0.9 25.4000002,0.7209139 25.4000002,0.5 C25.4000002,0.2790861 25.5790863,0 25.8000002,0 L26.8000002,0 C27.0209141,0 27.2000002,0.2790861 27.2000002,0.5 Z M24.0000002,0.5 C24.0000002,0.7209139 23.8209141,0.9 23.6000002,0.9 L22.6000002,0.9 C22.3790863,0.9 22.2000002,0.7209139 22.2000002,0.5 C22.2000002,0.2790861 22.3790863,0 22.6000002,0 L23.6000002,0 C23.8209141,0 24.0000002,0.2790861 24.0000002,0.5 Z M20.8000002,0.5 C20.8000002,0.7209139 20.6209141,0.9 20.4000002,0.9 L19.4000002,0.9 C19.1790863,0.9 19.0000002,0.7209139 19.0000002,0.5 C19.0000002,0.2790861 19.1790863,0 19.4000002,0 L20.4000002,0 C20.6209141,0 20.8000002,0.2790861 20.8000002,0.5 Z M17.6000002,0.5 C17.6000002,0.7209139 17.4209141,0.9 17.2000002,0.9 L16.2000002,0.9 C15.9790863,0.9 15.8000002,0.7209139 15.8000002,0.5 C15.8000002,0.2790861 15.9790863,0 16.2000002,0 L17.2000002,0 C17.4209141,0 17.6000002,0.2790861 17.6000002,0.5 Z M14.4000002,0.5 C14.4000002,0.7209139 14.2209141,0.9 14.0000002,0.9 L13.0000002,0.9 C12.7790863,0.9 12.6000002,0.7209139 12.6000002,0.5 C12.6000002,0.2790861 12.7790863,0 13.0000002,0 L14.0000002,0 C14.2209141,0 14.4000002,0.2790861 14.4000002,0.5 Z M11.2000002,0.5 C11.2000002,0.7209139 11.0209141,0.9 10.8000002,0.9 L9.80000021,0.9 C9.57908631,0.9 9.40000021,0.7209139 9.40000021,0.5 C9.40000021,0.2790861 9.57908631,0 9.80000021,0 L10.8000002,0 C11.0209141,0 11.2000002,0.2790861 11.2000002,0.5 Z M8.00000021,0.5 C8.00000021,0.7209139 7.82091411,0.9 7.60000021,0.9 L6.60000021,0.9 C6.37908631,0.9 6.20000021,0.7209139 6.20000021,0.5 C6.20000021,0.2790861 6.37908631,0 6.60000021,0 L7.60000021,0 C7.82091411,0 8.00000021,0.2790861 8.00000021,0.5 Z M110.4,0.5 C110.4,0.7209139 110.220914,0.9 110,0.9 L109,0.9 C108.779086,0.9 108.6,0.7209139 108.6,0.5 C108.6,0.2790861 108.779086,0 109,0 L110,0 C110.220914,0 110.4,0.2790861 110.4,0.5 Z" id="vLLM-dash-box" fill="#8E8E8E" fill-rule="nonzero"></path>
+                </g>
+                <g id="row-top2" transform="translate(9.4, 33)">
+                    <g id="..." transform="translate(62.7, 6)" fill="#8E8E8E">
+                        <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                    </g>
+                    <rect id="Rectangle" fill-opacity="0.6" fill="#30A2FF" x="0" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill="#30A2FF" fill-rule="nonzero"></path>
+                    <rect id="Rectangle" fill-opacity="0.6" fill="#30A2FF" x="32" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M53,0 C54.6568542,0 56,1.34314575 56,3 L56,9 C56,10.6568542 54.6568542,12 53,12 L35,12 C33.3431458,12 32,10.6568542 32,9 L32,3 C32,1.34314575 33.3431458,0 35,0 L53,0 Z M53,1 L35,1 L34.7955116,1.0103258 C33.7869995,1.11274576 33,1.96446609 33,3 L33,9 C33,10.1045695 33.8954305,11 35,11 L53,11 C54.1045695,11 55,10.1045695 55,9 L55,3 C55,1.8954305 54.1045695,1 53,1 Z" id="Rectangle" fill-opacity="0.8" fill="#30A2FF" fill-rule="nonzero"></path>
+                    <rect id="Rectangle" fill-opacity="0.6" fill="#30A2FF" x="78" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L81,12 C79.3431458,12 78,10.6568542 78,9 L78,3 C78,1.34314575 79.3431458,0 81,0 L99,0 Z M99,1 L81,1 L80.7955116,1.0103258 C79.7869995,1.11274576 79,1.96446609 79,3 L79,9 C79,10.1045695 79.8954305,11 81,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill="#30A2FF" fill-rule="nonzero"></path>
+                </g>
+                <path d="M20.9000002,12.2222222 L23.4000002,17.2222222 L21.3990002,17.2222222 L21.4000002,32.9992391 C21.4000002,33.2753815 21.1761426,33.4992391 20.9000002,33.4992391 C20.6238578,33.4992391 20.4000002,33.2753815 20.4000002,32.9992391 L20.3990002,17.2222222 L18.4000002,17.2222222 L20.9000002,12.2222222 Z" id="Combined-Shape" fill="#8E8E8E" fill-rule="nonzero"></path>
+                <path d="M52.9000002,12.2222222 L55.4000002,17.2222222 L53.3990002,17.2222222 L53.4000002,32.9992391 C53.4000002,33.2753815 53.1761426,33.4992391 52.9000002,33.4992391 C52.6238578,33.4992391 52.4000002,33.2753815 52.4000002,32.9992391 L52.3990002,17.2222222 L50.4000002,17.2222222 L52.9000002,12.2222222 Z" id="Combined-Shape" fill="#8E8E8E" fill-rule="nonzero"></path>
+                <path d="M99.9000002,12.2222222 L102.4,17.2222222 L100.399,17.2222222 L100.4,32.9992391 C100.4,33.2753815 100.176143,33.4992391 99.9000002,33.4992391 C99.6238578,33.4992391 99.4000002,33.2753815 99.4000002,32.9992391 L99.3990002,17.2222222 L97.4000002,17.2222222 L99.9000002,12.2222222 Z" id="Combined-Shape" fill="#8E8E8E" fill-rule="nonzero"></path>
+                <g id="row-top1" transform="translate(9.4, 0)">
+                    <rect id="Rectangle" fill-opacity="0.6" fill="#30A2FF" x="0" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill="#30A2FF" fill-rule="nonzero"></path>
+                    <rect id="Rectangle" fill-opacity="0.6" fill="#30A2FF" x="32" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M53,0 C54.6568542,0 56,1.34314575 56,3 L56,9 C56,10.6568542 54.6568542,12 53,12 L35,12 C33.3431458,12 32,10.6568542 32,9 L32,3 C32,1.34314575 33.3431458,0 35,0 L53,0 Z M53,1 L35,1 L34.7955116,1.0103258 C33.7869995,1.11274576 33,1.96446609 33,3 L33,9 C33,10.1045695 33.8954305,11 35,11 L53,11 C54.1045695,11 55,10.1045695 55,9 L55,3 C55,1.8954305 54.1045695,1 53,1 Z" id="Rectangle" fill-opacity="0.8" fill="#30A2FF" fill-rule="nonzero"></path>
+                    <rect id="Rectangle" fill-opacity="0.6" fill="#30A2FF" x="78" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L81,12 C79.3431458,12 78,10.6568542 78,9 L78,3 C78,1.34314575 79.3431458,0 81,0 L99,0 Z M99,1 L81,1 L80.7955116,1.0103258 C79.7869995,1.11274576 79,1.96446609 79,3 L79,9 C79,10.1045695 79.8954305,11 81,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill="#30A2FF" fill-rule="nonzero"></path>
+                    <g id="..." transform="translate(62.7, 5)" fill="#8E8E8E">
+                        <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                    </g>
+                </g>
+            </g>
+        </g>
+        <g id="MEAN-Pooling" stroke-width="1" transform="translate(466, 70)">
+            <g id="body" transform="translate(0, 50)">
+                <g id="MEAN-Pooling" transform="translate(0, 186)" fill="#7D7D7D" fill-rule="nonzero">
+                    <path d="M26.6069336,11.7099609 L26.6069336,9.83544922 L24.737793,5.37207031 C24.6196289,5.06770833 24.4477539,4.83943685 24.222168,4.68725586 C23.996582,4.53507487 23.7530924,4.45898438 23.4916992,4.45898438 C23.1801758,4.45898438 22.9098307,4.5538737 22.6806641,4.74365234 C22.4514974,4.93343099 22.3369141,5.17692057 22.3369141,5.47412109 L22.3422852,11.3608398 C22.3422852,11.5398763 22.4112142,11.6786296 22.5490723,11.7770996 C22.6869303,11.8755697 22.8507487,11.9248047 23.0405273,11.9248047 C23.230306,11.9248047 23.3932292,11.8755697 23.5292969,11.7770996 C23.6653646,11.6786296 23.7333984,11.5398763 23.7333984,11.3608398 L23.7333984,6.15087891 L25.7260742,10.96875 C25.9122721,11.4628906 26.2058919,11.7099609 26.6069336,11.7099609 Z M26.6123047,11.7099609 C27.0097656,11.7099609 27.3051758,11.4628906 27.4985352,10.96875 L29.4858398,6.15087891 L29.4858398,11.3608398 C29.4858398,11.5398763 29.5547689,11.6786296 29.692627,11.7770996 C29.830485,11.8755697 29.9943034,11.9248047 30.184082,11.9248047 C30.3738607,11.9248047 30.537679,11.8755697 30.6755371,11.7770996 C30.8133952,11.6786296 30.8823242,11.5398763 30.8823242,11.3608398 L30.8823242,5.47412109 C30.8823242,5.17692057 30.7677409,4.93343099 30.5385742,4.74365234 C30.3094076,4.5538737 30.0408529,4.45898438 29.7329102,4.45898438 C29.4715169,4.45898438 29.2280273,4.53507487 29.0024414,4.68725586 C28.7768555,4.83943685 28.6049805,5.06770833 28.4868164,5.37207031 L26.6123047,9.83544922 L26.6123047,11.7099609 Z" id="Shape"></path>
+                    <path d="M33.5195312,12 L37.4995117,12 C37.6713867,12 37.8083496,11.9400228 37.9104004,11.8200684 C38.0124512,11.7001139 38.0634766,11.5595703 38.0634766,11.3984375 C38.0634766,11.2373047 38.0124512,11.0958659 37.9104004,10.9741211 C37.8083496,10.8523763 37.6713867,10.7915039 37.4995117,10.7915039 L34.2929688,10.7915039 L34.2929688,8.63769531 L37.1665039,8.63769531 C37.3419596,8.63769531 37.4825033,8.57861328 37.5881348,8.46044922 C37.6937663,8.34228516 37.746582,8.20263672 37.746582,8.04150391 C37.746582,7.88037109 37.6946615,7.74072266 37.5908203,7.62255859 C37.4869792,7.50439453 37.3455404,7.4453125 37.1665039,7.4453125 L34.2929688,7.4453125 L34.2929688,5.46875 L37.3759766,5.46875 C37.5514323,5.46875 37.6910807,5.4078776 37.7949219,5.28613281 C37.898763,5.16438802 37.9506836,5.02294922 37.9506836,4.86181641 C37.9506836,4.70068359 37.898763,4.55924479 37.7949219,4.4375 C37.6910807,4.31575521 37.5514323,4.25488281 37.3759766,4.25488281 L33.5571289,4.25488281 C33.3028971,4.25488281 33.1068522,4.33992513 32.9689941,4.51000977 C32.8311361,4.6800944 32.762207,4.8976237 32.762207,5.16259766 L32.762207,11.1352539 C32.762207,11.3608398 32.835612,11.5613607 32.9824219,11.7368164 C33.1292318,11.9122721 33.3082682,12 33.5195312,12 Z" id="Path"></path>
+                    <path d="M43.8857422,9.18017578 L41.2324219,9.18017578 L42.559082,5.640625 L43.8857422,9.18017578 Z M44.7558594,11.5058594 C44.8167318,11.6813151 44.9098307,11.8129069 45.0351562,11.9006348 C45.1604818,11.9883626 45.3001302,12.0322266 45.4541016,12.0322266 C45.6474609,12.0322266 45.8193359,11.9677734 45.9697266,11.8388672 C46.1201172,11.7099609 46.1953125,11.5470378 46.1953125,11.3500977 C46.1953125,11.2605794 46.1774089,11.1692708 46.1416016,11.0761719 L43.7836914,5.17333984 C43.6333008,4.77587891 43.4605306,4.50911458 43.2653809,4.37304688 C43.0702311,4.23697917 42.8347982,4.16894531 42.559082,4.16894531 C42.2833659,4.16894531 42.0479329,4.23697917 41.8527832,4.37304688 C41.6576335,4.50911458 41.4848633,4.77587891 41.3344727,5.17333984 L38.9765625,11.0761719 C38.9407552,11.1692708 38.9228516,11.2605794 38.9228516,11.3500977 C38.9228516,11.5470378 38.9980469,11.7099609 39.1484375,11.8388672 C39.2988281,11.9677734 39.4707031,12.0322266 39.6640625,12.0322266 C39.8180339,12.0322266 39.9576823,11.9883626 40.0830078,11.9006348 C40.2083333,11.8129069 40.3014323,11.6813151 40.3623047,11.5058594 L40.8457031,10.2167969 L44.2724609,10.2167969 L44.7558594,11.5058594 Z" id="Shape"></path>
+                    <path d="M48.059082,12 C48.2452799,12 48.4082031,11.9373372 48.5478516,11.8120117 C48.6875,11.6866862 48.7573242,11.523763 48.7573242,11.3232422 L48.7573242,6.56445313 L52.2646484,11.4199219 C52.3828125,11.5882161 52.5314128,11.7269694 52.7104492,11.8361816 C52.8894857,11.9453939 53.0559896,12 53.2099609,12 C53.4534505,12 53.6503906,11.9024251 53.8007812,11.7072754 C53.9511719,11.5121257 54.0263672,11.2605794 54.0263672,10.9526367 L54.0263672,4.93164062 C54.0263672,4.72753906 53.9574382,4.56282552 53.8195801,4.4375 C53.681722,4.31217448 53.5214844,4.24951172 53.3388672,4.24951172 C53.1526693,4.24951172 52.9906413,4.31306966 52.8527832,4.44018555 C52.7149251,4.56730143 52.6459961,4.73111979 52.6459961,4.93164062 L52.6459961,9.69042969 L49.1118164,4.79736328 C49.0008138,4.62906901 48.8584798,4.49658203 48.6848145,4.39990234 C48.5111491,4.30322266 48.34375,4.25488281 48.1826172,4.25488281 C47.9534505,4.25488281 47.7609863,4.33813477 47.6052246,4.50463867 C47.4494629,4.67114258 47.371582,4.8976237 47.371582,5.18408203 L47.371582,11.3232422 C47.371582,11.5273438 47.4405111,11.6911621 47.5783691,11.8146973 C47.7162272,11.9382324 47.8764648,12 48.059082,12 Z" id="Path"></path>
+                    <path d="M59.6606445,12.0053711 C59.8719076,12.0053711 60.0536296,11.9319661 60.2058105,11.7851562 C60.3579915,11.6383464 60.434082,11.444987 60.434082,11.2050781 L60.434082,8.92236328 L61.9326172,8.92236328 C63.891276,8.92236328 64.8706055,8.14176432 64.8706055,6.58056641 C64.8706055,6.15445964 64.804362,5.78833008 64.671875,5.48217773 C64.539388,5.17602539 64.3451335,4.93432617 64.0891113,4.75708008 C63.8330892,4.57983398 63.537679,4.45092773 63.2028809,4.37036133 C62.8680827,4.28979492 62.4786784,4.24951172 62.034668,4.24951172 L59.7250977,4.24951172 C59.4601237,4.24951172 59.2542318,4.33902995 59.1074219,4.51806641 C58.960612,4.69710286 58.887207,4.9226888 58.887207,5.19482422 L58.887207,11.2050781 C58.887207,11.444987 58.9641927,11.6383464 59.1181641,11.7851562 C59.2721354,11.9319661 59.4529622,12.0053711 59.6606445,12.0053711 Z M60.434082,7.80517578 L60.434082,5.42578125 L61.8681641,5.42578125 C62.1188151,5.42578125 62.3273926,5.44010417 62.4938965,5.46875 C62.6604004,5.49739583 62.8125814,5.55110677 62.9504395,5.62988281 C63.0882975,5.70865885 63.1894531,5.82682292 63.2539062,5.984375 C63.3183594,6.14192708 63.3505859,6.34065755 63.3505859,6.58056641 C63.3505859,6.82763672 63.3183594,7.03173828 63.2539062,7.19287109 C63.1894531,7.35400391 63.0882975,7.47843424 62.9504395,7.56616211 C62.8125814,7.65388997 62.6568197,7.71565755 62.4831543,7.75146484 C62.3094889,7.78727214 62.0919596,7.80517578 61.8305664,7.80517578 L60.434082,7.80517578 Z" id="Shape"></path>
+                    <path d="M68.3081055,11.1459961 C67.8461914,11.1459961 67.4863281,10.9848633 67.2285156,10.6625977 C66.9707031,10.340332 66.8417969,9.88916016 66.8417969,9.30908203 C66.8417969,8.72184245 66.9698079,8.26619466 67.2258301,7.94213867 C67.4818522,7.61808268 67.8426107,7.45605469 68.3081055,7.45605469 C68.7736003,7.45605469 69.1352539,7.61897786 69.3930664,7.94482422 C69.6508789,8.27067057 69.7797852,8.72542318 69.7797852,9.30908203 C69.7797852,9.88916016 69.6508789,10.340332 69.3930664,10.6625977 C69.1352539,10.9848633 68.7736003,11.1459961 68.3081055,11.1459961 Z M68.3081055,12.0644531 C68.6876628,12.0644531 69.0349935,12.0125326 69.3500977,11.9086914 C69.6652018,11.8048503 69.931071,11.666097 70.1477051,11.4924316 C70.3643392,11.3187663 70.5469564,11.1128743 70.6955566,10.8747559 C70.8441569,10.6366374 70.9533691,10.3868815 71.0231934,10.1254883 C71.0930176,9.86409505 71.1279297,9.59195964 71.1279297,9.30908203 C71.1279297,9.01188151 71.0912272,8.72721354 71.0178223,8.45507812 C70.9444173,8.18294271 70.8307292,7.92960612 70.6767578,7.69506836 C70.5227865,7.4605306 70.3365885,7.2582194 70.1181641,7.08813477 C69.8997396,6.91805013 69.6356608,6.78377279 69.3259277,6.68530273 C69.0161947,6.58683268 68.6769206,6.53759766 68.3081055,6.53759766 C67.9321289,6.53759766 67.5874837,6.58862305 67.2741699,6.69067383 C66.9608561,6.79272461 66.6958822,6.93147786 66.479248,7.10693359 C66.2626139,7.28238932 66.0791016,7.48828125 65.9287109,7.72460937 C65.7783203,7.9609375 65.6682129,8.21248372 65.5983887,8.47924805 C65.5285645,8.74601237 65.4936523,9.0226237 65.4936523,9.30908203 C65.4936523,9.67073568 65.550944,10.0126953 65.6655273,10.3349609 C65.7801107,10.6572266 65.9475098,10.949056 66.1677246,11.2104492 C66.3879395,11.4718424 66.6824544,11.6795247 67.0512695,11.8334961 C67.4200846,11.9874674 67.8390299,12.0644531 68.3081055,12.0644531 Z" id="Shape"></path>
+                    <path d="M74.8017578,11.1459961 C74.3398438,11.1459961 73.9799805,10.9848633 73.722168,10.6625977 C73.4643555,10.340332 73.3354492,9.88916016 73.3354492,9.30908203 C73.3354492,8.72184245 73.4634603,8.26619466 73.7194824,7.94213867 C73.9755046,7.61808268 74.336263,7.45605469 74.8017578,7.45605469 C75.2672526,7.45605469 75.6289062,7.61897786 75.8867188,7.94482422 C76.1445312,8.27067057 76.2734375,8.72542318 76.2734375,9.30908203 C76.2734375,9.88916016 76.1445312,10.340332 75.8867188,10.6625977 C75.6289062,10.9848633 75.2672526,11.1459961 74.8017578,11.1459961 Z M74.8017578,12.0644531 C75.1813151,12.0644531 75.5286458,12.0125326 75.84375,11.9086914 C76.1588542,11.8048503 76.4247233,11.666097 76.6413574,11.4924316 C76.8579915,11.3187663 77.0406087,11.1128743 77.189209,10.8747559 C77.3378092,10.6366374 77.4470215,10.3868815 77.5168457,10.1254883 C77.5866699,9.86409505 77.621582,9.59195964 77.621582,9.30908203 C77.621582,9.01188151 77.5848796,8.72721354 77.5114746,8.45507812 C77.4380697,8.18294271 77.3243815,7.92960612 77.1704102,7.69506836 C77.0164388,7.4605306 76.8302409,7.2582194 76.6118164,7.08813477 C76.3933919,6.91805013 76.1293132,6.78377279 75.8195801,6.68530273 C75.509847,6.58683268 75.1705729,6.53759766 74.8017578,6.53759766 C74.4257812,6.53759766 74.0811361,6.58862305 73.7678223,6.69067383 C73.4545085,6.79272461 73.1895345,6.93147786 72.9729004,7.10693359 C72.7562663,7.28238932 72.5727539,7.48828125 72.4223633,7.72460937 C72.2719727,7.9609375 72.1618652,8.21248372 72.092041,8.47924805 C72.0222168,8.74601237 71.9873047,9.0226237 71.9873047,9.30908203 C71.9873047,9.67073568 72.0445964,10.0126953 72.1591797,10.3349609 C72.273763,10.6572266 72.4411621,10.949056 72.661377,11.2104492 C72.8815918,11.4718424 73.1761068,11.6795247 73.5449219,11.8334961 C73.913737,11.9874674 74.3326823,12.0644531 74.8017578,12.0644531 Z" id="Shape"></path>
+                    <path d="M79.453125,12 C79.6536458,12 79.8192546,11.9337565 79.9499512,11.8012695 C80.0806478,11.6687826 80.1459961,11.484375 80.1459961,11.2480469 L80.1459961,5.01220703 C80.1459961,4.77587891 80.081543,4.59147135 79.9526367,4.45898438 C79.8237305,4.3264974 79.6608073,4.26025391 79.4638672,4.26025391 C79.2669271,4.26025391 79.1057943,4.3264974 78.9804688,4.45898438 C78.8551432,4.59147135 78.7924805,4.77587891 78.7924805,5.01220703 L78.7924805,11.2480469 C78.7924805,11.4879557 78.854248,11.6732585 78.9777832,11.8039551 C79.1013184,11.9346517 79.2597656,12 79.453125,12 Z" id="Path"></path>
+                    <path d="M82.2783203,12 C82.4752604,12 82.6372884,11.9337565 82.7644043,11.8012695 C82.8915202,11.6687826 82.9550781,11.484375 82.9550781,11.2480469 L82.9550781,7.37011719 C82.9550781,7.13020833 82.8915202,6.94401042 82.7644043,6.81152344 C82.6372884,6.67903646 82.4752604,6.61279297 82.2783203,6.61279297 C82.0813802,6.61279297 81.9202474,6.67903646 81.7949219,6.81152344 C81.6695964,6.94401042 81.6069336,7.13020833 81.6069336,7.37011719 L81.6069336,11.2480469 C81.6069336,11.4879557 81.6695964,11.6732585 81.7949219,11.8039551 C81.9202474,11.9346517 82.0813802,12 82.2783203,12 Z M82.2783203,5.53857422 C82.5110677,5.53857422 82.6999512,5.46964518 82.8449707,5.33178711 C82.9899902,5.19392904 83.0625,5.0139974 83.0625,4.79199219 C83.0625,4.56998698 82.9908854,4.39095052 82.8476562,4.25488281 C82.7044271,4.1188151 82.5164388,4.05078125 82.2836914,4.05078125 C82.0473633,4.05078125 81.8566895,4.1188151 81.7116699,4.25488281 C81.5666504,4.39095052 81.4941406,4.56998698 81.4941406,4.79199219 C81.4941406,5.0139974 81.5666504,5.19392904 81.7116699,5.33178711 C81.8566895,5.46964518 82.0455729,5.53857422 82.2783203,5.53857422 Z" id="Shape"></path>
+                    <path d="M84.9907227,12 C85.1876628,12 85.3496908,11.9346517 85.4768066,11.8039551 C85.6039225,11.6732585 85.6674805,11.4915365 85.6674805,11.2587891 L85.6674805,8.98681641 C85.6674805,8.5320638 85.7981771,8.1632487 86.0595703,7.88037109 C86.3209635,7.59749349 86.6306966,7.45605469 86.9887695,7.45605469 C87.2859701,7.45605469 87.5303548,7.55362956 87.7219238,7.7487793 C87.9134928,7.94392904 88.0092773,8.22591146 88.0092773,8.59472656 L88.0092773,11.2587891 C88.0092773,11.4915365 88.0719401,11.6732585 88.1972656,11.8039551 C88.3225911,11.9346517 88.4801432,12 88.6699219,12 C88.8704427,12 89.0351562,11.9346517 89.1640625,11.8039551 C89.2929688,11.6732585 89.3574219,11.4915365 89.3574219,11.2587891 L89.3574219,8.60009766 C89.3574219,8.25634766 89.3055013,7.95019531 89.2016602,7.68164063 C89.097819,7.41308594 88.9563802,7.19734701 88.7773438,7.03442383 C88.5983073,6.87150065 88.3951009,6.74796549 88.1677246,6.66381836 C87.9403483,6.57967122 87.6977539,6.53759766 87.4399414,6.53759766 C87.0209961,6.53759766 86.6602376,6.6163737 86.357666,6.77392578 C86.0550944,6.93147786 85.8250326,7.1624349 85.6674805,7.46679688 L85.6674805,7.27880859 C85.6674805,7.06754557 85.6048177,6.90372721 85.4794922,6.78735352 C85.3541667,6.67097982 85.1948242,6.61279297 85.0014648,6.61279297 C84.8045247,6.61279297 84.6416016,6.671875 84.5126953,6.79003906 C84.3837891,6.90820312 84.3193359,7.07470703 84.3193359,7.28955078 L84.3193359,11.2587891 C84.3193359,11.4915365 84.3819987,11.6732585 84.5073242,11.8039551 C84.6326497,11.9346517 84.7937826,12 84.9907227,12 Z" id="Path"></path>
+                    <path d="M94.5996094,11.9301758 C94.5996094,12.4386393 94.4599609,12.8173014 94.1806641,13.0661621 C93.9013672,13.3150228 93.5110677,13.4394531 93.0097656,13.4394531 C92.8665365,13.4394531 92.7206217,13.4269206 92.5720215,13.4018555 C92.4234212,13.3767904 92.3115234,13.3535156 92.2363281,13.3320313 C92.1611328,13.3105469 92.0429688,13.272054 91.8818359,13.2165527 C91.7207031,13.1610514 91.6240234,13.1279297 91.5917969,13.1171875 C91.5345052,13.0957031 91.4772135,13.0849609 91.4199219,13.0849609 C91.2945964,13.0849609 91.1907552,13.1315104 91.1083984,13.2246094 C91.0260417,13.3177083 90.9848633,13.4251302 90.9848633,13.546875 C90.9848633,13.71875 91.0672201,13.8548177 91.2319336,13.9550781 C91.425293,14.0732422 91.6965332,14.1726074 92.0456543,14.2531738 C92.3947754,14.3337402 92.7609049,14.3740234 93.144043,14.3740234 C93.9890951,14.3740234 94.6577962,14.1520182 95.1501465,13.7080078 C95.6424967,13.2639974 95.8886719,12.6176758 95.8886719,11.769043 L95.8886719,7.36474609 C95.8886719,7.12841797 95.8286947,6.94401042 95.7087402,6.81152344 C95.5887858,6.67903646 95.4357096,6.61279297 95.2495117,6.61279297 C95.0847982,6.61279297 94.9442546,6.66202799 94.8278809,6.76049805 C94.7115072,6.8589681 94.6443685,6.99951172 94.6264648,7.18212891 L94.6264648,7.42919922 C94.4474284,7.14274089 94.234375,6.92431641 93.9873047,6.77392578 C93.7402344,6.62353516 93.4000651,6.54833984 92.9667969,6.54833984 C92.1897786,6.54833984 91.5747884,6.80704753 91.1218262,7.32446289 C90.6688639,7.84187826 90.4423828,8.51416016 90.4423828,9.34130859 C90.4423828,10.1612956 90.6751302,10.8103027 91.140625,11.2883301 C91.6061198,11.7663574 92.2255859,12.0053711 92.9990234,12.0053711 C93.7617188,12.0053711 94.2952474,11.7171224 94.5996094,11.140625 L94.5996094,11.9301758 Z M93.2407227,11.1083984 C92.8325195,11.101237 92.4932454,10.9463704 92.2229004,10.6437988 C91.9525553,10.3412272 91.8173828,9.89453125 91.8173828,9.30371094 C91.8173828,9.09960938 91.8334961,8.90893555 91.8657227,8.73168945 C91.8979492,8.55444336 91.9498698,8.38614909 92.0214844,8.22680664 C92.093099,8.06746419 92.1826172,7.93139648 92.2900391,7.81860352 C92.3974609,7.70581055 92.5317383,7.61629232 92.6928711,7.55004883 C92.8540039,7.48380534 93.0348307,7.45068359 93.2353516,7.45068359 C94.1448568,7.45068359 94.5996094,8.0719401 94.5996094,9.31445312 C94.5996094,9.92675781 94.4760742,10.3770345 94.2290039,10.6652832 C93.9819336,10.9535319 93.6525065,11.101237 93.2407227,11.1083984 Z" id="Shape"></path>
+                </g>
+                <g id="vLLM-dash-box-Copy" transform="translate(0, 22)" xlink:href="#path-6">
+                    <path d="M7.60126829,127.489595 C7.73368411,127.496523 7.86662278,127.5 8,127.5 L8.77464335,127.5 C9.05078572,127.5 9.27464335,127.723858 9.27464335,128 C9.27464335,128.276142 9.05078572,128.5 8.77464335,128.5 L8,128.5 C7.8491935,128.5 7.69883087,128.496068 7.54901422,128.488228 C7.27324911,128.473799 7.06139478,128.23855 7.07582434,127.962785 C7.09025391,127.687019 7.32550317,127.475165 7.60126829,127.489595 Z M101.674643,128 C101.674643,128.276142 101.450786,128.5 101.174643,128.5 L99.9746433,128.5 C99.698501,128.5 99.4746433,128.276142 99.4746433,128 C99.4746433,127.723858 99.698501,127.5 99.9746433,127.5 L101.174643,127.5 C101.450786,127.5 101.674643,127.723858 101.674643,128 Z M17.6746433,128 C17.6746433,128.276142 17.4507857,128.5 17.1746433,128.5 L15.9746433,128.5 C15.698501,128.5 15.4746433,128.276142 15.4746433,128 C15.4746433,127.723858 15.698501,127.5 15.9746433,127.5 L17.1746434,127.5 C17.4507857,127.5 17.6746433,127.723858 17.6746433,128 Z M21.8746433,128 C21.8746433,128.276142 21.6507857,128.5 21.3746433,128.5 L20.1746433,128.5 C19.898501,128.5 19.6746433,128.276142 19.6746433,128 C19.6746433,127.723858 19.898501,127.5 20.1746433,127.5 L21.3746434,127.5 C21.6507857,127.5 21.8746433,127.723858 21.8746433,128 Z M26.0746433,128 C26.0746433,128.276142 25.8507857,128.5 25.5746433,128.5 L24.3746433,128.5 C24.098501,128.5 23.8746433,128.276142 23.8746433,128 C23.8746433,127.723858 24.098501,127.5 24.3746433,127.5 L25.5746435,127.5 C25.8507857,127.5 26.0746433,127.723858 26.0746433,128 Z M30.2746433,128 C30.2746433,128.276142 30.0507857,128.5 29.7746433,128.5 L28.5746433,128.5 C28.298501,128.5 28.0746433,128.276142 28.0746433,128 C28.0746433,127.723858 28.298501,127.5 28.5746433,127.5 L29.7746434,127.5 C30.0507857,127.5 30.2746433,127.723858 30.2746433,128 Z M34.4746433,128 C34.4746433,128.276142 34.2507857,128.5 33.9746433,128.5 L32.7746433,128.5 C32.498501,128.5 32.2746433,128.276142 32.2746433,128 C32.2746433,127.723858 32.498501,127.5 32.7746433,127.5 L33.9746434,127.5 C34.2507857,127.5 34.4746433,127.723858 34.4746433,128 Z M38.6746433,128 C38.6746433,128.276142 38.4507857,128.5 38.1746433,128.5 L36.9746433,128.5 C36.698501,128.5 36.4746433,128.276142 36.4746433,128 C36.4746433,127.723858 36.698501,127.5 36.9746433,127.5 L38.1746434,127.5 C38.4507857,127.5 38.6746433,127.723858 38.6746433,128 Z M42.8746433,128 C42.8746433,128.276142 42.6507857,128.5 42.3746433,128.5 L41.1746433,128.5 C40.898501,128.5 40.6746433,128.276142 40.6746433,128 C40.6746433,127.723858 40.898501,127.5 41.1746433,127.5 L42.3746434,127.5 C42.6507857,127.5 42.8746433,127.723858 42.8746433,128 Z M47.0746433,128 C47.0746433,128.276142 46.8507857,128.5 46.5746433,128.5 L45.3746433,128.5 C45.098501,128.5 44.8746433,128.276142 44.8746433,128 C44.8746433,127.723858 45.098501,127.5 45.3746433,127.5 L46.5746434,127.5 C46.8507857,127.5 47.0746433,127.723858 47.0746433,128 Z M51.2746433,128 C51.2746433,128.276142 51.0507857,128.5 50.7746433,128.5 L49.5746433,128.5 C49.298501,128.5 49.0746433,128.276142 49.0746433,128 C49.0746433,127.723858 49.298501,127.5 49.5746433,127.5 L50.7746434,127.5 C51.0507857,127.5 51.2746433,127.723858 51.2746433,128 Z M55.4746433,128 C55.4746433,128.276142 55.2507857,128.5 54.9746433,128.5 L53.7746433,128.5 C53.498501,128.5 53.2746433,128.276142 53.2746433,128 C53.2746433,127.723858 53.498501,127.5 53.7746433,127.5 L54.9746434,127.5 C55.2507857,127.5 55.4746433,127.723858 55.4746433,128 Z M59.6746433,128 C59.6746433,128.276142 59.4507857,128.5 59.1746433,128.5 L57.9746433,128.5 C57.698501,128.5 57.4746433,128.276142 57.4746433,128 C57.4746433,127.723858 57.698501,127.5 57.9746433,127.5 L59.1746433,127.5 C59.4507857,127.5 59.6746433,127.723858 59.6746433,128 Z M105.874643,128 C105.874643,128.276142 105.650786,128.5 105.374643,128.5 L104.174643,128.5 C103.898501,128.5 103.674643,128.276142 103.674643,128 C103.674643,127.723858 103.898501,127.5 104.174643,127.5 L105.374643,127.5 C105.650786,127.5 105.874643,127.723858 105.874643,128 Z M110.074643,128 C110.074643,128.276142 109.850786,128.5 109.574643,128.5 L108.374643,128.5 C108.098501,128.5 107.874643,128.276142 107.874643,128 C107.874643,127.723858 108.098501,127.5 108.374643,127.5 L109.574643,127.5 C109.850786,127.5 110.074643,127.723858 110.074643,128 Z M13.4746433,128 C13.4746433,128.276142 13.2507857,128.5 12.9746433,128.5 L11.7746433,128.5 C11.498501,128.5 11.2746433,128.276142 11.2746433,128 C11.2746433,127.723858 11.498501,127.5 11.7746433,127.5 L12.9746434,127.5 C13.2507857,127.5 13.4746433,127.723858 13.4746433,128 Z M63.8746433,128 C63.8746433,128.276142 63.6507857,128.5 63.3746433,128.5 L62.1746433,128.5 C61.898501,128.5 61.6746433,128.276142 61.6746433,128 C61.6746433,127.723858 61.898501,127.5 62.1746433,127.5 L63.3746432,127.5 C63.6507857,127.5 63.8746433,127.723858 63.8746433,128 Z M68.0746433,128 C68.0746433,128.276142 67.8507857,128.5 67.5746433,128.5 L66.3746433,128.5 C66.098501,128.5 65.8746433,128.276142 65.8746433,128 C65.8746433,127.723858 66.098501,127.5 66.3746433,127.5 L67.5746433,127.5 C67.8507857,127.5 68.0746433,127.723858 68.0746433,128 Z M72.2746433,128 C72.2746433,128.276142 72.0507857,128.5 71.7746433,128.5 L70.5746433,128.5 C70.298501,128.5 70.0746433,128.276142 70.0746433,128 C70.0746433,127.723858 70.298501,127.5 70.5746433,127.5 L71.7746433,127.5 C72.0507857,127.5 72.2746433,127.723858 72.2746433,128 Z M76.4746433,128 C76.4746433,128.276142 76.2507857,128.5 75.9746433,128.5 L74.7746433,128.5 C74.498501,128.5 74.2746433,128.276142 74.2746433,128 C74.2746433,127.723858 74.498501,127.5 74.7746433,127.5 L75.9746433,127.5 C76.2507857,127.5 76.4746433,127.723858 76.4746433,128 Z M80.6746433,128 C80.6746433,128.276142 80.4507857,128.5 80.1746433,128.5 L78.9746433,128.5 C78.698501,128.5 78.4746433,128.276142 78.4746433,128 C78.4746433,127.723858 78.698501,127.5 78.9746433,127.5 L80.1746433,127.5 C80.4507857,127.5 80.6746433,127.723858 80.6746433,128 Z M84.8746433,128 C84.8746433,128.276142 84.6507857,128.5 84.3746433,128.5 L83.1746433,128.5 C82.898501,128.5 82.6746433,128.276142 82.6746433,128 C82.6746433,127.723858 82.898501,127.5 83.1746433,127.5 L84.3746433,127.5 C84.6507857,127.5 84.8746433,127.723858 84.8746433,128 Z M89.0746433,128 C89.0746433,128.276142 88.8507857,128.5 88.5746433,128.5 L87.3746433,128.5 C87.098501,128.5 86.8746433,128.276142 86.8746433,128 C86.8746433,127.723858 87.098501,127.5 87.3746433,127.5 L88.5746433,127.5 C88.8507857,127.5 89.0746433,127.723858 89.0746433,128 Z M93.2746433,128 C93.2746433,128.276142 93.0507857,128.5 92.7746433,128.5 L91.5746433,128.5 C91.298501,128.5 91.0746433,128.276142 91.0746433,128 C91.0746433,127.723858 91.298501,127.5 91.5746433,127.5 L92.7746433,127.5 C93.0507857,127.5 93.2746433,127.723858 93.2746433,128 Z M97.4746433,128 C97.4746433,128.276142 97.2507857,128.5 96.9746433,128.5 L95.7746433,128.5 C95.498501,128.5 95.2746433,128.276142 95.2746433,128 C95.2746433,127.723858 95.498501,127.5 95.7746433,127.5 L96.9746433,127.5 C97.2507857,127.5 97.4746433,127.723858 97.4746433,128 Z M114.055358,126.915582 C114.180112,127.161937 114.081535,127.462781 113.835179,127.587535 C113.455748,127.779678 113.062077,127.943136 112.657126,128.076292 C112.394802,128.16255 112.11222,128.01982 112.025961,127.757496 C111.939703,127.495171 112.082433,127.212589 112.344758,127.126331 C112.701744,127.008946 113.048821,126.864836 113.383405,126.695403 C113.62976,126.570649 113.930604,126.669226 114.055358,126.915582 Z M3.87987036,126.26801 C4.19335033,126.474514 4.52190256,126.657254 4.86287326,126.814501 C5.11363396,126.930146 5.22316717,127.227176 5.10752253,127.477937 C4.9918779,127.728697 4.69484768,127.838231 4.44408698,127.722586 C4.05748525,127.544295 3.68504135,127.337142 3.32975792,127.103101 C3.09915402,126.951191 3.03535949,126.641103 3.18726884,126.410499 C3.3391782,126.179895 3.64926646,126.116101 3.87987036,126.26801 Z M116.999048,123.909773 C117.231117,124.059434 117.297922,124.368887 117.148261,124.600957 C116.917678,124.958505 116.660543,125.298348 116.37924,125.617559 C116.196668,125.824735 115.880714,125.844681 115.673538,125.662108 C115.466362,125.479535 115.446417,125.163582 115.62899,124.956406 C115.877349,124.674578 116.104345,124.374568 116.307864,124.058986 C116.457525,123.826916 116.766979,123.760111 116.999048,123.909773 Z M1.27256369,123.319388 C1.43898255,123.655985 1.63055181,123.979556 1.84546492,124.287446 C2.00352124,124.513881 1.94808936,124.825572 1.72165435,124.983629 C1.49521934,125.141685 1.18352754,125.086253 1.02547122,124.859818 C0.781933905,124.510921 0.564805969,124.14418 0.376143395,123.762593 C0.25375572,123.515054 0.355211426,123.215168 0.602751055,123.092781 C0.850290685,122.970393 1.15017602,123.071849 1.27256369,123.319388 Z M118.493626,120.332066 C118.477178,120.759793 118.428966,121.182879 118.349775,121.598809 C118.298127,121.870079 118.036351,122.048117 117.765081,121.996469 C117.493812,121.944821 117.315774,121.683044 117.367422,121.411775 C117.437286,121.044832 117.479837,120.671419 117.494364,120.29364 C117.504975,120.017702 117.73727,119.802611 118.013208,119.813222 C118.289146,119.823833 118.504237,120.056128 118.493626,120.332066 Z M0.5,119.461965 L0.5,120.011642 C0.500324493,120.219706 0.509101847,120.42668 0.526243834,120.632238 C0.549192342,120.907426 0.344712508,121.149112 0.0695253402,121.172061 C-0.205661827,121.195009 -0.447348546,120.990529 -0.470297054,120.715342 C-0.489701403,120.482655 -0.49963303,120.248463 -0.5,120.01241 L-0.5,119.461965 C-0.5,119.185831 -0.276150919,118.961965 7.30095984e-11,118.961965 C0.27613383,118.961965 0.5,119.185814 0.5,119.461965 Z M118.5,116.112678 L118.5,117.312678 C118.5,117.588821 118.276142,117.812678 118,117.812678 C117.723858,117.812678 117.5,117.588821 117.5,117.312678 L117.5,116.112678 C117.5,115.836536 117.723858,115.612678 118,115.612678 C118.276142,115.612678 118.5,115.836536 118.5,116.112678 Z M0.5,115.261965 L0.5,116.461965 C0.5,116.738107 0.276142375,116.961965 0,116.961965 C-0.276142375,116.961965 -0.5,116.738107 -0.5,116.461965 L-0.5,115.261965 C-0.5,114.985823 -0.276142375,114.761965 0,114.761965 C0.276142375,114.761965 0.5,114.985823 0.5,115.261965 Z M118.5,111.912678 L118.5,113.112678 C118.5,113.388821 118.276142,113.612678 118,113.612678 C117.723858,113.612678 117.5,113.388821 117.5,113.112678 L117.5,111.912678 C117.5,111.636536 117.723858,111.412678 118,111.412678 C118.276142,111.412678 118.5,111.636536 118.5,111.912678 Z M0.5,111.061965 L0.5,112.261965 C0.5,112.538107 0.276142375,112.761965 0,112.761965 C-0.276142375,112.761965 -0.5,112.538107 -0.5,112.261965 L-0.5,111.061965 C-0.5,110.785823 -0.276142375,110.561965 0,110.561965 C0.276142375,110.561965 0.5,110.785823 0.5,111.061965 Z M118.5,107.712678 L118.5,108.912678 C118.5,109.188821 118.276142,109.412678 118,109.412678 C117.723858,109.412678 117.5,109.188821 117.5,108.912678 L117.5,107.712678 C117.5,107.436536 117.723858,107.212678 118,107.212678 C118.276142,107.212678 118.5,107.436536 118.5,107.712678 Z M0.5,106.861965 L0.5,108.061965 C0.5,108.338107 0.276142375,108.561965 0,108.561965 C-0.276142375,108.561965 -0.5,108.338107 -0.5,108.061965 L-0.5,106.861965 C-0.5,106.585823 -0.276142375,106.361965 0,106.361965 C0.276142375,106.361965 0.5,106.585823 0.5,106.861965 Z M118.5,103.512678 L118.5,104.712678 C118.5,104.988821 118.276142,105.212678 118,105.212678 C117.723858,105.212678 117.5,104.988821 117.5,104.712678 L117.5,103.512678 C117.5,103.236536 117.723858,103.012678 118,103.012678 C118.276142,103.012678 118.5,103.236536 118.5,103.512678 Z M0.5,102.661965 L0.5,103.861965 C0.5,104.138107 0.276142375,104.361965 0,104.361965 C-0.276142375,104.361965 -0.5,104.138107 -0.5,103.861965 L-0.5,102.661965 C-0.5,102.385823 -0.276142375,102.161965 0,102.161965 C0.276142375,102.161965 0.5,102.385823 0.5,102.661965 Z M118.5,99.3126783 L118.5,100.512678 C118.5,100.788821 118.276142,101.012678 118,101.012678 C117.723858,101.012678 117.5,100.788821 117.5,100.512678 L117.5,99.3126783 C117.5,99.036536 117.723858,98.8126783 118,98.8126783 C118.276142,98.8126783 118.5,99.036536 118.5,99.3126783 Z M0.5,98.461965 L0.5,99.661965 C0.5,99.9381074 0.276142375,100.161965 0,100.161965 C-0.276142375,100.161965 -0.5,99.9381074 -0.5,99.661965 L-0.5,98.461965 C-0.5,98.1858226 -0.276142375,97.961965 0,97.961965 C0.276142375,97.961965 0.5,98.1858226 0.5,98.461965 Z M118.5,95.1126783 L118.5,96.3126783 C118.5,96.5888207 118.276142,96.8126783 118,96.8126783 C117.723858,96.8126783 117.5,96.5888207 117.5,96.3126783 L117.5,95.1126783 C117.5,94.836536 117.723858,94.6126783 118,94.6126783 C118.276142,94.6126783 118.5,94.836536 118.5,95.1126783 Z M0.5,94.261965 L0.5,95.461965 C0.5,95.7381074 0.276142375,95.961965 0,95.961965 C-0.276142375,95.961965 -0.5,95.7381074 -0.5,95.461965 L-0.5,94.261965 C-0.5,93.9858226 -0.276142375,93.761965 0,93.761965 C0.276142375,93.761965 0.5,93.9858226 0.5,94.261965 Z M118.5,90.9126783 L118.5,92.1126783 C118.5,92.3888207 118.276142,92.6126783 118,92.6126783 C117.723858,92.6126783 117.5,92.3888207 117.5,92.1126783 L117.5,90.9126783 C117.5,90.636536 117.723858,90.4126783 118,90.4126783 C118.276142,90.4126783 118.5,90.636536 118.5,90.9126783 Z M0.5,90.061965 L0.5,91.261965 C0.5,91.5381074 0.276142375,91.761965 0,91.761965 C-0.276142375,91.761965 -0.5,91.5381074 -0.5,91.261965 L-0.5,90.061965 C-0.5,89.7858226 -0.276142375,89.561965 0,89.561965 C0.276142375,89.561965 0.5,89.7858226 0.5,90.061965 Z M118.5,86.7126783 L118.5,87.9126783 C118.5,88.1888207 118.276142,88.4126783 118,88.4126783 C117.723858,88.4126783 117.5,88.1888207 117.5,87.9126783 L117.5,86.7126783 C117.5,86.436536 117.723858,86.2126783 118,86.2126783 C118.276142,86.2126783 118.5,86.436536 118.5,86.7126783 Z M0.5,85.861965 L0.5,87.061965 C0.5,87.3381074 0.276142375,87.561965 0,87.561965 C-0.276142375,87.561965 -0.5,87.3381074 -0.5,87.061965 L-0.5,85.861965 C-0.5,85.5858226 -0.276142375,85.361965 0,85.361965 C0.276142375,85.361965 0.5,85.5858226 0.5,85.861965 Z M118.5,82.5126783 L118.5,83.7126783 C118.5,83.9888207 118.276142,84.2126783 118,84.2126783 C117.723858,84.2126783 117.5,83.9888207 117.5,83.7126783 L117.5,82.5126783 C117.5,82.236536 117.723858,82.0126783 118,82.0126783 C118.276142,82.0126783 118.5,82.236536 118.5,82.5126783 Z M0.5,81.661965 L0.5,82.861965 C0.5,83.1381074 0.276142375,83.361965 0,83.361965 C-0.276142375,83.361965 -0.5,83.1381074 -0.5,82.861965 L-0.5,81.661965 C-0.5,81.3858226 -0.276142375,81.161965 0,81.161965 C0.276142375,81.161965 0.5,81.3858226 0.5,81.661965 Z M118.5,78.3126783 L118.5,79.5126783 C118.5,79.7888207 118.276142,80.0126783 118,80.0126783 C117.723858,80.0126783 117.5,79.7888207 117.5,79.5126783 L117.5,78.3126783 C117.5,78.036536 117.723858,77.8126783 118,77.8126783 C118.276142,77.8126783 118.5,78.036536 118.5,78.3126783 Z M0.5,77.461965 L0.5,78.661965 C0.5,78.9381074 0.276142375,79.161965 0,79.161965 C-0.276142375,79.161965 -0.5,78.9381074 -0.5,78.661965 L-0.5,77.461965 C-0.5,77.1858226 -0.276142375,76.961965 0,76.961965 C0.276142375,76.961965 0.5,77.1858226 0.5,77.461965 Z M118.5,74.1126783 L118.5,75.3126783 C118.5,75.5888207 118.276142,75.8126783 118,75.8126783 C117.723858,75.8126783 117.5,75.5888207 117.5,75.3126783 L117.5,74.1126783 C117.5,73.836536 117.723858,73.6126783 118,73.6126783 C118.276142,73.6126783 118.5,73.836536 118.5,74.1126783 Z M0.5,73.261965 L0.5,74.461965 C0.5,74.7381074 0.276142375,74.961965 0,74.961965 C-0.276142375,74.961965 -0.5,74.7381074 -0.5,74.461965 L-0.5,73.261965 C-0.5,72.9858226 -0.276142375,72.761965 0,72.761965 C0.276142375,72.761965 0.5,72.9858226 0.5,73.261965 Z M118.5,69.9126783 L118.5,71.1126783 C118.5,71.3888207 118.276142,71.6126783 118,71.6126783 C117.723858,71.6126783 117.5,71.3888207 117.5,71.1126783 L117.5,69.9126783 C117.5,69.636536 117.723858,69.4126783 118,69.4126783 C118.276142,69.4126783 118.5,69.636536 118.5,69.9126783 Z M0.5,69.061965 L0.5,70.261965 C0.5,70.5381074 0.276142375,70.761965 0,70.761965 C-0.276142375,70.761965 -0.5,70.5381074 -0.5,70.261965 L-0.5,69.061965 C-0.5,68.7858226 -0.276142375,68.561965 0,68.561965 C0.276142375,68.561965 0.5,68.7858226 0.5,69.061965 Z M118.5,65.7126783 L118.5,66.9126783 C118.5,67.1888207 118.276142,67.4126783 118,67.4126783 C117.723858,67.4126783 117.5,67.1888207 117.5,66.9126783 L117.5,65.7126783 C117.5,65.436536 117.723858,65.2126783 118,65.2126783 C118.276142,65.2126783 118.5,65.436536 118.5,65.7126783 Z M0.5,64.861965 L0.5,66.061965 C0.5,66.3381074 0.276142375,66.561965 0,66.561965 C-0.276142375,66.561965 -0.5,66.3381074 -0.5,66.061965 L-0.5,64.861965 C-0.5,64.5858226 -0.276142375,64.361965 0,64.361965 C0.276142375,64.361965 0.5,64.5858226 0.5,64.861965 Z M118.5,61.5126783 L118.5,62.7126783 C118.5,62.9888207 118.276142,63.2126783 118,63.2126783 C117.723858,63.2126783 117.5,62.9888207 117.5,62.7126783 L117.5,61.5126783 C117.5,61.236536 117.723858,61.0126783 118,61.0126783 C118.276142,61.0126783 118.5,61.236536 118.5,61.5126783 Z M0.5,60.661965 L0.5,61.861965 C0.5,62.1381074 0.276142375,62.361965 0,62.361965 C-0.276142375,62.361965 -0.5,62.1381074 -0.5,61.861965 L-0.5,60.661965 C-0.5,60.3858226 -0.276142375,60.161965 0,60.161965 C0.276142375,60.161965 0.5,60.3858226 0.5,60.661965 Z M118.5,57.3126783 L118.5,58.5126783 C118.5,58.7888207 118.276142,59.0126783 118,59.0126783 C117.723858,59.0126783 117.5,58.7888207 117.5,58.5126783 L117.5,57.3126783 C117.5,57.036536 117.723858,56.8126783 118,56.8126783 C118.276142,56.8126783 118.5,57.036536 118.5,57.3126783 Z M0.5,56.461965 L0.5,57.661965 C0.5,57.9381074 0.276142375,58.161965 0,58.161965 C-0.276142375,58.161965 -0.5,57.9381074 -0.5,57.661965 L-0.5,56.461965 C-0.5,56.1858226 -0.276142375,55.961965 0,55.961965 C0.276142375,55.961965 0.5,56.1858226 0.5,56.461965 Z M118.5,53.1126783 L118.5,54.3126783 C118.5,54.5888207 118.276142,54.8126783 118,54.8126783 C117.723858,54.8126783 117.5,54.5888207 117.5,54.3126783 L117.5,53.1126783 C117.5,52.836536 117.723858,52.6126783 118,52.6126783 C118.276142,52.6126783 118.5,52.836536 118.5,53.1126783 Z M0.5,52.261965 L0.5,53.461965 C0.5,53.7381074 0.276142375,53.961965 0,53.961965 C-0.276142375,53.961965 -0.5,53.7381074 -0.5,53.461965 L-0.5,52.261965 C-0.5,51.9858226 -0.276142375,51.761965 0,51.761965 C0.276142375,51.761965 0.5,51.9858226 0.5,52.261965 Z M118.5,48.9126783 L118.5,50.1126783 C118.5,50.3888207 118.276142,50.6126783 118,50.6126783 C117.723858,50.6126783 117.5,50.3888207 117.5,50.1126783 L117.5,48.9126783 C117.5,48.636536 117.723858,48.4126783 118,48.4126783 C118.276142,48.4126783 118.5,48.636536 118.5,48.9126783 Z M0.5,48.061965 L0.5,49.261965 C0.5,49.5381074 0.276142375,49.761965 0,49.761965 C-0.276142375,49.761965 -0.5,49.5381074 -0.5,49.261965 L-0.5,48.061965 C-0.5,47.7858226 -0.276142375,47.561965 0,47.561965 C0.276142375,47.561965 0.5,47.7858226 0.5,48.061965 Z M118.5,44.7126783 L118.5,45.9126783 C118.5,46.1888207 118.276142,46.4126783 118,46.4126783 C117.723858,46.4126783 117.5,46.1888207 117.5,45.9126783 L117.5,44.7126783 C117.5,44.436536 117.723858,44.2126783 118,44.2126783 C118.276142,44.2126783 118.5,44.436536 118.5,44.7126783 Z M0.5,43.861965 L0.5,45.061965 C0.5,45.3381074 0.276142375,45.561965 0,45.561965 C-0.276142375,45.561965 -0.5,45.3381074 -0.5,45.061965 L-0.5,43.861965 C-0.5,43.5858226 -0.276142375,43.361965 0,43.361965 C0.276142375,43.361965 0.5,43.5858226 0.5,43.861965 Z M118.5,40.5126783 L118.5,41.7126783 C118.5,41.9888207 118.276142,42.2126783 118,42.2126783 C117.723858,42.2126783 117.5,41.9888207 117.5,41.7126783 L117.5,40.5126783 C117.5,40.236536 117.723858,40.0126783 118,40.0126783 C118.276142,40.0126783 118.5,40.236536 118.5,40.5126783 Z M0.5,39.661965 L0.5,40.861965 C0.5,41.1381074 0.276142375,41.361965 0,41.361965 C-0.276142375,41.361965 -0.5,41.1381074 -0.5,40.861965 L-0.5,39.661965 C-0.5,39.3858226 -0.276142375,39.161965 0,39.161965 C0.276142375,39.161965 0.5,39.3858226 0.5,39.661965 Z M118.5,36.3126783 L118.5,37.5126783 C118.5,37.7888207 118.276142,38.0126783 118,38.0126783 C117.723858,38.0126783 117.5,37.7888207 117.5,37.5126783 L117.5,36.3126783 C117.5,36.036536 117.723858,35.8126783 118,35.8126783 C118.276142,35.8126783 118.5,36.036536 118.5,36.3126783 Z M0.5,35.461965 L0.5,36.661965 C0.5,36.9381074 0.276142375,37.161965 0,37.161965 C-0.276142375,37.161965 -0.5,36.9381074 -0.5,36.661965 L-0.5,35.461965 C-0.5,35.1858226 -0.276142375,34.961965 0,34.961965 C0.276142375,34.961965 0.5,35.1858226 0.5,35.461965 Z M118.5,32.1126783 L118.5,33.3126783 C118.5,33.5888207 118.276142,33.8126783 118,33.8126783 C117.723858,33.8126783 117.5,33.5888207 117.5,33.3126783 L117.5,32.1126783 C117.5,31.836536 117.723858,31.6126783 118,31.6126783 C118.276142,31.6126783 118.5,31.836536 118.5,32.1126783 Z M0.5,31.261965 L0.5,32.461965 C0.5,32.7381074 0.276142375,32.961965 0,32.961965 C-0.276142375,32.961965 -0.5,32.7381074 -0.5,32.461965 L-0.5,31.261965 C-0.5,30.9858226 -0.276142375,30.761965 0,30.761965 C0.276142375,30.761965 0.5,30.9858226 0.5,31.261965 Z M118.5,27.9126783 L118.5,29.1126783 C118.5,29.3888207 118.276142,29.6126783 118,29.6126783 C117.723858,29.6126783 117.5,29.3888207 117.5,29.1126783 L117.5,27.9126783 C117.5,27.636536 117.723858,27.4126783 118,27.4126783 C118.276142,27.4126783 118.5,27.636536 118.5,27.9126783 Z M0.5,27.061965 L0.5,28.261965 C0.5,28.5381074 0.276142375,28.761965 0,28.761965 C-0.276142375,28.761965 -0.5,28.5381074 -0.5,28.261965 L-0.5,27.061965 C-0.5,26.7858226 -0.276142375,26.561965 0,26.561965 C0.276142375,26.561965 0.5,26.7858226 0.5,27.061965 Z M118.5,23.7126783 L118.5,24.9126783 C118.5,25.1888207 118.276142,25.4126783 118,25.4126783 C117.723858,25.4126783 117.5,25.1888207 117.5,24.9126783 L117.5,23.7126783 C117.5,23.436536 117.723858,23.2126783 118,23.2126783 C118.276142,23.2126783 118.5,23.436536 118.5,23.7126783 Z M0.5,22.861965 L0.5,24.061965 C0.5,24.3381074 0.276142375,24.561965 0,24.561965 C-0.276142375,24.561965 -0.5,24.3381074 -0.5,24.061965 L-0.5,22.861965 C-0.5,22.5858226 -0.276142375,22.361965 0,22.361965 C0.276142375,22.361965 0.5,22.5858226 0.5,22.861965 Z M118.5,19.5126783 L118.5,20.7126783 C118.5,20.9888207 118.276142,21.2126783 118,21.2126783 C117.723858,21.2126783 117.5,20.9888207 117.5,20.7126783 L117.5,19.5126783 C117.5,19.236536 117.723858,19.0126783 118,19.0126783 C118.276142,19.0126783 118.5,19.236536 118.5,19.5126783 Z M0.5,18.661965 L0.5,19.861965 C0.5,20.1381074 0.276142375,20.361965 0,20.361965 C-0.276142375,20.361965 -0.5,20.1381074 -0.5,19.861965 L-0.5,18.661965 C-0.5,18.3858226 -0.276142375,18.161965 0,18.161965 C0.276142375,18.161965 0.5,18.3858226 0.5,18.661965 Z M118.5,15.3126783 L118.5,16.5126783 C118.5,16.7888207 118.276142,17.0126783 118,17.0126783 C117.723858,17.0126783 117.5,16.7888207 117.5,16.5126783 L117.5,15.3126783 C117.5,15.036536 117.723858,14.8126783 118,14.8126783 C118.276142,14.8126783 118.5,15.036536 118.5,15.3126783 Z M0.5,14.461965 L0.5,15.661965 C0.5,15.9381074 0.276142375,16.161965 0,16.161965 C-0.276142375,16.161965 -0.5,15.9381074 -0.5,15.661965 L-0.5,14.461965 C-0.5,14.1858226 -0.276142375,13.961965 0,13.961965 C0.276142375,13.961965 0.5,14.1858226 0.5,14.461965 Z M118.5,11.1126783 L118.5,12.3126783 C118.5,12.5888207 118.276142,12.8126783 118,12.8126783 C117.723858,12.8126783 117.5,12.5888207 117.5,12.3126783 L117.5,11.1126783 C117.5,10.836536 117.723858,10.6126783 118,10.6126783 C118.276142,10.6126783 118.5,10.836536 118.5,11.1126783 Z M0.5,10.261965 L0.5,11.461965 C0.5,11.7381074 0.276142375,11.961965 0,11.961965 C-0.276142375,11.961965 -0.5,11.7381074 -0.5,11.461965 L-0.5,10.261965 C-0.5,9.98582265 -0.276142375,9.76196502 0,9.76196502 C0.276142375,9.76196502 0.5,9.98582265 0.5,10.261965 Z M118.419805,6.82803952 C118.47219,7.20768546 118.499081,7.59225728 118.499978,7.98103351 L118.5,8.11259543 C118.500046,8.3887378 118.276225,8.61263254 118.000083,8.61267833 C117.723941,8.6127241 117.500046,8.3889036 117.5,8.11276123 L117.49998,7.98227163 C117.499188,7.63955287 117.475436,7.29987535 117.429191,6.96472849 C117.391445,6.69117799 117.582603,6.43882262 117.856153,6.401077 C118.129704,6.36333138 118.382059,6.55448901 118.419805,6.82803952 Z M0.351560452,5.59425596 C0.619685241,5.66031433 0.783492437,5.93122331 0.717434064,6.1993481 C0.628029624,6.5622324 0.565753874,6.93265807 0.531429382,7.30840907 C0.506308636,7.58340645 0.263014824,7.78597148 -0.0119825569,7.76085074 C-0.286979937,7.73572999 -0.489544973,7.49243618 -0.464424227,7.2174388 C-0.425527861,6.79163939 -0.354921179,6.37166007 -0.25353169,5.96012957 C-0.187473317,5.69200478 0.0834356631,5.52819758 0.351560452,5.59425596 Z M116.698898,2.76733419 C116.96108,3.1025346 117.197883,3.45694423 117.407088,3.8275859 C117.542824,4.06806518 117.457913,4.37304812 117.217434,4.50878435 C116.976955,4.64452059 116.671972,4.55960957 116.536235,4.31913028 C116.35164,3.9920895 116.14265,3.67930561 115.911223,3.38342515 C115.741094,3.1659147 115.779505,2.85167098 115.997015,2.68154216 C116.214525,2.51141334 116.528769,2.54982374 116.698898,2.76733419 Z M2.65018329,2.03098857 C2.84694705,2.22473746 2.84939109,2.54131052 2.6556422,2.73807428 C2.39207973,3.00573797 2.14880502,3.29275366 1.92806459,3.59657449 C1.76575137,3.81997777 1.45306636,3.86950105 1.22966307,3.70718783 C1.00625978,3.54487461 0.956736501,3.23218959 1.11904973,3.00878631 C1.36908653,2.66464279 1.64461146,2.33957825 1.94309758,2.03644748 C2.13684647,1.83968372 2.45341952,1.83723968 2.65018329,2.03098857 Z M113.131516,0.0955761984 C113.527572,0.252607282 113.910844,0.439252343 114.278336,0.653707596 C114.516838,0.792888857 114.597354,1.09906161 114.458172,1.33756357 C114.318991,1.57606554 114.012818,1.65658088 113.774317,1.51739962 C113.450161,1.32823426 113.112159,1.1636344 112.762943,1.0251747 C112.506241,0.923395871 112.380651,0.63279011 112.48243,0.376088571 C112.584209,0.119387032 112.874815,-0.00620263532 113.131516,0.0955761984 Z M6.40653195,0.142669081 C6.47802057,0.409397386 6.31974754,0.683576376 6.05301923,0.755064996 C5.68981674,0.852410663 5.33533565,0.976934286 4.9920952,1.12738697 C4.73918251,1.23824627 4.44428705,1.12308924 4.33342776,0.870176548 C4.22256847,0.617263859 4.3377255,0.322368401 4.59063819,0.211509108 C4.97997232,0.0408521629 5.38210756,-0.100411613 5.79413604,-0.21084364 C6.06086434,-0.282332259 6.33504333,-0.124059223 6.40653195,0.142669081 Z M110.500405,2.47493888e-05 C110.500405,0.276167124 110.276522,0.500013673 110.00038,0.5 L108.8,0.5 C108.523858,0.5 108.3,0.276142375 108.3,0 C108.3,-0.276142375 108.523858,-0.5 108.8,-0.5 L110,-0.5 C110.276572,-0.499986326 110.500405,-0.276117625 110.500405,2.47493888e-05 Z M106.3,0 C106.3,0.276142375 106.076142,0.5 105.8,0.5 L104.6,0.5 C104.323858,0.5 104.1,0.276142375 104.1,0 C104.1,-0.276142375 104.323858,-0.5 104.6,-0.5 L105.8,-0.5 C106.076142,-0.5 106.3,-0.276142375 106.3,0 Z M102.1,0 C102.1,0.276142375 101.876142,0.5 101.6,0.5 L100.4,0.5 C100.123858,0.5 99.9,0.276142375 99.9,0 C99.9,-0.276142375 100.123858,-0.5 100.4,-0.5 L101.6,-0.5 C101.876142,-0.5 102.1,-0.276142375 102.1,0 Z M97.9,0 C97.9,0.276142375 97.6761424,0.5 97.4,0.5 L96.2,0.5 C95.9238576,0.5 95.7,0.276142375 95.7,0 C95.7,-0.276142375 95.9238576,-0.5 96.2,-0.5 L97.4,-0.5 C97.6761424,-0.5 97.9,-0.276142375 97.9,0 Z M93.7,0 C93.7,0.276142375 93.4761424,0.5 93.2,0.5 L92,0.5 C91.7238576,0.5 91.5,0.276142375 91.5,0 C91.5,-0.276142375 91.7238576,-0.5 92,-0.5 L93.2,-0.5 C93.4761424,-0.5 93.7,-0.276142375 93.7,0 Z M89.5,0 C89.5,0.276142375 89.2761424,0.5 89,0.5 L87.8,0.5 C87.5238576,0.5 87.3,0.276142375 87.3,0 C87.3,-0.276142375 87.5238576,-0.5 87.8,-0.5 L89,-0.5 C89.2761424,-0.5 89.5,-0.276142375 89.5,0 Z M85.3,0 C85.3,0.276142375 85.0761424,0.5 84.8,0.5 L83.6,0.5 C83.3238576,0.5 83.1,0.276142375 83.1,0 C83.1,-0.276142375 83.3238576,-0.5 83.6,-0.5 L84.8,-0.5 C85.0761424,-0.5 85.3,-0.276142375 85.3,0 Z M81.1,0 C81.1,0.276142375 80.8761424,0.5 80.6,0.5 L79.4,0.5 C79.1238576,0.5 78.9,0.276142375 78.9,0 C78.9,-0.276142375 79.1238576,-0.5 79.4,-0.5 L80.6,-0.5 C80.8761424,-0.5 81.1,-0.276142375 81.1,0 Z M76.9,0 C76.9,0.276142375 76.6761424,0.5 76.4,0.5 L75.2,0.5 C74.9238576,0.5 74.7,0.276142375 74.7,0 C74.7,-0.276142375 74.9238576,-0.5 75.2,-0.5 L76.4,-0.5 C76.6761424,-0.5 76.9,-0.276142375 76.9,0 Z M72.7,0 C72.7,0.276142375 72.4761424,0.5 72.2,0.5 L71,0.5 C70.7238576,0.5 70.5,0.276142375 70.5,0 C70.5,-0.276142375 70.7238576,-0.5 71,-0.5 L72.2,-0.5 C72.4761424,-0.5 72.7,-0.276142375 72.7,0 Z M68.5,0 C68.5,0.276142375 68.2761424,0.5 68,0.5 L66.8,0.5 C66.5238576,0.5 66.3,0.276142375 66.3,0 C66.3,-0.276142375 66.5238576,-0.5 66.8,-0.5 L68,-0.5 C68.2761424,-0.5 68.5,-0.276142375 68.5,0 Z M64.3,0 C64.3,0.276142375 64.0761424,0.5 63.8,0.5 L62.6,0.5 C62.3238576,0.5 62.1,0.276142375 62.1,0 C62.1,-0.276142375 62.3238576,-0.5 62.6,-0.5 L63.8,-0.5 C64.0761424,-0.5 64.3,-0.276142375 64.3,0 Z M60.1,0 C60.1,0.276142375 59.8761424,0.5 59.6,0.5 L58.4,0.5 C58.1238576,0.5 57.9,0.276142375 57.9,0 C57.9,-0.276142375 58.1238576,-0.5 58.4,-0.5 L59.6,-0.5 C59.8761424,-0.5 60.1,-0.276142375 60.1,0 Z M55.9,0 C55.9,0.276142375 55.6761424,0.5 55.4,0.5 L54.2,0.5 C53.9238576,0.5 53.7,0.276142375 53.7,0 C53.7,-0.276142375 53.9238576,-0.5 54.2,-0.5 L55.4,-0.5 C55.6761424,-0.5 55.9,-0.276142375 55.9,0 Z M51.7,0 C51.7,0.276142375 51.4761424,0.5 51.2,0.5 L50,0.5 C49.7238576,0.5 49.5,0.276142375 49.5,0 C49.5,-0.276142375 49.7238576,-0.5 50,-0.5 L51.2,-0.5 C51.4761424,-0.5 51.7,-0.276142375 51.7,0 Z M47.5,0 C47.5,0.276142375 47.2761424,0.5 47,0.5 L45.8,0.5 C45.5238576,0.5 45.3,0.276142375 45.3,0 C45.3,-0.276142375 45.5238576,-0.5 45.8,-0.5 L47,-0.5 C47.2761424,-0.5 47.5,-0.276142375 47.5,0 Z M43.3,0 C43.3,0.276142375 43.0761424,0.5 42.8,0.5 L41.6,0.5 C41.3238576,0.5 41.1,0.276142375 41.1,0 C41.1,-0.276142375 41.3238576,-0.5 41.6,-0.5 L42.8,-0.5 C43.0761424,-0.5 43.3,-0.276142375 43.3,0 Z M39.1,0 C39.1,0.276142375 38.8761424,0.5 38.6,0.5 L37.4,0.5 C37.1238576,0.5 36.9,0.276142375 36.9,0 C36.9,-0.276142375 37.1238576,-0.5 37.4,-0.5 L38.6,-0.5 C38.8761424,-0.5 39.1,-0.276142375 39.1,0 Z M34.9,0 C34.9,0.276142375 34.6761424,0.5 34.4,0.5 L33.2,0.5 C32.9238576,0.5 32.7,0.276142375 32.7,0 C32.7,-0.276142375 32.9238576,-0.5 33.2,-0.5 L34.4,-0.5 C34.6761424,-0.5 34.9,-0.276142375 34.9,0 Z M30.7,0 C30.7,0.276142375 30.4761424,0.5 30.2,0.5 L29,0.5 C28.7238576,0.5 28.5,0.276142375 28.5,0 C28.5,-0.276142375 28.7238576,-0.5 29,-0.5 L30.2,-0.5 C30.4761424,-0.5 30.7,-0.276142375 30.7,0 Z M26.5,0 C26.5,0.276142375 26.2761424,0.5 26,0.5 L24.8,0.5 C24.5238576,0.5 24.3,0.276142375 24.3,0 C24.3,-0.276142375 24.5238576,-0.5 24.8,-0.5 L26,-0.5 C26.2761424,-0.5 26.5,-0.276142375 26.5,0 Z M22.3,0 C22.3,0.276142375 22.0761424,0.5 21.8,0.5 L20.6,0.5 C20.3238576,0.5 20.1,0.276142375 20.1,0 C20.1,-0.276142375 20.3238576,-0.5 20.6,-0.5 L21.8,-0.5 C22.0761424,-0.5 22.3,-0.276142375 22.3,0 Z M18.1,0 C18.1,0.276142375 17.8761424,0.5 17.6,0.5 L16.4,0.5 C16.1238576,0.5 15.9,0.276142375 15.9,0 C15.9,-0.276142375 16.1238576,-0.5 16.4,-0.5 L17.6,-0.5 C17.8761424,-0.5 18.1,-0.276142375 18.1,0 Z M13.9,0 C13.9,0.276142375 13.6761424,0.5 13.4,0.5 L12.2,0.5 C11.9238576,0.5 11.7,0.276142375 11.7,0 C11.7,-0.276142375 11.9238576,-0.5 12.2,-0.5 L13.4,-0.5 C13.6761424,-0.5 13.9,-0.276142375 13.9,0 Z M9.7,0 C9.7,0.276142375 9.47614237,0.5 9.2,0.5 L8,0.5 C7.72385763,0.5 7.5,0.276142375 7.5,0 C7.5,-0.276142375 7.72385763,-0.5 8,-0.5 L9.2,-0.5 C9.47614237,-0.5 9.7,-0.276142375 9.7,0 Z" id="vLLM-dash-box" fill="#999999" fill-rule="nonzero"></path>
+                    <g id="vLLM" stroke-width="1" fill-rule="evenodd" transform="translate(6.5, 4)" fill="#7D7D7D">
+                        <path d="M1.80126953,8 L5.30224609,8 C5.44873047,8 5.56266276,7.95198568 5.64404297,7.85595703 C5.72542318,7.75992839 5.76611328,7.6484375 5.76611328,7.52148437 C5.76611328,7.39453125 5.72460938,7.28222656 5.64160156,7.18457031 C5.55859375,7.08691406 5.44547526,7.03808594 5.30224609,7.03808594 L2.39697266,7.03808594 L2.39697266,1.58398437 C2.39697266,1.3984375 2.33837891,1.25439453 2.22119141,1.15185547 C2.10400391,1.04931641 1.96240234,0.998046875 1.79638672,0.998046875 C1.63037109,0.998046875 1.48714193,1.04931641 1.36669922,1.15185547 C1.24625651,1.25439453 1.18603516,1.3984375 1.18603516,1.58398437 L1.18603516,7.39453125 C1.18603516,7.56054687 1.24707031,7.70296224 1.36914062,7.82177734 C1.49121094,7.94059245 1.63525391,8 1.80126953,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M7.27978516,8 L10.7807617,8 C10.9272461,8 11.0411784,7.95198568 11.1225586,7.85595703 C11.2039388,7.75992839 11.2446289,7.6484375 11.2446289,7.52148437 C11.2446289,7.39453125 11.203125,7.28222656 11.1201172,7.18457031 C11.0371094,7.08691406 10.9239909,7.03808594 10.7807617,7.03808594 L7.87548828,7.03808594 L7.87548828,1.58398437 C7.87548828,1.3984375 7.81689453,1.25439453 7.69970703,1.15185547 C7.58251953,1.04931641 7.44091797,0.998046875 7.27490234,0.998046875 C7.10888672,0.998046875 6.96565755,1.04931641 6.84521484,1.15185547 C6.72477214,1.25439453 6.66455078,1.3984375 6.66455078,1.58398437 L6.66455078,7.39453125 C6.66455078,7.56054687 6.72558594,7.70296224 6.84765625,7.82177734 C6.96972656,7.94059245 7.11376953,8 7.27978516,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M15.9418945,7.78027344 L15.9418945,6.31054688 L14.0473633,1.88183594 C13.9529622,1.65722656 13.8162435,1.48876953 13.637207,1.37646484 C13.4581706,1.26416016 13.2661133,1.20800781 13.0610352,1.20800781 C12.8136393,1.20800781 12.5996094,1.2796224 12.4189453,1.42285156 C12.2382812,1.56608073 12.1479492,1.75 12.1479492,1.97460938 L12.1479492,7.47753906 C12.1479492,7.60449219 12.2032878,7.70377604 12.3139648,7.77539063 C12.4246419,7.84700521 12.5556641,7.8828125 12.7070312,7.8828125 C12.8583984,7.8828125 12.9894206,7.84700521 13.1000977,7.77539063 C13.2107747,7.70377604 13.2661133,7.60449219 13.2661133,7.47753906 L13.2661133,2.5703125 L15.2436523,7.25292969 C15.3152669,7.42220052 15.4145508,7.55240885 15.5415039,7.64355469 C15.668457,7.73470052 15.8019206,7.78027344 15.9418945,7.78027344 Z M15.9467773,7.78027344 C16.0867513,7.78027344 16.2210286,7.73388672 16.3496094,7.64111328 C16.4781901,7.54833984 16.5766602,7.41894531 16.6450195,7.25292969 L18.6274414,2.5703125 L18.6274414,7.47753906 C18.6274414,7.60449219 18.6827799,7.70377604 18.793457,7.77539063 C18.9041341,7.84700521 19.0351562,7.8828125 19.1865234,7.8828125 C19.3378906,7.8828125 19.4689128,7.84700521 19.5795898,7.77539063 C19.6902669,7.70377604 19.7456055,7.60449219 19.7456055,7.47753906 L19.7456055,1.97460938 C19.7456055,1.82486979 19.702474,1.69059245 19.6162109,1.57177734 C19.5299479,1.45296224 19.4176432,1.36263021 19.2792969,1.30078125 C19.1409505,1.23893229 18.9920247,1.20800781 18.8325195,1.20800781 C18.6274414,1.20800781 18.4353841,1.26416016 18.2563477,1.37646484 C18.0773112,1.48876953 17.9389648,1.65722656 17.8413086,1.88183594 L15.9467773,6.31054688 L15.9467773,7.78027344 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="Lines-@blue-w-arrow" transform="translate(20, 0)" fill="#8E8E8E" fill-rule="nonzero">
+                    <g id="L5" transform="translate(0, 138)">
+                        <path d="M77.5,-0.277777778 L80,4.72222222 L77.999,4.72222222 L78,20 C78,20.2761424 77.7761424,20.5 77.5,20.5 C77.2238576,20.5 77,20.2761424 77,20 L76.999,4.72222222 L75,4.72222222 L77.5,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M32.5,-0.277777778 L35,4.72222222 L32.999,4.72222222 L33,20 C33,20.2761424 32.7761424,20.5 32.5,20.5 C32.2238576,20.5 32,20.2761424 32,20 L31.999,4.72222222 L30,4.72222222 L32.5,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M0.5,-0.277777778 L3,4.72222222 L0.999,4.72222222 L1,20 C1,20.2761424 0.776142375,20.5 0.5,20.5 C0.223857625,20.5 1.33226763e-15,20.2761424 1.33226763e-15,20 L-0.001,4.72222222 L-2,4.72222222 L0.5,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L4" transform="translate(0.0001, 110)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9988985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9988985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L3" transform="translate(0, 82)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9988985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9988985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253504e-06,16.2761424 -1.49253504e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L2" transform="translate(0.0001, 54)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9988985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9988985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L1" transform="translate(39, 0)">
+                        <path d="M0.5,0.722222222 L3,5.72222222 L0.999,5.72222222 L1,42 C1,42.2761424 0.776142375,42.5 0.5,42.5 C0.223857625,42.5 2.22044605e-15,42.2761424 2.22044605e-15,42 L-0.001,5.72222222 L-2,5.72222222 L0.5,0.722222222 Z" id="Combined-Shape"></path>
+                    </g>
+                </g>
+                <g id="1" transform="translate(8, 158)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="2" transform="translate(40, 158)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="3" transform="translate(86, 158)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L1" transform="translate(8, 42)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L2" transform="translate(8, 70)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L3" transform="translate(8, 98)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L4" transform="translate(8, 126)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="..." transform="translate(70.7, 163)" fill="#8E8E8E">
+                    <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                    <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                    <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                </g>
+            </g>
+            <g id="Pooling" transform="translate(0, 28)">
+                <g id="dash-box" transform="translate(-1.4, -0.4)">
+                    <path d="M4.40000021,0.4 L116.4,0.4 C118.056854,0.4 119.4,1.74314575 119.4,3.4 L119.4,19.4 C119.4,21.0568542 118.056854,22.4 116.4,22.4 L4.40000021,22.4 C2.74314596,22.4 1.40000021,21.0568542 1.40000021,19.4 L1.40000021,3.4 C1.40000021,1.74314575 2.74314596,0.4 4.40000021,0.4 Z" id="vLLM-dash-box" fill="#F7F7F7"></path>
+                    <path d="M97.1844026,22.4 C97.1844026,22.6209139 97.0053165,22.8 96.7844026,22.8 L95.7844026,22.8 C95.5634887,22.8 95.3844026,22.6209139 95.3844026,22.4 C95.3844026,22.1790861 95.5634887,22 95.7844026,22 L96.7844025,22 C97.0053165,22 97.1844026,22.1790861 97.1844026,22.4 Z M7.58440258,22.4 C7.58440258,22.6209139 7.40531648,22.8 7.18440258,22.8 L6.18440258,22.8 C5.96348868,22.8 5.78440258,22.6209139 5.78440258,22.4 C5.78440258,22.1790861 5.96348868,22 6.18440258,22 L7.18440261,22 C7.40531648,22 7.58440258,22.1790861 7.58440258,22.4 Z M10.7844026,22.4 C10.7844026,22.6209139 10.6053165,22.8 10.3844026,22.8 L9.38440258,22.8 C9.16348868,22.8 8.98440258,22.6209139 8.98440258,22.4 C8.98440258,22.1790861 9.16348868,22 9.38440258,22 L10.3844026,22 C10.6053165,22 10.7844026,22.1790861 10.7844026,22.4 Z M100.384403,22.4 C100.384403,22.6209139 100.205316,22.8 99.9844026,22.8 L98.9844026,22.8 C98.7634887,22.8 98.5844026,22.6209139 98.5844026,22.4 C98.5844026,22.1790861 98.7634887,22 98.9844026,22 L99.9844025,22 C100.205316,22 100.384403,22.1790861 100.384403,22.4 Z M17.1844026,22.4 C17.1844026,22.6209139 17.0053165,22.8 16.7844026,22.8 L15.7844026,22.8 C15.5634887,22.8 15.3844026,22.6209139 15.3844026,22.4 C15.3844026,22.1790861 15.5634887,22 15.7844026,22 L16.7844026,22 C17.0053165,22 17.1844026,22.1790861 17.1844026,22.4 Z M20.3844026,22.4 C20.3844026,22.6209139 20.2053165,22.8 19.9844026,22.8 L18.9844026,22.8 C18.7634887,22.8 18.5844026,22.6209139 18.5844026,22.4 C18.5844026,22.1790861 18.7634887,22 18.9844026,22 L19.9844026,22 C20.2053165,22 20.3844026,22.1790861 20.3844026,22.4 Z M23.5844026,22.4 C23.5844026,22.6209139 23.4053165,22.8 23.1844026,22.8 L22.1844026,22.8 C21.9634887,22.8 21.7844026,22.6209139 21.7844026,22.4 C21.7844026,22.1790861 21.9634887,22 22.1844026,22 L23.1844026,22 C23.4053165,22 23.5844026,22.1790861 23.5844026,22.4 Z M26.7844026,22.4 C26.7844026,22.6209139 26.6053165,22.8 26.3844026,22.8 L25.3844026,22.8 C25.1634887,22.8 24.9844026,22.6209139 24.9844026,22.4 C24.9844026,22.1790861 25.1634887,22 25.3844026,22 L26.3844026,22 C26.6053165,22 26.7844026,22.1790861 26.7844026,22.4 Z M29.9844026,22.4 C29.9844026,22.6209139 29.8053165,22.8 29.5844026,22.8 L28.5844026,22.8 C28.3634887,22.8 28.1844026,22.6209139 28.1844026,22.4 C28.1844026,22.1790861 28.3634887,22 28.5844026,22 L29.5844027,22 C29.8053165,22 29.9844026,22.1790861 29.9844026,22.4 Z M33.1844026,22.4 C33.1844026,22.6209139 33.0053165,22.8 32.7844026,22.8 L31.7844026,22.8 C31.5634887,22.8 31.3844026,22.6209139 31.3844026,22.4 C31.3844026,22.1790861 31.5634887,22 31.7844026,22 L32.7844027,22 C33.0053165,22 33.1844026,22.1790861 33.1844026,22.4 Z M36.3844026,22.4 C36.3844026,22.6209139 36.2053165,22.8 35.9844026,22.8 L34.9844026,22.8 C34.7634887,22.8 34.5844026,22.6209139 34.5844026,22.4 C34.5844026,22.1790861 34.7634887,22 34.9844026,22 L35.9844027,22 C36.2053165,22 36.3844026,22.1790861 36.3844026,22.4 Z M39.5844026,22.4 C39.5844026,22.6209139 39.4053165,22.8 39.1844026,22.8 L38.1844026,22.8 C37.9634887,22.8 37.7844026,22.6209139 37.7844026,22.4 C37.7844026,22.1790861 37.9634887,22 38.1844026,22 L39.1844027,22 C39.4053165,22 39.5844026,22.1790861 39.5844026,22.4 Z M42.7844026,22.4 C42.7844026,22.6209139 42.6053165,22.8 42.3844026,22.8 L41.3844026,22.8 C41.1634887,22.8 40.9844026,22.6209139 40.9844026,22.4 C40.9844026,22.1790861 41.1634887,22 41.3844026,22 L42.3844027,22 C42.6053165,22 42.7844026,22.1790861 42.7844026,22.4 Z M45.9844026,22.4 C45.9844026,22.6209139 45.8053165,22.8 45.5844026,22.8 L44.5844026,22.8 C44.3634887,22.8 44.1844026,22.6209139 44.1844026,22.4 C44.1844026,22.1790861 44.3634887,22 44.5844026,22 L45.5844027,22 C45.8053165,22 45.9844026,22.1790861 45.9844026,22.4 Z M49.1844026,22.4 C49.1844026,22.6209139 49.0053165,22.8 48.7844026,22.8 L47.7844026,22.8 C47.5634887,22.8 47.3844026,22.6209139 47.3844026,22.4 C47.3844026,22.1790861 47.5634887,22 47.7844026,22 L48.7844027,22 C49.0053165,22 49.1844026,22.1790861 49.1844026,22.4 Z M52.3844026,22.4 C52.3844026,22.6209139 52.2053165,22.8 51.9844026,22.8 L50.9844026,22.8 C50.7634887,22.8 50.5844026,22.6209139 50.5844026,22.4 C50.5844026,22.1790861 50.7634887,22 50.9844026,22 L51.9844027,22 C52.2053165,22 52.3844026,22.1790861 52.3844026,22.4 Z M55.5844026,22.4 C55.5844026,22.6209139 55.4053165,22.8 55.1844026,22.8 L54.1844026,22.8 C53.9634887,22.8 53.7844026,22.6209139 53.7844026,22.4 C53.7844026,22.1790861 53.9634887,22 54.1844026,22 L55.1844027,22 C55.4053165,22 55.5844026,22.1790861 55.5844026,22.4 Z M58.7844026,22.4 C58.7844026,22.6209139 58.6053165,22.8 58.3844026,22.8 L57.3844026,22.8 C57.1634887,22.8 56.9844026,22.6209139 56.9844026,22.4 C56.9844026,22.1790861 57.1634887,22 57.3844026,22 L58.3844027,22 C58.6053165,22 58.7844026,22.1790861 58.7844026,22.4 Z M61.9844026,22.4 C61.9844026,22.6209139 61.8053165,22.8 61.5844026,22.8 L60.5844026,22.8 C60.3634887,22.8 60.1844026,22.6209139 60.1844026,22.4 C60.1844026,22.1790861 60.3634887,22 60.5844026,22 L61.5844025,22 C61.8053165,22 61.9844026,22.1790861 61.9844026,22.4 Z M65.1844026,22.4 C65.1844026,22.6209139 65.0053165,22.8 64.7844026,22.8 L63.7844026,22.8 C63.5634887,22.8 63.3844026,22.6209139 63.3844026,22.4 C63.3844026,22.1790861 63.5634887,22 63.7844026,22 L64.7844025,22 C65.0053165,22 65.1844026,22.1790861 65.1844026,22.4 Z M68.3844026,22.4 C68.3844026,22.6209139 68.2053165,22.8 67.9844026,22.8 L66.9844026,22.8 C66.7634887,22.8 66.5844026,22.6209139 66.5844026,22.4 C66.5844026,22.1790861 66.7634887,22 66.9844026,22 L67.9844025,22 C68.2053165,22 68.3844026,22.1790861 68.3844026,22.4 Z M13.9844026,22.4 C13.9844026,22.6209139 13.8053165,22.8 13.5844026,22.8 L12.5844026,22.8 C12.3634887,22.8 12.1844026,22.6209139 12.1844026,22.4 C12.1844026,22.1790861 12.3634887,22 12.5844026,22 L13.5844026,22 C13.8053165,22 13.9844026,22.1790861 13.9844026,22.4 Z M71.5844026,22.4 C71.5844026,22.6209139 71.4053165,22.8 71.1844026,22.8 L70.1844026,22.8 C69.9634887,22.8 69.7844026,22.6209139 69.7844026,22.4 C69.7844026,22.1790861 69.9634887,22 70.1844026,22 L71.1844025,22 C71.4053165,22 71.5844026,22.1790861 71.5844026,22.4 Z M74.7844026,22.4 C74.7844026,22.6209139 74.6053165,22.8 74.3844026,22.8 L73.3844026,22.8 C73.1634887,22.8 72.9844026,22.6209139 72.9844026,22.4 C72.9844026,22.1790861 73.1634887,22 73.3844026,22 L74.3844025,22 C74.6053165,22 74.7844026,22.1790861 74.7844026,22.4 Z M77.9844026,22.4 C77.9844026,22.6209139 77.8053165,22.8 77.5844026,22.8 L76.5844026,22.8 C76.3634887,22.8 76.1844026,22.6209139 76.1844026,22.4 C76.1844026,22.1790861 76.3634887,22 76.5844026,22 L77.5844025,22 C77.8053165,22 77.9844026,22.1790861 77.9844026,22.4 Z M81.1844026,22.4 C81.1844026,22.6209139 81.0053165,22.8 80.7844026,22.8 L79.7844026,22.8 C79.5634887,22.8 79.3844026,22.6209139 79.3844026,22.4 C79.3844026,22.1790861 79.5634887,22 79.7844026,22 L80.7844025,22 C81.0053165,22 81.1844026,22.1790861 81.1844026,22.4 Z M84.3844026,22.4 C84.3844026,22.6209139 84.2053165,22.8 83.9844026,22.8 L82.9844026,22.8 C82.7634887,22.8 82.5844026,22.6209139 82.5844026,22.4 C82.5844026,22.1790861 82.7634887,22 82.9844026,22 L83.9844025,22 C84.2053165,22 84.3844026,22.1790861 84.3844026,22.4 Z M87.5844026,22.4 C87.5844026,22.6209139 87.4053165,22.8 87.1844026,22.8 L86.1844026,22.8 C85.9634887,22.8 85.7844026,22.6209139 85.7844026,22.4 C85.7844026,22.1790861 85.9634887,22 86.1844026,22 L87.1844025,22 C87.4053165,22 87.5844026,22.1790861 87.5844026,22.4 Z M90.7844026,22.4 C90.7844026,22.6209139 90.6053165,22.8 90.3844026,22.8 L89.3844026,22.8 C89.1634887,22.8 88.9844026,22.6209139 88.9844026,22.4 C88.9844026,22.1790861 89.1634887,22 89.3844026,22 L90.3844025,22 C90.6053165,22 90.7844026,22.1790861 90.7844026,22.4 Z M93.9844026,22.4 C93.9844026,22.6209139 93.8053165,22.8 93.5844026,22.8 L92.5844026,22.8 C92.3634887,22.8 92.1844026,22.6209139 92.1844026,22.4 C92.1844026,22.1790861 92.3634887,22 92.5844026,22 L93.5844025,22 C93.8053165,22 93.9844026,22.1790861 93.9844026,22.4 Z M116.384403,22.4 C116.384403,22.6209139 116.205316,22.8 115.984403,22.8 L114.984403,22.8 C114.763489,22.8 114.584403,22.6209139 114.584403,22.4 C114.584403,22.1790861 114.763489,22 114.984403,22 L115.984403,22 C116.205316,22 116.384403,22.1790861 116.384403,22.4 Z M113.184403,22.4 C113.184403,22.6209139 113.005316,22.8 112.784403,22.8 L111.784403,22.8 C111.563489,22.8 111.384403,22.6209139 111.384403,22.4 C111.384403,22.1790861 111.563489,22 111.784403,22 L112.784403,22 C113.005316,22 113.184403,22.1790861 113.184403,22.4 Z M109.984403,22.4 C109.984403,22.6209139 109.805316,22.8 109.584403,22.8 L108.584403,22.8 C108.363489,22.8 108.184403,22.6209139 108.184403,22.4 C108.184403,22.1790861 108.363489,22 108.584403,22 L109.584403,22 C109.805316,22 109.984403,22.1790861 109.984403,22.4 Z M106.784403,22.4 C106.784403,22.6209139 106.605316,22.8 106.384403,22.8 L105.384403,22.8 C105.163489,22.8 104.984403,22.6209139 104.984403,22.4 C104.984403,22.1790861 105.163489,22 105.384403,22 L106.384402,22 C106.605316,22 106.784403,22.1790861 106.784403,22.4 Z M103.584403,22.4 C103.584403,22.6209139 103.405316,22.8 103.184403,22.8 L102.184403,22.8 C101.963489,22.8 101.784403,22.6209139 101.784403,22.4 C101.784403,22.1790861 101.963489,22 102.184403,22 L103.184402,22 C103.405316,22 103.584403,22.1790861 103.584403,22.4 Z M3.21445335,21.7146857 C3.47201296,21.8469101 3.7503283,21.9352706 4.04005994,21.9753362 C4.25889142,22.0055973 4.41175792,22.2075267 4.38149683,22.4263582 C4.35123574,22.6451896 4.14930631,22.7980562 3.93047483,22.7677951 C3.55112172,22.7153362 3.1863813,22.5995372 2.84908821,22.4263797 C2.65255935,22.3254869 2.575031,22.084379 2.6759238,21.8878502 C2.77681659,21.6913213 3.01792449,21.6137929 3.21445335,21.7146857 Z M119.085667,20.7967074 C119.266363,20.923797 119.30982,21.1733068 119.182731,21.3540032 C118.964109,21.6648408 118.694714,21.9373837 118.386455,22.1596216 C118.207257,22.2888142 117.957256,22.2482762 117.828063,22.0690774 C117.698871,21.8898786 117.739409,21.6398779 117.918607,21.5106853 C118.154633,21.3405235 118.360988,21.1317568 118.528371,20.8937713 C118.655461,20.7130749 118.904971,20.6696179 119.085667,20.7967074 Z M1.79999053,19.3738218 L1.80050042,19.4515966 C1.80607818,19.7390086 1.85825979,20.0190186 1.95396115,20.2838222 C2.02904737,20.4915841 1.92149248,20.7208778 1.71373058,20.795964 C1.50596867,20.8710502 1.27667495,20.7634953 1.20158873,20.5557334 C1.07625093,20.2089265 1.00793261,19.8423259 1.00058539,19.4621397 L1.00000988,19.3793853 C0.99847357,19.1584767 1.17630991,18.9781495 1.39721847,18.9766034 C1.61812703,18.9750769 1.79845422,19.1529133 1.79999053,19.3738218 Z M119.8,18.0077988 L119.8,19.0077988 C119.8,19.2287127 119.620914,19.4077988 119.4,19.4077988 C119.179086,19.4077988 119,19.2287127 119,19.0077988 L119,18.0077988 C119,17.7868849 119.179086,17.6077988 119.4,17.6077988 C119.620914,17.6077988 119.8,17.7868849 119.8,18.0077988 Z M1.80000021,16.1766036 L1.80000021,17.1766036 C1.80000021,17.3975175 1.62091411,17.5766036 1.40000021,17.5766036 C1.17908631,17.5766036 0.900000207,17.3975175 0.900000207,17.1766036 L0.900000207,16.1766036 C0.900000207,15.9556897 1.17908631,15.7766036 1.40000021,15.7766036 C1.62091411,15.7766036 1.80000021,15.9556897 1.80000021,16.1766036 Z M119.8,14.8077988 L119.8,15.8077988 C119.8,16.0287127 119.620914,16.2077988 119.4,16.2077988 C119.179086,16.2077988 119,16.0287127 119,15.8077988 L119,14.8077988 C119,14.5868849 119.179086,14.4077988 119.4,14.4077988 C119.620914,14.4077988 119.8,14.5868849 119.8,14.8077988 Z M1.80000021,12.9766036 L1.80000021,13.9766036 C1.80000021,14.1975175 1.62091411,14.3766036 1.40000021,14.3766036 C1.17908631,14.3766036 0.900000207,14.1975175 0.900000207,13.9766036 L0.900000207,12.9766036 C0.900000207,12.7556897 1.17908631,12.5766036 1.40000021,12.5766036 C1.62091411,12.5766036 1.80000021,12.7556897 1.80000021,12.9766036 Z M119.8,11.6077988 L119.8,12.6077988 C119.8,12.8287127 119.620914,13.0077988 119.4,13.0077988 C119.179086,13.0077988 119,12.8287127 119,12.6077988 L119,11.6077988 C119,11.3868849 119.179086,11.2077988 119.4,11.2077988 C119.620914,11.2077988 119.8,11.3868849 119.8,11.6077988 Z M1.80000021,9.77660357 L1.80000021,10.7766036 C1.80000021,10.9975175 1.62091411,11.1766036 1.40000021,11.1766036 C1.17908631,11.1766036 0.900000207,10.9975175 0.900000207,10.7766036 L0.900000207,9.77660357 C0.900000207,9.55568967 1.17908631,9.37660357 1.40000021,9.37660357 C1.62091411,9.37660357 1.80000021,9.55568967 1.80000021,9.77660357 Z M119.8,8.40779881 L119.8,9.40779881 C119.8,9.62871271 119.620914,9.80779881 119.4,9.80779881 C119.179086,9.80779881 119,9.62871271 119,9.40779881 L119,8.40779881 C119,8.18688491 119.179086,8.00779881 119.4,8.00779881 C119.620914,8.00779881 119.8,8.18688491 119.8,8.40779881 Z M1.80000021,6.57660357 L1.80000021,7.57660357 C1.80000021,7.79751746 1.62091411,7.97660357 1.40000021,7.97660357 C1.17908631,7.97660357 0.900000207,7.79751746 0.900000207,7.57660357 L0.900000207,6.57660357 C0.900000207,6.35568967 1.17908631,6.17660357 1.40000021,6.17660357 C1.62091411,6.17660357 1.80000021,6.35568967 1.80000021,6.57660357 Z M119.8,5.20779881 L119.8,6.20779881 C119.8,6.42871271 119.620914,6.60779881 119.4,6.60779881 C119.179086,6.60779881 119,6.42871271 119,6.20779881 L119,5.20779881 C119,4.98688491 119.179086,4.80779881 119.4,4.80779881 C119.620914,4.80779881 119.8,4.98688491 119.8,5.20779881 Z M1.80007849,3.37957686 C1.80001327,3.3897814 1.80001327,3.3897814 1.80000021,3.4 L1.80000021,4.37660357 C1.80000021,4.59751746 1.62091411,4.77660357 1.40000021,4.77660357 C1.17908631,4.77660357 1.00000021,4.59751746 1.00000021,4.37660357 L1.00000021,3.4 C1.00001719,3.38671374 1.00001719,3.38671374 1.00010202,3.37344172 C1.00179619,3.15253431 1.18225042,2.97482688 1.40315783,2.97650904 C1.62406523,2.97821522 1.80177267,3.15866946 1.80007849,3.37957686 Z M119.421116,1.83887624 C119.595442,2.17563306 119.712485,2.54002136 119.766215,2.9191864 C119.79721,3.13791515 119.645022,3.34035621 119.426293,3.37135118 C119.207564,3.40234614 119.005123,3.25015784 118.974128,3.0314291 C118.933094,2.74185398 118.843784,2.46380655 118.710664,2.20665099 C118.609106,2.01046515 118.685816,1.76909592 118.882002,1.66753772 C119.078188,1.56597953 119.319557,1.6426904 119.421116,1.83887624 Z M3.37834391,0.550095989 C3.47987763,0.746294496 3.40313666,0.987654157 3.20693815,1.08918788 C2.94867982,1.22283788 2.71432455,1.39926119 2.51394916,1.61035064 C2.36185852,1.77057357 2.10867834,1.77716609 1.94845541,1.62507544 C1.78823248,1.4729848 1.78163997,1.21980462 1.93373061,1.05958169 C2.19539698,0.783924034 2.50154333,0.553456192 2.83925202,0.378690235 C3.03545052,0.277156512 3.27681019,0.353897482 3.37834391,0.550095989 Z M117.517808,0.188136945 C117.72645,0.260742276 117.836729,0.488738155 117.764124,0.697379994 C117.691519,0.906021834 117.463523,1.01630121 117.254881,0.943695883 C116.98269,0.84897599 116.695024,0.800014594 116.400132,0.800000003 C116.179218,0.799989073 116.000152,0.620894112 116.000152,0.399980212 C116.000152,0.179066313 116.179258,-1.09263043e-05 116.400172,4.24230608e-09 C116.784867,1.90385059e-05 117.161375,0.064101627 117.517808,0.188136945 Z M60.2000002,0.5 C60.2000002,0.7209139 60.0209141,0.9 59.8000002,0.9 L58.8000002,0.9 C58.5790863,0.9 58.4000002,0.7209139 58.4000002,0.5 C58.4000002,0.2790861 58.5790863,0 58.8000002,0 L59.8000002,0 C60.0209141,0 60.2000002,0.2790861 60.2000002,0.5 Z M5.80000021,0.5 C5.80000021,0.7209139 5.62091411,0.9 5.40000021,0.9 L4.40000021,0.9 C4.17908631,0.9 4.00000021,0.7209139 4.00000021,0.5 C4.00000021,0.2790861 4.17908631,0 4.40000021,0 L5.40000021,0 C5.62091411,0 5.80000021,0.2790861 5.80000021,0.5 Z M108.2,0.5 C108.2,0.7209139 108.020914,0.9 107.8,0.9 L106.8,0.9 C106.579086,0.9 106.4,0.7209139 106.4,0.5 C106.4,0.2790861 106.579086,0 106.8,0 L107.8,0 C108.020914,0 108.2,0.2790861 108.2,0.5 Z M105,0.5 C105,0.7209139 104.820914,0.9 104.6,0.9 L103.6,0.9 C103.379086,0.9 103.2,0.7209139 103.2,0.5 C103.2,0.2790861 103.379086,0 103.6,0 L104.6,0 C104.820914,0 105,0.2790861 105,0.5 Z M101.8,0.5 C101.8,0.7209139 101.620914,0.9 101.4,0.9 L100.4,0.9 C100.179086,0.9 100,0.7209139 100,0.5 C100,0.2790861 100.179086,0 100.4,0 L101.4,0 C101.620914,0 101.8,0.2790861 101.8,0.5 Z M98.6000002,0.5 C98.6000002,0.7209139 98.4209141,0.9 98.2000002,0.9 L97.2000002,0.9 C96.9790863,0.9 96.8000002,0.7209139 96.8000002,0.5 C96.8000002,0.2790861 96.9790863,0 97.2000002,0 L98.2000002,0 C98.4209141,0 98.6000002,0.2790861 98.6000002,0.5 Z M95.4000002,0.5 C95.4000002,0.7209139 95.2209141,0.9 95.0000002,0.9 L94.0000002,0.9 C93.7790863,0.9 93.6000002,0.7209139 93.6000002,0.5 C93.6000002,0.2790861 93.7790863,0 94.0000002,0 L95.0000002,0 C95.2209141,0 95.4000002,0.2790861 95.4000002,0.5 Z M92.2000002,0.5 C92.2000002,0.7209139 92.0209141,0.9 91.8000002,0.9 L90.8000002,0.9 C90.5790863,0.9 90.4000002,0.7209139 90.4000002,0.5 C90.4000002,0.2790861 90.5790863,0 90.8000002,0 L91.8000002,0 C92.0209141,0 92.2000002,0.2790861 92.2000002,0.5 Z M89.0000002,0.5 C89.0000002,0.7209139 88.8209141,0.9 88.6000002,0.9 L87.6000002,0.9 C87.3790863,0.9 87.2000002,0.7209139 87.2000002,0.5 C87.2000002,0.2790861 87.3790863,0 87.6000002,0 L88.6000002,0 C88.8209141,0 89.0000002,0.2790861 89.0000002,0.5 Z M85.8000002,0.5 C85.8000002,0.7209139 85.6209141,0.9 85.4000002,0.9 L84.4000002,0.9 C84.1790863,0.9 84.0000002,0.7209139 84.0000002,0.5 C84.0000002,0.2790861 84.1790863,0 84.4000002,0 L85.4000002,0 C85.6209141,0 85.8000002,0.2790861 85.8000002,0.5 Z M82.6000002,0.5 C82.6000002,0.7209139 82.4209141,0.9 82.2000002,0.9 L81.2000002,0.9 C80.9790863,0.9 80.8000002,0.7209139 80.8000002,0.5 C80.8000002,0.2790861 80.9790863,0 81.2000002,0 L82.2000002,0 C82.4209141,0 82.6000002,0.2790861 82.6000002,0.5 Z M79.4000002,0.5 C79.4000002,0.7209139 79.2209141,0.9 79.0000002,0.9 L78.0000002,0.9 C77.7790863,0.9 77.6000002,0.7209139 77.6000002,0.5 C77.6000002,0.2790861 77.7790863,0 78.0000002,0 L79.0000002,0 C79.2209141,0 79.4000002,0.2790861 79.4000002,0.5 Z M76.2000002,0.5 C76.2000002,0.7209139 76.0209141,0.9 75.8000002,0.9 L74.8000002,0.9 C74.5790863,0.9 74.4000002,0.7209139 74.4000002,0.5 C74.4000002,0.2790861 74.5790863,0 74.8000002,0 L75.8000002,0 C76.0209141,0 76.2000002,0.2790861 76.2000002,0.5 Z M73.0000002,0.5 C73.0000002,0.7209139 72.8209141,0.9 72.6000002,0.9 L71.6000002,0.9 C71.3790863,0.9 71.2000002,0.7209139 71.2000002,0.5 C71.2000002,0.2790861 71.3790863,0 71.6000002,0 L72.6000002,0 C72.8209141,0 73.0000002,0.2790861 73.0000002,0.5 Z M69.8000002,0.5 C69.8000002,0.7209139 69.6209141,0.9 69.4000002,0.9 L68.4000002,0.9 C68.1790863,0.9 68.0000002,0.7209139 68.0000002,0.5 C68.0000002,0.2790861 68.1790863,0 68.4000002,0 L69.4000002,0 C69.6209141,0 69.8000002,0.2790861 69.8000002,0.5 Z M66.6000002,0.5 C66.6000002,0.7209139 66.4209141,0.9 66.2000002,0.9 L65.2000002,0.9 C64.9790863,0.9 64.8000002,0.7209139 64.8000002,0.5 C64.8000002,0.2790861 64.9790863,0 65.2000002,0 L66.2000002,0 C66.4209141,0 66.6000002,0.2790861 66.6000002,0.5 Z M63.4000002,0.5 C63.4000002,0.7209139 63.2209141,0.9 63.0000002,0.9 L62.0000002,0.9 C61.7790863,0.9 61.6000002,0.7209139 61.6000002,0.5 C61.6000002,0.2790861 61.7790863,0 62.0000002,0 L63.0000002,0 C63.2209141,0 63.4000002,0.2790861 63.4000002,0.5 Z M114.6,0.5 C114.6,0.7209139 114.420914,0.9 114.2,0.9 L113.2,0.9 C112.979086,0.9 112.8,0.7209139 112.8,0.5 C112.8,0.2790861 112.979086,0 113.2,0 L114.2,0 C114.420914,0 114.6,0.2790861 114.6,0.5 Z M57.0000002,0.5 C57.0000002,0.7209139 56.8209141,0.9 56.6000002,0.9 L55.6000002,0.9 C55.3790863,0.9 55.2000002,0.7209139 55.2000002,0.5 C55.2000002,0.2790861 55.3790863,0 55.6000002,0 L56.6000002,0 C56.8209141,0 57.0000002,0.2790861 57.0000002,0.5 Z M53.8000002,0.5 C53.8000002,0.7209139 53.6209141,0.9 53.4000002,0.9 L52.4000002,0.9 C52.1790863,0.9 52.0000002,0.7209139 52.0000002,0.5 C52.0000002,0.2790861 52.1790863,0 52.4000002,0 L53.4000002,0 C53.6209141,0 53.8000002,0.2790861 53.8000002,0.5 Z M50.6000002,0.5 C50.6000002,0.7209139 50.4209141,0.9 50.2000002,0.9 L49.2000002,0.9 C48.9790863,0.9 48.8000002,0.7209139 48.8000002,0.5 C48.8000002,0.2790861 48.9790863,0 49.2000002,0 L50.2000002,0 C50.4209141,0 50.6000002,0.2790861 50.6000002,0.5 Z M47.4000002,0.5 C47.4000002,0.7209139 47.2209141,0.9 47.0000002,0.9 L46.0000002,0.9 C45.7790863,0.9 45.6000002,0.7209139 45.6000002,0.5 C45.6000002,0.2790861 45.7790863,0 46.0000002,0 L47.0000002,0 C47.2209141,0 47.4000002,0.2790861 47.4000002,0.5 Z M44.2000002,0.5 C44.2000002,0.7209139 44.0209141,0.9 43.8000002,0.9 L42.8000002,0.9 C42.5790863,0.9 42.4000002,0.7209139 42.4000002,0.5 C42.4000002,0.2790861 42.5790863,0 42.8000002,0 L43.8000002,0 C44.0209141,0 44.2000002,0.2790861 44.2000002,0.5 Z M41.0000002,0.5 C41.0000002,0.7209139 40.8209141,0.9 40.6000002,0.9 L39.6000002,0.9 C39.3790863,0.9 39.2000002,0.7209139 39.2000002,0.5 C39.2000002,0.2790861 39.3790863,0 39.6000002,0 L40.6000002,0 C40.8209141,0 41.0000002,0.2790861 41.0000002,0.5 Z M37.8000002,0.5 C37.8000002,0.7209139 37.6209141,0.9 37.4000002,0.9 L36.4000002,0.9 C36.1790863,0.9 36.0000002,0.7209139 36.0000002,0.5 C36.0000002,0.2790861 36.1790863,0 36.4000002,0 L37.4000002,0 C37.6209141,0 37.8000002,0.2790861 37.8000002,0.5 Z M34.6000002,0.5 C34.6000002,0.7209139 34.4209141,0.9 34.2000002,0.9 L33.2000002,0.9 C32.9790863,0.9 32.8000002,0.7209139 32.8000002,0.5 C32.8000002,0.2790861 32.9790863,0 33.2000002,0 L34.2000002,0 C34.4209141,0 34.6000002,0.2790861 34.6000002,0.5 Z M31.4000002,0.5 C31.4000002,0.7209139 31.2209141,0.9 31.0000002,0.9 L30.0000002,0.9 C29.7790863,0.9 29.6000002,0.7209139 29.6000002,0.5 C29.6000002,0.2790861 29.7790863,0 30.0000002,0 L31.0000002,0 C31.2209141,0 31.4000002,0.2790861 31.4000002,0.5 Z M28.2000002,0.5 C28.2000002,0.7209139 28.0209141,0.9 27.8000002,0.9 L26.8000002,0.9 C26.5790863,0.9 26.4000002,0.7209139 26.4000002,0.5 C26.4000002,0.2790861 26.5790863,0 26.8000002,0 L27.8000002,0 C28.0209141,0 28.2000002,0.2790861 28.2000002,0.5 Z M25.0000002,0.5 C25.0000002,0.7209139 24.8209141,0.9 24.6000002,0.9 L23.6000002,0.9 C23.3790863,0.9 23.2000002,0.7209139 23.2000002,0.5 C23.2000002,0.2790861 23.3790863,0 23.6000002,0 L24.6000002,0 C24.8209141,0 25.0000002,0.2790861 25.0000002,0.5 Z M21.8000002,0.5 C21.8000002,0.7209139 21.6209141,0.9 21.4000002,0.9 L20.4000002,0.9 C20.1790863,0.9 20.0000002,0.7209139 20.0000002,0.5 C20.0000002,0.2790861 20.1790863,0 20.4000002,0 L21.4000002,0 C21.6209141,0 21.8000002,0.2790861 21.8000002,0.5 Z M18.6000002,0.5 C18.6000002,0.7209139 18.4209141,0.9 18.2000002,0.9 L17.2000002,0.9 C16.9790863,0.9 16.8000002,0.7209139 16.8000002,0.5 C16.8000002,0.2790861 16.9790863,0 17.2000002,0 L18.2000002,0 C18.4209141,0 18.6000002,0.2790861 18.6000002,0.5 Z M15.4000002,0.5 C15.4000002,0.7209139 15.2209141,0.9 15.0000002,0.9 L14.0000002,0.9 C13.7790863,0.9 13.6000002,0.7209139 13.6000002,0.5 C13.6000002,0.2790861 13.7790863,0 14.0000002,0 L15.0000002,0 C15.2209141,0 15.4000002,0.2790861 15.4000002,0.5 Z M12.2000002,0.5 C12.2000002,0.7209139 12.0209141,0.9 11.8000002,0.9 L10.8000002,0.9 C10.5790863,0.9 10.4000002,0.7209139 10.4000002,0.5 C10.4000002,0.2790861 10.5790863,0 10.8000002,0 L11.8000002,0 C12.0209141,0 12.2000002,0.2790861 12.2000002,0.5 Z M9.00000021,0.5 C9.00000021,0.7209139 8.82091411,0.9 8.60000021,0.9 L7.60000021,0.9 C7.37908631,0.9 7.20000021,0.7209139 7.20000021,0.5 C7.20000021,0.2790861 7.37908631,0 7.60000021,0 L8.60000021,0 C8.82091411,0 9.00000021,0.2790861 9.00000021,0.5 Z M111.4,0.5 C111.4,0.7209139 111.220914,0.9 111,0.9 L110,0.9 C109.779086,0.9 109.6,0.7209139 109.6,0.5 C109.6,0.2790861 109.779086,0 110,0 L111,0 C111.220914,0 111.4,0.2790861 111.4,0.5 Z" id="vLLM-dash-box" fill="#8E8E8E" fill-rule="nonzero"></path>
+                </g>
+                <g id="..." transform="translate(70.7, 11)" fill="#8E8E8E">
+                    <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                    <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                    <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                </g>
+                <g id="1" transform="translate(8, 5)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="2" transform="translate(40, 5)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="3" transform="translate(86, 5)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <path d="M59.3307741,-15.7917554 C62.4712809,-11.4212425 65.7404947,-9.26615514 69.1397849,-9.26615514 C73.8898307,-9.26615514 73.8898307,-9.26615514 81.3607764,-9.26615514 C83.1052904,-9.26615514 83.1052904,-9.26615514 84.8713515,-9.26615514 C92.8047541,-9.26615514 92.8047541,-9.26615514 98.7849462,-9.26615514 C105.274325,-9.26615514 109.5,-4.56459639 109.5,8.8299931e-06 C109.5,0.276142375 109.276142,0.50000883 109,0.50000883 C108.723858,0.50000883 108.5,0.276142375 108.5,8.8299931e-06 C108.5,-4.03469391 104.69684,-8.26615514 98.7849462,-8.26615514 C92.8047541,-8.26615514 92.8047541,-8.26615514 84.8713515,-8.26615514 C83.1052904,-8.26615514 83.1052904,-8.26615514 81.3607764,-8.26615514 C73.8898307,-8.26615514 73.8898307,-8.26615514 69.1397849,-8.26615514 C65.5122906,-8.26615514 62.0962993,-10.4281171 58.8936783,-14.704646 C55.1460058,-10.4249168 51.7634204,-8.26615514 48.7096774,-8.26615514 C43.6745799,-8.26615514 43.6745799,-8.26615514 34.9592194,-8.26615514 C34.0042849,-8.26615514 34.0042849,-8.26615514 33.0456022,-8.26615514 C25.0739267,-8.26615514 25.0739267,-8.26615514 19.2572712,-8.26615514 C13.3850931,-8.26615514 9.5,-4.21458987 9.5,8.8299931e-06 C9.5,0.276142375 9.27614237,0.50000883 9,0.50000883 C8.72385763,0.50000883 8.5,0.276142375 8.5,8.8299931e-06 C8.5,-4.75770619 12.8232042,-9.26615514 19.2572712,-9.26615514 C25.0739267,-9.26615514 25.0739267,-9.26615514 33.0456022,-9.26615514 C34.0042849,-9.26615514 34.0042849,-9.26615514 34.9592194,-9.26615514 C43.6745799,-9.26615514 43.6745799,-9.26615514 48.7096774,-9.26615514 C51.5132414,-9.26615514 54.804443,-11.4357371 58.5441665,-15.8242864 C58.7547447,-16.071399 59.1413192,-16.0554118 59.3307741,-15.7917554 Z" id="Path" fill="#8E8E8E" fill-rule="nonzero"></path>
+                <g id="top" transform="translate(47, -28)" xlink:href="#path-7" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" fill-rule="evenodd" x="0" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+            </g>
+        </g>
+        <g id="LAST-Pooling" stroke-width="1" transform="translate(298, 70)">
+            <g id="Body">
+                <g id="LAST-Pooling" transform="translate(0, 236)" fill="#7D7D7D" fill-rule="nonzero">
+                    <path d="M26.0778809,12 L29.869873,12 C30.0453288,12 30.1858724,11.9391276 30.2915039,11.8173828 C30.3971354,11.695638 30.4499512,11.5541992 30.4499512,11.3930664 C30.4499512,11.2283529 30.3971354,11.0842285 30.2915039,10.9606934 C30.1858724,10.8371582 30.0453288,10.7753906 29.869873,10.7753906 L26.8405762,10.7753906 L26.8352051,5.06054688 C26.8352051,4.82063802 26.7600098,4.62727865 26.6096191,4.48046875 C26.4592285,4.33365885 26.2801921,4.26025391 26.0725098,4.26025391 C25.8612467,4.26025391 25.6786296,4.33365885 25.5246582,4.48046875 C25.3706868,4.62727865 25.2937012,4.82063802 25.2937012,5.06054688 L25.2937012,11.0600586 C25.2937012,11.3178711 25.3671061,11.5389811 25.513916,11.7233887 C25.6607259,11.9077962 25.8487142,12 26.0778809,12 Z" id="Path"></path>
+                    <path d="M35.7995605,9.18017578 L33.1462402,9.18017578 L34.4729004,5.640625 L35.7995605,9.18017578 Z M36.6696777,11.5058594 C36.7305501,11.6813151 36.8236491,11.8129069 36.9489746,11.9006348 C37.0743001,11.9883626 37.2139486,12.0322266 37.3679199,12.0322266 C37.5612793,12.0322266 37.7331543,11.9677734 37.8835449,11.8388672 C38.0339355,11.7099609 38.1091309,11.5470378 38.1091309,11.3500977 C38.1091309,11.2605794 38.0912272,11.1692708 38.0554199,11.0761719 L35.6975098,5.17333984 C35.5471191,4.77587891 35.374349,4.50911458 35.1791992,4.37304688 C34.9840495,4.23697917 34.7486165,4.16894531 34.4729004,4.16894531 C34.1971842,4.16894531 33.9617513,4.23697917 33.7666016,4.37304688 C33.5714518,4.50911458 33.3986816,4.77587891 33.248291,5.17333984 L30.8903809,11.0761719 C30.8545736,11.1692708 30.8366699,11.2605794 30.8366699,11.3500977 C30.8366699,11.5470378 30.9118652,11.7099609 31.0622559,11.8388672 C31.2126465,11.9677734 31.3845215,12.0322266 31.5778809,12.0322266 C31.7318522,12.0322266 31.8715007,11.9883626 31.9968262,11.9006348 C32.1221517,11.8129069 32.2152507,11.6813151 32.276123,11.5058594 L32.7595215,10.2167969 L36.1862793,10.2167969 L36.6696777,11.5058594 Z" id="Shape"></path>
+                    <path d="M41.9387207,12.1396484 C42.8768717,12.1396484 43.624349,11.9292806 44.1811523,11.5085449 C44.7379557,11.0878092 45.0163574,10.5157878 45.0163574,9.79248047 C45.0163574,9.48095703 44.9698079,9.20613607 44.876709,8.96801758 C44.78361,8.72989909 44.6394857,8.52311198 44.4443359,8.34765625 C44.2491862,8.17220052 44.0289714,8.02360026 43.7836914,7.90185547 C43.5384115,7.78011068 43.236735,7.66194661 42.8786621,7.54736328 C42.8607585,7.54020182 42.5993652,7.46321615 42.0944824,7.31640625 C41.5895996,7.16959635 41.3282064,7.09261068 41.3103027,7.08544922 C41.041748,6.99593099 40.8421224,6.88045247 40.7114258,6.73901367 C40.5807292,6.59757487 40.5153809,6.4140625 40.5153809,6.18847656 C40.5153809,6.01302083 40.5583496,5.86263021 40.6442871,5.73730469 C40.7302246,5.61197917 40.850179,5.51529948 41.0041504,5.44726562 C41.1581217,5.37923177 41.3255208,5.32999674 41.5063477,5.29956055 C41.6871745,5.26912435 41.8885905,5.25390625 42.1105957,5.25390625 C42.7801921,5.25390625 43.4032389,5.41861979 43.9797363,5.74804688 C44.0728353,5.79817708 44.1587728,5.82324219 44.2375488,5.82324219 C44.3879395,5.82324219 44.5168457,5.75789388 44.6242676,5.62719727 C44.7316895,5.49650065 44.7854004,5.35058594 44.7854004,5.18945312 C44.7854004,5.03190104 44.7298991,4.90299479 44.6188965,4.80273438 C44.361084,4.58072917 43.9904785,4.3972168 43.5070801,4.25219727 C43.0236816,4.10717773 42.5259603,4.03466797 42.013916,4.03466797 C41.1366374,4.03466797 40.4213867,4.2351888 39.8681641,4.63623047 C39.3149414,5.03727214 39.0383301,5.5797526 39.0383301,6.26367187 C39.0383301,6.79361979 39.1735026,7.21346029 39.4438477,7.52319336 C39.7141927,7.83292643 40.1555176,8.09342448 40.7678223,8.3046875 L42.5402832,8.89550781 C42.873291,9.00292969 43.118571,9.1336263 43.276123,9.28759766 C43.4336751,9.44156901 43.5124512,9.6546224 43.5124512,9.92675781 C43.5124512,10.2561849 43.3665365,10.5050456 43.074707,10.6733398 C42.7828776,10.8416341 42.4024251,10.9257812 41.9333496,10.9257812 C41.1993001,10.9257812 40.4974772,10.7252604 39.8278809,10.3242188 C39.7670085,10.2884115 39.7043457,10.2705078 39.6398926,10.2705078 C39.4859212,10.2705078 39.3453776,10.3555501 39.2182617,10.5256348 C39.0911458,10.6957194 39.0275879,10.8684896 39.0275879,11.0439453 C39.0275879,11.1764323 39.0687663,11.273112 39.151123,11.3339844 C39.8422038,11.8710937 40.771403,12.1396484 41.9387207,12.1396484 Z" id="Path"></path>
+                    <path d="M48.7277832,12.0053711 C48.9390462,12.0053711 49.1207682,11.931071 49.2729492,11.7824707 C49.4251302,11.6338704 49.5012207,11.4396159 49.5012207,11.199707 L49.5012207,5.47412109 L51.2468262,5.47412109 C51.4258626,5.47412109 51.5690918,5.4132487 51.6765137,5.29150391 C51.7839355,5.16975911 51.8376465,5.02652995 51.8376465,4.86181641 C51.8376465,4.70068359 51.7830404,4.55834961 51.6738281,4.43481445 C51.5646159,4.3112793 51.4222819,4.24951172 51.2468262,4.24951172 L46.2087402,4.24951172 C46.0297038,4.24951172 45.8855794,4.3112793 45.7763672,4.43481445 C45.6671549,4.55834961 45.6125488,4.70247396 45.6125488,4.8671875 C45.6125488,5.03190104 45.6662598,5.17423503 45.7736816,5.29418945 C45.8811035,5.41414388 46.026123,5.47412109 46.2087402,5.47412109 L47.9597168,5.47412109 L47.9597168,11.199707 C47.9597168,11.4396159 48.0358073,11.6338704 48.1879883,11.7824707 C48.3401693,11.931071 48.5201009,12.0053711 48.7277832,12.0053711 Z" id="Path"></path>
+                    <path d="M56.7038574,12.0053711 C56.9151204,12.0053711 57.0968424,11.9319661 57.2490234,11.7851562 C57.4012044,11.6383464 57.4772949,11.444987 57.4772949,11.2050781 L57.4772949,8.92236328 L58.9758301,8.92236328 C60.9344889,8.92236328 61.9138184,8.14176432 61.9138184,6.58056641 C61.9138184,6.15445964 61.8475749,5.78833008 61.7150879,5.48217773 C61.5826009,5.17602539 61.3883464,4.93432617 61.1323242,4.75708008 C60.8763021,4.57983398 60.5808919,4.45092773 60.2460938,4.37036133 C59.9112956,4.28979492 59.5218913,4.24951172 59.0778809,4.24951172 L56.7683105,4.24951172 C56.5033366,4.24951172 56.2974447,4.33902995 56.1506348,4.51806641 C56.0038249,4.69710286 55.9304199,4.9226888 55.9304199,5.19482422 L55.9304199,11.2050781 C55.9304199,11.444987 56.0074056,11.6383464 56.161377,11.7851562 C56.3153483,11.9319661 56.4961751,12.0053711 56.7038574,12.0053711 Z M57.4772949,7.80517578 L57.4772949,5.42578125 L58.911377,5.42578125 C59.162028,5.42578125 59.3706055,5.44010417 59.5371094,5.46875 C59.7036133,5.49739583 59.8557943,5.55110677 59.9936523,5.62988281 C60.1315104,5.70865885 60.232666,5.82682292 60.2971191,5.984375 C60.3615723,6.14192708 60.3937988,6.34065755 60.3937988,6.58056641 C60.3937988,6.82763672 60.3615723,7.03173828 60.2971191,7.19287109 C60.232666,7.35400391 60.1315104,7.47843424 59.9936523,7.56616211 C59.8557943,7.65388997 59.7000326,7.71565755 59.5263672,7.75146484 C59.3527018,7.78727214 59.1351725,7.80517578 58.8737793,7.80517578 L57.4772949,7.80517578 Z" id="Shape"></path>
+                    <path d="M65.3513184,11.1459961 C64.8894043,11.1459961 64.529541,10.9848633 64.2717285,10.6625977 C64.013916,10.340332 63.8850098,9.88916016 63.8850098,9.30908203 C63.8850098,8.72184245 64.0130208,8.26619466 64.269043,7.94213867 C64.5250651,7.61808268 64.8858236,7.45605469 65.3513184,7.45605469 C65.8168132,7.45605469 66.1784668,7.61897786 66.4362793,7.94482422 C66.6940918,8.27067057 66.822998,8.72542318 66.822998,9.30908203 C66.822998,9.88916016 66.6940918,10.340332 66.4362793,10.6625977 C66.1784668,10.9848633 65.8168132,11.1459961 65.3513184,11.1459961 Z M65.3513184,12.0644531 C65.7308757,12.0644531 66.0782064,12.0125326 66.3933105,11.9086914 C66.7084147,11.8048503 66.9742839,11.666097 67.190918,11.4924316 C67.4075521,11.3187663 67.5901693,11.1128743 67.7387695,10.8747559 C67.8873698,10.6366374 67.996582,10.3868815 68.0664062,10.1254883 C68.1362305,9.86409505 68.1711426,9.59195964 68.1711426,9.30908203 C68.1711426,9.01188151 68.1344401,8.72721354 68.0610352,8.45507812 C67.9876302,8.18294271 67.8739421,7.92960612 67.7199707,7.69506836 C67.5659993,7.4605306 67.3798014,7.2582194 67.161377,7.08813477 C66.9429525,6.91805013 66.6788737,6.78377279 66.3691406,6.68530273 C66.0594076,6.58683268 65.7201335,6.53759766 65.3513184,6.53759766 C64.9753418,6.53759766 64.6306966,6.58862305 64.3173828,6.69067383 C64.004069,6.79272461 63.7390951,6.93147786 63.5224609,7.10693359 C63.3058268,7.28238932 63.1223145,7.48828125 62.9719238,7.72460937 C62.8215332,7.9609375 62.7114258,8.21248372 62.6416016,8.47924805 C62.5717773,8.74601237 62.5368652,9.0226237 62.5368652,9.30908203 C62.5368652,9.67073568 62.5941569,10.0126953 62.7087402,10.3349609 C62.8233236,10.6572266 62.9907227,10.949056 63.2109375,11.2104492 C63.4311523,11.4718424 63.7256673,11.6795247 64.0944824,11.8334961 C64.4632975,11.9874674 64.8822428,12.0644531 65.3513184,12.0644531 Z" id="Shape"></path>
+                    <path d="M71.8449707,11.1459961 C71.3830566,11.1459961 71.0231934,10.9848633 70.7653809,10.6625977 C70.5075684,10.340332 70.3786621,9.88916016 70.3786621,9.30908203 C70.3786621,8.72184245 70.5066732,8.26619466 70.7626953,7.94213867 C71.0187174,7.61808268 71.3794759,7.45605469 71.8449707,7.45605469 C72.3104655,7.45605469 72.6721191,7.61897786 72.9299316,7.94482422 C73.1877441,8.27067057 73.3166504,8.72542318 73.3166504,9.30908203 C73.3166504,9.88916016 73.1877441,10.340332 72.9299316,10.6625977 C72.6721191,10.9848633 72.3104655,11.1459961 71.8449707,11.1459961 Z M71.8449707,12.0644531 C72.224528,12.0644531 72.5718587,12.0125326 72.8869629,11.9086914 C73.2020671,11.8048503 73.4679362,11.666097 73.6845703,11.4924316 C73.9012044,11.3187663 74.0838216,11.1128743 74.2324219,10.8747559 C74.3810221,10.6366374 74.4902344,10.3868815 74.5600586,10.1254883 C74.6298828,9.86409505 74.6647949,9.59195964 74.6647949,9.30908203 C74.6647949,9.01188151 74.6280924,8.72721354 74.5546875,8.45507812 C74.4812826,8.18294271 74.3675944,7.92960612 74.213623,7.69506836 C74.0596517,7.4605306 73.8734538,7.2582194 73.6550293,7.08813477 C73.4366048,6.91805013 73.172526,6.78377279 72.862793,6.68530273 C72.5530599,6.58683268 72.2137858,6.53759766 71.8449707,6.53759766 C71.4689941,6.53759766 71.124349,6.58862305 70.8110352,6.69067383 C70.4977214,6.79272461 70.2327474,6.93147786 70.0161133,7.10693359 C69.7994792,7.28238932 69.6159668,7.48828125 69.4655762,7.72460937 C69.3151855,7.9609375 69.2050781,8.21248372 69.1352539,8.47924805 C69.0654297,8.74601237 69.0305176,9.0226237 69.0305176,9.30908203 C69.0305176,9.67073568 69.0878092,10.0126953 69.2023926,10.3349609 C69.3169759,10.6572266 69.484375,10.949056 69.7045898,11.2104492 C69.9248047,11.4718424 70.2193197,11.6795247 70.5881348,11.8334961 C70.9569499,11.9874674 71.3758952,12.0644531 71.8449707,12.0644531 Z" id="Shape"></path>
+                    <path d="M76.4963379,12 C76.6968587,12 76.8624674,11.9337565 76.9931641,11.8012695 C77.1238607,11.6687826 77.189209,11.484375 77.189209,11.2480469 L77.189209,5.01220703 C77.189209,4.77587891 77.1247559,4.59147135 76.9958496,4.45898438 C76.8669434,4.3264974 76.7040202,4.26025391 76.5070801,4.26025391 C76.31014,4.26025391 76.1490072,4.3264974 76.0236816,4.45898438 C75.8983561,4.59147135 75.8356934,4.77587891 75.8356934,5.01220703 L75.8356934,11.2480469 C75.8356934,11.4879557 75.8974609,11.6732585 76.0209961,11.8039551 C76.1445312,11.9346517 76.3029785,12 76.4963379,12 Z" id="Path"></path>
+                    <path d="M79.3215332,12 C79.5184733,12 79.6805013,11.9337565 79.8076172,11.8012695 C79.9347331,11.6687826 79.998291,11.484375 79.998291,11.2480469 L79.998291,7.37011719 C79.998291,7.13020833 79.9347331,6.94401042 79.8076172,6.81152344 C79.6805013,6.67903646 79.5184733,6.61279297 79.3215332,6.61279297 C79.1245931,6.61279297 78.9634603,6.67903646 78.8381348,6.81152344 C78.7128092,6.94401042 78.6501465,7.13020833 78.6501465,7.37011719 L78.6501465,11.2480469 C78.6501465,11.4879557 78.7128092,11.6732585 78.8381348,11.8039551 C78.9634603,11.9346517 79.1245931,12 79.3215332,12 Z M79.3215332,5.53857422 C79.5542806,5.53857422 79.7431641,5.46964518 79.8881836,5.33178711 C80.0332031,5.19392904 80.1057129,5.0139974 80.1057129,4.79199219 C80.1057129,4.56998698 80.0340983,4.39095052 79.8908691,4.25488281 C79.74764,4.1188151 79.5596517,4.05078125 79.3269043,4.05078125 C79.0905762,4.05078125 78.8999023,4.1188151 78.7548828,4.25488281 C78.6098633,4.39095052 78.5373535,4.56998698 78.5373535,4.79199219 C78.5373535,5.0139974 78.6098633,5.19392904 78.7548828,5.33178711 C78.8999023,5.46964518 79.0887858,5.53857422 79.3215332,5.53857422 Z" id="Shape"></path>
+                    <path d="M82.0339355,12 C82.2308757,12 82.3929036,11.9346517 82.5200195,11.8039551 C82.6471354,11.6732585 82.7106934,11.4915365 82.7106934,11.2587891 L82.7106934,8.98681641 C82.7106934,8.5320638 82.84139,8.1632487 83.1027832,7.88037109 C83.3641764,7.59749349 83.6739095,7.45605469 84.0319824,7.45605469 C84.3291829,7.45605469 84.5735677,7.55362956 84.7651367,7.7487793 C84.9567057,7.94392904 85.0524902,8.22591146 85.0524902,8.59472656 L85.0524902,11.2587891 C85.0524902,11.4915365 85.115153,11.6732585 85.2404785,11.8039551 C85.365804,11.9346517 85.5233561,12 85.7131348,12 C85.9136556,12 86.0783691,11.9346517 86.2072754,11.8039551 C86.3361816,11.6732585 86.4006348,11.4915365 86.4006348,11.2587891 L86.4006348,8.60009766 C86.4006348,8.25634766 86.3487142,7.95019531 86.244873,7.68164063 C86.1410319,7.41308594 85.9995931,7.19734701 85.8205566,7.03442383 C85.6415202,6.87150065 85.4383138,6.74796549 85.2109375,6.66381836 C84.9835612,6.57967122 84.7409668,6.53759766 84.4831543,6.53759766 C84.064209,6.53759766 83.7034505,6.6163737 83.4008789,6.77392578 C83.0983073,6.93147786 82.8682454,7.1624349 82.7106934,7.46679688 L82.7106934,7.27880859 C82.7106934,7.06754557 82.6480306,6.90372721 82.5227051,6.78735352 C82.3973796,6.67097982 82.2380371,6.61279297 82.0446777,6.61279297 C81.8477376,6.61279297 81.6848145,6.671875 81.5559082,6.79003906 C81.427002,6.90820312 81.3625488,7.07470703 81.3625488,7.28955078 L81.3625488,11.2587891 C81.3625488,11.4915365 81.4252116,11.6732585 81.5505371,11.8039551 C81.6758626,11.9346517 81.8369954,12 82.0339355,12 Z" id="Path"></path>
+                    <path d="M91.6428223,11.9301758 C91.6428223,12.4386393 91.5031738,12.8173014 91.223877,13.0661621 C90.9445801,13.3150228 90.5542806,13.4394531 90.0529785,13.4394531 C89.9097493,13.4394531 89.7638346,13.4269206 89.6152344,13.4018555 C89.4666341,13.3767904 89.3547363,13.3535156 89.279541,13.3320313 C89.2043457,13.3105469 89.0861816,13.272054 88.9250488,13.2165527 C88.763916,13.1610514 88.6672363,13.1279297 88.6350098,13.1171875 C88.5777181,13.0957031 88.5204264,13.0849609 88.4631348,13.0849609 C88.3378092,13.0849609 88.2339681,13.1315104 88.1516113,13.2246094 C88.0692546,13.3177083 88.0280762,13.4251302 88.0280762,13.546875 C88.0280762,13.71875 88.1104329,13.8548177 88.2751465,13.9550781 C88.4685059,14.0732422 88.7397461,14.1726074 89.0888672,14.2531738 C89.4379883,14.3337402 89.8041178,14.3740234 90.1872559,14.3740234 C91.0323079,14.3740234 91.7010091,14.1520182 92.1933594,13.7080078 C92.6857096,13.2639974 92.9318848,12.6176758 92.9318848,11.769043 L92.9318848,7.36474609 C92.9318848,7.12841797 92.8719076,6.94401042 92.7519531,6.81152344 C92.6319987,6.67903646 92.4789225,6.61279297 92.2927246,6.61279297 C92.1280111,6.61279297 91.9874674,6.66202799 91.8710938,6.76049805 C91.7547201,6.8589681 91.6875814,6.99951172 91.6696777,7.18212891 L91.6696777,7.42919922 C91.4906413,7.14274089 91.2775879,6.92431641 91.0305176,6.77392578 C90.7834473,6.62353516 90.443278,6.54833984 90.0100098,6.54833984 C89.2329915,6.54833984 88.6180013,6.80704753 88.1650391,7.32446289 C87.7120768,7.84187826 87.4855957,8.51416016 87.4855957,9.34130859 C87.4855957,10.1612956 87.7183431,10.8103027 88.1838379,11.2883301 C88.6493327,11.7663574 89.2687988,12.0053711 90.0422363,12.0053711 C90.8049316,12.0053711 91.3384603,11.7171224 91.6428223,11.140625 L91.6428223,11.9301758 Z M90.2839355,11.1083984 C89.8757324,11.101237 89.5364583,10.9463704 89.2661133,10.6437988 C88.9957682,10.3412272 88.8605957,9.89453125 88.8605957,9.30371094 C88.8605957,9.09960938 88.876709,8.90893555 88.9089355,8.73168945 C88.9411621,8.55444336 88.9930827,8.38614909 89.0646973,8.22680664 C89.1363118,8.06746419 89.2258301,7.93139648 89.333252,7.81860352 C89.4406738,7.70581055 89.5749512,7.61629232 89.736084,7.55004883 C89.8972168,7.48380534 90.0780436,7.45068359 90.2785645,7.45068359 C91.1880697,7.45068359 91.6428223,8.0719401 91.6428223,9.31445312 C91.6428223,9.92675781 91.5192871,10.3770345 91.2722168,10.6652832 C91.0251465,10.9535319 90.6957194,11.101237 90.2839355,11.1083984 Z" id="Shape"></path>
+                </g>
+                <g id="vLLM-dash-box" transform="translate(0, 72)" xlink:href="#path-8">
+                    <path d="M7.60126829,127.489595 C7.73368411,127.496523 7.86662278,127.5 8,127.5 L8.77464335,127.5 C9.05078572,127.5 9.27464335,127.723858 9.27464335,128 C9.27464335,128.276142 9.05078572,128.5 8.77464335,128.5 L8,128.5 C7.8491935,128.5 7.69883087,128.496068 7.54901422,128.488228 C7.27324911,128.473799 7.06139478,128.23855 7.07582434,127.962785 C7.09025391,127.687019 7.32550317,127.475165 7.60126829,127.489595 Z M101.674643,128 C101.674643,128.276142 101.450786,128.5 101.174643,128.5 L99.9746433,128.5 C99.698501,128.5 99.4746433,128.276142 99.4746433,128 C99.4746433,127.723858 99.698501,127.5 99.9746433,127.5 L101.174643,127.5 C101.450786,127.5 101.674643,127.723858 101.674643,128 Z M17.6746433,128 C17.6746433,128.276142 17.4507857,128.5 17.1746433,128.5 L15.9746433,128.5 C15.698501,128.5 15.4746433,128.276142 15.4746433,128 C15.4746433,127.723858 15.698501,127.5 15.9746433,127.5 L17.1746434,127.5 C17.4507857,127.5 17.6746433,127.723858 17.6746433,128 Z M21.8746433,128 C21.8746433,128.276142 21.6507857,128.5 21.3746433,128.5 L20.1746433,128.5 C19.898501,128.5 19.6746433,128.276142 19.6746433,128 C19.6746433,127.723858 19.898501,127.5 20.1746433,127.5 L21.3746434,127.5 C21.6507857,127.5 21.8746433,127.723858 21.8746433,128 Z M26.0746433,128 C26.0746433,128.276142 25.8507857,128.5 25.5746433,128.5 L24.3746433,128.5 C24.098501,128.5 23.8746433,128.276142 23.8746433,128 C23.8746433,127.723858 24.098501,127.5 24.3746433,127.5 L25.5746435,127.5 C25.8507857,127.5 26.0746433,127.723858 26.0746433,128 Z M30.2746433,128 C30.2746433,128.276142 30.0507857,128.5 29.7746433,128.5 L28.5746433,128.5 C28.298501,128.5 28.0746433,128.276142 28.0746433,128 C28.0746433,127.723858 28.298501,127.5 28.5746433,127.5 L29.7746434,127.5 C30.0507857,127.5 30.2746433,127.723858 30.2746433,128 Z M34.4746433,128 C34.4746433,128.276142 34.2507857,128.5 33.9746433,128.5 L32.7746433,128.5 C32.498501,128.5 32.2746433,128.276142 32.2746433,128 C32.2746433,127.723858 32.498501,127.5 32.7746433,127.5 L33.9746434,127.5 C34.2507857,127.5 34.4746433,127.723858 34.4746433,128 Z M38.6746433,128 C38.6746433,128.276142 38.4507857,128.5 38.1746433,128.5 L36.9746433,128.5 C36.698501,128.5 36.4746433,128.276142 36.4746433,128 C36.4746433,127.723858 36.698501,127.5 36.9746433,127.5 L38.1746434,127.5 C38.4507857,127.5 38.6746433,127.723858 38.6746433,128 Z M42.8746433,128 C42.8746433,128.276142 42.6507857,128.5 42.3746433,128.5 L41.1746433,128.5 C40.898501,128.5 40.6746433,128.276142 40.6746433,128 C40.6746433,127.723858 40.898501,127.5 41.1746433,127.5 L42.3746434,127.5 C42.6507857,127.5 42.8746433,127.723858 42.8746433,128 Z M47.0746433,128 C47.0746433,128.276142 46.8507857,128.5 46.5746433,128.5 L45.3746433,128.5 C45.098501,128.5 44.8746433,128.276142 44.8746433,128 C44.8746433,127.723858 45.098501,127.5 45.3746433,127.5 L46.5746434,127.5 C46.8507857,127.5 47.0746433,127.723858 47.0746433,128 Z M51.2746433,128 C51.2746433,128.276142 51.0507857,128.5 50.7746433,128.5 L49.5746433,128.5 C49.298501,128.5 49.0746433,128.276142 49.0746433,128 C49.0746433,127.723858 49.298501,127.5 49.5746433,127.5 L50.7746434,127.5 C51.0507857,127.5 51.2746433,127.723858 51.2746433,128 Z M55.4746433,128 C55.4746433,128.276142 55.2507857,128.5 54.9746433,128.5 L53.7746433,128.5 C53.498501,128.5 53.2746433,128.276142 53.2746433,128 C53.2746433,127.723858 53.498501,127.5 53.7746433,127.5 L54.9746434,127.5 C55.2507857,127.5 55.4746433,127.723858 55.4746433,128 Z M59.6746433,128 C59.6746433,128.276142 59.4507857,128.5 59.1746433,128.5 L57.9746433,128.5 C57.698501,128.5 57.4746433,128.276142 57.4746433,128 C57.4746433,127.723858 57.698501,127.5 57.9746433,127.5 L59.1746433,127.5 C59.4507857,127.5 59.6746433,127.723858 59.6746433,128 Z M105.874643,128 C105.874643,128.276142 105.650786,128.5 105.374643,128.5 L104.174643,128.5 C103.898501,128.5 103.674643,128.276142 103.674643,128 C103.674643,127.723858 103.898501,127.5 104.174643,127.5 L105.374643,127.5 C105.650786,127.5 105.874643,127.723858 105.874643,128 Z M110.074643,128 C110.074643,128.276142 109.850786,128.5 109.574643,128.5 L108.374643,128.5 C108.098501,128.5 107.874643,128.276142 107.874643,128 C107.874643,127.723858 108.098501,127.5 108.374643,127.5 L109.574643,127.5 C109.850786,127.5 110.074643,127.723858 110.074643,128 Z M13.4746433,128 C13.4746433,128.276142 13.2507857,128.5 12.9746433,128.5 L11.7746433,128.5 C11.498501,128.5 11.2746433,128.276142 11.2746433,128 C11.2746433,127.723858 11.498501,127.5 11.7746433,127.5 L12.9746434,127.5 C13.2507857,127.5 13.4746433,127.723858 13.4746433,128 Z M63.8746433,128 C63.8746433,128.276142 63.6507857,128.5 63.3746433,128.5 L62.1746433,128.5 C61.898501,128.5 61.6746433,128.276142 61.6746433,128 C61.6746433,127.723858 61.898501,127.5 62.1746433,127.5 L63.3746432,127.5 C63.6507857,127.5 63.8746433,127.723858 63.8746433,128 Z M68.0746433,128 C68.0746433,128.276142 67.8507857,128.5 67.5746433,128.5 L66.3746433,128.5 C66.098501,128.5 65.8746433,128.276142 65.8746433,128 C65.8746433,127.723858 66.098501,127.5 66.3746433,127.5 L67.5746433,127.5 C67.8507857,127.5 68.0746433,127.723858 68.0746433,128 Z M72.2746433,128 C72.2746433,128.276142 72.0507857,128.5 71.7746433,128.5 L70.5746433,128.5 C70.298501,128.5 70.0746433,128.276142 70.0746433,128 C70.0746433,127.723858 70.298501,127.5 70.5746433,127.5 L71.7746433,127.5 C72.0507857,127.5 72.2746433,127.723858 72.2746433,128 Z M76.4746433,128 C76.4746433,128.276142 76.2507857,128.5 75.9746433,128.5 L74.7746433,128.5 C74.498501,128.5 74.2746433,128.276142 74.2746433,128 C74.2746433,127.723858 74.498501,127.5 74.7746433,127.5 L75.9746433,127.5 C76.2507857,127.5 76.4746433,127.723858 76.4746433,128 Z M80.6746433,128 C80.6746433,128.276142 80.4507857,128.5 80.1746433,128.5 L78.9746433,128.5 C78.698501,128.5 78.4746433,128.276142 78.4746433,128 C78.4746433,127.723858 78.698501,127.5 78.9746433,127.5 L80.1746433,127.5 C80.4507857,127.5 80.6746433,127.723858 80.6746433,128 Z M84.8746433,128 C84.8746433,128.276142 84.6507857,128.5 84.3746433,128.5 L83.1746433,128.5 C82.898501,128.5 82.6746433,128.276142 82.6746433,128 C82.6746433,127.723858 82.898501,127.5 83.1746433,127.5 L84.3746433,127.5 C84.6507857,127.5 84.8746433,127.723858 84.8746433,128 Z M89.0746433,128 C89.0746433,128.276142 88.8507857,128.5 88.5746433,128.5 L87.3746433,128.5 C87.098501,128.5 86.8746433,128.276142 86.8746433,128 C86.8746433,127.723858 87.098501,127.5 87.3746433,127.5 L88.5746433,127.5 C88.8507857,127.5 89.0746433,127.723858 89.0746433,128 Z M93.2746433,128 C93.2746433,128.276142 93.0507857,128.5 92.7746433,128.5 L91.5746433,128.5 C91.298501,128.5 91.0746433,128.276142 91.0746433,128 C91.0746433,127.723858 91.298501,127.5 91.5746433,127.5 L92.7746433,127.5 C93.0507857,127.5 93.2746433,127.723858 93.2746433,128 Z M97.4746433,128 C97.4746433,128.276142 97.2507857,128.5 96.9746433,128.5 L95.7746433,128.5 C95.498501,128.5 95.2746433,128.276142 95.2746433,128 C95.2746433,127.723858 95.498501,127.5 95.7746433,127.5 L96.9746433,127.5 C97.2507857,127.5 97.4746433,127.723858 97.4746433,128 Z M114.055358,126.915582 C114.180112,127.161937 114.081535,127.462781 113.835179,127.587535 C113.455748,127.779678 113.062077,127.943136 112.657126,128.076292 C112.394802,128.16255 112.11222,128.01982 112.025961,127.757496 C111.939703,127.495171 112.082433,127.212589 112.344758,127.126331 C112.701744,127.008946 113.048821,126.864836 113.383405,126.695403 C113.62976,126.570649 113.930604,126.669226 114.055358,126.915582 Z M3.87987036,126.26801 C4.19335033,126.474514 4.52190256,126.657254 4.86287326,126.814501 C5.11363396,126.930146 5.22316717,127.227176 5.10752253,127.477937 C4.9918779,127.728697 4.69484768,127.838231 4.44408698,127.722586 C4.05748525,127.544295 3.68504135,127.337142 3.32975792,127.103101 C3.09915402,126.951191 3.03535949,126.641103 3.18726884,126.410499 C3.3391782,126.179895 3.64926646,126.116101 3.87987036,126.26801 Z M116.999048,123.909773 C117.231117,124.059434 117.297922,124.368887 117.148261,124.600957 C116.917678,124.958505 116.660543,125.298348 116.37924,125.617559 C116.196668,125.824735 115.880714,125.844681 115.673538,125.662108 C115.466362,125.479535 115.446417,125.163582 115.62899,124.956406 C115.877349,124.674578 116.104345,124.374568 116.307864,124.058986 C116.457525,123.826916 116.766979,123.760111 116.999048,123.909773 Z M1.27256369,123.319388 C1.43898255,123.655985 1.63055181,123.979556 1.84546492,124.287446 C2.00352124,124.513881 1.94808936,124.825572 1.72165435,124.983629 C1.49521934,125.141685 1.18352754,125.086253 1.02547122,124.859818 C0.781933905,124.510921 0.564805969,124.14418 0.376143395,123.762593 C0.25375572,123.515054 0.355211426,123.215168 0.602751055,123.092781 C0.850290685,122.970393 1.15017602,123.071849 1.27256369,123.319388 Z M118.493626,120.332066 C118.477178,120.759793 118.428966,121.182879 118.349775,121.598809 C118.298127,121.870079 118.036351,122.048117 117.765081,121.996469 C117.493812,121.944821 117.315774,121.683044 117.367422,121.411775 C117.437286,121.044832 117.479837,120.671419 117.494364,120.29364 C117.504975,120.017702 117.73727,119.802611 118.013208,119.813222 C118.289146,119.823833 118.504237,120.056128 118.493626,120.332066 Z M0.5,119.461965 L0.5,120.011642 C0.500324493,120.219706 0.509101847,120.42668 0.526243834,120.632238 C0.549192342,120.907426 0.344712508,121.149112 0.0695253402,121.172061 C-0.205661827,121.195009 -0.447348546,120.990529 -0.470297054,120.715342 C-0.489701403,120.482655 -0.49963303,120.248463 -0.5,120.01241 L-0.5,119.461965 C-0.5,119.185831 -0.276150919,118.961965 7.30095984e-11,118.961965 C0.27613383,118.961965 0.5,119.185814 0.5,119.461965 Z M118.5,116.112678 L118.5,117.312678 C118.5,117.588821 118.276142,117.812678 118,117.812678 C117.723858,117.812678 117.5,117.588821 117.5,117.312678 L117.5,116.112678 C117.5,115.836536 117.723858,115.612678 118,115.612678 C118.276142,115.612678 118.5,115.836536 118.5,116.112678 Z M0.5,115.261965 L0.5,116.461965 C0.5,116.738107 0.276142375,116.961965 0,116.961965 C-0.276142375,116.961965 -0.5,116.738107 -0.5,116.461965 L-0.5,115.261965 C-0.5,114.985823 -0.276142375,114.761965 0,114.761965 C0.276142375,114.761965 0.5,114.985823 0.5,115.261965 Z M118.5,111.912678 L118.5,113.112678 C118.5,113.388821 118.276142,113.612678 118,113.612678 C117.723858,113.612678 117.5,113.388821 117.5,113.112678 L117.5,111.912678 C117.5,111.636536 117.723858,111.412678 118,111.412678 C118.276142,111.412678 118.5,111.636536 118.5,111.912678 Z M0.5,111.061965 L0.5,112.261965 C0.5,112.538107 0.276142375,112.761965 0,112.761965 C-0.276142375,112.761965 -0.5,112.538107 -0.5,112.261965 L-0.5,111.061965 C-0.5,110.785823 -0.276142375,110.561965 0,110.561965 C0.276142375,110.561965 0.5,110.785823 0.5,111.061965 Z M118.5,107.712678 L118.5,108.912678 C118.5,109.188821 118.276142,109.412678 118,109.412678 C117.723858,109.412678 117.5,109.188821 117.5,108.912678 L117.5,107.712678 C117.5,107.436536 117.723858,107.212678 118,107.212678 C118.276142,107.212678 118.5,107.436536 118.5,107.712678 Z M0.5,106.861965 L0.5,108.061965 C0.5,108.338107 0.276142375,108.561965 0,108.561965 C-0.276142375,108.561965 -0.5,108.338107 -0.5,108.061965 L-0.5,106.861965 C-0.5,106.585823 -0.276142375,106.361965 0,106.361965 C0.276142375,106.361965 0.5,106.585823 0.5,106.861965 Z M118.5,103.512678 L118.5,104.712678 C118.5,104.988821 118.276142,105.212678 118,105.212678 C117.723858,105.212678 117.5,104.988821 117.5,104.712678 L117.5,103.512678 C117.5,103.236536 117.723858,103.012678 118,103.012678 C118.276142,103.012678 118.5,103.236536 118.5,103.512678 Z M0.5,102.661965 L0.5,103.861965 C0.5,104.138107 0.276142375,104.361965 0,104.361965 C-0.276142375,104.361965 -0.5,104.138107 -0.5,103.861965 L-0.5,102.661965 C-0.5,102.385823 -0.276142375,102.161965 0,102.161965 C0.276142375,102.161965 0.5,102.385823 0.5,102.661965 Z M118.5,99.3126783 L118.5,100.512678 C118.5,100.788821 118.276142,101.012678 118,101.012678 C117.723858,101.012678 117.5,100.788821 117.5,100.512678 L117.5,99.3126783 C117.5,99.036536 117.723858,98.8126783 118,98.8126783 C118.276142,98.8126783 118.5,99.036536 118.5,99.3126783 Z M0.5,98.461965 L0.5,99.661965 C0.5,99.9381074 0.276142375,100.161965 0,100.161965 C-0.276142375,100.161965 -0.5,99.9381074 -0.5,99.661965 L-0.5,98.461965 C-0.5,98.1858226 -0.276142375,97.961965 0,97.961965 C0.276142375,97.961965 0.5,98.1858226 0.5,98.461965 Z M118.5,95.1126783 L118.5,96.3126783 C118.5,96.5888207 118.276142,96.8126783 118,96.8126783 C117.723858,96.8126783 117.5,96.5888207 117.5,96.3126783 L117.5,95.1126783 C117.5,94.836536 117.723858,94.6126783 118,94.6126783 C118.276142,94.6126783 118.5,94.836536 118.5,95.1126783 Z M0.5,94.261965 L0.5,95.461965 C0.5,95.7381074 0.276142375,95.961965 0,95.961965 C-0.276142375,95.961965 -0.5,95.7381074 -0.5,95.461965 L-0.5,94.261965 C-0.5,93.9858226 -0.276142375,93.761965 0,93.761965 C0.276142375,93.761965 0.5,93.9858226 0.5,94.261965 Z M118.5,90.9126783 L118.5,92.1126783 C118.5,92.3888207 118.276142,92.6126783 118,92.6126783 C117.723858,92.6126783 117.5,92.3888207 117.5,92.1126783 L117.5,90.9126783 C117.5,90.636536 117.723858,90.4126783 118,90.4126783 C118.276142,90.4126783 118.5,90.636536 118.5,90.9126783 Z M0.5,90.061965 L0.5,91.261965 C0.5,91.5381074 0.276142375,91.761965 0,91.761965 C-0.276142375,91.761965 -0.5,91.5381074 -0.5,91.261965 L-0.5,90.061965 C-0.5,89.7858226 -0.276142375,89.561965 0,89.561965 C0.276142375,89.561965 0.5,89.7858226 0.5,90.061965 Z M118.5,86.7126783 L118.5,87.9126783 C118.5,88.1888207 118.276142,88.4126783 118,88.4126783 C117.723858,88.4126783 117.5,88.1888207 117.5,87.9126783 L117.5,86.7126783 C117.5,86.436536 117.723858,86.2126783 118,86.2126783 C118.276142,86.2126783 118.5,86.436536 118.5,86.7126783 Z M0.5,85.861965 L0.5,87.061965 C0.5,87.3381074 0.276142375,87.561965 0,87.561965 C-0.276142375,87.561965 -0.5,87.3381074 -0.5,87.061965 L-0.5,85.861965 C-0.5,85.5858226 -0.276142375,85.361965 0,85.361965 C0.276142375,85.361965 0.5,85.5858226 0.5,85.861965 Z M118.5,82.5126783 L118.5,83.7126783 C118.5,83.9888207 118.276142,84.2126783 118,84.2126783 C117.723858,84.2126783 117.5,83.9888207 117.5,83.7126783 L117.5,82.5126783 C117.5,82.236536 117.723858,82.0126783 118,82.0126783 C118.276142,82.0126783 118.5,82.236536 118.5,82.5126783 Z M0.5,81.661965 L0.5,82.861965 C0.5,83.1381074 0.276142375,83.361965 0,83.361965 C-0.276142375,83.361965 -0.5,83.1381074 -0.5,82.861965 L-0.5,81.661965 C-0.5,81.3858226 -0.276142375,81.161965 0,81.161965 C0.276142375,81.161965 0.5,81.3858226 0.5,81.661965 Z M118.5,78.3126783 L118.5,79.5126783 C118.5,79.7888207 118.276142,80.0126783 118,80.0126783 C117.723858,80.0126783 117.5,79.7888207 117.5,79.5126783 L117.5,78.3126783 C117.5,78.036536 117.723858,77.8126783 118,77.8126783 C118.276142,77.8126783 118.5,78.036536 118.5,78.3126783 Z M0.5,77.461965 L0.5,78.661965 C0.5,78.9381074 0.276142375,79.161965 0,79.161965 C-0.276142375,79.161965 -0.5,78.9381074 -0.5,78.661965 L-0.5,77.461965 C-0.5,77.1858226 -0.276142375,76.961965 0,76.961965 C0.276142375,76.961965 0.5,77.1858226 0.5,77.461965 Z M118.5,74.1126783 L118.5,75.3126783 C118.5,75.5888207 118.276142,75.8126783 118,75.8126783 C117.723858,75.8126783 117.5,75.5888207 117.5,75.3126783 L117.5,74.1126783 C117.5,73.836536 117.723858,73.6126783 118,73.6126783 C118.276142,73.6126783 118.5,73.836536 118.5,74.1126783 Z M0.5,73.261965 L0.5,74.461965 C0.5,74.7381074 0.276142375,74.961965 0,74.961965 C-0.276142375,74.961965 -0.5,74.7381074 -0.5,74.461965 L-0.5,73.261965 C-0.5,72.9858226 -0.276142375,72.761965 0,72.761965 C0.276142375,72.761965 0.5,72.9858226 0.5,73.261965 Z M118.5,69.9126783 L118.5,71.1126783 C118.5,71.3888207 118.276142,71.6126783 118,71.6126783 C117.723858,71.6126783 117.5,71.3888207 117.5,71.1126783 L117.5,69.9126783 C117.5,69.636536 117.723858,69.4126783 118,69.4126783 C118.276142,69.4126783 118.5,69.636536 118.5,69.9126783 Z M0.5,69.061965 L0.5,70.261965 C0.5,70.5381074 0.276142375,70.761965 0,70.761965 C-0.276142375,70.761965 -0.5,70.5381074 -0.5,70.261965 L-0.5,69.061965 C-0.5,68.7858226 -0.276142375,68.561965 0,68.561965 C0.276142375,68.561965 0.5,68.7858226 0.5,69.061965 Z M118.5,65.7126783 L118.5,66.9126783 C118.5,67.1888207 118.276142,67.4126783 118,67.4126783 C117.723858,67.4126783 117.5,67.1888207 117.5,66.9126783 L117.5,65.7126783 C117.5,65.436536 117.723858,65.2126783 118,65.2126783 C118.276142,65.2126783 118.5,65.436536 118.5,65.7126783 Z M0.5,64.861965 L0.5,66.061965 C0.5,66.3381074 0.276142375,66.561965 0,66.561965 C-0.276142375,66.561965 -0.5,66.3381074 -0.5,66.061965 L-0.5,64.861965 C-0.5,64.5858226 -0.276142375,64.361965 0,64.361965 C0.276142375,64.361965 0.5,64.5858226 0.5,64.861965 Z M118.5,61.5126783 L118.5,62.7126783 C118.5,62.9888207 118.276142,63.2126783 118,63.2126783 C117.723858,63.2126783 117.5,62.9888207 117.5,62.7126783 L117.5,61.5126783 C117.5,61.236536 117.723858,61.0126783 118,61.0126783 C118.276142,61.0126783 118.5,61.236536 118.5,61.5126783 Z M0.5,60.661965 L0.5,61.861965 C0.5,62.1381074 0.276142375,62.361965 0,62.361965 C-0.276142375,62.361965 -0.5,62.1381074 -0.5,61.861965 L-0.5,60.661965 C-0.5,60.3858226 -0.276142375,60.161965 0,60.161965 C0.276142375,60.161965 0.5,60.3858226 0.5,60.661965 Z M118.5,57.3126783 L118.5,58.5126783 C118.5,58.7888207 118.276142,59.0126783 118,59.0126783 C117.723858,59.0126783 117.5,58.7888207 117.5,58.5126783 L117.5,57.3126783 C117.5,57.036536 117.723858,56.8126783 118,56.8126783 C118.276142,56.8126783 118.5,57.036536 118.5,57.3126783 Z M0.5,56.461965 L0.5,57.661965 C0.5,57.9381074 0.276142375,58.161965 0,58.161965 C-0.276142375,58.161965 -0.5,57.9381074 -0.5,57.661965 L-0.5,56.461965 C-0.5,56.1858226 -0.276142375,55.961965 0,55.961965 C0.276142375,55.961965 0.5,56.1858226 0.5,56.461965 Z M118.5,53.1126783 L118.5,54.3126783 C118.5,54.5888207 118.276142,54.8126783 118,54.8126783 C117.723858,54.8126783 117.5,54.5888207 117.5,54.3126783 L117.5,53.1126783 C117.5,52.836536 117.723858,52.6126783 118,52.6126783 C118.276142,52.6126783 118.5,52.836536 118.5,53.1126783 Z M0.5,52.261965 L0.5,53.461965 C0.5,53.7381074 0.276142375,53.961965 0,53.961965 C-0.276142375,53.961965 -0.5,53.7381074 -0.5,53.461965 L-0.5,52.261965 C-0.5,51.9858226 -0.276142375,51.761965 0,51.761965 C0.276142375,51.761965 0.5,51.9858226 0.5,52.261965 Z M118.5,48.9126783 L118.5,50.1126783 C118.5,50.3888207 118.276142,50.6126783 118,50.6126783 C117.723858,50.6126783 117.5,50.3888207 117.5,50.1126783 L117.5,48.9126783 C117.5,48.636536 117.723858,48.4126783 118,48.4126783 C118.276142,48.4126783 118.5,48.636536 118.5,48.9126783 Z M0.5,48.061965 L0.5,49.261965 C0.5,49.5381074 0.276142375,49.761965 0,49.761965 C-0.276142375,49.761965 -0.5,49.5381074 -0.5,49.261965 L-0.5,48.061965 C-0.5,47.7858226 -0.276142375,47.561965 0,47.561965 C0.276142375,47.561965 0.5,47.7858226 0.5,48.061965 Z M118.5,44.7126783 L118.5,45.9126783 C118.5,46.1888207 118.276142,46.4126783 118,46.4126783 C117.723858,46.4126783 117.5,46.1888207 117.5,45.9126783 L117.5,44.7126783 C117.5,44.436536 117.723858,44.2126783 118,44.2126783 C118.276142,44.2126783 118.5,44.436536 118.5,44.7126783 Z M0.5,43.861965 L0.5,45.061965 C0.5,45.3381074 0.276142375,45.561965 0,45.561965 C-0.276142375,45.561965 -0.5,45.3381074 -0.5,45.061965 L-0.5,43.861965 C-0.5,43.5858226 -0.276142375,43.361965 0,43.361965 C0.276142375,43.361965 0.5,43.5858226 0.5,43.861965 Z M118.5,40.5126783 L118.5,41.7126783 C118.5,41.9888207 118.276142,42.2126783 118,42.2126783 C117.723858,42.2126783 117.5,41.9888207 117.5,41.7126783 L117.5,40.5126783 C117.5,40.236536 117.723858,40.0126783 118,40.0126783 C118.276142,40.0126783 118.5,40.236536 118.5,40.5126783 Z M0.5,39.661965 L0.5,40.861965 C0.5,41.1381074 0.276142375,41.361965 0,41.361965 C-0.276142375,41.361965 -0.5,41.1381074 -0.5,40.861965 L-0.5,39.661965 C-0.5,39.3858226 -0.276142375,39.161965 0,39.161965 C0.276142375,39.161965 0.5,39.3858226 0.5,39.661965 Z M118.5,36.3126783 L118.5,37.5126783 C118.5,37.7888207 118.276142,38.0126783 118,38.0126783 C117.723858,38.0126783 117.5,37.7888207 117.5,37.5126783 L117.5,36.3126783 C117.5,36.036536 117.723858,35.8126783 118,35.8126783 C118.276142,35.8126783 118.5,36.036536 118.5,36.3126783 Z M0.5,35.461965 L0.5,36.661965 C0.5,36.9381074 0.276142375,37.161965 0,37.161965 C-0.276142375,37.161965 -0.5,36.9381074 -0.5,36.661965 L-0.5,35.461965 C-0.5,35.1858226 -0.276142375,34.961965 0,34.961965 C0.276142375,34.961965 0.5,35.1858226 0.5,35.461965 Z M118.5,32.1126783 L118.5,33.3126783 C118.5,33.5888207 118.276142,33.8126783 118,33.8126783 C117.723858,33.8126783 117.5,33.5888207 117.5,33.3126783 L117.5,32.1126783 C117.5,31.836536 117.723858,31.6126783 118,31.6126783 C118.276142,31.6126783 118.5,31.836536 118.5,32.1126783 Z M0.5,31.261965 L0.5,32.461965 C0.5,32.7381074 0.276142375,32.961965 0,32.961965 C-0.276142375,32.961965 -0.5,32.7381074 -0.5,32.461965 L-0.5,31.261965 C-0.5,30.9858226 -0.276142375,30.761965 0,30.761965 C0.276142375,30.761965 0.5,30.9858226 0.5,31.261965 Z M118.5,27.9126783 L118.5,29.1126783 C118.5,29.3888207 118.276142,29.6126783 118,29.6126783 C117.723858,29.6126783 117.5,29.3888207 117.5,29.1126783 L117.5,27.9126783 C117.5,27.636536 117.723858,27.4126783 118,27.4126783 C118.276142,27.4126783 118.5,27.636536 118.5,27.9126783 Z M0.5,27.061965 L0.5,28.261965 C0.5,28.5381074 0.276142375,28.761965 0,28.761965 C-0.276142375,28.761965 -0.5,28.5381074 -0.5,28.261965 L-0.5,27.061965 C-0.5,26.7858226 -0.276142375,26.561965 0,26.561965 C0.276142375,26.561965 0.5,26.7858226 0.5,27.061965 Z M118.5,23.7126783 L118.5,24.9126783 C118.5,25.1888207 118.276142,25.4126783 118,25.4126783 C117.723858,25.4126783 117.5,25.1888207 117.5,24.9126783 L117.5,23.7126783 C117.5,23.436536 117.723858,23.2126783 118,23.2126783 C118.276142,23.2126783 118.5,23.436536 118.5,23.7126783 Z M0.5,22.861965 L0.5,24.061965 C0.5,24.3381074 0.276142375,24.561965 0,24.561965 C-0.276142375,24.561965 -0.5,24.3381074 -0.5,24.061965 L-0.5,22.861965 C-0.5,22.5858226 -0.276142375,22.361965 0,22.361965 C0.276142375,22.361965 0.5,22.5858226 0.5,22.861965 Z M118.5,19.5126783 L118.5,20.7126783 C118.5,20.9888207 118.276142,21.2126783 118,21.2126783 C117.723858,21.2126783 117.5,20.9888207 117.5,20.7126783 L117.5,19.5126783 C117.5,19.236536 117.723858,19.0126783 118,19.0126783 C118.276142,19.0126783 118.5,19.236536 118.5,19.5126783 Z M0.5,18.661965 L0.5,19.861965 C0.5,20.1381074 0.276142375,20.361965 0,20.361965 C-0.276142375,20.361965 -0.5,20.1381074 -0.5,19.861965 L-0.5,18.661965 C-0.5,18.3858226 -0.276142375,18.161965 0,18.161965 C0.276142375,18.161965 0.5,18.3858226 0.5,18.661965 Z M118.5,15.3126783 L118.5,16.5126783 C118.5,16.7888207 118.276142,17.0126783 118,17.0126783 C117.723858,17.0126783 117.5,16.7888207 117.5,16.5126783 L117.5,15.3126783 C117.5,15.036536 117.723858,14.8126783 118,14.8126783 C118.276142,14.8126783 118.5,15.036536 118.5,15.3126783 Z M0.5,14.461965 L0.5,15.661965 C0.5,15.9381074 0.276142375,16.161965 0,16.161965 C-0.276142375,16.161965 -0.5,15.9381074 -0.5,15.661965 L-0.5,14.461965 C-0.5,14.1858226 -0.276142375,13.961965 0,13.961965 C0.276142375,13.961965 0.5,14.1858226 0.5,14.461965 Z M118.5,11.1126783 L118.5,12.3126783 C118.5,12.5888207 118.276142,12.8126783 118,12.8126783 C117.723858,12.8126783 117.5,12.5888207 117.5,12.3126783 L117.5,11.1126783 C117.5,10.836536 117.723858,10.6126783 118,10.6126783 C118.276142,10.6126783 118.5,10.836536 118.5,11.1126783 Z M0.5,10.261965 L0.5,11.461965 C0.5,11.7381074 0.276142375,11.961965 0,11.961965 C-0.276142375,11.961965 -0.5,11.7381074 -0.5,11.461965 L-0.5,10.261965 C-0.5,9.98582265 -0.276142375,9.76196502 0,9.76196502 C0.276142375,9.76196502 0.5,9.98582265 0.5,10.261965 Z M118.419805,6.82803952 C118.47219,7.20768546 118.499081,7.59225728 118.499978,7.98103351 L118.5,8.11259543 C118.500046,8.3887378 118.276225,8.61263254 118.000083,8.61267833 C117.723941,8.6127241 117.500046,8.3889036 117.5,8.11276123 L117.49998,7.98227163 C117.499188,7.63955287 117.475436,7.29987535 117.429191,6.96472849 C117.391445,6.69117799 117.582603,6.43882262 117.856153,6.401077 C118.129704,6.36333138 118.382059,6.55448901 118.419805,6.82803952 Z M0.351560452,5.59425596 C0.619685241,5.66031433 0.783492437,5.93122331 0.717434064,6.1993481 C0.628029624,6.5622324 0.565753874,6.93265807 0.531429382,7.30840907 C0.506308636,7.58340645 0.263014824,7.78597148 -0.0119825569,7.76085074 C-0.286979937,7.73572999 -0.489544973,7.49243618 -0.464424227,7.2174388 C-0.425527861,6.79163939 -0.354921179,6.37166007 -0.25353169,5.96012957 C-0.187473317,5.69200478 0.0834356631,5.52819758 0.351560452,5.59425596 Z M116.698898,2.76733419 C116.96108,3.1025346 117.197883,3.45694423 117.407088,3.8275859 C117.542824,4.06806518 117.457913,4.37304812 117.217434,4.50878435 C116.976955,4.64452059 116.671972,4.55960957 116.536235,4.31913028 C116.35164,3.9920895 116.14265,3.67930561 115.911223,3.38342515 C115.741094,3.1659147 115.779505,2.85167098 115.997015,2.68154216 C116.214525,2.51141334 116.528769,2.54982374 116.698898,2.76733419 Z M2.65018329,2.03098857 C2.84694705,2.22473746 2.84939109,2.54131052 2.6556422,2.73807428 C2.39207973,3.00573797 2.14880502,3.29275366 1.92806459,3.59657449 C1.76575137,3.81997777 1.45306636,3.86950105 1.22966307,3.70718783 C1.00625978,3.54487461 0.956736501,3.23218959 1.11904973,3.00878631 C1.36908653,2.66464279 1.64461146,2.33957825 1.94309758,2.03644748 C2.13684647,1.83968372 2.45341952,1.83723968 2.65018329,2.03098857 Z M113.131516,0.0955761984 C113.527572,0.252607282 113.910844,0.439252343 114.278336,0.653707596 C114.516838,0.792888857 114.597354,1.09906161 114.458172,1.33756357 C114.318991,1.57606554 114.012818,1.65658088 113.774317,1.51739962 C113.450161,1.32823426 113.112159,1.1636344 112.762943,1.0251747 C112.506241,0.923395871 112.380651,0.63279011 112.48243,0.376088571 C112.584209,0.119387032 112.874815,-0.00620263532 113.131516,0.0955761984 Z M6.40653195,0.142669081 C6.47802057,0.409397386 6.31974754,0.683576376 6.05301923,0.755064996 C5.68981674,0.852410663 5.33533565,0.976934286 4.9920952,1.12738697 C4.73918251,1.23824627 4.44428705,1.12308924 4.33342776,0.870176548 C4.22256847,0.617263859 4.3377255,0.322368401 4.59063819,0.211509108 C4.97997232,0.0408521629 5.38210756,-0.100411613 5.79413604,-0.21084364 C6.06086434,-0.282332259 6.33504333,-0.124059223 6.40653195,0.142669081 Z M110.500405,2.47493888e-05 C110.500405,0.276167124 110.276522,0.500013673 110.00038,0.5 L108.8,0.5 C108.523858,0.5 108.3,0.276142375 108.3,0 C108.3,-0.276142375 108.523858,-0.5 108.8,-0.5 L110,-0.5 C110.276572,-0.499986326 110.500405,-0.276117625 110.500405,2.47493888e-05 Z M106.3,0 C106.3,0.276142375 106.076142,0.5 105.8,0.5 L104.6,0.5 C104.323858,0.5 104.1,0.276142375 104.1,0 C104.1,-0.276142375 104.323858,-0.5 104.6,-0.5 L105.8,-0.5 C106.076142,-0.5 106.3,-0.276142375 106.3,0 Z M102.1,0 C102.1,0.276142375 101.876142,0.5 101.6,0.5 L100.4,0.5 C100.123858,0.5 99.9,0.276142375 99.9,0 C99.9,-0.276142375 100.123858,-0.5 100.4,-0.5 L101.6,-0.5 C101.876142,-0.5 102.1,-0.276142375 102.1,0 Z M97.9,0 C97.9,0.276142375 97.6761424,0.5 97.4,0.5 L96.2,0.5 C95.9238576,0.5 95.7,0.276142375 95.7,0 C95.7,-0.276142375 95.9238576,-0.5 96.2,-0.5 L97.4,-0.5 C97.6761424,-0.5 97.9,-0.276142375 97.9,0 Z M93.7,0 C93.7,0.276142375 93.4761424,0.5 93.2,0.5 L92,0.5 C91.7238576,0.5 91.5,0.276142375 91.5,0 C91.5,-0.276142375 91.7238576,-0.5 92,-0.5 L93.2,-0.5 C93.4761424,-0.5 93.7,-0.276142375 93.7,0 Z M89.5,0 C89.5,0.276142375 89.2761424,0.5 89,0.5 L87.8,0.5 C87.5238576,0.5 87.3,0.276142375 87.3,0 C87.3,-0.276142375 87.5238576,-0.5 87.8,-0.5 L89,-0.5 C89.2761424,-0.5 89.5,-0.276142375 89.5,0 Z M85.3,0 C85.3,0.276142375 85.0761424,0.5 84.8,0.5 L83.6,0.5 C83.3238576,0.5 83.1,0.276142375 83.1,0 C83.1,-0.276142375 83.3238576,-0.5 83.6,-0.5 L84.8,-0.5 C85.0761424,-0.5 85.3,-0.276142375 85.3,0 Z M81.1,0 C81.1,0.276142375 80.8761424,0.5 80.6,0.5 L79.4,0.5 C79.1238576,0.5 78.9,0.276142375 78.9,0 C78.9,-0.276142375 79.1238576,-0.5 79.4,-0.5 L80.6,-0.5 C80.8761424,-0.5 81.1,-0.276142375 81.1,0 Z M76.9,0 C76.9,0.276142375 76.6761424,0.5 76.4,0.5 L75.2,0.5 C74.9238576,0.5 74.7,0.276142375 74.7,0 C74.7,-0.276142375 74.9238576,-0.5 75.2,-0.5 L76.4,-0.5 C76.6761424,-0.5 76.9,-0.276142375 76.9,0 Z M72.7,0 C72.7,0.276142375 72.4761424,0.5 72.2,0.5 L71,0.5 C70.7238576,0.5 70.5,0.276142375 70.5,0 C70.5,-0.276142375 70.7238576,-0.5 71,-0.5 L72.2,-0.5 C72.4761424,-0.5 72.7,-0.276142375 72.7,0 Z M68.5,0 C68.5,0.276142375 68.2761424,0.5 68,0.5 L66.8,0.5 C66.5238576,0.5 66.3,0.276142375 66.3,0 C66.3,-0.276142375 66.5238576,-0.5 66.8,-0.5 L68,-0.5 C68.2761424,-0.5 68.5,-0.276142375 68.5,0 Z M64.3,0 C64.3,0.276142375 64.0761424,0.5 63.8,0.5 L62.6,0.5 C62.3238576,0.5 62.1,0.276142375 62.1,0 C62.1,-0.276142375 62.3238576,-0.5 62.6,-0.5 L63.8,-0.5 C64.0761424,-0.5 64.3,-0.276142375 64.3,0 Z M60.1,0 C60.1,0.276142375 59.8761424,0.5 59.6,0.5 L58.4,0.5 C58.1238576,0.5 57.9,0.276142375 57.9,0 C57.9,-0.276142375 58.1238576,-0.5 58.4,-0.5 L59.6,-0.5 C59.8761424,-0.5 60.1,-0.276142375 60.1,0 Z M55.9,0 C55.9,0.276142375 55.6761424,0.5 55.4,0.5 L54.2,0.5 C53.9238576,0.5 53.7,0.276142375 53.7,0 C53.7,-0.276142375 53.9238576,-0.5 54.2,-0.5 L55.4,-0.5 C55.6761424,-0.5 55.9,-0.276142375 55.9,0 Z M51.7,0 C51.7,0.276142375 51.4761424,0.5 51.2,0.5 L50,0.5 C49.7238576,0.5 49.5,0.276142375 49.5,0 C49.5,-0.276142375 49.7238576,-0.5 50,-0.5 L51.2,-0.5 C51.4761424,-0.5 51.7,-0.276142375 51.7,0 Z M47.5,0 C47.5,0.276142375 47.2761424,0.5 47,0.5 L45.8,0.5 C45.5238576,0.5 45.3,0.276142375 45.3,0 C45.3,-0.276142375 45.5238576,-0.5 45.8,-0.5 L47,-0.5 C47.2761424,-0.5 47.5,-0.276142375 47.5,0 Z M43.3,0 C43.3,0.276142375 43.0761424,0.5 42.8,0.5 L41.6,0.5 C41.3238576,0.5 41.1,0.276142375 41.1,0 C41.1,-0.276142375 41.3238576,-0.5 41.6,-0.5 L42.8,-0.5 C43.0761424,-0.5 43.3,-0.276142375 43.3,0 Z M39.1,0 C39.1,0.276142375 38.8761424,0.5 38.6,0.5 L37.4,0.5 C37.1238576,0.5 36.9,0.276142375 36.9,0 C36.9,-0.276142375 37.1238576,-0.5 37.4,-0.5 L38.6,-0.5 C38.8761424,-0.5 39.1,-0.276142375 39.1,0 Z M34.9,0 C34.9,0.276142375 34.6761424,0.5 34.4,0.5 L33.2,0.5 C32.9238576,0.5 32.7,0.276142375 32.7,0 C32.7,-0.276142375 32.9238576,-0.5 33.2,-0.5 L34.4,-0.5 C34.6761424,-0.5 34.9,-0.276142375 34.9,0 Z M30.7,0 C30.7,0.276142375 30.4761424,0.5 30.2,0.5 L29,0.5 C28.7238576,0.5 28.5,0.276142375 28.5,0 C28.5,-0.276142375 28.7238576,-0.5 29,-0.5 L30.2,-0.5 C30.4761424,-0.5 30.7,-0.276142375 30.7,0 Z M26.5,0 C26.5,0.276142375 26.2761424,0.5 26,0.5 L24.8,0.5 C24.5238576,0.5 24.3,0.276142375 24.3,0 C24.3,-0.276142375 24.5238576,-0.5 24.8,-0.5 L26,-0.5 C26.2761424,-0.5 26.5,-0.276142375 26.5,0 Z M22.3,0 C22.3,0.276142375 22.0761424,0.5 21.8,0.5 L20.6,0.5 C20.3238576,0.5 20.1,0.276142375 20.1,0 C20.1,-0.276142375 20.3238576,-0.5 20.6,-0.5 L21.8,-0.5 C22.0761424,-0.5 22.3,-0.276142375 22.3,0 Z M18.1,0 C18.1,0.276142375 17.8761424,0.5 17.6,0.5 L16.4,0.5 C16.1238576,0.5 15.9,0.276142375 15.9,0 C15.9,-0.276142375 16.1238576,-0.5 16.4,-0.5 L17.6,-0.5 C17.8761424,-0.5 18.1,-0.276142375 18.1,0 Z M13.9,0 C13.9,0.276142375 13.6761424,0.5 13.4,0.5 L12.2,0.5 C11.9238576,0.5 11.7,0.276142375 11.7,0 C11.7,-0.276142375 11.9238576,-0.5 12.2,-0.5 L13.4,-0.5 C13.6761424,-0.5 13.9,-0.276142375 13.9,0 Z M9.7,0 C9.7,0.276142375 9.47614237,0.5 9.2,0.5 L8,0.5 C7.72385763,0.5 7.5,0.276142375 7.5,0 C7.5,-0.276142375 7.72385763,-0.5 8,-0.5 L9.2,-0.5 C9.47614237,-0.5 9.7,-0.276142375 9.7,0 Z" fill="#999999" fill-rule="nonzero"></path>
+                    <g id="vLLM" stroke-width="1" fill-rule="evenodd" transform="translate(6.5, 4)" fill="#7D7D7D">
+                        <path d="M1.80126953,8 L5.30224609,8 C5.44873047,8 5.56266276,7.95198568 5.64404297,7.85595703 C5.72542318,7.75992839 5.76611328,7.6484375 5.76611328,7.52148437 C5.76611328,7.39453125 5.72460938,7.28222656 5.64160156,7.18457031 C5.55859375,7.08691406 5.44547526,7.03808594 5.30224609,7.03808594 L2.39697266,7.03808594 L2.39697266,1.58398437 C2.39697266,1.3984375 2.33837891,1.25439453 2.22119141,1.15185547 C2.10400391,1.04931641 1.96240234,0.998046875 1.79638672,0.998046875 C1.63037109,0.998046875 1.48714193,1.04931641 1.36669922,1.15185547 C1.24625651,1.25439453 1.18603516,1.3984375 1.18603516,1.58398437 L1.18603516,7.39453125 C1.18603516,7.56054687 1.24707031,7.70296224 1.36914062,7.82177734 C1.49121094,7.94059245 1.63525391,8 1.80126953,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M7.27978516,8 L10.7807617,8 C10.9272461,8 11.0411784,7.95198568 11.1225586,7.85595703 C11.2039388,7.75992839 11.2446289,7.6484375 11.2446289,7.52148437 C11.2446289,7.39453125 11.203125,7.28222656 11.1201172,7.18457031 C11.0371094,7.08691406 10.9239909,7.03808594 10.7807617,7.03808594 L7.87548828,7.03808594 L7.87548828,1.58398437 C7.87548828,1.3984375 7.81689453,1.25439453 7.69970703,1.15185547 C7.58251953,1.04931641 7.44091797,0.998046875 7.27490234,0.998046875 C7.10888672,0.998046875 6.96565755,1.04931641 6.84521484,1.15185547 C6.72477214,1.25439453 6.66455078,1.3984375 6.66455078,1.58398437 L6.66455078,7.39453125 C6.66455078,7.56054687 6.72558594,7.70296224 6.84765625,7.82177734 C6.96972656,7.94059245 7.11376953,8 7.27978516,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M15.9418945,7.78027344 L15.9418945,6.31054688 L14.0473633,1.88183594 C13.9529622,1.65722656 13.8162435,1.48876953 13.637207,1.37646484 C13.4581706,1.26416016 13.2661133,1.20800781 13.0610352,1.20800781 C12.8136393,1.20800781 12.5996094,1.2796224 12.4189453,1.42285156 C12.2382812,1.56608073 12.1479492,1.75 12.1479492,1.97460938 L12.1479492,7.47753906 C12.1479492,7.60449219 12.2032878,7.70377604 12.3139648,7.77539063 C12.4246419,7.84700521 12.5556641,7.8828125 12.7070312,7.8828125 C12.8583984,7.8828125 12.9894206,7.84700521 13.1000977,7.77539063 C13.2107747,7.70377604 13.2661133,7.60449219 13.2661133,7.47753906 L13.2661133,2.5703125 L15.2436523,7.25292969 C15.3152669,7.42220052 15.4145508,7.55240885 15.5415039,7.64355469 C15.668457,7.73470052 15.8019206,7.78027344 15.9418945,7.78027344 Z M15.9467773,7.78027344 C16.0867513,7.78027344 16.2210286,7.73388672 16.3496094,7.64111328 C16.4781901,7.54833984 16.5766602,7.41894531 16.6450195,7.25292969 L18.6274414,2.5703125 L18.6274414,7.47753906 C18.6274414,7.60449219 18.6827799,7.70377604 18.793457,7.77539063 C18.9041341,7.84700521 19.0351562,7.8828125 19.1865234,7.8828125 C19.3378906,7.8828125 19.4689128,7.84700521 19.5795898,7.77539063 C19.6902669,7.70377604 19.7456055,7.60449219 19.7456055,7.47753906 L19.7456055,1.97460938 C19.7456055,1.82486979 19.702474,1.69059245 19.6162109,1.57177734 C19.5299479,1.45296224 19.4176432,1.36263021 19.2792969,1.30078125 C19.1409505,1.23893229 18.9920247,1.20800781 18.8325195,1.20800781 C18.6274414,1.20800781 18.4353841,1.26416016 18.2563477,1.37646484 C18.0773112,1.48876953 17.9389648,1.65722656 17.8413086,1.88183594 L15.9467773,6.31054688 L15.9467773,7.78027344 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="Lines-@blue-w-arrow" transform="translate(20, 50)" fill="#8E8E8E" fill-rule="nonzero">
+                    <g id="L5" transform="translate(0, 138)">
+                        <path d="M77.5,-0.277777778 L80,4.72222222 L77.999,4.72222222 L78,20 C78,20.2761424 77.7761424,20.5 77.5,20.5 C77.2238576,20.5 77,20.2761424 77,20 L76.999,4.72222222 L75,4.72222222 L77.5,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M32.5,-0.277777778 L35,4.72222222 L32.999,4.72222222 L33,20 C33,20.2761424 32.7761424,20.5 32.5,20.5 C32.2238576,20.5 32,20.2761424 32,20 L31.999,4.72222222 L30,4.72222222 L32.5,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M0.5,-0.277777778 L3,4.72222222 L0.999,4.72222222 L1,20 C1,20.2761424 0.776142375,20.5 0.5,20.5 C0.223857625,20.5 1.33226763e-15,20.2761424 1.33226763e-15,20 L-0.001,4.72222222 L-2,4.72222222 L0.5,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L4" transform="translate(0.0001, 110)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9988985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9988985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253509e-06,16.2761424 -1.49253509e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L3" transform="translate(0, 82)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9988985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9988985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253509e-06,16.2761424 -1.49253509e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L2" transform="translate(0.0001, 54)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9988985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9988985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253509e-06,16.2761424 -1.49253509e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L1" transform="translate(39, 0)">
+                        <path d="M0.5,0.722222222 L3,5.72222222 L0.999,5.72222222 L1,42 C1,42.2761424 0.776142375,42.5 0.5,42.5 C0.223857625,42.5 5.21804822e-14,42.2761424 5.21804822e-14,42 L-0.001,5.72222222 L-2,5.72222222 L0.5,0.722222222 Z" id="Combined-Shape"></path>
+                    </g>
+                </g>
+                <g id="1" transform="translate(8, 208)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="2" transform="translate(40, 208)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="3" transform="translate(86, 208)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L1" transform="translate(8, 92)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L2" transform="translate(8, 120)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L3" transform="translate(8, 148)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L4" transform="translate(8, 176)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="..." transform="translate(70.7, 213)" fill="#8E8E8E">
+                    <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                    <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                    <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                </g>
+            </g>
+            <g id="Pooling" transform="translate(0, 12.0363)">
+                <g id="MEAN-pooling" transform="translate(0, 15.9637)">
+                    <g id="dash-box" transform="translate(-1.4, -0.4)">
+                        <path d="M4.40000021,0.4 L116.4,0.4 C118.056854,0.4 119.4,1.74314575 119.4,3.4 L119.4,19.4 C119.4,21.0568542 118.056854,22.4 116.4,22.4 L4.40000021,22.4 C2.74314596,22.4 1.40000021,21.0568542 1.40000021,19.4 L1.40000021,3.4 C1.40000021,1.74314575 2.74314596,0.4 4.40000021,0.4 Z" id="vLLM-dash-box" fill="#F7F7F7"></path>
+                        <path d="M97.1844026,22.4 C97.1844026,22.6209139 97.0053165,22.8 96.7844026,22.8 L95.7844026,22.8 C95.5634887,22.8 95.3844026,22.6209139 95.3844026,22.4 C95.3844026,22.1790861 95.5634887,22 95.7844026,22 L96.7844025,22 C97.0053165,22 97.1844026,22.1790861 97.1844026,22.4 Z M7.58440258,22.4 C7.58440258,22.6209139 7.40531648,22.8 7.18440258,22.8 L6.18440258,22.8 C5.96348868,22.8 5.78440258,22.6209139 5.78440258,22.4 C5.78440258,22.1790861 5.96348868,22 6.18440258,22 L7.18440261,22 C7.40531648,22 7.58440258,22.1790861 7.58440258,22.4 Z M10.7844026,22.4 C10.7844026,22.6209139 10.6053165,22.8 10.3844026,22.8 L9.38440258,22.8 C9.16348868,22.8 8.98440258,22.6209139 8.98440258,22.4 C8.98440258,22.1790861 9.16348868,22 9.38440258,22 L10.3844026,22 C10.6053165,22 10.7844026,22.1790861 10.7844026,22.4 Z M100.384403,22.4 C100.384403,22.6209139 100.205316,22.8 99.9844026,22.8 L98.9844026,22.8 C98.7634887,22.8 98.5844026,22.6209139 98.5844026,22.4 C98.5844026,22.1790861 98.7634887,22 98.9844026,22 L99.9844025,22 C100.205316,22 100.384403,22.1790861 100.384403,22.4 Z M17.1844026,22.4 C17.1844026,22.6209139 17.0053165,22.8 16.7844026,22.8 L15.7844026,22.8 C15.5634887,22.8 15.3844026,22.6209139 15.3844026,22.4 C15.3844026,22.1790861 15.5634887,22 15.7844026,22 L16.7844026,22 C17.0053165,22 17.1844026,22.1790861 17.1844026,22.4 Z M20.3844026,22.4 C20.3844026,22.6209139 20.2053165,22.8 19.9844026,22.8 L18.9844026,22.8 C18.7634887,22.8 18.5844026,22.6209139 18.5844026,22.4 C18.5844026,22.1790861 18.7634887,22 18.9844026,22 L19.9844026,22 C20.2053165,22 20.3844026,22.1790861 20.3844026,22.4 Z M23.5844026,22.4 C23.5844026,22.6209139 23.4053165,22.8 23.1844026,22.8 L22.1844026,22.8 C21.9634887,22.8 21.7844026,22.6209139 21.7844026,22.4 C21.7844026,22.1790861 21.9634887,22 22.1844026,22 L23.1844026,22 C23.4053165,22 23.5844026,22.1790861 23.5844026,22.4 Z M26.7844026,22.4 C26.7844026,22.6209139 26.6053165,22.8 26.3844026,22.8 L25.3844026,22.8 C25.1634887,22.8 24.9844026,22.6209139 24.9844026,22.4 C24.9844026,22.1790861 25.1634887,22 25.3844026,22 L26.3844026,22 C26.6053165,22 26.7844026,22.1790861 26.7844026,22.4 Z M29.9844026,22.4 C29.9844026,22.6209139 29.8053165,22.8 29.5844026,22.8 L28.5844026,22.8 C28.3634887,22.8 28.1844026,22.6209139 28.1844026,22.4 C28.1844026,22.1790861 28.3634887,22 28.5844026,22 L29.5844027,22 C29.8053165,22 29.9844026,22.1790861 29.9844026,22.4 Z M33.1844026,22.4 C33.1844026,22.6209139 33.0053165,22.8 32.7844026,22.8 L31.7844026,22.8 C31.5634887,22.8 31.3844026,22.6209139 31.3844026,22.4 C31.3844026,22.1790861 31.5634887,22 31.7844026,22 L32.7844027,22 C33.0053165,22 33.1844026,22.1790861 33.1844026,22.4 Z M36.3844026,22.4 C36.3844026,22.6209139 36.2053165,22.8 35.9844026,22.8 L34.9844026,22.8 C34.7634887,22.8 34.5844026,22.6209139 34.5844026,22.4 C34.5844026,22.1790861 34.7634887,22 34.9844026,22 L35.9844027,22 C36.2053165,22 36.3844026,22.1790861 36.3844026,22.4 Z M39.5844026,22.4 C39.5844026,22.6209139 39.4053165,22.8 39.1844026,22.8 L38.1844026,22.8 C37.9634887,22.8 37.7844026,22.6209139 37.7844026,22.4 C37.7844026,22.1790861 37.9634887,22 38.1844026,22 L39.1844027,22 C39.4053165,22 39.5844026,22.1790861 39.5844026,22.4 Z M42.7844026,22.4 C42.7844026,22.6209139 42.6053165,22.8 42.3844026,22.8 L41.3844026,22.8 C41.1634887,22.8 40.9844026,22.6209139 40.9844026,22.4 C40.9844026,22.1790861 41.1634887,22 41.3844026,22 L42.3844027,22 C42.6053165,22 42.7844026,22.1790861 42.7844026,22.4 Z M45.9844026,22.4 C45.9844026,22.6209139 45.8053165,22.8 45.5844026,22.8 L44.5844026,22.8 C44.3634887,22.8 44.1844026,22.6209139 44.1844026,22.4 C44.1844026,22.1790861 44.3634887,22 44.5844026,22 L45.5844027,22 C45.8053165,22 45.9844026,22.1790861 45.9844026,22.4 Z M49.1844026,22.4 C49.1844026,22.6209139 49.0053165,22.8 48.7844026,22.8 L47.7844026,22.8 C47.5634887,22.8 47.3844026,22.6209139 47.3844026,22.4 C47.3844026,22.1790861 47.5634887,22 47.7844026,22 L48.7844027,22 C49.0053165,22 49.1844026,22.1790861 49.1844026,22.4 Z M52.3844026,22.4 C52.3844026,22.6209139 52.2053165,22.8 51.9844026,22.8 L50.9844026,22.8 C50.7634887,22.8 50.5844026,22.6209139 50.5844026,22.4 C50.5844026,22.1790861 50.7634887,22 50.9844026,22 L51.9844027,22 C52.2053165,22 52.3844026,22.1790861 52.3844026,22.4 Z M55.5844026,22.4 C55.5844026,22.6209139 55.4053165,22.8 55.1844026,22.8 L54.1844026,22.8 C53.9634887,22.8 53.7844026,22.6209139 53.7844026,22.4 C53.7844026,22.1790861 53.9634887,22 54.1844026,22 L55.1844027,22 C55.4053165,22 55.5844026,22.1790861 55.5844026,22.4 Z M58.7844026,22.4 C58.7844026,22.6209139 58.6053165,22.8 58.3844026,22.8 L57.3844026,22.8 C57.1634887,22.8 56.9844026,22.6209139 56.9844026,22.4 C56.9844026,22.1790861 57.1634887,22 57.3844026,22 L58.3844027,22 C58.6053165,22 58.7844026,22.1790861 58.7844026,22.4 Z M61.9844026,22.4 C61.9844026,22.6209139 61.8053165,22.8 61.5844026,22.8 L60.5844026,22.8 C60.3634887,22.8 60.1844026,22.6209139 60.1844026,22.4 C60.1844026,22.1790861 60.3634887,22 60.5844026,22 L61.5844025,22 C61.8053165,22 61.9844026,22.1790861 61.9844026,22.4 Z M65.1844026,22.4 C65.1844026,22.6209139 65.0053165,22.8 64.7844026,22.8 L63.7844026,22.8 C63.5634887,22.8 63.3844026,22.6209139 63.3844026,22.4 C63.3844026,22.1790861 63.5634887,22 63.7844026,22 L64.7844025,22 C65.0053165,22 65.1844026,22.1790861 65.1844026,22.4 Z M68.3844026,22.4 C68.3844026,22.6209139 68.2053165,22.8 67.9844026,22.8 L66.9844026,22.8 C66.7634887,22.8 66.5844026,22.6209139 66.5844026,22.4 C66.5844026,22.1790861 66.7634887,22 66.9844026,22 L67.9844025,22 C68.2053165,22 68.3844026,22.1790861 68.3844026,22.4 Z M13.9844026,22.4 C13.9844026,22.6209139 13.8053165,22.8 13.5844026,22.8 L12.5844026,22.8 C12.3634887,22.8 12.1844026,22.6209139 12.1844026,22.4 C12.1844026,22.1790861 12.3634887,22 12.5844026,22 L13.5844026,22 C13.8053165,22 13.9844026,22.1790861 13.9844026,22.4 Z M71.5844026,22.4 C71.5844026,22.6209139 71.4053165,22.8 71.1844026,22.8 L70.1844026,22.8 C69.9634887,22.8 69.7844026,22.6209139 69.7844026,22.4 C69.7844026,22.1790861 69.9634887,22 70.1844026,22 L71.1844025,22 C71.4053165,22 71.5844026,22.1790861 71.5844026,22.4 Z M74.7844026,22.4 C74.7844026,22.6209139 74.6053165,22.8 74.3844026,22.8 L73.3844026,22.8 C73.1634887,22.8 72.9844026,22.6209139 72.9844026,22.4 C72.9844026,22.1790861 73.1634887,22 73.3844026,22 L74.3844025,22 C74.6053165,22 74.7844026,22.1790861 74.7844026,22.4 Z M77.9844026,22.4 C77.9844026,22.6209139 77.8053165,22.8 77.5844026,22.8 L76.5844026,22.8 C76.3634887,22.8 76.1844026,22.6209139 76.1844026,22.4 C76.1844026,22.1790861 76.3634887,22 76.5844026,22 L77.5844025,22 C77.8053165,22 77.9844026,22.1790861 77.9844026,22.4 Z M81.1844026,22.4 C81.1844026,22.6209139 81.0053165,22.8 80.7844026,22.8 L79.7844026,22.8 C79.5634887,22.8 79.3844026,22.6209139 79.3844026,22.4 C79.3844026,22.1790861 79.5634887,22 79.7844026,22 L80.7844025,22 C81.0053165,22 81.1844026,22.1790861 81.1844026,22.4 Z M84.3844026,22.4 C84.3844026,22.6209139 84.2053165,22.8 83.9844026,22.8 L82.9844026,22.8 C82.7634887,22.8 82.5844026,22.6209139 82.5844026,22.4 C82.5844026,22.1790861 82.7634887,22 82.9844026,22 L83.9844025,22 C84.2053165,22 84.3844026,22.1790861 84.3844026,22.4 Z M87.5844026,22.4 C87.5844026,22.6209139 87.4053165,22.8 87.1844026,22.8 L86.1844026,22.8 C85.9634887,22.8 85.7844026,22.6209139 85.7844026,22.4 C85.7844026,22.1790861 85.9634887,22 86.1844026,22 L87.1844025,22 C87.4053165,22 87.5844026,22.1790861 87.5844026,22.4 Z M90.7844026,22.4 C90.7844026,22.6209139 90.6053165,22.8 90.3844026,22.8 L89.3844026,22.8 C89.1634887,22.8 88.9844026,22.6209139 88.9844026,22.4 C88.9844026,22.1790861 89.1634887,22 89.3844026,22 L90.3844025,22 C90.6053165,22 90.7844026,22.1790861 90.7844026,22.4 Z M93.9844026,22.4 C93.9844026,22.6209139 93.8053165,22.8 93.5844026,22.8 L92.5844026,22.8 C92.3634887,22.8 92.1844026,22.6209139 92.1844026,22.4 C92.1844026,22.1790861 92.3634887,22 92.5844026,22 L93.5844025,22 C93.8053165,22 93.9844026,22.1790861 93.9844026,22.4 Z M116.384403,22.4 C116.384403,22.6209139 116.205316,22.8 115.984403,22.8 L114.984403,22.8 C114.763489,22.8 114.584403,22.6209139 114.584403,22.4 C114.584403,22.1790861 114.763489,22 114.984403,22 L115.984403,22 C116.205316,22 116.384403,22.1790861 116.384403,22.4 Z M113.184403,22.4 C113.184403,22.6209139 113.005316,22.8 112.784403,22.8 L111.784403,22.8 C111.563489,22.8 111.384403,22.6209139 111.384403,22.4 C111.384403,22.1790861 111.563489,22 111.784403,22 L112.784403,22 C113.005316,22 113.184403,22.1790861 113.184403,22.4 Z M109.984403,22.4 C109.984403,22.6209139 109.805316,22.8 109.584403,22.8 L108.584403,22.8 C108.363489,22.8 108.184403,22.6209139 108.184403,22.4 C108.184403,22.1790861 108.363489,22 108.584403,22 L109.584403,22 C109.805316,22 109.984403,22.1790861 109.984403,22.4 Z M106.784403,22.4 C106.784403,22.6209139 106.605316,22.8 106.384403,22.8 L105.384403,22.8 C105.163489,22.8 104.984403,22.6209139 104.984403,22.4 C104.984403,22.1790861 105.163489,22 105.384403,22 L106.384402,22 C106.605316,22 106.784403,22.1790861 106.784403,22.4 Z M103.584403,22.4 C103.584403,22.6209139 103.405316,22.8 103.184403,22.8 L102.184403,22.8 C101.963489,22.8 101.784403,22.6209139 101.784403,22.4 C101.784403,22.1790861 101.963489,22 102.184403,22 L103.184402,22 C103.405316,22 103.584403,22.1790861 103.584403,22.4 Z M3.21445335,21.7146857 C3.47201296,21.8469101 3.7503283,21.9352706 4.04005994,21.9753362 C4.25889142,22.0055973 4.41175792,22.2075267 4.38149683,22.4263582 C4.35123574,22.6451896 4.14930631,22.7980562 3.93047483,22.7677951 C3.55112172,22.7153362 3.1863813,22.5995372 2.84908821,22.4263797 C2.65255935,22.3254869 2.575031,22.084379 2.6759238,21.8878502 C2.77681659,21.6913213 3.01792449,21.6137929 3.21445335,21.7146857 Z M119.085667,20.7967074 C119.266363,20.923797 119.30982,21.1733068 119.182731,21.3540032 C118.964109,21.6648408 118.694714,21.9373837 118.386455,22.1596216 C118.207257,22.2888142 117.957256,22.2482762 117.828063,22.0690774 C117.698871,21.8898786 117.739409,21.6398779 117.918607,21.5106853 C118.154633,21.3405235 118.360988,21.1317568 118.528371,20.8937713 C118.655461,20.7130749 118.904971,20.6696179 119.085667,20.7967074 Z M1.79999053,19.3738218 L1.80050042,19.4515966 C1.80607818,19.7390086 1.85825979,20.0190186 1.95396115,20.2838222 C2.02904737,20.4915841 1.92149248,20.7208778 1.71373058,20.795964 C1.50596867,20.8710502 1.27667495,20.7634953 1.20158873,20.5557334 C1.07625093,20.2089265 1.00793261,19.8423259 1.00058539,19.4621397 L1.00000988,19.3793853 C0.99847357,19.1584767 1.17630991,18.9781495 1.39721847,18.9766034 C1.61812703,18.9750769 1.79845422,19.1529133 1.79999053,19.3738218 Z M119.8,18.0077988 L119.8,19.0077988 C119.8,19.2287127 119.620914,19.4077988 119.4,19.4077988 C119.179086,19.4077988 119,19.2287127 119,19.0077988 L119,18.0077988 C119,17.7868849 119.179086,17.6077988 119.4,17.6077988 C119.620914,17.6077988 119.8,17.7868849 119.8,18.0077988 Z M1.80000021,16.1766036 L1.80000021,17.1766036 C1.80000021,17.3975175 1.62091411,17.5766036 1.40000021,17.5766036 C1.17908631,17.5766036 0.900000207,17.3975175 0.900000207,17.1766036 L0.900000207,16.1766036 C0.900000207,15.9556897 1.17908631,15.7766036 1.40000021,15.7766036 C1.62091411,15.7766036 1.80000021,15.9556897 1.80000021,16.1766036 Z M119.8,14.8077988 L119.8,15.8077988 C119.8,16.0287127 119.620914,16.2077988 119.4,16.2077988 C119.179086,16.2077988 119,16.0287127 119,15.8077988 L119,14.8077988 C119,14.5868849 119.179086,14.4077988 119.4,14.4077988 C119.620914,14.4077988 119.8,14.5868849 119.8,14.8077988 Z M1.80000021,12.9766036 L1.80000021,13.9766036 C1.80000021,14.1975175 1.62091411,14.3766036 1.40000021,14.3766036 C1.17908631,14.3766036 0.900000207,14.1975175 0.900000207,13.9766036 L0.900000207,12.9766036 C0.900000207,12.7556897 1.17908631,12.5766036 1.40000021,12.5766036 C1.62091411,12.5766036 1.80000021,12.7556897 1.80000021,12.9766036 Z M119.8,11.6077988 L119.8,12.6077988 C119.8,12.8287127 119.620914,13.0077988 119.4,13.0077988 C119.179086,13.0077988 119,12.8287127 119,12.6077988 L119,11.6077988 C119,11.3868849 119.179086,11.2077988 119.4,11.2077988 C119.620914,11.2077988 119.8,11.3868849 119.8,11.6077988 Z M1.80000021,9.77660357 L1.80000021,10.7766036 C1.80000021,10.9975175 1.62091411,11.1766036 1.40000021,11.1766036 C1.17908631,11.1766036 0.900000207,10.9975175 0.900000207,10.7766036 L0.900000207,9.77660357 C0.900000207,9.55568967 1.17908631,9.37660357 1.40000021,9.37660357 C1.62091411,9.37660357 1.80000021,9.55568967 1.80000021,9.77660357 Z M119.8,8.40779881 L119.8,9.40779881 C119.8,9.62871271 119.620914,9.80779881 119.4,9.80779881 C119.179086,9.80779881 119,9.62871271 119,9.40779881 L119,8.40779881 C119,8.18688491 119.179086,8.00779881 119.4,8.00779881 C119.620914,8.00779881 119.8,8.18688491 119.8,8.40779881 Z M1.80000021,6.57660357 L1.80000021,7.57660357 C1.80000021,7.79751746 1.62091411,7.97660357 1.40000021,7.97660357 C1.17908631,7.97660357 0.900000207,7.79751746 0.900000207,7.57660357 L0.900000207,6.57660357 C0.900000207,6.35568967 1.17908631,6.17660357 1.40000021,6.17660357 C1.62091411,6.17660357 1.80000021,6.35568967 1.80000021,6.57660357 Z M119.8,5.20779881 L119.8,6.20779881 C119.8,6.42871271 119.620914,6.60779881 119.4,6.60779881 C119.179086,6.60779881 119,6.42871271 119,6.20779881 L119,5.20779881 C119,4.98688491 119.179086,4.80779881 119.4,4.80779881 C119.620914,4.80779881 119.8,4.98688491 119.8,5.20779881 Z M1.80007849,3.37957686 C1.80001327,3.3897814 1.80001327,3.3897814 1.80000021,3.4 L1.80000021,4.37660357 C1.80000021,4.59751746 1.62091411,4.77660357 1.40000021,4.77660357 C1.17908631,4.77660357 1.00000021,4.59751746 1.00000021,4.37660357 L1.00000021,3.4 C1.00001719,3.38671374 1.00001719,3.38671374 1.00010202,3.37344172 C1.00179619,3.15253431 1.18225042,2.97482688 1.40315783,2.97650904 C1.62406523,2.97821522 1.80177267,3.15866946 1.80007849,3.37957686 Z M119.421116,1.83887624 C119.595442,2.17563306 119.712485,2.54002136 119.766215,2.9191864 C119.79721,3.13791515 119.645022,3.34035621 119.426293,3.37135118 C119.207564,3.40234614 119.005123,3.25015784 118.974128,3.0314291 C118.933094,2.74185398 118.843784,2.46380655 118.710664,2.20665099 C118.609106,2.01046515 118.685816,1.76909592 118.882002,1.66753772 C119.078188,1.56597953 119.319557,1.6426904 119.421116,1.83887624 Z M3.37834391,0.550095989 C3.47987763,0.746294496 3.40313666,0.987654157 3.20693815,1.08918788 C2.94867982,1.22283788 2.71432455,1.39926119 2.51394916,1.61035064 C2.36185852,1.77057357 2.10867834,1.77716609 1.94845541,1.62507544 C1.78823248,1.4729848 1.78163997,1.21980462 1.93373061,1.05958169 C2.19539698,0.783924034 2.50154333,0.553456192 2.83925202,0.378690235 C3.03545052,0.277156512 3.27681019,0.353897482 3.37834391,0.550095989 Z M117.517808,0.188136945 C117.72645,0.260742276 117.836729,0.488738155 117.764124,0.697379994 C117.691519,0.906021834 117.463523,1.01630121 117.254881,0.943695883 C116.98269,0.84897599 116.695024,0.800014594 116.400132,0.800000003 C116.179218,0.799989073 116.000152,0.620894112 116.000152,0.399980212 C116.000152,0.179066313 116.179258,-1.09263043e-05 116.400172,4.24230608e-09 C116.784867,1.90385059e-05 117.161375,0.064101627 117.517808,0.188136945 Z M60.2000002,0.5 C60.2000002,0.7209139 60.0209141,0.9 59.8000002,0.9 L58.8000002,0.9 C58.5790863,0.9 58.4000002,0.7209139 58.4000002,0.5 C58.4000002,0.2790861 58.5790863,0 58.8000002,0 L59.8000002,0 C60.0209141,0 60.2000002,0.2790861 60.2000002,0.5 Z M5.80000021,0.5 C5.80000021,0.7209139 5.62091411,0.9 5.40000021,0.9 L4.40000021,0.9 C4.17908631,0.9 4.00000021,0.7209139 4.00000021,0.5 C4.00000021,0.2790861 4.17908631,0 4.40000021,0 L5.40000021,0 C5.62091411,0 5.80000021,0.2790861 5.80000021,0.5 Z M108.2,0.5 C108.2,0.7209139 108.020914,0.9 107.8,0.9 L106.8,0.9 C106.579086,0.9 106.4,0.7209139 106.4,0.5 C106.4,0.2790861 106.579086,0 106.8,0 L107.8,0 C108.020914,0 108.2,0.2790861 108.2,0.5 Z M105,0.5 C105,0.7209139 104.820914,0.9 104.6,0.9 L103.6,0.9 C103.379086,0.9 103.2,0.7209139 103.2,0.5 C103.2,0.2790861 103.379086,0 103.6,0 L104.6,0 C104.820914,0 105,0.2790861 105,0.5 Z M101.8,0.5 C101.8,0.7209139 101.620914,0.9 101.4,0.9 L100.4,0.9 C100.179086,0.9 100,0.7209139 100,0.5 C100,0.2790861 100.179086,0 100.4,0 L101.4,0 C101.620914,0 101.8,0.2790861 101.8,0.5 Z M98.6000002,0.5 C98.6000002,0.7209139 98.4209141,0.9 98.2000002,0.9 L97.2000002,0.9 C96.9790863,0.9 96.8000002,0.7209139 96.8000002,0.5 C96.8000002,0.2790861 96.9790863,0 97.2000002,0 L98.2000002,0 C98.4209141,0 98.6000002,0.2790861 98.6000002,0.5 Z M95.4000002,0.5 C95.4000002,0.7209139 95.2209141,0.9 95.0000002,0.9 L94.0000002,0.9 C93.7790863,0.9 93.6000002,0.7209139 93.6000002,0.5 C93.6000002,0.2790861 93.7790863,0 94.0000002,0 L95.0000002,0 C95.2209141,0 95.4000002,0.2790861 95.4000002,0.5 Z M92.2000002,0.5 C92.2000002,0.7209139 92.0209141,0.9 91.8000002,0.9 L90.8000002,0.9 C90.5790863,0.9 90.4000002,0.7209139 90.4000002,0.5 C90.4000002,0.2790861 90.5790863,0 90.8000002,0 L91.8000002,0 C92.0209141,0 92.2000002,0.2790861 92.2000002,0.5 Z M89.0000002,0.5 C89.0000002,0.7209139 88.8209141,0.9 88.6000002,0.9 L87.6000002,0.9 C87.3790863,0.9 87.2000002,0.7209139 87.2000002,0.5 C87.2000002,0.2790861 87.3790863,0 87.6000002,0 L88.6000002,0 C88.8209141,0 89.0000002,0.2790861 89.0000002,0.5 Z M85.8000002,0.5 C85.8000002,0.7209139 85.6209141,0.9 85.4000002,0.9 L84.4000002,0.9 C84.1790863,0.9 84.0000002,0.7209139 84.0000002,0.5 C84.0000002,0.2790861 84.1790863,0 84.4000002,0 L85.4000002,0 C85.6209141,0 85.8000002,0.2790861 85.8000002,0.5 Z M82.6000002,0.5 C82.6000002,0.7209139 82.4209141,0.9 82.2000002,0.9 L81.2000002,0.9 C80.9790863,0.9 80.8000002,0.7209139 80.8000002,0.5 C80.8000002,0.2790861 80.9790863,0 81.2000002,0 L82.2000002,0 C82.4209141,0 82.6000002,0.2790861 82.6000002,0.5 Z M79.4000002,0.5 C79.4000002,0.7209139 79.2209141,0.9 79.0000002,0.9 L78.0000002,0.9 C77.7790863,0.9 77.6000002,0.7209139 77.6000002,0.5 C77.6000002,0.2790861 77.7790863,0 78.0000002,0 L79.0000002,0 C79.2209141,0 79.4000002,0.2790861 79.4000002,0.5 Z M76.2000002,0.5 C76.2000002,0.7209139 76.0209141,0.9 75.8000002,0.9 L74.8000002,0.9 C74.5790863,0.9 74.4000002,0.7209139 74.4000002,0.5 C74.4000002,0.2790861 74.5790863,0 74.8000002,0 L75.8000002,0 C76.0209141,0 76.2000002,0.2790861 76.2000002,0.5 Z M73.0000002,0.5 C73.0000002,0.7209139 72.8209141,0.9 72.6000002,0.9 L71.6000002,0.9 C71.3790863,0.9 71.2000002,0.7209139 71.2000002,0.5 C71.2000002,0.2790861 71.3790863,0 71.6000002,0 L72.6000002,0 C72.8209141,0 73.0000002,0.2790861 73.0000002,0.5 Z M69.8000002,0.5 C69.8000002,0.7209139 69.6209141,0.9 69.4000002,0.9 L68.4000002,0.9 C68.1790863,0.9 68.0000002,0.7209139 68.0000002,0.5 C68.0000002,0.2790861 68.1790863,0 68.4000002,0 L69.4000002,0 C69.6209141,0 69.8000002,0.2790861 69.8000002,0.5 Z M66.6000002,0.5 C66.6000002,0.7209139 66.4209141,0.9 66.2000002,0.9 L65.2000002,0.9 C64.9790863,0.9 64.8000002,0.7209139 64.8000002,0.5 C64.8000002,0.2790861 64.9790863,0 65.2000002,0 L66.2000002,0 C66.4209141,0 66.6000002,0.2790861 66.6000002,0.5 Z M63.4000002,0.5 C63.4000002,0.7209139 63.2209141,0.9 63.0000002,0.9 L62.0000002,0.9 C61.7790863,0.9 61.6000002,0.7209139 61.6000002,0.5 C61.6000002,0.2790861 61.7790863,0 62.0000002,0 L63.0000002,0 C63.2209141,0 63.4000002,0.2790861 63.4000002,0.5 Z M114.6,0.5 C114.6,0.7209139 114.420914,0.9 114.2,0.9 L113.2,0.9 C112.979086,0.9 112.8,0.7209139 112.8,0.5 C112.8,0.2790861 112.979086,0 113.2,0 L114.2,0 C114.420914,0 114.6,0.2790861 114.6,0.5 Z M57.0000002,0.5 C57.0000002,0.7209139 56.8209141,0.9 56.6000002,0.9 L55.6000002,0.9 C55.3790863,0.9 55.2000002,0.7209139 55.2000002,0.5 C55.2000002,0.2790861 55.3790863,0 55.6000002,0 L56.6000002,0 C56.8209141,0 57.0000002,0.2790861 57.0000002,0.5 Z M53.8000002,0.5 C53.8000002,0.7209139 53.6209141,0.9 53.4000002,0.9 L52.4000002,0.9 C52.1790863,0.9 52.0000002,0.7209139 52.0000002,0.5 C52.0000002,0.2790861 52.1790863,0 52.4000002,0 L53.4000002,0 C53.6209141,0 53.8000002,0.2790861 53.8000002,0.5 Z M50.6000002,0.5 C50.6000002,0.7209139 50.4209141,0.9 50.2000002,0.9 L49.2000002,0.9 C48.9790863,0.9 48.8000002,0.7209139 48.8000002,0.5 C48.8000002,0.2790861 48.9790863,0 49.2000002,0 L50.2000002,0 C50.4209141,0 50.6000002,0.2790861 50.6000002,0.5 Z M47.4000002,0.5 C47.4000002,0.7209139 47.2209141,0.9 47.0000002,0.9 L46.0000002,0.9 C45.7790863,0.9 45.6000002,0.7209139 45.6000002,0.5 C45.6000002,0.2790861 45.7790863,0 46.0000002,0 L47.0000002,0 C47.2209141,0 47.4000002,0.2790861 47.4000002,0.5 Z M44.2000002,0.5 C44.2000002,0.7209139 44.0209141,0.9 43.8000002,0.9 L42.8000002,0.9 C42.5790863,0.9 42.4000002,0.7209139 42.4000002,0.5 C42.4000002,0.2790861 42.5790863,0 42.8000002,0 L43.8000002,0 C44.0209141,0 44.2000002,0.2790861 44.2000002,0.5 Z M41.0000002,0.5 C41.0000002,0.7209139 40.8209141,0.9 40.6000002,0.9 L39.6000002,0.9 C39.3790863,0.9 39.2000002,0.7209139 39.2000002,0.5 C39.2000002,0.2790861 39.3790863,0 39.6000002,0 L40.6000002,0 C40.8209141,0 41.0000002,0.2790861 41.0000002,0.5 Z M37.8000002,0.5 C37.8000002,0.7209139 37.6209141,0.9 37.4000002,0.9 L36.4000002,0.9 C36.1790863,0.9 36.0000002,0.7209139 36.0000002,0.5 C36.0000002,0.2790861 36.1790863,0 36.4000002,0 L37.4000002,0 C37.6209141,0 37.8000002,0.2790861 37.8000002,0.5 Z M34.6000002,0.5 C34.6000002,0.7209139 34.4209141,0.9 34.2000002,0.9 L33.2000002,0.9 C32.9790863,0.9 32.8000002,0.7209139 32.8000002,0.5 C32.8000002,0.2790861 32.9790863,0 33.2000002,0 L34.2000002,0 C34.4209141,0 34.6000002,0.2790861 34.6000002,0.5 Z M31.4000002,0.5 C31.4000002,0.7209139 31.2209141,0.9 31.0000002,0.9 L30.0000002,0.9 C29.7790863,0.9 29.6000002,0.7209139 29.6000002,0.5 C29.6000002,0.2790861 29.7790863,0 30.0000002,0 L31.0000002,0 C31.2209141,0 31.4000002,0.2790861 31.4000002,0.5 Z M28.2000002,0.5 C28.2000002,0.7209139 28.0209141,0.9 27.8000002,0.9 L26.8000002,0.9 C26.5790863,0.9 26.4000002,0.7209139 26.4000002,0.5 C26.4000002,0.2790861 26.5790863,0 26.8000002,0 L27.8000002,0 C28.0209141,0 28.2000002,0.2790861 28.2000002,0.5 Z M25.0000002,0.5 C25.0000002,0.7209139 24.8209141,0.9 24.6000002,0.9 L23.6000002,0.9 C23.3790863,0.9 23.2000002,0.7209139 23.2000002,0.5 C23.2000002,0.2790861 23.3790863,0 23.6000002,0 L24.6000002,0 C24.8209141,0 25.0000002,0.2790861 25.0000002,0.5 Z M21.8000002,0.5 C21.8000002,0.7209139 21.6209141,0.9 21.4000002,0.9 L20.4000002,0.9 C20.1790863,0.9 20.0000002,0.7209139 20.0000002,0.5 C20.0000002,0.2790861 20.1790863,0 20.4000002,0 L21.4000002,0 C21.6209141,0 21.8000002,0.2790861 21.8000002,0.5 Z M18.6000002,0.5 C18.6000002,0.7209139 18.4209141,0.9 18.2000002,0.9 L17.2000002,0.9 C16.9790863,0.9 16.8000002,0.7209139 16.8000002,0.5 C16.8000002,0.2790861 16.9790863,0 17.2000002,0 L18.2000002,0 C18.4209141,0 18.6000002,0.2790861 18.6000002,0.5 Z M15.4000002,0.5 C15.4000002,0.7209139 15.2209141,0.9 15.0000002,0.9 L14.0000002,0.9 C13.7790863,0.9 13.6000002,0.7209139 13.6000002,0.5 C13.6000002,0.2790861 13.7790863,0 14.0000002,0 L15.0000002,0 C15.2209141,0 15.4000002,0.2790861 15.4000002,0.5 Z M12.2000002,0.5 C12.2000002,0.7209139 12.0209141,0.9 11.8000002,0.9 L10.8000002,0.9 C10.5790863,0.9 10.4000002,0.7209139 10.4000002,0.5 C10.4000002,0.2790861 10.5790863,0 10.8000002,0 L11.8000002,0 C12.0209141,0 12.2000002,0.2790861 12.2000002,0.5 Z M9.00000021,0.5 C9.00000021,0.7209139 8.82091411,0.9 8.60000021,0.9 L7.60000021,0.9 C7.37908631,0.9 7.20000021,0.7209139 7.20000021,0.5 C7.20000021,0.2790861 7.37908631,0 7.60000021,0 L8.60000021,0 C8.82091411,0 9.00000021,0.2790861 9.00000021,0.5 Z M111.4,0.5 C111.4,0.7209139 111.220914,0.9 111,0.9 L110,0.9 C109.779086,0.9 109.6,0.7209139 109.6,0.5 C109.6,0.2790861 109.779086,0 110,0 L111,0 C111.220914,0 111.4,0.2790861 111.4,0.5 Z" id="vLLM-dash-box" fill="#8E8E8E" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="..." transform="translate(70.7, 11)" fill="#8E8E8E">
+                        <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                    </g>
+                    <g id="1" transform="translate(8, 5)" fill="#FDB515">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="2" transform="translate(40, 5)" fill="#FDB515">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="3" transform="translate(86, 5)" fill="#FDB515">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <path d="M97.5,0.185947956 L100,5.18594796 L97.999,5.18594796 L98,20.4637121 C98,20.7398544 97.7761424,20.9637121 97.5,20.9637121 C97.2238576,20.9637121 97,20.7398544 97,20.4637121 L96.999,5.18594796 L95,5.18594796 L97.5,0.185947956 Z" id="arrow-top" fill="#8E8E8E" fill-rule="nonzero"></path>
+                <g id="top-@01_yellow" transform="translate(86, -12.0363)" xlink:href="#path-9" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" fill-rule="evenodd" x="0" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+            </g>
+        </g>
+        <g id="CLS-Pooling" stroke-width="1" transform="translate(130, 70)">
+            <g id="body">
+                <g id="CLS-Pooling" transform="translate(0, 236)" fill="#7D7D7D" fill-rule="nonzero">
+                    <path d="M32.1660156,12.1396484 C33.0683594,12.1396484 33.8328451,11.9301758 34.4594727,11.5112305 C34.6599935,11.371582 34.7602539,11.1907552 34.7602539,10.96875 C34.7602539,10.8111979 34.7092285,10.6715495 34.6071777,10.5498047 C34.505127,10.4280599 34.3806966,10.3671875 34.2338867,10.3671875 C34.1586914,10.3671875 34.0834961,10.3868815 34.0083008,10.4262695 C33.4353841,10.737793 32.9018555,10.8935547 32.4077148,10.8935547 C31.6629232,10.8935547 31.0926921,10.6509603 30.6970215,10.1657715 C30.3013509,9.68058268 30.1035156,8.99039714 30.1035156,8.09521484 C30.1035156,7.15348307 30.3004557,6.44986979 30.6943359,5.984375 C31.0882161,5.51888021 31.6486003,5.28613281 32.3754883,5.28613281 C32.5151367,5.28613281 32.6494141,5.29508464 32.7783203,5.31298828 C32.9072266,5.33089193 33.0289714,5.35685221 33.1435547,5.39086914 C33.258138,5.42488607 33.3521322,5.45532227 33.4255371,5.48217773 C33.4989421,5.5090332 33.5866699,5.54573568 33.6887207,5.59228516 C33.7907715,5.63883464 33.8561198,5.66748047 33.8847656,5.67822266 C33.9599609,5.70686849 34.0279948,5.72119141 34.0888672,5.72119141 C34.25,5.72119141 34.3896484,5.65673828 34.5078125,5.52783203 C34.6259766,5.39892578 34.6850586,5.25211589 34.6850586,5.08740234 C34.6850586,4.85107422 34.5615234,4.6648763 34.3144531,4.52880859 C33.655599,4.19938151 32.9842122,4.03466797 32.300293,4.03466797 C31.7560221,4.03466797 31.2502441,4.13045247 30.782959,4.32202148 C30.3156738,4.51359049 29.9119466,4.78304036 29.5717773,5.13037109 C29.2316081,5.47770182 28.9648438,5.90917969 28.7714844,6.42480469 C28.578125,6.94042969 28.4814453,7.50976562 28.4814453,8.1328125 C28.4814453,8.9313151 28.6416829,9.63582357 28.9621582,10.2463379 C29.2826335,10.8568522 29.7212728,11.3250326 30.2780762,11.6508789 C30.8348796,11.9767253 31.4641927,12.1396484 32.1660156,12.1396484 Z" id="Path"></path>
+                    <path d="M36.9140625,12 L40.7060547,12 C40.8815104,12 41.022054,11.9391276 41.1276855,11.8173828 C41.2333171,11.695638 41.2861328,11.5541992 41.2861328,11.3930664 C41.2861328,11.2283529 41.2333171,11.0842285 41.1276855,10.9606934 C41.022054,10.8371582 40.8815104,10.7753906 40.7060547,10.7753906 L37.6767578,10.7753906 L37.6713867,5.06054688 C37.6713867,4.82063802 37.5961914,4.62727865 37.4458008,4.48046875 C37.2954102,4.33365885 37.1163737,4.26025391 36.9086914,4.26025391 C36.6974284,4.26025391 36.5148112,4.33365885 36.3608398,4.48046875 C36.2068685,4.62727865 36.1298828,4.82063802 36.1298828,5.06054688 L36.1298828,11.0600586 C36.1298828,11.3178711 36.2032878,11.5389811 36.3500977,11.7233887 C36.4969076,11.9077962 36.6848958,12 36.9140625,12 Z" id="Path"></path>
+                    <path d="M44.8901367,12.1396484 C45.8282878,12.1396484 46.575765,11.9292806 47.1325684,11.5085449 C47.6893717,11.0878092 47.9677734,10.5157878 47.9677734,9.79248047 C47.9677734,9.48095703 47.921224,9.20613607 47.828125,8.96801758 C47.735026,8.72989909 47.5909017,8.52311198 47.395752,8.34765625 C47.2006022,8.17220052 46.9803874,8.02360026 46.7351074,7.90185547 C46.4898275,7.78011068 46.188151,7.66194661 45.8300781,7.54736328 C45.8121745,7.54020182 45.5507812,7.46321615 45.0458984,7.31640625 C44.5410156,7.16959635 44.2796224,7.09261068 44.2617188,7.08544922 C43.9931641,6.99593099 43.7935384,6.88045247 43.6628418,6.73901367 C43.5321452,6.59757487 43.4667969,6.4140625 43.4667969,6.18847656 C43.4667969,6.01302083 43.5097656,5.86263021 43.5957031,5.73730469 C43.6816406,5.61197917 43.8015951,5.51529948 43.9555664,5.44726562 C44.1095378,5.37923177 44.2769368,5.32999674 44.4577637,5.29956055 C44.6385905,5.26912435 44.8400065,5.25390625 45.0620117,5.25390625 C45.7316081,5.25390625 46.3546549,5.41861979 46.9311523,5.74804688 C47.0242513,5.79817708 47.1101888,5.82324219 47.1889648,5.82324219 C47.3393555,5.82324219 47.4682617,5.75789388 47.5756836,5.62719727 C47.6831055,5.49650065 47.7368164,5.35058594 47.7368164,5.18945312 C47.7368164,5.03190104 47.6813151,4.90299479 47.5703125,4.80273438 C47.3125,4.58072917 46.9418945,4.3972168 46.4584961,4.25219727 C45.9750977,4.10717773 45.4773763,4.03466797 44.965332,4.03466797 C44.0880534,4.03466797 43.3728027,4.2351888 42.8195801,4.63623047 C42.2663574,5.03727214 41.9897461,5.5797526 41.9897461,6.26367187 C41.9897461,6.79361979 42.1249186,7.21346029 42.3952637,7.52319336 C42.6656087,7.83292643 43.1069336,8.09342448 43.7192383,8.3046875 L45.4916992,8.89550781 C45.824707,9.00292969 46.069987,9.1336263 46.2275391,9.28759766 C46.3850911,9.44156901 46.4638672,9.6546224 46.4638672,9.92675781 C46.4638672,10.2561849 46.3179525,10.5050456 46.026123,10.6733398 C45.7342936,10.8416341 45.3538411,10.9257812 44.8847656,10.9257812 C44.1507161,10.9257812 43.4488932,10.7252604 42.7792969,10.3242188 C42.7184245,10.2884115 42.6557617,10.2705078 42.5913086,10.2705078 C42.4373372,10.2705078 42.2967936,10.3555501 42.1696777,10.5256348 C42.0425618,10.6957194 41.9790039,10.8684896 41.9790039,11.0439453 C41.9790039,11.1764323 42.0201823,11.273112 42.1025391,11.3339844 C42.7936198,11.8710937 43.722819,12.1396484 44.8901367,12.1396484 Z" id="Path"></path>
+                    <path d="M53.1938477,12.0053711 C53.4051107,12.0053711 53.5868327,11.9319661 53.7390137,11.7851562 C53.8911947,11.6383464 53.9672852,11.444987 53.9672852,11.2050781 L53.9672852,8.92236328 L55.4658203,8.92236328 C57.4244792,8.92236328 58.4038086,8.14176432 58.4038086,6.58056641 C58.4038086,6.15445964 58.3375651,5.78833008 58.2050781,5.48217773 C58.0725911,5.17602539 57.8783366,4.93432617 57.6223145,4.75708008 C57.3662923,4.57983398 57.0708822,4.45092773 56.736084,4.37036133 C56.4012858,4.28979492 56.0118815,4.24951172 55.5678711,4.24951172 L53.2583008,4.24951172 C52.9933268,4.24951172 52.7874349,4.33902995 52.640625,4.51806641 C52.4938151,4.69710286 52.4204102,4.9226888 52.4204102,5.19482422 L52.4204102,11.2050781 C52.4204102,11.444987 52.4973958,11.6383464 52.6513672,11.7851562 C52.8053385,11.9319661 52.9861654,12.0053711 53.1938477,12.0053711 Z M53.9672852,7.80517578 L53.9672852,5.42578125 L55.4013672,5.42578125 C55.6520182,5.42578125 55.8605957,5.44010417 56.0270996,5.46875 C56.1936035,5.49739583 56.3457845,5.55110677 56.4836426,5.62988281 C56.6215007,5.70865885 56.7226562,5.82682292 56.7871094,5.984375 C56.8515625,6.14192708 56.8837891,6.34065755 56.8837891,6.58056641 C56.8837891,6.82763672 56.8515625,7.03173828 56.7871094,7.19287109 C56.7226562,7.35400391 56.6215007,7.47843424 56.4836426,7.56616211 C56.3457845,7.65388997 56.1900228,7.71565755 56.0163574,7.75146484 C55.8426921,7.78727214 55.6251628,7.80517578 55.3637695,7.80517578 L53.9672852,7.80517578 Z" id="Shape"></path>
+                    <path d="M61.8413086,11.1459961 C61.3793945,11.1459961 61.0195312,10.9848633 60.7617188,10.6625977 C60.5039062,10.340332 60.375,9.88916016 60.375,9.30908203 C60.375,8.72184245 60.5030111,8.26619466 60.7590332,7.94213867 C61.0150553,7.61808268 61.3758138,7.45605469 61.8413086,7.45605469 C62.3068034,7.45605469 62.668457,7.61897786 62.9262695,7.94482422 C63.184082,8.27067057 63.3129883,8.72542318 63.3129883,9.30908203 C63.3129883,9.88916016 63.184082,10.340332 62.9262695,10.6625977 C62.668457,10.9848633 62.3068034,11.1459961 61.8413086,11.1459961 Z M61.8413086,12.0644531 C62.2208659,12.0644531 62.5681966,12.0125326 62.8833008,11.9086914 C63.1984049,11.8048503 63.4642741,11.666097 63.6809082,11.4924316 C63.8975423,11.3187663 64.0801595,11.1128743 64.2287598,10.8747559 C64.37736,10.6366374 64.4865723,10.3868815 64.5563965,10.1254883 C64.6262207,9.86409505 64.6611328,9.59195964 64.6611328,9.30908203 C64.6611328,9.01188151 64.6244303,8.72721354 64.5510254,8.45507812 C64.4776204,8.18294271 64.3639323,7.92960612 64.2099609,7.69506836 C64.0559896,7.4605306 63.8697917,7.2582194 63.6513672,7.08813477 C63.4329427,6.91805013 63.1688639,6.78377279 62.8591309,6.68530273 C62.5493978,6.58683268 62.2101237,6.53759766 61.8413086,6.53759766 C61.465332,6.53759766 61.1206868,6.58862305 60.807373,6.69067383 C60.4940592,6.79272461 60.2290853,6.93147786 60.0124512,7.10693359 C59.7958171,7.28238932 59.6123047,7.48828125 59.4619141,7.72460937 C59.3115234,7.9609375 59.201416,8.21248372 59.1315918,8.47924805 C59.0617676,8.74601237 59.0268555,9.0226237 59.0268555,9.30908203 C59.0268555,9.67073568 59.0841471,10.0126953 59.1987305,10.3349609 C59.3133138,10.6572266 59.4807129,10.949056 59.7009277,11.2104492 C59.9211426,11.4718424 60.2156576,11.6795247 60.5844727,11.8334961 C60.9532878,11.9874674 61.3722331,12.0644531 61.8413086,12.0644531 Z" id="Shape"></path>
+                    <path d="M68.3349609,11.1459961 C67.8730469,11.1459961 67.5131836,10.9848633 67.2553711,10.6625977 C66.9975586,10.340332 66.8686523,9.88916016 66.8686523,9.30908203 C66.8686523,8.72184245 66.9966634,8.26619466 67.2526855,7.94213867 C67.5087077,7.61808268 67.8694661,7.45605469 68.3349609,7.45605469 C68.8004557,7.45605469 69.1621094,7.61897786 69.4199219,7.94482422 C69.6777344,8.27067057 69.8066406,8.72542318 69.8066406,9.30908203 C69.8066406,9.88916016 69.6777344,10.340332 69.4199219,10.6625977 C69.1621094,10.9848633 68.8004557,11.1459961 68.3349609,11.1459961 Z M68.3349609,12.0644531 C68.7145182,12.0644531 69.061849,12.0125326 69.3769531,11.9086914 C69.6920573,11.8048503 69.9579264,11.666097 70.1745605,11.4924316 C70.3911947,11.3187663 70.5738118,11.1128743 70.7224121,10.8747559 C70.8710124,10.6366374 70.9802246,10.3868815 71.0500488,10.1254883 C71.119873,9.86409505 71.1547852,9.59195964 71.1547852,9.30908203 C71.1547852,9.01188151 71.1180827,8.72721354 71.0446777,8.45507812 C70.9712728,8.18294271 70.8575846,7.92960612 70.7036133,7.69506836 C70.5496419,7.4605306 70.363444,7.2582194 70.1450195,7.08813477 C69.9265951,6.91805013 69.6625163,6.78377279 69.3527832,6.68530273 C69.0430501,6.58683268 68.703776,6.53759766 68.3349609,6.53759766 C67.9589844,6.53759766 67.6143392,6.58862305 67.3010254,6.69067383 C66.9877116,6.79272461 66.7227376,6.93147786 66.5061035,7.10693359 C66.2894694,7.28238932 66.105957,7.48828125 65.9555664,7.72460937 C65.8051758,7.9609375 65.6950684,8.21248372 65.6252441,8.47924805 C65.5554199,8.74601237 65.5205078,9.0226237 65.5205078,9.30908203 C65.5205078,9.67073568 65.5777995,10.0126953 65.6923828,10.3349609 C65.8069661,10.6572266 65.9743652,10.949056 66.1945801,11.2104492 C66.4147949,11.4718424 66.7093099,11.6795247 67.078125,11.8334961 C67.4469401,11.9874674 67.8658854,12.0644531 68.3349609,12.0644531 Z" id="Shape"></path>
+                    <path d="M72.9863281,12 C73.186849,12 73.3524577,11.9337565 73.4831543,11.8012695 C73.6138509,11.6687826 73.6791992,11.484375 73.6791992,11.2480469 L73.6791992,5.01220703 C73.6791992,4.77587891 73.6147461,4.59147135 73.4858398,4.45898438 C73.3569336,4.3264974 73.1940104,4.26025391 72.9970703,4.26025391 C72.8001302,4.26025391 72.6389974,4.3264974 72.5136719,4.45898438 C72.3883464,4.59147135 72.3256836,4.77587891 72.3256836,5.01220703 L72.3256836,11.2480469 C72.3256836,11.4879557 72.3874512,11.6732585 72.5109863,11.8039551 C72.6345215,11.9346517 72.7929688,12 72.9863281,12 Z" id="Path"></path>
+                    <path d="M75.8115234,12 C76.0084635,12 76.1704915,11.9337565 76.2976074,11.8012695 C76.4247233,11.6687826 76.4882812,11.484375 76.4882812,11.2480469 L76.4882812,7.37011719 C76.4882812,7.13020833 76.4247233,6.94401042 76.2976074,6.81152344 C76.1704915,6.67903646 76.0084635,6.61279297 75.8115234,6.61279297 C75.6145833,6.61279297 75.4534505,6.67903646 75.328125,6.81152344 C75.2027995,6.94401042 75.1401367,7.13020833 75.1401367,7.37011719 L75.1401367,11.2480469 C75.1401367,11.4879557 75.2027995,11.6732585 75.328125,11.8039551 C75.4534505,11.9346517 75.6145833,12 75.8115234,12 Z M75.8115234,5.53857422 C76.0442708,5.53857422 76.2331543,5.46964518 76.3781738,5.33178711 C76.5231934,5.19392904 76.5957031,5.0139974 76.5957031,4.79199219 C76.5957031,4.56998698 76.5240885,4.39095052 76.3808594,4.25488281 C76.2376302,4.1188151 76.0496419,4.05078125 75.8168945,4.05078125 C75.5805664,4.05078125 75.3898926,4.1188151 75.244873,4.25488281 C75.0998535,4.39095052 75.0273438,4.56998698 75.0273438,4.79199219 C75.0273438,5.0139974 75.0998535,5.19392904 75.244873,5.33178711 C75.3898926,5.46964518 75.578776,5.53857422 75.8115234,5.53857422 Z" id="Shape"></path>
+                    <path d="M78.5239258,12 C78.7208659,12 78.8828939,11.9346517 79.0100098,11.8039551 C79.1371257,11.6732585 79.2006836,11.4915365 79.2006836,11.2587891 L79.2006836,8.98681641 C79.2006836,8.5320638 79.3313802,8.1632487 79.5927734,7.88037109 C79.8541667,7.59749349 80.1638997,7.45605469 80.5219727,7.45605469 C80.8191732,7.45605469 81.0635579,7.55362956 81.255127,7.7487793 C81.446696,7.94392904 81.5424805,8.22591146 81.5424805,8.59472656 L81.5424805,11.2587891 C81.5424805,11.4915365 81.6051432,11.6732585 81.7304688,11.8039551 C81.8557943,11.9346517 82.0133464,12 82.203125,12 C82.4036458,12 82.5683594,11.9346517 82.6972656,11.8039551 C82.8261719,11.6732585 82.890625,11.4915365 82.890625,11.2587891 L82.890625,8.60009766 C82.890625,8.25634766 82.8387044,7.95019531 82.7348633,7.68164063 C82.6310221,7.41308594 82.4895833,7.19734701 82.3105469,7.03442383 C82.1315104,6.87150065 81.928304,6.74796549 81.7009277,6.66381836 C81.4735514,6.57967122 81.230957,6.53759766 80.9731445,6.53759766 C80.5541992,6.53759766 80.1934408,6.6163737 79.8908691,6.77392578 C79.5882975,6.93147786 79.3582357,7.1624349 79.2006836,7.46679688 L79.2006836,7.27880859 C79.2006836,7.06754557 79.1380208,6.90372721 79.0126953,6.78735352 C78.8873698,6.67097982 78.7280273,6.61279297 78.534668,6.61279297 C78.3377279,6.61279297 78.1748047,6.671875 78.0458984,6.79003906 C77.9169922,6.90820312 77.8525391,7.07470703 77.8525391,7.28955078 L77.8525391,11.2587891 C77.8525391,11.4915365 77.9152018,11.6732585 78.0405273,11.8039551 C78.1658529,11.9346517 78.3269857,12 78.5239258,12 Z" id="Path"></path>
+                    <path d="M88.1328125,11.9301758 C88.1328125,12.4386393 87.9931641,12.8173014 87.7138672,13.0661621 C87.4345703,13.3150228 87.0442708,13.4394531 86.5429688,13.4394531 C86.3997396,13.4394531 86.2538249,13.4269206 86.1052246,13.4018555 C85.9566243,13.3767904 85.8447266,13.3535156 85.7695312,13.3320313 C85.6943359,13.3105469 85.5761719,13.272054 85.4150391,13.2165527 C85.2539062,13.1610514 85.1572266,13.1279297 85.125,13.1171875 C85.0677083,13.0957031 85.0104167,13.0849609 84.953125,13.0849609 C84.8277995,13.0849609 84.7239583,13.1315104 84.6416016,13.2246094 C84.5592448,13.3177083 84.5180664,13.4251302 84.5180664,13.546875 C84.5180664,13.71875 84.6004232,13.8548177 84.7651367,13.9550781 C84.9584961,14.0732422 85.2297363,14.1726074 85.5788574,14.2531738 C85.9279785,14.3337402 86.2941081,14.3740234 86.6772461,14.3740234 C87.5222982,14.3740234 88.1909993,14.1520182 88.6833496,13.7080078 C89.1756999,13.2639974 89.421875,12.6176758 89.421875,11.769043 L89.421875,7.36474609 C89.421875,7.12841797 89.3618978,6.94401042 89.2419434,6.81152344 C89.1219889,6.67903646 88.9689128,6.61279297 88.7827148,6.61279297 C88.6180013,6.61279297 88.4774577,6.66202799 88.361084,6.76049805 C88.2447103,6.8589681 88.1775716,6.99951172 88.159668,7.18212891 L88.159668,7.42919922 C87.9806315,7.14274089 87.7675781,6.92431641 87.5205078,6.77392578 C87.2734375,6.62353516 86.9332682,6.54833984 86.5,6.54833984 C85.7229818,6.54833984 85.1079915,6.80704753 84.6550293,7.32446289 C84.2020671,7.84187826 83.9755859,8.51416016 83.9755859,9.34130859 C83.9755859,10.1612956 84.2083333,10.8103027 84.6738281,11.2883301 C85.1393229,11.7663574 85.7587891,12.0053711 86.5322266,12.0053711 C87.2949219,12.0053711 87.8284505,11.7171224 88.1328125,11.140625 L88.1328125,11.9301758 Z M86.7739258,11.1083984 C86.3657227,11.101237 86.0264486,10.9463704 85.7561035,10.6437988 C85.4857585,10.3412272 85.3505859,9.89453125 85.3505859,9.30371094 C85.3505859,9.09960938 85.3666992,8.90893555 85.3989258,8.73168945 C85.4311523,8.55444336 85.4830729,8.38614909 85.5546875,8.22680664 C85.6263021,8.06746419 85.7158203,7.93139648 85.8232422,7.81860352 C85.9306641,7.70581055 86.0649414,7.61629232 86.2260742,7.55004883 C86.387207,7.48380534 86.5680339,7.45068359 86.7685547,7.45068359 C87.6780599,7.45068359 88.1328125,8.0719401 88.1328125,9.31445312 C88.1328125,9.92675781 88.0092773,10.3770345 87.762207,10.6652832 C87.5151367,10.9535319 87.1857096,11.101237 86.7739258,11.1083984 Z" id="Shape"></path>
+                </g>
+                <g id="vLLM-dash-box" transform="translate(0, 72)" xlink:href="#path-10">
+                    <path d="M7.60126829,127.489595 C7.73368411,127.496523 7.86662278,127.5 8,127.5 L8.77464335,127.5 C9.05078572,127.5 9.27464335,127.723858 9.27464335,128 C9.27464335,128.276142 9.05078572,128.5 8.77464335,128.5 L8,128.5 C7.8491935,128.5 7.69883087,128.496068 7.54901422,128.488228 C7.27324911,128.473799 7.06139478,128.23855 7.07582434,127.962785 C7.09025391,127.687019 7.32550317,127.475165 7.60126829,127.489595 Z M101.674643,128 C101.674643,128.276142 101.450786,128.5 101.174643,128.5 L99.9746433,128.5 C99.698501,128.5 99.4746433,128.276142 99.4746433,128 C99.4746433,127.723858 99.698501,127.5 99.9746433,127.5 L101.174643,127.5 C101.450786,127.5 101.674643,127.723858 101.674643,128 Z M17.6746433,128 C17.6746433,128.276142 17.4507857,128.5 17.1746433,128.5 L15.9746433,128.5 C15.698501,128.5 15.4746433,128.276142 15.4746433,128 C15.4746433,127.723858 15.698501,127.5 15.9746433,127.5 L17.1746434,127.5 C17.4507857,127.5 17.6746433,127.723858 17.6746433,128 Z M21.8746433,128 C21.8746433,128.276142 21.6507857,128.5 21.3746433,128.5 L20.1746433,128.5 C19.898501,128.5 19.6746433,128.276142 19.6746433,128 C19.6746433,127.723858 19.898501,127.5 20.1746433,127.5 L21.3746434,127.5 C21.6507857,127.5 21.8746433,127.723858 21.8746433,128 Z M26.0746433,128 C26.0746433,128.276142 25.8507857,128.5 25.5746433,128.5 L24.3746433,128.5 C24.098501,128.5 23.8746433,128.276142 23.8746433,128 C23.8746433,127.723858 24.098501,127.5 24.3746433,127.5 L25.5746435,127.5 C25.8507857,127.5 26.0746433,127.723858 26.0746433,128 Z M30.2746433,128 C30.2746433,128.276142 30.0507857,128.5 29.7746433,128.5 L28.5746433,128.5 C28.298501,128.5 28.0746433,128.276142 28.0746433,128 C28.0746433,127.723858 28.298501,127.5 28.5746433,127.5 L29.7746434,127.5 C30.0507857,127.5 30.2746433,127.723858 30.2746433,128 Z M34.4746433,128 C34.4746433,128.276142 34.2507857,128.5 33.9746433,128.5 L32.7746433,128.5 C32.498501,128.5 32.2746433,128.276142 32.2746433,128 C32.2746433,127.723858 32.498501,127.5 32.7746433,127.5 L33.9746434,127.5 C34.2507857,127.5 34.4746433,127.723858 34.4746433,128 Z M38.6746433,128 C38.6746433,128.276142 38.4507857,128.5 38.1746433,128.5 L36.9746433,128.5 C36.698501,128.5 36.4746433,128.276142 36.4746433,128 C36.4746433,127.723858 36.698501,127.5 36.9746433,127.5 L38.1746434,127.5 C38.4507857,127.5 38.6746433,127.723858 38.6746433,128 Z M42.8746433,128 C42.8746433,128.276142 42.6507857,128.5 42.3746433,128.5 L41.1746433,128.5 C40.898501,128.5 40.6746433,128.276142 40.6746433,128 C40.6746433,127.723858 40.898501,127.5 41.1746433,127.5 L42.3746434,127.5 C42.6507857,127.5 42.8746433,127.723858 42.8746433,128 Z M47.0746433,128 C47.0746433,128.276142 46.8507857,128.5 46.5746433,128.5 L45.3746433,128.5 C45.098501,128.5 44.8746433,128.276142 44.8746433,128 C44.8746433,127.723858 45.098501,127.5 45.3746433,127.5 L46.5746434,127.5 C46.8507857,127.5 47.0746433,127.723858 47.0746433,128 Z M51.2746433,128 C51.2746433,128.276142 51.0507857,128.5 50.7746433,128.5 L49.5746433,128.5 C49.298501,128.5 49.0746433,128.276142 49.0746433,128 C49.0746433,127.723858 49.298501,127.5 49.5746433,127.5 L50.7746434,127.5 C51.0507857,127.5 51.2746433,127.723858 51.2746433,128 Z M55.4746433,128 C55.4746433,128.276142 55.2507857,128.5 54.9746433,128.5 L53.7746433,128.5 C53.498501,128.5 53.2746433,128.276142 53.2746433,128 C53.2746433,127.723858 53.498501,127.5 53.7746433,127.5 L54.9746434,127.5 C55.2507857,127.5 55.4746433,127.723858 55.4746433,128 Z M59.6746433,128 C59.6746433,128.276142 59.4507857,128.5 59.1746433,128.5 L57.9746433,128.5 C57.698501,128.5 57.4746433,128.276142 57.4746433,128 C57.4746433,127.723858 57.698501,127.5 57.9746433,127.5 L59.1746433,127.5 C59.4507857,127.5 59.6746433,127.723858 59.6746433,128 Z M105.874643,128 C105.874643,128.276142 105.650786,128.5 105.374643,128.5 L104.174643,128.5 C103.898501,128.5 103.674643,128.276142 103.674643,128 C103.674643,127.723858 103.898501,127.5 104.174643,127.5 L105.374643,127.5 C105.650786,127.5 105.874643,127.723858 105.874643,128 Z M110.074643,128 C110.074643,128.276142 109.850786,128.5 109.574643,128.5 L108.374643,128.5 C108.098501,128.5 107.874643,128.276142 107.874643,128 C107.874643,127.723858 108.098501,127.5 108.374643,127.5 L109.574643,127.5 C109.850786,127.5 110.074643,127.723858 110.074643,128 Z M13.4746433,128 C13.4746433,128.276142 13.2507857,128.5 12.9746433,128.5 L11.7746433,128.5 C11.498501,128.5 11.2746433,128.276142 11.2746433,128 C11.2746433,127.723858 11.498501,127.5 11.7746433,127.5 L12.9746434,127.5 C13.2507857,127.5 13.4746433,127.723858 13.4746433,128 Z M63.8746433,128 C63.8746433,128.276142 63.6507857,128.5 63.3746433,128.5 L62.1746433,128.5 C61.898501,128.5 61.6746433,128.276142 61.6746433,128 C61.6746433,127.723858 61.898501,127.5 62.1746433,127.5 L63.3746432,127.5 C63.6507857,127.5 63.8746433,127.723858 63.8746433,128 Z M68.0746433,128 C68.0746433,128.276142 67.8507857,128.5 67.5746433,128.5 L66.3746433,128.5 C66.098501,128.5 65.8746433,128.276142 65.8746433,128 C65.8746433,127.723858 66.098501,127.5 66.3746433,127.5 L67.5746433,127.5 C67.8507857,127.5 68.0746433,127.723858 68.0746433,128 Z M72.2746433,128 C72.2746433,128.276142 72.0507857,128.5 71.7746433,128.5 L70.5746433,128.5 C70.298501,128.5 70.0746433,128.276142 70.0746433,128 C70.0746433,127.723858 70.298501,127.5 70.5746433,127.5 L71.7746433,127.5 C72.0507857,127.5 72.2746433,127.723858 72.2746433,128 Z M76.4746433,128 C76.4746433,128.276142 76.2507857,128.5 75.9746433,128.5 L74.7746433,128.5 C74.498501,128.5 74.2746433,128.276142 74.2746433,128 C74.2746433,127.723858 74.498501,127.5 74.7746433,127.5 L75.9746433,127.5 C76.2507857,127.5 76.4746433,127.723858 76.4746433,128 Z M80.6746433,128 C80.6746433,128.276142 80.4507857,128.5 80.1746433,128.5 L78.9746433,128.5 C78.698501,128.5 78.4746433,128.276142 78.4746433,128 C78.4746433,127.723858 78.698501,127.5 78.9746433,127.5 L80.1746433,127.5 C80.4507857,127.5 80.6746433,127.723858 80.6746433,128 Z M84.8746433,128 C84.8746433,128.276142 84.6507857,128.5 84.3746433,128.5 L83.1746433,128.5 C82.898501,128.5 82.6746433,128.276142 82.6746433,128 C82.6746433,127.723858 82.898501,127.5 83.1746433,127.5 L84.3746433,127.5 C84.6507857,127.5 84.8746433,127.723858 84.8746433,128 Z M89.0746433,128 C89.0746433,128.276142 88.8507857,128.5 88.5746433,128.5 L87.3746433,128.5 C87.098501,128.5 86.8746433,128.276142 86.8746433,128 C86.8746433,127.723858 87.098501,127.5 87.3746433,127.5 L88.5746433,127.5 C88.8507857,127.5 89.0746433,127.723858 89.0746433,128 Z M93.2746433,128 C93.2746433,128.276142 93.0507857,128.5 92.7746433,128.5 L91.5746433,128.5 C91.298501,128.5 91.0746433,128.276142 91.0746433,128 C91.0746433,127.723858 91.298501,127.5 91.5746433,127.5 L92.7746433,127.5 C93.0507857,127.5 93.2746433,127.723858 93.2746433,128 Z M97.4746433,128 C97.4746433,128.276142 97.2507857,128.5 96.9746433,128.5 L95.7746433,128.5 C95.498501,128.5 95.2746433,128.276142 95.2746433,128 C95.2746433,127.723858 95.498501,127.5 95.7746433,127.5 L96.9746433,127.5 C97.2507857,127.5 97.4746433,127.723858 97.4746433,128 Z M114.055358,126.915582 C114.180112,127.161937 114.081535,127.462781 113.835179,127.587535 C113.455748,127.779678 113.062077,127.943136 112.657126,128.076292 C112.394802,128.16255 112.11222,128.01982 112.025961,127.757496 C111.939703,127.495171 112.082433,127.212589 112.344758,127.126331 C112.701744,127.008946 113.048821,126.864836 113.383405,126.695403 C113.62976,126.570649 113.930604,126.669226 114.055358,126.915582 Z M3.87987036,126.26801 C4.19335033,126.474514 4.52190256,126.657254 4.86287326,126.814501 C5.11363396,126.930146 5.22316717,127.227176 5.10752253,127.477937 C4.9918779,127.728697 4.69484768,127.838231 4.44408698,127.722586 C4.05748525,127.544295 3.68504135,127.337142 3.32975792,127.103101 C3.09915402,126.951191 3.03535949,126.641103 3.18726884,126.410499 C3.3391782,126.179895 3.64926646,126.116101 3.87987036,126.26801 Z M116.999048,123.909773 C117.231117,124.059434 117.297922,124.368887 117.148261,124.600957 C116.917678,124.958505 116.660543,125.298348 116.37924,125.617559 C116.196668,125.824735 115.880714,125.844681 115.673538,125.662108 C115.466362,125.479535 115.446417,125.163582 115.62899,124.956406 C115.877349,124.674578 116.104345,124.374568 116.307864,124.058986 C116.457525,123.826916 116.766979,123.760111 116.999048,123.909773 Z M1.27256369,123.319388 C1.43898255,123.655985 1.63055181,123.979556 1.84546492,124.287446 C2.00352124,124.513881 1.94808936,124.825572 1.72165435,124.983629 C1.49521934,125.141685 1.18352754,125.086253 1.02547122,124.859818 C0.781933905,124.510921 0.564805969,124.14418 0.376143395,123.762593 C0.25375572,123.515054 0.355211426,123.215168 0.602751055,123.092781 C0.850290685,122.970393 1.15017602,123.071849 1.27256369,123.319388 Z M118.493626,120.332066 C118.477178,120.759793 118.428966,121.182879 118.349775,121.598809 C118.298127,121.870079 118.036351,122.048117 117.765081,121.996469 C117.493812,121.944821 117.315774,121.683044 117.367422,121.411775 C117.437286,121.044832 117.479837,120.671419 117.494364,120.29364 C117.504975,120.017702 117.73727,119.802611 118.013208,119.813222 C118.289146,119.823833 118.504237,120.056128 118.493626,120.332066 Z M0.5,119.461965 L0.5,120.011642 C0.500324493,120.219706 0.509101847,120.42668 0.526243834,120.632238 C0.549192342,120.907426 0.344712508,121.149112 0.0695253402,121.172061 C-0.205661827,121.195009 -0.447348546,120.990529 -0.470297054,120.715342 C-0.489701403,120.482655 -0.49963303,120.248463 -0.5,120.01241 L-0.5,119.461965 C-0.5,119.185831 -0.276150919,118.961965 7.30095984e-11,118.961965 C0.27613383,118.961965 0.5,119.185814 0.5,119.461965 Z M118.5,116.112678 L118.5,117.312678 C118.5,117.588821 118.276142,117.812678 118,117.812678 C117.723858,117.812678 117.5,117.588821 117.5,117.312678 L117.5,116.112678 C117.5,115.836536 117.723858,115.612678 118,115.612678 C118.276142,115.612678 118.5,115.836536 118.5,116.112678 Z M0.5,115.261965 L0.5,116.461965 C0.5,116.738107 0.276142375,116.961965 0,116.961965 C-0.276142375,116.961965 -0.5,116.738107 -0.5,116.461965 L-0.5,115.261965 C-0.5,114.985823 -0.276142375,114.761965 0,114.761965 C0.276142375,114.761965 0.5,114.985823 0.5,115.261965 Z M118.5,111.912678 L118.5,113.112678 C118.5,113.388821 118.276142,113.612678 118,113.612678 C117.723858,113.612678 117.5,113.388821 117.5,113.112678 L117.5,111.912678 C117.5,111.636536 117.723858,111.412678 118,111.412678 C118.276142,111.412678 118.5,111.636536 118.5,111.912678 Z M0.5,111.061965 L0.5,112.261965 C0.5,112.538107 0.276142375,112.761965 0,112.761965 C-0.276142375,112.761965 -0.5,112.538107 -0.5,112.261965 L-0.5,111.061965 C-0.5,110.785823 -0.276142375,110.561965 0,110.561965 C0.276142375,110.561965 0.5,110.785823 0.5,111.061965 Z M118.5,107.712678 L118.5,108.912678 C118.5,109.188821 118.276142,109.412678 118,109.412678 C117.723858,109.412678 117.5,109.188821 117.5,108.912678 L117.5,107.712678 C117.5,107.436536 117.723858,107.212678 118,107.212678 C118.276142,107.212678 118.5,107.436536 118.5,107.712678 Z M0.5,106.861965 L0.5,108.061965 C0.5,108.338107 0.276142375,108.561965 0,108.561965 C-0.276142375,108.561965 -0.5,108.338107 -0.5,108.061965 L-0.5,106.861965 C-0.5,106.585823 -0.276142375,106.361965 0,106.361965 C0.276142375,106.361965 0.5,106.585823 0.5,106.861965 Z M118.5,103.512678 L118.5,104.712678 C118.5,104.988821 118.276142,105.212678 118,105.212678 C117.723858,105.212678 117.5,104.988821 117.5,104.712678 L117.5,103.512678 C117.5,103.236536 117.723858,103.012678 118,103.012678 C118.276142,103.012678 118.5,103.236536 118.5,103.512678 Z M0.5,102.661965 L0.5,103.861965 C0.5,104.138107 0.276142375,104.361965 0,104.361965 C-0.276142375,104.361965 -0.5,104.138107 -0.5,103.861965 L-0.5,102.661965 C-0.5,102.385823 -0.276142375,102.161965 0,102.161965 C0.276142375,102.161965 0.5,102.385823 0.5,102.661965 Z M118.5,99.3126783 L118.5,100.512678 C118.5,100.788821 118.276142,101.012678 118,101.012678 C117.723858,101.012678 117.5,100.788821 117.5,100.512678 L117.5,99.3126783 C117.5,99.036536 117.723858,98.8126783 118,98.8126783 C118.276142,98.8126783 118.5,99.036536 118.5,99.3126783 Z M0.5,98.461965 L0.5,99.661965 C0.5,99.9381074 0.276142375,100.161965 0,100.161965 C-0.276142375,100.161965 -0.5,99.9381074 -0.5,99.661965 L-0.5,98.461965 C-0.5,98.1858226 -0.276142375,97.961965 0,97.961965 C0.276142375,97.961965 0.5,98.1858226 0.5,98.461965 Z M118.5,95.1126783 L118.5,96.3126783 C118.5,96.5888207 118.276142,96.8126783 118,96.8126783 C117.723858,96.8126783 117.5,96.5888207 117.5,96.3126783 L117.5,95.1126783 C117.5,94.836536 117.723858,94.6126783 118,94.6126783 C118.276142,94.6126783 118.5,94.836536 118.5,95.1126783 Z M0.5,94.261965 L0.5,95.461965 C0.5,95.7381074 0.276142375,95.961965 0,95.961965 C-0.276142375,95.961965 -0.5,95.7381074 -0.5,95.461965 L-0.5,94.261965 C-0.5,93.9858226 -0.276142375,93.761965 0,93.761965 C0.276142375,93.761965 0.5,93.9858226 0.5,94.261965 Z M118.5,90.9126783 L118.5,92.1126783 C118.5,92.3888207 118.276142,92.6126783 118,92.6126783 C117.723858,92.6126783 117.5,92.3888207 117.5,92.1126783 L117.5,90.9126783 C117.5,90.636536 117.723858,90.4126783 118,90.4126783 C118.276142,90.4126783 118.5,90.636536 118.5,90.9126783 Z M0.5,90.061965 L0.5,91.261965 C0.5,91.5381074 0.276142375,91.761965 0,91.761965 C-0.276142375,91.761965 -0.5,91.5381074 -0.5,91.261965 L-0.5,90.061965 C-0.5,89.7858226 -0.276142375,89.561965 0,89.561965 C0.276142375,89.561965 0.5,89.7858226 0.5,90.061965 Z M118.5,86.7126783 L118.5,87.9126783 C118.5,88.1888207 118.276142,88.4126783 118,88.4126783 C117.723858,88.4126783 117.5,88.1888207 117.5,87.9126783 L117.5,86.7126783 C117.5,86.436536 117.723858,86.2126783 118,86.2126783 C118.276142,86.2126783 118.5,86.436536 118.5,86.7126783 Z M0.5,85.861965 L0.5,87.061965 C0.5,87.3381074 0.276142375,87.561965 0,87.561965 C-0.276142375,87.561965 -0.5,87.3381074 -0.5,87.061965 L-0.5,85.861965 C-0.5,85.5858226 -0.276142375,85.361965 0,85.361965 C0.276142375,85.361965 0.5,85.5858226 0.5,85.861965 Z M118.5,82.5126783 L118.5,83.7126783 C118.5,83.9888207 118.276142,84.2126783 118,84.2126783 C117.723858,84.2126783 117.5,83.9888207 117.5,83.7126783 L117.5,82.5126783 C117.5,82.236536 117.723858,82.0126783 118,82.0126783 C118.276142,82.0126783 118.5,82.236536 118.5,82.5126783 Z M0.5,81.661965 L0.5,82.861965 C0.5,83.1381074 0.276142375,83.361965 0,83.361965 C-0.276142375,83.361965 -0.5,83.1381074 -0.5,82.861965 L-0.5,81.661965 C-0.5,81.3858226 -0.276142375,81.161965 0,81.161965 C0.276142375,81.161965 0.5,81.3858226 0.5,81.661965 Z M118.5,78.3126783 L118.5,79.5126783 C118.5,79.7888207 118.276142,80.0126783 118,80.0126783 C117.723858,80.0126783 117.5,79.7888207 117.5,79.5126783 L117.5,78.3126783 C117.5,78.036536 117.723858,77.8126783 118,77.8126783 C118.276142,77.8126783 118.5,78.036536 118.5,78.3126783 Z M0.5,77.461965 L0.5,78.661965 C0.5,78.9381074 0.276142375,79.161965 0,79.161965 C-0.276142375,79.161965 -0.5,78.9381074 -0.5,78.661965 L-0.5,77.461965 C-0.5,77.1858226 -0.276142375,76.961965 0,76.961965 C0.276142375,76.961965 0.5,77.1858226 0.5,77.461965 Z M118.5,74.1126783 L118.5,75.3126783 C118.5,75.5888207 118.276142,75.8126783 118,75.8126783 C117.723858,75.8126783 117.5,75.5888207 117.5,75.3126783 L117.5,74.1126783 C117.5,73.836536 117.723858,73.6126783 118,73.6126783 C118.276142,73.6126783 118.5,73.836536 118.5,74.1126783 Z M0.5,73.261965 L0.5,74.461965 C0.5,74.7381074 0.276142375,74.961965 0,74.961965 C-0.276142375,74.961965 -0.5,74.7381074 -0.5,74.461965 L-0.5,73.261965 C-0.5,72.9858226 -0.276142375,72.761965 0,72.761965 C0.276142375,72.761965 0.5,72.9858226 0.5,73.261965 Z M118.5,69.9126783 L118.5,71.1126783 C118.5,71.3888207 118.276142,71.6126783 118,71.6126783 C117.723858,71.6126783 117.5,71.3888207 117.5,71.1126783 L117.5,69.9126783 C117.5,69.636536 117.723858,69.4126783 118,69.4126783 C118.276142,69.4126783 118.5,69.636536 118.5,69.9126783 Z M0.5,69.061965 L0.5,70.261965 C0.5,70.5381074 0.276142375,70.761965 0,70.761965 C-0.276142375,70.761965 -0.5,70.5381074 -0.5,70.261965 L-0.5,69.061965 C-0.5,68.7858226 -0.276142375,68.561965 0,68.561965 C0.276142375,68.561965 0.5,68.7858226 0.5,69.061965 Z M118.5,65.7126783 L118.5,66.9126783 C118.5,67.1888207 118.276142,67.4126783 118,67.4126783 C117.723858,67.4126783 117.5,67.1888207 117.5,66.9126783 L117.5,65.7126783 C117.5,65.436536 117.723858,65.2126783 118,65.2126783 C118.276142,65.2126783 118.5,65.436536 118.5,65.7126783 Z M0.5,64.861965 L0.5,66.061965 C0.5,66.3381074 0.276142375,66.561965 0,66.561965 C-0.276142375,66.561965 -0.5,66.3381074 -0.5,66.061965 L-0.5,64.861965 C-0.5,64.5858226 -0.276142375,64.361965 0,64.361965 C0.276142375,64.361965 0.5,64.5858226 0.5,64.861965 Z M118.5,61.5126783 L118.5,62.7126783 C118.5,62.9888207 118.276142,63.2126783 118,63.2126783 C117.723858,63.2126783 117.5,62.9888207 117.5,62.7126783 L117.5,61.5126783 C117.5,61.236536 117.723858,61.0126783 118,61.0126783 C118.276142,61.0126783 118.5,61.236536 118.5,61.5126783 Z M0.5,60.661965 L0.5,61.861965 C0.5,62.1381074 0.276142375,62.361965 0,62.361965 C-0.276142375,62.361965 -0.5,62.1381074 -0.5,61.861965 L-0.5,60.661965 C-0.5,60.3858226 -0.276142375,60.161965 0,60.161965 C0.276142375,60.161965 0.5,60.3858226 0.5,60.661965 Z M118.5,57.3126783 L118.5,58.5126783 C118.5,58.7888207 118.276142,59.0126783 118,59.0126783 C117.723858,59.0126783 117.5,58.7888207 117.5,58.5126783 L117.5,57.3126783 C117.5,57.036536 117.723858,56.8126783 118,56.8126783 C118.276142,56.8126783 118.5,57.036536 118.5,57.3126783 Z M0.5,56.461965 L0.5,57.661965 C0.5,57.9381074 0.276142375,58.161965 0,58.161965 C-0.276142375,58.161965 -0.5,57.9381074 -0.5,57.661965 L-0.5,56.461965 C-0.5,56.1858226 -0.276142375,55.961965 0,55.961965 C0.276142375,55.961965 0.5,56.1858226 0.5,56.461965 Z M118.5,53.1126783 L118.5,54.3126783 C118.5,54.5888207 118.276142,54.8126783 118,54.8126783 C117.723858,54.8126783 117.5,54.5888207 117.5,54.3126783 L117.5,53.1126783 C117.5,52.836536 117.723858,52.6126783 118,52.6126783 C118.276142,52.6126783 118.5,52.836536 118.5,53.1126783 Z M0.5,52.261965 L0.5,53.461965 C0.5,53.7381074 0.276142375,53.961965 0,53.961965 C-0.276142375,53.961965 -0.5,53.7381074 -0.5,53.461965 L-0.5,52.261965 C-0.5,51.9858226 -0.276142375,51.761965 0,51.761965 C0.276142375,51.761965 0.5,51.9858226 0.5,52.261965 Z M118.5,48.9126783 L118.5,50.1126783 C118.5,50.3888207 118.276142,50.6126783 118,50.6126783 C117.723858,50.6126783 117.5,50.3888207 117.5,50.1126783 L117.5,48.9126783 C117.5,48.636536 117.723858,48.4126783 118,48.4126783 C118.276142,48.4126783 118.5,48.636536 118.5,48.9126783 Z M0.5,48.061965 L0.5,49.261965 C0.5,49.5381074 0.276142375,49.761965 0,49.761965 C-0.276142375,49.761965 -0.5,49.5381074 -0.5,49.261965 L-0.5,48.061965 C-0.5,47.7858226 -0.276142375,47.561965 0,47.561965 C0.276142375,47.561965 0.5,47.7858226 0.5,48.061965 Z M118.5,44.7126783 L118.5,45.9126783 C118.5,46.1888207 118.276142,46.4126783 118,46.4126783 C117.723858,46.4126783 117.5,46.1888207 117.5,45.9126783 L117.5,44.7126783 C117.5,44.436536 117.723858,44.2126783 118,44.2126783 C118.276142,44.2126783 118.5,44.436536 118.5,44.7126783 Z M0.5,43.861965 L0.5,45.061965 C0.5,45.3381074 0.276142375,45.561965 0,45.561965 C-0.276142375,45.561965 -0.5,45.3381074 -0.5,45.061965 L-0.5,43.861965 C-0.5,43.5858226 -0.276142375,43.361965 0,43.361965 C0.276142375,43.361965 0.5,43.5858226 0.5,43.861965 Z M118.5,40.5126783 L118.5,41.7126783 C118.5,41.9888207 118.276142,42.2126783 118,42.2126783 C117.723858,42.2126783 117.5,41.9888207 117.5,41.7126783 L117.5,40.5126783 C117.5,40.236536 117.723858,40.0126783 118,40.0126783 C118.276142,40.0126783 118.5,40.236536 118.5,40.5126783 Z M0.5,39.661965 L0.5,40.861965 C0.5,41.1381074 0.276142375,41.361965 0,41.361965 C-0.276142375,41.361965 -0.5,41.1381074 -0.5,40.861965 L-0.5,39.661965 C-0.5,39.3858226 -0.276142375,39.161965 0,39.161965 C0.276142375,39.161965 0.5,39.3858226 0.5,39.661965 Z M118.5,36.3126783 L118.5,37.5126783 C118.5,37.7888207 118.276142,38.0126783 118,38.0126783 C117.723858,38.0126783 117.5,37.7888207 117.5,37.5126783 L117.5,36.3126783 C117.5,36.036536 117.723858,35.8126783 118,35.8126783 C118.276142,35.8126783 118.5,36.036536 118.5,36.3126783 Z M0.5,35.461965 L0.5,36.661965 C0.5,36.9381074 0.276142375,37.161965 0,37.161965 C-0.276142375,37.161965 -0.5,36.9381074 -0.5,36.661965 L-0.5,35.461965 C-0.5,35.1858226 -0.276142375,34.961965 0,34.961965 C0.276142375,34.961965 0.5,35.1858226 0.5,35.461965 Z M118.5,32.1126783 L118.5,33.3126783 C118.5,33.5888207 118.276142,33.8126783 118,33.8126783 C117.723858,33.8126783 117.5,33.5888207 117.5,33.3126783 L117.5,32.1126783 C117.5,31.836536 117.723858,31.6126783 118,31.6126783 C118.276142,31.6126783 118.5,31.836536 118.5,32.1126783 Z M0.5,31.261965 L0.5,32.461965 C0.5,32.7381074 0.276142375,32.961965 0,32.961965 C-0.276142375,32.961965 -0.5,32.7381074 -0.5,32.461965 L-0.5,31.261965 C-0.5,30.9858226 -0.276142375,30.761965 0,30.761965 C0.276142375,30.761965 0.5,30.9858226 0.5,31.261965 Z M118.5,27.9126783 L118.5,29.1126783 C118.5,29.3888207 118.276142,29.6126783 118,29.6126783 C117.723858,29.6126783 117.5,29.3888207 117.5,29.1126783 L117.5,27.9126783 C117.5,27.636536 117.723858,27.4126783 118,27.4126783 C118.276142,27.4126783 118.5,27.636536 118.5,27.9126783 Z M0.5,27.061965 L0.5,28.261965 C0.5,28.5381074 0.276142375,28.761965 0,28.761965 C-0.276142375,28.761965 -0.5,28.5381074 -0.5,28.261965 L-0.5,27.061965 C-0.5,26.7858226 -0.276142375,26.561965 0,26.561965 C0.276142375,26.561965 0.5,26.7858226 0.5,27.061965 Z M118.5,23.7126783 L118.5,24.9126783 C118.5,25.1888207 118.276142,25.4126783 118,25.4126783 C117.723858,25.4126783 117.5,25.1888207 117.5,24.9126783 L117.5,23.7126783 C117.5,23.436536 117.723858,23.2126783 118,23.2126783 C118.276142,23.2126783 118.5,23.436536 118.5,23.7126783 Z M0.5,22.861965 L0.5,24.061965 C0.5,24.3381074 0.276142375,24.561965 0,24.561965 C-0.276142375,24.561965 -0.5,24.3381074 -0.5,24.061965 L-0.5,22.861965 C-0.5,22.5858226 -0.276142375,22.361965 0,22.361965 C0.276142375,22.361965 0.5,22.5858226 0.5,22.861965 Z M118.5,19.5126783 L118.5,20.7126783 C118.5,20.9888207 118.276142,21.2126783 118,21.2126783 C117.723858,21.2126783 117.5,20.9888207 117.5,20.7126783 L117.5,19.5126783 C117.5,19.236536 117.723858,19.0126783 118,19.0126783 C118.276142,19.0126783 118.5,19.236536 118.5,19.5126783 Z M0.5,18.661965 L0.5,19.861965 C0.5,20.1381074 0.276142375,20.361965 0,20.361965 C-0.276142375,20.361965 -0.5,20.1381074 -0.5,19.861965 L-0.5,18.661965 C-0.5,18.3858226 -0.276142375,18.161965 0,18.161965 C0.276142375,18.161965 0.5,18.3858226 0.5,18.661965 Z M118.5,15.3126783 L118.5,16.5126783 C118.5,16.7888207 118.276142,17.0126783 118,17.0126783 C117.723858,17.0126783 117.5,16.7888207 117.5,16.5126783 L117.5,15.3126783 C117.5,15.036536 117.723858,14.8126783 118,14.8126783 C118.276142,14.8126783 118.5,15.036536 118.5,15.3126783 Z M0.5,14.461965 L0.5,15.661965 C0.5,15.9381074 0.276142375,16.161965 0,16.161965 C-0.276142375,16.161965 -0.5,15.9381074 -0.5,15.661965 L-0.5,14.461965 C-0.5,14.1858226 -0.276142375,13.961965 0,13.961965 C0.276142375,13.961965 0.5,14.1858226 0.5,14.461965 Z M118.5,11.1126783 L118.5,12.3126783 C118.5,12.5888207 118.276142,12.8126783 118,12.8126783 C117.723858,12.8126783 117.5,12.5888207 117.5,12.3126783 L117.5,11.1126783 C117.5,10.836536 117.723858,10.6126783 118,10.6126783 C118.276142,10.6126783 118.5,10.836536 118.5,11.1126783 Z M0.5,10.261965 L0.5,11.461965 C0.5,11.7381074 0.276142375,11.961965 0,11.961965 C-0.276142375,11.961965 -0.5,11.7381074 -0.5,11.461965 L-0.5,10.261965 C-0.5,9.98582265 -0.276142375,9.76196502 0,9.76196502 C0.276142375,9.76196502 0.5,9.98582265 0.5,10.261965 Z M118.419805,6.82803952 C118.47219,7.20768546 118.499081,7.59225728 118.499978,7.98103351 L118.5,8.11259543 C118.500046,8.3887378 118.276225,8.61263254 118.000083,8.61267833 C117.723941,8.6127241 117.500046,8.3889036 117.5,8.11276123 L117.49998,7.98227163 C117.499188,7.63955287 117.475436,7.29987535 117.429191,6.96472849 C117.391445,6.69117799 117.582603,6.43882262 117.856153,6.401077 C118.129704,6.36333138 118.382059,6.55448901 118.419805,6.82803952 Z M0.351560452,5.59425596 C0.619685241,5.66031433 0.783492437,5.93122331 0.717434064,6.1993481 C0.628029624,6.5622324 0.565753874,6.93265807 0.531429382,7.30840907 C0.506308636,7.58340645 0.263014824,7.78597148 -0.0119825569,7.76085074 C-0.286979937,7.73572999 -0.489544973,7.49243618 -0.464424227,7.2174388 C-0.425527861,6.79163939 -0.354921179,6.37166007 -0.25353169,5.96012957 C-0.187473317,5.69200478 0.0834356631,5.52819758 0.351560452,5.59425596 Z M116.698898,2.76733419 C116.96108,3.1025346 117.197883,3.45694423 117.407088,3.8275859 C117.542824,4.06806518 117.457913,4.37304812 117.217434,4.50878435 C116.976955,4.64452059 116.671972,4.55960957 116.536235,4.31913028 C116.35164,3.9920895 116.14265,3.67930561 115.911223,3.38342515 C115.741094,3.1659147 115.779505,2.85167098 115.997015,2.68154216 C116.214525,2.51141334 116.528769,2.54982374 116.698898,2.76733419 Z M2.65018329,2.03098857 C2.84694705,2.22473746 2.84939109,2.54131052 2.6556422,2.73807428 C2.39207973,3.00573797 2.14880502,3.29275366 1.92806459,3.59657449 C1.76575137,3.81997777 1.45306636,3.86950105 1.22966307,3.70718783 C1.00625978,3.54487461 0.956736501,3.23218959 1.11904973,3.00878631 C1.36908653,2.66464279 1.64461146,2.33957825 1.94309758,2.03644748 C2.13684647,1.83968372 2.45341952,1.83723968 2.65018329,2.03098857 Z M113.131516,0.0955761984 C113.527572,0.252607282 113.910844,0.439252343 114.278336,0.653707596 C114.516838,0.792888857 114.597354,1.09906161 114.458172,1.33756357 C114.318991,1.57606554 114.012818,1.65658088 113.774317,1.51739962 C113.450161,1.32823426 113.112159,1.1636344 112.762943,1.0251747 C112.506241,0.923395871 112.380651,0.63279011 112.48243,0.376088571 C112.584209,0.119387032 112.874815,-0.00620263532 113.131516,0.0955761984 Z M6.40653195,0.142669081 C6.47802057,0.409397386 6.31974754,0.683576376 6.05301923,0.755064996 C5.68981674,0.852410663 5.33533565,0.976934286 4.9920952,1.12738697 C4.73918251,1.23824627 4.44428705,1.12308924 4.33342776,0.870176548 C4.22256847,0.617263859 4.3377255,0.322368401 4.59063819,0.211509108 C4.97997232,0.0408521629 5.38210756,-0.100411613 5.79413604,-0.21084364 C6.06086434,-0.282332259 6.33504333,-0.124059223 6.40653195,0.142669081 Z M110.500405,2.47493888e-05 C110.500405,0.276167124 110.276522,0.500013673 110.00038,0.5 L108.8,0.5 C108.523858,0.5 108.3,0.276142375 108.3,0 C108.3,-0.276142375 108.523858,-0.5 108.8,-0.5 L110,-0.5 C110.276572,-0.499986326 110.500405,-0.276117625 110.500405,2.47493888e-05 Z M106.3,0 C106.3,0.276142375 106.076142,0.5 105.8,0.5 L104.6,0.5 C104.323858,0.5 104.1,0.276142375 104.1,0 C104.1,-0.276142375 104.323858,-0.5 104.6,-0.5 L105.8,-0.5 C106.076142,-0.5 106.3,-0.276142375 106.3,0 Z M102.1,0 C102.1,0.276142375 101.876142,0.5 101.6,0.5 L100.4,0.5 C100.123858,0.5 99.9,0.276142375 99.9,0 C99.9,-0.276142375 100.123858,-0.5 100.4,-0.5 L101.6,-0.5 C101.876142,-0.5 102.1,-0.276142375 102.1,0 Z M97.9,0 C97.9,0.276142375 97.6761424,0.5 97.4,0.5 L96.2,0.5 C95.9238576,0.5 95.7,0.276142375 95.7,0 C95.7,-0.276142375 95.9238576,-0.5 96.2,-0.5 L97.4,-0.5 C97.6761424,-0.5 97.9,-0.276142375 97.9,0 Z M93.7,0 C93.7,0.276142375 93.4761424,0.5 93.2,0.5 L92,0.5 C91.7238576,0.5 91.5,0.276142375 91.5,0 C91.5,-0.276142375 91.7238576,-0.5 92,-0.5 L93.2,-0.5 C93.4761424,-0.5 93.7,-0.276142375 93.7,0 Z M89.5,0 C89.5,0.276142375 89.2761424,0.5 89,0.5 L87.8,0.5 C87.5238576,0.5 87.3,0.276142375 87.3,0 C87.3,-0.276142375 87.5238576,-0.5 87.8,-0.5 L89,-0.5 C89.2761424,-0.5 89.5,-0.276142375 89.5,0 Z M85.3,0 C85.3,0.276142375 85.0761424,0.5 84.8,0.5 L83.6,0.5 C83.3238576,0.5 83.1,0.276142375 83.1,0 C83.1,-0.276142375 83.3238576,-0.5 83.6,-0.5 L84.8,-0.5 C85.0761424,-0.5 85.3,-0.276142375 85.3,0 Z M81.1,0 C81.1,0.276142375 80.8761424,0.5 80.6,0.5 L79.4,0.5 C79.1238576,0.5 78.9,0.276142375 78.9,0 C78.9,-0.276142375 79.1238576,-0.5 79.4,-0.5 L80.6,-0.5 C80.8761424,-0.5 81.1,-0.276142375 81.1,0 Z M76.9,0 C76.9,0.276142375 76.6761424,0.5 76.4,0.5 L75.2,0.5 C74.9238576,0.5 74.7,0.276142375 74.7,0 C74.7,-0.276142375 74.9238576,-0.5 75.2,-0.5 L76.4,-0.5 C76.6761424,-0.5 76.9,-0.276142375 76.9,0 Z M72.7,0 C72.7,0.276142375 72.4761424,0.5 72.2,0.5 L71,0.5 C70.7238576,0.5 70.5,0.276142375 70.5,0 C70.5,-0.276142375 70.7238576,-0.5 71,-0.5 L72.2,-0.5 C72.4761424,-0.5 72.7,-0.276142375 72.7,0 Z M68.5,0 C68.5,0.276142375 68.2761424,0.5 68,0.5 L66.8,0.5 C66.5238576,0.5 66.3,0.276142375 66.3,0 C66.3,-0.276142375 66.5238576,-0.5 66.8,-0.5 L68,-0.5 C68.2761424,-0.5 68.5,-0.276142375 68.5,0 Z M64.3,0 C64.3,0.276142375 64.0761424,0.5 63.8,0.5 L62.6,0.5 C62.3238576,0.5 62.1,0.276142375 62.1,0 C62.1,-0.276142375 62.3238576,-0.5 62.6,-0.5 L63.8,-0.5 C64.0761424,-0.5 64.3,-0.276142375 64.3,0 Z M60.1,0 C60.1,0.276142375 59.8761424,0.5 59.6,0.5 L58.4,0.5 C58.1238576,0.5 57.9,0.276142375 57.9,0 C57.9,-0.276142375 58.1238576,-0.5 58.4,-0.5 L59.6,-0.5 C59.8761424,-0.5 60.1,-0.276142375 60.1,0 Z M55.9,0 C55.9,0.276142375 55.6761424,0.5 55.4,0.5 L54.2,0.5 C53.9238576,0.5 53.7,0.276142375 53.7,0 C53.7,-0.276142375 53.9238576,-0.5 54.2,-0.5 L55.4,-0.5 C55.6761424,-0.5 55.9,-0.276142375 55.9,0 Z M51.7,0 C51.7,0.276142375 51.4761424,0.5 51.2,0.5 L50,0.5 C49.7238576,0.5 49.5,0.276142375 49.5,0 C49.5,-0.276142375 49.7238576,-0.5 50,-0.5 L51.2,-0.5 C51.4761424,-0.5 51.7,-0.276142375 51.7,0 Z M47.5,0 C47.5,0.276142375 47.2761424,0.5 47,0.5 L45.8,0.5 C45.5238576,0.5 45.3,0.276142375 45.3,0 C45.3,-0.276142375 45.5238576,-0.5 45.8,-0.5 L47,-0.5 C47.2761424,-0.5 47.5,-0.276142375 47.5,0 Z M43.3,0 C43.3,0.276142375 43.0761424,0.5 42.8,0.5 L41.6,0.5 C41.3238576,0.5 41.1,0.276142375 41.1,0 C41.1,-0.276142375 41.3238576,-0.5 41.6,-0.5 L42.8,-0.5 C43.0761424,-0.5 43.3,-0.276142375 43.3,0 Z M39.1,0 C39.1,0.276142375 38.8761424,0.5 38.6,0.5 L37.4,0.5 C37.1238576,0.5 36.9,0.276142375 36.9,0 C36.9,-0.276142375 37.1238576,-0.5 37.4,-0.5 L38.6,-0.5 C38.8761424,-0.5 39.1,-0.276142375 39.1,0 Z M34.9,0 C34.9,0.276142375 34.6761424,0.5 34.4,0.5 L33.2,0.5 C32.9238576,0.5 32.7,0.276142375 32.7,0 C32.7,-0.276142375 32.9238576,-0.5 33.2,-0.5 L34.4,-0.5 C34.6761424,-0.5 34.9,-0.276142375 34.9,0 Z M30.7,0 C30.7,0.276142375 30.4761424,0.5 30.2,0.5 L29,0.5 C28.7238576,0.5 28.5,0.276142375 28.5,0 C28.5,-0.276142375 28.7238576,-0.5 29,-0.5 L30.2,-0.5 C30.4761424,-0.5 30.7,-0.276142375 30.7,0 Z M26.5,0 C26.5,0.276142375 26.2761424,0.5 26,0.5 L24.8,0.5 C24.5238576,0.5 24.3,0.276142375 24.3,0 C24.3,-0.276142375 24.5238576,-0.5 24.8,-0.5 L26,-0.5 C26.2761424,-0.5 26.5,-0.276142375 26.5,0 Z M22.3,0 C22.3,0.276142375 22.0761424,0.5 21.8,0.5 L20.6,0.5 C20.3238576,0.5 20.1,0.276142375 20.1,0 C20.1,-0.276142375 20.3238576,-0.5 20.6,-0.5 L21.8,-0.5 C22.0761424,-0.5 22.3,-0.276142375 22.3,0 Z M18.1,0 C18.1,0.276142375 17.8761424,0.5 17.6,0.5 L16.4,0.5 C16.1238576,0.5 15.9,0.276142375 15.9,0 C15.9,-0.276142375 16.1238576,-0.5 16.4,-0.5 L17.6,-0.5 C17.8761424,-0.5 18.1,-0.276142375 18.1,0 Z M13.9,0 C13.9,0.276142375 13.6761424,0.5 13.4,0.5 L12.2,0.5 C11.9238576,0.5 11.7,0.276142375 11.7,0 C11.7,-0.276142375 11.9238576,-0.5 12.2,-0.5 L13.4,-0.5 C13.6761424,-0.5 13.9,-0.276142375 13.9,0 Z M9.7,0 C9.7,0.276142375 9.47614237,0.5 9.2,0.5 L8,0.5 C7.72385763,0.5 7.5,0.276142375 7.5,0 C7.5,-0.276142375 7.72385763,-0.5 8,-0.5 L9.2,-0.5 C9.47614237,-0.5 9.7,-0.276142375 9.7,0 Z" fill="#999999" fill-rule="nonzero"></path>
+                    <g id="vLLM" stroke-width="1" fill-rule="evenodd" transform="translate(6.5, 4)" fill="#7D7D7D">
+                        <path d="M1.80126953,8 L5.30224609,8 C5.44873047,8 5.56266276,7.95198568 5.64404297,7.85595703 C5.72542318,7.75992839 5.76611328,7.6484375 5.76611328,7.52148437 C5.76611328,7.39453125 5.72460938,7.28222656 5.64160156,7.18457031 C5.55859375,7.08691406 5.44547526,7.03808594 5.30224609,7.03808594 L2.39697266,7.03808594 L2.39697266,1.58398437 C2.39697266,1.3984375 2.33837891,1.25439453 2.22119141,1.15185547 C2.10400391,1.04931641 1.96240234,0.998046875 1.79638672,0.998046875 C1.63037109,0.998046875 1.48714193,1.04931641 1.36669922,1.15185547 C1.24625651,1.25439453 1.18603516,1.3984375 1.18603516,1.58398437 L1.18603516,7.39453125 C1.18603516,7.56054687 1.24707031,7.70296224 1.36914062,7.82177734 C1.49121094,7.94059245 1.63525391,8 1.80126953,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M7.27978516,8 L10.7807617,8 C10.9272461,8 11.0411784,7.95198568 11.1225586,7.85595703 C11.2039388,7.75992839 11.2446289,7.6484375 11.2446289,7.52148437 C11.2446289,7.39453125 11.203125,7.28222656 11.1201172,7.18457031 C11.0371094,7.08691406 10.9239909,7.03808594 10.7807617,7.03808594 L7.87548828,7.03808594 L7.87548828,1.58398437 C7.87548828,1.3984375 7.81689453,1.25439453 7.69970703,1.15185547 C7.58251953,1.04931641 7.44091797,0.998046875 7.27490234,0.998046875 C7.10888672,0.998046875 6.96565755,1.04931641 6.84521484,1.15185547 C6.72477214,1.25439453 6.66455078,1.3984375 6.66455078,1.58398437 L6.66455078,7.39453125 C6.66455078,7.56054687 6.72558594,7.70296224 6.84765625,7.82177734 C6.96972656,7.94059245 7.11376953,8 7.27978516,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M15.9418945,7.78027344 L15.9418945,6.31054688 L14.0473633,1.88183594 C13.9529622,1.65722656 13.8162435,1.48876953 13.637207,1.37646484 C13.4581706,1.26416016 13.2661133,1.20800781 13.0610352,1.20800781 C12.8136393,1.20800781 12.5996094,1.2796224 12.4189453,1.42285156 C12.2382812,1.56608073 12.1479492,1.75 12.1479492,1.97460938 L12.1479492,7.47753906 C12.1479492,7.60449219 12.2032878,7.70377604 12.3139648,7.77539063 C12.4246419,7.84700521 12.5556641,7.8828125 12.7070312,7.8828125 C12.8583984,7.8828125 12.9894206,7.84700521 13.1000977,7.77539063 C13.2107747,7.70377604 13.2661133,7.60449219 13.2661133,7.47753906 L13.2661133,2.5703125 L15.2436523,7.25292969 C15.3152669,7.42220052 15.4145508,7.55240885 15.5415039,7.64355469 C15.668457,7.73470052 15.8019206,7.78027344 15.9418945,7.78027344 Z M15.9467773,7.78027344 C16.0867513,7.78027344 16.2210286,7.73388672 16.3496094,7.64111328 C16.4781901,7.54833984 16.5766602,7.41894531 16.6450195,7.25292969 L18.6274414,2.5703125 L18.6274414,7.47753906 C18.6274414,7.60449219 18.6827799,7.70377604 18.793457,7.77539063 C18.9041341,7.84700521 19.0351562,7.8828125 19.1865234,7.8828125 C19.3378906,7.8828125 19.4689128,7.84700521 19.5795898,7.77539063 C19.6902669,7.70377604 19.7456055,7.60449219 19.7456055,7.47753906 L19.7456055,1.97460938 C19.7456055,1.82486979 19.702474,1.69059245 19.6162109,1.57177734 C19.5299479,1.45296224 19.4176432,1.36263021 19.2792969,1.30078125 C19.1409505,1.23893229 18.9920247,1.20800781 18.8325195,1.20800781 C18.6274414,1.20800781 18.4353841,1.26416016 18.2563477,1.37646484 C18.0773112,1.48876953 17.9389648,1.65722656 17.8413086,1.88183594 L15.9467773,6.31054688 L15.9467773,7.78027344 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="Lines-@blue-w-arrow" transform="translate(20, 50)">
+                    <g id="L5" transform="translate(0, 138)">
+                        <path id="5-r_w-arrow" d="M77.5,-0.1 C77.5552285,-0.1 77.6,-0.055228475 77.6,0 L77.6,15.277 L80,15.2777778 L77.5,20.2777778 L75,15.2777778 L77.4,15.277 L77.4,0 C77.4,-0.055228475 77.4447715,-0.1 77.5,-0.1 Z" transform="translate(77.5, 10) rotate(180) translate(-77.5, -10)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="77.5" y1="19.3708333" x2="77.5" y2="1.125" id="5-r" stroke="#8E8E8E" stroke-linecap="round" transform="translate(77.5, 10.5625) rotate(180) translate(-77.5, -10.5625)"></line>
+                        <path id="5-m_w-arrow" d="M32.5,-0.1 C32.5552285,-0.1 32.6,-0.055228475 32.6,0 L32.6,15.277 L35,15.2777778 L32.5,20.2777778 L30,15.2777778 L32.4,15.277 L32.4,0 C32.4,-0.055228475 32.4447715,-0.1 32.5,-0.1 Z" transform="translate(32.5, 10) rotate(180) translate(-32.5, -10)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="32.5" y1="19.3708333" x2="32.5" y2="1.125" id="5-m" stroke="#8E8E8E" stroke-linecap="round" transform="translate(32.5, 10.5625) rotate(180) translate(-32.5, -10.5625)"></line>
+                        <path id="5-l_w-arrow" d="M0.5,-0.1 C0.555228475,-0.1 0.6,-0.055228475 0.6,0 L0.6,15.277 L3,15.2777778 L0.5,20.2777778 L-2,15.2777778 L0.4,15.277 L0.4,0 C0.4,-0.055228475 0.444771525,-0.1 0.5,-0.1 Z" transform="translate(0.5, 10) rotate(180) translate(-0.5, -10)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="0.5" y1="19.3708333" x2="0.5" y2="1.125" id="5-l" stroke="#8E8E8E" stroke-linecap="round" transform="translate(0.5, 10.5625) rotate(180) translate(-0.5, -10.5625)"></line>
+                    </g>
+                    <g id="L4" transform="translate(0.0001, 110)">
+                        <path id="4-r_w-arrow" d="M77.4999,-0.1 C77.5551285,-0.1 77.5999,-0.055228475 77.5999,0 L77.599,11.277 L79.9999,11.2777778 L77.4999,16.2777778 L74.9999,11.2777778 L77.399,11.277 L77.3999,0 C77.3999,-0.055228475 77.4446715,-0.1 77.4999,-0.1 Z" transform="translate(77.4999, 8) rotate(180) translate(-77.4999, -8)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="77.4998993" y1="15.4916667" x2="77.4998993" y2="0.75" id="4-r" stroke="#8E8E8E" stroke-linecap="round" transform="translate(77.4999, 8.375) rotate(180) translate(-77.4999, -8.375)"></line>
+                        <path id="4-l_w-arrow" d="M0.5,-0.1 C0.555228475,-0.1 0.6,-0.055228475 0.6,0 L0.6,11.277 L3,11.2777778 L0.5,16.2777778 L-2,11.2777778 L0.4,11.277 L0.4,0 C0.4,-0.055228475 0.444771525,-0.1 0.5,-0.1 Z" transform="translate(0.5, 8) rotate(180) translate(-0.5, -8)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="0.499999254" y1="15.4821429" x2="0.499999254" y2="1.5" id="4-l" stroke="#8E8E8E" stroke-linecap="round" transform="translate(0.5, 8.75) rotate(180) translate(-0.5, -8.75)"></line>
+                    </g>
+                    <g id="L3" transform="translate(0, 82)">
+                        <path id="3-r_w-arrow" d="M77.4999,-0.1 C77.5551285,-0.1 77.5999,-0.055228475 77.5999,0 L77.599,11.277 L79.9999,11.2777778 L77.4999,16.2777778 L74.9999,11.2777778 L77.399,11.277 L77.3999,0 C77.3999,-0.055228475 77.4446715,-0.1 77.4999,-0.1 Z" transform="translate(77.4999, 8) rotate(180) translate(-77.4999, -8)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="77.4998993" y1="15.4916667" x2="77.4998993" y2="0.75" id="3-r" stroke="#8E8E8E" stroke-linecap="round" transform="translate(77.4999, 8.375) rotate(180) translate(-77.4999, -8.375)"></line>
+                        <path id="3-l_w-arrow" d="M0.5,-0.1 C0.555228475,-0.1 0.6,-0.055228475 0.6,0 L0.6,11.277 L3,11.2777778 L0.5,16.2777778 L-2,11.2777778 L0.4,11.277 L0.4,0 C0.4,-0.055228475 0.444771525,-0.1 0.5,-0.1 Z" transform="translate(0.5, 8) rotate(180) translate(-0.5, -8)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="0.499999254" y1="15.4821429" x2="0.499999254" y2="1.5" id="3-l" stroke="#8E8E8E" stroke-linecap="round" transform="translate(0.5, 8.75) rotate(180) translate(-0.5, -8.75)"></line>
+                    </g>
+                    <g id="L2" transform="translate(0.0001, 54)">
+                        <path id="2-r_w-arrow" d="M77.4999,-0.1 C77.5551285,-0.1 77.5999,-0.055228475 77.5999,0 L77.599,11.277 L79.9999,11.2777778 L77.4999,16.2777778 L74.9999,11.2777778 L77.399,11.277 L77.3999,0 C77.3999,-0.055228475 77.4446715,-0.1 77.4999,-0.1 Z" transform="translate(77.4999, 8) rotate(180) translate(-77.4999, -8)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="77.4998993" y1="15.4916667" x2="77.4998993" y2="0.75" id="2-r" stroke="#8E8E8E" stroke-linecap="round" transform="translate(77.4999, 8.375) rotate(180) translate(-77.4999, -8.375)"></line>
+                        <path id="2-l_w-arrow" d="M0.5,-0.1 C0.555228475,-0.1 0.6,-0.055228475 0.6,0 L0.6,11.277 L3,11.2777778 L0.5,16.2777778 L-2,11.2777778 L0.4,11.277 L0.4,0 C0.4,-0.055228475 0.444771525,-0.1 0.5,-0.1 Z" transform="translate(0.5, 8) rotate(180) translate(-0.5, -8)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="0.499999254" y1="15.4916667" x2="0.499999254" y2="0.75" id="2-l" stroke="#8E8E8E" stroke-linecap="round" transform="translate(0.5, 8.375) rotate(180) translate(-0.5, -8.375)"></line>
+                    </g>
+                    <g id="L1" transform="translate(39, 0)">
+                        <path id="1_w-arrow" d="M0.5,-0.1 C0.555228475,-0.1 0.6,-0.055228475 0.6,0 L0.6,36.277 L3,36.2777778 L0.5,41.2777778 L-2,36.2777778 L0.4,36.277 L0.4,0 C0.4,-0.055228475 0.444771525,-0.1 0.5,-0.1 Z" transform="translate(0.5, 21) rotate(180) translate(-0.5, -21)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="0.5" y1="40.9166667" x2="0.5" y2="3" id="1" stroke="#8E8E8E" stroke-linecap="round" transform="translate(0.5, 22.5) rotate(180) translate(-0.5, -22.5)"></line>
+                    </g>
+                </g>
+                <g id="rectangles-@yellow" transform="translate(8, 92)">
+                    <rect id="Rectangle" fill-opacity="0.6" fill="#FDB515" x="0" y="116" width="24" height="12" rx="3"></rect>
+                    <path d="M21,116 C22.6568542,116 24,117.343146 24,119 L24,125 C24,126.656854 22.6568542,128 21,128 L3,128 C1.34314575,128 0,126.656854 0,125 L0,119 C0,117.343146 1.34314575,116 3,116 L21,116 Z M21,117 L3,117 L2.79551164,117.010326 C1.78699946,117.112746 1,117.964466 1,119 L1,125 C1,126.104569 1.8954305,127 3,127 L21,127 C22.1045695,127 23,126.104569 23,125 L23,119 C23,117.895431 22.1045695,117 21,117 Z" id="Rectangle" fill-opacity="0.8" fill="#FDB515" fill-rule="nonzero"></path>
+                    <rect id="Rectangle" fill-opacity="0.6" fill="#FDB515" x="32" y="116" width="24" height="12" rx="3"></rect>
+                    <path d="M53,116 C54.6568542,116 56,117.343146 56,119 L56,125 C56,126.656854 54.6568542,128 53,128 L35,128 C33.3431458,128 32,126.656854 32,125 L32,119 C32,117.343146 33.3431458,116 35,116 L53,116 Z M53,117 L35,117 L34.7955116,117.010326 C33.7869995,117.112746 33,117.964466 33,119 L33,125 C33,126.104569 33.8954305,127 35,127 L53,127 C54.1045695,127 55,126.104569 55,125 L55,119 C55,117.895431 54.1045695,117 53,117 Z" id="Rectangle" fill-opacity="0.8" fill="#FDB515" fill-rule="nonzero"></path>
+                    <rect id="Rectangle" fill-opacity="0.6" fill="#FDB515" x="78" y="116" width="24" height="12" rx="3"></rect>
+                    <path d="M99,116 C100.656854,116 102,117.343146 102,119 L102,125 C102,126.656854 100.656854,128 99,128 L81,128 C79.3431458,128 78,126.656854 78,125 L78,119 C78,117.343146 79.3431458,116 81,116 L99,116 Z M99,117 L81,117 L80.7955116,117.010326 C79.7869995,117.112746 79,117.964466 79,119 L79,125 C79,126.104569 79.8954305,127 81,127 L99,127 C100.104569,127 101,126.104569 101,125 L101,119 C101,117.895431 100.104569,117 99,117 Z" id="Rectangle" fill-opacity="0.8" fill="#FDB515" fill-rule="nonzero"></path>
+                    <rect id="Rectangle" fill-opacity="0.6" fill="#FDB515" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill="#FDB515" fill-rule="nonzero"></path>
+                    <rect id="Rectangle" fill-opacity="0.6" fill="#FDB515" x="0" y="28" width="102" height="12" rx="3"></rect>
+                    <path d="M99,28 C100.656854,28 102,29.3431458 102,31 L102,37 C102,38.6568542 100.656854,40 99,40 L3,40 C1.34314575,40 0,38.6568542 0,37 L0,31 C0,29.3431458 1.34314575,28 3,28 L99,28 Z M99,29 L3,29 L2.79551169,29.0103258 C1.78699946,29.1127458 1,29.9644661 1,31 L1,37 C1,38.1045695 1.8954305,39 3,39 L99,39 C100.104569,39 101,38.1045695 101,37 L101,31 C101,29.8954305 100.104569,29 99,29 Z" id="Rectangle" fill-opacity="0.8" fill="#FDB515" fill-rule="nonzero"></path>
+                    <rect id="Rectangle" fill-opacity="0.6" fill="#FDB515" x="0" y="56" width="102" height="12" rx="3"></rect>
+                    <path d="M99,56 C100.656854,56 102,57.3431458 102,59 L102,65 C102,66.6568542 100.656854,68 99,68 L3,68 C1.34314575,68 0,66.6568542 0,65 L0,59 C0,57.3431458 1.34314575,56 3,56 L99,56 Z M99,57 L3,57 L2.79551169,57.0103258 C1.78699946,57.1127458 1,57.9644661 1,59 L1,65 C1,66.1045695 1.8954305,67 3,67 L99,67 C100.104569,67 101,66.1045695 101,65 L101,59 C101,57.8954305 100.104569,57 99,57 Z" id="Rectangle" fill-opacity="0.8" fill="#FDB515" fill-rule="nonzero"></path>
+                    <rect id="Rectangle" fill-opacity="0.6" fill="#FDB515" x="0" y="84" width="102" height="12" rx="3"></rect>
+                    <path d="M99,84 C100.656854,84 102,85.3431458 102,87 L102,93 C102,94.6568542 100.656854,96 99,96 L3,96 C1.34314575,96 0,94.6568542 0,93 L0,87 C0,85.3431458 1.34314575,84 3,84 L99,84 Z M99,85 L3,85 L2.79551169,85.0103258 C1.78699946,85.1127458 1,85.9644661 1,87 L1,93 C1,94.1045695 1.8954305,95 3,95 L99,95 C100.104569,95 101,94.1045695 101,93 L101,87 C101,85.8954305 100.104569,85 99,85 Z" id="Rectangle" fill-opacity="0.8" fill="#FDB515" fill-rule="nonzero"></path>
+                    <g id="..." transform="translate(62.7, 121)" fill="#8E8E8E">
+                        <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                    </g>
+                </g>
+            </g>
+            <g id="Pooling" transform="translate(0, 12.0363)">
+                <g id="MEAN-pooling" transform="translate(0, 15.9637)">
+                    <g id="dash-box" transform="translate(-1.4, -0.4)">
+                        <path d="M4.40000021,0.4 L116.4,0.4 C118.056854,0.4 119.4,1.74314575 119.4,3.4 L119.4,19.4 C119.4,21.0568542 118.056854,22.4 116.4,22.4 L4.40000021,22.4 C2.74314596,22.4 1.40000021,21.0568542 1.40000021,19.4 L1.40000021,3.4 C1.40000021,1.74314575 2.74314596,0.4 4.40000021,0.4 Z" id="vLLM-dash-box" fill="#F7F7F7"></path>
+                        <path d="M97.1844026,22.4 C97.1844026,22.6209139 97.0053165,22.8 96.7844026,22.8 L95.7844026,22.8 C95.5634887,22.8 95.3844026,22.6209139 95.3844026,22.4 C95.3844026,22.1790861 95.5634887,22 95.7844026,22 L96.7844025,22 C97.0053165,22 97.1844026,22.1790861 97.1844026,22.4 Z M7.58440258,22.4 C7.58440258,22.6209139 7.40531648,22.8 7.18440258,22.8 L6.18440258,22.8 C5.96348868,22.8 5.78440258,22.6209139 5.78440258,22.4 C5.78440258,22.1790861 5.96348868,22 6.18440258,22 L7.18440261,22 C7.40531648,22 7.58440258,22.1790861 7.58440258,22.4 Z M10.7844026,22.4 C10.7844026,22.6209139 10.6053165,22.8 10.3844026,22.8 L9.38440258,22.8 C9.16348868,22.8 8.98440258,22.6209139 8.98440258,22.4 C8.98440258,22.1790861 9.16348868,22 9.38440258,22 L10.3844026,22 C10.6053165,22 10.7844026,22.1790861 10.7844026,22.4 Z M100.384403,22.4 C100.384403,22.6209139 100.205316,22.8 99.9844026,22.8 L98.9844026,22.8 C98.7634887,22.8 98.5844026,22.6209139 98.5844026,22.4 C98.5844026,22.1790861 98.7634887,22 98.9844026,22 L99.9844025,22 C100.205316,22 100.384403,22.1790861 100.384403,22.4 Z M17.1844026,22.4 C17.1844026,22.6209139 17.0053165,22.8 16.7844026,22.8 L15.7844026,22.8 C15.5634887,22.8 15.3844026,22.6209139 15.3844026,22.4 C15.3844026,22.1790861 15.5634887,22 15.7844026,22 L16.7844026,22 C17.0053165,22 17.1844026,22.1790861 17.1844026,22.4 Z M20.3844026,22.4 C20.3844026,22.6209139 20.2053165,22.8 19.9844026,22.8 L18.9844026,22.8 C18.7634887,22.8 18.5844026,22.6209139 18.5844026,22.4 C18.5844026,22.1790861 18.7634887,22 18.9844026,22 L19.9844026,22 C20.2053165,22 20.3844026,22.1790861 20.3844026,22.4 Z M23.5844026,22.4 C23.5844026,22.6209139 23.4053165,22.8 23.1844026,22.8 L22.1844026,22.8 C21.9634887,22.8 21.7844026,22.6209139 21.7844026,22.4 C21.7844026,22.1790861 21.9634887,22 22.1844026,22 L23.1844026,22 C23.4053165,22 23.5844026,22.1790861 23.5844026,22.4 Z M26.7844026,22.4 C26.7844026,22.6209139 26.6053165,22.8 26.3844026,22.8 L25.3844026,22.8 C25.1634887,22.8 24.9844026,22.6209139 24.9844026,22.4 C24.9844026,22.1790861 25.1634887,22 25.3844026,22 L26.3844026,22 C26.6053165,22 26.7844026,22.1790861 26.7844026,22.4 Z M29.9844026,22.4 C29.9844026,22.6209139 29.8053165,22.8 29.5844026,22.8 L28.5844026,22.8 C28.3634887,22.8 28.1844026,22.6209139 28.1844026,22.4 C28.1844026,22.1790861 28.3634887,22 28.5844026,22 L29.5844027,22 C29.8053165,22 29.9844026,22.1790861 29.9844026,22.4 Z M33.1844026,22.4 C33.1844026,22.6209139 33.0053165,22.8 32.7844026,22.8 L31.7844026,22.8 C31.5634887,22.8 31.3844026,22.6209139 31.3844026,22.4 C31.3844026,22.1790861 31.5634887,22 31.7844026,22 L32.7844027,22 C33.0053165,22 33.1844026,22.1790861 33.1844026,22.4 Z M36.3844026,22.4 C36.3844026,22.6209139 36.2053165,22.8 35.9844026,22.8 L34.9844026,22.8 C34.7634887,22.8 34.5844026,22.6209139 34.5844026,22.4 C34.5844026,22.1790861 34.7634887,22 34.9844026,22 L35.9844027,22 C36.2053165,22 36.3844026,22.1790861 36.3844026,22.4 Z M39.5844026,22.4 C39.5844026,22.6209139 39.4053165,22.8 39.1844026,22.8 L38.1844026,22.8 C37.9634887,22.8 37.7844026,22.6209139 37.7844026,22.4 C37.7844026,22.1790861 37.9634887,22 38.1844026,22 L39.1844027,22 C39.4053165,22 39.5844026,22.1790861 39.5844026,22.4 Z M42.7844026,22.4 C42.7844026,22.6209139 42.6053165,22.8 42.3844026,22.8 L41.3844026,22.8 C41.1634887,22.8 40.9844026,22.6209139 40.9844026,22.4 C40.9844026,22.1790861 41.1634887,22 41.3844026,22 L42.3844027,22 C42.6053165,22 42.7844026,22.1790861 42.7844026,22.4 Z M45.9844026,22.4 C45.9844026,22.6209139 45.8053165,22.8 45.5844026,22.8 L44.5844026,22.8 C44.3634887,22.8 44.1844026,22.6209139 44.1844026,22.4 C44.1844026,22.1790861 44.3634887,22 44.5844026,22 L45.5844027,22 C45.8053165,22 45.9844026,22.1790861 45.9844026,22.4 Z M49.1844026,22.4 C49.1844026,22.6209139 49.0053165,22.8 48.7844026,22.8 L47.7844026,22.8 C47.5634887,22.8 47.3844026,22.6209139 47.3844026,22.4 C47.3844026,22.1790861 47.5634887,22 47.7844026,22 L48.7844027,22 C49.0053165,22 49.1844026,22.1790861 49.1844026,22.4 Z M52.3844026,22.4 C52.3844026,22.6209139 52.2053165,22.8 51.9844026,22.8 L50.9844026,22.8 C50.7634887,22.8 50.5844026,22.6209139 50.5844026,22.4 C50.5844026,22.1790861 50.7634887,22 50.9844026,22 L51.9844027,22 C52.2053165,22 52.3844026,22.1790861 52.3844026,22.4 Z M55.5844026,22.4 C55.5844026,22.6209139 55.4053165,22.8 55.1844026,22.8 L54.1844026,22.8 C53.9634887,22.8 53.7844026,22.6209139 53.7844026,22.4 C53.7844026,22.1790861 53.9634887,22 54.1844026,22 L55.1844027,22 C55.4053165,22 55.5844026,22.1790861 55.5844026,22.4 Z M58.7844026,22.4 C58.7844026,22.6209139 58.6053165,22.8 58.3844026,22.8 L57.3844026,22.8 C57.1634887,22.8 56.9844026,22.6209139 56.9844026,22.4 C56.9844026,22.1790861 57.1634887,22 57.3844026,22 L58.3844027,22 C58.6053165,22 58.7844026,22.1790861 58.7844026,22.4 Z M61.9844026,22.4 C61.9844026,22.6209139 61.8053165,22.8 61.5844026,22.8 L60.5844026,22.8 C60.3634887,22.8 60.1844026,22.6209139 60.1844026,22.4 C60.1844026,22.1790861 60.3634887,22 60.5844026,22 L61.5844025,22 C61.8053165,22 61.9844026,22.1790861 61.9844026,22.4 Z M65.1844026,22.4 C65.1844026,22.6209139 65.0053165,22.8 64.7844026,22.8 L63.7844026,22.8 C63.5634887,22.8 63.3844026,22.6209139 63.3844026,22.4 C63.3844026,22.1790861 63.5634887,22 63.7844026,22 L64.7844025,22 C65.0053165,22 65.1844026,22.1790861 65.1844026,22.4 Z M68.3844026,22.4 C68.3844026,22.6209139 68.2053165,22.8 67.9844026,22.8 L66.9844026,22.8 C66.7634887,22.8 66.5844026,22.6209139 66.5844026,22.4 C66.5844026,22.1790861 66.7634887,22 66.9844026,22 L67.9844025,22 C68.2053165,22 68.3844026,22.1790861 68.3844026,22.4 Z M13.9844026,22.4 C13.9844026,22.6209139 13.8053165,22.8 13.5844026,22.8 L12.5844026,22.8 C12.3634887,22.8 12.1844026,22.6209139 12.1844026,22.4 C12.1844026,22.1790861 12.3634887,22 12.5844026,22 L13.5844026,22 C13.8053165,22 13.9844026,22.1790861 13.9844026,22.4 Z M71.5844026,22.4 C71.5844026,22.6209139 71.4053165,22.8 71.1844026,22.8 L70.1844026,22.8 C69.9634887,22.8 69.7844026,22.6209139 69.7844026,22.4 C69.7844026,22.1790861 69.9634887,22 70.1844026,22 L71.1844025,22 C71.4053165,22 71.5844026,22.1790861 71.5844026,22.4 Z M74.7844026,22.4 C74.7844026,22.6209139 74.6053165,22.8 74.3844026,22.8 L73.3844026,22.8 C73.1634887,22.8 72.9844026,22.6209139 72.9844026,22.4 C72.9844026,22.1790861 73.1634887,22 73.3844026,22 L74.3844025,22 C74.6053165,22 74.7844026,22.1790861 74.7844026,22.4 Z M77.9844026,22.4 C77.9844026,22.6209139 77.8053165,22.8 77.5844026,22.8 L76.5844026,22.8 C76.3634887,22.8 76.1844026,22.6209139 76.1844026,22.4 C76.1844026,22.1790861 76.3634887,22 76.5844026,22 L77.5844025,22 C77.8053165,22 77.9844026,22.1790861 77.9844026,22.4 Z M81.1844026,22.4 C81.1844026,22.6209139 81.0053165,22.8 80.7844026,22.8 L79.7844026,22.8 C79.5634887,22.8 79.3844026,22.6209139 79.3844026,22.4 C79.3844026,22.1790861 79.5634887,22 79.7844026,22 L80.7844025,22 C81.0053165,22 81.1844026,22.1790861 81.1844026,22.4 Z M84.3844026,22.4 C84.3844026,22.6209139 84.2053165,22.8 83.9844026,22.8 L82.9844026,22.8 C82.7634887,22.8 82.5844026,22.6209139 82.5844026,22.4 C82.5844026,22.1790861 82.7634887,22 82.9844026,22 L83.9844025,22 C84.2053165,22 84.3844026,22.1790861 84.3844026,22.4 Z M87.5844026,22.4 C87.5844026,22.6209139 87.4053165,22.8 87.1844026,22.8 L86.1844026,22.8 C85.9634887,22.8 85.7844026,22.6209139 85.7844026,22.4 C85.7844026,22.1790861 85.9634887,22 86.1844026,22 L87.1844025,22 C87.4053165,22 87.5844026,22.1790861 87.5844026,22.4 Z M90.7844026,22.4 C90.7844026,22.6209139 90.6053165,22.8 90.3844026,22.8 L89.3844026,22.8 C89.1634887,22.8 88.9844026,22.6209139 88.9844026,22.4 C88.9844026,22.1790861 89.1634887,22 89.3844026,22 L90.3844025,22 C90.6053165,22 90.7844026,22.1790861 90.7844026,22.4 Z M93.9844026,22.4 C93.9844026,22.6209139 93.8053165,22.8 93.5844026,22.8 L92.5844026,22.8 C92.3634887,22.8 92.1844026,22.6209139 92.1844026,22.4 C92.1844026,22.1790861 92.3634887,22 92.5844026,22 L93.5844025,22 C93.8053165,22 93.9844026,22.1790861 93.9844026,22.4 Z M116.384403,22.4 C116.384403,22.6209139 116.205316,22.8 115.984403,22.8 L114.984403,22.8 C114.763489,22.8 114.584403,22.6209139 114.584403,22.4 C114.584403,22.1790861 114.763489,22 114.984403,22 L115.984403,22 C116.205316,22 116.384403,22.1790861 116.384403,22.4 Z M113.184403,22.4 C113.184403,22.6209139 113.005316,22.8 112.784403,22.8 L111.784403,22.8 C111.563489,22.8 111.384403,22.6209139 111.384403,22.4 C111.384403,22.1790861 111.563489,22 111.784403,22 L112.784403,22 C113.005316,22 113.184403,22.1790861 113.184403,22.4 Z M109.984403,22.4 C109.984403,22.6209139 109.805316,22.8 109.584403,22.8 L108.584403,22.8 C108.363489,22.8 108.184403,22.6209139 108.184403,22.4 C108.184403,22.1790861 108.363489,22 108.584403,22 L109.584403,22 C109.805316,22 109.984403,22.1790861 109.984403,22.4 Z M106.784403,22.4 C106.784403,22.6209139 106.605316,22.8 106.384403,22.8 L105.384403,22.8 C105.163489,22.8 104.984403,22.6209139 104.984403,22.4 C104.984403,22.1790861 105.163489,22 105.384403,22 L106.384402,22 C106.605316,22 106.784403,22.1790861 106.784403,22.4 Z M103.584403,22.4 C103.584403,22.6209139 103.405316,22.8 103.184403,22.8 L102.184403,22.8 C101.963489,22.8 101.784403,22.6209139 101.784403,22.4 C101.784403,22.1790861 101.963489,22 102.184403,22 L103.184402,22 C103.405316,22 103.584403,22.1790861 103.584403,22.4 Z M3.21445335,21.7146857 C3.47201296,21.8469101 3.7503283,21.9352706 4.04005994,21.9753362 C4.25889142,22.0055973 4.41175792,22.2075267 4.38149683,22.4263582 C4.35123574,22.6451896 4.14930631,22.7980562 3.93047483,22.7677951 C3.55112172,22.7153362 3.1863813,22.5995372 2.84908821,22.4263797 C2.65255935,22.3254869 2.575031,22.084379 2.6759238,21.8878502 C2.77681659,21.6913213 3.01792449,21.6137929 3.21445335,21.7146857 Z M119.085667,20.7967074 C119.266363,20.923797 119.30982,21.1733068 119.182731,21.3540032 C118.964109,21.6648408 118.694714,21.9373837 118.386455,22.1596216 C118.207257,22.2888142 117.957256,22.2482762 117.828063,22.0690774 C117.698871,21.8898786 117.739409,21.6398779 117.918607,21.5106853 C118.154633,21.3405235 118.360988,21.1317568 118.528371,20.8937713 C118.655461,20.7130749 118.904971,20.6696179 119.085667,20.7967074 Z M1.79999053,19.3738218 L1.80050042,19.4515966 C1.80607818,19.7390086 1.85825979,20.0190186 1.95396115,20.2838222 C2.02904737,20.4915841 1.92149248,20.7208778 1.71373058,20.795964 C1.50596867,20.8710502 1.27667495,20.7634953 1.20158873,20.5557334 C1.07625093,20.2089265 1.00793261,19.8423259 1.00058539,19.4621397 L1.00000988,19.3793853 C0.99847357,19.1584767 1.17630991,18.9781495 1.39721847,18.9766034 C1.61812703,18.9750769 1.79845422,19.1529133 1.79999053,19.3738218 Z M119.8,18.0077988 L119.8,19.0077988 C119.8,19.2287127 119.620914,19.4077988 119.4,19.4077988 C119.179086,19.4077988 119,19.2287127 119,19.0077988 L119,18.0077988 C119,17.7868849 119.179086,17.6077988 119.4,17.6077988 C119.620914,17.6077988 119.8,17.7868849 119.8,18.0077988 Z M1.80000021,16.1766036 L1.80000021,17.1766036 C1.80000021,17.3975175 1.62091411,17.5766036 1.40000021,17.5766036 C1.17908631,17.5766036 0.900000207,17.3975175 0.900000207,17.1766036 L0.900000207,16.1766036 C0.900000207,15.9556897 1.17908631,15.7766036 1.40000021,15.7766036 C1.62091411,15.7766036 1.80000021,15.9556897 1.80000021,16.1766036 Z M119.8,14.8077988 L119.8,15.8077988 C119.8,16.0287127 119.620914,16.2077988 119.4,16.2077988 C119.179086,16.2077988 119,16.0287127 119,15.8077988 L119,14.8077988 C119,14.5868849 119.179086,14.4077988 119.4,14.4077988 C119.620914,14.4077988 119.8,14.5868849 119.8,14.8077988 Z M1.80000021,12.9766036 L1.80000021,13.9766036 C1.80000021,14.1975175 1.62091411,14.3766036 1.40000021,14.3766036 C1.17908631,14.3766036 0.900000207,14.1975175 0.900000207,13.9766036 L0.900000207,12.9766036 C0.900000207,12.7556897 1.17908631,12.5766036 1.40000021,12.5766036 C1.62091411,12.5766036 1.80000021,12.7556897 1.80000021,12.9766036 Z M119.8,11.6077988 L119.8,12.6077988 C119.8,12.8287127 119.620914,13.0077988 119.4,13.0077988 C119.179086,13.0077988 119,12.8287127 119,12.6077988 L119,11.6077988 C119,11.3868849 119.179086,11.2077988 119.4,11.2077988 C119.620914,11.2077988 119.8,11.3868849 119.8,11.6077988 Z M1.80000021,9.77660357 L1.80000021,10.7766036 C1.80000021,10.9975175 1.62091411,11.1766036 1.40000021,11.1766036 C1.17908631,11.1766036 0.900000207,10.9975175 0.900000207,10.7766036 L0.900000207,9.77660357 C0.900000207,9.55568967 1.17908631,9.37660357 1.40000021,9.37660357 C1.62091411,9.37660357 1.80000021,9.55568967 1.80000021,9.77660357 Z M119.8,8.40779881 L119.8,9.40779881 C119.8,9.62871271 119.620914,9.80779881 119.4,9.80779881 C119.179086,9.80779881 119,9.62871271 119,9.40779881 L119,8.40779881 C119,8.18688491 119.179086,8.00779881 119.4,8.00779881 C119.620914,8.00779881 119.8,8.18688491 119.8,8.40779881 Z M1.80000021,6.57660357 L1.80000021,7.57660357 C1.80000021,7.79751746 1.62091411,7.97660357 1.40000021,7.97660357 C1.17908631,7.97660357 0.900000207,7.79751746 0.900000207,7.57660357 L0.900000207,6.57660357 C0.900000207,6.35568967 1.17908631,6.17660357 1.40000021,6.17660357 C1.62091411,6.17660357 1.80000021,6.35568967 1.80000021,6.57660357 Z M119.8,5.20779881 L119.8,6.20779881 C119.8,6.42871271 119.620914,6.60779881 119.4,6.60779881 C119.179086,6.60779881 119,6.42871271 119,6.20779881 L119,5.20779881 C119,4.98688491 119.179086,4.80779881 119.4,4.80779881 C119.620914,4.80779881 119.8,4.98688491 119.8,5.20779881 Z M1.80007849,3.37957686 C1.80001327,3.3897814 1.80001327,3.3897814 1.80000021,3.4 L1.80000021,4.37660357 C1.80000021,4.59751746 1.62091411,4.77660357 1.40000021,4.77660357 C1.17908631,4.77660357 1.00000021,4.59751746 1.00000021,4.37660357 L1.00000021,3.4 C1.00001719,3.38671374 1.00001719,3.38671374 1.00010202,3.37344172 C1.00179619,3.15253431 1.18225042,2.97482688 1.40315783,2.97650904 C1.62406523,2.97821522 1.80177267,3.15866946 1.80007849,3.37957686 Z M119.421116,1.83887624 C119.595442,2.17563306 119.712485,2.54002136 119.766215,2.9191864 C119.79721,3.13791515 119.645022,3.34035621 119.426293,3.37135118 C119.207564,3.40234614 119.005123,3.25015784 118.974128,3.0314291 C118.933094,2.74185398 118.843784,2.46380655 118.710664,2.20665099 C118.609106,2.01046515 118.685816,1.76909592 118.882002,1.66753772 C119.078188,1.56597953 119.319557,1.6426904 119.421116,1.83887624 Z M3.37834391,0.550095989 C3.47987763,0.746294496 3.40313666,0.987654157 3.20693815,1.08918788 C2.94867982,1.22283788 2.71432455,1.39926119 2.51394916,1.61035064 C2.36185852,1.77057357 2.10867834,1.77716609 1.94845541,1.62507544 C1.78823248,1.4729848 1.78163997,1.21980462 1.93373061,1.05958169 C2.19539698,0.783924034 2.50154333,0.553456192 2.83925202,0.378690235 C3.03545052,0.277156512 3.27681019,0.353897482 3.37834391,0.550095989 Z M117.517808,0.188136945 C117.72645,0.260742276 117.836729,0.488738155 117.764124,0.697379994 C117.691519,0.906021834 117.463523,1.01630121 117.254881,0.943695883 C116.98269,0.84897599 116.695024,0.800014594 116.400132,0.800000003 C116.179218,0.799989073 116.000152,0.620894112 116.000152,0.399980212 C116.000152,0.179066313 116.179258,-1.09263043e-05 116.400172,4.24230608e-09 C116.784867,1.90385059e-05 117.161375,0.064101627 117.517808,0.188136945 Z M60.2000002,0.5 C60.2000002,0.7209139 60.0209141,0.9 59.8000002,0.9 L58.8000002,0.9 C58.5790863,0.9 58.4000002,0.7209139 58.4000002,0.5 C58.4000002,0.2790861 58.5790863,0 58.8000002,0 L59.8000002,0 C60.0209141,0 60.2000002,0.2790861 60.2000002,0.5 Z M5.80000021,0.5 C5.80000021,0.7209139 5.62091411,0.9 5.40000021,0.9 L4.40000021,0.9 C4.17908631,0.9 4.00000021,0.7209139 4.00000021,0.5 C4.00000021,0.2790861 4.17908631,0 4.40000021,0 L5.40000021,0 C5.62091411,0 5.80000021,0.2790861 5.80000021,0.5 Z M108.2,0.5 C108.2,0.7209139 108.020914,0.9 107.8,0.9 L106.8,0.9 C106.579086,0.9 106.4,0.7209139 106.4,0.5 C106.4,0.2790861 106.579086,0 106.8,0 L107.8,0 C108.020914,0 108.2,0.2790861 108.2,0.5 Z M105,0.5 C105,0.7209139 104.820914,0.9 104.6,0.9 L103.6,0.9 C103.379086,0.9 103.2,0.7209139 103.2,0.5 C103.2,0.2790861 103.379086,0 103.6,0 L104.6,0 C104.820914,0 105,0.2790861 105,0.5 Z M101.8,0.5 C101.8,0.7209139 101.620914,0.9 101.4,0.9 L100.4,0.9 C100.179086,0.9 100,0.7209139 100,0.5 C100,0.2790861 100.179086,0 100.4,0 L101.4,0 C101.620914,0 101.8,0.2790861 101.8,0.5 Z M98.6000002,0.5 C98.6000002,0.7209139 98.4209141,0.9 98.2000002,0.9 L97.2000002,0.9 C96.9790863,0.9 96.8000002,0.7209139 96.8000002,0.5 C96.8000002,0.2790861 96.9790863,0 97.2000002,0 L98.2000002,0 C98.4209141,0 98.6000002,0.2790861 98.6000002,0.5 Z M95.4000002,0.5 C95.4000002,0.7209139 95.2209141,0.9 95.0000002,0.9 L94.0000002,0.9 C93.7790863,0.9 93.6000002,0.7209139 93.6000002,0.5 C93.6000002,0.2790861 93.7790863,0 94.0000002,0 L95.0000002,0 C95.2209141,0 95.4000002,0.2790861 95.4000002,0.5 Z M92.2000002,0.5 C92.2000002,0.7209139 92.0209141,0.9 91.8000002,0.9 L90.8000002,0.9 C90.5790863,0.9 90.4000002,0.7209139 90.4000002,0.5 C90.4000002,0.2790861 90.5790863,0 90.8000002,0 L91.8000002,0 C92.0209141,0 92.2000002,0.2790861 92.2000002,0.5 Z M89.0000002,0.5 C89.0000002,0.7209139 88.8209141,0.9 88.6000002,0.9 L87.6000002,0.9 C87.3790863,0.9 87.2000002,0.7209139 87.2000002,0.5 C87.2000002,0.2790861 87.3790863,0 87.6000002,0 L88.6000002,0 C88.8209141,0 89.0000002,0.2790861 89.0000002,0.5 Z M85.8000002,0.5 C85.8000002,0.7209139 85.6209141,0.9 85.4000002,0.9 L84.4000002,0.9 C84.1790863,0.9 84.0000002,0.7209139 84.0000002,0.5 C84.0000002,0.2790861 84.1790863,0 84.4000002,0 L85.4000002,0 C85.6209141,0 85.8000002,0.2790861 85.8000002,0.5 Z M82.6000002,0.5 C82.6000002,0.7209139 82.4209141,0.9 82.2000002,0.9 L81.2000002,0.9 C80.9790863,0.9 80.8000002,0.7209139 80.8000002,0.5 C80.8000002,0.2790861 80.9790863,0 81.2000002,0 L82.2000002,0 C82.4209141,0 82.6000002,0.2790861 82.6000002,0.5 Z M79.4000002,0.5 C79.4000002,0.7209139 79.2209141,0.9 79.0000002,0.9 L78.0000002,0.9 C77.7790863,0.9 77.6000002,0.7209139 77.6000002,0.5 C77.6000002,0.2790861 77.7790863,0 78.0000002,0 L79.0000002,0 C79.2209141,0 79.4000002,0.2790861 79.4000002,0.5 Z M76.2000002,0.5 C76.2000002,0.7209139 76.0209141,0.9 75.8000002,0.9 L74.8000002,0.9 C74.5790863,0.9 74.4000002,0.7209139 74.4000002,0.5 C74.4000002,0.2790861 74.5790863,0 74.8000002,0 L75.8000002,0 C76.0209141,0 76.2000002,0.2790861 76.2000002,0.5 Z M73.0000002,0.5 C73.0000002,0.7209139 72.8209141,0.9 72.6000002,0.9 L71.6000002,0.9 C71.3790863,0.9 71.2000002,0.7209139 71.2000002,0.5 C71.2000002,0.2790861 71.3790863,0 71.6000002,0 L72.6000002,0 C72.8209141,0 73.0000002,0.2790861 73.0000002,0.5 Z M69.8000002,0.5 C69.8000002,0.7209139 69.6209141,0.9 69.4000002,0.9 L68.4000002,0.9 C68.1790863,0.9 68.0000002,0.7209139 68.0000002,0.5 C68.0000002,0.2790861 68.1790863,0 68.4000002,0 L69.4000002,0 C69.6209141,0 69.8000002,0.2790861 69.8000002,0.5 Z M66.6000002,0.5 C66.6000002,0.7209139 66.4209141,0.9 66.2000002,0.9 L65.2000002,0.9 C64.9790863,0.9 64.8000002,0.7209139 64.8000002,0.5 C64.8000002,0.2790861 64.9790863,0 65.2000002,0 L66.2000002,0 C66.4209141,0 66.6000002,0.2790861 66.6000002,0.5 Z M63.4000002,0.5 C63.4000002,0.7209139 63.2209141,0.9 63.0000002,0.9 L62.0000002,0.9 C61.7790863,0.9 61.6000002,0.7209139 61.6000002,0.5 C61.6000002,0.2790861 61.7790863,0 62.0000002,0 L63.0000002,0 C63.2209141,0 63.4000002,0.2790861 63.4000002,0.5 Z M114.6,0.5 C114.6,0.7209139 114.420914,0.9 114.2,0.9 L113.2,0.9 C112.979086,0.9 112.8,0.7209139 112.8,0.5 C112.8,0.2790861 112.979086,0 113.2,0 L114.2,0 C114.420914,0 114.6,0.2790861 114.6,0.5 Z M57.0000002,0.5 C57.0000002,0.7209139 56.8209141,0.9 56.6000002,0.9 L55.6000002,0.9 C55.3790863,0.9 55.2000002,0.7209139 55.2000002,0.5 C55.2000002,0.2790861 55.3790863,0 55.6000002,0 L56.6000002,0 C56.8209141,0 57.0000002,0.2790861 57.0000002,0.5 Z M53.8000002,0.5 C53.8000002,0.7209139 53.6209141,0.9 53.4000002,0.9 L52.4000002,0.9 C52.1790863,0.9 52.0000002,0.7209139 52.0000002,0.5 C52.0000002,0.2790861 52.1790863,0 52.4000002,0 L53.4000002,0 C53.6209141,0 53.8000002,0.2790861 53.8000002,0.5 Z M50.6000002,0.5 C50.6000002,0.7209139 50.4209141,0.9 50.2000002,0.9 L49.2000002,0.9 C48.9790863,0.9 48.8000002,0.7209139 48.8000002,0.5 C48.8000002,0.2790861 48.9790863,0 49.2000002,0 L50.2000002,0 C50.4209141,0 50.6000002,0.2790861 50.6000002,0.5 Z M47.4000002,0.5 C47.4000002,0.7209139 47.2209141,0.9 47.0000002,0.9 L46.0000002,0.9 C45.7790863,0.9 45.6000002,0.7209139 45.6000002,0.5 C45.6000002,0.2790861 45.7790863,0 46.0000002,0 L47.0000002,0 C47.2209141,0 47.4000002,0.2790861 47.4000002,0.5 Z M44.2000002,0.5 C44.2000002,0.7209139 44.0209141,0.9 43.8000002,0.9 L42.8000002,0.9 C42.5790863,0.9 42.4000002,0.7209139 42.4000002,0.5 C42.4000002,0.2790861 42.5790863,0 42.8000002,0 L43.8000002,0 C44.0209141,0 44.2000002,0.2790861 44.2000002,0.5 Z M41.0000002,0.5 C41.0000002,0.7209139 40.8209141,0.9 40.6000002,0.9 L39.6000002,0.9 C39.3790863,0.9 39.2000002,0.7209139 39.2000002,0.5 C39.2000002,0.2790861 39.3790863,0 39.6000002,0 L40.6000002,0 C40.8209141,0 41.0000002,0.2790861 41.0000002,0.5 Z M37.8000002,0.5 C37.8000002,0.7209139 37.6209141,0.9 37.4000002,0.9 L36.4000002,0.9 C36.1790863,0.9 36.0000002,0.7209139 36.0000002,0.5 C36.0000002,0.2790861 36.1790863,0 36.4000002,0 L37.4000002,0 C37.6209141,0 37.8000002,0.2790861 37.8000002,0.5 Z M34.6000002,0.5 C34.6000002,0.7209139 34.4209141,0.9 34.2000002,0.9 L33.2000002,0.9 C32.9790863,0.9 32.8000002,0.7209139 32.8000002,0.5 C32.8000002,0.2790861 32.9790863,0 33.2000002,0 L34.2000002,0 C34.4209141,0 34.6000002,0.2790861 34.6000002,0.5 Z M31.4000002,0.5 C31.4000002,0.7209139 31.2209141,0.9 31.0000002,0.9 L30.0000002,0.9 C29.7790863,0.9 29.6000002,0.7209139 29.6000002,0.5 C29.6000002,0.2790861 29.7790863,0 30.0000002,0 L31.0000002,0 C31.2209141,0 31.4000002,0.2790861 31.4000002,0.5 Z M28.2000002,0.5 C28.2000002,0.7209139 28.0209141,0.9 27.8000002,0.9 L26.8000002,0.9 C26.5790863,0.9 26.4000002,0.7209139 26.4000002,0.5 C26.4000002,0.2790861 26.5790863,0 26.8000002,0 L27.8000002,0 C28.0209141,0 28.2000002,0.2790861 28.2000002,0.5 Z M25.0000002,0.5 C25.0000002,0.7209139 24.8209141,0.9 24.6000002,0.9 L23.6000002,0.9 C23.3790863,0.9 23.2000002,0.7209139 23.2000002,0.5 C23.2000002,0.2790861 23.3790863,0 23.6000002,0 L24.6000002,0 C24.8209141,0 25.0000002,0.2790861 25.0000002,0.5 Z M21.8000002,0.5 C21.8000002,0.7209139 21.6209141,0.9 21.4000002,0.9 L20.4000002,0.9 C20.1790863,0.9 20.0000002,0.7209139 20.0000002,0.5 C20.0000002,0.2790861 20.1790863,0 20.4000002,0 L21.4000002,0 C21.6209141,0 21.8000002,0.2790861 21.8000002,0.5 Z M18.6000002,0.5 C18.6000002,0.7209139 18.4209141,0.9 18.2000002,0.9 L17.2000002,0.9 C16.9790863,0.9 16.8000002,0.7209139 16.8000002,0.5 C16.8000002,0.2790861 16.9790863,0 17.2000002,0 L18.2000002,0 C18.4209141,0 18.6000002,0.2790861 18.6000002,0.5 Z M15.4000002,0.5 C15.4000002,0.7209139 15.2209141,0.9 15.0000002,0.9 L14.0000002,0.9 C13.7790863,0.9 13.6000002,0.7209139 13.6000002,0.5 C13.6000002,0.2790861 13.7790863,0 14.0000002,0 L15.0000002,0 C15.2209141,0 15.4000002,0.2790861 15.4000002,0.5 Z M12.2000002,0.5 C12.2000002,0.7209139 12.0209141,0.9 11.8000002,0.9 L10.8000002,0.9 C10.5790863,0.9 10.4000002,0.7209139 10.4000002,0.5 C10.4000002,0.2790861 10.5790863,0 10.8000002,0 L11.8000002,0 C12.0209141,0 12.2000002,0.2790861 12.2000002,0.5 Z M9.00000021,0.5 C9.00000021,0.7209139 8.82091411,0.9 8.60000021,0.9 L7.60000021,0.9 C7.37908631,0.9 7.20000021,0.7209139 7.20000021,0.5 C7.20000021,0.2790861 7.37908631,0 7.60000021,0 L8.60000021,0 C8.82091411,0 9.00000021,0.2790861 9.00000021,0.5 Z M111.4,0.5 C111.4,0.7209139 111.220914,0.9 111,0.9 L110,0.9 C109.779086,0.9 109.6,0.7209139 109.6,0.5 C109.6,0.2790861 109.779086,0 110,0 L111,0 C111.220914,0 111.4,0.2790861 111.4,0.5 Z" id="vLLM-dash-box" fill="#8E8E8E" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="..." transform="translate(70.7, 11)" fill="#8E8E8E">
+                        <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                    </g>
+                    <g id="1" transform="translate(8, 5)" fill="#FDB515">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="2" transform="translate(40, 5)" fill="#FDB515">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="3" transform="translate(86, 5)" fill="#FDB515">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <path d="M19.5,0.185947956 L22,5.18594796 L19.999,5.18594796 L20,20.4637121 C20,20.7398544 19.7761424,20.9637121 19.5,20.9637121 C19.2238576,20.9637121 19,20.7398544 19,20.4637121 L18.999,5.18594796 L17,5.18594796 L19.5,0.185947956 Z" id="arrow-top" fill="#8E8E8E" fill-rule="nonzero"></path>
+                <g id="top" transform="translate(8, -12.0363)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                    <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+            </g>
+        </g>
+    </g>
+</svg>
\ No newline at end of file
diff --git a/docs/assets/models/pooling_models/score_types.svg b/docs/assets/models/pooling_models/score_types.svg
new file mode 100644
index 000000000000..8e063524d204
--- /dev/null
+++ b/docs/assets/models/pooling_models/score_types.svg
@@ -0,0 +1,902 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="1180px" height="430px" viewBox="0 0 1180 430" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+    <title>Score Function</title>
+    <defs>
+        <path d="M0,0 L1180,0 L1180,430 L0,430 L0,0 Z" id="path-1"></path>
+        <path d="M0,0 L118,0 L118,128 L0,128 L0,0 Z" id="path-2"></path>
+        <path d="M0,0 L24,0 L24,12 L0,12 L0,0 Z" id="path-3"></path>
+        <path d="M0,0 L24,0 L24,12 L0,12 L0,0 Z" id="path-4"></path>
+        <path d="M0,0 L24,0 L24,12 L0,12 L0,0 Z" id="path-5"></path>
+        <path d="M0,0 L102,0 L102,12 L0,12 L0,0 Z" id="path-6"></path>
+        <path d="M0,0 L102,0 L102,12 L0,12 L0,0 Z" id="path-7"></path>
+        <path d="M0,0 L102,0 L102,12 L0,12 L0,0 Z" id="path-8"></path>
+        <path d="M0,0 L102,0 L102,12 L0,12 L0,0 Z" id="path-9"></path>
+        <path d="M0,0 L118,0 L118,128 L0,128 L0,0 Z" id="path-10"></path>
+        <rect id="path-11" x="0" y="0" width="296" height="70" rx="8"></rect>
+        <mask id="mask-12" maskContentUnits="userSpaceOnUse" maskUnits="objectBoundingBox" x="0" y="0" width="296" height="70" fill="white">
+            <use xlink:href="#path-11"></use>
+        </mask>
+        <path d="M10.377,0 L39.605,-1.77635684e-15 C45.3360588,-3.55271368e-15 49.982,4.64594115 49.982,10.377 C49.982,16.1080588 45.3360588,20.754 39.605,20.754 L10.377,20.754 C4.64594115,20.754 0,16.1080588 0,10.377 C0,4.64594115 4.64594115,0 10.377,0 Z" id="path-13"></path>
+        <path d="M0,0 L260,0 L260,128 L0,128 L0,0 Z" id="path-14"></path>
+        <path d="M10.377,0 L39.605,-1.77635684e-15 C45.3360588,-3.55271368e-15 49.982,4.64594115 49.982,10.377 C49.982,16.1080588 45.3360588,20.754 39.605,20.754 L10.377,20.754 C4.64594115,20.754 0,16.1080588 0,10.377 C0,4.64594115 4.64594115,0 10.377,0 Z" id="path-15"></path>
+        <path d="M0,0 L118,0 L118,128 L0,128 L0,0 Z" id="path-16"></path>
+        <path d="M0,0 L118,0 L118,128 L0,128 L0,0 Z" id="path-17"></path>
+        <path d="M0,0 L68,0 L68,12 L0,12 L0,0 Z" id="path-18"></path>
+        <path d="M0,0 L68,0 L68,12 L0,12 L0,0 Z" id="path-19"></path>
+        <path d="M0,0 L138,0 L138,12 L0,12 L0,0 Z" id="path-20"></path>
+        <path d="M6,0 L45.703,8.8817842e-16 C49.0167085,0 51.703,2.6862915 51.703,6 L51.703,14 C51.703,17.3137085 49.0167085,20 45.703,20 L6,20 C2.6862915,20 8.8817842e-16,17.3137085 8.8817842e-16,14 L8.8817842e-16,6 C8.8817842e-16,2.6862915 2.6862915,8.8817842e-16 6,8.8817842e-16 Z" id="path-21"></path>
+        <path d="M6,0 L45.703,8.8817842e-16 C49.0167085,0 51.703,2.6862915 51.703,6 L51.703,14 C51.703,17.3137085 49.0167085,20 45.703,20 L6,20 C2.6862915,20 8.8817842e-16,17.3137085 8.8817842e-16,14 L8.8817842e-16,6 C8.8817842e-16,2.6862915 2.6862915,8.8817842e-16 6,8.8817842e-16 Z" id="path-22"></path>
+        <path d="M6,0 L45.703,8.8817842e-16 C49.0167085,0 51.703,2.6862915 51.703,6 L51.703,14 C51.703,17.3137085 49.0167085,20 45.703,20 L6,20 C2.6862915,20 8.8817842e-16,17.3137085 8.8817842e-16,14 L8.8817842e-16,6 C8.8817842e-16,2.6862915 2.6862915,8.8817842e-16 6,8.8817842e-16 Z" id="path-23"></path>
+        <path d="M10.377,0 L39.605,-1.77635684e-15 C45.3360588,-3.55271368e-15 49.982,4.64594115 49.982,10.377 C49.982,16.1080588 45.3360588,20.754 39.605,20.754 L10.377,20.754 C4.64594115,20.754 0,16.1080588 0,10.377 C0,4.64594115 4.64594115,0 10.377,0 Z" id="path-24"></path>
+    </defs>
+    <g id="Score-Function" stroke="none" fill="none" xlink:href="#path-1" fill-rule="evenodd">
+        <use fill="#FFFFFF" xlink:href="#path-1"></use>
+        <g id="block-@pooling" stroke-width="1" transform="translate(42, 130)" fill="#9172E2">
+            <path d="M8,0 L80,0 C84.418278,0 88,3.581722 88,8 L88,74 C88,78.418278 84.418278,82 80,82 L8,82 C3.581722,82 0,78.418278 0,74 L0,8 C0,3.581722 3.581722,8.8817842e-16 8,0 Z" id="bg" fill-opacity="0.1"></path>
+            <path d="M10.9746434,80.9999999 L10.974,81.9999999 L8.974,81.9999999 L8.97464335,80.9999999 L10.9746434,80.9999999 Z M14.9746434,80.9999999 L14.974,81.9999999 L12.974,81.9999999 L12.9746433,80.9999999 L14.9746434,80.9999999 Z M18.9746434,80.9999999 L18.974,81.9999999 L16.974,81.9999999 L16.9746433,80.9999999 L18.9746434,80.9999999 Z M22.9746434,80.9999999 L22.974,81.9999999 L20.974,81.9999999 L20.9746433,80.9999999 L22.9746434,80.9999999 Z M26.9746434,80.9999999 L26.974,81.9999999 L24.974,81.9999999 L24.9746433,80.9999999 L26.9746434,80.9999999 Z M30.9746434,80.9999999 L30.974,81.9999999 L28.974,81.9999999 L28.9746433,80.9999999 L30.9746434,80.9999999 Z M34.9746434,80.9999999 L34.974,81.9999999 L32.974,81.9999999 L32.9746433,80.9999999 L34.9746434,80.9999999 Z M38.9746434,80.9999999 L38.974,81.9999999 L36.974,81.9999999 L36.9746433,80.9999999 L38.9746434,80.9999999 Z M42.9746434,80.9999999 L42.974,81.9999999 L40.974,81.9999999 L40.9746433,80.9999999 L42.9746434,80.9999999 Z M46.9746433,80.9999999 L46.974,81.9999999 L44.974,81.9999999 L44.9746433,80.9999999 L46.9746433,80.9999999 Z M50.9746433,80.9999999 L50.974,81.9999999 L48.974,81.9999999 L48.9746433,80.9999999 L50.9746433,80.9999999 Z M54.9746433,80.9999999 L54.974,81.9999999 L52.974,81.9999999 L52.9746433,80.9999999 L54.9746433,80.9999999 Z M58.9746433,80.9999999 L58.974,81.9999999 L56.974,81.9999999 L56.9746433,80.9999999 L58.9746433,80.9999999 Z M62.9746433,80.9999999 L62.974,81.9999999 L60.974,81.9999999 L60.9746433,80.9999999 L62.9746433,80.9999999 Z M66.9746433,80.9999999 L66.974,81.9999999 L64.974,81.9999999 L64.9746433,80.9999999 L66.9746433,80.9999999 Z M70.9746433,80.9999999 L70.974,81.9999999 L68.974,81.9999999 L68.9746433,80.9999999 L70.9746433,80.9999999 Z M74.9746433,80.9999999 L74.974,81.9999999 L72.974,81.9999999 L72.9746433,80.9999999 L74.9746433,80.9999999 Z M78.9746433,80.9999999 L78.974,81.9999999 L76.974,81.9999999 L76.9746433,80.9999999 L78.9746433,80.9999999 Z M82.4897431,80.544574 L82.8456141,81.4791091 C82.2305193,81.7132701 81.5790784,81.8737323 80.9020819,81.9497049 L80.7902201,80.9559811 C81.3756541,80.8903126 81.945502,80.7517531 82.4897431,80.544574 Z M5.41029967,80.5056361 C5.95157753,80.7213183 6.51934132,80.8686603 7.10343083,80.9432289 L6.97697115,81.9352007 C6.30134705,81.8489616 5.65205616,81.6784493 5.03997173,81.4345371 L5.41029967,80.5056361 Z M85.3883016,78.4686843 L86.1574667,79.1077344 C85.7303171,79.6220994 85.2399427,80.0820804 84.6982653,80.4757556 L84.1106904,79.6665857 C84.584521,79.3221774 85.0139915,78.9194515 85.3883016,78.4686843 Z M2.56157145,78.4076246 C2.93082185,78.8625901 3.35581002,79.2701375 3.82581503,79.6198877 L3.22929764,80.422488 C2.69189028,80.0226419 2.20638657,79.5569385 1.78473163,79.0373231 L2.56157145,78.4076246 Z M86.8760654,75.3192015 L87.858348,75.506607 C87.7312901,76.1733384 87.5214835,76.8107286 87.2400208,77.407685 L86.3352011,76.9818892 C86.5841061,76.4541311 86.7660968,75.896352 86.8760654,75.3192015 Z M1.10775661,75.2312252 C1.21033515,75.8099081 1.38509885,76.3698753 1.62708778,76.9005768 L0.717340098,77.3157385 C0.443469481,76.7151789 0.241701076,76.07482 0.123099669,75.4057265 L1.10775661,75.2312252 Z M88,71.512 L88,73.512 L87,73.5126783 L87,71.5126783 L88,71.512 Z M1,71.461965 L1,73.461965 L0,73.461 L0,71.461 L1,71.461965 Z M88,67.512 L88,69.512 L87,69.5126783 L87,67.5126783 L88,67.512 Z M1,67.461965 L1,69.461965 L0,69.461 L0,67.461 L1,67.461965 Z M88,63.512 L88,65.512 L87,65.5126783 L87,63.5126783 L88,63.512 Z M1,63.461965 L1,65.461965 L0,65.461 L0,63.461 L1,63.461965 Z M88,59.512 L88,61.512 L87,61.5126783 L87,59.5126783 L88,59.512 Z M1,59.461965 L1,61.461965 L0,61.461 L0,59.461 L1,59.461965 Z M88,55.512 L88,57.512 L87,57.5126783 L87,55.5126783 L88,55.512 Z M1,55.461965 L1,57.461965 L0,57.461 L0,55.461 L1,55.461965 Z M88,51.512 L88,53.512 L87,53.5126783 L87,51.5126783 L88,51.512 Z M1,51.461965 L1,53.461965 L0,53.461 L0,51.461 L1,51.461965 Z M88,47.512 L88,49.512 L87,49.5126783 L87,47.5126783 L88,47.512 Z M1,47.461965 L1,49.461965 L0,49.461 L0,47.461 L1,47.461965 Z M88,43.512 L88,45.512 L87,45.5126783 L87,43.5126783 L88,43.512 Z M1,43.461965 L1,45.461965 L0,45.461 L0,43.461 L1,43.461965 Z M88,39.512 L88,41.512 L87,41.5126783 L87,39.5126783 L88,39.512 Z M1,39.461965 L1,41.461965 L0,41.461 L0,39.461 L1,39.461965 Z M88,35.512 L88,37.512 L87,37.5126783 L87,35.5126783 L88,35.512 Z M1,35.461965 L1,37.461965 L0,37.461 L0,35.461 L1,35.461965 Z M88,31.512 L88,33.512 L87,33.5126783 L87,31.5126783 L88,31.512 Z M1,31.461965 L1,33.461965 L0,33.461 L0,31.461 L1,31.461965 Z M88,27.512 L88,29.512 L87,29.5126783 L87,27.5126783 L88,27.512 Z M1,27.461965 L1,29.461965 L0,29.461 L0,27.461 L1,27.461965 Z M88,23.512 L88,25.512 L87,25.5126783 L87,23.5126783 L88,23.512 Z M1,23.461965 L1,25.461965 L0,25.461 L0,23.461 L1,23.461965 Z M88,19.512 L88,21.512 L87,21.5126783 L87,19.5126783 L88,19.512 Z M1,19.461965 L1,21.461965 L0,21.461 L0,19.461 L1,19.461965 Z M88,15.512 L88,17.512 L87,17.5126783 L87,15.5126783 L88,15.512 Z M1,15.461965 L1,17.461965 L0,17.461 L0,15.461 L1,15.461965 Z M88,11.512 L88,13.512 L87,13.5126783 L87,11.5126783 L88,11.512 Z M1,11.461965 L1,13.461965 L0,13.461 L0,11.461 L1,11.461965 Z M88,8 L88,9.512 L87,9.51268141 L86.9999959,7.99230721 C86.9998426,7.84931676 86.9954144,7.70690426 86.9867482,7.5651841 L87.9849074,7.50453602 C87.9949203,7.6684065 88,7.83361003 88,8 Z M0.0177810396,7.46235014 L1.01560984,7.52821226 C1.00521986,7.68466746 1,7.84198159 1,8 L1,9.46196502 L0,9.461 L0,8 C0,7.81932186 0.00598958881,7.64004267 0.0177810396,7.46235014 Z M86.6692254,5.86656128 C86.4908516,5.30842466 86.2427737,4.77610859 85.9315604,4.28094453 L86.7776671,3.74792999 C87.1308169,4.30965578 87.4160681,4.91838179 87.6219085,5.56259574 L86.6692254,5.86656128 Z M1.26026912,3.68817589 L2.10224493,4.22769122 C1.78656347,4.72006919 1.53367638,5.25013249 1.3502652,5.80651827 L0.400554683,5.49338901 C0.612012081,4.85193918 0.902417359,4.24636767 1.26026912,3.68817589 Z M85.4881271,2.17927796 L84.8015377,2.90632361 C84.3752759,2.5043409 83.9004118,2.15691661 83.3883721,1.8730526 L83.8738987,0.998830409 C84.4612176,1.32450134 85.0032397,1.72192978 85.4881271,2.17927796 Z M4.18303226,0.967564053 L4.66099377,1.84594486 C4.14669322,2.12566396 3.66909236,2.46925191 3.23964084,2.86780054 L2.55894601,2.13523322 C3.04750167,1.68176376 3.5928096,1.28859486 4.18303226,0.967564053 Z M80,0 C80.6853149,0 81.3505032,0.0861719866 81.9853207,0.248271715 L81.7373824,1.21704772 C81.1747486,1.07342708 80.5926435,1 80,1 L80,0 Z M10,0 L10,1 L8.00000005,1 L7.59669451,1.01139497 C7.19522811,1.0341476 6.79959301,1.09086155 6.41261147,1.18053077 L6.18734244,0.206234069 C6.76976317,0.0713155963 7.37655326,0 8,0 L10,0 Z M74,0 L74,1 L72,1 L72,0 L74,0 Z M70,0 L70,1 L68,1 L68,0 L70,0 Z M66,0 L66,1 L64,1 L64,0 L66,0 Z M78,0 L78,1 L76,1 L76,0 L78,0 Z M58,0 L58,1 L56,1 L56,0 L58,0 Z M54,0 L54,1 L52,1 L52,0 L54,0 Z M50,0 L50,1 L48,1 L48,0 L50,0 Z M46,0 L46,1 L44,1 L44,0 L46,0 Z M42,0 L42,1 L40,1 L40,0 L42,0 Z M38,0 L38,1 L36,1 L36,0 L38,0 Z M34,0 L34,1 L32,1 L32,0 L34,0 Z M30,0 L30,1 L28,1 L28,0 L30,0 Z M26,0 L26,1 L24,1 L24,0 L26,0 Z M22,0 L22,1 L20,1 L20,0 L22,0 Z M18,0 L18,1 L16,1 L16,0 L18,0 Z M14,0 L14,1 L12,1 L12,0 L14,0 Z M62,0 L62,1 L60,1 L60,0 L62,0 Z" id="dash-box" fill-rule="nonzero"></path>
+            <g id="Pooling" transform="translate(0, 35)" fill-rule="nonzero">
+                <path d="M24.7841797,10.0058594 C25.0146484,10.0058594 25.2128906,9.92578125 25.3789063,9.765625 C25.5449219,9.60546875 25.6279297,9.39453125 25.6279297,9.1328125 L25.6279297,6.64257813 L27.2626953,6.64257813 C29.3994141,6.64257813 30.4677734,5.79101563 30.4677734,4.08789063 C30.4677734,3.62304687 30.3955078,3.22363281 30.2509766,2.88964844 C30.1064453,2.55566406 29.8945313,2.29199219 29.6152344,2.09863281 C29.3359375,1.90527344 29.0136719,1.76464844 28.6484375,1.67675781 C28.2832031,1.58886719 27.8583984,1.54492188 27.3740234,1.54492188 L24.8544922,1.54492188 C24.5654297,1.54492188 24.3408203,1.64257813 24.1806641,1.83789062 C24.0205078,2.03320312 23.9404297,2.27929688 23.9404297,2.57617187 L23.9404297,9.1328125 C23.9404297,9.39453125 24.0244141,9.60546875 24.1923828,9.765625 C24.3603516,9.92578125 24.5576172,10.0058594 24.7841797,10.0058594 Z M25.6279297,5.42382812 L25.6279297,2.828125 L27.1923828,2.828125 C27.4658203,2.828125 27.6933594,2.84375 27.875,2.875 C28.0566406,2.90625 28.2226562,2.96484375 28.3730469,3.05078125 C28.5234375,3.13671875 28.6337891,3.265625 28.7041016,3.4375 C28.7744141,3.609375 28.8095703,3.82617187 28.8095703,4.08789063 C28.8095703,4.35742188 28.7744141,4.58007812 28.7041016,4.75585938 C28.6337891,4.93164063 28.5234375,5.06738281 28.3730469,5.16308594 C28.2226562,5.25878906 28.0527344,5.32617188 27.8632812,5.36523438 C27.6738281,5.40429688 27.4365234,5.42382812 27.1513672,5.42382812 L25.6279297,5.42382812 Z" id="Shape"></path>
+                <path d="M34.2177734,9.06835938 C33.7138672,9.06835938 33.3212891,8.89257813 33.0400391,8.54101562 C32.7587891,8.18945312 32.6181641,7.69726563 32.6181641,7.06445312 C32.6181641,6.42382812 32.7578125,5.92675781 33.0371094,5.57324219 C33.3164062,5.21972656 33.7099609,5.04296875 34.2177734,5.04296875 C34.7255859,5.04296875 35.1201172,5.22070312 35.4013672,5.57617188 C35.6826172,5.93164063 35.8232422,6.42773438 35.8232422,7.06445312 C35.8232422,7.69726563 35.6826172,8.18945312 35.4013672,8.54101562 C35.1201172,8.89257813 34.7255859,9.06835938 34.2177734,9.06835938 Z M34.2177734,10.0703125 C34.6318359,10.0703125 35.0107422,10.0136719 35.3544922,9.90039063 C35.6982422,9.78710938 35.9882812,9.63574219 36.2246094,9.44628906 C36.4609375,9.25683594 36.6601563,9.03222656 36.8222656,8.77246094 C36.984375,8.51269531 37.1035156,8.24023438 37.1796875,7.95507812 C37.2558594,7.66992188 37.2939453,7.37304688 37.2939453,7.06445313 C37.2939453,6.74023438 37.2539062,6.4296875 37.1738281,6.1328125 C37.09375,5.8359375 36.9697266,5.55957031 36.8017578,5.30371094 C36.6337891,5.04785156 36.4306641,4.82714844 36.1923828,4.64160156 C35.9541016,4.45605469 35.6660156,4.30957031 35.328125,4.20214844 C34.9902344,4.09472656 34.6201172,4.04101562 34.2177734,4.04101562 C33.8076172,4.04101562 33.4316406,4.09667969 33.0898438,4.20800781 C32.7480469,4.31933594 32.4589844,4.47070312 32.2226562,4.66210938 C31.9863281,4.85351562 31.7861328,5.078125 31.6220703,5.3359375 C31.4580078,5.59375 31.3378906,5.86816406 31.2617188,6.15917969 C31.1855469,6.45019531 31.1474609,6.75195312 31.1474609,7.06445313 C31.1474609,7.45898437 31.2099609,7.83203125 31.3349609,8.18359375 C31.4599609,8.53515625 31.6425781,8.85351562 31.8828125,9.13867188 C32.1230469,9.42382813 32.4443359,9.65039063 32.8466797,9.81835938 C33.2490234,9.98632812 33.7060547,10.0703125 34.2177734,10.0703125 Z" id="Shape"></path>
+                <path d="M41.3017578,9.06835938 C40.7978516,9.06835938 40.4052734,8.89257813 40.1240234,8.54101562 C39.8427734,8.18945312 39.7021484,7.69726563 39.7021484,7.06445312 C39.7021484,6.42382812 39.8417969,5.92675781 40.1210938,5.57324219 C40.4003906,5.21972656 40.7939453,5.04296875 41.3017578,5.04296875 C41.8095703,5.04296875 42.2041016,5.22070312 42.4853516,5.57617188 C42.7666016,5.93164063 42.9072266,6.42773438 42.9072266,7.06445312 C42.9072266,7.69726563 42.7666016,8.18945312 42.4853516,8.54101562 C42.2041016,8.89257813 41.8095703,9.06835938 41.3017578,9.06835938 Z M41.3017578,10.0703125 C41.7158203,10.0703125 42.0947266,10.0136719 42.4384766,9.90039063 C42.7822266,9.78710938 43.0722656,9.63574219 43.3085938,9.44628906 C43.5449219,9.25683594 43.7441406,9.03222656 43.90625,8.77246094 C44.0683594,8.51269531 44.1875,8.24023438 44.2636719,7.95507812 C44.3398438,7.66992188 44.3779297,7.37304688 44.3779297,7.06445313 C44.3779297,6.74023438 44.3378906,6.4296875 44.2578125,6.1328125 C44.1777344,5.8359375 44.0537109,5.55957031 43.8857422,5.30371094 C43.7177734,5.04785156 43.5146484,4.82714844 43.2763672,4.64160156 C43.0380859,4.45605469 42.75,4.30957031 42.4121094,4.20214844 C42.0742188,4.09472656 41.7041016,4.04101562 41.3017578,4.04101562 C40.8916016,4.04101562 40.515625,4.09667969 40.1738281,4.20800781 C39.8320312,4.31933594 39.5429688,4.47070312 39.3066406,4.66210938 C39.0703125,4.85351562 38.8701172,5.078125 38.7060547,5.3359375 C38.5419922,5.59375 38.421875,5.86816406 38.3457031,6.15917969 C38.2695312,6.45019531 38.2314453,6.75195312 38.2314453,7.06445313 C38.2314453,7.45898437 38.2939453,7.83203125 38.4189453,8.18359375 C38.5439453,8.53515625 38.7265625,8.85351562 38.9667969,9.13867188 C39.2070312,9.42382813 39.5283203,9.65039063 39.9306641,9.81835938 C40.3330078,9.98632812 40.7900391,10.0703125 41.3017578,10.0703125 Z" id="Shape"></path>
+                <path d="M46.3759766,10 C46.5947266,10 46.7753906,9.92773438 46.9179688,9.78320312 C47.0605469,9.63867188 47.1318359,9.4375 47.1318359,9.1796875 L47.1318359,2.37695313 C47.1318359,2.11914062 47.0615234,1.91796875 46.9208984,1.7734375 C46.7802734,1.62890625 46.6025391,1.55664062 46.3876953,1.55664062 C46.1728516,1.55664062 45.9970703,1.62890625 45.8603516,1.7734375 C45.7236328,1.91796875 45.6552734,2.11914062 45.6552734,2.37695313 L45.6552734,9.1796875 C45.6552734,9.44140625 45.7226562,9.64355469 45.8574219,9.78613281 C45.9921875,9.92871094 46.1650391,10 46.3759766,10 Z" id="Path"></path>
+                <path d="M49.4580078,10 C49.6728516,10 49.8496094,9.92773438 49.9882812,9.78320312 C50.1269531,9.63867187 50.1962891,9.4375 50.1962891,9.1796875 L50.1962891,4.94921875 C50.1962891,4.6875 50.1269531,4.484375 49.9882812,4.33984375 C49.8496094,4.1953125 49.6728516,4.12304688 49.4580078,4.12304688 C49.2431641,4.12304688 49.0673828,4.1953125 48.9306641,4.33984375 C48.7939453,4.484375 48.7255859,4.6875 48.7255859,4.94921875 L48.7255859,9.1796875 C48.7255859,9.44140625 48.7939453,9.64355469 48.9306641,9.78613281 C49.0673828,9.92871094 49.2431641,10 49.4580078,10 Z M49.4580078,2.95117188 C49.7119141,2.95117188 49.9179688,2.87597656 50.0761719,2.72558594 C50.234375,2.57519531 50.3134766,2.37890625 50.3134766,2.13671875 C50.3134766,1.89453125 50.2353516,1.69921875 50.0791016,1.55078125 C49.9228516,1.40234375 49.7177734,1.328125 49.4638672,1.328125 C49.2060547,1.328125 48.9980469,1.40234375 48.8398438,1.55078125 C48.6816406,1.69921875 48.6025391,1.89453125 48.6025391,2.13671875 C48.6025391,2.37890625 48.6816406,2.57519531 48.8398438,2.72558594 C48.9980469,2.87597656 49.2041016,2.95117188 49.4580078,2.95117188 Z" id="Shape"></path>
+                <path d="M52.4169922,10 C52.6318359,10 52.8085938,9.92871094 52.9472656,9.78613281 C53.0859375,9.64355469 53.1552734,9.4453125 53.1552734,9.19140625 L53.1552734,6.71289062 C53.1552734,6.21679688 53.2978516,5.81445313 53.5830078,5.50585937 C53.8681641,5.19726562 54.2060547,5.04296875 54.5966797,5.04296875 C54.9208984,5.04296875 55.1875,5.14941406 55.3964844,5.36230469 C55.6054688,5.57519531 55.7099609,5.8828125 55.7099609,6.28515625 L55.7099609,9.19140625 C55.7099609,9.4453125 55.7783203,9.64355469 55.9150391,9.78613281 C56.0517578,9.92871094 56.2236328,10 56.4306641,10 C56.6494141,10 56.8291016,9.92871094 56.9697266,9.78613281 C57.1103516,9.64355469 57.1806641,9.4453125 57.1806641,9.19140625 L57.1806641,6.29101563 C57.1806641,5.91601563 57.1240234,5.58203125 57.0107422,5.2890625 C56.8974609,4.99609375 56.7431641,4.76074219 56.5478516,4.58300781 C56.3525391,4.40527344 56.1308594,4.27050781 55.8828125,4.17871094 C55.6347656,4.08691406 55.3701172,4.04101562 55.0888672,4.04101562 C54.6318359,4.04101562 54.2382812,4.12695312 53.9082031,4.29882812 C53.578125,4.47070312 53.3271484,4.72265625 53.1552734,5.0546875 L53.1552734,4.84960938 C53.1552734,4.61914062 53.0869141,4.44042969 52.9501953,4.31347656 C52.8134766,4.18652344 52.6396484,4.12304688 52.4287109,4.12304688 C52.2138672,4.12304688 52.0361328,4.1875 51.8955078,4.31640625 C51.7548828,4.4453125 51.6845703,4.62695312 51.6845703,4.86132813 L51.6845703,9.19140625 C51.6845703,9.4453125 51.7529297,9.64355469 51.8896484,9.78613281 C52.0263672,9.92871094 52.2021484,10 52.4169922,10 Z" id="Path"></path>
+                <path d="M62.8994141,9.92382813 C62.8994141,10.4785156 62.7470703,10.8916016 62.4423828,11.1630859 C62.1376953,11.4345703 61.7119141,11.5703125 61.1650391,11.5703125 C61.0087891,11.5703125 60.8496094,11.5566406 60.6875,11.5292969 C60.5253906,11.5019531 60.4033203,11.4765625 60.3212891,11.453125 C60.2392578,11.4296875 60.1103516,11.3876953 59.9345703,11.3271484 C59.7587891,11.2666016 59.6533203,11.2304688 59.6181641,11.21875 C59.5556641,11.1953125 59.4931641,11.1835937 59.4306641,11.1835937 C59.2939453,11.1835937 59.1806641,11.234375 59.0908203,11.3359375 C59.0009766,11.4375 58.9560547,11.5546875 58.9560547,11.6875 C58.9560547,11.875 59.0458984,12.0234375 59.2255859,12.1328125 C59.4365234,12.2617188 59.7324219,12.3701172 60.1132812,12.4580078 C60.4941406,12.5458984 60.8935547,12.5898438 61.3115234,12.5898438 C62.2333984,12.5898438 62.9628906,12.3476563 63.5,11.8632813 C64.0371094,11.3789062 64.3056641,10.6738281 64.3056641,9.74804688 L64.3056641,4.94335938 C64.3056641,4.68554688 64.2402344,4.484375 64.109375,4.33984375 C63.9785156,4.1953125 63.8115234,4.12304688 63.6083984,4.12304688 C63.4287109,4.12304688 63.2753906,4.17675781 63.1484375,4.28417969 C63.0214844,4.39160156 62.9482422,4.54492188 62.9287109,4.74414063 L62.9287109,5.01367188 C62.7333984,4.70117188 62.5009766,4.46289062 62.2314453,4.29882812 C61.9619141,4.13476562 61.5908203,4.05273438 61.1181641,4.05273438 C60.2705078,4.05273438 59.5996094,4.33496094 59.1054688,4.89941406 C58.6113281,5.46386719 58.3642578,6.19726562 58.3642578,7.09960938 C58.3642578,7.99414062 58.6181641,8.70214844 59.1259766,9.22363281 C59.6337891,9.74511719 60.3095703,10.0058594 61.1533203,10.0058594 C61.9853516,10.0058594 62.5673828,9.69140625 62.8994141,9.0625 L62.8994141,9.92382813 Z M61.4169922,9.02734375 C60.9716797,9.01953125 60.6015625,8.85058594 60.3066406,8.52050781 C60.0117188,8.19042969 59.8642578,7.703125 59.8642578,7.05859375 C59.8642578,6.8359375 59.8818359,6.62792969 59.9169922,6.43457031 C59.9521484,6.24121094 60.0087891,6.05761719 60.0869141,5.88378906 C60.1650391,5.70996094 60.2626953,5.56152344 60.3798828,5.43847656 C60.4970703,5.31542969 60.6435547,5.21777344 60.8193359,5.14550781 C60.9951172,5.07324219 61.1923828,5.03710938 61.4111328,5.03710938 C62.4033203,5.03710938 62.8994141,5.71484375 62.8994141,7.0703125 C62.8994141,7.73828125 62.7646484,8.22949219 62.4951172,8.54394531 C62.2255859,8.85839844 61.8662109,9.01953125 61.4169922,9.02734375 Z" id="Shape"></path>
+            </g>
+        </g>
+        <g id="block-@score" stroke-width="1" transform="translate(42, 50)">
+            <path d="M8,0 L80,0 C84.418278,0 88,3.581722 88,8 L88,62 C88,66.418278 84.418278,70 80,70 L8,70 C3.581722,70 0,66.418278 0,62 L0,8 C0,3.581722 3.581722,8.8817842e-16 8,0 Z" id="bg" fill="#F0FDF8"></path>
+            <path d="M10.9746434,69 L10.974,70 L8.974,70 L8.97464335,69 L10.9746434,69 Z M14.9746434,69 L14.974,70 L12.974,70 L12.9746433,69 L14.9746434,69 Z M18.9746434,69 L18.974,70 L16.974,70 L16.9746433,69 L18.9746434,69 Z M22.9746434,69 L22.974,70 L20.974,70 L20.9746433,69 L22.9746434,69 Z M26.9746434,69 L26.974,70 L24.974,70 L24.9746433,69 L26.9746434,69 Z M30.9746434,69 L30.974,70 L28.974,70 L28.9746433,69 L30.9746434,69 Z M34.9746434,69 L34.974,70 L32.974,70 L32.9746433,69 L34.9746434,69 Z M38.9746434,69 L38.974,70 L36.974,70 L36.9746433,69 L38.9746434,69 Z M42.9746434,69 L42.974,70 L40.974,70 L40.9746433,69 L42.9746434,69 Z M46.9746433,69 L46.974,70 L44.974,70 L44.9746433,69 L46.9746433,69 Z M50.9746433,69 L50.974,70 L48.974,70 L48.9746433,69 L50.9746433,69 Z M54.9746433,69 L54.974,70 L52.974,70 L52.9746433,69 L54.9746433,69 Z M58.9746433,69 L58.974,70 L56.974,70 L56.9746433,69 L58.9746433,69 Z M62.9746433,69 L62.974,70 L60.974,70 L60.9746433,69 L62.9746433,69 Z M66.9746433,69 L66.974,70 L64.974,70 L64.9746433,69 L66.9746433,69 Z M70.9746433,69 L70.974,70 L68.974,70 L68.9746433,69 L70.9746433,69 Z M74.9746433,69 L74.974,70 L72.974,70 L72.9746433,69 L74.9746433,69 Z M78.9746433,69 L78.974,70 L76.974,70 L76.9746433,69 L78.9746433,69 Z M82.4897431,68.544574 L82.8456141,69.4791091 C82.2305193,69.7132701 81.5790784,69.8737323 80.9020819,69.9497049 L80.7902201,68.9559811 C81.3756541,68.8903126 81.945502,68.7517531 82.4897431,68.544574 Z M5.41029967,68.5056361 C5.95157753,68.7213183 6.51934132,68.8686603 7.10343083,68.9432289 L6.97697115,69.9352007 C6.30134705,69.8489616 5.65205616,69.6784493 5.03997173,69.4345371 L5.41029967,68.5056361 Z M85.3883016,66.4686843 L86.1574667,67.1077344 C85.7303171,67.6220994 85.2399427,68.0820804 84.6982653,68.4757556 L84.1106904,67.6665857 C84.584521,67.3221774 85.0139915,66.9194515 85.3883016,66.4686843 Z M2.56157145,66.4076246 C2.93082185,66.8625901 3.35581002,67.2701375 3.82581503,67.6198877 L3.22929764,68.422488 C2.69189028,68.0226419 2.20638657,67.5569385 1.78473163,67.0373231 L2.56157145,66.4076246 Z M86.8760654,63.3192015 L87.858348,63.506607 C87.7312901,64.1733384 87.5214835,64.8107286 87.2400208,65.407685 L86.3352011,64.9818892 C86.5841061,64.4541311 86.7660968,63.896352 86.8760654,63.3192015 Z M1.10775661,63.2312252 C1.21033515,63.8099081 1.38509885,64.3698753 1.62708778,64.9005768 L0.717340098,65.3157385 C0.443469481,64.7151789 0.241701076,64.07482 0.123099669,63.4057265 L1.10775661,63.2312252 Z M88,59.512 L88,61.512 L87,61.5126783 L87,59.5126783 L88,59.512 Z M1,59.461965 L1,61.461965 L0,61.461 L0,59.461 L1,59.461965 Z M88,55.512 L88,57.512 L87,57.5126783 L87,55.5126783 L88,55.512 Z M1,55.461965 L1,57.461965 L0,57.461 L0,55.461 L1,55.461965 Z M88,51.512 L88,53.512 L87,53.5126783 L87,51.5126783 L88,51.512 Z M1,51.461965 L1,53.461965 L0,53.461 L0,51.461 L1,51.461965 Z M88,47.512 L88,49.512 L87,49.5126783 L87,47.5126783 L88,47.512 Z M1,47.461965 L1,49.461965 L0,49.461 L0,47.461 L1,47.461965 Z M88,43.512 L88,45.512 L87,45.5126783 L87,43.5126783 L88,43.512 Z M1,43.461965 L1,45.461965 L0,45.461 L0,43.461 L1,43.461965 Z M88,39.512 L88,41.512 L87,41.5126783 L87,39.5126783 L88,39.512 Z M1,39.461965 L1,41.461965 L0,41.461 L0,39.461 L1,39.461965 Z M88,35.512 L88,37.512 L87,37.5126783 L87,35.5126783 L88,35.512 Z M1,35.461965 L1,37.461965 L0,37.461 L0,35.461 L1,35.461965 Z M88,31.512 L88,33.512 L87,33.5126783 L87,31.5126783 L88,31.512 Z M1,31.461965 L1,33.461965 L0,33.461 L0,31.461 L1,31.461965 Z M88,27.512 L88,29.512 L87,29.5126783 L87,27.5126783 L88,27.512 Z M1,27.461965 L1,29.461965 L0,29.461 L0,27.461 L1,27.461965 Z M88,23.512 L88,25.512 L87,25.5126783 L87,23.5126783 L88,23.512 Z M1,23.461965 L1,25.461965 L0,25.461 L0,23.461 L1,23.461965 Z M88,19.512 L88,21.512 L87,21.5126783 L87,19.5126783 L88,19.512 Z M1,19.461965 L1,21.461965 L0,21.461 L0,19.461 L1,19.461965 Z M88,15.512 L88,17.512 L87,17.5126783 L87,15.5126783 L88,15.512 Z M1,15.461965 L1,17.461965 L0,17.461 L0,15.461 L1,15.461965 Z M88,11.512 L88,13.512 L87,13.5126783 L87,11.5126783 L88,11.512 Z M1,11.461965 L1,13.461965 L0,13.461 L0,11.461 L1,11.461965 Z M88,8 L88,9.512 L87,9.51268141 L86.9999959,7.99230721 C86.9998426,7.84931676 86.9954144,7.70690426 86.9867482,7.5651841 L87.9849074,7.50453602 C87.9949203,7.6684065 88,7.83361003 88,8 Z M0.0177810396,7.46235014 L1.01560984,7.52821226 C1.00521986,7.68466746 1,7.84198159 1,8 L1,9.46196502 L0,9.461 L0,8 C0,7.81932186 0.00598958881,7.64004267 0.0177810396,7.46235014 Z M86.6692254,5.86656128 C86.4908516,5.30842466 86.2427737,4.77610859 85.9315604,4.28094453 L86.7776671,3.74792999 C87.1308169,4.30965578 87.4160681,4.91838179 87.6219085,5.56259574 L86.6692254,5.86656128 Z M1.26026912,3.68817589 L2.10224493,4.22769122 C1.78656347,4.72006919 1.53367638,5.25013249 1.3502652,5.80651827 L0.400554683,5.49338901 C0.612012081,4.85193918 0.902417359,4.24636767 1.26026912,3.68817589 Z M85.4881271,2.17927796 L84.8015377,2.90632361 C84.3752759,2.5043409 83.9004118,2.15691661 83.3883721,1.8730526 L83.8738987,0.998830409 C84.4612176,1.32450134 85.0032397,1.72192978 85.4881271,2.17927796 Z M4.18303226,0.967564053 L4.66099377,1.84594486 C4.14669322,2.12566396 3.66909236,2.46925191 3.23964084,2.86780054 L2.55894601,2.13523322 C3.04750167,1.68176376 3.5928096,1.28859486 4.18303226,0.967564053 Z M80,0 C80.6853149,0 81.3505032,0.0861719866 81.9853207,0.248271715 L81.7373824,1.21704772 C81.1747486,1.07342708 80.5926435,1 80,1 L80,0 Z M10,0 L10,1 L8.00000005,1 L7.59669451,1.011395 C7.19522811,1.0341476 6.79959301,1.09086155 6.41261147,1.18053077 L6.18734244,0.206234069 C6.76976317,0.0713155963 7.37655326,0 8,0 L10,0 Z M54,0 L54,1 L52,1 L52,0 L54,0 Z M66,0 L66,1 L64,1 L64,0 L66,0 Z M62,0 L62,1 L60,1 L60,0 L62,0 Z M58,0 L58,1 L56,1 L56,0 L58,0 Z M70,0 L70,1 L68,1 L68,0 L70,0 Z M50,0 L50,1 L48,1 L48,0 L50,0 Z M46,0 L46,1 L44,1 L44,0 L46,0 Z M42,0 L42,1 L40,1 L40,0 L42,0 Z M38,0 L38,1 L36,1 L36,0 L38,0 Z M34,0 L34,1 L32,1 L32,0 L34,0 Z M30,0 L30,1 L28,1 L28,0 L30,0 Z M26,0 L26,1 L24,1 L24,0 L26,0 Z M22,0 L22,1 L20,1 L20,0 L22,0 Z M18,0 L18,1 L16,1 L16,0 L18,0 Z M14,0 L14,1 L12,1 L12,0 L14,0 Z M78,0 L78,1 L76,1 L76,0 L78,0 Z M74,0 L74,1 L72,1 L72,0 L74,0 Z" id="dash-box" fill="#1CBB8B" fill-rule="nonzero"></path>
+            <g id="Score-Function" transform="translate(0, 21)" fill="#12BE8B" fill-rule="nonzero">
+                <path d="M32.0820312,11.1523438 C33.1054688,11.1523438 33.9208984,10.9228516 34.5283203,10.4638672 C35.1357422,10.0048828 35.4394531,9.38085938 35.4394531,8.59179688 C35.4394531,8.25195313 35.3886719,7.95214844 35.2871094,7.69238281 C35.1855469,7.43261719 35.0283203,7.20703125 34.8154297,7.015625 C34.6025391,6.82421875 34.3623047,6.66210937 34.0947266,6.52929688 C33.8271484,6.39648437 33.4980469,6.26757813 33.1074219,6.14257813 C33.0878906,6.13476562 32.8027344,6.05078125 32.2519531,5.890625 C31.7011719,5.73046875 31.4160156,5.64648437 31.3964844,5.63867188 C31.1035156,5.54101562 30.8857422,5.41503906 30.7431641,5.26074219 C30.6005859,5.10644531 30.5292969,4.90625 30.5292969,4.66015625 C30.5292969,4.46875 30.5761719,4.3046875 30.6699219,4.16796875 C30.7636719,4.03125 30.8945312,3.92578125 31.0625,3.8515625 C31.2304687,3.77734375 31.4130859,3.72363281 31.6103516,3.69042969 C31.8076172,3.65722656 32.0273438,3.640625 32.2695312,3.640625 C33,3.640625 33.6796875,3.8203125 34.3085938,4.1796875 C34.4101562,4.234375 34.5039062,4.26171875 34.5898438,4.26171875 C34.7539062,4.26171875 34.8945312,4.19042969 35.0117188,4.04785156 C35.1289062,3.90527344 35.1875,3.74609375 35.1875,3.5703125 C35.1875,3.3984375 35.1269531,3.2578125 35.0058594,3.1484375 C34.7246094,2.90625 34.3203125,2.70605469 33.7929688,2.54785156 C33.265625,2.38964844 32.7226562,2.31054688 32.1640625,2.31054688 C31.2070312,2.31054688 30.4267578,2.52929688 29.8232422,2.96679688 C29.2197266,3.40429688 28.9179688,3.99609375 28.9179688,4.7421875 C28.9179688,5.3203125 29.0654297,5.77832031 29.3603516,6.11621094 C29.6552734,6.45410156 30.1367188,6.73828125 30.8046875,6.96875 L32.7382812,7.61328125 C33.1015625,7.73046875 33.3691406,7.87304688 33.5410156,8.04101562 C33.7128906,8.20898438 33.7988281,8.44140625 33.7988281,8.73828125 C33.7988281,9.09765625 33.6396484,9.36914063 33.3212891,9.55273438 C33.0029297,9.73632812 32.5878906,9.828125 32.0761719,9.828125 C31.2753906,9.828125 30.5097656,9.609375 29.7792969,9.171875 C29.7128906,9.1328125 29.6445312,9.11328125 29.5742188,9.11328125 C29.40625,9.11328125 29.2529297,9.20605469 29.1142578,9.39160156 C28.9755859,9.57714844 28.90625,9.765625 28.90625,9.95703125 C28.90625,10.1015625 28.9511719,10.2070312 29.0410156,10.2734375 C29.7949219,10.859375 30.8085937,11.1523438 32.0820312,11.1523438 Z" id="Path"></path>
+                <path d="M37.9121094,8.04101562 C37.9121094,7.45117188 38.0556641,6.98046875 38.3427734,6.62890625 C38.6298828,6.27734375 39.0273438,6.1015625 39.5351562,6.1015625 C39.8710938,6.1015625 40.2148438,6.18945312 40.5664062,6.36523438 C40.65625,6.41601563 40.7480469,6.44140625 40.8417969,6.44140625 C40.9746094,6.44140625 41.0869141,6.39453125 41.1787109,6.30078125 C41.2705078,6.20703125 41.3164062,6.09570313 41.3164062,5.96679688 C41.3164062,5.78710938 41.2285156,5.63476562 41.0527344,5.50976562 C40.6230469,5.19726562 40.0664062,5.04101562 39.3828125,5.04101562 C38.8085938,5.04101562 38.2949219,5.17675781 37.8417969,5.44824219 C37.3886719,5.71972656 37.0400391,6.08398438 36.7958984,6.54101562 C36.5517578,6.99804687 36.4296875,7.50195312 36.4296875,8.05273438 C36.4296875,8.93554687 36.703125,9.65917969 37.25,10.2236328 C37.796875,10.7880859 38.5019531,11.0703125 39.3652344,11.0703125 C39.7949219,11.0703125 40.1660156,11.0205078 40.4785156,10.9208984 C40.7910156,10.8212891 41.0292969,10.6972656 41.1933594,10.5488281 C41.3496094,10.4160156 41.4277344,10.2597656 41.4277344,10.0800781 C41.4277344,9.94726563 41.3847656,9.83300781 41.2988281,9.73730469 C41.2128906,9.64160156 41.1074219,9.59375 40.9824219,9.59375 C40.8808594,9.59375 40.78125,9.62695313 40.6835938,9.69335938 C40.3789062,9.89648438 39.9863281,9.99804687 39.5058594,9.99804687 C39.0097656,9.99804687 38.6201172,9.81933594 38.3369141,9.46191406 C38.0537109,9.10449219 37.9121094,8.63085938 37.9121094,8.04101562 Z" id="Path"></path>
+                <path d="M45.2480469,10.0683594 C44.7441406,10.0683594 44.3515625,9.89257813 44.0703125,9.54101562 C43.7890625,9.18945312 43.6484375,8.69726563 43.6484375,8.06445312 C43.6484375,7.42382812 43.7880859,6.92675781 44.0673828,6.57324219 C44.3466797,6.21972656 44.7402344,6.04296875 45.2480469,6.04296875 C45.7558594,6.04296875 46.1503906,6.22070312 46.4316406,6.57617188 C46.7128906,6.93164063 46.8535156,7.42773438 46.8535156,8.06445312 C46.8535156,8.69726563 46.7128906,9.18945312 46.4316406,9.54101562 C46.1503906,9.89257813 45.7558594,10.0683594 45.2480469,10.0683594 Z M45.2480469,11.0703125 C45.6621094,11.0703125 46.0410156,11.0136719 46.3847656,10.9003906 C46.7285156,10.7871094 47.0185547,10.6357422 47.2548828,10.4462891 C47.4912109,10.2568359 47.6904297,10.0322266 47.8525391,9.77246094 C48.0146484,9.51269531 48.1337891,9.24023438 48.2099609,8.95507812 C48.2861328,8.66992188 48.3242188,8.37304688 48.3242188,8.06445312 C48.3242188,7.74023438 48.2841797,7.4296875 48.2041016,7.1328125 C48.1240234,6.8359375 48,6.55957031 47.8320312,6.30371094 C47.6640625,6.04785156 47.4609375,5.82714844 47.2226562,5.64160156 C46.984375,5.45605469 46.6962891,5.30957031 46.3583984,5.20214844 C46.0205078,5.09472656 45.6503906,5.04101562 45.2480469,5.04101562 C44.8378906,5.04101562 44.4619141,5.09667969 44.1201172,5.20800781 C43.7783203,5.31933594 43.4892578,5.47070312 43.2529297,5.66210938 C43.0166016,5.85351562 42.8164062,6.078125 42.6523438,6.3359375 C42.4882812,6.59375 42.3681641,6.86816406 42.2919922,7.15917969 C42.2158203,7.45019531 42.1777344,7.75195312 42.1777344,8.06445312 C42.1777344,8.45898437 42.2402344,8.83203125 42.3652344,9.18359375 C42.4902344,9.53515625 42.6728516,9.85351562 42.9130859,10.1386719 C43.1533203,10.4238281 43.4746094,10.6503906 43.8769531,10.8183594 C44.2792969,10.9863281 44.7363281,11.0703125 45.2480469,11.0703125 Z" id="Shape"></path>
+                <path d="M50.3046875,11 C50.5234375,11 50.7021484,10.9316406 50.8408203,10.7949219 C50.9794922,10.6582031 51.0488281,10.4628906 51.0488281,10.2089844 L51.0488281,7.73632812 C51.0488281,7.45898437 51.09375,7.22460938 51.1835938,7.03320312 C51.2734375,6.84179687 51.4003906,6.69921875 51.5644531,6.60546875 C51.7285156,6.51171875 51.8994141,6.44628906 52.0771484,6.40917969 C52.2548828,6.37207031 52.4589844,6.35351563 52.6894531,6.35351563 C52.8417969,6.35351563 52.9628906,6.29101563 53.0527344,6.16601562 C53.1425781,6.04101562 53.1875,5.89648437 53.1875,5.73242187 C53.1875,5.56054688 53.1386719,5.4140625 53.0410156,5.29296875 C52.9433594,5.171875 52.8046875,5.11132812 52.625,5.11132812 C52.2578125,5.11132812 51.9267578,5.23730469 51.6318359,5.48925781 C51.3369141,5.74121094 51.1367188,6.04296875 51.03125,6.39453125 L51.0488281,5.79101562 C51.0527344,5.58398438 50.9814453,5.42089844 50.8349609,5.30175781 C50.6884766,5.18261719 50.515625,5.12304688 50.3164062,5.12304688 C50.1132812,5.12304688 49.9384766,5.18457031 49.7919922,5.30761719 C49.6455078,5.43066406 49.5722656,5.60351562 49.5722656,5.82617188 L49.5722656,10.1972656 C49.5722656,10.4550781 49.6416016,10.6533203 49.7802734,10.7919922 C49.9189453,10.9306641 50.09375,11 50.3046875,11 Z" id="Path"></path>
+                <path d="M53.8027344,8.00585937 C53.8027344,8.97070312 54.0810547,9.72265625 54.6376953,10.2617188 C55.1943359,10.8007813 55.9355469,11.0703125 56.8613281,11.0703125 C57.6855469,11.0703125 58.40625,10.8105469 59.0234375,10.2910156 C59.1484375,10.1816406 59.2109375,10.0488281 59.2109375,9.89257812 C59.2109375,9.75585937 59.1640625,9.6328125 59.0703125,9.5234375 C58.9765625,9.4140625 58.8691406,9.359375 58.7480469,9.359375 C58.6660156,9.359375 58.5898438,9.38671875 58.5195312,9.44140625 C57.96875,9.8359375 57.4511719,10.0332031 56.9667969,10.0332031 C55.9003906,10.0332031 55.3359375,9.48242188 55.2734375,8.38085938 L58.9765625,8.38085938 C59.1171875,8.38085938 59.2226562,8.33398438 59.2929688,8.24023437 C59.3632812,8.14648438 59.3984375,8.0234375 59.3984375,7.87109375 C59.3945312,7.484375 59.3378906,7.125 59.2285156,6.79296875 C59.1191406,6.4609375 58.9580078,6.16210937 58.7451172,5.89648438 C58.5322266,5.63085938 58.2509766,5.42285156 57.9013672,5.27246094 C57.5517578,5.12207031 57.1503906,5.046875 56.6972656,5.046875 C56.1074219,5.046875 55.5898438,5.18359375 55.1445312,5.45703125 C54.6992188,5.73046875 54.3642578,6.08886719 54.1396484,6.53222656 C53.9150391,6.97558594 53.8027344,7.46679688 53.8027344,8.00585937 Z M55.2851562,7.5546875 C55.3203125,7.12890625 55.4619141,6.765625 55.7099609,6.46484375 C55.9580078,6.1640625 56.3007812,6.01367188 56.7382812,6.01367188 C57.1875,6.01367188 57.5263672,6.15820312 57.7548828,6.44726562 C57.9833984,6.73632812 58.1132812,7.10546875 58.1445312,7.5546875 L55.2851562,7.5546875 Z" id="Shape"></path>
+                <path d="M21.6640625,25 C21.8945312,25 22.09375,24.9199219 22.2617188,24.7597656 C22.4296875,24.5996094 22.5136719,24.3886719 22.5136719,24.1269531 L22.5136719,21.3085938 L25.5078125,21.3085938 C25.6992188,21.3085938 25.8496094,21.2451172 25.9589844,21.1181641 C26.0683594,20.9912109 26.1230469,20.8378906 26.1230469,20.6582031 C26.1230469,20.4824219 26.0673828,20.3300781 25.9560547,20.2011719 C25.8447266,20.0722656 25.6953125,20.0078125 25.5078125,20.0078125 L22.5136719,20.0078125 L22.5136719,17.875 L25.6777344,17.875 C25.8652344,17.875 26.0146484,17.8085938 26.1259766,17.6757812 C26.2373047,17.5429688 26.2929688,17.3886719 26.2929688,17.2128906 C26.2929688,17.0371094 26.2373047,16.8828125 26.1259766,16.75 C26.0146484,16.6171875 25.8652344,16.5507812 25.6777344,16.5507812 L21.7167969,16.5507812 C21.4707031,16.5507812 21.2617188,16.6298828 21.0898438,16.7880859 C20.9179688,16.9462891 20.8320312,17.15625 20.8320312,17.4179688 L20.8320312,24.1269531 C20.8320312,24.3886719 20.9130859,24.5996094 21.0751953,24.7597656 C21.2373047,24.9199219 21.4335938,25 21.6640625,25 Z" id="Path"></path>
+                <path d="M29.5625,25.0703125 C30.4882812,25.0703125 31.1269531,24.734375 31.4785156,24.0625 L31.4785156,24.2792969 C31.4785156,24.5097656 31.546875,24.6875 31.6835938,24.8125 C31.8203125,24.9375 31.9941406,25 32.2050781,25 C32.4199219,25 32.5976562,24.9355469 32.7382812,24.8066406 C32.8789062,24.6777344 32.9492188,24.4960938 32.9492188,24.2617188 L32.9492188,19.9375 C32.9492188,19.6796875 32.8808594,19.4794922 32.7441406,19.3369141 C32.6074219,19.1943359 32.4316406,19.1230469 32.2167969,19.1230469 C32.0019531,19.1230469 31.8251953,19.1943359 31.6865234,19.3369141 C31.5478516,19.4794922 31.4785156,19.6796875 31.4785156,19.9375 L31.4785156,22.3984375 C31.4785156,22.8945312 31.3349609,23.296875 31.0478516,23.6054688 C30.7607422,23.9140625 30.4199219,24.0683594 30.0253906,24.0683594 C29.7050781,24.0683594 29.4414062,23.9619141 29.234375,23.7490234 C29.0273437,23.5361328 28.9238281,23.2285156 28.9238281,22.8261719 L28.9238281,19.9375 C28.9238281,19.6796875 28.8554687,19.4794922 28.71875,19.3369141 C28.5820312,19.1943359 28.4101562,19.1230469 28.203125,19.1230469 C27.984375,19.1230469 27.8046875,19.1943359 27.6640625,19.3369141 C27.5234375,19.4794922 27.453125,19.6796875 27.453125,19.9375 L27.453125,22.8261719 C27.453125,23.1972656 27.5107422,23.5292969 27.6259766,23.8222656 C27.7412109,24.1152344 27.8984375,24.3505859 28.0976562,24.5283203 C28.296875,24.7060547 28.5205078,24.8408203 28.7685547,24.9326172 C29.0166016,25.0244141 29.28125,25.0703125 29.5625,25.0703125 Z" id="Path"></path>
+                <path d="M35.2285156,25 C35.4433594,25 35.6201172,24.9287109 35.7587891,24.7861328 C35.8974609,24.6435547 35.9667969,24.4453125 35.9667969,24.1914062 L35.9667969,21.7128906 C35.9667969,21.2167969 36.109375,20.8144531 36.3945312,20.5058594 C36.6796875,20.1972656 37.0175781,20.0429688 37.4082031,20.0429688 C37.7324219,20.0429688 37.9990234,20.1494141 38.2080078,20.3623047 C38.4169922,20.5751953 38.5214844,20.8828125 38.5214844,21.2851562 L38.5214844,24.1914062 C38.5214844,24.4453125 38.5898438,24.6435547 38.7265625,24.7861328 C38.8632812,24.9287109 39.0351562,25 39.2421875,25 C39.4609375,25 39.640625,24.9287109 39.78125,24.7861328 C39.921875,24.6435547 39.9921875,24.4453125 39.9921875,24.1914062 L39.9921875,21.2910156 C39.9921875,20.9160156 39.9355469,20.5820313 39.8222656,20.2890625 C39.7089844,19.9960938 39.5546875,19.7607422 39.359375,19.5830078 C39.1640625,19.4052734 38.9423828,19.2705078 38.6943359,19.1787109 C38.4462891,19.0869141 38.1816406,19.0410156 37.9003906,19.0410156 C37.4433594,19.0410156 37.0498047,19.1269531 36.7197266,19.2988281 C36.3896484,19.4707031 36.1386719,19.7226562 35.9667969,20.0546875 L35.9667969,19.8496094 C35.9667969,19.6191406 35.8984375,19.4404297 35.7617188,19.3134766 C35.625,19.1865234 35.4511719,19.1230469 35.2402344,19.1230469 C35.0253906,19.1230469 34.8476562,19.1875 34.7070312,19.3164062 C34.5664062,19.4453125 34.4960938,19.6269531 34.4960938,19.8613281 L34.4960938,24.1914062 C34.4960938,24.4453125 34.5644531,24.6435547 34.7011719,24.7861328 C34.8378906,24.9287109 35.0136719,25 35.2285156,25 Z" id="Path"></path>
+                <path d="M42.6699219,22.0410156 C42.6699219,21.4511719 42.8134766,20.9804687 43.1005859,20.6289062 C43.3876953,20.2773438 43.7851562,20.1015625 44.2929688,20.1015625 C44.6289062,20.1015625 44.9726562,20.1894531 45.3242188,20.3652344 C45.4140625,20.4160156 45.5058594,20.4414063 45.5996094,20.4414063 C45.7324219,20.4414063 45.8447266,20.3945312 45.9365234,20.3007812 C46.0283203,20.2070312 46.0742188,20.0957031 46.0742188,19.9667969 C46.0742188,19.7871094 45.9863281,19.6347656 45.8105469,19.5097656 C45.3808594,19.1972656 44.8242188,19.0410156 44.140625,19.0410156 C43.5664062,19.0410156 43.0527344,19.1767578 42.5996094,19.4482422 C42.1464844,19.7197266 41.7978516,20.0839844 41.5537109,20.5410156 C41.3095703,20.9980469 41.1875,21.5019531 41.1875,22.0527344 C41.1875,22.9355469 41.4609375,23.6591797 42.0078125,24.2236328 C42.5546875,24.7880859 43.2597656,25.0703125 44.1230469,25.0703125 C44.5527344,25.0703125 44.9238281,25.0205078 45.2363281,24.9208984 C45.5488281,24.8212891 45.7871094,24.6972656 45.9511719,24.5488281 C46.1074219,24.4160156 46.1855469,24.2597656 46.1855469,24.0800781 C46.1855469,23.9472656 46.1425781,23.8330078 46.0566406,23.7373047 C45.9707031,23.6416016 45.8652344,23.59375 45.7402344,23.59375 C45.6386719,23.59375 45.5390625,23.6269531 45.4414062,23.6933594 C45.1367188,23.8964844 44.7441406,23.9980469 44.2636719,23.9980469 C43.7675781,23.9980469 43.3779297,23.8193359 43.0947266,23.4619141 C42.8115234,23.1044922 42.6699219,22.6308594 42.6699219,22.0410156 Z" id="Path"></path>
+                <path d="M49.3847656,25.0527344 L49.5136719,25.0527344 C49.6542969,25.0527344 49.7675781,25.0507812 49.8535156,25.046875 C49.9394531,25.0429688 50.0419922,25.0361328 50.1611328,25.0263672 C50.2802734,25.0166016 50.375,24.9980469 50.4453125,24.9707031 C50.515625,24.9433594 50.5849609,24.9082031 50.6533203,24.8652344 C50.7216797,24.8222656 50.7714844,24.765625 50.8027344,24.6953125 C50.8339844,24.625 50.8496094,24.5429688 50.8496094,24.4492188 C50.8496094,24.3007812 50.8027344,24.171875 50.7089844,24.0625 C50.6152344,23.953125 50.4882812,23.8984375 50.328125,23.8984375 L50.2871094,23.8984375 L49.8710938,23.921875 L49.8066406,23.921875 C49.5683594,23.921875 49.4023438,23.8300781 49.3085938,23.6464844 C49.2148438,23.4628906 49.1679688,23.1835938 49.1679688,22.8085938 L49.1679688,20.1425781 L50.1054688,20.1425781 C50.2734375,20.1425781 50.4003906,20.0996094 50.4863281,20.0136719 C50.5722656,19.9277344 50.6152344,19.8105469 50.6152344,19.6621094 C50.6152344,19.5097656 50.5722656,19.3876953 50.4863281,19.2958984 C50.4003906,19.2041016 50.2753906,19.1582031 50.1113281,19.1582031 L49.1679688,19.1582031 L49.1679688,17.6640625 C49.1679688,17.4375 49.1201172,17.2675781 49.0244141,17.1542969 C48.9287109,17.0410156 48.7949219,16.984375 48.6230469,16.984375 C48.4082031,16.984375 48.2392578,17.0517578 48.1162109,17.1865234 C47.9931641,17.3212891 47.9199219,17.4941406 47.8964844,17.7050781 L47.7207031,19.1582031 L47.1054688,19.1582031 C46.9375,19.1582031 46.8105469,19.2021484 46.7246094,19.2900391 C46.6386719,19.3779297 46.5957031,19.4941406 46.5957031,19.6386719 C46.5957031,19.7910156 46.6416016,19.9130859 46.7333984,20.0048828 C46.8251953,20.0966797 46.9550781,20.1425781 47.1230469,20.1425781 L47.6914062,20.1425781 L47.6914062,23.171875 C47.6914062,23.7773438 47.8408203,24.2412109 48.1396484,24.5634766 C48.4384766,24.8857422 48.8535156,25.0488281 49.3847656,25.0527344 Z" id="Path"></path>
+                <path d="M52.6132812,25 C52.828125,25 53.0048828,24.9277344 53.1435547,24.7832031 C53.2822266,24.6386719 53.3515625,24.4375 53.3515625,24.1796875 L53.3515625,19.9492188 C53.3515625,19.6875 53.2822266,19.484375 53.1435547,19.3398438 C53.0048828,19.1953125 52.828125,19.1230469 52.6132812,19.1230469 C52.3984375,19.1230469 52.2226562,19.1953125 52.0859375,19.3398438 C51.9492188,19.484375 51.8808594,19.6875 51.8808594,19.9492188 L51.8808594,24.1796875 C51.8808594,24.4414062 51.9492188,24.6435547 52.0859375,24.7861328 C52.2226562,24.9287109 52.3984375,25 52.6132812,25 Z M52.6132812,17.9511719 C52.8671875,17.9511719 53.0732422,17.8759766 53.2314453,17.7255859 C53.3896484,17.5751953 53.46875,17.3789062 53.46875,17.1367188 C53.46875,16.8945312 53.390625,16.6992188 53.234375,16.5507812 C53.078125,16.4023438 52.8730469,16.328125 52.6191406,16.328125 C52.3613281,16.328125 52.1533203,16.4023438 51.9951172,16.5507812 C51.8369141,16.6992188 51.7578125,16.8945312 51.7578125,17.1367188 C51.7578125,17.3789062 51.8369141,17.5751953 51.9951172,17.7255859 C52.1533203,17.8759766 52.359375,17.9511719 52.6132812,17.9511719 Z" id="Shape"></path>
+                <path d="M57.6054688,24.0683594 C57.1015625,24.0683594 56.7089844,23.8925781 56.4277344,23.5410156 C56.1464844,23.1894531 56.0058594,22.6972656 56.0058594,22.0644531 C56.0058594,21.4238281 56.1455078,20.9267578 56.4248047,20.5732422 C56.7041016,20.2197266 57.0976562,20.0429688 57.6054688,20.0429688 C58.1132812,20.0429688 58.5078125,20.2207031 58.7890625,20.5761719 C59.0703125,20.9316406 59.2109375,21.4277344 59.2109375,22.0644531 C59.2109375,22.6972656 59.0703125,23.1894531 58.7890625,23.5410156 C58.5078125,23.8925781 58.1132812,24.0683594 57.6054688,24.0683594 Z M57.6054688,25.0703125 C58.0195312,25.0703125 58.3984375,25.0136719 58.7421875,24.9003906 C59.0859375,24.7871094 59.3759766,24.6357422 59.6123047,24.4462891 C59.8486328,24.2568359 60.0478516,24.0322266 60.2099609,23.7724609 C60.3720703,23.5126953 60.4912109,23.2402344 60.5673828,22.9550781 C60.6435547,22.6699219 60.6816406,22.3730469 60.6816406,22.0644531 C60.6816406,21.7402344 60.6416016,21.4296875 60.5615234,21.1328125 C60.4814453,20.8359375 60.3574219,20.5595703 60.1894531,20.3037109 C60.0214844,20.0478516 59.8183594,19.8271484 59.5800781,19.6416016 C59.3417969,19.4560547 59.0537109,19.3095703 58.7158203,19.2021484 C58.3779297,19.0947266 58.0078125,19.0410156 57.6054688,19.0410156 C57.1953125,19.0410156 56.8193359,19.0966797 56.4775391,19.2080078 C56.1357422,19.3193359 55.8466797,19.4707031 55.6103516,19.6621094 C55.3740234,19.8535156 55.1738281,20.078125 55.0097656,20.3359375 C54.8457031,20.59375 54.7255859,20.8681641 54.6494141,21.1591797 C54.5732422,21.4501953 54.5351562,21.7519531 54.5351562,22.0644531 C54.5351562,22.4589844 54.5976562,22.8320312 54.7226562,23.1835938 C54.8476562,23.5351562 55.0302734,23.8535156 55.2705078,24.1386719 C55.5107422,24.4238281 55.8320312,24.6503906 56.234375,24.8183594 C56.6367188,24.9863281 57.09375,25.0703125 57.6054688,25.0703125 Z" id="Shape"></path>
+                <path d="M62.65625,25 C62.8710938,25 63.0478516,24.9287109 63.1865234,24.7861328 C63.3251953,24.6435547 63.3945312,24.4453125 63.3945312,24.1914062 L63.3945312,21.7128906 C63.3945312,21.2167969 63.5371094,20.8144531 63.8222656,20.5058594 C64.1074219,20.1972656 64.4453125,20.0429688 64.8359375,20.0429688 C65.1601562,20.0429688 65.4267578,20.1494141 65.6357422,20.3623047 C65.8447266,20.5751953 65.9492188,20.8828125 65.9492188,21.2851562 L65.9492188,24.1914062 C65.9492188,24.4453125 66.0175781,24.6435547 66.1542969,24.7861328 C66.2910156,24.9287109 66.4628906,25 66.6699219,25 C66.8886719,25 67.0683594,24.9287109 67.2089844,24.7861328 C67.3496094,24.6435547 67.4199219,24.4453125 67.4199219,24.1914062 L67.4199219,21.2910156 C67.4199219,20.9160156 67.3632812,20.5820313 67.25,20.2890625 C67.1367188,19.9960938 66.9824219,19.7607422 66.7871094,19.5830078 C66.5917969,19.4052734 66.3701172,19.2705078 66.1220703,19.1787109 C65.8740234,19.0869141 65.609375,19.0410156 65.328125,19.0410156 C64.8710938,19.0410156 64.4775391,19.1269531 64.1474609,19.2988281 C63.8173828,19.4707031 63.5664062,19.7226562 63.3945312,20.0546875 L63.3945312,19.8496094 C63.3945312,19.6191406 63.3261719,19.4404297 63.1894531,19.3134766 C63.0527344,19.1865234 62.8789062,19.1230469 62.6679688,19.1230469 C62.453125,19.1230469 62.2753906,19.1875 62.1347656,19.3164062 C61.9941406,19.4453125 61.9238281,19.6269531 61.9238281,19.8613281 L61.9238281,24.1914062 C61.9238281,24.4453125 61.9921875,24.6435547 62.1289062,24.7861328 C62.265625,24.9287109 62.4414062,25 62.65625,25 Z" id="Path"></path>
+            </g>
+        </g>
+        <g id="Bi-encoder" stroke-width="1" transform="translate(160, 50)">
+            <g id="bi-encoder" transform="translate(20, 340)" fill="#333333" fill-rule="nonzero">
+                <path d="M98.7011719,12.0957031 C99.3072917,12.0957031 99.8370768,11.9384766 100.290527,11.6240234 C100.743978,11.3095703 101.085775,10.8846029 101.315918,10.3491211 C101.546061,9.81363932 101.661133,9.2109375 101.661133,8.54101562 C101.661133,7.46549479 101.384277,6.61783854 100.830566,5.99804688 C100.276855,5.37825521 99.5351562,5.06835937 98.6054688,5.06835937 C97.6119792,5.06835937 96.8919271,5.43294271 96.4453125,6.16210937 L96.4453125,3.06542969 C96.4453125,2.76920573 96.3644206,2.54020182 96.2026367,2.37841797 C96.0408529,2.21663411 95.8323568,2.13574219 95.5771484,2.13574219 C95.3310547,2.13574219 95.1282552,2.21777344 94.96875,2.38183594 C94.8092448,2.54589844 94.7294922,2.77376302 94.7294922,3.06542969 L94.7294922,11.0292969 C94.7294922,11.3255208 94.8138021,11.5613607 94.9824219,11.7368164 C95.1510417,11.9122721 95.3583984,12 95.6044922,12 C95.8597005,12 96.0636393,11.9282227 96.2163086,11.784668 C96.3689779,11.6411133 96.4453125,11.4303385 96.4453125,11.1523438 L96.4453125,10.9130859 C96.8782552,11.7014974 97.6302083,12.0957031 98.7011719,12.0957031 Z M98.2636719,10.9472656 C97.6074219,10.9472656 97.1414388,10.7467448 96.8657227,10.3457031 C96.5900065,9.94466146 96.4521484,9.35221354 96.4521484,8.56835938 C96.4521484,8.12630208 96.5102539,7.73551432 96.6264648,7.39599609 C96.7426758,7.05647786 96.9386393,6.77734375 97.2143555,6.55859375 C97.4900716,6.33984375 97.8330078,6.23046875 98.2431641,6.23046875 C98.4938151,6.23046875 98.719401,6.27490234 98.9199219,6.36376953 C99.1204427,6.45263672 99.2845052,6.56998698 99.4121094,6.71582031 C99.5397135,6.86165365 99.6456706,7.03710937 99.7299805,7.2421875 C99.8142904,7.44726563 99.8746745,7.65690104 99.9111328,7.87109375 C99.9475911,8.08528646 99.9658203,8.31315104 99.9658203,8.5546875 C99.9658203,8.7734375 99.9498698,8.98876953 99.9179688,9.20068359 C99.8860677,9.41259766 99.8291016,9.62565104 99.7470703,9.83984375 C99.6650391,10.0540365 99.5613607,10.2420247 99.4360352,10.4038086 C99.3107096,10.5655924 99.1477865,10.6966146 98.9472656,10.796875 C98.7467448,10.8971354 98.5188802,10.9472656 98.2636719,10.9472656 Z" id="Shape"></path>
+                <path d="M104.005859,12 C104.25651,12 104.462728,11.9156901 104.624512,11.7470703 C104.786296,11.5784505 104.867188,11.34375 104.867188,11.0429688 L104.867188,6.10742188 C104.867188,5.80208333 104.786296,5.56510417 104.624512,5.39648438 C104.462728,5.22786458 104.25651,5.14355469 104.005859,5.14355469 C103.755208,5.14355469 103.55013,5.22786458 103.390625,5.39648438 C103.23112,5.56510417 103.151367,5.80208333 103.151367,6.10742188 L103.151367,11.0429688 C103.151367,11.3483073 103.23112,11.5841471 103.390625,11.7504883 C103.55013,11.9168294 103.755208,12 104.005859,12 Z M104.005859,3.77636719 C104.302083,3.77636719 104.54248,3.68863932 104.727051,3.51318359 C104.911621,3.33772786 105.003906,3.10872396 105.003906,2.82617187 C105.003906,2.54361979 104.91276,2.31575521 104.730469,2.14257813 C104.548177,1.96940104 104.308919,1.8828125 104.012695,1.8828125 C103.711914,1.8828125 103.469238,1.96940104 103.284668,2.14257813 C103.100098,2.31575521 103.007812,2.54361979 103.007812,2.82617187 C103.007812,3.10872396 103.100098,3.33772786 103.284668,3.51318359 C103.469238,3.68863932 103.709635,3.77636719 104.005859,3.77636719 Z" id="Shape"></path>
+                <path d="M106.630859,9.02636719 L109.37207,9.02636719 C109.572591,9.02636719 109.741211,8.9625651 109.87793,8.83496094 C110.014648,8.70735677 110.083008,8.54785156 110.083008,8.35644531 C110.083008,8.16048177 110.016927,7.99983724 109.884766,7.87451172 C109.752604,7.7491862 109.581706,7.68652344 109.37207,7.68652344 L106.665039,7.68652344 C106.455404,7.68652344 106.284505,7.7491862 106.152344,7.87451172 C106.020182,7.99983724 105.954102,8.16048177 105.954102,8.35644531 C105.954102,8.54785156 106.019043,8.70735677 106.148926,8.83496094 C106.278809,8.9625651 106.439453,9.02636719 106.630859,9.02636719 Z" id="Path"></path>
+                <path d="M110.882812,8.50683594 C110.882812,9.63248698 111.20752,10.5097656 111.856934,11.1386719 C112.506348,11.7675781 113.371094,12.0820312 114.451172,12.0820312 C115.41276,12.0820312 116.253581,11.7789714 116.973633,11.1728516 C117.119466,11.0452474 117.192383,10.8902995 117.192383,10.7080078 C117.192383,10.5485026 117.137695,10.4049479 117.02832,10.2773437 C116.918945,10.1497396 116.79362,10.0859375 116.652344,10.0859375 C116.556641,10.0859375 116.467773,10.1178385 116.385742,10.1816406 C115.743164,10.6419271 115.139323,10.8720703 114.574219,10.8720703 C113.330078,10.8720703 112.671549,10.2294922 112.598633,8.94433594 L116.918945,8.94433594 C117.083008,8.94433594 117.206055,8.88964844 117.288086,8.78027344 C117.370117,8.67089844 117.411133,8.52734375 117.411133,8.34960937 C117.406576,7.8984375 117.340495,7.47916667 117.212891,7.09179687 C117.085286,6.70442708 116.897298,6.35579427 116.648926,6.04589844 C116.400553,5.7360026 116.072428,5.49332682 115.664551,5.31787109 C115.256673,5.14241536 114.788411,5.0546875 114.259766,5.0546875 C113.571615,5.0546875 112.967773,5.21419271 112.448242,5.53320312 C111.928711,5.85221354 111.537923,6.27034505 111.275879,6.78759766 C111.013835,7.30485026 110.882812,7.87792969 110.882812,8.50683594 Z M112.612305,7.98046875 C112.65332,7.48372396 112.818522,7.05989583 113.10791,6.70898438 C113.397298,6.35807292 113.797201,6.18261719 114.307617,6.18261719 C114.831706,6.18261719 115.227051,6.35123698 115.493652,6.68847656 C115.760254,7.02571615 115.911784,7.45638021 115.948242,7.98046875 L112.612305,7.98046875 Z" id="Shape"></path>
+                <path d="M119.605469,12 C119.85612,12 120.062337,11.9168294 120.224121,11.7504883 C120.385905,11.5841471 120.466797,11.3528646 120.466797,11.0566406 L120.466797,8.16503906 C120.466797,7.58626302 120.633138,7.11686198 120.96582,6.75683594 C121.298503,6.3968099 121.692708,6.21679688 122.148438,6.21679688 C122.526693,6.21679688 122.837728,6.34098307 123.081543,6.58935547 C123.325358,6.83772786 123.447266,7.19661458 123.447266,7.66601562 L123.447266,11.0566406 C123.447266,11.3528646 123.527018,11.5841471 123.686523,11.7504883 C123.846029,11.9168294 124.046549,12 124.288086,12 C124.543294,12 124.75293,11.9168294 124.916992,11.7504883 C125.081055,11.5841471 125.163086,11.3528646 125.163086,11.0566406 L125.163086,7.67285156 C125.163086,7.23535156 125.097005,6.84570313 124.964844,6.50390625 C124.832682,6.16210938 124.652669,5.88753255 124.424805,5.68017578 C124.19694,5.47281901 123.938314,5.31559245 123.648926,5.20849609 C123.359538,5.10139974 123.050781,5.04785156 122.722656,5.04785156 C122.189453,5.04785156 121.730306,5.14811198 121.345215,5.34863281 C120.960124,5.54915365 120.667318,5.84309896 120.466797,6.23046875 L120.466797,5.99121094 C120.466797,5.72233073 120.387044,5.51383464 120.227539,5.36572266 C120.068034,5.21761068 119.865234,5.14355469 119.619141,5.14355469 C119.36849,5.14355469 119.161133,5.21875 118.99707,5.36914062 C118.833008,5.51953125 118.750977,5.73144531 118.750977,6.00488281 L118.750977,11.0566406 C118.750977,11.3528646 118.830729,11.5841471 118.990234,11.7504883 C119.14974,11.9168294 119.354818,12 119.605469,12 Z" id="Path"></path>
+                <path d="M128.287109,8.54785156 C128.287109,7.85970052 128.45459,7.31054687 128.789551,6.90039063 C129.124512,6.49023437 129.588216,6.28515625 130.180664,6.28515625 C130.572591,6.28515625 130.973633,6.38769531 131.383789,6.59277344 C131.488607,6.65201823 131.595703,6.68164063 131.705078,6.68164063 C131.860026,6.68164063 131.991048,6.62695312 132.098145,6.51757813 C132.205241,6.40820312 132.258789,6.27832031 132.258789,6.12792969 C132.258789,5.91829427 132.15625,5.7405599 131.951172,5.59472656 C131.44987,5.23014323 130.800456,5.04785156 130.00293,5.04785156 C129.333008,5.04785156 128.733724,5.20621745 128.205078,5.52294922 C127.676432,5.83968099 127.269694,6.26464844 126.984863,6.79785156 C126.700033,7.33105469 126.557617,7.91894531 126.557617,8.56152344 C126.557617,9.59147135 126.876628,10.4357096 127.514648,11.0942383 C128.152669,11.7527669 128.97526,12.0820312 129.982422,12.0820312 C130.483724,12.0820312 130.916667,12.0239258 131.28125,11.9077148 C131.645833,11.7915039 131.923828,11.6468099 132.115234,11.4736328 C132.297526,11.3186849 132.388672,11.1363932 132.388672,10.9267578 C132.388672,10.7718099 132.338542,10.6385091 132.238281,10.5268555 C132.138021,10.4152018 132.014974,10.359375 131.869141,10.359375 C131.750651,10.359375 131.63444,10.398112 131.520508,10.4755859 C131.165039,10.7125651 130.707031,10.8310547 130.146484,10.8310547 C129.567708,10.8310547 129.113118,10.6225586 128.782715,10.2055664 C128.452311,9.78857422 128.287109,9.2360026 128.287109,8.54785156 Z" id="Path"></path>
+                <path d="M136.845703,10.9130859 C136.257812,10.9130859 135.799805,10.7080078 135.47168,10.2978516 C135.143555,9.88769531 134.979492,9.31347656 134.979492,8.57519531 C134.979492,7.82779948 135.142415,7.24788411 135.468262,6.83544922 C135.794108,6.42301432 136.253255,6.21679688 136.845703,6.21679688 C137.438151,6.21679688 137.898438,6.42415365 138.226562,6.83886719 C138.554688,7.25358073 138.71875,7.83235677 138.71875,8.57519531 C138.71875,9.31347656 138.554688,9.88769531 138.226562,10.2978516 C137.898438,10.7080078 137.438151,10.9130859 136.845703,10.9130859 Z M136.845703,12.0820312 C137.328776,12.0820312 137.770833,12.0159505 138.171875,11.8837891 C138.572917,11.7516276 138.911296,11.5750326 139.187012,11.3540039 C139.462728,11.1329753 139.69515,10.870931 139.884277,10.5678711 C140.073405,10.2648112 140.212402,9.9469401 140.30127,9.61425781 C140.390137,9.28157552 140.43457,8.93522135 140.43457,8.57519531 C140.43457,8.1969401 140.387858,7.83463542 140.294434,7.48828125 C140.201009,7.14192708 140.056315,6.8194987 139.860352,6.52099609 C139.664388,6.22249349 139.427409,5.96500651 139.149414,5.74853516 C138.871419,5.5320638 138.535319,5.36116536 138.141113,5.23583984 C137.746908,5.11051432 137.315104,5.04785156 136.845703,5.04785156 C136.367188,5.04785156 135.928548,5.11279297 135.529785,5.24267578 C135.131022,5.37255859 134.793783,5.54915365 134.518066,5.77246094 C134.24235,5.99576823 134.008789,6.2578125 133.817383,6.55859375 C133.625977,6.859375 133.48584,7.17952474 133.396973,7.51904297 C133.308105,7.8585612 133.263672,8.21061198 133.263672,8.57519531 C133.263672,9.03548177 133.336589,9.47070312 133.482422,9.88085938 C133.628255,10.2910156 133.841309,10.6624349 134.121582,10.9951172 C134.401855,11.3277995 134.776693,11.5921224 135.246094,11.7880859 C135.715495,11.9840495 136.248698,12.0820312 136.845703,12.0820312 Z" id="Shape"></path>
+                <path d="M144.488281,12.0957031 C145.559245,12.0957031 146.313477,11.7014974 146.750977,10.9130859 L146.750977,11.1523438 C146.750977,11.4303385 146.826172,11.6411133 146.976562,11.784668 C147.126953,11.9282227 147.332031,12 147.591797,12 C147.837891,12 148.044108,11.9122721 148.210449,11.7368164 C148.37679,11.5613607 148.459961,11.3255208 148.459961,11.0292969 L148.459961,3.06542969 C148.459961,2.76920573 148.381348,2.54020182 148.224121,2.37841797 C148.066895,2.21663411 147.862956,2.13574219 147.612305,2.13574219 C147.357096,2.13574219 147.1486,2.21663411 146.986816,2.37841797 C146.825033,2.54020182 146.744141,2.76920573 146.744141,3.06542969 L146.744141,6.16210937 C146.302083,5.43294271 145.582031,5.06835937 144.583984,5.06835937 C143.658854,5.06835937 142.918294,5.37825521 142.362305,5.99804688 C141.806315,6.61783854 141.52832,7.46549479 141.52832,8.54101562 C141.52832,9.2109375 141.643392,9.81363932 141.873535,10.3491211 C142.103678,10.8846029 142.445475,11.3095703 142.898926,11.6240234 C143.352376,11.9384766 143.882161,12.0957031 144.488281,12.0957031 Z M144.932617,10.9472656 C144.618164,10.9472656 144.344727,10.8732096 144.112305,10.7250977 C143.879883,10.5769857 143.702148,10.3798828 143.579102,10.1337891 C143.456055,9.88769531 143.366048,9.6336263 143.309082,9.37158203 C143.252116,9.10953776 143.223633,8.83723958 143.223633,8.5546875 C143.223633,8.24934896 143.254395,7.96451823 143.315918,7.70019531 C143.377441,7.4358724 143.472005,7.19091797 143.599609,6.96533203 C143.727214,6.73974609 143.906087,6.5608724 144.13623,6.42871094 C144.366374,6.29654948 144.636393,6.23046875 144.946289,6.23046875 C145.356445,6.23046875 145.699382,6.33984375 145.975098,6.55859375 C146.250814,6.77734375 146.446777,7.05647786 146.562988,7.39599609 C146.679199,7.73551432 146.737305,8.12630208 146.737305,8.56835938 C146.737305,9.35221354 146.599447,9.94466146 146.32373,10.3457031 C146.048014,10.7467448 145.58431,10.9472656 144.932617,10.9472656 Z" id="Shape"></path>
+                <path d="M149.90918,8.50683594 C149.90918,9.63248698 150.233887,10.5097656 150.883301,11.1386719 C151.532715,11.7675781 152.397461,12.0820312 153.477539,12.0820312 C154.439128,12.0820312 155.279948,11.7789714 156,11.1728516 C156.145833,11.0452474 156.21875,10.8902995 156.21875,10.7080078 C156.21875,10.5485026 156.164062,10.4049479 156.054688,10.2773437 C155.945312,10.1497396 155.819987,10.0859375 155.678711,10.0859375 C155.583008,10.0859375 155.494141,10.1178385 155.412109,10.1816406 C154.769531,10.6419271 154.16569,10.8720703 153.600586,10.8720703 C152.356445,10.8720703 151.697917,10.2294922 151.625,8.94433594 L155.945312,8.94433594 C156.109375,8.94433594 156.232422,8.88964844 156.314453,8.78027344 C156.396484,8.67089844 156.4375,8.52734375 156.4375,8.34960937 C156.432943,7.8984375 156.366862,7.47916667 156.239258,7.09179687 C156.111654,6.70442708 155.923665,6.35579427 155.675293,6.04589844 C155.426921,5.7360026 155.098796,5.49332682 154.690918,5.31787109 C154.28304,5.14241536 153.814779,5.0546875 153.286133,5.0546875 C152.597982,5.0546875 151.994141,5.21419271 151.474609,5.53320312 C150.955078,5.85221354 150.56429,6.27034505 150.302246,6.78759766 C150.040202,7.30485026 149.90918,7.87792969 149.90918,8.50683594 Z M151.638672,7.98046875 C151.679688,7.48372396 151.844889,7.05989583 152.134277,6.70898438 C152.423665,6.35807292 152.823568,6.18261719 153.333984,6.18261719 C153.858073,6.18261719 154.253418,6.35123698 154.52002,6.68847656 C154.786621,7.02571615 154.938151,7.45638021 154.974609,7.98046875 L151.638672,7.98046875 Z" id="Shape"></path>
+                <path d="M158.638672,12 C158.89388,12 159.102376,11.9202474 159.26416,11.7607422 C159.425944,11.601237 159.506836,11.3733724 159.506836,11.0771484 L159.506836,8.19238281 C159.506836,7.8688151 159.559245,7.5953776 159.664062,7.37207031 C159.76888,7.14876302 159.916992,6.98242187 160.108398,6.87304687 C160.299805,6.76367188 160.499186,6.68733724 160.706543,6.64404297 C160.9139,6.6007487 161.152018,6.57910156 161.420898,6.57910156 C161.598633,6.57910156 161.739909,6.5061849 161.844727,6.36035156 C161.949544,6.21451823 162.001953,6.04589844 162.001953,5.85449219 C162.001953,5.65397135 161.944987,5.48307292 161.831055,5.34179688 C161.717122,5.20052083 161.555339,5.12988281 161.345703,5.12988281 C160.917318,5.12988281 160.531087,5.27685547 160.187012,5.57080078 C159.842936,5.86474609 159.609375,6.21679688 159.486328,6.62695312 L159.506836,5.92285156 C159.511393,5.6813151 159.428223,5.49104818 159.257324,5.35205078 C159.086426,5.21305339 158.884766,5.14355469 158.652344,5.14355469 C158.415365,5.14355469 158.211426,5.21533203 158.040527,5.35888672 C157.869629,5.50244141 157.78418,5.70410156 157.78418,5.96386719 L157.78418,11.0634766 C157.78418,11.3642578 157.865072,11.5955404 158.026855,11.7573242 C158.188639,11.9191081 158.392578,12 158.638672,12 Z" id="Path"></path>
+            </g>
+            <g id="Body-@yellow" transform="translate(20, 150)">
+                <g id="vLLM-dash-box-Copy" transform="translate(0, 22)" xlink:href="#path-2">
+                    <path d="M8,0 L110,0 C114.418278,0 118,3.581722 118,8 L118,120 C118,124.418278 114.418278,128 110,128 L8,128 C3.581722,128 0,124.418278 0,120 L0,8 C0,3.581722 3.581722,8.8817842e-16 8,0 Z" id="vLLM-dash-box" stroke="#999999" stroke-width="1" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="1.2,3"></path>
+                    <text id="vLLM" font-family="Nunito-SemiBold, Nunito" font-size="10" font-weight="500" line-spacing="10" fill="#7D7D7D">
+                        <tspan x="6.5" y="17">LLM</tspan>
+                    </text>
+                </g>
+                <g id="Lines-@blue-w-arrow" transform="translate(20, 0)">
+                    <g id="L5" transform="translate(0, 138)">
+                        <path id="5-r_w-arrow" d="M77.5,-0.1 C77.5552285,-0.1 77.6,-0.055228475 77.6,0 L77.6,15.277 L80,15.2777778 L77.5,20.2777778 L75,15.2777778 L77.4,15.277 L77.4,0 C77.4,-0.055228475 77.4447715,-0.1 77.5,-0.1 Z" transform="translate(77.5, 10) rotate(180) translate(-77.5, -10)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="77.5" y1="19.3708333" x2="77.5" y2="1.125" id="5-r" stroke="#8E8E8E" stroke-linecap="round" transform="translate(77.5, 10.5625) rotate(180) translate(-77.5, -10.5625)"></line>
+                        <path id="5-m_w-arrow" d="M32.5,-0.1 C32.5552285,-0.1 32.6,-0.055228475 32.6,0 L32.6,15.277 L35,15.2777778 L32.5,20.2777778 L30,15.2777778 L32.4,15.277 L32.4,0 C32.4,-0.055228475 32.4447715,-0.1 32.5,-0.1 Z" transform="translate(32.5, 10) rotate(180) translate(-32.5, -10)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="32.5" y1="19.3708333" x2="32.5" y2="1.125" id="5-m" stroke="#8E8E8E" stroke-linecap="round" transform="translate(32.5, 10.5625) rotate(180) translate(-32.5, -10.5625)"></line>
+                        <path id="5-l_w-arrow" d="M0.5,-0.1 C0.555228475,-0.1 0.6,-0.055228475 0.6,0 L0.6,15.277 L3,15.2777778 L0.5,20.2777778 L-2,15.2777778 L0.4,15.277 L0.4,0 C0.4,-0.055228475 0.444771525,-0.1 0.5,-0.1 Z" transform="translate(0.5, 10) rotate(180) translate(-0.5, -10)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="0.5" y1="19.3708333" x2="0.5" y2="1.125" id="5-l" stroke="#8E8E8E" stroke-linecap="round" transform="translate(0.5, 10.5625) rotate(180) translate(-0.5, -10.5625)"></line>
+                    </g>
+                    <g id="L4" transform="translate(0.0001, 110)">
+                        <path id="4-r_w-arrow" d="M77.4999,-0.1 C77.5551285,-0.1 77.5999,-0.055228475 77.5999,0 L77.599,11.277 L79.9999,11.2777778 L77.4999,16.2777778 L74.9999,11.2777778 L77.399,11.277 L77.3999,0 C77.3999,-0.055228475 77.4446715,-0.1 77.4999,-0.1 Z" transform="translate(77.4999, 8) rotate(180) translate(-77.4999, -8)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="77.4998993" y1="15.4916667" x2="77.4998993" y2="0.75" id="4-r" stroke="#8E8E8E" stroke-linecap="round" transform="translate(77.4999, 8.375) rotate(180) translate(-77.4999, -8.375)"></line>
+                        <path id="4-l_w-arrow" d="M0.5,-0.1 C0.555228475,-0.1 0.6,-0.055228475 0.6,0 L0.6,11.277 L3,11.2777778 L0.5,16.2777778 L-2,11.2777778 L0.4,11.277 L0.4,0 C0.4,-0.055228475 0.444771525,-0.1 0.5,-0.1 Z" transform="translate(0.5, 8) rotate(180) translate(-0.5, -8)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="0.499999254" y1="15.4821429" x2="0.499999254" y2="1.5" id="4-l" stroke="#8E8E8E" stroke-linecap="round" transform="translate(0.5, 8.75) rotate(180) translate(-0.5, -8.75)"></line>
+                    </g>
+                    <g id="L3" transform="translate(0, 82)">
+                        <path id="3-r_w-arrow" d="M77.4999,-0.1 C77.5551285,-0.1 77.5999,-0.055228475 77.5999,0 L77.599,11.277 L79.9999,11.2777778 L77.4999,16.2777778 L74.9999,11.2777778 L77.399,11.277 L77.3999,0 C77.3999,-0.055228475 77.4446715,-0.1 77.4999,-0.1 Z" transform="translate(77.4999, 8) rotate(180) translate(-77.4999, -8)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="77.4998993" y1="15.4916667" x2="77.4998993" y2="0.75" id="3-r" stroke="#8E8E8E" stroke-linecap="round" transform="translate(77.4999, 8.375) rotate(180) translate(-77.4999, -8.375)"></line>
+                        <path id="3-l_w-arrow" d="M0.5,-0.1 C0.555228475,-0.1 0.6,-0.055228475 0.6,0 L0.6,11.277 L3,11.2777778 L0.5,16.2777778 L-2,11.2777778 L0.4,11.277 L0.4,0 C0.4,-0.055228475 0.444771525,-0.1 0.5,-0.1 Z" transform="translate(0.5, 8) rotate(180) translate(-0.5, -8)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="0.499999254" y1="15.4821429" x2="0.499999254" y2="1.5" id="3-l" stroke="#8E8E8E" stroke-linecap="round" transform="translate(0.5, 8.75) rotate(180) translate(-0.5, -8.75)"></line>
+                    </g>
+                    <g id="L2" transform="translate(0.0001, 54)">
+                        <path id="2-r_w-arrow" d="M77.4999,-0.1 C77.5551285,-0.1 77.5999,-0.055228475 77.5999,0 L77.599,11.277 L79.9999,11.2777778 L77.4999,16.2777778 L74.9999,11.2777778 L77.399,11.277 L77.3999,0 C77.3999,-0.055228475 77.4446715,-0.1 77.4999,-0.1 Z" transform="translate(77.4999, 8) rotate(180) translate(-77.4999, -8)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="77.4998993" y1="15.4916667" x2="77.4998993" y2="0.75" id="2-r" stroke="#8E8E8E" stroke-linecap="round" transform="translate(77.4999, 8.375) rotate(180) translate(-77.4999, -8.375)"></line>
+                        <path id="2-l_w-arrow" d="M0.5,-0.1 C0.555228475,-0.1 0.6,-0.055228475 0.6,0 L0.6,11.277 L3,11.2777778 L0.5,16.2777778 L-2,11.2777778 L0.4,11.277 L0.4,0 C0.4,-0.055228475 0.444771525,-0.1 0.5,-0.1 Z" transform="translate(0.5, 8) rotate(180) translate(-0.5, -8)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="0.499999254" y1="15.4916667" x2="0.499999254" y2="0.75" id="2-l" stroke="#8E8E8E" stroke-linecap="round" transform="translate(0.5, 8.375) rotate(180) translate(-0.5, -8.375)"></line>
+                    </g>
+                    <g id="L1" transform="translate(39, 0)">
+                        <path id="1_w-arrow" d="M0.5,-0.1 C0.555228475,-0.1 0.6,-0.055228475 0.6,0 L0.6,36.277 L3,36.2777778 L0.5,41.2777778 L-2,36.2777778 L0.4,36.277 L0.4,0 C0.4,-0.055228475 0.444771525,-0.1 0.5,-0.1 Z" transform="translate(0.5, 21) rotate(180) translate(-0.5, -21)" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        <line x1="0.5" y1="40.9166667" x2="0.5" y2="3" id="1" stroke="#8E8E8E" stroke-linecap="round" transform="translate(0.5, 22.5) rotate(180) translate(-0.5, -22.5)"></line>
+                    </g>
+                </g>
+                <g id="Rectangle-@01_yellow-Copy-13" transform="translate(8, 158)" xlink:href="#path-3" fill="#FDB515" fill-opacity="0.6" stroke="#FDB515" stroke-opacity="0.8">
+                    <rect id="Rectangle" x="0.5" y="0.5" width="23" height="11" rx="3"></rect>
+                </g>
+                <g id="Rectangle-@01_yellow-Copy-14" transform="translate(40, 158)" xlink:href="#path-4" fill="#FDB515" fill-opacity="0.6" stroke="#FDB515" stroke-opacity="0.8">
+                    <rect id="Rectangle" x="0.5" y="0.5" width="23" height="11" rx="3"></rect>
+                </g>
+                <g id="Rectangle-@01_yellow-Copy-15" transform="translate(86, 158)" xlink:href="#path-5" fill="#FDB515" fill-opacity="0.6" stroke="#FDB515" stroke-opacity="0.8">
+                    <rect id="Rectangle" x="0.5" y="0.5" width="23" height="11" rx="3"></rect>
+                </g>
+                <g id="Rectangle-@01_yellow-Copy-2" transform="translate(8, 42)" xlink:href="#path-6" fill="#FDB515" fill-opacity="0.6" stroke="#FDB515" stroke-opacity="0.8">
+                    <rect id="Rectangle" x="0.5" y="0.5" width="101" height="11" rx="3"></rect>
+                </g>
+                <g id="Rectangle-@01_yellow-Copy-4" transform="translate(8, 70)" xlink:href="#path-7" fill="#FDB515" fill-opacity="0.6" stroke="#FDB515" stroke-opacity="0.8">
+                    <rect id="Rectangle" x="0.5" y="0.5" width="101" height="11" rx="3"></rect>
+                </g>
+                <g id="Rectangle-@01_yellow-Copy-8" transform="translate(8, 98)" xlink:href="#path-8" fill="#FDB515" fill-opacity="0.6" stroke="#FDB515" stroke-opacity="0.8">
+                    <rect id="Rectangle" x="0.5" y="0.5" width="101" height="11" rx="3"></rect>
+                </g>
+                <g id="Rectangle-@01_yellow-Copy-6" transform="translate(8, 126)" xlink:href="#path-9" fill="#FDB515" fill-opacity="0.6" stroke="#FDB515" stroke-opacity="0.8">
+                    <rect id="Rectangle" x="0.5" y="0.5" width="101" height="11" rx="3"></rect>
+                </g>
+                <g id="..." transform="translate(70.7, 163)" fill="#8E8E8E">
+                    <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                    <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                    <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                </g>
+            </g>
+            <g id="Body-@blue" transform="translate(158, 150)">
+                <g id="vLLM-dash-box-Copy" transform="translate(0, 22)" xlink:href="#path-10">
+                    <path d="M7.60126829,127.489595 C7.73368411,127.496523 7.86662278,127.5 8,127.5 L8.77464335,127.5 C9.05078572,127.5 9.27464335,127.723858 9.27464335,128 C9.27464335,128.276142 9.05078572,128.5 8.77464335,128.5 L8,128.5 C7.8491935,128.5 7.69883087,128.496068 7.54901422,128.488228 C7.27324911,128.473799 7.06139478,128.23855 7.07582434,127.962785 C7.09025391,127.687019 7.32550317,127.475165 7.60126829,127.489595 Z M101.674643,128 C101.674643,128.276142 101.450786,128.5 101.174643,128.5 L99.9746433,128.5 C99.698501,128.5 99.4746433,128.276142 99.4746433,128 C99.4746433,127.723858 99.698501,127.5 99.9746433,127.5 L101.174643,127.5 C101.450786,127.5 101.674643,127.723858 101.674643,128 Z M17.6746433,128 C17.6746433,128.276142 17.4507857,128.5 17.1746433,128.5 L15.9746433,128.5 C15.698501,128.5 15.4746433,128.276142 15.4746433,128 C15.4746433,127.723858 15.698501,127.5 15.9746433,127.5 L17.1746434,127.5 C17.4507857,127.5 17.6746433,127.723858 17.6746433,128 Z M21.8746433,128 C21.8746433,128.276142 21.6507857,128.5 21.3746433,128.5 L20.1746433,128.5 C19.898501,128.5 19.6746433,128.276142 19.6746433,128 C19.6746433,127.723858 19.898501,127.5 20.1746433,127.5 L21.3746434,127.5 C21.6507857,127.5 21.8746433,127.723858 21.8746433,128 Z M26.0746433,128 C26.0746433,128.276142 25.8507857,128.5 25.5746433,128.5 L24.3746433,128.5 C24.098501,128.5 23.8746433,128.276142 23.8746433,128 C23.8746433,127.723858 24.098501,127.5 24.3746433,127.5 L25.5746435,127.5 C25.8507857,127.5 26.0746433,127.723858 26.0746433,128 Z M30.2746433,128 C30.2746433,128.276142 30.0507857,128.5 29.7746433,128.5 L28.5746433,128.5 C28.298501,128.5 28.0746433,128.276142 28.0746433,128 C28.0746433,127.723858 28.298501,127.5 28.5746433,127.5 L29.7746434,127.5 C30.0507857,127.5 30.2746433,127.723858 30.2746433,128 Z M34.4746433,128 C34.4746433,128.276142 34.2507857,128.5 33.9746433,128.5 L32.7746433,128.5 C32.498501,128.5 32.2746433,128.276142 32.2746433,128 C32.2746433,127.723858 32.498501,127.5 32.7746433,127.5 L33.9746434,127.5 C34.2507857,127.5 34.4746433,127.723858 34.4746433,128 Z M38.6746433,128 C38.6746433,128.276142 38.4507857,128.5 38.1746433,128.5 L36.9746433,128.5 C36.698501,128.5 36.4746433,128.276142 36.4746433,128 C36.4746433,127.723858 36.698501,127.5 36.9746433,127.5 L38.1746434,127.5 C38.4507857,127.5 38.6746433,127.723858 38.6746433,128 Z M42.8746433,128 C42.8746433,128.276142 42.6507857,128.5 42.3746433,128.5 L41.1746433,128.5 C40.898501,128.5 40.6746433,128.276142 40.6746433,128 C40.6746433,127.723858 40.898501,127.5 41.1746433,127.5 L42.3746434,127.5 C42.6507857,127.5 42.8746433,127.723858 42.8746433,128 Z M47.0746433,128 C47.0746433,128.276142 46.8507857,128.5 46.5746433,128.5 L45.3746433,128.5 C45.098501,128.5 44.8746433,128.276142 44.8746433,128 C44.8746433,127.723858 45.098501,127.5 45.3746433,127.5 L46.5746434,127.5 C46.8507857,127.5 47.0746433,127.723858 47.0746433,128 Z M51.2746433,128 C51.2746433,128.276142 51.0507857,128.5 50.7746433,128.5 L49.5746433,128.5 C49.298501,128.5 49.0746433,128.276142 49.0746433,128 C49.0746433,127.723858 49.298501,127.5 49.5746433,127.5 L50.7746434,127.5 C51.0507857,127.5 51.2746433,127.723858 51.2746433,128 Z M55.4746433,128 C55.4746433,128.276142 55.2507857,128.5 54.9746433,128.5 L53.7746433,128.5 C53.498501,128.5 53.2746433,128.276142 53.2746433,128 C53.2746433,127.723858 53.498501,127.5 53.7746433,127.5 L54.9746434,127.5 C55.2507857,127.5 55.4746433,127.723858 55.4746433,128 Z M59.6746433,128 C59.6746433,128.276142 59.4507857,128.5 59.1746433,128.5 L57.9746433,128.5 C57.698501,128.5 57.4746433,128.276142 57.4746433,128 C57.4746433,127.723858 57.698501,127.5 57.9746433,127.5 L59.1746433,127.5 C59.4507857,127.5 59.6746433,127.723858 59.6746433,128 Z M105.874643,128 C105.874643,128.276142 105.650786,128.5 105.374643,128.5 L104.174643,128.5 C103.898501,128.5 103.674643,128.276142 103.674643,128 C103.674643,127.723858 103.898501,127.5 104.174643,127.5 L105.374643,127.5 C105.650786,127.5 105.874643,127.723858 105.874643,128 Z M110.074643,128 C110.074643,128.276142 109.850786,128.5 109.574643,128.5 L108.374643,128.5 C108.098501,128.5 107.874643,128.276142 107.874643,128 C107.874643,127.723858 108.098501,127.5 108.374643,127.5 L109.574643,127.5 C109.850786,127.5 110.074643,127.723858 110.074643,128 Z M13.4746433,128 C13.4746433,128.276142 13.2507857,128.5 12.9746433,128.5 L11.7746433,128.5 C11.498501,128.5 11.2746433,128.276142 11.2746433,128 C11.2746433,127.723858 11.498501,127.5 11.7746433,127.5 L12.9746434,127.5 C13.2507857,127.5 13.4746433,127.723858 13.4746433,128 Z M63.8746433,128 C63.8746433,128.276142 63.6507857,128.5 63.3746433,128.5 L62.1746433,128.5 C61.898501,128.5 61.6746433,128.276142 61.6746433,128 C61.6746433,127.723858 61.898501,127.5 62.1746433,127.5 L63.3746432,127.5 C63.6507857,127.5 63.8746433,127.723858 63.8746433,128 Z M68.0746433,128 C68.0746433,128.276142 67.8507857,128.5 67.5746433,128.5 L66.3746433,128.5 C66.098501,128.5 65.8746433,128.276142 65.8746433,128 C65.8746433,127.723858 66.098501,127.5 66.3746433,127.5 L67.5746433,127.5 C67.8507857,127.5 68.0746433,127.723858 68.0746433,128 Z M72.2746433,128 C72.2746433,128.276142 72.0507857,128.5 71.7746433,128.5 L70.5746433,128.5 C70.298501,128.5 70.0746433,128.276142 70.0746433,128 C70.0746433,127.723858 70.298501,127.5 70.5746433,127.5 L71.7746433,127.5 C72.0507857,127.5 72.2746433,127.723858 72.2746433,128 Z M76.4746433,128 C76.4746433,128.276142 76.2507857,128.5 75.9746433,128.5 L74.7746433,128.5 C74.498501,128.5 74.2746433,128.276142 74.2746433,128 C74.2746433,127.723858 74.498501,127.5 74.7746433,127.5 L75.9746433,127.5 C76.2507857,127.5 76.4746433,127.723858 76.4746433,128 Z M80.6746433,128 C80.6746433,128.276142 80.4507857,128.5 80.1746433,128.5 L78.9746433,128.5 C78.698501,128.5 78.4746433,128.276142 78.4746433,128 C78.4746433,127.723858 78.698501,127.5 78.9746433,127.5 L80.1746433,127.5 C80.4507857,127.5 80.6746433,127.723858 80.6746433,128 Z M84.8746433,128 C84.8746433,128.276142 84.6507857,128.5 84.3746433,128.5 L83.1746433,128.5 C82.898501,128.5 82.6746433,128.276142 82.6746433,128 C82.6746433,127.723858 82.898501,127.5 83.1746433,127.5 L84.3746433,127.5 C84.6507857,127.5 84.8746433,127.723858 84.8746433,128 Z M89.0746433,128 C89.0746433,128.276142 88.8507857,128.5 88.5746433,128.5 L87.3746433,128.5 C87.098501,128.5 86.8746433,128.276142 86.8746433,128 C86.8746433,127.723858 87.098501,127.5 87.3746433,127.5 L88.5746433,127.5 C88.8507857,127.5 89.0746433,127.723858 89.0746433,128 Z M93.2746433,128 C93.2746433,128.276142 93.0507857,128.5 92.7746433,128.5 L91.5746433,128.5 C91.298501,128.5 91.0746433,128.276142 91.0746433,128 C91.0746433,127.723858 91.298501,127.5 91.5746433,127.5 L92.7746433,127.5 C93.0507857,127.5 93.2746433,127.723858 93.2746433,128 Z M97.4746433,128 C97.4746433,128.276142 97.2507857,128.5 96.9746433,128.5 L95.7746433,128.5 C95.498501,128.5 95.2746433,128.276142 95.2746433,128 C95.2746433,127.723858 95.498501,127.5 95.7746433,127.5 L96.9746433,127.5 C97.2507857,127.5 97.4746433,127.723858 97.4746433,128 Z M114.055358,126.915582 C114.180112,127.161937 114.081535,127.462781 113.835179,127.587535 C113.455748,127.779678 113.062077,127.943136 112.657126,128.076292 C112.394802,128.16255 112.11222,128.01982 112.025961,127.757496 C111.939703,127.495171 112.082433,127.212589 112.344758,127.126331 C112.701744,127.008946 113.048821,126.864836 113.383405,126.695403 C113.62976,126.570649 113.930604,126.669226 114.055358,126.915582 Z M3.87987036,126.26801 C4.19335033,126.474514 4.52190256,126.657254 4.86287326,126.814501 C5.11363396,126.930146 5.22316717,127.227176 5.10752253,127.477937 C4.9918779,127.728697 4.69484768,127.838231 4.44408698,127.722586 C4.05748525,127.544295 3.68504135,127.337142 3.32975792,127.103101 C3.09915402,126.951191 3.03535949,126.641103 3.18726884,126.410499 C3.3391782,126.179895 3.64926646,126.116101 3.87987036,126.26801 Z M116.999048,123.909773 C117.231117,124.059434 117.297922,124.368887 117.148261,124.600957 C116.917678,124.958505 116.660543,125.298348 116.37924,125.617559 C116.196668,125.824735 115.880714,125.844681 115.673538,125.662108 C115.466362,125.479535 115.446417,125.163582 115.62899,124.956406 C115.877349,124.674578 116.104345,124.374568 116.307864,124.058986 C116.457525,123.826916 116.766979,123.760111 116.999048,123.909773 Z M1.27256369,123.319388 C1.43898255,123.655985 1.63055181,123.979556 1.84546492,124.287446 C2.00352124,124.513881 1.94808936,124.825572 1.72165435,124.983629 C1.49521934,125.141685 1.18352754,125.086253 1.02547122,124.859818 C0.781933905,124.510921 0.564805969,124.14418 0.376143395,123.762593 C0.25375572,123.515054 0.355211426,123.215168 0.602751055,123.092781 C0.850290685,122.970393 1.15017602,123.071849 1.27256369,123.319388 Z M118.493626,120.332066 C118.477178,120.759793 118.428966,121.182879 118.349775,121.598809 C118.298127,121.870079 118.036351,122.048117 117.765081,121.996469 C117.493812,121.944821 117.315774,121.683044 117.367422,121.411775 C117.437286,121.044832 117.479837,120.671419 117.494364,120.29364 C117.504975,120.017702 117.73727,119.802611 118.013208,119.813222 C118.289146,119.823833 118.504237,120.056128 118.493626,120.332066 Z M0.5,119.461965 L0.5,120.011642 C0.500324493,120.219706 0.509101847,120.42668 0.526243834,120.632238 C0.549192342,120.907426 0.344712508,121.149112 0.0695253402,121.172061 C-0.205661827,121.195009 -0.447348546,120.990529 -0.470297054,120.715342 C-0.489701403,120.482655 -0.49963303,120.248463 -0.5,120.01241 L-0.5,119.461965 C-0.5,119.185831 -0.276150919,118.961965 7.30095984e-11,118.961965 C0.27613383,118.961965 0.5,119.185814 0.5,119.461965 Z M118.5,116.112678 L118.5,117.312678 C118.5,117.588821 118.276142,117.812678 118,117.812678 C117.723858,117.812678 117.5,117.588821 117.5,117.312678 L117.5,116.112678 C117.5,115.836536 117.723858,115.612678 118,115.612678 C118.276142,115.612678 118.5,115.836536 118.5,116.112678 Z M0.5,115.261965 L0.5,116.461965 C0.5,116.738107 0.276142375,116.961965 0,116.961965 C-0.276142375,116.961965 -0.5,116.738107 -0.5,116.461965 L-0.5,115.261965 C-0.5,114.985823 -0.276142375,114.761965 0,114.761965 C0.276142375,114.761965 0.5,114.985823 0.5,115.261965 Z M118.5,111.912678 L118.5,113.112678 C118.5,113.388821 118.276142,113.612678 118,113.612678 C117.723858,113.612678 117.5,113.388821 117.5,113.112678 L117.5,111.912678 C117.5,111.636536 117.723858,111.412678 118,111.412678 C118.276142,111.412678 118.5,111.636536 118.5,111.912678 Z M0.5,111.061965 L0.5,112.261965 C0.5,112.538107 0.276142375,112.761965 0,112.761965 C-0.276142375,112.761965 -0.5,112.538107 -0.5,112.261965 L-0.5,111.061965 C-0.5,110.785823 -0.276142375,110.561965 0,110.561965 C0.276142375,110.561965 0.5,110.785823 0.5,111.061965 Z M118.5,107.712678 L118.5,108.912678 C118.5,109.188821 118.276142,109.412678 118,109.412678 C117.723858,109.412678 117.5,109.188821 117.5,108.912678 L117.5,107.712678 C117.5,107.436536 117.723858,107.212678 118,107.212678 C118.276142,107.212678 118.5,107.436536 118.5,107.712678 Z M0.5,106.861965 L0.5,108.061965 C0.5,108.338107 0.276142375,108.561965 0,108.561965 C-0.276142375,108.561965 -0.5,108.338107 -0.5,108.061965 L-0.5,106.861965 C-0.5,106.585823 -0.276142375,106.361965 0,106.361965 C0.276142375,106.361965 0.5,106.585823 0.5,106.861965 Z M118.5,103.512678 L118.5,104.712678 C118.5,104.988821 118.276142,105.212678 118,105.212678 C117.723858,105.212678 117.5,104.988821 117.5,104.712678 L117.5,103.512678 C117.5,103.236536 117.723858,103.012678 118,103.012678 C118.276142,103.012678 118.5,103.236536 118.5,103.512678 Z M0.5,102.661965 L0.5,103.861965 C0.5,104.138107 0.276142375,104.361965 0,104.361965 C-0.276142375,104.361965 -0.5,104.138107 -0.5,103.861965 L-0.5,102.661965 C-0.5,102.385823 -0.276142375,102.161965 0,102.161965 C0.276142375,102.161965 0.5,102.385823 0.5,102.661965 Z M118.5,99.3126783 L118.5,100.512678 C118.5,100.788821 118.276142,101.012678 118,101.012678 C117.723858,101.012678 117.5,100.788821 117.5,100.512678 L117.5,99.3126783 C117.5,99.036536 117.723858,98.8126783 118,98.8126783 C118.276142,98.8126783 118.5,99.036536 118.5,99.3126783 Z M0.5,98.461965 L0.5,99.661965 C0.5,99.9381074 0.276142375,100.161965 0,100.161965 C-0.276142375,100.161965 -0.5,99.9381074 -0.5,99.661965 L-0.5,98.461965 C-0.5,98.1858226 -0.276142375,97.961965 0,97.961965 C0.276142375,97.961965 0.5,98.1858226 0.5,98.461965 Z M118.5,95.1126783 L118.5,96.3126783 C118.5,96.5888207 118.276142,96.8126783 118,96.8126783 C117.723858,96.8126783 117.5,96.5888207 117.5,96.3126783 L117.5,95.1126783 C117.5,94.836536 117.723858,94.6126783 118,94.6126783 C118.276142,94.6126783 118.5,94.836536 118.5,95.1126783 Z M0.5,94.261965 L0.5,95.461965 C0.5,95.7381074 0.276142375,95.961965 0,95.961965 C-0.276142375,95.961965 -0.5,95.7381074 -0.5,95.461965 L-0.5,94.261965 C-0.5,93.9858226 -0.276142375,93.761965 0,93.761965 C0.276142375,93.761965 0.5,93.9858226 0.5,94.261965 Z M118.5,90.9126783 L118.5,92.1126783 C118.5,92.3888207 118.276142,92.6126783 118,92.6126783 C117.723858,92.6126783 117.5,92.3888207 117.5,92.1126783 L117.5,90.9126783 C117.5,90.636536 117.723858,90.4126783 118,90.4126783 C118.276142,90.4126783 118.5,90.636536 118.5,90.9126783 Z M0.5,90.061965 L0.5,91.261965 C0.5,91.5381074 0.276142375,91.761965 0,91.761965 C-0.276142375,91.761965 -0.5,91.5381074 -0.5,91.261965 L-0.5,90.061965 C-0.5,89.7858226 -0.276142375,89.561965 0,89.561965 C0.276142375,89.561965 0.5,89.7858226 0.5,90.061965 Z M118.5,86.7126783 L118.5,87.9126783 C118.5,88.1888207 118.276142,88.4126783 118,88.4126783 C117.723858,88.4126783 117.5,88.1888207 117.5,87.9126783 L117.5,86.7126783 C117.5,86.436536 117.723858,86.2126783 118,86.2126783 C118.276142,86.2126783 118.5,86.436536 118.5,86.7126783 Z M0.5,85.861965 L0.5,87.061965 C0.5,87.3381074 0.276142375,87.561965 0,87.561965 C-0.276142375,87.561965 -0.5,87.3381074 -0.5,87.061965 L-0.5,85.861965 C-0.5,85.5858226 -0.276142375,85.361965 0,85.361965 C0.276142375,85.361965 0.5,85.5858226 0.5,85.861965 Z M118.5,82.5126783 L118.5,83.7126783 C118.5,83.9888207 118.276142,84.2126783 118,84.2126783 C117.723858,84.2126783 117.5,83.9888207 117.5,83.7126783 L117.5,82.5126783 C117.5,82.236536 117.723858,82.0126783 118,82.0126783 C118.276142,82.0126783 118.5,82.236536 118.5,82.5126783 Z M0.5,81.661965 L0.5,82.861965 C0.5,83.1381074 0.276142375,83.361965 0,83.361965 C-0.276142375,83.361965 -0.5,83.1381074 -0.5,82.861965 L-0.5,81.661965 C-0.5,81.3858226 -0.276142375,81.161965 0,81.161965 C0.276142375,81.161965 0.5,81.3858226 0.5,81.661965 Z M118.5,78.3126783 L118.5,79.5126783 C118.5,79.7888207 118.276142,80.0126783 118,80.0126783 C117.723858,80.0126783 117.5,79.7888207 117.5,79.5126783 L117.5,78.3126783 C117.5,78.036536 117.723858,77.8126783 118,77.8126783 C118.276142,77.8126783 118.5,78.036536 118.5,78.3126783 Z M0.5,77.461965 L0.5,78.661965 C0.5,78.9381074 0.276142375,79.161965 0,79.161965 C-0.276142375,79.161965 -0.5,78.9381074 -0.5,78.661965 L-0.5,77.461965 C-0.5,77.1858226 -0.276142375,76.961965 0,76.961965 C0.276142375,76.961965 0.5,77.1858226 0.5,77.461965 Z M118.5,74.1126783 L118.5,75.3126783 C118.5,75.5888207 118.276142,75.8126783 118,75.8126783 C117.723858,75.8126783 117.5,75.5888207 117.5,75.3126783 L117.5,74.1126783 C117.5,73.836536 117.723858,73.6126783 118,73.6126783 C118.276142,73.6126783 118.5,73.836536 118.5,74.1126783 Z M0.5,73.261965 L0.5,74.461965 C0.5,74.7381074 0.276142375,74.961965 0,74.961965 C-0.276142375,74.961965 -0.5,74.7381074 -0.5,74.461965 L-0.5,73.261965 C-0.5,72.9858226 -0.276142375,72.761965 0,72.761965 C0.276142375,72.761965 0.5,72.9858226 0.5,73.261965 Z M118.5,69.9126783 L118.5,71.1126783 C118.5,71.3888207 118.276142,71.6126783 118,71.6126783 C117.723858,71.6126783 117.5,71.3888207 117.5,71.1126783 L117.5,69.9126783 C117.5,69.636536 117.723858,69.4126783 118,69.4126783 C118.276142,69.4126783 118.5,69.636536 118.5,69.9126783 Z M0.5,69.061965 L0.5,70.261965 C0.5,70.5381074 0.276142375,70.761965 0,70.761965 C-0.276142375,70.761965 -0.5,70.5381074 -0.5,70.261965 L-0.5,69.061965 C-0.5,68.7858226 -0.276142375,68.561965 0,68.561965 C0.276142375,68.561965 0.5,68.7858226 0.5,69.061965 Z M118.5,65.7126783 L118.5,66.9126783 C118.5,67.1888207 118.276142,67.4126783 118,67.4126783 C117.723858,67.4126783 117.5,67.1888207 117.5,66.9126783 L117.5,65.7126783 C117.5,65.436536 117.723858,65.2126783 118,65.2126783 C118.276142,65.2126783 118.5,65.436536 118.5,65.7126783 Z M0.5,64.861965 L0.5,66.061965 C0.5,66.3381074 0.276142375,66.561965 0,66.561965 C-0.276142375,66.561965 -0.5,66.3381074 -0.5,66.061965 L-0.5,64.861965 C-0.5,64.5858226 -0.276142375,64.361965 0,64.361965 C0.276142375,64.361965 0.5,64.5858226 0.5,64.861965 Z M118.5,61.5126783 L118.5,62.7126783 C118.5,62.9888207 118.276142,63.2126783 118,63.2126783 C117.723858,63.2126783 117.5,62.9888207 117.5,62.7126783 L117.5,61.5126783 C117.5,61.236536 117.723858,61.0126783 118,61.0126783 C118.276142,61.0126783 118.5,61.236536 118.5,61.5126783 Z M0.5,60.661965 L0.5,61.861965 C0.5,62.1381074 0.276142375,62.361965 0,62.361965 C-0.276142375,62.361965 -0.5,62.1381074 -0.5,61.861965 L-0.5,60.661965 C-0.5,60.3858226 -0.276142375,60.161965 0,60.161965 C0.276142375,60.161965 0.5,60.3858226 0.5,60.661965 Z M118.5,57.3126783 L118.5,58.5126783 C118.5,58.7888207 118.276142,59.0126783 118,59.0126783 C117.723858,59.0126783 117.5,58.7888207 117.5,58.5126783 L117.5,57.3126783 C117.5,57.036536 117.723858,56.8126783 118,56.8126783 C118.276142,56.8126783 118.5,57.036536 118.5,57.3126783 Z M0.5,56.461965 L0.5,57.661965 C0.5,57.9381074 0.276142375,58.161965 0,58.161965 C-0.276142375,58.161965 -0.5,57.9381074 -0.5,57.661965 L-0.5,56.461965 C-0.5,56.1858226 -0.276142375,55.961965 0,55.961965 C0.276142375,55.961965 0.5,56.1858226 0.5,56.461965 Z M118.5,53.1126783 L118.5,54.3126783 C118.5,54.5888207 118.276142,54.8126783 118,54.8126783 C117.723858,54.8126783 117.5,54.5888207 117.5,54.3126783 L117.5,53.1126783 C117.5,52.836536 117.723858,52.6126783 118,52.6126783 C118.276142,52.6126783 118.5,52.836536 118.5,53.1126783 Z M0.5,52.261965 L0.5,53.461965 C0.5,53.7381074 0.276142375,53.961965 0,53.961965 C-0.276142375,53.961965 -0.5,53.7381074 -0.5,53.461965 L-0.5,52.261965 C-0.5,51.9858226 -0.276142375,51.761965 0,51.761965 C0.276142375,51.761965 0.5,51.9858226 0.5,52.261965 Z M118.5,48.9126783 L118.5,50.1126783 C118.5,50.3888207 118.276142,50.6126783 118,50.6126783 C117.723858,50.6126783 117.5,50.3888207 117.5,50.1126783 L117.5,48.9126783 C117.5,48.636536 117.723858,48.4126783 118,48.4126783 C118.276142,48.4126783 118.5,48.636536 118.5,48.9126783 Z M0.5,48.061965 L0.5,49.261965 C0.5,49.5381074 0.276142375,49.761965 0,49.761965 C-0.276142375,49.761965 -0.5,49.5381074 -0.5,49.261965 L-0.5,48.061965 C-0.5,47.7858226 -0.276142375,47.561965 0,47.561965 C0.276142375,47.561965 0.5,47.7858226 0.5,48.061965 Z M118.5,44.7126783 L118.5,45.9126783 C118.5,46.1888207 118.276142,46.4126783 118,46.4126783 C117.723858,46.4126783 117.5,46.1888207 117.5,45.9126783 L117.5,44.7126783 C117.5,44.436536 117.723858,44.2126783 118,44.2126783 C118.276142,44.2126783 118.5,44.436536 118.5,44.7126783 Z M0.5,43.861965 L0.5,45.061965 C0.5,45.3381074 0.276142375,45.561965 0,45.561965 C-0.276142375,45.561965 -0.5,45.3381074 -0.5,45.061965 L-0.5,43.861965 C-0.5,43.5858226 -0.276142375,43.361965 0,43.361965 C0.276142375,43.361965 0.5,43.5858226 0.5,43.861965 Z M118.5,40.5126783 L118.5,41.7126783 C118.5,41.9888207 118.276142,42.2126783 118,42.2126783 C117.723858,42.2126783 117.5,41.9888207 117.5,41.7126783 L117.5,40.5126783 C117.5,40.236536 117.723858,40.0126783 118,40.0126783 C118.276142,40.0126783 118.5,40.236536 118.5,40.5126783 Z M0.5,39.661965 L0.5,40.861965 C0.5,41.1381074 0.276142375,41.361965 0,41.361965 C-0.276142375,41.361965 -0.5,41.1381074 -0.5,40.861965 L-0.5,39.661965 C-0.5,39.3858226 -0.276142375,39.161965 0,39.161965 C0.276142375,39.161965 0.5,39.3858226 0.5,39.661965 Z M118.5,36.3126783 L118.5,37.5126783 C118.5,37.7888207 118.276142,38.0126783 118,38.0126783 C117.723858,38.0126783 117.5,37.7888207 117.5,37.5126783 L117.5,36.3126783 C117.5,36.036536 117.723858,35.8126783 118,35.8126783 C118.276142,35.8126783 118.5,36.036536 118.5,36.3126783 Z M0.5,35.461965 L0.5,36.661965 C0.5,36.9381074 0.276142375,37.161965 0,37.161965 C-0.276142375,37.161965 -0.5,36.9381074 -0.5,36.661965 L-0.5,35.461965 C-0.5,35.1858226 -0.276142375,34.961965 0,34.961965 C0.276142375,34.961965 0.5,35.1858226 0.5,35.461965 Z M118.5,32.1126783 L118.5,33.3126783 C118.5,33.5888207 118.276142,33.8126783 118,33.8126783 C117.723858,33.8126783 117.5,33.5888207 117.5,33.3126783 L117.5,32.1126783 C117.5,31.836536 117.723858,31.6126783 118,31.6126783 C118.276142,31.6126783 118.5,31.836536 118.5,32.1126783 Z M0.5,31.261965 L0.5,32.461965 C0.5,32.7381074 0.276142375,32.961965 0,32.961965 C-0.276142375,32.961965 -0.5,32.7381074 -0.5,32.461965 L-0.5,31.261965 C-0.5,30.9858226 -0.276142375,30.761965 0,30.761965 C0.276142375,30.761965 0.5,30.9858226 0.5,31.261965 Z M118.5,27.9126783 L118.5,29.1126783 C118.5,29.3888207 118.276142,29.6126783 118,29.6126783 C117.723858,29.6126783 117.5,29.3888207 117.5,29.1126783 L117.5,27.9126783 C117.5,27.636536 117.723858,27.4126783 118,27.4126783 C118.276142,27.4126783 118.5,27.636536 118.5,27.9126783 Z M0.5,27.061965 L0.5,28.261965 C0.5,28.5381074 0.276142375,28.761965 0,28.761965 C-0.276142375,28.761965 -0.5,28.5381074 -0.5,28.261965 L-0.5,27.061965 C-0.5,26.7858226 -0.276142375,26.561965 0,26.561965 C0.276142375,26.561965 0.5,26.7858226 0.5,27.061965 Z M118.5,23.7126783 L118.5,24.9126783 C118.5,25.1888207 118.276142,25.4126783 118,25.4126783 C117.723858,25.4126783 117.5,25.1888207 117.5,24.9126783 L117.5,23.7126783 C117.5,23.436536 117.723858,23.2126783 118,23.2126783 C118.276142,23.2126783 118.5,23.436536 118.5,23.7126783 Z M0.5,22.861965 L0.5,24.061965 C0.5,24.3381074 0.276142375,24.561965 0,24.561965 C-0.276142375,24.561965 -0.5,24.3381074 -0.5,24.061965 L-0.5,22.861965 C-0.5,22.5858226 -0.276142375,22.361965 0,22.361965 C0.276142375,22.361965 0.5,22.5858226 0.5,22.861965 Z M118.5,19.5126783 L118.5,20.7126783 C118.5,20.9888207 118.276142,21.2126783 118,21.2126783 C117.723858,21.2126783 117.5,20.9888207 117.5,20.7126783 L117.5,19.5126783 C117.5,19.236536 117.723858,19.0126783 118,19.0126783 C118.276142,19.0126783 118.5,19.236536 118.5,19.5126783 Z M0.5,18.661965 L0.5,19.861965 C0.5,20.1381074 0.276142375,20.361965 0,20.361965 C-0.276142375,20.361965 -0.5,20.1381074 -0.5,19.861965 L-0.5,18.661965 C-0.5,18.3858226 -0.276142375,18.161965 0,18.161965 C0.276142375,18.161965 0.5,18.3858226 0.5,18.661965 Z M118.5,15.3126783 L118.5,16.5126783 C118.5,16.7888207 118.276142,17.0126783 118,17.0126783 C117.723858,17.0126783 117.5,16.7888207 117.5,16.5126783 L117.5,15.3126783 C117.5,15.036536 117.723858,14.8126783 118,14.8126783 C118.276142,14.8126783 118.5,15.036536 118.5,15.3126783 Z M0.5,14.461965 L0.5,15.661965 C0.5,15.9381074 0.276142375,16.161965 0,16.161965 C-0.276142375,16.161965 -0.5,15.9381074 -0.5,15.661965 L-0.5,14.461965 C-0.5,14.1858226 -0.276142375,13.961965 0,13.961965 C0.276142375,13.961965 0.5,14.1858226 0.5,14.461965 Z M118.5,11.1126783 L118.5,12.3126783 C118.5,12.5888207 118.276142,12.8126783 118,12.8126783 C117.723858,12.8126783 117.5,12.5888207 117.5,12.3126783 L117.5,11.1126783 C117.5,10.836536 117.723858,10.6126783 118,10.6126783 C118.276142,10.6126783 118.5,10.836536 118.5,11.1126783 Z M0.5,10.261965 L0.5,11.461965 C0.5,11.7381074 0.276142375,11.961965 0,11.961965 C-0.276142375,11.961965 -0.5,11.7381074 -0.5,11.461965 L-0.5,10.261965 C-0.5,9.98582265 -0.276142375,9.76196502 0,9.76196502 C0.276142375,9.76196502 0.5,9.98582265 0.5,10.261965 Z M118.419805,6.82803952 C118.47219,7.20768546 118.499081,7.59225728 118.499978,7.98103351 L118.5,8.11259543 C118.500046,8.3887378 118.276225,8.61263254 118.000083,8.61267833 C117.723941,8.6127241 117.500046,8.3889036 117.5,8.11276123 L117.49998,7.98227163 C117.499188,7.63955287 117.475436,7.29987535 117.429191,6.96472849 C117.391445,6.69117799 117.582603,6.43882262 117.856153,6.401077 C118.129704,6.36333138 118.382059,6.55448901 118.419805,6.82803952 Z M0.351560452,5.59425596 C0.619685241,5.66031433 0.783492437,5.93122331 0.717434064,6.1993481 C0.628029624,6.5622324 0.565753874,6.93265807 0.531429382,7.30840907 C0.506308636,7.58340645 0.263014824,7.78597148 -0.0119825569,7.76085074 C-0.286979937,7.73572999 -0.489544973,7.49243618 -0.464424227,7.2174388 C-0.425527861,6.79163939 -0.354921179,6.37166007 -0.25353169,5.96012957 C-0.187473317,5.69200478 0.0834356631,5.52819758 0.351560452,5.59425596 Z M116.698898,2.76733419 C116.96108,3.1025346 117.197883,3.45694423 117.407088,3.8275859 C117.542824,4.06806518 117.457913,4.37304812 117.217434,4.50878435 C116.976955,4.64452059 116.671972,4.55960957 116.536235,4.31913028 C116.35164,3.9920895 116.14265,3.67930561 115.911223,3.38342515 C115.741094,3.1659147 115.779505,2.85167098 115.997015,2.68154216 C116.214525,2.51141334 116.528769,2.54982374 116.698898,2.76733419 Z M2.65018329,2.03098857 C2.84694705,2.22473746 2.84939109,2.54131052 2.6556422,2.73807428 C2.39207973,3.00573797 2.14880502,3.29275366 1.92806459,3.59657449 C1.76575137,3.81997777 1.45306636,3.86950105 1.22966307,3.70718783 C1.00625978,3.54487461 0.956736501,3.23218959 1.11904973,3.00878631 C1.36908653,2.66464279 1.64461146,2.33957825 1.94309758,2.03644748 C2.13684647,1.83968372 2.45341952,1.83723968 2.65018329,2.03098857 Z M113.131516,0.0955761984 C113.527572,0.252607282 113.910844,0.439252343 114.278336,0.653707596 C114.516838,0.792888857 114.597354,1.09906161 114.458172,1.33756357 C114.318991,1.57606554 114.012818,1.65658088 113.774317,1.51739962 C113.450161,1.32823426 113.112159,1.1636344 112.762943,1.0251747 C112.506241,0.923395871 112.380651,0.63279011 112.48243,0.376088571 C112.584209,0.119387032 112.874815,-0.00620263532 113.131516,0.0955761984 Z M6.40653195,0.142669081 C6.47802057,0.409397386 6.31974754,0.683576376 6.05301923,0.755064996 C5.68981674,0.852410663 5.33533565,0.976934286 4.9920952,1.12738697 C4.73918251,1.23824627 4.44428705,1.12308924 4.33342776,0.870176548 C4.22256847,0.617263859 4.3377255,0.322368401 4.59063819,0.211509108 C4.97997232,0.0408521629 5.38210756,-0.100411613 5.79413604,-0.21084364 C6.06086434,-0.282332259 6.33504333,-0.124059223 6.40653195,0.142669081 Z M110.500405,2.47493888e-05 C110.500405,0.276167124 110.276522,0.500013673 110.00038,0.5 L108.8,0.5 C108.523858,0.5 108.3,0.276142375 108.3,0 C108.3,-0.276142375 108.523858,-0.5 108.8,-0.5 L110,-0.5 C110.276572,-0.499986326 110.500405,-0.276117625 110.500405,2.47493888e-05 Z M106.3,0 C106.3,0.276142375 106.076142,0.5 105.8,0.5 L104.6,0.5 C104.323858,0.5 104.1,0.276142375 104.1,0 C104.1,-0.276142375 104.323858,-0.5 104.6,-0.5 L105.8,-0.5 C106.076142,-0.5 106.3,-0.276142375 106.3,0 Z M102.1,0 C102.1,0.276142375 101.876142,0.5 101.6,0.5 L100.4,0.5 C100.123858,0.5 99.9,0.276142375 99.9,0 C99.9,-0.276142375 100.123858,-0.5 100.4,-0.5 L101.6,-0.5 C101.876142,-0.5 102.1,-0.276142375 102.1,0 Z M97.9,0 C97.9,0.276142375 97.6761424,0.5 97.4,0.5 L96.2,0.5 C95.9238576,0.5 95.7,0.276142375 95.7,0 C95.7,-0.276142375 95.9238576,-0.5 96.2,-0.5 L97.4,-0.5 C97.6761424,-0.5 97.9,-0.276142375 97.9,0 Z M93.7,0 C93.7,0.276142375 93.4761424,0.5 93.2,0.5 L92,0.5 C91.7238576,0.5 91.5,0.276142375 91.5,0 C91.5,-0.276142375 91.7238576,-0.5 92,-0.5 L93.2,-0.5 C93.4761424,-0.5 93.7,-0.276142375 93.7,0 Z M89.5,0 C89.5,0.276142375 89.2761424,0.5 89,0.5 L87.8,0.5 C87.5238576,0.5 87.3,0.276142375 87.3,0 C87.3,-0.276142375 87.5238576,-0.5 87.8,-0.5 L89,-0.5 C89.2761424,-0.5 89.5,-0.276142375 89.5,0 Z M85.3,0 C85.3,0.276142375 85.0761424,0.5 84.8,0.5 L83.6,0.5 C83.3238576,0.5 83.1,0.276142375 83.1,0 C83.1,-0.276142375 83.3238576,-0.5 83.6,-0.5 L84.8,-0.5 C85.0761424,-0.5 85.3,-0.276142375 85.3,0 Z M81.1,0 C81.1,0.276142375 80.8761424,0.5 80.6,0.5 L79.4,0.5 C79.1238576,0.5 78.9,0.276142375 78.9,0 C78.9,-0.276142375 79.1238576,-0.5 79.4,-0.5 L80.6,-0.5 C80.8761424,-0.5 81.1,-0.276142375 81.1,0 Z M76.9,0 C76.9,0.276142375 76.6761424,0.5 76.4,0.5 L75.2,0.5 C74.9238576,0.5 74.7,0.276142375 74.7,0 C74.7,-0.276142375 74.9238576,-0.5 75.2,-0.5 L76.4,-0.5 C76.6761424,-0.5 76.9,-0.276142375 76.9,0 Z M72.7,0 C72.7,0.276142375 72.4761424,0.5 72.2,0.5 L71,0.5 C70.7238576,0.5 70.5,0.276142375 70.5,0 C70.5,-0.276142375 70.7238576,-0.5 71,-0.5 L72.2,-0.5 C72.4761424,-0.5 72.7,-0.276142375 72.7,0 Z M68.5,0 C68.5,0.276142375 68.2761424,0.5 68,0.5 L66.8,0.5 C66.5238576,0.5 66.3,0.276142375 66.3,0 C66.3,-0.276142375 66.5238576,-0.5 66.8,-0.5 L68,-0.5 C68.2761424,-0.5 68.5,-0.276142375 68.5,0 Z M64.3,0 C64.3,0.276142375 64.0761424,0.5 63.8,0.5 L62.6,0.5 C62.3238576,0.5 62.1,0.276142375 62.1,0 C62.1,-0.276142375 62.3238576,-0.5 62.6,-0.5 L63.8,-0.5 C64.0761424,-0.5 64.3,-0.276142375 64.3,0 Z M60.1,0 C60.1,0.276142375 59.8761424,0.5 59.6,0.5 L58.4,0.5 C58.1238576,0.5 57.9,0.276142375 57.9,0 C57.9,-0.276142375 58.1238576,-0.5 58.4,-0.5 L59.6,-0.5 C59.8761424,-0.5 60.1,-0.276142375 60.1,0 Z M55.9,0 C55.9,0.276142375 55.6761424,0.5 55.4,0.5 L54.2,0.5 C53.9238576,0.5 53.7,0.276142375 53.7,0 C53.7,-0.276142375 53.9238576,-0.5 54.2,-0.5 L55.4,-0.5 C55.6761424,-0.5 55.9,-0.276142375 55.9,0 Z M51.7,0 C51.7,0.276142375 51.4761424,0.5 51.2,0.5 L50,0.5 C49.7238576,0.5 49.5,0.276142375 49.5,0 C49.5,-0.276142375 49.7238576,-0.5 50,-0.5 L51.2,-0.5 C51.4761424,-0.5 51.7,-0.276142375 51.7,0 Z M47.5,0 C47.5,0.276142375 47.2761424,0.5 47,0.5 L45.8,0.5 C45.5238576,0.5 45.3,0.276142375 45.3,0 C45.3,-0.276142375 45.5238576,-0.5 45.8,-0.5 L47,-0.5 C47.2761424,-0.5 47.5,-0.276142375 47.5,0 Z M43.3,0 C43.3,0.276142375 43.0761424,0.5 42.8,0.5 L41.6,0.5 C41.3238576,0.5 41.1,0.276142375 41.1,0 C41.1,-0.276142375 41.3238576,-0.5 41.6,-0.5 L42.8,-0.5 C43.0761424,-0.5 43.3,-0.276142375 43.3,0 Z M39.1,0 C39.1,0.276142375 38.8761424,0.5 38.6,0.5 L37.4,0.5 C37.1238576,0.5 36.9,0.276142375 36.9,0 C36.9,-0.276142375 37.1238576,-0.5 37.4,-0.5 L38.6,-0.5 C38.8761424,-0.5 39.1,-0.276142375 39.1,0 Z M34.9,0 C34.9,0.276142375 34.6761424,0.5 34.4,0.5 L33.2,0.5 C32.9238576,0.5 32.7,0.276142375 32.7,0 C32.7,-0.276142375 32.9238576,-0.5 33.2,-0.5 L34.4,-0.5 C34.6761424,-0.5 34.9,-0.276142375 34.9,0 Z M30.7,0 C30.7,0.276142375 30.4761424,0.5 30.2,0.5 L29,0.5 C28.7238576,0.5 28.5,0.276142375 28.5,0 C28.5,-0.276142375 28.7238576,-0.5 29,-0.5 L30.2,-0.5 C30.4761424,-0.5 30.7,-0.276142375 30.7,0 Z M26.5,0 C26.5,0.276142375 26.2761424,0.5 26,0.5 L24.8,0.5 C24.5238576,0.5 24.3,0.276142375 24.3,0 C24.3,-0.276142375 24.5238576,-0.5 24.8,-0.5 L26,-0.5 C26.2761424,-0.5 26.5,-0.276142375 26.5,0 Z M22.3,0 C22.3,0.276142375 22.0761424,0.5 21.8,0.5 L20.6,0.5 C20.3238576,0.5 20.1,0.276142375 20.1,0 C20.1,-0.276142375 20.3238576,-0.5 20.6,-0.5 L21.8,-0.5 C22.0761424,-0.5 22.3,-0.276142375 22.3,0 Z M18.1,0 C18.1,0.276142375 17.8761424,0.5 17.6,0.5 L16.4,0.5 C16.1238576,0.5 15.9,0.276142375 15.9,0 C15.9,-0.276142375 16.1238576,-0.5 16.4,-0.5 L17.6,-0.5 C17.8761424,-0.5 18.1,-0.276142375 18.1,0 Z M13.9,0 C13.9,0.276142375 13.6761424,0.5 13.4,0.5 L12.2,0.5 C11.9238576,0.5 11.7,0.276142375 11.7,0 C11.7,-0.276142375 11.9238576,-0.5 12.2,-0.5 L13.4,-0.5 C13.6761424,-0.5 13.9,-0.276142375 13.9,0 Z M9.7,0 C9.7,0.276142375 9.47614237,0.5 9.2,0.5 L8,0.5 C7.72385763,0.5 7.5,0.276142375 7.5,0 C7.5,-0.276142375 7.72385763,-0.5 8,-0.5 L9.2,-0.5 C9.47614237,-0.5 9.7,-0.276142375 9.7,0 Z" id="vLLM-dash-box" fill="#999999" fill-rule="nonzero"></path>
+                    <g id="vLLM" stroke-width="1" fill-rule="evenodd" transform="translate(6.5, 4)" fill="#7D7D7D">
+                        <path d="M1.80126953,8 L5.30224609,8 C5.44873047,8 5.56266276,7.95198568 5.64404297,7.85595703 C5.72542318,7.75992839 5.76611328,7.6484375 5.76611328,7.52148437 C5.76611328,7.39453125 5.72460938,7.28222656 5.64160156,7.18457031 C5.55859375,7.08691406 5.44547526,7.03808594 5.30224609,7.03808594 L2.39697266,7.03808594 L2.39697266,1.58398437 C2.39697266,1.3984375 2.33837891,1.25439453 2.22119141,1.15185547 C2.10400391,1.04931641 1.96240234,0.998046875 1.79638672,0.998046875 C1.63037109,0.998046875 1.48714193,1.04931641 1.36669922,1.15185547 C1.24625651,1.25439453 1.18603516,1.3984375 1.18603516,1.58398437 L1.18603516,7.39453125 C1.18603516,7.56054687 1.24707031,7.70296224 1.36914062,7.82177734 C1.49121094,7.94059245 1.63525391,8 1.80126953,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M7.27978516,8 L10.7807617,8 C10.9272461,8 11.0411784,7.95198568 11.1225586,7.85595703 C11.2039388,7.75992839 11.2446289,7.6484375 11.2446289,7.52148437 C11.2446289,7.39453125 11.203125,7.28222656 11.1201172,7.18457031 C11.0371094,7.08691406 10.9239909,7.03808594 10.7807617,7.03808594 L7.87548828,7.03808594 L7.87548828,1.58398437 C7.87548828,1.3984375 7.81689453,1.25439453 7.69970703,1.15185547 C7.58251953,1.04931641 7.44091797,0.998046875 7.27490234,0.998046875 C7.10888672,0.998046875 6.96565755,1.04931641 6.84521484,1.15185547 C6.72477214,1.25439453 6.66455078,1.3984375 6.66455078,1.58398437 L6.66455078,7.39453125 C6.66455078,7.56054687 6.72558594,7.70296224 6.84765625,7.82177734 C6.96972656,7.94059245 7.11376953,8 7.27978516,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M15.9418945,7.78027344 L15.9418945,6.31054688 L14.0473633,1.88183594 C13.9529622,1.65722656 13.8162435,1.48876953 13.637207,1.37646484 C13.4581706,1.26416016 13.2661133,1.20800781 13.0610352,1.20800781 C12.8136393,1.20800781 12.5996094,1.2796224 12.4189453,1.42285156 C12.2382812,1.56608073 12.1479492,1.75 12.1479492,1.97460938 L12.1479492,7.47753906 C12.1479492,7.60449219 12.2032878,7.70377604 12.3139648,7.77539063 C12.4246419,7.84700521 12.5556641,7.8828125 12.7070312,7.8828125 C12.8583984,7.8828125 12.9894206,7.84700521 13.1000977,7.77539063 C13.2107747,7.70377604 13.2661133,7.60449219 13.2661133,7.47753906 L13.2661133,2.5703125 L15.2436523,7.25292969 C15.3152669,7.42220052 15.4145508,7.55240885 15.5415039,7.64355469 C15.668457,7.73470052 15.8019206,7.78027344 15.9418945,7.78027344 Z M15.9467773,7.78027344 C16.0867513,7.78027344 16.2210286,7.73388672 16.3496094,7.64111328 C16.4781901,7.54833984 16.5766602,7.41894531 16.6450195,7.25292969 L18.6274414,2.5703125 L18.6274414,7.47753906 C18.6274414,7.60449219 18.6827799,7.70377604 18.793457,7.77539063 C18.9041341,7.84700521 19.0351562,7.8828125 19.1865234,7.8828125 C19.3378906,7.8828125 19.4689128,7.84700521 19.5795898,7.77539063 C19.6902669,7.70377604 19.7456055,7.60449219 19.7456055,7.47753906 L19.7456055,1.97460938 C19.7456055,1.82486979 19.702474,1.69059245 19.6162109,1.57177734 C19.5299479,1.45296224 19.4176432,1.36263021 19.2792969,1.30078125 C19.1409505,1.23893229 18.9920247,1.20800781 18.8325195,1.20800781 C18.6274414,1.20800781 18.4353841,1.26416016 18.2563477,1.37646484 C18.0773112,1.48876953 17.9389648,1.65722656 17.8413086,1.88183594 L15.9467773,6.31054688 L15.9467773,7.78027344 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="Lines-@blue-w-arrow" transform="translate(20, 0)" fill="#8E8E8E" fill-rule="nonzero">
+                    <g id="L5" transform="translate(0, 138)">
+                        <path d="M77.5,-0.277777778 L80,4.72222222 L77.999,4.72222222 L78,20 C78,20.2761424 77.7761424,20.5 77.5,20.5 C77.2238576,20.5 77,20.2761424 77,20 L76.999,4.72222222 L75,4.72222222 L77.5,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M32.5,-0.277777778 L35,4.72222222 L32.999,4.72222222 L33,20 C33,20.2761424 32.7761424,20.5 32.5,20.5 C32.2238576,20.5 32,20.2761424 32,20 L31.999,4.72222222 L30,4.72222222 L32.5,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M0.5,-0.277777778 L3,4.72222222 L0.999,4.72222222 L1,20 C1,20.2761424 0.776142375,20.5 0.5,20.5 C0.223857625,20.5 1.33226763e-15,20.2761424 1.33226763e-15,20 L-0.001,4.72222222 L-2,4.72222222 L0.5,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L4" transform="translate(0.0001, 110)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L3" transform="translate(0, 82)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L2" transform="translate(0.0001, 54)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L1" transform="translate(39, 0)">
+                        <path d="M0.5,0.722222222 L3,5.72222222 L0.999,5.72222222 L1,42 C1,42.2761424 0.776142375,42.5 0.5,42.5 C0.223857625,42.5 2.22044605e-15,42.2761424 2.22044605e-15,42 L-0.001,5.72222222 L-2,5.72222222 L0.5,0.722222222 Z" id="Combined-Shape"></path>
+                    </g>
+                </g>
+                <g id="L5" transform="translate(8, 158)">
+                    <g id="1" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="2" transform="translate(32, 0)" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="3" transform="translate(78, 0)" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="..." transform="translate(62.7, 5)" fill="#8E8E8E">
+                        <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                    </g>
+                </g>
+                <g id="L4" transform="translate(8, 126)" fill="#30A2FF">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L3" transform="translate(8, 98)" fill="#30A2FF">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L2" transform="translate(8, 70)" fill="#30A2FF">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L1" transform="translate(8, 42)" fill="#30A2FF">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+            </g>
+            <g id="Pooling" transform="translate(0, 80)">
+                <g id="dash-box" fill="#9172E2">
+                    <path d="M8,0 L288,0 C292.418278,0 296,3.581722 296,8 L296,74 C296,78.418278 292.418278,82 288,82 L8,82 C3.581722,82 0,78.418278 0,74 L0,8 C0,3.581722 3.581722,8.8817842e-16 8,0 Z" id="bg" fill-opacity="0.1"></path>
+                    <path d="M10.9746435,80.9999998 L10.974,81.9999998 L8.974,81.9999998 L8.97464335,80.9999998 L10.9746435,80.9999998 Z M14.9746436,80.9999998 L14.974,81.9999998 L12.974,81.9999998 L12.9746433,80.9999998 L14.9746436,80.9999998 Z M18.9746435,80.9999998 L18.974,81.9999998 L16.974,81.9999998 L16.9746433,80.9999998 L18.9746435,80.9999998 Z M22.9746433,80.9999998 L22.974,81.9999998 L20.974,81.9999998 L20.9746433,80.9999998 L22.9746433,80.9999998 Z M26.9746435,80.9999998 L26.974,81.9999998 L24.974,81.9999998 L24.9746433,80.9999998 L26.9746435,80.9999998 Z M30.9746434,80.9999998 L30.974,81.9999998 L28.974,81.9999998 L28.9746433,80.9999998 L30.9746434,80.9999998 Z M34.9746435,80.9999998 L34.974,81.9999998 L32.974,81.9999998 L32.9746433,80.9999998 L34.9746435,80.9999998 Z M38.9746434,80.9999998 L38.974,81.9999998 L36.974,81.9999998 L36.9746433,80.9999998 L38.9746434,80.9999998 Z M42.9746435,80.9999998 L42.974,81.9999998 L40.974,81.9999998 L40.9746433,80.9999998 L42.9746435,80.9999998 Z M46.9746434,80.9999998 L46.974,81.9999998 L44.974,81.9999998 L44.9746433,80.9999998 L46.9746434,80.9999998 Z M50.9746435,80.9999998 L50.974,81.9999998 L48.974,81.9999998 L48.9746433,80.9999998 L50.9746435,80.9999998 Z M54.9746434,80.9999998 L54.974,81.9999998 L52.974,81.9999998 L52.9746433,80.9999998 L54.9746434,80.9999998 Z M58.9746435,80.9999998 L58.974,81.9999998 L56.974,81.9999998 L56.9746433,80.9999998 L58.9746435,80.9999998 Z M62.9746434,80.9999998 L62.974,81.9999998 L60.974,81.9999998 L60.9746433,80.9999998 L62.9746434,80.9999998 Z M66.9746436,80.9999998 L66.974,81.9999998 L64.974,81.9999998 L64.9746433,80.9999998 L66.9746436,80.9999998 Z M70.9746434,80.9999998 L70.974,81.9999998 L68.974,81.9999998 L68.9746433,80.9999998 L70.9746434,80.9999998 Z M74.9746436,80.9999998 L74.974,81.9999998 L72.974,81.9999998 L72.9746433,80.9999998 L74.9746436,80.9999998 Z M78.9746434,80.9999998 L78.974,81.9999998 L76.974,81.9999998 L76.9746433,80.9999998 L78.9746434,80.9999998 Z M82.9746436,80.9999998 L82.974,81.9999998 L80.974,81.9999998 L80.9746433,80.9999998 L82.9746436,80.9999998 Z M86.9746435,80.9999998 L86.974,81.9999998 L84.974,81.9999998 L84.9746433,80.9999998 L86.9746435,80.9999998 Z M90.9746436,80.9999998 L90.974,81.9999998 L88.974,81.9999998 L88.9746433,80.9999998 L90.9746436,80.9999998 Z M94.9746435,80.9999998 L94.974,81.9999998 L92.974,81.9999998 L92.9746433,80.9999998 L94.9746435,80.9999998 Z M98.9746436,80.9999998 L98.974,81.9999998 L96.974,81.9999998 L96.9746433,80.9999998 L98.9746436,80.9999998 Z M102.974643,80.9999998 L102.974,81.9999998 L100.974,81.9999998 L100.974643,80.9999998 L102.974643,80.9999998 Z M106.974644,80.9999998 L106.974,81.9999998 L104.974,81.9999998 L104.974643,80.9999998 L106.974644,80.9999998 Z M110.974643,80.9999998 L110.974,81.9999998 L108.974,81.9999998 L108.974643,80.9999998 L110.974643,80.9999998 Z M114.974643,80.9999998 L114.974,81.9999998 L112.974,81.9999998 L112.974643,80.9999998 L114.974643,80.9999998 Z M118.974644,80.9999998 L118.974,81.9999998 L116.974,81.9999998 L116.974643,80.9999998 L118.974644,80.9999998 Z M122.974643,80.9999998 L122.974,81.9999998 L120.974,81.9999998 L120.974643,80.9999998 L122.974643,80.9999998 Z M126.974644,80.9999998 L126.974,81.9999998 L124.974,81.9999998 L124.974643,80.9999998 L126.974644,80.9999998 Z M130.974643,80.9999998 L130.974,81.9999998 L128.974,81.9999998 L128.974643,80.9999998 L130.974643,80.9999998 Z M134.974644,80.9999998 L134.974,81.9999998 L132.974,81.9999998 L132.974643,80.9999998 L134.974644,80.9999998 Z M138.974643,80.9999998 L138.974,81.9999998 L136.974,81.9999998 L136.974643,80.9999998 L138.974643,80.9999998 Z M142.974644,80.9999998 L142.974,81.9999998 L140.974,81.9999998 L140.974643,80.9999998 L142.974644,80.9999998 Z M146.974643,80.9999998 L146.974,81.9999998 L144.974,81.9999998 L144.974643,80.9999998 L146.974643,80.9999998 Z M150.974643,80.9999998 L150.974,81.9999998 L148.974,81.9999998 L148.974643,80.9999998 L150.974643,80.9999998 Z M154.974643,80.9999998 L154.974,81.9999998 L152.974,81.9999998 L152.974643,80.9999998 L154.974643,80.9999998 Z M158.974643,80.9999998 L158.974,81.9999998 L156.974,81.9999998 L156.974643,80.9999998 L158.974643,80.9999998 Z M162.974643,80.9999998 L162.974,81.9999998 L160.974,81.9999998 L160.974643,80.9999998 L162.974643,80.9999998 Z M166.974643,80.9999998 L166.974,81.9999998 L164.974,81.9999998 L164.974643,80.9999998 L166.974643,80.9999998 Z M170.974643,80.9999998 L170.974,81.9999998 L168.974,81.9999998 L168.974643,80.9999998 L170.974643,80.9999998 Z M174.974643,80.9999998 L174.974,81.9999998 L172.974,81.9999998 L172.974643,80.9999998 L174.974643,80.9999998 Z M178.974643,80.9999998 L178.974,81.9999998 L176.974,81.9999998 L176.974643,80.9999998 L178.974643,80.9999998 Z M182.974643,80.9999998 L182.974,81.9999998 L180.974,81.9999998 L180.974643,80.9999998 L182.974643,80.9999998 Z M186.974643,80.9999998 L186.974,81.9999998 L184.974,81.9999998 L184.974643,80.9999998 L186.974643,80.9999998 Z M190.974643,80.9999998 L190.974,81.9999998 L188.974,81.9999998 L188.974643,80.9999998 L190.974643,80.9999998 Z M194.974643,80.9999998 L194.974,81.9999998 L192.974,81.9999998 L192.974643,80.9999998 L194.974643,80.9999998 Z M198.974643,80.9999998 L198.974,81.9999998 L196.974,81.9999998 L196.974643,80.9999998 L198.974643,80.9999998 Z M202.974643,80.9999998 L202.974,81.9999998 L200.974,81.9999998 L200.974643,80.9999998 L202.974643,80.9999998 Z M206.974643,80.9999998 L206.974,81.9999998 L204.974,81.9999998 L204.974643,80.9999998 L206.974643,80.9999998 Z M210.974643,80.9999998 L210.974,81.9999998 L208.974,81.9999998 L208.974643,80.9999998 L210.974643,80.9999998 Z M214.974643,80.9999998 L214.974,81.9999998 L212.974,81.9999998 L212.974643,80.9999998 L214.974643,80.9999998 Z M218.974643,80.9999998 L218.974,81.9999998 L216.974,81.9999998 L216.974643,80.9999998 L218.974643,80.9999998 Z M222.974643,80.9999998 L222.974,81.9999998 L220.974,81.9999998 L220.974643,80.9999998 L222.974643,80.9999998 Z M226.974643,80.9999998 L226.974,81.9999998 L224.974,81.9999998 L224.974643,80.9999998 L226.974643,80.9999998 Z M230.974643,80.9999998 L230.974,81.9999998 L228.974,81.9999998 L228.974643,80.9999998 L230.974643,80.9999998 Z M234.974643,80.9999998 L234.974,81.9999998 L232.974,81.9999998 L232.974643,80.9999998 L234.974643,80.9999998 Z M238.974643,80.9999998 L238.974,81.9999998 L236.974,81.9999998 L236.974643,80.9999998 L238.974643,80.9999998 Z M242.974643,80.9999998 L242.974,81.9999998 L240.974,81.9999998 L240.974643,80.9999998 L242.974643,80.9999998 Z M246.974643,80.9999998 L246.974,81.9999998 L244.974,81.9999998 L244.974643,80.9999998 L246.974643,80.9999998 Z M250.974643,80.9999998 L250.974,81.9999998 L248.974,81.9999998 L248.974643,80.9999998 L250.974643,80.9999998 Z M254.974643,80.9999998 L254.974,81.9999998 L252.974,81.9999998 L252.974643,80.9999998 L254.974643,80.9999998 Z M258.974643,80.9999998 L258.974,81.9999998 L256.974,81.9999998 L256.974643,80.9999998 L258.974643,80.9999998 Z M262.974643,80.9999998 L262.974,81.9999998 L260.974,81.9999998 L260.974643,80.9999998 L262.974643,80.9999998 Z M266.974643,80.9999998 L266.974,81.9999998 L264.974,81.9999998 L264.974643,80.9999998 L266.974643,80.9999998 Z M270.974643,80.9999998 L270.974,81.9999998 L268.974,81.9999998 L268.974643,80.9999998 L270.974643,80.9999998 Z M274.974643,80.9999998 L274.974,81.9999998 L272.974,81.9999998 L272.974643,80.9999998 L274.974643,80.9999998 Z M278.974643,80.9999998 L278.974,81.9999998 L276.974,81.9999998 L276.974643,80.9999998 L278.974643,80.9999998 Z M282.974643,80.9999998 L282.974,81.9999998 L280.974,81.9999998 L280.974643,80.9999998 L282.974643,80.9999998 Z M286.974643,80.9999998 L286.974,81.9999998 L284.974,81.9999998 L284.974643,80.9999998 L286.974643,80.9999998 Z M290.489743,80.544574 L290.845614,81.4791091 C290.230519,81.7132701 289.579078,81.8737323 288.902082,81.9497049 L288.79022,80.9559811 C289.375654,80.8903126 289.945502,80.7517531 290.489743,80.544574 Z M5.41029967,80.5056361 C5.95157753,80.7213183 6.51934132,80.8686603 7.10343083,80.9432289 L6.97697115,81.9352007 C6.30134705,81.8489616 5.65205616,81.6784493 5.03997173,81.4345371 L5.41029967,80.5056361 Z M293.388302,78.4686843 L294.157467,79.1077344 C293.730317,79.6220994 293.239943,80.0820804 292.698265,80.4757556 L292.11069,79.6665857 C292.584521,79.3221774 293.013992,78.9194515 293.388302,78.4686843 Z M2.56157145,78.4076246 C2.93082185,78.8625901 3.35581002,79.2701375 3.82581503,79.6198877 L3.22929764,80.422488 C2.69189028,80.0226419 2.20638657,79.5569385 1.78473163,79.0373231 L2.56157145,78.4076246 Z M294.876065,75.3192015 L295.858348,75.506607 C295.73129,76.1733384 295.521483,76.8107286 295.240021,77.407685 L294.335201,76.9818892 C294.584106,76.4541311 294.766097,75.896352 294.876065,75.3192015 Z M1.10775661,75.2312252 C1.21033515,75.8099081 1.38509885,76.3698753 1.62708778,76.9005768 L0.717340098,77.3157385 C0.443469481,76.7151789 0.241701076,76.07482 0.123099669,75.4057265 L1.10775661,75.2312252 Z M296,71.512 L296,73.512 L295,73.5126783 L295,71.5126783 L296,71.512 Z M1,71.461965 L1,73.461965 L0,73.461 L0,71.461 L1,71.461965 Z M296,67.512 L296,69.512 L295,69.5126783 L295,67.5126783 L296,67.512 Z M1,67.461965 L1,69.461965 L0,69.461 L0,67.461 L1,67.461965 Z M296,63.512 L296,65.512 L295,65.5126783 L295,63.5126783 L296,63.512 Z M1,63.461965 L1,65.461965 L0,65.461 L0,63.461 L1,63.461965 Z M296,59.512 L296,61.512 L295,61.5126783 L295,59.5126783 L296,59.512 Z M1,59.461965 L1,61.461965 L0,61.461 L0,59.461 L1,59.461965 Z M296,55.512 L296,57.512 L295,57.5126783 L295,55.5126783 L296,55.512 Z M1,55.461965 L1,57.461965 L0,57.461 L0,55.461 L1,55.461965 Z M296,51.512 L296,53.512 L295,53.5126783 L295,51.5126783 L296,51.512 Z M1,51.461965 L1,53.461965 L0,53.461 L0,51.461 L1,51.461965 Z M296,47.512 L296,49.512 L295,49.5126783 L295,47.5126783 L296,47.512 Z M1,47.461965 L1,49.461965 L0,49.461 L0,47.461 L1,47.461965 Z M296,43.512 L296,45.512 L295,45.5126783 L295,43.5126783 L296,43.512 Z M1,43.461965 L1,45.461965 L0,45.461 L0,43.461 L1,43.461965 Z M296,39.512 L296,41.512 L295,41.5126783 L295,39.5126783 L296,39.512 Z M1,39.461965 L1,41.461965 L0,41.461 L0,39.461 L1,39.461965 Z M296,35.512 L296,37.512 L295,37.5126783 L295,35.5126783 L296,35.512 Z M1,35.461965 L1,37.461965 L0,37.461 L0,35.461 L1,35.461965 Z M296,31.512 L296,33.512 L295,33.5126783 L295,31.5126783 L296,31.512 Z M1,31.461965 L1,33.461965 L0,33.461 L0,31.461 L1,31.461965 Z M296,27.512 L296,29.512 L295,29.5126783 L295,27.5126783 L296,27.512 Z M1,27.461965 L1,29.461965 L0,29.461 L0,27.461 L1,27.461965 Z M296,23.512 L296,25.512 L295,25.5126783 L295,23.5126783 L296,23.512 Z M1,23.461965 L1,25.461965 L0,25.461 L0,23.461 L1,23.461965 Z M296,19.512 L296,21.512 L295,21.5126783 L295,19.5126783 L296,19.512 Z M1,19.461965 L1,21.461965 L0,21.461 L0,19.461 L1,19.461965 Z M296,15.512 L296,17.512 L295,17.5126783 L295,15.5126783 L296,15.512 Z M1,15.461965 L1,17.461965 L0,17.461 L0,15.461 L1,15.461965 Z M296,11.512 L296,13.512 L295,13.5126783 L295,11.5126783 L296,11.512 Z M1,11.461965 L1,13.461965 L0,13.461 L0,11.461 L1,11.461965 Z M296,8 L296,9.512 L295,9.51268141 L294.999996,7.99230721 C294.999843,7.84931676 294.995414,7.70690426 294.986748,7.5651841 L295.984907,7.50453602 C295.99492,7.6684065 296,7.83361003 296,8 Z M0.0177810396,7.46235014 L1.01560984,7.52821226 C1.00521986,7.68466746 1,7.84198159 1,8 L1,9.46196502 L0,9.461 L0,8 C0,7.81932186 0.00598958881,7.64004267 0.0177810396,7.46235014 Z M294.669225,5.86656128 C294.490852,5.30842466 294.242774,4.77610859 293.93156,4.28094453 L294.777667,3.74792999 C295.130817,4.30965578 295.416068,4.91838179 295.621909,5.56259574 L294.669225,5.86656128 Z M1.26026912,3.68817589 L2.10224493,4.22769122 C1.78656347,4.72006919 1.53367638,5.25013249 1.3502652,5.80651827 L0.400554683,5.49338901 C0.612012081,4.85193918 0.902417359,4.24636767 1.26026912,3.68817589 Z M292.801538,2.90632361 C292.375276,2.5043409 291.900412,2.15691661 291.388372,1.8730526 L291.873899,0.998830409 C292.461218,1.32450134 293.00324,1.72192978 293.488127,2.17927796 L292.801538,2.90632361 Z M4.18303226,0.967564053 L4.66099377,1.84594486 C4.14669322,2.12566396 3.66909236,2.46925191 3.23964084,2.86780054 L2.55894601,2.13523322 C3.04750167,1.68176376 3.5928096,1.28859486 4.18303226,0.967564053 Z M288,0 C288.685315,0 289.350503,0.0861719866 289.985321,0.248271715 L289.737382,1.21704772 C289.174749,1.07342708 288.592644,1 288,1 L288,0 Z M10,0 L10,1 L8.00000019,1 L7.59669449,1.0113952 C7.19522811,1.0341476 6.79959301,1.09086155 6.41261147,1.18053077 L6.18734244,0.206234069 C6.76976317,0.0713155963 7.37655326,0 8,0 L10,0 Z M150,0 L150,1 L148,1 L148,0 L150,0 Z M278,0 L278,1 L276,1 L276,0 L278,0 Z M274,0 L274,1 L272,1 L272,0 L274,0 Z M270,0 L270,1 L268,1 L268,0 L270,0 Z M266,0 L266,1 L264,1 L264,0 L266,0 Z M262,0 L262,1 L260,1 L260,0 L262,0 Z M258,0 L258,1 L256,1 L256,0 L258,0 Z M254,0 L254,1 L252,1 L252,0 L254,0 Z M250,0 L250,1 L248,1 L248,0 L250,0 Z M246,0 L246,1 L244,1 L244,0 L246,0 Z M242,0 L242,1 L240,1 L240,0 L242,0 Z M238,0 L238,1 L236,1 L236,0 L238,0 Z M234,0 L234,1 L232,1 L232,0 L234,0 Z M230,0 L230,1 L228,1 L228,0 L230,0 Z M226,0 L226,1 L224,1 L224,0 L226,0 Z M222,0 L222,1 L220,1 L220,0 L222,0 Z M218,0 L218,1 L216,1 L216,0 L218,0 Z M214,0 L214,1 L212,1 L212,0 L214,0 Z M210,0 L210,1 L208,1 L208,0 L210,0 Z M206,0 L206,1 L204,1 L204,0 L206,0 Z M202,0 L202,1 L200,1 L200,0 L202,0 Z M197.999,0 L198,1 L196,1 L195.999,0 L197.999,0 Z M194,0 L194,1 L192,1 L192,0 L194,0 Z M190,0 L190,1 L188,1 L188,0 L190,0 Z M186,0 L186,1 L184,1 L184,0 L186,0 Z M182,0 L182,1 L180,1 L180,0 L182,0 Z M178,0 L178,1 L176,1 L176,0 L178,0 Z M174,0 L174,1 L172,1 L172,0 L174,0 Z M170,0 L170,1 L168,1 L168,0 L170,0 Z M166,0 L166,1 L164,1 L164,0 L166,0 Z M162,0 L162,1 L160,1 L160,0 L162,0 Z M158,0 L158,1 L156,1 L156,0 L158,0 Z M154,0 L154,1 L152,1 L152,0 L154,0 Z M286,0 L286,1 L284,1 L284,0 L286,0 Z M146,0 L146,1 L144,1 L144,0 L146,0 Z M142,0 L142,1 L140,1 L140,0 L142,0 Z M138,0 L138,1 L136,1 L136,0 L138,0 Z M134,0 L134,1 L132,1 L132,0 L134,0 Z M130,0 L130,1 L128,1 L128,0 L130,0 Z M126,0 L126,1 L124,1 L124,0 L126,0 Z M122,0 L122,1 L120,1 L120,0 L122,0 Z M118,0 L118,1 L116,1 L116,0 L118,0 Z M114,0 L114,1 L112,1 L112,0 L114,0 Z M110,0 L110,1 L108,1 L108,0 L110,0 Z M106,0 L106,1 L104,1 L104,0 L106,0 Z M102,0 L102,1 L100,1 L100,0 L102,0 Z M98,0 L98,1 L96,1 L96,0 L98,0 Z M94,0 L94,1 L92,1 L92,0 L94,0 Z M90,0 L90,1 L88,1 L88,0 L90,0 Z M86,0 L86,1 L84,1 L84,0 L86,0 Z M82,0 L82,1 L80,1 L80,0 L82,0 Z M78,0 L78,1 L76,1 L76,0 L78,0 Z M74,0 L74,1 L72,1 L72,0 L74,0 Z M70,0 L70,1 L68,1 L68,0 L70,0 Z M66,0 L66,1 L64,1 L64,0 L66,0 Z M62,0 L62,1 L60,1 L60,0 L62,0 Z M58,0 L58,1 L56,1 L56,0 L58,0 Z M54,0 L54,1 L52,1 L52,0 L54,0 Z M50,0 L50,1 L48,1 L48,0 L50,0 Z M46,0 L46,1 L44,1 L44,0 L46,0 Z M42,0 L42,1 L40,1 L40,0 L42,0 Z M38,0 L38,1 L36,1 L36,0 L38,0 Z M34,0 L34,1 L32,1 L32,0 L34,0 Z M30,0 L30,1 L28,1 L28,0 L30,0 Z M26,0 L26,1 L24,1 L24,0 L26,0 Z M22,0 L22,1 L20,1 L20,0 L22,0 Z M18,0 L18,1 L16,1 L16,0 L18,0 Z M14,0 L14,1 L12,1 L12,0 L14,0 Z M282,0 L282,1 L280,1 L280,0 L282,0 Z" fill-rule="nonzero"></path>
+                </g>
+                <g id="pooling-@yellow" transform="translate(20, 10)">
+                    <g id="Row" transform="translate(0, 38)">
+                        <g id="dash-box" transform="translate(-1.4, -0.4)">
+                            <path d="M4.40000021,0.4 L116.4,0.4 C118.056854,0.4 119.4,1.74314575 119.4,3.4 L119.4,19.4 C119.4,21.0568542 118.056854,22.4 116.4,22.4 L4.40000021,22.4 C2.74314596,22.4 1.40000021,21.0568542 1.40000021,19.4 L1.40000021,3.4 C1.40000021,1.74314575 2.74314596,0.4 4.40000021,0.4 Z" id="bg" fill="#F7F7F7"></path>
+                            <path d="M97.1844026,22.4 C97.1844026,22.6209139 97.0053165,22.8 96.7844026,22.8 L95.7844026,22.8 C95.5634887,22.8 95.3844026,22.6209139 95.3844026,22.4 C95.3844026,22.1790861 95.5634887,22 95.7844026,22 L96.7844025,22 C97.0053165,22 97.1844026,22.1790861 97.1844026,22.4 Z M7.58440258,22.4 C7.58440258,22.6209139 7.40531648,22.8 7.18440258,22.8 L6.18440258,22.8 C5.96348868,22.8 5.78440258,22.6209139 5.78440258,22.4 C5.78440258,22.1790861 5.96348868,22 6.18440258,22 L7.18440261,22 C7.40531648,22 7.58440258,22.1790861 7.58440258,22.4 Z M10.7844026,22.4 C10.7844026,22.6209139 10.6053165,22.8 10.3844026,22.8 L9.38440258,22.8 C9.16348868,22.8 8.98440258,22.6209139 8.98440258,22.4 C8.98440258,22.1790861 9.16348868,22 9.38440258,22 L10.3844026,22 C10.6053165,22 10.7844026,22.1790861 10.7844026,22.4 Z M100.384403,22.4 C100.384403,22.6209139 100.205316,22.8 99.9844026,22.8 L98.9844026,22.8 C98.7634887,22.8 98.5844026,22.6209139 98.5844026,22.4 C98.5844026,22.1790861 98.7634887,22 98.9844026,22 L99.9844025,22 C100.205316,22 100.384403,22.1790861 100.384403,22.4 Z M17.1844026,22.4 C17.1844026,22.6209139 17.0053165,22.8 16.7844026,22.8 L15.7844026,22.8 C15.5634887,22.8 15.3844026,22.6209139 15.3844026,22.4 C15.3844026,22.1790861 15.5634887,22 15.7844026,22 L16.7844026,22 C17.0053165,22 17.1844026,22.1790861 17.1844026,22.4 Z M20.3844026,22.4 C20.3844026,22.6209139 20.2053165,22.8 19.9844026,22.8 L18.9844026,22.8 C18.7634887,22.8 18.5844026,22.6209139 18.5844026,22.4 C18.5844026,22.1790861 18.7634887,22 18.9844026,22 L19.9844026,22 C20.2053165,22 20.3844026,22.1790861 20.3844026,22.4 Z M23.5844026,22.4 C23.5844026,22.6209139 23.4053165,22.8 23.1844026,22.8 L22.1844026,22.8 C21.9634887,22.8 21.7844026,22.6209139 21.7844026,22.4 C21.7844026,22.1790861 21.9634887,22 22.1844026,22 L23.1844026,22 C23.4053165,22 23.5844026,22.1790861 23.5844026,22.4 Z M26.7844026,22.4 C26.7844026,22.6209139 26.6053165,22.8 26.3844026,22.8 L25.3844026,22.8 C25.1634887,22.8 24.9844026,22.6209139 24.9844026,22.4 C24.9844026,22.1790861 25.1634887,22 25.3844026,22 L26.3844026,22 C26.6053165,22 26.7844026,22.1790861 26.7844026,22.4 Z M29.9844026,22.4 C29.9844026,22.6209139 29.8053165,22.8 29.5844026,22.8 L28.5844026,22.8 C28.3634887,22.8 28.1844026,22.6209139 28.1844026,22.4 C28.1844026,22.1790861 28.3634887,22 28.5844026,22 L29.5844027,22 C29.8053165,22 29.9844026,22.1790861 29.9844026,22.4 Z M33.1844026,22.4 C33.1844026,22.6209139 33.0053165,22.8 32.7844026,22.8 L31.7844026,22.8 C31.5634887,22.8 31.3844026,22.6209139 31.3844026,22.4 C31.3844026,22.1790861 31.5634887,22 31.7844026,22 L32.7844027,22 C33.0053165,22 33.1844026,22.1790861 33.1844026,22.4 Z M36.3844026,22.4 C36.3844026,22.6209139 36.2053165,22.8 35.9844026,22.8 L34.9844026,22.8 C34.7634887,22.8 34.5844026,22.6209139 34.5844026,22.4 C34.5844026,22.1790861 34.7634887,22 34.9844026,22 L35.9844027,22 C36.2053165,22 36.3844026,22.1790861 36.3844026,22.4 Z M39.5844026,22.4 C39.5844026,22.6209139 39.4053165,22.8 39.1844026,22.8 L38.1844026,22.8 C37.9634887,22.8 37.7844026,22.6209139 37.7844026,22.4 C37.7844026,22.1790861 37.9634887,22 38.1844026,22 L39.1844027,22 C39.4053165,22 39.5844026,22.1790861 39.5844026,22.4 Z M42.7844026,22.4 C42.7844026,22.6209139 42.6053165,22.8 42.3844026,22.8 L41.3844026,22.8 C41.1634887,22.8 40.9844026,22.6209139 40.9844026,22.4 C40.9844026,22.1790861 41.1634887,22 41.3844026,22 L42.3844027,22 C42.6053165,22 42.7844026,22.1790861 42.7844026,22.4 Z M45.9844026,22.4 C45.9844026,22.6209139 45.8053165,22.8 45.5844026,22.8 L44.5844026,22.8 C44.3634887,22.8 44.1844026,22.6209139 44.1844026,22.4 C44.1844026,22.1790861 44.3634887,22 44.5844026,22 L45.5844027,22 C45.8053165,22 45.9844026,22.1790861 45.9844026,22.4 Z M49.1844026,22.4 C49.1844026,22.6209139 49.0053165,22.8 48.7844026,22.8 L47.7844026,22.8 C47.5634887,22.8 47.3844026,22.6209139 47.3844026,22.4 C47.3844026,22.1790861 47.5634887,22 47.7844026,22 L48.7844027,22 C49.0053165,22 49.1844026,22.1790861 49.1844026,22.4 Z M52.3844026,22.4 C52.3844026,22.6209139 52.2053165,22.8 51.9844026,22.8 L50.9844026,22.8 C50.7634887,22.8 50.5844026,22.6209139 50.5844026,22.4 C50.5844026,22.1790861 50.7634887,22 50.9844026,22 L51.9844027,22 C52.2053165,22 52.3844026,22.1790861 52.3844026,22.4 Z M55.5844026,22.4 C55.5844026,22.6209139 55.4053165,22.8 55.1844026,22.8 L54.1844026,22.8 C53.9634887,22.8 53.7844026,22.6209139 53.7844026,22.4 C53.7844026,22.1790861 53.9634887,22 54.1844026,22 L55.1844027,22 C55.4053165,22 55.5844026,22.1790861 55.5844026,22.4 Z M58.7844026,22.4 C58.7844026,22.6209139 58.6053165,22.8 58.3844026,22.8 L57.3844026,22.8 C57.1634887,22.8 56.9844026,22.6209139 56.9844026,22.4 C56.9844026,22.1790861 57.1634887,22 57.3844026,22 L58.3844027,22 C58.6053165,22 58.7844026,22.1790861 58.7844026,22.4 Z M61.9844026,22.4 C61.9844026,22.6209139 61.8053165,22.8 61.5844026,22.8 L60.5844026,22.8 C60.3634887,22.8 60.1844026,22.6209139 60.1844026,22.4 C60.1844026,22.1790861 60.3634887,22 60.5844026,22 L61.5844025,22 C61.8053165,22 61.9844026,22.1790861 61.9844026,22.4 Z M65.1844026,22.4 C65.1844026,22.6209139 65.0053165,22.8 64.7844026,22.8 L63.7844026,22.8 C63.5634887,22.8 63.3844026,22.6209139 63.3844026,22.4 C63.3844026,22.1790861 63.5634887,22 63.7844026,22 L64.7844025,22 C65.0053165,22 65.1844026,22.1790861 65.1844026,22.4 Z M68.3844026,22.4 C68.3844026,22.6209139 68.2053165,22.8 67.9844026,22.8 L66.9844026,22.8 C66.7634887,22.8 66.5844026,22.6209139 66.5844026,22.4 C66.5844026,22.1790861 66.7634887,22 66.9844026,22 L67.9844025,22 C68.2053165,22 68.3844026,22.1790861 68.3844026,22.4 Z M13.9844026,22.4 C13.9844026,22.6209139 13.8053165,22.8 13.5844026,22.8 L12.5844026,22.8 C12.3634887,22.8 12.1844026,22.6209139 12.1844026,22.4 C12.1844026,22.1790861 12.3634887,22 12.5844026,22 L13.5844026,22 C13.8053165,22 13.9844026,22.1790861 13.9844026,22.4 Z M71.5844026,22.4 C71.5844026,22.6209139 71.4053165,22.8 71.1844026,22.8 L70.1844026,22.8 C69.9634887,22.8 69.7844026,22.6209139 69.7844026,22.4 C69.7844026,22.1790861 69.9634887,22 70.1844026,22 L71.1844025,22 C71.4053165,22 71.5844026,22.1790861 71.5844026,22.4 Z M74.7844026,22.4 C74.7844026,22.6209139 74.6053165,22.8 74.3844026,22.8 L73.3844026,22.8 C73.1634887,22.8 72.9844026,22.6209139 72.9844026,22.4 C72.9844026,22.1790861 73.1634887,22 73.3844026,22 L74.3844025,22 C74.6053165,22 74.7844026,22.1790861 74.7844026,22.4 Z M77.9844026,22.4 C77.9844026,22.6209139 77.8053165,22.8 77.5844026,22.8 L76.5844026,22.8 C76.3634887,22.8 76.1844026,22.6209139 76.1844026,22.4 C76.1844026,22.1790861 76.3634887,22 76.5844026,22 L77.5844025,22 C77.8053165,22 77.9844026,22.1790861 77.9844026,22.4 Z M81.1844026,22.4 C81.1844026,22.6209139 81.0053165,22.8 80.7844026,22.8 L79.7844026,22.8 C79.5634887,22.8 79.3844026,22.6209139 79.3844026,22.4 C79.3844026,22.1790861 79.5634887,22 79.7844026,22 L80.7844025,22 C81.0053165,22 81.1844026,22.1790861 81.1844026,22.4 Z M84.3844026,22.4 C84.3844026,22.6209139 84.2053165,22.8 83.9844026,22.8 L82.9844026,22.8 C82.7634887,22.8 82.5844026,22.6209139 82.5844026,22.4 C82.5844026,22.1790861 82.7634887,22 82.9844026,22 L83.9844025,22 C84.2053165,22 84.3844026,22.1790861 84.3844026,22.4 Z M87.5844026,22.4 C87.5844026,22.6209139 87.4053165,22.8 87.1844026,22.8 L86.1844026,22.8 C85.9634887,22.8 85.7844026,22.6209139 85.7844026,22.4 C85.7844026,22.1790861 85.9634887,22 86.1844026,22 L87.1844025,22 C87.4053165,22 87.5844026,22.1790861 87.5844026,22.4 Z M90.7844026,22.4 C90.7844026,22.6209139 90.6053165,22.8 90.3844026,22.8 L89.3844026,22.8 C89.1634887,22.8 88.9844026,22.6209139 88.9844026,22.4 C88.9844026,22.1790861 89.1634887,22 89.3844026,22 L90.3844025,22 C90.6053165,22 90.7844026,22.1790861 90.7844026,22.4 Z M93.9844026,22.4 C93.9844026,22.6209139 93.8053165,22.8 93.5844026,22.8 L92.5844026,22.8 C92.3634887,22.8 92.1844026,22.6209139 92.1844026,22.4 C92.1844026,22.1790861 92.3634887,22 92.5844026,22 L93.5844025,22 C93.8053165,22 93.9844026,22.1790861 93.9844026,22.4 Z M116.384403,22.4 C116.384403,22.6209139 116.205316,22.8 115.984403,22.8 L114.984403,22.8 C114.763489,22.8 114.584403,22.6209139 114.584403,22.4 C114.584403,22.1790861 114.763489,22 114.984403,22 L115.984403,22 C116.205316,22 116.384403,22.1790861 116.384403,22.4 Z M113.184403,22.4 C113.184403,22.6209139 113.005316,22.8 112.784403,22.8 L111.784403,22.8 C111.563489,22.8 111.384403,22.6209139 111.384403,22.4 C111.384403,22.1790861 111.563489,22 111.784403,22 L112.784403,22 C113.005316,22 113.184403,22.1790861 113.184403,22.4 Z M109.984403,22.4 C109.984403,22.6209139 109.805316,22.8 109.584403,22.8 L108.584403,22.8 C108.363489,22.8 108.184403,22.6209139 108.184403,22.4 C108.184403,22.1790861 108.363489,22 108.584403,22 L109.584403,22 C109.805316,22 109.984403,22.1790861 109.984403,22.4 Z M106.784403,22.4 C106.784403,22.6209139 106.605316,22.8 106.384403,22.8 L105.384403,22.8 C105.163489,22.8 104.984403,22.6209139 104.984403,22.4 C104.984403,22.1790861 105.163489,22 105.384403,22 L106.384402,22 C106.605316,22 106.784403,22.1790861 106.784403,22.4 Z M103.584403,22.4 C103.584403,22.6209139 103.405316,22.8 103.184403,22.8 L102.184403,22.8 C101.963489,22.8 101.784403,22.6209139 101.784403,22.4 C101.784403,22.1790861 101.963489,22 102.184403,22 L103.184402,22 C103.405316,22 103.584403,22.1790861 103.584403,22.4 Z M3.21445335,21.7146857 C3.47201296,21.8469101 3.7503283,21.9352706 4.04005994,21.9753362 C4.25889142,22.0055973 4.41175792,22.2075267 4.38149683,22.4263582 C4.35123574,22.6451896 4.14930631,22.7980562 3.93047483,22.7677951 C3.55112172,22.7153362 3.1863813,22.5995372 2.84908821,22.4263797 C2.65255935,22.3254869 2.575031,22.084379 2.6759238,21.8878502 C2.77681659,21.6913213 3.01792449,21.6137929 3.21445335,21.7146857 Z M119.085667,20.7967074 C119.266363,20.923797 119.30982,21.1733068 119.182731,21.3540032 C118.964109,21.6648408 118.694714,21.9373837 118.386455,22.1596216 C118.207257,22.2888142 117.957256,22.2482762 117.828063,22.0690774 C117.698871,21.8898786 117.739409,21.6398779 117.918607,21.5106853 C118.154633,21.3405235 118.360988,21.1317568 118.528371,20.8937713 C118.655461,20.7130749 118.904971,20.6696179 119.085667,20.7967074 Z M1.79999053,19.3738218 L1.80050042,19.4515966 C1.80607818,19.7390086 1.85825979,20.0190186 1.95396115,20.2838222 C2.02904737,20.4915841 1.92149248,20.7208778 1.71373058,20.795964 C1.50596867,20.8710502 1.27667495,20.7634953 1.20158873,20.5557334 C1.07625093,20.2089265 1.00793261,19.8423259 1.00058539,19.4621397 L1.00000988,19.3793853 C0.99847357,19.1584767 1.17630991,18.9781495 1.39721847,18.9766034 C1.61812703,18.9750769 1.79845422,19.1529133 1.79999053,19.3738218 Z M119.8,18.0077988 L119.8,19.0077988 C119.8,19.2287127 119.620914,19.4077988 119.4,19.4077988 C119.179086,19.4077988 119,19.2287127 119,19.0077988 L119,18.0077988 C119,17.7868849 119.179086,17.6077988 119.4,17.6077988 C119.620914,17.6077988 119.8,17.7868849 119.8,18.0077988 Z M1.80000021,16.1766036 L1.80000021,17.1766036 C1.80000021,17.3975175 1.62091411,17.5766036 1.40000021,17.5766036 C1.17908631,17.5766036 0.900000207,17.3975175 0.900000207,17.1766036 L0.900000207,16.1766036 C0.900000207,15.9556897 1.17908631,15.7766036 1.40000021,15.7766036 C1.62091411,15.7766036 1.80000021,15.9556897 1.80000021,16.1766036 Z M119.8,14.8077988 L119.8,15.8077988 C119.8,16.0287127 119.620914,16.2077988 119.4,16.2077988 C119.179086,16.2077988 119,16.0287127 119,15.8077988 L119,14.8077988 C119,14.5868849 119.179086,14.4077988 119.4,14.4077988 C119.620914,14.4077988 119.8,14.5868849 119.8,14.8077988 Z M1.80000021,12.9766036 L1.80000021,13.9766036 C1.80000021,14.1975175 1.62091411,14.3766036 1.40000021,14.3766036 C1.17908631,14.3766036 0.900000207,14.1975175 0.900000207,13.9766036 L0.900000207,12.9766036 C0.900000207,12.7556897 1.17908631,12.5766036 1.40000021,12.5766036 C1.62091411,12.5766036 1.80000021,12.7556897 1.80000021,12.9766036 Z M119.8,11.6077988 L119.8,12.6077988 C119.8,12.8287127 119.620914,13.0077988 119.4,13.0077988 C119.179086,13.0077988 119,12.8287127 119,12.6077988 L119,11.6077988 C119,11.3868849 119.179086,11.2077988 119.4,11.2077988 C119.620914,11.2077988 119.8,11.3868849 119.8,11.6077988 Z M1.80000021,9.77660357 L1.80000021,10.7766036 C1.80000021,10.9975175 1.62091411,11.1766036 1.40000021,11.1766036 C1.17908631,11.1766036 0.900000207,10.9975175 0.900000207,10.7766036 L0.900000207,9.77660357 C0.900000207,9.55568967 1.17908631,9.37660357 1.40000021,9.37660357 C1.62091411,9.37660357 1.80000021,9.55568967 1.80000021,9.77660357 Z M119.8,8.40779881 L119.8,9.40779881 C119.8,9.62871271 119.620914,9.80779881 119.4,9.80779881 C119.179086,9.80779881 119,9.62871271 119,9.40779881 L119,8.40779881 C119,8.18688491 119.179086,8.00779881 119.4,8.00779881 C119.620914,8.00779881 119.8,8.18688491 119.8,8.40779881 Z M1.80000021,6.57660357 L1.80000021,7.57660357 C1.80000021,7.79751746 1.62091411,7.97660357 1.40000021,7.97660357 C1.17908631,7.97660357 0.900000207,7.79751746 0.900000207,7.57660357 L0.900000207,6.57660357 C0.900000207,6.35568967 1.17908631,6.17660357 1.40000021,6.17660357 C1.62091411,6.17660357 1.80000021,6.35568967 1.80000021,6.57660357 Z M119.8,5.20779881 L119.8,6.20779881 C119.8,6.42871271 119.620914,6.60779881 119.4,6.60779881 C119.179086,6.60779881 119,6.42871271 119,6.20779881 L119,5.20779881 C119,4.98688491 119.179086,4.80779881 119.4,4.80779881 C119.620914,4.80779881 119.8,4.98688491 119.8,5.20779881 Z M1.80007849,3.37957686 C1.80001327,3.3897814 1.80001327,3.3897814 1.80000021,3.4 L1.80000021,4.37660357 C1.80000021,4.59751746 1.62091411,4.77660357 1.40000021,4.77660357 C1.17908631,4.77660357 1.00000021,4.59751746 1.00000021,4.37660357 L1.00000021,3.4 C1.00001719,3.38671374 1.00001719,3.38671374 1.00010202,3.37344172 C1.00179619,3.15253431 1.18225042,2.97482688 1.40315783,2.97650904 C1.62406523,2.97821522 1.80177267,3.15866946 1.80007849,3.37957686 Z M119.421116,1.83887624 C119.595442,2.17563306 119.712485,2.54002136 119.766215,2.9191864 C119.79721,3.13791515 119.645022,3.34035621 119.426293,3.37135118 C119.207564,3.40234614 119.005123,3.25015784 118.974128,3.0314291 C118.933094,2.74185398 118.843784,2.46380655 118.710664,2.20665099 C118.609106,2.01046515 118.685816,1.76909592 118.882002,1.66753772 C119.078188,1.56597953 119.319557,1.6426904 119.421116,1.83887624 Z M3.37834391,0.550095989 C3.47987763,0.746294496 3.40313666,0.987654157 3.20693815,1.08918788 C2.94867982,1.22283788 2.71432455,1.39926119 2.51394916,1.61035064 C2.36185852,1.77057357 2.10867834,1.77716609 1.94845541,1.62507544 C1.78823248,1.4729848 1.78163997,1.21980462 1.93373061,1.05958169 C2.19539698,0.783924034 2.50154333,0.553456192 2.83925202,0.378690235 C3.03545052,0.277156512 3.27681019,0.353897482 3.37834391,0.550095989 Z M117.517808,0.188136945 C117.72645,0.260742276 117.836729,0.488738155 117.764124,0.697379994 C117.691519,0.906021834 117.463523,1.01630121 117.254881,0.943695883 C116.98269,0.84897599 116.695024,0.800014594 116.400132,0.800000003 C116.179218,0.799989073 116.000152,0.620894112 116.000152,0.399980212 C116.000152,0.179066313 116.179258,-1.09263043e-05 116.400172,4.24230608e-09 C116.784867,1.90385059e-05 117.161375,0.064101627 117.517808,0.188136945 Z M60.2000002,0.5 C60.2000002,0.7209139 60.0209141,0.9 59.8000002,0.9 L58.8000002,0.9 C58.5790863,0.9 58.4000002,0.7209139 58.4000002,0.5 C58.4000002,0.2790861 58.5790863,0 58.8000002,0 L59.8000002,0 C60.0209141,0 60.2000002,0.2790861 60.2000002,0.5 Z M5.80000021,0.5 C5.80000021,0.7209139 5.62091411,0.9 5.40000021,0.9 L4.40000021,0.9 C4.17908631,0.9 4.00000021,0.7209139 4.00000021,0.5 C4.00000021,0.2790861 4.17908631,0 4.40000021,0 L5.40000021,0 C5.62091411,0 5.80000021,0.2790861 5.80000021,0.5 Z M108.2,0.5 C108.2,0.7209139 108.020914,0.9 107.8,0.9 L106.8,0.9 C106.579086,0.9 106.4,0.7209139 106.4,0.5 C106.4,0.2790861 106.579086,0 106.8,0 L107.8,0 C108.020914,0 108.2,0.2790861 108.2,0.5 Z M105,0.5 C105,0.7209139 104.820914,0.9 104.6,0.9 L103.6,0.9 C103.379086,0.9 103.2,0.7209139 103.2,0.5 C103.2,0.2790861 103.379086,0 103.6,0 L104.6,0 C104.820914,0 105,0.2790861 105,0.5 Z M101.8,0.5 C101.8,0.7209139 101.620914,0.9 101.4,0.9 L100.4,0.9 C100.179086,0.9 100,0.7209139 100,0.5 C100,0.2790861 100.179086,0 100.4,0 L101.4,0 C101.620914,0 101.8,0.2790861 101.8,0.5 Z M98.6000002,0.5 C98.6000002,0.7209139 98.4209141,0.9 98.2000002,0.9 L97.2000002,0.9 C96.9790863,0.9 96.8000002,0.7209139 96.8000002,0.5 C96.8000002,0.2790861 96.9790863,0 97.2000002,0 L98.2000002,0 C98.4209141,0 98.6000002,0.2790861 98.6000002,0.5 Z M95.4000002,0.5 C95.4000002,0.7209139 95.2209141,0.9 95.0000002,0.9 L94.0000002,0.9 C93.7790863,0.9 93.6000002,0.7209139 93.6000002,0.5 C93.6000002,0.2790861 93.7790863,0 94.0000002,0 L95.0000002,0 C95.2209141,0 95.4000002,0.2790861 95.4000002,0.5 Z M92.2000002,0.5 C92.2000002,0.7209139 92.0209141,0.9 91.8000002,0.9 L90.8000002,0.9 C90.5790863,0.9 90.4000002,0.7209139 90.4000002,0.5 C90.4000002,0.2790861 90.5790863,0 90.8000002,0 L91.8000002,0 C92.0209141,0 92.2000002,0.2790861 92.2000002,0.5 Z M89.0000002,0.5 C89.0000002,0.7209139 88.8209141,0.9 88.6000002,0.9 L87.6000002,0.9 C87.3790863,0.9 87.2000002,0.7209139 87.2000002,0.5 C87.2000002,0.2790861 87.3790863,0 87.6000002,0 L88.6000002,0 C88.8209141,0 89.0000002,0.2790861 89.0000002,0.5 Z M85.8000002,0.5 C85.8000002,0.7209139 85.6209141,0.9 85.4000002,0.9 L84.4000002,0.9 C84.1790863,0.9 84.0000002,0.7209139 84.0000002,0.5 C84.0000002,0.2790861 84.1790863,0 84.4000002,0 L85.4000002,0 C85.6209141,0 85.8000002,0.2790861 85.8000002,0.5 Z M82.6000002,0.5 C82.6000002,0.7209139 82.4209141,0.9 82.2000002,0.9 L81.2000002,0.9 C80.9790863,0.9 80.8000002,0.7209139 80.8000002,0.5 C80.8000002,0.2790861 80.9790863,0 81.2000002,0 L82.2000002,0 C82.4209141,0 82.6000002,0.2790861 82.6000002,0.5 Z M79.4000002,0.5 C79.4000002,0.7209139 79.2209141,0.9 79.0000002,0.9 L78.0000002,0.9 C77.7790863,0.9 77.6000002,0.7209139 77.6000002,0.5 C77.6000002,0.2790861 77.7790863,0 78.0000002,0 L79.0000002,0 C79.2209141,0 79.4000002,0.2790861 79.4000002,0.5 Z M76.2000002,0.5 C76.2000002,0.7209139 76.0209141,0.9 75.8000002,0.9 L74.8000002,0.9 C74.5790863,0.9 74.4000002,0.7209139 74.4000002,0.5 C74.4000002,0.2790861 74.5790863,0 74.8000002,0 L75.8000002,0 C76.0209141,0 76.2000002,0.2790861 76.2000002,0.5 Z M73.0000002,0.5 C73.0000002,0.7209139 72.8209141,0.9 72.6000002,0.9 L71.6000002,0.9 C71.3790863,0.9 71.2000002,0.7209139 71.2000002,0.5 C71.2000002,0.2790861 71.3790863,0 71.6000002,0 L72.6000002,0 C72.8209141,0 73.0000002,0.2790861 73.0000002,0.5 Z M69.8000002,0.5 C69.8000002,0.7209139 69.6209141,0.9 69.4000002,0.9 L68.4000002,0.9 C68.1790863,0.9 68.0000002,0.7209139 68.0000002,0.5 C68.0000002,0.2790861 68.1790863,0 68.4000002,0 L69.4000002,0 C69.6209141,0 69.8000002,0.2790861 69.8000002,0.5 Z M66.6000002,0.5 C66.6000002,0.7209139 66.4209141,0.9 66.2000002,0.9 L65.2000002,0.9 C64.9790863,0.9 64.8000002,0.7209139 64.8000002,0.5 C64.8000002,0.2790861 64.9790863,0 65.2000002,0 L66.2000002,0 C66.4209141,0 66.6000002,0.2790861 66.6000002,0.5 Z M63.4000002,0.5 C63.4000002,0.7209139 63.2209141,0.9 63.0000002,0.9 L62.0000002,0.9 C61.7790863,0.9 61.6000002,0.7209139 61.6000002,0.5 C61.6000002,0.2790861 61.7790863,0 62.0000002,0 L63.0000002,0 C63.2209141,0 63.4000002,0.2790861 63.4000002,0.5 Z M114.6,0.5 C114.6,0.7209139 114.420914,0.9 114.2,0.9 L113.2,0.9 C112.979086,0.9 112.8,0.7209139 112.8,0.5 C112.8,0.2790861 112.979086,0 113.2,0 L114.2,0 C114.420914,0 114.6,0.2790861 114.6,0.5 Z M57.0000002,0.5 C57.0000002,0.7209139 56.8209141,0.9 56.6000002,0.9 L55.6000002,0.9 C55.3790863,0.9 55.2000002,0.7209139 55.2000002,0.5 C55.2000002,0.2790861 55.3790863,0 55.6000002,0 L56.6000002,0 C56.8209141,0 57.0000002,0.2790861 57.0000002,0.5 Z M53.8000002,0.5 C53.8000002,0.7209139 53.6209141,0.9 53.4000002,0.9 L52.4000002,0.9 C52.1790863,0.9 52.0000002,0.7209139 52.0000002,0.5 C52.0000002,0.2790861 52.1790863,0 52.4000002,0 L53.4000002,0 C53.6209141,0 53.8000002,0.2790861 53.8000002,0.5 Z M50.6000002,0.5 C50.6000002,0.7209139 50.4209141,0.9 50.2000002,0.9 L49.2000002,0.9 C48.9790863,0.9 48.8000002,0.7209139 48.8000002,0.5 C48.8000002,0.2790861 48.9790863,0 49.2000002,0 L50.2000002,0 C50.4209141,0 50.6000002,0.2790861 50.6000002,0.5 Z M47.4000002,0.5 C47.4000002,0.7209139 47.2209141,0.9 47.0000002,0.9 L46.0000002,0.9 C45.7790863,0.9 45.6000002,0.7209139 45.6000002,0.5 C45.6000002,0.2790861 45.7790863,0 46.0000002,0 L47.0000002,0 C47.2209141,0 47.4000002,0.2790861 47.4000002,0.5 Z M44.2000002,0.5 C44.2000002,0.7209139 44.0209141,0.9 43.8000002,0.9 L42.8000002,0.9 C42.5790863,0.9 42.4000002,0.7209139 42.4000002,0.5 C42.4000002,0.2790861 42.5790863,0 42.8000002,0 L43.8000002,0 C44.0209141,0 44.2000002,0.2790861 44.2000002,0.5 Z M41.0000002,0.5 C41.0000002,0.7209139 40.8209141,0.9 40.6000002,0.9 L39.6000002,0.9 C39.3790863,0.9 39.2000002,0.7209139 39.2000002,0.5 C39.2000002,0.2790861 39.3790863,0 39.6000002,0 L40.6000002,0 C40.8209141,0 41.0000002,0.2790861 41.0000002,0.5 Z M37.8000002,0.5 C37.8000002,0.7209139 37.6209141,0.9 37.4000002,0.9 L36.4000002,0.9 C36.1790863,0.9 36.0000002,0.7209139 36.0000002,0.5 C36.0000002,0.2790861 36.1790863,0 36.4000002,0 L37.4000002,0 C37.6209141,0 37.8000002,0.2790861 37.8000002,0.5 Z M34.6000002,0.5 C34.6000002,0.7209139 34.4209141,0.9 34.2000002,0.9 L33.2000002,0.9 C32.9790863,0.9 32.8000002,0.7209139 32.8000002,0.5 C32.8000002,0.2790861 32.9790863,0 33.2000002,0 L34.2000002,0 C34.4209141,0 34.6000002,0.2790861 34.6000002,0.5 Z M31.4000002,0.5 C31.4000002,0.7209139 31.2209141,0.9 31.0000002,0.9 L30.0000002,0.9 C29.7790863,0.9 29.6000002,0.7209139 29.6000002,0.5 C29.6000002,0.2790861 29.7790863,0 30.0000002,0 L31.0000002,0 C31.2209141,0 31.4000002,0.2790861 31.4000002,0.5 Z M28.2000002,0.5 C28.2000002,0.7209139 28.0209141,0.9 27.8000002,0.9 L26.8000002,0.9 C26.5790863,0.9 26.4000002,0.7209139 26.4000002,0.5 C26.4000002,0.2790861 26.5790863,0 26.8000002,0 L27.8000002,0 C28.0209141,0 28.2000002,0.2790861 28.2000002,0.5 Z M25.0000002,0.5 C25.0000002,0.7209139 24.8209141,0.9 24.6000002,0.9 L23.6000002,0.9 C23.3790863,0.9 23.2000002,0.7209139 23.2000002,0.5 C23.2000002,0.2790861 23.3790863,0 23.6000002,0 L24.6000002,0 C24.8209141,0 25.0000002,0.2790861 25.0000002,0.5 Z M21.8000002,0.5 C21.8000002,0.7209139 21.6209141,0.9 21.4000002,0.9 L20.4000002,0.9 C20.1790863,0.9 20.0000002,0.7209139 20.0000002,0.5 C20.0000002,0.2790861 20.1790863,0 20.4000002,0 L21.4000002,0 C21.6209141,0 21.8000002,0.2790861 21.8000002,0.5 Z M18.6000002,0.5 C18.6000002,0.7209139 18.4209141,0.9 18.2000002,0.9 L17.2000002,0.9 C16.9790863,0.9 16.8000002,0.7209139 16.8000002,0.5 C16.8000002,0.2790861 16.9790863,0 17.2000002,0 L18.2000002,0 C18.4209141,0 18.6000002,0.2790861 18.6000002,0.5 Z M15.4000002,0.5 C15.4000002,0.7209139 15.2209141,0.9 15.0000002,0.9 L14.0000002,0.9 C13.7790863,0.9 13.6000002,0.7209139 13.6000002,0.5 C13.6000002,0.2790861 13.7790863,0 14.0000002,0 L15.0000002,0 C15.2209141,0 15.4000002,0.2790861 15.4000002,0.5 Z M12.2000002,0.5 C12.2000002,0.7209139 12.0209141,0.9 11.8000002,0.9 L10.8000002,0.9 C10.5790863,0.9 10.4000002,0.7209139 10.4000002,0.5 C10.4000002,0.2790861 10.5790863,0 10.8000002,0 L11.8000002,0 C12.0209141,0 12.2000002,0.2790861 12.2000002,0.5 Z M9.00000021,0.5 C9.00000021,0.7209139 8.82091411,0.9 8.60000021,0.9 L7.60000021,0.9 C7.37908631,0.9 7.20000021,0.7209139 7.20000021,0.5 C7.20000021,0.2790861 7.37908631,0 7.60000021,0 L8.60000021,0 C8.82091411,0 9.00000021,0.2790861 9.00000021,0.5 Z M111.4,0.5 C111.4,0.7209139 111.220914,0.9 111,0.9 L110,0.9 C109.779086,0.9 109.6,0.7209139 109.6,0.5 C109.6,0.2790861 109.779086,0 110,0 L111,0 C111.220914,0 111.4,0.2790861 111.4,0.5 Z" id="dash" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        </g>
+                        <g id="..." transform="translate(70.7, 11)" fill="#8E8E8E">
+                            <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                            <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                            <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                        </g>
+                        <g id="1" transform="translate(8, 5)" fill="#FDB515">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        </g>
+                        <g id="2" transform="translate(40, 5)" fill="#FDB515">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        </g>
+                        <g id="3" transform="translate(86, 5)" fill="#FDB515">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        </g>
+                    </g>
+                    <g id="SequencePooling" transform="translate(19, 23)" fill="#9172E2" fill-rule="nonzero">
+                        <path d="M3.16894531,9.10742188 C3.71907552,9.10742188 4.19921875,9.02604167 4.609375,8.86328125 C5.01953125,8.70052083 5.33691406,8.46207682 5.56152344,8.14794922 C5.78613281,7.83382161 5.8984375,7.46191406 5.8984375,7.03222656 C5.8984375,6.8141276 5.87076823,6.6155599 5.81542969,6.43652344 C5.76009115,6.25748698 5.68359375,6.10042318 5.5859375,5.96533203 C5.48828125,5.83024089 5.36214193,5.70735677 5.20751953,5.59667969 C5.05289714,5.4860026 4.88525391,5.38916016 4.70458984,5.30615234 C4.52392578,5.22314453 4.31315104,5.14257813 4.07226563,5.06445313 L2.52929687,4.57128906 C2.27539063,4.48665365 2.09065755,4.37679036 1.97509766,4.24169922 C1.85953776,4.10660807 1.80175781,3.93001302 1.80175781,3.71191406 C1.80175781,3.48730469 1.8733724,3.30338542 2.01660156,3.16015625 C2.15983073,3.01692708 2.33968099,2.91764323 2.55615234,2.86230469 C2.7726237,2.80696615 3.02897135,2.77929688 3.32519531,2.77929688 C3.91764323,2.77929688 4.48730469,2.93717448 5.03417969,3.25292969 C5.1155599,3.30175781 5.1953125,3.32617188 5.2734375,3.32617188 C5.39713542,3.32617188 5.50292969,3.27408854 5.59082031,3.16992187 C5.67871094,3.06575521 5.72265625,2.95019531 5.72265625,2.82324219 C5.72265625,2.76464844 5.71207682,2.7101237 5.69091797,2.65966797 C5.66975911,2.60921224 5.63639323,2.56608073 5.59082031,2.53027344 C5.35644531,2.33170573 5.02360026,2.1624349 4.59228516,2.02246094 C4.16097005,1.88248698 3.7093099,1.8125 3.23730469,1.8125 C2.47884115,1.8125 1.85546875,1.98665365 1.3671875,2.33496094 C0.87890625,2.68326823 0.634765625,3.15690104 0.634765625,3.75585937 C0.634765625,4.234375 0.758463542,4.61279297 1.00585937,4.89111328 C1.25325521,5.16943359 1.64388021,5.39811198 2.17773438,5.57714844 L3.73046875,6.09472656 C4.08203125,6.21191406 4.33349609,6.3453776 4.48486328,6.49511719 C4.63623047,6.64485677 4.71191406,6.86132812 4.71191406,7.14453125 C4.71191406,7.47330729 4.5711263,7.72151693 4.28955078,7.88916016 C4.00797526,8.05680339 3.64095052,8.140625 3.18847656,8.140625 C2.47558594,8.140625 1.81966146,7.93391927 1.22070312,7.52050781 C1.20117187,7.50748698 1.18001302,7.49772135 1.15722656,7.49121094 C1.1344401,7.48470052 1.11165365,7.48144531 1.08886719,7.48144531 C1.03027344,7.48144531 0.969238281,7.50260417 0.905761719,7.54492188 C0.842285156,7.58723958 0.786132813,7.64013672 0.737304688,7.70361328 C0.688476562,7.76708984 0.64860026,7.83626302 0.617675781,7.91113281 C0.586751302,7.9860026 0.571289062,8.05598958 0.571289062,8.12109375 C0.571289062,8.19921875 0.595703125,8.2578125 0.64453125,8.296875 C1.27604167,8.83723958 2.11751302,9.10742188 3.16894531,9.10742188 Z" id="Path"></path>
+                        <path d="M9.26757812,9.06835938 C9.97721354,9.06835938 10.5761719,8.85351563 11.0644531,8.42382813 C11.171875,8.3359375 11.2255859,8.23014323 11.2255859,8.10644531 C11.2255859,8.00878906 11.1922201,7.92252604 11.1254883,7.84765625 C11.0587565,7.77278646 10.9798177,7.73535156 10.8886719,7.73535156 C10.8268229,7.73535156 10.764974,7.75651042 10.703125,7.79882813 C10.2050781,8.15039062 9.75260417,8.32617188 9.34570312,8.32617188 C8.90625,8.32291667 8.5555013,8.19840495 8.29345703,7.95263672 C8.03141276,7.70686849 7.8889974,7.32519531 7.86621094,6.80761719 L11.0693359,6.80761719 C11.180013,6.80761719 11.2605794,6.76936849 11.3110352,6.69287109 C11.3614909,6.6163737 11.3867188,6.51953125 11.3867188,6.40234375 C11.3802083,6.08007812 11.3313802,5.78141276 11.2402344,5.50634766 C11.1490885,5.23128255 11.0164388,4.98388672 10.8422852,4.76416016 C10.6681315,4.54443359 10.4370117,4.37109375 10.1489258,4.24414062 C9.86083984,4.1171875 9.5296224,4.05371094 9.15527344,4.05371094 C8.7874349,4.05371094 8.45214844,4.12044271 8.14941406,4.25390625 C7.84667969,4.38736979 7.59765625,4.56803385 7.40234375,4.79589844 C7.20703125,5.02376302 7.05647786,5.28499349 6.95068359,5.57958984 C6.84488932,5.8741862 6.79199219,6.1858724 6.79199219,6.51464844 C6.79199219,7.30240885 7.01904297,7.92333984 7.47314453,8.37744141 C7.92724609,8.83154297 8.52539062,9.06184896 9.26757812,9.06835938 Z M7.87597656,6.16308594 C7.8922526,5.98730469 7.9305013,5.81966146 7.99072266,5.66015625 C8.05094401,5.50065104 8.13313802,5.35416667 8.23730469,5.22070312 C8.34147135,5.08723958 8.4765625,4.97981771 8.64257812,4.8984375 C8.80859375,4.81705729 8.99414062,4.77636719 9.19921875,4.77636719 C9.59635417,4.77636719 9.89908854,4.90820312 10.1074219,5.171875 C10.3157552,5.43554688 10.4345703,5.76595052 10.4638672,6.16308594 L7.87597656,6.16308594 Z" id="Shape"></path>
+                        <path d="M16.40625,10.8603516 C16.5527344,10.8603516 16.6780599,10.8164063 16.7822266,10.7285156 C16.8863932,10.640625 16.9384766,10.5185547 16.9384766,10.3623047 L16.9384766,4.63964844 C16.9384766,4.48014323 16.8839518,4.35400391 16.7749023,4.26123047 C16.6658529,4.16845703 16.5397135,4.12207031 16.3964844,4.12207031 C16.233724,4.12207031 16.1051432,4.16438802 16.0107422,4.24902344 C15.9163411,4.33365885 15.8691406,4.46223958 15.8691406,4.63476562 L15.8691406,4.90820312 C15.5501302,4.33854167 15.0065104,4.05371094 14.2382813,4.05371094 C13.9029948,4.05371094 13.6002604,4.12288411 13.3300781,4.26123047 C13.0598958,4.39957682 12.8385417,4.5859375 12.6660156,4.8203125 C12.4934896,5.0546875 12.3608398,5.32405599 12.2680664,5.62841797 C12.175293,5.93277995 12.1289062,6.25423177 12.1289062,6.59277344 C12.1289062,7.33170573 12.3250326,7.92903646 12.7172852,8.38476563 C13.1095378,8.84049479 13.6409505,9.06835938 14.3115234,9.06835938 C15.0276693,9.06835938 15.546875,8.79817708 15.8691406,8.2578125 L15.8691406,10.3623047 C15.8691406,10.5185547 15.9220378,10.640625 16.027832,10.7285156 C16.1336263,10.8164063 16.2597656,10.8603516 16.40625,10.8603516 Z M14.5019531,8.34082031 C14.2643229,8.34082031 14.0576172,8.28873698 13.8818359,8.18457031 C13.7060547,8.08040365 13.5701497,7.94124349 13.4741211,7.76708984 C13.3780924,7.5929362 13.3081055,7.40738932 13.2641602,7.21044922 C13.2202148,7.01350911 13.1982422,6.80598958 13.1982422,6.58789063 C13.1982422,6.42513021 13.2104492,6.2639974 13.2348633,6.10449219 C13.2592773,5.94498698 13.3024089,5.78385417 13.3642578,5.62109375 C13.4261068,5.45833333 13.503418,5.31591797 13.5961914,5.19384766 C13.6889648,5.07177734 13.811849,4.97167969 13.9648438,4.89355469 C14.1178385,4.81542969 14.2903646,4.77636719 14.4824219,4.77636719 C14.983724,4.77636719 15.3393555,4.93017578 15.5493164,5.23779297 C15.7592773,5.54541016 15.8642578,5.9921875 15.8642578,6.578125 C15.8642578,6.80924479 15.8390299,7.02652995 15.7885742,7.22998047 C15.7381185,7.43343099 15.6616211,7.61979167 15.559082,7.7890625 C15.456543,7.95833333 15.3149414,8.09261068 15.1342773,8.19189453 C14.9536133,8.29117839 14.7428385,8.34082031 14.5019531,8.34082031 Z" id="Shape"></path>
+                        <path d="M20.0341797,9.06835938 C20.4020182,9.06835938 20.7250977,8.99674479 21.003418,8.85351562 C21.2817383,8.71028646 21.4892578,8.50358073 21.6259766,8.23339844 L21.6259766,8.51660156 C21.6259766,8.66959635 21.6780599,8.78841146 21.7822266,8.87304688 C21.8863932,8.95768229 22.0100911,9 22.1533203,9 C22.2998047,9 22.4267578,8.95605469 22.5341797,8.86816406 C22.6416016,8.78027344 22.6953125,8.65657552 22.6953125,8.49707031 L22.6953125,4.63476563 C22.6953125,4.47200521 22.6432292,4.34586589 22.5390625,4.25634766 C22.4348958,4.16682943 22.3095703,4.12207031 22.1630859,4.12207031 C22.0166016,4.12207031 21.8904622,4.16764323 21.784668,4.25878906 C21.6788737,4.3499349 21.6259766,4.47526042 21.6259766,4.63476563 L21.6259766,6.8515625 C21.6194661,7.30078125 21.4892578,7.66129557 21.2353516,7.93310547 C20.9814453,8.20491536 20.6803385,8.34082031 20.3320312,8.34082031 C20.0325521,8.34082031 19.7924805,8.24397786 19.6118164,8.05029297 C19.4311523,7.85660807 19.3408203,7.57421875 19.3408203,7.203125 L19.3408203,4.63476563 C19.3408203,4.47200521 19.2879232,4.34586589 19.1821289,4.25634766 C19.0763346,4.16682943 18.9518229,4.12207031 18.8085938,4.12207031 C18.6621094,4.12207031 18.5359701,4.16682943 18.4301758,4.25634766 C18.3243815,4.34586589 18.2714844,4.47200521 18.2714844,4.63476563 L18.2714844,7.19335938 C18.2714844,7.51236979 18.3186849,7.79394531 18.4130859,8.03808594 C18.507487,8.28222656 18.6368815,8.47835286 18.8012695,8.62646484 C18.9656576,8.77457682 19.1520182,8.88525391 19.3603516,8.95849609 C19.5686849,9.03173828 19.7932943,9.06835938 20.0341797,9.06835938 Z" id="Path"></path>
+                        <path d="M26.2597656,9.06835938 C26.969401,9.06835938 27.5683594,8.85351563 28.0566406,8.42382813 C28.1640625,8.3359375 28.2177734,8.23014323 28.2177734,8.10644531 C28.2177734,8.00878906 28.1844076,7.92252604 28.1176758,7.84765625 C28.050944,7.77278646 27.9720052,7.73535156 27.8808594,7.73535156 C27.8190104,7.73535156 27.7571615,7.75651042 27.6953125,7.79882813 C27.1972656,8.15039062 26.7447917,8.32617188 26.3378906,8.32617188 C25.8984375,8.32291667 25.5476888,8.19840495 25.2856445,7.95263672 C25.0236003,7.70686849 24.8811849,7.32519531 24.8583984,6.80761719 L28.0615234,6.80761719 C28.1722005,6.80761719 28.2527669,6.76936849 28.3032227,6.69287109 C28.3536784,6.6163737 28.3789062,6.51953125 28.3789062,6.40234375 C28.3723958,6.08007812 28.3235677,5.78141276 28.2324219,5.50634766 C28.141276,5.23128255 28.0086263,4.98388672 27.8344727,4.76416016 C27.660319,4.54443359 27.4291992,4.37109375 27.1411133,4.24414062 C26.8530273,4.1171875 26.5218099,4.05371094 26.1474609,4.05371094 C25.7796224,4.05371094 25.4443359,4.12044271 25.1416016,4.25390625 C24.8388672,4.38736979 24.5898438,4.56803385 24.3945312,4.79589844 C24.1992188,5.02376302 24.0486654,5.28499349 23.9428711,5.57958984 C23.8370768,5.8741862 23.7841797,6.1858724 23.7841797,6.51464844 C23.7841797,7.30240885 24.0112305,7.92333984 24.465332,8.37744141 C24.9194336,8.83154297 25.5175781,9.06184896 26.2597656,9.06835938 Z M24.8681641,6.16308594 C24.8844401,5.98730469 24.9226888,5.81966146 24.9829102,5.66015625 C25.0431315,5.50065104 25.1253255,5.35416667 25.2294922,5.22070312 C25.3336589,5.08723958 25.46875,4.97981771 25.6347656,4.8984375 C25.8007812,4.81705729 25.9863281,4.77636719 26.1914062,4.77636719 C26.5885417,4.77636719 26.891276,4.90820312 27.0996094,5.171875 C27.3079427,5.43554688 27.4267578,5.76595052 27.4560547,6.16308594 L24.8681641,6.16308594 Z" id="Shape"></path>
+                        <path d="M29.9462891,9 C30.0927734,9 30.2189128,8.95442708 30.324707,8.86328125 C30.4305013,8.77213542 30.4833984,8.6468099 30.4833984,8.48730469 L30.4833984,6.265625 C30.4866536,5.81966146 30.6136068,5.45996094 30.8642578,5.18652344 C31.1149089,4.91308594 31.414388,4.77636719 31.7626953,4.77636719 C32.0621745,4.77636719 32.3046875,4.87402344 32.4902344,5.06933594 C32.6757812,5.26464844 32.7685547,5.54622396 32.7685547,5.9140625 L32.7685547,8.48730469 C32.7685547,8.6500651 32.820638,8.77620443 32.9248047,8.86572266 C33.0289714,8.95524089 33.1526693,9 33.2958984,9 C33.4423828,9 33.5693359,8.95524089 33.6767578,8.86572266 C33.7841797,8.77620443 33.8378906,8.6500651 33.8378906,8.48730469 L33.8378906,5.92382813 C33.8378906,5.60481771 33.7906901,5.32324219 33.6962891,5.07910156 C33.601888,4.83496094 33.4716797,4.63964844 33.3056641,4.49316406 C33.1396484,4.34667969 32.9516602,4.23681641 32.7416992,4.16357422 C32.5317383,4.09033203 32.3063151,4.05371094 32.0654297,4.05371094 C31.6943359,4.05371094 31.3720703,4.12451172 31.0986328,4.26611328 C30.8251953,4.40771484 30.6201172,4.61360677 30.4833984,4.88378906 L30.4833984,4.61035156 C30.4833984,4.45735677 30.4313151,4.33772786 30.3271484,4.25146484 C30.2229818,4.16520182 30.0976562,4.12207031 29.9511719,4.12207031 C29.8046875,4.12207031 29.6777344,4.16682943 29.5703125,4.25634766 C29.4628906,4.34586589 29.4091797,4.46875 29.4091797,4.625 L29.4091797,8.48730469 C29.4091797,8.6500651 29.4620768,8.77620443 29.5678711,8.86572266 C29.6736654,8.95524089 29.7998047,9 29.9462891,9 Z" id="Path"></path>
+                        <path d="M37.265625,9.06835938 C37.9264323,9.06835938 38.4244792,8.9235026 38.7597656,8.63378906 C38.8769531,8.5328776 38.9355469,8.42220052 38.9355469,8.30175781 C38.9355469,8.27246094 38.9322917,8.24397786 38.9257812,8.21630859 C38.9192708,8.18863932 38.9086914,8.16259766 38.894043,8.13818359 C38.8793945,8.11376953 38.8623047,8.09179688 38.8427734,8.07226562 C38.8232422,8.05273438 38.8020833,8.03645833 38.7792969,8.0234375 C38.7565104,8.01041667 38.7312826,7.99983724 38.7036133,7.99169922 C38.675944,7.9835612 38.6474609,7.97949219 38.6181641,7.97949219 C38.5791016,7.97949219 38.5392253,7.9860026 38.4985352,7.99902344 C38.4578451,8.01204427 38.4179688,8.03320313 38.3789062,8.0625 C38.1217448,8.24804688 37.7783203,8.34082031 37.3486328,8.34082031 C36.9091797,8.34082031 36.5649414,8.17643229 36.315918,7.84765625 C36.0668945,7.51888021 35.9423828,7.0875651 35.9423828,6.55371094 C35.9423828,6.02311198 36.0709635,5.59342448 36.328125,5.26464844 C36.5852865,4.9358724 36.9352214,4.77148437 37.3779297,4.77148437 C37.6936849,4.77148437 37.9947917,4.84960938 38.28125,5.00585938 C38.3170573,5.02539062 38.3536784,5.04003906 38.3911133,5.04980469 C38.4285482,5.05957031 38.4651693,5.06445312 38.5009766,5.06445312 C38.5400391,5.06445312 38.577474,5.05957031 38.6132812,5.04980469 C38.6490885,5.04003906 38.6808268,5.02539062 38.7084961,5.00585938 C38.7361654,4.98632813 38.7605794,4.96354167 38.7817383,4.9375 C38.8028971,4.91145833 38.8191732,4.88297526 38.8305664,4.85205078 C38.8419596,4.8211263 38.8476562,4.78776042 38.8476562,4.75195312 C38.8476562,4.71614583 38.840332,4.68033854 38.8256836,4.64453125 C38.8110352,4.60872396 38.7882487,4.57210286 38.7573242,4.53466797 C38.7263997,4.49723307 38.688151,4.46223958 38.6425781,4.4296875 C38.2845052,4.17903646 37.8336589,4.05371094 37.2900391,4.05371094 C36.9319661,4.05371094 36.6007487,4.12044271 36.2963867,4.25390625 C35.9920247,4.38736979 35.7364909,4.56803385 35.5297852,4.79589844 C35.3230794,5.02376302 35.1619466,5.2898763 35.0463867,5.59423828 C34.9308268,5.89860026 34.8730469,6.22005208 34.8730469,6.55859375 C34.8730469,7.02083333 34.9682617,7.44075521 35.1586914,7.81835938 C35.3491211,8.19596354 35.6282552,8.49869792 35.9960938,8.7265625 C36.3639323,8.95442708 36.7871094,9.06835938 37.265625,9.06835938 Z" id="Path"></path>
+                        <path d="M42.0654297,9.06835938 C42.7750651,9.06835938 43.3740234,8.85351563 43.8623047,8.42382813 C43.9697266,8.3359375 44.0234375,8.23014323 44.0234375,8.10644531 C44.0234375,8.00878906 43.9900716,7.92252604 43.9233398,7.84765625 C43.8566081,7.77278646 43.7776693,7.73535156 43.6865234,7.73535156 C43.6246745,7.73535156 43.5628255,7.75651042 43.5009766,7.79882813 C43.0029297,8.15039062 42.5504557,8.32617188 42.1435547,8.32617188 C41.7041016,8.32291667 41.3533529,8.19840495 41.0913086,7.95263672 C40.8292643,7.70686849 40.686849,7.32519531 40.6640625,6.80761719 L43.8671875,6.80761719 C43.9778646,6.80761719 44.058431,6.76936849 44.1088867,6.69287109 C44.1593424,6.6163737 44.1845703,6.51953125 44.1845703,6.40234375 C44.1780599,6.08007812 44.1292318,5.78141276 44.0380859,5.50634766 C43.9469401,5.23128255 43.8142904,4.98388672 43.6401367,4.76416016 C43.4659831,4.54443359 43.2348633,4.37109375 42.9467773,4.24414062 C42.6586914,4.1171875 42.327474,4.05371094 41.953125,4.05371094 C41.5852865,4.05371094 41.25,4.12044271 40.9472656,4.25390625 C40.6445312,4.38736979 40.3955078,4.56803385 40.2001953,4.79589844 C40.0048828,5.02376302 39.8543294,5.28499349 39.7485352,5.57958984 C39.6427409,5.8741862 39.5898438,6.1858724 39.5898438,6.51464844 C39.5898438,7.30240885 39.8168945,7.92333984 40.2709961,8.37744141 C40.7250977,8.83154297 41.3232422,9.06184896 42.0654297,9.06835938 Z M40.6738281,6.16308594 C40.6901042,5.98730469 40.7283529,5.81966146 40.7885742,5.66015625 C40.8487956,5.50065104 40.9309896,5.35416667 41.0351562,5.22070312 C41.1393229,5.08723958 41.2744141,4.97981771 41.4404297,4.8984375 C41.6064453,4.81705729 41.7919922,4.77636719 41.9970703,4.77636719 C42.3942057,4.77636719 42.6969401,4.90820312 42.9052734,5.171875 C43.1136068,5.43554688 43.2324219,5.76595052 43.2617188,6.16308594 L40.6738281,6.16308594 Z" id="Shape"></path>
+                        <path d="M45.9863281,9.00488281 C46.1523438,9.00488281 46.2947591,8.95361328 46.4135742,8.85107422 C46.5323893,8.74853516 46.5917969,8.60449219 46.5917969,8.41894531 L46.5917969,6.11914062 L48.0126953,6.11914062 C48.8557943,6.11914062 49.5035807,5.95719401 49.9560547,5.63330078 C50.4085286,5.30940755 50.6347656,4.78125 50.6347656,4.04882813 C50.6347656,3.32617188 50.4191081,2.80208333 49.987793,2.4765625 C49.5564779,2.15104167 48.9339193,1.98828125 48.1201172,1.98828125 L45.9960938,1.98828125 C45.8268229,1.98828125 45.6819661,2.0476888 45.5615234,2.16650391 C45.4410807,2.28531901 45.3808594,2.42773438 45.3808594,2.59375 L45.3808594,8.41894531 C45.3808594,8.60449219 45.4410807,8.74853516 45.5615234,8.85107422 C45.6819661,8.95361328 45.8235677,9.00488281 45.9863281,9.00488281 Z M46.5917969,5.19628906 L46.5917969,2.95019531 L48.0126953,2.95019531 C48.507487,2.95019531 48.8696289,3.02587891 49.0991211,3.17724609 C49.3286133,3.32861328 49.4433594,3.61914063 49.4433594,4.04882813 C49.4433594,4.48502604 49.3253581,4.78531901 49.0893555,4.94970703 C48.8533529,5.11409505 48.4830729,5.19628906 47.9785156,5.19628906 L46.5917969,5.19628906 Z" id="Shape"></path>
+                        <path d="M53.6962891,8.34082031 C53.2438151,8.34082031 52.8898112,8.18457031 52.6342773,7.87207031 C52.3787435,7.55957031 52.2509766,7.125 52.2509766,6.56835937 C52.2509766,6.00520833 52.3787435,5.56575521 52.6342773,5.25 C52.8898112,4.93424479 53.2438151,4.77636719 53.6962891,4.77636719 C54.148763,4.77636719 54.5027669,4.93505859 54.7583008,5.25244141 C55.0138346,5.56982422 55.1416016,6.00846354 55.1416016,6.56835937 C55.1416016,7.125 55.0146484,7.55957031 54.7607422,7.87207031 C54.5068359,8.18457031 54.1520182,8.34082031 53.6962891,8.34082031 Z M53.6962891,9.06835938 C54.0283203,9.06835938 54.3326823,9.02197266 54.609375,8.92919922 C54.8860677,8.83642578 55.1212565,8.71110026 55.3149414,8.55322266 C55.5086263,8.39534505 55.6730143,8.20898438 55.8081055,7.99414063 C55.9431966,7.77929688 56.0424805,7.55224609 56.105957,7.31298828 C56.1694336,7.07373047 56.2011719,6.82552083 56.2011719,6.56835938 C56.2011719,6.22981771 56.1474609,5.91080729 56.0400391,5.61132813 C55.9326172,5.31184896 55.777181,5.04492188 55.5737305,4.81054688 C55.3702799,4.57617187 55.1074219,4.3914388 54.7851562,4.25634766 C54.4628906,4.12125651 54.0999349,4.05371094 53.6962891,4.05371094 C53.289388,4.05371094 52.9231771,4.12288411 52.5976562,4.26123047 C52.2721354,4.39957682 52.0092773,4.5867513 51.809082,4.82275391 C51.6088867,5.05875651 51.4558919,5.32568359 51.3500977,5.62353516 C51.2443034,5.92138672 51.1914062,6.23632813 51.1914062,6.56835938 C51.1914062,6.89388021 51.2434896,7.2039388 51.3476562,7.49853516 C51.4518229,7.79313151 51.6040039,8.05843099 51.8041992,8.29443359 C52.0043945,8.5304362 52.2672526,8.71842448 52.5927734,8.85839844 C52.9182943,8.9983724 53.2861328,9.06835938 53.6962891,9.06835938 Z" id="Shape"></path>
+                        <path d="M59.4824219,8.34082031 C59.0299479,8.34082031 58.675944,8.18457031 58.4204102,7.87207031 C58.1648763,7.55957031 58.0371094,7.125 58.0371094,6.56835937 C58.0371094,6.00520833 58.1648763,5.56575521 58.4204102,5.25 C58.675944,4.93424479 59.0299479,4.77636719 59.4824219,4.77636719 C59.9348958,4.77636719 60.2888997,4.93505859 60.5444336,5.25244141 C60.7999674,5.56982422 60.9277344,6.00846354 60.9277344,6.56835937 C60.9277344,7.125 60.8007812,7.55957031 60.546875,7.87207031 C60.2929688,8.18457031 59.938151,8.34082031 59.4824219,8.34082031 Z M59.4824219,9.06835938 C59.8144531,9.06835938 60.1188151,9.02197266 60.3955078,8.92919922 C60.6722005,8.83642578 60.9073893,8.71110026 61.1010742,8.55322266 C61.2947591,8.39534505 61.4591471,8.20898438 61.5942383,7.99414063 C61.7293294,7.77929688 61.8286133,7.55224609 61.8920898,7.31298828 C61.9555664,7.07373047 61.9873047,6.82552083 61.9873047,6.56835938 C61.9873047,6.22981771 61.9335938,5.91080729 61.8261719,5.61132813 C61.71875,5.31184896 61.5633138,5.04492188 61.3598633,4.81054688 C61.1564128,4.57617187 60.8935547,4.3914388 60.5712891,4.25634766 C60.2490234,4.12125651 59.8860677,4.05371094 59.4824219,4.05371094 C59.0755208,4.05371094 58.7093099,4.12288411 58.3837891,4.26123047 C58.0582682,4.39957682 57.7954102,4.5867513 57.5952148,4.82275391 C57.3950195,5.05875651 57.2420247,5.32568359 57.1362305,5.62353516 C57.0304362,5.92138672 56.9775391,6.23632813 56.9775391,6.56835938 C56.9775391,6.89388021 57.0296224,7.2039388 57.1337891,7.49853516 C57.2379557,7.79313151 57.3901367,8.05843099 57.590332,8.29443359 C57.7905273,8.5304362 58.0533854,8.71842448 58.3789062,8.85839844 C58.7044271,8.9983724 59.0722656,9.06835938 59.4824219,9.06835938 Z" id="Shape"></path>
+                        <path d="M63.6083984,9 C63.7548828,9 63.8826497,8.95198568 63.9916992,8.85595703 C64.1007487,8.75992839 64.1552734,8.63053385 64.1552734,8.46777344 L64.1552734,2.52539062 C64.1552734,2.36263021 64.1015625,2.23323568 63.9941406,2.13720703 C63.8867188,2.04117839 63.7613932,1.99316406 63.6181641,1.99316406 C63.4716797,1.99316406 63.3455404,2.04036458 63.2397461,2.13476562 C63.1339518,2.22916667 63.0810547,2.359375 63.0810547,2.52539062 L63.0810547,8.46777344 C63.0810547,8.63704427 63.1323242,8.76806641 63.2348633,8.86083984 C63.3374023,8.95361328 63.4619141,9 63.6083984,9 Z" id="Path"></path>
+                        <path d="M66.0791016,9 C66.2255859,9 66.3533529,8.95198568 66.4624023,8.85595703 C66.5714518,8.75992839 66.6259766,8.63053385 66.6259766,8.46777344 L66.6259766,4.65429688 C66.6259766,4.48828125 66.5730794,4.35807292 66.4672852,4.26367188 C66.3614909,4.16927083 66.2369792,4.12207031 66.09375,4.12207031 C65.9472656,4.12207031 65.8203125,4.16927083 65.7128906,4.26367188 C65.6054688,4.35807292 65.5517578,4.48828125 65.5517578,4.65429688 L65.5517578,8.46777344 C65.5517578,8.63704427 65.6030273,8.76806641 65.7055664,8.86083984 C65.8081055,8.95361328 65.9326172,9 66.0791016,9 Z M66.0839844,3.1015625 C66.2792969,3.1015625 66.4363607,3.04541016 66.5551758,2.93310547 C66.6739909,2.82080078 66.7333984,2.671875 66.7333984,2.48632812 C66.7333984,2.30403646 66.6748047,2.15592448 66.5576172,2.04199219 C66.4404297,1.9280599 66.2841797,1.87109375 66.0888672,1.87109375 C65.8935547,1.87109375 65.7364909,1.9280599 65.6176758,2.04199219 C65.4988607,2.15592448 65.4394531,2.30403646 65.4394531,2.48632812 C65.4394531,2.671875 65.4988607,2.82080078 65.6176758,2.93310547 C65.7364909,3.04541016 65.8919271,3.1015625 66.0839844,3.1015625 Z" id="Shape"></path>
+                        <path d="M68.4765625,9 C68.6230469,9 68.7491862,8.95442708 68.8549805,8.86328125 C68.9607747,8.77213542 69.0136719,8.6468099 69.0136719,8.48730469 L69.0136719,6.265625 C69.0169271,5.81966146 69.1438802,5.45996094 69.3945312,5.18652344 C69.6451823,4.91308594 69.9446615,4.77636719 70.2929688,4.77636719 C70.5924479,4.77636719 70.8349609,4.87402344 71.0205078,5.06933594 C71.2060547,5.26464844 71.2988281,5.54622396 71.2988281,5.9140625 L71.2988281,8.48730469 C71.2988281,8.6500651 71.3509115,8.77620443 71.4550781,8.86572266 C71.5592448,8.95524089 71.6829427,9 71.8261719,9 C71.9726562,9 72.0996094,8.95524089 72.2070312,8.86572266 C72.3144531,8.77620443 72.3681641,8.6500651 72.3681641,8.48730469 L72.3681641,5.92382813 C72.3681641,5.60481771 72.3209635,5.32324219 72.2265625,5.07910156 C72.1321615,4.83496094 72.0019531,4.63964844 71.8359375,4.49316406 C71.6699219,4.34667969 71.4819336,4.23681641 71.2719727,4.16357422 C71.0620117,4.09033203 70.8365885,4.05371094 70.5957031,4.05371094 C70.2246094,4.05371094 69.9023438,4.12451172 69.6289062,4.26611328 C69.3554688,4.40771484 69.1503906,4.61360677 69.0136719,4.88378906 L69.0136719,4.61035156 C69.0136719,4.45735677 68.9615885,4.33772786 68.8574219,4.25146484 C68.7532552,4.16520182 68.6279297,4.12207031 68.4814453,4.12207031 C68.3349609,4.12207031 68.2080078,4.16682943 68.1005859,4.25634766 C67.9931641,4.34586589 67.9394531,4.46875 67.9394531,4.625 L67.9394531,8.48730469 C67.9394531,8.6500651 67.9923503,8.77620443 68.0981445,8.86572266 C68.2039388,8.95524089 68.3300781,9 68.4765625,9 Z" id="Path"></path>
+                        <path d="M75.7958984,11.1337891 C76.5348307,11.1337891 77.1232096,10.9498698 77.5610352,10.5820312 C77.9988607,10.2141927 78.2177734,9.66080729 78.2177734,8.921875 L78.2177734,4.62988281 C78.2177734,4.4703776 78.1673177,4.34586589 78.0664062,4.25634766 C77.9654948,4.16682943 77.8483073,4.12207031 77.7148438,4.12207031 C77.5911458,4.12207031 77.4812826,4.15869141 77.3852539,4.23193359 C77.2892253,4.30517578 77.2330729,4.40690104 77.2167969,4.53710938 L77.2167969,4.86425781 C77.1061198,4.69173177 76.9913737,4.55013021 76.8725586,4.43945312 C76.7537435,4.32877604 76.5917969,4.23681641 76.3867188,4.16357422 C76.1816406,4.09033203 75.9407552,4.05371094 75.6640625,4.05371094 C75.2018229,4.05371094 74.7981771,4.16438802 74.453125,4.38574219 C74.1080729,4.60709635 73.8468424,4.90820313 73.6694336,5.2890625 C73.4920247,5.66992188 73.4033203,6.10449219 73.4033203,6.59277344 C73.4033203,7.3186849 73.610026,7.90136719 74.0234375,8.34082031 C74.436849,8.78027344 74.9934896,9 75.6933594,9 C76.3964844,9 76.8961589,8.7281901 77.1923828,8.18457031 L77.1923828,9 C77.1923828,9.44596354 77.0613607,9.79264323 76.7993164,10.0400391 C76.5372721,10.2874349 76.1767578,10.4111328 75.7177734,10.4111328 C75.5745443,10.4111328 75.4305013,10.3989258 75.2856445,10.3745117 C75.1407878,10.3500977 75.0292969,10.3256836 74.9511719,10.3012695 C74.8730469,10.2768555 74.7648112,10.2386068 74.6264648,10.1865234 C74.4881185,10.1344401 74.3994141,10.101888 74.3603516,10.0888672 C74.3440755,10.0823568 74.3269857,10.0766602 74.309082,10.0717773 C74.2911784,10.0668945 74.2749023,10.0636393 74.2602539,10.0620117 C74.2456055,10.0603841 74.2301432,10.0595703 74.2138672,10.0595703 C74.148763,10.0595703 74.0901693,10.0766602 74.0380859,10.1108398 C73.9860026,10.1450195 73.9461263,10.1889648 73.918457,10.2426758 C73.8907878,10.2963867 73.8769531,10.3533529 73.8769531,10.4135742 C73.8769531,10.4737956 73.8924154,10.5323893 73.9233398,10.5893555 C73.9542643,10.6463216 74.000651,10.6943359 74.0625,10.7333984 C74.2415365,10.8440755 74.4864909,10.9384766 74.7973633,11.0166016 C75.1082357,11.0947266 75.4410807,11.1337891 75.7958984,11.1337891 Z M75.8544922,8.30175781 C75.4638672,8.29199219 75.1359049,8.1414388 74.8706055,7.85009766 C74.605306,7.55875651 74.4726562,7.12988281 74.4726562,6.56347656 C74.4726562,6.31933594 74.4995117,6.09309896 74.5532227,5.88476562 C74.6069336,5.67643229 74.6875,5.48763021 74.7949219,5.31835938 C74.9023438,5.14908854 75.0480143,5.0164388 75.2319336,4.92041016 C75.4158529,4.82438151 75.6282552,4.77636719 75.8691406,4.77636719 C76.110026,4.77636719 76.3175456,4.8219401 76.4916992,4.91308594 C76.6658529,5.00423177 76.8025716,5.1336263 76.9018555,5.30126953 C77.0011393,5.46891276 77.0735677,5.65690104 77.1191406,5.86523438 C77.1647135,6.07356771 77.1875,6.30957031 77.1875,6.57324219 C77.1842448,7.15592448 77.0605469,7.58886719 76.8164062,7.87207031 C76.5722656,8.15527344 76.2516276,8.2985026 75.8544922,8.30175781 Z" id="Shape"></path>
+                    </g>
+                    <path d="M59.3225439,12.2031441 C62.6560848,16.7208844 66.1301415,18.9510713 69.748172,18.9510713 C74.7538911,18.9510713 74.7538911,18.9510713 82.6259887,18.9510713 C84.5126631,18.9510713 84.5126631,18.9510713 86.4236326,18.9510713 C94.8330394,18.9510713 94.8330394,18.9510713 101.172043,18.9510713 C108.030214,18.9510713 112.5,23.7940815 112.5,28.5000091 C112.5,28.776156 112.276142,29.0000091 112,29.0000091 C111.723858,29.0000091 111.5,28.776156 111.5,28.5000091 C111.5,24.3292512 107.459222,19.9510713 101.172043,19.9510713 C94.8330394,19.9510713 94.8330394,19.9510713 86.4236326,19.9510713 C84.5126631,19.9510713 84.5126631,19.9510713 82.6259887,19.9510713 C74.7538911,19.9510713 74.7538911,19.9510713 69.748172,19.9510713 C65.9077041,19.9510713 62.2868773,17.7134178 58.8894866,13.2828606 C54.9106667,17.71626 51.3228866,19.9510713 48.0922581,19.9510713 C42.7550547,19.9510713 42.7550547,19.9510713 33.5167726,19.9510713 C32.504542,19.9510713 32.504542,19.9510713 31.4883383,19.9510713 C23.0383623,19.9510713 23.0383623,19.9510713 16.8727074,19.9510713 C10.6282824,19.9510713 6.5,24.1435765 6.5,28.5000091 C6.5,28.776156 6.27614237,29.0000091 6,29.0000091 C5.72385763,29.0000091 5.5,28.776156 5.5,28.5000091 C5.5,23.5947177 10.0725127,18.9510713 16.8727074,18.9510713 C23.0383623,18.9510713 23.0383623,18.9510713 31.4883383,18.9510713 C32.504542,18.9510713 32.504542,18.9510713 33.5167726,18.9510713 C42.7550547,18.9510713 42.7550547,18.9510713 48.0922581,18.9510713 C51.0786702,18.9510713 54.5746062,16.7068389 58.5439371,12.1707495 C58.7542549,11.9304016 59.1329181,11.9461562 59.3225439,12.2031441 Z" id="Path" fill="#8E8E8E" fill-rule="nonzero"></path>
+                    <g id="top" transform="translate(47, 0)" fill="#FDB515">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="pooling-@blue" transform="translate(158, 10)">
+                    <g id="Row" transform="translate(0, 38)">
+                        <g id="dash-box" transform="translate(-1.4, -0.4)">
+                            <path d="M4.40000021,0.4 L116.4,0.4 C118.056854,0.4 119.4,1.74314575 119.4,3.4 L119.4,19.4 C119.4,21.0568542 118.056854,22.4 116.4,22.4 L4.40000021,22.4 C2.74314596,22.4 1.40000021,21.0568542 1.40000021,19.4 L1.40000021,3.4 C1.40000021,1.74314575 2.74314596,0.4 4.40000021,0.4 Z" id="bg" fill="#F7F7F7"></path>
+                            <path d="M97.1844026,22.4 C97.1844026,22.6209139 97.0053165,22.8 96.7844026,22.8 L95.7844026,22.8 C95.5634887,22.8 95.3844026,22.6209139 95.3844026,22.4 C95.3844026,22.1790861 95.5634887,22 95.7844026,22 L96.7844025,22 C97.0053165,22 97.1844026,22.1790861 97.1844026,22.4 Z M7.58440258,22.4 C7.58440258,22.6209139 7.40531648,22.8 7.18440258,22.8 L6.18440258,22.8 C5.96348868,22.8 5.78440258,22.6209139 5.78440258,22.4 C5.78440258,22.1790861 5.96348868,22 6.18440258,22 L7.18440261,22 C7.40531648,22 7.58440258,22.1790861 7.58440258,22.4 Z M10.7844026,22.4 C10.7844026,22.6209139 10.6053165,22.8 10.3844026,22.8 L9.38440258,22.8 C9.16348868,22.8 8.98440258,22.6209139 8.98440258,22.4 C8.98440258,22.1790861 9.16348868,22 9.38440258,22 L10.3844026,22 C10.6053165,22 10.7844026,22.1790861 10.7844026,22.4 Z M100.384403,22.4 C100.384403,22.6209139 100.205316,22.8 99.9844026,22.8 L98.9844026,22.8 C98.7634887,22.8 98.5844026,22.6209139 98.5844026,22.4 C98.5844026,22.1790861 98.7634887,22 98.9844026,22 L99.9844025,22 C100.205316,22 100.384403,22.1790861 100.384403,22.4 Z M17.1844026,22.4 C17.1844026,22.6209139 17.0053165,22.8 16.7844026,22.8 L15.7844026,22.8 C15.5634887,22.8 15.3844026,22.6209139 15.3844026,22.4 C15.3844026,22.1790861 15.5634887,22 15.7844026,22 L16.7844026,22 C17.0053165,22 17.1844026,22.1790861 17.1844026,22.4 Z M20.3844026,22.4 C20.3844026,22.6209139 20.2053165,22.8 19.9844026,22.8 L18.9844026,22.8 C18.7634887,22.8 18.5844026,22.6209139 18.5844026,22.4 C18.5844026,22.1790861 18.7634887,22 18.9844026,22 L19.9844026,22 C20.2053165,22 20.3844026,22.1790861 20.3844026,22.4 Z M23.5844026,22.4 C23.5844026,22.6209139 23.4053165,22.8 23.1844026,22.8 L22.1844026,22.8 C21.9634887,22.8 21.7844026,22.6209139 21.7844026,22.4 C21.7844026,22.1790861 21.9634887,22 22.1844026,22 L23.1844026,22 C23.4053165,22 23.5844026,22.1790861 23.5844026,22.4 Z M26.7844026,22.4 C26.7844026,22.6209139 26.6053165,22.8 26.3844026,22.8 L25.3844026,22.8 C25.1634887,22.8 24.9844026,22.6209139 24.9844026,22.4 C24.9844026,22.1790861 25.1634887,22 25.3844026,22 L26.3844026,22 C26.6053165,22 26.7844026,22.1790861 26.7844026,22.4 Z M29.9844026,22.4 C29.9844026,22.6209139 29.8053165,22.8 29.5844026,22.8 L28.5844026,22.8 C28.3634887,22.8 28.1844026,22.6209139 28.1844026,22.4 C28.1844026,22.1790861 28.3634887,22 28.5844026,22 L29.5844027,22 C29.8053165,22 29.9844026,22.1790861 29.9844026,22.4 Z M33.1844026,22.4 C33.1844026,22.6209139 33.0053165,22.8 32.7844026,22.8 L31.7844026,22.8 C31.5634887,22.8 31.3844026,22.6209139 31.3844026,22.4 C31.3844026,22.1790861 31.5634887,22 31.7844026,22 L32.7844027,22 C33.0053165,22 33.1844026,22.1790861 33.1844026,22.4 Z M36.3844026,22.4 C36.3844026,22.6209139 36.2053165,22.8 35.9844026,22.8 L34.9844026,22.8 C34.7634887,22.8 34.5844026,22.6209139 34.5844026,22.4 C34.5844026,22.1790861 34.7634887,22 34.9844026,22 L35.9844027,22 C36.2053165,22 36.3844026,22.1790861 36.3844026,22.4 Z M39.5844026,22.4 C39.5844026,22.6209139 39.4053165,22.8 39.1844026,22.8 L38.1844026,22.8 C37.9634887,22.8 37.7844026,22.6209139 37.7844026,22.4 C37.7844026,22.1790861 37.9634887,22 38.1844026,22 L39.1844027,22 C39.4053165,22 39.5844026,22.1790861 39.5844026,22.4 Z M42.7844026,22.4 C42.7844026,22.6209139 42.6053165,22.8 42.3844026,22.8 L41.3844026,22.8 C41.1634887,22.8 40.9844026,22.6209139 40.9844026,22.4 C40.9844026,22.1790861 41.1634887,22 41.3844026,22 L42.3844027,22 C42.6053165,22 42.7844026,22.1790861 42.7844026,22.4 Z M45.9844026,22.4 C45.9844026,22.6209139 45.8053165,22.8 45.5844026,22.8 L44.5844026,22.8 C44.3634887,22.8 44.1844026,22.6209139 44.1844026,22.4 C44.1844026,22.1790861 44.3634887,22 44.5844026,22 L45.5844027,22 C45.8053165,22 45.9844026,22.1790861 45.9844026,22.4 Z M49.1844026,22.4 C49.1844026,22.6209139 49.0053165,22.8 48.7844026,22.8 L47.7844026,22.8 C47.5634887,22.8 47.3844026,22.6209139 47.3844026,22.4 C47.3844026,22.1790861 47.5634887,22 47.7844026,22 L48.7844027,22 C49.0053165,22 49.1844026,22.1790861 49.1844026,22.4 Z M52.3844026,22.4 C52.3844026,22.6209139 52.2053165,22.8 51.9844026,22.8 L50.9844026,22.8 C50.7634887,22.8 50.5844026,22.6209139 50.5844026,22.4 C50.5844026,22.1790861 50.7634887,22 50.9844026,22 L51.9844027,22 C52.2053165,22 52.3844026,22.1790861 52.3844026,22.4 Z M55.5844026,22.4 C55.5844026,22.6209139 55.4053165,22.8 55.1844026,22.8 L54.1844026,22.8 C53.9634887,22.8 53.7844026,22.6209139 53.7844026,22.4 C53.7844026,22.1790861 53.9634887,22 54.1844026,22 L55.1844027,22 C55.4053165,22 55.5844026,22.1790861 55.5844026,22.4 Z M58.7844026,22.4 C58.7844026,22.6209139 58.6053165,22.8 58.3844026,22.8 L57.3844026,22.8 C57.1634887,22.8 56.9844026,22.6209139 56.9844026,22.4 C56.9844026,22.1790861 57.1634887,22 57.3844026,22 L58.3844027,22 C58.6053165,22 58.7844026,22.1790861 58.7844026,22.4 Z M61.9844026,22.4 C61.9844026,22.6209139 61.8053165,22.8 61.5844026,22.8 L60.5844026,22.8 C60.3634887,22.8 60.1844026,22.6209139 60.1844026,22.4 C60.1844026,22.1790861 60.3634887,22 60.5844026,22 L61.5844025,22 C61.8053165,22 61.9844026,22.1790861 61.9844026,22.4 Z M65.1844026,22.4 C65.1844026,22.6209139 65.0053165,22.8 64.7844026,22.8 L63.7844026,22.8 C63.5634887,22.8 63.3844026,22.6209139 63.3844026,22.4 C63.3844026,22.1790861 63.5634887,22 63.7844026,22 L64.7844025,22 C65.0053165,22 65.1844026,22.1790861 65.1844026,22.4 Z M68.3844026,22.4 C68.3844026,22.6209139 68.2053165,22.8 67.9844026,22.8 L66.9844026,22.8 C66.7634887,22.8 66.5844026,22.6209139 66.5844026,22.4 C66.5844026,22.1790861 66.7634887,22 66.9844026,22 L67.9844025,22 C68.2053165,22 68.3844026,22.1790861 68.3844026,22.4 Z M13.9844026,22.4 C13.9844026,22.6209139 13.8053165,22.8 13.5844026,22.8 L12.5844026,22.8 C12.3634887,22.8 12.1844026,22.6209139 12.1844026,22.4 C12.1844026,22.1790861 12.3634887,22 12.5844026,22 L13.5844026,22 C13.8053165,22 13.9844026,22.1790861 13.9844026,22.4 Z M71.5844026,22.4 C71.5844026,22.6209139 71.4053165,22.8 71.1844026,22.8 L70.1844026,22.8 C69.9634887,22.8 69.7844026,22.6209139 69.7844026,22.4 C69.7844026,22.1790861 69.9634887,22 70.1844026,22 L71.1844025,22 C71.4053165,22 71.5844026,22.1790861 71.5844026,22.4 Z M74.7844026,22.4 C74.7844026,22.6209139 74.6053165,22.8 74.3844026,22.8 L73.3844026,22.8 C73.1634887,22.8 72.9844026,22.6209139 72.9844026,22.4 C72.9844026,22.1790861 73.1634887,22 73.3844026,22 L74.3844025,22 C74.6053165,22 74.7844026,22.1790861 74.7844026,22.4 Z M77.9844026,22.4 C77.9844026,22.6209139 77.8053165,22.8 77.5844026,22.8 L76.5844026,22.8 C76.3634887,22.8 76.1844026,22.6209139 76.1844026,22.4 C76.1844026,22.1790861 76.3634887,22 76.5844026,22 L77.5844025,22 C77.8053165,22 77.9844026,22.1790861 77.9844026,22.4 Z M81.1844026,22.4 C81.1844026,22.6209139 81.0053165,22.8 80.7844026,22.8 L79.7844026,22.8 C79.5634887,22.8 79.3844026,22.6209139 79.3844026,22.4 C79.3844026,22.1790861 79.5634887,22 79.7844026,22 L80.7844025,22 C81.0053165,22 81.1844026,22.1790861 81.1844026,22.4 Z M84.3844026,22.4 C84.3844026,22.6209139 84.2053165,22.8 83.9844026,22.8 L82.9844026,22.8 C82.7634887,22.8 82.5844026,22.6209139 82.5844026,22.4 C82.5844026,22.1790861 82.7634887,22 82.9844026,22 L83.9844025,22 C84.2053165,22 84.3844026,22.1790861 84.3844026,22.4 Z M87.5844026,22.4 C87.5844026,22.6209139 87.4053165,22.8 87.1844026,22.8 L86.1844026,22.8 C85.9634887,22.8 85.7844026,22.6209139 85.7844026,22.4 C85.7844026,22.1790861 85.9634887,22 86.1844026,22 L87.1844025,22 C87.4053165,22 87.5844026,22.1790861 87.5844026,22.4 Z M90.7844026,22.4 C90.7844026,22.6209139 90.6053165,22.8 90.3844026,22.8 L89.3844026,22.8 C89.1634887,22.8 88.9844026,22.6209139 88.9844026,22.4 C88.9844026,22.1790861 89.1634887,22 89.3844026,22 L90.3844025,22 C90.6053165,22 90.7844026,22.1790861 90.7844026,22.4 Z M93.9844026,22.4 C93.9844026,22.6209139 93.8053165,22.8 93.5844026,22.8 L92.5844026,22.8 C92.3634887,22.8 92.1844026,22.6209139 92.1844026,22.4 C92.1844026,22.1790861 92.3634887,22 92.5844026,22 L93.5844025,22 C93.8053165,22 93.9844026,22.1790861 93.9844026,22.4 Z M116.384403,22.4 C116.384403,22.6209139 116.205316,22.8 115.984403,22.8 L114.984403,22.8 C114.763489,22.8 114.584403,22.6209139 114.584403,22.4 C114.584403,22.1790861 114.763489,22 114.984403,22 L115.984403,22 C116.205316,22 116.384403,22.1790861 116.384403,22.4 Z M113.184403,22.4 C113.184403,22.6209139 113.005316,22.8 112.784403,22.8 L111.784403,22.8 C111.563489,22.8 111.384403,22.6209139 111.384403,22.4 C111.384403,22.1790861 111.563489,22 111.784403,22 L112.784403,22 C113.005316,22 113.184403,22.1790861 113.184403,22.4 Z M109.984403,22.4 C109.984403,22.6209139 109.805316,22.8 109.584403,22.8 L108.584403,22.8 C108.363489,22.8 108.184403,22.6209139 108.184403,22.4 C108.184403,22.1790861 108.363489,22 108.584403,22 L109.584403,22 C109.805316,22 109.984403,22.1790861 109.984403,22.4 Z M106.784403,22.4 C106.784403,22.6209139 106.605316,22.8 106.384403,22.8 L105.384403,22.8 C105.163489,22.8 104.984403,22.6209139 104.984403,22.4 C104.984403,22.1790861 105.163489,22 105.384403,22 L106.384402,22 C106.605316,22 106.784403,22.1790861 106.784403,22.4 Z M103.584403,22.4 C103.584403,22.6209139 103.405316,22.8 103.184403,22.8 L102.184403,22.8 C101.963489,22.8 101.784403,22.6209139 101.784403,22.4 C101.784403,22.1790861 101.963489,22 102.184403,22 L103.184402,22 C103.405316,22 103.584403,22.1790861 103.584403,22.4 Z M3.21445335,21.7146857 C3.47201296,21.8469101 3.7503283,21.9352706 4.04005994,21.9753362 C4.25889142,22.0055973 4.41175792,22.2075267 4.38149683,22.4263582 C4.35123574,22.6451896 4.14930631,22.7980562 3.93047483,22.7677951 C3.55112172,22.7153362 3.1863813,22.5995372 2.84908821,22.4263797 C2.65255935,22.3254869 2.575031,22.084379 2.6759238,21.8878502 C2.77681659,21.6913213 3.01792449,21.6137929 3.21445335,21.7146857 Z M119.085667,20.7967074 C119.266363,20.923797 119.30982,21.1733068 119.182731,21.3540032 C118.964109,21.6648408 118.694714,21.9373837 118.386455,22.1596216 C118.207257,22.2888142 117.957256,22.2482762 117.828063,22.0690774 C117.698871,21.8898786 117.739409,21.6398779 117.918607,21.5106853 C118.154633,21.3405235 118.360988,21.1317568 118.528371,20.8937713 C118.655461,20.7130749 118.904971,20.6696179 119.085667,20.7967074 Z M1.79999053,19.3738218 L1.80050042,19.4515966 C1.80607818,19.7390086 1.85825979,20.0190186 1.95396115,20.2838222 C2.02904737,20.4915841 1.92149248,20.7208778 1.71373058,20.795964 C1.50596867,20.8710502 1.27667495,20.7634953 1.20158873,20.5557334 C1.07625093,20.2089265 1.00793261,19.8423259 1.00058539,19.4621397 L1.00000988,19.3793853 C0.99847357,19.1584767 1.17630991,18.9781495 1.39721847,18.9766034 C1.61812703,18.9750769 1.79845422,19.1529133 1.79999053,19.3738218 Z M119.8,18.0077988 L119.8,19.0077988 C119.8,19.2287127 119.620914,19.4077988 119.4,19.4077988 C119.179086,19.4077988 119,19.2287127 119,19.0077988 L119,18.0077988 C119,17.7868849 119.179086,17.6077988 119.4,17.6077988 C119.620914,17.6077988 119.8,17.7868849 119.8,18.0077988 Z M1.80000021,16.1766036 L1.80000021,17.1766036 C1.80000021,17.3975175 1.62091411,17.5766036 1.40000021,17.5766036 C1.17908631,17.5766036 0.900000207,17.3975175 0.900000207,17.1766036 L0.900000207,16.1766036 C0.900000207,15.9556897 1.17908631,15.7766036 1.40000021,15.7766036 C1.62091411,15.7766036 1.80000021,15.9556897 1.80000021,16.1766036 Z M119.8,14.8077988 L119.8,15.8077988 C119.8,16.0287127 119.620914,16.2077988 119.4,16.2077988 C119.179086,16.2077988 119,16.0287127 119,15.8077988 L119,14.8077988 C119,14.5868849 119.179086,14.4077988 119.4,14.4077988 C119.620914,14.4077988 119.8,14.5868849 119.8,14.8077988 Z M1.80000021,12.9766036 L1.80000021,13.9766036 C1.80000021,14.1975175 1.62091411,14.3766036 1.40000021,14.3766036 C1.17908631,14.3766036 0.900000207,14.1975175 0.900000207,13.9766036 L0.900000207,12.9766036 C0.900000207,12.7556897 1.17908631,12.5766036 1.40000021,12.5766036 C1.62091411,12.5766036 1.80000021,12.7556897 1.80000021,12.9766036 Z M119.8,11.6077988 L119.8,12.6077988 C119.8,12.8287127 119.620914,13.0077988 119.4,13.0077988 C119.179086,13.0077988 119,12.8287127 119,12.6077988 L119,11.6077988 C119,11.3868849 119.179086,11.2077988 119.4,11.2077988 C119.620914,11.2077988 119.8,11.3868849 119.8,11.6077988 Z M1.80000021,9.77660357 L1.80000021,10.7766036 C1.80000021,10.9975175 1.62091411,11.1766036 1.40000021,11.1766036 C1.17908631,11.1766036 0.900000207,10.9975175 0.900000207,10.7766036 L0.900000207,9.77660357 C0.900000207,9.55568967 1.17908631,9.37660357 1.40000021,9.37660357 C1.62091411,9.37660357 1.80000021,9.55568967 1.80000021,9.77660357 Z M119.8,8.40779881 L119.8,9.40779881 C119.8,9.62871271 119.620914,9.80779881 119.4,9.80779881 C119.179086,9.80779881 119,9.62871271 119,9.40779881 L119,8.40779881 C119,8.18688491 119.179086,8.00779881 119.4,8.00779881 C119.620914,8.00779881 119.8,8.18688491 119.8,8.40779881 Z M1.80000021,6.57660357 L1.80000021,7.57660357 C1.80000021,7.79751746 1.62091411,7.97660357 1.40000021,7.97660357 C1.17908631,7.97660357 0.900000207,7.79751746 0.900000207,7.57660357 L0.900000207,6.57660357 C0.900000207,6.35568967 1.17908631,6.17660357 1.40000021,6.17660357 C1.62091411,6.17660357 1.80000021,6.35568967 1.80000021,6.57660357 Z M119.8,5.20779881 L119.8,6.20779881 C119.8,6.42871271 119.620914,6.60779881 119.4,6.60779881 C119.179086,6.60779881 119,6.42871271 119,6.20779881 L119,5.20779881 C119,4.98688491 119.179086,4.80779881 119.4,4.80779881 C119.620914,4.80779881 119.8,4.98688491 119.8,5.20779881 Z M1.80007849,3.37957686 C1.80001327,3.3897814 1.80001327,3.3897814 1.80000021,3.4 L1.80000021,4.37660357 C1.80000021,4.59751746 1.62091411,4.77660357 1.40000021,4.77660357 C1.17908631,4.77660357 1.00000021,4.59751746 1.00000021,4.37660357 L1.00000021,3.4 C1.00001719,3.38671374 1.00001719,3.38671374 1.00010202,3.37344172 C1.00179619,3.15253431 1.18225042,2.97482688 1.40315783,2.97650904 C1.62406523,2.97821522 1.80177267,3.15866946 1.80007849,3.37957686 Z M119.421116,1.83887624 C119.595442,2.17563306 119.712485,2.54002136 119.766215,2.9191864 C119.79721,3.13791515 119.645022,3.34035621 119.426293,3.37135118 C119.207564,3.40234614 119.005123,3.25015784 118.974128,3.0314291 C118.933094,2.74185398 118.843784,2.46380655 118.710664,2.20665099 C118.609106,2.01046515 118.685816,1.76909592 118.882002,1.66753772 C119.078188,1.56597953 119.319557,1.6426904 119.421116,1.83887624 Z M3.37834391,0.550095989 C3.47987763,0.746294496 3.40313666,0.987654157 3.20693815,1.08918788 C2.94867982,1.22283788 2.71432455,1.39926119 2.51394916,1.61035064 C2.36185852,1.77057357 2.10867834,1.77716609 1.94845541,1.62507544 C1.78823248,1.4729848 1.78163997,1.21980462 1.93373061,1.05958169 C2.19539698,0.783924034 2.50154333,0.553456192 2.83925202,0.378690235 C3.03545052,0.277156512 3.27681019,0.353897482 3.37834391,0.550095989 Z M117.517808,0.188136945 C117.72645,0.260742276 117.836729,0.488738155 117.764124,0.697379994 C117.691519,0.906021834 117.463523,1.01630121 117.254881,0.943695883 C116.98269,0.84897599 116.695024,0.800014594 116.400132,0.800000003 C116.179218,0.799989073 116.000152,0.620894112 116.000152,0.399980212 C116.000152,0.179066313 116.179258,-1.09263043e-05 116.400172,4.24230608e-09 C116.784867,1.90385059e-05 117.161375,0.064101627 117.517808,0.188136945 Z M60.2000002,0.5 C60.2000002,0.7209139 60.0209141,0.9 59.8000002,0.9 L58.8000002,0.9 C58.5790863,0.9 58.4000002,0.7209139 58.4000002,0.5 C58.4000002,0.2790861 58.5790863,0 58.8000002,0 L59.8000002,0 C60.0209141,0 60.2000002,0.2790861 60.2000002,0.5 Z M5.80000021,0.5 C5.80000021,0.7209139 5.62091411,0.9 5.40000021,0.9 L4.40000021,0.9 C4.17908631,0.9 4.00000021,0.7209139 4.00000021,0.5 C4.00000021,0.2790861 4.17908631,0 4.40000021,0 L5.40000021,0 C5.62091411,0 5.80000021,0.2790861 5.80000021,0.5 Z M108.2,0.5 C108.2,0.7209139 108.020914,0.9 107.8,0.9 L106.8,0.9 C106.579086,0.9 106.4,0.7209139 106.4,0.5 C106.4,0.2790861 106.579086,0 106.8,0 L107.8,0 C108.020914,0 108.2,0.2790861 108.2,0.5 Z M105,0.5 C105,0.7209139 104.820914,0.9 104.6,0.9 L103.6,0.9 C103.379086,0.9 103.2,0.7209139 103.2,0.5 C103.2,0.2790861 103.379086,0 103.6,0 L104.6,0 C104.820914,0 105,0.2790861 105,0.5 Z M101.8,0.5 C101.8,0.7209139 101.620914,0.9 101.4,0.9 L100.4,0.9 C100.179086,0.9 100,0.7209139 100,0.5 C100,0.2790861 100.179086,0 100.4,0 L101.4,0 C101.620914,0 101.8,0.2790861 101.8,0.5 Z M98.6000002,0.5 C98.6000002,0.7209139 98.4209141,0.9 98.2000002,0.9 L97.2000002,0.9 C96.9790863,0.9 96.8000002,0.7209139 96.8000002,0.5 C96.8000002,0.2790861 96.9790863,0 97.2000002,0 L98.2000002,0 C98.4209141,0 98.6000002,0.2790861 98.6000002,0.5 Z M95.4000002,0.5 C95.4000002,0.7209139 95.2209141,0.9 95.0000002,0.9 L94.0000002,0.9 C93.7790863,0.9 93.6000002,0.7209139 93.6000002,0.5 C93.6000002,0.2790861 93.7790863,0 94.0000002,0 L95.0000002,0 C95.2209141,0 95.4000002,0.2790861 95.4000002,0.5 Z M92.2000002,0.5 C92.2000002,0.7209139 92.0209141,0.9 91.8000002,0.9 L90.8000002,0.9 C90.5790863,0.9 90.4000002,0.7209139 90.4000002,0.5 C90.4000002,0.2790861 90.5790863,0 90.8000002,0 L91.8000002,0 C92.0209141,0 92.2000002,0.2790861 92.2000002,0.5 Z M89.0000002,0.5 C89.0000002,0.7209139 88.8209141,0.9 88.6000002,0.9 L87.6000002,0.9 C87.3790863,0.9 87.2000002,0.7209139 87.2000002,0.5 C87.2000002,0.2790861 87.3790863,0 87.6000002,0 L88.6000002,0 C88.8209141,0 89.0000002,0.2790861 89.0000002,0.5 Z M85.8000002,0.5 C85.8000002,0.7209139 85.6209141,0.9 85.4000002,0.9 L84.4000002,0.9 C84.1790863,0.9 84.0000002,0.7209139 84.0000002,0.5 C84.0000002,0.2790861 84.1790863,0 84.4000002,0 L85.4000002,0 C85.6209141,0 85.8000002,0.2790861 85.8000002,0.5 Z M82.6000002,0.5 C82.6000002,0.7209139 82.4209141,0.9 82.2000002,0.9 L81.2000002,0.9 C80.9790863,0.9 80.8000002,0.7209139 80.8000002,0.5 C80.8000002,0.2790861 80.9790863,0 81.2000002,0 L82.2000002,0 C82.4209141,0 82.6000002,0.2790861 82.6000002,0.5 Z M79.4000002,0.5 C79.4000002,0.7209139 79.2209141,0.9 79.0000002,0.9 L78.0000002,0.9 C77.7790863,0.9 77.6000002,0.7209139 77.6000002,0.5 C77.6000002,0.2790861 77.7790863,0 78.0000002,0 L79.0000002,0 C79.2209141,0 79.4000002,0.2790861 79.4000002,0.5 Z M76.2000002,0.5 C76.2000002,0.7209139 76.0209141,0.9 75.8000002,0.9 L74.8000002,0.9 C74.5790863,0.9 74.4000002,0.7209139 74.4000002,0.5 C74.4000002,0.2790861 74.5790863,0 74.8000002,0 L75.8000002,0 C76.0209141,0 76.2000002,0.2790861 76.2000002,0.5 Z M73.0000002,0.5 C73.0000002,0.7209139 72.8209141,0.9 72.6000002,0.9 L71.6000002,0.9 C71.3790863,0.9 71.2000002,0.7209139 71.2000002,0.5 C71.2000002,0.2790861 71.3790863,0 71.6000002,0 L72.6000002,0 C72.8209141,0 73.0000002,0.2790861 73.0000002,0.5 Z M69.8000002,0.5 C69.8000002,0.7209139 69.6209141,0.9 69.4000002,0.9 L68.4000002,0.9 C68.1790863,0.9 68.0000002,0.7209139 68.0000002,0.5 C68.0000002,0.2790861 68.1790863,0 68.4000002,0 L69.4000002,0 C69.6209141,0 69.8000002,0.2790861 69.8000002,0.5 Z M66.6000002,0.5 C66.6000002,0.7209139 66.4209141,0.9 66.2000002,0.9 L65.2000002,0.9 C64.9790863,0.9 64.8000002,0.7209139 64.8000002,0.5 C64.8000002,0.2790861 64.9790863,0 65.2000002,0 L66.2000002,0 C66.4209141,0 66.6000002,0.2790861 66.6000002,0.5 Z M63.4000002,0.5 C63.4000002,0.7209139 63.2209141,0.9 63.0000002,0.9 L62.0000002,0.9 C61.7790863,0.9 61.6000002,0.7209139 61.6000002,0.5 C61.6000002,0.2790861 61.7790863,0 62.0000002,0 L63.0000002,0 C63.2209141,0 63.4000002,0.2790861 63.4000002,0.5 Z M114.6,0.5 C114.6,0.7209139 114.420914,0.9 114.2,0.9 L113.2,0.9 C112.979086,0.9 112.8,0.7209139 112.8,0.5 C112.8,0.2790861 112.979086,0 113.2,0 L114.2,0 C114.420914,0 114.6,0.2790861 114.6,0.5 Z M57.0000002,0.5 C57.0000002,0.7209139 56.8209141,0.9 56.6000002,0.9 L55.6000002,0.9 C55.3790863,0.9 55.2000002,0.7209139 55.2000002,0.5 C55.2000002,0.2790861 55.3790863,0 55.6000002,0 L56.6000002,0 C56.8209141,0 57.0000002,0.2790861 57.0000002,0.5 Z M53.8000002,0.5 C53.8000002,0.7209139 53.6209141,0.9 53.4000002,0.9 L52.4000002,0.9 C52.1790863,0.9 52.0000002,0.7209139 52.0000002,0.5 C52.0000002,0.2790861 52.1790863,0 52.4000002,0 L53.4000002,0 C53.6209141,0 53.8000002,0.2790861 53.8000002,0.5 Z M50.6000002,0.5 C50.6000002,0.7209139 50.4209141,0.9 50.2000002,0.9 L49.2000002,0.9 C48.9790863,0.9 48.8000002,0.7209139 48.8000002,0.5 C48.8000002,0.2790861 48.9790863,0 49.2000002,0 L50.2000002,0 C50.4209141,0 50.6000002,0.2790861 50.6000002,0.5 Z M47.4000002,0.5 C47.4000002,0.7209139 47.2209141,0.9 47.0000002,0.9 L46.0000002,0.9 C45.7790863,0.9 45.6000002,0.7209139 45.6000002,0.5 C45.6000002,0.2790861 45.7790863,0 46.0000002,0 L47.0000002,0 C47.2209141,0 47.4000002,0.2790861 47.4000002,0.5 Z M44.2000002,0.5 C44.2000002,0.7209139 44.0209141,0.9 43.8000002,0.9 L42.8000002,0.9 C42.5790863,0.9 42.4000002,0.7209139 42.4000002,0.5 C42.4000002,0.2790861 42.5790863,0 42.8000002,0 L43.8000002,0 C44.0209141,0 44.2000002,0.2790861 44.2000002,0.5 Z M41.0000002,0.5 C41.0000002,0.7209139 40.8209141,0.9 40.6000002,0.9 L39.6000002,0.9 C39.3790863,0.9 39.2000002,0.7209139 39.2000002,0.5 C39.2000002,0.2790861 39.3790863,0 39.6000002,0 L40.6000002,0 C40.8209141,0 41.0000002,0.2790861 41.0000002,0.5 Z M37.8000002,0.5 C37.8000002,0.7209139 37.6209141,0.9 37.4000002,0.9 L36.4000002,0.9 C36.1790863,0.9 36.0000002,0.7209139 36.0000002,0.5 C36.0000002,0.2790861 36.1790863,0 36.4000002,0 L37.4000002,0 C37.6209141,0 37.8000002,0.2790861 37.8000002,0.5 Z M34.6000002,0.5 C34.6000002,0.7209139 34.4209141,0.9 34.2000002,0.9 L33.2000002,0.9 C32.9790863,0.9 32.8000002,0.7209139 32.8000002,0.5 C32.8000002,0.2790861 32.9790863,0 33.2000002,0 L34.2000002,0 C34.4209141,0 34.6000002,0.2790861 34.6000002,0.5 Z M31.4000002,0.5 C31.4000002,0.7209139 31.2209141,0.9 31.0000002,0.9 L30.0000002,0.9 C29.7790863,0.9 29.6000002,0.7209139 29.6000002,0.5 C29.6000002,0.2790861 29.7790863,0 30.0000002,0 L31.0000002,0 C31.2209141,0 31.4000002,0.2790861 31.4000002,0.5 Z M28.2000002,0.5 C28.2000002,0.7209139 28.0209141,0.9 27.8000002,0.9 L26.8000002,0.9 C26.5790863,0.9 26.4000002,0.7209139 26.4000002,0.5 C26.4000002,0.2790861 26.5790863,0 26.8000002,0 L27.8000002,0 C28.0209141,0 28.2000002,0.2790861 28.2000002,0.5 Z M25.0000002,0.5 C25.0000002,0.7209139 24.8209141,0.9 24.6000002,0.9 L23.6000002,0.9 C23.3790863,0.9 23.2000002,0.7209139 23.2000002,0.5 C23.2000002,0.2790861 23.3790863,0 23.6000002,0 L24.6000002,0 C24.8209141,0 25.0000002,0.2790861 25.0000002,0.5 Z M21.8000002,0.5 C21.8000002,0.7209139 21.6209141,0.9 21.4000002,0.9 L20.4000002,0.9 C20.1790863,0.9 20.0000002,0.7209139 20.0000002,0.5 C20.0000002,0.2790861 20.1790863,0 20.4000002,0 L21.4000002,0 C21.6209141,0 21.8000002,0.2790861 21.8000002,0.5 Z M18.6000002,0.5 C18.6000002,0.7209139 18.4209141,0.9 18.2000002,0.9 L17.2000002,0.9 C16.9790863,0.9 16.8000002,0.7209139 16.8000002,0.5 C16.8000002,0.2790861 16.9790863,0 17.2000002,0 L18.2000002,0 C18.4209141,0 18.6000002,0.2790861 18.6000002,0.5 Z M15.4000002,0.5 C15.4000002,0.7209139 15.2209141,0.9 15.0000002,0.9 L14.0000002,0.9 C13.7790863,0.9 13.6000002,0.7209139 13.6000002,0.5 C13.6000002,0.2790861 13.7790863,0 14.0000002,0 L15.0000002,0 C15.2209141,0 15.4000002,0.2790861 15.4000002,0.5 Z M12.2000002,0.5 C12.2000002,0.7209139 12.0209141,0.9 11.8000002,0.9 L10.8000002,0.9 C10.5790863,0.9 10.4000002,0.7209139 10.4000002,0.5 C10.4000002,0.2790861 10.5790863,0 10.8000002,0 L11.8000002,0 C12.0209141,0 12.2000002,0.2790861 12.2000002,0.5 Z M9.00000021,0.5 C9.00000021,0.7209139 8.82091411,0.9 8.60000021,0.9 L7.60000021,0.9 C7.37908631,0.9 7.20000021,0.7209139 7.20000021,0.5 C7.20000021,0.2790861 7.37908631,0 7.60000021,0 L8.60000021,0 C8.82091411,0 9.00000021,0.2790861 9.00000021,0.5 Z M111.4,0.5 C111.4,0.7209139 111.220914,0.9 111,0.9 L110,0.9 C109.779086,0.9 109.6,0.7209139 109.6,0.5 C109.6,0.2790861 109.779086,0 110,0 L111,0 C111.220914,0 111.4,0.2790861 111.4,0.5 Z" id="dash" fill="#8E8E8E" fill-rule="nonzero"></path>
+                        </g>
+                        <g id="..." transform="translate(70.7, 11)" fill="#8E8E8E">
+                            <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                            <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                            <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                        </g>
+                        <g id="1" transform="translate(8, 5)" fill="#30A2FF">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        </g>
+                        <g id="2" transform="translate(40, 5)" fill="#30A2FF">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        </g>
+                        <g id="3" transform="translate(86, 5)" fill="#30A2FF">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        </g>
+                    </g>
+                    <g id="SequencePooling-Copy" transform="translate(20, 23)" fill="#9172E2" fill-rule="nonzero">
+                        <path d="M3.16894531,9.10742188 C3.71907552,9.10742188 4.19921875,9.02604167 4.609375,8.86328125 C5.01953125,8.70052083 5.33691406,8.46207682 5.56152344,8.14794922 C5.78613281,7.83382161 5.8984375,7.46191406 5.8984375,7.03222656 C5.8984375,6.8141276 5.87076823,6.6155599 5.81542969,6.43652344 C5.76009115,6.25748698 5.68359375,6.10042318 5.5859375,5.96533203 C5.48828125,5.83024089 5.36214193,5.70735677 5.20751953,5.59667969 C5.05289714,5.4860026 4.88525391,5.38916016 4.70458984,5.30615234 C4.52392578,5.22314453 4.31315104,5.14257813 4.07226563,5.06445313 L2.52929687,4.57128906 C2.27539063,4.48665365 2.09065755,4.37679036 1.97509766,4.24169922 C1.85953776,4.10660807 1.80175781,3.93001302 1.80175781,3.71191406 C1.80175781,3.48730469 1.8733724,3.30338542 2.01660156,3.16015625 C2.15983073,3.01692708 2.33968099,2.91764323 2.55615234,2.86230469 C2.7726237,2.80696615 3.02897135,2.77929688 3.32519531,2.77929688 C3.91764323,2.77929688 4.48730469,2.93717448 5.03417969,3.25292969 C5.1155599,3.30175781 5.1953125,3.32617188 5.2734375,3.32617188 C5.39713542,3.32617188 5.50292969,3.27408854 5.59082031,3.16992187 C5.67871094,3.06575521 5.72265625,2.95019531 5.72265625,2.82324219 C5.72265625,2.76464844 5.71207682,2.7101237 5.69091797,2.65966797 C5.66975911,2.60921224 5.63639323,2.56608073 5.59082031,2.53027344 C5.35644531,2.33170573 5.02360026,2.1624349 4.59228516,2.02246094 C4.16097005,1.88248698 3.7093099,1.8125 3.23730469,1.8125 C2.47884115,1.8125 1.85546875,1.98665365 1.3671875,2.33496094 C0.87890625,2.68326823 0.634765625,3.15690104 0.634765625,3.75585937 C0.634765625,4.234375 0.758463542,4.61279297 1.00585937,4.89111328 C1.25325521,5.16943359 1.64388021,5.39811198 2.17773438,5.57714844 L3.73046875,6.09472656 C4.08203125,6.21191406 4.33349609,6.3453776 4.48486328,6.49511719 C4.63623047,6.64485677 4.71191406,6.86132812 4.71191406,7.14453125 C4.71191406,7.47330729 4.5711263,7.72151693 4.28955078,7.88916016 C4.00797526,8.05680339 3.64095052,8.140625 3.18847656,8.140625 C2.47558594,8.140625 1.81966146,7.93391927 1.22070312,7.52050781 C1.20117187,7.50748698 1.18001302,7.49772135 1.15722656,7.49121094 C1.1344401,7.48470052 1.11165365,7.48144531 1.08886719,7.48144531 C1.03027344,7.48144531 0.969238281,7.50260417 0.905761719,7.54492188 C0.842285156,7.58723958 0.786132813,7.64013672 0.737304688,7.70361328 C0.688476562,7.76708984 0.64860026,7.83626302 0.617675781,7.91113281 C0.586751302,7.9860026 0.571289062,8.05598958 0.571289062,8.12109375 C0.571289062,8.19921875 0.595703125,8.2578125 0.64453125,8.296875 C1.27604167,8.83723958 2.11751302,9.10742188 3.16894531,9.10742188 Z" id="Path"></path>
+                        <path d="M9.26757812,9.06835938 C9.97721354,9.06835938 10.5761719,8.85351563 11.0644531,8.42382813 C11.171875,8.3359375 11.2255859,8.23014323 11.2255859,8.10644531 C11.2255859,8.00878906 11.1922201,7.92252604 11.1254883,7.84765625 C11.0587565,7.77278646 10.9798177,7.73535156 10.8886719,7.73535156 C10.8268229,7.73535156 10.764974,7.75651042 10.703125,7.79882813 C10.2050781,8.15039062 9.75260417,8.32617188 9.34570312,8.32617188 C8.90625,8.32291667 8.5555013,8.19840495 8.29345703,7.95263672 C8.03141276,7.70686849 7.8889974,7.32519531 7.86621094,6.80761719 L11.0693359,6.80761719 C11.180013,6.80761719 11.2605794,6.76936849 11.3110352,6.69287109 C11.3614909,6.6163737 11.3867188,6.51953125 11.3867188,6.40234375 C11.3802083,6.08007812 11.3313802,5.78141276 11.2402344,5.50634766 C11.1490885,5.23128255 11.0164388,4.98388672 10.8422852,4.76416016 C10.6681315,4.54443359 10.4370117,4.37109375 10.1489258,4.24414062 C9.86083984,4.1171875 9.5296224,4.05371094 9.15527344,4.05371094 C8.7874349,4.05371094 8.45214844,4.12044271 8.14941406,4.25390625 C7.84667969,4.38736979 7.59765625,4.56803385 7.40234375,4.79589844 C7.20703125,5.02376302 7.05647786,5.28499349 6.95068359,5.57958984 C6.84488932,5.8741862 6.79199219,6.1858724 6.79199219,6.51464844 C6.79199219,7.30240885 7.01904297,7.92333984 7.47314453,8.37744141 C7.92724609,8.83154297 8.52539062,9.06184896 9.26757812,9.06835938 Z M7.87597656,6.16308594 C7.8922526,5.98730469 7.9305013,5.81966146 7.99072266,5.66015625 C8.05094401,5.50065104 8.13313802,5.35416667 8.23730469,5.22070312 C8.34147135,5.08723958 8.4765625,4.97981771 8.64257812,4.8984375 C8.80859375,4.81705729 8.99414062,4.77636719 9.19921875,4.77636719 C9.59635417,4.77636719 9.89908854,4.90820312 10.1074219,5.171875 C10.3157552,5.43554688 10.4345703,5.76595052 10.4638672,6.16308594 L7.87597656,6.16308594 Z" id="Shape"></path>
+                        <path d="M16.40625,10.8603516 C16.5527344,10.8603516 16.6780599,10.8164063 16.7822266,10.7285156 C16.8863932,10.640625 16.9384766,10.5185547 16.9384766,10.3623047 L16.9384766,4.63964844 C16.9384766,4.48014323 16.8839518,4.35400391 16.7749023,4.26123047 C16.6658529,4.16845703 16.5397135,4.12207031 16.3964844,4.12207031 C16.233724,4.12207031 16.1051432,4.16438802 16.0107422,4.24902344 C15.9163411,4.33365885 15.8691406,4.46223958 15.8691406,4.63476562 L15.8691406,4.90820312 C15.5501302,4.33854167 15.0065104,4.05371094 14.2382813,4.05371094 C13.9029948,4.05371094 13.6002604,4.12288411 13.3300781,4.26123047 C13.0598958,4.39957682 12.8385417,4.5859375 12.6660156,4.8203125 C12.4934896,5.0546875 12.3608398,5.32405599 12.2680664,5.62841797 C12.175293,5.93277995 12.1289062,6.25423177 12.1289062,6.59277344 C12.1289062,7.33170573 12.3250326,7.92903646 12.7172852,8.38476563 C13.1095378,8.84049479 13.6409505,9.06835938 14.3115234,9.06835938 C15.0276693,9.06835938 15.546875,8.79817708 15.8691406,8.2578125 L15.8691406,10.3623047 C15.8691406,10.5185547 15.9220378,10.640625 16.027832,10.7285156 C16.1336263,10.8164063 16.2597656,10.8603516 16.40625,10.8603516 Z M14.5019531,8.34082031 C14.2643229,8.34082031 14.0576172,8.28873698 13.8818359,8.18457031 C13.7060547,8.08040365 13.5701497,7.94124349 13.4741211,7.76708984 C13.3780924,7.5929362 13.3081055,7.40738932 13.2641602,7.21044922 C13.2202148,7.01350911 13.1982422,6.80598958 13.1982422,6.58789063 C13.1982422,6.42513021 13.2104492,6.2639974 13.2348633,6.10449219 C13.2592773,5.94498698 13.3024089,5.78385417 13.3642578,5.62109375 C13.4261068,5.45833333 13.503418,5.31591797 13.5961914,5.19384766 C13.6889648,5.07177734 13.811849,4.97167969 13.9648438,4.89355469 C14.1178385,4.81542969 14.2903646,4.77636719 14.4824219,4.77636719 C14.983724,4.77636719 15.3393555,4.93017578 15.5493164,5.23779297 C15.7592773,5.54541016 15.8642578,5.9921875 15.8642578,6.578125 C15.8642578,6.80924479 15.8390299,7.02652995 15.7885742,7.22998047 C15.7381185,7.43343099 15.6616211,7.61979167 15.559082,7.7890625 C15.456543,7.95833333 15.3149414,8.09261068 15.1342773,8.19189453 C14.9536133,8.29117839 14.7428385,8.34082031 14.5019531,8.34082031 Z" id="Shape"></path>
+                        <path d="M20.0341797,9.06835938 C20.4020182,9.06835938 20.7250977,8.99674479 21.003418,8.85351562 C21.2817383,8.71028646 21.4892578,8.50358073 21.6259766,8.23339844 L21.6259766,8.51660156 C21.6259766,8.66959635 21.6780599,8.78841146 21.7822266,8.87304688 C21.8863932,8.95768229 22.0100911,9 22.1533203,9 C22.2998047,9 22.4267578,8.95605469 22.5341797,8.86816406 C22.6416016,8.78027344 22.6953125,8.65657552 22.6953125,8.49707031 L22.6953125,4.63476563 C22.6953125,4.47200521 22.6432292,4.34586589 22.5390625,4.25634766 C22.4348958,4.16682943 22.3095703,4.12207031 22.1630859,4.12207031 C22.0166016,4.12207031 21.8904622,4.16764323 21.784668,4.25878906 C21.6788737,4.3499349 21.6259766,4.47526042 21.6259766,4.63476563 L21.6259766,6.8515625 C21.6194661,7.30078125 21.4892578,7.66129557 21.2353516,7.93310547 C20.9814453,8.20491536 20.6803385,8.34082031 20.3320312,8.34082031 C20.0325521,8.34082031 19.7924805,8.24397786 19.6118164,8.05029297 C19.4311523,7.85660807 19.3408203,7.57421875 19.3408203,7.203125 L19.3408203,4.63476563 C19.3408203,4.47200521 19.2879232,4.34586589 19.1821289,4.25634766 C19.0763346,4.16682943 18.9518229,4.12207031 18.8085938,4.12207031 C18.6621094,4.12207031 18.5359701,4.16682943 18.4301758,4.25634766 C18.3243815,4.34586589 18.2714844,4.47200521 18.2714844,4.63476563 L18.2714844,7.19335938 C18.2714844,7.51236979 18.3186849,7.79394531 18.4130859,8.03808594 C18.507487,8.28222656 18.6368815,8.47835286 18.8012695,8.62646484 C18.9656576,8.77457682 19.1520182,8.88525391 19.3603516,8.95849609 C19.5686849,9.03173828 19.7932943,9.06835938 20.0341797,9.06835938 Z" id="Path"></path>
+                        <path d="M26.2597656,9.06835938 C26.969401,9.06835938 27.5683594,8.85351563 28.0566406,8.42382813 C28.1640625,8.3359375 28.2177734,8.23014323 28.2177734,8.10644531 C28.2177734,8.00878906 28.1844076,7.92252604 28.1176758,7.84765625 C28.050944,7.77278646 27.9720052,7.73535156 27.8808594,7.73535156 C27.8190104,7.73535156 27.7571615,7.75651042 27.6953125,7.79882813 C27.1972656,8.15039062 26.7447917,8.32617188 26.3378906,8.32617188 C25.8984375,8.32291667 25.5476888,8.19840495 25.2856445,7.95263672 C25.0236003,7.70686849 24.8811849,7.32519531 24.8583984,6.80761719 L28.0615234,6.80761719 C28.1722005,6.80761719 28.2527669,6.76936849 28.3032227,6.69287109 C28.3536784,6.6163737 28.3789062,6.51953125 28.3789062,6.40234375 C28.3723958,6.08007812 28.3235677,5.78141276 28.2324219,5.50634766 C28.141276,5.23128255 28.0086263,4.98388672 27.8344727,4.76416016 C27.660319,4.54443359 27.4291992,4.37109375 27.1411133,4.24414062 C26.8530273,4.1171875 26.5218099,4.05371094 26.1474609,4.05371094 C25.7796224,4.05371094 25.4443359,4.12044271 25.1416016,4.25390625 C24.8388672,4.38736979 24.5898438,4.56803385 24.3945312,4.79589844 C24.1992188,5.02376302 24.0486654,5.28499349 23.9428711,5.57958984 C23.8370768,5.8741862 23.7841797,6.1858724 23.7841797,6.51464844 C23.7841797,7.30240885 24.0112305,7.92333984 24.465332,8.37744141 C24.9194336,8.83154297 25.5175781,9.06184896 26.2597656,9.06835938 Z M24.8681641,6.16308594 C24.8844401,5.98730469 24.9226888,5.81966146 24.9829102,5.66015625 C25.0431315,5.50065104 25.1253255,5.35416667 25.2294922,5.22070312 C25.3336589,5.08723958 25.46875,4.97981771 25.6347656,4.8984375 C25.8007812,4.81705729 25.9863281,4.77636719 26.1914062,4.77636719 C26.5885417,4.77636719 26.891276,4.90820312 27.0996094,5.171875 C27.3079427,5.43554688 27.4267578,5.76595052 27.4560547,6.16308594 L24.8681641,6.16308594 Z" id="Shape"></path>
+                        <path d="M29.9462891,9 C30.0927734,9 30.2189128,8.95442708 30.324707,8.86328125 C30.4305013,8.77213542 30.4833984,8.6468099 30.4833984,8.48730469 L30.4833984,6.265625 C30.4866536,5.81966146 30.6136068,5.45996094 30.8642578,5.18652344 C31.1149089,4.91308594 31.414388,4.77636719 31.7626953,4.77636719 C32.0621745,4.77636719 32.3046875,4.87402344 32.4902344,5.06933594 C32.6757812,5.26464844 32.7685547,5.54622396 32.7685547,5.9140625 L32.7685547,8.48730469 C32.7685547,8.6500651 32.820638,8.77620443 32.9248047,8.86572266 C33.0289714,8.95524089 33.1526693,9 33.2958984,9 C33.4423828,9 33.5693359,8.95524089 33.6767578,8.86572266 C33.7841797,8.77620443 33.8378906,8.6500651 33.8378906,8.48730469 L33.8378906,5.92382813 C33.8378906,5.60481771 33.7906901,5.32324219 33.6962891,5.07910156 C33.601888,4.83496094 33.4716797,4.63964844 33.3056641,4.49316406 C33.1396484,4.34667969 32.9516602,4.23681641 32.7416992,4.16357422 C32.5317383,4.09033203 32.3063151,4.05371094 32.0654297,4.05371094 C31.6943359,4.05371094 31.3720703,4.12451172 31.0986328,4.26611328 C30.8251953,4.40771484 30.6201172,4.61360677 30.4833984,4.88378906 L30.4833984,4.61035156 C30.4833984,4.45735677 30.4313151,4.33772786 30.3271484,4.25146484 C30.2229818,4.16520182 30.0976562,4.12207031 29.9511719,4.12207031 C29.8046875,4.12207031 29.6777344,4.16682943 29.5703125,4.25634766 C29.4628906,4.34586589 29.4091797,4.46875 29.4091797,4.625 L29.4091797,8.48730469 C29.4091797,8.6500651 29.4620768,8.77620443 29.5678711,8.86572266 C29.6736654,8.95524089 29.7998047,9 29.9462891,9 Z" id="Path"></path>
+                        <path d="M37.265625,9.06835938 C37.9264323,9.06835938 38.4244792,8.9235026 38.7597656,8.63378906 C38.8769531,8.5328776 38.9355469,8.42220052 38.9355469,8.30175781 C38.9355469,8.27246094 38.9322917,8.24397786 38.9257812,8.21630859 C38.9192708,8.18863932 38.9086914,8.16259766 38.894043,8.13818359 C38.8793945,8.11376953 38.8623047,8.09179688 38.8427734,8.07226562 C38.8232422,8.05273438 38.8020833,8.03645833 38.7792969,8.0234375 C38.7565104,8.01041667 38.7312826,7.99983724 38.7036133,7.99169922 C38.675944,7.9835612 38.6474609,7.97949219 38.6181641,7.97949219 C38.5791016,7.97949219 38.5392253,7.9860026 38.4985352,7.99902344 C38.4578451,8.01204427 38.4179688,8.03320313 38.3789062,8.0625 C38.1217448,8.24804688 37.7783203,8.34082031 37.3486328,8.34082031 C36.9091797,8.34082031 36.5649414,8.17643229 36.315918,7.84765625 C36.0668945,7.51888021 35.9423828,7.0875651 35.9423828,6.55371094 C35.9423828,6.02311198 36.0709635,5.59342448 36.328125,5.26464844 C36.5852865,4.9358724 36.9352214,4.77148437 37.3779297,4.77148437 C37.6936849,4.77148437 37.9947917,4.84960938 38.28125,5.00585938 C38.3170573,5.02539062 38.3536784,5.04003906 38.3911133,5.04980469 C38.4285482,5.05957031 38.4651693,5.06445312 38.5009766,5.06445312 C38.5400391,5.06445312 38.577474,5.05957031 38.6132812,5.04980469 C38.6490885,5.04003906 38.6808268,5.02539062 38.7084961,5.00585938 C38.7361654,4.98632813 38.7605794,4.96354167 38.7817383,4.9375 C38.8028971,4.91145833 38.8191732,4.88297526 38.8305664,4.85205078 C38.8419596,4.8211263 38.8476562,4.78776042 38.8476562,4.75195312 C38.8476562,4.71614583 38.840332,4.68033854 38.8256836,4.64453125 C38.8110352,4.60872396 38.7882487,4.57210286 38.7573242,4.53466797 C38.7263997,4.49723307 38.688151,4.46223958 38.6425781,4.4296875 C38.2845052,4.17903646 37.8336589,4.05371094 37.2900391,4.05371094 C36.9319661,4.05371094 36.6007487,4.12044271 36.2963867,4.25390625 C35.9920247,4.38736979 35.7364909,4.56803385 35.5297852,4.79589844 C35.3230794,5.02376302 35.1619466,5.2898763 35.0463867,5.59423828 C34.9308268,5.89860026 34.8730469,6.22005208 34.8730469,6.55859375 C34.8730469,7.02083333 34.9682617,7.44075521 35.1586914,7.81835938 C35.3491211,8.19596354 35.6282552,8.49869792 35.9960938,8.7265625 C36.3639323,8.95442708 36.7871094,9.06835938 37.265625,9.06835938 Z" id="Path"></path>
+                        <path d="M42.0654297,9.06835938 C42.7750651,9.06835938 43.3740234,8.85351563 43.8623047,8.42382813 C43.9697266,8.3359375 44.0234375,8.23014323 44.0234375,8.10644531 C44.0234375,8.00878906 43.9900716,7.92252604 43.9233398,7.84765625 C43.8566081,7.77278646 43.7776693,7.73535156 43.6865234,7.73535156 C43.6246745,7.73535156 43.5628255,7.75651042 43.5009766,7.79882813 C43.0029297,8.15039062 42.5504557,8.32617188 42.1435547,8.32617188 C41.7041016,8.32291667 41.3533529,8.19840495 41.0913086,7.95263672 C40.8292643,7.70686849 40.686849,7.32519531 40.6640625,6.80761719 L43.8671875,6.80761719 C43.9778646,6.80761719 44.058431,6.76936849 44.1088867,6.69287109 C44.1593424,6.6163737 44.1845703,6.51953125 44.1845703,6.40234375 C44.1780599,6.08007812 44.1292318,5.78141276 44.0380859,5.50634766 C43.9469401,5.23128255 43.8142904,4.98388672 43.6401367,4.76416016 C43.4659831,4.54443359 43.2348633,4.37109375 42.9467773,4.24414062 C42.6586914,4.1171875 42.327474,4.05371094 41.953125,4.05371094 C41.5852865,4.05371094 41.25,4.12044271 40.9472656,4.25390625 C40.6445312,4.38736979 40.3955078,4.56803385 40.2001953,4.79589844 C40.0048828,5.02376302 39.8543294,5.28499349 39.7485352,5.57958984 C39.6427409,5.8741862 39.5898438,6.1858724 39.5898438,6.51464844 C39.5898438,7.30240885 39.8168945,7.92333984 40.2709961,8.37744141 C40.7250977,8.83154297 41.3232422,9.06184896 42.0654297,9.06835938 Z M40.6738281,6.16308594 C40.6901042,5.98730469 40.7283529,5.81966146 40.7885742,5.66015625 C40.8487956,5.50065104 40.9309896,5.35416667 41.0351562,5.22070312 C41.1393229,5.08723958 41.2744141,4.97981771 41.4404297,4.8984375 C41.6064453,4.81705729 41.7919922,4.77636719 41.9970703,4.77636719 C42.3942057,4.77636719 42.6969401,4.90820312 42.9052734,5.171875 C43.1136068,5.43554688 43.2324219,5.76595052 43.2617188,6.16308594 L40.6738281,6.16308594 Z" id="Shape"></path>
+                        <path d="M45.9863281,9.00488281 C46.1523438,9.00488281 46.2947591,8.95361328 46.4135742,8.85107422 C46.5323893,8.74853516 46.5917969,8.60449219 46.5917969,8.41894531 L46.5917969,6.11914062 L48.0126953,6.11914062 C48.8557943,6.11914062 49.5035807,5.95719401 49.9560547,5.63330078 C50.4085286,5.30940755 50.6347656,4.78125 50.6347656,4.04882813 C50.6347656,3.32617188 50.4191081,2.80208333 49.987793,2.4765625 C49.5564779,2.15104167 48.9339193,1.98828125 48.1201172,1.98828125 L45.9960938,1.98828125 C45.8268229,1.98828125 45.6819661,2.0476888 45.5615234,2.16650391 C45.4410807,2.28531901 45.3808594,2.42773438 45.3808594,2.59375 L45.3808594,8.41894531 C45.3808594,8.60449219 45.4410807,8.74853516 45.5615234,8.85107422 C45.6819661,8.95361328 45.8235677,9.00488281 45.9863281,9.00488281 Z M46.5917969,5.19628906 L46.5917969,2.95019531 L48.0126953,2.95019531 C48.507487,2.95019531 48.8696289,3.02587891 49.0991211,3.17724609 C49.3286133,3.32861328 49.4433594,3.61914063 49.4433594,4.04882813 C49.4433594,4.48502604 49.3253581,4.78531901 49.0893555,4.94970703 C48.8533529,5.11409505 48.4830729,5.19628906 47.9785156,5.19628906 L46.5917969,5.19628906 Z" id="Shape"></path>
+                        <path d="M53.6962891,8.34082031 C53.2438151,8.34082031 52.8898112,8.18457031 52.6342773,7.87207031 C52.3787435,7.55957031 52.2509766,7.125 52.2509766,6.56835937 C52.2509766,6.00520833 52.3787435,5.56575521 52.6342773,5.25 C52.8898112,4.93424479 53.2438151,4.77636719 53.6962891,4.77636719 C54.148763,4.77636719 54.5027669,4.93505859 54.7583008,5.25244141 C55.0138346,5.56982422 55.1416016,6.00846354 55.1416016,6.56835937 C55.1416016,7.125 55.0146484,7.55957031 54.7607422,7.87207031 C54.5068359,8.18457031 54.1520182,8.34082031 53.6962891,8.34082031 Z M53.6962891,9.06835938 C54.0283203,9.06835938 54.3326823,9.02197266 54.609375,8.92919922 C54.8860677,8.83642578 55.1212565,8.71110026 55.3149414,8.55322266 C55.5086263,8.39534505 55.6730143,8.20898438 55.8081055,7.99414063 C55.9431966,7.77929688 56.0424805,7.55224609 56.105957,7.31298828 C56.1694336,7.07373047 56.2011719,6.82552083 56.2011719,6.56835938 C56.2011719,6.22981771 56.1474609,5.91080729 56.0400391,5.61132813 C55.9326172,5.31184896 55.777181,5.04492188 55.5737305,4.81054688 C55.3702799,4.57617187 55.1074219,4.3914388 54.7851562,4.25634766 C54.4628906,4.12125651 54.0999349,4.05371094 53.6962891,4.05371094 C53.289388,4.05371094 52.9231771,4.12288411 52.5976562,4.26123047 C52.2721354,4.39957682 52.0092773,4.5867513 51.809082,4.82275391 C51.6088867,5.05875651 51.4558919,5.32568359 51.3500977,5.62353516 C51.2443034,5.92138672 51.1914062,6.23632813 51.1914062,6.56835938 C51.1914062,6.89388021 51.2434896,7.2039388 51.3476562,7.49853516 C51.4518229,7.79313151 51.6040039,8.05843099 51.8041992,8.29443359 C52.0043945,8.5304362 52.2672526,8.71842448 52.5927734,8.85839844 C52.9182943,8.9983724 53.2861328,9.06835938 53.6962891,9.06835938 Z" id="Shape"></path>
+                        <path d="M59.4824219,8.34082031 C59.0299479,8.34082031 58.675944,8.18457031 58.4204102,7.87207031 C58.1648763,7.55957031 58.0371094,7.125 58.0371094,6.56835937 C58.0371094,6.00520833 58.1648763,5.56575521 58.4204102,5.25 C58.675944,4.93424479 59.0299479,4.77636719 59.4824219,4.77636719 C59.9348958,4.77636719 60.2888997,4.93505859 60.5444336,5.25244141 C60.7999674,5.56982422 60.9277344,6.00846354 60.9277344,6.56835937 C60.9277344,7.125 60.8007812,7.55957031 60.546875,7.87207031 C60.2929688,8.18457031 59.938151,8.34082031 59.4824219,8.34082031 Z M59.4824219,9.06835938 C59.8144531,9.06835938 60.1188151,9.02197266 60.3955078,8.92919922 C60.6722005,8.83642578 60.9073893,8.71110026 61.1010742,8.55322266 C61.2947591,8.39534505 61.4591471,8.20898438 61.5942383,7.99414063 C61.7293294,7.77929688 61.8286133,7.55224609 61.8920898,7.31298828 C61.9555664,7.07373047 61.9873047,6.82552083 61.9873047,6.56835938 C61.9873047,6.22981771 61.9335938,5.91080729 61.8261719,5.61132813 C61.71875,5.31184896 61.5633138,5.04492188 61.3598633,4.81054688 C61.1564128,4.57617187 60.8935547,4.3914388 60.5712891,4.25634766 C60.2490234,4.12125651 59.8860677,4.05371094 59.4824219,4.05371094 C59.0755208,4.05371094 58.7093099,4.12288411 58.3837891,4.26123047 C58.0582682,4.39957682 57.7954102,4.5867513 57.5952148,4.82275391 C57.3950195,5.05875651 57.2420247,5.32568359 57.1362305,5.62353516 C57.0304362,5.92138672 56.9775391,6.23632813 56.9775391,6.56835938 C56.9775391,6.89388021 57.0296224,7.2039388 57.1337891,7.49853516 C57.2379557,7.79313151 57.3901367,8.05843099 57.590332,8.29443359 C57.7905273,8.5304362 58.0533854,8.71842448 58.3789062,8.85839844 C58.7044271,8.9983724 59.0722656,9.06835938 59.4824219,9.06835938 Z" id="Shape"></path>
+                        <path d="M63.6083984,9 C63.7548828,9 63.8826497,8.95198568 63.9916992,8.85595703 C64.1007487,8.75992839 64.1552734,8.63053385 64.1552734,8.46777344 L64.1552734,2.52539062 C64.1552734,2.36263021 64.1015625,2.23323568 63.9941406,2.13720703 C63.8867188,2.04117839 63.7613932,1.99316406 63.6181641,1.99316406 C63.4716797,1.99316406 63.3455404,2.04036458 63.2397461,2.13476562 C63.1339518,2.22916667 63.0810547,2.359375 63.0810547,2.52539062 L63.0810547,8.46777344 C63.0810547,8.63704427 63.1323242,8.76806641 63.2348633,8.86083984 C63.3374023,8.95361328 63.4619141,9 63.6083984,9 Z" id="Path"></path>
+                        <path d="M66.0791016,9 C66.2255859,9 66.3533529,8.95198568 66.4624023,8.85595703 C66.5714518,8.75992839 66.6259766,8.63053385 66.6259766,8.46777344 L66.6259766,4.65429688 C66.6259766,4.48828125 66.5730794,4.35807292 66.4672852,4.26367188 C66.3614909,4.16927083 66.2369792,4.12207031 66.09375,4.12207031 C65.9472656,4.12207031 65.8203125,4.16927083 65.7128906,4.26367188 C65.6054688,4.35807292 65.5517578,4.48828125 65.5517578,4.65429688 L65.5517578,8.46777344 C65.5517578,8.63704427 65.6030273,8.76806641 65.7055664,8.86083984 C65.8081055,8.95361328 65.9326172,9 66.0791016,9 Z M66.0839844,3.1015625 C66.2792969,3.1015625 66.4363607,3.04541016 66.5551758,2.93310547 C66.6739909,2.82080078 66.7333984,2.671875 66.7333984,2.48632812 C66.7333984,2.30403646 66.6748047,2.15592448 66.5576172,2.04199219 C66.4404297,1.9280599 66.2841797,1.87109375 66.0888672,1.87109375 C65.8935547,1.87109375 65.7364909,1.9280599 65.6176758,2.04199219 C65.4988607,2.15592448 65.4394531,2.30403646 65.4394531,2.48632812 C65.4394531,2.671875 65.4988607,2.82080078 65.6176758,2.93310547 C65.7364909,3.04541016 65.8919271,3.1015625 66.0839844,3.1015625 Z" id="Shape"></path>
+                        <path d="M68.4765625,9 C68.6230469,9 68.7491862,8.95442708 68.8549805,8.86328125 C68.9607747,8.77213542 69.0136719,8.6468099 69.0136719,8.48730469 L69.0136719,6.265625 C69.0169271,5.81966146 69.1438802,5.45996094 69.3945312,5.18652344 C69.6451823,4.91308594 69.9446615,4.77636719 70.2929688,4.77636719 C70.5924479,4.77636719 70.8349609,4.87402344 71.0205078,5.06933594 C71.2060547,5.26464844 71.2988281,5.54622396 71.2988281,5.9140625 L71.2988281,8.48730469 C71.2988281,8.6500651 71.3509115,8.77620443 71.4550781,8.86572266 C71.5592448,8.95524089 71.6829427,9 71.8261719,9 C71.9726562,9 72.0996094,8.95524089 72.2070312,8.86572266 C72.3144531,8.77620443 72.3681641,8.6500651 72.3681641,8.48730469 L72.3681641,5.92382813 C72.3681641,5.60481771 72.3209635,5.32324219 72.2265625,5.07910156 C72.1321615,4.83496094 72.0019531,4.63964844 71.8359375,4.49316406 C71.6699219,4.34667969 71.4819336,4.23681641 71.2719727,4.16357422 C71.0620117,4.09033203 70.8365885,4.05371094 70.5957031,4.05371094 C70.2246094,4.05371094 69.9023438,4.12451172 69.6289062,4.26611328 C69.3554688,4.40771484 69.1503906,4.61360677 69.0136719,4.88378906 L69.0136719,4.61035156 C69.0136719,4.45735677 68.9615885,4.33772786 68.8574219,4.25146484 C68.7532552,4.16520182 68.6279297,4.12207031 68.4814453,4.12207031 C68.3349609,4.12207031 68.2080078,4.16682943 68.1005859,4.25634766 C67.9931641,4.34586589 67.9394531,4.46875 67.9394531,4.625 L67.9394531,8.48730469 C67.9394531,8.6500651 67.9923503,8.77620443 68.0981445,8.86572266 C68.2039388,8.95524089 68.3300781,9 68.4765625,9 Z" id="Path"></path>
+                        <path d="M75.7958984,11.1337891 C76.5348307,11.1337891 77.1232096,10.9498698 77.5610352,10.5820312 C77.9988607,10.2141927 78.2177734,9.66080729 78.2177734,8.921875 L78.2177734,4.62988281 C78.2177734,4.4703776 78.1673177,4.34586589 78.0664062,4.25634766 C77.9654948,4.16682943 77.8483073,4.12207031 77.7148438,4.12207031 C77.5911458,4.12207031 77.4812826,4.15869141 77.3852539,4.23193359 C77.2892253,4.30517578 77.2330729,4.40690104 77.2167969,4.53710938 L77.2167969,4.86425781 C77.1061198,4.69173177 76.9913737,4.55013021 76.8725586,4.43945312 C76.7537435,4.32877604 76.5917969,4.23681641 76.3867188,4.16357422 C76.1816406,4.09033203 75.9407552,4.05371094 75.6640625,4.05371094 C75.2018229,4.05371094 74.7981771,4.16438802 74.453125,4.38574219 C74.1080729,4.60709635 73.8468424,4.90820313 73.6694336,5.2890625 C73.4920247,5.66992188 73.4033203,6.10449219 73.4033203,6.59277344 C73.4033203,7.3186849 73.610026,7.90136719 74.0234375,8.34082031 C74.436849,8.78027344 74.9934896,9 75.6933594,9 C76.3964844,9 76.8961589,8.7281901 77.1923828,8.18457031 L77.1923828,9 C77.1923828,9.44596354 77.0613607,9.79264323 76.7993164,10.0400391 C76.5372721,10.2874349 76.1767578,10.4111328 75.7177734,10.4111328 C75.5745443,10.4111328 75.4305013,10.3989258 75.2856445,10.3745117 C75.1407878,10.3500977 75.0292969,10.3256836 74.9511719,10.3012695 C74.8730469,10.2768555 74.7648112,10.2386068 74.6264648,10.1865234 C74.4881185,10.1344401 74.3994141,10.101888 74.3603516,10.0888672 C74.3440755,10.0823568 74.3269857,10.0766602 74.309082,10.0717773 C74.2911784,10.0668945 74.2749023,10.0636393 74.2602539,10.0620117 C74.2456055,10.0603841 74.2301432,10.0595703 74.2138672,10.0595703 C74.148763,10.0595703 74.0901693,10.0766602 74.0380859,10.1108398 C73.9860026,10.1450195 73.9461263,10.1889648 73.918457,10.2426758 C73.8907878,10.2963867 73.8769531,10.3533529 73.8769531,10.4135742 C73.8769531,10.4737956 73.8924154,10.5323893 73.9233398,10.5893555 C73.9542643,10.6463216 74.000651,10.6943359 74.0625,10.7333984 C74.2415365,10.8440755 74.4864909,10.9384766 74.7973633,11.0166016 C75.1082357,11.0947266 75.4410807,11.1337891 75.7958984,11.1337891 Z M75.8544922,8.30175781 C75.4638672,8.29199219 75.1359049,8.1414388 74.8706055,7.85009766 C74.605306,7.55875651 74.4726562,7.12988281 74.4726562,6.56347656 C74.4726562,6.31933594 74.4995117,6.09309896 74.5532227,5.88476562 C74.6069336,5.67643229 74.6875,5.48763021 74.7949219,5.31835938 C74.9023438,5.14908854 75.0480143,5.0164388 75.2319336,4.92041016 C75.4158529,4.82438151 75.6282552,4.77636719 75.8691406,4.77636719 C76.110026,4.77636719 76.3175456,4.8219401 76.4916992,4.91308594 C76.6658529,5.00423177 76.8025716,5.1336263 76.9018555,5.30126953 C77.0011393,5.46891276 77.0735677,5.65690104 77.1191406,5.86523438 C77.1647135,6.07356771 77.1875,6.30957031 77.1875,6.57324219 C77.1842448,7.15592448 77.0605469,7.58886719 76.8164062,7.87207031 C76.5722656,8.15527344 76.2516276,8.2985026 75.8544922,8.30175781 Z" id="Shape"></path>
+                    </g>
+                    <path d="M59.3225439,12.2031441 C62.6560848,16.7208844 66.1301415,18.9510713 69.748172,18.9510713 C74.7538911,18.9510713 74.7538911,18.9510713 82.6259887,18.9510713 C84.5126631,18.9510713 84.5126631,18.9510713 86.4236326,18.9510713 C94.8330394,18.9510713 94.8330394,18.9510713 101.172043,18.9510713 C108.030214,18.9510713 112.5,23.7940815 112.5,28.5000091 C112.5,28.776156 112.276142,29.0000091 112,29.0000091 C111.723858,29.0000091 111.5,28.776156 111.5,28.5000091 C111.5,24.3292512 107.459222,19.9510713 101.172043,19.9510713 C94.8330394,19.9510713 94.8330394,19.9510713 86.4236326,19.9510713 C84.5126631,19.9510713 84.5126631,19.9510713 82.6259887,19.9510713 C74.7538911,19.9510713 74.7538911,19.9510713 69.748172,19.9510713 C65.9077041,19.9510713 62.2868773,17.7134178 58.8894866,13.2828606 C54.9106667,17.71626 51.3228866,19.9510713 48.0922581,19.9510713 C42.7550547,19.9510713 42.7550547,19.9510713 33.5167726,19.9510713 C32.504542,19.9510713 32.504542,19.9510713 31.4883383,19.9510713 C23.0383623,19.9510713 23.0383623,19.9510713 16.8727074,19.9510713 C10.6282824,19.9510713 6.5,24.1435765 6.5,28.5000091 C6.5,28.776156 6.27614237,29.0000091 6,29.0000091 C5.72385763,29.0000091 5.5,28.776156 5.5,28.5000091 C5.5,23.5947177 10.0725127,18.9510713 16.8727074,18.9510713 C23.0383623,18.9510713 23.0383623,18.9510713 31.4883383,18.9510713 C32.504542,18.9510713 32.504542,18.9510713 33.5167726,18.9510713 C42.7550547,18.9510713 42.7550547,18.9510713 48.0922581,18.9510713 C51.0786702,18.9510713 54.5746062,16.7068389 58.5439371,12.1707495 C58.7542549,11.9304016 59.1329181,11.9461562 59.3225439,12.2031441 Z" id="Path" fill="#8E8E8E" fill-rule="nonzero"></path>
+                    <g id="top" transform="translate(47, 0)" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+            </g>
+            <g id="Score">
+                <use id="pooling-box-copy-2" stroke="#1CBB8B" mask="url(#mask-12)" stroke-width="2" fill="#F0FDF8" stroke-dasharray="2,2" xlink:href="#path-11"></use>
+                <path d="M133.01938,36.7254346 L132.12297,42.3792294 L130.704374,41.3302422 C124.079541,51.654147 114.856669,60.376468 103.041834,67.4957693 C91.0436911,74.725527 83.3506166,82.0454718 79.9522598,89.4314632 C79.8345038,89.6873941 79.5365601,89.7970548 79.286784,89.6763972 C79.0370078,89.5557396 78.9299842,89.2504543 79.0477402,88.9945234 C80.6136193,85.5912405 83.0474105,82.2184133 86.3481858,78.8738782 C87.3298864,77.8639965 88.3960175,76.8648598 89.5465215,75.8765831 C93.1187653,72.7650001 97.4483398,69.6775685 102.534912,66.6125371 C109.203685,62.5941142 115.033231,58.0636092 120.024679,53.0207552 C122.728875,50.1321654 125.426269,46.9169831 128.115216,43.3750281 C128.72765,42.5050826 129.319806,41.6239849 129.892216,40.7307994 L128.06727,39.3829126 L133.01938,36.7254346 Z" id="Combined-Shape" fill="#8E8E8E" fill-rule="nonzero"></path>
+                <path d="M163.337541,36.7254346 L168.289651,39.3829126 L166.562222,40.6596141 C173.105155,50.8952557 182.237396,59.5457462 193.965088,66.6125371 C206.120774,73.9372258 213.953281,81.389841 217.45226,88.9945234 C217.570016,89.2504543 217.462992,89.5557396 217.213216,89.6763972 C216.96344,89.7970548 216.665496,89.6873941 216.54774,89.4314632 C213.149383,82.0454718 205.456309,74.725527 193.458166,67.4957693 C181.616196,60.3601171 172.378108,51.6140735 165.750022,41.2590843 L164.233951,42.3792294 L163.337541,36.7254346 Z" id="Combined-Shape" fill="#8E8E8E" fill-rule="nonzero"></path>
+                <g id="cosine-similarity" transform="translate(196, 48)" fill="#12BE8B" fill-rule="nonzero">
+                    <path d="M3.49609375,10.0820312 C4.2890625,10.0820312 4.88671875,9.90820312 5.2890625,9.56054688 C5.4296875,9.43945312 5.5,9.30664063 5.5,9.16210938 C5.5,9.12695313 5.49609375,9.09277344 5.48828125,9.05957031 C5.48046875,9.02636719 5.46777344,8.99511719 5.45019531,8.96582031 C5.43261719,8.93652344 5.41210938,8.91015625 5.38867188,8.88671875 C5.36523437,8.86328125 5.33984375,8.84375 5.3125,8.828125 C5.28515625,8.8125 5.25488281,8.79980469 5.22167969,8.79003906 C5.18847656,8.78027344 5.15429688,8.77539062 5.11914062,8.77539062 C5.07226563,8.77539062 5.02441406,8.78320312 4.97558594,8.79882812 C4.92675781,8.81445312 4.87890625,8.83984375 4.83203125,8.875 C4.5234375,9.09765625 4.11132813,9.20898438 3.59570312,9.20898438 C3.06835937,9.20898438 2.65527344,9.01171875 2.35644531,8.6171875 C2.05761719,8.22265625 1.90820313,7.70507813 1.90820313,7.06445312 C1.90820313,6.42773437 2.0625,5.91210938 2.37109375,5.51757812 C2.6796875,5.12304687 3.09960938,4.92578125 3.63085938,4.92578125 C4.00976563,4.92578125 4.37109375,5.01953125 4.71484375,5.20703125 C4.7578125,5.23046875 4.80175781,5.24804687 4.84667969,5.25976562 C4.89160156,5.27148438 4.93554688,5.27734375 4.97851562,5.27734375 C5.02539063,5.27734375 5.0703125,5.27148438 5.11328125,5.25976562 C5.15625,5.24804687 5.19433594,5.23046875 5.22753906,5.20703125 C5.26074219,5.18359375 5.29003906,5.15625 5.31542969,5.125 C5.34082031,5.09375 5.36035156,5.05957031 5.37402344,5.02246094 C5.38769531,4.98535156 5.39453125,4.9453125 5.39453125,4.90234375 C5.39453125,4.859375 5.38574219,4.81640625 5.36816406,4.7734375 C5.35058594,4.73046875 5.32324219,4.68652344 5.28613281,4.64160156 C5.24902344,4.59667969 5.203125,4.5546875 5.1484375,4.515625 C4.71875,4.21484375 4.17773438,4.06445312 3.52539062,4.06445312 C3.09570313,4.06445312 2.69824219,4.14453125 2.33300781,4.3046875 C1.96777344,4.46484375 1.66113281,4.68164063 1.41308594,4.95507813 C1.16503906,5.22851562 0.971679688,5.54785156 0.833007812,5.91308594 C0.694335938,6.27832031 0.625,6.6640625 0.625,7.0703125 C0.625,7.625 0.739257812,8.12890625 0.967773438,8.58203125 C1.19628906,9.03515625 1.53125,9.3984375 1.97265625,9.671875 C2.4140625,9.9453125 2.921875,10.0820312 3.49609375,10.0820312 Z" id="Path"></path>
+                    <path d="M9.27929688,9.20898438 C8.73632812,9.20898438 8.31152344,9.02148438 8.00488281,8.64648438 C7.69824219,8.27148438 7.54492188,7.75 7.54492188,7.08203125 C7.54492188,6.40625 7.69824219,5.87890625 8.00488281,5.5 C8.31152344,5.12109375 8.73632812,4.93164062 9.27929688,4.93164062 C9.82226563,4.93164062 10.2470703,5.12207031 10.5537109,5.50292969 C10.8603516,5.88378906 11.0136719,6.41015625 11.0136719,7.08203125 C11.0136719,7.75 10.8613281,8.27148438 10.5566406,8.64648438 C10.2519531,9.02148438 9.82617188,9.20898438 9.27929688,9.20898438 Z M9.27929688,10.0820312 C9.67773437,10.0820312 10.0429688,10.0263672 10.375,9.91503906 C10.7070312,9.80371094 10.9892578,9.65332031 11.2216797,9.46386719 C11.4541016,9.27441406 11.6513672,9.05078125 11.8134766,8.79296875 C11.9755859,8.53515625 12.0947266,8.26269531 12.1708984,7.97558594 C12.2470703,7.68847656 12.2851562,7.390625 12.2851562,7.08203125 C12.2851562,6.67578125 12.2207031,6.29296875 12.0917969,5.93359375 C11.9628906,5.57421875 11.7763672,5.25390625 11.5322266,4.97265625 C11.2880859,4.69140625 10.9726563,4.46972656 10.5859375,4.30761719 C10.1992188,4.14550781 9.76367188,4.06445312 9.27929688,4.06445312 C8.79101562,4.06445312 8.3515625,4.14746094 7.9609375,4.31347656 C7.5703125,4.47949219 7.25488281,4.70410156 7.01464844,4.98730469 C6.77441406,5.27050781 6.59082031,5.59082031 6.46386719,5.94824219 C6.33691406,6.30566406 6.2734375,6.68359375 6.2734375,7.08203125 C6.2734375,7.47265625 6.3359375,7.84472656 6.4609375,8.19824219 C6.5859375,8.55175781 6.76855469,8.87011719 7.00878906,9.15332031 C7.24902344,9.43652344 7.56445312,9.66210938 7.95507812,9.83007812 C8.34570312,9.99804687 8.78710937,10.0820312 9.27929688,10.0820312 Z" id="Shape"></path>
+                    <path d="M13.5507812,9.44335938 C14.0117188,9.77148437 14.5742188,9.97851563 15.2382813,10.0644531 C15.4257813,10.0878906 15.6113281,10.0996094 15.7949219,10.0996094 C16.1308594,10.0996094 16.4404297,10.0625 16.7236328,9.98828125 C17.0068359,9.9140625 17.2539062,9.80371094 17.4648438,9.65722656 C17.6757813,9.51074219 17.8408203,9.31933594 17.9599609,9.08300781 C18.0791016,8.84667969 18.1386719,8.578125 18.1386719,8.27734375 C18.1386719,7.828125 18.0019531,7.46875 17.7285156,7.19921875 C17.4550781,6.9296875 17.0117188,6.71875 16.3984375,6.56640625 L15.4492188,6.33203125 C15.0820313,6.2421875 14.8378906,6.15136719 14.7167969,6.05957031 C14.5957031,5.96777344 14.5351562,5.81640625 14.5351562,5.60546875 C14.5351562,5.4375 14.59375,5.29882813 14.7109375,5.18945313 C14.828125,5.08007813 14.9746094,5.00292969 15.1503906,4.95800781 C15.3261719,4.91308594 15.5273437,4.890625 15.7539062,4.890625 C15.8632813,4.890625 15.9746094,4.89746094 16.0878906,4.91113281 C16.2011719,4.92480469 16.2978516,4.93945312 16.3779297,4.95507813 C16.4580078,4.97070313 16.5527344,4.99414063 16.6621094,5.02539062 C16.7714844,5.05664062 16.8476562,5.08007813 16.890625,5.09570312 C16.9335938,5.11132812 17.0039062,5.13769531 17.1015625,5.17480469 C17.1992188,5.21191406 17.25,5.23046875 17.2539062,5.23046875 C17.2929688,5.2421875 17.3291016,5.25195313 17.3623047,5.25976562 C17.3955078,5.26757812 17.4296875,5.27148438 17.4648438,5.27148438 C17.5429688,5.27148438 17.6123047,5.25292969 17.6728516,5.21582031 C17.7333984,5.17871094 17.7792969,5.13085937 17.8105469,5.07226562 C17.8417969,5.01367188 17.8574219,4.95117188 17.8574219,4.88476562 C17.8574219,4.70117187 17.7636719,4.56445312 17.5761719,4.47460938 C16.9980469,4.18164062 16.3671875,4.03515625 15.6835937,4.03515625 C14.8203125,4.03515625 14.1816406,4.23242188 13.7675781,4.62695312 C13.4863281,4.90429688 13.3457031,5.25 13.3457031,5.6640625 C13.3457031,5.8203125 13.3603516,5.96191406 13.3896484,6.08886719 C13.4189453,6.21582031 13.4580078,6.32714844 13.5068359,6.42285156 C13.5556641,6.51855469 13.6240234,6.60644531 13.7119141,6.68652344 C13.7998047,6.76660156 13.8886719,6.83398437 13.9785156,6.88867188 C14.0683594,6.94335938 14.1845703,6.99804688 14.3271484,7.05273438 C14.4697266,7.10742187 14.6035156,7.15332031 14.7285156,7.19042969 C14.8535156,7.22753906 15.0117188,7.27148438 15.203125,7.32226563 L16.1699219,7.56835938 C16.4746094,7.64648437 16.6972656,7.74902344 16.8378906,7.87597656 C16.9785156,8.00292969 17.0488281,8.1640625 17.0488281,8.359375 C17.0488281,8.5625 16.984375,8.73339844 16.8554688,8.87207031 C16.7265625,9.01074219 16.5644531,9.10839844 16.3691406,9.16503906 C16.1738281,9.22167969 15.953125,9.25 15.7070313,9.25 C15.4804688,9.25 15.2558594,9.22167969 15.0332031,9.16503906 C14.8105469,9.10839844 14.6328125,9.04882812 14.5,8.98632812 C14.3671875,8.92382813 14.2226562,8.84570312 14.0664062,8.75195313 C13.96875,8.69726562 13.8730469,8.66992188 13.7792969,8.66992188 C13.7558594,8.66992188 13.7324219,8.671875 13.7089844,8.67578125 C13.6855469,8.6796875 13.6630859,8.68554688 13.6416016,8.69335938 C13.6201172,8.70117188 13.5996094,8.70996094 13.5800781,8.71972656 C13.5605469,8.72949219 13.5419922,8.74121094 13.5244141,8.75488281 C13.5068359,8.76855469 13.4902344,8.78320312 13.4746094,8.79882813 C13.4589844,8.81445312 13.4453125,8.83105469 13.4335938,8.84863281 C13.421875,8.86621094 13.4111328,8.88476563 13.4013672,8.90429688 C13.3916016,8.92382813 13.3837891,8.94335938 13.3779297,8.96289062 C13.3720703,8.98242188 13.3671875,9.00292969 13.3632812,9.02441406 C13.359375,9.04589844 13.3574219,9.06835938 13.3574219,9.09179688 C13.3574219,9.23242188 13.421875,9.34960938 13.5507812,9.44335938 Z" id="Path"></path>
+                    <path d="M20.1015625,10 C20.2773438,10 20.4306641,9.94238281 20.5615234,9.82714844 C20.6923828,9.71191406 20.7578125,9.55664063 20.7578125,9.36132812 L20.7578125,4.78515625 C20.7578125,4.5859375 20.6943359,4.4296875 20.5673828,4.31640625 C20.4404297,4.203125 20.2910156,4.14648438 20.1191406,4.14648438 C19.9433594,4.14648438 19.7910156,4.203125 19.6621094,4.31640625 C19.5332031,4.4296875 19.46875,4.5859375 19.46875,4.78515625 L19.46875,9.36132812 C19.46875,9.56445312 19.5302734,9.72167969 19.6533203,9.83300781 C19.7763672,9.94433594 19.9257812,10 20.1015625,10 Z M20.1074219,2.921875 C20.3417969,2.921875 20.5302734,2.85449219 20.6728516,2.71972656 C20.8154297,2.58496094 20.8867188,2.40625 20.8867188,2.18359375 C20.8867188,1.96484375 20.8164062,1.78710937 20.6757812,1.65039063 C20.5351562,1.51367188 20.3476562,1.4453125 20.1132812,1.4453125 C19.8789062,1.4453125 19.6904297,1.51367188 19.5478516,1.65039063 C19.4052734,1.78710937 19.3339844,1.96484375 19.3339844,2.18359375 C19.3339844,2.40625 19.4052734,2.58496094 19.5478516,2.71972656 C19.6904297,2.85449219 19.8769531,2.921875 20.1074219,2.921875 Z" id="Shape"></path>
+                    <path d="M22.9785156,10 C23.1542969,10 23.3056641,9.9453125 23.4326172,9.8359375 C23.5595703,9.7265625 23.6230469,9.57617188 23.6230469,9.38476563 L23.6230469,6.71875 C23.6269531,6.18359375 23.7792969,5.75195313 24.0800781,5.42382812 C24.3808594,5.09570312 24.7402344,4.93164063 25.1582031,4.93164063 C25.5175781,4.93164063 25.8085938,5.04882812 26.03125,5.28320312 C26.2539062,5.51757812 26.3652344,5.85546875 26.3652344,6.296875 L26.3652344,9.38476563 C26.3652344,9.58007812 26.4277344,9.73144531 26.5527344,9.83886719 C26.6777344,9.94628906 26.8261719,10 26.9980469,10 C27.1738281,10 27.3261719,9.94628906 27.4550781,9.83886719 C27.5839844,9.73144531 27.6484375,9.58007812 27.6484375,9.38476563 L27.6484375,6.30859375 C27.6484375,5.92578125 27.5917969,5.58789063 27.4785156,5.29492188 C27.3652344,5.00195313 27.2089844,4.76757813 27.0097656,4.59179688 C26.8105469,4.41601562 26.5849609,4.28417969 26.3330078,4.19628906 C26.0810547,4.10839844 25.8105469,4.06445312 25.5214844,4.06445312 C25.0761719,4.06445312 24.6894531,4.14941406 24.3613281,4.31933594 C24.0332031,4.48925781 23.7871094,4.73632812 23.6230469,5.06054687 L23.6230469,4.73242188 C23.6230469,4.54882812 23.5605469,4.40527344 23.4355469,4.30175781 C23.3105469,4.19824219 23.1601562,4.14648438 22.984375,4.14648438 C22.8085938,4.14648438 22.65625,4.20019531 22.5273438,4.30761719 C22.3984375,4.41503906 22.3339844,4.5625 22.3339844,4.75 L22.3339844,9.38476563 C22.3339844,9.58007812 22.3974609,9.73144531 22.5244141,9.83886719 C22.6513672,9.94628906 22.8027344,10 22.9785156,10 Z" id="Path"></path>
+                    <path d="M31.8730469,10.0820312 C32.7246094,10.0820312 33.4433594,9.82421875 34.0292969,9.30859375 C34.1582031,9.203125 34.2226562,9.07617188 34.2226562,8.92773437 C34.2226562,8.81054688 34.1826172,8.70703125 34.1025391,8.6171875 C34.0224609,8.52734375 33.9277344,8.48242188 33.8183594,8.48242188 C33.7441406,8.48242188 33.6699219,8.5078125 33.5957031,8.55859375 C32.9980469,8.98046875 32.4550781,9.19140625 31.9667969,9.19140625 C31.4394531,9.1875 31.0185547,9.03808594 30.7041016,8.74316406 C30.3896484,8.44824219 30.21875,7.99023438 30.1914063,7.36914062 L34.0351562,7.36914062 C34.1679688,7.36914062 34.2646484,7.32324219 34.3251953,7.23144531 C34.3857422,7.13964844 34.4160156,7.0234375 34.4160156,6.8828125 C34.4082031,6.49609375 34.3496094,6.13769531 34.2402344,5.80761719 C34.1308594,5.47753906 33.9716797,5.18066406 33.7626953,4.91699219 C33.5537109,4.65332031 33.2763672,4.4453125 32.9306641,4.29296875 C32.5849609,4.140625 32.1875,4.06445312 31.7382812,4.06445312 C31.296875,4.06445312 30.8945312,4.14453125 30.53125,4.3046875 C30.1679687,4.46484375 29.8691406,4.68164063 29.6347656,4.95507813 C29.4003906,5.22851562 29.2197266,5.54199219 29.0927734,5.89550781 C28.9658203,6.24902344 28.9023438,6.62304687 28.9023438,7.01757812 C28.9023438,7.96289062 29.1748047,8.70800781 29.7197266,9.25292969 C30.2646484,9.79785156 30.9824219,10.0742188 31.8730469,10.0820312 Z M30.203125,6.59570312 C30.2226562,6.38476562 30.2685547,6.18359375 30.3408203,5.9921875 C30.4130859,5.80078125 30.5117188,5.625 30.6367188,5.46484375 C30.7617188,5.3046875 30.9238281,5.17578125 31.1230469,5.078125 C31.3222656,4.98046875 31.5449219,4.93164062 31.7910156,4.93164062 C32.2675781,4.93164062 32.6308594,5.08984375 32.8808594,5.40625 C33.1308594,5.72265625 33.2734375,6.11914062 33.3085938,6.59570312 L30.203125,6.59570312 Z" id="Shape"></path>
+                    <path d="M39.015625,9.44335938 C39.4765625,9.77148437 40.0390625,9.97851563 40.703125,10.0644531 C40.890625,10.0878906 41.0761719,10.0996094 41.2597656,10.0996094 C41.5957031,10.0996094 41.9052734,10.0625 42.1884766,9.98828125 C42.4716797,9.9140625 42.71875,9.80371094 42.9296875,9.65722656 C43.140625,9.51074219 43.3056641,9.31933594 43.4248047,9.08300781 C43.5439453,8.84667969 43.6035156,8.578125 43.6035156,8.27734375 C43.6035156,7.828125 43.4667969,7.46875 43.1933594,7.19921875 C42.9199219,6.9296875 42.4765625,6.71875 41.8632812,6.56640625 L40.9140625,6.33203125 C40.546875,6.2421875 40.3027344,6.15136719 40.1816406,6.05957031 C40.0605469,5.96777344 40,5.81640625 40,5.60546875 C40,5.4375 40.0585938,5.29882813 40.1757812,5.18945313 C40.2929688,5.08007813 40.4394531,5.00292969 40.6152344,4.95800781 C40.7910156,4.91308594 40.9921875,4.890625 41.21875,4.890625 C41.328125,4.890625 41.4394531,4.89746094 41.5527344,4.91113281 C41.6660156,4.92480469 41.7626953,4.93945312 41.8427734,4.95507813 C41.9228516,4.97070313 42.0175781,4.99414063 42.1269531,5.02539062 C42.2363281,5.05664062 42.3125,5.08007813 42.3554688,5.09570312 C42.3984375,5.11132812 42.46875,5.13769531 42.5664062,5.17480469 C42.6640625,5.21191406 42.7148438,5.23046875 42.71875,5.23046875 C42.7578125,5.2421875 42.7939453,5.25195313 42.8271484,5.25976562 C42.8603516,5.26757812 42.8945312,5.27148438 42.9296875,5.27148438 C43.0078125,5.27148438 43.0771484,5.25292969 43.1376953,5.21582031 C43.1982422,5.17871094 43.2441406,5.13085937 43.2753906,5.07226562 C43.3066406,5.01367188 43.3222656,4.95117188 43.3222656,4.88476562 C43.3222656,4.70117187 43.2285156,4.56445312 43.0410156,4.47460938 C42.4628906,4.18164062 41.8320312,4.03515625 41.1484375,4.03515625 C40.2851562,4.03515625 39.6464844,4.23242188 39.2324219,4.62695312 C38.9511719,4.90429688 38.8105469,5.25 38.8105469,5.6640625 C38.8105469,5.8203125 38.8251953,5.96191406 38.8544922,6.08886719 C38.8837891,6.21582031 38.9228516,6.32714844 38.9716797,6.42285156 C39.0205078,6.51855469 39.0888672,6.60644531 39.1767578,6.68652344 C39.2646484,6.76660156 39.3535156,6.83398437 39.4433594,6.88867188 C39.5332031,6.94335938 39.6494141,6.99804688 39.7919922,7.05273438 C39.9345703,7.10742187 40.0683594,7.15332031 40.1933594,7.19042969 C40.3183594,7.22753906 40.4765625,7.27148438 40.6679688,7.32226563 L41.6347656,7.56835938 C41.9394531,7.64648437 42.1621094,7.74902344 42.3027344,7.87597656 C42.4433594,8.00292969 42.5136719,8.1640625 42.5136719,8.359375 C42.5136719,8.5625 42.4492188,8.73339844 42.3203125,8.87207031 C42.1914062,9.01074219 42.0292969,9.10839844 41.8339844,9.16503906 C41.6386719,9.22167969 41.4179688,9.25 41.171875,9.25 C40.9453125,9.25 40.7207031,9.22167969 40.4980469,9.16503906 C40.2753906,9.10839844 40.0976562,9.04882812 39.9648438,8.98632812 C39.8320312,8.92382813 39.6875,8.84570312 39.53125,8.75195313 C39.4335938,8.69726562 39.3378906,8.66992188 39.2441406,8.66992188 C39.2207031,8.66992188 39.1972656,8.671875 39.1738281,8.67578125 C39.1503906,8.6796875 39.1279297,8.68554688 39.1064453,8.69335938 C39.0849609,8.70117188 39.0644531,8.70996094 39.0449219,8.71972656 C39.0253906,8.72949219 39.0068359,8.74121094 38.9892578,8.75488281 C38.9716797,8.76855469 38.9550781,8.78320312 38.9394531,8.79882813 C38.9238281,8.81445312 38.9101562,8.83105469 38.8984375,8.84863281 C38.8867188,8.86621094 38.8759766,8.88476563 38.8662109,8.90429688 C38.8564453,8.92382813 38.8486328,8.94335938 38.8427734,8.96289062 C38.8369141,8.98242188 38.8320312,9.00292969 38.828125,9.02441406 C38.8242188,9.04589844 38.8222656,9.06835938 38.8222656,9.09179688 C38.8222656,9.23242188 38.8867188,9.34960938 39.015625,9.44335938 Z" id="Path"></path>
+                    <path d="M45.5664062,10 C45.7421875,10 45.8955078,9.94238281 46.0263672,9.82714844 C46.1572266,9.71191406 46.2226562,9.55664063 46.2226562,9.36132812 L46.2226562,4.78515625 C46.2226562,4.5859375 46.1591797,4.4296875 46.0322266,4.31640625 C45.9052734,4.203125 45.7558594,4.14648438 45.5839844,4.14648438 C45.4082031,4.14648438 45.2558594,4.203125 45.1269531,4.31640625 C44.9980469,4.4296875 44.9335938,4.5859375 44.9335938,4.78515625 L44.9335938,9.36132812 C44.9335938,9.56445312 44.9951172,9.72167969 45.1181641,9.83300781 C45.2412109,9.94433594 45.390625,10 45.5664062,10 Z M45.5722656,2.921875 C45.8066406,2.921875 45.9951172,2.85449219 46.1376953,2.71972656 C46.2802734,2.58496094 46.3515625,2.40625 46.3515625,2.18359375 C46.3515625,1.96484375 46.28125,1.78710937 46.140625,1.65039063 C46,1.51367188 45.8125,1.4453125 45.578125,1.4453125 C45.34375,1.4453125 45.1552734,1.51367188 45.0126953,1.65039063 C44.8701172,1.78710937 44.7988281,1.96484375 44.7988281,2.18359375 C44.7988281,2.40625 44.8701172,2.58496094 45.0126953,2.71972656 C45.1552734,2.85449219 45.3417969,2.921875 45.5722656,2.921875 Z" id="Shape"></path>
+                    <path d="M48.4375,10 C48.6132812,10 48.7666016,9.94628906 48.8974609,9.83886719 C49.0283203,9.73144531 49.09375,9.58007812 49.09375,9.38476563 L49.09375,6.68359375 C49.09375,6.14453125 49.2324219,5.71777344 49.5097656,5.40332031 C49.7871094,5.08886719 50.1582031,4.93164063 50.6230469,4.93164063 C50.9980469,4.93164063 51.2890625,5.046875 51.4960938,5.27734375 C51.703125,5.5078125 51.8066406,5.85546875 51.8066406,6.3203125 L51.8066406,9.37890625 C51.8066406,9.57421875 51.8691406,9.7265625 51.9941406,9.8359375 C52.1191406,9.9453125 52.2695312,10 52.4453125,10 C52.6210938,10 52.7724609,9.9453125 52.8994141,9.8359375 C53.0263672,9.7265625 53.0898438,9.57421875 53.0898438,9.37890625 L53.0898438,6.80078125 C53.0898438,6.21484375 53.2333984,5.75683594 53.5205078,5.42675781 C53.8076172,5.09667969 54.1777344,4.93164063 54.6308594,4.93164063 C55.0097656,4.93164063 55.3017578,5.04394531 55.5068359,5.26855469 C55.7119141,5.49316406 55.8144531,5.83007812 55.8144531,6.27929687 L55.8144531,9.38476563 C55.8144531,9.58007812 55.8769531,9.73144531 56.0019531,9.83886719 C56.1269531,9.94628906 56.2773438,10 56.453125,10 C56.6289062,10 56.78125,9.94628906 56.9101562,9.83886719 C57.0390625,9.73144531 57.1035156,9.58007812 57.1035156,9.38476563 L57.1035156,6.30273437 C57.1035156,5.55273438 56.9042969,4.9921875 56.5058594,4.62109375 C56.1074219,4.25 55.5859375,4.06445312 54.9414062,4.06445312 C54.4570313,4.06445312 54.0380859,4.17382812 53.6845703,4.39257812 C53.3310547,4.61132812 53.0664063,4.93945312 52.890625,5.37695313 C52.7695313,4.98632812 52.5400391,4.66992188 52.2021484,4.42773438 C51.8642578,4.18554688 51.4492188,4.06445312 50.9570312,4.06445312 C50.0898438,4.06445312 49.46875,4.4140625 49.09375,5.11328125 L49.09375,4.70898438 C49.09375,4.53320312 49.0292969,4.39550781 48.9003906,4.29589844 C48.7714844,4.19628906 48.6210938,4.14648438 48.4492188,4.14648438 C48.2734375,4.14648438 48.1210938,4.20019531 47.9921875,4.30761719 C47.8632812,4.41503906 47.7988281,4.5625 47.7988281,4.75 L47.7988281,9.38476563 C47.7988281,9.58007812 47.8623047,9.73144531 47.9892578,9.83886719 C48.1162109,9.94628906 48.265625,10 48.4375,10 Z" id="Path"></path>
+                    <path d="M59.359375,10 C59.5351562,10 59.6884766,9.94238281 59.8193359,9.82714844 C59.9501953,9.71191406 60.015625,9.55664063 60.015625,9.36132812 L60.015625,4.78515625 C60.015625,4.5859375 59.9521484,4.4296875 59.8251953,4.31640625 C59.6982422,4.203125 59.5488281,4.14648438 59.3769531,4.14648438 C59.2011719,4.14648438 59.0488281,4.203125 58.9199219,4.31640625 C58.7910156,4.4296875 58.7265625,4.5859375 58.7265625,4.78515625 L58.7265625,9.36132812 C58.7265625,9.56445312 58.7880859,9.72167969 58.9111328,9.83300781 C59.0341797,9.94433594 59.1835938,10 59.359375,10 Z M59.3652344,2.921875 C59.5996094,2.921875 59.7880859,2.85449219 59.9306641,2.71972656 C60.0732422,2.58496094 60.1445312,2.40625 60.1445312,2.18359375 C60.1445312,1.96484375 60.0742188,1.78710937 59.9335938,1.65039063 C59.7929688,1.51367188 59.6054688,1.4453125 59.3710938,1.4453125 C59.1367188,1.4453125 58.9482422,1.51367188 58.8056641,1.65039063 C58.6630859,1.78710937 58.5917969,1.96484375 58.5917969,2.18359375 C58.5917969,2.40625 58.6630859,2.58496094 58.8056641,2.71972656 C58.9482422,2.85449219 59.1347656,2.921875 59.3652344,2.921875 Z" id="Shape"></path>
+                    <path d="M62.2597656,10 C62.4355469,10 62.5888672,9.94238281 62.7197266,9.82714844 C62.8505859,9.71191406 62.9160156,9.55664063 62.9160156,9.36132812 L62.9160156,2.23046875 C62.9160156,2.03515625 62.8515625,1.87988281 62.7226562,1.76464844 C62.59375,1.64941406 62.4433594,1.59179688 62.2714844,1.59179688 C62.0957031,1.59179688 61.9443359,1.6484375 61.8173828,1.76171875 C61.6904297,1.875 61.6269531,2.03125 61.6269531,2.23046875 L61.6269531,9.36132812 C61.6269531,9.56445313 61.6884766,9.72167969 61.8115234,9.83300781 C61.9345703,9.94433594 62.0839844,10 62.2597656,10 Z" id="Path"></path>
+                    <path d="M66.2148438,10.0820312 C67.0078125,10.0820312 67.5742188,9.74609375 67.9140625,9.07421875 L67.9140625,9.390625 C67.9140625,9.65625 68.0292969,9.83984375 68.2597656,9.94140625 C68.3496094,9.98046875 68.4433594,10 68.5410156,10 C68.7128906,10 68.8613281,9.94628906 68.9863281,9.83886719 C69.1113281,9.73144531 69.1738281,9.58007812 69.1738281,9.38476562 L69.1738281,6.17382812 C69.1738281,5.45117188 68.9628906,4.91894531 68.5410156,4.57714844 C68.1191406,4.23535156 67.546875,4.06445312 66.8242188,4.06445312 C66.0234375,4.06445312 65.3066406,4.22265625 64.6738281,4.5390625 C64.5292969,4.609375 64.4570312,4.72070313 64.4570312,4.87304687 C64.4570312,4.99804687 64.5009766,5.11132813 64.5888672,5.21289062 C64.6767578,5.31445312 64.7832031,5.36523438 64.9082031,5.36523438 C64.9238281,5.36523438 64.9394531,5.36425781 64.9550781,5.36230469 C64.9707031,5.36035156 64.9873047,5.35742187 65.0048828,5.35351562 C65.0224609,5.34960938 65.0390625,5.34375 65.0546875,5.3359375 C65.2734375,5.25 65.4345703,5.18847656 65.5380859,5.15136719 C65.6416016,5.11425781 65.8183594,5.06738281 66.0683594,5.01074219 C66.3183594,4.95410156 66.5527344,4.92578125 66.7714844,4.92578125 C67.5136719,4.92578125 67.8847656,5.30859375 67.8847656,6.07421875 L67.8847656,6.58984375 C67.5253906,6.58984375 67.2207031,6.59277344 66.9707031,6.59863281 C66.7207031,6.60449219 66.4648438,6.61816406 66.203125,6.63964844 C65.9414062,6.66113281 65.7236328,6.69042969 65.5498047,6.72753906 C65.3759766,6.76464844 65.2050781,6.81347656 65.0371094,6.87402344 C64.8691406,6.93457031 64.7353516,7.00878906 64.6357422,7.09667969 C64.5361328,7.18457031 64.4462891,7.29101563 64.3662109,7.41601563 C64.2861328,7.54101563 64.2294922,7.68359375 64.1962891,7.84375 C64.1630859,8.00390625 64.1464844,8.1875 64.1464844,8.39453125 C64.1464844,8.93359375 64.3398438,9.34960938 64.7265625,9.64257813 C65.1132812,9.93554688 65.609375,10.0820312 66.2148438,10.0820312 Z M66.3261719,9.24414062 C66.0371094,9.24414062 65.8046875,9.17089844 65.6289062,9.02441406 C65.453125,8.87792969 65.3652344,8.6484375 65.3652344,8.3359375 C65.3652344,8.22265625 65.3740234,8.12207031 65.3916016,8.03417969 C65.4091797,7.94628906 65.4394531,7.8671875 65.4824219,7.796875 C65.5253906,7.7265625 65.5771484,7.66699219 65.6376953,7.61816406 C65.6982422,7.56933594 65.7802734,7.52636719 65.8837891,7.48925781 C65.9873047,7.45214844 66.0976562,7.42285156 66.2148438,7.40136719 C66.3320312,7.37988281 66.4794922,7.36230469 66.6572266,7.34863281 C66.8349609,7.33496094 67.0166016,7.32617188 67.2021484,7.32226562 C67.3876953,7.31835938 67.6132812,7.31640625 67.8789062,7.31640625 L67.8789062,7.46875 C67.8789062,7.984375 67.7226562,8.40917969 67.4101562,8.74316406 C67.0976562,9.07714844 66.7363281,9.24414062 66.3261719,9.24414062 Z" id="Shape"></path>
+                    <path d="M71.4296875,10 C71.6054688,10 71.7587891,9.94628906 71.8896484,9.83886719 C72.0205078,9.73144531 72.0859375,9.58203125 72.0859375,9.390625 L72.0859375,6.66015625 C72.0859375,6.37890625 72.1289062,6.140625 72.2148438,5.9453125 C72.3007812,5.75 72.4238281,5.60058594 72.5839844,5.49707031 C72.7441406,5.39355469 72.9189453,5.3203125 73.1083984,5.27734375 C73.2978516,5.234375 73.515625,5.21289062 73.7617188,5.21289062 C73.890625,5.21289062 73.9921875,5.16210937 74.0664062,5.06054688 C74.140625,4.95898438 74.1777344,4.83789062 74.1777344,4.69726562 C74.1777344,4.62695312 74.1679688,4.55859375 74.1484375,4.4921875 C74.1289062,4.42578125 74.0996094,4.36621094 74.0605469,4.31347656 C74.0214844,4.26074219 73.9707031,4.21875 73.9082031,4.1875 C73.8457031,4.15625 73.7753906,4.140625 73.6972656,4.140625 C73.3183594,4.140625 72.9775391,4.26367188 72.6748047,4.50976562 C72.3720703,4.75585938 72.1699219,5.06054688 72.0683594,5.42382812 L72.0859375,4.69726562 C72.0898438,4.52539062 72.0292969,4.390625 71.9042969,4.29296875 C71.7792969,4.1953125 71.6289062,4.14648438 71.453125,4.14648438 C71.3398438,4.14648438 71.234375,4.16699219 71.1367188,4.20800781 C71.0390625,4.24902344 70.9580078,4.31542969 70.8935547,4.40722656 C70.8291016,4.49902344 70.796875,4.609375 70.796875,4.73828125 L70.796875,9.390625 C70.796875,9.5859375 70.859375,9.73632812 70.984375,9.84179688 C71.109375,9.94726562 71.2578125,10 71.4296875,10 Z" id="Path"></path>
+                    <path d="M75.859375,10 C76.0351562,10 76.1884766,9.94238281 76.3193359,9.82714844 C76.4501953,9.71191406 76.515625,9.55664063 76.515625,9.36132812 L76.515625,4.78515625 C76.515625,4.5859375 76.4521484,4.4296875 76.3251953,4.31640625 C76.1982422,4.203125 76.0488281,4.14648438 75.8769531,4.14648438 C75.7011719,4.14648438 75.5488281,4.203125 75.4199219,4.31640625 C75.2910156,4.4296875 75.2265625,4.5859375 75.2265625,4.78515625 L75.2265625,9.36132812 C75.2265625,9.56445312 75.2880859,9.72167969 75.4111328,9.83300781 C75.5341797,9.94433594 75.6835938,10 75.859375,10 Z M75.8652344,2.921875 C76.0996094,2.921875 76.2880859,2.85449219 76.4306641,2.71972656 C76.5732422,2.58496094 76.6445312,2.40625 76.6445312,2.18359375 C76.6445312,1.96484375 76.5742188,1.78710937 76.4335938,1.65039063 C76.2929688,1.51367188 76.1054688,1.4453125 75.8710938,1.4453125 C75.6367188,1.4453125 75.4482422,1.51367188 75.3056641,1.65039063 C75.1630859,1.78710937 75.0917969,1.96484375 75.0917969,2.18359375 C75.0917969,2.40625 75.1630859,2.58496094 75.3056641,2.71972656 C75.4482422,2.85449219 75.6347656,2.921875 75.8652344,2.921875 Z" id="Shape"></path>
+                    <path d="M80.1484375,10.0585938 C80.1679688,10.0585938 80.1923828,10.0585938 80.2216797,10.0585938 C80.2509766,10.0585938 80.2792969,10.0585938 80.3066406,10.0585938 C80.3339844,10.0585938 80.3574219,10.0585938 80.3769531,10.0585938 C80.4628906,10.0585938 80.5351562,10.0576172 80.59375,10.0556641 C80.6523438,10.0537109 80.71875,10.0507812 80.7929688,10.046875 C80.8671875,10.0429687 80.9316406,10.0351563 80.9863281,10.0234375 C81.0410156,10.0117188 81.0966797,9.99804687 81.1533203,9.98242188 C81.2099609,9.96679688 81.2578125,9.9453125 81.296875,9.91796875 C81.3359375,9.890625 81.3710938,9.859375 81.4023438,9.82421875 C81.4335938,9.7890625 81.4570312,9.74707031 81.4726562,9.69824219 C81.4882812,9.64941406 81.4960938,9.59375 81.4960938,9.53125 C81.4960938,9.4453125 81.4775391,9.36523438 81.4404297,9.29101562 C81.4033203,9.21679687 81.3466797,9.15527344 81.2705078,9.10644531 C81.1943359,9.05761719 81.1035156,9.03320312 80.9980469,9.03320312 C80.9824219,9.03320312 80.96875,9.03320312 80.9570312,9.03320312 L80.5292969,9.0625 L80.4765625,9.0625 C80.3164062,9.0625 80.1875,9.01855469 80.0898438,8.93066406 C79.9921875,8.84277344 79.9238281,8.72167969 79.8847656,8.56738281 C79.8457031,8.41308594 79.8261719,8.21875 79.8261719,7.984375 L79.8261719,5.03710937 L80.8515625,5.03710937 C81.0039062,5.03710937 81.1171875,4.99902344 81.1914062,4.92285156 C81.265625,4.84667969 81.3027344,4.74804688 81.3027344,4.62695313 C81.3027344,4.50585937 81.2646484,4.40527344 81.1884766,4.32519531 C81.1123047,4.24511719 81,4.20507812 80.8515625,4.20507812 L79.8261719,4.20507812 L79.8261719,2.54101562 C79.8261719,2.37695312 79.7822266,2.25488281 79.6943359,2.17480469 C79.6064453,2.09472656 79.4921875,2.0546875 79.3515625,2.0546875 C79.1796875,2.0546875 79.0361328,2.10644531 78.9208984,2.20996094 C78.8056641,2.31347656 78.7382812,2.44921875 78.71875,2.6171875 L78.5429688,4.20507812 L77.8808594,4.20507812 C77.7324219,4.20507812 77.6191406,4.24414062 77.5410156,4.32226562 C77.4628906,4.40039062 77.4238281,4.49804688 77.4238281,4.61523438 C77.4238281,4.73242188 77.4648438,4.83203125 77.546875,4.9140625 C77.6289062,4.99609375 77.7441406,5.03710937 77.8925781,5.03710937 L78.5429688,5.03710937 L78.5429688,8.2890625 C78.5429688,8.84765625 78.6865234,9.28222656 78.9736328,9.59277344 C79.2607422,9.90332031 79.6523438,10.0585938 80.1484375,10.0585938 Z" id="Path"></path>
+                    <path d="M84.2792969,12.0800781 L87.7773438,4.97265625 C87.8203125,4.88671875 87.8417969,4.79882812 87.8417969,4.70898438 C87.8417969,4.54882812 87.7783203,4.41210938 87.6513672,4.29882812 C87.5244141,4.18554688 87.3808594,4.12890625 87.2207031,4.12890625 C86.9746094,4.12890625 86.7871094,4.25585938 86.6582031,4.50976562 L84.9882812,7.9140625 L83.3535156,4.58007812 C83.2910156,4.45117188 83.2099609,4.35644531 83.1103516,4.29589844 C83.0107422,4.23535156 82.9042969,4.20507812 82.7910156,4.20507812 C82.6855469,4.20507812 82.5839844,4.22949219 82.4863281,4.27832031 C82.3886719,4.32714844 82.3095703,4.39648438 82.2490234,4.48632812 C82.1884766,4.57617188 82.1582031,4.67578125 82.1582031,4.78515625 C82.1582031,4.875 82.1816406,4.96679688 82.2285156,5.06054687 L84.3496094,9.21484375 L83.1894531,11.5703125 C83.1699219,11.6132812 83.1542969,11.65625 83.1425781,11.6992188 C83.1308594,11.7421875 83.125,11.7851563 83.125,11.828125 C83.125,11.9921875 83.1904297,12.1328125 83.3212891,12.25 C83.4521484,12.3671875 83.5996094,12.4257812 83.7636719,12.4257812 C83.9941406,12.4257812 84.1660156,12.3105469 84.2792969,12.0800781 Z" id="Path"></path>
+                </g>
+                <g id="node-/-01_top" transform="translate(122, 16)" xlink:href="#path-13">
+                    <path stroke="#8E8E8E" d="M39.605,0.5 C45.0599165,0.5 49.482,4.92208353 49.482,10.377 C49.482,15.8319165 45.0599165,20.254 39.605,20.254 L10.377,20.254 C4.92208353,20.254 0.5,15.8319165 0.5,10.377 C0.5,5.29810944 4.35169591,1.06030081 9.36652997,0.551016719 L10.4022157,0.499363801 Z" stroke-linejoin="square" fill="#F2F2F2"></path>
+                    <g id="Score" transform="translate(8.8564, 4.625)" fill="#8E8E8E">
+                        <path d="M4.26074219,10.1289062 C4.92089844,10.1289062 5.49707031,10.03125 5.98925781,9.8359375 C6.48144531,9.640625 6.86230469,9.35449219 7.13183594,8.97753906 C7.40136719,8.60058594 7.53613281,8.15429688 7.53613281,7.63867188 C7.53613281,7.37695312 7.50292969,7.13867188 7.43652344,6.92382813 C7.37011719,6.70898438 7.27832031,6.52050781 7.16113281,6.35839844 C7.04394531,6.19628906 6.89257813,6.04882813 6.70703125,5.91601563 C6.52148438,5.78320312 6.3203125,5.66699219 6.10351562,5.56738281 C5.88671875,5.46777344 5.63378906,5.37109375 5.34472656,5.27734375 L3.49316406,4.68554688 C3.18847656,4.58398437 2.96679687,4.45214844 2.828125,4.29003906 C2.68945313,4.12792969 2.62011719,3.91601562 2.62011719,3.65429687 C2.62011719,3.38476562 2.70605469,3.1640625 2.87792969,2.9921875 C3.04980469,2.8203125 3.265625,2.70117187 3.52539063,2.63476562 C3.78515625,2.56835938 4.09277344,2.53515625 4.44824219,2.53515625 C5.15917969,2.53515625 5.84277344,2.72460937 6.49902344,3.10351563 C6.59667969,3.16210938 6.69238281,3.19140625 6.78613281,3.19140625 C6.93457031,3.19140625 7.06152344,3.12890625 7.16699219,3.00390625 C7.27246094,2.87890625 7.32519531,2.74023437 7.32519531,2.58789063 C7.32519531,2.51757813 7.3125,2.45214844 7.28710938,2.39160156 C7.26171875,2.33105469 7.22167969,2.27929688 7.16699219,2.23632813 C6.88574219,1.99804688 6.48632812,1.79492187 5.96875,1.62695312 C5.45117188,1.45898438 4.90917969,1.375 4.34277344,1.375 C3.43261719,1.375 2.68457031,1.58398438 2.09863281,2.00195312 C1.51269531,2.41992187 1.21972656,2.98828125 1.21972656,3.70703125 C1.21972656,4.28125 1.36816406,4.73535156 1.66503906,5.06933594 C1.96191406,5.40332031 2.43066406,5.67773438 3.07128906,5.89257813 L4.93457031,6.51367188 C5.35644531,6.65429688 5.65820312,6.81445313 5.83984375,6.99414062 C6.02148438,7.17382813 6.11230469,7.43359375 6.11230469,7.7734375 C6.11230469,8.16796875 5.94335938,8.46582031 5.60546875,8.66699219 C5.26757813,8.86816406 4.82714844,8.96875 4.28417969,8.96875 C3.42871094,8.96875 2.64160156,8.72070313 1.92285156,8.22460938 C1.89941406,8.20898438 1.87402344,8.19726563 1.84667969,8.18945313 C1.81933594,8.18164063 1.79199219,8.17773438 1.76464844,8.17773438 C1.69433594,8.17773438 1.62109375,8.203125 1.54492188,8.25390625 C1.46875,8.3046875 1.40136719,8.36816406 1.34277344,8.44433594 C1.28417969,8.52050781 1.23632812,8.60351562 1.19921875,8.69335937 C1.16210938,8.78320313 1.14355469,8.8671875 1.14355469,8.9453125 C1.14355469,9.0390625 1.17285156,9.109375 1.23144531,9.15625 C1.98925781,9.8046875 2.99902344,10.1289062 4.26074219,10.1289062 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M11.4677734,10.0820312 C12.2607422,10.0820312 12.8583984,9.90820312 13.2607422,9.56054688 C13.4013672,9.43945312 13.4716797,9.30664063 13.4716797,9.16210938 C13.4716797,9.12695313 13.4677734,9.09277344 13.4599609,9.05957031 C13.4521484,9.02636719 13.4394531,8.99511719 13.421875,8.96582031 C13.4042969,8.93652344 13.3837891,8.91015625 13.3603516,8.88671875 C13.3369141,8.86328125 13.3115234,8.84375 13.2841797,8.828125 C13.2568359,8.8125 13.2265625,8.79980469 13.1933594,8.79003906 C13.1601563,8.78027344 13.1259766,8.77539062 13.0908203,8.77539062 C13.0439453,8.77539062 12.9960938,8.78320312 12.9472656,8.79882812 C12.8984375,8.81445312 12.8505859,8.83984375 12.8037109,8.875 C12.4951172,9.09765625 12.0830078,9.20898438 11.5673828,9.20898438 C11.0400391,9.20898438 10.6269531,9.01171875 10.328125,8.6171875 C10.0292969,8.22265625 9.87988281,7.70507813 9.87988281,7.06445312 C9.87988281,6.42773437 10.0341797,5.91210938 10.3427734,5.51757812 C10.6513672,5.12304687 11.0712891,4.92578125 11.6025391,4.92578125 C11.9814453,4.92578125 12.3427734,5.01953125 12.6865234,5.20703125 C12.7294922,5.23046875 12.7734375,5.24804687 12.8183594,5.25976562 C12.8632813,5.27148438 12.9072266,5.27734375 12.9501953,5.27734375 C12.9970703,5.27734375 13.0419922,5.27148438 13.0849609,5.25976562 C13.1279297,5.24804687 13.1660156,5.23046875 13.1992188,5.20703125 C13.2324219,5.18359375 13.2617188,5.15625 13.2871094,5.125 C13.3125,5.09375 13.3320313,5.05957031 13.3457031,5.02246094 C13.359375,4.98535156 13.3662109,4.9453125 13.3662109,4.90234375 C13.3662109,4.859375 13.3574219,4.81640625 13.3398438,4.7734375 C13.3222656,4.73046875 13.2949219,4.68652344 13.2578125,4.64160156 C13.2207031,4.59667969 13.1748047,4.5546875 13.1201172,4.515625 C12.6904297,4.21484375 12.1494141,4.06445312 11.4970703,4.06445312 C11.0673828,4.06445312 10.6699219,4.14453125 10.3046875,4.3046875 C9.93945312,4.46484375 9.6328125,4.68164063 9.38476562,4.95507813 C9.13671875,5.22851562 8.94335938,5.54785156 8.8046875,5.91308594 C8.66601562,6.27832031 8.59667969,6.6640625 8.59667969,7.0703125 C8.59667969,7.625 8.7109375,8.12890625 8.93945312,8.58203125 C9.16796875,9.03515625 9.50292969,9.3984375 9.94433594,9.671875 C10.3857422,9.9453125 10.8935547,10.0820312 11.4677734,10.0820312 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M17.2509766,9.20898438 C16.7080078,9.20898438 16.2832031,9.02148438 15.9765625,8.64648438 C15.6699219,8.27148438 15.5166016,7.75 15.5166016,7.08203125 C15.5166016,6.40625 15.6699219,5.87890625 15.9765625,5.5 C16.2832031,5.12109375 16.7080078,4.93164062 17.2509766,4.93164062 C17.7939453,4.93164062 18.21875,5.12207031 18.5253906,5.50292969 C18.8320312,5.88378906 18.9853516,6.41015625 18.9853516,7.08203125 C18.9853516,7.75 18.8330078,8.27148438 18.5283203,8.64648438 C18.2236328,9.02148438 17.7978516,9.20898438 17.2509766,9.20898438 Z M17.2509766,10.0820312 C17.6494141,10.0820312 18.0146484,10.0263672 18.3466797,9.91503906 C18.6787109,9.80371094 18.9609375,9.65332031 19.1933594,9.46386719 C19.4257812,9.27441406 19.6230469,9.05078125 19.7851562,8.79296875 C19.9472656,8.53515625 20.0664062,8.26269531 20.1425781,7.97558594 C20.21875,7.68847656 20.2568359,7.390625 20.2568359,7.08203125 C20.2568359,6.67578125 20.1923828,6.29296875 20.0634766,5.93359375 C19.9345703,5.57421875 19.7480469,5.25390625 19.5039062,4.97265625 C19.2597656,4.69140625 18.9443359,4.46972656 18.5576172,4.30761719 C18.1708984,4.14550781 17.7353516,4.06445312 17.2509766,4.06445312 C16.7626953,4.06445312 16.3232422,4.14746094 15.9326172,4.31347656 C15.5419922,4.47949219 15.2265625,4.70410156 14.9863281,4.98730469 C14.7460938,5.27050781 14.5625,5.59082031 14.4355469,5.94824219 C14.3085938,6.30566406 14.2451172,6.68359375 14.2451172,7.08203125 C14.2451172,7.47265625 14.3076172,7.84472656 14.4326172,8.19824219 C14.5576172,8.55175781 14.7402344,8.87011719 14.9804688,9.15332031 C15.2207031,9.43652344 15.5361328,9.66210938 15.9267578,9.83007812 C16.3173828,9.99804687 16.7587891,10.0820312 17.2509766,10.0820312 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M22.1787109,10 C22.3544922,10 22.5078125,9.94628906 22.6386719,9.83886719 C22.7695312,9.73144531 22.8349609,9.58203125 22.8349609,9.390625 L22.8349609,6.66015625 C22.8349609,6.37890625 22.8779297,6.140625 22.9638672,5.9453125 C23.0498047,5.75 23.1728516,5.60058594 23.3330078,5.49707031 C23.4931641,5.39355469 23.6679688,5.3203125 23.8574219,5.27734375 C24.046875,5.234375 24.2646484,5.21289062 24.5107422,5.21289062 C24.6396484,5.21289062 24.7412109,5.16210937 24.8154297,5.06054688 C24.8896484,4.95898438 24.9267578,4.83789062 24.9267578,4.69726562 C24.9267578,4.62695312 24.9169922,4.55859375 24.8974609,4.4921875 C24.8779297,4.42578125 24.8486328,4.36621094 24.8095703,4.31347656 C24.7705078,4.26074219 24.7197266,4.21875 24.6572266,4.1875 C24.5947266,4.15625 24.5244141,4.140625 24.4462891,4.140625 C24.0673828,4.140625 23.7265625,4.26367188 23.4238281,4.50976562 C23.1210938,4.75585938 22.9189453,5.06054688 22.8173828,5.42382812 L22.8349609,4.69726562 C22.8388672,4.52539062 22.7783203,4.390625 22.6533203,4.29296875 C22.5283203,4.1953125 22.3779297,4.14648438 22.2021484,4.14648438 C22.0888672,4.14648438 21.9833984,4.16699219 21.8857422,4.20800781 C21.7880859,4.24902344 21.7070312,4.31542969 21.6425781,4.40722656 C21.578125,4.49902344 21.5458984,4.609375 21.5458984,4.73828125 L21.5458984,9.390625 C21.5458984,9.5859375 21.6083984,9.73632812 21.7333984,9.84179688 C21.8583984,9.94726562 22.0068359,10 22.1787109,10 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M28.5830078,10.0820312 C29.4345703,10.0820312 30.1533203,9.82421875 30.7392578,9.30859375 C30.8681641,9.203125 30.9326172,9.07617188 30.9326172,8.92773437 C30.9326172,8.81054688 30.8925781,8.70703125 30.8125,8.6171875 C30.7324219,8.52734375 30.6376953,8.48242188 30.5283203,8.48242188 C30.4541016,8.48242188 30.3798828,8.5078125 30.3056641,8.55859375 C29.7080078,8.98046875 29.1650391,9.19140625 28.6767578,9.19140625 C28.1494141,9.1875 27.7285156,9.03808594 27.4140625,8.74316406 C27.0996094,8.44824219 26.9287109,7.99023438 26.9013672,7.36914062 L30.7451172,7.36914062 C30.8779297,7.36914062 30.9746094,7.32324219 31.0351562,7.23144531 C31.0957031,7.13964844 31.1259766,7.0234375 31.1259766,6.8828125 C31.1181641,6.49609375 31.0595703,6.13769531 30.9501953,5.80761719 C30.8408203,5.47753906 30.6816406,5.18066406 30.4726562,4.91699219 C30.2636719,4.65332031 29.9863281,4.4453125 29.640625,4.29296875 C29.2949219,4.140625 28.8974609,4.06445312 28.4482422,4.06445312 C28.0068359,4.06445312 27.6044922,4.14453125 27.2412109,4.3046875 C26.8779297,4.46484375 26.5791016,4.68164063 26.3447266,4.95507813 C26.1103516,5.22851562 25.9296875,5.54199219 25.8027344,5.89550781 C25.6757812,6.24902344 25.6123047,6.62304687 25.6123047,7.01757812 C25.6123047,7.96289062 25.8847656,8.70800781 26.4296875,9.25292969 C26.9746094,9.79785156 27.6923828,10.0742188 28.5830078,10.0820312 Z M26.9130859,6.59570312 C26.9326172,6.38476562 26.9785156,6.18359375 27.0507812,5.9921875 C27.1230469,5.80078125 27.2216797,5.625 27.3466797,5.46484375 C27.4716797,5.3046875 27.6337891,5.17578125 27.8330078,5.078125 C28.0322266,4.98046875 28.2548828,4.93164062 28.5009766,4.93164062 C28.9775391,4.93164062 29.3408203,5.08984375 29.5908203,5.40625 C29.8408203,5.72265625 29.9833984,6.11914062 30.0185547,6.59570312 L26.9130859,6.59570312 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+            </g>
+        </g>
+        <g id="cross-encoder" stroke-width="1" transform="translate(506, 50)">
+            <g transform="translate(18, 340)" fill="#333333" fill-rule="nonzero">
+                <path d="M75.0634766,8.54785156 C75.0634766,7.85970052 75.230957,7.31054687 75.565918,6.90039063 C75.9008789,6.49023437 76.3645833,6.28515625 76.9570312,6.28515625 C77.3489583,6.28515625 77.75,6.38769531 78.1601562,6.59277344 C78.264974,6.65201823 78.3720703,6.68164063 78.4814453,6.68164063 C78.6363932,6.68164063 78.7674154,6.62695312 78.8745117,6.51757813 C78.9816081,6.40820312 79.0351562,6.27832031 79.0351562,6.12792969 C79.0351562,5.91829427 78.9326172,5.7405599 78.7275391,5.59472656 C78.226237,5.23014323 77.5768229,5.04785156 76.7792969,5.04785156 C76.109375,5.04785156 75.5100911,5.20621745 74.9814453,5.52294922 C74.4527995,5.83968099 74.0460612,6.26464844 73.7612305,6.79785156 C73.4763997,7.33105469 73.3339844,7.91894531 73.3339844,8.56152344 C73.3339844,9.59147135 73.6529948,10.4357096 74.2910156,11.0942383 C74.9290365,11.7527669 75.7516276,12.0820312 76.7587891,12.0820312 C77.2600911,12.0820312 77.6930339,12.0239258 78.0576172,11.9077148 C78.4222005,11.7915039 78.7001953,11.6468099 78.8916016,11.4736328 C79.0738932,11.3186849 79.1650391,11.1363932 79.1650391,10.9267578 C79.1650391,10.7718099 79.1149089,10.6385091 79.0146484,10.5268555 C78.914388,10.4152018 78.7913411,10.359375 78.6455078,10.359375 C78.5270182,10.359375 78.4108073,10.398112 78.296875,10.4755859 C77.9414062,10.7125651 77.4833984,10.8310547 76.9228516,10.8310547 C76.3440755,10.8310547 75.8894857,10.6225586 75.559082,10.2055664 C75.2286784,9.78857422 75.0634766,9.2360026 75.0634766,8.54785156 Z" id="Path"></path>
+                <path d="M81.2568359,12 C81.5120443,12 81.7205404,11.9202474 81.8823242,11.7607422 C82.0441081,11.601237 82.125,11.3733724 82.125,11.0771484 L82.125,8.19238281 C82.125,7.8688151 82.1774089,7.5953776 82.2822266,7.37207031 C82.3870443,7.14876302 82.5351562,6.98242187 82.7265625,6.87304687 C82.9179688,6.76367188 83.1173503,6.68733724 83.324707,6.64404297 C83.5320638,6.6007487 83.7701823,6.57910156 84.0390625,6.57910156 C84.2167969,6.57910156 84.3580729,6.5061849 84.4628906,6.36035156 C84.5677083,6.21451823 84.6201172,6.04589844 84.6201172,5.85449219 C84.6201172,5.65397135 84.563151,5.48307292 84.4492188,5.34179688 C84.3352865,5.20052083 84.1735026,5.12988281 83.9638672,5.12988281 C83.5354818,5.12988281 83.1492513,5.27685547 82.8051758,5.57080078 C82.4611003,5.86474609 82.2275391,6.21679688 82.1044922,6.62695312 L82.125,5.92285156 C82.1295573,5.6813151 82.0463867,5.49104818 81.8754883,5.35205078 C81.7045898,5.21305339 81.5029297,5.14355469 81.2705078,5.14355469 C81.0335286,5.14355469 80.8295898,5.21533203 80.6586914,5.35888672 C80.487793,5.50244141 80.4023438,5.70410156 80.4023438,5.96386719 L80.4023438,11.0634766 C80.4023438,11.3642578 80.4832357,11.5955404 80.6450195,11.7573242 C80.8068034,11.9191081 81.0107422,12 81.2568359,12 Z" id="Path"></path>
+                <path d="M88.9199219,10.9130859 C88.3320312,10.9130859 87.8740234,10.7080078 87.5458984,10.2978516 C87.2177734,9.88769531 87.0537109,9.31347656 87.0537109,8.57519531 C87.0537109,7.82779948 87.2166341,7.24788411 87.5424805,6.83544922 C87.8683268,6.42301432 88.327474,6.21679688 88.9199219,6.21679688 C89.5123698,6.21679688 89.9726562,6.42415365 90.3007812,6.83886719 C90.6289062,7.25358073 90.7929688,7.83235677 90.7929688,8.57519531 C90.7929688,9.31347656 90.6289062,9.88769531 90.3007812,10.2978516 C89.9726562,10.7080078 89.5123698,10.9130859 88.9199219,10.9130859 Z M88.9199219,12.0820312 C89.4029948,12.0820312 89.8450521,12.0159505 90.2460938,11.8837891 C90.6471354,11.7516276 90.9855143,11.5750326 91.2612305,11.3540039 C91.5369466,11.1329753 91.7693685,10.870931 91.9584961,10.5678711 C92.1476237,10.2648112 92.2866211,9.9469401 92.3754883,9.61425781 C92.4643555,9.28157552 92.5087891,8.93522135 92.5087891,8.57519531 C92.5087891,8.1969401 92.4620768,7.83463542 92.3686523,7.48828125 C92.2752279,7.14192708 92.1305339,6.8194987 91.9345703,6.52099609 C91.7386068,6.22249349 91.5016276,5.96500651 91.2236328,5.74853516 C90.945638,5.5320638 90.6095378,5.36116536 90.215332,5.23583984 C89.8211263,5.11051432 89.3893229,5.04785156 88.9199219,5.04785156 C88.4414062,5.04785156 88.0027669,5.11279297 87.6040039,5.24267578 C87.2052409,5.37255859 86.8680013,5.54915365 86.5922852,5.77246094 C86.316569,5.99576823 86.0830078,6.2578125 85.8916016,6.55859375 C85.7001953,6.859375 85.5600586,7.17952474 85.4711914,7.51904297 C85.3823242,7.8585612 85.3378906,8.21061198 85.3378906,8.57519531 C85.3378906,9.03548177 85.4108073,9.47070312 85.5566406,9.88085938 C85.702474,10.2910156 85.9155273,10.6624349 86.1958008,10.9951172 C86.4760742,11.3277995 86.8509115,11.5921224 87.3203125,11.7880859 C87.7897135,11.9840495 88.3229167,12.0820312 88.9199219,12.0820312 Z" id="Shape"></path>
+                <path d="M93.9716797,11.3095703 C94.6962891,11.8382161 95.5895182,12.1025391 96.6513672,12.1025391 C97.4716797,12.1025391 98.1381836,11.9179688 98.6508789,11.5488281 C99.1635742,11.1796875 99.4199219,10.6464844 99.4199219,9.94921875 C99.4199219,9.39322917 99.2501628,8.95800781 98.9106445,8.64355469 C98.5711263,8.32910156 98.0345052,8.08072917 97.3007812,7.8984375 L96.2480469,7.63183594 C95.8697917,7.5452474 95.613444,7.45068359 95.4790039,7.34814453 C95.3445638,7.24560547 95.2773438,7.08723958 95.2773438,6.87304687 C95.2773438,6.63151042 95.4003906,6.45263672 95.6464844,6.33642578 C95.8925781,6.22021484 96.1910807,6.16210938 96.5419922,6.16210938 C96.6468099,6.16210938 96.7504883,6.16666667 96.8530273,6.17578125 C96.9555664,6.18489583 97.0592448,6.19970703 97.1640625,6.22021484 C97.2688802,6.24072266 97.3588867,6.25895182 97.434082,6.27490234 C97.5092773,6.29085286 97.5992839,6.31591797 97.7041016,6.35009766 C97.8089193,6.38427734 97.8806966,6.4070638 97.9194336,6.41845703 C97.9581706,6.42985026 98.0253906,6.45377604 98.1210938,6.49023438 C98.2167969,6.52669271 98.2669271,6.54492187 98.2714844,6.54492187 C98.3717448,6.58138021 98.460612,6.59960937 98.5380859,6.59960937 C98.6930339,6.59960937 98.8172201,6.54833984 98.9106445,6.44580078 C99.004069,6.34326172 99.0507812,6.21907552 99.0507812,6.07324219 C99.0507812,5.82714844 98.9277344,5.64485677 98.6816406,5.52636719 C97.9752604,5.18457031 97.2278646,5.01367188 96.4394531,5.01367188 C95.6236979,5.01367188 94.9651693,5.18115234 94.4638672,5.51611328 C93.9625651,5.85107422 93.7119141,6.33072917 93.7119141,6.95507813 C93.7119141,7.21484375 93.7460938,7.44156901 93.8144531,7.63525391 C93.8828125,7.8289388 93.9716797,7.99072266 94.0810547,8.12060547 C94.1904297,8.25048828 94.3476562,8.36897786 94.5527344,8.47607422 C94.7578125,8.58317057 94.960612,8.67089844 95.1611328,8.73925781 C95.3616536,8.80761719 95.6236979,8.88509115 95.9472656,8.97167969 L97.0136719,9.24511719 C97.6471354,9.4000651 97.9638672,9.67122396 97.9638672,10.0585938 C97.9638672,10.2773438 97.8898112,10.4562174 97.7416992,10.5952148 C97.5935872,10.7342122 97.4158529,10.8299154 97.2084961,10.8823242 C97.0011393,10.9347331 96.7652995,10.9609375 96.5009766,10.9609375 C96.2320964,10.9609375 95.9632161,10.9233398 95.6943359,10.8481445 C95.4254557,10.7729492 95.2306315,10.7057292 95.1098633,10.6464844 C94.9890951,10.5872396 94.8284505,10.500651 94.6279297,10.3867188 C94.5048828,10.3183594 94.3863932,10.2841797 94.2724609,10.2841797 C94.1129557,10.2841797 93.9819336,10.3411458 93.8793945,10.4550781 C93.7768555,10.5690104 93.7255859,10.7011719 93.7255859,10.8515625 C93.7255859,11.0384115 93.8076172,11.1910807 93.9716797,11.3095703 Z" id="Path"></path>
+                <path d="M100.814453,11.3095703 C101.539062,11.8382161 102.432292,12.1025391 103.494141,12.1025391 C104.314453,12.1025391 104.980957,11.9179688 105.493652,11.5488281 C106.006348,11.1796875 106.262695,10.6464844 106.262695,9.94921875 C106.262695,9.39322917 106.092936,8.95800781 105.753418,8.64355469 C105.4139,8.32910156 104.877279,8.08072917 104.143555,7.8984375 L103.09082,7.63183594 C102.712565,7.5452474 102.456217,7.45068359 102.321777,7.34814453 C102.187337,7.24560547 102.120117,7.08723958 102.120117,6.87304687 C102.120117,6.63151042 102.243164,6.45263672 102.489258,6.33642578 C102.735352,6.22021484 103.033854,6.16210938 103.384766,6.16210938 C103.489583,6.16210938 103.593262,6.16666667 103.695801,6.17578125 C103.79834,6.18489583 103.902018,6.19970703 104.006836,6.22021484 C104.111654,6.24072266 104.20166,6.25895182 104.276855,6.27490234 C104.352051,6.29085286 104.442057,6.31591797 104.546875,6.35009766 C104.651693,6.38427734 104.72347,6.4070638 104.762207,6.41845703 C104.800944,6.42985026 104.868164,6.45377604 104.963867,6.49023438 C105.05957,6.52669271 105.109701,6.54492187 105.114258,6.54492187 C105.214518,6.58138021 105.303385,6.59960937 105.380859,6.59960937 C105.535807,6.59960937 105.659993,6.54833984 105.753418,6.44580078 C105.846842,6.34326172 105.893555,6.21907552 105.893555,6.07324219 C105.893555,5.82714844 105.770508,5.64485677 105.524414,5.52636719 C104.818034,5.18457031 104.070638,5.01367188 103.282227,5.01367188 C102.466471,5.01367188 101.807943,5.18115234 101.306641,5.51611328 C100.805339,5.85107422 100.554688,6.33072917 100.554688,6.95507813 C100.554688,7.21484375 100.588867,7.44156901 100.657227,7.63525391 C100.725586,7.8289388 100.814453,7.99072266 100.923828,8.12060547 C101.033203,8.25048828 101.19043,8.36897786 101.395508,8.47607422 C101.600586,8.58317057 101.803385,8.67089844 102.003906,8.73925781 C102.204427,8.80761719 102.466471,8.88509115 102.790039,8.97167969 L103.856445,9.24511719 C104.489909,9.4000651 104.806641,9.67122396 104.806641,10.0585938 C104.806641,10.2773438 104.732585,10.4562174 104.584473,10.5952148 C104.436361,10.7342122 104.258626,10.8299154 104.05127,10.8823242 C103.843913,10.9347331 103.608073,10.9609375 103.34375,10.9609375 C103.07487,10.9609375 102.80599,10.9233398 102.537109,10.8481445 C102.268229,10.7729492 102.073405,10.7057292 101.952637,10.6464844 C101.831868,10.5872396 101.671224,10.500651 101.470703,10.3867188 C101.347656,10.3183594 101.229167,10.2841797 101.115234,10.2841797 C100.955729,10.2841797 100.824707,10.3411458 100.722168,10.4550781 C100.619629,10.5690104 100.568359,10.7011719 100.568359,10.8515625 C100.568359,11.0384115 100.650391,11.1910807 100.814453,11.3095703 Z" id="Path"></path>
+                <path d="M107.670898,9.02636719 L110.412109,9.02636719 C110.61263,9.02636719 110.78125,8.9625651 110.917969,8.83496094 C111.054688,8.70735677 111.123047,8.54785156 111.123047,8.35644531 C111.123047,8.16048177 111.056966,7.99983724 110.924805,7.87451172 C110.792643,7.7491862 110.621745,7.68652344 110.412109,7.68652344 L107.705078,7.68652344 C107.495443,7.68652344 107.324544,7.7491862 107.192383,7.87451172 C107.060221,7.99983724 106.994141,8.16048177 106.994141,8.35644531 C106.994141,8.54785156 107.059082,8.70735677 107.188965,8.83496094 C107.318848,8.9625651 107.479492,9.02636719 107.670898,9.02636719 Z" id="Path"></path>
+                <path d="M111.922852,8.50683594 C111.922852,9.63248698 112.247559,10.5097656 112.896973,11.1386719 C113.546387,11.7675781 114.411133,12.0820312 115.491211,12.0820312 C116.452799,12.0820312 117.29362,11.7789714 118.013672,11.1728516 C118.159505,11.0452474 118.232422,10.8902995 118.232422,10.7080078 C118.232422,10.5485026 118.177734,10.4049479 118.068359,10.2773437 C117.958984,10.1497396 117.833659,10.0859375 117.692383,10.0859375 C117.59668,10.0859375 117.507812,10.1178385 117.425781,10.1816406 C116.783203,10.6419271 116.179362,10.8720703 115.614258,10.8720703 C114.370117,10.8720703 113.711589,10.2294922 113.638672,8.94433594 L117.958984,8.94433594 C118.123047,8.94433594 118.246094,8.88964844 118.328125,8.78027344 C118.410156,8.67089844 118.451172,8.52734375 118.451172,8.34960937 C118.446615,7.8984375 118.380534,7.47916667 118.25293,7.09179687 C118.125326,6.70442708 117.937337,6.35579427 117.688965,6.04589844 C117.440592,5.7360026 117.112467,5.49332682 116.70459,5.31787109 C116.296712,5.14241536 115.828451,5.0546875 115.299805,5.0546875 C114.611654,5.0546875 114.007812,5.21419271 113.488281,5.53320312 C112.96875,5.85221354 112.577962,6.27034505 112.315918,6.78759766 C112.053874,7.30485026 111.922852,7.87792969 111.922852,8.50683594 Z M113.652344,7.98046875 C113.693359,7.48372396 113.858561,7.05989583 114.147949,6.70898438 C114.437337,6.35807292 114.83724,6.18261719 115.347656,6.18261719 C115.871745,6.18261719 116.26709,6.35123698 116.533691,6.68847656 C116.800293,7.02571615 116.951823,7.45638021 116.988281,7.98046875 L113.652344,7.98046875 Z" id="Shape"></path>
+                <path d="M120.645508,12 C120.896159,12 121.102376,11.9168294 121.26416,11.7504883 C121.425944,11.5841471 121.506836,11.3528646 121.506836,11.0566406 L121.506836,8.16503906 C121.506836,7.58626302 121.673177,7.11686198 122.005859,6.75683594 C122.338542,6.3968099 122.732747,6.21679688 123.188477,6.21679688 C123.566732,6.21679688 123.877767,6.34098307 124.121582,6.58935547 C124.365397,6.83772786 124.487305,7.19661458 124.487305,7.66601562 L124.487305,11.0566406 C124.487305,11.3528646 124.567057,11.5841471 124.726562,11.7504883 C124.886068,11.9168294 125.086589,12 125.328125,12 C125.583333,12 125.792969,11.9168294 125.957031,11.7504883 C126.121094,11.5841471 126.203125,11.3528646 126.203125,11.0566406 L126.203125,7.67285156 C126.203125,7.23535156 126.137044,6.84570313 126.004883,6.50390625 C125.872721,6.16210938 125.692708,5.88753255 125.464844,5.68017578 C125.236979,5.47281901 124.978353,5.31559245 124.688965,5.20849609 C124.399577,5.10139974 124.09082,5.04785156 123.762695,5.04785156 C123.229492,5.04785156 122.770345,5.14811198 122.385254,5.34863281 C122.000163,5.54915365 121.707357,5.84309896 121.506836,6.23046875 L121.506836,5.99121094 C121.506836,5.72233073 121.427083,5.51383464 121.267578,5.36572266 C121.108073,5.21761068 120.905273,5.14355469 120.65918,5.14355469 C120.408529,5.14355469 120.201172,5.21875 120.037109,5.36914062 C119.873047,5.51953125 119.791016,5.73144531 119.791016,6.00488281 L119.791016,11.0566406 C119.791016,11.3528646 119.870768,11.5841471 120.030273,11.7504883 C120.189779,11.9168294 120.394857,12 120.645508,12 Z" id="Path"></path>
+                <path d="M129.327148,8.54785156 C129.327148,7.85970052 129.494629,7.31054687 129.82959,6.90039063 C130.164551,6.49023437 130.628255,6.28515625 131.220703,6.28515625 C131.61263,6.28515625 132.013672,6.38769531 132.423828,6.59277344 C132.528646,6.65201823 132.635742,6.68164063 132.745117,6.68164063 C132.900065,6.68164063 133.031087,6.62695312 133.138184,6.51757813 C133.24528,6.40820312 133.298828,6.27832031 133.298828,6.12792969 C133.298828,5.91829427 133.196289,5.7405599 132.991211,5.59472656 C132.489909,5.23014323 131.840495,5.04785156 131.042969,5.04785156 C130.373047,5.04785156 129.773763,5.20621745 129.245117,5.52294922 C128.716471,5.83968099 128.309733,6.26464844 128.024902,6.79785156 C127.740072,7.33105469 127.597656,7.91894531 127.597656,8.56152344 C127.597656,9.59147135 127.916667,10.4357096 128.554688,11.0942383 C129.192708,11.7527669 130.015299,12.0820312 131.022461,12.0820312 C131.523763,12.0820312 131.956706,12.0239258 132.321289,11.9077148 C132.685872,11.7915039 132.963867,11.6468099 133.155273,11.4736328 C133.337565,11.3186849 133.428711,11.1363932 133.428711,10.9267578 C133.428711,10.7718099 133.378581,10.6385091 133.27832,10.5268555 C133.17806,10.4152018 133.055013,10.359375 132.90918,10.359375 C132.79069,10.359375 132.674479,10.398112 132.560547,10.4755859 C132.205078,10.7125651 131.74707,10.8310547 131.186523,10.8310547 C130.607747,10.8310547 130.153158,10.6225586 129.822754,10.2055664 C129.49235,9.78857422 129.327148,9.2360026 129.327148,8.54785156 Z" id="Path"></path>
+                <path d="M137.885742,10.9130859 C137.297852,10.9130859 136.839844,10.7080078 136.511719,10.2978516 C136.183594,9.88769531 136.019531,9.31347656 136.019531,8.57519531 C136.019531,7.82779948 136.182454,7.24788411 136.508301,6.83544922 C136.834147,6.42301432 137.293294,6.21679688 137.885742,6.21679688 C138.47819,6.21679688 138.938477,6.42415365 139.266602,6.83886719 C139.594727,7.25358073 139.758789,7.83235677 139.758789,8.57519531 C139.758789,9.31347656 139.594727,9.88769531 139.266602,10.2978516 C138.938477,10.7080078 138.47819,10.9130859 137.885742,10.9130859 Z M137.885742,12.0820312 C138.368815,12.0820312 138.810872,12.0159505 139.211914,11.8837891 C139.612956,11.7516276 139.951335,11.5750326 140.227051,11.3540039 C140.502767,11.1329753 140.735189,10.870931 140.924316,10.5678711 C141.113444,10.2648112 141.252441,9.9469401 141.341309,9.61425781 C141.430176,9.28157552 141.474609,8.93522135 141.474609,8.57519531 C141.474609,8.1969401 141.427897,7.83463542 141.334473,7.48828125 C141.241048,7.14192708 141.096354,6.8194987 140.900391,6.52099609 C140.704427,6.22249349 140.467448,5.96500651 140.189453,5.74853516 C139.911458,5.5320638 139.575358,5.36116536 139.181152,5.23583984 C138.786947,5.11051432 138.355143,5.04785156 137.885742,5.04785156 C137.407227,5.04785156 136.968587,5.11279297 136.569824,5.24267578 C136.171061,5.37255859 135.833822,5.54915365 135.558105,5.77246094 C135.282389,5.99576823 135.048828,6.2578125 134.857422,6.55859375 C134.666016,6.859375 134.525879,7.17952474 134.437012,7.51904297 C134.348145,7.8585612 134.303711,8.21061198 134.303711,8.57519531 C134.303711,9.03548177 134.376628,9.47070312 134.522461,9.88085938 C134.668294,10.2910156 134.881348,10.6624349 135.161621,10.9951172 C135.441895,11.3277995 135.816732,11.5921224 136.286133,11.7880859 C136.755534,11.9840495 137.288737,12.0820312 137.885742,12.0820312 Z" id="Shape"></path>
+                <path d="M145.52832,12.0957031 C146.599284,12.0957031 147.353516,11.7014974 147.791016,10.9130859 L147.791016,11.1523438 C147.791016,11.4303385 147.866211,11.6411133 148.016602,11.784668 C148.166992,11.9282227 148.37207,12 148.631836,12 C148.87793,12 149.084147,11.9122721 149.250488,11.7368164 C149.416829,11.5613607 149.5,11.3255208 149.5,11.0292969 L149.5,3.06542969 C149.5,2.76920573 149.421387,2.54020182 149.26416,2.37841797 C149.106934,2.21663411 148.902995,2.13574219 148.652344,2.13574219 C148.397135,2.13574219 148.188639,2.21663411 148.026855,2.37841797 C147.865072,2.54020182 147.78418,2.76920573 147.78418,3.06542969 L147.78418,6.16210937 C147.342122,5.43294271 146.62207,5.06835937 145.624023,5.06835937 C144.698893,5.06835937 143.958333,5.37825521 143.402344,5.99804688 C142.846354,6.61783854 142.568359,7.46549479 142.568359,8.54101562 C142.568359,9.2109375 142.683431,9.81363932 142.913574,10.3491211 C143.143717,10.8846029 143.485514,11.3095703 143.938965,11.6240234 C144.392415,11.9384766 144.922201,12.0957031 145.52832,12.0957031 Z M145.972656,10.9472656 C145.658203,10.9472656 145.384766,10.8732096 145.152344,10.7250977 C144.919922,10.5769857 144.742188,10.3798828 144.619141,10.1337891 C144.496094,9.88769531 144.406087,9.6336263 144.349121,9.37158203 C144.292155,9.10953776 144.263672,8.83723958 144.263672,8.5546875 C144.263672,8.24934896 144.294434,7.96451823 144.355957,7.70019531 C144.41748,7.4358724 144.512044,7.19091797 144.639648,6.96533203 C144.767253,6.73974609 144.946126,6.5608724 145.17627,6.42871094 C145.406413,6.29654948 145.676432,6.23046875 145.986328,6.23046875 C146.396484,6.23046875 146.739421,6.33984375 147.015137,6.55859375 C147.290853,6.77734375 147.486816,7.05647786 147.603027,7.39599609 C147.719238,7.73551432 147.777344,8.12630208 147.777344,8.56835938 C147.777344,9.35221354 147.639486,9.94466146 147.36377,10.3457031 C147.088053,10.7467448 146.624349,10.9472656 145.972656,10.9472656 Z" id="Shape"></path>
+                <path d="M150.949219,8.50683594 C150.949219,9.63248698 151.273926,10.5097656 151.92334,11.1386719 C152.572754,11.7675781 153.4375,12.0820312 154.517578,12.0820312 C155.479167,12.0820312 156.319987,11.7789714 157.040039,11.1728516 C157.185872,11.0452474 157.258789,10.8902995 157.258789,10.7080078 C157.258789,10.5485026 157.204102,10.4049479 157.094727,10.2773437 C156.985352,10.1497396 156.860026,10.0859375 156.71875,10.0859375 C156.623047,10.0859375 156.53418,10.1178385 156.452148,10.1816406 C155.80957,10.6419271 155.205729,10.8720703 154.640625,10.8720703 C153.396484,10.8720703 152.737956,10.2294922 152.665039,8.94433594 L156.985352,8.94433594 C157.149414,8.94433594 157.272461,8.88964844 157.354492,8.78027344 C157.436523,8.67089844 157.477539,8.52734375 157.477539,8.34960937 C157.472982,7.8984375 157.406901,7.47916667 157.279297,7.09179687 C157.151693,6.70442708 156.963704,6.35579427 156.715332,6.04589844 C156.46696,5.7360026 156.138835,5.49332682 155.730957,5.31787109 C155.323079,5.14241536 154.854818,5.0546875 154.326172,5.0546875 C153.638021,5.0546875 153.03418,5.21419271 152.514648,5.53320312 C151.995117,5.85221354 151.604329,6.27034505 151.342285,6.78759766 C151.080241,7.30485026 150.949219,7.87792969 150.949219,8.50683594 Z M152.678711,7.98046875 C152.719727,7.48372396 152.884928,7.05989583 153.174316,6.70898438 C153.463704,6.35807292 153.863607,6.18261719 154.374023,6.18261719 C154.898112,6.18261719 155.293457,6.35123698 155.560059,6.68847656 C155.82666,7.02571615 155.97819,7.45638021 156.014648,7.98046875 L152.678711,7.98046875 Z" id="Shape"></path>
+                <path d="M159.678711,12 C159.933919,12 160.142415,11.9202474 160.304199,11.7607422 C160.465983,11.601237 160.546875,11.3733724 160.546875,11.0771484 L160.546875,8.19238281 C160.546875,7.8688151 160.599284,7.5953776 160.704102,7.37207031 C160.808919,7.14876302 160.957031,6.98242187 161.148438,6.87304687 C161.339844,6.76367188 161.539225,6.68733724 161.746582,6.64404297 C161.953939,6.6007487 162.192057,6.57910156 162.460938,6.57910156 C162.638672,6.57910156 162.779948,6.5061849 162.884766,6.36035156 C162.989583,6.21451823 163.041992,6.04589844 163.041992,5.85449219 C163.041992,5.65397135 162.985026,5.48307292 162.871094,5.34179688 C162.757161,5.20052083 162.595378,5.12988281 162.385742,5.12988281 C161.957357,5.12988281 161.571126,5.27685547 161.227051,5.57080078 C160.882975,5.86474609 160.649414,6.21679688 160.526367,6.62695312 L160.546875,5.92285156 C160.551432,5.6813151 160.468262,5.49104818 160.297363,5.35205078 C160.126465,5.21305339 159.924805,5.14355469 159.692383,5.14355469 C159.455404,5.14355469 159.251465,5.21533203 159.080566,5.35888672 C158.909668,5.50244141 158.824219,5.70410156 158.824219,5.96386719 L158.824219,11.0634766 C158.824219,11.3642578 158.905111,11.5955404 159.066895,11.7573242 C159.228678,11.9191081 159.432617,12 159.678711,12 Z" id="Path"></path>
+            </g>
+            <g id="Body" transform="translate(10, 150)">
+                <g id="vLLM-dash-box" transform="translate(0, 22)" xlink:href="#path-14">
+                    <path d="M133.674643,128 C133.674643,128.276142 133.450786,128.5 133.174643,128.5 L131.974643,128.5 C131.698501,128.5 131.474643,128.276142 131.474643,128 C131.474643,127.723858 131.698501,127.5 131.974643,127.5 L133.174643,127.5 C133.450786,127.5 133.674643,127.723858 133.674643,128 Z M247.074643,128 C247.074643,128.276142 246.850786,128.5 246.574643,128.5 L245.374643,128.5 C245.098501,128.5 244.874643,128.276142 244.874643,128 C244.874643,127.723858 245.098501,127.5 245.374643,127.5 L246.574643,127.5 C246.850786,127.5 247.074643,127.723858 247.074643,128 Z M142.074643,128 C142.074643,128.276142 141.850786,128.5 141.574643,128.5 L140.374643,128.5 C140.098501,128.5 139.874643,128.276142 139.874643,128 C139.874643,127.723858 140.098501,127.5 140.374643,127.5 L141.574643,127.5 C141.850786,127.5 142.074643,127.723858 142.074643,128 Z M146.274643,128 C146.274643,128.276142 146.050786,128.5 145.774643,128.5 L144.574643,128.5 C144.298501,128.5 144.074643,128.276142 144.074643,128 C144.074643,127.723858 144.298501,127.5 144.574643,127.5 L145.774643,127.5 C146.050786,127.5 146.274643,127.723858 146.274643,128 Z M11.8746433,128 C11.8746433,128.276142 11.6507857,128.5 11.3746433,128.5 L10.1746433,128.5 C9.89850097,128.5 9.67464335,128.276142 9.67464335,128 C9.67464335,127.723858 9.89850097,127.5 10.1746433,127.5 L11.3746434,127.5 C11.6507857,127.5 11.8746433,127.723858 11.8746433,128 Z M121.074643,128 C121.074643,128.276142 120.850786,128.5 120.574643,128.5 L119.374643,128.5 C119.098501,128.5 118.874643,128.276142 118.874643,128 C118.874643,127.723858 119.098501,127.5 119.374643,127.5 L120.574643,127.5 C120.850786,127.5 121.074643,127.723858 121.074643,128 Z M20.2746433,128 C20.2746433,128.276142 20.0507857,128.5 19.7746433,128.5 L18.5746433,128.5 C18.298501,128.5 18.0746433,128.276142 18.0746433,128 C18.0746433,127.723858 18.298501,127.5 18.5746433,127.5 L19.7746435,127.5 C20.0507857,127.5 20.2746433,127.723858 20.2746433,128 Z M24.4746433,128 C24.4746433,128.276142 24.2507857,128.5 23.9746433,128.5 L22.7746433,128.5 C22.498501,128.5 22.2746433,128.276142 22.2746433,128 C22.2746433,127.723858 22.498501,127.5 22.7746433,127.5 L23.9746435,127.5 C24.2507857,127.5 24.4746433,127.723858 24.4746433,128 Z M28.6746433,128 C28.6746433,128.276142 28.4507857,128.5 28.1746433,128.5 L26.9746433,128.5 C26.698501,128.5 26.4746433,128.276142 26.4746433,128 C26.4746433,127.723858 26.698501,127.5 26.9746433,127.5 L28.1746436,127.5 C28.4507857,127.5 28.6746433,127.723858 28.6746433,128 Z M32.8746433,128 C32.8746433,128.276142 32.6507857,128.5 32.3746433,128.5 L31.1746433,128.5 C30.898501,128.5 30.6746433,128.276142 30.6746433,128 C30.6746433,127.723858 30.898501,127.5 31.1746433,127.5 L32.3746434,127.5 C32.6507857,127.5 32.8746433,127.723858 32.8746433,128 Z M37.0746433,128 C37.0746433,128.276142 36.8507857,128.5 36.5746433,128.5 L35.3746433,128.5 C35.098501,128.5 34.8746433,128.276142 34.8746433,128 C34.8746433,127.723858 35.098501,127.5 35.3746433,127.5 L36.5746434,127.5 C36.8507857,127.5 37.0746433,127.723858 37.0746433,128 Z M41.2746433,128 C41.2746433,128.276142 41.0507857,128.5 40.7746433,128.5 L39.5746433,128.5 C39.298501,128.5 39.0746433,128.276142 39.0746433,128 C39.0746433,127.723858 39.298501,127.5 39.5746433,127.5 L40.7746434,127.5 C41.0507857,127.5 41.2746433,127.723858 41.2746433,128 Z M45.4746433,128 C45.4746433,128.276142 45.2507857,128.5 44.9746433,128.5 L43.7746433,128.5 C43.498501,128.5 43.2746433,128.276142 43.2746433,128 C43.2746433,127.723858 43.498501,127.5 43.7746433,127.5 L44.9746435,127.5 C45.2507857,127.5 45.4746433,127.723858 45.4746433,128 Z M49.6746433,128 C49.6746433,128.276142 49.4507857,128.5 49.1746433,128.5 L47.9746433,128.5 C47.698501,128.5 47.4746433,128.276142 47.4746433,128 C47.4746433,127.723858 47.698501,127.5 47.9746433,127.5 L49.1746435,127.5 C49.4507857,127.5 49.6746433,127.723858 49.6746433,128 Z M53.8746433,128 C53.8746433,128.276142 53.6507857,128.5 53.3746433,128.5 L52.1746433,128.5 C51.898501,128.5 51.6746433,128.276142 51.6746433,128 C51.6746433,127.723858 51.898501,127.5 52.1746433,127.5 L53.3746435,127.5 C53.6507857,127.5 53.8746433,127.723858 53.8746433,128 Z M150.474643,128 C150.474643,128.276142 150.250786,128.5 149.974643,128.5 L148.774643,128.5 C148.498501,128.5 148.274643,128.276142 148.274643,128 C148.274643,127.723858 148.498501,127.5 148.774643,127.5 L149.974643,127.5 C150.250786,127.5 150.474643,127.723858 150.474643,128 Z M154.674643,128 C154.674643,128.276142 154.450786,128.5 154.174643,128.5 L152.974643,128.5 C152.698501,128.5 152.474643,128.276142 152.474643,128 C152.474643,127.723858 152.698501,127.5 152.974643,127.5 L154.174643,127.5 C154.450786,127.5 154.674643,127.723858 154.674643,128 Z M158.874643,128 C158.874643,128.276142 158.650786,128.5 158.374643,128.5 L157.174643,128.5 C156.898501,128.5 156.674643,128.276142 156.674643,128 C156.674643,127.723858 156.898501,127.5 157.174643,127.5 L158.374643,127.5 C158.650786,127.5 158.874643,127.723858 158.874643,128 Z M163.074643,128 C163.074643,128.276142 162.850786,128.5 162.574643,128.5 L161.374643,128.5 C161.098501,128.5 160.874643,128.276142 160.874643,128 C160.874643,127.723858 161.098501,127.5 161.374643,127.5 L162.574643,127.5 C162.850786,127.5 163.074643,127.723858 163.074643,128 Z M167.274643,128 C167.274643,128.276142 167.050786,128.5 166.774643,128.5 L165.574643,128.5 C165.298501,128.5 165.074643,128.276142 165.074643,128 C165.074643,127.723858 165.298501,127.5 165.574643,127.5 L166.774643,127.5 C167.050786,127.5 167.274643,127.723858 167.274643,128 Z M171.474643,128 C171.474643,128.276142 171.250786,128.5 170.974643,128.5 L169.774643,128.5 C169.498501,128.5 169.274643,128.276142 169.274643,128 C169.274643,127.723858 169.498501,127.5 169.774643,127.5 L170.974643,127.5 C171.250786,127.5 171.474643,127.723858 171.474643,128 Z M175.674643,128 C175.674643,128.276142 175.450786,128.5 175.174643,128.5 L173.974643,128.5 C173.698501,128.5 173.474643,128.276142 173.474643,128 C173.474643,127.723858 173.698501,127.5 173.974643,127.5 L175.174643,127.5 C175.450786,127.5 175.674643,127.723858 175.674643,128 Z M179.874643,128 C179.874643,128.276142 179.650786,128.5 179.374643,128.5 L178.174643,128.5 C177.898501,128.5 177.674643,128.276142 177.674643,128 C177.674643,127.723858 177.898501,127.5 178.174643,127.5 L179.374643,127.5 C179.650786,127.5 179.874643,127.723858 179.874643,128 Z M184.074643,128 C184.074643,128.276142 183.850786,128.5 183.574643,128.5 L182.374643,128.5 C182.098501,128.5 181.874643,128.276142 181.874643,128 C181.874643,127.723858 182.098501,127.5 182.374643,127.5 L183.574643,127.5 C183.850786,127.5 184.074643,127.723858 184.074643,128 Z M188.274643,128 C188.274643,128.276142 188.050786,128.5 187.774643,128.5 L186.574643,128.5 C186.298501,128.5 186.074643,128.276142 186.074643,128 C186.074643,127.723858 186.298501,127.5 186.574643,127.5 L187.774643,127.5 C188.050786,127.5 188.274643,127.723858 188.274643,128 Z M192.474643,128 C192.474643,128.276142 192.250786,128.5 191.974643,128.5 L190.774643,128.5 C190.498501,128.5 190.274643,128.276142 190.274643,128 C190.274643,127.723858 190.498501,127.5 190.774643,127.5 L191.974643,127.5 C192.250786,127.5 192.474643,127.723858 192.474643,128 Z M196.674643,128 C196.674643,128.276142 196.450786,128.5 196.174643,128.5 L194.974643,128.5 C194.698501,128.5 194.474643,128.276142 194.474643,128 C194.474643,127.723858 194.698501,127.5 194.974643,127.5 L196.174643,127.5 C196.450786,127.5 196.674643,127.723858 196.674643,128 Z M200.874643,128 C200.874643,128.276142 200.650786,128.5 200.374643,128.5 L199.174643,128.5 C198.898501,128.5 198.674643,128.276142 198.674643,128 C198.674643,127.723858 198.898501,127.5 199.174643,127.5 L200.374643,127.5 C200.650786,127.5 200.874643,127.723858 200.874643,128 Z M205.074643,128 C205.074643,128.276142 204.850786,128.5 204.574643,128.5 L203.374643,128.5 C203.098501,128.5 202.874643,128.276142 202.874643,128 C202.874643,127.723858 203.098501,127.5 203.374643,127.5 L204.574643,127.5 C204.850786,127.5 205.074643,127.723858 205.074643,128 Z M209.274643,128 C209.274643,128.276142 209.050786,128.5 208.774643,128.5 L207.574643,128.5 C207.298501,128.5 207.074643,128.276142 207.074643,128 C207.074643,127.723858 207.298501,127.5 207.574643,127.5 L208.774643,127.5 C209.050786,127.5 209.274643,127.723858 209.274643,128 Z M213.474643,128 C213.474643,128.276142 213.250786,128.5 212.974643,128.5 L211.774643,128.5 C211.498501,128.5 211.274643,128.276142 211.274643,128 C211.274643,127.723858 211.498501,127.5 211.774643,127.5 L212.974643,127.5 C213.250786,127.5 213.474643,127.723858 213.474643,128 Z M217.674643,128 C217.674643,128.276142 217.450786,128.5 217.174643,128.5 L215.974643,128.5 C215.698501,128.5 215.474643,128.276142 215.474643,128 C215.474643,127.723858 215.698501,127.5 215.974643,127.5 L217.174643,127.5 C217.450786,127.5 217.674643,127.723858 217.674643,128 Z M221.874643,128 C221.874643,128.276142 221.650786,128.5 221.374643,128.5 L220.174643,128.5 C219.898501,128.5 219.674643,128.276142 219.674643,128 C219.674643,127.723858 219.898501,127.5 220.174643,127.5 L221.374643,127.5 C221.650786,127.5 221.874643,127.723858 221.874643,128 Z M226.074643,128 C226.074643,128.276142 225.850786,128.5 225.574643,128.5 L224.374643,128.5 C224.098501,128.5 223.874643,128.276142 223.874643,128 C223.874643,127.723858 224.098501,127.5 224.374643,127.5 L225.574643,127.5 C225.850786,127.5 226.074643,127.723858 226.074643,128 Z M230.274643,128 C230.274643,128.276142 230.050786,128.5 229.774643,128.5 L228.574643,128.5 C228.298501,128.5 228.074643,128.276142 228.074643,128 C228.074643,127.723858 228.298501,127.5 228.574643,127.5 L229.774643,127.5 C230.050786,127.5 230.274643,127.723858 230.274643,128 Z M234.474643,128 C234.474643,128.276142 234.250786,128.5 233.974643,128.5 L232.774643,128.5 C232.498501,128.5 232.274643,128.276142 232.274643,128 C232.274643,127.723858 232.498501,127.5 232.774643,127.5 L233.974643,127.5 C234.250786,127.5 234.474643,127.723858 234.474643,128 Z M238.674643,128 C238.674643,128.276142 238.450786,128.5 238.174643,128.5 L236.974643,128.5 C236.698501,128.5 236.474643,128.276142 236.474643,128 C236.474643,127.723858 236.698501,127.5 236.974643,127.5 L238.174643,127.5 C238.450786,127.5 238.674643,127.723858 238.674643,128 Z M242.874643,128 C242.874643,128.276142 242.650786,128.5 242.374643,128.5 L241.174643,128.5 C240.898501,128.5 240.674643,128.276142 240.674643,128 C240.674643,127.723858 240.898501,127.5 241.174643,127.5 L242.374643,127.5 C242.650786,127.5 242.874643,127.723858 242.874643,128 Z M125.274643,128 C125.274643,128.276142 125.050786,128.5 124.774643,128.5 L123.574643,128.5 C123.298501,128.5 123.074643,128.276142 123.074643,128 C123.074643,127.723858 123.298501,127.5 123.574643,127.5 L124.774643,127.5 C125.050786,127.5 125.274643,127.723858 125.274643,128 Z M251.274643,128 C251.274643,128.276142 251.050786,128.5 250.774643,128.5 L249.574643,128.5 C249.298501,128.5 249.074643,128.276142 249.074643,128 C249.074643,127.723858 249.298501,127.5 249.574643,127.5 L250.774643,127.5 C251.050786,127.5 251.274643,127.723858 251.274643,128 Z M137.874643,128 C137.874643,128.276142 137.650786,128.5 137.374643,128.5 L136.174643,128.5 C135.898501,128.5 135.674643,128.276142 135.674643,128 C135.674643,127.723858 135.898501,127.5 136.174643,127.5 L137.374643,127.5 C137.650786,127.5 137.874643,127.723858 137.874643,128 Z M16.0746433,128 C16.0746433,128.276142 15.8507857,128.5 15.5746433,128.5 L14.3746433,128.5 C14.098501,128.5 13.8746433,128.276142 13.8746433,128 C13.8746433,127.723858 14.098501,127.5 14.3746433,127.5 L15.5746435,127.5 C15.8507857,127.5 16.0746433,127.723858 16.0746433,128 Z M129.474643,128 C129.474643,128.276142 129.250786,128.5 128.974643,128.5 L127.774643,128.5 C127.498501,128.5 127.274643,128.276142 127.274643,128 C127.274643,127.723858 127.498501,127.5 127.774643,127.5 L128.974643,127.5 C129.250786,127.5 129.474643,127.723858 129.474643,128 Z M58.0746433,128 C58.0746433,128.276142 57.8507857,128.5 57.5746433,128.5 L56.3746433,128.5 C56.098501,128.5 55.8746433,128.276142 55.8746433,128 C55.8746433,127.723858 56.098501,127.5 56.3746433,127.5 L57.5746436,127.5 C57.8507857,127.5 58.0746433,127.723858 58.0746433,128 Z M62.2746433,128 C62.2746433,128.276142 62.0507857,128.5 61.7746433,128.5 L60.5746433,128.5 C60.298501,128.5 60.0746433,128.276142 60.0746433,128 C60.0746433,127.723858 60.298501,127.5 60.5746433,127.5 L61.7746434,127.5 C62.0507857,127.5 62.2746433,127.723858 62.2746433,128 Z M66.4746433,128 C66.4746433,128.276142 66.2507857,128.5 65.9746433,128.5 L64.7746433,128.5 C64.498501,128.5 64.2746433,128.276142 64.2746433,128 C64.2746433,127.723858 64.498501,127.5 64.7746433,127.5 L65.9746434,127.5 C66.2507857,127.5 66.4746433,127.723858 66.4746433,128 Z M70.6746433,128 C70.6746433,128.276142 70.4507857,128.5 70.1746433,128.5 L68.9746433,128.5 C68.698501,128.5 68.4746433,128.276142 68.4746433,128 C68.4746433,127.723858 68.698501,127.5 68.9746433,127.5 L70.1746434,127.5 C70.4507857,127.5 70.6746433,127.723858 70.6746433,128 Z M74.8746433,128 C74.8746433,128.276142 74.6507857,128.5 74.3746433,128.5 L73.1746433,128.5 C72.898501,128.5 72.6746433,128.276142 72.6746433,128 C72.6746433,127.723858 72.898501,127.5 73.1746433,127.5 L74.3746435,127.5 C74.6507857,127.5 74.8746433,127.723858 74.8746433,128 Z M79.0746433,128 C79.0746433,128.276142 78.8507857,128.5 78.5746433,128.5 L77.3746433,128.5 C77.098501,128.5 76.8746433,128.276142 76.8746433,128 C76.8746433,127.723858 77.098501,127.5 77.3746433,127.5 L78.5746435,127.5 C78.8507857,127.5 79.0746433,127.723858 79.0746433,128 Z M83.2746433,128 C83.2746433,128.276142 83.0507857,128.5 82.7746433,128.5 L81.5746433,128.5 C81.298501,128.5 81.0746433,128.276142 81.0746433,128 C81.0746433,127.723858 81.298501,127.5 81.5746433,127.5 L82.7746435,127.5 C83.0507857,127.5 83.2746433,127.723858 83.2746433,128 Z M87.4746433,128 C87.4746433,128.276142 87.2507857,128.5 86.9746433,128.5 L85.7746433,128.5 C85.498501,128.5 85.2746433,128.276142 85.2746433,128 C85.2746433,127.723858 85.498501,127.5 85.7746433,127.5 L86.9746436,127.5 C87.2507857,127.5 87.4746433,127.723858 87.4746433,128 Z M91.6746433,128 C91.6746433,128.276142 91.4507857,128.5 91.1746433,128.5 L89.9746433,128.5 C89.698501,128.5 89.4746433,128.276142 89.4746433,128 C89.4746433,127.723858 89.698501,127.5 89.9746433,127.5 L91.1746434,127.5 C91.4507857,127.5 91.6746433,127.723858 91.6746433,128 Z M95.8746433,128 C95.8746433,128.276142 95.6507857,128.5 95.3746433,128.5 L94.1746433,128.5 C93.898501,128.5 93.6746433,128.276142 93.6746433,128 C93.6746433,127.723858 93.898501,127.5 94.1746433,127.5 L95.3746434,127.5 C95.6507857,127.5 95.8746433,127.723858 95.8746433,128 Z M100.074643,128 C100.074643,128.276142 99.8507857,128.5 99.5746433,128.5 L98.3746433,128.5 C98.098501,128.5 97.8746433,128.276142 97.8746433,128 C97.8746433,127.723858 98.098501,127.5 98.3746433,127.5 L99.5746434,127.5 C99.8507857,127.5 100.074643,127.723858 100.074643,128 Z M104.274643,128 C104.274643,128.276142 104.050786,128.5 103.774643,128.5 L102.574643,128.5 C102.298501,128.5 102.074643,128.276142 102.074643,128 C102.074643,127.723858 102.298501,127.5 102.574643,127.5 L103.774643,127.5 C104.050786,127.5 104.274643,127.723858 104.274643,128 Z M108.474643,128 C108.474643,128.276142 108.250786,128.5 107.974643,128.5 L106.774643,128.5 C106.498501,128.5 106.274643,128.276142 106.274643,128 C106.274643,127.723858 106.498501,127.5 106.774643,127.5 L107.974644,127.5 C108.250786,127.5 108.474643,127.723858 108.474643,128 Z M112.674643,128 C112.674643,128.276142 112.450786,128.5 112.174643,128.5 L110.974643,128.5 C110.698501,128.5 110.474643,128.276142 110.474643,128 C110.474643,127.723858 110.698501,127.5 110.974643,127.5 L112.174644,127.5 C112.450786,127.5 112.674643,127.723858 112.674643,128 Z M116.874643,128 C116.874643,128.276142 116.650786,128.5 116.374643,128.5 L115.174643,128.5 C114.898501,128.5 114.674643,128.276142 114.674643,128 C114.674643,127.723858 114.898501,127.5 115.174643,127.5 L116.374644,127.5 C116.650786,127.5 116.874643,127.723858 116.874643,128 Z M6.11917166,127.26234 C6.4811827,127.355807 6.85096734,127.422182 7.22628506,127.460615 C7.50099087,127.488746 7.70087959,127.734243 7.672749,128.008949 C7.64461842,128.283655 7.39912102,128.483543 7.12441521,128.455413 C6.6990453,128.411854 6.27974917,128.336592 5.86917977,128.230588 C5.60180548,128.161554 5.44101839,127.888842 5.51005175,127.621468 C5.57908511,127.354093 5.85179738,127.193306 6.11917166,127.26234 Z M255.312799,127.301263 C255.411042,127.559339 255.281472,127.848192 255.023397,127.946435 C254.626331,128.097588 254.217733,128.218817 253.800449,128.308837 C253.530516,128.367069 253.264486,128.195451 253.206254,127.925519 C253.148023,127.655586 253.31964,127.389556 253.589573,127.331324 C253.957441,127.251965 254.317608,127.145105 254.667628,127.011861 C254.925703,126.913618 255.214556,127.043188 255.312799,127.301263 Z M2.67705638,125.283586 C2.94172901,125.550217 3.22599483,125.796783 3.52732116,126.021013 C3.74885648,126.185866 3.79480608,126.499097 3.62995247,126.720632 C3.46509885,126.942167 3.15186874,126.988117 2.93033342,126.823263 C2.58902858,126.569284 2.26708722,126.290038 1.967348,125.988081 C1.77280692,125.792101 1.77397385,125.47552 1.96995441,125.280979 C2.16593497,125.086438 2.4825153,125.087605 2.67705638,125.283586 Z M258.549902,124.621502 C258.765056,124.794601 258.799149,125.109343 258.626049,125.324497 C258.359396,125.655935 258.068254,125.967097 257.755266,126.25521 C257.552097,126.442231 257.235785,126.429141 257.048763,126.225972 C256.861742,126.022803 256.874832,125.706491 257.078001,125.51947 C257.354378,125.26506 257.611464,124.990294 257.846908,124.697649 C258.020007,124.482495 258.334748,124.448403 258.549902,124.621502 Z M0.733384372,121.864188 C0.826565468,122.228563 0.947008602,122.584456 1.09349591,122.929363 C1.20144497,123.183532 1.08291072,123.477086 0.82874225,123.585035 C0.574573778,123.692984 0.281019519,123.57445 0.173070459,123.320282 C0.00690582348,122.929043 -0.129731356,122.525299 -0.235438195,122.111943 C-0.303854013,121.84441 -0.142437673,121.57207 0.125095291,121.503654 C0.392628256,121.435238 0.664968554,121.596655 0.733384372,121.864188 Z M259.99237,120.614598 C260.26589,120.652559 260.456849,120.905064 260.418888,121.178585 C260.36014,121.601886 260.269795,122.01828 260.148916,122.425045 C260.070255,122.689747 259.791904,122.840562 259.527202,122.7619 C259.2625,122.683239 259.111685,122.404888 259.190347,122.140186 C259.296911,121.78159 259.376567,121.414462 259.428382,121.041116 C259.466343,120.767596 259.718849,120.576637 259.99237,120.614598 Z M0.5,117.861965 L0.5,119.061965 C0.5,119.338107 0.276142375,119.561965 0,119.561965 C-0.276142375,119.561965 -0.5,119.338107 -0.5,119.061965 L-0.5,117.861965 C-0.5,117.585823 -0.276142375,117.361965 0,117.361965 C0.276142375,117.361965 0.5,117.585823 0.5,117.861965 Z M260.5,116.912678 L260.5,118.112678 C260.5,118.388821 260.276142,118.612678 260,118.612678 C259.723858,118.612678 259.5,118.388821 259.5,118.112678 L259.5,116.912678 C259.5,116.636536 259.723858,116.412678 260,116.412678 C260.276142,116.412678 260.5,116.636536 260.5,116.912678 Z M0.5,113.661965 L0.5,114.861965 C0.5,115.138107 0.276142375,115.361965 0,115.361965 C-0.276142375,115.361965 -0.5,115.138107 -0.5,114.861965 L-0.5,113.661965 C-0.5,113.385823 -0.276142375,113.161965 0,113.161965 C0.276142375,113.161965 0.5,113.385823 0.5,113.661965 Z M260.5,112.712678 L260.5,113.912678 C260.5,114.188821 260.276142,114.412678 260,114.412678 C259.723858,114.412678 259.5,114.188821 259.5,113.912678 L259.5,112.712678 C259.5,112.436536 259.723858,112.212678 260,112.212678 C260.276142,112.212678 260.5,112.436536 260.5,112.712678 Z M0.5,109.461965 L0.5,110.661965 C0.5,110.938107 0.276142375,111.161965 0,111.161965 C-0.276142375,111.161965 -0.5,110.938107 -0.5,110.661965 L-0.5,109.461965 C-0.5,109.185823 -0.276142375,108.961965 0,108.961965 C0.276142375,108.961965 0.5,109.185823 0.5,109.461965 Z M260.5,108.512678 L260.5,109.712678 C260.5,109.988821 260.276142,110.212678 260,110.212678 C259.723858,110.212678 259.5,109.988821 259.5,109.712678 L259.5,108.512678 C259.5,108.236536 259.723858,108.012678 260,108.012678 C260.276142,108.012678 260.5,108.236536 260.5,108.512678 Z M0.5,105.261965 L0.5,106.461965 C0.5,106.738107 0.276142375,106.961965 0,106.961965 C-0.276142375,106.961965 -0.5,106.738107 -0.5,106.461965 L-0.5,105.261965 C-0.5,104.985823 -0.276142375,104.761965 0,104.761965 C0.276142375,104.761965 0.5,104.985823 0.5,105.261965 Z M260.5,104.312678 L260.5,105.512678 C260.5,105.788821 260.276142,106.012678 260,106.012678 C259.723858,106.012678 259.5,105.788821 259.5,105.512678 L259.5,104.312678 C259.5,104.036536 259.723858,103.812678 260,103.812678 C260.276142,103.812678 260.5,104.036536 260.5,104.312678 Z M0.5,101.061965 L0.5,102.261965 C0.5,102.538107 0.276142375,102.761965 0,102.761965 C-0.276142375,102.761965 -0.5,102.538107 -0.5,102.261965 L-0.5,101.061965 C-0.5,100.785823 -0.276142375,100.561965 0,100.561965 C0.276142375,100.561965 0.5,100.785823 0.5,101.061965 Z M260.5,100.112678 L260.5,101.312678 C260.5,101.588821 260.276142,101.812678 260,101.812678 C259.723858,101.812678 259.5,101.588821 259.5,101.312678 L259.5,100.112678 C259.5,99.836536 259.723858,99.6126783 260,99.6126783 C260.276142,99.6126783 260.5,99.836536 260.5,100.112678 Z M0.5,96.861965 L0.5,98.061965 C0.5,98.3381074 0.276142375,98.561965 0,98.561965 C-0.276142375,98.561965 -0.5,98.3381074 -0.5,98.061965 L-0.5,96.861965 C-0.5,96.5858226 -0.276142375,96.361965 0,96.361965 C0.276142375,96.361965 0.5,96.5858226 0.5,96.861965 Z M260.5,95.9126783 L260.5,97.1126783 C260.5,97.3888207 260.276142,97.6126783 260,97.6126783 C259.723858,97.6126783 259.5,97.3888207 259.5,97.1126783 L259.5,95.9126783 C259.5,95.636536 259.723858,95.4126783 260,95.4126783 C260.276142,95.4126783 260.5,95.636536 260.5,95.9126783 Z M0.5,92.661965 L0.5,93.861965 C0.5,94.1381074 0.276142375,94.361965 0,94.361965 C-0.276142375,94.361965 -0.5,94.1381074 -0.5,93.861965 L-0.5,92.661965 C-0.5,92.3858226 -0.276142375,92.161965 0,92.161965 C0.276142375,92.161965 0.5,92.3858226 0.5,92.661965 Z M260.5,91.7126783 L260.5,92.9126783 C260.5,93.1888207 260.276142,93.4126783 260,93.4126783 C259.723858,93.4126783 259.5,93.1888207 259.5,92.9126783 L259.5,91.7126783 C259.5,91.436536 259.723858,91.2126783 260,91.2126783 C260.276142,91.2126783 260.5,91.436536 260.5,91.7126783 Z M0.5,88.461965 L0.5,89.661965 C0.5,89.9381074 0.276142375,90.161965 0,90.161965 C-0.276142375,90.161965 -0.5,89.9381074 -0.5,89.661965 L-0.5,88.461965 C-0.5,88.1858226 -0.276142375,87.961965 0,87.961965 C0.276142375,87.961965 0.5,88.1858226 0.5,88.461965 Z M260.5,87.5126783 L260.5,88.7126783 C260.5,88.9888207 260.276142,89.2126783 260,89.2126783 C259.723858,89.2126783 259.5,88.9888207 259.5,88.7126783 L259.5,87.5126783 C259.5,87.236536 259.723858,87.0126783 260,87.0126783 C260.276142,87.0126783 260.5,87.236536 260.5,87.5126783 Z M0.5,84.261965 L0.5,85.461965 C0.5,85.7381074 0.276142375,85.961965 0,85.961965 C-0.276142375,85.961965 -0.5,85.7381074 -0.5,85.461965 L-0.5,84.261965 C-0.5,83.9858226 -0.276142375,83.761965 0,83.761965 C0.276142375,83.761965 0.5,83.9858226 0.5,84.261965 Z M260.5,83.3126783 L260.5,84.5126783 C260.5,84.7888207 260.276142,85.0126783 260,85.0126783 C259.723858,85.0126783 259.5,84.7888207 259.5,84.5126783 L259.5,83.3126783 C259.5,83.036536 259.723858,82.8126783 260,82.8126783 C260.276142,82.8126783 260.5,83.036536 260.5,83.3126783 Z M0.5,80.061965 L0.5,81.261965 C0.5,81.5381074 0.276142375,81.761965 0,81.761965 C-0.276142375,81.761965 -0.5,81.5381074 -0.5,81.261965 L-0.5,80.061965 C-0.5,79.7858226 -0.276142375,79.561965 0,79.561965 C0.276142375,79.561965 0.5,79.7858226 0.5,80.061965 Z M260.5,79.1126783 L260.5,80.3126783 C260.5,80.5888207 260.276142,80.8126783 260,80.8126783 C259.723858,80.8126783 259.5,80.5888207 259.5,80.3126783 L259.5,79.1126783 C259.5,78.836536 259.723858,78.6126783 260,78.6126783 C260.276142,78.6126783 260.5,78.836536 260.5,79.1126783 Z M0.5,75.861965 L0.5,77.061965 C0.5,77.3381074 0.276142375,77.561965 0,77.561965 C-0.276142375,77.561965 -0.5,77.3381074 -0.5,77.061965 L-0.5,75.861965 C-0.5,75.5858226 -0.276142375,75.361965 0,75.361965 C0.276142375,75.361965 0.5,75.5858226 0.5,75.861965 Z M260.5,74.9126783 L260.5,76.1126783 C260.5,76.3888207 260.276142,76.6126783 260,76.6126783 C259.723858,76.6126783 259.5,76.3888207 259.5,76.1126783 L259.5,74.9126783 C259.5,74.636536 259.723858,74.4126783 260,74.4126783 C260.276142,74.4126783 260.5,74.636536 260.5,74.9126783 Z M0.5,71.661965 L0.5,72.861965 C0.5,73.1381074 0.276142375,73.361965 0,73.361965 C-0.276142375,73.361965 -0.5,73.1381074 -0.5,72.861965 L-0.5,71.661965 C-0.5,71.3858226 -0.276142375,71.161965 0,71.161965 C0.276142375,71.161965 0.5,71.3858226 0.5,71.661965 Z M260.5,70.7126783 L260.5,71.9126783 C260.5,72.1888207 260.276142,72.4126783 260,72.4126783 C259.723858,72.4126783 259.5,72.1888207 259.5,71.9126783 L259.5,70.7126783 C259.5,70.436536 259.723858,70.2126783 260,70.2126783 C260.276142,70.2126783 260.5,70.436536 260.5,70.7126783 Z M0.5,67.461965 L0.5,68.661965 C0.5,68.9381074 0.276142375,69.161965 0,69.161965 C-0.276142375,69.161965 -0.5,68.9381074 -0.5,68.661965 L-0.5,67.461965 C-0.5,67.1858226 -0.276142375,66.961965 0,66.961965 C0.276142375,66.961965 0.5,67.1858226 0.5,67.461965 Z M260.5,66.5126783 L260.5,67.7126783 C260.5,67.9888207 260.276142,68.2126783 260,68.2126783 C259.723858,68.2126783 259.5,67.9888207 259.5,67.7126783 L259.5,66.5126783 C259.5,66.236536 259.723858,66.0126783 260,66.0126783 C260.276142,66.0126783 260.5,66.236536 260.5,66.5126783 Z M0.5,63.261965 L0.5,64.461965 C0.5,64.7381074 0.276142375,64.961965 0,64.961965 C-0.276142375,64.961965 -0.5,64.7381074 -0.5,64.461965 L-0.5,63.261965 C-0.5,62.9858226 -0.276142375,62.761965 0,62.761965 C0.276142375,62.761965 0.5,62.9858226 0.5,63.261965 Z M260.5,62.3126783 L260.5,63.5126783 C260.5,63.7888207 260.276142,64.0126783 260,64.0126783 C259.723858,64.0126783 259.5,63.7888207 259.5,63.5126783 L259.5,62.3126783 C259.5,62.036536 259.723858,61.8126783 260,61.8126783 C260.276142,61.8126783 260.5,62.036536 260.5,62.3126783 Z M0.5,59.061965 L0.5,60.261965 C0.5,60.5381074 0.276142375,60.761965 0,60.761965 C-0.276142375,60.761965 -0.5,60.5381074 -0.5,60.261965 L-0.5,59.061965 C-0.5,58.7858226 -0.276142375,58.561965 0,58.561965 C0.276142375,58.561965 0.5,58.7858226 0.5,59.061965 Z M260.5,58.1126783 L260.5,59.3126783 C260.5,59.5888207 260.276142,59.8126783 260,59.8126783 C259.723858,59.8126783 259.5,59.5888207 259.5,59.3126783 L259.5,58.1126783 C259.5,57.836536 259.723858,57.6126783 260,57.6126783 C260.276142,57.6126783 260.5,57.836536 260.5,58.1126783 Z M0.5,54.861965 L0.5,56.061965 C0.5,56.3381074 0.276142375,56.561965 0,56.561965 C-0.276142375,56.561965 -0.5,56.3381074 -0.5,56.061965 L-0.5,54.861965 C-0.5,54.5858226 -0.276142375,54.361965 0,54.361965 C0.276142375,54.361965 0.5,54.5858226 0.5,54.861965 Z M260.5,53.9126783 L260.5,55.1126783 C260.5,55.3888207 260.276142,55.6126783 260,55.6126783 C259.723858,55.6126783 259.5,55.3888207 259.5,55.1126783 L259.5,53.9126783 C259.5,53.636536 259.723858,53.4126783 260,53.4126783 C260.276142,53.4126783 260.5,53.636536 260.5,53.9126783 Z M0.5,50.661965 L0.5,51.861965 C0.5,52.1381074 0.276142375,52.361965 0,52.361965 C-0.276142375,52.361965 -0.5,52.1381074 -0.5,51.861965 L-0.5,50.661965 C-0.5,50.3858226 -0.276142375,50.161965 0,50.161965 C0.276142375,50.161965 0.5,50.3858226 0.5,50.661965 Z M260.5,49.7126783 L260.5,50.9126783 C260.5,51.1888207 260.276142,51.4126783 260,51.4126783 C259.723858,51.4126783 259.5,51.1888207 259.5,50.9126783 L259.5,49.7126783 C259.5,49.436536 259.723858,49.2126783 260,49.2126783 C260.276142,49.2126783 260.5,49.436536 260.5,49.7126783 Z M0.5,46.461965 L0.5,47.661965 C0.5,47.9381074 0.276142375,48.161965 0,48.161965 C-0.276142375,48.161965 -0.5,47.9381074 -0.5,47.661965 L-0.5,46.461965 C-0.5,46.1858226 -0.276142375,45.961965 0,45.961965 C0.276142375,45.961965 0.5,46.1858226 0.5,46.461965 Z M260.5,45.5126783 L260.5,46.7126783 C260.5,46.9888207 260.276142,47.2126783 260,47.2126783 C259.723858,47.2126783 259.5,46.9888207 259.5,46.7126783 L259.5,45.5126783 C259.5,45.236536 259.723858,45.0126783 260,45.0126783 C260.276142,45.0126783 260.5,45.236536 260.5,45.5126783 Z M0.5,42.261965 L0.5,43.461965 C0.5,43.7381074 0.276142375,43.961965 0,43.961965 C-0.276142375,43.961965 -0.5,43.7381074 -0.5,43.461965 L-0.5,42.261965 C-0.5,41.9858226 -0.276142375,41.761965 0,41.761965 C0.276142375,41.761965 0.5,41.9858226 0.5,42.261965 Z M260.5,41.3126783 L260.5,42.5126783 C260.5,42.7888207 260.276142,43.0126783 260,43.0126783 C259.723858,43.0126783 259.5,42.7888207 259.5,42.5126783 L259.5,41.3126783 C259.5,41.036536 259.723858,40.8126783 260,40.8126783 C260.276142,40.8126783 260.5,41.036536 260.5,41.3126783 Z M0.5,38.061965 L0.5,39.261965 C0.5,39.5381074 0.276142375,39.761965 0,39.761965 C-0.276142375,39.761965 -0.5,39.5381074 -0.5,39.261965 L-0.5,38.061965 C-0.5,37.7858226 -0.276142375,37.561965 0,37.561965 C0.276142375,37.561965 0.5,37.7858226 0.5,38.061965 Z M260.5,37.1126783 L260.5,38.3126783 C260.5,38.5888207 260.276142,38.8126783 260,38.8126783 C259.723858,38.8126783 259.5,38.5888207 259.5,38.3126783 L259.5,37.1126783 C259.5,36.836536 259.723858,36.6126783 260,36.6126783 C260.276142,36.6126783 260.5,36.836536 260.5,37.1126783 Z M0.5,33.861965 L0.5,35.061965 C0.5,35.3381074 0.276142375,35.561965 0,35.561965 C-0.276142375,35.561965 -0.5,35.3381074 -0.5,35.061965 L-0.5,33.861965 C-0.5,33.5858226 -0.276142375,33.361965 0,33.361965 C0.276142375,33.361965 0.5,33.5858226 0.5,33.861965 Z M260.5,32.9126783 L260.5,34.1126783 C260.5,34.3888207 260.276142,34.6126783 260,34.6126783 C259.723858,34.6126783 259.5,34.3888207 259.5,34.1126783 L259.5,32.9126783 C259.5,32.636536 259.723858,32.4126783 260,32.4126783 C260.276142,32.4126783 260.5,32.636536 260.5,32.9126783 Z M0.5,29.661965 L0.5,30.861965 C0.5,31.1381074 0.276142375,31.361965 0,31.361965 C-0.276142375,31.361965 -0.5,31.1381074 -0.5,30.861965 L-0.5,29.661965 C-0.5,29.3858226 -0.276142375,29.161965 0,29.161965 C0.276142375,29.161965 0.5,29.3858226 0.5,29.661965 Z M260.5,28.7126783 L260.5,29.9126783 C260.5,30.1888207 260.276142,30.4126783 260,30.4126783 C259.723858,30.4126783 259.5,30.1888207 259.5,29.9126783 L259.5,28.7126783 C259.5,28.436536 259.723858,28.2126783 260,28.2126783 C260.276142,28.2126783 260.5,28.436536 260.5,28.7126783 Z M0.5,25.461965 L0.5,26.661965 C0.5,26.9381074 0.276142375,27.161965 0,27.161965 C-0.276142375,27.161965 -0.5,26.9381074 -0.5,26.661965 L-0.5,25.461965 C-0.5,25.1858226 -0.276142375,24.961965 0,24.961965 C0.276142375,24.961965 0.5,25.1858226 0.5,25.461965 Z M260.5,24.5126783 L260.5,25.7126783 C260.5,25.9888207 260.276142,26.2126783 260,26.2126783 C259.723858,26.2126783 259.5,25.9888207 259.5,25.7126783 L259.5,24.5126783 C259.5,24.236536 259.723858,24.0126783 260,24.0126783 C260.276142,24.0126783 260.5,24.236536 260.5,24.5126783 Z M0.5,21.261965 L0.5,22.461965 C0.5,22.7381074 0.276142375,22.961965 0,22.961965 C-0.276142375,22.961965 -0.5,22.7381074 -0.5,22.461965 L-0.5,21.261965 C-0.5,20.9858226 -0.276142375,20.761965 0,20.761965 C0.276142375,20.761965 0.5,20.9858226 0.5,21.261965 Z M260.5,20.3126783 L260.5,21.5126783 C260.5,21.7888207 260.276142,22.0126783 260,22.0126783 C259.723858,22.0126783 259.5,21.7888207 259.5,21.5126783 L259.5,20.3126783 C259.5,20.036536 259.723858,19.8126783 260,19.8126783 C260.276142,19.8126783 260.5,20.036536 260.5,20.3126783 Z M0.5,17.061965 L0.5,18.261965 C0.5,18.5381074 0.276142375,18.761965 0,18.761965 C-0.276142375,18.761965 -0.5,18.5381074 -0.5,18.261965 L-0.5,17.061965 C-0.5,16.7858226 -0.276142375,16.561965 0,16.561965 C0.276142375,16.561965 0.5,16.7858226 0.5,17.061965 Z M260.5,16.1126783 L260.5,17.3126783 C260.5,17.5888207 260.276142,17.8126783 260,17.8126783 C259.723858,17.8126783 259.5,17.5888207 259.5,17.3126783 L259.5,16.1126783 C259.5,15.836536 259.723858,15.6126783 260,15.6126783 C260.276142,15.6126783 260.5,15.836536 260.5,16.1126783 Z M0.5,12.861965 L0.5,14.061965 C0.5,14.3381074 0.276142375,14.561965 0,14.561965 C-0.276142375,14.561965 -0.5,14.3381074 -0.5,14.061965 L-0.5,12.861965 C-0.5,12.5858226 -0.276142375,12.361965 0,12.361965 C0.276142375,12.361965 0.5,12.5858226 0.5,12.861965 Z M260.5,11.9126783 L260.5,13.1126783 C260.5,13.3888207 260.276142,13.6126783 260,13.6126783 C259.723858,13.6126783 259.5,13.3888207 259.5,13.1126783 L259.5,11.9126783 C259.5,11.636536 259.723858,11.4126783 260,11.4126783 C260.276142,11.4126783 260.5,11.636536 260.5,11.9126783 Z M0.5,8.66196502 L0.5,9.86196502 C0.5,10.1381074 0.276142375,10.361965 0,10.361965 C-0.276142375,10.361965 -0.5,10.1381074 -0.5,9.86196502 L-0.5,8.66196502 C-0.5,8.38582265 -0.276142375,8.16196502 0,8.16196502 C0.276142375,8.16196502 0.5,8.38582265 0.5,8.66196502 Z M260.494418,7.68921224 C260.498066,7.79062789 260.499929,7.89227575 260.5,7.99446217 L260.5,8.91267731 C260.5,9.18881968 260.276143,9.41267833 260.000001,9.41267833 C259.723859,9.41267833 259.500001,9.18882172 259.5,8.91267935 L259.499998,7.99480185 C259.499937,7.90471609 259.49829,7.81482938 259.495064,7.72516749 C259.485135,7.44920367 259.700799,7.21744193 259.976763,7.20751317 C260.252727,7.1975844 260.484489,7.41324842 260.494418,7.68921224 Z M0.986610647,4.11583191 C1.23597179,4.23446426 1.34194842,4.53278192 1.22331607,4.78214306 C1.06222828,5.12074411 0.926750683,5.4712836 0.818218193,5.83120237 C0.738494071,6.09558595 0.459539664,6.24528201 0.195156088,6.16555789 C-0.0692274888,6.08583377 -0.218923554,5.80687936 -0.139199432,5.54249579 C-0.0160787008,5.13419905 0.13759981,4.73656586 0.320299502,4.35253734 C0.438931846,4.1031762 0.737249507,3.99719957 0.986610647,4.11583191 Z M259.201553,3.48274737 C259.427969,3.84299185 259.627119,4.2198564 259.797096,4.61033798 C259.907312,4.86353175 259.791405,5.15813342 259.538212,5.26834922 C259.285018,5.37856502 258.990416,5.26265847 258.8802,5.00946469 C258.730302,4.66511031 258.554643,4.33269867 258.354891,4.01487881 C258.207947,3.78107962 258.278357,3.47242625 258.512156,3.32548221 C258.745956,3.17853817 259.054609,3.24894818 259.201553,3.48274737 Z M3.91062135,1.10459525 C4.06622605,1.33272194 4.00743536,1.6437977 3.77930867,1.7994024 C3.46903131,2.01104188 3.17490728,2.2457359 2.89951969,2.50130689 C2.69711083,2.68915061 2.3807486,2.67734324 2.19290488,2.47493438 C2.00506115,2.27252553 2.01686852,1.9561633 2.21927738,1.76831957 C2.53117247,1.47886812 2.86431697,1.21303803 3.2158142,0.973282564 C3.44394089,0.817677864 3.75501665,0.876468555 3.91062135,1.10459525 Z M255.922472,0.457306874 C256.299878,0.653949266 256.661908,0.878721049 257.00557,1.12950333 C257.228635,1.29228151 257.277507,1.60506899 257.114729,1.82813373 C256.951951,2.05119846 256.639163,2.10007038 256.416098,1.9372922 C256.112796,1.71596195 255.793344,1.51762524 255.460396,1.34414712 C255.215502,1.21654831 255.120415,0.914583085 255.248014,0.669688914 C255.375613,0.424794743 255.677578,0.329708065 255.922472,0.457306874 Z M9.7,0 C9.7,0.276142375 9.47614237,0.5 9.2,0.5 L8,0.5 C7.91420864,0.5 7.83346384,0.478393078 7.76294433,0.440266681 C7.69492345,0.482649328 7.61568448,0.509179032 7.53008021,0.51446587 C7.15305784,0.537750414 6.78082077,0.589041093 6.4155231,0.667578396 C6.14554973,0.725621416 5.87963991,0.553817925 5.82159689,0.283844558 C5.76355387,0.0138711903 5.93535736,-0.252038631 6.20533073,-0.310081651 C6.61949823,-0.39912574 7.04139004,-0.457258364 7.46843861,-0.483632478 C7.56517969,-0.489607115 7.65719159,-0.46765798 7.736331,-0.424258653 C7.81263294,-0.47243373 7.90309725,-0.5 8,-0.5 L9.2,-0.5 C9.47614237,-0.5 9.7,-0.276142375 9.7,0 Z M252.847959,-0.458196419 C253.122755,-0.430960125 253.323442,-0.186114641 253.296205,0.0886812751 C253.268969,0.363477191 253.024123,0.564163916 252.749328,0.536927622 C252.501532,0.51236745 252.251563,0.5 252,0.5 L251.6,0.5 C251.323858,0.5 251.1,0.276142375 251.1,0 C251.1,-0.276142375 251.323858,-0.5 251.6,-0.5 L252,-0.5 C252.284514,-0.5 252.5674,-0.486003939 252.847959,-0.458196419 Z M135.7,0 C135.7,0.276142375 135.476142,0.5 135.2,0.5 L134,0.5 C133.723858,0.5 133.5,0.276142375 133.5,0 C133.5,-0.276142375 133.723858,-0.5 134,-0.5 L135.2,-0.5 C135.476142,-0.5 135.7,-0.276142375 135.7,0 Z M240.7,0 C240.7,0.276142375 240.476142,0.5 240.2,0.5 L239,0.5 C238.723858,0.5 238.5,0.276142375 238.5,0 C238.5,-0.276142375 238.723858,-0.5 239,-0.5 L240.2,-0.5 C240.476142,-0.5 240.7,-0.276142375 240.7,0 Z M236.5,0 C236.5,0.276142375 236.276142,0.5 236,0.5 L234.8,0.5 C234.523858,0.5 234.3,0.276142375 234.3,0 C234.3,-0.276142375 234.523858,-0.5 234.8,-0.5 L236,-0.5 C236.276142,-0.5 236.5,-0.276142375 236.5,0 Z M232.3,0 C232.3,0.276142375 232.076142,0.5 231.8,0.5 L230.6,0.5 C230.323858,0.5 230.1,0.276142375 230.1,0 C230.1,-0.276142375 230.323858,-0.5 230.6,-0.5 L231.8,-0.5 C232.076142,-0.5 232.3,-0.276142375 232.3,0 Z M228.1,0 C228.1,0.276142375 227.876142,0.5 227.6,0.5 L226.4,0.5 C226.123858,0.5 225.9,0.276142375 225.9,0 C225.9,-0.276142375 226.123858,-0.5 226.4,-0.5 L227.6,-0.5 C227.876142,-0.5 228.1,-0.276142375 228.1,0 Z M223.9,0 C223.9,0.276142375 223.676142,0.5 223.4,0.5 L222.2,0.5 C221.923858,0.5 221.7,0.276142375 221.7,0 C221.7,-0.276142375 221.923858,-0.5 222.2,-0.5 L223.4,-0.5 C223.676142,-0.5 223.9,-0.276142375 223.9,0 Z M219.7,0 C219.7,0.276142375 219.476142,0.5 219.2,0.5 L218,0.5 C217.723858,0.5 217.5,0.276142375 217.5,0 C217.5,-0.276142375 217.723858,-0.5 218,-0.5 L219.2,-0.5 C219.476142,-0.5 219.7,-0.276142375 219.7,0 Z M215.5,0 C215.5,0.276142375 215.276142,0.5 215,0.5 L213.8,0.5 C213.523858,0.5 213.3,0.276142375 213.3,0 C213.3,-0.276142375 213.523858,-0.5 213.8,-0.5 L215,-0.5 C215.276142,-0.5 215.5,-0.276142375 215.5,0 Z M211.3,0 C211.3,0.276142375 211.076142,0.5 210.8,0.5 L209.6,0.5 C209.323858,0.5 209.1,0.276142375 209.1,0 C209.1,-0.276142375 209.323858,-0.5 209.6,-0.5 L210.8,-0.5 C211.076142,-0.5 211.3,-0.276142375 211.3,0 Z M207.1,0 C207.1,0.276142375 206.876142,0.5 206.6,0.5 L205.4,0.5 C205.123858,0.5 204.9,0.276142375 204.9,0 C204.9,-0.276142375 205.123858,-0.5 205.4,-0.5 L206.6,-0.5 C206.876142,-0.5 207.1,-0.276142375 207.1,0 Z M202.9,0 C202.9,0.276142375 202.676142,0.5 202.4,0.5 L201.2,0.5 C200.923858,0.5 200.7,0.276142375 200.7,0 C200.7,-0.276142375 200.923858,-0.5 201.2,-0.5 L202.4,-0.5 C202.676142,-0.5 202.9,-0.276142375 202.9,0 Z M198.7,0 C198.7,0.276142375 198.476142,0.5 198.2,0.5 L197,0.5 C196.723858,0.5 196.5,0.276142375 196.5,0 C196.5,-0.276142375 196.723858,-0.5 197,-0.5 L198.2,-0.5 C198.476142,-0.5 198.7,-0.276142375 198.7,0 Z M194.5,0 C194.5,0.276142375 194.276142,0.5 194,0.5 L192.8,0.5 C192.523858,0.5 192.3,0.276142375 192.3,0 C192.3,-0.276142375 192.523858,-0.5 192.8,-0.5 L194,-0.5 C194.276142,-0.5 194.5,-0.276142375 194.5,0 Z M190.3,0 C190.3,0.276142375 190.076142,0.5 189.8,0.5 L188.6,0.5 C188.323858,0.5 188.1,0.276142375 188.1,0 C188.1,-0.276142375 188.323858,-0.5 188.6,-0.5 L189.8,-0.5 C190.076142,-0.5 190.3,-0.276142375 190.3,0 Z M186.1,0 C186.1,0.276142375 185.876142,0.5 185.6,0.5 L184.4,0.5 C184.123858,0.5 183.9,0.276142375 183.9,0 C183.9,-0.276142375 184.123858,-0.5 184.4,-0.5 L185.6,-0.5 C185.876142,-0.5 186.1,-0.276142375 186.1,0 Z M181.9,0 C181.9,0.276142375 181.676142,0.5 181.4,0.5 L180.2,0.5 C179.923858,0.5 179.7,0.276142375 179.7,0 C179.7,-0.276142375 179.923858,-0.5 180.2,-0.5 L181.4,-0.5 C181.676142,-0.5 181.9,-0.276142375 181.9,0 Z M177.7,0 C177.7,0.276142375 177.476142,0.5 177.2,0.5 L176,0.5 C175.723858,0.5 175.5,0.276142375 175.5,0 C175.5,-0.276142375 175.723858,-0.5 176,-0.5 L177.2,-0.5 C177.476142,-0.5 177.7,-0.276142375 177.7,0 Z M173.5,0 C173.5,0.276142375 173.276142,0.5 173,0.5 L171.8,0.5 C171.523858,0.5 171.3,0.276142375 171.3,0 C171.3,-0.276142375 171.523858,-0.5 171.8,-0.5 L173,-0.5 C173.276142,-0.5 173.5,-0.276142375 173.5,0 Z M169.3,0 C169.3,0.276142375 169.076142,0.5 168.8,0.5 L167.6,0.5 C167.323858,0.5 167.1,0.276142375 167.1,0 C167.1,-0.276142375 167.323858,-0.5 167.6,-0.5 L168.8,-0.5 C169.076142,-0.5 169.3,-0.276142375 169.3,0 Z M165.1,0 C165.1,0.276142375 164.876142,0.5 164.6,0.5 L163.4,0.5 C163.123858,0.5 162.9,0.276142375 162.9,0 C162.9,-0.276142375 163.123858,-0.5 163.4,-0.5 L164.6,-0.5 C164.876142,-0.5 165.1,-0.276142375 165.1,0 Z M160.9,0 C160.9,0.276142375 160.676142,0.5 160.4,0.5 L159.2,0.5 C158.923858,0.5 158.7,0.276142375 158.7,0 C158.7,-0.276142375 158.923858,-0.5 159.2,-0.5 L160.4,-0.5 C160.676142,-0.5 160.9,-0.276142375 160.9,0 Z M156.7,0 C156.7,0.276142375 156.476142,0.5 156.2,0.5 L155,0.5 C154.723858,0.5 154.5,0.276142375 154.5,0 C154.5,-0.276142375 154.723858,-0.5 155,-0.5 L156.2,-0.5 C156.476142,-0.5 156.7,-0.276142375 156.7,0 Z M152.5,0 C152.5,0.276142375 152.276142,0.5 152,0.5 L150.8,0.5 C150.523858,0.5 150.3,0.276142375 150.3,0 C150.3,-0.276142375 150.523858,-0.5 150.8,-0.5 L152,-0.5 C152.276142,-0.5 152.5,-0.276142375 152.5,0 Z M148.3,0 C148.3,0.276142375 148.076142,0.5 147.8,0.5 L146.6,0.5 C146.323858,0.5 146.1,0.276142375 146.1,0 C146.1,-0.276142375 146.323858,-0.5 146.6,-0.5 L147.8,-0.5 C148.076142,-0.5 148.3,-0.276142375 148.3,0 Z M144.1,0 C144.1,0.276142375 143.876142,0.5 143.6,0.5 L142.4,0.5 C142.123858,0.5 141.9,0.276142375 141.9,0 C141.9,-0.276142375 142.123858,-0.5 142.4,-0.5 L143.6,-0.5 C143.876142,-0.5 144.1,-0.276142375 144.1,0 Z M139.9,0 C139.9,0.276142375 139.676142,0.5 139.4,0.5 L138.2,0.5 C137.923858,0.5 137.7,0.276142375 137.7,0 C137.7,-0.276142375 137.923858,-0.5 138.2,-0.5 L139.4,-0.5 C139.676142,-0.5 139.9,-0.276142375 139.9,0 Z M249.1,0 C249.1,0.276142375 248.876142,0.5 248.6,0.5 L247.4,0.5 C247.123858,0.5 246.9,0.276142375 246.9,0 C246.9,-0.276142375 247.123858,-0.5 247.4,-0.5 L248.6,-0.5 C248.876142,-0.5 249.1,-0.276142375 249.1,0 Z M131.5,0 C131.5,0.276142375 131.276142,0.5 131,0.5 L129.8,0.5 C129.523858,0.5 129.3,0.276142375 129.3,0 C129.3,-0.276142375 129.523858,-0.5 129.8,-0.5 L131,-0.5 C131.276142,-0.5 131.5,-0.276142375 131.5,0 Z M127.3,0 C127.3,0.276142375 127.076142,0.5 126.8,0.5 L125.6,0.5 C125.323858,0.5 125.1,0.276142375 125.1,0 C125.1,-0.276142375 125.323858,-0.5 125.6,-0.5 L126.8,-0.5 C127.076142,-0.5 127.3,-0.276142375 127.3,0 Z M123.1,0 C123.1,0.276142375 122.876142,0.5 122.6,0.5 L121.4,0.5 C121.123858,0.5 120.9,0.276142375 120.9,0 C120.9,-0.276142375 121.123858,-0.5 121.4,-0.5 L122.6,-0.5 C122.876142,-0.5 123.1,-0.276142375 123.1,0 Z M118.9,0 C118.9,0.276142375 118.676142,0.5 118.4,0.5 L117.2,0.5 C116.923858,0.5 116.7,0.276142375 116.7,0 C116.7,-0.276142375 116.923858,-0.5 117.2,-0.5 L118.4,-0.5 C118.676142,-0.5 118.9,-0.276142375 118.9,0 Z M114.7,0 C114.7,0.276142375 114.476142,0.5 114.2,0.5 L113,0.5 C112.723858,0.5 112.5,0.276142375 112.5,0 C112.5,-0.276142375 112.723858,-0.5 113,-0.5 L114.2,-0.5 C114.476142,-0.5 114.7,-0.276142375 114.7,0 Z M110.5,0 C110.5,0.276142375 110.276142,0.5 110,0.5 L108.8,0.5 C108.523858,0.5 108.3,0.276142375 108.3,0 C108.3,-0.276142375 108.523858,-0.5 108.8,-0.5 L110,-0.5 C110.276142,-0.5 110.5,-0.276142375 110.5,0 Z M106.3,0 C106.3,0.276142375 106.076142,0.5 105.8,0.5 L104.6,0.5 C104.323858,0.5 104.1,0.276142375 104.1,0 C104.1,-0.276142375 104.323858,-0.5 104.6,-0.5 L105.8,-0.5 C106.076142,-0.5 106.3,-0.276142375 106.3,0 Z M102.1,0 C102.1,0.276142375 101.876142,0.5 101.6,0.5 L100.4,0.5 C100.123858,0.5 99.9,0.276142375 99.9,0 C99.9,-0.276142375 100.123858,-0.5 100.4,-0.5 L101.6,-0.5 C101.876142,-0.5 102.1,-0.276142375 102.1,0 Z M97.9,0 C97.9,0.276142375 97.6761424,0.5 97.4,0.5 L96.2,0.5 C95.9238576,0.5 95.7,0.276142375 95.7,0 C95.7,-0.276142375 95.9238576,-0.5 96.2,-0.5 L97.4,-0.5 C97.6761424,-0.5 97.9,-0.276142375 97.9,0 Z M93.7,0 C93.7,0.276142375 93.4761424,0.5 93.2,0.5 L92,0.5 C91.7238576,0.5 91.5,0.276142375 91.5,0 C91.5,-0.276142375 91.7238576,-0.5 92,-0.5 L93.2,-0.5 C93.4761424,-0.5 93.7,-0.276142375 93.7,0 Z M89.5,0 C89.5,0.276142375 89.2761424,0.5 89,0.5 L87.8,0.5 C87.5238576,0.5 87.3,0.276142375 87.3,0 C87.3,-0.276142375 87.5238576,-0.5 87.8,-0.5 L89,-0.5 C89.2761424,-0.5 89.5,-0.276142375 89.5,0 Z M85.3,0 C85.3,0.276142375 85.0761424,0.5 84.8,0.5 L83.6,0.5 C83.3238576,0.5 83.1,0.276142375 83.1,0 C83.1,-0.276142375 83.3238576,-0.5 83.6,-0.5 L84.8,-0.5 C85.0761424,-0.5 85.3,-0.276142375 85.3,0 Z M81.1,0 C81.1,0.276142375 80.8761424,0.5 80.6,0.5 L79.4,0.5 C79.1238576,0.5 78.9,0.276142375 78.9,0 C78.9,-0.276142375 79.1238576,-0.5 79.4,-0.5 L80.6,-0.5 C80.8761424,-0.5 81.1,-0.276142375 81.1,0 Z M76.9,0 C76.9,0.276142375 76.6761424,0.5 76.4,0.5 L75.2,0.5 C74.9238576,0.5 74.7,0.276142375 74.7,0 C74.7,-0.276142375 74.9238576,-0.5 75.2,-0.5 L76.4,-0.5 C76.6761424,-0.5 76.9,-0.276142375 76.9,0 Z M72.7,0 C72.7,0.276142375 72.4761424,0.5 72.2,0.5 L71,0.5 C70.7238576,0.5 70.5,0.276142375 70.5,0 C70.5,-0.276142375 70.7238576,-0.5 71,-0.5 L72.2,-0.5 C72.4761424,-0.5 72.7,-0.276142375 72.7,0 Z M68.5,0 C68.5,0.276142375 68.2761424,0.5 68,0.5 L66.8,0.5 C66.5238576,0.5 66.3,0.276142375 66.3,0 C66.3,-0.276142375 66.5238576,-0.5 66.8,-0.5 L68,-0.5 C68.2761424,-0.5 68.5,-0.276142375 68.5,0 Z M64.3,0 C64.3,0.276142375 64.0761424,0.5 63.8,0.5 L62.6,0.5 C62.3238576,0.5 62.1,0.276142375 62.1,0 C62.1,-0.276142375 62.3238576,-0.5 62.6,-0.5 L63.8,-0.5 C64.0761424,-0.5 64.3,-0.276142375 64.3,0 Z M60.1,0 C60.1,0.276142375 59.8761424,0.5 59.6,0.5 L58.4,0.5 C58.1238576,0.5 57.9,0.276142375 57.9,0 C57.9,-0.276142375 58.1238576,-0.5 58.4,-0.5 L59.6,-0.5 C59.8761424,-0.5 60.1,-0.276142375 60.1,0 Z M55.9,0 C55.9,0.276142375 55.6761424,0.5 55.4,0.5 L54.2,0.5 C53.9238576,0.5 53.7,0.276142375 53.7,0 C53.7,-0.276142375 53.9238576,-0.5 54.2,-0.5 L55.4,-0.5 C55.6761424,-0.5 55.9,-0.276142375 55.9,0 Z M51.7,0 C51.7,0.276142375 51.4761424,0.5 51.2,0.5 L50,0.5 C49.7238576,0.5 49.5,0.276142375 49.5,0 C49.5,-0.276142375 49.7238576,-0.5 50,-0.5 L51.2,-0.5 C51.4761424,-0.5 51.7,-0.276142375 51.7,0 Z M47.5,0 C47.5,0.276142375 47.2761424,0.5 47,0.5 L45.8,0.5 C45.5238576,0.5 45.3,0.276142375 45.3,0 C45.3,-0.276142375 45.5238576,-0.5 45.8,-0.5 L47,-0.5 C47.2761424,-0.5 47.5,-0.276142375 47.5,0 Z M43.3,0 C43.3,0.276142375 43.0761424,0.5 42.8,0.5 L41.6,0.5 C41.3238576,0.5 41.1,0.276142375 41.1,0 C41.1,-0.276142375 41.3238576,-0.5 41.6,-0.5 L42.8,-0.5 C43.0761424,-0.5 43.3,-0.276142375 43.3,0 Z M39.1,0 C39.1,0.276142375 38.8761424,0.5 38.6,0.5 L37.4,0.5 C37.1238576,0.5 36.9,0.276142375 36.9,0 C36.9,-0.276142375 37.1238576,-0.5 37.4,-0.5 L38.6,-0.5 C38.8761424,-0.5 39.1,-0.276142375 39.1,0 Z M34.9,0 C34.9,0.276142375 34.6761424,0.5 34.4,0.5 L33.2,0.5 C32.9238576,0.5 32.7,0.276142375 32.7,0 C32.7,-0.276142375 32.9238576,-0.5 33.2,-0.5 L34.4,-0.5 C34.6761424,-0.5 34.9,-0.276142375 34.9,0 Z M30.7,0 C30.7,0.276142375 30.4761424,0.5 30.2,0.5 L29,0.5 C28.7238576,0.5 28.5,0.276142375 28.5,0 C28.5,-0.276142375 28.7238576,-0.5 29,-0.5 L30.2,-0.5 C30.4761424,-0.5 30.7,-0.276142375 30.7,0 Z M26.5,0 C26.5,0.276142375 26.2761424,0.5 26,0.5 L24.8,0.5 C24.5238576,0.5 24.3,0.276142375 24.3,0 C24.3,-0.276142375 24.5238576,-0.5 24.8,-0.5 L26,-0.5 C26.2761424,-0.5 26.5,-0.276142375 26.5,0 Z M22.3,0 C22.3,0.276142375 22.0761424,0.5 21.8,0.5 L20.6,0.5 C20.3238576,0.5 20.1,0.276142375 20.1,0 C20.1,-0.276142375 20.3238576,-0.5 20.6,-0.5 L21.8,-0.5 C22.0761424,-0.5 22.3,-0.276142375 22.3,0 Z M18.1,0 C18.1,0.276142375 17.8761424,0.5 17.6,0.5 L16.4,0.5 C16.1238576,0.5 15.9,0.276142375 15.9,0 C15.9,-0.276142375 16.1238576,-0.5 16.4,-0.5 L17.6,-0.5 C17.8761424,-0.5 18.1,-0.276142375 18.1,0 Z M13.9,0 C13.9,0.276142375 13.6761424,0.5 13.4,0.5 L12.2,0.5 C11.9238576,0.5 11.7,0.276142375 11.7,0 C11.7,-0.276142375 11.9238576,-0.5 12.2,-0.5 L13.4,-0.5 C13.6761424,-0.5 13.9,-0.276142375 13.9,0 Z M244.9,0 C244.9,0.276142375 244.676142,0.5 244.4,0.5 L243.2,0.5 C242.923858,0.5 242.7,0.276142375 242.7,0 C242.7,-0.276142375 242.923858,-0.5 243.2,-0.5 L244.4,-0.5 C244.676142,-0.5 244.9,-0.276142375 244.9,0 Z" fill="#999999" fill-rule="nonzero"></path>
+                    <g id="vLLM" stroke-width="1" fill-rule="evenodd" transform="translate(6.5, 4)" fill="#7D7D7D">
+                        <path d="M1.80126953,8 L5.30224609,8 C5.44873047,8 5.56266276,7.95198568 5.64404297,7.85595703 C5.72542318,7.75992839 5.76611328,7.6484375 5.76611328,7.52148437 C5.76611328,7.39453125 5.72460938,7.28222656 5.64160156,7.18457031 C5.55859375,7.08691406 5.44547526,7.03808594 5.30224609,7.03808594 L2.39697266,7.03808594 L2.39697266,1.58398437 C2.39697266,1.3984375 2.33837891,1.25439453 2.22119141,1.15185547 C2.10400391,1.04931641 1.96240234,0.998046875 1.79638672,0.998046875 C1.63037109,0.998046875 1.48714193,1.04931641 1.36669922,1.15185547 C1.24625651,1.25439453 1.18603516,1.3984375 1.18603516,1.58398437 L1.18603516,7.39453125 C1.18603516,7.56054687 1.24707031,7.70296224 1.36914062,7.82177734 C1.49121094,7.94059245 1.63525391,8 1.80126953,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M7.27978516,8 L10.7807617,8 C10.9272461,8 11.0411784,7.95198568 11.1225586,7.85595703 C11.2039388,7.75992839 11.2446289,7.6484375 11.2446289,7.52148437 C11.2446289,7.39453125 11.203125,7.28222656 11.1201172,7.18457031 C11.0371094,7.08691406 10.9239909,7.03808594 10.7807617,7.03808594 L7.87548828,7.03808594 L7.87548828,1.58398437 C7.87548828,1.3984375 7.81689453,1.25439453 7.69970703,1.15185547 C7.58251953,1.04931641 7.44091797,0.998046875 7.27490234,0.998046875 C7.10888672,0.998046875 6.96565755,1.04931641 6.84521484,1.15185547 C6.72477214,1.25439453 6.66455078,1.3984375 6.66455078,1.58398437 L6.66455078,7.39453125 C6.66455078,7.56054687 6.72558594,7.70296224 6.84765625,7.82177734 C6.96972656,7.94059245 7.11376953,8 7.27978516,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M15.9418945,7.78027344 L15.9418945,6.31054688 L14.0473633,1.88183594 C13.9529622,1.65722656 13.8162435,1.48876953 13.637207,1.37646484 C13.4581706,1.26416016 13.2661133,1.20800781 13.0610352,1.20800781 C12.8136393,1.20800781 12.5996094,1.2796224 12.4189453,1.42285156 C12.2382812,1.56608073 12.1479492,1.75 12.1479492,1.97460938 L12.1479492,7.47753906 C12.1479492,7.60449219 12.2032878,7.70377604 12.3139648,7.77539063 C12.4246419,7.84700521 12.5556641,7.8828125 12.7070312,7.8828125 C12.8583984,7.8828125 12.9894206,7.84700521 13.1000977,7.77539063 C13.2107747,7.70377604 13.2661133,7.60449219 13.2661133,7.47753906 L13.2661133,2.5703125 L15.2436523,7.25292969 C15.3152669,7.42220052 15.4145508,7.55240885 15.5415039,7.64355469 C15.668457,7.73470052 15.8019206,7.78027344 15.9418945,7.78027344 Z M15.9467773,7.78027344 C16.0867513,7.78027344 16.2210286,7.73388672 16.3496094,7.64111328 C16.4781901,7.54833984 16.5766602,7.41894531 16.6450195,7.25292969 L18.6274414,2.5703125 L18.6274414,7.47753906 C18.6274414,7.60449219 18.6827799,7.70377604 18.793457,7.77539063 C18.9041341,7.84700521 19.0351562,7.8828125 19.1865234,7.8828125 C19.3378906,7.8828125 19.4689128,7.84700521 19.5795898,7.77539063 C19.6902669,7.70377604 19.7456055,7.60449219 19.7456055,7.47753906 L19.7456055,1.97460938 C19.7456055,1.82486979 19.702474,1.69059245 19.6162109,1.57177734 C19.5299479,1.45296224 19.4176432,1.36263021 19.2792969,1.30078125 C19.1409505,1.23893229 18.9920247,1.20800781 18.8325195,1.20800781 C18.6274414,1.20800781 18.4353841,1.26416016 18.2563477,1.37646484 C18.0773112,1.48876953 17.9389648,1.65722656 17.8413086,1.88183594 L15.9467773,6.31054688 L15.9467773,7.78027344 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="Lines-@blue-w-arrow" transform="translate(31.5, 0)" fill="#8E8E8E" fill-rule="nonzero">
+                    <g id="L5-l" transform="translate(0, 138)">
+                        <path d="M80,-0.277777778 L82.5,4.72222222 L80.5,4.72222222 L80.5,20 C80.5,20.2761424 80.2761424,20.5 80,20.5 C79.7238576,20.5 79.5,20.2761424 79.5,20 L79.5,4.72222222 L77.5,4.72222222 L80,-0.277777778 Z M33,-0.277777778 L35.5,4.72222222 L33.5,4.72222222 L33.5,20 C33.5,20.2761424 33.2761424,20.5 33,20.5 C32.7238576,20.5 32.5,20.2761424 32.5,20 L32.5,4.72222222 L30.5,4.72222222 L33,-0.277777778 Z M0.5,-0.277777778 L3,4.72222222 L0.999,4.72222222 L1,20 C1,20.2761424 0.776142375,20.5 0.5,20.5 C0.223857625,20.5 1.33226763e-15,20.2761424 1.33226763e-15,20 L-0.001,4.72222222 L-2,4.72222222 L0.5,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L5-r" transform="translate(116, 138)">
+                        <path d="M80.5,-0.277777778 L83,4.72222222 L81,4.72222222 L81,20 C81,20.2761424 80.7761424,20.5 80.5,20.5 C80.2238576,20.5 80,20.2761424 80,20 L80,4.72222222 L78,4.72222222 L80.5,-0.277777778 Z M34.5,-0.277777778 L37,4.72222222 L35,4.72222222 L35,20 C35,20.2761424 34.7761424,20.5 34.5,20.5 C34.2238576,20.5 34,20.2761424 34,20 L34,4.72222222 L32,4.72222222 L34.5,-0.277777778 Z M0.5,-0.277777778 L3,4.72222222 L0.999,4.72222222 L1,20 C1,20.2761424 0.776142375,20.5 0.5,20.5 C0.223857625,20.5 1.33226763e-15,20.2761424 1.33226763e-15,20 L-0.001,4.72222222 L-2,4.72222222 L0.5,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L4" transform="translate(0, 110)">
+                        <path d="M196.499999,-0.277777778 L198.999999,4.72222222 L196.998999,4.72222222 L196.999999,16 C196.999999,16.2761424 196.776141,16.5 196.499999,16.5 C196.223856,16.5 195.999999,16.2761424 195.999999,16 L195.998999,4.72222222 L193.999999,4.72222222 L196.499999,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M130.499999,-0.277777778 L132.999999,4.72222222 L130.998999,4.72222222 L130.999999,16 C130.999999,16.2761424 130.776141,16.5 130.499999,16.5 C130.223856,16.5 129.999999,16.2761424 129.999999,16 L129.998999,4.72222222 L127.999999,4.72222222 L130.499999,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M65.4999985,-0.277777778 L67.9999985,4.72222222 L65.9989985,4.72222222 L65.9999985,16 C65.9999985,16.2761424 65.7761409,16.5 65.4999985,16.5 C65.2238561,16.5 64.9999985,16.2761424 64.9999985,16 L64.9989985,4.72222222 L62.9999985,4.72222222 L65.4999985,-0.277777778 Z" id="Combined-Shape"></path>
+                        <path d="M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L3" transform="translate(0, 82)">
+                        <path d="M65.4999985,-0.277777778 L67.9999985,4.72222222 L65.9999985,4.72222222 L65.9999985,16 C65.9999985,16.2761424 65.7761409,16.5 65.4999985,16.5 C65.2238561,16.5 64.9999985,16.2761424 64.9999985,16 L64.9999985,4.72222222 L62.9999985,4.72222222 L65.4999985,-0.277777778 Z M130.499999,-0.277777778 L132.999999,4.72222222 L130.999999,4.72222222 L130.999999,16 C130.999999,16.2761424 130.776141,16.5 130.499999,16.5 C130.223856,16.5 129.999999,16.2761424 129.999999,16 L129.999999,4.72222222 L127.999999,4.72222222 L130.499999,-0.277777778 Z M196.499999,-0.277777778 L198.999999,4.72222222 L196.999999,4.72222222 L196.999999,16 C196.999999,16.2761424 196.776141,16.5 196.499999,16.5 C196.223856,16.5 195.999999,16.2761424 195.999999,16 L195.999999,4.72222222 L193.999999,4.72222222 L196.499999,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L2" transform="translate(0, 54)">
+                        <path d="M65.4999985,-0.277777778 L67.9999985,4.72222222 L65.9999985,4.72222222 L65.9999985,16 C65.9999985,16.2761424 65.7761409,16.5 65.4999985,16.5 C65.2238561,16.5 64.9999985,16.2761424 64.9999985,16 L64.9999985,4.72222222 L62.9999985,4.72222222 L65.4999985,-0.277777778 Z M130.499999,-0.277777778 L132.999999,4.72222222 L130.999999,4.72222222 L130.999999,16 C130.999999,16.2761424 130.776141,16.5 130.499999,16.5 C130.223856,16.5 129.999999,16.2761424 129.999999,16 L129.999999,4.72222222 L127.999999,4.72222222 L130.499999,-0.277777778 Z M196.499999,-0.277777778 L198.999999,4.72222222 L196.999999,4.72222222 L196.999999,16 C196.999999,16.2761424 196.776141,16.5 196.499999,16.5 C196.223856,16.5 195.999999,16.2761424 195.999999,16 L195.999999,4.72222222 L193.999999,4.72222222 L196.499999,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L1" transform="translate(196, 2)">
+                        <path d="M-130.5,0.722222222 L-128,5.72222222 L-130.001,5.72222222 L-130,42 C-130,42.2761424 -130.223858,42.5 -130.5,42.5 C-130.776142,42.5 -131,42.2761424 -131,42 L-131.001,5.72222222 L-133,5.72222222 L-130.5,0.722222222 Z" id="Combined-Shape"></path>
+                        <path d="M-195.5,0.722222222 L-193,5.72222222 L-195.001,5.72222222 L-195,42 C-195,42.2761424 -195.223858,42.5 -195.5,42.5 C-195.776142,42.5 -196,42.2761424 -196,42 L-196.001,5.72222222 L-198,5.72222222 L-195.5,0.722222222 Z" id="Combined-Shape"></path>
+                        <path d="M-65.5,0.722222222 L-63,5.72222222 L-65.001,5.72222222 L-65,42 C-65,42.2761424 -65.2238576,42.5 -65.5,42.5 C-65.7761424,42.5 -66,42.2761424 -66,42 L-66.001,5.72222222 L-68,5.72222222 L-65.5,0.722222222 Z" id="Combined-Shape"></path>
+                        <path d="M0.5,0.722222222 L3,5.72222222 L0.999,5.72222222 L1,42 C1,42.2761424 0.776142375,42.5 0.5,42.5 C0.223857625,42.5 2.22044605e-15,42.2761424 2.22044605e-15,42 L-0.001,5.72222222 L-2,5.72222222 L0.5,0.722222222 Z" id="Combined-Shape"></path>
+                    </g>
+                </g>
+                <g id="Row-Bottom" transform="translate(20, 158)">
+                    <g id="yellow-row" fill="#FDB515">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        <rect id="Rectangle" fill-opacity="0.6" x="34" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M55,0 C56.6568542,0 58,1.34314575 58,3 L58,9 C58,10.6568542 56.6568542,12 55,12 L37,12 C35.3431458,12 34,10.6568542 34,9 L34,3 C34,1.34314575 35.3431458,0 37,0 L55,0 Z M55,1 L37,1 L36.7955116,1.0103258 C35.7869995,1.11274576 35,1.96446609 35,3 L35,9 C35,10.1045695 35.8954305,11 37,11 L55,11 C56.1045695,11 57,10.1045695 57,9 L57,3 C57,1.8954305 56.1045695,1 55,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        <rect id="Rectangle" fill-opacity="0.6" x="80" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M101,0 C102.656854,0 104,1.34314575 104,3 L104,9 C104,10.6568542 102.656854,12 101,12 L83,12 C81.3431458,12 80,10.6568542 80,9 L80,3 C80,1.34314575 81.3431458,0 83,0 L101,0 Z M101,1 L83,1 L82.7955116,1.0103258 C81.7869995,1.11274576 81,1.96446609 81,3 L81,9 C81,10.1045695 81.8954305,11 83,11 L101,11 C102.104569,11 103,10.1045695 103,9 L103,3 C103,1.8954305 102.104569,1 101,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="..." transform="translate(64.7, 5)" fill="#8E8E8E">
+                        <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                    </g>
+                    <g id="blue-row" transform="translate(116, 0)" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        <rect id="Rectangle" fill-opacity="0.6" x="34" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M55,0 C56.6568542,0 58,1.34314575 58,3 L58,9 C58,10.6568542 56.6568542,12 55,12 L37,12 C35.3431458,12 34,10.6568542 34,9 L34,3 C34,1.34314575 35.3431458,0 37,0 L55,0 Z M55,1 L37,1 L36.7955116,1.0103258 C35.7869995,1.11274576 35,1.96446609 35,3 L35,9 C35,10.1045695 35.8954305,11 37,11 L55,11 C56.1045695,11 57,10.1045695 57,9 L57,3 C57,1.8954305 56.1045695,1 55,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        <rect id="Rectangle" fill-opacity="0.6" x="80" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M101,0 C102.656854,0 104,1.34314575 104,3 L104,9 C104,10.6568542 102.656854,12 101,12 L83,12 C81.3431458,12 80,10.6568542 80,9 L80,3 C80,1.34314575 81.3431458,0 83,0 L101,0 Z M101,1 L83,1 L82.7955116,1.0103258 C81.7869995,1.11274576 81,1.96446609 81,3 L81,9 C81,10.1045695 81.8954305,11 83,11 L101,11 C102.104569,11 103,10.1045695 103,9 L103,3 C103,1.8954305 102.104569,1 101,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="..." transform="translate(180.7, 5)" fill="#8E8E8E">
+                        <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                    </g>
+                </g>
+                <g id="L1" transform="translate(20, 42)">
+                    <rect id="Rectangle" fill="#E3EDE6" x="0" y="0" width="220" height="12" rx="3"></rect>
+                    <path d="M217,0 C218.656854,0 220,1.34314575 220,3 L220,9 C220,10.6568542 218.656854,12 217,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L217,0 Z M217,1 L3,1 L2.79551167,1.01032579 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L217,11 C218.104569,11 219,10.1045695 219,9 L219,3 C219,1.8954305 218.104569,1 217,1 Z" id="Rectangle" fill="#8CA595" fill-rule="nonzero"></path>
+                </g>
+                <g id="L2" transform="translate(20, 70)">
+                    <rect id="Rectangle" fill="#E3EDE6" x="0" y="0" width="220" height="12" rx="3"></rect>
+                    <path d="M217,0 C218.656854,0 220,1.34314575 220,3 L220,9 C220,10.6568542 218.656854,12 217,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L217,0 Z M217,1 L3,1 L2.79551167,1.01032579 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L217,11 C218.104569,11 219,10.1045695 219,9 L219,3 C219,1.8954305 218.104569,1 217,1 Z" id="Rectangle" fill="#8CA595" fill-rule="nonzero"></path>
+                </g>
+                <g id="L3" transform="translate(20, 98)">
+                    <rect id="Rectangle" fill="#E3EDE6" x="0" y="0" width="220" height="12" rx="3"></rect>
+                    <path d="M217,0 C218.656854,0 220,1.34314575 220,3 L220,9 C220,10.6568542 218.656854,12 217,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L217,0 Z M217,1 L3,1 L2.79551167,1.01032579 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L217,11 C218.104569,11 219,10.1045695 219,9 L219,3 C219,1.8954305 218.104569,1 217,1 Z" id="Rectangle" fill="#8CA595" fill-rule="nonzero"></path>
+                </g>
+                <g id="L4" transform="translate(20, 126)">
+                    <rect id="Rectangle" fill="#E3EDE6" x="0" y="0" width="220" height="12" rx="3"></rect>
+                    <path d="M217,0 C218.656854,0 220,1.34314575 220,3 L220,9 C220,10.6568542 218.656854,12 217,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L217,0 Z M217,1 L3,1 L2.79551167,1.01032579 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L217,11 C218.104569,11 219,10.1045695 219,9 L219,3 C219,1.8954305 218.104569,1 217,1 Z" id="Rectangle" fill="#8CA595" fill-rule="nonzero"></path>
+                </g>
+            </g>
+            <g id="pooling" transform="translate(0, 80)">
+                <g id="dash-box-@purple" fill="#9172E2">
+                    <path d="M8,0 L272,0 C276.418278,0 280,3.581722 280,8 L280,74 C280,78.418278 276.418278,82 272,82 L8,82 C3.581722,82 0,78.418278 0,74 L0,8 C0,3.581722 3.581722,8.8817842e-16 8,0 Z" id="bg" fill-opacity="0.1"></path>
+                    <path d="M10.9746434,80.9999997 L10.974,81.9999997 L8.974,81.9999997 L8.97464335,80.9999997 L10.9746434,80.9999997 Z M14.9746436,80.9999997 L14.974,81.9999997 L12.974,81.9999997 L12.9746433,80.9999997 L14.9746436,80.9999997 Z M18.9746435,80.9999997 L18.974,81.9999997 L16.974,81.9999997 L16.9746433,80.9999997 L18.9746435,80.9999997 Z M22.9746434,80.9999997 L22.974,81.9999997 L20.974,81.9999997 L20.9746433,80.9999997 L22.9746434,80.9999997 Z M26.9746436,80.9999997 L26.974,81.9999997 L24.974,81.9999997 L24.9746433,80.9999997 L26.9746436,80.9999997 Z M30.9746435,80.9999997 L30.974,81.9999997 L28.974,81.9999997 L28.9746433,80.9999997 L30.9746435,80.9999997 Z M34.9746434,80.9999997 L34.974,81.9999997 L32.974,81.9999997 L32.9746433,80.9999997 L34.9746434,80.9999997 Z M38.9746434,80.9999997 L38.974,81.9999997 L36.974,81.9999997 L36.9746433,80.9999997 L38.9746434,80.9999997 Z M42.9746435,80.9999997 L42.974,81.9999997 L40.974,81.9999997 L40.9746433,80.9999997 L42.9746435,80.9999997 Z M46.9746435,80.9999997 L46.974,81.9999997 L44.974,81.9999997 L44.9746433,80.9999997 L46.9746435,80.9999997 Z M50.9746434,80.9999997 L50.974,81.9999997 L48.974,81.9999997 L48.9746433,80.9999997 L50.9746434,80.9999997 Z M54.9746436,80.9999997 L54.974,81.9999997 L52.974,81.9999997 L52.9746433,80.9999997 L54.9746436,80.9999997 Z M58.9746435,80.9999997 L58.974,81.9999997 L56.974,81.9999997 L56.9746433,80.9999997 L58.9746435,80.9999997 Z M62.9746434,80.9999997 L62.974,81.9999997 L60.974,81.9999997 L60.9746433,80.9999997 L62.9746434,80.9999997 Z M66.9746436,80.9999997 L66.974,81.9999997 L64.974,81.9999997 L64.9746433,80.9999997 L66.9746436,80.9999997 Z M70.9746435,80.9999997 L70.974,81.9999997 L68.974,81.9999997 L68.9746433,80.9999997 L70.9746435,80.9999997 Z M74.9746434,80.9999997 L74.974,81.9999997 L72.974,81.9999997 L72.9746433,80.9999997 L74.9746434,80.9999997 Z M78.9746434,80.9999997 L78.974,81.9999997 L76.974,81.9999997 L76.9746433,80.9999997 L78.9746434,80.9999997 Z M82.9746435,80.9999997 L82.974,81.9999997 L80.974,81.9999997 L80.9746433,80.9999997 L82.9746435,80.9999997 Z M86.9746435,80.9999997 L86.974,81.9999997 L84.974,81.9999997 L84.9746433,80.9999997 L86.9746435,80.9999997 Z M90.9746434,80.9999997 L90.974,81.9999997 L88.974,81.9999997 L88.9746433,80.9999997 L90.9746434,80.9999997 Z M94.9746436,80.9999997 L94.974,81.9999997 L92.974,81.9999997 L92.9746433,80.9999997 L94.9746436,80.9999997 Z M98.9746435,80.9999997 L98.974,81.9999997 L96.974,81.9999997 L96.9746433,80.9999997 L98.9746435,80.9999997 Z M102.974643,80.9999997 L102.974,81.9999997 L100.974,81.9999997 L100.974643,80.9999997 L102.974643,80.9999997 Z M106.974644,80.9999997 L106.974,81.9999997 L104.974,81.9999997 L104.974643,80.9999997 L106.974644,80.9999997 Z M110.974644,80.9999997 L110.974,81.9999997 L108.974,81.9999997 L108.974643,80.9999997 L110.974644,80.9999997 Z M114.974643,80.9999997 L114.974,81.9999997 L112.974,81.9999997 L112.974643,80.9999997 L114.974643,80.9999997 Z M118.974643,80.9999997 L118.974,81.9999997 L116.974,81.9999997 L116.974643,80.9999997 L118.974643,80.9999997 Z M122.974644,80.9999997 L122.974,81.9999997 L120.974,81.9999997 L120.974643,80.9999997 L122.974644,80.9999997 Z M126.974643,80.9999997 L126.974,81.9999997 L124.974,81.9999997 L124.974643,80.9999997 L126.974643,80.9999997 Z M130.974643,80.9999997 L130.974,81.9999997 L128.974,81.9999997 L128.974643,80.9999997 L130.974643,80.9999997 Z M134.974644,80.9999997 L134.974,81.9999997 L132.974,81.9999997 L132.974643,80.9999997 L134.974644,80.9999997 Z M138.974644,80.9999997 L138.974,81.9999997 L136.974,81.9999997 L136.974643,80.9999997 L138.974644,80.9999997 Z M142.974643,80.9999997 L142.974,81.9999997 L140.974,81.9999997 L140.974643,80.9999997 L142.974643,80.9999997 Z M146.974643,80.9999997 L146.974,81.9999997 L144.974,81.9999997 L144.974643,80.9999997 L146.974643,80.9999997 Z M150.974643,80.9999997 L150.974,81.9999997 L148.974,81.9999997 L148.974643,80.9999997 L150.974643,80.9999997 Z M154.974643,80.9999997 L154.974,81.9999997 L152.974,81.9999997 L152.974643,80.9999997 L154.974643,80.9999997 Z M158.974643,80.9999997 L158.974,81.9999997 L156.974,81.9999997 L156.974643,80.9999997 L158.974643,80.9999997 Z M162.974643,80.9999997 L162.974,81.9999997 L160.974,81.9999997 L160.974643,80.9999997 L162.974643,80.9999997 Z M166.974643,80.9999997 L166.974,81.9999997 L164.974,81.9999997 L164.974643,80.9999997 L166.974643,80.9999997 Z M170.974643,80.9999997 L170.974,81.9999997 L168.974,81.9999997 L168.974643,80.9999997 L170.974643,80.9999997 Z M174.974643,80.9999997 L174.974,81.9999997 L172.974,81.9999997 L172.974643,80.9999997 L174.974643,80.9999997 Z M178.974643,80.9999997 L178.974,81.9999997 L176.974,81.9999997 L176.974643,80.9999997 L178.974643,80.9999997 Z M182.974643,80.9999997 L182.974,81.9999997 L180.974,81.9999997 L180.974643,80.9999997 L182.974643,80.9999997 Z M186.974643,80.9999997 L186.974,81.9999997 L184.974,81.9999997 L184.974643,80.9999997 L186.974643,80.9999997 Z M190.974643,80.9999997 L190.974,81.9999997 L188.974,81.9999997 L188.974643,80.9999997 L190.974643,80.9999997 Z M194.974643,80.9999997 L194.974,81.9999997 L192.974,81.9999997 L192.974643,80.9999997 L194.974643,80.9999997 Z M198.974643,80.9999997 L198.974,81.9999997 L196.974,81.9999997 L196.974643,80.9999997 L198.974643,80.9999997 Z M202.974643,80.9999997 L202.974,81.9999997 L200.974,81.9999997 L200.974643,80.9999997 L202.974643,80.9999997 Z M206.974643,80.9999997 L206.974,81.9999997 L204.974,81.9999997 L204.974643,80.9999997 L206.974643,80.9999997 Z M210.974643,80.9999997 L210.974,81.9999997 L208.974,81.9999997 L208.974643,80.9999997 L210.974643,80.9999997 Z M214.974643,80.9999997 L214.974,81.9999997 L212.974,81.9999997 L212.974643,80.9999997 L214.974643,80.9999997 Z M218.974643,80.9999997 L218.974,81.9999997 L216.974,81.9999997 L216.974643,80.9999997 L218.974643,80.9999997 Z M222.974643,80.9999997 L222.974,81.9999997 L220.974,81.9999997 L220.974643,80.9999997 L222.974643,80.9999997 Z M226.974643,80.9999997 L226.974,81.9999997 L224.974,81.9999997 L224.974643,80.9999997 L226.974643,80.9999997 Z M230.974643,80.9999997 L230.974,81.9999997 L228.974,81.9999997 L228.974643,80.9999997 L230.974643,80.9999997 Z M234.974643,80.9999997 L234.974,81.9999997 L232.974,81.9999997 L232.974643,80.9999997 L234.974643,80.9999997 Z M238.974643,80.9999997 L238.974,81.9999997 L236.974,81.9999997 L236.974643,80.9999997 L238.974643,80.9999997 Z M242.974643,80.9999997 L242.974,81.9999997 L240.974,81.9999997 L240.974643,80.9999997 L242.974643,80.9999997 Z M246.974643,80.9999997 L246.974,81.9999997 L244.974,81.9999997 L244.974643,80.9999997 L246.974643,80.9999997 Z M250.974643,80.9999997 L250.974,81.9999997 L248.974,81.9999997 L248.974643,80.9999997 L250.974643,80.9999997 Z M254.974643,80.9999997 L254.974,81.9999997 L252.974,81.9999997 L252.974643,80.9999997 L254.974643,80.9999997 Z M258.974643,80.9999997 L258.974,81.9999997 L256.974,81.9999997 L256.974643,80.9999997 L258.974643,80.9999997 Z M262.974643,80.9999997 L262.974,81.9999997 L260.974,81.9999997 L260.974643,80.9999997 L262.974643,80.9999997 Z M266.974643,80.9999997 L266.974,81.9999997 L264.974,81.9999997 L264.974643,80.9999997 L266.974643,80.9999997 Z M270.974643,80.9999997 L270.974,81.9999997 L268.974,81.9999997 L268.974643,80.9999997 L270.974643,80.9999997 Z M274.489743,80.544574 L274.845614,81.4791091 C274.230519,81.7132701 273.579078,81.8737323 272.902082,81.9497049 L272.79022,80.9559811 C273.375654,80.8903126 273.945502,80.7517531 274.489743,80.544574 Z M5.41029967,80.5056361 C5.95157753,80.7213183 6.51934132,80.8686603 7.10343083,80.9432289 L6.97697115,81.9352007 C6.30134705,81.8489616 5.65205616,81.6784493 5.03997173,81.4345371 L5.41029967,80.5056361 Z M277.388302,78.4686843 L278.157467,79.1077344 C277.730317,79.6220994 277.239943,80.0820804 276.698265,80.4757556 L276.11069,79.6665857 C276.584521,79.3221774 277.013992,78.9194515 277.388302,78.4686843 Z M2.56157145,78.4076246 C2.93082185,78.8625901 3.35581002,79.2701375 3.82581503,79.6198877 L3.22929764,80.422488 C2.69189028,80.0226419 2.20638657,79.5569385 1.78473163,79.0373231 L2.56157145,78.4076246 Z M278.876065,75.3192015 L279.858348,75.506607 C279.73129,76.1733384 279.521483,76.8107286 279.240021,77.407685 L278.335201,76.9818892 C278.584106,76.4541311 278.766097,75.896352 278.876065,75.3192015 Z M1.10775661,75.2312252 C1.21033515,75.8099081 1.38509885,76.3698753 1.62708778,76.9005768 L0.717340098,77.3157385 C0.443469481,76.7151789 0.241701076,76.07482 0.123099669,75.4057265 L1.10775661,75.2312252 Z M280,71.512 L280,73.512 L279,73.5126783 L279,71.5126783 L280,71.512 Z M1,71.461965 L1,73.461965 L0,73.461 L0,71.461 L1,71.461965 Z M280,67.512 L280,69.512 L279,69.5126783 L279,67.5126783 L280,67.512 Z M1,67.461965 L1,69.461965 L0,69.461 L0,67.461 L1,67.461965 Z M280,63.512 L280,65.512 L279,65.5126783 L279,63.5126783 L280,63.512 Z M1,63.461965 L1,65.461965 L0,65.461 L0,63.461 L1,63.461965 Z M280,59.512 L280,61.512 L279,61.5126783 L279,59.5126783 L280,59.512 Z M1,59.461965 L1,61.461965 L0,61.461 L0,59.461 L1,59.461965 Z M280,55.512 L280,57.512 L279,57.5126783 L279,55.5126783 L280,55.512 Z M1,55.461965 L1,57.461965 L0,57.461 L0,55.461 L1,55.461965 Z M280,51.512 L280,53.512 L279,53.5126783 L279,51.5126783 L280,51.512 Z M1,51.461965 L1,53.461965 L0,53.461 L0,51.461 L1,51.461965 Z M280,47.512 L280,49.512 L279,49.5126783 L279,47.5126783 L280,47.512 Z M1,47.461965 L1,49.461965 L0,49.461 L0,47.461 L1,47.461965 Z M280,43.512 L280,45.512 L279,45.5126783 L279,43.5126783 L280,43.512 Z M1,43.461965 L1,45.461965 L0,45.461 L0,43.461 L1,43.461965 Z M280,39.512 L280,41.512 L279,41.5126783 L279,39.5126783 L280,39.512 Z M1,39.461965 L1,41.461965 L0,41.461 L0,39.461 L1,39.461965 Z M280,35.512 L280,37.512 L279,37.5126783 L279,35.5126783 L280,35.512 Z M1,35.461965 L1,37.461965 L0,37.461 L0,35.461 L1,35.461965 Z M280,31.512 L280,33.512 L279,33.5126783 L279,31.5126783 L280,31.512 Z M1,31.461965 L1,33.461965 L0,33.461 L0,31.461 L1,31.461965 Z M280,27.512 L280,29.512 L279,29.5126783 L279,27.5126783 L280,27.512 Z M1,27.461965 L1,29.461965 L0,29.461 L0,27.461 L1,27.461965 Z M280,23.512 L280,25.512 L279,25.5126783 L279,23.5126783 L280,23.512 Z M1,23.461965 L1,25.461965 L0,25.461 L0,23.461 L1,23.461965 Z M280,19.512 L280,21.512 L279,21.5126783 L279,19.5126783 L280,19.512 Z M1,19.461965 L1,21.461965 L0,21.461 L0,19.461 L1,19.461965 Z M280,15.512 L280,17.512 L279,17.5126783 L279,15.5126783 L280,15.512 Z M1,15.461965 L1,17.461965 L0,17.461 L0,15.461 L1,15.461965 Z M280,11.512 L280,13.512 L279,13.5126783 L279,11.5126783 L280,11.512 Z M1,11.461965 L1,13.461965 L0,13.461 L0,11.461 L1,11.461965 Z M280,8 L280,9.512 L279,9.51268141 L278.999996,7.99230721 C278.999843,7.84931676 278.995414,7.70690426 278.986748,7.5651841 L279.984907,7.50453602 C279.99492,7.6684065 280,7.83361003 280,8 Z M0.0177810396,7.46235014 L1.01560984,7.52821226 C1.00521986,7.68466746 1,7.84198159 1,8 L1,9.46196502 L0,9.461 L0,8 C0,7.81932186 0.00598958881,7.64004267 0.0177810396,7.46235014 Z M278.669225,5.86656128 C278.490852,5.30842466 278.242774,4.77610859 277.93156,4.28094453 L278.777667,3.74792999 C279.130817,4.30965578 279.416068,4.91838179 279.621909,5.56259574 L278.669225,5.86656128 Z M1.26026912,3.68817589 L2.10224493,4.22769122 C1.78656347,4.72006919 1.53367638,5.25013249 1.3502652,5.80651827 L0.400554683,5.49338901 C0.612012081,4.85193918 0.902417359,4.24636767 1.26026912,3.68817589 Z M276.801538,2.90632361 C276.375276,2.5043409 275.900412,2.15691661 275.388372,1.8730526 L275.873899,0.998830409 C276.461218,1.32450134 277.00324,1.72192978 277.488127,2.17927796 L276.801538,2.90632361 Z M4.18303226,0.967564053 L4.66099377,1.84594486 C4.14669322,2.12566396 3.66909236,2.46925191 3.23964084,2.86780054 L2.55894601,2.13523322 C3.04750167,1.68176376 3.5928096,1.28859486 4.18303226,0.967564053 Z M272,0 C272.685315,0 273.350503,0.0861719866 273.985321,0.248271715 L273.737382,1.21704772 C273.174749,1.07342708 272.592644,1 272,1 L272,0 Z M10,0 L10,1 L8.00000022,1 L7.59669465,1.01139504 C7.19522811,1.0341476 6.79959301,1.09086155 6.41261147,1.18053077 L6.18734244,0.206234069 C6.76976317,0.0713155963 7.37655326,0 8,0 L10,0 Z M142,0 L142,1 L140,1 L140,0 L142,0 Z M262,0 L262,1 L260,1 L260,0 L262,0 Z M258,0 L258,1 L256,1 L256,0 L258,0 Z M254,0 L254,1 L252,1 L252,0 L254,0 Z M250,0 L250,1 L248,1 L248,0 L250,0 Z M246,0 L246,1 L244,1 L244,0 L246,0 Z M242,0 L242,1 L240,1 L240,0 L242,0 Z M238,0 L238,1 L236,1 L236,0 L238,0 Z M234,0 L234,1 L232,1 L232,0 L234,0 Z M230,0 L230,1 L228,1 L228,0 L230,0 Z M226,0 L226,1 L224,1 L224,0 L226,0 Z M222,0 L222,1 L220,1 L220,0 L222,0 Z M218,0 L218,1 L216,1 L216,0 L218,0 Z M214,0 L214,1 L212,1 L212,0 L214,0 Z M210,0 L210,1 L208,1 L208,0 L210,0 Z M206,0 L206,1 L204,1 L204,0 L206,0 Z M202,0 L202,1 L200,1 L200,0 L202,0 Z M198,0 L198,1 L196,1 L196,0 L198,0 Z M194,0 L194,1 L192,1 L192,0 L194,0 Z M190,0 L190,1 L188,1 L188,0 L190,0 Z M186,0 L186,1 L184,1 L184,0 L186,0 Z M182,0 L182,1 L180,1 L180,0 L182,0 Z M178,0 L178,1 L176,1 L176,0 L178,0 Z M174,0 L174,1 L172,1 L172,0 L174,0 Z M170,0 L170,1 L168,1 L168,0 L170,0 Z M166,0 L166,1 L164,1 L164,0 L166,0 Z M162,0 L162,1 L160,1 L160,0 L162,0 Z M158,0 L158,1 L156,1 L156,0 L158,0 Z M154,0 L154,1 L152,1 L152,0 L154,0 Z M150,0 L150,1 L148,1 L148,0 L150,0 Z M146,0 L146,1 L144,1 L144,0 L146,0 Z M270,0 L270,1 L268,1 L268,0 L270,0 Z M138,0 L138,1 L136,1 L136,0 L138,0 Z M134,0 L134,1 L132,1 L132,0 L134,0 Z M130,0 L130,1 L128,1 L128,0 L130,0 Z M126,0 L126,1 L124,1 L124,0 L126,0 Z M122,0 L122,1 L120,1 L120,0 L122,0 Z M118,0 L118,1 L116,1 L116,0 L118,0 Z M114,0 L114,1 L112,1 L112,0 L114,0 Z M110,0 L110,1 L108,1 L108,0 L110,0 Z M106,0 L106,1 L104,1 L104,0 L106,0 Z M102,0 L102,1 L100,1 L100,0 L102,0 Z M98,0 L98,1 L96,1 L96,0 L98,0 Z M94,0 L94,1 L92,1 L92,0 L94,0 Z M90,0 L90,1 L88,1 L88,0 L90,0 Z M86,0 L86,1 L84,1 L84,0 L86,0 Z M82,0 L82,1 L80,1 L80,0 L82,0 Z M78,0 L78,1 L76,1 L76,0 L78,0 Z M74,0 L74,1 L72,1 L72,0 L74,0 Z M70,0 L70,1 L68,1 L68,0 L70,0 Z M66,0 L66,1 L64,1 L64,0 L66,0 Z M62,0 L62,1 L60,1 L60,0 L62,0 Z M58,0 L58,1 L56,1 L56,0 L58,0 Z M54,0 L54,1 L52,1 L52,0 L54,0 Z M50,0 L50,1 L48,1 L48,0 L50,0 Z M46,0 L46,1 L44,1 L44,0 L46,0 Z M42,0 L42,1 L40,1 L40,0 L42,0 Z M38,0 L38,1 L36,1 L36,0 L38,0 Z M34,0 L34,1 L32,1 L32,0 L34,0 Z M30,0 L30,1 L28,1 L28,0 L30,0 Z M26,0 L26,1 L24,1 L24,0 L26,0 Z M22,0 L22,1 L20,1 L20,0 L22,0 Z M18,0 L18,1 L16,1 L16,0 L18,0 Z M14,0 L14,1 L12,1 L12,0 L14,0 Z M266,0 L266,1 L264,1 L264,0 L266,0 Z" id="dash" fill-rule="nonzero"></path>
+                </g>
+                <g id="dash-box-@grey" transform="translate(16.6, 45.6)">
+                    <path d="M4.40000021,0.4 L242.4,0.4 C244.056854,0.4 245.4,1.74314575 245.4,3.4 L245.4,23.4 C245.4,25.0568542 244.056854,26.4 242.4,26.4 L4.40000021,26.4 C2.74314596,26.4 1.40000021,25.0568542 1.40000021,23.4 L1.40000021,3.4 C1.40000021,1.74314575 2.74314596,0.4 4.40000021,0.4 Z" id="bg" fill="#F7F7F7"></path>
+                    <path d="M228.384403,26.3999999 C228.384403,26.6209139 228.205316,26.8 227.984403,26.8 L226.984403,26.8 C226.763489,26.8 226.584403,26.6209139 226.584403,26.3999999 C226.584403,26.1790861 226.763489,26 226.984403,26 L227.984402,25.9999998 C228.205316,26 228.384403,26.1790861 228.384403,26.3999999 Z M241.184403,26.3999999 C241.184403,26.6209139 241.005316,26.8 240.784403,26.8 L239.784403,26.8 C239.563489,26.8 239.384403,26.6209139 239.384403,26.3999999 C239.384403,26.1790861 239.563489,26 239.784403,26 L240.784403,25.9999998 C241.005316,26 241.184403,26.1790861 241.184403,26.3999999 Z M7.58440258,26.3999999 C7.58440258,26.6209139 7.40531648,26.8 7.18440258,26.8 L6.18440258,26.8 C5.96348868,26.8 5.78440258,26.6209139 5.78440258,26.3999999 C5.78440258,26.1790861 5.96348868,26 6.18440258,26 L7.18440274,25.9999998 C7.40531648,26 7.58440258,26.1790861 7.58440258,26.3999999 Z M10.7844026,26.3999999 C10.7844026,26.6209139 10.6053165,26.8 10.3844026,26.8 L9.38440258,26.8 C9.16348868,26.8 8.98440258,26.6209139 8.98440258,26.3999999 C8.98440258,26.1790861 9.16348868,26 9.38440258,26 L10.3844027,25.9999998 C10.6053165,26 10.7844026,26.1790861 10.7844026,26.3999999 Z M13.9844026,26.3999999 C13.9844026,26.6209139 13.8053165,26.8 13.5844026,26.8 L12.5844026,26.8 C12.3634887,26.8 12.1844026,26.6209139 12.1844026,26.3999999 C12.1844026,26.1790861 12.3634887,26 12.5844026,26 L13.5844027,25.9999998 C13.8053165,26 13.9844026,26.1790861 13.9844026,26.3999999 Z M17.1844026,26.3999999 C17.1844026,26.6209139 17.0053165,26.8 16.7844026,26.8 L15.7844026,26.8 C15.5634887,26.8 15.3844026,26.6209139 15.3844026,26.3999999 C15.3844026,26.1790861 15.5634887,26 15.7844026,26 L16.7844026,25.9999998 C17.0053165,26 17.1844026,26.1790861 17.1844026,26.3999999 Z M231.584403,26.3999999 C231.584403,26.6209139 231.405316,26.8 231.184403,26.8 L230.184403,26.8 C229.963489,26.8 229.784403,26.6209139 229.784403,26.3999999 C229.784403,26.1790861 229.963489,26 230.184403,26 L231.184402,25.9999998 C231.405316,26 231.584403,26.1790861 231.584403,26.3999999 Z M23.5844026,26.3999999 C23.5844026,26.6209139 23.4053165,26.8 23.1844026,26.8 L22.1844026,26.8 C21.9634887,26.8 21.7844026,26.6209139 21.7844026,26.3999999 C21.7844026,26.1790861 21.9634887,26 22.1844026,26 L23.1844028,25.9999998 C23.4053165,26 23.5844026,26.1790861 23.5844026,26.3999999 Z M26.7844026,26.3999999 C26.7844026,26.6209139 26.6053165,26.8 26.3844026,26.8 L25.3844026,26.8 C25.1634887,26.8 24.9844026,26.6209139 24.9844026,26.3999999 C24.9844026,26.1790861 25.1634887,26 25.3844026,26 L26.3844027,25.9999998 C26.6053165,26 26.7844026,26.1790861 26.7844026,26.3999999 Z M29.9844026,26.3999999 C29.9844026,26.6209139 29.8053165,26.8 29.5844026,26.8 L28.5844026,26.8 C28.3634887,26.8 28.1844026,26.6209139 28.1844026,26.3999999 C28.1844026,26.1790861 28.3634887,26 28.5844026,26 L29.5844027,25.9999998 C29.8053165,26 29.9844026,26.1790861 29.9844026,26.3999999 Z M33.1844026,26.3999999 C33.1844026,26.6209139 33.0053165,26.8 32.7844026,26.8 L31.7844026,26.8 C31.5634887,26.8 31.3844026,26.6209139 31.3844026,26.3999999 C31.3844026,26.1790861 31.5634887,26 31.7844026,26 L32.7844026,25.9999998 C33.0053165,26 33.1844026,26.1790861 33.1844026,26.3999999 Z M36.3844026,26.3999999 C36.3844026,26.6209139 36.2053165,26.8 35.9844026,26.8 L34.9844026,26.8 C34.7634887,26.8 34.5844026,26.6209139 34.5844026,26.3999999 C34.5844026,26.1790861 34.7634887,26 34.9844026,26 L35.9844028,25.9999998 C36.2053165,26 36.3844026,26.1790861 36.3844026,26.3999999 Z M39.5844026,26.3999999 C39.5844026,26.6209139 39.4053165,26.8 39.1844026,26.8 L38.1844026,26.8 C37.9634887,26.8 37.7844026,26.6209139 37.7844026,26.3999999 C37.7844026,26.1790861 37.9634887,26 38.1844026,26 L39.1844028,25.9999998 C39.4053165,26 39.5844026,26.1790861 39.5844026,26.3999999 Z M42.7844026,26.3999999 C42.7844026,26.6209139 42.6053165,26.8 42.3844026,26.8 L41.3844026,26.8 C41.1634887,26.8 40.9844026,26.6209139 40.9844026,26.3999999 C40.9844026,26.1790861 41.1634887,26 41.3844026,26 L42.3844027,25.9999998 C42.6053165,26 42.7844026,26.1790861 42.7844026,26.3999999 Z M45.9844026,26.3999999 C45.9844026,26.6209139 45.8053165,26.8 45.5844026,26.8 L44.5844026,26.8 C44.3634887,26.8 44.1844026,26.6209139 44.1844026,26.3999999 C44.1844026,26.1790861 44.3634887,26 44.5844026,26 L45.5844027,25.9999998 C45.8053165,26 45.9844026,26.1790861 45.9844026,26.3999999 Z M49.1844026,26.3999999 C49.1844026,26.6209139 49.0053165,26.8 48.7844026,26.8 L47.7844026,26.8 C47.5634887,26.8 47.3844026,26.6209139 47.3844026,26.3999999 C47.3844026,26.1790861 47.5634887,26 47.7844026,26 L48.7844026,25.9999998 C49.0053165,26 49.1844026,26.1790861 49.1844026,26.3999999 Z M52.3844026,26.3999999 C52.3844026,26.6209139 52.2053165,26.8 51.9844026,26.8 L50.9844026,26.8 C50.7634887,26.8 50.5844026,26.6209139 50.5844026,26.3999999 C50.5844026,26.1790861 50.7634887,26 50.9844026,26 L51.9844026,25.9999998 C52.2053165,26 52.3844026,26.1790861 52.3844026,26.3999999 Z M55.5844026,26.3999999 C55.5844026,26.6209139 55.4053165,26.8 55.1844026,26.8 L54.1844026,26.8 C53.9634887,26.8 53.7844026,26.6209139 53.7844026,26.3999999 C53.7844026,26.1790861 53.9634887,26 54.1844026,26 L55.1844028,25.9999998 C55.4053165,26 55.5844026,26.1790861 55.5844026,26.3999999 Z M58.7844026,26.3999999 C58.7844026,26.6209139 58.6053165,26.8 58.3844026,26.8 L57.3844026,26.8 C57.1634887,26.8 56.9844026,26.6209139 56.9844026,26.3999999 C56.9844026,26.1790861 57.1634887,26 57.3844026,26 L58.3844027,25.9999998 C58.6053165,26 58.7844026,26.1790861 58.7844026,26.3999999 Z M61.9844026,26.3999999 C61.9844026,26.6209139 61.8053165,26.8 61.5844026,26.8 L60.5844026,26.8 C60.3634887,26.8 60.1844026,26.6209139 60.1844026,26.3999999 C60.1844026,26.1790861 60.3634887,26 60.5844026,26 L61.5844027,25.9999998 C61.8053165,26 61.9844026,26.1790861 61.9844026,26.3999999 Z M65.1844026,26.3999999 C65.1844026,26.6209139 65.0053165,26.8 64.7844026,26.8 L63.7844026,26.8 C63.5634887,26.8 63.3844026,26.6209139 63.3844026,26.3999999 C63.3844026,26.1790861 63.5634887,26 63.7844026,26 L64.7844026,25.9999998 C65.0053165,26 65.1844026,26.1790861 65.1844026,26.3999999 Z M68.3844026,26.3999999 C68.3844026,26.6209139 68.2053165,26.8 67.9844026,26.8 L66.9844026,26.8 C66.7634887,26.8 66.5844026,26.6209139 66.5844026,26.3999999 C66.5844026,26.1790861 66.7634887,26 66.9844026,26 L67.9844026,25.9999998 C68.2053165,26 68.3844026,26.1790861 68.3844026,26.3999999 Z M71.5844026,26.3999999 C71.5844026,26.6209139 71.4053165,26.8 71.1844026,26.8 L70.1844026,26.8 C69.9634887,26.8 69.7844026,26.6209139 69.7844026,26.3999999 C69.7844026,26.1790861 69.9634887,26 70.1844026,26 L71.1844028,25.9999998 C71.4053165,26 71.5844026,26.1790861 71.5844026,26.3999999 Z M74.7844026,26.3999999 C74.7844026,26.6209139 74.6053165,26.8 74.3844026,26.8 L73.3844026,26.8 C73.1634887,26.8 72.9844026,26.6209139 72.9844026,26.3999999 C72.9844026,26.1790861 73.1634887,26 73.3844026,26 L74.3844027,25.9999998 C74.6053165,26 74.7844026,26.1790861 74.7844026,26.3999999 Z M77.9844026,26.3999999 C77.9844026,26.6209139 77.8053165,26.8 77.5844026,26.8 L76.5844026,26.8 C76.3634887,26.8 76.1844026,26.6209139 76.1844026,26.3999999 C76.1844026,26.1790861 76.3634887,26 76.5844026,26 L77.5844027,25.9999998 C77.8053165,26 77.9844026,26.1790861 77.9844026,26.3999999 Z M81.1844026,26.3999999 C81.1844026,26.6209139 81.0053165,26.8 80.7844026,26.8 L79.7844026,26.8 C79.5634887,26.8 79.3844026,26.6209139 79.3844026,26.3999999 C79.3844026,26.1790861 79.5634887,26 79.7844026,26 L80.7844027,25.9999998 C81.0053165,26 81.1844026,26.1790861 81.1844026,26.3999999 Z M84.3844026,26.3999999 C84.3844026,26.6209139 84.2053165,26.8 83.9844026,26.8 L82.9844026,26.8 C82.7634887,26.8 82.5844026,26.6209139 82.5844026,26.3999999 C82.5844026,26.1790861 82.7634887,26 82.9844026,26 L83.9844026,25.9999998 C84.2053165,26 84.3844026,26.1790861 84.3844026,26.3999999 Z M87.5844026,26.3999999 C87.5844026,26.6209139 87.4053165,26.8 87.1844026,26.8 L86.1844026,26.8 C85.9634887,26.8 85.7844026,26.6209139 85.7844026,26.3999999 C85.7844026,26.1790861 85.9634887,26 86.1844026,26 L87.1844028,25.9999998 C87.4053165,26 87.5844026,26.1790861 87.5844026,26.3999999 Z M90.7844026,26.3999999 C90.7844026,26.6209139 90.6053165,26.8 90.3844026,26.8 L89.3844026,26.8 C89.1634887,26.8 88.9844026,26.6209139 88.9844026,26.3999999 C88.9844026,26.1790861 89.1634887,26 89.3844026,26 L90.3844028,25.9999998 C90.6053165,26 90.7844026,26.1790861 90.7844026,26.3999999 Z M93.9844026,26.3999999 C93.9844026,26.6209139 93.8053165,26.8 93.5844026,26.8 L92.5844026,26.8 C92.3634887,26.8 92.1844026,26.6209139 92.1844026,26.3999999 C92.1844026,26.1790861 92.3634887,26 92.5844026,26 L93.5844027,25.9999998 C93.8053165,26 93.9844026,26.1790861 93.9844026,26.3999999 Z M97.1844026,26.3999999 C97.1844026,26.6209139 97.0053165,26.8 96.7844026,26.8 L95.7844026,26.8 C95.5634887,26.8 95.3844026,26.6209139 95.3844026,26.3999999 C95.3844026,26.1790861 95.5634887,26 95.7844026,26 L96.7844027,25.9999998 C97.0053165,26 97.1844026,26.1790861 97.1844026,26.3999999 Z M100.384403,26.3999999 C100.384403,26.6209139 100.205316,26.8 99.9844026,26.8 L98.9844026,26.8 C98.7634887,26.8 98.5844026,26.6209139 98.5844026,26.3999999 C98.5844026,26.1790861 98.7634887,26 98.9844026,26 L99.9844026,25.9999998 C100.205316,26 100.384403,26.1790861 100.384403,26.3999999 Z M103.584403,26.3999999 C103.584403,26.6209139 103.405316,26.8 103.184403,26.8 L102.184403,26.8 C101.963489,26.8 101.784403,26.6209139 101.784403,26.3999999 C101.784403,26.1790861 101.963489,26 102.184403,26 L103.184403,25.9999998 C103.405316,26 103.584403,26.1790861 103.584403,26.3999999 Z M106.784403,26.3999999 C106.784403,26.6209139 106.605316,26.8 106.384403,26.8 L105.384403,26.8 C105.163489,26.8 104.984403,26.6209139 104.984403,26.3999999 C104.984403,26.1790861 105.163489,26 105.384403,26 L106.384403,25.9999998 C106.605316,26 106.784403,26.1790861 106.784403,26.3999999 Z M109.984403,26.3999999 C109.984403,26.6209139 109.805316,26.8 109.584403,26.8 L108.584403,26.8 C108.363489,26.8 108.184403,26.6209139 108.184403,26.3999999 C108.184403,26.1790861 108.363489,26 108.584403,26 L109.584403,25.9999998 C109.805316,26 109.984403,26.1790861 109.984403,26.3999999 Z M113.184403,26.3999999 C113.184403,26.6209139 113.005316,26.8 112.784403,26.8 L111.784403,26.8 C111.563489,26.8 111.384403,26.6209139 111.384403,26.3999999 C111.384403,26.1790861 111.563489,26 111.784403,26 L112.784403,25.9999998 C113.005316,26 113.184403,26.1790861 113.184403,26.3999999 Z M116.384403,26.3999999 C116.384403,26.6209139 116.205316,26.8 115.984403,26.8 L114.984403,26.8 C114.763489,26.8 114.584403,26.6209139 114.584403,26.3999999 C114.584403,26.1790861 114.763489,26 114.984403,26 L115.984403,25.9999998 C116.205316,26 116.384403,26.1790861 116.384403,26.3999999 Z M119.584403,26.3999999 C119.584403,26.6209139 119.405316,26.8 119.184403,26.8 L118.184403,26.8 C117.963489,26.8 117.784403,26.6209139 117.784403,26.3999999 C117.784403,26.1790861 117.963489,26 118.184403,26 L119.184403,25.9999998 C119.405316,26 119.584403,26.1790861 119.584403,26.3999999 Z M122.784403,26.3999999 C122.784403,26.6209139 122.605316,26.8 122.384403,26.8 L121.384403,26.8 C121.163489,26.8 120.984403,26.6209139 120.984403,26.3999999 C120.984403,26.1790861 121.163489,26 121.384403,26 L122.384403,25.9999998 C122.605316,26 122.784403,26.1790861 122.784403,26.3999999 Z M125.984403,26.3999999 C125.984403,26.6209139 125.805316,26.8 125.584403,26.8 L124.584403,26.8 C124.363489,26.8 124.184403,26.6209139 124.184403,26.3999999 C124.184403,26.1790861 124.363489,26 124.584403,26 L125.584403,25.9999998 C125.805316,26 125.984403,26.1790861 125.984403,26.3999999 Z M129.184403,26.3999999 C129.184403,26.6209139 129.005316,26.8 128.784403,26.8 L127.784403,26.8 C127.563489,26.8 127.384403,26.6209139 127.384403,26.3999999 C127.384403,26.1790861 127.563489,26 127.784403,26 L128.784402,25.9999998 C129.005316,26 129.184403,26.1790861 129.184403,26.3999999 Z M132.384403,26.3999999 C132.384403,26.6209139 132.205316,26.8 131.984403,26.8 L130.984403,26.8 C130.763489,26.8 130.584403,26.6209139 130.584403,26.3999999 C130.584403,26.1790861 130.763489,26 130.984403,26 L131.984402,25.9999998 C132.205316,26 132.384403,26.1790861 132.384403,26.3999999 Z M135.584403,26.3999999 C135.584403,26.6209139 135.405316,26.8 135.184403,26.8 L134.184403,26.8 C133.963489,26.8 133.784403,26.6209139 133.784403,26.3999999 C133.784403,26.1790861 133.963489,26 134.184403,26 L135.184402,25.9999998 C135.405316,26 135.584403,26.1790861 135.584403,26.3999999 Z M138.784403,26.3999999 C138.784403,26.6209139 138.605316,26.8 138.384403,26.8 L137.384403,26.8 C137.163489,26.8 136.984403,26.6209139 136.984403,26.3999999 C136.984403,26.1790861 137.163489,26 137.384403,26 L138.384403,25.9999998 C138.605316,26 138.784403,26.1790861 138.784403,26.3999999 Z M141.984403,26.3999999 C141.984403,26.6209139 141.805316,26.8 141.584403,26.8 L140.584403,26.8 C140.363489,26.8 140.184403,26.6209139 140.184403,26.3999999 C140.184403,26.1790861 140.363489,26 140.584403,26 L141.584403,25.9999998 C141.805316,26 141.984403,26.1790861 141.984403,26.3999999 Z M20.3844026,26.3999999 C20.3844026,26.6209139 20.2053165,26.8 19.9844026,26.8 L18.9844026,26.8 C18.7634887,26.8 18.5844026,26.6209139 18.5844026,26.3999999 C18.5844026,26.1790861 18.7634887,26 18.9844026,26 L19.9844028,25.9999998 C20.2053165,26 20.3844026,26.1790861 20.3844026,26.3999999 Z M145.184403,26.3999999 C145.184403,26.6209139 145.005316,26.8 144.784403,26.8 L143.784403,26.8 C143.563489,26.8 143.384403,26.6209139 143.384403,26.3999999 C143.384403,26.1790861 143.563489,26 143.784403,26 L144.784402,25.9999998 C145.005316,26 145.184403,26.1790861 145.184403,26.3999999 Z M148.384403,26.3999999 C148.384403,26.6209139 148.205316,26.8 147.984403,26.8 L146.984403,26.8 C146.763489,26.8 146.584403,26.6209139 146.584403,26.3999999 C146.584403,26.1790861 146.763489,26 146.984403,26 L147.984402,25.9999998 C148.205316,26 148.384403,26.1790861 148.384403,26.3999999 Z M151.584403,26.3999999 C151.584403,26.6209139 151.405316,26.8 151.184403,26.8 L150.184403,26.8 C149.963489,26.8 149.784403,26.6209139 149.784403,26.3999999 C149.784403,26.1790861 149.963489,26 150.184403,26 L151.184402,25.9999998 C151.405316,26 151.584403,26.1790861 151.584403,26.3999999 Z M154.784403,26.3999999 C154.784403,26.6209139 154.605316,26.8 154.384403,26.8 L153.384403,26.8 C153.163489,26.8 152.984403,26.6209139 152.984403,26.3999999 C152.984403,26.1790861 153.163489,26 153.384403,26 L154.384403,25.9999998 C154.605316,26 154.784403,26.1790861 154.784403,26.3999999 Z M157.984403,26.3999999 C157.984403,26.6209139 157.805316,26.8 157.584403,26.8 L156.584403,26.8 C156.363489,26.8 156.184403,26.6209139 156.184403,26.3999999 C156.184403,26.1790861 156.363489,26 156.584403,26 L157.584403,25.9999998 C157.805316,26 157.984403,26.1790861 157.984403,26.3999999 Z M161.184403,26.3999999 C161.184403,26.6209139 161.005316,26.8 160.784403,26.8 L159.784403,26.8 C159.563489,26.8 159.384403,26.6209139 159.384403,26.3999999 C159.384403,26.1790861 159.563489,26 159.784403,26 L160.784402,25.9999998 C161.005316,26 161.184403,26.1790861 161.184403,26.3999999 Z M164.384403,26.3999999 C164.384403,26.6209139 164.205316,26.8 163.984403,26.8 L162.984403,26.8 C162.763489,26.8 162.584403,26.6209139 162.584403,26.3999999 C162.584403,26.1790861 162.763489,26 162.984403,26 L163.984402,25.9999998 C164.205316,26 164.384403,26.1790861 164.384403,26.3999999 Z M167.584403,26.3999999 C167.584403,26.6209139 167.405316,26.8 167.184403,26.8 L166.184403,26.8 C165.963489,26.8 165.784403,26.6209139 165.784403,26.3999999 C165.784403,26.1790861 165.963489,26 166.184403,26 L167.184402,25.9999998 C167.405316,26 167.584403,26.1790861 167.584403,26.3999999 Z M170.784403,26.3999999 C170.784403,26.6209139 170.605316,26.8 170.384403,26.8 L169.384403,26.8 C169.163489,26.8 168.984403,26.6209139 168.984403,26.3999999 C168.984403,26.1790861 169.163489,26 169.384403,26 L170.384403,25.9999998 C170.605316,26 170.784403,26.1790861 170.784403,26.3999999 Z M173.984403,26.3999999 C173.984403,26.6209139 173.805316,26.8 173.584403,26.8 L172.584403,26.8 C172.363489,26.8 172.184403,26.6209139 172.184403,26.3999999 C172.184403,26.1790861 172.363489,26 172.584403,26 L173.584403,25.9999998 C173.805316,26 173.984403,26.1790861 173.984403,26.3999999 Z M177.184403,26.3999999 C177.184403,26.6209139 177.005316,26.8 176.784403,26.8 L175.784403,26.8 C175.563489,26.8 175.384403,26.6209139 175.384403,26.3999999 C175.384403,26.1790861 175.563489,26 175.784403,26 L176.784402,25.9999998 C177.005316,26 177.184403,26.1790861 177.184403,26.3999999 Z M180.384403,26.3999999 C180.384403,26.6209139 180.205316,26.8 179.984403,26.8 L178.984403,26.8 C178.763489,26.8 178.584403,26.6209139 178.584403,26.3999999 C178.584403,26.1790861 178.763489,26 178.984403,26 L179.984402,25.9999998 C180.205316,26 180.384403,26.1790861 180.384403,26.3999999 Z M183.584403,26.3999999 C183.584403,26.6209139 183.405316,26.8 183.184403,26.8 L182.184403,26.8 C181.963489,26.8 181.784403,26.6209139 181.784403,26.3999999 C181.784403,26.1790861 181.963489,26 182.184403,26 L183.184402,25.9999998 C183.405316,26 183.584403,26.1790861 183.584403,26.3999999 Z M186.784403,26.3999999 C186.784403,26.6209139 186.605316,26.8 186.384403,26.8 L185.384403,26.8 C185.163489,26.8 184.984403,26.6209139 184.984403,26.3999999 C184.984403,26.1790861 185.163489,26 185.384403,26 L186.384402,25.9999998 C186.605316,26 186.784403,26.1790861 186.784403,26.3999999 Z M189.984403,26.3999999 C189.984403,26.6209139 189.805316,26.8 189.584403,26.8 L188.584403,26.8 C188.363489,26.8 188.184403,26.6209139 188.184403,26.3999999 C188.184403,26.1790861 188.363489,26 188.584403,26 L189.584403,25.9999998 C189.805316,26 189.984403,26.1790861 189.984403,26.3999999 Z M193.184403,26.3999999 C193.184403,26.6209139 193.005316,26.8 192.784403,26.8 L191.784403,26.8 C191.563489,26.8 191.384403,26.6209139 191.384403,26.3999999 C191.384403,26.1790861 191.563489,26 191.784403,26 L192.784403,25.9999998 C193.005316,26 193.184403,26.1790861 193.184403,26.3999999 Z M196.384403,26.3999999 C196.384403,26.6209139 196.205316,26.8 195.984403,26.8 L194.984403,26.8 C194.763489,26.8 194.584403,26.6209139 194.584403,26.3999999 C194.584403,26.1790861 194.763489,26 194.984403,26 L195.984402,25.9999998 C196.205316,26 196.384403,26.1790861 196.384403,26.3999999 Z M199.584403,26.3999999 C199.584403,26.6209139 199.405316,26.8 199.184403,26.8 L198.184403,26.8 C197.963489,26.8 197.784403,26.6209139 197.784403,26.3999999 C197.784403,26.1790861 197.963489,26 198.184403,26 L199.184402,25.9999998 C199.405316,26 199.584403,26.1790861 199.584403,26.3999999 Z M202.784403,26.3999999 C202.784403,26.6209139 202.605316,26.8 202.384403,26.8 L201.384403,26.8 C201.163489,26.8 200.984403,26.6209139 200.984403,26.3999999 C200.984403,26.1790861 201.163489,26 201.384403,26 L202.384402,25.9999998 C202.605316,26 202.784403,26.1790861 202.784403,26.3999999 Z M205.984403,26.3999999 C205.984403,26.6209139 205.805316,26.8 205.584403,26.8 L204.584403,26.8 C204.363489,26.8 204.184403,26.6209139 204.184403,26.3999999 C204.184403,26.1790861 204.363489,26 204.584403,26 L205.584403,25.9999998 C205.805316,26 205.984403,26.1790861 205.984403,26.3999999 Z M209.184403,26.3999999 C209.184403,26.6209139 209.005316,26.8 208.784403,26.8 L207.784403,26.8 C207.563489,26.8 207.384403,26.6209139 207.384403,26.3999999 C207.384403,26.1790861 207.563489,26 207.784403,26 L208.784403,25.9999998 C209.005316,26 209.184403,26.1790861 209.184403,26.3999999 Z M212.384403,26.3999999 C212.384403,26.6209139 212.205316,26.8 211.984403,26.8 L210.984403,26.8 C210.763489,26.8 210.584403,26.6209139 210.584403,26.3999999 C210.584403,26.1790861 210.763489,26 210.984403,26 L211.984402,25.9999998 C212.205316,26 212.384403,26.1790861 212.384403,26.3999999 Z M215.584403,26.3999999 C215.584403,26.6209139 215.405316,26.8 215.184403,26.8 L214.184403,26.8 C213.963489,26.8 213.784403,26.6209139 213.784403,26.3999999 C213.784403,26.1790861 213.963489,26 214.184403,26 L215.184402,25.9999998 C215.405316,26 215.584403,26.1790861 215.584403,26.3999999 Z M218.784403,26.3999999 C218.784403,26.6209139 218.605316,26.8 218.384403,26.8 L217.384403,26.8 C217.163489,26.8 216.984403,26.6209139 216.984403,26.3999999 C216.984403,26.1790861 217.163489,26 217.384403,26 L218.384402,25.9999998 C218.605316,26 218.784403,26.1790861 218.784403,26.3999999 Z M221.984403,26.3999999 C221.984403,26.6209139 221.805316,26.8 221.584403,26.8 L220.584403,26.8 C220.363489,26.8 220.184403,26.6209139 220.184403,26.3999999 C220.184403,26.1790861 220.363489,26 220.584403,26 L221.584403,25.9999998 C221.805316,26 221.984403,26.1790861 221.984403,26.3999999 Z M225.184403,26.3999999 C225.184403,26.6209139 225.005316,26.8 224.784403,26.8 L223.784403,26.8 C223.563489,26.8 223.384403,26.6209139 223.384403,26.3999999 C223.384403,26.1790861 223.563489,26 223.784403,26 L224.784403,25.9999998 C225.005316,26 225.184403,26.1790861 225.184403,26.3999999 Z M237.984403,26.3999999 C237.984403,26.6209139 237.805316,26.8 237.584403,26.8 L236.584403,26.8 C236.363489,26.8 236.184403,26.6209139 236.184403,26.3999999 C236.184403,26.1790861 236.363489,26 236.584403,26 L237.584403,25.9999998 C237.805316,26 237.984403,26.1790861 237.984403,26.3999999 Z M234.784403,26.3999999 C234.784403,26.6209139 234.605316,26.8 234.384403,26.8 L233.384403,26.8 C233.163489,26.8 232.984403,26.6209139 232.984403,26.3999999 C232.984403,26.1790861 233.163489,26 233.384403,26 L234.384402,25.9999998 C234.605316,26 234.784403,26.1790861 234.784403,26.3999999 Z M3.21445335,25.7146857 C3.47201296,25.8469101 3.7503283,25.9352706 4.04005994,25.9753362 C4.25889142,26.0055973 4.41175792,26.2075267 4.38149683,26.4263582 C4.35123574,26.6451896 4.14930631,26.7980562 3.93047483,26.7677951 C3.55112172,26.7153362 3.1863813,26.5995372 2.84908821,26.4263797 C2.65255935,26.3254869 2.575031,26.084379 2.6759238,25.8878502 C2.77681659,25.6913213 3.01792449,25.6137929 3.21445335,25.7146857 Z M244.239465,25.8035768 C244.349541,25.9951133 244.283504,26.2396185 244.091968,26.3496946 C243.762898,26.538811 243.403867,26.6718147 243.027924,26.7420613 C242.810768,26.7826378 242.601835,26.6394922 242.561259,26.4223367 C242.520682,26.2051813 242.663828,25.9962482 242.880983,25.9556718 C243.167977,25.9020457 243.441977,25.8005418 243.693347,25.6560796 C243.884884,25.5460036 244.129389,25.6120404 244.239465,25.8035768 Z M1.79999053,23.3738218 L1.80050042,23.4515966 C1.80607818,23.7390086 1.85825979,24.0190186 1.95396115,24.2838222 C2.02904737,24.4915841 1.92149248,24.7208778 1.71373058,24.795964 C1.50596867,24.8710502 1.27667495,24.7634953 1.20158873,24.5557334 C1.07625093,24.2089265 1.00793261,23.8423259 1.00058539,23.4621397 L1.00000988,23.3793853 C0.99847357,23.1584767 1.17630991,22.9781495 1.39721847,22.9766034 C1.61812703,22.9750769 1.79845422,23.1529133 1.79999053,23.3738218 Z M245.8,23.2077988 L245.8,23.4 C245.8,23.7094854 245.758543,24.0138606 245.677568,24.307034 C245.618753,24.5199748 245.398452,24.6449188 245.185511,24.5861041 C244.97257,24.5272894 244.847626,24.3069881 244.906441,24.0940473 C244.9683,23.8700848 245,23.6373439 245,23.4 L245,23.2077988 C245,22.9868849 245.179086,22.8077988 245.4,22.8077988 C245.620914,22.8077988 245.8,22.9868849 245.8,23.2077988 Z M1.80000021,20.1766036 L1.80000021,21.1766036 C1.80000021,21.3975175 1.62091411,21.5766036 1.40000021,21.5766036 C1.17908631,21.5766036 0.900000207,21.3975175 0.900000207,21.1766036 L0.900000207,20.1766036 C0.900000207,19.9556897 1.17908631,19.7766036 1.40000021,19.7766036 C1.62091411,19.7766036 1.80000021,19.9556897 1.80000021,20.1766036 Z M245.8,20.0077988 L245.8,21.0077988 C245.8,21.2287127 245.620914,21.4077988 245.4,21.4077988 C245.179086,21.4077988 245,21.2287127 245,21.0077988 L245,20.0077988 C245,19.7868849 245.179086,19.6077988 245.4,19.6077988 C245.620914,19.6077988 245.8,19.7868849 245.8,20.0077988 Z M1.80000021,16.9766036 L1.80000021,17.9766036 C1.80000021,18.1975175 1.62091411,18.3766036 1.40000021,18.3766036 C1.17908631,18.3766036 0.900000207,18.1975175 0.900000207,17.9766036 L0.900000207,16.9766036 C0.900000207,16.7556897 1.17908631,16.5766036 1.40000021,16.5766036 C1.62091411,16.5766036 1.80000021,16.7556897 1.80000021,16.9766036 Z M245.8,16.8077988 L245.8,17.8077988 C245.8,18.0287127 245.620914,18.2077988 245.4,18.2077988 C245.179086,18.2077988 245,18.0287127 245,17.8077988 L245,16.8077988 C245,16.5868849 245.179086,16.4077988 245.4,16.4077988 C245.620914,16.4077988 245.8,16.5868849 245.8,16.8077988 Z M1.80000021,13.7766036 L1.80000021,14.7766036 C1.80000021,14.9975175 1.62091411,15.1766036 1.40000021,15.1766036 C1.17908631,15.1766036 0.900000207,14.9975175 0.900000207,14.7766036 L0.900000207,13.7766036 C0.900000207,13.5556897 1.17908631,13.3766036 1.40000021,13.3766036 C1.62091411,13.3766036 1.80000021,13.5556897 1.80000021,13.7766036 Z M245.8,13.6077988 L245.8,14.6077988 C245.8,14.8287127 245.620914,15.0077988 245.4,15.0077988 C245.179086,15.0077988 245,14.8287127 245,14.6077988 L245,13.6077988 C245,13.3868849 245.179086,13.2077988 245.4,13.2077988 C245.620914,13.2077988 245.8,13.3868849 245.8,13.6077988 Z M1.80000021,10.5766036 L1.80000021,11.5766036 C1.80000021,11.7975175 1.62091411,11.9766036 1.40000021,11.9766036 C1.17908631,11.9766036 0.900000207,11.7975175 0.900000207,11.5766036 L0.900000207,10.5766036 C0.900000207,10.3556897 1.17908631,10.1766036 1.40000021,10.1766036 C1.62091411,10.1766036 1.80000021,10.3556897 1.80000021,10.5766036 Z M245.8,10.4077988 L245.8,11.4077988 C245.8,11.6287127 245.620914,11.8077988 245.4,11.8077988 C245.179086,11.8077988 245,11.6287127 245,11.4077988 L245,10.4077988 C245,10.1868849 245.179086,10.0077988 245.4,10.0077988 C245.620914,10.0077988 245.8,10.1868849 245.8,10.4077988 Z M1.80000021,7.37660357 L1.80000021,8.37660357 C1.80000021,8.59751746 1.62091411,8.77660357 1.40000021,8.77660357 C1.17908631,8.77660357 0.900000207,8.59751746 0.900000207,8.37660357 L0.900000207,7.37660357 C0.900000207,7.15568967 1.17908631,6.97660357 1.40000021,6.97660357 C1.62091411,6.97660357 1.80000021,7.15568967 1.80000021,7.37660357 Z M245.8,7.20779881 L245.8,8.20779881 C245.8,8.42871271 245.620914,8.60779881 245.4,8.60779881 C245.179086,8.60779881 245,8.42871271 245,8.20779881 L245,7.20779881 C245,6.98688491 245.179086,6.80779881 245.4,6.80779881 C245.620914,6.80779881 245.8,6.98688491 245.8,7.20779881 Z M1.80000021,4.17660357 L1.80000021,5.17660357 C1.80000021,5.39751746 1.62091411,5.57660357 1.40000021,5.57660357 C1.17908631,5.57660357 0.900000207,5.39751746 0.900000207,5.17660357 L0.900000207,4.17660357 C0.900000207,3.95568967 1.17908631,3.77660357 1.40000021,3.77660357 C1.62091411,3.77660357 1.80000021,3.95568967 1.80000021,4.17660357 Z M245.8,4.00779881 L245.8,5.00779881 C245.8,5.22871271 245.620914,5.40779881 245.4,5.40779881 C245.179086,5.40779881 245,5.22871271 245,5.00779881 L245,4.00779881 C245,3.78688491 245.179086,3.60779881 245.4,3.60779881 C245.620914,3.60779881 245.8,3.78688491 245.8,4.00779881 Z M2.63072048,0.944369874 C2.78244408,1.10494043 2.7752722,1.35810485 2.61470164,1.50982845 C2.40315196,1.70972236 2.22618834,1.94368046 2.09194129,2.20163281 C1.98995519,2.39759654 1.74841912,2.47378047 1.55245538,2.37179437 C1.35649164,2.26980827 1.28030772,2.0282722 1.38229382,1.83230846 C1.5578362,1.49500818 1.78900526,1.18938676 2.06526191,0.928351032 C2.22583246,0.77662744 2.47899689,0.78379932 2.63072048,0.944369874 Z M244.537335,0.755669627 C244.832721,0.994684472 245.086582,1.28167811 245.287543,1.60420989 C245.404367,1.79170677 245.347076,2.03840767 245.159579,2.15523177 C244.972082,2.27205587 244.725381,2.21476427 244.608557,2.02726738 C244.454756,1.78042533 244.260325,1.56061743 244.034113,1.37757597 C243.862378,1.23861502 243.83581,0.986746434 243.974771,0.81501174 C244.113732,0.643277045 244.3656,0.616708676 244.537335,0.755669627 Z M127.4,0.5 C127.4,0.7209139 127.220914,0.9 127,0.9 L126,0.9 C125.779086,0.9 125.6,0.7209139 125.6,0.5 C125.6,0.2790861 125.779086,0 126,0 L127,0 C127.220914,0 127.4,0.2790861 127.4,0.5 Z M5.80000021,0.5 C5.80000021,0.7209139 5.62091411,0.9 5.40000021,0.9 L4.40000021,0.9 C4.17908631,0.9 4.00000021,0.7209139 4.00000021,0.5 C4.00000021,0.2790861 4.17908631,0 4.40000021,0 L5.40000021,0 C5.62091411,0 5.80000021,0.2790861 5.80000021,0.5 Z M236.2,0.5 C236.2,0.7209139 236.020914,0.9 235.8,0.9 L234.8,0.9 C234.579086,0.9 234.4,0.7209139 234.4,0.5 C234.4,0.2790861 234.579086,0 234.8,0 L235.8,0 C236.020914,0 236.2,0.2790861 236.2,0.5 Z M233,0.5 C233,0.7209139 232.820914,0.9 232.6,0.9 L231.6,0.9 C231.379086,0.9 231.2,0.7209139 231.2,0.5 C231.2,0.2790861 231.379086,0 231.6,0 L232.6,0 C232.820914,0 233,0.2790861 233,0.5 Z M229.8,0.5 C229.8,0.7209139 229.620914,0.9 229.4,0.9 L228.4,0.9 C228.179086,0.9 228,0.7209139 228,0.5 C228,0.2790861 228.179086,0 228.4,0 L229.4,0 C229.620914,0 229.8,0.2790861 229.8,0.5 Z M226.6,0.5 C226.6,0.7209139 226.420914,0.9 226.2,0.9 L225.2,0.9 C224.979086,0.9 224.8,0.7209139 224.8,0.5 C224.8,0.2790861 224.979086,0 225.2,0 L226.2,0 C226.420914,0 226.6,0.2790861 226.6,0.5 Z M223.4,0.5 C223.4,0.7209139 223.220914,0.9 223,0.9 L222,0.9 C221.779086,0.9 221.6,0.7209139 221.6,0.5 C221.6,0.2790861 221.779086,0 222,0 L223,0 C223.220914,0 223.4,0.2790861 223.4,0.5 Z M220.2,0.5 C220.2,0.7209139 220.020914,0.9 219.8,0.9 L218.8,0.9 C218.579086,0.9 218.4,0.7209139 218.4,0.5 C218.4,0.2790861 218.579086,0 218.8,0 L219.8,0 C220.020914,0 220.2,0.2790861 220.2,0.5 Z M217,0.5 C217,0.7209139 216.820914,0.9 216.6,0.9 L215.6,0.9 C215.379086,0.9 215.2,0.7209139 215.2,0.5 C215.2,0.2790861 215.379086,0 215.6,0 L216.6,0 C216.820914,0 217,0.2790861 217,0.5 Z M213.8,0.5 C213.8,0.7209139 213.620914,0.9 213.4,0.9 L212.4,0.9 C212.179086,0.9 212,0.7209139 212,0.5 C212,0.2790861 212.179086,0 212.4,0 L213.4,0 C213.620914,0 213.8,0.2790861 213.8,0.5 Z M210.6,0.5 C210.6,0.7209139 210.420914,0.9 210.2,0.9 L209.2,0.9 C208.979086,0.9 208.8,0.7209139 208.8,0.5 C208.8,0.2790861 208.979086,0 209.2,0 L210.2,0 C210.420914,0 210.6,0.2790861 210.6,0.5 Z M207.4,0.5 C207.4,0.7209139 207.220914,0.9 207,0.9 L206,0.9 C205.779086,0.9 205.6,0.7209139 205.6,0.5 C205.6,0.2790861 205.779086,0 206,0 L207,0 C207.220914,0 207.4,0.2790861 207.4,0.5 Z M204.2,0.5 C204.2,0.7209139 204.020914,0.9 203.8,0.9 L202.8,0.9 C202.579086,0.9 202.4,0.7209139 202.4,0.5 C202.4,0.2790861 202.579086,0 202.8,0 L203.8,0 C204.020914,0 204.2,0.2790861 204.2,0.5 Z M201,0.5 C201,0.7209139 200.820914,0.9 200.6,0.9 L199.6,0.9 C199.379086,0.9 199.2,0.7209139 199.2,0.5 C199.2,0.2790861 199.379086,0 199.6,0 L200.6,0 C200.820914,0 201,0.2790861 201,0.5 Z M197.8,0.5 C197.8,0.7209139 197.620914,0.9 197.4,0.9 L196.4,0.9 C196.179086,0.9 196,0.7209139 196,0.5 C196,0.2790861 196.179086,0 196.4,0 L197.4,0 C197.620914,0 197.8,0.2790861 197.8,0.5 Z M194.6,0.5 C194.6,0.7209139 194.420914,0.9 194.2,0.9 L193.2,0.9 C192.979086,0.9 192.8,0.7209139 192.8,0.5 C192.8,0.2790861 192.979086,0 193.2,0 L194.2,0 C194.420914,0 194.6,0.2790861 194.6,0.5 Z M191.4,0.5 C191.4,0.7209139 191.220914,0.9 191,0.9 L190,0.9 C189.779086,0.9 189.6,0.7209139 189.6,0.5 C189.6,0.2790861 189.779086,0 190,0 L191,0 C191.220914,0 191.4,0.2790861 191.4,0.5 Z M188.2,0.5 C188.2,0.7209139 188.020914,0.9 187.8,0.9 L186.8,0.9 C186.579086,0.9 186.4,0.7209139 186.4,0.5 C186.4,0.2790861 186.579086,0 186.8,0 L187.8,0 C188.020914,0 188.2,0.2790861 188.2,0.5 Z M185,0.5 C185,0.7209139 184.820914,0.9 184.6,0.9 L183.6,0.9 C183.379086,0.9 183.2,0.7209139 183.2,0.5 C183.2,0.2790861 183.379086,0 183.6,0 L184.6,0 C184.820914,0 185,0.2790861 185,0.5 Z M181.8,0.5 C181.8,0.7209139 181.620914,0.9 181.4,0.9 L180.4,0.9 C180.179086,0.9 180,0.7209139 180,0.5 C180,0.2790861 180.179086,0 180.4,0 L181.4,0 C181.620914,0 181.8,0.2790861 181.8,0.5 Z M178.6,0.5 C178.6,0.7209139 178.420914,0.9 178.2,0.9 L177.2,0.9 C176.979086,0.9 176.8,0.7209139 176.8,0.5 C176.8,0.2790861 176.979086,0 177.2,0 L178.2,0 C178.420914,0 178.6,0.2790861 178.6,0.5 Z M175.4,0.5 C175.4,0.7209139 175.220914,0.9 175,0.9 L174,0.9 C173.779086,0.9 173.6,0.7209139 173.6,0.5 C173.6,0.2790861 173.779086,0 174,0 L175,0 C175.220914,0 175.4,0.2790861 175.4,0.5 Z M172.2,0.5 C172.2,0.7209139 172.020914,0.9 171.8,0.9 L170.8,0.9 C170.579086,0.9 170.4,0.7209139 170.4,0.5 C170.4,0.2790861 170.579086,0 170.8,0 L171.8,0 C172.020914,0 172.2,0.2790861 172.2,0.5 Z M169,0.5 C169,0.7209139 168.820914,0.9 168.6,0.9 L167.6,0.9 C167.379086,0.9 167.2,0.7209139 167.2,0.5 C167.2,0.2790861 167.379086,0 167.6,0 L168.6,0 C168.820914,0 169,0.2790861 169,0.5 Z M165.8,0.5 C165.8,0.7209139 165.620914,0.9 165.4,0.9 L164.4,0.9 C164.179086,0.9 164,0.7209139 164,0.5 C164,0.2790861 164.179086,0 164.4,0 L165.4,0 C165.620914,0 165.8,0.2790861 165.8,0.5 Z M162.6,0.5 C162.6,0.7209139 162.420914,0.9 162.2,0.9 L161.2,0.9 C160.979086,0.9 160.8,0.7209139 160.8,0.5 C160.8,0.2790861 160.979086,0 161.2,0 L162.2,0 C162.420914,0 162.6,0.2790861 162.6,0.5 Z M159.4,0.5 C159.4,0.7209139 159.220914,0.9 159,0.9 L158,0.9 C157.779086,0.9 157.6,0.7209139 157.6,0.5 C157.6,0.2790861 157.779086,0 158,0 L159,0 C159.220914,0 159.4,0.2790861 159.4,0.5 Z M156.2,0.5 C156.2,0.7209139 156.020914,0.9 155.8,0.9 L154.8,0.9 C154.579086,0.9 154.4,0.7209139 154.4,0.5 C154.4,0.2790861 154.579086,0 154.8,0 L155.8,0 C156.020914,0 156.2,0.2790861 156.2,0.5 Z M153,0.5 C153,0.7209139 152.820914,0.9 152.6,0.9 L151.6,0.9 C151.379086,0.9 151.2,0.7209139 151.2,0.5 C151.2,0.2790861 151.379086,0 151.6,0 L152.6,0 C152.820914,0 153,0.2790861 153,0.5 Z M149.8,0.5 C149.8,0.7209139 149.620914,0.9 149.4,0.9 L148.4,0.9 C148.179086,0.9 148,0.7209139 148,0.5 C148,0.2790861 148.179086,0 148.4,0 L149.4,0 C149.620914,0 149.8,0.2790861 149.8,0.5 Z M146.6,0.5 C146.6,0.7209139 146.420914,0.9 146.2,0.9 L145.2,0.9 C144.979086,0.9 144.8,0.7209139 144.8,0.5 C144.8,0.2790861 144.979086,0 145.2,0 L146.2,0 C146.420914,0 146.6,0.2790861 146.6,0.5 Z M143.4,0.5 C143.4,0.7209139 143.220914,0.9 143,0.9 L142,0.9 C141.779086,0.9 141.6,0.7209139 141.6,0.5 C141.6,0.2790861 141.779086,0 142,0 L143,0 C143.220914,0 143.4,0.2790861 143.4,0.5 Z M140.2,0.5 C140.2,0.7209139 140.020914,0.9 139.8,0.9 L138.8,0.9 C138.579086,0.9 138.4,0.7209139 138.4,0.5 C138.4,0.2790861 138.579086,0 138.8,0 L139.8,0 C140.020914,0 140.2,0.2790861 140.2,0.5 Z M137,0.5 C137,0.7209139 136.820914,0.9 136.6,0.9 L135.6,0.9 C135.379086,0.9 135.2,0.7209139 135.2,0.5 C135.2,0.2790861 135.379086,0 135.6,0 L136.6,0 C136.820914,0 137,0.2790861 137,0.5 Z M133.8,0.5 C133.8,0.7209139 133.620914,0.9 133.4,0.9 L132.4,0.9 C132.179086,0.9 132,0.7209139 132,0.5 C132,0.2790861 132.179086,0 132.4,0 L133.4,0 C133.620914,0 133.8,0.2790861 133.8,0.5 Z M130.6,0.5 C130.6,0.7209139 130.420914,0.9 130.2,0.9 L129.2,0.9 C128.979086,0.9 128.8,0.7209139 128.8,0.5 C128.8,0.2790861 128.979086,0 129.2,0 L130.2,0 C130.420914,0 130.6,0.2790861 130.6,0.5 Z M242.6,0.5 C242.6,0.7209139 242.420914,0.9 242.2,0.9 L241.2,0.9 C240.979086,0.9 240.8,0.7209139 240.8,0.5 C240.8,0.2790861 240.979086,0 241.2,0 L242.2,0 C242.420914,0 242.6,0.2790861 242.6,0.5 Z M124.2,0.5 C124.2,0.7209139 124.020914,0.9 123.8,0.9 L122.8,0.9 C122.579086,0.9 122.4,0.7209139 122.4,0.5 C122.4,0.2790861 122.579086,0 122.8,0 L123.8,0 C124.020914,0 124.2,0.2790861 124.2,0.5 Z M121,0.5 C121,0.7209139 120.820914,0.9 120.6,0.9 L119.6,0.9 C119.379086,0.9 119.2,0.7209139 119.2,0.5 C119.2,0.2790861 119.379086,0 119.6,0 L120.6,0 C120.820914,0 121,0.2790861 121,0.5 Z M117.8,0.5 C117.8,0.7209139 117.620914,0.9 117.4,0.9 L116.4,0.9 C116.179086,0.9 116,0.7209139 116,0.5 C116,0.2790861 116.179086,0 116.4,0 L117.4,0 C117.620914,0 117.8,0.2790861 117.8,0.5 Z M114.6,0.5 C114.6,0.7209139 114.420914,0.9 114.2,0.9 L113.2,0.9 C112.979086,0.9 112.8,0.7209139 112.8,0.5 C112.8,0.2790861 112.979086,0 113.2,0 L114.2,0 C114.420914,0 114.6,0.2790861 114.6,0.5 Z M111.4,0.5 C111.4,0.7209139 111.220914,0.9 111,0.9 L110,0.9 C109.779086,0.9 109.6,0.7209139 109.6,0.5 C109.6,0.2790861 109.779086,0 110,0 L111,0 C111.220914,0 111.4,0.2790861 111.4,0.5 Z M108.2,0.5 C108.2,0.7209139 108.020914,0.9 107.8,0.9 L106.8,0.9 C106.579086,0.9 106.4,0.7209139 106.4,0.5 C106.4,0.2790861 106.579086,0 106.8,0 L107.8,0 C108.020914,0 108.2,0.2790861 108.2,0.5 Z M105,0.5 C105,0.7209139 104.820914,0.9 104.6,0.9 L103.6,0.9 C103.379086,0.9 103.2,0.7209139 103.2,0.5 C103.2,0.2790861 103.379086,0 103.6,0 L104.6,0 C104.820914,0 105,0.2790861 105,0.5 Z M101.8,0.5 C101.8,0.7209139 101.620914,0.9 101.4,0.9 L100.4,0.9 C100.179086,0.9 100,0.7209139 100,0.5 C100,0.2790861 100.179086,0 100.4,0 L101.4,0 C101.620914,0 101.8,0.2790861 101.8,0.5 Z M98.6000002,0.5 C98.6000002,0.7209139 98.4209141,0.9 98.2000002,0.9 L97.2000002,0.9 C96.9790863,0.9 96.8000002,0.7209139 96.8000002,0.5 C96.8000002,0.2790861 96.9790863,0 97.2000002,0 L98.2000002,0 C98.4209141,0 98.6000002,0.2790861 98.6000002,0.5 Z M95.4000002,0.5 C95.4000002,0.7209139 95.2209141,0.9 95.0000002,0.9 L94.0000002,0.9 C93.7790863,0.9 93.6000002,0.7209139 93.6000002,0.5 C93.6000002,0.2790861 93.7790863,0 94.0000002,0 L95.0000002,0 C95.2209141,0 95.4000002,0.2790861 95.4000002,0.5 Z M92.2000002,0.5 C92.2000002,0.7209139 92.0209141,0.9 91.8000002,0.9 L90.8000002,0.9 C90.5790863,0.9 90.4000002,0.7209139 90.4000002,0.5 C90.4000002,0.2790861 90.5790863,0 90.8000002,0 L91.8000002,0 C92.0209141,0 92.2000002,0.2790861 92.2000002,0.5 Z M89.0000002,0.5 C89.0000002,0.7209139 88.8209141,0.9 88.6000002,0.9 L87.6000002,0.9 C87.3790863,0.9 87.2000002,0.7209139 87.2000002,0.5 C87.2000002,0.2790861 87.3790863,0 87.6000002,0 L88.6000002,0 C88.8209141,0 89.0000002,0.2790861 89.0000002,0.5 Z M85.8000002,0.5 C85.8000002,0.7209139 85.6209141,0.9 85.4000002,0.9 L84.4000002,0.9 C84.1790863,0.9 84.0000002,0.7209139 84.0000002,0.5 C84.0000002,0.2790861 84.1790863,0 84.4000002,0 L85.4000002,0 C85.6209141,0 85.8000002,0.2790861 85.8000002,0.5 Z M82.6000002,0.5 C82.6000002,0.7209139 82.4209141,0.9 82.2000002,0.9 L81.2000002,0.9 C80.9790863,0.9 80.8000002,0.7209139 80.8000002,0.5 C80.8000002,0.2790861 80.9790863,0 81.2000002,0 L82.2000002,0 C82.4209141,0 82.6000002,0.2790861 82.6000002,0.5 Z M79.4000002,0.5 C79.4000002,0.7209139 79.2209141,0.9 79.0000002,0.9 L78.0000002,0.9 C77.7790863,0.9 77.6000002,0.7209139 77.6000002,0.5 C77.6000002,0.2790861 77.7790863,0 78.0000002,0 L79.0000002,0 C79.2209141,0 79.4000002,0.2790861 79.4000002,0.5 Z M76.2000002,0.5 C76.2000002,0.7209139 76.0209141,0.9 75.8000002,0.9 L74.8000002,0.9 C74.5790863,0.9 74.4000002,0.7209139 74.4000002,0.5 C74.4000002,0.2790861 74.5790863,0 74.8000002,0 L75.8000002,0 C76.0209141,0 76.2000002,0.2790861 76.2000002,0.5 Z M73.0000002,0.5 C73.0000002,0.7209139 72.8209141,0.9 72.6000002,0.9 L71.6000002,0.9 C71.3790863,0.9 71.2000002,0.7209139 71.2000002,0.5 C71.2000002,0.2790861 71.3790863,0 71.6000002,0 L72.6000002,0 C72.8209141,0 73.0000002,0.2790861 73.0000002,0.5 Z M69.8000002,0.5 C69.8000002,0.7209139 69.6209141,0.9 69.4000002,0.9 L68.4000002,0.9 C68.1790863,0.9 68.0000002,0.7209139 68.0000002,0.5 C68.0000002,0.2790861 68.1790863,0 68.4000002,0 L69.4000002,0 C69.6209141,0 69.8000002,0.2790861 69.8000002,0.5 Z M66.6000002,0.5 C66.6000002,0.7209139 66.4209141,0.9 66.2000002,0.9 L65.2000002,0.9 C64.9790863,0.9 64.8000002,0.7209139 64.8000002,0.5 C64.8000002,0.2790861 64.9790863,0 65.2000002,0 L66.2000002,0 C66.4209141,0 66.6000002,0.2790861 66.6000002,0.5 Z M63.4000002,0.5 C63.4000002,0.7209139 63.2209141,0.9 63.0000002,0.9 L62.0000002,0.9 C61.7790863,0.9 61.6000002,0.7209139 61.6000002,0.5 C61.6000002,0.2790861 61.7790863,0 62.0000002,0 L63.0000002,0 C63.2209141,0 63.4000002,0.2790861 63.4000002,0.5 Z M60.2000002,0.5 C60.2000002,0.7209139 60.0209141,0.9 59.8000002,0.9 L58.8000002,0.9 C58.5790863,0.9 58.4000002,0.7209139 58.4000002,0.5 C58.4000002,0.2790861 58.5790863,0 58.8000002,0 L59.8000002,0 C60.0209141,0 60.2000002,0.2790861 60.2000002,0.5 Z M57.0000002,0.5 C57.0000002,0.7209139 56.8209141,0.9 56.6000002,0.9 L55.6000002,0.9 C55.3790863,0.9 55.2000002,0.7209139 55.2000002,0.5 C55.2000002,0.2790861 55.3790863,0 55.6000002,0 L56.6000002,0 C56.8209141,0 57.0000002,0.2790861 57.0000002,0.5 Z M53.8000002,0.5 C53.8000002,0.7209139 53.6209141,0.9 53.4000002,0.9 L52.4000002,0.9 C52.1790863,0.9 52.0000002,0.7209139 52.0000002,0.5 C52.0000002,0.2790861 52.1790863,0 52.4000002,0 L53.4000002,0 C53.6209141,0 53.8000002,0.2790861 53.8000002,0.5 Z M50.6000002,0.5 C50.6000002,0.7209139 50.4209141,0.9 50.2000002,0.9 L49.2000002,0.9 C48.9790863,0.9 48.8000002,0.7209139 48.8000002,0.5 C48.8000002,0.2790861 48.9790863,0 49.2000002,0 L50.2000002,0 C50.4209141,0 50.6000002,0.2790861 50.6000002,0.5 Z M47.4000002,0.5 C47.4000002,0.7209139 47.2209141,0.9 47.0000002,0.9 L46.0000002,0.9 C45.7790863,0.9 45.6000002,0.7209139 45.6000002,0.5 C45.6000002,0.2790861 45.7790863,0 46.0000002,0 L47.0000002,0 C47.2209141,0 47.4000002,0.2790861 47.4000002,0.5 Z M44.2000002,0.5 C44.2000002,0.7209139 44.0209141,0.9 43.8000002,0.9 L42.8000002,0.9 C42.5790863,0.9 42.4000002,0.7209139 42.4000002,0.5 C42.4000002,0.2790861 42.5790863,0 42.8000002,0 L43.8000002,0 C44.0209141,0 44.2000002,0.2790861 44.2000002,0.5 Z M41.0000002,0.5 C41.0000002,0.7209139 40.8209141,0.9 40.6000002,0.9 L39.6000002,0.9 C39.3790863,0.9 39.2000002,0.7209139 39.2000002,0.5 C39.2000002,0.2790861 39.3790863,0 39.6000002,0 L40.6000002,0 C40.8209141,0 41.0000002,0.2790861 41.0000002,0.5 Z M37.8000002,0.5 C37.8000002,0.7209139 37.6209141,0.9 37.4000002,0.9 L36.4000002,0.9 C36.1790863,0.9 36.0000002,0.7209139 36.0000002,0.5 C36.0000002,0.2790861 36.1790863,0 36.4000002,0 L37.4000002,0 C37.6209141,0 37.8000002,0.2790861 37.8000002,0.5 Z M34.6000002,0.5 C34.6000002,0.7209139 34.4209141,0.9 34.2000002,0.9 L33.2000002,0.9 C32.9790863,0.9 32.8000002,0.7209139 32.8000002,0.5 C32.8000002,0.2790861 32.9790863,0 33.2000002,0 L34.2000002,0 C34.4209141,0 34.6000002,0.2790861 34.6000002,0.5 Z M31.4000002,0.5 C31.4000002,0.7209139 31.2209141,0.9 31.0000002,0.9 L30.0000002,0.9 C29.7790863,0.9 29.6000002,0.7209139 29.6000002,0.5 C29.6000002,0.2790861 29.7790863,0 30.0000002,0 L31.0000002,0 C31.2209141,0 31.4000002,0.2790861 31.4000002,0.5 Z M28.2000002,0.5 C28.2000002,0.7209139 28.0209141,0.9 27.8000002,0.9 L26.8000002,0.9 C26.5790863,0.9 26.4000002,0.7209139 26.4000002,0.5 C26.4000002,0.2790861 26.5790863,0 26.8000002,0 L27.8000002,0 C28.0209141,0 28.2000002,0.2790861 28.2000002,0.5 Z M25.0000002,0.5 C25.0000002,0.7209139 24.8209141,0.9 24.6000002,0.9 L23.6000002,0.9 C23.3790863,0.9 23.2000002,0.7209139 23.2000002,0.5 C23.2000002,0.2790861 23.3790863,0 23.6000002,0 L24.6000002,0 C24.8209141,0 25.0000002,0.2790861 25.0000002,0.5 Z M21.8000002,0.5 C21.8000002,0.7209139 21.6209141,0.9 21.4000002,0.9 L20.4000002,0.9 C20.1790863,0.9 20.0000002,0.7209139 20.0000002,0.5 C20.0000002,0.2790861 20.1790863,0 20.4000002,0 L21.4000002,0 C21.6209141,0 21.8000002,0.2790861 21.8000002,0.5 Z M18.6000002,0.5 C18.6000002,0.7209139 18.4209141,0.9 18.2000002,0.9 L17.2000002,0.9 C16.9790863,0.9 16.8000002,0.7209139 16.8000002,0.5 C16.8000002,0.2790861 16.9790863,0 17.2000002,0 L18.2000002,0 C18.4209141,0 18.6000002,0.2790861 18.6000002,0.5 Z M15.4000002,0.5 C15.4000002,0.7209139 15.2209141,0.9 15.0000002,0.9 L14.0000002,0.9 C13.7790863,0.9 13.6000002,0.7209139 13.6000002,0.5 C13.6000002,0.2790861 13.7790863,0 14.0000002,0 L15.0000002,0 C15.2209141,0 15.4000002,0.2790861 15.4000002,0.5 Z M12.2000002,0.5 C12.2000002,0.7209139 12.0209141,0.9 11.8000002,0.9 L10.8000002,0.9 C10.5790863,0.9 10.4000002,0.7209139 10.4000002,0.5 C10.4000002,0.2790861 10.5790863,0 10.8000002,0 L11.8000002,0 C12.0209141,0 12.2000002,0.2790861 12.2000002,0.5 Z M9.00000021,0.5 C9.00000021,0.7209139 8.82091411,0.9 8.60000021,0.9 L7.60000021,0.9 C7.37908631,0.9 7.20000021,0.7209139 7.20000021,0.5 C7.20000021,0.2790861 7.37908631,0 7.60000021,0 L8.60000021,0 C8.82091411,0 9.00000021,0.2790861 9.00000021,0.5 Z M239.4,0.5 C239.4,0.7209139 239.220914,0.9 239,0.9 L238,0.9 C237.779086,0.9 237.6,0.7209139 237.6,0.5 C237.6,0.2790861 237.779086,0 238,0 L239,0 C239.220914,0 239.4,0.2790861 239.4,0.5 Z" id="dash" fill="#8E8E8E" fill-rule="nonzero"></path>
+                </g>
+                <g id="Row" transform="translate(30, 53)">
+                    <g id="Group">
+                        <rect id="Rectangle" fill="#E3EDE6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill="#8CA595" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="Group" transform="translate(36, 0)">
+                        <rect id="Rectangle" fill="#E3EDE6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill="#8CA595" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="Group" transform="translate(72, 0)">
+                        <rect id="Rectangle" fill="#E3EDE6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill="#8CA595" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="Group" transform="translate(108, 0)">
+                        <rect id="Rectangle" fill="#E3EDE6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill="#8CA595" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="Group" transform="translate(144, 0)">
+                        <rect id="Rectangle" fill="#E3EDE6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill="#8CA595" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="Group" transform="translate(196, 0)">
+                        <rect id="Rectangle" fill="#E3EDE6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill="#8CA595" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="..." transform="translate(177.7, 5)" fill="#8E8E8E">
+                        <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                    </g>
+                </g>
+                <g id="SequencePooling" transform="translate(100, 31)" fill="#9172E2" fill-rule="nonzero">
+                    <path d="M3.16894531,9.10742188 C3.71907552,9.10742188 4.19921875,9.02604167 4.609375,8.86328125 C5.01953125,8.70052083 5.33691406,8.46207682 5.56152344,8.14794922 C5.78613281,7.83382161 5.8984375,7.46191406 5.8984375,7.03222656 C5.8984375,6.8141276 5.87076823,6.6155599 5.81542969,6.43652344 C5.76009115,6.25748698 5.68359375,6.10042318 5.5859375,5.96533203 C5.48828125,5.83024089 5.36214193,5.70735677 5.20751953,5.59667969 C5.05289714,5.4860026 4.88525391,5.38916016 4.70458984,5.30615234 C4.52392578,5.22314453 4.31315104,5.14257813 4.07226563,5.06445313 L2.52929687,4.57128906 C2.27539063,4.48665365 2.09065755,4.37679036 1.97509766,4.24169922 C1.85953776,4.10660807 1.80175781,3.93001302 1.80175781,3.71191406 C1.80175781,3.48730469 1.8733724,3.30338542 2.01660156,3.16015625 C2.15983073,3.01692708 2.33968099,2.91764323 2.55615234,2.86230469 C2.7726237,2.80696615 3.02897135,2.77929688 3.32519531,2.77929688 C3.91764323,2.77929688 4.48730469,2.93717448 5.03417969,3.25292969 C5.1155599,3.30175781 5.1953125,3.32617188 5.2734375,3.32617188 C5.39713542,3.32617188 5.50292969,3.27408854 5.59082031,3.16992187 C5.67871094,3.06575521 5.72265625,2.95019531 5.72265625,2.82324219 C5.72265625,2.76464844 5.71207682,2.7101237 5.69091797,2.65966797 C5.66975911,2.60921224 5.63639323,2.56608073 5.59082031,2.53027344 C5.35644531,2.33170573 5.02360026,2.1624349 4.59228516,2.02246094 C4.16097005,1.88248698 3.7093099,1.8125 3.23730469,1.8125 C2.47884115,1.8125 1.85546875,1.98665365 1.3671875,2.33496094 C0.87890625,2.68326823 0.634765625,3.15690104 0.634765625,3.75585937 C0.634765625,4.234375 0.758463542,4.61279297 1.00585937,4.89111328 C1.25325521,5.16943359 1.64388021,5.39811198 2.17773438,5.57714844 L3.73046875,6.09472656 C4.08203125,6.21191406 4.33349609,6.3453776 4.48486328,6.49511719 C4.63623047,6.64485677 4.71191406,6.86132812 4.71191406,7.14453125 C4.71191406,7.47330729 4.5711263,7.72151693 4.28955078,7.88916016 C4.00797526,8.05680339 3.64095052,8.140625 3.18847656,8.140625 C2.47558594,8.140625 1.81966146,7.93391927 1.22070312,7.52050781 C1.20117187,7.50748698 1.18001302,7.49772135 1.15722656,7.49121094 C1.1344401,7.48470052 1.11165365,7.48144531 1.08886719,7.48144531 C1.03027344,7.48144531 0.969238281,7.50260417 0.905761719,7.54492188 C0.842285156,7.58723958 0.786132813,7.64013672 0.737304688,7.70361328 C0.688476562,7.76708984 0.64860026,7.83626302 0.617675781,7.91113281 C0.586751302,7.9860026 0.571289062,8.05598958 0.571289062,8.12109375 C0.571289062,8.19921875 0.595703125,8.2578125 0.64453125,8.296875 C1.27604167,8.83723958 2.11751302,9.10742188 3.16894531,9.10742188 Z" id="Path"></path>
+                    <path d="M9.26757812,9.06835938 C9.97721354,9.06835938 10.5761719,8.85351563 11.0644531,8.42382813 C11.171875,8.3359375 11.2255859,8.23014323 11.2255859,8.10644531 C11.2255859,8.00878906 11.1922201,7.92252604 11.1254883,7.84765625 C11.0587565,7.77278646 10.9798177,7.73535156 10.8886719,7.73535156 C10.8268229,7.73535156 10.764974,7.75651042 10.703125,7.79882813 C10.2050781,8.15039062 9.75260417,8.32617188 9.34570312,8.32617188 C8.90625,8.32291667 8.5555013,8.19840495 8.29345703,7.95263672 C8.03141276,7.70686849 7.8889974,7.32519531 7.86621094,6.80761719 L11.0693359,6.80761719 C11.180013,6.80761719 11.2605794,6.76936849 11.3110352,6.69287109 C11.3614909,6.6163737 11.3867188,6.51953125 11.3867188,6.40234375 C11.3802083,6.08007812 11.3313802,5.78141276 11.2402344,5.50634766 C11.1490885,5.23128255 11.0164388,4.98388672 10.8422852,4.76416016 C10.6681315,4.54443359 10.4370117,4.37109375 10.1489258,4.24414062 C9.86083984,4.1171875 9.5296224,4.05371094 9.15527344,4.05371094 C8.7874349,4.05371094 8.45214844,4.12044271 8.14941406,4.25390625 C7.84667969,4.38736979 7.59765625,4.56803385 7.40234375,4.79589844 C7.20703125,5.02376302 7.05647786,5.28499349 6.95068359,5.57958984 C6.84488932,5.8741862 6.79199219,6.1858724 6.79199219,6.51464844 C6.79199219,7.30240885 7.01904297,7.92333984 7.47314453,8.37744141 C7.92724609,8.83154297 8.52539062,9.06184896 9.26757812,9.06835938 Z M7.87597656,6.16308594 C7.8922526,5.98730469 7.9305013,5.81966146 7.99072266,5.66015625 C8.05094401,5.50065104 8.13313802,5.35416667 8.23730469,5.22070312 C8.34147135,5.08723958 8.4765625,4.97981771 8.64257812,4.8984375 C8.80859375,4.81705729 8.99414062,4.77636719 9.19921875,4.77636719 C9.59635417,4.77636719 9.89908854,4.90820312 10.1074219,5.171875 C10.3157552,5.43554688 10.4345703,5.76595052 10.4638672,6.16308594 L7.87597656,6.16308594 Z" id="Shape"></path>
+                    <path d="M16.40625,10.8603516 C16.5527344,10.8603516 16.6780599,10.8164063 16.7822266,10.7285156 C16.8863932,10.640625 16.9384766,10.5185547 16.9384766,10.3623047 L16.9384766,4.63964844 C16.9384766,4.48014323 16.8839518,4.35400391 16.7749023,4.26123047 C16.6658529,4.16845703 16.5397135,4.12207031 16.3964844,4.12207031 C16.233724,4.12207031 16.1051432,4.16438802 16.0107422,4.24902344 C15.9163411,4.33365885 15.8691406,4.46223958 15.8691406,4.63476562 L15.8691406,4.90820312 C15.5501302,4.33854167 15.0065104,4.05371094 14.2382813,4.05371094 C13.9029948,4.05371094 13.6002604,4.12288411 13.3300781,4.26123047 C13.0598958,4.39957682 12.8385417,4.5859375 12.6660156,4.8203125 C12.4934896,5.0546875 12.3608398,5.32405599 12.2680664,5.62841797 C12.175293,5.93277995 12.1289062,6.25423177 12.1289062,6.59277344 C12.1289062,7.33170573 12.3250326,7.92903646 12.7172852,8.38476563 C13.1095378,8.84049479 13.6409505,9.06835938 14.3115234,9.06835938 C15.0276693,9.06835938 15.546875,8.79817708 15.8691406,8.2578125 L15.8691406,10.3623047 C15.8691406,10.5185547 15.9220378,10.640625 16.027832,10.7285156 C16.1336263,10.8164063 16.2597656,10.8603516 16.40625,10.8603516 Z M14.5019531,8.34082031 C14.2643229,8.34082031 14.0576172,8.28873698 13.8818359,8.18457031 C13.7060547,8.08040365 13.5701497,7.94124349 13.4741211,7.76708984 C13.3780924,7.5929362 13.3081055,7.40738932 13.2641602,7.21044922 C13.2202148,7.01350911 13.1982422,6.80598958 13.1982422,6.58789063 C13.1982422,6.42513021 13.2104492,6.2639974 13.2348633,6.10449219 C13.2592773,5.94498698 13.3024089,5.78385417 13.3642578,5.62109375 C13.4261068,5.45833333 13.503418,5.31591797 13.5961914,5.19384766 C13.6889648,5.07177734 13.811849,4.97167969 13.9648438,4.89355469 C14.1178385,4.81542969 14.2903646,4.77636719 14.4824219,4.77636719 C14.983724,4.77636719 15.3393555,4.93017578 15.5493164,5.23779297 C15.7592773,5.54541016 15.8642578,5.9921875 15.8642578,6.578125 C15.8642578,6.80924479 15.8390299,7.02652995 15.7885742,7.22998047 C15.7381185,7.43343099 15.6616211,7.61979167 15.559082,7.7890625 C15.456543,7.95833333 15.3149414,8.09261068 15.1342773,8.19189453 C14.9536133,8.29117839 14.7428385,8.34082031 14.5019531,8.34082031 Z" id="Shape"></path>
+                    <path d="M20.0341797,9.06835938 C20.4020182,9.06835938 20.7250977,8.99674479 21.003418,8.85351562 C21.2817383,8.71028646 21.4892578,8.50358073 21.6259766,8.23339844 L21.6259766,8.51660156 C21.6259766,8.66959635 21.6780599,8.78841146 21.7822266,8.87304688 C21.8863932,8.95768229 22.0100911,9 22.1533203,9 C22.2998047,9 22.4267578,8.95605469 22.5341797,8.86816406 C22.6416016,8.78027344 22.6953125,8.65657552 22.6953125,8.49707031 L22.6953125,4.63476563 C22.6953125,4.47200521 22.6432292,4.34586589 22.5390625,4.25634766 C22.4348958,4.16682943 22.3095703,4.12207031 22.1630859,4.12207031 C22.0166016,4.12207031 21.8904622,4.16764323 21.784668,4.25878906 C21.6788737,4.3499349 21.6259766,4.47526042 21.6259766,4.63476563 L21.6259766,6.8515625 C21.6194661,7.30078125 21.4892578,7.66129557 21.2353516,7.93310547 C20.9814453,8.20491536 20.6803385,8.34082031 20.3320312,8.34082031 C20.0325521,8.34082031 19.7924805,8.24397786 19.6118164,8.05029297 C19.4311523,7.85660807 19.3408203,7.57421875 19.3408203,7.203125 L19.3408203,4.63476563 C19.3408203,4.47200521 19.2879232,4.34586589 19.1821289,4.25634766 C19.0763346,4.16682943 18.9518229,4.12207031 18.8085938,4.12207031 C18.6621094,4.12207031 18.5359701,4.16682943 18.4301758,4.25634766 C18.3243815,4.34586589 18.2714844,4.47200521 18.2714844,4.63476563 L18.2714844,7.19335938 C18.2714844,7.51236979 18.3186849,7.79394531 18.4130859,8.03808594 C18.507487,8.28222656 18.6368815,8.47835286 18.8012695,8.62646484 C18.9656576,8.77457682 19.1520182,8.88525391 19.3603516,8.95849609 C19.5686849,9.03173828 19.7932943,9.06835938 20.0341797,9.06835938 Z" id="Path"></path>
+                    <path d="M26.2597656,9.06835938 C26.969401,9.06835938 27.5683594,8.85351563 28.0566406,8.42382813 C28.1640625,8.3359375 28.2177734,8.23014323 28.2177734,8.10644531 C28.2177734,8.00878906 28.1844076,7.92252604 28.1176758,7.84765625 C28.050944,7.77278646 27.9720052,7.73535156 27.8808594,7.73535156 C27.8190104,7.73535156 27.7571615,7.75651042 27.6953125,7.79882813 C27.1972656,8.15039062 26.7447917,8.32617188 26.3378906,8.32617188 C25.8984375,8.32291667 25.5476888,8.19840495 25.2856445,7.95263672 C25.0236003,7.70686849 24.8811849,7.32519531 24.8583984,6.80761719 L28.0615234,6.80761719 C28.1722005,6.80761719 28.2527669,6.76936849 28.3032227,6.69287109 C28.3536784,6.6163737 28.3789062,6.51953125 28.3789062,6.40234375 C28.3723958,6.08007812 28.3235677,5.78141276 28.2324219,5.50634766 C28.141276,5.23128255 28.0086263,4.98388672 27.8344727,4.76416016 C27.660319,4.54443359 27.4291992,4.37109375 27.1411133,4.24414062 C26.8530273,4.1171875 26.5218099,4.05371094 26.1474609,4.05371094 C25.7796224,4.05371094 25.4443359,4.12044271 25.1416016,4.25390625 C24.8388672,4.38736979 24.5898438,4.56803385 24.3945312,4.79589844 C24.1992188,5.02376302 24.0486654,5.28499349 23.9428711,5.57958984 C23.8370768,5.8741862 23.7841797,6.1858724 23.7841797,6.51464844 C23.7841797,7.30240885 24.0112305,7.92333984 24.465332,8.37744141 C24.9194336,8.83154297 25.5175781,9.06184896 26.2597656,9.06835938 Z M24.8681641,6.16308594 C24.8844401,5.98730469 24.9226888,5.81966146 24.9829102,5.66015625 C25.0431315,5.50065104 25.1253255,5.35416667 25.2294922,5.22070312 C25.3336589,5.08723958 25.46875,4.97981771 25.6347656,4.8984375 C25.8007812,4.81705729 25.9863281,4.77636719 26.1914062,4.77636719 C26.5885417,4.77636719 26.891276,4.90820312 27.0996094,5.171875 C27.3079427,5.43554688 27.4267578,5.76595052 27.4560547,6.16308594 L24.8681641,6.16308594 Z" id="Shape"></path>
+                    <path d="M29.9462891,9 C30.0927734,9 30.2189128,8.95442708 30.324707,8.86328125 C30.4305013,8.77213542 30.4833984,8.6468099 30.4833984,8.48730469 L30.4833984,6.265625 C30.4866536,5.81966146 30.6136068,5.45996094 30.8642578,5.18652344 C31.1149089,4.91308594 31.414388,4.77636719 31.7626953,4.77636719 C32.0621745,4.77636719 32.3046875,4.87402344 32.4902344,5.06933594 C32.6757812,5.26464844 32.7685547,5.54622396 32.7685547,5.9140625 L32.7685547,8.48730469 C32.7685547,8.6500651 32.820638,8.77620443 32.9248047,8.86572266 C33.0289714,8.95524089 33.1526693,9 33.2958984,9 C33.4423828,9 33.5693359,8.95524089 33.6767578,8.86572266 C33.7841797,8.77620443 33.8378906,8.6500651 33.8378906,8.48730469 L33.8378906,5.92382813 C33.8378906,5.60481771 33.7906901,5.32324219 33.6962891,5.07910156 C33.601888,4.83496094 33.4716797,4.63964844 33.3056641,4.49316406 C33.1396484,4.34667969 32.9516602,4.23681641 32.7416992,4.16357422 C32.5317383,4.09033203 32.3063151,4.05371094 32.0654297,4.05371094 C31.6943359,4.05371094 31.3720703,4.12451172 31.0986328,4.26611328 C30.8251953,4.40771484 30.6201172,4.61360677 30.4833984,4.88378906 L30.4833984,4.61035156 C30.4833984,4.45735677 30.4313151,4.33772786 30.3271484,4.25146484 C30.2229818,4.16520182 30.0976562,4.12207031 29.9511719,4.12207031 C29.8046875,4.12207031 29.6777344,4.16682943 29.5703125,4.25634766 C29.4628906,4.34586589 29.4091797,4.46875 29.4091797,4.625 L29.4091797,8.48730469 C29.4091797,8.6500651 29.4620768,8.77620443 29.5678711,8.86572266 C29.6736654,8.95524089 29.7998047,9 29.9462891,9 Z" id="Path"></path>
+                    <path d="M37.265625,9.06835938 C37.9264323,9.06835938 38.4244792,8.9235026 38.7597656,8.63378906 C38.8769531,8.5328776 38.9355469,8.42220052 38.9355469,8.30175781 C38.9355469,8.27246094 38.9322917,8.24397786 38.9257812,8.21630859 C38.9192708,8.18863932 38.9086914,8.16259766 38.894043,8.13818359 C38.8793945,8.11376953 38.8623047,8.09179688 38.8427734,8.07226562 C38.8232422,8.05273438 38.8020833,8.03645833 38.7792969,8.0234375 C38.7565104,8.01041667 38.7312826,7.99983724 38.7036133,7.99169922 C38.675944,7.9835612 38.6474609,7.97949219 38.6181641,7.97949219 C38.5791016,7.97949219 38.5392253,7.9860026 38.4985352,7.99902344 C38.4578451,8.01204427 38.4179688,8.03320313 38.3789062,8.0625 C38.1217448,8.24804688 37.7783203,8.34082031 37.3486328,8.34082031 C36.9091797,8.34082031 36.5649414,8.17643229 36.315918,7.84765625 C36.0668945,7.51888021 35.9423828,7.0875651 35.9423828,6.55371094 C35.9423828,6.02311198 36.0709635,5.59342448 36.328125,5.26464844 C36.5852865,4.9358724 36.9352214,4.77148437 37.3779297,4.77148437 C37.6936849,4.77148437 37.9947917,4.84960938 38.28125,5.00585938 C38.3170573,5.02539062 38.3536784,5.04003906 38.3911133,5.04980469 C38.4285482,5.05957031 38.4651693,5.06445312 38.5009766,5.06445312 C38.5400391,5.06445312 38.577474,5.05957031 38.6132812,5.04980469 C38.6490885,5.04003906 38.6808268,5.02539062 38.7084961,5.00585938 C38.7361654,4.98632813 38.7605794,4.96354167 38.7817383,4.9375 C38.8028971,4.91145833 38.8191732,4.88297526 38.8305664,4.85205078 C38.8419596,4.8211263 38.8476562,4.78776042 38.8476562,4.75195312 C38.8476562,4.71614583 38.840332,4.68033854 38.8256836,4.64453125 C38.8110352,4.60872396 38.7882487,4.57210286 38.7573242,4.53466797 C38.7263997,4.49723307 38.688151,4.46223958 38.6425781,4.4296875 C38.2845052,4.17903646 37.8336589,4.05371094 37.2900391,4.05371094 C36.9319661,4.05371094 36.6007487,4.12044271 36.2963867,4.25390625 C35.9920247,4.38736979 35.7364909,4.56803385 35.5297852,4.79589844 C35.3230794,5.02376302 35.1619466,5.2898763 35.0463867,5.59423828 C34.9308268,5.89860026 34.8730469,6.22005208 34.8730469,6.55859375 C34.8730469,7.02083333 34.9682617,7.44075521 35.1586914,7.81835938 C35.3491211,8.19596354 35.6282552,8.49869792 35.9960938,8.7265625 C36.3639323,8.95442708 36.7871094,9.06835938 37.265625,9.06835938 Z" id="Path"></path>
+                    <path d="M42.0654297,9.06835938 C42.7750651,9.06835938 43.3740234,8.85351563 43.8623047,8.42382813 C43.9697266,8.3359375 44.0234375,8.23014323 44.0234375,8.10644531 C44.0234375,8.00878906 43.9900716,7.92252604 43.9233398,7.84765625 C43.8566081,7.77278646 43.7776693,7.73535156 43.6865234,7.73535156 C43.6246745,7.73535156 43.5628255,7.75651042 43.5009766,7.79882813 C43.0029297,8.15039062 42.5504557,8.32617188 42.1435547,8.32617188 C41.7041016,8.32291667 41.3533529,8.19840495 41.0913086,7.95263672 C40.8292643,7.70686849 40.686849,7.32519531 40.6640625,6.80761719 L43.8671875,6.80761719 C43.9778646,6.80761719 44.058431,6.76936849 44.1088867,6.69287109 C44.1593424,6.6163737 44.1845703,6.51953125 44.1845703,6.40234375 C44.1780599,6.08007812 44.1292318,5.78141276 44.0380859,5.50634766 C43.9469401,5.23128255 43.8142904,4.98388672 43.6401367,4.76416016 C43.4659831,4.54443359 43.2348633,4.37109375 42.9467773,4.24414062 C42.6586914,4.1171875 42.327474,4.05371094 41.953125,4.05371094 C41.5852865,4.05371094 41.25,4.12044271 40.9472656,4.25390625 C40.6445312,4.38736979 40.3955078,4.56803385 40.2001953,4.79589844 C40.0048828,5.02376302 39.8543294,5.28499349 39.7485352,5.57958984 C39.6427409,5.8741862 39.5898438,6.1858724 39.5898438,6.51464844 C39.5898438,7.30240885 39.8168945,7.92333984 40.2709961,8.37744141 C40.7250977,8.83154297 41.3232422,9.06184896 42.0654297,9.06835938 Z M40.6738281,6.16308594 C40.6901042,5.98730469 40.7283529,5.81966146 40.7885742,5.66015625 C40.8487956,5.50065104 40.9309896,5.35416667 41.0351562,5.22070312 C41.1393229,5.08723958 41.2744141,4.97981771 41.4404297,4.8984375 C41.6064453,4.81705729 41.7919922,4.77636719 41.9970703,4.77636719 C42.3942057,4.77636719 42.6969401,4.90820312 42.9052734,5.171875 C43.1136068,5.43554688 43.2324219,5.76595052 43.2617188,6.16308594 L40.6738281,6.16308594 Z" id="Shape"></path>
+                    <path d="M45.9863281,9.00488281 C46.1523438,9.00488281 46.2947591,8.95361328 46.4135742,8.85107422 C46.5323893,8.74853516 46.5917969,8.60449219 46.5917969,8.41894531 L46.5917969,6.11914062 L48.0126953,6.11914062 C48.8557943,6.11914062 49.5035807,5.95719401 49.9560547,5.63330078 C50.4085286,5.30940755 50.6347656,4.78125 50.6347656,4.04882813 C50.6347656,3.32617188 50.4191081,2.80208333 49.987793,2.4765625 C49.5564779,2.15104167 48.9339193,1.98828125 48.1201172,1.98828125 L45.9960938,1.98828125 C45.8268229,1.98828125 45.6819661,2.0476888 45.5615234,2.16650391 C45.4410807,2.28531901 45.3808594,2.42773438 45.3808594,2.59375 L45.3808594,8.41894531 C45.3808594,8.60449219 45.4410807,8.74853516 45.5615234,8.85107422 C45.6819661,8.95361328 45.8235677,9.00488281 45.9863281,9.00488281 Z M46.5917969,5.19628906 L46.5917969,2.95019531 L48.0126953,2.95019531 C48.507487,2.95019531 48.8696289,3.02587891 49.0991211,3.17724609 C49.3286133,3.32861328 49.4433594,3.61914063 49.4433594,4.04882813 C49.4433594,4.48502604 49.3253581,4.78531901 49.0893555,4.94970703 C48.8533529,5.11409505 48.4830729,5.19628906 47.9785156,5.19628906 L46.5917969,5.19628906 Z" id="Shape"></path>
+                    <path d="M53.6962891,8.34082031 C53.2438151,8.34082031 52.8898112,8.18457031 52.6342773,7.87207031 C52.3787435,7.55957031 52.2509766,7.125 52.2509766,6.56835937 C52.2509766,6.00520833 52.3787435,5.56575521 52.6342773,5.25 C52.8898112,4.93424479 53.2438151,4.77636719 53.6962891,4.77636719 C54.148763,4.77636719 54.5027669,4.93505859 54.7583008,5.25244141 C55.0138346,5.56982422 55.1416016,6.00846354 55.1416016,6.56835937 C55.1416016,7.125 55.0146484,7.55957031 54.7607422,7.87207031 C54.5068359,8.18457031 54.1520182,8.34082031 53.6962891,8.34082031 Z M53.6962891,9.06835938 C54.0283203,9.06835938 54.3326823,9.02197266 54.609375,8.92919922 C54.8860677,8.83642578 55.1212565,8.71110026 55.3149414,8.55322266 C55.5086263,8.39534505 55.6730143,8.20898438 55.8081055,7.99414063 C55.9431966,7.77929688 56.0424805,7.55224609 56.105957,7.31298828 C56.1694336,7.07373047 56.2011719,6.82552083 56.2011719,6.56835938 C56.2011719,6.22981771 56.1474609,5.91080729 56.0400391,5.61132813 C55.9326172,5.31184896 55.777181,5.04492188 55.5737305,4.81054688 C55.3702799,4.57617187 55.1074219,4.3914388 54.7851562,4.25634766 C54.4628906,4.12125651 54.0999349,4.05371094 53.6962891,4.05371094 C53.289388,4.05371094 52.9231771,4.12288411 52.5976562,4.26123047 C52.2721354,4.39957682 52.0092773,4.5867513 51.809082,4.82275391 C51.6088867,5.05875651 51.4558919,5.32568359 51.3500977,5.62353516 C51.2443034,5.92138672 51.1914062,6.23632813 51.1914062,6.56835938 C51.1914062,6.89388021 51.2434896,7.2039388 51.3476562,7.49853516 C51.4518229,7.79313151 51.6040039,8.05843099 51.8041992,8.29443359 C52.0043945,8.5304362 52.2672526,8.71842448 52.5927734,8.85839844 C52.9182943,8.9983724 53.2861328,9.06835938 53.6962891,9.06835938 Z" id="Shape"></path>
+                    <path d="M59.4824219,8.34082031 C59.0299479,8.34082031 58.675944,8.18457031 58.4204102,7.87207031 C58.1648763,7.55957031 58.0371094,7.125 58.0371094,6.56835937 C58.0371094,6.00520833 58.1648763,5.56575521 58.4204102,5.25 C58.675944,4.93424479 59.0299479,4.77636719 59.4824219,4.77636719 C59.9348958,4.77636719 60.2888997,4.93505859 60.5444336,5.25244141 C60.7999674,5.56982422 60.9277344,6.00846354 60.9277344,6.56835937 C60.9277344,7.125 60.8007812,7.55957031 60.546875,7.87207031 C60.2929688,8.18457031 59.938151,8.34082031 59.4824219,8.34082031 Z M59.4824219,9.06835938 C59.8144531,9.06835938 60.1188151,9.02197266 60.3955078,8.92919922 C60.6722005,8.83642578 60.9073893,8.71110026 61.1010742,8.55322266 C61.2947591,8.39534505 61.4591471,8.20898438 61.5942383,7.99414063 C61.7293294,7.77929688 61.8286133,7.55224609 61.8920898,7.31298828 C61.9555664,7.07373047 61.9873047,6.82552083 61.9873047,6.56835938 C61.9873047,6.22981771 61.9335938,5.91080729 61.8261719,5.61132813 C61.71875,5.31184896 61.5633138,5.04492188 61.3598633,4.81054688 C61.1564128,4.57617187 60.8935547,4.3914388 60.5712891,4.25634766 C60.2490234,4.12125651 59.8860677,4.05371094 59.4824219,4.05371094 C59.0755208,4.05371094 58.7093099,4.12288411 58.3837891,4.26123047 C58.0582682,4.39957682 57.7954102,4.5867513 57.5952148,4.82275391 C57.3950195,5.05875651 57.2420247,5.32568359 57.1362305,5.62353516 C57.0304362,5.92138672 56.9775391,6.23632813 56.9775391,6.56835938 C56.9775391,6.89388021 57.0296224,7.2039388 57.1337891,7.49853516 C57.2379557,7.79313151 57.3901367,8.05843099 57.590332,8.29443359 C57.7905273,8.5304362 58.0533854,8.71842448 58.3789062,8.85839844 C58.7044271,8.9983724 59.0722656,9.06835938 59.4824219,9.06835938 Z" id="Shape"></path>
+                    <path d="M63.6083984,9 C63.7548828,9 63.8826497,8.95198568 63.9916992,8.85595703 C64.1007487,8.75992839 64.1552734,8.63053385 64.1552734,8.46777344 L64.1552734,2.52539062 C64.1552734,2.36263021 64.1015625,2.23323568 63.9941406,2.13720703 C63.8867188,2.04117839 63.7613932,1.99316406 63.6181641,1.99316406 C63.4716797,1.99316406 63.3455404,2.04036458 63.2397461,2.13476562 C63.1339518,2.22916667 63.0810547,2.359375 63.0810547,2.52539062 L63.0810547,8.46777344 C63.0810547,8.63704427 63.1323242,8.76806641 63.2348633,8.86083984 C63.3374023,8.95361328 63.4619141,9 63.6083984,9 Z" id="Path"></path>
+                    <path d="M66.0791016,9 C66.2255859,9 66.3533529,8.95198568 66.4624023,8.85595703 C66.5714518,8.75992839 66.6259766,8.63053385 66.6259766,8.46777344 L66.6259766,4.65429688 C66.6259766,4.48828125 66.5730794,4.35807292 66.4672852,4.26367188 C66.3614909,4.16927083 66.2369792,4.12207031 66.09375,4.12207031 C65.9472656,4.12207031 65.8203125,4.16927083 65.7128906,4.26367188 C65.6054688,4.35807292 65.5517578,4.48828125 65.5517578,4.65429688 L65.5517578,8.46777344 C65.5517578,8.63704427 65.6030273,8.76806641 65.7055664,8.86083984 C65.8081055,8.95361328 65.9326172,9 66.0791016,9 Z M66.0839844,3.1015625 C66.2792969,3.1015625 66.4363607,3.04541016 66.5551758,2.93310547 C66.6739909,2.82080078 66.7333984,2.671875 66.7333984,2.48632812 C66.7333984,2.30403646 66.6748047,2.15592448 66.5576172,2.04199219 C66.4404297,1.9280599 66.2841797,1.87109375 66.0888672,1.87109375 C65.8935547,1.87109375 65.7364909,1.9280599 65.6176758,2.04199219 C65.4988607,2.15592448 65.4394531,2.30403646 65.4394531,2.48632812 C65.4394531,2.671875 65.4988607,2.82080078 65.6176758,2.93310547 C65.7364909,3.04541016 65.8919271,3.1015625 66.0839844,3.1015625 Z" id="Shape"></path>
+                    <path d="M68.4765625,9 C68.6230469,9 68.7491862,8.95442708 68.8549805,8.86328125 C68.9607747,8.77213542 69.0136719,8.6468099 69.0136719,8.48730469 L69.0136719,6.265625 C69.0169271,5.81966146 69.1438802,5.45996094 69.3945312,5.18652344 C69.6451823,4.91308594 69.9446615,4.77636719 70.2929688,4.77636719 C70.5924479,4.77636719 70.8349609,4.87402344 71.0205078,5.06933594 C71.2060547,5.26464844 71.2988281,5.54622396 71.2988281,5.9140625 L71.2988281,8.48730469 C71.2988281,8.6500651 71.3509115,8.77620443 71.4550781,8.86572266 C71.5592448,8.95524089 71.6829427,9 71.8261719,9 C71.9726562,9 72.0996094,8.95524089 72.2070312,8.86572266 C72.3144531,8.77620443 72.3681641,8.6500651 72.3681641,8.48730469 L72.3681641,5.92382813 C72.3681641,5.60481771 72.3209635,5.32324219 72.2265625,5.07910156 C72.1321615,4.83496094 72.0019531,4.63964844 71.8359375,4.49316406 C71.6699219,4.34667969 71.4819336,4.23681641 71.2719727,4.16357422 C71.0620117,4.09033203 70.8365885,4.05371094 70.5957031,4.05371094 C70.2246094,4.05371094 69.9023438,4.12451172 69.6289062,4.26611328 C69.3554688,4.40771484 69.1503906,4.61360677 69.0136719,4.88378906 L69.0136719,4.61035156 C69.0136719,4.45735677 68.9615885,4.33772786 68.8574219,4.25146484 C68.7532552,4.16520182 68.6279297,4.12207031 68.4814453,4.12207031 C68.3349609,4.12207031 68.2080078,4.16682943 68.1005859,4.25634766 C67.9931641,4.34586589 67.9394531,4.46875 67.9394531,4.625 L67.9394531,8.48730469 C67.9394531,8.6500651 67.9923503,8.77620443 68.0981445,8.86572266 C68.2039388,8.95524089 68.3300781,9 68.4765625,9 Z" id="Path"></path>
+                    <path d="M75.7958984,11.1337891 C76.5348307,11.1337891 77.1232096,10.9498698 77.5610352,10.5820312 C77.9988607,10.2141927 78.2177734,9.66080729 78.2177734,8.921875 L78.2177734,4.62988281 C78.2177734,4.4703776 78.1673177,4.34586589 78.0664062,4.25634766 C77.9654948,4.16682943 77.8483073,4.12207031 77.7148438,4.12207031 C77.5911458,4.12207031 77.4812826,4.15869141 77.3852539,4.23193359 C77.2892253,4.30517578 77.2330729,4.40690104 77.2167969,4.53710938 L77.2167969,4.86425781 C77.1061198,4.69173177 76.9913737,4.55013021 76.8725586,4.43945312 C76.7537435,4.32877604 76.5917969,4.23681641 76.3867188,4.16357422 C76.1816406,4.09033203 75.9407552,4.05371094 75.6640625,4.05371094 C75.2018229,4.05371094 74.7981771,4.16438802 74.453125,4.38574219 C74.1080729,4.60709635 73.8468424,4.90820313 73.6694336,5.2890625 C73.4920247,5.66992188 73.4033203,6.10449219 73.4033203,6.59277344 C73.4033203,7.3186849 73.610026,7.90136719 74.0234375,8.34082031 C74.436849,8.78027344 74.9934896,9 75.6933594,9 C76.3964844,9 76.8961589,8.7281901 77.1923828,8.18457031 L77.1923828,9 C77.1923828,9.44596354 77.0613607,9.79264323 76.7993164,10.0400391 C76.5372721,10.2874349 76.1767578,10.4111328 75.7177734,10.4111328 C75.5745443,10.4111328 75.4305013,10.3989258 75.2856445,10.3745117 C75.1407878,10.3500977 75.0292969,10.3256836 74.9511719,10.3012695 C74.8730469,10.2768555 74.7648112,10.2386068 74.6264648,10.1865234 C74.4881185,10.1344401 74.3994141,10.101888 74.3603516,10.0888672 C74.3440755,10.0823568 74.3269857,10.0766602 74.309082,10.0717773 C74.2911784,10.0668945 74.2749023,10.0636393 74.2602539,10.0620117 C74.2456055,10.0603841 74.2301432,10.0595703 74.2138672,10.0595703 C74.148763,10.0595703 74.0901693,10.0766602 74.0380859,10.1108398 C73.9860026,10.1450195 73.9461263,10.1889648 73.918457,10.2426758 C73.8907878,10.2963867 73.8769531,10.3533529 73.8769531,10.4135742 C73.8769531,10.4737956 73.8924154,10.5323893 73.9233398,10.5893555 C73.9542643,10.6463216 74.000651,10.6943359 74.0625,10.7333984 C74.2415365,10.8440755 74.4864909,10.9384766 74.7973633,11.0166016 C75.1082357,11.0947266 75.4410807,11.1337891 75.7958984,11.1337891 Z M75.8544922,8.30175781 C75.4638672,8.29199219 75.1359049,8.1414388 74.8706055,7.85009766 C74.605306,7.55875651 74.4726562,7.12988281 74.4726562,6.56347656 C74.4726562,6.31933594 74.4995117,6.09309896 74.5532227,5.88476562 C74.6069336,5.67643229 74.6875,5.48763021 74.7949219,5.31835938 C74.9023438,5.14908854 75.0480143,5.0164388 75.2319336,4.92041016 C75.4158529,4.82438151 75.6282552,4.77636719 75.8691406,4.77636719 C76.110026,4.77636719 76.3175456,4.8219401 76.4916992,4.91308594 C76.6658529,5.00423177 76.8025716,5.1336263 76.9018555,5.30126953 C77.0011393,5.46891276 77.0735677,5.65690104 77.1191406,5.86523438 C77.1647135,6.07356771 77.1875,6.30957031 77.1875,6.57324219 C77.1842448,7.15592448 77.0605469,7.58886719 76.8164062,7.87207031 C76.5722656,8.15527344 76.2516276,8.2985026 75.8544922,8.30175781 Z" id="Shape"></path>
+                </g>
+                <path d="M26,37.5000137 C26,33.8656019 30.377872,27.4510713 39.0819672,27.4510713 C47.7860624,27.4510713 111.941046,27.4510713 121.311475,27.4510713 C127.558428,27.4510713 133.735421,25.1340521 139.842454,20.5000137 C144.904298,25.1340521 151.186321,27.4510713 158.688525,27.4510713 C169.94183,27.4510713 229.704918,27.4510713 240.918033,27.4510713 C252.131148,27.4510713 254,34.4032367 254,37.5000137" id="Path" stroke="#8E8E8E" stroke-linecap="round" stroke-linejoin="round"></path>
+                <g id="top" transform="translate(124, 8)">
+                    <rect id="Rectangle" fill="#E3EDE6" x="0" y="0" width="32" height="12" rx="3"></rect>
+                    <path d="M29,0 C30.6568542,0 32,1.34314575 32,3 L32,9 C32,10.6568542 30.6568542,12 29,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L29,0 Z M29,1 L3,1 L2.79551163,1.01032578 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L29,11 C30.1045695,11 31,10.1045695 31,9 L31,3 C31,1.8954305 30.1045695,1 29,1 Z" id="Rectangle" fill="#8CA595" fill-rule="nonzero"></path>
+                </g>
+            </g>
+            <g id="Score">
+                <g id="dash-box-@green">
+                    <path d="M8,0 L272,0 C276.418278,0 280,3.581722 280,8 L280,62 C280,66.418278 276.418278,70 272,70 L8,70 C3.581722,70 0,66.418278 0,62 L0,8 C0,3.581722 3.581722,8.8817842e-16 8,0 Z" id="bg" fill="#F0FDF8"></path>
+                    <path d="M10.9746434,68.9999999 L10.974,69.9999999 L8.974,69.9999999 L8.97464335,68.9999999 L10.9746434,68.9999999 Z M14.9746436,68.9999999 L14.974,69.9999999 L12.974,69.9999999 L12.9746433,68.9999999 L14.9746436,68.9999999 Z M18.9746435,68.9999999 L18.974,69.9999999 L16.974,69.9999999 L16.9746433,68.9999999 L18.9746435,68.9999999 Z M22.9746434,68.9999999 L22.974,69.9999999 L20.974,69.9999999 L20.9746433,68.9999999 L22.9746434,68.9999999 Z M26.9746436,68.9999999 L26.974,69.9999999 L24.974,69.9999999 L24.9746433,68.9999999 L26.9746436,68.9999999 Z M30.9746435,68.9999999 L30.974,69.9999999 L28.974,69.9999999 L28.9746433,68.9999999 L30.9746435,68.9999999 Z M34.9746434,68.9999999 L34.974,69.9999999 L32.974,69.9999999 L32.9746433,68.9999999 L34.9746434,68.9999999 Z M38.9746434,68.9999999 L38.974,69.9999999 L36.974,69.9999999 L36.9746433,68.9999999 L38.9746434,68.9999999 Z M42.9746435,68.9999999 L42.974,69.9999999 L40.974,69.9999999 L40.9746433,68.9999999 L42.9746435,68.9999999 Z M46.9746435,68.9999999 L46.974,69.9999999 L44.974,69.9999999 L44.9746433,68.9999999 L46.9746435,68.9999999 Z M50.9746434,68.9999999 L50.974,69.9999999 L48.974,69.9999999 L48.9746433,68.9999999 L50.9746434,68.9999999 Z M54.9746436,68.9999999 L54.974,69.9999999 L52.974,69.9999999 L52.9746433,68.9999999 L54.9746436,68.9999999 Z M58.9746435,68.9999999 L58.974,69.9999999 L56.974,69.9999999 L56.9746433,68.9999999 L58.9746435,68.9999999 Z M62.9746434,68.9999999 L62.974,69.9999999 L60.974,69.9999999 L60.9746433,68.9999999 L62.9746434,68.9999999 Z M66.9746436,68.9999999 L66.974,69.9999999 L64.974,69.9999999 L64.9746433,68.9999999 L66.9746436,68.9999999 Z M70.9746435,68.9999999 L70.974,69.9999999 L68.974,69.9999999 L68.9746433,68.9999999 L70.9746435,68.9999999 Z M74.9746434,68.9999999 L74.974,69.9999999 L72.974,69.9999999 L72.9746433,68.9999999 L74.9746434,68.9999999 Z M78.9746434,68.9999999 L78.974,69.9999999 L76.974,69.9999999 L76.9746433,68.9999999 L78.9746434,68.9999999 Z M82.9746435,68.9999999 L82.974,69.9999999 L80.974,69.9999999 L80.9746433,68.9999999 L82.9746435,68.9999999 Z M86.9746435,68.9999999 L86.974,69.9999999 L84.974,69.9999999 L84.9746433,68.9999999 L86.9746435,68.9999999 Z M90.9746434,68.9999999 L90.974,69.9999999 L88.974,69.9999999 L88.9746433,68.9999999 L90.9746434,68.9999999 Z M94.9746436,68.9999999 L94.974,69.9999999 L92.974,69.9999999 L92.9746433,68.9999999 L94.9746436,68.9999999 Z M98.9746435,68.9999999 L98.974,69.9999999 L96.974,69.9999999 L96.9746433,68.9999999 L98.9746435,68.9999999 Z M102.974643,68.9999999 L102.974,69.9999999 L100.974,69.9999999 L100.974643,68.9999999 L102.974643,68.9999999 Z M106.974644,68.9999999 L106.974,69.9999999 L104.974,69.9999999 L104.974643,68.9999999 L106.974644,68.9999999 Z M110.974644,68.9999999 L110.974,69.9999999 L108.974,69.9999999 L108.974643,68.9999999 L110.974644,68.9999999 Z M114.974643,68.9999999 L114.974,69.9999999 L112.974,69.9999999 L112.974643,68.9999999 L114.974643,68.9999999 Z M118.974643,68.9999999 L118.974,69.9999999 L116.974,69.9999999 L116.974643,68.9999999 L118.974643,68.9999999 Z M122.974644,68.9999999 L122.974,69.9999999 L120.974,69.9999999 L120.974643,68.9999999 L122.974644,68.9999999 Z M126.974643,68.9999999 L126.974,69.9999999 L124.974,69.9999999 L124.974643,68.9999999 L126.974643,68.9999999 Z M130.974643,68.9999999 L130.974,69.9999999 L128.974,69.9999999 L128.974643,68.9999999 L130.974643,68.9999999 Z M134.974644,68.9999999 L134.974,69.9999999 L132.974,69.9999999 L132.974643,68.9999999 L134.974644,68.9999999 Z M138.974644,68.9999999 L138.974,69.9999999 L136.974,69.9999999 L136.974643,68.9999999 L138.974644,68.9999999 Z M142.974643,68.9999999 L142.974,69.9999999 L140.974,69.9999999 L140.974643,68.9999999 L142.974643,68.9999999 Z M146.974643,68.9999999 L146.974,69.9999999 L144.974,69.9999999 L144.974643,68.9999999 L146.974643,68.9999999 Z M150.974643,68.9999999 L150.974,69.9999999 L148.974,69.9999999 L148.974643,68.9999999 L150.974643,68.9999999 Z M154.974643,68.9999999 L154.974,69.9999999 L152.974,69.9999999 L152.974643,68.9999999 L154.974643,68.9999999 Z M158.974643,68.9999999 L158.974,69.9999999 L156.974,69.9999999 L156.974643,68.9999999 L158.974643,68.9999999 Z M162.974643,68.9999999 L162.974,69.9999999 L160.974,69.9999999 L160.974643,68.9999999 L162.974643,68.9999999 Z M166.974643,68.9999999 L166.974,69.9999999 L164.974,69.9999999 L164.974643,68.9999999 L166.974643,68.9999999 Z M170.974643,68.9999999 L170.974,69.9999999 L168.974,69.9999999 L168.974643,68.9999999 L170.974643,68.9999999 Z M174.974643,68.9999999 L174.974,69.9999999 L172.974,69.9999999 L172.974643,68.9999999 L174.974643,68.9999999 Z M178.974643,68.9999999 L178.974,69.9999999 L176.974,69.9999999 L176.974643,68.9999999 L178.974643,68.9999999 Z M182.974643,68.9999999 L182.974,69.9999999 L180.974,69.9999999 L180.974643,68.9999999 L182.974643,68.9999999 Z M186.974643,68.9999999 L186.974,69.9999999 L184.974,69.9999999 L184.974643,68.9999999 L186.974643,68.9999999 Z M190.974643,68.9999999 L190.974,69.9999999 L188.974,69.9999999 L188.974643,68.9999999 L190.974643,68.9999999 Z M194.974643,68.9999999 L194.974,69.9999999 L192.974,69.9999999 L192.974643,68.9999999 L194.974643,68.9999999 Z M198.974643,68.9999999 L198.974,69.9999999 L196.974,69.9999999 L196.974643,68.9999999 L198.974643,68.9999999 Z M202.974643,68.9999999 L202.974,69.9999999 L200.974,69.9999999 L200.974643,68.9999999 L202.974643,68.9999999 Z M206.974643,68.9999999 L206.974,69.9999999 L204.974,69.9999999 L204.974643,68.9999999 L206.974643,68.9999999 Z M210.974643,68.9999999 L210.974,69.9999999 L208.974,69.9999999 L208.974643,68.9999999 L210.974643,68.9999999 Z M214.974643,68.9999999 L214.974,69.9999999 L212.974,69.9999999 L212.974643,68.9999999 L214.974643,68.9999999 Z M218.974643,68.9999999 L218.974,69.9999999 L216.974,69.9999999 L216.974643,68.9999999 L218.974643,68.9999999 Z M222.974643,68.9999999 L222.974,69.9999999 L220.974,69.9999999 L220.974643,68.9999999 L222.974643,68.9999999 Z M226.974643,68.9999999 L226.974,69.9999999 L224.974,69.9999999 L224.974643,68.9999999 L226.974643,68.9999999 Z M230.974643,68.9999999 L230.974,69.9999999 L228.974,69.9999999 L228.974643,68.9999999 L230.974643,68.9999999 Z M234.974643,68.9999999 L234.974,69.9999999 L232.974,69.9999999 L232.974643,68.9999999 L234.974643,68.9999999 Z M238.974643,68.9999999 L238.974,69.9999999 L236.974,69.9999999 L236.974643,68.9999999 L238.974643,68.9999999 Z M242.974643,68.9999999 L242.974,69.9999999 L240.974,69.9999999 L240.974643,68.9999999 L242.974643,68.9999999 Z M246.974643,68.9999999 L246.974,69.9999999 L244.974,69.9999999 L244.974643,68.9999999 L246.974643,68.9999999 Z M250.974643,68.9999999 L250.974,69.9999999 L248.974,69.9999999 L248.974643,68.9999999 L250.974643,68.9999999 Z M254.974643,68.9999999 L254.974,69.9999999 L252.974,69.9999999 L252.974643,68.9999999 L254.974643,68.9999999 Z M258.974643,68.9999999 L258.974,69.9999999 L256.974,69.9999999 L256.974643,68.9999999 L258.974643,68.9999999 Z M262.974643,68.9999999 L262.974,69.9999999 L260.974,69.9999999 L260.974643,68.9999999 L262.974643,68.9999999 Z M266.974643,68.9999999 L266.974,69.9999999 L264.974,69.9999999 L264.974643,68.9999999 L266.974643,68.9999999 Z M270.974643,68.9999999 L270.974,69.9999999 L268.974,69.9999999 L268.974643,68.9999999 L270.974643,68.9999999 Z M274.489743,68.544574 L274.845614,69.4791091 C274.230519,69.7132701 273.579078,69.8737323 272.902082,69.9497049 L272.79022,68.9559811 C273.375654,68.8903126 273.945502,68.7517531 274.489743,68.544574 Z M5.41029967,68.5056361 C5.95157753,68.7213183 6.51934132,68.8686603 7.10343083,68.9432289 L6.97697115,69.9352007 C6.30134705,69.8489616 5.65205616,69.6784493 5.03997173,69.4345371 L5.41029967,68.5056361 Z M277.388302,66.4686843 L278.157467,67.1077344 C277.730317,67.6220994 277.239943,68.0820804 276.698265,68.4757556 L276.11069,67.6665857 C276.584521,67.3221774 277.013992,66.9194515 277.388302,66.4686843 Z M2.56157145,66.4076246 C2.93082185,66.8625901 3.35581002,67.2701375 3.82581503,67.6198877 L3.22929764,68.422488 C2.69189028,68.0226419 2.20638657,67.5569385 1.78473163,67.0373231 L2.56157145,66.4076246 Z M278.876065,63.3192015 L279.858348,63.506607 C279.73129,64.1733384 279.521483,64.8107286 279.240021,65.407685 L278.335201,64.9818892 C278.584106,64.4541311 278.766097,63.896352 278.876065,63.3192015 Z M1.10775661,63.2312252 C1.21033515,63.8099081 1.38509885,64.3698753 1.62708778,64.9005768 L0.717340098,65.3157385 C0.443469481,64.7151789 0.241701076,64.07482 0.123099669,63.4057265 L1.10775661,63.2312252 Z M280,59.512 L280,61.512 L279,61.5126783 L279,59.5126783 L280,59.512 Z M1,59.461965 L1,61.461965 L0,61.461 L0,59.461 L1,59.461965 Z M280,55.512 L280,57.512 L279,57.5126783 L279,55.5126783 L280,55.512 Z M1,55.461965 L1,57.461965 L0,57.461 L0,55.461 L1,55.461965 Z M280,51.512 L280,53.512 L279,53.5126783 L279,51.5126783 L280,51.512 Z M1,51.461965 L1,53.461965 L0,53.461 L0,51.461 L1,51.461965 Z M280,47.512 L280,49.512 L279,49.5126783 L279,47.5126783 L280,47.512 Z M1,47.461965 L1,49.461965 L0,49.461 L0,47.461 L1,47.461965 Z M280,43.512 L280,45.512 L279,45.5126783 L279,43.5126783 L280,43.512 Z M1,43.461965 L1,45.461965 L0,45.461 L0,43.461 L1,43.461965 Z M280,39.512 L280,41.512 L279,41.5126783 L279,39.5126783 L280,39.512 Z M1,39.461965 L1,41.461965 L0,41.461 L0,39.461 L1,39.461965 Z M280,35.512 L280,37.512 L279,37.5126783 L279,35.5126783 L280,35.512 Z M1,35.461965 L1,37.461965 L0,37.461 L0,35.461 L1,35.461965 Z M280,31.512 L280,33.512 L279,33.5126783 L279,31.5126783 L280,31.512 Z M1,31.461965 L1,33.461965 L0,33.461 L0,31.461 L1,31.461965 Z M280,27.512 L280,29.512 L279,29.5126783 L279,27.5126783 L280,27.512 Z M1,27.461965 L1,29.461965 L0,29.461 L0,27.461 L1,27.461965 Z M280,23.512 L280,25.512 L279,25.5126783 L279,23.5126783 L280,23.512 Z M1,23.461965 L1,25.461965 L0,25.461 L0,23.461 L1,23.461965 Z M280,19.512 L280,21.512 L279,21.5126783 L279,19.5126783 L280,19.512 Z M1,19.461965 L1,21.461965 L0,21.461 L0,19.461 L1,19.461965 Z M280,15.512 L280,17.512 L279,17.5126783 L279,15.5126783 L280,15.512 Z M1,15.461965 L1,17.461965 L0,17.461 L0,15.461 L1,15.461965 Z M280,11.512 L280,13.512 L279,13.5126783 L279,11.5126783 L280,11.512 Z M1,11.461965 L1,13.461965 L0,13.461 L0,11.461 L1,11.461965 Z M280,8 L280,9.512 L279,9.51268141 L278.999996,7.99230721 C278.999843,7.84931676 278.995414,7.70690426 278.986748,7.5651841 L279.984907,7.50453602 C279.99492,7.6684065 280,7.83361003 280,8 Z M0.0177810396,7.46235014 L1.01560984,7.52821226 C1.00521986,7.68466746 1,7.84198159 1,8 L1,9.46196502 L0,9.461 L0,8 C0,7.81932186 0.00598958881,7.64004267 0.0177810396,7.46235014 Z M278.669225,5.86656128 C278.490852,5.30842466 278.242774,4.77610859 277.93156,4.28094453 L278.777667,3.74792999 C279.130817,4.30965578 279.416068,4.91838179 279.621909,5.56259574 L278.669225,5.86656128 Z M1.26026912,3.68817589 L2.10224493,4.22769122 C1.78656347,4.72006919 1.53367638,5.25013249 1.3502652,5.80651827 L0.400554683,5.49338901 C0.612012081,4.85193918 0.902417359,4.24636767 1.26026912,3.68817589 Z M276.801538,2.90632361 C276.375276,2.5043409 275.900412,2.15691661 275.388372,1.8730526 L275.873899,0.998830409 C276.461218,1.32450134 277.00324,1.72192978 277.488127,2.17927796 L276.801538,2.90632361 Z M4.18303226,0.967564053 L4.66099377,1.84594486 C4.14669322,2.12566396 3.66909236,2.46925191 3.23964084,2.86780054 L2.55894601,2.13523322 C3.04750167,1.68176376 3.5928096,1.28859486 4.18303226,0.967564053 Z M272,0 C272.685315,0 273.350503,0.0861719866 273.985321,0.248271715 L273.737382,1.21704772 C273.174749,1.07342708 272.592644,1 272,1 L272,0 Z M10,0 L10,1 L8.00000022,1 L7.59669465,1.01139518 C7.19522811,1.0341476 6.79959301,1.09086155 6.41261147,1.18053077 L6.18734244,0.206234069 C6.76976317,0.0713155963 7.37655326,0 8,0 L10,0 Z M142,0 L142,1 L140,1 L140,0 L142,0 Z M262,0 L262,1 L260,1 L260,0 L262,0 Z M258,0 L258,1 L256,1 L256,0 L258,0 Z M254,0 L254,1 L252,1 L252,0 L254,0 Z M250,0 L250,1 L248,1 L248,0 L250,0 Z M246,0 L246,1 L244,1 L244,0 L246,0 Z M242,0 L242,1 L240,1 L240,0 L242,0 Z M238,0 L238,1 L236,1 L236,0 L238,0 Z M234,0 L234,1 L232,1 L232,0 L234,0 Z M230,0 L230,1 L228,1 L228,0 L230,0 Z M226,0 L226,1 L224,1 L224,0 L226,0 Z M222,0 L222,1 L220,1 L220,0 L222,0 Z M218,0 L218,1 L216,1 L216,0 L218,0 Z M214,0 L214,1 L212,1 L212,0 L214,0 Z M210,0 L210,1 L208,1 L208,0 L210,0 Z M206,0 L206,1 L204,1 L204,0 L206,0 Z M202,0 L202,1 L200,1 L200,0 L202,0 Z M198,0 L198,1 L196,1 L196,0 L198,0 Z M194,0 L194,1 L192,1 L192,0 L194,0 Z M190,0 L190,1 L188,1 L188,0 L190,0 Z M186,0 L186,1 L184,1 L184,0 L186,0 Z M182,0 L182,1 L180,1 L180,0 L182,0 Z M178,0 L178,1 L176,1 L176,0 L178,0 Z M174,0 L174,1 L172,1 L172,0 L174,0 Z M170,0 L170,1 L168,1 L168,0 L170,0 Z M166,0 L166,1 L164,1 L164,0 L166,0 Z M162,0 L162,1 L160,1 L160,0 L162,0 Z M158,0 L158,1 L156,1 L156,0 L158,0 Z M154,0 L154,1 L152,1 L152,0 L154,0 Z M150,0 L150,1 L148,1 L148,0 L150,0 Z M146,0 L146,1 L144,1 L144,0 L146,0 Z M270,0 L270,1 L268,1 L268,0 L270,0 Z M138,0 L138,1 L136,1 L136,0 L138,0 Z M134,0 L134,1 L132,1 L132,0 L134,0 Z M130,0 L130,1 L128,1 L128,0 L130,0 Z M126,0 L126,1 L124,1 L124,0 L126,0 Z M122,0 L122,1 L120,1 L120,0 L122,0 Z M118,0 L118,1 L116,1 L116,0 L118,0 Z M114,0 L114,1 L112,1 L112,0 L114,0 Z M110,0 L110,1 L108,1 L108,0 L110,0 Z M106,0 L106,1 L104,1 L104,0 L106,0 Z M102,0 L102,1 L100,1 L100,0 L102,0 Z M98,0 L98,1 L96,1 L96,0 L98,0 Z M94,0 L94,1 L92,1 L92,0 L94,0 Z M90,0 L90,1 L88,1 L88,0 L90,0 Z M86,0 L86,1 L84,1 L84,0 L86,0 Z M82,0 L82,1 L80,1 L80,0 L82,0 Z M78,0 L78,1 L76,1 L76,0 L78,0 Z M74,0 L74,1 L72,1 L72,0 L74,0 Z M70,0 L70,1 L68,1 L68,0 L70,0 Z M66,0 L66,1 L64,1 L64,0 L66,0 Z M62,0 L62,1 L60,1 L60,0 L62,0 Z M58,0 L58,1 L56,1 L56,0 L58,0 Z M54,0 L54,1 L52,1 L52,0 L54,0 Z M50,0 L50,1 L48,1 L48,0 L50,0 Z M46,0 L46,1 L44,1 L44,0 L46,0 Z M42,0 L42,1 L40,1 L40,0 L42,0 Z M38,0 L38,1 L36,1 L36,0 L38,0 Z M34,0 L34,1 L32,1 L32,0 L34,0 Z M30,0 L30,1 L28,1 L28,0 L30,0 Z M26,0 L26,1 L24,1 L24,0 L26,0 Z M22,0 L22,1 L20,1 L20,0 L22,0 Z M18,0 L18,1 L16,1 L16,0 L18,0 Z M14,0 L14,1 L12,1 L12,0 L14,0 Z M266,0 L266,1 L264,1 L264,0 L266,0 Z" id="dash" fill="#1CBB8B" fill-rule="nonzero"></path>
+                </g>
+                <g id="linear-classifier" transform="translate(156, 48)" fill="#12BE8B" fill-rule="nonzero">
+                    <path d="M1.48828125,10 C1.6640625,10 1.81738281,9.94238281 1.94824219,9.82714844 C2.07910156,9.71191406 2.14453125,9.55664063 2.14453125,9.36132812 L2.14453125,2.23046875 C2.14453125,2.03515625 2.08007812,1.87988281 1.95117188,1.76464844 C1.82226562,1.64941406 1.671875,1.59179688 1.5,1.59179688 C1.32421875,1.59179688 1.17285156,1.6484375 1.04589844,1.76171875 C0.918945312,1.875 0.85546875,2.03125 0.85546875,2.23046875 L0.85546875,9.36132812 C0.85546875,9.56445313 0.916992188,9.72167969 1.04003906,9.83300781 C1.16308594,9.94433594 1.3125,10 1.48828125,10 Z" id="Path"></path>
+                    <path d="M4.453125,10 C4.62890625,10 4.78222656,9.94238281 4.91308594,9.82714844 C5.04394531,9.71191406 5.109375,9.55664063 5.109375,9.36132812 L5.109375,4.78515625 C5.109375,4.5859375 5.04589844,4.4296875 4.91894531,4.31640625 C4.79199219,4.203125 4.64257812,4.14648438 4.47070313,4.14648438 C4.29492188,4.14648438 4.14257812,4.203125 4.01367188,4.31640625 C3.88476562,4.4296875 3.8203125,4.5859375 3.8203125,4.78515625 L3.8203125,9.36132812 C3.8203125,9.56445312 3.88183594,9.72167969 4.00488281,9.83300781 C4.12792969,9.94433594 4.27734375,10 4.453125,10 Z M4.45898438,2.921875 C4.69335938,2.921875 4.88183594,2.85449219 5.02441406,2.71972656 C5.16699219,2.58496094 5.23828125,2.40625 5.23828125,2.18359375 C5.23828125,1.96484375 5.16796875,1.78710937 5.02734375,1.65039063 C4.88671875,1.51367188 4.69921875,1.4453125 4.46484375,1.4453125 C4.23046875,1.4453125 4.04199219,1.51367188 3.89941406,1.65039063 C3.75683594,1.78710937 3.68554688,1.96484375 3.68554688,2.18359375 C3.68554688,2.40625 3.75683594,2.58496094 3.89941406,2.71972656 C4.04199219,2.85449219 4.22851562,2.921875 4.45898438,2.921875 Z" id="Shape"></path>
+                    <path d="M7.33007813,10 C7.50585938,10 7.65722656,9.9453125 7.78417969,9.8359375 C7.91113281,9.7265625 7.97460938,9.57617188 7.97460938,9.38476563 L7.97460938,6.71875 C7.97851562,6.18359375 8.13085938,5.75195313 8.43164062,5.42382812 C8.73242187,5.09570312 9.09179687,4.93164063 9.50976563,4.93164063 C9.86914063,4.93164063 10.1601562,5.04882812 10.3828125,5.28320312 C10.6054687,5.51757812 10.7167969,5.85546875 10.7167969,6.296875 L10.7167969,9.38476563 C10.7167969,9.58007812 10.7792969,9.73144531 10.9042969,9.83886719 C11.0292969,9.94628906 11.1777344,10 11.3496094,10 C11.5253906,10 11.6777344,9.94628906 11.8066406,9.83886719 C11.9355469,9.73144531 12,9.58007812 12,9.38476563 L12,6.30859375 C12,5.92578125 11.9433594,5.58789063 11.8300781,5.29492188 C11.7167969,5.00195313 11.5605469,4.76757813 11.3613281,4.59179688 C11.1621094,4.41601562 10.9365234,4.28417969 10.6845703,4.19628906 C10.4326172,4.10839844 10.1621094,4.06445312 9.87304688,4.06445312 C9.42773438,4.06445312 9.04101563,4.14941406 8.71289063,4.31933594 C8.38476563,4.48925781 8.13867187,4.73632812 7.97460938,5.06054687 L7.97460938,4.73242188 C7.97460938,4.54882812 7.91210938,4.40527344 7.78710938,4.30175781 C7.66210937,4.19824219 7.51171875,4.14648438 7.3359375,4.14648438 C7.16015625,4.14648438 7.0078125,4.20019531 6.87890625,4.30761719 C6.75,4.41503906 6.68554688,4.5625 6.68554688,4.75 L6.68554688,9.38476563 C6.68554688,9.58007812 6.74902344,9.73144531 6.87597656,9.83886719 C7.00292969,9.94628906 7.15429688,10 7.33007813,10 Z" id="Path"></path>
+                    <path d="M16.2246094,10.0820312 C17.0761719,10.0820312 17.7949219,9.82421875 18.3808594,9.30859375 C18.5097656,9.203125 18.5742188,9.07617188 18.5742188,8.92773437 C18.5742188,8.81054688 18.5341797,8.70703125 18.4541016,8.6171875 C18.3740234,8.52734375 18.2792969,8.48242188 18.1699219,8.48242188 C18.0957031,8.48242188 18.0214844,8.5078125 17.9472656,8.55859375 C17.3496094,8.98046875 16.8066406,9.19140625 16.3183594,9.19140625 C15.7910156,9.1875 15.3701172,9.03808594 15.0556641,8.74316406 C14.7412109,8.44824219 14.5703125,7.99023438 14.5429688,7.36914062 L18.3867188,7.36914062 C18.5195312,7.36914062 18.6162109,7.32324219 18.6767578,7.23144531 C18.7373047,7.13964844 18.7675781,7.0234375 18.7675781,6.8828125 C18.7597656,6.49609375 18.7011719,6.13769531 18.5917969,5.80761719 C18.4824219,5.47753906 18.3232422,5.18066406 18.1142578,4.91699219 C17.9052734,4.65332031 17.6279297,4.4453125 17.2822266,4.29296875 C16.9365234,4.140625 16.5390625,4.06445312 16.0898438,4.06445312 C15.6484375,4.06445312 15.2460938,4.14453125 14.8828125,4.3046875 C14.5195312,4.46484375 14.2207031,4.68164063 13.9863281,4.95507813 C13.7519531,5.22851562 13.5712891,5.54199219 13.4443359,5.89550781 C13.3173828,6.24902344 13.2539062,6.62304687 13.2539062,7.01757812 C13.2539062,7.96289062 13.5263672,8.70800781 14.0712891,9.25292969 C14.6162109,9.79785156 15.3339844,10.0742188 16.2246094,10.0820312 Z M14.5546875,6.59570312 C14.5742188,6.38476562 14.6201172,6.18359375 14.6923828,5.9921875 C14.7646484,5.80078125 14.8632812,5.625 14.9882812,5.46484375 C15.1132812,5.3046875 15.2753906,5.17578125 15.4746094,5.078125 C15.6738281,4.98046875 15.8964844,4.93164062 16.1425781,4.93164062 C16.6191406,4.93164062 16.9824219,5.08984375 17.2324219,5.40625 C17.4824219,5.72265625 17.625,6.11914062 17.6601562,6.59570312 L14.5546875,6.59570312 Z" id="Shape"></path>
+                    <path d="M21.65625,10.0820312 C22.4492188,10.0820312 23.015625,9.74609375 23.3554688,9.07421875 L23.3554688,9.390625 C23.3554688,9.65625 23.4707031,9.83984375 23.7011719,9.94140625 C23.7910156,9.98046875 23.8847656,10 23.9824219,10 C24.1542969,10 24.3027344,9.94628906 24.4277344,9.83886719 C24.5527344,9.73144531 24.6152344,9.58007812 24.6152344,9.38476562 L24.6152344,6.17382812 C24.6152344,5.45117188 24.4042969,4.91894531 23.9824219,4.57714844 C23.5605469,4.23535156 22.9882812,4.06445312 22.265625,4.06445312 C21.4648438,4.06445312 20.7480469,4.22265625 20.1152344,4.5390625 C19.9707031,4.609375 19.8984375,4.72070313 19.8984375,4.87304687 C19.8984375,4.99804687 19.9423828,5.11132813 20.0302734,5.21289062 C20.1181641,5.31445312 20.2246094,5.36523438 20.3496094,5.36523438 C20.3652344,5.36523438 20.3808594,5.36425781 20.3964844,5.36230469 C20.4121094,5.36035156 20.4287109,5.35742187 20.4462891,5.35351562 C20.4638672,5.34960938 20.4804688,5.34375 20.4960938,5.3359375 C20.7148438,5.25 20.8759766,5.18847656 20.9794922,5.15136719 C21.0830078,5.11425781 21.2597656,5.06738281 21.5097656,5.01074219 C21.7597656,4.95410156 21.9941406,4.92578125 22.2128906,4.92578125 C22.9550781,4.92578125 23.3261719,5.30859375 23.3261719,6.07421875 L23.3261719,6.58984375 C22.9667969,6.58984375 22.6621094,6.59277344 22.4121094,6.59863281 C22.1621094,6.60449219 21.90625,6.61816406 21.6445312,6.63964844 C21.3828125,6.66113281 21.1650391,6.69042969 20.9912109,6.72753906 C20.8173828,6.76464844 20.6464844,6.81347656 20.4785156,6.87402344 C20.3105469,6.93457031 20.1767578,7.00878906 20.0771484,7.09667969 C19.9775391,7.18457031 19.8876953,7.29101563 19.8076172,7.41601563 C19.7275391,7.54101563 19.6708984,7.68359375 19.6376953,7.84375 C19.6044922,8.00390625 19.5878906,8.1875 19.5878906,8.39453125 C19.5878906,8.93359375 19.78125,9.34960938 20.1679687,9.64257813 C20.5546875,9.93554688 21.0507812,10.0820312 21.65625,10.0820312 Z M21.7675781,9.24414062 C21.4785156,9.24414062 21.2460938,9.17089844 21.0703125,9.02441406 C20.8945312,8.87792969 20.8066406,8.6484375 20.8066406,8.3359375 C20.8066406,8.22265625 20.8154297,8.12207031 20.8330078,8.03417969 C20.8505859,7.94628906 20.8808594,7.8671875 20.9238281,7.796875 C20.9667969,7.7265625 21.0185547,7.66699219 21.0791016,7.61816406 C21.1396484,7.56933594 21.2216797,7.52636719 21.3251953,7.48925781 C21.4287109,7.45214844 21.5390625,7.42285156 21.65625,7.40136719 C21.7734375,7.37988281 21.9208984,7.36230469 22.0986328,7.34863281 C22.2763672,7.33496094 22.4580078,7.32617188 22.6435547,7.32226562 C22.8291016,7.31835938 23.0546875,7.31640625 23.3203125,7.31640625 L23.3203125,7.46875 C23.3203125,7.984375 23.1640625,8.40917969 22.8515625,8.74316406 C22.5390625,9.07714844 22.1777344,9.24414062 21.7675781,9.24414062 Z" id="Shape"></path>
+                    <path d="M26.8710938,10 C27.046875,10 27.2001953,9.94628906 27.3310547,9.83886719 C27.4619141,9.73144531 27.5273438,9.58203125 27.5273438,9.390625 L27.5273438,6.66015625 C27.5273438,6.37890625 27.5703125,6.140625 27.65625,5.9453125 C27.7421875,5.75 27.8652344,5.60058594 28.0253906,5.49707031 C28.1855469,5.39355469 28.3603516,5.3203125 28.5498047,5.27734375 C28.7392578,5.234375 28.9570312,5.21289062 29.203125,5.21289062 C29.3320312,5.21289062 29.4335938,5.16210937 29.5078125,5.06054688 C29.5820312,4.95898438 29.6191406,4.83789062 29.6191406,4.69726562 C29.6191406,4.62695312 29.609375,4.55859375 29.5898438,4.4921875 C29.5703125,4.42578125 29.5410156,4.36621094 29.5019531,4.31347656 C29.4628906,4.26074219 29.4121094,4.21875 29.3496094,4.1875 C29.2871094,4.15625 29.2167969,4.140625 29.1386719,4.140625 C28.7597656,4.140625 28.4189453,4.26367188 28.1162109,4.50976562 C27.8134766,4.75585938 27.6113281,5.06054688 27.5097656,5.42382812 L27.5273438,4.69726562 C27.53125,4.52539062 27.4707031,4.390625 27.3457031,4.29296875 C27.2207031,4.1953125 27.0703125,4.14648438 26.8945312,4.14648438 C26.78125,4.14648438 26.6757812,4.16699219 26.578125,4.20800781 C26.4804688,4.24902344 26.3994141,4.31542969 26.3349609,4.40722656 C26.2705078,4.49902344 26.2382812,4.609375 26.2382812,4.73828125 L26.2382812,9.390625 C26.2382812,9.5859375 26.3007812,9.73632812 26.4257812,9.84179688 C26.5507812,9.94726562 26.6992188,10 26.8710938,10 Z" id="Path"></path>
+                    <path d="M36.5390625,10.0820312 C37.3320312,10.0820312 37.9296875,9.90820312 38.3320312,9.56054688 C38.4726562,9.43945312 38.5429688,9.30664063 38.5429688,9.16210938 C38.5429688,9.12695313 38.5390625,9.09277344 38.53125,9.05957031 C38.5234375,9.02636719 38.5107422,8.99511719 38.4931641,8.96582031 C38.4755859,8.93652344 38.4550781,8.91015625 38.4316406,8.88671875 C38.4082031,8.86328125 38.3828125,8.84375 38.3554688,8.828125 C38.328125,8.8125 38.2978516,8.79980469 38.2646484,8.79003906 C38.2314453,8.78027344 38.1972656,8.77539062 38.1621094,8.77539062 C38.1152344,8.77539062 38.0673828,8.78320312 38.0185547,8.79882812 C37.9697266,8.81445312 37.921875,8.83984375 37.875,8.875 C37.5664062,9.09765625 37.1542969,9.20898438 36.6386719,9.20898438 C36.1113281,9.20898438 35.6982422,9.01171875 35.3994141,8.6171875 C35.1005859,8.22265625 34.9511719,7.70507813 34.9511719,7.06445312 C34.9511719,6.42773437 35.1054688,5.91210938 35.4140625,5.51757812 C35.7226562,5.12304687 36.1425781,4.92578125 36.6738281,4.92578125 C37.0527344,4.92578125 37.4140625,5.01953125 37.7578125,5.20703125 C37.8007812,5.23046875 37.8447266,5.24804687 37.8896484,5.25976562 C37.9345703,5.27148438 37.9785156,5.27734375 38.0214844,5.27734375 C38.0683594,5.27734375 38.1132812,5.27148438 38.15625,5.25976562 C38.1992188,5.24804687 38.2373047,5.23046875 38.2705078,5.20703125 C38.3037109,5.18359375 38.3330078,5.15625 38.3583984,5.125 C38.3837891,5.09375 38.4033203,5.05957031 38.4169922,5.02246094 C38.4306641,4.98535156 38.4375,4.9453125 38.4375,4.90234375 C38.4375,4.859375 38.4287109,4.81640625 38.4111328,4.7734375 C38.3935547,4.73046875 38.3662109,4.68652344 38.3291016,4.64160156 C38.2919922,4.59667969 38.2460938,4.5546875 38.1914062,4.515625 C37.7617188,4.21484375 37.2207031,4.06445312 36.5683594,4.06445312 C36.1386719,4.06445312 35.7412109,4.14453125 35.3759766,4.3046875 C35.0107422,4.46484375 34.7041016,4.68164063 34.4560547,4.95507813 C34.2080078,5.22851562 34.0146484,5.54785156 33.8759766,5.91308594 C33.7373047,6.27832031 33.6679688,6.6640625 33.6679688,7.0703125 C33.6679688,7.625 33.7822266,8.12890625 34.0107422,8.58203125 C34.2392578,9.03515625 34.5742188,9.3984375 35.015625,9.671875 C35.4570312,9.9453125 35.9648438,10.0820312 36.5390625,10.0820312 Z" id="Path"></path>
+                    <path d="M40.3300781,10 C40.5058594,10 40.6591797,9.94238281 40.7900391,9.82714844 C40.9208984,9.71191406 40.9863281,9.55664063 40.9863281,9.36132812 L40.9863281,2.23046875 C40.9863281,2.03515625 40.921875,1.87988281 40.7929688,1.76464844 C40.6640625,1.64941406 40.5136719,1.59179688 40.3417969,1.59179688 C40.1660156,1.59179688 40.0146484,1.6484375 39.8876953,1.76171875 C39.7607422,1.875 39.6972656,2.03125 39.6972656,2.23046875 L39.6972656,9.36132812 C39.6972656,9.56445313 39.7587891,9.72167969 39.8818359,9.83300781 C40.0048828,9.94433594 40.1542969,10 40.3300781,10 Z" id="Path"></path>
+                    <path d="M44.2851562,10.0820312 C45.078125,10.0820312 45.6445312,9.74609375 45.984375,9.07421875 L45.984375,9.390625 C45.984375,9.65625 46.0996094,9.83984375 46.3300781,9.94140625 C46.4199219,9.98046875 46.5136719,10 46.6113281,10 C46.7832031,10 46.9316406,9.94628906 47.0566406,9.83886719 C47.1816406,9.73144531 47.2441406,9.58007812 47.2441406,9.38476562 L47.2441406,6.17382812 C47.2441406,5.45117188 47.0332031,4.91894531 46.6113281,4.57714844 C46.1894531,4.23535156 45.6171875,4.06445312 44.8945312,4.06445312 C44.09375,4.06445312 43.3769531,4.22265625 42.7441406,4.5390625 C42.5996094,4.609375 42.5273438,4.72070313 42.5273438,4.87304687 C42.5273438,4.99804687 42.5712891,5.11132813 42.6591797,5.21289062 C42.7470703,5.31445312 42.8535156,5.36523438 42.9785156,5.36523438 C42.9941406,5.36523438 43.0097656,5.36425781 43.0253906,5.36230469 C43.0410156,5.36035156 43.0576172,5.35742187 43.0751953,5.35351562 C43.0927734,5.34960938 43.109375,5.34375 43.125,5.3359375 C43.34375,5.25 43.5048828,5.18847656 43.6083984,5.15136719 C43.7119141,5.11425781 43.8886719,5.06738281 44.1386719,5.01074219 C44.3886719,4.95410156 44.6230469,4.92578125 44.8417969,4.92578125 C45.5839844,4.92578125 45.9550781,5.30859375 45.9550781,6.07421875 L45.9550781,6.58984375 C45.5957031,6.58984375 45.2910156,6.59277344 45.0410156,6.59863281 C44.7910156,6.60449219 44.5351562,6.61816406 44.2734375,6.63964844 C44.0117188,6.66113281 43.7939453,6.69042969 43.6201172,6.72753906 C43.4462891,6.76464844 43.2753906,6.81347656 43.1074219,6.87402344 C42.9394531,6.93457031 42.8056641,7.00878906 42.7060547,7.09667969 C42.6064453,7.18457031 42.5166016,7.29101563 42.4365234,7.41601563 C42.3564453,7.54101563 42.2998047,7.68359375 42.2666016,7.84375 C42.2333984,8.00390625 42.2167969,8.1875 42.2167969,8.39453125 C42.2167969,8.93359375 42.4101562,9.34960938 42.796875,9.64257813 C43.1835938,9.93554688 43.6796875,10.0820312 44.2851562,10.0820312 Z M44.3964844,9.24414062 C44.1074219,9.24414062 43.875,9.17089844 43.6992188,9.02441406 C43.5234375,8.87792969 43.4355469,8.6484375 43.4355469,8.3359375 C43.4355469,8.22265625 43.4443359,8.12207031 43.4619141,8.03417969 C43.4794922,7.94628906 43.5097656,7.8671875 43.5527344,7.796875 C43.5957031,7.7265625 43.6474609,7.66699219 43.7080078,7.61816406 C43.7685547,7.56933594 43.8505859,7.52636719 43.9541016,7.48925781 C44.0576172,7.45214844 44.1679688,7.42285156 44.2851562,7.40136719 C44.4023438,7.37988281 44.5498047,7.36230469 44.7275391,7.34863281 C44.9052734,7.33496094 45.0869141,7.32617188 45.2724609,7.32226562 C45.4580078,7.31835938 45.6835938,7.31640625 45.9492188,7.31640625 L45.9492188,7.46875 C45.9492188,7.984375 45.7929688,8.40917969 45.4804688,8.74316406 C45.1679688,9.07714844 44.8066406,9.24414062 44.3964844,9.24414062 Z" id="Shape"></path>
+                    <path d="M48.84375,9.44335938 C49.3046875,9.77148437 49.8671875,9.97851563 50.53125,10.0644531 C50.71875,10.0878906 50.9042969,10.0996094 51.0878906,10.0996094 C51.4238281,10.0996094 51.7333984,10.0625 52.0166016,9.98828125 C52.2998047,9.9140625 52.546875,9.80371094 52.7578125,9.65722656 C52.96875,9.51074219 53.1337891,9.31933594 53.2529297,9.08300781 C53.3720703,8.84667969 53.4316406,8.578125 53.4316406,8.27734375 C53.4316406,7.828125 53.2949219,7.46875 53.0214844,7.19921875 C52.7480469,6.9296875 52.3046875,6.71875 51.6914062,6.56640625 L50.7421875,6.33203125 C50.375,6.2421875 50.1308594,6.15136719 50.0097656,6.05957031 C49.8886719,5.96777344 49.828125,5.81640625 49.828125,5.60546875 C49.828125,5.4375 49.8867188,5.29882813 50.0039062,5.18945313 C50.1210938,5.08007813 50.2675781,5.00292969 50.4433594,4.95800781 C50.6191406,4.91308594 50.8203125,4.890625 51.046875,4.890625 C51.15625,4.890625 51.2675781,4.89746094 51.3808594,4.91113281 C51.4941406,4.92480469 51.5908203,4.93945312 51.6708984,4.95507813 C51.7509766,4.97070313 51.8457031,4.99414063 51.9550781,5.02539062 C52.0644531,5.05664062 52.140625,5.08007813 52.1835938,5.09570312 C52.2265625,5.11132812 52.296875,5.13769531 52.3945312,5.17480469 C52.4921875,5.21191406 52.5429688,5.23046875 52.546875,5.23046875 C52.5859375,5.2421875 52.6220703,5.25195313 52.6552734,5.25976562 C52.6884766,5.26757812 52.7226562,5.27148438 52.7578125,5.27148438 C52.8359375,5.27148438 52.9052734,5.25292969 52.9658203,5.21582031 C53.0263672,5.17871094 53.0722656,5.13085937 53.1035156,5.07226562 C53.1347656,5.01367188 53.1503906,4.95117188 53.1503906,4.88476562 C53.1503906,4.70117187 53.0566406,4.56445312 52.8691406,4.47460938 C52.2910156,4.18164062 51.6601562,4.03515625 50.9765625,4.03515625 C50.1132812,4.03515625 49.4746094,4.23242188 49.0605469,4.62695312 C48.7792969,4.90429688 48.6386719,5.25 48.6386719,5.6640625 C48.6386719,5.8203125 48.6533203,5.96191406 48.6826172,6.08886719 C48.7119141,6.21582031 48.7509766,6.32714844 48.7998047,6.42285156 C48.8486328,6.51855469 48.9169922,6.60644531 49.0048828,6.68652344 C49.0927734,6.76660156 49.1816406,6.83398437 49.2714844,6.88867188 C49.3613281,6.94335938 49.4775391,6.99804688 49.6201172,7.05273438 C49.7626953,7.10742187 49.8964844,7.15332031 50.0214844,7.19042969 C50.1464844,7.22753906 50.3046875,7.27148438 50.4960938,7.32226563 L51.4628906,7.56835938 C51.7675781,7.64648437 51.9902344,7.74902344 52.1308594,7.87597656 C52.2714844,8.00292969 52.3417969,8.1640625 52.3417969,8.359375 C52.3417969,8.5625 52.2773438,8.73339844 52.1484375,8.87207031 C52.0195312,9.01074219 51.8574219,9.10839844 51.6621094,9.16503906 C51.4667969,9.22167969 51.2460938,9.25 51,9.25 C50.7734375,9.25 50.5488281,9.22167969 50.3261719,9.16503906 C50.1035156,9.10839844 49.9257812,9.04882812 49.7929688,8.98632812 C49.6601562,8.92382813 49.515625,8.84570312 49.359375,8.75195313 C49.2617188,8.69726562 49.1660156,8.66992188 49.0722656,8.66992188 C49.0488281,8.66992188 49.0253906,8.671875 49.0019531,8.67578125 C48.9785156,8.6796875 48.9560547,8.68554688 48.9345703,8.69335938 C48.9130859,8.70117188 48.8925781,8.70996094 48.8730469,8.71972656 C48.8535156,8.72949219 48.8349609,8.74121094 48.8173828,8.75488281 C48.7998047,8.76855469 48.7832031,8.78320312 48.7675781,8.79882813 C48.7519531,8.81445312 48.7382812,8.83105469 48.7265625,8.84863281 C48.7148438,8.86621094 48.7041016,8.88476563 48.6943359,8.90429688 C48.6845703,8.92382813 48.6767578,8.94335938 48.6708984,8.96289062 C48.6650391,8.98242188 48.6601562,9.00292969 48.65625,9.02441406 C48.6523438,9.04589844 48.6503906,9.06835938 48.6503906,9.09179688 C48.6503906,9.23242188 48.7148438,9.34960938 48.84375,9.44335938 Z" id="Path"></path>
+                    <path d="M54.7207031,9.44335938 C55.1816406,9.77148437 55.7441406,9.97851563 56.4082031,10.0644531 C56.5957031,10.0878906 56.78125,10.0996094 56.9648438,10.0996094 C57.3007812,10.0996094 57.6103516,10.0625 57.8935547,9.98828125 C58.1767578,9.9140625 58.4238281,9.80371094 58.6347656,9.65722656 C58.8457031,9.51074219 59.0107422,9.31933594 59.1298828,9.08300781 C59.2490234,8.84667969 59.3085938,8.578125 59.3085938,8.27734375 C59.3085938,7.828125 59.171875,7.46875 58.8984375,7.19921875 C58.625,6.9296875 58.1816406,6.71875 57.5683594,6.56640625 L56.6191406,6.33203125 C56.2519531,6.2421875 56.0078125,6.15136719 55.8867188,6.05957031 C55.765625,5.96777344 55.7050781,5.81640625 55.7050781,5.60546875 C55.7050781,5.4375 55.7636719,5.29882813 55.8808594,5.18945313 C55.9980469,5.08007813 56.1445312,5.00292969 56.3203125,4.95800781 C56.4960938,4.91308594 56.6972656,4.890625 56.9238281,4.890625 C57.0332031,4.890625 57.1445312,4.89746094 57.2578125,4.91113281 C57.3710938,4.92480469 57.4677734,4.93945312 57.5478516,4.95507813 C57.6279297,4.97070313 57.7226562,4.99414063 57.8320312,5.02539062 C57.9414062,5.05664062 58.0175781,5.08007813 58.0605469,5.09570312 C58.1035156,5.11132812 58.1738281,5.13769531 58.2714844,5.17480469 C58.3691406,5.21191406 58.4199219,5.23046875 58.4238281,5.23046875 C58.4628906,5.2421875 58.4990234,5.25195313 58.5322266,5.25976562 C58.5654297,5.26757812 58.5996094,5.27148438 58.6347656,5.27148438 C58.7128906,5.27148438 58.7822266,5.25292969 58.8427734,5.21582031 C58.9033203,5.17871094 58.9492188,5.13085937 58.9804688,5.07226562 C59.0117188,5.01367188 59.0273438,4.95117188 59.0273438,4.88476562 C59.0273438,4.70117187 58.9335938,4.56445312 58.7460938,4.47460938 C58.1679688,4.18164062 57.5371094,4.03515625 56.8535156,4.03515625 C55.9902344,4.03515625 55.3515625,4.23242188 54.9375,4.62695312 C54.65625,4.90429688 54.515625,5.25 54.515625,5.6640625 C54.515625,5.8203125 54.5302734,5.96191406 54.5595703,6.08886719 C54.5888672,6.21582031 54.6279297,6.32714844 54.6767578,6.42285156 C54.7255859,6.51855469 54.7939453,6.60644531 54.8818359,6.68652344 C54.9697266,6.76660156 55.0585938,6.83398437 55.1484375,6.88867188 C55.2382812,6.94335938 55.3544922,6.99804688 55.4970703,7.05273438 C55.6396484,7.10742187 55.7734375,7.15332031 55.8984375,7.19042969 C56.0234375,7.22753906 56.1816406,7.27148438 56.3730469,7.32226563 L57.3398438,7.56835938 C57.6445312,7.64648437 57.8671875,7.74902344 58.0078125,7.87597656 C58.1484375,8.00292969 58.21875,8.1640625 58.21875,8.359375 C58.21875,8.5625 58.1542969,8.73339844 58.0253906,8.87207031 C57.8964844,9.01074219 57.734375,9.10839844 57.5390625,9.16503906 C57.34375,9.22167969 57.1230469,9.25 56.8769531,9.25 C56.6503906,9.25 56.4257812,9.22167969 56.203125,9.16503906 C55.9804688,9.10839844 55.8027344,9.04882812 55.6699219,8.98632812 C55.5371094,8.92382813 55.3925781,8.84570312 55.2363281,8.75195313 C55.1386719,8.69726562 55.0429688,8.66992188 54.9492188,8.66992188 C54.9257812,8.66992188 54.9023438,8.671875 54.8789062,8.67578125 C54.8554688,8.6796875 54.8330078,8.68554688 54.8115234,8.69335938 C54.7900391,8.70117188 54.7695312,8.70996094 54.75,8.71972656 C54.7304688,8.72949219 54.7119141,8.74121094 54.6943359,8.75488281 C54.6767578,8.76855469 54.6601562,8.78320312 54.6445312,8.79882813 C54.6289062,8.81445312 54.6152344,8.83105469 54.6035156,8.84863281 C54.5917969,8.86621094 54.5810547,8.88476563 54.5712891,8.90429688 C54.5615234,8.92382813 54.5537109,8.94335938 54.5478516,8.96289062 C54.5419922,8.98242188 54.5371094,9.00292969 54.5332031,9.02441406 C54.5292969,9.04589844 54.5273438,9.06835938 54.5273438,9.09179688 C54.5273438,9.23242188 54.5917969,9.34960938 54.7207031,9.44335938 Z" id="Path"></path>
+                    <path d="M61.2714844,10 C61.4472656,10 61.6005859,9.94238281 61.7314453,9.82714844 C61.8623047,9.71191406 61.9277344,9.55664063 61.9277344,9.36132812 L61.9277344,4.78515625 C61.9277344,4.5859375 61.8642578,4.4296875 61.7373047,4.31640625 C61.6103516,4.203125 61.4609375,4.14648438 61.2890625,4.14648438 C61.1132812,4.14648438 60.9609375,4.203125 60.8320312,4.31640625 C60.703125,4.4296875 60.6386719,4.5859375 60.6386719,4.78515625 L60.6386719,9.36132812 C60.6386719,9.56445312 60.7001953,9.72167969 60.8232422,9.83300781 C60.9462891,9.94433594 61.0957031,10 61.2714844,10 Z M61.2773438,2.921875 C61.5117188,2.921875 61.7001953,2.85449219 61.8427734,2.71972656 C61.9853516,2.58496094 62.0566406,2.40625 62.0566406,2.18359375 C62.0566406,1.96484375 61.9863281,1.78710937 61.8457031,1.65039063 C61.7050781,1.51367188 61.5175781,1.4453125 61.2832031,1.4453125 C61.0488281,1.4453125 60.8603516,1.51367188 60.7177734,1.65039063 C60.5751953,1.78710937 60.5039062,1.96484375 60.5039062,2.18359375 C60.5039062,2.40625 60.5751953,2.58496094 60.7177734,2.71972656 C60.8603516,2.85449219 61.046875,2.921875 61.2773438,2.921875 Z" id="Shape"></path>
+                    <path d="M64.6464844,10 C64.8222656,10 64.9736328,9.94433594 65.1005859,9.83300781 C65.2275391,9.72167969 65.2910156,9.56640625 65.2910156,9.3671875 L65.2910156,5.03710938 L66.1699219,5.03710938 C66.3183594,5.03710938 66.4306641,4.99804688 66.5068359,4.91992188 C66.5830078,4.84179688 66.6210938,4.7421875 66.6210938,4.62109375 C66.6210938,4.5078125 66.5820312,4.41015625 66.5039062,4.328125 C66.4257812,4.24609375 66.3144531,4.20507812 66.1699219,4.20507812 L65.2910156,4.20507812 L65.2910156,3.68359375 C65.2910156,3.421875 65.3046875,3.21191406 65.3320312,3.05371094 C65.359375,2.89550781 65.4082031,2.77734375 65.4785156,2.69921875 C65.5488281,2.62109375 65.6269531,2.5703125 65.7128906,2.546875 C65.7988281,2.5234375 65.9179688,2.51171875 66.0703125,2.51171875 L66.5683594,2.51171875 C66.7089844,2.51171875 66.8164062,2.46386719 66.890625,2.36816406 C66.9648438,2.27246094 67.0019531,2.15820312 67.0019531,2.02539062 C67.0019531,1.88867187 66.9638672,1.76953125 66.8876953,1.66796875 C66.8115234,1.56640625 66.7050781,1.515625 66.5683594,1.515625 L65.8359375,1.515625 C65.2695312,1.515625 64.8222656,1.67480469 64.4941406,1.99316406 C64.1660156,2.31152344 64.0019531,2.81640625 64.0019531,3.5078125 L64.0019531,4.20507812 L63.3398438,4.20507812 C63.3359375,4.20507812 63.3320312,4.20507812 63.328125,4.20507812 C63.1835938,4.20507812 63.0722656,4.24609375 62.9941406,4.328125 C62.9160156,4.41015625 62.8769531,4.5078125 62.8769531,4.62109375 C62.8769531,4.73828125 62.9169922,4.8359375 62.9970703,4.9140625 C63.0771484,4.9921875 63.1914062,5.03320313 63.3398438,5.03710938 L64.0019531,5.03710938 L64.0019531,9.3671875 C64.0019531,9.56640625 64.0654297,9.72167969 64.1923828,9.83300781 C64.3193359,9.94433594 64.4707031,10 64.6464844,10 Z" id="Path"></path>
+                    <path d="M68.3730469,10 C68.5488281,10 68.7021484,9.94238281 68.8330078,9.82714844 C68.9638672,9.71191406 69.0292969,9.55664063 69.0292969,9.36132812 L69.0292969,4.78515625 C69.0292969,4.5859375 68.9658203,4.4296875 68.8388672,4.31640625 C68.7119141,4.203125 68.5625,4.14648438 68.390625,4.14648438 C68.2148438,4.14648438 68.0625,4.203125 67.9335938,4.31640625 C67.8046875,4.4296875 67.7402344,4.5859375 67.7402344,4.78515625 L67.7402344,9.36132812 C67.7402344,9.56445312 67.8017578,9.72167969 67.9248047,9.83300781 C68.0478516,9.94433594 68.1972656,10 68.3730469,10 Z M68.3789062,2.921875 C68.6132812,2.921875 68.8017578,2.85449219 68.9443359,2.71972656 C69.0869141,2.58496094 69.1582031,2.40625 69.1582031,2.18359375 C69.1582031,1.96484375 69.0878906,1.78710937 68.9472656,1.65039063 C68.8066406,1.51367188 68.6191406,1.4453125 68.3847656,1.4453125 C68.1503906,1.4453125 67.9619141,1.51367188 67.8193359,1.65039063 C67.6767578,1.78710937 67.6054688,1.96484375 67.6054688,2.18359375 C67.6054688,2.40625 67.6767578,2.58496094 67.8193359,2.71972656 C67.9619141,2.85449219 68.1484375,2.921875 68.3789062,2.921875 Z" id="Shape"></path>
+                    <path d="M73.2421875,10.0820312 C74.09375,10.0820312 74.8125,9.82421875 75.3984375,9.30859375 C75.5273438,9.203125 75.5917969,9.07617188 75.5917969,8.92773437 C75.5917969,8.81054688 75.5517578,8.70703125 75.4716797,8.6171875 C75.3916016,8.52734375 75.296875,8.48242188 75.1875,8.48242188 C75.1132812,8.48242188 75.0390625,8.5078125 74.9648438,8.55859375 C74.3671875,8.98046875 73.8242188,9.19140625 73.3359375,9.19140625 C72.8085938,9.1875 72.3876953,9.03808594 72.0732422,8.74316406 C71.7587891,8.44824219 71.5878906,7.99023438 71.5605469,7.36914062 L75.4042969,7.36914062 C75.5371094,7.36914062 75.6337891,7.32324219 75.6943359,7.23144531 C75.7548828,7.13964844 75.7851562,7.0234375 75.7851562,6.8828125 C75.7773438,6.49609375 75.71875,6.13769531 75.609375,5.80761719 C75.5,5.47753906 75.3408203,5.18066406 75.1318359,4.91699219 C74.9228516,4.65332031 74.6455078,4.4453125 74.2998047,4.29296875 C73.9541016,4.140625 73.5566406,4.06445312 73.1074219,4.06445312 C72.6660156,4.06445312 72.2636719,4.14453125 71.9003906,4.3046875 C71.5371094,4.46484375 71.2382812,4.68164063 71.0039062,4.95507813 C70.7695312,5.22851562 70.5888672,5.54199219 70.4619141,5.89550781 C70.3349609,6.24902344 70.2714844,6.62304687 70.2714844,7.01757812 C70.2714844,7.96289062 70.5439453,8.70800781 71.0888672,9.25292969 C71.6337891,9.79785156 72.3515625,10.0742188 73.2421875,10.0820312 Z M71.5722656,6.59570312 C71.5917969,6.38476562 71.6376953,6.18359375 71.7099609,5.9921875 C71.7822266,5.80078125 71.8808594,5.625 72.0058594,5.46484375 C72.1308594,5.3046875 72.2929688,5.17578125 72.4921875,5.078125 C72.6914062,4.98046875 72.9140625,4.93164062 73.1601562,4.93164062 C73.6367188,4.93164062 74,5.08984375 74.25,5.40625 C74.5,5.72265625 74.6425781,6.11914062 74.6777344,6.59570312 L71.5722656,6.59570312 Z" id="Shape"></path>
+                    <path d="M77.6660156,10 C77.8417969,10 77.9951172,9.94628906 78.1259766,9.83886719 C78.2568359,9.73144531 78.3222656,9.58203125 78.3222656,9.390625 L78.3222656,6.66015625 C78.3222656,6.37890625 78.3652344,6.140625 78.4511719,5.9453125 C78.5371094,5.75 78.6601562,5.60058594 78.8203125,5.49707031 C78.9804688,5.39355469 79.1552734,5.3203125 79.3447266,5.27734375 C79.5341797,5.234375 79.7519531,5.21289062 79.9980469,5.21289062 C80.1269531,5.21289062 80.2285156,5.16210937 80.3027344,5.06054688 C80.3769531,4.95898438 80.4140625,4.83789062 80.4140625,4.69726562 C80.4140625,4.62695312 80.4042969,4.55859375 80.3847656,4.4921875 C80.3652344,4.42578125 80.3359375,4.36621094 80.296875,4.31347656 C80.2578125,4.26074219 80.2070312,4.21875 80.1445312,4.1875 C80.0820312,4.15625 80.0117188,4.140625 79.9335938,4.140625 C79.5546875,4.140625 79.2138672,4.26367188 78.9111328,4.50976562 C78.6083984,4.75585938 78.40625,5.06054688 78.3046875,5.42382812 L78.3222656,4.69726562 C78.3261719,4.52539062 78.265625,4.390625 78.140625,4.29296875 C78.015625,4.1953125 77.8652344,4.14648438 77.6894531,4.14648438 C77.5761719,4.14648438 77.4707031,4.16699219 77.3730469,4.20800781 C77.2753906,4.24902344 77.1943359,4.31542969 77.1298828,4.40722656 C77.0654297,4.49902344 77.0332031,4.609375 77.0332031,4.73828125 L77.0332031,9.390625 C77.0332031,9.5859375 77.0957031,9.73632812 77.2207031,9.84179688 C77.3457031,9.94726562 77.4941406,10 77.6660156,10 Z" id="Path"></path>
+                </g>
+                <g id="node-/-01_top" transform="translate(115.0088, 16)" xlink:href="#path-15">
+                    <path stroke="#8E8E8E" d="M39.605,0.5 C45.0599165,0.5 49.482,4.92208353 49.482,10.377 C49.482,15.8319165 45.0599165,20.254 39.605,20.254 L10.377,20.254 C4.92208353,20.254 0.5,15.8319165 0.5,10.377 C0.5,5.29810944 4.35169591,1.06030081 9.36652997,0.551016719 L10.4022157,0.499363801 Z" stroke-linejoin="square" fill="#F2F2F2"></path>
+                    <g id="Score" transform="translate(8.8564, 4.625)" fill="#8E8E8E">
+                        <path d="M4.26074219,10.1289062 C4.92089844,10.1289062 5.49707031,10.03125 5.98925781,9.8359375 C6.48144531,9.640625 6.86230469,9.35449219 7.13183594,8.97753906 C7.40136719,8.60058594 7.53613281,8.15429688 7.53613281,7.63867188 C7.53613281,7.37695312 7.50292969,7.13867188 7.43652344,6.92382813 C7.37011719,6.70898438 7.27832031,6.52050781 7.16113281,6.35839844 C7.04394531,6.19628906 6.89257813,6.04882813 6.70703125,5.91601563 C6.52148438,5.78320312 6.3203125,5.66699219 6.10351562,5.56738281 C5.88671875,5.46777344 5.63378906,5.37109375 5.34472656,5.27734375 L3.49316406,4.68554688 C3.18847656,4.58398437 2.96679687,4.45214844 2.828125,4.29003906 C2.68945313,4.12792969 2.62011719,3.91601562 2.62011719,3.65429687 C2.62011719,3.38476562 2.70605469,3.1640625 2.87792969,2.9921875 C3.04980469,2.8203125 3.265625,2.70117187 3.52539063,2.63476562 C3.78515625,2.56835938 4.09277344,2.53515625 4.44824219,2.53515625 C5.15917969,2.53515625 5.84277344,2.72460937 6.49902344,3.10351563 C6.59667969,3.16210938 6.69238281,3.19140625 6.78613281,3.19140625 C6.93457031,3.19140625 7.06152344,3.12890625 7.16699219,3.00390625 C7.27246094,2.87890625 7.32519531,2.74023437 7.32519531,2.58789063 C7.32519531,2.51757813 7.3125,2.45214844 7.28710938,2.39160156 C7.26171875,2.33105469 7.22167969,2.27929688 7.16699219,2.23632813 C6.88574219,1.99804688 6.48632812,1.79492187 5.96875,1.62695312 C5.45117188,1.45898438 4.90917969,1.375 4.34277344,1.375 C3.43261719,1.375 2.68457031,1.58398438 2.09863281,2.00195312 C1.51269531,2.41992187 1.21972656,2.98828125 1.21972656,3.70703125 C1.21972656,4.28125 1.36816406,4.73535156 1.66503906,5.06933594 C1.96191406,5.40332031 2.43066406,5.67773438 3.07128906,5.89257813 L4.93457031,6.51367188 C5.35644531,6.65429688 5.65820312,6.81445313 5.83984375,6.99414062 C6.02148438,7.17382813 6.11230469,7.43359375 6.11230469,7.7734375 C6.11230469,8.16796875 5.94335938,8.46582031 5.60546875,8.66699219 C5.26757813,8.86816406 4.82714844,8.96875 4.28417969,8.96875 C3.42871094,8.96875 2.64160156,8.72070313 1.92285156,8.22460938 C1.89941406,8.20898438 1.87402344,8.19726563 1.84667969,8.18945313 C1.81933594,8.18164063 1.79199219,8.17773438 1.76464844,8.17773438 C1.69433594,8.17773438 1.62109375,8.203125 1.54492188,8.25390625 C1.46875,8.3046875 1.40136719,8.36816406 1.34277344,8.44433594 C1.28417969,8.52050781 1.23632812,8.60351562 1.19921875,8.69335937 C1.16210938,8.78320313 1.14355469,8.8671875 1.14355469,8.9453125 C1.14355469,9.0390625 1.17285156,9.109375 1.23144531,9.15625 C1.98925781,9.8046875 2.99902344,10.1289062 4.26074219,10.1289062 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M11.4677734,10.0820312 C12.2607422,10.0820312 12.8583984,9.90820312 13.2607422,9.56054688 C13.4013672,9.43945312 13.4716797,9.30664063 13.4716797,9.16210938 C13.4716797,9.12695313 13.4677734,9.09277344 13.4599609,9.05957031 C13.4521484,9.02636719 13.4394531,8.99511719 13.421875,8.96582031 C13.4042969,8.93652344 13.3837891,8.91015625 13.3603516,8.88671875 C13.3369141,8.86328125 13.3115234,8.84375 13.2841797,8.828125 C13.2568359,8.8125 13.2265625,8.79980469 13.1933594,8.79003906 C13.1601563,8.78027344 13.1259766,8.77539062 13.0908203,8.77539062 C13.0439453,8.77539062 12.9960938,8.78320312 12.9472656,8.79882812 C12.8984375,8.81445312 12.8505859,8.83984375 12.8037109,8.875 C12.4951172,9.09765625 12.0830078,9.20898438 11.5673828,9.20898438 C11.0400391,9.20898438 10.6269531,9.01171875 10.328125,8.6171875 C10.0292969,8.22265625 9.87988281,7.70507813 9.87988281,7.06445312 C9.87988281,6.42773437 10.0341797,5.91210938 10.3427734,5.51757812 C10.6513672,5.12304687 11.0712891,4.92578125 11.6025391,4.92578125 C11.9814453,4.92578125 12.3427734,5.01953125 12.6865234,5.20703125 C12.7294922,5.23046875 12.7734375,5.24804687 12.8183594,5.25976562 C12.8632813,5.27148438 12.9072266,5.27734375 12.9501953,5.27734375 C12.9970703,5.27734375 13.0419922,5.27148438 13.0849609,5.25976562 C13.1279297,5.24804687 13.1660156,5.23046875 13.1992188,5.20703125 C13.2324219,5.18359375 13.2617188,5.15625 13.2871094,5.125 C13.3125,5.09375 13.3320313,5.05957031 13.3457031,5.02246094 C13.359375,4.98535156 13.3662109,4.9453125 13.3662109,4.90234375 C13.3662109,4.859375 13.3574219,4.81640625 13.3398438,4.7734375 C13.3222656,4.73046875 13.2949219,4.68652344 13.2578125,4.64160156 C13.2207031,4.59667969 13.1748047,4.5546875 13.1201172,4.515625 C12.6904297,4.21484375 12.1494141,4.06445312 11.4970703,4.06445312 C11.0673828,4.06445312 10.6699219,4.14453125 10.3046875,4.3046875 C9.93945312,4.46484375 9.6328125,4.68164063 9.38476562,4.95507813 C9.13671875,5.22851562 8.94335938,5.54785156 8.8046875,5.91308594 C8.66601562,6.27832031 8.59667969,6.6640625 8.59667969,7.0703125 C8.59667969,7.625 8.7109375,8.12890625 8.93945312,8.58203125 C9.16796875,9.03515625 9.50292969,9.3984375 9.94433594,9.671875 C10.3857422,9.9453125 10.8935547,10.0820312 11.4677734,10.0820312 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M17.2509766,9.20898438 C16.7080078,9.20898438 16.2832031,9.02148438 15.9765625,8.64648438 C15.6699219,8.27148438 15.5166016,7.75 15.5166016,7.08203125 C15.5166016,6.40625 15.6699219,5.87890625 15.9765625,5.5 C16.2832031,5.12109375 16.7080078,4.93164062 17.2509766,4.93164062 C17.7939453,4.93164062 18.21875,5.12207031 18.5253906,5.50292969 C18.8320312,5.88378906 18.9853516,6.41015625 18.9853516,7.08203125 C18.9853516,7.75 18.8330078,8.27148438 18.5283203,8.64648438 C18.2236328,9.02148438 17.7978516,9.20898438 17.2509766,9.20898438 Z M17.2509766,10.0820312 C17.6494141,10.0820312 18.0146484,10.0263672 18.3466797,9.91503906 C18.6787109,9.80371094 18.9609375,9.65332031 19.1933594,9.46386719 C19.4257812,9.27441406 19.6230469,9.05078125 19.7851562,8.79296875 C19.9472656,8.53515625 20.0664062,8.26269531 20.1425781,7.97558594 C20.21875,7.68847656 20.2568359,7.390625 20.2568359,7.08203125 C20.2568359,6.67578125 20.1923828,6.29296875 20.0634766,5.93359375 C19.9345703,5.57421875 19.7480469,5.25390625 19.5039062,4.97265625 C19.2597656,4.69140625 18.9443359,4.46972656 18.5576172,4.30761719 C18.1708984,4.14550781 17.7353516,4.06445312 17.2509766,4.06445312 C16.7626953,4.06445312 16.3232422,4.14746094 15.9326172,4.31347656 C15.5419922,4.47949219 15.2265625,4.70410156 14.9863281,4.98730469 C14.7460938,5.27050781 14.5625,5.59082031 14.4355469,5.94824219 C14.3085938,6.30566406 14.2451172,6.68359375 14.2451172,7.08203125 C14.2451172,7.47265625 14.3076172,7.84472656 14.4326172,8.19824219 C14.5576172,8.55175781 14.7402344,8.87011719 14.9804688,9.15332031 C15.2207031,9.43652344 15.5361328,9.66210938 15.9267578,9.83007812 C16.3173828,9.99804687 16.7587891,10.0820312 17.2509766,10.0820312 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M22.1787109,10 C22.3544922,10 22.5078125,9.94628906 22.6386719,9.83886719 C22.7695312,9.73144531 22.8349609,9.58203125 22.8349609,9.390625 L22.8349609,6.66015625 C22.8349609,6.37890625 22.8779297,6.140625 22.9638672,5.9453125 C23.0498047,5.75 23.1728516,5.60058594 23.3330078,5.49707031 C23.4931641,5.39355469 23.6679688,5.3203125 23.8574219,5.27734375 C24.046875,5.234375 24.2646484,5.21289062 24.5107422,5.21289062 C24.6396484,5.21289062 24.7412109,5.16210937 24.8154297,5.06054688 C24.8896484,4.95898438 24.9267578,4.83789062 24.9267578,4.69726562 C24.9267578,4.62695312 24.9169922,4.55859375 24.8974609,4.4921875 C24.8779297,4.42578125 24.8486328,4.36621094 24.8095703,4.31347656 C24.7705078,4.26074219 24.7197266,4.21875 24.6572266,4.1875 C24.5947266,4.15625 24.5244141,4.140625 24.4462891,4.140625 C24.0673828,4.140625 23.7265625,4.26367188 23.4238281,4.50976562 C23.1210938,4.75585938 22.9189453,5.06054688 22.8173828,5.42382812 L22.8349609,4.69726562 C22.8388672,4.52539062 22.7783203,4.390625 22.6533203,4.29296875 C22.5283203,4.1953125 22.3779297,4.14648438 22.2021484,4.14648438 C22.0888672,4.14648438 21.9833984,4.16699219 21.8857422,4.20800781 C21.7880859,4.24902344 21.7070312,4.31542969 21.6425781,4.40722656 C21.578125,4.49902344 21.5458984,4.609375 21.5458984,4.73828125 L21.5458984,9.390625 C21.5458984,9.5859375 21.6083984,9.73632812 21.7333984,9.84179688 C21.8583984,9.94726562 22.0068359,10 22.1787109,10 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M28.5830078,10.0820312 C29.4345703,10.0820312 30.1533203,9.82421875 30.7392578,9.30859375 C30.8681641,9.203125 30.9326172,9.07617188 30.9326172,8.92773437 C30.9326172,8.81054688 30.8925781,8.70703125 30.8125,8.6171875 C30.7324219,8.52734375 30.6376953,8.48242188 30.5283203,8.48242188 C30.4541016,8.48242188 30.3798828,8.5078125 30.3056641,8.55859375 C29.7080078,8.98046875 29.1650391,9.19140625 28.6767578,9.19140625 C28.1494141,9.1875 27.7285156,9.03808594 27.4140625,8.74316406 C27.0996094,8.44824219 26.9287109,7.99023438 26.9013672,7.36914062 L30.7451172,7.36914062 C30.8779297,7.36914062 30.9746094,7.32324219 31.0351562,7.23144531 C31.0957031,7.13964844 31.1259766,7.0234375 31.1259766,6.8828125 C31.1181641,6.49609375 31.0595703,6.13769531 30.9501953,5.80761719 C30.8408203,5.47753906 30.6816406,5.18066406 30.4726562,4.91699219 C30.2636719,4.65332031 29.9863281,4.4453125 29.640625,4.29296875 C29.2949219,4.140625 28.8974609,4.06445312 28.4482422,4.06445312 C28.0068359,4.06445312 27.6044922,4.14453125 27.2412109,4.3046875 C26.8779297,4.46484375 26.5791016,4.68164063 26.3447266,4.95507813 C26.1103516,5.22851562 25.9296875,5.54199219 25.8027344,5.89550781 C25.6757812,6.24902344 25.6123047,6.62304687 25.6123047,7.01757812 C25.6123047,7.96289062 25.8847656,8.70800781 26.4296875,9.25292969 C26.9746094,9.79785156 27.6923828,10.0742188 28.5830078,10.0820312 Z M26.9130859,6.59570312 C26.9326172,6.38476562 26.9785156,6.18359375 27.0507812,5.9921875 C27.1230469,5.80078125 27.2216797,5.625 27.3466797,5.46484375 C27.4716797,5.3046875 27.6337891,5.17578125 27.8330078,5.078125 C28.0322266,4.98046875 28.2548828,4.93164062 28.5009766,4.93164062 C28.9775391,4.93164062 29.3408203,5.08984375 29.5908203,5.40625 C29.8408203,5.72265625 29.9833984,6.11914062 30.0185547,6.59570312 L26.9130859,6.59570312 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+            </g>
+            <g id="L-to-Score-@score" transform="translate(139.5, 40)" fill="#8E8E8E" fill-rule="nonzero">
+                <path d="M0.5,-3.5 L3,1.83029613 L0.999,1.831 L1,47.9669704 C1,48.2613545 0.776142375,48.5 0.5,48.5 C0.223857625,48.5 3.10862447e-15,48.2613545 3.10862447e-15,47.9669704 L-0.001,1.831 L-2,1.83029613 L0.5,-3.5 Z" id="Combined-Shape"></path>
+            </g>
+        </g>
+        <g id="late-interaction" stroke-width="1" transform="translate(836, 24)">
+            <g transform="translate(0, 366)" fill="#333333" fill-rule="nonzero">
+                <path d="M102.944824,12 C103.200033,12 103.410807,11.9156901 103.577148,11.7470703 C103.74349,11.5784505 103.82666,11.34375 103.82666,11.0429688 L103.82666,3.10644531 C103.82666,2.80566406 103.744629,2.57096354 103.580566,2.40234375 C103.416504,2.23372396 103.209147,2.14941406 102.958496,2.14941406 C102.707845,2.14941406 102.502767,2.23372396 102.343262,2.40234375 C102.183757,2.57096354 102.104004,2.80566406 102.104004,3.10644531 L102.104004,11.0429688 C102.104004,11.3483073 102.182617,11.5841471 102.339844,11.7504883 C102.49707,11.9168294 102.69873,12 102.944824,12 Z" id="Path"></path>
+                <path d="M107.593262,12.0820312 C108.121908,12.0820312 108.549154,11.9737956 108.875,11.7573242 C109.200846,11.5408529 109.436686,11.2594401 109.58252,10.9130859 L109.58252,11.1386719 C109.58252,11.4121094 109.663411,11.6240234 109.825195,11.7744141 C109.986979,11.9248047 110.179525,12 110.402832,12 C110.626139,12 110.817546,11.9248047 110.977051,11.7744141 C111.136556,11.6240234 111.216309,11.4121094 111.216309,11.1386719 L111.216309,7.66601563 C111.216309,7.20117188 111.14681,6.79785156 111.007812,6.45605469 C110.868815,6.11425781 110.671712,5.84309896 110.416504,5.64257812 C110.161296,5.44205729 109.86735,5.29394531 109.534668,5.19824219 C109.201986,5.10253906 108.826009,5.0546875 108.406738,5.0546875 C107.481608,5.0546875 106.640788,5.23242188 105.884277,5.58789062 C105.683757,5.68359375 105.583496,5.84309896 105.583496,6.06640625 C105.583496,6.23502604 105.639323,6.38655599 105.750977,6.52099609 C105.86263,6.6554362 105.99821,6.72265625 106.157715,6.72265625 C106.235189,6.72265625 106.298991,6.71126302 106.349121,6.68847656 C106.353678,6.68847656 106.403809,6.66910807 106.499512,6.63037109 C106.595215,6.59163411 106.656738,6.56770833 106.684082,6.55859375 C106.711426,6.54947917 106.772949,6.52783203 106.868652,6.49365234 C106.964355,6.45947266 107.037272,6.43554687 107.087402,6.421875 C107.137533,6.40820313 107.210449,6.38769531 107.306152,6.36035156 C107.401855,6.33300781 107.483887,6.31363932 107.552246,6.30224609 C107.620605,6.29085286 107.700358,6.27718099 107.791504,6.26123047 C107.88265,6.24527995 107.972656,6.23388672 108.061523,6.22705078 C108.150391,6.22021484 108.238118,6.21679687 108.324707,6.21679687 C108.725749,6.21679687 109.027669,6.31591797 109.230469,6.51416016 C109.433268,6.71240234 109.534668,7.02571615 109.534668,7.45410156 L109.534668,7.98046875 C109.069824,7.98046875 108.67334,7.98616536 108.345215,7.99755859 C108.01709,8.00895182 107.692383,8.0328776 107.371094,8.06933594 C107.049805,8.10579427 106.784342,8.1570638 106.574707,8.22314453 C106.365072,8.28922526 106.166829,8.37467448 105.97998,8.47949219 C105.793132,8.5843099 105.648438,8.71191406 105.545898,8.86230469 C105.443359,9.01269531 105.363607,9.19156901 105.306641,9.39892578 C105.249674,9.60628255 105.221191,9.84667969 105.221191,10.1201172 C105.221191,10.7353516 105.443359,11.2150065 105.887695,11.559082 C106.332031,11.9031576 106.900553,12.077474 107.593262,12.0820312 Z M107.805176,11.0019531 C107.499837,11.0019531 107.254883,10.9187826 107.070312,10.7524414 C106.885742,10.5861003 106.793457,10.3434245 106.793457,10.0244141 C106.793457,9.81477865 106.825358,9.64274089 106.88916,9.50830078 C106.952962,9.37386068 107.041829,9.26448568 107.155762,9.18017578 C107.269694,9.09586589 107.446289,9.03434245 107.685547,8.99560547 C107.924805,8.95686849 108.177734,8.93066406 108.444336,8.91699219 C108.710938,8.90332031 109.072103,8.89648438 109.527832,8.89648438 L109.527832,9.09472656 C109.527832,9.64615885 109.352376,10.101888 109.001465,10.4619141 C108.650553,10.8219401 108.25179,11.0019531 107.805176,11.0019531 Z" id="Shape"></path>
+                <path d="M115.516113,12.0615234 L115.666504,12.0615234 C115.830566,12.0615234 115.962728,12.0592448 116.062988,12.0546875 C116.163249,12.0501302 116.282878,12.0421549 116.421875,12.0307617 C116.560872,12.0193685 116.671387,11.9977214 116.753418,11.9658203 C116.835449,11.9339193 116.916341,11.8929036 116.996094,11.8427734 C117.075846,11.7926432 117.133952,11.7265625 117.17041,11.6445313 C117.206868,11.5625 117.225098,11.4667969 117.225098,11.3574219 C117.225098,11.1842448 117.17041,11.0338542 117.061035,10.90625 C116.95166,10.7786458 116.803548,10.7148437 116.616699,10.7148437 L116.568848,10.7148437 L116.083496,10.7421875 L116.008301,10.7421875 C115.730306,10.7421875 115.536621,10.6350911 115.427246,10.4208984 C115.317871,10.2067057 115.263184,9.88085938 115.263184,9.44335938 L115.263184,6.33300781 L116.356934,6.33300781 C116.552897,6.33300781 116.701009,6.2828776 116.80127,6.18261719 C116.90153,6.08235677 116.95166,5.94563802 116.95166,5.77246094 C116.95166,5.59472656 116.90153,5.4523112 116.80127,5.34521484 C116.701009,5.23811849 116.555176,5.18457031 116.36377,5.18457031 L115.263184,5.18457031 L115.263184,3.44140625 C115.263184,3.17708333 115.207357,2.97884115 115.095703,2.84667969 C114.984049,2.71451823 114.827962,2.6484375 114.627441,2.6484375 C114.37679,2.6484375 114.179688,2.72705078 114.036133,2.88427734 C113.892578,3.04150391 113.807129,3.24316406 113.779785,3.48925781 L113.574707,5.18457031 L112.856934,5.18457031 C112.66097,5.18457031 112.512858,5.23583984 112.412598,5.33837891 C112.312337,5.44091797 112.262207,5.5764974 112.262207,5.74511719 C112.262207,5.92285156 112.315755,6.06526693 112.422852,6.17236328 C112.529948,6.27945964 112.681478,6.33300781 112.877441,6.33300781 L113.540527,6.33300781 L113.540527,9.8671875 C113.540527,10.5735677 113.714844,11.1147461 114.063477,11.4907227 C114.412109,11.8666992 114.896322,12.0569661 115.516113,12.0615234 Z" id="Path"></path>
+                <path d="M118.031738,8.50683594 C118.031738,9.63248698 118.356445,10.5097656 119.005859,11.1386719 C119.655273,11.7675781 120.52002,12.0820312 121.600098,12.0820312 C122.561686,12.0820312 123.402507,11.7789714 124.122559,11.1728516 C124.268392,11.0452474 124.341309,10.8902995 124.341309,10.7080078 C124.341309,10.5485026 124.286621,10.4049479 124.177246,10.2773437 C124.067871,10.1497396 123.942546,10.0859375 123.80127,10.0859375 C123.705566,10.0859375 123.616699,10.1178385 123.534668,10.1816406 C122.89209,10.6419271 122.288249,10.8720703 121.723145,10.8720703 C120.479004,10.8720703 119.820475,10.2294922 119.747559,8.94433594 L124.067871,8.94433594 C124.231934,8.94433594 124.35498,8.88964844 124.437012,8.78027344 C124.519043,8.67089844 124.560059,8.52734375 124.560059,8.34960937 C124.555501,7.8984375 124.489421,7.47916667 124.361816,7.09179687 C124.234212,6.70442708 124.046224,6.35579427 123.797852,6.04589844 C123.549479,5.7360026 123.221354,5.49332682 122.813477,5.31787109 C122.405599,5.14241536 121.937337,5.0546875 121.408691,5.0546875 C120.72054,5.0546875 120.116699,5.21419271 119.597168,5.53320312 C119.077637,5.85221354 118.686849,6.27034505 118.424805,6.78759766 C118.16276,7.30485026 118.031738,7.87792969 118.031738,8.50683594 Z M119.76123,7.98046875 C119.802246,7.48372396 119.967448,7.05989583 120.256836,6.70898438 C120.546224,6.35807292 120.946126,6.18261719 121.456543,6.18261719 C121.980632,6.18261719 122.375977,6.35123698 122.642578,6.68847656 C122.90918,7.02571615 123.06071,7.45638021 123.097168,7.98046875 L119.76123,7.98046875 Z" id="Shape"></path>
+                <path d="M125.927246,9.02636719 L128.668457,9.02636719 C128.868978,9.02636719 129.037598,8.9625651 129.174316,8.83496094 C129.311035,8.70735677 129.379395,8.54785156 129.379395,8.35644531 C129.379395,8.16048177 129.313314,7.99983724 129.181152,7.87451172 C129.048991,7.7491862 128.878092,7.68652344 128.668457,7.68652344 L125.961426,7.68652344 C125.75179,7.68652344 125.580892,7.7491862 125.44873,7.87451172 C125.316569,7.99983724 125.250488,8.16048177 125.250488,8.35644531 C125.250488,8.54785156 125.31543,8.70735677 125.445312,8.83496094 C125.575195,8.9625651 125.73584,9.02636719 125.927246,9.02636719 Z" id="Path"></path>
+                <path d="M131.430176,12 C131.680827,12 131.887044,11.9156901 132.048828,11.7470703 C132.210612,11.5784505 132.291504,11.34375 132.291504,11.0429688 L132.291504,6.10742188 C132.291504,5.80208333 132.210612,5.56510417 132.048828,5.39648438 C131.887044,5.22786458 131.680827,5.14355469 131.430176,5.14355469 C131.179525,5.14355469 130.974447,5.22786458 130.814941,5.39648438 C130.655436,5.56510417 130.575684,5.80208333 130.575684,6.10742188 L130.575684,11.0429688 C130.575684,11.3483073 130.655436,11.5841471 130.814941,11.7504883 C130.974447,11.9168294 131.179525,12 131.430176,12 Z M131.430176,3.77636719 C131.7264,3.77636719 131.966797,3.68863932 132.151367,3.51318359 C132.335938,3.33772786 132.428223,3.10872396 132.428223,2.82617187 C132.428223,2.54361979 132.337077,2.31575521 132.154785,2.14257813 C131.972493,1.96940104 131.733236,1.8828125 131.437012,1.8828125 C131.13623,1.8828125 130.893555,1.96940104 130.708984,2.14257813 C130.524414,2.31575521 130.432129,2.54361979 130.432129,2.82617187 C130.432129,3.10872396 130.524414,3.33772786 130.708984,3.51318359 C130.893555,3.68863932 131.133952,3.77636719 131.430176,3.77636719 Z" id="Shape"></path>
+                <path d="M134.882324,12 C135.132975,12 135.339193,11.9168294 135.500977,11.7504883 C135.66276,11.5841471 135.743652,11.3528646 135.743652,11.0566406 L135.743652,8.16503906 C135.743652,7.58626302 135.909993,7.11686198 136.242676,6.75683594 C136.575358,6.3968099 136.969564,6.21679688 137.425293,6.21679688 C137.803548,6.21679688 138.114583,6.34098307 138.358398,6.58935547 C138.602214,6.83772786 138.724121,7.19661458 138.724121,7.66601562 L138.724121,11.0566406 C138.724121,11.3528646 138.803874,11.5841471 138.963379,11.7504883 C139.122884,11.9168294 139.323405,12 139.564941,12 C139.82015,12 140.029785,11.9168294 140.193848,11.7504883 C140.35791,11.5841471 140.439941,11.3528646 140.439941,11.0566406 L140.439941,7.67285156 C140.439941,7.23535156 140.373861,6.84570313 140.241699,6.50390625 C140.109538,6.16210938 139.929525,5.88753255 139.70166,5.68017578 C139.473796,5.47281901 139.215169,5.31559245 138.925781,5.20849609 C138.636393,5.10139974 138.327637,5.04785156 137.999512,5.04785156 C137.466309,5.04785156 137.007161,5.14811198 136.62207,5.34863281 C136.236979,5.54915365 135.944173,5.84309896 135.743652,6.23046875 L135.743652,5.99121094 C135.743652,5.72233073 135.6639,5.51383464 135.504395,5.36572266 C135.344889,5.21761068 135.14209,5.14355469 134.895996,5.14355469 C134.645345,5.14355469 134.437988,5.21875 134.273926,5.36914062 C134.109863,5.51953125 134.027832,5.73144531 134.027832,6.00488281 L134.027832,11.0566406 C134.027832,11.3528646 134.107585,11.5841471 134.26709,11.7504883 C134.426595,11.9168294 134.631673,12 134.882324,12 Z" id="Path"></path>
+                <path d="M144.691895,12.0615234 L144.842285,12.0615234 C145.006348,12.0615234 145.138509,12.0592448 145.23877,12.0546875 C145.33903,12.0501302 145.458659,12.0421549 145.597656,12.0307617 C145.736654,12.0193685 145.847168,11.9977214 145.929199,11.9658203 C146.01123,11.9339193 146.092122,11.8929036 146.171875,11.8427734 C146.251628,11.7926432 146.309733,11.7265625 146.346191,11.6445313 C146.38265,11.5625 146.400879,11.4667969 146.400879,11.3574219 C146.400879,11.1842448 146.346191,11.0338542 146.236816,10.90625 C146.127441,10.7786458 145.979329,10.7148437 145.79248,10.7148437 L145.744629,10.7148437 L145.259277,10.7421875 L145.184082,10.7421875 C144.906087,10.7421875 144.712402,10.6350911 144.603027,10.4208984 C144.493652,10.2067057 144.438965,9.88085938 144.438965,9.44335938 L144.438965,6.33300781 L145.532715,6.33300781 C145.728678,6.33300781 145.87679,6.2828776 145.977051,6.18261719 C146.077311,6.08235677 146.127441,5.94563802 146.127441,5.77246094 C146.127441,5.59472656 146.077311,5.4523112 145.977051,5.34521484 C145.87679,5.23811849 145.730957,5.18457031 145.539551,5.18457031 L144.438965,5.18457031 L144.438965,3.44140625 C144.438965,3.17708333 144.383138,2.97884115 144.271484,2.84667969 C144.159831,2.71451823 144.003743,2.6484375 143.803223,2.6484375 C143.552572,2.6484375 143.355469,2.72705078 143.211914,2.88427734 C143.068359,3.04150391 142.98291,3.24316406 142.955566,3.48925781 L142.750488,5.18457031 L142.032715,5.18457031 C141.836751,5.18457031 141.688639,5.23583984 141.588379,5.33837891 C141.488118,5.44091797 141.437988,5.5764974 141.437988,5.74511719 C141.437988,5.92285156 141.491536,6.06526693 141.598633,6.17236328 C141.705729,6.27945964 141.857259,6.33300781 142.053223,6.33300781 L142.716309,6.33300781 L142.716309,9.8671875 C142.716309,10.5735677 142.890625,11.1147461 143.239258,11.4907227 C143.587891,11.8666992 144.072103,12.0569661 144.691895,12.0615234 Z" id="Path"></path>
+                <path d="M147.20752,8.50683594 C147.20752,9.63248698 147.532227,10.5097656 148.181641,11.1386719 C148.831055,11.7675781 149.695801,12.0820312 150.775879,12.0820312 C151.737467,12.0820312 152.578288,11.7789714 153.29834,11.1728516 C153.444173,11.0452474 153.51709,10.8902995 153.51709,10.7080078 C153.51709,10.5485026 153.462402,10.4049479 153.353027,10.2773437 C153.243652,10.1497396 153.118327,10.0859375 152.977051,10.0859375 C152.881348,10.0859375 152.79248,10.1178385 152.710449,10.1816406 C152.067871,10.6419271 151.46403,10.8720703 150.898926,10.8720703 C149.654785,10.8720703 148.996257,10.2294922 148.92334,8.94433594 L153.243652,8.94433594 C153.407715,8.94433594 153.530762,8.88964844 153.612793,8.78027344 C153.694824,8.67089844 153.73584,8.52734375 153.73584,8.34960937 C153.731283,7.8984375 153.665202,7.47916667 153.537598,7.09179687 C153.409993,6.70442708 153.222005,6.35579427 152.973633,6.04589844 C152.72526,5.7360026 152.397135,5.49332682 151.989258,5.31787109 C151.58138,5.14241536 151.113118,5.0546875 150.584473,5.0546875 C149.896322,5.0546875 149.29248,5.21419271 148.772949,5.53320312 C148.253418,5.85221354 147.86263,6.27034505 147.600586,6.78759766 C147.338542,7.30485026 147.20752,7.87792969 147.20752,8.50683594 Z M148.937012,7.98046875 C148.978027,7.48372396 149.143229,7.05989583 149.432617,6.70898438 C149.722005,6.35807292 150.121908,6.18261719 150.632324,6.18261719 C151.156413,6.18261719 151.551758,6.35123698 151.818359,6.68847656 C152.084961,7.02571615 152.236491,7.45638021 152.272949,7.98046875 L148.937012,7.98046875 Z" id="Shape"></path>
+                <path d="M155.937012,12 C156.19222,12 156.400716,11.9202474 156.5625,11.7607422 C156.724284,11.601237 156.805176,11.3733724 156.805176,11.0771484 L156.805176,8.19238281 C156.805176,7.8688151 156.857585,7.5953776 156.962402,7.37207031 C157.06722,7.14876302 157.215332,6.98242187 157.406738,6.87304687 C157.598145,6.76367188 157.797526,6.68733724 158.004883,6.64404297 C158.21224,6.6007487 158.450358,6.57910156 158.719238,6.57910156 C158.896973,6.57910156 159.038249,6.5061849 159.143066,6.36035156 C159.247884,6.21451823 159.300293,6.04589844 159.300293,5.85449219 C159.300293,5.65397135 159.243327,5.48307292 159.129395,5.34179688 C159.015462,5.20052083 158.853678,5.12988281 158.644043,5.12988281 C158.215658,5.12988281 157.829427,5.27685547 157.485352,5.57080078 C157.141276,5.86474609 156.907715,6.21679688 156.784668,6.62695312 L156.805176,5.92285156 C156.809733,5.6813151 156.726562,5.49104818 156.555664,5.35205078 C156.384766,5.21305339 156.183105,5.14355469 155.950684,5.14355469 C155.713704,5.14355469 155.509766,5.21533203 155.338867,5.35888672 C155.167969,5.50244141 155.08252,5.70410156 155.08252,5.96386719 L155.08252,11.0634766 C155.08252,11.3642578 155.163411,11.5955404 155.325195,11.7573242 C155.486979,11.9191081 155.690918,12 155.937012,12 Z" id="Path"></path>
+                <path d="M162.321777,12.0820312 C162.850423,12.0820312 163.277669,11.9737956 163.603516,11.7573242 C163.929362,11.5408529 164.165202,11.2594401 164.311035,10.9130859 L164.311035,11.1386719 C164.311035,11.4121094 164.391927,11.6240234 164.553711,11.7744141 C164.715495,11.9248047 164.90804,12 165.131348,12 C165.354655,12 165.546061,11.9248047 165.705566,11.7744141 C165.865072,11.6240234 165.944824,11.4121094 165.944824,11.1386719 L165.944824,7.66601563 C165.944824,7.20117188 165.875326,6.79785156 165.736328,6.45605469 C165.597331,6.11425781 165.400228,5.84309896 165.14502,5.64257812 C164.889811,5.44205729 164.595866,5.29394531 164.263184,5.19824219 C163.930501,5.10253906 163.554525,5.0546875 163.135254,5.0546875 C162.210124,5.0546875 161.369303,5.23242188 160.612793,5.58789062 C160.412272,5.68359375 160.312012,5.84309896 160.312012,6.06640625 C160.312012,6.23502604 160.367839,6.38655599 160.479492,6.52099609 C160.591146,6.6554362 160.726725,6.72265625 160.88623,6.72265625 C160.963704,6.72265625 161.027507,6.71126302 161.077637,6.68847656 C161.082194,6.68847656 161.132324,6.66910807 161.228027,6.63037109 C161.32373,6.59163411 161.385254,6.56770833 161.412598,6.55859375 C161.439941,6.54947917 161.501465,6.52783203 161.597168,6.49365234 C161.692871,6.45947266 161.765788,6.43554687 161.815918,6.421875 C161.866048,6.40820313 161.938965,6.38769531 162.034668,6.36035156 C162.130371,6.33300781 162.212402,6.31363932 162.280762,6.30224609 C162.349121,6.29085286 162.428874,6.27718099 162.52002,6.26123047 C162.611165,6.24527995 162.701172,6.23388672 162.790039,6.22705078 C162.878906,6.22021484 162.966634,6.21679687 163.053223,6.21679687 C163.454264,6.21679687 163.756185,6.31591797 163.958984,6.51416016 C164.161784,6.71240234 164.263184,7.02571615 164.263184,7.45410156 L164.263184,7.98046875 C163.79834,7.98046875 163.401855,7.98616536 163.07373,7.99755859 C162.745605,8.00895182 162.420898,8.0328776 162.099609,8.06933594 C161.77832,8.10579427 161.512858,8.1570638 161.303223,8.22314453 C161.093587,8.28922526 160.895345,8.37467448 160.708496,8.47949219 C160.521647,8.5843099 160.376953,8.71191406 160.274414,8.86230469 C160.171875,9.01269531 160.092122,9.19156901 160.035156,9.39892578 C159.97819,9.60628255 159.949707,9.84667969 159.949707,10.1201172 C159.949707,10.7353516 160.171875,11.2150065 160.616211,11.559082 C161.060547,11.9031576 161.629069,12.077474 162.321777,12.0820312 Z M162.533691,11.0019531 C162.228353,11.0019531 161.983398,10.9187826 161.798828,10.7524414 C161.614258,10.5861003 161.521973,10.3434245 161.521973,10.0244141 C161.521973,9.81477865 161.553874,9.64274089 161.617676,9.50830078 C161.681478,9.37386068 161.770345,9.26448568 161.884277,9.18017578 C161.99821,9.09586589 162.174805,9.03434245 162.414062,8.99560547 C162.65332,8.95686849 162.90625,8.93066406 163.172852,8.91699219 C163.439453,8.90332031 163.800618,8.89648438 164.256348,8.89648438 L164.256348,9.09472656 C164.256348,9.64615885 164.080892,10.101888 163.72998,10.4619141 C163.379069,10.8219401 162.980306,11.0019531 162.533691,11.0019531 Z" id="Shape"></path>
+                <path d="M169.116699,8.54785156 C169.116699,7.85970052 169.28418,7.31054687 169.619141,6.90039063 C169.954102,6.49023437 170.417806,6.28515625 171.010254,6.28515625 C171.402181,6.28515625 171.803223,6.38769531 172.213379,6.59277344 C172.318197,6.65201823 172.425293,6.68164063 172.534668,6.68164063 C172.689616,6.68164063 172.820638,6.62695312 172.927734,6.51757813 C173.034831,6.40820312 173.088379,6.27832031 173.088379,6.12792969 C173.088379,5.91829427 172.98584,5.7405599 172.780762,5.59472656 C172.27946,5.23014323 171.630046,5.04785156 170.83252,5.04785156 C170.162598,5.04785156 169.563314,5.20621745 169.034668,5.52294922 C168.506022,5.83968099 168.099284,6.26464844 167.814453,6.79785156 C167.529622,7.33105469 167.387207,7.91894531 167.387207,8.56152344 C167.387207,9.59147135 167.706217,10.4357096 168.344238,11.0942383 C168.982259,11.7527669 169.80485,12.0820312 170.812012,12.0820312 C171.313314,12.0820312 171.746257,12.0239258 172.11084,11.9077148 C172.475423,11.7915039 172.753418,11.6468099 172.944824,11.4736328 C173.127116,11.3186849 173.218262,11.1363932 173.218262,10.9267578 C173.218262,10.7718099 173.168132,10.6385091 173.067871,10.5268555 C172.967611,10.4152018 172.844564,10.359375 172.69873,10.359375 C172.580241,10.359375 172.46403,10.398112 172.350098,10.4755859 C171.994629,10.7125651 171.536621,10.8310547 170.976074,10.8310547 C170.397298,10.8310547 169.942708,10.6225586 169.612305,10.2055664 C169.281901,9.78857422 169.116699,9.2360026 169.116699,8.54785156 Z" id="Path"></path>
+                <path d="M176.950684,12.0615234 L177.101074,12.0615234 C177.265137,12.0615234 177.397298,12.0592448 177.497559,12.0546875 C177.597819,12.0501302 177.717448,12.0421549 177.856445,12.0307617 C177.995443,12.0193685 178.105957,11.9977214 178.187988,11.9658203 C178.27002,11.9339193 178.350911,11.8929036 178.430664,11.8427734 C178.510417,11.7926432 178.568522,11.7265625 178.60498,11.6445313 C178.641439,11.5625 178.659668,11.4667969 178.659668,11.3574219 C178.659668,11.1842448 178.60498,11.0338542 178.495605,10.90625 C178.38623,10.7786458 178.238118,10.7148437 178.05127,10.7148437 L178.003418,10.7148437 L177.518066,10.7421875 L177.442871,10.7421875 C177.164876,10.7421875 176.971191,10.6350911 176.861816,10.4208984 C176.752441,10.2067057 176.697754,9.88085938 176.697754,9.44335938 L176.697754,6.33300781 L177.791504,6.33300781 C177.987467,6.33300781 178.135579,6.2828776 178.23584,6.18261719 C178.3361,6.08235677 178.38623,5.94563802 178.38623,5.77246094 C178.38623,5.59472656 178.3361,5.4523112 178.23584,5.34521484 C178.135579,5.23811849 177.989746,5.18457031 177.79834,5.18457031 L176.697754,5.18457031 L176.697754,3.44140625 C176.697754,3.17708333 176.641927,2.97884115 176.530273,2.84667969 C176.41862,2.71451823 176.262533,2.6484375 176.062012,2.6484375 C175.811361,2.6484375 175.614258,2.72705078 175.470703,2.88427734 C175.327148,3.04150391 175.241699,3.24316406 175.214355,3.48925781 L175.009277,5.18457031 L174.291504,5.18457031 C174.09554,5.18457031 173.947428,5.23583984 173.847168,5.33837891 C173.746908,5.44091797 173.696777,5.5764974 173.696777,5.74511719 C173.696777,5.92285156 173.750326,6.06526693 173.857422,6.17236328 C173.964518,6.27945964 174.116048,6.33300781 174.312012,6.33300781 L174.975098,6.33300781 L174.975098,9.8671875 C174.975098,10.5735677 175.149414,11.1147461 175.498047,11.4907227 C175.84668,11.8666992 176.330892,12.0569661 176.950684,12.0615234 Z" id="Path"></path>
+                <path d="M180.717285,12 C180.967936,12 181.174154,11.9156901 181.335938,11.7470703 C181.497721,11.5784505 181.578613,11.34375 181.578613,11.0429688 L181.578613,6.10742188 C181.578613,5.80208333 181.497721,5.56510417 181.335938,5.39648438 C181.174154,5.22786458 180.967936,5.14355469 180.717285,5.14355469 C180.466634,5.14355469 180.261556,5.22786458 180.102051,5.39648438 C179.942546,5.56510417 179.862793,5.80208333 179.862793,6.10742188 L179.862793,11.0429688 C179.862793,11.3483073 179.942546,11.5841471 180.102051,11.7504883 C180.261556,11.9168294 180.466634,12 180.717285,12 Z M180.717285,3.77636719 C181.013509,3.77636719 181.253906,3.68863932 181.438477,3.51318359 C181.623047,3.33772786 181.715332,3.10872396 181.715332,2.82617187 C181.715332,2.54361979 181.624186,2.31575521 181.441895,2.14257813 C181.259603,1.96940104 181.020345,1.8828125 180.724121,1.8828125 C180.42334,1.8828125 180.180664,1.96940104 179.996094,2.14257813 C179.811523,2.31575521 179.719238,2.54361979 179.719238,2.82617187 C179.719238,3.10872396 179.811523,3.33772786 179.996094,3.51318359 C180.180664,3.68863932 180.421061,3.77636719 180.717285,3.77636719 Z" id="Shape"></path>
+                <path d="M186.541504,10.9130859 C185.953613,10.9130859 185.495605,10.7080078 185.16748,10.2978516 C184.839355,9.88769531 184.675293,9.31347656 184.675293,8.57519531 C184.675293,7.82779948 184.838216,7.24788411 185.164062,6.83544922 C185.489909,6.42301432 185.949056,6.21679688 186.541504,6.21679688 C187.133952,6.21679688 187.594238,6.42415365 187.922363,6.83886719 C188.250488,7.25358073 188.414551,7.83235677 188.414551,8.57519531 C188.414551,9.31347656 188.250488,9.88769531 187.922363,10.2978516 C187.594238,10.7080078 187.133952,10.9130859 186.541504,10.9130859 Z M186.541504,12.0820312 C187.024577,12.0820312 187.466634,12.0159505 187.867676,11.8837891 C188.268717,11.7516276 188.607096,11.5750326 188.882812,11.3540039 C189.158529,11.1329753 189.390951,10.870931 189.580078,10.5678711 C189.769206,10.2648112 189.908203,9.9469401 189.99707,9.61425781 C190.085938,9.28157552 190.130371,8.93522135 190.130371,8.57519531 C190.130371,8.1969401 190.083659,7.83463542 189.990234,7.48828125 C189.89681,7.14192708 189.752116,6.8194987 189.556152,6.52099609 C189.360189,6.22249349 189.12321,5.96500651 188.845215,5.74853516 C188.56722,5.5320638 188.23112,5.36116536 187.836914,5.23583984 C187.442708,5.11051432 187.010905,5.04785156 186.541504,5.04785156 C186.062988,5.04785156 185.624349,5.11279297 185.225586,5.24267578 C184.826823,5.37255859 184.489583,5.54915365 184.213867,5.77246094 C183.938151,5.99576823 183.70459,6.2578125 183.513184,6.55859375 C183.321777,6.859375 183.181641,7.17952474 183.092773,7.51904297 C183.003906,7.8585612 182.959473,8.21061198 182.959473,8.57519531 C182.959473,9.03548177 183.032389,9.47070312 183.178223,9.88085938 C183.324056,10.2910156 183.537109,10.6624349 183.817383,10.9951172 C184.097656,11.3277995 184.472493,11.5921224 184.941895,11.7880859 C185.411296,11.9840495 185.944499,12.0820312 186.541504,12.0820312 Z" id="Shape"></path>
+                <path d="M192.434082,12 C192.684733,12 192.890951,11.9168294 193.052734,11.7504883 C193.214518,11.5841471 193.29541,11.3528646 193.29541,11.0566406 L193.29541,8.16503906 C193.29541,7.58626302 193.461751,7.11686198 193.794434,6.75683594 C194.127116,6.3968099 194.521322,6.21679688 194.977051,6.21679688 C195.355306,6.21679688 195.666341,6.34098307 195.910156,6.58935547 C196.153971,6.83772786 196.275879,7.19661458 196.275879,7.66601562 L196.275879,11.0566406 C196.275879,11.3528646 196.355632,11.5841471 196.515137,11.7504883 C196.674642,11.9168294 196.875163,12 197.116699,12 C197.371908,12 197.581543,11.9168294 197.745605,11.7504883 C197.909668,11.5841471 197.991699,11.3528646 197.991699,11.0566406 L197.991699,7.67285156 C197.991699,7.23535156 197.925618,6.84570313 197.793457,6.50390625 C197.661296,6.16210938 197.481283,5.88753255 197.253418,5.68017578 C197.025553,5.47281901 196.766927,5.31559245 196.477539,5.20849609 C196.188151,5.10139974 195.879395,5.04785156 195.55127,5.04785156 C195.018066,5.04785156 194.558919,5.14811198 194.173828,5.34863281 C193.788737,5.54915365 193.495931,5.84309896 193.29541,6.23046875 L193.29541,5.99121094 C193.29541,5.72233073 193.215658,5.51383464 193.056152,5.36572266 C192.896647,5.21761068 192.693848,5.14355469 192.447754,5.14355469 C192.197103,5.14355469 191.989746,5.21875 191.825684,5.36914062 C191.661621,5.51953125 191.57959,5.73144531 191.57959,6.00488281 L191.57959,11.0566406 C191.57959,11.3528646 191.659342,11.5841471 191.818848,11.7504883 C191.978353,11.9168294 192.183431,12 192.434082,12 Z" id="Path"></path>
+            </g>
+            <g id="MEAN-@yellow" transform="translate(20, 176)">
+                <g id="LLM-dash-box" transform="translate(0, 22)" xlink:href="#path-16">
+                    <path d="M7.60126829,127.489595 C7.73368411,127.496523 7.86662278,127.5 8,127.5 L8.77464335,127.5 C9.05078572,127.5 9.27464335,127.723858 9.27464335,128 C9.27464335,128.276142 9.05078572,128.5 8.77464335,128.5 L8,128.5 C7.8491935,128.5 7.69883087,128.496068 7.54901422,128.488228 C7.27324911,128.473799 7.06139478,128.23855 7.07582434,127.962785 C7.09025391,127.687019 7.32550317,127.475165 7.60126829,127.489595 Z M101.674643,128 C101.674643,128.276142 101.450786,128.5 101.174643,128.5 L99.9746433,128.5 C99.698501,128.5 99.4746433,128.276142 99.4746433,128 C99.4746433,127.723858 99.698501,127.5 99.9746433,127.5 L101.174643,127.5 C101.450786,127.5 101.674643,127.723858 101.674643,128 Z M17.6746433,128 C17.6746433,128.276142 17.4507857,128.5 17.1746433,128.5 L15.9746433,128.5 C15.698501,128.5 15.4746433,128.276142 15.4746433,128 C15.4746433,127.723858 15.698501,127.5 15.9746433,127.5 L17.1746434,127.5 C17.4507857,127.5 17.6746433,127.723858 17.6746433,128 Z M21.8746433,128 C21.8746433,128.276142 21.6507857,128.5 21.3746433,128.5 L20.1746433,128.5 C19.898501,128.5 19.6746433,128.276142 19.6746433,128 C19.6746433,127.723858 19.898501,127.5 20.1746433,127.5 L21.3746434,127.5 C21.6507857,127.5 21.8746433,127.723858 21.8746433,128 Z M26.0746433,128 C26.0746433,128.276142 25.8507857,128.5 25.5746433,128.5 L24.3746433,128.5 C24.098501,128.5 23.8746433,128.276142 23.8746433,128 C23.8746433,127.723858 24.098501,127.5 24.3746433,127.5 L25.5746435,127.5 C25.8507857,127.5 26.0746433,127.723858 26.0746433,128 Z M30.2746433,128 C30.2746433,128.276142 30.0507857,128.5 29.7746433,128.5 L28.5746433,128.5 C28.298501,128.5 28.0746433,128.276142 28.0746433,128 C28.0746433,127.723858 28.298501,127.5 28.5746433,127.5 L29.7746434,127.5 C30.0507857,127.5 30.2746433,127.723858 30.2746433,128 Z M34.4746433,128 C34.4746433,128.276142 34.2507857,128.5 33.9746433,128.5 L32.7746433,128.5 C32.498501,128.5 32.2746433,128.276142 32.2746433,128 C32.2746433,127.723858 32.498501,127.5 32.7746433,127.5 L33.9746434,127.5 C34.2507857,127.5 34.4746433,127.723858 34.4746433,128 Z M38.6746433,128 C38.6746433,128.276142 38.4507857,128.5 38.1746433,128.5 L36.9746433,128.5 C36.698501,128.5 36.4746433,128.276142 36.4746433,128 C36.4746433,127.723858 36.698501,127.5 36.9746433,127.5 L38.1746434,127.5 C38.4507857,127.5 38.6746433,127.723858 38.6746433,128 Z M42.8746433,128 C42.8746433,128.276142 42.6507857,128.5 42.3746433,128.5 L41.1746433,128.5 C40.898501,128.5 40.6746433,128.276142 40.6746433,128 C40.6746433,127.723858 40.898501,127.5 41.1746433,127.5 L42.3746434,127.5 C42.6507857,127.5 42.8746433,127.723858 42.8746433,128 Z M47.0746433,128 C47.0746433,128.276142 46.8507857,128.5 46.5746433,128.5 L45.3746433,128.5 C45.098501,128.5 44.8746433,128.276142 44.8746433,128 C44.8746433,127.723858 45.098501,127.5 45.3746433,127.5 L46.5746434,127.5 C46.8507857,127.5 47.0746433,127.723858 47.0746433,128 Z M51.2746433,128 C51.2746433,128.276142 51.0507857,128.5 50.7746433,128.5 L49.5746433,128.5 C49.298501,128.5 49.0746433,128.276142 49.0746433,128 C49.0746433,127.723858 49.298501,127.5 49.5746433,127.5 L50.7746434,127.5 C51.0507857,127.5 51.2746433,127.723858 51.2746433,128 Z M55.4746433,128 C55.4746433,128.276142 55.2507857,128.5 54.9746433,128.5 L53.7746433,128.5 C53.498501,128.5 53.2746433,128.276142 53.2746433,128 C53.2746433,127.723858 53.498501,127.5 53.7746433,127.5 L54.9746434,127.5 C55.2507857,127.5 55.4746433,127.723858 55.4746433,128 Z M59.6746433,128 C59.6746433,128.276142 59.4507857,128.5 59.1746433,128.5 L57.9746433,128.5 C57.698501,128.5 57.4746433,128.276142 57.4746433,128 C57.4746433,127.723858 57.698501,127.5 57.9746433,127.5 L59.1746433,127.5 C59.4507857,127.5 59.6746433,127.723858 59.6746433,128 Z M105.874643,128 C105.874643,128.276142 105.650786,128.5 105.374643,128.5 L104.174643,128.5 C103.898501,128.5 103.674643,128.276142 103.674643,128 C103.674643,127.723858 103.898501,127.5 104.174643,127.5 L105.374643,127.5 C105.650786,127.5 105.874643,127.723858 105.874643,128 Z M110.074643,128 C110.074643,128.276142 109.850786,128.5 109.574643,128.5 L108.374643,128.5 C108.098501,128.5 107.874643,128.276142 107.874643,128 C107.874643,127.723858 108.098501,127.5 108.374643,127.5 L109.574643,127.5 C109.850786,127.5 110.074643,127.723858 110.074643,128 Z M13.4746433,128 C13.4746433,128.276142 13.2507857,128.5 12.9746433,128.5 L11.7746433,128.5 C11.498501,128.5 11.2746433,128.276142 11.2746433,128 C11.2746433,127.723858 11.498501,127.5 11.7746433,127.5 L12.9746434,127.5 C13.2507857,127.5 13.4746433,127.723858 13.4746433,128 Z M63.8746433,128 C63.8746433,128.276142 63.6507857,128.5 63.3746433,128.5 L62.1746433,128.5 C61.898501,128.5 61.6746433,128.276142 61.6746433,128 C61.6746433,127.723858 61.898501,127.5 62.1746433,127.5 L63.3746432,127.5 C63.6507857,127.5 63.8746433,127.723858 63.8746433,128 Z M68.0746433,128 C68.0746433,128.276142 67.8507857,128.5 67.5746433,128.5 L66.3746433,128.5 C66.098501,128.5 65.8746433,128.276142 65.8746433,128 C65.8746433,127.723858 66.098501,127.5 66.3746433,127.5 L67.5746433,127.5 C67.8507857,127.5 68.0746433,127.723858 68.0746433,128 Z M72.2746433,128 C72.2746433,128.276142 72.0507857,128.5 71.7746433,128.5 L70.5746433,128.5 C70.298501,128.5 70.0746433,128.276142 70.0746433,128 C70.0746433,127.723858 70.298501,127.5 70.5746433,127.5 L71.7746433,127.5 C72.0507857,127.5 72.2746433,127.723858 72.2746433,128 Z M76.4746433,128 C76.4746433,128.276142 76.2507857,128.5 75.9746433,128.5 L74.7746433,128.5 C74.498501,128.5 74.2746433,128.276142 74.2746433,128 C74.2746433,127.723858 74.498501,127.5 74.7746433,127.5 L75.9746433,127.5 C76.2507857,127.5 76.4746433,127.723858 76.4746433,128 Z M80.6746433,128 C80.6746433,128.276142 80.4507857,128.5 80.1746433,128.5 L78.9746433,128.5 C78.698501,128.5 78.4746433,128.276142 78.4746433,128 C78.4746433,127.723858 78.698501,127.5 78.9746433,127.5 L80.1746433,127.5 C80.4507857,127.5 80.6746433,127.723858 80.6746433,128 Z M84.8746433,128 C84.8746433,128.276142 84.6507857,128.5 84.3746433,128.5 L83.1746433,128.5 C82.898501,128.5 82.6746433,128.276142 82.6746433,128 C82.6746433,127.723858 82.898501,127.5 83.1746433,127.5 L84.3746433,127.5 C84.6507857,127.5 84.8746433,127.723858 84.8746433,128 Z M89.0746433,128 C89.0746433,128.276142 88.8507857,128.5 88.5746433,128.5 L87.3746433,128.5 C87.098501,128.5 86.8746433,128.276142 86.8746433,128 C86.8746433,127.723858 87.098501,127.5 87.3746433,127.5 L88.5746433,127.5 C88.8507857,127.5 89.0746433,127.723858 89.0746433,128 Z M93.2746433,128 C93.2746433,128.276142 93.0507857,128.5 92.7746433,128.5 L91.5746433,128.5 C91.298501,128.5 91.0746433,128.276142 91.0746433,128 C91.0746433,127.723858 91.298501,127.5 91.5746433,127.5 L92.7746433,127.5 C93.0507857,127.5 93.2746433,127.723858 93.2746433,128 Z M97.4746433,128 C97.4746433,128.276142 97.2507857,128.5 96.9746433,128.5 L95.7746433,128.5 C95.498501,128.5 95.2746433,128.276142 95.2746433,128 C95.2746433,127.723858 95.498501,127.5 95.7746433,127.5 L96.9746433,127.5 C97.2507857,127.5 97.4746433,127.723858 97.4746433,128 Z M114.055358,126.915582 C114.180112,127.161937 114.081535,127.462781 113.835179,127.587535 C113.455748,127.779678 113.062077,127.943136 112.657126,128.076292 C112.394802,128.16255 112.11222,128.01982 112.025961,127.757496 C111.939703,127.495171 112.082433,127.212589 112.344758,127.126331 C112.701744,127.008946 113.048821,126.864836 113.383405,126.695403 C113.62976,126.570649 113.930604,126.669226 114.055358,126.915582 Z M3.87987036,126.26801 C4.19335033,126.474514 4.52190256,126.657254 4.86287326,126.814501 C5.11363396,126.930146 5.22316717,127.227176 5.10752253,127.477937 C4.9918779,127.728697 4.69484768,127.838231 4.44408698,127.722586 C4.05748525,127.544295 3.68504135,127.337142 3.32975792,127.103101 C3.09915402,126.951191 3.03535949,126.641103 3.18726884,126.410499 C3.3391782,126.179895 3.64926646,126.116101 3.87987036,126.26801 Z M116.999048,123.909773 C117.231117,124.059434 117.297922,124.368887 117.148261,124.600957 C116.917678,124.958505 116.660543,125.298348 116.37924,125.617559 C116.196668,125.824735 115.880714,125.844681 115.673538,125.662108 C115.466362,125.479535 115.446417,125.163582 115.62899,124.956406 C115.877349,124.674578 116.104345,124.374568 116.307864,124.058986 C116.457525,123.826916 116.766979,123.760111 116.999048,123.909773 Z M1.27256369,123.319388 C1.43898255,123.655985 1.63055181,123.979556 1.84546492,124.287446 C2.00352124,124.513881 1.94808936,124.825572 1.72165435,124.983629 C1.49521934,125.141685 1.18352754,125.086253 1.02547122,124.859818 C0.781933905,124.510921 0.564805969,124.14418 0.376143395,123.762593 C0.25375572,123.515054 0.355211426,123.215168 0.602751055,123.092781 C0.850290685,122.970393 1.15017602,123.071849 1.27256369,123.319388 Z M118.493626,120.332066 C118.477178,120.759793 118.428966,121.182879 118.349775,121.598809 C118.298127,121.870079 118.036351,122.048117 117.765081,121.996469 C117.493812,121.944821 117.315774,121.683044 117.367422,121.411775 C117.437286,121.044832 117.479837,120.671419 117.494364,120.29364 C117.504975,120.017702 117.73727,119.802611 118.013208,119.813222 C118.289146,119.823833 118.504237,120.056128 118.493626,120.332066 Z M0.5,119.461965 L0.5,120.011642 C0.500324493,120.219706 0.509101847,120.42668 0.526243834,120.632238 C0.549192342,120.907426 0.344712508,121.149112 0.0695253402,121.172061 C-0.205661827,121.195009 -0.447348546,120.990529 -0.470297054,120.715342 C-0.489701403,120.482655 -0.49963303,120.248463 -0.5,120.01241 L-0.5,119.461965 C-0.5,119.185831 -0.276150919,118.961965 7.30095984e-11,118.961965 C0.27613383,118.961965 0.5,119.185814 0.5,119.461965 Z M118.5,116.112678 L118.5,117.312678 C118.5,117.588821 118.276142,117.812678 118,117.812678 C117.723858,117.812678 117.5,117.588821 117.5,117.312678 L117.5,116.112678 C117.5,115.836536 117.723858,115.612678 118,115.612678 C118.276142,115.612678 118.5,115.836536 118.5,116.112678 Z M0.5,115.261965 L0.5,116.461965 C0.5,116.738107 0.276142375,116.961965 0,116.961965 C-0.276142375,116.961965 -0.5,116.738107 -0.5,116.461965 L-0.5,115.261965 C-0.5,114.985823 -0.276142375,114.761965 0,114.761965 C0.276142375,114.761965 0.5,114.985823 0.5,115.261965 Z M118.5,111.912678 L118.5,113.112678 C118.5,113.388821 118.276142,113.612678 118,113.612678 C117.723858,113.612678 117.5,113.388821 117.5,113.112678 L117.5,111.912678 C117.5,111.636536 117.723858,111.412678 118,111.412678 C118.276142,111.412678 118.5,111.636536 118.5,111.912678 Z M0.5,111.061965 L0.5,112.261965 C0.5,112.538107 0.276142375,112.761965 0,112.761965 C-0.276142375,112.761965 -0.5,112.538107 -0.5,112.261965 L-0.5,111.061965 C-0.5,110.785823 -0.276142375,110.561965 0,110.561965 C0.276142375,110.561965 0.5,110.785823 0.5,111.061965 Z M118.5,107.712678 L118.5,108.912678 C118.5,109.188821 118.276142,109.412678 118,109.412678 C117.723858,109.412678 117.5,109.188821 117.5,108.912678 L117.5,107.712678 C117.5,107.436536 117.723858,107.212678 118,107.212678 C118.276142,107.212678 118.5,107.436536 118.5,107.712678 Z M0.5,106.861965 L0.5,108.061965 C0.5,108.338107 0.276142375,108.561965 0,108.561965 C-0.276142375,108.561965 -0.5,108.338107 -0.5,108.061965 L-0.5,106.861965 C-0.5,106.585823 -0.276142375,106.361965 0,106.361965 C0.276142375,106.361965 0.5,106.585823 0.5,106.861965 Z M118.5,103.512678 L118.5,104.712678 C118.5,104.988821 118.276142,105.212678 118,105.212678 C117.723858,105.212678 117.5,104.988821 117.5,104.712678 L117.5,103.512678 C117.5,103.236536 117.723858,103.012678 118,103.012678 C118.276142,103.012678 118.5,103.236536 118.5,103.512678 Z M0.5,102.661965 L0.5,103.861965 C0.5,104.138107 0.276142375,104.361965 0,104.361965 C-0.276142375,104.361965 -0.5,104.138107 -0.5,103.861965 L-0.5,102.661965 C-0.5,102.385823 -0.276142375,102.161965 0,102.161965 C0.276142375,102.161965 0.5,102.385823 0.5,102.661965 Z M118.5,99.3126783 L118.5,100.512678 C118.5,100.788821 118.276142,101.012678 118,101.012678 C117.723858,101.012678 117.5,100.788821 117.5,100.512678 L117.5,99.3126783 C117.5,99.036536 117.723858,98.8126783 118,98.8126783 C118.276142,98.8126783 118.5,99.036536 118.5,99.3126783 Z M0.5,98.461965 L0.5,99.661965 C0.5,99.9381074 0.276142375,100.161965 0,100.161965 C-0.276142375,100.161965 -0.5,99.9381074 -0.5,99.661965 L-0.5,98.461965 C-0.5,98.1858226 -0.276142375,97.961965 0,97.961965 C0.276142375,97.961965 0.5,98.1858226 0.5,98.461965 Z M118.5,95.1126783 L118.5,96.3126783 C118.5,96.5888207 118.276142,96.8126783 118,96.8126783 C117.723858,96.8126783 117.5,96.5888207 117.5,96.3126783 L117.5,95.1126783 C117.5,94.836536 117.723858,94.6126783 118,94.6126783 C118.276142,94.6126783 118.5,94.836536 118.5,95.1126783 Z M0.5,94.261965 L0.5,95.461965 C0.5,95.7381074 0.276142375,95.961965 0,95.961965 C-0.276142375,95.961965 -0.5,95.7381074 -0.5,95.461965 L-0.5,94.261965 C-0.5,93.9858226 -0.276142375,93.761965 0,93.761965 C0.276142375,93.761965 0.5,93.9858226 0.5,94.261965 Z M118.5,90.9126783 L118.5,92.1126783 C118.5,92.3888207 118.276142,92.6126783 118,92.6126783 C117.723858,92.6126783 117.5,92.3888207 117.5,92.1126783 L117.5,90.9126783 C117.5,90.636536 117.723858,90.4126783 118,90.4126783 C118.276142,90.4126783 118.5,90.636536 118.5,90.9126783 Z M0.5,90.061965 L0.5,91.261965 C0.5,91.5381074 0.276142375,91.761965 0,91.761965 C-0.276142375,91.761965 -0.5,91.5381074 -0.5,91.261965 L-0.5,90.061965 C-0.5,89.7858226 -0.276142375,89.561965 0,89.561965 C0.276142375,89.561965 0.5,89.7858226 0.5,90.061965 Z M118.5,86.7126783 L118.5,87.9126783 C118.5,88.1888207 118.276142,88.4126783 118,88.4126783 C117.723858,88.4126783 117.5,88.1888207 117.5,87.9126783 L117.5,86.7126783 C117.5,86.436536 117.723858,86.2126783 118,86.2126783 C118.276142,86.2126783 118.5,86.436536 118.5,86.7126783 Z M0.5,85.861965 L0.5,87.061965 C0.5,87.3381074 0.276142375,87.561965 0,87.561965 C-0.276142375,87.561965 -0.5,87.3381074 -0.5,87.061965 L-0.5,85.861965 C-0.5,85.5858226 -0.276142375,85.361965 0,85.361965 C0.276142375,85.361965 0.5,85.5858226 0.5,85.861965 Z M118.5,82.5126783 L118.5,83.7126783 C118.5,83.9888207 118.276142,84.2126783 118,84.2126783 C117.723858,84.2126783 117.5,83.9888207 117.5,83.7126783 L117.5,82.5126783 C117.5,82.236536 117.723858,82.0126783 118,82.0126783 C118.276142,82.0126783 118.5,82.236536 118.5,82.5126783 Z M0.5,81.661965 L0.5,82.861965 C0.5,83.1381074 0.276142375,83.361965 0,83.361965 C-0.276142375,83.361965 -0.5,83.1381074 -0.5,82.861965 L-0.5,81.661965 C-0.5,81.3858226 -0.276142375,81.161965 0,81.161965 C0.276142375,81.161965 0.5,81.3858226 0.5,81.661965 Z M118.5,78.3126783 L118.5,79.5126783 C118.5,79.7888207 118.276142,80.0126783 118,80.0126783 C117.723858,80.0126783 117.5,79.7888207 117.5,79.5126783 L117.5,78.3126783 C117.5,78.036536 117.723858,77.8126783 118,77.8126783 C118.276142,77.8126783 118.5,78.036536 118.5,78.3126783 Z M0.5,77.461965 L0.5,78.661965 C0.5,78.9381074 0.276142375,79.161965 0,79.161965 C-0.276142375,79.161965 -0.5,78.9381074 -0.5,78.661965 L-0.5,77.461965 C-0.5,77.1858226 -0.276142375,76.961965 0,76.961965 C0.276142375,76.961965 0.5,77.1858226 0.5,77.461965 Z M118.5,74.1126783 L118.5,75.3126783 C118.5,75.5888207 118.276142,75.8126783 118,75.8126783 C117.723858,75.8126783 117.5,75.5888207 117.5,75.3126783 L117.5,74.1126783 C117.5,73.836536 117.723858,73.6126783 118,73.6126783 C118.276142,73.6126783 118.5,73.836536 118.5,74.1126783 Z M0.5,73.261965 L0.5,74.461965 C0.5,74.7381074 0.276142375,74.961965 0,74.961965 C-0.276142375,74.961965 -0.5,74.7381074 -0.5,74.461965 L-0.5,73.261965 C-0.5,72.9858226 -0.276142375,72.761965 0,72.761965 C0.276142375,72.761965 0.5,72.9858226 0.5,73.261965 Z M118.5,69.9126783 L118.5,71.1126783 C118.5,71.3888207 118.276142,71.6126783 118,71.6126783 C117.723858,71.6126783 117.5,71.3888207 117.5,71.1126783 L117.5,69.9126783 C117.5,69.636536 117.723858,69.4126783 118,69.4126783 C118.276142,69.4126783 118.5,69.636536 118.5,69.9126783 Z M0.5,69.061965 L0.5,70.261965 C0.5,70.5381074 0.276142375,70.761965 0,70.761965 C-0.276142375,70.761965 -0.5,70.5381074 -0.5,70.261965 L-0.5,69.061965 C-0.5,68.7858226 -0.276142375,68.561965 0,68.561965 C0.276142375,68.561965 0.5,68.7858226 0.5,69.061965 Z M118.5,65.7126783 L118.5,66.9126783 C118.5,67.1888207 118.276142,67.4126783 118,67.4126783 C117.723858,67.4126783 117.5,67.1888207 117.5,66.9126783 L117.5,65.7126783 C117.5,65.436536 117.723858,65.2126783 118,65.2126783 C118.276142,65.2126783 118.5,65.436536 118.5,65.7126783 Z M0.5,64.861965 L0.5,66.061965 C0.5,66.3381074 0.276142375,66.561965 0,66.561965 C-0.276142375,66.561965 -0.5,66.3381074 -0.5,66.061965 L-0.5,64.861965 C-0.5,64.5858226 -0.276142375,64.361965 0,64.361965 C0.276142375,64.361965 0.5,64.5858226 0.5,64.861965 Z M118.5,61.5126783 L118.5,62.7126783 C118.5,62.9888207 118.276142,63.2126783 118,63.2126783 C117.723858,63.2126783 117.5,62.9888207 117.5,62.7126783 L117.5,61.5126783 C117.5,61.236536 117.723858,61.0126783 118,61.0126783 C118.276142,61.0126783 118.5,61.236536 118.5,61.5126783 Z M0.5,60.661965 L0.5,61.861965 C0.5,62.1381074 0.276142375,62.361965 0,62.361965 C-0.276142375,62.361965 -0.5,62.1381074 -0.5,61.861965 L-0.5,60.661965 C-0.5,60.3858226 -0.276142375,60.161965 0,60.161965 C0.276142375,60.161965 0.5,60.3858226 0.5,60.661965 Z M118.5,57.3126783 L118.5,58.5126783 C118.5,58.7888207 118.276142,59.0126783 118,59.0126783 C117.723858,59.0126783 117.5,58.7888207 117.5,58.5126783 L117.5,57.3126783 C117.5,57.036536 117.723858,56.8126783 118,56.8126783 C118.276142,56.8126783 118.5,57.036536 118.5,57.3126783 Z M0.5,56.461965 L0.5,57.661965 C0.5,57.9381074 0.276142375,58.161965 0,58.161965 C-0.276142375,58.161965 -0.5,57.9381074 -0.5,57.661965 L-0.5,56.461965 C-0.5,56.1858226 -0.276142375,55.961965 0,55.961965 C0.276142375,55.961965 0.5,56.1858226 0.5,56.461965 Z M118.5,53.1126783 L118.5,54.3126783 C118.5,54.5888207 118.276142,54.8126783 118,54.8126783 C117.723858,54.8126783 117.5,54.5888207 117.5,54.3126783 L117.5,53.1126783 C117.5,52.836536 117.723858,52.6126783 118,52.6126783 C118.276142,52.6126783 118.5,52.836536 118.5,53.1126783 Z M0.5,52.261965 L0.5,53.461965 C0.5,53.7381074 0.276142375,53.961965 0,53.961965 C-0.276142375,53.961965 -0.5,53.7381074 -0.5,53.461965 L-0.5,52.261965 C-0.5,51.9858226 -0.276142375,51.761965 0,51.761965 C0.276142375,51.761965 0.5,51.9858226 0.5,52.261965 Z M118.5,48.9126783 L118.5,50.1126783 C118.5,50.3888207 118.276142,50.6126783 118,50.6126783 C117.723858,50.6126783 117.5,50.3888207 117.5,50.1126783 L117.5,48.9126783 C117.5,48.636536 117.723858,48.4126783 118,48.4126783 C118.276142,48.4126783 118.5,48.636536 118.5,48.9126783 Z M0.5,48.061965 L0.5,49.261965 C0.5,49.5381074 0.276142375,49.761965 0,49.761965 C-0.276142375,49.761965 -0.5,49.5381074 -0.5,49.261965 L-0.5,48.061965 C-0.5,47.7858226 -0.276142375,47.561965 0,47.561965 C0.276142375,47.561965 0.5,47.7858226 0.5,48.061965 Z M118.5,44.7126783 L118.5,45.9126783 C118.5,46.1888207 118.276142,46.4126783 118,46.4126783 C117.723858,46.4126783 117.5,46.1888207 117.5,45.9126783 L117.5,44.7126783 C117.5,44.436536 117.723858,44.2126783 118,44.2126783 C118.276142,44.2126783 118.5,44.436536 118.5,44.7126783 Z M0.5,43.861965 L0.5,45.061965 C0.5,45.3381074 0.276142375,45.561965 0,45.561965 C-0.276142375,45.561965 -0.5,45.3381074 -0.5,45.061965 L-0.5,43.861965 C-0.5,43.5858226 -0.276142375,43.361965 0,43.361965 C0.276142375,43.361965 0.5,43.5858226 0.5,43.861965 Z M118.5,40.5126783 L118.5,41.7126783 C118.5,41.9888207 118.276142,42.2126783 118,42.2126783 C117.723858,42.2126783 117.5,41.9888207 117.5,41.7126783 L117.5,40.5126783 C117.5,40.236536 117.723858,40.0126783 118,40.0126783 C118.276142,40.0126783 118.5,40.236536 118.5,40.5126783 Z M0.5,39.661965 L0.5,40.861965 C0.5,41.1381074 0.276142375,41.361965 0,41.361965 C-0.276142375,41.361965 -0.5,41.1381074 -0.5,40.861965 L-0.5,39.661965 C-0.5,39.3858226 -0.276142375,39.161965 0,39.161965 C0.276142375,39.161965 0.5,39.3858226 0.5,39.661965 Z M118.5,36.3126783 L118.5,37.5126783 C118.5,37.7888207 118.276142,38.0126783 118,38.0126783 C117.723858,38.0126783 117.5,37.7888207 117.5,37.5126783 L117.5,36.3126783 C117.5,36.036536 117.723858,35.8126783 118,35.8126783 C118.276142,35.8126783 118.5,36.036536 118.5,36.3126783 Z M0.5,35.461965 L0.5,36.661965 C0.5,36.9381074 0.276142375,37.161965 0,37.161965 C-0.276142375,37.161965 -0.5,36.9381074 -0.5,36.661965 L-0.5,35.461965 C-0.5,35.1858226 -0.276142375,34.961965 0,34.961965 C0.276142375,34.961965 0.5,35.1858226 0.5,35.461965 Z M118.5,32.1126783 L118.5,33.3126783 C118.5,33.5888207 118.276142,33.8126783 118,33.8126783 C117.723858,33.8126783 117.5,33.5888207 117.5,33.3126783 L117.5,32.1126783 C117.5,31.836536 117.723858,31.6126783 118,31.6126783 C118.276142,31.6126783 118.5,31.836536 118.5,32.1126783 Z M0.5,31.261965 L0.5,32.461965 C0.5,32.7381074 0.276142375,32.961965 0,32.961965 C-0.276142375,32.961965 -0.5,32.7381074 -0.5,32.461965 L-0.5,31.261965 C-0.5,30.9858226 -0.276142375,30.761965 0,30.761965 C0.276142375,30.761965 0.5,30.9858226 0.5,31.261965 Z M118.5,27.9126783 L118.5,29.1126783 C118.5,29.3888207 118.276142,29.6126783 118,29.6126783 C117.723858,29.6126783 117.5,29.3888207 117.5,29.1126783 L117.5,27.9126783 C117.5,27.636536 117.723858,27.4126783 118,27.4126783 C118.276142,27.4126783 118.5,27.636536 118.5,27.9126783 Z M0.5,27.061965 L0.5,28.261965 C0.5,28.5381074 0.276142375,28.761965 0,28.761965 C-0.276142375,28.761965 -0.5,28.5381074 -0.5,28.261965 L-0.5,27.061965 C-0.5,26.7858226 -0.276142375,26.561965 0,26.561965 C0.276142375,26.561965 0.5,26.7858226 0.5,27.061965 Z M118.5,23.7126783 L118.5,24.9126783 C118.5,25.1888207 118.276142,25.4126783 118,25.4126783 C117.723858,25.4126783 117.5,25.1888207 117.5,24.9126783 L117.5,23.7126783 C117.5,23.436536 117.723858,23.2126783 118,23.2126783 C118.276142,23.2126783 118.5,23.436536 118.5,23.7126783 Z M0.5,22.861965 L0.5,24.061965 C0.5,24.3381074 0.276142375,24.561965 0,24.561965 C-0.276142375,24.561965 -0.5,24.3381074 -0.5,24.061965 L-0.5,22.861965 C-0.5,22.5858226 -0.276142375,22.361965 0,22.361965 C0.276142375,22.361965 0.5,22.5858226 0.5,22.861965 Z M118.5,19.5126783 L118.5,20.7126783 C118.5,20.9888207 118.276142,21.2126783 118,21.2126783 C117.723858,21.2126783 117.5,20.9888207 117.5,20.7126783 L117.5,19.5126783 C117.5,19.236536 117.723858,19.0126783 118,19.0126783 C118.276142,19.0126783 118.5,19.236536 118.5,19.5126783 Z M0.5,18.661965 L0.5,19.861965 C0.5,20.1381074 0.276142375,20.361965 0,20.361965 C-0.276142375,20.361965 -0.5,20.1381074 -0.5,19.861965 L-0.5,18.661965 C-0.5,18.3858226 -0.276142375,18.161965 0,18.161965 C0.276142375,18.161965 0.5,18.3858226 0.5,18.661965 Z M118.5,15.3126783 L118.5,16.5126783 C118.5,16.7888207 118.276142,17.0126783 118,17.0126783 C117.723858,17.0126783 117.5,16.7888207 117.5,16.5126783 L117.5,15.3126783 C117.5,15.036536 117.723858,14.8126783 118,14.8126783 C118.276142,14.8126783 118.5,15.036536 118.5,15.3126783 Z M0.5,14.461965 L0.5,15.661965 C0.5,15.9381074 0.276142375,16.161965 0,16.161965 C-0.276142375,16.161965 -0.5,15.9381074 -0.5,15.661965 L-0.5,14.461965 C-0.5,14.1858226 -0.276142375,13.961965 0,13.961965 C0.276142375,13.961965 0.5,14.1858226 0.5,14.461965 Z M118.5,11.1126783 L118.5,12.3126783 C118.5,12.5888207 118.276142,12.8126783 118,12.8126783 C117.723858,12.8126783 117.5,12.5888207 117.5,12.3126783 L117.5,11.1126783 C117.5,10.836536 117.723858,10.6126783 118,10.6126783 C118.276142,10.6126783 118.5,10.836536 118.5,11.1126783 Z M0.5,10.261965 L0.5,11.461965 C0.5,11.7381074 0.276142375,11.961965 0,11.961965 C-0.276142375,11.961965 -0.5,11.7381074 -0.5,11.461965 L-0.5,10.261965 C-0.5,9.98582265 -0.276142375,9.76196502 0,9.76196502 C0.276142375,9.76196502 0.5,9.98582265 0.5,10.261965 Z M118.419805,6.82803952 C118.47219,7.20768546 118.499081,7.59225728 118.499978,7.98103351 L118.5,8.11259543 C118.500046,8.3887378 118.276225,8.61263254 118.000083,8.61267833 C117.723941,8.6127241 117.500046,8.3889036 117.5,8.11276123 L117.49998,7.98227163 C117.499188,7.63955287 117.475436,7.29987535 117.429191,6.96472849 C117.391445,6.69117799 117.582603,6.43882262 117.856153,6.401077 C118.129704,6.36333138 118.382059,6.55448901 118.419805,6.82803952 Z M0.351560452,5.59425596 C0.619685241,5.66031433 0.783492437,5.93122331 0.717434064,6.1993481 C0.628029624,6.5622324 0.565753874,6.93265807 0.531429382,7.30840907 C0.506308636,7.58340645 0.263014824,7.78597148 -0.0119825569,7.76085074 C-0.286979937,7.73572999 -0.489544973,7.49243618 -0.464424227,7.2174388 C-0.425527861,6.79163939 -0.354921179,6.37166007 -0.25353169,5.96012957 C-0.187473317,5.69200478 0.0834356631,5.52819758 0.351560452,5.59425596 Z M116.698898,2.76733419 C116.96108,3.1025346 117.197883,3.45694423 117.407088,3.8275859 C117.542824,4.06806518 117.457913,4.37304812 117.217434,4.50878435 C116.976955,4.64452059 116.671972,4.55960957 116.536235,4.31913028 C116.35164,3.9920895 116.14265,3.67930561 115.911223,3.38342515 C115.741094,3.1659147 115.779505,2.85167098 115.997015,2.68154216 C116.214525,2.51141334 116.528769,2.54982374 116.698898,2.76733419 Z M2.65018329,2.03098857 C2.84694705,2.22473746 2.84939109,2.54131052 2.6556422,2.73807428 C2.39207973,3.00573797 2.14880502,3.29275366 1.92806459,3.59657449 C1.76575137,3.81997777 1.45306636,3.86950105 1.22966307,3.70718783 C1.00625978,3.54487461 0.956736501,3.23218959 1.11904973,3.00878631 C1.36908653,2.66464279 1.64461146,2.33957825 1.94309758,2.03644748 C2.13684647,1.83968372 2.45341952,1.83723968 2.65018329,2.03098857 Z M113.131516,0.0955761984 C113.527572,0.252607282 113.910844,0.439252343 114.278336,0.653707596 C114.516838,0.792888857 114.597354,1.09906161 114.458172,1.33756357 C114.318991,1.57606554 114.012818,1.65658088 113.774317,1.51739962 C113.450161,1.32823426 113.112159,1.1636344 112.762943,1.0251747 C112.506241,0.923395871 112.380651,0.63279011 112.48243,0.376088571 C112.584209,0.119387032 112.874815,-0.00620263532 113.131516,0.0955761984 Z M6.40653195,0.142669081 C6.47802057,0.409397386 6.31974754,0.683576376 6.05301923,0.755064996 C5.68981674,0.852410663 5.33533565,0.976934286 4.9920952,1.12738697 C4.73918251,1.23824627 4.44428705,1.12308924 4.33342776,0.870176548 C4.22256847,0.617263859 4.3377255,0.322368401 4.59063819,0.211509108 C4.97997232,0.0408521629 5.38210756,-0.100411613 5.79413604,-0.21084364 C6.06086434,-0.282332259 6.33504333,-0.124059223 6.40653195,0.142669081 Z M110.500405,2.47493888e-05 C110.500405,0.276167124 110.276522,0.500013673 110.00038,0.5 L108.8,0.5 C108.523858,0.5 108.3,0.276142375 108.3,0 C108.3,-0.276142375 108.523858,-0.5 108.8,-0.5 L110,-0.5 C110.276572,-0.499986326 110.500405,-0.276117625 110.500405,2.47493888e-05 Z M106.3,0 C106.3,0.276142375 106.076142,0.5 105.8,0.5 L104.6,0.5 C104.323858,0.5 104.1,0.276142375 104.1,0 C104.1,-0.276142375 104.323858,-0.5 104.6,-0.5 L105.8,-0.5 C106.076142,-0.5 106.3,-0.276142375 106.3,0 Z M102.1,0 C102.1,0.276142375 101.876142,0.5 101.6,0.5 L100.4,0.5 C100.123858,0.5 99.9,0.276142375 99.9,0 C99.9,-0.276142375 100.123858,-0.5 100.4,-0.5 L101.6,-0.5 C101.876142,-0.5 102.1,-0.276142375 102.1,0 Z M97.9,0 C97.9,0.276142375 97.6761424,0.5 97.4,0.5 L96.2,0.5 C95.9238576,0.5 95.7,0.276142375 95.7,0 C95.7,-0.276142375 95.9238576,-0.5 96.2,-0.5 L97.4,-0.5 C97.6761424,-0.5 97.9,-0.276142375 97.9,0 Z M93.7,0 C93.7,0.276142375 93.4761424,0.5 93.2,0.5 L92,0.5 C91.7238576,0.5 91.5,0.276142375 91.5,0 C91.5,-0.276142375 91.7238576,-0.5 92,-0.5 L93.2,-0.5 C93.4761424,-0.5 93.7,-0.276142375 93.7,0 Z M89.5,0 C89.5,0.276142375 89.2761424,0.5 89,0.5 L87.8,0.5 C87.5238576,0.5 87.3,0.276142375 87.3,0 C87.3,-0.276142375 87.5238576,-0.5 87.8,-0.5 L89,-0.5 C89.2761424,-0.5 89.5,-0.276142375 89.5,0 Z M85.3,0 C85.3,0.276142375 85.0761424,0.5 84.8,0.5 L83.6,0.5 C83.3238576,0.5 83.1,0.276142375 83.1,0 C83.1,-0.276142375 83.3238576,-0.5 83.6,-0.5 L84.8,-0.5 C85.0761424,-0.5 85.3,-0.276142375 85.3,0 Z M81.1,0 C81.1,0.276142375 80.8761424,0.5 80.6,0.5 L79.4,0.5 C79.1238576,0.5 78.9,0.276142375 78.9,0 C78.9,-0.276142375 79.1238576,-0.5 79.4,-0.5 L80.6,-0.5 C80.8761424,-0.5 81.1,-0.276142375 81.1,0 Z M76.9,0 C76.9,0.276142375 76.6761424,0.5 76.4,0.5 L75.2,0.5 C74.9238576,0.5 74.7,0.276142375 74.7,0 C74.7,-0.276142375 74.9238576,-0.5 75.2,-0.5 L76.4,-0.5 C76.6761424,-0.5 76.9,-0.276142375 76.9,0 Z M72.7,0 C72.7,0.276142375 72.4761424,0.5 72.2,0.5 L71,0.5 C70.7238576,0.5 70.5,0.276142375 70.5,0 C70.5,-0.276142375 70.7238576,-0.5 71,-0.5 L72.2,-0.5 C72.4761424,-0.5 72.7,-0.276142375 72.7,0 Z M68.5,0 C68.5,0.276142375 68.2761424,0.5 68,0.5 L66.8,0.5 C66.5238576,0.5 66.3,0.276142375 66.3,0 C66.3,-0.276142375 66.5238576,-0.5 66.8,-0.5 L68,-0.5 C68.2761424,-0.5 68.5,-0.276142375 68.5,0 Z M64.3,0 C64.3,0.276142375 64.0761424,0.5 63.8,0.5 L62.6,0.5 C62.3238576,0.5 62.1,0.276142375 62.1,0 C62.1,-0.276142375 62.3238576,-0.5 62.6,-0.5 L63.8,-0.5 C64.0761424,-0.5 64.3,-0.276142375 64.3,0 Z M60.1,0 C60.1,0.276142375 59.8761424,0.5 59.6,0.5 L58.4,0.5 C58.1238576,0.5 57.9,0.276142375 57.9,0 C57.9,-0.276142375 58.1238576,-0.5 58.4,-0.5 L59.6,-0.5 C59.8761424,-0.5 60.1,-0.276142375 60.1,0 Z M55.9,0 C55.9,0.276142375 55.6761424,0.5 55.4,0.5 L54.2,0.5 C53.9238576,0.5 53.7,0.276142375 53.7,0 C53.7,-0.276142375 53.9238576,-0.5 54.2,-0.5 L55.4,-0.5 C55.6761424,-0.5 55.9,-0.276142375 55.9,0 Z M51.7,0 C51.7,0.276142375 51.4761424,0.5 51.2,0.5 L50,0.5 C49.7238576,0.5 49.5,0.276142375 49.5,0 C49.5,-0.276142375 49.7238576,-0.5 50,-0.5 L51.2,-0.5 C51.4761424,-0.5 51.7,-0.276142375 51.7,0 Z M47.5,0 C47.5,0.276142375 47.2761424,0.5 47,0.5 L45.8,0.5 C45.5238576,0.5 45.3,0.276142375 45.3,0 C45.3,-0.276142375 45.5238576,-0.5 45.8,-0.5 L47,-0.5 C47.2761424,-0.5 47.5,-0.276142375 47.5,0 Z M43.3,0 C43.3,0.276142375 43.0761424,0.5 42.8,0.5 L41.6,0.5 C41.3238576,0.5 41.1,0.276142375 41.1,0 C41.1,-0.276142375 41.3238576,-0.5 41.6,-0.5 L42.8,-0.5 C43.0761424,-0.5 43.3,-0.276142375 43.3,0 Z M39.1,0 C39.1,0.276142375 38.8761424,0.5 38.6,0.5 L37.4,0.5 C37.1238576,0.5 36.9,0.276142375 36.9,0 C36.9,-0.276142375 37.1238576,-0.5 37.4,-0.5 L38.6,-0.5 C38.8761424,-0.5 39.1,-0.276142375 39.1,0 Z M34.9,0 C34.9,0.276142375 34.6761424,0.5 34.4,0.5 L33.2,0.5 C32.9238576,0.5 32.7,0.276142375 32.7,0 C32.7,-0.276142375 32.9238576,-0.5 33.2,-0.5 L34.4,-0.5 C34.6761424,-0.5 34.9,-0.276142375 34.9,0 Z M30.7,0 C30.7,0.276142375 30.4761424,0.5 30.2,0.5 L29,0.5 C28.7238576,0.5 28.5,0.276142375 28.5,0 C28.5,-0.276142375 28.7238576,-0.5 29,-0.5 L30.2,-0.5 C30.4761424,-0.5 30.7,-0.276142375 30.7,0 Z M26.5,0 C26.5,0.276142375 26.2761424,0.5 26,0.5 L24.8,0.5 C24.5238576,0.5 24.3,0.276142375 24.3,0 C24.3,-0.276142375 24.5238576,-0.5 24.8,-0.5 L26,-0.5 C26.2761424,-0.5 26.5,-0.276142375 26.5,0 Z M22.3,0 C22.3,0.276142375 22.0761424,0.5 21.8,0.5 L20.6,0.5 C20.3238576,0.5 20.1,0.276142375 20.1,0 C20.1,-0.276142375 20.3238576,-0.5 20.6,-0.5 L21.8,-0.5 C22.0761424,-0.5 22.3,-0.276142375 22.3,0 Z M18.1,0 C18.1,0.276142375 17.8761424,0.5 17.6,0.5 L16.4,0.5 C16.1238576,0.5 15.9,0.276142375 15.9,0 C15.9,-0.276142375 16.1238576,-0.5 16.4,-0.5 L17.6,-0.5 C17.8761424,-0.5 18.1,-0.276142375 18.1,0 Z M13.9,0 C13.9,0.276142375 13.6761424,0.5 13.4,0.5 L12.2,0.5 C11.9238576,0.5 11.7,0.276142375 11.7,0 C11.7,-0.276142375 11.9238576,-0.5 12.2,-0.5 L13.4,-0.5 C13.6761424,-0.5 13.9,-0.276142375 13.9,0 Z M9.7,0 C9.7,0.276142375 9.47614237,0.5 9.2,0.5 L8,0.5 C7.72385763,0.5 7.5,0.276142375 7.5,0 C7.5,-0.276142375 7.72385763,-0.5 8,-0.5 L9.2,-0.5 C9.47614237,-0.5 9.7,-0.276142375 9.7,0 Z" fill="#999999" fill-rule="nonzero"></path>
+                    <g id="LLM" stroke-width="1" fill-rule="evenodd" transform="translate(6.5, 4)" fill="#7D7D7D">
+                        <path d="M1.80126953,8 L5.30224609,8 C5.44873047,8 5.56266276,7.95198568 5.64404297,7.85595703 C5.72542318,7.75992839 5.76611328,7.6484375 5.76611328,7.52148437 C5.76611328,7.39453125 5.72460938,7.28222656 5.64160156,7.18457031 C5.55859375,7.08691406 5.44547526,7.03808594 5.30224609,7.03808594 L2.39697266,7.03808594 L2.39697266,1.58398437 C2.39697266,1.3984375 2.33837891,1.25439453 2.22119141,1.15185547 C2.10400391,1.04931641 1.96240234,0.998046875 1.79638672,0.998046875 C1.63037109,0.998046875 1.48714193,1.04931641 1.36669922,1.15185547 C1.24625651,1.25439453 1.18603516,1.3984375 1.18603516,1.58398437 L1.18603516,7.39453125 C1.18603516,7.56054687 1.24707031,7.70296224 1.36914062,7.82177734 C1.49121094,7.94059245 1.63525391,8 1.80126953,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M7.27978516,8 L10.7807617,8 C10.9272461,8 11.0411784,7.95198568 11.1225586,7.85595703 C11.2039388,7.75992839 11.2446289,7.6484375 11.2446289,7.52148437 C11.2446289,7.39453125 11.203125,7.28222656 11.1201172,7.18457031 C11.0371094,7.08691406 10.9239909,7.03808594 10.7807617,7.03808594 L7.87548828,7.03808594 L7.87548828,1.58398437 C7.87548828,1.3984375 7.81689453,1.25439453 7.69970703,1.15185547 C7.58251953,1.04931641 7.44091797,0.998046875 7.27490234,0.998046875 C7.10888672,0.998046875 6.96565755,1.04931641 6.84521484,1.15185547 C6.72477214,1.25439453 6.66455078,1.3984375 6.66455078,1.58398437 L6.66455078,7.39453125 C6.66455078,7.56054687 6.72558594,7.70296224 6.84765625,7.82177734 C6.96972656,7.94059245 7.11376953,8 7.27978516,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M15.9418945,7.78027344 L15.9418945,6.31054688 L14.0473633,1.88183594 C13.9529622,1.65722656 13.8162435,1.48876953 13.637207,1.37646484 C13.4581706,1.26416016 13.2661133,1.20800781 13.0610352,1.20800781 C12.8136393,1.20800781 12.5996094,1.2796224 12.4189453,1.42285156 C12.2382812,1.56608073 12.1479492,1.75 12.1479492,1.97460938 L12.1479492,7.47753906 C12.1479492,7.60449219 12.2032878,7.70377604 12.3139648,7.77539063 C12.4246419,7.84700521 12.5556641,7.8828125 12.7070312,7.8828125 C12.8583984,7.8828125 12.9894206,7.84700521 13.1000977,7.77539063 C13.2107747,7.70377604 13.2661133,7.60449219 13.2661133,7.47753906 L13.2661133,2.5703125 L15.2436523,7.25292969 C15.3152669,7.42220052 15.4145508,7.55240885 15.5415039,7.64355469 C15.668457,7.73470052 15.8019206,7.78027344 15.9418945,7.78027344 Z M15.9467773,7.78027344 C16.0867513,7.78027344 16.2210286,7.73388672 16.3496094,7.64111328 C16.4781901,7.54833984 16.5766602,7.41894531 16.6450195,7.25292969 L18.6274414,2.5703125 L18.6274414,7.47753906 C18.6274414,7.60449219 18.6827799,7.70377604 18.793457,7.77539063 C18.9041341,7.84700521 19.0351562,7.8828125 19.1865234,7.8828125 C19.3378906,7.8828125 19.4689128,7.84700521 19.5795898,7.77539063 C19.6902669,7.70377604 19.7456055,7.60449219 19.7456055,7.47753906 L19.7456055,1.97460938 C19.7456055,1.82486979 19.702474,1.69059245 19.6162109,1.57177734 C19.5299479,1.45296224 19.4176432,1.36263021 19.2792969,1.30078125 C19.1409505,1.23893229 18.9920247,1.20800781 18.8325195,1.20800781 C18.6274414,1.20800781 18.4353841,1.26416016 18.2563477,1.37646484 C18.0773112,1.48876953 17.9389648,1.65722656 17.8413086,1.88183594 L15.9467773,6.31054688 L15.9467773,7.78027344 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="Lines-@blue-w-arrow" transform="translate(20, 0)" fill="#8E8E8E" fill-rule="nonzero">
+                    <g id="L5" transform="translate(0, 138)">
+                        <path d="M77.5,-0.277777778 L80,4.72222222 L78,4.72222222 L78,20 C78,20.2761424 77.7761424,20.5 77.5,20.5 C77.2238576,20.5 77,20.2761424 77,20 L77,4.72222222 L75,4.72222222 L77.5,-0.277777778 Z M32.5,-0.277777778 L35,4.72222222 L33,4.72222222 L33,20 C33,20.2761424 32.7761424,20.5 32.5,20.5 C32.2238576,20.5 32,20.2761424 32,20 L32,4.72222222 L30,4.72222222 L32.5,-0.277777778 Z M0.5,-0.277777778 L3,4.72222222 L0.999,4.72222222 L1,20 C1,20.2761424 0.776142375,20.5 0.5,20.5 C0.223857625,20.5 1.33226763e-15,20.2761424 1.33226763e-15,20 L-0.001,4.72222222 L-2,4.72222222 L0.5,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L4" transform="translate(0.0001, 110)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L3" transform="translate(0, 82)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L2" transform="translate(0.0001, 54)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253515e-06,16.2761424 -1.49253515e-06,16 L-0.00100149254,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L1" transform="translate(39, 0)">
+                        <path d="M0.5,0.722222222 L3,5.72222222 L0.999,5.72222222 L1,42 C1,42.2761424 0.776142375,42.5 0.5,42.5 C0.223857625,42.5 2.22044605e-15,42.2761424 2.22044605e-15,42 L-0.001,5.72222222 L-2,5.72222222 L0.5,0.722222222 Z" id="Combined-Shape"></path>
+                    </g>
+                </g>
+                <g id="L5" transform="translate(8, 158)">
+                    <g id="1" fill="#FDB515">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="2" transform="translate(32, 0)" fill="#FDB515">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="3" transform="translate(78, 0)" fill="#FDB515">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="..." transform="translate(62.7, 5)" fill="#8E8E8E">
+                        <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                    </g>
+                </g>
+                <g id="L1" transform="translate(8, 42)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L2" transform="translate(8, 70)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L3" transform="translate(8, 98)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L4" transform="translate(8, 126)" fill="#FDB515">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+            </g>
+            <g id="MEAN-@blue" transform="translate(159, 176)">
+                <g id="LLM-dash-box" transform="translate(0, 22)" xlink:href="#path-17">
+                    <path d="M7.60126829,127.489595 C7.73368411,127.496523 7.86662278,127.5 8,127.5 L8.77464335,127.5 C9.05078572,127.5 9.27464335,127.723858 9.27464335,128 C9.27464335,128.276142 9.05078572,128.5 8.77464335,128.5 L8,128.5 C7.8491935,128.5 7.69883087,128.496068 7.54901422,128.488228 C7.27324911,128.473799 7.06139478,128.23855 7.07582434,127.962785 C7.09025391,127.687019 7.32550317,127.475165 7.60126829,127.489595 Z M101.674643,128 C101.674643,128.276142 101.450786,128.5 101.174643,128.5 L99.9746433,128.5 C99.698501,128.5 99.4746433,128.276142 99.4746433,128 C99.4746433,127.723858 99.698501,127.5 99.9746433,127.5 L101.174643,127.5 C101.450786,127.5 101.674643,127.723858 101.674643,128 Z M17.6746433,128 C17.6746433,128.276142 17.4507857,128.5 17.1746433,128.5 L15.9746433,128.5 C15.698501,128.5 15.4746433,128.276142 15.4746433,128 C15.4746433,127.723858 15.698501,127.5 15.9746433,127.5 L17.1746434,127.5 C17.4507857,127.5 17.6746433,127.723858 17.6746433,128 Z M21.8746433,128 C21.8746433,128.276142 21.6507857,128.5 21.3746433,128.5 L20.1746433,128.5 C19.898501,128.5 19.6746433,128.276142 19.6746433,128 C19.6746433,127.723858 19.898501,127.5 20.1746433,127.5 L21.3746434,127.5 C21.6507857,127.5 21.8746433,127.723858 21.8746433,128 Z M26.0746433,128 C26.0746433,128.276142 25.8507857,128.5 25.5746433,128.5 L24.3746433,128.5 C24.098501,128.5 23.8746433,128.276142 23.8746433,128 C23.8746433,127.723858 24.098501,127.5 24.3746433,127.5 L25.5746435,127.5 C25.8507857,127.5 26.0746433,127.723858 26.0746433,128 Z M30.2746433,128 C30.2746433,128.276142 30.0507857,128.5 29.7746433,128.5 L28.5746433,128.5 C28.298501,128.5 28.0746433,128.276142 28.0746433,128 C28.0746433,127.723858 28.298501,127.5 28.5746433,127.5 L29.7746434,127.5 C30.0507857,127.5 30.2746433,127.723858 30.2746433,128 Z M34.4746433,128 C34.4746433,128.276142 34.2507857,128.5 33.9746433,128.5 L32.7746433,128.5 C32.498501,128.5 32.2746433,128.276142 32.2746433,128 C32.2746433,127.723858 32.498501,127.5 32.7746433,127.5 L33.9746434,127.5 C34.2507857,127.5 34.4746433,127.723858 34.4746433,128 Z M38.6746433,128 C38.6746433,128.276142 38.4507857,128.5 38.1746433,128.5 L36.9746433,128.5 C36.698501,128.5 36.4746433,128.276142 36.4746433,128 C36.4746433,127.723858 36.698501,127.5 36.9746433,127.5 L38.1746434,127.5 C38.4507857,127.5 38.6746433,127.723858 38.6746433,128 Z M42.8746433,128 C42.8746433,128.276142 42.6507857,128.5 42.3746433,128.5 L41.1746433,128.5 C40.898501,128.5 40.6746433,128.276142 40.6746433,128 C40.6746433,127.723858 40.898501,127.5 41.1746433,127.5 L42.3746434,127.5 C42.6507857,127.5 42.8746433,127.723858 42.8746433,128 Z M47.0746433,128 C47.0746433,128.276142 46.8507857,128.5 46.5746433,128.5 L45.3746433,128.5 C45.098501,128.5 44.8746433,128.276142 44.8746433,128 C44.8746433,127.723858 45.098501,127.5 45.3746433,127.5 L46.5746434,127.5 C46.8507857,127.5 47.0746433,127.723858 47.0746433,128 Z M51.2746433,128 C51.2746433,128.276142 51.0507857,128.5 50.7746433,128.5 L49.5746433,128.5 C49.298501,128.5 49.0746433,128.276142 49.0746433,128 C49.0746433,127.723858 49.298501,127.5 49.5746433,127.5 L50.7746434,127.5 C51.0507857,127.5 51.2746433,127.723858 51.2746433,128 Z M55.4746433,128 C55.4746433,128.276142 55.2507857,128.5 54.9746433,128.5 L53.7746433,128.5 C53.498501,128.5 53.2746433,128.276142 53.2746433,128 C53.2746433,127.723858 53.498501,127.5 53.7746433,127.5 L54.9746434,127.5 C55.2507857,127.5 55.4746433,127.723858 55.4746433,128 Z M59.6746433,128 C59.6746433,128.276142 59.4507857,128.5 59.1746433,128.5 L57.9746433,128.5 C57.698501,128.5 57.4746433,128.276142 57.4746433,128 C57.4746433,127.723858 57.698501,127.5 57.9746433,127.5 L59.1746433,127.5 C59.4507857,127.5 59.6746433,127.723858 59.6746433,128 Z M105.874643,128 C105.874643,128.276142 105.650786,128.5 105.374643,128.5 L104.174643,128.5 C103.898501,128.5 103.674643,128.276142 103.674643,128 C103.674643,127.723858 103.898501,127.5 104.174643,127.5 L105.374643,127.5 C105.650786,127.5 105.874643,127.723858 105.874643,128 Z M110.074643,128 C110.074643,128.276142 109.850786,128.5 109.574643,128.5 L108.374643,128.5 C108.098501,128.5 107.874643,128.276142 107.874643,128 C107.874643,127.723858 108.098501,127.5 108.374643,127.5 L109.574643,127.5 C109.850786,127.5 110.074643,127.723858 110.074643,128 Z M13.4746433,128 C13.4746433,128.276142 13.2507857,128.5 12.9746433,128.5 L11.7746433,128.5 C11.498501,128.5 11.2746433,128.276142 11.2746433,128 C11.2746433,127.723858 11.498501,127.5 11.7746433,127.5 L12.9746434,127.5 C13.2507857,127.5 13.4746433,127.723858 13.4746433,128 Z M63.8746433,128 C63.8746433,128.276142 63.6507857,128.5 63.3746433,128.5 L62.1746433,128.5 C61.898501,128.5 61.6746433,128.276142 61.6746433,128 C61.6746433,127.723858 61.898501,127.5 62.1746433,127.5 L63.3746432,127.5 C63.6507857,127.5 63.8746433,127.723858 63.8746433,128 Z M68.0746433,128 C68.0746433,128.276142 67.8507857,128.5 67.5746433,128.5 L66.3746433,128.5 C66.098501,128.5 65.8746433,128.276142 65.8746433,128 C65.8746433,127.723858 66.098501,127.5 66.3746433,127.5 L67.5746433,127.5 C67.8507857,127.5 68.0746433,127.723858 68.0746433,128 Z M72.2746433,128 C72.2746433,128.276142 72.0507857,128.5 71.7746433,128.5 L70.5746433,128.5 C70.298501,128.5 70.0746433,128.276142 70.0746433,128 C70.0746433,127.723858 70.298501,127.5 70.5746433,127.5 L71.7746433,127.5 C72.0507857,127.5 72.2746433,127.723858 72.2746433,128 Z M76.4746433,128 C76.4746433,128.276142 76.2507857,128.5 75.9746433,128.5 L74.7746433,128.5 C74.498501,128.5 74.2746433,128.276142 74.2746433,128 C74.2746433,127.723858 74.498501,127.5 74.7746433,127.5 L75.9746433,127.5 C76.2507857,127.5 76.4746433,127.723858 76.4746433,128 Z M80.6746433,128 C80.6746433,128.276142 80.4507857,128.5 80.1746433,128.5 L78.9746433,128.5 C78.698501,128.5 78.4746433,128.276142 78.4746433,128 C78.4746433,127.723858 78.698501,127.5 78.9746433,127.5 L80.1746433,127.5 C80.4507857,127.5 80.6746433,127.723858 80.6746433,128 Z M84.8746433,128 C84.8746433,128.276142 84.6507857,128.5 84.3746433,128.5 L83.1746433,128.5 C82.898501,128.5 82.6746433,128.276142 82.6746433,128 C82.6746433,127.723858 82.898501,127.5 83.1746433,127.5 L84.3746433,127.5 C84.6507857,127.5 84.8746433,127.723858 84.8746433,128 Z M89.0746433,128 C89.0746433,128.276142 88.8507857,128.5 88.5746433,128.5 L87.3746433,128.5 C87.098501,128.5 86.8746433,128.276142 86.8746433,128 C86.8746433,127.723858 87.098501,127.5 87.3746433,127.5 L88.5746433,127.5 C88.8507857,127.5 89.0746433,127.723858 89.0746433,128 Z M93.2746433,128 C93.2746433,128.276142 93.0507857,128.5 92.7746433,128.5 L91.5746433,128.5 C91.298501,128.5 91.0746433,128.276142 91.0746433,128 C91.0746433,127.723858 91.298501,127.5 91.5746433,127.5 L92.7746433,127.5 C93.0507857,127.5 93.2746433,127.723858 93.2746433,128 Z M97.4746433,128 C97.4746433,128.276142 97.2507857,128.5 96.9746433,128.5 L95.7746433,128.5 C95.498501,128.5 95.2746433,128.276142 95.2746433,128 C95.2746433,127.723858 95.498501,127.5 95.7746433,127.5 L96.9746433,127.5 C97.2507857,127.5 97.4746433,127.723858 97.4746433,128 Z M114.055358,126.915582 C114.180112,127.161937 114.081535,127.462781 113.835179,127.587535 C113.455748,127.779678 113.062077,127.943136 112.657126,128.076292 C112.394802,128.16255 112.11222,128.01982 112.025961,127.757496 C111.939703,127.495171 112.082433,127.212589 112.344758,127.126331 C112.701744,127.008946 113.048821,126.864836 113.383405,126.695403 C113.62976,126.570649 113.930604,126.669226 114.055358,126.915582 Z M3.87987036,126.26801 C4.19335033,126.474514 4.52190256,126.657254 4.86287326,126.814501 C5.11363396,126.930146 5.22316717,127.227176 5.10752253,127.477937 C4.9918779,127.728697 4.69484768,127.838231 4.44408698,127.722586 C4.05748525,127.544295 3.68504135,127.337142 3.32975792,127.103101 C3.09915402,126.951191 3.03535949,126.641103 3.18726884,126.410499 C3.3391782,126.179895 3.64926646,126.116101 3.87987036,126.26801 Z M116.999048,123.909773 C117.231117,124.059434 117.297922,124.368887 117.148261,124.600957 C116.917678,124.958505 116.660543,125.298348 116.37924,125.617559 C116.196668,125.824735 115.880714,125.844681 115.673538,125.662108 C115.466362,125.479535 115.446417,125.163582 115.62899,124.956406 C115.877349,124.674578 116.104345,124.374568 116.307864,124.058986 C116.457525,123.826916 116.766979,123.760111 116.999048,123.909773 Z M1.27256369,123.319388 C1.43898255,123.655985 1.63055181,123.979556 1.84546492,124.287446 C2.00352124,124.513881 1.94808936,124.825572 1.72165435,124.983629 C1.49521934,125.141685 1.18352754,125.086253 1.02547122,124.859818 C0.781933905,124.510921 0.564805969,124.14418 0.376143395,123.762593 C0.25375572,123.515054 0.355211426,123.215168 0.602751055,123.092781 C0.850290685,122.970393 1.15017602,123.071849 1.27256369,123.319388 Z M118.493626,120.332066 C118.477178,120.759793 118.428966,121.182879 118.349775,121.598809 C118.298127,121.870079 118.036351,122.048117 117.765081,121.996469 C117.493812,121.944821 117.315774,121.683044 117.367422,121.411775 C117.437286,121.044832 117.479837,120.671419 117.494364,120.29364 C117.504975,120.017702 117.73727,119.802611 118.013208,119.813222 C118.289146,119.823833 118.504237,120.056128 118.493626,120.332066 Z M0.5,119.461965 L0.5,120.011642 C0.500324493,120.219706 0.509101847,120.42668 0.526243834,120.632238 C0.549192342,120.907426 0.344712508,121.149112 0.0695253402,121.172061 C-0.205661827,121.195009 -0.447348546,120.990529 -0.470297054,120.715342 C-0.489701403,120.482655 -0.49963303,120.248463 -0.5,120.01241 L-0.5,119.461965 C-0.5,119.185831 -0.276150919,118.961965 7.30095984e-11,118.961965 C0.27613383,118.961965 0.5,119.185814 0.5,119.461965 Z M118.5,116.112678 L118.5,117.312678 C118.5,117.588821 118.276142,117.812678 118,117.812678 C117.723858,117.812678 117.5,117.588821 117.5,117.312678 L117.5,116.112678 C117.5,115.836536 117.723858,115.612678 118,115.612678 C118.276142,115.612678 118.5,115.836536 118.5,116.112678 Z M0.5,115.261965 L0.5,116.461965 C0.5,116.738107 0.276142375,116.961965 0,116.961965 C-0.276142375,116.961965 -0.5,116.738107 -0.5,116.461965 L-0.5,115.261965 C-0.5,114.985823 -0.276142375,114.761965 0,114.761965 C0.276142375,114.761965 0.5,114.985823 0.5,115.261965 Z M118.5,111.912678 L118.5,113.112678 C118.5,113.388821 118.276142,113.612678 118,113.612678 C117.723858,113.612678 117.5,113.388821 117.5,113.112678 L117.5,111.912678 C117.5,111.636536 117.723858,111.412678 118,111.412678 C118.276142,111.412678 118.5,111.636536 118.5,111.912678 Z M0.5,111.061965 L0.5,112.261965 C0.5,112.538107 0.276142375,112.761965 0,112.761965 C-0.276142375,112.761965 -0.5,112.538107 -0.5,112.261965 L-0.5,111.061965 C-0.5,110.785823 -0.276142375,110.561965 0,110.561965 C0.276142375,110.561965 0.5,110.785823 0.5,111.061965 Z M118.5,107.712678 L118.5,108.912678 C118.5,109.188821 118.276142,109.412678 118,109.412678 C117.723858,109.412678 117.5,109.188821 117.5,108.912678 L117.5,107.712678 C117.5,107.436536 117.723858,107.212678 118,107.212678 C118.276142,107.212678 118.5,107.436536 118.5,107.712678 Z M0.5,106.861965 L0.5,108.061965 C0.5,108.338107 0.276142375,108.561965 0,108.561965 C-0.276142375,108.561965 -0.5,108.338107 -0.5,108.061965 L-0.5,106.861965 C-0.5,106.585823 -0.276142375,106.361965 0,106.361965 C0.276142375,106.361965 0.5,106.585823 0.5,106.861965 Z M118.5,103.512678 L118.5,104.712678 C118.5,104.988821 118.276142,105.212678 118,105.212678 C117.723858,105.212678 117.5,104.988821 117.5,104.712678 L117.5,103.512678 C117.5,103.236536 117.723858,103.012678 118,103.012678 C118.276142,103.012678 118.5,103.236536 118.5,103.512678 Z M0.5,102.661965 L0.5,103.861965 C0.5,104.138107 0.276142375,104.361965 0,104.361965 C-0.276142375,104.361965 -0.5,104.138107 -0.5,103.861965 L-0.5,102.661965 C-0.5,102.385823 -0.276142375,102.161965 0,102.161965 C0.276142375,102.161965 0.5,102.385823 0.5,102.661965 Z M118.5,99.3126783 L118.5,100.512678 C118.5,100.788821 118.276142,101.012678 118,101.012678 C117.723858,101.012678 117.5,100.788821 117.5,100.512678 L117.5,99.3126783 C117.5,99.036536 117.723858,98.8126783 118,98.8126783 C118.276142,98.8126783 118.5,99.036536 118.5,99.3126783 Z M0.5,98.461965 L0.5,99.661965 C0.5,99.9381074 0.276142375,100.161965 0,100.161965 C-0.276142375,100.161965 -0.5,99.9381074 -0.5,99.661965 L-0.5,98.461965 C-0.5,98.1858226 -0.276142375,97.961965 0,97.961965 C0.276142375,97.961965 0.5,98.1858226 0.5,98.461965 Z M118.5,95.1126783 L118.5,96.3126783 C118.5,96.5888207 118.276142,96.8126783 118,96.8126783 C117.723858,96.8126783 117.5,96.5888207 117.5,96.3126783 L117.5,95.1126783 C117.5,94.836536 117.723858,94.6126783 118,94.6126783 C118.276142,94.6126783 118.5,94.836536 118.5,95.1126783 Z M0.5,94.261965 L0.5,95.461965 C0.5,95.7381074 0.276142375,95.961965 0,95.961965 C-0.276142375,95.961965 -0.5,95.7381074 -0.5,95.461965 L-0.5,94.261965 C-0.5,93.9858226 -0.276142375,93.761965 0,93.761965 C0.276142375,93.761965 0.5,93.9858226 0.5,94.261965 Z M118.5,90.9126783 L118.5,92.1126783 C118.5,92.3888207 118.276142,92.6126783 118,92.6126783 C117.723858,92.6126783 117.5,92.3888207 117.5,92.1126783 L117.5,90.9126783 C117.5,90.636536 117.723858,90.4126783 118,90.4126783 C118.276142,90.4126783 118.5,90.636536 118.5,90.9126783 Z M0.5,90.061965 L0.5,91.261965 C0.5,91.5381074 0.276142375,91.761965 0,91.761965 C-0.276142375,91.761965 -0.5,91.5381074 -0.5,91.261965 L-0.5,90.061965 C-0.5,89.7858226 -0.276142375,89.561965 0,89.561965 C0.276142375,89.561965 0.5,89.7858226 0.5,90.061965 Z M118.5,86.7126783 L118.5,87.9126783 C118.5,88.1888207 118.276142,88.4126783 118,88.4126783 C117.723858,88.4126783 117.5,88.1888207 117.5,87.9126783 L117.5,86.7126783 C117.5,86.436536 117.723858,86.2126783 118,86.2126783 C118.276142,86.2126783 118.5,86.436536 118.5,86.7126783 Z M0.5,85.861965 L0.5,87.061965 C0.5,87.3381074 0.276142375,87.561965 0,87.561965 C-0.276142375,87.561965 -0.5,87.3381074 -0.5,87.061965 L-0.5,85.861965 C-0.5,85.5858226 -0.276142375,85.361965 0,85.361965 C0.276142375,85.361965 0.5,85.5858226 0.5,85.861965 Z M118.5,82.5126783 L118.5,83.7126783 C118.5,83.9888207 118.276142,84.2126783 118,84.2126783 C117.723858,84.2126783 117.5,83.9888207 117.5,83.7126783 L117.5,82.5126783 C117.5,82.236536 117.723858,82.0126783 118,82.0126783 C118.276142,82.0126783 118.5,82.236536 118.5,82.5126783 Z M0.5,81.661965 L0.5,82.861965 C0.5,83.1381074 0.276142375,83.361965 0,83.361965 C-0.276142375,83.361965 -0.5,83.1381074 -0.5,82.861965 L-0.5,81.661965 C-0.5,81.3858226 -0.276142375,81.161965 0,81.161965 C0.276142375,81.161965 0.5,81.3858226 0.5,81.661965 Z M118.5,78.3126783 L118.5,79.5126783 C118.5,79.7888207 118.276142,80.0126783 118,80.0126783 C117.723858,80.0126783 117.5,79.7888207 117.5,79.5126783 L117.5,78.3126783 C117.5,78.036536 117.723858,77.8126783 118,77.8126783 C118.276142,77.8126783 118.5,78.036536 118.5,78.3126783 Z M0.5,77.461965 L0.5,78.661965 C0.5,78.9381074 0.276142375,79.161965 0,79.161965 C-0.276142375,79.161965 -0.5,78.9381074 -0.5,78.661965 L-0.5,77.461965 C-0.5,77.1858226 -0.276142375,76.961965 0,76.961965 C0.276142375,76.961965 0.5,77.1858226 0.5,77.461965 Z M118.5,74.1126783 L118.5,75.3126783 C118.5,75.5888207 118.276142,75.8126783 118,75.8126783 C117.723858,75.8126783 117.5,75.5888207 117.5,75.3126783 L117.5,74.1126783 C117.5,73.836536 117.723858,73.6126783 118,73.6126783 C118.276142,73.6126783 118.5,73.836536 118.5,74.1126783 Z M0.5,73.261965 L0.5,74.461965 C0.5,74.7381074 0.276142375,74.961965 0,74.961965 C-0.276142375,74.961965 -0.5,74.7381074 -0.5,74.461965 L-0.5,73.261965 C-0.5,72.9858226 -0.276142375,72.761965 0,72.761965 C0.276142375,72.761965 0.5,72.9858226 0.5,73.261965 Z M118.5,69.9126783 L118.5,71.1126783 C118.5,71.3888207 118.276142,71.6126783 118,71.6126783 C117.723858,71.6126783 117.5,71.3888207 117.5,71.1126783 L117.5,69.9126783 C117.5,69.636536 117.723858,69.4126783 118,69.4126783 C118.276142,69.4126783 118.5,69.636536 118.5,69.9126783 Z M0.5,69.061965 L0.5,70.261965 C0.5,70.5381074 0.276142375,70.761965 0,70.761965 C-0.276142375,70.761965 -0.5,70.5381074 -0.5,70.261965 L-0.5,69.061965 C-0.5,68.7858226 -0.276142375,68.561965 0,68.561965 C0.276142375,68.561965 0.5,68.7858226 0.5,69.061965 Z M118.5,65.7126783 L118.5,66.9126783 C118.5,67.1888207 118.276142,67.4126783 118,67.4126783 C117.723858,67.4126783 117.5,67.1888207 117.5,66.9126783 L117.5,65.7126783 C117.5,65.436536 117.723858,65.2126783 118,65.2126783 C118.276142,65.2126783 118.5,65.436536 118.5,65.7126783 Z M0.5,64.861965 L0.5,66.061965 C0.5,66.3381074 0.276142375,66.561965 0,66.561965 C-0.276142375,66.561965 -0.5,66.3381074 -0.5,66.061965 L-0.5,64.861965 C-0.5,64.5858226 -0.276142375,64.361965 0,64.361965 C0.276142375,64.361965 0.5,64.5858226 0.5,64.861965 Z M118.5,61.5126783 L118.5,62.7126783 C118.5,62.9888207 118.276142,63.2126783 118,63.2126783 C117.723858,63.2126783 117.5,62.9888207 117.5,62.7126783 L117.5,61.5126783 C117.5,61.236536 117.723858,61.0126783 118,61.0126783 C118.276142,61.0126783 118.5,61.236536 118.5,61.5126783 Z M0.5,60.661965 L0.5,61.861965 C0.5,62.1381074 0.276142375,62.361965 0,62.361965 C-0.276142375,62.361965 -0.5,62.1381074 -0.5,61.861965 L-0.5,60.661965 C-0.5,60.3858226 -0.276142375,60.161965 0,60.161965 C0.276142375,60.161965 0.5,60.3858226 0.5,60.661965 Z M118.5,57.3126783 L118.5,58.5126783 C118.5,58.7888207 118.276142,59.0126783 118,59.0126783 C117.723858,59.0126783 117.5,58.7888207 117.5,58.5126783 L117.5,57.3126783 C117.5,57.036536 117.723858,56.8126783 118,56.8126783 C118.276142,56.8126783 118.5,57.036536 118.5,57.3126783 Z M0.5,56.461965 L0.5,57.661965 C0.5,57.9381074 0.276142375,58.161965 0,58.161965 C-0.276142375,58.161965 -0.5,57.9381074 -0.5,57.661965 L-0.5,56.461965 C-0.5,56.1858226 -0.276142375,55.961965 0,55.961965 C0.276142375,55.961965 0.5,56.1858226 0.5,56.461965 Z M118.5,53.1126783 L118.5,54.3126783 C118.5,54.5888207 118.276142,54.8126783 118,54.8126783 C117.723858,54.8126783 117.5,54.5888207 117.5,54.3126783 L117.5,53.1126783 C117.5,52.836536 117.723858,52.6126783 118,52.6126783 C118.276142,52.6126783 118.5,52.836536 118.5,53.1126783 Z M0.5,52.261965 L0.5,53.461965 C0.5,53.7381074 0.276142375,53.961965 0,53.961965 C-0.276142375,53.961965 -0.5,53.7381074 -0.5,53.461965 L-0.5,52.261965 C-0.5,51.9858226 -0.276142375,51.761965 0,51.761965 C0.276142375,51.761965 0.5,51.9858226 0.5,52.261965 Z M118.5,48.9126783 L118.5,50.1126783 C118.5,50.3888207 118.276142,50.6126783 118,50.6126783 C117.723858,50.6126783 117.5,50.3888207 117.5,50.1126783 L117.5,48.9126783 C117.5,48.636536 117.723858,48.4126783 118,48.4126783 C118.276142,48.4126783 118.5,48.636536 118.5,48.9126783 Z M0.5,48.061965 L0.5,49.261965 C0.5,49.5381074 0.276142375,49.761965 0,49.761965 C-0.276142375,49.761965 -0.5,49.5381074 -0.5,49.261965 L-0.5,48.061965 C-0.5,47.7858226 -0.276142375,47.561965 0,47.561965 C0.276142375,47.561965 0.5,47.7858226 0.5,48.061965 Z M118.5,44.7126783 L118.5,45.9126783 C118.5,46.1888207 118.276142,46.4126783 118,46.4126783 C117.723858,46.4126783 117.5,46.1888207 117.5,45.9126783 L117.5,44.7126783 C117.5,44.436536 117.723858,44.2126783 118,44.2126783 C118.276142,44.2126783 118.5,44.436536 118.5,44.7126783 Z M0.5,43.861965 L0.5,45.061965 C0.5,45.3381074 0.276142375,45.561965 0,45.561965 C-0.276142375,45.561965 -0.5,45.3381074 -0.5,45.061965 L-0.5,43.861965 C-0.5,43.5858226 -0.276142375,43.361965 0,43.361965 C0.276142375,43.361965 0.5,43.5858226 0.5,43.861965 Z M118.5,40.5126783 L118.5,41.7126783 C118.5,41.9888207 118.276142,42.2126783 118,42.2126783 C117.723858,42.2126783 117.5,41.9888207 117.5,41.7126783 L117.5,40.5126783 C117.5,40.236536 117.723858,40.0126783 118,40.0126783 C118.276142,40.0126783 118.5,40.236536 118.5,40.5126783 Z M0.5,39.661965 L0.5,40.861965 C0.5,41.1381074 0.276142375,41.361965 0,41.361965 C-0.276142375,41.361965 -0.5,41.1381074 -0.5,40.861965 L-0.5,39.661965 C-0.5,39.3858226 -0.276142375,39.161965 0,39.161965 C0.276142375,39.161965 0.5,39.3858226 0.5,39.661965 Z M118.5,36.3126783 L118.5,37.5126783 C118.5,37.7888207 118.276142,38.0126783 118,38.0126783 C117.723858,38.0126783 117.5,37.7888207 117.5,37.5126783 L117.5,36.3126783 C117.5,36.036536 117.723858,35.8126783 118,35.8126783 C118.276142,35.8126783 118.5,36.036536 118.5,36.3126783 Z M0.5,35.461965 L0.5,36.661965 C0.5,36.9381074 0.276142375,37.161965 0,37.161965 C-0.276142375,37.161965 -0.5,36.9381074 -0.5,36.661965 L-0.5,35.461965 C-0.5,35.1858226 -0.276142375,34.961965 0,34.961965 C0.276142375,34.961965 0.5,35.1858226 0.5,35.461965 Z M118.5,32.1126783 L118.5,33.3126783 C118.5,33.5888207 118.276142,33.8126783 118,33.8126783 C117.723858,33.8126783 117.5,33.5888207 117.5,33.3126783 L117.5,32.1126783 C117.5,31.836536 117.723858,31.6126783 118,31.6126783 C118.276142,31.6126783 118.5,31.836536 118.5,32.1126783 Z M0.5,31.261965 L0.5,32.461965 C0.5,32.7381074 0.276142375,32.961965 0,32.961965 C-0.276142375,32.961965 -0.5,32.7381074 -0.5,32.461965 L-0.5,31.261965 C-0.5,30.9858226 -0.276142375,30.761965 0,30.761965 C0.276142375,30.761965 0.5,30.9858226 0.5,31.261965 Z M118.5,27.9126783 L118.5,29.1126783 C118.5,29.3888207 118.276142,29.6126783 118,29.6126783 C117.723858,29.6126783 117.5,29.3888207 117.5,29.1126783 L117.5,27.9126783 C117.5,27.636536 117.723858,27.4126783 118,27.4126783 C118.276142,27.4126783 118.5,27.636536 118.5,27.9126783 Z M0.5,27.061965 L0.5,28.261965 C0.5,28.5381074 0.276142375,28.761965 0,28.761965 C-0.276142375,28.761965 -0.5,28.5381074 -0.5,28.261965 L-0.5,27.061965 C-0.5,26.7858226 -0.276142375,26.561965 0,26.561965 C0.276142375,26.561965 0.5,26.7858226 0.5,27.061965 Z M118.5,23.7126783 L118.5,24.9126783 C118.5,25.1888207 118.276142,25.4126783 118,25.4126783 C117.723858,25.4126783 117.5,25.1888207 117.5,24.9126783 L117.5,23.7126783 C117.5,23.436536 117.723858,23.2126783 118,23.2126783 C118.276142,23.2126783 118.5,23.436536 118.5,23.7126783 Z M0.5,22.861965 L0.5,24.061965 C0.5,24.3381074 0.276142375,24.561965 0,24.561965 C-0.276142375,24.561965 -0.5,24.3381074 -0.5,24.061965 L-0.5,22.861965 C-0.5,22.5858226 -0.276142375,22.361965 0,22.361965 C0.276142375,22.361965 0.5,22.5858226 0.5,22.861965 Z M118.5,19.5126783 L118.5,20.7126783 C118.5,20.9888207 118.276142,21.2126783 118,21.2126783 C117.723858,21.2126783 117.5,20.9888207 117.5,20.7126783 L117.5,19.5126783 C117.5,19.236536 117.723858,19.0126783 118,19.0126783 C118.276142,19.0126783 118.5,19.236536 118.5,19.5126783 Z M0.5,18.661965 L0.5,19.861965 C0.5,20.1381074 0.276142375,20.361965 0,20.361965 C-0.276142375,20.361965 -0.5,20.1381074 -0.5,19.861965 L-0.5,18.661965 C-0.5,18.3858226 -0.276142375,18.161965 0,18.161965 C0.276142375,18.161965 0.5,18.3858226 0.5,18.661965 Z M118.5,15.3126783 L118.5,16.5126783 C118.5,16.7888207 118.276142,17.0126783 118,17.0126783 C117.723858,17.0126783 117.5,16.7888207 117.5,16.5126783 L117.5,15.3126783 C117.5,15.036536 117.723858,14.8126783 118,14.8126783 C118.276142,14.8126783 118.5,15.036536 118.5,15.3126783 Z M0.5,14.461965 L0.5,15.661965 C0.5,15.9381074 0.276142375,16.161965 0,16.161965 C-0.276142375,16.161965 -0.5,15.9381074 -0.5,15.661965 L-0.5,14.461965 C-0.5,14.1858226 -0.276142375,13.961965 0,13.961965 C0.276142375,13.961965 0.5,14.1858226 0.5,14.461965 Z M118.5,11.1126783 L118.5,12.3126783 C118.5,12.5888207 118.276142,12.8126783 118,12.8126783 C117.723858,12.8126783 117.5,12.5888207 117.5,12.3126783 L117.5,11.1126783 C117.5,10.836536 117.723858,10.6126783 118,10.6126783 C118.276142,10.6126783 118.5,10.836536 118.5,11.1126783 Z M0.5,10.261965 L0.5,11.461965 C0.5,11.7381074 0.276142375,11.961965 0,11.961965 C-0.276142375,11.961965 -0.5,11.7381074 -0.5,11.461965 L-0.5,10.261965 C-0.5,9.98582265 -0.276142375,9.76196502 0,9.76196502 C0.276142375,9.76196502 0.5,9.98582265 0.5,10.261965 Z M118.419805,6.82803952 C118.47219,7.20768546 118.499081,7.59225728 118.499978,7.98103351 L118.5,8.11259543 C118.500046,8.3887378 118.276225,8.61263254 118.000083,8.61267833 C117.723941,8.6127241 117.500046,8.3889036 117.5,8.11276123 L117.49998,7.98227163 C117.499188,7.63955287 117.475436,7.29987535 117.429191,6.96472849 C117.391445,6.69117799 117.582603,6.43882262 117.856153,6.401077 C118.129704,6.36333138 118.382059,6.55448901 118.419805,6.82803952 Z M0.351560452,5.59425596 C0.619685241,5.66031433 0.783492437,5.93122331 0.717434064,6.1993481 C0.628029624,6.5622324 0.565753874,6.93265807 0.531429382,7.30840907 C0.506308636,7.58340645 0.263014824,7.78597148 -0.0119825569,7.76085074 C-0.286979937,7.73572999 -0.489544973,7.49243618 -0.464424227,7.2174388 C-0.425527861,6.79163939 -0.354921179,6.37166007 -0.25353169,5.96012957 C-0.187473317,5.69200478 0.0834356631,5.52819758 0.351560452,5.59425596 Z M116.698898,2.76733419 C116.96108,3.1025346 117.197883,3.45694423 117.407088,3.8275859 C117.542824,4.06806518 117.457913,4.37304812 117.217434,4.50878435 C116.976955,4.64452059 116.671972,4.55960957 116.536235,4.31913028 C116.35164,3.9920895 116.14265,3.67930561 115.911223,3.38342515 C115.741094,3.1659147 115.779505,2.85167098 115.997015,2.68154216 C116.214525,2.51141334 116.528769,2.54982374 116.698898,2.76733419 Z M2.65018329,2.03098857 C2.84694705,2.22473746 2.84939109,2.54131052 2.6556422,2.73807428 C2.39207973,3.00573797 2.14880502,3.29275366 1.92806459,3.59657449 C1.76575137,3.81997777 1.45306636,3.86950105 1.22966307,3.70718783 C1.00625978,3.54487461 0.956736501,3.23218959 1.11904973,3.00878631 C1.36908653,2.66464279 1.64461146,2.33957825 1.94309758,2.03644748 C2.13684647,1.83968372 2.45341952,1.83723968 2.65018329,2.03098857 Z M113.131516,0.0955761984 C113.527572,0.252607282 113.910844,0.439252343 114.278336,0.653707596 C114.516838,0.792888857 114.597354,1.09906161 114.458172,1.33756357 C114.318991,1.57606554 114.012818,1.65658088 113.774317,1.51739962 C113.450161,1.32823426 113.112159,1.1636344 112.762943,1.0251747 C112.506241,0.923395871 112.380651,0.63279011 112.48243,0.376088571 C112.584209,0.119387032 112.874815,-0.00620263532 113.131516,0.0955761984 Z M6.40653195,0.142669081 C6.47802057,0.409397386 6.31974754,0.683576376 6.05301923,0.755064996 C5.68981674,0.852410663 5.33533565,0.976934286 4.9920952,1.12738697 C4.73918251,1.23824627 4.44428705,1.12308924 4.33342776,0.870176548 C4.22256847,0.617263859 4.3377255,0.322368401 4.59063819,0.211509108 C4.97997232,0.0408521629 5.38210756,-0.100411613 5.79413604,-0.21084364 C6.06086434,-0.282332259 6.33504333,-0.124059223 6.40653195,0.142669081 Z M110.500405,2.47493888e-05 C110.500405,0.276167124 110.276522,0.500013673 110.00038,0.5 L108.8,0.5 C108.523858,0.5 108.3,0.276142375 108.3,0 C108.3,-0.276142375 108.523858,-0.5 108.8,-0.5 L110,-0.5 C110.276572,-0.499986326 110.500405,-0.276117625 110.500405,2.47493888e-05 Z M106.3,0 C106.3,0.276142375 106.076142,0.5 105.8,0.5 L104.6,0.5 C104.323858,0.5 104.1,0.276142375 104.1,0 C104.1,-0.276142375 104.323858,-0.5 104.6,-0.5 L105.8,-0.5 C106.076142,-0.5 106.3,-0.276142375 106.3,0 Z M102.1,0 C102.1,0.276142375 101.876142,0.5 101.6,0.5 L100.4,0.5 C100.123858,0.5 99.9,0.276142375 99.9,0 C99.9,-0.276142375 100.123858,-0.5 100.4,-0.5 L101.6,-0.5 C101.876142,-0.5 102.1,-0.276142375 102.1,0 Z M97.9,0 C97.9,0.276142375 97.6761424,0.5 97.4,0.5 L96.2,0.5 C95.9238576,0.5 95.7,0.276142375 95.7,0 C95.7,-0.276142375 95.9238576,-0.5 96.2,-0.5 L97.4,-0.5 C97.6761424,-0.5 97.9,-0.276142375 97.9,0 Z M93.7,0 C93.7,0.276142375 93.4761424,0.5 93.2,0.5 L92,0.5 C91.7238576,0.5 91.5,0.276142375 91.5,0 C91.5,-0.276142375 91.7238576,-0.5 92,-0.5 L93.2,-0.5 C93.4761424,-0.5 93.7,-0.276142375 93.7,0 Z M89.5,0 C89.5,0.276142375 89.2761424,0.5 89,0.5 L87.8,0.5 C87.5238576,0.5 87.3,0.276142375 87.3,0 C87.3,-0.276142375 87.5238576,-0.5 87.8,-0.5 L89,-0.5 C89.2761424,-0.5 89.5,-0.276142375 89.5,0 Z M85.3,0 C85.3,0.276142375 85.0761424,0.5 84.8,0.5 L83.6,0.5 C83.3238576,0.5 83.1,0.276142375 83.1,0 C83.1,-0.276142375 83.3238576,-0.5 83.6,-0.5 L84.8,-0.5 C85.0761424,-0.5 85.3,-0.276142375 85.3,0 Z M81.1,0 C81.1,0.276142375 80.8761424,0.5 80.6,0.5 L79.4,0.5 C79.1238576,0.5 78.9,0.276142375 78.9,0 C78.9,-0.276142375 79.1238576,-0.5 79.4,-0.5 L80.6,-0.5 C80.8761424,-0.5 81.1,-0.276142375 81.1,0 Z M76.9,0 C76.9,0.276142375 76.6761424,0.5 76.4,0.5 L75.2,0.5 C74.9238576,0.5 74.7,0.276142375 74.7,0 C74.7,-0.276142375 74.9238576,-0.5 75.2,-0.5 L76.4,-0.5 C76.6761424,-0.5 76.9,-0.276142375 76.9,0 Z M72.7,0 C72.7,0.276142375 72.4761424,0.5 72.2,0.5 L71,0.5 C70.7238576,0.5 70.5,0.276142375 70.5,0 C70.5,-0.276142375 70.7238576,-0.5 71,-0.5 L72.2,-0.5 C72.4761424,-0.5 72.7,-0.276142375 72.7,0 Z M68.5,0 C68.5,0.276142375 68.2761424,0.5 68,0.5 L66.8,0.5 C66.5238576,0.5 66.3,0.276142375 66.3,0 C66.3,-0.276142375 66.5238576,-0.5 66.8,-0.5 L68,-0.5 C68.2761424,-0.5 68.5,-0.276142375 68.5,0 Z M64.3,0 C64.3,0.276142375 64.0761424,0.5 63.8,0.5 L62.6,0.5 C62.3238576,0.5 62.1,0.276142375 62.1,0 C62.1,-0.276142375 62.3238576,-0.5 62.6,-0.5 L63.8,-0.5 C64.0761424,-0.5 64.3,-0.276142375 64.3,0 Z M60.1,0 C60.1,0.276142375 59.8761424,0.5 59.6,0.5 L58.4,0.5 C58.1238576,0.5 57.9,0.276142375 57.9,0 C57.9,-0.276142375 58.1238576,-0.5 58.4,-0.5 L59.6,-0.5 C59.8761424,-0.5 60.1,-0.276142375 60.1,0 Z M55.9,0 C55.9,0.276142375 55.6761424,0.5 55.4,0.5 L54.2,0.5 C53.9238576,0.5 53.7,0.276142375 53.7,0 C53.7,-0.276142375 53.9238576,-0.5 54.2,-0.5 L55.4,-0.5 C55.6761424,-0.5 55.9,-0.276142375 55.9,0 Z M51.7,0 C51.7,0.276142375 51.4761424,0.5 51.2,0.5 L50,0.5 C49.7238576,0.5 49.5,0.276142375 49.5,0 C49.5,-0.276142375 49.7238576,-0.5 50,-0.5 L51.2,-0.5 C51.4761424,-0.5 51.7,-0.276142375 51.7,0 Z M47.5,0 C47.5,0.276142375 47.2761424,0.5 47,0.5 L45.8,0.5 C45.5238576,0.5 45.3,0.276142375 45.3,0 C45.3,-0.276142375 45.5238576,-0.5 45.8,-0.5 L47,-0.5 C47.2761424,-0.5 47.5,-0.276142375 47.5,0 Z M43.3,0 C43.3,0.276142375 43.0761424,0.5 42.8,0.5 L41.6,0.5 C41.3238576,0.5 41.1,0.276142375 41.1,0 C41.1,-0.276142375 41.3238576,-0.5 41.6,-0.5 L42.8,-0.5 C43.0761424,-0.5 43.3,-0.276142375 43.3,0 Z M39.1,0 C39.1,0.276142375 38.8761424,0.5 38.6,0.5 L37.4,0.5 C37.1238576,0.5 36.9,0.276142375 36.9,0 C36.9,-0.276142375 37.1238576,-0.5 37.4,-0.5 L38.6,-0.5 C38.8761424,-0.5 39.1,-0.276142375 39.1,0 Z M34.9,0 C34.9,0.276142375 34.6761424,0.5 34.4,0.5 L33.2,0.5 C32.9238576,0.5 32.7,0.276142375 32.7,0 C32.7,-0.276142375 32.9238576,-0.5 33.2,-0.5 L34.4,-0.5 C34.6761424,-0.5 34.9,-0.276142375 34.9,0 Z M30.7,0 C30.7,0.276142375 30.4761424,0.5 30.2,0.5 L29,0.5 C28.7238576,0.5 28.5,0.276142375 28.5,0 C28.5,-0.276142375 28.7238576,-0.5 29,-0.5 L30.2,-0.5 C30.4761424,-0.5 30.7,-0.276142375 30.7,0 Z M26.5,0 C26.5,0.276142375 26.2761424,0.5 26,0.5 L24.8,0.5 C24.5238576,0.5 24.3,0.276142375 24.3,0 C24.3,-0.276142375 24.5238576,-0.5 24.8,-0.5 L26,-0.5 C26.2761424,-0.5 26.5,-0.276142375 26.5,0 Z M22.3,0 C22.3,0.276142375 22.0761424,0.5 21.8,0.5 L20.6,0.5 C20.3238576,0.5 20.1,0.276142375 20.1,0 C20.1,-0.276142375 20.3238576,-0.5 20.6,-0.5 L21.8,-0.5 C22.0761424,-0.5 22.3,-0.276142375 22.3,0 Z M18.1,0 C18.1,0.276142375 17.8761424,0.5 17.6,0.5 L16.4,0.5 C16.1238576,0.5 15.9,0.276142375 15.9,0 C15.9,-0.276142375 16.1238576,-0.5 16.4,-0.5 L17.6,-0.5 C17.8761424,-0.5 18.1,-0.276142375 18.1,0 Z M13.9,0 C13.9,0.276142375 13.6761424,0.5 13.4,0.5 L12.2,0.5 C11.9238576,0.5 11.7,0.276142375 11.7,0 C11.7,-0.276142375 11.9238576,-0.5 12.2,-0.5 L13.4,-0.5 C13.6761424,-0.5 13.9,-0.276142375 13.9,0 Z M9.7,0 C9.7,0.276142375 9.47614237,0.5 9.2,0.5 L8,0.5 C7.72385763,0.5 7.5,0.276142375 7.5,0 C7.5,-0.276142375 7.72385763,-0.5 8,-0.5 L9.2,-0.5 C9.47614237,-0.5 9.7,-0.276142375 9.7,0 Z" id="LLM-dash" fill="#999999" fill-rule="nonzero"></path>
+                    <g id="LLM" stroke-width="1" fill-rule="evenodd" transform="translate(6.5, 4)" fill="#7D7D7D">
+                        <path d="M1.80126953,8 L5.30224609,8 C5.44873047,8 5.56266276,7.95198568 5.64404297,7.85595703 C5.72542318,7.75992839 5.76611328,7.6484375 5.76611328,7.52148437 C5.76611328,7.39453125 5.72460938,7.28222656 5.64160156,7.18457031 C5.55859375,7.08691406 5.44547526,7.03808594 5.30224609,7.03808594 L2.39697266,7.03808594 L2.39697266,1.58398437 C2.39697266,1.3984375 2.33837891,1.25439453 2.22119141,1.15185547 C2.10400391,1.04931641 1.96240234,0.998046875 1.79638672,0.998046875 C1.63037109,0.998046875 1.48714193,1.04931641 1.36669922,1.15185547 C1.24625651,1.25439453 1.18603516,1.3984375 1.18603516,1.58398437 L1.18603516,7.39453125 C1.18603516,7.56054687 1.24707031,7.70296224 1.36914062,7.82177734 C1.49121094,7.94059245 1.63525391,8 1.80126953,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M7.27978516,8 L10.7807617,8 C10.9272461,8 11.0411784,7.95198568 11.1225586,7.85595703 C11.2039388,7.75992839 11.2446289,7.6484375 11.2446289,7.52148437 C11.2446289,7.39453125 11.203125,7.28222656 11.1201172,7.18457031 C11.0371094,7.08691406 10.9239909,7.03808594 10.7807617,7.03808594 L7.87548828,7.03808594 L7.87548828,1.58398437 C7.87548828,1.3984375 7.81689453,1.25439453 7.69970703,1.15185547 C7.58251953,1.04931641 7.44091797,0.998046875 7.27490234,0.998046875 C7.10888672,0.998046875 6.96565755,1.04931641 6.84521484,1.15185547 C6.72477214,1.25439453 6.66455078,1.3984375 6.66455078,1.58398437 L6.66455078,7.39453125 C6.66455078,7.56054687 6.72558594,7.70296224 6.84765625,7.82177734 C6.96972656,7.94059245 7.11376953,8 7.27978516,8 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M15.9418945,7.78027344 L15.9418945,6.31054688 L14.0473633,1.88183594 C13.9529622,1.65722656 13.8162435,1.48876953 13.637207,1.37646484 C13.4581706,1.26416016 13.2661133,1.20800781 13.0610352,1.20800781 C12.8136393,1.20800781 12.5996094,1.2796224 12.4189453,1.42285156 C12.2382812,1.56608073 12.1479492,1.75 12.1479492,1.97460938 L12.1479492,7.47753906 C12.1479492,7.60449219 12.2032878,7.70377604 12.3139648,7.77539063 C12.4246419,7.84700521 12.5556641,7.8828125 12.7070312,7.8828125 C12.8583984,7.8828125 12.9894206,7.84700521 13.1000977,7.77539063 C13.2107747,7.70377604 13.2661133,7.60449219 13.2661133,7.47753906 L13.2661133,2.5703125 L15.2436523,7.25292969 C15.3152669,7.42220052 15.4145508,7.55240885 15.5415039,7.64355469 C15.668457,7.73470052 15.8019206,7.78027344 15.9418945,7.78027344 Z M15.9467773,7.78027344 C16.0867513,7.78027344 16.2210286,7.73388672 16.3496094,7.64111328 C16.4781901,7.54833984 16.5766602,7.41894531 16.6450195,7.25292969 L18.6274414,2.5703125 L18.6274414,7.47753906 C18.6274414,7.60449219 18.6827799,7.70377604 18.793457,7.77539063 C18.9041341,7.84700521 19.0351562,7.8828125 19.1865234,7.8828125 C19.3378906,7.8828125 19.4689128,7.84700521 19.5795898,7.77539063 C19.6902669,7.70377604 19.7456055,7.60449219 19.7456055,7.47753906 L19.7456055,1.97460938 C19.7456055,1.82486979 19.702474,1.69059245 19.6162109,1.57177734 C19.5299479,1.45296224 19.4176432,1.36263021 19.2792969,1.30078125 C19.1409505,1.23893229 18.9920247,1.20800781 18.8325195,1.20800781 C18.6274414,1.20800781 18.4353841,1.26416016 18.2563477,1.37646484 C18.0773112,1.48876953 17.9389648,1.65722656 17.8413086,1.88183594 L15.9467773,6.31054688 L15.9467773,7.78027344 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="Lines-@blue-w-arrow" transform="translate(20, 0)" fill="#8E8E8E" fill-rule="nonzero">
+                    <g id="L5" transform="translate(0, 138)">
+                        <path d="M77.5,-0.277777778 L80,4.72222222 L78,4.72222222 L78,20 C78,20.2761424 77.7761424,20.5 77.5,20.5 C77.2238576,20.5 77,20.2761424 77,20 L77,4.72222222 L75,4.72222222 L77.5,-0.277777778 Z M32.5,-0.277777778 L35,4.72222222 L33,4.72222222 L33,20 C33,20.2761424 32.7761424,20.5 32.5,20.5 C32.2238576,20.5 32,20.2761424 32,20 L32,4.72222222 L30,4.72222222 L32.5,-0.277777778 Z M0.5,-0.277777778 L3,4.72222222 L0.999,4.72222222 L1,20 C1,20.2761424 0.776142375,20.5 0.5,20.5 C0.223857625,20.5 1.33226763e-15,20.2761424 1.33226763e-15,20 L-0.001,4.72222222 L-2,4.72222222 L0.5,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L4" transform="translate(0.0001, 110)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253492e-06,16.2761424 -1.49253492e-06,16 L-0.00100149253,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L3" transform="translate(0, 82)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253492e-06,16.2761424 -1.49253492e-06,16 L-0.00100149253,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L2" transform="translate(0.0001, 54)">
+                        <path d="M77.4998985,-0.277777778 L79.9998985,4.72222222 L77.9989985,4.72222222 L77.9998985,16 C77.9998985,16.2761424 77.7760409,16.5 77.4998985,16.5 C77.2237561,16.5 76.9998985,16.2761424 76.9998985,16 L76.9989985,4.72222222 L74.9998985,4.72222222 L77.4998985,-0.277777778 Z M0.499998507,-0.277777778 L2.99999851,4.72222222 L0.998998507,4.72222222 L0.999998507,16 C0.999998507,16.2761424 0.776140882,16.5 0.499998507,16.5 C0.223856133,16.5 -1.49253492e-06,16.2761424 -1.49253492e-06,16 L-0.00100149253,4.72222222 L-2.00000149,4.72222222 L0.499998507,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="L1" transform="translate(39, 0)">
+                        <path d="M0.5,0.722222222 L3,5.72222222 L0.999,5.72222222 L1,42 C1,42.2761424 0.776142375,42.5 0.5,42.5 C0.223857625,42.5 2.22044605e-15,42.2761424 2.22044605e-15,42 L-0.001,5.72222222 L-2,5.72222222 L0.5,0.722222222 Z" id="Combined-Shape"></path>
+                    </g>
+                </g>
+                <g id="L5" transform="translate(8, 158)">
+                    <g id="1" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="2" transform="translate(32, 0)" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="3" transform="translate(78, 0)" fill="#30A2FF">
+                        <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="..." transform="translate(62.7, 5)" fill="#8E8E8E">
+                        <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                        <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                    </g>
+                </g>
+                <g id="L1" transform="translate(8, 42)" fill="#30A2FF">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L2" transform="translate(8, 70)" fill="#30A2FF">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L3" transform="translate(8, 98)" fill="#30A2FF">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+                <g id="L4" transform="translate(8, 126)" fill="#30A2FF">
+                    <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="102" height="12" rx="3"></rect>
+                    <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L99,0 Z M99,1 L3,1 L2.79551169,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                </g>
+            </g>
+            <g id="Pooling" transform="translate(0, 106)">
+                <g id="dash-box-@purple" fill="#9172E2">
+                    <path d="M8,0 L288,0 C292.418278,0 296,3.581722 296,8 L296,74 C296,78.418278 292.418278,82 288,82 L8,82 C3.581722,82 0,78.418278 0,74 L0,8 C0,3.581722 3.581722,8.8817842e-16 8,0 Z" id="bg" fill-opacity="0.1"></path>
+                    <path d="M10.9746435,80.9999998 L10.974,81.9999998 L8.974,81.9999998 L8.97464335,80.9999998 L10.9746435,80.9999998 Z M14.9746436,80.9999998 L14.974,81.9999998 L12.974,81.9999998 L12.9746433,80.9999998 L14.9746436,80.9999998 Z M18.9746435,80.9999998 L18.974,81.9999998 L16.974,81.9999998 L16.9746433,80.9999998 L18.9746435,80.9999998 Z M22.9746433,80.9999998 L22.974,81.9999998 L20.974,81.9999998 L20.9746433,80.9999998 L22.9746433,80.9999998 Z M26.9746435,80.9999998 L26.974,81.9999998 L24.974,81.9999998 L24.9746433,80.9999998 L26.9746435,80.9999998 Z M30.9746434,80.9999998 L30.974,81.9999998 L28.974,81.9999998 L28.9746433,80.9999998 L30.9746434,80.9999998 Z M34.9746435,80.9999998 L34.974,81.9999998 L32.974,81.9999998 L32.9746433,80.9999998 L34.9746435,80.9999998 Z M38.9746434,80.9999998 L38.974,81.9999998 L36.974,81.9999998 L36.9746433,80.9999998 L38.9746434,80.9999998 Z M42.9746435,80.9999998 L42.974,81.9999998 L40.974,81.9999998 L40.9746433,80.9999998 L42.9746435,80.9999998 Z M46.9746434,80.9999998 L46.974,81.9999998 L44.974,81.9999998 L44.9746433,80.9999998 L46.9746434,80.9999998 Z M50.9746435,80.9999998 L50.974,81.9999998 L48.974,81.9999998 L48.9746433,80.9999998 L50.9746435,80.9999998 Z M54.9746434,80.9999998 L54.974,81.9999998 L52.974,81.9999998 L52.9746433,80.9999998 L54.9746434,80.9999998 Z M58.9746435,80.9999998 L58.974,81.9999998 L56.974,81.9999998 L56.9746433,80.9999998 L58.9746435,80.9999998 Z M62.9746434,80.9999998 L62.974,81.9999998 L60.974,81.9999998 L60.9746433,80.9999998 L62.9746434,80.9999998 Z M66.9746436,80.9999998 L66.974,81.9999998 L64.974,81.9999998 L64.9746433,80.9999998 L66.9746436,80.9999998 Z M70.9746434,80.9999998 L70.974,81.9999998 L68.974,81.9999998 L68.9746433,80.9999998 L70.9746434,80.9999998 Z M74.9746436,80.9999998 L74.974,81.9999998 L72.974,81.9999998 L72.9746433,80.9999998 L74.9746436,80.9999998 Z M78.9746434,80.9999998 L78.974,81.9999998 L76.974,81.9999998 L76.9746433,80.9999998 L78.9746434,80.9999998 Z M82.9746436,80.9999998 L82.974,81.9999998 L80.974,81.9999998 L80.9746433,80.9999998 L82.9746436,80.9999998 Z M86.9746435,80.9999998 L86.974,81.9999998 L84.974,81.9999998 L84.9746433,80.9999998 L86.9746435,80.9999998 Z M90.9746436,80.9999998 L90.974,81.9999998 L88.974,81.9999998 L88.9746433,80.9999998 L90.9746436,80.9999998 Z M94.9746435,80.9999998 L94.974,81.9999998 L92.974,81.9999998 L92.9746433,80.9999998 L94.9746435,80.9999998 Z M98.9746436,80.9999998 L98.974,81.9999998 L96.974,81.9999998 L96.9746433,80.9999998 L98.9746436,80.9999998 Z M102.974643,80.9999998 L102.974,81.9999998 L100.974,81.9999998 L100.974643,80.9999998 L102.974643,80.9999998 Z M106.974644,80.9999998 L106.974,81.9999998 L104.974,81.9999998 L104.974643,80.9999998 L106.974644,80.9999998 Z M110.974643,80.9999998 L110.974,81.9999998 L108.974,81.9999998 L108.974643,80.9999998 L110.974643,80.9999998 Z M114.974643,80.9999998 L114.974,81.9999998 L112.974,81.9999998 L112.974643,80.9999998 L114.974643,80.9999998 Z M118.974644,80.9999998 L118.974,81.9999998 L116.974,81.9999998 L116.974643,80.9999998 L118.974644,80.9999998 Z M122.974643,80.9999998 L122.974,81.9999998 L120.974,81.9999998 L120.974643,80.9999998 L122.974643,80.9999998 Z M126.974644,80.9999998 L126.974,81.9999998 L124.974,81.9999998 L124.974643,80.9999998 L126.974644,80.9999998 Z M130.974643,80.9999998 L130.974,81.9999998 L128.974,81.9999998 L128.974643,80.9999998 L130.974643,80.9999998 Z M134.974644,80.9999998 L134.974,81.9999998 L132.974,81.9999998 L132.974643,80.9999998 L134.974644,80.9999998 Z M138.974643,80.9999998 L138.974,81.9999998 L136.974,81.9999998 L136.974643,80.9999998 L138.974643,80.9999998 Z M142.974644,80.9999998 L142.974,81.9999998 L140.974,81.9999998 L140.974643,80.9999998 L142.974644,80.9999998 Z M146.974643,80.9999998 L146.974,81.9999998 L144.974,81.9999998 L144.974643,80.9999998 L146.974643,80.9999998 Z M150.974643,80.9999998 L150.974,81.9999998 L148.974,81.9999998 L148.974643,80.9999998 L150.974643,80.9999998 Z M154.974643,80.9999998 L154.974,81.9999998 L152.974,81.9999998 L152.974643,80.9999998 L154.974643,80.9999998 Z M158.974643,80.9999998 L158.974,81.9999998 L156.974,81.9999998 L156.974643,80.9999998 L158.974643,80.9999998 Z M162.974643,80.9999998 L162.974,81.9999998 L160.974,81.9999998 L160.974643,80.9999998 L162.974643,80.9999998 Z M166.974643,80.9999998 L166.974,81.9999998 L164.974,81.9999998 L164.974643,80.9999998 L166.974643,80.9999998 Z M170.974643,80.9999998 L170.974,81.9999998 L168.974,81.9999998 L168.974643,80.9999998 L170.974643,80.9999998 Z M174.974643,80.9999998 L174.974,81.9999998 L172.974,81.9999998 L172.974643,80.9999998 L174.974643,80.9999998 Z M178.974643,80.9999998 L178.974,81.9999998 L176.974,81.9999998 L176.974643,80.9999998 L178.974643,80.9999998 Z M182.974643,80.9999998 L182.974,81.9999998 L180.974,81.9999998 L180.974643,80.9999998 L182.974643,80.9999998 Z M186.974643,80.9999998 L186.974,81.9999998 L184.974,81.9999998 L184.974643,80.9999998 L186.974643,80.9999998 Z M190.974643,80.9999998 L190.974,81.9999998 L188.974,81.9999998 L188.974643,80.9999998 L190.974643,80.9999998 Z M194.974643,80.9999998 L194.974,81.9999998 L192.974,81.9999998 L192.974643,80.9999998 L194.974643,80.9999998 Z M198.974643,80.9999998 L198.974,81.9999998 L196.974,81.9999998 L196.974643,80.9999998 L198.974643,80.9999998 Z M202.974643,80.9999998 L202.974,81.9999998 L200.974,81.9999998 L200.974643,80.9999998 L202.974643,80.9999998 Z M206.974643,80.9999998 L206.974,81.9999998 L204.974,81.9999998 L204.974643,80.9999998 L206.974643,80.9999998 Z M210.974643,80.9999998 L210.974,81.9999998 L208.974,81.9999998 L208.974643,80.9999998 L210.974643,80.9999998 Z M214.974643,80.9999998 L214.974,81.9999998 L212.974,81.9999998 L212.974643,80.9999998 L214.974643,80.9999998 Z M218.974643,80.9999998 L218.974,81.9999998 L216.974,81.9999998 L216.974643,80.9999998 L218.974643,80.9999998 Z M222.974643,80.9999998 L222.974,81.9999998 L220.974,81.9999998 L220.974643,80.9999998 L222.974643,80.9999998 Z M226.974643,80.9999998 L226.974,81.9999998 L224.974,81.9999998 L224.974643,80.9999998 L226.974643,80.9999998 Z M230.974643,80.9999998 L230.974,81.9999998 L228.974,81.9999998 L228.974643,80.9999998 L230.974643,80.9999998 Z M234.974643,80.9999998 L234.974,81.9999998 L232.974,81.9999998 L232.974643,80.9999998 L234.974643,80.9999998 Z M238.974643,80.9999998 L238.974,81.9999998 L236.974,81.9999998 L236.974643,80.9999998 L238.974643,80.9999998 Z M242.974643,80.9999998 L242.974,81.9999998 L240.974,81.9999998 L240.974643,80.9999998 L242.974643,80.9999998 Z M246.974643,80.9999998 L246.974,81.9999998 L244.974,81.9999998 L244.974643,80.9999998 L246.974643,80.9999998 Z M250.974643,80.9999998 L250.974,81.9999998 L248.974,81.9999998 L248.974643,80.9999998 L250.974643,80.9999998 Z M254.974643,80.9999998 L254.974,81.9999998 L252.974,81.9999998 L252.974643,80.9999998 L254.974643,80.9999998 Z M258.974643,80.9999998 L258.974,81.9999998 L256.974,81.9999998 L256.974643,80.9999998 L258.974643,80.9999998 Z M262.974643,80.9999998 L262.974,81.9999998 L260.974,81.9999998 L260.974643,80.9999998 L262.974643,80.9999998 Z M266.974643,80.9999998 L266.974,81.9999998 L264.974,81.9999998 L264.974643,80.9999998 L266.974643,80.9999998 Z M270.974643,80.9999998 L270.974,81.9999998 L268.974,81.9999998 L268.974643,80.9999998 L270.974643,80.9999998 Z M274.974643,80.9999998 L274.974,81.9999998 L272.974,81.9999998 L272.974643,80.9999998 L274.974643,80.9999998 Z M278.974643,80.9999998 L278.974,81.9999998 L276.974,81.9999998 L276.974643,80.9999998 L278.974643,80.9999998 Z M282.974643,80.9999998 L282.974,81.9999998 L280.974,81.9999998 L280.974643,80.9999998 L282.974643,80.9999998 Z M286.974643,80.9999998 L286.974,81.9999998 L284.974,81.9999998 L284.974643,80.9999998 L286.974643,80.9999998 Z M290.489743,80.544574 L290.845614,81.4791091 C290.230519,81.7132701 289.579078,81.8737323 288.902082,81.9497049 L288.79022,80.9559811 C289.375654,80.8903126 289.945502,80.7517531 290.489743,80.544574 Z M5.41029967,80.5056361 C5.95157753,80.7213183 6.51934132,80.8686603 7.10343083,80.9432289 L6.97697115,81.9352007 C6.30134705,81.8489616 5.65205616,81.6784493 5.03997173,81.4345371 L5.41029967,80.5056361 Z M293.388302,78.4686843 L294.157467,79.1077344 C293.730317,79.6220994 293.239943,80.0820804 292.698265,80.4757556 L292.11069,79.6665857 C292.584521,79.3221774 293.013992,78.9194515 293.388302,78.4686843 Z M2.56157145,78.4076246 C2.93082185,78.8625901 3.35581002,79.2701375 3.82581503,79.6198877 L3.22929764,80.422488 C2.69189028,80.0226419 2.20638657,79.5569385 1.78473163,79.0373231 L2.56157145,78.4076246 Z M294.876065,75.3192015 L295.858348,75.506607 C295.73129,76.1733384 295.521483,76.8107286 295.240021,77.407685 L294.335201,76.9818892 C294.584106,76.4541311 294.766097,75.896352 294.876065,75.3192015 Z M1.10775661,75.2312252 C1.21033515,75.8099081 1.38509885,76.3698753 1.62708778,76.9005768 L0.717340098,77.3157385 C0.443469481,76.7151789 0.241701076,76.07482 0.123099669,75.4057265 L1.10775661,75.2312252 Z M296,71.512 L296,73.512 L295,73.5126783 L295,71.5126783 L296,71.512 Z M1,71.461965 L1,73.461965 L0,73.461 L0,71.461 L1,71.461965 Z M296,67.512 L296,69.512 L295,69.5126783 L295,67.5126783 L296,67.512 Z M1,67.461965 L1,69.461965 L0,69.461 L0,67.461 L1,67.461965 Z M296,63.512 L296,65.512 L295,65.5126783 L295,63.5126783 L296,63.512 Z M1,63.461965 L1,65.461965 L0,65.461 L0,63.461 L1,63.461965 Z M296,59.512 L296,61.512 L295,61.5126783 L295,59.5126783 L296,59.512 Z M1,59.461965 L1,61.461965 L0,61.461 L0,59.461 L1,59.461965 Z M296,55.512 L296,57.512 L295,57.5126783 L295,55.5126783 L296,55.512 Z M1,55.461965 L1,57.461965 L0,57.461 L0,55.461 L1,55.461965 Z M296,51.512 L296,53.512 L295,53.5126783 L295,51.5126783 L296,51.512 Z M1,51.461965 L1,53.461965 L0,53.461 L0,51.461 L1,51.461965 Z M296,47.512 L296,49.512 L295,49.5126783 L295,47.5126783 L296,47.512 Z M1,47.461965 L1,49.461965 L0,49.461 L0,47.461 L1,47.461965 Z M296,43.512 L296,45.512 L295,45.5126783 L295,43.5126783 L296,43.512 Z M1,43.461965 L1,45.461965 L0,45.461 L0,43.461 L1,43.461965 Z M296,39.512 L296,41.512 L295,41.5126783 L295,39.5126783 L296,39.512 Z M1,39.461965 L1,41.461965 L0,41.461 L0,39.461 L1,39.461965 Z M296,35.512 L296,37.512 L295,37.5126783 L295,35.5126783 L296,35.512 Z M1,35.461965 L1,37.461965 L0,37.461 L0,35.461 L1,35.461965 Z M296,31.512 L296,33.512 L295,33.5126783 L295,31.5126783 L296,31.512 Z M1,31.461965 L1,33.461965 L0,33.461 L0,31.461 L1,31.461965 Z M296,27.512 L296,29.512 L295,29.5126783 L295,27.5126783 L296,27.512 Z M1,27.461965 L1,29.461965 L0,29.461 L0,27.461 L1,27.461965 Z M296,23.512 L296,25.512 L295,25.5126783 L295,23.5126783 L296,23.512 Z M1,23.461965 L1,25.461965 L0,25.461 L0,23.461 L1,23.461965 Z M296,19.512 L296,21.512 L295,21.5126783 L295,19.5126783 L296,19.512 Z M1,19.461965 L1,21.461965 L0,21.461 L0,19.461 L1,19.461965 Z M296,15.512 L296,17.512 L295,17.5126783 L295,15.5126783 L296,15.512 Z M1,15.461965 L1,17.461965 L0,17.461 L0,15.461 L1,15.461965 Z M296,11.512 L296,13.512 L295,13.5126783 L295,11.5126783 L296,11.512 Z M1,11.461965 L1,13.461965 L0,13.461 L0,11.461 L1,11.461965 Z M296,8 L296,9.512 L295,9.51268141 L294.999996,7.99230721 C294.999843,7.84931676 294.995414,7.70690426 294.986748,7.5651841 L295.984907,7.50453602 C295.99492,7.6684065 296,7.83361003 296,8 Z M0.0177810396,7.46235014 L1.01560984,7.52821226 C1.00521986,7.68466746 1,7.84198159 1,8 L1,9.46196502 L0,9.461 L0,8 C0,7.81932186 0.00598958881,7.64004267 0.0177810396,7.46235014 Z M294.669225,5.86656128 C294.490852,5.30842466 294.242774,4.77610859 293.93156,4.28094453 L294.777667,3.74792999 C295.130817,4.30965578 295.416068,4.91838179 295.621909,5.56259574 L294.669225,5.86656128 Z M1.26026912,3.68817589 L2.10224493,4.22769122 C1.78656347,4.72006919 1.53367638,5.25013249 1.3502652,5.80651827 L0.400554683,5.49338901 C0.612012081,4.85193918 0.902417359,4.24636767 1.26026912,3.68817589 Z M292.801538,2.90632361 C292.375276,2.5043409 291.900412,2.15691661 291.388372,1.8730526 L291.873899,0.998830409 C292.461218,1.32450134 293.00324,1.72192978 293.488127,2.17927796 L292.801538,2.90632361 Z M4.18303226,0.967564053 L4.66099377,1.84594486 C4.14669322,2.12566396 3.66909236,2.46925191 3.23964084,2.86780054 L2.55894601,2.13523322 C3.04750167,1.68176376 3.5928096,1.28859486 4.18303226,0.967564053 Z M288,0 C288.685315,0 289.350503,0.0861719866 289.985321,0.248271715 L289.737382,1.21704772 C289.174749,1.07342708 288.592644,1 288,1 L288,0 Z M10,0 L10,1 L8.00000019,1 L7.59669449,1.0113952 C7.19522811,1.0341476 6.79959301,1.09086155 6.41261147,1.18053077 L6.18734244,0.206234069 C6.76976317,0.0713155963 7.37655326,0 8,0 L10,0 Z M150,0 L150,1 L148,1 L148,0 L150,0 Z M278,0 L278,1 L276,1 L276,0 L278,0 Z M274,0 L274,1 L272,1 L272,0 L274,0 Z M270,0 L270,1 L268,1 L268,0 L270,0 Z M266,0 L266,1 L264,1 L264,0 L266,0 Z M262,0 L262,1 L260,1 L260,0 L262,0 Z M258,0 L258,1 L256,1 L256,0 L258,0 Z M254,0 L254,1 L252,1 L252,0 L254,0 Z M250,0 L250,1 L248,1 L248,0 L250,0 Z M246,0 L246,1 L244,1 L244,0 L246,0 Z M242,0 L242,1 L240,1 L240,0 L242,0 Z M238,0 L238,1 L236,1 L236,0 L238,0 Z M234,0 L234,1 L232,1 L232,0 L234,0 Z M230,0 L230,1 L228,1 L228,0 L230,0 Z M226,0 L226,1 L224,1 L224,0 L226,0 Z M222,0 L222,1 L220,1 L220,0 L222,0 Z M218,0 L218,1 L216,1 L216,0 L218,0 Z M214,0 L214,1 L212,1 L212,0 L214,0 Z M210,0 L210,1 L208,1 L208,0 L210,0 Z M206,0 L206,1 L204,1 L204,0 L206,0 Z M202,0 L202,1 L200,1 L200,0 L202,0 Z M197.999,0 L198,1 L196,1 L195.999,0 L197.999,0 Z M194,0 L194,1 L192,1 L192,0 L194,0 Z M190,0 L190,1 L188,1 L188,0 L190,0 Z M186,0 L186,1 L184,1 L184,0 L186,0 Z M182,0 L182,1 L180,1 L180,0 L182,0 Z M178,0 L178,1 L176,1 L176,0 L178,0 Z M174,0 L174,1 L172,1 L172,0 L174,0 Z M170,0 L170,1 L168,1 L168,0 L170,0 Z M166,0 L166,1 L164,1 L164,0 L166,0 Z M162,0 L162,1 L160,1 L160,0 L162,0 Z M158,0 L158,1 L156,1 L156,0 L158,0 Z M154,0 L154,1 L152,1 L152,0 L154,0 Z M286,0 L286,1 L284,1 L284,0 L286,0 Z M146,0 L146,1 L144,1 L144,0 L146,0 Z M142,0 L142,1 L140,1 L140,0 L142,0 Z M138,0 L138,1 L136,1 L136,0 L138,0 Z M134,0 L134,1 L132,1 L132,0 L134,0 Z M130,0 L130,1 L128,1 L128,0 L130,0 Z M126,0 L126,1 L124,1 L124,0 L126,0 Z M122,0 L122,1 L120,1 L120,0 L122,0 Z M118,0 L118,1 L116,1 L116,0 L118,0 Z M114,0 L114,1 L112,1 L112,0 L114,0 Z M110,0 L110,1 L108,1 L108,0 L110,0 Z M106,0 L106,1 L104,1 L104,0 L106,0 Z M102,0 L102,1 L100,1 L100,0 L102,0 Z M98,0 L98,1 L96,1 L96,0 L98,0 Z M94,0 L94,1 L92,1 L92,0 L94,0 Z M90,0 L90,1 L88,1 L88,0 L90,0 Z M86,0 L86,1 L84,1 L84,0 L86,0 Z M82,0 L82,1 L80,1 L80,0 L82,0 Z M78,0 L78,1 L76,1 L76,0 L78,0 Z M74,0 L74,1 L72,1 L72,0 L74,0 Z M70,0 L70,1 L68,1 L68,0 L70,0 Z M66,0 L66,1 L64,1 L64,0 L66,0 Z M62,0 L62,1 L60,1 L60,0 L62,0 Z M58,0 L58,1 L56,1 L56,0 L58,0 Z M54,0 L54,1 L52,1 L52,0 L54,0 Z M50,0 L50,1 L48,1 L48,0 L50,0 Z M46,0 L46,1 L44,1 L44,0 L46,0 Z M42,0 L42,1 L40,1 L40,0 L42,0 Z M38,0 L38,1 L36,1 L36,0 L38,0 Z M34,0 L34,1 L32,1 L32,0 L34,0 Z M30,0 L30,1 L28,1 L28,0 L30,0 Z M26,0 L26,1 L24,1 L24,0 L26,0 Z M22,0 L22,1 L20,1 L20,0 L22,0 Z M18,0 L18,1 L16,1 L16,0 L18,0 Z M14,0 L14,1 L12,1 L12,0 L14,0 Z M282,0 L282,1 L280,1 L280,0 L282,0 Z" id="dash" fill-rule="nonzero"></path>
+                </g>
+                <g id="Pooling-@blue" transform="translate(158, 12)">
+                    <path d="M3,36 L115,36 C116.656854,36 118,37.3431458 118,39 L118,55 C118,56.6568542 116.656854,58 115,58 L3,58 C1.34314575,58 0,56.6568542 0,55 L0,39 C0,37.3431458 1.34314575,36 3,36 Z" id="dash-box-@grey" stroke="#8E8E8E" stroke-width="0.8" fill="#F7F7F7" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="1,2.2"></path>
+                    <g id="arrows" transform="translate(19.5, 12)" fill="#8E8E8E" fill-rule="nonzero">
+                        <path d="M78.5,-0.277777778 L81,4.72222222 L79,4.72222222 L79,29.5 C79,29.7761424 78.7761424,30 78.5,30 C78.2238576,30 78,29.7761424 78,29.5 L78,4.72222222 L76,4.72222222 L78.5,-0.277777778 Z M32.5,-0.277777778 L35,4.72222222 L33,4.72222222 L33,29.5 C32.999999,29.7761442 32.7761406,30 32.4999982,30 C32.2238558,30 32,29.7761406 32,29.5 L32,4.72222222 L30,4.72222222 L32.5,-0.277777778 Z M0.5,-0.277777778 L3,4.72222222 L1,4.72222222 L1,29.5 C0.999998995,29.7761442 0.776140555,30 0.49999818,30 C0.223855805,30 5.91970917e-13,29.7761406 5.91970917e-13,29.5 L0,4.72222222 L-2,4.72222222 L0.5,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="txt-TokenPooling" transform="translate(24, 20)">
+                        <g id="bg" xlink:href="#path-18" fill="#F4F1FC">
+                            <use xlink:href="#path-18"></use>
+                        </g>
+                        <g id="TokenPooling" transform="translate(1, 0)" fill="#9172E2" fill-rule="nonzero">
+                            <path d="M5.88720703,9.00488281 C6.05322266,9.00488281 6.19645182,8.95279948 6.31689453,8.84863281 C6.43733724,8.74446615 6.49755859,8.59960938 6.49755859,8.4140625 L6.49755859,2.95507812 L8.19677734,2.95507812 C8.34326172,2.95507812 8.45800781,2.90625 8.54101563,2.80859375 C8.62402344,2.7109375 8.66552734,2.59863281 8.66552734,2.47167969 C8.66552734,2.34472656 8.62402344,2.23242188 8.54101563,2.13476562 C8.45800781,2.03710938 8.34326172,1.98828125 8.19677734,1.98828125 L3.57763672,1.98828125 C3.42789714,1.98828125 3.31070964,2.03710938 3.22607422,2.13476562 C3.1414388,2.23242188 3.09912109,2.34472656 3.09912109,2.47167969 C3.09912109,2.59863281 3.1414388,2.7109375 3.22607422,2.80859375 C3.31070964,2.90625 3.42789714,2.95507812 3.57763672,2.95507812 L5.28173828,2.95507812 L5.28173828,8.4140625 C5.28173828,8.59960938 5.34195964,8.74446615 5.46240234,8.84863281 C5.58284505,8.95279948 5.72444661,9.00488281 5.88720703,9.00488281 Z" id="Path"></path>
+                            <path d="M11.6928711,8.34082031 C11.2403971,8.34082031 10.8863932,8.18457031 10.6308594,7.87207031 C10.3753255,7.55957031 10.2475586,7.125 10.2475586,6.56835937 C10.2475586,6.00520833 10.3753255,5.56575521 10.6308594,5.25 C10.8863932,4.93424479 11.2403971,4.77636719 11.6928711,4.77636719 C12.1453451,4.77636719 12.499349,4.93505859 12.7548828,5.25244141 C13.0104167,5.56982422 13.1381836,6.00846354 13.1381836,6.56835937 C13.1381836,7.125 13.0112305,7.55957031 12.7573242,7.87207031 C12.503418,8.18457031 12.1486003,8.34082031 11.6928711,8.34082031 Z M11.6928711,9.06835938 C12.0249023,9.06835938 12.3292643,9.02197266 12.605957,8.92919922 C12.8826497,8.83642578 13.1178385,8.71110026 13.3115234,8.55322266 C13.5052083,8.39534505 13.6695964,8.20898438 13.8046875,7.99414063 C13.9397786,7.77929688 14.0390625,7.55224609 14.1025391,7.31298828 C14.1660156,7.07373047 14.1977539,6.82552083 14.1977539,6.56835938 C14.1977539,6.22981771 14.144043,5.91080729 14.0366211,5.61132813 C13.9291992,5.31184896 13.773763,5.04492188 13.5703125,4.81054688 C13.366862,4.57617187 13.1040039,4.3914388 12.7817383,4.25634766 C12.4594727,4.12125651 12.0965169,4.05371094 11.6928711,4.05371094 C11.2859701,4.05371094 10.9197591,4.12288411 10.5942383,4.26123047 C10.2687174,4.39957682 10.0058594,4.5867513 9.80566406,4.82275391 C9.60546875,5.05875651 9.45247396,5.32568359 9.34667969,5.62353516 C9.24088542,5.92138672 9.18798828,6.23632813 9.18798828,6.56835938 C9.18798828,6.89388021 9.24007161,7.2039388 9.34423828,7.49853516 C9.44840495,7.79313151 9.60058594,8.05843099 9.80078125,8.29443359 C10.0009766,8.5304362 10.2638346,8.71842448 10.5893555,8.85839844 C10.9148763,8.9983724 11.2827148,9.06835938 11.6928711,9.06835938 Z" id="Shape"></path>
+                            <path d="M15.7993164,9 C15.9458008,9 16.0727539,8.95198568 16.1801758,8.85595703 C16.2875977,8.75992839 16.3413086,8.62890625 16.3413086,8.46289062 L16.3413086,6.50976562 L18.5776367,8.86328125 C18.6785482,8.97070312 18.7957357,9.02441406 18.9291992,9.02441406 C19.0138346,9.02441406 19.0952148,9.00244141 19.1733398,8.95849609 C19.2514648,8.91455078 19.3141276,8.85514323 19.3613281,8.78027344 C19.4085286,8.70540365 19.4321289,8.62727865 19.4321289,8.54589844 C19.4321289,8.4938151 19.4215495,8.44173177 19.4003906,8.38964844 C19.3792318,8.3375651 19.3458659,8.28710938 19.300293,8.23828125 L17.4887695,6.34375 L19.0952148,4.83496094 C19.1245117,4.80891927 19.1489258,4.7796224 19.168457,4.74707031 C19.1879883,4.71451823 19.2018229,4.68196615 19.2099609,4.64941406 C19.218099,4.61686198 19.222168,4.58268229 19.222168,4.546875 C19.222168,4.46875 19.2001953,4.3914388 19.15625,4.31494141 C19.1123047,4.23844401 19.0537109,4.17659505 18.9804688,4.12939453 C18.9072266,4.08219401 18.8299154,4.05859375 18.7485352,4.05859375 C18.699707,4.05859375 18.6508789,4.06917318 18.6020508,4.09033203 C18.5532227,4.11149089 18.5060221,4.14322917 18.4604492,4.18554688 L16.3413086,6.21679688 L16.3413086,2.52539063 C16.3413086,2.36263021 16.2892253,2.23323568 16.1850586,2.13720703 C16.0808919,2.04117839 15.957194,1.99316406 15.8139648,1.99316406 C15.6674805,1.99316406 15.5405273,2.04036458 15.4331055,2.13476562 C15.3256836,2.22916667 15.2719727,2.359375 15.2719727,2.52539063 L15.2719727,8.46289062 C15.2719727,8.63216146 15.324056,8.7639974 15.4282227,8.85839844 C15.5323893,8.95279948 15.6560872,9 15.7993164,9 Z" id="Path"></path>
+                            <path d="M22.4790039,9.06835938 C23.1886393,9.06835938 23.7875977,8.85351563 24.2758789,8.42382813 C24.3833008,8.3359375 24.4370117,8.23014323 24.4370117,8.10644531 C24.4370117,8.00878906 24.4036458,7.92252604 24.3369141,7.84765625 C24.2701823,7.77278646 24.1912435,7.73535156 24.1000977,7.73535156 C24.0382487,7.73535156 23.9763997,7.75651042 23.9145508,7.79882813 C23.4165039,8.15039062 22.9640299,8.32617188 22.5571289,8.32617188 C22.1176758,8.32291667 21.7669271,8.19840495 21.5048828,7.95263672 C21.2428385,7.70686849 21.1004232,7.32519531 21.0776367,6.80761719 L24.2807617,6.80761719 C24.3914388,6.80761719 24.4720052,6.76936849 24.5224609,6.69287109 C24.5729167,6.6163737 24.5981445,6.51953125 24.5981445,6.40234375 C24.5916341,6.08007812 24.542806,5.78141276 24.4516602,5.50634766 C24.3605143,5.23128255 24.2278646,4.98388672 24.0537109,4.76416016 C23.8795573,4.54443359 23.6484375,4.37109375 23.3603516,4.24414062 C23.0722656,4.1171875 22.7410482,4.05371094 22.3666992,4.05371094 C21.9988607,4.05371094 21.6635742,4.12044271 21.3608398,4.25390625 C21.0581055,4.38736979 20.809082,4.56803385 20.6137695,4.79589844 C20.418457,5.02376302 20.2679036,5.28499349 20.1621094,5.57958984 C20.0563151,5.8741862 20.003418,6.1858724 20.003418,6.51464844 C20.003418,7.30240885 20.2304688,7.92333984 20.6845703,8.37744141 C21.1386719,8.83154297 21.7368164,9.06184896 22.4790039,9.06835938 Z M21.0874023,6.16308594 C21.1036784,5.98730469 21.1419271,5.81966146 21.2021484,5.66015625 C21.2623698,5.50065104 21.3445638,5.35416667 21.4487305,5.22070312 C21.5528971,5.08723958 21.6879883,4.97981771 21.8540039,4.8984375 C22.0200195,4.81705729 22.2055664,4.77636719 22.4106445,4.77636719 C22.8077799,4.77636719 23.1105143,4.90820312 23.3188477,5.171875 C23.527181,5.43554688 23.6459961,5.76595052 23.675293,6.16308594 L21.0874023,6.16308594 Z" id="Shape"></path>
+                            <path d="M26.1655273,9 C26.3120117,9 26.438151,8.95442708 26.5439453,8.86328125 C26.6497396,8.77213542 26.7026367,8.6468099 26.7026367,8.48730469 L26.7026367,6.265625 C26.7058919,5.81966146 26.8328451,5.45996094 27.0834961,5.18652344 C27.3341471,4.91308594 27.6336263,4.77636719 27.9819336,4.77636719 C28.2814128,4.77636719 28.5239258,4.87402344 28.7094727,5.06933594 C28.8950195,5.26464844 28.987793,5.54622396 28.987793,5.9140625 L28.987793,8.48730469 C28.987793,8.6500651 29.0398763,8.77620443 29.144043,8.86572266 C29.2482096,8.95524089 29.3719076,9 29.5151367,9 C29.6616211,9 29.7885742,8.95524089 29.8959961,8.86572266 C30.003418,8.77620443 30.0571289,8.6500651 30.0571289,8.48730469 L30.0571289,5.92382813 C30.0571289,5.60481771 30.0099284,5.32324219 29.9155273,5.07910156 C29.8211263,4.83496094 29.690918,4.63964844 29.5249023,4.49316406 C29.3588867,4.34667969 29.1708984,4.23681641 28.9609375,4.16357422 C28.7509766,4.09033203 28.5255534,4.05371094 28.284668,4.05371094 C27.9135742,4.05371094 27.5913086,4.12451172 27.3178711,4.26611328 C27.0444336,4.40771484 26.8393555,4.61360677 26.7026367,4.88378906 L26.7026367,4.61035156 C26.7026367,4.45735677 26.6505534,4.33772786 26.5463867,4.25146484 C26.4422201,4.16520182 26.3168945,4.12207031 26.1704102,4.12207031 C26.0239258,4.12207031 25.8969727,4.16682943 25.7895508,4.25634766 C25.6821289,4.34586589 25.628418,4.46875 25.628418,4.625 L25.628418,8.48730469 C25.628418,8.6500651 25.6813151,8.77620443 25.7871094,8.86572266 C25.8929036,8.95524089 26.019043,9 26.1655273,9 Z" id="Path"></path>
+                            <path d="M32.1518555,9.00488281 C32.3178711,9.00488281 32.4602865,8.95361328 32.5791016,8.85107422 C32.6979167,8.74853516 32.7573242,8.60449219 32.7573242,8.41894531 L32.7573242,6.11914062 L34.1782227,6.11914062 C35.0213216,6.11914062 35.6691081,5.95719401 36.121582,5.63330078 C36.574056,5.30940755 36.800293,4.78125 36.800293,4.04882813 C36.800293,3.32617188 36.5846354,2.80208333 36.1533203,2.4765625 C35.7220052,2.15104167 35.0994466,1.98828125 34.2856445,1.98828125 L32.1616211,1.98828125 C31.9923503,1.98828125 31.8474935,2.0476888 31.7270508,2.16650391 C31.6066081,2.28531901 31.5463867,2.42773438 31.5463867,2.59375 L31.5463867,8.41894531 C31.5463867,8.60449219 31.6066081,8.74853516 31.7270508,8.85107422 C31.8474935,8.95361328 31.9890951,9.00488281 32.1518555,9.00488281 Z M32.7573242,5.19628906 L32.7573242,2.95019531 L34.1782227,2.95019531 C34.6730143,2.95019531 35.0351562,3.02587891 35.2646484,3.17724609 C35.4941406,3.32861328 35.6088867,3.61914063 35.6088867,4.04882813 C35.6088867,4.48502604 35.4908854,4.78531901 35.2548828,4.94970703 C35.0188802,5.11409505 34.6486003,5.19628906 34.144043,5.19628906 L32.7573242,5.19628906 Z" id="Shape"></path>
+                            <path d="M39.8618164,8.34082031 C39.4093424,8.34082031 39.0553385,8.18457031 38.7998047,7.87207031 C38.5442708,7.55957031 38.4165039,7.125 38.4165039,6.56835937 C38.4165039,6.00520833 38.5442708,5.56575521 38.7998047,5.25 C39.0553385,4.93424479 39.4093424,4.77636719 39.8618164,4.77636719 C40.3142904,4.77636719 40.6682943,4.93505859 40.9238281,5.25244141 C41.179362,5.56982422 41.3071289,6.00846354 41.3071289,6.56835937 C41.3071289,7.125 41.1801758,7.55957031 40.9262695,7.87207031 C40.6723633,8.18457031 40.3175456,8.34082031 39.8618164,8.34082031 Z M39.8618164,9.06835938 C40.1938477,9.06835938 40.4982096,9.02197266 40.7749023,8.92919922 C41.0515951,8.83642578 41.2867839,8.71110026 41.4804688,8.55322266 C41.6741536,8.39534505 41.8385417,8.20898438 41.9736328,7.99414063 C42.108724,7.77929688 42.2080078,7.55224609 42.2714844,7.31298828 C42.3349609,7.07373047 42.3666992,6.82552083 42.3666992,6.56835938 C42.3666992,6.22981771 42.3129883,5.91080729 42.2055664,5.61132813 C42.0981445,5.31184896 41.9427083,5.04492188 41.7392578,4.81054688 C41.5358073,4.57617187 41.2729492,4.3914388 40.9506836,4.25634766 C40.628418,4.12125651 40.2654622,4.05371094 39.8618164,4.05371094 C39.4549154,4.05371094 39.0887044,4.12288411 38.7631836,4.26123047 C38.4376628,4.39957682 38.1748047,4.5867513 37.9746094,4.82275391 C37.7744141,5.05875651 37.6214193,5.32568359 37.515625,5.62353516 C37.4098307,5.92138672 37.3569336,6.23632813 37.3569336,6.56835938 C37.3569336,6.89388021 37.4090169,7.2039388 37.5131836,7.49853516 C37.6173503,7.79313151 37.7695312,8.05843099 37.9697266,8.29443359 C38.1699219,8.5304362 38.4327799,8.71842448 38.7583008,8.85839844 C39.0838216,8.9983724 39.4516602,9.06835938 39.8618164,9.06835938 Z" id="Shape"></path>
+                            <path d="M45.6479492,8.34082031 C45.1954753,8.34082031 44.8414714,8.18457031 44.5859375,7.87207031 C44.3304036,7.55957031 44.2026367,7.125 44.2026367,6.56835937 C44.2026367,6.00520833 44.3304036,5.56575521 44.5859375,5.25 C44.8414714,4.93424479 45.1954753,4.77636719 45.6479492,4.77636719 C46.1004232,4.77636719 46.4544271,4.93505859 46.7099609,5.25244141 C46.9654948,5.56982422 47.0932617,6.00846354 47.0932617,6.56835937 C47.0932617,7.125 46.9663086,7.55957031 46.7124023,7.87207031 C46.4584961,8.18457031 46.1036784,8.34082031 45.6479492,8.34082031 Z M45.6479492,9.06835938 C45.9799805,9.06835938 46.2843424,9.02197266 46.5610352,8.92919922 C46.8377279,8.83642578 47.0729167,8.71110026 47.2666016,8.55322266 C47.4602865,8.39534505 47.6246745,8.20898438 47.7597656,7.99414063 C47.8948568,7.77929688 47.9941406,7.55224609 48.0576172,7.31298828 C48.1210938,7.07373047 48.152832,6.82552083 48.152832,6.56835938 C48.152832,6.22981771 48.0991211,5.91080729 47.9916992,5.61132813 C47.8842773,5.31184896 47.7288411,5.04492188 47.5253906,4.81054688 C47.3219401,4.57617187 47.059082,4.3914388 46.7368164,4.25634766 C46.4145508,4.12125651 46.0515951,4.05371094 45.6479492,4.05371094 C45.2410482,4.05371094 44.8748372,4.12288411 44.5493164,4.26123047 C44.2237956,4.39957682 43.9609375,4.5867513 43.7607422,4.82275391 C43.5605469,5.05875651 43.4075521,5.32568359 43.3017578,5.62353516 C43.1959635,5.92138672 43.1430664,6.23632813 43.1430664,6.56835938 C43.1430664,6.89388021 43.1951497,7.2039388 43.2993164,7.49853516 C43.4034831,7.79313151 43.5556641,8.05843099 43.7558594,8.29443359 C43.9560547,8.5304362 44.2189128,8.71842448 44.5444336,8.85839844 C44.8699544,8.9983724 45.237793,9.06835938 45.6479492,9.06835938 Z" id="Shape"></path>
+                            <path d="M49.7739258,9 C49.9204102,9 50.0481771,8.95198568 50.1572266,8.85595703 C50.266276,8.75992839 50.3208008,8.63053385 50.3208008,8.46777344 L50.3208008,2.52539062 C50.3208008,2.36263021 50.2670898,2.23323568 50.159668,2.13720703 C50.0522461,2.04117839 49.9269206,1.99316406 49.7836914,1.99316406 C49.637207,1.99316406 49.5110677,2.04036458 49.4052734,2.13476562 C49.2994792,2.22916667 49.246582,2.359375 49.246582,2.52539062 L49.246582,8.46777344 C49.246582,8.63704427 49.2978516,8.76806641 49.4003906,8.86083984 C49.5029297,8.95361328 49.6274414,9 49.7739258,9 Z" id="Path"></path>
+                            <path d="M52.2446289,9 C52.3911133,9 52.5188802,8.95198568 52.6279297,8.85595703 C52.7369792,8.75992839 52.7915039,8.63053385 52.7915039,8.46777344 L52.7915039,4.65429688 C52.7915039,4.48828125 52.7386068,4.35807292 52.6328125,4.26367188 C52.5270182,4.16927083 52.4025065,4.12207031 52.2592773,4.12207031 C52.112793,4.12207031 51.9858398,4.16927083 51.878418,4.26367188 C51.7709961,4.35807292 51.7172852,4.48828125 51.7172852,4.65429688 L51.7172852,8.46777344 C51.7172852,8.63704427 51.7685547,8.76806641 51.8710938,8.86083984 C51.9736328,8.95361328 52.0981445,9 52.2446289,9 Z M52.2495117,3.1015625 C52.4448242,3.1015625 52.601888,3.04541016 52.7207031,2.93310547 C52.8395182,2.82080078 52.8989258,2.671875 52.8989258,2.48632812 C52.8989258,2.30403646 52.840332,2.15592448 52.7231445,2.04199219 C52.605957,1.9280599 52.449707,1.87109375 52.2543945,1.87109375 C52.059082,1.87109375 51.9020182,1.9280599 51.7832031,2.04199219 C51.664388,2.15592448 51.6049805,2.30403646 51.6049805,2.48632812 C51.6049805,2.671875 51.664388,2.82080078 51.7832031,2.93310547 C51.9020182,3.04541016 52.0574544,3.1015625 52.2495117,3.1015625 Z" id="Shape"></path>
+                            <path d="M54.6420898,9 C54.7885742,9 54.9147135,8.95442708 55.0205078,8.86328125 C55.1263021,8.77213542 55.1791992,8.6468099 55.1791992,8.48730469 L55.1791992,6.265625 C55.1824544,5.81966146 55.3094076,5.45996094 55.5600586,5.18652344 C55.8107096,4.91308594 56.1101888,4.77636719 56.4584961,4.77636719 C56.7579753,4.77636719 57.0004883,4.87402344 57.1860352,5.06933594 C57.371582,5.26464844 57.4643555,5.54622396 57.4643555,5.9140625 L57.4643555,8.48730469 C57.4643555,8.6500651 57.5164388,8.77620443 57.6206055,8.86572266 C57.7247721,8.95524089 57.8484701,9 57.9916992,9 C58.1381836,9 58.2651367,8.95524089 58.3725586,8.86572266 C58.4799805,8.77620443 58.5336914,8.6500651 58.5336914,8.48730469 L58.5336914,5.92382813 C58.5336914,5.60481771 58.4864909,5.32324219 58.3920898,5.07910156 C58.2976888,4.83496094 58.1674805,4.63964844 58.0014648,4.49316406 C57.8354492,4.34667969 57.6474609,4.23681641 57.4375,4.16357422 C57.2275391,4.09033203 57.0021159,4.05371094 56.7612305,4.05371094 C56.3901367,4.05371094 56.0678711,4.12451172 55.7944336,4.26611328 C55.5209961,4.40771484 55.315918,4.61360677 55.1791992,4.88378906 L55.1791992,4.61035156 C55.1791992,4.45735677 55.1271159,4.33772786 55.0229492,4.25146484 C54.9187826,4.16520182 54.793457,4.12207031 54.6469727,4.12207031 C54.5004883,4.12207031 54.3735352,4.16682943 54.2661133,4.25634766 C54.1586914,4.34586589 54.1049805,4.46875 54.1049805,4.625 L54.1049805,8.48730469 C54.1049805,8.6500651 54.1578776,8.77620443 54.2636719,8.86572266 C54.3694661,8.95524089 54.4956055,9 54.6420898,9 Z" id="Path"></path>
+                            <path d="M61.9614258,11.1337891 C62.7003581,11.1337891 63.288737,10.9498698 63.7265625,10.5820312 C64.164388,10.2141927 64.3833008,9.66080729 64.3833008,8.921875 L64.3833008,4.62988281 C64.3833008,4.4703776 64.3328451,4.34586589 64.2319336,4.25634766 C64.1310221,4.16682943 64.0138346,4.12207031 63.8803711,4.12207031 C63.7566732,4.12207031 63.6468099,4.15869141 63.5507812,4.23193359 C63.4547526,4.30517578 63.3986003,4.40690104 63.3823242,4.53710938 L63.3823242,4.86425781 C63.2716471,4.69173177 63.156901,4.55013021 63.0380859,4.43945312 C62.9192708,4.32877604 62.7573242,4.23681641 62.5522461,4.16357422 C62.347168,4.09033203 62.1062826,4.05371094 61.8295898,4.05371094 C61.3673503,4.05371094 60.9637044,4.16438802 60.6186523,4.38574219 C60.2736003,4.60709635 60.0123698,4.90820313 59.8349609,5.2890625 C59.6575521,5.66992188 59.5688477,6.10449219 59.5688477,6.59277344 C59.5688477,7.3186849 59.7755534,7.90136719 60.1889648,8.34082031 C60.6023763,8.78027344 61.1590169,9 61.8588867,9 C62.5620117,9 63.0616862,8.7281901 63.3579102,8.18457031 L63.3579102,9 C63.3579102,9.44596354 63.226888,9.79264323 62.9648438,10.0400391 C62.7027995,10.2874349 62.3422852,10.4111328 61.8833008,10.4111328 C61.7400716,10.4111328 61.5960286,10.3989258 61.4511719,10.3745117 C61.3063151,10.3500977 61.1948242,10.3256836 61.1166992,10.3012695 C61.0385742,10.2768555 60.9303385,10.2386068 60.7919922,10.1865234 C60.6536458,10.1344401 60.5649414,10.101888 60.5258789,10.0888672 C60.5096029,10.0823568 60.492513,10.0766602 60.4746094,10.0717773 C60.4567057,10.0668945 60.4404297,10.0636393 60.4257812,10.0620117 C60.4111328,10.0603841 60.3956706,10.0595703 60.3793945,10.0595703 C60.3142904,10.0595703 60.2556966,10.0766602 60.2036133,10.1108398 C60.1515299,10.1450195 60.1116536,10.1889648 60.0839844,10.2426758 C60.0563151,10.2963867 60.0424805,10.3533529 60.0424805,10.4135742 C60.0424805,10.4737956 60.0579427,10.5323893 60.0888672,10.5893555 C60.1197917,10.6463216 60.1661784,10.6943359 60.2280273,10.7333984 C60.4070638,10.8440755 60.6520182,10.9384766 60.9628906,11.0166016 C61.273763,11.0947266 61.6066081,11.1337891 61.9614258,11.1337891 Z M62.0200195,8.30175781 C61.6293945,8.29199219 61.3014323,8.1414388 61.0361328,7.85009766 C60.7708333,7.55875651 60.6381836,7.12988281 60.6381836,6.56347656 C60.6381836,6.31933594 60.6650391,6.09309896 60.71875,5.88476562 C60.7724609,5.67643229 60.8530273,5.48763021 60.9604492,5.31835938 C61.0678711,5.14908854 61.2135417,5.0164388 61.3974609,4.92041016 C61.5813802,4.82438151 61.7937826,4.77636719 62.034668,4.77636719 C62.2755534,4.77636719 62.4830729,4.8219401 62.6572266,4.91308594 C62.8313802,5.00423177 62.968099,5.1336263 63.0673828,5.30126953 C63.1666667,5.46891276 63.2390951,5.65690104 63.284668,5.86523438 C63.3302409,6.07356771 63.3530273,6.30957031 63.3530273,6.57324219 C63.3497721,7.15592448 63.2260742,7.58886719 62.9819336,7.87207031 C62.737793,8.15527344 62.4171549,8.2985026 62.0200195,8.30175781 Z" id="Shape"></path>
+                        </g>
+                    </g>
+                    <g id="row-bottom" transform="translate(8, 41)">
+                        <g id="..." transform="translate(62.7, 6)" fill="#8E8E8E">
+                            <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                            <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                            <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                        </g>
+                        <g id="1" fill="#30A2FF">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        </g>
+                        <g id="2" transform="translate(32, 0)" fill="#30A2FF">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        </g>
+                        <g id="3" transform="translate(78, 0)" fill="#30A2FF">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        </g>
+                    </g>
+                    <g id="row-top" transform="translate(8, 0)">
+                        <g id="..." transform="translate(62.7, 6)" fill="#8E8E8E">
+                            <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                            <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                            <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                        </g>
+                        <g id="1" fill="#30A2FF">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        </g>
+                        <g id="2" transform="translate(32, 0)" fill="#30A2FF">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        </g>
+                        <g id="3" transform="translate(78, 0)" fill="#30A2FF">
+                            <rect id="Rectangle" fill-opacity="0.6" x="0" y="0" width="24" height="12" rx="3"></rect>
+                            <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill-rule="nonzero"></path>
+                        </g>
+                    </g>
+                </g>
+                <g id="Pooling-@yellow" transform="translate(20, 12)">
+                    <g id="dash-box-@grey" transform="translate(-1.4, 35.6)">
+                        <path d="M4.40000021,0.4 L116.4,0.4 C118.056854,0.4 119.4,1.74314575 119.4,3.4 L119.4,19.4 C119.4,21.0568542 118.056854,22.4 116.4,22.4 L4.40000021,22.4 C2.74314596,22.4 1.40000021,21.0568542 1.40000021,19.4 L1.40000021,3.4 C1.40000021,1.74314575 2.74314596,0.4 4.40000021,0.4 Z" id="bg" fill="#F7F7F7"></path>
+                        <path d="M97.1844026,22.4 C97.1844026,22.6209139 97.0053165,22.8 96.7844026,22.8 L95.7844026,22.8 C95.5634887,22.8 95.3844026,22.6209139 95.3844026,22.4 C95.3844026,22.1790861 95.5634887,22 95.7844026,22 L96.7844025,22 C97.0053165,22 97.1844026,22.1790861 97.1844026,22.4 Z M7.58440258,22.4 C7.58440258,22.6209139 7.40531648,22.8 7.18440258,22.8 L6.18440258,22.8 C5.96348868,22.8 5.78440258,22.6209139 5.78440258,22.4 C5.78440258,22.1790861 5.96348868,22 6.18440258,22 L7.18440261,22 C7.40531648,22 7.58440258,22.1790861 7.58440258,22.4 Z M10.7844026,22.4 C10.7844026,22.6209139 10.6053165,22.8 10.3844026,22.8 L9.38440258,22.8 C9.16348868,22.8 8.98440258,22.6209139 8.98440258,22.4 C8.98440258,22.1790861 9.16348868,22 9.38440258,22 L10.3844026,22 C10.6053165,22 10.7844026,22.1790861 10.7844026,22.4 Z M100.384403,22.4 C100.384403,22.6209139 100.205316,22.8 99.9844026,22.8 L98.9844026,22.8 C98.7634887,22.8 98.5844026,22.6209139 98.5844026,22.4 C98.5844026,22.1790861 98.7634887,22 98.9844026,22 L99.9844025,22 C100.205316,22 100.384403,22.1790861 100.384403,22.4 Z M17.1844026,22.4 C17.1844026,22.6209139 17.0053165,22.8 16.7844026,22.8 L15.7844026,22.8 C15.5634887,22.8 15.3844026,22.6209139 15.3844026,22.4 C15.3844026,22.1790861 15.5634887,22 15.7844026,22 L16.7844026,22 C17.0053165,22 17.1844026,22.1790861 17.1844026,22.4 Z M20.3844026,22.4 C20.3844026,22.6209139 20.2053165,22.8 19.9844026,22.8 L18.9844026,22.8 C18.7634887,22.8 18.5844026,22.6209139 18.5844026,22.4 C18.5844026,22.1790861 18.7634887,22 18.9844026,22 L19.9844026,22 C20.2053165,22 20.3844026,22.1790861 20.3844026,22.4 Z M23.5844026,22.4 C23.5844026,22.6209139 23.4053165,22.8 23.1844026,22.8 L22.1844026,22.8 C21.9634887,22.8 21.7844026,22.6209139 21.7844026,22.4 C21.7844026,22.1790861 21.9634887,22 22.1844026,22 L23.1844026,22 C23.4053165,22 23.5844026,22.1790861 23.5844026,22.4 Z M26.7844026,22.4 C26.7844026,22.6209139 26.6053165,22.8 26.3844026,22.8 L25.3844026,22.8 C25.1634887,22.8 24.9844026,22.6209139 24.9844026,22.4 C24.9844026,22.1790861 25.1634887,22 25.3844026,22 L26.3844026,22 C26.6053165,22 26.7844026,22.1790861 26.7844026,22.4 Z M29.9844026,22.4 C29.9844026,22.6209139 29.8053165,22.8 29.5844026,22.8 L28.5844026,22.8 C28.3634887,22.8 28.1844026,22.6209139 28.1844026,22.4 C28.1844026,22.1790861 28.3634887,22 28.5844026,22 L29.5844027,22 C29.8053165,22 29.9844026,22.1790861 29.9844026,22.4 Z M33.1844026,22.4 C33.1844026,22.6209139 33.0053165,22.8 32.7844026,22.8 L31.7844026,22.8 C31.5634887,22.8 31.3844026,22.6209139 31.3844026,22.4 C31.3844026,22.1790861 31.5634887,22 31.7844026,22 L32.7844027,22 C33.0053165,22 33.1844026,22.1790861 33.1844026,22.4 Z M36.3844026,22.4 C36.3844026,22.6209139 36.2053165,22.8 35.9844026,22.8 L34.9844026,22.8 C34.7634887,22.8 34.5844026,22.6209139 34.5844026,22.4 C34.5844026,22.1790861 34.7634887,22 34.9844026,22 L35.9844027,22 C36.2053165,22 36.3844026,22.1790861 36.3844026,22.4 Z M39.5844026,22.4 C39.5844026,22.6209139 39.4053165,22.8 39.1844026,22.8 L38.1844026,22.8 C37.9634887,22.8 37.7844026,22.6209139 37.7844026,22.4 C37.7844026,22.1790861 37.9634887,22 38.1844026,22 L39.1844027,22 C39.4053165,22 39.5844026,22.1790861 39.5844026,22.4 Z M42.7844026,22.4 C42.7844026,22.6209139 42.6053165,22.8 42.3844026,22.8 L41.3844026,22.8 C41.1634887,22.8 40.9844026,22.6209139 40.9844026,22.4 C40.9844026,22.1790861 41.1634887,22 41.3844026,22 L42.3844027,22 C42.6053165,22 42.7844026,22.1790861 42.7844026,22.4 Z M45.9844026,22.4 C45.9844026,22.6209139 45.8053165,22.8 45.5844026,22.8 L44.5844026,22.8 C44.3634887,22.8 44.1844026,22.6209139 44.1844026,22.4 C44.1844026,22.1790861 44.3634887,22 44.5844026,22 L45.5844027,22 C45.8053165,22 45.9844026,22.1790861 45.9844026,22.4 Z M49.1844026,22.4 C49.1844026,22.6209139 49.0053165,22.8 48.7844026,22.8 L47.7844026,22.8 C47.5634887,22.8 47.3844026,22.6209139 47.3844026,22.4 C47.3844026,22.1790861 47.5634887,22 47.7844026,22 L48.7844027,22 C49.0053165,22 49.1844026,22.1790861 49.1844026,22.4 Z M52.3844026,22.4 C52.3844026,22.6209139 52.2053165,22.8 51.9844026,22.8 L50.9844026,22.8 C50.7634887,22.8 50.5844026,22.6209139 50.5844026,22.4 C50.5844026,22.1790861 50.7634887,22 50.9844026,22 L51.9844027,22 C52.2053165,22 52.3844026,22.1790861 52.3844026,22.4 Z M55.5844026,22.4 C55.5844026,22.6209139 55.4053165,22.8 55.1844026,22.8 L54.1844026,22.8 C53.9634887,22.8 53.7844026,22.6209139 53.7844026,22.4 C53.7844026,22.1790861 53.9634887,22 54.1844026,22 L55.1844027,22 C55.4053165,22 55.5844026,22.1790861 55.5844026,22.4 Z M58.7844026,22.4 C58.7844026,22.6209139 58.6053165,22.8 58.3844026,22.8 L57.3844026,22.8 C57.1634887,22.8 56.9844026,22.6209139 56.9844026,22.4 C56.9844026,22.1790861 57.1634887,22 57.3844026,22 L58.3844027,22 C58.6053165,22 58.7844026,22.1790861 58.7844026,22.4 Z M61.9844026,22.4 C61.9844026,22.6209139 61.8053165,22.8 61.5844026,22.8 L60.5844026,22.8 C60.3634887,22.8 60.1844026,22.6209139 60.1844026,22.4 C60.1844026,22.1790861 60.3634887,22 60.5844026,22 L61.5844025,22 C61.8053165,22 61.9844026,22.1790861 61.9844026,22.4 Z M65.1844026,22.4 C65.1844026,22.6209139 65.0053165,22.8 64.7844026,22.8 L63.7844026,22.8 C63.5634887,22.8 63.3844026,22.6209139 63.3844026,22.4 C63.3844026,22.1790861 63.5634887,22 63.7844026,22 L64.7844025,22 C65.0053165,22 65.1844026,22.1790861 65.1844026,22.4 Z M68.3844026,22.4 C68.3844026,22.6209139 68.2053165,22.8 67.9844026,22.8 L66.9844026,22.8 C66.7634887,22.8 66.5844026,22.6209139 66.5844026,22.4 C66.5844026,22.1790861 66.7634887,22 66.9844026,22 L67.9844025,22 C68.2053165,22 68.3844026,22.1790861 68.3844026,22.4 Z M13.9844026,22.4 C13.9844026,22.6209139 13.8053165,22.8 13.5844026,22.8 L12.5844026,22.8 C12.3634887,22.8 12.1844026,22.6209139 12.1844026,22.4 C12.1844026,22.1790861 12.3634887,22 12.5844026,22 L13.5844026,22 C13.8053165,22 13.9844026,22.1790861 13.9844026,22.4 Z M71.5844026,22.4 C71.5844026,22.6209139 71.4053165,22.8 71.1844026,22.8 L70.1844026,22.8 C69.9634887,22.8 69.7844026,22.6209139 69.7844026,22.4 C69.7844026,22.1790861 69.9634887,22 70.1844026,22 L71.1844025,22 C71.4053165,22 71.5844026,22.1790861 71.5844026,22.4 Z M74.7844026,22.4 C74.7844026,22.6209139 74.6053165,22.8 74.3844026,22.8 L73.3844026,22.8 C73.1634887,22.8 72.9844026,22.6209139 72.9844026,22.4 C72.9844026,22.1790861 73.1634887,22 73.3844026,22 L74.3844025,22 C74.6053165,22 74.7844026,22.1790861 74.7844026,22.4 Z M77.9844026,22.4 C77.9844026,22.6209139 77.8053165,22.8 77.5844026,22.8 L76.5844026,22.8 C76.3634887,22.8 76.1844026,22.6209139 76.1844026,22.4 C76.1844026,22.1790861 76.3634887,22 76.5844026,22 L77.5844025,22 C77.8053165,22 77.9844026,22.1790861 77.9844026,22.4 Z M81.1844026,22.4 C81.1844026,22.6209139 81.0053165,22.8 80.7844026,22.8 L79.7844026,22.8 C79.5634887,22.8 79.3844026,22.6209139 79.3844026,22.4 C79.3844026,22.1790861 79.5634887,22 79.7844026,22 L80.7844025,22 C81.0053165,22 81.1844026,22.1790861 81.1844026,22.4 Z M84.3844026,22.4 C84.3844026,22.6209139 84.2053165,22.8 83.9844026,22.8 L82.9844026,22.8 C82.7634887,22.8 82.5844026,22.6209139 82.5844026,22.4 C82.5844026,22.1790861 82.7634887,22 82.9844026,22 L83.9844025,22 C84.2053165,22 84.3844026,22.1790861 84.3844026,22.4 Z M87.5844026,22.4 C87.5844026,22.6209139 87.4053165,22.8 87.1844026,22.8 L86.1844026,22.8 C85.9634887,22.8 85.7844026,22.6209139 85.7844026,22.4 C85.7844026,22.1790861 85.9634887,22 86.1844026,22 L87.1844025,22 C87.4053165,22 87.5844026,22.1790861 87.5844026,22.4 Z M90.7844026,22.4 C90.7844026,22.6209139 90.6053165,22.8 90.3844026,22.8 L89.3844026,22.8 C89.1634887,22.8 88.9844026,22.6209139 88.9844026,22.4 C88.9844026,22.1790861 89.1634887,22 89.3844026,22 L90.3844025,22 C90.6053165,22 90.7844026,22.1790861 90.7844026,22.4 Z M93.9844026,22.4 C93.9844026,22.6209139 93.8053165,22.8 93.5844026,22.8 L92.5844026,22.8 C92.3634887,22.8 92.1844026,22.6209139 92.1844026,22.4 C92.1844026,22.1790861 92.3634887,22 92.5844026,22 L93.5844025,22 C93.8053165,22 93.9844026,22.1790861 93.9844026,22.4 Z M116.384403,22.4 C116.384403,22.6209139 116.205316,22.8 115.984403,22.8 L114.984403,22.8 C114.763489,22.8 114.584403,22.6209139 114.584403,22.4 C114.584403,22.1790861 114.763489,22 114.984403,22 L115.984403,22 C116.205316,22 116.384403,22.1790861 116.384403,22.4 Z M113.184403,22.4 C113.184403,22.6209139 113.005316,22.8 112.784403,22.8 L111.784403,22.8 C111.563489,22.8 111.384403,22.6209139 111.384403,22.4 C111.384403,22.1790861 111.563489,22 111.784403,22 L112.784403,22 C113.005316,22 113.184403,22.1790861 113.184403,22.4 Z M109.984403,22.4 C109.984403,22.6209139 109.805316,22.8 109.584403,22.8 L108.584403,22.8 C108.363489,22.8 108.184403,22.6209139 108.184403,22.4 C108.184403,22.1790861 108.363489,22 108.584403,22 L109.584403,22 C109.805316,22 109.984403,22.1790861 109.984403,22.4 Z M106.784403,22.4 C106.784403,22.6209139 106.605316,22.8 106.384403,22.8 L105.384403,22.8 C105.163489,22.8 104.984403,22.6209139 104.984403,22.4 C104.984403,22.1790861 105.163489,22 105.384403,22 L106.384402,22 C106.605316,22 106.784403,22.1790861 106.784403,22.4 Z M103.584403,22.4 C103.584403,22.6209139 103.405316,22.8 103.184403,22.8 L102.184403,22.8 C101.963489,22.8 101.784403,22.6209139 101.784403,22.4 C101.784403,22.1790861 101.963489,22 102.184403,22 L103.184402,22 C103.405316,22 103.584403,22.1790861 103.584403,22.4 Z M3.21445335,21.7146857 C3.47201296,21.8469101 3.7503283,21.9352706 4.04005994,21.9753362 C4.25889142,22.0055973 4.41175792,22.2075267 4.38149683,22.4263582 C4.35123574,22.6451896 4.14930631,22.7980562 3.93047483,22.7677951 C3.55112172,22.7153362 3.1863813,22.5995372 2.84908821,22.4263797 C2.65255935,22.3254869 2.575031,22.084379 2.6759238,21.8878502 C2.77681659,21.6913213 3.01792449,21.6137929 3.21445335,21.7146857 Z M119.085667,20.7967074 C119.266363,20.923797 119.30982,21.1733068 119.182731,21.3540032 C118.964109,21.6648408 118.694714,21.9373837 118.386455,22.1596216 C118.207257,22.2888142 117.957256,22.2482762 117.828063,22.0690774 C117.698871,21.8898786 117.739409,21.6398779 117.918607,21.5106853 C118.154633,21.3405235 118.360988,21.1317568 118.528371,20.8937713 C118.655461,20.7130749 118.904971,20.6696179 119.085667,20.7967074 Z M1.79999053,19.3738218 L1.80050042,19.4515966 C1.80607818,19.7390086 1.85825979,20.0190186 1.95396115,20.2838222 C2.02904737,20.4915841 1.92149248,20.7208778 1.71373058,20.795964 C1.50596867,20.8710502 1.27667495,20.7634953 1.20158873,20.5557334 C1.07625093,20.2089265 1.00793261,19.8423259 1.00058539,19.4621397 L1.00000988,19.3793853 C0.99847357,19.1584767 1.17630991,18.9781495 1.39721847,18.9766034 C1.61812703,18.9750769 1.79845422,19.1529133 1.79999053,19.3738218 Z M119.8,18.0077988 L119.8,19.0077988 C119.8,19.2287127 119.620914,19.4077988 119.4,19.4077988 C119.179086,19.4077988 119,19.2287127 119,19.0077988 L119,18.0077988 C119,17.7868849 119.179086,17.6077988 119.4,17.6077988 C119.620914,17.6077988 119.8,17.7868849 119.8,18.0077988 Z M1.80000021,16.1766036 L1.80000021,17.1766036 C1.80000021,17.3975175 1.62091411,17.5766036 1.40000021,17.5766036 C1.17908631,17.5766036 0.900000207,17.3975175 0.900000207,17.1766036 L0.900000207,16.1766036 C0.900000207,15.9556897 1.17908631,15.7766036 1.40000021,15.7766036 C1.62091411,15.7766036 1.80000021,15.9556897 1.80000021,16.1766036 Z M119.8,14.8077988 L119.8,15.8077988 C119.8,16.0287127 119.620914,16.2077988 119.4,16.2077988 C119.179086,16.2077988 119,16.0287127 119,15.8077988 L119,14.8077988 C119,14.5868849 119.179086,14.4077988 119.4,14.4077988 C119.620914,14.4077988 119.8,14.5868849 119.8,14.8077988 Z M1.80000021,12.9766036 L1.80000021,13.9766036 C1.80000021,14.1975175 1.62091411,14.3766036 1.40000021,14.3766036 C1.17908631,14.3766036 0.900000207,14.1975175 0.900000207,13.9766036 L0.900000207,12.9766036 C0.900000207,12.7556897 1.17908631,12.5766036 1.40000021,12.5766036 C1.62091411,12.5766036 1.80000021,12.7556897 1.80000021,12.9766036 Z M119.8,11.6077988 L119.8,12.6077988 C119.8,12.8287127 119.620914,13.0077988 119.4,13.0077988 C119.179086,13.0077988 119,12.8287127 119,12.6077988 L119,11.6077988 C119,11.3868849 119.179086,11.2077988 119.4,11.2077988 C119.620914,11.2077988 119.8,11.3868849 119.8,11.6077988 Z M1.80000021,9.77660357 L1.80000021,10.7766036 C1.80000021,10.9975175 1.62091411,11.1766036 1.40000021,11.1766036 C1.17908631,11.1766036 0.900000207,10.9975175 0.900000207,10.7766036 L0.900000207,9.77660357 C0.900000207,9.55568967 1.17908631,9.37660357 1.40000021,9.37660357 C1.62091411,9.37660357 1.80000021,9.55568967 1.80000021,9.77660357 Z M119.8,8.40779881 L119.8,9.40779881 C119.8,9.62871271 119.620914,9.80779881 119.4,9.80779881 C119.179086,9.80779881 119,9.62871271 119,9.40779881 L119,8.40779881 C119,8.18688491 119.179086,8.00779881 119.4,8.00779881 C119.620914,8.00779881 119.8,8.18688491 119.8,8.40779881 Z M1.80000021,6.57660357 L1.80000021,7.57660357 C1.80000021,7.79751746 1.62091411,7.97660357 1.40000021,7.97660357 C1.17908631,7.97660357 0.900000207,7.79751746 0.900000207,7.57660357 L0.900000207,6.57660357 C0.900000207,6.35568967 1.17908631,6.17660357 1.40000021,6.17660357 C1.62091411,6.17660357 1.80000021,6.35568967 1.80000021,6.57660357 Z M119.8,5.20779881 L119.8,6.20779881 C119.8,6.42871271 119.620914,6.60779881 119.4,6.60779881 C119.179086,6.60779881 119,6.42871271 119,6.20779881 L119,5.20779881 C119,4.98688491 119.179086,4.80779881 119.4,4.80779881 C119.620914,4.80779881 119.8,4.98688491 119.8,5.20779881 Z M1.80007849,3.37957686 C1.80001327,3.3897814 1.80001327,3.3897814 1.80000021,3.4 L1.80000021,4.37660357 C1.80000021,4.59751746 1.62091411,4.77660357 1.40000021,4.77660357 C1.17908631,4.77660357 1.00000021,4.59751746 1.00000021,4.37660357 L1.00000021,3.4 C1.00001719,3.38671374 1.00001719,3.38671374 1.00010202,3.37344172 C1.00179619,3.15253431 1.18225042,2.97482688 1.40315783,2.97650904 C1.62406523,2.97821522 1.80177267,3.15866946 1.80007849,3.37957686 Z M119.421116,1.83887624 C119.595442,2.17563306 119.712485,2.54002136 119.766215,2.9191864 C119.79721,3.13791515 119.645022,3.34035621 119.426293,3.37135118 C119.207564,3.40234614 119.005123,3.25015784 118.974128,3.0314291 C118.933094,2.74185398 118.843784,2.46380655 118.710664,2.20665099 C118.609106,2.01046515 118.685816,1.76909592 118.882002,1.66753772 C119.078188,1.56597953 119.319557,1.6426904 119.421116,1.83887624 Z M3.37834391,0.550095989 C3.47987763,0.746294496 3.40313666,0.987654157 3.20693815,1.08918788 C2.94867982,1.22283788 2.71432455,1.39926119 2.51394916,1.61035064 C2.36185852,1.77057357 2.10867834,1.77716609 1.94845541,1.62507544 C1.78823248,1.4729848 1.78163997,1.21980462 1.93373061,1.05958169 C2.19539698,0.783924034 2.50154333,0.553456192 2.83925202,0.378690235 C3.03545052,0.277156512 3.27681019,0.353897482 3.37834391,0.550095989 Z M117.517808,0.188136945 C117.72645,0.260742276 117.836729,0.488738155 117.764124,0.697379994 C117.691519,0.906021834 117.463523,1.01630121 117.254881,0.943695883 C116.98269,0.84897599 116.695024,0.800014594 116.400132,0.800000003 C116.179218,0.799989073 116.000152,0.620894112 116.000152,0.399980212 C116.000152,0.179066313 116.179258,-1.09263043e-05 116.400172,4.24230608e-09 C116.784867,1.90385059e-05 117.161375,0.064101627 117.517808,0.188136945 Z M60.2000002,0.5 C60.2000002,0.7209139 60.0209141,0.9 59.8000002,0.9 L58.8000002,0.9 C58.5790863,0.9 58.4000002,0.7209139 58.4000002,0.5 C58.4000002,0.2790861 58.5790863,0 58.8000002,0 L59.8000002,0 C60.0209141,0 60.2000002,0.2790861 60.2000002,0.5 Z M5.80000021,0.5 C5.80000021,0.7209139 5.62091411,0.9 5.40000021,0.9 L4.40000021,0.9 C4.17908631,0.9 4.00000021,0.7209139 4.00000021,0.5 C4.00000021,0.2790861 4.17908631,0 4.40000021,0 L5.40000021,0 C5.62091411,0 5.80000021,0.2790861 5.80000021,0.5 Z M108.2,0.5 C108.2,0.7209139 108.020914,0.9 107.8,0.9 L106.8,0.9 C106.579086,0.9 106.4,0.7209139 106.4,0.5 C106.4,0.2790861 106.579086,0 106.8,0 L107.8,0 C108.020914,0 108.2,0.2790861 108.2,0.5 Z M105,0.5 C105,0.7209139 104.820914,0.9 104.6,0.9 L103.6,0.9 C103.379086,0.9 103.2,0.7209139 103.2,0.5 C103.2,0.2790861 103.379086,0 103.6,0 L104.6,0 C104.820914,0 105,0.2790861 105,0.5 Z M101.8,0.5 C101.8,0.7209139 101.620914,0.9 101.4,0.9 L100.4,0.9 C100.179086,0.9 100,0.7209139 100,0.5 C100,0.2790861 100.179086,0 100.4,0 L101.4,0 C101.620914,0 101.8,0.2790861 101.8,0.5 Z M98.6000002,0.5 C98.6000002,0.7209139 98.4209141,0.9 98.2000002,0.9 L97.2000002,0.9 C96.9790863,0.9 96.8000002,0.7209139 96.8000002,0.5 C96.8000002,0.2790861 96.9790863,0 97.2000002,0 L98.2000002,0 C98.4209141,0 98.6000002,0.2790861 98.6000002,0.5 Z M95.4000002,0.5 C95.4000002,0.7209139 95.2209141,0.9 95.0000002,0.9 L94.0000002,0.9 C93.7790863,0.9 93.6000002,0.7209139 93.6000002,0.5 C93.6000002,0.2790861 93.7790863,0 94.0000002,0 L95.0000002,0 C95.2209141,0 95.4000002,0.2790861 95.4000002,0.5 Z M92.2000002,0.5 C92.2000002,0.7209139 92.0209141,0.9 91.8000002,0.9 L90.8000002,0.9 C90.5790863,0.9 90.4000002,0.7209139 90.4000002,0.5 C90.4000002,0.2790861 90.5790863,0 90.8000002,0 L91.8000002,0 C92.0209141,0 92.2000002,0.2790861 92.2000002,0.5 Z M89.0000002,0.5 C89.0000002,0.7209139 88.8209141,0.9 88.6000002,0.9 L87.6000002,0.9 C87.3790863,0.9 87.2000002,0.7209139 87.2000002,0.5 C87.2000002,0.2790861 87.3790863,0 87.6000002,0 L88.6000002,0 C88.8209141,0 89.0000002,0.2790861 89.0000002,0.5 Z M85.8000002,0.5 C85.8000002,0.7209139 85.6209141,0.9 85.4000002,0.9 L84.4000002,0.9 C84.1790863,0.9 84.0000002,0.7209139 84.0000002,0.5 C84.0000002,0.2790861 84.1790863,0 84.4000002,0 L85.4000002,0 C85.6209141,0 85.8000002,0.2790861 85.8000002,0.5 Z M82.6000002,0.5 C82.6000002,0.7209139 82.4209141,0.9 82.2000002,0.9 L81.2000002,0.9 C80.9790863,0.9 80.8000002,0.7209139 80.8000002,0.5 C80.8000002,0.2790861 80.9790863,0 81.2000002,0 L82.2000002,0 C82.4209141,0 82.6000002,0.2790861 82.6000002,0.5 Z M79.4000002,0.5 C79.4000002,0.7209139 79.2209141,0.9 79.0000002,0.9 L78.0000002,0.9 C77.7790863,0.9 77.6000002,0.7209139 77.6000002,0.5 C77.6000002,0.2790861 77.7790863,0 78.0000002,0 L79.0000002,0 C79.2209141,0 79.4000002,0.2790861 79.4000002,0.5 Z M76.2000002,0.5 C76.2000002,0.7209139 76.0209141,0.9 75.8000002,0.9 L74.8000002,0.9 C74.5790863,0.9 74.4000002,0.7209139 74.4000002,0.5 C74.4000002,0.2790861 74.5790863,0 74.8000002,0 L75.8000002,0 C76.0209141,0 76.2000002,0.2790861 76.2000002,0.5 Z M73.0000002,0.5 C73.0000002,0.7209139 72.8209141,0.9 72.6000002,0.9 L71.6000002,0.9 C71.3790863,0.9 71.2000002,0.7209139 71.2000002,0.5 C71.2000002,0.2790861 71.3790863,0 71.6000002,0 L72.6000002,0 C72.8209141,0 73.0000002,0.2790861 73.0000002,0.5 Z M69.8000002,0.5 C69.8000002,0.7209139 69.6209141,0.9 69.4000002,0.9 L68.4000002,0.9 C68.1790863,0.9 68.0000002,0.7209139 68.0000002,0.5 C68.0000002,0.2790861 68.1790863,0 68.4000002,0 L69.4000002,0 C69.6209141,0 69.8000002,0.2790861 69.8000002,0.5 Z M66.6000002,0.5 C66.6000002,0.7209139 66.4209141,0.9 66.2000002,0.9 L65.2000002,0.9 C64.9790863,0.9 64.8000002,0.7209139 64.8000002,0.5 C64.8000002,0.2790861 64.9790863,0 65.2000002,0 L66.2000002,0 C66.4209141,0 66.6000002,0.2790861 66.6000002,0.5 Z M63.4000002,0.5 C63.4000002,0.7209139 63.2209141,0.9 63.0000002,0.9 L62.0000002,0.9 C61.7790863,0.9 61.6000002,0.7209139 61.6000002,0.5 C61.6000002,0.2790861 61.7790863,0 62.0000002,0 L63.0000002,0 C63.2209141,0 63.4000002,0.2790861 63.4000002,0.5 Z M114.6,0.5 C114.6,0.7209139 114.420914,0.9 114.2,0.9 L113.2,0.9 C112.979086,0.9 112.8,0.7209139 112.8,0.5 C112.8,0.2790861 112.979086,0 113.2,0 L114.2,0 C114.420914,0 114.6,0.2790861 114.6,0.5 Z M57.0000002,0.5 C57.0000002,0.7209139 56.8209141,0.9 56.6000002,0.9 L55.6000002,0.9 C55.3790863,0.9 55.2000002,0.7209139 55.2000002,0.5 C55.2000002,0.2790861 55.3790863,0 55.6000002,0 L56.6000002,0 C56.8209141,0 57.0000002,0.2790861 57.0000002,0.5 Z M53.8000002,0.5 C53.8000002,0.7209139 53.6209141,0.9 53.4000002,0.9 L52.4000002,0.9 C52.1790863,0.9 52.0000002,0.7209139 52.0000002,0.5 C52.0000002,0.2790861 52.1790863,0 52.4000002,0 L53.4000002,0 C53.6209141,0 53.8000002,0.2790861 53.8000002,0.5 Z M50.6000002,0.5 C50.6000002,0.7209139 50.4209141,0.9 50.2000002,0.9 L49.2000002,0.9 C48.9790863,0.9 48.8000002,0.7209139 48.8000002,0.5 C48.8000002,0.2790861 48.9790863,0 49.2000002,0 L50.2000002,0 C50.4209141,0 50.6000002,0.2790861 50.6000002,0.5 Z M47.4000002,0.5 C47.4000002,0.7209139 47.2209141,0.9 47.0000002,0.9 L46.0000002,0.9 C45.7790863,0.9 45.6000002,0.7209139 45.6000002,0.5 C45.6000002,0.2790861 45.7790863,0 46.0000002,0 L47.0000002,0 C47.2209141,0 47.4000002,0.2790861 47.4000002,0.5 Z M44.2000002,0.5 C44.2000002,0.7209139 44.0209141,0.9 43.8000002,0.9 L42.8000002,0.9 C42.5790863,0.9 42.4000002,0.7209139 42.4000002,0.5 C42.4000002,0.2790861 42.5790863,0 42.8000002,0 L43.8000002,0 C44.0209141,0 44.2000002,0.2790861 44.2000002,0.5 Z M41.0000002,0.5 C41.0000002,0.7209139 40.8209141,0.9 40.6000002,0.9 L39.6000002,0.9 C39.3790863,0.9 39.2000002,0.7209139 39.2000002,0.5 C39.2000002,0.2790861 39.3790863,0 39.6000002,0 L40.6000002,0 C40.8209141,0 41.0000002,0.2790861 41.0000002,0.5 Z M37.8000002,0.5 C37.8000002,0.7209139 37.6209141,0.9 37.4000002,0.9 L36.4000002,0.9 C36.1790863,0.9 36.0000002,0.7209139 36.0000002,0.5 C36.0000002,0.2790861 36.1790863,0 36.4000002,0 L37.4000002,0 C37.6209141,0 37.8000002,0.2790861 37.8000002,0.5 Z M34.6000002,0.5 C34.6000002,0.7209139 34.4209141,0.9 34.2000002,0.9 L33.2000002,0.9 C32.9790863,0.9 32.8000002,0.7209139 32.8000002,0.5 C32.8000002,0.2790861 32.9790863,0 33.2000002,0 L34.2000002,0 C34.4209141,0 34.6000002,0.2790861 34.6000002,0.5 Z M31.4000002,0.5 C31.4000002,0.7209139 31.2209141,0.9 31.0000002,0.9 L30.0000002,0.9 C29.7790863,0.9 29.6000002,0.7209139 29.6000002,0.5 C29.6000002,0.2790861 29.7790863,0 30.0000002,0 L31.0000002,0 C31.2209141,0 31.4000002,0.2790861 31.4000002,0.5 Z M28.2000002,0.5 C28.2000002,0.7209139 28.0209141,0.9 27.8000002,0.9 L26.8000002,0.9 C26.5790863,0.9 26.4000002,0.7209139 26.4000002,0.5 C26.4000002,0.2790861 26.5790863,0 26.8000002,0 L27.8000002,0 C28.0209141,0 28.2000002,0.2790861 28.2000002,0.5 Z M25.0000002,0.5 C25.0000002,0.7209139 24.8209141,0.9 24.6000002,0.9 L23.6000002,0.9 C23.3790863,0.9 23.2000002,0.7209139 23.2000002,0.5 C23.2000002,0.2790861 23.3790863,0 23.6000002,0 L24.6000002,0 C24.8209141,0 25.0000002,0.2790861 25.0000002,0.5 Z M21.8000002,0.5 C21.8000002,0.7209139 21.6209141,0.9 21.4000002,0.9 L20.4000002,0.9 C20.1790863,0.9 20.0000002,0.7209139 20.0000002,0.5 C20.0000002,0.2790861 20.1790863,0 20.4000002,0 L21.4000002,0 C21.6209141,0 21.8000002,0.2790861 21.8000002,0.5 Z M18.6000002,0.5 C18.6000002,0.7209139 18.4209141,0.9 18.2000002,0.9 L17.2000002,0.9 C16.9790863,0.9 16.8000002,0.7209139 16.8000002,0.5 C16.8000002,0.2790861 16.9790863,0 17.2000002,0 L18.2000002,0 C18.4209141,0 18.6000002,0.2790861 18.6000002,0.5 Z M15.4000002,0.5 C15.4000002,0.7209139 15.2209141,0.9 15.0000002,0.9 L14.0000002,0.9 C13.7790863,0.9 13.6000002,0.7209139 13.6000002,0.5 C13.6000002,0.2790861 13.7790863,0 14.0000002,0 L15.0000002,0 C15.2209141,0 15.4000002,0.2790861 15.4000002,0.5 Z M12.2000002,0.5 C12.2000002,0.7209139 12.0209141,0.9 11.8000002,0.9 L10.8000002,0.9 C10.5790863,0.9 10.4000002,0.7209139 10.4000002,0.5 C10.4000002,0.2790861 10.5790863,0 10.8000002,0 L11.8000002,0 C12.0209141,0 12.2000002,0.2790861 12.2000002,0.5 Z M9.00000021,0.5 C9.00000021,0.7209139 8.82091411,0.9 8.60000021,0.9 L7.60000021,0.9 C7.37908631,0.9 7.20000021,0.7209139 7.20000021,0.5 C7.20000021,0.2790861 7.37908631,0 7.60000021,0 L8.60000021,0 C8.82091411,0 9.00000021,0.2790861 9.00000021,0.5 Z M111.4,0.5 C111.4,0.7209139 111.220914,0.9 111,0.9 L110,0.9 C109.779086,0.9 109.6,0.7209139 109.6,0.5 C109.6,0.2790861 109.779086,0 110,0 L111,0 C111.220914,0 111.4,0.2790861 111.4,0.5 Z" id="dash" fill="#8E8E8E" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="arrows" transform="translate(19.5, 12)" fill="#8E8E8E" fill-rule="nonzero">
+                        <path d="M78.5,-0.277777778 L81,4.72222222 L79,4.72222222 L79,29.5 C79,29.7761424 78.7761424,30 78.5,30 C78.2238576,30 78,29.7761424 78,29.5 L78,4.72222222 L76,4.72222222 L78.5,-0.277777778 Z M32.5,-0.277777778 L35,4.72222222 L33,4.72222222 L33,29.5 C32.999999,29.7761442 32.7761406,30 32.4999982,30 C32.2238558,30 32,29.7761406 32,29.5 L32,4.72222222 L30,4.72222222 L32.5,-0.277777778 Z M0.5,-0.277777778 L3,4.72222222 L1,4.72222222 L1,29.5 C0.999998995,29.7761442 0.776140555,30 0.49999818,30 C0.223855805,30 5.91970917e-13,29.7761406 5.91970917e-13,29.5 L0,4.72222222 L-2,4.72222222 L0.5,-0.277777778 Z" id="Combined-Shape"></path>
+                    </g>
+                    <g id="Txt-TokenPooling" transform="translate(25, 20)" xlink:href="#path-19">
+                        <use fill="#F4F1FC" xlink:href="#path-19"></use>
+                        <g id="TokenPooling" stroke-width="1" fill="#9172E2">
+                            <path d="M5.88720703,9.00488281 C6.05322266,9.00488281 6.19645182,8.95279948 6.31689453,8.84863281 C6.43733724,8.74446615 6.49755859,8.59960938 6.49755859,8.4140625 L6.49755859,2.95507812 L8.19677734,2.95507812 C8.34326172,2.95507812 8.45800781,2.90625 8.54101563,2.80859375 C8.62402344,2.7109375 8.66552734,2.59863281 8.66552734,2.47167969 C8.66552734,2.34472656 8.62402344,2.23242188 8.54101563,2.13476562 C8.45800781,2.03710938 8.34326172,1.98828125 8.19677734,1.98828125 L3.57763672,1.98828125 C3.42789714,1.98828125 3.31070964,2.03710938 3.22607422,2.13476562 C3.1414388,2.23242188 3.09912109,2.34472656 3.09912109,2.47167969 C3.09912109,2.59863281 3.1414388,2.7109375 3.22607422,2.80859375 C3.31070964,2.90625 3.42789714,2.95507812 3.57763672,2.95507812 L5.28173828,2.95507812 L5.28173828,8.4140625 C5.28173828,8.59960938 5.34195964,8.74446615 5.46240234,8.84863281 C5.58284505,8.95279948 5.72444661,9.00488281 5.88720703,9.00488281 Z" id="Path" fill-rule="nonzero"></path>
+                            <path d="M11.6928711,8.34082031 C11.2403971,8.34082031 10.8863932,8.18457031 10.6308594,7.87207031 C10.3753255,7.55957031 10.2475586,7.125 10.2475586,6.56835937 C10.2475586,6.00520833 10.3753255,5.56575521 10.6308594,5.25 C10.8863932,4.93424479 11.2403971,4.77636719 11.6928711,4.77636719 C12.1453451,4.77636719 12.499349,4.93505859 12.7548828,5.25244141 C13.0104167,5.56982422 13.1381836,6.00846354 13.1381836,6.56835937 C13.1381836,7.125 13.0112305,7.55957031 12.7573242,7.87207031 C12.503418,8.18457031 12.1486003,8.34082031 11.6928711,8.34082031 Z M11.6928711,9.06835938 C12.0249023,9.06835938 12.3292643,9.02197266 12.605957,8.92919922 C12.8826497,8.83642578 13.1178385,8.71110026 13.3115234,8.55322266 C13.5052083,8.39534505 13.6695964,8.20898438 13.8046875,7.99414063 C13.9397786,7.77929688 14.0390625,7.55224609 14.1025391,7.31298828 C14.1660156,7.07373047 14.1977539,6.82552083 14.1977539,6.56835938 C14.1977539,6.22981771 14.144043,5.91080729 14.0366211,5.61132813 C13.9291992,5.31184896 13.773763,5.04492188 13.5703125,4.81054688 C13.366862,4.57617187 13.1040039,4.3914388 12.7817383,4.25634766 C12.4594727,4.12125651 12.0965169,4.05371094 11.6928711,4.05371094 C11.2859701,4.05371094 10.9197591,4.12288411 10.5942383,4.26123047 C10.2687174,4.39957682 10.0058594,4.5867513 9.80566406,4.82275391 C9.60546875,5.05875651 9.45247396,5.32568359 9.34667969,5.62353516 C9.24088542,5.92138672 9.18798828,6.23632813 9.18798828,6.56835938 C9.18798828,6.89388021 9.24007161,7.2039388 9.34423828,7.49853516 C9.44840495,7.79313151 9.60058594,8.05843099 9.80078125,8.29443359 C10.0009766,8.5304362 10.2638346,8.71842448 10.5893555,8.85839844 C10.9148763,8.9983724 11.2827148,9.06835938 11.6928711,9.06835938 Z" id="Shape" fill-rule="nonzero"></path>
+                            <path d="M15.7993164,9 C15.9458008,9 16.0727539,8.95198568 16.1801758,8.85595703 C16.2875977,8.75992839 16.3413086,8.62890625 16.3413086,8.46289062 L16.3413086,6.50976562 L18.5776367,8.86328125 C18.6785482,8.97070312 18.7957357,9.02441406 18.9291992,9.02441406 C19.0138346,9.02441406 19.0952148,9.00244141 19.1733398,8.95849609 C19.2514648,8.91455078 19.3141276,8.85514323 19.3613281,8.78027344 C19.4085286,8.70540365 19.4321289,8.62727865 19.4321289,8.54589844 C19.4321289,8.4938151 19.4215495,8.44173177 19.4003906,8.38964844 C19.3792318,8.3375651 19.3458659,8.28710938 19.300293,8.23828125 L17.4887695,6.34375 L19.0952148,4.83496094 C19.1245117,4.80891927 19.1489258,4.7796224 19.168457,4.74707031 C19.1879883,4.71451823 19.2018229,4.68196615 19.2099609,4.64941406 C19.218099,4.61686198 19.222168,4.58268229 19.222168,4.546875 C19.222168,4.46875 19.2001953,4.3914388 19.15625,4.31494141 C19.1123047,4.23844401 19.0537109,4.17659505 18.9804688,4.12939453 C18.9072266,4.08219401 18.8299154,4.05859375 18.7485352,4.05859375 C18.699707,4.05859375 18.6508789,4.06917318 18.6020508,4.09033203 C18.5532227,4.11149089 18.5060221,4.14322917 18.4604492,4.18554688 L16.3413086,6.21679688 L16.3413086,2.52539063 C16.3413086,2.36263021 16.2892253,2.23323568 16.1850586,2.13720703 C16.0808919,2.04117839 15.957194,1.99316406 15.8139648,1.99316406 C15.6674805,1.99316406 15.5405273,2.04036458 15.4331055,2.13476562 C15.3256836,2.22916667 15.2719727,2.359375 15.2719727,2.52539063 L15.2719727,8.46289062 C15.2719727,8.63216146 15.324056,8.7639974 15.4282227,8.85839844 C15.5323893,8.95279948 15.6560872,9 15.7993164,9 Z" id="Path" fill-rule="nonzero"></path>
+                            <path d="M22.4790039,9.06835938 C23.1886393,9.06835938 23.7875977,8.85351563 24.2758789,8.42382813 C24.3833008,8.3359375 24.4370117,8.23014323 24.4370117,8.10644531 C24.4370117,8.00878906 24.4036458,7.92252604 24.3369141,7.84765625 C24.2701823,7.77278646 24.1912435,7.73535156 24.1000977,7.73535156 C24.0382487,7.73535156 23.9763997,7.75651042 23.9145508,7.79882813 C23.4165039,8.15039062 22.9640299,8.32617188 22.5571289,8.32617188 C22.1176758,8.32291667 21.7669271,8.19840495 21.5048828,7.95263672 C21.2428385,7.70686849 21.1004232,7.32519531 21.0776367,6.80761719 L24.2807617,6.80761719 C24.3914388,6.80761719 24.4720052,6.76936849 24.5224609,6.69287109 C24.5729167,6.6163737 24.5981445,6.51953125 24.5981445,6.40234375 C24.5916341,6.08007812 24.542806,5.78141276 24.4516602,5.50634766 C24.3605143,5.23128255 24.2278646,4.98388672 24.0537109,4.76416016 C23.8795573,4.54443359 23.6484375,4.37109375 23.3603516,4.24414062 C23.0722656,4.1171875 22.7410482,4.05371094 22.3666992,4.05371094 C21.9988607,4.05371094 21.6635742,4.12044271 21.3608398,4.25390625 C21.0581055,4.38736979 20.809082,4.56803385 20.6137695,4.79589844 C20.418457,5.02376302 20.2679036,5.28499349 20.1621094,5.57958984 C20.0563151,5.8741862 20.003418,6.1858724 20.003418,6.51464844 C20.003418,7.30240885 20.2304688,7.92333984 20.6845703,8.37744141 C21.1386719,8.83154297 21.7368164,9.06184896 22.4790039,9.06835938 Z M21.0874023,6.16308594 C21.1036784,5.98730469 21.1419271,5.81966146 21.2021484,5.66015625 C21.2623698,5.50065104 21.3445638,5.35416667 21.4487305,5.22070312 C21.5528971,5.08723958 21.6879883,4.97981771 21.8540039,4.8984375 C22.0200195,4.81705729 22.2055664,4.77636719 22.4106445,4.77636719 C22.8077799,4.77636719 23.1105143,4.90820312 23.3188477,5.171875 C23.527181,5.43554688 23.6459961,5.76595052 23.675293,6.16308594 L21.0874023,6.16308594 Z" id="Shape" fill-rule="nonzero"></path>
+                            <path d="M26.1655273,9 C26.3120117,9 26.438151,8.95442708 26.5439453,8.86328125 C26.6497396,8.77213542 26.7026367,8.6468099 26.7026367,8.48730469 L26.7026367,6.265625 C26.7058919,5.81966146 26.8328451,5.45996094 27.0834961,5.18652344 C27.3341471,4.91308594 27.6336263,4.77636719 27.9819336,4.77636719 C28.2814128,4.77636719 28.5239258,4.87402344 28.7094727,5.06933594 C28.8950195,5.26464844 28.987793,5.54622396 28.987793,5.9140625 L28.987793,8.48730469 C28.987793,8.6500651 29.0398763,8.77620443 29.144043,8.86572266 C29.2482096,8.95524089 29.3719076,9 29.5151367,9 C29.6616211,9 29.7885742,8.95524089 29.8959961,8.86572266 C30.003418,8.77620443 30.0571289,8.6500651 30.0571289,8.48730469 L30.0571289,5.92382813 C30.0571289,5.60481771 30.0099284,5.32324219 29.9155273,5.07910156 C29.8211263,4.83496094 29.690918,4.63964844 29.5249023,4.49316406 C29.3588867,4.34667969 29.1708984,4.23681641 28.9609375,4.16357422 C28.7509766,4.09033203 28.5255534,4.05371094 28.284668,4.05371094 C27.9135742,4.05371094 27.5913086,4.12451172 27.3178711,4.26611328 C27.0444336,4.40771484 26.8393555,4.61360677 26.7026367,4.88378906 L26.7026367,4.61035156 C26.7026367,4.45735677 26.6505534,4.33772786 26.5463867,4.25146484 C26.4422201,4.16520182 26.3168945,4.12207031 26.1704102,4.12207031 C26.0239258,4.12207031 25.8969727,4.16682943 25.7895508,4.25634766 C25.6821289,4.34586589 25.628418,4.46875 25.628418,4.625 L25.628418,8.48730469 C25.628418,8.6500651 25.6813151,8.77620443 25.7871094,8.86572266 C25.8929036,8.95524089 26.019043,9 26.1655273,9 Z" id="Path" fill-rule="nonzero"></path>
+                            <path d="M32.1518555,9.00488281 C32.3178711,9.00488281 32.4602865,8.95361328 32.5791016,8.85107422 C32.6979167,8.74853516 32.7573242,8.60449219 32.7573242,8.41894531 L32.7573242,6.11914062 L34.1782227,6.11914062 C35.0213216,6.11914062 35.6691081,5.95719401 36.121582,5.63330078 C36.574056,5.30940755 36.800293,4.78125 36.800293,4.04882813 C36.800293,3.32617188 36.5846354,2.80208333 36.1533203,2.4765625 C35.7220052,2.15104167 35.0994466,1.98828125 34.2856445,1.98828125 L32.1616211,1.98828125 C31.9923503,1.98828125 31.8474935,2.0476888 31.7270508,2.16650391 C31.6066081,2.28531901 31.5463867,2.42773438 31.5463867,2.59375 L31.5463867,8.41894531 C31.5463867,8.60449219 31.6066081,8.74853516 31.7270508,8.85107422 C31.8474935,8.95361328 31.9890951,9.00488281 32.1518555,9.00488281 Z M32.7573242,5.19628906 L32.7573242,2.95019531 L34.1782227,2.95019531 C34.6730143,2.95019531 35.0351562,3.02587891 35.2646484,3.17724609 C35.4941406,3.32861328 35.6088867,3.61914063 35.6088867,4.04882813 C35.6088867,4.48502604 35.4908854,4.78531901 35.2548828,4.94970703 C35.0188802,5.11409505 34.6486003,5.19628906 34.144043,5.19628906 L32.7573242,5.19628906 Z" id="Shape" fill-rule="nonzero"></path>
+                            <path d="M39.8618164,8.34082031 C39.4093424,8.34082031 39.0553385,8.18457031 38.7998047,7.87207031 C38.5442708,7.55957031 38.4165039,7.125 38.4165039,6.56835937 C38.4165039,6.00520833 38.5442708,5.56575521 38.7998047,5.25 C39.0553385,4.93424479 39.4093424,4.77636719 39.8618164,4.77636719 C40.3142904,4.77636719 40.6682943,4.93505859 40.9238281,5.25244141 C41.179362,5.56982422 41.3071289,6.00846354 41.3071289,6.56835937 C41.3071289,7.125 41.1801758,7.55957031 40.9262695,7.87207031 C40.6723633,8.18457031 40.3175456,8.34082031 39.8618164,8.34082031 Z M39.8618164,9.06835938 C40.1938477,9.06835938 40.4982096,9.02197266 40.7749023,8.92919922 C41.0515951,8.83642578 41.2867839,8.71110026 41.4804688,8.55322266 C41.6741536,8.39534505 41.8385417,8.20898438 41.9736328,7.99414063 C42.108724,7.77929688 42.2080078,7.55224609 42.2714844,7.31298828 C42.3349609,7.07373047 42.3666992,6.82552083 42.3666992,6.56835938 C42.3666992,6.22981771 42.3129883,5.91080729 42.2055664,5.61132813 C42.0981445,5.31184896 41.9427083,5.04492188 41.7392578,4.81054688 C41.5358073,4.57617187 41.2729492,4.3914388 40.9506836,4.25634766 C40.628418,4.12125651 40.2654622,4.05371094 39.8618164,4.05371094 C39.4549154,4.05371094 39.0887044,4.12288411 38.7631836,4.26123047 C38.4376628,4.39957682 38.1748047,4.5867513 37.9746094,4.82275391 C37.7744141,5.05875651 37.6214193,5.32568359 37.515625,5.62353516 C37.4098307,5.92138672 37.3569336,6.23632813 37.3569336,6.56835938 C37.3569336,6.89388021 37.4090169,7.2039388 37.5131836,7.49853516 C37.6173503,7.79313151 37.7695312,8.05843099 37.9697266,8.29443359 C38.1699219,8.5304362 38.4327799,8.71842448 38.7583008,8.85839844 C39.0838216,8.9983724 39.4516602,9.06835938 39.8618164,9.06835938 Z" id="Shape" fill-rule="nonzero"></path>
+                            <path d="M45.6479492,8.34082031 C45.1954753,8.34082031 44.8414714,8.18457031 44.5859375,7.87207031 C44.3304036,7.55957031 44.2026367,7.125 44.2026367,6.56835937 C44.2026367,6.00520833 44.3304036,5.56575521 44.5859375,5.25 C44.8414714,4.93424479 45.1954753,4.77636719 45.6479492,4.77636719 C46.1004232,4.77636719 46.4544271,4.93505859 46.7099609,5.25244141 C46.9654948,5.56982422 47.0932617,6.00846354 47.0932617,6.56835937 C47.0932617,7.125 46.9663086,7.55957031 46.7124023,7.87207031 C46.4584961,8.18457031 46.1036784,8.34082031 45.6479492,8.34082031 Z M45.6479492,9.06835938 C45.9799805,9.06835938 46.2843424,9.02197266 46.5610352,8.92919922 C46.8377279,8.83642578 47.0729167,8.71110026 47.2666016,8.55322266 C47.4602865,8.39534505 47.6246745,8.20898438 47.7597656,7.99414063 C47.8948568,7.77929688 47.9941406,7.55224609 48.0576172,7.31298828 C48.1210938,7.07373047 48.152832,6.82552083 48.152832,6.56835938 C48.152832,6.22981771 48.0991211,5.91080729 47.9916992,5.61132813 C47.8842773,5.31184896 47.7288411,5.04492188 47.5253906,4.81054688 C47.3219401,4.57617187 47.059082,4.3914388 46.7368164,4.25634766 C46.4145508,4.12125651 46.0515951,4.05371094 45.6479492,4.05371094 C45.2410482,4.05371094 44.8748372,4.12288411 44.5493164,4.26123047 C44.2237956,4.39957682 43.9609375,4.5867513 43.7607422,4.82275391 C43.5605469,5.05875651 43.4075521,5.32568359 43.3017578,5.62353516 C43.1959635,5.92138672 43.1430664,6.23632813 43.1430664,6.56835938 C43.1430664,6.89388021 43.1951497,7.2039388 43.2993164,7.49853516 C43.4034831,7.79313151 43.5556641,8.05843099 43.7558594,8.29443359 C43.9560547,8.5304362 44.2189128,8.71842448 44.5444336,8.85839844 C44.8699544,8.9983724 45.237793,9.06835938 45.6479492,9.06835938 Z" id="Shape" fill-rule="nonzero"></path>
+                            <path d="M49.7739258,9 C49.9204102,9 50.0481771,8.95198568 50.1572266,8.85595703 C50.266276,8.75992839 50.3208008,8.63053385 50.3208008,8.46777344 L50.3208008,2.52539062 C50.3208008,2.36263021 50.2670898,2.23323568 50.159668,2.13720703 C50.0522461,2.04117839 49.9269206,1.99316406 49.7836914,1.99316406 C49.637207,1.99316406 49.5110677,2.04036458 49.4052734,2.13476562 C49.2994792,2.22916667 49.246582,2.359375 49.246582,2.52539062 L49.246582,8.46777344 C49.246582,8.63704427 49.2978516,8.76806641 49.4003906,8.86083984 C49.5029297,8.95361328 49.6274414,9 49.7739258,9 Z" id="Path" fill-rule="nonzero"></path>
+                            <path d="M52.2446289,9 C52.3911133,9 52.5188802,8.95198568 52.6279297,8.85595703 C52.7369792,8.75992839 52.7915039,8.63053385 52.7915039,8.46777344 L52.7915039,4.65429688 C52.7915039,4.48828125 52.7386068,4.35807292 52.6328125,4.26367188 C52.5270182,4.16927083 52.4025065,4.12207031 52.2592773,4.12207031 C52.112793,4.12207031 51.9858398,4.16927083 51.878418,4.26367188 C51.7709961,4.35807292 51.7172852,4.48828125 51.7172852,4.65429688 L51.7172852,8.46777344 C51.7172852,8.63704427 51.7685547,8.76806641 51.8710938,8.86083984 C51.9736328,8.95361328 52.0981445,9 52.2446289,9 Z M52.2495117,3.1015625 C52.4448242,3.1015625 52.601888,3.04541016 52.7207031,2.93310547 C52.8395182,2.82080078 52.8989258,2.671875 52.8989258,2.48632812 C52.8989258,2.30403646 52.840332,2.15592448 52.7231445,2.04199219 C52.605957,1.9280599 52.449707,1.87109375 52.2543945,1.87109375 C52.059082,1.87109375 51.9020182,1.9280599 51.7832031,2.04199219 C51.664388,2.15592448 51.6049805,2.30403646 51.6049805,2.48632812 C51.6049805,2.671875 51.664388,2.82080078 51.7832031,2.93310547 C51.9020182,3.04541016 52.0574544,3.1015625 52.2495117,3.1015625 Z" id="Shape" fill-rule="nonzero"></path>
+                            <path d="M54.6420898,9 C54.7885742,9 54.9147135,8.95442708 55.0205078,8.86328125 C55.1263021,8.77213542 55.1791992,8.6468099 55.1791992,8.48730469 L55.1791992,6.265625 C55.1824544,5.81966146 55.3094076,5.45996094 55.5600586,5.18652344 C55.8107096,4.91308594 56.1101888,4.77636719 56.4584961,4.77636719 C56.7579753,4.77636719 57.0004883,4.87402344 57.1860352,5.06933594 C57.371582,5.26464844 57.4643555,5.54622396 57.4643555,5.9140625 L57.4643555,8.48730469 C57.4643555,8.6500651 57.5164388,8.77620443 57.6206055,8.86572266 C57.7247721,8.95524089 57.8484701,9 57.9916992,9 C58.1381836,9 58.2651367,8.95524089 58.3725586,8.86572266 C58.4799805,8.77620443 58.5336914,8.6500651 58.5336914,8.48730469 L58.5336914,5.92382813 C58.5336914,5.60481771 58.4864909,5.32324219 58.3920898,5.07910156 C58.2976888,4.83496094 58.1674805,4.63964844 58.0014648,4.49316406 C57.8354492,4.34667969 57.6474609,4.23681641 57.4375,4.16357422 C57.2275391,4.09033203 57.0021159,4.05371094 56.7612305,4.05371094 C56.3901367,4.05371094 56.0678711,4.12451172 55.7944336,4.26611328 C55.5209961,4.40771484 55.315918,4.61360677 55.1791992,4.88378906 L55.1791992,4.61035156 C55.1791992,4.45735677 55.1271159,4.33772786 55.0229492,4.25146484 C54.9187826,4.16520182 54.793457,4.12207031 54.6469727,4.12207031 C54.5004883,4.12207031 54.3735352,4.16682943 54.2661133,4.25634766 C54.1586914,4.34586589 54.1049805,4.46875 54.1049805,4.625 L54.1049805,8.48730469 C54.1049805,8.6500651 54.1578776,8.77620443 54.2636719,8.86572266 C54.3694661,8.95524089 54.4956055,9 54.6420898,9 Z" id="Path" fill-rule="nonzero"></path>
+                            <path d="M61.9614258,11.1337891 C62.7003581,11.1337891 63.288737,10.9498698 63.7265625,10.5820312 C64.164388,10.2141927 64.3833008,9.66080729 64.3833008,8.921875 L64.3833008,4.62988281 C64.3833008,4.4703776 64.3328451,4.34586589 64.2319336,4.25634766 C64.1310221,4.16682943 64.0138346,4.12207031 63.8803711,4.12207031 C63.7566732,4.12207031 63.6468099,4.15869141 63.5507812,4.23193359 C63.4547526,4.30517578 63.3986003,4.40690104 63.3823242,4.53710938 L63.3823242,4.86425781 C63.2716471,4.69173177 63.156901,4.55013021 63.0380859,4.43945312 C62.9192708,4.32877604 62.7573242,4.23681641 62.5522461,4.16357422 C62.347168,4.09033203 62.1062826,4.05371094 61.8295898,4.05371094 C61.3673503,4.05371094 60.9637044,4.16438802 60.6186523,4.38574219 C60.2736003,4.60709635 60.0123698,4.90820313 59.8349609,5.2890625 C59.6575521,5.66992188 59.5688477,6.10449219 59.5688477,6.59277344 C59.5688477,7.3186849 59.7755534,7.90136719 60.1889648,8.34082031 C60.6023763,8.78027344 61.1590169,9 61.8588867,9 C62.5620117,9 63.0616862,8.7281901 63.3579102,8.18457031 L63.3579102,9 C63.3579102,9.44596354 63.226888,9.79264323 62.9648438,10.0400391 C62.7027995,10.2874349 62.3422852,10.4111328 61.8833008,10.4111328 C61.7400716,10.4111328 61.5960286,10.3989258 61.4511719,10.3745117 C61.3063151,10.3500977 61.1948242,10.3256836 61.1166992,10.3012695 C61.0385742,10.2768555 60.9303385,10.2386068 60.7919922,10.1865234 C60.6536458,10.1344401 60.5649414,10.101888 60.5258789,10.0888672 C60.5096029,10.0823568 60.492513,10.0766602 60.4746094,10.0717773 C60.4567057,10.0668945 60.4404297,10.0636393 60.4257812,10.0620117 C60.4111328,10.0603841 60.3956706,10.0595703 60.3793945,10.0595703 C60.3142904,10.0595703 60.2556966,10.0766602 60.2036133,10.1108398 C60.1515299,10.1450195 60.1116536,10.1889648 60.0839844,10.2426758 C60.0563151,10.2963867 60.0424805,10.3533529 60.0424805,10.4135742 C60.0424805,10.4737956 60.0579427,10.5323893 60.0888672,10.5893555 C60.1197917,10.6463216 60.1661784,10.6943359 60.2280273,10.7333984 C60.4070638,10.8440755 60.6520182,10.9384766 60.9628906,11.0166016 C61.273763,11.0947266 61.6066081,11.1337891 61.9614258,11.1337891 Z M62.0200195,8.30175781 C61.6293945,8.29199219 61.3014323,8.1414388 61.0361328,7.85009766 C60.7708333,7.55875651 60.6381836,7.12988281 60.6381836,6.56347656 C60.6381836,6.31933594 60.6650391,6.09309896 60.71875,5.88476562 C60.7724609,5.67643229 60.8530273,5.48763021 60.9604492,5.31835938 C61.0678711,5.14908854 61.2135417,5.0164388 61.3974609,4.92041016 C61.5813802,4.82438151 61.7937826,4.77636719 62.034668,4.77636719 C62.2755534,4.77636719 62.4830729,4.8219401 62.6572266,4.91308594 C62.8313802,5.00423177 62.968099,5.1336263 63.0673828,5.30126953 C63.1666667,5.46891276 63.2390951,5.65690104 63.284668,5.86523438 C63.3302409,6.07356771 63.3530273,6.30957031 63.3530273,6.57324219 C63.3497721,7.15592448 63.2260742,7.58886719 62.9819336,7.87207031 C62.737793,8.15527344 62.4171549,8.2985026 62.0200195,8.30175781 Z" id="Shape" fill-rule="nonzero"></path>
+                        </g>
+                    </g>
+                    <g id="row-bottom" transform="translate(8, 41)">
+                        <g id="..." transform="translate(62.7, 6)" fill="#8E8E8E">
+                            <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                            <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                            <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                        </g>
+                        <rect id="Rectangle" fill-opacity="0.6" fill="#FDB515" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill="#FDB515" fill-rule="nonzero"></path>
+                        <rect id="Rectangle" fill-opacity="0.6" fill="#FDB515" x="32" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M53,0 C54.6568542,0 56,1.34314575 56,3 L56,9 C56,10.6568542 54.6568542,12 53,12 L35,12 C33.3431458,12 32,10.6568542 32,9 L32,3 C32,1.34314575 33.3431458,0 35,0 L53,0 Z M53,1 L35,1 L34.7955116,1.0103258 C33.7869995,1.11274576 33,1.96446609 33,3 L33,9 C33,10.1045695 33.8954305,11 35,11 L53,11 C54.1045695,11 55,10.1045695 55,9 L55,3 C55,1.8954305 54.1045695,1 53,1 Z" id="Rectangle" fill-opacity="0.8" fill="#FDB515" fill-rule="nonzero"></path>
+                        <rect id="Rectangle" fill-opacity="0.6" fill="#FDB515" x="78" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L81,12 C79.3431458,12 78,10.6568542 78,9 L78,3 C78,1.34314575 79.3431458,0 81,0 L99,0 Z M99,1 L81,1 L80.7955116,1.0103258 C79.7869995,1.11274576 79,1.96446609 79,3 L79,9 C79,10.1045695 79.8954305,11 81,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill="#FDB515" fill-rule="nonzero"></path>
+                    </g>
+                    <g id="row-top" transform="translate(8, 0)">
+                        <g id="..." transform="translate(62.7, 6)" fill="#8E8E8E">
+                            <circle id="Oval" cx="0.9" cy="0.9" r="1"></circle>
+                            <circle id="Oval-Copy" cx="4.3" cy="0.9" r="1"></circle>
+                            <circle id="Oval-Copy-2" cx="7.7" cy="0.9" r="1"></circle>
+                        </g>
+                        <rect id="Rectangle" fill-opacity="0.6" fill="#FDB515" x="0" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M21,0 C22.6568542,0 24,1.34314575 24,3 L24,9 C24,10.6568542 22.6568542,12 21,12 L3,12 C1.34314575,12 0,10.6568542 0,9 L0,3 C0,1.34314575 1.34314575,0 3,0 L21,0 Z M21,1 L3,1 L2.79551164,1.0103258 C1.78699946,1.11274576 1,1.96446609 1,3 L1,9 C1,10.1045695 1.8954305,11 3,11 L21,11 C22.1045695,11 23,10.1045695 23,9 L23,3 C23,1.8954305 22.1045695,1 21,1 Z" id="Rectangle" fill-opacity="0.8" fill="#FDB515" fill-rule="nonzero"></path>
+                        <rect id="Rectangle" fill-opacity="0.6" fill="#FDB515" x="32" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M53,0 C54.6568542,0 56,1.34314575 56,3 L56,9 C56,10.6568542 54.6568542,12 53,12 L35,12 C33.3431458,12 32,10.6568542 32,9 L32,3 C32,1.34314575 33.3431458,0 35,0 L53,0 Z M53,1 L35,1 L34.7955116,1.0103258 C33.7869995,1.11274576 33,1.96446609 33,3 L33,9 C33,10.1045695 33.8954305,11 35,11 L53,11 C54.1045695,11 55,10.1045695 55,9 L55,3 C55,1.8954305 54.1045695,1 53,1 Z" id="Rectangle" fill-opacity="0.8" fill="#FDB515" fill-rule="nonzero"></path>
+                        <rect id="Rectangle" fill-opacity="0.6" fill="#FDB515" x="78" y="0" width="24" height="12" rx="3"></rect>
+                        <path d="M99,0 C100.656854,0 102,1.34314575 102,3 L102,9 C102,10.6568542 100.656854,12 99,12 L81,12 C79.3431458,12 78,10.6568542 78,9 L78,3 C78,1.34314575 79.3431458,0 81,0 L99,0 Z M99,1 L81,1 L80.7955116,1.0103258 C79.7869995,1.11274576 79,1.96446609 79,3 L79,9 C79,10.1045695 79.8954305,11 81,11 L99,11 C100.104569,11 101,10.1045695 101,9 L101,3 C101,1.8954305 100.104569,1 99,1 Z" id="Rectangle" fill-opacity="0.8" fill="#FDB515" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+            </g>
+            <g id="Score">
+                <g id="dash-box-@green">
+                    <path d="M8,0 L288,0 C292.418278,0 296,3.581722 296,8 L296,88 C296,92.418278 292.418278,96 288,96 L8,96 C3.581722,96 0,92.418278 0,88 L0,8 C0,3.581722 3.581722,8.8817842e-16 8,0 Z" id="bg" fill="#F0FDF8"></path>
+                    <path d="M7.10343083,94.9432289 C7.39891719,94.9809526 7.69814426,95 8,95 L8.97464335,95 L8.974,96 L8,96 C7.65340057,96 7.31194921,95.9779585 6.97697115,95.9352007 L7.10343083,94.9432289 Z M12.9746435,94.9999999 L12.974,95.9999999 L10.974,95.9999999 L10.9746433,94.9999999 L12.9746435,94.9999999 Z M16.9746434,94.9999999 L16.974,95.9999999 L14.974,95.9999999 L14.9746433,94.9999999 L16.9746434,94.9999999 Z M20.9746436,94.9999999 L20.974,95.9999999 L18.974,95.9999999 L18.9746433,94.9999999 L20.9746436,94.9999999 Z M24.9746434,94.9999999 L24.974,95.9999999 L22.974,95.9999999 L22.9746433,94.9999999 L24.9746434,94.9999999 Z M28.9746436,94.9999999 L28.974,95.9999999 L26.974,95.9999999 L26.9746433,94.9999999 L28.9746436,94.9999999 Z M32.9746434,94.9999999 L32.974,95.9999999 L30.974,95.9999999 L30.9746433,94.9999999 L32.9746434,94.9999999 Z M36.9746436,94.9999999 L36.974,95.9999999 L34.974,95.9999999 L34.9746433,94.9999999 L36.9746436,94.9999999 Z M40.9746434,94.9999999 L40.974,95.9999999 L38.974,95.9999999 L38.9746433,94.9999999 L40.9746434,94.9999999 Z M44.9746436,94.9999999 L44.974,95.9999999 L42.974,95.9999999 L42.9746433,94.9999999 L44.9746436,94.9999999 Z M48.9746435,94.9999999 L48.974,95.9999999 L46.974,95.9999999 L46.9746433,94.9999999 L48.9746435,94.9999999 Z M52.9746436,94.9999999 L52.974,95.9999999 L50.974,95.9999999 L50.9746433,94.9999999 L52.9746436,94.9999999 Z M56.9746435,94.9999999 L56.974,95.9999999 L54.974,95.9999999 L54.9746433,94.9999999 L56.9746435,94.9999999 Z M60.9746436,94.9999999 L60.974,95.9999999 L58.974,95.9999999 L58.9746433,94.9999999 L60.9746436,94.9999999 Z M64.9746435,94.9999999 L64.974,95.9999999 L62.974,95.9999999 L62.9746433,94.9999999 L64.9746435,94.9999999 Z M68.9746434,94.9999999 L68.974,95.9999999 L66.974,95.9999999 L66.9746433,94.9999999 L68.9746434,94.9999999 Z M72.9746435,94.9999999 L72.974,95.9999999 L70.974,95.9999999 L70.9746433,94.9999999 L72.9746435,94.9999999 Z M76.9746434,94.9999999 L76.974,95.9999999 L74.974,95.9999999 L74.9746433,94.9999999 L76.9746434,94.9999999 Z M80.9746435,94.9999999 L80.974,95.9999999 L78.974,95.9999999 L78.9746433,94.9999999 L80.9746435,94.9999999 Z M84.9746434,94.9999999 L84.974,95.9999999 L82.974,95.9999999 L82.9746433,94.9999999 L84.9746434,94.9999999 Z M88.9746435,94.9999999 L88.974,95.9999999 L86.974,95.9999999 L86.9746433,94.9999999 L88.9746435,94.9999999 Z M92.9746434,94.9999999 L92.974,95.9999999 L90.974,95.9999999 L90.9746433,94.9999999 L92.9746434,94.9999999 Z M96.9746435,94.9999999 L96.974,95.9999999 L94.974,95.9999999 L94.9746433,94.9999999 L96.9746435,94.9999999 Z M100.974643,94.9999999 L100.974,95.9999999 L98.974,95.9999999 L98.9746433,94.9999999 L100.974643,94.9999999 Z M104.974644,94.9999999 L104.974,95.9999999 L102.974,95.9999999 L102.974643,94.9999999 L104.974644,94.9999999 Z M108.974643,94.9999999 L108.974,95.9999999 L106.974,95.9999999 L106.974643,94.9999999 L108.974643,94.9999999 Z M112.974644,94.9999999 L112.974,95.9999999 L110.974,95.9999999 L110.974643,94.9999999 L112.974644,94.9999999 Z M116.974643,94.9999999 L116.974,95.9999999 L114.974,95.9999999 L114.974643,94.9999999 L116.974643,94.9999999 Z M120.974644,94.9999999 L120.974,95.9999999 L118.974,95.9999999 L118.974643,94.9999999 L120.974644,94.9999999 Z M124.974643,94.9999999 L124.974,95.9999999 L122.974,95.9999999 L122.974643,94.9999999 L124.974643,94.9999999 Z M128.974644,94.9999999 L128.974,95.9999999 L126.974,95.9999999 L126.974643,94.9999999 L128.974644,94.9999999 Z M132.974643,94.9999999 L132.974,95.9999999 L130.974,95.9999999 L130.974643,94.9999999 L132.974643,94.9999999 Z M136.974644,94.9999999 L136.974,95.9999999 L134.974,95.9999999 L134.974643,94.9999999 L136.974644,94.9999999 Z M140.974643,94.9999999 L140.974,95.9999999 L138.974,95.9999999 L138.974643,94.9999999 L140.974643,94.9999999 Z M144.974644,94.9999999 L144.974,95.9999999 L142.974,95.9999999 L142.974643,94.9999999 L144.974644,94.9999999 Z M148.974643,94.9999999 L148.974,95.9999999 L146.974,95.9999999 L146.974643,94.9999999 L148.974643,94.9999999 Z M152.974643,94.9999999 L152.974,95.9999999 L150.974,95.9999999 L150.974643,94.9999999 L152.974643,94.9999999 Z M156.974643,94.9999999 L156.974,95.9999999 L154.974,95.9999999 L154.974643,94.9999999 L156.974643,94.9999999 Z M160.974643,94.9999999 L160.974,95.9999999 L158.974,95.9999999 L158.974643,94.9999999 L160.974643,94.9999999 Z M164.974643,94.9999999 L164.974,95.9999999 L162.974,95.9999999 L162.974643,94.9999999 L164.974643,94.9999999 Z M168.974643,94.9999999 L168.974,95.9999999 L166.974,95.9999999 L166.974643,94.9999999 L168.974643,94.9999999 Z M172.974643,94.9999999 L172.974,95.9999999 L170.974,95.9999999 L170.974643,94.9999999 L172.974643,94.9999999 Z M176.974643,94.9999999 L176.974,95.9999999 L174.974,95.9999999 L174.974643,94.9999999 L176.974643,94.9999999 Z M180.974643,94.9999999 L180.974,95.9999999 L178.974,95.9999999 L178.974643,94.9999999 L180.974643,94.9999999 Z M184.974643,94.9999999 L184.974,95.9999999 L182.974,95.9999999 L182.974643,94.9999999 L184.974643,94.9999999 Z M188.974643,94.9999999 L188.974,95.9999999 L186.974,95.9999999 L186.974643,94.9999999 L188.974643,94.9999999 Z M192.974643,94.9999999 L192.974,95.9999999 L190.974,95.9999999 L190.974643,94.9999999 L192.974643,94.9999999 Z M196.974643,94.9999999 L196.974,95.9999999 L194.974,95.9999999 L194.974643,94.9999999 L196.974643,94.9999999 Z M200.974643,94.9999999 L200.974,95.9999999 L198.974,95.9999999 L198.974643,94.9999999 L200.974643,94.9999999 Z M204.974643,94.9999999 L204.974,95.9999999 L202.974,95.9999999 L202.974643,94.9999999 L204.974643,94.9999999 Z M208.974643,94.9999999 L208.974,95.9999999 L206.974,95.9999999 L206.974643,94.9999999 L208.974643,94.9999999 Z M212.974643,94.9999999 L212.974,95.9999999 L210.974,95.9999999 L210.974643,94.9999999 L212.974643,94.9999999 Z M216.974643,94.9999999 L216.974,95.9999999 L214.974,95.9999999 L214.974643,94.9999999 L216.974643,94.9999999 Z M220.974643,94.9999999 L220.974,95.9999999 L218.974,95.9999999 L218.974643,94.9999999 L220.974643,94.9999999 Z M224.974643,94.9999999 L224.974,95.9999999 L222.974,95.9999999 L222.974643,94.9999999 L224.974643,94.9999999 Z M228.974643,94.9999999 L228.974,95.9999999 L226.974,95.9999999 L226.974643,94.9999999 L228.974643,94.9999999 Z M232.974643,94.9999999 L232.974,95.9999999 L230.974,95.9999999 L230.974643,94.9999999 L232.974643,94.9999999 Z M236.974643,94.9999999 L236.974,95.9999999 L234.974,95.9999999 L234.974643,94.9999999 L236.974643,94.9999999 Z M240.974643,94.9999999 L240.974,95.9999999 L238.974,95.9999999 L238.974643,94.9999999 L240.974643,94.9999999 Z M244.974643,94.9999999 L244.974,95.9999999 L242.974,95.9999999 L242.974643,94.9999999 L244.974643,94.9999999 Z M248.974643,94.9999999 L248.974,95.9999999 L246.974,95.9999999 L246.974643,94.9999999 L248.974643,94.9999999 Z M252.974643,94.9999999 L252.974,95.9999999 L250.974,95.9999999 L250.974643,94.9999999 L252.974643,94.9999999 Z M256.974643,94.9999999 L256.974,95.9999999 L254.974,95.9999999 L254.974643,94.9999999 L256.974643,94.9999999 Z M260.974643,94.9999999 L260.974,95.9999999 L258.974,95.9999999 L258.974643,94.9999999 L260.974643,94.9999999 Z M264.974643,94.9999999 L264.974,95.9999999 L262.974,95.9999999 L262.974643,94.9999999 L264.974643,94.9999999 Z M268.974643,94.9999999 L268.974,95.9999999 L266.974,95.9999999 L266.974643,94.9999999 L268.974643,94.9999999 Z M272.974643,94.9999999 L272.974,95.9999999 L270.974,95.9999999 L270.974643,94.9999999 L272.974643,94.9999999 Z M276.974643,94.9999999 L276.974,95.9999999 L274.974,95.9999999 L274.974643,94.9999999 L276.974643,94.9999999 Z M280.974643,94.9999999 L280.974,95.9999999 L278.974,95.9999999 L278.974643,94.9999999 L280.974643,94.9999999 Z M284.974643,94.9999999 L284.974,95.9999999 L282.974,95.9999999 L282.974643,94.9999999 L284.974643,94.9999999 Z M288.867128,94.9469242 L288.989911,95.9393578 C288.665554,95.9793864 288.335184,96 288,96 L286.974,96 L286.974627,95 L288.014777,94.9999848 C288.301601,94.9993939 288.586036,94.9816011 288.867128,94.9469242 Z M292.083941,93.6859331 L292.667418,94.4980631 C292.128578,94.8857838 291.539554,95.2081369 290.911958,95.453512 L290.547268,94.5223827 C291.092723,94.309177 291.608785,94.0279038 292.083941,93.6859331 Z M3.87046676,93.6528418 C4.34287209,93.9986163 4.85665464,94.2840388 5.40029041,94.5016387 L5.02897703,95.4301463 C4.4032811,95.1797291 3.81658971,94.8525673 3.28054319,94.4603013 L3.87046676,93.6528418 Z M294.327269,90.9986503 L295.230602,91.4275896 C294.941958,92.0354131 294.57893,92.6010621 294.153278,93.1127737 L293.384567,92.4731779 C293.759145,92.0228491 294.076129,91.5274911 294.327269,90.9986503 Z M1.6495736,90.9493899 C1.8965782,91.4801941 2.20969373,91.9779709 2.58076411,92.4311685 L1.80668123,93.064253 C1.38539268,92.5496499 1.0269759,91.9815829 0.743159818,91.3717811 L1.6495736,90.9493899 Z M296,87.512 L296,88 C296,88.5150183 295.951333,89.01867 295.858348,89.506607 L294.876065,89.3192015 C294.958258,88.8878273 295,88.4468538 295,88 L295,87.5126783 L296,87.512 Z M1,87.4619065 L1.00002881,88.0203315 C1.00125212,88.4518975 1.04142314,88.8779241 1.11937861,89.2950584 L0.136531747,89.4794833 C0.0468808413,89.0000123 0,88.505481 0,88 L0,87.462 L1,87.4619065 Z M296,83.512 L296,85.512 L295,85.5126783 L295,83.5126783 L296,83.512 Z M1,83.461965 L1,85.461965 L0,85.461 L0,83.461 L1,83.461965 Z M296,79.512 L296,81.512 L295,81.5126783 L295,79.5126783 L296,79.512 Z M1,79.461965 L1,81.461965 L0,81.461 L0,79.461 L1,79.461965 Z M296,75.512 L296,77.512 L295,77.5126783 L295,75.5126783 L296,75.512 Z M1,75.461965 L1,77.461965 L0,77.461 L0,75.461 L1,75.461965 Z M296,71.512 L296,73.512 L295,73.5126783 L295,71.5126783 L296,71.512 Z M1,71.461965 L1,73.461965 L0,73.461 L0,71.461 L1,71.461965 Z M296,67.512 L296,69.512 L295,69.5126783 L295,67.5126783 L296,67.512 Z M1,67.461965 L1,69.461965 L0,69.461 L0,67.461 L1,67.461965 Z M296,63.512 L296,65.512 L295,65.5126783 L295,63.5126783 L296,63.512 Z M1,63.461965 L1,65.461965 L0,65.461 L0,63.461 L1,63.461965 Z M296,59.512 L296,61.512 L295,61.5126783 L295,59.5126783 L296,59.512 Z M1,59.461965 L1,61.461965 L0,61.461 L0,59.461 L1,59.461965 Z M296,55.512 L296,57.512 L295,57.5126783 L295,55.5126783 L296,55.512 Z M1,55.461965 L1,57.461965 L0,57.461 L0,55.461 L1,55.461965 Z M296,51.512 L296,53.512 L295,53.5126783 L295,51.5126783 L296,51.512 Z M1,51.461965 L1,53.461965 L0,53.461 L0,51.461 L1,51.461965 Z M296,47.512 L296,49.512 L295,49.5126783 L295,47.5126783 L296,47.512 Z M1,47.461965 L1,49.461965 L0,49.461 L0,47.461 L1,47.461965 Z M296,43.512 L296,45.512 L295,45.5126783 L295,43.5126783 L296,43.512 Z M1,43.461965 L1,45.461965 L0,45.461 L0,43.461 L1,43.461965 Z M296,39.512 L296,41.512 L295,41.5126783 L295,39.5126783 L296,39.512 Z M1,39.461965 L1,41.461965 L0,41.461 L0,39.461 L1,39.461965 Z M296,35.512 L296,37.512 L295,37.5126783 L295,35.5126783 L296,35.512 Z M1,35.461965 L1,37.461965 L0,37.461 L0,35.461 L1,35.461965 Z M296,31.512 L296,33.512 L295,33.5126783 L295,31.5126783 L296,31.512 Z M1,31.461965 L1,33.461965 L0,33.461 L0,31.461 L1,31.461965 Z M296,27.512 L296,29.512 L295,29.5126783 L295,27.5126783 L296,27.512 Z M1,27.461965 L1,29.461965 L0,29.461 L0,27.461 L1,27.461965 Z M296,23.512 L296,25.512 L295,25.5126783 L295,23.5126783 L296,23.512 Z M1,23.461965 L1,25.461965 L0,25.461 L0,23.461 L1,23.461965 Z M296,19.512 L296,21.512 L295,21.5126783 L295,19.5126783 L296,19.512 Z M1,19.461965 L1,21.461965 L0,21.461 L0,19.461 L1,19.461965 Z M296,15.512 L296,17.512 L295,17.5126783 L295,15.5126783 L296,15.512 Z M1,15.461965 L1,17.461965 L0,17.461 L0,15.461 L1,15.461965 Z M296,11.512 L296,13.512 L295,13.5126783 L295,11.5126783 L296,11.512 Z M1,11.461965 L1,13.461965 L0,13.461 L0,11.461 L1,11.461965 Z M296,8 L296,9.512 L295,9.51268141 L294.999996,7.99230721 C294.999843,7.84931676 294.995414,7.70690426 294.986748,7.5651841 L295.984907,7.50453602 C295.99492,7.6684065 296,7.83361003 296,8 Z M0.0177810396,7.46235014 L1.01560984,7.52821226 C1.00521986,7.68466746 1,7.84198159 1,8 L1,9.46196502 L0,9.461 L0,8 C0,7.81932186 0.00598958881,7.64004267 0.0177810396,7.46235014 Z M294.669225,5.86656128 C294.490852,5.30842466 294.242774,4.77610859 293.93156,4.28094453 L294.777667,3.74792999 C295.130817,4.30965578 295.416068,4.91838179 295.621909,5.56259574 L294.669225,5.86656128 Z M1.26026912,3.68817589 L2.10224493,4.22769122 C1.78656347,4.72006919 1.53367638,5.25013249 1.3502652,5.80651827 L0.400554683,5.49338901 C0.612012081,4.85193918 0.902417359,4.24636767 1.26026912,3.68817589 Z M292.801538,2.90632361 C292.375276,2.5043409 291.900412,2.15691661 291.388372,1.8730526 L291.873899,0.998830409 C292.461218,1.32450134 293.00324,1.72192978 293.488127,2.17927796 L292.801538,2.90632361 Z M4.18303226,0.967564053 L4.66099377,1.84594486 C4.14669322,2.12566396 3.66909236,2.46925191 3.23964084,2.86780054 L2.55894601,2.13523322 C3.04750167,1.68176376 3.5928096,1.28859486 4.18303226,0.967564053 Z M288,0 C288.685315,0 289.350503,0.0861719866 289.985321,0.248271715 L289.737382,1.21704772 C289.174749,1.07342708 288.592644,1 288,1 L288,0 Z M10,0 L10,1 L8.00000019,1 L7.59669449,1.01139508 C7.19522811,1.0341476 6.79959301,1.09086155 6.41261147,1.18053077 L6.18734244,0.206234069 C6.76976317,0.0713155963 7.37655326,0 8,0 L10,0 Z M150,0 L150,1 L148,1 L148,0 L150,0 Z M278,0 L278,1 L276,1 L276,0 L278,0 Z M274,0 L274,1 L272,1 L272,0 L274,0 Z M270,0 L270,1 L268,1 L268,0 L270,0 Z M266,0 L266,1 L264,1 L264,0 L266,0 Z M262,0 L262,1 L260,1 L260,0 L262,0 Z M258,0 L258,1 L256,1 L256,0 L258,0 Z M254,0 L254,1 L252,1 L252,0 L254,0 Z M250,0 L250,1 L248,1 L248,0 L250,0 Z M246,0 L246,1 L244,1 L244,0 L246,0 Z M242,0 L242,1 L240,1 L240,0 L242,0 Z M238,0 L238,1 L236,1 L236,0 L238,0 Z M234,0 L234,1 L232,1 L232,0 L234,0 Z M230,0 L230,1 L228,1 L228,0 L230,0 Z M226,0 L226,1 L224,1 L224,0 L226,0 Z M222,0 L222,1 L220,1 L220,0 L222,0 Z M218,0 L218,1 L216,1 L216,0 L218,0 Z M214,0 L214,1 L212,1 L212,0 L214,0 Z M210,0 L210,1 L208,1 L208,0 L210,0 Z M206,0 L206,1 L204,1 L204,0 L206,0 Z M202,0 L202,1 L200,1 L200,0 L202,0 Z M197.999,0 L198,1 L196,1 L195.999,0 L197.999,0 Z M194,0 L194,1 L192,1 L192,0 L194,0 Z M190,0 L190,1 L188,1 L188,0 L190,0 Z M186,0 L186,1 L184,1 L184,0 L186,0 Z M182,0 L182,1 L180,1 L180,0 L182,0 Z M178,0 L178,1 L176,1 L176,0 L178,0 Z M174,0 L174,1 L172,1 L172,0 L174,0 Z M170,0 L170,1 L168,1 L168,0 L170,0 Z M166,0 L166,1 L164,1 L164,0 L166,0 Z M162,0 L162,1 L160,1 L160,0 L162,0 Z M158,0 L158,1 L156,1 L156,0 L158,0 Z M154,0 L154,1 L152,1 L152,0 L154,0 Z M286,0 L286,1 L284,1 L284,0 L286,0 Z M146,0 L146,1 L144,1 L144,0 L146,0 Z M142,0 L142,1 L140,1 L140,0 L142,0 Z M138,0 L138,1 L136,1 L136,0 L138,0 Z M134,0 L134,1 L132,1 L132,0 L134,0 Z M130,0 L130,1 L128,1 L128,0 L130,0 Z M126,0 L126,1 L124,1 L124,0 L126,0 Z M122,0 L122,1 L120,1 L120,0 L122,0 Z M118,0 L118,1 L116,1 L116,0 L118,0 Z M114,0 L114,1 L112,1 L112,0 L114,0 Z M110,0 L110,1 L108,1 L108,0 L110,0 Z M106,0 L106,1 L104,1 L104,0 L106,0 Z M102,0 L102,1 L100,1 L100,0 L102,0 Z M98,0 L98,1 L96,1 L96,0 L98,0 Z M94,0 L94,1 L92,1 L92,0 L94,0 Z M90,0 L90,1 L88,1 L88,0 L90,0 Z M86,0 L86,1 L84,1 L84,0 L86,0 Z M82,0 L82,1 L80,1 L80,0 L82,0 Z M78,0 L78,1 L76,1 L76,0 L78,0 Z M74,0 L74,1 L72,1 L72,0 L74,0 Z M70,0 L70,1 L68,1 L68,0 L70,0 Z M66,0 L66,1 L64,1 L64,0 L66,0 Z M62,0 L62,1 L60,1 L60,0 L62,0 Z M58,0 L58,1 L56,1 L56,0 L58,0 Z M54,0 L54,1 L52,1 L52,0 L54,0 Z M50,0 L50,1 L48,1 L48,0 L50,0 Z M46,0 L46,1 L44,1 L44,0 L46,0 Z M42,0 L42,1 L40,1 L40,0 L42,0 Z M38,0 L38,1 L36,1 L36,0 L38,0 Z M34,0 L34,1 L32,1 L32,0 L34,0 Z M30,0 L30,1 L28,1 L28,0 L30,0 Z M26,0 L26,1 L24,1 L24,0 L26,0 Z M22,0 L22,1 L20,1 L20,0 L22,0 Z M18,0 L18,1 L16,1 L16,0 L18,0 Z M14,0 L14,1 L12,1 L12,0 L14,0 Z M282,0 L282,1 L280,1 L280,0 L282,0 Z" id="dash" fill="#1CBB8B" fill-rule="nonzero"></path>
+                </g>
+                <g id="Lines-@MaxSim" transform="translate(40, 72)" stroke="#979797" stroke-linecap="round" stroke-linejoin="round">
+                    <line x1="13" y1="0" x2="-2.58637556e-12" y2="46" id="Path"></line>
+                    <line x1="106" y1="0" x2="32.0001" y2="46" id="Path"></line>
+                    <line x1="198.881582" y1="0" x2="78.0001" y2="46" id="Path"></line>
+                    <line x1="106" y1="0" x2="138.0001" y2="46" id="Path"></line>
+                    <line x1="198.881582" y1="-4.97379915e-14" x2="138.0001" y2="46" id="Path"></line>
+                    <line x1="198.881582" y1="-4.97379915e-14" x2="216.0001" y2="46" id="Path"></line>
+                    <line x1="106.5" y1="1.13686838e-13" x2="216.0001" y2="46" id="Path"></line>
+                    <line x1="13" y1="-4.97379915e-14" x2="216" y2="46" id="Path"></line>
+                    <line x1="198.881582" y1="-4.97379915e-14" x2="170.0001" y2="46" id="Path"></line>
+                    <line x1="106" y1="1.13686838e-13" x2="170.0001" y2="46" id="Path"></line>
+                    <line x1="13" y1="1.13686838e-13" x2="170.0001" y2="46" id="Path"></line>
+                    <line x1="13" y1="0" x2="138.0001" y2="46" id="Path"></line>
+                </g>
+                <g id="txt-@late-interaction" transform="translate(152, 78)" xlink:href="#path-20">
+                    <rect id="bg-transparent" fill-opacity="0.85" fill="#F0FDF8" x="0" y="0" width="138" height="12" rx="3"></rect>
+                    <g id="tex" stroke-width="1" fill="#12BE8B">
+                        <path d="M1.71386719,10 C1.88964844,10 2.04296875,9.94238281 2.17382813,9.82714844 C2.3046875,9.71191406 2.37011719,9.55664063 2.37011719,9.36132812 L2.37011719,2.23046875 C2.37011719,2.03515625 2.30566406,1.87988281 2.17675781,1.76464844 C2.04785156,1.64941406 1.89746094,1.59179688 1.72558594,1.59179688 C1.54980469,1.59179688 1.3984375,1.6484375 1.27148438,1.76171875 C1.14453125,1.875 1.08105469,2.03125 1.08105469,2.23046875 L1.08105469,9.36132812 C1.08105469,9.56445313 1.14257812,9.72167969 1.265625,9.83300781 C1.38867188,9.94433594 1.53808594,10 1.71386719,10 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M5.66894531,10.0820312 C6.46191406,10.0820312 7.02832031,9.74609375 7.36816406,9.07421875 L7.36816406,9.390625 C7.36816406,9.65625 7.48339844,9.83984375 7.71386719,9.94140625 C7.80371094,9.98046875 7.89746094,10 7.99511719,10 C8.16699219,10 8.31542969,9.94628906 8.44042969,9.83886719 C8.56542969,9.73144531 8.62792969,9.58007812 8.62792969,9.38476562 L8.62792969,6.17382812 C8.62792969,5.45117188 8.41699219,4.91894531 7.99511719,4.57714844 C7.57324219,4.23535156 7.00097656,4.06445312 6.27832031,4.06445312 C5.47753906,4.06445312 4.76074219,4.22265625 4.12792969,4.5390625 C3.98339844,4.609375 3.91113281,4.72070313 3.91113281,4.87304687 C3.91113281,4.99804687 3.95507812,5.11132813 4.04296875,5.21289062 C4.13085937,5.31445312 4.23730469,5.36523438 4.36230469,5.36523438 C4.37792969,5.36523438 4.39355469,5.36425781 4.40917969,5.36230469 C4.42480469,5.36035156 4.44140625,5.35742187 4.45898438,5.35351562 C4.4765625,5.34960938 4.49316406,5.34375 4.50878906,5.3359375 C4.72753906,5.25 4.88867187,5.18847656 4.9921875,5.15136719 C5.09570312,5.11425781 5.27246094,5.06738281 5.52246094,5.01074219 C5.77246094,4.95410156 6.00683594,4.92578125 6.22558594,4.92578125 C6.96777344,4.92578125 7.33886719,5.30859375 7.33886719,6.07421875 L7.33886719,6.58984375 C6.97949219,6.58984375 6.67480469,6.59277344 6.42480469,6.59863281 C6.17480469,6.60449219 5.91894531,6.61816406 5.65722656,6.63964844 C5.39550781,6.66113281 5.17773438,6.69042969 5.00390625,6.72753906 C4.83007813,6.76464844 4.65917969,6.81347656 4.49121094,6.87402344 C4.32324219,6.93457031 4.18945312,7.00878906 4.08984375,7.09667969 C3.99023438,7.18457031 3.90039062,7.29101563 3.8203125,7.41601563 C3.74023438,7.54101563 3.68359375,7.68359375 3.65039062,7.84375 C3.6171875,8.00390625 3.60058594,8.1875 3.60058594,8.39453125 C3.60058594,8.93359375 3.79394531,9.34960938 4.18066406,9.64257813 C4.56738281,9.93554688 5.06347656,10.0820312 5.66894531,10.0820312 Z M5.78027344,9.24414062 C5.49121094,9.24414062 5.25878906,9.17089844 5.08300781,9.02441406 C4.90722656,8.87792969 4.81933594,8.6484375 4.81933594,8.3359375 C4.81933594,8.22265625 4.828125,8.12207031 4.84570312,8.03417969 C4.86328125,7.94628906 4.89355469,7.8671875 4.93652344,7.796875 C4.97949219,7.7265625 5.03125,7.66699219 5.09179687,7.61816406 C5.15234375,7.56933594 5.234375,7.52636719 5.33789062,7.48925781 C5.44140625,7.45214844 5.55175781,7.42285156 5.66894531,7.40136719 C5.78613281,7.37988281 5.93359375,7.36230469 6.11132812,7.34863281 C6.2890625,7.33496094 6.47070312,7.32617188 6.65625,7.32226562 C6.84179688,7.31835938 7.06738281,7.31640625 7.33300781,7.31640625 L7.33300781,7.46875 C7.33300781,7.984375 7.17675781,8.40917969 6.86425781,8.74316406 C6.55175781,9.07714844 6.19042969,9.24414062 5.78027344,9.24414062 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M12.2958984,10.0585938 C12.3154297,10.0585938 12.3398438,10.0585938 12.3691406,10.0585938 C12.3984375,10.0585938 12.4267578,10.0585938 12.4541016,10.0585938 C12.4814453,10.0585938 12.5048828,10.0585938 12.5244141,10.0585938 C12.6103516,10.0585938 12.6826172,10.0576172 12.7412109,10.0556641 C12.7998047,10.0537109 12.8662109,10.0507812 12.9404297,10.046875 C13.0146484,10.0429687 13.0791016,10.0351563 13.1337891,10.0234375 C13.1884766,10.0117188 13.2441406,9.99804687 13.3007812,9.98242188 C13.3574219,9.96679688 13.4052734,9.9453125 13.4443359,9.91796875 C13.4833984,9.890625 13.5185547,9.859375 13.5498047,9.82421875 C13.5810547,9.7890625 13.6044922,9.74707031 13.6201172,9.69824219 C13.6357422,9.64941406 13.6435547,9.59375 13.6435547,9.53125 C13.6435547,9.4453125 13.625,9.36523438 13.5878906,9.29101562 C13.5507812,9.21679687 13.4941406,9.15527344 13.4179688,9.10644531 C13.3417969,9.05761719 13.2509766,9.03320312 13.1455078,9.03320312 C13.1298828,9.03320312 13.1162109,9.03320312 13.1044922,9.03320312 L12.6767578,9.0625 L12.6240234,9.0625 C12.4638672,9.0625 12.3349609,9.01855469 12.2373047,8.93066406 C12.1396484,8.84277344 12.0712891,8.72167969 12.0322266,8.56738281 C11.9931641,8.41308594 11.9736328,8.21875 11.9736328,7.984375 L11.9736328,5.03710937 L12.9990234,5.03710937 C13.1513672,5.03710937 13.2646484,4.99902344 13.3388672,4.92285156 C13.4130859,4.84667969 13.4501953,4.74804688 13.4501953,4.62695313 C13.4501953,4.50585937 13.4121094,4.40527344 13.3359375,4.32519531 C13.2597656,4.24511719 13.1474609,4.20507812 12.9990234,4.20507812 L11.9736328,4.20507812 L11.9736328,2.54101562 C11.9736328,2.37695312 11.9296875,2.25488281 11.8417969,2.17480469 C11.7539062,2.09472656 11.6396484,2.0546875 11.4990234,2.0546875 C11.3271484,2.0546875 11.1835937,2.10644531 11.0683594,2.20996094 C10.953125,2.31347656 10.8857422,2.44921875 10.8662109,2.6171875 L10.6904297,4.20507812 L10.0283203,4.20507812 C9.87988281,4.20507812 9.76660156,4.24414062 9.68847656,4.32226562 C9.61035156,4.40039062 9.57128906,4.49804688 9.57128906,4.61523438 C9.57128906,4.73242188 9.61230469,4.83203125 9.69433594,4.9140625 C9.77636719,4.99609375 9.89160156,5.03710937 10.0400391,5.03710937 L10.6904297,5.03710937 L10.6904297,8.2890625 C10.6904297,8.84765625 10.8339844,9.28222656 11.1210937,9.59277344 C11.4082031,9.90332031 11.7998047,10.0585938 12.2958984,10.0585938 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M17.3583984,10.0820312 C18.2099609,10.0820312 18.9287109,9.82421875 19.5146484,9.30859375 C19.6435547,9.203125 19.7080078,9.07617188 19.7080078,8.92773437 C19.7080078,8.81054688 19.6679688,8.70703125 19.5878906,8.6171875 C19.5078125,8.52734375 19.4130859,8.48242188 19.3037109,8.48242188 C19.2294922,8.48242188 19.1552734,8.5078125 19.0810547,8.55859375 C18.4833984,8.98046875 17.9404297,9.19140625 17.4521484,9.19140625 C16.9248047,9.1875 16.5039062,9.03808594 16.1894531,8.74316406 C15.875,8.44824219 15.7041016,7.99023438 15.6767578,7.36914062 L19.5205078,7.36914062 C19.6533203,7.36914062 19.75,7.32324219 19.8105469,7.23144531 C19.8710938,7.13964844 19.9013672,7.0234375 19.9013672,6.8828125 C19.8935547,6.49609375 19.8349609,6.13769531 19.7255859,5.80761719 C19.6162109,5.47753906 19.4570313,5.18066406 19.2480469,4.91699219 C19.0390625,4.65332031 18.7617188,4.4453125 18.4160156,4.29296875 C18.0703125,4.140625 17.6728516,4.06445312 17.2236328,4.06445312 C16.7822266,4.06445312 16.3798828,4.14453125 16.0166016,4.3046875 C15.6533203,4.46484375 15.3544922,4.68164063 15.1201172,4.95507813 C14.8857422,5.22851562 14.7050781,5.54199219 14.578125,5.89550781 C14.4511719,6.24902344 14.3876953,6.62304687 14.3876953,7.01757812 C14.3876953,7.96289062 14.6601562,8.70800781 15.2050781,9.25292969 C15.75,9.79785156 16.4677734,10.0742188 17.3583984,10.0820312 Z M15.6884766,6.59570312 C15.7080078,6.38476562 15.7539062,6.18359375 15.8261719,5.9921875 C15.8984375,5.80078125 15.9970703,5.625 16.1220703,5.46484375 C16.2470703,5.3046875 16.4091797,5.17578125 16.6083984,5.078125 C16.8076172,4.98046875 17.0302734,4.93164062 17.2763672,4.93164062 C17.7529297,4.93164062 18.1162109,5.08984375 18.3662109,5.40625 C18.6162109,5.72265625 18.7587891,6.11914062 18.7939453,6.59570312 L15.6884766,6.59570312 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M25.1748047,10 C25.3505859,10 25.5039062,9.94238281 25.6347656,9.82714844 C25.765625,9.71191406 25.8310547,9.55664063 25.8310547,9.36132812 L25.8310547,4.78515625 C25.8310547,4.5859375 25.7675781,4.4296875 25.640625,4.31640625 C25.5136719,4.203125 25.3642578,4.14648438 25.1923828,4.14648438 C25.0166016,4.14648438 24.8642578,4.203125 24.7353516,4.31640625 C24.6064453,4.4296875 24.5419922,4.5859375 24.5419922,4.78515625 L24.5419922,9.36132812 C24.5419922,9.56445312 24.6035156,9.72167969 24.7265625,9.83300781 C24.8496094,9.94433594 24.9990234,10 25.1748047,10 Z M25.1806641,2.921875 C25.4150391,2.921875 25.6035156,2.85449219 25.7460938,2.71972656 C25.8886719,2.58496094 25.9599609,2.40625 25.9599609,2.18359375 C25.9599609,1.96484375 25.8896484,1.78710937 25.7490234,1.65039063 C25.6083984,1.51367188 25.4208984,1.4453125 25.1865234,1.4453125 C24.9521484,1.4453125 24.7636719,1.51367188 24.6210938,1.65039063 C24.4785156,1.78710937 24.4072266,1.96484375 24.4072266,2.18359375 C24.4072266,2.40625 24.4785156,2.58496094 24.6210938,2.71972656 C24.7636719,2.85449219 24.9501953,2.921875 25.1806641,2.921875 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M28.0517578,10 C28.2275391,10 28.3789062,9.9453125 28.5058594,9.8359375 C28.6328125,9.7265625 28.6962891,9.57617188 28.6962891,9.38476563 L28.6962891,6.71875 C28.7001953,6.18359375 28.8525391,5.75195313 29.1533203,5.42382812 C29.4541016,5.09570312 29.8134766,4.93164063 30.2314453,4.93164063 C30.5908203,4.93164063 30.8818359,5.04882812 31.1044922,5.28320312 C31.3271484,5.51757812 31.4384766,5.85546875 31.4384766,6.296875 L31.4384766,9.38476563 C31.4384766,9.58007812 31.5009766,9.73144531 31.6259766,9.83886719 C31.7509766,9.94628906 31.8994141,10 32.0712891,10 C32.2470703,10 32.3994141,9.94628906 32.5283203,9.83886719 C32.6572266,9.73144531 32.7216797,9.58007812 32.7216797,9.38476563 L32.7216797,6.30859375 C32.7216797,5.92578125 32.6650391,5.58789063 32.5517578,5.29492188 C32.4384766,5.00195313 32.2822266,4.76757813 32.0830078,4.59179688 C31.8837891,4.41601562 31.6582031,4.28417969 31.40625,4.19628906 C31.1542969,4.10839844 30.8837891,4.06445312 30.5947266,4.06445312 C30.1494141,4.06445312 29.7626953,4.14941406 29.4345703,4.31933594 C29.1064453,4.48925781 28.8603516,4.73632812 28.6962891,5.06054687 L28.6962891,4.73242188 C28.6962891,4.54882812 28.6337891,4.40527344 28.5087891,4.30175781 C28.3837891,4.19824219 28.2333984,4.14648438 28.0576172,4.14648438 C27.8818359,4.14648438 27.7294922,4.20019531 27.6005859,4.30761719 C27.4716797,4.41503906 27.4072266,4.5625 27.4072266,4.75 L27.4072266,9.38476563 C27.4072266,9.58007812 27.4707031,9.73144531 27.5976562,9.83886719 C27.7246094,9.94628906 27.8759766,10 28.0517578,10 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M36.3662109,10.0585938 C36.3857422,10.0585938 36.4101562,10.0585938 36.4394531,10.0585938 C36.46875,10.0585938 36.4970703,10.0585938 36.5244141,10.0585938 C36.5517578,10.0585938 36.5751953,10.0585938 36.5947266,10.0585938 C36.6806641,10.0585938 36.7529297,10.0576172 36.8115234,10.0556641 C36.8701172,10.0537109 36.9365234,10.0507812 37.0107422,10.046875 C37.0849609,10.0429687 37.1494141,10.0351563 37.2041016,10.0234375 C37.2587891,10.0117188 37.3144531,9.99804687 37.3710938,9.98242188 C37.4277344,9.96679688 37.4755859,9.9453125 37.5146484,9.91796875 C37.5537109,9.890625 37.5888672,9.859375 37.6201172,9.82421875 C37.6513672,9.7890625 37.6748047,9.74707031 37.6904297,9.69824219 C37.7060547,9.64941406 37.7138672,9.59375 37.7138672,9.53125 C37.7138672,9.4453125 37.6953125,9.36523438 37.6582031,9.29101562 C37.6210938,9.21679687 37.5644531,9.15527344 37.4882812,9.10644531 C37.4121094,9.05761719 37.3212891,9.03320312 37.2158203,9.03320312 C37.2001953,9.03320312 37.1865234,9.03320312 37.1748047,9.03320312 L36.7470703,9.0625 L36.6943359,9.0625 C36.5341797,9.0625 36.4052734,9.01855469 36.3076172,8.93066406 C36.2099609,8.84277344 36.1416016,8.72167969 36.1025391,8.56738281 C36.0634766,8.41308594 36.0439453,8.21875 36.0439453,7.984375 L36.0439453,5.03710937 L37.0693359,5.03710937 C37.2216797,5.03710937 37.3349609,4.99902344 37.4091797,4.92285156 C37.4833984,4.84667969 37.5205078,4.74804688 37.5205078,4.62695313 C37.5205078,4.50585937 37.4824219,4.40527344 37.40625,4.32519531 C37.3300781,4.24511719 37.2177734,4.20507812 37.0693359,4.20507812 L36.0439453,4.20507812 L36.0439453,2.54101562 C36.0439453,2.37695312 36,2.25488281 35.9121094,2.17480469 C35.8242188,2.09472656 35.7099609,2.0546875 35.5693359,2.0546875 C35.3974609,2.0546875 35.2539062,2.10644531 35.1386719,2.20996094 C35.0234375,2.31347656 34.9560547,2.44921875 34.9365234,2.6171875 L34.7607422,4.20507812 L34.0986328,4.20507812 C33.9501953,4.20507812 33.8369141,4.24414062 33.7587891,4.32226562 C33.6806641,4.40039062 33.6416016,4.49804688 33.6416016,4.61523438 C33.6416016,4.73242188 33.6826172,4.83203125 33.7646484,4.9140625 C33.8466797,4.99609375 33.9619141,5.03710937 34.1103516,5.03710937 L34.7607422,5.03710937 L34.7607422,8.2890625 C34.7607422,8.84765625 34.9042969,9.28222656 35.1914062,9.59277344 C35.4785156,9.90332031 35.8701172,10.0585938 36.3662109,10.0585938 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M41.4287109,10.0820312 C42.2802734,10.0820312 42.9990234,9.82421875 43.5849609,9.30859375 C43.7138672,9.203125 43.7783203,9.07617188 43.7783203,8.92773437 C43.7783203,8.81054688 43.7382812,8.70703125 43.6582031,8.6171875 C43.578125,8.52734375 43.4833984,8.48242188 43.3740234,8.48242188 C43.2998047,8.48242188 43.2255859,8.5078125 43.1513672,8.55859375 C42.5537109,8.98046875 42.0107422,9.19140625 41.5224609,9.19140625 C40.9951172,9.1875 40.5742188,9.03808594 40.2597656,8.74316406 C39.9453125,8.44824219 39.7744141,7.99023438 39.7470703,7.36914062 L43.5908203,7.36914062 C43.7236328,7.36914062 43.8203125,7.32324219 43.8808594,7.23144531 C43.9414062,7.13964844 43.9716797,7.0234375 43.9716797,6.8828125 C43.9638672,6.49609375 43.9052734,6.13769531 43.7958984,5.80761719 C43.6865234,5.47753906 43.5273438,5.18066406 43.3183594,4.91699219 C43.109375,4.65332031 42.8320312,4.4453125 42.4863281,4.29296875 C42.140625,4.140625 41.7431641,4.06445312 41.2939453,4.06445312 C40.8525391,4.06445312 40.4501953,4.14453125 40.0869141,4.3046875 C39.7236328,4.46484375 39.4248047,4.68164063 39.1904297,4.95507813 C38.9560547,5.22851562 38.7753906,5.54199219 38.6484375,5.89550781 C38.5214844,6.24902344 38.4580078,6.62304687 38.4580078,7.01757812 C38.4580078,7.96289062 38.7304688,8.70800781 39.2753906,9.25292969 C39.8203125,9.79785156 40.5380859,10.0742188 41.4287109,10.0820312 Z M39.7587891,6.59570312 C39.7783203,6.38476562 39.8242188,6.18359375 39.8964844,5.9921875 C39.96875,5.80078125 40.0673828,5.625 40.1923828,5.46484375 C40.3173828,5.3046875 40.4794922,5.17578125 40.6787109,5.078125 C40.8779297,4.98046875 41.1005859,4.93164062 41.3466797,4.93164062 C41.8232422,4.93164062 42.1865234,5.08984375 42.4365234,5.40625 C42.6865234,5.72265625 42.8291016,6.11914062 42.8642578,6.59570312 L39.7587891,6.59570312 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M45.8525391,10 C46.0283203,10 46.1816406,9.94628906 46.3125,9.83886719 C46.4433594,9.73144531 46.5087891,9.58203125 46.5087891,9.390625 L46.5087891,6.66015625 C46.5087891,6.37890625 46.5517578,6.140625 46.6376953,5.9453125 C46.7236328,5.75 46.8466797,5.60058594 47.0068359,5.49707031 C47.1669922,5.39355469 47.3417969,5.3203125 47.53125,5.27734375 C47.7207031,5.234375 47.9384766,5.21289062 48.1845703,5.21289062 C48.3134766,5.21289062 48.4150391,5.16210937 48.4892578,5.06054688 C48.5634766,4.95898438 48.6005859,4.83789062 48.6005859,4.69726562 C48.6005859,4.62695312 48.5908203,4.55859375 48.5712891,4.4921875 C48.5517578,4.42578125 48.5224609,4.36621094 48.4833984,4.31347656 C48.4443359,4.26074219 48.3935547,4.21875 48.3310547,4.1875 C48.2685547,4.15625 48.1982422,4.140625 48.1201172,4.140625 C47.7412109,4.140625 47.4003906,4.26367188 47.0976562,4.50976562 C46.7949219,4.75585938 46.5927734,5.06054688 46.4912109,5.42382812 L46.5087891,4.69726562 C46.5126953,4.52539062 46.4521484,4.390625 46.3271484,4.29296875 C46.2021484,4.1953125 46.0517578,4.14648438 45.8759766,4.14648438 C45.7626953,4.14648438 45.6572266,4.16699219 45.5595703,4.20800781 C45.4619141,4.24902344 45.3808594,4.31542969 45.3164062,4.40722656 C45.2519531,4.49902344 45.2197266,4.609375 45.2197266,4.73828125 L45.2197266,9.390625 C45.2197266,9.5859375 45.2822266,9.73632812 45.4072266,9.84179688 C45.5322266,9.94726562 45.6806641,10 45.8525391,10 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M51.2724609,10.0820312 C52.0654297,10.0820312 52.6318359,9.74609375 52.9716797,9.07421875 L52.9716797,9.390625 C52.9716797,9.65625 53.0869141,9.83984375 53.3173828,9.94140625 C53.4072266,9.98046875 53.5009766,10 53.5986328,10 C53.7705078,10 53.9189453,9.94628906 54.0439453,9.83886719 C54.1689453,9.73144531 54.2314453,9.58007812 54.2314453,9.38476562 L54.2314453,6.17382812 C54.2314453,5.45117188 54.0205078,4.91894531 53.5986328,4.57714844 C53.1767578,4.23535156 52.6044922,4.06445312 51.8818359,4.06445312 C51.0810547,4.06445312 50.3642578,4.22265625 49.7314453,4.5390625 C49.5869141,4.609375 49.5146484,4.72070313 49.5146484,4.87304687 C49.5146484,4.99804687 49.5585938,5.11132813 49.6464844,5.21289062 C49.734375,5.31445312 49.8408203,5.36523438 49.9658203,5.36523438 C49.9814453,5.36523438 49.9970703,5.36425781 50.0126953,5.36230469 C50.0283203,5.36035156 50.0449219,5.35742187 50.0625,5.35351562 C50.0800781,5.34960938 50.0966797,5.34375 50.1123047,5.3359375 C50.3310547,5.25 50.4921875,5.18847656 50.5957031,5.15136719 C50.6992188,5.11425781 50.8759766,5.06738281 51.1259766,5.01074219 C51.3759766,4.95410156 51.6103516,4.92578125 51.8291016,4.92578125 C52.5712891,4.92578125 52.9423828,5.30859375 52.9423828,6.07421875 L52.9423828,6.58984375 C52.5830078,6.58984375 52.2783203,6.59277344 52.0283203,6.59863281 C51.7783203,6.60449219 51.5224609,6.61816406 51.2607422,6.63964844 C50.9990234,6.66113281 50.78125,6.69042969 50.6074219,6.72753906 C50.4335938,6.76464844 50.2626953,6.81347656 50.0947266,6.87402344 C49.9267578,6.93457031 49.7929688,7.00878906 49.6933594,7.09667969 C49.59375,7.18457031 49.5039062,7.29101563 49.4238281,7.41601563 C49.34375,7.54101563 49.2871094,7.68359375 49.2539062,7.84375 C49.2207031,8.00390625 49.2041016,8.1875 49.2041016,8.39453125 C49.2041016,8.93359375 49.3974609,9.34960938 49.7841797,9.64257813 C50.1708984,9.93554688 50.6669922,10.0820312 51.2724609,10.0820312 Z M51.3837891,9.24414062 C51.0947266,9.24414062 50.8623047,9.17089844 50.6865234,9.02441406 C50.5107422,8.87792969 50.4228516,8.6484375 50.4228516,8.3359375 C50.4228516,8.22265625 50.4316406,8.12207031 50.4492188,8.03417969 C50.4667969,7.94628906 50.4970703,7.8671875 50.5400391,7.796875 C50.5830078,7.7265625 50.6347656,7.66699219 50.6953125,7.61816406 C50.7558594,7.56933594 50.8378906,7.52636719 50.9414062,7.48925781 C51.0449219,7.45214844 51.1552734,7.42285156 51.2724609,7.40136719 C51.3896484,7.37988281 51.5371094,7.36230469 51.7148438,7.34863281 C51.8925781,7.33496094 52.0742188,7.32617188 52.2597656,7.32226562 C52.4453125,7.31835938 52.6708984,7.31640625 52.9365234,7.31640625 L52.9365234,7.46875 C52.9365234,7.984375 52.7802734,8.40917969 52.4677734,8.74316406 C52.1552734,9.07714844 51.7939453,9.24414062 51.3837891,9.24414062 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M58.3681641,10.0820312 C59.1611328,10.0820312 59.7587891,9.90820312 60.1611328,9.56054688 C60.3017578,9.43945312 60.3720703,9.30664063 60.3720703,9.16210938 C60.3720703,9.12695313 60.3681641,9.09277344 60.3603516,9.05957031 C60.3525391,9.02636719 60.3398438,8.99511719 60.3222656,8.96582031 C60.3046875,8.93652344 60.2841797,8.91015625 60.2607422,8.88671875 C60.2373047,8.86328125 60.2119141,8.84375 60.1845703,8.828125 C60.1572266,8.8125 60.1269531,8.79980469 60.09375,8.79003906 C60.0605469,8.78027344 60.0263672,8.77539062 59.9912109,8.77539062 C59.9443359,8.77539062 59.8964844,8.78320312 59.8476562,8.79882812 C59.7988281,8.81445312 59.7509766,8.83984375 59.7041016,8.875 C59.3955078,9.09765625 58.9833984,9.20898438 58.4677734,9.20898438 C57.9404297,9.20898438 57.5273438,9.01171875 57.2285156,8.6171875 C56.9296875,8.22265625 56.7802734,7.70507813 56.7802734,7.06445312 C56.7802734,6.42773437 56.9345703,5.91210938 57.2431641,5.51757812 C57.5517578,5.12304687 57.9716797,4.92578125 58.5029297,4.92578125 C58.8818359,4.92578125 59.2431641,5.01953125 59.5869141,5.20703125 C59.6298828,5.23046875 59.6738281,5.24804687 59.71875,5.25976562 C59.7636719,5.27148438 59.8076172,5.27734375 59.8505859,5.27734375 C59.8974609,5.27734375 59.9423828,5.27148438 59.9853516,5.25976562 C60.0283203,5.24804687 60.0664062,5.23046875 60.0996094,5.20703125 C60.1328125,5.18359375 60.1621094,5.15625 60.1875,5.125 C60.2128906,5.09375 60.2324219,5.05957031 60.2460938,5.02246094 C60.2597656,4.98535156 60.2666016,4.9453125 60.2666016,4.90234375 C60.2666016,4.859375 60.2578125,4.81640625 60.2402344,4.7734375 C60.2226562,4.73046875 60.1953125,4.68652344 60.1582031,4.64160156 C60.1210938,4.59667969 60.0751953,4.5546875 60.0205078,4.515625 C59.5908203,4.21484375 59.0498047,4.06445312 58.3974609,4.06445312 C57.9677734,4.06445312 57.5703125,4.14453125 57.2050781,4.3046875 C56.8398438,4.46484375 56.5332031,4.68164063 56.2851562,4.95507813 C56.0371094,5.22851562 55.84375,5.54785156 55.7050781,5.91308594 C55.5664062,6.27832031 55.4970703,6.6640625 55.4970703,7.0703125 C55.4970703,7.625 55.6113281,8.12890625 55.8398438,8.58203125 C56.0683594,9.03515625 56.4033203,9.3984375 56.8447266,9.671875 C57.2861328,9.9453125 57.7939453,10.0820312 58.3681641,10.0820312 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M63.5478516,10.0585938 C63.5673828,10.0585938 63.5917969,10.0585938 63.6210938,10.0585938 C63.6503906,10.0585938 63.6787109,10.0585938 63.7060547,10.0585938 C63.7333984,10.0585938 63.7568359,10.0585938 63.7763672,10.0585938 C63.8623047,10.0585938 63.9345703,10.0576172 63.9931641,10.0556641 C64.0517578,10.0537109 64.1181641,10.0507812 64.1923828,10.046875 C64.2666016,10.0429687 64.3310547,10.0351563 64.3857422,10.0234375 C64.4404297,10.0117188 64.4960938,9.99804687 64.5527344,9.98242188 C64.609375,9.96679688 64.6572266,9.9453125 64.6962891,9.91796875 C64.7353516,9.890625 64.7705078,9.859375 64.8017578,9.82421875 C64.8330078,9.7890625 64.8564453,9.74707031 64.8720703,9.69824219 C64.8876953,9.64941406 64.8955078,9.59375 64.8955078,9.53125 C64.8955078,9.4453125 64.8769531,9.36523438 64.8398438,9.29101562 C64.8027344,9.21679687 64.7460938,9.15527344 64.6699219,9.10644531 C64.59375,9.05761719 64.5029297,9.03320312 64.3974609,9.03320312 C64.3818359,9.03320312 64.3681641,9.03320312 64.3564453,9.03320312 L63.9287109,9.0625 L63.8759766,9.0625 C63.7158203,9.0625 63.5869141,9.01855469 63.4892578,8.93066406 C63.3916016,8.84277344 63.3232422,8.72167969 63.2841797,8.56738281 C63.2451172,8.41308594 63.2255859,8.21875 63.2255859,7.984375 L63.2255859,5.03710937 L64.2509766,5.03710937 C64.4033203,5.03710937 64.5166016,4.99902344 64.5908203,4.92285156 C64.6650391,4.84667969 64.7021484,4.74804688 64.7021484,4.62695313 C64.7021484,4.50585937 64.6640625,4.40527344 64.5878906,4.32519531 C64.5117188,4.24511719 64.3994141,4.20507812 64.2509766,4.20507812 L63.2255859,4.20507812 L63.2255859,2.54101562 C63.2255859,2.37695312 63.1816406,2.25488281 63.09375,2.17480469 C63.0058594,2.09472656 62.8916016,2.0546875 62.7509766,2.0546875 C62.5791016,2.0546875 62.4355469,2.10644531 62.3203125,2.20996094 C62.2050781,2.31347656 62.1376953,2.44921875 62.1181641,2.6171875 L61.9423828,4.20507812 L61.2802734,4.20507812 C61.1318359,4.20507812 61.0185547,4.24414062 60.9404297,4.32226562 C60.8623047,4.40039062 60.8232422,4.49804688 60.8232422,4.61523438 C60.8232422,4.73242188 60.8642578,4.83203125 60.9462891,4.9140625 C61.0283203,4.99609375 61.1435547,5.03710937 61.2919922,5.03710937 L61.9423828,5.03710937 L61.9423828,8.2890625 C61.9423828,8.84765625 62.0859375,9.28222656 62.3730469,9.59277344 C62.6601562,9.90332031 63.0517578,10.0585938 63.5478516,10.0585938 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M66.6357422,10 C66.8115234,10 66.9648438,9.94238281 67.0957031,9.82714844 C67.2265625,9.71191406 67.2919922,9.55664063 67.2919922,9.36132812 L67.2919922,4.78515625 C67.2919922,4.5859375 67.2285156,4.4296875 67.1015625,4.31640625 C66.9746094,4.203125 66.8251953,4.14648438 66.6533203,4.14648438 C66.4775391,4.14648438 66.3251953,4.203125 66.1962891,4.31640625 C66.0673828,4.4296875 66.0029297,4.5859375 66.0029297,4.78515625 L66.0029297,9.36132812 C66.0029297,9.56445312 66.0644531,9.72167969 66.1875,9.83300781 C66.3105469,9.94433594 66.4599609,10 66.6357422,10 Z M66.6416016,2.921875 C66.8759766,2.921875 67.0644531,2.85449219 67.2070312,2.71972656 C67.3496094,2.58496094 67.4208984,2.40625 67.4208984,2.18359375 C67.4208984,1.96484375 67.3505859,1.78710937 67.2099609,1.65039063 C67.0693359,1.51367188 66.8818359,1.4453125 66.6474609,1.4453125 C66.4130859,1.4453125 66.2246094,1.51367188 66.0820312,1.65039063 C65.9394531,1.78710937 65.8681641,1.96484375 65.8681641,2.18359375 C65.8681641,2.40625 65.9394531,2.58496094 66.0820312,2.71972656 C66.2246094,2.85449219 66.4111328,2.921875 66.6416016,2.921875 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M71.5283203,9.20898438 C70.9853516,9.20898438 70.5605469,9.02148438 70.2539062,8.64648438 C69.9472656,8.27148438 69.7939453,7.75 69.7939453,7.08203125 C69.7939453,6.40625 69.9472656,5.87890625 70.2539062,5.5 C70.5605469,5.12109375 70.9853516,4.93164062 71.5283203,4.93164062 C72.0712891,4.93164062 72.4960938,5.12207031 72.8027344,5.50292969 C73.109375,5.88378906 73.2626953,6.41015625 73.2626953,7.08203125 C73.2626953,7.75 73.1103516,8.27148438 72.8056641,8.64648438 C72.5009766,9.02148438 72.0751953,9.20898438 71.5283203,9.20898438 Z M71.5283203,10.0820312 C71.9267578,10.0820312 72.2919922,10.0263672 72.6240234,9.91503906 C72.9560547,9.80371094 73.2382812,9.65332031 73.4707031,9.46386719 C73.703125,9.27441406 73.9003906,9.05078125 74.0625,8.79296875 C74.2246094,8.53515625 74.34375,8.26269531 74.4199219,7.97558594 C74.4960938,7.68847656 74.5341797,7.390625 74.5341797,7.08203125 C74.5341797,6.67578125 74.4697266,6.29296875 74.3408203,5.93359375 C74.2119141,5.57421875 74.0253906,5.25390625 73.78125,4.97265625 C73.5371094,4.69140625 73.2216797,4.46972656 72.8349609,4.30761719 C72.4482422,4.14550781 72.0126953,4.06445312 71.5283203,4.06445312 C71.0400391,4.06445312 70.6005859,4.14746094 70.2099609,4.31347656 C69.8193359,4.47949219 69.5039062,4.70410156 69.2636719,4.98730469 C69.0234375,5.27050781 68.8398438,5.59082031 68.7128906,5.94824219 C68.5859375,6.30566406 68.5224609,6.68359375 68.5224609,7.08203125 C68.5224609,7.47265625 68.5849609,7.84472656 68.7099609,8.19824219 C68.8349609,8.55175781 69.0175781,8.87011719 69.2578125,9.15332031 C69.4980469,9.43652344 69.8134766,9.66210938 70.2041016,9.83007812 C70.5947266,9.99804687 71.0361328,10.0820312 71.5283203,10.0820312 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M76.4560547,10 C76.6318359,10 76.7832031,9.9453125 76.9101562,9.8359375 C77.0371094,9.7265625 77.1005859,9.57617188 77.1005859,9.38476563 L77.1005859,6.71875 C77.1044922,6.18359375 77.2568359,5.75195313 77.5576172,5.42382812 C77.8583984,5.09570312 78.2177734,4.93164063 78.6357422,4.93164063 C78.9951172,4.93164063 79.2861328,5.04882812 79.5087891,5.28320312 C79.7314453,5.51757812 79.8427734,5.85546875 79.8427734,6.296875 L79.8427734,9.38476563 C79.8427734,9.58007812 79.9052734,9.73144531 80.0302734,9.83886719 C80.1552734,9.94628906 80.3037109,10 80.4755859,10 C80.6513672,10 80.8037109,9.94628906 80.9326172,9.83886719 C81.0615234,9.73144531 81.1259766,9.58007812 81.1259766,9.38476563 L81.1259766,6.30859375 C81.1259766,5.92578125 81.0693359,5.58789063 80.9560547,5.29492188 C80.8427734,5.00195313 80.6865234,4.76757813 80.4873047,4.59179688 C80.2880859,4.41601562 80.0625,4.28417969 79.8105469,4.19628906 C79.5585938,4.10839844 79.2880859,4.06445312 78.9990234,4.06445312 C78.5537109,4.06445312 78.1669922,4.14941406 77.8388672,4.31933594 C77.5107422,4.48925781 77.2646484,4.73632812 77.1005859,5.06054687 L77.1005859,4.73242188 C77.1005859,4.54882812 77.0380859,4.40527344 76.9130859,4.30175781 C76.7880859,4.19824219 76.6376953,4.14648438 76.4619141,4.14648438 C76.2861328,4.14648438 76.1337891,4.20019531 76.0048828,4.30761719 C75.8759766,4.41503906 75.8115234,4.5625 75.8115234,4.75 L75.8115234,9.38476563 C75.8115234,9.58007812 75.875,9.73144531 76.0019531,9.83886719 C76.1289062,9.94628906 76.2802734,10 76.4560547,10 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M87.8466797,11.3828125 C87.9833984,11.3828125 88.1005859,11.3349609 88.1982422,11.2392578 C88.2958984,11.1435547 88.3447266,11.0234375 88.3447266,10.8789063 C88.3447266,10.8125 88.3212891,10.7089844 88.2744141,10.5683594 C87.7470703,9.07226563 87.4833984,7.66796875 87.4833984,6.35546875 C87.4833984,5.01953125 87.7529297,3.59179688 88.2919922,2.07226562 C88.3349609,1.94335937 88.3564453,1.84179688 88.3564453,1.76757812 C88.3564453,1.61523437 88.3076172,1.4921875 88.2099609,1.3984375 C88.1123047,1.3046875 87.9931641,1.2578125 87.8525391,1.2578125 C87.7275391,1.2578125 87.6132812,1.30761719 87.5097656,1.40722656 C87.40625,1.50683594 87.3232422,1.64257812 87.2607422,1.81445313 C86.9130859,2.67382812 86.6435547,3.46972656 86.4521484,4.20214844 C86.2607422,4.93457031 86.1650391,5.65625 86.1650391,6.3671875 C86.1650391,7.09765625 86.2666016,7.83886719 86.4697266,8.59082031 C86.6728516,9.34277344 86.9599609,10.1503906 87.3310547,11.0136719 C87.4326172,11.2597656 87.6044922,11.3828125 87.8466797,11.3828125 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M94.5439453,9.73632812 L94.5439453,7.97265625 L92.2705078,2.65820313 C92.1572266,2.38867188 91.9931641,2.18652344 91.7783203,2.05175781 C91.5634766,1.91699219 91.3330078,1.84960938 91.0869141,1.84960938 C90.7900391,1.84960938 90.5332031,1.93554688 90.3164062,2.10742188 C90.0996094,2.27929688 89.9912109,2.5 89.9912109,2.76953125 L89.9912109,9.37304688 C89.9912109,9.52539062 90.0576172,9.64453125 90.1904297,9.73046875 C90.3232422,9.81640625 90.4804688,9.859375 90.6621094,9.859375 C90.84375,9.859375 91.0009766,9.81640625 91.1337891,9.73046875 C91.2666016,9.64453125 91.3330078,9.52539062 91.3330078,9.37304688 L91.3330078,3.484375 L93.7060547,9.10351563 C93.7919922,9.30664062 93.9111328,9.46289063 94.0634766,9.57226562 C94.2158203,9.68164063 94.3759766,9.73632812 94.5439453,9.73632812 Z M94.5498047,9.73632812 C94.7177734,9.73632812 94.8789062,9.68066406 95.0332031,9.56933594 C95.1875,9.45800781 95.3056641,9.30273438 95.3876953,9.10351563 L97.7666016,3.484375 L97.7666016,9.37304688 C97.7666016,9.52539062 97.8330078,9.64453125 97.9658203,9.73046875 C98.0986328,9.81640625 98.2558594,9.859375 98.4375,9.859375 C98.6191406,9.859375 98.7763672,9.81640625 98.9091797,9.73046875 C99.0419922,9.64453125 99.1083984,9.52539062 99.1083984,9.37304688 L99.1083984,2.76953125 C99.1083984,2.58984375 99.0566406,2.42871094 98.953125,2.28613281 C98.8496094,2.14355469 98.7148438,2.03515625 98.5488281,1.9609375 C98.3828125,1.88671875 98.2041016,1.84960938 98.0126953,1.84960938 C97.7666016,1.84960938 97.5361328,1.91699219 97.3212891,2.05175781 C97.1064453,2.18652344 96.9404297,2.38867188 96.8232422,2.65820313 L94.5498047,7.97265625 L94.5498047,9.73632812 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M102.682617,10.0820312 C103.475586,10.0820312 104.041992,9.74609375 104.381836,9.07421875 L104.381836,9.390625 C104.381836,9.65625 104.49707,9.83984375 104.727539,9.94140625 C104.817383,9.98046875 104.911133,10 105.008789,10 C105.180664,10 105.329102,9.94628906 105.454102,9.83886719 C105.579102,9.73144531 105.641602,9.58007812 105.641602,9.38476562 L105.641602,6.17382812 C105.641602,5.45117188 105.430664,4.91894531 105.008789,4.57714844 C104.586914,4.23535156 104.014648,4.06445312 103.291992,4.06445312 C102.491211,4.06445312 101.774414,4.22265625 101.141602,4.5390625 C100.99707,4.609375 100.924805,4.72070313 100.924805,4.87304687 C100.924805,4.99804687 100.96875,5.11132813 101.056641,5.21289062 C101.144531,5.31445312 101.250977,5.36523438 101.375977,5.36523438 C101.391602,5.36523438 101.407227,5.36425781 101.422852,5.36230469 C101.438477,5.36035156 101.455078,5.35742187 101.472656,5.35351562 C101.490234,5.34960938 101.506836,5.34375 101.522461,5.3359375 C101.741211,5.25 101.902344,5.18847656 102.005859,5.15136719 C102.109375,5.11425781 102.286133,5.06738281 102.536133,5.01074219 C102.786133,4.95410156 103.020508,4.92578125 103.239258,4.92578125 C103.981445,4.92578125 104.352539,5.30859375 104.352539,6.07421875 L104.352539,6.58984375 C103.993164,6.58984375 103.688477,6.59277344 103.438477,6.59863281 C103.188477,6.60449219 102.932617,6.61816406 102.670898,6.63964844 C102.40918,6.66113281 102.191406,6.69042969 102.017578,6.72753906 C101.84375,6.76464844 101.672852,6.81347656 101.504883,6.87402344 C101.336914,6.93457031 101.203125,7.00878906 101.103516,7.09667969 C101.003906,7.18457031 100.914062,7.29101563 100.833984,7.41601563 C100.753906,7.54101563 100.697266,7.68359375 100.664062,7.84375 C100.630859,8.00390625 100.614258,8.1875 100.614258,8.39453125 C100.614258,8.93359375 100.807617,9.34960938 101.194336,9.64257813 C101.581055,9.93554688 102.077148,10.0820312 102.682617,10.0820312 Z M102.793945,9.24414062 C102.504883,9.24414062 102.272461,9.17089844 102.09668,9.02441406 C101.920898,8.87792969 101.833008,8.6484375 101.833008,8.3359375 C101.833008,8.22265625 101.841797,8.12207031 101.859375,8.03417969 C101.876953,7.94628906 101.907227,7.8671875 101.950195,7.796875 C101.993164,7.7265625 102.044922,7.66699219 102.105469,7.61816406 C102.166016,7.56933594 102.248047,7.52636719 102.351562,7.48925781 C102.455078,7.45214844 102.56543,7.42285156 102.682617,7.40136719 C102.799805,7.37988281 102.947266,7.36230469 103.125,7.34863281 C103.302734,7.33496094 103.484375,7.32617188 103.669922,7.32226562 C103.855469,7.31835938 104.081055,7.31640625 104.34668,7.31640625 L104.34668,7.46875 C104.34668,7.984375 104.19043,8.40917969 103.87793,8.74316406 C103.56543,9.07714844 103.204102,9.24414062 102.793945,9.24414062 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M111.506836,10 C111.674805,10 111.825195,9.94042969 111.958008,9.82128906 C112.09082,9.70214844 112.157227,9.56640625 112.157227,9.4140625 C112.157227,9.29296875 112.116211,9.1796875 112.03418,9.07421875 L110.305664,6.97070313 L111.911133,5.00195312 C111.989258,4.90039062 112.02832,4.796875 112.02832,4.69140625 C112.02832,4.546875 111.97168,4.41992188 111.858398,4.31054688 C111.745117,4.20117188 111.612305,4.14648438 111.459961,4.14648438 C111.295898,4.14648438 111.147461,4.2265625 111.014648,4.38671875 L109.602539,6.11523437 L108.19043,4.40429688 C108.053711,4.23242188 107.889648,4.14648438 107.698242,4.14648438 C107.530273,4.14648438 107.379883,4.20605469 107.24707,4.32519531 C107.114258,4.44433594 107.047852,4.58398438 107.047852,4.74414062 C107.047852,4.85742188 107.088867,4.97070312 107.170898,5.08398437 L108.75293,7.00585938 L107.006836,9.13867188 C106.932617,9.22851563 106.895508,9.328125 106.895508,9.4375 C106.895508,9.5859375 106.955078,9.71679687 107.074219,9.83007812 C107.193359,9.94335938 107.331055,10 107.487305,10 C107.651367,10 107.795898,9.91992188 107.920898,9.75976562 L109.456055,7.84960938 L111.014648,9.7421875 C111.147461,9.9140625 111.311523,10 111.506836,10 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M116.422852,10.1289062 C117.083008,10.1289062 117.65918,10.03125 118.151367,9.8359375 C118.643555,9.640625 119.024414,9.35449219 119.293945,8.97753906 C119.563477,8.60058594 119.698242,8.15429688 119.698242,7.63867188 C119.698242,7.37695312 119.665039,7.13867188 119.598633,6.92382813 C119.532227,6.70898438 119.44043,6.52050781 119.323242,6.35839844 C119.206055,6.19628906 119.054688,6.04882813 118.869141,5.91601563 C118.683594,5.78320312 118.482422,5.66699219 118.265625,5.56738281 C118.048828,5.46777344 117.795898,5.37109375 117.506836,5.27734375 L115.655273,4.68554688 C115.350586,4.58398437 115.128906,4.45214844 114.990234,4.29003906 C114.851562,4.12792969 114.782227,3.91601562 114.782227,3.65429687 C114.782227,3.38476562 114.868164,3.1640625 115.040039,2.9921875 C115.211914,2.8203125 115.427734,2.70117187 115.6875,2.63476562 C115.947266,2.56835938 116.254883,2.53515625 116.610352,2.53515625 C117.321289,2.53515625 118.004883,2.72460937 118.661133,3.10351563 C118.758789,3.16210938 118.854492,3.19140625 118.948242,3.19140625 C119.09668,3.19140625 119.223633,3.12890625 119.329102,3.00390625 C119.43457,2.87890625 119.487305,2.74023437 119.487305,2.58789063 C119.487305,2.51757813 119.474609,2.45214844 119.449219,2.39160156 C119.423828,2.33105469 119.383789,2.27929688 119.329102,2.23632813 C119.047852,1.99804688 118.648438,1.79492187 118.130859,1.62695312 C117.613281,1.45898438 117.071289,1.375 116.504883,1.375 C115.594727,1.375 114.84668,1.58398438 114.260742,2.00195312 C113.674805,2.41992187 113.381836,2.98828125 113.381836,3.70703125 C113.381836,4.28125 113.530273,4.73535156 113.827148,5.06933594 C114.124023,5.40332031 114.592773,5.67773438 115.233398,5.89257813 L117.09668,6.51367188 C117.518555,6.65429688 117.820312,6.81445313 118.001953,6.99414062 C118.183594,7.17382813 118.274414,7.43359375 118.274414,7.7734375 C118.274414,8.16796875 118.105469,8.46582031 117.767578,8.66699219 C117.429688,8.86816406 116.989258,8.96875 116.446289,8.96875 C115.59082,8.96875 114.803711,8.72070313 114.084961,8.22460938 C114.061523,8.20898438 114.036133,8.19726563 114.008789,8.18945313 C113.981445,8.18164063 113.954102,8.17773438 113.926758,8.17773438 C113.856445,8.17773438 113.783203,8.203125 113.707031,8.25390625 C113.630859,8.3046875 113.563477,8.36816406 113.504883,8.44433594 C113.446289,8.52050781 113.398438,8.60351562 113.361328,8.69335937 C113.324219,8.78320313 113.305664,8.8671875 113.305664,8.9453125 C113.305664,9.0390625 113.334961,9.109375 113.393555,9.15625 C114.151367,9.8046875 115.161133,10.1289062 116.422852,10.1289062 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M121.766602,10 C121.942383,10 122.095703,9.94238281 122.226562,9.82714844 C122.357422,9.71191406 122.422852,9.55664063 122.422852,9.36132812 L122.422852,4.78515625 C122.422852,4.5859375 122.359375,4.4296875 122.232422,4.31640625 C122.105469,4.203125 121.956055,4.14648438 121.78418,4.14648438 C121.608398,4.14648438 121.456055,4.203125 121.327148,4.31640625 C121.198242,4.4296875 121.133789,4.5859375 121.133789,4.78515625 L121.133789,9.36132812 C121.133789,9.56445312 121.195312,9.72167969 121.318359,9.83300781 C121.441406,9.94433594 121.59082,10 121.766602,10 Z M121.772461,2.921875 C122.006836,2.921875 122.195312,2.85449219 122.337891,2.71972656 C122.480469,2.58496094 122.551758,2.40625 122.551758,2.18359375 C122.551758,1.96484375 122.481445,1.78710937 122.34082,1.65039063 C122.200195,1.51367188 122.012695,1.4453125 121.77832,1.4453125 C121.543945,1.4453125 121.355469,1.51367188 121.212891,1.65039063 C121.070312,1.78710937 120.999023,1.96484375 120.999023,2.18359375 C120.999023,2.40625 121.070312,2.58496094 121.212891,2.71972656 C121.355469,2.85449219 121.541992,2.921875 121.772461,2.921875 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M124.637695,10 C124.813477,10 124.966797,9.94628906 125.097656,9.83886719 C125.228516,9.73144531 125.293945,9.58007812 125.293945,9.38476563 L125.293945,6.68359375 C125.293945,6.14453125 125.432617,5.71777344 125.709961,5.40332031 C125.987305,5.08886719 126.358398,4.93164063 126.823242,4.93164063 C127.198242,4.93164063 127.489258,5.046875 127.696289,5.27734375 C127.90332,5.5078125 128.006836,5.85546875 128.006836,6.3203125 L128.006836,9.37890625 C128.006836,9.57421875 128.069336,9.7265625 128.194336,9.8359375 C128.319336,9.9453125 128.469727,10 128.645508,10 C128.821289,10 128.972656,9.9453125 129.099609,9.8359375 C129.226562,9.7265625 129.290039,9.57421875 129.290039,9.37890625 L129.290039,6.80078125 C129.290039,6.21484375 129.433594,5.75683594 129.720703,5.42675781 C130.007812,5.09667969 130.37793,4.93164063 130.831055,4.93164063 C131.209961,4.93164063 131.501953,5.04394531 131.707031,5.26855469 C131.912109,5.49316406 132.014648,5.83007812 132.014648,6.27929687 L132.014648,9.38476563 C132.014648,9.58007812 132.077148,9.73144531 132.202148,9.83886719 C132.327148,9.94628906 132.477539,10 132.65332,10 C132.829102,10 132.981445,9.94628906 133.110352,9.83886719 C133.239258,9.73144531 133.303711,9.58007812 133.303711,9.38476563 L133.303711,6.30273437 C133.303711,5.55273438 133.104492,4.9921875 132.706055,4.62109375 C132.307617,4.25 131.786133,4.06445312 131.141602,4.06445312 C130.657227,4.06445312 130.238281,4.17382812 129.884766,4.39257812 C129.53125,4.61132812 129.266602,4.93945312 129.09082,5.37695313 C128.969727,4.98632812 128.740234,4.66992188 128.402344,4.42773438 C128.064453,4.18554688 127.649414,4.06445312 127.157227,4.06445312 C126.290039,4.06445312 125.668945,4.4140625 125.293945,5.11328125 L125.293945,4.70898438 C125.293945,4.53320312 125.229492,4.39550781 125.100586,4.29589844 C124.97168,4.19628906 124.821289,4.14648438 124.649414,4.14648438 C124.473633,4.14648438 124.321289,4.20019531 124.192383,4.30761719 C124.063477,4.41503906 123.999023,4.5625 123.999023,4.75 L123.999023,9.38476563 C123.999023,9.58007812 124.0625,9.73144531 124.189453,9.83886719 C124.316406,9.94628906 124.46582,10 124.637695,10 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M135.208008,11.3828125 C135.454102,11.3828125 135.629883,11.2597656 135.735352,11.0136719 C136.106445,10.1425781 136.394531,9.33300781 136.599609,8.58496094 C136.804688,7.83691406 136.907227,7.09765625 136.907227,6.3671875 C136.907227,5.6640625 136.810547,4.94628906 136.617188,4.21386719 C136.423828,3.48144531 136.15332,2.68164063 135.805664,1.81445312 C135.657227,1.44726562 135.461914,1.26367188 135.219727,1.26367188 C135.079102,1.26367188 134.958984,1.31152344 134.859375,1.40722656 C134.759766,1.50292969 134.709961,1.62109375 134.709961,1.76171875 C134.709961,1.83984375 134.731445,1.94335938 134.774414,2.07226562 C135.317383,3.60742187 135.588867,5.03320313 135.588867,6.34960938 C135.588867,7.66992188 135.323242,9.078125 134.791992,10.5742188 C134.741211,10.703125 134.71582,10.8066406 134.71582,10.8847656 C134.71582,11.0292969 134.763672,11.1484375 134.859375,11.2421875 C134.955078,11.3359375 135.071289,11.3828125 135.208008,11.3828125 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="node-/-02_MaxSim-@1_grey" transform="translate(32, 52)" xlink:href="#path-21">
+                    <path stroke="#8E8E8E" d="M51.203,6 L51.203,14 C51.203,17.0375661 48.7405661,19.5 45.703,19.5 L6,19.5 C2.96243388,19.5 0.5,17.0375661 0.5,14 L0.5,6 C0.5,2.96243388 2.96243388,0.5 6,0.5 L45.703,0.500000028 C48.7405661,0.5 51.203,2.96243388 51.203,6 Z" stroke-linejoin="square" fill="#F2F2F2"></path>
+                    <g id="MaxSim" transform="translate(4.7361, 3.894)" fill="#8E8E8E">
+                        <path d="M5.43725586,9.75830078 L5.43725586,8.14160156 L3.35327148,3.27001953 C3.24943034,3.02294922 3.09903971,2.83764648 2.90209961,2.71411133 C2.70515951,2.59057617 2.49389648,2.52880859 2.26831055,2.52880859 C1.99617513,2.52880859 1.76074219,2.60758464 1.56201172,2.76513672 C1.36328125,2.9226888 1.26391602,3.125 1.26391602,3.37207031 L1.26391602,9.42529297 C1.26391602,9.56494141 1.32478841,9.67415365 1.4465332,9.75292969 C1.56827799,9.83170573 1.71240234,9.87109375 1.87890625,9.87109375 C2.04541016,9.87109375 2.18953451,9.83170573 2.3112793,9.75292969 C2.43302409,9.67415365 2.49389648,9.56494141 2.49389648,9.42529297 L2.49389648,4.02734375 L4.66918945,9.17822266 C4.74796549,9.36442057 4.85717773,9.50764974 4.99682617,9.60791016 C5.13647461,9.70817057 5.28328451,9.75830078 5.43725586,9.75830078 Z M5.44262695,9.75830078 C5.59659831,9.75830078 5.74430339,9.70727539 5.88574219,9.60522461 C6.02718099,9.50317383 6.13549805,9.36083984 6.21069336,9.17822266 L8.39135742,4.02734375 L8.39135742,9.42529297 C8.39135742,9.56494141 8.45222982,9.67415365 8.57397461,9.75292969 C8.6957194,9.83170573 8.83984375,9.87109375 9.00634766,9.87109375 C9.17285156,9.87109375 9.31697591,9.83170573 9.4387207,9.75292969 C9.56046549,9.67415365 9.62133789,9.56494141 9.62133789,9.42529297 L9.62133789,3.37207031 C9.62133789,3.20735677 9.57389323,3.05965169 9.47900391,2.92895508 C9.38411458,2.79825846 9.26057943,2.69889323 9.10839844,2.63085938 C8.95621745,2.56282552 8.79239909,2.52880859 8.61694336,2.52880859 C8.39135742,2.52880859 8.1800944,2.59057617 7.9831543,2.71411133 C7.78621419,2.83764648 7.6340332,3.02294922 7.52661133,3.27001953 L5.44262695,8.14160156 L5.44262695,9.75830078 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M12.8977051,10.0751953 C13.6245931,10.0751953 14.1437988,9.7672526 14.4553223,9.15136719 L14.4553223,9.44140625 C14.4553223,9.68489583 14.5609538,9.8531901 14.7722168,9.94628906 C14.8545736,9.98209635 14.9405111,10 15.0300293,10 C15.1875814,10 15.3236491,9.95076497 15.4382324,9.85229492 C15.5528158,9.75382487 15.6101074,9.61507161 15.6101074,9.43603516 L15.6101074,6.49267578 C15.6101074,5.83024089 15.416748,5.34236654 15.0300293,5.02905273 C14.6433105,4.71573893 14.1187337,4.55908203 13.4562988,4.55908203 C12.7222493,4.55908203 12.0651855,4.70410156 11.4851074,4.99414062 C11.3526204,5.05859375 11.286377,5.16064453 11.286377,5.30029297 C11.286377,5.4148763 11.3266602,5.51871745 11.4072266,5.61181641 C11.487793,5.70491536 11.5853678,5.75146484 11.6999512,5.75146484 C11.7142741,5.75146484 11.728597,5.75056966 11.7429199,5.7487793 C11.7572428,5.74698893 11.7724609,5.74430339 11.7885742,5.74072266 C11.8046875,5.73714193 11.8199056,5.73177083 11.8342285,5.72460937 C12.0347493,5.64583333 12.1824544,5.58943685 12.2773438,5.55541992 C12.3722331,5.52140299 12.5342611,5.47843424 12.7634277,5.42651367 C12.9925944,5.3745931 13.2074382,5.34863281 13.407959,5.34863281 C14.0882975,5.34863281 14.4284668,5.69954427 14.4284668,6.40136719 L14.4284668,6.87402344 C14.0990397,6.87402344 13.8197428,6.87670898 13.5905762,6.88208008 C13.3614095,6.88745117 13.1268717,6.89998372 12.8869629,6.91967773 C12.647054,6.93937174 12.4474284,6.96622721 12.2880859,7.00024414 C12.1287435,7.03426107 11.9720866,7.07902018 11.8181152,7.13452148 C11.6641439,7.19002279 11.5415039,7.25805664 11.4501953,7.33862305 C11.3588867,7.41918945 11.2765299,7.51676432 11.203125,7.63134766 C11.1297201,7.74593099 11.0777995,7.8766276 11.0473633,8.0234375 C11.0169271,8.1702474 11.001709,8.33854167 11.001709,8.52832031 C11.001709,9.02246094 11.1789551,9.40380859 11.5334473,9.67236328 C11.8879395,9.94091797 12.3426921,10.0751953 12.8977051,10.0751953 Z M12.9997559,9.30712891 C12.7347819,9.30712891 12.5217285,9.23999023 12.3605957,9.10571289 C12.1994629,8.97143555 12.1188965,8.76106771 12.1188965,8.47460938 C12.1188965,8.37076823 12.1269531,8.27856445 12.1430664,8.19799805 C12.1591797,8.11743164 12.1869303,8.04492188 12.2263184,7.98046875 C12.2657064,7.91601563 12.313151,7.86140951 12.3686523,7.81665039 C12.4241536,7.77189128 12.499349,7.73250326 12.5942383,7.69848633 C12.6891276,7.6644694 12.7902832,7.63761393 12.8977051,7.61791992 C13.005127,7.59822591 13.1402995,7.58211263 13.3032227,7.56958008 C13.4661458,7.55704753 13.6326497,7.54899089 13.8027344,7.54541016 C13.972819,7.54182943 14.1796061,7.54003906 14.4230957,7.54003906 L14.4230957,7.6796875 C14.4230957,8.15234375 14.2798665,8.54174805 13.9934082,8.84790039 C13.7069499,9.15405273 13.3757324,9.30712891 12.9997559,9.30712891 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M20.9865723,10 C21.1405436,10 21.2784017,9.94539388 21.4001465,9.83618164 C21.5218913,9.7269694 21.5827637,9.60253906 21.5827637,9.46289062 C21.5827637,9.35188802 21.545166,9.24804688 21.4699707,9.15136719 L19.885498,7.22314453 L21.3571777,5.41845703 C21.4287923,5.32535807 21.4645996,5.23046875 21.4645996,5.13378906 C21.4645996,5.00130208 21.412679,4.88492839 21.3088379,4.78466797 C21.2049967,4.68440755 21.083252,4.63427734 20.9436035,4.63427734 C20.7932129,4.63427734 20.6571452,4.70768229 20.5354004,4.85449219 L19.2409668,6.43896484 L17.9465332,4.87060547 C17.8212077,4.71305339 17.6708171,4.63427734 17.4953613,4.63427734 C17.34139,4.63427734 17.2035319,4.68888346 17.0817871,4.7980957 C16.9600423,4.90730794 16.8991699,5.03531901 16.8991699,5.18212891 C16.8991699,5.28597005 16.9367676,5.3898112 17.0119629,5.49365234 L18.4621582,7.25537109 L16.8615723,9.21044922 C16.7935384,9.29280599 16.7595215,9.38411458 16.7595215,9.484375 C16.7595215,9.62044271 16.8141276,9.74039714 16.9233398,9.84423828 C17.0325521,9.94807943 17.1587728,10 17.302002,10 C17.4523926,10 17.5848796,9.92659505 17.6994629,9.77978516 L19.1066895,8.02880859 L20.5354004,9.76367188 C20.6571452,9.92122396 20.8075358,10 20.9865723,10 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M25.4929199,10.1181641 C26.0980632,10.1181641 26.6262207,10.0286458 27.0773926,9.84960938 C27.5285645,9.67057292 27.8776855,9.40828451 28.1247559,9.06274414 C28.3718262,8.71720378 28.4953613,8.30810547 28.4953613,7.83544922 C28.4953613,7.59554036 28.4649251,7.37711589 28.4040527,7.18017578 C28.3431803,6.98323568 28.2590332,6.81046549 28.1516113,6.66186523 C28.0441895,6.51326497 27.9054362,6.37809245 27.7353516,6.25634766 C27.5652669,6.13460286 27.3808594,6.02807617 27.1821289,5.93676758 C26.9833984,5.84545898 26.7515462,5.75683594 26.4865723,5.67089844 L24.7893066,5.12841797 C24.5100098,5.03531901 24.3068034,4.9144694 24.1796875,4.76586914 C24.0525716,4.61726888 23.9890137,4.42301432 23.9890137,4.18310547 C23.9890137,3.93603516 24.0677897,3.73372396 24.2253418,3.57617187 C24.3828939,3.41861979 24.5807292,3.30940755 24.8188477,3.24853516 C25.0569661,3.18766276 25.3389486,3.15722656 25.6647949,3.15722656 C26.3164876,3.15722656 26.9431152,3.33089193 27.5446777,3.67822266 C27.634196,3.73193359 27.7219238,3.75878906 27.8078613,3.75878906 C27.943929,3.75878906 28.0603027,3.7014974 28.1569824,3.58691406 C28.2536621,3.47233073 28.302002,3.34521484 28.302002,3.20556641 C28.302002,3.14111328 28.2903646,3.08113607 28.2670898,3.02563477 C28.2438151,2.97013346 28.2071126,2.9226888 28.1569824,2.88330078 C27.8991699,2.6648763 27.5330404,2.47867839 27.0585938,2.32470703 C26.5841471,2.17073568 26.087321,2.09375 25.5681152,2.09375 C24.7338053,2.09375 24.0480957,2.28531901 23.5109863,2.66845703 C22.973877,3.05159505 22.7053223,3.57259115 22.7053223,4.23144531 C22.7053223,4.7578125 22.84139,5.17407227 23.1135254,5.48022461 C23.3856608,5.78637695 23.8153483,6.03792318 24.4025879,6.23486328 L26.1105957,6.80419922 C26.4973145,6.93310547 26.7739258,7.07991536 26.9404297,7.24462891 C27.1069336,7.40934245 27.1901855,7.64746094 27.1901855,7.95898438 C27.1901855,8.32063802 27.035319,8.59366862 26.7255859,8.77807617 C26.4158529,8.96248372 26.0121257,9.0546875 25.5144043,9.0546875 C24.7302246,9.0546875 24.0087077,8.8273112 23.3498535,8.37255859 C23.3283691,8.35823568 23.3050944,8.34749349 23.2800293,8.34033203 C23.2549642,8.33317057 23.2298991,8.32958984 23.204834,8.32958984 C23.1403809,8.32958984 23.0732422,8.35286458 23.003418,8.39941406 C22.9335938,8.44596354 22.8718262,8.50415039 22.8181152,8.57397461 C22.7644043,8.64379883 22.7205404,8.71988932 22.6865234,8.80224609 C22.6525065,8.88460286 22.635498,8.96158854 22.635498,9.03320312 C22.635498,9.11914063 22.6623535,9.18359375 22.7160645,9.2265625 C23.4107259,9.82096354 24.3363444,10.1181641 25.4929199,10.1181641 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M30.3913574,10 C30.5524902,10 30.6930339,9.94718424 30.8129883,9.84155273 C30.9329427,9.73592122 30.9929199,9.59358724 30.9929199,9.41455078 L30.9929199,5.21972656 C30.9929199,5.03710938 30.9347331,4.89388021 30.8183594,4.79003906 C30.7019857,4.68619792 30.5650228,4.63427734 30.4074707,4.63427734 C30.2463379,4.63427734 30.1066895,4.68619792 29.9885254,4.79003906 C29.8703613,4.89388021 29.8112793,5.03710938 29.8112793,5.21972656 L29.8112793,9.41455078 C29.8112793,9.6007487 29.8676758,9.74487305 29.9804688,9.84692383 C30.0932617,9.94897461 30.2302246,10 30.3913574,10 Z M30.3967285,3.51171875 C30.6115723,3.51171875 30.7843424,3.44995117 30.9150391,3.32641602 C31.0457357,3.20288086 31.111084,3.0390625 31.111084,2.83496094 C31.111084,2.6344401 31.0466309,2.47151693 30.9177246,2.34619141 C30.7888184,2.22086589 30.6169434,2.15820312 30.4020996,2.15820312 C30.1872559,2.15820312 30.0144857,2.22086589 29.8837891,2.34619141 C29.7530924,2.47151693 29.6877441,2.6344401 29.6877441,2.83496094 C29.6877441,3.0390625 29.7530924,3.20288086 29.8837891,3.32641602 C30.0144857,3.44995117 30.1854655,3.51171875 30.3967285,3.51171875 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M33.0231934,10 C33.1843262,10 33.3248698,9.95076497 33.4448242,9.85229492 C33.5647786,9.75382487 33.6247559,9.61507161 33.6247559,9.43603516 L33.6247559,6.95996094 C33.6247559,6.46582031 33.7518717,6.07462565 34.0061035,5.78637695 C34.2603353,5.49812826 34.6005046,5.35400391 35.0266113,5.35400391 C35.3703613,5.35400391 35.6371257,5.45963542 35.8269043,5.67089844 C36.0166829,5.88216146 36.1115723,6.20084635 36.1115723,6.62695313 L36.1115723,9.43066406 C36.1115723,9.60970052 36.1688639,9.74934896 36.2834473,9.84960938 C36.3980306,9.94986979 36.5358887,10 36.6970215,10 C36.8581543,10 36.9969076,9.94986979 37.1132812,9.84960938 C37.2296549,9.74934896 37.2878418,9.60970052 37.2878418,9.43066406 L37.2878418,7.06738281 C37.2878418,6.53027344 37.4194336,6.11043294 37.6826172,5.80786133 C37.9458008,5.50528971 38.2850749,5.35400391 38.7004395,5.35400391 C39.0477702,5.35400391 39.3154297,5.45694987 39.503418,5.6628418 C39.6914062,5.86873372 39.7854004,6.17757161 39.7854004,6.58935547 L39.7854004,9.43603516 C39.7854004,9.61507161 39.8426921,9.75382487 39.9572754,9.85229492 C40.0718587,9.95076497 40.2097168,10 40.3708496,10 C40.5319824,10 40.6716309,9.95076497 40.7897949,9.85229492 C40.907959,9.75382487 40.967041,9.61507161 40.967041,9.43603516 L40.967041,6.61083984 C40.967041,5.92333984 40.7844238,5.40950521 40.4191895,5.06933594 C40.0539551,4.72916667 39.5759277,4.55908203 38.9851074,4.55908203 C38.541097,4.55908203 38.1570638,4.65934245 37.8330078,4.85986328 C37.5089518,5.06038411 37.2663574,5.36116536 37.1052246,5.76220703 C36.994222,5.40413411 36.7838542,5.11409505 36.4741211,4.89208984 C36.164388,4.67008464 35.7839355,4.55908203 35.3327637,4.55908203 C34.5378418,4.55908203 33.9685059,4.87955729 33.6247559,5.52050781 L33.6247559,5.14990234 C33.6247559,4.98876953 33.5656738,4.86254883 33.4475098,4.77124023 C33.3293457,4.67993164 33.1914876,4.63427734 33.0339355,4.63427734 C32.8728027,4.63427734 32.7331543,4.68351237 32.6149902,4.78198242 C32.4968262,4.88045247 32.4377441,5.015625 32.4377441,5.1875 L32.4377441,9.43603516 C32.4377441,9.61507161 32.495931,9.75382487 32.6123047,9.85229492 C32.7286784,9.95076497 32.8656413,10 33.0231934,10 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="node-/-02_MaxSim-@1_grey-Copy" transform="translate(122.103, 52)" xlink:href="#path-22">
+                    <path stroke="#8E8E8E" d="M51.203,6 L51.203,14 C51.203,17.0375661 48.7405661,19.5 45.703,19.5 L6,19.5 C2.96243388,19.5 0.5,17.0375661 0.5,14 L0.5,6 C0.5,2.96243388 2.96243388,0.5 6,0.5 L45.703,0.500000028 C48.7405661,0.5 51.203,2.96243388 51.203,6 Z" stroke-linejoin="square" fill="#F2F2F2"></path>
+                    <g id="MaxSim" transform="translate(4.7361, 3.894)" fill="#8E8E8E">
+                        <path d="M5.43725586,9.75830078 L5.43725586,8.14160156 L3.35327148,3.27001953 C3.24943034,3.02294922 3.09903971,2.83764648 2.90209961,2.71411133 C2.70515951,2.59057617 2.49389648,2.52880859 2.26831055,2.52880859 C1.99617513,2.52880859 1.76074219,2.60758464 1.56201172,2.76513672 C1.36328125,2.9226888 1.26391602,3.125 1.26391602,3.37207031 L1.26391602,9.42529297 C1.26391602,9.56494141 1.32478841,9.67415365 1.4465332,9.75292969 C1.56827799,9.83170573 1.71240234,9.87109375 1.87890625,9.87109375 C2.04541016,9.87109375 2.18953451,9.83170573 2.3112793,9.75292969 C2.43302409,9.67415365 2.49389648,9.56494141 2.49389648,9.42529297 L2.49389648,4.02734375 L4.66918945,9.17822266 C4.74796549,9.36442057 4.85717773,9.50764974 4.99682617,9.60791016 C5.13647461,9.70817057 5.28328451,9.75830078 5.43725586,9.75830078 Z M5.44262695,9.75830078 C5.59659831,9.75830078 5.74430339,9.70727539 5.88574219,9.60522461 C6.02718099,9.50317383 6.13549805,9.36083984 6.21069336,9.17822266 L8.39135742,4.02734375 L8.39135742,9.42529297 C8.39135742,9.56494141 8.45222982,9.67415365 8.57397461,9.75292969 C8.6957194,9.83170573 8.83984375,9.87109375 9.00634766,9.87109375 C9.17285156,9.87109375 9.31697591,9.83170573 9.4387207,9.75292969 C9.56046549,9.67415365 9.62133789,9.56494141 9.62133789,9.42529297 L9.62133789,3.37207031 C9.62133789,3.20735677 9.57389323,3.05965169 9.47900391,2.92895508 C9.38411458,2.79825846 9.26057943,2.69889323 9.10839844,2.63085938 C8.95621745,2.56282552 8.79239909,2.52880859 8.61694336,2.52880859 C8.39135742,2.52880859 8.1800944,2.59057617 7.9831543,2.71411133 C7.78621419,2.83764648 7.6340332,3.02294922 7.52661133,3.27001953 L5.44262695,8.14160156 L5.44262695,9.75830078 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M12.8977051,10.0751953 C13.6245931,10.0751953 14.1437988,9.7672526 14.4553223,9.15136719 L14.4553223,9.44140625 C14.4553223,9.68489583 14.5609538,9.8531901 14.7722168,9.94628906 C14.8545736,9.98209635 14.9405111,10 15.0300293,10 C15.1875814,10 15.3236491,9.95076497 15.4382324,9.85229492 C15.5528158,9.75382487 15.6101074,9.61507161 15.6101074,9.43603516 L15.6101074,6.49267578 C15.6101074,5.83024089 15.416748,5.34236654 15.0300293,5.02905273 C14.6433105,4.71573893 14.1187337,4.55908203 13.4562988,4.55908203 C12.7222493,4.55908203 12.0651855,4.70410156 11.4851074,4.99414062 C11.3526204,5.05859375 11.286377,5.16064453 11.286377,5.30029297 C11.286377,5.4148763 11.3266602,5.51871745 11.4072266,5.61181641 C11.487793,5.70491536 11.5853678,5.75146484 11.6999512,5.75146484 C11.7142741,5.75146484 11.728597,5.75056966 11.7429199,5.7487793 C11.7572428,5.74698893 11.7724609,5.74430339 11.7885742,5.74072266 C11.8046875,5.73714193 11.8199056,5.73177083 11.8342285,5.72460937 C12.0347493,5.64583333 12.1824544,5.58943685 12.2773438,5.55541992 C12.3722331,5.52140299 12.5342611,5.47843424 12.7634277,5.42651367 C12.9925944,5.3745931 13.2074382,5.34863281 13.407959,5.34863281 C14.0882975,5.34863281 14.4284668,5.69954427 14.4284668,6.40136719 L14.4284668,6.87402344 C14.0990397,6.87402344 13.8197428,6.87670898 13.5905762,6.88208008 C13.3614095,6.88745117 13.1268717,6.89998372 12.8869629,6.91967773 C12.647054,6.93937174 12.4474284,6.96622721 12.2880859,7.00024414 C12.1287435,7.03426107 11.9720866,7.07902018 11.8181152,7.13452148 C11.6641439,7.19002279 11.5415039,7.25805664 11.4501953,7.33862305 C11.3588867,7.41918945 11.2765299,7.51676432 11.203125,7.63134766 C11.1297201,7.74593099 11.0777995,7.8766276 11.0473633,8.0234375 C11.0169271,8.1702474 11.001709,8.33854167 11.001709,8.52832031 C11.001709,9.02246094 11.1789551,9.40380859 11.5334473,9.67236328 C11.8879395,9.94091797 12.3426921,10.0751953 12.8977051,10.0751953 Z M12.9997559,9.30712891 C12.7347819,9.30712891 12.5217285,9.23999023 12.3605957,9.10571289 C12.1994629,8.97143555 12.1188965,8.76106771 12.1188965,8.47460938 C12.1188965,8.37076823 12.1269531,8.27856445 12.1430664,8.19799805 C12.1591797,8.11743164 12.1869303,8.04492188 12.2263184,7.98046875 C12.2657064,7.91601563 12.313151,7.86140951 12.3686523,7.81665039 C12.4241536,7.77189128 12.499349,7.73250326 12.5942383,7.69848633 C12.6891276,7.6644694 12.7902832,7.63761393 12.8977051,7.61791992 C13.005127,7.59822591 13.1402995,7.58211263 13.3032227,7.56958008 C13.4661458,7.55704753 13.6326497,7.54899089 13.8027344,7.54541016 C13.972819,7.54182943 14.1796061,7.54003906 14.4230957,7.54003906 L14.4230957,7.6796875 C14.4230957,8.15234375 14.2798665,8.54174805 13.9934082,8.84790039 C13.7069499,9.15405273 13.3757324,9.30712891 12.9997559,9.30712891 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M20.9865723,10 C21.1405436,10 21.2784017,9.94539388 21.4001465,9.83618164 C21.5218913,9.7269694 21.5827637,9.60253906 21.5827637,9.46289062 C21.5827637,9.35188802 21.545166,9.24804688 21.4699707,9.15136719 L19.885498,7.22314453 L21.3571777,5.41845703 C21.4287923,5.32535807 21.4645996,5.23046875 21.4645996,5.13378906 C21.4645996,5.00130208 21.412679,4.88492839 21.3088379,4.78466797 C21.2049967,4.68440755 21.083252,4.63427734 20.9436035,4.63427734 C20.7932129,4.63427734 20.6571452,4.70768229 20.5354004,4.85449219 L19.2409668,6.43896484 L17.9465332,4.87060547 C17.8212077,4.71305339 17.6708171,4.63427734 17.4953613,4.63427734 C17.34139,4.63427734 17.2035319,4.68888346 17.0817871,4.7980957 C16.9600423,4.90730794 16.8991699,5.03531901 16.8991699,5.18212891 C16.8991699,5.28597005 16.9367676,5.3898112 17.0119629,5.49365234 L18.4621582,7.25537109 L16.8615723,9.21044922 C16.7935384,9.29280599 16.7595215,9.38411458 16.7595215,9.484375 C16.7595215,9.62044271 16.8141276,9.74039714 16.9233398,9.84423828 C17.0325521,9.94807943 17.1587728,10 17.302002,10 C17.4523926,10 17.5848796,9.92659505 17.6994629,9.77978516 L19.1066895,8.02880859 L20.5354004,9.76367188 C20.6571452,9.92122396 20.8075358,10 20.9865723,10 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M25.4929199,10.1181641 C26.0980632,10.1181641 26.6262207,10.0286458 27.0773926,9.84960938 C27.5285645,9.67057292 27.8776855,9.40828451 28.1247559,9.06274414 C28.3718262,8.71720378 28.4953613,8.30810547 28.4953613,7.83544922 C28.4953613,7.59554036 28.4649251,7.37711589 28.4040527,7.18017578 C28.3431803,6.98323568 28.2590332,6.81046549 28.1516113,6.66186523 C28.0441895,6.51326497 27.9054362,6.37809245 27.7353516,6.25634766 C27.5652669,6.13460286 27.3808594,6.02807617 27.1821289,5.93676758 C26.9833984,5.84545898 26.7515462,5.75683594 26.4865723,5.67089844 L24.7893066,5.12841797 C24.5100098,5.03531901 24.3068034,4.9144694 24.1796875,4.76586914 C24.0525716,4.61726888 23.9890137,4.42301432 23.9890137,4.18310547 C23.9890137,3.93603516 24.0677897,3.73372396 24.2253418,3.57617187 C24.3828939,3.41861979 24.5807292,3.30940755 24.8188477,3.24853516 C25.0569661,3.18766276 25.3389486,3.15722656 25.6647949,3.15722656 C26.3164876,3.15722656 26.9431152,3.33089193 27.5446777,3.67822266 C27.634196,3.73193359 27.7219238,3.75878906 27.8078613,3.75878906 C27.943929,3.75878906 28.0603027,3.7014974 28.1569824,3.58691406 C28.2536621,3.47233073 28.302002,3.34521484 28.302002,3.20556641 C28.302002,3.14111328 28.2903646,3.08113607 28.2670898,3.02563477 C28.2438151,2.97013346 28.2071126,2.9226888 28.1569824,2.88330078 C27.8991699,2.6648763 27.5330404,2.47867839 27.0585938,2.32470703 C26.5841471,2.17073568 26.087321,2.09375 25.5681152,2.09375 C24.7338053,2.09375 24.0480957,2.28531901 23.5109863,2.66845703 C22.973877,3.05159505 22.7053223,3.57259115 22.7053223,4.23144531 C22.7053223,4.7578125 22.84139,5.17407227 23.1135254,5.48022461 C23.3856608,5.78637695 23.8153483,6.03792318 24.4025879,6.23486328 L26.1105957,6.80419922 C26.4973145,6.93310547 26.7739258,7.07991536 26.9404297,7.24462891 C27.1069336,7.40934245 27.1901855,7.64746094 27.1901855,7.95898438 C27.1901855,8.32063802 27.035319,8.59366862 26.7255859,8.77807617 C26.4158529,8.96248372 26.0121257,9.0546875 25.5144043,9.0546875 C24.7302246,9.0546875 24.0087077,8.8273112 23.3498535,8.37255859 C23.3283691,8.35823568 23.3050944,8.34749349 23.2800293,8.34033203 C23.2549642,8.33317057 23.2298991,8.32958984 23.204834,8.32958984 C23.1403809,8.32958984 23.0732422,8.35286458 23.003418,8.39941406 C22.9335938,8.44596354 22.8718262,8.50415039 22.8181152,8.57397461 C22.7644043,8.64379883 22.7205404,8.71988932 22.6865234,8.80224609 C22.6525065,8.88460286 22.635498,8.96158854 22.635498,9.03320312 C22.635498,9.11914063 22.6623535,9.18359375 22.7160645,9.2265625 C23.4107259,9.82096354 24.3363444,10.1181641 25.4929199,10.1181641 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M30.3913574,10 C30.5524902,10 30.6930339,9.94718424 30.8129883,9.84155273 C30.9329427,9.73592122 30.9929199,9.59358724 30.9929199,9.41455078 L30.9929199,5.21972656 C30.9929199,5.03710938 30.9347331,4.89388021 30.8183594,4.79003906 C30.7019857,4.68619792 30.5650228,4.63427734 30.4074707,4.63427734 C30.2463379,4.63427734 30.1066895,4.68619792 29.9885254,4.79003906 C29.8703613,4.89388021 29.8112793,5.03710938 29.8112793,5.21972656 L29.8112793,9.41455078 C29.8112793,9.6007487 29.8676758,9.74487305 29.9804688,9.84692383 C30.0932617,9.94897461 30.2302246,10 30.3913574,10 Z M30.3967285,3.51171875 C30.6115723,3.51171875 30.7843424,3.44995117 30.9150391,3.32641602 C31.0457357,3.20288086 31.111084,3.0390625 31.111084,2.83496094 C31.111084,2.6344401 31.0466309,2.47151693 30.9177246,2.34619141 C30.7888184,2.22086589 30.6169434,2.15820312 30.4020996,2.15820312 C30.1872559,2.15820312 30.0144857,2.22086589 29.8837891,2.34619141 C29.7530924,2.47151693 29.6877441,2.6344401 29.6877441,2.83496094 C29.6877441,3.0390625 29.7530924,3.20288086 29.8837891,3.32641602 C30.0144857,3.44995117 30.1854655,3.51171875 30.3967285,3.51171875 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M33.0231934,10 C33.1843262,10 33.3248698,9.95076497 33.4448242,9.85229492 C33.5647786,9.75382487 33.6247559,9.61507161 33.6247559,9.43603516 L33.6247559,6.95996094 C33.6247559,6.46582031 33.7518717,6.07462565 34.0061035,5.78637695 C34.2603353,5.49812826 34.6005046,5.35400391 35.0266113,5.35400391 C35.3703613,5.35400391 35.6371257,5.45963542 35.8269043,5.67089844 C36.0166829,5.88216146 36.1115723,6.20084635 36.1115723,6.62695313 L36.1115723,9.43066406 C36.1115723,9.60970052 36.1688639,9.74934896 36.2834473,9.84960938 C36.3980306,9.94986979 36.5358887,10 36.6970215,10 C36.8581543,10 36.9969076,9.94986979 37.1132812,9.84960938 C37.2296549,9.74934896 37.2878418,9.60970052 37.2878418,9.43066406 L37.2878418,7.06738281 C37.2878418,6.53027344 37.4194336,6.11043294 37.6826172,5.80786133 C37.9458008,5.50528971 38.2850749,5.35400391 38.7004395,5.35400391 C39.0477702,5.35400391 39.3154297,5.45694987 39.503418,5.6628418 C39.6914062,5.86873372 39.7854004,6.17757161 39.7854004,6.58935547 L39.7854004,9.43603516 C39.7854004,9.61507161 39.8426921,9.75382487 39.9572754,9.85229492 C40.0718587,9.95076497 40.2097168,10 40.3708496,10 C40.5319824,10 40.6716309,9.95076497 40.7897949,9.85229492 C40.907959,9.75382487 40.967041,9.61507161 40.967041,9.43603516 L40.967041,6.61083984 C40.967041,5.92333984 40.7844238,5.40950521 40.4191895,5.06933594 C40.0539551,4.72916667 39.5759277,4.55908203 38.9851074,4.55908203 C38.541097,4.55908203 38.1570638,4.65934245 37.8330078,4.85986328 C37.5089518,5.06038411 37.2663574,5.36116536 37.1052246,5.76220703 C36.994222,5.40413411 36.7838542,5.11409505 36.4741211,4.89208984 C36.164388,4.67008464 35.7839355,4.55908203 35.3327637,4.55908203 C34.5378418,4.55908203 33.9685059,4.87955729 33.6247559,5.52050781 L33.6247559,5.14990234 C33.6247559,4.98876953 33.5656738,4.86254883 33.4475098,4.77124023 C33.3293457,4.67993164 33.1914876,4.63427734 33.0339355,4.63427734 C32.8728027,4.63427734 32.7331543,4.68351237 32.6149902,4.78198242 C32.4968262,4.88045247 32.4377441,5.015625 32.4377441,5.1875 L32.4377441,9.43603516 C32.4377441,9.61507161 32.495931,9.75382487 32.6123047,9.85229492 C32.7286784,9.95076497 32.8656413,10 33.0231934,10 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="node-/-02_MaxSim-@1_grey-Copy-2" transform="translate(212.206, 52)" xlink:href="#path-23">
+                    <path stroke="#8E8E8E" d="M51.203,6 L51.203,14 C51.203,17.0375661 48.7405661,19.5 45.703,19.5 L6,19.5 C2.96243388,19.5 0.5,17.0375661 0.5,14 L0.5,6 C0.5,2.96243388 2.96243388,0.5 6,0.5 L45.703,0.500000028 C48.7405661,0.5 51.203,2.96243388 51.203,6 Z" stroke-linejoin="square" fill="#F2F2F2"></path>
+                    <g id="MaxSim" transform="translate(4.7361, 3.894)" fill="#8E8E8E">
+                        <path d="M5.43725586,9.75830078 L5.43725586,8.14160156 L3.35327148,3.27001953 C3.24943034,3.02294922 3.09903971,2.83764648 2.90209961,2.71411133 C2.70515951,2.59057617 2.49389648,2.52880859 2.26831055,2.52880859 C1.99617513,2.52880859 1.76074219,2.60758464 1.56201172,2.76513672 C1.36328125,2.9226888 1.26391602,3.125 1.26391602,3.37207031 L1.26391602,9.42529297 C1.26391602,9.56494141 1.32478841,9.67415365 1.4465332,9.75292969 C1.56827799,9.83170573 1.71240234,9.87109375 1.87890625,9.87109375 C2.04541016,9.87109375 2.18953451,9.83170573 2.3112793,9.75292969 C2.43302409,9.67415365 2.49389648,9.56494141 2.49389648,9.42529297 L2.49389648,4.02734375 L4.66918945,9.17822266 C4.74796549,9.36442057 4.85717773,9.50764974 4.99682617,9.60791016 C5.13647461,9.70817057 5.28328451,9.75830078 5.43725586,9.75830078 Z M5.44262695,9.75830078 C5.59659831,9.75830078 5.74430339,9.70727539 5.88574219,9.60522461 C6.02718099,9.50317383 6.13549805,9.36083984 6.21069336,9.17822266 L8.39135742,4.02734375 L8.39135742,9.42529297 C8.39135742,9.56494141 8.45222982,9.67415365 8.57397461,9.75292969 C8.6957194,9.83170573 8.83984375,9.87109375 9.00634766,9.87109375 C9.17285156,9.87109375 9.31697591,9.83170573 9.4387207,9.75292969 C9.56046549,9.67415365 9.62133789,9.56494141 9.62133789,9.42529297 L9.62133789,3.37207031 C9.62133789,3.20735677 9.57389323,3.05965169 9.47900391,2.92895508 C9.38411458,2.79825846 9.26057943,2.69889323 9.10839844,2.63085938 C8.95621745,2.56282552 8.79239909,2.52880859 8.61694336,2.52880859 C8.39135742,2.52880859 8.1800944,2.59057617 7.9831543,2.71411133 C7.78621419,2.83764648 7.6340332,3.02294922 7.52661133,3.27001953 L5.44262695,8.14160156 L5.44262695,9.75830078 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M12.8977051,10.0751953 C13.6245931,10.0751953 14.1437988,9.7672526 14.4553223,9.15136719 L14.4553223,9.44140625 C14.4553223,9.68489583 14.5609538,9.8531901 14.7722168,9.94628906 C14.8545736,9.98209635 14.9405111,10 15.0300293,10 C15.1875814,10 15.3236491,9.95076497 15.4382324,9.85229492 C15.5528158,9.75382487 15.6101074,9.61507161 15.6101074,9.43603516 L15.6101074,6.49267578 C15.6101074,5.83024089 15.416748,5.34236654 15.0300293,5.02905273 C14.6433105,4.71573893 14.1187337,4.55908203 13.4562988,4.55908203 C12.7222493,4.55908203 12.0651855,4.70410156 11.4851074,4.99414062 C11.3526204,5.05859375 11.286377,5.16064453 11.286377,5.30029297 C11.286377,5.4148763 11.3266602,5.51871745 11.4072266,5.61181641 C11.487793,5.70491536 11.5853678,5.75146484 11.6999512,5.75146484 C11.7142741,5.75146484 11.728597,5.75056966 11.7429199,5.7487793 C11.7572428,5.74698893 11.7724609,5.74430339 11.7885742,5.74072266 C11.8046875,5.73714193 11.8199056,5.73177083 11.8342285,5.72460937 C12.0347493,5.64583333 12.1824544,5.58943685 12.2773438,5.55541992 C12.3722331,5.52140299 12.5342611,5.47843424 12.7634277,5.42651367 C12.9925944,5.3745931 13.2074382,5.34863281 13.407959,5.34863281 C14.0882975,5.34863281 14.4284668,5.69954427 14.4284668,6.40136719 L14.4284668,6.87402344 C14.0990397,6.87402344 13.8197428,6.87670898 13.5905762,6.88208008 C13.3614095,6.88745117 13.1268717,6.89998372 12.8869629,6.91967773 C12.647054,6.93937174 12.4474284,6.96622721 12.2880859,7.00024414 C12.1287435,7.03426107 11.9720866,7.07902018 11.8181152,7.13452148 C11.6641439,7.19002279 11.5415039,7.25805664 11.4501953,7.33862305 C11.3588867,7.41918945 11.2765299,7.51676432 11.203125,7.63134766 C11.1297201,7.74593099 11.0777995,7.8766276 11.0473633,8.0234375 C11.0169271,8.1702474 11.001709,8.33854167 11.001709,8.52832031 C11.001709,9.02246094 11.1789551,9.40380859 11.5334473,9.67236328 C11.8879395,9.94091797 12.3426921,10.0751953 12.8977051,10.0751953 Z M12.9997559,9.30712891 C12.7347819,9.30712891 12.5217285,9.23999023 12.3605957,9.10571289 C12.1994629,8.97143555 12.1188965,8.76106771 12.1188965,8.47460938 C12.1188965,8.37076823 12.1269531,8.27856445 12.1430664,8.19799805 C12.1591797,8.11743164 12.1869303,8.04492188 12.2263184,7.98046875 C12.2657064,7.91601563 12.313151,7.86140951 12.3686523,7.81665039 C12.4241536,7.77189128 12.499349,7.73250326 12.5942383,7.69848633 C12.6891276,7.6644694 12.7902832,7.63761393 12.8977051,7.61791992 C13.005127,7.59822591 13.1402995,7.58211263 13.3032227,7.56958008 C13.4661458,7.55704753 13.6326497,7.54899089 13.8027344,7.54541016 C13.972819,7.54182943 14.1796061,7.54003906 14.4230957,7.54003906 L14.4230957,7.6796875 C14.4230957,8.15234375 14.2798665,8.54174805 13.9934082,8.84790039 C13.7069499,9.15405273 13.3757324,9.30712891 12.9997559,9.30712891 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M20.9865723,10 C21.1405436,10 21.2784017,9.94539388 21.4001465,9.83618164 C21.5218913,9.7269694 21.5827637,9.60253906 21.5827637,9.46289062 C21.5827637,9.35188802 21.545166,9.24804688 21.4699707,9.15136719 L19.885498,7.22314453 L21.3571777,5.41845703 C21.4287923,5.32535807 21.4645996,5.23046875 21.4645996,5.13378906 C21.4645996,5.00130208 21.412679,4.88492839 21.3088379,4.78466797 C21.2049967,4.68440755 21.083252,4.63427734 20.9436035,4.63427734 C20.7932129,4.63427734 20.6571452,4.70768229 20.5354004,4.85449219 L19.2409668,6.43896484 L17.9465332,4.87060547 C17.8212077,4.71305339 17.6708171,4.63427734 17.4953613,4.63427734 C17.34139,4.63427734 17.2035319,4.68888346 17.0817871,4.7980957 C16.9600423,4.90730794 16.8991699,5.03531901 16.8991699,5.18212891 C16.8991699,5.28597005 16.9367676,5.3898112 17.0119629,5.49365234 L18.4621582,7.25537109 L16.8615723,9.21044922 C16.7935384,9.29280599 16.7595215,9.38411458 16.7595215,9.484375 C16.7595215,9.62044271 16.8141276,9.74039714 16.9233398,9.84423828 C17.0325521,9.94807943 17.1587728,10 17.302002,10 C17.4523926,10 17.5848796,9.92659505 17.6994629,9.77978516 L19.1066895,8.02880859 L20.5354004,9.76367188 C20.6571452,9.92122396 20.8075358,10 20.9865723,10 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M25.4929199,10.1181641 C26.0980632,10.1181641 26.6262207,10.0286458 27.0773926,9.84960938 C27.5285645,9.67057292 27.8776855,9.40828451 28.1247559,9.06274414 C28.3718262,8.71720378 28.4953613,8.30810547 28.4953613,7.83544922 C28.4953613,7.59554036 28.4649251,7.37711589 28.4040527,7.18017578 C28.3431803,6.98323568 28.2590332,6.81046549 28.1516113,6.66186523 C28.0441895,6.51326497 27.9054362,6.37809245 27.7353516,6.25634766 C27.5652669,6.13460286 27.3808594,6.02807617 27.1821289,5.93676758 C26.9833984,5.84545898 26.7515462,5.75683594 26.4865723,5.67089844 L24.7893066,5.12841797 C24.5100098,5.03531901 24.3068034,4.9144694 24.1796875,4.76586914 C24.0525716,4.61726888 23.9890137,4.42301432 23.9890137,4.18310547 C23.9890137,3.93603516 24.0677897,3.73372396 24.2253418,3.57617187 C24.3828939,3.41861979 24.5807292,3.30940755 24.8188477,3.24853516 C25.0569661,3.18766276 25.3389486,3.15722656 25.6647949,3.15722656 C26.3164876,3.15722656 26.9431152,3.33089193 27.5446777,3.67822266 C27.634196,3.73193359 27.7219238,3.75878906 27.8078613,3.75878906 C27.943929,3.75878906 28.0603027,3.7014974 28.1569824,3.58691406 C28.2536621,3.47233073 28.302002,3.34521484 28.302002,3.20556641 C28.302002,3.14111328 28.2903646,3.08113607 28.2670898,3.02563477 C28.2438151,2.97013346 28.2071126,2.9226888 28.1569824,2.88330078 C27.8991699,2.6648763 27.5330404,2.47867839 27.0585938,2.32470703 C26.5841471,2.17073568 26.087321,2.09375 25.5681152,2.09375 C24.7338053,2.09375 24.0480957,2.28531901 23.5109863,2.66845703 C22.973877,3.05159505 22.7053223,3.57259115 22.7053223,4.23144531 C22.7053223,4.7578125 22.84139,5.17407227 23.1135254,5.48022461 C23.3856608,5.78637695 23.8153483,6.03792318 24.4025879,6.23486328 L26.1105957,6.80419922 C26.4973145,6.93310547 26.7739258,7.07991536 26.9404297,7.24462891 C27.1069336,7.40934245 27.1901855,7.64746094 27.1901855,7.95898438 C27.1901855,8.32063802 27.035319,8.59366862 26.7255859,8.77807617 C26.4158529,8.96248372 26.0121257,9.0546875 25.5144043,9.0546875 C24.7302246,9.0546875 24.0087077,8.8273112 23.3498535,8.37255859 C23.3283691,8.35823568 23.3050944,8.34749349 23.2800293,8.34033203 C23.2549642,8.33317057 23.2298991,8.32958984 23.204834,8.32958984 C23.1403809,8.32958984 23.0732422,8.35286458 23.003418,8.39941406 C22.9335938,8.44596354 22.8718262,8.50415039 22.8181152,8.57397461 C22.7644043,8.64379883 22.7205404,8.71988932 22.6865234,8.80224609 C22.6525065,8.88460286 22.635498,8.96158854 22.635498,9.03320312 C22.635498,9.11914063 22.6623535,9.18359375 22.7160645,9.2265625 C23.4107259,9.82096354 24.3363444,10.1181641 25.4929199,10.1181641 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M30.3913574,10 C30.5524902,10 30.6930339,9.94718424 30.8129883,9.84155273 C30.9329427,9.73592122 30.9929199,9.59358724 30.9929199,9.41455078 L30.9929199,5.21972656 C30.9929199,5.03710938 30.9347331,4.89388021 30.8183594,4.79003906 C30.7019857,4.68619792 30.5650228,4.63427734 30.4074707,4.63427734 C30.2463379,4.63427734 30.1066895,4.68619792 29.9885254,4.79003906 C29.8703613,4.89388021 29.8112793,5.03710938 29.8112793,5.21972656 L29.8112793,9.41455078 C29.8112793,9.6007487 29.8676758,9.74487305 29.9804688,9.84692383 C30.0932617,9.94897461 30.2302246,10 30.3913574,10 Z M30.3967285,3.51171875 C30.6115723,3.51171875 30.7843424,3.44995117 30.9150391,3.32641602 C31.0457357,3.20288086 31.111084,3.0390625 31.111084,2.83496094 C31.111084,2.6344401 31.0466309,2.47151693 30.9177246,2.34619141 C30.7888184,2.22086589 30.6169434,2.15820312 30.4020996,2.15820312 C30.1872559,2.15820312 30.0144857,2.22086589 29.8837891,2.34619141 C29.7530924,2.47151693 29.6877441,2.6344401 29.6877441,2.83496094 C29.6877441,3.0390625 29.7530924,3.20288086 29.8837891,3.32641602 C30.0144857,3.44995117 30.1854655,3.51171875 30.3967285,3.51171875 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M33.0231934,10 C33.1843262,10 33.3248698,9.95076497 33.4448242,9.85229492 C33.5647786,9.75382487 33.6247559,9.61507161 33.6247559,9.43603516 L33.6247559,6.95996094 C33.6247559,6.46582031 33.7518717,6.07462565 34.0061035,5.78637695 C34.2603353,5.49812826 34.6005046,5.35400391 35.0266113,5.35400391 C35.3703613,5.35400391 35.6371257,5.45963542 35.8269043,5.67089844 C36.0166829,5.88216146 36.1115723,6.20084635 36.1115723,6.62695313 L36.1115723,9.43066406 C36.1115723,9.60970052 36.1688639,9.74934896 36.2834473,9.84960938 C36.3980306,9.94986979 36.5358887,10 36.6970215,10 C36.8581543,10 36.9969076,9.94986979 37.1132812,9.84960938 C37.2296549,9.74934896 37.2878418,9.60970052 37.2878418,9.43066406 L37.2878418,7.06738281 C37.2878418,6.53027344 37.4194336,6.11043294 37.6826172,5.80786133 C37.9458008,5.50528971 38.2850749,5.35400391 38.7004395,5.35400391 C39.0477702,5.35400391 39.3154297,5.45694987 39.503418,5.6628418 C39.6914062,5.86873372 39.7854004,6.17757161 39.7854004,6.58935547 L39.7854004,9.43603516 C39.7854004,9.61507161 39.8426921,9.75382487 39.9572754,9.85229492 C40.0718587,9.95076497 40.2097168,10 40.3708496,10 C40.5319824,10 40.6716309,9.95076497 40.7897949,9.85229492 C40.907959,9.75382487 40.967041,9.61507161 40.967041,9.43603516 L40.967041,6.61083984 C40.967041,5.92333984 40.7844238,5.40950521 40.4191895,5.06933594 C40.0539551,4.72916667 39.5759277,4.55908203 38.9851074,4.55908203 C38.541097,4.55908203 38.1570638,4.65934245 37.8330078,4.85986328 C37.5089518,5.06038411 37.2663574,5.36116536 37.1052246,5.76220703 C36.994222,5.40413411 36.7838542,5.11409505 36.4741211,4.89208984 C36.164388,4.67008464 35.7839355,4.55908203 35.3327637,4.55908203 C34.5378418,4.55908203 33.9685059,4.87955729 33.6247559,5.52050781 L33.6247559,5.14990234 C33.6247559,4.98876953 33.5656738,4.86254883 33.4475098,4.77124023 C33.3293457,4.67993164 33.1914876,4.63427734 33.0339355,4.63427734 C32.8728027,4.63427734 32.7331543,4.68351237 32.6149902,4.78198242 C32.4968262,4.88045247 32.4377441,5.015625 32.4377441,5.1875 L32.4377441,9.43603516 C32.4377441,9.61507161 32.495931,9.75382487 32.6123047,9.85229492 C32.7286784,9.95076497 32.8656413,10 33.0231934,10 Z" id="Path" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+                <g id="Lines-To-Score" transform="translate(56.5, 28.0674)" fill="#8E8E8E" fill-rule="nonzero">
+                    <path d="M91.5000209,4.65483147 L94.0000209,9.65483147 L91.9990209,9.65483147 L92.0000209,23.9326092 C92.0000209,24.2087516 91.7761633,24.4326092 91.5000209,24.4326092 C91.2238785,24.4326092 91.0000209,24.2087516 91.0000209,23.9326092 L90.9990209,9.65483147 L89.0000209,9.65483147 L91.5000209,4.65483147 Z" id="Combined-Shape"></path>
+                    <path d="M63.1650932,1 L68.752352,1.18038384 L65.2556896,5.54196063 L64.4514788,3.79556713 C56.6542293,7.64056739 45.3377025,11.102166 30.493831,14.1883314 C15.2598962,17.3555943 5.35129903,20.7478928 0.80940584,24.3253954 C0.592476046,24.496264 0.27810334,24.458924 0.107234747,24.2419942 C-0.063633845,24.0250644 -0.0262938637,23.7106917 0.19063593,23.5398231 C4.89167063,19.8369699 14.911095,16.4067287 30.2902755,13.209268 C45.0769842,10.1349872 56.3275285,6.69142596 64.0338014,2.88659193 L63.1650932,1 Z" id="Combined-Shape"></path>
+                    <path d="M119.834949,1 L118.966392,2.88666703 C126.672661,6.69147128 137.923155,10.1350074 152.709766,13.209268 C168.088947,16.4067287 178.108371,19.8369699 182.809406,23.5398231 C183.026336,23.7106917 183.063676,24.0250644 182.892807,24.2419942 C182.721938,24.458924 182.407566,24.496264 182.190636,24.3253954 C177.648743,20.7478928 167.740146,17.3555943 152.506211,14.1883314 C137.662383,11.102175 126.345879,7.64058774 118.548632,3.79560102 L117.744352,5.54196063 L114.24769,1.18038384 L119.834949,1 Z" id="Combined-Shape"></path>
+                </g>
+                <g id="node-/-01_top" transform="translate(123.0088, 12)" xlink:href="#path-24">
+                    <path stroke="#8E8E8E" d="M39.605,0.5 C45.0599165,0.5 49.482,4.92208353 49.482,10.377 C49.482,15.8319165 45.0599165,20.254 39.605,20.254 L10.377,20.254 C4.92208353,20.254 0.5,15.8319165 0.5,10.377 C0.5,5.29810944 4.35169591,1.06030081 9.36652997,0.551016719 L10.4022157,0.499363801 Z" stroke-linejoin="square" fill="#F2F2F2"></path>
+                    <g id="Score" transform="translate(8.8564, 4.625)" fill="#8E8E8E">
+                        <path d="M4.26074219,10.1289062 C4.92089844,10.1289062 5.49707031,10.03125 5.98925781,9.8359375 C6.48144531,9.640625 6.86230469,9.35449219 7.13183594,8.97753906 C7.40136719,8.60058594 7.53613281,8.15429688 7.53613281,7.63867188 C7.53613281,7.37695312 7.50292969,7.13867188 7.43652344,6.92382813 C7.37011719,6.70898438 7.27832031,6.52050781 7.16113281,6.35839844 C7.04394531,6.19628906 6.89257813,6.04882813 6.70703125,5.91601563 C6.52148438,5.78320312 6.3203125,5.66699219 6.10351562,5.56738281 C5.88671875,5.46777344 5.63378906,5.37109375 5.34472656,5.27734375 L3.49316406,4.68554688 C3.18847656,4.58398437 2.96679687,4.45214844 2.828125,4.29003906 C2.68945313,4.12792969 2.62011719,3.91601562 2.62011719,3.65429687 C2.62011719,3.38476562 2.70605469,3.1640625 2.87792969,2.9921875 C3.04980469,2.8203125 3.265625,2.70117187 3.52539063,2.63476562 C3.78515625,2.56835938 4.09277344,2.53515625 4.44824219,2.53515625 C5.15917969,2.53515625 5.84277344,2.72460937 6.49902344,3.10351563 C6.59667969,3.16210938 6.69238281,3.19140625 6.78613281,3.19140625 C6.93457031,3.19140625 7.06152344,3.12890625 7.16699219,3.00390625 C7.27246094,2.87890625 7.32519531,2.74023437 7.32519531,2.58789063 C7.32519531,2.51757813 7.3125,2.45214844 7.28710938,2.39160156 C7.26171875,2.33105469 7.22167969,2.27929688 7.16699219,2.23632813 C6.88574219,1.99804688 6.48632812,1.79492187 5.96875,1.62695312 C5.45117188,1.45898438 4.90917969,1.375 4.34277344,1.375 C3.43261719,1.375 2.68457031,1.58398438 2.09863281,2.00195312 C1.51269531,2.41992187 1.21972656,2.98828125 1.21972656,3.70703125 C1.21972656,4.28125 1.36816406,4.73535156 1.66503906,5.06933594 C1.96191406,5.40332031 2.43066406,5.67773438 3.07128906,5.89257813 L4.93457031,6.51367188 C5.35644531,6.65429688 5.65820312,6.81445313 5.83984375,6.99414062 C6.02148438,7.17382813 6.11230469,7.43359375 6.11230469,7.7734375 C6.11230469,8.16796875 5.94335938,8.46582031 5.60546875,8.66699219 C5.26757813,8.86816406 4.82714844,8.96875 4.28417969,8.96875 C3.42871094,8.96875 2.64160156,8.72070313 1.92285156,8.22460938 C1.89941406,8.20898438 1.87402344,8.19726563 1.84667969,8.18945313 C1.81933594,8.18164063 1.79199219,8.17773438 1.76464844,8.17773438 C1.69433594,8.17773438 1.62109375,8.203125 1.54492188,8.25390625 C1.46875,8.3046875 1.40136719,8.36816406 1.34277344,8.44433594 C1.28417969,8.52050781 1.23632812,8.60351562 1.19921875,8.69335937 C1.16210938,8.78320313 1.14355469,8.8671875 1.14355469,8.9453125 C1.14355469,9.0390625 1.17285156,9.109375 1.23144531,9.15625 C1.98925781,9.8046875 2.99902344,10.1289062 4.26074219,10.1289062 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M11.4677734,10.0820312 C12.2607422,10.0820312 12.8583984,9.90820312 13.2607422,9.56054688 C13.4013672,9.43945312 13.4716797,9.30664063 13.4716797,9.16210938 C13.4716797,9.12695313 13.4677734,9.09277344 13.4599609,9.05957031 C13.4521484,9.02636719 13.4394531,8.99511719 13.421875,8.96582031 C13.4042969,8.93652344 13.3837891,8.91015625 13.3603516,8.88671875 C13.3369141,8.86328125 13.3115234,8.84375 13.2841797,8.828125 C13.2568359,8.8125 13.2265625,8.79980469 13.1933594,8.79003906 C13.1601563,8.78027344 13.1259766,8.77539062 13.0908203,8.77539062 C13.0439453,8.77539062 12.9960938,8.78320312 12.9472656,8.79882812 C12.8984375,8.81445312 12.8505859,8.83984375 12.8037109,8.875 C12.4951172,9.09765625 12.0830078,9.20898438 11.5673828,9.20898438 C11.0400391,9.20898438 10.6269531,9.01171875 10.328125,8.6171875 C10.0292969,8.22265625 9.87988281,7.70507813 9.87988281,7.06445312 C9.87988281,6.42773437 10.0341797,5.91210938 10.3427734,5.51757812 C10.6513672,5.12304687 11.0712891,4.92578125 11.6025391,4.92578125 C11.9814453,4.92578125 12.3427734,5.01953125 12.6865234,5.20703125 C12.7294922,5.23046875 12.7734375,5.24804687 12.8183594,5.25976562 C12.8632813,5.27148438 12.9072266,5.27734375 12.9501953,5.27734375 C12.9970703,5.27734375 13.0419922,5.27148438 13.0849609,5.25976562 C13.1279297,5.24804687 13.1660156,5.23046875 13.1992188,5.20703125 C13.2324219,5.18359375 13.2617188,5.15625 13.2871094,5.125 C13.3125,5.09375 13.3320313,5.05957031 13.3457031,5.02246094 C13.359375,4.98535156 13.3662109,4.9453125 13.3662109,4.90234375 C13.3662109,4.859375 13.3574219,4.81640625 13.3398438,4.7734375 C13.3222656,4.73046875 13.2949219,4.68652344 13.2578125,4.64160156 C13.2207031,4.59667969 13.1748047,4.5546875 13.1201172,4.515625 C12.6904297,4.21484375 12.1494141,4.06445312 11.4970703,4.06445312 C11.0673828,4.06445312 10.6699219,4.14453125 10.3046875,4.3046875 C9.93945312,4.46484375 9.6328125,4.68164063 9.38476562,4.95507813 C9.13671875,5.22851562 8.94335938,5.54785156 8.8046875,5.91308594 C8.66601562,6.27832031 8.59667969,6.6640625 8.59667969,7.0703125 C8.59667969,7.625 8.7109375,8.12890625 8.93945312,8.58203125 C9.16796875,9.03515625 9.50292969,9.3984375 9.94433594,9.671875 C10.3857422,9.9453125 10.8935547,10.0820312 11.4677734,10.0820312 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M17.2509766,9.20898438 C16.7080078,9.20898438 16.2832031,9.02148438 15.9765625,8.64648438 C15.6699219,8.27148438 15.5166016,7.75 15.5166016,7.08203125 C15.5166016,6.40625 15.6699219,5.87890625 15.9765625,5.5 C16.2832031,5.12109375 16.7080078,4.93164062 17.2509766,4.93164062 C17.7939453,4.93164062 18.21875,5.12207031 18.5253906,5.50292969 C18.8320312,5.88378906 18.9853516,6.41015625 18.9853516,7.08203125 C18.9853516,7.75 18.8330078,8.27148438 18.5283203,8.64648438 C18.2236328,9.02148438 17.7978516,9.20898438 17.2509766,9.20898438 Z M17.2509766,10.0820312 C17.6494141,10.0820312 18.0146484,10.0263672 18.3466797,9.91503906 C18.6787109,9.80371094 18.9609375,9.65332031 19.1933594,9.46386719 C19.4257812,9.27441406 19.6230469,9.05078125 19.7851562,8.79296875 C19.9472656,8.53515625 20.0664062,8.26269531 20.1425781,7.97558594 C20.21875,7.68847656 20.2568359,7.390625 20.2568359,7.08203125 C20.2568359,6.67578125 20.1923828,6.29296875 20.0634766,5.93359375 C19.9345703,5.57421875 19.7480469,5.25390625 19.5039062,4.97265625 C19.2597656,4.69140625 18.9443359,4.46972656 18.5576172,4.30761719 C18.1708984,4.14550781 17.7353516,4.06445312 17.2509766,4.06445312 C16.7626953,4.06445312 16.3232422,4.14746094 15.9326172,4.31347656 C15.5419922,4.47949219 15.2265625,4.70410156 14.9863281,4.98730469 C14.7460938,5.27050781 14.5625,5.59082031 14.4355469,5.94824219 C14.3085938,6.30566406 14.2451172,6.68359375 14.2451172,7.08203125 C14.2451172,7.47265625 14.3076172,7.84472656 14.4326172,8.19824219 C14.5576172,8.55175781 14.7402344,8.87011719 14.9804688,9.15332031 C15.2207031,9.43652344 15.5361328,9.66210938 15.9267578,9.83007812 C16.3173828,9.99804687 16.7587891,10.0820312 17.2509766,10.0820312 Z" id="Shape" fill-rule="nonzero"></path>
+                        <path d="M22.1787109,10 C22.3544922,10 22.5078125,9.94628906 22.6386719,9.83886719 C22.7695312,9.73144531 22.8349609,9.58203125 22.8349609,9.390625 L22.8349609,6.66015625 C22.8349609,6.37890625 22.8779297,6.140625 22.9638672,5.9453125 C23.0498047,5.75 23.1728516,5.60058594 23.3330078,5.49707031 C23.4931641,5.39355469 23.6679688,5.3203125 23.8574219,5.27734375 C24.046875,5.234375 24.2646484,5.21289062 24.5107422,5.21289062 C24.6396484,5.21289062 24.7412109,5.16210937 24.8154297,5.06054688 C24.8896484,4.95898438 24.9267578,4.83789062 24.9267578,4.69726562 C24.9267578,4.62695312 24.9169922,4.55859375 24.8974609,4.4921875 C24.8779297,4.42578125 24.8486328,4.36621094 24.8095703,4.31347656 C24.7705078,4.26074219 24.7197266,4.21875 24.6572266,4.1875 C24.5947266,4.15625 24.5244141,4.140625 24.4462891,4.140625 C24.0673828,4.140625 23.7265625,4.26367188 23.4238281,4.50976562 C23.1210938,4.75585938 22.9189453,5.06054688 22.8173828,5.42382812 L22.8349609,4.69726562 C22.8388672,4.52539062 22.7783203,4.390625 22.6533203,4.29296875 C22.5283203,4.1953125 22.3779297,4.14648438 22.2021484,4.14648438 C22.0888672,4.14648438 21.9833984,4.16699219 21.8857422,4.20800781 C21.7880859,4.24902344 21.7070312,4.31542969 21.6425781,4.40722656 C21.578125,4.49902344 21.5458984,4.609375 21.5458984,4.73828125 L21.5458984,9.390625 C21.5458984,9.5859375 21.6083984,9.73632812 21.7333984,9.84179688 C21.8583984,9.94726562 22.0068359,10 22.1787109,10 Z" id="Path" fill-rule="nonzero"></path>
+                        <path d="M28.5830078,10.0820312 C29.4345703,10.0820312 30.1533203,9.82421875 30.7392578,9.30859375 C30.8681641,9.203125 30.9326172,9.07617188 30.9326172,8.92773437 C30.9326172,8.81054688 30.8925781,8.70703125 30.8125,8.6171875 C30.7324219,8.52734375 30.6376953,8.48242188 30.5283203,8.48242188 C30.4541016,8.48242188 30.3798828,8.5078125 30.3056641,8.55859375 C29.7080078,8.98046875 29.1650391,9.19140625 28.6767578,9.19140625 C28.1494141,9.1875 27.7285156,9.03808594 27.4140625,8.74316406 C27.0996094,8.44824219 26.9287109,7.99023438 26.9013672,7.36914062 L30.7451172,7.36914062 C30.8779297,7.36914062 30.9746094,7.32324219 31.0351562,7.23144531 C31.0957031,7.13964844 31.1259766,7.0234375 31.1259766,6.8828125 C31.1181641,6.49609375 31.0595703,6.13769531 30.9501953,5.80761719 C30.8408203,5.47753906 30.6816406,5.18066406 30.4726562,4.91699219 C30.2636719,4.65332031 29.9863281,4.4453125 29.640625,4.29296875 C29.2949219,4.140625 28.8974609,4.06445312 28.4482422,4.06445312 C28.0068359,4.06445312 27.6044922,4.14453125 27.2412109,4.3046875 C26.8779297,4.46484375 26.5791016,4.68164063 26.3447266,4.95507813 C26.1103516,5.22851562 25.9296875,5.54199219 25.8027344,5.89550781 C25.6757812,6.24902344 25.6123047,6.62304687 25.6123047,7.01757812 C25.6123047,7.96289062 25.8847656,8.70800781 26.4296875,9.25292969 C26.9746094,9.79785156 27.6923828,10.0742188 28.5830078,10.0820312 Z M26.9130859,6.59570312 C26.9326172,6.38476562 26.9785156,6.18359375 27.0507812,5.9921875 C27.1230469,5.80078125 27.2216797,5.625 27.3466797,5.46484375 C27.4716797,5.3046875 27.6337891,5.17578125 27.8330078,5.078125 C28.0322266,4.98046875 28.2548828,4.93164062 28.5009766,4.93164062 C28.9775391,4.93164062 29.3408203,5.08984375 29.5908203,5.40625 C29.8408203,5.72265625 29.9833984,6.11914062 30.0185547,6.59570312 L26.9130859,6.59570312 Z" id="Shape" fill-rule="nonzero"></path>
+                    </g>
+                </g>
+            </g>
+        </g>
+    </g>
+</svg>
\ No newline at end of file
diff --git a/docs/assets/training/layerwise.png b/docs/assets/training/layerwise.png
new file mode 100644
index 000000000000..bc1e4f24d4a8
Binary files /dev/null and b/docs/assets/training/layerwise.png differ
diff --git a/docs/assets/training/layerwise_bad_loading.png b/docs/assets/training/layerwise_bad_loading.png
new file mode 100644
index 000000000000..c93de6c61ff1
Binary files /dev/null and b/docs/assets/training/layerwise_bad_loading.png differ
diff --git a/docs/assets/training/layerwise_good_loading.png b/docs/assets/training/layerwise_good_loading.png
new file mode 100644
index 000000000000..0d7c7a7a56e0
Binary files /dev/null and b/docs/assets/training/layerwise_good_loading.png differ
diff --git a/docs/benchmarking/cli.md b/docs/benchmarking/cli.md
index f78ae8a95366..ea56b00ba26b 100644
--- a/docs/benchmarking/cli.md
+++ b/docs/benchmarking/cli.md
@@ -34,11 +34,15 @@ th {
 | HuggingFace-AIMO | ✅ | ✅ | `AI-MO/aimo-validation-aime`, `AI-MO/NuminaMath-1.5`, `AI-MO/NuminaMath-CoT` |
 | HuggingFace-Other | ✅ | ✅ | `lmms-lab/LLaVA-OneVision-Data`, `Aeala/ShareGPT_Vicuna_unfiltered` |
 | HuggingFace-MTBench | ✅ | ✅ | `philschmid/mt-bench` |
+| HuggingFace-HumanEval | ✅ | ✅ | `openai/openai_humaneval` |
+| HuggingFace-GSM8K | ✅ | ✅ | `openai/gsm8k` |
 | HuggingFace-Blazedit | ✅ | ✅ | `vdaita/edit_5k_char`, `vdaita/edit_10k_char` |
 | HuggingFace-ASR | ✅ | ✅ | `openslr/librispeech_asr`, `facebook/voxpopuli`,  `LIUM/tedlium`, `edinburghcstr/ami`,        `speechcolab/gigaspeech`,        `kensho/spgispeech` |
 | Spec Bench | ✅ | ✅ | `wget https://raw.githubusercontent.com/hemingkx/Spec-Bench/refs/heads/main/data/spec_bench/question.jsonl` |
+| SPEED-Bench | ✅ | ✅ | `curl -LsSf https://raw.githubusercontent.com/NVIDIA-NeMo/Skills/refs/heads/main/nemo_skills/dataset/speed-bench/prepare.py \| python3 -` |
 | Custom | ✅ | ✅ | Local file: `data.jsonl` |
-| Custom MM | ✅ | ✅ | Local file: `mm_data.jsonl` |
+| Custom Audio | ✅ | ✅ | Local file: `audio_data.jsonl` |
+| Custom Image | ✅ | ✅ | Local file: `image_data.jsonl` |
 
 Legend:
 
@@ -107,9 +111,41 @@ P99 ITL (ms):                            8.39
 ==================================================
 ```
 
+#### Results Visualization
+
+The `--plot-timeline` and `--plot-dataset-stats` can be used to generate respectively the requests completion timeline and dataset prompt and output tokens statistics, which can be useful for debugging purpose or for deeper analysis.
+
+```bash
+vllm bench serve \
+    --backend vllm \
+    --model meta-llama/Llama-3.1-8B-Instruct \
+    --endpoint /v1/completions \
+    --dataset-name sharegpt \
+    --dataset-path <your data path>/ShareGPT_V3_unfiltered_cleaned_split.json \
+    --num-prompts 100 \
+    --plot-timeline \
+    --timeline-itl-thresholds 2,5 \
+    --plot-dataset-stats \
+    --save-result
+```
+
+##### Interactive Timeline
+
+The generated timeline is an interactive visualization in the form of an HTML file that can be rendered in most browsers. To customize the ITL color thresholds, one can use `--timeline-itl-thresholds` flag (default: 25ms, 50ms)
+
+Example output:
+
+<iframe src="../assets/contributing/vllm_bench_serve_timeline.html" width="100%" height="600" frameborder="0"></iframe>
+
+##### Dataset statistics
+
+The generated figure shows the input prompt and output tokens distribution.
+
+Example output: ![Dataset Statistics](../assets/contributing/vllm_bench_serve_dataset_stats.png)
+
 #### Custom Dataset
 
-If the dataset you want to benchmark is not supported yet in vLLM, even then you can benchmark on it using `CustomDataset`. Your data needs to be in `.jsonl` format and needs to have "prompt" field per entry, e.g., data.jsonl
+If the dataset you want to benchmark is not supported yet in vLLM, even then you can benchmark on it using `CustomDataset`. At inference time, use the option `--dataset-name custom`. Your data needs to be in the `.jsonl` format and needs to have "prompt" field per entry, e.g., data.jsonl
 
 ```json
 {"prompt": "What is the capital of India?"}
@@ -140,9 +176,62 @@ vllm bench serve --port 9001 --save-result --save-detailed \
 
 You can skip applying chat template if your data already has it by using `--custom-skip-chat-template`.
 
-#### Custom multimodal dataset
+#### Custom Audio Dataset
 
-If the multimodal dataset you want to benchmark is not supported yet in vLLM, then you can benchmark on it using `CustomMMDataset`. Your data needs to be in `.jsonl` format and needs to have "prompt" and "image_files" field per entry, e.g., `mm_data.jsonl`:
+If the audio dataset you want to benchmark is not supported yet in vLLM, then you can benchmark on it using `CustomAudioDataset`. At inference time, use the option `--dataset-name custom_audio`. Your data needs to be in the `.jsonl` format and needs to have "prompt" and "audio" fields per entry, e.g., `audio_data.jsonl`:
+
+```json
+{"prompt": "What does this audio say?", "audio": "/path/to/audio_1.wav"}
+{"prompt": "Transcribe the audio.", "audio": "/path/to/audio_2.wav"}
+```
+
+- **Supported models:** The `CustomAudioDataset` class supports two types of audio models: ASR models (e.g. Whisper) which do not require a "prompt" field; and multimodal audio-text chat models (e.g. Qwen2-Audio). Since these model types require different arguments at inference, we are giving two examples.
+
+- **Example 1: Whisper**
+
+Whisper is a dedicated ASR encoder-decoder model, so it uses `--backend openai-audio` and `--endpoint /v1/audio/transcriptions`.
+
+```bash
+# start server
+vllm serve openai/whisper-tiny
+```
+
+```bash
+vllm bench serve \
+  --model openai/whisper-tiny \
+  --backend openai-audio \
+  --endpoint /v1/audio/transcriptions \
+  --dataset-name custom_audio \
+  --dataset-path audio_data.jsonl \
+  --no-oversample \
+  --custom-output-len 256 \
+  --save-result \
+  --save-detailed \
+  --result-filename whisper_bench.json
+```
+
+- **Example 2: Qwen2-Audio**
+
+Qwen2-Audio is a multimodal chat model that can do ASR and speech analysis, so it uses `--backend openai-chat`, and `--endpoint /v1/chat/completions`. It also requires `--enable-multimodal-chat` to enable multimodal chat transformation.
+
+```bash
+vllm bench serve \
+  --model Qwen/Qwen2-Audio-7B-Instruct \
+  --backend openai-chat \
+  --endpoint /v1/chat/completions \
+  --dataset-name custom_audio \
+  --dataset-path audio_data.jsonl \
+  --no-oversample \
+  --custom-output-len 256 \
+  --enable-multimodal-chat \
+  --save-result \
+  --save-detailed \
+  --result-filename qwen_bench.json
+```
+
+#### Custom Image Dataset
+
+If the image dataset you want to benchmark is not supported yet in vLLM, then you can benchmark on it using `CustomImageDataset`. At inference time, use the option `--dataset-name custom_image`. Your data needs to be in the `.jsonl` format and needs to have "prompt" and "image_files" fields per entry, e.g., `image_data.jsonl`:
 
 ```json
 {"prompt": "How many animals are present in the given image?", "image_files": ["/path/to/image/folder/horsepony.jpg"]}
@@ -160,8 +249,8 @@ vllm bench serve--save-result --save-detailed \
   --backend openai-chat \
   --model Qwen/Qwen2-VL-7B-Instruct \
   --endpoint /v1/chat/completions \
-  --dataset-name custom_mm \
-  --dataset-path <path-to-your-mm-data-jsonl> \
+  --dataset-name custom_image \
+  --dataset-path <path-to-your-image-data-jsonl> \
   --allowed-local-media-path /path/to/image/folder
 ```
 
@@ -239,6 +328,69 @@ vllm bench serve \
     --spec-bench-category "summarization"
 ```
 
+#### SPEED-Bench Benchmark with Speculative Decoding
+
+[SPEED-Bench](https://huggingface.co/datasets/nvidia/SPEED-Bench) is a unified and diverse dataset for speculative decoding, supporting acceptance rate and length measurements using the Qualitative split and throughput measurements using the Throughput splits in 5 configuration of input sequence length (1k, 2k, 8k, 16k, 32k).
+
+!!! note
+    This dataset is governed by the [NVIDIA Evaluation Dataset License Agreement](https://huggingface.co/datasets/nvidia/SPEED-Bench/blob/main/License.pdf). For each dataset a user elects to use, the user is responsible for checking if the dataset license is fit for the intended purpose. The `prepare.py` script automatically fetches data from all the source datasets.
+
+First, download the dataset to a folder, using this one liner:
+
+```bash
+curl -LsSf https://raw.githubusercontent.com/NVIDIA-NeMo/Skills/refs/heads/main/nemo_skills/dataset/speed-bench/prepare.py \| python3 -
+```
+
+The command supports also the following arguments:
+
+- `--config`: download only a subset of the dataset: `qualitative`, `throughput_1k`, `throughput_2k`, `throughput_8k`, `throughput_16k` and `throughput_32k`. By default, it will download all subsets.
+- `--output_dir`: download to a specified folder. By default, it will download to the current directory.
+
+Start a server with speculative decoding:
+
+```bash
+vllm serve meta-llama/Llama-3.3-70B-Instruct \
+    --speculative-config $'{"method": "eagle3",
+    "num_speculative_tokens": 3,
+    "model": "nvidia/Llama-3.3-70B-Instruct-Eagle3"}'
+```
+
+Run all categories in the Qualitative split:
+
+```bash
+vllm bench serve \
+    --model meta-llama/Llama-3.3-70B-Instruct \
+    --dataset-name speed_bench \
+    --dataset-path "<YOUR_DOWNLOADED_PATH>/data/speed_bench" \
+    --num-prompts -1
+```
+
+Available categories include `[writing, roleplay, reasoning, math, coding, stem, humanities, multilingual, summarization, qa, rag]`.
+
+Run only a specific category like "multilingual":
+
+```bash
+vllm bench serve \
+    --model meta-llama/Llama-3.3-70B-Instruct \
+    --dataset-name speed_bench \
+    --dataset-path "<YOUR_DOWNLOADED_PATH>/data/speed_bench" \
+    --num-prompts -1
+    --speed-bench-category "multilingual"
+```
+
+Run all categories in the Throughput split (2k ISL):
+
+```bash
+vllm bench serve \
+    --model meta-llama/Llama-3.3-70B-Instruct \
+    --dataset-name speed_bench \
+    --speed-bench-dataset-subset throughput_2k
+    --dataset-path "<YOUR_DOWNLOADED_PATH>/data/speed_bench/" \
+    --num-prompts -1
+```
+
+Available categories include `[high_entropy, mixed, low_entropy]`, where high entropy data contains unstructued data such as creative writing while low entropy data contains more structured data such as coding, more details are in the dataset card.
+
 #### Other HuggingFaceDataset Examples
 
 ```bash
@@ -293,6 +445,26 @@ vllm bench serve \
     --num-prompts 80
 ```
 
+`openai/openai_humaneval`:
+
+``` bash
+vllm bench serve \
+    --model NousResearch/Hermes-3-Llama-3.1-8B \
+    --dataset-name hf \
+    --dataset-path openai/openai_humaneval \
+    --num-prompts 80
+```
+
+`openai/gsm8k`:
+
+``` bash
+vllm bench serve \
+    --model NousResearch/Hermes-3-Llama-3.1-8B \
+    --dataset-name hf \
+    --dataset-path openai/gsm8k \
+    --num-prompts 80
+```
+
 `vdaita/edit_5k_char` or `vdaita/edit_10k_char`:
 
 ``` bash
diff --git a/docs/cli/.nav.yml b/docs/cli/.nav.yml
index d2d2905703ec..586685c5a10a 100644
--- a/docs/cli/.nav.yml
+++ b/docs/cli/.nav.yml
@@ -6,3 +6,5 @@ nav:
   - run-batch.md
   - vllm bench:
     - bench/**/*.md
+  - vllm launch:
+    - launch/**/*.md
diff --git a/docs/cli/README.md b/docs/cli/README.md
index c708eb795898..08e986a74630 100644
--- a/docs/cli/README.md
+++ b/docs/cli/README.md
@@ -9,7 +9,7 @@ vllm --help
 Available Commands:
 
 ```bash
-vllm {chat,complete,serve,bench,collect-env,run-batch}
+vllm {chat,complete,serve,launch,bench,collect-env,run-batch}
 ```
 
 ## serve
@@ -37,24 +37,36 @@ vllm serve meta-llama/Llama-2-7b-hf --uds /tmp/vllm.sock
 Check with --help for more options:
 
 ```bash
-# To list all groups
-vllm serve --help=listgroup
+# To list all flags
+vllm serve --help=all
 
-# To view a argument group
+# To view an argument group
 vllm serve --help=ModelConfig
 
 # To view a single argument
 vllm serve --help=max-num-seqs
 
-# To search by keyword
+# To search by keyword or flag name
 vllm serve --help=max
-
-# To view full help with pager (less/more)
-vllm serve --help=page
 ```
 
 See [vllm serve](./serve.md) for the full reference of all available arguments.
 
+## launch
+
+Launch individual vLLM components.
+
+```bash
+# Launch the rendering server component
+vllm launch render meta-llama/Llama-3.2-1B-Instruct
+
+# Inspect all available flags for the render component
+vllm launch render --help=all
+```
+
+See [vllm launch render](./launch/render.md) for the current launch
+component reference.
+
 ## chat
 
 Generate chat completions via the running API server.
@@ -163,7 +175,7 @@ Running with a local file:
 
 ```bash
 vllm run-batch \
-    -i offline_inference/openai_batch/openai_example_batch.jsonl \
+    -i features/openai_batch/openai_example_batch.jsonl \
     -o results.jsonl \
     --model meta-llama/Meta-Llama-3-8B-Instruct
 ```
@@ -172,7 +184,7 @@ Using remote file:
 
 ```bash
 vllm run-batch \
-    -i https://raw.githubusercontent.com/vllm-project/vllm/main/examples/offline_inference/openai_batch/openai_example_batch.jsonl \
+    -i https://raw.githubusercontent.com/vllm-project/vllm/main/examples/features/openai_batch/openai_example_batch.jsonl \
     -o results.jsonl \
     --model meta-llama/Meta-Llama-3-8B-Instruct
 ```
diff --git a/docs/cli/launch/render.md b/docs/cli/launch/render.md
new file mode 100644
index 000000000000..4d15e5f1162d
--- /dev/null
+++ b/docs/cli/launch/render.md
@@ -0,0 +1,22 @@
+# vllm launch render
+
+## Overview
+
+`vllm launch render` starts a GPU-less rendering server for preprocessing and
+postprocessing only.
+
+```bash
+vllm launch render meta-llama/Llama-3.2-1B-Instruct --port 8100
+```
+
+This command reuses the standard serving parser, so model, frontend,
+networking, and related CLI options follow the same conventions as
+[`vllm serve`](../serve.md).
+
+## JSON CLI Arguments
+
+--8<-- "docs/cli/json_tip.inc.md"
+
+## Arguments
+
+--8<-- "docs/generated/argparse/launch_render.inc.md"
diff --git a/docs/configuration/README.md b/docs/configuration/README.md
index 85ae642ba6dd..9a1c3c9c0f63 100644
--- a/docs/configuration/README.md
+++ b/docs/configuration/README.md
@@ -4,6 +4,6 @@ This section lists the most common options for running vLLM.
 
 There are three main levels of configuration, from highest priority to lowest priority:
 
-- [Request parameters](../serving/openai_compatible_server.md#completions-api) and [input arguments](../api/README.md#inference-parameters)
+- [Request parameters](../serving/online_serving/openai_compatible_server.md#completions-api) and [input arguments](../api/README.md#inference-parameters)
 - [Engine arguments](./engine_args.md)
 - [Environment variables](./env_vars.md)
diff --git a/docs/configuration/conserving_memory.md b/docs/configuration/conserving_memory.md
index 8ea241c582e5..2c098118dbb1 100644
--- a/docs/configuration/conserving_memory.md
+++ b/docs/configuration/conserving_memory.md
@@ -23,7 +23,7 @@ llm = LLM(model="ibm-granite/granite-3.1-8b-instruct", tensor_parallel_size=2)
 !!! note
     With tensor parallelism enabled, each process will read the whole model and split it into chunks, which makes the disk reading time even longer (proportional to the size of tensor parallelism).
 
-    You can convert the model checkpoint to a sharded checkpoint using [examples/offline_inference/save_sharded_state.py](../../examples/offline_inference/save_sharded_state.py). The conversion process might take some time, but later you can load the sharded checkpoint much faster. The model loading time should remain constant regardless of the size of tensor parallelism.
+    You can convert the model checkpoint to a sharded checkpoint using [examples/features/sharded_state/load_sharded_state_offline.py](../../examples/features/sharded_state/load_sharded_state_offline.py). The conversion process might take some time, but later you can load the sharded checkpoint much faster. The model loading time should remain constant regardless of the size of tensor parallelism.
 
 ## Quantization
 
diff --git a/docs/configuration/engine_args.md b/docs/configuration/engine_args.md
index 14589478821f..b619cbf3db02 100644
--- a/docs/configuration/engine_args.md
+++ b/docs/configuration/engine_args.md
@@ -7,7 +7,7 @@ toc_depth: 3
 Engine arguments control the behavior of the vLLM engine.
 
 - For [offline inference](../serving/offline_inference.md), they are part of the arguments to [LLM][vllm.LLM] class.
-- For [online serving](../serving/openai_compatible_server.md), they are part of the arguments to `vllm serve`.
+- For [online serving](../serving/online_serving/README.md), they are part of the arguments to `vllm serve`.
 
 The engine argument classes, [EngineArgs][vllm.engine.arg_utils.EngineArgs] and [AsyncEngineArgs][vllm.engine.arg_utils.AsyncEngineArgs], are a combination of the configuration classes defined in [vllm.config][]. Therefore, if you are interested in developer documentation, we recommend looking at these configuration classes as they are the source of truth for types, defaults and docstrings.
 
diff --git a/docs/configuration/optimization.md b/docs/configuration/optimization.md
index 56329a6edcc5..eb6bdce37b99 100644
--- a/docs/configuration/optimization.md
+++ b/docs/configuration/optimization.md
@@ -140,6 +140,80 @@ Data parallelism replicates the entire model across multiple GPU sets and proces
 Data parallelism can be combined with the other parallelism strategies and is set by `data_parallel_size=N`.
 Note that MoE layers will be sharded according to the product of the tensor parallel size and data parallel size.
 
+### NUMA Binding for Multi-Socket GPU Nodes
+
+On multi-socket GPU servers, GPU worker processes can lose performance if their
+CPU execution and memory allocation drift away from the NUMA node nearest to the
+GPU. vLLM can pin each worker with `numactl` before the Python subprocess starts,
+so the interpreter, imports, and early allocator state are created with the
+desired NUMA policy from the beginning.
+
+Use `--numa-bind` to enable the feature. By default, vLLM auto-detects the
+GPU-to-NUMA mapping and uses `--cpunodebind=<node> --membind=<node>` for each
+worker. When you need a custom CPU policy, add `--numa-bind-cpus` and vLLM will
+switch to `--physcpubind=<cpu-list> --membind=<node>`.
+
+These `--numa-bind*` options only apply to GPU execution processes. They do not
+configure the CPU backend's separate thread-affinity controls. Automatic
+GPU-to-NUMA detection is currently implemented for CUDA/NVML-based as well as
+ROCM-based platforms; other GPU backends must provide explicit binding lists if
+they use these options.
+
+`--numa-bind-nodes` takes one non-negative NUMA node index per visible GPU, in
+the same order as the GPU indices.
+`--numa-bind-cpus` takes one `numactl` CPU list per visible GPU, in the same
+order as the GPU indices. Each CPU list must use
+`numactl --physcpubind` syntax such as `0-3`, `0,2,4-7`, or `16-31,48-63`.
+
+```bash
+# Auto-detect NUMA nodes for visible GPUs
+vllm serve meta-llama/Llama-3.1-8B-Instruct \
+  --tensor-parallel-size 4 \
+  --numa-bind
+
+# Explicit NUMA-node mapping
+vllm serve meta-llama/Llama-3.1-8B-Instruct \
+  --tensor-parallel-size 4 \
+  --numa-bind \
+  --numa-bind-nodes 0 0 1 1
+
+# Explicit CPU pinning, useful for PCT or other high-frequency core layouts
+vllm serve meta-llama/Llama-3.1-8B-Instruct \
+  --tensor-parallel-size 4 \
+  --numa-bind \
+  --numa-bind-nodes 0 0 1 1 \
+  --numa-bind-cpus 0-3 4-7 48-51 52-55
+```
+
+Notes:
+
+- CLI usage forces multiprocessing to use the `spawn` method automatically. If you enable NUMA binding through the Python API, also set `VLLM_WORKER_MULTIPROC_METHOD=spawn`.
+- Automatic detection relies on NVML and NUMA support from the host. If it cannot determine the mapping reliably, pass `--numa-bind-nodes` explicitly.
+- Explicit `--numa-bind-nodes` and `--numa-bind-cpus` values must be valid `numactl` inputs. vLLM does a small amount of validation, but the effective binding semantics are still determined by `numactl`.
+- The current implementation binds GPU execution processes such as `EngineCore` and multiprocessing workers. It does not apply NUMA binding to frontend API server processes or the DP coordinator.
+- In containerized environments, NUMA policy syscalls may require extra permissions, such as `--cap-add SYS_NICE` when running via `docker run`.
+
+### CPU Backend Thread Affinity
+
+The CPU backend uses a different mechanism from `--numa-bind`. CPU execution is
+configured through CPU-specific environment variables such as
+`VLLM_CPU_OMP_THREADS_BIND`, `VLLM_CPU_NUM_OF_RESERVED_CPU`, and
+`CPU_VISIBLE_MEMORY_NODES`, rather than the GPU-oriented `--numa-bind*` CLI
+options.
+
+By default, `VLLM_CPU_OMP_THREADS_BIND=auto` derives OpenMP placement from the
+available CPU and NUMA topology for each CPU worker. To override the automatic
+policy, set `VLLM_CPU_OMP_THREADS_BIND` explicitly using the CPU list format
+documented for the CPU backend, or use `nobind` to disable this behavior.
+
+For the current CPU backend setup and tuning guidance, see:
+
+- [Related runtime environment variables](../getting_started/installation/cpu.md#related-runtime-environment-variables)
+- [How to decide `VLLM_CPU_OMP_THREADS_BIND`](../getting_started/installation/cpu.md#how-to-decide-vllm_cpu_omp_threads_bind)
+
+The GPU-only `--numa-bind`, `--numa-bind-nodes`, and `--numa-bind-cpus` options
+do not configure CPU worker affinity.
+
 ### Batch-level DP for Multi-Modal Encoders
 
 By default, TP is used to shard the weights of multi-modal encoders just like for language decoders,
@@ -196,6 +270,39 @@ Known supported models (with corresponding benchmarks):
 
 ## Input Processing
 
+### fastokens Backend
+
+By default vLLM uses the standard Hugging Face `tokenizers` library to power
+the fast tokenizer. For BPE tokenizers (Qwen, Llama, DeepSeek, GPT-OSS, etc.)
+you can switch to the [fastokens](https://github.com/crusoecloud/fastokens)
+Rust backend, a drop-in replacement that's substantially faster on
+encode/decode and on streaming detokenization. Enable it by setting
+`VLLM_USE_FASTOKENS=1`:
+
+```console
+VLLM_USE_FASTOKENS=1 vllm serve Qwen/Qwen3-8B
+```
+
+Equivalent in the offline API:
+
+```python
+import os
+os.environ["VLLM_USE_FASTOKENS"] = "1"
+
+from vllm import LLM
+llm = LLM(model="Qwen/Qwen3-8B")
+```
+
+The `fastokens` Python package (>= 0.2.0) must be installed; if it isn't,
+vLLM raises a clear `ImportError` at tokenizer load. The override applies to
+any `--tokenizer-mode` that ends up loading an HF fast tokenizer (`hf`,
+`deepseek_v32`, `deepseek_v4`, `qwen_vl`, …). Modes that don't use the HF
+fast tokenizer (`mistral`, `grok2`, `kimi_audio`) ignore the flag.
+
+Tokenizer-bound workloads — long shared prefixes, bursty short prompts,
+batch detokenization — see the largest wins. If your bottleneck is GPU
+prefill/decode, the tokenizer change is unlikely to be visible end-to-end.
+
 ### Parallel Processing
 
 You can run input processing in parallel via [API server scale-out](../serving/data_parallel_deployment.md#internal-load-balancing).
diff --git a/docs/contributing/README.md b/docs/contributing/README.md
index 24e7d1c5be06..9b5e26d0fed8 100644
--- a/docs/contributing/README.md
+++ b/docs/contributing/README.md
@@ -43,16 +43,23 @@ If you are only developing vLLM's Python code, install vLLM using:
 VLLM_USE_PRECOMPILED=1 uv pip install -e .
 ```
 
+To rebuild only the Rust frontend binary:
+
+```bash
+./build_rust.sh          # release build
+./build_rust.sh --debug  # faster build for development
+```
+
 If you are developing vLLM's Python and CUDA/C++ code, install Pytorch first:
 
 ```bash
 uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu129
 ```
 
-Then install the necessary build dependencies from `requirements/build.txt`, skipping `torch` as it was installed in the previous step:
+Then install the necessary build dependencies from `requirements/build/cuda.txt`, skipping `torch` as it was installed in the previous step:
 
 ```bash
-grep -v '^torch==' requirements/build.txt | uv pip install -r -
+grep -v '^torch==' requirements/build/cuda.txt | uv pip install -r -
 ```
 
 Finally install vLLM using:
@@ -297,6 +304,18 @@ review process:
   resources. The reviewer will add `ready` label to the PR when the PR is
   ready to merge or a full CI run is needed.
 
+### Escalating Stalled Contributions
+
+If you have an important contribution that has not yet received maintainer attention, please email us at:
+
+<pr-review-request@vllm.ai>
+
+Using a verifiable company or university email, include:
+
+- your production or research use case
+- the problem you encountered
+- how your contribution addresses it
+
 ## Thank You
 
 Finally, thank you for taking the time to read these guidelines and for your interest in contributing to vLLM.
diff --git a/docs/contributing/ci/failures.md b/docs/contributing/ci/failures.md
index dad04e75fbb6..a0038f461a04 100644
--- a/docs/contributing/ci/failures.md
+++ b/docs/contributing/ci/failures.md
@@ -60,9 +60,19 @@ the failure?
 
 ## Logs Wrangling
 
-Download the full log file from Buildkite locally.
+Download a job's log (no Buildkite login required):
 
-Strip timestamps and colorization:
+[.buildkite/scripts/ci-fetch-log.sh](../../../.buildkite/scripts/ci-fetch-log.sh)
+
+```bash
+# Find the failing job. Each row's URL is .../builds/<N>#<job_uuid>:
+gh pr checks <PR> --repo vllm-project/vllm
+
+# Download + strip timestamps/ANSI in one step:
+.buildkite/scripts/ci-fetch-log.sh "https://buildkite.com/vllm/ci/builds/<N>#<job_uuid>"
+```
+
+To clean an already-downloaded log:
 
 [.buildkite/scripts/ci-clean-log.sh](../../../.buildkite/scripts/ci-clean-log.sh)
 
diff --git a/docs/contributing/ci/nightly_builds.md b/docs/contributing/ci/nightly_builds.md
index a07b9c1c2fa4..8f3512db3d40 100644
--- a/docs/contributing/ci/nightly_builds.md
+++ b/docs/contributing/ci/nightly_builds.md
@@ -136,10 +136,10 @@ When installing vLLM with `VLLM_USE_PRECOMPILED=1`, the `setup.py` script:
 3. **Selects compatible wheel** based on:
     - Package name (`vllm`)
     - Platform tag (architecture match)
-4. **Downloads and extracts** precompiled binaries from the wheel:
-    - C++ extension modules (`.so` files)
-    - Flash Attention Python modules
-    - Triton kernel Python files
+4. **Downloads and extracts** precompiled artifacts from the wheel:
+    - Native extension modules (`.so` files)
+    - The `vllm-rs` Rust frontend binary
+    - Flash Attention Python modules and Triton/FlashMLA Python files
 5. **Patches package_data** to include extracted files in the installation
 
 !!! note "What is the base commit?"
diff --git a/docs/contributing/incremental_build.md b/docs/contributing/incremental_build.md
index cc01a60ce1e7..6be35af6131b 100644
--- a/docs/contributing/incremental_build.md
+++ b/docs/contributing/incremental_build.md
@@ -16,10 +16,10 @@ Before setting up the incremental build:
 
 2. **CUDA Toolkit:** Verify that the NVIDIA CUDA Toolkit is correctly installed and `nvcc` is accessible in your `PATH`. CMake relies on `nvcc` to compile CUDA code. You can typically find `nvcc` in `$CUDA_HOME/bin/nvcc` or by running `which nvcc`. If you encounter issues, refer to the [official CUDA Toolkit installation guides](https://developer.nvidia.com/cuda-toolkit-archive) and vLLM's main [GPU installation documentation](../getting_started/installation/gpu.md#troubleshooting) for troubleshooting. The `CMAKE_CUDA_COMPILER` variable in your `CMakeUserPresets.json` should also point to your `nvcc` binary.
 
-3. **Build Tools:** It is highly recommended to install `ccache` for fast rebuilds by caching compilation results (e.g., `sudo apt install ccache` or `conda install ccache`). Also, ensure the core build dependencies like `cmake` and `ninja` are installed. These are installable through `requirements/build.txt` or your system's package manager.
+3. **Build Tools:** It is highly recommended to install `ccache` for fast rebuilds by caching compilation results (e.g., `sudo apt install ccache` or `conda install ccache`). Also, ensure the core build dependencies like `cmake` and `ninja` are installed. These are installable through `requirements/build/cuda.txt` or your system's package manager.
 
     ```console
-    uv pip install -r requirements/build.txt --torch-backend=auto
+    uv pip install -r requirements/build/cuda.txt --torch-backend=auto
     ```
 
 ## Setting up the CMake Build Environment
diff --git a/docs/contributing/model/basic.md b/docs/contributing/model/basic.md
index ba1f5e43d61e..dceb78f52638 100644
--- a/docs/contributing/model/basic.md
+++ b/docs/contributing/model/basic.md
@@ -142,7 +142,7 @@ We use "mamba-like" to refer to layers that possess a state that is updated in-p
 For implementing new custom mamba-like layers, one should inherit from `MambaBase` and implement the methods `get_state_dtype`, `get_state_shape` to calculate the data types and state shapes at runtime, as well as `mamba_type` and `get_attn_backend`.
 It is also necessary to implement the "attention meta-data" class which handles the meta-data that is common across all layers.
 Please see [`LinearAttentionMetadata`](../../../vllm/v1/attention/backends/linear_attn.py) or [`ShortConvAttentionMetadata`](../../../vllm/v1/attention/backends/short_conv_attn.py) for examples of this.
-It is also worth noting that we should update `MAMBA_TYPE_TO_BACKEND_MAP` and `MambaAttentionBackendEnum` in [`registry.py`](../../../vllm/v1/attention/backends/registry.py) when adding a new mamba backend.
+It is also worth noting that we should update `MambaAttentionBackendEnum` in [`registry.py`](../../../vllm/v1/attention/backends/registry.py) when adding a new mamba backend.
 Finally, if one wants to support torch compile and CUDA graphs, it necessary to wrap the call to the mamba-like layer inside a custom op and register it.
 Please see the calls to `direct_register_custom_op` in [vllm/model_executor/models/minimax_text_01.py](../../../vllm/model_executor/models/minimax_text_01.py) or [vllm/model_executor/layers/mamba/short_conv.py](../../../vllm/model_executor/layers/mamba/short_conv.py) for examples of this.
 The new custom op should then be added to the list `_attention_ops` in [vllm/config/compilation.py](../../../vllm/config/compilation.py) to ensure that piecewise CUDA graphs works as intended.
diff --git a/docs/contributing/model/transcription.md b/docs/contributing/model/transcription.md
index a23de100da39..b076ef84a46c 100644
--- a/docs/contributing/model/transcription.md
+++ b/docs/contributing/model/transcription.md
@@ -66,7 +66,7 @@ This is for controlling general behavior of the API when serving your model:
 
 See [Audio preprocessing and chunking](#audio-preprocessing-and-chunking) for what each field controls.
 
-Implement the prompt construction via [get_generation_prompt][vllm.model_executor.models.interfaces.SupportsTranscription.get_generation_prompt]. The server passes you the resampled waveform and task parameters; you return a valid [PromptType][vllm.inputs.llm.PromptType]. There are two common patterns:
+Implement the prompt construction via [get_generation_prompt][vllm.model_executor.models.interfaces.SupportsTranscription.get_generation_prompt]. The server builds a [SpeechToTextParams][vllm.config.speech_to_text.SpeechToTextParams] object that bundles the resampled waveform, task parameters, and request-specific options. Your model receives this single object and returns a valid [PromptType][vllm.inputs.llm.PromptType]. There are two common patterns:
 
 #### Multimodal LLM with audio embeddings (e.g., Voxtral, Gemma3n)
 
@@ -75,21 +75,20 @@ Return a dict containing `multi_modal_data` with the audio, and either a `prompt
 ??? code "get_generation_prompt()"
 
     ```python
+    from vllm.config.speech_to_text import SpeechToTextParams
+
     class YourASRModel(nn.Module, SupportsTranscription):
         ...
 
         @classmethod
         def get_generation_prompt(
             cls,
-            audio: np.ndarray,
-            stt_config: SpeechToTextConfig,
-            model_config: ModelConfig,
-            language: str | None,
-            task_type: Literal["transcribe", "translate"],
-            request_prompt: str,
-            to_language: str | None,
+            stt_params: SpeechToTextParams,
         ) -> PromptType:
-            # Example with a free-form instruction prompt
+            audio = stt_params.audio
+            stt_config = stt_params.stt_config
+            task_type = stt_params.task_type
+
             task_word = "Transcribe" if task_type == "transcribe" else "Translate"
             prompt = (
                 "<start_of_turn>user\n"
@@ -112,20 +111,22 @@ Return a dict with separate `encoder_prompt` and `decoder_prompt` entries:
 ??? code "get_generation_prompt()"
 
     ```python
+    from vllm.config.speech_to_text import SpeechToTextParams
+
     class YourASRModel(nn.Module, SupportsTranscription):
         ...
 
         @classmethod
         def get_generation_prompt(
             cls,
-            audio: np.ndarray,
-            stt_config: SpeechToTextConfig,
-            model_config: ModelConfig,
-            language: str | None,
-            task_type: Literal["transcribe", "translate"],
-            request_prompt: str,
-            to_language: str | None,
+            stt_params: SpeechToTextParams,
         ) -> PromptType:
+            audio = stt_params.audio
+            stt_config = stt_params.stt_config
+            language = stt_params.language
+            task_type = stt_params.task_type
+            request_prompt = stt_params.request_prompt
+
             if language is None:
                 raise ValueError("Language must be specified")
 
@@ -193,7 +194,7 @@ Provide a fast duration→token estimate to improve streaming usage statistics:
 
 The API server takes care of basic audio I/O and optional chunking before building prompts:
 
-- Resampling: Input audio is resampled to `SpeechToTextConfig.sample_rate` using `librosa`.
+- Resampling: Input audio is resampled to `SpeechToTextConfig.sample_rate` using `AudioResampler`.
 - Chunking: If `SpeechToTextConfig.allow_audio_chunking` is True and the duration exceeds `max_audio_clip_s`, the server splits the audio into overlapping chunks and generates a prompt per chunk. Overlap is controlled by `overlap_chunk_second`.
 - Energy-aware splitting: When `min_energy_split_window_size` is set, the server finds low-energy regions to minimize cutting within words.
 
@@ -206,22 +207,20 @@ Relevant server logic:
     async def _preprocess_speech_to_text(...):
         language = self.model_cls.validate_language(request.language)
         ...
-        y, sr = librosa.load(bytes_, sr=self.asr_config.sample_rate)
-        duration = librosa.get_duration(y=y, sr=sr)
+        y, sr = load_audio(bytes_, sr=self.asr_config.sample_rate)
+        duration = get_audio_duration(y=y, sr=sr)
         do_split_audio = (self.asr_config.allow_audio_chunking
                         and duration > self.asr_config.max_audio_clip_s)
         chunks = [y] if not do_split_audio else self._split_audio(y, int(sr))
         prompts = []
         for chunk in chunks:
-            prompt = self.model_cls.get_generation_prompt(
+            stt_params = request.build_stt_params(
                 audio=chunk,
                 stt_config=self.asr_config,
                 model_config=self.model_config,
-                language=language,
                 task_type=self.task_type,
-                request_prompt=request.prompt,
-                to_language=to_language,
             )
+            prompt = self.model_cls.get_generation_prompt(stt_params)
             prompts.append(prompt)
         return prompts, duration
     ```
@@ -279,7 +278,7 @@ Once your model implements `SupportsTranscription`, you can test the endpoints (
       http://localhost:8000/v1/audio/translations
     ```
 
-Or check out more examples in [examples/online_serving](../../../examples/online_serving).
+Or check out more examples in [examples/speech_to_text](../../../examples/speech_to_text).
 
 !!! note
     - If your model handles chunking internally (e.g., via its processor or encoder), set `min_energy_split_window_size=None` in the returned `SpeechToTextConfig` to disable server-side chunking.
diff --git a/docs/contributing/profiling.md b/docs/contributing/profiling.md
index 1d12d63549a0..91757c40e4f8 100644
--- a/docs/contributing/profiling.md
+++ b/docs/contributing/profiling.md
@@ -42,7 +42,7 @@ Traces can be visualized using <https://ui.perfetto.dev/>.
 
 #### Offline Inference
 
-Refer to [examples/offline_inference/simple_profiling.py](../../examples/offline_inference/simple_profiling.py) for an example.
+Refer to [examples/features/profiling/simple_profiling_offline.py](../../examples/features/profiling/simple_profiling_offline.py) for an example.
 
 #### OpenAI Server
 
@@ -206,8 +206,8 @@ Both the `vllm.utils.profiling.cprofile` and `vllm.utils.profiling.cprofile_cont
 used to profile a section of code.
 
 !!! note
-    The legacy import paths `vllm.utils.cprofile` and `vllm.utils.cprofile_context` are deprecated.
-    Please use `vllm.utils.profiling.cprofile` and `vllm.utils.profiling.cprofile_context` instead.
+    The `vllm.utils.profiling` helpers are deprecated and will be removed in
+    `v0.21`. Please use Python's `cProfile` module directly instead.
 
 ### Example usage - decorator
 
diff --git a/docs/contributing/vulnerability_management.md b/docs/contributing/vulnerability_management.md
index 847883f74297..75efbac90a16 100644
--- a/docs/contributing/vulnerability_management.md
+++ b/docs/contributing/vulnerability_management.md
@@ -34,6 +34,7 @@ you may contact the following individuals:
 
 - Simon Mo - <simon.mo@hey.com>
 - Russell Bryant - <rbryant@redhat.com>
+- Juan Pérez de Algaba - <jperezde@redhat.com>
 - Huzaifa Sidhpurwala - <huzaifas@redhat.com>
 
 ## Slack Discussion
diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md
index 39cd085b26e7..a8debf2cdb3a 100644
--- a/docs/deployment/docker.md
+++ b/docs/deployment/docker.md
@@ -8,6 +8,64 @@ toc_depth: 2
 
 --8<-- "docs/getting_started/installation/gpu.md:pre-built-images"
 
+## Run as a non-root user
+
+The CUDA `vllm/vllm-openai` image runs as root by default for backward
+compatibility. It is also prepared to run as the built-in `vllm` user
+(UID 2000, GID 0):
+
+```bash
+docker run --rm --gpus all \
+    --user 2000:0 \
+    -p 8000:8000 \
+    vllm/vllm-openai:latest \
+    meta-llama/Llama-3.1-8B-Instruct
+```
+
+When mounting model or cache volumes for a non-root container, mount writable
+paths under `/home/vllm` instead of `/root`. For example, mount the Hugging
+Face cache at `/home/vllm/.cache/huggingface` and make the mounted directory
+writable by group 0.
+
+```bash
+docker run --rm --gpus all \
+    --user 2000:0 \
+    -v ~/.cache/huggingface:/home/vllm/.cache/huggingface \
+    -p 8000:8000 \
+    vllm/vllm-openai:latest \
+    meta-llama/Llama-3.1-8B-Instruct
+```
+
+To build an image that defaults to the non-root `vllm` user, use the opt-in
+`vllm-openai-nonroot` target:
+
+```bash
+docker build --target vllm-openai-nonroot \
+    -t vllm-openai-nonroot:local \
+    -f docker/Dockerfile .
+
+docker run --rm --gpus all \
+    -p 8000:8000 \
+    vllm-openai-nonroot:local \
+    meta-llama/Llama-3.1-8B-Instruct
+```
+
+The `vllm-openai-nonroot` target also supports OpenShift-style arbitrary UIDs
+when the runtime UID is a member of group 0. In Kubernetes manifests, set the
+container security context accordingly and keep mounted cache/model paths
+writable by group 0:
+
+```yaml
+securityContext:
+  runAsNonRoot: true
+  runAsUser: 1000540000
+  runAsGroup: 0
+  fsGroup: 0
+```
+
+Runtime UIDs outside group 0 are not part of the documented support matrix
+because they may be unable to write to `/home/vllm` or `/opt/uv/cache`.
+
 ## Build image from source
 
 --8<-- "docs/getting_started/installation/gpu.md:build-image-from-source"
diff --git a/docs/deployment/frameworks/anyscale.md b/docs/deployment/frameworks/anyscale.md
index 965742ec0726..6888e4dbf0b6 100644
--- a/docs/deployment/frameworks/anyscale.md
+++ b/docs/deployment/frameworks/anyscale.md
@@ -3,7 +3,7 @@
 [Anyscale](https://www.anyscale.com) is a managed, multi-cloud platform developed by the creators of Ray.
 
 Anyscale automates the entire lifecycle of Ray clusters in your AWS, GCP, or Azure account, delivering the flexibility of open-source Ray
-without the operational overhead of maintaining Kubernetes control planes, configuring autoscalers, managing observability stacks, or manually managing head and worker nodes with helper scripts like [examples/online_serving/run_cluster.sh](../../../examples/online_serving/run_cluster.sh).
+without the operational overhead of maintaining Kubernetes control planes, configuring autoscalers, managing observability stacks, or manually managing head and worker nodes with helper scripts like [examples/ray_serving/run_cluster.sh](../../../examples/ray_serving/run_cluster.sh).
 
 When serving large language models with vLLM, Anyscale can rapidly provision [production-ready HTTPS endpoints](https://docs.anyscale.com/examples/deploy-ray-serve-llms) or [fault-tolerant batch inference jobs](https://docs.anyscale.com/examples/ray-data-llm).
 
diff --git a/docs/deployment/frameworks/helm.md b/docs/deployment/frameworks/helm.md
index a0aee70b1b32..45924dd6f7c3 100644
--- a/docs/deployment/frameworks/helm.md
+++ b/docs/deployment/frameworks/helm.md
@@ -17,7 +17,7 @@ Before you begin, ensure that you have the following:
 
 ## Installing the chart
 
-This guide uses the Helm chart at [examples/online_serving/chart-helm](../../../examples/online_serving/chart-helm).
+This guide uses the Helm chart at [examples/deployment/chart-helm](../../../examples/deployment/chart-helm).
 
 To install the chart with the release name `test-vllm`:
 
diff --git a/docs/deployment/frameworks/lws.md b/docs/deployment/frameworks/lws.md
index 14710a8dc333..47586bcd7003 100644
--- a/docs/deployment/frameworks/lws.md
+++ b/docs/deployment/frameworks/lws.md
@@ -40,7 +40,7 @@ Deploy the following yaml file `lws.yaml`
                 command:
                   - sh
                   - -c
-                  - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE); 
+                  - "bash /vllm-workspace/examples/ray_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE); 
                     vllm serve meta-llama/Meta-Llama-3.1-405B-Instruct --port 8080 --tensor-parallel-size 8 --pipeline_parallel_size 2"
                 resources:
                   limits:
@@ -73,7 +73,7 @@ Deploy the following yaml file `lws.yaml`
                 command:
                   - sh
                   - -c
-                  - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
+                  - "bash /vllm-workspace/examples/ray_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
                 resources:
                   limits:
                     nvidia.com/gpu: "8"
diff --git a/docs/deployment/frameworks/retrieval_augmented_generation.md b/docs/deployment/frameworks/retrieval_augmented_generation.md
index 8a5d18807d06..c23862cde72f 100644
--- a/docs/deployment/frameworks/retrieval_augmented_generation.md
+++ b/docs/deployment/frameworks/retrieval_augmented_generation.md
@@ -36,7 +36,7 @@ pip install -U vllm \
     vllm serve qwen/Qwen1.5-0.5B-Chat --port 8001
     ```
 
-1. Use the script: [examples/online_serving/retrieval_augmented_generation_with_langchain.py](../../../examples/online_serving/retrieval_augmented_generation_with_langchain.py)
+1. Use the script: [examples/applications/rag/retrieval_augmented_generation_with_langchain.py](../../../examples/applications/rag/retrieval_augmented_generation_with_langchain.py)
 
 1. Run the script
 
@@ -74,7 +74,7 @@ pip install vllm \
     vllm serve qwen/Qwen1.5-0.5B-Chat --port 8001
     ```
 
-1. Use the script: [examples/online_serving/retrieval_augmented_generation_with_llamaindex.py](../../../examples/online_serving/retrieval_augmented_generation_with_llamaindex.py)
+1. Use the script: [examples/applications/rag/retrieval_augmented_generation_with_llamaindex.py](../../../examples/applications/rag/retrieval_augmented_generation_with_llamaindex.py)
 
 1. Run the script:
 
diff --git a/docs/deployment/frameworks/runpod.md b/docs/deployment/frameworks/runpod.md
index 61ca3c4e68ce..b00350386e48 100644
--- a/docs/deployment/frameworks/runpod.md
+++ b/docs/deployment/frameworks/runpod.md
@@ -12,8 +12,7 @@ vLLM can be deployed on [RunPod](https://www.runpod.io/), a cloud GPU platform t
 SSH into your RunPod pod and launch the vLLM OpenAI-compatible server:
 
 ```bash
-python -m vllm.entrypoints.openai.api_server \
-    --model <model-name> \
+vllm serve <model-name> \
     --host 0.0.0.0 \
     --port 8000
 ```
diff --git a/docs/deployment/frameworks/skypilot.md b/docs/deployment/frameworks/skypilot.md
index e9b0d5f0671c..e032769f15e8 100644
--- a/docs/deployment/frameworks/skypilot.md
+++ b/docs/deployment/frameworks/skypilot.md
@@ -59,7 +59,7 @@ See the vLLM SkyPilot YAML for serving, [serving.yaml](https://github.com/skypil
 
       echo 'Starting gradio server...'
       git clone https://github.com/vllm-project/vllm.git || true
-      python vllm/examples/online_serving/gradio_openai_chatbot_webserver.py \
+      python vllm/examples/applications/chatbot/gradio_openai_chatbot_webserver.py \
         -m $MODEL_NAME \
         --port 8811 \
         --model-url http://localhost:8081/v1 \
@@ -305,7 +305,7 @@ It is also possible to access the Llama-3 service with a separate GUI frontend,
 
       echo 'Starting gradio server...'
       git clone https://github.com/vllm-project/vllm.git || true
-      python vllm/examples/online_serving/gradio_openai_chatbot_webserver.py \
+      python vllm/examples/applications/api_client/gradio_openai_chatbot_webserver.py \
         -m $MODEL_NAME \
         --port 8811 \
         --model-url http://$ENDPOINT/v1 \
diff --git a/docs/deployment/frameworks/streamlit.md b/docs/deployment/frameworks/streamlit.md
index 1b214e1a32aa..6c0c8c6c1430 100644
--- a/docs/deployment/frameworks/streamlit.md
+++ b/docs/deployment/frameworks/streamlit.md
@@ -20,7 +20,7 @@ pip install vllm streamlit openai
     vllm serve Qwen/Qwen1.5-0.5B-Chat
     ```
 
-1. Use the script: [examples/online_serving/streamlit_openai_chatbot_webserver.py](../../../examples/online_serving/streamlit_openai_chatbot_webserver.py)
+1. Use the script: [examples/applications/chatbot/streamlit_openai_chatbot_webserver.py](../../../examples/applications/chatbot/streamlit_openai_chatbot_webserver.py)
 
 1. Start the streamlit web UI and start to chat:
 
diff --git a/docs/deployment/integrations/kthena.md b/docs/deployment/integrations/kthena.md
index 483dd7474440..03ef190e558c 100644
--- a/docs/deployment/integrations/kthena.md
+++ b/docs/deployment/integrations/kthena.md
@@ -78,10 +78,9 @@ Key points from the example YAML:
     - sh
     - -c
     - >
-      bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=2;
-      python3 -m vllm.entrypoints.openai.api_server
+      bash /vllm-workspace/examples/ray_serving/multi-node-serving.sh leader --ray_cluster_size=2;
+      vllm serve meta-llama/Llama-3.1-405B-Instruct
         --port 8080
-        --model meta-llama/Llama-3.1-405B-Instruct
         --tensor-parallel-size 8
         --pipeline-parallel-size 2
   ```
@@ -93,7 +92,7 @@ Key points from the example YAML:
     - sh
     - -c
     - >
-      bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(ENTRY_ADDRESS)
+      bash /vllm-workspace/examples/ray_serving/multi-node-serving.sh worker --ray_address=$(ENTRY_ADDRESS)
   ```
 
 ---
@@ -144,8 +143,8 @@ spec:
                 command:
                   - sh
                   - -c
-                  - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=2; 
-                    python3 -m vllm.entrypoints.openai.api_server --port 8080 --model meta-llama/Llama-3.1-405B-Instruct --tensor-parallel-size 8 --pipeline-parallel-size 2"
+                  - "bash /vllm-workspace/examples/ray_serving/multi-node-serving.sh leader --ray_cluster_size=2; 
+                    vllm serve meta-llama/Llama-3.1-405B-Instruct --port 8080 --tensor-parallel-size 8 --pipeline-parallel-size 2"
                 resources:
                   limits:
                     nvidia.com/gpu: "8"
@@ -178,7 +177,7 @@ spec:
                 command:
                   - sh
                   - -c
-                  - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(ENTRY_ADDRESS)"
+                  - "bash /vllm-workspace/examples/ray_serving/multi-node-serving.sh worker --ray_address=$(ENTRY_ADDRESS)"
                 resources:
                   limits:
                     nvidia.com/gpu: "8"
diff --git a/docs/deployment/integrations/llm-d.md b/docs/deployment/integrations/llm-d.md
index cccf1773c6be..6060b98f6421 100644
--- a/docs/deployment/integrations/llm-d.md
+++ b/docs/deployment/integrations/llm-d.md
@@ -2,4 +2,4 @@
 
 vLLM can be deployed with [llm-d](https://github.com/llm-d/llm-d), a Kubernetes-native distributed inference serving stack providing well-lit paths for anyone to serve large generative AI models at scale. It helps achieve the fastest "time to state-of-the-art (SOTA) performance" for key OSS models across most hardware accelerators and infrastructure providers.
 
-You can use vLLM with llm-d directly by following [this guide](https://llm-d.ai/docs/guide) or via [KServe's LLMInferenceService](https://kserve.github.io/website/docs/model-serving/generative-inference/llmisvc/llmisvc-overview).
+You can use vLLM with llm-d directly by following [the official guides](https://llm-d.ai/docs/guides) or via [KServe's LLMInferenceService](https://kserve.github.io/website/docs/model-serving/generative-inference/llmisvc/llmisvc-overview).
diff --git a/docs/deployment/k8s.md b/docs/deployment/k8s.md
index dbcb277278c9..7a92c99b2c4a 100644
--- a/docs/deployment/k8s.md
+++ b/docs/deployment/k8s.md
@@ -4,6 +4,7 @@ Deploying vLLM on Kubernetes is a scalable and efficient way to serve machine le
 
 - [Deployment with CPUs](#deployment-with-cpus)
 - [Deployment with GPUs](#deployment-with-gpus)
+- [Serving with gRPC](#serving-with-grpc)
 - [Troubleshooting](#troubleshooting)
     - [Startup Probe or Readiness Probe Failure, container log contains "KeyboardInterrupt: terminated"](#startup-probe-or-readiness-probe-failure-container-log-contains-keyboardinterrupt-terminated)
 - [Conclusion](#conclusion)
@@ -387,6 +388,49 @@ INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
 
       If the service is correctly deployed, you should receive a response from the vLLM model.
 
+## Serving with gRPC
+
+vLLM can serve models over gRPC instead of HTTP by passing the `--grpc` flag. This requires the optional gRPC dependencies:
+
+```bash
+pip install vllm[grpc]
+```
+
+When using `--grpc`, the server exposes the standard [gRPC Health Checking Protocol](https://github.com/grpc/grpc/blob/master/doc/health-checking.md) (`grpc.health.v1.Health`), which integrates with Kubernetes [native gRPC probes](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-grpc-liveness-probe) (available since Kubernetes 1.24).
+
+To deploy with gRPC, change the `vllm serve` command to include `--grpc` and replace `httpGet` probes with `grpc` probes:
+
+```yaml
+containers:
+- name: mistral-7b
+  image: vllm/vllm-openai:latest
+  command: ["/bin/sh", "-c"]
+  args: [
+    "pip install vllm[grpc] && vllm serve mistralai/Mistral-7B-Instruct-v0.3 --grpc --port 50051 --trust-remote-code"
+  ]
+  ports:
+  - containerPort: 50051
+  livenessProbe:
+    grpc:
+      port: 50051
+    initialDelaySeconds: 120
+    periodSeconds: 10
+  readinessProbe:
+    grpc:
+      port: 50051
+    initialDelaySeconds: 120
+    periodSeconds: 5
+```
+
+!!! note
+    The gRPC health service checks the engine status on every probe. If the engine is unhealthy or the server is shutting down, the probe returns `NOT_SERVING`.
+
+You can also verify the health service manually with `grpcurl`:
+
+```bash
+grpcurl -plaintext localhost:50051 grpc.health.v1.Health/Check
+```
+
 ## Troubleshooting
 
 ### Startup Probe or Readiness Probe Failure, container log contains "KeyboardInterrupt: terminated"
diff --git a/docs/design/arch_overview.md b/docs/design/arch_overview.md
index f8bc66d6d4b2..e419104bae37 100644
--- a/docs/design/arch_overview.md
+++ b/docs/design/arch_overview.md
@@ -51,9 +51,9 @@ More API details can be found in the [Offline Inference](../api/README.md#offlin
 
 The code for the `LLM` class can be found in [vllm/entrypoints/llm.py](../../vllm/entrypoints/llm.py).
 
-### OpenAI-Compatible API Server
+### Online Serving
 
-The second primary interface to vLLM is via its OpenAI-compatible API server.
+The second primary interface to vLLM is via its online server.
 This server can be started using the `vllm serve` command.
 
 ```bash
@@ -76,7 +76,7 @@ python -m vllm.entrypoints.openai.api_server --model <model>
 
 That code can be found in [vllm/entrypoints/openai/api_server.py](../../vllm/entrypoints/openai/api_server.py).
 
-More details on the API server can be found in the [OpenAI-Compatible Server](../serving/openai_compatible_server.md) document.
+More details on the API server can be found in the [Online Serving](../serving/online_serving/README.md) document.
 
 ## V1 Process Architecture
 
diff --git a/docs/design/attention_backends.md b/docs/design/attention_backends.md
index 47ac91464667..d70cd6fd74cb 100644
--- a/docs/design/attention_backends.md
+++ b/docs/design/attention_backends.md
@@ -106,6 +106,7 @@ Priority is **1 = highest** (tried first).
 | 2 | `FLASH_ATTN` |
 | 3 | `TRITON_ATTN` |
 | 4 | `FLEX_ATTENTION` |
+| 5 | `TURBOQUANT` |
 
 **Ampere/Hopper (SM 8.x-9.x):**
 
@@ -115,6 +116,7 @@ Priority is **1 = highest** (tried first).
 | 2 | `FLASHINFER` |
 | 3 | `TRITON_ATTN` |
 | 4 | `FLEX_ATTENTION` |
+| 5 | `TURBOQUANT` |
 
 ### MLA Attention (DeepSeek-style)
 
@@ -123,12 +125,13 @@ Priority is **1 = highest** (tried first).
 | Priority | Backend |
 | -------- | ------- |
 | 1 | `FLASHINFER_MLA` |
-| 2 | `CUTLASS_MLA` |
-| 3 | `FLASH_ATTN_MLA` |
-| 4 | `FLASHMLA` |
-| 5 | `TRITON_MLA` |
-| 6 | `FLASHINFER_MLA_SPARSE`**\*** |
-| 7 | `FLASHMLA_SPARSE` |
+| 2 | `TOKENSPEED_MLA` |
+| 3 | `CUTLASS_MLA` |
+| 4 | `FLASH_ATTN_MLA` |
+| 5 | `FLASHMLA` |
+| 6 | `TRITON_MLA` |
+| 7 | `FLASHINFER_MLA_SPARSE`**\*** |
+| 8 | `FLASHMLA_SPARSE` |
 
 **Ampere/Hopper (SM 8.x-9.x):**
 
@@ -153,6 +156,7 @@ Priority is **1 = highest** (tried first).
 | **Block Sizes** | Supported KV cache block sizes (%N means multiples of N) |
 | **Head Sizes** | Supported attention head sizes |
 | **Sink** | Attention sink support (for StreamingLLM) |
+| **Non-Causal** | Non-causal (bidirectional) attention support for decoder models |
 | **Sparse** | Sparse attention support (MLA only) |
 | **MM Prefix** | Multimodal prefix full attention support |
 | **DCP** | Decode Context Parallelism support (`--decode-context-parallel-size`) |
@@ -163,21 +167,21 @@ Priority is **1 = highest** (tried first).
 
 ## Standard Attention (MHA, MQA, GQA) Backends
 
-| Backend | Version | Dtypes | KV Dtypes | Block Sizes | Head Sizes | Sink | MM Prefix | DCP | Attention Types | Compute Cap. |
-| ------- | ------- | ------ | --------- | ----------- | ---------- | ---- | --------- | --- | --------------- | ------------ |
-| `CPU_ATTN` | | fp16, bf16, fp32 | `auto` | Any | 32, 64, 80, 96, 112, 128, 160, 192, 224, 256 | ❌ | ❌ | ❌ | All | N/A |
-| `FLASHINFER` | Native† | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | 16, 32, 64 | 64, 128, 256 | ❌ | ❌ | ✅ | Decoder | 7.x-9.x |
-| `FLASHINFER` | TRTLLM† | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | 16, 32, 64 | 64, 128, 256 | ✅ | ❌ | ✅ | Decoder | 10.x |
-| `FLASH_ATTN` | FA2* | fp16, bf16 | `auto`, `float16`, `bfloat16` | %16 | Any | ❌ | ❌ | ✅ | All | ≥8.0 |
-| `FLASH_ATTN` | FA3* | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | %16 | Any | ✅ | ❌ | ✅ | All | 9.x |
-| `FLASH_ATTN` | FA4* | fp16, bf16 | `auto`, `float16`, `bfloat16` | %16 | Any | ❌ | ❌ | ✅ | All | ≥10.0 |
-| `FLASH_ATTN_DIFFKV` | | fp16, bf16 | `auto` | Any | Any | ❌ | ❌ | ✅ | Decoder | Any |
-| `FLEX_ATTENTION` | | fp16, bf16, fp32 | `auto`, `float16`, `bfloat16` | Any | Any | ❌ | ✅ | ❌ | Decoder, Encoder Only | Any |
-| `ROCM_AITER_FA` | | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | 16, 32 | 64, 128, 256 | ❌ | ❌ | ❌ | Decoder | N/A |
-| `ROCM_AITER_UNIFIED_ATTN` | | fp16, bf16 | `auto` | %16 | Any | ✅ | ✅ | ❌ | All | N/A |
-| `ROCM_ATTN` | | fp16, bf16, fp32 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | %16 | 32, 64, 80, 96, 128, 160, 192, 224, 256 | ❌ | ✅ | ❌ | Decoder, Encoder, Encoder Only | N/A |
-| `TREE_ATTN` | | fp16, bf16 | `auto`, `float16`, `bfloat16` | %16 | 32, 64, 96, 128, 160, 192, 224, 256 | ❌ | ❌ | ❌ | Decoder | Any |
-| `TRITON_ATTN` | | fp16, bf16, fp32 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | %16 | Any | ✅ | ✅ | ❌ | All | Any |
+| Backend | Version | Dtypes | KV Dtypes | Block Sizes | Head Sizes | Sink | Non-Causal | MM Prefix | DCP | Attention Types | Compute Cap. |
+| ------- | ------- | ------ | --------- | ----------- | ---------- | ---- | ---------- | --------- | --- | --------------- | ------------ |
+| `CPU_ATTN` | | fp16, bf16, fp32 | `auto`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | %16 | 32, 64, 80, 96, 112, 128, 160, 192, 224, 256, 512 | ❌ | ❌ | ❌ | ❌ | All | N/A |
+| `FLASHINFER` | Native† | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | 16, 32, 64 | 64, 128, 256, 512 | ❌ | ❌ | ❌ | ✅ | Decoder | 7.x-9.x |
+| `FLASHINFER` | TRTLLM† | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2`, `nvfp4` | 16, 32, 64 | 64, 128, 256, 512 | ✅ | ❌ | ❌ | ✅ | Decoder | 10.x |
+| `FLASH_ATTN` | FA2* | fp16, bf16 | `auto`, `float16`, `bfloat16` | %16 | Any | ❌ | ✅ | ❌ | ✅ | All | ≥8.0 |
+| `FLASH_ATTN` | FA3* | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | %16 | Any | ✅ | ✅ | ❌ | ✅ | All | 9.x |
+| `FLASH_ATTN` | FA4* | fp16, bf16 | `auto`, `float16`, `bfloat16` | %16 | Any | ✅ | ✅ | ❌ | ✅ | All | ≥10.0 |
+| `FLASH_ATTN_DIFFKV` | | fp16, bf16 | `auto` | Any | Any | ❌ | ❌ | ❌ | ✅ | Decoder | Any |
+| `FLEX_ATTENTION` | | fp16, bf16, fp32 | `auto`, `float16`, `bfloat16` | %16 | Any | ❌ | ✅ | ✅ | ❌ | Decoder, Encoder Only | Any |
+| `ROCM_AITER_FA` | | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | 16, 32 | 64, 128, 256 | ❌ | ✅ | ❌ | ❌ | Decoder | N/A |
+| `ROCM_AITER_UNIFIED_ATTN` | | fp16, bf16 | `auto` | %16 | Any | ✅ | ❌ | ✅ | ❌ | All | N/A |
+| `ROCM_ATTN` | | fp16, bf16, fp32 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | %16 | 32, 64, 80, 96, 128, 160, 192, 224, 256 | ❌ | ✅ | ✅ | ❌ | Decoder, Encoder, Encoder Only | N/A |
+| `TRITON_ATTN` | | fp16, bf16, fp32 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2`, `int8_per_token_head`, `fp8_per_token_head` | %16 | Any | ✅ | ❌ | ✅ | ❌ | All | Any |
+| `TURBOQUANT` | | fp16, bf16 | `turboquant_k8v4`, `turboquant_4bit_nc`, `turboquant_k3v4_nc`, `turboquant_3bit_nc` | 16, 32, 64, 128 | Any | ❌ | ❌ | ❌ | ❌ | Decoder | Any |
 
 > **†** FlashInfer uses TRTLLM attention on Blackwell (SM100), which supports sinks. Disable via `--attention-config.use_trtllm_attention=0`.
 >
@@ -189,31 +193,37 @@ MLA uses separate backends for prefill and decode phases.
 
 ### Prefill Backends
 
-The prefill backend is selected at runtime based on hardware and
-configuration.
+To explicitly select a prefill backend, use
+`-ac.mla_prefill_backend=<BACKEND>` (e.g., `FLASH_ATTN`, `FLASHINFER`).
+Otherwise, the prefill backend is selected automatically at runtime based on
+hardware and configuration.
 
-| Backend | Description | Compute Cap. | Enable | Disable | Notes |
-| ------- | ----------- | ------------ | ------ | ------- | ----- |
-| TRT-LLM Ragged‡ | TensorRT-LLM ragged attention | 10.x | Default on SM100 | `-ac.use_trtllm_ragged_deepseek_prefill=0` | DeepSeek R1 dims only |
-| FlashInfer | FlashInfer CUTLASS backend | 10.x | `-ac.disable_flashinfer_prefill=0` | `-ac.disable_flashinfer_prefill=1` | DeepSeek R1 dims only |
-| cuDNN | cuDNN-based attention | 10.x | `-ac.use_cudnn_prefill=1` | `-ac.use_cudnn_prefill=0` | |
-| FlashAttention | FlashAttention varlen (FA2/FA3) | Any | Default fallback | Use other backends | FA3 on SM90, FA2 otherwise |
+| Backend | Description | Dtypes | Compute Cap. | Notes |
+| ------- | ----------- | ------ | ------------ | ----- |
+| `FLASH_ATTN`‡ | FlashAttention varlen (FA2/FA3/FA4) | fp16, bf16 | Any | FA4 on SM100+, FA3 on SM90, FA2 otherwise |
+| `TRTLLM_RAGGED` | TensorRT-LLM ragged attention | fp16, bf16 | 10.x | DeepSeek R1 dims only |
+| `FLASHINFER` | FlashInfer CUTLASS backend | fp16, bf16 | 10.x | DeepSeek R1 dims only |
+| `TOKENSPEED_MLA` | | fp16, bf16 | 10.x | DeepSeek R1 dims only |
 
 > **‡** TRT-LLM Ragged is the default on Blackwell (SM100).
 > On other GPUs, FlashAttention is used as the default.
 
 ### Decode Backends
 
-| Backend | Dtypes | KV Dtypes | Block Sizes | Head Sizes | Sink | Sparse | MM Prefix | DCP | Attention Types | Compute Cap. |
-| ------- | ------ | --------- | ----------- | ---------- | ---- | ------ | --------- | --- | --------------- | ------------ |
-| `CUTLASS_MLA` | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3` | 128 | Any | ❌ | ❌ | ❌ | ✅ | Decoder | 10.x |
-| `FLASHINFER_MLA` | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3` | 32, 64 | Any | ❌ | ❌ | ❌ | ❌ | Decoder | 10.x |
-| `FLASHINFER_MLA_SPARSE` | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3` | 32, 64 | 576 | ❌ | ✅ | ❌ | ❌ | Decoder | 10.x |
-| `FLASHMLA` | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3` | 64 | Any | ❌ | ❌ | ❌ | ✅ | Decoder | 9.x-10.x |
-| `FLASHMLA_SPARSE` | bf16 | `auto`, `bfloat16`, `fp8_ds_mla` | 64 | 576 | ❌ | ✅ | ❌ | ❌ | Decoder | 9.x-10.x |
-| `FLASH_ATTN_MLA` | fp16, bf16 | `auto`, `float16`, `bfloat16` | %16 | Any | ❌ | ❌ | ❌ | ✅ | Decoder | 9.x |
-| `ROCM_AITER_MLA` | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | 1 | Any | ❌ | ❌ | ❌ | ❌ | Decoder | N/A |
-| `ROCM_AITER_MLA_SPARSE` | fp16, bf16 | `auto`, `float16`, `bfloat16` | 1 | Any | ❌ | ✅ | ❌ | ❌ | Decoder | N/A |
-| `ROCM_AITER_TRITON_MLA` | fp16, bf16 | `auto` | Any | Any | ❌ | ❌ | ❌ | ❌ | Decoder | N/A |
-| `TRITON_MLA` | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3` | %16 | Any | ❌ | ❌ | ❌ | ✅ | Decoder | Any |
-| `XPU_MLA_SPARSE` | fp16, bf16 | `auto`, `float16`, `bfloat16` | Any | 576 | ❌ | ✅ | ❌ | ❌ | Decoder | Any |
+MLA decode backends are selected using the standard
+`-ac.backend=<BACKEND>` argument (e.g., `FLASHMLA`, `TRITON_MLA`).
+
+| Backend | Dtypes | KV Dtypes | Block Sizes | Head Sizes | Sink | Non-Causal | Sparse | MM Prefix | DCP | Attention Types | Compute Cap. |
+| ------- | ------ | --------- | ----------- | ---------- | ---- | ---------- | ------ | --------- | --- | --------------- | ------------ |
+| `CUTLASS_MLA` | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3` | 128 | Any | ❌ | ❌ | ❌ | ❌ | ✅ | Decoder | 10.x |
+| `FLASHINFER_MLA` | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3` | 32, 64 | Any | ❌ | ❌ | ❌ | ❌ | ❌ | Decoder | 10.x |
+| `FLASHINFER_MLA_SPARSE` | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3` | 32, 64 | 576 | ❌ | ❌ | ✅ | ❌ | ❌ | Decoder | 10.x |
+| `FLASHMLA` | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3` | 64 | Any | ❌ | ❌ | ❌ | ❌ | ✅ | Decoder | 9.x-10.x |
+| `FLASHMLA_SPARSE` | bf16 | `auto`, `bfloat16`, `fp8_ds_mla` | 64 | 576 | ❌ | ❌ | ✅ | ❌ | ❌ | Decoder | 9.x-10.x |
+| `FLASH_ATTN_MLA` | fp16, bf16 | `auto`, `float16`, `bfloat16` | %16 | Any | ❌ | ❌ | ❌ | ❌ | ✅ | Decoder | 9.x |
+| `ROCM_AITER_MLA` | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | %1 | Any | ❌ | ❌ | ❌ | ❌ | ❌ | Decoder | N/A |
+| `ROCM_AITER_MLA_SPARSE` | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3` | 1, 64 | Any | ❌ | ❌ | ✅ | ❌ | ❌ | Decoder | N/A |
+| `ROCM_AITER_TRITON_MLA` | fp16, bf16 | `auto` | Any | Any | ❌ | ❌ | ❌ | ❌ | ❌ | Decoder | N/A |
+| `TOKENSPEED_MLA` | fp16, bf16 | `fp8`, `fp8_e4m3` | 32, 64 | Any | ❌ | ❌ | ❌ | ❌ | ❌ | Decoder | 10.x |
+| `TRITON_MLA` | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3` | %16 | Any | ❌ | ❌ | ❌ | ❌ | ✅ | Decoder | Any |
+| `XPU_MLA_SPARSE` | fp16, bf16 | `auto`, `float16`, `bfloat16` | Any | 576 | ❌ | ❌ | ✅ | ❌ | ❌ | Decoder | Any |
diff --git a/docs/design/cuda_graphs.md b/docs/design/cuda_graphs.md
index f860ce290b3f..718a4a8154d9 100644
--- a/docs/design/cuda_graphs.md
+++ b/docs/design/cuda_graphs.md
@@ -225,7 +225,7 @@ outputs = model.generate(
 
 ### Piecewise compilation and full graph custom passes (attention fusion, sequence parallelism)
 
-Unfortunately, some custom compile passes have to see the whole graph to be effective and hence aren't compatible with piecewise compilation. This includes `AttnFusionPass` and `SequenceParallelismPass`. As a short-term solution, we automatically disable piecewise compilation (by setting `splitting_ops=[]`) when attention fusion is enabled. We use CUDA Graph modes `FULL` or `FULL_DECODE_ONLY` (depending on backend support). However, this leads to another optimization incompatibility and confusing performance tradeoffs.
+Unfortunately, some custom compile passes have to see the whole graph to be effective and hence aren't compatible with piecewise compilation. This includes `AttnQuantFusionPass` and `SequenceParallelismPass`. As a short-term solution, we automatically disable piecewise compilation (by setting `splitting_ops=[]`) when attention fusion is enabled. We use CUDA Graph modes `FULL` or `FULL_DECODE_ONLY` (depending on backend support). However, this leads to another optimization incompatibility and confusing performance tradeoffs.
 
 Long term, we've added the ability to partition the graph in Inductor instead of right after Dynamo. It can be enabled with `CompilationConfig.use_inductor_graph_partition=True` but is currently experimental and only available with `torch>=2.9`. This also increases compilation time as it has to compile the whole graph and cannot reuse piecewise compilation artifacts. Once vLLM supports 2.9, we plan to make this the default approach as it will also speed up piecewise cudagraph capture.
 
diff --git a/docs/design/cuda_graphs_multimodal.md b/docs/design/cuda_graphs_multimodal.md
index 5515c91a8b69..14781f6a5a38 100644
--- a/docs/design/cuda_graphs_multimodal.md
+++ b/docs/design/cuda_graphs_multimodal.md
@@ -28,6 +28,7 @@ Multiple CUDA Graphs are pre-captured at different **token budget** levels (e.g.
 class BudgetGraphMetadata:
     token_budget: int
     max_batch_size: int
+    max_frames_per_batch: int
     graph: torch.cuda.CUDAGraph
     input_buffer: torch.Tensor       # e.g. pixel_values
     metadata_buffers: dict[str, torch.Tensor]  # e.g. embeddings, seq metadata
@@ -51,6 +52,15 @@ For each graph replay:
 
 When `mm_encoder_tp_mode="data"`, the manager distributes images across TP ranks using load-balanced assignment via `get_load_balance_assignment`, executes locally on each rank, then gathers results back in the original order via `tensor_model_parallel_all_gather`.
 
+### Video inference support
+
+Following <https://github.com/vllm-project/vllm/pull/35963> (ViT full CUDA graph support for image inference), <https://github.com/vllm-project/vllm/pull/38061> extends the encoder CUDA graph framework to support video inference for Qwen3-VL. Previously, the CUDA graph capture/replay path only handled image inputs (`pixel_values` + `image_grid_thw`). Video inputs use different keys (`pixel_values_videos` + `video_grid_thw`) and require larger `cu_seqlens` buffers because each video item contributes multiple frames (`T` attention sequences). This PR generalizes the protocol and manager to handle both modalities through a single shared graph manager.
+
+!!! note
+    Video CUDA graphs are automatically disabled when EVS (Efficient Video Sampling) pruning is enabled, since EVS makes the token count data-dependent and incompatible with CUDA graph capture.
+
+    Mixed inputs (image+video) per prompt are also supported now.
+
 ## Model integration via `SupportsEncoderCudaGraph`
 
 Models opt-in to encoder CUDA Graphs by implementing the [SupportsEncoderCudaGraph][vllm.model_executor.models.interfaces.SupportsEncoderCudaGraph] protocol. This protocol encapsulates all model-specific logic so that the manager remains model-agnostic. The protocol defines the following methods:
@@ -65,14 +75,26 @@ Models opt-in to encoder CUDA Graphs by implementing the [SupportsEncoderCudaGra
 * `prepare_encoder_cudagraph_replay_buffers(...)` — computes new buffer values from actual batch inputs before replay.
 * `encoder_cudagraph_forward(...)` — forward pass using precomputed buffers (called during capture and replay).
 * `encoder_eager_forward(...)` — fallback eager forward when no graph fits.
-
-Currently supported: **Qwen3-VL** (see `vllm/model_executor/models/qwen3_vl.py`).
+* `get_input_modality(...)` - return the modality of the inputs.
+* `get_max_frames_per_video()` - return model-specific max frames per video.
+* `postprocess_encoder_output(...)` - post process encoder output, directly call scatter_output_slices by default
 
 !!! note
     The `SupportsEncoderCudaGraph` protocol is designed to be model-agnostic. New vision encoder models can opt-in by implementing the protocol methods without modifying the manager.
 
+**Supported models:**
+
+| Architecture | Models | CG for Image | CG for Video |
+| ------------ | ------ | ------------ | ------------ |
+| `Qwen2VLForConditionalGeneration` | `Qwen2-VL` | ✅︎ | ✅︎ |
+| `Qwen2_5_VLForConditionalGeneration` | `Qwen2.5-VL` | ✅︎ | ✅︎ |
+| `Qwen3VLForConditionalGeneration` | `Qwen3-VL` | ✅︎ | ✅︎ |
+| `Qwen3_5ForConditionalGeneration` | `Qwen3.5` | ✅︎ | ✅︎ |
+| `Step3VLForConditionalGeneration` | `Step3-VL` | ✅︎ | ❌︎ |
+
 !!! note
     Encoder CUDA Graphs have currently been tested with `--mm-encoder-attn-backend=FLASH_ATTN` and `--mm-encoder-attn-backend=FLASHINFER` on Blackwell GPUs.
+    For Qwen2-VL and Qwen2.5-VL only FA2 and FA3 has been tested.
 
 ## Configuration
 
@@ -80,10 +102,13 @@ Three fields in `CompilationConfig` control encoder CUDA Graphs:
 
 * `cudagraph_mm_encoder` (`bool`, default `False`) — enable CUDA Graph capture for multimodal encoder. When enabled, captures the full encoder forward as a CUDA Graph for each token budget level.
 * `encoder_cudagraph_token_budgets` (`list[int]`, default `[]`) — token budget levels for capture. If empty (default), auto-inferred from model architecture as power-of-2 levels. User-provided values override auto-inference.
-* `encoder_cudagraph_max_images_per_batch` (`int`, default `0`) — maximum number of images per batch during capture. If 0 (default), auto-inferred as `max_budget // min_budget`.
+* `encoder_cudagraph_max_vision_items_per_batch` (`int`, default `0`) — maximum number of images/videos per batch during capture. If 0 (default), auto-inferred as `max_budget // min_budget`.
+* `encoder_cudagraph_max_frames_per_batch` (`int`, default `None`) — maximum number of video frames per batch during capture. If `None` (default), auto-inferred as `encoder_cudagraph_max_vision_items_per_batch * max_frames_per_video` (`max_frames_per_video` is a model-specific value according to its `processing_info`). If we limit the video count per prompt to `0`, it will also be set to `0` (i.e., fall back to image-only mode).
 
 ## Usage guide
 
+### Image inference
+
 Enable encoder CUDA Graphs via `compilation_config`:
 
 ```bash
@@ -95,7 +120,7 @@ With explicit budgets:
 
 ```bash
 vllm serve Qwen/Qwen3-VL-32B \
-  --compilation-config '{"cudagraph_mm_encoder": true, "encoder_cudagraph_token_budgets": [2048, 4096, 8192, 13824], "encoder_cudagraph_max_images_per_batch": 8}'
+  --compilation-config '{"cudagraph_mm_encoder": true, "encoder_cudagraph_token_budgets": [2048, 4096, 8192, 13824], "encoder_cudagraph_max_vision_items_per_batch": 8}'
 ```
 
 Python example:
@@ -107,7 +132,7 @@ compilation_config = {
     "cudagraph_mm_encoder": True,
     # Optional: override auto-inferred budgets
     # "encoder_cudagraph_token_budgets": [2048, 4096, 8192, 13824],
-    # "encoder_cudagraph_max_images_per_batch": 8,
+    # "encoder_cudagraph_max_vision_items_per_batch": 8,
 }
 
 model = vllm.LLM(
@@ -118,6 +143,41 @@ model = vllm.LLM(
 
 The manager tracks hit/miss statistics and logs them periodically. A "hit" means an image was processed via CUDA Graph replay; a "miss" means eager fallback (image exceeded all budgets).
 
+### Video inference
+
+Enable encoder CUDA Graphs via `compilation_config`:
+
+```bash
+vllm serve Qwen/Qwen3-VL-32B \
+  --compilation-config '{"cudagraph_mm_encoder": true}'
+```
+
+With explicit budgets:
+
+```bash
+vllm serve Qwen/Qwen3-VL-32B \
+  --compilation-config '{"cudagraph_mm_encoder": true, "encoder_cudagraph_token_budgets": [2048, 4096, 8192, 13824], "encoder_cudagraph_max_vision_items_per_batch": 8, "encoder_cudagraph_max_frames_per_batch": 64}'
+```
+
+Python example:
+
+```python
+import vllm
+
+compilation_config = {
+    "cudagraph_mm_encoder": True,
+    # Optional: override auto-inferred budgets
+    # "encoder_cudagraph_token_budgets": [2048, 4096, 8192, 13824],
+    # "encoder_cudagraph_max_vision_items_per_batch": 8,
+    # "encoder_cudagraph_max_frames_per_batch": 64,
+}
+
+model = vllm.LLM(
+    model="Qwen/Qwen3-VL-32B",
+    compilation_config=compilation_config,
+)
+```
+
 ## About the Performance
 
 The following benchmarks were run on Blackwell GPUs (GB200) using `vllm bench mm-processor`. See [#35963](https://github.com/vllm-project/vllm/pull/35963) for full details.
@@ -140,7 +200,7 @@ vllm bench mm-processor \
   --num-prompts 3000 --num-warmups 300 \
   --max-model-len 32768 --seed 42 \
   --mm-encoder-attn-backend FLASH_ATTN \
-  --compilation-config '{"cudagraph_mm_encoder": true, "encoder_cudagraph_token_budgets": [512, 1024, 1536, 2048, 2560, 3072, 3584, 4096, 4864], "encoder_cudagraph_max_images_per_batch": 8}'
+  --compilation-config '{"cudagraph_mm_encoder": true, "encoder_cudagraph_token_budgets": [512, 1024, 1536, 2048, 2560, 3072, 3584, 4096, 4864], "encoder_cudagraph_max_vision_items_per_batch": 8}'
 ```
 
 ### Multi-GPU (4x GB200, TP=4, DP=4)
@@ -165,5 +225,8 @@ vllm bench mm-processor \
   --max-model-len 8192 --seed 42 \
   --mm-encoder-attn-backend FLASHINFER \
   --tensor-parallel-size 4 --mm-encoder-tp-mode data \
-  --compilation-config '{"cudagraph_mm_encoder": true, "encoder_cudagraph_token_budgets": [512, 1024, 1536, 2048, 2560, 3072, 3584, 4096, 4864], "encoder_cudagraph_max_images_per_batch": 8}'
+  --compilation-config '{"cudagraph_mm_encoder": true, "encoder_cudagraph_token_budgets": [512, 1024, 1536, 2048, 2560, 3072, 3584, 4096, 4864], "encoder_cudagraph_max_vision_items_per_batch": 8}'
 ```
+
+!!! note
+    Find more details about benchmarks on GPUs (A100) for video inference at [#38061](https://github.com/vllm-project/vllm/pull/38061).
diff --git a/docs/design/custom_op.md b/docs/design/custom_op.md
index 4aefeb5585fe..d2557a2281cf 100644
--- a/docs/design/custom_op.md
+++ b/docs/design/custom_op.md
@@ -138,7 +138,7 @@ For example:
 
 --8<-- "vllm/model_executor/models/transformers/moe.py:transformers_fused_moe"
 
---8<-- "vllm/model_executor/layers/fused_moe/fused_moe.py:grouped_topk"
+--8<-- "vllm/model_executor/layers/fused_moe/router/grouped_topk_router.py:grouped_topk"
 ```
 
 **9. Norm:**
diff --git a/docs/design/debug_vllm_compile.md b/docs/design/debug_vllm_compile.md
index fbee9f4c3e3e..7edda6fa6476 100644
--- a/docs/design/debug_vllm_compile.md
+++ b/docs/design/debug_vllm_compile.md
@@ -5,12 +5,14 @@ TL;DR:
 - use tlparse to acquire torch.compile logs. Include these logs in bug reports and/or support asks.
 - The vLLM-torch.compile integration is multiple pieces. vLLM exposes flags to turn off each piece:
 
-| Online Flag | Offline Flag | Result |
-| ----------- | ------------ | ------ |
-| --enforce-eager | enforce_eager=True | Turn off torch.compile and CUDAGraphs |
-| -cc.mode=0 | mode=CompilationMode.NONE | Turn off torch.compile only |
-| -cc.cudagraph_mode=NONE | compilation_config=CompilationConfig(cudagraph_mode=CUDAGraphMode.NONE) | Turn off CUDAGraphs only |
-| -cc.backend=eager | compilation_config=CompilationConfig(backend='eager') | Turn off TorchInductor |
+| Online Flag                    | Offline Flag                                                                   | Result                                               |
+|--------------------------------|--------------------------------------------------------------------------------|------------------------------------------------------|
+| --enforce-eager                | enforce_eager=True                                                             | Turn off torch.compile and CUDAGraphs                |
+| -cc.mode=0                     | compilation_config=CompilationConfig(mode=CompilationMode.NONE)                | Turn off torch.compile only                          |
+| -cc.mode=1                     | compilation_config=CompilationConfig(mode=CompilationMode.STOCK_TORCH_COMPILE) | Turn off vLLM-compile modifications to torch.compile |
+| -cc.cudagraph_mode=NONE        | compilation_config=CompilationConfig(cudagraph_mode=CUDAGraphMode.NONE)        | Turn off CUDAGraphs only                             |
+| -cc.backend=eager              | compilation_config=CompilationConfig(backend='eager')                          | Turn off TorchInductor                               |
+| -cc.ir_enable_torch_wrap=False | compilation_config=CompilationConfig(ir_enable_torch_wrap=False)               | Turn off vLLM IR wrapping                            |
 
 ## vLLM-torch.compile overview
 
@@ -22,7 +24,7 @@ Most notably, vLLM-compile is NOT torch.compile, it is a custom compiler built u
 
 - Given a model, we do a full graph capture via TorchDynamo that is dynamic on the batch size (number of tokens)
 - vLLM then optionally splits and/or specializes this graph and then uses TorchInductor to compile each graph into a compiled artifact.
-This step may use vLLM custom Inductor passes to further optimize the graph.
+This step may use vLLM custom Inductor passes to further optimize the graph. This includes vLLM IR lowering to remove dispatch overhead.
 - The compiled artifact is saved to vLLM's compile cache so that it can be loaded in the future.
 - vLLM applies CUDAGraphs to reduce CPU overheads.
 
@@ -34,6 +36,7 @@ For more details on the design, please see the following resources:
 
 - [Introduction to vLLM-torch.compile blogpost](https://blog.vllm.ai/2025/08/20/torch-compile.html)
 - [vLLM-torch.compile integration design](./torch_compile.md)
+- [vLLM IR design](./vllm_ir.md)
 - [vLLM Office Hours #26](https://www.youtube.com/live/xLyxc7hxCJc?si=Xulo9pe53C6ywf0V&t=561)
 - [Talk at PyTorch Conference 2025](https://youtu.be/1wV1ESbGrVQ?si=s1GqymUfwiwOrDTg&t=725)
 
@@ -117,6 +120,21 @@ from vllm.config.compilation import CompilationConfig, CUDAGraphMode
 LLM(model, compilation_config=CompilationConfig(cudagraph_mode=CUDAGraphMode.NONE))
 ```
 
+vLLM IR makes heavy use of the compilation pipeline, from functionalization, custom fusions, and lowering.
+To turn that off and capture eager-mode dispatching behavior of vLLM IR, run with `ir_enable_torch_wrap=False`.
+IR torch wrap is only enabled by default when using `mode=VLLM_COMPILE` and `backend="inductor"` (default).
+
+```sh
+# Online
+vllm serve -cc.ir_enable_torch_wrap=False
+```
+
+```py
+# Offline
+from vllm.config.compilation import CompilationConfig
+LLM(model, compilation_config=CompilationConfig(ir_enable_torch_wrap=False))
+```
+
 ## Debugging TorchDynamo
 
 vLLM requires model code be capturable into a full graph via TorchDynamo (torch.compile's frontend).
diff --git a/docs/design/fusions.md b/docs/design/fusions.md
index 28a29a7f3516..046c509d4b82 100644
--- a/docs/design/fusions.md
+++ b/docs/design/fusions.md
@@ -21,7 +21,9 @@ or just on the low or high end.
 | Fusion                                                                         | `PassConfig` flag            | Fused operations                               | Default at                     | E2E Speedup        | Fullgraph | `num_tokens` |
 | ------------------------------------------------------------------------------ | ---------------------------- | ---------------------------------------------- | ------------------------------ | ------------------ | --------- | ------------ |
 | [AllReduce + RMSNorm](#allreduce--rmsnorm-fuse_allreduce_rms)                  | `fuse_allreduce_rms`         | All-reduce → RMSNorm (+residual_add) (→ quant) | O2 (Hopper/Blackwell + TP > 1) | 5-20%              | No        | Low          |
+| [MiniMax QK Norm](#minimax-qk-norm-fuse_minimax_qk_norm)                       | `fuse_minimax_qk_norm`       | Q/K variance all-reduce → Q/K RMSNorm          | Off by default                 | 2-3%               | No        | Low          |
 | [Attention + Quant](#attention--quantization-fuse_attn_quant)                  | `fuse_attn_quant`            | Attention output → FP8/NVFP4 quant             | Off by default                 | 3-7%               | Yes       | Always       |
+| [MLA Attention + Quant](#attention--quantization-fuse_attn_quant)              | `fuse_attn_quant`            | MLA Attention output → FP8/NVFP4 quant         | Off by default                 | TBD                | Yes       | Always       |
 | [RoPE + KV-Cache Update](#rope--kv-cache-update-fuse_rope_kvcache)             | `fuse_rope_kvcache`          | Rotary embedding → KV cache write              | O2 (ROCm/AITER only)           | 2-4%               | No        | Low          |
 | [QK Norm + RoPE](#qk-norm--rope-enable_qk_norm_rope_fusion)                    | `enable_qk_norm_rope_fusion` | Q/K RMSNorm → rotary embedding                 | Off by default                 | 2-3%               | No        | Low          |
 | [Sequence Parallelism](#sequence-parallelism-enable_sp)                        | `enable_sp`                  | AllReduce → ReduceScatter + AllGather          | Off by default                 | Prereq for AsyncTP | Yes       | High         |
@@ -29,6 +31,7 @@ or just on the low or high end.
 | [RMSNorm + Quant](#rmsnorm--quantization-fuse_norm_quant)                      | `fuse_norm_quant`            | RMSNorm (+residual add) → FP8/FP4 quant        | O1 (conditional)               | 1-4%               | No        | Always       |
 | [SiLU+Mul + Quant](#silumul--quantization-fuse_act_quant)                      | `fuse_act_quant`             | SiLU+Mul activation → FP8/FP4 quant            | O1 (conditional)               | 1-4%               | No        | Always       |
 | [RMSNorm + Padding](#rmsnorm--padding-fuse_act_padding)                        | `fuse_act_padding`           | Residual add + RMSNorm → padding               | O1 (ROCm/AITER only)           | TBD                | No        | Always       |
+| [MLA Dual RMSNorm](#mla-dual-rmsnorm-fuse_mla_dual_rms_norm)                   | `fuse_mla_dual_rms_norm`     | Paired Q + KV RMSNorm → single kernel          | O1 (ROCm/AITER only)           | ~2%                | No        | Always       |
 
 ## Support Matrix
 
@@ -39,19 +42,25 @@ The table below lists the quantization schemes supported by each fusion on each
 | Fusion                       | SM100 (Blackwell)                        | SM90 (Hopper)                            | SM89 (Ada)                               | SM80 (Ampere) | ROCm                                     |
 | ---------------------------- | ---------------------------------------- | ---------------------------------------- | ---------------------------------------- | ------------- | ---------------------------------------- |
 | `fuse_allreduce_rms`         | FP16/BF16, FP8 static, NVFP4             | FP16/BF16, FP8 static                    | —                                        | —             | —                                        |
+| `fuse_minimax_qk_norm`\*     | FP16/BF16                                | FP16/BF16                                | FP16/BF16                                | FP16/BF16     | —                                        |
 | `fuse_attn_quant`\*          | FP8 static\*, NVFP4\*                    | FP8 static\*                             | FP8 static\*                             | —             | FP8 static\*                             |
+| `fuse_attn_quant` (MLA)\*    | FP8 static\*, FP8 per-group\*, NVFP4\*   | FP8 static\*, FP8 per-group\*            | FP8 static\*, FP8 per-group\*            | —             | FP8 static\* (untested)                  |
 | `fuse_rope_kvcache`          | —                                        | —                                        | —                                        | —             | FP16/BF16                                |
 | `enable_qk_norm_rope_fusion` | FP16/BF16                                | FP16/BF16                                | FP16/BF16†                               | FP16/BF16†    | —                                        |
 | `enable_sp`                  | FP16/BF16, FP8 static†                   | FP16/BF16, FP8 static                    | FP16/BF16†                               | FP16/BF16†    | —                                        |
 | `fuse_gemm_comms`            | FP16/BF16, FP8 static†                   | FP16/BF16, FP8 static                    | FP16/BF16†                               | FP16/BF16†    | —                                        |
 | `fuse_norm_quant`            | FP8 static, FP8 per-token, FP8 per-group | FP8 static, FP8 per-token, FP8 per-group | FP8 static, FP8 per-token, FP8 per-group | —             | FP8 static, FP8 per-token, FP8 per-group |
-| `fuse_act_quant`             | FP8 static, NVFP4                        | FP8 static                               | FP8 static                               | —             | FP8 per-group                            |
+| `fuse_act_quant`             | FP8 static, NVFP4                        | FP8 static, FP8 per-group (128/64)       | FP8 static, FP8 per-group (128/64)       | —             | FP8 per-group                            |
 | `fuse_act_padding`           | —                                        | —                                        | —                                        | —             | FP16/BF16                                |
+| `fuse_mla_dual_rms_norm`     | —                                        | —                                        | —                                        | —             | BF16                                     |
 
 \* `fuse_attn_quant` support depends on the attention backend in use; not all backends support
 fused quantization output. See the [`fuse_attn_quant` section](#attention--quantization-fuse_attn_quant)
 for per-backend details.
 
+\* `fuse_minimax_qk_norm` is a model-specific pass for `MiniMaxM2ForCausalLM`. It also requires
+tensor parallelism (`tp_size > 1`) and the CUDA custom op `minimax_allreduce_rms_qk`.
+
 † `enable_sp` and `fuse_gemm_comms` are only autoconfigured for SM90 today;
 other architectures support requires setting `PassConfig.sp_min_token_num` explicitly.
 SM100 support also requires setting `VLLM_DISABLED_KERNELS=FlashInferFP8ScaledMMLinearKernel`.
@@ -129,7 +138,8 @@ on SM90/SM100) and configurable via `PassConfig.fi_allreduce_fusion_max_size_mb`
     explicitly. It requires the full model graph to be visible (Inductor partition or `splitting_ops=[]`).
 
 **What it fuses.** Fuses the attention output quantization directly after the attention computation,
-eliminating a full-precision memory round-trip of the attention output. Patterns covered:
+eliminating a full-precision memory round-trip of the attention output. This fusion supports both
+standard `Attention` and `MLAAttention` (used by DeepSeek-V2/V3/R1 models). Patterns covered:
 
 `Attention → FP8 static quant`:
 
@@ -142,11 +152,24 @@ eliminating a full-precision memory round-trip of the attention output. Patterns
 
 - `FLASHINFER`: CUDA sm100+ with FlashInfer installed
 
+`MLAAttention → FP8 static, FP8 per-group, NVFP4 dynamic quant`
+
+The MLA fusion operates at the graph level on the `unified_mla_attention_with_output` op and works
+with all MLA decode and prefill backend combinations. Unlike standard `Attention` backends (where
+the kernel writes FP8 output directly), no MLA prefill or decode backend currently supports direct
+FP8/FP4 output. The fusion writes to an intermediate buffer and quantizes in a separate step, so
+there is no memory round-trip elimination yet.
+
+!!! info
+    The MLA attention fusion is not expected to yield a measurable speedup yet.
+    This will improve once MLA prefill/decode kernels support direct FP8/FP4 output.
+
 Other attention backends do not support fused output quantization yet.
 
 **Code locations.**
 
-- Pass: [`vllm/compilation/passes/fusion/attn_quant_fusion.py`](https://github.com/vllm-project/vllm/blob/main/vllm/compilation/passes/fusion/attn_quant_fusion.py)
+- Pass (Attention): [`vllm/compilation/passes/fusion/attn_quant_fusion.py`](https://github.com/vllm-project/vllm/blob/main/vllm/compilation/passes/fusion/attn_quant_fusion.py)
+- Pass (MLAAttention): [`vllm/compilation/passes/fusion/mla_attn_quant_fusion.py`](https://github.com/vllm-project/vllm/blob/main/vllm/compilation/passes/fusion/mla_attn_quant_fusion.py)
 - Attention backends: [`vllm/v1/attention/backends/`](https://github.com/vllm-project/vllm/blob/main/vllm/v1/attention/backends/)
 
 ### RoPE + KV-Cache Update (`fuse_rope_kvcache`)
@@ -168,6 +191,35 @@ If these conditions are set, the fusion is enabled automatically for optimizatio
 
 - Pass: [`vllm/compilation/passes/fusion/rope_kvcache_fusion.py`](https://github.com/vllm-project/vllm/blob/main/vllm/compilation/passes/fusion/rope_kvcache_fusion.py)
 
+### MiniMax QK Norm (`fuse_minimax_qk_norm`)
+
+!!! info
+    This is a MiniMax-specific compile pass. It is currently only enabled when all of the following hold:
+    the model architecture is `MiniMaxM2ForCausalLM`, tensor parallelism is enabled (`tp_size > 1`),
+    and the CUDA custom op `minimax_allreduce_rms_qk` is available. It is not enabled by default at any
+    optimization level.
+
+**What it fuses.** Fuses the MiniMax M2 Q/K normalization path that performs an all-reduce over the
+per-token Q/K variances before applying RMS normalization to Q and K.
+
+This pass is distinct from [`enable_qk_norm_rope_fusion`](#qk-norm--rope-enable_qk_norm_rope_fusion):
+`fuse_minimax_qk_norm` targets MiniMax M2's tensor-parallel all-reduce + RMSNorm sequence, while
+`enable_qk_norm_rope_fusion` targets the later Q/K RMSNorm + RoPE sequence used by several other models.
+
+Example:
+
+```bash
+vllm serve MiniMaxAI/MiniMax-M2.5 \
+  --tensor-parallel-size 4 \
+  --compilation-config '{"mode": 3, "pass_config": {"fuse_minimax_qk_norm": true}}'
+```
+
+**Code locations.**
+
+- Pass: [`vllm/compilation/passes/fusion/minimax_qk_norm_fusion.py`](https://github.com/vllm-project/vllm/blob/main/vllm/compilation/passes/fusion/minimax_qk_norm_fusion.py)
+- CUDA op: [`csrc/minimax_reduce_rms_kernel.cu`](https://github.com/vllm-project/vllm/blob/main/csrc/minimax_reduce_rms_kernel.cu) (`minimax_allreduce_rms_qk`)
+- Workspace helper: [`vllm/model_executor/layers/mamba/lamport_workspace.py`](https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/mamba/lamport_workspace.py)
+
 ### Sequence Parallelism (`enable_sp`)
 
 **What it fuses.** Replaces all-reduce collectives with reduce-scatter + local RMSNorm + all-gather,
@@ -305,6 +357,7 @@ Note that AITER fusions are in a separate pass in `vllm.compilation.passes.fusio
 Supported quantization scheme/hardware combinations:
 
 - FP8 static per-tensor: CUDA & HIP kernel
+- FP8 dynamic per-group (128/64): CUDA kernel (sm89+, not active when DeepGemm is used on sm100+)
 - NVFP4 dynamic: CUDA sm100+ only with FlashInfer
 - FP8 per-token-group (128): ROCm AITER only
 
@@ -313,6 +366,7 @@ Supported quantization scheme/hardware combinations:
 - Pass: [`vllm/compilation/passes/fusion/act_quant_fusion.py`](https://github.com/vllm-project/vllm/blob/main/vllm/compilation/passes/fusion/act_quant_fusion.py)
 - ROCm AITER pass: [`vllm/compilation/passes/fusion/rocm_aiter_fusion.py`](https://github.com/vllm-project/vllm/blob/main/vllm/compilation/passes/fusion/rocm_aiter_fusion.py)
 - CUDA/HIP kernels: [`csrc/quantization/`](https://github.com/vllm-project/vllm/blob/main/csrc/quantization/)
+- Fused SiLU+Mul+BlockQuant kernel: [`csrc/quantization/fused_kernels/fused_silu_mul_block_quant.cu`](https://github.com/vllm-project/vllm/blob/main/csrc/quantization/fused_kernels/fused_silu_mul_block_quant.cu)
 
 ### RMSNorm + Padding (`fuse_act_padding`)
 
@@ -329,6 +383,44 @@ when the hidden size is 2880 and AITER Triton GEMMs *not* enabled.
 
 - Pass: [`vllm/compilation/passes/fusion/rocm_aiter_fusion.py`](https://github.com/vllm-project/vllm/blob/main/vllm/compilation/passes/fusion/rocm_aiter_fusion.py) (`RocmAiterTritonAddRMSNormPadFusionPass`)
 
+### MLA Dual RMSNorm (`fuse_mla_dual_rms_norm`)
+
+!!! info
+    ROCm/AITER-only. Targeted at DeepSeek-V3 / Kimi-K2 MLA attention.
+
+!!! note
+    When the native implementation of `rms_norm` is used (the default on CUDA and
+    ROCm for now), Inductor's built-in fusion already handles merging these norms
+    automatically. This explicit pass targets the case where AITER's custom
+    `rms_norm` op is active, which Inductor cannot fuse on its own.
+
+**What it fuses.** Fuses the paired `q_a_layernorm` and `kv_a_layernorm` RMS norm
+operations in MLA attention into a single `fused_qk_rmsnorm` HIP kernel call via AITER,
+reducing kernel launch overhead from 2 launches to 1 per MLA layer.
+
+```text
+# Unfused:
+q_c, kv_lora = split(projected, [q_dim, kv_dim])
+kv_c, k_pe   = split(kv_lora,  [kv_c_dim, k_pe_dim])
+q_c  = rms_norm(q_c,  q_weight,  eps)
+kv_c = rms_norm(kv_c, kv_weight, eps)
+
+# Fused:
+q_c, kv_lora = split(projected, [q_dim, kv_dim])
+kv_c, k_pe   = split(kv_lora,  [kv_c_dim, k_pe_dim])
+q_normed, kv_normed = fused_mla_dual_rms_norm(
+    q_c, q_weight, kv_c, kv_weight, eps1, eps2)
+```
+
+Requires: AMD ROCm with AITER enabled. Enabled by default at optimization level O1 and above
+when AITER is available.
+
+**Code locations.**
+
+- Pass: [`vllm/compilation/passes/fusion/rocm_aiter_fusion.py`](https://github.com/vllm-project/vllm/blob/main/vllm/compilation/passes/fusion/rocm_aiter_fusion.py) (`MLADualRMSNormFusionPass`)
+- Custom op: [`vllm/_aiter_ops.py`](https://github.com/vllm-project/vllm/blob/main/vllm/_aiter_ops.py) (`fused_mla_dual_rms_norm`)
+- AITER kernel: [`fused_qk_rmsnorm`](https://github.com/ROCm/aiter/pull/2442)
+
 ## See Also
 
 - [Optimization Levels](optimization_levels.md) — high-level presets that set
diff --git a/docs/design/huggingface_integration.md b/docs/design/huggingface_integration.md
index 1109abf6cb93..c3937d15db89 100644
--- a/docs/design/huggingface_integration.md
+++ b/docs/design/huggingface_integration.md
@@ -21,7 +21,7 @@ Let's say we want to serve the popular Qwen model by running `vllm serve Qwen/Qw
 
 Beyond that, there are two more things vLLM depends on Hugging Face for.
 
-1. **Tokenizer**: vLLM uses the tokenizer from Hugging Face to tokenize the input text. The tokenizer is loaded using [AutoTokenizer.from_pretrained](https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoTokenizer.from_pretrained) with the `model` argument as the model name and the `--revision` argument as the revision. It is also possible to use a tokenizer from another model by specifying the `--tokenizer` argument in the `vllm serve` command. Other relevant arguments are `--tokenizer-revision` and `--tokenizer-mode`. Please check Hugging Face's documentation for the meaning of these arguments. This part of the logic can be found in the [get_tokenizer](https://github.com/vllm-project/vllm/blob/127c07480ecea15e4c2990820c457807ff78a057/vllm/transformers_utils/tokenizer.py#L87) function. After obtaining the tokenizer, notably, vLLM will cache some expensive attributes of the tokenizer in [vllm.tokenizers.hf.get_cached_tokenizer][].
+1. **Tokenizer**: vLLM uses the tokenizer from Hugging Face to tokenize the input text. The tokenizer is loaded using [AutoTokenizer.from_pretrained](https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoTokenizer.from_pretrained) with the `model` argument as the model name and the `--revision` argument as the revision. It is also possible to use a tokenizer from another model by specifying the `--tokenizer` argument in the `vllm serve` command. Other relevant arguments are `--tokenizer-revision` and `--tokenizer-mode`. Setting `VLLM_USE_FASTOKENS=1` swaps in a drop-in Rust BPE backend for any HF fast tokenizer loaded by vLLM (see [fastokens Backend](../configuration/optimization.md#fastokens-backend)). Please check Hugging Face's documentation for the meaning of these arguments. This part of the logic can be found in the [get_tokenizer](https://github.com/vllm-project/vllm/blob/127c07480ecea15e4c2990820c457807ff78a057/vllm/transformers_utils/tokenizer.py#L87) function. After obtaining the tokenizer, notably, vLLM will cache some expensive attributes of the tokenizer in [vllm.tokenizers.hf.get_cached_tokenizer][].
 
 2. **Model weight**: vLLM downloads the model weight from the Hugging Face model hub using the `model` argument as the model name and the `--revision` argument as the revision. vLLM provides the argument `--load-format` to control what files to download from the model hub. By default, it will try to load the weights in the safetensors format and fall back to the PyTorch bin format if the safetensors format is not available. We can also pass `--load-format dummy` to skip downloading the weights.
     - It is recommended to use the safetensors format, as it is efficient for loading in distributed inference and also safe from arbitrary code execution. See the [documentation](https://huggingface.co/docs/safetensors/en/index) for more information on the safetensors format. This part of the logic can be found [here](https://github.com/vllm-project/vllm/blob/10b67d865d92e376956345becafc249d4c3c0ab7/vllm/model_executor/model_loader/loader.py#L385). Please note that:
diff --git a/docs/design/lora_resolver_plugins.md b/docs/design/lora_resolver_plugins.md
index ad644cbc50aa..019c4eef93d9 100644
--- a/docs/design/lora_resolver_plugins.md
+++ b/docs/design/lora_resolver_plugins.md
@@ -62,8 +62,7 @@ The filesystem resolver is installed with vLLM by default and enables loading Lo
 3. **Start vLLM server**:
    Your base model can be `meta-llama/Llama-2-7b-hf`. Please make sure you set up the Hugging Face token in your env var `export HF_TOKEN=xxx235`.
    ```bash
-   python -m vllm.entrypoints.openai.api_server \
-       --model your-base-model \
+   vllm serve your-base-model \
        --enable-lora
    ```
 
diff --git a/docs/design/metrics.md b/docs/design/metrics.md
index be917c0dc614..0ae420399767 100644
--- a/docs/design/metrics.md
+++ b/docs/design/metrics.md
@@ -42,7 +42,7 @@ These are documented under [Inferencing and Serving -> Production Metrics](../us
 
 ### Grafana Dashboard
 
-vLLM also provides [a reference example](../../examples/online_serving/prometheus_grafana/README.md) for how to collect and store these metrics using Prometheus and visualize them using a Grafana dashboard.
+vLLM also provides [a reference example](../../examples/observability/prometheus_grafana/README.md) for how to collect and store these metrics using Prometheus and visualize them using a Grafana dashboard.
 
 The subset of metrics exposed in the Grafana dashboard gives us an indication of which metrics are especially important:
 
@@ -657,7 +657,7 @@ vLLM has support for OpenTelemetry tracing:
 - Added by <https://github.com/vllm-project/vllm/pull/4687> and reinstated by <https://github.com/vllm-project/vllm/pull/20372>
 - Configured with `--oltp-traces-endpoint` and `--collect-detailed-traces`
 - [OpenTelemetry blog post](https://opentelemetry.io/blog/2024/llm-observability/)
-- [User-facing docs](../../examples/online_serving/opentelemetry/README.md)
+- [User-facing docs](../../examples/observability/opentelemetry/README.md)
 - [Blog post](https://medium.com/@ronen.schaffer/follow-the-trail-supercharging-vllm-with-opentelemetry-distributed-tracing-aa655229b46f)
 - [IBM product docs](https://www.ibm.com/docs/en/instana-observability/current?topic=mgaa-monitoring-large-language-models-llms-vllm-public-preview)
 
diff --git a/docs/design/moe_kernel_features.md b/docs/design/moe_kernel_features.md
index 03d25a9b1cbf..1a11c6685a45 100644
--- a/docs/design/moe_kernel_features.md
+++ b/docs/design/moe_kernel_features.md
@@ -36,7 +36,7 @@ th {
 | deepep_high_throughput | standard | fp8 | G(128),A,T<sup>2</sup> | Y | Y | [`DeepEPHTPrepareAndFinalize`][vllm.model_executor.layers.fused_moe.prepare_finalize.deepep_ht.DeepEPHTPrepareAndFinalize] |
 | deepep_low_latency | batched | fp8 | G(128),A,T<sup>3</sup> | Y | Y | [`DeepEPLLPrepareAndFinalize`][vllm.model_executor.layers.fused_moe.prepare_finalize.deepep_ll.DeepEPLLPrepareAndFinalize] |
 | flashinfer_nvlink_two_sided | standard | nvfp4,fp8 | G,A,T | N | N | [`FlashInferNVLinkTwoSidedPrepareAndFinalize`][vllm.model_executor.layers.fused_moe.prepare_finalize.flashinfer_nvlink_two_sided.FlashInferNVLinkTwoSidedPrepareAndFinalize] |
-| flashinfer_nvlink_one_sided | standard | nvfp4 | G,A,T | N | N | [`FlashInferNVLinkOneSidedPrepareAndFinalize`][vllm.model_executor.layers.fused_moe.prepare_finalize.flashinfer_nvlink_one_sided.FlashInferNVLinkOneSidedPrepareAndFinalize] |
+| flashinfer_nvlink_one_sided | standard | nvfp4,bf16,mxfp8 | G,A,T | N | N | [`FlashInferNVLinkOneSidedPrepareAndFinalize`][vllm.model_executor.layers.fused_moe.prepare_finalize.flashinfer_nvlink_one_sided.FlashInferNVLinkOneSidedPrepareAndFinalize] |
 
 !!! info "Table key"
     1. All types: mxfp4, nvfp4, int4, int8, fp8
@@ -57,9 +57,9 @@ Modular kernels are supported by the following `FusedMoEMethodBase` classes.
 
 - [`ModelOptFp8MoEMethod`][vllm.model_executor.layers.quantization.modelopt.ModelOptFp8MoEMethod]
 - [`Fp8MoEMethod`][vllm.model_executor.layers.quantization.fp8.Fp8MoEMethod]
-- [`CompressedTensorsW4A4Nvfp4MoEMethod`][vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsW4A4Nvfp4MoEMethod]
-- [`CompressedTensorsW8A8Fp8MoEMethod`][vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsW8A8Fp8MoEMethod]
-- [`Mxfp4MoEMethod`][vllm.model_executor.layers.quantization.mxfp4.Mxfp4MoEMethod]
+- [`CompressedTensorsW4A4Nvfp4MoEMethod`][vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.compressed_tensors_moe_w4a4_nvfp4.CompressedTensorsW4A4Nvfp4MoEMethod]
+- [`CompressedTensorsW8A8Fp8MoEMethod`][vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.compressed_tensors_moe_w8a8_fp8.CompressedTensorsW8A8Fp8MoEMethod]
+- [`GptOssMxfp4MoEMethod`][vllm.model_executor.layers.quantization.mxfp4.GptOssMxfp4MoEMethod]
 - [`UnquantizedFusedMoEMethod`][vllm.model_executor.layers.fused_moe.layer.UnquantizedFusedMoEMethod]
 
 ## Fused Experts Kernels
@@ -80,18 +80,18 @@ To be used with a particular `FusedMoEPrepareAndFinalizeModular` subclass, MoE k
 
 | Kernel | Input act. format | Quant. types | Quant. format | Activation function | Apply Weight On Input | Modular | Source |
 | ------ | ----------------- | ------------ | ------------- | ------------------- | --------------------- | ------- | ------ |
-| triton | standard | all<sup>1</sup> | G,A,T | silu, gelu,</br>swigluoai,</br>silu_no_mul,</br>gelu_no_mul | Y | Y | [`fused_experts`][vllm.model_executor.layers.fused_moe.fused_moe.fused_experts],</br>[`TritonExperts`][vllm.model_executor.layers.fused_moe.fused_moe.TritonExperts] |
-| triton (batched) | batched | all<sup>1</sup> | G,A,T | silu, gelu | <sup>6</sup> | Y | [`BatchedTritonExperts`][vllm.model_executor.layers.fused_moe.fused_batched_moe.BatchedTritonExperts] |
-| deep gemm | standard,</br>batched | fp8 | G(128),A,T | silu, gelu | <sup>6</sup> | Y | </br>[`DeepGemmExperts`][vllm.model_executor.layers.fused_moe.deep_gemm_moe.DeepGemmExperts],</br>[`BatchedDeepGemmExperts`][vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe.BatchedDeepGemmExperts] |
-| cutlass_fp4 | standard,</br>batched | nvfp4 | A,T | silu | Y | Y | [`CutlassExpertsFp4`][vllm.model_executor.layers.fused_moe.cutlass_moe.CutlassExpertsFp4] |
-| cutlass_fp8 | standard,</br>batched | fp8 | A,T | silu, gelu | Y | Y | [`CutlassExpertsFp8`][vllm.model_executor.layers.fused_moe.cutlass_moe.CutlassExpertsFp8],</br>[`CutlasBatchedExpertsFp8`][vllm.model_executor.layers.fused_moe.cutlass_moe.CutlassBatchedExpertsFp8] |
-| flashinfer | standard | nvfp4,</br>fp8 | T | <sup>5</sup> | N | Y | [`FlashInferExperts`][vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe.FlashInferExperts] |
-| gpt oss triton | standard | N/A | N/A | <sup>5</sup> | Y | Y | [`triton_kernel_fused_experts`][vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe.triton_kernel_fused_experts],</br>[`OAITritonExperts`][vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe.OAITritonExperts] |
-| marlin | standard,</br>batched | <sup>3</sup> / N/A | <sup>3</sup> / N/A | silu,</br>swigluoai | Y | Y | [`fused_marlin_moe`][vllm.model_executor.layers.fused_moe.fused_marlin_moe.fused_marlin_moe],</br>[`MarlinExperts`][vllm.model_executor.layers.fused_moe.fused_marlin_moe.MarlinExperts],</br>[`BatchedMarlinExperts`][vllm.model_executor.layers.fused_moe.fused_marlin_moe.BatchedMarlinExperts] |
+| triton | standard | all<sup>1</sup> | G,A,T | silu, gelu,</br>swigluoai,</br>silu_no_mul,</br>gelu_no_mul | Y | Y | [`fused_experts`][vllm.model_executor.layers.fused_moe.fused_moe.fused_experts],</br>[`TritonExperts`][vllm.model_executor.layers.fused_moe.experts.triton_moe.TritonExperts] |
+| triton (batched) | batched | all<sup>1</sup> | G,A,T | silu, gelu | <sup>6</sup> | Y | [`BatchedTritonExperts`][vllm.model_executor.layers.fused_moe.experts.fused_batched_moe.BatchedTritonExperts] |
+| deep gemm | standard,</br>batched | fp8 | G(128),A,T | silu, gelu | <sup>6</sup> | Y | </br>[`DeepGemmExperts`][vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe.DeepGemmExperts],</br>[`BatchedDeepGemmExperts`][vllm.model_executor.layers.fused_moe.experts.batched_deep_gemm_moe.BatchedDeepGemmExperts] |
+| cutlass_fp4 | standard,</br>batched | nvfp4 | A,T | silu | Y | Y | [`CutlassExpertsFp4`][vllm.model_executor.layers.fused_moe.experts.cutlass_moe.CutlassExpertsFp4] |
+| cutlass_fp8 | standard,</br>batched | fp8 | A,T | silu, gelu | Y | Y | [`CutlassExpertsFp8`][vllm.model_executor.layers.fused_moe.experts.cutlass_moe.CutlassExpertsFp8],</br>[`CutlasBatchedExpertsFp8`][vllm.model_executor.layers.fused_moe.experts.cutlass_moe.CutlassBatchedExpertsFp8] |
+| flashinfer | standard | nvfp4,</br>fp8 | T | <sup>5</sup> | N | Y | [`FlashInferExperts`][vllm.model_executor.layers.fused_moe.experts.flashinfer_cutlass_moe.FlashInferExperts] |
+| gpt oss triton | standard | N/A | N/A | <sup>5</sup> | Y | Y | [`triton_kernel_fused_experts`][vllm.model_executor.layers.fused_moe.experts.gpt_oss_triton_kernels_moe.triton_kernel_fused_experts],</br>[`OAITritonExperts`][vllm.model_executor.layers.fused_moe.experts.gpt_oss_triton_kernels_moe.OAITritonExperts] |
+| marlin | standard,</br>batched | <sup>3</sup> / N/A | <sup>3</sup> / N/A | silu,</br>swigluoai | Y | Y | [`fused_marlin_moe`][vllm.model_executor.layers.fused_moe.experts.marlin_moe.fused_marlin_moe],</br>[`MarlinExperts`][vllm.model_executor.layers.fused_moe.experts.marlin_moe.MarlinExperts],</br>[`BatchedMarlinExperts`][vllm.model_executor.layers.fused_moe.experts.marlin_moe.BatchedMarlinExperts] |
 | trtllm | standard | mxfp4,</br>nvfp4 | G(16),G(32) | <sup>5</sup> | N | Y | [`TrtLlmMxfp4ExpertsMonolithic`][vllm.model_executor.layers.fused_moe.experts.trtllm_mxfp4_moe.TrtLlmMxfp4ExpertsMonolithic],</br>[`TrtLlmMxfp4ExpertsModular`][vllm.model_executor.layers.fused_moe.experts.trtllm_mxfp4_moe.TrtLlmMxfp4ExpertsModular],</br>[`TrtLlmNvFp4ExpertsMonolithic`][vllm.model_executor.layers.fused_moe.experts.trtllm_nvfp4_moe.TrtLlmNvFp4ExpertsMonolithic],</br>[`TrtLlmNvfp4ExpertsModular`][vllm.model_executor.layers.fused_moe.experts.trtllm_nvfp4_moe.TrtLlmNvFp4ExpertsModular] |
 | rocm aiter moe | standard | mxfp4,</br>fp8 | G(32),G(128),A,T | silu, gelu,</br>swigluoai | Y | N | `rocm_aiter_fused_experts`,</br>`AiterExperts` |
 | cpu_fused_moe | standard | N/A | N/A | silu | N | N | [`CPUFusedMOE`][vllm.model_executor.layers.fused_moe.cpu_fused_moe.CPUFusedMOE] |
-| naive batched<sup>4</sup> | batched | int8,</br>fp8 | G,A,T | silu, gelu | <sup>6</sup> | Y | [`NaiveBatchedExperts`][vllm.model_executor.layers.fused_moe.fused_batched_moe.NaiveBatchedExperts] |
+| naive batched<sup>4</sup> | batched | int8,</br>fp8 | G,A,T | silu, gelu | <sup>6</sup> | Y | [`NaiveBatchedExperts`][vllm.model_executor.layers.fused_moe.experts.fused_batched_moe.NaiveBatchedExperts] |
 
 !!! info "Table key"
     1. All types: mxfp4, nvfp4, int4, int8, fp8
diff --git a/docs/design/nixl_kv_cache_lease.md b/docs/design/nixl_kv_cache_lease.md
new file mode 100644
index 000000000000..a3fdaafe3453
--- /dev/null
+++ b/docs/design/nixl_kv_cache_lease.md
@@ -0,0 +1,136 @@
+# NIXL KV Cache Lease Renewal
+
+In disaggregated prefill/decode deployments, the Prefill instance (P) must hold KV cache blocks in GPU memory after completing a prefill, waiting for the Decode instance (D) to read them via RDMA. A mechanism is needed to determine when those blocks can safely be freed when D isn't able to retrieve them. This mechanism was introduced in [PR #41383](https://github.com/vllm-project/vllm/pull/41383).
+
+## Motivation
+
+### The single-timeout problem
+
+The original design used a single, large timeout (`VLLM_NIXL_ABORT_REQUEST_TIMEOUT`, default 480s) to control how long P retains KV blocks. When D crashed or disconnected, P would hold onto potentially several GBs of "dead" blocks for up to 8 minutes before reclaiming them. During this window, subsequent requests hitting P would find reduced cache capacity and experience degraded performance.
+
+### The overloading problem
+
+Simply lowering the timeout introduces a different failure mode. Under traffic surges, requests can sit in D's waiting queue for a long time before being scheduled. If the fixed timeout on P is too short, blocks get freed before D ever has a chance to read them --- causing unnecessary recomputation and wasted prefill work.
+
+### Solution: lease renewal via heartbeats
+
+The lease renewal mechanism addresses both problems simultaneously. P grants a **short initial lease** (default 30s) when prefill completes. While a request is **queued or in-flight** on D, D **periodically sends heartbeats** to P extending the lease. If D crashes and stops heartbeating, P reclaims blocks within seconds of the last heartbeat rather than waiting minutes. If D is merely overloaded, the heartbeats keep the blocks alive for as long as needed.
+
+## How It Works
+
+### Lease lifecycle
+
+When P finishes a prefill, it pins the KV blocks with an initial lease duration (`kv_lease_duration`, default 30s). From that point, the blocks are held until either:
+
+1. **D completes the KV transfer** --- P receives a read-completion notification and frees the blocks immediately.
+2. **D keeps heartbeating** --- each heartbeat extends the lease by `lease_duration * 2/3` (~20s), keeping blocks alive indefinitely while D is healthy.
+3. **No heartbeat arrives** --- the lease expires and P reclaims the blocks.
+
+### Piggybacking on NIXL notifications
+
+Rather than introducing a new transport channel, heartbeats reuse NIXL's existing notification system (`send_notif` / `get_new_notifs`). The notification medium is backend-specific, with automatic fallback from IB/RoCE to TCP already handled by NIXL. Each single heartbeat message sent from D to a particular P renews all requests pinned in P on behalf of that D --- in other words, a single batched message per iteration renews the lease of multiple requests.
+
+### Scheduler-side tracking (D)
+
+A critical insight is that heartbeating must start **as soon as a request enters D's scheduler** --- not when it gets scheduled for execution. Under heavy load, a request may sit in the waiting queue for much longer than the initial lease duration, and the gap between arrival and scheduling is unbounded.
+
+To achieve this, D's connector (`NixlConnectorScheduler`) hooks into the scheduler via `on_new_request()`. When a request with `do_remote_prefill=True` arrives, the connector immediately starts tracking it for heartbeats. Requests are grouped by `remote_engine_id` for efficient batching. On each scheduler step, heartbeat metadata is packaged into `NixlConnectorMetadata` and sent to the worker, throttled by a heartbeat interval of `lease_duration // 6` (~5s).
+
+Tracking stops when either the KV transfer completes (via `update_connector_output`) or the request finishes/aborts (via `request_finished`).
+
+### Timing and simplicity
+
+Heartbeat sending and processing happen **in the forward loop**, not in a background thread. This means timing is not millisecond-precise --- a long model forward pass will delay heartbeats. However, the lease durations are configured with sufficient margin: with default settings, the heartbeat interval (~5s) and lease extension (~20s) are at least an order of magnitude larger than a typical forward pass. This avoids lock complexity between threads while keeping the design simple and extensible.
+
+## Happy Path
+
+```mermaid
+sequenceDiagram
+    participant R as Routing Proxy
+    participant P as Prefill Instance
+    participant D as Decode Instance
+
+    R->>P: Request (do_remote_decode=True)
+    P->>P: Run prefill
+    P->>P: Grant lease (30s)
+    P->>R: Response (with kv_transfer_params)
+
+    R->>D: Request (do_remote_prefill=True)
+    note over D: Request enters waiting queue
+    D->>D: on_new_request() starts tracking
+
+    loop Every ~5s (heartbeat interval)
+        D->>P: Heartbeat (extend lease)
+        P->>P: Lease extended by ~20s
+    end
+
+    note over D: Request scheduled for execution
+    D->>P: KV transfer (RDMA read)
+    P-->D: Transfer complete
+    D->>D: Stop heartbeating
+    P->>P: Free KV blocks
+```
+
+## Decode Instance Crash
+
+```mermaid
+sequenceDiagram
+    participant R as Routing Proxy
+    participant P as Prefill Instance
+    participant D as Decode Instance
+
+    R->>P: Request (do_remote_decode=True)
+    P->>P: Run prefill (holds onto KVs with lease)
+    P->>R: Response
+
+    R->>D: Request (do_remote_prefill=True)
+    D->>P: Heartbeat (extend lease)
+    D->>P: Heartbeat (extend lease)
+    note over D: D crashes
+    note over P: No heartbeat received
+    P->>P: Lease expires (~20s, not 480s)
+    P->>P: Free KV blocks
+```
+
+### Worker-side sending and receiving
+
+**On D (sending):** During `start_load_kv()` (called every forward pass), the worker reads `metadata.heartbeat_by_engine` and sends batched heartbeat notifications to each remote P engine. If D hasn't yet handshaked with P for a given engine (common for requests still in the waiting queue), it triggers a **proactive handshake** in a background thread.
+The heartbeat is deferred to the next step once the handshake completes --- the early handshake also **speeds up the eventual KV transfer.**
+
+**On P (receiving):** In `_get_new_notifs()`, P's worker checks incoming NIXL notifications. Messages starting with `"HB:"` are routed to `_handle_heartbeat()`, which extends the lease expiry for each referenced request using `max(old_expiry, now + lease_extension)`. This ensures leases are never accidentally shortened.
+
+## Bidirectional KV Transfer
+
+For multi-turn conversations, [bidirectional KV transfer](../features/disagg_prefill.md) allows D to cache KV blocks that P can pull from on subsequent turns. Since the timing of the next conversational turn is **client-dependent** (not controlled by the system), the heartbeat-based lease mechanism does not apply here. Instead, a separate `decoder_kv_blocks_ttl` (default 480s) provides a simple fixed timeout for blocks cached on D. If the client takes too long to continue the conversation, the blocks expire and P recomputes. Future work may extend a symmetric heartbeat mechanism to this case.
+
+## Key Design Decisions
+
+- **Per-request leasing, not per-instance.** P has no notion of which D its KV blocks belong to --- block ownership is only resolved after prefill completes and the router selects a D. Leasing at the request level avoids coupling P/D selection in the load balancer. In practice, D batches lease extensions toward the same P by grouping requests with the same `remote_engine_id`.
+
+- **NIXL notifications as transport.** Heartbeats reuse the existing `send_notif`/`get_new_notifs` system rather than adding ZMQ connections or API changes. The notification medium is backend-specific with IB/RoCE-to-TCP fallback already handled, making heartbeats work across any NIXL-supported transport.
+
+- **No background thread.** Heartbeat sending and processing happen in the forward loop (`start_load_kv` / `get_finished`). This avoids lock complexity between threads. Lease durations provide sufficient margin over forward-pass latency (seconds vs. milliseconds).
+
+- **Proactive handshake.** When D needs to heartbeat a P engine it hasn't connected to yet (common for requests still in the waiting queue), it triggers an early handshake in a background thread. This also speeds up the eventual KV transfer.
+
+- **Heterogeneous TP support.** When P TP > D TP (e.g., P TP=4, D TP=2), a single D worker pulls from multiple P workers. Heartbeats must be sent to all P workers for a given engine. Conversely, when D TP > P TP, a single P receives notifications from multiple Ds, which simply refreshes the TTL multiple times with no downside.
+
+## Configuration
+
+The lease mechanism is controlled through `kv_connector_extra_config` in `--kv-transfer-config`:
+
+| Parameter               | Default | Description                                                                                                                                                   |
+|-------------------------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `kv_lease_duration`     | 30s     | Initial lease duration on P. Heartbeat interval and extension amount are derived automatically (`interval = duration // 6`, `extension = duration * 2 // 3`). |
+| `decoder_kv_blocks_ttl` | 480s    | TTL for KV blocks cached on D in bidirectional transfer mode. Simple fixed timeout, not renewed via heartbeats.                                               |
+
+```bash
+vllm serve <MODEL> \
+  --kv-transfer-config '{
+    "kv_connector": "NixlConnector",
+    "kv_role": "kv_both",
+    "kv_connector_extra_config": {"kv_lease_duration": 60}
+  }'
+```
+
+For full NixlConnector configuration details, see the [NixlConnector Usage Guide](../features/nixl_connector_usage.md).
diff --git a/docs/design/optimization_levels.md b/docs/design/optimization_levels.md
index 591978b542e6..3261aed74794 100644
--- a/docs/design/optimization_levels.md
+++ b/docs/design/optimization_levels.md
@@ -16,7 +16,7 @@ User-set flags take precedence over optimization level defaults.
 
 ```bash
 # CLI usage
-python -m vllm.entrypoints.api_server --model RedHatAI/Llama-3.2-1B-FP8 -O1
+vllm serve RedHatAI/Llama-3.2-1B-FP8 -O1
 
 # Python API usage
 from vllm.entrypoints.llm import LLM
@@ -56,6 +56,7 @@ Fusions:
 - `-cc.pass_config.fuse_norm_quant=True`*
 - `-cc.pass_config.fuse_act_quant=True`*
 - `-cc.pass_config.fuse_act_padding=True`†
+- `-cc.pass_config.fuse_mla_dual_rms_norm=True`†
 
 \* These fusions are only enabled when either op is using a custom kernel, otherwise Inductor fusion is better.</br>
 † These fusions are ROCm-only and require AITER.
diff --git a/docs/design/p2p_nccl_connector.md b/docs/design/p2p_nccl_connector.md
index 4674bef8d2b6..c1de955b6ffe 100644
--- a/docs/design/p2p_nccl_connector.md
+++ b/docs/design/p2p_nccl_connector.md
@@ -88,7 +88,7 @@ pip install "vllm>=0.9.2"
 #### Proxy (e.g. 10.0.1.1)
 
 ```shell
-cd {your vllm directory}/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/
+cd {your vllm directory}/examples/disaggregated/p2p_nccl_xpyd/
 python3 disagg_proxy_p2p_nccl_xpyd.py &
 ```
 
@@ -181,7 +181,7 @@ python3 disagg_proxy_p2p_nccl_xpyd.py &
 #### Proxy (e.g. 10.0.1.1)
 
 ```shell
-cd {your vllm directory}/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/
+cd {your vllm directory}/examples/disaggregated/p2p_nccl_xpyd/
 python3 disagg_proxy_p2p_nccl_xpyd.py &
 ```
 
diff --git a/docs/design/vllm_ir.md b/docs/design/vllm_ir.md
new file mode 100644
index 000000000000..82628f3762fe
--- /dev/null
+++ b/docs/design/vllm_ir.md
@@ -0,0 +1,615 @@
+# vLLM IR: Functional Intermediate Representation
+
+## Motivation
+
+vLLM IR is a **functional intermediate representation (IR)** that fills the gap between
+low-level `torch` ops and vLLM layers like `RMSNorm` and quantization operators,
+By separating operator **semantics** from the **implementation** and **dispatching**,
+vLLM IR simplifies both compilation and kernel registration & dispatching simultaneously.
+It operates as a **dialect** in the torch FX representation, allowing full interoperability
+with “regular” torch ops & custom torch ops/kernels, as well as a piecewise migration from
+the previous `CustomOp` approach.
+
+Key design principles:
+
+- **Eager-compile consistency**: identical behavior (barring minor numerics) in eager and compiled modes
+- **Simple, transparent, yet powerful kernel selection**: good visibility and control allowing easy debugging
+- **Convention over configuration**: near-zero boilerplate required to register ops and implementations
+- **Extensibility**: ops and implementations can be registered anywhere, in-tree or out-of-tree
+- **Interoperability**: fully compatible with “regular” torch ops & custom torch ops/kernels,
+reducing developer friction and allowing piecewise migration
+
+The clean semantics/implementation separation enables a unified and extensible dispatching mechanism,
+allowing multiple kernels per-platform and powerful kernel selection. The separation also facilitates
+cleaner testing and benchmarking, removing much of the boilerplate standard for legacy approaches.
+
+By delaying kernel selection until late in the compilation process, the compiler can operate on
+a higher-level representation, which has the following main benefits:
+
+- Pattern matching in fusion/transformation passes only requires a single, simple pattern per op
+- OOT compiler backends can lower from the higher-level representation (in-progress)
+- The compiler can autotune over available implementations (future feature)
+
+## Quick Overview
+
+### Declaring an IR Operation
+
+IR operations are declared using the `@register_op` decorator with a native PyTorch implementation that defines the op's semantics:
+
+```python
+# vllm/ir/ops/layernorm.py
+from torch import Tensor
+from vllm.ir import register_op
+
+@register_op
+def rms_norm(x: Tensor, weight: Tensor | None, epsilon: float, variance_size: int | None = None) -> Tensor:
+    """Weighted root-mean-square layer normalization"""
+    orig_dtype = x.dtype
+    x = x.to(torch.float32)
+    x_var = x if variance_size is None else x[..., :variance_size]
+    variance = x_var.pow(2).mean(dim=-1, keepdim=True)
+    x = x * torch.rsqrt(variance + epsilon)
+    x = x.to(orig_dtype)
+    if weight is not None:
+        x = x * weight
+    return x
+```
+
+The native implementation serves three purposes:
+
+1. **Semantic definition**: Specifies the exact semantics of the operation, including shapes and strides
+2. **Default implementation**: Used when no other (better) implementation is available
+3. **Reference for testing**: Other implementations must match these semantics
+
+### Registering Implementations
+
+Kernel implementations are registered using the `register_impl` decorator on the IR op object:
+
+```python
+# vllm/kernels/vllm_c.py
+from vllm import ir
+
+rms_norm_no_var = lambda x, weight, epsilon, variance_size=None: variance_size is None
+
+@ir.ops.rms_norm.register_impl("vllm_c", supports_args=rms_norm_no_var, supported=current_platform.is_cuda_alike())
+def rms_norm(x: Tensor, weight: Tensor | None, epsilon: float, variance_size: int | None = None) -> Tensor:
+    output = torch.empty_like(x)
+    torch.ops._C.rms_norm(output, x, weight, epsilon)
+    return output
+```
+
+Implementations can specify:
+
+- `supported`: Static boolean indicating if this implementation is available
+- `supports_args`: Function checking if the implementation supports specific arguments
+- `inplace`: Whether this implementation reuses input memory for outputs
+
+### Using IR Operations in Models
+
+IR operations are imported and called directly in model code:
+
+```python
+# vllm/model_executor/layers/layernorm.py
+from vllm import ir
+
+class RMSNorm(nn.Module):
+    def __init__(self, hidden_size: int, eps: float = 1e-6):
+        super().__init__()
+        self.weight = nn.Parameter(torch.ones(hidden_size))
+        self.variance_epsilon = eps
+
+    def forward(self, x: Tensor, residual: Tensor | None = None):
+        if residual is None:
+            return ir.ops.rms_norm(x, self.weight, self.variance_epsilon)
+
+        # Use maybe_inplace overload to allow implementation to reuse input memory for outputs
+        # (using x or residual after this call is undefined behavior)
+        return ir.ops.fused_add_rms_norm.maybe_inplace(
+            x, residual, self.weight, self.variance_epsilon
+        )
+```
+
+### Configuring Kernel Selection
+
+Kernel selection is controlled via priority lists in the configuration.
+Priority lists specify the order in which implementations are considered,
+with the first supported implementation being selected.
+This includes the static support check (`supported=...`) and
+the dynamic arg support check (`supports_args=...`).
+
+#### Command Line Configuration
+
+Use `--ir-op-priority.<op_name>=<provider1>,<provider2>,...`:
+
+```bash
+# CUDA: Use vllm_c implementation for rms_norm
+vllm serve meta-llama/Llama-3.2-1B \
+  --ir-op-priority.rms_norm=vllm_c
+
+# ROCm: Try aiter first, fall back to vllm_c, then native
+vllm serve meta-llama/Llama-3.2-1B \
+  --ir-op-priority.rms_norm=aiter,vllm_c,native
+
+# Configure multiple operations
+vllm serve meta-llama/Llama-3.2-1B \
+  --ir-op-priority.rms_norm=vllm_c \
+  --ir-op-priority.fused_add_rms_norm=vllm_c
+```
+
+#### Python Configuration
+
+```python
+from vllm import LLM
+from vllm.config import VllmConfig, KernelConfig
+
+llm = LLM(
+    model="meta-llama/Llama-3.2-1B",
+    vllm_config=VllmConfig(
+        kernel_config=KernelConfig(
+            ir_op_priority={
+                "rms_norm": ["vllm_c", "native"],
+                "fused_add_rms_norm": ["vllm_c", "native"],
+            }
+        )
+    )
+)
+```
+
+#### Platform Defaults
+
+Each platform provides default priority lists that are automatically applied:
+
+```python
+# CUDA/XPU/ROCm platform defaults (when compiling with Inductor)
+{
+  "rms_norm": ["native"],  # Native torch is default
+  "fused_add_rms_norm": ["native"],
+}
+
+# CUDA platform defaults (eager or Dynamo-only)
+{
+  "rms_norm": ["vllm_c", "native"],
+  "fused_add_rms_norm": ["vllm_c", "native"],
+}
+
+# ROCm platform defaults (future - currently same as CUDA)
+{
+    "rms_norm": ["aiter", "vllm_c", "native"],
+    "fused_add_rms_norm": ["aiter", "vllm_c", "native"],
+}
+
+# XPU platform defaults (eager or Dynamo-only)
+{
+    "rms_norm": ["xpu_kernels", "native"],
+    "fused_add_rms_norm": ["xpu_kernels", "native"],
+}
+```
+
+User-specified priorities are prepended to platform defaults,
+so you only need to specify the out-of-order implementations,
+other implementations are appended automatically.
+
+## Compilation Pipeline
+
+vLLM IR heavily customizes the `torch.compile`-based compilation process to allow custom compile
+passes to operate on high-level IR while still producing efficient low-level code at the end.
+The compilation pipeline consists of several stages:
+
+### 1. Dynamo Tracing
+
+When `torch.compile` traces the model's forward pass, vLLM IR operations appear as custom operations
+in the `vllm_ir` torch library. These operations are opaque to Dynamo, meaning they appear directly
+in the FX graph without decomposition:
+
+```python
+# Python code (epsilon=1e-5)
+x1 = ir.ops.rms_norm(x, weight, epsilon)
+x2, residual_out = ir.ops.fused_add_rms_norm.maybe_inplace(x1, residual, weight, epsilon)
+
+# FX graph after Dynamo tracing
+x1 = torch.ops.vllm_ir.rms_norm.default(x, weight, 1e-5); x = None
+out = torch.ops.vllm_ir.fused_add_rms_norm.maybe_inplace(x1, residual, weight, 1e-5); x1 = residual = None
+x2 = out[0]
+residual_out = out[1]
+```
+
+### 2. AOTAutograd and Functionalization
+
+AOTAutograd functionalizes the graph, converting any mutating operations to functional equivalents.
+For vLLM IR operations with `maybe_inplace` overloads, we perform this manually before AOTAutograd,
+converting them to the functional `default` overload using the pre-grad custom pass hook.
+
+```python
+# After functionalization
+x1 = torch.ops.vllm_ir.rms_norm.default(x, weight, 1e-5); x = None
+out = torch.ops.vllm_ir.fused_add_rms_norm.default(x1, residual, weight, 1e-5); x1 = residual = None
+x2 = out[0]
+residual_out = out[1]
+```
+
+The pass also tracks which inputs were "donated" (passed to `maybe_inplace`),
+storing this information in vLLM's `PassContext` for later use in clone elimination.
+
+### 3. IR Fusion and Transformation Passes
+
+After functionalization, custom vLLM passes operate on the functional FX graph containing high-level IR operations.
+These passes can perform fusion, distribute operations for sequence parallelism, and other transformations:
+
+```python
+# Example: Sequence Parallelism (see SequenceParallelismPass)
+# Before SP pass
+
+all_reduce = torch.ops.vllm.all_reduce(x, "tp:0")
+rms_norm = torch.ops.vllm_ir.rms_norm(all_reduce, weight, 1e-5)
+
+# after SP pass
+reduce_scatter = torch.ops.vllm.reduce_scatter(x, "tp:0")
+rms_norm = torch.ops.vllm_ir.rms_norm(all_reduce, weight, 1e-5)
+all_gather = torch.ops.vllm.all_gather(x, "tp:0")
+```
+
+Fusion passes benefit from the high-level representation: they don't need to match against low-level PyTorch operations,
+handle different kernel implementations separately, or deal with functionalization of custom kernels.
+
+### 4. IR Lowering
+
+The lowering pass (`VllmIRLoweringPass`) replaces each vLLM IR operation with its selected implementation.
+The implementation is chosen based on the priority list and support predicates,
+using the **fake tensors** in the graph's metadata in place of op arguments:
+
+```python
+# Implementation selection, same in eager dispatch and compile lowering
+def dispatch(*args) -> IrOpImpl:
+  for provider in priority_list:  # e.g., ["vllm_c", "native"]
+    impl = ir_op.impls[provider]
+    if not impl.supported:
+      continue
+    if impl.supports_args and not impl.supports_args(*args):
+      continue
+    return impl
+
+# make_fx uses torch.fx.symbolic_trace
+impl_graph = make_fx(selected_impl.impl_fn)
+# Replace IR op node with impl_graph's nodes
+match.replace_by_example(selected_impl.impl_fn, node.args)
+```
+
+For example, lowering `rms_norm` with the `vllm_c` implementation:
+
+```python
+# Before lowering (IR op)
+rms_norm = torch.ops.vllm_ir.rms_norm.default(x, weight, 1e-5)
+
+# After lowering (vllm_c implementation traced)
+# Note: Lowering does not currently functionalize, this will likely change in the future.
+empty =  torch.ops.aten.empty.memory_format(x.shape, ...)
+rms_norm = torch.ops._C.rms_norm(empty, x, weight, 1e-5)
+```
+
+When lowering an implementation that mutates inputs (`inplace=True`),
+the lowering pass inserts clones to preserve functional semantics:
+
+```python
+# vllm_c implementation for fused_add_rms_norm mutates its first two arguments
+# Lowered with clones for safety
+clone_default = torch.ops.aten.clone.default(x)
+clone_default_1 = torch.ops.aten.clone.default(residual)
+fused_add_rms_norm = torch.ops._C.fused_add_rms_norm.default(clone_default, clone_default_1, weight, 1e-5)
+```
+
+### 5. Clone Cleanup
+
+After lowering, the clone elimination pass (`UnsafeCloneEliminationPass`) removes unnecessary clones introduced during lowering.
+This pass is essential for achieving zero-copy behavior when using in-place kernels with `maybe_inplace`.
+The pass removes a clone if:
+
+- the cloned input is created in the graph and not used again in the graph
+- the cloned input is a graph parameter, marked as donated
+
+```python
+# After cleanup (donated inputs, no subsequent uses)
+fused_add_rms_norm = torch.ops._C.fused_add_rms_norm.default(x, residual, weight, 1e-5)
+```
+
+The combination of inplace functionalization (tracking donated inputs) and clone cleanup enables the compiler to safely
+use in-place kernels without adding redundant copies or increasing the memory usage.
+
+### 6. Inductor Optimization and Codegen
+
+After IR lowering and cleanup, the graph contains only standard PyTorch operations and platform-specific custom ops.
+Inductor then performs its standard codegen:
+
+- **Inductor lowering and pointwise fusion**: Fusing element-wise operations, reductions, etc.
+- **Memory planning**: Determining buffer allocation and reuse
+- **Kernel generation**: Generating Triton or C++ code for fused operations
+- **Autotuning**: Selecting the best kernel configurations
+
+### Pipeline Summary
+
+```text
+Model Forward Pass
+    ↓
+[Dynamo Tracing] → FX Graph with vllm_ir.* ops
+    ↓
+[Pre-grad: Inplace Functionalization] → maybe_inplace → default, track donated inputs
+    ↓
+[AOTAutograd] → Functionalization
+    ↓
+[Post-grad: IR Fusion Passes] → Fuse high-level IR ops (e.g., rms_norm + quant)
+    ↓
+[Post-grad: IR Lowering] → vllm_ir.* ops → impl ops (with clones if needed)
+    ↓
+[Post-grad: Clone Cleanup] → Remove unnecessary clones using donated input info
+    ↓
+[Inductor] → Pattern matching, fusion, memory planning, codegen
+    ↓
+Compiled Code
+```
+
+## Core vLLM IR Concepts
+
+### Operation Declaration
+
+Operations are declared with the `@register_op` decorator, which creates an `IrOp` object:
+
+```python
+@register_op(
+    name=None,           # Operation name (defaults to function name)
+    activations=None,    # List of activation parameters (defaults to params starting with 'x')
+    allow_inplace=False, # Whether to create a maybe_inplace overload
+)
+def op_name(...):
+    ...
+```
+
+**Parameters:**
+
+- `activations`: List of parameter names considered "activations" (typically consumed by `maybe_inplace`). Defaults to parameters starting with `x`.
+- `allow_inplace`: Creates a `maybe_inplace` overload for memory-efficient execution (see below).
+
+### The `maybe_inplace` Overload
+
+The `maybe_inplace` overload is a critical feature for memory efficiency in LLM inference.
+It signals that the caller doesn't need to preserve the activation inputs after the operation,
+allowing in-place implementations to reuse input memory for outputs.
+
+#### Semantics and Usage
+
+```python
+# Standard usage: inputs are preserved
+out, res_out = ir.ops.fused_add_rms_norm(x, residual, weight, epsilon)
+# x and residual are unchanged, out and res_out are new tensors
+
+# maybe_inplace: inputs may be modified
+out, res_out = ir.ops.fused_add_rms_norm.maybe_inplace(x, residual, weight, epsilon)
+# x and residual may be modified (undefined behavior to use them after this)
+# out and res_out may alias x and residual
+```
+
+Using an activation input after passing it to `maybe_inplace` is **undefined behavior**:
+
+```python
+# WRONG: Using x after donating it
+out, res_out = ir.ops.fused_add_rms_norm.maybe_inplace(x, residual, weight, epsilon)
+result = out + x  # ERROR: x was donated!
+```
+
+If you need to preserve an input, either use the default overload or clone manually:
+
+```python
+# Option 1: Use default overload
+out, res_out = ir.ops.fused_add_rms_norm(x, residual, weight, epsilon)
+result = out + x  # OK: x is preserved
+
+# Option 2: Clone before maybe_inplace
+out, res_out = ir.ops.fused_add_rms_norm.maybe_inplace(x.clone(), residual, weight, epsilon)
+result = out + x  # OK: x is preserved, clone was donated
+```
+
+#### Compilation Behavior
+
+During compilation, the inplace functionalization pass validates that donated inputs are
+not used again and converts `maybe_inplace` to the functional `default` overload:
+
+```python
+# Inplace functionalization pass (pre-grad)
+for node in graph.nodes:
+    if node.target == torch.ops.vllm_ir.fused_add_rms_norm.maybe_inplace:
+        # Check that activation inputs aren't used after this node
+        for activation_arg in activation_inputs:
+            for user in activation_arg.users:
+                if user appears after node:
+                    raise ValueError(f"Input {activation_arg} donated but used again")
+
+        # Convert to default overload
+        node.target = torch.ops.vllm_ir.fused_add_rms_norm.default
+
+        # Track donated graph inputs for later clone elimination
+        for i, arg in enumerate(node.args):
+            if arg.op == "placeholder" and i in activation_indices:
+                pass_context.donated_input_ids.add(node_to_idx[arg])
+```
+
+The donated input information is then used by the clone cleanup pass to eliminate
+unnecessary copies when in-place kernels are lowered.
+
+#### Eager Mode Behavior
+
+In eager mode (without `torch.compile`), `maybe_inplace` enables **maximally memory-efficient**
+execution by allowing the IR operation to dispatch directly to in-place implementations:
+
+```python
+# Eager dispatch logic for maybe_inplace
+impl: IrOpImpl = ir_op.dispatch(*args)
+return impl.impl_fn(*args)
+
+# Eager dispatch logic for default:
+impl: IrOpImpl = ir_op.dispatch(*args)
+if impl.inplace:
+  args = [
+    arg.clone() if i in ir_op.activations else arg
+    for i, arg in enumerate(args)
+  ]
+return impl.impl_fn(*args)
+```
+
+The combination of `maybe_inplace` in model code and in-place kernel implementations provides optimal memory efficiency
+in both eager and compiled modes, with identical semantics in both cases.
+
+#### Memory Savings Example
+
+Consider a transformer layer with residual connections:
+
+```python
+# Without maybe_inplace (2 allocations per layer)
+hidden_states = self.attention(input)
+normed, residual = ir.ops.fused_add_rms_norm(hidden_states, input, weight, eps)
+# Memory: input (preserved), hidden_states (preserved), normed (new), residual (new)
+
+# With maybe_inplace (0 allocations per layer when using in-place kernel)
+hidden_states = self.attention(input)
+normed, residual = ir.ops.fused_add_rms_norm.maybe_inplace(hidden_states, input, weight, eps)
+# Memory: normed (reuses hidden_states), residual (reuses input)
+```
+
+### Implementation Registration
+
+Implementations are registered using the `register_impl` method:
+
+```python
+@ir.ops.op_name.register_impl(
+    provider="provider_name",  # Unique identifier (e.g., "vllm_c", "aiter", "triton")
+    supported=True,            # Static availability check
+    supports_args=None,        # Dynamic argument support check
+)
+def impl_fn(...):
+    ...
+```
+
+**Provider naming conventions:**
+
+- `native`: Reserved for the native torch implementation (declared with `@register_op`)
+- `vllm_c`: C++/CUDA kernels via `torch.ops._C`
+- `aiter`: AMD AITER library
+- `xpu_kernels`: SYCL/SYCLTLA kernels implemented in `vllm-xpu-kernels`
+- `triton_*`: Triton kernels
+- Platform/library names for other implementations
+
+**Support checking:**
+
+- `supported`: Static boolean, checked once at import time (e.g., `HAS_TRITON`, `is_cuda_alike()`)
+- `supports_args`: Function `(*args, **kwargs) -> bool` checking argument compatibility
+    - Called with **fake tensors** during compilation for zero-cost checking
+    - Called with **real tensors** during eager mode dispatch
+    - Should NOT check batch sizes or add guards based on values
+
+Example support predicate:
+
+```python
+def aiter_rms_norm_supports(x, weight, epsilon, variance_size=None):
+    # Check dtype (OK: doesn't depend on batch size)
+    if x.dtype not in [torch.float16, torch.bfloat16]:
+        return False
+    # Check optional parameter (OK: static check)
+    if variance_size is not None:
+        return False
+    return True
+
+@ir.ops.rms_norm.register_impl("aiter", supports_args=aiter_rms_norm_supports)
+def rms_norm(...):
+    ...
+```
+
+Batch-invariant kernels are automatically selected when `VLLM_BATCH_INVARIANT=1` is set.
+
+### Eager Mode vs Compile Mode
+
+vLLM IR operations behave identically in eager and compile modes:
+
+**Eager mode:**
+
+- Direct dispatch to implementation based on priority list
+- Support checked with real tensor arguments
+- Minimal overhead (can be optimized further if needed)
+
+**Compile mode:**
+
+- IR ops appear in FX graph as `torch.ops.vllm_ir.*` custom ops
+- Lowering selects implementation using fake tensors
+- Full integration with Inductor optimizations
+
+This consistency enables:
+
+- Prototyping in eager mode with confidence
+- Debugging by disabling compilation
+- Gradual migration from eager to compiled execution
+
+## Other Topics
+
+### Out-of-Tree Implementations
+
+External platforms can register implementations without modifying vLLM:
+
+```python
+# In external package
+from vllm import ir
+
+@ir.ops.rms_norm.register_impl("my_platform", supported=is_my_platform())
+def rms_norm(x, weight, epsilon, variance_size=None):
+    return my_platform.rms_norm(x, weight, epsilon)
+```
+
+Then configure priority to use your implementation:
+
+```python
+class MyPlatform(Platform):
+  def get_default_ir_op_priority(self):
+    return IrOpPriorityConfig(rms_norm=['my_platform', 'native'])
+
+# Users can still override priority in the same way
+llm = LLM(ir_op_priority=IrOpPriorityConfig(rms_norm=['custom_oot_kernel']))
+```
+
+### Debugging and Observability
+
+!!! note
+    Please let us know how observability can be improved for your use-case!
+
+Enable debug logging to see kernel selection:
+
+```bash
+VLLM_LOGGING_LEVEL=DEBUG vllm serve ...
+```
+
+This logs:
+
+- Which implementations are selected for each operation
+- Why implementations were rejected (unsupported, args not supported)
+- Compilation cache hits/misses
+- IR lowering statistics
+
+Check selected implementations in compiled graphs:
+
+```python
+# After compilation, inspect the lowering pass
+lowering_pass = backend.lowering_pass
+print(lowering_pass.selected_impls)
+# Output: {'rms_norm': {'node_123': 'vllm_c', 'node_456': 'vllm_c'}}
+```
+
+## Migration from CustomOp
+
+vLLM IR is designed to coexist with and gradually replace `CustomOp`:
+
+1. **Op declaration**: Convert `CustomOp` class `PluggableLayer` and move `forward_native` to `@register_op` function
+2. **Implementation registration**: Use `@ir.ops.op_name.register_impl` instead of overriding methods
+3. **Layer usage**: Replace `self.op(...)` with `ir.ops.op_name(...)`
+4. **Configuration**: Migrate `--compilation-config.custom-ops` to `--ir-op-priority`
+
+The migration can be done incrementally, one operation at a time.
+
+## See Also
+
+- [torch.compile Integration](torch_compile.md) - General compilation infrastructure
+- [Fusions](fusions.md) - Custom fusion and transformation passes in vLLM
+- [Custom Operations](custom_op.md) - Legacy custom op system
diff --git a/docs/examples/README.md b/docs/examples/README.md
index f5707ab6eeed..9d6126a65c41 100644
--- a/docs/examples/README.md
+++ b/docs/examples/README.md
@@ -1,7 +1,17 @@
 # Examples
 
-vLLM's examples are split into three categories:
+vLLM's examples are organized into the following categories:
 
-- If you are using vLLM from within Python code, see the [Offline Inference](../../examples/offline_inference) section.
-- If you are using vLLM from an HTTP application or client, see the [Online Serving](../../examples/online_serving) section.
-- For examples of using some of vLLM's advanced features (e.g. LMCache or Tensorizer) which are not specific to either of the above use cases, see the [Others](../../examples/others) section.
+- **[`basic/`](../../examples/basic)** – Minimal examples for offline inference and online serving.
+- **[`generate/`](../../examples/generate)** – Text generation examples, including multimodal models.
+- **[`pooling/`](../../examples/pooling)** – Examples for embedding, classification, scoring, reward, etc.
+- **[`speech_to_text/`](../../examples/speech_to_text)** – Speech transcription, translation and real-time audio examples.
+- **[`features/`](../../examples/features)** – Demonstrations of individual vLLM features: automatic prefix caching, speculative decoding, LoRA, structured outputs, prompt embedding, pause/resume, batch invariance, KV events, data parallelism, and more.
+- **[`reasoning/`](../../examples/reasoning)** – Examples for reasoning with vLLM.
+- **[`tool_calling/`](../../examples/tool_calling)** – Examples for function/tool calling with vLLM.
+- **[`applications/`](../../examples/applications)** – Application examples such as chatbots and RAG (Retrieval-Augmented Generation).
+- **[`rl/`](../../examples/rl)** – Reinforcement learning examples.
+- **[`deployment/`](../../examples/deployment)** – Examples for deploying vLLM in production.
+- **[`ray_serving/`](../../examples/ray_serving)** – Scalable serving using Ray.
+- **[`disaggregated/`](../../examples/disaggregated)** – Examples for disaggregated serving (separate prefill and decode), including various kv cache connectors (LMCache, Mooncake, FlexKV, P2P NCCL) and failure recovery.
+- **[`observability/`](../../examples/observability)** – Metrics, logging, tracing (OpenTelemetry), and dashboards (Grafana, Perses).
diff --git a/docs/features/README.md b/docs/features/README.md
index e62d9cddee76..28362f401477 100644
--- a/docs/features/README.md
+++ b/docs/features/README.md
@@ -52,10 +52,10 @@ th:not(:first-child) {
 | [mm](multimodal_inputs.md) | ✅ | ✅ | [🟠](https://github.com/vllm-project/vllm/pull/4194)<sup>^</sup> | ❔ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | | | |
 | best-of | ✅ | ✅ | ✅ | [❌](https://github.com/vllm-project/vllm/issues/6137) | ✅ | ❌ | ✅ | ✅ | ✅ | ❔ | [❌](https://github.com/vllm-project/vllm/issues/7968) | ✅ | ✅ | | |
 | beam-search | ✅ | ✅ | ✅ | [❌](https://github.com/vllm-project/vllm/issues/6137) | ✅ | ❌ | ✅ | ✅ | ✅ | ❔ | [❌](https://github.com/vllm-project/vllm/issues/7968) | ❔ | ✅ | ✅ | |
-| [prompt-embeds](prompt_embeds.md) | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❔ | ❔ | ❌ | ❔ | ❔ | ✅ |
+| [prompt-embeds](prompt_embeds.md) | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❔ | ❔ | ✅ | ❔ | ❔ | ✅ |
 
 \* Chunked prefill and prefix caching are only applicable to last-token or all pooling with causal attention.  
-<sup>^</sup> LoRA is only applicable to the language backbone of multimodal models.
+<sup>^</sup> LoRA is only applicable to the language backbone of multimodal models.  
 
 ### Feature x Hardware
 
diff --git a/docs/features/automatic_prefix_caching.md b/docs/features/automatic_prefix_caching.md
index 3718a4b74eb2..fe7977ee23d0 100644
--- a/docs/features/automatic_prefix_caching.md
+++ b/docs/features/automatic_prefix_caching.md
@@ -11,7 +11,7 @@ Automatic Prefix Caching (APC in short) caches the KV cache of existing queries,
 
 Set `enable_prefix_caching=True` in vLLM engine to enable APC. Here is an example:
 
-[examples/offline_inference/automatic_prefix_caching.py](../../examples/offline_inference/automatic_prefix_caching.py)
+[examples/features/automatic_prefix_caching/automatic_prefix_caching_offline.py](../../examples/features/automatic_prefix_caching/automatic_prefix_caching_offline.py)
 
 ## Example workloads
 
diff --git a/docs/features/batch_invariance.md b/docs/features/batch_invariance.md
index 85487697fd37..b23631484508 100644
--- a/docs/features/batch_invariance.md
+++ b/docs/features/batch_invariance.md
@@ -104,8 +104,8 @@ for output in outputs:
 Batch invariance has been tested and verified on the following models:
 
 - **DeepSeek series**: `deepseek-ai/DeepSeek-V3`, `deepseek-ai/DeepSeek-V3-0324`, `deepseek-ai/DeepSeek-R1`, `deepseek-ai/DeepSeek-V3.1`
-- **Qwen3 (Dense)**: `Qwen/Qwen3-1.7B`, `Qwen/Qwen3-8B`
-- **Qwen3 (MoE)**: `Qwen/Qwen3-30B-A3B`, `Qwen/Qwen3-Next-80B-A3B-Instruct`
+- **Qwen3 (Dense)**: `Qwen/Qwen3-1.7B`, `Qwen/Qwen3-8B`, `Qwen/Qwen3-4B-AWQ`, `Qwen/Qwen3-8B-AWQ`
+- **Qwen3 (MoE)**: `Qwen/Qwen3-30B-A3B`, `Qwen/Qwen3-Next-80B-A3B-Instruct`, `Qwen/Qwen3-30B-A3B-Thinking-2507-FP8`
 - **Qwen2.5**: `Qwen/Qwen2.5-0.5B-Instruct`, `Qwen/Qwen2.5-1.5B-Instruct`, `Qwen/Qwen2.5-3B-Instruct`, `Qwen/Qwen2.5-7B-Instruct`, `Qwen/Qwen2.5-14B-Instruct`, `Qwen/Qwen2.5-32B-Instruct`
 - **Llama 3**: `meta-llama/Llama-3.1-8B-Instruct`, `meta-llama/Llama-3.2-1B-Instruct`
 - **GPT-OSS**: `openai/gpt-oss-20b`, `openai/gpt-oss-120b`
diff --git a/docs/features/context_extension.md b/docs/features/context_extension.md
new file mode 100644
index 000000000000..f96340c3183f
--- /dev/null
+++ b/docs/features/context_extension.md
@@ -0,0 +1,70 @@
+# Context Extension
+
+!!! note
+    The `--rope-scaling` parameter used in older versions of vLLM is no longer supported. Please use the `--hf-overrides` method with `rope_parameters` instead.
+This directory contains examples for extending the context length of models using vLLM.
+
+## Offline Inference Example
+
+The [`context_extension.py`](../../examples/features/context_extension/context_extension_offline.py) script demonstrates how to extend the context length of a Qwen model using the YARN method (rope_parameters) and run a simple chat example.
+
+### Usage
+
+```bash
+python examples/features/context_extension/context_extension_offline.py
+```
+
+## OpenAI Online Method
+
+You can also use vLLM's OpenAI-compatible API to serve models with extended context length.
+
+### Usage
+
+Run the vLLM server with the following command to extend the context length using YARN:
+
+```bash
+vllm serve Qwen/Qwen3-0.6B \
+  --hf-overrides '{"rope_parameters": {"factor": 4.0, "original_max_position_embeddings": 32768, "rope_theta": 1000000, "rope_type": "yarn"}}' \
+  --max-model-len 131072
+```
+
+### Client Example
+
+After starting the server, you can use the OpenAI Python client to interact with it:
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://localhost:8000/v1",
+    api_key="token-abc123"  # Dummy API key, required by the client
+)
+
+response = client.chat.completions.create(
+    model="Qwen/Qwen3-0.6B",
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant"},
+        {"role": "user", "content": "Hello"}
+    ],
+    max_tokens=128,
+    temperature=0.8,
+    top_p=0.95
+)
+
+print(response.choices[0].message.content)
+```
+
+### Key Parameters
+
+The available parameters depend on the `rope_type` you choose. For detailed information about all supported RoPE types and their specific parameters, please refer to the [Hugging Face Transformers RoPE documentation](https://huggingface.co/docs/transformers/main/en/internal/rope_utils#transformers.RopeParameters).
+
+Common parameters include:
+
+- `rope_type`: The type of RoPE implementation (e.g., "yarn", "linear", "dynamic")
+- `factor`: The factor by which to extend the context length
+- `original_max_position_embeddings`: The original maximum position embeddings of the model
+
+The following parameters are specific to vLLM:
+
+- `max_model_len`: The new maximum sequence length after extension (original * factor).
+  Used for KV cache pre‑allocation and request limit at serving time.
diff --git a/docs/features/disagg_encoder.md b/docs/features/disagg_encoder.md
index d95427464196..c27d6b277284 100644
--- a/docs/features/disagg_encoder.md
+++ b/docs/features/disagg_encoder.md
@@ -36,10 +36,10 @@ The current reference pathway is **ExampleConnector**.
 Below ready-to-run scripts shows the workflow:
 
 1 Encoder instance + 1 PD instance:
-`examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh`
+`examples/disaggregated/disaggregated_encoder/disagg_1e1pd_example.sh`
 
 1 Encoder instance + 1 Prefill instance + 1 Decode instance:
-`examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh`
+`examples/disaggregated/disaggregated_encoder/disagg_1e1p1d_example.sh`
 
 ---
 
@@ -72,4 +72,4 @@ For the PD disaggregation part, the Prefill instance receives cache exactly the
 
 `docs/features/disagg_prefill.md` shows the brief idea about the disaggregated prefill (v0)
 
-We create the example setup with the **NixlConnector** from `vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py` and referred to the `tests/v1/kv_connector/nixl_integration/toy_proxy_server.py` to facilitate the kv transfer between P and D;
+We create the example setup with the **NixlConnector** from `vllm/distributed/kv_transfer/kv_connector/v1/nixl/` and referred to the `tests/v1/kv_connector/nixl_integration/toy_proxy_server.py` to facilitate the kv transfer between P and D;
diff --git a/docs/features/disagg_prefill.md b/docs/features/disagg_prefill.md
index f7d3f9a70f7e..9ad005be3fed 100644
--- a/docs/features/disagg_prefill.md
+++ b/docs/features/disagg_prefill.md
@@ -17,15 +17,15 @@ Two main reasons:
 
 ## Usage example
 
-Please refer to [examples/online_serving/disaggregated_prefill.sh](../../examples/online_serving/disaggregated_prefill.sh) for the example usage of disaggregated prefilling.
+Please refer to [examples/disaggregated/disaggregated_prefill.sh](../../examples/disaggregated/disaggregated_prefill.sh) for the example usage of disaggregated prefilling.
 
 Now supports 6 types of connectors:
 
-- **ExampleConnector**: refer to [examples/offline_inference/disaggregated-prefill-v1/run.sh](../../examples/offline_inference/disaggregated-prefill-v1/run.sh) for the example usage of ExampleConnector disaggregated prefilling.
-- **LMCacheConnectorV1**: refer to [examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh](../../examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh) for the example usage of LMCacheConnectorV1 disaggregated prefilling which uses NIXL as the underlying KV transmission.
-- **NixlConnector**: refer to [tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh](../../tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh) for the example usage of NixlConnector disaggregated prefilling which support fully async send/recv. For detailed usage guide, see [NixlConnector Usage Guide](nixl_connector_usage.md).
-- **P2pNcclConnector**: refer to [examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh](../../examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh) for the example usage of P2pNcclConnector disaggregated prefilling.
-- **MooncakeConnector**: refer to [examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh](../../examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh) for the example usage of ExampleConnector disaggregated prefilling. For detailed usage guide, see [MooncakeConnector Usage Guide](mooncake_connector_usage.md).
+- **ExampleConnector**: refer to [examples/disaggregated/example_connector/run.sh](../../examples/disaggregated/example_connector/run.sh) for the example usage of ExampleConnector disaggregated prefilling.
+- **LMCacheConnectorV1**: refer to [examples/disaggregated/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh](../../examples/disaggregated/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh) for the example usage of LMCacheConnectorV1 disaggregated prefilling which uses NIXL as the underlying KV transmission.
+- **NixlConnector**: refer to [tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh](../../tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh) for the example usage of NixlConnector disaggregated prefilling which support fully async send/recv. For detailed usage guide, see [NixlConnector Usage Guide](nixl_connector_usage.md). For feature compatibility details, see [NixlConnector Compatibility Matrix](nixl_connector_compatibility.md).
+- **P2pNcclConnector**: refer to [examples/disaggregated/p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh](../../examples/disaggregated/p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh) for the example usage of P2pNcclConnector disaggregated prefilling.
+- **MooncakeConnector**: refer to [examples/disaggregated/mooncake_connector/run_mooncake_connector.sh](../../examples/disaggregated/mooncake_connector/run_mooncake_connector.sh) for the example usage of MooncakeConnector disaggregated prefilling. For detailed usage guide, see [MooncakeConnector Usage Guide](mooncake_connector_usage.md).
 - **MultiConnector**: take advantage of the kv_connector_extra_config: dict[str, Any] already present in KVTransferConfig to stash all the connectors we want in an ordered list of kwargs.such as:
 
   ```bash
@@ -44,7 +44,7 @@ For NixlConnector, you may also specify one or multiple NIXL_Backend. Such as:
   --kv-transfer-config '{"kv_connector":"OffloadingConnector","kv_role":"kv_both","kv_connector_extra_config":{"block_size": 64, "cpu_bytes_to_use": 1000000000}}'
   ```
 
-- **FlexKVConnectorV1**: refer to [examples/offline_inference/prefix_caching_flexkv.py](../../examples/offline_inference/prefix_caching_flexkv.py) for the example usage of FlexKVConnectorV1. FlexKV is a distributed KV Store and multi-level cache management system for ultra-large-scale LLM inference.
+- **FlexKVConnectorV1**: refer to [examples/disaggregated/flexkv_connector/prefix_caching_flexkv.py](../../examples/disaggregated/flexkv_connector/prefix_caching_flexkv.py) for the example usage of FlexKVConnectorV1. FlexKV is a distributed KV Store and multi-level cache management system for ultra-large-scale LLM inference.
 
   ```bash
   --kv-transfer-config '{"kv_connector":"FlexKVConnectorV1","kv_role":"kv_both"}'
diff --git a/docs/features/index_cache.md b/docs/features/index_cache.md
new file mode 100644
index 000000000000..7e73d0949554
--- /dev/null
+++ b/docs/features/index_cache.md
@@ -0,0 +1,54 @@
+# IndexCache
+
+IndexCache reduces redundant top-k computation in DeepSeek-V3.2 (DSA) models by caching and reusing top-k indices across layers.
+
+## Background
+
+DeepSeek-V3.2 uses a DeepSeek Sparse Attention (DSA) mechanism where top-k token selection is computed per layer. For deep models with many layers, this computation can be expensive. IndexCache allows skipping redundant top-k computations by reusing indices from previous layers.
+
+See: [IndexCache Paper](https://arxiv.org/abs/2603.12201)
+
+## Usage
+
+### CLI
+
+```bash
+vllm serve deepseek-ai/DeepSeek-V3.2 \
+    --hf-overrides '{"use_index_cache": true, "index_topk_freq": 4}' ...
+```
+
+### Configuration Reference
+
+| Parameter            | Type | Default | Description                                                                                                                                      |
+|----------------------|------|---------|--------------------------------------------------------------------------------------------------------------------------------------------------|
+| `use_index_cache`    | bool | false   | Enable IndexCache. Must be set to true to use this feature                                                                                       |
+| `index_topk_freq`    | int  | 1       | Frequency (in layers) at which top-k is computed. 1 = compute on every layer (disabled), 4 = compute on 1/4 of layers                            |
+| `index_topk_pattern` | str  | null    | Per-layer F/S pattern. Overrides index_topk_freq if set. Each character maps to one DSA layer: F = Full, S = Shared                              |
+
+### Configuration Examples
+
+**Using `index_topk_freq`** (compute every N layers):
+
+```bash
+vllm serve deepseek-ai/DeepSeek-V3.2 \
+    --hf-overrides '{"use_index_cache": true, "index_topk_freq": 4}' ...
+```
+
+**Using `index_topk_pattern`** (explicit per-layer control):
+
+```bash
+# custom pattern for 61 layers: F = compute, S = reuse
+vllm serve deepseek-ai/DeepSeek-V3.2 \
+    --hf-overrides '{"use_index_cache": true, "index_topk_pattern": "FFSFSSSFSSFFFSSSFFFSFSSSSSSFFSFFSFFSSFFFFFFSFFFFFSFFSSSSSSFSF"}'
+```
+
+## How It Works
+
+1. When IndexCache is enabled, layers marked with `"F"` (Full) calculate and store top-k indices
+2. Subsequent layers marked with `"S"` (Shared) receive the cached indices from the previous layer instead of recomputing
+3. The cached indices are passed through the layer stack, reducing total computation
+
+## Requirements
+
+- DeepSeek-V3.2 or compatible DSA model
+- `use_index_cache: true` via `--hf-overrides`
diff --git a/docs/features/lora.md b/docs/features/lora.md
index 2e7b36545d46..839624542ec5 100644
--- a/docs/features/lora.md
+++ b/docs/features/lora.md
@@ -47,7 +47,7 @@ the third parameter is the path to the LoRA adapter.
     )
     ```
 
-Check out [examples/offline_inference/multilora_inference.py](../../examples/offline_inference/multilora_inference.py) for an example of how to use LoRA adapters with the async engine and how to use more advanced configuration options.
+Check out [examples/features/lora/multilora_offline.py](../../examples/features/lora/multilora_offline.py) for an example of how to use LoRA adapters with the async engine and how to use more advanced configuration options.
 
 ## Serving LoRA Adapters
 
@@ -248,6 +248,57 @@ Now, you can specify a base_model_name alongside the name and path using JSON fo
 
 To provide the backward compatibility support, you can still use the old key-value format (name=path), but the `base_model_name` will remain unspecified in that case.
 
+## Mixing 2D and 3D MoE LoRA Adapters
+
+To serve 2D-format(based on `megatron`) and 3D-format (based on `peft`) adapters from the same engine instance, start the server with `--enable-mixed-moe-lora-format`
+and declare the layout of each adapter explicitly via the `is_3d_lora_weight` field.
+
+Server startup (static modules):
+
+```bash
+vllm serve Qwen/Qwen3.6-35B-A3B \
+    --enable-lora \
+    --enable-mixed-moe-lora-format \
+    --tensor-parallel-size 4 \
+    --enable-expert-parallel \
+    --lora-modules \
+        '{"name": "lora-2d", "path": "jeeejeee/qwen36-35ba3b-2d-weights-poken-lora", "is_3d_lora_weight": false}' \
+        '{"name": "lora-3d", "path": "jeeejeee/qwen36-35ba3b-moe-all-linear-poken-lora", "is_3d_lora_weight": true}'
+```
+
+Dynamic load via `/v1/load_lora_adapter`:
+
+```bash
+curl -X POST http://localhost:8000/v1/load_lora_adapter \
+-H "Content-Type: application/json" \
+-d '{
+    "lora_name": "lora-3d",
+    "lora_path": "/path/to/3d-format-lora",
+    "is_3d_lora_weight": true
+}'
+```
+
+!!! warning "You must know your adapter's layout"
+    Under `--enable-mixed-moe-lora-format`, vLLM trusts whatever
+    `is_3d_lora_weight` the caller declares — it does **not** inspect the
+    checkpoint to verify. A wrong declaration will load weights into the
+    wrong stacked buffers and silently produce garbage outputs, with no
+    error at load time. Confirm the layout before serving:
+
+    - **2D (per-expert, megatron-style)** → set `is_3d_lora_weight: false`.
+      Adapter keys look like `...experts.{idx}.gate_proj.lora_A.weight`,
+      `...experts.{idx}.up_proj.lora_A.weight`,
+      `...experts.{idx}.down_proj.lora_A.weight` — one set per expert.
+    - **3D (fused, peft-style)** → set `is_3d_lora_weight: true`.
+      Adapter keys look like `...experts.gate_up_proj.lora_A.weight`,
+      `...experts.down_proj.lora_A.weight` — a single tensor that stacks
+      all experts on the leading dim.
+
+When `--enable-mixed-moe-lora-format` is **not** set, `is_3d_lora_weight`
+is ignored: vLLM picks the wrapper from the base model's
+`is_3d_moe_weight` and the adapter is required to match. The field is
+also ignored for non-MoE models.
+
 ## LoRA model lineage in model card
 
 The new format of `--lora-modules` is mainly to support the display of parent model information in the model card. Here's an explanation of how your current response supports this:
diff --git a/docs/features/mooncake_connector_usage.md b/docs/features/mooncake_connector_usage.md
index 0e2478924ead..cc8c8ecff258 100644
--- a/docs/features/mooncake_connector_usage.md
+++ b/docs/features/mooncake_connector_usage.md
@@ -31,7 +31,7 @@ vllm serve Qwen/Qwen2.5-7B-Instruct --port 8020 --kv-transfer-config '{"kv_conne
 ### Proxy
 
 ```bash
-python examples/online_serving/disaggregated_serving/mooncake_connector/mooncake_connector_proxy.py --prefill http://192.168.0.2:8010 --decode http://192.168.0.3:8020
+python examples/disaggregated/disaggregated_serving/mooncake_connector/mooncake_connector_proxy.py --prefill http://192.168.0.2:8010 --decode http://192.168.0.3:8020
 ```
 
 Now you can send requests to the proxy server through port 8000.
@@ -65,5 +65,5 @@ Now you can send requests to the proxy server through port 8000.
 
 Refer to these example scripts in the vLLM repository:
 
-- [run_mooncake_connector.sh](../../examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh)
-- [mooncake_connector_proxy.py](../../examples/online_serving/disaggregated_serving/mooncake_connector/mooncake_connector_proxy.py)
+- [run_mooncake_connector.sh](../../examples/disaggregated/mooncake_connector/run_mooncake_connector.sh)
+- [mooncake_connector_proxy.py](../../examples/disaggregated/mooncake_connector/mooncake_connector_proxy.py)
diff --git a/docs/features/mooncake_store_connector_usage.md b/docs/features/mooncake_store_connector_usage.md
new file mode 100644
index 000000000000..f23acae10c4f
--- /dev/null
+++ b/docs/features/mooncake_store_connector_usage.md
@@ -0,0 +1,219 @@
+# MooncakeStoreConnector Usage Guide
+
+MooncakeStoreConnector is a KV cache connector that uses [MooncakeDistributedStore](https://github.com/kvcache-ai/Mooncake) as a shared KV cache pool. Unlike `MooncakeConnector` which does direct point-to-point KV transfer between prefiller and decoder, MooncakeStoreConnector enables KV cache offloading to an external distributed store, supporting:
+
+- **CPU/disk offloading**: Extend effective KV cache capacity by offloading to CPU memory or disk via Mooncake's transfer engine.
+- **Prefix caching across instances**: Hash-based deduplication allows multiple vLLM instances to share cached KV blocks through the store.
+- **Single-node and multi-node deployment**: Works both as a standalone KV cache extension and in disaggregated prefill-decode setups.
+
+## Prerequisites
+
+### Install Mooncake
+
+Install mooncake through pip:
+
+```bash
+uv pip install mooncake-transfer-engine
+```
+
+Refer to the [Mooncake official repository](https://github.com/kvcache-ai/Mooncake) for more installation instructions and building from source.
+
+### Start the Mooncake Master Server
+
+The Mooncake master manages metadata and coordinates the distributed store. Start it before launching vLLM:
+
+```bash
+mooncake_master --port 50051
+```
+
+Default ports:
+
+- RPC: 50051
+
+Multiple vLLM instances can share the same master server.
+
+### Configure Mooncake
+
+Create a JSON configuration file (e.g., `mooncake_config.json`):
+
+```json
+{
+  "mode": "embedded",
+  "metadata_server": "P2PHANDSHAKE",
+  "master_server_address": "127.0.0.1:50051",
+  "global_segment_size": "80GB",
+  "local_buffer_size": "4GB",
+  "protocol": "rdma",
+  "device_name": "",
+  "enable_offload": false
+}
+```
+
+- `mode`: Topology selection. `"embedded"` (default, PR-40900 baseline) has each
+  vLLM rank contribute `global_segment_size` to the pool in-process.
+  `"standalone-store"` makes ranks pure requesters — an external
+  `mooncake_client` process owns the CPU pool and (optionally) the SSD tier.
+- `protocol`: Use `"rdma"` for best performance. `"tcp"` works as a fallback.
+- `global_segment_size`: CPU memory contributed to the distributed pool (per
+  GPU). Must be `> 0` in `embedded` mode and `0` in `standalone-store` mode.
+- `local_buffer_size`: Private buffer for this node's own operations (per GPU).
+- `enable_offload`: When `true`, vLLM allocates a DirectIO staging buffer so
+  large prefills do not exceed the owner's SSD-write budget. Set this together
+  with the matching `--enable_offload=true` flag on `mooncake_master` and on
+  the external `mooncake_client` (if any).
+
+Set the config path via environment variable:
+
+```bash
+export MOONCAKE_CONFIG_PATH=/path/to/mooncake_config.json
+```
+
+## Usage
+
+### Single-Node KV Cache Offloading
+
+Use MooncakeStoreConnector to offload KV cache to CPU memory, extending the effective cache size:
+
+```bash
+MOONCAKE_CONFIG_PATH=mooncake_config.json \
+vllm serve meta-llama/Llama-3.1-8B-Instruct \
+    --kv-transfer-config '{"kv_connector":"MooncakeStoreConnector","kv_role":"kv_both"}'
+```
+
+### Disaggregated Prefill-Decode (XpYd)
+
+In disaggregated prefill-decode mode, use `MultiConnector` to combine `MooncakeConnector` (point-to-point KV transfer) with `MooncakeStoreConnector` (shared KV cache pool). This enables both direct P2P transfer between prefiller and decoder, and cross-instance prefix cache sharing via the distributed store.
+**Prefiller Node:**
+
+```bash
+MOONCAKE_CONFIG_PATH=mooncake_config.json \
+VLLM_MOONCAKE_BOOTSTRAP_PORT=50052 \
+vllm serve meta-llama/Llama-3.1-8B-Instruct \
+    --port 8100 \
+    --kv-transfer-config '{
+        "kv_connector": "MultiConnector",
+        "kv_role": "kv_producer",
+        "kv_connector_extra_config": {
+            "connectors": [
+                {
+                    "kv_connector": "MooncakeConnector",
+                    "kv_role": "kv_producer"
+                },
+                {
+                    "kv_connector": "MooncakeStoreConnector",
+                    "kv_role": "kv_both"
+                }
+            ]
+        }
+    }'
+```
+
+**Decoder Node:**
+
+```bash
+MOONCAKE_CONFIG_PATH=mooncake_config.json \
+VLLM_MOONCAKE_BOOTSTRAP_PORT=50053 \
+vllm serve meta-llama/Llama-3.1-8B-Instruct \
+    --port 8200 \
+    --kv-transfer-config '{
+        "kv_connector": "MultiConnector",
+        "kv_role": "kv_consumer",
+        "kv_connector_extra_config": {
+            "connectors": [
+                {
+                    "kv_connector": "MooncakeConnector",
+                    "kv_role": "kv_consumer"
+                },
+                {
+                    "kv_connector": "MooncakeStoreConnector",
+                    "kv_role": "kv_consumer"
+                }
+            ]
+        }
+    }'
+```
+
+**Proxy:**
+
+A disaggregation proxy is required to route requests between prefiller and decoder nodes. The proxy assigns `do_remote_prefill=True` / `do_remote_decode=True` to coordinate P2P transfer via `MooncakeConnector`. Refer to the [MooncakeConnector usage guide](mooncake_connector_usage.md) for proxy setup details.
+
+### Disk Offloading
+
+Disk offloading is most commonly run in `standalone-store` mode: an external
+`mooncake_client` process owns the CPU pool and the SSD tier, and each vLLM
+rank is a pure requester. This avoids per-rank duplication of the SSD pool
+and keeps DirectIO budget tracking on a single process.
+
+Three things need to be aligned for end-to-end disk offloading:
+
+1. **`mooncake_master`** is started with `--enable_offload=true`.
+2. **`mooncake_client`** (the owner) is started with `--enable_offload=true`
+   plus an SSD path via `MOONCAKE_OFFLOAD_FILE_STORAGE_PATH`.
+3. **vLLM-side** sets `"enable_offload": true` in the JSON config file (this is
+   read by the connector and is **not** an environment variable).
+
+Example `mooncake_config.json` for the vLLM side:
+
+```json
+{
+  "mode": "standalone-store",
+  "metadata_server": "P2PHANDSHAKE",
+  "master_server_address": "127.0.0.1:50051",
+  "global_segment_size": 0,
+  "local_buffer_size": "4GB",
+  "protocol": "rdma",
+  "device_name": "mlx5_0",
+  "enable_offload": true
+}
+```
+
+Steer this rank to the local owner segment with:
+
+```bash
+export MOONCAKE_PREFERRED_SEGMENT=127.0.0.1:50053
+```
+
+The owner's SSD directory, on-disk eviction policy, and the DirectIO staging
+buffer size are controlled on the `mooncake_client` side via the standard
+Mooncake environment variables (`MOONCAKE_OFFLOAD_FILE_STORAGE_PATH`,
+`MOONCAKE_BUCKET_EVICTION_POLICY`, `MOONCAKE_USE_URING`,
+`MOONCAKE_OFFLOAD_LOCAL_BUFFER_SIZE_BYTES`,
+`MOONCAKE_OFFLOAD_TOTAL_SIZE_LIMIT_BYTES`, etc.). Those are independent of
+the vLLM JSON config.
+
+## Environment Variables
+
+| Variable | Description | Default |
+| --- | --- | --- |
+| `MOONCAKE_CONFIG_PATH` | Path to Mooncake JSON config file | (required) |
+| `VLLM_MOONCAKE_BOOTSTRAP_PORT` | Bootstrap port for MooncakeConnector P2P transfer (disagg mode only) | 8998 |
+| `MOONCAKE_PREFERRED_SEGMENT` | Pin this rank's replicas to a specific owner segment (`host:port`); used in `standalone-store` mode | — |
+| `MOONCAKE_REQUESTER_LOCAL_HOSTNAME` | Override the hostname the vLLM rank registers with Mooncake as a requester. Defaults to the rank's resolved IP. | — |
+| `VLLM_MOONCAKE_STORE_TIER_LOG` | When `1`, logs a per-batch tier summary (memory vs disk hits) for observability | disabled |
+| `VLLM_MOONCAKE_DISK_STAGING_USABLE_RATIO` | Fraction of the owner's DirectIO staging buffer that the requester will fill in a single `batch_get_into_multi_buffers` call. Lower → more conservative pre-split, more round trips. | 0.9 |
+
+## KV Transfer Config
+
+### KV Role Options
+
+- **kv_producer**: For instances that store KV caches to the pool.
+- **kv_consumer**: For instances that load KV caches from the pool.
+- **kv_both**: The instance both stores and loads KV caches. Use this for single-node CPU offloading or prefiller instances.
+
+### kv_connector_extra_config
+
+- `load_async` (bool): Enable asynchronous loading for better compute-I/O overlap. Default: `true`.
+- `enable_cross_layers_blocks` (bool): Enable cross-layer block packing for reduced store operations. Default: `false`.
+- `lookup_rpc_port` (int): Custom port for the ZMQ lookup RPC socket. Default: `0`.
+
+## Notes
+
+### Reproducible Block Hashes Across Processes
+
+The `MooncakeStoreConnector` relies on consistent block hashes across all vLLM processes sharing the distributed store. Because Python randomizes its hash seed per process by default, identical prompts can produce different block hashes on different processes — preventing cross-process prefix cache hits.
+
+Set a fixed `PYTHONHASHSEED` on every instance that shares the store (DP ranks, separate prefiller/decoder nodes, and any other vLLM process pointed at the same Mooncake store):
+
+```bash
+PYTHONHASHSEED=0 vllm serve ...
+```
diff --git a/docs/features/multimodal_inputs.md b/docs/features/multimodal_inputs.md
index ee82c34fa0eb..f6d4f3f86d80 100644
--- a/docs/features/multimodal_inputs.md
+++ b/docs/features/multimodal_inputs.md
@@ -68,7 +68,7 @@ You can pass a single image to the `'image'` field of the multi-modal dictionary
         print(generated_text)
     ```
 
-Full example: [examples/offline_inference/vision_language.py](../../examples/offline_inference/vision_language.py)
+Full example: [examples/generate/multimodal/vision_language_offline.py](../../examples/generate/multimodal/vision_language_offline.py)
 
 To substitute multiple images inside the same text prompt, you can pass in a list of images instead:
 
@@ -101,7 +101,7 @@ To substitute multiple images inside the same text prompt, you can pass in a lis
         print(generated_text)
     ```
 
-Full example: [examples/offline_inference/vision_language_multi_image.py](../../examples/offline_inference/vision_language_multi_image.py)
+Full example: [examples/generate/multimodal/vision_language_multi_image_offline.py](../../examples/generate/multimodal/vision_language_multi_image_offline.py)
 
 If using the [LLM.chat](../models/generative_models.md#llmchat) method, you can pass images directly in the message content using various formats: image URLs, PIL Image objects, or pre-computed embeddings:
 
@@ -215,6 +215,67 @@ When loading RGBA images (images with transparency), vLLM converts them to RGB f
     - This setting only affects RGBA images with transparency; RGB images are unchanged
     - If not specified, the default white background `(255, 255, 255)` is used for backward compatibility
 
+#### Moondream3 Prompt Recipes { #moondream3-prompt-recipes }
+
+`Moondream3ForCausalLM` supports two task-specific prompt formats:
+
+- `query`: ask a question about the image.
+- `caption`: generate a caption for the image.
+
+```python
+from vllm import LLM, SamplingParams
+from vllm.assets.image import ImageAsset
+
+llm = LLM(
+    model="moondream/moondream3-preview",
+    tokenizer="moondream/starmie-v1",
+    trust_remote_code=True,
+    max_model_len=2048,
+    limit_mm_per_prompt={"image": 1},
+)
+
+image = ImageAsset("stop_sign").pil_image
+
+
+def make_query_prompt(question: str) -> str:
+    return (
+        "<|endoftext|><image><|md_reserved_0|>query<|md_reserved_1|>"
+        f"{question}<|md_reserved_2|>"
+    )
+
+
+def make_caption_prompt(length: str = "normal") -> str:
+    return (
+        "<|endoftext|><image><|md_reserved_0|>"
+        f"describe<|md_reserved_1|>{length}<|md_reserved_2|>"
+    )
+
+
+query_out = llm.generate(
+    {
+        "prompt": make_query_prompt("What is shown in this image?"),
+        "multi_modal_data": {"image": image},
+    },
+    SamplingParams(max_tokens=64, temperature=0),
+)[0].outputs[0].text
+
+caption_out = llm.generate(
+    {
+        "prompt": make_caption_prompt(),
+        "multi_modal_data": {"image": image},
+    },
+    SamplingParams(max_tokens=100, temperature=0),
+)[0].outputs[0].text
+
+print("query:", query_out)
+print("caption:", caption_out)
+```
+
+!!! note
+    The native Moondream3 model also has `detect` and `point` skills. Those
+    require custom coordinate decoding and are not exposed by this vLLM
+    implementation.
+
 ### Video Inputs
 
 You can pass a list of NumPy arrays directly to the `'video'` field of the multi-modal dictionary
@@ -287,25 +348,25 @@ Instead of NumPy arrays, you can also pass `'torch.Tensor'` instances, as shown
     !!! note
         'process_vision_info' is only applicable to Qwen2.5-VL and similar models.
 
-Full example: [examples/offline_inference/vision_language.py](../../examples/offline_inference/vision_language.py)
+Full example: [examples/generate/multimodal/vision_language_offline.py](../../examples/generate/multimodal/vision_language_offline.py)
 
 ### Audio Inputs
 
 You can pass a tuple `(array, sampling_rate)` to the `'audio'` field of the multi-modal dictionary.
 
-Full example: [examples/offline_inference/audio_language.py](../../examples/offline_inference/audio_language.py)
+Full example: [examples/generate/multimodal/audio_language_offline.py](../../examples/generate/multimodal/audio_language_offline.py)
 
 #### Chunking Long Audio for Transcription
 
 Speech-to-text models like Whisper have a maximum audio length they can process (typically 30 seconds). For longer audio files, vLLM provides a utility to intelligently split audio into chunks at quiet points to minimize cutting through speech.
 
 ```python
-import librosa
 from vllm import LLM, SamplingParams
 from vllm.multimodal.audio import split_audio
+from vllm.multimodal.media.audio import load_audio
 
 # Load long audio file
-audio, sr = librosa.load("long_audio.wav", sr=16000)
+audio, sr = load_audio("long_audio.wav", sr=16000)
 
 # Split into chunks at low-energy (quiet) regions
 chunks = split_audio(
@@ -674,7 +735,7 @@ Then, you can use the OpenAI client as follows:
     print("Chat completion output:", chat_response.choices[0].message.content)
     ```
 
-Full example: [examples/online_serving/openai_chat_completion_client_for_multimodal.py](../../examples/online_serving/openai_chat_completion_client_for_multimodal.py)
+Full example: [examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py](../../examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py)
 
 !!! tip
     Loading from local file paths is also supported on vLLM: You can specify the allowed local media path via `--allowed-local-media-path` when launching the API server/engine,
@@ -745,7 +806,7 @@ Then, you can use the OpenAI client as follows:
     print("Chat completion output from image url:", result)
     ```
 
-Full example: [examples/online_serving/openai_chat_completion_client_for_multimodal.py](../../examples/online_serving/openai_chat_completion_client_for_multimodal.py)
+Full example: [examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py](../../examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py)
 
 !!! note
     By default, the timeout for fetching videos through HTTP URL is `30` seconds.
@@ -780,6 +841,70 @@ vllm serve Qwen/Qwen3-VL-30B-A3B-Instruct \
 
 Works with common video formats like MP4 when using OpenCV backends.
 
+#### Pre-extracted Frame Sequences with `media_io_kwargs`
+
+When you extract video frames on the client side and send them as `video/jpeg` (base64-concatenated JPEG frames), you can preserve the original video metadata by using `media_io_kwargs` in your request. This enables more accurate video understanding by preserving temporal information that would otherwise be lost during client-side frame extraction.
+
+**Supported Parameters:**
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `fps` | float | Frame rate of the original video |
+| `frames_indices` | list[int] | Indices of the actually sampled frames |
+| `total_num_frames` | int | Total frame count of the original video |
+| `duration` | float | Duration of the original video in seconds |
+| `do_sample_frames` | bool | Whether to perform frame sampling |
+
+??? code
+
+    ```python
+    from openai import OpenAI
+
+    client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
+
+    # Client-side frame extraction
+    frames = extract_frames(video_path, num_frames=32)
+    frames_b64 = ",".join([encode_image(f) for f in frames])
+    video_url = f"data:video/jpeg;base64,{frames_b64}"
+
+    # Pass video metadata via media_io_kwargs
+    response = client.chat.completions.create(
+        model="your-multimodal-model",
+        messages=[{
+            "role": "user",
+            "content": [
+                {"type": "video_url", "video_url": {"url": video_url}},
+                {"type": "text", "text": "Describe what happens in this video."}
+            ]
+        }],
+        extra_body={
+            "media_io_kwargs": {
+                "video": {
+                    "fps": 30.0,
+                    "frames_indices": [0, 10, 20, 30, 40, 50, 60, 70, 80, 90,
+                                       100, 110, 120, 130, 140, 150, 160, 170,
+                                       180, 190, 200, 210, 220, 230, 240, 250,
+                                       260, 270, 280, 290, 300, 310],
+                    "total_num_frames": 900,
+                    "duration": 30.0,
+                }
+            }
+        },
+    )
+
+    print(response.choices[0].message.content)
+    ```
+
+**Why use `media_io_kwargs`?**
+
+When extracting frames client-side, the server loses important context about the original video:
+
+- **Temporal information**: Which frames were sampled and their positions in the original timeline
+- **Video duration**: How long the original video was
+- **Frame rate**: The original playback speed
+
+By passing this metadata, the model can better understand the temporal distribution of the sampled frames and whether important moments might have been skipped.
+
 #### Custom RGBA Background Color
 
 To use a custom background color for RGBA images, pass the `rgba_background_color` parameter via `--media-io-kwargs`:
@@ -832,7 +957,7 @@ Then, you can use the OpenAI client as follows:
         base_url=openai_api_base,
     )
 
-    # Any format supported by librosa is supported
+    # Any format supported by soundfile/PyAV is supported
     audio_url = AudioAsset("winning_call").url
     audio_base64 = encode_base64_content_from_url(audio_url)
 
@@ -894,7 +1019,7 @@ Alternatively, you can pass `audio_url`, which is the audio counterpart of `imag
     print("Chat completion output from audio url:", result)
     ```
 
-Full example: [examples/online_serving/openai_chat_completion_client_for_multimodal.py](../../examples/online_serving/openai_chat_completion_client_for_multimodal.py)
+Full example: [examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py](../../examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py)
 
 !!! note
     By default, the timeout for fetching audios through HTTP URL is `10` seconds.
diff --git a/docs/features/nixl_connector_compatibility.md b/docs/features/nixl_connector_compatibility.md
new file mode 100644
index 000000000000..5541cd99bd80
--- /dev/null
+++ b/docs/features/nixl_connector_compatibility.md
@@ -0,0 +1,104 @@
+# NixlConnector Compatibility Matrix
+
+This page documents the feature compatibility of **disaggregated prefilling with the NixlConnector**. For general usage instructions, see the [NixlConnector Usage Guide](nixl_connector_usage.md). For an overview of disaggregated prefilling, see [Disaggregated Prefilling](disagg_prefill.md).
+
+!!! note
+    This page reflects the current state of the codebase and is subject to change as features evolve. Entries marked 🟠 or ❌ may link to tracking issues. See the [NIXL connector roadmap](https://github.com/vllm-project/vllm/issues/33702) for upcoming feature development.
+
+**Legend:**
+
+- ✅ = Fully supported
+- 🟠 = Partial support (see footnotes)
+- ❌ = Not supported
+- ❔ = Unknown / not yet validated
+- 🚧 = Work in progress
+
+!!! info "Universally supported features"
+    The following features work with **all** model architectures when using NixlConnector PD disaggregated serving:
+
+    [Chunked Prefill](../configuration/optimization.md#chunked-prefill) |
+    [APC (Prefix Caching)](automatic_prefix_caching.md) |
+    [Data Parallel](../serving/data_parallel_deployment.md) |
+    CUDA graph |
+    Logprobs |
+    Prompt Logprobs |
+    [Prompt Embeds](prompt_embeds.md) |
+    Multiple NIXL backends (UCX, GDS, LIBFABRIC, etc.)
+
+## Model Architecture x Capability
+
+<style>
+td:not(:first-child) {
+  text-align: center !important;
+}
+td {
+  padding: 0.5rem !important;
+  white-space: nowrap;
+}
+
+th {
+  padding: 0.5rem !important;
+  min-width: 0 !important;
+}
+
+th:not(:first-child) {
+  writing-mode: vertical-lr;
+  transform: rotate(180deg)
+}
+</style>
+
+| Model type | <abbr title="Basic Prefill/Decode disaggregation">Basic PD</abbr> | <abbr title="Speculative Decoding">Spec Decode</abbr> | <abbr title="Heterogeneous Tensor Parallelism (P TP != D TP)">Hetero TP</abbr> | <abbr title="Cross-layer blocks optimization">Cross-layer blocks</abbr> | <abbr title="Sliding Window Attention">SWA</abbr> | <abbr title="CPU host buffer offload (e.g. TPU)">Host buffer</abbr> | <abbr title="Different block sizes on P and D">Hetero block size</abbr> |
+| - | - | - | - | - | - | - | - |
+| Dense Transformers | ✅ | ✅<sup>1</sup> | ✅ | ✅<sup>2</sup> | ✅ | ✅ | 🟠<sup>3</sup> |
+| MLA (e.g. DeepSeek-V2/V3) | ✅ | ✅<sup>1</sup> | 🟠<sup>4</sup> | ✅<sup>2</sup> | ✅ | ✅ | 🟠<sup>3</sup> |
+| Sparse MLA (e.g. DeepSeek-V3.2) | ✅ | ✅<sup>1</sup> | 🟠<sup>4</sup> | ✅<sup>2</sup> | ✅ | ✅ | 🟠<sup>3</sup> |
+| Hybrid SSM / Mamba | ✅ | ❔ | 🚧<sup>5</sup> | ❌ | ✅ | ✅ | ❌<sup>6</sup> |
+| MoE | ✅ | ✅<sup>1</sup> | ✅ | ✅<sup>2</sup> | ✅ | ✅ | 🟠<sup>3</sup> |
+| Multimodal | ❔ | ❔ | ❔ | ❔ | ❔ | ❔ | ❔ |
+| Encoder-Decoder | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
+
+<sup>1</sup> P and D instances must use the same speculation configuration.
+
+<sup>2</sup> Requires `FLASH_ATTN` or `FLASHINFER` backend **and** `HND` KV cache layout. Enable via `--kv-transfer-config '{"kv_connector_extra_config": {"enable_cross_layers_blocks": "True"}}'`.
+
+<sup>3</sup> Supported only when HMA is **not** required (i.e., non-hybrid models). Block IDs are remapped automatically. Only P block size < D block size is supported.
+
+<sup>4</sup> MLA KV cache is replicated across TP workers, so heterogeneous TP works but there is no head-splitting. When P TP > D TP, only a single read is executed (redundant ranks are skipped). D TP > P TP also works.
+
+<sup>5</sup> Hybrid SSM (Mamba) models require **homogeneous TP** (`P TP == D TP`). Heterogeneous TP is not yet supported for Mamba layers.
+
+<sup>6</sup> HMA (required by hybrid models) does not support different remote block sizes.
+
+## Configuration Notes
+
+### What must match between P and D
+
+By default, a **compatibility hash** is checked during handshake. P and D instances must agree on:
+
+- vLLM version and NIXL connector version
+- Model (architecture, dtype, number of KV heads, head size, number of hidden layers)
+- Attention backend
+- KV cache dtype (`cache_dtype`)
+
+!!! warning
+    Disable the hash check with `--kv-transfer-config '{"kv_connector_extra_config": {"enforce_handshake_compat": false}}'` at your own risk.
+
+### What can safely differ between P and D
+
+- `tensor-parallel-size` (heterogeneous TP, subject to model restrictions above)
+- `block-size` (heterogeneous block size, subject to restrictions above)
+- Number of KV cache blocks (determined by available memory on each instance)
+
+### KV cache layout
+
+- NixlConnector defaults to **`HND`** layout for optimal transfer performance (non-MLA models).
+- `NHD` layout is supported but does **not** allow heterogeneous TP head splitting.
+- Experimental `HND` ↔ `NHD` permute: enable via `--kv-transfer-config '{"enable_permute_local_kv": true}'`. Not supported with HMA.
+
+### Quantized KV cache
+
+[Quantized KV cache](quantization/quantized_kvcache.md) (e.g., FP8) requires both P and D instances to use the **same** `cache_dtype`. Mismatched cache dtypes will fail the compatibility hash check during handshake.
+
+- **Static quantization** (scales loaded from checkpoint): ✅ Supported. Scales are loaded independently by each instance from the model checkpoint.
+- **Dynamic quantization** (scales computed at runtime): ❌ Not supported. Per-block scales are not transferred alongside KV cache data.
+- **Packed-layout scales** (scales stored inline with weights): ✅ Supported. Scales are transferred together with the KV cache blocks.
diff --git a/docs/features/nixl_connector_usage.md b/docs/features/nixl_connector_usage.md
index a9039f0daf84..cb5a3dca035a 100644
--- a/docs/features/nixl_connector_usage.md
+++ b/docs/features/nixl_connector_usage.md
@@ -2,6 +2,8 @@
 
 NixlConnector is a high-performance KV cache transfer connector for vLLM's disaggregated prefilling feature. It provides fully asynchronous send/receive operations using the NIXL library for efficient cross-process KV cache transfer.
 
+For feature compatibility details (supported model architectures, TP configurations, and feature interactions), see the [NixlConnector Compatibility Matrix](nixl_connector_compatibility.md).
+
 ## Prerequisites
 
 ### Installation
@@ -11,7 +13,7 @@ Install the NIXL library: `uv pip install nixl`, as a quick start on Nvidia plat
 - Refer to [NIXL official repository](https://github.com/ai-dynamo/nixl) for more installation instructions
 - The specified required NIXL version can be found in [requirements/kv_connectors.txt](../../requirements/kv_connectors.txt) and other relevant config files
 
-For ROCm platform, the [base ROCm docker file](../../docker/Dockerfile.rocm_base) includes RIXL and ucx already.
+For ROCm platform, the [ROCm docker file](../../docker/Dockerfile.rocm) includes RIXL and ucx already.
 
 - Refer to [RIXL official repository](https://github.com/rocm/rixl) for more information
 - The supportive libraries for RIXL can be found in [requirements/kv_connectors_rocm.txt](../../requirements/kv_connectors_rocm.txt)
@@ -124,9 +126,179 @@ python tests/v1/kv_connector/nixl_integration/toy_proxy_server.py \
     - Set when prefiller and decoder are on different machines
     - Connection info is passed via KVTransferParams from prefiller to decoder for handshake
 
-- `VLLM_NIXL_ABORT_REQUEST_TIMEOUT`: Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. (Optional)
+- `kv_lease_duration` (via `kv_connector_extra_config`): Lease duration (in seconds) for the prefiller's KV cache blocks. (Optional)
+    - Default: 30
+    - When a prefill request finishes, its KV blocks are held for this duration waiting for the decoder to read them. While the request is queued on the decoder, periodic heartbeats automatically extend the lease. If neither a heartbeat nor a read notification arrives before the lease expires, the blocks are freed. The heartbeat interval and extension amount are derived automatically from this value.
+    - Example: `--kv-transfer-config '{"kv_connector_extra_config": {"kv_lease_duration": 60}}'`
+
+- `decoder_kv_blocks_ttl` (via `kv_connector_extra_config`): TTL (in seconds) for KV blocks cached on the decoder in bidirectional transfer mode. (Optional)
     - Default: 480
-    - If a request is aborted and the decoder has not yet read the KV-cache blocks through the nixl channel, the prefill instance will release its KV-cache blocks after this timeout to avoid holding them indefinitely.
+    - In bidirectional mode, the decoder caches KV blocks for multi-turn conversations. This TTL controls how long those blocks are held before being released. Unlike the prefiller lease, this TTL is not renewed via heartbeats.
+    - Example: `--kv-transfer-config '{"kv_connector_extra_config": {"decoder_kv_blocks_ttl": 600}}'`
+
+## Bidirectional KV Transfer (Multi-turn)
+
+In standard disaggregated prefilling, KV cache flows in one direction: Prefill (P) computes the KV cache and Decode (D) reads from P. For multi-turn conversations this is wasteful — D already holds the KV cache corresponding to the generated tokens from prior turns, yet P must recompute it from scratch on every new turn. Bidirectional KV transfer lets P **pull** existing KV blocks from D via RDMA before computing only the new tokens, significantly reducing Time-To-First-Token (TTFT) for long-prefill such as **multi-turn heavy scenarios**.
+
+### How it works
+
+The feature relies on a **stateful proxy** that sits between the client and the P/D instances. The proxy tracks `kv_transfer_params` returned by D at the end of each turn, and attaches them to the next turn's request so P knows which blocks to pull from D.
+
+```mermaid
+sequenceDiagram
+    participant Client
+    participant Proxy
+    participant P as Prefill (P)
+    participant D as Decode (D)
+
+    rect rgb(240, 240, 250)
+    note right of Client: Turn 1 — Cache Miss
+    Client->>Proxy: chat request + conversation_id
+    Proxy->>P: request (no remote blocks)
+    activate P
+    note over P: full prefill
+    P-->>Proxy: kv_transfer_params (P's blocks)
+    deactivate P
+    Proxy->>D: request + P's kv_transfer_params
+    activate D
+    D-->P: RDMA read (D pulls KV from P)
+    note over D: decode
+    D-->>Proxy: stream response + kv_transfer_params
+    deactivate D
+    note over Proxy: cache D's kv_transfer_params
+    Proxy-->>Client: response
+    end
+
+    rect rgb(255, 245, 235)
+    note right of Client: Turn 2+ — Cache Hit (Bidirectional)
+    Client->>Proxy: chat request + conversation_id
+    note over Proxy: lookup cached D blocks
+    Proxy->>P: request + D's remote_block_ids
+    activate P
+    P-->D: RDMA read (P pulls KV from D)
+    note over P: prefill new tokens only
+    P-->>Proxy: kv_transfer_params (P's blocks)
+    deactivate P
+    Proxy->>D: request + P's kv_transfer_params
+    activate D
+    D-->P: RDMA read (D pulls new KV from P)
+    note over D: decode
+    D-->>Proxy: stream response + kv_transfer_params
+    deactivate D
+    note over Proxy: update cached kv_transfer_params
+    Proxy-->>Client: response
+    end
+```
+
+**Turn 1 (cache miss):**
+
+1. Client sends a chat request with a `conversation_id` to the proxy.
+2. Proxy forwards the request to P with no remote block info — P computes the full KV cache.
+3. Proxy forwards the request to D along with P's `kv_transfer_params` (block IDs, engine ID, host/port).
+4. D reads KV blocks from P via RDMA (peer-to-peer pull), then generates the response.
+5. D streams the response back through the proxy. The final chunk includes D's own `kv_transfer_params`.
+6. Proxy caches D's `kv_transfer_params` keyed by `conversation_id`, then returns the response to the client.
+
+**Turn 2+ (cache hit — bidirectional):**
+
+1. Client sends the next turn with the same `conversation_id`.
+2. Proxy looks up cached `kv_transfer_params` from the previous turn and attaches D's `remote_block_ids` to the request sent to P.
+3. P reads the existing KV cache from D via RDMA (D→P pull), then computes KV only for the new tokens.
+4. Proxy forwards the request to D with P's updated `kv_transfer_params`.
+5. D reads the new KV blocks from P, generates the response, and returns updated `kv_transfer_params` which the proxy caches for the next turn.
+
+### Configuration
+
+Enable bidirectional KV transfer by setting `bidirectional_kv_xfer` in `kv_connector_extra_config` on **both** P and D instances:
+
+```bash
+vllm serve <MODEL> \
+  --kv-transfer-config '{
+    "kv_connector": "NixlConnector",
+    "kv_role": "kv_both",
+    "kv_connector_extra_config": {
+      "bidirectional_kv_xfer": true
+    }
+  }'
+```
+
+Additional configuration options in `kv_connector_extra_config`:
+
+| Parameter | Default | Description |
+| --------- | ------- | ----------- |
+| `bidirectional_kv_xfer` | `false` | Enable bidirectional D→P KV transfer. |
+| `kv_recompute_threshold` | `64` | Minimum number of remote tokens required to trigger a D→P pull. Below this threshold, P recomputes locally instead of pulling (to amortize transfer latency). |
+| `decoder_kv_blocks_ttl` | `480` | TTL (seconds) for KV blocks cached on D for bidirectional reuse. Blocks are released after this duration. Not renewed via heartbeats. |
+
+### Multi-turn proxy setup
+
+Use the provided multi-turn proxy to manage `kv_transfer_params` caching across conversation turns:
+
+```bash
+python examples/disaggregated/disaggregated_serving/disagg_proxy_multiturn.py \
+  --host 0.0.0.0 --port 8000 \
+  --prefiller-host <P_IP> --prefiller-port 8100 \
+  --decoder-host <D_IP> --decoder-port 8200
+```
+
+The proxy supports multiple P and D instances via round-robin:
+
+```bash
+python examples/disaggregated/disaggregated_serving/disagg_proxy_multiturn.py \
+  --host 0.0.0.0 --port 8000 \
+  --prefiller-hosts <P_IP1> <P_IP2> --prefiller-ports 8100 8100 \
+  --decoder-hosts <D_IP1> <D_IP2> --decoder-ports 8200 8200
+```
+
+### Client usage
+
+Include a `conversation_id` field in the request body to enable cross-turn KV reuse. Without it, the proxy cannot link turns and falls back to full recomputation.
+
+```bash
+# Turn 1
+curl http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Qwen/Qwen3-0.6B",
+    "conversation_id": "session-42",
+    "messages": [
+      {"role": "user", "content": "What is vLLM?"}
+    ]
+  }'
+
+# Turn 2 — same conversation_id triggers bidirectional KV pull
+curl http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Qwen/Qwen3-0.6B",
+    "conversation_id": "session-42",
+    "messages": [
+      {"role": "user", "content": "What is vLLM?"},
+      {"role": "assistant", "content": "vLLM is a high-throughput LLM serving engine..."},
+      {"role": "user", "content": "How does disaggregated prefilling work?"}
+    ]
+  }'
+```
+
+!!! note
+    The `conversation_id` field is a non-standard extension to the OpenAI API. It is consumed by the proxy and not forwarded to the vLLM engine.
+
+### Limitations
+
+- Requires a stateful proxy (or equivalent router) to track and forward `kv_transfer_params` between turns.
+- Currently supported on CUDA with device-buffer KV cache. Host-buffer support (e.g., for Intel XPU) is planned for future work.
+
+!!! warning "Reasoning models with stripped thinking traces"
+    When using reasoning models (e.g. DeepSeek-R1) that produce thinking traces
+    (`<think>...</think>`), D's KV blocks cover the full token sequence including
+    thinking tokens. If the client strips thinking traces from the conversation
+    history before sending the next turn, the prompt P receives will be missing
+    tokens from the middle of what D generated. The block-alignment logic assumes
+    P's prompt is a prefix of D's sequence, so pulling KV blocks from D in this
+    case transfers cache computed for the wrong token positions, producing
+    incorrect results.
+
+    We currently assume the router is able to detect such mismatch across turns. See [#43094](https://github.com/vllm-project/vllm/issues/43094). 
 
 ## Multi-Instance Setup
 
@@ -203,6 +375,10 @@ The `kv_load_failure_policy` setting controls how the system handles failures wh
 !!! warning
     Using `kv_load_failure_policy="recompute"` can lead to performance degradation in production deployments. When KV loads fail, the decode instance will execute prefill work with decode-optimized configurations, which is inefficient and defeats the purpose of disaggregated prefilling. This also increases tail latency for other ongoing decode requests.
 
+### For NVIDIA GB-series GPUs
+
+GB-series GPUs support multi-node NVLink. NIXL supports this capability, but KVCache must be registered as VMM during KVCache registration. To enable this feature, you need to set `--enable-cumem-allocator` or `--enable-sleep-mode` flags, and set `UCX_CUDA_IPC_ENABLE_MNNVL: 'y'` env. Otherwise, NIXL can only use RDMA/TCP for cross-node KVCache transfers.
+
 ## Experimental Feature
 
 ### Heterogeneous KV Layout support
diff --git a/docs/features/prompt_embeds.md b/docs/features/prompt_embeds.md
index b81d2f28e3b9..dd0b4d62c423 100644
--- a/docs/features/prompt_embeds.md
+++ b/docs/features/prompt_embeds.md
@@ -16,16 +16,51 @@ To input multi-modal data, follow this schema in [vllm.inputs.EmbedsPrompt][]:
 
 You can pass prompt embeddings from Hugging Face Transformers models to the  `'prompt_embeds'` field of the prompt embedding dictionary, as shown in the following examples:
 
-[examples/offline_inference/prompt_embed_inference.py](../../examples/offline_inference/prompt_embed_inference.py)
+[examples/features/prompt_embed/prompt_embed_offline.py](../../examples/features/prompt_embed/prompt_embed_offline.py)
 
 ## Online Serving
 
-Our OpenAI-compatible server accepts prompt embeddings inputs via the [Completions API](https://platform.openai.com/docs/api-reference/completions). Prompt embeddings inputs are added via a new `'prompt_embeds'` key in the JSON package and are enabled by the `--enable-prompt-embeds` flag in `vllm serve`.
+Our OpenAI-compatible server accepts prompt embeddings inputs via both the [Completions API](https://platform.openai.com/docs/api-reference/completions) and the [Chat Completions API](https://platform.openai.com/docs/api-reference/chat). Both are enabled by the `--enable-prompt-embeds` flag in `vllm serve`.
+
+### Completions API
+
+Prompt embeddings inputs are added via a `'prompt_embeds'` key in the JSON request body.
 
 When a mixture of `'prompt_embeds'` and `'prompt'` inputs are provided in a single request, the prompt embeds are always returned first.
 
 Prompt embeddings are passed in as base64 encoded torch tensors.
 
+The Completions endpoint does **not** apply a chat template to `prompt_embeds`. If the model assumes some chat template, the caller is responsible for producing embeddings for the full, already-templated prompt: apply the chat template, then embed the resulting token IDs. Anything the model would normally need (system prompt, role markers, generation prompt, etc.) must already be baked into the embedded tokens.
+
+### Chat Completions API
+
+Prompt embeddings can be included as content parts in chat messages, interleaved with text:
+
+```json
+{
+  "messages": [
+    {
+      "role": "system",
+      "content": [
+        {"type": "text", "text": "You are a helpful assistant."},
+        {"type": "prompt_embeds", "data": "<base64_encoded_tensor>"}
+      ]
+    },
+    {
+      "role": "user",
+      "content": [
+        {"type": "prompt_embeds", "data": "<base64_encoded_tensor>"},
+        {"type": "text", "text": "Summarize the above."}
+      ]
+    }
+  ]
+}
+```
+
+Each `prompt_embeds` content part contains a `data` field with a base64-encoded `torch.Tensor` of shape `(num_tokens, hidden_size)`. Multiple `prompt_embeds` parts can appear in any message, in any position relative to text parts. The server expands each part into the correct number of placeholder tokens during chat template rendering, then splices the pre-computed embeddings into the model's input at the corresponding positions.
+
+Unlike the Completions API, a `prompt_embeds` content part should encode **only** the content, not a templated conversation. The server wraps the chat template around the embedded content at request time, the same way it would for a plain text `content` string. Embedding a full templated conversation here would double-apply the template and produce incorrect inputs to the model.
+
 !!! warning
     The vLLM engine may crash if incorrect shape of embeddings is passed.
     Only enable this flag for trusted users!
@@ -41,4 +76,4 @@ vllm serve meta-llama/Llama-3.2-1B-Instruct --runner generate \
 
 Then, you can use the OpenAI client as follows:
 
-[examples/online_serving/prompt_embed_inference_with_openai_client.py](../../examples/online_serving/prompt_embed_inference_with_openai_client.py)
+[examples/features/prompt_embed/prompt_embed_inference_with_openai_client.py](../../examples/features/prompt_embed/prompt_embed_inference_with_openai_client.py)
diff --git a/docs/features/quantization/README.md b/docs/features/quantization/README.md
index 0b8fc71d3f30..6c4aa7d8aaac 100644
--- a/docs/features/quantization/README.md
+++ b/docs/features/quantization/README.md
@@ -16,9 +16,11 @@ The following are the supported quantization formats for vLLM:
 - [INT8 W8A8](int8.md)
 - [FP8 W8A8](fp8.md)
 - [NVIDIA Model Optimizer](modelopt.md)
+- [Online Quantization](online.md)
 - [AMD Quark](quark.md)
 - [Quantized KV Cache](quantized_kvcache.md)
 - [TorchAO](torchao.md)
+- [FP8 ViT Encoder Attention](fp8_vit_attn.md)
 
 ## Supported Hardware
 
diff --git a/docs/features/quantization/auto_awq.md b/docs/features/quantization/auto_awq.md
index e77e8b5a1f41..e93005f26321 100644
--- a/docs/features/quantization/auto_awq.md
+++ b/docs/features/quantization/auto_awq.md
@@ -47,7 +47,7 @@ After installing AutoAWQ, you are ready to quantize a model. Please refer to the
 To run an AWQ model with vLLM, you can use [TheBloke/Llama-2-7b-Chat-AWQ](https://huggingface.co/TheBloke/Llama-2-7b-Chat-AWQ) with the following command:
 
 ```bash
-python examples/offline_inference/llm_engine_example.py \
+python examples/deployment/llm_engine_example.py \
     --model TheBloke/Llama-2-7b-Chat-AWQ \
     --quantization awq
 ```
diff --git a/docs/features/quantization/fp8.md b/docs/features/quantization/fp8.md
index 2165161ce891..2de71ce8da16 100644
--- a/docs/features/quantization/fp8.md
+++ b/docs/features/quantization/fp8.md
@@ -84,7 +84,7 @@ Since simple RTN does not require data for weight quantization and the activatio
 Install `vllm` and `lm-evaluation-harness` for evaluation:
 
 ```bash
-pip install vllm "lm-eval[api]>=0.4.11"
+pip install vllm "lm-eval[api]>=0.4.12"
 ```
 
 Load and run the model in `vllm`:
diff --git a/docs/features/quantization/fp8_vit_attn.md b/docs/features/quantization/fp8_vit_attn.md
new file mode 100644
index 000000000000..bf628cd8a72a
--- /dev/null
+++ b/docs/features/quantization/fp8_vit_attn.md
@@ -0,0 +1,109 @@
+# FP8 ViT Encoder Attention
+
+For visual understanding workloads with large images (e.g. QHD, 4K) and relatively
+short text prompts/generation, the ViT encoder attention can become a significant
+bottleneck, especially when the text model is quantized (e.g. NVFP4). vLLM
+supports optional FP8 quantization for the ViT encoder attention via the
+FlashInfer cuDNN backend. Q/K/V are quantized on-the-fly to FP8 before the
+cuDNN attention call.
+
+!!! note
+    - Currently supports Qwen3-VL family models only (`qwen3_vl`, `qwen3_vl_moe`,
+      `qwen3_5`, `qwen3_5_moe`, and other models using Qwen3 ViT).
+    - Dynamic scaling is not compatible with ViT full CUDA graphs.
+    - Performance gains are mostly visible at QHD/4K resolutions or multi-image
+      requests. Smaller images may see no speedup due to quantization overhead
+      (3 quantization kernel launches + un-padding).
+    - FP8 tensor-core speedup is more pronounced on GB300 than GB200.
+
+## Requirements
+
+- FlashInfer cuDNN backend with cuDNN >= 9.17.1.
+
+## Usage
+
+Enable FP8 ViT attention by passing `--mm-encoder-attn-dtype fp8` together
+with `--mm-encoder-attn-backend FLASHINFER`:
+
+```bash
+vllm serve $MODEL \
+    --mm-encoder-attn-backend FLASHINFER \
+    --mm-encoder-attn-dtype fp8
+```
+
+By default (no scale file), **dynamic scaling** is used: a 16-entry circular
+buffer of observed Q/K/V amax values drives per-forward scale updates. This
+matches BF16 accuracy without any calibration but adds a small per-forward
+overhead.
+
+## Calibrate-Once, Reuse Workflow (Recommended)
+
+For production, calibrate static scales on a representative dataset once and
+reuse them to avoid the dynamic overhead:
+
+```bash
+# Step 1: calibrate and save scales (runs dynamic scaling for 16 passes,
+# then dumps the learned scales to JSON).
+vllm bench mm-processor \
+    --model $MODEL --mm-encoder-attn-backend FLASHINFER \
+    --mm-encoder-attn-dtype fp8 \
+    --mm-encoder-fp8-scale-save-path /path/to/scales.json \
+    --dataset-name hf --dataset-path lmarena-ai/VisionArena-Chat \
+    --num-prompts 100
+
+# Step 2: serve with static scales (no dynamic overhead).
+vllm serve $MODEL \
+    --mm-encoder-attn-backend FLASHINFER \
+    --mm-encoder-attn-dtype fp8 \
+    --mm-encoder-fp8-scale-path /path/to/scales.json
+```
+
+Saved scales are multiplied by `--mm-encoder-fp8-scale-save-margin` (default
+`1.5`) to leave headroom against activation outliers not present in the
+calibration set. The default has been validated to generalize across datasets
+(e.g. VisionArena-Chat calibration maintains BF16 accuracy on ChartQA).
+
+## Scale File Format
+
+```json
+{
+    "visual.blocks.0.attn.attn": {"q": 224.0, "k": 198.0, "v": 210.0},
+    "visual.blocks.1.attn.attn": {"q": 218.0, "k": 195.0, "v": 207.0}
+}
+```
+
+Keys `q_scale` / `k_scale` / `v_scale` are accepted as aliases.
+
+## Performance
+
+**Core cuDNN attention kernel** (PyTorch profiler, `cudnn_generated_fort_native_sdpa_sm100_flash_fprop`, head_dim=128, seq_len=8192):
+
+| Hardware | BF16 | FP8 | Speedup |
+| -------- | ---- | ---- | ------- |
+| GB200 | 350 us | 312 us | **1.12x** |
+| GB300 | 300 us | 211 us | **1.42x** |
+
+**End-to-end encoder forward time** (Qwen3-VL-30B-A3B-Instruct on GB200, 3 images/request):
+
+| Resolution | BF16 median | FP8 median | Speedup |
+| ---------- | ----------- | ---------- | ------- |
+| HD (720x1280) | 31.77 ms | 36.39 ms | 0.87x |
+| FullHD (1080x1920) | 57.99 ms | 58.73 ms | ~same |
+| QHD (1440x2560) | 131.83 ms | 122.30 ms | **1.08x** |
+| 4K (2160x3840) | 543.44 ms | 460.31 ms | **1.18x** |
+
+Crossover is around FullHD with 3 images/request. At QHD and above, FP8 wins.
+
+## Accuracy
+
+ChartQA, Qwen3-VL-8B-Instruct, 500 samples. FP8 static uses scales calibrated
+on VisionArena-Chat (with default 1.5x margin):
+
+| Metric | BF16 | FP8 dynamic | FP8 static |
+| ------ | ---- | ----------- | ---------- |
+| relaxed_accuracy | 0.780 | 0.776 | 0.780 |
+| anywhere_accuracy | 0.806 | 0.816 | 0.814 |
+| exact_match | 0.584 | 0.582 | 0.578 |
+
+All three configurations match within statistical noise, confirming that
+static scales calibrated on one dataset generalize to another.
diff --git a/docs/features/quantization/gptqmodel.md b/docs/features/quantization/gptqmodel.md
index f14a931725da..636a952b6551 100644
--- a/docs/features/quantization/gptqmodel.md
+++ b/docs/features/quantization/gptqmodel.md
@@ -58,7 +58,7 @@ Here is an example of how to quantize `meta-llama/Llama-3.2-1B-Instruct`:
 To run an GPTQModel quantized model with vLLM, you can use [DeepSeek-R1-Distill-Qwen-7B-gptqmodel-4bit-vortex-v2](https://huggingface.co/ModelCloud/DeepSeek-R1-Distill-Qwen-7B-gptqmodel-4bit-vortex-v2) with the following command:
 
 ```bash
-python examples/offline_inference/llm_engine_example.py \
+python examples/deployment/llm_engine_example.py \
     --model ModelCloud/DeepSeek-R1-Distill-Qwen-7B-gptqmodel-4bit-vortex-v2
 ```
 
diff --git a/docs/features/quantization/int4.md b/docs/features/quantization/int4.md
index ed8a08a6aef8..41c4b40574fe 100644
--- a/docs/features/quantization/int4.md
+++ b/docs/features/quantization/int4.md
@@ -18,7 +18,7 @@ pip install llmcompressor
 Additionally, install `vllm` and `lm-evaluation-harness` for evaluation:
 
 ```bash
-pip install vllm "lm-eval[api]>=0.4.11"
+pip install vllm "lm-eval[api]>=0.4.12"
 ```
 
 ## Quantization Process
diff --git a/docs/features/quantization/int8.md b/docs/features/quantization/int8.md
index 53a5e7506609..547eb5aedc21 100644
--- a/docs/features/quantization/int8.md
+++ b/docs/features/quantization/int8.md
@@ -23,7 +23,7 @@ pip install llmcompressor
 Additionally, install `vllm` and `lm-evaluation-harness` for evaluation:
 
 ```bash
-pip install vllm "lm-eval[api]>=0.4.11"
+pip install vllm "lm-eval[api]>=0.4.12"
 ```
 
 ## Quantization Process
diff --git a/docs/features/quantization/online.md b/docs/features/quantization/online.md
new file mode 100644
index 000000000000..4b9571b38b98
--- /dev/null
+++ b/docs/features/quantization/online.md
@@ -0,0 +1,139 @@
+# Online Quantization
+
+Online quantization lets you take a BF16/FP16 model and quantize its Linear
+and MoE weights to lower precision (such as FP8) at load time, without needing
+a pre-quantized checkpoint or calibration data. Weights are converted during
+model loading and activations are dynamically scaled during each forward pass.
+
+## Quick Start
+
+Pass a scheme name to the `quantization` parameter:
+
+```python
+from vllm import LLM
+
+# Per-tensor FP8 quantization (one scale per weight tensor)
+llm = LLM("meta-llama/Llama-3.1-8B", quantization="fp8_per_tensor")
+
+# Per-block FP8 quantization (128x128 block scaling for weights and 1x128 block scaling for activations)
+llm = LLM("meta-llama/Llama-3.1-8B", quantization="fp8_per_block")
+
+# MXFP8 quantization for weights and activations
+llm = LLM("meta-llama/Llama-3.1-8B", quantization="mxfp8")
+```
+
+Or with the CLI:
+
+```bash
+vllm serve meta-llama/Llama-3.1-8B --quantization fp8_per_tensor
+vllm serve meta-llama/Llama-3.1-8B --quantization fp8_per_block
+vllm serve meta-llama/Llama-3.1-8B --quantization mxfp8
+```
+
+## Supported Schemes
+
+| Scheme | Weight recipe | Activation recipe | Notes |
+| ------ | ------------- | ------------------ | ----- |
+| `fp8_per_tensor` | fp8_e4m3 data, fp32 per-tensor scale | fp8_e4m3 data, fp32 per-tensor scale | On some GPUs (Ada, Hopper) linear activations use per-token scaling for better performance |
+| `fp8_per_block` | fp8_e4m3 data, fp32 per-128x128-block scale | fp8_e4m3 data, fp32 per-1x128-block scale | |
+| `mxfp8` | fp8_e4m3 data, e8m0 per-1x32-block scale | fp8_e4m3 data, e8m0 per-1x32-block scale | Requires SM 100+ (Blackwell or newer) for w8a8, other GPUs use a w8a16 fallback |
+
+## Advanced Configuration
+
+For fine-grained control, use a `quantization_config` dictionary.
+
+### Schema
+
+```yaml
+quantization_config:
+  linear:
+    weight: <name>      # see QUANT_KEY_NAMES in vllm/config/quantization.py
+    activation: <name>
+  moe:
+    weight: <name>
+    activation: <name>
+  ignore: [<layer-name-or-regex>, ...]
+```
+
+`linear` and `moe` accept a full `{weight, activation}` dict, or a bare
+string. A string resolves first against the `--quantization` shorthands
+(taking the matching layer-kind slot), then against `QUANT_KEY_NAMES` as a
+weight name. Unset fields fall back to the `--quantization` shorthand's
+defaults, or for already-quantized checkpoints to whatever the checkpoint
+declares.
+
+The CLI accepts the same shape as JSON or as dotted keys:
+
+```bash
+vllm serve <model> --quantization-config '{"moe":{"activation":"mxfp8"}}'
+vllm serve <model> --quantization-config.moe.activation mxfp8
+```
+
+### Activation overrides on already-quantized checkpoints
+
+For checkpoint-quantized models, `quantization_config` lets you pick an
+activation format independently of the baked-in weights. The supported
+overrides are checkpoint-specific; today this is wired up for MXFP4 MoE
+checkpoints (gpt-oss) where you can opt into FP8 activations:
+
+```bash
+vllm serve openai/gpt-oss-20b --quantization-config.moe.activation mxfp8
+```
+
+Combine with `--moe-backend` to pin a specific kernel family.
+
+### Separate Schemes for Dense and MoE Layers
+
+You can apply different quantization schemes to dense linear layers and MoE expert layers via the `linear` and `moe` fields. Each accepts either a full spec dict, or a bare string naming an online shorthand (e.g. `"fp8_per_block"`) or weight format (e.g. `"fp8_per_block_static"`); fields not set fall back to the shorthand defaults.
+
+```python
+from vllm import LLM
+
+# Linear: per-block FP8; MoE: per-tensor FP8 (inherited from the shorthand)
+llm = LLM(
+    "ibm-granite/granite-3.0-1b-a400m-base",
+    quantization="fp8_per_tensor",
+    quantization_config={
+        "linear": "fp8_per_block",
+    },
+)
+```
+
+Or,
+
+```python
+from vllm import LLM
+
+# Linear: per-tensor FP8 (inherited); MoE: per-block FP8
+llm = LLM(
+    "ibm-granite/granite-3.0-1b-a400m-base",
+    quantization="fp8_per_tensor",
+    quantization_config={
+        "moe": "fp8_per_block",
+    },
+)
+```
+
+### Excluding Layers from Quantization
+
+Use the `ignore` parameter to skip specific layers. It accepts exact layer names and regex patterns (prefixed with `re:`):
+
+```python
+from vllm import LLM
+
+llm = LLM(
+    "ibm-granite/granite-3.0-1b-a400m-base",
+    quantization="fp8_per_tensor",
+    quantization_config={
+        "ignore": [
+            # exact layer name
+            "model.layers.1.self_attn.o_proj",
+            # regex: skip all QKV projections
+            "re:.*[qkv]_proj",
+        ],
+    },
+)
+```
+
+!!! note
+    For fused layers (e.g., `qkv_proj` which fuses `q_proj`, `k_proj`, `v_proj`), the ignore pattern must match the **unfused** shard names (`q_proj`, `k_proj`, `v_proj`), not the fused name.
diff --git a/docs/features/quantization/quark.md b/docs/features/quantization/quark.md
index 1961d73099a9..8ed6b2ca60cd 100644
--- a/docs/features/quantization/quark.md
+++ b/docs/features/quantization/quark.md
@@ -20,7 +20,7 @@ for more installation details.
 Additionally, install `vllm` and `lm-evaluation-harness` for evaluation:
 
 ```bash
-pip install vllm "lm-eval[api]>=0.4.11"
+pip install vllm "lm-eval[api]>=0.4.12"
 ```
 
 ## Quantization Process
diff --git a/docs/features/reasoning_outputs.md b/docs/features/reasoning_outputs.md
index 4a9f279e0db8..f1cc18a25cbb 100644
--- a/docs/features/reasoning_outputs.md
+++ b/docs/features/reasoning_outputs.md
@@ -13,6 +13,7 @@ vLLM currently supports the following reasoning models:
 
 | Model Series | Parser Name | Structured Output Support | Tool Calling |
 | ------------ | ----------- | ---------------- | ----------- |
+| [Cohere Command A Reasoning](https://huggingface.co/CohereLabs/command-a-reasoning-08-2025) | `cohere_command3` | `json`, `regex` | ✅ |
 | [DeepSeek R1 series](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d) | `deepseek_r1` | `json`, `regex` | ❌ |
 | [DeepSeek-V3.1](https://huggingface.co/collections/deepseek-ai/deepseek-v31-68a491bed32bd77e7fca048f) | `deepseek_v3` | `json`, `regex` | ❌ |
 | [ERNIE-4.5-VL series](https://huggingface.co/baidu/ERNIE-4.5-VL-28B-A3B-PT) | `ernie45` | `json`, `regex` | ❌ |
@@ -156,7 +157,7 @@ OpenAI Python client library does not officially support `reasoning` attribute f
             print(content, end="", flush=True)
     ```
 
-Remember to check whether the `reasoning` exists in the response before accessing it. You could check out the [example](https://github.com/vllm-project/vllm/blob/main/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py).
+Remember to check whether the `reasoning` exists in the response before accessing it. You could check out the [example](https://github.com/vllm-project/vllm/blob/main/examples/reasoning/openai_chat_completion_with_reasoning_streaming.py).
 
 ## Tool Calling
 
@@ -202,7 +203,7 @@ The reasoning content is also available when both tool calling and the reasoning
     print(f"Arguments: {tool_call.arguments}")
     ```
 
-For more examples, please refer to [examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py](../../examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py).
+For more examples, please refer to [examples/reasoning/openai_chat_completion_tool_calls_with_reasoning.py](../../examples/reasoning/openai_chat_completion_tool_calls_with_reasoning.py).
 
 ## Server-Level Default Chat Template Kwargs
 
@@ -244,12 +245,12 @@ response = client.chat.completions.create(
 
 Some models, such as [Qwen3](https://qwen.readthedocs.io/en/latest/getting_started/quickstart.html#thinking-budget), [DeepSeek](https://www.alibabacloud.com/help/en/model-studio/deep-thinking), and [Nemotron3](https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16), support a thinking budget that limits the maximum number of tokens used for reasoning.
 
-Token counting starts from `think_start_str`. Once the reasoning token count reaches the configured `thinking_token_budget`, vLLM forces the model to produce `think_end_str`, effectively terminating the reasoning block.
+Token counting starts from `reasoning_start_str`. Once the reasoning token count reaches the configured `thinking_token_budget`, vLLM forces the model to produce `reasoning_end_str`, effectively terminating the reasoning block.
 
 To use this feature:
 
 - `--reasoning-parser` enables reasoning extraction.
-- `--reasoning-config` defines the reasoning boundary tokens (e.g., `think_start_str`, `think_end_str`).
+- `--reasoning-config` defines the reasoning boundary tokens (e.g., `reasoning_start_str`, `reasoning_end_str`). If not set, vLLM will attempt to automatically initialize these tokens from the reasoning parser.
 - `thinking_token_budget` (a sampling parameter) sets the per-request reasoning token limit.
 
 If `thinking_token_budget` is not specified, no explicit reasoning limit is applied beyond normal generation constraints such as `max_tokens`.
@@ -257,20 +258,20 @@ If `thinking_token_budget` is not specified, no explicit reasoning limit is appl
 `--reasoning-config` accepts a JSON object corresponding to  
 [ReasoningConfig][vllm.config.ReasoningConfig] with the following fields:
 
-| Field             | Type           | Description                                      |
-|-------------------|----------------|--------------------------------------------------|
-| `think_start_str` | `str \| null`  | String that marks the start of reasoning content |
-| `think_end_str`   | `str \| null`  | String that marks the end of reasoning content   |
+| Field                 | Type           | Description                                      |
+|-----------------------|----------------|--------------------------------------------------|
+| `reasoning_start_str` | `str \| null`  | String that marks the start of reasoning content |
+| `reasoning_end_str`   | `str \| null`  | String that marks the end of reasoning content   |
 
 !!! note
-    `think_end_str` can include a transition phrase before the think end token. For example, setting `think_end_str` to `"I have to give the solution based on the thinking directly now.</think>"` instructs the model to emit that phrase when the budget is exhausted, making the reasoning termination more natural.
+    `reasoning_end_str` can include a transition phrase before the reasoning end token. For example, setting `reasoning_end_str` to `"I have to give the solution based on the reasoning directly now.</think>"` instructs the model to emit that phrase when the budget is exhausted, making the reasoning termination more natural.
 
 ### Online Serving
 
 ```bash
 vllm serve Qwen/Qwen3-0.6B \
     --reasoning-parser qwen3 \
-    --reasoning-config '{"think_start_str": "<think>", "think_end_str": "I have to give the solution based on the thinking directly now.</think>"}'
+    --reasoning-config '{"reasoning_start_str": "<think>", "reasoning_end_str": "I have to give the solution based on the reasoning directly now.</think>"}'
 ```
 
 Then make a request with `thinking_token_budget` to limit the reasoning tokens:
@@ -283,9 +284,7 @@ curl http://localhost:8000/v1/chat/completions \
     "messages": [
       { "role": "user", "content": "9.11 and 9.8, which is greater?" }
     ],
-    "extra_body": {
-      "thinking_token_budget": 10
-    }
+    "thinking_token_budget": 10
   }'
 ```
 
@@ -298,8 +297,8 @@ from vllm.config import ReasoningConfig
 llm = LLM(
     model="Qwen/Qwen3-0.6B",
     reasoning_config=ReasoningConfig(
-        think_start_str="<think>",
-        think_end_str="I have to give the solution based on the thinking directly now.</think>",
+        reasoning_start_str="<think>",
+        reasoning_end_str="I have to give the solution based on the thinking directly now.</think>",
     ),
 )
 
diff --git a/docs/features/speculative_decoding/README.md b/docs/features/speculative_decoding/README.md
index 9793de3f4c35..768e9f78d401 100644
--- a/docs/features/speculative_decoding/README.md
+++ b/docs/features/speculative_decoding/README.md
@@ -15,6 +15,8 @@ vLLM supports a variety of methods of speculative decoding. Model-based methods
 - [Multi-Layer Perceptron](mlp.md)
 - [N-Gram](n_gram.md)
 - [Suffix Decoding](suffix.md)
+- [Hidden State Extraction](extract_hidden_states.md)
+- [Custom Proposer Backend (Experimental)](#custom-proposer-backend-experimental)
 
 ## Method Selection at a Glance
 
@@ -30,11 +32,126 @@ depend on your model family, traffic pattern, hardware, and sampling settings.
 | MLP speculator | Medium to high gain | Medium gain | Good when compatible MLP speculators are available. |
 | N-gram | Low to medium gain | Medium gain | Lightweight and easy to enable. |
 | Suffix decoding | Low to medium gain | Medium gain | No extra draft model; dynamic speculation depth. |
+| Custom Proposer | Varies | Varies | Bring your own proposer class (experimental). |
 
 For reproducible measurements in your environment, use
-[`examples/offline_inference/spec_decode.py`](../../../examples/offline_inference/spec_decode.py)
+[`examples/features/speculative_decoding/spec_decode_offline.py`](../../../examples/features/speculative_decoding/spec_decode_offline.py)
 or the [benchmark CLI guide](../../benchmarking/cli.md).
 
+## Custom Proposer Backend (Experimental)
+
+You can plug in your own custom proposer class for speculative decoding by setting the method to `custom_class` and providing the full module path to your class.
+Your custom class must accept a `VllmConfig` upon instantiation and implement a `propose` method.
+
+**Example configuration:**
+
+- `speculative_config.method = "custom_class"`
+- `speculative_config.model = "your_module.YourCustomProposerClass"`
+
+## `--speculative-config` schema
+
+Use `--speculative-config` to pass speculative decoding settings as a JSON
+object on the CLI:
+
+```bash
+vllm serve <target-model> \
+  --speculative-config '{
+    "method": "draft_model",
+    "model": "<draft-model>",
+    "num_speculative_tokens": 5
+  }'
+```
+
+The same keys are accepted from Python via `LLM(..., speculative_config={...})`.
+The tables below highlight common user-facing keys accepted in this JSON
+object; they are not an exhaustive schema reference.
+For more details, see the generated [engine arguments reference](../../configuration/engine_args.md)
+and the API docs for [vllm.config.SpeculativeConfig][].
+
+### Common keys
+
+These keys are commonly used across speculative decoding setups, though some
+only apply to model-based methods such as `draft_model`, `mtp`, `eagle3`, and
+`dflash`.
+
+| Key | Type | Default | Allowed values / meaning |
+| --- | --- | --- | --- |
+| `method` | `string` | `None` | Speculation method. Common values include `draft_model`, `ngram`, `suffix`, `mtp`, `eagle3`, and `dflash`. If omitted, vLLM infers the method from the provided configuration when possible. |
+| `model` | `string` | `None` | Draft model, EAGLE head, or auxiliary model identifier. For `ngram`, `ngram_gpu`, `suffix`, and `mtp`, this can often be omitted. |
+| `num_speculative_tokens` | `integer > 0` | `None` | Number of speculative tokens to propose per step. Required for methods that do not infer it from model metadata. |
+| `draft_tensor_parallel_size` | `integer >= 1` | `None` | Tensor parallel size for the draft model. |
+| `max_model_len` | `integer >= 1` | `None` | Maximum context length for the draft model. |
+| `parallel_drafting` | `boolean` | `false` | Enable parallel draft token generation. Only compatible with EAGLE and draft-model methods. |
+| `rejection_sample_method` | `string` | `strict` | `strict`, `probabilistic`, or `synthetic`. |
+| `synthetic_acceptance_rate` | `float` | `None` | Average acceptance rate to target when `rejection_sample_method` is `synthetic`. Valid range is `[0, 1]`. |
+
+!!! note
+    Gemma 4 assistant checkpoints are handled as Gemma 4 MTP speculators, not
+    as generic draft models. Use `"method": "mtp"` with the assistant
+    checkpoint in `model`, as shown in the [MTP guide](mtp.md#gemma-4-assistant-models).
+
+    If startup logs show `SpeculativeConfig(method='draft_model', ...)` for a
+    Gemma 4 assistant checkpoint, the installed vLLM version does not include
+    Gemma 4 MTP support for that path. Upgrade to a version that includes
+    Gemma 4 MTP support instead of forcing the assistant checkpoint through
+    generic draft-model speculative decoding.
+
+### Method-specific keys
+
+#### N-gram
+
+| Key | Type | Default | Meaning |
+| --- | --- | --- | --- |
+| `prompt_lookup_max` | `integer >= 1` | `5` if both lookup bounds are omitted; otherwise mirrors `prompt_lookup_min` when omitted | Maximum n-gram window size. |
+| `prompt_lookup_min` | `integer >= 1` | `5` if both lookup bounds are omitted; otherwise mirrors `prompt_lookup_max` when omitted | Minimum n-gram window size. |
+
+Example:
+
+```bash
+vllm serve <target-model> \
+  --speculative-config '{
+    "method": "ngram",
+    "num_speculative_tokens": 4,
+    "prompt_lookup_min": 2,
+    "prompt_lookup_max": 5
+  }'
+```
+
+#### Suffix decoding
+
+| Key | Type | Default | Meaning |
+| --- | --- | --- | --- |
+| `suffix_decoding_max_tree_depth` | `integer` | `24` | Maximum combined prefix-match and speculation tree depth. |
+| `suffix_decoding_max_cached_requests` | `integer` | `10000` | Maximum number of requests cached in the global suffix tree. Set `0` to disable the global cache. |
+| `suffix_decoding_max_spec_factor` | `float` | `1.0` | Caps speculative length as a multiple of prefix-match length. |
+| `suffix_decoding_min_token_prob` | `float` | `0.1` | Minimum estimated token probability required to speculate a token. |
+
+Example:
+
+```bash
+vllm serve <target-model> \
+  --speculative-config '{
+    "method": "suffix",
+    "num_speculative_tokens": 8,
+    "suffix_decoding_max_tree_depth": 24,
+    "suffix_decoding_max_cached_requests": 10000,
+    "suffix_decoding_max_spec_factor": 1.0,
+    "suffix_decoding_min_token_prob": 0.1
+  }'
+```
+
+### Notes
+
+- `--speculative-config` expects a JSON object on the CLI. In YAML config
+  files, use a nested mapping instead of an escaped JSON string.
+- `tensor_parallel_size` is not a valid key in `speculative_config`. Use
+  `draft_tensor_parallel_size` instead.
+- Keys such as `temperature` and `top_p` are sampling parameters, not
+  `--speculative-config` fields.
+- Internal fields such as `target_model_config`, `draft_model_config`,
+  `target_parallel_config`, `draft_parallel_config`, and `draft_load_config`
+  are populated by vLLM and are not intended to be set by users.
+
 ## Lossless guarantees of Speculative Decoding
 
 In vLLM, speculative decoding aims to enhance inference efficiency while maintaining accuracy. This section addresses the lossless guarantees of
diff --git a/docs/features/speculative_decoding/draft_model.md b/docs/features/speculative_decoding/draft_model.md
index ee0eaf176e76..b4662e6438f9 100644
--- a/docs/features/speculative_decoding/draft_model.md
+++ b/docs/features/speculative_decoding/draft_model.md
@@ -33,9 +33,9 @@ vllm serve Qwen/Qwen3-4B-Thinking-2507 \
     --port 8000 \
     --seed 42 \
     -tp 1 \
-    --max_model_len 2048 \
-    --gpu_memory_utilization 0.8 \
-    --speculative_config '{"model": "Qwen/Qwen3-0.6B", "num_speculative_tokens": 5, "method": "draft_model"}'
+    --max-model-len 2048 \
+    --gpu-memory-utilization 0.8 \
+    --speculative-config '{"model": "Qwen/Qwen3-0.6B", "num_speculative_tokens": 5, "method": "draft_model"}'
 ```
 
 The code used to request as completions as a client remains unchanged:
@@ -77,4 +77,8 @@ The code used to request as completions as a client remains unchanged:
     ```
 
 !!! warning
-    Note: Please use `--speculative_config` to set all configurations related to speculative decoding. The previous method of specifying the model through `--speculative_model` and adding related parameters (e.g., `--num_speculative_tokens`) separately has been deprecated.
+    Note: Please use `--speculative-config` to set all configurations related
+    to speculative decoding. The previous method of specifying the model
+    through `--speculative-model` and adding related parameters such as
+    `--num-speculative-tokens` separately has been deprecated. For supported
+    keys and examples, see the [`--speculative-config` schema](README.md#--speculative-config-schema).
diff --git a/docs/features/speculative_decoding/eagle.md b/docs/features/speculative_decoding/eagle.md
index 3e0f3add416e..cc9e4fd4c0c1 100644
--- a/docs/features/speculative_decoding/eagle.md
+++ b/docs/features/speculative_decoding/eagle.md
@@ -1,6 +1,6 @@
 # EAGLE Draft Models
 
-The following code configures vLLM to use speculative decoding where proposals are generated by an [EAGLE (Extrapolation Algorithm for Greater Language-model Efficiency)](https://arxiv.org/pdf/2401.15077) based draft model. A more detailed example for offline mode, including how to extract request level acceptance rate, can be found in [examples/offline_inference/spec_decode.py](../../../examples/offline_inference/spec_decode.py)
+The following code configures vLLM to use speculative decoding where proposals are generated by an [EAGLE (Extrapolation Algorithm for Greater Language-model Efficiency)](https://arxiv.org/pdf/2401.15077) based draft model. A more detailed example for offline mode, including how to extract request level acceptance rate, can be found in [examples/features/speculative_decoding/spec_decode_offline.py](../../../examples/features/speculative_decoding/spec_decode_offline.py)
 
 ## Eagle Drafter Example
 
diff --git a/docs/features/speculative_decoding/extract_hidden_states.md b/docs/features/speculative_decoding/extract_hidden_states.md
new file mode 100644
index 000000000000..2184a71f489f
--- /dev/null
+++ b/docs/features/speculative_decoding/extract_hidden_states.md
@@ -0,0 +1,86 @@
+# Hidden State Extraction
+
+The Hidden State Extraction feature allows vLLM to save intermediate layer activations from a target model during inference. This is useful for training [EAGLE](eagle.md)-style draft models, knowledge distillation, or offline analysis of model internals.
+
+!!! note
+    It is possible to save the last-layer's output hidden states by passing `num_hidden_layers` as a layer id. Note that these are _not_ normalized using the output norm.
+
+## Offline Example
+
+```python
+import tempfile
+
+from vllm import LLM, SamplingParams
+from vllm.config.kv_transfer import KVTransferConfig
+from vllm.distributed.kv_transfer.kv_connector.v1 import (
+    example_hidden_states_connector,
+)
+
+with tempfile.TemporaryDirectory() as tmpdir:
+    llm = LLM(
+        model="Qwen/Qwen3-8B",
+        enable_chunked_prefill=False,
+        speculative_config={
+            "method": "extract_hidden_states",
+            "num_speculative_tokens": 1,
+            "draft_model_config": {
+                "hf_config": {
+                    "eagle_aux_hidden_state_layer_ids": [1, 2, 3, 4],
+                },
+            },
+        },
+        kv_transfer_config=KVTransferConfig(
+            kv_connector="ExampleHiddenStatesConnector",
+            kv_role="kv_producer",
+            kv_connector_extra_config={
+                "shared_storage_path": tmpdir,
+            },
+        ),
+    )
+
+    outputs = llm.generate(
+        ["The future of AI is"],
+        SamplingParams(max_tokens=1),
+    )
+
+    for output in outputs:
+        path = output.kv_transfer_params["hidden_states_path"]
+        obj = example_hidden_states_connector.load_hidden_states(path)
+        print(f"token_ids: {obj['token_ids'].shape}")
+        print(f"hidden_states: {obj['hidden_states'].shape}")
+```
+
+A complete example is available at [`examples/features/speculative_decoding/extract_hidden_states_offline.py`](../../../examples/features/speculative_decoding/extract_hidden_states_offline.py).
+
+## Online Example
+
+For improved performance, it is recommended to use a RAM-mounted file system such as `/dev/shm/` for online usage in which the client cleans up the files soon after they are generated.
+
+```bash
+vllm serve Qwen/Qwen3-8B \
+    --speculative_config '{"method": "extract_hidden_states", "num_speculative_tokens": 1, "draft_model_config": {"hf_config": {"eagle_aux_hidden_state_layer_ids": [1, 2, 3, 4]}}}' \
+    --kv_transfer_config '{"kv_connector": "ExampleHiddenStatesConnector", "kv_role": "kv_producer", "kv_connector_extra_config": {"shared_storage_path": "/dev/shm/hidden_states"}}' \
+    --no-enable-chunked-prefill
+```
+
+## Configuration
+
+The `kv_connector_extra_config` dict accepts these options:
+
+| Parameter | Default | Description |
+| --- | --- | --- |
+| `shared_storage_path` | `/tmp` | Directory where hidden state files are saved |
+| `num_writer_threads` | `8` | Thread pool size for async disk writes |
+| `use_synchronization_lock` | `True` | Use file locks so concurrent readers block until writes complete. Can be disabled for batch generation where synchronization is not needed. |
+
+## Output Format
+
+Each request produces a `.safetensors` file containing:
+
+- **`hidden_states`** — shape `[num_tokens, num_extracted_layers, hidden_size]`
+- **`token_ids`** — shape `[num_tokens]`
+
+The file path is returned in `output.kv_transfer_params["hidden_states_path"]`. Use `load_hidden_states()` from the connector module to read the file with proper synchronization.
+
+!!! note
+    Chunked prefill is not compatible with this feature and must be disabled.
diff --git a/docs/features/speculative_decoding/mtp.md b/docs/features/speculative_decoding/mtp.md
index bcd7153deb51..d60f8ff27ba2 100644
--- a/docs/features/speculative_decoding/mtp.md
+++ b/docs/features/speculative_decoding/mtp.md
@@ -9,6 +9,31 @@ MTP is useful when:
 - Your model natively supports MTP.
 - You want model-based speculative decoding with minimal extra configuration.
 
+## Gemma 4 Assistant Models
+
+Gemma 4 assistant checkpoints use vLLM's Gemma 4 MTP path. They are not generic
+draft models, even though they are passed through the `model` field in
+`--speculative-config`.
+
+Use `"method": "mtp"` when serving Gemma 4 with an assistant checkpoint:
+
+```bash
+vllm serve google/gemma-4-E2B-it \
+    --tensor-parallel-size 1 \
+    --max-model-len 8192 \
+    --speculative-config '{"method":"mtp","model":"gg-hf-am/gemma-4-E2B-it-assistant","num_speculative_tokens":1}'
+```
+
+The E2B, E4B, 26B-A4B, and 31B Gemma 4 IT assistant checkpoints are supported
+when their configuration uses `model_type: gemma4_assistant`. vLLM maps those
+checkpoints to `Gemma4MTPModel` internally and wires the assistant layers to
+share KV cache with the target model.
+
+If an older vLLM release logs `SpeculativeConfig(method='draft_model', ...)`
+for a Gemma 4 assistant checkpoint, that release is treating the assistant as a
+generic draft model and may fail during initialization for multimodal Gemma 4
+targets. Upgrade to a version with Gemma 4 MTP support instead.
+
 ## Offline Example
 
 ```python
@@ -38,7 +63,7 @@ for output in outputs:
 ```bash
 vllm serve XiaomiMiMo/MiMo-7B-Base \
     --tensor-parallel-size 1 \
-    --speculative_config '{"method":"mtp","num_speculative_tokens":1}'
+    --speculative-config '{"method":"mtp","num_speculative_tokens":1}'
 ```
 
 ## Notes
diff --git a/docs/features/speculative_decoding/parallel_draft_model.md b/docs/features/speculative_decoding/parallel_draft_model.md
index 2a3f11a302d3..c31b8e2d2f40 100644
--- a/docs/features/speculative_decoding/parallel_draft_model.md
+++ b/docs/features/speculative_decoding/parallel_draft_model.md
@@ -36,9 +36,9 @@ vllm serve Qwen/Qwen3-4B \
     --port 8000 \
     --seed 42 \
     -tp 1 \
-    --max_model_len 2048 \
-    --gpu_memory_utilization 0.8 \
-    --speculative_config '{"model": "amd/PARD-Qwen3-0.6B", "num_speculative_tokens": 12, "method": "draft_model", "parallel_drafting": true}'
+    --max-model-len 2048 \
+    --gpu-memory-utilization 0.8 \
+    --speculative-config '{"model": "amd/PARD-Qwen3-0.6B", "num_speculative_tokens": 12, "method": "draft_model", "parallel_drafting": true}'
 ```
 
 ## Pre-trained PARD weights
diff --git a/docs/features/structured_outputs.md b/docs/features/structured_outputs.md
index 41cf7be89291..a46138238d47 100644
--- a/docs/features/structured_outputs.md
+++ b/docs/features/structured_outputs.md
@@ -29,7 +29,7 @@ The following parameters are supported, which must be added as extra parameters:
 - `grammar`: the output will follow the context free grammar.
 - `structural_tag`: Follow a JSON schema within a set of specified tags within the generated text.
 
-You can see the complete list of supported parameters on the [OpenAI-Compatible Server](../serving/openai_compatible_server.md) page.
+You can see the complete list of supported parameters on the [OpenAI-Compatible Server](../serving/online_serving/openai_compatible_server.md) page.
 
 Structured outputs are supported by default in the OpenAI-Compatible Server. You
 may choose to specify the backend to use by setting the
@@ -165,7 +165,7 @@ As an example, we can use to define a specific format of simplified SQL queries:
     print(completion.choices[0].message.content)
     ```
 
-See also: [full example](../examples/online_serving/structured_outputs.md)
+See also: [full example](../../examples/features/structured_outputs/README.md)
 
 ## Reasoning Outputs
 
@@ -208,7 +208,7 @@ Note that you can use reasoning with any provided structured outputs feature. Th
     print("content: ", completion.choices[0].message.content)
     ```
 
-See also: [full example](../examples/online_serving/structured_outputs.md)
+See also: [full example](../../examples/features/structured_outputs/README.md)
 
 !!! note
     When using Qwen3 Coder models with reasoning enabled, structured outputs might become disabled if the reasoning content does not get parsed into the `reasoning` field separately (v0.11.2+).
@@ -304,7 +304,7 @@ Step #2: explanation="Next, let's isolate 'x' by dividing both sides of the equa
 Answer: x = -29/8
 ```
 
-An example of using `structural_tag` can be found here: [examples/online_serving/structured_outputs](../../examples/online_serving/structured_outputs)
+An example of using `structural_tag` can be found here: [examples/features/structured_outputs](../../examples/features/structured_outputs/README.md)
 
 ## Offline Inference
 
@@ -339,4 +339,4 @@ shown below:
     print(outputs[0].outputs[0].text)
     ```
 
-See also: [full example](../examples/online_serving/structured_outputs.md)
+See also: [full example](../../examples/features/structured_outputs/structured_outputs_offline.py)
diff --git a/docs/features/tool_calling.md b/docs/features/tool_calling.md
index cea1175413fe..95092734f3d4 100644
--- a/docs/features/tool_calling.md
+++ b/docs/features/tool_calling.md
@@ -369,6 +369,16 @@ Flags:
 * For non-reasoning: `--tool-call-parser hunyuan_a13b`
 * For reasoning: `--tool-call-parser hunyuan_a13b --reasoning-parser hunyuan_a13b`
 
+### Cohere Command A Reasoning (`cohere_command3`)
+
+Supported models:
+
+* [`CohereLabs/command-a-reasoning-08-2025`](https://huggingface.co/CohereLabs/command-a-reasoning-08-2025)
+
+Flags: `--tool-call-parser cohere_command3 --reasoning-parser cohere_command3`
+
+Note: the Cohere tool parser requires the `cohere_melody` package, which is not installed by default. Before using this parser please install the [cohere_melody](https://pypi.org/project/cohere-melody/) package.
+
 ### LongCat-Flash-Chat Models (`longcat`)
 
 Supported models:
@@ -454,6 +464,17 @@ Supported models:
 
 Flags: `--tool-call-parser gigachat3`
 
+### Apertus Models (`apertus`)
+
+Use the chat template from the examples folder; it fixes several OpenAI compatibility issues: `--chat-template /vllm-workspace/examples/tool_chat_template_apertus.jinja`
+
+Supported models:
+
+* `swiss-ai/Apertus-8B-Instruct-2509`
+* `swiss-ai/Apertus-70B-Instruct-2509`
+
+Flags: `--tool-call-parser apertus`
+
 ### Models with Pythonic Tool Calls (`pythonic`)
 
 A growing number of models output a python list to represent tool calls instead of using JSON. This has the advantage of inherently supporting parallel tool calls and removing ambiguity around the JSON schema required for tool calls. The `pythonic` tool parser can support such models.
@@ -505,7 +526,7 @@ Here is a summary of a plugin file:
 
         # adjust request. e.g.: set skip special tokens
         # to False for tool call output.
-        def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+        def adjust_request(self, request: ChatCompletionRequest | ResponsesRequest) -> ChatCompletionRequest | ResponsesRequest:
             return request
 
         # implement the tool call parse for stream call
diff --git a/docs/getting_started/installation/README.md b/docs/getting_started/installation/README.md
index ac3309b23414..a0eb56302056 100644
--- a/docs/getting_started/installation/README.md
+++ b/docs/getting_started/installation/README.md
@@ -3,9 +3,10 @@
 vLLM supports the following hardware platforms:
 
 - [GPU](gpu.md)
-    - [NVIDIA CUDA](gpu.md#nvidia-cuda)
-    - [AMD ROCm](gpu.md#amd-rocm)
-    - [Intel XPU](gpu.md#intel-xpu)
+    - [NVIDIA CUDA](gpu.md)
+    - [AMD ROCm](gpu.md)
+    - [Intel XPU](gpu.md)
+    - [Apple Silicon](gpu.md) (via [vLLM-Metal](https://github.com/vllm-project/vllm-metal))
 - [CPU](cpu.md)
     - [Intel/AMD x86](cpu.md#intelamd-x86)
     - [ARM AArch64](cpu.md#arm-aarch64)
diff --git a/docs/getting_started/installation/cpu.arm.inc.md b/docs/getting_started/installation/cpu.arm.inc.md
index b266e96db559..f01ba429ee03 100644
--- a/docs/getting_started/installation/cpu.arm.inc.md
+++ b/docs/getting_started/installation/cpu.arm.inc.md
@@ -20,12 +20,12 @@ Pre-built vLLM wheels for Arm are available since version 0.11.2. These wheels c
 
 ```bash
 export VLLM_VERSION=$(curl -s https://api.github.com/repos/vllm-project/vllm/releases/latest | jq -r .tag_name | sed 's/^v//')
-uv pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl
+uv pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl --torch-backend cpu
 ```
 
 ??? console "pip"
     ```bash
-    pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl
+    pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl --extra-index-url https://download.pytorch.org/whl/cpu
     ```
 
 !!! warning "set `LD_PRELOAD`"
@@ -53,7 +53,7 @@ LLM inference is a fast-evolving field, and the latest code may contain bug fixe
 To install from nightly index, run:
 
 ```bash
-uv pip install vllm --extra-index-url https://wheels.vllm.ai/nightly/cpu --index-strategy first-index
+uv pip install vllm --extra-index-url https://wheels.vllm.ai/nightly/cpu --index-strategy first-index --torch-backend cpu
 ```
 
 ??? console "pip (there's a caveat)"
@@ -63,7 +63,7 @@ uv pip install vllm --extra-index-url https://wheels.vllm.ai/nightly/cpu --index
     If you insist on using `pip`, you have to specify the full URL (link address) of the wheel file (which can be obtained from https://wheels.vllm.ai/nightly/cpu/vllm).
 
     ```bash
-    pip install https://wheels.vllm.ai/4fa7ce46f31cbd97b4651694caf9991cc395a259/vllm-0.13.0rc2.dev104%2Bg4fa7ce46f.cpu-cp38-abi3-manylinux_2_35_aarch64.whl # current nightly build (the filename will change!)
+    pip install https://wheels.vllm.ai/4fa7ce46f31cbd97b4651694caf9991cc395a259/vllm-0.13.0rc2.dev104%2Bg4fa7ce46f.cpu-cp38-abi3-manylinux_2_35_aarch64.whl --extra-index-url https://download.pytorch.org/whl/cpu # current nightly build (the filename will change!)
     ```
 
 #### Install specific revisions
@@ -72,7 +72,7 @@ If you want to access the wheels for previous commits (e.g. to bisect the behavi
 
 ```bash
 export VLLM_COMMIT=730bd35378bf2a5b56b6d3a45be28b3092d26519 # use full commit hash from the main branch
-uv pip install vllm --extra-index-url https://wheels.vllm.ai/${VLLM_COMMIT}/cpu --index-strategy first-index
+uv pip install vllm --extra-index-url https://wheels.vllm.ai/${VLLM_COMMIT}/cpu --index-strategy first-index --torch-backend cpu
 ```
 
 --8<-- [end:pre-built-wheels]
@@ -96,14 +96,14 @@ cd vllm_source
 Third, install required dependencies:
 
 ```bash
-uv pip install -r requirements/cpu-build.txt --torch-backend cpu
+uv pip install -r requirements/build/cpu.txt --torch-backend cpu
 uv pip install -r requirements/cpu.txt --torch-backend cpu
 ```
 
 ??? console "pip"
     ```bash
     pip install --upgrade pip
-    pip install -v -r requirements/cpu-build.txt --extra-index-url https://download.pytorch.org/whl/cpu
+    pip install -v -r requirements/build/cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
     pip install -v -r requirements/cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
     ```
 
diff --git a/docs/getting_started/installation/cpu.s390x.inc.md b/docs/getting_started/installation/cpu.s390x.inc.md
index eeb20b8bf063..1e36b4317647 100644
--- a/docs/getting_started/installation/cpu.s390x.inc.md
+++ b/docs/getting_started/installation/cpu.s390x.inc.md
@@ -3,15 +3,15 @@
 
 vLLM has experimental support for s390x architecture on IBM Z platform. For now, users must build from source to natively run on IBM Z platform.
 
-Currently, the CPU implementation for s390x architecture supports FP32 datatype only.
+Currently, the CPU implementation for s390x architecture supports FP32, BF16 and FP16.
 
 --8<-- [end:installation]
 --8<-- [start:requirements]
 
 - OS: `Linux`
-- SDK: `gcc/g++ >= 12.3.0` or later with Command Line Tools
+- SDK: `gcc/g++ >= 14.0.0` or later with Command Line Tools
 - Instruction Set Architecture (ISA): VXE support is required. Works with Z14 and above.
-- Build install python packages: `pyarrow`, `torch` and `torchvision`
+- Build install python packages: `torchvision`, `llvmlite`, `numba`, `pyarrow (for testing)`, `opencv-headless`
 
 --8<-- [end:requirements]
 --8<-- [start:set-up-using-python]
@@ -24,13 +24,14 @@ Currently, there are no pre-built IBM Z CPU wheels.
 --8<-- [end:pre-built-wheels]
 --8<-- [start:build-wheel-from-source]
 
-Install the following packages from the package manager before building the vLLM. For example on RHEL 9.4:
+Install the following packages from the package manager before building the vLLM. For example on RHEL 9.6:
 
 ```bash
 dnf install -y \
-    which procps findutils tar vim git gcc g++ make patch make cython zlib-devel \
+    which procps findutils tar vim git gcc-toolset-14 gcc-toolset-14-binutils gcc-toolset-14-libatomic-devel zlib-devel \
     libjpeg-turbo-devel libtiff-devel libpng-devel libwebp-devel freetype-devel harfbuzz-devel \
-    openssl-devel openblas openblas-devel wget autoconf automake libtool cmake numactl-devel
+    openssl-devel openblas openblas-devel autoconf automake libtool cmake numpy libsndfile \
+    clang llvm-devel llvm-static clang-devel
 ```
 
 Install rust>=1.80 which is needed for `outlines-core` and `uvloop` python packages installation.
@@ -43,13 +44,13 @@ curl https://sh.rustup.rs -sSf | sh -s -- -y && \
 Execute the following commands to build and install vLLM from source.
 
 !!! tip
-    Please build the following dependencies, `torchvision`, `pyarrow` from source before building vLLM.
+    Please build the following dependencies, `torchvision`, `llvmlite`, `numba`, `llguidance`, `pyarrow`, `opencv-headless` from source before building vLLM.
 
 ```bash
-    sed -i '/^torch/d' requirements/build.txt    # remove torch from requirements/build.txt since we use nightly builds
     uv pip install -v \
+        --extra-index-url https://download.pytorch.org/whl/cpu \
         --torch-backend auto \
-        -r requirements/build.txt \
+        -r requirements/build/cpu.txt \
         -r requirements/cpu.txt \
     VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
         uv pip install dist/*.whl
@@ -57,10 +58,9 @@ Execute the following commands to build and install vLLM from source.
 
 ??? console "pip"
     ```bash
-        sed -i '/^torch/d' requirements/build.txt    # remove torch from requirements/build.txt since we use nightly builds
         pip install -v \
-            --extra-index-url https://download.pytorch.org/whl/nightly/cpu \
-            -r requirements/build.txt \
+            --extra-index-url https://download.pytorch.org/whl/cpu \
+            -r requirements/build/cpu.txt \
             -r requirements/cpu.txt \
         VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
             pip install dist/*.whl
diff --git a/docs/getting_started/installation/cpu.x86.inc.md b/docs/getting_started/installation/cpu.x86.inc.md
index 8b855e919f44..ad051d22dc8c 100644
--- a/docs/getting_started/installation/cpu.x86.inc.md
+++ b/docs/getting_started/installation/cpu.x86.inc.md
@@ -88,14 +88,14 @@ cd vllm_source
 Install the required dependencies:
 
 ```bash
-uv pip install -r requirements/cpu-build.txt --torch-backend cpu
+uv pip install -r requirements/build/cpu.txt --torch-backend cpu
 uv pip install -r requirements/cpu.txt --torch-backend cpu
 ```
 
 ??? console "pip"
     ```bash
     pip install --upgrade pip
-    pip install -v -r requirements/cpu-build.txt --extra-index-url https://download.pytorch.org/whl/cpu
+    pip install -v -r requirements/build/cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
     pip install -v -r requirements/cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
     ```
 
diff --git a/docs/getting_started/installation/gpu.apple.inc.md b/docs/getting_started/installation/gpu.apple.inc.md
new file mode 100644
index 000000000000..1a1d37b53bc5
--- /dev/null
+++ b/docs/getting_started/installation/gpu.apple.inc.md
@@ -0,0 +1,125 @@
+<!-- markdownlint-disable MD041 -->
+--8<-- [start:installation]
+
+For GPU-accelerated inference on Apple Silicon, use [vLLM-Metal](https://github.com/vllm-project/vllm-metal), a community-maintained hardware plugin that uses MLX as the compute backend and provides native GPU acceleration via Apple's Metal framework.
+
+vLLM-Metal works with MLX-optimized models from the [mlx-community](https://huggingface.co/mlx-community) organization on Hugging Face, which provides quantized versions of popular models optimized for Apple Silicon.
+
+!!! tip
+    For installation and usage instructions, see the [Set up using vLLM-Metal](#set-up-using-vllm-metal) section below.
+
+--8<-- [end:installation]
+--8<-- [start:requirements]
+
+- OS: macOS Sonoma or later
+- Hardware: Apple Silicon
+- Metal support enabled
+
+!!! note
+    See the [Set up using vLLM-Metal](#set-up-using-vllm-metal) section below for installation instructions.
+
+--8<-- [end:requirements]
+--8<-- [start:set-up-using-python]
+
+## Set up using vLLM-Metal
+
+vLLM-Metal is distributed as a separate package that provides native GPU acceleration on Apple Silicon.
+
+To install vLLM-Metal, follow the installation instructions in the [vLLM-Metal documentation](https://github.com/vllm-project/vllm-metal#installation).
+
+The installation will:
+
+1. Set up the appropriate Python environment
+2. Install MLX and required dependencies
+3. Install the vLLM-Metal package
+
+After installation, you can start using vLLM with Metal GPU acceleration.
+
+!!! tip
+    When using vLLM-Metal, use models from the [mlx-community](https://huggingface.co/mlx-community) on Hugging Face for best performance. These models are optimized for MLX and often include quantized versions (4-bit, 8-bit) that run efficiently on Apple Silicon.
+
+    Example model: `mlx-community/Qwen2.5-0.5B-Instruct-4bit`
+
+### Using vLLM-Metal
+
+After installation, vLLM-Metal provides an easy-to-use CLI for running an OpenAI-compatible API server:
+
+```bash
+# Activate the vLLM-Metal environment
+source ~/.venv-vllm-metal/bin/activate
+
+# Start the API server (specify your mlx-community model or it will use default)
+vllm serve
+```
+
+Once the server is running, you have multiple options to interact with it:
+
+#### Option 1: Interactive chat
+
+Open a new terminal and start an interactive chat session:
+
+```bash
+source ~/.venv-vllm-metal/bin/activate
+vllm chat
+```
+
+#### Option 2: API requests with curl
+
+```bash
+curl http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "messages": [{"role": "user", "content": "Hello!"}],
+    "max_tokens": 50
+  }'
+```
+
+#### Option 3: Python with OpenAI SDK
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://localhost:8000/v1",
+    api_key="dummy"  # No auth required for local server
+)
+
+response = client.chat.completions.create(
+    model="mlx-community/Qwen2.5-0.5B-Instruct-4bit",
+    messages=[{"role": "user", "content": "Hello!"}]
+)
+
+print(response.choices[0].message.content)
+```
+
+For more details on the `vllm` CLI commands, see the [OpenAI-compatible server documentation](../../serving/online_serving/openai_compatible_server.md).
+
+--8<-- [end:set-up-using-python]
+--8<-- [start:pre-built-wheels]
+
+vLLM-Metal is installed via the vLLM-Metal package. See the [Set up using vLLM-Metal](#set-up-using-vllm-metal) section above.
+
+--8<-- [end:pre-built-wheels]
+--8<-- [start:build-wheel-from-source]
+
+For build instructions from source, refer to the [vLLM-Metal documentation](https://github.com/vllm-project/vllm-metal#installation).
+
+--8<-- [end:build-wheel-from-source]
+--8<-- [start:pre-built-images]
+
+--8<-- [end:pre-built-images]
+--8<-- [start:build-image-from-source]
+
+--8<-- [end:build-image-from-source]
+--8<-- [start:supported-features]
+
+vLLM-Metal provides:
+
+- Native GPU acceleration using Metal
+- MLX-based compute backend optimized for Apple Silicon
+- OpenAI-compatible API server
+- Support for popular model architectures
+
+For specific feature support and limitations, refer to the [vLLM-Metal documentation](https://github.com/vllm-project/vllm-metal).
+
+--8<-- [end:supported-features]
diff --git a/docs/getting_started/installation/gpu.cuda.inc.md b/docs/getting_started/installation/gpu.cuda.inc.md
index e46fecc45cd5..ec333b3ee1bf 100644
--- a/docs/getting_started/installation/gpu.cuda.inc.md
+++ b/docs/getting_started/installation/gpu.cuda.inc.md
@@ -1,12 +1,12 @@
 <!-- markdownlint-disable MD041 MD051 -->
 --8<-- [start:installation]
 
-vLLM contains pre-compiled C++ and CUDA (12.8) binaries.
+vLLM contains pre-compiled C++ and CUDA (12.9) binaries.
 
 --8<-- [end:installation]
 --8<-- [start:requirements]
 
-- GPU: compute capability 7.0 or higher (e.g., V100, T4, RTX20xx, A100, L4, H100, etc.)
+- GPU: compute capability 7.5 or higher (e.g., T4, RTX20xx, A100, L4, H100, B200, etc.)
 
 --8<-- [end:requirements]
 --8<-- [start:set-up-using-python]
@@ -31,7 +31,7 @@ uv pip install vllm --torch-backend=auto
     pip install vllm --extra-index-url https://download.pytorch.org/whl/cu129
     ```
 
-We recommend leveraging `uv` to [automatically select the appropriate PyTorch index at runtime](https://docs.astral.sh/uv/guides/integration/pytorch/#automatic-backend-selection) by inspecting the installed CUDA driver version via `--torch-backend=auto` (or `UV_TORCH_BACKEND=auto`). To select a specific backend (e.g., `cu128`), set `--torch-backend=cu128` (or `UV_TORCH_BACKEND=cu128`). If this doesn't work, try running `uv self update` to update `uv` first.
+We recommend leveraging `uv` to [automatically select the appropriate PyTorch index at runtime](https://docs.astral.sh/uv/guides/integration/pytorch/#automatic-backend-selection) by inspecting the installed CUDA driver version via `--torch-backend=auto` (or `UV_TORCH_BACKEND=auto`). To select a specific backend (e.g., `cu130`), set `--torch-backend=cu130` (or `UV_TORCH_BACKEND=cu130`). If this doesn't work, try running `uv self update` to update `uv` first.
 
 !!! note
     NVIDIA Blackwell GPUs (B200, GB200) require a minimum of CUDA 12.8, so make sure you are installing PyTorch wheels with at least that version. PyTorch itself offers a [dedicated interface](https://pytorch.org/get-started/locally/) to determine the appropriate pip command to run for a given target configuration.
@@ -93,7 +93,7 @@ If you only need to change Python code, you can build and install vLLM without c
 ```bash
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-VLLM_USE_PRECOMPILED=1 uv pip install --editable .
+VLLM_USE_PRECOMPILED=1 uv pip install --editable . --torch-backend=auto
 ```
 
 This command will do the following:
@@ -101,16 +101,26 @@ This command will do the following:
 1. Look for the current branch in your vLLM clone.
 1. Identify the corresponding base commit in the main branch.
 1. Download the pre-built wheel of the base commit.
-1. Use its compiled libraries in the installation.
+1. Use its compiled libraries and `vllm-rs` binary in the installation.
 
 !!! note
     1. If you change C++ or kernel code, you cannot use Python-only build; otherwise you will see an import error about library not found or undefined symbol.
     2. If you rebase your dev branch, it is recommended to uninstall vllm and re-run the above command to make sure your libraries are up to date.
 
-In case you see an error about wheel not found when running the above command, it might be because the commit you based on in the main branch was just merged and the wheel is being built. In this case, you can wait for around an hour to try again, or manually assign the previous commit in the installation using the `VLLM_PRECOMPILED_WHEEL_LOCATION` environment variable.
+!!! tip "Rebuilding the Rust frontend"
+If you need to recompile the `vllm-rs` Rust frontend binary, you can rebuild and install it without re-running the full pip install:
+
+    ```bash
+    ./build_rust.sh          # release build
+    ./build_rust.sh --debug  # faster build for development
+    ```
+
+    This will install the required Rust toolchain if needed, build the binary, and place it in `vllm/vllm-rs`.
+
+In case you see an error about wheel not found when running the above command, it might be because the commit you based on in the `main` branch was just merged and its precompiled wheel is not available yet. You can wait around an hour and retry, or set `VLLM_PRECOMPILED_WHEEL_COMMIT=nightly` to automatically select the most recent already-built commit on `main`.
 
 ```bash
-export VLLM_PRECOMPILED_WHEEL_COMMIT=$(git rev-parse HEAD~1) # or earlier commit on main
+export VLLM_PRECOMPILED_WHEEL_COMMIT=nightly
 export VLLM_USE_PRECOMPILED=1
 uv pip install --editable .
 ```
@@ -134,7 +144,7 @@ If you want to modify C++ or CUDA code, you'll need to build vLLM from source. T
 ```bash
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-uv pip install -e .
+uv pip install -e . --torch-backend=auto
 ```
 
 !!! tip
@@ -162,7 +172,7 @@ To build vLLM using an existing PyTorch installation:
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
 python use_existing_torch.py
-uv pip install -r requirements/build.txt
+uv pip install -r requirements/build/cuda.txt
 uv pip install --no-build-isolation -e .
 ```
 
@@ -185,7 +195,7 @@ To achieve this, you can set the environment variable VLLM_CUTLASS_SRC_DIR to po
 ```bash
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-VLLM_CUTLASS_SRC_DIR=/path/to/cutlass uv pip install -e .
+VLLM_CUTLASS_SRC_DIR=/path/to/cutlass uv pip install -e . --torch-backend=auto
 ```
 
 ##### Troubleshooting
@@ -375,8 +385,8 @@ For (G)B300, we recommend using CUDA 13, as shown in the following command.
 
     ```bash
     DOCKER_BUILDKIT=1 docker build \
-    --build-arg CUDA_VERSION=13.0.1 \
-    --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 \
+    --build-arg CUDA_VERSION=13.0.2 \
+    --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.2-devel-ubuntu22.04 \
     --build-arg max_jobs=256 \
     --build-arg nvcc_threads=2 \
     --build-arg RUN_WHEEL_CHECK=false \
diff --git a/docs/getting_started/installation/gpu.md b/docs/getting_started/installation/gpu.md
index 475c67ce9d05..91d933dd4e86 100644
--- a/docs/getting_started/installation/gpu.md
+++ b/docs/getting_started/installation/gpu.md
@@ -18,6 +18,10 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 
     --8<-- "docs/getting_started/installation/gpu.xpu.inc.md:installation"
 
+=== "Apple Silicon"
+
+    --8<-- "docs/getting_started/installation/gpu.apple.inc.md:installation"
+
 ## Requirements
 
 - OS: Linux
@@ -38,6 +42,10 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 
     --8<-- "docs/getting_started/installation/gpu.xpu.inc.md:requirements"
 
+=== "Apple Silicon"
+
+    --8<-- "docs/getting_started/installation/gpu.apple.inc.md:requirements"
+
 ## Set up using Python
 
 ### Create a new Python environment
@@ -56,6 +64,10 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 
     --8<-- "docs/getting_started/installation/gpu.xpu.inc.md:set-up-using-python"
 
+=== "Apple Silicon"
+
+    --8<-- "docs/getting_started/installation/gpu.apple.inc.md:set-up-using-python"
+
 ### Pre-built wheels {#pre-built-wheels}
 
 === "NVIDIA CUDA"
@@ -70,6 +82,10 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 
     --8<-- "docs/getting_started/installation/gpu.xpu.inc.md:pre-built-wheels"
 
+=== "Apple Silicon"
+
+    --8<-- "docs/getting_started/installation/gpu.apple.inc.md:pre-built-wheels"
+
 ### Build wheel from source
 
 === "NVIDIA CUDA"
@@ -84,6 +100,10 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 
     --8<-- "docs/getting_started/installation/gpu.xpu.inc.md:build-wheel-from-source"
 
+=== "Apple Silicon"
+
+    --8<-- "docs/getting_started/installation/gpu.apple.inc.md:build-wheel-from-source"
+
 ## Set up using Docker
 
 ### Pre-built images
@@ -102,6 +122,10 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 
     --8<-- "docs/getting_started/installation/gpu.xpu.inc.md:pre-built-images"
 
+=== "Apple Silicon"
+
+    --8<-- "docs/getting_started/installation/gpu.apple.inc.md:pre-built-images"
+
 --8<-- [end:pre-built-images]
 
 ### Build image from source
@@ -120,6 +144,10 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 
     --8<-- "docs/getting_started/installation/gpu.xpu.inc.md:build-image-from-source"
 
+=== "Apple Silicon"
+
+    --8<-- "docs/getting_started/installation/gpu.apple.inc.md:build-image-from-source"
+
 --8<-- [end:build-image-from-source]
 
 ## Supported features
@@ -135,3 +163,7 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 === "Intel XPU"
 
     --8<-- "docs/getting_started/installation/gpu.xpu.inc.md:supported-features"
+
+=== "Apple Silicon"
+
+    --8<-- "docs/getting_started/installation/gpu.apple.inc.md:supported-features"
diff --git a/docs/getting_started/installation/gpu.rocm.inc.md b/docs/getting_started/installation/gpu.rocm.inc.md
index 101ab9d56119..f8385997eea3 100644
--- a/docs/getting_started/installation/gpu.rocm.inc.md
+++ b/docs/getting_started/installation/gpu.rocm.inc.md
@@ -1,7 +1,14 @@
 <!-- markdownlint-disable MD041 MD051 -->
 --8<-- [start:installation]
 
-vLLM supports AMD GPUs with ROCm 6.3 or above. Pre-built wheels are available for ROCm 7.0.
+vLLM supports AMD GPUs with ROCm 6.3 or above. Pre-built wheels are available for ROCm 7.0 and ROCm 7.2.1.
+
+#### Prebuilt Wheels
+
+| ROCm Variant | Python Version | ROCm Version | glibc Requirement | Supported Versions |
+| ------------ | -------------- | ------------ | ----------------- | ------------------ |
+| `rocm700` | 3.12 | 7.0 | >= 2.35 | `0.14.0` to `0.18.0` |
+| `rocm721` | 3.12 | 7.2.1 | >= 2.35 | Nightly releases after commit `171775f306a333a9cf105bfd533bf3e113d401d9` |
 
 --8<-- [end:installation]
 --8<-- [start:requirements]
@@ -23,26 +30,112 @@ If you need a different ROCm version or want to use an existing PyTorch installa
 To install the latest version of vLLM for Python 3.12, ROCm 7.0 and `glibc >= 2.35`.
 
 ```bash
-uv pip install vllm --extra-index-url https://wheels.vllm.ai/rocm/
+uv pip install vllm --extra-index-url https://wheels.vllm.ai/rocm/ --upgrade
 ```
 
 !!! tip
-    You can find out about which ROCm version the latest vLLM supports by checking the index in extra-index-url [https://wheels.vllm.ai/rocm/](https://wheels.vllm.ai/rocm/) .
+    You can find out about which ROCm version the latest vLLM supports by checking the `vllm` package in index in extra-index-url <https://wheels.vllm.ai/rocm/> at [https://wheels.vllm.ai/rocm/vllm](https://wheels.vllm.ai/rocm/vllm) .
+
+    Another approach is that you can use this following commands to automatically extract the wheel variants:
+
+    ```bash
+    # automatically extract the available rocm variant
+    export VLLM_ROCM_VARIANT=$(curl -s https://wheels.vllm.ai/rocm/vllm | grep -oP 'rocm\d+' | head -1)
+
+    # automatically extract the vLLM version
+    export VLLM_VERSION=$(curl -s https://wheels.vllm.ai/rocm/vllm | grep -oP 'vllm-\K[0-9.]+' | head -1)
+
+    # inspect if the ROCm version is compatible with your environment
+    echo $VLLM_ROCM_VARIANT
+    echo $VLLM_VERSION
+    ```
 
 To install a specific version and ROCm variant of vLLM wheel.
 
 ```bash
-uv pip install vllm --extra-index-url https://wheels.vllm.ai/rocm/0.15.0/rocm700
+# version without the `v`
+uv pip install vllm==${VLLM_VERSION} --extra-index-url https://wheels.vllm.ai/rocm/${VLLM_VERSION}/${VLLM_ROCM_VARIANT}
+
+# Example
+uv pip install vllm==0.18.0 --extra-index-url https://wheels.vllm.ai/rocm/0.18.0/rocm700
 ```
 
 !!! warning "Caveats for using `pip`"
 
-    We recommend leveraging `uv` to install vLLM wheel. Using `pip` to install from custom indices is cumbersome, because `pip` combines packages from `--extra-index-url` and the default index, choosing only the latest version, which makes it difficult to install wheel from custom index if exact versions of all packages are specified exactly. In contrast, `uv` gives the extra index [higher priority than the default index](https://docs.astral.sh/uv/pip/compatibility/#packages-that-exist-on-multiple-indexes).
+    We recommend leveraging `uv` to install the vLLM wheel. Using `pip` to install from custom indices is cumbersome because `pip` combines packages from `--extra-index-url` and the default index, choosing only the latest version. This makes it difficult to install a wheel from a custom index unless exact versions of all packages are specified. In contrast, `uv` gives the extra index [higher priority than the default index](https://docs.astral.sh/uv/pip/compatibility/#packages-that-exist-on-multiple-indexes).
 
-    If you insist on using `pip`, you have to specify the exact vLLM version and full URL of the wheel path `https://wheels.vllm.ai/rocm/<version>/<rocm-variant>` (which can be obtained from the web page).
+    If you insist on using `pip`, you need to specify the exact vLLM version in the package name and provide the custom index URL `https://wheels.vllm.ai/rocm/${VLLM_VERSION}/${VLLM_ROCM_VARIANT}` via `--extra-index-url`.
 
     ```bash
-    pip install vllm==0.15.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.15.0/rocm700
+    pip install vllm==0.18.0+rocm700 --extra-index-url https://wheels.vllm.ai/rocm/0.18.0/rocm700
+    ```
+
+#### Install the latest code
+
+LLM inference is a fast-evolving field, and the latest code may contain bug fixes, performance improvements, and new features that are not released yet. To allow users to try the latest code without waiting for the next release, vLLM provides wheels for every commit since commit `171775f306a333a9cf105bfd533bf3e113d401d9` on <https://wheels.vllm.ai/rocm/nightly/>. The custom index to be used is `https://wheels.vllm.ai/rocm/nightly/${VLLM_ROCM_VARIANT}`
+
+**NOTE:** The first ROCm Variant that supports nightly wheel is ROCm 7.2.1
+
+To install from latest nightly index, run:
+
+```bash
+# automatically extract the available rocm variant
+export VLLM_ROCM_VARIANT=$(curl -s https://wheels.vllm.ai/rocm/nightly | \
+    grep -oP 'rocm\d+' | head -1  | sed 's/%2B/+/g')
+
+# inspect if the ROCm version is compatible with your environment
+echo $VLLM_ROCM_VARIANT
+
+uv pip install --pre vllm \
+    --extra-index-url https://wheels.vllm.ai/rocm/nightly/${VLLM_ROCM_VARIANT} \
+    --index-strategy unsafe-best-match
+```
+
+##### Install specific revisions
+
+If you want to access the wheels for previous commits (e.g. to bisect the behavior change, performance regression), you can specify the commit hash in the URL, example:
+
+```bash
+export VLLM_COMMIT=5b8c30d62b754b575e043ce2fc0dcbf8a64f6306
+
+export VLLM_ROCM_VARIANT=$(curl -s https://wheels.vllm.ai/rocm/${VLLM_COMMIT} | \
+    grep -oP 'rocm\d+' | head -1  | sed 's/%2B/+/g')
+
+# Extract the version from the wheel URL
+export VLLM_VERSION=$(curl -s https://wheels.vllm.ai/rocm/${VLLM_COMMIT}/${VLLM_ROCM_VARIANT}/vllm/ | \
+    grep -oP 'vllm-\K[^-]+' | head -1  | sed 's/%2B/+/g')
+
+# inspect the version if it is compatible with the ROCm version of your environment
+echo $VLLM_ROCM_VARIANT
+echo $VLLM_VERSION
+
+uv pip install vllm==${VLLM_VERSION} \
+  --extra-index-url https://wheels.vllm.ai/rocm/${VLLM_COMMIT}/${VLLM_ROCM_VARIANT} \
+  --index-strategy unsafe-best-match
+```
+
+!!! warning "`pip` caveat"
+
+    Using `pip` to install from nightly indices is _not supported_, because `pip` combines packages from `--extra-index-url` and the default index, choosing only the latest version, which makes it difficult to install a development version prior to the released version. In contrast, `uv` gives the extra index [higher priority than the default index](https://docs.astral.sh/uv/pip/compatibility/#packages-that-exist-on-multiple-indexes).
+
+    If you insist on using `pip`, you need to specify the exact vLLM version in the package name and provide the custom index URL (which can be obtained from the web page).
+
+    ```bash
+    export VLLM_COMMIT=5b8c30d62b754b575e043ce2fc0dcbf8a64f6306
+
+    export VLLM_ROCM_VARIANT=$(curl -s https://wheels.vllm.ai/rocm/${VLLM_COMMIT} | \
+        grep -oP 'rocm\d+' | head -1  | sed 's/%2B/+/g')
+
+    # Extract the version from the wheel URL
+    export VLLM_VERSION=$(curl -s https://wheels.vllm.ai/rocm/${VLLM_COMMIT}/${VLLM_ROCM_VARIANT}/vllm/ | \
+        grep -oP 'vllm-\K[^-]+' | head -1  | sed 's/%2B/+/g')
+
+    # inspect the version if it is compatible with the ROCm version of your environment
+    echo $VLLM_ROCM_VARIANT
+    echo $VLLM_VERSION
+
+    pip install vllm==${VLLM_VERSION} \
+    --extra-index-url https://wheels.vllm.ai/rocm/${VLLM_COMMIT}/${VLLM_ROCM_VARIANT}
     ```
 
 --8<-- [end:pre-built-wheels]
@@ -147,7 +240,7 @@ uv pip install vllm --extra-index-url https://wheels.vllm.ai/rocm/0.15.0/rocm700
         # Install dependencies
         pip install --upgrade numba \
             scipy \
-            huggingface-hub[cli,hf_transfer] \
+            huggingface-hub[cli] \
             setuptools_scm
         pip install -r requirements/rocm.txt
 
@@ -193,6 +286,24 @@ docker run --rm \
     --model Qwen/Qwen3-0.6B
 ```
 
+To use the docker image as base for development, you can launch it in interactive session through overriding the entrypoint.
+
+???+ console "Commands"
+    ```bash
+    docker run --rm -it \
+        --group-add=video \
+        --cap-add=SYS_PTRACE \
+        --security-opt seccomp=unconfined \
+        --device /dev/kfd \
+        --device /dev/dri \
+        -v ~/.cache/huggingface:/root/.cache/huggingface \
+        --env "HF_TOKEN=$HF_TOKEN" \
+        --network=host \
+        --ipc=host \
+        --entrypoint /bin/bash \
+        vllm/vllm-openai-rocm:<tag>
+    ```
+
 #### Use AMD's Docker Images (Deprecated)
 
 !!! warning "Deprecated"
diff --git a/docs/getting_started/installation/gpu.xpu.inc.md b/docs/getting_started/installation/gpu.xpu.inc.md
index 9e71860d62fd..e8b74a06f079 100644
--- a/docs/getting_started/installation/gpu.xpu.inc.md
+++ b/docs/getting_started/installation/gpu.xpu.inc.md
@@ -46,7 +46,7 @@ pip install -v -r requirements/xpu.txt
 
     !!! note
         - `triton` (without suffix) is for NVIDIA GPUs only. On XPU, using it instead of `triton-xpu` can cause correctness or runtime issues.
-        - For torch 2.10 (the version used in `requirements/xpu.txt`), the matching package is `triton-xpu==3.6.0`. If you use a different version of torch, check the corresponding `triton-xpu` version in [docker/Dockerfile.xpu](https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile.xpu).
+        - For torch 2.11 (the version used in `requirements/xpu.txt`), the matching package is `triton-xpu==3.7.0`. If you use a different version of torch, check the corresponding `triton-xpu` version in [docker/Dockerfile.xpu](https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile.xpu).
 
 - Finally, build and install vLLM XPU backend:
 
@@ -88,7 +88,7 @@ vllm serve facebook/opt-13b \
      -tp=8
 ```
 
-By default, a ray instance will be launched automatically if no existing one is detected in the system, with `num-gpus` equals to `parallel_config.world_size`. We recommend properly starting a ray cluster before execution, referring to the [examples/online_serving/run_cluster.sh](https://github.com/vllm-project/vllm/blob/main/examples/online_serving/run_cluster.sh) helper script.
+By default, a ray instance will be launched automatically if no existing one is detected in the system, with `num-gpus` equals to `parallel_config.world_size`. We recommend properly starting a ray cluster before execution, referring to the [examples/ray_serving/run_cluster.sh](https://github.com/vllm-project/vllm/blob/main/examples/ray_serving/run_cluster.sh) helper script.
 
 --8<-- [end:supported-features]
 --8<-- [start:distributed-backend]
diff --git a/docs/getting_started/quickstart.md b/docs/getting_started/quickstart.md
index 015514def33f..6b2a1cf41467 100644
--- a/docs/getting_started/quickstart.md
+++ b/docs/getting_started/quickstart.md
@@ -3,13 +3,16 @@
 This guide will help you quickly get started with vLLM to perform:
 
 - [Offline batched inference](#offline-batched-inference)
-- [Online serving using OpenAI-compatible server](#openai-compatible-server)
+- [Online serving](#online-serving)
 
 ## Prerequisites
 
 - OS: Linux
 - Python: 3.10 -- 3.13
 
+!!! note
+    vLLM also works on macOS with [vLLM-Metal](https://github.com/vllm-project/vllm-metal) for Apple Silicon GPU acceleration. See the [GPU installation guide](installation/gpu.md) and select the "Apple Silicon" tab.
+
 ## Installation
 
 === "NVIDIA CUDA"
@@ -73,6 +76,18 @@ This guide will help you quickly get started with vLLM to perform:
     !!! note
         For more detailed instructions, including Docker, installing from source, and troubleshooting, please refer to the [vLLM on TPU documentation](https://docs.vllm.ai/projects/tpu/en/latest/).
 
+=== "Apple Silicon (Mac)"
+
+    If you are using Apple Silicon Macs, you can use vLLM-Metal for GPU-accelerated inference via Apple's Metal framework.
+
+    Follow the installation instructions in the [vLLM-Metal documentation](https://github.com/vllm-project/vllm-metal#installation).
+
+    !!! note
+        vLLM-Metal uses MLX instead of PyTorch as the compute backend and requires MLX-optimized models from the [mlx-community](https://huggingface.co/mlx-community) on Hugging Face.
+
+    !!! tip
+        For more detailed instructions, please refer to the [GPU installation guide](installation/gpu.md) and select the "Apple Silicon" tab.
+
 !!! note
     For more detail and non-CUDA platforms, please refer to the [installation guide](installation/README.md) for specific instructions on how to install vLLM.
 
@@ -167,7 +182,7 @@ for output in outputs:
             print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
         ```
 
-## OpenAI-Compatible Server
+## Online Serving
 
 vLLM can be deployed as a server that implements the OpenAI API protocol. This allows vLLM to be used as a drop-in replacement for applications using OpenAI API.
 By default, it starts the server at `http://localhost:8000`. You can specify the address with `--host` and `--port` arguments. The server currently hosts one model at a time and implements endpoints such as [list models](https://platform.openai.com/docs/api-reference/models/list), [create chat completion](https://platform.openai.com/docs/api-reference/chat/completions/create), and [create completion](https://platform.openai.com/docs/api-reference/completions/create) endpoints.
@@ -180,7 +195,7 @@ vllm serve Qwen/Qwen2.5-1.5B-Instruct
 
 !!! note
     By default, the server uses a predefined chat template stored in the tokenizer.
-    You can learn about overriding it [here](../serving/openai_compatible_server.md#chat-template).
+    You can learn about overriding it [here](../serving/online_serving/README.md#chat-template).
 !!! important
     By default, the server applies `generation_config.json` from the huggingface model repository if it exists. This means the default values of certain sampling parameters can be overridden by those recommended by the model creator.
 
diff --git a/docs/governance/committers.md b/docs/governance/committers.md
index df874418f1c4..386e4f2a4bb5 100644
--- a/docs/governance/committers.md
+++ b/docs/governance/committers.md
@@ -14,12 +14,13 @@ Sorted alphabetically by GitHub handle:
 - [@aarnphm](https://github.com/aarnphm): Structured output
 - [@alexm-redhat](https://github.com/alexm-redhat): Performance
 - [@ApostaC](https://github.com/ApostaC): Connectors, offloading
+- [@bbrowning](https://github.com/bbrowning): Tool use and reasoning parser
 - [@benchislett](https://github.com/benchislett): Engine core and spec decode
 - [@bigPYJ1151](https://github.com/bigPYJ1151): Intel CPU/XPU integration
 - [@chaunceyjiang](https://github.com/chaunceyjiang): Tool use and reasoning parser
 - [@DarkLight1337](https://github.com/DarkLight1337): Multimodality, API server
 - [@esmeetu](https://github.com/esmeetu): developer marketing, community
-- [@gshtras](https://github.com/gshtras): AMD integration
+- [@dllehr-amd](https://github.com/dllehr-amd): AMD integration
 - [@heheda12345](https://github.com/heheda12345): Hybrid memory allocator
 - [@hmellor](https://github.com/hmellor): Hugging Face integration, documentation
 - [@houseroad](https://github.com/houseroad): Engine core and Llama models
@@ -31,6 +32,7 @@ Sorted alphabetically by GitHub handle:
 - [@LucasWilkinson](https://github.com/LucasWilkinson): Kernels and performance
 - [@luccafong](https://github.com/luccafong): Llama models, speculative decoding, distributed
 - [@markmc](https://github.com/markmc): Observability
+- [@MatthewBonanni](https://github.com/MatthewBonanni): Kernels and performance
 - [@mgoin](https://github.com/mgoin): Quantization and performance
 - [@NickLucche](https://github.com/NickLucche): KV connector
 - [@njhill](https://github.com/njhill): Distributed, API server, engine core
@@ -41,6 +43,7 @@ Sorted alphabetically by GitHub handle:
 - [@robertgshaw2-redhat](https://github.com/robertgshaw2-redhat): Core, distributed, disagg
 - [@ruisearch42](https://github.com/ruisearch42): Pipeline parallelism, Ray Support
 - [@russellb](https://github.com/russellb): Structured output, engine core, security
+- [@sfeng33](https://github.com/sfeng33): Tool use and reasoning parser
 - [@sighingnow](https://github.com/sighingnow): Qwen models, new model support
 - [@simon-mo](https://github.com/simon-mo): Project lead, API entrypoints, community
 - [@tdoublep](https://github.com/tdoublep): State space models
@@ -56,6 +59,7 @@ Sorted alphabetically by GitHub handle:
 - [@zhuohan123](https://github.com/zhuohan123): Project lead, RL integration, numerics
 - [@zou3519](https://github.com/zou3519): Compilation
 - [@BoyuanFeng](https://github.com/BoyuanFeng): Compilation, CUDAGraph
+- [@xuechendi](https://github.com/xuechendi): Intel CPU/XPU integration, KV connector
 
 ### Emeritus Committers
 
@@ -85,7 +89,7 @@ If you have PRs touching the area, please feel free to ping the area owner for r
 - AsyncLLM: the zmq based protocol hosting engine core and making it accessible for entrypoints
     - @robertgshaw2-redhat, @njhill, @russellb
 - ModelRunner, Executor, Worker: the abstractions for engine wrapping model implementation
-    - @WoosukKwon, @tlrmchlsmth, @heheda12345, @LucasWilkinson, @ProExpertProg
+    - @WoosukKwon, @tlrmchlsmth, @heheda12345, @LucasWilkinson, @ProExpertProg, @MatthewBonanni
 - KV Connector: Connector interface and implementation for KV cache offload and transfer
     - @robertgshaw2-redhat, @njhill, @KuntaiDu, @NickLucche, @ApostaC
 - Distributed, Parallelism, Process Management: Process launchers managing each worker, and assign them to the right DP/TP/PP/EP ranks
@@ -104,7 +108,7 @@ If you have PRs touching the area, please feel free to ping the area owner for r
 - Custom Layers: Utility layers in vLLM such as rotary embedding and rms norms
     - @ProExpertProg
 - Attention: Attention interface for paged attention
-    - @WoosukKwon, @LucasWilkinson, @heheda12345
+    - @WoosukKwon, @LucasWilkinson, @heheda12345, @MatthewBonanni
 - FusedMoE: FusedMoE kernel, Modular kernel framework, EPLB
     - @tlrmchlsmth
 - Quantization: Various quantization config, weight loading, and kernel.
@@ -118,7 +122,7 @@ If you have PRs touching the area, please feel free to ping the area owner for r
 - State space models: The state space models implementation in vLLM
     - @tdoublep, @tlrmchlsmth
 - Reasoning and tool calling parsers
-    - @chaunceyjiang, @aarnphm
+    - @chaunceyjiang, @aarnphm, @sfeng33, @bbrowning
 
 ### Entrypoints
 
@@ -132,7 +136,7 @@ If you have PRs touching the area, please feel free to ping the area owner for r
 ### Features
 
 - Spec Decode: Covers model definition, attention, sampler, and scheduler related to n-grams, EAGLE, and MTP.
-    - @WoosukKwon, @benchislett, @luccafong
+    - @WoosukKwon, @benchislett, @luccafong, @MatthewBonanni
 - Structured Output: The structured output implementation
     - @russellb, @aarnphm
 - RL: The RL related features such as collective rpc, sleep mode, etc.
@@ -152,8 +156,8 @@ If you have PRs touching the area, please feel free to ping the area owner for r
 
 ### External Kernels Integration
 
-- FlashAttention: @LucasWilkinson
-- FlashInfer: @LucasWilkinson, @mgoin, @WoosukKwon
+- FlashAttention: @LucasWilkinson, @MatthewBonanni
+- FlashInfer: @LucasWilkinson, @mgoin, @WoosukKwon, @MatthewBonanni
 - Blackwell Kernels: @mgoin, @yewentao256
 - DeepEP/DeepGEMM: @mgoin, @yewentao256
 
@@ -175,7 +179,7 @@ If you have PRs touching the area, please feel free to ping the area owner for r
 - Plugin Interface: @youkaichao, @Yikun
 - NVIDIA GPU: @pavanimajety
 - AMD GPU: @gshtras, @tjtanaa
-- Intel CPU/GPU: @jikunshang, @bigPYJ1151
+- Intel CPU/GPU: @jikunshang, @bigPYJ1151, @xuechendi
 - Google TPU: @yaochengji
 
 ### Ecosystem Projects
diff --git a/docs/maybe_skip_pr_build.sh b/docs/maybe_skip_pr_build.sh
deleted file mode 100755
index 2a0b338a0198..000000000000
--- a/docs/maybe_skip_pr_build.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: Apache-2.0
-# Skip PR builds unless the PR has the "documentation" or "ready" label.
-# Used by Read the Docs (see .readthedocs.yaml).
-
-if [[ "$READTHEDOCS_VERSION_TYPE" != "external" ]]; then
-  exit 0
-fi
-
-PR_URL="https://api.github.com/repos/vllm-project/vllm/pulls/${READTHEDOCS_VERSION}"
-CURL_ARGS=(-s -o /tmp/pr_response.json -w "%{http_code}")
-if [[ -n "$GITHUB_TOKEN" ]]; then
-  CURL_ARGS+=(-H "Authorization: token ${GITHUB_TOKEN}")
-fi
-HTTP_CODE=$(curl "${CURL_ARGS[@]}" "$PR_URL")
-
-if [[ "$HTTP_CODE" -ne 200 ]]; then
-  echo "GitHub API returned HTTP ${HTTP_CODE}, proceeding with build."
-elif grep -qE '"name": *"(documentation|ready)"' /tmp/pr_response.json; then
-  echo "Found required label, proceeding with build."
-else
-  echo "PR #${READTHEDOCS_VERSION} lacks 'documentation' or 'ready' label, cancelling build."
-  exit 1
-fi
diff --git a/docs/mkdocs/hooks/autoref_code.py b/docs/mkdocs/hooks/autoref_code.py
new file mode 100644
index 000000000000..647f74f202d8
--- /dev/null
+++ b/docs/mkdocs/hooks/autoref_code.py
@@ -0,0 +1,167 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+MkDocs hook to automatically convert inline code references to API doc links.
+
+For example, `WeightTransferConfig` becomes
+[`WeightTransferConfig`][vllm.config.WeightTransferConfig]
+
+This works with the `autorefs` plugin to create clickable cross-references
+to API documentation pages generated by `mkdocstrings`.
+
+The hook builds an index of all documented public Python names (classes and
+functions with docstrings) from the vllm package at startup using AST parsing,
+then substitutes matching inline code spans on each page. Names without
+docstrings are excluded because mkdocstrings will not generate a page for them.
+"""
+
+import ast
+import logging
+from pathlib import Path
+
+import regex as re
+from mkdocs.config.defaults import MkDocsConfig
+from mkdocs.structure.files import Files
+from mkdocs.structure.pages import Page
+
+logger = logging.getLogger("mkdocs")
+
+ROOT_DIR = Path(__file__).parent.parent.parent.parent.resolve()
+VLLM_DIR = ROOT_DIR / "vllm"
+
+# Maps short name -> qualified name (e.g. "ModelConfig" -> "vllm.config.ModelConfig")
+_name_index: dict[str, str] = {}
+
+# Fenced code block pattern (``` or ~~~, with optional language specifier).
+_FENCED_BLOCK = re.compile(
+    r"(?:^|\n)(?P<fence>`{3,}|~{3,})[^\n]*\n.*?(?:\n(?P=fence))", re.DOTALL
+)
+
+# Inline code that is NOT already part of a markdown link.
+# Matches `Name` but not [`Name`] and not [`Name`][...] or [`Name`](...).
+_INLINE_CODE = re.compile(
+    r"(?<!\[)"  # not preceded by [
+    r"`(?P<name>[A-Za-z0-9_]*)`"  # `UpperCamelCase` or `UPPER_SNAKE`
+    r"(?!\])"  # not followed by ]
+)
+
+
+def _has_docstring(node: ast.AST) -> bool:
+    """Check if a class or function node has a docstring."""
+    if not isinstance(node, ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef):
+        return False
+    return ast.get_docstring(node, clean=False) is not None
+
+
+def _module_path(filepath: Path) -> str:
+    """Convert a filesystem path to a dotted module path."""
+    rel = filepath.relative_to(ROOT_DIR)
+    parts = list(rel.with_suffix("").parts)
+    if parts[-1] == "__init__":
+        parts = parts[:-1]
+    return ".".join(parts)
+
+
+def _index_file(filepath: Path) -> dict[str, str]:
+    """Extract documented public names from a Python file using AST parsing.
+
+    Only classes and functions with docstrings are included, since
+    mkdocstrings won't generate a page for undocumented symbols.
+    """
+    names: dict[str, str] = {}
+    try:
+        source = filepath.read_text(encoding="utf-8")
+        tree = ast.parse(source, filename=str(filepath))
+    except (SyntaxError, UnicodeDecodeError):
+        return names
+
+    module = _module_path(filepath)
+
+    for node in ast.iter_child_nodes(tree):
+        if (
+            # Class definitions (with docstring)
+            isinstance(node, ast.ClassDef)
+            and not node.name.startswith("_")
+            and _has_docstring(node)
+        ) or (
+            # Function definitions (with docstring, only uppercase/CamelCase)
+            isinstance(node, ast.FunctionDef | ast.AsyncFunctionDef)
+            and not node.name.startswith("_")
+            and node.name[0].isupper()
+            and _has_docstring(node)
+        ):
+            names[node.name] = f"{module}.{node.name}"
+
+    return names
+
+
+def _build_index() -> dict[str, str]:
+    """Walk the vllm package and build a name -> qualified path index."""
+    index: dict[str, str] = {}
+    # Track conflicts: if multiple modules define the same name,
+    # prefer shallower modules (more likely to be the public API).
+    depth: dict[str, int] = {}
+
+    for filepath in sorted(VLLM_DIR.rglob("*.py")):
+        # Skip internal/private modules
+        if any(part.startswith("_") and part != "__init__" for part in filepath.parts):
+            continue
+        # Skip third-party vendored code
+        rel = filepath.relative_to(VLLM_DIR)
+        if rel.parts and rel.parts[0] in ("third_party", "vllm_flash_attn"):
+            continue
+
+        module_depth = len(filepath.relative_to(ROOT_DIR).parts)
+        file_names = _index_file(filepath)
+
+        for name, qualified in file_names.items():
+            if name not in index or module_depth < depth[name]:
+                index[name] = qualified
+                depth[name] = module_depth
+
+    return index
+
+
+def on_startup(*, command: str, dirty: bool) -> None:
+    """Build the name index once at startup."""
+    global _name_index
+    _name_index = _build_index()
+    logger.info("autoref_code: indexed %d names from vllm/", len(_name_index))
+
+
+def on_page_markdown(
+    markdown: str, *, page: Page, config: MkDocsConfig, files: Files
+) -> str:
+    """Replace inline code references with autoref links."""
+    if not _name_index:
+        return markdown
+
+    # Skip API reference pages to avoid circular/redundant links.
+    if page.file.src_path.startswith("api/"):
+        return markdown
+
+    # Step 1: Mask fenced code blocks so we don't touch code inside them.
+    masks: list[str] = []
+
+    def _mask_block(match: re.Match) -> str:
+        masks.append(match.group(0))
+        return f"\ue000CODEBLOCK{len(masks) - 1}\ue000"
+
+    masked = _FENCED_BLOCK.sub(_mask_block, markdown)
+
+    # Step 2: Replace inline code references.
+    def _replace(match: re.Match) -> str:
+        name = match.group("name")
+        qualified = _name_index.get(name)
+        if qualified is None:
+            return match.group(0)
+        logger.debug("autoref_code: linking `%s` to [%s]", name, qualified)
+        return f"[`{name}`][{qualified}]"
+
+    result = _INLINE_CODE.sub(_replace, masked)
+
+    # Step 3: Restore masked code blocks.
+    result = re.sub(
+        r"\ue000CODEBLOCK(\d+)\ue000", lambda m: masks[int(m.group(1))], result
+    )
+    return result
diff --git a/docs/mkdocs/hooks/generate_argparse.py b/docs/mkdocs/hooks/generate_argparse.py
index 298eea96de3d..4548e33f8814 100644
--- a/docs/mkdocs/hooks/generate_argparse.py
+++ b/docs/mkdocs/hooks/generate_argparse.py
@@ -7,7 +7,7 @@
 import textwrap
 import traceback
 from argparse import SUPPRESS, Action, HelpFormatter
-from collections.abc import Iterable
+from collections.abc import Callable, Iterable
 from importlib.machinery import ModuleSpec
 from pathlib import Path
 from typing import TYPE_CHECKING, Literal
@@ -38,15 +38,28 @@ def decorator(cls):
         return decorator
 
 
+class MockPluggableLayer:
+    @staticmethod
+    def register(name):
+        def decorator(cls):
+            return cls
+
+        return decorator
+
+
 mock_if_no_torch("vllm._C", MagicMock())
-mock_if_no_torch("vllm.model_executor.custom_op", MagicMock(CustomOp=MockCustomOp))
+mock_if_no_torch("vllm._C_stable_libtorch", MagicMock())
+mock_if_no_torch(
+    "vllm.model_executor.custom_op",
+    MagicMock(CustomOp=MockCustomOp, PluggableLayer=MockPluggableLayer),
+)
 mock_if_no_torch(
     "vllm.utils.torch_utils", MagicMock(direct_register_custom_op=lambda *a, **k: None)
 )
 
 
 # Mock any version checks by reading from compiled CI requirements
-with open(ROOT_DIR / "requirements/test.txt") as f:
+with open(ROOT_DIR / "requirements/test/cuda.txt") as f:
     VERSIONS = dict(line.strip().split("==") for line in f if "==" in line)
 importlib.metadata.version = lambda name: VERSIONS.get(name) or "0.0.0"
 
@@ -55,11 +68,36 @@ def decorator(cls):
 mock_if_no_torch("torch.nn", MagicMock(Parameter=object))
 
 
+# Mock torch.library.infer_schema for vllm.ir.ops.IrOpInplaceOverload.__init__
+# We need to return the corresponding number of inputs, as IR infra will assert it
+def get_outputs(native_fn: Callable) -> str:
+    """
+    Extract output schema from function's return type annotation,
+    e.g. 'Tensor' or 'Tensor, Tensor'.
+    """
+    import typing
+
+    return_type = typing.get_type_hints(native_fn)["return"]
+    origin = typing.get_origin(return_type)
+    arg_name = lambda a: a.__name__ if hasattr(a, "__name__") else str(a)
+    if origin is tuple:
+        args = typing.get_args(return_type)
+        return ", ".join(arg_name(arg) for arg in args)
+    else:
+        return f"{arg_name(return_type)}"
+
+
+mock_if_no_torch(
+    "torch.library",
+    MagicMock(infer_schema=lambda fn, **k: f"(Tensor x) -> {get_outputs(fn)}"),
+)
+
+
 class PydanticMagicMock(MagicMock):
     """`MagicMock` that's able to generate pydantic-core schemas."""
 
     def __init__(self, *args, **kwargs):
-        name = kwargs.pop("name", None)
+        name = kwargs.get("name")
         super().__init__(*args, **kwargs)
         self.__spec__ = ModuleSpec(name, None)
 
@@ -85,7 +123,8 @@ def auto_mock(module_name: str, attr: str, max_mocks: int = 100):
             logger.info("Mocking %s for argparse doc generation", e.name)
             sys.modules[e.name] = PydanticMagicMock(name=e.name)
         except Exception:
-            logger.exception("Failed to import %s.%s: %s", module_name, attr)
+            logger.exception("Failed to import %s.%s", module_name, attr)
+            raise
 
     raise ImportError(
         f"Failed to import {module_name}.{attr} after mocking {max_mocks} imports"
@@ -108,6 +147,7 @@ def auto_mock(module_name: str, attr: str, max_mocks: int = 100):
 EngineArgs = auto_mock("vllm.engine.arg_utils", "EngineArgs")
 ChatCommand = auto_mock("vllm.entrypoints.cli.openai", "ChatCommand")
 CompleteCommand = auto_mock("vllm.entrypoints.cli.openai", "CompleteCommand")
+RenderSubcommand = auto_mock("vllm.entrypoints.cli.launch", "RenderSubcommand")
 openai_cli_args = auto_mock("vllm.entrypoints.openai", "cli_args")
 openai_run_batch = auto_mock("vllm.entrypoints.openai", "run_batch")
 
@@ -221,6 +261,7 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
         "serve": create_parser(openai_cli_args.make_arg_parser),
         "chat": create_parser(ChatCommand.add_cli_args),
         "complete": create_parser(CompleteCommand.add_cli_args),
+        "launch_render": create_parser(RenderSubcommand.add_cli_args),
         "run-batch": create_parser(openai_run_batch.make_arg_parser),
         # Benchmark CLI
         "bench_latency": create_parser(bench_latency.add_cli_args),
diff --git a/docs/mkdocs/hooks/generate_metrics.py b/docs/mkdocs/hooks/generate_metrics.py
index 4565861c4f7f..97282aaee7d0 100644
--- a/docs/mkdocs/hooks/generate_metrics.py
+++ b/docs/mkdocs/hooks/generate_metrics.py
@@ -19,7 +19,7 @@
         "output": "spec_decode.inc.md",
     },
     {
-        "path": "vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py",
+        "path": "vllm/distributed/kv_transfer/kv_connector/v1/nixl/stats.py",
         "output": "nixl_connector.inc.md",
     },
     {"path": "vllm/v1/metrics/perf.py", "output": "perf.inc.md"},
diff --git a/docs/models/extensions/runai_model_streamer.md b/docs/models/extensions/runai_model_streamer.md
index 38c603b46e10..965b2932ffaa 100644
--- a/docs/models/extensions/runai_model_streamer.md
+++ b/docs/models/extensions/runai_model_streamer.md
@@ -101,7 +101,7 @@ vllm serve /path/to/sharded/model \
     --model-loader-extra-config '{"pattern":"custom-model-rank-{rank}-part-{part}.safetensors"}'
 ```
 
-To create sharded model files, you can use the script provided in [examples/offline_inference/save_sharded_state.py](../../../examples/offline_inference/save_sharded_state.py). This script demonstrates how to save a model in the sharded format that is compatible with the Run:ai Model Streamer sharded loader.
+To create sharded model files, you can use the script provided in [examples/features/sharded_state/save_sharded_state_offline.py](../../../examples/features/sharded_state/save_sharded_state_offline.py). This script demonstrates how to save a model in the sharded format that is compatible with the Run:ai Model Streamer sharded loader.
 
 The sharded loader supports all the same tunable parameters as the regular Run:ai Model Streamer, including `concurrency` and `memory_limit`. These can be configured in the same way:
 
diff --git a/docs/models/extensions/tensorizer.md b/docs/models/extensions/tensorizer.md
index 3df80d5af6c4..0f3ee1ede436 100644
--- a/docs/models/extensions/tensorizer.md
+++ b/docs/models/extensions/tensorizer.md
@@ -14,7 +14,7 @@ To install `tensorizer`, run `pip install vllm[tensorizer]`.
 ## The basics
 
 To load a model using Tensorizer, the model first needs to be serialized by
-Tensorizer. [The example script](../../examples/others/tensorize_vllm_model.md) takes care of this process.
+Tensorizer. [The example script](../../../examples/features/tensorize_vllm_model.py) takes care of this process.
 
 Let's walk through a basic example by serializing `facebook/opt-125m` using the script, and then loading it for inference.
 
@@ -25,7 +25,7 @@ CLI arguments. The docstring for the script itself explains the CLI args
 and how to use it properly in great detail, and we'll use one of the examples from the docstring directly, assuming we want to serialize and save our model at our S3 bucket example `s3://my-bucket`:
 
 ```bash
-python examples/others/tensorize_vllm_model.py \
+python examples/features/tensorize_vllm_model.py \
    --model facebook/opt-125m \
    serialize \
    --serialized-directory s3://my-bucket \
@@ -35,7 +35,7 @@ python examples/others/tensorize_vllm_model.py \
 This saves the model tensors at `s3://my-bucket/vllm/facebook/opt-125m/v1`. If you intend on applying a LoRA adapter to your tensorized model, you can pass the HF id of the LoRA adapter in the above command, and the artifacts will be saved there too:
 
 ```bash
-python examples/others/tensorize_vllm_model.py \
+python examples/features/tensorize_vllm_model.py \
    --model facebook/opt-125m \
    --lora-path <lora_id> \
    serialize \
@@ -71,7 +71,7 @@ llm = LLM(
 As an example, CPU concurrency can be limited when serializing with `tensorizer` via the `limit_cpu_concurrency` parameter in the initializer for `TensorSerializer`. To set `limit_cpu_concurrency` to some arbitrary value, you would do so like this when serializing:
 
 ```bash
-python examples/others/tensorize_vllm_model.py \
+python examples/features/tensorize_vllm_model.py \
    --model facebook/opt-125m \
    --lora-path <lora_id> \
    serialize \
diff --git a/docs/models/generative_models.md b/docs/models/generative_models.md
index 76dba5977160..46f8f4096d6a 100644
--- a/docs/models/generative_models.md
+++ b/docs/models/generative_models.md
@@ -138,7 +138,7 @@ outputs = llm.chat(conversation, chat_template=custom_template)
 
 ## Online Serving
 
-Our [OpenAI-Compatible Server](../serving/openai_compatible_server.md) provides endpoints that correspond to the offline APIs:
+Our [OpenAI-Compatible Server](../serving/online_serving/openai_compatible_server.md) provides endpoints that correspond to the offline APIs:
 
-- [Completions API](../serving/openai_compatible_server.md#completions-api) is similar to `LLM.generate` but only accepts text.
-- [Chat API](../serving/openai_compatible_server.md#chat-api)  is similar to `LLM.chat`, accepting both text and [multi-modal inputs](../features/multimodal_inputs.md) for models with a chat template.
+- [Completions API](../serving/online_serving/openai_compatible_server.md#completions-api) is similar to `LLM.generate` but only accepts text.
+- [Chat API](../serving/online_serving/openai_compatible_server.md#chat-api)  is similar to `LLM.chat`, accepting both text and [multi-modal inputs](../features/multimodal_inputs.md) for models with a chat template.
diff --git a/docs/models/hardware_supported_models/cpu.md b/docs/models/hardware_supported_models/cpu.md
index 361310f18cbd..9c6dd9feb793 100644
--- a/docs/models/hardware_supported_models/cpu.md
+++ b/docs/models/hardware_supported_models/cpu.md
@@ -11,24 +11,50 @@
 
 ### Text-only Language Models
 
-| Model                                | Architecture                             | Supported |
+| Model | Architecture | Supported |
 | ------------------------------------ | ---------------------------------------- | --------- |
-| meta-llama/Llama-3.1-8B-Instruct     | LlamaForCausalLM                         | ✅        |
-| meta-llama/Llama-3.2-3B-Instruct     | LlamaForCausalLM                         | ✅        |
-| ibm-granite/granite-3.2-2b-instruct  | GraniteForCausalLM                       | ✅        |
-| Qwen/Qwen3-1.7B                      | Qwen3ForCausalLM                         | ✅        |
-| Qwen/Qwen3-4B                        | Qwen3ForCausalLM                         | ✅        |
-| Qwen/Qwen3-8B                        | Qwen3ForCausalLM                         | ✅        |
-| zai-org/glm-4-9b-hf                  | GLMForCausalLM                           | ✅        |
-| google/gemma-7b                      | GemmaForCausalLM                         | ✅        |
+| unsloth/gpt-oss-20b | GptOssForCausalLM | ✅ |
+| meta-llama/Llama-3.1-8B-Instruct | LlamaForCausalLM | ✅ |
+| meta-llama/Llama-3.2-1B | LlamaForCausalLM | ✅ |
+| meta-llama/Llama-3.2-3B-Instruct | LlamaForCausalLM | ✅ |
+| meta-llama/Llama-3.3-70B-Instruct | LlamaForCausalLM | ✅ |
+| RedHatAI/Meta-Llama-3.1-8B-quantized.w8a8 | LlamaForCausalLM | ✅ |
+| RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8 | LlamaForCausalLM | ✅ |
+| RedHatAI/Llama-3.2-1B-Instruct-quantized.w8a8 | LlamaForCausalLM | ✅ |
+| RedHatAI/Llama-3.2-3B-Instruct-quantized.w8a8 | LlamaForCausalLM | ✅ |
+| RedHatAI/DeepSeek-R1-Distill-Llama-70B-quantized.w8a8 | LlamaForCausalLM | ✅ |
+| hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4 | LlamaForCausalLM | ✅ |
+| AMead10/Llama-3.2-1B-Instruct-AWQ | LlamaForCausalLM | ✅ |
+| AMead10/Llama-3.2-3B-Instruct-AWQ | LlamaForCausalLM | ✅ |
+| TheBloke/TinyLlama-1.1B-Chat-v1.0-AWQ | LlamaForCausalLM | ✅ |
+| TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ | LlamaForCausalLM | ✅ |
+| ibm-granite/granite-3.2-2b-instruct | GraniteForCausalLM | ✅ |
+| Qwen/Qwen3-1.7B | Qwen3ForCausalLM | ✅ |
+| Qwen/Qwen3-4B | Qwen3ForCausalLM | ✅ |
+| Qwen/Qwen3-8B | Qwen3ForCausalLM | ✅ |
+| Qwen/Qwen3-14B | Qwen3ForCausalLM | ✅ |
+| Qwen/Qwen3-14B-AWQ | Qwen3ForCausalLM | ✅ |
+| Qwen/Qwen3-30B-A3B | Qwen3MoeForCausalLM | ✅ |
+| Qwen/QwQ-32B-AWQ | Qwen2ForCausalLM | ✅ |
+| Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4 | Qwen2ForCausalLM | ✅ |
+| RedHatAI/QwQ-32B-quantized.w8a8 | Qwen2ForCausalLM | ✅ |
+| zai-org/glm-4-9b-hf | GLMForCausalLM | ✅ |
+| google/gemma-7b | GemmaForCausalLM | ✅ |
+| microsoft/Phi-4-reasoning | Phi3ForCausalLM | ✅ |
+| TheBloke/Mistral-7B-Instruct-v0.2-AWQ | MistralForCausalLM | ✅ |
 
 ### Multimodal Language Models
 
-| Model                                | Architecture                             | Supported |
+| Model | Architecture | Supported |
 | ------------------------------------ | ---------------------------------------- | --------- |
-| Qwen/Qwen2.5-VL-7B-Instruct          | Qwen2VLForConditionalGeneration          | ✅        |
-| openai/whisper-large-v3              | WhisperForConditionalGeneration          | ✅        |
-
-✅ Runs and optimized.  
-🟨 Runs and correct but not optimized to green yet.  
-❌ Does not pass accuracy test or does not run.  
+| meta-llama/Llama-4-Scout-17B-16E-Instruct | Llama4ForConditionalGeneration | ✅ |
+| google/gemma-3-4b-it | Gemma3ForConditionalGeneration | ✅ |
+| google/gemma-3-12b-it | Gemma3ForConditionalGeneration | ✅ |
+| google/gemma-4-E4B-it | Gemma4ForConditionalGeneration | ✅ |
+| google/gemma-4-E2B-it | Gemma4ForConditionalGeneration | ✅ |
+| google/gemma-4-26B-A4B-it | Gemma4ForConditionalGeneration | ✅ |
+| microsoft/Phi-4-multimodal-instruct | Phi4MMForCausalLM | ✅ |
+| Qwen/Qwen2.5-VL-7B-Instruct | Qwen2VLForConditionalGeneration | ✅ |
+| openai/whisper-large-v3 | WhisperForConditionalGeneration | ✅ |
+
+✅ Runs and optimized.
diff --git a/docs/models/hardware_supported_models/xpu.md b/docs/models/hardware_supported_models/xpu.md
index 2857d80a7698..cfda6c76f05b 100644
--- a/docs/models/hardware_supported_models/xpu.md
+++ b/docs/models/hardware_supported_models/xpu.md
@@ -10,39 +10,40 @@
 
 ### Text-only Language Models
 
-| Model                                     | Architecture                                         | FP16 | Dynamic FP8 | MXFP4 |
-| ----------------------------------------- | ---------------------------------------------------- | ---- | ----------- | ----- |
-| openai/gpt-oss-20b                        | GPTForCausalLM                                       |      |             | ✅    |
-| openai/gpt-oss-120b                       | GPTForCausalLM                                       |      |             | ✅    |
-| deepseek-ai/DeepSeek-R1-Distill-Llama-8B  | LlamaForCausalLM                                     | ✅   | ✅          |       |
-| deepseek-ai/DeepSeek-R1-Distill-Qwen-14B  | QwenForCausalLM                                      | ✅   | ✅          |       |
-| deepseek-ai/DeepSeek-R1-Distill-Qwen-32B  | QwenForCausalLM                                      | ✅   | ✅          |       |
-| deepseek-ai/DeepSeek-R1-Distill-Llama-70B | LlamaForCausalLM                                     | ✅   | ✅          |       |
-| Qwen/Qwen2.5-72B-Instruct                 | Qwen2ForCausalLM                                     | ✅   | ✅          |       |
-| Qwen/Qwen3-14B                            | Qwen3ForCausalLM                                     | ✅   | ✅          |       |
-| Qwen/Qwen3-32B                            | Qwen3ForCausalLM                                     | ✅   | ✅          |       |
-| Qwen/Qwen3-30B-A3B                        | Qwen3ForCausalLM                                     | ✅   | ✅          |       |
-| Qwen/Qwen3-30B-A3B-GPTQ-Int4              | Qwen3ForCausalLM                                     | ✅   | ✅          |       |
-| Qwen/Qwen3-coder-30B-A3B-Instruct         | Qwen3ForCausalLM                                     | ✅   | ✅          |       |
-| Qwen/QwQ-32B                              | QwenForCausalLM                                      | ✅   | ✅          |       |
-| deepseek-ai/DeepSeek-V2-Lite              | DeepSeekForCausalLM                                  | ✅   | ✅          |       |
-| meta-llama/Llama-3.1-8B-Instruct          | LlamaForCausalLM                                     | ✅   | ✅          |       |
-| baichuan-inc/Baichuan2-13B-Chat           | BaichuanForCausalLM                                  | ✅   | ✅          |       |
-| THUDM/GLM-4-9B-chat                       | GLMForCausalLM                                       | ✅   | ✅          |       |
-| THUDM/CodeGeex4-All-9B                    | CodeGeexForCausalLM                                  | ✅   | ✅          |       |
-| chuhac/TeleChat2-35B                      | LlamaForCausalLM (TeleChat2 based on Llama arch)     | ✅   | ✅          |       |
-| 01-ai/Yi1.5-34B-Chat                      | YiForCausalLM                                        | ✅   | ✅          |       |
-| THUDM/CodeGeex4-All-9B                    | CodeGeexForCausalLM                                  | ✅   | ✅          |       |
-| deepseek-ai/DeepSeek-Coder-33B-base       | DeepSeekCoderForCausalLM                             | ✅   | ✅          |       |
-| baichuan-inc/Baichuan2-13B-Chat           | BaichuanForCausalLM                                  | ✅   | ✅          |       |
-| meta-llama/Llama-2-13b-chat-hf            | LlamaForCausalLM                                     | ✅   | ✅          |       |
-| THUDM/CodeGeex4-All-9B                    | CodeGeexForCausalLM                                  | ✅   | ✅          |       |
-| Qwen/Qwen1.5-14B-Chat                     | QwenForCausalLM                                      | ✅   | ✅          |       |
-| Qwen/Qwen1.5-32B-Chat                     | QwenForCausalLM                                      | ✅   | ✅          |       |
+| Model                                              | Architecture                                     | BF16/FP16/Dynamic FP8 | Compressed_tensors FP8 | MXFP4 |
+| -------------------------------------------------- | ------------------------------------------------ | --------------------- | ---------------------- | ----- |
+| openai/gpt-oss-20b                                 | GPTForCausalLM                                   |                       |                        | ✅    |
+| openai/gpt-oss-120b                                | GPTForCausalLM                                   |                       |                        | ✅    |
+| deepseek-ai/DeepSeek-R1-Distill-Llama-8B           | LlamaForCausalLM                                 | ✅                    |                        |       |
+| deepseek-ai/DeepSeek-R1-Distill-Qwen-14B           | QwenForCausalLM                                  | ✅                    |                        |       |
+| deepseek-ai/DeepSeek-R1-Distill-Qwen-32B           | QwenForCausalLM                                  | ✅                    |                        |       |
+| deepseek-ai/DeepSeek-R1-Distill-Llama-70B          | LlamaForCausalLM                                 | ✅                    |                        |       |
+| Qwen/Qwen2.5-72B-Instruct                          | Qwen2ForCausalLM                                 | ✅                    |                        |       |
+| Qwen/Qwen3-14B                                     | Qwen3ForCausalLM                                 | ✅                    |                        |       |
+| Qwen/Qwen3-32B                                     | Qwen3ForCausalLM                                 | ✅                    |                        |       |
+| Qwen/Qwen3-30B-A3B                                 | Qwen3ForCausalLM                                 | ✅                    |                        |       |
+| Qwen/Qwen3-30B-A3B-GPTQ-Int4                       | Qwen3ForCausalLM                                 | ✅                    |                        |       |
+| Qwen/Qwen3-coder-30B-A3B-Instruct                  | Qwen3ForCausalLM                                 | ✅                    |                        |       |
+| Qwen/QwQ-32B                                       | QwenForCausalLM                                  | ✅                    |                        |       |
+| deepseek-ai/DeepSeek-V2-Lite                       | DeepSeekForCausalLM                              | ✅                    |                        |       |
+| meta-llama/Llama-3.1-8B-Instruct                   | LlamaForCausalLM                                 | ✅                    |                        |       |
+| baichuan-inc/Baichuan2-13B-Chat                    | BaichuanForCausalLM                              | ✅                    |                        |       |
+| THUDM/GLM-4-9B-chat                                | GLMForCausalLM                                   | ✅                    |                        |       |
+| THUDM/CodeGeex4-All-9B                             | CodeGeexForCausalLM                              | ✅                    |                        |       |
+| chuhac/TeleChat2-35B                               | LlamaForCausalLM (TeleChat2 based on Llama arch) | ✅                    |                        |       |
+| 01-ai/Yi1.5-34B-Chat                               | YiForCausalLM                                    | ✅                    |                        |       |
+| THUDM/CodeGeex4-All-9B                             | CodeGeexForCausalLM                              | ✅                    |                        |       |
+| deepseek-ai/DeepSeek-Coder-33B-base                | DeepSeekCoderForCausalLM                         | ✅                    |                        |       |
+| baichuan-inc/Baichuan2-13B-Chat                    | BaichuanForCausalLM                              | ✅                    |                        |       |
+| meta-llama/Llama-2-13b-chat-hf                     | LlamaForCausalLM                                 | ✅                    |                        |       |
+| THUDM/CodeGeex4-All-9B                             | CodeGeexForCausalLM                              | ✅                    |                        |       |
+| Qwen/Qwen1.5-14B-Chat                              | QwenForCausalLM                                  | ✅                    |                        |       |
+| Qwen/Qwen1.5-32B-Chat                              | QwenForCausalLM                                  | ✅                    |                        |       |
+| RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic    | LlamaForCausalLM                                 |                       | ✅                     |       |
 
 ### Multimodal Language Models
 
-| Model                        | Architecture                     | FP16 | Dynamic FP8 | MXFP4 |
+| Model                        | Architecture                     | BF16 | Dynamic FP8 | MXFP4 |
 | ---------------------------- | -------------------------------- | ---- | ----------- | ----- |
 | OpenGVLab/InternVL3_5-8B     | InternVLForConditionalGeneration | ✅   | ✅          |       |
 | OpenGVLab/InternVL3_5-14B    | InternVLForConditionalGeneration | ✅   | ✅          |       |
@@ -55,7 +56,7 @@
 
 ### Embedding and Reranker Language Models
 
-| Model                   | Architecture                   | FP16 | Dynamic FP8 | MXFP4 |
+| Model                   | Architecture                   | BF16 | Dynamic FP8 | MXFP4 |
 | ----------------------- | ------------------------------ | ---- | ----------- | ----- |
 | Qwen/Qwen3-Embedding-8B | Qwen3ForTextEmbedding          | ✅   | ✅          |       |
 | Qwen/Qwen3-Reranker-8B  | Qwen3ForSequenceClassification | ✅   | ✅          |       |
diff --git a/docs/models/pooling_models/README.md b/docs/models/pooling_models/README.md
index 2cf721f5eefe..2a5357e4fee6 100644
--- a/docs/models/pooling_models/README.md
+++ b/docs/models/pooling_models/README.md
@@ -33,6 +33,12 @@ from large language models, allowing them to benefit from the continuous improve
 similarity enables them to reuse much of vLLM’s infrastructure. If compatible, we would be happy to help them leverage
 the latest features of vLLM as well.
 
+### Cheat Sheet
+
+As illustrated in the figure below, we have summarized the relationships among the key elements of pooling models as a takeaway.
+
+![Cheat Sheet](../../assets/models/pooling_models/cheat_sheet.svg)
+
 ### Sequence-wise Task and Token-wise Task
 
 The key distinction between sequence-wise task and token-wise task lies in their output granularity: sequence-wise task
@@ -59,8 +65,22 @@ please refer to [IO Processor Plugins](../../design/io_processor_plugins.md).
     Within classification tasks, there is a specialized subcategory: Cross-encoder (aka reranker) models. These models
 are a subset of classification models that accept two prompts as input and output num_labels equal to 1.
 
+### Pooling Types
+
+![Pooling Types](../../assets/models/pooling_models/pooling_types.svg)
+
+| Pooling Tasks  | Granularity   | Description                                                                                                                                                                                       |
+|----------------|---------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `CLS` pooling  | Sequence-wise | For BERT‑like (bidirectional self‑attention) models, CLS pooling is used by default. This means the last_hidden_states corresponding to the first token (the [CLS] token) is taken as the output. |
+| `LAST` pooling | Sequence-wise | For GPT‑like (causal self‑attention) models, LAST pooling is used by default. This means the last_hidden_states corresponding to the last token is taken as the output.                           |
+| `MEAN` pooling | Sequence-wise | Many studies have shown that averaging the last_hidden_states over all input tokens performs better on certain downstream tasks. Therefore, more and more models are using MEAN pooling.          |
+| `ALL` pooling  | Token-wise    | Outputs the last_hidden_states for all input tokens.                                                                                                                                              |
+| `STEP` pooling | Token-wise    | Filters and outputs the last_hidden_states corresponding to the token IDs returned by returned_token_ids.                                                                                         |
+
 ### Score Types
 
+![Score Types](../../assets/models/pooling_models/score_types.svg)
+
 The scoring models is designed to compute similarity scores between two input prompts. It supports three model types
 (aka `score_type`): `cross-encoder`, `late-interaction`, and `bi-encoder`.
 
@@ -68,7 +88,7 @@ The scoring models is designed to compute similarity scores between two input pr
 |-----------------------|---------------|----------------------------------------------|--------------------|--------------------------|
 | `classify` (see note) | Sequence-wise | reranker score for each sequence             | `cross-encoder`    | linear classifier        |
 | `embed`               | Sequence-wise | vector representations for each sequence     | `bi-encoder`       | cosine similarity        |
-| `token_classify`      | Token-wise    | probability vector of classes for each token | nan                | nan                      |
+| `token_classify`      | Token-wise    | probability vector of classes for each token | N/A                | N/A                      |
 | `token_embed`         | Token-wise    | vector representations for each token        | `late-interaction` | late interaction(MaxSim) |
 
 !!! note
@@ -76,14 +96,15 @@ The scoring models is designed to compute similarity scores between two input pr
 
 ### Pooling Usages
 
-| Pooling Usages              | Description                                                                                                                                             |
-|-----------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|
-| Classification Usages       | Predicting which predefined category, class, or label best corresponds to a given input.                                                                |
-| Embedding Usages            | Converts unstructured data (text, images, audio, etc.) into structured numerical vectors (embeddings).                                                  |
-| Token Classification Usages | Token-wise classification                                                                                                                               |
-| Token Embedding Usages      | Token-wise embedding                                                                                                                                    |
-| Scoring Usages              | Computes similarity scores between two inputs. It supports three model types (aka `score_type`): `cross-encoder`, `late-interaction`, and `bi-encoder`. |
-| Reward Usages               | Evaluates the quality of outputs generated by a language model, acting as a proxy for human preferences.                                                |
+| Pooling Usages              | Description                                                                                                                                               |
+|-----------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Classification Usages       | Predicting which predefined category, class, or label best corresponds to a given input.                                                                  |
+| Embedding Usages            | Converts unstructured data (text, images, audio, etc.) into structured numerical vectors (embeddings).                                                    |
+| Token Classification Usages | Token-wise classification                                                                                                                                 |
+| Token Embedding Usages      | Token-wise embedding                                                                                                                                      |
+| Reward Usages               | Evaluates the quality of outputs generated by a language model, acting as a proxy for human preferences.                                                  |
+| Scoring Usages              | Computes similarity scores between two inputs. It supports three model types (aka `score_type`): `cross-encoder`, `late-interaction`, and `bi-encoder`.   |
+| Plugins Usages              | Allow users to customize input and output processors. For more information, please refer to [IO Processor Plugins](../../design/io_processor_plugins.md). |
 
 We also have some special models that support multiple pooling tasks, or have specific usage scenarios, or support special inputs and outputs.
 
@@ -91,9 +112,9 @@ For more detailed information, please refer to the link below.
 
 - [Classification Usages](classify.md)
 - [Embedding Usages](embed.md)
-- [Reward Usages](reward.md)
 - [Token Classification Usages](token_classify.md)
 - [Token Embedding Usages](token_embed.md)
+- [Reward Usages](reward.md)
 - [Scoring Usages](scoring.md)
 - [Specific Model Examples](specific_models.md)
 
@@ -103,15 +124,17 @@ Each pooling model in vLLM supports one or more of these tasks according to
 [Pooler.get_supported_tasks][vllm.model_executor.layers.pooler.Pooler.get_supported_tasks],
 enabling the corresponding APIs.
 
-### Offline APIs corresponding to pooling tasks
+### Offline APIs corresponding to pooling usages
 
-| Task             | APIs                                                                                  |
-|------------------|---------------------------------------------------------------------------------------|
-| `embed`          | `LLM.embed(...)`, `LLM.encode(..., pooling_task="embed")`, `LLM.score(...)`(see note) |
-| `classify`       | `LLM.classify(...)`, `LLM.encode(..., pooling_task="classify")`, `LLM.score(...)`     |
-| `token_classify` | `LLM.reward(...)`, `LLM.encode(..., pooling_task="token_classify")`                   |
-| `token_embed`    | `LLM.encode(..., pooling_task="token_embed")`, `LLM.score(...)`                       |
-| `plugin`         | `LLM.encode(..., pooling_task="plugin")`                                              |
+| Pooling Usages              | Dedicated API       | Pooling task for `LLM.encode` API | Score Types                | scoring function         |
+|-----------------------------|---------------------|-----------------------------------|----------------------------|--------------------------|
+| Classification Usages       | `LLM.classify(...)` | `classify`                        | `cross-encoder` (see note) | linear classifier        |
+| Embedding Usages            | `LLM.embed(...)`    | `embed`                           | `bi-encoder`               | cosine similarity        |
+| Token Classification Usages | N/A                 | `token_classify`                  | N/A                        | N/A                      |
+| Token Embedding Usages      | N/A                 | `token_embed`                     | `late-interaction`         | late interaction(MaxSim) |
+| Reward Usages               | N/A                 | `classify` & `token_classify`     | N/A                        | N/A                      |
+| Scoring Usages              | `LLM.score(...)`    | N/A                               | N/A                        | N/A                      |
+| Plugins Usages              | N/A                 | `plugin`                          | N/A                        | N/A                      |
 
 !!! note
     Only when a classification model outputs num_labels equal to 1 can it be used as a scoring model and have its scoring API enabled.
@@ -137,7 +160,7 @@ It is primarily designed for [score models](scoring.md).
 
 The [encode][vllm.LLM.encode] method is available to all pooling models in vLLM.
 
-Please use one of the more specific methods or set the task directly when using `LLM.encode`, refer to the [table above](#offline-apis-corresponding-to-pooling-tasks).
+Please use one of the more specific methods or set the task directly when using `LLM.encode`, refer to the [table above](#offline-apis-corresponding-to-pooling-usages).
 
 ### Examples
 
@@ -157,12 +180,12 @@ Our online Server provides endpoints that correspond to the offline APIs:
 
 - Corresponding to `LLM.embed`:
     - [Cohere Embed API](embed.md#cohere-embed-api) (`/v2/embed`)
-    - [Openai-compatible Embeddings API](embed.md#openai-compatible-embeddings-api) (`/v1/embeddings`)
+    - [OpenAI-compatible Embeddings API](embed.md#openai-compatible-embeddings-api) (`/v1/embeddings`)
 - Corresponding to `LLM.classify`:
     - [Classification API](classify.md#online-serving)(`/classify`)
 - Corresponding to `LLM.score`:
     - [Score API](scoring.md#score-api)(`/score`)
-    - [Rerank API](scoring.md#rerank-api) (`/rerank`, `/v1/rerank`, `/v2/rerank`)
+    - [Cohere Rerank API](scoring.md#rerank-api) (`/rerank`, `/v1/rerank`, `/v2/rerank`)
 - Pooling API (`/pooling`) is similar to `LLM.encode`, being applicable to all types of pooling models.
 
 The following introduces the Pooling API. For other APIs, please refer to the link above.
@@ -173,9 +196,12 @@ Our Pooling API (`/pooling`) is similar to `LLM.encode`, being applicable to all
 
 The input format is the same as [Embeddings API](embed.md#openai-compatible-embeddings-api), but the output data can contain an arbitrary nested list, not just a 1-D list of floats.
 
-Please use one of the more specific APIs or set the task directly when using the Pooling API, refer to the [table above](#offline-apis-corresponding-to-pooling-tasks).
+Please use one of the more specific APIs or set the task directly when using the Pooling API, refer to the [table above](#offline-apis-corresponding-to-pooling-usages).
+
+Code examples:
 
-Code example: [examples/pooling/pooling/pooling_online.py](../../../examples/pooling/pooling/pooling_online.py)
+- [Online example](../../../examples/pooling/reward/token_reward_online.py)
+- [Offline example](../../../examples/pooling/reward/token_reward_offline.py)
 
 ### Examples
 
@@ -276,10 +302,10 @@ Pooling models now support token-wise task.
 
 ### Score task
 
-`score` task is deprecated and will be removed in v0.20. Please use `classify` instead. Only when a
-classification model outputs num_labels equal to 1 can it be used as a scoring model and have its scoring API enabled.
+`score` task have has been removed in v0.21, use `classify` instead. Only when a classification model outputs num_labels
+equal to 1 can it be used as a scoring model and have its scoring API enabled.
 
 ### Pooling multitask support
 
-Pooling multitask support is deprecated and will be removed in v0.20. When the default pooling task is not what you want,
+Pooling multitask support has been removed in v0.21. When the default pooling task is not what you want,
 you need to manually specify it via `PoolerConfig(task=<task>)` offline or `--pooler-config.task <task>` online.
diff --git a/docs/models/pooling_models/classify.md b/docs/models/pooling_models/classify.md
index 69a6fe75d374..6f361e3fd3ae 100644
--- a/docs/models/pooling_models/classify.md
+++ b/docs/models/pooling_models/classify.md
@@ -77,7 +77,7 @@ The following [pooling parameters][vllm.PoolingParams] are supported.
 
 ### `LLM.classify`
 
-The [classify][vllm.LLM.classify] method outputs a probability vector for each prompt.
+The [classify][vllm.entrypoints.pooling.offline.PoolingOfflineMixin.classify] method outputs a probability vector for each prompt.
 
 ```python
 from vllm import LLM
@@ -93,7 +93,7 @@ A code example can be found here: [examples/basic/offline_inference/classify.py]
 
 ### `LLM.encode`
 
-The [encode][vllm.LLM.encode] method is available to all pooling models in vLLM.
+The [encode][vllm.entrypoints.pooling.offline.PoolingOfflineMixin.encode] method is available to all pooling models in vLLM.
 
 Set `pooling_task="classify"` when using `LLM.encode` for classification Models:
 
@@ -267,12 +267,39 @@ You can modify the `problem_type` via problem_type in the Hugging Face config. T
 
 Implement alignment with transformers [ForSequenceClassificationLoss](https://github.com/huggingface/transformers/blob/57bb6db6ee4cfaccc45b8d474dfad5a17811ca60/src/transformers/loss/loss_utils.py#L92).
 
-### Logit bias
+### Affine Score Calibration
 
-You can modify the `logit_bias` (aka `sigmoid_normalize`) through the logit_bias parameter in `vllm.config.PoolerConfig`.
+Affine Score Calibration, also known as [Platt Scaling](https://en.wikipedia.org/wiki/Platt_scaling) (Platt, 1999), is the most widely used method for calibrating classifier outputs into well-calibrated probabilities.
+
+The calibration follows the transformation:
+
+`activation((logit - logit_mean) / logit_sigma)`
+
+| Parameter | Default | Description |
+| --------- | ------- | ----------- |
+| `logit_mean` | `None` | Mean subtracted from logits (centers scores) |
+| `logit_sigma` | `None` | Standard deviation used to scale logits after mean subtraction |
+
+The computation order is as follows:
+
+```python
+logits -= logit_mean   # subtract mean (center scores)
+logits /= logit_sigma  # divide by sigma (scale)
+logits = activation(logits)  # e.g. sigmoid
+```
+
+Example configuration:
+
+```bash
+--pooler-config '{"use_activation": true, "logit_mean": 4.5, "logit_sigma": 1.0}'
+```
 
 ## Removed Features
 
 ### Remove softmax from PoolingParams
 
 We have already removed `softmax` and `activation` from PoolingParams. Instead, use `use_activation`, since we allow `classify` and `token_classify` to use any activation function.
+
+### Remove `logit_bias` and `logit_scale`
+
+`logit_bias` and `logit_scale` are deprecated aliases for `logit_mean` and `logit_sigma` respectively. When using `logit_scale`, it is automatically converted to `logit_sigma = 1/logit_scale`. These deprecated parameters will be removed in v0.21.
diff --git a/docs/models/pooling_models/embed.md b/docs/models/pooling_models/embed.md
index 8b3632a9f33c..47f85b7440e2 100644
--- a/docs/models/pooling_models/embed.md
+++ b/docs/models/pooling_models/embed.md
@@ -12,7 +12,7 @@ Embedding models are a class of machine learning models designed to transform un
     - `LLM.score(...)`
 - Online APIs:
     - [Cohere Embed API](embed.md#cohere-embed-api) (`/v2/embed`)
-    - [Openai-compatible Embeddings API](embed.md#openai-compatible-embeddings-api) (`/v1/embeddings`)
+    - [OpenAI-compatible Embeddings API](embed.md#openai-compatible-embeddings-api) (`/v1/embeddings`)
     - Pooling API (`/pooling`)
 
 The primary distinction between (sequence) embedding and token embedding lies in their output granularity: (sequence) embedding produces a single embedding vector for an entire input sequence, whereas token embedding generates an embedding for each individual token within the sequence.
@@ -45,6 +45,7 @@ You can compute pairwise similarity scores to build a similarity matrix using th
 | `GritLM` | GritLM | `parasail-ai/GritLM-7B-vllm`. | ✅︎ | ✅︎ |
 | `GteModel` | Arctic-Embed-2.0-M | `Snowflake/snowflake-arctic-embed-m-v2.0`. | | |
 | `GteNewModel` | mGTE-TRM (see note) | `Alibaba-NLP/gte-multilingual-base`, etc. | | |
+| `JinaEmbeddingsV5Model`<sup>C</sup> | Qwen3-based with task-specific LoRA adapters | `jinaai/jina-embeddings-v5-text-small` (see note) | ✅︎ | ✅︎ |
 | `LlamaBidirectionalModel`<sup>C</sup> | Llama-based with bidirectional attention | `nvidia/llama-nemotron-embed-1b-v2`, etc. | ✅︎ | ✅︎ |
 | `LlamaModel`<sup>C</sup>, `LlamaForCausalLM`<sup>C</sup>, `MistralModel`<sup>C</sup>, etc. | Llama-based | `intfloat/e5-mistral-7b-instruct`, etc. | ✅︎ | ✅︎ |
 | `ModernBertModel` | ModernBERT-based | `Alibaba-NLP/gte-modernbert-base`, etc. | | |
@@ -73,6 +74,12 @@ You can compute pairwise similarity scores to build a similarity matrix using th
 !!! note
     `jinaai/jina-embeddings-v3` supports multiple tasks through LoRA, while vllm temporarily only supports text-matching tasks by merging LoRA weights.
 
+!!! note
+    `jinaai/jina-embeddings-v5-text-small` ships with four task-specific LoRA adapters
+    (`retrieval`, `text-matching`, `classification`, `clustering`). vLLM merges the
+    selected adapter into the base weights at load time. Choose the task with
+    `--hf-overrides '{"jina_task": "<task>"}'`; the default is `retrieval`.
+
 ### Multimodal Models
 
 !!! note
@@ -84,7 +91,7 @@ You can compute pairwise similarity scores to build a similarity matrix using th
 | `LlamaNemotronVLModel` | Llama Nemotron Embedding + SigLIP | T + I | `nvidia/llama-nemotron-embed-vl-1b-v2` | | |
 | `LlavaNextForConditionalGeneration`<sup>C</sup> | LLaVA-NeXT-based | T / I | `royokong/e5-v` | | ✅︎ |
 | `Phi3VForCausalLM`<sup>C</sup> | Phi-3-Vision-based | T + I | `TIGER-Lab/VLM2Vec-Full` | | ✅︎ |
-| `Qwen3VLForConditionalGeneration`<sup>C</sup> | Qwen3-VL | T + I + V | `Qwen/Qwen3-VL-Embedding-2B`, etc. | ✅︎ | ✅︎ |
+| `Qwen3VLForConditionalGeneration`<sup>C</sup> (see note) | Qwen3-VL | T + I + V | `Qwen/Qwen3-VL-Embedding-2B`, etc. | ✅︎ | ✅︎ |
 | `SiglipModel` | SigLIP, SigLIP2 | T / I | `google/siglip-base-patch16-224`, `google/siglip2-base-patch16-224` | | |
 | `*ForConditionalGeneration`<sup>C</sup>, `*ForCausalLM`<sup>C</sup>, etc. | Generative models | \* | N/A | \* | \* |
 
@@ -95,6 +102,9 @@ If your model is not in the above list, we will try to automatically convert the
 [as_embedding_model][vllm.model_executor.models.adapters.as_embedding_model]. By default, the embeddings
 of the whole prompt are extracted from the normalized hidden state corresponding to the last token.
 
+!!! note
+    `Qwen3-VL-Embedding` officially uses `qwen_vl_utils` for image preprocessing, while vLLM uses `transformers`' `video_processing_qwen3_vl`, which leads to slightly different results compared to the official Hugging Face repository examples. Example code for offline inference using `qwen_vl_utils` can be found in the [vision_embedding_offline.py](../../../examples/pooling/embed/vision_embedding_offline.py) example.
+
 !!! note
     Although vLLM supports automatically converting models of any architecture into embedding models via --convert embed, to get the best results, you should use pooling models that are specifically trained as such.
 
@@ -113,7 +123,7 @@ The following [pooling parameters][vllm.PoolingParams] are supported.
 
 ### `LLM.embed`
 
-The [embed][vllm.LLM.embed] method outputs an embedding vector for each prompt.
+The [embed][vllm.entrypoints.pooling.offline.PoolingOfflineMixin.embed] method outputs an embedding vector for each prompt.
 
 ```python
 from vllm import LLM
@@ -129,7 +139,7 @@ A code example can be found here: [examples/basic/offline_inference/embed.py](..
 
 ### `LLM.encode`
 
-The [encode][vllm.LLM.encode] method is available to all pooling models in vLLM.
+The [encode][vllm.entrypoints.pooling.offline.PoolingOfflineMixin.encode] method is available to all pooling models in vLLM.
 
 Set `pooling_task="embed"` when using `LLM.encode` for embedding Models:
 
@@ -145,7 +155,7 @@ print(f"Data: {data!r}")
 
 ### `LLM.score`
 
-The [score][vllm.LLM.score] method outputs similarity scores between sentence pairs.
+The [score][vllm.entrypoints.pooling.offline.PoolingOfflineMixin.score] method outputs similarity scores between sentence pairs.
 
 All models that support embedding task also support using the score API to compute similarity scores by calculating the cosine similarity of two input prompt's embeddings.
 
@@ -221,7 +231,7 @@ these extra parameters are supported instead:
 
 #### Examples
 
-If the model has a [chat template](../../serving/openai_compatible_server.md#chat-template), you can replace `inputs` with a list of `messages` (same schema as [Chat API](../../serving/openai_compatible_server.md#chat-api))
+If the model has a [chat template](../../serving/online_serving/README.md#chat-template), you can replace `inputs` with a list of `messages` (same schema as [Chat API](../../serving/online_serving/openai_compatible_server.md#chat-api))
 which will be treated as a single prompt to the model. Here is a convenience function for calling the API while retaining OpenAI's type annotations:
 
 ??? code
diff --git a/docs/models/pooling_models/reward.md b/docs/models/pooling_models/reward.md
index 8555060e66be..4acacda50045 100644
--- a/docs/models/pooling_models/reward.md
+++ b/docs/models/pooling_models/reward.md
@@ -87,7 +87,7 @@ The following [pooling parameters][vllm.PoolingParams] are supported.
 
 ### `LLM.encode`
 
-The [encode][vllm.LLM.encode] method is available to all pooling models in vLLM.
+The [encode][vllm.entrypoints.pooling.offline.PoolingOfflineMixin.encode] method is available to all pooling models in vLLM.
 
 - Reward Models
 
@@ -133,4 +133,14 @@ print(f"Data: {data!r}")
 
 ## Online Serving
 
-Please refer to the [pooling API](README.md#pooling-api). Pooling task corresponding to reward model types refer to the [table above](#summary).
+Please refer to the [Pooling API](README.md#pooling-api). Pooling task corresponding to reward model types refer to the [table above](#summary).
+
+## More examples
+
+More examples can be found here: [examples/pooling/reward](../../../examples/pooling/reward)
+
+## Deprecated Features
+
+### `LLM.reward`
+
+`llm.reward` api is deprecated and will be removed in v0.23. Please use `LLM.encode` with `pooling_task="classify"` or `pooling_task="token_classify"` instead.
diff --git a/docs/models/pooling_models/scoring.md b/docs/models/pooling_models/scoring.md
index ac94a0cd76bc..c8b4c73cfb30 100644
--- a/docs/models/pooling_models/scoring.md
+++ b/docs/models/pooling_models/scoring.md
@@ -20,11 +20,17 @@ The score models is designed to compute similarity scores between two input prom
     - `LLM.score`
 - Online APIs:
     - [Score API](scoring.md#score-api) (`/score`)
-    - [Rerank API](scoring.md#rerank-api) (`/rerank`, `/v1/rerank`, `/v2/rerank`)
+    - [Cohere Rerank API](scoring.md#rerank-api) (`/rerank`, `/v1/rerank`, `/v2/rerank`)
 
 !!! note
     Only when a classification model outputs num_labels equal to 1 can it be used as a scoring model and have its scoring API enabled.
 
+### Score Types
+
+The three supported scoring functions are as illustrated in the figure below.
+
+![Score Types](../../assets/models/pooling_models/score_types.svg)
+
 ## Supported Models
 
 ### Cross-encoder models
@@ -41,6 +47,7 @@ The score models is designed to compute similarity scores between two input prom
 | `GemmaForSequenceClassification` | Gemma-based | `BAAI/bge-reranker-v2-gemma`(see note), etc. | [bge-reranker-v2-gemma.jinja](../../../examples/pooling/score/template/bge-reranker-v2-gemma.jinja) | ✅︎ | ✅︎ |
 | `GteNewForSequenceClassification` | mGTE-TRM (see note) | `Alibaba-NLP/gte-multilingual-reranker-base`, etc. | N/A | | |
 | `LlamaBidirectionalForSequenceClassification`<sup>C</sup> | Llama-based with bidirectional attention | `nvidia/llama-nemotron-rerank-1b-v2`, etc. | [nemotron-rerank.jinja](../../../examples/pooling/score/template/nemotron-rerank.jinja) | ✅︎ | ✅︎ |
+| `ModernBertForSequenceClassification` | ModernBERT-based | `Alibaba-NLP/gte-reranker-modernbert-base`, etc. | N/A | | |
 | `Qwen2ForSequenceClassification`<sup>C</sup> | Qwen2-based | `mixedbread-ai/mxbai-rerank-base-v2`(see note), etc. | [mxbai_rerank_v2.jinja](../../../examples/pooling/score/template/mxbai_rerank_v2.jinja) | ✅︎ | ✅︎ |
 | `Qwen3ForSequenceClassification`<sup>C</sup> | Qwen3-based | `tomaarsen/Qwen3-Reranker-0.6B-seq-cls`, `Qwen/Qwen3-Reranker-0.6B`(see note), etc. | [qwen3_reranker.jinja](../../../examples/pooling/score/template/qwen3_reranker.jinja) | ✅︎ | ✅︎ |
 | `RobertaForSequenceClassification` | RoBERTa-based | `cross-encoder/quora-roberta-base`, etc. | N/A | | |
@@ -96,7 +103,7 @@ The score models is designed to compute similarity scores between two input prom
 \* Feature support is the same as that of the original model.
 
 !!! note
-    Similar to Qwen3-Reranker, you need to use the following `--hf_overrides` to load the official original `Qwen3-VL-Reranker`.
+    Similar to Qwen3-Reranker, you need to use the following `--hf_overrides` to load the official original `Qwen3-VL-Reranker`. `Qwen3-VL` officially uses `qwen_vl_utils` for image preprocessing, while vLLM uses `transformers`' `video_processing_qwen3_vl`, which leads to slightly different results compared to the official Hugging Face repository examples.
 
     ```bash
     vllm serve Qwen/Qwen3-VL-Reranker-2B --hf_overrides '{"architectures": ["Qwen3VLForSequenceClassification"],"classifier_from_token": ["no", "yes"],"is_original_qwen3_reranker": true}'
@@ -129,7 +136,7 @@ The following [pooling parameters][vllm.PoolingParams] are only supported by cro
 
 ### `LLM.score`
 
-The [score][vllm.LLM.score] method outputs similarity scores between sentence pairs.
+The [score][vllm.entrypoints.pooling.offline.PoolingOfflineMixin.score] method outputs similarity scores between sentence pairs.
 
 ```python
 from vllm import LLM
@@ -160,6 +167,8 @@ The following Score API parameters are supported:
 --8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-params"
 --8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-extra-params"
 --8<-- "vllm/entrypoints/pooling/base/protocol.py:classify-extra-params"
+--8<-- "vllm/entrypoints/pooling/scoring/protocol.py:scoring-common-params"
+--8<-- "vllm/entrypoints/pooling/scoring/protocol.py:score-request-params"
 ```
 
 #### Examples
@@ -354,7 +363,7 @@ Full example:
 - [examples/pooling/score/vision_score_api_online.py](../../../examples/pooling/score/vision_score_api_online.py)
 - [examples/pooling/score/vision_rerank_api_online.py](../../../examples/pooling/score/vision_rerank_api_online.py)
 
-### Rerank API
+### Cohere Rerank API
 
 `/rerank`, `/v1/rerank`, and `/v2/rerank` APIs are compatible with both [Jina AI's rerank API interface](https://jina.ai/reranker/) and
 [Cohere's rerank API interface](https://docs.cohere.com/v2/reference/rerank) to ensure compatibility with
@@ -370,6 +379,8 @@ The following rerank api parameters are supported:
 --8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-params"
 --8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-extra-params"
 --8<-- "vllm/entrypoints/pooling/base/protocol.py:classify-extra-params"
+--8<-- "vllm/entrypoints/pooling/scoring/protocol.py:scoring-common-params"
+--8<-- "vllm/entrypoints/pooling/scoring/protocol.py:rerank-request-params"
 ```
 
 #### Examples
@@ -435,7 +446,7 @@ AS cross-encoder models are a subset of classification models that accept two pr
 
 Score templates are supported for **cross-encoder** models only. If you are using an **embedding** model for scoring, vLLM does not apply a score template.
 
-Some scoring models require a specific prompt format to work correctly. You can specify a custom score template using the `--chat-template` parameter (see [Chat Template](../../serving/openai_compatible_server.md#chat-template)).
+Some scoring models require a specific prompt format to work correctly. You can specify a custom score template using the `--chat-template` parameter (see [Chat Template](../../serving/online_serving/README.md#chat-template)).
 
 Like chat templates, the score template receives a `messages` list. For scoring, each message has a `role` attribute—either `"query"` or `"document"`. For the usual kind of point-wise cross-encoder, you can expect exactly two messages: one query and one document. To access the query and document content, use Jinja's `selectattr` filter:
 
diff --git a/docs/models/pooling_models/specific_models.md b/docs/models/pooling_models/specific_models.md
index 0d908c1aa1a3..8753f1fd07c3 100644
--- a/docs/models/pooling_models/specific_models.md
+++ b/docs/models/pooling_models/specific_models.md
@@ -60,7 +60,7 @@ curl -s http://localhost:8000/score -H "Content-Type: application/json" -d '{
 }'
 ```
 
-You can also get the raw token embeddings using the pooling API with `token_embed` task:
+You can also get the raw token embeddings using the Pooling API with `token_embed` task:
 
 ```shell
 curl -s http://localhost:8000/pooling -H "Content-Type: application/json" -d '{
@@ -174,7 +174,7 @@ curl -s http://localhost:8000/pooling -H "Content-Type: application/json" -d '{
 }'
 ```
 
-For **image inputs** via the pooling API, use the chat-style `messages` field:
+For **image inputs** via the Pooling API, use the chat-style `messages` field:
 
 ```shell
 curl -s http://localhost:8000/pooling -H "Content-Type: application/json" -d '{
diff --git a/docs/models/pooling_models/token_classify.md b/docs/models/pooling_models/token_classify.md
index 201ce4ea6dcb..5c4798935bfb 100644
--- a/docs/models/pooling_models/token_classify.md
+++ b/docs/models/pooling_models/token_classify.md
@@ -15,7 +15,7 @@ Many classification models support both (sequence) classification and token clas
 
 !!! note
 
-    Pooling multitask support is deprecated and will be removed in v0.20. When the default pooling task (classify) is not
+    Pooling multitask support has been removed since v0.21. When the default pooling task (classify) is not
     what you want, you need to manually specify it via `PoolerConfig(task="token_classify")` offline or
     `--pooler-config.task token_classify` online.
 
@@ -68,7 +68,7 @@ If your model is not in the above list, we will try to automatically convert the
     Forced alignment usage requires `--hf-overrides '{"architectures": ["Qwen3ASRForcedAlignerForTokenClassification"]}'`.
     Please refer to [examples/pooling/token_classify/forced_alignment_offline.py](../../../examples/pooling/token_classify/forced_alignment_offline.py).
 
-### As Reward Models
+### Reward Models
 
 Using token classification models as reward models. For details on reward models, see [Reward Models](reward.md).
 
@@ -87,7 +87,7 @@ The following [pooling parameters][vllm.PoolingParams] are supported.
 
 ### `LLM.encode`
 
-The [encode][vllm.LLM.encode] method is available to all pooling models in vLLM.
+The [encode][vllm.entrypoints.pooling.offline.PoolingOfflineMixin.encode] method is available to all pooling models in vLLM.
 
 Set `pooling_task="token_classify"` when using `LLM.encode` for token classification Models:
 
@@ -103,7 +103,7 @@ print(f"Data: {data!r}")
 
 ## Online Serving
 
-Please refer to the [pooling API](README.md#pooling-api) and use `"task":"token_classify"`.
+Please refer to the [Pooling API](README.md#pooling-api) and use `"task":"token_classify"`.
 
 ## More examples
 
diff --git a/docs/models/pooling_models/token_embed.md b/docs/models/pooling_models/token_embed.md
index 3396f4eac2d2..02050b7f50f0 100644
--- a/docs/models/pooling_models/token_embed.md
+++ b/docs/models/pooling_models/token_embed.md
@@ -9,14 +9,14 @@
 - Online APIs:
     - Pooling API (`/pooling`)
 
-The difference between the (sequence) embedding task and the token embedding task is that (sequence) embedding outputs one embedding for each sequence, while token embedding outputs a embedding for each token.
+The difference between the (sequence) embedding task and the token embedding task is that (sequence) embedding outputs one embedding for each sequence, while token embedding outputs an embedding for each token.
 
 Many embedding models support both (sequence) embedding and token embedding. For further details on (sequence) embedding, please refer to [this page](embed.md).
 
 !!! note
 
-    Pooling multitask support is deprecated and will be removed in v0.20. When the default pooling task (embed) is not 
-    what you want, you need to manually specify it via via `PoolerConfig(task="token_embed")` offline or
+    Pooling multitask support has been removed since v0.21. When the default pooling task (embed) is not 
+    what you want, you need to manually specify it via `PoolerConfig(task="token_embed")` offline or
     `--pooler-config.task token_embed` online.
 
 ## Typical Use Cases
@@ -71,6 +71,14 @@ Models of any architecture can be converted into embedding models using `--conve
 
 If your model is not in the above list, we will try to automatically convert the model using [as_embedding_model][vllm.model_executor.models.adapters.as_embedding_model].
 
+### Special models
+
+| Architecture | Models | Example HF Models | [LoRA](../../features/lora.md) | [PP](../../serving/parallelism_scaling.md) |
+| ------------ | ------ | ----------------- | -------------------- | ------------------------- |
+| `JinaForRanking` | Qwen3-based | `jinaai/jina-reranker-v3` | | |
+
+jina-reranker-v3 is a listwise document reranker model with a novel `last but not late interaction` architecture. More information can be found at: [examples/pooling/token_embed/jina_reranker_v3_offline.py](../../../examples/pooling/token_embed/jina_reranker_v3_offline.py)
+
 --8<-- [end:supported-token-embed-models]
 
 ## Offline Inference
@@ -86,7 +94,7 @@ The following [pooling parameters][vllm.PoolingParams] are supported.
 
 ### `LLM.encode`
 
-The [encode][vllm.LLM.encode] method is available to all pooling models in vLLM.
+The [encode][vllm.entrypoints.pooling.offline.PoolingOfflineMixin.encode] method is available to all pooling models in vLLM.
 
 Set `pooling_task="token_embed"` when using `LLM.encode` for token embedding Models:
 
@@ -102,7 +110,7 @@ print(f"Data: {data!r}")
 
 ### `LLM.score`
 
-The [score][vllm.LLM.score] method outputs similarity scores between sentence pairs.
+The [score][vllm.entrypoints.pooling.offline.PoolingOfflineMixin.score] method outputs similarity scores between sentence pairs.
 
 All models that support token embedding task also support using the score API to compute similarity scores by calculating the late interaction of two input prompts.
 
@@ -121,7 +129,7 @@ print(f"Score: {score}")
 
 ## Online Serving
 
-Please refer to the [pooling API](README.md#pooling-api) and use `"task":"token_embed"`.
+Please refer to the [Pooling API](README.md#pooling-api) and use `"task":"token_embed"`.
 
 ## More examples
 
diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index e5c85dbe8436..599da3c6b5e8 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -44,7 +44,7 @@ llm.apply_model(lambda model: print(type(model)))
 
 If the printed type starts with `Transformers...` then it's using the Transformers model implementation!
 
-If a model has a vLLM implementation but you would prefer to use the Transformers implementation via the Transformers modeling backend, set `model_impl="transformers"` for [offline inference](../serving/offline_inference.md) or `--model-impl transformers` for the [online serving](../serving/openai_compatible_server.md).
+If a model has a vLLM implementation but you would prefer to use the Transformers implementation via the Transformers modeling backend, set `model_impl="transformers"` for [offline inference](../serving/offline_inference.md) or `--model-impl transformers` for the [online serving](../serving/online_serving/README.md).
 
 !!! note
     For vision-language models, if you are loading with `dtype="auto"`, vLLM loads the whole model with config's `dtype` if it exists. In contrast the native Transformers will respect the `dtype` attribute of each backbone in the model. That might cause a slight difference in performance.
@@ -63,8 +63,8 @@ For a model to be compatible with the Transformers modeling backend for vLLM it
 
 If the compatible model is:
 
-- on the Hugging Face Model Hub, simply set `trust_remote_code=True` for [offline-inference](../serving/offline_inference.md) or `--trust-remote-code` for the [openai-compatible-server](../serving/openai_compatible_server.md).
-- in a local directory, simply pass directory path to `model=<MODEL_DIR>` for [offline-inference](../serving/offline_inference.md) or `vllm serve <MODEL_DIR>` for the [openai-compatible-server](../serving/openai_compatible_server.md).
+- on the Hugging Face Model Hub, simply set `trust_remote_code=True` for [offline-inference](../serving/offline_inference.md) or `--trust-remote-code` for the [online serving](../serving/online_serving/README.md).
+- in a local directory, simply pass directory path to `model=<MODEL_DIR>` for [offline-inference](../serving/offline_inference.md) or `vllm serve <MODEL_DIR>` for the [online serving](../serving/online_serving/README.md).
 
 This means that, with the Transformers modeling backend for vLLM, new models can be used before they are officially supported in Transformers or vLLM!
 
@@ -378,12 +378,14 @@ th {
 | `BloomForCausalLM` | BLOOM, BLOOMZ, BLOOMChat | `bigscience/bloom`, `bigscience/bloomz`, etc. | | ✅︎ |
 | `ChatGLMModel`, `ChatGLMForConditionalGeneration` | ChatGLM | `zai-org/chatglm2-6b`, `zai-org/chatglm3-6b`, `thu-coai/ShieldLM-6B-chatglm3`, etc. | ✅︎ | ✅︎ |
 | `CohereForCausalLM`, `Cohere2ForCausalLM` | Command-R, Command-A | `CohereLabs/c4ai-command-r-v01`, `CohereLabs/c4ai-command-r7b-12-2024`, `CohereLabs/c4ai-command-a-03-2025`, `CohereLabs/command-a-reasoning-08-2025`, etc. | ✅︎ | ✅︎ |
+| `Cohere2MoeForCausalLM` | Command-A+ | `CohereLabs/command-a-plus-05-2026`, etc. | ✅︎ | ✅︎ |
 | `CwmForCausalLM` | CWM | `facebook/cwm`, etc. | ✅︎ | ✅︎ |
 | `DbrxForCausalLM` | DBRX | `databricks/dbrx-base`, `databricks/dbrx-instruct`, etc. | | ✅︎ |
 | `DeciLMForCausalLM` | DeciLM | `nvidia/Llama-3_3-Nemotron-Super-49B-v1`, etc. | ✅︎ | ✅︎ |
 | `DeepseekForCausalLM` | DeepSeek | `deepseek-ai/deepseek-llm-67b-base`, `deepseek-ai/deepseek-llm-7b-chat`, etc. | ✅︎ | ✅︎ |
 | `DeepseekV2ForCausalLM` | DeepSeek-V2 | `deepseek-ai/DeepSeek-V2`, `deepseek-ai/DeepSeek-V2-Chat`, etc. | ✅︎ | ✅︎ |
 | `DeepseekV3ForCausalLM` | DeepSeek-V3 | `deepseek-ai/DeepSeek-V3`, `deepseek-ai/DeepSeek-R1`, `deepseek-ai/DeepSeek-V3.1`, etc. | ✅︎ | ✅︎ |
+| `DeepseekV4ForCausalLM` | DeepSeek-V4 | `deepseek-ai/DeepSeek-V4-Flash`, `deepseek-ai/DeepSeek-V4-Pro`, etc. | | ✅︎ |
 | `Dots1ForCausalLM` | dots.llm1 | `rednote-hilab/dots.llm1.base`, `rednote-hilab/dots.llm1.inst`, etc. | | ✅︎ |
 | `DotsOCRForCausalLM` | dots_ocr | `rednote-hilab/dots.ocr` | ✅︎ | ✅︎ |
 | `Ernie4_5ForCausalLM` | Ernie4.5 | `baidu/ERNIE-4.5-0.3B-PT`, etc. | ✅︎ | ✅︎ |
@@ -400,6 +402,7 @@ th {
 | `Gemma2ForCausalLM` | Gemma 2 | `google/gemma-2-9b`, `google/gemma-2-27b`, etc. | ✅︎ | ✅︎ |
 | `Gemma3ForCausalLM` | Gemma 3 | `google/gemma-3-1b-it`, etc. | ✅︎ | ✅︎ |
 | `Gemma3nForCausalLM` | Gemma 3n | `google/gemma-3n-E2B-it`, `google/gemma-3n-E4B-it`, etc. | | |
+| `Gemma4ForCausalLM` | Gemma 4 | `google/gemma-4-E2B-it`, etc. | ✅︎ | ✅︎ |
 | `GlmForCausalLM` | GLM-4 | `zai-org/glm-4-9b-chat-hf`, etc. | ✅︎ | ✅︎ |
 | `Glm4ForCausalLM` | GLM-4-0414 | `zai-org/GLM-4-32B-0414`, etc. | ✅︎ | ✅︎ |
 | `Glm4MoeForCausalLM` | GLM-4.5, GLM-4.6, GLM-4.7 | `zai-org/GLM-4.5`, etc. | ✅︎ | ✅︎ |
@@ -418,6 +421,7 @@ th {
 | `Grok1ForCausalLM` | Grok2 | `xai-org/grok-2` | ✅︎ | ✅︎ |
 | `HunYuanDenseV1ForCausalLM` | Hunyuan Dense | `tencent/Hunyuan-7B-Instruct` | ✅︎ | ✅︎ |
 | `HunYuanMoEV1ForCausalLM` | Hunyuan-A13B | `tencent/Hunyuan-A13B-Instruct`, `tencent/Hunyuan-A13B-Pretrain`, `tencent/Hunyuan-A13B-Instruct-FP8`, etc. | ✅︎ | ✅︎ |
+| `HYV3ForCausalLM` | HY3 | `tencent/Hy3-preview-Base`, `tencent/Hy3-preview` | ✅︎ | ✅︎ |
 | `HyperCLOVAXForCausalLM` | HyperCLOVAX-SEED-Think-14B | `naver-hyperclovax/HyperCLOVAX-SEED-Think-14B` | ✅︎ | ✅︎ |
 | `InternLMForCausalLM` | InternLM | `internlm/internlm-7b`, `internlm/internlm-chat-7b`, etc. | ✅︎ | ✅︎ |
 | `InternLM2ForCausalLM` | InternLM2 | `internlm/internlm2-7b`, `internlm/internlm2-chat-7b`, etc. | ✅︎ | ✅︎ |
@@ -436,6 +440,7 @@ th {
 | `Mamba2ForCausalLM` | Mamba2 | `mistralai/Mamba-Codestral-7B-v0.1`, etc. | | ✅︎ |
 | `MiMoForCausalLM` | MiMo | `XiaomiMiMo/MiMo-7B-RL`, etc. | ✅︎ | ✅︎ |
 | `MiMoV2FlashForCausalLM` | MiMoV2Flash | `XiaomiMiMo/MiMo-V2-Flash`, etc. | | ✅︎ |
+| `MiMoV2ForCausalLM` | MiMoV2Pro | `XiaomiMiMo/MiMo-V2.5-Pro`, etc. | | ✅︎ |
 | `MiniCPMForCausalLM` | MiniCPM | `openbmb/MiniCPM-2B-sft-bf16`, `openbmb/MiniCPM-2B-dpo-bf16`, `openbmb/MiniCPM-S-1B-sft`, etc. | ✅︎ | ✅︎ |
 | `MiniCPM3ForCausalLM` | MiniCPM3 | `openbmb/MiniCPM3-4B`, etc. | ✅︎ | ✅︎ |
 | `MiniMaxForCausalLM` | MiniMax-Text | `MiniMaxAI/MiniMax-Text-01-hf`, etc. | | |
@@ -457,6 +462,7 @@ th {
 | `PanguEmbeddedForCausalLM` | openPangu-Embedded-7B | `FreedomIntelligence/openPangu-Embedded-7B-V1.1` | ✅︎ | ✅︎ |
 | `PanguProMoEV2ForCausalLM` | openpangu-pro-moe-v2 | | ✅︎ | ✅︎ |
 | `PanguUltraMoEForCausalLM` | openpangu-ultra-moe-718b-model | `FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1` | ✅︎ | ✅︎ |
+| `Param2MoEForCausalLM` | param2moe | `bharatgenai/Param2-17B-A2.4B-Thinking`, etc. | ✅︎ | ✅︎ |
 | `PhiForCausalLM` | Phi | `microsoft/phi-1_5`, `microsoft/phi-2`, etc. | ✅︎ | ✅︎ |
 | `Phi3ForCausalLM` | Phi-4, Phi-3 | `microsoft/Phi-4-mini-instruct`, `microsoft/Phi-4`, `microsoft/Phi-3-mini-4k-instruct`, `microsoft/Phi-3-mini-128k-instruct`, `microsoft/Phi-3-medium-128k-instruct`, etc. | ✅︎ | ✅︎ |
 | `PhiMoEForCausalLM` | Phi-3.5-MoE | `microsoft/Phi-3.5-MoE-instruct`, etc. | ✅︎ | ✅︎ |
@@ -470,6 +476,7 @@ th {
 | `Qwen3MoeForCausalLM` | Qwen3MoE | `Qwen/Qwen3-30B-A3B`, etc. | ✅︎ | ✅︎ |
 | `Qwen3NextForCausalLM` | Qwen3NextMoE | `Qwen/Qwen3-Next-80B-A3B-Instruct`, etc. | ✅︎ | ✅︎ |
 | `RWForCausalLM` | Falcon RW | `tiiuae/falcon-40b`, etc. | | ✅︎ |
+| `Rnj1ForCausalLM` | Rnj1 | `EssentialAI/rnj-1-instruct`, etc. | | |
 | `SarvamMoEForCausalLM` | Sarvam 2 | `sarvamai/sarvam2-30b-a3b`, etc. | ✅︎ | ✅︎ |
 | `SarvamMLAForCausalLM` | Sarvam 2 | `sarvamai/sarvam2-105b-a9b`, etc. | | ✅︎ |
 | `SeedOssForCausalLM` | SeedOss | `ByteDance-Seed/Seed-OSS-36B-Instruct`, etc. | ✅︎ | ✅︎ |
@@ -481,6 +488,7 @@ th {
 | `Step3p5ForCausalLM` | Step-3.5-flash | `stepfun-ai/Step-3.5-Flash`, etc. | | ✅︎ |
 | `TeleChatForCausalLM` | TeleChat | `chuhac/TeleChat2-35B`, etc. | ✅︎ | ✅︎ |
 | `TeleChat2ForCausalLM` | TeleChat2 | `Tele-AI/TeleChat2-3B`, `Tele-AI/TeleChat2-7B`, `Tele-AI/TeleChat2-35B`, etc. | ✅︎ | ✅︎ |
+| `TeleChat3ForCausalLM` | TeleChat3 | `Tele-AI/TeleChat3-36B-Thinking`, `Tele-AI/TeleChat3-Coder-36B-Thinking`, etc. | ✅︎ | ✅︎ |
 | `TeleFLMForCausalLM` | TeleFLM | `CofeAI/FLM-2-52B-Instruct-2407`, `CofeAI/Tele-FLM`, etc. | ✅︎ | ✅︎ |
 | `XverseForCausalLM` | XVERSE | `xverse/XVERSE-7B-Chat`, `xverse/XVERSE-13B-Chat`, `xverse/XVERSE-65B-Chat`, etc. | ✅︎ | ✅︎ |
 | `MiniMaxM1ForCausalLM` | MiniMax-Text | `MiniMaxAI/MiniMax-M1-40k`, `MiniMaxAI/MiniMax-M1-80k`, etc. | | |
@@ -541,19 +549,23 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
 | `BeeForConditionalGeneration` | Bee-8B | T + I<sup>E+</sup> | `Open-Bee/Bee-8B-RL`, `Open-Bee/Bee-8B-SFT` | | ✅︎ |
 | `Blip2ForConditionalGeneration` | BLIP-2 | T + I<sup>E</sup> | `Salesforce/blip2-opt-2.7b`, `Salesforce/blip2-opt-6.7b`, etc. | ✅︎ | ✅︎ |
 | `ChameleonForConditionalGeneration` | Chameleon | T + I | `facebook/chameleon-7b`, etc. | | ✅︎ |
+| `CheersForConditionalGeneration` | Cheers | T + I | `ai9stars/Cheers` | | ✅︎ |
 | `Cohere2VisionForConditionalGeneration` | Command A Vision | T + I<sup>+</sup> | `CohereLabs/command-a-vision-07-2025`, etc. | | ✅︎ |
 | `DeepseekVLV2ForCausalLM` | DeepSeek-VL2 | T + I<sup>+</sup> | `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2`, etc. | | ✅︎ |
 | `DeepseekOCRForCausalLM` | DeepSeek-OCR | T + I<sup>+</sup> | `deepseek-ai/DeepSeek-OCR`, etc. | ✅︎ | ✅︎ |
 | `DeepseekOCR2ForCausalLM` | DeepSeek-OCR-2 | T + I<sup>+</sup> | `deepseek-ai/DeepSeek-OCR-2`, etc. | ✅︎ | ✅︎ |
 | `Eagle2_5_VLForConditionalGeneration` | Eagle2.5-VL | T + I<sup>E+</sup> | `nvidia/Eagle2.5-8B`, etc. | ✅︎ | ✅︎ |
 | `Ernie4_5_VLMoeForConditionalGeneration` | Ernie4.5-VL | T + I<sup>+</sup>/ V<sup>+</sup> | `baidu/ERNIE-4.5-VL-28B-A3B-PT`, `baidu/ERNIE-4.5-VL-424B-A47B-PT` | | ✅︎ |
+| `Exaone4_5_ForConditionalGeneration` | EXAONE-4.5 | T + I<sup>E+</sup> | `LGAI-EXAONE/EXAONE-4.5-33B`, etc. | ✅︎ | ✅︎ |
 | `FuyuForCausalLM` | Fuyu | T + I | `adept/fuyu-8b`, etc. | | ✅︎ |
 | `Gemma3ForConditionalGeneration` | Gemma 3 | T + I<sup>E+</sup> | `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc. | ✅︎ | ✅︎ |
 | `Gemma3nForConditionalGeneration` | Gemma 3n | T + I + A | `google/gemma-3n-E2B-it`, `google/gemma-3n-E4B-it`, etc. | | |
+| `Gemma4ForConditionalGeneration` | Gemma 4 | T + I<sup>+</sup> + V + A<sup>*</sup> | `google/gemma-4-E2B-it`, etc. | | ✅︎ |
 | `GLM4VForCausalLM`<sup>^</sup> | GLM-4V | T + I | `zai-org/glm-4v-9b`, `zai-org/cogagent-9b-20241220`, etc. | ✅︎ | ✅︎ |
 | `Glm4vForConditionalGeneration` | GLM-4.1V-Thinking | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.1V-9B-Thinking`, etc. | ✅︎ | ✅︎ |
 | `Glm4vMoeForConditionalGeneration` | GLM-4.5V | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.5V`, etc. | ✅︎ | ✅︎ |
 | `GlmOcrForConditionalGeneration` | GLM-OCR | T + I<sup>E+</sup> | `zai-org/GLM-OCR`, etc. | ✅︎ | ✅︎ |
+| `Granite4VisionForConditionalGeneration` | Granite 4 Vision | T + I<sup>E+</sup> | `ibm-granite/granite-4.1-3b-vision`, etc. | ✅︎ | ✅︎ |
 | `GraniteSpeechForConditionalGeneration` | Granite Speech | T + A | `ibm-granite/granite-speech-3.3-8b` | ✅︎ | ✅︎ |
 | `HCXVisionForCausalLM` | HyperCLOVAX-SEED-Vision-Instruct-3B | T + I<sup>+</sup> + V<sup>+</sup> | `naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B` | | |
 | `HCXVisionV2ForCausalLM` | HyperCLOVAX-SEED-Think-32B | T + I<sup>+</sup> + V<sup>+</sup> | `naver-hyperclovax/HyperCLOVAX-SEED-Think-32B` | | |
@@ -563,6 +575,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
 | `IsaacForConditionalGeneration` | Isaac | T + I<sup>+</sup> | `PerceptronAI/Isaac-0.1` | ✅︎ | ✅︎ |
 | `InternS1ForConditionalGeneration` | Intern-S1 | T + I<sup>E+</sup> + V<sup>E+</sup> | `internlm/Intern-S1`, `internlm/Intern-S1-mini`, etc. | ✅︎ | ✅︎ |
 | `InternS1ProForConditionalGeneration` | Intern-S1-Pro | T + I<sup>E+</sup> + V<sup>E+</sup> | `internlm/Intern-S1-Pro`, etc. | ✅︎ | ✅︎ |
+| `InternS2PreviewForConditionalGeneration` | Intern-S2-Preview | T + I<sup>E+</sup> + V<sup>E+</sup> | `internlm/Intern-S2-Preview`, etc. | ✅︎ | ✅︎ |
 | `InternVLChatModel` | InternVL 3.5, InternVL 3.0, InternVideo 2.5, InternVL 2.5, Mono-InternVL, InternVL 2.0 | T + I<sup>E+</sup> + (V<sup>E+</sup>) | `OpenGVLab/InternVL3_5-14B`, `OpenGVLab/InternVL3-9B`, `OpenGVLab/InternVideo2_5_Chat_8B`, `OpenGVLab/InternVL2_5-4B`, `OpenGVLab/Mono-InternVL-2B`, `OpenGVLab/InternVL2-4B`, etc. | ✅︎ | ✅︎ |
 | `InternVLForConditionalGeneration` | InternVL 3.0 (HF format) | T + I<sup>E+</sup> + V<sup>E+</sup> | `OpenGVLab/InternVL3-1B-hf`, etc. | ✅︎ | ✅︎ |
 | `KananaVForConditionalGeneration` | Kanana-V | T + I<sup>+</sup> | `kakaocorp/kanana-1.5-v-3b-instruct`, etc. | | ✅︎ |
@@ -576,20 +589,23 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
 | `Llama4ForConditionalGeneration` | Llama 4 | T + I<sup>+</sup> | `meta-llama/Llama-4-Scout-17B-16E-Instruct`, `meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8`, `meta-llama/Llama-4-Maverick-17B-128E-Instruct`, etc. | ✅︎ | ✅︎ |
 | `Llama_Nemotron_Nano_VL` | Llama Nemotron Nano VL | T + I<sup>E+</sup> | `nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1` | ✅︎ | ✅︎ |
 | `LlavaForConditionalGeneration` | LLaVA-1.5, Pixtral (HF Transformers) | T + I<sup>E+</sup> | `llava-hf/llava-1.5-7b-hf`, `TIGER-Lab/Mantis-8B-siglip-llama3` (see note), `mistral-community/pixtral-12b`, etc. | ✅︎ | ✅︎ |
-| `LlavaNextForConditionalGeneration` | LLaVA-NeXT | T + I<sup>E+</sup> | `llava-hf/llava-v1.6-mistral-7b-hf`, `llava-hf/llava-v1.6-vicuna-7b-hf`, etc. | | ✅︎ |
+| `LlavaNextForConditionalGeneration` | LLaVA-NeXT, Granite Vision | T + I<sup>E+</sup> | `llava-hf/llava-v1.6-mistral-7b-hf`, `llava-hf/llava-v1.6-vicuna-7b-hf`, `ibm-granite/granite-vision-3.3-2b`, etc. | | ✅︎ |
 | `LlavaNextVideoForConditionalGeneration` | LLaVA-NeXT-Video | T + V | `llava-hf/LLaVA-NeXT-Video-7B-hf`, etc. | | ✅︎ |
 | `LlavaOnevisionForConditionalGeneration` | LLaVA-Onevision | T + I<sup>+</sup> + V<sup>+</sup> | `llava-hf/llava-onevision-qwen2-7b-ov-hf`, `llava-hf/llava-onevision-qwen2-0.5b-ov-hf`, etc. | | ✅︎ |
 | `MiDashengLMModel` | MiDashengLM | T + A<sup>+</sup> | `mispeech/midashenglm-7b` | | ✅︎ |
+| `MiMoV2OmniForCausalLM` | MiMo-V2.5-Omni | T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>+</sup> | `XiaomiMiMo/MiMo-V2.5-Omni` | | ✅︎ |
 | `MiniCPMO` | MiniCPM-O | T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>E+</sup> | `openbmb/MiniCPM-o-2_6`, etc. | ✅︎ | ✅︎ |
 | `MiniCPMV` | MiniCPM-V | T + I<sup>E+</sup> + V<sup>E+</sup> | `openbmb/MiniCPM-V-2` (see note), `openbmb/MiniCPM-Llama3-V-2_5`, `openbmb/MiniCPM-V-2_6`, `openbmb/MiniCPM-V-4`, `openbmb/MiniCPM-V-4_5`, etc. | ✅︎ | |
 | `MiniMaxVL01ForConditionalGeneration` | MiniMax-VL | T + I<sup>E+</sup> | `MiniMaxAI/MiniMax-VL-01`, etc. | | ✅︎ |
 | `Mistral3ForConditionalGeneration` | Mistral3 (HF Transformers) | T + I<sup>+</sup> | `mistralai/Mistral-Small-3.1-24B-Instruct-2503`, etc. | ✅︎ | ✅︎ |
 | `MolmoForCausalLM` | Molmo | T + I<sup>+</sup> | `allenai/Molmo-7B-D-0924`, `allenai/Molmo-7B-O-0924`, etc. | ✅︎ | ✅︎ |
-| `Molmo2ForConditionalGeneration` | Molmo2 | T + I<sup>+</sup> / V | `allenai/Molmo2-4B`, `allenai/Molmo2-8B`, `allenai/Molmo2-O-7B` | ✅︎ | ✅︎ |
+| `Molmo2ForConditionalGeneration` | Molmo2 | T + I<sup>+</sup> / V | `allenai/Molmo2-4B`, `allenai/Molmo2-8B`, `allenai/Molmo2-O-7B`, `allenai/MolmoWeb-4B`<sup>^</sup>, `allenai/MolmoWeb-8B`<sup>^</sup> | ✅︎ | ✅︎ |
+| `Moondream3ForCausalLM` | Moondream3 | T + I | `moondream/moondream3-preview` | | ✅︎ |
 | `MusicFlamingoForConditionalGeneration` | MusicFlamingo | T + A | `nvidia/music-flamingo-2601-hf`, `nvidia/music-flamingo-think-2601-hf` | ✅︎ | ✅︎ |
 | `NVLM_D_Model` | NVLM-D 1.0 | T + I<sup>+</sup> | `nvidia/NVLM-D-72B`, etc. | | ✅︎ |
 | `OpenCUAForConditionalGeneration` | OpenCUA-7B | T + I<sup>E+</sup> | `xlangai/OpenCUA-7B` | ✅︎ | ✅︎ |
 | `OpenPanguVLForConditionalGeneration` | openpangu-VL | T + I<sup>E+</sup> + V<sup>E+</sup> | `FreedomIntelligence/openPangu-VL-7B` | ✅︎ | ✅︎ |
+| `OpenVLAForActionPrediction` | OpenVLA | T + I | `openvla/openvla-7b` | | ✅︎ |
 | `Ovis` | Ovis2, Ovis1.6 | T + I<sup>+</sup> | `AIDC-AI/Ovis2-1B`, `AIDC-AI/Ovis1.6-Llama3.2-3B`, etc. | | ✅︎ |
 | `Ovis2_5` | Ovis2.5 | T + I<sup>+</sup> + V | `AIDC-AI/Ovis2.5-9B`, etc. | | |
 | `Ovis2_6ForCausalLM` | Ovis2.6 | T + I<sup>+</sup> + V | `AIDC-AI/Ovis2.6-2B`, etc. | | |
@@ -598,16 +614,18 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
 | `PaliGemmaForConditionalGeneration` | PaliGemma, PaliGemma 2 | T + I<sup>E</sup> | `google/paligemma-3b-pt-224`, `google/paligemma-3b-mix-224`, `google/paligemma2-3b-ft-docci-448`, etc. | ✅︎ | ✅︎ |
 | `Phi3VForCausalLM` | Phi-3-Vision, Phi-3.5-Vision | T + I<sup>E+</sup> | `microsoft/Phi-3-vision-128k-instruct`, `microsoft/Phi-3.5-vision-instruct`, etc. | | ✅︎ |
 | `Phi4MMForCausalLM` | Phi-4-multimodal | T + I<sup>+</sup> / T + A<sup>+</sup> / I<sup>+</sup> + A<sup>+</sup> | `microsoft/Phi-4-multimodal-instruct`, etc. | ✅︎ | ✅︎ |
+| `Phi4ForCausalLMV` | Phi-4-reasoning-vision | T + I<sup>+</sup> | `microsoft/Phi-4-reasoning-vision-15B`, etc. | | ✅︎ |
 | `PixtralForConditionalGeneration` | Ministral 3 (Mistral format), Mistral 3 (Mistral format), Mistral Large 3 (Mistral format), Pixtral (Mistral format) | T + I<sup>+</sup> | `mistralai/Ministral-3-3B-Instruct-2512`, `mistralai/Mistral-Small-3.1-24B-Instruct-2503`, `mistralai/Mistral-Large-3-675B-Instruct-2512` `mistralai/Pixtral-12B-2409` etc. | ✅︎ | ✅︎ |
+| `QianfanOCRForConditionalGeneration` | QianfanOCR | T + I<sup>E+</sup> | `baidu/Qianfan-OCR`, etc. | ✅︎ | ✅︎ |
 | `QwenVLForConditionalGeneration`<sup>^</sup> | Qwen-VL | T + I<sup>E+</sup> | `Qwen/Qwen-VL`, `Qwen/Qwen-VL-Chat`, etc. | ✅︎ | ✅︎ |
 | `Qwen2AudioForConditionalGeneration` | Qwen2-Audio | T + A<sup>+</sup> | `Qwen/Qwen2-Audio-7B-Instruct` | | ✅︎ |
-| `Qwen2VLForConditionalGeneration` | QVQ, Qwen2-VL | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/QVQ-72B-Preview`, `Qwen/Qwen2-VL-7B-Instruct`, `Qwen/Qwen2-VL-72B-Instruct`, etc. | ✅︎ | ✅︎ |
-| `Qwen2_5_VLForConditionalGeneration` | Qwen2.5-VL | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/Qwen2.5-VL-3B-Instruct`, `Qwen/Qwen2.5-VL-72B-Instruct`, etc. | ✅︎ | ✅︎ |
+| `Qwen2VLForConditionalGeneration` <sup>Q</sup> | QVQ, Qwen2-VL | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/QVQ-72B-Preview`, `Qwen/Qwen2-VL-7B-Instruct`, `Qwen/Qwen2-VL-72B-Instruct`, etc. | ✅︎ | ✅︎ |
+| `Qwen2_5_VLForConditionalGeneration` <sup>Q</sup> | Qwen2.5-VL | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/Qwen2.5-VL-3B-Instruct`, `Qwen/Qwen2.5-VL-72B-Instruct`, etc. | ✅︎ | ✅︎ |
 | `Qwen2_5OmniThinkerForConditionalGeneration` | Qwen2.5-Omni | T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>+</sup> | `Qwen/Qwen2.5-Omni-3B`, `Qwen/Qwen2.5-Omni-7B` | ✅︎ | ✅︎ |
 | `Qwen3_5ForConditionalGeneration` | Qwen3.5 | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/Qwen3.5-9B-Instruct`, etc. | ✅︎ | ✅︎ |
 | `Qwen3_5MoeForConditionalGeneration` | Qwen3.5-MOE | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/Qwen3.5-35B-A3B-Instruct`, etc. | ✅︎ | ✅︎ |
-| `Qwen3VLForConditionalGeneration` | Qwen3-VL | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/Qwen3-VL-4B-Instruct`, etc. | ✅︎ | ✅︎ |
-| `Qwen3VLMoeForConditionalGeneration` | Qwen3-VL-MOE | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/Qwen3-VL-30B-A3B-Instruct`, etc. | ✅︎ | ✅︎ |
+| `Qwen3VLForConditionalGeneration` <sup>Q</sup> | Qwen3-VL | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/Qwen3-VL-4B-Instruct`, etc. | ✅︎ | ✅︎ |
+| `Qwen3VLMoeForConditionalGeneration` <sup>Q</sup> | Qwen3-VL-MOE | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/Qwen3-VL-30B-A3B-Instruct`, etc. | ✅︎ | ✅︎ |
 | `Qwen3OmniMoeThinkerForConditionalGeneration` | Qwen3-Omni | T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>+</sup> | `Qwen/Qwen3-Omni-30B-A3B-Instruct`, `Qwen/Qwen3-Omni-30B-A3B-Thinking` | ✅︎ | ✅︎ |
 | `Qwen3ASRForConditionalGeneration` | Qwen3-ASR | T + A<sup>+</sup> | `Qwen/Qwen3-ASR-1.7B` | ✅︎ | ✅︎ |
 | `RForConditionalGeneration` | R-VL-4B | T + I<sup>E+</sup> | `YannQi/R-4B` | | ✅︎ |
@@ -628,19 +646,40 @@ Some models are supported only via the [Transformers modeling backend](#transfor
 <sup>^</sup> You need to set the architecture name via `--hf-overrides` to match the one in vLLM.</br>
 <sup>E</sup> Pre-computed embeddings can be inputted for this modality.</br>
 <sup>+</sup> Multiple items can be inputted per text prompt for this modality.
+<sup>*</sup> Only specific variants of the model support this modality (see notes below).</br>
+<sup>Q</sup> `Qwen*-VL` officially uses `qwen_vl_utils` for image preprocessing, while vLLM uses `transformers`' `video_processing_qwen*`, which leads to slightly different results compared to the official Hugging Face repository examples.
 
 !!! note
     `Gemma3nForConditionalGeneration` is only supported on V1 due to shared KV caching and it depends on `timm>=1.0.17` to make use of its
     MobileNet-v5 vision backbone.
-  
+
     Performance is not yet fully optimized mainly due to:
-  
-    - Both audio and vision MM encoders use `transformers.AutoModel` implementation.  
+
+    - Both audio and vision MM encoders use `transformers.AutoModel` implementation.
     - There's no PLE caching or out-of-memory swapping support, as described in [Google's blog](https://developers.googleblog.com/en/introducing-gemma-3n/). These features might be too model-specific for vLLM, and swapping in particular may be better suited for constrained setups.
 
+!!! note
+    For `Gemma4ForConditionalGeneration`:
+    - audio input is only supported by the `gemma-4-E2B` and `gemma-4-E4B` variants.
+    - The model does not ingest videos directly. However, vLLM’s Gemma 4 implementation supports video inputs by handling video processing internally. Users can send videos directly in the message structure to vLLM, where they are converted into text and image frames before being passed to the model.
+    - Gemma 4 assistant checkpoints for speculative decoding use vLLM's Gemma
+      4 MTP path, not generic draft-model speculative decoding. See the
+      [Gemma 4 assistant model MTP example](../features/speculative_decoding/mtp.md#gemma-4-assistant-models).
+
 !!! note
     For `InternVLChatModel`, only InternVL2.5 with Qwen2.5 text backbone (`OpenGVLab/InternVL2.5-1B` etc.), InternVL3 and InternVL3.5 have video inputs support currently.
 
+!!! note
+    To use `allenai/MolmoWeb-4B` or `allenai/MolmoWeb-8B`, serve the checkpoint
+    with the Molmo2 architecture and disable multimodal-prefix attention:
+    `--hf-overrides '{"architectures": ["Molmo2ForConditionalGeneration"], "is_mm_prefix_lm": false}'`.
+
+!!! note
+    `Moondream3ForCausalLM` uses task-specific prompt templates for `query`
+    and `caption`. The native `detect` and `point` skills require custom
+    coordinate decoding and are not exposed by this vLLM implementation.
+    See [Moondream3 prompt recipes](../features/multimodal_inputs.md#moondream3-prompt-recipes).
+
 !!! note
     To use `TIGER-Lab/Mantis-8B-siglip-llama3`, you have to pass `--hf_overrides '{"architectures": ["MantisForConditionalGeneration"]}'` when running vLLM.
 
@@ -656,11 +695,12 @@ Speech2Text models trained specifically for Automatic Speech Recognition.
 | ------------ | ------ | ----------------- | -------------------- | ------------------------- |
 | `CohereAsrForConditionalGeneration` | Cohere-Transcribe | `CohereLabs/cohere-transcribe-03-2026` | | |
 | `FireRedASR2ForConditionalGeneration` | FireRedASR2 | `allendou/FireRedASR2-LLM-vllm`, etc. | | |
+| `FireRedLIDForConditionalGeneration` | FireRedLID | `PatchyTisa/FireRedLID-vllm`, etc. | | |
 | `FunASRForConditionalGeneration` | FunASR | `allendou/Fun-ASR-Nano-2512-vllm`, etc. | | |
 | `Gemma3nForConditionalGeneration` | Gemma3n | `google/gemma-3n-E2B-it`, `google/gemma-3n-E4B-it`, etc. | | |
 | `GlmAsrForConditionalGeneration` | GLM-ASR | `zai-org/GLM-ASR-Nano-2512` | ✅︎ | ✅︎ |
 | `GraniteSpeechForConditionalGeneration` | Granite Speech | `ibm-granite/granite-4.0-1b-speech`, `ibm-granite/granite-speech-3.3-2b`, etc. | ✅︎ | ✅︎ |
-| `Qwen3ASRForConditionalGeneration` | Qwen3-ASR | `Qwen/Qwen3-ASR-1.7B`, etc. | | ✅︎ |
+| `Qwen3ASRForConditionalGeneration` | Qwen3-ASR | `Qwen/Qwen3-ASR-1.7B`, etc. | ✅︎ | ✅︎ |
 | `Qwen3OmniMoeThinkerForConditionalGeneration` | Qwen3-Omni | `Qwen/Qwen3-Omni-30B-A3B-Instruct`, etc. | | ✅︎ |
 | `VoxtralForConditionalGeneration` | Voxtral (Mistral format) | `mistralai/Voxtral-Mini-3B-2507`, `mistralai/Voxtral-Small-24B-2507`, etc. | ✅︎ | ✅︎ |
 | `WhisperForConditionalGeneration` | Whisper | `openai/whisper-small`, `openai/whisper-large-v3-turbo`, etc. | | |
@@ -668,6 +708,24 @@ Speech2Text models trained specifically for Automatic Speech Recognition.
 !!! note
     `VoxtralForConditionalGeneration` requires `mistral-common[audio]` to be installed.
 
+#### Realtime Transcription
+
+Speech models that support streaming transcription via the
+[`/v1/realtime`](../serving/online_serving/speech_to_text.md#realtime-api)
+WebSocket endpoint.
+
+| Architecture | Models | Example HF Models | [LoRA](../features/lora.md) | [PP](../serving/parallelism_scaling.md) |
+| ------------ | ------ | ----------------- | -------------------- | ------------------------- |
+| `VoxtralRealtimeGeneration` | Voxtral Realtime | `mistralai/Voxtral-Mini-4B-Realtime-2602` | | |
+| `Qwen3ASRRealtimeGeneration` | Qwen3-ASR Realtime | `Qwen/Qwen3-ASR-0.6B` | | |
+
+!!! note
+    `VoxtralRealtimeGeneration` requires `mistral-common[audio]` to be installed, and must be served with `--tokenizer-mode mistral`.
+
+    `Qwen3ASRRealtimeGeneration` is not auto-detected from `config.json`.
+    You must pass `--hf-overrides '{"architectures":["Qwen3ASRRealtimeGeneration"]}'`
+    when serving.
+
 ## Pooling Models
 
 See [this page](pooling_models/README.md) for more information on how to use pooling models.
diff --git a/docs/pre_run_check.sh b/docs/pre_run_check.sh
new file mode 100644
index 000000000000..de93f82faf1d
--- /dev/null
+++ b/docs/pre_run_check.sh
@@ -0,0 +1,41 @@
+if [ "$READTHEDOCS_VERSION_TYPE" = "external" ]; then
+  MAX_WAIT=300
+  INTERVAL=60
+  ELAPSED=0
+  while :; do
+    RAW=$(curl -sS -w "\n%{http_code}" "https://api.github.com/repos/vllm-project/vllm/commits/${READTHEDOCS_GIT_COMMIT_HASH}/check-runs?check_name=pre-run-check&filter=latest")
+    HTTP_CODE=$(printf %s "$RAW" | tail -n1)
+    BODY=$(printf %s "$RAW" | sed '$d')
+    if [ "$HTTP_CODE" != "200" ]; then
+      echo "GitHub API returned HTTP $HTTP_CODE (likely rate-limited); skipping pre-run-check gate."
+      break
+    fi
+    STATUS=$(printf %s "$BODY" | python3 -c "import sys, json; r=json.load(sys.stdin).get(\"check_runs\",[]); print((r[0].get(\"status\") or \"\") if r else \"none\")")
+    CONCLUSION=$(printf %s "$BODY" | python3 -c "import sys, json; r=json.load(sys.stdin).get(\"check_runs\",[]); print((r[0].get(\"conclusion\") or \"\") if r else \"\")")
+    CHECK_URL=$(printf %s "$BODY" | python3 -c "import sys, json; r=json.load(sys.stdin).get(\"check_runs\",[]); print((r[0].get(\"html_url\") or \"\") if r else \"\")")
+    if [ "$STATUS" = "none" ]; then
+      echo "no pre-run-check found for this commit; skipping gate."
+      break
+    fi
+    if [ -n "$CONCLUSION" ]; then
+      echo "pre-run-check conclusion: $CONCLUSION"
+      if [ "$CONCLUSION" = "failure" ] || [ "$CONCLUSION" = "cancelled" ] || [ "$CONCLUSION" = "timed_out" ]; then
+        echo "pre-run-check did not pass; skipping docs build."
+        if [ -n "$CHECK_URL" ]; then
+          echo "pre-run-check failure reason: $CHECK_URL"
+        fi
+        exit 1
+      fi
+      break
+    fi
+    if [ "$ELAPSED" -ge "$MAX_WAIT" ]; then
+      echo "pre-run-check status=$STATUS after ${MAX_WAIT}s; skipping gate."
+      break
+    fi
+    echo "pre-run-check status=$STATUS; waiting ${INTERVAL}s..."
+    sleep "$INTERVAL"
+    ELAPSED=$((ELAPSED + INTERVAL))
+  done
+else
+  echo "Not a PR build (version type=$READTHEDOCS_VERSION_TYPE); skipping pre-run-check gate."
+fi
\ No newline at end of file
diff --git a/docs/serving/data_parallel_deployment.md b/docs/serving/data_parallel_deployment.md
index f0946eaf407a..1f18b92f95b4 100644
--- a/docs/serving/data_parallel_deployment.md
+++ b/docs/serving/data_parallel_deployment.md
@@ -16,7 +16,7 @@ For MoE models, when any requests are in progress in any rank, we must ensure th
 
 In all cases, it is beneficial to load-balance requests between DP ranks. For online deployments, this balancing can be optimized by taking into account the state of each DP engine - in particular its currently scheduled and waiting (queued) requests, and KV cache state. Each DP engine has an independent KV cache, and the benefit of prefix caching can be maximized by directing prompts intelligently.
 
-This document focuses on online deployments (with the API server). DP + EP is also supported for offline usage (via the LLM class), for an example see [examples/offline_inference/data_parallel.py](../../examples/offline_inference/data_parallel.py).
+This document focuses on online deployments (with the API server). DP + EP is also supported for offline usage (via the LLM class), for an example see [examples/features/data_parallel/data_parallel_offline.py](../../examples/features/data_parallel/data_parallel_offline.py).
 
 There are two distinct modes supported for online deployments - self-contained with internal load balancing, or externally per-rank process deployment and load balancing.
 
@@ -98,7 +98,7 @@ For larger scale deployments especially, it can make sense to handle the orchest
 
 In this case, it's more convenient to treat each DP rank like a separate vLLM deployment, with its own endpoint, and have an external router balance HTTP requests between them, making use of appropriate real-time telemetry from each server for routing decisions.
 
-This can already be done trivially for non-MoE models, since each deployed server is fully independent. No data parallel CLI options need to be used for this.
+This can already be done trivially for non-MoE models, since each deployed server is fully independent. In that case, launch independent vLLM instances without any `--data-parallel-*` arguments; external DP CLI options are only supported for MoE deployments.
 
 We support an equivalent topology for MoE DP+EP which can be configured via the following CLI arguments.
 
diff --git a/docs/serving/distributed_troubleshooting.md b/docs/serving/distributed_troubleshooting.md
index b5354a7e55d5..e6dde4944284 100644
--- a/docs/serving/distributed_troubleshooting.md
+++ b/docs/serving/distributed_troubleshooting.md
@@ -4,11 +4,11 @@ For general troubleshooting, see [Troubleshooting](../usage/troubleshooting.md).
 
 ## Verify inter-node GPU communication
 
-After you start the Ray cluster, verify GPU-to-GPU communication across nodes. Proper configuration can be non-trivial. For more information, see [troubleshooting script](../usage/troubleshooting.md#incorrect-hardwaredriver). If you need additional environment variables for communication configuration, append them to [examples/online_serving/run_cluster.sh](../../examples/online_serving/run_cluster.sh), for example `-e NCCL_SOCKET_IFNAME=eth0`. Setting environment variables during cluster creation is recommended because the variables propagate to all nodes. In contrast, setting environment variables in the shell affects only the local node. For more information, see <https://github.com/vllm-project/vllm/issues/6803>.
+After you start the Ray cluster, verify GPU-to-GPU communication across nodes. Proper configuration can be non-trivial. For more information, see [troubleshooting script](../usage/troubleshooting.md#incorrect-hardwaredriver). If you need additional environment variables for communication configuration, append them to [examples/ray_serving/run_cluster.sh](../../examples/ray_serving/run_cluster.sh), for example `-e NCCL_SOCKET_IFNAME=eth0`. Setting environment variables during cluster creation is recommended because the variables propagate to all nodes. In contrast, setting environment variables in the shell affects only the local node. For more information, see <https://github.com/vllm-project/vllm/issues/6803>.
 
 ## No available node types can fulfill resource request
 
-The error message `Error: No available node types can fulfill resource request` can appear even when the cluster has enough GPUs. The issue often occurs when nodes have multiple IP addresses and vLLM can't select the correct one. Ensure that vLLM and Ray use the same IP address by setting `VLLM_HOST_IP` in [examples/online_serving/run_cluster.sh](../../examples/online_serving/run_cluster.sh) (with a different value on each node). Use `ray status` and `ray list nodes` to verify the chosen IP address. For more information, see <https://github.com/vllm-project/vllm/issues/7815>.
+The error message `Error: No available node types can fulfill resource request` can appear even when the cluster has enough GPUs. The issue often occurs when nodes have multiple IP addresses and vLLM can't select the correct one. Ensure that vLLM and Ray use the same IP address by setting `VLLM_HOST_IP` in [examples/ray_serving/run_cluster.sh](../../examples/ray_serving/run_cluster.sh) (with a different value on each node). Use `ray status` and `ray list nodes` to verify the chosen IP address. For more information, see <https://github.com/vllm-project/vllm/issues/7815>.
 
 ## Ray observability
 
diff --git a/docs/serving/expert_parallel_deployment.md b/docs/serving/expert_parallel_deployment.md
index d75ae7feb49e..fef4df770fa3 100644
--- a/docs/serving/expert_parallel_deployment.md
+++ b/docs/serving/expert_parallel_deployment.md
@@ -153,6 +153,7 @@ Configure EPLB with the `--eplb-config` argument, which accepts a JSON string. T
 | `num_redundant_experts` | Additional global experts per EP rank beyond equal distribution | `0` |
 | `use_async` | Use non-blocking EPLB for reduced latency overhead | `false` |
 | `policy` | The policy type for expert parallel load balancing | `"default"` |
+| `communicator` | Backend for expert weight transfers: `"torch_nccl"`, `"torch_gloo"`, `"pynccl"`, `"nixl"`,  or `null` (auto) | `null` |
 
 For example:
 
diff --git a/docs/serving/integrations/codex.md b/docs/serving/integrations/codex.md
new file mode 100644
index 000000000000..48148acfd51f
--- /dev/null
+++ b/docs/serving/integrations/codex.md
@@ -0,0 +1,88 @@
+# Codex
+
+[Codex](https://github.com/openai/codex) is OpenAI's official agentic coding tool that lives in your terminal. It can understand your codebase, edit files, run commands, and help you write code more efficiently.
+
+By pointing Codex at a vLLM server, you can use your own models as the backend instead of the OpenAI API. This is useful for:
+
+- Running fully local/private coding assistance
+- Using open-weight models with tool calling capabilities
+- Testing and developing with custom models
+
+## How It Works
+
+vLLM implements the OpenAI-Responses API, which is the same API that Codex uses to communicate with OpenAI's servers. By configuring Codex to point at your vLLM server, Codex sends its requests to vLLM instead of OpenAI. vLLM then translates these requests to work with your local model and returns responses in the format Codex expects.
+
+This means any model served by vLLM with proper tool calling support can act as a drop-in replacement for OpenAI models in Codex.
+
+## Requirements
+
+Codex requires a model with strong tool calling capabilities. The model must support the OpenAI-Responses tool calling API. See [Tool Calling](../../features/tool_calling.md) for details on enabling tool calling for your model.
+
+## Installation
+
+First, install Codex by following the [official installation guide](https://github.com/openai/codex).
+
+## Starting the vLLM Server
+
+Start vLLM with a tool-calling capable model - here's an example using `Qwen/Qwen3-27B`:
+
+```bash
+vllm serve Qwen/Qwen3.6-27B --port 8000 --tensor-parallel-size 8 --max-model-len 262144 --reasoning-parser qwen3 --enable-auto-tool-choice --tool-call-parser qwen3_coder
+
+```
+
+For other models, you'll need to enable tool calling explicitly with `--enable-auto-tool-choice` and the right `--tool-call-parser`. Refer to the [Tool Calling documentation](../../features/tool_calling.md) for the correct flags for your model.
+
+## Configuring Codex
+
+Codex is configured via a TOML file located at `~/.codex/config.toml`. Create or edit this file to point Codex at your vLLM server:
+
+```toml
+model = "my-model"
+model_provider = "vllm"
+
+[model_providers.vllm]
+name = "vLLM"
+env_key = "VLLM_API_KEY"
+base_url = "http://localhost:8000/v1"
+wire_api = "responses"
+```
+
+The configuration fields:
+
+| Field | Description |
+| ----- | ----------- |
+| `model` | The model name to use. Must match the `--served-model-name` you passed to vLLM. |
+| `model_provider` | Set to `"vllm"` to use your local vLLM server. |
+| `[model_providers.vllm]` | Configuration section for the vLLM provider. |
+| `name` | A display name for your vLLM provider. |
+| `env_key` | The name of an environment variable that Codex will read for the API key. vLLM does not require authentication by default, so this can be any value. |
+| `base_url` | The URL of your vLLM server's OpenAI-compatible API endpoint (default is `http://localhost:8000/v1`). |
+| `wire_api` | The API style to use. Set to `"responses"` for the OpenAI Responses API |
+
+!!! tip
+    You can set the `env_key` to any dummy environment variable since vLLM doesn't require authentication by default:
+    ```bash
+    export VLLM_API_KEY=dummy
+    ```
+
+!!! warning
+    When using the `responses` API, ensure your vLLM version supports the OpenAI Responses API.
+
+## Testing the Setup
+
+Once Codex is configured, launch it in your project directory:
+
+```bash
+codex
+```
+
+Try a simple prompt to verify the connection, such as asking it to explain a file in your project. If the model responds correctly, your setup is working. You can now use Codex with your vLLM-served model for coding tasks.
+
+## Troubleshooting
+
+**Connection refused**: Ensure vLLM is running and accessible at the specified URL. Check that the port matches and that `base_url` includes the `/v1` path suffix.
+
+**Tool calls not working**: Verify that your model supports tool calling and that you've enabled it with the correct `--tool-call-parser` flag. See [Tool Calling](../../features/tool_calling.md).
+
+**Model not found**: Ensure the `model` field in `~/.codex/config.toml` matches the `--served-model-name` you passed to vLLM.
diff --git a/docs/serving/offline_inference.md b/docs/serving/offline_inference.md
index 535bc2a62eae..4512f4a07201 100644
--- a/docs/serving/offline_inference.md
+++ b/docs/serving/offline_inference.md
@@ -2,24 +2,78 @@
 
 Offline inference is possible in your own code using vLLM's [`LLM`][vllm.LLM] class.
 
-For example, the following code downloads the [`facebook/opt-125m`](https://huggingface.co/facebook/opt-125m) model from HuggingFace
-and runs it in vLLM using the default configuration.
+## Model Types
 
-```python
-from vllm import LLM
+vLLM models can be categorized into two types:
 
-# Initialize the vLLM engine.
-llm = LLM(model="facebook/opt-125m")
-```
+- **[Generative Models](../models/supported_models.md)** - Models that produce text completions or chat responses (e.g., LLaMA, Qwen, DeepSeek). Use `LLM.generate()` and `LLM.chat()` for these models.
 
-After initializing the `LLM` instance, use the available APIs to perform model inference.
-The available APIs depend on the model type:
+- **[Pooling Models](../models/pooling_models/README.md)** - These models do not generate content. They are primarily used for classification and retrieval tasks, such as bge-m3 and Qwen3 Reranker.
 
-- [Generative models](../models/generative_models.md) output logprobs which are sampled from to obtain the final output text.
-- [Pooling models](../models/pooling_models/README.md) output their hidden states directly.
+## Generative APIs
 
-!!! info
-    [API Reference](../api/README.md#offline-inference)
+For further details on generative models, please refer to [this page](../models/supported_models.md).
+
+- `LLM.generate` - Generates completions for the given input prompts.
+- `LLM.chat` - Generates responses for a chat conversation.
+
+## Asynchronous Queue APIs
+
+- `LLM.enqueue` - Enqueues prompts for generation without waiting for completion.
+- `LLM.enqueue_chat` - Enqueues chat conversations for generation without waiting.
+- `LLM.wait_for_completion` - Waits for all enqueued requests to complete and returns results.
+
+## Pooling APIs
+
+For further details on pooling models, please refer to [this page](../models/pooling_models/README.md).
+
+- `LLM.classify` - Only applicable to [classification models](../models/pooling_models/classify.md).
+- `LLM.embed` - Only applicable to [embedding models](../models/pooling_models/embed.md).
+- `LLM.score` - Applicable to [score models](../models/pooling_models/scoring.md) (cross-encoder, bi-encoder, late-interaction).
+- `LLM.encode` - Applicable to all [pooling models](../models/pooling_models/README.md).
+
+## Profiling APIs
+
+For further details on profiling, please refer to [this page](../contributing/profiling.md).
+
+- `LLM.start_profile` - Starts profiling with an optional custom trace prefix.
+- `LLM.stop_profile` - Stops the ongoing profiling session.
+
+## Sleep Mode APIs
+
+For further details on sleep mode, please refer to [this page](../features/sleep_mode.md).
+
+- `LLM.sleep` - Puts the engine into sleep mode.
+- `LLM.wake_up` - Wakes up the engine from sleep mode.
+
+## Cache Management APIs
+
+- `LLM.reset_mm_cache` - Resets the multi-modal cache.
+- `LLM.reset_prefix_cache` - Resets the prefix cache.
+
+## Metrics APIs
+
+For further details on metrics, please refer to [this page](../design/metrics.md).
+
+- `LLM.get_metrics` - Returns a snapshot of aggregated metrics from Prometheus.
+
+## Weight Transfer APIs (RL Training)
+
+For further details on Weight Transfer, please refer to [this page](../training/weight_transfer/README.md).
+
+- `LLM.init_weight_transfer_engine` - Initializes the weight transfer engine for RL training.
+- `LLM.start_weight_update` - Starts a new weight update cycle.
+- `LLM.update_weights` - Updates the model weights.
+- `LLM.finish_weight_update` - Finishes the current weight update cycle.
+
+## Additional APIs
+
+- `LLM.collective_rpc` - Executes a method or callable collectively across all workers.
+- `LLM.apply_model` - Applies a function directly to the model inside each worker.
+
+## API Reference
+
+[Offline Inference](../api/README.md#offline-inference)
 
 ## Ray Data LLM API
 
diff --git a/docs/serving/online_serving/README.md b/docs/serving/online_serving/README.md
new file mode 100644
index 000000000000..c84377044476
--- /dev/null
+++ b/docs/serving/online_serving/README.md
@@ -0,0 +1,182 @@
+# Online Serving
+
+vLLM provides an HTTP server that is compatible with many interfaces!
+
+## OpenAI-Compatible Server
+
+We currently support the following OpenAI APIs:
+
+- [Completions API](./openai_compatible_server.md#completions-api) (`/v1/completions`)
+    - Only applicable to [text generation models](../../models/generative_models.md).
+    - *Note: `suffix` parameter is not supported.*
+- [Responses API](./openai_compatible_server.md#responses-api) (`/v1/responses`)
+    - Only applicable to [text generation models](../../models/generative_models.md).
+- [Chat Completions API](./openai_compatible_server.md#chat-api) (`/v1/chat/completions`)
+    - Only applicable to [text generation models](../../models/generative_models.md) with a [chat template](./openai_compatible_server.md#chat-template).
+    - *Note: `user` parameter is ignored.*
+    - *Note:* Setting the `parallel_tool_calls` parameter to `false` ensures vLLM only returns zero or one tool call per request. Setting it to `true` (the default) allows returning more than one tool call per request. There is no guarantee more than one tool call will be returned if this is set to `true`, as that behavior is model dependent and not all models are designed to support parallel tool calls.
+- [Embeddings API](../../models/pooling_models/embed.md#openai-compatible-embeddings-api) (`/v1/embeddings`)
+    - Only applicable to [embedding models](../../models/pooling_models/embed.md).
+- [Transcriptions API](./speech_to_text.md#transcriptions-api) (`/v1/audio/transcriptions`)
+    - Only applicable to [Automatic Speech Recognition (ASR) models](../../models/supported_models.md#transcription).
+- [Translation API](./speech_to_text.md#translations-api) (`/v1/audio/translations`)
+    - Only applicable to [Automatic Speech Recognition (ASR) models](../../models/supported_models.md#transcription).
+
+## Anthropic APIs
+
+- Anthropic messages API (`/v1/messages`)
+
+## Cohere APIs
+
+- [Cohere Embed API](../../models/pooling_models/embed.md#cohere-embed-api) (`/v2/embed`)
+    - Compatible with [Cohere's Embed API](https://docs.cohere.com/reference/embed)
+    - Works with any [embedding model](../../models/pooling_models/embed.md#supported-models), including multimodal models.
+- [Cohere Rerank API](../../models/pooling_models/scoring.md#rerank-api) (`/rerank`, `/v1/rerank`, `/v2/rerank`)
+    - Implements [Jina AI's v1 rerank API](https://jina.ai/reranker/)
+    - compatible with [Cohere's v1 & v2 rerank APIs](https://docs.cohere.com/v2/reference/rerank)
+
+## SageMaker APIs
+
+- `/invocations` - SageMaker-compatible endpoint (routes to the same inference functions as `/v1` endpoints)
+
+## Pooling APIs
+
+For further details on pooling models, please refer to [this page](../../models/pooling_models/README.md).
+
+- [Classification Usages](../../models/pooling_models/classify.md)
+    - [Classification API](../../models/pooling_models/classify.md#online-serving) (`/classify`)
+    - Only applicable to [classification models](../../models/pooling_models/classify.md).
+- [Embedding Usages](../../models/pooling_models/embed.md)
+    - [Cohere Embed API](../../models/pooling_models/embed.md#cohere-embed-api) (`/v2/embed`)
+    - [OpenAI-compatible Embeddings API](../../models/pooling_models/embed.md#openai-compatible-embeddings-api) (`/v1/embeddings`)
+    - Only applicable to [embedding models](../../models/pooling_models/embed.md).
+- [Scoring Usages](../../models/pooling_models/scoring.md)
+    - [Score API](../../models/pooling_models/scoring.md#score-api) (`/score`)
+    - [Cohere Rerank API](../../models/pooling_models/scoring.md#rerank-api) (`/rerank`, `/v1/rerank`, `/v2/rerank`)
+    - Applicable to [score models](../../models/pooling_models/scoring.md) (cross-encoder, bi-encoder, late-interaction).
+- [Pooling API](../../models/pooling_models/README.md#pooling-api) (`/pooling`)
+    - Applicable to all [pooling models](../../models/pooling_models/README.md).
+
+## Speech to Text APIs
+
+For further details on speech to text, please refer to [this page](speech_to_text.md).
+
+- [Transcriptions API](./speech_to_text.md#transcriptions-api) (`/v1/audio/transcriptions`)
+    - Only applicable to [Automatic Speech Recognition (ASR) models](../../models/supported_models.md#transcription).
+- [Translation API](./speech_to_text.md#translations-api) (`/v1/audio/translations`)
+    - Only applicable to [Automatic Speech Recognition (ASR) models](../../models/supported_models.md#transcription).
+- [Realtime API](./speech_to_text.md#realtime-api) (`/v1/realtime`)
+    - Only applicable to [Automatic Speech Recognition (ASR) models](../../models/supported_models.md#realtime-transcription).
+
+## Disaggregated APIs
+
+### Renderer APIs
+
+For further details on renderer APIs, please refer to [this page](renderer.md).
+
+- [Completions Render API](renderer.md) (`/v1/completions/render`)
+    - Render completion requests
+- [Chat Completions Render API](renderer.md) (`/v1/chat/completions/render`)
+    - Render chat completions
+
+## Custom APIs
+
+- [Classification API](../../models/pooling_models/classify.md#classification-api) (`/classify`)
+    - Only applicable to [classification models](../../models/pooling_models/classify.md).
+- [Score API](../../models/pooling_models/scoring.md#score-api) (`/score`, `/v1/score`)
+    - Applicable to [score models](../../models/pooling_models/scoring.md) (cross-encoder, bi-encoder, late-interaction).
+- [Pooling API](../../models/pooling_models/README.md#pooling-api) (`/pooling`)
+    - Applicable to all [pooling models](../../models/pooling_models/README.md).
+- [Generative Scoring API](generative_scoring.md#generative-scoring-api) (`/generative_scoring`)
+    - Applicable to [CausalLM models](../../models/generative_models.md) (task `"generate"`).
+    - Computes next-token probabilities for specified `label_token_ids`.
+
+## Utility APIs
+
+- `/tokenize` - Tokenize text
+- `/detokenize` - Detokenize tokens
+- `/health` - Health check
+- `/ping` - SageMaker health check
+- `/version` - Version information
+- `/load` - Server load metrics
+
+## Sleep Mode APIs
+
+For further details on sleep mode, please refer to [this page](../../features/sleep_mode.md).
+
+- `/sleep` - Put engine to sleep (causes denial of service)
+- `/wake_up` - Wake engine from sleep
+- `/is_sleeping` - Check if engine is sleeping
+- `/collective_rpc` - Execute arbitrary RPC methods on the engine (extremely dangerous)
+
+## Chat Template
+
+In order for the language model to support chat protocol, vLLM requires the model to include
+a chat template in its tokenizer configuration. The chat template is a Jinja2 template that
+specifies how roles, messages, and other chat-specific tokens are encoded in the input.
+
+An example chat template for `NousResearch/Meta-Llama-3-8B-Instruct` can be found [here](https://llama.com/docs/model-cards-and-prompt-formats/meta-llama-3/#prompt-template-for-meta-llama-3)
+
+Some models do not provide a chat template even though they are instruction/chat fine-tuned. For those models,
+you can manually specify their chat template in the `--chat-template` parameter with the file path to the chat
+template, or the template in string form. Without a chat template, the server will not be able to process chat
+and all chat requests will error.
+
+```bash
+vllm serve <model> --chat-template ./path-to-chat-template.jinja
+```
+
+vLLM community provides a set of chat templates for popular models. You can find them under the [examples](../../../examples) directory.
+
+With the inclusion of multi-modal chat APIs, the OpenAI spec now accepts chat messages in a new format which specifies
+both a `type` and a `text` field. An example is provided below:
+
+```python
+completion = client.chat.completions.create(
+    model="NousResearch/Meta-Llama-3-8B-Instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Classify this sentiment: vLLM is wonderful!"},
+            ],
+        },
+    ],
+)
+```
+
+Most chat templates for LLMs expect the `content` field to be a string, but there are some newer models like
+`meta-llama/Llama-Guard-3-1B` that expect the content to be formatted according to the OpenAI schema in the
+request. vLLM provides best-effort support to detect this automatically, which is logged as a string like
+*"Detected the chat template content format to be..."*, and internally converts incoming requests to match
+the detected format, which can be one of:
+
+- `"string"`: A string.
+    - Example: `"Hello world"`
+- `"openai"`: A list of dictionaries, similar to OpenAI schema.
+    - Example: `[{"type": "text", "text": "Hello world!"}]`
+
+If the result is not what you expect, you can set the `--chat-template-content-format` CLI argument
+to override which format to use.
+
+## Offline API Documentation
+
+The FastAPI `/docs` endpoint requires an internet connection by default. To enable offline access in air-gapped environments, use the `--enable-offline-docs` flag:
+
+```bash
+vllm serve NousResearch/Meta-Llama-3-8B-Instruct --enable-offline-docs
+```
+
+## Ray Serve LLM
+
+Ray Serve LLM enables scalable, production-grade serving of the vLLM engine. It integrates tightly with vLLM and extends it with features such as auto-scaling, load balancing, and back-pressure.
+
+Key capabilities:
+
+- Exposes an OpenAI-compatible HTTP API as well as a Pythonic API.
+- Scales from a single GPU to a multi-node cluster without code changes.
+- Provides observability and autoscaling policies through Ray dashboards and metrics.
+
+The following example shows how to deploy a large model like DeepSeek R1 with Ray Serve LLM: [examples/ray_serving/ray_serve_deepseek.py](../../../examples/ray_serving/ray_serve_deepseek.py).
+
+Learn more about Ray Serve LLM with the official [Ray Serve LLM documentation](https://docs.ray.io/en/latest/serve/llm/index.html).
diff --git a/docs/serving/online_serving/generative_scoring.md b/docs/serving/online_serving/generative_scoring.md
new file mode 100644
index 000000000000..d0c578f2105b
--- /dev/null
+++ b/docs/serving/online_serving/generative_scoring.md
@@ -0,0 +1,64 @@
+# Generative Scoring
+
+The `/generative_scoring` endpoint uses a CausalLM model (e.g., Llama, Qwen, Mistral) to compute the probability of specified token IDs appearing as the next token. Each item (document) is concatenated with the query to form a prompt, and the model predicts how likely each label token is as the next token after that prompt. This lets you score items against a query — for example, asking "Is this the capital of France?" and scoring each city by how likely the model is to answer "Yes".
+
+This endpoint is automatically available when the server is started with a generative model (task `"generate"`). It is separate from the pooling-based [Score API](../../models/pooling_models/scoring.md#score-api), which uses cross-encoder, bi-encoder, or late-interaction models.
+
+**Requirements:**
+
+- The `label_token_ids` parameter is **required** and must contain **at least 1 token ID**.
+- When 2 label tokens are provided, the score equals `P(label_token_ids[0]) / (P(label_token_ids[0]) + P(label_token_ids[1]))` (softmax over the two labels).
+- When more labels are provided, the score is the softmax-normalized probability of the first label token across all label tokens.
+
+## How it works
+
+1. **Prompt Construction**: For each item, builds `prompt = query + item` (or `item + query` if `item_first=true`)
+2. **Forward Pass**: Runs the model on each prompt to get next-token logits
+3. **Probability Extraction**: Extracts logprobs for the specified `label_token_ids`
+4. **Softmax Normalization**: Applies softmax over only the label tokens (when `apply_softmax=true`)
+5. **Score**: Returns the normalized probability of the first label token
+
+## Finding Token IDs
+
+To find the token IDs for your labels, use the tokenizer:
+
+```python
+from transformers import AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")
+yes_id = tokenizer.encode("Yes", add_special_tokens=False)[0]
+no_id = tokenizer.encode("No", add_special_tokens=False)[0]
+print(f"Yes: {yes_id}, No: {no_id}")
+```
+
+## Example
+
+```bash
+curl -X POST http://localhost:8000/generative_scoring \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Qwen/Qwen3-0.6B",
+    "query": "Is this city the capital of France?",
+    "items": ["Paris", "London", "Berlin"],
+    "label_token_ids": [9454, 2753]
+  }'
+```
+
+Here, each item is appended to the query to form prompts like `"Is this city the capital of France? Paris"`, `"... London"`, etc. The model then predicts the next token, and the score reflects the probability of "Yes" (token 9454) vs "No" (token 2753).
+
+??? console "Response"
+
+    ```json
+    {
+      "id": "generative-scoring-abc123",
+      "object": "list",
+      "created": 1234567890,
+      "model": "Qwen/Qwen3-0.6B",
+      "data": [
+        {"index": 0, "object": "score", "score": 0.95},
+        {"index": 1, "object": "score", "score": 0.12},
+        {"index": 2, "object": "score", "score": 0.08}
+      ],
+      "usage": {"prompt_tokens": 45, "total_tokens": 48, "completion_tokens": 3}
+    }
+    ```
diff --git a/docs/serving/online_serving/openai_compatible_server.md b/docs/serving/online_serving/openai_compatible_server.md
new file mode 100644
index 000000000000..245de012bff1
--- /dev/null
+++ b/docs/serving/online_serving/openai_compatible_server.md
@@ -0,0 +1,193 @@
+# OpenAI-Compatible Server
+
+vLLM provides an HTTP server that implements OpenAI's [Completions API](https://platform.openai.com/docs/api-reference/completions), [Chat API](https://platform.openai.com/docs/api-reference/chat), and more! This functionality lets you serve models and interact with them using an HTTP client.
+
+## Supported APIs
+
+We currently support the following OpenAI APIs:
+
+- [Completions API](#completions-api) (`/v1/completions`)
+    - Only applicable to [text generation models](../../models/generative_models.md).
+    - *Note: `suffix` parameter is not supported.*
+- [Responses API](#responses-api) (`/v1/responses`)
+    - Only applicable to [text generation models](../../models/generative_models.md).
+- [Chat Completions API](#chat-api) (`/v1/chat/completions`)
+    - Only applicable to [text generation models](../../models/generative_models.md) with a [chat template](../online_serving/README.md#chat-template).
+    - *Note: `user` parameter is ignored.*
+    - *Note:* Setting the `parallel_tool_calls` parameter to `false` ensures vLLM only returns zero or one tool call per request. Setting it to `true` (the default) allows returning more than one tool call per request. There is no guarantee more than one tool call will be returned if this is set to `true`, as that behavior is model dependent and not all models are designed to support parallel tool calls.
+- [Embeddings API](../../models/pooling_models/embed.md#openai-compatible-embeddings-api) (`/v1/embeddings`)
+    - Only applicable to [embedding models](../../models/pooling_models/embed.md).
+- [Transcriptions API](./speech_to_text.md#transcriptions-api) (`/v1/audio/transcriptions`)
+    - Only applicable to [Automatic Speech Recognition (ASR) models](../../models/supported_models.md#transcription).
+- [Translation API](./speech_to_text.md#translations-api) (`/v1/audio/translations`)
+    - Only applicable to [Automatic Speech Recognition (ASR) models](../../models/supported_models.md#transcription).
+
+## Completions API
+
+In your terminal, you can [install](../../getting_started/installation/README.md) vLLM, then start the server with the [`vllm serve`](../../configuration/serve_args.md) command. (You can also use our [Docker](../../deployment/docker.md) image.)
+
+```bash
+vllm serve NousResearch/Meta-Llama-3-8B-Instruct \
+  --dtype auto \
+  --api-key token-abc123
+```
+
+To call the server, in your preferred text editor, create a script that uses an HTTP client. Include any messages that you want to send to the model. Then run that script. Below is an example script using the [official OpenAI Python client](https://github.com/openai/openai-python).
+
+??? code
+
+    ```python
+    from openai import OpenAI
+    client = OpenAI(
+        base_url="http://localhost:8000/v1",
+        api_key="token-abc123",
+    )
+
+    completion = client.chat.completions.create(
+        model="NousResearch/Meta-Llama-3-8B-Instruct",
+        messages=[
+            {"role": "user", "content": "Hello!"},
+        ],
+    )
+
+    print(completion.choices[0].message)
+    ```
+
+!!! tip
+    vLLM supports some parameters that are not supported by OpenAI, `top_k` for example.
+    You can pass these parameters to vLLM using the OpenAI client in the `extra_body` parameter of your requests, i.e. `extra_body={"top_k": 50}` for `top_k`.
+
+!!! important
+    By default, the server applies `generation_config.json` from the Hugging Face model repository if it exists. This means the default values of certain sampling parameters can be overridden by those recommended by the model creator.
+
+    To disable this behavior, please pass `--generation-config vllm` when launching the server.
+
+## Extra Parameters
+
+vLLM supports a set of parameters that are not part of the OpenAI API.
+In order to use them, you can pass them as extra parameters in the OpenAI client.
+Or directly merge them into the JSON payload if you are using HTTP call directly.
+
+```python
+completion = client.chat.completions.create(
+    model="NousResearch/Meta-Llama-3-8B-Instruct",
+    messages=[
+        {"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"},
+    ],
+    extra_body={
+        "structured_outputs": {"choice": ["positive", "negative"]},
+    },
+)
+```
+
+## Extra HTTP Headers
+
+Only `X-Request-Id` HTTP request header is supported for now. It can be enabled
+with `--enable-request-id-headers`.
+
+??? code
+
+    ```python
+    completion = client.chat.completions.create(
+        model="NousResearch/Meta-Llama-3-8B-Instruct",
+        messages=[
+            {"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"},
+        ],
+        extra_headers={
+            "x-request-id": "sentiment-classification-00001",
+        },
+    )
+    print(completion._request_id)
+
+    completion = client.completions.create(
+        model="NousResearch/Meta-Llama-3-8B-Instruct",
+        prompt="A robot may not injure a human being",
+        extra_headers={
+            "x-request-id": "completion-test",
+        },
+    )
+    print(completion._request_id)
+    ```
+
+## API Reference
+
+### Completions API
+
+Our Completions API is compatible with [OpenAI's Completions API](https://platform.openai.com/docs/api-reference/completions);
+you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
+
+Code example: [examples/basic/online_serving/openai_completion_client.py](../../../examples/basic/online_serving/openai_completion_client.py)
+
+#### Extra parameters
+
+The following [sampling parameters](../../api/README.md#inference-parameters) are supported.
+
+??? code
+
+    ```python
+    --8<-- "vllm/entrypoints/openai/completion/protocol.py:completion-sampling-params"
+    ```
+
+The following extra parameters are supported:
+
+??? code
+
+    ```python
+    --8<-- "vllm/entrypoints/openai/completion/protocol.py:completion-extra-params"
+    ```
+
+### Chat API
+
+Our Chat API is compatible with [OpenAI's Chat Completions API](https://platform.openai.com/docs/api-reference/chat);
+you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
+
+We support both [Vision](https://platform.openai.com/docs/guides/vision)- and
+[Audio](https://platform.openai.com/docs/guides/audio?audio-generation-quickstart-example=audio-in)-related parameters;
+see our [Multimodal Inputs](../../features/multimodal_inputs.md) guide for more information.
+
+- *Note: `image_url.detail` parameter is not supported.*
+
+Code example: [examples/basic/online_serving/openai_chat_completion_client.py](../../../examples/basic/online_serving/openai_chat_completion_client.py)
+
+#### Extra parameters
+
+The following [sampling parameters](../../api/README.md#inference-parameters) are supported.
+
+??? code
+
+    ```python
+    --8<-- "vllm/entrypoints/openai/chat_completion/protocol.py:chat-completion-sampling-params"
+    ```
+
+The following extra parameters are supported:
+
+??? code
+
+    ```python
+    --8<-- "vllm/entrypoints/openai/chat_completion/protocol.py:chat-completion-extra-params"
+    ```
+
+### Responses API
+
+Our Responses API is compatible with [OpenAI's Responses API](https://platform.openai.com/docs/api-reference/responses);
+you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
+
+Code example: [examples/tool_calling/openai_responses_client_with_tools.py](../../../examples/tool_calling/openai_responses_client_with_tools.py)
+
+#### Extra parameters
+
+The following extra parameters in the request object are supported:
+
+??? code
+
+    ```python
+    --8<-- "vllm/entrypoints/openai/responses/protocol.py:responses-extra-params"
+    ```
+
+The following extra parameters in the response object are supported:
+
+??? code
+
+    ```python
+    --8<-- "vllm/entrypoints/openai/responses/protocol.py:responses-response-extra-params"
+    ```
diff --git a/docs/serving/online_serving/renderer.md b/docs/serving/online_serving/renderer.md
new file mode 100644
index 000000000000..9ea2f369db81
--- /dev/null
+++ b/docs/serving/online_serving/renderer.md
@@ -0,0 +1,14 @@
+# Renderer APIs
+
+Our renderer API is designed to disaggregate the render phase(preprocessing) and enable a token-in / token-out API server.
+
+- GPU-less deployment of frontend: Allow preprocessing (tokenization, MM input processing) and postprocessing (detokenization, tool call parsing, reasoning parsing) to run without GPU.
+- Disaggregated tokenization: Support use cases such as llm-d, Dynamo, and custom frontends that need to leverage vLLM's preprocessing logic without running the full inference engine.
+- Tokens-in / tokens-out engine: Make the engine a pure token-in / token-out service, decoupled from request preprocessing.
+
+## API Reference
+
+- [Completions Render API](renderer.md) (`/v1/completions/render`)
+    - Render completion requests
+- [Chat Completions Render API](renderer.md) (`/v1/chat/completions/render`)
+    - Render chat completions
diff --git a/docs/serving/online_serving/speech_to_text.md b/docs/serving/online_serving/speech_to_text.md
new file mode 100644
index 000000000000..d503923c4f9d
--- /dev/null
+++ b/docs/serving/online_serving/speech_to_text.md
@@ -0,0 +1,189 @@
+# Speech to Text APIs
+
+## Transcriptions API
+
+Our Transcriptions API is compatible with [OpenAI's Transcriptions API](https://platform.openai.com/docs/api-reference/audio/createTranscription);
+you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
+
+!!! note
+    To use the Transcriptions API, please install with extra audio dependencies using `pip install vllm[audio]`.
+
+Code example: [examples/speech_to_text/openai/openai_transcription_client.py](../../../examples/speech_to_text/openai/openai_transcription_client.py)
+
+NOTE: beam search is currently supported in the transcriptions endpoint for encoder-decoder multimodal models, e.g., whisper, but highly inefficient as work for handling the encoder/decoder cache is actively ongoing. This is an active point of ongoing optimization and will be handled properly in the very near future.
+
+### API Enforced Limits
+
+Set the maximum audio file size (in MB) that VLLM will accept, via the
+`VLLM_MAX_AUDIO_CLIP_FILESIZE_MB` environment variable. Default is 25 MB.
+
+### Uploading Audio Files
+
+The Transcriptions API supports uploading audio files in various formats including FLAC, MP3, MP4, MPEG, MPGA, M4A, OGG, WAV, and WEBM.
+
+**Using OpenAI Python Client:**
+
+??? code
+
+    ```python
+    from openai import OpenAI
+
+    client = OpenAI(
+        base_url="http://localhost:8000/v1",
+        api_key="token-abc123",
+    )
+
+    # Upload audio file from disk
+    with open("audio.mp3", "rb") as audio_file:
+        transcription = client.audio.transcriptions.create(
+            model="openai/whisper-large-v3-turbo",
+            file=audio_file,
+            language="en",
+            response_format="verbose_json",
+        )
+
+    print(transcription.text)
+    ```
+
+**Using curl with multipart/form-data:**
+
+??? code
+
+    ```bash
+    curl -X POST "http://localhost:8000/v1/audio/transcriptions" \
+      -H "Authorization: Bearer token-abc123" \
+      -F "file=@audio.mp3" \
+      -F "model=openai/whisper-large-v3-turbo" \
+      -F "language=en" \
+      -F "response_format=verbose_json"
+    ```
+
+**Supported Parameters:**
+
+- `file`: The audio file to transcribe (required)
+- `model`: The model to use for transcription (required)
+- `language`: The language code (e.g., "en", "zh") (optional)
+- `prompt`: Optional text to guide the transcription style (optional)
+- `response_format`: Format of the response ("json", "text") (optional)
+- `temperature`: Sampling temperature between 0 and 1 (optional)
+
+For the complete list of supported parameters including sampling parameters and vLLM extensions, see the [protocol definitions](https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/protocol.py#L2182).
+
+**Response Format:**
+
+For `verbose_json` response format:
+
+??? code
+
+    ```json
+    {
+      "text": "Hello, this is a transcription of the audio file.",
+      "language": "en",
+      "duration": 5.42,
+      "segments": [
+        {
+          "id": 0,
+          "seek": 0,
+          "start": 0.0,
+          "end": 2.5,
+          "text": "Hello, this is a transcription",
+          "tokens": [50364, 938, 428, 307, 275, 28347],
+          "temperature": 0.0,
+          "avg_logprob": -0.245,
+          "compression_ratio": 1.235,
+          "no_speech_prob": 0.012
+        }
+      ]
+    }
+    ```
+Currently “verbose_json” response format doesn’t support no_speech_prob.
+
+### Extra Parameters
+
+The following [sampling parameters](../../api/README.md#inference-parameters) are supported.
+
+??? code
+
+    ```python
+    --8<-- "vllm/entrypoints/speech_to_text/transcription/protocol.py:transcription-sampling-params"
+    ```
+
+The following extra parameters are supported:
+
+??? code
+
+    ```python
+    --8<-- "vllm/entrypoints/speech_to_text/transcription/protocol.py:transcription-extra-params"
+    ```
+
+## Translations API
+
+Our Translation API is compatible with [OpenAI's Translations API](https://platform.openai.com/docs/api-reference/audio/createTranslation);
+you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
+Whisper models can translate audio from one of the 55 non-English supported languages into English.
+Please mind that the popular `openai/whisper-large-v3-turbo` model does not support translating.
+
+!!! note
+    To use the Translation API, please install with extra audio dependencies using `pip install vllm[audio]`.
+
+Code example: [examples/speech_to_text/openai/openai_translation_client.py](../../../examples/speech_to_text/openai/openai_translation_client.py)
+
+### Extra Parameters
+
+The following [sampling parameters](../../api/README.md#inference-parameters) are supported.
+
+```python
+--8<-- "vllm/entrypoints/speech_to_text/translation/protocol.py:translation-sampling-params"
+```
+
+The following extra parameters are supported:
+
+```python
+--8<-- "vllm/entrypoints/speech_to_text/translation/protocol.py:translation-extra-params"
+```
+
+## Realtime API
+
+The Realtime API provides WebSocket-based streaming audio transcription, allowing real-time speech-to-text as audio is being recorded.
+
+!!! note
+    To use the Realtime API, please install with extra audio dependencies using `uv pip install vllm[audio]`.
+
+### Audio Format
+
+Audio must be sent as base64-encoded PCM16 audio at 16kHz sample rate, mono channel.
+
+### Protocol Overview
+
+1. Client connects to `ws://host/v1/realtime`
+2. Server sends `session.created` event
+3. Client optionally sends `session.update` with model/params
+4. Client sends `input_audio_buffer.commit` when ready
+5. Client sends `input_audio_buffer.append` events with base64 PCM16 chunks
+6. Server sends `transcription.delta` events with incremental text
+7. Server sends `transcription.done` with final text + usage
+8. Repeat from step 5 for next utterance
+9. Optionally, client sends input_audio_buffer.commit with final=True
+    to signal audio input is finished. Useful when streaming audio files
+
+### Client → Server Events
+
+| Event | Description |
+| ----- | ----------- |
+| `input_audio_buffer.append` | Send base64-encoded audio chunk: `{"type": "input_audio_buffer.append", "audio": "<base64>"}` |
+| `input_audio_buffer.commit` | Trigger transcription processing or end: `{"type": "input_audio_buffer.commit", "final": bool}` |
+| `session.update` | Configure session: `{"type": "session.update", "model": "model-name"}` |
+
+### Server → Client Events
+
+| Event | Description |
+| ----- | ----------- |
+| `session.created` | Connection established with session ID and timestamp |
+| `transcription.delta` | Incremental transcription text: `{"type": "transcription.delta", "delta": "text"}` |
+| `transcription.done` | Final transcription with usage stats |
+| `error` | Error notification with message and optional code |
+
+#### Example Clients
+
+- [openai_realtime_client.py](https://github.com/vllm-project/vllm/tree/main/examples/speech_to_text/realtime/openai_realtime_client.py) - Upload and transcribe an audio file
+- [openai_realtime_microphone_client.py](https://github.com/vllm-project/vllm/tree/main/examples/speech_to_text/realtime/openai_realtime_microphone_client.py) - Gradio demo for live microphone transcription
diff --git a/docs/serving/openai_compatible_server.md b/docs/serving/openai_compatible_server.md
deleted file mode 100644
index 157904aa8310..000000000000
--- a/docs/serving/openai_compatible_server.md
+++ /dev/null
@@ -1,496 +0,0 @@
-# OpenAI-Compatible Server
-
-vLLM provides an HTTP server that implements OpenAI's [Completions API](https://platform.openai.com/docs/api-reference/completions), [Chat API](https://platform.openai.com/docs/api-reference/chat), and more! This functionality lets you serve models and interact with them using an HTTP client.
-
-In your terminal, you can [install](../getting_started/installation/README.md) vLLM, then start the server with the [`vllm serve`](../configuration/serve_args.md) command. (You can also use our [Docker](../deployment/docker.md) image.)
-
-```bash
-vllm serve NousResearch/Meta-Llama-3-8B-Instruct \
-  --dtype auto \
-  --api-key token-abc123
-```
-
-To call the server, in your preferred text editor, create a script that uses an HTTP client. Include any messages that you want to send to the model. Then run that script. Below is an example script using the [official OpenAI Python client](https://github.com/openai/openai-python).
-
-??? code
-
-    ```python
-    from openai import OpenAI
-    client = OpenAI(
-        base_url="http://localhost:8000/v1",
-        api_key="token-abc123",
-    )
-
-    completion = client.chat.completions.create(
-        model="NousResearch/Meta-Llama-3-8B-Instruct",
-        messages=[
-            {"role": "user", "content": "Hello!"},
-        ],
-    )
-
-    print(completion.choices[0].message)
-    ```
-
-!!! tip
-    vLLM supports some parameters that are not supported by OpenAI, `top_k` for example.
-    You can pass these parameters to vLLM using the OpenAI client in the `extra_body` parameter of your requests, i.e. `extra_body={"top_k": 50}` for `top_k`.
-
-!!! important
-    By default, the server applies `generation_config.json` from the Hugging Face model repository if it exists. This means the default values of certain sampling parameters can be overridden by those recommended by the model creator.
-
-    To disable this behavior, please pass `--generation-config vllm` when launching the server.
-
-## Supported APIs
-
-We currently support the following OpenAI APIs:
-
-- [Completions API](#completions-api) (`/v1/completions`)
-    - Only applicable to [text generation models](../models/generative_models.md).
-    - *Note: `suffix` parameter is not supported.*
-- [Responses API](#responses-api) (`/v1/responses`)
-    - Only applicable to [text generation models](../models/generative_models.md).
-- [Chat Completions API](#chat-api) (`/v1/chat/completions`)
-    - Only applicable to [text generation models](../models/generative_models.md) with a [chat template](../serving/openai_compatible_server.md#chat-template).
-    - *Note: `user` parameter is ignored.*
-    - *Note:* Setting the `parallel_tool_calls` parameter to `false` ensures vLLM only returns zero or one tool call per request. Setting it to `true` (the default) allows returning more than one tool call per request. There is no guarantee more than one tool call will be returned if this is set to `true`, as that behavior is model dependent and not all models are designed to support parallel tool calls.
-- [Embeddings API](../models/pooling_models/embed.md#openai-compatible-embeddings-api) (`/v1/embeddings`)
-    - Only applicable to [embedding models](../models/pooling_models/embed.md).
-- [Transcriptions API](#transcriptions-api) (`/v1/audio/transcriptions`)
-    - Only applicable to [Automatic Speech Recognition (ASR) models](../models/supported_models.md#transcription).
-- [Translation API](#translations-api) (`/v1/audio/translations`)
-    - Only applicable to [Automatic Speech Recognition (ASR) models](../models/supported_models.md#transcription).
-- [Realtime API](#realtime-api) (`/v1/realtime`)
-    - Only applicable to [Automatic Speech Recognition (ASR) models](../models/supported_models.md#transcription).
-
-In addition, we have the following custom APIs:
-
-- [Tokenizer API](#tokenizer-api) (`/tokenize`, `/detokenize`)
-    - Applicable to any model with a tokenizer.
-- [pooling API](../models/pooling_models/README.md#pooling-api) (`/pooling`)
-    - Applicable to all [pooling models](../models/pooling_models/README.md).
-- [Classification API](../models/pooling_models/classify.md#classification-api) (`/classify`)
-    - Only applicable to [classification models](../models/pooling_models/classify.md).
-- [Cohere Embed API](../models/pooling_models/embed.md#cohere-embed-api) (`/v2/embed`)
-    - Compatible with [Cohere's Embed API](https://docs.cohere.com/reference/embed)
-    - Works with any [embedding model](../models/pooling_models/embed.md#supported-models), including multimodal models.
-- [Score API](../models/pooling_models/scoring.md#score-api) (`/score`)
-    - Applicable to [score models](../models/pooling_models/scoring.md).
-- [Rerank API](../models/pooling_models/scoring.md#rerank-api) (`/rerank`, `/v1/rerank`, `/v2/rerank`)
-    - Implements [Jina AI's v1 rerank API](https://jina.ai/reranker/)
-    - Also compatible with [Cohere's v1 & v2 rerank APIs](https://docs.cohere.com/v2/reference/rerank)
-    - Jina and Cohere's APIs are very similar; Jina's includes extra information in the rerank endpoint's response.
-
-## Chat Template
-
-In order for the language model to support chat protocol, vLLM requires the model to include
-a chat template in its tokenizer configuration. The chat template is a Jinja2 template that
-specifies how roles, messages, and other chat-specific tokens are encoded in the input.
-
-An example chat template for `NousResearch/Meta-Llama-3-8B-Instruct` can be found [here](https://llama.com/docs/model-cards-and-prompt-formats/meta-llama-3/#prompt-template-for-meta-llama-3)
-
-Some models do not provide a chat template even though they are instruction/chat fine-tuned. For those models,
-you can manually specify their chat template in the `--chat-template` parameter with the file path to the chat
-template, or the template in string form. Without a chat template, the server will not be able to process chat
-and all chat requests will error.
-
-```bash
-vllm serve <model> --chat-template ./path-to-chat-template.jinja
-```
-
-vLLM community provides a set of chat templates for popular models. You can find them under the [examples](../../examples) directory.
-
-With the inclusion of multi-modal chat APIs, the OpenAI spec now accepts chat messages in a new format which specifies
-both a `type` and a `text` field. An example is provided below:
-
-```python
-completion = client.chat.completions.create(
-    model="NousResearch/Meta-Llama-3-8B-Instruct",
-    messages=[
-        {
-            "role": "user",
-            "content": [
-                {"type": "text", "text": "Classify this sentiment: vLLM is wonderful!"},
-            ],
-        },
-    ],
-)
-```
-
-Most chat templates for LLMs expect the `content` field to be a string, but there are some newer models like
-`meta-llama/Llama-Guard-3-1B` that expect the content to be formatted according to the OpenAI schema in the
-request. vLLM provides best-effort support to detect this automatically, which is logged as a string like
-*"Detected the chat template content format to be..."*, and internally converts incoming requests to match
-the detected format, which can be one of:
-
-- `"string"`: A string.
-    - Example: `"Hello world"`
-- `"openai"`: A list of dictionaries, similar to OpenAI schema.
-    - Example: `[{"type": "text", "text": "Hello world!"}]`
-
-If the result is not what you expect, you can set the `--chat-template-content-format` CLI argument
-to override which format to use.
-
-## Extra Parameters
-
-vLLM supports a set of parameters that are not part of the OpenAI API.
-In order to use them, you can pass them as extra parameters in the OpenAI client.
-Or directly merge them into the JSON payload if you are using HTTP call directly.
-
-```python
-completion = client.chat.completions.create(
-    model="NousResearch/Meta-Llama-3-8B-Instruct",
-    messages=[
-        {"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"},
-    ],
-    extra_body={
-        "structured_outputs": {"choice": ["positive", "negative"]},
-    },
-)
-```
-
-## Extra HTTP Headers
-
-Only `X-Request-Id` HTTP request header is supported for now. It can be enabled
-with `--enable-request-id-headers`.
-
-??? code
-
-    ```python
-    completion = client.chat.completions.create(
-        model="NousResearch/Meta-Llama-3-8B-Instruct",
-        messages=[
-            {"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"},
-        ],
-        extra_headers={
-            "x-request-id": "sentiment-classification-00001",
-        },
-    )
-    print(completion._request_id)
-
-    completion = client.completions.create(
-        model="NousResearch/Meta-Llama-3-8B-Instruct",
-        prompt="A robot may not injure a human being",
-        extra_headers={
-            "x-request-id": "completion-test",
-        },
-    )
-    print(completion._request_id)
-    ```
-
-## Offline API Documentation
-
-The FastAPI `/docs` endpoint requires an internet connection by default. To enable offline access in air-gapped environments, use the `--enable-offline-docs` flag:
-
-```bash
-vllm serve NousResearch/Meta-Llama-3-8B-Instruct --enable-offline-docs
-```
-
-## API Reference
-
-### Completions API
-
-Our Completions API is compatible with [OpenAI's Completions API](https://platform.openai.com/docs/api-reference/completions);
-you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
-
-Code example: [examples/basic/online_serving/openai_completion_client.py](../../examples/basic/online_serving/openai_completion_client.py)
-
-#### Extra parameters
-
-The following [sampling parameters](../api/README.md#inference-parameters) are supported.
-
-??? code
-
-    ```python
-    --8<-- "vllm/entrypoints/openai/completion/protocol.py:completion-sampling-params"
-    ```
-
-The following extra parameters are supported:
-
-??? code
-
-    ```python
-    --8<-- "vllm/entrypoints/openai/completion/protocol.py:completion-extra-params"
-    ```
-
-### Chat API
-
-Our Chat API is compatible with [OpenAI's Chat Completions API](https://platform.openai.com/docs/api-reference/chat);
-you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
-
-We support both [Vision](https://platform.openai.com/docs/guides/vision)- and
-[Audio](https://platform.openai.com/docs/guides/audio?audio-generation-quickstart-example=audio-in)-related parameters;
-see our [Multimodal Inputs](../features/multimodal_inputs.md) guide for more information.
-
-- *Note: `image_url.detail` parameter is not supported.*
-
-Code example: [examples/basic/online_serving/openai_chat_completion_client.py](../../examples/basic/online_serving/openai_chat_completion_client.py)
-
-#### Extra parameters
-
-The following [sampling parameters](../api/README.md#inference-parameters) are supported.
-
-??? code
-
-    ```python
-    --8<-- "vllm/entrypoints/openai/chat_completion/protocol.py:chat-completion-sampling-params"
-    ```
-
-The following extra parameters are supported:
-
-??? code
-
-    ```python
-    --8<-- "vllm/entrypoints/openai/chat_completion/protocol.py:chat-completion-extra-params"
-    ```
-
-### Responses API
-
-Our Responses API is compatible with [OpenAI's Responses API](https://platform.openai.com/docs/api-reference/responses);
-you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
-
-Code example: [examples/online_serving/openai_responses_client_with_tools.py](../../examples/online_serving/openai_responses_client_with_tools.py)
-
-#### Extra parameters
-
-The following extra parameters in the request object are supported:
-
-??? code
-
-    ```python
-    --8<-- "vllm/entrypoints/openai/responses/protocol.py:responses-extra-params"
-    ```
-
-The following extra parameters in the response object are supported:
-
-??? code
-
-    ```python
-    --8<-- "vllm/entrypoints/openai/responses/protocol.py:responses-response-extra-params"
-    ```
-
-### Transcriptions API
-
-Our Transcriptions API is compatible with [OpenAI's Transcriptions API](https://platform.openai.com/docs/api-reference/audio/createTranscription);
-you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
-
-!!! note
-    To use the Transcriptions API, please install with extra audio dependencies using `pip install vllm[audio]`.
-
-Code example: [examples/online_serving/openai_transcription_client.py](../../examples/online_serving/openai_transcription_client.py)
-
-NOTE: beam search is currently supported in the transcriptions endpoint for encoder-decoder multimodal models, e.g., whisper, but highly inefficient as work for handling the encoder/decoder cache is actively ongoing. This is an active point of ongoing optimization and will be handled properly in the very near future.
-
-#### API Enforced Limits
-
-Set the maximum audio file size (in MB) that VLLM will accept, via the
-`VLLM_MAX_AUDIO_CLIP_FILESIZE_MB` environment variable. Default is 25 MB.
-
-#### Uploading Audio Files
-
-The Transcriptions API supports uploading audio files in various formats including FLAC, MP3, MP4, MPEG, MPGA, M4A, OGG, WAV, and WEBM.
-
-**Using OpenAI Python Client:**
-
-??? code
-
-    ```python
-    from openai import OpenAI
-
-    client = OpenAI(
-        base_url="http://localhost:8000/v1",
-        api_key="token-abc123",
-    )
-
-    # Upload audio file from disk
-    with open("audio.mp3", "rb") as audio_file:
-        transcription = client.audio.transcriptions.create(
-            model="openai/whisper-large-v3-turbo",
-            file=audio_file,
-            language="en",
-            response_format="verbose_json",
-        )
-
-    print(transcription.text)
-    ```
-
-**Using curl with multipart/form-data:**
-
-??? code
-
-    ```bash
-    curl -X POST "http://localhost:8000/v1/audio/transcriptions" \
-      -H "Authorization: Bearer token-abc123" \
-      -F "file=@audio.mp3" \
-      -F "model=openai/whisper-large-v3-turbo" \
-      -F "language=en" \
-      -F "response_format=verbose_json"
-    ```
-
-**Supported Parameters:**
-
-- `file`: The audio file to transcribe (required)
-- `model`: The model to use for transcription (required)
-- `language`: The language code (e.g., "en", "zh") (optional)
-- `prompt`: Optional text to guide the transcription style (optional)
-- `response_format`: Format of the response ("json", "text") (optional)
-- `temperature`: Sampling temperature between 0 and 1 (optional)
-
-For the complete list of supported parameters including sampling parameters and vLLM extensions, see the [protocol definitions](https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/protocol.py#L2182).
-
-**Response Format:**
-
-For `verbose_json` response format:
-
-??? code
-
-    ```json
-    {
-      "text": "Hello, this is a transcription of the audio file.",
-      "language": "en",
-      "duration": 5.42,
-      "segments": [
-        {
-          "id": 0,
-          "seek": 0,
-          "start": 0.0,
-          "end": 2.5,
-          "text": "Hello, this is a transcription",
-          "tokens": [50364, 938, 428, 307, 275, 28347],
-          "temperature": 0.0,
-          "avg_logprob": -0.245,
-          "compression_ratio": 1.235,
-          "no_speech_prob": 0.012
-        }
-      ]
-    }
-    ```
-Currently “verbose_json” response format doesn’t support no_speech_prob.
-
-#### Extra Parameters
-
-The following [sampling parameters](../api/README.md#inference-parameters) are supported.
-
-??? code
-
-    ```python
-    --8<-- "vllm/entrypoints/openai/speech_to_text/protocol.py:transcription-sampling-params"
-    ```
-
-The following extra parameters are supported:
-
-??? code
-
-    ```python
-    --8<-- "vllm/entrypoints/openai/speech_to_text/protocol.py:transcription-extra-params"
-    ```
-
-### Translations API
-
-Our Translation API is compatible with [OpenAI's Translations API](https://platform.openai.com/docs/api-reference/audio/createTranslation);
-you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
-Whisper models can translate audio from one of the 55 non-English supported languages into English.
-Please mind that the popular `openai/whisper-large-v3-turbo` model does not support translating.
-
-!!! note
-    To use the Translation API, please install with extra audio dependencies using `pip install vllm[audio]`.
-
-Code example: [examples/online_serving/openai_translation_client.py](../../examples/online_serving/openai_translation_client.py)
-
-#### Extra Parameters
-
-The following [sampling parameters](../api/README.md#inference-parameters) are supported.
-
-```python
---8<-- "vllm/entrypoints/openai/speech_to_text/protocol.py:translation-sampling-params"
-```
-
-The following extra parameters are supported:
-
-```python
---8<-- "vllm/entrypoints/openai/speech_to_text/protocol.py:translation-extra-params"
-```
-
-### Realtime API
-
-The Realtime API provides WebSocket-based streaming audio transcription, allowing real-time speech-to-text as audio is being recorded.
-
-!!! note
-    To use the Realtime API, please install with extra audio dependencies using `uv pip install vllm[audio]`.
-
-#### Audio Format
-
-Audio must be sent as base64-encoded PCM16 audio at 16kHz sample rate, mono channel.
-
-#### Protocol Overview
-
-1. Client connects to `ws://host/v1/realtime`
-2. Server sends `session.created` event
-3. Client optionally sends `session.update` with model/params
-4. Client sends `input_audio_buffer.commit` when ready
-5. Client sends `input_audio_buffer.append` events with base64 PCM16 chunks
-6. Server sends `transcription.delta` events with incremental text
-7. Server sends `transcription.done` with final text + usage
-8. Repeat from step 5 for next utterance
-9. Optionally, client sends input_audio_buffer.commit with final=True
-    to signal audio input is finished. Useful when streaming audio files
-
-#### Client → Server Events
-
-| Event | Description |
-| ----- | ----------- |
-| `input_audio_buffer.append` | Send base64-encoded audio chunk: `{"type": "input_audio_buffer.append", "audio": "<base64>"}` |
-| `input_audio_buffer.commit` | Trigger transcription processing or end: `{"type": "input_audio_buffer.commit", "final": bool}` |
-| `session.update` | Configure session: `{"type": "session.update", "model": "model-name"}` |
-
-#### Server → Client Events
-
-| Event | Description |
-| ----- | ----------- |
-| `session.created` | Connection established with session ID and timestamp |
-| `transcription.delta` | Incremental transcription text: `{"type": "transcription.delta", "delta": "text"}` |
-| `transcription.done` | Final transcription with usage stats |
-| `error` | Error notification with message and optional code |
-
-#### Example Clients
-
-- [openai_realtime_client.py](https://github.com/vllm-project/vllm/tree/main/examples/online_serving/openai_realtime_client.py) - Upload and transcribe an audio file
-- [openai_realtime_microphone_client.py](https://github.com/vllm-project/vllm/tree/main/examples/online_serving/openai_realtime_microphone_client.py) - Gradio demo for live microphone transcription
-
-### Tokenizer API
-
-Our Tokenizer API is a simple wrapper over [HuggingFace-style tokenizers](https://huggingface.co/docs/transformers/en/main_classes/tokenizer).
-It consists of two endpoints:
-
-- `/tokenize` corresponds to calling `tokenizer.encode()`.
-- `/detokenize` corresponds to calling `tokenizer.decode()`.
-
-### Score API
-
-#### Score Template
-
-Some scoring models require a specific prompt format to work correctly. You can specify a custom score template using the `--chat-template` parameter (see [Chat Template](#chat-template)).
-
-Score templates are supported for **cross-encoder** models only. If you are using an **embedding** model for scoring, vLLM does not apply a score template.
-
-Like chat templates, the score template receives a `messages` list. For scoring, each message has a `role` attribute—either `"query"` or `"document"`. For the usual kind of point-wise cross-encoder, you can expect exactly two messages: one query and one document. To access the query and document content, use Jinja's `selectattr` filter:
-
-- **Query**: `{{ (messages | selectattr("role", "eq", "query") | first).content }}`
-- **Document**: `{{ (messages | selectattr("role", "eq", "document") | first).content }}`
-
-This approach is more robust than index-based access (`messages[0]`, `messages[1]`) because it selects messages by their semantic role. It also avoids assumptions about message ordering if additional message types are added to `messages` in the future.
-
-Example template file: [examples/pooling/score/template/nemotron-rerank.jinja](../../examples/pooling/score/template/nemotron-rerank.jinja)
-
-## Ray Serve LLM
-
-Ray Serve LLM enables scalable, production-grade serving of the vLLM engine. It integrates tightly with vLLM and extends it with features such as auto-scaling, load balancing, and back-pressure.
-
-Key capabilities:
-
-- Exposes an OpenAI-compatible HTTP API as well as a Pythonic API.
-- Scales from a single GPU to a multi-node cluster without code changes.
-- Provides observability and autoscaling policies through Ray dashboards and metrics.
-
-The following example shows how to deploy a large model like DeepSeek R1 with Ray Serve LLM: [examples/online_serving/ray_serve_deepseek.py](../../examples/online_serving/ray_serve_deepseek.py).
-
-Learn more about Ray Serve LLM with the official [Ray Serve LLM documentation](https://docs.ray.io/en/latest/serve/llm/index.html).
diff --git a/docs/serving/parallelism_scaling.md b/docs/serving/parallelism_scaling.md
index b69ca17e8334..0f86a256727c 100644
--- a/docs/serving/parallelism_scaling.md
+++ b/docs/serving/parallelism_scaling.md
@@ -78,7 +78,7 @@ For details, see the [Ray documentation](https://docs.ray.io/en/latest/index.htm
 
 ### Ray cluster setup with containers
 
-The helper script [examples/online_serving/run_cluster.sh](../../examples/online_serving/run_cluster.sh) starts containers across nodes and initializes Ray. By default, the script runs Docker without administrative privileges, which prevents access to the GPU performance counters when profiling or tracing. To enable admin privileges, add the `--cap-add=CAP_SYS_ADMIN` flag to the Docker command.
+The helper script [examples/ray_serving/run_cluster.sh](../../examples/ray_serving/run_cluster.sh) starts containers across nodes and initializes Ray. By default, the script runs Docker without administrative privileges, which prevents access to the GPU performance counters when profiling or tracing. To enable admin privileges, add the `--cap-add=CAP_SYS_ADMIN` flag to the Docker command.
 
 Choose one node as the head node and run:
 
@@ -162,7 +162,7 @@ vllm serve /path/to/the/model/in/the/container \
 
 Efficient tensor parallelism requires fast internode communication, preferably through high-speed network adapters such as InfiniBand.
 To set up the cluster to use InfiniBand, append additional arguments like `--privileged -e NCCL_IB_HCA=mlx5` to the
-[examples/online_serving/run_cluster.sh](../../examples/online_serving/run_cluster.sh) helper script.
+[examples/ray_serving/run_cluster.sh](../../examples/ray_serving/run_cluster.sh) helper script.
 Contact your system administrator for more information about the required flags.
 
 ## Enabling GPUDirect RDMA
diff --git a/docs/training/async_rl.md b/docs/training/async_rl.md
index 172466f89039..d3be23fe698d 100644
--- a/docs/training/async_rl.md
+++ b/docs/training/async_rl.md
@@ -60,4 +60,4 @@ The key insight is that requests paused with `mode="keep"` will produce tokens f
 
 ## Example
 
-The [async RLHF example](../examples/rl/rlhf_async_new_apis.md) demonstrates this pattern with `vllm.AsyncLLMEngine`, NCCL weight transfer, and mid-flight pause/resume with validation.
+The [async RLHF example](../../examples/rl/rlhf_async_new_apis.py) demonstrates this pattern with `vllm.AsyncLLMEngine`, NCCL weight transfer, and mid-flight pause/resume with validation.
diff --git a/docs/training/layerwise.md b/docs/training/layerwise.md
new file mode 100644
index 000000000000..d304c4a8425d
--- /dev/null
+++ b/docs/training/layerwise.md
@@ -0,0 +1,146 @@
+# What is Layerwise (Re)loading?
+
+Layerwise reloading is the system used to handle the loading of new weight data into existing weight data destinations without triggering recompilation of the cuda graph and other runtime artifacts. This system is used to enable [QeRL](https://arxiv.org/pdf/2510.11696)-style post training flows, where full-precision trainer weights are quantized and loaded into a target vLLM instance for fast, high-exploration rollouts. The core implementation can be found in [layerwise.py](../../vllm/model_executor/model_loader/reload/layerwise.py).
+
+![Layerwise](../assets/training/layerwise.png)
+
+## Layerwise Reloading for QeRL
+
+In order to load new weights into existing weight data destinations, a weight must undergo the following operations:
+
+- Transfer: weights must be transferred from trainer model to target node/device
+- Fuse: weight partitions must be fused, for example qkv/gate_up
+- Process: this typically means online quantization and kernel-specific padding or striding
+- Shard: weights must be sharded according to the selected parallelism strategy
+- Copy: weights must be copied into the existing weight data destinations
+
+Layerwise reloading achieves this using the following steps:
+
+1. Weights are **transferred** from the trainer to the target (see [weight_transfer](weight_transfer/README.md))
+2. Weights loaded via `model.load_weights`, during which they are **sharded** and **fused**
+3. Weights are **processed** in an online fashion as soon as all of a layer's weights are loaded
+4. Weights are **copied** into the existing weight data destinations
+
+For more information on implementation, see [Low Level `layerwise` API](#low-level-layerwise-api).
+
+## Layerwise Loading with Online Quantization
+
+Online quantization refers to when a user provides full precision weights and those weights are quantized on-the-fly as they are loaded into the model. The layerwise reloading system handles this by treating online quantization as a **processing** step, which is then handled in an online way both during first-time load and during reload. A typical online quantization method implementation should look like this:
+
+```python
+class Fp8OnlineLinearMethod(Fp8LinearMethod):
+    """Online version of Fp8LinearMethod which loads a full precision checkpoint
+    and quantizes weights during loading."""
+
+    uses_meta_device: bool = True
+
+    def create_weights(self, layer: torch.nn.Module, ...):
+        # weight is materialized and processed during loading
+        layer.weight = ModelWeightParameter(
+            data=torch.empty(..., device="meta"),
+            weight_loader=weight_loader,
+        )
+
+        # set up online processing
+        initialize_online_processing(layer)
+
+    def process_weights_after_loading(self, layer: Module) -> None:
+        if getattr(layer, "_already_called_process_weights_after_loading", False):
+            return
+
+        layer.weight, layer.weight_scale = ops.scaled_fp8_quant(layer.weight)
+
+        # Prevent duplicate processing (e.g., during weight reload)
+        layer._already_called_process_weights_after_loading = True
+```
+
+## Example Usages
+
+### High Level Weight Transfer API
+
+The layerwise reloading system is integrated with the post-training weight transfer system. To use layerwise reloading in conjunction to the weight transfer system, follow the examples found [here](../../examples/rl/). Layerwise reloading is controlled by the `WeightTransferUpdateInfo.is_checkpoint_format` flag and is set to `True` by default.
+
+### Mid Level `reload_weights` API
+
+Layerwise reloading is also exposed via the `reload_weights` API. This interface can be called using the following code:
+
+```python
+from vllm import LLM
+
+llm = LLM("Qwen/Qwen3-0.6B")
+llm.collective_rpc("reload_weights")
+```
+
+This interface also allows specifying a `weights_path` which can be used to select a checkpoint path to load from:
+
+```python
+from vllm import LLM
+
+# fine tuned model checkpoints for testing
+mul_path = "inference-optimization/Qwen3-0.6B-debug-multiply"
+add_path = "inference-optimization/Qwen3-0.6B-debug-add"
+
+llm = LLM("Qwen/Qwen3-0.6B")
+llm.collective_rpc("reload_weights", kwargs={"weights_path": mul_path})
+llm.generate("3 4 = ")  # 12
+
+llm.collective_rpc("reload_weights", kwargs={"weights_path": add_path})
+llm.generate("3 4 = ")  # 7
+```
+
+Finally, a `weights_iterator` can be provided directly. This iterator can be lazy or eagerly defined.
+
+```python
+from vllm import LLM
+
+weights_iterator = [("q_proj", ...), ("k_proj", ...), ...]
+
+llm = LLM("Qwen/Qwen3-0.6B")
+llm.collective_rpc("reload_weights", kwargs={"weights_iterator": weights_iterator})
+```
+
+### Low Level `layerwise` API
+
+[layerwise.py](../../vllm/model_executor/model_loader/reload/layerwise.py) Implements the following functions to execute its lifecycle:
+
+| Function | Purpose | Quantized Reload | Online Quantization |
+| - | - | - | - |
+| `record_metadata_for_reloading` | Record tensor metadata so that layers can be restored on the meta device | Called by `BaseModelLoader` | Called by `BaseModelLoader` |
+| `restore_layer_on_meta` | Restore layer to model format at start of reload | Called by `initialize_layerwise_reload` | Not called. Online quantized weights already start on meta device via `...OnlineLinearMethod.create_weights` |
+| `initialize_online_processing` | Wrap weight loaders with the `online_process_loader` wrapper, which buffers weights until all layer weights have been loaded | Called by `initialize_layerwise_reload` | Called by `...OnlineLinearMethod.create_weights` |
+| `_layerwise_process` | Process layer once all weights are loaded | Called by `online_process_loader` during loading | Called by `online_process_loader` during loading |
+| `_copy_and_restore_kernel_tensors` | Copy processed weights into original tensor locations to affect compiled cuda graphs, etc. | Called by `_layerwise_process` after `process_weights_after_loading` | Not called. There is no compiled cuda graph yet |
+| `finalize_layerwise_processing` | Catch any layers which did not load all weights (for example attention weights or weights with padding) | Called by `BaseModelLoader` | Called by `BaseModelLoader` |
+
+You can plug into this lifecycle directly by calling the `initialize_layerwise_reload`, loading weights, then calling `finalize_layerwise_processing`:
+
+```python
+from vllm import LLM
+from vllm.model_executor.model_loader.reload import initialize_layerwise_reload, finalize_layerwise_processing
+
+llm = LLM("Qwen/Qwen3-0.6B")
+
+# this model path requires `VLLM_ENABLE_V1_MULTIPROCESSING=0` and is not stable
+model = llm.llm_engine.engine_core.engine_core.model_executor.driver_worker.worker.get_model()
+
+# layerwise reload
+initialize_layerwise_reload(model)
+model.load_weights(...)
+finalize_layerwise_processing(model, llm.model_config)
+```
+
+## Troubleshooting Excessive Memory Usage
+
+Layerwise reloading allows users to incrementally load and process weights as they are loaded into the model. This system relies on buffering layer weights on device until all weights of a layer have been loaded. However, without offloading, this approach necessarily causes excessive buffering if weights are loaded out of order.
+
+For this reason, users must take care as to the order of weights when they are reloading into the model. Weight should be loaded "in order", meaning that each layer's weights are fully loaded before beginning to load the next layer's weights. "Out of order" loading can cause layer weights to stay buffered while other layer weights are loading, leading to excessive memory usage. In the example below, q_proj, k_proj, v_proj, and up_proj are all buffered at the same time, using more memory than if up_proj was loaded after q_proj, k_proj and v_proj.
+
+| Correct Loading | Incorrect Loading |
+| - | - |
+| ![Layerwise](../assets/training/layerwise_good_loading.png) | ![Layerwise](../assets/training/layerwise_bad_loading.png) |
+
+Users will see a warning like the one below if weights are loaded out-of-order.
+
+```console
+WARNING [layerwise.py:198] Allocating 28.5 MB of device memory to buffers to load ["QKVParallelLinear", "MergedColumnParallelLinear"] layers. This extra memory usage can be avoided by ordering weights by their parent layer when reloading.
+```
diff --git a/docs/training/weight_transfer/README.md b/docs/training/weight_transfer/README.md
index 17afd2bc8965..4368483e8ba2 100644
--- a/docs/training/weight_transfer/README.md
+++ b/docs/training/weight_transfer/README.md
@@ -4,10 +4,12 @@ vLLM provides a pluggable weight transfer system for synchronizing model weights
 
 ## Architecture
 
-The weight transfer system follows a **two-phase protocol** with a pluggable backend design:
+The weight transfer system follows a **four-phase protocol** with a pluggable backend design:
 
 1. **Initialization** (`init_weight_transfer_engine`): Establishes the communication channel between the trainer and inference workers. Called once before the training loop begins.
-2. **Weight Update** (`update_weights`): Transfers updated weights from the trainer to the inference engine. Called after each training step (or batch of steps).
+2. **Start** (`start_weight_update`): Prepares the inference engine for a weight update.
+3. **Weight Update** (`update_weights`): Transfers updated weights from the trainer to the inference engine. May be called one or more times (e.g., for chunked transfers).
+4. **Finish** (`finish_weight_update`): Finalizes the weight update (e.g., runs post-processing for checkpoint-format weights). Called once after all weights have been transferred.
 
 ## Available Backends
 
@@ -48,7 +50,9 @@ When running vLLM as an HTTP server, the following endpoints are available for w
 | Endpoint | Method | Description |
 | -------- | ------ | ----------- |
 | `/init_weight_transfer_engine` | POST | Initialize the weight transfer engine with backend-specific info |
-| `/update_weights` | POST | Trigger a weight update with backend-specific metadata |
+| `/start_weight_update` | POST | Start a weight update |
+| `/update_weights` | POST | Transfer a batch of weights with backend-specific metadata |
+| `/finish_weight_update` | POST | Finish the weight update and run post-processing |
 | `/pause` | POST | Pause generation before weight sync to handle inflight requests |
 | `/resume` | POST | Resume generation after weight sync |
 | `/get_world_size` | GET | Get the number of inference workers (useful for NCCL world size calculation) |
@@ -64,11 +68,17 @@ Both backends provide static methods that the trainer calls to send weights. The
 # 1. Initialize the transfer engine (backend-specific)
 EngineClass.trainer_init(init_info)
 
-# 2. Send weights to inference workers
+# 2. Start weight update on inference side
+llm.start_weight_update(is_checkpoint_format=True)
+
+# 3. Send weights to inference workers
 EngineClass.trainer_send_weights(
     iterator=model.named_parameters(),
     trainer_args=backend_specific_args,
 )
+
+# 4. Finish weight update on inference side
+llm.finish_weight_update()
 ```
 
 See the [NCCL](nccl.md) and [IPC](ipc.md) pages for backend-specific trainer APIs and full examples.
diff --git a/docs/training/weight_transfer/base.md b/docs/training/weight_transfer/base.md
index 973ec8ad9f55..6c768c87fd90 100644
--- a/docs/training/weight_transfer/base.md
+++ b/docs/training/weight_transfer/base.md
@@ -43,16 +43,14 @@ update_request = WeightTransferUpdateRequest(
 
 ### WeightTransferUpdateInfo
 
-The base `WeightTransferUpdateInfo` includes an `is_checkpoint_format` flag:
+The base `WeightTransferUpdateInfo` is a marker class for backend-specific update info:
 
 ```python
 @dataclass
 class WeightTransferUpdateInfo(ABC):
-    is_checkpoint_format: bool = True
+    pass
 ```
 
-When `is_checkpoint_format=True` (the default), vLLM applies layerwise weight processing (repacking, renaming, etc.) on the received weights before loading them. Set to `False` if the trainer has already converted weights to the kernel format expected by the model.
-
 ## Implementing a Custom Engine
 
 To create a custom weight transfer backend:
diff --git a/docs/training/weight_transfer/ipc.md b/docs/training/weight_transfer/ipc.md
index 8e19fa7b429b..21fc8ad70da6 100644
--- a/docs/training/weight_transfer/ipc.md
+++ b/docs/training/weight_transfer/ipc.md
@@ -1,21 +1,37 @@
 # IPC Engine
 
-The IPC weight transfer engine uses **CUDA IPC** (Inter-Process Communication) handles to share GPU memory directly between the trainer and inference workers on the **same node and same GPU**. This avoids any data copying, making it a efficient option when colocating training and inference.
+The IPC weight transfer engine uses **CUDA IPC** (Inter-Process Communication) handles to share GPU memory directly between the trainer and inference workers on the **same GPU**. This avoids any data copying, making it the most efficient option when colocating training and inference. Multi-GPU setups are supported — weights are all gathered by each GPU and are extracted by the correct colocated process.
 
 ## When to Use IPC
 
-- Training and inference on the **same GPU** (colocated)
-- You want to minimize memory overhead by sharing tensors in-place
+- Training and inference share the **same GPU(s)** (colocated)
 
 ## How It Works
 
-1. The trainer creates CUDA tensors for each weight and generates IPC handles using `torch.multiprocessing.reductions.reduce_tensor`.
-2. IPC handles are sent to the inference engine via **Ray.remote()** or **HTTP POST**.
-3. The inference worker reconstructs the tensors from the handles, reading directly from the trainer's GPU memory.
+1. The trainer creates CUDA tensors for each weight and generates IPC handles using `torch.multiprocessing.reductions.reduce_tensor`. In multi-GPU setups (e.g. FSDP), each trainer rank must all-gather the full tensor for each layer onto its own GPU before generating the IPC handle.
+2. IPC handles for each gpu are sent to the inference engine via **Ray**, **HTTP**, or a **custom callable**. Each rank only reads the handle corresponding to its own GPU.
+3. The inference worker reconstructs the tensors from the handles using `rebuild_cuda_tensor`, reading directly from the trainer's GPU memory.
 
 !!! warning
     IPC handles involve sending serialized Python objects. When using HTTP transport, you must set `VLLM_ALLOW_INSECURE_SERIALIZATION=1` on both the server and client. This is because IPC handles are pickled and base64-encoded for HTTP transmission.
 
+## Packed (Chunked) Transfer
+
+By default, all weights are sent in a single API call. For large models, this requires the full model to reside in GPU memory on both sides simultaneously. Setting `packed=True` enables **chunked transfer** with bounded GPU memory:
+
+- Weights are concatenated into fixed-size packed buffers (controlled by `packed_buffer_size_bytes`).
+- Each chunk is sent as a separate `update_weights` call within a single `start_weight_update` / `finish_weight_update` bracket, so the layerwise reload pass is initialized once at the start and finalized once at the end regardless of chunk count.
+- After each chunk is consumed, the GPU memory for that chunk can be reclaimed.
+
+```python
+trainer_args = IPCTrainerSendWeightsArgs(
+    send_mode="ray",
+    llm_handle=llm_actor_handle,
+    packed=True,
+    packed_buffer_size_bytes=256 * 1024 * 1024,  # 256 MB chunks
+)
+```
+
 ## Initialization
 
 The IPC backend requires no initialization on either side. The `init_transfer_engine` call is a no-op for IPC.
@@ -35,14 +51,18 @@ from vllm.distributed.weight_transfer.ipc_engine import (
 )
 
 trainer_args = IPCTrainerSendWeightsArgs(
-    mode="ray",
+    send_mode="ray",
     llm_handle=llm_actor_handle,
 )
-
+# start
+ray.get(llm_actor_handle.start_weight_update.remote(is_checkpoint_format=True))
+# send weights
 IPCWeightTransferEngine.trainer_send_weights(
     iterator=model.named_parameters(),
     trainer_args=trainer_args,
 )
+# finish
+ray.get(llm_actor_handle.finish_weight_update.remote())
 ```
 
 In Ray mode, the engine calls `llm_handle.update_weights.remote(...)` directly, passing the IPC handles via Ray's serialization.
@@ -53,21 +73,46 @@ Used when vLLM is running as an HTTP server:
 
 ```python
 trainer_args = IPCTrainerSendWeightsArgs(
-    mode="http",
+    send_mode="http",
     url="http://localhost:8000",
 )
 
+# start
+base_url = "http://localhost:8000"
+url = f"{base_url}/start_weight_update"
+response = requests.post(url, json={"is_checkpoint_format": True}, timeout=60)
+response.raise_for_status()
+# send weights
 IPCWeightTransferEngine.trainer_send_weights(
     iterator=model.named_parameters(),
     trainer_args=trainer_args,
 )
+# finish
+url = f"{base_url}/finish_weight_update"
+response = requests.post(url, json={}, timeout=60)
+response.raise_for_status()
 ```
 
-In HTTP mode, IPC handles are pickled, base64-encoded, and sent as JSON to the `/update_weights` endpoint.
+In HTTP mode, IPC handles are pickled, base64-encoded, and sent as JSON to the `/update_weights` endpoint. Because the worker deserializes the payload via `pickle.loads`, the vLLM server must be started with `VLLM_ALLOW_INSECURE_SERIALIZATION=1`.
+
+```python
+def my_custom_sender(update_info: IPCWeightTransferUpdateInfo):
+    # Custom logic to deliver update_info to vLLM
+    ...
+
+trainer_args = IPCTrainerSendWeightsArgs(
+    send_mode=my_custom_sender,
+)
+
+IPCWeightTransferEngine.trainer_send_weights(
+    iterator=model.named_parameters(),
+    trainer_args=trainer_args,
+)
+```
 
 See [`IPCTrainerSendWeightsArgs`](https://github.com/vllm-project/vllm/blob/main/vllm/distributed/weight_transfer/ipc_engine.py) for the full list of configurable fields.
 
 ## Examples
 
-- [RLHF with IPC weight syncing (offline, Ray)](../../examples/rl/rlhf_ipc.md) - Colocated training and inference on a single GPU using Ray placement groups and CUDA IPC handles
-- [RLHF with IPC weight syncing (online serving, HTTP)](../../examples/rl/rlhf_http_ipc.md) - Weight transfer with a vLLM HTTP server where both server and trainer share the same GPU
+- [RLHF with IPC weight syncing (offline, Ray)](../../../examples/rl/rlhf_ipc.py) - Colocated training and inference on a single GPU using Ray placement groups and CUDA IPC handles
+- [RLHF with IPC weight syncing (online serving, HTTP)](../../../examples/rl/rlhf_http_ipc.py) - Weight transfer with a vLLM HTTP server where both server and trainer share the same GPU
diff --git a/docs/training/weight_transfer/nccl.md b/docs/training/weight_transfer/nccl.md
index a50b3664d89d..bfde1ee2ae37 100644
--- a/docs/training/weight_transfer/nccl.md
+++ b/docs/training/weight_transfer/nccl.md
@@ -84,11 +84,15 @@ Both the trainer (`NCCLTrainerSendWeightsArgs`) and inference side (`NCCLWeightT
 
 ## Receiving Weights (Inference Side)
 
-The inference side triggers weight reception by calling `update_weights`:
+The inference side triggers weight reception using the four-phase protocol — `init_weight_transfer_engine`, `start_weight_update`, `update_weights`, `finish_weight_update`. The init phase is shown [above](#initialization). The remaining three steps are:
 
 ```python
 from vllm.distributed.weight_transfer.base import WeightTransferUpdateRequest
 
+# 1. Start the weight update
+llm.start_weight_update(is_checkpoint_format=True)
+
+# 2. Receive weights (can be called multiple times for chunked transfers)
 llm.update_weights(
     WeightTransferUpdateRequest(
         update_info=dict(
@@ -99,12 +103,17 @@ llm.update_weights(
         )
     )
 )
+
+# 3. Finish the weight update
+llm.finish_weight_update()
 ```
 
 The `names`, `dtype_names`, and `shapes` lists describe each parameter. These must match the order in which the trainer iterates over its parameters.
 
+`start_weight_update` must be called before `update_weights`, and `finish_weight_update` must be called after all weight chunks have been transferred. The `is_checkpoint_format` flag controls whether layerwise reload processing is applied (`True` for checkpoint-format weights, `False` for pre-processed kernel-format weights).
+
 ## Examples
 
-- [RLHF with NCCL weight syncing (offline, Ray)](../../examples/rl/rlhf_nccl.md) - Trainer on one GPU, 2x tensor-parallel vLLM engine on two others, with packed NCCL weight broadcast
-- [RLHF with async weight syncing (offline, Ray)](../../examples/rl/rlhf_async_new_apis.md) - Async generation with mid-flight pause, weight sync, resume, and validation against a fresh model
-- [RLHF with NCCL weight syncing (online serving, HTTP)](../../examples/rl/rlhf_http_nccl.md) - Weight transfer with a running vLLM HTTP server using HTTP control plane and NCCL data plane
+- [RLHF with NCCL weight syncing (offline, Ray)](../../../examples/rl/rlhf_nccl.py) - Trainer on one GPU, 2x tensor-parallel vLLM engine on two others, with packed NCCL weight broadcast
+- [RLHF with async weight syncing (offline, Ray)](../../../examples/rl/rlhf_async_new_apis.py) - Async generation with mid-flight pause, weight sync, resume, and validation against a fresh model
+- [RLHF with NCCL weight syncing (online serving, HTTP)](../../../examples/rl/rlhf_http_nccl.py) - Weight transfer with a running vLLM HTTP server using HTTP control plane and NCCL data plane
diff --git a/docs/usage/reproducibility.md b/docs/usage/reproducibility.md
index a8e49d0a3398..680791bbe24a 100644
--- a/docs/usage/reproducibility.md
+++ b/docs/usage/reproducibility.md
@@ -7,7 +7,7 @@ reproducible results:
   or enable [batch invariance](../features/batch_invariance.md) to make the outputs insensitive to scheduling.
 - In online mode, you can only enable [batch invariance](../features/batch_invariance.md).
 
-Example: [examples/offline_inference/reproducibility.py](../../examples/offline_inference/reproducibility.py)
+Example: [examples/features/batch_invariance/reproducibility_offline.py](../../examples/features/batch_invariance/reproducibility_offline.py)
 
 !!! warning
 
diff --git a/docs/usage/security.md b/docs/usage/security.md
index b126d2a1ec7f..1cc91c3a8a90 100644
--- a/docs/usage/security.md
+++ b/docs/usage/security.md
@@ -66,6 +66,10 @@ Restrict domains that vLLM can access for media URLs by setting
 `--allowed-media-domains` to prevent Server-Side Request Forgery (SSRF) attacks.
 (e.g. `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com`)
 
+This protection applies to both the online serving API (multimodal inputs) and
+the **batch runner** (`vllm run-batch`), where `file_url` values in batch
+transcription/translation requests are validated against the same allowlist.
+
 Without domain restrictions, a malicious user could supply URLs that:
 
 - **Target internal services**: Access internal network endpoints, cloud metadata
@@ -124,7 +128,7 @@ firewall configuration instructions.
 
 ### Overview
 
-The `--api-key` flag (or `VLLM_API_KEY` environment variable) provides authentication for vLLM's HTTP server, but **only for OpenAI-compatible API endpoints under the `/v1` path prefix**. Many other sensitive endpoints are exposed on the same HTTP server without any authentication enforcement.
+The `--api-key` flag (or `VLLM_API_KEY` environment variable) provides authentication for vLLM's HTTP server, but **only for OpenAI-compatible API endpoints under the `/v1` path prefix**, and other similar `/v2`, `/inference` path prefix**. Many other sensitive endpoints are exposed on the same HTTP server without any authentication enforcement.
 
 **Important:** Do not rely exclusively on `--api-key` for securing access to vLLM. Additional security measures are required for production deployments.
 
@@ -134,14 +138,25 @@ When `--api-key` is configured, the following `/v1` endpoints require Bearer tok
 
 - `/v1/models` - List available models
 - `/v1/chat/completions` - Chat completions
+- `/v1/chat/completions/batch` - Batch chat completions
+- `/v1/chat/completions/render` - Render chat completion requests
 - `/v1/completions` - Text completions
+- `/v1/completions/render` - Render completion requests
 - `/v1/embeddings` - Generate embeddings
 - `/v1/audio/transcriptions` - Audio transcription
 - `/v1/audio/translations` - Audio translation
 - `/v1/messages` - Anthropic-compatible messages API
-- `/v1/responses` - Response management
+- `/v1/messages/count_tokens` - Count tokens for Anthropic messages
+- `/v1/responses` - Create a response
+- `/v1/responses/{response_id}` - Retrieve a response
+- `/v1/responses/{response_id}/cancel` - Cancel a response
 - `/v1/score` - Scoring API
 - `/v1/rerank` - Reranking API
+- `/v1/load_lora_adapter` - Load a LoRA adapter (can alter model behavior; only available when `--enable-lora` is set and `VLLM_ALLOW_RUNTIME_LORA_UPDATING=True`)
+- `/v1/unload_lora_adapter` - Unload a LoRA adapter (can alter model behavior; only available when `--enable-lora` is set and `VLLM_ALLOW_RUNTIME_LORA_UPDATING=True`)
+- `/inference/v1/generate` - Generate completions
+- `/v2/embed` - Cohere Embed API
+- `/v2/rerank` - Cohere Rerank API
 
 ### Unprotected Endpoints (No API Key Required)
 
@@ -150,17 +165,23 @@ The following endpoints **do not require authentication** even when `--api-key`
 **Inference endpoints:**
 
 - `/invocations` - SageMaker-compatible endpoint (routes to the same inference functions as `/v1` endpoints)
-- `/inference/v1/generate` - Generate completions
+- `/generative_scoring` - Generative scoring API
 - `/pooling` - Pooling API
 - `/classify` - Classification API
 - `/score` - Scoring API (non-`/v1` variant)
 - `/rerank` - Reranking API (non-`/v1` variant)
 
-**Operational control endpoints (always enabled):**
+**Operational control endpoints (only when `"generate"` task is supported):**
 
 - `/pause` - Pause generation (causes denial of service)
 - `/resume` - Resume generation
+- `/is_paused` - Check if generation is paused
 - `/scale_elastic_ep` - Trigger scaling operations
+- `/is_scaling_elastic_ep` - Check if scaling is in progress
+- `/init_weight_transfer_engine` - Initialize weight transfer engine for RLHF
+- `/update_weights` - Update model weights (can alter model behavior)
+- `/get_world_size` - Get distributed world size
+- `/abort_requests` - Abort in-flight requests (only when `--tokens-only` is also set)
 
 **Utility endpoints:**
 
@@ -203,9 +224,9 @@ These endpoints are only available when profiling is enabled and should only be
 
 An attacker who can reach the vLLM HTTP server can:
 
-1. **Bypass authentication** by using non-`/v1` endpoints like `/invocations`, `/inference/v1/generate`, `/pooling`, `/classify`, `/score`, or `/rerank` to run arbitrary inference without credentials
-2. **Cause denial of service** by calling `/pause` or `/scale_elastic_ep` without a token
-3. **Access operational controls** to manipulate server state (e.g., pausing generation)
+1. **Bypass authentication** by using non-`/v1` endpoints like `/invocations`, `/inference/v1/generate`, `/generative_scoring`, `/pooling`, `/classify`, `/score`, or `/rerank` to run arbitrary inference without credentials
+2. **Cause denial of service** by calling `/pause`, `/scale_elastic_ep`, or `/abort_requests` without a token
+3. **Access operational controls** to manipulate server state (e.g., pausing generation, updating model weights via `/update_weights`)
 4. **If `--enable-tokenizer-info-endpoint` is set:** Access sensitive tokenizer configuration including chat templates, which may reveal prompt engineering strategies or other implementation details
 5. **If `VLLM_SERVER_DEV_MODE=1` is set:** Execute arbitrary RPC commands via `/collective_rpc`, reset caches, put the engine to sleep, and access detailed server configuration
 
@@ -284,6 +305,74 @@ To disable the Python code interpreter specifically, omit `code_interpreter` fro
 
 **Consider a custom implementation**: The GPT-OSS Python tool is a reference implementation. For production deployments, consider implementing a custom code execution sandbox with stricter isolation guarantees. See the [GPT-OSS documentation](https://github.com/openai/gpt-oss?tab=readme-ov-file#python) for guidance.
 
+## Dynamic LoRA Loading
+
+vLLM supports dynamically loading and unloading LoRA adapters at runtime via the `/v1/load_lora_adapter` and `/v1/unload_lora_adapter` API endpoints. This functionality is **not enabled by default** — it requires both `--enable-lora` and the environment variable `VLLM_ALLOW_RUNTIME_LORA_UPDATING=True` to be set.
+
+**Warning:** Dynamic LoRA loading is not a secure operation and should not be enabled in deployments exposed to untrusted clients. If you must enable dynamic LoRA loading, restrict access to the `/v1/load_lora_adapter` and `/v1/unload_lora_adapter` endpoints to trusted administrators only, using a reverse proxy or network-level access controls. Do not expose these endpoints to end users. For details on configuring LoRA adapters, see the [LoRA Adapters documentation](../features/lora.md).
+
+## Cache Directory Security
+
+vLLM assumes that its cache directories are **private and trusted**. Cache contents are loaded without cryptographic integrity verification, including formats that support arbitrary code execution. If an untrusted user or process can write to vLLM's cache directories, they may be able to crash vLLM or cause it to execute arbitrary code.
+
+**Do not share vLLM cache directories with untrusted users or mount them from untrusted storage.** Treat the cache directory with the same care as the vLLM installation itself.
+
+### Cache Directory Configuration
+
+Most cache paths default to subdirectories under a single root. Changing `VLLM_CACHE_ROOT` changes the default location for all features that inherit from it. When `torch.compile` caching is enabled (the default), vLLM also redirects `TRITON_CACHE_DIR` into this tree. If compile caching is disabled, Triton falls back to its own default location (`~/.triton/cache`).
+
+| Environment Variable | Default | Description |
+| --- | --- | --- |
+| `VLLM_CACHE_ROOT` | `~/.cache/vllm` | Base cache directory. Respects `XDG_CACHE_HOME` if set. All paths below inherit from this unless explicitly overridden. |
+| *(torch.compile)* | `$VLLM_CACHE_ROOT/torch_compile_cache/` | Compilation cache for AOT-compiled models, Inductor graphs, and Triton kernels. Controlled by `VLLM_DISABLE_COMPILE_CACHE` (set to `1` to disable). |
+| `VLLM_FLASHINFER_AUTOTUNE_CACHE_DIR` | `$VLLM_CACHE_ROOT/flashinfer_autotune_cache/<flashinfer-version>/<arch>/<cache-hash>/` | FlashInfer autotune config cache. |
+| `VLLM_ASSETS_CACHE` | `$VLLM_CACHE_ROOT/assets/` | Downloaded assets (e.g., tokenizer files). |
+| `VLLM_XLA_CACHE_PATH` | `$VLLM_CACHE_ROOT/xla_cache/` | XLA/TPU compilation cache. |
+| `VLLM_MEDIA_CACHE` | *(disabled)* | Optional cache for downloaded media (images, video, audio). Not enabled unless explicitly set. |
+
+### Recommendations
+
+- **Restrict file permissions** on `VLLM_CACHE_ROOT` (and any other cache directories used by dependencies, such as `~/.triton` if compile caching is disabled) so that only the vLLM process owner can read and write to them.
+- **Do not copy cache contents from untrusted sources.** If you distribute cache artifacts between environments, ensure they originate from a trusted build pipeline.
+- **Container deployments:** If mounting cache directories into containers, ensure the volume source is trusted.
+
+## FIPS Compatibility
+
+FIPS compliance depends on many factors, so a vLLM deployment is not automatically FIPS compliant. Recent changes have improved vLLM's *tolerance* of FIPS-enabled hosts — that is, avoiding crashes when non-approved algorithms are blocked — but tolerance is not the same as compliance. Whether a deployment satisfies FIPS requirements depends on the host operating system, the OpenSSL provider backing Python's `hashlib` and `ssl` modules, and which optional dependencies are installed.
+
+### FIPS-relevant configuration
+
+Operators running vLLM on FIPS-enabled hosts should select FIPS-approved algorithms via the following knobs:
+
+- **Multimodal input hashing** — `VLLM_MM_HASHER_ALGORITHM` defaults to `blake3`, which is not FIPS-approved. Set it to `sha256` or `sha512` in FIPS-enabled environments.
+- **Prefix-cache hashing** — set `--prefix-caching-hash-algo` (config field `prefix_caching_hash_algo`) to `sha256` or `sha256_cbor`. The `xxhash` and `xxhash_cbor` options are not FIPS-approved.
+- **TLS ciphers** — use `--ssl-ciphers` to restrict the API server's TLS handshake to FIPS-approved cipher suites that match your environment's policy.
+
+### Automatic fallback for non-security MD5 use
+
+vLLM uses MD5 in a few places to derive non-security cache keys (for example, configuration hashes). These call sites pass `usedforsecurity=False` and additionally fall back to SHA-256 when the underlying OpenSSL provider refuses MD5 outright (see `safe_hash()` in `vllm/utils/hashing.py`). No user action is required; this behavior is documented so that auditors and security reviewers can identify the MD5 references and understand their purpose.
+
+### Dependencies that provide non-FIPS hash implementations
+
+Some dependencies expose hash implementations that are not FIPS-approved. vLLM only invokes them when the corresponding algorithm is selected, but operators with strict cryptographic controls may want to ensure the code paths are not exercised — and, where policy requires, that the packages themselves are absent:
+
+- `blake3` — currently listed in `requirements/common.txt`, so a standard install pulls it in. It is imported lazily and only used when `VLLM_MM_HASHER_ALGORITHM=blake3` (the default). Setting `VLLM_MM_HASHER_ALGORITHM` to `sha256` or `sha512` is sufficient to keep the non-FIPS code path dormant. If your policy additionally forbids the package being present, uninstall it after `pip install` (`pip uninstall blake3`); vLLM will continue to function as long as `VLLM_MM_HASHER_ALGORITHM` is set to a non-blake3 value.
+- `xxhash` — a true optional dependency (not in `requirements/common.txt`). It is only imported when an `xxhash`-based prefix-cache algorithm is selected. Leave it uninstalled and select a `sha256`-based prefix-cache algorithm.
+
+### Beyond hashing: other FIPS considerations
+
+Hashing is the area where vLLM has explicit FIPS-aware code, but a FIPS-compliant deployment depends on several factors that sit outside vLLM itself. Operators should evaluate the following with their platform and security teams:
+
+- **Host crypto provider.** Python's `hashlib` and `ssl` modules are FIPS-aware only when Python is linked against a FIPS-validated OpenSSL (or equivalent) provider supplied by the host OS. vLLM inherits whatever provider the host configures — it does not bundle one.
+- **API server TLS.** TLS termination for the OpenAI-compatible API server uses the host's OpenSSL via Python's `ssl` module. Restrict the cipher suite with `--ssl-ciphers` to match your environment's FIPS policy, and ensure server certificates are issued with FIPS-approved algorithms and key sizes.
+- **Outbound HTTPS.** Model and asset downloads (for example, via `huggingface_hub`) use the same host TLS stack. The same provider/cipher considerations apply.
+- **Inter-node communication is unencrypted by default.** As described in [Inter-Node Communication](#inter-node-communication), PyTorch Distributed, KV-cache transfer, and data-parallel channels do not encrypt traffic. FIPS environments that require FIPS-approved cryptography for data in transit must provide that protection externally — for example, via an mTLS sidecar or IPsec terminated by a FIPS-validated module — since vLLM's internal channels cannot satisfy the requirement on their own. Network isolation alone is not cryptography and does not meet a "FIPS-approved cryptography for data in transit" requirement, though it remains a useful defense-in-depth measure.
+- **Dependencies that bundle their own OpenSSL.** Some Python wheels statically link OpenSSL builds that fail the kernel FIPS self-test on FIPS-enabled hosts (`FATAL FIPS SELFTEST FAILURE`). `opencv-python-headless` is a known example; other manylinux wheels may behave similarly. Audit your installed wheels for bundled crypto libraries when troubleshooting FIPS startup failures.
+- **Accelerator and ML libraries.** PyTorch, CUDA, cuDNN, NCCL, and similar components have their own crypto and FIPS posture independent of vLLM. NVIDIA publishes FIPS-validated builds for some libraries; vLLM does not pin to those builds, so selecting and validating them is the operator's responsibility.
+- **What is *not* a FIPS concern in vLLM.** Random number generation used for token sampling (Python/NumPy/PyTorch RNGs) is not a cryptographic use and is out of scope for FIPS. Pickled cache artifacts are a separate security concern covered under [Cache Directory Security](#cache-directory-security).
+
+In short: the configuration knobs above let vLLM avoid non-approved algorithms, and the automatic fallbacks let it run without crashing on FIPS-enabled hosts. End-to-end FIPS compliance, however, is a property of the full deployment — host OS, crypto provider, transitive dependencies, and network architecture — not of vLLM alone.
+
 ## Reporting Security Vulnerabilities
 
 If you believe you have found a security vulnerability in vLLM, please report it following the project's security policy. For more information on how to report security issues and the project's security policy, please see the [vLLM Security Policy](https://github.com/vllm-project/vllm/blob/main/SECURITY.md).
diff --git a/examples/online_serving/api_client.py b/examples/applications/chatbot/api_client.py
similarity index 100%
rename from examples/online_serving/api_client.py
rename to examples/applications/chatbot/api_client.py
diff --git a/examples/online_serving/gradio_openai_chatbot_webserver.py b/examples/applications/chatbot/gradio_openai_chatbot_webserver.py
similarity index 97%
rename from examples/online_serving/gradio_openai_chatbot_webserver.py
rename to examples/applications/chatbot/gradio_openai_chatbot_webserver.py
index c76c60cc4472..2a67aefc0278 100644
--- a/examples/online_serving/gradio_openai_chatbot_webserver.py
+++ b/examples/applications/chatbot/gradio_openai_chatbot_webserver.py
@@ -5,7 +5,7 @@
     vllm serve meta-llama/Llama-2-7b-chat-hf
 
 Start Gradio OpenAI Chatbot Webserver:
-    python examples/online_serving/gradio_openai_chatbot_webserver.py \
+    python examples/applications/chatbot/gradio_openai_chatbot_webserver.py \
                     -m meta-llama/Llama-2-7b-chat-hf
 
 Note that `pip install --upgrade gradio` is needed to run this example.
diff --git a/examples/online_serving/gradio_webserver.py b/examples/applications/chatbot/gradio_webserver.py
similarity index 97%
rename from examples/online_serving/gradio_webserver.py
rename to examples/applications/chatbot/gradio_webserver.py
index 86d9ceb48bb0..f75636409c2f 100644
--- a/examples/online_serving/gradio_webserver.py
+++ b/examples/applications/chatbot/gradio_webserver.py
@@ -6,7 +6,7 @@
         --model meta-llama/Llama-2-7b-chat-hf
 
 Start Webserver:
-    python examples/online_serving/gradio_webserver.py
+    python examples/applications/chatbot/gradio_webserver.py
 
 Note that `pip install --upgrade gradio` is needed to run this example.
 More details: https://github.com/gradio-app/gradio
diff --git a/examples/online_serving/streamlit_openai_chatbot_webserver.py b/examples/applications/chatbot/streamlit_openai_chatbot_webserver.py
similarity index 100%
rename from examples/online_serving/streamlit_openai_chatbot_webserver.py
rename to examples/applications/chatbot/streamlit_openai_chatbot_webserver.py
diff --git a/examples/online_serving/retrieval_augmented_generation_with_langchain.py b/examples/applications/rag/retrieval_augmented_generation_with_langchain.py
similarity index 100%
rename from examples/online_serving/retrieval_augmented_generation_with_langchain.py
rename to examples/applications/rag/retrieval_augmented_generation_with_langchain.py
diff --git a/examples/online_serving/retrieval_augmented_generation_with_llamaindex.py b/examples/applications/rag/retrieval_augmented_generation_with_llamaindex.py
similarity index 100%
rename from examples/online_serving/retrieval_augmented_generation_with_llamaindex.py
rename to examples/applications/rag/retrieval_augmented_generation_with_llamaindex.py
diff --git a/examples/offline_inference/async_llm_streaming.py b/examples/deployment/async_llm_streaming.py
similarity index 98%
rename from examples/offline_inference/async_llm_streaming.py
rename to examples/deployment/async_llm_streaming.py
index b876d536e3a1..ef69089a0460 100644
--- a/examples/offline_inference/async_llm_streaming.py
+++ b/examples/deployment/async_llm_streaming.py
@@ -8,7 +8,7 @@
 streaming where you receive new tokens as they are generated.
 
 Usage:
-    python examples/offline_inference/async_llm_streaming.py
+    python examples/deployment/async_llm_streaming.py
 """
 
 import asyncio
diff --git a/examples/online_serving/chart-helm/.helmignore b/examples/deployment/chart-helm/.helmignore
similarity index 100%
rename from examples/online_serving/chart-helm/.helmignore
rename to examples/deployment/chart-helm/.helmignore
diff --git a/examples/online_serving/chart-helm/Chart.yaml b/examples/deployment/chart-helm/Chart.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/Chart.yaml
rename to examples/deployment/chart-helm/Chart.yaml
diff --git a/examples/online_serving/chart-helm/README.md b/examples/deployment/chart-helm/README.md
similarity index 100%
rename from examples/online_serving/chart-helm/README.md
rename to examples/deployment/chart-helm/README.md
diff --git a/examples/online_serving/chart-helm/ct.yaml b/examples/deployment/chart-helm/ct.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/ct.yaml
rename to examples/deployment/chart-helm/ct.yaml
diff --git a/examples/online_serving/chart-helm/lintconf.yaml b/examples/deployment/chart-helm/lintconf.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/lintconf.yaml
rename to examples/deployment/chart-helm/lintconf.yaml
diff --git a/examples/online_serving/chart-helm/templates/_helpers.tpl b/examples/deployment/chart-helm/templates/_helpers.tpl
similarity index 100%
rename from examples/online_serving/chart-helm/templates/_helpers.tpl
rename to examples/deployment/chart-helm/templates/_helpers.tpl
diff --git a/examples/online_serving/chart-helm/templates/configmap.yaml b/examples/deployment/chart-helm/templates/configmap.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/templates/configmap.yaml
rename to examples/deployment/chart-helm/templates/configmap.yaml
diff --git a/examples/online_serving/chart-helm/templates/custom-objects.yaml b/examples/deployment/chart-helm/templates/custom-objects.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/templates/custom-objects.yaml
rename to examples/deployment/chart-helm/templates/custom-objects.yaml
diff --git a/examples/online_serving/chart-helm/templates/deployment.yaml b/examples/deployment/chart-helm/templates/deployment.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/templates/deployment.yaml
rename to examples/deployment/chart-helm/templates/deployment.yaml
diff --git a/examples/online_serving/chart-helm/templates/hpa.yaml b/examples/deployment/chart-helm/templates/hpa.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/templates/hpa.yaml
rename to examples/deployment/chart-helm/templates/hpa.yaml
diff --git a/examples/online_serving/chart-helm/templates/job.yaml b/examples/deployment/chart-helm/templates/job.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/templates/job.yaml
rename to examples/deployment/chart-helm/templates/job.yaml
diff --git a/examples/online_serving/chart-helm/templates/poddisruptionbudget.yaml b/examples/deployment/chart-helm/templates/poddisruptionbudget.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/templates/poddisruptionbudget.yaml
rename to examples/deployment/chart-helm/templates/poddisruptionbudget.yaml
diff --git a/examples/online_serving/chart-helm/templates/pvc.yaml b/examples/deployment/chart-helm/templates/pvc.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/templates/pvc.yaml
rename to examples/deployment/chart-helm/templates/pvc.yaml
diff --git a/examples/online_serving/chart-helm/templates/secrets.yaml b/examples/deployment/chart-helm/templates/secrets.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/templates/secrets.yaml
rename to examples/deployment/chart-helm/templates/secrets.yaml
diff --git a/examples/online_serving/chart-helm/templates/service.yaml b/examples/deployment/chart-helm/templates/service.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/templates/service.yaml
rename to examples/deployment/chart-helm/templates/service.yaml
diff --git a/examples/online_serving/chart-helm/tests/deployment_test.yaml b/examples/deployment/chart-helm/tests/deployment_test.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/tests/deployment_test.yaml
rename to examples/deployment/chart-helm/tests/deployment_test.yaml
diff --git a/examples/online_serving/chart-helm/tests/job_test.yaml b/examples/deployment/chart-helm/tests/job_test.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/tests/job_test.yaml
rename to examples/deployment/chart-helm/tests/job_test.yaml
diff --git a/examples/online_serving/chart-helm/tests/pvc_test.yaml b/examples/deployment/chart-helm/tests/pvc_test.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/tests/pvc_test.yaml
rename to examples/deployment/chart-helm/tests/pvc_test.yaml
diff --git a/examples/online_serving/chart-helm/values.schema.json b/examples/deployment/chart-helm/values.schema.json
similarity index 100%
rename from examples/online_serving/chart-helm/values.schema.json
rename to examples/deployment/chart-helm/values.schema.json
diff --git a/examples/online_serving/chart-helm/values.yaml b/examples/deployment/chart-helm/values.yaml
similarity index 100%
rename from examples/online_serving/chart-helm/values.yaml
rename to examples/deployment/chart-helm/values.yaml
diff --git a/examples/offline_inference/llm_engine_example.py b/examples/deployment/llm_engine_example.py
similarity index 100%
rename from examples/offline_inference/llm_engine_example.py
rename to examples/deployment/llm_engine_example.py
diff --git a/examples/online_serving/sagemaker-entrypoint.sh b/examples/deployment/sagemaker-entrypoint.sh
similarity index 100%
rename from examples/online_serving/sagemaker-entrypoint.sh
rename to examples/deployment/sagemaker-entrypoint.sh
diff --git a/examples/online_serving/disaggregated_encoder/README.md b/examples/disaggregated/disaggregated_encoder/README.md
similarity index 93%
rename from examples/online_serving/disaggregated_encoder/README.md
rename to examples/disaggregated/disaggregated_encoder/README.md
index efe6e3a7d920..bd2a46abbe52 100644
--- a/examples/online_serving/disaggregated_encoder/README.md
+++ b/examples/disaggregated/disaggregated_encoder/README.md
@@ -26,8 +26,13 @@ MODEL="Qwen/Qwen2.5-VL-3B-Instruct" bash disagg_1e1p1d_example.sh
 
 # Use specific storage path
 EC_SHARED_STORAGE_PATH="/tmp/my_ec_cache" bash disagg_1e1p1d_example.sh
+
+# Run on XPU; scripts switch from CUDA_VISIBLE_DEVICES to ZE_AFFINITY_MASK
+DEVICE_PLATFORM=xpu GPU_E=0 GPU_PD=1 bash disagg_1e1pd_example.sh
 ```
 
+`DEVICE_PLATFORM` defaults to `cuda`. Set `DEVICE_PLATFORM=xpu` when running these examples on Intel GPUs so the scripts use `ZE_AFFINITY_MASK` instead of `CUDA_VISIBLE_DEVICES` for device selection.
+
 ## Encoder Instances
 
 Encoder engines should be launched with the following flags:
diff --git a/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh b/examples/disaggregated/disaggregated_encoder/disagg_1e1p1d_example.sh
similarity index 85%
rename from examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh
rename to examples/disaggregated/disaggregated_encoder/disagg_1e1p1d_example.sh
index 19459acc9eac..cb51333e908d 100644
--- a/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh
+++ b/examples/disaggregated/disaggregated_encoder/disagg_1e1p1d_example.sh
@@ -19,11 +19,29 @@ GPU_E="${GPU_E:-2}"
 GPU_P="${GPU_P:-2}"
 GPU_D="${GPU_D:-3}"
 
+# Device platform and affinity env name.
+# DEVICE_PLATFORM supports: cuda, xpu
+DEVICE_PLATFORM="${DEVICE_PLATFORM:-cuda}"
+if [[ -z "${DEVICE_AFFINITY_ENV:-}" ]]; then
+    if [[ "${DEVICE_PLATFORM,,}" == "xpu" ]]; then
+        DEVICE_AFFINITY_ENV="ZE_AFFINITY_MASK"
+    else
+        DEVICE_AFFINITY_ENV="CUDA_VISIBLE_DEVICES"
+    fi
+fi
+
 EC_SHARED_STORAGE_PATH="${EC_SHARED_STORAGE_PATH:-/tmp/ec_cache}"
 TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-12000}"   # wait_for_server timeout
 
 NUM_PROMPTS="${NUM_PROMPTS:-100}"    # number of prompts to send in benchmark
 
+# Serve args
+GPU_MEMORY_UTILIZATION_E="${GPU_MEMORY_UTILIZATION_E:-0.01}"
+GPU_MEMORY_UTILIZATION_P="${GPU_MEMORY_UTILIZATION_P:-0.7}"
+GPU_MEMORY_UTILIZATION_D="${GPU_MEMORY_UTILIZATION_D:-0.7}"
+MAX_NUM_SEQS="${MAX_NUM_SEQS:-128}"
+MAX_MODEL_LEN="${MAX_MODEL_LEN:-32768}"
+
 export UCX_TLS=all
 export UCX_NET_DEVICES=all
 
@@ -92,14 +110,14 @@ mkdir -p "$EC_SHARED_STORAGE_PATH"
 ###############################################################################
 # Encoder worker
 ###############################################################################
-CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \
-    --gpu-memory-utilization 0.01 \
+env "$DEVICE_AFFINITY_ENV=$GPU_E" vllm serve "$MODEL" \
+    --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION_E" \
     --port "$ENCODE_PORT" \
     --enforce-eager \
     --enable-request-id-headers \
     --no-enable-prefix-caching \
     --max-num-batched-tokens 114688 \
-    --max-num-seqs 128 \
+    --max-num-seqs "$MAX_NUM_SEQS" \
     --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \
     --ec-transfer-config '{
         "ec_connector": "ECExampleConnector",
@@ -115,15 +133,16 @@ PIDS+=($!)
 ###############################################################################
 # Prefill worker
 ###############################################################################
-CUDA_VISIBLE_DEVICES="$GPU_P" \
+env "$DEVICE_AFFINITY_ENV=$GPU_P" \
 UCX_NET_DEVICES=all \
 VLLM_NIXL_SIDE_CHANNEL_PORT=5559 \
 vllm serve "$MODEL" \
-    --gpu-memory-utilization 0.7 \
+    --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION_P" \
     --port "$PREFILL_PORT" \
     --enforce-eager \
     --enable-request-id-headers \
-    --max-num-seqs 128 \
+    --max-num-seqs "$MAX_NUM_SEQS" \
+    --max-model-len "$MAX_MODEL_LEN" \
     --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \
     --ec-transfer-config '{
         "ec_connector": "ECExampleConnector",
@@ -143,15 +162,16 @@ PIDS+=($!)
 ###############################################################################
 # Decode worker
 ###############################################################################
-CUDA_VISIBLE_DEVICES="$GPU_D" \
+env "$DEVICE_AFFINITY_ENV=$GPU_D" \
 UCX_NET_DEVICES=all \
 VLLM_NIXL_SIDE_CHANNEL_PORT=6000 \
 vllm serve "$MODEL" \
-    --gpu-memory-utilization 0.7 \
+    --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION_D" \
     --port "$DECODE_PORT" \
     --enforce-eager \
     --enable-request-id-headers \
-    --max-num-seqs 128 \
+    --max-num-seqs "$MAX_NUM_SEQS" \
+    --max-model-len "$MAX_MODEL_LEN" \
     --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \
     --kv-transfer-config '{
         "kv_connector": "NixlConnector",
diff --git a/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh b/examples/disaggregated/disaggregated_encoder/disagg_1e1pd_example.sh
similarity index 86%
rename from examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh
rename to examples/disaggregated/disaggregated_encoder/disagg_1e1pd_example.sh
index 18c278b2abff..ed752a38c6fd 100644
--- a/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh
+++ b/examples/disaggregated/disaggregated_encoder/disagg_1e1pd_example.sh
@@ -17,11 +17,28 @@ PROXY_PORT="${PROXY_PORT:-10001}"
 GPU_E="${GPU_E:-0}"
 GPU_PD="${GPU_PD:-1}"
 
+# Device platform and affinity env name.
+# DEVICE_PLATFORM supports: cuda, xpu
+DEVICE_PLATFORM="${DEVICE_PLATFORM:-cuda}"
+if [[ -z "${DEVICE_AFFINITY_ENV:-}" ]]; then
+    if [[ "${DEVICE_PLATFORM,,}" == "xpu" ]]; then
+        DEVICE_AFFINITY_ENV="ZE_AFFINITY_MASK"
+    else
+        DEVICE_AFFINITY_ENV="CUDA_VISIBLE_DEVICES"
+    fi
+fi
+
 EC_SHARED_STORAGE_PATH="${EC_SHARED_STORAGE_PATH:-/tmp/ec_cache}"
 TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-12000}"   # wait_for_server timeout
 
 NUM_PROMPTS="${NUM_PROMPTS:-100}"    # number of prompts to send in benchmark
 
+# Serve args
+GPU_MEMORY_UTILIZATION_E="${GPU_MEMORY_UTILIZATION_E:-0.01}"
+GPU_MEMORY_UTILIZATION_PD="${GPU_MEMORY_UTILIZATION_PD:-0.7}"
+MAX_NUM_SEQS="${MAX_NUM_SEQS:-128}"
+MAX_MODEL_LEN="${MAX_MODEL_LEN:-32768}"
+
 ###############################################################################
 # Helpers
 ###############################################################################
@@ -86,14 +103,14 @@ mkdir -p "$EC_SHARED_STORAGE_PATH"
 ###############################################################################
 # Encoder worker
 ###############################################################################
-CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \
-    --gpu-memory-utilization 0.01 \
+env "$DEVICE_AFFINITY_ENV=$GPU_E" vllm serve "$MODEL" \
+    --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION_E" \
     --port "$ENCODE_PORT" \
     --enforce-eager \
     --enable-request-id-headers \
     --no-enable-prefix-caching \
     --max-num-batched-tokens 114688 \
-    --max-num-seqs 128 \
+    --max-num-seqs "$MAX_NUM_SEQS" \
     --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \
     --ec-transfer-config '{
         "ec_connector": "ECExampleConnector",
@@ -109,12 +126,13 @@ PIDS+=($!)
 ###############################################################################
 # Prefill+Decode worker
 ###############################################################################
-CUDA_VISIBLE_DEVICES="$GPU_PD" vllm serve "$MODEL" \
-    --gpu-memory-utilization 0.7 \
+env "$DEVICE_AFFINITY_ENV=$GPU_PD" vllm serve "$MODEL" \
+    --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION_PD" \
     --port "$PREFILL_DECODE_PORT" \
     --enforce-eager \
     --enable-request-id-headers \
-    --max-num-seqs 128 \
+    --max-num-seqs "$MAX_NUM_SEQS" \
+    --max-model-len "$MAX_MODEL_LEN" \
     --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \
     --ec-transfer-config '{
         "ec_connector": "ECExampleConnector",
diff --git a/examples/online_serving/disaggregated_encoder/disagg_epd_proxy.py b/examples/disaggregated/disaggregated_encoder/disagg_epd_proxy.py
similarity index 100%
rename from examples/online_serving/disaggregated_encoder/disagg_epd_proxy.py
rename to examples/disaggregated/disaggregated_encoder/disagg_epd_proxy.py
diff --git a/examples/offline_inference/disaggregated_prefill.py b/examples/disaggregated/disaggregated_prefill.py
similarity index 100%
rename from examples/offline_inference/disaggregated_prefill.py
rename to examples/disaggregated/disaggregated_prefill.py
diff --git a/examples/online_serving/disaggregated_prefill.sh b/examples/disaggregated/disaggregated_prefill.sh
similarity index 100%
rename from examples/online_serving/disaggregated_prefill.sh
rename to examples/disaggregated/disaggregated_prefill.sh
diff --git a/examples/online_serving/disaggregated_serving/README.md b/examples/disaggregated/disaggregated_serving/README.md
similarity index 100%
rename from examples/online_serving/disaggregated_serving/README.md
rename to examples/disaggregated/disaggregated_serving/README.md
diff --git a/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py b/examples/disaggregated/disaggregated_serving/disagg_proxy_demo.py
similarity index 99%
rename from examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
rename to examples/disaggregated/disaggregated_serving/disagg_proxy_demo.py
index 763361a30e02..57deef6a15d2 100644
--- a/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
+++ b/examples/disaggregated/disaggregated_serving/disagg_proxy_demo.py
@@ -5,7 +5,7 @@
 example usage of XpYd disaggregated prefilling.
 We can launch multiple vllm instances (2 for prefill and 2 for decode), and
 launch this proxy demo through:
-  python3 examples/online_serving/disaggregated_serving/disagg_proxy_demo.py  \
+  python3 examples/disaggregated/disaggregated_serving/disagg_proxy_demo.py  \
        --model $model_name  \
        --prefill localhost:8100 localhost:8101   \
        --decode localhost:8200 localhost:8201   \
diff --git a/examples/disaggregated/disaggregated_serving/disagg_proxy_multiturn.py b/examples/disaggregated/disaggregated_serving/disagg_proxy_multiturn.py
new file mode 100644
index 000000000000..24d90eab0292
--- /dev/null
+++ b/examples/disaggregated/disaggregated_serving/disagg_proxy_multiturn.py
@@ -0,0 +1,562 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Disaggregated Prefill/Decode Proxy with Bidirectional KV Transfer
+
+This proxy sits between clients and a vLLM Prefill/Decode (P/D) deployment,
+routing multi-turn chat requests so that each turn reuses KV cache blocks
+from the previous turn's Decode node via bidirectional KV transfer.
+
+Architecture:
+    Client  ──►  Proxy  ──►  Prefill (P)  ──►  Decode (D)
+                   │              │                 │
+                   │   kv_transfer_params flow:     │
+                   │   D finish ──► proxy caches    │
+                   │   next turn ──► proxy sends    │
+                   │   cached D blocks to P ──►     │
+                   │   P reads D blocks (bidir)     │
+                   │   P sends its blocks to D      │
+
+Per-request flow:
+    1. Client sends chat/completions request to proxy.
+    2. Proxy looks up cached D block info from the previous turn
+       (keyed by conversation_id).
+    3. If cache hit, proxy attaches D's block info to the request
+       so P can read D's KV blocks instead of recomputing.
+    4. Proxy sends request to P (max_tokens=1, non-streaming).
+    5. P returns kv_transfer_params with its own block info.
+    6. Proxy forwards request + P's block info to D (streaming).
+    7. D streams the response. The final chunk includes D's
+       kv_transfer_params, which the proxy caches for the next turn.
+    8. Proxy returns D's response to the client.
+
+Conversation isolation:
+    Each request must include a ``conversation_id`` field (top-level in
+    the JSON body) to scope the KV cache across turns. Without it, the
+    proxy cannot link turns and falls back to no-cache behavior.
+
+Usage:
+    python disagg_proxy_multiturn.py \\
+        --host 0.0.0.0 --port 8000 \\
+        --prefiller-host 10.0.0.1 --prefiller-port 8100 \\
+        --decoder-host 10.0.0.2 --decoder-port 8200
+
+Dependencies:
+    pip install fastapi uvicorn httpx
+"""
+
+from __future__ import annotations
+
+import argparse
+import itertools
+import json
+import logging
+import os
+import time
+import uuid
+from contextlib import asynccontextmanager
+from dataclasses import dataclass, field
+from typing import Any
+
+import httpx
+from fastapi import FastAPI, Request
+from fastapi.responses import JSONResponse, StreamingResponse
+
+# Logging
+logging.basicConfig(
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    level=logging.INFO,
+)
+logger = logging.getLogger("disagg_proxy")
+
+
+# Data structures
+@dataclass
+class CachedKVEntry:
+    """KV transfer parameters cached from D's response for one turn."""
+
+    kv_transfer_params: dict[str, Any]
+    timestamp: float = field(default_factory=time.time)
+
+
+class ConversationKVCache:
+    """Per-conversation KV block cache.
+
+    Each conversation is identified by a ``conversation_id`` supplied by
+    the client. After D finishes a turn, its ``kv_transfer_params`` are
+    stored here. On the next turn, the proxy retrieves them so P can
+    read D's blocks via bidirectional KV transfer.
+    """
+
+    def __init__(self, ttl_seconds: float = 600.0) -> None:
+        self._store: dict[str, CachedKVEntry] = {}
+        self._ttl = ttl_seconds
+
+    def get(self, conversation_id: str) -> dict[str, Any] | None:
+        """Retrieve and consume cached KV params for a conversation.
+
+        Returns a *copy* of the kv_transfer_params dict, or None.
+        The entry is removed after retrieval (single-use).
+        """
+        entry = self._store.pop(conversation_id, None)
+        if entry is None:
+            return None
+        age = time.time() - entry.timestamp
+        if age > self._ttl:
+            logger.info(
+                "conv=%s: stale cache entry (age=%.1fs > ttl=%.1fs), discarding",
+                conversation_id,
+                age,
+                self._ttl,
+            )
+            return None
+        logger.info(
+            "conv=%s: cache HIT (age=%.1fs)",
+            conversation_id,
+            age,
+        )
+        return dict(entry.kv_transfer_params)
+
+    def put(self, conversation_id: str, kv_params: dict[str, Any]) -> None:
+        """Store D's kv_transfer_params for a conversation."""
+        self._store[conversation_id] = CachedKVEntry(
+            kv_transfer_params=dict(kv_params),  # defensive copy
+        )
+        logger.info(
+            "conv=%s: cached D blocks (remote_request_id=%s, blocks=%d)",
+            conversation_id,
+            kv_params.get("remote_request_id", "?"),
+            len(kv_params.get("remote_block_ids", [[]])[0])
+            if kv_params.get("remote_block_ids")
+            else 0,
+        )
+
+    def evict_stale(self) -> int:
+        """Remove entries older than TTL. Returns count of evicted entries."""
+        now = time.time()
+        stale = [
+            cid
+            for cid, entry in self._store.items()
+            if now - entry.timestamp > self._ttl
+        ]
+        for cid in stale:
+            del self._store[cid]
+        return len(stale)
+
+    @property
+    def size(self) -> int:
+        return len(self._store)
+
+
+# Global state
+kv_cache = ConversationKVCache(
+    ttl_seconds=450.0
+)  # Must be < VLLM_NIXL_ABORT_REQUEST_TIMEOUT (480s)
+
+
+# Service client helpers
+@dataclass
+class ServiceClient:
+    """Wrapper around an httpx.AsyncClient for a P or D instance."""
+
+    client: httpx.AsyncClient
+    host: str
+    port: int
+    id: int
+
+
+def _make_headers(request_id: str) -> dict[str, str]:
+    """Build HTTP headers for upstream requests."""
+    headers = {"X-Request-Id": request_id}
+    api_key = os.environ.get("OPENAI_API_KEY")
+    if api_key:
+        headers["Authorization"] = f"Bearer {api_key}"
+    return headers
+
+
+async def _send_to_prefill(
+    client: ServiceClient,
+    endpoint: str,
+    req_data: dict[str, Any],
+    request_id: str,
+) -> dict[str, Any]:
+    """Send a non-streaming prefill request (max_tokens=1).
+
+    Returns the JSON response from P, which includes kv_transfer_params.
+    """
+    payload = req_data.copy()
+    payload["stream"] = False
+    payload["max_tokens"] = 1
+    payload.pop("max_completion_tokens", None)
+    payload.pop("min_tokens", None)
+    payload.pop("stream_options", None)
+
+    resp = await client.client.post(
+        endpoint,
+        json=payload,
+        headers=_make_headers(request_id),
+    )
+    resp.raise_for_status()
+    return resp.json()
+
+
+async def _stream_from_decode(
+    client: ServiceClient,
+    endpoint: str,
+    req_data: dict[str, Any],
+    request_id: str,
+    conversation_id: str,
+) -> tuple[str, str | None, dict[str, Any] | None, str, str | None, int | None]:
+    """Stream response from D, capturing text and kv_transfer_params.
+
+    Returns (collected_text, finish_reason, kv_params, response_id, created).
+    Also stores kv_params in the conversation cache.
+    """
+    payload = req_data.copy()
+    payload["stream"] = True
+
+    collected_text = ""
+    finish_reason: str | None = None
+    response_id: str | None = None
+    model_name: str | None = None
+    created: int | None = None
+    captured_kv: dict[str, Any] | None = None
+
+    async with client.client.stream(
+        "POST",
+        endpoint,
+        json=payload,
+        headers=_make_headers(request_id),
+    ) as resp:
+        resp.raise_for_status()
+        async for line in resp.aiter_lines():
+            if not line or not line.startswith("data: "):
+                continue
+            if line == "data: [DONE]":
+                break
+            try:
+                chunk = json.loads(line[6:])
+            except json.JSONDecodeError:
+                continue
+
+            if response_id is None:
+                response_id = chunk.get("id")
+                model_name = chunk.get("model")
+                created = chunk.get("created")
+
+            for choice in chunk.get("choices", []):
+                collected_text += choice.get("text", "")
+                delta = choice.get("delta", {})
+                collected_text += delta.get("content", "")
+                if choice.get("finish_reason"):
+                    finish_reason = choice["finish_reason"]
+
+            kv_params = chunk.get("kv_transfer_params")
+            if kv_params:
+                kv_params["remote_host"] = client.host
+                captured_kv = kv_params
+                if conversation_id:
+                    kv_cache.put(conversation_id, kv_params)
+
+    return (
+        collected_text,
+        finish_reason,
+        captured_kv,
+        response_id or request_id,
+        model_name,
+        created,
+    )
+
+
+async def _stream_from_decode_sse(
+    client: ServiceClient,
+    endpoint: str,
+    req_data: dict[str, Any],
+    request_id: str,
+    conversation_id: str,
+):
+    """Yield SSE chunks from D to the client, capturing kv_transfer_params."""
+    payload = req_data.copy()
+    payload["stream"] = True
+
+    async with client.client.stream(
+        "POST",
+        endpoint,
+        json=payload,
+        headers=_make_headers(request_id),
+    ) as resp:
+        resp.raise_for_status()
+        async for line in resp.aiter_lines():
+            if not line:
+                yield "\n"
+                continue
+
+            if line.startswith("data: ") and line != "data: [DONE]":
+                try:
+                    chunk = json.loads(line[6:])
+                    kv_params = chunk.get("kv_transfer_params")
+                    if kv_params and conversation_id:
+                        kv_params["remote_host"] = client.host
+                        kv_cache.put(conversation_id, kv_params)
+                except json.JSONDecodeError:
+                    pass
+
+            yield line + "\n"
+
+
+# FastAPI application
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Initialize HTTP clients for P and D instances."""
+    app.state.prefill_clients: list[ServiceClient] = []
+    app.state.decode_clients: list[ServiceClient] = []
+
+    for i, (host, port) in enumerate(global_args.prefiller_instances):
+        app.state.prefill_clients.append(
+            ServiceClient(
+                client=httpx.AsyncClient(
+                    timeout=None,
+                    base_url=f"http://{host}:{port}/v1",
+                ),
+                host=host,
+                port=port,
+                id=i,
+            )
+        )
+
+    for i, (host, port) in enumerate(global_args.decoder_instances):
+        app.state.decode_clients.append(
+            ServiceClient(
+                client=httpx.AsyncClient(
+                    timeout=None,
+                    base_url=f"http://{host}:{port}/v1",
+                ),
+                host=host,
+                port=port,
+                id=i,
+            )
+        )
+
+    app.state.prefill_iter = itertools.cycle(range(len(app.state.prefill_clients)))
+    app.state.decode_iter = itertools.cycle(range(len(app.state.decode_clients)))
+
+    logger.info(
+        "Ready: %d prefill, %d decode instances",
+        len(app.state.prefill_clients),
+        len(app.state.decode_clients),
+    )
+    yield
+
+    for sc in app.state.prefill_clients + app.state.decode_clients:
+        await sc.client.aclose()
+
+
+app = FastAPI(title="Disaggregated P/D Proxy (Multi-turn)", lifespan=lifespan)
+
+
+def _next_client(app_state, role: str) -> ServiceClient:
+    if role == "prefill":
+        return app_state.prefill_clients[next(app_state.prefill_iter)]
+    return app_state.decode_clients[next(app_state.decode_iter)]
+
+
+# Request handler
+async def _handle_request(api_path: str, request: Request):
+    """Core request handler for both /v1/chat/completions and /v1/completions."""
+    req_data = await request.json()
+    request_id = str(uuid.uuid4())
+    conversation_id: str = req_data.pop("conversation_id", "")
+    client_wants_stream = req_data.get("stream", False)
+
+    if not conversation_id:
+        logger.warning(
+            "[%s] No conversation_id provided — KV cache reuse disabled "
+            "for this request. Add a 'conversation_id' field to enable "
+            "cross-turn KV sharing.",
+            request_id,
+        )
+
+    # Step 1: Look up cached D blocks from the previous turn
+    cached_kv = kv_cache.get(conversation_id) if conversation_id else None
+
+    if cached_kv:
+        # Tell P to read D's blocks (bidirectional transfer)
+        cached_kv["do_remote_decode"] = True
+        cached_kv["do_remote_prefill"] = False
+        req_data["kv_transfer_params"] = cached_kv
+        logger.info(
+            "[%s] conv=%s: sending D's cached blocks to P (remote_request_id=%s)",
+            request_id,
+            conversation_id,
+            cached_kv.get("remote_request_id"),
+        )
+    else:
+        # No cached blocks — P recomputes from scratch
+        req_data["kv_transfer_params"] = {
+            "do_remote_decode": True,
+            "do_remote_prefill": False,
+            "remote_engine_id": None,
+            "remote_block_ids": None,
+            "remote_host": None,
+            "remote_port": None,
+        }
+        logger.info("[%s] conv=%s: cache MISS", request_id, conversation_id)
+
+    # Step 2: Send to Prefill node (non-streaming, max_tokens=1)
+    prefill_client = _next_client(request.app.state, "prefill")
+    t0 = time.time()
+    prefill_resp = await _send_to_prefill(
+        prefill_client,
+        api_path,
+        req_data,
+        request_id,
+    )
+    logger.info(
+        "[%s] Prefill done in %.0fms",
+        request_id,
+        (time.time() - t0) * 1000,
+    )
+
+    # Attach P's kv_transfer_params for D to read P's blocks
+    p_kv_params = prefill_resp.get("kv_transfer_params", {})
+    if p_kv_params:
+        p_kv_params["remote_host"] = prefill_client.host
+        req_data["kv_transfer_params"] = p_kv_params
+
+    # Step 3: Stream from Decode node, capturing kv_transfer_params
+    decode_client = _next_client(request.app.state, "decode")
+
+    if client_wants_stream:
+        return StreamingResponse(
+            _stream_from_decode_sse(
+                decode_client,
+                api_path,
+                req_data,
+                request_id,
+                conversation_id,
+            ),
+            media_type="text/event-stream",
+        )
+
+    text, finish_reason, _, resp_id, model, created = await _stream_from_decode(
+        decode_client,
+        api_path,
+        req_data,
+        request_id,
+        conversation_id,
+    )
+
+    # Build OpenAI-compatible response
+    is_chat = "messages" in req_data
+    if is_chat:
+        body = {
+            "id": resp_id,
+            "object": "chat.completion",
+            "created": created or int(time.time()),
+            "model": model or req_data.get("model", ""),
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {"role": "assistant", "content": text},
+                    "finish_reason": finish_reason,
+                }
+            ],
+            "usage": None,
+        }
+    else:
+        body = {
+            "id": resp_id,
+            "object": "text_completion",
+            "created": created or int(time.time()),
+            "model": model or req_data.get("model", ""),
+            "choices": [
+                {
+                    "index": 0,
+                    "text": text,
+                    "logprobs": None,
+                    "finish_reason": finish_reason,
+                }
+            ],
+            "usage": None,
+        }
+    return JSONResponse(content=body)
+
+
+# Routes
+@app.post("/v1/chat/completions")
+async def chat_completions(request: Request):
+    return await _handle_request("/chat/completions", request)
+
+
+@app.post("/v1/completions")
+async def completions(request: Request):
+    return await _handle_request("/completions", request)
+
+
+@app.get("/health")
+async def health():
+    evicted = kv_cache.evict_stale()
+    return {
+        "status": "ok",
+        "cached_conversations": kv_cache.size,
+        "evicted_stale": evicted,
+    }
+
+
+# CLI
+def parse_args() -> argparse.Namespace:
+    p = argparse.ArgumentParser(
+        description="Disaggregated P/D proxy with bidirectional KV transfer",
+    )
+    p.add_argument("--host", default="0.0.0.0")
+    p.add_argument("--port", type=int, default=8000)
+    p.add_argument(
+        "--prefiller-host",
+        "--prefiller-hosts",
+        dest="prefiller_hosts",
+        nargs="+",
+        default=["localhost"],
+    )
+    p.add_argument(
+        "--prefiller-port",
+        "--prefiller-ports",
+        dest="prefiller_ports",
+        type=int,
+        nargs="+",
+        default=[8100],
+    )
+    p.add_argument(
+        "--decoder-host",
+        "--decoder-hosts",
+        dest="decoder_hosts",
+        nargs="+",
+        default=["localhost"],
+    )
+    p.add_argument(
+        "--decoder-port",
+        "--decoder-ports",
+        dest="decoder_ports",
+        type=int,
+        nargs="+",
+        default=[8200],
+    )
+    args = p.parse_args()
+
+    if len(args.prefiller_hosts) != len(args.prefiller_ports):
+        p.error("Number of prefiller hosts must match ports")
+    if len(args.decoder_hosts) != len(args.decoder_ports):
+        p.error("Number of decoder hosts must match ports")
+
+    args.prefiller_instances = list(zip(args.prefiller_hosts, args.prefiller_ports))
+    args.decoder_instances = list(zip(args.decoder_hosts, args.decoder_ports))
+    return args
+
+
+if __name__ == "__main__":
+    global global_args
+    global_args = parse_args()
+
+    import uvicorn
+
+    uvicorn.run(app, host=global_args.host, port=global_args.port)
diff --git a/examples/disaggregated/disaggregated_serving/example_mm_serve.py b/examples/disaggregated/disaggregated_serving/example_mm_serve.py
new file mode 100644
index 000000000000..11d81236c577
--- /dev/null
+++ b/examples/disaggregated/disaggregated_serving/example_mm_serve.py
@@ -0,0 +1,117 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Disaggregated multimodal serving: render → generate round-trip.
+
+Demonstrates the two-phase disaggregated flow:
+  1. /v1/chat/completions/render  – preprocesses a multimodal chat request
+     into token IDs and serialized tensor features.
+  2. /inference/v1/generate       – runs inference on the preprocessed tokens.
+
+The render response is passed *directly* to generate with only
+``sampling_params`` added, showing that the two endpoints compose with
+zero client-side transformation.
+
+Launch the server first:
+
+    vllm serve Qwen/Qwen3-VL-2B-Instruct \
+        --dtype bfloat16 --max-model-len 4096 --enforce-eager
+
+Then run this script:
+
+    python example_mm_serve.py
+"""
+
+import io
+
+import pybase64 as base64
+import requests
+from PIL import Image
+from transformers import AutoTokenizer
+
+BASE_URL = "http://localhost:8000"
+MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"
+
+
+def make_data_url(image: Image.Image) -> str:
+    """Encode a PIL image as a base64 data URL."""
+    buf = io.BytesIO()
+    image.save(buf, format="PNG")
+    b64 = base64.b64encode(buf.getvalue()).decode()
+    return f"data:image/png;base64,{b64}"
+
+
+def main():
+    # -- Step 1: Create a test image (solid red) -------------------------
+    image = Image.new("RGB", (224, 224), color=(255, 0, 0))
+    data_url = make_data_url(image)
+    print("Created 224x224 red test image")
+
+    # -- Step 2: Render (preprocess) -------------------------------------
+    render_payload = {
+        "model": MODEL_NAME,
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": data_url}},
+                    {
+                        "type": "text",
+                        "text": "What color is this image? Answer in one word.",
+                    },
+                ],
+            }
+        ],
+    }
+
+    print("\n--- Render ---")
+    render_resp = requests.post(
+        f"{BASE_URL}/v1/chat/completions/render", json=render_payload
+    )
+    render_resp.raise_for_status()
+    render_data = render_resp.json()
+
+    print(f"Response keys: {list(render_data.keys())}")
+    print(f"Number of token_ids: {len(render_data['token_ids'])}")
+
+    features = render_data.get("features")
+    if features and features.get("kwargs_data"):
+        print(f"kwargs_data modalities: {list(features['kwargs_data'].keys())}")
+        for modality, items in features["kwargs_data"].items():
+            print(
+                f"  {modality}: {len(items)} item(s), "
+                f"first item type: {type(items[0])} length: {len(items[0])}"
+                if items
+                else "First item: (empty)"
+            )
+    else:
+        print("WARNING: no kwargs_data in render response")
+
+    # -- Step 3: Generate (inference) ------------------------------------
+    # Pass the render output directly — only add sampling_params.
+    generate_payload = render_data
+    generate_payload["sampling_params"] = {
+        "max_tokens": 20,
+        "temperature": 0.0,
+    }
+
+    print("\n--- Generate ---")
+    gen_resp = requests.post(f"{BASE_URL}/inference/v1/generate", json=generate_payload)
+    gen_resp.raise_for_status()
+    gen_data = gen_resp.json()
+
+    # -- Step 4: Decode & print ------------------------------------------
+    output_ids = gen_data["choices"][0]["token_ids"]
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    text = tokenizer.decode(output_ids, skip_special_tokens=True)
+
+    print(f"Output token count: {len(output_ids)}")
+    print(f"Generated text: {text!r}")
+
+    if "red" in text.lower():
+        print("\nModel correctly identified the red image.")
+    else:
+        print(f"\nWARNING: Expected 'red' in output, got: {text!r}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/online_serving/disaggregated_serving/kv_events.sh b/examples/disaggregated/disaggregated_serving/kv_events.sh
similarity index 100%
rename from examples/online_serving/disaggregated_serving/kv_events.sh
rename to examples/disaggregated/disaggregated_serving/kv_events.sh
diff --git a/examples/disaggregated/disaggregated_serving/moriio_toy_proxy_server.py b/examples/disaggregated/disaggregated_serving/moriio_toy_proxy_server.py
new file mode 100644
index 000000000000..aceb7a9b81c3
--- /dev/null
+++ b/examples/disaggregated/disaggregated_serving/moriio_toy_proxy_server.py
@@ -0,0 +1,418 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import argparse
+import asyncio
+import copy
+import logging
+import os
+import socket
+import threading
+import uuid
+from urllib.parse import urlparse
+
+import aiohttp
+import msgpack
+import zmq
+from quart import Quart, Request, make_response, request
+
+from vllm.distributed.kv_transfer.kv_connector.v1.moriio.moriio_common import (
+    MoRIIOConstants,
+)
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+prefill_instances: list[dict] = []
+decode_instances: list[dict] = []
+request_nums = 0
+app = Quart(__name__)
+
+
+TRANSFER_TYPE = None
+
+
+_list_lock = threading.RLock()
+
+
+def _listen_for_register(hostname, port):
+    context = zmq.Context()
+    router_socket = context.socket(zmq.ROUTER)
+    router_socket.bind(f"tcp://{hostname}:{port}")
+    poller = zmq.Poller()
+    poller.register(router_socket, zmq.POLLIN)
+    global prefill_instances
+    global decode_instances
+
+    while True:
+        socks = dict(poller.poll())
+        if router_socket in socks:
+            remote_addr, msg = router_socket.recv_multipart()
+            data = msgpack.loads(msg)
+            if data.get("type") == "HELLO":
+                pass
+            elif data.get("type") in ("P", "D"):
+                role = data["type"]
+                required_keys = {
+                    "http_address",
+                    "zmq_address",
+                    "dp_size",
+                    "tp_size",
+                    "transfer_mode",
+                }
+                missing = required_keys - data.keys()
+                if missing:
+                    logger.error(
+                        "Registration message missing required keys %s; skipping",
+                        missing,
+                    )
+                    continue
+                # Derive request_address from http_address
+                # api path suffix is appended at request time
+                instance = {
+                    "role": role,
+                    "request_address": f"http://{data['http_address']}/v1",
+                    "http_address": data["http_address"],
+                    "zmq_address": data["zmq_address"],
+                    "dp_size": data["dp_size"],
+                    "tp_size": data["tp_size"],
+                    "transfer_mode": data["transfer_mode"],
+                }
+                # zmq_address format: "host:IP,handshake:PORT,notify:PORT"
+                # Stored verbatim; embedded into the request_id by handle_request.
+
+                global TRANSFER_TYPE
+                transfer_mode = instance["transfer_mode"]
+                target_list = prefill_instances if role == "P" else decode_instances
+                with _list_lock:
+                    if TRANSFER_TYPE is None:
+                        TRANSFER_TYPE = transfer_mode
+                        logger.info("SET TRANSFER TYPE TO %s", TRANSFER_TYPE)
+                    elif transfer_mode != TRANSFER_TYPE:
+                        logger.error(
+                            "Mismatched transfer mode: expected %s, got %s;"
+                            " skipping registration of %s",
+                            TRANSFER_TYPE,
+                            transfer_mode,
+                            data["http_address"],
+                        )
+                        continue
+                    existing_idx = next(
+                        (
+                            idx
+                            for idx, i in enumerate(target_list)
+                            if i.get("http_address") == data["http_address"]
+                        ),
+                        None,
+                    )
+                    if existing_idx is not None:
+                        target_list[existing_idx] = instance
+                        logger.info(
+                            "Updated existing %s instance: %s",
+                            "Prefill" if role == "P" else "Decode",
+                            instance,
+                        )
+                    else:
+                        target_list.append(instance)
+                        logger.info(
+                            "Registered %s instance: %s",
+                            "Prefill" if role == "P" else "Decode",
+                            instance,
+                        )
+            else:
+                logger.warning(
+                    "Received message with unrecognized type %r; ignoring",
+                    data.get("type"),
+                )
+
+
+def start_service_discovery(hostname, port):
+    if not hostname:
+        hostname = socket.gethostname()
+    if port == 0:
+        raise ValueError("Port cannot be 0")
+
+    _listener_thread = threading.Thread(
+        target=_listen_for_register, args=(hostname, port), daemon=True
+    )
+    _listener_thread.start()
+    return _listener_thread
+
+
+async def send_request_to_prefill(
+    endpoint, req_data, request_id, selected_prefill_dp_rank
+):
+    req_data_copy = req_data
+
+    req_data_copy["kv_transfer_params"].update(
+        {
+            "do_remote_decode": True,
+            "do_remote_prefill": False,
+            "remote_engine_id": None,
+            "remote_block_ids": None,
+        }
+    )
+    req_data_copy["stream"] = False
+    req_data_copy["max_tokens"] = 1
+    if "max_completion_tokens" in req_data_copy:
+        req_data_copy["max_completion_tokens"] = 1
+    if "stream_options" in req_data_copy:
+        del req_data_copy["stream_options"]
+    async with aiohttp.ClientSession(
+        timeout=aiohttp.ClientTimeout(total=6 * 6000 * 6000)
+    ) as session:
+        headers = {
+            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
+            "X-Request-Id": request_id,
+        }
+        if selected_prefill_dp_rank is not None:
+            headers["X-data-parallel-rank"] = str(selected_prefill_dp_rank)
+        async with session.post(
+            url=endpoint, json=req_data_copy, headers=headers
+        ) as response:
+            if response.status == 200:
+                return await response.json()
+
+            else:
+                error_message = (
+                    f"send_request_to_prefill response ={response},"
+                    f"reason={response.reason}, status={response.status},"
+                    f"method={response.method}, url={response.url},"
+                    f"real_url={response.real_url}"
+                )
+                raise RuntimeError(error_message)
+
+
+async def start_decode_request(endpoint, req_data, request_id):
+    session = aiohttp.ClientSession(
+        timeout=aiohttp.ClientTimeout(total=6 * 6000 * 6000)
+    )
+    headers = {
+        "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
+        "X-Request-Id": request_id,
+    }
+    response = await session.post(url=endpoint, json=req_data, headers=headers)
+    return session, response
+
+
+async def stream_decode_response(session, response, request_id):
+    try:
+        if response.status == 200:
+            async for chunk_bytes in response.content.iter_chunked(1024):
+                yield chunk_bytes
+        else:
+            error_message = (
+                f"stream_decode_response response ={response},"
+                f"reason={response.reason}, status={response.status},"
+                f"method={response.method}, url={response.url},"
+                f"real_url={response.real_url}"
+            )
+            raise RuntimeError(error_message)
+    finally:
+        await session.close()
+
+
+def example_round_robin_dp_loader(request_number, dp_size):
+    return request_nums % dp_size
+
+
+@app.route("/v1/completions", methods=["POST"])
+async def handle_completions_request():
+    return await handle_request("/completions", request)
+
+
+@app.route("/v1/chat/completions", methods=["POST"])
+async def handle_chat_completions_request():
+    return await handle_request("/chat/completions", request)
+
+
+async def handle_request(api: str, request: Request):
+    try:
+        with _list_lock:
+            global request_nums
+            request_nums += 1
+
+        req_data = await request.get_json()
+
+        prefill_instance_endpoint = None
+        decode_instance_endpoint = None
+        error_msg = (
+            "Service Unavailable: No prefill or decode instances are registered."
+        )
+        if not prefill_instances or not decode_instances:
+            return await make_response(
+                (
+                    error_msg,
+                    503,
+                )
+            )
+        pid = request_nums % len(prefill_instances)
+        did = request_nums % len(decode_instances)
+        prefill_instance_endpoint = prefill_instances[pid]
+        decode_instance_endpoint = decode_instances[did]
+
+        selected_prefill_dp_rank = None
+        if prefill_instance_endpoint["dp_size"] > 1:
+            selected_prefill_dp_rank = example_round_robin_dp_loader(
+                request_nums // len(prefill_instance_endpoint),
+                prefill_instance_endpoint["dp_size"],
+            )
+
+        # Embed both zmq_addresses in the request_id so the connector can parse
+        # the peer's host/ports from it, similar to P2P-NCCL
+        uid = str(uuid.uuid4()).replace("-", "")
+        request_id = (
+            f"___prefill_addr_{prefill_instance_endpoint['zmq_address']}"
+            f"___decode_addr_{decode_instance_endpoint['zmq_address']}"
+            f"_{uid}"
+        )
+
+        transfer_id = f"{MoRIIOConstants.TRANSFER_PREFIX}-{str(uuid.uuid4())}"
+
+        req_data_to_prefill = copy.deepcopy(req_data)
+        req_data_to_prefill["kv_transfer_params"] = {}
+        req_data["kv_transfer_params"] = {}
+        req_data_to_prefill["kv_transfer_params"]["remote_dp_size"] = (
+            decode_instance_endpoint["dp_size"]
+        )
+        req_data_to_prefill["kv_transfer_params"]["remote_tp_size"] = (
+            decode_instance_endpoint["tp_size"]
+        )
+        req_data_to_prefill["kv_transfer_params"]["transfer_id"] = transfer_id
+
+        prefill_request_url = prefill_instance_endpoint["request_address"] + api
+        send_prefill_task = asyncio.create_task(
+            send_request_to_prefill(
+                prefill_request_url,
+                req_data_to_prefill,
+                request_id,
+                selected_prefill_dp_rank,
+            )
+        )
+
+        req_data["max_tokens"] -= 1
+
+        req_data["kv_transfer_params"] = {
+            "do_remote_decode": False,
+            "do_remote_prefill": True,
+            "remote_engine_id": None,
+            "remote_block_ids": None,
+            "transfer_id": transfer_id,
+        }
+        if TRANSFER_TYPE == "READ":
+            # In read mode, prefill and decode are executed serially.
+            prefill_response = await send_prefill_task
+            prefill_kv = prefill_response["kv_transfer_params"]
+            req_data["kv_transfer_params"]["remote_engine_id"] = prefill_kv[
+                "remote_engine_id"
+            ]
+            req_data["kv_transfer_params"]["remote_block_ids"] = prefill_kv[
+                "remote_block_ids"
+            ]
+            req_data["kv_transfer_params"]["transfer_id"] = prefill_kv["transfer_id"]
+
+        req_data["kv_transfer_params"]["remote_dp_size"] = prefill_instance_endpoint[
+            "dp_size"
+        ]
+        req_data["kv_transfer_params"]["remote_tp_size"] = prefill_instance_endpoint[
+            "tp_size"
+        ]
+
+        if selected_prefill_dp_rank is not None:
+            req_data["kv_transfer_params"]["remote_dp_rank"] = selected_prefill_dp_rank
+
+        decode_request_url = decode_instance_endpoint["request_address"] + api
+        decode_request_task = asyncio.create_task(
+            start_decode_request(decode_request_url, req_data, request_id)
+        )
+
+        session, decode_response = await decode_request_task
+        stream_generator = stream_decode_response(session, decode_response, request_id)
+        response = await make_response(stream_generator)
+        return response
+    except Exception as e:
+        logger.exception("An error occurred while handling the request: %s", e)
+        return await make_response(
+            (
+                f"Internal Server Error: {e!s}",
+                500,
+            )
+        )
+
+
+async def send_profile_cmd(req_data: dict, profiler_cmd: str):
+    assert profiler_cmd in {"start", "stop"}
+
+    with _list_lock:
+        p_instances = list(prefill_instances)
+        d_instances = list(decode_instances)
+
+    if not p_instances and not d_instances:
+        raise RuntimeError(
+            "Service Unavailable: No prefill or decode instances are registered."
+        )
+
+    headers = {
+        "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
+    }
+
+    tasks = []
+
+    async with aiohttp.ClientSession(
+        timeout=aiohttp.ClientTimeout(total=60)
+    ) as session:
+        for instances in (p_instances, d_instances):
+            for inst in instances:
+                _p = urlparse(inst["request_address"])
+                url = f"http://{_p.hostname}:{_p.port}/{profiler_cmd}_profile"
+
+                tasks.append(
+                    session.post(
+                        url,
+                        json=req_data,
+                        headers=headers,
+                    )
+                )
+
+        responses = await asyncio.gather(*tasks, return_exceptions=True)
+
+        for r in responses:
+            if isinstance(r, Exception):
+                raise r
+            if r.status >= 400:
+                msg = await r.text()
+                raise RuntimeError(f"{profiler_cmd}_profile failed: {r.status}, {msg}")
+
+        return await responses[0].json()
+
+
+@app.post("/start_profile")
+async def start_profile():
+    try:
+        req_data = await request.get_json()
+        return await send_profile_cmd(req_data, "start")
+    except Exception as e:
+        logger.exception("start_profile failed: %s", e)
+        return await make_response((str(e), 500))
+
+
+@app.post("/stop_profile")
+async def stop_profile():
+    try:
+        req_data = await request.get_json()
+        return await send_profile_cmd(req_data, "stop")
+    except Exception as e:
+        logger.exception("stop_profile failed: %s", e)
+        return await make_response((str(e), 500))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--port", type=int, default=10001)
+    args = parser.parse_args()
+
+    t = start_service_discovery("0.0.0.0", 36367)
+    app.debug = True
+    app.config["BODY_TIMEOUT"] = 360000
+    app.config["RESPONSE_TIMEOUT"] = 360000
+
+    app.run(host="0.0.0.0", port=args.port)
+    t.join()
diff --git a/examples/online_serving/ec_both_encoder/ec_both_encoder.sh b/examples/disaggregated/ec_both_encoder/ec_both_encoder.sh
old mode 100755
new mode 100644
similarity index 100%
rename from examples/online_serving/ec_both_encoder/ec_both_encoder.sh
rename to examples/disaggregated/ec_both_encoder/ec_both_encoder.sh
diff --git a/examples/offline_inference/disaggregated-prefill-v1/README.md b/examples/disaggregated/example_connector/README.md
similarity index 81%
rename from examples/offline_inference/disaggregated-prefill-v1/README.md
rename to examples/disaggregated/example_connector/README.md
index abf6883f8d3e..43f16223896c 100644
--- a/examples/offline_inference/disaggregated-prefill-v1/README.md
+++ b/examples/disaggregated/example_connector/README.md
@@ -5,6 +5,6 @@ This example contains scripts that demonstrate disaggregated prefill in the offl
 ## Files
 
 - `run.sh` - A helper script that will run `prefill_example.py` and `decode_example.py` sequentially.
-    - Make sure you are in the `examples/offline_inference/disaggregated-prefill-v1` directory before running `run.sh`.
+    - Make sure you are in the `examples/disaggregated/example_connector` directory before running `run.sh`.
 - `prefill_example.py` - A script which performs prefill only, saving the KV state to the `local_storage` directory and the prompts to `output.txt`.
 - `decode_example.py` - A script which performs decode only, loading the KV state from the `local_storage` directory and the prompts from `output.txt`.
diff --git a/examples/offline_inference/disaggregated-prefill-v1/decode_example.py b/examples/disaggregated/example_connector/decode_example.py
similarity index 100%
rename from examples/offline_inference/disaggregated-prefill-v1/decode_example.py
rename to examples/disaggregated/example_connector/decode_example.py
diff --git a/examples/offline_inference/disaggregated-prefill-v1/prefill_example.py b/examples/disaggregated/example_connector/prefill_example.py
similarity index 100%
rename from examples/offline_inference/disaggregated-prefill-v1/prefill_example.py
rename to examples/disaggregated/example_connector/prefill_example.py
diff --git a/examples/offline_inference/disaggregated-prefill-v1/run.sh b/examples/disaggregated/example_connector/run.sh
similarity index 100%
rename from examples/offline_inference/disaggregated-prefill-v1/run.sh
rename to examples/disaggregated/example_connector/run.sh
diff --git a/examples/offline_inference/prefix_caching_flexkv.py b/examples/disaggregated/flexkv_connector/prefix_caching_flexkv.py
similarity index 98%
rename from examples/offline_inference/prefix_caching_flexkv.py
rename to examples/disaggregated/flexkv_connector/prefix_caching_flexkv.py
index f2ffb75ef845..b67c2459319f 100644
--- a/examples/offline_inference/prefix_caching_flexkv.py
+++ b/examples/disaggregated/flexkv_connector/prefix_caching_flexkv.py
@@ -14,7 +14,7 @@
 
 Usage:
     1. Run this script:
-       python examples/offline_inference/prefix_caching_flexkv.py \
+       python examples/disaggregated/flexkv_connector/prefix_caching_flexkv.py \
            --model /path/to/your/model
 
     2. Arguments:
diff --git a/examples/offline_inference/kv_load_failure_recovery/README.md b/examples/disaggregated/kv_load_failure_recovery_offline/README.md
similarity index 91%
rename from examples/offline_inference/kv_load_failure_recovery/README.md
rename to examples/disaggregated/kv_load_failure_recovery_offline/README.md
index 176141b5de4a..7205b2135ebb 100644
--- a/examples/offline_inference/kv_load_failure_recovery/README.md
+++ b/examples/disaggregated/kv_load_failure_recovery_offline/README.md
@@ -1,12 +1,12 @@
 # KV Load Failure Recovery Test
 
-This example builds upon the `disaggregated-prefill-v1` example in `examples/offline_inference`.
+This example builds upon the `example_connector` example in `examples/disaggregated`.
 
 It demonstrates vLLM's ability to recover from KV load failures in both synchronous and asynchronous loading modes. The goal is to verify that vLLM correctly identifies invalid KV blocks, reschedules the affected requests, and ensures successful and consistent output.
 
 ## Files
 
-- `prefill_example.py` – performs the prefill stage and saves KV data (same as in `disaggregated-prefill-v1`).
+- `prefill_example.py` – performs the prefill stage and saves KV data (same as in `example_connector`).
 - `decode_example.py` – performs the decode stage. Accepts:
     - `--simulate-failure`: simulates KV load failure using a custom connector.
     - `--async-load`: enables asynchronous KV loading mode.
diff --git a/examples/offline_inference/kv_load_failure_recovery/decode_example.py b/examples/disaggregated/kv_load_failure_recovery_offline/decode_example.py
similarity index 100%
rename from examples/offline_inference/kv_load_failure_recovery/decode_example.py
rename to examples/disaggregated/kv_load_failure_recovery_offline/decode_example.py
diff --git a/examples/offline_inference/kv_load_failure_recovery/load_recovery_example_connector.py b/examples/disaggregated/kv_load_failure_recovery_offline/load_recovery_example_connector.py
similarity index 93%
rename from examples/offline_inference/kv_load_failure_recovery/load_recovery_example_connector.py
rename to examples/disaggregated/kv_load_failure_recovery_offline/load_recovery_example_connector.py
index 7aab07f8a2c3..5bae04426746 100644
--- a/examples/offline_inference/kv_load_failure_recovery/load_recovery_example_connector.py
+++ b/examples/disaggregated/kv_load_failure_recovery_offline/load_recovery_example_connector.py
@@ -20,6 +20,7 @@
 
 if TYPE_CHECKING:
     from vllm.v1.core.sched.output import SchedulerOutput
+    from vllm.v1.kv_cache_interface import KVCacheConfig
 
 logger = logging.getLogger()
 logging.basicConfig(level=logging.INFO)
@@ -35,8 +36,17 @@ def from_base(cls, base: ExampleConnectorMetadata):
 
 
 class LoadRecoveryExampleConnector(ExampleConnector):
-    def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
-        super().__init__(vllm_config=vllm_config, role=role)
+    def __init__(
+        self,
+        vllm_config: "VllmConfig",
+        role: KVConnectorRole,
+        kv_cache_config: "KVCacheConfig",
+    ):
+        super().__init__(
+            vllm_config=vllm_config,
+            role=role,
+            kv_cache_config=kv_cache_config,
+        )
         self._async_load = vllm_config.kv_transfer_config.get_from_extra_config(
             "async_load", False
         )
diff --git a/examples/offline_inference/kv_load_failure_recovery/prefill_example.py b/examples/disaggregated/kv_load_failure_recovery_offline/prefill_example.py
similarity index 100%
rename from examples/offline_inference/kv_load_failure_recovery/prefill_example.py
rename to examples/disaggregated/kv_load_failure_recovery_offline/prefill_example.py
diff --git a/examples/offline_inference/kv_load_failure_recovery/run.sh b/examples/disaggregated/kv_load_failure_recovery_offline/run.sh
old mode 100755
new mode 100644
similarity index 100%
rename from examples/offline_inference/kv_load_failure_recovery/run.sh
rename to examples/disaggregated/kv_load_failure_recovery_offline/run.sh
diff --git a/examples/others/lmcache/README.md b/examples/disaggregated/lmcache/README.md
similarity index 100%
rename from examples/others/lmcache/README.md
rename to examples/disaggregated/lmcache/README.md
diff --git a/examples/others/lmcache/cpu_offload_lmcache.py b/examples/disaggregated/lmcache/cpu_offload_lmcache.py
similarity index 100%
rename from examples/others/lmcache/cpu_offload_lmcache.py
rename to examples/disaggregated/lmcache/cpu_offload_lmcache.py
diff --git a/examples/others/lmcache/disagg_prefill_lmcache_v0.py b/examples/disaggregated/lmcache/disagg_prefill_lmcache_v0.py
similarity index 100%
rename from examples/others/lmcache/disagg_prefill_lmcache_v0.py
rename to examples/disaggregated/lmcache/disagg_prefill_lmcache_v0.py
diff --git a/examples/others/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-decoder-config.yaml b/examples/disaggregated/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-decoder-config.yaml
similarity index 100%
rename from examples/others/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-decoder-config.yaml
rename to examples/disaggregated/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-decoder-config.yaml
diff --git a/examples/others/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-prefiller-config.yaml b/examples/disaggregated/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-prefiller-config.yaml
similarity index 100%
rename from examples/others/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-prefiller-config.yaml
rename to examples/disaggregated/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-prefiller-config.yaml
diff --git a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh b/examples/disaggregated/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh
similarity index 100%
rename from examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh
rename to examples/disaggregated/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh
diff --git a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_proxy_server.py b/examples/disaggregated/lmcache/disagg_prefill_lmcache_v1/disagg_proxy_server.py
similarity index 100%
rename from examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_proxy_server.py
rename to examples/disaggregated/lmcache/disagg_prefill_lmcache_v1/disagg_proxy_server.py
diff --git a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh b/examples/disaggregated/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh
similarity index 100%
rename from examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh
rename to examples/disaggregated/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh
diff --git a/examples/others/lmcache/kv_cache_sharing_lmcache_v1.py b/examples/disaggregated/lmcache/kv_cache_sharing_lmcache_v1.py
similarity index 100%
rename from examples/others/lmcache/kv_cache_sharing_lmcache_v1.py
rename to examples/disaggregated/lmcache/kv_cache_sharing_lmcache_v1.py
diff --git a/examples/online_serving/disaggregated_serving/mooncake_connector/mooncake_connector_proxy.py b/examples/disaggregated/mooncake_connector/mooncake_connector_proxy.py
similarity index 100%
rename from examples/online_serving/disaggregated_serving/mooncake_connector/mooncake_connector_proxy.py
rename to examples/disaggregated/mooncake_connector/mooncake_connector_proxy.py
diff --git a/examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh b/examples/disaggregated/mooncake_connector/run_mooncake_connector.sh
similarity index 100%
rename from examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh
rename to examples/disaggregated/mooncake_connector/run_mooncake_connector.sh
diff --git a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh b/examples/disaggregated/p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh
similarity index 100%
rename from examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh
rename to examples/disaggregated/p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh
diff --git a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py b/examples/disaggregated/p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py
similarity index 100%
rename from examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py
rename to examples/disaggregated/p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py
diff --git a/examples/offline_inference/automatic_prefix_caching.py b/examples/features/automatic_prefix_caching/automatic_prefix_caching_offline.py
similarity index 98%
rename from examples/offline_inference/automatic_prefix_caching.py
rename to examples/features/automatic_prefix_caching/automatic_prefix_caching_offline.py
index 2d3c28d9dd4f..801b4b769792 100644
--- a/examples/offline_inference/automatic_prefix_caching.py
+++ b/examples/features/automatic_prefix_caching/automatic_prefix_caching_offline.py
@@ -15,7 +15,7 @@
 but ask different questions.
 
 Run:
-python examples/offline_inference/automatic_prefix_caching.py
+python examples/features/automatic_prefix_caching/automatic_prefix_caching_offline.py
 """
 
 import time
diff --git a/examples/offline_inference/prefix_caching.py b/examples/features/automatic_prefix_caching/prefix_caching_offline.py
similarity index 100%
rename from examples/offline_inference/prefix_caching.py
rename to examples/features/automatic_prefix_caching/prefix_caching_offline.py
diff --git a/examples/offline_inference/reproducibility.py b/examples/features/batch_invariance/reproducibility_offline.py
similarity index 100%
rename from examples/offline_inference/reproducibility.py
rename to examples/features/batch_invariance/reproducibility_offline.py
diff --git a/examples/offline_inference/context_extension.py b/examples/features/context_extension/context_extension_offline.py
similarity index 96%
rename from examples/offline_inference/context_extension.py
rename to examples/features/context_extension/context_extension_offline.py
index fae8590f914e..3874288b5e11 100644
--- a/examples/offline_inference/context_extension.py
+++ b/examples/features/context_extension/context_extension_offline.py
@@ -6,7 +6,7 @@
 and run a simple chat example.
 
 Usage:
-    python examples/offline_inference/context_extension.py
+    python examples/features/context_extension/context_extension_offline.py
 """
 
 from vllm import LLM, RequestOutput, SamplingParams
diff --git a/examples/offline_inference/data_parallel.py b/examples/features/data_parallel/data_parallel_offline.py
similarity index 96%
rename from examples/offline_inference/data_parallel.py
rename to examples/features/data_parallel/data_parallel_offline.py
index 287409fa2b5c..c38ff7297afc 100644
--- a/examples/offline_inference/data_parallel.py
+++ b/examples/features/data_parallel/data_parallel_offline.py
@@ -3,14 +3,14 @@
 """
 Usage:
 Single node:
-    python examples/offline_inference/data_parallel.py \
+    python examples/features/data_parallel/data_parallel_offline.py \
             --model="ibm-research/PowerMoE-3b" \
             -dp=2 \
             -tp=2
 
 Multi-node:
     Node 0 (assume the node has ip of 10.99.48.128):
-            python examples/offline_inference/data_parallel.py \
+            python examples/features/data_parallel/data_parallel_offline.py \
                     --model="ibm-research/PowerMoE-3b" \
                     -dp=2 \
                     -tp=2 \
@@ -19,7 +19,7 @@
                     --dp-master-addr=10.99.48.128 \
                     --dp-master-port=13345
     Node 1:
-            python examples/offline_inference/data_parallel.py \
+            python examples/features/data_parallel/data_parallel_offline.py \
                     --model="ibm-research/PowerMoE-3b" \
                     -dp=2 \
                     -tp=2 \
diff --git a/examples/online_serving/multi_instance_data_parallel.py b/examples/features/data_parallel/multi_instance_data_parallel.py
similarity index 97%
rename from examples/online_serving/multi_instance_data_parallel.py
rename to examples/features/data_parallel/multi_instance_data_parallel.py
index 04d21e048940..66fcd3d24644 100644
--- a/examples/online_serving/multi_instance_data_parallel.py
+++ b/examples/features/data_parallel/multi_instance_data_parallel.py
@@ -12,7 +12,7 @@
 """
 To run this example, run the following commands simultaneously with
 different CUDA_VISIBLE_DEVICES:
-    python examples/online_serving/multi_instance_data_parallel.py
+    python examples/features/data_parallel/multi_instance_data_parallel.py
 
     vllm serve ibm-research/PowerMoE-3b -dp 2 -dpr 1 \
         --data-parallel-address 127.0.0.1 --data-parallel-rpc-port 62300 \
diff --git a/examples/online_serving/kv_events_subscriber.py b/examples/features/kv_events/kv_events_subscriber.py
similarity index 98%
rename from examples/online_serving/kv_events_subscriber.py
rename to examples/features/kv_events/kv_events_subscriber.py
index 499ab1f39466..0512297fcf4f 100644
--- a/examples/online_serving/kv_events_subscriber.py
+++ b/examples/features/kv_events/kv_events_subscriber.py
@@ -43,10 +43,13 @@ class BlockStored(KVCacheEvent):
     prompt embeddings data, etc. for that specific block.
     """
 
+    group_idx: int | None = None
+
 
 class BlockRemoved(KVCacheEvent):
     block_hashes: list[ExternalBlockHash]
     medium: str | None
+    group_idx: int | None = None
 
 
 class AllBlocksCleared(KVCacheEvent):
diff --git a/examples/others/logging_configuration.md b/examples/features/logging_configuration.md
similarity index 100%
rename from examples/others/logging_configuration.md
rename to examples/features/logging_configuration.md
diff --git a/examples/offline_inference/logits_processor/README.md b/examples/features/logits_processor/README.md
similarity index 90%
rename from examples/offline_inference/logits_processor/README.md
rename to examples/features/logits_processor/README.md
index 6b6e16942f85..07ca07dc71ed 100644
--- a/examples/offline_inference/logits_processor/README.md
+++ b/examples/features/logits_processor/README.md
@@ -9,7 +9,7 @@ This directory contains examples demonstrating how to use custom logits processo
 Demonstrates how to instantiate vLLM with a custom logits processor class that operates at the batch level. The example uses a `DummyLogitsProcessor` that masks out all tokens except a specified `target_token` when passed via `SamplingParams.extra_args`.
 
 ```bash
-python examples/offline_inference/logits_processor/custom.py
+python examples/features/logits_processor/custom.py
 ```
 
 ### `custom_req.py` — Request-level logits processor wrapper
@@ -17,7 +17,7 @@ python examples/offline_inference/logits_processor/custom.py
 Shows how to wrap a request-level logits processor (which operates on individual requests) to be compatible with vLLM's batch-level logits processing interface.
 
 ```bash
-python examples/offline_inference/logits_processor/custom_req.py
+python examples/features/logits_processor/custom_req.py
 ```
 
 ### `custom_req_init.py` — Request-level processor with engine config
@@ -25,7 +25,7 @@ python examples/offline_inference/logits_processor/custom_req.py
 A special case of wrapping a request-level logits processor where the processor needs access to engine configuration or model metadata during initialization (e.g., vocabulary size, tokenizer info).
 
 ```bash
-python examples/offline_inference/logits_processor/custom_req_init.py
+python examples/features/logits_processor/custom_req_init.py
 ```
 
 ## Key Concepts
diff --git a/examples/offline_inference/logits_processor/custom.py b/examples/features/logits_processor/custom.py
similarity index 100%
rename from examples/offline_inference/logits_processor/custom.py
rename to examples/features/logits_processor/custom.py
diff --git a/examples/offline_inference/logits_processor/custom_req.py b/examples/features/logits_processor/custom_req.py
similarity index 100%
rename from examples/offline_inference/logits_processor/custom_req.py
rename to examples/features/logits_processor/custom_req.py
diff --git a/examples/offline_inference/logits_processor/custom_req_init.py b/examples/features/logits_processor/custom_req_init.py
similarity index 100%
rename from examples/offline_inference/logits_processor/custom_req_init.py
rename to examples/features/logits_processor/custom_req_init.py
diff --git a/examples/offline_inference/lora_with_quantization_inference.py b/examples/features/lora/lora_with_quantization_offline.py
similarity index 100%
rename from examples/offline_inference/lora_with_quantization_inference.py
rename to examples/features/lora/lora_with_quantization_offline.py
diff --git a/examples/offline_inference/multilora_inference.py b/examples/features/lora/multilora_offline.py
similarity index 100%
rename from examples/offline_inference/multilora_inference.py
rename to examples/features/lora/multilora_offline.py
diff --git a/examples/offline_inference/openai_batch/README.md b/examples/features/openai_batch/README.md
similarity index 93%
rename from examples/offline_inference/openai_batch/README.md
rename to examples/features/openai_batch/README.md
index ef4e438d6b72..d010e04e10f9 100644
--- a/examples/offline_inference/openai_batch/README.md
+++ b/examples/features/openai_batch/README.md
@@ -8,7 +8,7 @@ This is a guide to performing batch inference using the OpenAI batch file format
 
 The OpenAI batch file format consists of a series of json objects on new lines.
 
-[See here for an example file.](https://github.com/vllm-project/vllm/blob/main/examples/offline_inference/openai_batch/openai_example_batch.jsonl)
+[See here for an example file.](https://github.com/vllm-project/vllm/blob/main/examples/features/openai_batch/openai_example_batch.jsonl)
 
 Each line represents a separate request. See the [OpenAI package reference](https://platform.openai.com/docs/api-reference/batch/requestInput) for more details.
 
@@ -30,13 +30,13 @@ We currently support `/v1/chat/completions`, `/v1/embeddings`, and `/v1/score` e
 To follow along with this example, you can download the example batch, or create your own batch file in your working directory.
 
 ```bash
-wget https://raw.githubusercontent.com/vllm-project/vllm/main/examples/offline_inference/openai_batch/openai_example_batch.jsonl
+wget https://raw.githubusercontent.com/vllm-project/vllm/main/examples/features/openai_batch/openai_example_batch.jsonl
 ```
 
 Once you've created your batch file it should look like this
 
 ```bash
-cat offline_inference/openai_batch/openai_example_batch.jsonl
+cat features/openai_batch/openai_example_batch.jsonl
 {"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_completion_tokens": 1000}}
 {"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_completion_tokens": 1000}}
 ```
@@ -49,7 +49,7 @@ You can run the batch with the following command, which will write its results t
 
 ```bash
 python -m vllm.entrypoints.openai.run_batch \
-    -i offline_inference/openai_batch/openai_example_batch.jsonl \
+    -i features/openai_batch/openai_example_batch.jsonl \
     -o results.jsonl \
     --model meta-llama/Meta-Llama-3-8B-Instruct
 ```
@@ -58,7 +58,7 @@ or use command-line:
 
 ```bash
 vllm run-batch \
-    -i offline_inference/openai_batch/openai_example_batch.jsonl \
+    -i features/openai_batch/openai_example_batch.jsonl \
     -o results.jsonl \
     --model meta-llama/Meta-Llama-3-8B-Instruct
 ```
@@ -77,11 +77,11 @@ cat results.jsonl
 
 The batch runner supports remote input and output urls that are accessible via http/https.
 
-For example, to run against our example input file located at `https://raw.githubusercontent.com/vllm-project/vllm/main/examples/offline_inference/openai_batch/openai_example_batch.jsonl`, you can run
+For example, to run against our example input file located at `https://raw.githubusercontent.com/vllm-project/vllm/main/examples/features/openai_batch/openai_example_batch.jsonl`, you can run
 
 ```bash
 python -m vllm.entrypoints.openai.run_batch \
-    -i https://raw.githubusercontent.com/vllm-project/vllm/main/examples/offline_inference/openai_batch/openai_example_batch.jsonl \
+    -i https://raw.githubusercontent.com/vllm-project/vllm/main/examples/features/openai_batch/openai_example_batch.jsonl \
     -o results.jsonl \
     --model meta-llama/Meta-Llama-3-8B-Instruct
 ```
@@ -90,7 +90,7 @@ or use command-line:
 
 ```bash
 vllm run-batch \
-    -i https://raw.githubusercontent.com/vllm-project/vllm/main/examples/offline_inference/openai_batch/openai_example_batch.jsonl \
+    -i https://raw.githubusercontent.com/vllm-project/vllm/main/examples/features/openai_batch/openai_example_batch.jsonl \
     -o results.jsonl \
     --model meta-llama/Meta-Llama-3-8B-Instruct
 ```
@@ -113,13 +113,13 @@ To integrate with cloud blob storage, we recommend using presigned urls.
 To follow along with this example, you can download the example batch, or create your own batch file in your working directory.
 
 ```bash
-wget https://raw.githubusercontent.com/vllm-project/vllm/main/examples/offline_inference/openai_batch/openai_example_batch.jsonl
+wget https://raw.githubusercontent.com/vllm-project/vllm/main/examples/features/openai_batch/openai_example_batch.jsonl
 ```
 
 Once you've created your batch file it should look like this
 
 ```bash
-cat offline_inference/openai_batch/openai_example_batch.jsonl
+cat features/openai_batch/openai_example_batch.jsonl
 {"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_completion_tokens": 1000}}
 {"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_completion_tokens": 1000}}
 ```
@@ -127,7 +127,7 @@ cat offline_inference/openai_batch/openai_example_batch.jsonl
 Now upload your batch file to your S3 bucket.
 
 ```bash
-aws s3 cp offline_inference/openai_batch/openai_example_batch.jsonl s3://MY_BUCKET/MY_INPUT_FILE.jsonl
+aws s3 cp features/openai_batch/openai_example_batch.jsonl s3://MY_BUCKET/MY_INPUT_FILE.jsonl
 ```
 
 ### Step 2: Generate your presigned urls
@@ -193,7 +193,7 @@ You can now run the batch runner, using the urls generated in the previous secti
 python -m vllm.entrypoints.openai.run_batch \
     -i "https://s3.us-west-2.amazonaws.com/MY_BUCKET/MY_INPUT_FILE.jsonl?AWSAccessKeyId=ABCDEFGHIJKLMNOPQRST&Signature=abcdefghijklmnopqrstuvwxyz12345&Expires=1715800091" \
     -o "https://s3.us-west-2.amazonaws.com/MY_BUCKET/MY_OUTPUT_FILE.jsonl?AWSAccessKeyId=ABCDEFGHIJKLMNOPQRST&Signature=abcdefghijklmnopqrstuvwxyz12345&Expires=1715800091" \
-    --model --model meta-llama/Meta-Llama-3-8B-Instruct
+    --model meta-llama/Meta-Llama-3-8B-Instruct
 ```
 
 or use command-line:
@@ -202,7 +202,7 @@ or use command-line:
 vllm run-batch \
     -i "https://s3.us-west-2.amazonaws.com/MY_BUCKET/MY_INPUT_FILE.jsonl?AWSAccessKeyId=ABCDEFGHIJKLMNOPQRST&Signature=abcdefghijklmnopqrstuvwxyz12345&Expires=1715800091" \
     -o "https://s3.us-west-2.amazonaws.com/MY_BUCKET/MY_OUTPUT_FILE.jsonl?AWSAccessKeyId=ABCDEFGHIJKLMNOPQRST&Signature=abcdefghijklmnopqrstuvwxyz12345&Expires=1715800091" \
-    --model --model meta-llama/Meta-Llama-3-8B-Instruct
+    --model meta-llama/Meta-Llama-3-8B-Instruct
 ```
 
 ### Step 4: View your results
diff --git a/examples/offline_inference/openai_batch/openai_example_batch.jsonl b/examples/features/openai_batch/openai_example_batch.jsonl
similarity index 100%
rename from examples/offline_inference/openai_batch/openai_example_batch.jsonl
rename to examples/features/openai_batch/openai_example_batch.jsonl
diff --git a/examples/online_serving/data_parallel_pause_resume.py b/examples/features/pause_resume/data_parallel_pause_resume.py
similarity index 96%
rename from examples/online_serving/data_parallel_pause_resume.py
rename to examples/features/pause_resume/data_parallel_pause_resume.py
index e94de22a1271..1f11536e5366 100644
--- a/examples/online_serving/data_parallel_pause_resume.py
+++ b/examples/features/pause_resume/data_parallel_pause_resume.py
@@ -1,135 +1,135 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""
-Test pause/resume with Data Parallel (DP) via HTTP API.
-
-This example demonstrates coordinated pause/resume across multiple DP ranks.
-The pause synchronizes across all DP engines via all-reduce.
-
-Prerequisites:
-    Start a vLLM server with data parallelism:
-
-    $ VLLM_SERVER_DEV_MODE=1 vllm serve facebook/opt-125m \
-        --enforce-eager \
-        --data-parallel-size 4 \
-        --tensor-parallel-size 1
-
-    Then run this script:
-
-    $ python data_parallel_pause_resume.py
-
-The test verifies pause works by:
-1. Starting a streaming generation request
-2. Pausing the server mid-generation
-3. Sleeping for PAUSE_DURATION seconds
-4. Resuming the server
-5. Verifying there was a gap in token generation matching the pause duration
-"""
-
-import argparse
-import threading
-import time
-
-import requests
-from openai import OpenAI
-
-BASE_URL = "http://localhost:8000"
-MODEL_NAME = "facebook/opt-125m"
-PAUSE_DURATION = 3.0
-
-
-def pause_generation(base_url: str, mode: str = "keep") -> None:
-    """Pause generation via HTTP endpoint."""
-    url = f"{base_url}/pause"
-    response = requests.post(url, params={"mode": mode}, timeout=60)
-    response.raise_for_status()
-    print("Server paused")
-
-
-def resume_generation(base_url: str) -> None:
-    """Resume generation via HTTP endpoint."""
-    url = f"{base_url}/resume"
-    response = requests.post(url, timeout=60)
-    response.raise_for_status()
-    print("Server resumed")
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--base-url", default=BASE_URL)
-    parser.add_argument("--model", default=MODEL_NAME)
-    args = parser.parse_args()
-
-    client = OpenAI(
-        base_url=f"{args.base_url}/v1",
-        api_key="EMPTY",
-    )
-
-    prompt = "Write a long story about a dragon. Once upon a time"
-    token_times: list[float] = []
-    pause_token_idx = 0
-    pause_triggered = threading.Event()
-
-    def generator_thread():
-        """Stream tokens and record timestamps."""
-        stream = client.completions.create(
-            model=args.model,
-            prompt=prompt,
-            max_tokens=50,
-            stream=True,
-        )
-        for chunk in stream:
-            if chunk.choices[0].text:
-                token_times.append(time.monotonic())
-                token_count = len(token_times)
-                print(f"Token {token_count}: {chunk.choices[0].text!r}")
-
-                # Signal controller after some tokens
-                if token_count >= 5 and not pause_triggered.is_set():
-                    pause_triggered.set()
-
-    def controller_thread():
-        """Pause and resume the server."""
-        nonlocal pause_token_idx
-
-        # Wait for some tokens
-        pause_triggered.wait()
-
-        print(f"\nPausing server (keep mode) at token {len(token_times)}...")
-        pause_generation(args.base_url, mode="keep")
-        pause_token_idx = len(token_times)
-        print(f"Sleeping for {PAUSE_DURATION}s...")
-
-        time.sleep(PAUSE_DURATION)
-
-        print("Resuming server...")
-        resume_generation(args.base_url)
-        print("Resumed!\n")
-
-    # Run both threads
-    gen_thread = threading.Thread(target=generator_thread)
-    ctrl_thread = threading.Thread(target=controller_thread)
-
-    gen_thread.start()
-    ctrl_thread.start()
-
-    gen_thread.join()
-    ctrl_thread.join()
-
-    # Check gap at the pause point
-    if pause_token_idx < len(token_times):
-        pause_gap = token_times[pause_token_idx] - token_times[pause_token_idx - 1]
-        print(
-            f"\nGap after pause (token {pause_token_idx} -> "
-            f"{pause_token_idx + 1}): {pause_gap:.3f}s"
-        )
-        if pause_gap >= PAUSE_DURATION * 0.9:
-            print("Test passed! Pause synchronized across DP ranks.")
-        else:
-            print(f"Test failed! Expected ~{PAUSE_DURATION}s gap, got {pause_gap:.3f}s")
-    else:
-        print("Test failed! No tokens were generated after resuming.")
-
-
-if __name__ == "__main__":
-    main()
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Test pause/resume with Data Parallel (DP) via HTTP API.
+
+This example demonstrates coordinated pause/resume across multiple DP ranks.
+The pause synchronizes across all DP engines via all-reduce.
+
+Prerequisites:
+    Start a vLLM server with data parallelism:
+
+    $ VLLM_SERVER_DEV_MODE=1 vllm serve facebook/opt-125m \
+        --enforce-eager \
+        --data-parallel-size 4 \
+        --tensor-parallel-size 1
+
+    Then run this script:
+
+    $ python data_parallel_pause_resume.py
+
+The test verifies pause works by:
+1. Starting a streaming generation request
+2. Pausing the server mid-generation
+3. Sleeping for PAUSE_DURATION seconds
+4. Resuming the server
+5. Verifying there was a gap in token generation matching the pause duration
+"""
+
+import argparse
+import threading
+import time
+
+import requests
+from openai import OpenAI
+
+BASE_URL = "http://localhost:8000"
+MODEL_NAME = "facebook/opt-125m"
+PAUSE_DURATION = 3.0
+
+
+def pause_generation(base_url: str, mode: str = "keep") -> None:
+    """Pause generation via HTTP endpoint."""
+    url = f"{base_url}/pause"
+    response = requests.post(url, params={"mode": mode}, timeout=60)
+    response.raise_for_status()
+    print("Server paused")
+
+
+def resume_generation(base_url: str) -> None:
+    """Resume generation via HTTP endpoint."""
+    url = f"{base_url}/resume"
+    response = requests.post(url, timeout=60)
+    response.raise_for_status()
+    print("Server resumed")
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--base-url", default=BASE_URL)
+    parser.add_argument("--model", default=MODEL_NAME)
+    args = parser.parse_args()
+
+    client = OpenAI(
+        base_url=f"{args.base_url}/v1",
+        api_key="EMPTY",
+    )
+
+    prompt = "Write a long story about a dragon. Once upon a time"
+    token_times: list[float] = []
+    pause_token_idx = 0
+    pause_triggered = threading.Event()
+
+    def generator_thread():
+        """Stream tokens and record timestamps."""
+        stream = client.completions.create(
+            model=args.model,
+            prompt=prompt,
+            max_tokens=50,
+            stream=True,
+        )
+        for chunk in stream:
+            if chunk.choices[0].text:
+                token_times.append(time.monotonic())
+                token_count = len(token_times)
+                print(f"Token {token_count}: {chunk.choices[0].text!r}")
+
+                # Signal controller after some tokens
+                if token_count >= 5 and not pause_triggered.is_set():
+                    pause_triggered.set()
+
+    def controller_thread():
+        """Pause and resume the server."""
+        nonlocal pause_token_idx
+
+        # Wait for some tokens
+        pause_triggered.wait()
+
+        print(f"\nPausing server (keep mode) at token {len(token_times)}...")
+        pause_generation(args.base_url, mode="keep")
+        pause_token_idx = len(token_times)
+        print(f"Sleeping for {PAUSE_DURATION}s...")
+
+        time.sleep(PAUSE_DURATION)
+
+        print("Resuming server...")
+        resume_generation(args.base_url)
+        print("Resumed!\n")
+
+    # Run both threads
+    gen_thread = threading.Thread(target=generator_thread)
+    ctrl_thread = threading.Thread(target=controller_thread)
+
+    gen_thread.start()
+    ctrl_thread.start()
+
+    gen_thread.join()
+    ctrl_thread.join()
+
+    # Check gap at the pause point
+    if pause_token_idx < len(token_times):
+        pause_gap = token_times[pause_token_idx] - token_times[pause_token_idx - 1]
+        print(
+            f"\nGap after pause (token {pause_token_idx} -> "
+            f"{pause_token_idx + 1}): {pause_gap:.3f}s"
+        )
+        if pause_gap >= PAUSE_DURATION * 0.9:
+            print("Test passed! Pause synchronized across DP ranks.")
+        else:
+            print(f"Test failed! Expected ~{PAUSE_DURATION}s gap, got {pause_gap:.3f}s")
+    else:
+        print("Test failed! No tokens were generated after resuming.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/offline_inference/pause_resume.py b/examples/features/pause_resume/pause_resume_offline.py
similarity index 100%
rename from examples/offline_inference/pause_resume.py
rename to examples/features/pause_resume/pause_resume_offline.py
diff --git a/examples/offline_inference/run_one_batch.py b/examples/features/profiling/run_one_batch_offline.py
similarity index 100%
rename from examples/offline_inference/run_one_batch.py
rename to examples/features/profiling/run_one_batch_offline.py
diff --git a/examples/offline_inference/simple_profiling.py b/examples/features/profiling/simple_profiling_offline.py
similarity index 100%
rename from examples/offline_inference/simple_profiling.py
rename to examples/features/profiling/simple_profiling_offline.py
diff --git a/examples/features/prompt_embed/prompt_embed_inference_with_openai_client.py b/examples/features/prompt_embed/prompt_embed_inference_with_openai_client.py
new file mode 100644
index 000000000000..f3204645d0a0
--- /dev/null
+++ b/examples/features/prompt_embed/prompt_embed_inference_with_openai_client.py
@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""vLLM OpenAI-Compatible Client with Prompt Embeddings.
+
+This script demonstrates how to:
+1. Generate prompt embeddings using Hugging Face Transformers.
+2. Encode them in base64 format.
+3. Send them to a vLLM server for inference via both:
+    - OpenAI-compatible Chat Completions API
+    - OpenAI-compatible Completions API
+
+Important distinction between the two APIs:
+
+- Chat Completions API: `prompt_embeds` content parts should encode ONLY
+  the user-provided content, not a templated conversation. The server
+  renders the surrounding chat template around the embedded content at
+  request time, the same way it would for a plain text `content` string.
+  Embedding a full templated conversation here would double-apply the
+  template and likely produce undesirable results.
+
+- Completions API: the server does NOT apply a chat template to
+  `prompt_embeds`. The caller is responsible for producing embeddings for
+  the full, already-templated prompt (i.e. apply the chat template first, 
+  then embed the resulting token IDs). Anything the model would normally
+  need (system prompt, role markers, generation prompt, etc.) must already
+  be baked into the embedded tokens.
+
+Run the vLLM server first:
+vllm serve meta-llama/Llama-3.2-1B-Instruct \
+  --runner generate \
+  --max-model-len 4096 \
+  --enable-prompt-embeds
+
+Run the client:
+python examples/features/prompt_embed/prompt_embed_inference_with_openai_client.py
+
+Model: meta-llama/Llama-3.2-1B-Instruct
+Note: This model is gated on Hugging Face Hub.
+      You must request access to use it:
+      https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct
+
+Dependencies:
+- transformers
+- torch
+- openai
+"""
+
+import transformers
+from openai import OpenAI
+
+from vllm.utils.serial_utils import tensor2base64
+
+
+def run_chat_completion_prompt_embeds(
+    client: OpenAI,
+    model_name: str,
+    tokenizer: transformers.PreTrainedTokenizerBase,
+    embedding_layer,
+    messages: list[dict],
+) -> None:
+    """Run a Chat Completions API request using prompt_embeds content parts.
+
+    This example embeds ONLY the user-provided content of the final user turn, the
+    vLLM server applies the chat template around it at request time.
+    """
+    user_content = messages[-1]["content"]
+    content_token_ids = tokenizer(
+        user_content, return_tensors="pt", add_special_tokens=False
+    ).input_ids
+    content_prompt_embeds = embedding_layer(content_token_ids).squeeze(0)
+    encoded_embeds = tensor2base64(content_prompt_embeds)
+
+    api_messages = [
+        *messages[:-1],
+        {
+            "role": messages[-1]["role"],
+            "content": [{"type": "prompt_embeds", "data": encoded_embeds}],
+        },
+    ]
+
+    chat_completion = client.chat.completions.create(
+        model=model_name,
+        max_tokens=6,
+        temperature=0.0,
+        messages=api_messages,
+    )
+
+    print("-" * 30)
+    print("Chat Completions API")
+    print(chat_completion.choices[0].message.content)
+    print("-" * 30)
+
+
+def run_completion_prompt_embeds(
+    client: OpenAI,
+    model_name: str,
+    tokenizer: transformers.PreTrainedTokenizerBase,
+    embedding_layer,
+    messages: list[dict],
+) -> None:
+    """Run a Completions API request using prompt embeddings.
+
+    The Completions endpoint does not apply a chat template,
+    so the caller must apply it and embed the full templated prompt.
+    """
+    templated_token_ids = tokenizer.apply_chat_template(
+        messages, add_generation_prompt=True, return_tensors="pt", return_dict=True
+    ).input_ids
+    templated_prompt_embeds = embedding_layer(templated_token_ids).squeeze(0)
+    encoded_embeds = tensor2base64(templated_prompt_embeds)
+
+    completion = client.completions.create(
+        model=model_name,
+        prompt=None,
+        max_tokens=6,
+        temperature=0.0,
+        # NOTE: The OpenAI client allows passing in extra JSON body via the
+        # `extra_body` argument.
+        extra_body={"prompt_embeds": encoded_embeds},
+    )
+
+    print("-" * 30)
+    print("Completions API")
+    print(completion.choices[0].text)
+    print("-" * 30)
+
+
+def main() -> None:
+    client = OpenAI(
+        api_key="EMPTY",
+        base_url="http://localhost:8000/v1",
+    )
+
+    model_name = "meta-llama/Llama-3.2-1B-Instruct"
+
+    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
+    transformers_model = transformers.AutoModelForCausalLM.from_pretrained(model_name)
+    embedding_layer = transformers_model.get_input_embeddings()
+
+    messages = [
+        {"role": "user", "content": "Please tell me about the capital of France."}
+    ]
+
+    # Chat Completions API: embed ONLY the user content. The server wraps
+    # the embedding in the chat template when it renders the messages.
+    run_chat_completion_prompt_embeds(
+        client, model_name, tokenizer, embedding_layer, messages
+    )
+
+    # Completions API: embed the FULL templated prompt. The caller must
+    # apply the chat template up-front.
+    run_completion_prompt_embeds(
+        client, model_name, tokenizer, embedding_layer, messages
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/offline_inference/prompt_embed_inference.py b/examples/features/prompt_embed/prompt_embed_offline.py
similarity index 97%
rename from examples/offline_inference/prompt_embed_inference.py
rename to examples/features/prompt_embed/prompt_embed_offline.py
index a0eaeb6810a2..29853bce9673 100644
--- a/examples/offline_inference/prompt_embed_inference.py
+++ b/examples/features/prompt_embed/prompt_embed_offline.py
@@ -15,7 +15,7 @@
 - transformers
 
 Run:
-    python examples/offline_inference/prompt_embed_inference.py
+    python examples/features/prompt_embed/prompt_embed_offline.py
 """
 
 import torch
diff --git a/examples/offline_inference/llm_engine_reset_kv.py b/examples/features/reset_kv/reset_kv_offline.py
similarity index 100%
rename from examples/offline_inference/llm_engine_reset_kv.py
rename to examples/features/reset_kv/reset_kv_offline.py
diff --git a/examples/offline_inference/load_sharded_state.py b/examples/features/sharded_state/load_sharded_state_offline.py
similarity index 94%
rename from examples/offline_inference/load_sharded_state.py
rename to examples/features/sharded_state/load_sharded_state_offline.py
index 0085e8e8e32b..e867db5d12fe 100644
--- a/examples/offline_inference/load_sharded_state.py
+++ b/examples/features/sharded_state/load_sharded_state_offline.py
@@ -3,16 +3,16 @@
 """
 Validates the loading of a model saved with the sharded_state format.
 This script demonstrates how to load a model that was previously saved
-using save_sharded_state.py and validates it by running inference.
+using save_sharded_state_offline.py and validates it by running inference.
 Example usage:
 (First need to save a sharded_state mode)
 
-python save_sharded_state.py \
+python save_sharded_state_offline.py \
     --model /path/to/load \
     --tensor-parallel-size 8 \
     --output /path/to/save/sharded/model
 
-python load_sharded_state.py \
+python load_sharded_state_offline.py \
     --model /path/to/saved/sharded/model \
     --load-format sharded_state \
     --tensor-parallel-size 8 \
diff --git a/examples/offline_inference/save_sharded_state.py b/examples/features/sharded_state/save_sharded_state_offline.py
similarity index 98%
rename from examples/offline_inference/save_sharded_state.py
rename to examples/features/sharded_state/save_sharded_state_offline.py
index 14d472ee3f23..675f2e35a53f 100644
--- a/examples/offline_inference/save_sharded_state.py
+++ b/examples/features/sharded_state/save_sharded_state_offline.py
@@ -7,7 +7,7 @@
 
 Example usage:
 
-python save_sharded_state.py \
+python save_sharded_state_offline.py \
     --model /path/to/load \
     --tensor-parallel-size 8 \
     --output /path/to/save
diff --git a/examples/offline_inference/extract_hidden_states.py b/examples/features/speculative_decoding/extract_hidden_states_offline.py
similarity index 55%
rename from examples/offline_inference/extract_hidden_states.py
rename to examples/features/speculative_decoding/extract_hidden_states_offline.py
index 61299101cb47..f8909566f402 100644
--- a/examples/offline_inference/extract_hidden_states.py
+++ b/examples/features/speculative_decoding/extract_hidden_states_offline.py
@@ -2,9 +2,15 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import tempfile
 
-from safetensors import safe_open
-
 from vllm import LLM, SamplingParams
+from vllm.config.kv_transfer import KVTransferConfig
+from vllm.distributed.kv_transfer.kv_connector.v1 import (
+    example_hidden_states_connector,
+)
+
+# NOTE: If changing the interface of the ExampleHiddenStatesConnector, please also
+# update the benchmark in benchmarks/benchmark_hidden_state_extraction.py
+# and the docs in docs/features/speculative_decoding/extract_hidden_states.md
 
 # Example: Using the custom "extract_hidden_states" speculator method and
 # ExampleHiddenStatesConnector to extract and save hidden states from vllm
@@ -12,6 +18,7 @@
 with tempfile.TemporaryDirectory() as tmpdirname:
     llm = LLM(
         model="Qwen/Qwen3-8B",  # Your target model
+        enable_chunked_prefill=False,  # required
         speculative_config={
             "method": "extract_hidden_states",
             "num_speculative_tokens": 1,
@@ -23,16 +30,16 @@
                         3,
                         4,
                     ],
-                }
+                },
             },
         },
-        kv_transfer_config={
-            "kv_connector": "ExampleHiddenStatesConnector",
-            "kv_role": "kv_producer",
-            "kv_connector_extra_config": {
+        kv_transfer_config=KVTransferConfig(
+            kv_connector="ExampleHiddenStatesConnector",
+            kv_role="kv_producer",
+            kv_connector_extra_config={
                 "shared_storage_path": tmpdirname,
             },
-        },
+        ),
     )
 
     prompts = ["Generate a sentence with hidden states", "Write a python function"]
@@ -47,12 +54,14 @@
         assert hidden_states_path is not None
         print("Prompt hidden states path:", hidden_states_path)
 
-        with safe_open(hidden_states_path, "pt") as f:
-            token_ids = f.get_tensor("token_ids")
-            hidden_states = f.get_tensor("hidden_states")
+        obj = example_hidden_states_connector.load_hidden_states(hidden_states_path)
+        token_ids = obj["token_ids"]
+        hidden_states = obj["hidden_states"]
+
+        print("Extracted token ids:", token_ids)  # Matches prompt token ids
+        print(
+            "Extracted hidden states shape:", hidden_states.shape
+        )  # [prompt_len, num_extracted_layers, hidden_size]
+        print("Extracted hidden states:", hidden_states)
 
-            print("Extracted token ids:", token_ids)  # Matches prompt token ids
-            print(
-                "Extracted hidden states shape:", hidden_states.shape
-            )  # [num_hidden_layers, prompt len, hidden size]
-            print("Extracted hidden states:", hidden_states)
+        example_hidden_states_connector.cleanup_hidden_states(hidden_states_path)
diff --git a/examples/offline_inference/mlpspeculator.py b/examples/features/speculative_decoding/mlpspeculator_offline.py
similarity index 100%
rename from examples/offline_inference/mlpspeculator.py
rename to examples/features/speculative_decoding/mlpspeculator_offline.py
diff --git a/examples/offline_inference/spec_decode.py b/examples/features/speculative_decoding/spec_decode_offline.py
similarity index 100%
rename from examples/offline_inference/spec_decode.py
rename to examples/features/speculative_decoding/spec_decode_offline.py
diff --git a/examples/online_serving/structured_outputs/README.md b/examples/features/structured_outputs/README.md
similarity index 85%
rename from examples/online_serving/structured_outputs/README.md
rename to examples/features/structured_outputs/README.md
index 7f539716ecf8..f2863eb0cbcf 100644
--- a/examples/online_serving/structured_outputs/README.md
+++ b/examples/features/structured_outputs/README.md
@@ -20,7 +20,7 @@ vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-7B \
 If you want to run this script standalone with `uv`, you can use the following:
 
 ```bash
-uvx --from git+https://github.com/vllm-project/vllm#subdirectory=examples/online_serving/structured_outputs \
+uvx --from git+https://github.com/vllm-project/vllm#subdirectory=examples/features/structured_outputs \
     structured-outputs
 ```
 
@@ -34,19 +34,19 @@ See [feature docs](https://docs.vllm.ai/en/latest/features/structured_outputs.ht
 Run all constraints, non-streaming:
 
 ```bash
-uv run structured_outputs.py
+uv run structured_outputs_offline.py
 ```
 
 Run all constraints, streaming:
 
 ```bash
-uv run structured_outputs.py --stream
+uv run structured_outputs_offline.py --stream
 ```
 
 Run certain constraints, for example `structural_tag` and `regex`, streaming:
 
 ```bash
-uv run structured_outputs.py \
+uv run structured_outputs_offline.py \
     --constraint structural_tag regex \
     --stream
 ```
@@ -54,5 +54,5 @@ uv run structured_outputs.py \
 Run all constraints, with reasoning models and streaming:
 
 ```bash
-uv run structured_outputs.py --reasoning --stream
+uv run structured_outputs_offline.py --reasoning --stream
 ```
diff --git a/examples/online_serving/structured_outputs/pyproject.toml b/examples/features/structured_outputs/pyproject.toml
similarity index 100%
rename from examples/online_serving/structured_outputs/pyproject.toml
rename to examples/features/structured_outputs/pyproject.toml
diff --git a/examples/online_serving/structured_outputs/structured_outputs.py b/examples/features/structured_outputs/structured_outputs_client.py
similarity index 100%
rename from examples/online_serving/structured_outputs/structured_outputs.py
rename to examples/features/structured_outputs/structured_outputs_client.py
diff --git a/examples/offline_inference/structured_outputs.py b/examples/features/structured_outputs/structured_outputs_offline.py
similarity index 100%
rename from examples/offline_inference/structured_outputs.py
rename to examples/features/structured_outputs/structured_outputs_offline.py
diff --git a/examples/others/tensorize_vllm_model.py b/examples/features/tensorize_vllm_model.py
similarity index 97%
rename from examples/others/tensorize_vllm_model.py
rename to examples/features/tensorize_vllm_model.py
index 3644a03b32ed..a89b1781264d 100644
--- a/examples/others/tensorize_vllm_model.py
+++ b/examples/features/tensorize_vllm_model.py
@@ -33,7 +33,7 @@
 To serialize a model, install vLLM from source, then run something 
 like this from the root level of this repository:
 
-python examples/others/tensorize_vllm_model.py \
+python examples/features/tensorize_vllm_model.py \
    --model facebook/opt-125m \
    serialize \
    --serialized-directory s3://my-bucket \
@@ -53,7 +53,7 @@
 To deserialize a model, you can run something like this from the root 
 level of this repository:
 
-python examples/others/tensorize_vllm_model.py \
+python examples/features/tensorize_vllm_model.py \
    --model EleutherAI/gpt-j-6B \
    --dtype float16 \
    deserialize \
@@ -71,11 +71,11 @@
 model-rank-%03d.tensors
 
 For more information on the available arguments for serializing, run 
-`python -m examples.others.tensorize_vllm_model serialize --help`.
+`python -m examples.features.tensorize_vllm_model serialize --help`.
 
 Or for deserializing:
 
-`python examples/others/tensorize_vllm_model.py deserialize --help`.
+`python examples/features/tensorize_vllm_model.py deserialize --help`.
 
 Once a model is serialized, tensorizer can be invoked with the `LLM` class 
 directly to load models:
@@ -100,7 +100,7 @@
 In order to see all of the available arguments usable to configure 
 loading with tensorizer that are given to `TensorizerConfig`, run:
 
-`python examples/others/tensorize_vllm_model.py deserialize --help`
+`python examples/features/tensorize_vllm_model.py deserialize --help`
 
 under the `tensorizer options` section. These can also be used for
 deserialization in this example script, although `--tensorizer-uri` and
diff --git a/examples/offline_inference/torchrun_dp_example.py b/examples/features/torchrun/torchrun_dp_example_offline.py
similarity index 95%
rename from examples/offline_inference/torchrun_dp_example.py
rename to examples/features/torchrun/torchrun_dp_example_offline.py
index eb7ed969ea4b..f18f6042e9c6 100644
--- a/examples/offline_inference/torchrun_dp_example.py
+++ b/examples/features/torchrun/torchrun_dp_example_offline.py
@@ -7,15 +7,15 @@
 
 To run this example:
 ```bash
-$ torchrun --nproc-per-node=2 examples/offline_inference/torchrun_dp_example.py
+$ torchrun --nproc-per-node=2 examples/features/torchrun/torchrun_dp_example_offline.py
 ```
 
 With custom parallelism settings:
 ```bash
-$ torchrun --nproc-per-node=8 examples/offline_inference/torchrun_dp_example.py \
+$ torchrun --nproc-per-node=8 examples/features/torchrun/torchrun_dp_example_offline.py \
     --tp-size=2 --pp-size=1 --dp-size=4 --enable-ep
 ```
-"""
+"""  # noqa: E501
 
 import argparse
 
diff --git a/examples/offline_inference/torchrun_example.py b/examples/features/torchrun/torchrun_example_offline.py
similarity index 89%
rename from examples/offline_inference/torchrun_example.py
rename to examples/features/torchrun/torchrun_example_offline.py
index 3d3d7946cdb4..e41bcd420c20 100644
--- a/examples/offline_inference/torchrun_example.py
+++ b/examples/features/torchrun/torchrun_example_offline.py
@@ -4,9 +4,10 @@
 experimental support for tensor-parallel inference with torchrun,
 see https://github.com/vllm-project/vllm/issues/11400 for
 the motivation and use case for this example.
-run the script with `torchrun --nproc-per-node=2 torchrun_example.py`,
-the argument 2 should match the `tensor_parallel_size` below.
-see `tests/distributed/test_torchrun_example.py` for the unit test.
+run the script with `torchrun --nproc-per-node=4 torchrun_example_offline.py`,
+the argument `4` should match the product of `tensor_parallel_size` and
+`pipeline_parallel_size` below. see `tests/distributed/test_torchrun_example.py`
+for the unit test.
 """
 
 import torch.distributed as dist
diff --git a/examples/online_serving/batched_chat_completions.py b/examples/generate/batched_chat_completions_online.py
similarity index 100%
rename from examples/online_serving/batched_chat_completions.py
rename to examples/generate/batched_chat_completions_online.py
diff --git a/examples/offline_inference/audio_language.py b/examples/generate/multimodal/audio_language_offline.py
old mode 100755
new mode 100644
similarity index 97%
rename from examples/offline_inference/audio_language.py
rename to examples/generate/multimodal/audio_language_offline.py
index 690aada03aba..c480f1b4145f
--- a/examples/offline_inference/audio_language.py
+++ b/examples/generate/multimodal/audio_language_offline.py
@@ -537,9 +537,30 @@ def run_whisper(question: str, audio_count: int) -> ModelRequestData:
     )
 
 
+# FireRedLID
+def run_fireredlid(question: str, audio_count: int) -> ModelRequestData:
+    assert audio_count == 1, "FireRedLID only supports single audio input per prompt"
+    model_name = "PatchyTisa/FireRedLID-vllm"
+
+    prompt = "<sos>"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=8,
+        max_num_seqs=5,
+        limit_mm_per_prompt={"audio": audio_count},
+    )
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompt=prompt,
+    )
+
+
 model_example_map = {
     "audioflamingo3": run_audioflamingo3,
     "cohere_asr": run_cohere_asr,
+    "fireredlid": run_fireredlid,
     "funaudiochat": run_funaudiochat,
     "gemma3n": run_gemma3n,
     "glmasr": run_glmasr,
diff --git a/examples/offline_inference/encoder_decoder_multimodal.py b/examples/generate/multimodal/encoder_decoder_multimodal_offline.py
similarity index 60%
rename from examples/offline_inference/encoder_decoder_multimodal.py
rename to examples/generate/multimodal/encoder_decoder_multimodal_offline.py
index 2f72b7d06705..4fc74e9555f8 100644
--- a/examples/offline_inference/encoder_decoder_multimodal.py
+++ b/examples/generate/multimodal/encoder_decoder_multimodal_offline.py
@@ -55,7 +55,91 @@ def run_whisper():
     )
 
 
+def run_fireredasr2():
+    """
+    FireRedASR2 – Automatic Speech Recognition model.
+
+    This model uses a Conformer encoder + Qwen2 LLM decoder architecture
+    for speech-to-text transcription.  Audio is passed via the implicit
+    prompt format with the ``<|AUDIO|>`` placeholder token.
+    """
+    engine_args = EngineArgs(
+        model="allendou/FireRedASR2-LLM-vllm",
+        max_model_len=448,
+        max_num_seqs=16,
+        limit_mm_per_prompt={"audio": 1},
+    )
+
+    prompt_str = (
+        "<|im_start|>user\n<|AUDIO|>请转写音频为文字<|im_end|>\n<|im_start|>assistant\n"
+    )
+
+    prompts = [
+        {  # Implicit prompt with audio
+            "prompt": prompt_str,
+            "multi_modal_data": {
+                "audio": AudioAsset("mary_had_lamb").audio_and_sample_rate,
+            },
+        },
+        {  # Another audio sample
+            "prompt": prompt_str,
+            "multi_modal_data": {
+                "audio": AudioAsset("winning_call").audio_and_sample_rate,
+            },
+        },
+    ]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+    )
+
+
+def run_fireredlid():
+    """
+    FireRedLID – Language Identification model.
+
+    This encoder-decoder model identifies the spoken language of an audio
+    clip. It outputs at most 2 tokens representing the detected language
+    (e.g. "en", "zh mandarin").
+    """
+    engine_args = EngineArgs(
+        model="PatchyTisa/FireRedLID-vllm",
+        max_model_len=8,
+        max_num_seqs=16,
+        limit_mm_per_prompt={"audio": 1},
+    )
+
+    prompts = [
+        {  # Test explicit encoder/decoder prompt
+            "encoder_prompt": {
+                "prompt": "",
+                "multi_modal_data": {
+                    "audio": AudioAsset("mary_had_lamb").audio_and_sample_rate,
+                },
+            },
+            "decoder_prompt": "<sos>",
+        },
+        {  # Another audio sample
+            "encoder_prompt": {
+                "prompt": "",
+                "multi_modal_data": {
+                    "audio": AudioAsset("winning_call").audio_and_sample_rate,
+                },
+            },
+            "decoder_prompt": "<sos>",
+        },
+    ]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+    )
+
+
 model_example_map = {
+    "fireredasr2": run_fireredasr2,
+    "fireredlid": run_fireredlid,
     "whisper": run_whisper,
 }
 
diff --git a/examples/offline_inference/mistral-small.py b/examples/generate/multimodal/mistral-small_offline.py
similarity index 100%
rename from examples/offline_inference/mistral-small.py
rename to examples/generate/multimodal/mistral-small_offline.py
diff --git a/examples/online_serving/openai_chat_completion_client_for_multimodal.py b/examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py
similarity index 97%
rename from examples/online_serving/openai_chat_completion_client_for_multimodal.py
rename to examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py
index c4407923ed2d..3a007731c74d 100644
--- a/examples/online_serving/openai_chat_completion_client_for_multimodal.py
+++ b/examples/generate/multimodal/openai_chat_completion_client_for_multimodal.py
@@ -25,7 +25,6 @@
 import pybase64 as base64
 import requests
 from openai import OpenAI
-from utils import get_first_model
 
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 
@@ -267,7 +266,7 @@ def run_audio(model: str, max_completion_tokens: int) -> None:
                     {
                         "type": "input_audio",
                         "input_audio": {
-                            # Any format supported by librosa is supported
+                            # Any format supported by soundfile/PyAV is supported
                             "data": audio_base64,
                             "format": "wav",
                         },
@@ -292,7 +291,7 @@ def run_audio(model: str, max_completion_tokens: int) -> None:
                     {
                         "type": "audio_url",
                         "audio_url": {
-                            # Any format supported by librosa is supported
+                            # Any format supported by soundfile/PyAV is supported
                             "url": audio_url
                         },
                     },
@@ -316,7 +315,7 @@ def run_audio(model: str, max_completion_tokens: int) -> None:
                     {
                         "type": "audio_url",
                         "audio_url": {
-                            # Any format supported by librosa is supported
+                            # Any format supported by soundfile/PyAV is supported
                             "url": f"data:audio/ogg;base64,{audio_base64}"
                         },
                     },
@@ -407,7 +406,7 @@ def parse_args():
 
 def main(args) -> None:
     chat_type = args.chat_type
-    model = get_first_model(client)
+    model = client.models.list().data[0].id
     example_function_map[chat_type](model, args.max_completion_tokens)
 
 
diff --git a/examples/offline_inference/qwen2_5_omni/README.md b/examples/generate/multimodal/qwen2_5_omni/README.md
similarity index 63%
rename from examples/offline_inference/qwen2_5_omni/README.md
rename to examples/generate/multimodal/qwen2_5_omni/README.md
index 409ac0223b55..bd96b080f67d 100644
--- a/examples/offline_inference/qwen2_5_omni/README.md
+++ b/examples/generate/multimodal/qwen2_5_omni/README.md
@@ -6,15 +6,15 @@ This folder provides several example scripts on how to inference Qwen2.5-Omni of
 
 ```bash
 # Audio + image + video
-python examples/offline_inference/qwen2_5_omni/only_thinker.py \
+python examples/generate/multimodal/qwen2_5_omni/only_thinker.py \
     -q mixed_modalities
 
 # Read vision and audio inputs from a single video file
-python examples/offline_inference/qwen2_5_omni/only_thinker.py \
+python examples/generate/multimodal/qwen2_5_omni/only_thinker.py \
     -q use_audio_in_video
 
 # Multiple audios
-python examples/offline_inference/qwen2_5_omni/only_thinker.py \
+python examples/generate/multimodal/qwen2_5_omni/only_thinker.py \
     -q multi_audios
 ```
 
@@ -24,16 +24,16 @@ You can also test Qwen2.5-Omni on a single modality:
 
 ```bash
 # Process audio inputs
-python examples/offline_inference/audio_language.py \
+python examples/generate/multimodal/audio_language_offline.py \
     --model-type qwen2_5_omni
 
 # Process image inputs
-python examples/offline_inference/vision_language.py \
+python examples/generate/multimodal/vision_language_offline.py \
     --modality image \
     --model-type qwen2_5_omni
 
 # Process video inputs
-python examples/offline_inference/vision_language.py \
+python examples/generate/multimodal/vision_language_offline.py \
     --modality video \
     --model-type qwen2_5_omni
 ```
diff --git a/examples/offline_inference/qwen2_5_omni/only_thinker.py b/examples/generate/multimodal/qwen2_5_omni/only_thinker.py
similarity index 100%
rename from examples/offline_inference/qwen2_5_omni/only_thinker.py
rename to examples/generate/multimodal/qwen2_5_omni/only_thinker.py
diff --git a/examples/offline_inference/qwen3_omni/only_thinker.py b/examples/generate/multimodal/qwen3_omni/only_thinker.py
similarity index 100%
rename from examples/offline_inference/qwen3_omni/only_thinker.py
rename to examples/generate/multimodal/qwen3_omni/only_thinker.py
diff --git a/examples/offline_inference/vision_language_multi_image.py b/examples/generate/multimodal/vision_language_multi_image_offline.py
old mode 100755
new mode 100644
similarity index 95%
rename from examples/offline_inference/vision_language_multi_image.py
rename to examples/generate/multimodal/vision_language_multi_image_offline.py
index 38a34a68ee5d..1b68a23b3bd0
--- a/examples/offline_inference/vision_language_multi_image.py
+++ b/examples/generate/multimodal/vision_language_multi_image_offline.py
@@ -241,6 +241,41 @@ def load_deepseek_ocr(question: str, image_urls: list[str]) -> ModelRequestData:
     )
 
 
+# exaone4_5
+def load_exaone4_5(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "LGAI-EXAONE/EXAONE-4.5-33B"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=8192,
+        max_num_seqs=2,
+        limit_mm_per_prompt={"image": len(image_urls)},
+    )
+
+    placeholders = [{"type": "image", "image": url} for url in image_urls]
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                *placeholders,
+                {"type": "text", "text": question},
+            ],
+        }
+    ]
+
+    processor = AutoProcessor.from_pretrained(model_name)
+
+    prompt = processor.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompt=prompt,
+        image_data=[fetch_image(url) for url in image_urls],
+    )
+
+
 def load_gemma3(question: str, image_urls: list[str]) -> ModelRequestData:
     model_name = "google/gemma-3-4b-it"
 
@@ -275,6 +310,38 @@ def load_gemma3(question: str, image_urls: list[str]) -> ModelRequestData:
     )
 
 
+def load_granite4_vision(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "ibm-granite/granite-vision-4.1-4b"
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=4096,
+        max_num_seqs=16,
+        limit_mm_per_prompt={"image": len(image_urls)},
+    )
+
+    placeholders = [{"type": "image", "image": url} for url in image_urls]
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                *placeholders,
+                {"type": "text", "text": question},
+            ],
+        }
+    ]
+
+    processor = AutoProcessor.from_pretrained(model_name)
+    prompt = processor.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompt=prompt,
+        image_data=[fetch_image(url) for url in image_urls],
+    )
+
+
 def load_h2ovl(question: str, image_urls: list[str]) -> ModelRequestData:
     model_name = "h2oai/h2ovl-mississippi-800m"
 
@@ -957,6 +1024,24 @@ def load_phi4mm(question: str, image_urls: list[str]) -> ModelRequestData:
     )
 
 
+def load_phi4siglip(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "microsoft/Phi-4-reasoning-vision-15B"
+    placeholders = "\n".join("<image>" for _ in image_urls)
+    prompt = f"<|user|>\n{placeholders}\n{question}<|end|>\n<|assistant|>\n"
+    engine_args = EngineArgs(
+        model=model_name,
+        trust_remote_code=True,
+        max_model_len=8192,
+        max_num_seqs=2,
+        limit_mm_per_prompt={"image": len(image_urls)},
+    )
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompt=prompt,
+        image_data=[fetch_image(url) for url in image_urls],
+    )
+
+
 def load_qwen_vl_chat(question: str, image_urls: list[str]) -> ModelRequestData:
     model_name = "Qwen/Qwen-VL-Chat"
     engine_args = EngineArgs(
@@ -1432,7 +1517,9 @@ def load_molmo2(question: str, image_urls: list[str]) -> ModelRequestData:
     "command_a_vision": load_command_a_vision,
     "deepseek_vl_v2": load_deepseek_vl2,
     "deepseek_ocr": load_deepseek_ocr,
+    "exaone4_5": load_exaone4_5,
     "gemma3": load_gemma3,
+    "granite4_vision": load_granite4_vision,
     "h2ovl_chat": load_h2ovl,
     "hunyuan_vl": load_hunyuan_vl,
     "hyperclovax_seed_vision": load_hyperclovax_seed_vision,
@@ -1455,6 +1542,7 @@ def load_molmo2(question: str, image_urls: list[str]) -> ModelRequestData:
     "paddleocr_vl": load_paddleocr_vl,
     "phi3_v": load_phi3v,
     "phi4_mm": load_phi4mm,
+    "phi4_siglip": load_phi4siglip,
     "pixtral_hf": load_pixtral_hf,
     "qwen_vl_chat": load_qwen_vl_chat,
     "qwen2_vl": load_qwen2_vl,
diff --git a/examples/offline_inference/vision_language.py b/examples/generate/multimodal/vision_language_offline.py
old mode 100755
new mode 100644
similarity index 78%
rename from examples/offline_inference/vision_language.py
rename to examples/generate/multimodal/vision_language_offline.py
index 56154c122125..b4e34bd64382
--- a/examples/offline_inference/vision_language.py
+++ b/examples/generate/multimodal/vision_language_offline.py
@@ -179,6 +179,33 @@ def run_chameleon(questions: list[str], modality: str) -> ModelRequestData:
     )
 
 
+# Cheers
+def run_cheers(questions: list[str], modality: str) -> ModelRequestData:
+    assert modality == "image"
+    model_name = "ai9stars/Cheers"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        trust_remote_code=True,
+        max_model_len=4096,
+        limit_mm_per_prompt={modality: 1},
+    )
+
+    prompts = [
+        (
+            f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+            f"<|im_start|>user\n<|image_pad|>{question}<|im_end|>\n"
+            f"<|im_start|>assistant\n"
+        )
+        for question in questions
+    ]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+    )
+
+
 def run_command_a_vision(questions: list[str], modality: str) -> ModelRequestData:
     assert modality == "image"
 
@@ -367,18 +394,24 @@ def run_eagle2_5(questions: list[str], modality: str) -> ModelRequestData:
 def run_ernie45_vl(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "baidu/ERNIE-4.5-VL-28B-A3B-PT"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
         max_num_seqs=5,
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
         trust_remote_code=True,
     )
 
+    image_placeholder = "Picture 1:<|IMAGE_START|><|image@placeholder|><|IMAGE_END|>"
+    video_placeholder = "Video 1:<|VIDEO_START|><|video@placeholder|><|VIDEO_END|>"
+
     if modality == "image":
-        placeholder = "Picture 1:<|IMAGE_START|><|image@placeholder|><|IMAGE_END|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "Video 1:<|VIDEO_START|><|video@placeholder|><|VIDEO_END|>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     prompts = [
         (
@@ -394,6 +427,49 @@ def run_ernie45_vl(questions: list[str], modality: str) -> ModelRequestData:
     )
 
 
+# EXAONE-4.5
+def run_exaone4_5(questions: list[str], modality: str) -> ModelRequestData:
+    model_name = "LGAI-EXAONE/EXAONE-4.5-33B"
+
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=4096,
+        max_num_seqs=5,
+        mm_processor_kwargs={
+            "min_pixels": 28 * 28,
+            "max_pixels": 1280 * 28 * 28,
+            "fps": 1,
+        },
+        limit_mm_per_prompt=mm_limit,
+    )
+
+    image_placeholder = "<vision><|image_pad|></vision>"
+    video_placeholder = "<vision><|video_pad|></vision>"
+
+    if modality == "image":
+        placeholder = image_placeholder
+    elif modality == "video":
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
+
+    prompts = [
+        (
+            "<|system|>\nYou are a helpful assistant.<|endofturn|>\n"
+            f"<|user|>\n{placeholder}"
+            f"{question}<|endofturn|>\n"
+            "<|assistant|>\n"
+        )
+        for question in questions
+    ]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+    )
+
+
 # Fuyu
 def run_fuyu(questions: list[str], modality: str) -> ModelRequestData:
     assert modality == "image"
@@ -502,6 +578,7 @@ def run_glm4v(questions: list[str], modality: str) -> ModelRequestData:
 def run_glm4_1v(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "zai-org/GLM-4.1V-9B-Thinking"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
@@ -510,14 +587,19 @@ def run_glm4_1v(questions: list[str], modality: str) -> ModelRequestData:
             "size": {"shortest_edge": 12544, "longest_edge": 47040000},
             "fps": 1,
         },
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
         enforce_eager=True,
     )
 
+    image_placeholder = "<|begin_of_image|><|image|><|end_of_image|>"
+    video_placeholder = "<|begin_of_video|><|video|><|end_of_video|>"
+
     if modality == "image":
-        placeholder = "<|begin_of_image|><|image|><|end_of_image|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<|begin_of_video|><|video|><|end_of_video|>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     prompts = [
         (
@@ -538,6 +620,7 @@ def run_glm4_1v(questions: list[str], modality: str) -> ModelRequestData:
 def run_glm4_5v(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "zai-org/GLM-4.5V"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
@@ -546,15 +629,20 @@ def run_glm4_5v(questions: list[str], modality: str) -> ModelRequestData:
             "size": {"shortest_edge": 12544, "longest_edge": 47040000},
             "fps": 1,
         },
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
         enforce_eager=True,
         tensor_parallel_size=4,
     )
 
+    image_placeholder = "<|begin_of_image|><|image|><|end_of_image|>"
+    video_placeholder = "<|begin_of_video|><|video|><|end_of_video|>"
+
     if modality == "image":
-        placeholder = "<|begin_of_image|><|image|><|end_of_image|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<|begin_of_video|><|video|><|end_of_video|>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     prompts = [
         (
@@ -575,6 +663,7 @@ def run_glm4_5v(questions: list[str], modality: str) -> ModelRequestData:
 def run_glm4_5v_fp8(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "zai-org/GLM-4.5V-FP8"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
@@ -583,15 +672,20 @@ def run_glm4_5v_fp8(questions: list[str], modality: str) -> ModelRequestData:
             "size": {"shortest_edge": 12544, "longest_edge": 47040000},
             "fps": 1,
         },
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
         enforce_eager=True,
         tensor_parallel_size=4,
     )
 
+    image_placeholder = "<|begin_of_image|><|image|><|end_of_image|>"
+    video_placeholder = "<|begin_of_video|><|video|><|end_of_video|>"
+
     if modality == "image":
-        placeholder = "<|begin_of_image|><|image|><|end_of_image|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<|begin_of_video|><|video|><|end_of_video|>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     prompts = [
         (
@@ -612,6 +706,7 @@ def run_glm4_5v_fp8(questions: list[str], modality: str) -> ModelRequestData:
 def run_glm_ocr(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "zai-org/GLM-OCR"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
@@ -620,14 +715,19 @@ def run_glm_ocr(questions: list[str], modality: str) -> ModelRequestData:
             "size": {"shortest_edge": 12544, "longest_edge": 47040000},
             "fps": 1,
         },
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
         enforce_eager=True,
     )
 
+    image_placeholder = "<|begin_of_image|><|image|><|end_of_image|>"
+    video_placeholder = "<|begin_of_video|><|video|><|end_of_video|>"
+
     if modality == "image":
-        placeholder = "<|begin_of_image|><|image|><|end_of_image|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<|begin_of_video|><|video|><|end_of_video|>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     prompts = [
         (
@@ -708,11 +808,12 @@ def run_hyperclovax_seed_vision(
     model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         trust_remote_code=True,
-        max_model_len=8192 if modality == "image" else 16384,
-        limit_mm_per_prompt={modality: 1},
+        max_model_len=16384 if modality in ("video", "image+video") else 8192,
+        limit_mm_per_prompt=mm_limit,
     )
 
     messages = list()
@@ -764,6 +865,29 @@ def run_hyperclovax_seed_vision(
                     }
                 ]
             )
+        elif modality == "image+video":
+            messages.append(
+                [
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "image",
+                                "ocr": "",
+                                "lens_keywords": "",
+                                "lens_local_keywords": "",
+                            },
+                            {
+                                "type": "video",
+                            },
+                            {
+                                "type": "text",
+                                "text": question,
+                            },
+                        ],
+                    }
+                ]
+            )
         else:
             raise ValueError(f"Unsupported modality: {modality}")
 
@@ -812,19 +936,25 @@ def run_idefics3(questions: list[str], modality: str) -> ModelRequestData:
 def run_interns1(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "internlm/Intern-S1-mini"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         trust_remote_code=True,
         max_model_len=8192,
         max_num_seqs=2,
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
         enforce_eager=True,
     )
 
+    image_placeholder = "<IMG_CONTEXT>"
+    video_placeholder = "<video>"
+
     if modality == "image":
-        placeholder = "<IMG_CONTEXT>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<video>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + "\n" + video_placeholder
 
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     messages = [
@@ -845,20 +975,26 @@ def run_interns1(questions: list[str], modality: str) -> ModelRequestData:
 def run_interns1_pro(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "internlm/Intern-S1-Pro"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         trust_remote_code=True,
         max_model_len=8192,
         max_num_seqs=2,
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
         enforce_eager=True,
         tensor_parallel_size=4,
     )
 
+    image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
+    video_placeholder = "<|vision_start|><|video_pad|><|vision_end|>"
+
     if modality == "image":
-        placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<|vision_start|><|video_pad|><|vision_end|>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     messages = [
@@ -879,17 +1015,23 @@ def run_interns1_pro(questions: list[str], modality: str) -> ModelRequestData:
 def run_internvl(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "OpenGVLab/InternVL3-2B"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         trust_remote_code=True,
         max_model_len=8192,
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
     )
 
+    image_placeholder = "<image>"
+    video_placeholder = "<video>"
+
     if modality == "image":
-        placeholder = "<image>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<video>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + "\n" + video_placeholder
 
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     messages = [
@@ -946,21 +1088,27 @@ def run_kanana_v(questions: list[str], modality: str) -> ModelRequestData:
 def run_keye_vl(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "Kwai-Keye/Keye-VL-8B-Preview"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=8192,
         trust_remote_code=True,
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
     )
 
+    image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
+    video_placeholder = "<|vision_start|><|video_pad|><|vision_end|>"
+
     if modality == "image":
-        placeholder = "<|image_pad|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<|video_pad|>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     prompts = [
         (
-            f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>"
+            f"<|im_start|>user\n{placeholder}"
             f"{question}<|im_end|>\n"
             "<|im_start|>assistant\n"
         )
@@ -977,21 +1125,27 @@ def run_keye_vl(questions: list[str], modality: str) -> ModelRequestData:
 def run_keye_vl1_5(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "Kwai-Keye/Keye-VL-1.5-8B"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=8192,
         trust_remote_code=True,
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
     )
 
+    image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
+    video_placeholder = "<|vision_start|><|video_pad|><|vision_end|>"
+
     if modality == "image":
-        placeholder = "<|image_pad|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<|video_pad|>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     prompts = [
         (
-            f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>"
+            f"<|im_start|>user\n{placeholder}"
             f"{question}<|im_end|>\n"
             "<|im_start|>assistant\n"
         )
@@ -1195,22 +1349,26 @@ def run_llava_next_video(questions: list[str], modality: str) -> ModelRequestDat
 
 # LLaVA-OneVision
 def run_llava_onevision(questions: list[str], modality: str) -> ModelRequestData:
-    if modality == "video":
-        prompts = [
-            f"<|im_start|>user <video>\n{question}<|im_end|><|im_start|>assistant\n"
-            for question in questions
-        ]
+    image_placeholder = "<image>"
+    video_placeholder = "<video>"
 
-    elif modality == "image":
-        prompts = [
-            f"<|im_start|>user <image>\n{question}<|im_end|><|im_start|>assistant\n"
-            for question in questions
-        ]
+    if modality == "image":
+        placeholder = image_placeholder
+    elif modality == "video":
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + "\n" + video_placeholder
 
+    prompts = [
+        (f"<|im_start|>user {placeholder}\n{question}<|im_end|><|im_start|>assistant\n")
+        for question in questions
+    ]
+
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model="llava-hf/llava-onevision-qwen2-7b-ov-hf",
         max_model_len=16384,
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
     )
 
     return ModelRequestData(
@@ -1243,8 +1401,8 @@ def run_mantis(questions: list[str], modality: str) -> ModelRequestData:
 
 # MiniCPM-V
 def run_minicpmv_base(questions: list[str], modality: str, model_name):
-    assert modality in ["image", "video"]
-    # If you want to use `MiniCPM-o-2_6` with audio inputs, check `audio_language.py` # noqa
+    assert modality in ["image", "video", "image+video"]
+    # If you want to use `MiniCPM-o-2_6` with audio inputs, check `audio_language_offline.py` # noqa
 
     # 2.0
     # The official repo doesn't work yet, so we need to use a fork for now
@@ -1265,12 +1423,13 @@ def run_minicpmv_base(questions: list[str], modality: str, model_name):
     # o2.6: image, video, audio
     # model_name = "openbmb/MiniCPM-o-2_6"
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
         max_num_seqs=2,
         trust_remote_code=True,
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
     )
     # NOTE The stop_token_ids are different for various versions of MiniCPM-V
     # 2.0
@@ -1283,17 +1442,22 @@ def run_minicpmv_base(questions: list[str], modality: str, model_name):
     stop_tokens = ["<|im_end|>", "<|endoftext|>"]
     stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
 
-    modality_placeholder = {
-        "image": "(<image>./</image>)",
-        "video": "(<video>./</video>)",
-    }
+    image_placeholder = "(<image>./</image>)"
+    video_placeholder = "(<video>./</video>)"
+
+    if modality == "image":
+        placeholder = image_placeholder
+    elif modality == "video":
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + "\n" + video_placeholder
 
     prompts = [
         tokenizer.apply_chat_template(
             [
                 {
                     "role": "user",
-                    "content": f"{modality_placeholder[modality]}\n{question}",
+                    "content": f"{placeholder}\n{question}",
                 }
             ],
             tokenize=False,
@@ -1402,20 +1566,24 @@ def run_molmo(questions: list[str], modality: str) -> ModelRequestData:
 def run_molmo2(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "allenai/Molmo2-8B"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         trust_remote_code=True,
         dtype="bfloat16",
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
         max_num_batched_tokens=36864,
     )
 
+    image_placeholder = "<|image|>"
+    video_placeholder = "<|video|>"
+
     if modality == "image":
-        placeholder = "<|image|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<|video|>"
-    else:
-        raise ValueError(f"Unsupported modality for molmo2: {modality}")
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     prompts = [
         f"{placeholder}<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n"
@@ -1499,19 +1667,25 @@ def run_nvlm_d(questions: list[str], modality: str) -> ModelRequestData:
 def run_openpangu_vl(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "FreedomIntelligence/openPangu-VL-7B"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
         max_num_seqs=4,
         trust_remote_code=True,
         enforce_eager=True,
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
     )
 
+    image_placeholder = "[unused19]"
+    video_placeholder = "[unused32]"
+
     if modality == "image":
-        placeholder = "[unused19]"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "[unused32]"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     prompts = [
         (
@@ -1559,18 +1733,25 @@ def run_ovis(questions: list[str], modality: str) -> ModelRequestData:
 def run_ovis2_5(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "AIDC-AI/Ovis2.5-2B"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
         max_num_seqs=2,
         trust_remote_code=True,
         dtype="half",
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
     )
+
+    image_placeholder = "<image>"
+    video_placeholder = "<video>"
+
     if modality == "image":
-        placeholder = "<image>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<video>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + "\n" + video_placeholder
 
     prompts = [
         f"<|im_start|>user\n\n{placeholder}\n{question}<|im_end|>\n<|im_start|>assistant\n"
@@ -1714,6 +1895,27 @@ def run_phi4mm(questions: list[str], modality: str) -> ModelRequestData:
     )
 
 
+# Phi-4-reasoning-vision
+def run_phi4siglip(questions: list[str], modality: str) -> ModelRequestData:
+    assert modality == "image"
+    model_name = "microsoft/Phi-4-reasoning-vision-15B"
+    prompts = [
+        f"<|user|>\n<image>\n{question}<|end|>\n<|assistant|>\n"
+        for question in questions
+    ]
+    engine_args = EngineArgs(
+        model=model_name,
+        trust_remote_code=True,
+        max_model_len=8192,
+        max_num_seqs=2,
+        limit_mm_per_prompt={modality: 1},
+    )
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+    )
+
+
 # Pixtral HF-format
 def run_pixtral_hf(questions: list[str], modality: str) -> ModelRequestData:
     assert modality == "image"
@@ -1761,6 +1963,7 @@ def run_qwen_vl(questions: list[str], modality: str) -> ModelRequestData:
 def run_qwen2_vl(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "Qwen/Qwen2-VL-7B-Instruct"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
@@ -1770,18 +1973,23 @@ def run_qwen2_vl(questions: list[str], modality: str) -> ModelRequestData:
             "min_pixels": 28 * 28,
             "max_pixels": 1280 * 28 * 28,
         },
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
     )
 
+    image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
+    video_placeholder = "<|vision_start|><|video_pad|><|vision_end|>"
+
     if modality == "image":
-        placeholder = "<|image_pad|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<|video_pad|>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     prompts = [
         (
             "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
-            f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>"
+            f"<|im_start|>user\n{placeholder}"
             f"{question}<|im_end|>\n"
             "<|im_start|>assistant\n"
         )
@@ -1798,6 +2006,7 @@ def run_qwen2_vl(questions: list[str], modality: str) -> ModelRequestData:
 def run_qwen2_5_vl(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "Qwen/Qwen2.5-VL-3B-Instruct"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
@@ -1807,18 +2016,23 @@ def run_qwen2_5_vl(questions: list[str], modality: str) -> ModelRequestData:
             "max_pixels": 1280 * 28 * 28,
             "fps": 1,
         },
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
     )
 
+    image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
+    video_placeholder = "<|vision_start|><|video_pad|><|vision_end|>"
+
     if modality == "image":
-        placeholder = "<|image_pad|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<|video_pad|>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     prompts = [
         (
             "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
-            f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>"
+            f"<|im_start|>user\n{placeholder}"
             f"{question}<|im_end|>\n"
             "<|im_start|>assistant\n"
         )
@@ -1835,6 +2049,7 @@ def run_qwen2_5_vl(questions: list[str], modality: str) -> ModelRequestData:
 def run_qwen2_5_omni(questions: list[str], modality: str):
     model_name = "Qwen/Qwen2.5-Omni-7B"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
@@ -1844,13 +2059,18 @@ def run_qwen2_5_omni(questions: list[str], modality: str):
             "max_pixels": 1280 * 28 * 28,
             "fps": 1,
         },
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
     )
 
+    image_placeholder = "<|vision_bos|><|IMAGE|><|vision_eos|>"
+    video_placeholder = "<|vision_bos|><|VIDEO|><|vision_eos|>"
+
     if modality == "image":
-        placeholder = "<|IMAGE|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<|VIDEO|>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     default_system = (
         "You are Qwen, a virtual human developed by the Qwen Team, Alibaba "
@@ -1861,7 +2081,7 @@ def run_qwen2_5_omni(questions: list[str], modality: str):
     prompts = [
         (
             f"<|im_start|>system\n{default_system}<|im_end|>\n"
-            f"<|im_start|>user\n<|vision_bos|>{placeholder}<|vision_eos|>"
+            f"<|im_start|>user\n{placeholder}"
             f"{question}<|im_end|>\n"
             "<|im_start|>assistant\n"
         )
@@ -1877,6 +2097,7 @@ def run_qwen2_5_omni(questions: list[str], modality: str):
 def run_qwen3_vl(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "Qwen/Qwen3-VL-4B-Instruct"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
@@ -1886,18 +2107,23 @@ def run_qwen3_vl(questions: list[str], modality: str) -> ModelRequestData:
             "max_pixels": 1280 * 28 * 28,
             "fps": 1,
         },
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
     )
 
+    image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
+    video_placeholder = "<|vision_start|><|video_pad|><|vision_end|>"
+
     if modality == "image":
-        placeholder = "<|image_pad|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<|video_pad|>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     prompts = [
         (
             "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
-            f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>"
+            f"<|im_start|>user\n{placeholder}"
             f"{question}<|im_end|>\n"
             "<|im_start|>assistant\n"
         )
@@ -1914,6 +2140,7 @@ def run_qwen3_vl(questions: list[str], modality: str) -> ModelRequestData:
 def run_qwen3_vl_moe(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "Qwen/Qwen3-VL-30B-A3B-Instruct"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
@@ -1923,18 +2150,109 @@ def run_qwen3_vl_moe(questions: list[str], modality: str) -> ModelRequestData:
             "max_pixels": 1280 * 28 * 28,
             "fps": 1,
         },
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
     )
 
+    image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
+    video_placeholder = "<|vision_start|><|video_pad|><|vision_end|>"
+
     if modality == "image":
-        placeholder = "<|image_pad|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<|video_pad|>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     prompts = [
         (
             "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
-            f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>"
+            f"<|im_start|>user\n{placeholder}"
+            f"{question}<|im_end|>\n"
+            "<|im_start|>assistant\n"
+        )
+        for question in questions
+    ]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+    )
+
+
+# Qwen3.5-Dense
+def run_qwen3_5(questions: list[str], modality: str) -> ModelRequestData:
+    model_name = "Qwen/Qwen3.5-4B"
+
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=4096,
+        max_num_seqs=5,
+        mm_processor_kwargs={
+            "min_pixels": 28 * 28,
+            "max_pixels": 1280 * 28 * 28,
+            "fps": 1,
+        },
+        limit_mm_per_prompt=mm_limit,
+    )
+
+    image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
+    video_placeholder = "<|vision_start|><|video_pad|><|vision_end|>"
+
+    if modality == "image":
+        placeholder = image_placeholder
+    elif modality == "video":
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
+
+    prompts = [
+        (
+            "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+            f"<|im_start|>user\n{placeholder}"
+            f"{question}<|im_end|>\n"
+            "<|im_start|>assistant\n"
+        )
+        for question in questions
+    ]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+    )
+
+
+# Qwen3.5-MoE
+def run_qwen3_5_moe(questions: list[str], modality: str) -> ModelRequestData:
+    model_name = "Qwen/Qwen3.5-35B-A3B"
+
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=4096,
+        max_num_seqs=5,
+        mm_processor_kwargs={
+            "min_pixels": 28 * 28,
+            "max_pixels": 1280 * 28 * 28,
+            "fps": 1,
+        },
+        limit_mm_per_prompt=mm_limit,
+    )
+
+    image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
+    video_placeholder = "<|vision_start|><|video_pad|><|vision_end|>"
+
+    if modality == "image":
+        placeholder = image_placeholder
+    elif modality == "video":
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
+
+    prompts = [
+        (
+            "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+            f"<|im_start|>user\n{placeholder}"
             f"{question}<|im_end|>\n"
             "<|im_start|>assistant\n"
         )
@@ -2105,6 +2423,7 @@ def run_tarsier(questions: list[str], modality: str) -> ModelRequestData:
 def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
     model_name = "omni-research/Tarsier2-Recap-7b"
 
+    mm_limit = {"image": 1, "video": 1} if modality == "image+video" else {modality: 1}
     engine_args = EngineArgs(
         model=model_name,
         max_model_len=4096,
@@ -2112,18 +2431,23 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
             "architectures": ["Tarsier2ForConditionalGeneration"],
             "model_type": "tarsier2",
         },
-        limit_mm_per_prompt={modality: 1},
+        limit_mm_per_prompt=mm_limit,
     )
 
+    image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
+    video_placeholder = "<|vision_start|><|video_pad|><|vision_end|>"
+
     if modality == "image":
-        placeholder = "<|image_pad|>"
+        placeholder = image_placeholder
     elif modality == "video":
-        placeholder = "<|video_pad|>"
+        placeholder = video_placeholder
+    elif modality == "image+video":
+        placeholder = image_placeholder + video_placeholder
 
     prompts = [
         (
             "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
-            f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>"
+            f"<|im_start|>user\n{placeholder}"
             f"{question}<|im_end|>\n"
             "<|im_start|>assistant\n"
         )
@@ -2140,6 +2464,7 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
     "aria": run_aria,
     "aya_vision": run_aya_vision,
     "bagel": run_bagel,
+    "cheers": run_cheers,
     "bee": run_bee,
     "blip-2": run_blip2,
     "chameleon": run_chameleon,
@@ -2150,6 +2475,7 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
     "dots_ocr": run_dots_ocr,
     "eagle2_5": run_eagle2_5,
     "ernie45_vl": run_ernie45_vl,
+    "exaone4_5": run_exaone4_5,
     "fuyu": run_fuyu,
     "gemma3": run_gemma3,
     "gemma3n": run_gemma3n,
@@ -2194,6 +2520,7 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
     "paligemma2": run_paligemma2,
     "phi3_v": run_phi3v,
     "phi4_mm": run_phi4mm,
+    "phi4_siglip": run_phi4siglip,
     "pixtral_hf": run_pixtral_hf,
     "qwen_vl": run_qwen_vl,
     "qwen2_vl": run_qwen2_vl,
@@ -2201,6 +2528,8 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
     "qwen2_5_omni": run_qwen2_5_omni,
     "qwen3_vl": run_qwen3_vl,
     "qwen3_vl_moe": run_qwen3_vl_moe,
+    "qwen3_5": run_qwen3_5,
+    "qwen3_5_moe": run_qwen3_5_moe,
     "rvl": run_r_vl,
     "skywork_chat": run_skyworkr1v,
     "smolvlm": run_smolvlm,
@@ -2219,6 +2548,19 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
     "molmo2",
     "qwen3_vl",
     "qwen3_vl_moe",
+    "qwen3_5",
+    "qwen3_5_moe",
+]
+
+
+MODELS_SUPPORT_VIT_CUDA_GRAPH = [
+    "qwen2_5_vl",
+    "qwen3_vl",
+    "qwen3_vl_moe",
+    "qwen2_vl",
+    "qwen3_5",
+    "qwen3_5_moe",
+    "stepvl",
 ]
 
 
@@ -2269,6 +2611,24 @@ def get_multi_modal_input(args):
             "questions": vision_chunk_questions,
         }
 
+    if args.modality == "image+video":
+        image = convert_image_mode(ImageAsset("cherry_blossom").pil_image, "RGB")
+        needs_metadata = args.model_type in MODELS_NEED_VIDEO_METADATA
+        video = VideoAsset(name="baby_reading", num_frames=args.num_frames).np_ndarrays
+        metadata = VideoAsset(name="baby_reading", num_frames=args.num_frames).metadata
+        img_video_questions = [
+            "What is shown in the image? What happens in the video?",
+            "Describe both the image and the video content.",
+        ]
+
+        return {
+            "data": {
+                "image": image,
+                "video": ([(video, metadata)] if needs_metadata else video),
+            },
+            "questions": img_video_questions,
+        }
+
     msg = f"Modality {args.modality} is not supported."
     raise ValueError(msg)
 
@@ -2316,6 +2676,29 @@ def apply_image_repeat(
     return inputs, inputs_with_empty_media
 
 
+def maybe_add_vit_cuda_graph_compilation_config(args, engine_args):
+    model = args.model_type
+    modality = args.modality
+    enable_vit_cuda_graph = args.enable_vit_cuda_graph
+
+    if enable_vit_cuda_graph and model in MODELS_SUPPORT_VIT_CUDA_GRAPH:
+        if modality == "image" or modality == "video":
+            vision_items_per_batch = 1
+        elif modality == "image+video":
+            vision_items_per_batch = 2
+        else:
+            raise ValueError(
+                f"modality={modality} is not supported for vit cuda graph."
+            )
+
+        engine_args.compilation_config = {
+            "cudagraph_mm_encoder": True,
+            "encoder_cudagraph_max_vision_items_per_batch": vision_items_per_batch,
+        }
+
+    return engine_args
+
+
 @contextmanager
 def time_counter(enable: bool):
     if enable:
@@ -2351,7 +2734,7 @@ def parse_args():
         "--modality",
         type=str,
         default="image",
-        choices=["image", "video", "vision_chunk"],
+        choices=["image", "video", "image+video", "vision_chunk"],
         help="Modality of the input.",
     )
     parser.add_argument(
@@ -2366,33 +2749,28 @@ def parse_args():
         default=0,
         help="Set the seed when initializing `vllm.LLM`.",
     )
-
     parser.add_argument(
         "--image-repeat-prob",
         type=float,
         default=None,
         help="Simulates the hit-ratio for multi-modal preprocessor cache (if enabled)",
     )
-
     parser.add_argument(
         "--disable-mm-processor-cache",
         action="store_true",
         help="If True, disables caching of multi-modal processor.",
     )
-
     parser.add_argument(
         "--time-generate",
         action="store_true",
         help="If True, then print the total generate() call time",
     )
-
     parser.add_argument(
         "--use-different-prompt-per-request",
         action="store_true",
         help="If True, then use different prompt (with the same multi-modal "
         "data) for each request.",
     )
-
     parser.add_argument(
         "--verify-mm-cache-hit-with-uuids",
         action="store_true",
@@ -2406,6 +2784,11 @@ def parse_args():
         default=None,
         help="Tensor parallel size to override the model's default setting. ",
     )
+    parser.add_argument(
+        "--enable-vit-cuda-graph",
+        action="store_true",
+        help="If True, will enable vit cuda graph capture and replay for the model.",
+    )
     return parser.parse_args()
 
 
@@ -2439,6 +2822,7 @@ def main(args):
     engine_args.mm_processor_cache_gb = mm_processor_cache_gb
     if args.tensor_parallel_size is not None:
         engine_args.tensor_parallel_size = args.tensor_parallel_size
+    engine_args = maybe_add_vit_cuda_graph_compilation_config(args, engine_args)
     llm = LLM.from_engine_args(engine_args)
 
     # Don't want to check the flag multiple times, so just hijack `prompts`.
@@ -2458,23 +2842,42 @@ def main(args):
         else req_data.sampling_params
     )
 
+    def _mm_data(data, modality):
+        if modality == "image+video":
+            return {"image": data["image"], "video": data["video"]}
+        return {modality: data}
+
+    def _mm_uuid(uuid, modality):
+        if modality == "image+video":
+            return {"image": uuid, "video": uuid + "v"}
+        return {modality: uuid}
+
+    def _mm_empty(modality):
+        if modality == "image+video":
+            return {"image": None, "video": None}
+        return {modality: None}
+
     assert args.num_prompts > 0
     if args.num_prompts == 1:
         # Single inference
         uuid = "uuid_0"
         inputs = {
             "prompt": prompts[0],
-            "multi_modal_data": {modality: data},
-            "multi_modal_uuids": {modality: uuid},
+            "multi_modal_data": _mm_data(data, modality),
+            "multi_modal_uuids": _mm_uuid(uuid, modality),
         }
         inputs_with_empty_media = {
             "prompt": prompts[0],
-            "multi_modal_data": {modality: None},
-            "multi_modal_uuids": {modality: uuid},
+            "multi_modal_data": _mm_empty(modality),
+            "multi_modal_uuids": _mm_uuid(uuid, modality),
         }
     else:
         # Batch inference
         if args.image_repeat_prob is not None:
+            if modality == "image+video":
+                raise ValueError(
+                    "--image-repeat-prob is not supported for 'image+video' modality"
+                )
             # Repeat images with specified probability of "image_repeat_prob"
             inputs, inputs_with_empty_media = apply_image_repeat(
                 args.image_repeat_prob,
@@ -2484,7 +2887,7 @@ def main(args):
                 modality,
             )
         else:
-            # Use the same image for all prompts
+            # Use the same image/video for all prompts
             inputs = []
             inputs_with_empty_media = []
             for i in range(args.num_prompts):
@@ -2492,15 +2895,15 @@ def main(args):
                 inputs.append(
                     {
                         "prompt": prompts[i % len(prompts)],
-                        "multi_modal_data": {modality: data},
-                        "multi_modal_uuids": {modality: uuid},
+                        "multi_modal_data": _mm_data(data, modality),
+                        "multi_modal_uuids": _mm_uuid(uuid, modality),
                     }
                 )
                 inputs_with_empty_media.append(
                     {
                         "prompt": prompts[i % len(prompts)],
-                        "multi_modal_data": {modality: None},
-                        "multi_modal_uuids": {modality: uuid},
+                        "multi_modal_data": _mm_empty(modality),
+                        "multi_modal_uuids": _mm_uuid(uuid, modality),
                     }
                 )
 
diff --git a/examples/offline_inference/qwen_1m.py b/examples/generate/qwen_1m_offline.py
similarity index 100%
rename from examples/offline_inference/qwen_1m.py
rename to examples/generate/qwen_1m_offline.py
diff --git a/examples/online_serving/token_generation_client.py b/examples/generate/token_generation_client.py
similarity index 100%
rename from examples/online_serving/token_generation_client.py
rename to examples/generate/token_generation_client.py
diff --git a/examples/online_serving/dashboards/README.md b/examples/observability/dashboards/README.md
similarity index 91%
rename from examples/online_serving/dashboards/README.md
rename to examples/observability/dashboards/README.md
index 10b9a864f572..29ec932cef24 100644
--- a/examples/online_serving/dashboards/README.md
+++ b/examples/observability/dashboards/README.md
@@ -43,7 +43,7 @@ Both platforms provide equivalent monitoring capabilities:
 First, navigate to this example's directory:
 
 ```bash
-cd examples/online_serving/dashboards
+cd examples/observability/dashboards
 ```
 
 ### Grafana
@@ -74,8 +74,8 @@ percli apply -f perses/performance_statistics.yaml
 
 For detailed deployment instructions and platform-specific options, see:
 
-- **[Grafana Documentation](./grafana)** - JSON dashboards, operator usage, manual import
-- **[Perses Documentation](./perses)** - YAML specs, CLI usage, operator wrapping
+- **[Grafana Documentation](grafana)** - JSON dashboards, operator usage, manual import
+- **[Perses Documentation](perses)** - YAML specs, CLI usage, operator wrapping
 
 ## Contributing
 
diff --git a/examples/online_serving/dashboards/grafana/README.md b/examples/observability/dashboards/grafana/README.md
similarity index 100%
rename from examples/online_serving/dashboards/grafana/README.md
rename to examples/observability/dashboards/grafana/README.md
diff --git a/examples/online_serving/dashboards/grafana/performance_statistics.json b/examples/observability/dashboards/grafana/performance_statistics.json
similarity index 100%
rename from examples/online_serving/dashboards/grafana/performance_statistics.json
rename to examples/observability/dashboards/grafana/performance_statistics.json
diff --git a/examples/online_serving/dashboards/grafana/query_statistics.json b/examples/observability/dashboards/grafana/query_statistics.json
similarity index 100%
rename from examples/online_serving/dashboards/grafana/query_statistics.json
rename to examples/observability/dashboards/grafana/query_statistics.json
diff --git a/examples/online_serving/dashboards/perses/README.md b/examples/observability/dashboards/perses/README.md
similarity index 100%
rename from examples/online_serving/dashboards/perses/README.md
rename to examples/observability/dashboards/perses/README.md
diff --git a/examples/online_serving/dashboards/perses/performance_statistics.yaml b/examples/observability/dashboards/perses/performance_statistics.yaml
similarity index 100%
rename from examples/online_serving/dashboards/perses/performance_statistics.yaml
rename to examples/observability/dashboards/perses/performance_statistics.yaml
diff --git a/examples/online_serving/dashboards/perses/query_statistics.yaml b/examples/observability/dashboards/perses/query_statistics.yaml
similarity index 100%
rename from examples/online_serving/dashboards/perses/query_statistics.yaml
rename to examples/observability/dashboards/perses/query_statistics.yaml
diff --git a/examples/offline_inference/metrics.py b/examples/observability/metrics/offline.py
similarity index 100%
rename from examples/offline_inference/metrics.py
rename to examples/observability/metrics/offline.py
diff --git a/examples/online_serving/opentelemetry/README.md b/examples/observability/opentelemetry/README.md
similarity index 100%
rename from examples/online_serving/opentelemetry/README.md
rename to examples/observability/opentelemetry/README.md
diff --git a/examples/online_serving/opentelemetry/dummy_client.py b/examples/observability/opentelemetry/dummy_client.py
similarity index 100%
rename from examples/online_serving/opentelemetry/dummy_client.py
rename to examples/observability/opentelemetry/dummy_client.py
diff --git a/examples/online_serving/prometheus_grafana/README.md b/examples/observability/prometheus_grafana/README.md
similarity index 100%
rename from examples/online_serving/prometheus_grafana/README.md
rename to examples/observability/prometheus_grafana/README.md
diff --git a/examples/online_serving/prometheus_grafana/docker-compose.yaml b/examples/observability/prometheus_grafana/docker-compose.yaml
similarity index 100%
rename from examples/online_serving/prometheus_grafana/docker-compose.yaml
rename to examples/observability/prometheus_grafana/docker-compose.yaml
diff --git a/examples/online_serving/prometheus_grafana/grafana.json b/examples/observability/prometheus_grafana/grafana.json
similarity index 100%
rename from examples/online_serving/prometheus_grafana/grafana.json
rename to examples/observability/prometheus_grafana/grafana.json
diff --git a/examples/online_serving/prometheus_grafana/prometheus.yaml b/examples/observability/prometheus_grafana/prometheus.yaml
similarity index 100%
rename from examples/online_serving/prometheus_grafana/prometheus.yaml
rename to examples/observability/prometheus_grafana/prometheus.yaml
diff --git a/examples/online_serving/disaggregated_serving/moriio_toy_proxy_server.py b/examples/online_serving/disaggregated_serving/moriio_toy_proxy_server.py
deleted file mode 100644
index 33fb56c88020..000000000000
--- a/examples/online_serving/disaggregated_serving/moriio_toy_proxy_server.py
+++ /dev/null
@@ -1,306 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import asyncio
-import copy
-import logging
-import os
-import socket
-import threading
-import uuid
-
-import aiohttp
-import msgpack
-import regex as re
-import zmq
-from quart import Quart, make_response, request
-
-from vllm.distributed.kv_transfer.kv_connector.v1.moriio.moriio_common import (
-    MoRIIOConstants,
-)
-
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.DEBUG)
-prefill_instances: list[dict] = []
-decode_instances: list[dict] = []
-request_nums = 0
-app = Quart(__name__)
-
-IP_PORT_PATTERN = re.compile(r"//(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d+)")
-
-
-TRANSFER_TYPE = None
-
-
-def _append_whole_dict_unique(target_list, data_dict):
-    new_filtered = {k: v for k, v in data_dict.items() if k != "index"}
-    for existed in target_list:
-        existed_filtered = {k: v for k, v in existed.items() if k != "index"}
-        if existed_filtered == new_filtered:
-            return False
-    print("!!APPEND!!", data_dict)
-    target_list.append(data_dict)
-    transfer_mode = data_dict.get("transfer_mode", "unknown")
-    global TRANSFER_TYPE
-
-    if TRANSFER_TYPE is None:
-        TRANSFER_TYPE = transfer_mode
-        logger.info("SET TRANSFER TYPE TO %s", TRANSFER_TYPE)
-    elif transfer_mode != TRANSFER_TYPE:
-        raise ValueError(f"mismatched transfer mode {TRANSFER_TYPE} vs {transfer_mode}")
-
-    return True
-
-
-_list_lock = threading.RLock()
-
-
-def _listen_for_register(hostname, port):
-    context = zmq.Context()
-    router_socket = context.socket(zmq.ROUTER)
-    router_socket.bind(f"tcp://{hostname}:{port}")
-    poller = zmq.Poller()
-    poller.register(router_socket, zmq.POLLIN)
-    global prefill_instances
-    global decode_instances
-
-    while True:
-        socks = dict(poller.poll())
-        if router_socket in socks:
-            remote_addr, msg = router_socket.recv_multipart()
-            data = msgpack.loads(msg)
-            if data["type"] == "HELLO":
-                pass
-            elif (
-                data["type"] == "register"
-                and data["role"] == "P"
-                and data["request_address"] not in prefill_instances
-            ):
-                with _list_lock:
-                    _append_whole_dict_unique(prefill_instances, data)
-
-            elif (
-                data["type"] == "register"
-                and data["role"] == "D"
-                and data["request_address"] not in decode_instances
-            ):
-                with _list_lock:
-                    _append_whole_dict_unique(decode_instances, data)
-
-
-def start_service_discovery(hostname, port):
-    if not hostname:
-        hostname = socket.gethostname()
-    if port == 0:
-        raise ValueError("Port cannot be 0")
-
-    _listener_thread = threading.Thread(
-        target=_listen_for_register, args=(hostname, port), daemon=True
-    )
-    _listener_thread.start()
-    return _listener_thread
-
-
-async def send_request_to_prefill(
-    endpoint, req_data, request_id, d_endpoint, dip, dport, selected_prefill_dp_rank
-):
-    req_data_copy = req_data
-
-    req_data_copy["kv_transfer_params"].update(
-        {
-            "do_remote_decode": True,
-            "do_remote_prefill": False,
-            "remote_handshake_port": d_endpoint["handshake_port"],
-            "remote_notify_port": d_endpoint["notify_port"],
-            "remote_engine_id": None,
-            "remote_block_ids": None,
-            "remote_host": dip,
-            "remote_port": dport,
-        }
-    )
-    req_data_copy["stream"] = False
-    req_data_copy["max_tokens"] = 1
-    if "max_completion_tokens" in req_data_copy:
-        req_data_copy["max_completion_tokens"] = 1
-    if "stream_options" in req_data_copy:
-        del req_data_copy["stream_options"]
-    async with aiohttp.ClientSession(
-        timeout=aiohttp.ClientTimeout(total=6 * 6000 * 6000)
-    ) as session:
-        headers = {
-            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
-            "X-Request-Id": request_id,
-        }
-        if selected_prefill_dp_rank is not None:
-            headers["X-data-parallel-rank"] = str(selected_prefill_dp_rank)
-        async with session.post(
-            url=endpoint, json=req_data_copy, headers=headers
-        ) as response:
-            if response.status == 200:
-                return await response.json()
-
-            else:
-                raise RuntimeError(
-                    "send_request_to_prefill response.status != 200response.status = ",
-                    response.status,
-                )
-
-
-async def start_decode_request(endpoint, req_data, request_id):
-    session = aiohttp.ClientSession(
-        timeout=aiohttp.ClientTimeout(total=6 * 6000 * 6000)
-    )
-    headers = {
-        "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
-        "X-Request-Id": request_id,
-    }
-    response = await session.post(url=endpoint, json=req_data, headers=headers)
-    return session, response
-
-
-async def stream_decode_response(session, response, request_id):
-    try:
-        if response.status == 200:
-            async for chunk_bytes in response.content.iter_chunked(1024):
-                yield chunk_bytes
-        else:
-            raise RuntimeError(
-                f"decode response.status != 200, status = {response.status}"
-            )
-    finally:
-        await session.close()
-
-
-def example_round_robin_dp_loader(request_number, dp_size):
-    return request_nums % dp_size
-
-
-@app.route("/v1/completions", methods=["POST"])
-@app.route("/v1/chat/completions", methods=["POST"])
-async def handle_request():
-    try:
-        with _list_lock:
-            global request_nums
-            request_nums += 1
-
-        def extract_ip_port_fast(url):
-            match = IP_PORT_PATTERN.search(url)
-            if not match:
-                raise ValueError(f"Invalid URL format: {url}")
-            return match.groups()
-
-        req_data = await request.get_json()
-        request_id = str(uuid.uuid4())
-
-        prefill_instance_endpoint = None
-        decode_instance_endpoint = None
-        error_msg = (
-            "Service Unavailable: No prefill or decode instances are registered."
-        )
-        if not prefill_instances or not decode_instances:
-            return await make_response(
-                (
-                    error_msg,
-                    503,
-                )
-            )
-        pid = request_nums % len(prefill_instances)
-        did = request_nums % len(decode_instances)
-        prefill_instance_endpoint = prefill_instances[pid]
-        decode_instance_endpoint = decode_instances[did]
-
-        selected_prefill_dp_rank = None
-        if prefill_instance_endpoint["dp_size"] > 1:
-            selected_prefill_dp_rank = example_round_robin_dp_loader(
-                request_nums // len(prefill_instance_endpoint),
-                prefill_instance_endpoint["dp_size"],
-            )
-
-        dip, dport = extract_ip_port_fast(decode_instance_endpoint["request_address"])
-
-        transfer_id = f"{MoRIIOConstants.TRANSFER_PREFIX}-{str(uuid.uuid4())}"
-
-        req_data_to_prefill = copy.deepcopy(req_data)
-        req_data_to_prefill["kv_transfer_params"] = {}
-        req_data["kv_transfer_params"] = {}
-        req_data_to_prefill["kv_transfer_params"]["remote_dp_size"] = (
-            decode_instance_endpoint["dp_size"]
-        )
-        req_data_to_prefill["kv_transfer_params"]["remote_tp_size"] = (
-            decode_instance_endpoint["tp_size"]
-        )
-        req_data_to_prefill["kv_transfer_params"]["transfer_id"] = transfer_id
-
-        send_prefill_task = asyncio.create_task(
-            send_request_to_prefill(
-                prefill_instance_endpoint["request_address"],
-                req_data_to_prefill,
-                request_id,
-                decode_instance_endpoint,
-                dip,
-                dport,
-                selected_prefill_dp_rank,
-            )
-        )
-        ip, port = extract_ip_port_fast(prefill_instance_endpoint["request_address"])
-
-        req_data["max_tokens"] -= 1
-
-        req_data["kv_transfer_params"] = {
-            "do_remote_decode": False,
-            "do_remote_prefill": True,
-            "remote_handshake_port": prefill_instance_endpoint["handshake_port"],
-            "remote_notify_port": prefill_instance_endpoint["notify_port"],
-            "remote_engine_id": None,
-            "remote_block_ids": None,
-            "remote_host": ip,
-            "remote_port": port,
-        }
-        if TRANSFER_TYPE == "READ":
-            # In read mode, prefill and decode are executed serially.
-            prefill_response = await send_prefill_task
-            req_data["kv_transfer_params"]["remote_engine_id"] = prefill_response[
-                "kv_transfer_params"
-            ]["remote_engine_id"]
-            req_data["kv_transfer_params"]["remote_block_ids"] = prefill_response[
-                "kv_transfer_params"
-            ]["remote_block_ids"]
-
-        req_data["kv_transfer_params"]["remote_dp_size"] = prefill_instance_endpoint[
-            "dp_size"
-        ]
-        req_data["kv_transfer_params"]["remote_tp_size"] = prefill_instance_endpoint[
-            "tp_size"
-        ]
-
-        if selected_prefill_dp_rank is not None:
-            req_data["kv_transfer_params"]["remote_dp_rank"] = selected_prefill_dp_rank
-        req_data["kv_transfer_params"]["transfer_id"] = transfer_id
-
-        decode_request_task = asyncio.create_task(
-            start_decode_request(
-                decode_instance_endpoint["request_address"], req_data, request_id
-            )
-        )
-
-        session, decode_response = await decode_request_task
-        stream_generator = stream_decode_response(session, decode_response, request_id)
-        response = await make_response(stream_generator)
-        return response
-    except Exception as e:
-        logger.exception("An error occurred while handling the request: %s", e)
-        return await make_response(
-            (
-                f"Internal Server Error: {e!s}",
-                500,
-            )
-        )
-
-
-if __name__ == "__main__":
-    t = start_service_discovery("0.0.0.0", 36367)
-    app.debug = True
-    app.config["BODY_TIMEOUT"] = 360000
-    app.config["RESPONSE_TIMEOUT"] = 360000
-
-    app.run(host="0.0.0.0", port=10001)
-    t.join()
diff --git a/examples/online_serving/prompt_embed_inference_with_openai_client.py b/examples/online_serving/prompt_embed_inference_with_openai_client.py
deleted file mode 100644
index fa4b64c00703..000000000000
--- a/examples/online_serving/prompt_embed_inference_with_openai_client.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""
-vLLM OpenAI-Compatible Client with Prompt Embeddings
-
-This script demonstrates how to:
-1. Generate prompt embeddings using Hugging Face Transformers
-2. Encode them in base64 format
-3. Send them to a vLLM server via the OpenAI-compatible Completions API
-
-Run the vLLM server first:
-vllm serve meta-llama/Llama-3.2-1B-Instruct \
-  --runner generate \
-  --max-model-len 4096 \
-  --enable-prompt-embeds
-
-Run the client:
-python examples/online_serving/prompt_embed_inference_with_openai_client.py
-
-Model: meta-llama/Llama-3.2-1B-Instruct
-Note: This model is gated on Hugging Face Hub.
-      You must request access to use it:
-      https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct
-
-Dependencies:
-- transformers
-- torch
-- openai
-"""
-
-import transformers
-from openai import OpenAI
-
-from vllm.utils.serial_utils import tensor2base64
-
-
-def main():
-    client = OpenAI(
-        api_key="EMPTY",
-        base_url="http://localhost:8000/v1",
-    )
-
-    model_name = "meta-llama/Llama-3.2-1B-Instruct"
-
-    # Transformers
-    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
-    transformers_model = transformers.AutoModelForCausalLM.from_pretrained(model_name)
-
-    # Refer to the HuggingFace repo for the correct format to use
-    chat = [{"role": "user", "content": "Please tell me about the capital of France."}]
-    token_ids = tokenizer.apply_chat_template(
-        chat, add_generation_prompt=True, return_tensors="pt", return_dict=True
-    ).input_ids
-
-    embedding_layer = transformers_model.get_input_embeddings()
-    prompt_embeds = embedding_layer(token_ids).squeeze(0)
-
-    # Prompt embeddings
-    encoded_embeds = tensor2base64(prompt_embeds)
-
-    completion = client.completions.create(
-        model=model_name,
-        prompt=None,
-        max_tokens=5,
-        temperature=0.0,
-        # NOTE: The OpenAI client allows passing in extra JSON body via the
-        # `extra_body` argument.
-        extra_body={"prompt_embeds": encoded_embeds},
-    )
-
-    print("-" * 30)
-    print(completion.choices[0].text)
-    print("-" * 30)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/examples/online_serving/utils.py b/examples/online_serving/utils.py
deleted file mode 100644
index a512d8a31b53..000000000000
--- a/examples/online_serving/utils.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from openai import APIConnectionError, OpenAI
-from openai.pagination import SyncPage
-from openai.types.model import Model
-
-
-def get_first_model(client: OpenAI) -> str:
-    """
-    Get the first model from the vLLM server.
-    """
-    try:
-        models: SyncPage[Model] = client.models.list()
-    except APIConnectionError as e:
-        raise RuntimeError(
-            "Failed to get the list of models from the vLLM server at "
-            f"{client.base_url} with API key {client.api_key}. Check\n"
-            "1. the server is running\n"
-            "2. the server URL is correct\n"
-            "3. the API key is correct"
-        ) from e
-
-    if len(models.data) == 0:
-        raise RuntimeError(f"No models found on the vLLM server at {client.base_url}")
-
-    return models.data[0].id
diff --git a/examples/pooling/embed/openai_embedding_long_text/service.sh b/examples/pooling/embed/openai_embedding_long_text/service.sh
index 37a8b625b7f9..68950d04ee14 100644
--- a/examples/pooling/embed/openai_embedding_long_text/service.sh
+++ b/examples/pooling/embed/openai_embedding_long_text/service.sh
@@ -119,7 +119,7 @@ echo "   - API Key: $API_KEY"
 echo "   - Native Pooling: $POOLING_TYPE | Cross-chunk: MEAN"
 echo ""
 echo "🧪 Test the server with:"
-echo "   python examples/online_serving/openai_embedding_long_text/client.py"
+echo "   python examples/pooling/embed/openai_embedding_long_text/client.py"
 echo ""
 echo "📚 Enhanced features enabled:"
 echo "   ✅ Intelligent native pooling type detection"
diff --git a/examples/pooling/reward/sequence_reward_offline.py b/examples/pooling/reward/sequence_reward_offline.py
new file mode 100644
index 000000000000..0727bceee111
--- /dev/null
+++ b/examples/pooling/reward/sequence_reward_offline.py
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""
+Example offline usage of sequence reward models.
+
+The key distinction between sequence classification and token classification
+lies in their output granularity: sequence classification produces a single
+result for an entire input sequence, whereas token classification yields a
+result for each individual token within the sequence.
+"""
+
+from argparse import Namespace
+
+from vllm import LLM, EngineArgs
+from vllm.utils.argparse_utils import FlexibleArgumentParser
+from vllm.utils.print_utils import print_embeddings
+
+
+def parse_args():
+    parser = FlexibleArgumentParser()
+    parser = EngineArgs.add_cli_args(parser)
+    # Set example specific arguments
+    parser.set_defaults(
+        model="Skywork/Skywork-Reward-V2-Qwen3-0.6B",
+        runner="pooling",
+        enforce_eager=True,
+        max_model_len=1024,
+        trust_remote_code=True,
+    )
+    return parser.parse_args()
+
+
+def main(args: Namespace):
+    # Sample prompts.
+    prompts = [
+        "Hello, my name is",
+        "The president of the United States is",
+        "The capital of France is",
+        "The future of AI is",
+    ]
+
+    # Create an LLM.
+    # You should pass runner="pooling" for reward models
+    llm = LLM(**vars(args))
+
+    # Generate rewards. The output is a list of PoolingRequestOutput.
+    # Use pooling_task="classify" for sequence reward models.
+    outputs = llm.encode(prompts, pooling_task="classify")
+
+    # Print the outputs.
+    print("\nGenerated Outputs:\n" + "-" * 60)
+    for prompt, output in zip(prompts, outputs):
+        rewards = output.outputs.data
+        print(f"Prompt: {prompt!r}")
+        print_embeddings(rewards.tolist(), prefix="Reward")
+        print("-" * 60)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
diff --git a/examples/pooling/reward/sequence_reward_online.py b/examples/pooling/reward/sequence_reward_online.py
new file mode 100644
index 000000000000..40d8d28e3908
--- /dev/null
+++ b/examples/pooling/reward/sequence_reward_online.py
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Example online usage of sequence reward models.
+
+Run `vllm serve <model> --runner pooling`
+to start up the server in vLLM. e.g.
+
+vllm serve Skywork/Skywork-Reward-V2-Qwen3-0.6B
+
+The key distinction between sequence classification and token classification
+lies in their output granularity: sequence classification produces a single
+result for an entire input sequence, whereas token classification yields a
+result for each individual token within the sequence.
+"""
+
+import argparse
+import pprint
+
+import requests
+
+
+def post_http_request(prompt: dict, api_url: str) -> requests.Response:
+    headers = {"User-Agent": "Test Client"}
+    response = requests.post(api_url, headers=headers, json=prompt)
+    return response
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host", type=str, default="localhost")
+    parser.add_argument("--port", type=int, default=8000)
+
+    return parser.parse_args()
+
+
+def main(args):
+    base_url = f"http://{args.host}:{args.port}"
+    models_url = base_url + "/v1/models"
+    pooing_url = base_url + "/pooling"
+
+    response = requests.get(models_url)
+    model = response.json()["data"][0]["id"]
+
+    # Input like Completions API
+    prompt = {"model": model, "input": "vLLM is great!"}
+    pooling_response = post_http_request(prompt=prompt, api_url=pooing_url)
+    print("-" * 50)
+    print("Pooling Response:")
+    pprint.pprint(pooling_response.json())
+    print("-" * 50)
+
+    # Input like Chat API
+    prompt = {
+        "model": model,
+        "messages": [
+            {
+                "role": "user",
+                "content": [{"type": "text", "text": "vLLM is great!"}],
+            }
+        ],
+    }
+    pooling_response = post_http_request(prompt=prompt, api_url=pooing_url)
+    print("Pooling Response:")
+    pprint.pprint(pooling_response.json())
+    print("-" * 50)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
diff --git a/examples/basic/offline_inference/reward.py b/examples/pooling/reward/token_reward_offline.py
similarity index 74%
rename from examples/basic/offline_inference/reward.py
rename to examples/pooling/reward/token_reward_offline.py
index b6aece26ace1..4705c0491241 100644
--- a/examples/basic/offline_inference/reward.py
+++ b/examples/pooling/reward/token_reward_offline.py
@@ -1,6 +1,15 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+"""
+Example offline usage of token reward models.
+
+The key distinction between sequence classification and token classification
+lies in their output granularity: sequence classification produces a single
+result for an entire input sequence, whereas token classification yields a
+result for each individual token within the sequence.
+"""
+
 from argparse import Namespace
 
 from vllm import LLM, EngineArgs
@@ -36,14 +45,14 @@ def main(args: Namespace):
     llm = LLM(**vars(args))
 
     # Generate rewards. The output is a list of PoolingRequestOutput.
-    outputs = llm.reward(prompts)
+    outputs = llm.encode(prompts, pooling_task="token_classify")
 
     # Print the outputs.
     print("\nGenerated Outputs:\n" + "-" * 60)
     for prompt, output in zip(prompts, outputs):
         rewards = output.outputs.data
         print(f"Prompt: {prompt!r}")
-        print_embeddings(rewards, prefix="Reward")
+        print_embeddings(rewards.tolist(), prefix="Reward")
         print("-" * 60)
 
 
diff --git a/examples/pooling/pooling/pooling_online.py b/examples/pooling/reward/token_reward_online.py
similarity index 83%
rename from examples/pooling/pooling/pooling_online.py
rename to examples/pooling/reward/token_reward_online.py
index e8ff38889a16..64ee0c9dfdcc 100644
--- a/examples/pooling/pooling/pooling_online.py
+++ b/examples/pooling/reward/token_reward_online.py
@@ -1,12 +1,17 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """
-Example online usage of Pooling API.
+Example online usage of token reward models.
 
 Run `vllm serve <model> --runner pooling`
 to start up the server in vLLM. e.g.
 
 vllm serve internlm/internlm2-1_8b-reward --trust-remote-code
+
+The key distinction between sequence classification and token classification
+lies in their output granularity: sequence classification produces a single
+result for an entire input sequence, whereas token classification yields a
+result for each individual token within the sequence.
 """
 
 import argparse
diff --git a/examples/pooling/score/template/qwen3_reranker.jinja b/examples/pooling/score/template/qwen3_reranker.jinja
index f33f526dc054..558e9b9566d5 100644
--- a/examples/pooling/score/template/qwen3_reranker.jinja
+++ b/examples/pooling/score/template/qwen3_reranker.jinja
@@ -1,7 +1,7 @@
 <|im_start|>system
 Judge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>
 <|im_start|>user
-<Instruct>: {{ messages | selectattr("role", "eq", "system") | map(attribute="content") | first | default("Given a web search query, retrieve relevant passages that answer the query") }}
+<Instruct>: {{ instruction | default(instruct | default(messages | selectattr("role", "eq", "system") | map(attribute="content") | first | default("Given a web search query, retrieve relevant passages that answer the query", true), true), true) }}
 <Query>: {{ messages | selectattr("role", "eq", "query") | map(attribute="content") | first }}
 <Document>: {{ messages | selectattr("role", "eq", "document") | map(attribute="content") | first }}<|im_end|>
 <|im_start|>assistant
diff --git a/examples/pooling/score/template/qwen3_vl_reranker.jinja b/examples/pooling/score/template/qwen3_vl_reranker.jinja
index ed89f2a547ac..90488d0f5d32 100644
--- a/examples/pooling/score/template/qwen3_vl_reranker.jinja
+++ b/examples/pooling/score/template/qwen3_vl_reranker.jinja
@@ -1,13 +1,7 @@
 <|im_start|>system
 Judge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>
 <|im_start|>user
-<Instruct>: {{
-    messages
-    | selectattr("role", "eq", "system")
-    | map(attribute="content")
-    | first
-    | default("Given a search query, retrieve relevant candidates that answer the query.")
-}}<Query>:{{
+<Instruct>: {{ instruction | default(instruct | default(messages | selectattr("role", "eq", "system") | map(attribute="content") | first | default("Given a search query, retrieve relevant candidates that answer the query.", true), true), true) }}<Query>:{{
     messages
     | selectattr("role", "eq", "query")
     | map(attribute="content")
diff --git a/examples/pooling/token_embed/jina_embeddings_v4_offline.py b/examples/pooling/token_embed/jina_embeddings_v4_offline.py
index 3822d2b42dcd..8aa14fda2e5e 100644
--- a/examples/pooling/token_embed/jina_embeddings_v4_offline.py
+++ b/examples/pooling/token_embed/jina_embeddings_v4_offline.py
@@ -4,68 +4,74 @@
 import torch
 
 from vllm import LLM
+from vllm.config import PoolerConfig
 from vllm.inputs import TextPrompt
 from vllm.multimodal.utils import fetch_image
 
-# Initialize model
-model = LLM(
-    model="jinaai/jina-embeddings-v4-vllm-text-matching",
-    runner="pooling",
-    max_model_len=1024,
-    gpu_memory_utilization=0.8,
-)
 
-# Create text prompts
-text1 = "Ein wunderschöner Sonnenuntergang am Strand"
-text1_prompt = TextPrompt(prompt=f"Query: {text1}")
+def main():
+    # Initialize model
+    model = LLM(
+        model="jinaai/jina-embeddings-v4-vllm-text-matching",
+        pooler_config=PoolerConfig(task="token_embed"),
+        runner="pooling",
+        max_model_len=1024,
+        gpu_memory_utilization=0.8,
+    )
 
-text2 = "浜辺に沈む美しい夕日"
-text2_prompt = TextPrompt(prompt=f"Query: {text2}")
+    # Create text prompts
+    text1 = "Ein wunderschöner Sonnenuntergang am Strand"
+    text1_prompt = TextPrompt(prompt=f"Query: {text1}")
 
-# Create image prompt
-image = fetch_image(
-    "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/eskimo.jpg"  # noqa: E501
-)
-image_prompt = TextPrompt(
-    prompt="<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>Describe the image.<|im_end|>\n",  # noqa: E501
-    multi_modal_data={"image": image},
-)
+    text2 = "浜辺に沈む美しい夕日"
+    text2_prompt = TextPrompt(prompt=f"Query: {text2}")
 
-# Encode all prompts
-prompts = [text1_prompt, text2_prompt, image_prompt]
-outputs = model.encode(prompts, pooling_task="token_embed")
+    # Create image prompt
+    image = fetch_image(
+        "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/eskimo.jpg"  # noqa: E501
+    )
+    image_prompt = TextPrompt(
+        prompt="<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>Describe the image.<|im_end|>\n",  # noqa: E501
+        multi_modal_data={"image": image},
+    )
 
+    # Encode all prompts
+    prompts = [text1_prompt, text2_prompt, image_prompt]
+    outputs = model.encode(prompts, pooling_task="token_embed")
 
-def get_embeddings(outputs):
-    VISION_START_TOKEN_ID, VISION_END_TOKEN_ID = 151652, 151653
+    def get_embeddings(outputs):
+        VISION_START_TOKEN_ID, VISION_END_TOKEN_ID = 151652, 151653
 
-    embeddings = []
-    for output in outputs:
-        if VISION_START_TOKEN_ID in output.prompt_token_ids:
-            # Gather only vision tokens
-            img_start_pos = torch.where(
-                torch.tensor(output.prompt_token_ids) == VISION_START_TOKEN_ID
-            )[0][0]
-            img_end_pos = torch.where(
-                torch.tensor(output.prompt_token_ids) == VISION_END_TOKEN_ID
-            )[0][0]
-            embeddings_tensor = output.outputs.data.detach().clone()[
-                img_start_pos : img_end_pos + 1
-            ]
-        else:
-            # Use all tokens for text-only prompts
-            embeddings_tensor = output.outputs.data.detach().clone()
+        embeddings = []
+        for output in outputs:
+            if VISION_START_TOKEN_ID in output.prompt_token_ids:
+                # Gather only vision tokens
+                img_start_pos = torch.where(
+                    torch.tensor(output.prompt_token_ids) == VISION_START_TOKEN_ID
+                )[0][0]
+                img_end_pos = torch.where(
+                    torch.tensor(output.prompt_token_ids) == VISION_END_TOKEN_ID
+                )[0][0]
+                embeddings_tensor = output.outputs.data.detach().clone()[
+                    img_start_pos : img_end_pos + 1
+                ]
+            else:
+                # Use all tokens for text-only prompts
+                embeddings_tensor = output.outputs.data.detach().clone()
 
-        # Pool and normalize embeddings
-        pooled_output = (
-            embeddings_tensor.sum(dim=0, dtype=torch.float32)
-            / embeddings_tensor.shape[0]
-        )
-        embeddings.append(torch.nn.functional.normalize(pooled_output, dim=-1))
-    return embeddings
+            # Pool and normalize embeddings
+            pooled_output = (
+                embeddings_tensor.sum(dim=0, dtype=torch.float32)
+                / embeddings_tensor.shape[0]
+            )
+            embeddings.append(torch.nn.functional.normalize(pooled_output, dim=-1))
+        return embeddings
 
+    embeddings = get_embeddings(outputs)
 
-embeddings = get_embeddings(outputs)
+    for embedding in embeddings:
+        print(embedding.shape)
 
-for embedding in embeddings:
-    print(embedding.shape)
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/pooling/token_embed/jina_reranker_v3_offline.py b/examples/pooling/token_embed/jina_reranker_v3_offline.py
new file mode 100644
index 000000000000..c250eccc62a6
--- /dev/null
+++ b/examples/pooling/token_embed/jina_reranker_v3_offline.py
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# ruff: noqa: E501
+
+import torch.nn.functional as F
+
+from vllm import LLM
+
+query = "What are the health benefits of green tea?"
+documents = [
+    "Green tea contains antioxidants called catechins that may help reduce inflammation and protect cells from damage.",
+    "El precio del café ha aumentado un 20% este año debido a problemas en la cadena de suministro.",
+    "Studies show that drinking green tea regularly can improve brain function and boost metabolism.",
+    "Basketball is one of the most popular sports in the United States.",
+    "绿茶富含儿茶素等抗氧化剂，可以降低心脏病风险，还有助于控制体重。",
+    "Le thé vert est riche en antioxydants et peut améliorer la fonction cérébrale.",
+]
+
+
+def main():
+    # Initialize model
+    llm = LLM(
+        model="jinaai/jina-reranker-v3",
+        runner="pooling",
+    )
+
+    # Generate scores.
+    outputs = llm.score(query, documents)
+
+    # Print the outputs.
+    print("\nGenerated Outputs:\n" + "-" * 60)
+    for document, output in zip(documents, outputs):
+        score = output.outputs.score
+        print(f"Pair: {[query, document]!r} \nScore: {score}")
+        print("-" * 60)
+
+    # Generate embeddings.
+    # The JinaForRanking model concatenates docs first, then query.
+    # Let's stay consistent with this novel design.
+    outputs = llm.encode(documents + [query], pooling_task="token_embed")
+    embeds = outputs[0].outputs.data.float()
+
+    doc_embeds = embeds[:-1]
+    query_embeds = embeds[-1]
+
+    scores = F.cosine_similarity(query_embeds, doc_embeds)
+
+    # Print the outputs.
+    print("\nGenerated Outputs:\n" + "-" * 60)
+    for document, score in zip(documents, scores):
+        print(f"Pair: {[query, document]!r} \nScore: {score}")
+        print("-" * 60)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/pooling/token_embed/multi_vector_retrieval_offline.py b/examples/pooling/token_embed/multi_vector_retrieval_offline.py
index fa7d1c3ba216..5456936111cf 100644
--- a/examples/pooling/token_embed/multi_vector_retrieval_offline.py
+++ b/examples/pooling/token_embed/multi_vector_retrieval_offline.py
@@ -4,6 +4,7 @@
 from argparse import Namespace
 
 from vllm import LLM, EngineArgs
+from vllm.config import PoolerConfig
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 
 
@@ -13,6 +14,7 @@ def parse_args():
     # Set example specific arguments
     parser.set_defaults(
         model="BAAI/bge-m3",
+        pooler_config=PoolerConfig(task="token_embed"),
         runner="pooling",
         enforce_eager=True,
     )
@@ -32,15 +34,6 @@ def main(args: Namespace):
     # You should pass runner="pooling" for embedding models
     llm = LLM(**vars(args))
 
-    # Generate embedding. The output is a list of EmbeddingRequestOutputs.
-    outputs = llm.embed(prompts)
-
-    # Print the outputs.
-    print("\nGenerated Outputs:\n" + "-" * 60)
-    for prompt, output in zip(prompts, outputs):
-        embeds = output.outputs.embedding
-        print(len(embeds))
-
     # Generate embedding for each token. The output is a list of PoolingRequestOutput.
     outputs = llm.encode(prompts, pooling_task="token_embed")
 
@@ -50,6 +43,20 @@ def main(args: Namespace):
         multi_vector = output.outputs.data
         print(multi_vector.shape)
 
+    query = "What is the capital of France?"
+    documents = [
+        "The capital of Brazil is Brasilia.",
+        "The capital of France is Paris.",
+    ]
+    # Generate scores.
+    outputs = llm.score(query, documents)
+    # Print the outputs.
+    print("\nGenerated Outputs:\n" + "-" * 60)
+    for document, output in zip(documents, outputs):
+        score = output.outputs.score
+        print(f"Pair: {[query, document]!r} \nScore: {score}")
+        print("-" * 60)
+
 
 if __name__ == "__main__":
     args = parse_args()
diff --git a/examples/pooling/token_embed/multi_vector_retrieval_online.py b/examples/pooling/token_embed/multi_vector_retrieval_online.py
index ef8c4745aa53..c2aae88294e8 100644
--- a/examples/pooling/token_embed/multi_vector_retrieval_online.py
+++ b/examples/pooling/token_embed/multi_vector_retrieval_online.py
@@ -7,10 +7,11 @@
 Run `vllm serve <model> --runner pooling`
 to start up the server in vLLM. e.g.
 
-vllm serve BAAI/bge-m3
+vllm serve BAAI/bge-m3 --pooler-config.task token_embed
 """
 
 import argparse
+import pprint
 
 import requests
 import torch
@@ -32,7 +33,8 @@ def parse_args():
 
 
 def main(args):
-    api_url = f"http://{args.host}:{args.port}/pooling"
+    pooling_url = f"http://{args.host}:{args.port}/pooling"
+    score_url = f"http://{args.host}:{args.port}/score"
     model_name = args.model
 
     prompts = [
@@ -43,11 +45,23 @@ def main(args):
     ]
     prompt = {"model": model_name, "input": prompts}
 
-    pooling_response = post_http_request(prompt=prompt, api_url=api_url)
+    pooling_response = post_http_request(prompt=prompt, api_url=pooling_url)
     for output in pooling_response.json()["data"]:
         multi_vector = torch.tensor(output["data"])
         print(multi_vector.shape)
 
+    queries = "What is the capital of France?"
+    documents = [
+        "The capital of Brazil is Brasilia.",
+        "The capital of France is Paris.",
+    ]
+    prompt = {"model": model_name, "queries": queries, "documents": documents}
+    score_response = post_http_request(prompt=prompt, api_url=score_url)
+    print("\nPrompt when queries is string and documents is a list:")
+    pprint.pprint(prompt)
+    print("\nScore Response:")
+    pprint.pprint(score_response.json())
+
 
 if __name__ == "__main__":
     args = parse_args()
diff --git a/examples/offline_inference/batch_llm_inference.py b/examples/ray_serving/batch_llm_inference.py
similarity index 100%
rename from examples/offline_inference/batch_llm_inference.py
rename to examples/ray_serving/batch_llm_inference.py
diff --git a/examples/online_serving/elastic_ep/bench.sh b/examples/ray_serving/elastic_ep/bench.sh
similarity index 100%
rename from examples/online_serving/elastic_ep/bench.sh
rename to examples/ray_serving/elastic_ep/bench.sh
diff --git a/examples/online_serving/elastic_ep/scale.py b/examples/ray_serving/elastic_ep/scale.py
similarity index 100%
rename from examples/online_serving/elastic_ep/scale.py
rename to examples/ray_serving/elastic_ep/scale.py
diff --git a/examples/online_serving/elastic_ep/serve_deepseek_v2.sh b/examples/ray_serving/elastic_ep/serve_deepseek_v2.sh
similarity index 100%
rename from examples/online_serving/elastic_ep/serve_deepseek_v2.sh
rename to examples/ray_serving/elastic_ep/serve_deepseek_v2.sh
diff --git a/examples/online_serving/multi-node-serving.sh b/examples/ray_serving/multi-node-serving.sh
similarity index 100%
rename from examples/online_serving/multi-node-serving.sh
rename to examples/ray_serving/multi-node-serving.sh
diff --git a/examples/online_serving/ray_serve_deepseek.py b/examples/ray_serving/ray_serve_deepseek.py
similarity index 100%
rename from examples/online_serving/ray_serve_deepseek.py
rename to examples/ray_serving/ray_serve_deepseek.py
diff --git a/examples/online_serving/run_cluster.sh b/examples/ray_serving/run_cluster.sh
similarity index 100%
rename from examples/online_serving/run_cluster.sh
rename to examples/ray_serving/run_cluster.sh
diff --git a/examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py b/examples/reasoning/openai_chat_completion_tool_calls_with_reasoning.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py
rename to examples/reasoning/openai_chat_completion_tool_calls_with_reasoning.py
diff --git a/examples/online_serving/openai_chat_completion_with_reasoning.py b/examples/reasoning/openai_chat_completion_with_reasoning.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_with_reasoning.py
rename to examples/reasoning/openai_chat_completion_with_reasoning.py
diff --git a/examples/online_serving/openai_chat_completion_with_reasoning_streaming.py b/examples/reasoning/openai_chat_completion_with_reasoning_streaming.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_with_reasoning_streaming.py
rename to examples/reasoning/openai_chat_completion_with_reasoning_streaming.py
diff --git a/examples/online_serving/openai_responses_client.py b/examples/reasoning/openai_responses_client.py
similarity index 100%
rename from examples/online_serving/openai_responses_client.py
rename to examples/reasoning/openai_responses_client.py
diff --git a/examples/rl/rlhf_async_new_apis.py b/examples/rl/rlhf_async_new_apis.py
index 1d264d779859..a6adc2088607 100644
--- a/examples/rl/rlhf_async_new_apis.py
+++ b/examples/rl/rlhf_async_new_apis.py
@@ -131,16 +131,9 @@ def __init__(self, model_name: str):
         from vllm.model_executor.layers.batch_invariant import (
             init_batch_invariance,
         )
-        from vllm.platforms import current_platform
-        from vllm.v1.attention.backends.registry import AttentionBackendEnum
 
         # need to init all env vars for batch invariance which affect nccl ops
-        attn_backend = (
-            AttentionBackendEnum.TRITON_ATTN
-            if current_platform.is_rocm()
-            else AttentionBackendEnum.FLASH_ATTN
-        )
-        init_batch_invariance(attn_backend)
+        init_batch_invariance()
 
         self.model = AutoModelForCausalLM.from_pretrained(
             model_name, dtype=torch.bfloat16
@@ -314,6 +307,8 @@ def generate(self, token_ids: list[int], max_new_tokens: int) -> list[int]:
 
 ray.get(llm.pause_after_n_tokens.remote())
 
+ray.get(llm.start_weight_update.remote(is_checkpoint_format=True))
+
 inference_handle = llm.update_weights.remote(
     WeightTransferUpdateRequest(
         update_info=asdict(
@@ -329,6 +324,8 @@ def generate(self, token_ids: list[int], max_new_tokens: int) -> list[int]:
 train_handle = train_model.broadcast_weights.remote(packed=True)
 ray.get([train_handle, inference_handle])
 
+ray.get(llm.finish_weight_update.remote())
+
 ray.get(llm.resume_generation.remote())
 results = ray.get(gen_futures)
 
diff --git a/examples/rl/rlhf_http_ipc.py b/examples/rl/rlhf_http_ipc.py
index 1a6a96d9c092..16c5be8dd65b 100644
--- a/examples/rl/rlhf_http_ipc.py
+++ b/examples/rl/rlhf_http_ipc.py
@@ -80,6 +80,24 @@ def init_weight_transfer_engine(base_url: str) -> None:
     response.raise_for_status()
 
 
+def start_weight_update(
+    base_url: str,
+    is_checkpoint_format: bool = True,
+) -> None:
+    """Start a weight update via HTTP endpoint."""
+    url = f"{base_url}/start_weight_update"
+    payload = {"is_checkpoint_format": is_checkpoint_format}
+    response = requests.post(url, json=payload, timeout=60)
+    response.raise_for_status()
+
+
+def finish_weight_update(base_url: str) -> None:
+    """Finish a weight update via HTTP endpoint."""
+    url = f"{base_url}/finish_weight_update"
+    response = requests.post(url, json={}, timeout=60)
+    response.raise_for_status()
+
+
 def pause_generation(base_url: str) -> None:
     """Pause generation via HTTP endpoint."""
     url = f"{base_url}/pause"
@@ -151,14 +169,18 @@ def main():
     # Pause generation before weight sync
     pause_generation(BASE_URL)
 
-    # Broadcast weights via IPC handles using HTTP mode
+    # Start weight update, broadcast via IPC, then finish
+    start_weight_update(BASE_URL, is_checkpoint_format=False)
+
     print("Broadcasting weights via CUDA IPC (HTTP)...")
-    trainer_args = IPCTrainerSendWeightsArgs(mode="http", url=BASE_URL)
+    trainer_args = IPCTrainerSendWeightsArgs(send_mode="http", url=BASE_URL)
     IPCWeightTransferEngine.trainer_send_weights(
         iterator=train_model.named_parameters(),
         trainer_args=trainer_args,
     )
 
+    finish_weight_update(BASE_URL)
+
     # Resume generation after weight sync
     resume_generation(BASE_URL)
 
diff --git a/examples/rl/rlhf_http_nccl.py b/examples/rl/rlhf_http_nccl.py
index afc4cda2e306..01aafe43f160 100644
--- a/examples/rl/rlhf_http_nccl.py
+++ b/examples/rl/rlhf_http_nccl.py
@@ -83,6 +83,17 @@ def init_weight_transfer_engine(
     response.raise_for_status()
 
 
+def start_weight_update(
+    base_url: str,
+    is_checkpoint_format: bool = True,
+) -> None:
+    """Start a weight update via HTTP endpoint."""
+    url = f"{base_url}/start_weight_update"
+    payload = {"is_checkpoint_format": is_checkpoint_format}
+    response = requests.post(url, json=payload, timeout=60)
+    response.raise_for_status()
+
+
 def update_weights(
     base_url: str,
     names: list[str],
@@ -104,6 +115,13 @@ def update_weights(
     response.raise_for_status()
 
 
+def finish_weight_update(base_url: str) -> None:
+    """Finish a weight update via HTTP endpoint."""
+    url = f"{base_url}/finish_weight_update"
+    response = requests.post(url, json={}, timeout=60)
+    response.raise_for_status()
+
+
 def pause_generation(base_url: str) -> None:
     """Pause generation via HTTP endpoint."""
     url = f"{base_url}/pause"
@@ -204,6 +222,9 @@ def main():
         dtype_names.append(str(p.dtype).split(".")[-1])
         shapes.append(list(p.shape))
 
+    # Start weight update
+    start_weight_update(BASE_URL, is_checkpoint_format=True)
+
     # Start the update_weights call in a separate thread since it will block
     # waiting for NCCL broadcasts
     # packed=True enables efficient batched tensor broadcasting
@@ -227,6 +248,9 @@ def main():
     # Wait for update_weights to complete
     update_thread.join()
 
+    # Finish weight update
+    finish_weight_update(BASE_URL)
+
     # Resume generation after weight sync
     resume_generation(BASE_URL)
 
diff --git a/examples/rl/rlhf_ipc.py b/examples/rl/rlhf_ipc.py
index 169b1026ad4a..afebbd240a4e 100644
--- a/examples/rl/rlhf_ipc.py
+++ b/examples/rl/rlhf_ipc.py
@@ -70,10 +70,14 @@ def init_weight_transfer(self):
             self.llm_handle.init_weight_transfer_engine.remote(dict(init_info=dict()))
         )
 
-    def broadcast_weights(self, llm_handle: ray.actor.ActorHandle):
+    def broadcast_weights(
+        self, llm_handle: ray.actor.ActorHandle, packed: bool = False
+    ):
         """Broadcast weights to the inference engine using IPC."""
         self.llm_handle = llm_handle
-        trainer_args = IPCTrainerSendWeightsArgs(mode="ray", llm_handle=llm_handle)
+        trainer_args = IPCTrainerSendWeightsArgs(
+            send_mode="ray", llm_handle=llm_handle, packed=packed
+        )
         IPCWeightTransferEngine.trainer_send_weights(
             iterator=self.train_model.named_parameters(),
             trainer_args=trainer_args,
@@ -134,15 +138,17 @@ def broadcast_weights(self, llm_handle: ray.actor.ActorHandle):
 ray.get(llm.sleep.remote(level=0))
 
 ray.get(train_model.init_weight_transfer.remote())
-# Synchronize the updated weights to the inference engine using batched API.
+# Start weight update, sync weights, then finish
+ray.get(llm.start_weight_update.remote(is_checkpoint_format=True))
 ray.get(train_model.broadcast_weights.remote(llm))
+ray.get(llm.finish_weight_update.remote())
 
 ray.get(llm.wake_up.remote(tags=["scheduling"]))
 
-# Generate text with the updated model.
-outputs_updated = ray.get(llm.generate.remote(prompts, sampling_params))
+outputs_packed = ray.get(llm.generate.remote(prompts, sampling_params))
 print("-" * 50)
-for output in outputs_updated:
+print("Results after packed/chunked IPC weight sync:")
+for output in outputs_packed:
     prompt = output.prompt
     generated_text = output.outputs[0].text
     print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}")
diff --git a/examples/rl/rlhf_ipc_fsdp_ep.py b/examples/rl/rlhf_ipc_fsdp_ep.py
new file mode 100644
index 000000000000..0fb0a93ca82f
--- /dev/null
+++ b/examples/rl/rlhf_ipc_fsdp_ep.py
@@ -0,0 +1,425 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+RLHF with FSDP2 training and vLLM expert-parallel inference using **CUDA IPC**
+weight transfer and **packed** tensors.
+
+Layout (4 GPUs, TP=1, DP=4, EP):
+  * One Ray placement group per GPU.
+  * Each PG holds one FSDP training worker and one vLLM ``LLM`` instance
+    (sync API) using fractional GPUs so both fit on the same device.
+  * The 4 ``LLM`` instances form a DP group via env-var-based SPMD
+    coordination (``VLLM_DP_RANK``, ``VLLM_DP_SIZE``, etc.), the same
+    mechanism used by ``examples/offline_inference/data_parallel.py``.
+  * A ``DataParallelInferenceEngine`` actor spawns all 4 LLM actors,
+    waits for initialization, and orchestrates generation / weight-sync.
+
+Uses the built-in ``ray`` send_mode: each FSDP worker calls
+``trainer_send_weights`` targeting its colocated LLM actor.
+
+This example was run on 4xH100.
+"""
+
+from __future__ import annotations
+
+import os
+from dataclasses import asdict
+
+import ray
+import torch
+import torch.distributed as dist
+from huggingface_hub import snapshot_download
+from ray.util.placement_group import placement_group
+from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
+from torch.distributed._tensor import DTensor
+from torch.distributed.fsdp import fully_shard
+from transformers import AutoModelForCausalLM
+
+from vllm import LLM, SamplingParams
+from vllm.config import WeightTransferConfig
+from vllm.distributed.weight_transfer.ipc_engine import (
+    IPCTrainerSendWeightsArgs,
+    IPCWeightTransferEngine,
+    IPCWeightTransferInitInfo,
+)
+from vllm.utils.network_utils import get_ip, get_open_port
+
+TRAIN_GPU_FRACTION = float(os.environ.get("RLHF_IPC_TRAIN_GPU_FRACTION", "0.42"))
+VLLM_GPU_FRACTION = float(os.environ.get("RLHF_IPC_VLLM_GPU_FRACTION", "0.42"))
+
+MODEL_NAME = "Qwen/Qwen3-30B-A3B"
+
+FSDP_WORLD_SIZE = 4
+INFERENCE_TP_SIZE = 1
+INFERENCE_DP_SIZE = 4
+
+
+class MyLLM(LLM):
+    """LLM subclass that configures DP env vars for SPMD coordination."""
+
+    def __init__(
+        self,
+        *args,
+        dp_rank: int = 0,
+        dp_size: int = 1,
+        dp_master_ip: str = "127.0.0.1",
+        dp_master_port: int = 0,
+        **kwargs,
+    ):
+        os.environ.pop("CUDA_VISIBLE_DEVICES", None)
+        os.environ["VLLM_RAY_PER_WORKER_GPUS"] = str(VLLM_GPU_FRACTION)
+        os.environ["VLLM_RAY_BUNDLE_INDICES"] = "0"
+        os.environ["VLLM_ALLOW_INSECURE_SERIALIZATION"] = "1"
+
+        os.environ["VLLM_DP_RANK"] = str(dp_rank)
+        os.environ["VLLM_DP_RANK_LOCAL"] = str(dp_rank)
+        os.environ["VLLM_DP_SIZE"] = str(dp_size)
+        os.environ["VLLM_DP_MASTER_IP"] = dp_master_ip
+        os.environ["VLLM_DP_MASTER_PORT"] = str(dp_master_port)
+
+        super().__init__(*args, **kwargs)
+
+    def ready(self):
+        return True
+
+
+@ray.remote(num_cpus=0, num_gpus=TRAIN_GPU_FRACTION)
+class FSDPTrainWorker:
+    """One FSDP2 worker per GPU; colocated with vLLM DP rank via placement group."""
+
+    def __init__(
+        self,
+        model_name: str,
+        rank: int,
+        fsdp_world_size: int,
+        fsdp_master_addr: str,
+        fsdp_master_port: int,
+    ):
+        self.rank = rank
+
+        os.environ["MASTER_ADDR"] = fsdp_master_addr
+        os.environ["MASTER_PORT"] = str(fsdp_master_port)
+
+        dist.init_process_group(backend="nccl", rank=rank, world_size=fsdp_world_size)
+        torch.accelerator.set_device_index(0)
+
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name, torch_dtype=torch.bfloat16
+        )
+
+        self.weight_names = [n for n, _ in model.named_parameters()]
+        self.weight_dtype_names = [
+            str(p.dtype).split(".")[-1] for _, p in model.named_parameters()
+        ]
+        self.weight_shapes = [list(p.shape) for _, p in model.named_parameters()]
+
+        for layer in model.model.layers:
+            fully_shard(layer)
+        fully_shard(model)
+
+        self.model = model
+
+    def get_rank(self):
+        return self.rank
+
+    def get_weight_metadata(self):
+        return self.weight_names, self.weight_dtype_names, self.weight_shapes
+
+    def gather_and_broadcast_weights_ipc(self, llm_handle, packed: bool = True):
+        """All-gather full params; all ranks create IPC handles, rank 0 sends.
+
+        All ranks must call trainer_send_weights so they participate in the
+        all_gather_object collective inside _all_gather_and_merge_handles.
+        Only rank 0 actually sends the payload to vLLM (gated by _is_rank_zero).
+        """
+
+        def _full_param_iter():
+            # HF's Qwen3MoeExperts (and other recent HF MoE impls) packs
+            # all experts into two fused 3-D tensors per layer:
+            #   experts.gate_up_proj  shape (E, 2*I, H)
+            #   experts.down_proj     shape (E, H, I)
+            # vLLM's Qwen3MoE load_weights still expects the older
+            # per-expert HF layout (experts.<i>.gate_proj.weight,
+            # experts.<i>.up_proj.weight, experts.<i>.down_proj.weight),
+            # so we un-fuse on the fly. Split order matches HF's forward:
+            #   gate, up = linear(x, gate_up_proj[i]).chunk(2, dim=-1)
+            # → rows [:I] of gate_up_proj[i] are gate, rows [I:] are up.
+            params = self.model.state_dict()
+            for name in list(params.keys()):
+                param = params.pop(name)
+                if isinstance(param, DTensor):
+                    tensor = param.full_tensor().detach().contiguous()
+                else:
+                    tensor = param.detach().contiguous()
+                del param
+
+                if name.endswith(".experts.gate_up_proj") and tensor.dim() == 3:
+                    prefix = name[: -len(".gate_up_proj")]
+                    num_experts, two_inter, _ = tensor.shape
+                    inter = two_inter // 2
+                    for i in range(num_experts):
+                        expert = tensor[i]
+                        yield (
+                            f"{prefix}.{i}.gate_proj.weight",
+                            expert[:inter].contiguous(),
+                        )
+                        yield (
+                            f"{prefix}.{i}.up_proj.weight",
+                            expert[inter:].contiguous(),
+                        )
+                    del tensor
+                elif name.endswith(".experts.down_proj") and tensor.dim() == 3:
+                    prefix = name[: -len(".down_proj")]
+                    num_experts = tensor.shape[0]
+                    for i in range(num_experts):
+                        yield (
+                            f"{prefix}.{i}.down_proj.weight",
+                            tensor[i].contiguous(),
+                        )
+                    del tensor
+                else:
+                    yield name, tensor
+
+        trainer_args = IPCTrainerSendWeightsArgs(
+            send_mode="ray",
+            llm_handle=llm_handle,
+            packed=packed,
+            packed_buffer_size_bytes=1024 * 1024 * 1024,  # 1 GB
+        )
+        IPCWeightTransferEngine.trainer_send_weights(
+            iterator=_full_param_iter(),
+            trainer_args=trainer_args,
+        )
+
+
+@ray.remote(num_cpus=1)
+class DataParallelInferenceEngine:
+    """Manages a pool of DP-sharded vLLM LLM actors.
+
+    Spawns one MyLLM actor per placement group, waits for all engines to
+    finish initializing, and exposes generation / weight-sync helpers.
+    """
+
+    def __init__(
+        self,
+        model: str,
+        pgs: list,
+        dp_master_ip: str,
+        dp_master_port: int,
+    ):
+        dp_size = len(pgs)
+        self.llm_actors = []
+        for r in range(dp_size):
+            sched = PlacementGroupSchedulingStrategy(
+                placement_group=pgs[r],
+                placement_group_capture_child_tasks=True,
+            )
+            actor = (
+                ray.remote(num_cpus=0, num_gpus=0)(MyLLM)
+                .options(scheduling_strategy=sched)
+                .remote(
+                    model=model,
+                    enforce_eager=True,
+                    tensor_parallel_size=INFERENCE_TP_SIZE,
+                    distributed_executor_backend="ray",
+                    enable_expert_parallel=True,
+                    gpu_memory_utilization=0.35,
+                    weight_transfer_config=WeightTransferConfig(backend="ipc"),
+                    enable_sleep_mode=True,
+                    load_format="dummy",
+                    dp_rank=r,
+                    dp_size=dp_size,
+                    dp_master_ip=dp_master_ip,
+                    dp_master_port=dp_master_port,
+                )
+            )
+            self.llm_actors.append(actor)
+
+        ray.get([actor.ready.remote() for actor in self.llm_actors])
+
+    def get_llm_actors(self):
+        return self.llm_actors
+
+    def generate(self, prompts: list[str], sampling_params):
+        """Distribute prompts round-robin across DP ranks and collect results."""
+        dp_size = len(self.llm_actors)
+        per_rank: list[list[str]] = [[] for _ in range(dp_size)]
+        indices: list[list[int]] = [[] for _ in range(dp_size)]
+
+        for i, prompt in enumerate(prompts):
+            rank = i % dp_size
+            per_rank[rank].append(prompt)
+            indices[rank].append(i)
+
+        refs = [
+            actor.generate.remote(per_rank[r], sampling_params)
+            for r, actor in enumerate(self.llm_actors)
+            if per_rank[r]
+        ]
+        all_outputs = ray.get(refs)
+
+        ordered = [None] * len(prompts)
+        rank_idx = 0
+        for r in range(dp_size):
+            if per_rank[r]:
+                for local_i, orig_i in enumerate(indices[r]):
+                    ordered[orig_i] = all_outputs[rank_idx][local_i]
+                rank_idx += 1
+        return ordered
+
+    def init_weight_transfer(self):
+        ray.get(
+            [
+                actor.init_weight_transfer_engine.remote(
+                    dict(init_info=asdict(IPCWeightTransferInitInfo()))
+                )
+                for actor in self.llm_actors
+            ]
+        )
+
+    def start_weight_update(self, is_checkpoint_format: bool = True):
+        ray.get(
+            [
+                actor.start_weight_update.remote(
+                    is_checkpoint_format=is_checkpoint_format
+                )
+                for actor in self.llm_actors
+            ]
+        )
+
+    def finish_weight_update(self):
+        ray.get([actor.finish_weight_update.remote() for actor in self.llm_actors])
+
+    def sleep(self, level: int = 0):
+        ray.get([actor.sleep.remote(level=level) for actor in self.llm_actors])
+
+    def wake_up(self, tags: list[str] | None = None):
+        ray.get([actor.wake_up.remote(tags=tags) for actor in self.llm_actors])
+
+
+def main():
+    ray.init(
+        runtime_env={
+            "env_vars": {
+                "VLLM_ALLOW_INSECURE_SERIALIZATION": "1",
+            }
+        }
+    )
+
+    assert TRAIN_GPU_FRACTION + VLLM_GPU_FRACTION <= 1.0, (
+        "Train + vLLM GPU fractions must sum to at most 1.0 per bundle."
+    )
+
+    local_model_path = snapshot_download(MODEL_NAME)
+    print(f"[init] Model downloaded to {local_model_path}")
+
+    fsdp_master_addr = get_ip()
+    fsdp_master_port = get_open_port()
+    dp_master_port = get_open_port()
+    dp_master_ip = get_ip()
+
+    # Create one placement group per DP rank (one GPU each).
+    pgs = []
+    for _ in range(INFERENCE_DP_SIZE):
+        pg = placement_group([{"GPU": 1, "CPU": 1}])
+        pgs.append(pg)
+    ray.get([pg.ready() for pg in pgs])
+    print(f"[init] {len(pgs)} placement groups ready.")
+
+    # Launch FSDP training workers, one per PG.
+    scheduling = [
+        PlacementGroupSchedulingStrategy(
+            placement_group=pgs[r],
+            placement_group_capture_child_tasks=True,
+        )
+        for r in range(FSDP_WORLD_SIZE)
+    ]
+
+    fsdp_workers = [
+        FSDPTrainWorker.options(scheduling_strategy=scheduling[r]).remote(
+            local_model_path,
+            r,
+            FSDP_WORLD_SIZE,
+            fsdp_master_addr,
+            fsdp_master_port,
+        )
+        for r in range(FSDP_WORLD_SIZE)
+    ]
+    ray.get([w.get_rank.remote() for w in fsdp_workers])
+    print(f"[init] {FSDP_WORLD_SIZE} FSDP workers ready.")
+
+    # Launch DP inference engine (spawns and initializes all LLM actors).
+    inference_engine = DataParallelInferenceEngine.remote(
+        model=local_model_path,
+        pgs=pgs,
+        dp_master_ip=dp_master_ip,
+        dp_master_port=dp_master_port,
+    )
+    llm_actors = ray.get(inference_engine.get_llm_actors.remote())
+    print(f"[init] {INFERENCE_DP_SIZE} LLM actors ready.")
+
+    prompts = [
+        "Hello, my name is",
+        "The president of the United States is",
+        "The capital of France is",
+        "The future of AI is",
+    ]
+    sampling_params = SamplingParams(temperature=0)
+
+    print("[generate] Generating with dummy weights...")
+    outputs = ray.get(inference_engine.generate.remote(prompts, sampling_params))
+    print("-" * 60)
+    print("BEFORE weight sync (dummy weights):")
+    print("-" * 60)
+    for output in outputs:
+        print(f"Prompt: {output.prompt!r}")
+        print(f"Generated: {output.outputs[0].text!r}")
+        print("-" * 60)
+
+    # --- Weight transfer ---
+    print("[transfer] Initializing IPC weight transfer...")
+    ray.get(inference_engine.init_weight_transfer.remote())
+
+    # Two-phase sleep/wake pattern:
+    # 1. sleep(level=1) — offload weights to CPU, discard KV cache
+    # 2. wake_up(tags=["weights"]) — bring weights back to GPU (KV cache still free)
+    # 3. IPC weight transfer — overwrite weights, plenty of room without KV cache
+    # 4. wake_up(tags=["kv_cache"]) — re-allocate KV cache for inference
+    print("[sync] Sleeping engines (offload weights + free KV cache)...")
+    ray.get(inference_engine.sleep.remote(level=1))
+
+    print("[sync] Waking weights (KV cache stays free)...")
+    ray.get(inference_engine.wake_up.remote(tags=["weights"]))
+
+    print("[sync] Starting weight update...")
+    ray.get(inference_engine.start_weight_update.remote(is_checkpoint_format=True))
+
+    print("[sync] Packed IPC transfer FSDP → vLLM...")
+    ray.get(
+        [
+            w.gather_and_broadcast_weights_ipc.remote(llm_actors, packed=True)
+            for w in fsdp_workers
+        ]
+    )
+
+    ray.get(inference_engine.finish_weight_update.remote())
+    print("[sync] Weight transfer complete.")
+
+    print("[sync] Waking KV cache + scheduling...")
+    ray.get(inference_engine.wake_up.remote(tags=["kv_cache", "scheduling"]))
+
+    print("[generate] Generating with synced weights...")
+    outputs_updated = ray.get(
+        inference_engine.generate.remote(prompts, sampling_params)
+    )
+    print("-" * 60)
+    print("AFTER weight sync (real weights):")
+    print("-" * 60)
+    for output in outputs_updated:
+        print(f"Prompt: {output.prompt!r}")
+        print(f"Generated: {output.outputs[0].text!r}")
+        print("-" * 60)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/rl/rlhf_nccl.py b/examples/rl/rlhf_nccl.py
index 5d5f24a93f35..b94d5e4db827 100644
--- a/examples/rl/rlhf_nccl.py
+++ b/examples/rl/rlhf_nccl.py
@@ -186,6 +186,9 @@ def broadcast_weights(self, packed: bool = True):
 # Collect all weight metadata from the training actor
 names, dtype_names, shapes = ray.get(train_model.get_weight_metadata.remote())
 
+# Start weight update
+ray.get(llm.start_weight_update.remote(is_checkpoint_format=True))
+
 # Issue update_weights call with NCCL-specific update info
 # packed=True enables efficient batched tensor broadcasting
 inference_handle = llm.update_weights.remote(
@@ -203,6 +206,9 @@ def broadcast_weights(self, packed: bool = True):
 train_handle = train_model.broadcast_weights.remote(packed=True)
 ray.get([train_handle, inference_handle])
 
+# Finish weight update
+ray.get(llm.finish_weight_update.remote())
+
 ray.get(llm.wake_up.remote(tags=["scheduling"]))
 
 # Generate text with the updated model. The output is expected to be normal
diff --git a/examples/rl/rlhf_nccl_fsdp_ep.py b/examples/rl/rlhf_nccl_fsdp_ep.py
index 5b1eda3f4610..4337e6fea5ac 100644
--- a/examples/rl/rlhf_nccl_fsdp_ep.py
+++ b/examples/rl/rlhf_nccl_fsdp_ep.py
@@ -298,6 +298,9 @@ async def main():
     names, dtype_names, shapes = ray.get(fsdp_workers[0].get_weight_metadata.remote())
     print(f"[sync] Got metadata for {len(names)} parameters.")
 
+    print("[sync] Starting weight update...")
+    await engine.start_weight_update(is_checkpoint_format=True)
+
     print("[sync] Broadcasting weights from FSDP → vLLM...")
     broadcast_handles = [
         w.gather_and_broadcast_weights.remote(packed=True) for w in fsdp_workers
@@ -315,6 +318,8 @@ async def main():
         )
     )
     ray.get(broadcast_handles)
+
+    await engine.finish_weight_update()
     print("[sync] Weight broadcast complete.")
 
     print("[sync] Resuming generation...")
diff --git a/examples/offline_inference/routed_experts_e2e.py b/examples/rl/routed_experts_e2e.py
similarity index 99%
rename from examples/offline_inference/routed_experts_e2e.py
rename to examples/rl/routed_experts_e2e.py
index bb1d7b411f99..1666bc3ffe16 100644
--- a/examples/offline_inference/routed_experts_e2e.py
+++ b/examples/rl/routed_experts_e2e.py
@@ -9,7 +9,7 @@
 3. Results are deterministic across runs (baseline vs reference).
 
 Usage:
-    python examples/offline_inference/routed_experts_e2e.py \
+    python examples/rl/routed_experts_e2e.py \
         --model Qwen/Qwen3-30B-A3B \
         --tp 4 \
         --max-model-len 4096 \
diff --git a/examples/offline_inference/skip_loading_weights_in_engine_init.py b/examples/rl/skip_loading_weights_in_engine_init.py
similarity index 100%
rename from examples/offline_inference/skip_loading_weights_in_engine_init.py
rename to examples/rl/skip_loading_weights_in_engine_init.py
diff --git a/examples/speech_to_text/lid/openai_lid_client.py b/examples/speech_to_text/lid/openai_lid_client.py
new file mode 100644
index 000000000000..d91df3298c22
--- /dev/null
+++ b/examples/speech_to_text/lid/openai_lid_client.py
@@ -0,0 +1,193 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Language Identification (LID) demo using the FireRedLID model on vLLM.
+
+FireRedLID is an audio encoder-decoder model that identifies the spoken
+language of an audio clip. Unlike ASR models that output full transcriptions,
+FireRedLID outputs at most 2 tokens representing the detected language
+(e.g. "en", "zh mandarin").
+
+Start the vLLM server:
+
+    vllm serve PatchyTisa/FireRedLID-vllm
+
+Then run this script:
+
+    # Use the built-in sample audio
+    python examples/speech_to_text/lid/openai_lid_client.py
+
+    # Use your own audio file(s)
+    python examples/speech_to_text/lid/openai_lid_client.py \
+        --audio_paths audio_en.wav audio_zh.wav audio_fr.wav
+
+    # Batch-identify multiple files in one run
+    python examples/speech_to_text/lid/openai_lid_client.py \
+        --audio_paths /path/to/dir/*.wav
+
+Requirements:
+- vLLM with audio support
+- openai Python SDK
+- kaldi_native_fbank (pulled in by the model)
+"""
+
+import argparse
+import json
+import os
+
+from openai import OpenAI
+
+from vllm.assets.audio import AudioAsset
+
+# ──────────────────────────────────────────────────────────────────────
+# Helpers
+# ──────────────────────────────────────────────────────────────────────
+
+
+def identify_language(
+    audio_path: str,
+    client: OpenAI,
+    model: str,
+) -> str:
+    """
+    Send a single audio file to the vLLM transcription endpoint and return
+    the detected language tag.
+
+    FireRedLID re-uses the OpenAI-compatible ``/v1/audio/transcriptions``
+    endpoint. The "transcription" it returns is actually the language label
+    (e.g. ``"en"`` or ``"zh mandarin"``).
+    """
+    with open(audio_path, "rb") as f:
+        result = client.audio.transcriptions.create(
+            file=f,
+            model=model,
+            response_format="json",
+            temperature=0.0,
+        )
+    return result.text.strip()
+
+
+def identify_language_raw(
+    audio_path: str,
+    model: str,
+    api_base: str,
+) -> str:
+    """
+    Same as :func:`identify_language` but uses raw HTTP so that the demo
+    works without the ``openai`` SDK (useful for quick debugging).
+    """
+    import requests
+
+    url = f"{api_base}/audio/transcriptions"
+    with open(audio_path, "rb") as f:
+        files = {"file": (os.path.basename(audio_path), f)}
+        data = {
+            "model": model,
+            "response_format": "json",
+        }
+        resp = requests.post(url, files=files, data=data)
+        resp.raise_for_status()
+    return resp.json()["text"].strip()
+
+
+def identify_language_streaming(
+    audio_path: str,
+    model: str,
+    api_base: str,
+) -> str:
+    """
+    Streaming variant – demonstrates the streaming transcription endpoint.
+    For a 1-2 token output the stream finishes almost instantly, but this
+    shows that the API path works end-to-end.
+    """
+    import requests
+
+    url = f"{api_base}/audio/transcriptions"
+    with open(audio_path, "rb") as f:
+        files = {"file": (os.path.basename(audio_path), f)}
+        data = {
+            "stream": "true",
+            "model": model,
+            "response_format": "json",
+        }
+        response = requests.post(url, files=files, data=data, stream=True)
+        response.raise_for_status()
+
+        tokens: list[str] = []
+        for chunk in response.iter_lines(
+            chunk_size=8192, decode_unicode=False, delimiter=b"\n"
+        ):
+            if not chunk:
+                continue
+            payload = json.loads(chunk[len("data: ") :].decode("utf-8"))
+            choice = payload["choices"][0]
+            delta = choice.get("delta", {}).get("content", "")
+            if delta:
+                tokens.append(delta)
+            if choice.get("finish_reason") is not None:
+                break
+
+    return "".join(tokens).strip()
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Main
+# ──────────────────────────────────────────────────────────────────────
+
+
+def main(args: argparse.Namespace) -> None:
+    api_base = args.api_base.rstrip("/")
+    client = OpenAI(api_key="EMPTY", base_url=api_base)
+    model = client.models.list().data[0].id
+    print(f"Model : {model}")
+    print(f"Server: {api_base}\n")
+
+    # Resolve audio paths ------------------------------------------------
+    if args.audio_paths:
+        audio_paths = args.audio_paths
+    else:
+        # Fall back to the built-in vLLM sample audios (both are English).
+        audio_paths = [
+            str(AudioAsset("mary_had_lamb").get_local_path()),
+            str(AudioAsset("winning_call").get_local_path()),
+        ]
+
+    # Run LID for each file ----------------------------------------------
+    print(f"{'Audio File':<50} {'Language (sync)':<20} {'Language (stream)'}")
+    print("-" * 90)
+
+    for path in audio_paths:
+        basename = os.path.basename(path)
+
+        # 1) Synchronous via OpenAI SDK
+        lang_sync = identify_language(path, client, model)
+
+        # 2) Streaming via raw HTTP
+        lang_stream = identify_language_streaming(path, model, api_base)
+
+        print(f"{basename:<50} {lang_sync:<20} {lang_stream}")
+
+    print()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="FireRedLID – Language Identification demo via vLLM",
+    )
+    parser.add_argument(
+        "--audio_paths",
+        nargs="+",
+        default=None,
+        help=(
+            "One or more audio files to identify. "
+            "If omitted, uses vLLM's built-in sample audios."
+        ),
+    )
+    parser.add_argument(
+        "--api_base",
+        type=str,
+        default="http://localhost:8000/v1",
+        help="vLLM API base URL (default: http://localhost:8000/v1)",
+    )
+    args = parser.parse_args()
+    main(args)
diff --git a/examples/online_serving/openai_transcription_client.py b/examples/speech_to_text/openai/openai_transcription_client.py
similarity index 90%
rename from examples/online_serving/openai_transcription_client.py
rename to examples/speech_to_text/openai/openai_transcription_client.py
index 478a0a7ea9e8..396edba1155d 100644
--- a/examples/online_serving/openai_transcription_client.py
+++ b/examples/speech_to_text/openai/openai_transcription_client.py
@@ -27,7 +27,12 @@
 
 
 def sync_openai(
-    audio_path: str, client: OpenAI, model: str, *, repetition_penalty: float = 1.3
+    audio_path: str,
+    client: OpenAI,
+    model: str,
+    *,
+    repetition_penalty: float = 1.3,
+    hotwords: str = None,
 ):
     """
     Perform synchronous transcription using OpenAI-compatible API.
@@ -43,12 +48,15 @@ def sync_openai(
             extra_body=dict(
                 seed=4419,
                 repetition_penalty=repetition_penalty,
+                hotwords=hotwords,
             ),
         )
         print("transcription result [sync]:", transcription.text)
 
 
-async def stream_openai_response(audio_path: str, client: AsyncOpenAI, model: str):
+async def stream_openai_response(
+    audio_path: str, client: AsyncOpenAI, model: str, hotwords: str = None
+):
     """
     Perform asynchronous transcription using OpenAI-compatible API.
     """
@@ -64,6 +72,7 @@ async def stream_openai_response(audio_path: str, client: AsyncOpenAI, model: st
             extra_body=dict(
                 seed=420,
                 top_p=0.6,
+                hotwords=hotwords,
             ),
             stream=True,
         )
@@ -136,6 +145,7 @@ def main(args):
         client=client,
         model=model,
         repetition_penalty=args.repetition_penalty,
+        hotwords=args.hotwords,
     )
 
     # Run the asynchronous function
@@ -146,7 +156,10 @@ def main(args):
         )
         asyncio.run(
             stream_openai_response(
-                args.audio_path if args.audio_path else winning_call, client, model
+                args.audio_path if args.audio_path else winning_call,
+                client,
+                model,
+                hotwords=args.hotwords,
             )
         )
     else:
@@ -174,5 +187,11 @@ def main(args):
         default=1.3,
         help="repetition penalty",
     )
+    parser.add_argument(
+        "--hotwords",
+        type=str,
+        default=None,
+        help="hotwords",
+    )
     args = parser.parse_args()
     main(args)
diff --git a/examples/online_serving/openai_translation_client.py b/examples/speech_to_text/openai/openai_translation_client.py
similarity index 100%
rename from examples/online_serving/openai_translation_client.py
rename to examples/speech_to_text/openai/openai_translation_client.py
diff --git a/examples/online_serving/openai_realtime_client.py b/examples/speech_to_text/realtime/openai_realtime_client.py
similarity index 97%
rename from examples/online_serving/openai_realtime_client.py
rename to examples/speech_to_text/realtime/openai_realtime_client.py
index 2bd3c7e60d55..fda3d7cb4564 100644
--- a/examples/online_serving/openai_realtime_client.py
+++ b/examples/speech_to_text/realtime/openai_realtime_client.py
@@ -12,7 +12,6 @@
 Requirements:
 - vllm with audio support
 - websockets
-- librosa
 - numpy
 
 The script:
@@ -26,12 +25,12 @@
 import asyncio
 import json
 
-import librosa
 import numpy as np
 import pybase64 as base64
 import websockets
 
 from vllm.assets.audio import AudioAsset
+from vllm.multimodal.media.audio import load_audio
 
 
 def audio_to_pcm16_base64(audio_path: str) -> str:
@@ -39,7 +38,7 @@ def audio_to_pcm16_base64(audio_path: str) -> str:
     Load an audio file and convert it to base64-encoded PCM16 @ 16kHz.
     """
     # Load audio and resample to 16kHz mono
-    audio, _ = librosa.load(audio_path, sr=16000, mono=True)
+    audio, _ = load_audio(audio_path, sr=16000, mono=True)
     # Convert to PCM16
     pcm16 = (audio * 32767).astype(np.int16)
     # Encode as base64
diff --git a/examples/online_serving/openai_realtime_microphone_client.py b/examples/speech_to_text/realtime/openai_realtime_microphone_client.py
similarity index 100%
rename from examples/online_serving/openai_realtime_microphone_client.py
rename to examples/speech_to_text/realtime/openai_realtime_microphone_client.py
diff --git a/examples/offline_inference/chat_with_tools.py b/examples/tool_calling/chat_with_tools_offline.py
similarity index 100%
rename from examples/offline_inference/chat_with_tools.py
rename to examples/tool_calling/chat_with_tools_offline.py
diff --git a/examples/online_serving/openai_chat_completion_client_with_tools.py b/examples/tool_calling/openai_chat_completion_client_with_tools.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_client_with_tools.py
rename to examples/tool_calling/openai_chat_completion_client_with_tools.py
diff --git a/examples/online_serving/openai_chat_completion_client_with_tools_required.py b/examples/tool_calling/openai_chat_completion_client_with_tools_required.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_client_with_tools_required.py
rename to examples/tool_calling/openai_chat_completion_client_with_tools_required.py
diff --git a/examples/online_serving/openai_chat_completion_client_with_tools_xlam.py b/examples/tool_calling/openai_chat_completion_client_with_tools_xlam.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_client_with_tools_xlam.py
rename to examples/tool_calling/openai_chat_completion_client_with_tools_xlam.py
diff --git a/examples/online_serving/openai_chat_completion_client_with_tools_xlam_streaming.py b/examples/tool_calling/openai_chat_completion_client_with_tools_xlam_streaming.py
similarity index 100%
rename from examples/online_serving/openai_chat_completion_client_with_tools_xlam_streaming.py
rename to examples/tool_calling/openai_chat_completion_client_with_tools_xlam_streaming.py
diff --git a/examples/online_serving/openai_responses_client_with_mcp_tools.py b/examples/tool_calling/openai_responses_client_with_mcp_tools.py
similarity index 97%
rename from examples/online_serving/openai_responses_client_with_mcp_tools.py
rename to examples/tool_calling/openai_responses_client_with_mcp_tools.py
index cafe19a2d195..1e685a4f3440 100644
--- a/examples/online_serving/openai_responses_client_with_mcp_tools.py
+++ b/examples/tool_calling/openai_responses_client_with_mcp_tools.py
@@ -19,7 +19,6 @@
 """
 
 from openai import OpenAI
-from utils import get_first_model
 
 
 def example_no_filter():
@@ -30,7 +29,7 @@ def example_no_filter():
 
     base_url = "http://0.0.0.0:8000/v1"
     client = OpenAI(base_url=base_url, api_key="empty")
-    model = get_first_model(client)
+    model = client.models.list().data[0].id
 
     response = client.responses.create(
         model=model,
@@ -59,7 +58,7 @@ def example_wildcard():
 
     base_url = "http://0.0.0.0:8000/v1"
     client = OpenAI(base_url=base_url, api_key="empty")
-    model = get_first_model(client)
+    model = client.models.list().data[0].id
 
     response = client.responses.create(
         model=model,
@@ -95,7 +94,7 @@ def example_specific_tools():
 
     base_url = "http://0.0.0.0:8000/v1"
     client = OpenAI(base_url=base_url, api_key="empty")
-    model = get_first_model(client)
+    model = client.models.list().data[0].id
 
     response = client.responses.create(
         model=model,
@@ -126,7 +125,7 @@ def example_object_format():
 
     base_url = "http://0.0.0.0:8000/v1"
     client = OpenAI(base_url=base_url, api_key="empty")
-    model = get_first_model(client)
+    model = client.models.list().data[0].id
 
     response = client.responses.create(
         model=model,
diff --git a/examples/online_serving/openai_responses_client_with_tools.py b/examples/tool_calling/openai_responses_client_with_tools.py
similarity index 97%
rename from examples/online_serving/openai_responses_client_with_tools.py
rename to examples/tool_calling/openai_responses_client_with_tools.py
index c85c8cf807b4..a47210f60020 100644
--- a/examples/online_serving/openai_responses_client_with_tools.py
+++ b/examples/tool_calling/openai_responses_client_with_tools.py
@@ -14,7 +14,6 @@
 import json
 
 from openai import OpenAI
-from utils import get_first_model
 
 
 def get_weather(latitude: float, longitude: float) -> str:
@@ -51,7 +50,7 @@ def get_weather(latitude: float, longitude: float) -> str:
 def main():
     base_url = "http://0.0.0.0:8000/v1"
     client = OpenAI(base_url=base_url, api_key="empty")
-    model = get_first_model(client)
+    model = client.models.list().data[0].id
     response = client.responses.create(
         model=model, input=input_messages, tools=tools, tool_choice="required"
     )
diff --git a/examples/tool_chat_template_apertus.jinja b/examples/tool_chat_template_apertus.jinja
new file mode 100644
index 000000000000..87b4929b835a
--- /dev/null
+++ b/examples/tool_chat_template_apertus.jinja
@@ -0,0 +1,384 @@
+{%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%}
+    {%- if param_spec.type == "array" -%}
+        {%- if param_spec['items'] -%}
+            {%- if param_spec['items']['type'] == "string" -%}
+                {{- "string[]" }}
+            {%- elif param_spec['items']['type'] == "number" -%}
+                {{- "number[]" }}
+            {%- elif param_spec['items']['type'] == "integer" -%}
+                {{- "number[]" }}
+            {%- elif param_spec['items']['type'] == "boolean" -%}
+                {{- "boolean[]" }}
+            {%- else -%}
+                {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%}
+                {%- if inner_type == "object | object" or inner_type|length > 50 -%}
+                    {{- "any[]" }}
+                {%- else -%}
+                    {{- inner_type + "[]" }}
+                {%- endif -%}
+            {%- endif -%}
+            {%- if param_spec.nullable -%}
+                {{- " | null" }}
+            {%- endif -%}
+        {%- else -%}
+            {{- "any[]" }}
+            {%- if param_spec.nullable -%}
+                {{- " | null" }}
+            {%- endif -%}
+        {%- endif -%}
+    {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%}
+        {#- Handle array of types like ["object", "object"] from Union[dict, list] #}
+        {%- if param_spec.type | length > 1 -%}
+            {{- param_spec.type | join(" | ") }}
+        {%- else -%}
+            {{- param_spec.type[0] }}
+        {%- endif -%}
+    {%- elif param_spec.oneOf -%}
+        {#- Handle oneOf schemas - check for complex unions and fallback to any #}
+        {%- set has_object_variants = false -%}
+        {%- for variant in param_spec.oneOf -%}
+            {%- if variant.type == "object" -%}
+                {%- set has_object_variants = true -%}
+            {%- endif -%}
+        {%- endfor -%}
+        {%- if has_object_variants and param_spec.oneOf|length > 1 -%}
+            {{- "any" }}
+        {%- else -%}
+            {%- for variant in param_spec.oneOf -%}
+                {{- render_typescript_type(variant, required_params) -}}
+                {%- if variant.description %}
+                    {{- "// " + variant.description }}
+                {%- endif -%}
+                {%- if variant.default is defined %}
+                    {{ "// default: " + variant.default|tojson }}
+                {%- endif -%}
+                {%- if not loop.last %}
+                    {{- " | " }}
+                {% endif -%}
+            {%- endfor -%}
+        {%- endif -%}
+    {%- elif param_spec.type == "string" -%}
+        {%- if param_spec.enum -%}
+            {{- '"' + param_spec.enum|join('" | "') + '"' -}}
+        {%- else -%}
+            {{- "string" }}
+            {%- if param_spec.nullable %}
+                {{- " | null" }}
+            {%- endif -%}
+        {%- endif -%}
+    {%- elif param_spec.type == "number" -%}
+        {{- "number" }}
+    {%- elif param_spec.type == "integer" -%}
+        {{- "number" }}
+    {%- elif param_spec.type == "boolean" -%}
+        {{- "boolean" }}
+    {%- elif param_spec.type == "object" -%}
+        {%- if param_spec.properties -%}
+            {{- "{\n" }}
+            {%- for prop_name, prop_spec in param_spec.properties.items() -%}
+                {{- prop_name -}}
+                {%- if prop_name not in (param_spec.required or []) -%}
+                    {{- "?" }}
+                {%- endif -%}
+                {{- ": " }}
+                {{ render_typescript_type(prop_spec, param_spec.required or []) }}
+                {%- if not loop.last -%}
+                    {{-", " }}
+                {%- endif -%}
+            {%- endfor -%}
+            {{- "}" }}
+        {%- else -%}
+            {{- "object" }}
+        {%- endif -%}
+    {%- else -%}
+        {{- "any" }}
+    {%- endif -%}
+{%- endmacro -%}
+
+{%- macro render_tools(tools) -%}
+    {%- for tool in tools %}
+        {%- if tool.function is defined -%}
+            {#- Chat Completions format: {"type": "function", "function": {...}} #}
+            {%- set func = tool.function -%}
+            {%- if func.description is defined -%}
+                {{- "// " + func.description + "\n" }}
+            {%- endif -%}
+            {{- "type "+ func.name + " = " }}
+            {%- if func.parameters and func.parameters.properties %}
+                {{- "(_: {\n" }}
+                {%- for param_name, param_spec in func.parameters.properties.items() %}
+                    {%- if param_spec.description is defined %}
+                        {{- "// " + param_spec.description + "\n" }}
+                    {%- endif %}
+                    {{- param_name }}
+                    {%- if param_name not in (func.parameters.required or []) -%}
+                        {{- "?" }}
+                    {%- endif -%}
+                    {{- ": " }}
+                    {{- render_typescript_type(param_spec, func.parameters.required or []) }}
+                    {%- if param_spec.default is defined -%}
+                        {%- if param_spec.enum %}
+                            {{- ", // default: " + param_spec.default }}
+                        {%- elif param_spec.oneOf %}
+                            {{- "// default: " + param_spec.default }}
+                        {%- else %}
+                            {{- ", // default: " + param_spec.default|tojson }}
+                        {%- endif -%}
+                    {%- endif -%}
+                    {%- if not loop.last %}
+                        {{- ",\n" }}
+                    {%- else %}
+                        {{- "\n" }}
+                    {%- endif -%}
+                {%- endfor %}
+                {{- "}) => any;" }}
+            {%- else -%}
+                {{- "() => any;" }}
+            {%- endif -%}
+        {%- else -%}
+            {#- Responses format: {"type": "function", "name": "...", ...} #}
+            {%- if tool.description is defined -%}
+                {{- "// " + tool.description + "\n" }}
+            {%- endif -%}
+            {{- "type "+ tool.name + " = " }}
+            {%- if tool.parameters and tool.parameters.properties %}
+                {{- "(_: {\n" }}
+                {%- for param_name, param_spec in tool.parameters.properties.items() %}
+                    {%- if param_spec.description is defined %}
+                        {{- "// " + param_spec.description + "\n" }}
+                    {%- endif %}
+                    {{- param_name }}
+                    {%- if param_name not in (tool.parameters.required or []) -%}
+                        {{- "?" }}
+                    {%- endif -%}
+                    {{- ": " }}
+                    {{- render_typescript_type(param_spec, tool.parameters.required or []) }}
+                    {%- if param_spec.default is defined -%}
+                        {%- if param_spec.enum %}
+                            {{- ", // default: " + param_spec.default }}
+                        {%- elif param_spec.oneOf %}
+                            {{- "// default: " + param_spec.default }}
+                        {%- else %}
+                            {{- ", // default: " + param_spec.default|tojson }}
+                        {%- endif -%}
+                    {%- endif -%}
+                    {%- if not loop.last %}
+                        {{- ",\n" }}
+                    {%- else %}
+                        {{- "\n" }}
+                    {%- endif -%}
+                {%- endfor %}
+                {{- "}) => any;" }}
+            {%- else -%}
+                {{- "() => any;" }}
+            {%- endif -%}
+        {%- endif -%}
+        {%- if not loop.last -%}
+            {{- "\n" }}
+        {%- endif -%}
+    {%- endfor %}
+{%- endmacro -%}
+
+{{ bos_token }}
+
+{%- set system_token = '<|system_start|>' -%}
+{%- set end_system_token = '<|system_end|>' -%}
+{%- set developer_token = '<|developer_start|>' -%}
+{%- set end_developer_token = '<|developer_end|>' -%}
+{%- set user_token = '<|user_start|>' -%}
+{%- set end_user_token = '<|user_end|>' -%}
+{%- set assistant_token = '<|assistant_start|>' -%}
+{%- set end_assistant_token = '<|assistant_end|>' -%}
+{%- set inner_token = '<|inner_prefix|>' -%}
+{%- set outer_token = '<|inner_suffix|>' -%}
+{%- set tool_calls_token = '<|tools_prefix|>' -%}
+{%- set end_tool_calls_token = '<|tools_suffix|>' -%}
+
+{%- set ns = namespace(in_assistant=false, in_tool=false, in_inner=false, assistant_format=none) -%}
+
+{%- if messages and messages[0].role == 'system' -%}
+    {%- if "content" in messages[0] -%}
+        {%- if messages[0].content is string -%}
+            {{ system_token + messages[0].content + end_system_token }}
+        {%- elif messages[0].content is mapping and "text" in messages[0].content -%}
+            {{ system_token + messages[0].content.text + end_system_token }}
+        {%- else -%}
+            {{- raise_exception("Invalid system message") -}}
+        {%- endif -%}
+    {%- else -%}
+        {{- raise_exception("Invalid system message") -}}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {{ system_token + 'You are Apertus, a helpful assistant created by the SwissAI initiative.\nKnowledge cutoff: 2024-04\nCurrent date: ' + strftime_now('%Y-%m-%d') + end_system_token }}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+
+{{ developer_token + 'Deliberation: ' }}
+{%- if enable_thinking is defined and enable_thinking -%}
+    {{ 'enabled\n' }}
+{%- else -%}
+    {{ 'disabled\n' }}
+{%- endif -%}
+{%- if tools is defined and tools -%}
+    {{ 'Tool Capabilities:\n' + render_tools(tools) }}
+{%- else -%}
+    {{ 'Tool Capabilities: disabled' }}
+{%- endif -%}
+{{ end_developer_token }}
+
+{%- for message in loop_messages -%}
+    {%- if message.role == 'user' -%}
+        {%- set ns.in_inner = false -%}
+        {%- if ns.in_tool -%}
+            {{ ']' }}
+            {%- set ns.in_tool = false -%}
+        {%- endif -%}
+        {%- if ns.in_assistant -%}
+            {{ end_assistant_token }}
+            {%- set ns.in_assistant = false -%}
+        {%- endif -%}
+        {%- if "content" in message -%}
+            {{ user_token }}
+            {%- if message.content is string -%}
+                {{ message.content }}
+            {%- elif message.content is mapping and "parts" in message.content -%}
+                {%- set parts = message.content.parts -%}
+                {%- for part in parts -%}
+                    {%- if part.type == "text" -%}
+                        {{ part.text }}
+                    {%- else -%}
+                        {{- raise_exception("Invalid user part: " + part.type) -}}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- else -%}
+                {{- raise_exception("Invalid user message: " + message.role) -}}
+            {%- endif -%}
+            {{ end_user_token }}
+        {%- endif -%}
+    {%- elif message.role == 'assistant' -%}
+        {%- if not ns.in_assistant -%}
+            {{ assistant_token }}
+            {%- set ns.in_assistant = true -%}
+        {%- endif -%}
+        {%- if "content" in message and message.content is not none -%}
+            {%- if message.content is string and (ns.assistant_format is none or ns.assistant_format == "string") -%}
+                {%- if ns.in_tool -%}
+                    {{ ']' }}
+                    {%- set ns.in_tool = false -%}
+                {%- endif -%}
+                {%- set ns.assistant_format = "string" -%}
+                {{ message.content }}
+            {%- elif message.content is mapping and "blocks" in message.content and (ns.assistant_format is none or ns.assistant_format == "mapping") -%}
+                {%- set ns.assistant_format = "mapping" -%}
+                {%- set blocks = message.content.blocks -%}
+                {%- for block in blocks -%}
+                    {%- if block.type == 'thoughts' -%}
+                        {%- if ns.in_tool -%}
+                            {{ ']' }}
+                            {%- set ns.in_tool = false -%}
+                        {%- endif -%}
+                        {%- if not ns.in_inner -%}
+                            {%- set ns.in_inner = true -%}
+                            {{ inner_token }}
+                        {%- endif -%}
+                        {{ block.text }}
+                    {%- elif block.type == 'tool_calls' -%}
+                        {%- if ns.in_tool -%}
+                            {{ ']' }}
+                            {%- set ns.in_tool = false -%}
+                        {%- endif -%}
+                        {%- if ns.in_inner and not loop.first and block.calls|length == 1 and block.calls[0].name == 'display_answers' -%}
+                            {%- set ns.in_inner = false -%}
+                            {{ outer_token }}
+                        {%- endif -%}
+                        {{ tool_calls_token + '[' }}
+                        {%- for tool_call in block.calls -%}
+                            {%- set args = tool_call.arguments -%}
+                            {%- if args is string -%}
+                                {{- '{"' + tool_call.name + '": ' + args + '}' }}
+                            {%- else -%}
+                                {{- '{"' + tool_call.name + '": ' + args|tojson + '}' }}
+                            {%- endif -%}
+                            {%- if not loop.last -%}
+                                {{- ", " }}
+                            {%- endif -%}
+                        {%- endfor -%}
+                        {{ ']' + end_tool_calls_token }}
+                    {%- elif block.type == 'tool_outputs' -%}
+                        {%- if ns.in_tool -%}
+                            {{- raise_exception("Cannot have both tool outputs as separate messages and tool outputs as blocks") -}}
+                        {%- endif -%}
+                        {{ '[' }}
+                        {%- for tool_output in block.outputs -%}
+                            {{- tool_output.output }}
+                            {%- if not loop.last -%}
+                                {{- ", " }}
+                            {%- endif -%}
+                        {%- endfor -%}
+                        {{- ']' }}
+                    {%- elif block.type == 'response' -%}
+                        {%- if ns.in_tool -%}
+                            {{ ']' }}
+                            {%- set ns.in_tool = false -%}
+                        {%- endif -%}
+                        {%- if (not loop.first and ns.in_inner) or (ns.in_assistant and ns.in_inner) -%}
+                            {%- set ns.in_inner = false -%}
+                            {{ outer_token }}
+                        {%- endif -%}
+                        {{ block.text }}
+                    {%- else -%}
+                        {{- raise_exception("Invalid assistant block type: " + block.type) -}}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- else -%}
+                {{- raise_exception("Invalid assistant content") -}}
+            {%- endif -%}
+        {%- elif not ("tool_calls" in message and message.tool_calls) -%}
+            {{- raise_exception("Invalid assistant message") -}}
+        {%- endif -%}
+        {%- if "tool_calls" in message and message.tool_calls -%}
+            {{ tool_calls_token + '[' }}
+            {%- for tool_call in message.tool_calls -%}
+                {%- if tool_call.type == 'function' -%}
+                    {%- set function = tool_call.function -%}
+                    {%- set args = function.arguments -%}
+                    {%- if args is string -%}
+                        {{- '{"' + function.name + '": ' + args + '}' }}
+                    {%- else -%}
+                        {{- '{"' + function.name + '": ' + args|tojson + '}' }}
+                    {%- endif -%}
+                    {%- if not loop.last -%}
+                        {{- ", " }}
+                    {%- endif -%}
+                {%- else -%}
+                    {{- raise_exception("Invalid tool call type: " + tool_call.type) -}}
+                {%- endif -%}
+            {%- endfor -%}
+            {{ ']' + end_tool_calls_token }}
+        {%- endif -%}
+    {%- elif message.role == 'tool' -%}
+        {%- if not ns.in_assistant -%}
+            {{- raise_exception("Tool message outside of assistant") -}}
+        {%- endif -%}
+        {%- if not ns.in_tool -%}
+            {{ '[' }}
+            {%- set ns.in_tool = true -%}
+        {%- else -%}
+            {{ ", "}}
+        {%- endif -%}
+        {%- if message.content is string -%}
+            {{ message.content }}
+        {%- else -%}
+            {{ message.content|tojson }}
+        {%- endif -%}
+    {%- else -%}
+        {{- raise_exception("Invalid message role") -}}
+    {%- endif -%}
+{%- endfor -%}
+{%- if ns.in_tool -%}
+    {{ ']' }}
+{%- endif -%}
+{%- if add_generation_prompt -%}
+    {{ assistant_token }}
+{%- endif -%}
\ No newline at end of file
diff --git a/examples/tool_chat_template_gemma4.jinja b/examples/tool_chat_template_gemma4.jinja
new file mode 100644
index 000000000000..d61dd795b586
--- /dev/null
+++ b/examples/tool_chat_template_gemma4.jinja
@@ -0,0 +1,354 @@
+{%- macro format_parameters(properties, required, filter_keys=false) -%}
+    {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
+    {%- set ns = namespace(found_first=false) -%}
+    {%- for key, value in properties | dictsort -%}
+        {%- set add_comma = false -%}
+        {%- if not filter_keys or key not in standard_keys -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {{ key }}:{
+            {%- if value['description'] -%}
+                description:<|"|>{{ value['description'] }}<|"|>
+                {%- set add_comma = true -%}
+            {%- endif -%}
+            {%- if value['type'] | upper == 'STRING' -%}
+                {%- if value['enum'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    enum:{{ format_argument(value['enum']) }}
+                {%- endif -%}
+            {%- elif value['type'] | upper == 'ARRAY' -%}
+                {%- if value['items'] is mapping and value['items'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    items:{
+                    {%- set ns_items = namespace(found_first=false) -%}
+                    {%- for item_key, item_value in value['items'] | dictsort -%}
+                        {%- if item_value is not none -%}
+                            {%- if ns_items.found_first %},{% endif -%}
+                            {%- set ns_items.found_first = true -%}
+                            {%- if item_key == 'properties' -%}
+                                properties:{
+                                {%- if item_value is mapping -%}
+                                    {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
+                                {%- endif -%}
+                                }
+                            {%- elif item_key == 'required' -%}
+                                required:[
+                                {%- for req_item in item_value -%}
+                                    <|"|>{{- req_item -}}<|"|>
+                                    {%- if not loop.last %},{% endif -%}
+                                {%- endfor -%}
+                                ]
+                            {%- elif item_key == 'type' -%}
+                                {%- if item_value is string -%}
+                                    type:{{ format_argument(item_value | upper) }}
+                                {%- else -%}
+                                    type:{{ format_argument(item_value | map('upper') | list) }}
+                                {%- endif -%}
+                            {%- else -%}
+                                {{ item_key }}:{{ format_argument(item_value) }}
+                            {%- endif -%}
+                        {%- endif -%}
+                    {%- endfor -%}
+                    }
+                {%- endif -%}
+            {%- endif -%}
+            {%- if value['nullable'] %}
+                {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                nullable:true
+            {%- endif -%}
+            {%- if value['type'] | upper == 'OBJECT' -%}
+                {%- if value['properties'] is defined and value['properties'] is mapping -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    properties:{
+                    {{- format_parameters(value['properties'], value['required'] | default([])) -}}
+                    }
+                {%- elif value is mapping -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    properties:{
+                    {{- format_parameters(value, value['required'] | default([]), filter_keys=true) -}}
+                    }
+                {%- endif -%}
+                {%- if value['required'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    required:[
+                    {%- for item in value['required'] | default([]) -%}
+                        <|"|>{{- item -}}<|"|>
+                        {%- if not loop.last %},{% endif -%}
+                    {%- endfor -%}
+                    ]
+                {%- endif -%}
+            {%- endif -%}
+            {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+            type:<|"|>{{ value['type'] | upper }}<|"|>}
+        {%- endif -%}
+    {%- endfor -%}
+{%- endmacro -%}
+{%- macro format_function_declaration(tool_data) -%}
+    declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
+    {%- set params = tool_data['function']['parameters'] -%}
+    {%- if params -%}
+        ,parameters:{
+        {%- if params['properties'] -%}
+            properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
+        {%- endif -%}
+        {%- if params['required'] -%}
+            required:[
+            {%- for item in params['required'] -%}
+                <|"|>{{- item -}}<|"|>
+                {{- ',' if not loop.last -}}
+            {%- endfor -%}
+            ],
+        {%- endif -%}
+        {%- if params['type'] -%}
+            type:<|"|>{{- params['type'] | upper -}}<|"|>}
+        {%- endif -%}
+    {%- endif -%}
+    {%- if 'response' in tool_data['function'] -%}
+        {%- set response_declaration = tool_data['function']['response'] -%}
+        ,response:{
+        {%- if response_declaration['description'] -%}
+            description:<|"|>{{- response_declaration['description'] -}}<|"|>,
+        {%- endif -%}
+        {%- if response_declaration['type'] | upper == 'OBJECT' -%}
+            type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>}
+        {%- endif -%}
+    {%- endif -%}
+    }
+{%- endmacro -%}
+{%- macro format_argument(argument, escape_keys=True) -%}
+    {%- if argument is string -%}
+        {{- '<|"|>' + argument + '<|"|>' -}}
+    {%- elif argument is boolean -%}
+        {{- 'true' if argument else 'false' -}}
+    {%- elif argument is mapping -%}
+        {{- '{' -}}
+        {%- set ns = namespace(found_first=false) -%}
+        {%- for key, value in argument | dictsort -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {%- if escape_keys -%}
+                {{- '<|"|>' + key + '<|"|>' -}}
+            {%- else -%}
+                {{- key -}}
+            {%- endif -%}
+            :{{- format_argument(value, escape_keys=escape_keys) -}}
+        {%- endfor -%}
+        {{- '}' -}}
+    {%- elif argument is sequence -%}
+        {{- '[' -}}
+        {%- for item in argument -%}
+            {{- format_argument(item, escape_keys=escape_keys) -}}
+            {%- if not loop.last %},{% endif -%}
+        {%- endfor -%}
+        {{- ']' -}}
+    {%- else -%}
+        {{- argument -}}
+    {%- endif -%}
+{%- endmacro -%}
+{%- macro strip_thinking(text) -%}
+    {%- set ns = namespace(result='') -%}
+    {%- for part in text.split('<channel|>') -%}
+        {%- if '<|channel>' in part -%}
+            {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
+        {%- else -%}
+            {%- set ns.result = ns.result + part -%}
+        {%- endif -%}
+    {%- endfor -%}
+    {{- ns.result | trim -}}
+{%- endmacro -%}
+
+{%- macro format_tool_response_block(tool_name, response) -%}
+    {{- '<|tool_response>' -}}
+    {%- if response is mapping -%}
+        {{- 'response:' + tool_name + '{' -}}
+        {%- for key, value in response | dictsort -%}
+            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+            {%- if not loop.last %},{% endif -%}
+        {%- endfor -%}
+        {{- '}' -}}
+    {%- else -%}
+        {{- 'response:' + tool_name + '{value:' + format_argument(response, escape_keys=False) + '}' -}}
+    {%- endif -%}
+    {{- '<tool_response|>' -}}
+{%- endmacro -%}
+
+{%- set ns = namespace(prev_message_type=None) -%}
+{%- set loop_messages = messages -%}
+{{- bos_token -}}
+{#- Handle System/Tool Definitions Block -#}
+{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
+    {{- '<|turn>system\n' -}}
+    {#- Inject Thinking token at the very top of the FIRST system turn -#}
+    {%- if enable_thinking is defined and enable_thinking -%}
+        {{- '<|think|>\n' -}}
+        {%- set ns.prev_message_type = 'think' -%}
+    {%- endif -%}
+    {%- if messages[0]['role'] in ['system', 'developer'] -%}
+        {%- if messages[0]['content'] is string -%}
+            {{- messages[0]['content'] | trim -}}
+        {%- elif messages[0]['content'] is sequence -%}
+            {%- for item in messages[0]['content'] -%}
+                {{- item['text'] | trim + ' '-}}
+            {%- endfor -%}
+        {%- endif -%}
+        {%- set loop_messages = messages[1:] -%}
+    {%- endif -%}
+    {%- if tools -%}
+        {%- for tool in tools %}
+            {{- '<|tool>' -}}
+            {{- format_function_declaration(tool) | trim -}}
+            {{- '<tool|>' -}}
+        {%- endfor %}
+        {%- set ns.prev_message_type = 'tool' -%}
+    {%- endif -%}
+    {{- '<turn|>\n' -}}
+{%- endif %}
+
+{#- Pre-scan: find last user message index for reasoning guard -#}
+{%- set ns_turn = namespace(last_user_idx=-1) -%}
+{%- for i in range(loop_messages | length) -%}
+    {%- if loop_messages[i]['role'] == 'user' -%}
+        {%- set ns_turn.last_user_idx = i -%}
+    {%- endif -%}
+{%- endfor -%}
+
+{#- Loop through messages -#}
+{%- for message in loop_messages -%}
+    {%- if message['role'] != 'tool' -%}
+    {%- set ns.prev_message_type = None -%}
+    {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
+    {#- Detect continuation: suppress duplicate <|turn>model when previous non-tool message was also assistant -#}
+    {%- set prev_nt = namespace(role=None, found=false) -%}
+    {%- if loop.index0 > 0 -%}
+        {%- for j in range(loop.index0 - 1, -1, -1) -%}
+            {%- if not prev_nt.found -%}
+                {%- if loop_messages[j]['role'] != 'tool' -%}
+                    {%- set prev_nt.role = loop_messages[j]['role'] -%}
+                    {%- set prev_nt.found = true -%}
+                {%- endif -%}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- endif -%}
+    {%- set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}
+    {%- if not continue_same_model_turn -%}
+        {{- '<|turn>' + role + '\n' }}
+    {%- endif -%}
+
+    {#- Render reasoning/reasoning_content as thinking channel -#}
+    {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}
+    {%- if thinking_text and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
+        {{- '<|channel>thought\n' + thinking_text + '\n<channel|>' -}}
+    {%- endif -%}
+
+            {%- if message['tool_calls'] -%}
+                {%- for tool_call in message['tool_calls'] -%}
+                    {%- set function = tool_call['function'] -%}
+                    {{- '<|tool_call>call:' + function['name'] + '{' -}}
+                    {%- if function['arguments'] is mapping -%}
+                        {%- set ns_args = namespace(found_first=false) -%}
+                        {%- for key, value in function['arguments'] | dictsort -%}
+                            {%- if ns_args.found_first %},{% endif -%}
+                            {%- set ns_args.found_first = true -%}
+                            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                        {%- endfor -%}
+                    {%- elif function['arguments'] is string -%}
+                        {{- function['arguments'] -}}
+                    {%- endif -%}
+                    {{- '}<tool_call|>' -}}
+                {%- endfor -%}
+                {%- set ns.prev_message_type = 'tool_call' -%}
+            {%- endif -%}
+
+            {%- set ns_tr_out = namespace(flag=false) -%}
+            {%- if message.get('tool_responses') -%}
+                {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}
+                {%- for tool_response in message['tool_responses'] -%}
+                    {{- format_tool_response_block(tool_response['name'] | default('unknown', true), tool_response['response']) -}}
+                    {%- set ns_tr_out.flag = true -%}
+                    {%- set ns.prev_message_type = 'tool_response' -%}
+                {%- endfor -%}
+            {%- elif message.get('tool_calls') -%}
+                {#- OpenAI Chat Completions: forward-scan consecutive role:tool messages -#}
+                {%- set ns_tool_scan = namespace(stopped=false) -%}
+                {%- for k in range(loop.index0 + 1, loop_messages | length) -%}
+                    {%- if ns_tool_scan.stopped -%}
+                    {%- elif loop_messages[k]['role'] != 'tool' -%}
+                        {%- set ns_tool_scan.stopped = true -%}
+                    {%- else -%}
+                        {%- set follow = loop_messages[k] -%}
+                        {#- Resolve tool_call_id to function name -#}
+                        {%- set ns_tname = namespace(name=follow.get('name') | default('unknown', true)) -%}
+                        {%- for tc in message['tool_calls'] -%}
+                            {%- if tc.get('id') == follow.get('tool_call_id') -%}
+                                {%- set ns_tname.name = tc['function']['name'] -%}
+                            {%- endif -%}
+                        {%- endfor -%}
+                        {#- Handle content as string or content-parts array -#}
+                        {%- set tool_body = follow.get('content') -%}
+                        {%- if tool_body is string -%}
+                            {{- format_tool_response_block(ns_tname.name, tool_body) -}}
+                        {%- elif tool_body is sequence and tool_body is not string -%}
+                            {%- set ns_txt = namespace(s='') -%}
+                            {%- for part in tool_body -%}
+                                {%- if part.get('type') == 'text' -%}
+                                    {%- set ns_txt.s = ns_txt.s + (part.get('text') | default('')) -%}
+                                {%- endif -%}
+                            {%- endfor -%}
+                            {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
+                        {%- else -%}
+                            {{- format_tool_response_block(ns_tname.name, tool_body) -}}
+                        {%- endif -%}
+                        {%- set ns_tr_out.flag = true -%}
+                        {%- set ns.prev_message_type = 'tool_response' -%}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- endif -%}
+
+            {%- set captured_content -%}
+            {%- if message['content'] is string -%}
+                {%- if role == 'model' -%}
+                    {{- strip_thinking(message['content']) -}}
+                {%- else -%}
+                    {{- message['content'] | trim -}}
+                {%- endif -%}
+            {%- elif message['content'] is sequence -%}
+                {%- for item in message['content'] -%}
+                    {%- if item['type'] == 'text' -%}
+                        {%- if role == 'model' -%}
+                            {{- strip_thinking(item['text']) -}}
+                        {%- else -%}
+                            {{- item['text'] | trim -}}
+                        {%- endif -%}
+                    {%- elif item['type'] == 'image' -%}
+                        {{- '<|image|>' -}}
+                        {%- set ns.prev_message_type = 'image' -%}
+                    {%- elif item['type'] == 'audio' -%}
+                        {{- '<|audio|>' -}}
+                        {%- set ns.prev_message_type = 'audio' -%}
+                    {%- elif item['type'] == 'video' -%}
+                        {{- '<|video|>' -}}
+                        {%- set ns.prev_message_type = 'video' -%}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- endif -%}
+            {%- endset -%}
+
+            {{- captured_content -}}
+            {%- set has_content = captured_content | trim | length > 0 -%}
+
+        {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
+            {{- '<|tool_response>' -}}
+        {%- elif not (ns_tr_out.flag and not has_content) -%}
+            {{- '<turn|>\n' -}}
+        {%- endif -%}
+    {%- endif -%}
+{%- endfor -%}
+
+{%- if add_generation_prompt -%}
+    {%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
+        {{- '<|turn>model\n' -}}
+        {%- if not enable_thinking | default(false) -%}
+            {{- '<|channel>thought\n<channel|>' -}}
+        {%- endif -%}
+    {%- endif -%}
+{%- endif -%}
\ No newline at end of file
diff --git a/examples/tool_chat_template_granite.jinja b/examples/tool_chat_template_granite.jinja
index 467dcb2d1023..834ec1bec480 100644
--- a/examples/tool_chat_template_granite.jinja
+++ b/examples/tool_chat_template_granite.jinja
@@ -1,4 +1,8 @@
 {%- if tools %}
+    {%- if messages and messages[0]['role'] != 'system' %}
+        {{- '<|start_of_role|>system<|end_of_role|>You are a helpful AI assistant with access to the following tools. When a tool is required to answer the user query, respond with <|tool_call|> followed by a JSON list of tools used.<|end_of_text|>
+' }}
+    {%- endif %}
     {{- '<|start_of_role|>available_tools<|end_of_role|>
 ' }}
     {%- for tool in tools %}
diff --git a/mkdocs.yaml b/mkdocs.yaml
index e37ae9b879a5..097f7497fb22 100644
--- a/mkdocs.yaml
+++ b/mkdocs.yaml
@@ -54,6 +54,7 @@ hooks:
   - docs/mkdocs/hooks/generate_argparse.py
   - docs/mkdocs/hooks/generate_metrics.py
   - docs/mkdocs/hooks/url_schemes.py
+  - docs/mkdocs/hooks/autoref_code.py
 
 plugins:
   - meta
@@ -109,6 +110,7 @@ plugins:
       redirect_maps:
         features/spec_decode/README.md: features/speculative_decoding/README.md
         features/spec_decode/speculators.md: features/speculative_decoding/speculators.md
+        serving/openai_compatible_server.md: serving/online_serving/README.md
 
 markdown_extensions:
   - attr_list
@@ -117,7 +119,11 @@ markdown_extensions:
   - admonition
   - pymdownx.details
   # For content tabs
-  - pymdownx.superfences
+  - pymdownx.superfences:
+      custom_fences:
+        - name: mermaid
+          class: mermaid
+          format: !!python/name:pymdownx.superfences.fence_code_format
   - pymdownx.tabbed:
       slugify: !!python/object/apply:pymdownx.slugs.slugify
         kwds:
diff --git a/pyproject.toml b/pyproject.toml
index fad8c8c687a1..faba7f3df3f5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,12 +1,13 @@
 [build-system]
-# Should be mirrored in requirements/build.txt
+# Should be mirrored in requirements/build/cuda.txt
 requires = [
     "cmake>=3.26.1",
     "ninja",
     "packaging>=24.2",
     "setuptools>=77.0.3,<81.0.0",
     "setuptools-scm>=8.0",
-    "torch == 2.10.0",
+    "setuptools-rust>=1.9.0",
+    "torch == 2.11.0",
     "wheel",
     "jinja2",
 ]
@@ -24,13 +25,14 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
     "Intended Audience :: Developers",
     "Intended Audience :: Information Technology",
     "Intended Audience :: Science/Research",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
     "Topic :: Scientific/Engineering :: Information Analysis",
 ]
-requires-python = ">=3.10,<3.14"
+requires-python = ">=3.10,<3.15"
 dynamic = [ "version", "dependencies", "optional-dependencies"]
 
 [project.urls]
@@ -120,9 +122,15 @@ python = "./.venv"
 [tool.typos.files]
 # these files may be written in non english words
 extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*", "tests/tokenizers_/*",
-    "benchmarks/sonnet.txt", "tests/lora/data/*", "examples/pooling/token_embed/*", "build/*",
-    "vllm/third_party/*", "vllm/entrypoints/serve/instrumentator/static/*", "tests/entrypoints/openai/speech_to_text/test_transcription_validation.py",
-    "docs/governance/process.md", "tests/v1/engine/test_fast_incdec_prefix_err.py", ".git/*"]
+    "benchmarks/sonnet.txt", "tests/lora/data/*", "build/*",
+    "examples/pooling/token_embed/*", "tests/models/language/pooling/*",
+    "vllm/third_party/*", "vllm/entrypoints/serve/instrumentator/static/*",
+    "tests/entrypoints/speech_to_text/transcription/test_transcription_validation.py",
+    "docs/governance/process.md", "docs/assets/contributing/vllm_bench_serve_timeline.html", 
+    "tests/v1/engine/test_fast_incdec_prefix_err.py", ".git/*", "csrc/cpu/sgl-kernels/*",
+    "rust/src/chat/src/renderer/deepseek_v32/fixtures/*",
+    "rust/src/tool-parser/src/gemma4.rs", "rust/src/text/src/output/decoded.rs",
+    "rust/src/tokenizer/src/incremental.rs", "rust/src/reasoning-parser/src/tests.rs"]
 ignore-hidden = false
 
 [tool.typos.default]
@@ -169,6 +177,10 @@ eles = "eles"
 datas = "datas"
 ser = "ser"
 ure = "ure"
+VALU = "VALU"
+# Walsh-Hadamard Transform
+wht = "wht"
+WHT = "WHT"
 
 [tool.uv]
 no-build-isolation-package = ["torch"]
diff --git a/requirements/build/cpu.txt b/requirements/build/cpu.txt
new file mode 100644
index 000000000000..640432ddd8cc
--- /dev/null
+++ b/requirements/build/cpu.txt
@@ -0,0 +1,12 @@
+--extra-index-url https://download.pytorch.org/whl/cpu
+cmake>=3.26.1
+ninja
+packaging>=24.2
+setuptools==77.0.3 # this version can reuse CMake build dir
+setuptools-scm>=8
+setuptools-rust>=1.9.0
+torch==2.11.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x" or platform_machine == "aarch64"
+torch==2.11.0; platform_system == "Darwin" or platform_machine == "ppc64le"  or platform_machine == "riscv64"
+wheel
+jinja2>=3.1.6
+regex
diff --git a/requirements/build.txt b/requirements/build/cuda.txt
similarity index 87%
rename from requirements/build.txt
rename to requirements/build/cuda.txt
index c46880a05ebb..70da484a4133 100644
--- a/requirements/build.txt
+++ b/requirements/build/cuda.txt
@@ -4,7 +4,8 @@ ninja
 packaging>=24.2
 setuptools>=77.0.3,<81.0.0
 setuptools-scm>=8
-torch==2.10.0
+setuptools-rust>=1.9.0
+torch==2.11.0
 wheel
 jinja2>=3.1.6
 regex
diff --git a/requirements/rocm-build.txt b/requirements/build/rocm.txt
similarity index 70%
rename from requirements/rocm-build.txt
rename to requirements/build/rocm.txt
index 6f96c7d55742..e5c2176a2c8c 100644
--- a/requirements/rocm-build.txt
+++ b/requirements/build/rocm.txt
@@ -1,15 +1,17 @@
 # Common dependencies
--r common.txt
+-r ../common.txt
+
 
 --extra-index-url https://download.pytorch.org/whl/rocm7.1
-torch==2.10.0
-torchvision==0.25.0
-torchaudio==2.10.0
+torch==2.11.0
+torchvision==0.26.0
+torchaudio==2.11.0
 triton==3.6.0
 cmake>=3.26.1,<4
 packaging>=24.2
 setuptools>=77.0.3,<80.0.0
 setuptools-scm>=8
+setuptools-rust>=1.9.0
 wheel
 jinja2>=3.1.6
 amdsmi==7.0.2
diff --git a/requirements/build/tpu.txt b/requirements/build/tpu.txt
new file mode 100644
index 000000000000..56348e757ecd
--- /dev/null
+++ b/requirements/build/tpu.txt
@@ -0,0 +1,8 @@
+--extra-index-url https://download.pytorch.org/whl/cpu
+cmake>=3.26.1
+ninja
+setuptools>=77.0.3,<81.0.0
+setuptools-scm>=8
+setuptools-rust>=1.9.0
+torch==2.11.0+cpu
+wheel
diff --git a/requirements/common.txt b/requirements/common.txt
index 05666c5d14b0..d37ef1f1fedc 100644
--- a/requirements/common.txt
+++ b/requirements/common.txt
@@ -7,8 +7,9 @@ requests >= 2.26.0
 tqdm
 blake3
 py-cpuinfo
-transformers >= 4.56.0, < 5
+transformers >= 4.56.0, != 5.0.*, != 5.1.*, != 5.2.*, != 5.3.*, != 5.4.*, != 5.5.0
 tokenizers >= 0.21.1  # Required for fast incremental detokenization.
+safetensors >= 0.6.2  # MXFP4/MXFP6 dtype support (F8_E8M0, F4) added in 0.6.0: https://github.com/huggingface/safetensors/pull/611
 protobuf >= 5.29.6, !=6.30.*, !=6.31.*, !=6.32.*, !=6.33.0.*, !=6.33.1.*, !=6.33.2.*, !=6.33.3.*, !=6.33.4.* # Required by LlamaTokenizer, gRPC. CVE-2026-0994
 fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
 aiohttp >= 3.13.3
@@ -19,29 +20,29 @@ pillow  # Required for image processing
 prometheus-fastapi-instrumentator >= 7.0.0
 tiktoken >= 0.6.0  # Required for DBRX tokenizer
 lm-format-enforcer == 0.11.3
-llguidance >= 1.3.0, < 1.4.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" or platform_machine == "s390x" or platform_machine == "ppc64le"
-outlines_core == 0.2.11
+llguidance >= 1.7.0, < 1.8.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" or platform_machine == "ppc64le"
+outlines_core == 0.2.14
 # required for outlines backend disk cache
 diskcache == 5.6.3
 lark == 1.2.2
-xgrammar >= 0.1.32, < 1.0.0; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le"
+xgrammar >= 0.2.0, < 1.0.0; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le"
 typing_extensions >= 4.10
 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
 partial-json-parser # used for parsing partial JSON outputs
 pyzmq >= 25.0.0
 msgspec
 gguf >= 0.17.0
-mistral_common[image] >= 1.10.0
+mistral_common[image] >= 1.11.2
 opencv-python-headless >= 4.13.0    # required for video IO
 pyyaml
 six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
 setuptools>=77.0.3,<81.0.0; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
 einops # Required for Qwen2-VL.
-compressed-tensors == 0.14.0.1 # required for compressed-tensors
+compressed-tensors == 0.15.0.1 # required for compressed-tensors
 depyf==0.20.0 # required for profiling and debugging with compilation config
 cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
 watchfiles # required for http server to monitor the updates of TLS files
-python-json-logger # Used by logging as per examples/others/logging_configuration.md
+python-json-logger # Used by logging as per examples/features/logging_configuration.md
 ninja # Required for xgrammar, rocm, tpu, xpu
 pybase64 # fast base64 implementation
 cbor2 # Required for cross-language serialization of hashable objects
@@ -49,7 +50,7 @@ ijson # Required for mistral streaming tool parser
 setproctitle # Used to set process names for better debugging and monitoring
 openai-harmony >= 0.0.3  # Required for gpt-oss
 anthropic >= 0.71.0
-model-hosting-container-standards >= 0.1.13, < 1.0.0
+model-hosting-container-standards >= 0.1.14, < 1.0.0
 mcp
 opentelemetry-sdk >= 1.27.0
 opentelemetry-api >= 1.27.0
diff --git a/requirements/cpu-build.txt b/requirements/cpu-build.txt
deleted file mode 100644
index 3893b0026978..000000000000
--- a/requirements/cpu-build.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-cmake>=3.26.1
-ninja
-packaging>=24.2
-setuptools==77.0.3 # this version can reuse CMake build dir
-setuptools-scm>=8
-torch==2.10.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
-torch==2.10.0; platform_machine == "aarch64" or platform_system == "Darwin" or platform_machine == "ppc64le"
-wheel
-jinja2>=3.1.6
-regex
diff --git a/requirements/cpu.txt b/requirements/cpu.txt
index 378f61ba8686..5ec338af7362 100644
--- a/requirements/cpu.txt
+++ b/requirements/cpu.txt
@@ -1,13 +1,14 @@
+--extra-index-url https://download.pytorch.org/whl/cpu
 # Common dependencies
 -r common.txt
 
 setuptools==77.0.3 # this version can reuse CMake build dir
 
-numba == 0.61.2; platform_machine != "s390x" # Required for N-gram speculative decoding
+numba == 0.65.0; platform_machine != "s390x" # Required for N-gram speculative decoding
 
 # Dependencies for CPUs
-torch==2.10.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
-torch==2.10.0; platform_machine == "aarch64" or platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "riscv64"
+torch==2.11.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x" or platform_machine == "aarch64"
+torch==2.11.0; platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "riscv64"
 
 # required for the image processor of minicpm-o-2_6, this must be updated alongside torch
 torchaudio; platform_machine != "s390x" and platform_machine != "riscv64"
diff --git a/requirements/cuda.txt b/requirements/cuda.txt
index fe566db357dc..14482744f0c2 100644
--- a/requirements/cuda.txt
+++ b/requirements/cuda.txt
@@ -1,20 +1,28 @@
 # Common dependencies
 -r common.txt
 
-numba == 0.61.2 # Required for N-gram speculative decoding
+numba == 0.65.0 # Required for N-gram speculative decoding
 
 # Dependencies for NVIDIA GPUs
-torch==2.10.0
-torchaudio==2.10.0
+torch==2.11.0
+torchaudio==2.11.0
 # These must be updated alongside torch
-torchvision==0.25.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+torchvision==0.26.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
 # FlashInfer should be updated together with the Dockerfile
-flashinfer-python==0.6.6
-flashinfer-cubin==0.6.6
+flashinfer-python==0.6.11.post2
+flashinfer-cubin==0.6.11.post2
+apache-tvm-ffi==0.1.9
+tilelang==0.1.9
 # Cap nvidia-cudnn-frontend (transitive dep of flashinfer) due to
 # breaking changes in 1.19.0
 nvidia-cudnn-frontend>=1.13.0,<1.19.0
 
+# Required for faster safetensors model loading
+fastsafetensors >= 0.2.2
+
 # QuACK and Cutlass DSL for FA4 (cute-DSL implementation)
-nvidia-cutlass-dsl>=4.4.0.dev1
-quack-kernels>=0.2.7
+nvidia-cutlass-dsl[cu13]==4.5.0
+quack-kernels>=0.3.3
+
+# Tokenspeed_MLA for faster mla with spec decode
+tokenspeed-mla==0.1.2
\ No newline at end of file
diff --git a/requirements/dev.txt b/requirements/dev.txt
index e75821eb4a81..fe0b9eaaf961 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -1,5 +1,5 @@
 -r lint.txt
--r test.txt
+-r test/cuda.txt
 
 # Avoid adding requirements directly to this file.
 # Instead, modify the two files referenced above.
diff --git a/requirements/docs.in b/requirements/docs.in
new file mode 100644
index 000000000000..952e7c09bae9
--- /dev/null
+++ b/requirements/docs.in
@@ -0,0 +1,17 @@
+mkdocs<2.0.0
+mkdocs-api-autonav
+mkdocs-material
+mkdocstrings-python
+mkdocs-gen-files
+mkdocs-awesome-nav
+mkdocs-glightbox
+mkdocs-git-revision-date-localized-plugin
+mkdocs-minify-plugin
+mkdocs-redirects
+regex
+ruff
+pydantic
+
+# For generating argparse docs.
+# Adding requirements here should only be used as a last resort.
+msgspec  # Need for multiple inheritance involving msgspec.Struct
\ No newline at end of file
diff --git a/requirements/docs.txt b/requirements/docs.txt
index 952e7c09bae9..5331a3b79b7d 100644
--- a/requirements/docs.txt
+++ b/requirements/docs.txt
@@ -1,17 +1,182 @@
-mkdocs<2.0.0
-mkdocs-api-autonav
-mkdocs-material
-mkdocstrings-python
-mkdocs-gen-files
-mkdocs-awesome-nav
-mkdocs-glightbox
-mkdocs-git-revision-date-localized-plugin
-mkdocs-minify-plugin
-mkdocs-redirects
-regex
-ruff
-pydantic
-
-# For generating argparse docs.
-# Adding requirements here should only be used as a last resort.
-msgspec  # Need for multiple inheritance involving msgspec.Struct
\ No newline at end of file
+# This file was autogenerated by uv via the following command:
+#    uv pip compile requirements/docs.in -o requirements/docs.txt --python-platform x86_64-manylinux_2_28 --python-version 3.12
+annotated-types==0.7.0
+    # via pydantic
+babel==2.18.0
+    # via
+    #   mkdocs-git-revision-date-localized-plugin
+    #   mkdocs-material
+backrefs==7.0
+    # via mkdocs-material
+bracex==2.6
+    # via wcmatch
+certifi==2026.4.22
+    # via requests
+charset-normalizer==3.4.7
+    # via requests
+click==8.3.3
+    # via
+    #   mkdocs
+    #   properdocs
+colorama==0.4.6
+    # via mkdocs-material
+csscompressor==0.9.5
+    # via mkdocs-minify-plugin
+ghp-import==2.1.0
+    # via
+    #   mkdocs
+    #   properdocs
+gitdb==4.0.12
+    # via gitpython
+gitpython==3.1.50
+    # via mkdocs-git-revision-date-localized-plugin
+griffelib==2.0.2
+    # via mkdocstrings-python
+htmlmin2==0.1.13
+    # via mkdocs-minify-plugin
+idna==3.13
+    # via requests
+jinja2==3.1.6
+    # via
+    #   mkdocs
+    #   mkdocs-material
+    #   mkdocstrings
+    #   properdocs
+jsmin==3.0.1
+    # via mkdocs-minify-plugin
+markdown==3.10.2
+    # via
+    #   mkdocs
+    #   mkdocs-autorefs
+    #   mkdocs-material
+    #   mkdocstrings
+    #   properdocs
+    #   pymdown-extensions
+markupsafe==3.0.3
+    # via
+    #   jinja2
+    #   mkdocs
+    #   mkdocs-autorefs
+    #   mkdocstrings
+    #   properdocs
+mergedeep==1.3.4
+    # via
+    #   mkdocs
+    #   mkdocs-get-deps
+mkdocs==1.6.1
+    # via
+    #   -r requirements/docs.in
+    #   mkdocs-api-autonav
+    #   mkdocs-autorefs
+    #   mkdocs-awesome-nav
+    #   mkdocs-gen-files
+    #   mkdocs-git-revision-date-localized-plugin
+    #   mkdocs-material
+    #   mkdocs-minify-plugin
+    #   mkdocs-redirects
+    #   mkdocstrings
+mkdocs-api-autonav==0.4.0
+    # via -r requirements/docs.in
+mkdocs-autorefs==1.4.4
+    # via
+    #   mkdocstrings
+    #   mkdocstrings-python
+mkdocs-awesome-nav==3.3.0
+    # via -r requirements/docs.in
+mkdocs-gen-files==0.6.1
+    # via -r requirements/docs.in
+mkdocs-get-deps==0.2.2
+    # via mkdocs
+mkdocs-git-revision-date-localized-plugin==1.5.1
+    # via -r requirements/docs.in
+mkdocs-glightbox==0.5.2
+    # via -r requirements/docs.in
+mkdocs-material==9.7.6
+    # via -r requirements/docs.in
+mkdocs-material-extensions==1.3.1
+    # via mkdocs-material
+mkdocs-minify-plugin==0.8.0
+    # via -r requirements/docs.in
+mkdocs-redirects==1.2.3
+    # via -r requirements/docs.in
+mkdocstrings==1.0.4
+    # via mkdocstrings-python
+mkdocstrings-python==2.0.3
+    # via
+    #   -r requirements/docs.in
+    #   mkdocs-api-autonav
+msgspec==0.21.1
+    # via -r requirements/docs.in
+natsort==8.4.0
+    # via mkdocs-awesome-nav
+packaging==26.2
+    # via
+    #   mkdocs
+    #   properdocs
+paginate==0.5.7
+    # via mkdocs-material
+pathspec==1.1.1
+    # via
+    #   mkdocs
+    #   properdocs
+platformdirs==4.9.6
+    # via
+    #   mkdocs-get-deps
+    #   properdocs
+properdocs==1.6.7
+    # via
+    #   mkdocs-gen-files
+    #   mkdocs-redirects
+pydantic==2.13.4
+    # via
+    #   -r requirements/docs.in
+    #   mkdocs-awesome-nav
+pydantic-core==2.46.4
+    # via pydantic
+pygments==2.20.0
+    # via mkdocs-material
+pymdown-extensions==10.21.2
+    # via
+    #   mkdocs-material
+    #   mkdocstrings
+python-dateutil==2.9.0.post0
+    # via ghp-import
+pyyaml==6.0.3
+    # via
+    #   mkdocs
+    #   mkdocs-api-autonav
+    #   mkdocs-get-deps
+    #   properdocs
+    #   pymdown-extensions
+    #   pyyaml-env-tag
+pyyaml-env-tag==1.1
+    # via
+    #   mkdocs
+    #   properdocs
+regex==2026.4.4
+    # via -r requirements/docs.in
+requests==2.33.1
+    # via mkdocs-material
+ruff==0.15.12
+    # via -r requirements/docs.in
+selectolax==0.4.8
+    # via mkdocs-glightbox
+six==1.17.0
+    # via python-dateutil
+smmap==5.0.3
+    # via gitdb
+typing-extensions==4.15.0
+    # via
+    #   pydantic
+    #   pydantic-core
+    #   typing-inspection
+typing-inspection==0.4.2
+    # via pydantic
+urllib3==2.6.3
+    # via requests
+watchdog==6.0.0
+    # via
+    #   mkdocs
+    #   properdocs
+wcmatch==10.1
+    # via mkdocs-awesome-nav
diff --git a/requirements/kv_connectors.txt b/requirements/kv_connectors.txt
index 1164720e0dd6..6699442872c1 100644
--- a/requirements/kv_connectors.txt
+++ b/requirements/kv_connectors.txt
@@ -1,3 +1,3 @@
 lmcache >= 0.3.9
-nixl >= 0.7.1, < 0.10.0 # Required for disaggregated prefill
+nixl >= 1.1.0 # Required for disaggregated prefill
 mooncake-transfer-engine >= 0.3.8
diff --git a/requirements/rocm.txt b/requirements/rocm.txt
index 6639e71a4b93..61fcbc07010c 100644
--- a/requirements/rocm.txt
+++ b/requirements/rocm.txt
@@ -5,7 +5,7 @@
 grpcio==1.78.0
 grpcio-reflection==1.78.0
 
-numba == 0.61.2 # Required for N-gram speculative decoding
+numba == 0.65.0 # Required for N-gram speculative decoding
 
 # Dependencies for AMD GPUs
 datasets
@@ -15,9 +15,10 @@ tensorizer==2.10.1
 packaging>=24.2
 setuptools>=77.0.3,<80.0.0
 setuptools-scm>=8
+setuptools-rust>=1.9.0
 runai-model-streamer[s3,gcs,azure]==0.15.7
 conch-triton-kernels==1.2.1
 timm>=1.0.17
 # amd-quark: required for Quark quantization on ROCm 
 # To be consistent with test_quark.py
-amd-quark>=0.8.99
\ No newline at end of file
+amd-quark>=0.8.99
diff --git a/requirements/test.in b/requirements/test/cuda.in
similarity index 77%
rename from requirements/test.in
rename to requirements/test/cuda.in
index dc47976b1a60..1a71e3ee0e43 100644
--- a/requirements/test.in
+++ b/requirements/test/cuda.in
@@ -18,29 +18,28 @@ httpx
 librosa # required for audio tests
 vector_quantize_pytorch # required for minicpmo_26 test
 vocos # required for minicpmo_26 test
-peft>=0.15.0 # required for phi-4-mm test
+peft>=0.18.1 # required for phi-4-mm test
 pqdm
 ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests
-resampy # required for audio tests
 sentence-transformers>=5.2.0 # required for embedding tests
 soundfile # required for audio tests
 jiwer # required for audio tests
 tblib # for pickling test exceptions
 timm >=1.0.17 # required for internvl and gemma3n-mm test
-torch==2.10.0
-torchaudio==2.10.0
-torchvision==0.25.0
+torch==2.11.0
+torchaudio==2.11.0
+torchvision==0.26.0
 transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
-mistral_common[image,audio] >= 1.9.1 # required for voxtral test
+mistral_common[image,audio] >= 1.11.2 # required for voxtral test
 num2words # required for smolvlm test
 open_clip_torch==2.32.0 # Required for nemotron_vl test, Nemotron Parse in test_common.py
 opencv-python-headless >= 4.13.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
-lm-eval[api]>=0.4.11 # required for model evaluation test
+lm-eval[api]>=0.4.12 # required for model evaluation test
 mteb[bm25s]>=2, <3 # required for mteb test
-transformers==4.57.5
-tokenizers==0.22.0
+transformers==5.5.3
+tokenizers==0.22.2
 schemathesis>=3.39.15 # Required for openai schema test.
 # quantization
 bitsandbytes==0.49.2
@@ -55,14 +54,18 @@ grpcio==1.78.0
 grpcio-reflection==1.78.0
 
 arctic-inference == 0.1.1 # Required for suffix decoding test
-numba == 0.61.2 # Required for N-gram speculative decoding
+numba == 0.65.0 # Required for N-gram speculative decoding
 numpy
 runai-model-streamer[s3,gcs,azure]==0.15.7
 fastsafetensors>=0.2.2 # 0.2.2 contains important fixes for multi-GPU mem usage
 instanttensor>=0.1.5
 pydantic>=2.12 # 2.11 leads to error on python 3.13
 decord==0.6.0; platform_machine == "x86_64"
-terratorch >= 1.2.2 # Required for Prithvi tests
+# terratorch is temporarily disabled while PyPI has the `lightning` package
+# in `quarantined` status (every published terratorch version transitively
+# requires `lightning`, so the resolver fails with "no versions of lightning").
+# Re-enable once PyPI lifts the quarantine. Tracked in #41376.
+# terratorch >= 1.2.2 # Required for Prithvi tests
 imagehash # Required for Prithvi tests
 segmentation-models-pytorch > 0.4.0 # Required for Prithvi tests
 
diff --git a/requirements/test.txt b/requirements/test/cuda.txt
similarity index 68%
rename from requirements/test.txt
rename to requirements/test/cuda.txt
index 7d3a988a729d..245a86f93beb 100644
--- a/requirements/test.txt
+++ b/requirements/test/cuda.txt
@@ -1,19 +1,14 @@
 # This file was autogenerated by uv via the following command:
-#    uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match --torch-backend cu129 --python-platform x86_64-manylinux_2_28 --python-version 3.12
+#    uv pip compile requirements/test/cuda.in -c requirements/cuda.txt -o requirements/test/cuda.txt --index-strategy unsafe-best-match --torch-backend cu130 --python-platform x86_64-manylinux_2_28 --python-version 3.12
 absl-py==2.1.0
-    # via
-    #   rouge-score
-    #   tensorboard
-accelerate==1.0.1
+    # via rouge-score
+accelerate==1.13.0
     # via peft
-aenum==3.1.16
-    # via lightly
-affine==2.4.0
-    # via rasterio
 aiohappyeyeballs==2.6.1
     # via aiohttp
 aiohttp==3.13.3
     # via
+    #   -c requirements/common.txt
     #   aiohttp-cors
     #   datasets
     #   fsspec
@@ -24,28 +19,20 @@ aiohttp-cors==0.8.1
     # via ray
 aiosignal==1.4.0
     # via aiohttp
-albucore==0.0.16
-    # via terratorch
 albumentations==1.4.6
-    # via
-    #   -r requirements/test.in
-    #   terratorch
+    # via -r requirements/test/cuda.in
 alembic==1.16.4
     # via optuna
 annotated-doc==0.0.4
     # via fastapi
 annotated-types==0.7.0
     # via pydantic
-antlr4-python3-runtime==4.9.3
-    # via
-    #   hydra-core
-    #   omegaconf
 anyio==4.6.2.post1
     # via
     #   httpx
     #   starlette
 arctic-inference==0.1.1
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 argcomplete==3.5.1
     # via datamodel-code-generator
 arrow==1.3.0
@@ -53,17 +40,14 @@ arrow==1.3.0
 attrs==24.2.0
     # via
     #   aiohttp
-    #   fiona
     #   hypothesis
-    #   jsonlines
     #   jsonschema
     #   pytest-subtests
-    #   rasterio
     #   referencing
 audioread==3.0.1
     # via librosa
 av==16.1.0
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 azure-core==1.38.2
     # via
     #   azure-identity
@@ -74,16 +58,14 @@ azure-storage-blob==12.28.0
     # via runai-model-streamer-azure
 backoff==2.2.1
     # via
-    #   -r requirements/test.in
+    #   -r requirements/test/cuda.in
     #   schemathesis
 bitsandbytes==0.49.2
-    # via
-    #   -r requirements/test.in
-    #   lightning
+    # via -r requirements/test/cuda.in
 black==24.10.0
     # via datamodel-code-generator
 blobfile==3.0.0
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 bm25s==0.2.13
     # via mteb
 boto3==1.35.57
@@ -97,20 +79,14 @@ botocore==1.35.57
 bounded-pool-executor==0.0.3
     # via pqdm
 buildkite-test-collector==0.1.9
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 cachetools==5.5.2
     # via google-auth
 certifi==2024.8.30
     # via
-    #   fiona
     #   httpcore
     #   httpx
-    #   lightly
-    #   pyogrio
-    #   pyproj
-    #   rasterio
     #   requests
-    #   sentry-sdk
 cffi==2.0.0
     # via
     #   cryptography
@@ -124,25 +100,12 @@ chz==0.3.0
 click==8.1.7
     # via
     #   black
-    #   click-plugins
-    #   cligj
-    #   fiona
     #   jiwer
     #   nltk
-    #   rasterio
     #   ray
     #   schemathesis
     #   typer
     #   uvicorn
-    #   wandb
-click-plugins==1.1.1.2
-    # via
-    #   fiona
-    #   rasterio
-cligj==0.7.2
-    # via
-    #   fiona
-    #   rasterio
 colorama==0.4.6
     # via
     #   perceptron
@@ -164,32 +127,32 @@ cryptography==46.0.5
     #   azure-storage-blob
     #   msal
     #   pyjwt
-cuda-bindings==12.9.4
+cuda-bindings==13.0.3
     # via torch
 cuda-pathfinder==1.3.3
     # via cuda-bindings
+cuda-toolkit==13.0.2
+    # via torch
 cupy-cuda12x==13.6.0
     # via ray
 cycler==0.12.1
     # via matplotlib
 datamodel-code-generator==0.26.3
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 dataproperty==1.0.1
     # via
     #   pytablewriter
     #   tabledata
 datasets==3.3.0
     # via
-    #   -r requirements/test.in
+    #   -r requirements/test/cuda.in
     #   evaluate
     #   lm-eval
     #   mteb
 decorator==5.1.1
     # via librosa
 decord==0.6.0
-    # via -r requirements/test.in
-diffusers==0.36.0
-    # via terratorch
+    # via -r requirements/test/cuda.in
 dill==0.3.8
     # via
     #   datasets
@@ -204,14 +167,10 @@ docker==7.1.0
     # via gpt-oss
 docopt==0.6.2
     # via num2words
-docstring-parser==0.17.0
-    # via jsonargparse
 einops==0.8.1
     # via
-    #   -r requirements/test.in
+    #   -r requirements/test/cuda.in
     #   encodec
-    #   terratorch
-    #   torchgeo
     #   vector-quantize-pytorch
     #   vocos
 einx==0.3.0
@@ -225,25 +184,26 @@ et-xmlfile==2.0.0
 evaluate==0.4.3
     # via lm-eval
 fastapi==0.128.0
-    # via gpt-oss
+    # via
+    #   -c requirements/common.txt
+    #   gpt-oss
 fastparquet==2024.11.0
     # via genai-perf
 fastrlock==0.8.2
     # via cupy-cuda12x
 fastsafetensors==0.2.2
-    # via -r requirements/test.in
+    # via
+    #   -c requirements/cuda.txt
+    #   -r requirements/test/cuda.in
 filelock==3.16.1
     # via
+    #   -c requirements/common.txt
     #   blobfile
     #   datasets
-    #   diffusers
     #   huggingface-hub
     #   ray
     #   torch
-    #   transformers
     #   virtualenv
-fiona==1.10.1
-    # via torchgeo
 fonttools==4.55.0
     # via matplotlib
 fqdn==1.5.1
@@ -260,22 +220,13 @@ fsspec==2024.12.0
     #   evaluate
     #   fastparquet
     #   huggingface-hub
-    #   lightning
-    #   pytorch-lightning
-    #   tacoreader
     #   torch
 ftfy==6.3.1
     # via open-clip-torch
 genai-perf==0.0.16
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 genson==1.3.0
     # via datamodel-code-generator
-geopandas==1.0.1
-    # via terratorch
-gitdb==4.0.12
-    # via gitpython
-gitpython==3.1.44
-    # via wandb
 google-api-core==2.24.2
     # via
     #   google-cloud-core
@@ -300,30 +251,27 @@ google-resumable-media==2.7.2
 googleapis-common-protos==1.70.0
     # via google-api-core
 gpt-oss==0.0.8
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 graphql-core==3.2.6
     # via hypothesis-graphql
 greenlet==3.2.3
     # via sqlalchemy
 grpcio==1.78.0
     # via
-    #   -r requirements/test.in
+    #   -r requirements/test/cuda.in
     #   grpcio-reflection
     #   ray
-    #   tensorboard
 grpcio-reflection==1.78.0
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 h11==0.14.0
     # via
     #   httpcore
     #   uvicorn
 h2==4.3.0
     # via httpx
-h5py==3.13.0
-    # via terratorch
 harfile==0.3.0
     # via schemathesis
-hf-xet==1.1.7
+hf-xet==1.4.3
     # via huggingface-hub
 hiredis==3.0.0
     # via tensorizer
@@ -335,31 +283,25 @@ httpcore==1.0.6
     # via httpx
 httpx==0.27.2
     # via
-    #   -r requirements/test.in
-    #   diffusers
+    #   -r requirements/test/cuda.in
+    #   huggingface-hub
     #   perceptron
     #   schemathesis
-huggingface-hub==0.36.2
+huggingface-hub==1.10.2
     # via
     #   accelerate
     #   datasets
-    #   diffusers
     #   evaluate
     #   open-clip-torch
     #   peft
     #   segmentation-models-pytorch
     #   sentence-transformers
-    #   terratorch
     #   timm
     #   tokenizers
     #   transformers
     #   vocos
 humanize==4.11.0
     # via runai-model-streamer
-hydra-core==1.3.2
-    # via
-    #   lightly
-    #   lightning
 hyperframe==6.1.0
     # via h2
 hypothesis==6.131.0
@@ -380,21 +322,17 @@ idna==3.10
     #   requests
     #   yarl
 imagehash==4.3.2
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 imageio==2.37.0
     # via scikit-image
 importlib-metadata==8.7.0
-    # via
-    #   diffusers
-    #   opentelemetry-api
-importlib-resources==6.5.2
-    # via typeshed-client
+    # via opentelemetry-api
 inflect==5.6.2
     # via datamodel-code-generator
 iniconfig==2.0.0
     # via pytest
 instanttensor==0.1.5
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 isodate==0.7.2
     # via azure-storage-blob
 isoduration==20.11.0
@@ -408,7 +346,7 @@ jinja2==3.1.6
     #   lm-eval
     #   torch
 jiwer==3.0.5
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 jmespath==1.0.1
     # via
     #   boto3
@@ -418,14 +356,6 @@ joblib==1.4.2
     #   librosa
     #   nltk
     #   scikit-learn
-jsonargparse==4.46.0
-    # via
-    #   lightning
-    #   terratorch
-jsonlines==4.0.0
-    # via lm-eval
-jsonnet==0.21.0
-    # via jsonargparse
 jsonpointer==3.0.0
     # via jsonschema
 jsonschema==4.23.0
@@ -439,15 +369,11 @@ jsonschema-specifications==2024.10.1
 junit-xml==1.9
     # via schemathesis
 kaldi-native-fbank==1.22.3
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 kaleido==0.2.1
     # via genai-perf
 kiwisolver==1.4.7
     # via matplotlib
-kornia==0.8.1
-    # via torchgeo
-kornia-rs==0.1.9
-    # via kornia
 lazy-loader==0.4
     # via
     #   librosa
@@ -455,26 +381,11 @@ lazy-loader==0.4
 libnacl==2.1.0
     # via tensorizer
 librosa==0.10.2.post1
-    # via -r requirements/test.in
-lightly==1.5.22
-    # via
-    #   terratorch
-    #   torchgeo
-lightly-utils==0.0.2
-    # via lightly
-lightning==2.6.1
-    # via
-    #   terratorch
-    #   torchgeo
-lightning-utilities==0.14.3
-    # via
-    #   lightning
-    #   pytorch-lightning
-    #   torchmetrics
-llvmlite==0.44.0
+    # via -r requirements/test/cuda.in
+llvmlite==0.47.0
     # via numba
-lm-eval==0.4.11
-    # via -r requirements/test.in
+lm-eval==0.4.12
+    # via -r requirements/test/cuda.in
 lxml==5.3.0
     # via
     #   blobfile
@@ -482,8 +393,6 @@ lxml==5.3.0
     #   sacrebleu
 mako==1.3.10
     # via alembic
-markdown==3.8.2
-    # via tensorboard
 markdown-it-py==3.0.0
     # via rich
 markupsafe==3.0.1
@@ -492,11 +401,7 @@ markupsafe==3.0.1
     #   mako
     #   werkzeug
 matplotlib==3.9.2
-    # via
-    #   -r requirements/test.in
-    #   lightning
-    #   pycocotools
-    #   torchgeo
+    # via -r requirements/test/cuda.in
 mbstrdecoder==1.1.3
     # via
     #   dataproperty
@@ -504,8 +409,10 @@ mbstrdecoder==1.1.3
     #   typepy
 mdurl==0.1.2
     # via markdown-it-py
-mistral-common==1.10.0
-    # via -r requirements/test.in
+mistral-common==1.11.2
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/cuda.in
 more-itertools==10.5.0
     # via lm-eval
 mpmath==1.3.0
@@ -521,7 +428,7 @@ msgpack==1.1.0
     #   librosa
     #   ray
 mteb==2.8.3
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 multidict==6.1.0
     # via
     #   aiohttp
@@ -539,17 +446,16 @@ networkx==3.2.1
 nltk==3.9.1
     # via rouge-score
 num2words==0.5.14
-    # via -r requirements/test.in
-numba==0.61.2
+    # via -r requirements/test/cuda.in
+numba==0.65.0
     # via
-    #   -r requirements/test.in
+    #   -c requirements/cuda.txt
+    #   -r requirements/test/cuda.in
     #   librosa
-    #   resampy
 numpy==2.2.6
     # via
-    #   -r requirements/test.in
+    #   -r requirements/test/cuda.in
     #   accelerate
-    #   albucore
     #   albumentations
     #   bitsandbytes
     #   bm25s
@@ -557,19 +463,14 @@ numpy==2.2.6
     #   cupy-cuda12x
     #   datasets
     #   decord
-    #   diffusers
     #   einx
     #   encodec
     #   evaluate
     #   fastparquet
     #   genai-perf
-    #   geopandas
-    #   h5py
     #   imagehash
     #   imageio
     #   librosa
-    #   lightly
-    #   lightly-utils
     #   lm-eval
     #   matplotlib
     #   mistral-common
@@ -581,12 +482,7 @@ numpy==2.2.6
     #   patsy
     #   peft
     #   perceptron
-    #   pycocotools
-    #   pyogrio
     #   pywavelets
-    #   rasterio
-    #   resampy
-    #   rioxarray
     #   rouge-score
     #   runai-model-streamer
     #   sacrebleu
@@ -594,91 +490,84 @@ numpy==2.2.6
     #   scikit-learn
     #   scipy
     #   segmentation-models-pytorch
-    #   shapely
     #   soxr
     #   statsmodels
-    #   tensorboard
-    #   tensorboardx
     #   tensorizer
-    #   terratorch
     #   tifffile
-    #   torchgeo
-    #   torchmetrics
     #   torchvision
     #   transformers
     #   tritonclient
     #   vocos
-    #   xarray
-nvidia-cublas-cu12==12.9.1.4
-    # via
-    #   nvidia-cudnn-cu12
-    #   nvidia-cusolver-cu12
-    #   torch
-nvidia-cuda-cupti-cu12==12.9.79
+nvidia-cublas==13.1.0.3
+    # via
+    #   cuda-toolkit
+    #   nvidia-cudnn-cu13
+    #   nvidia-cusolver
+nvidia-cuda-cupti==13.0.85
+    # via cuda-toolkit
+nvidia-cuda-nvrtc==13.0.88
+    # via cuda-toolkit
+nvidia-cuda-runtime==13.0.96
+    # via cuda-toolkit
+nvidia-cudnn-cu13==9.19.0.56
     # via torch
-nvidia-cuda-nvrtc-cu12==12.9.86
-    # via torch
-nvidia-cuda-runtime-cu12==12.9.79
-    # via torch
-nvidia-cudnn-cu12==9.10.2.21
-    # via torch
-nvidia-cufft-cu12==11.4.1.4
-    # via torch
-nvidia-cufile-cu12==1.14.1.1
-    # via torch
-nvidia-curand-cu12==10.3.10.19
-    # via torch
-nvidia-cusolver-cu12==11.7.5.82
-    # via torch
-nvidia-cusparse-cu12==12.5.10.65
-    # via
-    #   nvidia-cusolver-cu12
-    #   torch
-nvidia-cusparselt-cu12==0.7.1
+nvidia-cufft==12.0.0.61
+    # via cuda-toolkit
+nvidia-cufile==1.15.1.6
+    # via cuda-toolkit
+nvidia-curand==10.4.0.35
+    # via cuda-toolkit
+nvidia-cusolver==12.0.4.66
+    # via cuda-toolkit
+nvidia-cusparse==12.6.3.3
+    # via
+    #   cuda-toolkit
+    #   nvidia-cusolver
+nvidia-cusparselt-cu13==0.8.0
     # via torch
-nvidia-nccl-cu12==2.27.5
+nvidia-nccl-cu13==2.28.9
     # via torch
-nvidia-nvjitlink-cu12==12.9.86
+nvidia-nvjitlink==13.0.88
     # via
-    #   nvidia-cufft-cu12
-    #   nvidia-cusolver-cu12
-    #   nvidia-cusparse-cu12
-    #   torch
-nvidia-nvshmem-cu12==3.4.5
+    #   cuda-toolkit
+    #   nvidia-cufft
+    #   nvidia-cusolver
+    #   nvidia-cusparse
+nvidia-nvshmem-cu13==3.4.5
     # via torch
-nvidia-nvtx-cu12==12.9.79
-    # via torch
-omegaconf==2.3.0
-    # via
-    #   hydra-core
-    #   lightning
+nvidia-nvtx==13.0.85
+    # via cuda-toolkit
 open-clip-torch==2.32.0
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 openai-harmony==0.0.4
-    # via gpt-oss
+    # via
+    #   -c requirements/common.txt
+    #   gpt-oss
 opencensus==0.11.4
     # via ray
 opencensus-context==0.1.3
     # via opencensus
 opencv-python-headless==4.13.0.90
     # via
-    #   -r requirements/test.in
-    #   albucore
+    #   -c requirements/common.txt
+    #   -r requirements/test/cuda.in
     #   albumentations
     #   mistral-common
 openpyxl==3.1.5
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 opentelemetry-api==1.35.0
     # via
+    #   -c requirements/common.txt
     #   opentelemetry-exporter-prometheus
     #   opentelemetry-sdk
     #   opentelemetry-semantic-conventions
 opentelemetry-exporter-prometheus==0.56b0
     # via ray
-opentelemetry-proto==1.36.0
+opentelemetry-proto==1.35.0
     # via ray
 opentelemetry-sdk==1.35.0
     # via
+    #   -c requirements/common.txt
     #   opentelemetry-exporter-prometheus
     #   ray
 opentelemetry-semantic-conventions==0.56b0
@@ -696,80 +585,58 @@ packaging==24.2
     #   datasets
     #   evaluate
     #   fastparquet
-    #   geopandas
     #   huggingface-hub
-    #   hydra-core
-    #   kornia
     #   lazy-loader
-    #   lightning
-    #   lightning-utilities
     #   matplotlib
     #   optuna
     #   peft
     #   plotly
     #   pooch
-    #   pyogrio
     #   pytest
     #   pytest-rerunfailures
-    #   pytorch-lightning
     #   ray
-    #   rioxarray
     #   scikit-image
     #   statsmodels
-    #   tensorboard
-    #   tensorboardx
-    #   torchmetrics
     #   transformers
     #   typepy
-    #   wandb
-    #   xarray
 pandas==2.2.3
     # via
     #   datasets
     #   evaluate
     #   fastparquet
     #   genai-perf
-    #   geopandas
     #   statsmodels
-    #   tacoreader
-    #   torchgeo
-    #   xarray
 pathspec==0.12.1
     # via black
 pathvalidate==3.2.1
     # via pytablewriter
 patsy==1.0.1
     # via statsmodels
-peft==0.16.0
-    # via -r requirements/test.in
+peft==0.18.1
+    # via -r requirements/test/cuda.in
 perceptron==0.1.4
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 perf-analyzer==0.1.0
     # via genai-perf
 pillow==10.4.0
     # via
-    #   diffusers
     #   genai-perf
     #   imagehash
     #   imageio
-    #   lightly-utils
     #   matplotlib
     #   mistral-common
     #   perceptron
     #   scikit-image
     #   segmentation-models-pytorch
-    #   tensorboard
-    #   torchgeo
     #   torchvision
 platformdirs==4.3.6
     # via
     #   black
     #   pooch
     #   virtualenv
-    #   wandb
 plotly==5.24.1
     # via
-    #   -r requirements/test.in
+    #   -r requirements/test/cuda.in
     #   genai-perf
 pluggy==1.5.0
     # via
@@ -782,9 +649,10 @@ pooch==1.8.2
 portalocker==2.10.1
     # via sacrebleu
 pqdm==0.2.0
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 prometheus-client==0.22.0
     # via
+    #   -c requirements/common.txt
     #   opentelemetry-exporter-prometheus
     #   ray
 propcache==0.2.0
@@ -793,18 +661,16 @@ propcache==0.2.0
     #   yarl
 proto-plus==1.26.1
     # via google-api-core
-protobuf==6.33.2
+protobuf==6.33.6
     # via
+    #   -c requirements/common.txt
     #   google-api-core
     #   googleapis-common-protos
     #   grpcio-reflection
     #   opentelemetry-proto
     #   proto-plus
     #   ray
-    #   tensorboard
-    #   tensorboardx
     #   tensorizer
-    #   wandb
 psutil==6.1.0
     # via
     #   accelerate
@@ -818,16 +684,12 @@ pyarrow==23.0.0
     # via
     #   datasets
     #   genai-perf
-    #   tacoreader
-    #   terratorch
 pyasn1==0.6.1
     # via
     #   pyasn1-modules
     #   rsa
 pyasn1-modules==0.4.2
     # via google-auth
-pycocotools==2.0.8
-    # via terratorch
 pycountry==24.6.1
     # via pydantic-extra-types
 pycparser==2.22
@@ -836,18 +698,17 @@ pycryptodomex==3.22.0
     # via blobfile
 pydantic==2.12.0
     # via
-    #   -r requirements/test.in
+    #   -c requirements/common.txt
+    #   -r requirements/test/cuda.in
     #   albumentations
     #   datamodel-code-generator
     #   fastapi
     #   gpt-oss
-    #   lightly
     #   mistral-common
     #   mteb
     #   openai-harmony
     #   pydantic-extra-types
     #   ray
-    #   wandb
 pydantic-core==2.41.1
     # via pydantic
 pydantic-extra-types==2.10.5
@@ -856,17 +717,8 @@ pygments==2.18.0
     # via rich
 pyjwt==2.11.0
     # via msal
-pyogrio==0.11.0
-    # via geopandas
 pyparsing==3.2.0
-    # via
-    #   matplotlib
-    #   rasterio
-pyproj==3.7.1
-    # via
-    #   geopandas
-    #   rioxarray
-    #   torchgeo
+    # via matplotlib
 pyrate-limiter==3.7.0
     # via schemathesis
 pystemmer==3.0.0
@@ -875,7 +727,7 @@ pytablewriter==1.2.0
     # via lm-eval
 pytest==8.3.5
     # via
-    #   -r requirements/test.in
+    #   -r requirements/test/cuda.in
     #   buildkite-test-collector
     #   genai-perf
     #   pytest-asyncio
@@ -888,37 +740,30 @@ pytest==8.3.5
     #   pytest-timeout
     #   schemathesis
 pytest-asyncio==0.24.0
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 pytest-cov==6.3.0
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 pytest-forked==1.6.0
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 pytest-mock==3.14.0
     # via genai-perf
 pytest-rerunfailures==14.0
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 pytest-shard==0.1.2
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 pytest-subtests==0.14.1
     # via schemathesis
 pytest-timeout==2.3.1
-    # via -r requirements/test.in
-python-box==7.3.2
-    # via terratorch
+    # via -r requirements/test/cuda.in
 python-dateutil==2.9.0.post0
     # via
     #   arrow
     #   botocore
-    #   lightly
     #   matplotlib
     #   pandas
     #   typepy
 python-rapidjson==1.20
     # via tritonclient
-pytorch-lightning==2.5.2
-    # via
-    #   lightly
-    #   lightning
 pytrec-eval-terrier==0.5.7
     # via mteb
 pytz==2024.2
@@ -935,37 +780,26 @@ pyyaml==6.0.2
     #   datasets
     #   genai-perf
     #   huggingface-hub
-    #   jsonargparse
-    #   lightning
-    #   omegaconf
     #   optuna
     #   peft
-    #   pytorch-lightning
     #   ray
     #   responses
     #   schemathesis
     #   timm
     #   transformers
     #   vocos
-    #   wandb
 rapidfuzz==3.12.1
     # via jiwer
-rasterio==1.4.3
-    # via
-    #   rioxarray
-    #   terratorch
-    #   torchgeo
 ray==2.48.0
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 redis==5.2.0
     # via tensorizer
 referencing==0.35.1
     # via
     #   jsonschema
     #   jsonschema-specifications
-regex==2024.9.11
+regex==2026.2.28
     # via
-    #   diffusers
     #   nltk
     #   open-clip-torch
     #   sacrebleu
@@ -973,17 +807,15 @@ regex==2024.9.11
     #   transformers
 requests==2.32.3
     # via
+    #   -c requirements/common.txt
     #   azure-core
     #   buildkite-test-collector
     #   datasets
-    #   diffusers
     #   docker
     #   evaluate
     #   google-api-core
     #   google-cloud-storage
     #   gpt-oss
-    #   huggingface-hub
-    #   lightly
     #   lm-eval
     #   mistral-common
     #   msal
@@ -993,12 +825,7 @@ requests==2.32.3
     #   responses
     #   schemathesis
     #   starlette-testclient
-    #   tacoreader
     #   tiktoken
-    #   transformers
-    #   wandb
-resampy==0.4.3
-    # via -r requirements/test.in
 responses==0.25.3
     # via genai-perf
 rfc3339-validator==0.1.4
@@ -1008,13 +835,9 @@ rfc3987==1.3.8
 rich==13.9.4
     # via
     #   genai-perf
-    #   lightning
     #   mteb
     #   perceptron
-    #   terratorch
     #   typer
-rioxarray==0.19.0
-    # via terratorch
 rouge-score==0.1.2
     # via lm-eval
 rpds-py==0.20.1
@@ -1023,10 +846,8 @@ rpds-py==0.20.1
     #   referencing
 rsa==4.9.1
     # via google-auth
-rtree==1.4.0
-    # via torchgeo
 runai-model-streamer==0.15.7
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 runai-model-streamer-azure==0.15.7
     # via runai-model-streamer
 runai-model-streamer-gcs==0.15.7
@@ -1037,21 +858,19 @@ s3transfer==0.10.3
     # via boto3
 sacrebleu==2.4.3
     # via lm-eval
-safetensors==0.4.5
+safetensors==0.7.0
     # via
+    #   -c requirements/common.txt
     #   accelerate
-    #   diffusers
     #   open-clip-torch
     #   peft
     #   segmentation-models-pytorch
     #   timm
     #   transformers
 schemathesis==3.39.15
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 scikit-image==0.25.2
-    # via
-    #   albumentations
-    #   terratorch
+    # via albumentations
 scikit-learn==1.5.2
     # via
     #   albumentations
@@ -1059,7 +878,6 @@ scikit-learn==1.5.2
     #   lm-eval
     #   mteb
     #   sentence-transformers
-    #   terratorch
 scipy==1.13.1
     # via
     #   albumentations
@@ -1073,42 +891,30 @@ scipy==1.13.1
     #   statsmodels
     #   vocos
 segmentation-models-pytorch==0.5.0
-    # via
-    #   -r requirements/test.in
-    #   terratorch
-    #   torchgeo
+    # via -r requirements/test/cuda.in
 sentence-transformers==5.2.0
     # via
-    #   -r requirements/test.in
+    #   -r requirements/test/cuda.in
     #   mteb
-sentry-sdk==2.52.0
-    # via wandb
 setuptools==77.0.3
     # via
-    #   lightning-utilities
+    #   -c requirements/common.txt
     #   pytablewriter
-    #   tensorboard
     #   torch
-shapely==2.1.1
-    # via
-    #   geopandas
-    #   torchgeo
 shellingham==1.5.4
     # via
     #   perceptron
     #   typer
 six==1.16.0
     # via
+    #   -c requirements/common.txt
     #   junit-xml
-    #   lightly
     #   opencensus
     #   python-dateutil
     #   rfc3339-validator
     #   rouge-score
 smart-open==7.1.0
     # via ray
-smmap==5.0.2
-    # via gitdb
 sniffio==1.3.1
     # via
     #   anyio
@@ -1117,7 +923,7 @@ sortedcontainers==2.4.0
     # via hypothesis
 soundfile==0.12.1
     # via
-    #   -r requirements/test.in
+    #   -r requirements/test/cuda.in
     #   genai-perf
     #   librosa
     #   mistral-common
@@ -1150,10 +956,8 @@ tabledata==1.3.3
     # via pytablewriter
 tabulate==0.9.0
     # via sacrebleu
-tacoreader==0.5.6
-    # via terratorch
 tblib==3.1.0
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 tcolorpy==0.1.6
     # via pytablewriter
 tenacity==9.1.2
@@ -1161,101 +965,71 @@ tenacity==9.1.2
     #   gpt-oss
     #   lm-eval
     #   plotly
-tensorboard==2.20.0
-    # via terratorch
-tensorboard-data-server==0.7.2
-    # via tensorboard
-tensorboardx==2.6.4
-    # via lightning
 tensorizer==2.10.1
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 termcolor==3.1.0
-    # via
-    #   gpt-oss
-    #   terratorch
-terratorch==1.2.2
-    # via -r requirements/test.in
+    # via gpt-oss
 threadpoolctl==3.5.0
     # via scikit-learn
 tifffile==2025.3.30
-    # via
-    #   scikit-image
-    #   terratorch
+    # via scikit-image
 tiktoken==0.12.0
     # via
+    #   -c requirements/common.txt
     #   gpt-oss
     #   lm-eval
     #   mistral-common
 timm==1.0.17
     # via
-    #   -r requirements/test.in
+    #   -r requirements/test/cuda.in
     #   open-clip-torch
     #   segmentation-models-pytorch
-    #   terratorch
-    #   torchgeo
-tokenizers==0.22.0
+tokenizers==0.22.2
     # via
-    #   -r requirements/test.in
+    #   -c requirements/common.txt
+    #   -r requirements/test/cuda.in
     #   transformers
 tomli==2.2.1
     # via schemathesis
 tomli-w==1.2.0
     # via schemathesis
-torch==2.10.0+cu129
+torch==2.11.0+cu130
     # via
-    #   -r requirements/test.in
+    #   -c requirements/cuda.txt
+    #   -r requirements/test/cuda.in
     #   accelerate
     #   bitsandbytes
     #   encodec
     #   instanttensor
-    #   kornia
-    #   lightly
-    #   lightning
     #   mteb
     #   open-clip-torch
     #   peft
-    #   pytorch-lightning
     #   runai-model-streamer
     #   segmentation-models-pytorch
     #   sentence-transformers
     #   tensorizer
-    #   terratorch
     #   timm
-    #   torchaudio
-    #   torchgeo
-    #   torchmetrics
     #   torchvision
     #   vector-quantize-pytorch
     #   vocos
-torchaudio==2.10.0+cu129
+torchaudio==2.11.0+cu130
     # via
-    #   -r requirements/test.in
+    #   -c requirements/cuda.txt
+    #   -r requirements/test/cuda.in
     #   encodec
     #   vocos
-torchgeo==0.7.0
-    # via terratorch
-torchmetrics==1.7.4
-    # via
-    #   lightning
-    #   pytorch-lightning
-    #   terratorch
-    #   torchgeo
-torchvision==0.25.0+cu129
-    # via
-    #   -r requirements/test.in
-    #   lightly
+torchvision==0.26.0+cu130
+    # via
+    #   -c requirements/cuda.txt
+    #   -r requirements/test/cuda.in
     #   open-clip-torch
     #   segmentation-models-pytorch
-    #   terratorch
     #   timm
-    #   torchgeo
 tqdm==4.67.3
     # via
     #   datasets
     #   evaluate
     #   huggingface-hub
-    #   lightly
-    #   lightning
     #   lm-eval
     #   mteb
     #   nltk
@@ -1263,25 +1037,23 @@ tqdm==4.67.3
     #   optuna
     #   peft
     #   pqdm
-    #   pytorch-lightning
     #   segmentation-models-pytorch
     #   sentence-transformers
-    #   tacoreader
-    #   terratorch
     #   transformers
-transformers==4.57.5
+transformers==5.5.3
     # via
-    #   -r requirements/test.in
+    #   -c requirements/common.txt
+    #   -r requirements/test/cuda.in
     #   genai-perf
     #   peft
     #   sentence-transformers
     #   transformers-stream-generator
 transformers-stream-generator==0.0.5
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 triton==3.6.0
     # via torch
 tritonclient==2.64.0
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 typepy==1.3.2
     # via
     #   dataproperty
@@ -1290,13 +1062,14 @@ typepy==1.3.2
 typer==0.15.2
     # via
     #   fastsafetensors
+    #   huggingface-hub
     #   perceptron
+    #   transformers
 types-python-dateutil==2.9.0.20241206
     # via arrow
-typeshed-client==2.8.2
-    # via jsonargparse
 typing-extensions==4.15.0
     # via
+    #   -c requirements/common.txt
     #   aiosignal
     #   albumentations
     #   alembic
@@ -1308,8 +1081,6 @@ typing-extensions==4.15.0
     #   grpcio
     #   huggingface-hub
     #   librosa
-    #   lightning
-    #   lightning-utilities
     #   lm-eval
     #   mistral-common
     #   mteb
@@ -1320,16 +1091,12 @@ typing-extensions==4.15.0
     #   pydantic
     #   pydantic-core
     #   pydantic-extra-types
-    #   pytorch-lightning
     #   sentence-transformers
     #   sqlalchemy
     #   starlette
     #   torch
-    #   torchgeo
     #   typer
-    #   typeshed-client
     #   typing-inspection
-    #   wandb
 typing-inspection==0.4.2
     # via pydantic
 tzdata==2024.2
@@ -1341,35 +1108,27 @@ urllib3==2.2.3
     #   blobfile
     #   botocore
     #   docker
-    #   lightly
     #   requests
     #   responses
-    #   sentry-sdk
     #   tritonclient
 uvicorn==0.35.0
     # via gpt-oss
 vector-quantize-pytorch==1.21.2
-    # via -r requirements/test.in
+    # via -r requirements/test/cuda.in
 virtualenv==20.31.2
     # via ray
 vocos==0.1.0
-    # via -r requirements/test.in
-wandb==0.24.2
-    # via terratorch
+    # via -r requirements/test/cuda.in
 wcwidth==0.2.13
     # via ftfy
 webcolors==24.11.1
     # via jsonschema
 werkzeug==3.1.3
-    # via
-    #   schemathesis
-    #   tensorboard
+    # via schemathesis
 word2number==1.1
     # via lm-eval
 wrapt==1.17.2
     # via smart-open
-xarray==2025.7.1
-    # via rioxarray
 xxhash==3.5.0
     # via
     #   datasets
@@ -1380,5 +1139,3 @@ yarl==1.17.1
     #   schemathesis
 zipp==3.23.0
     # via importlib-metadata
-zstandard==0.23.0
-    # via lm-eval
diff --git a/requirements/nightly_torch_test.txt b/requirements/test/nightly-torch.txt
similarity index 84%
rename from requirements/nightly_torch_test.txt
rename to requirements/test/nightly-torch.txt
index ca9c5bd1cace..9c70aa8b90e2 100644
--- a/requirements/nightly_torch_test.txt
+++ b/requirements/test/nightly-torch.txt
@@ -23,14 +23,14 @@ jiwer # required for audio tests
 timm # required for internvl test
 transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
-mistral_common[image,audio] >= 1.9.1 # required for voxtral test
+mistral_common[image,audio] >= 1.11.2 # required for voxtral test
 num2words # required for smolvlm test
 opencv-python-headless >= 4.13.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
-lm-eval[api]>=0.4.11 # required for model evaluation test
+lm-eval[api]>=0.4.12 # required for model evaluation test
 mteb[bm25s]>=2, <3 # required for mteb test
-transformers==4.57.5
-tokenizers==0.22.0
+transformers==5.5.3
+tokenizers==0.22.2
 schemathesis>=3.39.15 # Required for openai schema test.
 # quantization
 bitsandbytes>=0.49.2
@@ -40,7 +40,7 @@ buildkite-test-collector==0.1.9
 genai_perf>=0.0.8
 tritonclient>=2.51.0
 
-numba == 0.61.2 # Required for N-gram speculative decoding
+numba == 0.65.0 # Required for N-gram speculative decoding
 numpy
 runai-model-streamer[s3,gcs,azure]==0.15.7
 fastsafetensors>=0.2.2
diff --git a/requirements/rocm-test.in b/requirements/test/rocm.in
similarity index 72%
rename from requirements/rocm-test.in
rename to requirements/test/rocm.in
index 856fab7e9f65..812fb736b570 100644
--- a/requirements/rocm-test.in
+++ b/requirements/test/rocm.in
@@ -1,3 +1,5 @@
+-r ../common.txt
+
 # testing
 pytest
 tensorizer==2.10.1
@@ -21,7 +23,6 @@ vocos # required for minicpmo_26 test
 peft>=0.15.0 # required for phi-4-mm test
 pqdm
 ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests
-resampy # required for audio tests
 sentence-transformers>=5.2.0 # required for embedding tests
 soundfile # required for audio tests
 jiwer # required for audio tests
@@ -29,15 +30,15 @@ tblib # for pickling test exceptions
 timm>=1.0.17 # required for internvl and gemma3n-mm test
 transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
-mistral_common[image,audio]>=1.10.0 # required for voxtral test
+mistral_common[image,audio]>=1.11.2 # required for voxtral test
 num2words # required for smolvlm test
 open_clip_torch==2.32.0 # Required for nemotron_vl test, Nemotron Parse in test_common.py
 opencv-python-headless>=4.13.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
-lm-eval[api]>=0.4.11 # required for model evaluation test
+lm-eval[api]>=0.4.12 # required for model evaluation test
 mteb[bm25s]>=2, <3 # required for mteb test
-transformers==4.57.5
-tokenizers==0.22.0
+transformers==5.5.3
+tokenizers==0.22.2
 schemathesis>=3.39.15 # Required for openai schema test
 # quantization
 bitsandbytes==0.49.2
@@ -51,16 +52,20 @@ grpcio==1.78.0
 grpcio-reflection==1.78.0
 
 arctic-inference==0.1.1 # Required for suffix decoding test
-numba==0.61.2 # Required for N-gram speculative decoding
+numba==0.65.0 # Required for N-gram speculative decoding
 numpy
 runai-model-streamer[s3,gcs,azure]==0.15.7
-fastsafetensors>=0.2.2 # 0.2.2 contains important fixes for multi-GPU mem usage
+fastsafetensors @ git+https://github.com/foundation-model-stack/fastsafetensors.git@0.2.2 # PyPI only ships CUDA wheels
 instanttensor>=0.1.5
 pydantic>=2.12 # 2.11 leads to error on python 3.13
 decord==0.6.0
 
 # Prithvi tests
-terratorch>=1.2.2
+# terratorch is temporarily disabled while PyPI has the `lightning` package
+# in `quarantined` status (every published terratorch version transitively
+# requires `lightning`, so the resolver fails with "no versions of lightning").
+# Re-enable once PyPI lifts the quarantine. Tracked in #41376.
+# terratorch>=1.2.2
 imagehash # Required for Prithvi tests
 segmentation-models-pytorch>0.4.0 # Required for Prithvi tests
 
@@ -76,8 +81,9 @@ datasets>=3.3.0,<=3.6.0
 openpyxl # required for perf comparison excel report
 plotly # required for perf comparison html report
 
-# ROCm-specific extras (not in CUDA test.in)
+# ROCm-specific extras (not in CUDA cuda.in)
 rapidfuzz
-torchgeo==0.7.0
+# torchgeo also pulled in `lightning` transitively; disabled for the same
+# quarantine reason as terratorch above. Restore once the quarantine clears.
+# torchgeo==0.7.0
 multiprocess==0.70.16
-huggingface-hub==0.36.2
diff --git a/requirements/rocm-test.txt b/requirements/test/rocm.txt
similarity index 63%
rename from requirements/rocm-test.txt
rename to requirements/test/rocm.txt
index dd4c7c24f40c..b15e00edf1dd 100644
--- a/requirements/rocm-test.txt
+++ b/requirements/test/rocm.txt
@@ -1,20 +1,15 @@
 # This file was autogenerated by uv via the following command:
-#    uv pip compile requirements/rocm-test.in -o requirements/rocm-test.txt --index-strategy unsafe-best-match -c requirements/rocm.txt --python-platform x86_64-manylinux_2_28 --python-version 3.12 --no-emit-package torch --no-emit-package torchvision --no-emit-package torchaudio --no-emit-package triton --no-emit-package cuda-bindings --no-emit-package cuda-pathfinder --no-emit-package cuda-toolkit --no-emit-package cupy-cuda12x --no-emit-package nvidia-cublas --no-emit-package nvidia-cuda-cupti --no-emit-package nvidia-cuda-nvrtc --no-emit-package nvidia-cuda-runtime --no-emit-package nvidia-cudnn-cu13 --no-emit-package nvidia-cufft --no-emit-package nvidia-cufile --no-emit-package nvidia-curand --no-emit-package nvidia-cusolver --no-emit-package nvidia-cusparse --no-emit-package nvidia-cusparselt-cu13 --no-emit-package nvidia-nccl-cu13 --no-emit-package nvidia-nvjitlink --no-emit-package nvidia-nvshmem-cu13 --no-emit-package nvidia-nvtx
+#    uv pip compile requirements/test/rocm.in -c requirements/rocm.txt -o requirements/test/rocm.txt --index-strategy unsafe-best-match --python-platform x86_64-manylinux_2_28 --python-version 3.12 --no-emit-package torch --no-emit-package torchvision --no-emit-package torchaudio --no-emit-package triton --no-emit-package cuda-bindings --no-emit-package cuda-pathfinder --no-emit-package cuda-toolkit --no-emit-package cupy-cuda12x --no-emit-package nvidia-cublas --no-emit-package nvidia-cuda-cupti --no-emit-package nvidia-cuda-nvrtc --no-emit-package nvidia-cuda-runtime --no-emit-package nvidia-cudnn --no-emit-package nvidia-cufft --no-emit-package nvidia-cufile --no-emit-package nvidia-curand --no-emit-package nvidia-cusolver --no-emit-package nvidia-cusparse --no-emit-package nvidia-cusparselt --no-emit-package nvidia-nccl --no-emit-package nvidia-nvjitlink --no-emit-package nvidia-nvshmem --no-emit-package nvidia-nvtx --no-emit-package nvidia-cublas-cu12 --no-emit-package nvidia-cuda-cupti-cu12 --no-emit-package nvidia-cuda-nvrtc-cu12 --no-emit-package nvidia-cuda-runtime-cu12 --no-emit-package nvidia-cudnn-cu12 --no-emit-package nvidia-cufft-cu12 --no-emit-package nvidia-cufile-cu12 --no-emit-package nvidia-curand-cu12 --no-emit-package nvidia-cusolver-cu12 --no-emit-package nvidia-cusparse-cu12 --no-emit-package nvidia-cusparselt-cu12 --no-emit-package nvidia-nccl-cu12 --no-emit-package nvidia-nvjitlink-cu12 --no-emit-package nvidia-nvshmem-cu12 --no-emit-package nvidia-nvtx-cu12 --no-emit-package nvidia-cublas-cu13 --no-emit-package nvidia-cuda-cupti-cu13 --no-emit-package nvidia-cuda-nvrtc-cu13 --no-emit-package nvidia-cuda-runtime-cu13 --no-emit-package nvidia-cudnn-cu13 --no-emit-package nvidia-cufft-cu13 --no-emit-package nvidia-cufile-cu13 --no-emit-package nvidia-curand-cu13 --no-emit-package nvidia-cusolver-cu13 --no-emit-package nvidia-cusparse-cu13 --no-emit-package nvidia-cusparselt-cu13 --no-emit-package nvidia-nccl-cu13 --no-emit-package nvidia-nvjitlink-cu13 --no-emit-package nvidia-nvshmem-cu13 --no-emit-package nvidia-nvtx-cu13
 absl-py==2.4.0
-    # via
-    #   rouge-score
-    #   tensorboard
+    # via rouge-score
 accelerate==1.13.0
     # via peft
-aenum==3.1.17
-    # via lightly
-affine==2.4.0
-    # via rasterio
 aiohappyeyeballs==2.6.1
     # via aiohttp
 aiohttp==3.13.3
     # via
     #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
     #   aiohttp-cors
     #   fsspec
     #   gpt-oss
@@ -24,12 +19,8 @@ aiohttp-cors==0.8.1
     # via ray
 aiosignal==1.4.0
     # via aiohttp
-albucore==0.1.2
-    # via terratorch
 albumentations==1.4.6
-    # via
-    #   -r requirements/rocm-test.in
-    #   terratorch
+    # via -r requirements/test/rocm.in
 alembic==1.18.4
     # via optuna
 annotated-doc==0.0.4
@@ -38,33 +29,39 @@ annotated-doc==0.0.4
     #   typer
 annotated-types==0.7.0
     # via pydantic
-antlr4-python3-runtime==4.9.3
+anthropic==0.93.0
     # via
-    #   hydra-core
-    #   omegaconf
-anyio==4.6.2.post1
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
+anyio==4.13.0
     # via
+    #   anthropic
     #   httpx
+    #   mcp
+    #   openai
+    #   sse-starlette
     #   starlette
+    #   watchfiles
+apache-tvm-ffi==0.1.10
+    # via xgrammar
 arctic-inference==0.1.1
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 argcomplete==3.6.3
     # via datamodel-code-generator
 arrow==1.4.0
     # via isoduration
+astor==0.8.1
+    # via depyf
 attrs==26.1.0
     # via
     #   aiohttp
-    #   fiona
-    #   jsonlines
     #   jsonschema
     #   pytest-subtests
-    #   rasterio
     #   referencing
 audioread==3.0.1
     # via librosa
 av==16.1.0
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 azure-core==1.39.0
     # via
     #   azure-identity
@@ -75,16 +72,16 @@ azure-storage-blob==12.28.0
     # via runai-model-streamer-azure
 backoff==2.2.1
     # via
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/rocm.in
     #   schemathesis
 bitsandbytes==0.49.2
-    # via
-    #   -r requirements/rocm-test.in
-    #   lightning
+    # via -r requirements/test/rocm.in
 black==26.3.1
     # via datamodel-code-generator
+blake3==1.0.8
+    # via -r requirements/test/../common.txt
 blobfile==3.0.0
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 bm25s==0.2.13
     # via mteb
 boto3==1.42.74
@@ -98,16 +95,15 @@ botocore==1.42.74
 bounded-pool-executor==0.0.3
     # via pqdm
 buildkite-test-collector==0.1.9
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
+cachetools==7.0.5
+    # via -r requirements/test/../common.txt
+cbor2==5.9.0
+    # via -r requirements/test/../common.txt
 certifi==2026.2.25
     # via
-    #   fiona
     #   httpcore
     #   httpx
-    #   lightly
-    #   pyogrio
-    #   pyproj
-    #   rasterio
     #   requests
     #   sentry-sdk
 cffi==1.17.1
@@ -125,23 +121,15 @@ chz==0.4.0
 click==8.3.1
     # via
     #   black
-    #   click-plugins
-    #   cligj
-    #   fiona
     #   jiwer
     #   nltk
-    #   rasterio
     #   ray
+    #   rich-toolkit
     #   schemathesis
     #   typer
     #   uvicorn
-    #   wandb
-click-plugins==1.1.1.2
-    # via fiona
-cligj==0.7.2
-    # via
-    #   fiona
-    #   rasterio
+cloudpickle==3.1.2
+    # via -r requirements/test/../common.txt
 colorama==0.4.6
     # via
     #   perceptron
@@ -151,6 +139,10 @@ colorful==0.5.8
     # via ray
 colorlog==6.10.1
     # via optuna
+compressed-tensors==0.15.0.1
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
 contourpy==1.3.3
     # via matplotlib
 coverage==7.13.5
@@ -167,47 +159,63 @@ cryptography==46.0.0
 cycler==0.12.1
     # via matplotlib
 datamodel-code-generator==0.55.0
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 dataproperty==1.1.0
     # via
     #   pytablewriter
     #   tabledata
 datasets==3.6.0
     # via
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/rocm.in
     #   evaluate
     #   lm-eval
     #   mteb
 decorator==5.2.1
     # via librosa
 decord==0.6.0
-    # via -r requirements/rocm-test.in
-diffusers==0.37.0
-    # via terratorch
+    # via -r requirements/test/rocm.in
+depyf==0.20.0
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
 dill==0.3.8
     # via
     #   datasets
+    #   depyf
     #   evaluate
     #   lm-eval
     #   multiprocess
+diskcache==5.6.3
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
 distlib==0.4.0
     # via virtualenv
+distro==1.9.0
+    # via
+    #   anthropic
+    #   openai
+dnspython==2.8.0
+    # via email-validator
 docker==7.1.0
     # via gpt-oss
 docopt==0.6.2
     # via num2words
 docstring-parser==0.17.0
-    # via jsonargparse
+    # via anthropic
 einops==0.8.2
     # via
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/../common.txt
+    #   -r requirements/test/rocm.in
     #   encodec
-    #   terratorch
-    #   torchgeo
     #   vector-quantize-pytorch
     #   vocos
 einx==0.4.2
     # via vector-quantize-pytorch
+email-validator==2.3.0
+    # via
+    #   fastapi
+    #   pydantic
 encodec==0.1.1
     # via vocos
 et-xmlfile==2.0.0
@@ -217,25 +225,30 @@ evaluate==0.4.6
 fastapi==0.135.2
     # via
     #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
     #   gpt-oss
+    #   model-hosting-container-standards
+fastapi-cli==0.0.24
+    # via fastapi
+fastapi-cloud-cli==0.16.1
+    # via fastapi-cli
+fastar==0.10.0
+    # via fastapi-cloud-cli
 fastparquet==2026.3.0
     # via genai-perf
-fastsafetensors==0.2.2
-    # via -r requirements/rocm-test.in
+fastsafetensors @ git+https://github.com/foundation-model-stack/fastsafetensors.git@65d80088fca7a8f567fba30415fbcc80f7d2259c
+    # via -r requirements/test/rocm.in
 filelock==3.25.2
     # via
     #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
     #   blobfile
     #   datasets
-    #   diffusers
     #   huggingface-hub
     #   python-discovery
     #   ray
     #   torch
-    #   transformers
     #   virtualenv
-fiona==1.10.1
-    # via torchgeo
 fonttools==4.62.1
     # via matplotlib
 fqdn==1.5.1
@@ -252,22 +265,17 @@ fsspec==2025.3.0
     #   evaluate
     #   fastparquet
     #   huggingface-hub
-    #   lightning
-    #   pytorch-lightning
-    #   tacoreader
     #   torch
 ftfy==6.3.1
     # via open-clip-torch
 genai-perf==0.0.16
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 genson==1.3.0
     # via datamodel-code-generator
-geopandas==1.1.3
-    # via terratorch
-gitdb==4.0.12
-    # via gitpython
-gitpython==3.1.46
-    # via wandb
+gguf==0.18.0
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
 google-api-core==2.30.0
     # via
     #   google-cloud-core
@@ -290,9 +298,12 @@ google-crc32c==1.8.0
 google-resumable-media==2.8.0
     # via google-cloud-storage
 googleapis-common-protos==1.73.0
-    # via google-api-core
+    # via
+    #   google-api-core
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
 gpt-oss==0.0.8
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 graphql-core==3.2.8
     # via hypothesis-graphql
 greenlet==3.3.2
@@ -300,25 +311,23 @@ greenlet==3.3.2
 grpcio==1.78.0
     # via
     #   -c requirements/rocm.txt
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/rocm.in
     #   grpcio-reflection
+    #   opentelemetry-exporter-otlp-proto-grpc
     #   ray
-    #   tensorboard
 grpcio-reflection==1.78.0
     # via
     #   -c requirements/rocm.txt
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/rocm.in
 h11==0.16.0
     # via
     #   httpcore
     #   uvicorn
 h2==4.3.0
     # via httpx
-h5py==3.16.0
-    # via terratorch
 harfile==0.4.0
     # via schemathesis
-hf-xet==1.4.2
+hf-xet==1.4.3
     # via huggingface-hub
 hiredis==3.3.1
     # via tensorizer
@@ -328,34 +337,37 @@ html2text==2025.4.15
     # via gpt-oss
 httpcore==1.0.9
     # via httpx
+httptools==0.7.1
+    # via uvicorn
 httpx==0.27.2
     # via
-    #   -r requirements/rocm-test.in
-    #   diffusers
+    #   -r requirements/test/rocm.in
+    #   anthropic
+    #   fastapi
+    #   fastapi-cloud-cli
+    #   huggingface-hub
+    #   mcp
+    #   model-hosting-container-standards
+    #   openai
     #   perceptron
     #   schemathesis
-huggingface-hub==0.36.2
+httpx-sse==0.4.3
+    # via mcp
+huggingface-hub==1.10.2
     # via
-    #   -r requirements/rocm-test.in
     #   accelerate
     #   datasets
-    #   diffusers
     #   evaluate
     #   open-clip-torch
     #   peft
     #   segmentation-models-pytorch
     #   sentence-transformers
-    #   terratorch
     #   timm
     #   tokenizers
     #   transformers
     #   vocos
 humanize==4.15.0
     # via runai-model-streamer
-hydra-core==1.3.2
-    # via
-    #   lightly
-    #   lightning
 hyperframe==6.1.0
     # via h2
 hypothesis==6.151.9
@@ -370,26 +382,27 @@ hypothesis-jsonschema==0.23.1
 idna==3.11
     # via
     #   anyio
+    #   email-validator
     #   httpx
     #   jsonschema
     #   requests
     #   yarl
+ijson==3.5.0
+    # via -r requirements/test/../common.txt
 imagehash==4.3.2
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 imageio==2.37.3
     # via scikit-image
 importlib-metadata==8.7.1
-    # via
-    #   diffusers
-    #   opentelemetry-api
-importlib-resources==6.5.2
-    # via typeshed-client
+    # via opentelemetry-api
 inflect==7.5.0
     # via datamodel-code-generator
 iniconfig==2.3.0
     # via pytest
 instanttensor==0.1.6
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
+interegular==0.3.3
+    # via lm-format-enforcer
 isodate==0.7.2
     # via azure-storage-blob
 isoduration==20.11.0
@@ -399,33 +412,32 @@ isort==8.0.1
 jinja2==3.1.6
     # via
     #   datamodel-code-generator
+    #   fastapi
     #   genai-perf
     #   lm-eval
     #   torch
+jiter==0.14.0
+    # via
+    #   anthropic
+    #   openai
 jiwer==4.0.0
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 jmespath==1.1.0
     # via
     #   boto3
     #   botocore
+    #   model-hosting-container-standards
 joblib==1.5.3
     # via
     #   librosa
     #   nltk
     #   scikit-learn
-jsonargparse==4.47.0
-    # via
-    #   lightning
-    #   terratorch
-jsonlines==4.0.0
-    # via lm-eval
-jsonnet==0.21.0
-    # via jsonargparse
 jsonpointer==3.1.0
     # via jsonschema
 jsonschema==4.26.0
     # via
     #   hypothesis-jsonschema
+    #   mcp
     #   mistral-common
     #   ray
     #   schemathesis
@@ -434,15 +446,15 @@ jsonschema-specifications==2025.9.1
 junit-xml==1.9
     # via schemathesis
 kaldi-native-fbank==1.22.3
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 kaleido==1.0.0
     # via genai-perf
 kiwisolver==1.5.0
     # via matplotlib
-kornia==0.8.2
-    # via torchgeo
-kornia-rs==0.1.10
-    # via kornia
+lark==1.2.2
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
 lazy-loader==0.4
     # via
     #   librosa
@@ -450,30 +462,25 @@ lazy-loader==0.4
 libnacl==2.1.0
     # via tensorizer
 librosa==0.10.2.post1
-    # via -r requirements/rocm-test.in
-lightly==1.5.22
-    # via
-    #   terratorch
-    #   torchgeo
-lightly-utils==0.0.2
-    # via lightly
-lightning==2.6.1
-    # via
-    #   terratorch
-    #   torchgeo
-lightning-utilities==0.15.3
-    # via
-    #   lightning
-    #   pytorch-lightning
-    #   torchmetrics
-llvmlite==0.44.0
+    # via -r requirements/test/rocm.in
+llguidance==1.7.5
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
+llvmlite==0.47.0
     # via numba
-lm-eval==0.4.11
-    # via -r requirements/rocm-test.in
+lm-eval==0.4.12
+    # via -r requirements/test/rocm.in
+lm-format-enforcer==0.11.3
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
 logistro==2.0.1
     # via
     #   choreographer
     #   kaleido
+loguru==0.7.3
+    # via compressed-tensors
 lxml==6.0.2
     # via
     #   blobfile
@@ -481,8 +488,6 @@ lxml==6.0.2
     #   sacrebleu
 mako==1.3.10
     # via alembic
-markdown==3.10.2
-    # via tensorboard
 markdown-it-py==4.0.0
     # via rich
 markupsafe==3.0.3
@@ -491,21 +496,25 @@ markupsafe==3.0.3
     #   mako
     #   werkzeug
 matplotlib==3.10.8
-    # via
-    #   -r requirements/rocm-test.in
-    #   lightning
-    #   torchgeo
+    # via -r requirements/test/rocm.in
 mbstrdecoder==1.1.4
     # via
     #   dataproperty
     #   pytablewriter
     #   typepy
+mcp==1.27.0
+    # via -r requirements/test/../common.txt
 mdurl==0.1.2
     # via markdown-it-py
-mistral-common==1.10.0
+mistral-common==1.11.2
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
+    #   -r requirements/test/rocm.in
+model-hosting-container-standards==0.1.14
     # via
     #   -c requirements/common.txt
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/../common.txt
 more-itertools==10.8.0
     # via
     #   inflect
@@ -522,15 +531,17 @@ msgpack==1.1.2
     # via
     #   librosa
     #   ray
+msgspec==0.21.0
+    # via -r requirements/test/../common.txt
 mteb==2.11.5
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 multidict==6.7.1
     # via
     #   aiohttp
     #   yarl
 multiprocess==0.70.16
     # via
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/rocm.in
     #   datasets
     #   evaluate
 mypy-extensions==1.1.0
@@ -541,23 +552,22 @@ networkx==3.6.1
     # via
     #   scikit-image
     #   torch
+ninja==1.13.0
+    # via -r requirements/test/../common.txt
 nltk==3.9.3
     # via rouge-score
 num2words==0.5.14
-    # via -r requirements/rocm-test.in
-numba==0.61.2
+    # via -r requirements/test/rocm.in
+numba==0.65.0
     # via
     #   -c requirements/rocm.txt
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/rocm.in
     #   librosa
-    #   resampy
-numkong==7.1.1
-    # via albucore
 numpy==2.2.6
     # via
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/../common.txt
+    #   -r requirements/test/rocm.in
     #   accelerate
-    #   albucore
     #   albumentations
     #   bitsandbytes
     #   bm25s
@@ -565,19 +575,15 @@ numpy==2.2.6
     #   cupy-cuda12x
     #   datasets
     #   decord
-    #   diffusers
     #   einx
     #   encodec
     #   evaluate
     #   fastparquet
     #   genai-perf
-    #   geopandas
-    #   h5py
+    #   gguf
     #   imagehash
     #   imageio
     #   librosa
-    #   lightly
-    #   lightly-utils
     #   lm-eval
     #   matplotlib
     #   mistral-common
@@ -589,13 +595,8 @@ numpy==2.2.6
     #   patsy
     #   peft
     #   perceptron
-    #   pycocotools
-    #   pyogrio
     #   pytrec-eval-terrier
     #   pywavelets
-    #   rasterio
-    #   resampy
-    #   rioxarray
     #   rouge-score
     #   runai-model-streamer
     #   sacrebleu
@@ -604,31 +605,26 @@ numpy==2.2.6
     #   scipy
     #   segmentation-models-pytorch
     #   sentence-transformers
-    #   shapely
     #   soundfile
     #   soxr
     #   statsmodels
-    #   tensorboard
-    #   tensorboardx
     #   tensorizer
-    #   terratorch
     #   tifffile
-    #   torchgeo
-    #   torchmetrics
     #   torchvision
     #   transformers
     #   tritonclient
     #   vocos
-    #   xarray
-omegaconf==2.3.0
-    # via
-    #   hydra-core
-    #   lightning
+    #   xgrammar
 open-clip-torch==2.32.0
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
+openai==2.31.0
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
 openai-harmony==0.0.8
     # via
     #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
     #   gpt-oss
 opencensus==0.11.4
     # via ray
@@ -637,34 +633,68 @@ opencensus-context==0.1.3
 opencv-python-headless==4.13.0.92
     # via
     #   -c requirements/common.txt
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/../common.txt
+    #   -r requirements/test/rocm.in
     #   albumentations
     #   mistral-common
 openpyxl==3.1.5
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 opentelemetry-api==1.40.0
     # via
     #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
     #   opentelemetry-exporter-prometheus
     #   opentelemetry-sdk
     #   opentelemetry-semantic-conventions
+opentelemetry-exporter-otlp==1.40.0
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
+opentelemetry-exporter-otlp-proto-common==1.40.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-exporter-otlp-proto-grpc==1.40.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-exporter-otlp-proto-http==1.40.0
+    # via opentelemetry-exporter-otlp
 opentelemetry-exporter-prometheus==0.61b0
     # via ray
 opentelemetry-proto==1.40.0
-    # via ray
+    # via
+    #   opentelemetry-exporter-otlp-proto-common
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   ray
 opentelemetry-sdk==1.40.0
     # via
     #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
     #   opentelemetry-exporter-prometheus
+    #   opentelemetry-semantic-conventions-ai
     #   ray
 opentelemetry-semantic-conventions==0.61b0
-    # via opentelemetry-sdk
+    # via
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions-ai
+opentelemetry-semantic-conventions-ai==0.5.1
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
 optuna==3.6.1
     # via genai-perf
 orjson==3.11.7
     # via
     #   genai-perf
     #   kaleido
+outlines-core==0.2.14
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
 packaging==26.0
     # via
     #   -c requirements/rocm.txt
@@ -674,45 +704,31 @@ packaging==26.0
     #   datasets
     #   evaluate
     #   fastparquet
-    #   geopandas
     #   huggingface-hub
-    #   hydra-core
     #   kaleido
-    #   kornia
     #   lazy-loader
-    #   lightning
-    #   lightning-utilities
+    #   lm-format-enforcer
     #   matplotlib
     #   optuna
     #   peft
     #   plotly
     #   pooch
-    #   pyogrio
     #   pytest
     #   pytest-rerunfailures
-    #   pytorch-lightning
     #   ray
-    #   rioxarray
     #   scikit-image
     #   statsmodels
-    #   tensorboard
-    #   tensorboardx
-    #   torchmetrics
     #   transformers
     #   typepy
-    #   wandb
-    #   xarray
 pandas==3.0.1
     # via
     #   datasets
     #   evaluate
     #   fastparquet
     #   genai-perf
-    #   geopandas
     #   statsmodels
-    #   tacoreader
-    #   torchgeo
-    #   xarray
+partial-json-parser==0.2.1.1.post7
+    # via -r requirements/test/../common.txt
 pathspec==1.0.4
     # via black
 pathvalidate==3.3.1
@@ -720,25 +736,22 @@ pathvalidate==3.3.1
 patsy==1.0.2
     # via statsmodels
 peft==0.18.1
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 perceptron==0.1.4
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 perf-analyzer==0.1.0
     # via genai-perf
 pillow==12.1.1
     # via
-    #   diffusers
+    #   -r requirements/test/../common.txt
     #   genai-perf
     #   imagehash
     #   imageio
-    #   lightly-utils
     #   matplotlib
     #   mistral-common
     #   perceptron
     #   scikit-image
     #   segmentation-models-pytorch
-    #   tensorboard
-    #   torchgeo
     #   torchvision
 platformdirs==4.3.6
     # via
@@ -746,10 +759,9 @@ platformdirs==4.3.6
     #   pooch
     #   python-discovery
     #   virtualenv
-    #   wandb
 plotly==6.6.0
     # via
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/rocm.in
     #   genai-perf
 pluggy==1.6.0
     # via
@@ -764,12 +776,18 @@ pooch==1.8.2
 portalocker==3.2.0
     # via sacrebleu
 pqdm==0.2.0
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 prometheus-client==0.24.1
     # via
     #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
     #   opentelemetry-exporter-prometheus
+    #   prometheus-fastapi-instrumentator
     #   ray
+prometheus-fastapi-instrumentator==7.1.0
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
 propcache==0.4.1
     # via
     #   aiohttp
@@ -779,37 +797,36 @@ proto-plus==1.27.1
 protobuf==6.33.6
     # via
     #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
     #   google-api-core
     #   googleapis-common-protos
     #   grpcio-reflection
     #   opentelemetry-proto
     #   proto-plus
     #   ray
-    #   tensorboard
-    #   tensorboardx
     #   tensorizer
-    #   wandb
 psutil==7.2.2
     # via
+    #   -r requirements/test/../common.txt
     #   accelerate
     #   peft
     #   tensorizer
 py==1.11.0
     # via pytest-forked
+py-cpuinfo==9.0.0
+    # via -r requirements/test/../common.txt
 py-spy==0.4.1
     # via ray
 pyarrow==23.0.1
     # via
     #   datasets
     #   genai-perf
-    #   tacoreader
-    #   terratorch
 pyasn1==0.6.3
     # via pyasn1-modules
 pyasn1-modules==0.4.2
     # via google-auth
-pycocotools==2.0.11
-    # via terratorch
+pybase64==1.4.3
+    # via -r requirements/test/../common.txt
 pycountry==26.2.16
     # via pydantic-extra-types
 pycparser==3.0
@@ -819,37 +836,44 @@ pycryptodomex==3.23.0
 pydantic==2.12.5
     # via
     #   -c requirements/common.txt
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/../common.txt
+    #   -r requirements/test/rocm.in
     #   albumentations
+    #   anthropic
+    #   compressed-tensors
     #   datamodel-code-generator
     #   fastapi
+    #   fastapi-cloud-cli
     #   gpt-oss
-    #   lightly
+    #   lm-format-enforcer
+    #   mcp
     #   mistral-common
+    #   model-hosting-container-standards
     #   mteb
+    #   openai
     #   openai-harmony
     #   pydantic-extra-types
+    #   pydantic-settings
     #   ray
-    #   wandb
+    #   xgrammar
 pydantic-core==2.41.5
     # via pydantic
 pydantic-extra-types==2.11.1
-    # via mistral-common
+    # via
+    #   fastapi
+    #   mistral-common
+pydantic-settings==2.13.1
+    # via
+    #   fastapi
+    #   mcp
 pygments==2.19.2
     # via rich
 pyjwt==2.12.1
-    # via msal
-pyogrio==0.12.1
-    # via geopandas
-pyparsing==3.3.2
-    # via
-    #   matplotlib
-    #   rasterio
-pyproj==3.7.2
     # via
-    #   geopandas
-    #   rioxarray
-    #   torchgeo
+    #   mcp
+    #   msal
+pyparsing==3.3.2
+    # via matplotlib
 pyrate-limiter==3.9.0
     # via schemathesis
 pystemmer==3.0.0
@@ -858,7 +882,7 @@ pytablewriter==1.2.1
     # via lm-eval
 pytest==8.3.5
     # via
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/rocm.in
     #   buildkite-test-collector
     #   genai-perf
     #   pytest-asyncio
@@ -871,41 +895,44 @@ pytest==8.3.5
     #   pytest-timeout
     #   schemathesis
 pytest-asyncio==0.24.0
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 pytest-cov==6.3.0
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 pytest-forked==1.6.0
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 pytest-mock==3.15.1
     # via genai-perf
 pytest-rerunfailures==14.0
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 pytest-shard==0.1.2
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 pytest-subtests==0.14.2
     # via schemathesis
 pytest-timeout==2.3.1
-    # via -r requirements/rocm-test.in
-python-box==7.4.1
-    # via terratorch
+    # via -r requirements/test/rocm.in
 python-dateutil==2.9.0.post0
     # via
     #   arrow
     #   botocore
-    #   lightly
     #   matplotlib
     #   pandas
     #   typepy
 python-discovery==1.2.0
     # via virtualenv
+python-dotenv==1.2.2
+    # via
+    #   pydantic-settings
+    #   uvicorn
+python-json-logger==4.1.0
+    # via -r requirements/test/../common.txt
+python-multipart==0.0.26
+    # via
+    #   fastapi
+    #   mcp
 python-rapidjson==1.23
     # via tritonclient
 pytokens==0.4.1
     # via black
-pytorch-lightning==2.6.1
-    # via
-    #   lightly
-    #   lightning
 pytrec-eval-terrier==0.5.10
     # via mteb
 pytz==2026.1.post1
@@ -914,36 +941,34 @@ pywavelets==1.9.0
     # via imagehash
 pyyaml==6.0.3
     # via
+    #   -r requirements/test/../common.txt
     #   accelerate
     #   albumentations
     #   datamodel-code-generator
     #   datasets
     #   genai-perf
+    #   gguf
     #   huggingface-hub
-    #   jsonargparse
-    #   lightning
-    #   omegaconf
+    #   lm-format-enforcer
     #   optuna
     #   peft
-    #   pytorch-lightning
     #   ray
     #   responses
     #   schemathesis
     #   timm
     #   transformers
+    #   uvicorn
     #   vocos
-    #   wandb
+pyzmq==27.1.0
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
 rapidfuzz==3.12.1
     # via
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/rocm.in
     #   jiwer
-rasterio==1.5.0
-    # via
-    #   rioxarray
-    #   terratorch
-    #   torchgeo
 ray==2.54.0
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 redis==7.3.0
     # via tensorizer
 referencing==0.37.0
@@ -952,7 +977,7 @@ referencing==0.37.0
     #   jsonschema-specifications
 regex==2026.2.28
     # via
-    #   diffusers
+    #   -r requirements/test/../common.txt
     #   nltk
     #   open-clip-torch
     #   sacrebleu
@@ -961,32 +986,27 @@ regex==2026.2.28
 requests==2.32.5
     # via
     #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
     #   azure-core
     #   buildkite-test-collector
     #   datasets
-    #   diffusers
     #   docker
     #   evaluate
+    #   gguf
     #   google-api-core
     #   google-cloud-storage
     #   gpt-oss
-    #   huggingface-hub
-    #   lightly
     #   lm-eval
     #   mistral-common
     #   msal
     #   mteb
+    #   opentelemetry-exporter-otlp-proto-http
     #   pooch
     #   ray
     #   responses
     #   schemathesis
     #   starlette-testclient
-    #   tacoreader
     #   tiktoken
-    #   transformers
-    #   wandb
-resampy==0.4.3
-    # via -r requirements/rocm-test.in
 responses==0.26.0
     # via genai-perf
 rfc3339-validator==0.1.4
@@ -996,25 +1016,26 @@ rfc3987==1.3.8
 rich==14.3.3
     # via
     #   genai-perf
-    #   lightning
     #   mteb
     #   perceptron
-    #   terratorch
+    #   rich-toolkit
     #   typer
-rioxarray==0.22.0
-    # via terratorch
+rich-toolkit==0.19.7
+    # via
+    #   fastapi-cli
+    #   fastapi-cloud-cli
+rignore==0.7.6
+    # via fastapi-cloud-cli
 rouge-score==0.1.2
     # via lm-eval
 rpds-py==0.30.0
     # via
     #   jsonschema
     #   referencing
-rtree==1.4.1
-    # via torchgeo
 runai-model-streamer==0.15.7
     # via
     #   -c requirements/rocm.txt
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/rocm.in
 runai-model-streamer-azure==0.15.7
     # via runai-model-streamer
 runai-model-streamer-gcs==0.15.7
@@ -1027,19 +1048,18 @@ sacrebleu==2.6.0
     # via lm-eval
 safetensors==0.7.0
     # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
     #   accelerate
-    #   diffusers
     #   open-clip-torch
     #   peft
     #   segmentation-models-pytorch
     #   timm
     #   transformers
 schemathesis==3.39.15
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 scikit-image==0.26.0
-    # via
-    #   albumentations
-    #   terratorch
+    # via albumentations
 scikit-learn==1.8.0
     # via
     #   albumentations
@@ -1047,7 +1067,6 @@ scikit-learn==1.8.0
     #   lm-eval
     #   mteb
     #   sentence-transformers
-    #   terratorch
 scipy==1.17.1
     # via
     #   albumentations
@@ -1062,27 +1081,25 @@ scipy==1.17.1
     #   statsmodels
     #   vocos
 segmentation-models-pytorch==0.5.0
-    # via
-    #   -r requirements/rocm-test.in
-    #   terratorch
-    #   torchgeo
+    # via -r requirements/test/rocm.in
 sentence-transformers==5.3.0
     # via
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/rocm.in
     #   mteb
+sentencepiece==0.2.1
+    # via -r requirements/test/../common.txt
 sentry-sdk==2.55.0
-    # via wandb
+    # via fastapi-cloud-cli
+setproctitle==1.3.7
+    # via -r requirements/test/../common.txt
 setuptools==79.0.1
     # via
     #   -c requirements/common.txt
     #   -c requirements/rocm.txt
+    #   -r requirements/test/../common.txt
+    #   model-hosting-container-standards
     #   pytablewriter
-    #   tensorboard
     #   torch
-shapely==2.1.2
-    # via
-    #   geopandas
-    #   torchgeo
 shellingham==1.5.4
     # via
     #   perceptron
@@ -1092,25 +1109,24 @@ simplejson==3.20.2
 six==1.17.0
     # via
     #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
     #   junit-xml
-    #   lightly
     #   opencensus
     #   python-dateutil
     #   rfc3339-validator
     #   rouge-score
 smart-open==7.5.1
     # via ray
-smmap==5.0.3
-    # via gitdb
 sniffio==1.3.1
     # via
-    #   anyio
+    #   anthropic
     #   httpx
+    #   openai
 sortedcontainers==2.4.0
     # via hypothesis
 soundfile==0.13.1
     # via
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/rocm.in
     #   genai-perf
     #   librosa
     #   mistral-common
@@ -1124,19 +1140,25 @@ sqlalchemy==2.0.48
     #   optuna
 sqlitedict==2.1.0
     # via lm-eval
+sse-starlette==3.3.4
+    # via mcp
 starlette==0.52.1
     # via
     #   fastapi
+    #   mcp
+    #   model-hosting-container-standards
+    #   prometheus-fastapi-instrumentator
     #   schemathesis
+    #   sse-starlette
     #   starlette-testclient
 starlette-testclient==0.4.1
     # via schemathesis
 statsmodels==0.14.6
     # via genai-perf
-stringzilla==4.6.0
-    # via albucore
 structlog==25.5.0
     # via gpt-oss
+supervisor==4.3.0
+    # via model-hosting-container-standards
 sympy==1.14.0
     # via
     #   einx
@@ -1145,103 +1167,80 @@ tabledata==1.3.4
     # via pytablewriter
 tabulate==0.10.0
     # via sacrebleu
-tacoreader==0.5.6
-    # via terratorch
 tblib==3.1.0
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 tcolorpy==0.1.7
     # via pytablewriter
 tenacity==9.1.4
     # via
     #   gpt-oss
     #   lm-eval
-tensorboard==2.20.0
-    # via terratorch
-tensorboard-data-server==0.7.2
-    # via tensorboard
-tensorboardx==2.6.4
-    # via lightning
 tensorizer==2.10.1
     # via
     #   -c requirements/rocm.txt
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/rocm.in
 termcolor==3.3.0
-    # via
-    #   gpt-oss
-    #   terratorch
-terratorch==1.2.2
-    # via -r requirements/rocm-test.in
+    # via gpt-oss
 threadpoolctl==3.6.0
     # via scikit-learn
 tifffile==2026.3.3
-    # via
-    #   scikit-image
-    #   terratorch
+    # via scikit-image
 tiktoken==0.12.0
     # via
     #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
     #   gpt-oss
     #   lm-eval
     #   mistral-common
 timm==1.0.17
     # via
     #   -c requirements/rocm.txt
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/rocm.in
     #   open-clip-torch
     #   segmentation-models-pytorch
-    #   terratorch
-    #   torchgeo
-tokenizers==0.22.0
+tokenizers==0.22.2
     # via
     #   -c requirements/common.txt
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/../common.txt
+    #   -r requirements/test/rocm.in
     #   transformers
 tomli==2.4.0
     # via schemathesis
 tomli-w==1.2.0
     # via schemathesis
-torchgeo==0.7.0
-    # via
-    #   -r requirements/rocm-test.in
-    #   terratorch
-torchmetrics==1.9.0
-    # via
-    #   lightning
-    #   pytorch-lightning
-    #   terratorch
-    #   torchgeo
 tqdm==4.67.3
     # via
+    #   -r requirements/test/../common.txt
     #   datasets
     #   evaluate
+    #   gguf
     #   huggingface-hub
-    #   lightly
-    #   lightning
     #   lm-eval
     #   mteb
     #   nltk
     #   open-clip-torch
+    #   openai
     #   optuna
     #   peft
     #   pqdm
-    #   pytorch-lightning
     #   segmentation-models-pytorch
     #   sentence-transformers
-    #   tacoreader
-    #   terratorch
     #   transformers
-transformers==4.57.5
+transformers==5.5.3
     # via
     #   -c requirements/common.txt
-    #   -r requirements/rocm-test.in
+    #   -r requirements/test/../common.txt
+    #   -r requirements/test/rocm.in
+    #   compressed-tensors
     #   genai-perf
     #   peft
     #   sentence-transformers
     #   transformers-stream-generator
+    #   xgrammar
 transformers-stream-generator==0.0.5
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 tritonclient==2.66.0
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 typeguard==4.5.1
     # via inflect
 typepy==1.3.4
@@ -1251,16 +1250,22 @@ typepy==1.3.4
     #   tabledata
 typer==0.24.1
     # via
+    #   fastapi-cli
+    #   fastapi-cloud-cli
     #   fastsafetensors
+    #   huggingface-hub
     #   perceptron
-typeshed-client==2.9.0
-    # via jsonargparse
+    #   transformers
 typing-extensions==4.15.0
     # via
     #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
     #   aiosignal
     #   albumentations
     #   alembic
+    #   anthropic
+    #   anyio
+    #   apache-tvm-ffi
     #   azure-core
     #   azure-identity
     #   azure-storage-blob
@@ -1269,33 +1274,35 @@ typing-extensions==4.15.0
     #   grpcio
     #   huggingface-hub
     #   librosa
-    #   lightning
-    #   lightning-utilities
     #   lm-eval
+    #   mcp
     #   mistral-common
     #   mteb
+    #   openai
     #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
     #   opentelemetry-sdk
     #   opentelemetry-semantic-conventions
     #   pqdm
     #   pydantic
     #   pydantic-core
     #   pydantic-extra-types
-    #   pytorch-lightning
     #   referencing
+    #   rich-toolkit
     #   sentence-transformers
     #   sqlalchemy
     #   starlette
     #   torch
-    #   torchgeo
     #   typeguard
-    #   typeshed-client
     #   typing-inspection
-    #   wandb
+    #   xgrammar
 typing-inspection==0.4.2
     # via
     #   fastapi
+    #   mcp
     #   pydantic
+    #   pydantic-settings
 tzdata==2025.3
     # via arrow
 uri-template==1.3.0
@@ -1305,35 +1312,45 @@ urllib3==2.6.3
     #   blobfile
     #   botocore
     #   docker
-    #   lightly
     #   requests
     #   responses
     #   sentry-sdk
     #   tritonclient
 uvicorn==0.42.0
-    # via gpt-oss
+    # via
+    #   fastapi
+    #   fastapi-cli
+    #   fastapi-cloud-cli
+    #   gpt-oss
+    #   mcp
+uvloop==0.22.1
+    # via uvicorn
 vector-quantize-pytorch==1.28.0
-    # via -r requirements/rocm-test.in
+    # via -r requirements/test/rocm.in
 virtualenv==21.2.0
     # via ray
 vocos==0.1.0
-    # via -r requirements/rocm-test.in
-wandb==0.25.1
-    # via terratorch
+    # via -r requirements/test/rocm.in
+watchfiles==1.1.1
+    # via
+    #   -r requirements/test/../common.txt
+    #   uvicorn
 wcwidth==0.6.0
     # via ftfy
 webcolors==25.10.0
     # via jsonschema
+websockets==16.0
+    # via uvicorn
 werkzeug==3.1.6
-    # via
-    #   schemathesis
-    #   tensorboard
+    # via schemathesis
 word2number==1.1
     # via lm-eval
 wrapt==2.1.2
     # via smart-open
-xarray==2026.2.0
-    # via rioxarray
+xgrammar==0.2.0
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/../common.txt
 xxhash==3.6.0
     # via
     #   datasets
@@ -1344,8 +1361,6 @@ yarl==1.23.0
     #   schemathesis
 zipp==3.23.0
     # via importlib-metadata
-zstandard==0.25.0
-    # via lm-eval
 
 # The following packages were excluded from the output:
 # torch
@@ -1360,14 +1375,14 @@ zstandard==0.25.0
 # nvidia-cuda-cupti
 # nvidia-cuda-nvrtc
 # nvidia-cuda-runtime
-# nvidia-cudnn-cu13
 # nvidia-cufft
 # nvidia-cufile
 # nvidia-curand
 # nvidia-cusolver
 # nvidia-cusparse
+# nvidia-nvjitlink
+# nvidia-nvtx
+# nvidia-cudnn-cu13
 # nvidia-cusparselt-cu13
 # nvidia-nccl-cu13
-# nvidia-nvjitlink
 # nvidia-nvshmem-cu13
-# nvidia-nvtx
diff --git a/requirements/xpu-test.in b/requirements/test/xpu.in
similarity index 88%
rename from requirements/xpu-test.in
rename to requirements/test/xpu.in
index 0b2273d8829c..94ffc249395a 100644
--- a/requirements/xpu-test.in
+++ b/requirements/test/xpu.in
@@ -1,5 +1,7 @@
 # --- Test Infrastructure ---
 tblib
+pytest
+pytest_asyncio
 pytest-timeout
 pytest-cov
 pytest-forked
@@ -7,8 +9,12 @@ pytest-rerunfailures
 pytest-shard
 
 # --- Core Tools & Bindings ---
+
 absl-py
+accelerate
 arctic-inference
+lm_eval[api]
+modelscope
 
 # --- Audio Processing ---
 librosa
diff --git a/requirements/test/xpu.txt b/requirements/test/xpu.txt
new file mode 100644
index 000000000000..5581d0a079c5
--- /dev/null
+++ b/requirements/test/xpu.txt
@@ -0,0 +1,737 @@
+# This file was autogenerated by uv via the following command:
+#    uv pip compile requirements/test/xpu.in -c requirements/xpu.txt -o requirements/test/xpu.txt --index-strategy unsafe-best-match --torch-backend xpu --python-platform x86_64-manylinux_2_39 --python-version 3.12
+absl-py==2.4.0
+    # via
+    #   -r requirements/test/xpu.in
+    #   rouge-score
+accelerate==1.13.0
+    # via -r requirements/test/xpu.in
+aiohappyeyeballs==2.6.1
+    # via aiohttp
+aiohttp==3.13.4
+    # via
+    #   -c requirements/common.txt
+    #   fsspec
+    #   gpt-oss
+    #   lm-eval
+aiosignal==1.4.0
+    # via aiohttp
+albumentations==1.4.6
+    # via -r requirements/test/xpu.in
+annotated-doc==0.0.4
+    # via
+    #   fastapi
+    #   typer
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.13.0
+    # via
+    #   httpx
+    #   starlette
+arctic-inference==0.1.1
+    # via -r requirements/test/xpu.in
+attrs==26.1.0
+    # via
+    #   aiohttp
+    #   jsonlines
+    #   jsonschema
+    #   referencing
+audioread==3.0.1
+    # via
+    #   -r requirements/test/xpu.in
+    #   librosa
+blobfile==3.0.0
+    # via -r requirements/test/xpu.in
+bm25s==0.2.13
+    # via
+    #   -r requirements/test/xpu.in
+    #   mteb
+bounded-pool-executor==0.0.3
+    # via pqdm
+certifi==2026.2.25
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+cffi==2.0.0
+    # via soundfile
+chardet==5.2.0
+    # via mbstrdecoder
+charset-normalizer==3.4.6
+    # via requests
+chz==0.4.0
+    # via gpt-oss
+click==8.3.1
+    # via
+    #   jiwer
+    #   nltk
+    #   schemathesis
+    #   typer
+    #   uvicorn
+colorama==0.4.6
+    # via sacrebleu
+coverage==7.13.5
+    # via pytest-cov
+dataproperty==1.1.0
+    # via
+    #   pytablewriter
+    #   tabledata
+datasets==4.8.4
+    # via
+    #   evaluate
+    #   lm-eval
+    #   mteb
+decorator==5.2.1
+    # via librosa
+dill==0.4.1
+    # via
+    #   datasets
+    #   evaluate
+    #   lm-eval
+    #   multiprocess
+docker==7.1.0
+    # via gpt-oss
+docopt==0.6.2
+    # via num2words
+dpcpp-cpp-rt==2025.3.2
+    # via
+    #   onemkl-sycl-blas
+    #   onemkl-sycl-dft
+    #   onemkl-sycl-lapack
+    #   onemkl-sycl-rng
+    #   onemkl-sycl-sparse
+    #   torch
+evaluate==0.4.6
+    # via lm-eval
+fastapi==0.135.2
+    # via
+    #   -c requirements/common.txt
+    #   gpt-oss
+filelock==3.25.2
+    # via
+    #   -c requirements/common.txt
+    #   blobfile
+    #   datasets
+    #   huggingface-hub
+    #   modelscope
+    #   torch
+frozenlist==1.8.0
+    # via
+    #   aiohttp
+    #   aiosignal
+fsspec==2026.2.0
+    # via
+    #   datasets
+    #   evaluate
+    #   huggingface-hub
+    #   torch
+gpt-oss==0.0.8
+    # via -r requirements/test/xpu.in
+graphql-core==3.2.8
+    # via hypothesis-graphql
+h11==0.16.0
+    # via
+    #   httpcore
+    #   uvicorn
+harfile==0.4.0
+    # via schemathesis
+hf-xet==1.4.3
+    # via huggingface-hub
+html2text==2025.4.15
+    # via gpt-oss
+httpcore==1.0.9
+    # via httpx
+httpx==0.28.1
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   schemathesis
+huggingface-hub==1.10.2
+    # via
+    #   accelerate
+    #   datasets
+    #   evaluate
+    #   sentence-transformers
+    #   timm
+    #   tokenizers
+    #   transformers
+hypothesis==6.151.10
+    # via
+    #   hypothesis-graphql
+    #   hypothesis-jsonschema
+    #   schemathesis
+hypothesis-graphql==0.12.0
+    # via schemathesis
+hypothesis-jsonschema==0.23.1
+    # via schemathesis
+idna==3.11
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+    #   yarl
+imageio==2.37.3
+    # via scikit-image
+impi-rt==2021.17.2
+    # via
+    #   oneccl
+    #   torch
+iniconfig==2.3.0
+    # via pytest
+intel-cmplr-lib-rt==2025.3.2
+    # via
+    #   intel-sycl-rt
+    #   torch
+intel-cmplr-lib-ur==2025.3.2
+    # via
+    #   intel-openmp
+    #   intel-sycl-rt
+    #   torch
+intel-cmplr-lic-rt==2025.3.2
+    # via
+    #   intel-opencl-rt
+    #   intel-sycl-rt
+    #   torch
+intel-opencl-rt==2025.3.2
+    # via
+    #   dpcpp-cpp-rt
+    #   onemkl-sycl-blas
+    #   onemkl-sycl-dft
+    #   onemkl-sycl-lapack
+    #   onemkl-sycl-rng
+    #   onemkl-sycl-sparse
+    #   torch
+intel-openmp==2025.3.2
+    # via
+    #   dpcpp-cpp-rt
+    #   mkl
+    #   torch
+intel-pti==0.16.0
+    # via torch
+intel-sycl-rt==2025.3.2
+    # via
+    #   dpcpp-cpp-rt
+    #   oneccl
+    #   torch
+jinja2==3.1.6
+    # via
+    #   -c requirements/xpu.txt
+    #   lm-eval
+    #   torch
+jiwer==4.0.0
+    # via -r requirements/test/xpu.in
+joblib==1.5.3
+    # via
+    #   librosa
+    #   nltk
+    #   scikit-learn
+jsonlines==4.0.0
+    # via lm-eval
+jsonschema==4.26.0
+    # via
+    #   hypothesis-jsonschema
+    #   mistral-common
+    #   schemathesis
+jsonschema-rs==0.45.0
+    # via schemathesis
+jsonschema-specifications==2025.9.1
+    # via jsonschema
+junit-xml==1.9
+    # via schemathesis
+lazy-loader==0.5
+    # via
+    #   librosa
+    #   scikit-image
+librosa==0.10.2.post1
+    # via -r requirements/test/xpu.in
+llvmlite==0.47.0
+    # via numba
+lm-eval==0.4.11
+    # via -r requirements/test/xpu.in
+lxml==6.0.2
+    # via
+    #   blobfile
+    #   gpt-oss
+    #   sacrebleu
+markdown-it-py==4.0.0
+    # via rich
+markupsafe==3.0.3
+    # via
+    #   jinja2
+    #   werkzeug
+mbstrdecoder==1.1.4
+    # via
+    #   dataproperty
+    #   pytablewriter
+    #   typepy
+mdurl==0.1.2
+    # via markdown-it-py
+mistral-common==1.11.2
+    # via
+    #   -c requirements/common.txt
+    #   -r requirements/test/xpu.in
+mkl==2025.3.1
+    # via
+    #   onemkl-sycl-blas
+    #   onemkl-sycl-dft
+    #   onemkl-sycl-lapack
+    #   onemkl-sycl-rng
+    #   onemkl-sycl-sparse
+    #   torch
+modelscope==1.35.3
+    # via -r requirements/test/xpu.in
+more-itertools==10.8.0
+    # via lm-eval
+mpmath==1.3.0
+    # via sympy
+msgpack==1.1.2
+    # via librosa
+mteb==2.12.7
+    # via -r requirements/test/xpu.in
+multidict==6.7.1
+    # via
+    #   aiohttp
+    #   yarl
+multiprocess==0.70.19
+    # via
+    #   datasets
+    #   evaluate
+networkx==3.6.1
+    # via
+    #   scikit-image
+    #   torch
+nltk==3.9.4
+    # via rouge-score
+num2words==0.5.14
+    # via -r requirements/test/xpu.in
+numba==0.65.0
+    # via
+    #   -c requirements/xpu.txt
+    #   librosa
+numpy==2.2.6
+    # via
+    #   accelerate
+    #   albumentations
+    #   bm25s
+    #   datasets
+    #   evaluate
+    #   imageio
+    #   librosa
+    #   lm-eval
+    #   mistral-common
+    #   mteb
+    #   numba
+    #   opencv-python-headless
+    #   pandas
+    #   pytrec-eval-terrier
+    #   rouge-score
+    #   sacrebleu
+    #   scikit-image
+    #   scikit-learn
+    #   scipy
+    #   sentence-transformers
+    #   soundfile
+    #   soxr
+    #   tifffile
+    #   torchvision
+    #   transformers
+oneccl==2021.17.2
+    # via
+    #   oneccl-devel
+    #   torch
+oneccl-devel==2021.17.2
+    # via torch
+onemkl-license==2025.3.1
+    # via
+    #   mkl
+    #   torch
+onemkl-sycl-blas==2025.3.1
+    # via
+    #   onemkl-sycl-lapack
+    #   onemkl-sycl-sparse
+    #   torch
+onemkl-sycl-dft==2025.3.1
+    # via torch
+onemkl-sycl-lapack==2025.3.1
+    # via torch
+onemkl-sycl-rng==2025.3.1
+    # via torch
+onemkl-sycl-sparse==2025.3.1
+    # via torch
+openai-harmony==0.0.8
+    # via
+    #   -c requirements/common.txt
+    #   gpt-oss
+opencv-python-headless==4.13.0.92
+    # via
+    #   -c requirements/common.txt
+    #   albumentations
+    #   mistral-common
+packaging==26.0
+    # via
+    #   -c requirements/xpu.txt
+    #   accelerate
+    #   datasets
+    #   evaluate
+    #   huggingface-hub
+    #   lazy-loader
+    #   modelscope
+    #   pooch
+    #   pytest
+    #   pytest-rerunfailures
+    #   scikit-image
+    #   transformers
+    #   typepy
+pandas==3.0.1
+    # via
+    #   datasets
+    #   evaluate
+pathvalidate==3.3.1
+    # via pytablewriter
+pillow==12.1.1
+    # via
+    #   imageio
+    #   mistral-common
+    #   scikit-image
+    #   torchvision
+platformdirs==4.9.4
+    # via pooch
+pluggy==1.6.0
+    # via
+    #   pytest
+    #   pytest-cov
+polars==1.39.3
+    # via mteb
+polars-runtime-32==1.39.3
+    # via polars
+pooch==1.8.2
+    # via
+    #   -r requirements/test/xpu.in
+    #   librosa
+portalocker==3.2.0
+    # via sacrebleu
+pqdm==0.2.0
+    # via -r requirements/test/xpu.in
+propcache==0.4.1
+    # via
+    #   aiohttp
+    #   yarl
+psutil==7.2.2
+    # via accelerate
+py==1.11.0
+    # via pytest-forked
+pyarrow==23.0.1
+    # via datasets
+pycountry==26.2.16
+    # via pydantic-extra-types
+pycparser==3.0
+    # via cffi
+pycryptodomex==3.23.0
+    # via blobfile
+pydantic==2.12.5
+    # via
+    #   -c requirements/common.txt
+    #   albumentations
+    #   fastapi
+    #   gpt-oss
+    #   mistral-common
+    #   mteb
+    #   openai-harmony
+    #   pydantic-extra-types
+pydantic-core==2.41.5
+    # via pydantic
+pydantic-extra-types==2.11.1
+    # via mistral-common
+pyelftools==0.32
+    # via triton-xpu
+pygments==2.20.0
+    # via
+    #   pytest
+    #   rich
+pyrate-limiter==4.1.0
+    # via schemathesis
+pystemmer==3.0.0
+    # via
+    #   -r requirements/test/xpu.in
+    #   mteb
+pytablewriter==1.2.1
+    # via lm-eval
+pytest==9.0.2
+    # via
+    #   -r requirements/test/xpu.in
+    #   pytest-asyncio
+    #   pytest-cov
+    #   pytest-forked
+    #   pytest-rerunfailures
+    #   pytest-shard
+    #   pytest-timeout
+    #   schemathesis
+pytest-asyncio==1.3.0
+    # via -r requirements/test/xpu.in
+pytest-cov==6.3.0
+    # via -r requirements/test/xpu.in
+pytest-forked==1.6.0
+    # via -r requirements/test/xpu.in
+pytest-rerunfailures==14.0
+    # via -r requirements/test/xpu.in
+pytest-shard==0.1.2
+    # via -r requirements/test/xpu.in
+pytest-timeout==2.3.1
+    # via -r requirements/test/xpu.in
+python-dateutil==2.9.0.post0
+    # via
+    #   pandas
+    #   typepy
+pytrec-eval-terrier==0.5.10
+    # via mteb
+pytz==2026.1.post1
+    # via typepy
+pyyaml==6.0.3
+    # via
+    #   accelerate
+    #   albumentations
+    #   datasets
+    #   huggingface-hub
+    #   schemathesis
+    #   timm
+    #   transformers
+rapidfuzz==3.12.1
+    # via
+    #   -r requirements/test/xpu.in
+    #   jiwer
+referencing==0.37.0
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+regex==2026.3.32
+    # via
+    #   nltk
+    #   sacrebleu
+    #   tiktoken
+    #   transformers
+requests==2.33.1
+    # via
+    #   -c requirements/common.txt
+    #   datasets
+    #   docker
+    #   evaluate
+    #   gpt-oss
+    #   lm-eval
+    #   mistral-common
+    #   modelscope
+    #   mteb
+    #   pooch
+    #   schemathesis
+    #   starlette-testclient
+    #   tiktoken
+rich==14.3.3
+    # via
+    #   mteb
+    #   schemathesis
+    #   typer
+rouge-score==0.1.2
+    # via lm-eval
+rpds-py==0.30.0
+    # via
+    #   jsonschema
+    #   referencing
+sacrebleu==2.6.0
+    # via lm-eval
+safetensors==0.7.0
+    # via
+    #   -c requirements/common.txt
+    #   accelerate
+    #   timm
+    #   transformers
+schemathesis==4.14.2
+    # via -r requirements/test/xpu.in
+scikit-image==0.26.0
+    # via albumentations
+scikit-learn==1.8.0
+    # via
+    #   albumentations
+    #   librosa
+    #   lm-eval
+    #   mteb
+    #   sentence-transformers
+scipy==1.17.1
+    # via
+    #   albumentations
+    #   bm25s
+    #   librosa
+    #   mteb
+    #   pytrec-eval-terrier
+    #   scikit-image
+    #   scikit-learn
+    #   sentence-transformers
+sentence-transformers==5.3.0
+    # via mteb
+setuptools==80.10.2
+    # via
+    #   -c requirements/common.txt
+    #   -c requirements/xpu.txt
+    #   modelscope
+    #   pytablewriter
+    #   torch
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via
+    #   -c requirements/common.txt
+    #   junit-xml
+    #   python-dateutil
+    #   rouge-score
+sortedcontainers==2.4.0
+    # via hypothesis
+soundfile==0.13.1
+    # via
+    #   -r requirements/test/xpu.in
+    #   librosa
+    #   mistral-common
+soxr==0.5.0.post1
+    # via
+    #   -r requirements/test/xpu.in
+    #   librosa
+    #   mistral-common
+sqlitedict==2.1.0
+    # via lm-eval
+starlette==1.0.0
+    # via
+    #   fastapi
+    #   starlette-testclient
+starlette-testclient==0.4.1
+    # via schemathesis
+structlog==25.5.0
+    # via gpt-oss
+sympy==1.14.0
+    # via torch
+tabledata==1.3.4
+    # via pytablewriter
+tabulate==0.10.0
+    # via sacrebleu
+tbb==2022.3.1
+    # via
+    #   intel-opencl-rt
+    #   mkl
+    #   torch
+tblib==3.1.0
+    # via -r requirements/test/xpu.in
+tcmlib==1.4.1
+    # via
+    #   tbb
+    #   torch
+    #   umf
+tcolorpy==0.1.7
+    # via pytablewriter
+tenacity==9.1.4
+    # via
+    #   gpt-oss
+    #   lm-eval
+    #   schemathesis
+termcolor==3.3.0
+    # via gpt-oss
+threadpoolctl==3.6.0
+    # via scikit-learn
+tifffile==2026.3.3
+    # via scikit-image
+tiktoken==0.12.0
+    # via
+    #   -c requirements/common.txt
+    #   gpt-oss
+    #   lm-eval
+    #   mistral-common
+timm==1.0.17
+    # via -r requirements/test/xpu.in
+tokenizers==0.22.2
+    # via
+    #   -c requirements/common.txt
+    #   transformers
+torch==2.11.0+xpu
+    # via
+    #   -c requirements/xpu.txt
+    #   accelerate
+    #   mteb
+    #   sentence-transformers
+    #   timm
+    #   torchvision
+torchvision==0.26.0+xpu
+    # via timm
+tqdm==4.67.3
+    # via
+    #   datasets
+    #   evaluate
+    #   huggingface-hub
+    #   lm-eval
+    #   modelscope
+    #   mteb
+    #   nltk
+    #   pqdm
+    #   sentence-transformers
+    #   transformers
+transformers==5.5.3
+    # via
+    #   -c requirements/common.txt
+    #   sentence-transformers
+triton-xpu==3.7.0
+    # via torch
+typepy==1.3.4
+    # via
+    #   dataproperty
+    #   pytablewriter
+    #   tabledata
+typer==0.24.1
+    # via
+    #   huggingface-hub
+    #   transformers
+typing-extensions==4.15.0
+    # via
+    #   -c requirements/common.txt
+    #   aiosignal
+    #   albumentations
+    #   anyio
+    #   chz
+    #   fastapi
+    #   huggingface-hub
+    #   librosa
+    #   lm-eval
+    #   mistral-common
+    #   mteb
+    #   pqdm
+    #   pydantic
+    #   pydantic-core
+    #   pydantic-extra-types
+    #   pytest-asyncio
+    #   referencing
+    #   schemathesis
+    #   sentence-transformers
+    #   starlette
+    #   torch
+    #   typing-inspection
+typing-inspection==0.4.2
+    # via
+    #   fastapi
+    #   pydantic
+umf==1.0.3
+    # via
+    #   intel-cmplr-lib-ur
+    #   torch
+urllib3==2.6.3
+    # via
+    #   blobfile
+    #   docker
+    #   modelscope
+    #   requests
+uvicorn==0.42.0
+    # via gpt-oss
+werkzeug==3.1.7
+    # via schemathesis
+word2number==1.1
+    # via lm-eval
+xxhash==3.6.0
+    # via
+    #   datasets
+    #   evaluate
+yarl==1.23.0
+    # via aiohttp
+zstandard==0.25.0
+    # via lm-eval
diff --git a/requirements/tpu.txt b/requirements/tpu.txt
index 7695b4ba2f4c..a37a466cde79 100644
--- a/requirements/tpu.txt
+++ b/requirements/tpu.txt
@@ -10,5 +10,6 @@ jinja2>=3.1.6
 ray[default]
 ray[data]
 setuptools==78.1.0
+setuptools-rust>=1.9.0
 nixl==0.3.0
-tpu-inference==0.12.0
+tpu-inference==0.19.0
diff --git a/requirements/xpu-test.txt b/requirements/xpu-test.txt
deleted file mode 100644
index 2a9a0e06aa74..000000000000
--- a/requirements/xpu-test.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-# XPU Test Dependencies
-# NOTE: Base image already has common.txt + xpu.txt installed,
-#       and vllm-openai stage has pytest, pytest-asyncio, lm-eval[api].
-#       This file only adds incremental test-specific packages.
-
-# Additional test infrastructure (pytest/pytest-asyncio already in base)
-# This file was autogenerated by uv via the following command:
-#    uv pip compile /workspace/vllm/requirements/xpu-test.in -o /workspace/vllm/requirements/xpu-test.txt -c /workspace/vllm/requirements/xpu.txt --index-strategy unsafe-best-match --extra-index-url ${PIP_EXTRA_INDEX_URL} --python-version ${PYTHON_VERSION} 
-tblib==3.1.0
-pytest-timeout==2.3.1
-pytest-cov==6.3.0
-pytest-forked==1.6.0
-pytest-rerunfailures==14.0
-pytest-shard==0.1.2
-
-arctic-inference==0.1.1
-
-# Required for audio processing tests
-librosa==0.10.2.post1
-audioread==3.0.1
-soxr==0.5.0.post1
-pooch==1.8.2
-soundfile==0.13.1
-
-# Required for Mistral's streaming tool parser
-blobfile==3.0.0
-rapidfuzz==3.12.1
-
-# Required for Mistral's streaming tool parser and some evaluation scripts
-gpt-oss==0.0.8
-schemathesis==3.39.15
-jiwer==4.0.0
-bm25s==0.2.13
-pystemmer==3.0.0
-mteb[bm25s]>=2, <3
-num2words==0.5.14
-pqdm==0.2.0
-
-# Required for some evaluation scripts
-timm==1.0.17
-albumentations==1.4.6
-mistral-common[image,audio]==1.9.1
\ No newline at end of file
diff --git a/requirements/xpu.txt b/requirements/xpu.txt
index 0cddd6dc6abb..1bbf777b5202 100644
--- a/requirements/xpu.txt
+++ b/requirements/xpu.txt
@@ -5,14 +5,16 @@ ray>=2.9
 cmake>=3.26.1
 packaging>=24.2
 setuptools-scm>=8
+setuptools-rust>=1.9.0
 setuptools>=77.0.3,<81.0.0
 wheel
 jinja2>=3.1.6
 datasets # for benchmark scripts
-numba == 0.61.2 # Required for N-gram speculative decoding
+numba == 0.65.0 # Required for N-gram speculative decoding
 --extra-index-url=https://download.pytorch.org/whl/xpu
-torch==2.10.0+xpu
+torch==2.11.0+xpu
 torchaudio
 torchvision
 
-vllm_xpu_kernels @ https://github.com/vllm-project/vllm-xpu-kernels/releases/download/v0.1.4/vllm_xpu_kernels-0.1.4-cp38-abi3-manylinux_2_28_x86_64.whl
+auto_round_lib>=0.13.0
+vllm_xpu_kernels @ https://github.com/vllm-project/vllm-xpu-kernels/releases/download/v0.1.8/vllm_xpu_kernels-0.1.8-cp38-abi3-manylinux_2_28_x86_64.whl
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
new file mode 100644
index 000000000000..4933b3ba1707
--- /dev/null
+++ b/rust-toolchain.toml
@@ -0,0 +1,2 @@
+[toolchain]
+channel = "1.95"
diff --git a/rust/.gitattributes b/rust/.gitattributes
new file mode 100644
index 000000000000..2f7173592d7f
--- /dev/null
+++ b/rust/.gitattributes
@@ -0,0 +1,2 @@
+src/chat/src/renderer/deepseek_v32/fixtures/** linguist-generated=true
+src/chat/tests/templates/** linguist-vendored=true
diff --git a/rust/.gitignore b/rust/.gitignore
new file mode 100644
index 000000000000..14f70eb7d43d
--- /dev/null
+++ b/rust/.gitignore
@@ -0,0 +1,3 @@
+/target
+AGENTS.override.md
+.vscode
diff --git a/rust/AGENTS.md b/rust/AGENTS.md
new file mode 100644
index 000000000000..37117c7490bb
--- /dev/null
+++ b/rust/AGENTS.md
@@ -0,0 +1,37 @@
+# Alternative Frontend to vLLM Engine in Rust
+
+This project aims to implement an alternative frontend to the vLLM Engine in Rust, providing a more efficient and robust interface for interacting with the engine. Currently it's still in the very early stage and is actively evolving.
+
+## Coding Styles
+
+- Always use workspace dependencies for Cargo crates.
+- Prefer splitting code into multiple smaller modules and files for better organization and readability, rather than putting everything in a single file.
+- When refactoring or reconstructing code, always preserve the original comments and documentation VERBATIM, if applicable.
+- If not specified, default to writing concise Rust documentation and comments that match the style of the existing codebase when generating code.
+- When migrating code from Python or any other language, preserve the original documentation comments whenever they still make sense in the Rust code.
+- Although you might be asked to only implement or migrate minimal functionality at the beginning, you should still leave necessary `TODO` comments in the code for the future improvements of the lacked features, so that it's easier for the next iteration to build upon the existing codebase.
+- When writing parsers with `winnow`:
+    - Prefer a declarative parser shape over imperative step-by-step parsing, as long as it's more readable and maintainable.
+    - Prefer tuple-based parser composition over calling `parse_next` one parser at a time.
+    - Prefer built-in combinators and token parsers before adding local helpers.
+    - Add short documentation comments like `Parse a ..` to all local parser/combinator functions.
+    - Reuse existing utilities from `utils` module as much as possible, and add new ones there if needed.
+- Rust error handling:
+    - Never call `to_string()` directly on an error value.
+    - Use `ToReportString` or `AsReport` by `thiserror-ext` instead.
+    - For `Error` variants that are primarily free-form text, prefer a struct variant with a `message: String` field. `thiserror_ext::Macro` will auto-derive `foo!(...)` and `bail_foo!(...)` helper macros from that shape.
+        - Use `foo!(...)` when you need to construct an error value inside an expression, such as `Err(foo!(...))`, `.ok_or_else(|| foo!(...))`, or `Err::<(), _>(foo!(...))?`.
+        - Use `bail_foo!(...)` only in statement positions where you want to exit the current `Result`-returning function immediately. Prefer it over `return Err(foo!(...))` in those cases.
+        - If a variant has extra structured fields, prefer the generated macro form `foo!(field = value, "message")` rather than manually writing `Error::Foo { ... }`.
+- Since the project is still in early stage, it's fine to break API and make non-backwards-compatible changes as needed.
+- Currently the project is only targeting Unix-like platforms, so it's fine to use Unix-specific APIs without extra compatibility layers like `cfg(unix)`
+
+## Testing
+
+- Prefer snapshot testing with the `expect-test` crate over writing multiple `assert_eq!` statements on individual fields. Use `expect_test::expect![[...]].assert_debug_eq(...)` to snapshot the `Debug` output of the entire struct.
+    - Write `expect![[""]]` as a placeholder first, then run `UPDATE_EXPECT=1 cargo test` to auto-fill the snapshot content.
+    - For values containing non-deterministic data (e.g., UUIDs), set them to a fixed value like `"<placeholder>"` before snapshotting.
+- In tests, avoid hand-writing full request struct literals when only a few fields matter. Prefer test fixtures such as `for_test()` with struct update syntax, so newly added fields do not force mechanical edits across many tests.
+- Prefer deterministic synchronization in async and integration tests, such as channels, barriers, explicit handshakes, or observable state transitions, instead of `sleep`-based timing assumptions.
+    - Use `sleep` only as a last resort when there is no better observable synchronization point.
+- Always run test with `cargo nextest run` instead of `cargo test`, if available, as it's much faster.
diff --git a/rust/CLAUDE.md b/rust/CLAUDE.md
new file mode 100644
index 000000000000..f96fd34544be
--- /dev/null
+++ b/rust/CLAUDE.md
@@ -0,0 +1,4 @@
+# CLAUDE.md
+
+First, check @AGENTS.override.md if exists.
+Then, follow instructions in @AGENTS.md.
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
new file mode 100644
index 000000000000..16329d1fb3ad
--- /dev/null
+++ b/rust/Cargo.lock
@@ -0,0 +1,6611 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "adler2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+
+[[package]]
+name = "ahash"
+version = "0.8.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
+dependencies = [
+ "cfg-if",
+ "getrandom 0.3.4",
+ "once_cell",
+ "serde",
+ "version_check",
+ "zerocopy",
+]
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "aligned"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee4508988c62edf04abd8d92897fca0c2995d907ce1dfeaf369dac3716a40685"
+dependencies = [
+ "as-slice",
+]
+
+[[package]]
+name = "aligned-vec"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b"
+dependencies = [
+ "equator",
+]
+
+[[package]]
+name = "android_system_properties"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "anes"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
+
+[[package]]
+name = "anstream"
+version = "0.6.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
+dependencies = [
+ "anstyle",
+ "anstyle-parse 0.2.7",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstream"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
+dependencies = [
+ "anstyle",
+ "anstyle-parse 1.0.0",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
+
+[[package]]
+name = "anstyle-parse"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-parse"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
+dependencies = [
+ "anstyle",
+ "once_cell_polyfill",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "anyhow"
+version = "1.0.102"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
+
+[[package]]
+name = "arbitrary"
+version = "1.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
+
+[[package]]
+name = "arc-swap"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6"
+dependencies = [
+ "rustversion",
+]
+
+[[package]]
+name = "arg_enum_proc_macro"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "arrayref"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
+
+[[package]]
+name = "arrayvec"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
+
+[[package]]
+name = "as-slice"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "516b6b4f0e40d50dcda9365d53964ec74560ad4284da2e7fc97122cd83174516"
+dependencies = [
+ "stable_deref_trait",
+]
+
+[[package]]
+name = "async-io"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "456b8a8feb6f42d237746d4b3e9a178494627745c3c56c6ea55d92ba50d026fc"
+dependencies = [
+ "autocfg",
+ "cfg-if",
+ "concurrent-queue",
+ "futures-io",
+ "futures-lite",
+ "parking",
+ "polling",
+ "rustix",
+ "slab",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "async-openai"
+version = "0.33.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc48c3deb4ad9a2ee8c8e364c79eb0f74e69e17ed7e883d55988b90ea44fe986"
+dependencies = [
+ "async-openai-macros",
+ "backoff",
+ "base64 0.22.1",
+ "bytes",
+ "derive_builder",
+ "eventsource-stream",
+ "futures",
+ "getrandom 0.3.4",
+ "hex",
+ "hmac",
+ "rand 0.9.2",
+ "reqwest",
+ "reqwest-eventsource",
+ "secrecy",
+ "serde",
+ "serde_json",
+ "serde_urlencoded",
+ "sha2",
+ "thiserror 2.0.18",
+ "tokio",
+ "tokio-stream",
+ "tokio-tungstenite",
+ "tokio-util",
+ "tracing",
+ "url",
+]
+
+[[package]]
+name = "async-openai-macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81872a8e595e8ceceab71c6ba1f9078e313b452a1e31934e6763ef5d308705e4"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "async-trait"
+version = "0.1.89"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "asynchronous-codec"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a860072022177f903e59730004fb5dc13db9275b79bb2aef7ba8ce831956c233"
+dependencies = [
+ "bytes",
+ "futures-sink",
+ "futures-util",
+ "memchr",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "asynk-strim"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52697735bdaac441a29391a9e97102c74c6ef0f9b60a40cf109b1b404e29d2f6"
+dependencies = [
+ "futures-core",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "asynk-strim-attr"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6ccb67be092524ce594e599332719f1cd6d64dcaed8d46f1e8726d466c10bcb"
+dependencies = [
+ "asynk-strim",
+ "asynk-strim-attr-macro",
+ "futures-core",
+]
+
+[[package]]
+name = "asynk-strim-attr-macro"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34e40a2181bb16fb68e25c49c8b3e25bbb9a808bf8f9f83bc596ac4ad70c86a1"
+dependencies = [
+ "proc-macro-crate",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "atomic-waker"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
+
+[[package]]
+name = "autocfg"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
+
+[[package]]
+name = "av-scenechange"
+version = "0.14.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f321d77c20e19b92c39e7471cf986812cbb46659d2af674adc4331ef3f18394"
+dependencies = [
+ "aligned",
+ "anyhow",
+ "arg_enum_proc_macro",
+ "arrayvec",
+ "log",
+ "num-rational",
+ "num-traits",
+ "pastey",
+ "rayon",
+ "thiserror 2.0.18",
+ "v_frame",
+ "y4m",
+]
+
+[[package]]
+name = "av1-grain"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8cfddb07216410377231960af4fcab838eaa12e013417781b78bd95ee22077f8"
+dependencies = [
+ "anyhow",
+ "arrayvec",
+ "log",
+ "nom 8.0.0",
+ "num-rational",
+ "v_frame",
+]
+
+[[package]]
+name = "avif-serialize"
+version = "0.8.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "375082f007bd67184fb9c0374614b29f9aaa604ec301635f72338bb65386a53d"
+dependencies = [
+ "arrayvec",
+]
+
+[[package]]
+name = "axum"
+version = "0.8.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8"
+dependencies = [
+ "axum-core",
+ "bytes",
+ "form_urlencoded",
+ "futures-util",
+ "http",
+ "http-body",
+ "http-body-util",
+ "hyper",
+ "hyper-util",
+ "itoa",
+ "matchit",
+ "memchr",
+ "mime",
+ "percent-encoding",
+ "pin-project-lite",
+ "serde_core",
+ "serde_json",
+ "serde_path_to_error",
+ "serde_urlencoded",
+ "sync_wrapper",
+ "tokio",
+ "tower",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "axum-core"
+version = "0.5.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "http",
+ "http-body",
+ "http-body-util",
+ "mime",
+ "pin-project-lite",
+ "sync_wrapper",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "backoff"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1"
+dependencies = [
+ "futures-core",
+ "getrandom 0.2.17",
+ "instant",
+ "pin-project-lite",
+ "rand 0.8.5",
+ "tokio",
+]
+
+[[package]]
+name = "base64"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
+
+[[package]]
+name = "base64"
+version = "0.22.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
+
+[[package]]
+name = "base64ct"
+version = "1.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06"
+
+[[package]]
+name = "bit-set"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
+dependencies = [
+ "bit-vec 0.6.3",
+]
+
+[[package]]
+name = "bit-set"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
+dependencies = [
+ "bit-vec 0.8.0",
+]
+
+[[package]]
+name = "bit-vec"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
+
+[[package]]
+name = "bit-vec"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
+
+[[package]]
+name = "bit_field"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e4b40c7323adcfc0a41c4b88143ed58346ff65a288fc144329c5c45e05d70c6"
+
+[[package]]
+name = "bitflags"
+version = "2.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
+
+[[package]]
+name = "bitstream-io"
+version = "4.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7eff00be299a18769011411c9def0d827e8f2d7bf0c3dbf53633147a8867fd1f"
+dependencies = [
+ "no_std_io2",
+]
+
+[[package]]
+name = "blake3"
+version = "1.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0aa83c34e62843d924f905e0f5c866eb1dd6545fc4d719e803d9ba6030371fce"
+dependencies = [
+ "arrayref",
+ "arrayvec",
+ "cc",
+ "cfg-if",
+ "constant_time_eq",
+ "cpufeatures 0.3.0",
+]
+
+[[package]]
+name = "block-buffer"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
+dependencies = [
+ "generic-array",
+]
+
+[[package]]
+name = "bstr"
+version = "1.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab"
+dependencies = [
+ "memchr",
+ "regex-automata",
+ "serde",
+]
+
+[[package]]
+name = "built"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4ad8f11f288f48ca24471bbd51ac257aaeaaa07adae295591266b792902ae64"
+
+[[package]]
+name = "bumpalo"
+version = "3.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
+
+[[package]]
+name = "bytemuck"
+version = "1.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
+dependencies = [
+ "bytemuck_derive",
+]
+
+[[package]]
+name = "bytemuck_derive"
+version = "1.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
+name = "byteorder-lite"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495"
+
+[[package]]
+name = "bytes"
+version = "1.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "cast"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
+
+[[package]]
+name = "castaway"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a"
+dependencies = [
+ "rustversion",
+]
+
+[[package]]
+name = "cc"
+version = "1.2.56"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2"
+dependencies = [
+ "find-msvc-tools",
+ "jobserver",
+ "libc",
+ "shlex",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "cfg_aliases"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
+
+[[package]]
+name = "chrono"
+version = "0.4.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
+dependencies = [
+ "iana-time-zone",
+ "js-sys",
+ "num-traits",
+ "serde",
+ "wasm-bindgen",
+ "windows-link",
+]
+
+[[package]]
+name = "ciborium"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
+dependencies = [
+ "ciborium-io",
+ "ciborium-ll",
+ "serde",
+]
+
+[[package]]
+name = "ciborium-io"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
+
+[[package]]
+name = "ciborium-ll"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
+dependencies = [
+ "ciborium-io",
+ "half",
+]
+
+[[package]]
+name = "clap"
+version = "4.5.60"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a"
+dependencies = [
+ "clap_builder",
+ "clap_derive",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.5.60"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876"
+dependencies = [
+ "anstream 0.6.21",
+ "anstyle",
+ "clap_lex",
+ "strsim",
+]
+
+[[package]]
+name = "clap_derive"
+version = "4.5.55"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "clap_lex"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831"
+
+[[package]]
+name = "color_quant"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
+
+[[package]]
+name = "colorchoice"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
+
+[[package]]
+name = "compact_str"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
+dependencies = [
+ "castaway",
+ "cfg-if",
+ "itoa",
+ "rustversion",
+ "ryu",
+ "serde",
+ "static_assertions",
+]
+
+[[package]]
+name = "concurrent-queue"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "console"
+version = "0.15.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
+dependencies = [
+ "encode_unicode",
+ "libc",
+ "once_cell",
+ "unicode-width",
+ "windows-sys 0.59.0",
+]
+
+[[package]]
+name = "console"
+version = "0.16.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "03e45a4a8926227e4197636ba97a9fc9b00477e9f4bd711395687c5f0734bec4"
+dependencies = [
+ "encode_unicode",
+ "libc",
+ "once_cell",
+ "unicode-width",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "constant_time_eq"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
+
+[[package]]
+name = "cookie"
+version = "0.18.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747"
+dependencies = [
+ "percent-encoding",
+ "time",
+ "version_check",
+]
+
+[[package]]
+name = "cookie_store"
+version = "0.22.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15b2c103cf610ec6cae3da84a766285b42fd16aad564758459e6ecf128c75206"
+dependencies = [
+ "cookie",
+ "document-features",
+ "idna",
+ "indexmap 2.13.0",
+ "log",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "time",
+ "url",
+]
+
+[[package]]
+name = "core-foundation"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "core-foundation"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "core-foundation-sys"
+version = "0.8.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
+
+[[package]]
+name = "cpufeatures"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "cpufeatures"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "crc32fast"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "criterion"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
+dependencies = [
+ "anes",
+ "cast",
+ "ciborium",
+ "clap",
+ "criterion-plot",
+ "is-terminal",
+ "itertools 0.10.5",
+ "num-traits",
+ "once_cell",
+ "oorandom",
+ "plotters",
+ "rayon",
+ "regex",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "tinytemplate",
+ "walkdir",
+]
+
+[[package]]
+name = "criterion-plot"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
+dependencies = [
+ "cast",
+ "itertools 0.10.5",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-queue"
+version = "0.3.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "crunchy"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
+
+[[package]]
+name = "crypto-common"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
+[[package]]
+name = "daachorse"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63b7ef7a4be509357f4804d0a22e830daddb48f19fd604e4ad32ddce04a94c36"
+
+[[package]]
+name = "darling"
+version = "0.20.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
+dependencies = [
+ "darling_core 0.20.11",
+ "darling_macro 0.20.11",
+]
+
+[[package]]
+name = "darling"
+version = "0.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d"
+dependencies = [
+ "darling_core 0.23.0",
+ "darling_macro 0.23.0",
+]
+
+[[package]]
+name = "darling_core"
+version = "0.20.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
+dependencies = [
+ "fnv",
+ "ident_case",
+ "proc-macro2",
+ "quote",
+ "strsim",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "darling_core"
+version = "0.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0"
+dependencies = [
+ "ident_case",
+ "proc-macro2",
+ "quote",
+ "strsim",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "darling_macro"
+version = "0.20.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
+dependencies = [
+ "darling_core 0.20.11",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "darling_macro"
+version = "0.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
+dependencies = [
+ "darling_core 0.23.0",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "dary_heap"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "der"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "71fd89660b2dc699704064e59e9dba0147b903e85319429e131620d022be411b"
+dependencies = [
+ "pem-rfc7468",
+ "zeroize",
+]
+
+[[package]]
+name = "deranged"
+version = "0.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c"
+dependencies = [
+ "powerfmt",
+ "serde_core",
+]
+
+[[package]]
+name = "derive_builder"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
+dependencies = [
+ "derive_builder_macro",
+]
+
+[[package]]
+name = "derive_builder_core"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
+dependencies = [
+ "darling 0.20.11",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "derive_builder_macro"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
+dependencies = [
+ "derive_builder_core",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "derive_more"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05"
+dependencies = [
+ "derive_more-impl",
+]
+
+[[package]]
+name = "derive_more-impl"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+ "unicode-xid",
+]
+
+[[package]]
+name = "digest"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
+dependencies = [
+ "block-buffer",
+ "crypto-common",
+ "subtle",
+]
+
+[[package]]
+name = "dirs"
+version = "6.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e"
+dependencies = [
+ "dirs-sys",
+]
+
+[[package]]
+name = "dirs-sys"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab"
+dependencies = [
+ "libc",
+ "option-ext",
+ "redox_users",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "displaydoc"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "dissimilar"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aeda16ab4059c5fd2a83f2b9c9e9c981327b18aa8e3b313f7e6563799d4f093e"
+
+[[package]]
+name = "document-features"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61"
+dependencies = [
+ "litrs",
+]
+
+[[package]]
+name = "dtoa"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590"
+
+[[package]]
+name = "dyn-clone"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
+
+[[package]]
+name = "easy-ext"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8072bec12b909b65aec01fa6518f387cfbf3427d4475409ad622898cd347522c"
+
+[[package]]
+name = "educe"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d7bc049e1bd8cdeb31b68bbd586a9464ecf9f3944af3958a7a9d0f8b9799417"
+dependencies = [
+ "enum-ordinalize",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "either"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+
+[[package]]
+name = "encode_unicode"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
+
+[[package]]
+name = "encoding_rs"
+version = "0.8.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "enum-as-inner"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0359ee92f81184d7985519e474bda2a5738476334edd3746c9b1265c067afe70"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "enum-ordinalize"
+version = "4.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a1091a7bb1f8f2c4b28f1fe2cef4980ca2d410a3d727d67ecc3178c9b0800f0"
+dependencies = [
+ "enum-ordinalize-derive",
+]
+
+[[package]]
+name = "enum-ordinalize-derive"
+version = "4.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "env_filter"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef"
+dependencies = [
+ "log",
+ "regex",
+]
+
+[[package]]
+name = "env_logger"
+version = "0.11.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a"
+dependencies = [
+ "anstream 1.0.0",
+ "anstyle",
+ "env_filter",
+ "jiff",
+ "log",
+]
+
+[[package]]
+name = "equator"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc"
+dependencies = [
+ "equator-macro",
+]
+
+[[package]]
+name = "equator-macro"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "equivalent"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+
+[[package]]
+name = "errno"
+version = "0.3.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
+dependencies = [
+ "libc",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "esaxx-rs"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "eventsource-stream"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab"
+dependencies = [
+ "futures-core",
+ "nom 7.1.3",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "expect-test"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63af43ff4431e848fb47472a920f14fa71c24de13255a5692e93d4e90302acb0"
+dependencies = [
+ "dissimilar",
+ "once_cell",
+]
+
+[[package]]
+name = "exr"
+version = "1.74.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4300e043a56aa2cb633c01af81ca8f699a321879a7854d3896a0ba89056363be"
+dependencies = [
+ "bit_field",
+ "half",
+ "lebe",
+ "miniz_oxide",
+ "rayon-core",
+ "smallvec",
+ "zune-inflate",
+]
+
+[[package]]
+name = "fancy-regex"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
+dependencies = [
+ "bit-set 0.5.3",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "fancy-regex"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8"
+dependencies = [
+ "bit-set 0.8.0",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "fast_image_resize"
+version = "6.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "12dd43e5011e8d8411a3215a0d57a2ec5c68282fb90eb5d7221fab0113442174"
+dependencies = [
+ "bytemuck",
+ "cfg-if",
+ "document-features",
+ "image",
+ "num-traits",
+ "thiserror 2.0.18",
+]
+
+[[package]]
+name = "fastokens"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "796a262ed47d1458a4b40d0ed831c927e6f54d5b9c1de2683bb4ac9b04f4c7cc"
+dependencies = [
+ "daachorse",
+ "fancy-regex 0.17.0",
+ "hf-hub 0.4.3",
+ "icu_normalizer",
+ "memchr",
+ "pcre2",
+ "rayon",
+ "serde",
+ "serde_json",
+ "strum",
+ "thiserror 2.0.18",
+]
+
+[[package]]
+name = "fastrand"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
+
+[[package]]
+name = "fax"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f05de7d48f37cd6730705cbca900770cab77a89f413d23e100ad7fad7795a0ab"
+dependencies = [
+ "fax_derive",
+]
+
+[[package]]
+name = "fax_derive"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0aca10fb742cb43f9e7bb8467c91aa9bcb8e3ffbc6a6f7389bb93ffc920577d"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "fdeflate"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c"
+dependencies = [
+ "simd-adler32",
+]
+
+[[package]]
+name = "find-msvc-tools"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
+
+[[package]]
+name = "fixedbitset"
+version = "0.5.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
+
+[[package]]
+name = "flate2"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
+[[package]]
+name = "foldhash"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
+
+[[package]]
+name = "foreign-types"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
+dependencies = [
+ "foreign-types-shared",
+]
+
+[[package]]
+name = "foreign-types-shared"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
+
+[[package]]
+name = "form_urlencoded"
+version = "1.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf"
+dependencies = [
+ "percent-encoding",
+]
+
+[[package]]
+name = "fslock"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04412b8935272e3a9bae6f48c7bfff74c2911f60525404edfdd28e49884c3bfb"
+dependencies = [
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "futures"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-executor",
+ "futures-io",
+ "futures-sink",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-channel"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d"
+dependencies = [
+ "futures-core",
+ "futures-sink",
+]
+
+[[package]]
+name = "futures-core"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
+
+[[package]]
+name = "futures-executor"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-io"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718"
+
+[[package]]
+name = "futures-lite"
+version = "2.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f78e10609fe0e0b3f4157ffab1876319b5b0db102a2c60dc4626306dc46b44ad"
+dependencies = [
+ "futures-core",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "futures-macro"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "futures-sink"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893"
+
+[[package]]
+name = "futures-task"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
+
+[[package]]
+name = "futures-timer"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
+
+[[package]]
+name = "futures-util"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-io",
+ "futures-macro",
+ "futures-sink",
+ "futures-task",
+ "memchr",
+ "pin-project-lite",
+ "slab",
+]
+
+[[package]]
+name = "generic-array"
+version = "0.14.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
+dependencies = [
+ "typenum",
+ "version_check",
+]
+
+[[package]]
+name = "getopts"
+version = "0.2.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
+dependencies = [
+ "unicode-width",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
+dependencies = [
+ "cfg-if",
+ "js-sys",
+ "libc",
+ "wasi",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
+dependencies = [
+ "cfg-if",
+ "js-sys",
+ "libc",
+ "r-efi 5.3.0",
+ "wasip2",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi 6.0.0",
+ "wasip2",
+ "wasip3",
+]
+
+[[package]]
+name = "gif"
+version = "0.14.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee8cfcc411d9adbbaba82fb72661cc1bcca13e8bba98b364e62b2dba8f960159"
+dependencies = [
+ "color_quant",
+ "weezl",
+]
+
+[[package]]
+name = "h2"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "http",
+ "indexmap 2.13.0",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
+[[package]]
+name = "half"
+version = "2.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
+dependencies = [
+ "bytemuck",
+ "cfg-if",
+ "crunchy",
+ "zerocopy",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+
+[[package]]
+name = "hashbrown"
+version = "0.14.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
+dependencies = [
+ "ahash",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.15.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
+dependencies = [
+ "foldhash",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "hermit-abi"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+
+[[package]]
+name = "hex"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
+
+[[package]]
+name = "hf-hub"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97"
+dependencies = [
+ "dirs",
+ "http",
+ "indicatif 0.17.11",
+ "libc",
+ "log",
+ "rand 0.9.2",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "ureq 2.12.1",
+ "windows-sys 0.60.2",
+]
+
+[[package]]
+name = "hf-hub"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aef3982638978efa195ff11b305f51f1f22f4f0a6cabee7af79b383ebee6a213"
+dependencies = [
+ "dirs",
+ "futures",
+ "http",
+ "indicatif 0.18.4",
+ "libc",
+ "log",
+ "native-tls",
+ "num_cpus",
+ "rand 0.9.2",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "tokio",
+ "ureq 3.3.0",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "hmac"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
+dependencies = [
+ "digest",
+]
+
+[[package]]
+name = "hound"
+version = "3.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f"
+
+[[package]]
+name = "http"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a"
+dependencies = [
+ "bytes",
+ "itoa",
+]
+
+[[package]]
+name = "http-body"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
+dependencies = [
+ "bytes",
+ "http",
+]
+
+[[package]]
+name = "http-body-util"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "http",
+ "http-body",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "httparse"
+version = "1.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
+
+[[package]]
+name = "httpdate"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
+
+[[package]]
+name = "hyper"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "futures-channel",
+ "futures-core",
+ "h2",
+ "http",
+ "http-body",
+ "httparse",
+ "httpdate",
+ "itoa",
+ "pin-project-lite",
+ "pin-utils",
+ "smallvec",
+ "tokio",
+ "want",
+]
+
+[[package]]
+name = "hyper-rustls"
+version = "0.27.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
+dependencies = [
+ "http",
+ "hyper",
+ "hyper-util",
+ "rustls",
+ "rustls-native-certs",
+ "rustls-pki-types",
+ "tokio",
+ "tokio-rustls",
+ "tower-service",
+ "webpki-roots 1.0.6",
+]
+
+[[package]]
+name = "hyper-timeout"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0"
+dependencies = [
+ "hyper",
+ "hyper-util",
+ "pin-project-lite",
+ "tokio",
+ "tower-service",
+]
+
+[[package]]
+name = "hyper-tls"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
+dependencies = [
+ "bytes",
+ "http-body-util",
+ "hyper",
+ "hyper-util",
+ "native-tls",
+ "tokio",
+ "tokio-native-tls",
+ "tower-service",
+]
+
+[[package]]
+name = "hyper-util"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0"
+dependencies = [
+ "base64 0.22.1",
+ "bytes",
+ "futures-channel",
+ "futures-util",
+ "http",
+ "http-body",
+ "hyper",
+ "ipnet",
+ "libc",
+ "percent-encoding",
+ "pin-project-lite",
+ "socket2",
+ "system-configuration",
+ "tokio",
+ "tower-service",
+ "tracing",
+ "windows-registry",
+]
+
+[[package]]
+name = "iana-time-zone"
+version = "0.1.65"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470"
+dependencies = [
+ "android_system_properties",
+ "core-foundation-sys",
+ "iana-time-zone-haiku",
+ "js-sys",
+ "log",
+ "wasm-bindgen",
+ "windows-core",
+]
+
+[[package]]
+name = "iana-time-zone-haiku"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "icu_collections"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43"
+dependencies = [
+ "displaydoc",
+ "potential_utf",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locale_core"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6"
+dependencies = [
+ "displaydoc",
+ "litemap",
+ "tinystr",
+ "writeable",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599"
+dependencies = [
+ "icu_collections",
+ "icu_normalizer_data",
+ "icu_properties",
+ "icu_provider",
+ "smallvec",
+ "utf16_iter",
+ "utf8_iter",
+ "write16",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer_data"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a"
+
+[[package]]
+name = "icu_properties"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec"
+dependencies = [
+ "icu_collections",
+ "icu_locale_core",
+ "icu_properties_data",
+ "icu_provider",
+ "zerotrie",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_properties_data"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af"
+
+[[package]]
+name = "icu_provider"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614"
+dependencies = [
+ "displaydoc",
+ "icu_locale_core",
+ "writeable",
+ "yoke",
+ "zerofrom",
+ "zerotrie",
+ "zerovec",
+]
+
+[[package]]
+name = "id-arena"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
+
+[[package]]
+name = "ident_case"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
+
+[[package]]
+name = "idna"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
+dependencies = [
+ "idna_adapter",
+ "smallvec",
+ "utf8_iter",
+]
+
+[[package]]
+name = "idna_adapter"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344"
+dependencies = [
+ "icu_normalizer",
+ "icu_properties",
+]
+
+[[package]]
+name = "image"
+version = "0.25.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85ab80394333c02fe689eaf900ab500fbd0c2213da414687ebf995a65d5a6104"
+dependencies = [
+ "bytemuck",
+ "byteorder-lite",
+ "color_quant",
+ "exr",
+ "gif",
+ "image-webp",
+ "moxcms",
+ "num-traits",
+ "png",
+ "qoi",
+ "ravif",
+ "rayon",
+ "rgb",
+ "tiff",
+ "zune-core",
+ "zune-jpeg",
+]
+
+[[package]]
+name = "image-webp"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "525e9ff3e1a4be2fbea1fdf0e98686a6d98b4d8f937e1bf7402245af1909e8c3"
+dependencies = [
+ "byteorder-lite",
+ "quick-error",
+]
+
+[[package]]
+name = "imgref"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7c5cedc30da3a610cac6b4ba17597bdf7152cf974e8aab3afb3d54455e371c8"
+
+[[package]]
+name = "indexmap"
+version = "1.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
+dependencies = [
+ "autocfg",
+ "hashbrown 0.12.3",
+ "serde",
+]
+
+[[package]]
+name = "indexmap"
+version = "2.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
+dependencies = [
+ "equivalent",
+ "hashbrown 0.16.1",
+ "serde",
+ "serde_core",
+]
+
+[[package]]
+name = "indicatif"
+version = "0.17.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
+dependencies = [
+ "console 0.15.11",
+ "number_prefix",
+ "portable-atomic",
+ "unicode-width",
+ "web-time",
+]
+
+[[package]]
+name = "indicatif"
+version = "0.18.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25470f23803092da7d239834776d653104d551bc4d7eacaf31e6837854b8e9eb"
+dependencies = [
+ "console 0.16.2",
+ "portable-atomic",
+ "unicode-width",
+ "unit-prefix",
+ "web-time",
+]
+
+[[package]]
+name = "instant"
+version = "0.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "interpolate_name"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "ipnet"
+version = "2.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2"
+
+[[package]]
+name = "iri-string"
+version = "0.7.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a"
+dependencies = [
+ "memchr",
+ "serde",
+]
+
+[[package]]
+name = "is-macro"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d57a3e447e24c22647738e4607f1df1e0ec6f72e16182c4cd199f647cdfb0e4"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "is-terminal"
+version = "0.4.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "is_terminal_polyfill"
+version = "1.70.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
+
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itertools"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itertools"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
+
+[[package]]
+name = "jiff"
+version = "0.2.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359"
+dependencies = [
+ "jiff-static",
+ "log",
+ "portable-atomic",
+ "portable-atomic-util",
+ "serde_core",
+]
+
+[[package]]
+name = "jiff-static"
+version = "0.2.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "jobserver"
+version = "0.1.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
+dependencies = [
+ "getrandom 0.3.4",
+ "libc",
+]
+
+[[package]]
+name = "js-sys"
+version = "0.3.91"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
+dependencies = [
+ "once_cell",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "lalrpop-util"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553"
+
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
+[[package]]
+name = "leb128fmt"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
+
+[[package]]
+name = "lebe"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8"
+
+[[package]]
+name = "libc"
+version = "0.2.183"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
+
+[[package]]
+name = "libfuzzer-sys"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f12a681b7dd8ce12bff52488013ba614b869148d54dd79836ab85aafdd53f08d"
+dependencies = [
+ "arbitrary",
+ "cc",
+]
+
+[[package]]
+name = "libm"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
+
+[[package]]
+name = "libredox"
+version = "0.1.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
+
+[[package]]
+name = "litemap"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77"
+
+[[package]]
+name = "litrs"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092"
+
+[[package]]
+name = "llm-multimodal"
+version = "1.5.0"
+source = "git+https://github.com/vllm-project/llm-multimodal?rev=5b558989844d1c7af3e43d0f604069ffd9c06320#5b558989844d1c7af3e43d0f604069ffd9c06320"
+dependencies = [
+ "base64 0.22.1",
+ "blake3",
+ "bytes",
+ "fast_image_resize",
+ "image",
+ "ndarray 0.17.2",
+ "once_cell",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "tokio",
+ "url",
+]
+
+[[package]]
+name = "lock_api"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
+dependencies = [
+ "scopeguard",
+]
+
+[[package]]
+name = "log"
+version = "0.4.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
+
+[[package]]
+name = "loop9"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fae87c125b03c1d2c0150c90365d7d6bcc53fb73a9acaef207d2d065860f062"
+dependencies = [
+ "imgref",
+]
+
+[[package]]
+name = "lru-slab"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
+
+[[package]]
+name = "macro_rules_attribute"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520"
+dependencies = [
+ "macro_rules_attribute-proc_macro",
+ "paste",
+]
+
+[[package]]
+name = "macro_rules_attribute-proc_macro"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30"
+
+[[package]]
+name = "malachite"
+version = "0.4.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2fbdf9cb251732db30a7200ebb6ae5d22fe8e11397364416617d2c2cf0c51cb5"
+dependencies = [
+ "malachite-base",
+ "malachite-nz",
+ "malachite-q",
+]
+
+[[package]]
+name = "malachite-base"
+version = "0.4.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ea0ed76adf7defc1a92240b5c36d5368cfe9251640dcce5bd2d0b7c1fd87aeb"
+dependencies = [
+ "hashbrown 0.14.5",
+ "itertools 0.11.0",
+ "libm",
+ "ryu",
+]
+
+[[package]]
+name = "malachite-bigint"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d149aaa2965d70381709d9df4c7ee1fc0de1c614a4efc2ee356f5e43d68749f8"
+dependencies = [
+ "derive_more",
+ "malachite",
+ "num-integer",
+ "num-traits",
+ "paste",
+]
+
+[[package]]
+name = "malachite-nz"
+version = "0.4.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34a79feebb2bc9aa7762047c8e5495269a367da6b5a90a99882a0aeeac1841f7"
+dependencies = [
+ "itertools 0.11.0",
+ "libm",
+ "malachite-base",
+]
+
+[[package]]
+name = "malachite-q"
+version = "0.4.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50f235d5747b1256b47620f5640c2a17a88c7569eebdf27cd9cb130e1a619191"
+dependencies = [
+ "itertools 0.11.0",
+ "malachite-base",
+ "malachite-nz",
+]
+
+[[package]]
+name = "matchers"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9"
+dependencies = [
+ "regex-automata",
+]
+
+[[package]]
+name = "matchit"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
+
+[[package]]
+name = "matrixmultiply"
+version = "0.3.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08"
+dependencies = [
+ "autocfg",
+ "rawpointer",
+]
+
+[[package]]
+name = "maybe-rayon"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
+dependencies = [
+ "cfg-if",
+ "rayon",
+]
+
+[[package]]
+name = "memchr"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+
+[[package]]
+name = "memo-map"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38d1115007560874e373613744c6fba374c17688327a71c1476d1a5954cc857b"
+
+[[package]]
+name = "mime"
+version = "0.3.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
+
+[[package]]
+name = "mime_guess"
+version = "2.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
+dependencies = [
+ "mime",
+ "unicase",
+]
+
+[[package]]
+name = "minijinja"
+version = "2.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "328251e58ad8e415be6198888fc207502727dc77945806421ab34f35bf012e7d"
+dependencies = [
+ "memo-map",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "minijinja-contrib"
+version = "2.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8c6302e47d2b51f9fc978268ff7f5a014de5caa2ad48440309fd10ee711480d7"
+dependencies = [
+ "minijinja",
+ "serde",
+]
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "miniz_oxide"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
+dependencies = [
+ "adler2",
+ "simd-adler32",
+]
+
+[[package]]
+name = "mio"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc"
+dependencies = [
+ "libc",
+ "wasi",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "monostate"
+version = "0.1.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3341a273f6c9d5bef1908f17b7267bbab0e95c9bf69a0d4dcf8e9e1b2c76ef67"
+dependencies = [
+ "monostate-impl",
+ "serde",
+ "serde_core",
+]
+
+[[package]]
+name = "monostate-impl"
+version = "0.1.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "moxcms"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb85c154ba489f01b25c0d36ae69a87e4a1c73a72631fc6c0eb6dde34a73e44b"
+dependencies = [
+ "num-traits",
+ "pxfm",
+]
+
+[[package]]
+name = "multimap"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084"
+
+[[package]]
+name = "native-tls"
+version = "0.2.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2"
+dependencies = [
+ "libc",
+ "log",
+ "openssl",
+ "openssl-probe",
+ "openssl-sys",
+ "schannel",
+ "security-framework",
+ "security-framework-sys",
+ "tempfile",
+]
+
+[[package]]
+name = "ndarray"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841"
+dependencies = [
+ "matrixmultiply",
+ "num-complex",
+ "num-integer",
+ "num-traits",
+ "portable-atomic",
+ "portable-atomic-util",
+ "rawpointer",
+]
+
+[[package]]
+name = "ndarray"
+version = "0.17.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d"
+dependencies = [
+ "matrixmultiply",
+ "num-complex",
+ "num-integer",
+ "num-traits",
+ "portable-atomic",
+ "portable-atomic-util",
+ "rawpointer",
+]
+
+[[package]]
+name = "new_debug_unreachable"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
+
+[[package]]
+name = "no_std_io2"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b51ed7824b6e07d354605f4abb3d9d300350701299da96642ee084f5ce631550"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
+name = "nom"
+version = "8.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "noop_proc_macro"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8"
+
+[[package]]
+name = "nu-ansi-term"
+version = "0.50.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "num-bigint"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
+dependencies = [
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-complex"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-conv"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050"
+
+[[package]]
+name = "num-derive"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "num-integer"
+version = "0.1.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-rational"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
+dependencies = [
+ "num-bigint",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "num_cpus"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
+dependencies = [
+ "hermit-abi",
+ "libc",
+]
+
+[[package]]
+name = "num_threads"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "number_prefix"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
+
+[[package]]
+name = "once_cell"
+version = "1.21.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
+
+[[package]]
+name = "once_cell_polyfill"
+version = "1.70.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
+
+[[package]]
+name = "onig"
+version = "6.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
+dependencies = [
+ "bitflags",
+ "libc",
+ "once_cell",
+ "onig_sys",
+]
+
+[[package]]
+name = "onig_sys"
+version = "69.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc"
+dependencies = [
+ "cc",
+ "pkg-config",
+]
+
+[[package]]
+name = "oorandom"
+version = "11.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
+
+[[package]]
+name = "openai-harmony"
+version = "0.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e77e82af451fc95deeb728a40b84db8ee82d341e136c268de415123a560b9b72"
+dependencies = [
+ "anyhow",
+ "base64 0.22.1",
+ "bstr",
+ "clap",
+ "fancy-regex 0.13.0",
+ "futures",
+ "image",
+ "regex",
+ "reqwest",
+ "rustc-hash 1.1.0",
+ "serde",
+ "serde_json",
+ "serde_with",
+ "sha1",
+ "sha2",
+ "thiserror 2.0.18",
+]
+
+[[package]]
+name = "openai-protocol"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b8d41ed865b7a26b6b2d2a519b774460e5ddc50eeeb4ac2a2409c8817ec2de9"
+dependencies = [
+ "bitflags",
+ "chrono",
+ "rand 0.9.2",
+ "schemars 0.8.22",
+ "serde",
+ "serde_json",
+ "serde_with",
+ "tokio",
+ "tracing",
+ "url",
+ "validator",
+]
+
+[[package]]
+name = "openssl"
+version = "0.10.76"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "951c002c75e16ea2c65b8c7e4d3d51d5530d8dfa7d060b4776828c88cfb18ecf"
+dependencies = [
+ "bitflags",
+ "cfg-if",
+ "foreign-types",
+ "libc",
+ "once_cell",
+ "openssl-macros",
+ "openssl-sys",
+]
+
+[[package]]
+name = "openssl-macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "openssl-probe"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"
+
+[[package]]
+name = "openssl-src"
+version = "300.5.5+3.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f1787d533e03597a7934fd0a765f0d28e94ecc5fb7789f8053b1e699a56f709"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "openssl-sys"
+version = "0.9.112"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57d55af3b3e226502be1526dfdba67ab0e9c96fc293004e79576b2b9edb0dbdb"
+dependencies = [
+ "cc",
+ "libc",
+ "openssl-src",
+ "pkg-config",
+ "vcpkg",
+]
+
+[[package]]
+name = "option-ext"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
+
+[[package]]
+name = "parking"
+version = "2.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba"
+
+[[package]]
+name = "parking_lot"
+version = "0.12.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-link",
+]
+
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
+[[package]]
+name = "pastey"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "35fb2e5f958ec131621fdd531e9fc186ed768cbe395337403ae56c17a74c68ec"
+
+[[package]]
+name = "pcre2"
+version = "0.2.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e970b0fcce0c7ee6ef662744ff711f21ccd6f11b7cf03cd187a80e89797fc67"
+dependencies = [
+ "libc",
+ "log",
+ "pcre2-sys",
+]
+
+[[package]]
+name = "pcre2-sys"
+version = "0.2.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18b9073c1a2549bd409bf4a32c94d903bb1a09bf845bc306ae148897fa0760a4"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+]
+
+[[package]]
+name = "pem-rfc7468"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6305423e0e7738146434843d1694d621cce767262b2a86910beab705e4493d9"
+dependencies = [
+ "base64ct",
+]
+
+[[package]]
+name = "percent-encoding"
+version = "2.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
+
+[[package]]
+name = "petgraph"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455"
+dependencies = [
+ "fixedbitset",
+ "hashbrown 0.15.5",
+ "indexmap 2.13.0",
+]
+
+[[package]]
+name = "phf"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
+dependencies = [
+ "phf_shared",
+]
+
+[[package]]
+name = "phf_codegen"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
+dependencies = [
+ "phf_generator",
+ "phf_shared",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
+dependencies = [
+ "phf_shared",
+ "rand 0.8.5",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
+dependencies = [
+ "siphasher",
+]
+
+[[package]]
+name = "pin-project"
+version = "1.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517"
+dependencies = [
+ "pin-project-internal",
+]
+
+[[package]]
+name = "pin-project-internal"
+version = "1.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "pin-project-lite"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
+
+[[package]]
+name = "pin-utils"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+
+[[package]]
+name = "pkg-config"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
+
+[[package]]
+name = "plotters"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
+dependencies = [
+ "num-traits",
+ "plotters-backend",
+ "plotters-svg",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "plotters-backend"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
+
+[[package]]
+name = "plotters-svg"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
+dependencies = [
+ "plotters-backend",
+]
+
+[[package]]
+name = "png"
+version = "0.18.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61"
+dependencies = [
+ "bitflags",
+ "crc32fast",
+ "fdeflate",
+ "flate2",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "polling"
+version = "3.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218"
+dependencies = [
+ "cfg-if",
+ "concurrent-queue",
+ "hermit-abi",
+ "pin-project-lite",
+ "rustix",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "portable-atomic"
+version = "1.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
+
+[[package]]
+name = "portable-atomic-util"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3"
+dependencies = [
+ "portable-atomic",
+]
+
+[[package]]
+name = "potential_utf"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77"
+dependencies = [
+ "zerovec",
+]
+
+[[package]]
+name = "powerfmt"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+dependencies = [
+ "zerocopy",
+]
+
+[[package]]
+name = "prettyplease"
+version = "0.2.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
+dependencies = [
+ "proc-macro2",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "primal-check"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc0d895b311e3af9902528fbb8f928688abbd95872819320517cc24ca6b2bd08"
+dependencies = [
+ "num-integer",
+]
+
+[[package]]
+name = "proc-macro-crate"
+version = "3.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f"
+dependencies = [
+ "toml_edit",
+]
+
+[[package]]
+name = "proc-macro-error-attr2"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+]
+
+[[package]]
+name = "proc-macro-error2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802"
+dependencies = [
+ "proc-macro-error-attr2",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "profiling"
+version = "1.0.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3eb8486b569e12e2c32ad3e204dbaba5e4b5b216e9367044f25f1dba42341773"
+dependencies = [
+ "profiling-procmacros",
+]
+
+[[package]]
+name = "profiling-procmacros"
+version = "1.0.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52717f9a02b6965224f95ca2a81e2e0c5c43baacd28ca057577988930b6c3d5b"
+dependencies = [
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "prometheus-client"
+version = "0.24.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e4500adecd7af8e0e9f4dbce15cfee07ce913fbf6ad605cc468b83f2d531ee94"
+dependencies = [
+ "dtoa",
+ "itoa",
+ "parking_lot",
+ "prometheus-client-derive-encode",
+]
+
+[[package]]
+name = "prometheus-client-derive-encode"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9adf1691c04c0a5ff46ff8f262b58beb07b0dbb61f96f9f54f6cbd82106ed87f"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "prost"
+version = "0.14.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568"
+dependencies = [
+ "bytes",
+ "prost-derive",
+]
+
+[[package]]
+name = "prost-build"
+version = "0.14.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7"
+dependencies = [
+ "heck",
+ "itertools 0.14.0",
+ "log",
+ "multimap",
+ "petgraph",
+ "prettyplease",
+ "prost",
+ "prost-types",
+ "pulldown-cmark",
+ "pulldown-cmark-to-cmark",
+ "regex",
+ "syn 2.0.117",
+ "tempfile",
+]
+
+[[package]]
+name = "prost-derive"
+version = "0.14.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b"
+dependencies = [
+ "anyhow",
+ "itertools 0.14.0",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "prost-types"
+version = "0.14.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7"
+dependencies = [
+ "prost",
+]
+
+[[package]]
+name = "pulldown-cmark"
+version = "0.13.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad"
+dependencies = [
+ "bitflags",
+ "memchr",
+ "unicase",
+]
+
+[[package]]
+name = "pulldown-cmark-to-cmark"
+version = "22.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50793def1b900256624a709439404384204a5dc3a6ec580281bfaac35e882e90"
+dependencies = [
+ "pulldown-cmark",
+]
+
+[[package]]
+name = "pxfm"
+version = "0.1.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e0c5ccf5294c6ccd63a74f1565028353830a9c2f5eb0c682c355c471726a6e3f"
+
+[[package]]
+name = "qoi"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001"
+dependencies = [
+ "bytemuck",
+]
+
+[[package]]
+name = "quick-error"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
+
+[[package]]
+name = "quinn"
+version = "0.11.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20"
+dependencies = [
+ "bytes",
+ "cfg_aliases",
+ "pin-project-lite",
+ "quinn-proto",
+ "quinn-udp",
+ "rustc-hash 2.1.1",
+ "rustls",
+ "socket2",
+ "thiserror 2.0.18",
+ "tokio",
+ "tracing",
+ "web-time",
+]
+
+[[package]]
+name = "quinn-proto"
+version = "0.11.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098"
+dependencies = [
+ "bytes",
+ "getrandom 0.3.4",
+ "lru-slab",
+ "rand 0.9.2",
+ "ring",
+ "rustc-hash 2.1.1",
+ "rustls",
+ "rustls-pki-types",
+ "slab",
+ "thiserror 2.0.18",
+ "tinyvec",
+ "tracing",
+ "web-time",
+]
+
+[[package]]
+name = "quinn-udp"
+version = "0.5.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd"
+dependencies = [
+ "cfg_aliases",
+ "libc",
+ "once_cell",
+ "socket2",
+ "tracing",
+ "windows-sys 0.60.2",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "r-efi"
+version = "5.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
+[[package]]
+name = "r-efi"
+version = "6.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
+
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha 0.3.1",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand"
+version = "0.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
+dependencies = [
+ "rand_chacha 0.9.0",
+ "rand_core 0.9.5",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.9.5",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom 0.2.17",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
+dependencies = [
+ "getrandom 0.3.4",
+]
+
+[[package]]
+name = "rav1e"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43b6dd56e85d9483277cde964fd1bdb0428de4fec5ebba7540995639a21cb32b"
+dependencies = [
+ "aligned-vec",
+ "arbitrary",
+ "arg_enum_proc_macro",
+ "arrayvec",
+ "av-scenechange",
+ "av1-grain",
+ "bitstream-io",
+ "built",
+ "cfg-if",
+ "interpolate_name",
+ "itertools 0.14.0",
+ "libc",
+ "libfuzzer-sys",
+ "log",
+ "maybe-rayon",
+ "new_debug_unreachable",
+ "noop_proc_macro",
+ "num-derive",
+ "num-traits",
+ "paste",
+ "profiling",
+ "rand 0.9.2",
+ "rand_chacha 0.9.0",
+ "simd_helpers",
+ "thiserror 2.0.18",
+ "v_frame",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "ravif"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e52310197d971b0f5be7fe6b57530dcd27beb35c1b013f29d66c1ad73fbbcc45"
+dependencies = [
+ "avif-serialize",
+ "imgref",
+ "loop9",
+ "quick-error",
+ "rav1e",
+ "rayon",
+ "rgb",
+]
+
+[[package]]
+name = "rawpointer"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
+
+[[package]]
+name = "rayon"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-cond"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f"
+dependencies = [
+ "either",
+ "itertools 0.14.0",
+ "rayon",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "realfft"
+version = "3.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f821338fddb99d089116342c46e9f1fbf3828dba077674613e734e01d6ea8677"
+dependencies = [
+ "rustfft",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.5.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "redox_users"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
+dependencies = [
+ "getrandom 0.2.17",
+ "libredox",
+ "thiserror 2.0.18",
+]
+
+[[package]]
+name = "ref-cast"
+version = "1.0.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d"
+dependencies = [
+ "ref-cast-impl",
+]
+
+[[package]]
+name = "ref-cast-impl"
+version = "1.0.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "regex"
+version = "1.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
+
+[[package]]
+name = "reqwest"
+version = "0.12.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
+dependencies = [
+ "base64 0.22.1",
+ "bytes",
+ "encoding_rs",
+ "futures-channel",
+ "futures-core",
+ "futures-util",
+ "h2",
+ "http",
+ "http-body",
+ "http-body-util",
+ "hyper",
+ "hyper-rustls",
+ "hyper-tls",
+ "hyper-util",
+ "js-sys",
+ "log",
+ "mime",
+ "mime_guess",
+ "native-tls",
+ "percent-encoding",
+ "pin-project-lite",
+ "quinn",
+ "rustls",
+ "rustls-native-certs",
+ "rustls-pki-types",
+ "serde",
+ "serde_json",
+ "serde_urlencoded",
+ "sync_wrapper",
+ "tokio",
+ "tokio-native-tls",
+ "tokio-rustls",
+ "tokio-util",
+ "tower",
+ "tower-http",
+ "tower-service",
+ "url",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "wasm-streams",
+ "web-sys",
+ "webpki-roots 1.0.6",
+]
+
+[[package]]
+name = "reqwest-eventsource"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "632c55746dbb44275691640e7b40c907c16a2dc1a5842aa98aaec90da6ec6bde"
+dependencies = [
+ "eventsource-stream",
+ "futures-core",
+ "futures-timer",
+ "mime",
+ "nom 7.1.3",
+ "pin-project-lite",
+ "reqwest",
+ "thiserror 1.0.69",
+]
+
+[[package]]
+name = "rgb"
+version = "0.8.53"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47b34b781b31e5d73e9fbc8689c70551fd1ade9a19e3e28cfec8580a79290cc4"
+
+[[package]]
+name = "ring"
+version = "0.17.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
+dependencies = [
+ "cc",
+ "cfg-if",
+ "getrandom 0.2.17",
+ "libc",
+ "untrusted",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "riptoken"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "196b37c4dd48f99b51e8aeaa3d0c343df62c7bb5b66cd6735aa072524fbe8665"
+dependencies = [
+ "fancy-regex 0.17.0",
+ "rayon",
+ "regex",
+ "regex-automata",
+ "rustc-hash 2.1.1",
+]
+
+[[package]]
+name = "rmp"
+version = "0.8.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ba8be72d372b2c9b35542551678538b562e7cf86c3315773cae48dfbfe7790c"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "rmp-serde"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72f81bee8c8ef9b577d1681a70ebbc962c232461e397b22c208c43c04b67a155"
+dependencies = [
+ "rmp",
+ "serde",
+]
+
+[[package]]
+name = "rmpv"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a4e1d4b9b938a26d2996af33229f0ca0956c652c1375067f0b45291c1df8417"
+dependencies = [
+ "rmp",
+ "serde",
+ "serde_bytes",
+]
+
+[[package]]
+name = "rubato"
+version = "0.16.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5258099699851cfd0082aeb645feb9c084d9a5e1f1b8d5372086b989fc5e56a1"
+dependencies = [
+ "num-complex",
+ "num-integer",
+ "num-traits",
+ "realfft",
+]
+
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
+[[package]]
+name = "rustc-hash"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
+
+[[package]]
+name = "rustfft"
+version = "6.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21db5f9893e91f41798c88680037dba611ca6674703c1a18601b01a72c8adb89"
+dependencies = [
+ "num-complex",
+ "num-integer",
+ "num-traits",
+ "primal-check",
+ "strength_reduce",
+ "transpose",
+]
+
+[[package]]
+name = "rustix"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
+dependencies = [
+ "bitflags",
+ "errno",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "rustls"
+version = "0.23.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4"
+dependencies = [
+ "log",
+ "once_cell",
+ "ring",
+ "rustls-pki-types",
+ "rustls-webpki",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "rustls-native-certs"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63"
+dependencies = [
+ "openssl-probe",
+ "rustls-pki-types",
+ "schannel",
+ "security-framework",
+]
+
+[[package]]
+name = "rustls-pki-types"
+version = "1.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd"
+dependencies = [
+ "web-time",
+ "zeroize",
+]
+
+[[package]]
+name = "rustls-webpki"
+version = "0.103.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53"
+dependencies = [
+ "ring",
+ "rustls-pki-types",
+ "untrusted",
+]
+
+[[package]]
+name = "rustpython-ast"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cdaf8ee5c1473b993b398c174641d3aa9da847af36e8d5eb8291930b72f31a5"
+dependencies = [
+ "is-macro",
+ "malachite-bigint",
+ "rustpython-parser-core",
+ "static_assertions",
+]
+
+[[package]]
+name = "rustpython-parser"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "868f724daac0caf9bd36d38caf45819905193a901e8f1c983345a68e18fb2abb"
+dependencies = [
+ "anyhow",
+ "is-macro",
+ "itertools 0.11.0",
+ "lalrpop-util",
+ "log",
+ "malachite-bigint",
+ "num-traits",
+ "phf",
+ "phf_codegen",
+ "rustc-hash 1.1.0",
+ "rustpython-ast",
+ "rustpython-parser-core",
+ "tiny-keccak",
+ "unic-emoji-char",
+ "unic-ucd-ident",
+ "unicode_names2",
+]
+
+[[package]]
+name = "rustpython-parser-core"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4b6c12fa273825edc7bccd9a734f0ad5ba4b8a2f4da5ff7efe946f066d0f4ad"
+dependencies = [
+ "is-macro",
+ "memchr",
+ "rustpython-parser-vendored",
+]
+
+[[package]]
+name = "rustpython-parser-vendored"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04fcea49a4630a3a5d940f4d514dc4f575ed63c14c3e3ed07146634aed7f67a6"
+dependencies = [
+ "memchr",
+ "once_cell",
+]
+
+[[package]]
+name = "rustversion"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+
+[[package]]
+name = "ryu"
+version = "1.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
+
+[[package]]
+name = "saa"
+version = "5.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "16c7f49c9d5caa3bf4b3106900484b447b9253fe99670ceb81cb6cb5027855e1"
+
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "scc"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc"
+dependencies = [
+ "sdd 3.0.10",
+]
+
+[[package]]
+name = "scc"
+version = "3.6.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "45bb5ce9efd4a6e7b0f86c2697fe4c1d78d1f4e6d988c54b752d577cafe22fe8"
+dependencies = [
+ "saa",
+ "sdd 4.7.3",
+]
+
+[[package]]
+name = "schannel"
+version = "0.1.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "schemars"
+version = "0.8.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615"
+dependencies = [
+ "dyn-clone",
+ "schemars_derive",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "schemars"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f"
+dependencies = [
+ "dyn-clone",
+ "ref-cast",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "schemars"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc"
+dependencies = [
+ "dyn-clone",
+ "ref-cast",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "schemars_derive"
+version = "0.8.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "serde_derive_internals",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "sdd"
+version = "3.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca"
+
+[[package]]
+name = "sdd"
+version = "4.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b21a75f5913ab130e4b369fb8693be25f29b983e2ecad4279df9bfa5dd8aaf3e"
+
+[[package]]
+name = "secrecy"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e891af845473308773346dc847b2c23ee78fe442e0472ac50e22a18a93d3ae5a"
+dependencies = [
+ "serde",
+ "zeroize",
+]
+
+[[package]]
+name = "security-framework"
+version = "3.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d"
+dependencies = [
+ "bitflags",
+ "core-foundation 0.10.1",
+ "core-foundation-sys",
+ "libc",
+ "security-framework-sys",
+]
+
+[[package]]
+name = "security-framework-sys"
+version = "2.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "semver"
+version = "1.0.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2"
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+ "serde_derive",
+]
+
+[[package]]
+name = "serde-json-fmt"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4a33b7a5f52a26d520099339add40c48baf2e5ada194c8cc1b18cafa2b5e419"
+dependencies = [
+ "serde",
+ "serde_json",
+ "smartstring",
+]
+
+[[package]]
+name = "serde_bytes"
+version = "0.11.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8"
+dependencies = [
+ "serde",
+ "serde_core",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_default"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "486b028b311aaaea83e0ba65a3e6e3cbef381e74e9d0bd6263faefd1fb503c1d"
+dependencies = [
+ "darling 0.20.11",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "serde_derive_internals"
+version = "0.29.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.149"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
+dependencies = [
+ "indexmap 2.13.0",
+ "itoa",
+ "memchr",
+ "serde",
+ "serde_core",
+ "zmij",
+]
+
+[[package]]
+name = "serde_path_to_error"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457"
+dependencies = [
+ "itoa",
+ "serde",
+ "serde_core",
+]
+
+[[package]]
+name = "serde_repr"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "serde_tuple"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6af196b9c06f0aa5555ab980c01a2527b0f67517da8d68b1731b9d4764846a6f"
+dependencies = [
+ "serde",
+ "serde_tuple_macros",
+]
+
+[[package]]
+name = "serde_tuple_macros"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec3a1e7d2eadec84deabd46ae061bf480a91a6bce74d25dad375bd656f2e19d8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "serde_urlencoded"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
+dependencies = [
+ "form_urlencoded",
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "serde_with"
+version = "3.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd5414fad8e6907dbdd5bc441a50ae8d6e26151a03b1de04d89a5576de61d01f"
+dependencies = [
+ "base64 0.22.1",
+ "chrono",
+ "hex",
+ "indexmap 1.9.3",
+ "indexmap 2.13.0",
+ "schemars 0.9.0",
+ "schemars 1.2.1",
+ "serde_core",
+ "serde_json",
+ "serde_with_macros",
+ "time",
+]
+
+[[package]]
+name = "serde_with_macros"
+version = "3.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3db8978e608f1fe7357e211969fd9abdcae80bac1ba7a3369bb7eb6b404eb65"
+dependencies = [
+ "darling 0.23.0",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "serial_test"
+version = "3.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "911bd979bf1070a3f3aa7b691a3b3e9968f339ceeec89e08c280a8a22207a32f"
+dependencies = [
+ "fslock",
+ "futures-executor",
+ "futures-util",
+ "log",
+ "once_cell",
+ "parking_lot",
+ "scc 2.4.0",
+ "serial_test_derive",
+]
+
+[[package]]
+name = "serial_test_derive"
+version = "3.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "sha1"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
+dependencies = [
+ "cfg-if",
+ "cpufeatures 0.2.17",
+ "digest",
+]
+
+[[package]]
+name = "sha2"
+version = "0.10.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
+dependencies = [
+ "cfg-if",
+ "cpufeatures 0.2.17",
+ "digest",
+]
+
+[[package]]
+name = "sharded-slab"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
+dependencies = [
+ "lazy_static",
+]
+
+[[package]]
+name = "shlex"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+
+[[package]]
+name = "signal-hook-registry"
+version = "1.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b"
+dependencies = [
+ "errno",
+ "libc",
+]
+
+[[package]]
+name = "simd-adler32"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
+
+[[package]]
+name = "simd_helpers"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95890f873bec569a0362c235787f3aca6e1e887302ba4840839bcc6459c42da6"
+dependencies = [
+ "quote",
+]
+
+[[package]]
+name = "siphasher"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
+
+[[package]]
+name = "slab"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
+
+[[package]]
+name = "smallvec"
+version = "1.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+
+[[package]]
+name = "smartstring"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29"
+dependencies = [
+ "autocfg",
+ "static_assertions",
+ "version_check",
+]
+
+[[package]]
+name = "socket2"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e"
+dependencies = [
+ "libc",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "socks"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b"
+dependencies = [
+ "byteorder",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "spm_precompiled"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326"
+dependencies = [
+ "base64 0.13.1",
+ "nom 7.1.3",
+ "serde",
+ "unicode-segmentation",
+]
+
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
+
+[[package]]
+name = "static_assertions"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+
+[[package]]
+name = "strength_reduce"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82"
+
+[[package]]
+name = "strsim"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
+
+[[package]]
+name = "strum"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"
+dependencies = [
+ "strum_macros",
+]
+
+[[package]]
+name = "strum_macros"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "subenum"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec3d08fe7078c57309d5c3d938e50eba95ba1d33b9c3a101a8465fc6861a5416"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "subtle"
+version = "2.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
+
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.117"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "sync_wrapper"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
+dependencies = [
+ "futures-core",
+]
+
+[[package]]
+name = "synstructure"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "system-configuration"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b"
+dependencies = [
+ "bitflags",
+ "core-foundation 0.9.4",
+ "system-configuration-sys",
+]
+
+[[package]]
+name = "system-configuration-sys"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "task-local"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2972044a9e5e448a506a7ff6f0d03b566d8ef4cd6918a58fc59835a0f8666626"
+dependencies = [
+ "pin-project-lite",
+]
+
+[[package]]
+name = "tekken-rs"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49623843103837f53f7ebe8cfafc19ccff28ff0e15e7c4b9f6ad21e36fbfde3a"
+dependencies = [
+ "anyhow",
+ "base64 0.22.1",
+ "env_logger",
+ "hound",
+ "log",
+ "ndarray 0.16.1",
+ "regex",
+ "rubato",
+ "rustc-hash 1.1.0",
+ "rustfft",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "tiktoken-rs 0.7.0",
+]
+
+[[package]]
+name = "tempfile"
+version = "3.27.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd"
+dependencies = [
+ "fastrand",
+ "getrandom 0.4.2",
+ "once_cell",
+ "rustix",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "thiserror"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
+dependencies = [
+ "thiserror-impl 1.0.69",
+]
+
+[[package]]
+name = "thiserror"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
+dependencies = [
+ "thiserror-impl 2.0.18",
+]
+
+[[package]]
+name = "thiserror-ext"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fb7e61141f4141832ca9aad63c3c90023843f944a1975460abdacc64d03f534"
+dependencies = [
+ "thiserror 2.0.18",
+ "thiserror-ext-derive",
+]
+
+[[package]]
+name = "thiserror-ext-derive"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b5042dd3b562d1d57711be902006a0003fa2781b81d5b2bec07416be31586ff"
+dependencies = [
+ "either",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "thread_local"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "tiff"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b63feaf3343d35b6ca4d50483f94843803b0f51634937cc2ec519fc32232bc52"
+dependencies = [
+ "fax",
+ "flate2",
+ "half",
+ "quick-error",
+ "weezl",
+ "zune-jpeg",
+]
+
+[[package]]
+name = "tiktoken-rs"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25563eeba904d770acf527e8b370fe9a5547bacd20ff84a0b6c3bc41288e5625"
+dependencies = [
+ "anyhow",
+ "base64 0.22.1",
+ "bstr",
+ "fancy-regex 0.13.0",
+ "lazy_static",
+ "regex",
+ "rustc-hash 1.1.0",
+]
+
+[[package]]
+name = "tiktoken-rs"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a19830747d9034cd9da43a60eaa8e552dfda7712424aebf187b7a60126bae0d"
+dependencies = [
+ "anyhow",
+ "base64 0.22.1",
+ "bstr",
+ "fancy-regex 0.13.0",
+ "lazy_static",
+ "regex",
+ "rustc-hash 1.1.0",
+]
+
+[[package]]
+name = "time"
+version = "0.3.47"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c"
+dependencies = [
+ "deranged",
+ "itoa",
+ "libc",
+ "num-conv",
+ "num_threads",
+ "powerfmt",
+ "serde_core",
+ "time-core",
+ "time-macros",
+]
+
+[[package]]
+name = "time-core"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca"
+
+[[package]]
+name = "time-macros"
+version = "0.2.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215"
+dependencies = [
+ "num-conv",
+ "time-core",
+]
+
+[[package]]
+name = "tiny-keccak"
+version = "2.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
+dependencies = [
+ "crunchy",
+]
+
+[[package]]
+name = "tinystr"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869"
+dependencies = [
+ "displaydoc",
+ "zerovec",
+]
+
+[[package]]
+name = "tinytemplate"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "tinyvec"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
+[[package]]
+name = "tokenizers"
+version = "0.22.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b238e22d44a15349529690fb07bd645cf58149a1b1e44d6cb5bd1641ff1a6223"
+dependencies = [
+ "ahash",
+ "aho-corasick",
+ "compact_str",
+ "dary_heap",
+ "derive_builder",
+ "esaxx-rs",
+ "getrandom 0.3.4",
+ "indicatif 0.18.4",
+ "itertools 0.14.0",
+ "log",
+ "macro_rules_attribute",
+ "monostate",
+ "onig",
+ "paste",
+ "rand 0.9.2",
+ "rayon",
+ "rayon-cond",
+ "regex",
+ "regex-syntax",
+ "serde",
+ "serde_json",
+ "spm_precompiled",
+ "thiserror 2.0.18",
+ "unicode-normalization-alignments",
+ "unicode-segmentation",
+ "unicode_categories",
+]
+
+[[package]]
+name = "tokio"
+version = "1.50.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d"
+dependencies = [
+ "bytes",
+ "libc",
+ "mio",
+ "parking_lot",
+ "pin-project-lite",
+ "signal-hook-registry",
+ "socket2",
+ "tokio-macros",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "tokio-macros"
+version = "2.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "tokio-native-tls"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
+dependencies = [
+ "native-tls",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-rustls"
+version = "0.26.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
+dependencies = [
+ "rustls",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-stream"
+version = "0.1.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70"
+dependencies = [
+ "futures-core",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-tungstenite"
+version = "0.28.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d25a406cddcc431a75d3d9afc6a7c0f7428d4891dd973e4d54c56b46127bf857"
+dependencies = [
+ "futures-util",
+ "log",
+ "tokio",
+ "tungstenite",
+]
+
+[[package]]
+name = "tokio-util"
+version = "0.7.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "futures-io",
+ "futures-sink",
+ "futures-util",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "toml_datetime"
+version = "1.1.1+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "toml_edit"
+version = "0.25.11+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b"
+dependencies = [
+ "indexmap 2.13.0",
+ "toml_datetime",
+ "toml_parser",
+ "winnow",
+]
+
+[[package]]
+name = "toml_parser"
+version = "1.1.2+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526"
+dependencies = [
+ "winnow",
+]
+
+[[package]]
+name = "tonic"
+version = "0.14.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fec7c61a0695dc1887c1b53952990f3ad2e3a31453e1f49f10e75424943a93ec"
+dependencies = [
+ "async-trait",
+ "axum",
+ "base64 0.22.1",
+ "bytes",
+ "h2",
+ "http",
+ "http-body",
+ "http-body-util",
+ "hyper",
+ "hyper-timeout",
+ "hyper-util",
+ "percent-encoding",
+ "pin-project",
+ "socket2",
+ "sync_wrapper",
+ "tokio",
+ "tokio-stream",
+ "tower",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "tonic-build"
+version = "0.14.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1882ac3bf5ef12877d7ed57aad87e75154c11931c2ba7e6cde5e22d63522c734"
+dependencies = [
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "tonic-prost"
+version = "0.14.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a55376a0bbaa4975a3f10d009ad763d8f4108f067c7c2e74f3001fb49778d309"
+dependencies = [
+ "bytes",
+ "prost",
+ "tonic",
+]
+
+[[package]]
+name = "tonic-prost-build"
+version = "0.14.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f3144df636917574672e93d0f56d7edec49f90305749c668df5101751bb8f95a"
+dependencies = [
+ "prettyplease",
+ "proc-macro2",
+ "prost-build",
+ "prost-types",
+ "quote",
+ "syn 2.0.117",
+ "tempfile",
+ "tonic-build",
+]
+
+[[package]]
+name = "tool-parser"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7bdcf0aa96d42cfc2ecc8e7b3c10598b9c1a6052f996b5ab574dec72f483d87c"
+dependencies = [
+ "async-trait",
+ "num-traits",
+ "openai-protocol",
+ "parking_lot",
+ "regex",
+ "rustpython-parser",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "tokio",
+ "tracing",
+]
+
+[[package]]
+name = "tower"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
+dependencies = [
+ "futures-core",
+ "futures-util",
+ "indexmap 2.13.0",
+ "pin-project-lite",
+ "slab",
+ "sync_wrapper",
+ "tokio",
+ "tokio-util",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "tower-http"
+version = "0.6.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
+dependencies = [
+ "bitflags",
+ "bytes",
+ "futures-util",
+ "http",
+ "http-body",
+ "iri-string",
+ "pin-project-lite",
+ "tower",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "tower-layer"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
+
+[[package]]
+name = "tower-service"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
+
+[[package]]
+name = "tracing"
+version = "0.1.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
+dependencies = [
+ "log",
+ "pin-project-lite",
+ "tracing-attributes",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-attributes"
+version = "0.1.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "tracing-core"
+version = "0.1.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
+dependencies = [
+ "once_cell",
+ "valuable",
+]
+
+[[package]]
+name = "tracing-futures"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
+dependencies = [
+ "futures",
+ "futures-task",
+ "pin-project",
+ "tracing",
+]
+
+[[package]]
+name = "tracing-log"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
+dependencies = [
+ "log",
+ "once_cell",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-subscriber"
+version = "0.3.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e"
+dependencies = [
+ "matchers",
+ "nu-ansi-term",
+ "once_cell",
+ "regex-automata",
+ "sharded-slab",
+ "smallvec",
+ "thread_local",
+ "tracing",
+ "tracing-core",
+ "tracing-log",
+]
+
+[[package]]
+name = "trait-set"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b79e2e9c9ab44c6d7c20d5976961b47e8f49ac199154daa514b77cd1ab536625"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "transpose"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ad61aed86bc3faea4300c7aee358b4c6d0c8d6ccc36524c96e4c92ccf26e77e"
+dependencies = [
+ "num-integer",
+ "strength_reduce",
+]
+
+[[package]]
+name = "try-lock"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
+
+[[package]]
+name = "tungstenite"
+version = "0.28.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8628dcc84e5a09eb3d8423d6cb682965dea9133204e8fb3efee74c2a0c259442"
+dependencies = [
+ "bytes",
+ "log",
+ "rand 0.9.2",
+ "thiserror 2.0.18",
+ "utf-8",
+]
+
+[[package]]
+name = "typenum"
+version = "1.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
+
+[[package]]
+name = "unic-char-property"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221"
+dependencies = [
+ "unic-char-range",
+]
+
+[[package]]
+name = "unic-char-range"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc"
+
+[[package]]
+name = "unic-common"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc"
+
+[[package]]
+name = "unic-emoji-char"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d"
+dependencies = [
+ "unic-char-property",
+ "unic-char-range",
+ "unic-ucd-version",
+]
+
+[[package]]
+name = "unic-ucd-ident"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e230a37c0381caa9219d67cf063aa3a375ffed5bf541a452db16e744bdab6987"
+dependencies = [
+ "unic-char-property",
+ "unic-char-range",
+ "unic-ucd-version",
+]
+
+[[package]]
+name = "unic-ucd-version"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4"
+dependencies = [
+ "unic-common",
+]
+
+[[package]]
+name = "unicase"
+version = "2.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "unicode-normalization-alignments"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de"
+dependencies = [
+ "smallvec",
+]
+
+[[package]]
+name = "unicode-segmentation"
+version = "1.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da36089a805484bcccfffe0739803392c8298778a2d2f09febf76fac5ad9025b"
+
+[[package]]
+name = "unicode-width"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
+
+[[package]]
+name = "unicode_categories"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
+
+[[package]]
+name = "unicode_names2"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1673eca9782c84de5f81b82e4109dcfb3611c8ba0d52930ec4a9478f547b2dd"
+dependencies = [
+ "phf",
+ "unicode_names2_generator",
+]
+
+[[package]]
+name = "unicode_names2_generator"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b91e5b84611016120197efd7dc93ef76774f4e084cd73c9fb3ea4a86c570c56e"
+dependencies = [
+ "getopts",
+ "log",
+ "phf_codegen",
+ "rand 0.8.5",
+]
+
+[[package]]
+name = "unit-prefix"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3"
+
+[[package]]
+name = "untrusted"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
+
+[[package]]
+name = "ureq"
+version = "2.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d"
+dependencies = [
+ "base64 0.22.1",
+ "flate2",
+ "log",
+ "once_cell",
+ "rustls",
+ "rustls-pki-types",
+ "serde",
+ "serde_json",
+ "socks",
+ "url",
+ "webpki-roots 0.26.11",
+]
+
+[[package]]
+name = "ureq"
+version = "3.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dea7109cdcd5864d4eeb1b58a1648dc9bf520360d7af16ec26d0a9354bafcfc0"
+dependencies = [
+ "base64 0.22.1",
+ "cookie_store",
+ "der",
+ "flate2",
+ "log",
+ "native-tls",
+ "percent-encoding",
+ "rustls",
+ "rustls-pki-types",
+ "serde",
+ "serde_json",
+ "socks",
+ "ureq-proto",
+ "utf8-zero",
+ "webpki-root-certs",
+ "webpki-roots 1.0.6",
+]
+
+[[package]]
+name = "ureq-proto"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e994ba84b0bd1b1b0cf92878b7ef898a5c1760108fe7b6010327e274917a808c"
+dependencies = [
+ "base64 0.22.1",
+ "http",
+ "httparse",
+ "log",
+]
+
+[[package]]
+name = "url"
+version = "2.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed"
+dependencies = [
+ "form_urlencoded",
+ "idna",
+ "percent-encoding",
+ "serde",
+]
+
+[[package]]
+name = "utf-8"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
+
+[[package]]
+name = "utf16_iter"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
+
+[[package]]
+name = "utf8-zero"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8c0a043c9540bae7c578c88f91dda8bd82e59ae27c21baca69c8b191aaf5a6e"
+
+[[package]]
+name = "utf8_iter"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
+
+[[package]]
+name = "utf8parse"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
+
+[[package]]
+name = "uuid"
+version = "1.22.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37"
+dependencies = [
+ "getrandom 0.4.2",
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "v_frame"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "666b7727c8875d6ab5db9533418d7c764233ac9c0cff1d469aec8fa127597be2"
+dependencies = [
+ "aligned-vec",
+ "num-traits",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "validator"
+version = "0.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43fb22e1a008ece370ce08a3e9e4447a910e92621bb49b85d6e48a45397e7cfa"
+dependencies = [
+ "idna",
+ "once_cell",
+ "regex",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "url",
+ "validator_derive",
+]
+
+[[package]]
+name = "validator_derive"
+version = "0.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7df16e474ef958526d1205f6dda359fdfab79d9aa6d54bafcb92dcd07673dca"
+dependencies = [
+ "darling 0.20.11",
+ "once_cell",
+ "proc-macro-error2",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "valuable"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
+
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
+[[package]]
+name = "version_check"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+
+[[package]]
+name = "vllm-chat"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "asynk-strim-attr",
+ "bytes",
+ "clap",
+ "easy-ext",
+ "expect-test",
+ "futures",
+ "half",
+ "itertools 0.14.0",
+ "llm-multimodal",
+ "minijinja",
+ "minijinja-contrib",
+ "openai-harmony",
+ "reqwest",
+ "rmp-serde",
+ "serde",
+ "serde-json-fmt",
+ "serde_json",
+ "serde_with",
+ "serial_test",
+ "subenum",
+ "tempfile",
+ "thiserror 2.0.18",
+ "thiserror-ext",
+ "tokio",
+ "tracing",
+ "tracing-subscriber",
+ "trait-set",
+ "uuid",
+ "vllm-engine-core-client",
+ "vllm-llm",
+ "vllm-reasoning-parser",
+ "vllm-text",
+ "vllm-tokenizer",
+ "vllm-tool-parser",
+ "zeromq",
+]
+
+[[package]]
+name = "vllm-cmd"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "clap",
+ "educe",
+ "expect-test",
+ "itertools 0.14.0",
+ "native-tls",
+ "serde",
+ "serde_json",
+ "serde_with",
+ "thiserror-ext",
+ "time",
+ "tokio",
+ "tokio-util",
+ "tracing",
+ "tracing-subscriber",
+ "uuid",
+ "vllm-engine-core-client",
+ "vllm-managed-engine",
+ "vllm-server",
+]
+
+[[package]]
+name = "vllm-engine-core-client"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "arc-swap",
+ "bytemuck",
+ "byteorder",
+ "bytes",
+ "clap",
+ "easy-ext",
+ "enum-as-inner",
+ "expect-test",
+ "futures",
+ "half",
+ "hex",
+ "itertools 0.14.0",
+ "parking_lot",
+ "rmp-serde",
+ "rmpv",
+ "serde",
+ "serde_default",
+ "serde_json",
+ "serde_repr",
+ "serde_tuple",
+ "serde_with",
+ "task-local",
+ "tempfile",
+ "thiserror 2.0.18",
+ "thiserror-ext",
+ "tokio",
+ "tokio-util",
+ "tracing",
+ "tracing-subscriber",
+ "vllm-metrics",
+ "zeromq",
+]
+
+[[package]]
+name = "vllm-llm"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "bytes",
+ "clap",
+ "easy-ext",
+ "enum-as-inner",
+ "expect-test",
+ "futures",
+ "rmp-serde",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "tokio",
+ "tokio-util",
+ "tracing",
+ "tracing-subscriber",
+ "uuid",
+ "vllm-engine-core-client",
+ "vllm-metrics",
+ "zeromq",
+]
+
+[[package]]
+name = "vllm-managed-engine"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "clap",
+ "expect-test",
+ "libc",
+ "tokio",
+ "tracing",
+]
+
+[[package]]
+name = "vllm-metrics"
+version = "0.1.0"
+dependencies = [
+ "prometheus-client",
+]
+
+[[package]]
+name = "vllm-reasoning-parser"
+version = "0.1.0"
+dependencies = [
+ "thiserror 2.0.18",
+ "vllm-tokenizer",
+]
+
+[[package]]
+name = "vllm-server"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "async-openai",
+ "asynk-strim-attr",
+ "axum",
+ "bytes",
+ "clap",
+ "expect-test",
+ "futures",
+ "http-body",
+ "itertools 0.14.0",
+ "libc",
+ "llm-multimodal",
+ "prost",
+ "prost-types",
+ "rmp-serde",
+ "rmpv",
+ "serde",
+ "serde_json",
+ "serde_with",
+ "serial_test",
+ "socket2",
+ "thiserror-ext",
+ "tokio",
+ "tokio-stream",
+ "tokio-util",
+ "tonic",
+ "tonic-prost",
+ "tonic-prost-build",
+ "tower",
+ "tower-http",
+ "tracing",
+ "tracing-futures",
+ "tracing-subscriber",
+ "uuid",
+ "validator",
+ "vllm-chat",
+ "vllm-engine-core-client",
+ "vllm-llm",
+ "vllm-metrics",
+ "vllm-text",
+ "zeromq",
+]
+
+[[package]]
+name = "vllm-text"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "asynk-strim-attr",
+ "easy-ext",
+ "enum-as-inner",
+ "expect-test",
+ "futures",
+ "hf-hub 0.5.0",
+ "itertools 0.14.0",
+ "serde",
+ "serde_json",
+ "serde_with",
+ "tempfile",
+ "thiserror 2.0.18",
+ "thiserror-ext",
+ "tokio",
+ "tracing",
+ "trait-set",
+ "vllm-engine-core-client",
+ "vllm-llm",
+ "vllm-tokenizer",
+]
+
+[[package]]
+name = "vllm-tokenizer"
+version = "0.1.0"
+dependencies = [
+ "base64 0.22.1",
+ "criterion",
+ "fastokens",
+ "hf-hub 0.5.0",
+ "riptoken",
+ "rustc-hash 1.1.0",
+ "serde",
+ "serde_json",
+ "tekken-rs",
+ "tempfile",
+ "thiserror 2.0.18",
+ "thiserror-ext",
+ "tiktoken-rs 0.9.1",
+ "tokenizers",
+ "tracing",
+]
+
+[[package]]
+name = "vllm-tool-parser"
+version = "0.1.0"
+dependencies = [
+ "criterion",
+ "expect-test",
+ "futures",
+ "openai-protocol",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "thiserror-ext",
+ "tool-parser",
+ "winnow",
+]
+
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "want"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
+dependencies = [
+ "try-lock",
+]
+
+[[package]]
+name = "wasi"
+version = "0.11.1+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
+
+[[package]]
+name = "wasip2"
+version = "1.0.2+wasi-0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5"
+dependencies = [
+ "wit-bindgen",
+]
+
+[[package]]
+name = "wasip3"
+version = "0.4.0+wasi-0.3.0-rc-2026-01-06"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5"
+dependencies = [
+ "wit-bindgen",
+]
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.114"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "rustversion",
+ "wasm-bindgen-macro",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-futures"
+version = "0.4.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8"
+dependencies = [
+ "cfg-if",
+ "futures-util",
+ "js-sys",
+ "once_cell",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.114"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.114"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3"
+dependencies = [
+ "bumpalo",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.114"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "wasm-encoder"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319"
+dependencies = [
+ "leb128fmt",
+ "wasmparser",
+]
+
+[[package]]
+name = "wasm-metadata"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
+dependencies = [
+ "anyhow",
+ "indexmap 2.13.0",
+ "wasm-encoder",
+ "wasmparser",
+]
+
+[[package]]
+name = "wasm-streams"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
+dependencies = [
+ "futures-util",
+ "js-sys",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+]
+
+[[package]]
+name = "wasmparser"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
+dependencies = [
+ "bitflags",
+ "hashbrown 0.15.5",
+ "indexmap 2.13.0",
+ "semver",
+]
+
+[[package]]
+name = "web-sys"
+version = "0.3.91"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "web-time"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "webpki-root-certs"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "804f18a4ac2676ffb4e8b5b5fa9ae38af06df08162314f96a68d2a363e21a8ca"
+dependencies = [
+ "rustls-pki-types",
+]
+
+[[package]]
+name = "webpki-roots"
+version = "0.26.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
+dependencies = [
+ "webpki-roots 1.0.6",
+]
+
+[[package]]
+name = "webpki-roots"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed"
+dependencies = [
+ "rustls-pki-types",
+]
+
+[[package]]
+name = "weezl"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88"
+
+[[package]]
+name = "win_uds"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7dd30a1a28a3799479cbf4e17284a220ea9ff6bad098a9d0224543a5d1efe1da"
+dependencies = [
+ "async-io",
+ "futures-io",
+ "socket2",
+]
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "windows-core"
+version = "0.62.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
+dependencies = [
+ "windows-implement",
+ "windows-interface",
+ "windows-link",
+ "windows-result",
+ "windows-strings",
+]
+
+[[package]]
+name = "windows-implement"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "windows-interface"
+version = "0.59.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
+[[package]]
+name = "windows-registry"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720"
+dependencies = [
+ "windows-link",
+ "windows-result",
+ "windows-strings",
+]
+
+[[package]]
+name = "windows-result"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-strings"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.59.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
+dependencies = [
+ "windows-targets 0.53.5",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.61.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm 0.52.6",
+ "windows_aarch64_msvc 0.52.6",
+ "windows_i686_gnu 0.52.6",
+ "windows_i686_gnullvm 0.52.6",
+ "windows_i686_msvc 0.52.6",
+ "windows_x86_64_gnu 0.52.6",
+ "windows_x86_64_gnullvm 0.52.6",
+ "windows_x86_64_msvc 0.52.6",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.53.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
+dependencies = [
+ "windows-link",
+ "windows_aarch64_gnullvm 0.53.1",
+ "windows_aarch64_msvc 0.53.1",
+ "windows_i686_gnu 0.53.1",
+ "windows_i686_gnullvm 0.53.1",
+ "windows_i686_msvc 0.53.1",
+ "windows_x86_64_gnu 0.53.1",
+ "windows_x86_64_gnullvm 0.53.1",
+ "windows_x86_64_msvc 0.53.1",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
+
+[[package]]
+name = "winnow"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2ee1708bef14716a11bae175f579062d4554d95be2c6829f518df847b7b3fdd0"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "wit-bindgen"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
+dependencies = [
+ "wit-bindgen-rust-macro",
+]
+
+[[package]]
+name = "wit-bindgen-core"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc"
+dependencies = [
+ "anyhow",
+ "heck",
+ "wit-parser",
+]
+
+[[package]]
+name = "wit-bindgen-rust"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
+dependencies = [
+ "anyhow",
+ "heck",
+ "indexmap 2.13.0",
+ "prettyplease",
+ "syn 2.0.117",
+ "wasm-metadata",
+ "wit-bindgen-core",
+ "wit-component",
+]
+
+[[package]]
+name = "wit-bindgen-rust-macro"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a"
+dependencies = [
+ "anyhow",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+ "wit-bindgen-core",
+ "wit-bindgen-rust",
+]
+
+[[package]]
+name = "wit-component"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
+dependencies = [
+ "anyhow",
+ "bitflags",
+ "indexmap 2.13.0",
+ "log",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "wasm-encoder",
+ "wasm-metadata",
+ "wasmparser",
+ "wit-parser",
+]
+
+[[package]]
+name = "wit-parser"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
+dependencies = [
+ "anyhow",
+ "id-arena",
+ "indexmap 2.13.0",
+ "log",
+ "semver",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "unicode-xid",
+ "wasmparser",
+]
+
+[[package]]
+name = "write16"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
+
+[[package]]
+name = "writeable"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
+
+[[package]]
+name = "y4m"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a5a4b21e1a62b67a2970e6831bc091d7b87e119e7f9791aef9702e3bef04448"
+
+[[package]]
+name = "yoke"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954"
+dependencies = [
+ "stable_deref_trait",
+ "yoke-derive",
+ "zerofrom",
+]
+
+[[package]]
+name = "yoke-derive"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+ "synstructure",
+]
+
+[[package]]
+name = "zerocopy"
+version = "0.8.42"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.42"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "zerofrom"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5"
+dependencies = [
+ "zerofrom-derive",
+]
+
+[[package]]
+name = "zerofrom-derive"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+ "synstructure",
+]
+
+[[package]]
+name = "zeroize"
+version = "1.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
+
+[[package]]
+name = "zeromq"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "efb2c254fd8f366755335c9e43b865f8484fe3bd717d65ffe7c3f28852863030"
+dependencies = [
+ "async-trait",
+ "asynchronous-codec",
+ "bytes",
+ "crossbeam-queue",
+ "futures",
+ "log",
+ "num-traits",
+ "once_cell",
+ "parking_lot",
+ "rand 0.9.2",
+ "regex",
+ "scc 3.6.9",
+ "thiserror 1.0.69",
+ "tokio",
+ "tokio-util",
+ "uuid",
+ "win_uds",
+]
+
+[[package]]
+name = "zerotrie"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851"
+dependencies = [
+ "displaydoc",
+ "yoke",
+ "zerofrom",
+]
+
+[[package]]
+name = "zerovec"
+version = "0.11.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002"
+dependencies = [
+ "yoke",
+ "zerofrom",
+ "zerovec-derive",
+]
+
+[[package]]
+name = "zerovec-derive"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "zmij"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
+
+[[package]]
+name = "zune-core"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb8a0807f7c01457d0379ba880ba6322660448ddebc890ce29bb64da71fb40f9"
+
+[[package]]
+name = "zune-inflate"
+version = "0.2.54"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02"
+dependencies = [
+ "simd-adler32",
+]
+
+[[package]]
+name = "zune-jpeg"
+version = "0.5.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "27bc9d5b815bc103f142aa054f561d9187d191692ec7c2d1e2b4737f8dbd7296"
+dependencies = [
+ "zune-core",
+]
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
new file mode 100644
index 000000000000..89e582dbc56c
--- /dev/null
+++ b/rust/Cargo.toml
@@ -0,0 +1,129 @@
+[workspace]
+members = [
+    "src/chat",
+    "src/cmd",
+    "src/engine-core-client",
+    "src/llm",
+    "src/managed-engine",
+    "src/metrics",
+    "src/reasoning-parser",
+    "src/server",
+    "src/text",
+    "src/tokenizer",
+    "src/tool-parser",
+]
+resolver = "3"
+
+[workspace.package]
+version = "0.1.0"
+edition = "2024"
+license = "Apache-2.0"
+
+[workspace.dependencies]
+anyhow = "1.0.100"
+arc-swap = "1.9.0"
+async-openai = "0.33.1"
+async-trait = "0.1.89"
+asynk-strim-attr = "0.1.0"
+axum = "0.8.8"
+base64 = "0.22.1"
+bytemuck = { version = "1.25.0", features = ["extern_crate_alloc"] }
+byteorder = "1.5.0"
+bytes = "1.11.1"
+clap = { version = "4.5.38", features = ["derive", "env"] }
+criterion = "0.5.1"
+easy-ext = "1.0.3"
+educe = "0.6.0"
+enum-as-inner = "0.7.0"
+expect-test = "1.5.1"
+fastokens = "0.2.0"
+futures = "0.3.31"
+half = { version = "2.7.1", features = ["bytemuck"] }
+hex = "0.4.3"
+hf-hub = { version = "0.5.0", features = ["tokio"] }
+http-body = "1.0.1"
+itertools = "0.14.0"
+libc = "0.2.177"
+llm-multimodal = { git = "https://github.com/vllm-project/llm-multimodal", rev = "5b558989844d1c7af3e43d0f604069ffd9c06320" }
+minijinja = { version = "2.0", features = ["unstable_machinery", "json", "builtins", "loader", "loop_controls"] }
+minijinja-contrib = { version = "2.0", features = ["pycompat"] }
+native-tls-vendored = { package = "native-tls", version = "0.2.18", features = ["vendored"] }
+ndarray = { version = "0.16.1", features = ["serde"] }
+openai-harmony = "0.0.8"
+openai-protocol = "1.6.0"
+parking_lot = "0.12.5"
+prometheus-client = "0.24.0"
+prometheus-client-derive-encode = "0.5.0"
+prost = "0.14.3"
+prost-types = "0.14.3"
+reasoning-parser = "1.2.2"
+reqwest = { version = "0.12.8", default-features = false, features = ["rustls-tls"] }
+riptoken = { version = "0.3.0", default-features = false }
+rmp-serde = "1.3.1"
+rmpv = { version = "1.3.1", features = ["with-serde"] }
+rustc-hash = "1.1.0"
+serde = { version = "1.0.228", features = ["derive"] }
+serde-json-fmt = "0.1.0"
+serde_default = "0.2.0"
+serde_json = "1.0.145"
+serde_repr = "0.1.20"
+serde_tuple = "1.1.3"
+serde_with = "3.18.0"
+serial_test = "3.2.0"
+socket2 = "0.6.3"
+subenum = "1.1.3"
+task-local = "0.1.1"
+tekken = { package = "tekken-rs", version = "0.1.1", default-features = false }
+tempfile = "3.23.0"
+thiserror = "2.0.16"
+thiserror-ext = "0.3.0"
+tiktoken-rs = "0.9.1"
+time = { version = "0.3.47", features = ["formatting", "local-offset", "macros"] }
+tokenizers = "0.22.0"
+tokio = { version = "1.47.1", features = [
+    "macros",
+    "net",
+    "rt-multi-thread",
+    "sync",
+    "time",
+] }
+tokio-stream = "0.1"
+tokio-util = { version = "0.7.18", features = ["rt"] }
+tonic = "0.14.5"
+tonic-build = "0.14.5"
+tonic-prost = "0.14.5"
+tonic-prost-build = "0.14.5"
+tool-parser = "1.2.0"
+tower = { version = "0.5.3", features = ["util"] }
+tower-http = { version = "0.6.8", features = ["trace"] }
+tracing = { version = "0.1.44", features = ["release_max_level_debug"] }
+tracing-futures = { version = "0.2.5", features = ["futures-03"] }
+tracing-subscriber = { version = "0.3.20", features = ["env-filter", "fmt"] }
+trait-set = "0.3.0"
+uuid = { version = "1.22.0", features = ["v4"] }
+validator = { version = "0.20.0", features = ["derive"] }
+vllm-chat = { path = "src/chat" }
+vllm-engine-core-client = { path = "src/engine-core-client" }
+vllm-llm = { path = "src/llm" }
+vllm-managed-engine = { path = "src/managed-engine" }
+vllm-metrics = { path = "src/metrics" }
+vllm-reasoning-parser = { path = "src/reasoning-parser" }
+vllm-server = { path = "src/server" }
+vllm-text = { path = "src/text" }
+vllm-tokenizer = { path = "src/tokenizer" }
+vllm-tool-parser = { path = "src/tool-parser" }
+winnow = "1.0.2"
+zeromq = { version = "0.6.0", default-features = false, features = [
+    "tokio-runtime",
+    "all-transport",
+] }
+
+[workspace.lints.clippy]
+too_many_arguments = "allow"
+
+[profile.dev]
+panic = "abort"
+
+[profile.release]
+lto = "thin"
+panic = "abort"
diff --git a/rust/README.md b/rust/README.md
new file mode 100644
index 000000000000..679a7f0966e2
--- /dev/null
+++ b/rust/README.md
@@ -0,0 +1,89 @@
+# vllm-frontend-rs
+
+This is a Rust drop-in alternative frontend for vLLM. The current goal is to rebuild the northbound serving layer in Rust while still talking to the core Python vLLM engine process(es) via ZMQ over the existing engine boundary.
+
+It should still be considered experimental, and is not feature-complete. We are working to add more functionality from the python front-end.
+
+See <https://github.com/Inferact/vllm-frontend-rs> for the original commit history before it was moved into the main vllm repo.
+
+## Architecture
+
+The component is organized as a Cargo workspace with several crates, layered bottom-up:
+
+```text
+┌─────────────────────────────────┐
+│  vllm-cmd / vllm-rs             │  CLI entrypoint:
+│                                 │  Python vLLM frontend subprocess
+│                                 │  Rust managed-engine serve mode
+├─────────────────────────────────┤
+│  vllm-server                    │  OpenAI-compatible HTTP API (axum)
+├─────────────────────────────────┤
+│  vllm-chat                      │  Chat completions: template rendering,
+│                                 │  structured assistant events,
+│                                 │  reasoning & tool parsing
+├─────────────────────────────────┤
+│  vllm-text                      │  Tokenizer & incremental detokenizer
+├─────────────────────────────────┤
+│  vllm-llm                       │  Thin token-in/token-out facade over
+│                                 │  the engine client
+├─────────────────────────────────┤
+│  vllm-engine-core-client        │  ZMQ transport + MessagePack protocol
+│                                 │  for the headless vLLM engine
+└─────────────────────────────────┘
+```
+
+`vllm-rs` integrates into Python `vllm` as a Rust frontend subprocess.
+Python owns process startup and launches the Rust API server as a Python-supervised worker, while
+passing the inherited listening socket and transport addresses into `vllm-rs`.
+
+For example:
+
+```bash
+VLLM_USE_RUST_FRONTEND=1 vllm serve Qwen/Qwen3-0.6B
+```
+
+### External Engine
+
+`vllm-rs serve` can be run standalone with `--data-parallel-size-local 0` when the Python engines
+are started elsewhere and this node should run only the Rust frontend. The frontend still uses
+the global `--data-parallel-size` to determine how many engines it expects to join the shared handshake.
+
+```bash
+vllm serve Qwen/Qwen3-0.6B \
+  --headless \
+  --data-parallel-address 127.0.0.1 \
+  --data-parallel-rpc-port 62100 \
+  --data-parallel-size 1 \
+  --data-parallel-size-local 1
+```
+
+Then start the Rust frontend-only server:
+
+```bash
+vllm-rs serve Qwen/Qwen3-0.6B \
+  --data-parallel-address 127.0.0.1 \
+  --data-parallel-rpc-port 62100 \
+  --data-parallel-size 1 \
+  --data-parallel-size-local 0
+```
+
+To build the `vllm-rs` in isolation:
+
+```bash
+# from the local checkout
+cargo install --path src/cmd --bin vllm-rs
+```
+
+### Example Request
+
+After either startup path, you can use any OpenAI-compatible client:
+
+```bash
+curl http://127.0.0.1:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Qwen/Qwen3-0.6B",
+    "messages": [{"role": "user", "content": "What is the capital of France?"}],
+    "stream": true
+  }'
+```
diff --git a/rust/proto/vllm_grpc.proto b/rust/proto/vllm_grpc.proto
new file mode 100644
index 000000000000..56c5f36442db
--- /dev/null
+++ b/rust/proto/vllm_grpc.proto
@@ -0,0 +1,196 @@
+syntax = "proto3";
+package vllm;
+
+import "google/protobuf/struct.proto";
+
+
+service Generate {
+  // Generates text given a prompt
+  rpc Generate (GenerateRequest) returns (GenerateResponse) {}
+  // Generates text given a prompt, streaming the outputs
+  rpc GenerateStream (GenerateRequest) returns (stream GenerateResponse) {}
+}
+
+// ======================================================================================
+// Generate Request
+// ======================================================================================
+
+message GenerateRequest {
+  string request_id = 1;
+  string model = 2;
+
+  oneof prompt {
+    string text = 3;
+    TokenIds token_ids = 4;
+  }
+
+  // Temperature, defaults to model-specific default or 0
+  optional float temperature = 5;
+  // Parameters controlling random sampling, not applicable if temperature == 0
+  RandomSampling sampling = 6;
+  // Parameters for conditionally penalizing/boosting
+  // candidate tokens during decoding
+  DecodingParameters decoding = 7;
+  // Parameters controlling when generation should stop
+  StoppingCriteria stopping = 8;
+  // Flags to control what is returned in the response
+  ResponseOptions response = 9;
+  // Parameters controlling KV cache/distribution
+  KVCacheParameters kv = 10;
+
+  // Truncate prompt tokens; default (0) means no truncation
+  uint32 truncate_prompt_tokens = 11;
+
+  int32 priority = 12;
+}
+
+message RandomSampling {
+  uint32 num_sequences = 1;  // "n", default (0) means 1
+  uint32 top_k = 2;  // 0 means default
+  float top_p = 3;  // 0 means default
+  float min_p = 4;  // 0 means default
+  optional int64 seed = 5;
+}
+
+message DecodingParameters {
+  // Penalties
+  float presence_penalty = 1;  // Default (0.0) means no penalty
+  float frequency_penalty = 2;  // Default (0.0) means no penalty
+  float repetition_penalty = 3;  // Default (0.0) means no penalty
+  map<uint32, float> logit_bias = 4;
+  repeated uint32 allowed_token_ids = 5;
+
+  message StringChoices {
+    repeated string choices = 1;
+  }
+
+  // Control structured outputs
+  oneof structured_output {
+    string json = 6;
+    string regex = 7;
+    StringChoices choice = 8;
+    string grammar = 9;
+    bool json_object = 10;
+    string structural_tag = 11;
+  }
+}
+
+message StoppingCriteria {
+  // Default (0) is currently 20
+  uint32 max_new_tokens = 1;
+  // Default (0) means no minimum
+  uint32 min_new_tokens = 2;
+
+  repeated uint32 stop_token_ids = 3;
+  repeated string stop_strings = 4;
+  bool include_stop_strings = 5;
+
+  bool ignore_eos = 6;
+}
+
+message ResponseOptions {
+  // Prompt options
+  bool prompt_token_ids = 1;
+  bool prompt_logprobs = 2;
+  optional CandidateTokens prompt_candidates = 3;
+
+  // Output options; output_text defaults to true
+  optional bool output_text = 4;
+  bool output_token_ids = 5;
+  bool output_logprobs = 6;
+  optional CandidateTokens output_candidates = 7;
+}
+
+message KVCacheParameters {
+  bool bypass_prefix_cache = 1;
+  string cache_salt = 2;
+
+  // KV Connector transfer parameters
+  google.protobuf.Struct kv_transfer_params = 3;
+}
+
+// Controls which extra candidate tokens at each position should be returned
+message CandidateTokens {
+  oneof select {
+    uint32 top_n = 1;
+    TokenIds token_ids = 2;
+    bool all = 3;
+  }
+}
+
+// ======================================================================================
+// Generate Response
+// ======================================================================================
+
+message GenerateResponse {
+  // Only present in first response
+  optional PromptInfo prompt_info = 1;
+  SequenceOutput outputs = 2;
+}
+
+message SequenceOutput {
+  // Index of output sequence for num_sequences > 1.
+  uint32 index = 1;
+
+  string text = 2;
+  uint32 num_tokens = 3;  // Number of tokens in this chunk
+  repeated uint32 token_ids = 4;  // If requested
+  repeated float logprobs = 5;  // If requested
+  repeated uint32 ranks = 6;  // If logprobs were requested
+  repeated CandidateTokenInfo candidate_tokens = 7; // If requested
+
+  // Only present in final output for this sequence
+  optional FinishInfo finish_info = 8;
+}
+
+// Prompt info, returned in the first response
+message PromptInfo {
+  uint32 num_prompt_tokens = 1;
+  repeated uint32 token_ids = 2;  // If requested
+  repeated float logprobs = 3;  // If requested
+  repeated uint32 ranks = 4;  // If logprobs were requested
+  repeated CandidateTokenInfo candidate_tokens = 5;
+}
+
+// Finish info, returned in the final response
+message FinishInfo {
+  uint32 num_output_tokens = 1;
+
+  enum FinishReason {
+    NOT_FINISHED = 0;  // Possibly more tokens to be streamed
+    LENGTH = 1;  // Finished due to length constraint
+    STOP = 2;  // Stop string/token or EOS encountered
+    ABORTED = 3;  // Request aborted/cancelled
+  }
+
+  FinishReason finish_reason = 2;
+  // One of these will be set when finish_reason == STOP
+  oneof stop_reason {
+    uint32 stop_token_id = 3;
+    uint32 eos_token_id = 4;
+    string stop_string = 5;
+  }
+
+  google.protobuf.Struct kv_transfer_params = 6;
+  //uint64 seed = 7;
+}
+
+// Info for candidate tokens other than the input/sampled
+// token at a given position
+message CandidateTokenInfo {
+  message TokenInfo {
+    uint32 id = 1;
+    float logprob = 2;
+    uint32 rank = 3;
+    //    string text = 4;
+    //    bytes token_bytes = 5;
+  }
+  // Candidate token infos at this position
+  repeated TokenInfo tokens = 1;
+}
+
+// Token ids used for prompt
+message TokenIds {
+  repeated uint32 ids = 1;
+}
+
diff --git a/rust/rustfmt.toml b/rust/rustfmt.toml
new file mode 100644
index 000000000000..e619a753fa0d
--- /dev/null
+++ b/rust/rustfmt.toml
@@ -0,0 +1,3 @@
+style_edition = "2024"
+chain_width = 80
+use_field_init_shorthand = true
diff --git a/rust/rustfmt.unstable.toml b/rust/rustfmt.unstable.toml
new file mode 100644
index 000000000000..4a0dda0a49f3
--- /dev/null
+++ b/rust/rustfmt.unstable.toml
@@ -0,0 +1,20 @@
+# Optional local formatting profile. CI and pre-commit use rustfmt.toml.
+# Apply manually with:
+#   cargo +nightly fmt -- --config-path rustfmt.unstable.toml
+
+style_edition = "2024"
+chain_width = 80
+comment_width = 100
+use_field_init_shorthand = true
+
+# Unstable features go here.
+unstable_features = true
+
+format_code_in_doc_comments = true
+format_macro_matchers = true
+normalize_comments = true
+normalize_doc_attributes = true
+imports_granularity = "Module"
+group_imports = "StdExternalCrate"
+reorder_impl_items = true
+wrap_comments = true
diff --git a/rust/src/chat/Cargo.toml b/rust/src/chat/Cargo.toml
new file mode 100644
index 000000000000..4bb339d43bab
--- /dev/null
+++ b/rust/src/chat/Cargo.toml
@@ -0,0 +1,52 @@
+[package]
+name = "vllm-chat"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+asynk-strim-attr.workspace = true
+easy-ext.workspace = true
+futures.workspace = true
+half.workspace = true
+itertools.workspace = true
+llm-multimodal.workspace = true
+minijinja.workspace = true
+minijinja-contrib.workspace = true
+openai-harmony.workspace = true
+reqwest.workspace = true
+serde.workspace = true
+serde-json-fmt.workspace = true
+serde_json.workspace = true
+serde_with.workspace = true
+subenum.workspace = true
+thiserror.workspace = true
+thiserror-ext.workspace = true
+tokio.workspace = true
+tracing.workspace = true
+trait-set.workspace = true
+uuid.workspace = true
+vllm-engine-core-client.workspace = true
+vllm-llm.workspace = true
+vllm-reasoning-parser.workspace = true
+vllm-text.workspace = true
+vllm-tokenizer.workspace = true
+vllm-tool-parser.workspace = true
+
+[dev-dependencies]
+anyhow.workspace = true
+bytes.workspace = true
+clap.workspace = true
+expect-test.workspace = true
+rmp-serde.workspace = true
+serial_test = { workspace = true, features = ["file_locks"] }
+tempfile.workspace = true
+tokio.workspace = true
+tracing-subscriber.workspace = true
+uuid.workspace = true
+vllm-engine-core-client = { workspace = true, features = ["test-util"] }
+zeromq.workspace = true
+
+[lints]
+workspace = true
diff --git a/rust/src/chat/examples/README.md b/rust/src/chat/examples/README.md
new file mode 100644
index 000000000000..9ae231da7925
--- /dev/null
+++ b/rust/src/chat/examples/README.md
@@ -0,0 +1,39 @@
+# Chat Smoke Test
+
+Start a fresh headless `vllm` engine:
+
+```bash
+source ../vllm/.venv/bin/activate
+HF_HUB_OFFLINE=1 \
+VLLM_LOGGING_LEVEL=DEBUG \
+VLLM_CPU_KVCACHE_SPACE=2 \
+VLLM_HOST_IP=127.0.0.1 \
+VLLM_LOOPBACK_IP=127.0.0.1 \
+python3 -m vllm.entrypoints.cli.main serve Qwen/Qwen3-0.6B \
+  --headless \
+  --data-parallel-address 127.0.0.1 \
+  --data-parallel-rpc-port 62100 \
+  --data-parallel-size-local 1 \
+  --max-model-len 512 \
+  --dtype float16
+```
+
+Run the Rust chat smoke test through the `vllm-chat` interface:
+
+```bash
+cargo run -p vllm-chat --example external_engine_chat_qwen -- \
+  --handshake-address tcp://127.0.0.1:62100 \
+  --host 127.0.0.1 \
+  --prompt 'What is the capital of France? Answer with one word.'
+```
+
+The example now defaults to `Qwen/Qwen3-0.6B`. The current `vllm-chat`
+request model stays text-first and supports either plain string content or
+OpenAI-style text blocks, while the output side now emits structured assistant
+events and automatically separates reasoning blocks for supported models. Tool
+use and multimodal inputs are still out of scope. It uses the Rust
+`tokenizers` library for the tokenizer itself, plus standard Hugging Face
+config files to load the chat template and EOS metadata.
+
+IMPORTANT: Restart `vllm` each time you run the smoke test. The current headless
+engine cannot safely handle frontend reconnects after the client shuts down.
diff --git a/rust/src/chat/examples/external_engine_chat_qwen.rs b/rust/src/chat/examples/external_engine_chat_qwen.rs
new file mode 100644
index 000000000000..d99d672d5eb9
--- /dev/null
+++ b/rust/src/chat/examples/external_engine_chat_qwen.rs
@@ -0,0 +1,178 @@
+use std::time::Duration;
+
+use anyhow::{Context, Result, bail};
+use clap::Parser;
+use futures::StreamExt as _;
+use tracing_subscriber::EnvFilter;
+use vllm_chat::{
+    AssistantBlockKind, AssistantMessageExt as _, ChatEvent, ChatLlm, ChatMessage, ChatRequest,
+    ChatRole, SamplingParams, load_model_backends,
+};
+use vllm_engine_core_client::{EngineCoreClient, EngineCoreClientConfig, TransportMode};
+use vllm_llm::Llm;
+use vllm_text::TextLlm;
+
+#[derive(Debug, Parser)]
+#[command(about = "Smoke-test the Rust chat facade against an external Qwen vLLM engine.")]
+struct Args {
+    #[arg(long)]
+    handshake_address: String,
+    #[arg(long, default_value_t = 1)]
+    engine_count: usize,
+    #[arg(long, default_value = "Qwen/Qwen3-0.6B")]
+    model: String,
+    #[arg(long, default_value = "127.0.0.1")]
+    host: String,
+    #[arg(long, default_value_t = 30)]
+    ready_timeout_secs: u64,
+    #[arg(long)]
+    prompt: String,
+}
+
+const CLIENT_INDEX: u32 = 0;
+const OUTPUT_TIMEOUT_SECS: u64 = 120;
+
+fn unique_request_id() -> String {
+    format!("rust-chat-smoke-{}", uuid::Uuid::new_v4())
+}
+
+fn init_tracing() {
+    let filter = EnvFilter::try_from_default_env()
+        .unwrap_or_else(|_| EnvFilter::new("vllm_engine_core_client=debug"));
+    let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init();
+}
+
+#[tokio::main(flavor = "multi_thread")]
+async fn main() -> Result<()> {
+    init_tracing();
+    let args = Args::parse();
+    let loaded = load_model_backends(&args.model, Default::default())
+        .await
+        .with_context(|| format!("failed to load backends for {}", args.model))?;
+    let text_backend = loaded.text_backend;
+    let chat_backend = loaded.chat_backend;
+
+    let ready_timeout = Duration::from_secs(args.ready_timeout_secs);
+    let output_timeout = Duration::from_secs(OUTPUT_TIMEOUT_SECS);
+    let request_id = unique_request_id();
+    let client = EngineCoreClient::connect(EngineCoreClientConfig {
+        transport_mode: TransportMode::HandshakeOwner {
+            handshake_address: args.handshake_address.clone(),
+            advertised_host: args.host.clone(),
+            engine_count: args.engine_count,
+            ready_timeout,
+            local_input_address: None,
+            local_output_address: None,
+        },
+        coordinator_mode: None,
+        model_name: args.model.clone(),
+        client_index: CLIENT_INDEX,
+    })
+    .await
+    .context("failed to connect to external vLLM engine")?;
+
+    println!("model={}", args.model);
+    println!("tokenizer_source=tokenizers + hf-hub");
+    println!("chat_template_source=tokenizer_config.json or adjacent chat template file");
+    println!("handshake_address={}", args.handshake_address);
+    println!("engine_count={}", args.engine_count);
+    println!("input_address={}", client.input_address());
+    println!("output_address={}", client.output_address());
+    println!("engine_identities={:x?}", client.engine_identities());
+
+    let llm = Llm::new(client);
+    let chat = ChatLlm::new(TextLlm::new(llm, text_backend), chat_backend);
+
+    let request = ChatRequest {
+        messages: vec![ChatMessage::text(ChatRole::User, args.prompt.clone())],
+        sampling_params: SamplingParams {
+            temperature: Some(0.0),
+            ..Default::default()
+        },
+        request_id: request_id.clone(),
+        ..ChatRequest::for_test()
+    };
+
+    println!("request_id={request_id}");
+    println!("prompt={}", args.prompt);
+
+    let mut stream = chat.chat(request).await.context("failed to submit chat request")?;
+    let output = tokio::time::timeout(output_timeout, async {
+        let mut final_reasoning = String::new();
+        let mut final_text = String::new();
+        let mut final_output_token_count = 0usize;
+        let mut finish_reason = None;
+        let mut saw_start = false;
+        let mut saw_stream_output = false;
+
+        while let Some(event) = stream.next().await.transpose()? {
+            match event {
+                ChatEvent::Start { .. } => {
+                    saw_start = true;
+                }
+                ChatEvent::BlockStart { kind, .. } => {
+                    if saw_stream_output {
+                        println!();
+                    }
+                    match kind {
+                        AssistantBlockKind::Reasoning => print!("[reasoning] "),
+                        AssistantBlockKind::Text => print!("[answer] "),
+                        AssistantBlockKind::ToolCall => {}
+                    }
+                    saw_stream_output = true;
+                }
+                ChatEvent::ToolCallStart { name, .. } => {
+                    if saw_stream_output {
+                        println!();
+                    }
+                    print!("[tool:{name}] ");
+                    saw_stream_output = true;
+                }
+                ChatEvent::LogprobsDelta { .. } => {}
+                ChatEvent::Done {
+                    message,
+                    output_token_count,
+                    finish_reason: reason,
+                    ..
+                } => {
+                    final_reasoning = message.reasoning().unwrap_or_default();
+                    final_text = message.text();
+                    final_output_token_count = output_token_count;
+                    finish_reason = Some(reason);
+                    break;
+                }
+                ChatEvent::BlockDelta { kind, delta, .. } => match kind {
+                    AssistantBlockKind::Reasoning | AssistantBlockKind::Text => {
+                        print!("{delta}");
+                    }
+                    AssistantBlockKind::ToolCall => {}
+                },
+                ChatEvent::ToolCallArgumentsDelta { delta, .. } => print!("{delta}"),
+                ChatEvent::BlockEnd { .. } | ChatEvent::ToolCallEnd { .. } => {}
+            }
+        }
+
+        println!();
+
+        if !saw_start {
+            bail!("chat stream ended without a start event");
+        }
+        Ok::<_, anyhow::Error>((
+            final_reasoning,
+            final_text,
+            final_output_token_count,
+            finish_reason,
+        ))
+    })
+    .await
+    .context("timed out waiting for chat output")??;
+
+    chat.shutdown().await.context("failed to shut down chat client")?;
+
+    println!("final_reasoning={:?}", output.0);
+    println!("final_text={:?}", output.1);
+    println!("final_output_token_count={:?}", output.2);
+    println!("finish_reason={:?}", output.3);
+
+    Ok(())
+}
diff --git a/rust/src/chat/src/backend/hf.rs b/rust/src/chat/src/backend/hf.rs
new file mode 100644
index 000000000000..6c3dddc87292
--- /dev/null
+++ b/rust/src/chat/src/backend/hf.rs
@@ -0,0 +1,308 @@
+use std::sync::Arc;
+
+use tracing::info;
+use vllm_text::backend::hf::{HfTextBackend, ResolvedModelFiles, load_model_config};
+use vllm_text::tokenizer::DynTokenizer;
+use vllm_text::{DynTextBackend, TextBackend as _};
+
+use crate::backend::{
+    ChatBackend, DynChatBackend, LoadModelBackendsOptions, LoadedModelBackends,
+    NewChatOutputProcessorOptions,
+};
+use crate::error::Result;
+use crate::multimodal::MultimodalModelInfo;
+use crate::output::{
+    DefaultChatOutputProcessor, HarmonyChatOutputProcessor, validate_harmony_parser_overrides,
+};
+use crate::renderer::hf::{HfChatRenderer, MultimodalRenderInfo};
+use crate::renderer::{DeepSeekV4ChatRenderer, DeepSeekV32ChatRenderer, DynChatRenderer};
+use crate::request::ChatRequest;
+use crate::{DynChatOutputProcessor, RendererSelection};
+
+/// [`ChatBackend`] implementation built on Hugging Face model files.
+pub struct HfChatBackend {
+    model_id: String,
+    model_type: String,
+    tokenizer: DynTokenizer,
+    chat_renderer: DynChatRenderer,
+    multimodal_model_info: Option<MultimodalModelInfo>,
+}
+
+impl HfChatBackend {
+    /// Load the chat backend from resolved Hugging Face model files.
+    pub fn from_resolved_model_files(
+        files: ResolvedModelFiles,
+        model_id: String,
+        options: LoadModelBackendsOptions,
+        tokenizer: DynTokenizer,
+    ) -> Result<Self> {
+        let model_config = load_model_config(files.config_path.as_deref())?;
+        let model_type = model_config.model_type().unwrap_or_default();
+        let multimodal_model_info = MultimodalModelInfo::from_paths(
+            model_id.clone(),
+            (!model_type.is_empty()).then_some(model_type.to_string()),
+            files.config_path.as_deref(),
+            files.preprocessor_config_path.as_deref(),
+            tokenizer.clone(),
+        )?;
+        let multimodal_render_info = resolve_multimodal_render_info(multimodal_model_info.as_ref());
+
+        let renderer = options.renderer.resolve(model_type);
+        let chat_renderer: DynChatRenderer = match renderer {
+            RendererSelection::Auto => unreachable!("renderer auto should be resolved above"),
+            RendererSelection::Hf => Arc::new(HfChatRenderer::load(
+                &files,
+                options,
+                multimodal_render_info,
+            )?),
+            RendererSelection::DeepSeekV32 => Arc::new(DeepSeekV32ChatRenderer::new()),
+            RendererSelection::DeepSeekV4 => Arc::new(DeepSeekV4ChatRenderer::new()),
+        };
+
+        info!(
+            model_id,
+            model_type,
+            %renderer,
+            "loaded chat backend with Hugging Face model files"
+        );
+
+        Ok(Self {
+            model_id,
+            model_type: model_type.to_string(),
+            tokenizer,
+            chat_renderer,
+            multimodal_model_info,
+        })
+    }
+}
+
+impl ChatBackend for HfChatBackend {
+    fn chat_renderer(&self) -> DynChatRenderer {
+        self.chat_renderer.clone()
+    }
+
+    fn multimodal_model_info(&self) -> Option<&MultimodalModelInfo> {
+        self.multimodal_model_info.as_ref()
+    }
+
+    fn new_chat_output_processor(
+        &self,
+        request: &mut ChatRequest,
+        options: NewChatOutputProcessorOptions<'_>,
+    ) -> Result<DynChatOutputProcessor> {
+        if self.model_type == "gpt_oss" {
+            validate_harmony_parser_overrides(options.tool_call_parser, options.reasoning_parser)?;
+            return Ok(Box::new(HarmonyChatOutputProcessor::new(request)?));
+        }
+
+        Ok(Box::new(DefaultChatOutputProcessor::new(
+            request,
+            &self.model_id,
+            self.tokenizer.clone(),
+            options.tool_call_parser,
+            options.reasoning_parser,
+        )?))
+    }
+}
+
+/// Load the Hugging Face text and chat backends for the given model id.
+pub(super) async fn load_model_backends(
+    model_id: &str,
+    options: LoadModelBackendsOptions,
+) -> Result<LoadedModelBackends> {
+    let files = ResolvedModelFiles::new(model_id).await?;
+    let text_backend =
+        HfTextBackend::from_resolved_model_files(files.clone(), model_id.to_string())?;
+    let tokenizer = text_backend.tokenizer();
+    let text_backend: DynTextBackend = Arc::new(text_backend);
+
+    let chat_backend: DynChatBackend = Arc::new(HfChatBackend::from_resolved_model_files(
+        files,
+        model_id.to_string(),
+        options,
+        tokenizer,
+    )?);
+
+    Ok(LoadedModelBackends {
+        text_backend,
+        chat_backend,
+    })
+}
+
+fn resolve_multimodal_render_info(
+    info: Option<&MultimodalModelInfo>,
+) -> Option<MultimodalRenderInfo> {
+    info.map(|info| MultimodalRenderInfo {
+        placeholder_token: info.placeholder_token().to_string(),
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+    use std::path::PathBuf;
+    use std::sync::Arc;
+
+    use tempfile::tempdir;
+    use vllm_text::backend::hf::TokenizerSource;
+    use vllm_text::tokenizer::{DynTokenizer, Tokenizer};
+
+    use super::HfChatBackend;
+    use crate::RendererSelection;
+    use crate::backend::{ChatBackend, LoadModelBackendsOptions};
+    use crate::request::{ChatContent, ChatMessage, ChatRequest};
+
+    fn request_with_user_text(text: &str) -> ChatRequest {
+        ChatRequest {
+            request_id: "renderer-selection-test".to_string(),
+            messages: vec![ChatMessage::User {
+                content: ChatContent::Text(text.to_string()),
+            }],
+            ..ChatRequest::for_test()
+        }
+    }
+
+    fn write_json(path: &std::path::Path, content: &str) {
+        std::fs::write(path, content).unwrap();
+    }
+
+    fn resolved_files(
+        config_json: &str,
+        tokenizer_config_json: &str,
+    ) -> vllm_text::backend::hf::ResolvedModelFiles {
+        let dir = tempdir().unwrap();
+        let root = dir.keep();
+        let config_path = root.join("config.json");
+        let tokenizer_config_path = root.join("tokenizer_config.json");
+        write_json(&config_path, config_json);
+        write_json(&tokenizer_config_path, tokenizer_config_json);
+
+        vllm_text::backend::hf::ResolvedModelFiles {
+            tokenizer: TokenizerSource::HuggingFace(PathBuf::from("/tmp/unused-tokenizer.json")),
+            tokenizer_config_path: Some(tokenizer_config_path),
+            generation_config_path: None,
+            preprocessor_config_path: None,
+            chat_template_path: None,
+            config_path: Some(config_path),
+        }
+    }
+
+    struct TestTokenizer;
+
+    impl Tokenizer for TestTokenizer {
+        fn encode(
+            &self,
+            _text: &str,
+            _add_special_tokens: bool,
+        ) -> vllm_text::tokenizer::Result<Vec<u32>> {
+            Ok(Vec::new())
+        }
+
+        fn decode(
+            &self,
+            _token_ids: &[u32],
+            _skip_special_tokens: bool,
+        ) -> vllm_text::tokenizer::Result<String> {
+            Ok(String::new())
+        }
+
+        fn token_to_id(&self, _token: &str) -> Option<u32> {
+            None
+        }
+    }
+
+    fn test_tokenizer() -> DynTokenizer {
+        Arc::new(TestTokenizer)
+    }
+
+    fn render_prompt(
+        renderer: RendererSelection,
+        config_json: &str,
+        tokenizer_config_json: &str,
+    ) -> String {
+        let backend = HfChatBackend::from_resolved_model_files(
+            resolved_files(config_json, tokenizer_config_json),
+            "test-model".to_string(),
+            LoadModelBackendsOptions {
+                renderer,
+                chat_template_content_format: Default::default(),
+                chat_template: None,
+                default_chat_template_kwargs: HashMap::new(),
+            },
+            test_tokenizer(),
+        )
+        .unwrap();
+
+        backend
+            .chat_renderer()
+            .render(&request_with_user_text("hello"))
+            .unwrap()
+            .prompt
+            .into_text()
+            .expect("renderer should return text prompt")
+    }
+
+    #[test]
+    fn auto_uses_deepseek_renderer_for_deepseek_v32_model_type() {
+        let prompt = render_prompt(
+            RendererSelection::Auto,
+            r#"{"model_type":"deepseek_v32"}"#,
+            r#"{}"#,
+        );
+
+        assert_eq!(
+            prompt,
+            "<｜begin▁of▁sentence｜><｜User｜>hello<｜Assistant｜></think>"
+        );
+    }
+
+    #[test]
+    fn auto_uses_hf_renderer_for_other_model_types() {
+        let prompt = render_prompt(
+            RendererSelection::Auto,
+            r#"{"model_type":"qwen2"}"#,
+            r#"{"chat_template":"{{ messages[0].content }}"}"#,
+        );
+
+        assert_eq!(prompt, "hello");
+    }
+
+    #[test]
+    fn explicit_deepseek_renderer_overrides_generic_model_type() {
+        let prompt = render_prompt(
+            RendererSelection::DeepSeekV32,
+            r#"{"model_type":"qwen2"}"#,
+            r#"{"chat_template":"{{ messages[0].content }}"}"#,
+        );
+
+        assert_eq!(
+            prompt,
+            "<｜begin▁of▁sentence｜><｜User｜>hello<｜Assistant｜></think>"
+        );
+    }
+
+    #[test]
+    fn explicit_hf_renderer_overrides_deepseek_v32_model_type() {
+        let prompt = render_prompt(
+            RendererSelection::Hf,
+            r#"{"model_type":"deepseek_v32"}"#,
+            r#"{"chat_template":"{{ messages[0].content }}"}"#,
+        );
+
+        assert_eq!(prompt, "hello");
+    }
+
+    #[test]
+    fn auto_uses_nested_text_config_model_type() {
+        let prompt = render_prompt(
+            RendererSelection::Auto,
+            r#"{"text_config":{"model_type":"deepseek_v32","num_attention_heads":32}}"#,
+            r#"{}"#,
+        );
+
+        assert_eq!(
+            prompt,
+            "<｜begin▁of▁sentence｜><｜User｜>hello<｜Assistant｜></think>"
+        );
+    }
+}
diff --git a/rust/src/chat/src/backend/mod.rs b/rust/src/chat/src/backend/mod.rs
new file mode 100644
index 000000000000..f49ca6737047
--- /dev/null
+++ b/rust/src/chat/src/backend/mod.rs
@@ -0,0 +1,86 @@
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use serde_json::Value;
+use vllm_text::{DynTextBackend, TextBackend};
+
+use crate::error::Result;
+use crate::multimodal::MultimodalModelInfo;
+use crate::output::DynChatOutputProcessor;
+use crate::renderer::DynChatRenderer;
+use crate::request::ChatRequest;
+use crate::{ChatTemplateContentFormatOption, ParserSelection, RendererSelection};
+
+pub mod hf;
+
+/// Options for creating a new chat output processor.
+pub struct NewChatOutputProcessorOptions<'a> {
+    pub tool_call_parser: &'a ParserSelection,
+    pub reasoning_parser: &'a ParserSelection,
+}
+
+/// Minimal prompt-processing backend needed by `vllm-chat`.
+pub trait ChatBackend: Send + Sync {
+    /// Return the renderer used for chat-prompt construction.
+    fn chat_renderer(&self) -> DynChatRenderer;
+
+    /// Return model files/config needed for request-scoped multimodal
+    /// preprocessing, if supported.
+    fn multimodal_model_info(&self) -> Option<&MultimodalModelInfo> {
+        None
+    }
+
+    /// Create a request-scoped output processor after request-level adjustments
+    /// are applied.
+    fn new_chat_output_processor(
+        &self,
+        request: &mut ChatRequest,
+        options: NewChatOutputProcessorOptions<'_>,
+    ) -> Result<DynChatOutputProcessor>;
+}
+
+/// Shared trait-object form of [`ChatBackend`].
+pub type DynChatBackend = Arc<dyn ChatBackend>;
+
+/// Convenience trait for backends that can serve both raw text generation and
+/// chat templating.
+///
+/// This is mainly useful in tests and small examples, where one mock/backend
+/// often implements both sides and callers want `ChatLlm` to wire the shared
+/// object into `TextLlm` automatically.
+pub trait ChatTextBackend: ChatBackend + TextBackend {}
+
+impl<T> ChatTextBackend for T where T: ChatBackend + TextBackend + ?Sized {}
+
+/// Shared trait-object form of [`ChatTextBackend`].
+pub type DynChatTextBackend = Arc<dyn ChatTextBackend>;
+
+/// Frontend-side chat backend loading options.
+#[derive(Debug, Clone, PartialEq, Eq, Default)]
+pub struct LoadModelBackendsOptions {
+    /// Which chat renderer implementation to use.
+    pub renderer: RendererSelection,
+    /// How to serialize `message.content` when rendering the chat template.
+    pub chat_template_content_format: ChatTemplateContentFormatOption,
+    /// Optional server-default chat template override, provided either as an
+    /// inline template or as a path to a template file.
+    pub chat_template: Option<String>,
+    /// Optional server-default keyword arguments merged into every
+    /// chat-template render before request-level `chat_template_kwargs`.
+    pub default_chat_template_kwargs: HashMap<String, Value>,
+}
+
+/// Shared backends loaded from a model id.
+pub struct LoadedModelBackends {
+    pub text_backend: DynTextBackend,
+    pub chat_backend: DynChatBackend,
+}
+
+/// Load text and chat backends for the given model id.
+pub async fn load_model_backends(
+    model_id: &str,
+    options: LoadModelBackendsOptions,
+) -> Result<LoadedModelBackends> {
+    // Currently, we only have HuggingFace backends.
+    hf::load_model_backends(model_id, options).await
+}
diff --git a/rust/src/chat/src/error.rs b/rust/src/chat/src/error.rs
new file mode 100644
index 000000000000..25d8d015680e
--- /dev/null
+++ b/rust/src/chat/src/error.rs
@@ -0,0 +1,80 @@
+use thiserror::Error;
+use thiserror_ext::Macro;
+
+type BoxedError = Box<dyn std::error::Error + Send + Sync>;
+
+#[derive(Debug, Error, Macro)]
+#[thiserror_ext(macro(path = "crate::error"))]
+pub enum Error {
+    #[error("chat request must contain at least one message")]
+    EmptyMessages,
+    #[error("cannot continue the final message when the last message is not from the assistant")]
+    ContinueFinalAssistantWithoutFinalAssistant,
+    #[error("chat template is required but none was configured")]
+    MissingChatTemplate,
+    #[error("chat template error: {0}")]
+    ChatTemplate(String),
+    #[error("multimodal input is not supported by this chat renderer")]
+    UnsupportedMultimodalRenderer,
+    #[error("unsupported multimodal content: {0}")]
+    UnsupportedMultimodalContent(&'static str),
+    #[error("multimodal preprocessing error: {0}")]
+    Multimodal(#[message] String),
+    #[error("{kind} parsing is not available for model `{model_id}`")]
+    ParserUnavailableForModel {
+        kind: &'static str,
+        model_id: String,
+    },
+    #[error("{kind} parsing is disabled by frontend configuration")]
+    ParserDisabled { kind: &'static str },
+    #[error(
+        "{kind} parser `{name}` is not registered{}",
+        available_parser_hint(.available_names)
+    )]
+    ParserUnavailableByName {
+        kind: &'static str,
+        name: String,
+        available_names: Vec<String>,
+    },
+    #[error("failed to initialize {kind} parser `{name}`")]
+    ParserInitialization {
+        kind: &'static str,
+        name: String,
+        #[source]
+        error: BoxedError,
+    },
+    #[error(
+        "gpt_oss uses native Harmony output parsing; generic {kind} parser override `{selection}` is not supported"
+    )]
+    HarmonyParserOverrideUnsupported {
+        kind: &'static str,
+        selection: String,
+    },
+    #[error("harmony output parsing failed")]
+    HarmonyOutputParsing {
+        #[source]
+        error: BoxedError,
+    },
+    #[error(
+        "this model's maximum context length is {max_model_len} tokens, \
+         but the prompt contains {prompt_len} input tokens"
+    )]
+    PromptTooLong { max_model_len: u32, prompt_len: u32 },
+    #[error("chat request stream `{request_id}` closed before terminal output")]
+    StreamClosedBeforeTerminalOutput { request_id: String },
+    #[error("tool call stream state is inconsistent: {message}")]
+    ToolCallStreamInvariant { message: String },
+    #[error(transparent)]
+    Text(#[from] vllm_text::Error),
+}
+
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// Format the available-parser suffix used in user-facing error messages.
+fn available_parser_hint(available_names: &[String]) -> String {
+    if available_names.is_empty() {
+        String::new()
+    } else {
+        format!(" (choose from: {})", available_names.join(", "))
+    }
+}
diff --git a/rust/src/chat/src/event.rs b/rust/src/chat/src/event.rs
new file mode 100644
index 000000000000..08603b43572a
--- /dev/null
+++ b/rust/src/chat/src/event.rs
@@ -0,0 +1,183 @@
+use std::ops::Deref;
+use std::sync::Arc;
+
+use serde::{Deserialize, Serialize};
+use vllm_text::{DecodedLogprobs, DecodedPromptLogprobs};
+
+use crate::FinishReason;
+
+/// One finalized assistant tool call.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct AssistantToolCall {
+    pub id: String,
+    pub name: String,
+    pub arguments: String,
+}
+
+/// Semantic kind of one assistant output block.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+pub enum AssistantBlockKind {
+    /// Visible final-answer text.
+    Text,
+    /// Extracted reasoning content.
+    Reasoning,
+    /// One finalized tool call.
+    ToolCall,
+}
+
+/// One structured assistant output block.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub enum AssistantContentBlock {
+    /// Visible final-answer text.
+    Text { text: String },
+    /// Extracted reasoning content.
+    Reasoning { text: String },
+    /// One finalized tool call.
+    ToolCall(AssistantToolCall),
+}
+
+impl AssistantContentBlock {
+    /// Return the semantic kind of this block.
+    pub fn kind(&self) -> AssistantBlockKind {
+        match self {
+            Self::Text { .. } => AssistantBlockKind::Text,
+            Self::Reasoning { .. } => AssistantBlockKind::Reasoning,
+            Self::ToolCall(..) => AssistantBlockKind::ToolCall,
+        }
+    }
+
+    /// Return this block as one finalized tool call, if applicable.
+    pub fn as_tool_call(&self) -> Option<&AssistantToolCall> {
+        match self {
+            Self::ToolCall(call) => Some(call),
+            _ => None,
+        }
+    }
+}
+
+#[easy_ext::ext(AssistantMessageExt)]
+impl [AssistantContentBlock] {
+    /// Concatenate all visible final-answer text blocks.
+    pub fn text(&self) -> String {
+        self.iter()
+            .filter_map(|block| match block {
+                AssistantContentBlock::Text { text } => Some(text.as_str()),
+                _ => None,
+            })
+            .collect()
+    }
+
+    /// Concatenate all extracted reasoning blocks, if any.
+    pub fn reasoning(&self) -> Option<String> {
+        Some(
+            self.iter()
+                .filter_map(|block| match block {
+                    AssistantContentBlock::Reasoning { text } => Some(text.as_str()),
+                    _ => None,
+                })
+                .collect(),
+        )
+        .filter(|s: &String| !s.is_empty())
+    }
+
+    /// Return whether this assistant message contains any non-empty reasoning
+    /// text blocks.
+    pub fn has_reasoning(&self) -> bool {
+        self.iter().any(|block| match block {
+            AssistantContentBlock::Reasoning { text } => !text.is_empty(),
+            _ => false,
+        })
+    }
+
+    /// Return finalized assistant tool calls in encounter order.
+    pub fn tool_calls(&self) -> impl Iterator<Item = &AssistantToolCall> {
+        self.iter().filter_map(AssistantContentBlock::as_tool_call)
+    }
+
+    /// Return whether this assistant message contains any tool-call blocks.
+    pub fn has_tool_calls(&self) -> bool {
+        self.iter().any(|block| matches!(block, AssistantContentBlock::ToolCall(_)))
+    }
+}
+
+/// Final structured assistant message assembled from the event stream.
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct AssistantMessage {
+    pub content: Vec<AssistantContentBlock>,
+}
+
+impl Deref for AssistantMessage {
+    type Target = [AssistantContentBlock];
+
+    fn deref(&self) -> &Self::Target {
+        &self.content
+    }
+}
+
+impl AssistantMessage {
+    /// Push one new block to the end of the message content.
+    pub(crate) fn push_block(&mut self, block: AssistantContentBlock) {
+        self.content.push(block);
+    }
+}
+
+/// Streamed chat event emitted by [`crate::ChatEventStream`].
+#[derive(Debug, Clone, PartialEq)]
+pub enum ChatEvent {
+    /// The request was accepted, streaming has started, and prompt metadata is
+    /// ready.
+    Start {
+        /// The actual prompt token IDs for this request.
+        prompt_token_ids: Arc<[u32]>,
+        /// Once-only prompt logprobs metadata, when requested.
+        prompt_logprobs: Option<DecodedPromptLogprobs>,
+    },
+    /// A new assistant output block has started.
+    BlockStart {
+        index: usize,
+        kind: AssistantBlockKind,
+    },
+    /// A newly observed delta for one open assistant output block.
+    BlockDelta {
+        index: usize,
+        kind: AssistantBlockKind,
+        delta: String,
+    },
+    /// Per-decoded-update sample metadata: logprobs and/or output token IDs.
+    LogprobsDelta {
+        logprobs: Option<DecodedLogprobs>,
+        token_ids: Vec<u32>,
+    },
+    /// One assistant output block has ended.
+    BlockEnd {
+        index: usize,
+        block: AssistantContentBlock,
+    },
+    /// One tool call has started.
+    ToolCallStart {
+        index: usize,
+        id: String,
+        name: String,
+    },
+    /// One incremental tool-call arguments delta for the currently open tool
+    /// call.
+    ToolCallArgumentsDelta { index: usize, delta: String },
+    /// One tool call has ended.
+    ToolCallEnd {
+        index: usize,
+        call: AssistantToolCall,
+    },
+    /// Terminal event carrying the final assembled assistant message and finish
+    /// metadata.
+    Done {
+        message: AssistantMessage,
+        /// Number of prompt tokens actually sent to the engine after chat
+        /// template rendering and tokenization.
+        prompt_token_count: usize,
+        /// Number of output tokens generated.
+        output_token_count: usize,
+        finish_reason: FinishReason,
+        /// Connector-specific KV transfer parameters for disaggregated serving.
+        kv_transfer_params: Option<serde_json::Value>,
+    },
+}
diff --git a/rust/src/chat/src/lib.rs b/rust/src/chat/src/lib.rs
new file mode 100644
index 000000000000..6afc4b35c5fc
--- /dev/null
+++ b/rust/src/chat/src/lib.rs
@@ -0,0 +1,249 @@
+//! Minimal chat facade above [`vllm_text`].
+//!
+//! This crate keeps the northbound boundary intentionally small:
+//! `messages -> rendered prompt -> tokenized prompt -> engine request ->
+//! streamed structured assistant events`. The request side remains text-first,
+//! while the response side can emit structured reasoning and final-answer
+//! blocks. It is closer to vLLM's internal chat-rendering flow than to a full
+//! OpenAI-compatible surface.
+
+pub use backend::hf::HfChatBackend;
+pub use backend::{
+    ChatBackend, ChatTextBackend, DynChatBackend, DynChatTextBackend, LoadModelBackendsOptions,
+    LoadedModelBackends, NewChatOutputProcessorOptions, load_model_backends,
+};
+pub use error::{Error, Result};
+pub use event::{
+    AssistantBlockKind, AssistantContentBlock, AssistantMessage, AssistantMessageExt,
+    AssistantToolCall, ChatEvent,
+};
+use futures::{StreamExt, TryStreamExt as _};
+pub use output::{
+    ChatOutputProcessor, DefaultChatOutputProcessor, DynChatOutputProcessor,
+    HarmonyChatOutputProcessor,
+};
+pub use parser::ParserSelection;
+pub use parser::reasoning::{
+    ReasoningDelta, ReasoningError, ReasoningParser, ReasoningParserFactory,
+};
+pub use parser::tool::{ToolParser, ToolParserError, ToolParserFactory};
+pub use renderer::hf::ChatTemplateContentFormatOption;
+pub use renderer::{
+    ChatRenderer, DeepSeekV4ChatRenderer, DeepSeekV32ChatRenderer, DynChatRenderer, RenderedPrompt,
+    RendererSelection,
+};
+pub use request::{
+    ChatContent, ChatContentPart, ChatMessage, ChatOptions, ChatRequest, ChatRole, ChatTool,
+    ChatToolChoice, GenerationPromptMode, ReasoningEffort, SamplingParams,
+};
+pub use stream::{ChatEventStream, ChatEventStreamTrait, CollectedAssistantMessage};
+pub use vllm_llm::FinishReason;
+
+mod backend;
+mod error;
+mod event;
+pub mod multimodal;
+mod output;
+mod parser;
+mod renderer;
+mod request;
+mod stream;
+
+use vllm_engine_core_client::EngineCoreClient;
+use vllm_engine_core_client::protocol::ModelDtype;
+use vllm_llm::Llm;
+use vllm_text::{TextLlm, TextRequest};
+
+/// Validate explicit parser override names without starting request processing.
+pub fn validate_parser_overrides(
+    tool_call_parser: &ParserSelection,
+    reasoning_parser: &ParserSelection,
+) -> Result<()> {
+    let tool_parser_factory = ToolParserFactory::global();
+    if let ParserSelection::Explicit(name) = tool_call_parser
+        && !tool_parser_factory.contains(name)
+    {
+        return Err(Error::ParserUnavailableByName {
+            kind: "tool",
+            name: name.clone(),
+            available_names: tool_parser_factory.list(),
+        });
+    }
+
+    let reasoning_parser_factory = ReasoningParserFactory::global();
+    if let ParserSelection::Explicit(name) = reasoning_parser
+        && !reasoning_parser_factory.contains(name)
+    {
+        return Err(Error::ParserUnavailableByName {
+            kind: "reasoning",
+            name: name.clone(),
+            available_names: reasoning_parser_factory.list(),
+        });
+    }
+
+    Ok(())
+}
+
+/// Structured chat facade above [`TextLlm`].
+///
+/// This layer stays above raw text semantics: it takes care of chat-template
+/// rendering, exposes structured assistant events, and adds chat-specific
+/// request semantics such as tool calls.
+pub struct ChatLlm {
+    text: TextLlm,
+    backend: DynChatBackend,
+    /// Effective model dtype reported by the engine.
+    model_dtype: Option<ModelDtype>,
+    /// Tool-call parser selection.
+    tool_call_parser: ParserSelection,
+    /// Reasoning parser selection.
+    reasoning_parser: ParserSelection,
+}
+
+impl ChatLlm {
+    /// Create a new chat facade from a text-generation facade plus a chat
+    /// backend.
+    pub fn new(text: TextLlm, backend: DynChatBackend) -> Self {
+        let model_dtype = text.engine_core_client().model_dtype();
+
+        Self {
+            text,
+            backend,
+            model_dtype,
+            tool_call_parser: ParserSelection::Auto,
+            reasoning_parser: ParserSelection::Auto,
+        }
+    }
+
+    /// Convenience constructor for one shared backend object that implements
+    /// both text and chat responsibilities.
+    pub fn from_shared_backend(llm: Llm, backend: DynChatTextBackend) -> Self {
+        let text = TextLlm::new(llm, backend.clone());
+        Self::new(text, backend)
+    }
+
+    /// Set tool-call parser selection.
+    pub fn with_tool_call_parser(mut self, selection: ParserSelection) -> Self {
+        self.tool_call_parser = selection;
+        self
+    }
+
+    /// Set reasoning parser selection.
+    pub fn with_reasoning_parser(mut self, selection: ParserSelection) -> Self {
+        self.reasoning_parser = selection;
+        self
+    }
+
+    /// Override the effective model dtype used for multimodal tensor encoding.
+    pub fn with_model_dtype(mut self, model_dtype: Option<ModelDtype>) -> Self {
+        self.model_dtype = model_dtype;
+        self
+    }
+
+    /// Expose the underlying text facade for raw text-generation routes such as
+    /// `/v1/completions`.
+    pub fn text(&self) -> &TextLlm {
+        &self.text
+    }
+
+    /// Return the model ID reported by the underlying text backend.
+    pub fn model_id(&self) -> &str {
+        self.text.model_id()
+    }
+
+    /// Expose the underlying engine-core client for low-level utility/admin
+    /// calls.
+    pub fn engine_core_client(&self) -> &EngineCoreClient {
+        self.text.engine_core_client()
+    }
+
+    /// Render, tokenize, and submit one chat request.
+    pub async fn chat(&self, mut request: ChatRequest) -> Result<ChatEventStream> {
+        request.validate()?;
+
+        let output_processor = self.backend.new_chat_output_processor(
+            &mut request,
+            NewChatOutputProcessorOptions {
+                tool_call_parser: &self.tool_call_parser,
+                reasoning_parser: &self.reasoning_parser,
+            },
+        )?;
+        let rendered = self.backend.chat_renderer().render(&request)?;
+
+        let (prompt, mm_features) = multimodal::finalize_rendered_prompt(
+            &request,
+            rendered,
+            self.backend.multimodal_model_info(),
+            self.model_dtype,
+        )
+        .await?;
+
+        let text_request = TextRequest {
+            request_id: request.request_id.clone(),
+            prompt,
+            mm_features,
+            sampling_params: request.sampling_params,
+            decode_options: request.decode_options,
+            intermediate: request.intermediate,
+            priority: request.priority,
+            cache_salt: request.cache_salt,
+            add_special_tokens: request.add_special_tokens,
+            data_parallel_rank: request.data_parallel_rank,
+        };
+        let decoded_stream = self.text.generate(text_request).await?.map_err(Error::from).boxed();
+
+        let structured_stream = output_processor.process(decoded_stream)?;
+
+        Ok(ChatEventStream::new(request.request_id, structured_stream))
+    }
+
+    /// Shut down the underlying LLM client and its background tasks.
+    pub async fn shutdown(self) -> Result<()> {
+        self.text.shutdown().await?;
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use thiserror_ext::AsReport;
+
+    use super::{ParserSelection, validate_parser_overrides};
+    use crate::parser::reasoning::names;
+
+    #[test]
+    fn validate_parser_overrides_accepts_registered_names() {
+        validate_parser_overrides(
+            &ParserSelection::Explicit("llama3_json".to_string()),
+            &ParserSelection::Explicit(names::QWEN3.to_string()),
+        )
+        .unwrap();
+    }
+
+    #[test]
+    fn validate_parser_overrides_accepts_auto_and_none() {
+        validate_parser_overrides(&ParserSelection::Auto, &ParserSelection::None).unwrap();
+    }
+
+    #[test]
+    fn validate_parser_overrides_rejects_unknown_tool_parser() {
+        let error = validate_parser_overrides(
+            &ParserSelection::Explicit("definitely_missing_tool_parser".to_string()),
+            &ParserSelection::Auto,
+        )
+        .unwrap_err();
+
+        expect_test::expect!["tool parser `definitely_missing_tool_parser` is not registered (choose from: deepseek_v3, deepseek_v31, deepseek_v32, deepseek_v4, gemma4, glm45, glm47, hermes, kimi_k2, llama3_json, llama4_json, minimax_m2, mistral, qwen3_coder, qwen3_xml)"].assert_eq(&error.to_report_string());
+    }
+
+    #[test]
+    fn validate_parser_overrides_rejects_unknown_reasoning_parser() {
+        let error = validate_parser_overrides(
+            &ParserSelection::Auto,
+            &ParserSelection::Explicit("definitely_missing_reasoning_parser".to_string()),
+        )
+        .unwrap_err();
+
+        expect_test::expect!["reasoning parser `definitely_missing_reasoning_parser` is not registered (choose from: cohere_cmd, deepseek_r1, deepseek_v3, deepseek_v4, gemma4, glm45, kimi, kimi_k2, minimax_m2, nemotron_v3, qwen3, step3)"].assert_eq(&error.to_report_string());
+    }
+}
diff --git a/rust/src/chat/src/multimodal.rs b/rust/src/chat/src/multimodal.rs
new file mode 100644
index 000000000000..ad730405601f
--- /dev/null
+++ b/rust/src/chat/src/multimodal.rs
@@ -0,0 +1,775 @@
+//! Chat-layer multimodal image preparation.
+//!
+//! This module owns the narrow image-only multimodal path for chat requests:
+//! it extracts image parts from structured chat messages, fetches and
+//! preprocesses them through `llm-multimodal`, expands rendered prompt
+//! placeholders after tokenization, and builds the engine-facing
+//! `MmFeatures` payload.
+//!
+//! Raw media stays above `vllm-text`; this module lowers it into token IDs and
+//! opaque tensor payloads before the request is handed to text generation.
+
+use std::collections::{HashMap, HashSet};
+use std::fs;
+use std::path::Path;
+use std::sync::{Arc, LazyLock, Once};
+
+use itertools::izip;
+use llm_multimodal::{
+    AsyncMultiModalTracker, FieldLayout, ImagePreProcessor, ImageProcessorRegistry, MediaConnector,
+    MediaConnectorConfig, MediaContentPart, Modality, ModelMetadata, ModelProcessorSpec,
+    ModelRegistry, PreProcessorConfig, PreprocessedImages, PromptReplacement, TokenResolver,
+    TrackedMedia,
+};
+use tracing::warn;
+use vllm_engine_core_client::protocol::ModelDtype;
+use vllm_engine_core_client::protocol::multimodal::{
+    MmBatchedField, MmFeatureSpec, MmFeatures, MmField, MmFieldElem, MmFlatField, MmKwargsItem,
+    MmSharedField, MmSlice, PlaceholderRange, SliceSpec,
+};
+use vllm_engine_core_client::protocol::tensor::WireTensor;
+use vllm_text::Prompt;
+use vllm_text::tokenizer::{DynTokenizer, Tokenizer};
+
+use crate::error::{Error, Result, bail_multimodal, multimodal};
+use crate::renderer::RenderedPrompt;
+use crate::request::{ChatContent, ChatContentPart, ChatMessage, ChatRequest};
+
+mod tensor;
+
+/// Resolved multimodal support for one loaded model.
+#[derive(Clone)]
+pub struct MultimodalModelInfo {
+    context: MultimodalModelContext,
+    spec: ResolvedMultimodalSpec,
+    image_processor: ResolvedImageProcessor,
+    media_connector: Arc<MediaConnector>,
+}
+
+/// Model metadata and tokenizer access shared by all multimodal specs.
+#[derive(Clone)]
+struct MultimodalModelContext {
+    model_id: String,
+    model_type: Option<String>,
+    config: serde_json::Value,
+    tokenizer: TokenizerResolver,
+}
+
+impl MultimodalModelContext {
+    fn metadata(&self) -> ModelMetadata<'_> {
+        ModelMetadata {
+            model_id: &self.model_id,
+            tokenizer: &self.tokenizer,
+            config: &self.config,
+        }
+    }
+
+    fn tokenizer(&self) -> &dyn Tokenizer {
+        self.tokenizer.0.as_ref()
+    }
+
+    /// Resolve a static model processor spec for one loaded model.
+    fn resolve_model_spec(&self) -> Option<&'static dyn ModelProcessorSpec> {
+        static REGISTRY: LazyLock<ModelRegistry> = LazyLock::new(ModelRegistry::new);
+        REGISTRY.lookup(&self.metadata())
+    }
+
+    /// Resolve a static image preprocessor for one loaded model.
+    fn resolve_image_processor(&self) -> Option<&'static dyn ImagePreProcessor> {
+        static REGISTRY: LazyLock<ImageProcessorRegistry> =
+            LazyLock::new(ImageProcessorRegistry::with_defaults);
+        REGISTRY.find(&self.model_id, self.model_type.as_deref())
+    }
+}
+
+/// Static model-specific prompt and tensor-layout behavior.
+#[derive(Clone)]
+struct ResolvedMultimodalSpec {
+    raw: &'static dyn ModelProcessorSpec,
+    placeholder_token: String,
+    placeholder_marker_token_id: u32,
+    placeholder_embed_token_id: u32,
+    field_layouts: HashMap<String, FieldLayout>,
+    keep_on_cpu_keys: HashSet<String>,
+}
+
+impl ResolvedMultimodalSpec {
+    fn new(raw: &'static dyn ModelProcessorSpec, context: &MultimodalModelContext) -> Result<Self> {
+        let metadata = context.metadata();
+        let placeholder_token =
+            raw.placeholder_token(&metadata).map_err(|error| multimodal!("{error}"))?;
+        // This is the rendered prompt marker, so resolve it from the token
+        // string itself. Do not use `ModelProcessorSpec::placeholder_token_id()`:
+        // for some specs that ID is the replacement vision/patch token,
+        // not necessarily the token ID of `placeholder_token`.
+        let placeholder_marker_token_id =
+            context.tokenizer().token_to_id(&placeholder_token).ok_or_else(|| {
+                multimodal!(
+                    "placeholder token `{placeholder_token}` is not in the tokenizer vocabulary"
+                )
+            })?;
+        let placeholder_embed_token_id =
+            raw.placeholder_token_id(&metadata).map_err(|error| multimodal!("{error}"))? as u32;
+
+        Ok(Self {
+            raw,
+            placeholder_token,
+            placeholder_marker_token_id,
+            placeholder_embed_token_id,
+            field_layouts: raw.field_layouts(),
+            keep_on_cpu_keys: raw.keep_on_cpu_keys().into_iter().collect(),
+        })
+    }
+
+    fn prompt_replacements(
+        &self,
+        context: &MultimodalModelContext,
+        preprocessed: &PreprocessedImages,
+    ) -> Result<Vec<PromptReplacement>> {
+        self.raw
+            .prompt_replacements(&context.metadata(), preprocessed)
+            .map_err(|error| multimodal!("{error}"))
+    }
+}
+
+/// Static image preprocessor plus its loaded config.
+#[derive(Clone)]
+struct ResolvedImageProcessor {
+    raw: &'static dyn ImagePreProcessor,
+    config: PreProcessorConfig,
+}
+
+/// Request-scoped fetched media, kept together with tracker UUID metadata.
+struct FetchedImageMedia {
+    frames: Vec<Arc<llm_multimodal::ImageFrame>>,
+    uuids: Vec<Option<String>>,
+}
+
+impl MultimodalModelInfo {
+    /// Load and resolve multimodal support from model files.
+    ///
+    /// Returns `Ok(Some(_))` only when both the model spec and image processor
+    /// are registered. File read/parse failures are real errors; unsupported
+    /// model families are logged and returned as `Ok(None)`.
+    pub fn from_paths(
+        model_id: String,
+        model_type: Option<String>,
+        config_path: Option<&Path>,
+        preprocessor_config_path: Option<&Path>,
+        tokenizer: DynTokenizer,
+    ) -> Result<Option<Self>> {
+        let config = match config_path {
+            Some(path) => {
+                let text = fs::read_to_string(path)
+                    .map_err(|error| multimodal!("failed to read config.json: {error}"))?;
+                serde_json::from_str(&text)
+                    .map_err(|error| multimodal!("failed to parse config.json: {error}"))?
+            }
+            None => serde_json::Value::Object(Default::default()),
+        };
+        let preprocessor_config = match preprocessor_config_path {
+            Some(path) => {
+                let text = fs::read_to_string(path).map_err(|error| {
+                    multimodal!("failed to read preprocessor_config.json: {error}")
+                })?;
+                PreProcessorConfig::from_json(&text).map_err(|error| {
+                    multimodal!("failed to parse preprocessor_config.json: {error}")
+                })?
+            }
+            None => PreProcessorConfig::default(),
+        };
+
+        let context = MultimodalModelContext {
+            model_id,
+            model_type,
+            config,
+            tokenizer: TokenizerResolver(tokenizer),
+        };
+
+        let Some(spec) = context.resolve_model_spec() else {
+            warn!(
+                model_id = context.model_id,
+                model_type = context.model_type,
+                "multimodal model spec is not registered; disabling multimodal support for this model"
+            );
+            return Ok(None);
+        };
+        let spec = ResolvedMultimodalSpec::new(spec, &context)?;
+
+        let Some(image_processor) = context.resolve_image_processor() else {
+            warn!(
+                model_id = context.model_id,
+                model_type = context.model_type,
+                "image processor is not registered; disabling multimodal support for this model"
+            );
+            return Ok(None);
+        };
+
+        let media_connector = Arc::new(
+            MediaConnector::new(reqwest::Client::new(), MediaConnectorConfig::default())
+                .map_err(|error| multimodal!("{error}"))?,
+        );
+
+        Ok(Some(Self {
+            context,
+            spec,
+            image_processor: ResolvedImageProcessor {
+                raw: image_processor,
+                config: preprocessor_config,
+            },
+            media_connector,
+        }))
+    }
+
+    /// Return the template-visible placeholder token for this model.
+    ///
+    /// The HF renderer uses this token while flattening image content in string
+    /// content format.
+    pub(crate) fn placeholder_token(&self) -> &str {
+        &self.spec.placeholder_token
+    }
+}
+
+/// Finalize a rendered chat prompt into text-generation input.
+///
+/// Text-only requests pass through unchanged as `Prompt::Text`. Multimodal
+/// requests are tokenized in chat, their image placeholders are expanded, and
+/// preprocessed image features are attached for engine-core transport.
+pub(crate) async fn finalize_rendered_prompt(
+    request: &ChatRequest,
+    rendered: RenderedPrompt,
+    info: Option<&MultimodalModelInfo>,
+    model_dtype: Option<ModelDtype>,
+) -> Result<(Prompt, Option<MmFeatures>)> {
+    if !request.has_multimodal() {
+        return Ok((rendered.prompt, None));
+    }
+    let info = info.ok_or(Error::UnsupportedMultimodalRenderer)?;
+    let Prompt::Text(prompt) = rendered.prompt else {
+        bail_multimodal!("multimodal chat renderer must return a text prompt before expansion");
+    };
+    let media_parts = extract_media_parts(request)?;
+    let model_dtype = model_dtype.unwrap_or_else(|| {
+        static WARN_ONCE: Once = Once::new();
+        WARN_ONCE.call_once(|| {
+            warn!(
+                "engine handshake did not report model dtype; \
+                 falling back to float32 for multimodal tensor encoding"
+            );
+        });
+        ModelDtype::Float32
+    });
+
+    let mut prompt_token_ids = info
+        .context
+        .tokenizer()
+        .encode(&prompt, request.add_special_tokens)
+        .map_err(|error| multimodal!("{error}"))?;
+    let prepared = info.prepare_multimodal(media_parts, &mut prompt_token_ids, model_dtype).await?;
+
+    Ok((Prompt::TokenIds(prompt_token_ids), Some(prepared)))
+}
+
+/// Extract image media parts from chat messages in message/content order.
+///
+/// Assistant history is skipped because generated assistant blocks are already
+/// represented as text for prompt rendering in this crate.
+fn extract_media_parts(request: &ChatRequest) -> Result<Vec<MediaContentPart>> {
+    let mut all_parts = Vec::new();
+    for message in &request.messages {
+        let content = match message {
+            ChatMessage::System { content }
+            | ChatMessage::Developer { content, .. }
+            | ChatMessage::User { content }
+            | ChatMessage::ToolResponse { content, .. } => content,
+            ChatMessage::Assistant { .. } => continue,
+        };
+        let ChatContent::Parts(parts) = content else {
+            continue;
+        };
+        for part in parts {
+            match part {
+                ChatContentPart::Text { .. } => {}
+                ChatContentPart::ImageUrl {
+                    image_url,
+                    detail,
+                    uuid,
+                } => all_parts.push(MediaContentPart::ImageUrl {
+                    url: image_url.clone(),
+                    detail: *detail,
+                    uuid: uuid.clone(),
+                }),
+            }
+        }
+    }
+    Ok(all_parts)
+}
+
+impl MultimodalModelInfo {
+    /// Run media fetch, image preprocessing, prompt expansion, and feature
+    /// build.
+    ///
+    /// `prompt_token_ids` is mutated in place because placeholder expansion
+    /// changes both the final prompt and the offsets recorded in
+    /// `PlaceholderRange`.
+    async fn prepare_multimodal(
+        &self,
+        media_parts: Vec<MediaContentPart>,
+        prompt_token_ids: &mut Vec<u32>,
+        model_dtype: ModelDtype,
+    ) -> Result<MmFeatures> {
+        if media_parts.is_empty() {
+            return Ok(Vec::new());
+        }
+        let media_parts_len = media_parts.len();
+
+        let fetched = self.fetch_images(media_parts).await?;
+        let preprocessed = self.preprocess_images(&fetched.frames).await?;
+        let replacements = self.spec.prompt_replacements(&self.context, &preprocessed)?;
+        let ranges = self.expand_prompt_tokens(prompt_token_ids, replacements)?;
+
+        let features = self.build_features(preprocessed, fetched, ranges, model_dtype)?;
+        if features.len() != media_parts_len {
+            bail_multimodal!(
+                "number of built multimodal features {} does not match number of media parts {}",
+                features.len(),
+                media_parts_len
+            );
+        }
+        Ok(features)
+    }
+
+    /// Fetch all image parts and preserve their request-order UUID metadata.
+    async fn fetch_images(&self, media_parts: Vec<MediaContentPart>) -> Result<FetchedImageMedia> {
+        let mut tracker = AsyncMultiModalTracker::new(Arc::clone(&self.media_connector));
+        for part in media_parts {
+            tracker.push_part(part).map_err(|error| multimodal!("{error}"))?;
+        }
+
+        let tracker_output = tracker.finalize().await.map_err(|error| multimodal!("{error}"))?;
+        let images = tracker_output.data.get(&Modality::Image).cloned().unwrap_or_default();
+        let uuids = tracker_output.uuids.get(&Modality::Image).cloned().unwrap_or_default();
+
+        let frames = images
+            .into_iter()
+            .map(|media| match media {
+                TrackedMedia::Image(frame) => Ok(frame),
+                _ => Err(Error::UnsupportedMultimodalContent("non-image")),
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        Ok(FetchedImageMedia { frames, uuids })
+    }
+
+    /// Preprocess fetched image frames with the model's resolved image
+    /// processor.
+    ///
+    /// The processor work is CPU-heavy relative to request wiring, so it runs
+    /// in a blocking task and returns owned tensors ready for wire
+    /// conversion.
+    async fn preprocess_images(
+        &self,
+        image_frames: &[Arc<llm_multimodal::ImageFrame>],
+    ) -> Result<PreprocessedImages> {
+        let config = self.image_processor.config.clone();
+        let processor = self.image_processor.raw;
+        let images = image_frames.iter().map(|frame| frame.data().clone()).collect::<Vec<_>>();
+
+        tokio::task::spawn_blocking(move || {
+            processor.preprocess(&images, &config).map_err(|error| multimodal!("{error}"))
+        })
+        .await
+        .map_err(|error| multimodal!("image preprocessing task failed: {error}"))?
+    }
+
+    /// Replace rendered placeholder markers with model-specific replacement
+    /// tokens.
+    ///
+    /// Replacements are consumed in order, matching the original media-part
+    /// order. The returned ranges point into the already-expanded prompt.
+    fn expand_prompt_tokens(
+        &self,
+        prompt_token_ids: &mut Vec<u32>,
+        replacements: Vec<PromptReplacement>,
+    ) -> Result<Vec<PlaceholderRange>> {
+        let mut cursor = 0;
+        let mut ranges = Vec::with_capacity(replacements.len());
+        for replacement in replacements {
+            if replacement.modality != Modality::Image {
+                bail_multimodal!(
+                    "unsupported prompt replacement modality `{}`",
+                    replacement.modality
+                );
+            }
+            let offset = find_next_token(
+                prompt_token_ids,
+                self.spec.placeholder_marker_token_id,
+                cursor,
+            )
+            .ok_or_else(|| {
+                multimodal!(
+                    "placeholder token `{}` was not found in tokenized prompt",
+                    self.spec.placeholder_token
+                )
+            })?;
+
+            if replacement.tokens.is_empty() {
+                bail_multimodal!(
+                    "placeholder token `{}` expanded to no tokens",
+                    self.spec.placeholder_token
+                );
+            }
+            let replacement_len = replacement.tokens.len();
+            let replacement_tokens =
+                replacement.tokens.iter().map(|&token| token as u32).collect::<Vec<_>>();
+            let is_embed = {
+                let mask = replacement_tokens
+                    .iter()
+                    .map(|&token| token == self.spec.placeholder_embed_token_id)
+                    .collect::<Vec<_>>();
+                WireTensor::from_bool(vec![replacement_len], mask).map_err(Error::Multimodal)?
+            };
+
+            prompt_token_ids.splice(offset..offset + 1, replacement_tokens);
+            ranges.push(PlaceholderRange {
+                offset,
+                length: replacement_len,
+                is_embed: Some(is_embed),
+            });
+            cursor = offset + replacement_len;
+        }
+        Ok(ranges)
+    }
+
+    /// Convert preprocessed image tensors into engine-core multimodal features.
+    ///
+    /// One `MmFeatureSpec` is produced per image. Tensor fields are
+    /// sliced according to the model spec's field layout declarations.
+    fn build_features(
+        &self,
+        preprocessed: PreprocessedImages,
+        images: FetchedImageMedia,
+        ranges: Vec<PlaceholderRange>,
+        model_dtype: ModelDtype,
+    ) -> Result<MmFeatures> {
+        let len = images.frames.len();
+        let tensors = tensor::collect_tensors(preprocessed, model_dtype)?;
+
+        let mut features = Vec::with_capacity(images.frames.len());
+        for (index, (frame, uuid, range)) in izip!(images.frames, images.uuids, ranges).enumerate()
+        {
+            let mut data = MmKwargsItem::new();
+            for (key, tensor) in &tensors {
+                let keep_on_cpu = self.spec.keep_on_cpu_keys.contains(key);
+                let (value, field) = match self.spec.field_layouts.get(key) {
+                    Some(FieldLayout::Batched) => (
+                        tensor.batched_value_at(index)?,
+                        MmField::Batched(MmBatchedField { keep_on_cpu }),
+                    ),
+                    Some(FieldLayout::Flat { sizes_key }) => {
+                        let sizes = tensors.get(sizes_key).ok_or_else(|| {
+                            multimodal!("flat tensor sizes key `{sizes_key}` is missing")
+                        })?;
+                        let (start, end) = tensor::flat_range_for_index(sizes, sizes_key, index)?;
+                        (
+                            tensor.flat_value_range(start, end)?,
+                            MmField::Flat(MmFlatField {
+                                slices: vec![MmSlice::Slice(SliceSpec {
+                                    start: Some(0),
+                                    stop: Some((end - start) as isize),
+                                    step: None,
+                                })],
+                                dim: 0,
+                                keep_on_cpu,
+                            }),
+                        )
+                    }
+                    None => (
+                        tensor.clone(),
+                        MmField::Shared(MmSharedField {
+                            batch_size: len,
+                            keep_on_cpu,
+                        }),
+                    ),
+                };
+
+                data.insert(
+                    key.clone(),
+                    MmFieldElem {
+                        data: Some(value.try_into()?),
+                        field,
+                    },
+                );
+            }
+
+            let hash = frame.hash.clone();
+            features.push(MmFeatureSpec {
+                data: Some(data),
+                modality: "image".to_string(),
+                identifier: uuid.unwrap_or_else(|| hash.clone()),
+                mm_position: range,
+                mm_hash: Some(hash),
+            });
+        }
+
+        Ok(features)
+    }
+}
+
+/// Find `needle` in `haystack`, starting at `start`.
+///
+/// This is intentionally order-preserving rather than a global replace: each
+/// image consumes the next placeholder occurrence.
+fn find_next_token(haystack: &[u32], needle: u32, start: usize) -> Option<usize> {
+    haystack
+        .get(start..)?
+        .iter()
+        .position(|token| *token == needle)
+        .map(|offset| start + offset)
+}
+
+/// Adapter from the frontend tokenizer trait to `llm-multimodal`.
+#[derive(Clone)]
+struct TokenizerResolver(DynTokenizer);
+
+impl TokenResolver for TokenizerResolver {
+    fn token_to_id(&self, token: &str) -> Option<u32> {
+        self.0.token_to_id(token)
+    }
+
+    fn id_to_token(&self, id: u32) -> Option<String> {
+        self.0.id_to_token(id)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use llm_multimodal::TokenId;
+    use vllm_engine_core_client::protocol::tensor::WireArrayData;
+    use vllm_text::tokenizer::{IncrementalDecoder, Tokenizer, TokenizerError};
+
+    use super::*;
+
+    const LLAMA4_IMAGE_START_ID: u32 = 200088;
+    const LLAMA4_IMAGE_END_ID: u32 = 200089;
+    const LLAMA4_IMAGE_ID: u32 = 200090;
+    const LLAMA4_PATCH_ID: u32 = 200092;
+    const LLAMA4_TILE_X_SEPARATOR_ID: u32 = 200093;
+    const LLAMA4_TILE_Y_SEPARATOR_ID: u32 = 200094;
+
+    struct TestTokenizer;
+
+    impl Tokenizer for TestTokenizer {
+        fn encode(
+            &self,
+            text: &str,
+            _add_special_tokens: bool,
+        ) -> std::result::Result<Vec<u32>, TokenizerError> {
+            Ok(match text {
+                "<|image|>" => vec![LLAMA4_IMAGE_ID],
+                text => text.bytes().map(u32::from).collect(),
+            })
+        }
+
+        fn decode(
+            &self,
+            _token_ids: &[u32],
+            _skip_special_tokens: bool,
+        ) -> std::result::Result<String, TokenizerError> {
+            Ok(String::new())
+        }
+
+        fn token_to_id(&self, token: &str) -> Option<u32> {
+            match token {
+                "<|image_start|>" => Some(LLAMA4_IMAGE_START_ID),
+                "<|image_end|>" => Some(LLAMA4_IMAGE_END_ID),
+                "<|image|>" => Some(LLAMA4_IMAGE_ID),
+                "<|patch|>" => Some(LLAMA4_PATCH_ID),
+                "<|tile_x_separator|>" => Some(LLAMA4_TILE_X_SEPARATOR_ID),
+                "<|tile_y_separator|>" => Some(LLAMA4_TILE_Y_SEPARATOR_ID),
+                _ => None,
+            }
+        }
+
+        fn id_to_token(&self, id: u32) -> Option<String> {
+            match id {
+                LLAMA4_IMAGE_START_ID => Some("<|image_start|>".to_string()),
+                LLAMA4_IMAGE_END_ID => Some("<|image_end|>".to_string()),
+                LLAMA4_IMAGE_ID => Some("<|image|>".to_string()),
+                LLAMA4_PATCH_ID => Some("<|patch|>".to_string()),
+                LLAMA4_TILE_X_SEPARATOR_ID => Some("<|tile_x_separator|>".to_string()),
+                LLAMA4_TILE_Y_SEPARATOR_ID => Some("<|tile_y_separator|>".to_string()),
+                _ => None,
+            }
+        }
+
+        fn create_decode_stream(
+            &self,
+            _prompt_token_ids: &[u32],
+            _skip_special_tokens: bool,
+            _min_bytes_to_buffer: usize,
+        ) -> Box<dyn IncrementalDecoder + '_> {
+            unreachable!("not used")
+        }
+    }
+
+    fn test_info(model_type: &str, config: serde_json::Value) -> MultimodalModelInfo {
+        let context = MultimodalModelContext {
+            model_id: format!("{model_type}-test"),
+            model_type: Some(model_type.to_string()),
+            config,
+            tokenizer: TokenizerResolver(Arc::new(TestTokenizer)),
+        };
+        let spec = context
+            .resolve_model_spec()
+            .unwrap_or_else(|| panic!("{model_type} spec should match"));
+        let spec = ResolvedMultimodalSpec::new(spec, &context).unwrap();
+        let raw_image_processor = context
+            .resolve_image_processor()
+            .unwrap_or_else(|| panic!("{model_type} image processor should match"));
+        let media_connector = Arc::new(
+            MediaConnector::new(reqwest::Client::new(), MediaConnectorConfig::default()).unwrap(),
+        );
+
+        MultimodalModelInfo {
+            context,
+            spec,
+            image_processor: ResolvedImageProcessor {
+                raw: raw_image_processor,
+                config: PreProcessorConfig::default(),
+            },
+            media_connector,
+        }
+    }
+
+    fn llama4_info() -> MultimodalModelInfo {
+        let config = serde_json::json!({
+            "model_type": "llama4",
+            "image_token_index": LLAMA4_PATCH_ID,
+            "vision_config": {"image_size": 336, "patch_size": 14}
+        });
+        test_info("llama4", config)
+    }
+
+    fn llama4_single_tile_replacement() -> PromptReplacement {
+        PromptReplacement::sequence(
+            Modality::Image,
+            "<|image|>",
+            vec![
+                LLAMA4_IMAGE_START_ID as TokenId,
+                LLAMA4_IMAGE_ID as TokenId,
+                LLAMA4_PATCH_ID as TokenId,
+                LLAMA4_PATCH_ID as TokenId,
+                LLAMA4_IMAGE_END_ID as TokenId,
+            ],
+        )
+    }
+
+    fn llama4_multi_tile_replacement() -> PromptReplacement {
+        PromptReplacement::sequence(
+            Modality::Image,
+            "<|image|>",
+            vec![
+                LLAMA4_IMAGE_START_ID as TokenId,
+                LLAMA4_PATCH_ID as TokenId,
+                LLAMA4_TILE_X_SEPARATOR_ID as TokenId,
+                LLAMA4_PATCH_ID as TokenId,
+                LLAMA4_TILE_Y_SEPARATOR_ID as TokenId,
+                LLAMA4_IMAGE_ID as TokenId,
+                LLAMA4_PATCH_ID as TokenId,
+                LLAMA4_IMAGE_END_ID as TokenId,
+            ],
+        )
+    }
+
+    fn assert_bool_mask(range: &PlaceholderRange, expected: &[bool]) {
+        let tensor = range.is_embed.as_ref().expect("is_embed mask");
+        assert_eq!(tensor.dtype, "bool");
+        assert_eq!(tensor.shape, vec![expected.len()]);
+        assert_eq!(
+            tensor.data,
+            WireArrayData::RawView(expected.iter().map(|value| u8::from(*value)).collect())
+        );
+    }
+
+    #[test]
+    fn expand_prompt_tokens_marks_only_llama4_patch_tokens_as_embed() {
+        let info = llama4_info();
+        let mut prompt_token_ids = vec![1, LLAMA4_IMAGE_ID, 2];
+        let replacements = vec![llama4_multi_tile_replacement()];
+
+        let ranges = info.expand_prompt_tokens(&mut prompt_token_ids, replacements).unwrap();
+
+        assert_eq!(
+            prompt_token_ids,
+            vec![
+                1,
+                LLAMA4_IMAGE_START_ID,
+                LLAMA4_PATCH_ID,
+                LLAMA4_TILE_X_SEPARATOR_ID,
+                LLAMA4_PATCH_ID,
+                LLAMA4_TILE_Y_SEPARATOR_ID,
+                LLAMA4_IMAGE_ID,
+                LLAMA4_PATCH_ID,
+                LLAMA4_IMAGE_END_ID,
+                2,
+            ]
+        );
+        assert_eq!(ranges[0].offset, 1);
+        assert_eq!(ranges[0].length, 8);
+        assert_bool_mask(
+            &ranges[0],
+            &[false, true, false, true, false, false, true, false],
+        );
+    }
+
+    #[test]
+    fn expand_prompt_tokens_errors_when_placeholder_missing() {
+        let info = llama4_info();
+        let mut prompt_token_ids = vec![1, 2, 3];
+        let replacements = vec![llama4_single_tile_replacement()];
+
+        let error = info.expand_prompt_tokens(&mut prompt_token_ids, replacements).unwrap_err();
+
+        assert!(matches!(error, Error::Multimodal(message) if message.contains("not found")));
+    }
+
+    #[test]
+    fn expand_prompt_tokens_skips_llama4_image_marker_inside_replacement() {
+        let info = llama4_info();
+        let mut prompt_token_ids = vec![1, LLAMA4_IMAGE_ID, 2, LLAMA4_IMAGE_ID, 3];
+        let replacements = vec![
+            llama4_single_tile_replacement(),
+            llama4_single_tile_replacement(),
+        ];
+
+        let ranges = info.expand_prompt_tokens(&mut prompt_token_ids, replacements).unwrap();
+
+        assert_eq!(
+            prompt_token_ids,
+            vec![
+                1,
+                LLAMA4_IMAGE_START_ID,
+                LLAMA4_IMAGE_ID,
+                LLAMA4_PATCH_ID,
+                LLAMA4_PATCH_ID,
+                LLAMA4_IMAGE_END_ID,
+                2,
+                LLAMA4_IMAGE_START_ID,
+                LLAMA4_IMAGE_ID,
+                LLAMA4_PATCH_ID,
+                LLAMA4_PATCH_ID,
+                LLAMA4_IMAGE_END_ID,
+                3,
+            ]
+        );
+        assert_eq!(ranges[0].offset, 1);
+        assert_eq!(ranges[0].length, 5);
+        assert_bool_mask(&ranges[0], &[false, false, true, true, false]);
+        assert_eq!(ranges[1].offset, 7);
+        assert_eq!(ranges[1].length, 5);
+        assert_bool_mask(&ranges[1], &[false, false, true, true, false]);
+    }
+}
diff --git a/rust/src/chat/src/multimodal/tensor.rs b/rust/src/chat/src/multimodal/tensor.rs
new file mode 100644
index 000000000000..eddf8f707e95
--- /dev/null
+++ b/rust/src/chat/src/multimodal/tensor.rs
@@ -0,0 +1,342 @@
+use std::collections::HashMap;
+
+use half::{bf16, f16};
+use llm_multimodal::{ModelSpecificValue, PreprocessedImages};
+use vllm_engine_core_client::protocol::ModelDtype;
+use vllm_engine_core_client::protocol::multimodal::MmKwargValue as ProtocolKwargValue;
+use vllm_engine_core_client::protocol::tensor::{ShapeExt as _, WireTensor};
+
+use crate::error::{Error, Result, bail_multimodal, multimodal};
+
+/// Representation for multimodal kwarg values for transformation.
+#[derive(Debug, Clone)]
+pub(super) enum KwargValue {
+    /// Float tensor with row-major flat data and shape.
+    F32Tensor { data: Vec<f32>, shape: Vec<usize> },
+    /// Float16 tensor with row-major flat data and shape.
+    F16Tensor { data: Vec<f16>, shape: Vec<usize> },
+    /// BFloat16 tensor with row-major flat data and shape.
+    Bf16Tensor { data: Vec<bf16>, shape: Vec<usize> },
+    /// Signed integer tensor with row-major flat data and shape.
+    I64Tensor { data: Vec<i64>, shape: Vec<usize> },
+    /// Unsigned integer tensor with row-major flat data and shape.
+    U32Tensor { data: Vec<u32>, shape: Vec<usize> },
+    /// Non-tensor kwarg value that is shared or copied as-is.
+    Passthrough(ProtocolKwargValue),
+}
+
+/// Collect `pixel_values` and model-specific outputs into one tensor map.
+pub(super) fn collect_tensors(
+    preprocessed: PreprocessedImages,
+    float_dtype: ModelDtype,
+) -> Result<HashMap<String, KwargValue>> {
+    let PreprocessedImages {
+        pixel_values,
+        model_specific,
+        ..
+    } = preprocessed;
+
+    let pixel_values = {
+        let shape = pixel_values.shape().to_vec();
+        let data = pixel_values.into_iter().collect();
+        KwargValue::from_f32_tensor(data, shape, float_dtype)?
+    };
+
+    let mut tensors = HashMap::new();
+    tensors.insert("pixel_values".to_string(), pixel_values);
+    for (key, value) in model_specific {
+        tensors.insert(key, KwargValue::from_model_specific(value, float_dtype)?);
+    }
+    Ok(tensors)
+}
+
+impl KwargValue {
+    fn from_model_specific(value: ModelSpecificValue, float_dtype: ModelDtype) -> Result<Self> {
+        use ProtocolKwargValue::*;
+
+        Ok(match value {
+            ModelSpecificValue::Tensor { data, shape } => {
+                Self::from_f32_tensor(data, shape, float_dtype)?
+            }
+            ModelSpecificValue::IntTensor { data, shape } => Self::I64Tensor { data, shape },
+            ModelSpecificValue::UintTensor { data, shape } => Self::U32Tensor { data, shape },
+            ModelSpecificValue::Int(value) => Self::Passthrough(Int(value)),
+            ModelSpecificValue::Float(value) => Self::Passthrough(Float(value)),
+            ModelSpecificValue::IntVec(values) => {
+                Self::Passthrough(List(values.into_iter().map(Int).collect()))
+            }
+            ModelSpecificValue::UintVec(values) => Self::Passthrough(List(
+                values.into_iter().map(|value| Int(value as i64)).collect(),
+            )),
+            ModelSpecificValue::FloatVec(values) => Self::Passthrough(List(
+                values.into_iter().map(|value| Float(value as f64)).collect(),
+            )),
+            ModelSpecificValue::TupleVec(values) => Self::Passthrough(List(
+                values
+                    .into_iter()
+                    .map(|(height, width)| List(vec![Int(height as i64), Int(width as i64)]))
+                    .collect(),
+            )),
+            ModelSpecificValue::Bool(value) => Self::Passthrough(Int(i64::from(value))),
+        })
+    }
+
+    /// Convert a float tensor to the target float dtype if needed, keeping the
+    /// same shape.
+    fn from_f32_tensor(data: Vec<f32>, shape: Vec<usize>, float_dtype: ModelDtype) -> Result<Self> {
+        match float_dtype {
+            ModelDtype::Float16 => Ok(Self::F16Tensor {
+                data: data.into_iter().map(f16::from_f32).collect(),
+                shape,
+            }),
+            ModelDtype::BFloat16 => Ok(Self::Bf16Tensor {
+                data: data.into_iter().map(bf16::from_f32).collect(),
+                shape,
+            }),
+            ModelDtype::Float32 => Ok(Self::F32Tensor { data, shape }),
+        }
+    }
+}
+
+impl TryFrom<KwargValue> for ProtocolKwargValue {
+    type Error = Error;
+
+    fn try_from(value: KwargValue) -> Result<Self> {
+        match value {
+            KwargValue::F32Tensor { data, shape } => Ok(Self::Tensor(
+                WireTensor::from_f32(shape, data).map_err(Error::Multimodal)?,
+            )),
+            KwargValue::F16Tensor { data, shape } => Ok(Self::Tensor(
+                WireTensor::from_f16(shape, data).map_err(Error::Multimodal)?,
+            )),
+            KwargValue::Bf16Tensor { data, shape } => Ok(Self::Tensor(
+                WireTensor::from_bf16(shape, data).map_err(Error::Multimodal)?,
+            )),
+            KwargValue::I64Tensor { data, shape } => Ok(Self::Tensor(
+                WireTensor::from_i64(shape, data).map_err(Error::Multimodal)?,
+            )),
+            KwargValue::U32Tensor { data, shape } => Ok(Self::Tensor(
+                WireTensor::from_u32(shape, data).map_err(Error::Multimodal)?,
+            )),
+            KwargValue::Passthrough(value) => Ok(value),
+        }
+    }
+}
+
+impl KwargValue {
+    /// Extract one image from a batched tensor field.
+    ///
+    /// Batched fields use their first axis as image index and drop that axis in
+    /// the per-feature value, matching vLLM's batched-field semantics.
+    pub(super) fn batched_value_at(&self, index: usize) -> Result<Self> {
+        match self {
+            Self::F32Tensor { data, shape } => {
+                let (shape, data) = slice_first_axis_range(shape, data, index, index + 1, true)?;
+                Ok(Self::F32Tensor { data, shape })
+            }
+            Self::F16Tensor { data, shape } => {
+                let (shape, data) = slice_first_axis_range(shape, data, index, index + 1, true)?;
+                Ok(Self::F16Tensor { data, shape })
+            }
+            Self::Bf16Tensor { data, shape } => {
+                let (shape, data) = slice_first_axis_range(shape, data, index, index + 1, true)?;
+                Ok(Self::Bf16Tensor { data, shape })
+            }
+            Self::I64Tensor { data, shape } => {
+                let (shape, data) = slice_first_axis_range(shape, data, index, index + 1, true)?;
+                Ok(Self::I64Tensor { data, shape })
+            }
+            Self::U32Tensor { data, shape } => {
+                let (shape, data) = slice_first_axis_range(shape, data, index, index + 1, true)?;
+                Ok(Self::U32Tensor { data, shape })
+            }
+            Self::Passthrough(value) => Ok(Self::Passthrough(value.clone())),
+        }
+    }
+
+    /// Extract one image's variable-length range from a flat tensor field.
+    ///
+    /// Flat fields keep the first axis as the sliced length for this image.
+    pub(super) fn flat_value_range(&self, start: usize, end: usize) -> Result<Self> {
+        match self {
+            Self::F32Tensor { data, shape } => {
+                let (shape, data) = slice_first_axis_range(shape, data, start, end, false)?;
+                Ok(Self::F32Tensor { data, shape })
+            }
+            Self::F16Tensor { data, shape } => {
+                let (shape, data) = slice_first_axis_range(shape, data, start, end, false)?;
+                Ok(Self::F16Tensor { data, shape })
+            }
+            Self::Bf16Tensor { data, shape } => {
+                let (shape, data) = slice_first_axis_range(shape, data, start, end, false)?;
+                Ok(Self::Bf16Tensor { data, shape })
+            }
+            Self::I64Tensor { data, shape } => {
+                let (shape, data) = slice_first_axis_range(shape, data, start, end, false)?;
+                Ok(Self::I64Tensor { data, shape })
+            }
+            Self::U32Tensor { data, shape } => {
+                let (shape, data) = slice_first_axis_range(shape, data, start, end, false)?;
+                Ok(Self::U32Tensor { data, shape })
+            }
+            Self::Passthrough(value) => Ok(Self::Passthrough(value.clone())),
+        }
+    }
+}
+
+/// Compute the first-axis range for one image in a flat tensor.
+///
+/// `sizes_key` names a companion tensor whose entries are cumulative slice
+/// sizes per image.
+pub(super) fn flat_range_for_index(
+    sizes: &KwargValue,
+    sizes_key: &str,
+    index: usize,
+) -> Result<(usize, usize)> {
+    let sizes = tensor_as_usize_vec(sizes)?;
+    let size = *sizes.get(index).ok_or_else(|| {
+        multimodal!("flat tensor sizes key `{sizes_key}` has no entry for image {index}")
+    })?;
+    let start = sizes[..index].iter().sum::<usize>();
+    Ok((start, start + size))
+}
+
+/// Read a tensor value as per-image sizes for flat slicing.
+fn tensor_as_usize_vec(tensor: &KwargValue) -> Result<Vec<usize>> {
+    match tensor {
+        KwargValue::I64Tensor { data, .. } => data
+            .iter()
+            .map(|value| {
+                usize::try_from(*value)
+                    .map_err(|_| multimodal!("negative flat tensor size `{value}`"))
+            })
+            .collect(),
+        KwargValue::U32Tensor { data, .. } => {
+            Ok(data.iter().map(|value| *value as usize).collect())
+        }
+        _ => Err(multimodal!("flat tensor sizes must be int64 or uint32")),
+    }
+}
+
+/// Slice a flat row-major tensor along its first axis.
+fn slice_first_axis_range<T: Clone>(
+    shape: &[usize],
+    data: &[T],
+    start: usize,
+    end: usize,
+    drop_axis: bool,
+) -> Result<(Vec<usize>, Vec<T>)> {
+    let first_dim = *shape.first().ok_or_else(|| multimodal!("tensor has no first dimension"))?;
+    if start > end || end > first_dim {
+        bail_multimodal!("invalid tensor slice {start}..{end} for first dimension {first_dim}");
+    }
+    let expected_len = shape
+        .checked_numel()
+        .ok_or_else(|| multimodal!("tensor shape {shape:?} has too many elements"))?;
+    if expected_len != data.len() {
+        bail_multimodal!(
+            "tensor shape {shape:?} expects {expected_len} elements, got {}",
+            data.len()
+        );
+    }
+    let stride = shape[1..].iter().product::<usize>();
+    let data_start = start * stride;
+    let data_end = end * stride;
+    let out_shape = if drop_axis {
+        shape[1..].to_vec()
+    } else {
+        let mut shape = shape.to_vec();
+        shape[0] = end - start;
+        shape
+    };
+    Ok((out_shape, data[data_start..data_end].to_vec()))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn batched_value_at_drops_first_axis() {
+        let value = KwargValue::F32Tensor {
+            data: vec![1.0, 2.0, 3.0, 4.0],
+            shape: vec![2, 2],
+        };
+
+        let value = value.batched_value_at(1).unwrap();
+
+        assert!(matches!(
+            value,
+            KwargValue::F32Tensor { data, shape }
+                if shape == vec![2] && data == vec![3.0, 4.0]
+        ));
+    }
+
+    #[test]
+    fn flat_value_range_keeps_first_axis() {
+        let value = KwargValue::U32Tensor {
+            data: (0..10).collect(),
+            shape: vec![5, 2],
+        };
+
+        let value = value.flat_value_range(1, 3).unwrap();
+
+        assert!(matches!(
+            value,
+            KwargValue::U32Tensor { data, shape }
+                if shape == vec![2, 2] && data == vec![2, 3, 4, 5]
+        ));
+    }
+
+    #[test]
+    fn flat_range_for_index_uses_size_tensor() {
+        let sizes = KwargValue::I64Tensor {
+            data: vec![2, 3, 4],
+            shape: vec![3],
+        };
+
+        assert_eq!(
+            flat_range_for_index(&sizes, "image_grid_thw", 1).unwrap(),
+            (2, 5)
+        );
+    }
+
+    #[test]
+    fn slice_first_axis_range_errors_on_shape_data_mismatch() {
+        let error = slice_first_axis_range(&[2, 2], &[1.0_f32, 2.0, 3.0], 0, 1, true).unwrap_err();
+
+        assert!(
+            matches!(error, Error::Multimodal(message) if message.contains("expects 4 elements"))
+        );
+    }
+
+    #[test]
+    fn bfloat16_tensor_wire_uses_bfloat16_dtype() {
+        let value =
+            KwargValue::from_f32_tensor(vec![1.0, -1.0], vec![2], ModelDtype::BFloat16).unwrap();
+
+        let ProtocolKwargValue::Tensor(tensor) = ProtocolKwargValue::try_from(value).unwrap()
+        else {
+            panic!("expected tensor");
+        };
+
+        assert_eq!(tensor.dtype, "bfloat16");
+        assert_eq!(tensor.shape, vec![2]);
+        assert_eq!(tensor.data.into_raw_view().unwrap().len(), 4);
+    }
+
+    #[test]
+    fn float16_tensor_wire_uses_float16_dtype() {
+        let value =
+            KwargValue::from_f32_tensor(vec![1.0, -1.0], vec![2], ModelDtype::Float16).unwrap();
+
+        let ProtocolKwargValue::Tensor(tensor) = ProtocolKwargValue::try_from(value).unwrap()
+        else {
+            panic!("expected tensor");
+        };
+
+        assert_eq!(tensor.dtype, "float16");
+        assert_eq!(tensor.shape, vec![2]);
+        assert_eq!(tensor.data.into_raw_view().unwrap().len(), 4);
+    }
+}
diff --git a/rust/src/chat/src/output/default/mod.rs b/rust/src/chat/src/output/default/mod.rs
new file mode 100644
index 000000000000..40526a9e84ce
--- /dev/null
+++ b/rust/src/chat/src/output/default/mod.rs
@@ -0,0 +1,166 @@
+//! Default output processing pipeline.
+
+mod reasoning;
+mod tool;
+
+use std::sync::Once;
+
+use futures::{Stream, StreamExt as _};
+use tracing::info;
+use trait_set::trait_set;
+use vllm_text::tokenizer::DynTokenizer;
+
+use self::reasoning::reasoning_event_stream;
+use self::tool::tool_event_stream;
+use super::structured::structured_chat_event_stream;
+use crate::error::Result;
+use crate::output::{
+    AssistantEvent, ChatOutputProcessor, ContentEvent, DynChatEventStream,
+    DynDecodedTextEventStream,
+};
+use crate::parser::ParserSelection;
+use crate::parser::reasoning::{ReasoningParser, ReasoningParserFactory};
+use crate::parser::tool::{ToolParser, ToolParserFactory};
+use crate::request::{ChatRequest, ChatToolChoice};
+use crate::{Error, Result as ChatResult};
+
+trait_set! {
+    trait ContentEventStream = Stream<Item = Result<ContentEvent>> + Send + 'static;
+}
+
+/// Default request-scoped output processor used by Hugging Face style chat
+/// backends.
+///
+/// This implementation assumes the backend already emitted decoded text deltas,
+/// then optionally layers reasoning parsing and tool-call parsing before
+/// assembling final structured chat events.
+pub struct DefaultChatOutputProcessor {
+    reasoning_parser: Option<Box<dyn ReasoningParser>>,
+    tool_parser: Option<Box<dyn ToolParser>>,
+}
+
+impl DefaultChatOutputProcessor {
+    /// Build the default output processor and apply any parser-specific request
+    /// adjustments.
+    ///
+    /// Parser resolution happens here so that request validation, prompt
+    /// rendering, and streaming all observe the same parser-adjusted
+    /// request state.
+    pub fn new(
+        request: &mut ChatRequest,
+        model_id: &str,
+        tokenizer: DynTokenizer,
+        tool_call_parser: &ParserSelection,
+        reasoning_parser: &ParserSelection,
+    ) -> ChatResult<Self> {
+        let tool_parsing_enabled =
+            matches!(request.tool_choice, ChatToolChoice::Auto) && !request.tools.is_empty();
+        let tool_parser = if tool_parsing_enabled {
+            Some(Self::resolve_tool_parser(
+                request,
+                model_id,
+                tool_call_parser,
+            )?)
+        } else {
+            None
+        };
+        let reasoning_parser = Self::resolve_optional_reasoning_parser(
+            request,
+            model_id,
+            tokenizer,
+            reasoning_parser,
+        )?;
+
+        Ok(Self {
+            reasoning_parser,
+            tool_parser,
+        })
+    }
+
+    /// Build the plain-text-only default output processor.
+    ///
+    /// This keeps the default structured chat-event assembly but disables both
+    /// reasoning parsing and tool-call parsing completely, so that all
+    /// content is treated as opaque text.
+    pub fn plain_text_only() -> Self {
+        Self {
+            reasoning_parser: None,
+            tool_parser: None,
+        }
+    }
+
+    fn resolve_tool_parser(
+        request: &mut ChatRequest,
+        model_id: &str,
+        selection: &ParserSelection,
+    ) -> ChatResult<Box<dyn ToolParser>> {
+        let factory = ToolParserFactory::global();
+        let parser_name = match selection {
+            ParserSelection::Auto => factory.resolve_name_for_model(model_id).ok_or_else(|| {
+                Error::ParserUnavailableForModel {
+                    kind: "tool",
+                    model_id: model_id.to_string(),
+                }
+            })?,
+            ParserSelection::None => return Err(Error::ParserDisabled { kind: "tool" }),
+            ParserSelection::Explicit(name) => name.as_str(),
+        };
+
+        let parser = factory.create(parser_name, &request.tools)?;
+
+        if parser.preserve_special_tokens() {
+            request.decode_options.skip_special_tokens = false;
+        }
+
+        TOOL_PARSER_LOG_ONCE.call_once(|| info!(parser_name, "using tool parser"));
+        Ok(parser)
+    }
+
+    fn resolve_optional_reasoning_parser(
+        request: &mut ChatRequest,
+        model_id: &str,
+        tokenizer: DynTokenizer,
+        selection: &ParserSelection,
+    ) -> ChatResult<Option<Box<dyn ReasoningParser>>> {
+        let factory = ReasoningParserFactory::global();
+        let parser_name = match selection {
+            ParserSelection::Auto => factory.resolve_name_for_model(model_id),
+            ParserSelection::None => None,
+            ParserSelection::Explicit(name) => Some(name.as_str()),
+        };
+
+        let Some(parser_name) = parser_name else {
+            REASONING_PARSER_LOG_ONCE.call_once(|| info!("reasoning parsing disabled"));
+            return Ok(None);
+        };
+
+        let parser = factory.create(parser_name, tokenizer)?;
+
+        if parser.preserve_special_tokens() {
+            request.decode_options.skip_special_tokens = false;
+        }
+
+        REASONING_PARSER_LOG_ONCE.call_once(|| info!(parser_name, "using reasoning parser"));
+        Ok(Some(parser))
+    }
+}
+
+static TOOL_PARSER_LOG_ONCE: Once = Once::new();
+static REASONING_PARSER_LOG_ONCE: Once = Once::new();
+
+impl ChatOutputProcessor for DefaultChatOutputProcessor {
+    /// Transforms a raw generate-output token stream into structured chat
+    /// events through three sequential stages once text decoding has
+    /// already happened:
+    ///
+    /// 1. [`reasoning_event_stream`] — reasoning/content separation
+    /// 2. [`tool_event_stream`] — tool-call parsing
+    /// 3. [`structured_chat_event_stream`] — final block assembly
+    fn process(self: Box<Self>, decoded: DynDecodedTextEventStream) -> Result<DynChatEventStream> {
+        let reasoning = reasoning_event_stream(decoded, self.reasoning_parser);
+        let tool = tool_event_stream(reasoning, self.tool_parser);
+        let structured = structured_chat_event_stream(tool);
+
+        Ok(structured.boxed())
+    }
+}
diff --git a/rust/src/chat/src/output/default/reasoning.rs b/rust/src/chat/src/output/default/reasoning.rs
new file mode 100644
index 000000000000..b51ce41961d5
--- /dev/null
+++ b/rust/src/chat/src/output/default/reasoning.rs
@@ -0,0 +1,504 @@
+//! Adapts decoded text updates into reasoning-aware assistant deltas.
+//!
+//! This stage sits between low-level token decoding and final block assembly.
+//! It is the only place in the new pipeline that understands reasoning
+//! separation: `decoded.rs` still only produces plain text deltas, while later
+//! stages consume the semantic `Text` / `Reasoning` split emitted here.
+
+use asynk_strim_attr::{TryYielder, try_stream};
+use futures::{StreamExt as _, pin_mut};
+use thiserror_ext::AsReport;
+use tracing::warn;
+use vllm_text::output::DecodedTextEvent;
+
+use super::ContentEvent;
+use crate::Result;
+use crate::error::Error;
+use crate::event::AssistantBlockKind;
+use crate::output::DecodedTextEventStream;
+use crate::parser::reasoning::{ReasoningDelta, ReasoningParser};
+
+/// Per-stream reasoning parsing state.
+struct ReasoningState {
+    /// Reasoning parser for the current model family.
+    parser: Box<dyn ReasoningParser>,
+    /// Whether reasoning parsing has already failed for this stream.
+    parser_failed: bool,
+}
+
+impl ReasoningState {
+    /// Create one fresh reasoning-adaptation state for a new streamed response.
+    fn new(parser: Box<dyn ReasoningParser>) -> Self {
+        Self {
+            parser,
+            parser_failed: false,
+        }
+    }
+
+    /// Convert one decoded text delta into zero or more semantic assistant
+    /// deltas.
+    fn process_delta(&mut self, delta: String) -> Vec<ContentEvent> {
+        // If the parser has already failed, skip parsing and return plain text deltas.
+        if self.parser_failed {
+            return vec![ContentEvent::TextDelta {
+                kind: AssistantBlockKind::Text,
+                delta,
+            }];
+        }
+
+        let mut events = Vec::new();
+
+        match self.parser.push(&delta) {
+            Ok(result) => {
+                push_reasoning_delta(&mut events, result);
+            }
+            Err(error) => {
+                if !self.parser_failed {
+                    warn!(
+                        error = %error.as_report(),
+                        "reasoning parser failed; falling back to plain text deltas"
+                    );
+                    self.parser_failed = true;
+                }
+                push_text_delta(&mut events, AssistantBlockKind::Text, delta);
+            }
+        }
+
+        events
+    }
+
+    /// Initialize parser state once prompt token IDs are available.
+    fn initialize(&mut self, prompt_token_ids: &[u32]) {
+        if self.parser_failed {
+            return;
+        }
+
+        match self.parser.initialize(prompt_token_ids) {
+            Ok(()) => {}
+            Err(error) => {
+                warn!(
+                    error = %error.as_report(),
+                    "failed to initialize reasoning parser; falling back to plain text deltas"
+                );
+                self.parser_failed = true;
+            }
+        }
+    }
+
+    /// Flush any parser-held partial delimiter state at end of stream.
+    fn finish(&mut self) -> Vec<ContentEvent> {
+        if self.parser_failed {
+            return Vec::new();
+        }
+
+        match self.parser.finish() {
+            Ok(result) => {
+                let mut events = Vec::new();
+                push_reasoning_delta(&mut events, result);
+                events
+            }
+            Err(error) => {
+                warn!(error = %error.as_report(), "failed to flush reasoning parser state");
+                Vec::new()
+            }
+        }
+    }
+}
+
+/// Push one semantic text delta if it is non-empty.
+fn push_text_delta(events: &mut Vec<ContentEvent>, kind: AssistantBlockKind, delta: String) {
+    if delta.is_empty() {
+        return;
+    }
+    events.push(ContentEvent::TextDelta { kind, delta });
+}
+
+/// Convert one parsed reasoning delta into zero or more content events.
+fn push_reasoning_delta(events: &mut Vec<ContentEvent>, delta: ReasoningDelta) {
+    if let Some(reasoning) = delta.reasoning {
+        push_text_delta(events, AssistantBlockKind::Reasoning, reasoning);
+    }
+    if let Some(content) = delta.content {
+        push_text_delta(events, AssistantBlockKind::Text, content);
+    }
+}
+
+/// Wrap one decoded-text stream into the internal reasoning event stream.
+#[try_stream]
+pub(crate) async fn reasoning_event_stream(
+    decoded_stream: impl DecodedTextEventStream,
+    reasoning_parser: Option<Box<dyn ReasoningParser>>,
+    mut y: TryYielder<ContentEvent, Error>,
+) -> Result<()> {
+    pin_mut!(decoded_stream);
+
+    // Without a parser, pass through as plain text deltas.
+    let Some(reasoning_parser) = reasoning_parser else {
+        while let Some(event) = decoded_stream.next().await.transpose()? {
+            for next in ContentEvent::from_decoded_plain_text(event) {
+                y.yield_ok(next).await;
+            }
+        }
+        return Ok(());
+    };
+
+    let mut state = ReasoningState::new(reasoning_parser);
+
+    while let Some(event) = decoded_stream.next().await.transpose()? {
+        match event {
+            DecodedTextEvent::Start {
+                prompt_token_ids,
+                prompt_logprobs,
+            } => {
+                state.initialize(&prompt_token_ids);
+                y.yield_ok(ContentEvent::Start {
+                    prompt_token_ids,
+                    prompt_logprobs,
+                })
+                .await;
+            }
+            DecodedTextEvent::TextDelta {
+                delta,
+                token_ids,
+                logprobs,
+                finished,
+            } => {
+                for next in state.process_delta(delta) {
+                    y.yield_ok(next).await;
+                }
+                if logprobs.is_some() || !token_ids.is_empty() {
+                    y.yield_ok(ContentEvent::LogprobsDelta {
+                        logprobs,
+                        token_ids,
+                    })
+                    .await;
+                }
+                if let Some(finished) = finished {
+                    for next in state.finish() {
+                        y.yield_ok(next).await;
+                    }
+                    y.yield_ok(ContentEvent::Done {
+                        prompt_token_count: finished.prompt_token_count,
+                        output_token_count: finished.output_token_count,
+                        finish_reason: finished.finish_reason,
+                        kv_transfer_params: finished.kv_transfer_params,
+                    })
+                    .await;
+                }
+            }
+        }
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+
+    use std::sync::Arc;
+
+    use futures::{StreamExt as _, stream};
+    use vllm_llm::FinishReason;
+    use vllm_text::output::{
+        DecodedLogprobs, DecodedPositionLogprobs, DecodedTextEvent, DecodedTokenLogprob,
+    };
+    use vllm_tokenizer::{DynTokenizer, Tokenizer};
+
+    use super::super::ContentEvent;
+    use super::reasoning_event_stream;
+    use crate::event::AssistantBlockKind;
+    use crate::parser::reasoning::{
+        ReasoningDelta, ReasoningError, ReasoningParser, ReasoningParserFactory, names,
+    };
+
+    struct FakeTokenizer;
+
+    impl Tokenizer for FakeTokenizer {
+        fn encode(
+            &self,
+            text: &str,
+            _add_special_tokens: bool,
+        ) -> vllm_tokenizer::Result<Vec<u32>> {
+            Ok(text.chars().map(u32::from).collect())
+        }
+
+        fn decode(
+            &self,
+            token_ids: &[u32],
+            _skip_special_tokens: bool,
+        ) -> vllm_tokenizer::Result<String> {
+            Ok(token_ids
+                .iter()
+                .map(|token_id| char::from_u32(*token_id).unwrap_or('\u{FFFD}'))
+                .collect())
+        }
+
+        fn token_to_id(&self, token: &str) -> Option<u32> {
+            match token {
+                "<think>" => Some(1),
+                "</think>" => Some(2),
+                _ => None,
+            }
+        }
+    }
+
+    struct FailingReasoningParser {
+        fail_next: bool,
+    }
+
+    impl ReasoningParser for FailingReasoningParser {
+        fn create(_tokenizer: DynTokenizer) -> Result<Box<dyn ReasoningParser>, ReasoningError>
+        where
+            Self: Sized + 'static,
+        {
+            Ok(Box::new(Self { fail_next: true }))
+        }
+
+        fn push(&mut self, _text: &str) -> Result<ReasoningDelta, ReasoningError> {
+            if self.fail_next {
+                self.fail_next = false;
+                return Err(ReasoningError::MissingToken {
+                    token: "<think>".to_string(),
+                });
+            }
+            Ok(ReasoningDelta::default())
+        }
+    }
+
+    fn test_reasoning_parser(factory: &mut ReasoningParserFactory) -> Box<dyn ReasoningParser> {
+        factory.register_parser::<FailingReasoningParser>("failing");
+
+        factory.create("failing", Arc::new(FakeTokenizer)).unwrap()
+    }
+
+    #[tokio::test]
+    async fn reasoning_parser_failure_falls_back_to_plain_text() {
+        let mut factory = ReasoningParserFactory::new();
+        let events = stream::iter(vec![
+            Ok(DecodedTextEvent::Start {
+                prompt_token_ids: vec![1, 2, 3].into(),
+                prompt_logprobs: None,
+            }),
+            Ok(DecodedTextEvent::TextDelta {
+                delta: "abc".to_string(),
+                token_ids: vec![],
+                logprobs: None,
+                finished: None,
+            }),
+            Ok(DecodedTextEvent::TextDelta {
+                delta: "def".to_string(),
+                token_ids: vec![],
+                logprobs: None,
+                finished: Some(vllm_text::Finished {
+                    prompt_token_count: 3,
+                    output_token_count: 0,
+                    finish_reason: FinishReason::stop_eos(),
+                    kv_transfer_params: None,
+                }),
+            }),
+        ]);
+
+        let collected = reasoning_event_stream(events, Some(test_reasoning_parser(&mut factory)))
+            .collect::<Vec<_>>()
+            .await;
+
+        let events = collected
+            .into_iter()
+            .collect::<crate::Result<Vec<_>>>()
+            .expect("reasoning stream should not fail");
+
+        assert_eq!(
+            events,
+            vec![
+                ContentEvent::Start {
+                    prompt_token_ids: vec![1, 2, 3].into(),
+                    prompt_logprobs: None,
+                },
+                ContentEvent::TextDelta {
+                    kind: AssistantBlockKind::Text,
+                    delta: "abc".to_string(),
+                },
+                ContentEvent::TextDelta {
+                    kind: AssistantBlockKind::Text,
+                    delta: "def".to_string(),
+                },
+                ContentEvent::Done {
+                    prompt_token_count: 3,
+                    output_token_count: 0,
+                    finish_reason: FinishReason::stop_eos(),
+                    kv_transfer_params: None,
+                },
+            ]
+        );
+    }
+
+    #[tokio::test]
+    async fn reasoning_stream_preserves_logprobs_delta() {
+        let events = stream::iter(vec![
+            Ok(DecodedTextEvent::Start {
+                prompt_token_ids: vec![1].into(),
+                prompt_logprobs: None,
+            }),
+            Ok(DecodedTextEvent::TextDelta {
+                delta: "abc".to_string(),
+                token_ids: vec![],
+                logprobs: Some(DecodedLogprobs {
+                    positions: vec![DecodedPositionLogprobs {
+                        entries: vec![DecodedTokenLogprob {
+                            token_id: 0,
+                            token: "a".to_string(),
+                            logprob: -0.1,
+                            rank: 1,
+                        }],
+                    }],
+                }),
+                finished: None,
+            }),
+        ]);
+
+        let collected = reasoning_event_stream(events, None)
+            .collect::<Vec<_>>()
+            .await
+            .into_iter()
+            .collect::<crate::Result<Vec<_>>>()
+            .unwrap();
+
+        assert_eq!(
+            collected,
+            vec![
+                ContentEvent::Start {
+                    prompt_token_ids: vec![1].into(),
+                    prompt_logprobs: None,
+                },
+                ContentEvent::TextDelta {
+                    kind: AssistantBlockKind::Text,
+                    delta: "abc".to_string(),
+                },
+                ContentEvent::LogprobsDelta {
+                    logprobs: Some(DecodedLogprobs {
+                        positions: vec![DecodedPositionLogprobs {
+                            entries: vec![DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "a".to_string(),
+                                logprob: -0.1,
+                                rank: 1,
+                            }],
+                        }],
+                    }),
+                    token_ids: vec![],
+                },
+            ]
+        );
+    }
+
+    #[tokio::test]
+    async fn qwen3_parser_uses_prompt_end_marker_to_switch_to_content() {
+        let tokenizer = Arc::new(FakeTokenizer);
+        let events = stream::iter(vec![
+            Ok(DecodedTextEvent::Start {
+                prompt_token_ids: vec![2].into(),
+                prompt_logprobs: None,
+            }),
+            Ok(DecodedTextEvent::TextDelta {
+                delta: "thought ".to_string(),
+                token_ids: vec![],
+                logprobs: None,
+                finished: None,
+            }),
+            Ok(DecodedTextEvent::TextDelta {
+                delta: "done</think>OK".to_string(),
+                token_ids: vec![],
+                logprobs: None,
+                finished: None,
+            }),
+        ]);
+
+        let factory = ReasoningParserFactory::new();
+        let collected = reasoning_event_stream(
+            events,
+            Some(factory.create(names::QWEN3, tokenizer).unwrap()),
+        )
+        .collect::<Vec<_>>()
+        .await;
+
+        let events = collected
+            .into_iter()
+            .collect::<crate::Result<Vec<_>>>()
+            .expect("reasoning stream should not fail");
+
+        assert_eq!(
+            events,
+            vec![
+                ContentEvent::Start {
+                    prompt_token_ids: vec![2].into(),
+                    prompt_logprobs: None,
+                },
+                ContentEvent::TextDelta {
+                    kind: AssistantBlockKind::Text,
+                    delta: "thought ".to_string(),
+                },
+                ContentEvent::TextDelta {
+                    kind: AssistantBlockKind::Text,
+                    delta: "done</think>OK".to_string(),
+                },
+            ]
+        );
+    }
+
+    #[tokio::test]
+    async fn qwen3_parser_tolerates_prompt_prefill_reasoning() {
+        let tokenizer = Arc::new(FakeTokenizer);
+        let events = stream::iter(vec![
+            Ok(DecodedTextEvent::Start {
+                prompt_token_ids: vec![1].into(),
+                prompt_logprobs: None,
+            }),
+            Ok(DecodedTextEvent::TextDelta {
+                delta: "thought ".to_string(),
+                token_ids: vec![],
+                logprobs: None,
+                finished: None,
+            }),
+            Ok(DecodedTextEvent::TextDelta {
+                delta: "done</think>OK".to_string(),
+                token_ids: vec![],
+                logprobs: None,
+                finished: None,
+            }),
+        ]);
+
+        let factory = ReasoningParserFactory::new();
+        let collected = reasoning_event_stream(
+            events,
+            Some(factory.create(names::QWEN3, tokenizer).unwrap()),
+        )
+        .collect::<Vec<_>>()
+        .await;
+
+        let events = collected
+            .into_iter()
+            .collect::<crate::Result<Vec<_>>>()
+            .expect("reasoning stream should not fail");
+
+        assert_eq!(
+            events,
+            vec![
+                ContentEvent::Start {
+                    prompt_token_ids: vec![1].into(),
+                    prompt_logprobs: None,
+                },
+                ContentEvent::TextDelta {
+                    kind: AssistantBlockKind::Reasoning,
+                    delta: "thought ".to_string(),
+                },
+                ContentEvent::TextDelta {
+                    kind: AssistantBlockKind::Reasoning,
+                    delta: "done".to_string(),
+                },
+                ContentEvent::TextDelta {
+                    kind: AssistantBlockKind::Text,
+                    delta: "OK".to_string(),
+                },
+            ]
+        );
+    }
+}
diff --git a/rust/src/chat/src/output/default/tool.rs b/rust/src/chat/src/output/default/tool.rs
new file mode 100644
index 000000000000..9ab5ee1684b8
--- /dev/null
+++ b/rust/src/chat/src/output/default/tool.rs
@@ -0,0 +1,625 @@
+//! Adapts plain assistant text deltas into tool-call-aware assistant updates.
+//!
+//! This stage runs after reasoning separation and before final block assembly.
+//! It only inspects normal assistant text, leaves reasoning deltas untouched,
+//! and translates incremental tool parsing output into internal tool-call
+//! events while preserving plain-text fallback behavior.
+
+use asynk_strim_attr::{TryYielder, try_stream};
+use futures::{StreamExt as _, pin_mut};
+use thiserror_ext::AsReport;
+use tracing::warn;
+
+use super::{AssistantEvent, ContentEvent, ContentEventStream};
+use crate::Result;
+use crate::error::Error;
+use crate::event::AssistantBlockKind;
+use crate::output::generate_tool_call_id;
+use crate::parser::tool::{ToolCallDelta, ToolParseResult, ToolParser};
+
+/// Per-stream tool parsing state.
+struct ToolState {
+    /// Parser for the current model family.
+    parser: Box<dyn ToolParser>,
+    /// Whether tool parsing has already failed for this stream.
+    parser_failed: bool,
+    /// The parser-local index of the currently open tool call, if any.
+    // NOTE: We only allow single open tool call at a time right now, since that's what all
+    // supported parsers currently emit. Change this to a `BTreeMap` if we need to support multiple
+    // interleaved calls in the future.
+    open_call_index: Option<usize>,
+}
+
+impl ToolState {
+    /// Create one fresh tool-parsing state for a new streamed response.
+    fn new(parser: Box<dyn ToolParser>) -> Self {
+        Self {
+            parser,
+            parser_failed: false,
+            open_call_index: None,
+        }
+    }
+
+    /// Convert one semantic assistant text delta into zero or more tool-aware
+    /// internal events.
+    fn process_text_delta(
+        &mut self,
+        kind: AssistantBlockKind,
+        delta: String,
+    ) -> Result<Vec<AssistantEvent>> {
+        let mut events = Vec::new();
+
+        // Only normal assistant text is eligible for tool parsing. Reasoning
+        // blocks and plain-text fallback should pass through unchanged.
+        if kind != AssistantBlockKind::Text || self.parser_failed {
+            self.open_call_index = None;
+            events.push(AssistantEvent::TextDelta { kind, delta });
+            return Ok(events);
+        }
+
+        let parse_result = self.parser.push(&delta);
+
+        match parse_result {
+            Ok(result) => self.process_parse_result(kind, result, &mut events)?,
+            Err(error) => {
+                if !self.parser_failed {
+                    warn!(
+                        error = %error.as_report(),
+                        "tool parser failed; falling back to plain text deltas"
+                    );
+                    self.parser_failed = true;
+                }
+                self.open_call_index = None;
+                events.push(AssistantEvent::TextDelta { kind, delta });
+            }
+        }
+
+        Ok(events)
+    }
+
+    /// Apply one parsed tool result to the current stream state.
+    fn process_parse_result(
+        &mut self,
+        kind: AssistantBlockKind,
+        result: ToolParseResult,
+        events: &mut Vec<AssistantEvent>,
+    ) -> Result<()> {
+        // When we are not currently streaming a tool call, preserve plain
+        // text first and then surface any new tool call items.
+        if self.open_call_index.is_none() {
+            push_text_delta(events, kind, result.normal_text);
+            self.process_tool_items(result.calls, events)?;
+        } else {
+            // Once a tool call is open, prioritize tool deltas first. If the
+            // parser emits normal text again, close the tool call and resume
+            // plain text output.
+            self.process_tool_items(result.calls, events)?;
+            if !result.normal_text.is_empty() {
+                self.open_call_index = None;
+                push_text_delta(events, kind, result.normal_text);
+            }
+        }
+        Ok(())
+    }
+
+    /// Apply one batch of parsed tool-call deltas emitted by the parser.
+    fn process_tool_items(
+        &mut self,
+        items: Vec<ToolCallDelta>,
+        events: &mut Vec<AssistantEvent>,
+    ) -> Result<()> {
+        for item in items {
+            if let Some(name) = item.name {
+                let is_new_tool = match self.open_call_index {
+                    Some(open_call_index) => open_call_index != item.tool_index,
+                    None => true,
+                };
+                if is_new_tool {
+                    let id = generate_tool_call_id();
+                    self.open_call_index = Some(item.tool_index);
+                    events.push(AssistantEvent::ToolCallStart { id, name });
+                }
+            }
+
+            if item.arguments.is_empty() {
+                // No arguments delta to apply.
+                continue;
+            }
+            let Some(open_call_index) = self.open_call_index else {
+                return Err(Error::ToolCallStreamInvariant {
+                    message: format!(
+                        "received arguments for tool index {} before any tool-call start",
+                        item.tool_index
+                    ),
+                });
+            };
+            if open_call_index != item.tool_index {
+                return Err(Error::ToolCallStreamInvariant {
+                    message: format!(
+                        "received arguments for tool index {} while tool index {} is open",
+                        item.tool_index, open_call_index
+                    ),
+                });
+            }
+
+            events.push(AssistantEvent::ToolCallArgumentsDelta {
+                delta: item.arguments,
+            });
+        }
+        Ok(())
+    }
+
+    /// Flush parser state at end-of-stream and close any remaining open calls.
+    fn finish(&mut self) -> Result<Vec<AssistantEvent>> {
+        let mut events = Vec::new();
+
+        if self.parser_failed {
+            return Ok(events);
+        }
+
+        match self.parser.finish() {
+            Ok(result) => {
+                self.process_parse_result(AssistantBlockKind::Text, result, &mut events)?
+            }
+            Err(error) => {
+                warn!(
+                    error = %error.as_report(),
+                    "tool parser finish failed; closing open tool calls with buffered state"
+                );
+                self.parser_failed = true;
+            }
+        }
+
+        Ok(events)
+    }
+}
+
+/// Push one plain-text delta if it is non-empty.
+fn push_text_delta(events: &mut Vec<AssistantEvent>, kind: AssistantBlockKind, delta: String) {
+    if delta.is_empty() {
+        return;
+    }
+    events.push(AssistantEvent::TextDelta { kind, delta });
+}
+
+/// Wrap one semantic assistant stream into the internal tool-aware assistant
+/// stream.
+#[try_stream]
+pub(crate) async fn tool_event_stream(
+    stream: impl ContentEventStream,
+    parser: Option<Box<dyn ToolParser>>,
+    mut y: TryYielder<AssistantEvent, Error>,
+) -> Result<()> {
+    // Without a parser, pass through the input stream unchanged.
+    let Some(parser) = parser else {
+        pin_mut!(stream);
+        while let Some(event) = stream.next().await.transpose()? {
+            y.yield_ok(event.into()).await;
+        }
+        return Ok(());
+    };
+
+    pin_mut!(stream);
+    let mut state = ToolState::new(parser);
+
+    while let Some(event) = stream.next().await.transpose()? {
+        match event {
+            ContentEvent::Start {
+                prompt_token_ids,
+                prompt_logprobs,
+            } => {
+                y.yield_ok(AssistantEvent::Start {
+                    prompt_token_ids,
+                    prompt_logprobs,
+                })
+                .await;
+            }
+            ContentEvent::TextDelta { kind, delta } => {
+                for next in state.process_text_delta(kind, delta)? {
+                    y.yield_ok(next).await;
+                }
+            }
+            ContentEvent::LogprobsDelta {
+                logprobs,
+                token_ids,
+            } => {
+                y.yield_ok(AssistantEvent::LogprobsDelta {
+                    logprobs,
+                    token_ids,
+                })
+                .await;
+            }
+            ContentEvent::Done {
+                prompt_token_count,
+                output_token_count,
+                finish_reason,
+                kv_transfer_params,
+            } => {
+                for next in state.finish()? {
+                    y.yield_ok(next).await;
+                }
+
+                y.yield_ok(AssistantEvent::Done {
+                    prompt_token_count,
+                    output_token_count,
+                    finish_reason,
+                    kv_transfer_params,
+                })
+                .await;
+            }
+        }
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+
+    use futures::{StreamExt as _, stream};
+    use vllm_llm::FinishReason;
+    use vllm_text::{DecodedLogprobs, DecodedPositionLogprobs, DecodedTokenLogprob};
+    use vllm_tool_parser::Result;
+
+    use super::super::{AssistantEvent, ContentEvent};
+    use super::tool_event_stream;
+    use crate::error::Error;
+    use crate::event::{AssistantBlockKind, AssistantMessageExt as _};
+    use crate::output::structured::structured_chat_event_stream;
+    use crate::parser::tool::{ToolParseResult, ToolParser, ToolParserError};
+    use crate::request::ChatTool;
+    use crate::stream::ChatEventStream;
+
+    struct FailingParser {
+        fail_next: bool,
+    }
+
+    struct ScriptedParser {
+        push_results: Vec<ToolParseResult>,
+        finish_result: ToolParseResult,
+    }
+
+    impl ToolParser for FailingParser {
+        fn create(_tools: &[ChatTool]) -> vllm_tool_parser::Result<Box<dyn ToolParser>>
+        where
+            Self: Sized + 'static,
+        {
+            Ok(Box::new(Self { fail_next: false }))
+        }
+
+        fn push(&mut self, _chunk: &str) -> Result<ToolParseResult> {
+            if self.fail_next {
+                self.fail_next = false;
+                return Err(ToolParserError::ParsingFailed {
+                    message: "boom".to_string(),
+                });
+            }
+
+            Ok(ToolParseResult::default())
+        }
+    }
+
+    impl ToolParser for ScriptedParser {
+        fn create(_tools: &[ChatTool]) -> vllm_tool_parser::Result<Box<dyn ToolParser>>
+        where
+            Self: Sized + 'static,
+        {
+            Ok(Box::new(Self {
+                push_results: Vec::new(),
+                finish_result: ToolParseResult::default(),
+            }))
+        }
+
+        fn push(&mut self, _chunk: &str) -> Result<ToolParseResult> {
+            Ok(self.push_results.pop().unwrap_or_default())
+        }
+
+        fn finish(&mut self) -> Result<ToolParseResult> {
+            Ok(std::mem::take(&mut self.finish_result))
+        }
+    }
+
+    #[tokio::test]
+    async fn tool_parser_failure_falls_back_to_plain_text() {
+        let events = stream::iter(vec![
+            Ok(ContentEvent::Start {
+                prompt_token_ids: vec![1, 2, 3].into(),
+                prompt_logprobs: None,
+            }),
+            Ok(ContentEvent::TextDelta {
+                kind: AssistantBlockKind::Text,
+                delta: "abc".to_string(),
+            }),
+            Ok(ContentEvent::TextDelta {
+                kind: AssistantBlockKind::Text,
+                delta: "def".to_string(),
+            }),
+            Ok(ContentEvent::Done {
+                prompt_token_count: 3,
+                output_token_count: 0,
+                finish_reason: FinishReason::stop_eos(),
+                kv_transfer_params: None,
+            }),
+        ]);
+
+        let collected =
+            tool_event_stream(events, Some(Box::new(FailingParser { fail_next: true })))
+                .collect::<Vec<_>>()
+                .await;
+
+        let events = collected
+            .into_iter()
+            .collect::<crate::Result<Vec<_>>>()
+            .expect("tool stream should not fail");
+
+        assert_eq!(
+            events,
+            vec![
+                AssistantEvent::Start {
+                    prompt_token_ids: vec![1, 2, 3].into(),
+                    prompt_logprobs: None,
+                },
+                AssistantEvent::TextDelta {
+                    kind: AssistantBlockKind::Text,
+                    delta: "abc".to_string(),
+                },
+                AssistantEvent::TextDelta {
+                    kind: AssistantBlockKind::Text,
+                    delta: "def".to_string(),
+                },
+                AssistantEvent::Done {
+                    prompt_token_count: 3,
+                    output_token_count: 0,
+                    finish_reason: FinishReason::stop_eos(),
+                    kv_transfer_params: None,
+                },
+            ]
+        );
+
+        let message = ChatEventStream::new(
+            "req_fallback".to_string(),
+            Box::pin(structured_chat_event_stream(stream::iter(
+                events.into_iter().map(Ok),
+            ))),
+        )
+        .collect_message()
+        .await
+        .expect("collect_message should succeed");
+        assert_eq!(message.message.text(), "abcdef");
+        assert!(message.message.tool_calls().next().is_none());
+    }
+
+    #[tokio::test]
+    async fn tool_stream_preserves_logprobs_delta() {
+        let events = stream::iter(vec![
+            Ok(ContentEvent::Start {
+                prompt_token_ids: vec![1].into(),
+                prompt_logprobs: None,
+            }),
+            Ok(ContentEvent::LogprobsDelta {
+                logprobs: Some(DecodedLogprobs {
+                    positions: vec![DecodedPositionLogprobs {
+                        entries: vec![DecodedTokenLogprob {
+                            token_id: 0,
+                            token: "a".to_string(),
+                            logprob: -0.2,
+                            rank: 1,
+                        }],
+                    }],
+                }),
+                token_ids: vec![],
+            }),
+            Ok(ContentEvent::Done {
+                prompt_token_count: 1,
+                output_token_count: 0,
+                finish_reason: FinishReason::stop_eos(),
+                kv_transfer_params: None,
+            }),
+        ]);
+        let events = tool_event_stream(events, Some(Box::new(FailingParser { fail_next: false })))
+            .collect::<Vec<_>>()
+            .await
+            .into_iter()
+            .collect::<crate::Result<Vec<_>>>()
+            .unwrap();
+
+        assert_eq!(
+            events,
+            vec![
+                AssistantEvent::Start {
+                    prompt_token_ids: vec![1].into(),
+                    prompt_logprobs: None,
+                },
+                AssistantEvent::LogprobsDelta {
+                    logprobs: Some(DecodedLogprobs {
+                        positions: vec![DecodedPositionLogprobs {
+                            entries: vec![DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "a".to_string(),
+                                logprob: -0.2,
+                                rank: 1,
+                            }],
+                        }],
+                    }),
+                    token_ids: vec![],
+                },
+                AssistantEvent::Done {
+                    prompt_token_count: 1,
+                    output_token_count: 0,
+                    finish_reason: FinishReason::stop_eos(),
+                    kv_transfer_params: None,
+                },
+            ]
+        );
+    }
+
+    #[tokio::test]
+    async fn tool_stream_rejects_interleaved_tool_indices() {
+        let events = stream::iter(vec![
+            Ok(ContentEvent::TextDelta {
+                kind: AssistantBlockKind::Text,
+                delta: "ignored".to_string(),
+            }),
+            Ok(ContentEvent::Done {
+                prompt_token_count: 1,
+                output_token_count: 1,
+                finish_reason: FinishReason::stop_eos(),
+                kv_transfer_params: None,
+            }),
+        ]);
+
+        let parser = ScriptedParser {
+            push_results: vec![ToolParseResult {
+                normal_text: String::new(),
+                calls: vec![
+                    crate::parser::tool::ToolCallDelta {
+                        tool_index: 0,
+                        name: Some("first".to_string()),
+                        arguments: String::new(),
+                    },
+                    crate::parser::tool::ToolCallDelta {
+                        tool_index: 1,
+                        name: None,
+                        arguments: "{}".to_string(),
+                    },
+                ],
+            }],
+            finish_result: ToolParseResult::default(),
+        };
+
+        let err = tool_event_stream(events, Some(Box::new(parser)))
+            .collect::<Vec<_>>()
+            .await
+            .into_iter()
+            .find_map(|result| result.err())
+            .expect("expected invariant error");
+
+        assert!(matches!(err, Error::ToolCallStreamInvariant { .. }));
+    }
+
+    #[tokio::test]
+    async fn tool_stream_resets_open_tool_when_normal_text_interrupts_it() {
+        let events = stream::iter(vec![
+            Ok(ContentEvent::TextDelta {
+                kind: AssistantBlockKind::Text,
+                delta: "start".to_string(),
+            }),
+            Ok(ContentEvent::TextDelta {
+                kind: AssistantBlockKind::Text,
+                delta: "text".to_string(),
+            }),
+            Ok(ContentEvent::TextDelta {
+                kind: AssistantBlockKind::Text,
+                delta: "args".to_string(),
+            }),
+        ]);
+
+        let parser = ScriptedParser {
+            push_results: vec![
+                ToolParseResult {
+                    normal_text: String::new(),
+                    calls: vec![crate::parser::tool::ToolCallDelta {
+                        tool_index: 0,
+                        name: None,
+                        arguments: "}".to_string(),
+                    }],
+                },
+                ToolParseResult {
+                    normal_text: "plain text".to_string(),
+                    calls: Vec::new(),
+                },
+                ToolParseResult {
+                    normal_text: String::new(),
+                    calls: vec![crate::parser::tool::ToolCallDelta {
+                        tool_index: 0,
+                        name: Some("first".to_string()),
+                        arguments: "{".to_string(),
+                    }],
+                },
+            ],
+            finish_result: ToolParseResult::default(),
+        };
+
+        let err = tool_event_stream(events, Some(Box::new(parser)))
+            .collect::<Vec<_>>()
+            .await
+            .into_iter()
+            .find_map(|result| result.err())
+            .expect("expected invariant error");
+
+        assert!(matches!(
+            err,
+            Error::ToolCallStreamInvariant { message }
+                if message == "received arguments for tool index 0 before any tool-call start"
+        ));
+    }
+
+    #[tokio::test]
+    async fn tool_stream_emits_start_and_args_for_terminal_text() {
+        let events = stream::iter(vec![
+            Ok(ContentEvent::Start {
+                prompt_token_ids: vec![1].into(),
+                prompt_logprobs: None,
+            }),
+            Ok(ContentEvent::TextDelta {
+                kind: AssistantBlockKind::Text,
+                delta: "ignored".to_string(),
+            }),
+            Ok(ContentEvent::Done {
+                prompt_token_count: 1,
+                output_token_count: 1,
+                finish_reason: FinishReason::stop_eos(),
+                kv_transfer_params: None,
+            }),
+        ]);
+
+        let parser = ScriptedParser {
+            push_results: vec![ToolParseResult {
+                normal_text: String::new(),
+                calls: vec![
+                    crate::parser::tool::ToolCallDelta {
+                        tool_index: 0,
+                        name: Some("first".to_string()),
+                        arguments: r#"{"a":1}"#.to_string(),
+                    },
+                    crate::parser::tool::ToolCallDelta {
+                        tool_index: 1,
+                        name: Some("second".to_string()),
+                        arguments: r#"{"b":2}"#.to_string(),
+                    },
+                ],
+            }],
+            finish_result: ToolParseResult::default(),
+        };
+
+        let events = tool_event_stream(events, Some(Box::new(parser)))
+            .collect::<Vec<_>>()
+            .await
+            .into_iter()
+            .collect::<crate::Result<Vec<_>>>()
+            .unwrap();
+
+        assert!(matches!(events[1], AssistantEvent::ToolCallStart { .. }));
+        assert!(matches!(
+            events[2],
+            AssistantEvent::ToolCallArgumentsDelta { .. }
+        ));
+        assert!(matches!(events[3], AssistantEvent::ToolCallStart { .. }));
+        assert!(matches!(
+            events[4],
+            AssistantEvent::ToolCallArgumentsDelta { .. }
+        ));
+        let collected = ChatEventStream::new(
+            "req_final_only".to_string(),
+            Box::pin(structured_chat_event_stream(stream::iter(
+                events.into_iter().map(Ok),
+            ))),
+        )
+        .collect_message()
+        .await
+        .unwrap();
+        let tool_calls = collected.message.tool_calls().collect::<Vec<_>>();
+        assert_eq!(tool_calls.len(), 2);
+        assert_eq!(tool_calls[0].name, "first");
+        assert_eq!(tool_calls[1].name, "second");
+    }
+}
diff --git a/rust/src/chat/src/output/harmony/mod.rs b/rust/src/chat/src/output/harmony/mod.rs
new file mode 100644
index 000000000000..5dc6bc311856
--- /dev/null
+++ b/rust/src/chat/src/output/harmony/mod.rs
@@ -0,0 +1,430 @@
+//! Native Harmony output processing for `gpt_oss`.
+//!
+//! Unlike the default text-first pipeline, this processor consumes
+//! `DecodedTextEvent` token IDs directly and lets the official `openai-harmony`
+//! parser recover the structured assistant message shape at token granularity.
+
+use std::sync::LazyLock;
+
+use anyhow::Context;
+use asynk_strim_attr::{TryYielder, try_stream};
+use futures::StreamExt as _;
+use openai_harmony::chat::{Content as HarmonyContent, Message as HarmonyMessage, Role};
+use openai_harmony::{
+    HarmonyEncoding, HarmonyEncodingName, StreamableParser, load_harmony_encoding,
+};
+use thiserror_ext::AsReport;
+use vllm_text::output::DecodedTextEvent;
+
+use crate::Result as ChatResult;
+use crate::error::{Error, Result};
+use crate::event::AssistantBlockKind;
+use crate::output::{
+    AssistantEvent, ChatOutputProcessor, DynChatEventStream, DynDecodedTextEventStream,
+    generate_tool_call_id,
+};
+use crate::parser::ParserSelection;
+use crate::request::ChatRequest;
+
+/// Request-scoped Harmony output processor used for `model_type == "gpt_oss"`.
+///
+/// This processor keeps the existing northbound `ChatEvent` shape, but swaps
+/// the parsed-assistant backend from generic text/reasoning/tool parsers to the
+/// official Harmony token parser.
+#[derive(Debug)]
+pub struct HarmonyChatOutputProcessor {
+    encoding: &'static HarmonyEncoding,
+    tool_calls_enabled: bool,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+struct HarmonyGroupKey {
+    serial: usize,
+    channel: Option<String>,
+    recipient: Option<String>,
+}
+
+#[derive(Debug)]
+struct HarmonyGroup {
+    key: HarmonyGroupKey,
+    text: String,
+}
+
+#[derive(Debug)]
+struct OpenHarmonyToolCall {
+    recipient: String,
+}
+
+struct HarmonyState {
+    /// Incremental Harmony parser over assistant token IDs.
+    parser: StreamableParser,
+    /// Whether tool-call content should surface as structured tool events.
+    tool_calls_enabled: bool,
+    /// Count of completed visible assistant messages for newline insertion.
+    completed_visible_messages: usize,
+    /// Count of completed reasoning messages for newline insertion.
+    completed_reasoning_messages: usize,
+    /// The current visible text/reasoning group, if any.
+    current_text_group: Option<HarmonyGroupKey>,
+    /// The currently open Harmony tool recipient, if any.
+    open_tool_call: Option<OpenHarmonyToolCall>,
+}
+
+impl HarmonyChatOutputProcessor {
+    /// Build one request-scoped Harmony processor after backend policy checks.
+    pub fn new(request: &ChatRequest) -> ChatResult<Self> {
+        Ok(Self {
+            encoding: harmony_encoding()?,
+            tool_calls_enabled: request.tool_parsing_enabled(),
+        })
+    }
+}
+
+/// Validate that the generic parser selections are compatible with native
+/// Harmony output parsing.
+///
+/// `gpt_oss` uses a model-specific token-level parser, so any generic
+/// reasoning/tool parser override is rejected instead of being silently
+/// ignored.
+pub(crate) fn validate_harmony_parser_overrides(
+    tool_call_parser: &ParserSelection,
+    reasoning_parser: &ParserSelection,
+) -> ChatResult<()> {
+    validate_harmony_override("tool", tool_call_parser)?;
+    validate_harmony_override("reasoning", reasoning_parser)?;
+    Ok(())
+}
+
+fn validate_harmony_override(kind: &'static str, selection: &ParserSelection) -> ChatResult<()> {
+    if matches!(selection, ParserSelection::Auto) {
+        return Ok(());
+    }
+
+    Err(Error::HarmonyParserOverrideUnsupported {
+        kind,
+        selection: selection.to_string(),
+    })
+}
+
+impl ChatOutputProcessor for HarmonyChatOutputProcessor {
+    fn process(self: Box<Self>, decoded: DynDecodedTextEventStream) -> Result<DynChatEventStream> {
+        let assistant =
+            harmony_assistant_event_stream(decoded, self.encoding, self.tool_calls_enabled);
+        Ok(crate::output::structured::structured_chat_event_stream(assistant).boxed())
+    }
+}
+
+impl HarmonyState {
+    /// Create one fresh Harmony streaming state for a new assistant response.
+    fn new(encoding: HarmonyEncoding, tool_calls_enabled: bool) -> Result<Self> {
+        Ok(Self {
+            parser: StreamableParser::new(encoding, Some(Role::Assistant))
+                .map_err(harmony_output_parsing_error)?,
+            tool_calls_enabled,
+            completed_visible_messages: 0,
+            completed_reasoning_messages: 0,
+            current_text_group: None,
+            open_tool_call: None,
+        })
+    }
+
+    fn process_token_ids(&mut self, token_ids: &[u32]) -> Result<Vec<AssistantEvent>> {
+        let mut events = Vec::new();
+        let mut pending_group: Option<HarmonyGroup> = None;
+
+        for &token_id in token_ids {
+            let completed_before = self.parser.messages().len();
+            self.parser.process(token_id).map_err(harmony_output_parsing_error)?;
+            let completed_after = self.parser.messages().len();
+
+            if let Some(delta) = self
+                .parser
+                .last_content_delta()
+                .map_err(harmony_output_parsing_error)?
+                .filter(|delta| !delta.is_empty())
+            {
+                let key = HarmonyGroupKey {
+                    serial: completed_after,
+                    channel: self.parser.current_channel(),
+                    recipient: self.parser.current_recipient(),
+                };
+
+                match pending_group.as_mut() {
+                    Some(group) if group.key == key => group.text.push_str(&delta),
+                    _ => {
+                        if let Some(group) = pending_group.take() {
+                            self.emit_group(group, &mut events);
+                        }
+                        pending_group = Some(HarmonyGroup { key, text: delta });
+                    }
+                }
+            }
+
+            if completed_after > completed_before {
+                if let Some(group) = pending_group.take() {
+                    self.emit_group(group, &mut events);
+                }
+
+                for serial in completed_before..completed_after {
+                    let key = {
+                        let message = &self.parser.messages()[serial];
+                        HarmonyGroupKey {
+                            serial,
+                            channel: message.channel.clone(),
+                            recipient: message.recipient.clone(),
+                        }
+                    };
+                    self.handle_completed_message(key);
+                }
+            }
+        }
+
+        if let Some(group) = pending_group {
+            self.emit_group(group, &mut events);
+        }
+
+        Ok(events)
+    }
+
+    /// Flush Harmony parser state at EOS and emit any newly finalized assistant
+    /// events.
+    fn process_eos(&mut self) -> Result<Vec<AssistantEvent>> {
+        let completed_before = self.parser.messages().len();
+        let pending_key = HarmonyGroupKey {
+            serial: completed_before,
+            channel: self.parser.current_channel(),
+            recipient: self.parser.current_recipient(),
+        };
+        let pending_content =
+            self.parser.current_content().map_err(harmony_output_parsing_error)?;
+
+        self.parser.process_eos().map_err(harmony_output_parsing_error)?;
+
+        let completed_after = self.parser.messages().len();
+        let mut events = Vec::new();
+
+        if completed_after == completed_before {
+            return Ok(events);
+        }
+
+        let final_message = &self.parser.messages()[completed_before];
+        let final_text = harmony_message_text(final_message);
+        let tail = final_text.strip_prefix(&pending_content).unwrap_or(final_text).to_string();
+        if !tail.is_empty() {
+            self.emit_group(
+                HarmonyGroup {
+                    key: pending_key,
+                    text: tail,
+                },
+                &mut events,
+            );
+        }
+
+        for serial in completed_before..completed_after {
+            let key = {
+                let message = &self.parser.messages()[serial];
+                HarmonyGroupKey {
+                    serial,
+                    channel: message.channel.clone(),
+                    recipient: message.recipient.clone(),
+                }
+            };
+            self.handle_completed_message(key);
+        }
+
+        Ok(events)
+    }
+
+    /// Flush one coalesced Harmony content group into internal assistant
+    /// events.
+    fn emit_group(&mut self, group: HarmonyGroup, events: &mut Vec<AssistantEvent>) {
+        let channel = group.key.channel.as_deref();
+        let recipient = group.key.recipient.as_deref();
+
+        if let Some(kind) = text_block_kind(channel, recipient) {
+            self.open_tool_call = None;
+
+            if self.current_text_group.as_ref() != Some(&group.key) {
+                let needs_newline = match kind {
+                    AssistantBlockKind::Text => self.completed_visible_messages > 0,
+                    AssistantBlockKind::Reasoning => self.completed_reasoning_messages > 0,
+                    AssistantBlockKind::ToolCall => false,
+                };
+
+                if needs_newline {
+                    events.push(AssistantEvent::TextDelta {
+                        kind,
+                        delta: "\n".to_string(),
+                    });
+                }
+
+                self.current_text_group = Some(group.key.clone());
+            }
+
+            events.push(AssistantEvent::TextDelta {
+                kind,
+                delta: group.text,
+            });
+            return;
+        }
+
+        self.current_text_group = None;
+
+        let Some(tool_name) = tool_name(channel, recipient) else {
+            return;
+        };
+        if !self.tool_calls_enabled {
+            return;
+        }
+
+        let recipient = recipient.expect("tool groups always have recipient").to_string();
+        let opens_same_call = match self.open_tool_call.as_ref() {
+            Some(open_call) => open_call.recipient == recipient,
+            None => false,
+        };
+        if !opens_same_call {
+            let id = generate_tool_call_id();
+            self.open_tool_call = Some(OpenHarmonyToolCall { recipient });
+            events.push(AssistantEvent::ToolCallStart {
+                id,
+                name: tool_name.to_string(),
+            });
+        }
+
+        if !group.text.is_empty() {
+            events.push(AssistantEvent::ToolCallArgumentsDelta { delta: group.text });
+        }
+    }
+
+    /// Update newline and open-tool state after one Harmony message completes.
+    fn handle_completed_message(&mut self, key: HarmonyGroupKey) {
+        if self.current_text_group.as_ref() == Some(&key) {
+            self.current_text_group = None;
+        }
+
+        let channel = key.channel.as_deref();
+        let recipient = key.recipient.as_deref();
+        let kind = text_block_kind(channel, recipient);
+
+        if kind == Some(AssistantBlockKind::Text) {
+            self.completed_visible_messages += 1;
+        } else if kind == Some(AssistantBlockKind::Reasoning) {
+            self.completed_reasoning_messages += 1;
+        } else if tool_name(channel, recipient).is_some() {
+            self.open_tool_call = None;
+        }
+    }
+}
+
+/// Convert decoded token updates into internal assistant events with Harmony
+/// parsing.
+#[try_stream]
+async fn harmony_assistant_event_stream(
+    decoded: DynDecodedTextEventStream,
+    encoding: &'static HarmonyEncoding,
+    tool_calls_enabled: bool,
+    mut y: TryYielder<AssistantEvent, Error>,
+) -> Result<()> {
+    let mut state = HarmonyState::new(encoding.clone(), tool_calls_enabled)?;
+    futures::pin_mut!(decoded);
+
+    while let Some(event) = decoded.next().await.transpose()? {
+        match event {
+            DecodedTextEvent::Start {
+                prompt_token_ids,
+                prompt_logprobs,
+            } => {
+                y.yield_ok(AssistantEvent::Start {
+                    prompt_token_ids,
+                    prompt_logprobs,
+                })
+                .await;
+            }
+            DecodedTextEvent::TextDelta {
+                delta: _, // harmony takes raw token IDs as input, so we ignore text deltas here
+                token_ids,
+                logprobs,
+                finished,
+            } => {
+                for event in state.process_token_ids(&token_ids)? {
+                    y.yield_ok(event).await;
+                }
+
+                if finished.is_some() {
+                    for event in state.process_eos()? {
+                        y.yield_ok(event).await;
+                    }
+                }
+
+                if logprobs.is_some() || !token_ids.is_empty() {
+                    y.yield_ok(AssistantEvent::LogprobsDelta {
+                        logprobs,
+                        token_ids,
+                    })
+                    .await;
+                }
+
+                if let Some(finished) = finished {
+                    y.yield_ok(AssistantEvent::Done {
+                        prompt_token_count: finished.prompt_token_count,
+                        output_token_count: finished.output_token_count,
+                        finish_reason: finished.finish_reason,
+                        kv_transfer_params: finished.kv_transfer_params,
+                    })
+                    .await;
+                }
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Lazily load the shared GPT-OSS Harmony encoding once per process.
+fn harmony_encoding() -> Result<&'static HarmonyEncoding> {
+    static ENCODING: LazyLock<anyhow::Result<HarmonyEncoding>> = LazyLock::new(|| {
+        load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss)
+            .context("failed to load harmony encoding for gpt-oss")
+    });
+
+    ENCODING.as_ref().map_err(|error| Error::HarmonyOutputParsing {
+        error: error.to_report_string().into(),
+    })
+}
+
+fn harmony_output_parsing_error(
+    error: impl Into<Box<dyn std::error::Error + Send + Sync>>,
+) -> Error {
+    Error::HarmonyOutputParsing {
+        error: error.into(),
+    }
+}
+
+/// Return the decoded text payload from one parsed Harmony message.
+fn harmony_message_text(message: &HarmonyMessage) -> &str {
+    let [HarmonyContent::Text(text)] = message.content.as_slice() else {
+        unreachable!("Harmony parser emits one text content block per parsed message")
+    };
+    &text.text
+}
+
+/// Map one Harmony `(channel, recipient)` pair to a visible assistant block
+/// kind.
+fn text_block_kind(channel: Option<&str>, recipient: Option<&str>) -> Option<AssistantBlockKind> {
+    match (channel, recipient) {
+        (Some("final"), _) => Some(AssistantBlockKind::Text),
+        (Some("analysis"), None) => Some(AssistantBlockKind::Reasoning),
+        (Some("commentary"), None) => Some(AssistantBlockKind::Text),
+        _ => None,
+    }
+}
+
+/// Extract the tool name from a Harmony tool-recipient field, if present.
+fn tool_name<'a>(channel: Option<&str>, recipient: Option<&'a str>) -> Option<&'a str> {
+    match (channel, recipient) {
+        (Some("commentary" | "analysis"), Some(recipient)) => recipient.strip_prefix("functions."),
+        _ => None,
+    }
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/rust/src/chat/src/output/harmony/tests.rs b/rust/src/chat/src/output/harmony/tests.rs
new file mode 100644
index 000000000000..fe42542b4736
--- /dev/null
+++ b/rust/src/chat/src/output/harmony/tests.rs
@@ -0,0 +1,351 @@
+//! Harmony output tests share the upstream `openai-harmony` tiktoken cache.
+//!
+//! Use a file lock for tests that load the encoding so `cargo nextest` cannot
+//! start multiple processes that concurrently populate the same cache file.
+
+use std::sync::Arc;
+
+use futures::executor::block_on;
+use futures::{TryStreamExt as _, stream};
+use openai_harmony::chat::{Message, Role};
+use serial_test::file_serial;
+use vllm_text::output::{DecodedLogprobs, DecodedPositionLogprobs, DecodedTextEvent, Finished};
+
+use super::*;
+use crate::output::ChatOutputProcessor;
+use crate::request::{ChatRequest, ChatTool, ChatToolChoice};
+use crate::{AssistantMessageExt, ChatEvent, FinishReason};
+
+fn assistant_prefix() -> Vec<u32> {
+    harmony_encoding()
+        .unwrap()
+        .render_conversation_for_completion(std::iter::empty::<&Message>(), Role::Assistant, None)
+        .unwrap()
+}
+
+fn completion_tokens(messages: &[Message]) -> Vec<u32> {
+    let encoding = harmony_encoding().unwrap();
+    let prefix = assistant_prefix();
+    let rendered = encoding.render_conversation(messages.iter(), None).unwrap();
+    assert!(rendered.starts_with(&prefix));
+    rendered[prefix.len()..].to_vec()
+}
+
+fn text_message(channel: &str, text: &str) -> Message {
+    Message::from_role_and_content(Role::Assistant, text).with_channel(channel)
+}
+
+fn tool_message(name: &str, arguments: &str, channel: &str) -> Message {
+    Message::from_role_and_content(Role::Assistant, arguments)
+        .with_channel(channel)
+        .with_recipient(format!("functions.{name}"))
+        .with_content_type("json")
+}
+
+fn decoded_start() -> DecodedTextEvent {
+    DecodedTextEvent::Start {
+        prompt_token_ids: Arc::<[u32]>::from([]),
+        prompt_logprobs: None,
+    }
+}
+
+fn finished() -> Finished {
+    Finished {
+        prompt_token_count: 0,
+        output_token_count: 0,
+        finish_reason: FinishReason::stop_eos(),
+        kv_transfer_params: None,
+    }
+}
+
+async fn collect_events(
+    processor: HarmonyChatOutputProcessor,
+    events: Vec<DecodedTextEvent>,
+) -> Vec<ChatEvent> {
+    Box::new(processor)
+        .process(Box::pin(stream::iter(events.into_iter().map(Ok))))
+        .unwrap()
+        .try_collect()
+        .await
+        .unwrap()
+}
+
+fn request_with_tools() -> ChatRequest {
+    ChatRequest {
+        tool_choice: ChatToolChoice::Auto,
+        tools: vec![ChatTool {
+            name: "get_weather".to_string(),
+            description: Some("Get weather".to_string()),
+            parameters: serde_json::json!({
+                "type": "object",
+                "properties": {"city": {"type": "string"}},
+                "required": ["city"]
+            }),
+            strict: None,
+        }],
+        ..ChatRequest::for_test()
+    }
+}
+
+#[test]
+#[file_serial(harmony_tiktoken_cache)]
+fn interrupted_final_message_is_preserved() {
+    let tokens = completion_tokens(&[text_message("final", "hello")]);
+    let events = block_on(collect_events(
+        HarmonyChatOutputProcessor::new(&ChatRequest::for_test()).unwrap(),
+        vec![
+            decoded_start(),
+            DecodedTextEvent::TextDelta {
+                delta: String::new(),
+                token_ids: tokens[..tokens.len() - 1].to_vec(),
+                logprobs: None,
+                finished: Some(finished()),
+            },
+        ],
+    ));
+
+    assert_eq!(
+        events.last(),
+        Some(&ChatEvent::Done {
+            message: crate::AssistantMessage {
+                content: vec![crate::AssistantContentBlock::Text {
+                    text: "hello".to_string(),
+                }],
+            },
+            prompt_token_count: 0,
+            output_token_count: 0,
+            finish_reason: FinishReason::stop_eos(),
+            kv_transfer_params: None,
+        })
+    );
+}
+
+#[test]
+#[file_serial(harmony_tiktoken_cache)]
+fn eos_flush_preserves_trailing_replacement_text() {
+    let mut tokens = completion_tokens(&[text_message("final", "Hi")]);
+    tokens.pop();
+    tokens.push(u32::MAX);
+
+    let events = block_on(collect_events(
+        HarmonyChatOutputProcessor::new(&ChatRequest::for_test()).unwrap(),
+        vec![
+            decoded_start(),
+            DecodedTextEvent::TextDelta {
+                delta: String::new(),
+                token_ids: tokens,
+                logprobs: None,
+                finished: Some(finished()),
+            },
+        ],
+    ));
+
+    let ChatEvent::Done { message, .. } = events.last().unwrap() else {
+        panic!("expected done");
+    };
+    assert_eq!(message.text(), format!("Hi{}", char::REPLACEMENT_CHARACTER));
+}
+
+#[test]
+#[file_serial(harmony_tiktoken_cache)]
+fn interrupted_analysis_message_is_preserved() {
+    let tokens = completion_tokens(&[text_message("analysis", "think")]);
+    let events = block_on(collect_events(
+        HarmonyChatOutputProcessor::new(&ChatRequest::for_test()).unwrap(),
+        vec![
+            decoded_start(),
+            DecodedTextEvent::TextDelta {
+                delta: String::new(),
+                token_ids: tokens[..tokens.len() - 1].to_vec(),
+                logprobs: None,
+                finished: Some(finished()),
+            },
+        ],
+    ));
+
+    assert_eq!(
+        events.last(),
+        Some(&ChatEvent::Done {
+            message: crate::AssistantMessage {
+                content: vec![crate::AssistantContentBlock::Reasoning {
+                    text: "think".to_string(),
+                }],
+            },
+            prompt_token_count: 0,
+            output_token_count: 0,
+            finish_reason: FinishReason::stop_eos(),
+            kv_transfer_params: None,
+        })
+    );
+}
+
+#[test]
+#[file_serial(harmony_tiktoken_cache)]
+fn commentary_preamble_is_visible_but_commentary_tool_payload_is_not() {
+    let tokens = completion_tokens(&[
+        text_message("commentary", "Let me check."),
+        tool_message("get_weather", r#"{"city":"Paris"}"#, "commentary"),
+    ]);
+    let events = block_on(collect_events(
+        HarmonyChatOutputProcessor::new(&request_with_tools()).unwrap(),
+        vec![
+            decoded_start(),
+            DecodedTextEvent::TextDelta {
+                delta: String::new(),
+                token_ids: tokens,
+                logprobs: None,
+                finished: Some(finished()),
+            },
+        ],
+    ));
+
+    let done = events.last().unwrap();
+    let ChatEvent::Done { message, .. } = done else {
+        panic!("expected done");
+    };
+    assert_eq!(message.text(), "Let me check.");
+    assert_eq!(message.tool_calls().count(), 1);
+}
+
+#[test]
+#[file_serial(harmony_tiktoken_cache)]
+fn multiple_messages_get_newline_separators() {
+    let tokens = completion_tokens(&[
+        text_message("analysis", "first think"),
+        text_message("analysis", "second think"),
+        text_message("final", "first answer"),
+        text_message("final", "second answer"),
+    ]);
+    let events = block_on(collect_events(
+        HarmonyChatOutputProcessor::new(&ChatRequest::for_test()).unwrap(),
+        vec![
+            decoded_start(),
+            DecodedTextEvent::TextDelta {
+                delta: String::new(),
+                token_ids: tokens,
+                logprobs: None,
+                finished: Some(finished()),
+            },
+        ],
+    ));
+
+    let ChatEvent::Done { message, .. } = events.last().unwrap() else {
+        panic!("expected done");
+    };
+    assert_eq!(
+        message.reasoning().as_deref(),
+        Some("first think\nsecond think")
+    );
+    assert_eq!(message.text(), "first answer\nsecond answer");
+}
+
+#[test]
+#[file_serial(harmony_tiktoken_cache)]
+fn tool_calls_stream_arguments_and_finish_with_local_id_shape() {
+    let tokens = completion_tokens(&[tool_message(
+        "get_weather",
+        r#"{"city":"Paris"}"#,
+        "commentary",
+    )]);
+    let midpoint = tokens.len() / 2;
+    let events = block_on(collect_events(
+        HarmonyChatOutputProcessor::new(&request_with_tools()).unwrap(),
+        vec![
+            decoded_start(),
+            DecodedTextEvent::TextDelta {
+                delta: String::new(),
+                token_ids: tokens[..midpoint].to_vec(),
+                logprobs: None,
+                finished: None,
+            },
+            DecodedTextEvent::TextDelta {
+                delta: String::new(),
+                token_ids: tokens[midpoint..].to_vec(),
+                logprobs: None,
+                finished: Some(finished()),
+            },
+        ],
+    ));
+
+    let mut saw_start = None;
+    let mut saw_args = String::new();
+    let mut saw_end = None;
+    for event in &events {
+        match event {
+            ChatEvent::ToolCallStart { id, name, .. } => {
+                assert!(id.starts_with("call_"));
+                assert_eq!(name, "get_weather");
+                saw_start = Some(id.clone());
+            }
+            ChatEvent::ToolCallArgumentsDelta { delta, .. } => saw_args.push_str(delta),
+            ChatEvent::ToolCallEnd { call, .. } => {
+                saw_end = Some(call.clone());
+            }
+            _ => {}
+        }
+    }
+
+    let start_id = saw_start.expect("tool start");
+    assert_eq!(saw_args, r#"{"city":"Paris"}"#);
+    let end = saw_end.expect("tool end");
+    assert_eq!(end.id, start_id);
+    assert_eq!(end.arguments, r#"{"city":"Paris"}"#);
+}
+
+#[test]
+#[file_serial(harmony_tiktoken_cache)]
+fn semantic_events_precede_same_update_logprobs() {
+    let tokens = completion_tokens(&[text_message("final", "hello")]);
+    let events = block_on(collect_events(
+        HarmonyChatOutputProcessor::new(&ChatRequest::for_test()).unwrap(),
+        vec![
+            decoded_start(),
+            DecodedTextEvent::TextDelta {
+                delta: String::new(),
+                token_ids: tokens,
+                logprobs: Some(DecodedLogprobs {
+                    positions: vec![DecodedPositionLogprobs { entries: vec![] }],
+                }),
+                finished: Some(finished()),
+            },
+        ],
+    ));
+
+    let block_delta_index = events
+        .iter()
+        .position(|event| matches!(event, ChatEvent::BlockDelta { .. }))
+        .unwrap();
+    let logprobs_index = events
+        .iter()
+        .position(|event| matches!(event, ChatEvent::LogprobsDelta { .. }))
+        .unwrap();
+    assert!(block_delta_index < logprobs_index);
+}
+
+#[test]
+fn rejects_generic_parser_overrides() {
+    let reasoning_error =
+        validate_harmony_parser_overrides(&ParserSelection::Auto, &ParserSelection::None)
+            .unwrap_err();
+    assert_eq!(
+        reasoning_error.to_string(),
+        "gpt_oss uses native Harmony output parsing; generic reasoning parser override `none` is not supported"
+    );
+
+    let tool_error = validate_harmony_parser_overrides(
+        &ParserSelection::Explicit("json".to_string()),
+        &ParserSelection::Auto,
+    )
+    .unwrap_err();
+    assert_eq!(
+        tool_error.to_string(),
+        "gpt_oss uses native Harmony output parsing; generic tool parser override `json` is not supported"
+    );
+}
+
+#[test]
+#[file_serial(harmony_tiktoken_cache)]
+fn allows_auto_auto_only() {
+    validate_harmony_parser_overrides(&ParserSelection::Auto, &ParserSelection::Auto).unwrap();
+    let _ = HarmonyChatOutputProcessor::new(&ChatRequest::for_test()).unwrap();
+}
diff --git a/rust/src/chat/src/output/mod.rs b/rust/src/chat/src/output/mod.rs
new file mode 100644
index 000000000000..81ec124fbcf2
--- /dev/null
+++ b/rust/src/chat/src/output/mod.rs
@@ -0,0 +1,135 @@
+use std::pin::Pin;
+use std::sync::Arc;
+
+use futures::Stream;
+use subenum::subenum;
+use trait_set::trait_set;
+use uuid::Uuid;
+use vllm_text::output::{DecodedLogprobs, DecodedPromptLogprobs, DecodedTextEvent};
+
+use crate::FinishReason;
+use crate::error::Result;
+use crate::event::{AssistantBlockKind, ChatEvent};
+
+mod default;
+mod harmony;
+mod structured;
+
+pub use default::DefaultChatOutputProcessor;
+pub use harmony::HarmonyChatOutputProcessor;
+pub(crate) use harmony::validate_harmony_parser_overrides;
+
+/// Internal assistant event before final assembly.
+///
+/// - [`ContentEvent`]: subenum after reasoning parsing, carries only text content.
+/// - [`AssistantEvent`]: full event after tool parsing, adds tool-call variants.
+#[subenum(ContentEvent)]
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) enum AssistantEvent {
+    #[subenum(ContentEvent)]
+    Start {
+        prompt_token_ids: Arc<[u32]>,
+        prompt_logprobs: Option<DecodedPromptLogprobs>,
+    },
+    #[subenum(ContentEvent)]
+    TextDelta {
+        kind: AssistantBlockKind,
+        delta: String,
+    },
+    /// Per-decoded-update sample metadata: logprobs and/or output token IDs.
+    #[subenum(ContentEvent)]
+    LogprobsDelta {
+        logprobs: Option<DecodedLogprobs>,
+        token_ids: Vec<u32>,
+    },
+    /// The start of a new tool call, with its declared name and generated ID.
+    ToolCallStart { id: String, name: String },
+    /// A delta for the arguments of the currently open tool call. Must follow a
+    /// `ToolCallStart`.
+    ToolCallArgumentsDelta { delta: String },
+    #[subenum(ContentEvent)]
+    Done {
+        prompt_token_count: usize,
+        output_token_count: usize,
+        finish_reason: FinishReason,
+        /// Connector-specific KV transfer parameters for disaggregated serving.
+        kv_transfer_params: Option<serde_json::Value>,
+    },
+}
+
+impl ContentEvent {
+    /// Convert a [`DecodedTextEvent`] into one or more [`ContentEvent`] values
+    /// by treating all text as plain (non-reasoning) content.
+    fn from_decoded_plain_text(event: DecodedTextEvent) -> Vec<Self> {
+        match event {
+            DecodedTextEvent::Start {
+                prompt_token_ids,
+                prompt_logprobs,
+            } => vec![Self::Start {
+                prompt_token_ids,
+                prompt_logprobs,
+            }],
+            DecodedTextEvent::TextDelta {
+                delta,
+                token_ids,
+                logprobs,
+                finished,
+            } => {
+                let mut events = Vec::new();
+                if !delta.is_empty() {
+                    events.push(Self::TextDelta {
+                        kind: AssistantBlockKind::Text,
+                        delta,
+                    });
+                }
+                if logprobs.is_some() || !token_ids.is_empty() {
+                    events.push(Self::LogprobsDelta {
+                        logprobs,
+                        token_ids,
+                    });
+                }
+                if let Some(finished) = finished {
+                    events.push(Self::Done {
+                        prompt_token_count: finished.prompt_token_count,
+                        output_token_count: finished.output_token_count,
+                        finish_reason: finished.finish_reason,
+                        kv_transfer_params: finished.kv_transfer_params,
+                    });
+                }
+                events
+            }
+        }
+    }
+}
+
+/// Boxed stream of decoded text events coming from [`vllm_text`].
+pub type DynDecodedTextEventStream = Pin<Box<dyn Stream<Item = Result<DecodedTextEvent>> + Send>>;
+/// Boxed stream of structured chat events exposed by [`crate::ChatLlm`].
+pub type DynChatEventStream = Pin<Box<dyn Stream<Item = Result<ChatEvent>> + Send>>;
+
+/// Request-scoped output processor from decoded text events into structured
+/// chat events.
+pub trait ChatOutputProcessor: Send {
+    /// Consume decoded text stream and return the structured chat-event stream.
+    fn process(self: Box<Self>, decoded: DynDecodedTextEventStream) -> Result<DynChatEventStream>;
+}
+
+/// Trait-object form of [`ChatOutputProcessor`].
+pub type DynChatOutputProcessor = Box<dyn ChatOutputProcessor>;
+
+trait_set! {
+    /// Boxed-stream constraint for decoded text updates.
+    pub(crate) trait DecodedTextEventStream = Stream<Item = Result<DecodedTextEvent>> + Send + 'static;
+    /// Boxed-stream constraint for internal assistant events.
+    pub(crate) trait AssistantEventStream = Stream<Item = Result<AssistantEvent>> + Send + 'static;
+    /// Boxed-stream constraint for public chat events.
+    pub(crate) trait ChatEventStream = Stream<Item = Result<ChatEvent>> + Send + 'static;
+}
+
+/// Generate the northbound tool-call ID using the OpenAI-style `call_<id>`
+/// format.
+// TODO: support other ID scheme like Kimi-K2's
+// `functions.{name}:{global_index}`.
+pub(crate) fn generate_tool_call_id() -> String {
+    format!("call_{}", &Uuid::new_v4().simple().to_string()[..24])
+}
diff --git a/rust/src/chat/src/output/structured.rs b/rust/src/chat/src/output/structured.rs
new file mode 100644
index 000000000000..ed6e3a5130ce
--- /dev/null
+++ b/rust/src/chat/src/output/structured.rs
@@ -0,0 +1,508 @@
+//! Adapts parsed assistant updates into structured chat events.
+//!
+//! This module remains the final assembly stage in `vllm-chat`. Token-to-text
+//! decoding still lives in `decoded.rs`, while reasoning separation and tool
+//! parsing are handled earlier by their own adapters. This stage consumes those
+//! parsed deltas and assembles higher-level assistant content blocks.
+
+use asynk_strim_attr::{TryYielder, try_stream};
+use futures::{StreamExt as _, pin_mut};
+use vllm_text::DecodedLogprobs;
+
+use super::{AssistantEvent, AssistantEventStream};
+use crate::error::Error;
+use crate::event::{
+    AssistantBlockKind, AssistantContentBlock, AssistantMessage, AssistantToolCall, ChatEvent,
+};
+use crate::{FinishReason, Result};
+
+/// One currently open assistant text-like block being assembled from streamed
+/// deltas.
+struct OpenTextBlock {
+    /// Stable position of this block in the final assistant message.
+    index: usize,
+    /// Semantic kind of the block being assembled.
+    kind: AssistantBlockKind,
+    /// Accumulated text payload for the block.
+    text: String,
+}
+
+/// One currently open assistant tool call being assembled from streamed deltas.
+struct OpenToolCall {
+    /// Stable ordinal of this tool call in the assistant tool-call list.
+    index: usize,
+    /// Stable tool-call ID exposed northbound.
+    id: String,
+    /// Function name.
+    name: String,
+    /// Incremental JSON arguments accumulated so far.
+    arguments: String,
+}
+
+/// Per-stream block assembly state.
+///
+/// The adapter maintains at most one open text block and one open tool call,
+/// and appends deltas to them until the semantic kind changes or the stream
+/// terminates.
+struct StructuredEventState {
+    /// Final assistant message assembled so far.
+    message: AssistantMessage,
+    /// Currently open text or reasoning block, if any.
+    open_text_block: Option<OpenTextBlock>,
+    /// Currently open tool call, if any.
+    open_tool_call: Option<OpenToolCall>,
+    /// Next OpenAI-compatible tool-call ordinal.
+    next_tool_call_index: usize,
+}
+
+impl StructuredEventState {
+    /// Create one fresh assembly state for a new streamed response.
+    fn new() -> Self {
+        Self {
+            message: AssistantMessage::default(),
+            open_text_block: None,
+            open_tool_call: None,
+            next_tool_call_index: 0,
+        }
+    }
+
+    /// Convert one parsed text delta into zero or more structured chat events.
+    fn process_text_delta(
+        &mut self,
+        kind: AssistantBlockKind,
+        delta: String,
+    ) -> Result<Vec<ChatEvent>> {
+        let mut events = Vec::new();
+        self.close_open_tool_call(&mut events);
+        self.push_text_delta(kind, delta, &mut events);
+        Ok(events)
+    }
+
+    /// Forward per-update sample metadata without attaching it to text blocks.
+    fn process_logprobs_delta(
+        &mut self,
+        logprobs: Option<DecodedLogprobs>,
+        token_ids: Vec<u32>,
+    ) -> Result<Vec<ChatEvent>> {
+        Ok(vec![ChatEvent::LogprobsDelta {
+            logprobs,
+            token_ids,
+        }])
+    }
+
+    /// Start one new tool call, closing any incompatible open block first.
+    fn start_tool_call(&mut self, id: String, name: String) -> Result<Vec<ChatEvent>> {
+        let mut events = Vec::new();
+        self.close_open_text_block(&mut events);
+        self.close_open_tool_call(&mut events);
+
+        let index = self.next_tool_call_index;
+        self.next_tool_call_index += 1;
+        self.open_tool_call = Some(OpenToolCall {
+            index,
+            id: id.clone(),
+            name: name.clone(),
+            arguments: String::new(),
+        });
+        events.push(ChatEvent::ToolCallStart { index, id, name });
+        Ok(events)
+    }
+
+    /// Append one incremental tool-call arguments delta.
+    fn push_tool_call_arguments(&mut self, delta: String) -> Result<Vec<ChatEvent>> {
+        let mut events = Vec::new();
+        let Some(open_tool_call) = self.open_tool_call.as_mut() else {
+            return Err(Error::ToolCallStreamInvariant {
+                message: "received tool-call arguments delta without an open tool call".to_string(),
+            });
+        };
+        open_tool_call.arguments.push_str(&delta);
+        events.push(ChatEvent::ToolCallArgumentsDelta {
+            index: open_tool_call.index,
+            delta,
+        });
+        Ok(events)
+    }
+
+    /// Close any open block and emit the terminal `Done` event.
+    fn finish(
+        &mut self,
+        prompt_token_count: usize,
+        output_token_count: usize,
+        finish_reason: FinishReason,
+        kv_transfer_params: Option<serde_json::Value>,
+    ) -> Result<Vec<ChatEvent>> {
+        let mut events = Vec::new();
+        self.close_open_text_block(&mut events);
+        self.close_open_tool_call(&mut events);
+        events.push(ChatEvent::Done {
+            message: self.message.clone(),
+            prompt_token_count,
+            output_token_count,
+            finish_reason,
+            kv_transfer_params,
+        });
+        Ok(events)
+    }
+
+    /// Append one semantic text delta to the current block, or open a new block
+    /// when the semantic kind changes.
+    fn push_text_delta(
+        &mut self,
+        kind: AssistantBlockKind,
+        delta: String,
+        events: &mut Vec<ChatEvent>,
+    ) {
+        if delta.is_empty() {
+            return;
+        }
+
+        match self.open_text_block.as_mut() {
+            // If there's a currently open block of the same kind, append to it.
+            Some(open_block) if open_block.kind == kind => {
+                open_block.text.push_str(&delta);
+                events.push(ChatEvent::BlockDelta {
+                    index: open_block.index,
+                    kind,
+                    delta,
+                });
+            }
+            // Otherwise, close the currently open block (if any) and start a
+            // new one.
+            _ => {
+                self.close_open_text_block(events);
+                let index = self.message.content.len();
+                self.open_text_block = Some(OpenTextBlock {
+                    index,
+                    kind,
+                    text: delta.clone(),
+                });
+                events.push(ChatEvent::BlockStart { index, kind });
+                events.push(ChatEvent::BlockDelta { index, kind, delta });
+            }
+        }
+    }
+
+    /// Finalize the currently open text block, if present.
+    fn close_open_text_block(&mut self, events: &mut Vec<ChatEvent>) {
+        let Some(open_block) = self.open_text_block.take() else {
+            return;
+        };
+
+        let block = match open_block.kind {
+            AssistantBlockKind::Text => AssistantContentBlock::Text {
+                text: open_block.text,
+            },
+            AssistantBlockKind::Reasoning => AssistantContentBlock::Reasoning {
+                text: open_block.text,
+            },
+            AssistantBlockKind::ToolCall => {
+                unreachable!("tool calls must not be assembled as text blocks")
+            }
+        };
+        self.message.push_block(block.clone());
+        events.push(ChatEvent::BlockEnd {
+            index: open_block.index,
+            block,
+        });
+    }
+
+    /// Finalize the currently open tool call, if present.
+    fn close_open_tool_call(&mut self, events: &mut Vec<ChatEvent>) {
+        let Some(open_tool_call) = self.open_tool_call.take() else {
+            return;
+        };
+
+        let call = AssistantToolCall {
+            id: open_tool_call.id,
+            name: open_tool_call.name,
+            arguments: open_tool_call.arguments,
+        };
+        self.message.push_block(AssistantContentBlock::ToolCall(call.clone()));
+        events.push(ChatEvent::ToolCallEnd {
+            index: open_tool_call.index,
+            call,
+        });
+    }
+}
+
+/// Wrap one parsed assistant stream into the public structured chat event
+/// stream.
+#[try_stream]
+pub(crate) async fn structured_chat_event_stream(
+    stream: impl AssistantEventStream,
+    mut y: TryYielder<ChatEvent, Error>,
+) -> Result<()> {
+    pin_mut!(stream);
+
+    let mut state = StructuredEventState::new();
+
+    while let Some(event) = stream.next().await.transpose()? {
+        match event {
+            AssistantEvent::Start {
+                prompt_token_ids,
+                prompt_logprobs,
+            } => {
+                y.yield_ok(ChatEvent::Start {
+                    prompt_token_ids,
+                    prompt_logprobs,
+                })
+                .await;
+            }
+            AssistantEvent::TextDelta { kind, delta } => {
+                for next in state.process_text_delta(kind, delta)? {
+                    y.yield_ok(next).await;
+                }
+            }
+            AssistantEvent::LogprobsDelta {
+                logprobs,
+                token_ids,
+            } => {
+                for next in state.process_logprobs_delta(logprobs, token_ids)? {
+                    y.yield_ok(next).await;
+                }
+            }
+            AssistantEvent::ToolCallStart { id, name } => {
+                for next in state.start_tool_call(id, name)? {
+                    y.yield_ok(next).await;
+                }
+            }
+            AssistantEvent::ToolCallArgumentsDelta { delta } => {
+                for next in state.push_tool_call_arguments(delta)? {
+                    y.yield_ok(next).await;
+                }
+            }
+            AssistantEvent::Done {
+                prompt_token_count,
+                output_token_count,
+                finish_reason,
+                kv_transfer_params,
+            } => {
+                for next in state.finish(
+                    prompt_token_count,
+                    output_token_count,
+                    finish_reason,
+                    kv_transfer_params,
+                )? {
+                    y.yield_ok(next).await;
+                }
+            }
+        }
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use futures::{StreamExt as _, stream};
+
+    use super::structured_chat_event_stream;
+    use crate::FinishReason;
+    use crate::error::Error;
+    use crate::event::{AssistantBlockKind, AssistantMessageExt as _, ChatEvent};
+    use crate::output::AssistantEvent;
+
+    #[tokio::test]
+    async fn structured_stream_closes_tool_call_on_done() {
+        let events = stream::iter(vec![
+            Ok(AssistantEvent::ToolCallStart {
+                id: "call_1".to_string(),
+                name: "get_weather".to_string(),
+            }),
+            Ok(AssistantEvent::ToolCallArgumentsDelta {
+                delta: r#"{"city":"Paris"}"#.to_string(),
+            }),
+            Ok(AssistantEvent::Done {
+                prompt_token_count: 1,
+                output_token_count: 1,
+                finish_reason: FinishReason::stop_eos(),
+                kv_transfer_params: None,
+            }),
+        ]);
+
+        let events = structured_chat_event_stream(events)
+            .collect::<Vec<_>>()
+            .await
+            .into_iter()
+            .collect::<crate::Result<Vec<_>>>()
+            .unwrap();
+
+        assert!(matches!(events[0], ChatEvent::ToolCallStart { .. }));
+        assert!(matches!(
+            events[1],
+            ChatEvent::ToolCallArgumentsDelta { .. }
+        ));
+        let ChatEvent::ToolCallEnd { call, .. } = &events[2] else {
+            panic!("expected tool call end");
+        };
+        assert_eq!(call.name, "get_weather");
+        assert_eq!(call.arguments, r#"{"city":"Paris"}"#);
+        let ChatEvent::Done { message, .. } = &events[3] else {
+            panic!("expected done");
+        };
+        let tool_calls = message.tool_calls().collect::<Vec<_>>();
+        assert_eq!(tool_calls.len(), 1);
+        assert_eq!(tool_calls[0].id, "call_1");
+        assert_eq!(tool_calls[0].arguments, r#"{"city":"Paris"}"#);
+    }
+
+    #[tokio::test]
+    async fn structured_stream_closes_previous_tool_call_on_next_start() {
+        let events = stream::iter(vec![
+            Ok(AssistantEvent::ToolCallStart {
+                id: "call_1".to_string(),
+                name: "first".to_string(),
+            }),
+            Ok(AssistantEvent::ToolCallArgumentsDelta {
+                delta: r#"{"a":1}"#.to_string(),
+            }),
+            Ok(AssistantEvent::ToolCallStart {
+                id: "call_2".to_string(),
+                name: "second".to_string(),
+            }),
+            Ok(AssistantEvent::ToolCallArgumentsDelta {
+                delta: r#"{"b":2}"#.to_string(),
+            }),
+            Ok(AssistantEvent::Done {
+                prompt_token_count: 1,
+                output_token_count: 1,
+                finish_reason: FinishReason::stop_eos(),
+                kv_transfer_params: None,
+            }),
+        ]);
+
+        let events = structured_chat_event_stream(events)
+            .collect::<Vec<_>>()
+            .await
+            .into_iter()
+            .collect::<crate::Result<Vec<_>>>()
+            .unwrap();
+
+        assert!(matches!(events[0], ChatEvent::ToolCallStart { .. }));
+        assert!(matches!(
+            events[1],
+            ChatEvent::ToolCallArgumentsDelta { .. }
+        ));
+        let ChatEvent::ToolCallEnd { call, .. } = &events[2] else {
+            panic!("expected first tool call end");
+        };
+        assert_eq!(call.name, "first");
+        assert!(matches!(events[3], ChatEvent::ToolCallStart { .. }));
+        let ChatEvent::Done { message, .. } = &events[6] else {
+            panic!("expected done");
+        };
+        let tool_calls = message.tool_calls().collect::<Vec<_>>();
+        assert_eq!(tool_calls.len(), 2);
+        assert_eq!(tool_calls[0].name, "first");
+        assert_eq!(tool_calls[1].name, "second");
+    }
+
+    #[tokio::test]
+    async fn structured_stream_numbers_tool_calls_independent_of_text_blocks() {
+        let events = stream::iter(vec![
+            Ok(AssistantEvent::TextDelta {
+                kind: AssistantBlockKind::Text,
+                delta: "before".to_string(),
+            }),
+            Ok(AssistantEvent::ToolCallStart {
+                id: "call_1".to_string(),
+                name: "get_weather".to_string(),
+            }),
+            Ok(AssistantEvent::ToolCallArgumentsDelta {
+                delta: r#"{"city":"Paris"}"#.to_string(),
+            }),
+            Ok(AssistantEvent::Done {
+                prompt_token_count: 1,
+                output_token_count: 1,
+                finish_reason: FinishReason::stop_eos(),
+                kv_transfer_params: None,
+            }),
+        ]);
+
+        let events = structured_chat_event_stream(events)
+            .collect::<Vec<_>>()
+            .await
+            .into_iter()
+            .collect::<crate::Result<Vec<_>>>()
+            .unwrap();
+
+        assert!(matches!(
+            events[0],
+            ChatEvent::BlockStart {
+                index: 0,
+                kind: AssistantBlockKind::Text,
+            }
+        ));
+        assert!(matches!(events[2], ChatEvent::BlockEnd { index: 0, .. }));
+        assert!(matches!(
+            events[3],
+            ChatEvent::ToolCallStart { index: 0, .. }
+        ));
+        assert!(matches!(
+            events[4],
+            ChatEvent::ToolCallArgumentsDelta { index: 0, .. }
+        ));
+        assert!(matches!(events[5], ChatEvent::ToolCallEnd { index: 0, .. }));
+    }
+
+    #[tokio::test]
+    async fn structured_stream_closes_tool_call_before_text() {
+        let events = stream::iter(vec![
+            Ok(AssistantEvent::ToolCallStart {
+                id: "call_1".to_string(),
+                name: "get_weather".to_string(),
+            }),
+            Ok(AssistantEvent::ToolCallArgumentsDelta {
+                delta: r#"{"city":"Paris"}"#.to_string(),
+            }),
+            Ok(AssistantEvent::TextDelta {
+                kind: AssistantBlockKind::Text,
+                delta: "done".to_string(),
+            }),
+            Ok(AssistantEvent::Done {
+                prompt_token_count: 1,
+                output_token_count: 1,
+                finish_reason: FinishReason::stop_eos(),
+                kv_transfer_params: None,
+            }),
+        ]);
+
+        let events = structured_chat_event_stream(events)
+            .collect::<Vec<_>>()
+            .await
+            .into_iter()
+            .collect::<crate::Result<Vec<_>>>()
+            .unwrap();
+
+        assert!(matches!(events[2], ChatEvent::ToolCallEnd { .. }));
+        assert!(matches!(
+            events[3],
+            ChatEvent::BlockStart {
+                kind: AssistantBlockKind::Text,
+                ..
+            }
+        ));
+        let ChatEvent::Done { message, .. } = &events[6] else {
+            panic!("expected done");
+        };
+        assert_eq!(message.text(), "done");
+        assert_eq!(message.tool_calls().count(), 1);
+    }
+
+    #[tokio::test]
+    async fn structured_stream_rejects_arguments_without_open_tool_call() {
+        let events = stream::iter(vec![Ok(AssistantEvent::ToolCallArgumentsDelta {
+            delta: "{}".to_string(),
+        })]);
+
+        let err = structured_chat_event_stream(events)
+            .collect::<Vec<_>>()
+            .await
+            .into_iter()
+            .next()
+            .expect("expected one event")
+            .expect_err("expected invariant error");
+
+        assert!(matches!(err, Error::ToolCallStreamInvariant { .. }));
+    }
+}
diff --git a/rust/src/chat/src/parser/mod.rs b/rust/src/chat/src/parser/mod.rs
new file mode 100644
index 000000000000..52e83b3e0471
--- /dev/null
+++ b/rust/src/chat/src/parser/mod.rs
@@ -0,0 +1,107 @@
+pub mod reasoning;
+pub mod tool;
+
+use std::collections::HashMap;
+use std::convert::Infallible;
+use std::fmt;
+use std::str::FromStr;
+
+use serde_with::DeserializeFromStr;
+
+/// Specify which reasoning or tool-call parser implementation to use.
+#[derive(Debug, Clone, PartialEq, Eq, Default, DeserializeFromStr)]
+pub enum ParserSelection {
+    /// Use model-based auto-detection.
+    #[default]
+    Auto,
+    /// Disable the parser entirely.
+    None,
+    /// Force one specific parser implementation by name.
+    Explicit(String),
+}
+
+impl ParserSelection {
+    pub const AUTO_LITERAL: &str = "auto";
+    pub const NONE_LITERAL: &str = "none";
+}
+
+impl FromStr for ParserSelection {
+    type Err = Infallible;
+
+    fn from_str(value: &str) -> Result<Self, Self::Err> {
+        Ok(if value.eq_ignore_ascii_case(Self::AUTO_LITERAL) {
+            Self::Auto
+        } else if value.eq_ignore_ascii_case(Self::NONE_LITERAL) {
+            Self::None
+        } else {
+            Self::Explicit(value.to_owned())
+        })
+    }
+}
+
+impl fmt::Display for ParserSelection {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Auto => f.write_str(Self::AUTO_LITERAL),
+            Self::None => f.write_str(Self::NONE_LITERAL),
+            Self::Explicit(name) => f.write_str(name),
+        }
+    }
+}
+
+/// Registry and model matcher for reasoning and tool parsers.
+#[derive(Clone)]
+pub struct ParserFactory<C> {
+    creators: HashMap<String, C>,
+    patterns: Vec<(String, String)>,
+}
+
+impl<C> Default for ParserFactory<C> {
+    fn default() -> Self {
+        Self {
+            creators: HashMap::new(),
+            patterns: Vec::new(),
+        }
+    }
+}
+
+impl<C> ParserFactory<C> {
+    /// Register a creator for a parser by an exact name.
+    pub fn register_creator(&mut self, name: &str, creator: C) -> &mut Self {
+        self.creators.insert(name.to_string(), creator);
+        self
+    }
+
+    /// Add a case-insensitive substring match from model ID to parser name.
+    pub fn register_pattern(&mut self, pattern: &str, parser_name: &str) -> &mut Self {
+        self.patterns.push((pattern.to_lowercase(), parser_name.to_string()));
+        self
+    }
+
+    /// Return the first registered parser name matching the given model ID.
+    pub fn resolve_name_for_model(&self, model_id: &str) -> Option<&str> {
+        let model_lower = model_id.to_lowercase();
+        self.patterns
+            .iter()
+            .find(|(pattern, _)| model_lower.contains(pattern))
+            .map(|(_, parser_name)| parser_name.as_str())
+    }
+
+    /// Return true if the exact parser name is registered.
+    pub fn contains(&self, name: &str) -> bool {
+        self.creators.contains_key(name)
+    }
+
+    /// Return all registered parser names sorted for stable display.
+    pub fn list(&self) -> Vec<String> {
+        let mut names: Vec<_> = self.creators.keys().cloned().collect();
+        names.sort_unstable();
+        names
+    }
+
+    /// Get the constructor for a parser by its exact registered name, or return
+    /// None if not found.
+    pub fn creator(&self, name: &str) -> Option<&C> {
+        self.creators.get(name)
+    }
+}
diff --git a/rust/src/chat/src/parser/reasoning/mod.rs b/rust/src/chat/src/parser/reasoning/mod.rs
new file mode 100644
index 000000000000..09111d7252f3
--- /dev/null
+++ b/rust/src/chat/src/parser/reasoning/mod.rs
@@ -0,0 +1,120 @@
+//! Reasoning parser registration and selection boundary for `vllm-chat`.
+
+use std::sync::LazyLock;
+
+pub use vllm_reasoning_parser::{
+    CohereCmdReasoningParser, DeepSeekR1ReasoningParser, DeepSeekV3ReasoningParser,
+    DeepSeekV4ReasoningParser, Gemma4ReasoningParser, Glm45ReasoningParser, KimiK2ReasoningParser,
+    KimiReasoningParser, MiniMaxM2ReasoningParser, NemotronV3ReasoningParser, Qwen3ReasoningParser,
+    ReasoningDelta, ReasoningError, ReasoningParser, Step3ReasoningParser,
+};
+use vllm_tokenizer::DynTokenizer;
+
+use crate::parser::ParserFactory;
+
+/// Canonical public names for registered reasoning parsers.
+pub mod names {
+    pub const COHERE_CMD: &str = "cohere_cmd";
+    pub const DEEPSEEK_R1: &str = "deepseek_r1";
+    pub const DEEPSEEK_V3: &str = "deepseek_v3";
+    pub const DEEPSEEK_V4: &str = "deepseek_v4";
+    pub const GEMMA4: &str = "gemma4";
+    pub const GLM45: &str = "glm45";
+    pub const KIMI: &str = "kimi";
+    pub const KIMI_K2: &str = "kimi_k2";
+    pub const MINIMAX_M2: &str = "minimax_m2";
+    pub const NEMOTRON_V3: &str = "nemotron_v3";
+    pub const QWEN3: &str = "qwen3";
+    pub const STEP3: &str = "step3";
+}
+
+/// Constructor signature for one registered reasoning parser implementation.
+type ReasoningParserCreator =
+    fn(DynTokenizer) -> vllm_reasoning_parser::Result<Box<dyn ReasoningParser>>;
+
+/// Registry and model matcher for reasoning parsers.
+pub type ReasoningParserFactory = ParserFactory<ReasoningParserCreator>;
+
+impl ReasoningParserFactory {
+    /// Get the global reasoning parser factory with built-in registrations and
+    /// model mappings.
+    pub fn global() -> &'static Self {
+        static INSTANCE: LazyLock<ReasoningParserFactory> =
+            LazyLock::new(ReasoningParserFactory::new);
+        &INSTANCE
+    }
+
+    /// Create the default registry with built-in parser names and model
+    /// mappings.
+    pub fn new() -> Self {
+        let mut factory = Self::default();
+
+        factory
+            .register_parser::<CohereCmdReasoningParser>(names::COHERE_CMD)
+            .register_parser::<DeepSeekR1ReasoningParser>(names::DEEPSEEK_R1)
+            .register_parser::<DeepSeekV3ReasoningParser>(names::DEEPSEEK_V3)
+            .register_parser::<DeepSeekV4ReasoningParser>(names::DEEPSEEK_V4)
+            .register_parser::<Gemma4ReasoningParser>(names::GEMMA4)
+            .register_parser::<Glm45ReasoningParser>(names::GLM45)
+            .register_parser::<KimiReasoningParser>(names::KIMI)
+            .register_parser::<KimiK2ReasoningParser>(names::KIMI_K2)
+            .register_parser::<MiniMaxM2ReasoningParser>(names::MINIMAX_M2)
+            .register_parser::<NemotronV3ReasoningParser>(names::NEMOTRON_V3)
+            .register_parser::<Qwen3ReasoningParser>(names::QWEN3)
+            .register_parser::<Step3ReasoningParser>(names::STEP3);
+
+        factory
+            .register_pattern("deepseek-r1", names::DEEPSEEK_R1)
+            .register_pattern("deepseek-v4", names::DEEPSEEK_V4)
+            .register_pattern("deepseek_v4", names::DEEPSEEK_V4)
+            .register_pattern("deepseek-v3", names::DEEPSEEK_V3)
+            .register_pattern("gemma-4", names::GEMMA4)
+            .register_pattern("gemma4", names::GEMMA4)
+            .register_pattern("qwen", names::QWEN3)
+            .register_pattern("glm-5", names::GLM45)
+            .register_pattern("glm-4.7", names::GLM45)
+            .register_pattern("glm-4.6", names::GLM45)
+            .register_pattern("glm-4.5", names::GLM45)
+            .register_pattern("kimi-k2", names::KIMI_K2)
+            .register_pattern("kimi", names::KIMI)
+            .register_pattern("step3", names::STEP3)
+            .register_pattern("minimax", names::MINIMAX_M2)
+            .register_pattern("mm-m2", names::MINIMAX_M2)
+            .register_pattern("cohere", names::COHERE_CMD)
+            .register_pattern("command", names::COHERE_CMD)
+            .register_pattern("nano", names::NEMOTRON_V3)
+            .register_pattern("nemotron", names::NEMOTRON_V3);
+
+        factory
+    }
+
+    /// Register one parser type that exposes a static `create()` constructor.
+    pub fn register_parser<T>(&mut self, name: &str) -> &mut Self
+    where
+        T: ReasoningParser + 'static,
+    {
+        self.register_creator(name, T::create)
+    }
+
+    /// Construct a parser from an exact name.
+    pub fn create(
+        &self,
+        name: &str,
+        tokenizer: DynTokenizer,
+    ) -> crate::Result<Box<dyn ReasoningParser>> {
+        let creator = self.creator(name).ok_or_else(|| crate::Error::ParserUnavailableByName {
+            kind: "reasoning",
+            name: name.to_string(),
+            available_names: self.list(),
+        })?;
+
+        creator(tokenizer).map_err(|error| crate::Error::ParserInitialization {
+            kind: "reasoning",
+            name: name.to_string(),
+            error: error.into(),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/rust/src/chat/src/parser/reasoning/tests.rs b/rust/src/chat/src/parser/reasoning/tests.rs
new file mode 100644
index 000000000000..89b5f8e2308a
--- /dev/null
+++ b/rust/src/chat/src/parser/reasoning/tests.rs
@@ -0,0 +1,61 @@
+use std::sync::Arc;
+
+use vllm_tokenizer::Tokenizer;
+
+use super::{ReasoningParserFactory, names};
+
+struct FakeTokenizer;
+
+impl Tokenizer for FakeTokenizer {
+    fn encode(&self, text: &str, _add_special_tokens: bool) -> vllm_tokenizer::Result<Vec<u32>> {
+        Ok(text.chars().map(u32::from).collect())
+    }
+
+    fn decode(
+        &self,
+        token_ids: &[u32],
+        _skip_special_tokens: bool,
+    ) -> vllm_tokenizer::Result<String> {
+        Ok(token_ids
+            .iter()
+            .map(|token_id| char::from_u32(*token_id).unwrap_or('\u{FFFD}'))
+            .collect())
+    }
+
+    fn token_to_id(&self, _token: &str) -> Option<u32> {
+        None
+    }
+}
+
+#[test]
+fn factory_contains_and_lists_registered_parsers() {
+    let factory = ReasoningParserFactory::new();
+    assert!(factory.contains(names::QWEN3));
+    assert!(factory.contains(names::DEEPSEEK_V4));
+    assert!(factory.list().contains(&names::QWEN3.to_string()));
+    assert!(factory.list().contains(&names::DEEPSEEK_V4.to_string()));
+}
+
+#[test]
+fn factory_resolves_deepseek_v4_to_qwen3_alias() {
+    let factory = ReasoningParserFactory::new();
+    assert_eq!(
+        factory.resolve_name_for_model("deepseek-ai/DeepSeek-V4"),
+        Some(names::DEEPSEEK_V4)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("deepseek_v4"),
+        Some(names::DEEPSEEK_V4)
+    );
+}
+
+#[test]
+fn factory_rejects_unknown_parser_names() {
+    let tokenizer = Arc::new(FakeTokenizer);
+    let factory = ReasoningParserFactory::new();
+    let error = match factory.create("missing", tokenizer) {
+        Ok(_) => panic!("expected parser lookup to fail"),
+        Err(error) => error,
+    };
+    assert!(error.to_string().contains("choose from"));
+}
diff --git a/rust/src/chat/src/parser/tool/mod.rs b/rust/src/chat/src/parser/tool/mod.rs
new file mode 100644
index 000000000000..fd1bbedd8225
--- /dev/null
+++ b/rust/src/chat/src/parser/tool/mod.rs
@@ -0,0 +1,140 @@
+//! Tool parser registration and selection boundary for `vllm-chat`.
+
+use std::sync::LazyLock;
+
+pub use vllm_tool_parser::{
+    DeepSeekV3ToolParser, DeepSeekV4ToolParser, DeepSeekV31ToolParser, DeepSeekV32ToolParser,
+    Gemma4ToolParser, Glm45MoeToolParser, Glm47MoeToolParser, HermesToolParser, KimiK2ToolParser,
+    Llama3JsonToolParser, MinimaxM2ToolParser, MistralToolParser, Qwen3CoderToolParser,
+    Qwen3XmlToolParser, ToolCallDelta, ToolParseResult, ToolParser, ToolParserError,
+};
+
+use crate::parser::ParserFactory;
+use crate::request::ChatTool;
+
+/// Canonical public names for registered tool parsers.
+pub mod names {
+    pub const DEEPSEEK_V3: &str = "deepseek_v3";
+    pub const DEEPSEEK_V31: &str = "deepseek_v31";
+    pub const DEEPSEEK_V32: &str = "deepseek_v32";
+    pub const DEEPSEEK_V4: &str = "deepseek_v4";
+    pub const GLM45: &str = "glm45";
+    pub const GLM47: &str = "glm47";
+    pub const GEMMA4: &str = "gemma4";
+    pub const HERMES: &str = "hermes";
+    pub const KIMI_K2: &str = "kimi_k2";
+    pub const LLAMA3_JSON: &str = "llama3_json";
+    pub const LLAMA4_JSON: &str = "llama4_json";
+    pub const MINIMAX_M2: &str = "minimax_m2";
+    pub const MISTRAL: &str = "mistral";
+    pub const QWEN3_CODER: &str = "qwen3_coder";
+    pub const QWEN3_XML: &str = "qwen3_xml";
+}
+
+/// Constructor signature for one registered tool parser implementation.
+type ToolParserCreator = fn(&[ChatTool]) -> vllm_tool_parser::Result<Box<dyn ToolParser>>;
+
+/// Registry and model matcher for tool parsers.
+pub type ToolParserFactory = ParserFactory<ToolParserCreator>;
+
+impl ToolParserFactory {
+    /// Get the global tool parser factory with built-in registrations and model
+    /// mappings.
+    pub fn global() -> &'static Self {
+        static INSTANCE: LazyLock<ToolParserFactory> = LazyLock::new(ToolParserFactory::new);
+        &INSTANCE
+    }
+
+    /// Create the default registry with built-in parser names and model
+    /// mappings.
+    pub fn new() -> Self {
+        let mut factory = Self::default();
+
+        factory
+            .register_parser::<DeepSeekV3ToolParser>(names::DEEPSEEK_V3)
+            .register_parser::<DeepSeekV31ToolParser>(names::DEEPSEEK_V31)
+            .register_parser::<DeepSeekV32ToolParser>(names::DEEPSEEK_V32)
+            .register_parser::<DeepSeekV4ToolParser>(names::DEEPSEEK_V4)
+            .register_parser::<Glm45MoeToolParser>(names::GLM45)
+            .register_parser::<Glm47MoeToolParser>(names::GLM47)
+            .register_parser::<Gemma4ToolParser>(names::GEMMA4)
+            .register_parser::<HermesToolParser>(names::HERMES)
+            .register_parser::<KimiK2ToolParser>(names::KIMI_K2)
+            .register_parser::<Llama3JsonToolParser>(names::LLAMA3_JSON)
+            .register_parser::<Llama3JsonToolParser>(names::LLAMA4_JSON)
+            .register_parser::<MinimaxM2ToolParser>(names::MINIMAX_M2)
+            .register_parser::<MistralToolParser>(names::MISTRAL)
+            .register_parser::<Qwen3XmlToolParser>(names::QWEN3_XML)
+            .register_parser::<Qwen3CoderToolParser>(names::QWEN3_CODER);
+
+        factory
+            .register_pattern("mistral-", names::MISTRAL)
+            .register_pattern("mixtral-", names::MISTRAL)
+            .register_pattern("qwen3-coder", names::QWEN3_CODER)
+            .register_pattern("qwen2.5-coder", names::QWEN3_CODER)
+            .register_pattern("qwen3.5", names::QWEN3_CODER)
+            .register_pattern("qwen", names::QWEN3_XML)
+            .register_pattern("hermes", names::HERMES)
+            .register_pattern("llama-4", names::LLAMA4_JSON)
+            .register_pattern("llama-3.2", names::LLAMA3_JSON)
+            .register_pattern("llama-3.1", names::LLAMA3_JSON)
+            .register_pattern("deepseek-r1", names::DEEPSEEK_V3)
+            .register_pattern("deepseek-v4", names::DEEPSEEK_V4)
+            .register_pattern("deepseek_v4", names::DEEPSEEK_V4)
+            .register_pattern("deepseek-v3.2", names::DEEPSEEK_V32)
+            .register_pattern("deepseek-v3.1", names::DEEPSEEK_V31)
+            .register_pattern("deepseek-v3", names::DEEPSEEK_V3)
+            .register_pattern("glm-5", names::GLM47)
+            .register_pattern("glm-4.7", names::GLM47)
+            .register_pattern("glm-4.6", names::GLM45)
+            .register_pattern("glm-4.5", names::GLM45)
+            .register_pattern("gemma4", names::GEMMA4)
+            .register_pattern("gemma-4", names::GEMMA4)
+            .register_pattern("kimi-k2", names::KIMI_K2)
+            .register_pattern("minimax", names::MINIMAX_M2)
+            .register_pattern("mm-m2", names::MINIMAX_M2);
+
+        factory
+    }
+
+    /// Register one parser type that exposes a static `create()` constructor.
+    pub fn register_parser<T>(&mut self, name: &str) -> &mut Self
+    where
+        T: ToolParser + 'static,
+    {
+        self.register_creator(name, T::create)
+    }
+
+    /// Construct a parser from an exact name.
+    pub fn create(&self, name: &str, tools: &[ChatTool]) -> crate::Result<Box<dyn ToolParser>> {
+        let creator = self.creator(name).ok_or_else(|| crate::Error::ParserUnavailableByName {
+            kind: "tool",
+            name: name.to_string(),
+            available_names: self.list(),
+        })?;
+
+        creator(tools).map_err(|error| crate::Error::ParserInitialization {
+            kind: "tool",
+            name: name.to_string(),
+            error: error.into(),
+        })
+    }
+
+    /// Resolve a parser from model ID and then construct it.
+    pub fn create_for_model(
+        &self,
+        model_id: &str,
+        tools: &[ChatTool],
+    ) -> crate::Result<Box<dyn ToolParser>> {
+        let name = self.resolve_name_for_model(model_id).ok_or_else(|| {
+            crate::Error::ParserUnavailableForModel {
+                kind: "tool",
+                model_id: model_id.to_string(),
+            }
+        })?;
+        self.create(name, tools)
+    }
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/rust/src/chat/src/parser/tool/tests.rs b/rust/src/chat/src/parser/tool/tests.rs
new file mode 100644
index 000000000000..fb2230faeb79
--- /dev/null
+++ b/rust/src/chat/src/parser/tool/tests.rs
@@ -0,0 +1,152 @@
+use vllm_tool_parser::Result;
+
+use super::{ToolParseResult, ToolParser, ToolParserFactory, names};
+use crate::Error;
+use crate::request::ChatTool;
+
+struct FakeToolParser;
+
+impl ToolParser for FakeToolParser {
+    fn create(_tools: &[ChatTool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self))
+    }
+
+    fn preserve_special_tokens(&self) -> bool {
+        true
+    }
+
+    fn push(&mut self, _chunk: &str) -> Result<ToolParseResult> {
+        Ok(ToolParseResult::default())
+    }
+}
+
+#[test]
+fn default_factory_starts_empty() {
+    let factory = ToolParserFactory::default();
+    assert!(factory.list().is_empty());
+}
+
+#[test]
+fn factory_contains_and_creates_registered_parsers() {
+    let mut factory = ToolParserFactory::default();
+    factory.register_parser::<FakeToolParser>("fake");
+
+    assert!(factory.contains("fake"));
+    assert!(factory.list().contains(&"fake".to_string()));
+    factory.create("fake", &[]).unwrap();
+}
+
+#[test]
+fn factory_rejects_unknown_parser_names() {
+    let factory = ToolParserFactory::default();
+    let error = match factory.create("missing", &[]) {
+        Ok(_) => panic!("expected parser lookup to fail"),
+        Err(error) => error,
+    };
+    assert!(matches!(error, Error::ParserUnavailableByName { .. }));
+}
+
+#[test]
+fn factory_rejects_unknown_models() {
+    let factory = ToolParserFactory::default();
+    let error = match factory.create_for_model("definitely-unknown-model", &[]) {
+        Ok(_) => panic!("expected model lookup to fail"),
+        Err(error) => error,
+    };
+    assert!(matches!(error, Error::ParserUnavailableForModel { .. }));
+}
+
+#[test]
+fn factory_creates_registered_parser_for_model() {
+    let mut factory = ToolParserFactory::default();
+    factory
+        .register_parser::<FakeToolParser>("fake")
+        .register_pattern("fake-model", "fake");
+
+    factory.create_for_model("my-fake-model-v1", &[]).unwrap();
+}
+
+#[test]
+fn factory_new_resolves_default_patterns() {
+    let factory = ToolParserFactory::new();
+
+    assert_eq!(
+        factory.resolve_name_for_model("Qwen/Qwen3.5-0.8B"),
+        Some(names::QWEN3_CODER)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("Qwen/Qwen3-0.6B"),
+        Some(names::QWEN3_XML)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("Qwen/Qwen3-Coder-30B"),
+        Some(names::QWEN3_CODER)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("meta-llama-4-maverick"),
+        Some(names::LLAMA4_JSON)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("meta-llama-3.2-3b-instruct"),
+        Some(names::LLAMA3_JSON)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("meta-llama/Llama-3.1-8B-Instruct"),
+        Some(names::LLAMA3_JSON)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("deepseek-ai/DeepSeek-V4"),
+        Some(names::DEEPSEEK_V4)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("deepseek-ai/DeepSeek-V3.2-Exp"),
+        Some(names::DEEPSEEK_V32)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("deepseek-ai/DeepSeek-V4-Chat"),
+        Some(names::DEEPSEEK_V4)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("deepseek_v4"),
+        Some(names::DEEPSEEK_V4)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("deepseek-ai/DeepSeek-R1-0528"),
+        Some(names::DEEPSEEK_V3)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("deepseek-ai/DeepSeek-V3.1"),
+        Some(names::DEEPSEEK_V31)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("zai-org/GLM-5-32B-Chat"),
+        Some(names::GLM47)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("zai-org/GLM-5.1-32B-Instruct"),
+        Some(names::GLM47)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("glm-4.7"),
+        Some(names::GLM47)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("google/gemma-4-27b-it"),
+        Some(names::GEMMA4)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("NousResearch/Hermes-3-Llama-3.1-8B"),
+        Some(names::HERMES)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("MiniMax/MiniMax-M2-01"),
+        Some(names::MINIMAX_M2)
+    );
+    assert_eq!(
+        factory.resolve_name_for_model("org/mm-m2-base"),
+        Some(names::MINIMAX_M2)
+    );
+}
diff --git a/rust/src/chat/src/renderer/deepseek_v32/encoding.rs b/rust/src/chat/src/renderer/deepseek_v32/encoding.rs
new file mode 100644
index 000000000000..978255192768
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v32/encoding.rs
@@ -0,0 +1,555 @@
+//! DeepSeek V3.2 prompt renderer.
+
+use std::collections::{HashMap, HashSet};
+use std::fmt::Write as _;
+
+use serde::Serialize;
+use serde_json::Value;
+use serde_json_fmt::JsonFormat;
+
+use crate::error::{Error, Result};
+use crate::request::{ChatContent, ChatMessage, ChatRequest, ChatRole, ChatTool};
+use crate::{AssistantContentBlock, AssistantMessageExt, AssistantToolCall};
+
+const BOS_TOKEN: &str = "<｜begin▁of▁sentence｜>";
+const EOS_TOKEN: &str = "<｜end▁of▁sentence｜>";
+const THINKING_START_TOKEN: &str = "<think>";
+const THINKING_END_TOKEN: &str = "</think>";
+const DSML_TOKEN: &str = "｜DSML｜";
+
+/// DeepSeek uses `"chat"` vs `"thinking"` mode names. Keep the split explicit
+/// here so the render branches stay easy to read.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum ThinkingMode {
+    Chat,
+    Thinking,
+}
+
+/// Tool schema shape rendered inside the `<functions>` block.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Serialize)]
+struct RenderedToolSchema<'a> {
+    name: &'a str,
+    description: Option<&'a str>,
+    parameters: &'a Value,
+    strict: Option<bool>,
+}
+
+/// Render one chat request into the final prompt string.
+pub(super) fn render_request(request: &ChatRequest) -> Result<String> {
+    let thinking_mode = match request.enable_thinking()?.unwrap_or(false) {
+        true => ThinkingMode::Thinking,
+        false => ThinkingMode::Chat,
+    };
+    let drop_thinking = matches!(
+        request.messages.last().map(ChatMessage::role),
+        Some(ChatRole::User | ChatRole::Developer)
+    );
+    let render_offset = isize::from(request.tool_parsing_enabled());
+    let last_user_render_index =
+        find_last_user_render_index(request.messages.as_slice(), render_offset);
+    let last_user_actual_index = find_last_user_actual_index(request.messages.as_slice());
+    let mut prompt = String::from(BOS_TOKEN);
+
+    if request.tool_parsing_enabled() {
+        render_system_message(&mut prompt, None, &request.tools)?;
+    }
+
+    for (message_index, message) in request.messages.iter().enumerate() {
+        render_message(
+            &mut prompt,
+            request.messages.as_slice(),
+            message_index,
+            message,
+            render_offset,
+            last_user_render_index,
+            last_user_actual_index,
+            thinking_mode,
+            drop_thinking,
+        )?;
+    }
+
+    Ok(prompt)
+}
+
+/// Find the last user-like turn in render order.
+///
+/// `render_offset` is `1` when a synthetic tool-only system turn is rendered
+/// before the real request messages, and `0` otherwise.
+fn find_last_user_render_index(messages: &[ChatMessage], render_offset: isize) -> isize {
+    messages
+        .iter()
+        .rposition(|message| matches!(message.role(), ChatRole::User | ChatRole::Developer))
+        .map(|index| index as isize + render_offset)
+        .unwrap_or(-1)
+}
+
+/// Render one real request message, using `render_offset` to account for any
+/// synthetic tool-only system turn that was already emitted before the loop.
+fn render_message(
+    out: &mut String,
+    messages: &[ChatMessage],
+    message_index: usize,
+    message: &ChatMessage,
+    render_offset: isize,
+    last_user_render_index: isize,
+    last_user_actual_index: usize,
+    thinking_mode: ThinkingMode,
+    drop_thinking: bool,
+) -> Result<()> {
+    let render_index = message_index as isize + render_offset;
+    let opens_thinking = render_index == last_user_render_index;
+    let after_last_user_turn = render_index > last_user_render_index;
+    let after_or_at_last_user_turn = render_index >= last_user_render_index;
+
+    match message {
+        ChatMessage::System { content } => render_system_message(out, Some(content), &[]),
+        ChatMessage::Developer { content, tools } => render_developer_message(
+            out,
+            content,
+            tools.as_deref().unwrap_or(&[]),
+            thinking_mode == ThinkingMode::Thinking && opens_thinking,
+        ),
+        ChatMessage::User { content } => render_user_message(
+            out,
+            content,
+            thinking_mode == ThinkingMode::Thinking && opens_thinking,
+        ),
+        ChatMessage::Assistant { content } => render_assistant_message(
+            out,
+            thinking_mode == ThinkingMode::Thinking && after_last_user_turn,
+            content,
+            should_keep_assistant_reasoning(
+                message_index,
+                last_user_actual_index,
+                thinking_mode,
+                drop_thinking,
+            ),
+            // TODO: Respect `continue_final_message` and map it to DeepSeek's
+            // prefix-style final-assistant continuation behavior.
+            false,
+        ),
+        ChatMessage::ToolResponse { content, .. } => render_tool_message(
+            out,
+            messages,
+            message_index,
+            thinking_mode == ThinkingMode::Thinking && after_or_at_last_user_turn,
+            content,
+        ),
+    }
+}
+
+/// Historical assistant reasoning is dropped in thinking mode when the final
+/// request turn is a new user-like message.
+fn should_keep_assistant_reasoning(
+    actual_index: usize,
+    last_user_actual_index: usize,
+    thinking_mode: ThinkingMode,
+    drop_thinking: bool,
+) -> bool {
+    !(thinking_mode == ThinkingMode::Thinking
+        && drop_thinking
+        && actual_index < last_user_actual_index)
+}
+
+/// Return the last user/developer turn in the real request message list.
+fn find_last_user_actual_index(messages: &[ChatMessage]) -> usize {
+    messages
+        .iter()
+        .rposition(|message| matches!(message.role(), ChatRole::User | ChatRole::Developer))
+        .unwrap_or(usize::MAX)
+}
+
+/// Render a system turn, optionally followed by the tool preamble.
+fn render_system_message(
+    out: &mut String,
+    content: Option<&ChatContent>,
+    tools: &[ChatTool],
+) -> Result<()> {
+    if let Some(content) = content {
+        write_chat_content(out, content)?;
+    }
+    if !tools.is_empty() {
+        out.push_str("\n\n");
+        render_tools(out, tools)?;
+    }
+    Ok(())
+}
+
+/// Developer messages are wrapped into the same user-like turn shape as real
+/// user messages, but can also carry message-local tools.
+fn render_developer_message(
+    out: &mut String,
+    content: &ChatContent,
+    tools: &[ChatTool],
+    opens_thinking: bool,
+) -> Result<()> {
+    if content.is_empty() {
+        return Err(Error::ChatTemplate(
+            "invalid DeepSeek V3.2 developer message: empty content".to_string(),
+        ));
+    }
+
+    out.push_str("<｜User｜>");
+    if !tools.is_empty() {
+        out.push_str("\n\n");
+        render_tools(out, tools)?;
+    }
+    out.push_str("\n\n# The user's message is: ");
+    write_chat_content(out, content)?;
+    write_user_like_suffix(out, opens_thinking);
+    Ok(())
+}
+
+/// Plain user turns share the same wrapper shape as developer turns without the
+/// developer-specific preamble.
+fn render_user_message(
+    out: &mut String,
+    content: &ChatContent,
+    opens_thinking: bool,
+) -> Result<()> {
+    out.push_str("<｜User｜>");
+    write_chat_content(out, content)?;
+    write_user_like_suffix(out, opens_thinking);
+    Ok(())
+}
+
+/// Shared trailing wrapper used by both real user turns and native developer
+/// turns after their content has already been written.
+// TODO: respect `add_generation_prompt` option
+fn write_user_like_suffix(out: &mut String, opens_thinking: bool) {
+    out.push_str("<｜Assistant｜>");
+    if opens_thinking {
+        out.push_str(THINKING_START_TOKEN);
+    } else {
+        out.push_str(THINKING_END_TOKEN);
+    }
+}
+
+/// Render one tool result turn and decide whether it opens or closes the shared
+/// `<function_results>` block for the preceding assistant tool-call message.
+fn render_tool_message(
+    out: &mut String,
+    messages: &[ChatMessage],
+    message_index: usize,
+    resumes_thinking: bool,
+    _content: &ChatContent,
+) -> Result<()> {
+    let (block_start, block_end) = tool_response_block_bounds(messages, message_index);
+    if message_index != block_start {
+        return Ok(());
+    }
+
+    let Some(prev_assistant_idx) = previous_assistant_actual_index(messages, block_start) else {
+        return Err(Error::ChatTemplate(
+            "invalid DeepSeek V3.2 tool message: missing previous assistant message".to_string(),
+        ));
+    };
+
+    let ChatMessage::Assistant {
+        content: assistant_content,
+    } = &messages[prev_assistant_idx]
+    else {
+        return Err(Error::ChatTemplate(
+            "invalid DeepSeek V3.2 tool message: previous non-tool message is not assistant"
+                .to_string(),
+        ));
+    };
+
+    let assistant_tool_calls = assistant_content.tool_calls().collect::<Vec<_>>();
+    if assistant_tool_calls.is_empty() {
+        return Err(Error::ChatTemplate(
+            "invalid DeepSeek V3.2 tool message: previous assistant message has no tool calls"
+                .to_string(),
+        ));
+    }
+
+    let mut expected_tool_call_ids = HashSet::with_capacity(assistant_tool_calls.len());
+    for tool_call in &assistant_tool_calls {
+        if !expected_tool_call_ids.insert(tool_call.id.as_str()) {
+            return Err(Error::ChatTemplate(
+                "invalid DeepSeek V3.2 assistant tool calls: duplicate tool_call_id".to_string(),
+            ));
+        }
+    }
+
+    let mut tool_results_by_id = HashMap::with_capacity(assistant_tool_calls.len());
+    for message in &messages[block_start..block_end] {
+        let ChatMessage::ToolResponse {
+            content,
+            tool_call_id,
+        } = message
+        else {
+            unreachable!("tool response block should only contain tool messages");
+        };
+
+        if !expected_tool_call_ids.contains(tool_call_id.as_str()) {
+            return Err(Error::ChatTemplate(format!(
+                "invalid DeepSeek V3.2 tool message: unknown tool_call_id `{tool_call_id}`"
+            )));
+        }
+
+        if tool_results_by_id.insert(tool_call_id.as_str(), content).is_some() {
+            return Err(Error::ChatTemplate(format!(
+                "invalid DeepSeek V3.2 tool message: duplicate tool_call_id `{tool_call_id}`"
+            )));
+        }
+    }
+
+    if tool_results_by_id.len() != assistant_tool_calls.len() {
+        return Err(Error::ChatTemplate(
+            "invalid DeepSeek V3.2 tool messages: missing tool result for assistant tool call"
+                .to_string(),
+        ));
+    }
+
+    out.push_str("\n\n<function_results>");
+    for tool_call in assistant_tool_calls {
+        let content = tool_results_by_id
+            .get(tool_call.id.as_str())
+            .expect("validated tool_call_id set should be complete");
+        out.push_str("\n<result>");
+        write_chat_content(out, content)?;
+        out.push_str("</result>");
+    }
+
+    out.push_str("\n</function_results>");
+    out.push_str("\n\n");
+    if resumes_thinking {
+        out.push_str(THINKING_START_TOKEN);
+    } else {
+        out.push_str(THINKING_END_TOKEN);
+    }
+
+    Ok(())
+}
+
+/// Return the contiguous tool-response block containing `actual_index`.
+fn tool_response_block_bounds(messages: &[ChatMessage], actual_index: usize) -> (usize, usize) {
+    let mut block_start = actual_index;
+    while block_start > 0 && matches!(messages[block_start - 1], ChatMessage::ToolResponse { .. }) {
+        block_start -= 1;
+    }
+
+    let mut block_end = actual_index + 1;
+    while block_end < messages.len()
+        && matches!(messages[block_end], ChatMessage::ToolResponse { .. })
+    {
+        block_end += 1;
+    }
+
+    (block_start, block_end)
+}
+
+/// Return the most recent assistant turn before `actual_index`.
+fn previous_assistant_actual_index(messages: &[ChatMessage], actual_index: usize) -> Option<usize> {
+    messages[..actual_index]
+        .iter()
+        .rposition(|message| matches!(message, ChatMessage::Assistant { .. }))
+}
+
+/// Render one assistant turn, including optional reasoning, DSML tool calls,
+/// and the trailing EOS marker.
+fn render_assistant_message(
+    out: &mut String,
+    after_last_user_turn: bool,
+    content: &[AssistantContentBlock],
+    keep_reasoning: bool,
+    prefix: bool,
+) -> Result<()> {
+    let has_reasoning = keep_reasoning && content.has_reasoning();
+    let has_tool_calls = content.has_tool_calls();
+
+    if !has_tool_calls && prefix {
+        write_assistant_text(out, content);
+        return Ok(());
+    }
+
+    if after_last_user_turn {
+        if !has_reasoning && !has_tool_calls {
+            return Err(Error::ChatTemplate(
+                "invalid DeepSeek V3.2 assistant message after last user message: expected reasoning or tool calls"
+                    .to_string(),
+            ));
+        }
+
+        if has_reasoning {
+            write_assistant_reasoning(out, content);
+        }
+        out.push_str(THINKING_END_TOKEN);
+    }
+
+    write_assistant_text(out, content);
+
+    if has_tool_calls {
+        out.push_str("\n\n<｜DSML｜function_calls>\n");
+        for (index, tool_call) in content.tool_calls().enumerate() {
+            if index > 0 {
+                out.push('\n');
+            }
+            render_tool_call(out, tool_call)?;
+        }
+        out.push_str("\n</｜DSML｜function_calls>");
+    }
+
+    out.push_str(EOS_TOKEN);
+    Ok(())
+}
+
+/// Render one assistant tool call in DSML XML-like format.
+fn render_tool_call(out: &mut String, tool_call: &AssistantToolCall) -> Result<()> {
+    writeln!(out, "<{DSML_TOKEN}invoke name=\"{}\">", tool_call.name)
+        .expect("writing to String cannot fail");
+    encode_arguments_to_dsml(out, tool_call)?;
+    write!(out, "\n</{DSML_TOKEN}invoke>").expect("writing to String cannot fail");
+    Ok(())
+}
+
+/// Convert one assistant tool-call arguments object into DSML parameter form.
+///
+/// String values are emitted raw with `string="true"`, while all other JSON
+/// values are rendered with JSON syntax and `string="false"`.
+fn encode_arguments_to_dsml(out: &mut String, tool_call: &AssistantToolCall) -> Result<()> {
+    let arguments: Value = serde_json::from_str(&tool_call.arguments).map_err(|error| {
+        Error::ChatTemplate(format!(
+            "assistant tool call has invalid JSON arguments for DeepSeek V3.2: {error}"
+        ))
+    })?;
+    let Some(arguments) = arguments.as_object() else {
+        return Err(Error::ChatTemplate(
+            "assistant tool call arguments for DeepSeek V3.2 must be a JSON object".to_string(),
+        ));
+    };
+
+    let mut wrote_parameter = false;
+    for (key, value) in arguments {
+        if wrote_parameter {
+            out.push('\n');
+        }
+
+        let is_string = matches!(value, Value::String(_));
+        write!(
+            out,
+            "<{DSML_TOKEN}parameter name=\"{key}\" string=\"{}\">",
+            if is_string { "true" } else { "false" }
+        )
+        .expect("writing to String cannot fail");
+
+        match value {
+            Value::String(value) => out.push_str(value),
+            value => out.push_str(&json_dumps(value)?),
+        }
+
+        write!(out, "</{DSML_TOKEN}parameter>").expect("writing to String cannot fail");
+        wrote_parameter = true;
+    }
+
+    Ok(())
+}
+
+/// Render the full tool preamble shown to the model.
+fn render_tools(out: &mut String, tools: &[ChatTool]) -> Result<()> {
+    out.push_str(
+        r#"## Tools
+
+You have access to a set of tools you can use to answer the user's question.
+You can invoke functions by writing a "<｜DSML｜function_calls>" block like the following as part of your reply to the user:
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="$FUNCTION_NAME">
+<｜DSML｜parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</｜DSML｜parameter>
+...
+</｜DSML｜invoke>
+<｜DSML｜invoke name="$FUNCTION_NAME2">
+...
+</｜DSML｜invoke>
+</｜DSML｜function_calls>
+
+String and scalar parameters should be specified as is without any escaping or quotes, while lists and objects should use JSON format. The "string" attribute should be set to "true" for string type parameters and "false" for other types (numbers, booleans, arrays, objects).
+
+If the thinking_mode is enabled, then after function results you should strongly consider outputting a thinking block. Here is an example:
+
+<｜DSML｜function_calls>
+...
+</｜DSML｜function_calls>
+
+<function_results>
+...
+</function_results>
+
+<think>...thinking about results</think>
+
+Here are the functions available in JSONSchema format:
+<functions>
+"#,
+    );
+
+    for (index, tool) in tools.iter().enumerate() {
+        if index > 0 {
+            out.push('\n');
+        }
+        render_tool_schema(out, tool)?;
+    }
+
+    out.push_str("\n</functions>\n");
+    Ok(())
+}
+
+/// Serialize one typed tool schema into the JSON shape embedded inside
+/// `<functions>`.
+fn render_tool_schema(out: &mut String, tool: &ChatTool) -> Result<()> {
+    out.push_str(&json_dumps(&RenderedToolSchema {
+        name: &tool.name,
+        description: tool.description.as_deref(),
+        parameters: &tool.parameters,
+        strict: tool.strict,
+    })?);
+    Ok(())
+}
+
+/// Write chat content directly into the destination buffer without flattening
+/// it into an intermediate `String`.
+fn write_chat_content(out: &mut String, content: &ChatContent) -> Result<()> {
+    match content {
+        ChatContent::Text(text) => out.push_str(text),
+        ChatContent::Parts(parts) => {
+            for part in parts {
+                out.push_str(part.as_text()?);
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Write all reasoning blocks in encounter order.
+fn write_assistant_reasoning(out: &mut String, content: &[AssistantContentBlock]) {
+    for block in content {
+        if let AssistantContentBlock::Reasoning { text } = block {
+            out.push_str(text);
+        }
+    }
+}
+
+/// Write all visible assistant text blocks in encounter order.
+fn write_assistant_text(out: &mut String, content: &[AssistantContentBlock]) {
+    for block in content {
+        if let AssistantContentBlock::Text { text } = block {
+            out.push_str(text);
+        }
+    }
+}
+
+/// Compact JSON serialization used by this renderer for exact prompt text.
+fn json_dumps<T: Serialize>(value: &T) -> Result<String> {
+    JsonFormat::new()
+        .comma(", ")
+        .expect("literal comma separator is valid JSON")
+        .colon(": ")
+        .expect("literal colon separator is valid JSON")
+        .ascii(false)
+        .format_to_string(value)
+        .map_err(|error| {
+            Error::ChatTemplate(format!(
+                "failed to serialize DeepSeek V3.2 JSON payload: {error}"
+            ))
+        })
+}
diff --git a/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_input.json b/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_input.json
new file mode 100644
index 000000000000..0582611470d4
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_input.json
@@ -0,0 +1,149 @@
+{
+    "tools": [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_datetime",
+                "description": "Get the current date and time",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "timezone": {
+                            "type": "string",
+                            "description": "The timezone, e.g. Asia/Shanghai, UTC"
+                        }
+                    },
+                    "required": ["timezone"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the weather for a specific date and location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city name, e.g. Beijing, Hangzhou"
+                        },
+                        "date": {
+                            "type": "string",
+                            "description": "The date in YYYY-MM-DD format"
+                        }
+                    },
+                    "required": ["location", "date"]
+                }
+            }
+        }
+    ],
+    "messages": [
+        {
+            "role": "system",
+            "content": "You are a helpful Assistant."
+        },
+        {
+            "role": "user",
+            "content": "明天杭州和北京的天气怎么样？"
+        },
+        {
+            "role": "assistant",
+            "reasoning_content": "用户询问明天的天气，我需要先获取当前日期来计算明天的日期📅",
+            "tool_calls": [
+                {
+                    "id": "call_xK9mN3pL2qR8vT5wY6hZ1aB4",
+                    "type": "function",
+                    "function": {
+                        "arguments": "{\"timezone\": \"Asia/Shanghai\"}",
+                        "name": "get_datetime"
+                    }
+                }
+            ]
+        },
+        {
+            "tool_call_id": "call_xK9mN3pL2qR8vT5wY6hZ1aB4",
+            "role": "tool",
+            "content": "{\"current_date\": \"2024-01-15\", \"current_time\": \"14:30:00\", \"timezone\": \"Asia/Shanghai\"}"
+        },
+        {
+            "role": "assistant",
+            "reasoning_content": "现在知道今天是2024-01-15，明天就是2024-01-16。接下来查询杭州和北京明天的天气🌤️",
+            "tool_calls": [
+                {
+                    "id": "call_bN7kR9mX3pQ2wL5vY8jZ4cD6",
+                    "type": "function",
+                    "function": {
+                        "arguments": "{\"location\": \"Hangzhou\", \"date\": \"2024-01-16\"}",
+                        "name": "get_weather"
+                    }
+                },
+                {
+                    "id": "call_dP9mL7kX5rT4yN3wZ2hV8eF1",
+                    "type": "function",
+                    "function": {
+                        "arguments": "{\"location\": \"Beijing\", \"date\": \"2024-01-16\"}",
+                        "name": "get_weather"
+                    }
+                }
+            ]
+        },
+        {
+            "tool_call_id": "call_bN7kR9mX3pQ2wL5vY8jZ4cD6",
+            "role": "tool",
+            "content": "{\"location\": \"Hangzhou\", \"date\": \"2024-01-16\", \"temperature_high\": \"12\", \"temperature_low\": \"5\", \"weather\": \"多云\", \"humidity\": \"65%\"}"
+        },
+        {
+            "tool_call_id": "call_dP9mL7kX5rT4yN3wZ2hV8eF1",
+            "role": "tool",
+            "content": "{\"location\": \"Beijing\", \"date\": \"2024-01-16\", \"temperature_high\": \"-2\", \"temperature_low\": \"-8\", \"weather\": \"晴\", \"humidity\": \"30%\"}"
+        },
+        {
+            "role": "assistant",
+            "reasoning_content": "已获取两个城市明天的天气信息，现在整理给用户✨",
+            "content": "根据查询结果，明天（2024年1月16日）的天气情况如下：\n\n**杭州**：\n- 天气：多云\n- 最高温度：12°C\n- 最低温度：5°C\n- 湿度：65%\n\n**北京**：\n- 天气：晴\n- 最高温度：-2°C\n- 最低温度：-8°C\n- 湿度：30%\n\n杭州明天会比较温暖但有些多云，而北京会很冷但是晴天。建议在北京的朋友要注意保暖！"
+        },
+        {
+            "role": "user",
+            "content": "谢谢！那后天呢？"
+        },
+        {
+            "role": "assistant",
+            "reasoning_content": "用户现在问后天的天气，后天是2024-01-17，我可以直接查询（因为已知今天日期）🗓️",
+            "tool_calls": [
+                {
+                    "id": "call_fR3nK8mV7pL4xT2yW9jB5gH3",
+                    "type": "function",
+                    "function": {
+                        "arguments": "{\"location\": \"Hangzhou\", \"date\": \"2024-01-17\"}",
+                        "name": "get_weather"
+                    }
+                },
+                {
+                    "id": "call_hT5pN2kY9rV6zL3wX1mD7jK8",
+                    "type": "function",
+                    "function": {
+                        "arguments": "{\"location\": \"Beijing\", \"date\": \"2024-01-17\"}",
+                        "name": "get_weather"
+                    }
+                }
+            ]
+        },
+        {
+            "tool_call_id": "call_fR3nK8mV7pL4xT2yW9jB5gH3",
+            "role": "tool",
+            "content": "{\"location\": \"Hangzhou\", \"date\": \"2024-01-17\", \"temperature_high\": \"15\", \"temperature_low\": \"8\", \"weather\": \"小雨\", \"humidity\": \"80%\"}"
+        },
+        {
+            "tool_call_id": "call_hT5pN2kY9rV6zL3wX1mD7jK8",
+            "role": "tool",
+            "content": "{\"location\": \"Beijing\", \"date\": \"2024-01-17\", \"temperature_high\": \"0\", \"temperature_low\": \"-6\", \"weather\": \"多云\", \"humidity\": \"45%\"}"
+        },
+        {
+            "role": "assistant",
+            "reasoning_content": "获取到后天的天气数据，整理回复给用户📝",
+            "content": "后天（2024年1月17日）的天气情况：\n\n**杭州**：\n- 天气：小雨\n- 最高温度：15°C\n- 最低温度：8°C\n- 湿度：80%\n\n**北京**：\n- 天气：多云\n- 最高温度：0°C\n- 最低温度：-6°C\n- 湿度：45%\n\n杭州后天会有小雨，温度略有回升，记得带伞。北京会稍微暖和一点，但依然很冷，请继续做好保暖措施。"
+        }
+    ]
+}
diff --git a/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_input_search_w_date.json b/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_input_search_w_date.json
new file mode 100644
index 000000000000..ccfc2ee73321
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_input_search_w_date.json
@@ -0,0 +1,732 @@
+{
+    "messages": [
+        {
+            "role": "developer",
+            "content": "帮我调研一下，目前有哪些针对search agent的benchmark？详细介绍各自的特点、使用场景、例题。\n\n<system-reminder>\n## Today’s Date\n2025-11-27, Thursday.\n</system-reminder>",
+            "tools": [
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "description": "Searches for information related to query and displays topn results.",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "query": {
+                                    "type": "string",
+                                    "description": "The search query string"
+                                },
+                                "topn": {
+                                    "type": "integer",
+                                    "description": "Number of top results to display",
+                                    "default": 10
+                                },
+                                "source": {
+                                    "type": "string",
+                                    "description": "Source to search within",
+                                    "enum": [
+                                        "web",
+                                        "news"
+                                    ],
+                                    "default": "web"
+                                }
+                            },
+                            "required": [
+                                "query"
+                            ],
+                            "additionalProperties": false,
+                            "$schema": "http://json-schema.org/draft-07/schema#"
+                        }
+                    }
+                },
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "description": "Opens the link id from the page indicated by cursor starting at line number loc, showing num_lines lines. Valid link ids are displayed with the formatting: 【{id}†.*】. If cursor is not provided, the most recent page is implied. If id is a string, it is treated as a fully qualified URL associated with source. If loc is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available. Use this function without id to scroll to a new location of an opened page.",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "id": {
+                                    "description": "Link ID (number) or fully qualified URL (string)",
+                                    "anyOf": [
+                                        {
+                                            "type": "integer"
+                                        },
+                                        {
+                                            "type": "string"
+                                        }
+                                    ],
+                                    "default": -1
+                                },
+                                "cursor": {
+                                    "type": "integer",
+                                    "description": "Page cursor indicator",
+                                    "default": -1
+                                },
+                                "loc": {
+                                    "type": "integer",
+                                    "description": "Starting line number",
+                                    "default": -1
+                                },
+                                "num_lines": {
+                                    "type": "integer",
+                                    "description": "Number of lines to show",
+                                    "default": -1
+                                },
+                                "view_source": {
+                                    "type": "boolean",
+                                    "description": "Whether to view source",
+                                    "default": false
+                                },
+                                "source": {
+                                    "type": "string",
+                                    "description": "Source associated with the URL",
+                                    "enum": [
+                                        "web",
+                                        "news"
+                                    ],
+                                    "default": "web"
+                                }
+                            },
+                            "additionalProperties": false,
+                            "$schema": "http://json-schema.org/draft-07/schema#"
+                        }
+                    }
+                },
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "find",
+                        "description": "Finds exact matches of pattern in the current page, or the page given by cursor.",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "pattern": {
+                                    "type": "string",
+                                    "description": "The exact pattern to search for"
+                                },
+                                "cursor": {
+                                    "type": "integer",
+                                    "description": "Page cursor to search in",
+                                    "default": -1
+                                }
+                            },
+                            "required": [
+                                "pattern"
+                            ],
+                            "additionalProperties": false,
+                            "$schema": "http://json-schema.org/draft-07/schema#"
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "用户想调研目前针对搜索智能体（search agent）的基准测试（benchmark），要求详细介绍各自的特点、使用场景和例题。我需要提供一份全面的概述，涵盖现有的基准测试，例如WebGPT、WebShop、ALFWorld、HotpotQA、FEVER、TriviaQA、Natural Questions、SearchQA、MS MARCO、BEIR、LoTTE、MIRACL、Zero-shot IR基准等。同时，需要区分这些基准测试是针对检索任务、问答任务，还是具体针对搜索智能体的评估。此外，还应考虑那些专门为评估搜索智能体（即能够与环境交互、执行搜索、浏览网页、使用工具的智能体）而设计的基准测试，例如WebGPT、WebShop、ALFWorld、Mind2Web、WebArena、VisualWebArena、SearchBench、WebVLN、WebNav等。我需要详细介绍每个基准测试的特点、使用场景，并举例说明。为了确保信息的全面性和时效性，我将同时进行两项搜索：一项是针对“search agent benchmark”的通用搜索，以获取广泛的基准测试信息；另一项是针对“搜索智能体 基准测试”的中文搜索，以获取更符合用户语言习惯和可能更具体的中文资料。",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-1-0",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"search agent benchmark 2024\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-1-1",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"搜索智能体 基准测试\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[0] search agent benchmark 2024 (Search_Results/search agent benchmark 2024)\n**viewing lines [0 - 39] of 39**\n\nL0: \nL1: URL: Search_Results/search agent benchmark 2024\nL2: # Search Results\nL3: \nL4:   * 【0†HSCodeComp: A Realistic and Expert-level Benchmark for ...; publish_date:\nL5:  none†arxiv.org】 Oct 22, 2025 — To fill this gap, we introduce HSCodeComp, the \nL6: first realistic, expert-level e-commerce benchmark designed to evaluate deep \nL7: search agents in ...\nL8:   * 【1†open-compass/GTA - A Benchmark for General Tool Agents; publish_date: \nL9: none†github.com】 GTA is a benchmark to evaluate the tool-use capability of LLM-\nL10: based agents in real-world scenarios. It features three main aspects.\nL11:   * 【2†Benchmarking real-time trust scoring across five AI Agent ...; \nL12: publish_date: none†cleanlab.ai】 Aug 20, 2025 — This article evaluates 5 AI Agent\nL13:  architectures over the BOLAA (ICLR 2024) benchmark, and assesses the effects of\nL14:  adding automated trust ...\nL15:   * 【3†10 AI agent benchmarks; publish_date: none†www.evidentlyai.com】 Jul 11, \nL16: 2025 — We put together 10 AI agent benchmarks designed to assess how well \nL17: different LLMs perform as agents in real-world scenarios, ...\nL18:   * 【4†A state-of-the-art search API purpose-built for agents; publish_date: \nL19: none†parallel.ai】 Jul 31, 2025 — To evaluate real-world performance of the \nL20: Parallel Search MCP Server, we created the WISER-Search benchmark which blends \nL21: WISER-Fresh (queries ...\nL22:   * 【5†AI Agent Benchmarks are Broken; publish_date: none†medium.com】 We break \nL23: down the failure modes in current AI agent benchmarks and introduce a checklist \nL24: that minimizes the gamability of AI agent benchmarks.\nL25:   * 【6†Benchmarks and Tree Search for Multimodal LLM Web Agents; publish_date: \nL26: none†dpfried.github.io】 2024, When is Tree Search Useful? ○ Dealing with \nL27: destructive actions. ○ Some things on the web are very difficult to undo, e.g., \nL28: ordering an item. 56.\nL29:   * 【7†-Bench: Benchmarking AI agents for the real-world; publish_date: \nL30: none†sierra.ai】 Jun 20, 2024 — τ-bench measures an agent's ability to interact \nL31: with (simulated) human users and programmatic APIs while following domain-\nL32: specific policies in a consistent ...\nL33:   * 【8†Browser Use = state of the art Web Agent; publish_date: none†browser-\nL34: use.com】 Dec 15, 2024 — Browser Use has achieved state-of-the-art performance on\nL35:  the WebVoyager benchmark, with an impressive 89.1% success rate across 586 \nL36: diverse web tasks.\nL37:   * 【9†FutureSearch Benchmarks; publish_date: none†evals.futuresearch.ai】 Find \nL38: the original source of a given claim. Example: From , more than 8 out of 1000 \nL39: users clicked on a phishing link monthly in 2024, up 190% vs 2023.",
+            "tool_call_id": "fixture-tool-call-1-0"
+        },
+        {
+            "role": "tool",
+            "content": "[1] 搜索智能体 基准测试 (Search_Results/搜索智能体 基准测试)\n**viewing lines [0 - 33] of 33**\n\nL0: \nL1: URL: Search_Results/搜索智能体 基准测试\nL2: # Search Results\nL3: \nL4:   * 【0†WideSearch：揭示AI 智能体缺失的「广度」能力; publish_date: none†zhuanlan.zhihu.com】 Aug \nL5: 16, 2025 — 为系统评估智能体在该任务上的能力，论文构建了第一个专门的基准测试 WideSearch ，包含200 个源于真实世界、横跨18 \nL6: 个领域的高质量任务。 通过对超过10 个 ...\nL7:   * 【1†GAIA: 一个严苛的智能体基准- HuggingFace; publish_date: none†www.cnblogs.com】 Jul 9,\nL8:  2024 — 我们使用一个用库构建的代码智能体 在GAIA 基准上进行测试，这可以说是最困难、最全面的智能体基准测试……最终我们取得了第一名的成绩！ \nL9: GAIA: 一个严苛的 ...\nL10:   * 【2†AI搜索智能体遭遇新挑战：滑铁卢大学团队提出更公平透明的 ...; publish_date: none†www.techwalker.com】 \nL11: Aug 14, 2025 — \nL12: 目前评测AI搜索智能体主要依靠BrowseComp这样的基准测试，它就像一场实时的开卷考试，让AI在真实的网络环境中搜索信息来回答复杂问题。听起来很合理 ...\nL13:   * 【3†Agentic AI基础设施实践经验系列（六）：Agent质量评估 - AWS; publish_date: \nL14: none†aws.amazon.com】 Sep 19, 2025 — TAU-bench \nL15: 是一个评估AI智能体在真实世界环境中可靠性的基准测试。它评估智能体是否能够在动态的多轮对话中与用户进行交互，理解需求并完成任务。T-bench ...\nL16:   * 【4†DeepAgent：能自己找工具的通用推理智能体 - 高瓴人工智能学院; publish_date: none†ai.ruc.edu.cn】 \nL17: Nov 6, 2025 — 在八大基准测试中，DeepAgent在绝大多数任务上全面领先所有基线模型。 \nL18: 开放环境优势:在最具挑战的“开放工具检索”场景下(如ToolBench)，其成功率达到64%，远 ...\nL19:   * 【5†BrowseComp：为浏览智能体设计的简单而具挑战性的基准测试; publish_date: none†blog.csdn.net】 Sep \nL20: 22, 2025 — 该基准测试由OpenAI团队开发，旨在推动更可信赖和可靠的AI代理研究。 核心特点. \nL21: 挑战性问题设计：BrowseComp的问题设计遵循严格的难度标准：. 人类创建者确保 ...\nL22:   * 【6†什么是GAIA？; publish_date: none†huggingface.co】 什么是GAIA？ GAIA \nL23: 是一个用于评估AI助手在需要核心能力组合的真实世界任务上的表现的基准，这些核心能力包括推理、多模态理解、网页浏览和熟练的工具使用。\nL24:   * 【7†SuperCLUE-Agent: Agent智能体中文原生任务能力测评基准; publish_date: \nL25: none†www.cluebenchmarks.com】 AI \nL26: Agent是一个能够自主理解、规划决策和执行复杂任务的智能体，有代表性的Agent比如AutoGPT、AgentGPT等。 \nL27: 现有关于Agent能力的测评，主要是在英文场景或任务的测评。目前还 ...\nL28:   * 【8†Gaia2 与ARE：赋能社区的智能体评测; publish_date: none†hub.baai.ac.cn】 Sep 24, 2025 — \nL29: 快速评估任意智能体：基于真实或模拟数据，测试不同规则、工具、内容和验证方式下的表现 · 测试智能体的 工具调用与编排能力：可结合本地应用或MCP 工具 · 生成\nL30:  ...\nL31:   * 【9†许多AI 智能体评测基准并不可靠; publish_date: none†my.oschina.net】 Aug 13, 2025 — \nL32: 当前AI智能体基准测试存在严重缺陷，亟需更严谨的评估框架。作者提出AI智能体基准测试核查清单（ABC），包含43个项目，旨在识别基准测试潜在陷阱，构建可靠评估 \nL33: ...",
+            "tool_call_id": "fixture-tool-call-1-1"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "第一个搜索结果（针对“search agent benchmark 2024”）显示了一些相关条目，例如“HSCodeComp: A Realistic and Expert-level Benchmark for ...”、“open-compass/GTA - A Benchmark for General Tool Agents”、“10 AI agent benchmarks”、“WISER-Search benchmark”、“Benchmarks and Tree Search for Multimodal LLM Web Agents”、“τ-bench”、“WebVoyager benchmark”、“FutureSearch Benchmarks”。这些可能都与搜索智能体相关。第二个搜索结果（针对“搜索智能体 基准测试”）显示了一些中文资料，例如“WideSearch”、“BrowseComp”、“GAIA”、“SuperCLUE-Agent”等。这些都可能与搜索智能体相关。为了全面了解这些基准测试，我们需要同时打开并分析其中几个最有潜力的结果。具体来说，我们将打开第一个搜索结果中的“HSCodeComp”（索引0）和“open-compass/GTA”（索引1），以及第二个搜索结果中的“WideSearch”（索引0）和“BrowseComp”（索引5）。",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-4-0",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 0, \"id\": 0}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-4-1",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 0, \"id\": 1}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-4-2",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 1, \"id\": 0}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-4-3",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 1, \"id\": 5}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[2]  (https://ar5iv.org/html/2510.19631v1)\n**viewing lines [0 - 119] of 983**\n\nL0: \nL1: URL: https://ar5iv.org/html/2510.19631v1\nL2: # HSCodeComp: A Realistic and Expert-level Benchmark for Deep Search Agents in \nL3: Hierarchical Rule Application\nL4: \nL5: Yiqian Yang† Tian Lan† Qianghuai Jia∗ Li Zhu Hui Jiang Hang Zhu Longyue Wang \nL6: Weihua Luo Kaifu Zhang\nL7: \nL8: Alibaba International Digital Commerce∗* Corresponding Author: Qianghuai Jia \nL9: (qianghuai.jqh@alibaba-inc.com)\nL10: †\\dagger Equal Contribution: Yiqian Yang\nL11: \nL12:  Tian Lan\nL13: \nL14: ###### Abstract\nL15: \nL16: Abstract\nL17: \nL18: Effective deep search agents must not only access open-domain and domain-\nL19: specific knowledge but also apply complex rules—such as legal clauses, medical \nL20: manuals and tariff rules. These rules often feature vague boundaries and \nL21: implicit logic relationships, making precise application challenging for agents.\nL22:  However, this critical capability is largely overlooked by current agent \nL23: benchmarks. To fill this gap, we introduce HSCodeComp, the first realistic, \nL24: expert-level e-commerce benchmark designed to evaluate deep search agents in \nL25: hierarchical rule application. In this task, the deep reasoning process of \nL26: agents is guided by these rules to predict 10-digit Harmonized System Code \nL27: (HSCode) of products with noisy but realistic descriptions. These codes, \nL28: established by the World Customs Organization, are vital for global supply chain\nL29:  efficiency. Built from real-world data collected from large-scale e-commerce \nL30: platforms, our proposed HSCodeComp comprises 632 product entries spanning \nL31: diverse product categories, with these HSCodes annotated by several human \nL32: experts. Extensive experimental results on several state-of-the-art LLMs, open-\nL33: source, and closed-source agents reveal a huge performance gap: best agent \nL34: achieves only 46.8% 10-digit accuracy, far below human experts at 95.0%. \nL35: Besides, detailed analysis demonstrates the challenges of hierarchical rule \nL36: application, and test-time scaling fails to improve performance further.\nL37: \nL38: ## 1 Introduction\nL39: \nL40: Deep search agents have demonstrated significant value in solving complex real-\nL41: world problems, where robust external knowledge utilization constitutes a \nL42: critical capability [Wu et al., 2025, Tao et al., 2025, Li et al., 2025b]. To \nL43: evaluate this capability, numerous established benchmarks are proposed to assess\nL44:  agents in utilizing open-domain data (e.g., GAIA [Mialon et al., 2023b] and \nL45: BrowseComp [Wei et al., 2025]) and domain-specific data (e.g., WebMall [Peeters \nL46: et al., 2025a], FinSearchComp [Hu et al., 2025a] and MedBrowseComp [Yu et al., \nL47: 2025b]).\nL48: \nL49: Beyond open-domain and domain-specific data, agents also need to effectively \nL50: apply rules that encode human expert knowledge, particularly in scenarios like \nL51: law, medical and e-commerce [Li et al., 2025a, Chen et al., 2025b, Yao et al., \nL52: 2022, Chollet et al., 2025]. For instance, legal case adjudication require \nL53: interpreting abstract legal provisions, and accurate e-commerce product \nL54: classification in depends on tariff rules [Grainger, 2024]. Previous works have \nL55: defined rule application as using specific logical rules with supporting facts \nL56: to derive conclusions [Wang et al., 2024, Servantez et al., 2024]. In contrast, \nL57: we define it as a core capability for deep search agents, where human-written \nL58: rules are systematically applied to guide complex reasoning and decision-making \nL59: [Sadowski and Chudziak, 2025]. Building on this observation, we categorize \nL60: knowledge data for deep search agents into three levels (Figure 1, left), with \nL61: increasing knowledge complexity: (1) Level 1: Open-domain Data - Tests \nL62: understanding and deep reasoning abilities of agents on long-form web content. \nL63: Established benchmarks include GAIA [Mialon et al., 2023b] and BrowseComp [Wei \nL64: et al., 2025]; (2) Level 2: Structured Data - Assesses agents to precisely \nL65: utilize structured data such as databases and knowledge graphs, as seen in \nL66: domain-specific benchmarks like WebMall [Peeters et al., 2025a], MedBrowseComp \nL67: [Chen et al., 2025b] and FinSearchComp [Hu et al., 2025a]; (3) Level 3: Rule \nL68: Data - Evaluates agents to apply complex and abstract rules [Chollet et al., \nL69: 2025]. This level presents two key challenges: (a) making accurate decisions \nL70: when rules contain vague natural language descriptions [Sadowski and Chudziak, \nL71: 2025]; and (b) reasoning about logical dependencies among rules, such as \nL72: exception clauses and cross-category relationships [Guha et al., 2023]. Despite \nL73: the importance of rule application in real-world scenarios, current agent \nL74: benchmarks largely overlook its evaluation.\nL75: \nL76: To fill this gap, we introduce HSCodeComp (short for the Harmonized System Code \nL77: (HSCode) Competition), the first realistic, expert-level e-commerce benchmark \nL78: designed to evaluate agents in predicting complete 10-digit Harmonized System \nL79: Code (HSCode) of the product, using hierarchical rules (e.g., eWTP tariff \nL80: rules111https://www.ewtp.com/web/smart/hscode). HSCodes organize products \nL81: through a hierarchical structure spanning over 5,000 distinct codes across \nL82: multiple classification levels, representing the global standard for classifying\nL83:  traded international goods, established by the World Customs Organization and \nL84: implemented across more than 200 countries for customs clearance and tariff \nL85: determination [Grainger, 2024, Nath et al., 2025]. Built from the data of the \nL86: large-scale e-commerce platforms, our proposed HSCodeComp comprises 632 \nL87: carefully curated product entries, encompassing 27 unique HS chapters and 32 \nL88: distinct first-level categories. These HSCodes have been rigorously annotated by\nL89:  multiple e-commerce domain experts, ensuring that HSCodeComp is expert-level. \nL90: Accurately predicting the exact 10-digit HSCode presents significant challenges:\nL91:  agents must perform multi-hop hierarchical reasoning with complex tariff rules \nL92: while processing noisy but realistic product descriptions that often contain \nL93: abbreviations, language variations, or incomplete information.\nL94: \nL95: Extensive experiments on the state-of-the-art baselines, including 14 advanced \nL96: foundation models, 6 advanced open-source agent systems and 3 closed-source \nL97: agent systems, demonstrate that HSCode prediction task remains a substantial \nL98: challenge for current AI approaches. As shown in the Figure 1 (right), even the \nL99: best-performing system (SmolAgent [Roucher et al., 2025] with GPT-5) achieves \nL100: only 46.8% accuracy, substantially below the 95.0% accuracy attained by human \nL101: experts. Further detailed analysis reveals that existing agent systems lack \nL102: critical capabilities required for this complex hierarchical rule applications. \nL103: Notably, test-time scaling approach—which has proven effective in other \nL104: reasoning tasks [Guo et al., 2025, Liu et al., 2025]—fail to improve performance\nL105:  on HSCodeComp. These observations demonstrate the challenging nature of our \nL106: proposed HSCodeComp, highlighting the need for more effective designs of agent \nL107: systems. To facilitate future research, we will publicly release codes and the \nL108: benchmark dataset of HSCodeComp.\nL109: \nL110: ## 2 Related Works\nL111: \nL112: ### 2.1 Previous Works in HSCode Prediction\nL113: \nL114: Previous works treat HSCode prediction as the e-commerce text classification \nL115: task [Grainger, 2024], using pre-trained BERT models [Liao et al., 2024, Shubham\nL116:  et al., 2022] or Large Language Models (LLMs) prompting [Hussain and Ahmed, \nL117: 2023]. However, these approaches fail to leverage domain-specific knowledge, \nL118: especially the rules written by human experts [Hussain and Ahmed, 2023, Judy, \nL119: 2024]. Besides, existing HSCode benchmarks face two critical limitations [Judy, ",
+            "tool_call_id": "fixture-tool-call-4-0"
+        },
+        {
+            "role": "tool",
+            "content": "Error fetching URL `https://github.com/open-compass/GTA`",
+            "tool_call_id": "fixture-tool-call-4-1"
+        },
+        {
+            "role": "tool",
+            "content": "Error fetching URL `https://zhuanlan.zhihu.com/p/1939316761775301093`",
+            "tool_call_id": "fixture-tool-call-4-2"
+        },
+        {
+            "role": "tool",
+            "content": "[3]  (https://blog.csdn.net/qq_41472205/article/details/151974603)\n**viewing lines [0 - 167] of 187**\n\nL0: \nL1: URL: https://blog.csdn.net/qq_41472205/article/details/151974603\nL2: # BrowseComp：为浏览智能体设计的简单而具挑战性的基准测试\nL3: \nL4: BrowseComp：AI浏览能力评估基准\nL5: \nL6: 最新推荐文章于 2025-11-12 13:40:20 发布\nL7: \nL8: 原创 于 2025-09-22 22:33:04 发布 · 1.3k 阅读\nL9: \nL10: · 9\nL11: · 25 · \nL12: CC 4.0 BY-SA版权\nL13: \nL14: 版权声明：本文为博主原创文章，遵循 CC 4.0 BY-SA 版权协议，转载请附上原文出处链接和本声明。\nL15: \nL16: ## BrowseComp：为浏览智能体设计的简单而具挑战性的基准测试\nL17: \nL18: 在人工智能从基础聊天机器人向推理器和智能体发展的进程中，具备浏览互联网能力的人工智能模型正变得越来越重要。今天，我们将介绍一个名为BrowseComp的创新基准\nL19: 测试，它专门设计用于评估AI代理在复杂网络浏览任务中的能力。\nL20: \nL21: ### 什么是BrowseComp？\nL22: \nL23: BrowseComp（全称Browsing Competition）是一个包含1,266个挑战性问题的基准测试集，专门用于衡量AI代理在互联网上持续导航、寻找难\nL24: 以找到的纠缠信息的能力。该基准测试由OpenAI团队开发，旨在推动更可信赖和可靠的AI代理研究。\nL25: \nL26: #### 核心特点\nL27: \nL28: 挑战性问题设计：BrowseComp的问题设计遵循严格的难度标准：\nL29: \nL30: - 人类创建者确保问题在10分钟内无法被人解决\nL31: - 现有模型（包括带浏览功能的ChatGPT和早期版本的OpenAI Deep Research）无法解决\nL32: - 通过5次简单Google搜索无法在结果首页找到答案\nL33: \nL34: 简单易验证：尽管问题极具挑战性，但答案形式简单——都是短字符串，便于自动验证模型输出的正确性。\nL35: \nL36: ### 为什么需要BrowseComp？\nL37: \nL38: #### 现有基准的局限性\nL39: \nL40: 传统的信息检索基准（如TriviaQA、HotpotQA等）主要关注易于查找的信息，随着语言模型的进步，这些基准已经趋于饱和。而BrowseComp专注于那些需\nL41: 要浏览大量网站才能解决的\"硬核\"问题。\nL42: \nL43: #### 模拟真实挑战\nL44: \nL45: BrowseComp问题通常采用\"逆向设计\"方法：创建者从一个已知事实出发，构建一个搜索空间巨大但验证简单的问题。例如：\nL46: \nL47: “找出2018-2023年间在EMNLP会议上发表、第一作者本科毕业于达特茅斯学院、第四作者本科毕业于宾夕法尼亚大学的科学论文标题”\nL48: \nL49: 这类问题验证简单，但解决起来需要检查数千篇论文并调查每位作者的背景。\nL50: \nL51: ### 数据集特点\nL52: \nL53: #### 主题多样性\nL54: \nL55: BrowseComp涵盖了广泛的主题领域（如图2所示），包括历史、科学、文化等。创建者被鼓励基于个人兴趣设计问题，这有助于提高数据质量和参与度。\nL56: \nL57: #### 质量保证\nL58: \nL59: 为确保答案的唯一性，创建者需要：\nL60: \nL61: - 对问题内容有足够了解，确信没有其他有效答案\nL62: - 如果不确定，则添加更多约束条件\nL63: - 接受其他创建者的验证反馈\nL64: \nL65: ### 人类表现基准\nL66: \nL67: 为了衡量BrowseComp的难度，研究人员让人类创建者尝试解决问题（不能解答自己创建的问题）。结果显示：\nL68: \nL69: - **70.8%**的问题在2小时搜索后人类选择放弃\nL70: - **29.2%**的问题被成功解决\nL71: - 在解决的问题中，**86.4%**的人类答案与参考答案一致\nL72: \nL73: 这表明BrowseComp确实极具挑战性，即使是熟悉数据集的人类专家也难以在有限时间内解决大部分问题。\nL74: \nL75: ### AI模型表现评估\nL76: \nL77: #### 各模型对比\nL78: \nL79: 研究人员评估了多种模型在BrowseComp上的表现：\nL80: \nL81: 模型 | 准确率(%) | 校准误差(%) \nL82: ---|---|---\nL83: GPT-4o | 0.6 | 69 \nL84: GPT-4o（带浏览） | 1.9 | 82 \nL85: GPT-4.5 | 0.9 | 68 \nL86: OpenAI o1 | 9.9 | 65 \nL87: Deep Research | 51.5 | 91 \nL88: \nL89: #### 关键发现\nL90: \nL91: - 基础模型表现不佳：GPT-4o和GPT-4.5准确率接近零，凸显了基准的难度\nL92: - 浏览功能带来有限提升：启用浏览功能的GPT-4o准确率略有提高，但仍很低\nL93: - 推理能力的重要性：OpenAI o1虽然没有浏览能力，但凭借更强的推理能力获得较高准确率\nL94: - 专业模型的优势：专门为持久网络浏览训练的Deep Research模型解决了约一半的问题\nL95: \nL96: #### 计算资源与性能关系\nL97: \nL98: 研究表明，BrowseComp性能随测试时计算资源的增加而平滑提升（如图1所示）。这与智能体模型的特性一致——更多计算资源允许模型浏览更多网站，从而提高找到正确\nL99: 答案的机会。\nL100: \nL101: ### 进阶策略分析\nL102: \nL103: #### 聚合策略的效果\nL104: \nL105: 通过让模型多次尝试同一问题并采用投票策略，可以显著提升性能：\nL106: \nL107: - 多数投票：选择样本中最常见的答案\nL108: - 加权投票：根据模型置信度加权投票\nL109: - 最佳选择：选择置信度最高的答案\nL110: \nL111: 这些方法将Deep Research的性能提升了15-25%，表明模型通常能够识别自己的正确答案。\nL112: \nL113: #### 任务难度分布\nL114: \nL115: 分析显示，BrowseComp中的任务难度分布广泛：\nL116: \nL117: - 16%的任务被Deep Research完美解决（100%通过率）\nL118: - 14%的任务完全失败（0%通过率）\nL119: - 其余任务处于中间难度水平\nL120: \nL121: ### BrowseComp的意义与局限性\nL122: \nL123: #### 作为评估工具的价值\nL124: \nL125: BrowseComp可被视为浏览智能体的\"编程竞赛\"——虽然不全面，但对核心浏览能力提供了有用的衡量。正如在编程竞赛中表现出色的模型很可能在其他编码任务中表现良\nL126: 好，能够解决BrowseComp的模型在定位难以查找信息方面应该具有很强的能力。\nL127: \nL128: #### 已知局限性\nL129: \nL130: - 不涉及真实用户查询分布：如生成长回答或解决查询歧义等挑战\nL131: - 答案唯一性假设：虽然尽力确保，但不能完全保证没有其他有效答案\nL132: - 单模态限制：当前仅关注文本信息检索，未来可扩展至图像、视频等模态\nL133: \nL134: ### 结论与展望\nL135: \nL136: BrowseComp为评估AI代理的网络浏览能力提供了一个具有挑战性且易于使用的基准。它强调的持久性、创造性和战略性浏览能力，正是未来AI智能体在实际应用中所需\nL137: 的核心技能。\nL138: \nL139: 随着更多研究团队在BrowseComp上测试他们的模型，我们期待看到浏览智能体技术的快速进步，最终使AI能够更可靠地帮助我们获取那些深藏在互联网角落的宝贵信息。\nL140: \nL141: 访问地址：BrowseComp已在GitHub上开源，地址为 https://github.com/openai/simple-evals\nL142: \nL143: 添加红包\nL144: \nL145: 请填写红包祝福语或标题\nL146: \nL147: 个\nL148: \nL149: 红包个数最小为10个\nL150: \nL151: 元\nL152: \nL153: 红包金额最低5元\nL154: \nL155: 当前余额3.43元 前往充值 >\nL156: \nL157: 需支付：10.00元\nL158: \nL159: 成就一亿技术人!\nL160: \nL161: hope_wisdom\nL162:  发出的红包 \nL163: 实付元\nL164: \nL165: 点击重新获取\nL166: \nL167: 扫码支付",
+            "tool_call_id": "fixture-tool-call-4-3"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "打开的HSCodeComp页面详细介绍了这个基准测试，它是一个用于评估深度搜索智能体在分层规则应用方面能力的基准测试，特点包括：基于真实世界电商数据、任务复杂、需要多跳层次推理、使用嘈杂的产品描述，并且标注由专家完成。使用场景是评估智能体在规则应用方面的能力，例如关税代码分类。例题是预测产品的10位HS编码。打开的BrowseComp页面详细介绍了这个基准测试，它是一个用于评估浏览智能体（即能够在互联网上导航、搜索信息的智能体）的基准测试。其特点包括：包含1266个具有挑战性的问题，需要持续导航和搜索难以查找的信息；答案形式简单，便于自动验证；问题设计确保人类在10分钟内无法解决，且现有模型也无法解决；涵盖了广泛的主题领域。使用场景是评估智能体在复杂网络浏览任务中的能力。例题是：“找出2018-2023年间在EMNLP会议上发表、第一作者本科毕业于达特茅斯学院、第四作者本科毕业于宾夕法尼亚大学的科学论文标题”。另外两个打开的页面（open-compass/GTA和WideSearch）出现了错误，可能链接已失效或需要其他访问方式。为了进一步获取其他相关基准测试的信息，我们需要同时进行多项搜索。考虑到用户可能对搜索智能体基准测试有广泛需求，我们将同时搜索“WebVoyager benchmark”、“WebArena benchmark”、“GAIA benchmark”、“ToolBench benchmark”、“HotpotQA benchmark”、“FEVER benchmark”、“TriviaQA benchmark”、“Natural Questions benchmark”、“MS MARCO benchmark”、“BEIR benchmark”、“MIRACL benchmark”、“Zero-shot IR benchmark”、“WebGPT benchmark”、“WebShop benchmark”、“ALFWorld benchmark”、“Mind2Web benchmark”、“VisualWebArena benchmark”、“SearchBench benchmark”、“WebVLN benchmark”、“WebNav benchmark”。这些搜索将帮助我们全面了解现有基准测试的概况。",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-9-0",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"WebVoyager benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-1",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"WebArena benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-2",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"GAIA benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-3",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"ToolBench benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-4",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"HotpotQA benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-5",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"FEVER benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-6",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"TriviaQA benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-7",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"Natural Questions benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-8",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"MS MARCO benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-9",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"BEIR benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-10",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"MIRACL benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-11",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"Zero-shot IR benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-12",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"WebGPT benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-13",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"WebShop benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-14",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"ALFWorld benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-15",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"Mind2Web benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-16",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"VisualWebArena benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-17",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"SearchBench benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-18",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"WebVLN benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-9-19",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"WebNav benchmark\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[4] WebVoyager benchmark (Search_Results/WebVoyager benchmark)\n**viewing lines [0 - 40] of 40**\n\nL0: \nL1: URL: Search_Results/WebVoyager benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†WebVoyager Benchmark; publish_date: none†www.trykura.com】 Performance \nL5: measured across various websites in WebVoyager shows Kura consistently \nL6: outperforming other agents, with particularly strong results on e-commerce ...\nL7:   * 【1†WebVoyager: Building an End-to-End Web Agent with ...; publish_date: \nL8: none†github.com】 WebVoyager is an innovative Large Multimodal Model (LMM) \nL9: powered web agent that can complete user instructions end-to-end by interacting \nL10: with real-world ...\nL11:   * 【2†AI Browser Agent Leaderboard | Steel.dev; publish_date: \nL12: none†leaderboard.steel.dev】 See how various AI browser agents stack up based on \nL13: their accuracy in completing web-based tasks on the WebVoyager benchmark.\nL14:   * 【3†[2401.13919] WebVoyager: Building an End-to-End Web ...; publish_date: \nL15: none†arxiv.org】 by H He · 2024 · Cited by 282 — We show that WebVoyager achieves\nL16:  a 59.1% task success rate on our benchmark, significantly surpassing the \nL17: performance of both GPT-4 (All ...\nL18:   * 【4†Our Agent-E SOTA Results on the WebVoyager Benchmark; publish_date: \nL19: none†www.emergence.ai】 Jul 11, 2024 — WebVoyager is a benchmark that tests an \nL20: agent's capabilities for navigation on dynamic live websites. It is more \nL21: representative than WebArena [4] ...\nL22:   * 【5†Browser Use = state of the art Web Agent; publish_date: none†browser-\nL23: use.com】 Dec 15, 2024 — Browser Use has achieved state-of-the-art performance on\nL24:  the WebVoyager benchmark, with an impressive 89.1% success rate across 586 \nL25: diverse web tasks.\nL26:   * 【6†Magnitude achieves SOTA 94% on WebVoyager benchmark; publish_date: \nL27: none†github.com】 Magnitude achieves state-of-the-art performance with 93.9% \nL28: success rate on WebVoyager, beating all other browser agents.\nL29:   * 【7†WebVoyager: Autonomous Web Agent Benchmark; publish_date: \nL30: none†www.emergentmind.com】 3 days ago — WebVoyager Benchmark is a comprehensive \nL31: evaluation suite for autonomous web agents, featuring 643 tasks across 15 \nL32: popular websites.\nL33:   * 【8†WebVoyager Benchmark Results; publish_date: none†www.browserable.ai】 \nL34: Browserable has achieved 90.4% on the WebVoyager benchmark. This is best-in-\nL35: class performance across all web agents. This was done across 567 web tasks \nL36: which ...\nL37:   * 【9†89% achieved on WebVoyager using Anchor + Browser Use; publish_date: \nL38: none†www.reddit.com】 Thanks to the amazing work from the browser-use open-source\nL39:  community and the built-in support from Anchor Browser, we've hit an 89% score \nL40: on WebVoyager.",
+            "tool_call_id": "fixture-tool-call-9-0"
+        },
+        {
+            "role": "tool",
+            "content": "[5] WebArena benchmark (Search_Results/WebArena benchmark)\n**viewing lines [0 - 42] of 42**\n\nL0: \nL1: URL: Search_Results/WebArena benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†WebArena: A Realistic Web Environment for Building ...; publish_date: \nL5: none†webarena.dev】 Our benchmark is implemented in our fully interactable \nL6: highly-realistic WebArena environment. It features diverse tasks human may \nL7: encounter in their daily ...\nL8:   * 【1†[2307.13854] WebArena: A Realistic Web Environment for ...; publish_date:\nL9:  none†arxiv.org】 by S Zhou · 2023 · Cited by 637 — Building upon our \nL10: environment, we release a set of benchmark tasks focusing on evaluating the \nL11: functional correctness of task completions.\nL12:   * 【2†WebArena: A Realistic Web Environment for Building ...; publish_date: \nL13: none†www.cmu.edu】 WebArena introduces a benchmark on interpreting high-level \nL14: realistic natural language command to concrete web-based interactions. We \nL15: provide annotated programs ...\nL16:   * 【3†GitHub - web-arena-x/webarena: Code repo for ...; publish_date: \nL17: none†github.com】 [12/20/2024] Check out our new benchmark on even more \nL18: consequential tasks, including terminal use and coding, TheAgentCompany. \nL19: [12/21/2023] We release the ...\nL20:   * 【4†WebArena Benchmark and the State of Agentic AI; publish_date: \nL21: none†medium.com】 In short, WebArena established a new standard for realism and \nL22: complexity in web agent evaluation, forcing AI agents to operate in dynamic, \nL23: high- ...\nL24:   * 【5†WebArena: A Realistic Web Environment for Building ...; publish_date: \nL25: none†huggingface.co】 Jul 25, 2023 — WebArena, a realistic and reproducible \nL26: environment, evaluates the performance of autonomous agents performing complex \nL27: tasks on websites using ...\nL28:   * 【6†WebArena Benchmark: Evaluating Web Agents; publish_date: \nL29: none†www.emergentmind.com】 Jun 30, 2025 — WebArena Benchmark is a self-contained\nL30:  suite that evaluates autonomous agents on realistic, multi-step web tasks using\nL31:  natural language ...\nL32:   * 【7†VisualWebArena is a benchmark for multimodal agents.; publish_date: \nL33: none†github.com】 VisualWebArena is a realistic and diverse benchmark for \nL34: evaluating multimodal autonomous language agents. It comprises of a set of \nL35: diverse and complex web-based ...\nL36:   * 【8†WebDev Arena Leaderboard - LMArena; publish_date: none†web.lmarena.ai】 \nL37: WebDev Arena is a real-time AI coding competition where models go head-to-head \nL38: in web development challenges, developed by LMArena.\nL39:   * 【9†WebArena: A Realistic Web Environment for Building ...; publish_date: \nL40: none†arxiv.org】 Apr 16, 2024 — We use this benchmark to evaluate several agents \nL41: that can follow NL command and perform web-based tasks (§4). These agents are \nL42: implemented in a ...",
+            "tool_call_id": "fixture-tool-call-9-1"
+        },
+        {
+            "role": "tool",
+            "content": "[6] GAIA benchmark (Search_Results/GAIA benchmark)\n**viewing lines [0 - 41] of 41**\n\nL0: \nL1: URL: Search_Results/GAIA benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†GAIA Leaderboard - a Hugging Face Space by ...; publish_date: \nL5: none†huggingface.co】 GAIA is a benchmark which aims at evaluating next-\nL6: generation LLMs (LLMs with augmented capabilities due to added tooling, \nL7: efficient prompting, access to search ...\nL8:   * 【1†[2311.12983] GAIA: a benchmark for General AI Assistants; publish_date: \nL9: none†arxiv.org】 by G Mialon · 2023 · Cited by 367 — GAIA proposes real-world \nL10: questions that require a set of fundamental abilities such as reasoning, multi-\nL11: modality handling, web browsing, and generally tool-use ...\nL12:   * 【2†GAIA benchmark; publish_date: none†huggingface.co】 This is the \nL13: organisation page for all things related to GAIA, a benchmark for General AI \nL14: Assistants. You can find all the information and links on the GAIA ...\nL15:   * 【3†GAIA: A Benchmark for General AI Assistants; publish_date: \nL16: none†ukgovernmentbeis.github.io】 This is an Inspect AI implementation of the \nL17: GAIA (General AI Assistants) benchmark, consisting of 450 questions testing tool\nL18:  use on realistic assistant tasks.\nL19:   * 【4†GAIA: a benchmark for general AI assistants | Research; publish_date: \nL20: none†ai.meta.com】 May 6, 2024 — GAIA proposes real-world questions that require \nL21: a set of fundamental abilities such as reasoning, multi-modality handling, web \nL22: browsing, and generally tool-use ...\nL23:   * 【5†HAL: GAIA Leaderboard; publish_date: none†hal.cs.princeton.edu】 GAIA is a\nL24:  benchmark for General AI Assistants that requires a set of fundamental \nL25: abilities such as reasoning, multi-modality handling, web browsing, and tool- \nL26: ...\nL27:   * 【6†GAIA: The LLM Agent Benchmark Everyone's Talking About; publish_date: \nL28: none†towardsdatascience.com】 May 29, 2025 — GAIA stands for General AI \nL29: Assistants benchmark [1]. This benchmark was introduced to specifically evaluate\nL30:  LLM agents on their ability to act as general- ...\nL31:   * 【7†GAIA: a benchmark for General AI Assistants; publish_date: \nL32: none†openreview.net】 by G Mialon · Cited by 367 — GAIA proposes real-world \nL33: questions that require a set of fundamental abilities such as reasoning, multi-\nL34: modality handling, web browsing, and generally tool-use ...\nL35:   * 【8†Rethinking AI Evaluation: Introducing the GAIA Benchmark; publish_date: \nL36: none†medium.com】 The authors introduce GAIA, a benchmark designed to assess the \nL37: robustness of AI systems across a variety of practical tasks.\nL38:   * 【9†H2O.ai Tops the General AI Assistant (GAIA) Test; publish_date: \nL39: none†h2o.ai】 Mar 17, 2025 — Our h2oGPTe Agent has once again claimed the #1 spot\nL40:  on the prestigious GAIA (General AI Assistants) benchmark with an impressive \nL41: 75% accuracy rate.",
+            "tool_call_id": "fixture-tool-call-9-2"
+        },
+        {
+            "role": "tool",
+            "content": "[7] ToolBench benchmark (Search_Results/ToolBench benchmark)\n**viewing lines [0 - 40] of 40**\n\nL0: \nL1: URL: Search_Results/ToolBench benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†ToolBench, an evaluation suite for LLM tool manipulation ...; \nL5: publish_date: none†github.com】 The ToolBench is a benchmark consisting of \nL6: diverse software tools for real-world tasks. We also provide easy-to-use \nL7: infrastructure in this repository.\nL8:   * 【1†OpenBMB/ToolBench; publish_date: none†github.com】 [2023/7/27] New version\nL9:  ToolBench is released. ✨Here is an overview of the dataset construction, \nL10: training, and evaluation. ✨✨Features:.\nL11:   * 【2†Towards Stable Large-Scale Benchmarking on Tool ...; publish_date: \nL12: none†arxiv.org】 by Z Guo · 2024 · Cited by 100 — We introduce StableToolBench, a\nL13:  benchmark evolving from ToolBench, proposing a virtual API server and stable \nL14: evaluation system.\nL15:   * 【3†StableToolBench - Zhicheng Guo; publish_date: none†zhichengg.github.io】 \nL16: We introduce StableToolBench, a benchmark evolving from ToolBench, proposing a \nL17: virtual API server and stable evaluation system.\nL18:   * 【4†ToolBench | EvalScope - Read the Docs; publish_date: \nL19: none†evalscope.readthedocs.io】 We evaluate the effectiveness of the ToolBench \nL20: benchmark: ToolBench (Qin et al., 2023b). The task involves integrating API \nL21: calls to complete tasks.\nL22:   * 【5†Towards Stable Large-Scale Benchmarking on Tool ...; publish_date: \nL23: none†aclanthology.org】 by Z Guo · 2024 · Cited by 100 — We introduce \nL24: StableToolBench, a benchmark evolving from ToolBench, proposing a virtual API \nL25: server and stable evaluation system.\nL26:   * 【6†ML-Tool-Bench: Tool-Augmented Planning for ML Tasks; publish_date: \nL27: none†openreview.net】 Sep 18, 2025 — In this work, we introduce a comprehensive \nL28: benchmark for evaluating tool-augmented ML agents using a curated set of 61 \nL29: specialized tools and 15 ...\nL30:   * 【7†-Bench: Benchmarking AI agents for the real-world; publish_date: \nL31: none†sierra.ai】 Jun 20, 2024 — τ-bench measures an agent's ability to interact \nL32: with (simulated) human users and programmatic APIs while following domain-\nL33: specific policies in a consistent ...\nL34:   * 【8†ToolEval Leaderboard; publish_date: none†openbmb.github.io】 ToolEval is \nL35: an automatic evaluator build for tool learning which incorporates two evaluation\nL36:  metrics, Pass Rate and Win Rate(Preference).\nL37:   * 【9†What is the best benchmark dataset for multi-step tool-use?; \nL38: publish_date: none†www.reddit.com】 I'm a newbie trying to evaluate the \nL39: performance of different prompts strategies for multi-step tool-using, wondering\nL40:  what is the recommended benchmark dataset ...",
+            "tool_call_id": "fixture-tool-call-9-3"
+        },
+        {
+            "role": "tool",
+            "content": "[8] HotpotQA benchmark (Search_Results/HotpotQA benchmark)\n**viewing lines [0 - 39] of 39**\n\nL0: \nL1: URL: Search_Results/HotpotQA benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†HotpotQA Homepage; publish_date: none†hotpotqa.github.io】 HotpotQA is a \nL5: question answering dataset featuring natural, multi-hop questions, with strong \nL6: supervision for supporting facts to enable more explainable ...See more\nL7:   * 【1†HotpotQA: A Dataset for Diverse, Explainable Multi-hop ...; publish_date:\nL8:  none†arxiv.org】 by Z Yang · 2018 · Cited by 3834 — HotpotQA is a dataset with \nL9: 113k Wikipedia-based question-answer pairs requiring multi-document reasoning, \nL10: diverse questions, sentence-level ...\nL11:   * 【2†hotpotqa/hotpot_qa · Datasets at Hugging Face; publish_date: \nL12: none†huggingface.co】 HotpotQA is a new dataset with 113k Wikipedia-based \nL13: question-answer pairs with four key features: (1) the questions require finding \nL14: and reasoning over multiple ...See more\nL15:   * 【3†Why You Should Stop Using HotpotQA for AI Agents ...; publish_date: \nL16: none†qipeng.me】 Jul 1, 2025 — HotpotQA pioneered a class of AI tasks that \nL17: requires the AI system to autonomously perform multiple steps of reasoning in an\nL18:  open-domain setting.See more\nL19:   * 【4†hotpotqa/hotpot; publish_date: none†github.com】 A dataset for diverse, \nL20: explainable multi-hop question answering. This repository contains the baseline \nL21: model code, as well as the entire pipeline of running ...See more\nL22:   * 【5†HotpotQA: Multi-Hop QA Benchmark; publish_date: \nL23: none†www.emergentmind.com】 Sep 10, 2025 — HotpotQA is a large-scale multi-hop \nL24: question answering benchmark featuring 112,779 Wikipedia-based Q&A pairs with \nL25: detailed, sentence-level ...See more\nL26:   * 【6†HotpotQA Dataset | Papers With Code; publish_date: \nL27: none†paperswithcode.com】 HotpotQA is a question answering dataset collected on \nL28: the English Wikipedia, containing about 113K crowd-sourced questions.See more\nL29:   * 【7†HotpotQA: A Dataset for Diverse, Explainable Multi-hop ...; publish_date:\nL30:  none†aclanthology.org】 by Z Yang · 2018 · Cited by 3834 — HotpotQA is a dataset\nL31:  with 113k Wikipedia-based question-answer pairs requiring multi-document \nL32: reasoning, diverse questions, sentence-level facts, and factoid ...\nL33:   * 【8†Benchmark BM25S: HotpotQA; publish_date: none†www.kaggle.com】 Explore and\nL34:  run machine learning code with Kaggle Notebooks | Using data from No attached \nL35: data sources.\nL36:   * 【9†mteb/hotpotqa · Datasets at Hugging Face; publish_date: \nL37: none†huggingface.co】 HotpotQA is a question answering dataset featuring natural,\nL38:  multi-hop questions, with strong supervision for supporting facts to enable \nL39: more explainable ...See more",
+            "tool_call_id": "fixture-tool-call-9-4"
+        },
+        {
+            "role": "tool",
+            "content": "[9] FEVER benchmark (Search_Results/FEVER benchmark)\n**viewing lines [0 - 40] of 40**\n\nL0: \nL1: URL: Search_Results/FEVER benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†Fever.ai; publish_date: none†fever.ai】 We are pleased to announce that \nL5: FEVER9 will be co-located with EACL 2026. In this year's workshop, we will \nL6: introduce a new shared task focused on automated fact ...\nL7:   * 【1†a Large-scale Dataset for Fact Extraction and VERification; publish_date:\nL8:  none†aclanthology.org】 by J Thorne · 2018 · Cited by 2315 — In this paper we \nL9: introduce a new publicly available dataset for verification against textual \nL10: sources, FEVER: Fact Extraction.\nL11:   * 【2†awslabs/fever: FEVER (Fact Extraction and VERification) ...; \nL12: publish_date: none†github.com】 In this paper we introduce a new publicly \nL13: available dataset for verification against textual sources, FEVER: Fact \nL14: Extraction and VERification.\nL15:   * 【3†FEVER: Fact Extraction and VERification; publish_date: \nL16: none†www.amazon.science】 The best accuracy we achieve on labeling a claim \nL17: accompanied by the correct evidence is 31.87%, while if we ignore the evidence \nL18: we achieve 50.91%. Thus we ...\nL19:   * 【4†FEVER Dataset; publish_date: none†fever.ai】 FEVER (Fact Extraction and \nL20: VERification) consists of 185,445 claims generated by altering sentences \nL21: extracted from Wikipedia and subsequently verified ...\nL22:   * 【5†mteb/fever · Datasets at Hugging Face; publish_date: none†huggingface.co】\nL23:  FEVER. An MTEB dataset. Massive Text Embedding Benchmark. FEVER (Fact \nL24: Extraction and VERification) consists of 185,445 claims generated by altering \nL25: sentences ...\nL26:   * 【6†FEVEROUS: Fact Extraction and VERification Over ...; publish_date: \nL27: none†datasets-benchmarks-proceedings.neurips.cc】 by R Aly · Cited by 359 — In \nL28: this paper we introduce a novel dataset and benchmark, Fact Extraction and \nL29: VERification Over. Unstructured and Structured information (FEVEROUS), which ...\nL30:   * 【7†a large-scale dataset for Fact Extraction and VERification; publish_date:\nL31:  none†arxiv.org】 by J Thorne · 2018 · Cited by 2315 — In this paper we introduce\nL32:  a new publicly available dataset for verification against textual sources, \nL33: FEVER: Fact Extraction and VERification.\nL34:   * 【8†FEVER Resources; publish_date: none†fever.ai】 2018 FEVER: a large-scale \nL35: dataset for Fact Extraction and VERification .bib James Thorne, Andreas Vlachos,\nL36:  Christos Christodoulopoulos, Arpit Mittal\nL37:   * 【9†a Large-scale Dataset for Fact Extraction and VERification; publish_date:\nL38:  none†www.semanticscholar.org】 This paper introduces a new publicly available \nL39: dataset for verification against textual sources, FEVER, which consists of \nL40: 185,445 claims generated by ...",
+            "tool_call_id": "fixture-tool-call-9-5"
+        },
+        {
+            "role": "tool",
+            "content": "[10] TriviaQA benchmark (Search_Results/TriviaQA benchmark)\n**viewing lines [0 - 35] of 35**\n\nL0: \nL1: URL: Search_Results/TriviaQA benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†TriviaQA; publish_date: none†nlp.cs.washington.edu】 TriviaQA is a reading\nL5:  comprehension dataset containing over 650K question-answer-evidence triples. \nL6: TriviaQA includes 95K question-answer pairs authored ...\nL7:   * 【1†TriviaQA: A Large Scale Distantly Supervised Challenge ...; publish_date:\nL8:  none†aclanthology.org】 by M Joshi · 2017 · Cited by 3451 — We present TriviaQA,\nL9:  a challenging reading comprehension dataset containing over 650K question-\nL10: answer-evidence triples. TriviaQA includes 95K question ...\nL11:   * 【2†mandarjoshi/trivia_qa · Datasets at Hugging Face; publish_date: \nL12: none†huggingface.co】 TriviaqQA is a reading comprehension dataset containing \nL13: over 650K question-answer-evidence triples. TriviaqQA includes 95K question-\nL14: answer pairs authored by ...\nL15:   * 【3†[1705.03551] TriviaQA: A Large Scale Distantly Supervised ...; \nL16: publish_date: none†arxiv.org】 by M Joshi · 2017 · Cited by 3451 — We present \nL17: TriviaQA, a challenging reading comprehension dataset containing over 650K \nL18: question-answer-evidence triples.\nL19:   * 【4†TriviaQA; publish_date: none†epoch.ai】 An open-domain question answering \nL20: benchmark with challenging trivia questions paired with evidence documents.\nL21:   * 【5†TriviaQA Leaderboard; publish_date: none†llm-stats.com】 What is the \nL22: TriviaQA benchmark? A large-scale reading comprehension dataset containing over \nL23: 650K question-answer-evidence triples. TriviaQA includes 95K ...\nL24:   * 【6†Code for the TriviaQA reading comprehension dataset; publish_date: \nL25: none†github.com】 A large scale distantly supervised challenge dataset for \nL26: reading comprehension. In Association for Computational Linguistics (ACL) 2017, \nL27: Vancouver, Canada.\nL28:   * 【7†TriviaQA - Model Benchmarks - The Regularizer; publish_date: \nL29: none†www.theregularizer.com】 May 4, 2025 — Compare the performance of different \nL30: AI models across standardized benchmarks. Higher scores generally indicate \nL31: better performance, but context ...\nL32:   * 【8†TriviaQA: A Large Scale Distantly Supervised Challenge ...; publish_date:\nL33:  none†www.cs.utexas.edu】 by M Joshi · Cited by 3445 — We present TriviaQA, a \nL34: challenging reading comprehension dataset contain- ing over 650K question-\nL35: answer-evidence triples. TriviaQA includes 95K question-.",
+            "tool_call_id": "fixture-tool-call-9-6"
+        },
+        {
+            "role": "tool",
+            "content": "[11] Natural Questions benchmark (Search_Results/Natural Questions benchmark)\n**viewing lines [0 - 39] of 39**\n\nL0: \nL1: URL: Search_Results/Natural Questions benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†Natural Questions: a Benchmark for Question Answering ...; publish_date: \nL5: none†research.google】 by T Kwiatkowski · Cited by 4339 — We present the Natural \nL6: Questions corpus, a question answering dataset. Questions consist of real \nL7: anonymized, aggregated queries issued to the Google search ...\nL8:   * 【1†Natural Questions: A Benchmark for Question Answering ...; publish_date: \nL9: none†aclanthology.org】 by T Kwiatkowski · Cited by 4308 — Abstract. We present \nL10: the Natural Questions corpus, a question answering data set. Questions consist \nL11: of real anonymized, aggregated queries issued.\nL12:   * 【2†Google's Natural Questions; publish_date: none†ai.google.com】 Natural \nL13: Questions. A Benchmark for Question Answering Research. View examples · Download\nL14:  dataset. Open Domain Question Answering. A core goal in artificial ...\nL15:   * 【3†google-research-datasets/natural-questions; publish_date: \nL16: none†github.com】 Natural Questions (NQ) contains real user questions issued to \nL17: Google search, and answers found from Wikipedia by annotators. NQ is designed \nL18: for the training and ...\nL19:   * 【4†Natural Questions: A Benchmark for Question Answering ...; publish_date: \nL20: none†direct.mit.edu】 Aug 1, 2019 — We present the Natural Questions corpus, a \nL21: question answering data set. Questions consist of real anonymized, aggregated \nL22: queries issued to the Google search ...\nL23:   * 【5†ir_datasets : Natural Questions; publish_date: none†ir-datasets.com】 \nL24: Google Natural Questions is a Q&A dataset containing long, short, and Yes/No \nL25: answers from Wikipedia. ir_datasets frames this around an ad-hoc ranking setting\nL26:  ...\nL27:   * 【6†sentence-transformers/natural-questions · Datasets at ...; publish_date: \nL28: none†huggingface.co】 This dataset is a collection of question-answer pairs from \nL29: the Natural Questions dataset. See Natural Questions for additional information.\nL30:   * 【7†Google's Natural Questions; publish_date: none†ai.google.com】 Natural \nL31: Questions contains 307K training examples, 8K examples for development, and a \nL32: further 8K examples for testing. In the paper, we demonstrate a human ...\nL33:   * 【8†A Benchmark for Question Answering Research; publish_date: \nL34: none†www.researchgate.net】 Jul 27, 2025 — We present the Natural Questions \nL35: corpus, a question answering data set. Questions consist of real anonymized, \nL36: aggregated queries issued to the Google search ...\nL37:   * 【9†natural-questions; publish_date: none†docs.unity.rc.umass.edu】 Sep 4, \nL38: 2025 — “Natural questions: a benchmark for question answering research.” \nL39: Transactions of the Association for Computational Linguistics 7 (2019): ...",
+            "tool_call_id": "fixture-tool-call-9-7"
+        },
+        {
+            "role": "tool",
+            "content": "[12] MS MARCO benchmark (Search_Results/MS MARCO benchmark)\n**viewing lines [0 - 41] of 41**\n\nL0: \nL1: URL: Search_Results/MS MARCO benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†MS MARCO - Microsoft Open Source; publish_date: none†microsoft.github.io】\nL5:  The MS MARCO datasets are intended for non-commercial research purposes only to\nL6:  promote advancement in the field of artificial intelligence and related areas, \nL7: ...\nL8:   * 【1†microsoft/ms_marco · Datasets at Hugging Face; publish_date: \nL9: none†huggingface.co】 Starting with a paper released at NIPS 2016, MS MARCO is a \nL10: collection of datasets focused on deep learning in search. The first dataset was\nL11:  a question ...\nL12:   * 【2†Benchmarking Ranking Models in the Large-Data Regime; publish_date: \nL13: none†arxiv.org】 by N Craswell · 2021 · Cited by 89 — This paper uses the MS \nL14: MARCO and TREC Deep Learning Track as our case study, comparing it to the case \nL15: of TREC ad hoc ranking in the 1990s.\nL16:   * 【3†Benchmarking Ranking Models in the Large-Data Regime; publish_date: \nL17: none†www.microsoft.com】 This paper uses the MS MARCO and TREC Deep Learning \nL18: Track as our case study, comparing it to the case of TREC ad hoc ranking in the \nL19: 1990s. We show how the ...\nL20:   * 【4†Datasets for Document and Passage Ranking Leadboards; publish_date: \nL21: none†microsoft.github.io】 The MS MARCO document and passage ranking leaderboards\nL22:  complements the TREC Deep Learning Track by providing on-going evaluation of \nL23: submissions using pre- ...\nL24:   * 【5†MS MARCO: Benchmarking Ranking Models in the Large- ...; publish_date: \nL25: none†dl.acm.org】 Jul 11, 2021 — This paper uses the MS MARCO and TREC Deep \nL26: Learning Track as our case study, comparing it to the case of TREC ad hoc \nL27: ranking in the 1990s.\nL28:   * 【6†ir_datasets : MSMARCO (passage); publish_date: none†ir-datasets.com】 A \nL29: passage ranking benchmark with a collection of 8.8 million passages and question\nL30:  queries. Most relevance judgments are shallow.\nL31:   * 【7†MS MARCO; publish_date: none†sbert.net】 MS MARCO Passage Ranking is a \nL32: large dataset to train models for information retrieval. It consists of about \nL33: 500k real search queries from Bing search engine ...\nL34:   * 【8†MS MARCO: A Human Generated MAchine Reading ...; publish_date: \nL35: none†arxiv.org】 by P Bajaj · 2016 · Cited by 1151 — We introduce a large scale \nL36: MAchine Reading COmprehension dataset, which we name MS MARCO. The dataset \nL37: comprises of 1,010,916 anonymized ...\nL38:   * 【9†MS MARCO Web Search: A Large-scale Information-rich ...; publish_date: \nL39: none†www.microsoft.com】 May 13, 2024 — MS MARCO Web Search offers a retrieval \nL40: benchmark with three web retrieval challenge tasks that demands innovations in \nL41: both machine learning and ...",
+            "tool_call_id": "fixture-tool-call-9-8"
+        },
+        {
+            "role": "tool",
+            "content": "[13] BEIR benchmark (Search_Results/BEIR benchmark)\n**viewing lines [0 - 37] of 37**\n\nL0: \nL1: URL: Search_Results/BEIR benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†详细介绍文本检索基准BEIR: A Heterogeneous Benchmark ...; publish_date: \nL5: none†blog.csdn.net】 2023年1月1日 — \nL6: BEIR旨在为所有不同的检索任务提供一个一站式的零样本评估基准。为了构建一个全面的评估基准，选择方法对于收集具有理想属性的任务和数据集至关重要。对于 ...\nL7:   * 【1†beir-cellar/beir; publish_date: none†github.com】 BEIR is a heterogeneous \nL8: benchmark containing diverse IR tasks. It also provides a common and easy \nL9: framework for evaluation of your NLP-based retrieval models ...\nL10:   * 【2†BEIR: A Heterogenous Benchmark for Zero-shot Evaluation ...; \nL11: publish_date: none†arxiv.org】 作者：N Thakur · 2021 · 被引用次数：1480 — We introduce \nL12: Benchmarking-IR (BEIR), a robust and heterogeneous evaluation benchmark for \nL13: information retrieval.\nL14:   * 【3†BeIR; publish_date: none†huggingface.co】 BEIR (Benchmarking IR) consists \nL15: of a homogenous benchmark for diverse sentence or passage level IR tasks. It \nL16: provides a common and easy framework for the cross ...\nL17:   * 【4†论文分享：BEIR A Heterogeneous Benchmark for Zero-shot ...; publish_date: \nL18: none†zhuanlan.zhihu.com】 2022年10月3日 — 分享论文，夹带个人理解的分享，建议结合原论文看。 1 研究背景. \nL19: 本论文主要关注的领域是query-document检索（下文简称qd检索），即根据query去文档库里 ...\nL20:   * 【5†Benchmarking IR Information Retrieval (BEIR); publish_date: \nL21: none†zilliz.com】 BEIR is a benchmark designed for evaluating the versatility and\nL22:  robustness of information retrieval models. It features 18 diverse datasets \nL23: from domains like ...\nL24:   * 【6†BEIR (Benchmarking IR) - OpenDataLab; publish_date: none†opendatalab.com】\nL25:  简介-Introduction. BEIR（Benchmarking \nL26: IR）是包含不同信息检索（IR）任务的异构基准。通过BEIR，可以系统地研究多种神经检索方法的零样本泛化能力。\nL27:   * 【7†What is the BEIR benchmark and how is it used?; publish_date: \nL28: none†milvus.io】 The BEIR (Benchmarking Information Retrieval) benchmark is a \nL29: standardized framework designed to evaluate the effectiveness of search and \nL30: retrieval algorithms.\nL31:   * 【8†BEIR Benchmark数据集卡片; publish_date: none†www.atyun.com】 BEIR \nL32: Benchmark数据集卡片. 数据集简介. BEIR是一个异构评测基准，由18个多样化的数据集构建而成，代表了9个信息检索任务：. 事实查证： FEVER ，\nL33:  Climate-FEVER ， SciFact ...\nL34:   * 【9†Evaluating search relevance part 1 - The BEIR benchmark; publish_date: \nL35: none†www.elastic.co】 2024年7月16日 — Learn to evaluate your search system in the \nL36: context of better understanding the BEIR benchmark, with tips & techniques to \nL37: improve your ...",
+            "tool_call_id": "fixture-tool-call-9-9"
+        },
+        {
+            "role": "tool",
+            "content": "[14] MIRACL benchmark (Search_Results/MIRACL benchmark)\n**viewing lines [0 - 41] of 41**\n\nL0: \nL1: URL: Search_Results/MIRACL benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†MIRACL | Multilingual Information Retrieval Across a ...; publish_date: \nL5: none†project-miracl.github.io】 MIRACL (Multilingual Information Retrieval Across\nL6:  a Continuum of Languages) is an WSDM 2023 Cup challenge that focuses on search \nL7: across 18 different ...\nL8:   * 【1†project-miracl/miracl: A large-scale multilingual dataset for ...; \nL9: publish_date: none†github.com】 A large-scale multilingual dataset for \nL10: Information Retrieval. Thorough human-annotations across 18 diverse languages.\nL11:   * 【2†A Large, multilingual, visual document retrieval benchmark; publish_date:\nL12:  none†arxiv.org】 by R Osmulski · 2025 · Cited by 2 — MIRACL-VISION is a \nL13: challenging, representative, multilingual evaluation benchmark for visual \nL14: retrieval pipelines and will help the community build robust ...\nL15:   * 【3†miracl/miracl · Datasets at Hugging Face; publish_date: \nL16: none†huggingface.co】 MIRACL (Multilingual Information Retrieval Across a \nL17: Continuum of Languages) is a multilingual retrieval dataset that focuses on \nL18: search across 18 different ...\nL19:   * 【4†MIRACL: A Multilingual Retrieval Dataset Covering 18 ...; publish_date: \nL20: none†direct.mit.edu】 by X Zhang · 2023 · Cited by 131 — MIRACL is a multilingual\nL21:  dataset for ad hoc retrieval across 18 languages that collectively encompass \nL22: over three billion native speakers around the world.\nL23:   * 【5†(PDF) MIRACL-VISION: A Large, multilingual, visual ...; publish_date: \nL24: none†www.researchgate.net】 May 23, 2025 — MIRACL-VISION covers 18 languages, and\nL25:  is an extension of the MIRACL dataset, a popular benchmark to evaluate text-\nL26: based multilingual retrieval ...\nL27:   * 【6†A Large, multilingual, visual document retrieval benchmark; publish_date:\nL28:  none†arxiv.org】 by R Osmulski · 2025 · Cited by 2 — MIRACL-VISION is a \nL29: challenging, representative, multilingual evaluation benchmark for visual \nL30: retrieval pipelines and will help the community ...\nL31:   * 【7†ir_datasets : MIRACL; publish_date: none†ir-datasets.com】 \nL32: \"miracl/ar/test-a\". The held-out test set (version a) for Arabic. \nL33: queriesdocsCitationMetadata. 936 queries. Language: ar. Query type: \nL34: GenericQuery: (namedtuple).\nL35:   * 【8†Evaluate on MIRACL — BGE documentation; publish_date: none†bge-model.com】\nL36:  MIRACL (Multilingual Information Retrieval Across a Continuum of Languages) is \nL37: an WSDM 2023 Cup challenge that focuses on search across 18 different languages.\nL38:   * 【9†MIRACL - Alpha's Tech Garden; publish_date: \nL39: none†techgarden.alphasmanifesto.com】 MIRACL (Multilingual Information Retrieval \nL40: Across a Continuum of Languages) is a multilingual dataset we have built for the\nL41:  WSDM 2023 Cup ...",
+            "tool_call_id": "fixture-tool-call-9-10"
+        },
+        {
+            "role": "tool",
+            "content": "[15] Zero-shot IR benchmark (Search_Results/Zero-shot IR benchmark)\n**viewing lines [0 - 40] of 40**\n\nL0: \nL1: URL: Search_Results/Zero-shot IR benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†BEIR: A Heterogenous Benchmark for Zero-shot Evaluation ...; \nL5: publish_date: none†arxiv.org】 by N Thakur · 2021 · Cited by 1480 — We introduce \nL6: Benchmarking-IR (BEIR), a robust and heterogeneous evaluation benchmark for \nL7: information retrieval.See more\nL8:   * 【1†beir-cellar/beir; publish_date: none†github.com】 BEIR: A Heterogenous \nL9: Benchmark for Zero-shot Evaluation of Information Retrieval Models (NeurIPS \nL10: 2021, Datasets and Benchmarks Track); Resources for Brewing ...See more\nL11:   * 【2†Benchmarking IR Information Retrieval (BEIR); publish_date: \nL12: none†zilliz.com】 BEIR is a tool to evaluate how well Information Retrieval \nL13: systems perform across many tasks and types of information, and is a standard \nL14: benchmark.\nL15:   * 【3†BEIR: A Heterogeneous Benchmark for Zero-shot ...; publish_date: \nL16: none†datasets-benchmarks-proceedings.neurips.cc】 by N Thakur · Cited by 1480 — \nL17: BEIR is a robust, heterogeneous benchmark for information retrieval, using 18 \nL18: datasets and 9 tasks to evaluate model generalization.\nL19:   * 【4†BEIR; publish_date: none†eval.ai】 BEIR is a heterogeneous zero-shot \nL20: retrieval benchmark containing 18 datasets from diverse text retrieval tasks and\nL21:  domains.See more\nL22:   * 【5†[2409.15763] IRSC: A Zero-shot Evaluation Benchmark for ...; \nL23: publish_date: none†arxiv.org】 by H Lin · 2024 · Cited by 2 — This paper \nL24: introduces the IRSC benchmark for evaluating the performance of embedding models\nL25:  in multilingual RAG tasks.See more\nL26:   * 【6†FactIR: A Real-World Zero-shot Open-Domain Retrieval ...; publish_date: \nL27: none†dl.acm.org】 May 23, 2025 — In this paper, we present a real-world retrieval\nL28:  benchmark FactIR, derived from Factiverse production logs, enhanced with human \nL29: annotations. We ...See more\nL30:   * 【7†UniIR: Training and Benchmarking Universal Multimodal ...; publish_date: \nL31: none†tiger-ai-lab.github.io】 At test time, we evaluated the zero-shot \nL32: performance of all fine-tuned models, as well as SoTA pre-trained retrievers on \nL33: the three held-out datasets. UniIR ...See more\nL34:   * 【8†Zero-Shot BEIR Tasks; publish_date: none†www.emergentmind.com】 Aug 26, \nL35: 2025 — Zero-Shot BEIR Tasks are evaluation methodologies that assess IR models' \nL36: ability to generalize to unseen query domains without task-specific ...See more\nL37:   * 【9†BEIR-PL: Zero Shot Information Retrieval Benchmark for ...; publish_date:\nL38:  none†aclanthology.org】 by K Wojtasik · 2024 · Cited by 12 — BEIR-PL is a new \nL39: benchmark with 13 datasets for Polish Information Retrieval, created to advance \nL40: research in this area.",
+            "tool_call_id": "fixture-tool-call-9-11"
+        },
+        {
+            "role": "tool",
+            "content": "[16] WebGPT benchmark (Search_Results/WebGPT benchmark)\n**viewing lines [0 - 38] of 38**\n\nL0: \nL1: URL: Search_Results/WebGPT benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†WebGPT: Improving the factual accuracy of language ...; publish_date: \nL5: none†openai.com】 Dec 16, 2021 — Our models outperform GPT‑3 on TruthfulQA and \nL6: exhibit more favourable scaling properties. However, our models lag behind human\nL7:  performance, ...\nL8:   * 【1†A Simple Yet Challenging Benchmark for Browsing Agents; publish_date: \nL9: none†arxiv.org】 by J Wei · 2025 · Cited by 124 — Abstract. We present \nL10: BrowseComp, a simple yet challenging benchmark for measuring the ability for \nL11: agents to browse the web.\nL12:   * 【2†openai/webgpt_comparisons · Datasets at Hugging Face; publish_date: \nL13: none†huggingface.co】 This is the dataset of all comparisons that were marked as \nL14: suitable for reward modeling by the end of the WebGPT project. There are 19,578 \nL15: comparisons in total.\nL16:   * 【3†Evaluation & Limitations of WebGPT, WebVoyager & Agent-E; publish_date: \nL17: none†deepsense.ai】 Oct 14, 2024 — WebArena benchmark features 812 tasks \nL18: evaluated using metrics such as Exact Match, Must Include, and Fuzzy Match, \nL19: focusing on outcomes rather ...\nL20:   * 【4†OpenAI Announces Question-Answering AI WebGPT; publish_date: \nL21: none†www.infoq.com】 Jan 25, 2022 — On the TriviaQA benchmark, WebGPT \nL22: outperformed GPT-3, producing answers that were true 75% of the time, and \"both \nL23: true and informative\" 54% of ...\nL24:   * 【5†WebGPT: Improving the factual accuracy of language models ...; \nL25: publish_date: none†kargarisaac.medium.com】 The top-performing model generated \nL26: answers that were preferred over 56% of the time compared to answers produced by\nL27:  human demonstrators, with ...\nL28:   * 【6†Browser-assisted question-answering with human feedback; publish_date: \nL29: none†www.alphaxiv.org】 WebGPT represents a significant advancement in long-form \nL30: question answering by combining the language generation capabilities of GPT-3 \nL31: with real-time web ...\nL32:   * 【7†Benchmarking Open-Source Large Language Models, GPT-4 ...; publish_date: \nL33: none†ai.nejm.org】 by S Wu · 2024 · Cited by 69 — We show that the current widely\nL34:  used open-source LLMs have poor zero-shot reasoning ability in nephrology \nL35: compared with GPT-4 and Claude 2.\nL36:   * 【8†0hq/WebGPT: Run GPT model on ...; publish_date: none†github.com】 WebGPT \nL37: is a vanilla JS and HTML implementation of a transformer model, intended as a \nL38: proof-of-concept as well as educational resource.",
+            "tool_call_id": "fixture-tool-call-9-12"
+        },
+        {
+            "role": "tool",
+            "content": "[17] WebShop benchmark (Search_Results/WebShop benchmark)\n**viewing lines [0 - 41] of 41**\n\nL0: \nL1: URL: Search_Results/WebShop benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†WebShop: Towards Scalable Real-World Web Interaction ...; publish_date: \nL5: none†arxiv.org】 by S Yao · 2022 · Cited by 710 — To bridge this gap, we develop \nL6: WebShop -- a simulated e-commerce website environment with 1.18 million real-\nL7: world products and 12,087 crowd- ...\nL8:   * 【1†WebShop; publish_date: none†webshop-pnlp.github.io】 To bridge this gap, \nL9: we develop WebShop – a simulated e-commerce website environment with 1.18 \nL10: million real-world products and 12,087 crowd-sourced text ...\nL11:   * 【2†princeton-nlp/WebShop; publish_date: none†github.com】 WebShop is a \nL12: simulated e-commerce website environment with 1.18 million real-world products \nL13: and 12,087 crowd-sourced text instructions. In this environment, an ...\nL14:   * 【3†WebShop: Towards Scalable Real-World Web Interaction ...; publish_date: \nL15: none†papers.nips.cc】 by S Yao · 2022 · Cited by 710 — We collect over 1,600 \nL16: human trajectories to first validate the benchmark, then train and evaluate a \nL17: diverse range of agents using reinforcement learning, ...\nL18:   * 【4†WebShop: Towards Scalable Real-World Web Interaction ...; publish_date: \nL19: none†proceedings.neurips.cc】 by S Yao · 2022 · Cited by 709 — We have developed \nL20: WebShop, a new web-based benchmark for sequential decision making and language \nL21: grounding, modeled on interaction with an e-commerce website.\nL22:   * 【5†Webshop & Benchmark Analysis | Documentation Infinity; publish_date: \nL23: none†docs.fact-finder.com】 Aug 15, 2025 — Evaluation of your shop based on \nL24: different categories in comparison, to your competitors/industry. Recommended \nL25: when doing a shop relaunch.\nL26:   * 【6†A Multi-Shop Benchmark for Evaluating Web Agents; publish_date: \nL27: none†arxiv.org】 by R Peeters · 2025 · Cited by 2 — Compared to existing \nL28: e-commerce benchmarks, such as WebShop or ShoppingBench, WebMall introduces \nL29: comparison-shopping tasks across multiple shops ...\nL30:   * 【7†WebShop: towards scalable real-world web interaction with ...; \nL31: publish_date: none†dl.acm.org】 by S Yao · 2022 · Cited by 710 — To bridge this \nL32: gap, we develop WebShop - a simulated e-commerce website environment with 1.18 \nL33: million real-world products and 12, 087 crowd- ...\nL34:   * 【8†[PDF] WebShop: Towards Scalable Real-World Web ...; publish_date: \nL35: none†www.semanticscholar.org】 It is shown that agents trained on WebShop exhibit\nL36:  non-trivial sim-to-real transfer when evaluated on amazon.com and ebay.com, \nL37: indicating the potential ...\nL38:   * 【9†X-WebAgentBench: A Multilingual Interactive Web ...; publish_date: \nL39: none†aclanthology.org】 by P Wang · 2025 · Cited by 3 — (2023) based on the \nL40: English WebShop benchmark (Yao et al., 2022), while the multilingual task scores\nL41:  are ob- tained through evaluation on our own benchmark.",
+            "tool_call_id": "fixture-tool-call-9-13"
+        },
+        {
+            "role": "tool",
+            "content": "[18] ALFWorld benchmark (Search_Results/ALFWorld benchmark)\n**viewing lines [0 - 31] of 31**\n\nL0: \nL1: URL: Search_Results/ALFWorld benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†ALFWorld; publish_date: none†alfworld.github.io】 ALFWorld contains \nL5: interactive TextWorld environments (Côté et. al) that parallel embodied worlds \nL6: in the ALFRED dataset (Shridhar et. al).\nL7:   * 【1†ALFWorld: Aligning Text and Embodied Environments for ...; publish_date: \nL8: none†arxiv.org】 by M Shridhar · 2020 · Cited by 674 — ALFWorld enables the \nL9: creation of a new BUTLER agent whose abstract knowledge, learned in TextWorld, \nL10: corresponds directly to concrete, visually grounded actions.\nL11:   * 【2†ALFWorld: Aligning Text and Embodied Environments ...; publish_date: \nL12: none†github.com】 ALFWorld contains interactive TextWorld environments (Côté et. \nL13: al) that parallel embodied worlds in the ALFRED dataset (Shridhar et. al).\nL14:   * 【3†alfworld - benchmark's activity; publish_date: none†huggingface.co】 MM-\nL15: IQ: Benchmarking Human-Like Abstraction and Reasoning in Multimodal Models Paper\nL16:  • 2502.00698 • Published Feb 1 • 24\nL17:   * 【4†Tackling AlfWorld with Action Attention and Common ...; publish_date: \nL18: none†neurips.cc】 On the Alfworld benchmark for indoor instruction following, we \nL19: achieve a significantly higher success rate (50% over the baseline) with our \nL20: novel object ...\nL21:   * 【5†ALFWORLD: ALIGNING TEXT AND EMBODIED ...; publish_date: \nL22: none†openreview.net】 by M Shridhar · Cited by 674 — The ALFRED dataset (Shridhar\nL23:  et al., 2020), set in the THOR simulator (Kolve et al., 2017), is a benchmark \nL24: for learning to com- plete embodied household tasks ...\nL25:   * 【6†AlfWorld; publish_date: none†primo.ai】 Mar 23, 2024 — A simulator that \nL26: enables agents to learn abstract, text based policies in TextWorld (Côté et al.,\nL27:  2018) and then execute goals from the ALFRED benchmark.\nL28:   * 【7†AlfWorld performance across 134 tasks showing cumulative...; \nL29: publish_date: none†www.researchgate.net】 In the AlfWorld benchmark, we defined \nL30: hallucination as the occurrence of two or more consecutive identical actions in \nL31: which the environment responded with ...",
+            "tool_call_id": "fixture-tool-call-9-14"
+        },
+        {
+            "role": "tool",
+            "content": "[19] Mind2Web benchmark (Search_Results/Mind2Web benchmark)\n**viewing lines [0 - 40] of 40**\n\nL0: \nL1: URL: Search_Results/Mind2Web benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†Mind2Web: Towards a Generalist Agent for the Web; publish_date: none†osu-\nL5: nlp-group.github.io】 Mind2Web is a dataset for developing and evaluating \nL6: generalist agents for the web that can follow language instructions to complete \nL7: complex tasks on any ...\nL8:   * 【1†Online-Mind2Web Leaderboard; publish_date: none†huggingface.co】 Online-\nL9: Mind2Web is a benchmark designed to evaluate the real-world performance of web \nL10: agents on live websites, featuring 300 tasks across 136 popular sites ...\nL11:   * 【2†Mind2Web: Towards a Generalist Agent for the Web; publish_date: \nL12: none†github.com】 Mind2Web is the first dataset for developing and evaluating \nL13: generalist agents for the web that can follow language instructions to complete \nL14: complex tasks on any ...\nL15:   * 【3†HAL: Online Mind2Web Leaderboard; publish_date: \nL16: none†hal.cs.princeton.edu】 Online Mind2Web leaderboard for evaluating AI agents'\nL17:  ability to complete tasks on real, changing webpages.\nL18:   * 【4†[2506.21506] Mind2Web 2: Evaluating Agentic Search with ...; \nL19: publish_date: none†arxiv.org】 by B Gou · 2025 · Cited by 11 — In this paper, we \nL20: introduce Mind2Web 2, a benchmark of 130 realistic, high-quality, and long-\nL21: horizon tasks that require real-time web browsing and extensive ...\nL22:   * 【5†Mind2Web 2: Evaluating Agentic Search with Agent-as-a-Judge; \nL23: publish_date: none†osu-nlp-group.github.io】 We introduce Mind2Web 2, a benchmark\nL24:  of 130 realistic, high-quality, long-horizon tasks that require real-time web \nL25: browsing and extensive information ...\nL26:   * 【6†Mind2Web: The Benchmark for AI Agent Evaluation and ...; publish_date: \nL27: none†www.enhans.ai】 Sep 26, 2025 — Mind2Web is a globally recognized web-based \nL28: AI Agent evaluation benchmark introduced by the NLP group at Ohio State \nL29: University at NeurIPS 2023.\nL30:   * 【7†Evaluating AI Web Agents: Insights from the WebCanvas ...; publish_date: \nL31: none†medium.com】 Thanks to the comprehensive WebCanvas Benchmark, which \nL32: incorporates a robust Mind2Web-Live data set of 542 live web tasks and 2,439 ...\nL33:   * 【8†Mind2Web: Towards a Generalist Agent for the Web; publish_date: \nL34: none†proceedings.neurips.cc】 by X Deng · 2023 · Cited by 760 — We introduce \nL35: Mind2Web, the first dataset for developing and evaluating generalist agents for \nL36: the web that can follow language instructions to complete complex ...\nL37:   * 【9†Mind2Web: Towards a Generalist Agent for the Web; publish_date: \nL38: none†openreview.net】 by X Deng · Cited by 760 — We introduce Mind2Web, the first\nL39:  dataset for developing and evaluating generalist agents for the web that can \nL40: follow language instructions to complete complex ...",
+            "tool_call_id": "fixture-tool-call-9-15"
+        },
+        {
+            "role": "tool",
+            "content": "[20] VisualWebArena benchmark (Search_Results/VisualWebArena benchmark)\n**viewing lines [0 - 38] of 38**\n\nL0: \nL1: URL: Search_Results/VisualWebArena benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†VisualWebArena is a benchmark for multimodal agents.; publish_date: \nL5: none†github.com】 VisualWebArena is a realistic and diverse benchmark for \nL6: evaluating multimodal autonomous language agents.\nL7:   * 【1†Evaluating Multimodal Agents on Realistic Visual Web Tasks; publish_date:\nL8:  none†arxiv.org】 by JY Koh · 2024 · Cited by 363 — To bridge this gap, we \nL9: introduce VisualWebArena, a benchmark designed to assess the performance of \nL10: multimodal web agents on realistic \\textit{ ...\nL11:   * 【2†Evaluating Multimodal Agents on Realistic Visual Web Tasks; publish_date:\nL12:  none†jykoh.com】 To bridge this gap, we introduce VisualWebArena, a benchmark \nL13: designed to assess the performance of multimodal web agents on realistic \nL14: visually grounded tasks.\nL15:   * 【3†VisualWebArena: Evaluating Multimodal Agents on ...; publish_date: \nL16: none†arxiv.org】 VisualWebArena is a research benchmark to measure and evaluate \nL17: the progress of multimodal agents. It is primarily meant to act as a self-\nL18: contained sandbox ...\nL19:   * 【4†Evaluating Multimodal Agents on Realistic Visual Web Tasks; publish_date:\nL20:  none†aclanthology.org】 by JY Koh · 2024 · Cited by 363 — To bridge this gap, we\nL21:  introduce VisualWebArena, a benchmark designed to assess the performance of \nL22: multimodal web agents on *realistic visually grounded tasks*.\nL23:   * 【5†Evaluating Multimodal Agents on Realistic Visual Web Tasks; publish_date:\nL24:  none†www.semanticscholar.org】 VisualWebArena: Evaluating Multimodal Agents on \nL25: Realistic Visual Web Tasks ... MMInA, a multihop and multimodal benchmark to \nL26: evaluate the embodied agents ...\nL27:   * 【6†CMU Researchers Introduce VisualWebArena: An AI ...; publish_date: \nL28: none†www.marktechpost.com】 Feb 9, 2024 — VisualWebArena, a benchmark designed \nL29: and developed to evaluate the performance of multimodal web agents on realistic \nL30: and visually stimulating challenges.\nL31:   * 【7†Evaluating Multimodal Agents on Realistic Visual Web Tasks; publish_date:\nL32:  none†www.themoonlight.io】 The paper \"VisualWebArena: Evaluating Multimodal \nL33: Agents on Realistic Visually Grounded Web Tasks\" introduces a new benchmark, \nL34: **VisualWebArena**, ...\nL35:   * 【8†WebArena: A Realistic Web Environment for Building ...; publish_date: \nL36: none†webarena.dev】 Our benchmark is implemented in our fully interactable \nL37: highly-realistic WebArena environment. It features diverse tasks human may \nL38: encounter in their daily ...",
+            "tool_call_id": "fixture-tool-call-9-16"
+        },
+        {
+            "role": "tool",
+            "content": "[21] SearchBench benchmark (Search_Results/SearchBench benchmark)\n**viewing lines [0 - 40] of 40**\n\nL0: \nL1: URL: Search_Results/SearchBench benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†Talc-AI/search-bench; publish_date: none†github.com】 A practical \nL5: benchmark that focuses on every day helpfulness of LLM products, not just the \nL6: underlying models. Searchbench is a benchmark that addresses these ...\nL7:   * 【1†Evaluating LLMs' Ability to Reason About Search Problems; publish_date: \nL8: none†arxiv.org】 These capabilities are essential for robust reasoning, making \nL9: SearchBench a valuable benchmark for evaluating LLMs' reasoning capabilities as \nL10: they continue to ...\nL11:   * 【2†NasimBrz/SearchBench · Datasets at Hugging Face; publish_date: \nL12: none†huggingface.co】 Dataset Summary. SearchBench is a benchmark designed to \nL13: evaluate Language Models' (LLMs) ability to solve state-based problems that \nL14: require combinatorial search ...\nL15:   * 【3†Evaluating LLMs' Ability to Reason About Search Problems; publish_date: \nL16: none†openreview.net】 2025年10月22日 — To further investigate this, we introduce a \nL17: new benchmark, SearchBench, which contains 11 unique search problems inspired by\nL18:  intuitive puzzles.\nL19:   * 【4†Navigating the Labyrinth: Evaluating and Enhancing LLMs' ...; \nL20: publish_date: none†hub.baai.ac.cn】 2024年6月17日 — \nL21: 论文提出了一个新的基准测试SearchBench，包含11种独特的搜索问题类型，并自动化生成任意数量的实例和分析解决方案的可行性、正确性和最优性。论文使用A* \nL22: ...\nL23:   * 【5†Towards Unified Text-based Person Retrieval: A Large- ...; publish_date: \nL24: none†blog.csdn.net】 2023年10月17日 — ... Search \nL25: Benchmark（面向统一的基于文本的人物检索：一个大规模的多属性和语言搜索基准）; 研究背景. 相关工作; BENCHMARK. 论文方法分析. 网络框架;\nL26:  1、APTM ...\nL27:   * 【6†Desearch-ai/ai-search-benchmark; publish_date: none†github.com】 The \nL28: SearchBench repository addresses common issues with traditional benchmarks by \nL29: focusing on practical, everyday use cases rather than theoretical limits. It ...\nL30:   * 【7†o1 results for 3 benchmarks: PlanBench, SearchBench, ...; publish_date: \nL31: none†www.reddit.com】 o1 results for 3 benchmarks: PlanBench, SearchBench, and \nL32: Summary of a Haystack. AI. PlanBench: Paper \"LLMs Still Can't Plan; Can LRMs? A \nL33: ...\nL34:   * 【8†Evaluating LLMs' Ability to Reason About Search Problems; publish_date: \nL35: none†ui.adsabs.harvard.edu】 To further investigate this, we introduce a new \nL36: benchmark, SearchBench, which contains 11 unique search problems inspired by \nL37: intuitive puzzles. Each SearchBench ...\nL38:   * 【9†Introducing SearchBench; publish_date: none†www.tag1consulting.com】 \nL39: Toward this goal, over the weekend I launched a new project called SearchBench, \nL40: a Drupal module for benchmarking Drupal's search performance. As the module ...",
+            "tool_call_id": "fixture-tool-call-9-17"
+        },
+        {
+            "role": "tool",
+            "content": "[22] WebVLN benchmark (Search_Results/WebVLN benchmark)\n**viewing lines [0 - 42] of 42**\n\nL0: \nL1: URL: Search_Results/WebVLN benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†WebVLN: Vision-and-Language Navigation on Websites; publish_date: \nL5: none†ojs.aaai.org】 by Q Chen · 2024 · Cited by 35 — the WebVLN-v1 dataset, where\nL6:  the performance is far from saturation, highlighting the utility of our \nL7: WebVLN-v1 as a benchmark to assess progress in this field.\nL8:   * 【1†[2312.15820] WebVLN: Vision-and-Language Navigation on Websites; \nL9: publish_date: none†ar5iv.labs.arxiv.org】 Experimental results show that WebVLN-\nL10: Net outperforms current VLN and web-related navigation methods. ... Code is \nL11: available at: https://github.com/WebVLN/WebVLN.\nL12:   * 【2†WebVLN: Vision-and-Language Navigation on Websites; publish_date: \nL13: none†github.com】 Experimental results show that WebVLN-Net outperforms current \nL14: VLN and web-related navigation methods. We believe that the introduction of the \nL15: new WebVLN task ...\nL16:   * 【3†Vision-and-Language Navigation in the Real-World; publish_date: \nL17: none†digital.library.adelaide.edu.au】 By leveraging our proposed WebVLN-v1 \nL18: dataset, experimental results showcase the superior performance of WebVLN-Net \nL19: compared to existing VLN and web-related ...\nL20:   * 【4†WebVLN: Vision-and-Language Navigation on Websites; publish_date: \nL21: none†www.researchgate.net】 Experimental results show that WebVLN-Net outperforms\nL22:  current VLN and web-related navigation methods. We believe that the \nL23: introduction of the newWebVLN task and ...\nL24:   * 【5†[PDF] WebVLN: Vision-and-Language Navigation on Websites; publish_date: \nL25: none†www.semanticscholar.org】 A new task named Vision-and-Language Navigation on\nL26:  Websites (WebVLN), where question-based instructions are used to train an \nL27: agent, emulating how users ...\nL28:   * 【6†WebVLN: Vision-and-Language Navigation on Websites; publish_date: \nL29: none†arxiv.org】 by Q Chen · 2023 · Cited by 35 — Experimental results show that \nL30: WebVLN-Net outperforms current VLN and web-related navigation methods. We \nL31: believe that the introduction of the ...\nL32:   * 【7†Human-Aware Vision-and-Language Navigation; publish_date: \nL33: none†proceedings.neurips.cc】 by H Li · 2024 · Cited by 19 — Vision-and-Language \nL34: Navigation (VLN) [2, 7, 9, 40] has emerged as a key benchmark for evaluating. \nL35: Sim2Real transfer [23], showing impressive performance in ...\nL36:   * 【8†LiveBench; publish_date: none†livebench.ai】 Introducing LiveBench: a \nL37: benchmark for LLMs designed with test set contamination and objective evaluation\nL38:  in mind.\nL39:   * 【9†MG-VLN: Benchmarking Multi-Goal and Long-Horizon ...; publish_date: \nL40: none†ieeexplore.ieee.org】 by J Zhang · 2024 — This task aims to provide a \nL41: simulation benchmark to guide the design of lifelong and long-horizon navigation\nL42:  robots.",
+            "tool_call_id": "fixture-tool-call-9-18"
+        },
+        {
+            "role": "tool",
+            "content": "[23] WebNav benchmark (Search_Results/WebNav benchmark)\n**viewing lines [0 - 36] of 36**\n\nL0: \nL1: URL: Search_Results/WebNav benchmark\nL2: # Search Results\nL3: \nL4:   * 【0†WebNav: A New Large-Scale Task for Natural Language ...; publish_date: \nL5: none†github.com】 WebNav is a benchmark task for evaluating an agent with \nL6: abilities to understand natural language and plan on partially observed \nL7: environments.\nL8:   * 【1†[1602.02261] End-to-End Goal-Driven Web Navigation; publish_date: \nL9: none†arxiv.org】 by R Nogueira · 2016 · Cited by 39 — We propose a goal-driven \nL10: web navigation as a benchmark task for evaluating an agent with abilities to \nL11: understand natural language and plan on partially ...\nL12:   * 【2†nyu-dl/dl4ir-webnav; publish_date: none†github.com】 WebNav is a benchmark\nL13:  task for evaluating an agent with abilities to understand natural language and \nL14: plan on partially observed environments.\nL15:   * 【3†WebNav: A New Large-Scale Task for Natural Language ...; publish_date: \nL16: none†www.researchgate.net】 We propose a goal-driven web navigation as a \nL17: benchmark task for evaluating an agent with abilities to understand natural \nL18: language and plan on partially ...\nL19:   * 【4†WebNav: An Intelligent Agent for Voice-Controlled Web ...; publish_date: \nL20: none†arxiv.org】 In this paper, we introduce WebNav, an innovative multi-modal \nL21: agent designed to automate web tasks based on natural language.\nL22:   * 【5†WebCanvas: Benchmarking Web Agents in Online ...; publish_date: \nL23: none†openreview.net】 by Y Pan · Cited by 78 — TL;DR: We introduce WebCanvas, an \nL24: online evaluation framework for web agents designed to address the dynamic \nL25: nature of web interactions.\nL26:   * 【6†WebNav: An Intelligent Agent for Voice-Controlled Web ...; publish_date: \nL27: none†huggingface.co】 Mar 17, 2025 — Preliminary evaluations show that WebNav \nL28: outperforms traditional screen readers in response time and task completion \nL29: accuracy for the visually ...\nL30:   * 【7†Web Bench - A new way to compare AI Browser Agents; publish_date: \nL31: none†www.skyvern.com】 May 29, 2025 — TL;DR: Web Bench is a new dataset to \nL32: evaluate web browsing agents that consists of 5,750 tasks on 452 different \nL33: websites, with 2,454 tasks ...\nL34:   * 【8†Web Framework Benchmarks; publish_date: none†www.techempower.com】 In the \nL35: following tests, we have measured the performance of several web application \nL36: platforms, full-stack frameworks, and micro-frameworks.",
+            "tool_call_id": "fixture-tool-call-9-19"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "我们收到了大量关于各种基准测试的搜索结果。现在，我们需要整理一份针对搜索智能体的基准测试列表。这些基准测试可以分为几类：网络浏览智能体基准测试（例如BrowseComp、WebVoyager、WebArena、VisualWebArena、Mind2Web、WebVLN、WebNav）、工具使用智能体基准测试（例如ToolBench、GTA）、问答/检索基准测试（例如HotpotQA、FEVER、TriviaQA、Natural Questions、MS MARCO、BEIR、MIRACL），以及特定领域的搜索基准测试（例如HSCodeComp、WebShop、ALFWorld）。此外，还有一些通用智能体基准测试，例如GAIA、SuperCLUE-Agent等。用户要求“详细介绍各自的特点、使用场景、例题”。因此，我需要为每个基准测试提供详细的描述，包括其特点、使用场景，并举例说明。为了全面获取这些信息，我将同时打开以下基准测试的相关页面：WebVoyager（索引3）、WebArena（索引1）、GAIA（索引1）、ToolBench（索引0）、HotpotQA（索引0）、FEVER（索引1）、TriviaQA（索引0）、Natural Questions（索引0）、MS MARCO（索引0）、BEIR（索引1）、MIRACL（索引0）、WebShop（索引0）、ALFWorld（索引0）、Mind2Web（索引0）、VisualWebArena（索引0）、SearchBench（索引1）、WebVLN（索引1）和WebNav（索引0）。",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-30-0",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 4, \"id\": 3}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-1",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 5, \"id\": 1}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-2",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 6, \"id\": 1}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-3",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 7, \"id\": 0}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-4",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 8, \"id\": 0}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-5",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 9, \"id\": 1}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-6",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 10, \"id\": 0}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-7",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 11, \"id\": 0}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-8",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 12, \"id\": 0}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-9",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 13, \"id\": 1}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-10",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 14, \"id\": 0}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-11",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 17, \"id\": 0}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-12",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 18, \"id\": 0}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-13",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 19, \"id\": 0}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-14",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 20, \"id\": 0}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-15",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 21, \"id\": 1}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-16",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 22, \"id\": 1}"
+                    }
+                },
+                {
+                    "id": "fixture-tool-call-30-17",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 23, \"id\": 0}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[24]  (https://ar5iv.org/abs/2401.13919)\n**viewing lines [0 - 129] of 825**\n\nL0: \nL1: URL: https://ar5iv.org/abs/2401.13919\nL2: # WebVoyager : Building an End-to-End Web Agent with\nL3: Large Multimodal Models\nL4: \nL5: Hongliang He1,3 , Wenlin Yao2, Kaixin Ma2, Wenhao Yu2, Yong Dai2,\nL6: \nL7: Hongming Zhang2,\nL8: Zhenzhong Lan3,\nL9: Dong Yu2\nL10: 1Zhejiang University,\nL11: 2Tencent AI Lab,\nL12: 3Westlake University\nL13: hehongliang@westlake.edu.cn, wenlinyao@global.tencent.com\nL14: Work done during the internship at Tencent AI Lab.\nL15: \nL16: ###### Abstract\nL17: \nL18: The advancement of large language models (LLMs) leads to a new era marked by the\nL19:  development of autonomous applications in the real world, which drives \nL20: innovation in the creation of advanced web-based agents. Existing web agents \nL21: typically only handle one input modality and are evaluated only in simplified \nL22: web simulators or static web snapshots, greatly limiting their applicability in \nL23: real-world scenarios. To bridge this gap, we introduce WebVoyager, an innovative\nL24:  Large Multimodal Model (LMM) powered web agent that can complete user \nL25: instructions end-to-end by interacting with real-world websites. Moreover, we \nL26: propose a new evaluation protocol for web agents to address the challenges of \nL27: automatic evaluation of open-ended web agent tasks, leveraging the robust \nL28: multimodal comprehension capabilities of GPT-4V. We create a new benchmark by \nL29: gathering real-world tasks from 15 widely used websites to evaluate our agents. \nL30: We show that WebVoyager achieves a 55.7% task success rate, significantly \nL31: surpassing the performance of both GPT-4 (All Tools) and the WebVoyager (text-\nL32: only) setups, underscoring the exceptional capability of WebVoyager in practical\nL33:  applications. We found that our proposed automatic evaluation achieves 85.3% \nL34: agreement with human judgment, paving the way for further development of web \nL35: agents in a real-world setting.111Our code and data will be released at \nL36: https://github.com/MinorJerry/WebVoyager\nL37: \nL38: ## 1 Introduction\nL39: \nL40: The recent advancement of large language models (LLMs), such as ChatGPT and \nL41: GPT-4 (OpenAI, 2023), have sparked significant interest in developing LLM-based \nL42: autonomous agents (AutoGPT, 2022) for complex task execution (Qin et al., 2023; \nL43: Schick et al., 2023). Recent studies have explored the construction of text-\nL44: based web browsing environments and how to instruct large language model agents \nL45: to perform web navigation (Nakano et al., 2021; Gur et al., 2023; Zhou et al., \nL46: 2023; Lu et al., 2023). The primary challenge in these works lies in managing \nL47: complex and verbose HTML texts, and solutions include simplifying and \nL48: structuring HTML (Nakano et al., 2021; Zhou et al., 2023; Gur et al., 2023; Deng\nL49:  et al., 2023).\nL50: \nL51: However, existing approaches overlook a critical functionality of browsing: \nL52: rendering HTML into visual webpages. Particularly, vision capability is crucial \nL53: for utilizing tools like web browsers, as rendered web pages are inherently \nL54: designed with user experience (UX), emphasizing intuitive information and \nL55: structured presentation. This design principle of rendering makes visual \nL56: analysis more effective than mere HTML representation. At present, large \nL57: multimodal models (LMMs), particularly GPT-4V(ision) (OpenAI, 2023) and Gemini \nL58: (Team et al., 2023), demonstrate a remarkable ability to integrate intricate \nL59: visual cues with textual information. Existing studies such as Pix2Struct (Lee \nL60: et al., 2023) and WebArena (Zhou et al., 2023), have initiated explorations into\nL61:  using screenshots as inputs for decision-making in web navigation, yet these \nL62: are preliminary and do not represent a deep exploration. Therefore, building \nL63: multimodal web agents to leverage the environment rendered by browsers through \nL64: screenshots, thus mimicking human web browsing behavior, is now a viable \nL65: approach to enhance web navigation efficiency.\nL66: \nL67: We introduce WebVoyager, a multimodal web agent designed to handle web tasks \nL68: online in an end-to-end manner, which denotes managing the process from start to\nL69:  finish autonomously without intermediate human intervention. We construct an \nL70: online environment using Selenium for WebVoyager, feeding it with screenshots \nL71: and textual content in interactive web elements. Inspired by Set-of-Mark \nL72: Prompting (Yang et al., 2023a), we mark interactive web elements on screenshots \nL73: (see Figure 2) to facilitate decision-making for WebVoyager. As a pioneer in \nL74: combining vision and text information during web navigation, we advocate that \nL75: autonomous end-to-end task completion, multimodal capabilities and online \nL76: navigation constitute the essential trajectory toward the genuine intelligence \nL77: of web agents.\nL78: \nL79: Another challenge arises when it comes to evaluating an end-to-end web agent \nL80: with online navigation. Existing benchmarks, such as Mind2Web (Deng et al., \nL81: 2023), primarily focus on stepwise and offline evaluation, where agents follow \nL82: predefined “golden” trajectory for action selection. This approach, however, may\nL83:  not fully account for the variety of viable strategies to accomplish a task, as\nL84:  it only reflects one possible plan. This limitation could lead to a biased \nL85: evaluation and difficulties in fairly comparing different methods. To more \nL86: accurately gauge the capabilities of web agents in end-to-end task completion, \nL87: we save screenshots throughout the online navigation process, and then use \nL88: GPT-4V to evaluate these trajectories and the final results automatically. Human\nL89:  evaluations are also conducted to verify the results and confirm the \nL90: reliability of GPT-4V as the evaluator.\nL91: \nL92: We conduct evaluations on a collected dataset, which is semi-automatically \nL93: generated using a self-instruct (Wang et al., 2022) method, comprising 300 web \nL94: tasks from 15 commonly accessed websites. Additionally, we extract 90 web-\nL95: related tasks of level 1 and level 2 from the GAIA (Mialon et al., 2023) to \nL96: enrich our evaluation. We compare our WebVoyager with 1) GPT-4 (All \nL97: Tools)222GPT-4 (All Tools) is an integrated tool-based agent released by OpenAI \nL98: in Oct. 2023. See https://chat.openai.com/, and 2) WebVoyager in a text-only \nL99: setting, employing the accessibility tree proposed in WebArena (Zhou et al., \nL100: 2023) to describe web pages. The results show that WebVoyager achieves a Task \nL101: Success Rate of 55.7%, significantly outperforming GPT-4 (All Tools) with a rate\nL102:  of 32.7% and the text-only setting with a rate of 39.0%, demonstrating the \nL103: effectiveness of our method. Furthermore, we report the consistency between \nL104: human-human and human-GPT4V to ensure credibility. Our main contributions are as\nL105:  follows:\nL106: \nL107: - •\nL108: We employ a multimodal web agent that integrates textual and visual information \nL109: to address web tasks end-to-end and introduce a generalist planning approach for\nL110:  navigation.\nL111: \nL112: - •\nL113: We build an online web browsing environment, offering a variety of tasks \nL114: centered on widely used websites and introducing a method for expanding these \nL115: tasks.\nL116: \nL117: - •\nL118: We conduct manual evaluations of navigation trajectories and propose an \nL119: automated evaluation protocol using GPT-4V. We present a comprehensive analysis \nL120: of the evaluation results and show that GPT-4V can serve as a reliable evaluator\nL121:  for online agents.\nL122: \nL123: ## 2 Related Work\nL124: \nL125: ### 2.1 Web Agents\nL126: \nL127: Autonomous web navigation (Shi et al., 2017; Yang et al., 2023b) requires an \nL128: agent to follow instructions, construct plans, comprehend complex web \nL129: structures, and decompose tasks into step-by-step decisions (Weng, 2023). To ",
+            "tool_call_id": "fixture-tool-call-30-0"
+        },
+        {
+            "role": "tool",
+            "content": "[25]  (https://ar5iv.org/abs/2307.13854)\n**viewing lines [0 - 124] of 1092**\n\nL0: \nL1: URL: https://ar5iv.org/abs/2307.13854\nL2: # WebArena: A Realistic Web Environment for Building Autonomous Agents\nL3: \nL4: Shuyan Zhou Frank F. Xu11footnotemark: 1 Hao Zhu Xuhui Zhou22footnotemark: 2\nL5: \nL6: Robert Lo22footnotemark: 2 Abishek Sridhar22footnotemark: 2 Xianyi Cheng Tianyue\nL7:  Ou\nL8: Yonatan Bisk Daniel Fried Uri Alon Graham Neubig\nL9: Carnegie Mellon University\nL10: {shuyanzh, fangzhex, gneubig}@cs.cmu.edu\nL11: Lead contributors.Equal contribution.\nL12: \nL13: ###### Abstract\nL14: \nL15: With advances in generative AI, there is now potential for autonomous agents to \nL16: manage daily tasks via natural language commands. However, current agents are \nL17: primarily created and tested in simplified synthetic environments, leading to a \nL18: disconnect with real-world scenarios. In this paper, we build an environment for\nL19:  language-guided agents that is highly realistic and reproducible. Specifically,\nL20:  we focus on agents that perform tasks on the web, and create an environment \nL21: with fully functional websites from four common domains: e-commerce, social \nL22: forum discussions, collaborative software development, and content management. \nL23: Our environment is enriched with tools (e.g., a map) and external knowledge \nL24: bases (e.g., user manuals) to encourage human-like task-solving. Building upon \nL25: our environment, we release a set of benchmark tasks focusing on evaluating the \nL26: functional correctness of task completions. The tasks in our benchmark are \nL27: diverse, long-horizon, and designed to emulate tasks that humans routinely \nL28: perform on the internet. We experiment with several baseline agents, integrating\nL29:  recent techniques such as reasoning before acting. The results demonstrate that\nL30:  solving complex tasks is challenging: our best GPT-4-based agent only achieves \nL31: an end-to-end task success rate of 14.41%, significantly lower than the human \nL32: performance of 78.24%. These results highlight the need for further development \nL33: of robust agents, that current state-of-the-art large language models are far \nL34: from perfect performance in these real-life tasks, and that WebArena can be used\nL35:  to measure such progress.\nL36: \nL37: Our code, data, environment reproduction resources, and video demonstrations are\nL38:  publicly available at https://webarena.dev/.\nL39: \nL40: ## 1 Introduction\nL41: \nL42: Autonomous agents that perform everyday tasks via human natural language \nL43: commands could significantly augment human capabilities, improve efficiency, and\nL44:  increase accessibility. Nonetheless, to fully leverage the power of autonomous \nL45: agents, it is crucial to understand their behavior within an environment that is\nL46:  both authentic and reproducible. This will allow measurement of the ability of \nL47: agents on tasks that human users care about in a fair and consistent manner.\nL48: \nL49: Current environments for evaluate agents tend to over-simplify real-world \nL50: situations. As a result, the functionality of many environments is a limited \nL51: version of their real-world counterparts, leading to a lack of task diversity \nL52: (Shi et al., 2017; Anderson et al., 2018; Gordon et al., 2018; Misra et al., \nL53: 2016; Shridhar et al., 2020; 2021; Yao et al., 2022a). In addition, these \nL54: simplifications often lower the complexity of tasks as compared to their \nL55: execution in the real world (Puig et al., 2018; Shridhar et al., 2020; Yao et \nL56: al., 2022a). Finally, some environments are presented as a static resource (Shi \nL57: et al., 2017; Deng et al., 2023) where agents are confined to accessing only \nL58: those states that were previously cached during data collection, thus limiting \nL59: the breadth and diversity of exploration. Dor evaluation, many environments \nL60: focus on comparing the textual surface form of the predicted action sequences \nL61: with reference action sequences, disregarding the functional correctness of the \nL62: executions and possible alternative solutions (Puig et al., 2018; Jernite et \nL63: al., 2019; Xu et al., 2021; Li et al., 2020; Deng et al., 2023). These \nL64: limitations often result in a discrepancy between simulated environments and the\nL65:  real world, and can potentially impact the generalizability of AI agents to \nL66: successfully understand, adapt, and operate within complex real-world \nL67: situations.\nL68: \nL69: We introduce WebArena, a realistic and reproducible web environment designed to \nL70: facilitate the development of autonomous agents capable of executing tasks (§2).\nL71:  An overview of WebArena is in Figure 1. Our environment comprises four fully \nL72: operational, self-hosted web applications, each representing a distinct domain \nL73: prevalent on the internet: online shopping, discussion forums, collaborative \nL74: development, and business content management. Furthermore, WebArena incorporates\nL75:  several utility tools, such as map, calculator, and scratchpad, to best support\nL76:  possible human-like task executions. Lastly, WebArena is complemented by an \nL77: extensive collection of documentation and knowledge bases that vary from general\nL78:  resources like English Wikipedia to more domain-specific references, such as \nL79: manuals for using the integrated development tool (Fan et al., 2022). The \nL80: content populating these websites is extracted from their real-world \nL81: counterparts, preserving the authenticity of the content served on each \nL82: platform. We deliver the hosting services using Docker containers with gym-APIs \nL83: (Brockman et al., 2016), ensuring both the usability and the reproducibility of \nL84: WebArena.\nL85: \nL86: Along with WebArena, we release a ready-to-use benchmark with 812 long-horizon \nL87: web-based tasks (§3). Each task is described as a high-level natural language \nL88: intent, emulating the abstract language usage patterns typically employed by \nL89: humans (Bisk et al., 2019). Two example intents are shown in the upper left of \nL90: Figure 1. We focus on evaluating the functional correctness of these tasks, \nL91: i.e., does the result of the execution actually achieve the desired goal (§3.2).\nL92:  For instance, to evaluate the example in Figure 2, our evaluation method \nL93: verifies the concrete contents in the designated repository. This evaluation is \nL94: not only more reliable (Zhong et al., 2017; Chen et al., 2021; Wang et al., \nL95: 2022) than comparing the textual surface-form action sequences (Puig et al., \nL96: 2018; Deng et al., 2023) but also accommodate a range of potential valid paths \nL97: to achieve the same goal, which is a ubiquitous phenomenon in sufficiently \nL98: complex tasks.\nL99: \nL100: We use this benchmark to evaluate several agents that can follow NL command and \nL101: perform web-based tasks (§4). These agents are implemented in a few-shot in-\nL102: context learning fashion with powerful large language models (LLMs) such as \nL103: GPT-4 and PALM-2. Experiment results show that the best GPT-4 agent performance \nL104: is somewhat limited, with an end-to-end task success rate of only 14.41%, while \nL105: the human performance is 78.24%. We hypothesize that the limited performance of \nL106: current LLMs stems from a lack of crucial capabilities such as active \nL107: exploration and failure recovery to successfully perform complex tasks (§5.2). \nL108: These outcomes underscore the necessity for further development towards robust \nL109: and effective agents (LeCun, 2022) in WebArena.\nL110: \nL111: ## 2 WebArena: Websites as an Environment for Autonomous Agents\nL112: \nL113: Our goal is to create a realistic and reproducible web environment. We achieve \nL114: reproducibility by making the environment standalone, without relying on live \nL115: websites. This circumvents technical challenges such as bots being subject to \nL116: CAPTCHAs, unpredictable content modifications, and configuration changes, which \nL117: obstruct a fair comparison across different systems over time. We achieve \nL118: realism by using open-source libraries that underlie many in-use sites from \nL119: several popular categories and importing data to our environment from their \nL120: real-world counterparts.\nL121: \nL122: ### 2.1 Controlling Agents through High-level Natural Language\nL123: \nL124: The WebArena environment is denoted asℰ\\mathcal{E} with state space ",
+            "tool_call_id": "fixture-tool-call-30-1"
+        },
+        {
+            "role": "tool",
+            "content": "[26]  (https://ar5iv.org/abs/2311.12983)\n**viewing lines [0 - 118] of 1207**\n\nL0: \nL1: URL: https://ar5iv.org/abs/2311.12983\nL2: 1]FAIR, Meta 2]HuggingFace 3]AutoGPT 4]GenAI, Meta\nL3: \nL4: # GAIA: A Benchmark for General AI Assistants\nL5: \nL6: Grégoire Mialon Clémentine Fourrier Craig Swift Thomas Wolf Yann LeCun Thomas \nL7: Scialom [ [ [ [ {gmialon,tscialom}@meta.com clementine@huggingface.co\nL8: \nL9: ###### Abstract\nL10: \nL11: We introduce GAIA, a benchmark for General AI Assistants that, if solved, would \nL12: represent a milestone in AI research. GAIA proposes real-world questions that \nL13: require a set of fundamental abilities such as reasoning, multi-modality \nL14: handling, web browsing, and generally tool-use proficiency. GAIA questions are \nL15: conceptually simple for humans yet challenging for most advanced AIs: we show \nL16: that human respondents obtain 92% vs. 15% for GPT-4 equipped with plugins. This \nL17: notable performance disparity contrasts with the recent trend of LLMs \nL18: outperforming humans on tasks requiring professional skills in e.g. law or \nL19: chemistry. GAIA’s philosophy departs from the current trend in AI benchmarks \nL20: suggesting to target tasks that are ever more difficult for humans. We posit \nL21: that the advent of Artificial General Intelligence (AGI) hinges on a system’s \nL22: capability to exhibit similar robustness as the average human does on such \nL23: questions. Using GAIA’s methodology, we devise 466 questions and their answer. \nL24: We release our questions while retaining answers to 300 of them to power a \nL25: leader-board hereby accessible.\nL26: \nL27: \\correspondence\nL28: \nL29: ## 1 Introduction\nL30: \nL31: Large Language Models (LLMs) arguably open the way to general purpose systems. \nL32: Indeed, the latest among them (OpenAI, 2023; Anthropic, 2023; Anil et al., 2023;\nL33:  Touvron et al., 2023) are fluent, knowledgeable, aligned to some extent with \nL34: human preferences (Ouyang et al., 2022), and can be augmented (Mialon et al., \nL35: 2023) with tools such as web browsers or code interpreters in a zero or few-shot\nL36:  setting (Brown et al., 2020). However, evaluating these systems is an open \nL37: problem: given their emerging new capabilities, LLMs are regularly breaking AI \nL38: benchmarks, at an ever-increasing rate (Kiela et al., 2023).\nL39: \nL40: In search for more challenging benchmarks, current trend suggests to seek tasks \nL41: that are ever more difficult for humans, and challenge LLMs with more intricate \nL42: educational assessments, for example in STEM and Law, or target more complex \nL43: realisations, such as writing a coherent book. But, tasks that are difficult for\nL44:  humans are not necessarily difficult for recent systems: the challenging MMLU \nL45: or GSM8k benchmarks for example (Hendrycks et al., 2021; Cobbe et al., 2021) are\nL46:  already close to be solved,111GPT4 does 86.4% on MMLU. Human non-specialist \nL47: accuracy on the benchmark is only 34.5% Expert-level human performance is \nL48: estimated at 89.8%. due to rapid LLM improvement possibly combined with data \nL49: contamination.222See for example the case of Hellaswag. Furthermore, open-ended \nL50: generation generally requires human or model-based evaluation (Zheng et al., \nL51: 2023). Human evaluation will become less and less feasible when increasing the \nL52: task complexity, e.g. in terms of output length or required skills: how to \nL53: evaluate a book generated by an AI, or solutions to maths problems that few \nL54: people in the world can solve? Model-based evaluations on the other hand are by \nL55: construction dependent of stronger models hence cannot evaluate new state-of-\nL56: the-art models, without mentioning potential subtle biases such as preferring \nL57: the first choice presented (Zheng et al., 2023). Overall, evaluating new AI \nL58: systems requires to rethink benchmarks (Chollet, 2019).\nL59: \nL60: Alternatively to tasks that are harder for humans, AI systems could be asked to \nL61: solve conceptually simple tasks yet that require accurate execution of complex \nL62: sequences of actions, with large combinatorial spaces. The output could only be \nL63: obtained upon successful completion of the task and be easy to validate, \nL64: analogous to the Proof of Work algorithm (Jakobsson and Juels, 1999; Dwork and \nL65: Naor, 1993), where a computer is asked to solve a complex problem whose solution\nL66:  is easy to verify. Tasks for AI assistants, given their need for access to a \nL67: diverse and uncertain world, meet this criterion while being inherently rooted \nL68: in practical use cases.\nL69: \nL70: We move in that direction by proposing GAIA, a benchmark for General AI \nL71: Assistants featuring 466 carefully crafted questions and their answer, along \nL72: with the associated design methodology. Our questions are easy to create, \nL73: challenging for AI systems—for LLMs, most require complex generations—, yet \nL74: admit a unique, factual answer, allowing a simple and robust automatic \nL75: evaluation.\nL76: \nL77: GAIA attempts to avoid current pitfalls of LLMs evaluation by targeting:\nL78: \nL79: Real-world and challenging questions. For example, a LLM will typically need to \nL80: browse the open and changing web, handle multi-modality, or reason over multiple\nL81:  steps to answer our questions. Conversely, many LLM benchmarks are quite \nL82: specific and/or restricted to closed and synthetic environments.\nL83: \nL84: Easy interpretability through conceptually simple tasks—non experts annotators \nL85: exhibit a near perfect score—, associated reasoning trace, and few but highly \nL86: curated questions. This is in contrast with aggregated benchmarks that can lack \nL87: efficiency and reliability (Perlitz et al., 2023).\nL88: \nL89: Non-gameability. Answering the questions requires successful completion of some \nL90: number of steps, which cannot easily be brute forced due to their diversity. The\nL91:  possibility to check the reasoning trace, the accuracy required in the answers,\nL92:  their absence in plain text from the internet prevent a possible data \nL93: contamination. In contrast, multiple choice answers (e.g., MMLU) make \nL94: contamination assessment more difficult since a wrong reasoning trace can more \nL95: easily get to the correct choice.\nL96: \nL97: Simplicity of use. Crucially, the answers to our questions are factoid, concise \nL98: and unambiguous. These properties allow simple, fast and factual evaluation. Our\nL99:  questions are meant to be answered in zero shot, limiting the influence of the \nL100: evaluation setup. By opposition, many LLM benchmarks require evaluations that \nL101: are sensitive to the experimental setup such as the number and nature of prompts\nL102:  (Liang et al., 2022b) (Section 8.2), or the benchmark \nL103: implementation.333https://huggingface.co/blog/evaluating-mmlu-leaderboard\nL104: \nL105: In spite of being successful at tasks that are difficult for humans, the most \nL106: capable LLMs do poorly on GAIA. Even equipped with tools, GPT4 does not exceed a\nL107:  30% success rate for the easiest of our tasks, and 0% for the hardest. In the \nL108: meantime, the average success rate for human respondents is 92%. Consequently, a\nL109:  system capable of solving GAIA can be assessed in the context of t-AGI,444As \nL110: defined in https://www.alignmentforum.org/posts/BoA3agdkAzL6HQtQP/clarifying-\nL111: and-predicting-agi, a t-AGI beats, on most tasks, most human experts who are \nL112: given time t to perform the task noting that humans typically take between 6 \nL113: minutes for the simplest questions to 17 minutes for the most complex ones. From\nL114:  a related perspective, such system would arguably be a competent General AI \nL115: within the framework recently proposed in Morris et al. (2023), which also \nL116: appear to be the next milestone in AI research since ChatGPT (OpenAI, 2023) is \nL117: one level below. This paper covers the composition of GAIA, its design choices, \nL118: and explain how to craft questions and the associated challenges so that the ",
+            "tool_call_id": "fixture-tool-call-30-2"
+        },
+        {
+            "role": "tool",
+            "content": "Error fetching URL `https://github.com/sambanova/toolbench`",
+            "tool_call_id": "fixture-tool-call-30-3"
+        },
+        {
+            "role": "tool",
+            "content": "Error fetching URL `https://hotpotqa.github.io/`",
+            "tool_call_id": "fixture-tool-call-30-4"
+        },
+        {
+            "role": "tool",
+            "content": "[27]  (https://aclanthology.org/N18-1074.pdf)\n**viewing lines [0 - 134] of 809**\n\nL0: \nL1: URL: https://aclanthology.org/N18-1074.pdf\nL2: ===== Page 1 =====\nL3: \nL4: # FEVER: a large-scale dataset for Fact Extraction and VERification\nL5: \nL6: James Thorne\\(^1\\), Andreas Vlachos\\(^1\\), Christos Christodoulopoulos\\(^2\\), \nL7: and Arpit Mittal\\(^2\\)\nL8: \nL9: \\(^1\\)Department of Computer Science, University of Sheffield  \nL10: \\(^2\\)Amazon Research Cambridge  \nL11: {j.thorne, a.vlachos}@sheffield.ac.uk  \nL12: {chrchrs, mitarpit}@amazon.co.uk  \nL13: \nL14: ## Abstract\nL15: \nL16: In this paper we introduce a new publicly available dataset for verification \nL17: against textual sources, FEVER: Fact Extraction and VERification. It consists of\nL18:  185,445 claims generated by altering sentences extracted from Wikipedia and \nL19: subsequently verified without knowledge of the sentence they were derived from. \nL20: The claims are classified as Supported, Refuted or NotEnoughInfo by annotators \nL21: achieving 0.6841 in Fleiss \\(\\kappa\\). For the first two classes, the annotators\nL22:  also recorded the sentence(s) forming the necessary evidence for their \nL23: judgment. To characterize the challenge of the dataset presented, we develop a \nL24: pipeline approach and compare it to suitably designed oracles. The best accuracy\nL25:  we achieve on labeling a claim accompanied by the correct evidence is 31.87%, \nL26: while if we ignore the evidence we achieve 50.91%. Thus we believe that FEVER is\nL27:  a challenging testbed that will help stimulate progress on claim verification \nL28: against textual sources.\nL29: \nL30: ## 1 Introduction\nL31: \nL32: The ever-increasing amounts of textual information available combined with the \nL33: ease in sharing it through the web has increased the demand for verification, \nL34: also referred to as fact checking. While it has received a lot of attention in \nL35: the context of journalism, verification is important for other domains, e.g. \nL36: information in scientific publications, product reviews, etc.\nL37: \nL38: In this paper we focus on verification of textual claims against textual \nL39: sources. When compared to textual entailment (TE)/natural language inference \nL40: (Dagan et al., 2009; Bowman et al., 2015), the key difference is that in these \nL41: tasks the passage to verify each claim is given, and in recent years it \nL42: typically consists a single sentence, while in verification systems it is \nL43: retrieved from a large set of documents in order to form the evidence. Another \nL44: related task is question answering (QA), for which approaches have recently been\nL45:  extended to handle large-scale resources such as Wikipedia (Chen et al., 2017).\nL46:  However, questions typically provide the information needed to identify the \nL47: answer, while information missing from a claim can often be crucial in \nL48: retrieving refuting evidence. For example, a claim stating \"Fiji's largest \nL49: island is Kauai.\" can be refuted by retrieving \"Kauai is the oldest Hawaiian \nL50: Island.\" as evidence.\nL51: \nL52: Progress on the aforementioned tasks has benefited from the availability of \nL53: large-scale datasets (Bowman et al., 2015; Rajpurkar et al., 2016). However, \nL54: despite the rising interest in verification and fact checking among researchers,\nL55:  the datasets currently used for this task are limited to a few hundred claims. \nL56: Indicatively, the recently conducted Fake News Challenge (Pomerleau and Rao, \nL57: 2017) with 50 participating teams used a dataset consisting of 300 claims \nL58: verified against 2,595 associated news articles which is orders of magnitude \nL59: smaller than those used for TE and QA.\nL60: \nL61: In this paper we present a new dataset for claim verification, FEVER: Fact \nL62: Extraction and VERification. It consists of 185,445 claims manually verified \nL63: against the introductory sections of Wikipedia pages and classified as \nL64: Supported, Refuted or NotEnoughInfo. For the first two classes, systems and \nL65: annotators need to also return the combination of sentences forming the \nL66: necessary evidence supporting or refuting the claim (see Figure 1). The claims \nL67: were generated by human annotators extracting claims from Wikipedia and mutating\nL68:  them in a variety of ways, some of which were meaning-altering. The \nL69: verification of each\nL70: \nL71: 809\nL72: \nL73: Proceedings of NAACL-HLT 2018, pages 809–819\nL74: \nL75: New Orleans, Louisiana, June 1 - 6, 2018. ©2018 Association for Computational \nL76: Linguistics\nL77: \nL78: ===== Page 2 =====\nL79: \nL80: claim was conducted in a separate annotation process by annotators who were \nL81: aware of the page but not the sentence from which original claim was extracted \nL82: and thus in 31.75% of the claims more than one sentence was considered \nL83: appropriate evidence. Claims require composition of evidence from multiple \nL84: sentences in 16.82% of cases. Furthermore, in 12.15% of the claims, this \nL85: evidence was taken from multiple pages.\nL86: \nL87: To ensure annotation consistency, we developed suitable guidelines and user \nL88: interfaces, resulting in inter-annotator agreement of 0.6841 in Fleiss (Fleiss, \nL89: 1971) in claim verification classification, and 95.42% precision and 72.36% \nL90: recall in evidence retrieval.\nL91: \nL92: To characterize the challenges posed by FEVER we develop a pipeline approach \nL93: which, given a claim, first identifies relevant documents, then selects \nL94: sentences forming the evidence from the documents and finally classifies the \nL95: claim w.r.t. evidence. The best performing version achieves 31.87% accuracy in \nL96: verification when requiring correct evidence to be retrieved for claims \nL97: Supported or Refuted, and 50.91% if the correctness of the evidence is ignored, \nL98: both indicating the difficulty but also the feasibility of the task. We also \nL99: conducted oracle experiments in which components of the pipeline were replaced \nL100: by the gold standard annotations, and observed that the most challenging part of\nL101:  the task is selecting the sentences containing the evidence. In addition to \nL102: publishing the data via our website1, we also publish the annotation interfaces2\nL103:  and the baseline system3 to stimulate further research on verification.\nL104: \nL105: Footnote 1: http://fever.ai\nL106: \nL107: Footnote 2: https://github.com/awslabs/fever\nL108: \nL109: Footnote 3: https://github.com/sheffieldnlp/fever-baselines\nL110: \nL111: ## 2 Related Works\nL112: \nL113: Vlachos and Riedel (2014) constructed a dataset for claim verification \nL114: consisting of 106 claims, selecting data from fact-checking websites such as \nL115: PolitiFact, taking advantage of the labelled claims available there. However, in\nL116:  order to develop claim verification components we typically require the \nL117: justification for each verdict, including the sources used. While this \nL118: information is usually available in justifications provided by the journalists, \nL119: they are not in a machine-readable form. Thus, also considering the small number\nL120:  of claims, the task defined by the dataset proposed remains too challenging for\nL121:  the ML/NLP methods currently available. Wang (2017) extended this approach by \nL122: including all 12.8K claims available by Politifact via its API, however the \nL123: justification and the evidence contained in it was ignored in the experiments as\nL124:  it was not machine-readable. Instead, the claims were classified considering \nL125: only the text and the metadata related to the person making the claim. While \nL126: this rendered the task amenable to current NLP/ML methods, it does not allow for\nL127:  verification against any sources and no evidence needs to be returned to \nL128: justify the verdicts.\nL129: \nL130: The Fake News challenge (Pomerleau and Rao, 2017) modelled verification as \nL131: stance classification: given a claim and an article, predict whether the article\nL132:  supports, refutes, observes (neutrally states the claim) or is irrelevant to \nL133: the claim. It consists of 50K labelled claim-article pairs, combining 300 claims\nL134:  with 2,582 articles. The claims and the articles were curated and labeled by ",
+            "tool_call_id": "fixture-tool-call-30-5"
+        },
+        {
+            "role": "tool",
+            "content": "[28]  (https://nlp.cs.washington.edu/triviaqa/)\n**viewing lines [0 - 48] of 48**\n\nL0: \nL1: URL: https://nlp.cs.washington.edu/triviaqa/\nL2: ## TriviaQA: A Large Scale Dataset for Reading Comprehension and Question \nL3: Answering\nL4: \nL5: TriviaQA is a reading comprehension dataset containing over 650K question-\nL6: answer-evidence triples. TriviaQA includes 95K question-answer pairs authored by\nL7:  trivia enthusiasts and independently gathered evidence documents, six per \nL8: question on average, that provide high quality distant supervision for answering\nL9:  the questions. The details can be found in our ACL 17 paper TriviaQA: A Large \nL10: Scale Distantly Supervised Challenge Dataset for Reading Comprehension\nL11: \nL12: Mandar Joshi, Eunsol Choi, Daniel Weld, Luke Zettlemoyer. TriviaQA: A Large \nL13: Scale Distantly Supervised Challenge Dataset for Reading Comprehension\nL14: \nL15: In Association for Computational Linguistics (ACL) 2017, Vancouver, Canada.\nL16: [bib]\nL17: \nL18: ## News\nL19: \nL20: ##### Jul 2017\nL21: \nL22: The TriviaQA leaderboard is now live on Codalab. Submit your predictions for \nL23: evaluation on the test set!\nL24: \nL25: ## Data\nL26: \nL27: If you are interested in the reading comprehension task motivated in the paper, \nL28: click on the link below to download the data.\nL29: \nL30: If you are interested in open domain QA, click on the link below to download the\nL31:  data. It contains the unfiltered dataset with 110K question-answer pairs. The \nL32: Wikipedia and top 10 search documents can be obtained from the RC version. The \nL33: main difference between the RC version above and the unfiltered dataset is that \nL34: not all documents (in the unfiltered set) for a given question contain the \nL35: answer string(s). This makes the unfiltered dataset more appropriate for IR-\nL36: style QA.\nL37: \nL38: The University of Washington does not own the copyright of the questions and \nL39: documents included in TriviaQA.\nL40: \nL41: ## Code\nL42: \nL43: Check out our Github repository.\nL44: \nL45: ## Contact\nL46: \nL47: For any questions about the code or data, please contact Mandar Joshi -- {first \nL48: name of the first author}90[at]cs[dot]washington[dot]edu",
+            "tool_call_id": "fixture-tool-call-30-6"
+        },
+        {
+            "role": "tool",
+            "content": "Error fetching URL `https://research.google/pubs/natural-questions-a-benchmark-for-question-answering-research/`",
+            "tool_call_id": "fixture-tool-call-30-7"
+        },
+        {
+            "role": "tool",
+            "content": "Error fetching URL `https://microsoft.github.io/msmarco/`",
+            "tool_call_id": "fixture-tool-call-30-8"
+        },
+        {
+            "role": "tool",
+            "content": "Error fetching URL `https://github.com/beir-cellar/beir`",
+            "tool_call_id": "fixture-tool-call-30-9"
+        },
+        {
+            "role": "tool",
+            "content": "Error fetching URL `https://project-miracl.github.io/`",
+            "tool_call_id": "fixture-tool-call-30-10"
+        },
+        {
+            "role": "tool",
+            "content": "[29]  (https://ar5iv.org/abs/2207.01206)\n**viewing lines [0 - 122] of 1392**\n\nL0: \nL1: URL: https://ar5iv.org/abs/2207.01206\nL2: # WebShop: Towards Scalable Real-World Web Interaction with Grounded Language \nL3: Agents\nL4: \nL5: Shunyu Yao111Equal contribution. Project site with code, data, and demos: \nL6: https://webshop-pnlp.github.io. Howard Chen111Equal contribution. Project site \nL7: with code, data, and demos: https://webshop-pnlp.github.io. John Yang Karthik \nL8: Narasimhan\nL9: \nL10: Department of Computer Science, Princeton University\nL11: {shunyuy, howardchen, jy1682, karthikn}@princeton.edu\nL12: \nL13: ###### Abstract\nL14: \nL15: Existing benchmarks for grounding language in interactive environments either \nL16: lack real-world linguistic elements, or prove difficult to scale up due to \nL17: substantial human involvement in the collection of data or feedback signals. To \nL18: bridge this gap, we develop WebShop – a simulated e-commerce website environment\nL19:  with million real-world products and 1.181.18 crowd-sourced text instructions. \nL20: Given a text instruction specifying a product requirement, an agent needs to \nL21: navigate multiple types of webpages and issue diverse actions to find, \nL22: customize, and purchase an item. WebShop provides several challenges for \nL23: language grounding including understanding compositional instructions, query \nL24: (re-)formulation, comprehending and acting on noisy text in webpages, and \nL25: performing strategic exploration. We collect over 12,08712,087 human \nL26: demonstrations for the task, and train and evaluate a diverse range of agents \nL27: using reinforcement learning, imitation learning, and pre-trained image and \nL28: language models. Our best model achieves a task success rate of 1,6001,600, \nL29: which outperforms rule-based heuristics (29%29\\%) but is far lower than human \nL30: expert performance (9.6%9.6\\%). We also analyze agent and human trajectories and\nL31:  ablate various model components to provide insights for developing future \nL32: agents with stronger language understanding and decision making abilities. \nL33: Finally, we show that agents trained on WebShop exhibit non-trivial sim-to-real \nL34: transfer when evaluated on amazon.com and ebay.com , indicating the potential \nL35: value of WebShop in developing practical web-based agents that can operate in \nL36: the wild.59%59\\%\nL37: \nL38: ## 1 Introduction\nL39: \nL40: Recent advances in natural language processing (NLP) and reinforcement learning \nL41: (RL) have brought about several exciting developments in agents that can perform\nL42:  sequential decision making while making use of linguistic context [30, 50, 58].\nL43:  On the other hand, large-scale language models like GPT-3 [6] and BERT [11] are\nL44:  excelling at traditional NLP benchmarks such as text classification, \nL45: information extraction and question answering. While the former set of tasks are\nL46:  limited in their set of linguistic concepts and prove difficult to scale up, \nL47: the latter tasks usually contain static, non-interactive datasets that lack \nL48: adequate grounding to extra-linguistic concepts [4]. In order to make further \nL49: progress in building grounded language models, we believe there is a need for \nL50: scalable interactive environments that contain: (1) language elements that \nL51: reflect rich, real-world usage and are collectible at scale, and (2) task \nL52: feedback that is well-defined and automatically computable to facilitate \nL53: interactive learning, without the constant need for expensive feedback from \nL54: humans.\nL55: \nL56: The world wide web (WWW) is a massive open-domain interactive environment that \nL57: inherently satisfies the first aforementioned requirement through its \nL58: interconnected set of pages with natural text, images and interactive elements. \nL59: By being simultaneously scalable, semantic, interactive, dynamic and realistic, \nL60: the web is uniquely different from existing environments for autonomous agents \nL61: like games or 3D navigation. Moreover, the web also provides a practical \nL62: environment to deploy trained agents, with great potential for alleviating human\nL63:  efforts in tedious tasks (e.g. buying products, booking appointments). While \nL64: there has been prior work on building web-based tasks, they either lack depth in\nL65:  the transition and action spaces, or prove difficult to scale up. Some \nL66: benchmarks only contain either a single classification task [39, 46, 31] or \nL67: interactions containing only a handful of different pages in each episode [43]. \nL68: Others propose tasks with longer horizons but are either limited to following \nL69: hyperlinks for web navigation [36] or require human-in-the-loop feedback due to \nL70: the lack of an automated reward function [33].\nL71: \nL72: In this paper, we introduce WebShop (Figure 1) – a large-scale interactive web-\nL73: based environment for language understanding and decision making – and train \nL74: autonomous agents to complete tasks on this benchmark. With the goals of being \nL75: scalable and containing realistic language and visual elements, WebShop emulates\nL76:  the task of online shopping on an e-commerce website, where the agent’s goal is\nL77:  to understand a human-provided text instruction and purchase a product to match\nL78:  the specifications. To do so, the agent needs to query the website’s search \nL79: engine, choose items to explore from search results, open and read their \nL80: description and details, and select the necessary options (e.g. 32 oz., red \nL81: color) before clicking the ‘Buy’ button. In order to pick the optimal product \nL82: that matches user requirements, the agent may need to view and compare various \nL83: products (including backtracking between pages), and potentially perform \nL84: multiple searches. WebShop contains over one million products scraped from \nL85: amazon.com, over thousand crowdsourced instructions, and a diverse semantic \nL86: action space of searching text queries and choosing text buttons. It is packaged\nL87:  into a convenient OpenAI Gym [5] environment and can be rendered in two modes \nL88: (HTML or simple) with parallel observation spaces that are easy for human and \nL89: model respectively. Rewards are automatically computed using a combination of \nL90: programmatic matching functions that consider the attributes, type, options and \nL91: price of the chosen product, alleviating the need for human evaluation and \nL92: providing a path to scaling up interactive learning.1212\nL93: \nL94: We develop several agents to perform this task, using both reinforcement \nL95: learning (RL) and imitation learning (IL). We also leverage the latest pre-\nL96: trained language models [26, 11] for representing and generating text. Our \nL97: modular architecture includes a factorized processing of state observations and \nL98: action choices using ResNets (visual) and Transformers (text), followed by an \nL99: attention fusion layer that helps the agent contextually score each action. Our \nL100: best agent achieves an average score of (out of 62.462.4) and successfully \nL101: completes the task 100100 of the time, significantly higher than a heuristic \nL102: baseline that achieves 28.7%28.7\\% and 45.645.6, respectively. While this \nL103: demonstrates the potential for IL and RL, the agents are still much lower than \nL104: human experts, who can achieve 9.6%9.6\\% and 82.182.1 on this task.*** In our \nL105: analysis (§5.3), we observe that the task requires patience and consistency, \nL106: which is lacking in some crowdsource workers, leading to lower scores. Even with\nL107:  this caveat, the gap between human performance and the model remains \nL108: significant. We perform several analyses and ablation studies to identify the \nL109: cause of this gap and find several avenues for agent improvement in the future \nL110: including more robust search generation, explicit memory modules, and better \nL111: handling of noisy web text. Finally, we also demonstrate an instance of sim-to-\nL112: real transfer by deploying agents trained with WebShop to operate on amazon.com \nL113: and ebay.com, and find that they can achieve similar performances despite search\nL114:  engine and product differences, and consistently outperform the rule baseline \nL115: of using the first result returned by the commercial search engines when \nL116: directly searching the instruction texts. This demonstrates the practical \nL117: potential of our work towards developing agents that can operate autonomously on\nL118:  the world wide web (WWW).59.6%59.6\\%\nL119: \nL120: ## 2 Related Work\nL121: \nL122: Reinforcement learning on the web. Nogueira and Cho [36] introduced WikiNav as a",
+            "tool_call_id": "fixture-tool-call-30-11"
+        },
+        {
+            "role": "tool",
+            "content": "Error fetching URL `http://alfworld.github.io/`",
+            "tool_call_id": "fixture-tool-call-30-12"
+        },
+        {
+            "role": "tool",
+            "content": "Error fetching URL `https://osu-nlp-group.github.io/Mind2Web/`",
+            "tool_call_id": "fixture-tool-call-30-13"
+        },
+        {
+            "role": "tool",
+            "content": "Error fetching URL `https://github.com/web-arena-x/visualwebarena`",
+            "tool_call_id": "fixture-tool-call-30-14"
+        },
+        {
+            "role": "tool",
+            "content": "[30]  (https://ar5iv.org/pdf/2406.12172)\n**viewing lines [0 - 127] of 1478**\n\nL0: \nL1: URL: https://ar5iv.org/pdf/2406.12172\nL2: # Navigating the Labyrinth: Evaluating and Enhancing LLMs’ Ability to Reason \nL3: About Search Problems\nL4: \nL5: Nasim Borazjanizadeh\nL6: \nL7: Berkeley AI Research, UC Berkeley\nL8: \\AndRoei Herzig\nL9: Berkeley AI Research, UC Berkeley\nL10: \\AndTrevor Darrell\nL11: Berkeley AI Research, UC Berkeley\nL12: \\AndRogerio Feris\nL13: MIT-IBM Watson AI Lab\nL14: \\AndLeonid Karlinsky\nL15: MIT-IBM Watson AI Lab\nL16: \nL17: ###### Abstract\nL18: \nL19: Recently, Large Language Models (LLMs) attained impressive performance in math \nL20: and reasoning benchmarks. However, they still often struggle with logic problems\nL21:  and puzzles that are relatively easy for humans. To further investigate this, \nL22: we introduce a new benchmark, SearchBench, containing 11 unique search problems,\nL23:  each equipped with automated pipelines to generate an arbitrary number of \nL24: instances and analyze the feasibility, correctness, and optimality of LLM-\nL25: generated solutions. We show that even the most advanced LLMs fail to solve \nL26: these problems end-to-end in text, e.g., GPT4 solves only 1.4%. SearchBench \nL27: problems require considering multiple pathways to the solution as well as \nL28: backtracking, posing a significant challenge to auto-regressive models. \nL29: Instructing LLMs to generate code that solves the problem helps, but only \nL30: slightly, e.g., GPT4’s performance rises to 11.7%. In this work, we show that \nL31: in-context learning with A* algorithm implementations enhances performance. The \nL32: full potential of this promoting approach emerges when combined with our \nL33: proposed Multi-Stage-Multi-Try method, which breaks down the algorithm \nL34: implementation into two stages and verifies the first stage against unit tests, \nL35: raising GPT-4’s performance above 57%.\nL36: \nL37: \\doparttoc\\faketableofcontents\nL38: \nL39: ### 1 Introduction\nL40: \nL41: The advent of Large Language Models (LLMs) has revolutionized the field of \nL42: natural language processing, with models like Gemini[18], GPT-4[26] \nL43: demonstrating unprecedented performance on reasoning tasks such as GSM8k[8]. \nL44: However, these models still exhibit surprising failures on some intuitive \nL45: tasks[2, 30, 22] and struggle with multi-step compositional reasoning, \nL46: combinatorial problems, and planning [9, 40, 44]. Inspired by these observations\nL47:  and to further investigate LLMs’ reasoning abilities, we offer a new benchmark \nL48: of search problems, SearchBench. The problems in SearchBench are combinatorial, \nL49: defined as tasks that involve finding an optimal object from a finite set of \nL50: objects, where the set of feasible solutions is either discrete or can be \nL51: reduced to a discrete set [43]. These problems are predominantly NP-hard and \nL52: necessitate systematic exploration of action paths and backtracking to \nL53: intermediate feasible states; thus, SearchBench implicitly investigates the \nL54: LLM’s capacity for non-linear reasoning.\nL55: \nL56: SearchBench has five distinct problem categories: (i) pathfinding, (ii) puzzles,\nL57:  (iii) subset sum, (iv) sorting, and (v) under-determined systems; further \nL58: divided into 11 unique problem types. Each problem type is inspired by known \nL59: puzzles and combinatorial problems but augmented with modified rules and \nL60: constraints to ensure substantial differences from similar problems LLMs \nL61: encountered during their training. And the solution to each problem is a \nL62: sequence of actions leading from the initial state to the goal state, while \nL63: optimizing a cost. We generate100 instances of varying difficulty per problem \nL64: type using an automatic pipeline, resulting in 1107 problem instances total. \nL65: Each problem type in SearchBench is equipped with an automatic pipeline that \nL66: evaluates LLM-generated solutions on three dimensions: feasibility, correctness,\nL67:  and optimality. Feasibility checks whether the actions taken follow the \nL68: problem’s rules; correctness verifies if a feasible solution reaches the goal \nL69: state; and optimality checks if the least cost solution was found.∼\\sim\nL70: \nL71: SearchBench is challenging to LLMs due to several factors. Firstly, natural \nL72: language is less suited for describing or updating accurate representations of \nL73: complex intermediate states. Secondly, our experiments show LLMs struggle with \nL74: exploring a combinatorial exponentially exploding state-space. Despite the fact \nL75: that some methods were developed for long-context reasoning [4, 13, 50], \nL76: SearchBench problems cannot be easily summarized [4], reasoned about [13], or \nL77: processed in parallel due to their size [50, 45]. Our findings show that even \nL78: the strongest LLMs [26] almost completely fail to solve SearchBench problems in \nL79: text-only mode.\nL80: \nL81: To provide further insights, we show that LLMs’ performance on SearchBench \nL82: improves by prompting the models to solve the problems using the A* search \nL83: algorithm [11]. A* is a heuristic-based graph traversal algorithm known for its \nL84: time efficiency and provable optimality guarantees, making it the most suitable \nL85: search algorithm for solving the problems in our benchmark. This method \nL86: leverages A*’s correctness and optimality, while offloading some of the non-\nL87: linear computations involved in searching the state-space to code execution. \nL88: Additionally, to improve the quality of generated A* codes, motivated that \nL89: ensembling helps generation quality[41, 47, 21], we introduce the Multi-Stage-\nL90: Multi-Try (MSMT) inference strategy. In the \"Multi-Try\" aspect of MSMT, before \nL91: evaluating the solution returned by the code, we first verify whether the code \nL92: generated by the model satisfies a set of unit tests: (i) it is executable; (ii)\nL93:  it returns a list as output; and (iii) data type of list elements is correct. \nL94: If the code fails any of the tests, MSMT re-runs the LLM until a valid code is \nL95: generated or allowed number of attempts is exhausted. The \"Multi-Stage\" aspect \nL96: of MSMT generates the code in two steps: (i) ‘A* Implementation’ - the \nL97: implementation of an instance-agnostic A* algorithm for the problem type; and \nL98: (ii) Initialization - the instantiation of initial conditions and state \nL99: variables of the problem instance. In MSMT ’Initialization’ is generated \nL100: conditioned on the ‘A* Implementation’ (which is generated separately first and \nL101: provided in ‘Intitialization’ prompt). We demonstrate that our MSMT A* method \nL102: (Fig. 2) significantly enhances the LLMs’ ability to solve search problems, \nL103: outperforming all other prompting strategies we used to evaluate models on \nL104: SearchBench, including 0-shot text, 4-shot Chain-of-Thought (CoT)[42] text, \nL105: 0-shot code generation, and 4-shot A* prompting with the naive greedy decoding \nL106: strategy.\nL107: \nL108: To summarize, our main contributions are as follows: (i) We contribute the \nL109: SearchBench benchmark designed to assess the capability of LLMs in solving \nL110: state-based problems requiring combinatorial search; (ii) We introduce the A* \nL111: Multi-Stage-Multi-Try (MSMT) framework that leverages the flexibility and \nL112: natural language comprehension capabilities of LLMs, reliability and structured \nL113: nature of the A* algorithm, along with unitests to verify LLMs’ code generation \nL114: in order to address search problems. (iii) We show a strong advantage of MSMT A*\nL115:  prompting for solving search problems robustly with LLMs, suggesting \nL116: interesting future work directions and yet leaving significant room for future \nL117: improvement on SearchBench.\nL118: \nL119: ### 2 SearchBench Benchmark\nL120: \nL121: SearchBench includes five categories of problems: puzzles, subset sum, sorting, \nL122: pathfinding, and under-determined systems, further subdivided into 11 problem \nL123: types and instantiated as 1107 natural language problem instances. Each problem \nL124: type requires a different algorithmic approach to solve. Our problems are \nL125: inspired by well-known (mostly NP-hard) puzzles and search problems, though \nL126: their rules were modified to ensure a solved instance of a problem similar to \nL127: any SearchBench problem was not observed during LLMs’ massive internet-scale ",
+            "tool_call_id": "fixture-tool-call-30-15"
+        },
+        {
+            "role": "tool",
+            "content": "Error fetching URL `https://ar5iv.labs.ar5iv.org/html/2312.15820`",
+            "tool_call_id": "fixture-tool-call-30-16"
+        },
+        {
+            "role": "tool",
+            "content": "Error fetching URL `https://github.com/jxwuyi/WebNav`",
+            "tool_call_id": "fixture-tool-call-30-17"
+        },
+        {
+            "role": "assistant",
+            "content": "针对搜索智能体（Search Agent）的评估，已涌现出多个侧重点不同的基准测试。以下将分类介绍这些基准，涵盖其核心特点、典型使用场景及例题。\n\n---\n\n## 🔍 1. 网页浏览/导航类基准\n这类基准要求智能体在真实或模拟的网页环境中执行多步操作，以完成复杂任务。\n\n| 基准 | 核心特点 | 使用场景 | 例题 |\n|------|----------|----------|------|\n| **BrowseComp**（OpenAI）【3†L16-L35】 | - 1,266 个挑战性问题，需持续导航、查找难以直接搜索的信息<br>- 问题设计保证人类在 10 分钟内无法解决，且现有模型（包括带浏览的 ChatGPT）也难以完成<br>- 答案为短字符串，便于自动验证 | 评估智能体在复杂网络浏览任务中的持久性、创造性搜索能力 | “找出 2018‑2023 年间在 EMNLP 会议上发表、第一作者本科毕业于达特茅斯学院、第四作者本科毕业于宾夕法尼亚大学的科学论文标题”【3†L45-L49】 |\n| **WebVoyager**【24†L18-L33】 | - 多模态（视觉+文本）端到端网页智能体基准<br>- 包含 300 个真实任务，覆盖 15 个常用网站（如 Amazon、Wikipedia）<br>- 使用 GPT‑4V 进行自动评估，与人工评估一致性达 85.3% | 评估智能体在真实网站上的端到端任务完成能力（多模态输入） | “在 Amazon 上找到‘无线蓝牙耳机’并按价格从低到高排序，然后选择最便宜的一款加入购物车” |\n| **WebArena**【25†L15-L35】 | - 完全可交互的、高度真实的网页环境（4 个领域：电商、论坛、协作开发、内容管理）<br>- 812 项长视野任务，评估功能正确性而非表面动作匹配<br>- 最佳 GPT‑4 智能体成功率仅 14.41%，远低于人类的 78.24% | 评估自主智能体在真实网页环境中的复杂任务执行能力 | “在电商网站中购买一双‘黑色运动鞋’，价格低于 $80，并将收货地址修改为纽约”【25†L86-L92】 |\n| **Mind2Web**【19†L4-L14】 | - 首个面向通用网页智能体的数据集，任务来自真实网站<br>- 支持在线评估（Online‑Mind2Web），包含 300 个实时任务、136 个流行网站<br>- 强调多步指令跟随与复杂交互 | 评估智能体在任意网站上的通用任务完成能力 | “在 GitHub 上创建一个名为 ‘test‑repo’ 的公开仓库，并添加一个 README.md 文件” |\n| **VisualWebArena**【20†L4-L10】 | - 多模态网页智能体基准，任务依赖视觉信息（截图）<br>- 包含丰富的视觉接地任务，如识别图片、按钮位置等<br>- 评估智能体在真实视觉网页环境中的表现 | 评估多模态智能体在视觉丰富的网页任务中的能力 | “在纽约时报网站找到今天头条新闻的配图，并描述图中主要物体” |\n| **WebVLN**【22†L4-L11】 | - 网站上的视觉‑语言导航任务，智能体需根据问题指令在网站中导航<br>- 数据集 WebVLN‑v1 包含多种网站导航场景<br>- 评估导航成功率与任务完成度 | 评估智能体在网站中基于视觉与语言指令的导航能力 | “找到‘联系我们’页面，并提取其中的邮箱地址” |\n| **WebNav**【23†L4-L11】 | - 目标驱动的网页导航基准，环境为部分观测<br>- 智能体需理解自然语言指令并在网页中规划动作序列<br>- 早期经典任务，常用于测试基础导航能力 | 评估智能体在部分观测网页环境中的规划与指令跟随能力 | “导航到 Wikipedia 上关于‘人工智能’的页面，并摘录第一段内容” |\n\n---\n\n## 🛠️ 2. 工具使用/通用助手类基准\n这类基准测试智能体调用外部工具（如搜索、计算、API）来解决实际问题的能力。\n\n| 基准 | 核心特点 | 使用场景 | 例题 |\n|------|----------|----------|------|\n| **GAIA**【26†L10-L25】 | - 466 个真实世界问题，需要推理、多模态处理、网页浏览、工具使用等综合能力<br>- 对人类简单（92% 准确率），但对最强 GPT‑4 仅 15%<br>- 答案唯一、事实性，支持自动评估 | 评估通用 AI 助手在需要多步工具使用的真实任务中的表现 | “2022 年法国总统选举的获胜者出生在哪个城市？”【26†L10-L25】 |\n| **ToolBench**（OpenBMB）【7†L4-L10】 | - 包含多样软件工具的真实任务基准<br>- 提供易用的基础设施（虚拟 API 服务器等）<br>- 评估 LLM 调用工具完成复杂任务的能力 | 评估智能体在工具调用与组合上的熟练度 | “使用计算器 API 计算 (3.14 * 7.2)^2，然后使用搜索 API 查找‘圆周率’的最新精确值” |\n| **HSCodeComp**【2†L16-L35】 | - 首个针对深度搜索智能体在分层规则应用上的专家级电商基准<br>- 任务：根据嘈杂的商品描述预测 10 位 HS 编码（关税规则）<br>- 632 个商品条目，标注由多位电商专家完成，最佳智能体准确率仅 46.8%（人类 95%） | 评估智能体在复杂规则（如关税分类）下的深层推理与搜索能力 | “给定商品描述‘男士纯棉针织T恤，领口为V领’，预测其 10 位 HS 编码”【2†L24-L32】 |\n\n---\n\n## ❓ 3. 问答/检索类基准\n这类基准主要评估智能体在开放域或特定领域的信息检索与问答能力。\n\n| 基准 | 核心特点 | 使用场景 | 例题 |\n|------|----------|----------|------|\n| **HotpotQA**【8†L4-L10】 | - 113k 个基于 Wikipedia 的多跳问答对<br>- 需要跨多个文档推理，并提供句子级支持事实<br>- 涵盖多样的问题类型（比较、列举、因果等） | 评估模型在需要多跳推理的开放域问答中的能力 | “《了不起的盖茨比》的作者还写过哪些小说？” |\n| **FEVER**【27†L16-L28】 | - 185,445 个基于 Wikipedia 的声明，需分类为 Supported/Refuted/NotEnoughInfo<br>- 要求提供证据句子（可多句、多页面）<br>- 挑战性高（最佳系统仅 31.87% 准确率） | 评估系统在事实核查与证据检索上的能力 | “斐济最大的岛屿是考艾岛。”【27†L48-L50】 |\n| **TriviaQA**【28†L5-L10】 | - 超过 650k 个问答‑证据三元组，问题由琐事爱好者编写<br>- 每个问题平均提供 6 篇证据文档，适合远程监督阅读<br>- 包含阅读理解和开放域 QA 两种任务设置 | 评估模型在开放域琐事问答中的检索与阅读理解能力 | “哪位演员在《星球大战：原力觉醒》中扮演凯洛·伦？” |\n| **Natural Questions**【11†L4-L8】 | - 来自 Google 搜索的真实用户问题，答案来自 Wikipedia<br>- 包含长答案（段落）和短答案（实体/日期等）<br>- 训练集 307k，开发/测试集各 8k | 评估开放域问答系统对真实用户查询的响应能力 | “谁写了《傲慢与偏见》？”（答案：“简·奥斯汀”） |\n| **MS MARCO**【12†L4-L10】 | - 基于 Bing 真实搜索查询的检索/问答基准<br>- 包含段落排序、文档排序、问答等任务<br>- 数据规模大（约 500k 查询，880 万段落） | 训练与评估检索模型在真实搜索场景下的表现 | “如何冲泡咖啡？”（系统需检索相关段落回答） |\n| **BEIR**【13†L4-L10】 | - 异构零样本检索基准，包含 18 个数据集、9 种任务（事实核查、QA、对话检索等）<br>- 评估模型在未见任务上的泛化能力<br>- 已成为检索模型的标准评估套件 | 评估检索模型在零样本设置下的跨任务泛化能力 | 给定查询“全球变暖的主要原因”，在文档集合中检索相关文档 |\n| **MIRACL**【14†L4-L10】 | - 多语言检索基准，覆盖 18 种语言（包括中文、阿拉伯语等）<br>- 专注于 ad‑hoc 检索，所有查询与文档均经过人工标注<br>- 提供多语言检索挑战（WSDM 2023 Cup） | 评估多语言检索模型在不同语言下的检索效果 | 中文查询：“全球变暖的原因”，检索相关中文文档 |\n\n---\n\n## 🧩 4. 组合搜索/推理类基准\n这类基准专门测试智能体在组合搜索问题上的逻辑推理与规划能力。\n\n| 基准 | 核心特点 | 使用场景 | 例题 |\n|------|----------|----------|------|\n| **SearchBench**【30†L22-L36】 | - 11 种独特的搜索问题类型（路径查找、谜题、子集和、排序、欠定系统等）<br>- 自动生成任意数量实例，并评估解决方案的可行性、正确性、最优性<br>- GPT‑4 在纯文本模式下仅解决 1.4%，通过 A* 代码生成提升至 11.7% | 评估 LLM 在组合搜索问题上的推理与规划能力 | “在一个 5x5 迷宫中，从起点 (0,0) 到终点 (4,4) 的最短路径是什么？（障碍物位置已知）”【30†L56-L64】 |\n\n---\n\n## 🛒 5. 特定领域/仿真环境基准\n这类基准针对特定领域（如电商、家庭环境）设计，测试智能体在仿真环境中的交互能力。\n\n| 基准 | 核心特点 | 使用场景 | 例题 |\n|------|----------|----------|------|\n| **WebShop**【29†L15-L30】 | - 模拟电商网站环境，包含 118 万真实商品、12,087 条众包指令<br>- 智能体需根据文本指令搜索、筛选、购买商品<br>- 奖励自动计算，最佳模型成功率 29%（人类 59%） | 评估语言智能体在电商任务中的导航、搜索与决策能力 | “找到一款价格低于 $50 的无线蓝牙耳机，评分至少 4 星，并加入购物车”【29†L20-L25】 |\n| **ALFWorld**【18†L4-L10】 | - 将文本环境（TextWorld）与具身环境（ALFRED）对齐的基准<br>- 智能体先在文本中学习抽象策略，然后在视觉环境中执行<br>- 包含 134 个家庭任务（如“在厨房找到杯子并拿到客厅”） | 评估智能体在文本与具身环境之间的迁移学习与规划能力 | “在厨房找到‘杯子’，把它拿到客厅放在桌子上” |\n\n---\n\n## 📊 总结\n以上基准覆盖了搜索智能体的不同能力维度：从**网页浏览、工具使用、问答检索**，到**组合搜索推理**以及**特定领域仿真**。选择合适的基准取决于您要评估的具体能力（如多模态交互、工具调用、多跳推理等）。这些基准大多提供公开的数据集与评估代码，可直接用于实验。\n\n> 注：本文基于 2025 年 11 月前的公开资料整理，部分基准可能已有更新版本，建议访问对应官网获取最新信息。",
+            "reasoning_content": "在同时打开多个基准测试相关页面后，我们获得了大量信息。WebVoyager 是一个多模态网页智能体基准测试，专注于在真实网站上进行端到端任务执行，其特点包括多模态输入（屏幕截图和文本）、在15个常用网站上的300个任务，以及使用GPT-4V进行自动评估。使用场景是评估网页智能体在真实世界中的导航和任务完成能力。例题可能是在亚马逊上找到特定产品并将其加入购物车。\n\nWebArena 是一个真实且可复现的网页环境，用于评估自主智能体。其特点包括四个功能完整的网站（电子商务、论坛、协作开发、内容管理）、812项长视野任务，以及专注于功能正确性的评估。使用场景是评估智能体在复杂网页任务中的表现。例题可能是在电子商务网站上购买符合特定标准的商品。\n\nGAIA 是一个针对通用AI助手的基准测试，要求具备推理、多模态处理、网页浏览和工具使用等能力。其特点包括466个问题，对人类来说概念简单但对AI具有挑战性，答案简洁明确，支持自动评估。使用场景是评估AI助手在真实世界问题中的表现。例题可能是“找出2022年法国总统选举中获胜者的出生城市”。\n\nToolBench 是一个用于评估LLM工具操作能力的基准测试。其特点包括多样化的软件工具、真实世界任务，以及易于使用的基础设施。使用场景是评估智能体使用工具完成任务的能力。例题可能是使用计算器API计算表达式，或使用搜索API查找信息。\n\nHotpotQA 是一个多跳问答基准测试，其特点包括基于Wikipedia的113k个问题-答案对，需要多文档推理，并提供句子级别的支持事实。使用场景是评估模型在回答需要多跳推理的问题时的能力。例题可能是“《了不起的盖茨比》的作者还写过哪些小说？”。\n\nFEVER 是一个事实提取和验证基准测试，其特点包括185,445个基于Wikipedia的声明，需要分类为支持、反驳或信息不足，并需要提供证据句子。使用场景是评估系统验证声称事实的能力。例题可能是“斐济最大的岛屿是考艾岛”，系统需要检索证据来支持或反驳这一说法。\n\nTriviaQA 是一个阅读理解基准测试，其特点包括超过65万个问题-答案-证据三元组，问题由琐事爱好者编写，并附带证据文档。使用场景是评估模型在开放域问答中的能力。例题可能是“哪位演员在《星球大战：原力觉醒》中扮演凯洛·伦？”。\n\nNatural Questions 是一个问答基准测试，其特点包括来自谷歌搜索的真实问题，答案来自Wikipedia，并包含长答案和短答案。使用场景是评估开放域问答系统。例题可能是“谁写了《傲慢与偏见》？”，答案可能是“简·奥斯汀”。\n\nMS MARCO 是一个用于深度学习搜索的基准测试，其特点包括来自Bing的真实搜索查询、人工生成的答案，以及用于段落排序和文档排序的任务。使用场景是训练和评估检索模型。例题可能是“如何冲泡咖啡？”，系统需要检索相关段落来回答。\n\nBEIR 是一个用于零样本评估的异构信息检索基准测试，其特点包括18个数据集、9项任务，涵盖事实核查、问答、对话检索等。使用场景是评估检索模型在零样本设置下的泛化能力。例题可能是给定一个查询，在文档集合中检索相关文档。\n\nMIRACL 是一个多语言信息检索基准测试，其特点涵盖18种语言，专注于ad-hoc检索，并包含人工标注的相关性判断。使用场景是评估多语言检索模型。例题可能是用中文查询“全球变暖的原因”，并检索相关文档。\n\nWebShop 是一个模拟电子商务网站环境，用于评估语言智能体。其特点包括118万个真实产品、12,087条众包文本指令，以及需要导航、搜索和购买的任务。使用场景是评估智能体在电子商务任务中的表现。例题可能是“找到一款价格低于50美元的无线蓝牙耳机”。\n\nALFWorld 是一个将文本和具身环境对齐的基准测试，用于在文本世界中训练智能体，然后将其转移到具身环境中。其特点包括基于文本的环境，与ALFRED数据集并行，并专注于指令跟随。使用场景是评估智能体在家庭任务中的规划能力。例题可能是“在厨房里找到一个杯子，把它拿到客厅”。\n\nMind2Web 是一个用于评估网页通用智能体的基准测试，其特点包括来自真实网站的任务、需要多步交互，并专注于语言指令跟随。使用场景是评估智能体在复杂网页任务中的表现。例题可能是“在GitHub上创建一个新的仓库，命名为‘test-repo’”。\n\nVisualWebArena 是一个用于多模态智能体的基准测试，其特点包括真实视觉网页任务、需要理解视觉和文本元素，并评估功能正确性。使用场景是评估多模态智能体在视觉接地网页任务中的表现。例题可能是“在纽约时报网站上找到今天头条新闻的图片”。\n\nSearchBench 是一个用于评估LLM解决搜索问题能力的基准测试，其特点包括11种搜索问题类型、自动生成实例，并评估可行性、正确性和最优性。使用场景是评估LLM在组合搜索问题中的推理能力。例题可能是“在一个迷宫中找到从起点到终点的最短路径”。\n\nWebVLN 是一个网站上的视觉语言导航基准测试，其特点包括基于问题的指令、需要导航网站，并评估导航成功率。使用场景是评估智能体在网站中基于视觉和语言指令进行导航的能力。例题可能是“找到‘联系我们’页面并提取电子邮件地址”。\n\nWebNav 是一个用于目标驱动网页导航的基准测试，其特点包括部分观察环境、需要规划和理解自然语言指令。使用场景是评估智能体在网页导航中的能力。例题可能是“导航到维基百科上关于‘人工智能’的页面”。\n\n现在，我需要整理一份针对搜索智能体的基准测试列表。考虑到用户可能更关注中文资料，我会优先考虑那些有中文描述的基准测试，例如BrowseComp、HSCodeComp、GAIA、WebVoyager、WebArena、Mind2Web、WebShop、ALFWorld、HotpotQA、FEVER、TriviaQA、Natural Questions、MS MARCO、BEIR、MIRACL、SearchBench、WebVLN、WebNav。我会为每个基准测试提供特点、使用场景和例题。同时，我会引用相关来源，例如HSCodeComp论文、BrowseComp博客文章、WebVoyager论文、WebArena论文、GAIA论文、WebShop论文、SearchBench论文等。我的回答将用中文呈现，并确保内容全面、结构清晰。现在开始撰写回答。",
+            "tool_calls": []
+        }
+    ]
+}
diff --git a/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_input_search_wo_date.json b/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_input_search_wo_date.json
new file mode 100644
index 000000000000..f05891138c09
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_input_search_wo_date.json
@@ -0,0 +1,533 @@
+{
+    "messages": [
+        {
+            "role": "developer",
+            "content": "In a year before 2023, between December 1 and 15 (inclusive), a press release reported that as of the 5th of that month, a country had received a total of 16,201,670 vaccine doses, with 7,583,134 administered. That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services. The following year, the annual report stated that the organization had played a role in rescinding a previous administration’s plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%. Who was the emeritus board member that year? Name only, no titles.",
+            "tools": [
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "description": "Searches for information related to query and displays topn results.",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "query": {
+                                    "type": "string",
+                                    "description": "The search query string"
+                                },
+                                "topn": {
+                                    "type": "integer",
+                                    "description": "Number of top results to display",
+                                    "default": 10
+                                },
+                                "source": {
+                                    "type": "string",
+                                    "description": "Source to search within",
+                                    "enum": [
+                                        "web",
+                                        "news"
+                                    ],
+                                    "default": "web"
+                                }
+                            },
+                            "required": [
+                                "query"
+                            ],
+                            "additionalProperties": false,
+                            "$schema": "http://json-schema.org/draft-07/schema#"
+                        }
+                    }
+                },
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "description": "Opens the link id from the page indicated by cursor starting at line number loc, showing num_lines lines. Valid link ids are displayed with the formatting: 【{id}†.*】. If cursor is not provided, the most recent page is implied. If id is a string, it is treated as a fully qualified URL associated with source. If loc is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available. Use this function without id to scroll to a new location of an opened page.",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "id": {
+                                    "description": "Link ID (number) or fully qualified URL (string)",
+                                    "anyOf": [
+                                        {
+                                            "type": "integer"
+                                        },
+                                        {
+                                            "type": "string"
+                                        }
+                                    ],
+                                    "default": -1
+                                },
+                                "cursor": {
+                                    "type": "integer",
+                                    "description": "Page cursor indicator",
+                                    "default": -1
+                                },
+                                "loc": {
+                                    "type": "integer",
+                                    "description": "Starting line number",
+                                    "default": -1
+                                },
+                                "num_lines": {
+                                    "type": "integer",
+                                    "description": "Number of lines to show",
+                                    "default": -1
+                                },
+                                "view_source": {
+                                    "type": "boolean",
+                                    "description": "Whether to view source",
+                                    "default": false
+                                },
+                                "source": {
+                                    "type": "string",
+                                    "description": "Source associated with the URL",
+                                    "enum": [
+                                        "web",
+                                        "news"
+                                    ],
+                                    "default": "web"
+                                }
+                            },
+                            "additionalProperties": false,
+                            "$schema": "http://json-schema.org/draft-07/schema#"
+                        }
+                    }
+                },
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "find",
+                        "description": "Finds exact matches of pattern in the current page, or the page given by cursor.",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "pattern": {
+                                    "type": "string",
+                                    "description": "The exact pattern to search for"
+                                },
+                                "cursor": {
+                                    "type": "integer",
+                                    "description": "Page cursor to search in",
+                                    "default": -1
+                                }
+                            },
+                            "required": [
+                                "pattern"
+                            ],
+                            "additionalProperties": false,
+                            "$schema": "http://json-schema.org/draft-07/schema#"
+                        }
+                    }
+                },
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "python",
+                        "description": "Use this tool to execute Python code. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files). When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds.",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "code": {
+                                    "type": "string",
+                                    "description": "The python code to execute in the jupyter environment."
+                                }
+                            },
+                            "required": [
+                                "code"
+                            ],
+                            "additionalProperties": false,
+                            "$schema": "http://json-schema.org/draft-07/schema#"
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "The user asks: \"In a year before 2023, between December 1 and 15 (inclusive), a press release reported that as of the 5th of that month, a country had received a total of 16,201,670 vaccine doses, with 7,583,134 administered. That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services. The following year, the annual report stated that the organization had played a role in rescinding a previous administration’s plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%. Who was the emeritus board member that year? Name only, no titles.\"\n\nWe need to parse the puzzle.\n\nWe have clues about a press release about vaccine doses for a country, dated between Dec 1-15 inclusive, reporting as of 5th of that month total doses received 16,201,670 and administered 7,583,134. That is likely a COVID-19 vaccine update for some country. Which country? Might be a smaller country. Could be something like \"Uganda\", \"Zimbabwe\", \"Kenya\", \"Philippines\", etc. Let's search.\n\nSearch for \"16,201,670 vaccine doses\" \"7,583,134 administered\".",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-1-0",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"\\\"16,201,670\\\" vaccine doses\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[0] \"16,201,670\" vaccine doses (Search_Results/\"16,201,670\" vaccine doses)\n**viewing lines [0 - 38] of 38**\n\nL0: \nL1: URL: Search_Results/\"16,201,670\" vaccine doses\nL2: # Search Results\nL3: \nL4:   * 【0†Kenya's Economy is Showing Resilience as Output Rises ...; publish_date: \nL5: none†www.worldbank.org】 Dec 14, 2021 — As of December 5, 2021, Kenya had \nL6: received a total of 16,201,670 vaccines, with 7,583,134 administered. While \nL7: vaccine acceptance is ...\nL8:   * 【1†MINISTRY OF HEALTH; publish_date: none†covidhub.mediacouncil.or.ke】 Dec \nL9: 1, 2021 — Total Doses Received 16,201,670. Table 10 gives the total vaccines \nL10: received since the start of Covid -19 vaccination exercise in the country.\nL11:   * 【2†Output Result Page; publish_date: none†open.unicef.org】 ... 16,201,670 \nL12: doses of multiple vaccines nationwide and full vaccination of 15.5 per cent with\nL13:  two doses of COVID-19 vaccine as of 31 December 2021.\nL14:   * 【3†rebased GDP; publish_date: none†documents1.worldbank.org】 Dec 7, 2021 — \nL15: As of December 5, 2021,. Kenya had received a total of 16,201,670 vaccines, \nL16: with. 7,583,134 administered. Vaccine acceptance is reportedly high.\nL17:   * 【4†Integrated Annual Report; publish_date: none†www.co-opbank.co.ke】 May 27,\nL18:  2022 — ... doses of Covid-19 vaccines and administered close to 17 million ... \nL19: 16,201,670, huku 7,583,134 zikiwa tayari zimedungwa watu. Bado kuna ...\nL20:   * 【5†World Bank lifts Kenya's growth prospect to 5% in 2021; publish_date: \nL21: none†www.africa-press.net】 ... 16,201,670 vaccines, with 7,583,134 administered.\nL22:  While vaccine acceptance is reportedly high there is still a long way to go \nL23: towards the government's ...\nL24:   * 【6†2020 Annual Report United States; publish_date: none†www.hi-us.org】 \nL25: including medicine, vaccines, Covid-19 prevention kits, and kits of ... \nL26: 16,201,670. 9,630,584. Prepaid expenses. 56,707. 22,601. Total current assets.\nL27:   * 【7†HOSPITAL CENTRAL DE LA DEFENSA; publish_date: \nL28: none†contrataciondelestado.es】 16201670 X. URPA-URPA. 0. HCD-000393/00. KONTRON.\nL29:  7000 ... LASER SHOT LBP-1120. CNEUR-C. NEUROFISIOLOGÍA. 0 ... VICTOREEN CT HEAD\nL30:  DOSE PHANTON. 43650-014.\nL31:   * 【8†body metal concentrations: Topics by ...; publish_date: \nL32: none†www.science.gov】 PMID:16201670. Design Characteristics and Tobacco Metal \nL33: Concentrations in Filtered Cigars. PubMed. Caruso, Rosalie V; O'Connor, Richard \nL34: J; Travers, Mark J ...\nL35:   * 【9†Public Offices, Personal Demands: Capability in ...; publish_date: \nL36: none†dokumen.pub】 ... (16201670).9 The Institutiones Politicae consists of two \nL37: books. Book one deals with the origin, nature and goal of the commonwealth on \nL38: the one hand, and the ...",
+            "tool_call_id": "fixture-tool-call-1-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Result shows Kenya. So the press release is about Kenya's vaccine doses as of Dec 5, 2021. The press release likely by World Bank or something. The search result shows \"World Bank lifts Kenya's growth prospect...\" maybe a press release dated Dec 14, 2021. But the clue says \"between December 1 and 15 (inclusive), a press release reported that as of the 5th of that month, a country had received a total of 16,201,670 vaccine doses, with 7,583,134 administered.\" That matches Kenya on Dec 5, 2021. So the year is 2021.\n\nNow the next clue: \"That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services.\"\n\nThis sounds like an environmental or conservation organization. \"Nearly 2.2 million members and supporters contributed over $49 million.\" That might be something like \"The Nature Conservancy\", \"World Wildlife Fund\", \"Sierra Club\", \"Environmental Defense Fund\", \"Natural Resources Defense Council\", etc. Let's search.\n\nSearch for \"nearly 2.2 million members and supporters contributed over $49 million\" \"12% of total revenue coming from contributed services\".",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-3-0",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"\\\"nearly 2.2 million members\\\" \\\"contributed over $49 million\\\"\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[1] \"nearly 2.2 million members\" \"contributed over $49 million\" (Search_Results/\"nearly 2.2 million members\" \"contributed over $49 million\")\n**viewing lines [0 - 41] of 41**\n\nL0: \nL1: URL: Search_Results/\"nearly 2.2 million members\" \"contributed over $49 million\"\nL2: # Search Results\nL3: \nL4:   * 【0†The Global Good Fund, Microsoft and Goodwill join forces ...; \nL5: publish_date: none†globalgoodfund.org】 Sep 28, 2022 — ... more than 2.2 million \nL6: people. They additionally contribute close to $49 billion in local, state and \nL7: federal tax revenues. When COVID hit ...\nL8:   * 【1†Almost 22 billion American tax dollars spent to wipe out a ...; \nL9: publish_date: none†www.facebook.com】 US military funding for Israel's war crimes\nL10:  in Lebanon and Gaza has now cost US taxpayers over $22 billion. When millions \nL11: struggle to afford the ...\nL12:   * 【2†Corporate America has largely abandoned its post-January ...; \nL13: publish_date: none†www.citizensforethics.org】 Jul 29, 2025 — Since the January 6\nL14:  insurrection, over 2,000 corporate and industry group PACs have given over $174\nL15:  million to members of the Sedition ...\nL16:   * 【3†Audit shows millions in questionable taxpayer spending at ...; \nL17: publish_date: none†www.aol.com】 18 hours ago — ... nearly doubled from 1.3 \nL18: million to about 2.2 million. That is more than one in four Washington state \nL19: residents receiving Medicaid, and the ...\nL20:   * 【4†Incarceration and Poverty in the United States - AAF; publish_date: \nL21: none†www.americanactionforum.org】 Jun 30, 2020 — The United States currently \nL22: incarcerates 2.2 million people, nearly half of whom are non-violent drug \nL23: offenders, accused people held pre ...\nL24:   * 【5†How the NRA Spends Revenue (2023); publish_date: none†paddockpost.com】 \nL25: Apr 25, 2025 — In 2023, the NRA reported total revenue of $176 million (compared\nL26:  to $211 million in 2022, $227 million in 2021, $282 million in 2020, $292 ...\nL27:   * 【6†NCA Newsroom; publish_date: none†www.ncausa.org】 ... 2.2 million U.S. \nL28: jobs—operating in every U.S. state and territory and contributing nearly $350 \nL29: billion to the U.S. economy every year. For media inquiries ...\nL30:   * 【7†The Early Childhood Educator Workforce; publish_date: \nL31: none†cscce.berkeley.edu】 About 2.2 million adults are paid to care for and \nL32: educate more than 9.7 million children between birth and age five in center-\nL33: based and home-based settings.\nL34:   * 【8†Bringing Out the Worst in Congress: CRA By the Numbers ...; publish_date:\nL35:  none†progressivereform.org】 Jul 14, 2025 — In fact, the top ten recipients (out\nL36:  of 25) account for almost $7 million, showing how these contributions are \nL37: targeted and strategic. Real ...\nL38:   * 【9†Club 100: Over 100 Americans gave more than $100000 to ...; publish_date:\nL39:  none†publicintegrity.org】 Jan 7, 2022 — Through Internet investments during the\nL40:  1990s, he attracted 2.2 million online accounts valued at $174 million. Schwab \nL41: has been a vocal ...",
+            "tool_call_id": "fixture-tool-call-3-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Not that. Try search for \"2.2 million members and supporters contributed over $49 million\".",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-5-0",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"\\\"2.2 million members\\\" \\\"contributed over $49\\\"\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[2] \"2.2 million members\" \"contributed over $49\" (Search_Results/\"2.2 million members\" \"contributed over $49\")\n**viewing lines [0 - 2] of 2**\n\nL0: \nL1: URL: Search_Results/\"2.2 million members\" \"contributed over $49\"\nL2: # Search Results",
+            "tool_call_id": "fixture-tool-call-5-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Maybe it's an annual report of a non-profit. Search for \"annual report 2.2 million members contributed $49 million\".",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-7-0",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"\\\"2.2 million members\\\" \\\"annual report\\\" $49 million\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[3] \"2.2 million members\" \"annual report\" $49 million (Search_Results/\"2.2 million members\" \"annual report\" $49 million)\n**viewing lines [0 - 36] of 36**\n\nL0: \nL1: URL: Search_Results/\"2.2 million members\" \"annual report\" $49 million\nL2: # Search Results\nL3: \nL4:   * 【0†20-F; publish_date: none†www.sec.gov】 ANNUAL REPORT PURSUANT TO SECTION \nL5: ... Our membership grew from 2.1 million members as at December 31, 2023 to 2.2 \nL6: million members as at December 31, 2024.\nL7:   * 【1†Oportun Reports Fourth Quarter and Full Year 2023 ...; publish_date: \nL8: none†investor.oportun.com】 Mar 12, 2024 — Oportun (Nasdaq: OPRT) is a mission-\nL9: driven fintech that puts its 2.2 million members' financial goals within reach. \nL10: ... annual report on ...\nL11:   * 【2†2 0 21; publish_date: none†www.annualreports.com】 ANNUAL REPORT. 2. 0. \nL12: 21. 2. 0. 21. Page 2. 2. DEFENDERS OF WILDLIFE. 2. 0. 21. 2. 0. 21 ... In 2021, \nL13: Defenders of Wildlife's nearly 2.2 million members and.\nL14:   * 【3†Annual report and accounts 2020; publish_date: none†www.3i.com】 \nL15: Disclaimer. The Annual report and accounts have been prepared solely to provide \nL16: information to shareholders. ... 2.2 million members. In December 2019, we sold \nL17: ...\nL18:   * 【4†united states securities and exchange commission; publish_date: \nL19: none†s21.q4cdn.com】 Dec 14, 2018 — ☒ ANNUAL REPORT ... approximately 5.7 million\nL20:  Members, including approximately 2.2 million Members from the acquisition of \nL21: Best Doctors.\nL22:   * 【5†securities and exchange commission; publish_date: none†www.sec.gov】 ... \nL23: Annual Report on Form 10-K filed with the SEC on March 10, 2020. The ... 2.2 \nL24: million members at June 30, 2020. The following table highlights the ...\nL25:   * 【6†2022-23 Annual Comprehensive Financial Report - CalPERS; publish_date: \nL26: none†www.calpers.ca.gov】 Nov 17, 2023 — ... annual report is consistent with \nL27: these accounting principles ... 2.2 million members and health benefit services \nL28: to over 1.5 million ...\nL29:   * 【7†Medical Centers Report; publish_date: \nL30: none†regents.universityofcalifornia.edu】 Oct 8, 2019 — which will allow its 2.2 \nL31: million members to receive advanced and ... annual report of the University of \nL32: California. For the Medical ...\nL33:   * 【8†Working TogeTher to make HealtH care better; publish_date: \nL34: none†www.unitedhealthgroup.com】 Feb 8, 2012 — ANNUAL REPORT PURSUANT TO SECTION \nL35: 13 OR 15(d) OF THE ... approximately 2.2 million members enrolled in its \nL36: Medicare. Advantage ...",
+            "tool_call_id": "fixture-tool-call-7-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Result 2 shows \"Defenders of Wildlife\". That might be the organization. Let's open.",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-9-0",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 3, \"id\": 2}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[4] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf)\n**viewing lines [0 - 53] of 261**\n\nL0: \nL1: URL: https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-\nL2: wildlife_2021.pdf\nL3: ===== Page 1 ===== 2021 ANNUAL REPORT ===== Page 2 ===== # DEFENDERS OF WILDLIFE\nL4:  made important progress for imperiled species and vital landscapes across the \nL5: United States in 2021. \\--- **LAWYERS** immediately **challenged** the premature\nL6:  and reckless decision to strip **gray wolves** of federal **Endangered Species \nL7: Act (ESA)** protections. For many decades, Defenders has led the effort to \nL8: protect and restore the gray wolf, and we will continue to fight the \nL9: unscientific and hostile anti-wolf policies that impede conservation progress \nL10: and will carry on our unrelenting battle to restore federal protections for this\nL11:  iconic keystone species. \\--- **LOBBYISTS** worked around the clock to keep \nL12: wildlife and climate priorities in the **Infrastructure Investment and Jobs \nL13: Act**. We also continue fighting to keep important wildlife and habitat funding \nL14: in relevant **appropriations bills**. \\--- 2 DEFENDERS OF WILDLIFE ===== Page 3 \nL15: ===== POLICY EXPERTS pushed forward on the urgent need for a National \nL16: Biodiversity Strategy (NBS), an all-of-government approach to address the \nL17: unprecedented loss of wildlife and habitat we are experiencing. We have coupled \nL18: this with our new campaign to expand the National Wildlife Refuge System to \nL19: preserve our nation’s only lands set aside for wildlife. By defending, funding \nL20: and expanding our national wildlife refuges, we will directly address \nL21: biodiversity loss and climate change while promoting increased equitable access \nL22: to nature. FIELD TEAMS were on the ground helping to recover imperiled species. \nL23: From panthers and sea turtles in Florida to wolves, bison and black-footed \nL24: ferrets in Montana, Defenders’ conservation experts were in the field saving \nL25: wildlife all over the country. CONSERVATION INNOVATION EXPERTS provided \nL26: comprehensive analyses to guide policy and inform conservation strategies to \nL27: reach the goal of protecting 30% of our terrestrial and marine systems by 2030 \nL28: (“30x30”). Defenders’ Center for Conservation Innovation (CCI) produced a report\nL29:  which details actions we need to take to achieve 30x30 while protecting \nL30: biodiversity and addressing the climate crisis. DEFENDERS.ORG ===== Page 4 =====\nL31:  WE HAVE ACCOMPLISHED MUCH THIS YEAR WORKING WITH AN ADMINISTRATION THAT VALUES \nL32: SCIENCE AND CONSERVATION. Our many successes include the return of protections \nL33: to the Tongass National Forest in Alaska and the suspension of oil and gas \nL34: leasing permits for the Arctic National Wildlife Refuge. Defenders also played a\nL35:  lead role in successfully defending the Migratory Bird Protection Act from \nL36: attack and securing critical protections for migratory birds. Throughout 2021 \nL37: Defenders made important progress for the wildlife and wild places we all love, \nL38: yet our nation’s wildlife still face unprecedented challenges. More and more \nL39: species, including manatees, Mexican gray wolves, polar bears and North Atlantic\nL40:  right whales, face the very real threat of extinction because of climate \nL41: change, habitat loss, pollution and inadequate protections. In our work we \nL42: continue to demonstrate success by following the science, building partnerships,\nL43:  leading with innovative solutions and focusing on collaborative and practical \nL44: approaches in defense of nature. Together we can make a lasting difference for \nL45: wildlife and habitats. Your support is essential to our work and we couldn’t do \nL46: it without you. Thank you for being an important part of our efforts to protect \nL47: imperiled wildlife across our nation. Jamie Rappaport Clark President and CEO \nL48: John Dayton Board Chair OUR NEW TELEVISION SERIES, WILDLIFE NATION WITH JEFF \nL49: CORWIN, premiered in October and airs every weekend on ABC stations nationwide. \nL50: This groundbreaking series takes Jeff across the country to meet Defenders’ \nL51: experts along with our science, tribal and community partners to educate and \nL52: inspire viewers to join with Defenders to protect wildlife. While highlighting \nL53: some of our important conservation work and raising awareness of important ",
+            "tool_call_id": "fixture-tool-call-9-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Search within PDF for \"2.2 million members\".",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-11-0",
+                    "type": "function",
+                    "function": {
+                        "name": "find",
+                        "arguments": "{\"cursor\": 4, \"pattern\": \"2.2\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[5] Find results for text: `2.2` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf/find?pattern=2.2)\n**viewing lines [0 - 4] of 4**\n\nL0: # 【0†match at L187】\nL1: 2021, Defenders of Wildlife’s nearly 2.2 million members and supporters \nL2: contributed more than $49 million for wildlife and wildlife habitat. This figure\nL3:  includes all those who generously donated their time and expertise. The \nL4: steadfast support of our donors allows Defenders to sustain our program and ",
+            "tool_call_id": "fixture-tool-call-11-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Open around L180.",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-13-0",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 4, \"loc\": 170}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[6] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf)\n**viewing lines [170 - 215] of 261**\n\nL170: into the wild. **10 DEFENDERS OF WILDLIFE** ===== Page 11 ===== AFTER ANOTHER \nL171: SUCCESSFUL DEFENDERS LAWSUIT ON BEHALF OF THE CRITICALLY ENDANGERED RED WOLF, \nL172: FWS reversed its decision to limit the recovery area and committed to a robust \nL173: release strategy. After years of inaction, FWS released eight wolves to the wild\nL174:  in North Carolina and plan to release nine more wolves in the spring of 2022. \nL175: This is an incredible win for this critically endangered species whose \nL176: population has dwindled down to single digits in the wild because of \nL177: mismanagement, vehicle strikes and poaching. DEFENDERS CONTINUED TO LEAD EFFORTS\nL178:  TO PROTECT THE FLORIDA MANATEE, a beloved species that suffered the deadliest \nL179: year on record in 2021, tragically surpassing 1,000 deaths because of water \nL180: pollution and lack of warm water habitat. Defenders led advocacy and education \nL181: aimed at restoring the natural flow of the dammed Ocklawaha River, which would \nL182: provide critical warm-water habitat that manatees need to survive. Defenders’ \nL183: legal team continued to fight for manatees in the courts, holding government \nL184: agencies accountable for protecting critical habitat and addressing the \nL185: devastating water pollution that is killing the seagrass and causing manatees to\nL186:  starve. DAVID TES | SAM FRENZY DRAW DEFENDERS.ORG 11 ===== Page 12 ===== In \nL187: 2021, Defenders of Wildlife’s nearly 2.2 million members and supporters \nL188: contributed more than $49 million for wildlife and wildlife habitat. This figure\nL189:  includes all those who generously donated their time and expertise. The \nL190: steadfast support of our donors allows Defenders to sustain our program and \nL191: public education efforts in the field, the courts and on Capitol Hill. 2021 \nL192: SOURCES OF FUNDS Grants and contributions $29,057 Bequests, trusts and split \nL193: interests $7,692 Income from investments, annuity reserve funds and trusts \nL194: $3,354 Royalties and other income $3,576 Contributed services $6,140 Total \nL195: Revenue $49,819 USES OF FUNDS Biodiversity conservation $22,420 Constituency \nL196: mobilization $16,324 Fundraising $1,211 Management and general $5,865 Total \nL197: Expenses $45,820 Change in net assets $3,999 Net assets, start of year $41,145 \nL198: Net Assets, End of the Year $45,144 Dollars are in thousands. 12 DEFENDERS OF \nL199: WILDLIFE Grants and contributions 58% Income from investments 7% Requests, \nL200: trusts and split interests 15% Royalties and other income 7% Contributed \nL201: services 12% Program and support services 97% Fundraising 3% ===== Page 13 =====\nL202:  DEFENDERS.ORG 13 ===== Page 14 ===== John Dayton, Esq.* Chair Texas Sheryl \nL203: Sachs* Vice Chair Connecticut Whitfield Bailey* Secretary Tennessee Mark Caylor*\nL204:  Treasurer District of Columbia Sylvia Baca* California Donald Barry, Esq. \nL205: Oregon Dinah Bear, Esq. Arizona Kim O’Keefe Beck, Ph.D California Gabriela \nL206: Chavarria, Ph.D Washington Jamie Rappaport Clark* President & CEO Virginia \nL207: Caroline D. Gabel Maryland Keith M. Harper, Esq. Maryland John Donovan, Esq. \nL208: Montana and California James M. Hecker, Esq. Virginia Judith Ponsikoff, Ph.D* \nL209: California Terry L. Root, Ph.D Florida BOARD EMERITUS Terry C. Pelster, Esq. New\nL210:  York *Executive Committee member* ST A F F & A D V I S O R S SCIENCE ADVISORS \nL211: Gabriela Chavarria, Ph.D Burke Museum, University of Washington Leah Gerber, \nL212: Ph.D Arizona State University Kathayoon Khalil, Ph.D Oregon Zoo Rurik List, Ph.D\nL213:  Universidad Autónoma Metropolitana-Lerma Barry Noon, Ph.D Colorado State \nL214: University Terry L. Root, Ph.D Stanford University Mark Schwartz, Ph.D \nL215: University of California-Davis David S. Wilcox, Ph.D Princeton University Rae ",
+            "tool_call_id": "fixture-tool-call-13-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Scrolling further for \"Contributed services 12%\". Already seen. So the annual report is Defenders of Wildlife 2021 annual report.\n\nNext clue: \"The following year, the annual report stated that the organization had played a role in rescinding a previous administration’s plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%.\"\n\nThus, the following year (2022) annual report of Defenders of Wildlife includes mention of rescinding plan to open most of 22 million acres of NPR-A to oil and gas drilling. Also contributed services decreased to 10% (from 12% in 2021). So we need to find the 2022 annual report for Defenders of Wildlife.\n\nSearch for \"Defenders of Wildlife 2022 annual report National Petroleum Reserve Alaska\".",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-15-0",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"Defenders of Wildlife 2022 annual report National Petroleum Reserve Alaska\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[7] Defenders of Wildlife 2022 annual report National Petroleum Reserve Alaska (Search_Results/Defenders of Wildlife 2022 annual report National Petroleum Reserve Alaska)\n**viewing lines [0 - 37] of 37**\n\nL0: \nL1: URL: Search_Results/Defenders of Wildlife 2022 annual report National Petroleum \nL2: Reserve Alaska\nL3: # Search Results\nL4: \nL5:   * 【0†CELEBRATING YEARS; publish_date: none†www.annualreports.com】 With less \nL6: than 340 right whales left in the wild, Defenders is fighting tirelessly to end \nL7: deadly lobster gear entanglements and vessel strikes that are driving ...\nL8:   * 【1†Financials; publish_date: none†defenders.org】 We invite you to explore \nL9: the reports below to learn more about our activities and accomplishments, and \nL10: how we put your money to work for wildlife.\nL11:   * 【2†Alaska Program Looks Back on 2022; publish_date: none†defenders.org】 Feb \nL12: 9, 2023 — Thanks to a lawsuit joined by Defenders, seven million acres were \nL13: returned to protection within the National Petroleum Reserve-Alaska (NPR-A), ...\nL14:   * 【3†Defenders-of-Wildlife-2022-Financial-Statement. ...; publish_date: \nL15: none†defenders.org】 We have audited the accompanying consolidated financial \nL16: statements of Defenders of Wildlife and Affiliated Defenders of Wildlife Action \nL17: Fund (collectively, ...\nL18:   * 【4†2022 Annual Report; publish_date: none†alaskaconservation.org】 Jun 13, \nL19: 2023 — In 2022, we focused on three landscapes: the Arctic. National Wildlife \nL20: Refuge, Bristol Bay, and the Tongass National Forest. In March 2022,.\nL21:   * 【5†Assessment of ecological and cultural values within the ...; \nL22: publish_date: none†www.blm.gov】 This document was written to provide technical \nL23: information regarding the ecological importance of the National Petroleum \nL24: Reserve – Alaska (NPR-A). Several ...\nL25:   * 【6†Accomplishments Report; publish_date: none†defenders.org】 National \nL26: Petroleum Reserve-Alaska. Identified for exceptional wildlife and cultural \nL27: values, including critical habitat for polar bears and other species ...\nL28:   * 【7†2022 annual report; publish_date: none†dory-\nL29: plantain-s2zc.squarespace.com】 These projects are made possible through \nL30: collaborations with Defenders of. Wildlife, Cook Inletkeeper, Trustees for \nL31: Alaska,. Environmental Investigation Agency, ...\nL32:   * 【8†23IMPACT REPORT; publish_date: none†www.annualreports.com】 Defenders of \nL33: Wildlife made incredible progress protecting wildlife and wild places in 2023, \nL34: helping shape a brighter future for imperiled species and vital ...\nL35:   * 【9†From Leasing to Land Protections; publish_date: none†defenders-cci.org】 \nL36: Abstract. When you hear the name, National Petroleum Reserve - Alaska (NPR-A, or\nL37:  Reserve), you might think of a barren land filled with oil wells and ...",
+            "tool_call_id": "fixture-tool-call-15-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Open result 0.",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-17-0",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 7, \"id\": 0}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[8] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf)\n**viewing lines [0 - 53] of 289**\n\nL0: \nL1: URL: https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-\nL2: wildlife_2022.pdf\nL3: ===== Page 1 ===== 2022 Impact Report C E L E B R A T I N G Y E A R S ===== Page\nL4:  2 ===== 2022 Defenders of Wildlife made important progress for imperiled \nL5: species and vital landscapes across the United States in 2022. GRAY WOLF | JIM \nL6: GUMMERAL MAY STOCK PRIOR Lawyers successfully challenged the previous \nL7: administration’s decision to delist the gray wolf and restored critical federal \nL8: protections under the Endangered Species Act. This latest triumph in court is \nL9: part of our ongoing battle to protect and restore gray wolves throughout their \nL10: historical range and shield them from persecution by extremist legislators in \nL11: Idaho, Montana and Wyoming. TWO MORE FATALIZED GRAY SWALLETS TO SEA TO SHARE \nL12: ALLIANCE Lobbyists worked around the clock to expand funding for wildlife \nL13: conservation in the FY2022 federal spending bill, which included $31 million (a \nL14: 44% increase) for the Bureau of Land Management’s Threatened and Endangered \nL15: Species Program, $2.5 million (an 81% increase) for the U.S. Department of \nL16: Agriculture Wildlife Services’ Nonlethal Initiative to prevent human-wildlife \nL17: conflicts and $21 million (a 320% increase) for North Atlantic right whale \nL18: conservation. 2 DEFENDERS OF WILDLIFE ===== Page 3 ===== **Policy Experts** \nL19: played a crucial role in securing international trade protections for 100 \nL20: species of sharks and rays, all 158 species of glass frogs and 73 species of \nL21: reptiles, including 21 species of desert horned lizards, at the Convention on \nL22: International Trade in Endangered Species (CITES) in Panama. \\--- **Field \nL23: Teams** worked tirelessly to protect and restore imperiled species across the \nL24: country. From Florida manatees and red wolves in the Southeast to belugas and \nL25: grizzly bears in Alaska, Defenders’ conservation experts were on the ground \nL26: saving species that need our help to survive and thrive. \\--- **Conservation \nL27: Innovation Experts** published more than 10 peer-reviewed studies on topics that\nL28:  include the Cook Inlet beluga whale, golden-cheeked warbler, global parrot \nL29: biodiversity, the Endangered Species Act, the effects of mountaintop removal \nL30: mining on endangered species, the ecological importance of panthers and the \nL31: implementation of “30x30” – the globally recognized goal to which President \nL32: Biden committed the U.S. to conserve 30% of our imperiled lands and waters by \nL33: 2030. \\--- **DEFENDERS.ORG** ===== Page 4 ===== THANK YOU Defenders celebrated \nL34: our 75th anniversary in 2022—an exciting milestone that we attribute to our \nL35: unwavering dedication to our wildlife conservation mission. From helping to pass\nL36:  the Endangered Species Act and other bedrock conservation laws to leading the \nL37: advocacy on the reintroduction of wolves to Yellowstone National Park and other \nL38: parts of the West, Defenders is proud to inspire, lead and ultimately celebrate \nL39: some of the most consequential conservation wins for wildlife and wild places. \nL40: It is wonderfully fitting that we also mark 2022 as a year when we successfully \nL41: defended, yet again, the original focal species and symbol of our organization, \nL42: the gray wolf. While the future for gray wolves and many other species remains \nL43: uncertain, we will never stop fighting to protect the imperiled wildlife that \nL44: need us and to restore vibrant and balanced ecosystems. Our mission is more \nL45: important than ever due to the escalating biodiversity and climate crises. \nL46: Defenders is determined to build on our accomplishments and seize new \nL47: opportunities to address these challenges through our multifaceted approach to \nL48: conservation. We focus on developing innovative and science-based solutions, \nL49: coordinating on-the-ground species and habitat restoration, promoting human-\nL50: wildlife coexistence, protecting and strengthening our bedrock environmental \nL51: laws, investing in impactful collaborations and partnerships, building a more \nL52: diverse and powerful conservation movement and challenging attacks on wildlife \nL53: through advocacy, policy and litigation. Defenders’ continued success is made ",
+            "tool_call_id": "fixture-tool-call-17-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Search within PDF for \"National Petroleum Reserve\".",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-19-0",
+                    "type": "function",
+                    "function": {
+                        "name": "find",
+                        "arguments": "{\"cursor\": 8, \"pattern\": \"Petroleum\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[9] Find results for text: `petroleum` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf/find?pattern=petroleum)\n**viewing lines [0 - 4] of 4**\n\nL0: # 【0†match at L201】\nL1: the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas \nL2: drilling and removed protections for important wildlife habitat. Protections \nL3: have now been restored for nearly half of the reserve’s pristine lands, which \nL4: are vital habitat for shorebirds, denning polar bears and tens of thousands of ",
+            "tool_call_id": "fixture-tool-call-19-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Open around L190.",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-21-0",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 8, \"loc\": 180}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[10] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf)\n**viewing lines [180 - 227] of 289**\n\nL180: the sixth successful transfer of bison to the Assiniboine and Sioux Tribes of \nL181: Fort Peck since 2019. \\--- **SWIFT FIX KITS | © ROSIMA PAELARINTSKIMMA MADDIAL \nL182: 200 AND CONSERVATION BIOLOGY INSTITUTE** \\--- **Celebrating our third year** of \nL183: a collaborative program with the Aaniih and Nakoda Tribes and others to restore \nL184: swift foxes to the Fort Belknap Indian Reservation in Montana, Defenders helped \nL185: with the release of 28 more swift foxes. With over 100 foxes reintroduced \nL186: through this program, monitoring efforts show that they are reproducing in the \nL187: wild—a critical measure of success for a self-sustaining population. \\--- \nL188: **Defenders continued to lead the way** for conserving and recovering the \nL189: endangered black-footed ferret, supporting the black-footed ferret survey for \nL190: the Fort Belknap Indian community. Thirty-six ferrets were vaccinated against \nL191: sylvatic plague and two dozen kits were released in the wild. \\--- **10 \nL192: DEFENDERS OF WILDLIFE** ===== Page 11 ===== Defenders helped to bring hope for \nL193: recovery for the endangered military macaw, adding 11 fledglings to a growing \nL194: wild population in Puerta Vallarta, Mexico, that is under pressure from habitat \nL195: loss and poachers for the illegal pet trade. Accord- ing to our recent report, \nL196: the 2008 parrot trade ban that Defenders fought to achieve is working. \nL197: Preventing more than 30,000 parrots from being illegally trapped each year, the \nL198: trade ban has resulted in a 47% decrease in the illegal trade of parrots and an \nL199: 88% decrease in U.S. seizures of Mexican parrots. As a result of a Defenders \nL200: lawsuit, BLM rescinded the previous administration’s plan that opened most of \nL201: the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas \nL202: drilling and removed protections for important wildlife habitat. Protections \nL203: have now been restored for nearly half of the reserve’s pristine lands, which \nL204: are vital habitat for shorebirds, denning polar bears and tens of thousands of \nL205: caribou in the Teshekpuk caribou herd. Our new storymap illustrates why the \nL206: reserve is so important to wildlife. Defenders marked the 20th anniversary of \nL207: our Sea Otter Awareness Week. In celebration of this milestone, we sponsored \nL208: state legislation, signed by Gov. Gavin Newsom, that formally recognizes the \nL209: event’s anniversary and acknowledges the vital role that sea otters play in \nL210: California’s nearshore ecosystems. This annual celebration provides a unique \nL211: opportunity to raise awareness of the important role sea otters play in \nL212: maintaining ecosystem health, our need to coexist with sea otters and the \nL213: efforts of Defenders and others in advancing sea otter conservation. \nL214: DEFENDERS.ORG ===== Page 12 ===== FINANCIAL REPORT In 2022, Defenders of \nL215: Wildlife’s nearly 2.2 million members and supporters contributed more than $41 \nL216: million for wildlife and wildlife habitat. This figure includes all those who \nL217: generously donated their time and expertise. The steadfast support of our donors\nL218:  allows Defenders to sustain our program and public education efforts in the \nL219: field, the courts and on Capitol Hill. 2022 SOURCES OF FUNDS Grants and \nL220: contributions $28,946 Bequests, trusts and split interests $13,928 Income from \nL221: investments, annuity reserve funds and trusts (-$7,495) Royalties and other \nL222: income $744 Contributed services $5,047 Total Revenue $41,170 USES OF FUNDS \nL223: Biodiversity conservation $18,970 Constituency mobilization $16,528 Fundraising \nL224: $1,029 Management and general $6,354 Total Expenses $42,881 Change in net assets\nL225:  (-$1,711) Net assets, start of year $45,144 Net Assets, End of the Year $43,433\nL226:  Dollars are in thousands. 12 DEFENDERS OF WILDLIFE Grants and contributions 59%\nL227:  Contributed services 10% Royalties and other income 2% Bequests, trusts and ",
+            "tool_call_id": "fixture-tool-call-21-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Scrolling a bit more for \"Contributed services 10%\". Already seen. So indeed the 2022 annual report shows contributed services 10%, down from 12% in 2021.\n\nThus the organization is Defenders of Wildlife.\n\nThe question: \"Who was the emeritus board member that year? Name only, no titles.\"\n\nWe need to identify the emeritus board member listed in the 2022 annual report. From the 2021 annual report, we saw \"BOARD EMERITUS Terry C. Pelster, Esq.\" However, that is for 2021. The question asks \"Who was the emeritus board member that year?\" That year refers to the following year (2022). So we need to check the 2022 annual report for the emeritus board member.\n\nOpen the 2022 PDF near the board list. Search for \"Emeritus\".",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-23-0",
+                    "type": "function",
+                    "function": {
+                        "name": "find",
+                        "arguments": "{\"cursor\": 8, \"pattern\": \"Emeritus\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[11] Find results for text: `emeritus` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf/find?pattern=emeritus)\n**viewing lines [0 - 10] of 10**\n\nL0: # 【0†match at L237】\nL1: Sisk, Ph.D British Columbia, Canada BOARD EMERITUS Terry C. Pelster, Esq. New \nL2: York *Executive Committee member* STAFF & ADVISORS SCIENCE ADVISORS Gabriela \nL3: Chavarria, Ph.D Burke Museum, University of Washington Leah Gerber, Ph.D Arizona\nL4:  State University Kathayoon Khalil, Ph.D Oregon Zoo Rurik List, Ph.D Universidad\nL5: \nL6: # 【1†match at L243】\nL7: Davis Thomas D. Sisk, Ph.D Emeritus Professor Northern Arizona University David \nL8: S. Wilcox, Ph.D Princeton University Rae Wynn-Grant, Ph.D National Geographic \nL9: Society SENIOR STAFF Jamie Rappaport Clark President & CEO Thu Pham Chief of \nL10: Staff James Stofan Senior Vice President, Operations McCrystle Adams Vice ",
+            "tool_call_id": "fixture-tool-call-23-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Open around L230.",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-25-0",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 8, \"loc\": 220}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[12] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf)\n**viewing lines [220 - 267] of 289**\n\nL220: contributions $28,946 Bequests, trusts and split interests $13,928 Income from \nL221: investments, annuity reserve funds and trusts (-$7,495) Royalties and other \nL222: income $744 Contributed services $5,047 Total Revenue $41,170 USES OF FUNDS \nL223: Biodiversity conservation $18,970 Constituency mobilization $16,528 Fundraising \nL224: $1,029 Management and general $6,354 Total Expenses $42,881 Change in net assets\nL225:  (-$1,711) Net assets, start of year $45,144 Net Assets, End of the Year $43,433\nL226:  Dollars are in thousands. 12 DEFENDERS OF WILDLIFE Grants and contributions 59%\nL227:  Contributed services 10% Royalties and other income 2% Bequests, trusts and \nL228: split interests 29% TOTAL REVENUE Income from investments -15% Program and \nL229: support services 98% USE OF FUNDS Fundraising 2% ===== Page 13 ===== \nL230: DEFENDERS.ORG 13 ===== Page 14 ===== BOARD OF DIRECTORS John Dayton, Esq.* Chair\nL231:  Texas Sheryl Sachs* Vice Chair Connecticut Dinah Bear, Esq.* Secretary Arizona \nL232: Mark Caylor* Treasurer District of Columbia Susan Alexander Connecticut Sylvia \nL233: Baca* California Whitefield Bailey Tennessee Donald Barry, Esq. Oregon Kim \nL234: O’Keefe Beck, Ph.D California Gabriela Chavarria, Ph.D Washington Jamie \nL235: Rappaport Clark* President & CEO Virginia John Donovan, Esq.* Montana and \nL236: California Caroline D. Gabel Maryland Terry L. Root, Ph.D New Mexico Thomas D. \nL237: Sisk, Ph.D British Columbia, Canada BOARD EMERITUS Terry C. Pelster, Esq. New \nL238: York *Executive Committee member* STAFF & ADVISORS SCIENCE ADVISORS Gabriela \nL239: Chavarria, Ph.D Burke Museum, University of Washington Leah Gerber, Ph.D Arizona\nL240:  State University Kathayoon Khalil, Ph.D Oregon Zoo Rurik List, Ph.D Universidad\nL241:  Autónoma Metropolitana-Lerma Barry Noon, Ph.D Colorado State University Terry \nL242: L. Root, Ph.D Stanford University Mark Schwartz, Ph.D University of California-\nL243: Davis Thomas D. Sisk, Ph.D Emeritus Professor Northern Arizona University David \nL244: S. Wilcox, Ph.D Princeton University Rae Wynn-Grant, Ph.D National Geographic \nL245: Society SENIOR STAFF Jamie Rappaport Clark President & CEO Thu Pham Chief of \nL246: Staff James Stofan Senior Vice President, Operations McCrystle Adams Vice \nL247: President, Conservation Law Matt Braughler Vice President, Philanthropy Rachel \nL248: Brittin Vice President, Communications Shawn Cantrell Vice President, Field \nL249: Conservation Programs Robert L. Dewey Vice President, Government Relations & \nL250: External Affairs Monica Goldberg Vice President, Landscape Conservation Sharon \nL251: Morris-Smith Vice President of Human Resources Elizabeth Nichols Vice President,\nL252:  Finance/CFO Anthony Pegues Vice President, Diversity, Equity, Inclusion, \nL253: Justice Lindsay Rosa, Ph.D Vice President, Conservation Research and Innovation \nL254: Brandace Swimley Vice President, Integrated Marketing 14 DEFENDERS OF WILDLIFE \nL255: ===== Page 15 ===== There are many ways to support Defenders of Wildlife and \nL256: protect our imperiled wildlife. To learn more or donate, contact us at \nL257: 1-800-385-9712, visit Defenders.org/donate or choose one of the following \nL258: options. ➤ Transfer appreciated assets directly to Defenders and maximize \nL259: potential tax benefits. Defenders.org/ donate-stocks-help-save-wildlife ➤ Become\nL260:  a Leadership Council member with a single donation of $1,000 or more and enjoy \nL261: more personal involvement with Defenders. Defenders.org/leadershipcouncil ➤ If \nL262: you are 70.5 or older, roll over a portion of your IRA to benefit wildlife. \nL263: Remember, these gifts must come directly from your IRA account. \nL264: Defenders.org/planned-giving/gifts-your-retirement-plan ➤ Get started on a \nL265: contribution from your donor-advised fund Defenders.org/donor-advised-funds ➤ \nL266: Check with your personnel office or that of your spouse/partner about your \nL267: company’s Corporate Matching Gifts program or visit us at ",
+            "tool_call_id": "fixture-tool-call-25-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Thus the emeritus board member is Terry C. Pelster. However, the question might ask for \"Who was the emeritus board member that year?\" Possibly referring to the year of the annual report that mentioned the rescinded plan and contributed services decrease. That is 2022. So the answer is Terry C. Pelster.\n\nBut let's verify that the 2022 annual report indeed includes the mention of contributed services decreasing to 10%. We saw that. So answer is Terry C. Pelster.\n\nBut maybe the question is tricky: \"Who was the emeritus board member that year?\" Could be the emeritus board member listed in the 2021 annual report? However, the question says \"That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services. The following year, the annual report stated that the organization had played a role in rescinding a previous administration’s plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%.\"\n\nThus the \"following year\" annual report is 2022. The question likely asks: \"Who was the emeritus board member that year?\" Meaning the emeritus board member listed in that annual report (2022). So answer: Terry C. Pelster.\n\nBut let's confirm that the 2021 annual report also shows emeritus board member Terry C. Pelster. Indeed we saw that. But perhaps the 2022 report might have a different emeritus board member. We saw it's the same. So answer is Terry C. Pelster.\n\nHowever, the question might be about a different organization. Let's verify the details.\n\nFirst clue: Press release about Kenya vaccine doses on Dec 5, 2021. That is likely a World Bank press release. Search for \"World Bank Kenya vaccine doses December 2021 press release\".",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-27-0",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"As of December 5, 2021, Kenya had received a total of 16,201,670 vaccines\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[13] As of December 5, 2021, Kenya had received a total of 16,201,670 vaccines (Search_Results/As of December 5, 2021, Kenya had received a total of 16,201,670 vaccines)\n**viewing lines [0 - 41] of 41**\n\nL0: \nL1: URL: Search_Results/As of December 5, 2021, Kenya had received a total of \nL2: 16,201,670 vaccines\nL3: # Search Results\nL4: \nL5:   * 【0†Kenya's Economy is Showing Resilience as Output Rises ...; publish_date: \nL6: none†www.worldbank.org】 Dec 14, 2021 — As of December 5, 2021, Kenya had \nL7: received a total of 16,201,670 vaccines, with 7,583,134 administered. While \nL8: vaccine acceptance is ...\nL9:   * 【1†Unmet need for COVID-19 vaccination coverage in Kenya - PMC; \nL10: publish_date: none†pmc.ncbi.nlm.nih.gov】 by SK Muchiri · 2022 · Cited by 42 — As\nL11:  of December 2021, six counties had a vaccination coverage of less than 5%. \nL12: These counties include Garissa, Mandera, Marsabit, Tana River, Turkana, and ...\nL13:   * 【2†MINISTRY OF HEALTH; publish_date: none†covidhub.mediacouncil.or.ke】 Dec \nL14: 1, 2021 — • Total Covid-19 Vaccines Received to date- 16,201,670 ... Table 10: \nL15: Vaccine Logistics Received in the Country as at 5th, December 2021.\nL16:   * 【3†COVID-19 vaccination refusal trends in Kenya over 2021 - PMC; \nL17: publish_date: none†pmc.ncbi.nlm.nih.gov】 by RT Rego · 2023 · Cited by 21 — We \nL18: assessed vaccine refusal over time in Kenya, and characterized factors \nL19: associated with changes in vaccination refusal.\nL20:   * 【4†Ciheb-Kenya on the Front Lines of the COVID-19 Vaccine ...; publish_date:\nL21:  none†www.ciheb.org】 Of the 721,509 persons vaccinated, 409,711 are aged 58 \nL22: years and above, 143,050 are healthcare workers, 109,593 are teachers, and \nL23: 59,155 are security officers.\nL24:   * 【5†proportion of fully vaccinated adults 23.3pc.; publish_date: \nL25: none†www.facebook.com】 COVID-19: KENYA administers 150632 new vaccinations; \nL26: total jabs given now 13782963; proportion of fully vaccinated adults 23.3pc.\nL27:   * 【6†National COVID-19 Vaccines Deployment and ...; publish_date: \nL28: none†www.parliament.go.ke】 Kenya has a well-established immunization program and\nL29:  a robust disease surveillance and response unit (DSRU) which serves as an early\nL30:  warning system, to.\nL31:   * 【7†Kenya: Vaccine Requirements Violate Rights; publish_date: \nL32: none†www.hrw.org】 Dec 13, 2021 — Kenya, with an estimated adult population of \nL33: 27.2 million and a total population of 55 million, has received approximately 23\nL34:  million doses as ...\nL35:   * 【8†Government gives 20 million a month to get vaccinated; publish_date: \nL36: none†www.bbc.com】 Nov 22, 2021 — Although less than 10% of Kenyans are \nL37: vaccinated, the government wants to avoid a surge over Christmas.\nL38:   * 【9†ministry of health kenya covid-19 immunization status ...; publish_date: \nL39: none†www.health.go.ke】 May 15, 2022 — By the end of 2021 Kenya had administered \nL40: 10.1 million doses of 5 types of Covid-19 vaccines. During this period 4.2 \nL41: million people were ...",
+            "tool_call_id": "fixture-tool-call-27-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Open result 0.",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-29-0",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 13, \"id\": 0}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[14] www.worldbank.org (https://www.worldbank.org/en/news/press-release/2021/12/14/kenya-s-economy-is-showing-resilience-as-output-rises-above-pre-pandemic-levels-driven-by-a-rebound-in-the-services-sect)\n**viewing lines [0 - 52] of 61**\n\nL0: \nL1: URL: https://www.worldbank.org/en/news/press-release/2021/12/14/kenya-s-economy-\nL2: is-showing-resilience-as-output-rises-above-pre-pandemic-levels-driven-by-a-\nL3: rebound-in-the-services-sect\nL4: NAIROBI, December 14, 2021 — Kenya’s economy has demonstrated resilience to the \nL5: COVID-19 shock, with output in the first half of the year rising above pre-\nL6: pandemic levels. In 2021 as a whole, gross domestic product (GDP) is expected to\nL7:  grow by 5%, one of the faster recoveries among Sub-Saharan African countries. \nL8: Overall economic performance is expected to be robust at 4.9% per year in \nL9: 2022-23, similar to the pre-pandemic pace (5% average annual growth from 2010 to\nL10:  2019). According to the 24th edition of the Kenya Economic Update, “From \nL11: Recovery to Better Jobs,” growth has been supported by rebounds in industry and,\nL12:  especially, services. Agricultural output, however, fell by 0.5% year on year \nL13: in the first half of 2021 following a particularly strong performance in 2020, \nL14: partly due to below-average rains. Demand-side recovery has been supported by a \nL15: revival in private consumption, against a backdrop of improving employment \nL16: conditions and household incomes. “Kenya’s economy has shown considerable \nL17: resilience to the enormous shock of the pandemic, and this year is expected to \nL18: post one of the stronger growth rebounds in the region thanks to diversified \nL19: sources of growth and sound economic policies and management,” said Keith \nL20: Hansen, World Bank Country Director for Kenya. “However, poverty has increased, \nL21: and the buffers and coping mechanisms of households, firms, and the public \nL22: finances have been depleted.” Economic activity in Kenya has continued to adapt \nL23: to the pandemic and associated restrictions. A mix of containment measures, such\nL24:  as a nightly curfew, were in effect through most of 2021, while more \nL25: economically disruptive measures such as lockdowns and travel restrictions were \nL26: phased, limiting the impact on economic activities. The vaccine rollout, which \nL27: had a slow start due to supply constraints, has picked up as new shipments of \nL28: vaccines have arrived, particularly since September. This has supported economic\nL29:  recovery and growth through the third quarter of 2021. As of December 5, 2021, \nL30: Kenya had received a total of 16,201,670 vaccines, with 7,583,134 administered. \nL31: While vaccine acceptance is reportedly high there is still a long way to go \nL32: towards the government’s target of fully inoculating the adult population of \nL33: about 30 million by the end of 2022.As of December 6, 2021, about 10% of adults \nL34: (2.9 million people) had been fully vaccinated while another 16% (4.9 million \nL35: people) had received their first dose. The report also projects robust growth in\nL36:  the medium-term. This outlook takes into account that some sub-sectors, such as\nL37:  education, have bounced back strongly, but others such as international \nL38: tourism, have only partially and face a much more protracted recovery. The \nL39: anticipated further recovery of hotels and restaurants, trade, transport, and \nL40: other services, depends on substantial vaccination progress to help prevent new \nL41: waves of infections and associated containment measures. “Recent economic \nL42: performance has been strong and the outlook is positive, but in Kenya, as \nL43: everywhere, the future course of the pandemic remains a major source of \nL44: uncertainty, said Alex Sienaert, Senior Economist for Kenya. “To support further\nL45:  recovery, reduce debt distress risks, and rebuild space for social and \nL46: development spending, it is critical that Kenya continues to implement medium-\nL47: term fiscal consolidation plans.” In addition to pandemic-related risks to the \nL48: outlook, a second key domestic risk factor stems from the drought conditions \nL49: which are affecting parts of the country and already causing severe hardship. \nL50: Should the drought intensify or spread, this would weigh on the near-term \nL51: economic outlook. Weaker global growth, higher-than-anticipated energy prices, \nL52: and tighter external financing conditions are the primary external risks. As ",
+            "tool_call_id": "fixture-tool-call-29-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Thus first clue confirmed.\n\nSecond clue: Defenders of Wildlife 2021 annual report. Confirmed.\n\nThird clue: Defenders of Wildlife 2022 annual report. Confirmed.\n\nThus the answer is Terry C. Pelster.\n\nBut let's verify that the 2022 annual report indeed shows \"Contributed services 10%\". We saw that. Also confirm that 2021 report shows 12%. We saw that.\n\nThus the question: \"Who was the emeritus board member that year?\" Refers to 2022. Answer: Terry C. Pelster.\n\nHowever, the question might be asking for the emeritus board member in the 2021 report? Let's read the question carefully:\n\n\"In a year before 2023, between December 1 and 15 (inclusive), a press release reported that as of the 5th of that month, a country had received a total of 16,201,670 vaccine doses, with 7,583,134 administered. That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services. The following year, the annual report stated that the organization had played a role in rescinding a previous administration’s plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%. Who was the emeritus board member that year? Name only, no titles.\"\n\nThe phrase \"that year\" could refer to \"the following year\" i.e., the year of the annual report that mentioned the rescinded plan. Or could refer to \"that same year\" (the year of the press release and the first annual report). However, the phrase \"that year\" appears at the end: \"Who was the emeritus board member that year?\" It likely refers to the year of the annual report that is being discussed in the preceding sentence. The preceding sentence talks about the following year's annual report. So indeed \"that year\" is the following year (2022).\n\nThus answer: Terry C. Pelster.\n\nBut to be thorough, we should check the 2022 annual report for any other emeritus board members. The search only showed \"BOARD EMERITUS Terry C. Pelster, Esq.\" Maybe there are multiple emeritus board members. But likely only one.\n\nCheck the 2021 annual report for emeritus board member. It also shows Terry C. Pelster. So maybe the question is asking for the emeritus board member in the 2021 report. But why would they ask that? The question is about connecting the clues to identify the organization and then find the emeritus board member. The clues point to Defenders of Wildlife. The question likely asks for the emeritus board member as of the 2022 report.\n\nNevertheless, we should confirm that the 2022 report is indeed the one that mentions the rescinded plan. We saw that. Also confirm that contributed services decreased from 12% to 10%. We saw that.\n\nThus answer is Terry C. Pelster.\n\nBut let's search for \"Defenders of Wildlife emeritus board member 2022 Terry Pelster\".",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-31-0",
+                    "type": "function",
+                    "function": {
+                        "name": "search",
+                        "arguments": "{\"query\": \"Terry C. Pelster Defenders of Wildlife emeritus board member\", \"topn\": 10, \"source\": \"web\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[15] Terry C. Pelster Defenders of Wildlife emeritus board member (Search_Results/Terry C. Pelster Defenders of Wildlife emeritus board member)\n**viewing lines [0 - 31] of 31**\n\nL0: \nL1: URL: Search_Results/Terry C. Pelster Defenders of Wildlife emeritus board member\nL2: # Search Results\nL3: \nL4:   * 【0†2024 impact report; publish_date: none†defenders.org】 Terry Root, Ph.D. \nL5: New Mexico. Thomas D. Sisk, Ph.D. British Columbia, Canada. BOARD EMERITUS. \nL6: Terry C. Pelster, Esq. New York. SCIENCE ADVISORS. Leah Gerber, Ph ...\nL7:   * 【1†Defenders of Wildlife; publish_date: none†www.wrongkindofgreen.org】 \nL8: Directors ; Victor M. Sher*, Chair – California ; Terry C. Pelster*, Vice Chair \nL9: – New York ; Richard Kopcho*, Treasurer – California ; Adelaide P. Gomer*, \nL10: Secretary ...\nL11:   * 【2†Exhibit 12; publish_date: none†www.uschamber.com】 DECLARATION OF TERRY \nL12: PELSTER. I, Terry C. Pelster, declare as follows: 1. I am a current member of \nL13: Defenders of Wildlife (“Defenders”) and have been a member.\nL14:   * 【3†2020 ANNUAL REPORT; publish_date: none†defenders.org】 Terry L. Root. \nL15: Florida. BOARD. EMERITUS. Terry C. Pelster. New York. Alan Steinberg. Florida. \nL16: SCIENCE. ADVISORS. Gabriela Chavarria,. Ph.D. Denver Museum of.\nL17:   * 【4†2019 Annual Report; publish_date: none†defenders.org】 BOARD EMERITUS. \nL18: Terry C. Pelster. New York. Alan Steinberg. Florida. SCIENCE ADVISORS. Leah \nL19: Gerber, Ph.D. Arizona State University. Kathayoon Khalil, Ph.D.\nL20:   * 【5†annual report; publish_date: none†defenders.org】 * Executive Committee \nL21: member. BoARD EMERITuS. Terry C. Pelster. New York. Alan Steinberg. Florida. \nL22: PRESIDENT. EMERITuS. Rodger Schlickeisen. Virginia. SCIENCE ...\nL23:   * 【6†BEING THERE FOR BELUGAS; publish_date: none†wendeenicole.com】 Mar 2, 2023\nL24:  — Defenders of Wildlife is a national, nonprofit member- ship ... Terry C. \nL25: Pelster, Esq. New York. *Executive Committee member outfront.\nL26:   * 【7†Annual Reports; publish_date: none†www.annualreports.com】 WAYS TO GIVE \nL27: There are many ways to support Defenders of Wildlife and protect our imperiled \nL28: wildlife. ... Terry C. Pelster. New York. Alan Steinberg. Florida.\nL29:   * 【8†2014 Annual Report; publish_date: none†defenders.org】 BOARD OF DIRECTORS \nL30: defenders of wildlife is a national, nonprofit ... BOARD EMERITUS. Terry C. \nL31: Pelster. New York. Alan Steinberg. Florida. Page 3. PO.",
+            "tool_call_id": "fixture-tool-call-31-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Thus Terry C. Pelster is indeed the emeritus board member.\n\nNow the question might want just \"Terry C. Pelster\". Or maybe \"Terry Pelster\". Or \"Terry C. Pelster, Esq.\" but instructions say \"Name only, no titles.\" So likely \"Terry C. Pelster\". However, maybe they want \"Terry Pelster\". Better to include full name as listed: \"Terry C. Pelster\".\n\nLet's read the question again: \"Who was the emeritus board member that year? Name only, no titles.\" So we should answer just the name, no \"Esq.\" or \"Board Emeritus\". So \"Terry C. Pelster\". Or maybe \"Terry Pelster\". But the document shows \"Terry C. Pelster, Esq.\" So the name is \"Terry C. Pelster\". \n\nBetter to verify that the 2022 annual report indeed lists \"BOARD EMERITUS Terry C. Pelster, Esq.\" We saw that line.\n\nThus answer: Terry C. Pelster.\n\nBut let's double-check that the question might refer to the 2021 annual report. The phrase \"that year\" could be ambiguous. Let's parse the timeline:\n\n- Year 1: 2021. Press release in Dec 2021 about Kenya vaccine doses. Same year (2021) annual report of Defenders of Wildlife shows 12% contributed services.\n\n- Year 2: 2022. Annual report mentions rescinded plan and contributed services decreased to 10%.\n\nThe question says \"The following year, the annual report stated that the organization had played a role in rescinding a previous administration’s plan ... and that contributed services had decreased by 2% to 10%.\" This is year 2.\n\nThen asks \"Who was the emeritus board member that year?\" That likely refers to year 2.\n\nThus answer: Terry C. Pelster.\n\nHowever, perhaps the emeritus board member changed between 2021 and 2022. We need to check 2022 annual report for any other emeritus board member. We saw only Terry C. Pelster. Also check 2021 annual report: same. So likely unchanged.\n\nThus answer is Terry C. Pelster.\n\nNevertheless, we should confirm that the 2022 annual report is indeed the one that includes the phrase \"BLM rescinded the previous administration’s plan that opened most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling\". We saw that in line 200-202.\n\nThus answer is Terry C. Pelster.\n\nNow we need to provide the answer in the final message. Only the name.\n\nThus final answer: Terry C. Pelster.\n\nBut let's be thorough and search for \"Defenders of Wildlife 2022 annual report contributed services 10%\".",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-33-0",
+                    "type": "function",
+                    "function": {
+                        "name": "find",
+                        "arguments": "{\"cursor\": 8, \"pattern\": \"Contributed services\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[16] Find results for text: `contributed services` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf/find?pattern=contributed services)\n**viewing lines [0 - 10] of 10**\n\nL0: # 【0†match at L222】\nL1: income $744 Contributed services $5,047 Total Revenue $41,170 USES OF FUNDS \nL2: Biodiversity conservation $18,970 Constituency mobilization $16,528 Fundraising \nL3: $1,029 Management and general $6,354 Total Expenses $42,881 Change in net assets\nL4:  (-$1,711) Net assets, start of year $45,144 Net Assets, End of the Year $43,433\nL5: \nL6: # 【1†match at L227】\nL7:  Contributed services 10% Royalties and other income 2% Bequests, trusts and \nL8: split interests 29% TOTAL REVENUE Income from investments -15% Program and \nL9: support services 98% USE OF FUNDS Fundraising 2% ===== Page 13 ===== \nL10: DEFENDERS.ORG 13 ===== Page 14 ===== BOARD OF DIRECTORS John Dayton, Esq.* Chair",
+            "tool_call_id": "fixture-tool-call-33-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Thus confirmed.\n\nNow we should verify that the 2021 annual report shows contributed services 12%.",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-35-0",
+                    "type": "function",
+                    "function": {
+                        "name": "find",
+                        "arguments": "{\"cursor\": 4, \"pattern\": \"Contributed services\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[17] Find results for text: `contributed services` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf/find?pattern=contributed services)\n**viewing lines [0 - 4] of 4**\n\nL0: # 【0†match at L194】\nL1: $3,354 Royalties and other income $3,576 Contributed services $6,140 Total \nL2: Revenue $49,819 USES OF FUNDS Biodiversity conservation $22,420 Constituency \nL3: mobilization $16,324 Fundraising $1,211 Management and general $5,865 Total \nL4: Expenses $45,820 Change in net assets $3,999 Net assets, start of year $41,145 ",
+            "tool_call_id": "fixture-tool-call-35-0"
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Open around L190-200 to see percentage.",
+            "tool_calls": [
+                {
+                    "id": "fixture-tool-call-37-0",
+                    "type": "function",
+                    "function": {
+                        "name": "open",
+                        "arguments": "{\"cursor\": 4, \"loc\": 185}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "content": "[18] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf)\n**viewing lines [185 - 232] of 261**\n\nL185: devastating water pollution that is killing the seagrass and causing manatees to\nL186:  starve. DAVID TES | SAM FRENZY DRAW DEFENDERS.ORG 11 ===== Page 12 ===== In \nL187: 2021, Defenders of Wildlife’s nearly 2.2 million members and supporters \nL188: contributed more than $49 million for wildlife and wildlife habitat. This figure\nL189:  includes all those who generously donated their time and expertise. The \nL190: steadfast support of our donors allows Defenders to sustain our program and \nL191: public education efforts in the field, the courts and on Capitol Hill. 2021 \nL192: SOURCES OF FUNDS Grants and contributions $29,057 Bequests, trusts and split \nL193: interests $7,692 Income from investments, annuity reserve funds and trusts \nL194: $3,354 Royalties and other income $3,576 Contributed services $6,140 Total \nL195: Revenue $49,819 USES OF FUNDS Biodiversity conservation $22,420 Constituency \nL196: mobilization $16,324 Fundraising $1,211 Management and general $5,865 Total \nL197: Expenses $45,820 Change in net assets $3,999 Net assets, start of year $41,145 \nL198: Net Assets, End of the Year $45,144 Dollars are in thousands. 12 DEFENDERS OF \nL199: WILDLIFE Grants and contributions 58% Income from investments 7% Requests, \nL200: trusts and split interests 15% Royalties and other income 7% Contributed \nL201: services 12% Program and support services 97% Fundraising 3% ===== Page 13 =====\nL202:  DEFENDERS.ORG 13 ===== Page 14 ===== John Dayton, Esq.* Chair Texas Sheryl \nL203: Sachs* Vice Chair Connecticut Whitfield Bailey* Secretary Tennessee Mark Caylor*\nL204:  Treasurer District of Columbia Sylvia Baca* California Donald Barry, Esq. \nL205: Oregon Dinah Bear, Esq. Arizona Kim O’Keefe Beck, Ph.D California Gabriela \nL206: Chavarria, Ph.D Washington Jamie Rappaport Clark* President & CEO Virginia \nL207: Caroline D. Gabel Maryland Keith M. Harper, Esq. Maryland John Donovan, Esq. \nL208: Montana and California James M. Hecker, Esq. Virginia Judith Ponsikoff, Ph.D* \nL209: California Terry L. Root, Ph.D Florida BOARD EMERITUS Terry C. Pelster, Esq. New\nL210:  York *Executive Committee member* ST A F F & A D V I S O R S SCIENCE ADVISORS \nL211: Gabriela Chavarria, Ph.D Burke Museum, University of Washington Leah Gerber, \nL212: Ph.D Arizona State University Kathayoon Khalil, Ph.D Oregon Zoo Rurik List, Ph.D\nL213:  Universidad Autónoma Metropolitana-Lerma Barry Noon, Ph.D Colorado State \nL214: University Terry L. Root, Ph.D Stanford University Mark Schwartz, Ph.D \nL215: University of California-Davis David S. Wilcox, Ph.D Princeton University Rae \nL216: Wynn-Grant, Ph.D National Geographic Society SENIOR STAFF Jamie Rappaport Clark \nL217: President & CEO Thu Pham Chief of Staff Renee Stone Senior Vice President, \nL218: Conservation Programs & General Counsel James Stefan Senior Vice President, \nL219: Operations Matt Braughler Vice President, Philanthropy Rachel Brittin Vice \nL220: President, Communications Shawn Cantrell Vice President, Field Conservation \nL221: Programs Robert L. Dewey Vice President, Government Relations & External Affairs\nL222:  Monica Goldberg Vice President, Landscape Conservation Sharon Morris-Smith Vice\nL223:  President of Human Resources Elizabeth Nichols Vice President, Finance/CFO \nL224: Anthony Pegues Vice President, Diversity, Equity, Inclusion, Justice Lindsay \nL225: Rosa, Ph.D Director, Center for Conservation Innovation Michael P. Senatore Vice\nL226:  President, Conservation Law Brandace Swimley Vice President, Integrated \nL227: Marketing 14 DEFENDERS OF WILDLIFE ===== Page 15 ===== THERE ARE MANY WAYS TO \nL228: SUPPORT DEFENDERS OF WILDLIFE and protect our imperiled wildlife. To learn more \nL229: or donate, contact us at 1-800-385-9712, visit defenders.org/donate or choose \nL230: one of the following options. ➤ Transfer appreciated assets directly to \nL231: Defenders and maximize potential tax benefits. Defenders.org/donate-stocks-help-\nL232: save-wildlife ➤ Become a Leadership Council member with a single donation of ",
+            "tool_call_id": "fixture-tool-call-37-0"
+        },
+        {
+            "role": "assistant",
+            "content": "Terry C. Pelster",
+            "reasoning_content": "Thus everything matches.\n\nThus answer: Terry C. Pelster.\n\nNow we need to output only the name.\n\nThus final answer: Terry C. Pelster.\n\nBut perhaps the question expects \"Terry Pelster\". However, the document includes middle initial \"C.\" It might be better to include the full name as listed: \"Terry C. Pelster\". The instruction \"Name only, no titles\" suggests we should not include \"Esq.\" or \"Board Emeritus\". But including middle initial is likely okay.\n\nTo be safe, we could output \"Terry C. Pelster\". Let's see similar puzzles: often they expect the full name as appears in the document.\n\nThus answer: Terry C. Pelster.\n\nNow we will provide final answer.",
+            "tool_calls": []
+        }
+    ]
+}
diff --git a/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_output_search_w_date.txt b/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_output_search_w_date.txt
new file mode 100644
index 000000000000..6349a4bb2083
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_output_search_w_date.txt
@@ -0,0 +1,2455 @@
+<｜begin▁of▁sentence｜><｜User｜>
+
+## Tools
+
+You have access to a set of tools you can use to answer the user's question.
+You can invoke functions by writing a "<｜DSML｜function_calls>" block like the following as part of your reply to the user:
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="$FUNCTION_NAME">
+<｜DSML｜parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</｜DSML｜parameter>
+...
+</｜DSML｜invoke>
+<｜DSML｜invoke name="$FUNCTION_NAME2">
+...
+</｜DSML｜invoke>
+</｜DSML｜function_calls>
+
+String and scalar parameters should be specified as is without any escaping or quotes, while lists and objects should use JSON format. The "string" attribute should be set to "true" for string type parameters and "false" for other types (numbers, booleans, arrays, objects).
+
+If the thinking_mode is enabled, then after function results you should strongly consider outputting a thinking block. Here is an example:
+
+<｜DSML｜function_calls>
+...
+</｜DSML｜function_calls>
+
+<function_results>
+...
+</function_results>
+
+<think>...thinking about results</think>
+
+Here are the functions available in JSONSchema format:
+<functions>
+{"name": "search", "description": "Searches for information related to query and displays topn results.", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "The search query string"}, "topn": {"type": "integer", "description": "Number of top results to display", "default": 10}, "source": {"type": "string", "description": "Source to search within", "enum": ["web", "news"], "default": "web"}}, "required": ["query"], "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
+{"name": "open", "description": "Opens the link id from the page indicated by cursor starting at line number loc, showing num_lines lines. Valid link ids are displayed with the formatting: 【{id}†.*】. If cursor is not provided, the most recent page is implied. If id is a string, it is treated as a fully qualified URL associated with source. If loc is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available. Use this function without id to scroll to a new location of an opened page.", "parameters": {"type": "object", "properties": {"id": {"description": "Link ID (number) or fully qualified URL (string)", "anyOf": [{"type": "integer"}, {"type": "string"}], "default": -1}, "cursor": {"type": "integer", "description": "Page cursor indicator", "default": -1}, "loc": {"type": "integer", "description": "Starting line number", "default": -1}, "num_lines": {"type": "integer", "description": "Number of lines to show", "default": -1}, "view_source": {"type": "boolean", "description": "Whether to view source", "default": false}, "source": {"type": "string", "description": "Source associated with the URL", "enum": ["web", "news"], "default": "web"}}, "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
+{"name": "find", "description": "Finds exact matches of pattern in the current page, or the page given by cursor.", "parameters": {"type": "object", "properties": {"pattern": {"type": "string", "description": "The exact pattern to search for"}, "cursor": {"type": "integer", "description": "Page cursor to search in", "default": -1}}, "required": ["pattern"], "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
+</functions>
+
+
+# The user's message is: 帮我调研一下，目前有哪些针对search agent的benchmark？详细介绍各自的特点、使用场景、例题。
+
+<system-reminder>
+## Today’s Date
+2025-11-27, Thursday.
+</system-reminder><｜Assistant｜><think>用户想调研目前针对搜索智能体（search agent）的基准测试（benchmark），要求详细介绍各自的特点、使用场景和例题。我需要提供一份全面的概述，涵盖现有的基准测试，例如WebGPT、WebShop、ALFWorld、HotpotQA、FEVER、TriviaQA、Natural Questions、SearchQA、MS MARCO、BEIR、LoTTE、MIRACL、Zero-shot IR基准等。同时，需要区分这些基准测试是针对检索任务、问答任务，还是具体针对搜索智能体的评估。此外，还应考虑那些专门为评估搜索智能体（即能够与环境交互、执行搜索、浏览网页、使用工具的智能体）而设计的基准测试，例如WebGPT、WebShop、ALFWorld、Mind2Web、WebArena、VisualWebArena、SearchBench、WebVLN、WebNav等。我需要详细介绍每个基准测试的特点、使用场景，并举例说明。为了确保信息的全面性和时效性，我将同时进行两项搜索：一项是针对“search agent benchmark”的通用搜索，以获取广泛的基准测试信息；另一项是针对“搜索智能体 基准测试”的中文搜索，以获取更符合用户语言习惯和可能更具体的中文资料。</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">search agent benchmark 2024</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">搜索智能体 基准测试</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[0] search agent benchmark 2024 (Search_Results/search agent benchmark 2024)
+**viewing lines [0 - 39] of 39**
+
+L0: 
+L1: URL: Search_Results/search agent benchmark 2024
+L2: # Search Results
+L3: 
+L4:   * 【0†HSCodeComp: A Realistic and Expert-level Benchmark for ...; publish_date:
+L5:  none†arxiv.org】 Oct 22, 2025 — To fill this gap, we introduce HSCodeComp, the 
+L6: first realistic, expert-level e-commerce benchmark designed to evaluate deep 
+L7: search agents in ...
+L8:   * 【1†open-compass/GTA - A Benchmark for General Tool Agents; publish_date: 
+L9: none†github.com】 GTA is a benchmark to evaluate the tool-use capability of LLM-
+L10: based agents in real-world scenarios. It features three main aspects.
+L11:   * 【2†Benchmarking real-time trust scoring across five AI Agent ...; 
+L12: publish_date: none†cleanlab.ai】 Aug 20, 2025 — This article evaluates 5 AI Agent
+L13:  architectures over the BOLAA (ICLR 2024) benchmark, and assesses the effects of
+L14:  adding automated trust ...
+L15:   * 【3†10 AI agent benchmarks; publish_date: none†www.evidentlyai.com】 Jul 11, 
+L16: 2025 — We put together 10 AI agent benchmarks designed to assess how well 
+L17: different LLMs perform as agents in real-world scenarios, ...
+L18:   * 【4†A state-of-the-art search API purpose-built for agents; publish_date: 
+L19: none†parallel.ai】 Jul 31, 2025 — To evaluate real-world performance of the 
+L20: Parallel Search MCP Server, we created the WISER-Search benchmark which blends 
+L21: WISER-Fresh (queries ...
+L22:   * 【5†AI Agent Benchmarks are Broken; publish_date: none†medium.com】 We break 
+L23: down the failure modes in current AI agent benchmarks and introduce a checklist 
+L24: that minimizes the gamability of AI agent benchmarks.
+L25:   * 【6†Benchmarks and Tree Search for Multimodal LLM Web Agents; publish_date: 
+L26: none†dpfried.github.io】 2024, When is Tree Search Useful? ○ Dealing with 
+L27: destructive actions. ○ Some things on the web are very difficult to undo, e.g., 
+L28: ordering an item. 56.
+L29:   * 【7†-Bench: Benchmarking AI agents for the real-world; publish_date: 
+L30: none†sierra.ai】 Jun 20, 2024 — τ-bench measures an agent's ability to interact 
+L31: with (simulated) human users and programmatic APIs while following domain-
+L32: specific policies in a consistent ...
+L33:   * 【8†Browser Use = state of the art Web Agent; publish_date: none†browser-
+L34: use.com】 Dec 15, 2024 — Browser Use has achieved state-of-the-art performance on
+L35:  the WebVoyager benchmark, with an impressive 89.1% success rate across 586 
+L36: diverse web tasks.
+L37:   * 【9†FutureSearch Benchmarks; publish_date: none†evals.futuresearch.ai】 Find 
+L38: the original source of a given claim. Example: From , more than 8 out of 1000 
+L39: users clicked on a phishing link monthly in 2024, up 190% vs 2023.</result>
+<result>[1] 搜索智能体 基准测试 (Search_Results/搜索智能体 基准测试)
+**viewing lines [0 - 33] of 33**
+
+L0: 
+L1: URL: Search_Results/搜索智能体 基准测试
+L2: # Search Results
+L3: 
+L4:   * 【0†WideSearch：揭示AI 智能体缺失的「广度」能力; publish_date: none†zhuanlan.zhihu.com】 Aug 
+L5: 16, 2025 — 为系统评估智能体在该任务上的能力，论文构建了第一个专门的基准测试 WideSearch ，包含200 个源于真实世界、横跨18 
+L6: 个领域的高质量任务。 通过对超过10 个 ...
+L7:   * 【1†GAIA: 一个严苛的智能体基准- HuggingFace; publish_date: none†www.cnblogs.com】 Jul 9,
+L8:  2024 — 我们使用一个用库构建的代码智能体 在GAIA 基准上进行测试，这可以说是最困难、最全面的智能体基准测试……最终我们取得了第一名的成绩！ 
+L9: GAIA: 一个严苛的 ...
+L10:   * 【2†AI搜索智能体遭遇新挑战：滑铁卢大学团队提出更公平透明的 ...; publish_date: none†www.techwalker.com】 
+L11: Aug 14, 2025 — 
+L12: 目前评测AI搜索智能体主要依靠BrowseComp这样的基准测试，它就像一场实时的开卷考试，让AI在真实的网络环境中搜索信息来回答复杂问题。听起来很合理 ...
+L13:   * 【3†Agentic AI基础设施实践经验系列（六）：Agent质量评估 - AWS; publish_date: 
+L14: none†aws.amazon.com】 Sep 19, 2025 — TAU-bench 
+L15: 是一个评估AI智能体在真实世界环境中可靠性的基准测试。它评估智能体是否能够在动态的多轮对话中与用户进行交互，理解需求并完成任务。T-bench ...
+L16:   * 【4†DeepAgent：能自己找工具的通用推理智能体 - 高瓴人工智能学院; publish_date: none†ai.ruc.edu.cn】 
+L17: Nov 6, 2025 — 在八大基准测试中，DeepAgent在绝大多数任务上全面领先所有基线模型。 
+L18: 开放环境优势:在最具挑战的“开放工具检索”场景下(如ToolBench)，其成功率达到64%，远 ...
+L19:   * 【5†BrowseComp：为浏览智能体设计的简单而具挑战性的基准测试; publish_date: none†blog.csdn.net】 Sep 
+L20: 22, 2025 — 该基准测试由OpenAI团队开发，旨在推动更可信赖和可靠的AI代理研究。 核心特点. 
+L21: 挑战性问题设计：BrowseComp的问题设计遵循严格的难度标准：. 人类创建者确保 ...
+L22:   * 【6†什么是GAIA？; publish_date: none†huggingface.co】 什么是GAIA？ GAIA 
+L23: 是一个用于评估AI助手在需要核心能力组合的真实世界任务上的表现的基准，这些核心能力包括推理、多模态理解、网页浏览和熟练的工具使用。
+L24:   * 【7†SuperCLUE-Agent: Agent智能体中文原生任务能力测评基准; publish_date: 
+L25: none†www.cluebenchmarks.com】 AI 
+L26: Agent是一个能够自主理解、规划决策和执行复杂任务的智能体，有代表性的Agent比如AutoGPT、AgentGPT等。 
+L27: 现有关于Agent能力的测评，主要是在英文场景或任务的测评。目前还 ...
+L28:   * 【8†Gaia2 与ARE：赋能社区的智能体评测; publish_date: none†hub.baai.ac.cn】 Sep 24, 2025 — 
+L29: 快速评估任意智能体：基于真实或模拟数据，测试不同规则、工具、内容和验证方式下的表现 · 测试智能体的 工具调用与编排能力：可结合本地应用或MCP 工具 · 生成
+L30:  ...
+L31:   * 【9†许多AI 智能体评测基准并不可靠; publish_date: none†my.oschina.net】 Aug 13, 2025 — 
+L32: 当前AI智能体基准测试存在严重缺陷，亟需更严谨的评估框架。作者提出AI智能体基准测试核查清单（ABC），包含43个项目，旨在识别基准测试潜在陷阱，构建可靠评估 
+L33: ...</result>
+</function_results>
+
+<think>第一个搜索结果（针对“search agent benchmark 2024”）显示了一些相关条目，例如“HSCodeComp: A Realistic and Expert-level Benchmark for ...”、“open-compass/GTA - A Benchmark for General Tool Agents”、“10 AI agent benchmarks”、“WISER-Search benchmark”、“Benchmarks and Tree Search for Multimodal LLM Web Agents”、“τ-bench”、“WebVoyager benchmark”、“FutureSearch Benchmarks”。这些可能都与搜索智能体相关。第二个搜索结果（针对“搜索智能体 基准测试”）显示了一些中文资料，例如“WideSearch”、“BrowseComp”、“GAIA”、“SuperCLUE-Agent”等。这些都可能与搜索智能体相关。为了全面了解这些基准测试，我们需要同时打开并分析其中几个最有潜力的结果。具体来说，我们将打开第一个搜索结果中的“HSCodeComp”（索引0）和“open-compass/GTA”（索引1），以及第二个搜索结果中的“WideSearch”（索引0）和“BrowseComp”（索引5）。</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">0</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">0</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">1</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">1</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">1</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">5</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[2]  (https://ar5iv.org/html/2510.19631v1)
+**viewing lines [0 - 119] of 983**
+
+L0: 
+L1: URL: https://ar5iv.org/html/2510.19631v1
+L2: # HSCodeComp: A Realistic and Expert-level Benchmark for Deep Search Agents in 
+L3: Hierarchical Rule Application
+L4: 
+L5: Yiqian Yang† Tian Lan† Qianghuai Jia∗ Li Zhu Hui Jiang Hang Zhu Longyue Wang 
+L6: Weihua Luo Kaifu Zhang
+L7: 
+L8: Alibaba International Digital Commerce∗* Corresponding Author: Qianghuai Jia 
+L9: (qianghuai.jqh@alibaba-inc.com)
+L10: †\dagger Equal Contribution: Yiqian Yang
+L11: 
+L12:  Tian Lan
+L13: 
+L14: ###### Abstract
+L15: 
+L16: Abstract
+L17: 
+L18: Effective deep search agents must not only access open-domain and domain-
+L19: specific knowledge but also apply complex rules—such as legal clauses, medical 
+L20: manuals and tariff rules. These rules often feature vague boundaries and 
+L21: implicit logic relationships, making precise application challenging for agents.
+L22:  However, this critical capability is largely overlooked by current agent 
+L23: benchmarks. To fill this gap, we introduce HSCodeComp, the first realistic, 
+L24: expert-level e-commerce benchmark designed to evaluate deep search agents in 
+L25: hierarchical rule application. In this task, the deep reasoning process of 
+L26: agents is guided by these rules to predict 10-digit Harmonized System Code 
+L27: (HSCode) of products with noisy but realistic descriptions. These codes, 
+L28: established by the World Customs Organization, are vital for global supply chain
+L29:  efficiency. Built from real-world data collected from large-scale e-commerce 
+L30: platforms, our proposed HSCodeComp comprises 632 product entries spanning 
+L31: diverse product categories, with these HSCodes annotated by several human 
+L32: experts. Extensive experimental results on several state-of-the-art LLMs, open-
+L33: source, and closed-source agents reveal a huge performance gap: best agent 
+L34: achieves only 46.8% 10-digit accuracy, far below human experts at 95.0%. 
+L35: Besides, detailed analysis demonstrates the challenges of hierarchical rule 
+L36: application, and test-time scaling fails to improve performance further.
+L37: 
+L38: ## 1 Introduction
+L39: 
+L40: Deep search agents have demonstrated significant value in solving complex real-
+L41: world problems, where robust external knowledge utilization constitutes a 
+L42: critical capability [Wu et al., 2025, Tao et al., 2025, Li et al., 2025b]. To 
+L43: evaluate this capability, numerous established benchmarks are proposed to assess
+L44:  agents in utilizing open-domain data (e.g., GAIA [Mialon et al., 2023b] and 
+L45: BrowseComp [Wei et al., 2025]) and domain-specific data (e.g., WebMall [Peeters 
+L46: et al., 2025a], FinSearchComp [Hu et al., 2025a] and MedBrowseComp [Yu et al., 
+L47: 2025b]).
+L48: 
+L49: Beyond open-domain and domain-specific data, agents also need to effectively 
+L50: apply rules that encode human expert knowledge, particularly in scenarios like 
+L51: law, medical and e-commerce [Li et al., 2025a, Chen et al., 2025b, Yao et al., 
+L52: 2022, Chollet et al., 2025]. For instance, legal case adjudication require 
+L53: interpreting abstract legal provisions, and accurate e-commerce product 
+L54: classification in depends on tariff rules [Grainger, 2024]. Previous works have 
+L55: defined rule application as using specific logical rules with supporting facts 
+L56: to derive conclusions [Wang et al., 2024, Servantez et al., 2024]. In contrast, 
+L57: we define it as a core capability for deep search agents, where human-written 
+L58: rules are systematically applied to guide complex reasoning and decision-making 
+L59: [Sadowski and Chudziak, 2025]. Building on this observation, we categorize 
+L60: knowledge data for deep search agents into three levels (Figure 1, left), with 
+L61: increasing knowledge complexity: (1) Level 1: Open-domain Data - Tests 
+L62: understanding and deep reasoning abilities of agents on long-form web content. 
+L63: Established benchmarks include GAIA [Mialon et al., 2023b] and BrowseComp [Wei 
+L64: et al., 2025]; (2) Level 2: Structured Data - Assesses agents to precisely 
+L65: utilize structured data such as databases and knowledge graphs, as seen in 
+L66: domain-specific benchmarks like WebMall [Peeters et al., 2025a], MedBrowseComp 
+L67: [Chen et al., 2025b] and FinSearchComp [Hu et al., 2025a]; (3) Level 3: Rule 
+L68: Data - Evaluates agents to apply complex and abstract rules [Chollet et al., 
+L69: 2025]. This level presents two key challenges: (a) making accurate decisions 
+L70: when rules contain vague natural language descriptions [Sadowski and Chudziak, 
+L71: 2025]; and (b) reasoning about logical dependencies among rules, such as 
+L72: exception clauses and cross-category relationships [Guha et al., 2023]. Despite 
+L73: the importance of rule application in real-world scenarios, current agent 
+L74: benchmarks largely overlook its evaluation.
+L75: 
+L76: To fill this gap, we introduce HSCodeComp (short for the Harmonized System Code 
+L77: (HSCode) Competition), the first realistic, expert-level e-commerce benchmark 
+L78: designed to evaluate agents in predicting complete 10-digit Harmonized System 
+L79: Code (HSCode) of the product, using hierarchical rules (e.g., eWTP tariff 
+L80: rules111https://www.ewtp.com/web/smart/hscode). HSCodes organize products 
+L81: through a hierarchical structure spanning over 5,000 distinct codes across 
+L82: multiple classification levels, representing the global standard for classifying
+L83:  traded international goods, established by the World Customs Organization and 
+L84: implemented across more than 200 countries for customs clearance and tariff 
+L85: determination [Grainger, 2024, Nath et al., 2025]. Built from the data of the 
+L86: large-scale e-commerce platforms, our proposed HSCodeComp comprises 632 
+L87: carefully curated product entries, encompassing 27 unique HS chapters and 32 
+L88: distinct first-level categories. These HSCodes have been rigorously annotated by
+L89:  multiple e-commerce domain experts, ensuring that HSCodeComp is expert-level. 
+L90: Accurately predicting the exact 10-digit HSCode presents significant challenges:
+L91:  agents must perform multi-hop hierarchical reasoning with complex tariff rules 
+L92: while processing noisy but realistic product descriptions that often contain 
+L93: abbreviations, language variations, or incomplete information.
+L94: 
+L95: Extensive experiments on the state-of-the-art baselines, including 14 advanced 
+L96: foundation models, 6 advanced open-source agent systems and 3 closed-source 
+L97: agent systems, demonstrate that HSCode prediction task remains a substantial 
+L98: challenge for current AI approaches. As shown in the Figure 1 (right), even the 
+L99: best-performing system (SmolAgent [Roucher et al., 2025] with GPT-5) achieves 
+L100: only 46.8% accuracy, substantially below the 95.0% accuracy attained by human 
+L101: experts. Further detailed analysis reveals that existing agent systems lack 
+L102: critical capabilities required for this complex hierarchical rule applications. 
+L103: Notably, test-time scaling approach—which has proven effective in other 
+L104: reasoning tasks [Guo et al., 2025, Liu et al., 2025]—fail to improve performance
+L105:  on HSCodeComp. These observations demonstrate the challenging nature of our 
+L106: proposed HSCodeComp, highlighting the need for more effective designs of agent 
+L107: systems. To facilitate future research, we will publicly release codes and the 
+L108: benchmark dataset of HSCodeComp.
+L109: 
+L110: ## 2 Related Works
+L111: 
+L112: ### 2.1 Previous Works in HSCode Prediction
+L113: 
+L114: Previous works treat HSCode prediction as the e-commerce text classification 
+L115: task [Grainger, 2024], using pre-trained BERT models [Liao et al., 2024, Shubham
+L116:  et al., 2022] or Large Language Models (LLMs) prompting [Hussain and Ahmed, 
+L117: 2023]. However, these approaches fail to leverage domain-specific knowledge, 
+L118: especially the rules written by human experts [Hussain and Ahmed, 2023, Judy, 
+L119: 2024]. Besides, existing HSCode benchmarks face two critical limitations [Judy, </result>
+<result>Error fetching URL `https://github.com/open-compass/GTA`</result>
+<result>Error fetching URL `https://zhuanlan.zhihu.com/p/1939316761775301093`</result>
+<result>[3]  (https://blog.csdn.net/qq_41472205/article/details/151974603)
+**viewing lines [0 - 167] of 187**
+
+L0: 
+L1: URL: https://blog.csdn.net/qq_41472205/article/details/151974603
+L2: # BrowseComp：为浏览智能体设计的简单而具挑战性的基准测试
+L3: 
+L4: BrowseComp：AI浏览能力评估基准
+L5: 
+L6: 最新推荐文章于 2025-11-12 13:40:20 发布
+L7: 
+L8: 原创 于 2025-09-22 22:33:04 发布 · 1.3k 阅读
+L9: 
+L10: · 9
+L11: · 25 · 
+L12: CC 4.0 BY-SA版权
+L13: 
+L14: 版权声明：本文为博主原创文章，遵循 CC 4.0 BY-SA 版权协议，转载请附上原文出处链接和本声明。
+L15: 
+L16: ## BrowseComp：为浏览智能体设计的简单而具挑战性的基准测试
+L17: 
+L18: 在人工智能从基础聊天机器人向推理器和智能体发展的进程中，具备浏览互联网能力的人工智能模型正变得越来越重要。今天，我们将介绍一个名为BrowseComp的创新基准
+L19: 测试，它专门设计用于评估AI代理在复杂网络浏览任务中的能力。
+L20: 
+L21: ### 什么是BrowseComp？
+L22: 
+L23: BrowseComp（全称Browsing Competition）是一个包含1,266个挑战性问题的基准测试集，专门用于衡量AI代理在互联网上持续导航、寻找难
+L24: 以找到的纠缠信息的能力。该基准测试由OpenAI团队开发，旨在推动更可信赖和可靠的AI代理研究。
+L25: 
+L26: #### 核心特点
+L27: 
+L28: 挑战性问题设计：BrowseComp的问题设计遵循严格的难度标准：
+L29: 
+L30: - 人类创建者确保问题在10分钟内无法被人解决
+L31: - 现有模型（包括带浏览功能的ChatGPT和早期版本的OpenAI Deep Research）无法解决
+L32: - 通过5次简单Google搜索无法在结果首页找到答案
+L33: 
+L34: 简单易验证：尽管问题极具挑战性，但答案形式简单——都是短字符串，便于自动验证模型输出的正确性。
+L35: 
+L36: ### 为什么需要BrowseComp？
+L37: 
+L38: #### 现有基准的局限性
+L39: 
+L40: 传统的信息检索基准（如TriviaQA、HotpotQA等）主要关注易于查找的信息，随着语言模型的进步，这些基准已经趋于饱和。而BrowseComp专注于那些需
+L41: 要浏览大量网站才能解决的"硬核"问题。
+L42: 
+L43: #### 模拟真实挑战
+L44: 
+L45: BrowseComp问题通常采用"逆向设计"方法：创建者从一个已知事实出发，构建一个搜索空间巨大但验证简单的问题。例如：
+L46: 
+L47: “找出2018-2023年间在EMNLP会议上发表、第一作者本科毕业于达特茅斯学院、第四作者本科毕业于宾夕法尼亚大学的科学论文标题”
+L48: 
+L49: 这类问题验证简单，但解决起来需要检查数千篇论文并调查每位作者的背景。
+L50: 
+L51: ### 数据集特点
+L52: 
+L53: #### 主题多样性
+L54: 
+L55: BrowseComp涵盖了广泛的主题领域（如图2所示），包括历史、科学、文化等。创建者被鼓励基于个人兴趣设计问题，这有助于提高数据质量和参与度。
+L56: 
+L57: #### 质量保证
+L58: 
+L59: 为确保答案的唯一性，创建者需要：
+L60: 
+L61: - 对问题内容有足够了解，确信没有其他有效答案
+L62: - 如果不确定，则添加更多约束条件
+L63: - 接受其他创建者的验证反馈
+L64: 
+L65: ### 人类表现基准
+L66: 
+L67: 为了衡量BrowseComp的难度，研究人员让人类创建者尝试解决问题（不能解答自己创建的问题）。结果显示：
+L68: 
+L69: - **70.8%**的问题在2小时搜索后人类选择放弃
+L70: - **29.2%**的问题被成功解决
+L71: - 在解决的问题中，**86.4%**的人类答案与参考答案一致
+L72: 
+L73: 这表明BrowseComp确实极具挑战性，即使是熟悉数据集的人类专家也难以在有限时间内解决大部分问题。
+L74: 
+L75: ### AI模型表现评估
+L76: 
+L77: #### 各模型对比
+L78: 
+L79: 研究人员评估了多种模型在BrowseComp上的表现：
+L80: 
+L81: 模型 | 准确率(%) | 校准误差(%) 
+L82: ---|---|---
+L83: GPT-4o | 0.6 | 69 
+L84: GPT-4o（带浏览） | 1.9 | 82 
+L85: GPT-4.5 | 0.9 | 68 
+L86: OpenAI o1 | 9.9 | 65 
+L87: Deep Research | 51.5 | 91 
+L88: 
+L89: #### 关键发现
+L90: 
+L91: - 基础模型表现不佳：GPT-4o和GPT-4.5准确率接近零，凸显了基准的难度
+L92: - 浏览功能带来有限提升：启用浏览功能的GPT-4o准确率略有提高，但仍很低
+L93: - 推理能力的重要性：OpenAI o1虽然没有浏览能力，但凭借更强的推理能力获得较高准确率
+L94: - 专业模型的优势：专门为持久网络浏览训练的Deep Research模型解决了约一半的问题
+L95: 
+L96: #### 计算资源与性能关系
+L97: 
+L98: 研究表明，BrowseComp性能随测试时计算资源的增加而平滑提升（如图1所示）。这与智能体模型的特性一致——更多计算资源允许模型浏览更多网站，从而提高找到正确
+L99: 答案的机会。
+L100: 
+L101: ### 进阶策略分析
+L102: 
+L103: #### 聚合策略的效果
+L104: 
+L105: 通过让模型多次尝试同一问题并采用投票策略，可以显著提升性能：
+L106: 
+L107: - 多数投票：选择样本中最常见的答案
+L108: - 加权投票：根据模型置信度加权投票
+L109: - 最佳选择：选择置信度最高的答案
+L110: 
+L111: 这些方法将Deep Research的性能提升了15-25%，表明模型通常能够识别自己的正确答案。
+L112: 
+L113: #### 任务难度分布
+L114: 
+L115: 分析显示，BrowseComp中的任务难度分布广泛：
+L116: 
+L117: - 16%的任务被Deep Research完美解决（100%通过率）
+L118: - 14%的任务完全失败（0%通过率）
+L119: - 其余任务处于中间难度水平
+L120: 
+L121: ### BrowseComp的意义与局限性
+L122: 
+L123: #### 作为评估工具的价值
+L124: 
+L125: BrowseComp可被视为浏览智能体的"编程竞赛"——虽然不全面，但对核心浏览能力提供了有用的衡量。正如在编程竞赛中表现出色的模型很可能在其他编码任务中表现良
+L126: 好，能够解决BrowseComp的模型在定位难以查找信息方面应该具有很强的能力。
+L127: 
+L128: #### 已知局限性
+L129: 
+L130: - 不涉及真实用户查询分布：如生成长回答或解决查询歧义等挑战
+L131: - 答案唯一性假设：虽然尽力确保，但不能完全保证没有其他有效答案
+L132: - 单模态限制：当前仅关注文本信息检索，未来可扩展至图像、视频等模态
+L133: 
+L134: ### 结论与展望
+L135: 
+L136: BrowseComp为评估AI代理的网络浏览能力提供了一个具有挑战性且易于使用的基准。它强调的持久性、创造性和战略性浏览能力，正是未来AI智能体在实际应用中所需
+L137: 的核心技能。
+L138: 
+L139: 随着更多研究团队在BrowseComp上测试他们的模型，我们期待看到浏览智能体技术的快速进步，最终使AI能够更可靠地帮助我们获取那些深藏在互联网角落的宝贵信息。
+L140: 
+L141: 访问地址：BrowseComp已在GitHub上开源，地址为 https://github.com/openai/simple-evals
+L142: 
+L143: 添加红包
+L144: 
+L145: 请填写红包祝福语或标题
+L146: 
+L147: 个
+L148: 
+L149: 红包个数最小为10个
+L150: 
+L151: 元
+L152: 
+L153: 红包金额最低5元
+L154: 
+L155: 当前余额3.43元 前往充值 >
+L156: 
+L157: 需支付：10.00元
+L158: 
+L159: 成就一亿技术人!
+L160: 
+L161: hope_wisdom
+L162:  发出的红包 
+L163: 实付元
+L164: 
+L165: 点击重新获取
+L166: 
+L167: 扫码支付</result>
+</function_results>
+
+<think>打开的HSCodeComp页面详细介绍了这个基准测试，它是一个用于评估深度搜索智能体在分层规则应用方面能力的基准测试，特点包括：基于真实世界电商数据、任务复杂、需要多跳层次推理、使用嘈杂的产品描述，并且标注由专家完成。使用场景是评估智能体在规则应用方面的能力，例如关税代码分类。例题是预测产品的10位HS编码。打开的BrowseComp页面详细介绍了这个基准测试，它是一个用于评估浏览智能体（即能够在互联网上导航、搜索信息的智能体）的基准测试。其特点包括：包含1266个具有挑战性的问题，需要持续导航和搜索难以查找的信息；答案形式简单，便于自动验证；问题设计确保人类在10分钟内无法解决，且现有模型也无法解决；涵盖了广泛的主题领域。使用场景是评估智能体在复杂网络浏览任务中的能力。例题是：“找出2018-2023年间在EMNLP会议上发表、第一作者本科毕业于达特茅斯学院、第四作者本科毕业于宾夕法尼亚大学的科学论文标题”。另外两个打开的页面（open-compass/GTA和WideSearch）出现了错误，可能链接已失效或需要其他访问方式。为了进一步获取其他相关基准测试的信息，我们需要同时进行多项搜索。考虑到用户可能对搜索智能体基准测试有广泛需求，我们将同时搜索“WebVoyager benchmark”、“WebArena benchmark”、“GAIA benchmark”、“ToolBench benchmark”、“HotpotQA benchmark”、“FEVER benchmark”、“TriviaQA benchmark”、“Natural Questions benchmark”、“MS MARCO benchmark”、“BEIR benchmark”、“MIRACL benchmark”、“Zero-shot IR benchmark”、“WebGPT benchmark”、“WebShop benchmark”、“ALFWorld benchmark”、“Mind2Web benchmark”、“VisualWebArena benchmark”、“SearchBench benchmark”、“WebVLN benchmark”、“WebNav benchmark”。这些搜索将帮助我们全面了解现有基准测试的概况。</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">WebVoyager benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">WebArena benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">GAIA benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">ToolBench benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">HotpotQA benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">FEVER benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">TriviaQA benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">Natural Questions benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">MS MARCO benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">BEIR benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">MIRACL benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">Zero-shot IR benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">WebGPT benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">WebShop benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">ALFWorld benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">Mind2Web benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">VisualWebArena benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">SearchBench benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">WebVLN benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">WebNav benchmark</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[4] WebVoyager benchmark (Search_Results/WebVoyager benchmark)
+**viewing lines [0 - 40] of 40**
+
+L0: 
+L1: URL: Search_Results/WebVoyager benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†WebVoyager Benchmark; publish_date: none†www.trykura.com】 Performance 
+L5: measured across various websites in WebVoyager shows Kura consistently 
+L6: outperforming other agents, with particularly strong results on e-commerce ...
+L7:   * 【1†WebVoyager: Building an End-to-End Web Agent with ...; publish_date: 
+L8: none†github.com】 WebVoyager is an innovative Large Multimodal Model (LMM) 
+L9: powered web agent that can complete user instructions end-to-end by interacting 
+L10: with real-world ...
+L11:   * 【2†AI Browser Agent Leaderboard | Steel.dev; publish_date: 
+L12: none†leaderboard.steel.dev】 See how various AI browser agents stack up based on 
+L13: their accuracy in completing web-based tasks on the WebVoyager benchmark.
+L14:   * 【3†[2401.13919] WebVoyager: Building an End-to-End Web ...; publish_date: 
+L15: none†arxiv.org】 by H He · 2024 · Cited by 282 — We show that WebVoyager achieves
+L16:  a 59.1% task success rate on our benchmark, significantly surpassing the 
+L17: performance of both GPT-4 (All ...
+L18:   * 【4†Our Agent-E SOTA Results on the WebVoyager Benchmark; publish_date: 
+L19: none†www.emergence.ai】 Jul 11, 2024 — WebVoyager is a benchmark that tests an 
+L20: agent's capabilities for navigation on dynamic live websites. It is more 
+L21: representative than WebArena [4] ...
+L22:   * 【5†Browser Use = state of the art Web Agent; publish_date: none†browser-
+L23: use.com】 Dec 15, 2024 — Browser Use has achieved state-of-the-art performance on
+L24:  the WebVoyager benchmark, with an impressive 89.1% success rate across 586 
+L25: diverse web tasks.
+L26:   * 【6†Magnitude achieves SOTA 94% on WebVoyager benchmark; publish_date: 
+L27: none†github.com】 Magnitude achieves state-of-the-art performance with 93.9% 
+L28: success rate on WebVoyager, beating all other browser agents.
+L29:   * 【7†WebVoyager: Autonomous Web Agent Benchmark; publish_date: 
+L30: none†www.emergentmind.com】 3 days ago — WebVoyager Benchmark is a comprehensive 
+L31: evaluation suite for autonomous web agents, featuring 643 tasks across 15 
+L32: popular websites.
+L33:   * 【8†WebVoyager Benchmark Results; publish_date: none†www.browserable.ai】 
+L34: Browserable has achieved 90.4% on the WebVoyager benchmark. This is best-in-
+L35: class performance across all web agents. This was done across 567 web tasks 
+L36: which ...
+L37:   * 【9†89% achieved on WebVoyager using Anchor + Browser Use; publish_date: 
+L38: none†www.reddit.com】 Thanks to the amazing work from the browser-use open-source
+L39:  community and the built-in support from Anchor Browser, we've hit an 89% score 
+L40: on WebVoyager.</result>
+<result>[5] WebArena benchmark (Search_Results/WebArena benchmark)
+**viewing lines [0 - 42] of 42**
+
+L0: 
+L1: URL: Search_Results/WebArena benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†WebArena: A Realistic Web Environment for Building ...; publish_date: 
+L5: none†webarena.dev】 Our benchmark is implemented in our fully interactable 
+L6: highly-realistic WebArena environment. It features diverse tasks human may 
+L7: encounter in their daily ...
+L8:   * 【1†[2307.13854] WebArena: A Realistic Web Environment for ...; publish_date:
+L9:  none†arxiv.org】 by S Zhou · 2023 · Cited by 637 — Building upon our 
+L10: environment, we release a set of benchmark tasks focusing on evaluating the 
+L11: functional correctness of task completions.
+L12:   * 【2†WebArena: A Realistic Web Environment for Building ...; publish_date: 
+L13: none†www.cmu.edu】 WebArena introduces a benchmark on interpreting high-level 
+L14: realistic natural language command to concrete web-based interactions. We 
+L15: provide annotated programs ...
+L16:   * 【3†GitHub - web-arena-x/webarena: Code repo for ...; publish_date: 
+L17: none†github.com】 [12/20/2024] Check out our new benchmark on even more 
+L18: consequential tasks, including terminal use and coding, TheAgentCompany. 
+L19: [12/21/2023] We release the ...
+L20:   * 【4†WebArena Benchmark and the State of Agentic AI; publish_date: 
+L21: none†medium.com】 In short, WebArena established a new standard for realism and 
+L22: complexity in web agent evaluation, forcing AI agents to operate in dynamic, 
+L23: high- ...
+L24:   * 【5†WebArena: A Realistic Web Environment for Building ...; publish_date: 
+L25: none†huggingface.co】 Jul 25, 2023 — WebArena, a realistic and reproducible 
+L26: environment, evaluates the performance of autonomous agents performing complex 
+L27: tasks on websites using ...
+L28:   * 【6†WebArena Benchmark: Evaluating Web Agents; publish_date: 
+L29: none†www.emergentmind.com】 Jun 30, 2025 — WebArena Benchmark is a self-contained
+L30:  suite that evaluates autonomous agents on realistic, multi-step web tasks using
+L31:  natural language ...
+L32:   * 【7†VisualWebArena is a benchmark for multimodal agents.; publish_date: 
+L33: none†github.com】 VisualWebArena is a realistic and diverse benchmark for 
+L34: evaluating multimodal autonomous language agents. It comprises of a set of 
+L35: diverse and complex web-based ...
+L36:   * 【8†WebDev Arena Leaderboard - LMArena; publish_date: none†web.lmarena.ai】 
+L37: WebDev Arena is a real-time AI coding competition where models go head-to-head 
+L38: in web development challenges, developed by LMArena.
+L39:   * 【9†WebArena: A Realistic Web Environment for Building ...; publish_date: 
+L40: none†arxiv.org】 Apr 16, 2024 — We use this benchmark to evaluate several agents 
+L41: that can follow NL command and perform web-based tasks (§4). These agents are 
+L42: implemented in a ...</result>
+<result>[6] GAIA benchmark (Search_Results/GAIA benchmark)
+**viewing lines [0 - 41] of 41**
+
+L0: 
+L1: URL: Search_Results/GAIA benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†GAIA Leaderboard - a Hugging Face Space by ...; publish_date: 
+L5: none†huggingface.co】 GAIA is a benchmark which aims at evaluating next-
+L6: generation LLMs (LLMs with augmented capabilities due to added tooling, 
+L7: efficient prompting, access to search ...
+L8:   * 【1†[2311.12983] GAIA: a benchmark for General AI Assistants; publish_date: 
+L9: none†arxiv.org】 by G Mialon · 2023 · Cited by 367 — GAIA proposes real-world 
+L10: questions that require a set of fundamental abilities such as reasoning, multi-
+L11: modality handling, web browsing, and generally tool-use ...
+L12:   * 【2†GAIA benchmark; publish_date: none†huggingface.co】 This is the 
+L13: organisation page for all things related to GAIA, a benchmark for General AI 
+L14: Assistants. You can find all the information and links on the GAIA ...
+L15:   * 【3†GAIA: A Benchmark for General AI Assistants; publish_date: 
+L16: none†ukgovernmentbeis.github.io】 This is an Inspect AI implementation of the 
+L17: GAIA (General AI Assistants) benchmark, consisting of 450 questions testing tool
+L18:  use on realistic assistant tasks.
+L19:   * 【4†GAIA: a benchmark for general AI assistants | Research; publish_date: 
+L20: none†ai.meta.com】 May 6, 2024 — GAIA proposes real-world questions that require 
+L21: a set of fundamental abilities such as reasoning, multi-modality handling, web 
+L22: browsing, and generally tool-use ...
+L23:   * 【5†HAL: GAIA Leaderboard; publish_date: none†hal.cs.princeton.edu】 GAIA is a
+L24:  benchmark for General AI Assistants that requires a set of fundamental 
+L25: abilities such as reasoning, multi-modality handling, web browsing, and tool- 
+L26: ...
+L27:   * 【6†GAIA: The LLM Agent Benchmark Everyone's Talking About; publish_date: 
+L28: none†towardsdatascience.com】 May 29, 2025 — GAIA stands for General AI 
+L29: Assistants benchmark [1]. This benchmark was introduced to specifically evaluate
+L30:  LLM agents on their ability to act as general- ...
+L31:   * 【7†GAIA: a benchmark for General AI Assistants; publish_date: 
+L32: none†openreview.net】 by G Mialon · Cited by 367 — GAIA proposes real-world 
+L33: questions that require a set of fundamental abilities such as reasoning, multi-
+L34: modality handling, web browsing, and generally tool-use ...
+L35:   * 【8†Rethinking AI Evaluation: Introducing the GAIA Benchmark; publish_date: 
+L36: none†medium.com】 The authors introduce GAIA, a benchmark designed to assess the 
+L37: robustness of AI systems across a variety of practical tasks.
+L38:   * 【9†H2O.ai Tops the General AI Assistant (GAIA) Test; publish_date: 
+L39: none†h2o.ai】 Mar 17, 2025 — Our h2oGPTe Agent has once again claimed the #1 spot
+L40:  on the prestigious GAIA (General AI Assistants) benchmark with an impressive 
+L41: 75% accuracy rate.</result>
+<result>[7] ToolBench benchmark (Search_Results/ToolBench benchmark)
+**viewing lines [0 - 40] of 40**
+
+L0: 
+L1: URL: Search_Results/ToolBench benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†ToolBench, an evaluation suite for LLM tool manipulation ...; 
+L5: publish_date: none†github.com】 The ToolBench is a benchmark consisting of 
+L6: diverse software tools for real-world tasks. We also provide easy-to-use 
+L7: infrastructure in this repository.
+L8:   * 【1†OpenBMB/ToolBench; publish_date: none†github.com】 [2023/7/27] New version
+L9:  ToolBench is released. ✨Here is an overview of the dataset construction, 
+L10: training, and evaluation. ✨✨Features:.
+L11:   * 【2†Towards Stable Large-Scale Benchmarking on Tool ...; publish_date: 
+L12: none†arxiv.org】 by Z Guo · 2024 · Cited by 100 — We introduce StableToolBench, a
+L13:  benchmark evolving from ToolBench, proposing a virtual API server and stable 
+L14: evaluation system.
+L15:   * 【3†StableToolBench - Zhicheng Guo; publish_date: none†zhichengg.github.io】 
+L16: We introduce StableToolBench, a benchmark evolving from ToolBench, proposing a 
+L17: virtual API server and stable evaluation system.
+L18:   * 【4†ToolBench | EvalScope - Read the Docs; publish_date: 
+L19: none†evalscope.readthedocs.io】 We evaluate the effectiveness of the ToolBench 
+L20: benchmark: ToolBench (Qin et al., 2023b). The task involves integrating API 
+L21: calls to complete tasks.
+L22:   * 【5†Towards Stable Large-Scale Benchmarking on Tool ...; publish_date: 
+L23: none†aclanthology.org】 by Z Guo · 2024 · Cited by 100 — We introduce 
+L24: StableToolBench, a benchmark evolving from ToolBench, proposing a virtual API 
+L25: server and stable evaluation system.
+L26:   * 【6†ML-Tool-Bench: Tool-Augmented Planning for ML Tasks; publish_date: 
+L27: none†openreview.net】 Sep 18, 2025 — In this work, we introduce a comprehensive 
+L28: benchmark for evaluating tool-augmented ML agents using a curated set of 61 
+L29: specialized tools and 15 ...
+L30:   * 【7†-Bench: Benchmarking AI agents for the real-world; publish_date: 
+L31: none†sierra.ai】 Jun 20, 2024 — τ-bench measures an agent's ability to interact 
+L32: with (simulated) human users and programmatic APIs while following domain-
+L33: specific policies in a consistent ...
+L34:   * 【8†ToolEval Leaderboard; publish_date: none†openbmb.github.io】 ToolEval is 
+L35: an automatic evaluator build for tool learning which incorporates two evaluation
+L36:  metrics, Pass Rate and Win Rate(Preference).
+L37:   * 【9†What is the best benchmark dataset for multi-step tool-use?; 
+L38: publish_date: none†www.reddit.com】 I'm a newbie trying to evaluate the 
+L39: performance of different prompts strategies for multi-step tool-using, wondering
+L40:  what is the recommended benchmark dataset ...</result>
+<result>[8] HotpotQA benchmark (Search_Results/HotpotQA benchmark)
+**viewing lines [0 - 39] of 39**
+
+L0: 
+L1: URL: Search_Results/HotpotQA benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†HotpotQA Homepage; publish_date: none†hotpotqa.github.io】 HotpotQA is a 
+L5: question answering dataset featuring natural, multi-hop questions, with strong 
+L6: supervision for supporting facts to enable more explainable ...See more
+L7:   * 【1†HotpotQA: A Dataset for Diverse, Explainable Multi-hop ...; publish_date:
+L8:  none†arxiv.org】 by Z Yang · 2018 · Cited by 3834 — HotpotQA is a dataset with 
+L9: 113k Wikipedia-based question-answer pairs requiring multi-document reasoning, 
+L10: diverse questions, sentence-level ...
+L11:   * 【2†hotpotqa/hotpot_qa · Datasets at Hugging Face; publish_date: 
+L12: none†huggingface.co】 HotpotQA is a new dataset with 113k Wikipedia-based 
+L13: question-answer pairs with four key features: (1) the questions require finding 
+L14: and reasoning over multiple ...See more
+L15:   * 【3†Why You Should Stop Using HotpotQA for AI Agents ...; publish_date: 
+L16: none†qipeng.me】 Jul 1, 2025 — HotpotQA pioneered a class of AI tasks that 
+L17: requires the AI system to autonomously perform multiple steps of reasoning in an
+L18:  open-domain setting.See more
+L19:   * 【4†hotpotqa/hotpot; publish_date: none†github.com】 A dataset for diverse, 
+L20: explainable multi-hop question answering. This repository contains the baseline 
+L21: model code, as well as the entire pipeline of running ...See more
+L22:   * 【5†HotpotQA: Multi-Hop QA Benchmark; publish_date: 
+L23: none†www.emergentmind.com】 Sep 10, 2025 — HotpotQA is a large-scale multi-hop 
+L24: question answering benchmark featuring 112,779 Wikipedia-based Q&A pairs with 
+L25: detailed, sentence-level ...See more
+L26:   * 【6†HotpotQA Dataset | Papers With Code; publish_date: 
+L27: none†paperswithcode.com】 HotpotQA is a question answering dataset collected on 
+L28: the English Wikipedia, containing about 113K crowd-sourced questions.See more
+L29:   * 【7†HotpotQA: A Dataset for Diverse, Explainable Multi-hop ...; publish_date:
+L30:  none†aclanthology.org】 by Z Yang · 2018 · Cited by 3834 — HotpotQA is a dataset
+L31:  with 113k Wikipedia-based question-answer pairs requiring multi-document 
+L32: reasoning, diverse questions, sentence-level facts, and factoid ...
+L33:   * 【8†Benchmark BM25S: HotpotQA; publish_date: none†www.kaggle.com】 Explore and
+L34:  run machine learning code with Kaggle Notebooks | Using data from No attached 
+L35: data sources.
+L36:   * 【9†mteb/hotpotqa · Datasets at Hugging Face; publish_date: 
+L37: none†huggingface.co】 HotpotQA is a question answering dataset featuring natural,
+L38:  multi-hop questions, with strong supervision for supporting facts to enable 
+L39: more explainable ...See more</result>
+<result>[9] FEVER benchmark (Search_Results/FEVER benchmark)
+**viewing lines [0 - 40] of 40**
+
+L0: 
+L1: URL: Search_Results/FEVER benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†Fever.ai; publish_date: none†fever.ai】 We are pleased to announce that 
+L5: FEVER9 will be co-located with EACL 2026. In this year's workshop, we will 
+L6: introduce a new shared task focused on automated fact ...
+L7:   * 【1†a Large-scale Dataset for Fact Extraction and VERification; publish_date:
+L8:  none†aclanthology.org】 by J Thorne · 2018 · Cited by 2315 — In this paper we 
+L9: introduce a new publicly available dataset for verification against textual 
+L10: sources, FEVER: Fact Extraction.
+L11:   * 【2†awslabs/fever: FEVER (Fact Extraction and VERification) ...; 
+L12: publish_date: none†github.com】 In this paper we introduce a new publicly 
+L13: available dataset for verification against textual sources, FEVER: Fact 
+L14: Extraction and VERification.
+L15:   * 【3†FEVER: Fact Extraction and VERification; publish_date: 
+L16: none†www.amazon.science】 The best accuracy we achieve on labeling a claim 
+L17: accompanied by the correct evidence is 31.87%, while if we ignore the evidence 
+L18: we achieve 50.91%. Thus we ...
+L19:   * 【4†FEVER Dataset; publish_date: none†fever.ai】 FEVER (Fact Extraction and 
+L20: VERification) consists of 185,445 claims generated by altering sentences 
+L21: extracted from Wikipedia and subsequently verified ...
+L22:   * 【5†mteb/fever · Datasets at Hugging Face; publish_date: none†huggingface.co】
+L23:  FEVER. An MTEB dataset. Massive Text Embedding Benchmark. FEVER (Fact 
+L24: Extraction and VERification) consists of 185,445 claims generated by altering 
+L25: sentences ...
+L26:   * 【6†FEVEROUS: Fact Extraction and VERification Over ...; publish_date: 
+L27: none†datasets-benchmarks-proceedings.neurips.cc】 by R Aly · Cited by 359 — In 
+L28: this paper we introduce a novel dataset and benchmark, Fact Extraction and 
+L29: VERification Over. Unstructured and Structured information (FEVEROUS), which ...
+L30:   * 【7†a large-scale dataset for Fact Extraction and VERification; publish_date:
+L31:  none†arxiv.org】 by J Thorne · 2018 · Cited by 2315 — In this paper we introduce
+L32:  a new publicly available dataset for verification against textual sources, 
+L33: FEVER: Fact Extraction and VERification.
+L34:   * 【8†FEVER Resources; publish_date: none†fever.ai】 2018 FEVER: a large-scale 
+L35: dataset for Fact Extraction and VERification .bib James Thorne, Andreas Vlachos,
+L36:  Christos Christodoulopoulos, Arpit Mittal
+L37:   * 【9†a Large-scale Dataset for Fact Extraction and VERification; publish_date:
+L38:  none†www.semanticscholar.org】 This paper introduces a new publicly available 
+L39: dataset for verification against textual sources, FEVER, which consists of 
+L40: 185,445 claims generated by ...</result>
+<result>[10] TriviaQA benchmark (Search_Results/TriviaQA benchmark)
+**viewing lines [0 - 35] of 35**
+
+L0: 
+L1: URL: Search_Results/TriviaQA benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†TriviaQA; publish_date: none†nlp.cs.washington.edu】 TriviaQA is a reading
+L5:  comprehension dataset containing over 650K question-answer-evidence triples. 
+L6: TriviaQA includes 95K question-answer pairs authored ...
+L7:   * 【1†TriviaQA: A Large Scale Distantly Supervised Challenge ...; publish_date:
+L8:  none†aclanthology.org】 by M Joshi · 2017 · Cited by 3451 — We present TriviaQA,
+L9:  a challenging reading comprehension dataset containing over 650K question-
+L10: answer-evidence triples. TriviaQA includes 95K question ...
+L11:   * 【2†mandarjoshi/trivia_qa · Datasets at Hugging Face; publish_date: 
+L12: none†huggingface.co】 TriviaqQA is a reading comprehension dataset containing 
+L13: over 650K question-answer-evidence triples. TriviaqQA includes 95K question-
+L14: answer pairs authored by ...
+L15:   * 【3†[1705.03551] TriviaQA: A Large Scale Distantly Supervised ...; 
+L16: publish_date: none†arxiv.org】 by M Joshi · 2017 · Cited by 3451 — We present 
+L17: TriviaQA, a challenging reading comprehension dataset containing over 650K 
+L18: question-answer-evidence triples.
+L19:   * 【4†TriviaQA; publish_date: none†epoch.ai】 An open-domain question answering 
+L20: benchmark with challenging trivia questions paired with evidence documents.
+L21:   * 【5†TriviaQA Leaderboard; publish_date: none†llm-stats.com】 What is the 
+L22: TriviaQA benchmark? A large-scale reading comprehension dataset containing over 
+L23: 650K question-answer-evidence triples. TriviaQA includes 95K ...
+L24:   * 【6†Code for the TriviaQA reading comprehension dataset; publish_date: 
+L25: none†github.com】 A large scale distantly supervised challenge dataset for 
+L26: reading comprehension. In Association for Computational Linguistics (ACL) 2017, 
+L27: Vancouver, Canada.
+L28:   * 【7†TriviaQA - Model Benchmarks - The Regularizer; publish_date: 
+L29: none†www.theregularizer.com】 May 4, 2025 — Compare the performance of different 
+L30: AI models across standardized benchmarks. Higher scores generally indicate 
+L31: better performance, but context ...
+L32:   * 【8†TriviaQA: A Large Scale Distantly Supervised Challenge ...; publish_date:
+L33:  none†www.cs.utexas.edu】 by M Joshi · Cited by 3445 — We present TriviaQA, a 
+L34: challenging reading comprehension dataset contain- ing over 650K question-
+L35: answer-evidence triples. TriviaQA includes 95K question-.</result>
+<result>[11] Natural Questions benchmark (Search_Results/Natural Questions benchmark)
+**viewing lines [0 - 39] of 39**
+
+L0: 
+L1: URL: Search_Results/Natural Questions benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†Natural Questions: a Benchmark for Question Answering ...; publish_date: 
+L5: none†research.google】 by T Kwiatkowski · Cited by 4339 — We present the Natural 
+L6: Questions corpus, a question answering dataset. Questions consist of real 
+L7: anonymized, aggregated queries issued to the Google search ...
+L8:   * 【1†Natural Questions: A Benchmark for Question Answering ...; publish_date: 
+L9: none†aclanthology.org】 by T Kwiatkowski · Cited by 4308 — Abstract. We present 
+L10: the Natural Questions corpus, a question answering data set. Questions consist 
+L11: of real anonymized, aggregated queries issued.
+L12:   * 【2†Google's Natural Questions; publish_date: none†ai.google.com】 Natural 
+L13: Questions. A Benchmark for Question Answering Research. View examples · Download
+L14:  dataset. Open Domain Question Answering. A core goal in artificial ...
+L15:   * 【3†google-research-datasets/natural-questions; publish_date: 
+L16: none†github.com】 Natural Questions (NQ) contains real user questions issued to 
+L17: Google search, and answers found from Wikipedia by annotators. NQ is designed 
+L18: for the training and ...
+L19:   * 【4†Natural Questions: A Benchmark for Question Answering ...; publish_date: 
+L20: none†direct.mit.edu】 Aug 1, 2019 — We present the Natural Questions corpus, a 
+L21: question answering data set. Questions consist of real anonymized, aggregated 
+L22: queries issued to the Google search ...
+L23:   * 【5†ir_datasets : Natural Questions; publish_date: none†ir-datasets.com】 
+L24: Google Natural Questions is a Q&A dataset containing long, short, and Yes/No 
+L25: answers from Wikipedia. ir_datasets frames this around an ad-hoc ranking setting
+L26:  ...
+L27:   * 【6†sentence-transformers/natural-questions · Datasets at ...; publish_date: 
+L28: none†huggingface.co】 This dataset is a collection of question-answer pairs from 
+L29: the Natural Questions dataset. See Natural Questions for additional information.
+L30:   * 【7†Google's Natural Questions; publish_date: none†ai.google.com】 Natural 
+L31: Questions contains 307K training examples, 8K examples for development, and a 
+L32: further 8K examples for testing. In the paper, we demonstrate a human ...
+L33:   * 【8†A Benchmark for Question Answering Research; publish_date: 
+L34: none†www.researchgate.net】 Jul 27, 2025 — We present the Natural Questions 
+L35: corpus, a question answering data set. Questions consist of real anonymized, 
+L36: aggregated queries issued to the Google search ...
+L37:   * 【9†natural-questions; publish_date: none†docs.unity.rc.umass.edu】 Sep 4, 
+L38: 2025 — “Natural questions: a benchmark for question answering research.” 
+L39: Transactions of the Association for Computational Linguistics 7 (2019): ...</result>
+<result>[12] MS MARCO benchmark (Search_Results/MS MARCO benchmark)
+**viewing lines [0 - 41] of 41**
+
+L0: 
+L1: URL: Search_Results/MS MARCO benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†MS MARCO - Microsoft Open Source; publish_date: none†microsoft.github.io】
+L5:  The MS MARCO datasets are intended for non-commercial research purposes only to
+L6:  promote advancement in the field of artificial intelligence and related areas, 
+L7: ...
+L8:   * 【1†microsoft/ms_marco · Datasets at Hugging Face; publish_date: 
+L9: none†huggingface.co】 Starting with a paper released at NIPS 2016, MS MARCO is a 
+L10: collection of datasets focused on deep learning in search. The first dataset was
+L11:  a question ...
+L12:   * 【2†Benchmarking Ranking Models in the Large-Data Regime; publish_date: 
+L13: none†arxiv.org】 by N Craswell · 2021 · Cited by 89 — This paper uses the MS 
+L14: MARCO and TREC Deep Learning Track as our case study, comparing it to the case 
+L15: of TREC ad hoc ranking in the 1990s.
+L16:   * 【3†Benchmarking Ranking Models in the Large-Data Regime; publish_date: 
+L17: none†www.microsoft.com】 This paper uses the MS MARCO and TREC Deep Learning 
+L18: Track as our case study, comparing it to the case of TREC ad hoc ranking in the 
+L19: 1990s. We show how the ...
+L20:   * 【4†Datasets for Document and Passage Ranking Leadboards; publish_date: 
+L21: none†microsoft.github.io】 The MS MARCO document and passage ranking leaderboards
+L22:  complements the TREC Deep Learning Track by providing on-going evaluation of 
+L23: submissions using pre- ...
+L24:   * 【5†MS MARCO: Benchmarking Ranking Models in the Large- ...; publish_date: 
+L25: none†dl.acm.org】 Jul 11, 2021 — This paper uses the MS MARCO and TREC Deep 
+L26: Learning Track as our case study, comparing it to the case of TREC ad hoc 
+L27: ranking in the 1990s.
+L28:   * 【6†ir_datasets : MSMARCO (passage); publish_date: none†ir-datasets.com】 A 
+L29: passage ranking benchmark with a collection of 8.8 million passages and question
+L30:  queries. Most relevance judgments are shallow.
+L31:   * 【7†MS MARCO; publish_date: none†sbert.net】 MS MARCO Passage Ranking is a 
+L32: large dataset to train models for information retrieval. It consists of about 
+L33: 500k real search queries from Bing search engine ...
+L34:   * 【8†MS MARCO: A Human Generated MAchine Reading ...; publish_date: 
+L35: none†arxiv.org】 by P Bajaj · 2016 · Cited by 1151 — We introduce a large scale 
+L36: MAchine Reading COmprehension dataset, which we name MS MARCO. The dataset 
+L37: comprises of 1,010,916 anonymized ...
+L38:   * 【9†MS MARCO Web Search: A Large-scale Information-rich ...; publish_date: 
+L39: none†www.microsoft.com】 May 13, 2024 — MS MARCO Web Search offers a retrieval 
+L40: benchmark with three web retrieval challenge tasks that demands innovations in 
+L41: both machine learning and ...</result>
+<result>[13] BEIR benchmark (Search_Results/BEIR benchmark)
+**viewing lines [0 - 37] of 37**
+
+L0: 
+L1: URL: Search_Results/BEIR benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†详细介绍文本检索基准BEIR: A Heterogeneous Benchmark ...; publish_date: 
+L5: none†blog.csdn.net】 2023年1月1日 — 
+L6: BEIR旨在为所有不同的检索任务提供一个一站式的零样本评估基准。为了构建一个全面的评估基准，选择方法对于收集具有理想属性的任务和数据集至关重要。对于 ...
+L7:   * 【1†beir-cellar/beir; publish_date: none†github.com】 BEIR is a heterogeneous 
+L8: benchmark containing diverse IR tasks. It also provides a common and easy 
+L9: framework for evaluation of your NLP-based retrieval models ...
+L10:   * 【2†BEIR: A Heterogenous Benchmark for Zero-shot Evaluation ...; 
+L11: publish_date: none†arxiv.org】 作者：N Thakur · 2021 · 被引用次数：1480 — We introduce 
+L12: Benchmarking-IR (BEIR), a robust and heterogeneous evaluation benchmark for 
+L13: information retrieval.
+L14:   * 【3†BeIR; publish_date: none†huggingface.co】 BEIR (Benchmarking IR) consists 
+L15: of a homogenous benchmark for diverse sentence or passage level IR tasks. It 
+L16: provides a common and easy framework for the cross ...
+L17:   * 【4†论文分享：BEIR A Heterogeneous Benchmark for Zero-shot ...; publish_date: 
+L18: none†zhuanlan.zhihu.com】 2022年10月3日 — 分享论文，夹带个人理解的分享，建议结合原论文看。 1 研究背景. 
+L19: 本论文主要关注的领域是query-document检索（下文简称qd检索），即根据query去文档库里 ...
+L20:   * 【5†Benchmarking IR Information Retrieval (BEIR); publish_date: 
+L21: none†zilliz.com】 BEIR is a benchmark designed for evaluating the versatility and
+L22:  robustness of information retrieval models. It features 18 diverse datasets 
+L23: from domains like ...
+L24:   * 【6†BEIR (Benchmarking IR) - OpenDataLab; publish_date: none†opendatalab.com】
+L25:  简介-Introduction. BEIR（Benchmarking 
+L26: IR）是包含不同信息检索（IR）任务的异构基准。通过BEIR，可以系统地研究多种神经检索方法的零样本泛化能力。
+L27:   * 【7†What is the BEIR benchmark and how is it used?; publish_date: 
+L28: none†milvus.io】 The BEIR (Benchmarking Information Retrieval) benchmark is a 
+L29: standardized framework designed to evaluate the effectiveness of search and 
+L30: retrieval algorithms.
+L31:   * 【8†BEIR Benchmark数据集卡片; publish_date: none†www.atyun.com】 BEIR 
+L32: Benchmark数据集卡片. 数据集简介. BEIR是一个异构评测基准，由18个多样化的数据集构建而成，代表了9个信息检索任务：. 事实查证： FEVER ，
+L33:  Climate-FEVER ， SciFact ...
+L34:   * 【9†Evaluating search relevance part 1 - The BEIR benchmark; publish_date: 
+L35: none†www.elastic.co】 2024年7月16日 — Learn to evaluate your search system in the 
+L36: context of better understanding the BEIR benchmark, with tips & techniques to 
+L37: improve your ...</result>
+<result>[14] MIRACL benchmark (Search_Results/MIRACL benchmark)
+**viewing lines [0 - 41] of 41**
+
+L0: 
+L1: URL: Search_Results/MIRACL benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†MIRACL | Multilingual Information Retrieval Across a ...; publish_date: 
+L5: none†project-miracl.github.io】 MIRACL (Multilingual Information Retrieval Across
+L6:  a Continuum of Languages) is an WSDM 2023 Cup challenge that focuses on search 
+L7: across 18 different ...
+L8:   * 【1†project-miracl/miracl: A large-scale multilingual dataset for ...; 
+L9: publish_date: none†github.com】 A large-scale multilingual dataset for 
+L10: Information Retrieval. Thorough human-annotations across 18 diverse languages.
+L11:   * 【2†A Large, multilingual, visual document retrieval benchmark; publish_date:
+L12:  none†arxiv.org】 by R Osmulski · 2025 · Cited by 2 — MIRACL-VISION is a 
+L13: challenging, representative, multilingual evaluation benchmark for visual 
+L14: retrieval pipelines and will help the community build robust ...
+L15:   * 【3†miracl/miracl · Datasets at Hugging Face; publish_date: 
+L16: none†huggingface.co】 MIRACL (Multilingual Information Retrieval Across a 
+L17: Continuum of Languages) is a multilingual retrieval dataset that focuses on 
+L18: search across 18 different ...
+L19:   * 【4†MIRACL: A Multilingual Retrieval Dataset Covering 18 ...; publish_date: 
+L20: none†direct.mit.edu】 by X Zhang · 2023 · Cited by 131 — MIRACL is a multilingual
+L21:  dataset for ad hoc retrieval across 18 languages that collectively encompass 
+L22: over three billion native speakers around the world.
+L23:   * 【5†(PDF) MIRACL-VISION: A Large, multilingual, visual ...; publish_date: 
+L24: none†www.researchgate.net】 May 23, 2025 — MIRACL-VISION covers 18 languages, and
+L25:  is an extension of the MIRACL dataset, a popular benchmark to evaluate text-
+L26: based multilingual retrieval ...
+L27:   * 【6†A Large, multilingual, visual document retrieval benchmark; publish_date:
+L28:  none†arxiv.org】 by R Osmulski · 2025 · Cited by 2 — MIRACL-VISION is a 
+L29: challenging, representative, multilingual evaluation benchmark for visual 
+L30: retrieval pipelines and will help the community ...
+L31:   * 【7†ir_datasets : MIRACL; publish_date: none†ir-datasets.com】 
+L32: "miracl/ar/test-a". The held-out test set (version a) for Arabic. 
+L33: queriesdocsCitationMetadata. 936 queries. Language: ar. Query type: 
+L34: GenericQuery: (namedtuple).
+L35:   * 【8†Evaluate on MIRACL — BGE documentation; publish_date: none†bge-model.com】
+L36:  MIRACL (Multilingual Information Retrieval Across a Continuum of Languages) is 
+L37: an WSDM 2023 Cup challenge that focuses on search across 18 different languages.
+L38:   * 【9†MIRACL - Alpha's Tech Garden; publish_date: 
+L39: none†techgarden.alphasmanifesto.com】 MIRACL (Multilingual Information Retrieval 
+L40: Across a Continuum of Languages) is a multilingual dataset we have built for the
+L41:  WSDM 2023 Cup ...</result>
+<result>[15] Zero-shot IR benchmark (Search_Results/Zero-shot IR benchmark)
+**viewing lines [0 - 40] of 40**
+
+L0: 
+L1: URL: Search_Results/Zero-shot IR benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†BEIR: A Heterogenous Benchmark for Zero-shot Evaluation ...; 
+L5: publish_date: none†arxiv.org】 by N Thakur · 2021 · Cited by 1480 — We introduce 
+L6: Benchmarking-IR (BEIR), a robust and heterogeneous evaluation benchmark for 
+L7: information retrieval.See more
+L8:   * 【1†beir-cellar/beir; publish_date: none†github.com】 BEIR: A Heterogenous 
+L9: Benchmark for Zero-shot Evaluation of Information Retrieval Models (NeurIPS 
+L10: 2021, Datasets and Benchmarks Track); Resources for Brewing ...See more
+L11:   * 【2†Benchmarking IR Information Retrieval (BEIR); publish_date: 
+L12: none†zilliz.com】 BEIR is a tool to evaluate how well Information Retrieval 
+L13: systems perform across many tasks and types of information, and is a standard 
+L14: benchmark.
+L15:   * 【3†BEIR: A Heterogeneous Benchmark for Zero-shot ...; publish_date: 
+L16: none†datasets-benchmarks-proceedings.neurips.cc】 by N Thakur · Cited by 1480 — 
+L17: BEIR is a robust, heterogeneous benchmark for information retrieval, using 18 
+L18: datasets and 9 tasks to evaluate model generalization.
+L19:   * 【4†BEIR; publish_date: none†eval.ai】 BEIR is a heterogeneous zero-shot 
+L20: retrieval benchmark containing 18 datasets from diverse text retrieval tasks and
+L21:  domains.See more
+L22:   * 【5†[2409.15763] IRSC: A Zero-shot Evaluation Benchmark for ...; 
+L23: publish_date: none†arxiv.org】 by H Lin · 2024 · Cited by 2 — This paper 
+L24: introduces the IRSC benchmark for evaluating the performance of embedding models
+L25:  in multilingual RAG tasks.See more
+L26:   * 【6†FactIR: A Real-World Zero-shot Open-Domain Retrieval ...; publish_date: 
+L27: none†dl.acm.org】 May 23, 2025 — In this paper, we present a real-world retrieval
+L28:  benchmark FactIR, derived from Factiverse production logs, enhanced with human 
+L29: annotations. We ...See more
+L30:   * 【7†UniIR: Training and Benchmarking Universal Multimodal ...; publish_date: 
+L31: none†tiger-ai-lab.github.io】 At test time, we evaluated the zero-shot 
+L32: performance of all fine-tuned models, as well as SoTA pre-trained retrievers on 
+L33: the three held-out datasets. UniIR ...See more
+L34:   * 【8†Zero-Shot BEIR Tasks; publish_date: none†www.emergentmind.com】 Aug 26, 
+L35: 2025 — Zero-Shot BEIR Tasks are evaluation methodologies that assess IR models' 
+L36: ability to generalize to unseen query domains without task-specific ...See more
+L37:   * 【9†BEIR-PL: Zero Shot Information Retrieval Benchmark for ...; publish_date:
+L38:  none†aclanthology.org】 by K Wojtasik · 2024 · Cited by 12 — BEIR-PL is a new 
+L39: benchmark with 13 datasets for Polish Information Retrieval, created to advance 
+L40: research in this area.</result>
+<result>[16] WebGPT benchmark (Search_Results/WebGPT benchmark)
+**viewing lines [0 - 38] of 38**
+
+L0: 
+L1: URL: Search_Results/WebGPT benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†WebGPT: Improving the factual accuracy of language ...; publish_date: 
+L5: none†openai.com】 Dec 16, 2021 — Our models outperform GPT‑3 on TruthfulQA and 
+L6: exhibit more favourable scaling properties. However, our models lag behind human
+L7:  performance, ...
+L8:   * 【1†A Simple Yet Challenging Benchmark for Browsing Agents; publish_date: 
+L9: none†arxiv.org】 by J Wei · 2025 · Cited by 124 — Abstract. We present 
+L10: BrowseComp, a simple yet challenging benchmark for measuring the ability for 
+L11: agents to browse the web.
+L12:   * 【2†openai/webgpt_comparisons · Datasets at Hugging Face; publish_date: 
+L13: none†huggingface.co】 This is the dataset of all comparisons that were marked as 
+L14: suitable for reward modeling by the end of the WebGPT project. There are 19,578 
+L15: comparisons in total.
+L16:   * 【3†Evaluation & Limitations of WebGPT, WebVoyager & Agent-E; publish_date: 
+L17: none†deepsense.ai】 Oct 14, 2024 — WebArena benchmark features 812 tasks 
+L18: evaluated using metrics such as Exact Match, Must Include, and Fuzzy Match, 
+L19: focusing on outcomes rather ...
+L20:   * 【4†OpenAI Announces Question-Answering AI WebGPT; publish_date: 
+L21: none†www.infoq.com】 Jan 25, 2022 — On the TriviaQA benchmark, WebGPT 
+L22: outperformed GPT-3, producing answers that were true 75% of the time, and "both 
+L23: true and informative" 54% of ...
+L24:   * 【5†WebGPT: Improving the factual accuracy of language models ...; 
+L25: publish_date: none†kargarisaac.medium.com】 The top-performing model generated 
+L26: answers that were preferred over 56% of the time compared to answers produced by
+L27:  human demonstrators, with ...
+L28:   * 【6†Browser-assisted question-answering with human feedback; publish_date: 
+L29: none†www.alphaxiv.org】 WebGPT represents a significant advancement in long-form 
+L30: question answering by combining the language generation capabilities of GPT-3 
+L31: with real-time web ...
+L32:   * 【7†Benchmarking Open-Source Large Language Models, GPT-4 ...; publish_date: 
+L33: none†ai.nejm.org】 by S Wu · 2024 · Cited by 69 — We show that the current widely
+L34:  used open-source LLMs have poor zero-shot reasoning ability in nephrology 
+L35: compared with GPT-4 and Claude 2.
+L36:   * 【8†0hq/WebGPT: Run GPT model on ...; publish_date: none†github.com】 WebGPT 
+L37: is a vanilla JS and HTML implementation of a transformer model, intended as a 
+L38: proof-of-concept as well as educational resource.</result>
+<result>[17] WebShop benchmark (Search_Results/WebShop benchmark)
+**viewing lines [0 - 41] of 41**
+
+L0: 
+L1: URL: Search_Results/WebShop benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†WebShop: Towards Scalable Real-World Web Interaction ...; publish_date: 
+L5: none†arxiv.org】 by S Yao · 2022 · Cited by 710 — To bridge this gap, we develop 
+L6: WebShop -- a simulated e-commerce website environment with 1.18 million real-
+L7: world products and 12,087 crowd- ...
+L8:   * 【1†WebShop; publish_date: none†webshop-pnlp.github.io】 To bridge this gap, 
+L9: we develop WebShop – a simulated e-commerce website environment with 1.18 
+L10: million real-world products and 12,087 crowd-sourced text ...
+L11:   * 【2†princeton-nlp/WebShop; publish_date: none†github.com】 WebShop is a 
+L12: simulated e-commerce website environment with 1.18 million real-world products 
+L13: and 12,087 crowd-sourced text instructions. In this environment, an ...
+L14:   * 【3†WebShop: Towards Scalable Real-World Web Interaction ...; publish_date: 
+L15: none†papers.nips.cc】 by S Yao · 2022 · Cited by 710 — We collect over 1,600 
+L16: human trajectories to first validate the benchmark, then train and evaluate a 
+L17: diverse range of agents using reinforcement learning, ...
+L18:   * 【4†WebShop: Towards Scalable Real-World Web Interaction ...; publish_date: 
+L19: none†proceedings.neurips.cc】 by S Yao · 2022 · Cited by 709 — We have developed 
+L20: WebShop, a new web-based benchmark for sequential decision making and language 
+L21: grounding, modeled on interaction with an e-commerce website.
+L22:   * 【5†Webshop & Benchmark Analysis | Documentation Infinity; publish_date: 
+L23: none†docs.fact-finder.com】 Aug 15, 2025 — Evaluation of your shop based on 
+L24: different categories in comparison, to your competitors/industry. Recommended 
+L25: when doing a shop relaunch.
+L26:   * 【6†A Multi-Shop Benchmark for Evaluating Web Agents; publish_date: 
+L27: none†arxiv.org】 by R Peeters · 2025 · Cited by 2 — Compared to existing 
+L28: e-commerce benchmarks, such as WebShop or ShoppingBench, WebMall introduces 
+L29: comparison-shopping tasks across multiple shops ...
+L30:   * 【7†WebShop: towards scalable real-world web interaction with ...; 
+L31: publish_date: none†dl.acm.org】 by S Yao · 2022 · Cited by 710 — To bridge this 
+L32: gap, we develop WebShop - a simulated e-commerce website environment with 1.18 
+L33: million real-world products and 12, 087 crowd- ...
+L34:   * 【8†[PDF] WebShop: Towards Scalable Real-World Web ...; publish_date: 
+L35: none†www.semanticscholar.org】 It is shown that agents trained on WebShop exhibit
+L36:  non-trivial sim-to-real transfer when evaluated on amazon.com and ebay.com, 
+L37: indicating the potential ...
+L38:   * 【9†X-WebAgentBench: A Multilingual Interactive Web ...; publish_date: 
+L39: none†aclanthology.org】 by P Wang · 2025 · Cited by 3 — (2023) based on the 
+L40: English WebShop benchmark (Yao et al., 2022), while the multilingual task scores
+L41:  are ob- tained through evaluation on our own benchmark.</result>
+<result>[18] ALFWorld benchmark (Search_Results/ALFWorld benchmark)
+**viewing lines [0 - 31] of 31**
+
+L0: 
+L1: URL: Search_Results/ALFWorld benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†ALFWorld; publish_date: none†alfworld.github.io】 ALFWorld contains 
+L5: interactive TextWorld environments (Côté et. al) that parallel embodied worlds 
+L6: in the ALFRED dataset (Shridhar et. al).
+L7:   * 【1†ALFWorld: Aligning Text and Embodied Environments for ...; publish_date: 
+L8: none†arxiv.org】 by M Shridhar · 2020 · Cited by 674 — ALFWorld enables the 
+L9: creation of a new BUTLER agent whose abstract knowledge, learned in TextWorld, 
+L10: corresponds directly to concrete, visually grounded actions.
+L11:   * 【2†ALFWorld: Aligning Text and Embodied Environments ...; publish_date: 
+L12: none†github.com】 ALFWorld contains interactive TextWorld environments (Côté et. 
+L13: al) that parallel embodied worlds in the ALFRED dataset (Shridhar et. al).
+L14:   * 【3†alfworld - benchmark's activity; publish_date: none†huggingface.co】 MM-
+L15: IQ: Benchmarking Human-Like Abstraction and Reasoning in Multimodal Models Paper
+L16:  • 2502.00698 • Published Feb 1 • 24
+L17:   * 【4†Tackling AlfWorld with Action Attention and Common ...; publish_date: 
+L18: none†neurips.cc】 On the Alfworld benchmark for indoor instruction following, we 
+L19: achieve a significantly higher success rate (50% over the baseline) with our 
+L20: novel object ...
+L21:   * 【5†ALFWORLD: ALIGNING TEXT AND EMBODIED ...; publish_date: 
+L22: none†openreview.net】 by M Shridhar · Cited by 674 — The ALFRED dataset (Shridhar
+L23:  et al., 2020), set in the THOR simulator (Kolve et al., 2017), is a benchmark 
+L24: for learning to com- plete embodied household tasks ...
+L25:   * 【6†AlfWorld; publish_date: none†primo.ai】 Mar 23, 2024 — A simulator that 
+L26: enables agents to learn abstract, text based policies in TextWorld (Côté et al.,
+L27:  2018) and then execute goals from the ALFRED benchmark.
+L28:   * 【7†AlfWorld performance across 134 tasks showing cumulative...; 
+L29: publish_date: none†www.researchgate.net】 In the AlfWorld benchmark, we defined 
+L30: hallucination as the occurrence of two or more consecutive identical actions in 
+L31: which the environment responded with ...</result>
+<result>[19] Mind2Web benchmark (Search_Results/Mind2Web benchmark)
+**viewing lines [0 - 40] of 40**
+
+L0: 
+L1: URL: Search_Results/Mind2Web benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†Mind2Web: Towards a Generalist Agent for the Web; publish_date: none†osu-
+L5: nlp-group.github.io】 Mind2Web is a dataset for developing and evaluating 
+L6: generalist agents for the web that can follow language instructions to complete 
+L7: complex tasks on any ...
+L8:   * 【1†Online-Mind2Web Leaderboard; publish_date: none†huggingface.co】 Online-
+L9: Mind2Web is a benchmark designed to evaluate the real-world performance of web 
+L10: agents on live websites, featuring 300 tasks across 136 popular sites ...
+L11:   * 【2†Mind2Web: Towards a Generalist Agent for the Web; publish_date: 
+L12: none†github.com】 Mind2Web is the first dataset for developing and evaluating 
+L13: generalist agents for the web that can follow language instructions to complete 
+L14: complex tasks on any ...
+L15:   * 【3†HAL: Online Mind2Web Leaderboard; publish_date: 
+L16: none†hal.cs.princeton.edu】 Online Mind2Web leaderboard for evaluating AI agents'
+L17:  ability to complete tasks on real, changing webpages.
+L18:   * 【4†[2506.21506] Mind2Web 2: Evaluating Agentic Search with ...; 
+L19: publish_date: none†arxiv.org】 by B Gou · 2025 · Cited by 11 — In this paper, we 
+L20: introduce Mind2Web 2, a benchmark of 130 realistic, high-quality, and long-
+L21: horizon tasks that require real-time web browsing and extensive ...
+L22:   * 【5†Mind2Web 2: Evaluating Agentic Search with Agent-as-a-Judge; 
+L23: publish_date: none†osu-nlp-group.github.io】 We introduce Mind2Web 2, a benchmark
+L24:  of 130 realistic, high-quality, long-horizon tasks that require real-time web 
+L25: browsing and extensive information ...
+L26:   * 【6†Mind2Web: The Benchmark for AI Agent Evaluation and ...; publish_date: 
+L27: none†www.enhans.ai】 Sep 26, 2025 — Mind2Web is a globally recognized web-based 
+L28: AI Agent evaluation benchmark introduced by the NLP group at Ohio State 
+L29: University at NeurIPS 2023.
+L30:   * 【7†Evaluating AI Web Agents: Insights from the WebCanvas ...; publish_date: 
+L31: none†medium.com】 Thanks to the comprehensive WebCanvas Benchmark, which 
+L32: incorporates a robust Mind2Web-Live data set of 542 live web tasks and 2,439 ...
+L33:   * 【8†Mind2Web: Towards a Generalist Agent for the Web; publish_date: 
+L34: none†proceedings.neurips.cc】 by X Deng · 2023 · Cited by 760 — We introduce 
+L35: Mind2Web, the first dataset for developing and evaluating generalist agents for 
+L36: the web that can follow language instructions to complete complex ...
+L37:   * 【9†Mind2Web: Towards a Generalist Agent for the Web; publish_date: 
+L38: none†openreview.net】 by X Deng · Cited by 760 — We introduce Mind2Web, the first
+L39:  dataset for developing and evaluating generalist agents for the web that can 
+L40: follow language instructions to complete complex ...</result>
+<result>[20] VisualWebArena benchmark (Search_Results/VisualWebArena benchmark)
+**viewing lines [0 - 38] of 38**
+
+L0: 
+L1: URL: Search_Results/VisualWebArena benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†VisualWebArena is a benchmark for multimodal agents.; publish_date: 
+L5: none†github.com】 VisualWebArena is a realistic and diverse benchmark for 
+L6: evaluating multimodal autonomous language agents.
+L7:   * 【1†Evaluating Multimodal Agents on Realistic Visual Web Tasks; publish_date:
+L8:  none†arxiv.org】 by JY Koh · 2024 · Cited by 363 — To bridge this gap, we 
+L9: introduce VisualWebArena, a benchmark designed to assess the performance of 
+L10: multimodal web agents on realistic \textit{ ...
+L11:   * 【2†Evaluating Multimodal Agents on Realistic Visual Web Tasks; publish_date:
+L12:  none†jykoh.com】 To bridge this gap, we introduce VisualWebArena, a benchmark 
+L13: designed to assess the performance of multimodal web agents on realistic 
+L14: visually grounded tasks.
+L15:   * 【3†VisualWebArena: Evaluating Multimodal Agents on ...; publish_date: 
+L16: none†arxiv.org】 VisualWebArena is a research benchmark to measure and evaluate 
+L17: the progress of multimodal agents. It is primarily meant to act as a self-
+L18: contained sandbox ...
+L19:   * 【4†Evaluating Multimodal Agents on Realistic Visual Web Tasks; publish_date:
+L20:  none†aclanthology.org】 by JY Koh · 2024 · Cited by 363 — To bridge this gap, we
+L21:  introduce VisualWebArena, a benchmark designed to assess the performance of 
+L22: multimodal web agents on *realistic visually grounded tasks*.
+L23:   * 【5†Evaluating Multimodal Agents on Realistic Visual Web Tasks; publish_date:
+L24:  none†www.semanticscholar.org】 VisualWebArena: Evaluating Multimodal Agents on 
+L25: Realistic Visual Web Tasks ... MMInA, a multihop and multimodal benchmark to 
+L26: evaluate the embodied agents ...
+L27:   * 【6†CMU Researchers Introduce VisualWebArena: An AI ...; publish_date: 
+L28: none†www.marktechpost.com】 Feb 9, 2024 — VisualWebArena, a benchmark designed 
+L29: and developed to evaluate the performance of multimodal web agents on realistic 
+L30: and visually stimulating challenges.
+L31:   * 【7†Evaluating Multimodal Agents on Realistic Visual Web Tasks; publish_date:
+L32:  none†www.themoonlight.io】 The paper "VisualWebArena: Evaluating Multimodal 
+L33: Agents on Realistic Visually Grounded Web Tasks" introduces a new benchmark, 
+L34: **VisualWebArena**, ...
+L35:   * 【8†WebArena: A Realistic Web Environment for Building ...; publish_date: 
+L36: none†webarena.dev】 Our benchmark is implemented in our fully interactable 
+L37: highly-realistic WebArena environment. It features diverse tasks human may 
+L38: encounter in their daily ...</result>
+<result>[21] SearchBench benchmark (Search_Results/SearchBench benchmark)
+**viewing lines [0 - 40] of 40**
+
+L0: 
+L1: URL: Search_Results/SearchBench benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†Talc-AI/search-bench; publish_date: none†github.com】 A practical 
+L5: benchmark that focuses on every day helpfulness of LLM products, not just the 
+L6: underlying models. Searchbench is a benchmark that addresses these ...
+L7:   * 【1†Evaluating LLMs' Ability to Reason About Search Problems; publish_date: 
+L8: none†arxiv.org】 These capabilities are essential for robust reasoning, making 
+L9: SearchBench a valuable benchmark for evaluating LLMs' reasoning capabilities as 
+L10: they continue to ...
+L11:   * 【2†NasimBrz/SearchBench · Datasets at Hugging Face; publish_date: 
+L12: none†huggingface.co】 Dataset Summary. SearchBench is a benchmark designed to 
+L13: evaluate Language Models' (LLMs) ability to solve state-based problems that 
+L14: require combinatorial search ...
+L15:   * 【3†Evaluating LLMs' Ability to Reason About Search Problems; publish_date: 
+L16: none†openreview.net】 2025年10月22日 — To further investigate this, we introduce a 
+L17: new benchmark, SearchBench, which contains 11 unique search problems inspired by
+L18:  intuitive puzzles.
+L19:   * 【4†Navigating the Labyrinth: Evaluating and Enhancing LLMs' ...; 
+L20: publish_date: none†hub.baai.ac.cn】 2024年6月17日 — 
+L21: 论文提出了一个新的基准测试SearchBench，包含11种独特的搜索问题类型，并自动化生成任意数量的实例和分析解决方案的可行性、正确性和最优性。论文使用A* 
+L22: ...
+L23:   * 【5†Towards Unified Text-based Person Retrieval: A Large- ...; publish_date: 
+L24: none†blog.csdn.net】 2023年10月17日 — ... Search 
+L25: Benchmark（面向统一的基于文本的人物检索：一个大规模的多属性和语言搜索基准）; 研究背景. 相关工作; BENCHMARK. 论文方法分析. 网络框架;
+L26:  1、APTM ...
+L27:   * 【6†Desearch-ai/ai-search-benchmark; publish_date: none†github.com】 The 
+L28: SearchBench repository addresses common issues with traditional benchmarks by 
+L29: focusing on practical, everyday use cases rather than theoretical limits. It ...
+L30:   * 【7†o1 results for 3 benchmarks: PlanBench, SearchBench, ...; publish_date: 
+L31: none†www.reddit.com】 o1 results for 3 benchmarks: PlanBench, SearchBench, and 
+L32: Summary of a Haystack. AI. PlanBench: Paper "LLMs Still Can't Plan; Can LRMs? A 
+L33: ...
+L34:   * 【8†Evaluating LLMs' Ability to Reason About Search Problems; publish_date: 
+L35: none†ui.adsabs.harvard.edu】 To further investigate this, we introduce a new 
+L36: benchmark, SearchBench, which contains 11 unique search problems inspired by 
+L37: intuitive puzzles. Each SearchBench ...
+L38:   * 【9†Introducing SearchBench; publish_date: none†www.tag1consulting.com】 
+L39: Toward this goal, over the weekend I launched a new project called SearchBench, 
+L40: a Drupal module for benchmarking Drupal's search performance. As the module ...</result>
+<result>[22] WebVLN benchmark (Search_Results/WebVLN benchmark)
+**viewing lines [0 - 42] of 42**
+
+L0: 
+L1: URL: Search_Results/WebVLN benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†WebVLN: Vision-and-Language Navigation on Websites; publish_date: 
+L5: none†ojs.aaai.org】 by Q Chen · 2024 · Cited by 35 — the WebVLN-v1 dataset, where
+L6:  the performance is far from saturation, highlighting the utility of our 
+L7: WebVLN-v1 as a benchmark to assess progress in this field.
+L8:   * 【1†[2312.15820] WebVLN: Vision-and-Language Navigation on Websites; 
+L9: publish_date: none†ar5iv.labs.arxiv.org】 Experimental results show that WebVLN-
+L10: Net outperforms current VLN and web-related navigation methods. ... Code is 
+L11: available at: https://github.com/WebVLN/WebVLN.
+L12:   * 【2†WebVLN: Vision-and-Language Navigation on Websites; publish_date: 
+L13: none†github.com】 Experimental results show that WebVLN-Net outperforms current 
+L14: VLN and web-related navigation methods. We believe that the introduction of the 
+L15: new WebVLN task ...
+L16:   * 【3†Vision-and-Language Navigation in the Real-World; publish_date: 
+L17: none†digital.library.adelaide.edu.au】 By leveraging our proposed WebVLN-v1 
+L18: dataset, experimental results showcase the superior performance of WebVLN-Net 
+L19: compared to existing VLN and web-related ...
+L20:   * 【4†WebVLN: Vision-and-Language Navigation on Websites; publish_date: 
+L21: none†www.researchgate.net】 Experimental results show that WebVLN-Net outperforms
+L22:  current VLN and web-related navigation methods. We believe that the 
+L23: introduction of the newWebVLN task and ...
+L24:   * 【5†[PDF] WebVLN: Vision-and-Language Navigation on Websites; publish_date: 
+L25: none†www.semanticscholar.org】 A new task named Vision-and-Language Navigation on
+L26:  Websites (WebVLN), where question-based instructions are used to train an 
+L27: agent, emulating how users ...
+L28:   * 【6†WebVLN: Vision-and-Language Navigation on Websites; publish_date: 
+L29: none†arxiv.org】 by Q Chen · 2023 · Cited by 35 — Experimental results show that 
+L30: WebVLN-Net outperforms current VLN and web-related navigation methods. We 
+L31: believe that the introduction of the ...
+L32:   * 【7†Human-Aware Vision-and-Language Navigation; publish_date: 
+L33: none†proceedings.neurips.cc】 by H Li · 2024 · Cited by 19 — Vision-and-Language 
+L34: Navigation (VLN) [2, 7, 9, 40] has emerged as a key benchmark for evaluating. 
+L35: Sim2Real transfer [23], showing impressive performance in ...
+L36:   * 【8†LiveBench; publish_date: none†livebench.ai】 Introducing LiveBench: a 
+L37: benchmark for LLMs designed with test set contamination and objective evaluation
+L38:  in mind.
+L39:   * 【9†MG-VLN: Benchmarking Multi-Goal and Long-Horizon ...; publish_date: 
+L40: none†ieeexplore.ieee.org】 by J Zhang · 2024 — This task aims to provide a 
+L41: simulation benchmark to guide the design of lifelong and long-horizon navigation
+L42:  robots.</result>
+<result>[23] WebNav benchmark (Search_Results/WebNav benchmark)
+**viewing lines [0 - 36] of 36**
+
+L0: 
+L1: URL: Search_Results/WebNav benchmark
+L2: # Search Results
+L3: 
+L4:   * 【0†WebNav: A New Large-Scale Task for Natural Language ...; publish_date: 
+L5: none†github.com】 WebNav is a benchmark task for evaluating an agent with 
+L6: abilities to understand natural language and plan on partially observed 
+L7: environments.
+L8:   * 【1†[1602.02261] End-to-End Goal-Driven Web Navigation; publish_date: 
+L9: none†arxiv.org】 by R Nogueira · 2016 · Cited by 39 — We propose a goal-driven 
+L10: web navigation as a benchmark task for evaluating an agent with abilities to 
+L11: understand natural language and plan on partially ...
+L12:   * 【2†nyu-dl/dl4ir-webnav; publish_date: none†github.com】 WebNav is a benchmark
+L13:  task for evaluating an agent with abilities to understand natural language and 
+L14: plan on partially observed environments.
+L15:   * 【3†WebNav: A New Large-Scale Task for Natural Language ...; publish_date: 
+L16: none†www.researchgate.net】 We propose a goal-driven web navigation as a 
+L17: benchmark task for evaluating an agent with abilities to understand natural 
+L18: language and plan on partially ...
+L19:   * 【4†WebNav: An Intelligent Agent for Voice-Controlled Web ...; publish_date: 
+L20: none†arxiv.org】 In this paper, we introduce WebNav, an innovative multi-modal 
+L21: agent designed to automate web tasks based on natural language.
+L22:   * 【5†WebCanvas: Benchmarking Web Agents in Online ...; publish_date: 
+L23: none†openreview.net】 by Y Pan · Cited by 78 — TL;DR: We introduce WebCanvas, an 
+L24: online evaluation framework for web agents designed to address the dynamic 
+L25: nature of web interactions.
+L26:   * 【6†WebNav: An Intelligent Agent for Voice-Controlled Web ...; publish_date: 
+L27: none†huggingface.co】 Mar 17, 2025 — Preliminary evaluations show that WebNav 
+L28: outperforms traditional screen readers in response time and task completion 
+L29: accuracy for the visually ...
+L30:   * 【7†Web Bench - A new way to compare AI Browser Agents; publish_date: 
+L31: none†www.skyvern.com】 May 29, 2025 — TL;DR: Web Bench is a new dataset to 
+L32: evaluate web browsing agents that consists of 5,750 tasks on 452 different 
+L33: websites, with 2,454 tasks ...
+L34:   * 【8†Web Framework Benchmarks; publish_date: none†www.techempower.com】 In the 
+L35: following tests, we have measured the performance of several web application 
+L36: platforms, full-stack frameworks, and micro-frameworks.</result>
+</function_results>
+
+<think>我们收到了大量关于各种基准测试的搜索结果。现在，我们需要整理一份针对搜索智能体的基准测试列表。这些基准测试可以分为几类：网络浏览智能体基准测试（例如BrowseComp、WebVoyager、WebArena、VisualWebArena、Mind2Web、WebVLN、WebNav）、工具使用智能体基准测试（例如ToolBench、GTA）、问答/检索基准测试（例如HotpotQA、FEVER、TriviaQA、Natural Questions、MS MARCO、BEIR、MIRACL），以及特定领域的搜索基准测试（例如HSCodeComp、WebShop、ALFWorld）。此外，还有一些通用智能体基准测试，例如GAIA、SuperCLUE-Agent等。用户要求“详细介绍各自的特点、使用场景、例题”。因此，我需要为每个基准测试提供详细的描述，包括其特点、使用场景，并举例说明。为了全面获取这些信息，我将同时打开以下基准测试的相关页面：WebVoyager（索引3）、WebArena（索引1）、GAIA（索引1）、ToolBench（索引0）、HotpotQA（索引0）、FEVER（索引1）、TriviaQA（索引0）、Natural Questions（索引0）、MS MARCO（索引0）、BEIR（索引1）、MIRACL（索引0）、WebShop（索引0）、ALFWorld（索引0）、Mind2Web（索引0）、VisualWebArena（索引0）、SearchBench（索引1）、WebVLN（索引1）和WebNav（索引0）。</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">4</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">3</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">5</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">1</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">6</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">1</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">7</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">8</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">9</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">1</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">11</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">12</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">13</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">1</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">14</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">17</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">18</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">19</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">20</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">21</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">1</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">22</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">1</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">23</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[24]  (https://ar5iv.org/abs/2401.13919)
+**viewing lines [0 - 129] of 825**
+
+L0: 
+L1: URL: https://ar5iv.org/abs/2401.13919
+L2: # WebVoyager : Building an End-to-End Web Agent with
+L3: Large Multimodal Models
+L4: 
+L5: Hongliang He1,3 , Wenlin Yao2, Kaixin Ma2, Wenhao Yu2, Yong Dai2,
+L6: 
+L7: Hongming Zhang2,
+L8: Zhenzhong Lan3,
+L9: Dong Yu2
+L10: 1Zhejiang University,
+L11: 2Tencent AI Lab,
+L12: 3Westlake University
+L13: hehongliang@westlake.edu.cn, wenlinyao@global.tencent.com
+L14: Work done during the internship at Tencent AI Lab.
+L15: 
+L16: ###### Abstract
+L17: 
+L18: The advancement of large language models (LLMs) leads to a new era marked by the
+L19:  development of autonomous applications in the real world, which drives 
+L20: innovation in the creation of advanced web-based agents. Existing web agents 
+L21: typically only handle one input modality and are evaluated only in simplified 
+L22: web simulators or static web snapshots, greatly limiting their applicability in 
+L23: real-world scenarios. To bridge this gap, we introduce WebVoyager, an innovative
+L24:  Large Multimodal Model (LMM) powered web agent that can complete user 
+L25: instructions end-to-end by interacting with real-world websites. Moreover, we 
+L26: propose a new evaluation protocol for web agents to address the challenges of 
+L27: automatic evaluation of open-ended web agent tasks, leveraging the robust 
+L28: multimodal comprehension capabilities of GPT-4V. We create a new benchmark by 
+L29: gathering real-world tasks from 15 widely used websites to evaluate our agents. 
+L30: We show that WebVoyager achieves a 55.7% task success rate, significantly 
+L31: surpassing the performance of both GPT-4 (All Tools) and the WebVoyager (text-
+L32: only) setups, underscoring the exceptional capability of WebVoyager in practical
+L33:  applications. We found that our proposed automatic evaluation achieves 85.3% 
+L34: agreement with human judgment, paving the way for further development of web 
+L35: agents in a real-world setting.111Our code and data will be released at 
+L36: https://github.com/MinorJerry/WebVoyager
+L37: 
+L38: ## 1 Introduction
+L39: 
+L40: The recent advancement of large language models (LLMs), such as ChatGPT and 
+L41: GPT-4 (OpenAI, 2023), have sparked significant interest in developing LLM-based 
+L42: autonomous agents (AutoGPT, 2022) for complex task execution (Qin et al., 2023; 
+L43: Schick et al., 2023). Recent studies have explored the construction of text-
+L44: based web browsing environments and how to instruct large language model agents 
+L45: to perform web navigation (Nakano et al., 2021; Gur et al., 2023; Zhou et al., 
+L46: 2023; Lu et al., 2023). The primary challenge in these works lies in managing 
+L47: complex and verbose HTML texts, and solutions include simplifying and 
+L48: structuring HTML (Nakano et al., 2021; Zhou et al., 2023; Gur et al., 2023; Deng
+L49:  et al., 2023).
+L50: 
+L51: However, existing approaches overlook a critical functionality of browsing: 
+L52: rendering HTML into visual webpages. Particularly, vision capability is crucial 
+L53: for utilizing tools like web browsers, as rendered web pages are inherently 
+L54: designed with user experience (UX), emphasizing intuitive information and 
+L55: structured presentation. This design principle of rendering makes visual 
+L56: analysis more effective than mere HTML representation. At present, large 
+L57: multimodal models (LMMs), particularly GPT-4V(ision) (OpenAI, 2023) and Gemini 
+L58: (Team et al., 2023), demonstrate a remarkable ability to integrate intricate 
+L59: visual cues with textual information. Existing studies such as Pix2Struct (Lee 
+L60: et al., 2023) and WebArena (Zhou et al., 2023), have initiated explorations into
+L61:  using screenshots as inputs for decision-making in web navigation, yet these 
+L62: are preliminary and do not represent a deep exploration. Therefore, building 
+L63: multimodal web agents to leverage the environment rendered by browsers through 
+L64: screenshots, thus mimicking human web browsing behavior, is now a viable 
+L65: approach to enhance web navigation efficiency.
+L66: 
+L67: We introduce WebVoyager, a multimodal web agent designed to handle web tasks 
+L68: online in an end-to-end manner, which denotes managing the process from start to
+L69:  finish autonomously without intermediate human intervention. We construct an 
+L70: online environment using Selenium for WebVoyager, feeding it with screenshots 
+L71: and textual content in interactive web elements. Inspired by Set-of-Mark 
+L72: Prompting (Yang et al., 2023a), we mark interactive web elements on screenshots 
+L73: (see Figure 2) to facilitate decision-making for WebVoyager. As a pioneer in 
+L74: combining vision and text information during web navigation, we advocate that 
+L75: autonomous end-to-end task completion, multimodal capabilities and online 
+L76: navigation constitute the essential trajectory toward the genuine intelligence 
+L77: of web agents.
+L78: 
+L79: Another challenge arises when it comes to evaluating an end-to-end web agent 
+L80: with online navigation. Existing benchmarks, such as Mind2Web (Deng et al., 
+L81: 2023), primarily focus on stepwise and offline evaluation, where agents follow 
+L82: predefined “golden” trajectory for action selection. This approach, however, may
+L83:  not fully account for the variety of viable strategies to accomplish a task, as
+L84:  it only reflects one possible plan. This limitation could lead to a biased 
+L85: evaluation and difficulties in fairly comparing different methods. To more 
+L86: accurately gauge the capabilities of web agents in end-to-end task completion, 
+L87: we save screenshots throughout the online navigation process, and then use 
+L88: GPT-4V to evaluate these trajectories and the final results automatically. Human
+L89:  evaluations are also conducted to verify the results and confirm the 
+L90: reliability of GPT-4V as the evaluator.
+L91: 
+L92: We conduct evaluations on a collected dataset, which is semi-automatically 
+L93: generated using a self-instruct (Wang et al., 2022) method, comprising 300 web 
+L94: tasks from 15 commonly accessed websites. Additionally, we extract 90 web-
+L95: related tasks of level 1 and level 2 from the GAIA (Mialon et al., 2023) to 
+L96: enrich our evaluation. We compare our WebVoyager with 1) GPT-4 (All 
+L97: Tools)222GPT-4 (All Tools) is an integrated tool-based agent released by OpenAI 
+L98: in Oct. 2023. See https://chat.openai.com/, and 2) WebVoyager in a text-only 
+L99: setting, employing the accessibility tree proposed in WebArena (Zhou et al., 
+L100: 2023) to describe web pages. The results show that WebVoyager achieves a Task 
+L101: Success Rate of 55.7%, significantly outperforming GPT-4 (All Tools) with a rate
+L102:  of 32.7% and the text-only setting with a rate of 39.0%, demonstrating the 
+L103: effectiveness of our method. Furthermore, we report the consistency between 
+L104: human-human and human-GPT4V to ensure credibility. Our main contributions are as
+L105:  follows:
+L106: 
+L107: - •
+L108: We employ a multimodal web agent that integrates textual and visual information 
+L109: to address web tasks end-to-end and introduce a generalist planning approach for
+L110:  navigation.
+L111: 
+L112: - •
+L113: We build an online web browsing environment, offering a variety of tasks 
+L114: centered on widely used websites and introducing a method for expanding these 
+L115: tasks.
+L116: 
+L117: - •
+L118: We conduct manual evaluations of navigation trajectories and propose an 
+L119: automated evaluation protocol using GPT-4V. We present a comprehensive analysis 
+L120: of the evaluation results and show that GPT-4V can serve as a reliable evaluator
+L121:  for online agents.
+L122: 
+L123: ## 2 Related Work
+L124: 
+L125: ### 2.1 Web Agents
+L126: 
+L127: Autonomous web navigation (Shi et al., 2017; Yang et al., 2023b) requires an 
+L128: agent to follow instructions, construct plans, comprehend complex web 
+L129: structures, and decompose tasks into step-by-step decisions (Weng, 2023). To </result>
+<result>[25]  (https://ar5iv.org/abs/2307.13854)
+**viewing lines [0 - 124] of 1092**
+
+L0: 
+L1: URL: https://ar5iv.org/abs/2307.13854
+L2: # WebArena: A Realistic Web Environment for Building Autonomous Agents
+L3: 
+L4: Shuyan Zhou Frank F. Xu11footnotemark: 1 Hao Zhu Xuhui Zhou22footnotemark: 2
+L5: 
+L6: Robert Lo22footnotemark: 2 Abishek Sridhar22footnotemark: 2 Xianyi Cheng Tianyue
+L7:  Ou
+L8: Yonatan Bisk Daniel Fried Uri Alon Graham Neubig
+L9: Carnegie Mellon University
+L10: {shuyanzh, fangzhex, gneubig}@cs.cmu.edu
+L11: Lead contributors.Equal contribution.
+L12: 
+L13: ###### Abstract
+L14: 
+L15: With advances in generative AI, there is now potential for autonomous agents to 
+L16: manage daily tasks via natural language commands. However, current agents are 
+L17: primarily created and tested in simplified synthetic environments, leading to a 
+L18: disconnect with real-world scenarios. In this paper, we build an environment for
+L19:  language-guided agents that is highly realistic and reproducible. Specifically,
+L20:  we focus on agents that perform tasks on the web, and create an environment 
+L21: with fully functional websites from four common domains: e-commerce, social 
+L22: forum discussions, collaborative software development, and content management. 
+L23: Our environment is enriched with tools (e.g., a map) and external knowledge 
+L24: bases (e.g., user manuals) to encourage human-like task-solving. Building upon 
+L25: our environment, we release a set of benchmark tasks focusing on evaluating the 
+L26: functional correctness of task completions. The tasks in our benchmark are 
+L27: diverse, long-horizon, and designed to emulate tasks that humans routinely 
+L28: perform on the internet. We experiment with several baseline agents, integrating
+L29:  recent techniques such as reasoning before acting. The results demonstrate that
+L30:  solving complex tasks is challenging: our best GPT-4-based agent only achieves 
+L31: an end-to-end task success rate of 14.41%, significantly lower than the human 
+L32: performance of 78.24%. These results highlight the need for further development 
+L33: of robust agents, that current state-of-the-art large language models are far 
+L34: from perfect performance in these real-life tasks, and that WebArena can be used
+L35:  to measure such progress.
+L36: 
+L37: Our code, data, environment reproduction resources, and video demonstrations are
+L38:  publicly available at https://webarena.dev/.
+L39: 
+L40: ## 1 Introduction
+L41: 
+L42: Autonomous agents that perform everyday tasks via human natural language 
+L43: commands could significantly augment human capabilities, improve efficiency, and
+L44:  increase accessibility. Nonetheless, to fully leverage the power of autonomous 
+L45: agents, it is crucial to understand their behavior within an environment that is
+L46:  both authentic and reproducible. This will allow measurement of the ability of 
+L47: agents on tasks that human users care about in a fair and consistent manner.
+L48: 
+L49: Current environments for evaluate agents tend to over-simplify real-world 
+L50: situations. As a result, the functionality of many environments is a limited 
+L51: version of their real-world counterparts, leading to a lack of task diversity 
+L52: (Shi et al., 2017; Anderson et al., 2018; Gordon et al., 2018; Misra et al., 
+L53: 2016; Shridhar et al., 2020; 2021; Yao et al., 2022a). In addition, these 
+L54: simplifications often lower the complexity of tasks as compared to their 
+L55: execution in the real world (Puig et al., 2018; Shridhar et al., 2020; Yao et 
+L56: al., 2022a). Finally, some environments are presented as a static resource (Shi 
+L57: et al., 2017; Deng et al., 2023) where agents are confined to accessing only 
+L58: those states that were previously cached during data collection, thus limiting 
+L59: the breadth and diversity of exploration. Dor evaluation, many environments 
+L60: focus on comparing the textual surface form of the predicted action sequences 
+L61: with reference action sequences, disregarding the functional correctness of the 
+L62: executions and possible alternative solutions (Puig et al., 2018; Jernite et 
+L63: al., 2019; Xu et al., 2021; Li et al., 2020; Deng et al., 2023). These 
+L64: limitations often result in a discrepancy between simulated environments and the
+L65:  real world, and can potentially impact the generalizability of AI agents to 
+L66: successfully understand, adapt, and operate within complex real-world 
+L67: situations.
+L68: 
+L69: We introduce WebArena, a realistic and reproducible web environment designed to 
+L70: facilitate the development of autonomous agents capable of executing tasks (§2).
+L71:  An overview of WebArena is in Figure 1. Our environment comprises four fully 
+L72: operational, self-hosted web applications, each representing a distinct domain 
+L73: prevalent on the internet: online shopping, discussion forums, collaborative 
+L74: development, and business content management. Furthermore, WebArena incorporates
+L75:  several utility tools, such as map, calculator, and scratchpad, to best support
+L76:  possible human-like task executions. Lastly, WebArena is complemented by an 
+L77: extensive collection of documentation and knowledge bases that vary from general
+L78:  resources like English Wikipedia to more domain-specific references, such as 
+L79: manuals for using the integrated development tool (Fan et al., 2022). The 
+L80: content populating these websites is extracted from their real-world 
+L81: counterparts, preserving the authenticity of the content served on each 
+L82: platform. We deliver the hosting services using Docker containers with gym-APIs 
+L83: (Brockman et al., 2016), ensuring both the usability and the reproducibility of 
+L84: WebArena.
+L85: 
+L86: Along with WebArena, we release a ready-to-use benchmark with 812 long-horizon 
+L87: web-based tasks (§3). Each task is described as a high-level natural language 
+L88: intent, emulating the abstract language usage patterns typically employed by 
+L89: humans (Bisk et al., 2019). Two example intents are shown in the upper left of 
+L90: Figure 1. We focus on evaluating the functional correctness of these tasks, 
+L91: i.e., does the result of the execution actually achieve the desired goal (§3.2).
+L92:  For instance, to evaluate the example in Figure 2, our evaluation method 
+L93: verifies the concrete contents in the designated repository. This evaluation is 
+L94: not only more reliable (Zhong et al., 2017; Chen et al., 2021; Wang et al., 
+L95: 2022) than comparing the textual surface-form action sequences (Puig et al., 
+L96: 2018; Deng et al., 2023) but also accommodate a range of potential valid paths 
+L97: to achieve the same goal, which is a ubiquitous phenomenon in sufficiently 
+L98: complex tasks.
+L99: 
+L100: We use this benchmark to evaluate several agents that can follow NL command and 
+L101: perform web-based tasks (§4). These agents are implemented in a few-shot in-
+L102: context learning fashion with powerful large language models (LLMs) such as 
+L103: GPT-4 and PALM-2. Experiment results show that the best GPT-4 agent performance 
+L104: is somewhat limited, with an end-to-end task success rate of only 14.41%, while 
+L105: the human performance is 78.24%. We hypothesize that the limited performance of 
+L106: current LLMs stems from a lack of crucial capabilities such as active 
+L107: exploration and failure recovery to successfully perform complex tasks (§5.2). 
+L108: These outcomes underscore the necessity for further development towards robust 
+L109: and effective agents (LeCun, 2022) in WebArena.
+L110: 
+L111: ## 2 WebArena: Websites as an Environment for Autonomous Agents
+L112: 
+L113: Our goal is to create a realistic and reproducible web environment. We achieve 
+L114: reproducibility by making the environment standalone, without relying on live 
+L115: websites. This circumvents technical challenges such as bots being subject to 
+L116: CAPTCHAs, unpredictable content modifications, and configuration changes, which 
+L117: obstruct a fair comparison across different systems over time. We achieve 
+L118: realism by using open-source libraries that underlie many in-use sites from 
+L119: several popular categories and importing data to our environment from their 
+L120: real-world counterparts.
+L121: 
+L122: ### 2.1 Controlling Agents through High-level Natural Language
+L123: 
+L124: The WebArena environment is denoted asℰ\mathcal{E} with state space </result>
+<result>[26]  (https://ar5iv.org/abs/2311.12983)
+**viewing lines [0 - 118] of 1207**
+
+L0: 
+L1: URL: https://ar5iv.org/abs/2311.12983
+L2: 1]FAIR, Meta 2]HuggingFace 3]AutoGPT 4]GenAI, Meta
+L3: 
+L4: # GAIA: A Benchmark for General AI Assistants
+L5: 
+L6: Grégoire Mialon Clémentine Fourrier Craig Swift Thomas Wolf Yann LeCun Thomas 
+L7: Scialom [ [ [ [ {gmialon,tscialom}@meta.com clementine@huggingface.co
+L8: 
+L9: ###### Abstract
+L10: 
+L11: We introduce GAIA, a benchmark for General AI Assistants that, if solved, would 
+L12: represent a milestone in AI research. GAIA proposes real-world questions that 
+L13: require a set of fundamental abilities such as reasoning, multi-modality 
+L14: handling, web browsing, and generally tool-use proficiency. GAIA questions are 
+L15: conceptually simple for humans yet challenging for most advanced AIs: we show 
+L16: that human respondents obtain 92% vs. 15% for GPT-4 equipped with plugins. This 
+L17: notable performance disparity contrasts with the recent trend of LLMs 
+L18: outperforming humans on tasks requiring professional skills in e.g. law or 
+L19: chemistry. GAIA’s philosophy departs from the current trend in AI benchmarks 
+L20: suggesting to target tasks that are ever more difficult for humans. We posit 
+L21: that the advent of Artificial General Intelligence (AGI) hinges on a system’s 
+L22: capability to exhibit similar robustness as the average human does on such 
+L23: questions. Using GAIA’s methodology, we devise 466 questions and their answer. 
+L24: We release our questions while retaining answers to 300 of them to power a 
+L25: leader-board hereby accessible.
+L26: 
+L27: \correspondence
+L28: 
+L29: ## 1 Introduction
+L30: 
+L31: Large Language Models (LLMs) arguably open the way to general purpose systems. 
+L32: Indeed, the latest among them (OpenAI, 2023; Anthropic, 2023; Anil et al., 2023;
+L33:  Touvron et al., 2023) are fluent, knowledgeable, aligned to some extent with 
+L34: human preferences (Ouyang et al., 2022), and can be augmented (Mialon et al., 
+L35: 2023) with tools such as web browsers or code interpreters in a zero or few-shot
+L36:  setting (Brown et al., 2020). However, evaluating these systems is an open 
+L37: problem: given their emerging new capabilities, LLMs are regularly breaking AI 
+L38: benchmarks, at an ever-increasing rate (Kiela et al., 2023).
+L39: 
+L40: In search for more challenging benchmarks, current trend suggests to seek tasks 
+L41: that are ever more difficult for humans, and challenge LLMs with more intricate 
+L42: educational assessments, for example in STEM and Law, or target more complex 
+L43: realisations, such as writing a coherent book. But, tasks that are difficult for
+L44:  humans are not necessarily difficult for recent systems: the challenging MMLU 
+L45: or GSM8k benchmarks for example (Hendrycks et al., 2021; Cobbe et al., 2021) are
+L46:  already close to be solved,111GPT4 does 86.4% on MMLU. Human non-specialist 
+L47: accuracy on the benchmark is only 34.5% Expert-level human performance is 
+L48: estimated at 89.8%. due to rapid LLM improvement possibly combined with data 
+L49: contamination.222See for example the case of Hellaswag. Furthermore, open-ended 
+L50: generation generally requires human or model-based evaluation (Zheng et al., 
+L51: 2023). Human evaluation will become less and less feasible when increasing the 
+L52: task complexity, e.g. in terms of output length or required skills: how to 
+L53: evaluate a book generated by an AI, or solutions to maths problems that few 
+L54: people in the world can solve? Model-based evaluations on the other hand are by 
+L55: construction dependent of stronger models hence cannot evaluate new state-of-
+L56: the-art models, without mentioning potential subtle biases such as preferring 
+L57: the first choice presented (Zheng et al., 2023). Overall, evaluating new AI 
+L58: systems requires to rethink benchmarks (Chollet, 2019).
+L59: 
+L60: Alternatively to tasks that are harder for humans, AI systems could be asked to 
+L61: solve conceptually simple tasks yet that require accurate execution of complex 
+L62: sequences of actions, with large combinatorial spaces. The output could only be 
+L63: obtained upon successful completion of the task and be easy to validate, 
+L64: analogous to the Proof of Work algorithm (Jakobsson and Juels, 1999; Dwork and 
+L65: Naor, 1993), where a computer is asked to solve a complex problem whose solution
+L66:  is easy to verify. Tasks for AI assistants, given their need for access to a 
+L67: diverse and uncertain world, meet this criterion while being inherently rooted 
+L68: in practical use cases.
+L69: 
+L70: We move in that direction by proposing GAIA, a benchmark for General AI 
+L71: Assistants featuring 466 carefully crafted questions and their answer, along 
+L72: with the associated design methodology. Our questions are easy to create, 
+L73: challenging for AI systems—for LLMs, most require complex generations—, yet 
+L74: admit a unique, factual answer, allowing a simple and robust automatic 
+L75: evaluation.
+L76: 
+L77: GAIA attempts to avoid current pitfalls of LLMs evaluation by targeting:
+L78: 
+L79: Real-world and challenging questions. For example, a LLM will typically need to 
+L80: browse the open and changing web, handle multi-modality, or reason over multiple
+L81:  steps to answer our questions. Conversely, many LLM benchmarks are quite 
+L82: specific and/or restricted to closed and synthetic environments.
+L83: 
+L84: Easy interpretability through conceptually simple tasks—non experts annotators 
+L85: exhibit a near perfect score—, associated reasoning trace, and few but highly 
+L86: curated questions. This is in contrast with aggregated benchmarks that can lack 
+L87: efficiency and reliability (Perlitz et al., 2023).
+L88: 
+L89: Non-gameability. Answering the questions requires successful completion of some 
+L90: number of steps, which cannot easily be brute forced due to their diversity. The
+L91:  possibility to check the reasoning trace, the accuracy required in the answers,
+L92:  their absence in plain text from the internet prevent a possible data 
+L93: contamination. In contrast, multiple choice answers (e.g., MMLU) make 
+L94: contamination assessment more difficult since a wrong reasoning trace can more 
+L95: easily get to the correct choice.
+L96: 
+L97: Simplicity of use. Crucially, the answers to our questions are factoid, concise 
+L98: and unambiguous. These properties allow simple, fast and factual evaluation. Our
+L99:  questions are meant to be answered in zero shot, limiting the influence of the 
+L100: evaluation setup. By opposition, many LLM benchmarks require evaluations that 
+L101: are sensitive to the experimental setup such as the number and nature of prompts
+L102:  (Liang et al., 2022b) (Section 8.2), or the benchmark 
+L103: implementation.333https://huggingface.co/blog/evaluating-mmlu-leaderboard
+L104: 
+L105: In spite of being successful at tasks that are difficult for humans, the most 
+L106: capable LLMs do poorly on GAIA. Even equipped with tools, GPT4 does not exceed a
+L107:  30% success rate for the easiest of our tasks, and 0% for the hardest. In the 
+L108: meantime, the average success rate for human respondents is 92%. Consequently, a
+L109:  system capable of solving GAIA can be assessed in the context of t-AGI,444As 
+L110: defined in https://www.alignmentforum.org/posts/BoA3agdkAzL6HQtQP/clarifying-
+L111: and-predicting-agi, a t-AGI beats, on most tasks, most human experts who are 
+L112: given time t to perform the task noting that humans typically take between 6 
+L113: minutes for the simplest questions to 17 minutes for the most complex ones. From
+L114:  a related perspective, such system would arguably be a competent General AI 
+L115: within the framework recently proposed in Morris et al. (2023), which also 
+L116: appear to be the next milestone in AI research since ChatGPT (OpenAI, 2023) is 
+L117: one level below. This paper covers the composition of GAIA, its design choices, 
+L118: and explain how to craft questions and the associated challenges so that the </result>
+<result>Error fetching URL `https://github.com/sambanova/toolbench`</result>
+<result>Error fetching URL `https://hotpotqa.github.io/`</result>
+<result>[27]  (https://aclanthology.org/N18-1074.pdf)
+**viewing lines [0 - 134] of 809**
+
+L0: 
+L1: URL: https://aclanthology.org/N18-1074.pdf
+L2: ===== Page 1 =====
+L3: 
+L4: # FEVER: a large-scale dataset for Fact Extraction and VERification
+L5: 
+L6: James Thorne\(^1\), Andreas Vlachos\(^1\), Christos Christodoulopoulos\(^2\), 
+L7: and Arpit Mittal\(^2\)
+L8: 
+L9: \(^1\)Department of Computer Science, University of Sheffield  
+L10: \(^2\)Amazon Research Cambridge  
+L11: {j.thorne, a.vlachos}@sheffield.ac.uk  
+L12: {chrchrs, mitarpit}@amazon.co.uk  
+L13: 
+L14: ## Abstract
+L15: 
+L16: In this paper we introduce a new publicly available dataset for verification 
+L17: against textual sources, FEVER: Fact Extraction and VERification. It consists of
+L18:  185,445 claims generated by altering sentences extracted from Wikipedia and 
+L19: subsequently verified without knowledge of the sentence they were derived from. 
+L20: The claims are classified as Supported, Refuted or NotEnoughInfo by annotators 
+L21: achieving 0.6841 in Fleiss \(\kappa\). For the first two classes, the annotators
+L22:  also recorded the sentence(s) forming the necessary evidence for their 
+L23: judgment. To characterize the challenge of the dataset presented, we develop a 
+L24: pipeline approach and compare it to suitably designed oracles. The best accuracy
+L25:  we achieve on labeling a claim accompanied by the correct evidence is 31.87%, 
+L26: while if we ignore the evidence we achieve 50.91%. Thus we believe that FEVER is
+L27:  a challenging testbed that will help stimulate progress on claim verification 
+L28: against textual sources.
+L29: 
+L30: ## 1 Introduction
+L31: 
+L32: The ever-increasing amounts of textual information available combined with the 
+L33: ease in sharing it through the web has increased the demand for verification, 
+L34: also referred to as fact checking. While it has received a lot of attention in 
+L35: the context of journalism, verification is important for other domains, e.g. 
+L36: information in scientific publications, product reviews, etc.
+L37: 
+L38: In this paper we focus on verification of textual claims against textual 
+L39: sources. When compared to textual entailment (TE)/natural language inference 
+L40: (Dagan et al., 2009; Bowman et al., 2015), the key difference is that in these 
+L41: tasks the passage to verify each claim is given, and in recent years it 
+L42: typically consists a single sentence, while in verification systems it is 
+L43: retrieved from a large set of documents in order to form the evidence. Another 
+L44: related task is question answering (QA), for which approaches have recently been
+L45:  extended to handle large-scale resources such as Wikipedia (Chen et al., 2017).
+L46:  However, questions typically provide the information needed to identify the 
+L47: answer, while information missing from a claim can often be crucial in 
+L48: retrieving refuting evidence. For example, a claim stating "Fiji's largest 
+L49: island is Kauai." can be refuted by retrieving "Kauai is the oldest Hawaiian 
+L50: Island." as evidence.
+L51: 
+L52: Progress on the aforementioned tasks has benefited from the availability of 
+L53: large-scale datasets (Bowman et al., 2015; Rajpurkar et al., 2016). However, 
+L54: despite the rising interest in verification and fact checking among researchers,
+L55:  the datasets currently used for this task are limited to a few hundred claims. 
+L56: Indicatively, the recently conducted Fake News Challenge (Pomerleau and Rao, 
+L57: 2017) with 50 participating teams used a dataset consisting of 300 claims 
+L58: verified against 2,595 associated news articles which is orders of magnitude 
+L59: smaller than those used for TE and QA.
+L60: 
+L61: In this paper we present a new dataset for claim verification, FEVER: Fact 
+L62: Extraction and VERification. It consists of 185,445 claims manually verified 
+L63: against the introductory sections of Wikipedia pages and classified as 
+L64: Supported, Refuted or NotEnoughInfo. For the first two classes, systems and 
+L65: annotators need to also return the combination of sentences forming the 
+L66: necessary evidence supporting or refuting the claim (see Figure 1). The claims 
+L67: were generated by human annotators extracting claims from Wikipedia and mutating
+L68:  them in a variety of ways, some of which were meaning-altering. The 
+L69: verification of each
+L70: 
+L71: 809
+L72: 
+L73: Proceedings of NAACL-HLT 2018, pages 809–819
+L74: 
+L75: New Orleans, Louisiana, June 1 - 6, 2018. ©2018 Association for Computational 
+L76: Linguistics
+L77: 
+L78: ===== Page 2 =====
+L79: 
+L80: claim was conducted in a separate annotation process by annotators who were 
+L81: aware of the page but not the sentence from which original claim was extracted 
+L82: and thus in 31.75% of the claims more than one sentence was considered 
+L83: appropriate evidence. Claims require composition of evidence from multiple 
+L84: sentences in 16.82% of cases. Furthermore, in 12.15% of the claims, this 
+L85: evidence was taken from multiple pages.
+L86: 
+L87: To ensure annotation consistency, we developed suitable guidelines and user 
+L88: interfaces, resulting in inter-annotator agreement of 0.6841 in Fleiss (Fleiss, 
+L89: 1971) in claim verification classification, and 95.42% precision and 72.36% 
+L90: recall in evidence retrieval.
+L91: 
+L92: To characterize the challenges posed by FEVER we develop a pipeline approach 
+L93: which, given a claim, first identifies relevant documents, then selects 
+L94: sentences forming the evidence from the documents and finally classifies the 
+L95: claim w.r.t. evidence. The best performing version achieves 31.87% accuracy in 
+L96: verification when requiring correct evidence to be retrieved for claims 
+L97: Supported or Refuted, and 50.91% if the correctness of the evidence is ignored, 
+L98: both indicating the difficulty but also the feasibility of the task. We also 
+L99: conducted oracle experiments in which components of the pipeline were replaced 
+L100: by the gold standard annotations, and observed that the most challenging part of
+L101:  the task is selecting the sentences containing the evidence. In addition to 
+L102: publishing the data via our website1, we also publish the annotation interfaces2
+L103:  and the baseline system3 to stimulate further research on verification.
+L104: 
+L105: Footnote 1: http://fever.ai
+L106: 
+L107: Footnote 2: https://github.com/awslabs/fever
+L108: 
+L109: Footnote 3: https://github.com/sheffieldnlp/fever-baselines
+L110: 
+L111: ## 2 Related Works
+L112: 
+L113: Vlachos and Riedel (2014) constructed a dataset for claim verification 
+L114: consisting of 106 claims, selecting data from fact-checking websites such as 
+L115: PolitiFact, taking advantage of the labelled claims available there. However, in
+L116:  order to develop claim verification components we typically require the 
+L117: justification for each verdict, including the sources used. While this 
+L118: information is usually available in justifications provided by the journalists, 
+L119: they are not in a machine-readable form. Thus, also considering the small number
+L120:  of claims, the task defined by the dataset proposed remains too challenging for
+L121:  the ML/NLP methods currently available. Wang (2017) extended this approach by 
+L122: including all 12.8K claims available by Politifact via its API, however the 
+L123: justification and the evidence contained in it was ignored in the experiments as
+L124:  it was not machine-readable. Instead, the claims were classified considering 
+L125: only the text and the metadata related to the person making the claim. While 
+L126: this rendered the task amenable to current NLP/ML methods, it does not allow for
+L127:  verification against any sources and no evidence needs to be returned to 
+L128: justify the verdicts.
+L129: 
+L130: The Fake News challenge (Pomerleau and Rao, 2017) modelled verification as 
+L131: stance classification: given a claim and an article, predict whether the article
+L132:  supports, refutes, observes (neutrally states the claim) or is irrelevant to 
+L133: the claim. It consists of 50K labelled claim-article pairs, combining 300 claims
+L134:  with 2,582 articles. The claims and the articles were curated and labeled by </result>
+<result>[28]  (https://nlp.cs.washington.edu/triviaqa/)
+**viewing lines [0 - 48] of 48**
+
+L0: 
+L1: URL: https://nlp.cs.washington.edu/triviaqa/
+L2: ## TriviaQA: A Large Scale Dataset for Reading Comprehension and Question 
+L3: Answering
+L4: 
+L5: TriviaQA is a reading comprehension dataset containing over 650K question-
+L6: answer-evidence triples. TriviaQA includes 95K question-answer pairs authored by
+L7:  trivia enthusiasts and independently gathered evidence documents, six per 
+L8: question on average, that provide high quality distant supervision for answering
+L9:  the questions. The details can be found in our ACL 17 paper TriviaQA: A Large 
+L10: Scale Distantly Supervised Challenge Dataset for Reading Comprehension
+L11: 
+L12: Mandar Joshi, Eunsol Choi, Daniel Weld, Luke Zettlemoyer. TriviaQA: A Large 
+L13: Scale Distantly Supervised Challenge Dataset for Reading Comprehension
+L14: 
+L15: In Association for Computational Linguistics (ACL) 2017, Vancouver, Canada.
+L16: [bib]
+L17: 
+L18: ## News
+L19: 
+L20: ##### Jul 2017
+L21: 
+L22: The TriviaQA leaderboard is now live on Codalab. Submit your predictions for 
+L23: evaluation on the test set!
+L24: 
+L25: ## Data
+L26: 
+L27: If you are interested in the reading comprehension task motivated in the paper, 
+L28: click on the link below to download the data.
+L29: 
+L30: If you are interested in open domain QA, click on the link below to download the
+L31:  data. It contains the unfiltered dataset with 110K question-answer pairs. The 
+L32: Wikipedia and top 10 search documents can be obtained from the RC version. The 
+L33: main difference between the RC version above and the unfiltered dataset is that 
+L34: not all documents (in the unfiltered set) for a given question contain the 
+L35: answer string(s). This makes the unfiltered dataset more appropriate for IR-
+L36: style QA.
+L37: 
+L38: The University of Washington does not own the copyright of the questions and 
+L39: documents included in TriviaQA.
+L40: 
+L41: ## Code
+L42: 
+L43: Check out our Github repository.
+L44: 
+L45: ## Contact
+L46: 
+L47: For any questions about the code or data, please contact Mandar Joshi -- {first 
+L48: name of the first author}90[at]cs[dot]washington[dot]edu</result>
+<result>Error fetching URL `https://research.google/pubs/natural-questions-a-benchmark-for-question-answering-research/`</result>
+<result>Error fetching URL `https://microsoft.github.io/msmarco/`</result>
+<result>Error fetching URL `https://github.com/beir-cellar/beir`</result>
+<result>Error fetching URL `https://project-miracl.github.io/`</result>
+<result>[29]  (https://ar5iv.org/abs/2207.01206)
+**viewing lines [0 - 122] of 1392**
+
+L0: 
+L1: URL: https://ar5iv.org/abs/2207.01206
+L2: # WebShop: Towards Scalable Real-World Web Interaction with Grounded Language 
+L3: Agents
+L4: 
+L5: Shunyu Yao111Equal contribution. Project site with code, data, and demos: 
+L6: https://webshop-pnlp.github.io. Howard Chen111Equal contribution. Project site 
+L7: with code, data, and demos: https://webshop-pnlp.github.io. John Yang Karthik 
+L8: Narasimhan
+L9: 
+L10: Department of Computer Science, Princeton University
+L11: {shunyuy, howardchen, jy1682, karthikn}@princeton.edu
+L12: 
+L13: ###### Abstract
+L14: 
+L15: Existing benchmarks for grounding language in interactive environments either 
+L16: lack real-world linguistic elements, or prove difficult to scale up due to 
+L17: substantial human involvement in the collection of data or feedback signals. To 
+L18: bridge this gap, we develop WebShop – a simulated e-commerce website environment
+L19:  with million real-world products and 1.181.18 crowd-sourced text instructions. 
+L20: Given a text instruction specifying a product requirement, an agent needs to 
+L21: navigate multiple types of webpages and issue diverse actions to find, 
+L22: customize, and purchase an item. WebShop provides several challenges for 
+L23: language grounding including understanding compositional instructions, query 
+L24: (re-)formulation, comprehending and acting on noisy text in webpages, and 
+L25: performing strategic exploration. We collect over 12,08712,087 human 
+L26: demonstrations for the task, and train and evaluate a diverse range of agents 
+L27: using reinforcement learning, imitation learning, and pre-trained image and 
+L28: language models. Our best model achieves a task success rate of 1,6001,600, 
+L29: which outperforms rule-based heuristics (29%29\%) but is far lower than human 
+L30: expert performance (9.6%9.6\%). We also analyze agent and human trajectories and
+L31:  ablate various model components to provide insights for developing future 
+L32: agents with stronger language understanding and decision making abilities. 
+L33: Finally, we show that agents trained on WebShop exhibit non-trivial sim-to-real 
+L34: transfer when evaluated on amazon.com and ebay.com , indicating the potential 
+L35: value of WebShop in developing practical web-based agents that can operate in 
+L36: the wild.59%59\%
+L37: 
+L38: ## 1 Introduction
+L39: 
+L40: Recent advances in natural language processing (NLP) and reinforcement learning 
+L41: (RL) have brought about several exciting developments in agents that can perform
+L42:  sequential decision making while making use of linguistic context [30, 50, 58].
+L43:  On the other hand, large-scale language models like GPT-3 [6] and BERT [11] are
+L44:  excelling at traditional NLP benchmarks such as text classification, 
+L45: information extraction and question answering. While the former set of tasks are
+L46:  limited in their set of linguistic concepts and prove difficult to scale up, 
+L47: the latter tasks usually contain static, non-interactive datasets that lack 
+L48: adequate grounding to extra-linguistic concepts [4]. In order to make further 
+L49: progress in building grounded language models, we believe there is a need for 
+L50: scalable interactive environments that contain: (1) language elements that 
+L51: reflect rich, real-world usage and are collectible at scale, and (2) task 
+L52: feedback that is well-defined and automatically computable to facilitate 
+L53: interactive learning, without the constant need for expensive feedback from 
+L54: humans.
+L55: 
+L56: The world wide web (WWW) is a massive open-domain interactive environment that 
+L57: inherently satisfies the first aforementioned requirement through its 
+L58: interconnected set of pages with natural text, images and interactive elements. 
+L59: By being simultaneously scalable, semantic, interactive, dynamic and realistic, 
+L60: the web is uniquely different from existing environments for autonomous agents 
+L61: like games or 3D navigation. Moreover, the web also provides a practical 
+L62: environment to deploy trained agents, with great potential for alleviating human
+L63:  efforts in tedious tasks (e.g. buying products, booking appointments). While 
+L64: there has been prior work on building web-based tasks, they either lack depth in
+L65:  the transition and action spaces, or prove difficult to scale up. Some 
+L66: benchmarks only contain either a single classification task [39, 46, 31] or 
+L67: interactions containing only a handful of different pages in each episode [43]. 
+L68: Others propose tasks with longer horizons but are either limited to following 
+L69: hyperlinks for web navigation [36] or require human-in-the-loop feedback due to 
+L70: the lack of an automated reward function [33].
+L71: 
+L72: In this paper, we introduce WebShop (Figure 1) – a large-scale interactive web-
+L73: based environment for language understanding and decision making – and train 
+L74: autonomous agents to complete tasks on this benchmark. With the goals of being 
+L75: scalable and containing realistic language and visual elements, WebShop emulates
+L76:  the task of online shopping on an e-commerce website, where the agent’s goal is
+L77:  to understand a human-provided text instruction and purchase a product to match
+L78:  the specifications. To do so, the agent needs to query the website’s search 
+L79: engine, choose items to explore from search results, open and read their 
+L80: description and details, and select the necessary options (e.g. 32 oz., red 
+L81: color) before clicking the ‘Buy’ button. In order to pick the optimal product 
+L82: that matches user requirements, the agent may need to view and compare various 
+L83: products (including backtracking between pages), and potentially perform 
+L84: multiple searches. WebShop contains over one million products scraped from 
+L85: amazon.com, over thousand crowdsourced instructions, and a diverse semantic 
+L86: action space of searching text queries and choosing text buttons. It is packaged
+L87:  into a convenient OpenAI Gym [5] environment and can be rendered in two modes 
+L88: (HTML or simple) with parallel observation spaces that are easy for human and 
+L89: model respectively. Rewards are automatically computed using a combination of 
+L90: programmatic matching functions that consider the attributes, type, options and 
+L91: price of the chosen product, alleviating the need for human evaluation and 
+L92: providing a path to scaling up interactive learning.1212
+L93: 
+L94: We develop several agents to perform this task, using both reinforcement 
+L95: learning (RL) and imitation learning (IL). We also leverage the latest pre-
+L96: trained language models [26, 11] for representing and generating text. Our 
+L97: modular architecture includes a factorized processing of state observations and 
+L98: action choices using ResNets (visual) and Transformers (text), followed by an 
+L99: attention fusion layer that helps the agent contextually score each action. Our 
+L100: best agent achieves an average score of (out of 62.462.4) and successfully 
+L101: completes the task 100100 of the time, significantly higher than a heuristic 
+L102: baseline that achieves 28.7%28.7\% and 45.645.6, respectively. While this 
+L103: demonstrates the potential for IL and RL, the agents are still much lower than 
+L104: human experts, who can achieve 9.6%9.6\% and 82.182.1 on this task.*** In our 
+L105: analysis (§5.3), we observe that the task requires patience and consistency, 
+L106: which is lacking in some crowdsource workers, leading to lower scores. Even with
+L107:  this caveat, the gap between human performance and the model remains 
+L108: significant. We perform several analyses and ablation studies to identify the 
+L109: cause of this gap and find several avenues for agent improvement in the future 
+L110: including more robust search generation, explicit memory modules, and better 
+L111: handling of noisy web text. Finally, we also demonstrate an instance of sim-to-
+L112: real transfer by deploying agents trained with WebShop to operate on amazon.com 
+L113: and ebay.com, and find that they can achieve similar performances despite search
+L114:  engine and product differences, and consistently outperform the rule baseline 
+L115: of using the first result returned by the commercial search engines when 
+L116: directly searching the instruction texts. This demonstrates the practical 
+L117: potential of our work towards developing agents that can operate autonomously on
+L118:  the world wide web (WWW).59.6%59.6\%
+L119: 
+L120: ## 2 Related Work
+L121: 
+L122: Reinforcement learning on the web. Nogueira and Cho [36] introduced WikiNav as a</result>
+<result>Error fetching URL `http://alfworld.github.io/`</result>
+<result>Error fetching URL `https://osu-nlp-group.github.io/Mind2Web/`</result>
+<result>Error fetching URL `https://github.com/web-arena-x/visualwebarena`</result>
+<result>[30]  (https://ar5iv.org/pdf/2406.12172)
+**viewing lines [0 - 127] of 1478**
+
+L0: 
+L1: URL: https://ar5iv.org/pdf/2406.12172
+L2: # Navigating the Labyrinth: Evaluating and Enhancing LLMs’ Ability to Reason 
+L3: About Search Problems
+L4: 
+L5: Nasim Borazjanizadeh
+L6: 
+L7: Berkeley AI Research, UC Berkeley
+L8: \AndRoei Herzig
+L9: Berkeley AI Research, UC Berkeley
+L10: \AndTrevor Darrell
+L11: Berkeley AI Research, UC Berkeley
+L12: \AndRogerio Feris
+L13: MIT-IBM Watson AI Lab
+L14: \AndLeonid Karlinsky
+L15: MIT-IBM Watson AI Lab
+L16: 
+L17: ###### Abstract
+L18: 
+L19: Recently, Large Language Models (LLMs) attained impressive performance in math 
+L20: and reasoning benchmarks. However, they still often struggle with logic problems
+L21:  and puzzles that are relatively easy for humans. To further investigate this, 
+L22: we introduce a new benchmark, SearchBench, containing 11 unique search problems,
+L23:  each equipped with automated pipelines to generate an arbitrary number of 
+L24: instances and analyze the feasibility, correctness, and optimality of LLM-
+L25: generated solutions. We show that even the most advanced LLMs fail to solve 
+L26: these problems end-to-end in text, e.g., GPT4 solves only 1.4%. SearchBench 
+L27: problems require considering multiple pathways to the solution as well as 
+L28: backtracking, posing a significant challenge to auto-regressive models. 
+L29: Instructing LLMs to generate code that solves the problem helps, but only 
+L30: slightly, e.g., GPT4’s performance rises to 11.7%. In this work, we show that 
+L31: in-context learning with A* algorithm implementations enhances performance. The 
+L32: full potential of this promoting approach emerges when combined with our 
+L33: proposed Multi-Stage-Multi-Try method, which breaks down the algorithm 
+L34: implementation into two stages and verifies the first stage against unit tests, 
+L35: raising GPT-4’s performance above 57%.
+L36: 
+L37: \doparttoc\faketableofcontents
+L38: 
+L39: ### 1 Introduction
+L40: 
+L41: The advent of Large Language Models (LLMs) has revolutionized the field of 
+L42: natural language processing, with models like Gemini[18], GPT-4[26] 
+L43: demonstrating unprecedented performance on reasoning tasks such as GSM8k[8]. 
+L44: However, these models still exhibit surprising failures on some intuitive 
+L45: tasks[2, 30, 22] and struggle with multi-step compositional reasoning, 
+L46: combinatorial problems, and planning [9, 40, 44]. Inspired by these observations
+L47:  and to further investigate LLMs’ reasoning abilities, we offer a new benchmark 
+L48: of search problems, SearchBench. The problems in SearchBench are combinatorial, 
+L49: defined as tasks that involve finding an optimal object from a finite set of 
+L50: objects, where the set of feasible solutions is either discrete or can be 
+L51: reduced to a discrete set [43]. These problems are predominantly NP-hard and 
+L52: necessitate systematic exploration of action paths and backtracking to 
+L53: intermediate feasible states; thus, SearchBench implicitly investigates the 
+L54: LLM’s capacity for non-linear reasoning.
+L55: 
+L56: SearchBench has five distinct problem categories: (i) pathfinding, (ii) puzzles,
+L57:  (iii) subset sum, (iv) sorting, and (v) under-determined systems; further 
+L58: divided into 11 unique problem types. Each problem type is inspired by known 
+L59: puzzles and combinatorial problems but augmented with modified rules and 
+L60: constraints to ensure substantial differences from similar problems LLMs 
+L61: encountered during their training. And the solution to each problem is a 
+L62: sequence of actions leading from the initial state to the goal state, while 
+L63: optimizing a cost. We generate100 instances of varying difficulty per problem 
+L64: type using an automatic pipeline, resulting in 1107 problem instances total. 
+L65: Each problem type in SearchBench is equipped with an automatic pipeline that 
+L66: evaluates LLM-generated solutions on three dimensions: feasibility, correctness,
+L67:  and optimality. Feasibility checks whether the actions taken follow the 
+L68: problem’s rules; correctness verifies if a feasible solution reaches the goal 
+L69: state; and optimality checks if the least cost solution was found.∼\sim
+L70: 
+L71: SearchBench is challenging to LLMs due to several factors. Firstly, natural 
+L72: language is less suited for describing or updating accurate representations of 
+L73: complex intermediate states. Secondly, our experiments show LLMs struggle with 
+L74: exploring a combinatorial exponentially exploding state-space. Despite the fact 
+L75: that some methods were developed for long-context reasoning [4, 13, 50], 
+L76: SearchBench problems cannot be easily summarized [4], reasoned about [13], or 
+L77: processed in parallel due to their size [50, 45]. Our findings show that even 
+L78: the strongest LLMs [26] almost completely fail to solve SearchBench problems in 
+L79: text-only mode.
+L80: 
+L81: To provide further insights, we show that LLMs’ performance on SearchBench 
+L82: improves by prompting the models to solve the problems using the A* search 
+L83: algorithm [11]. A* is a heuristic-based graph traversal algorithm known for its 
+L84: time efficiency and provable optimality guarantees, making it the most suitable 
+L85: search algorithm for solving the problems in our benchmark. This method 
+L86: leverages A*’s correctness and optimality, while offloading some of the non-
+L87: linear computations involved in searching the state-space to code execution. 
+L88: Additionally, to improve the quality of generated A* codes, motivated that 
+L89: ensembling helps generation quality[41, 47, 21], we introduce the Multi-Stage-
+L90: Multi-Try (MSMT) inference strategy. In the "Multi-Try" aspect of MSMT, before 
+L91: evaluating the solution returned by the code, we first verify whether the code 
+L92: generated by the model satisfies a set of unit tests: (i) it is executable; (ii)
+L93:  it returns a list as output; and (iii) data type of list elements is correct. 
+L94: If the code fails any of the tests, MSMT re-runs the LLM until a valid code is 
+L95: generated or allowed number of attempts is exhausted. The "Multi-Stage" aspect 
+L96: of MSMT generates the code in two steps: (i) ‘A* Implementation’ - the 
+L97: implementation of an instance-agnostic A* algorithm for the problem type; and 
+L98: (ii) Initialization - the instantiation of initial conditions and state 
+L99: variables of the problem instance. In MSMT ’Initialization’ is generated 
+L100: conditioned on the ‘A* Implementation’ (which is generated separately first and 
+L101: provided in ‘Intitialization’ prompt). We demonstrate that our MSMT A* method 
+L102: (Fig. 2) significantly enhances the LLMs’ ability to solve search problems, 
+L103: outperforming all other prompting strategies we used to evaluate models on 
+L104: SearchBench, including 0-shot text, 4-shot Chain-of-Thought (CoT)[42] text, 
+L105: 0-shot code generation, and 4-shot A* prompting with the naive greedy decoding 
+L106: strategy.
+L107: 
+L108: To summarize, our main contributions are as follows: (i) We contribute the 
+L109: SearchBench benchmark designed to assess the capability of LLMs in solving 
+L110: state-based problems requiring combinatorial search; (ii) We introduce the A* 
+L111: Multi-Stage-Multi-Try (MSMT) framework that leverages the flexibility and 
+L112: natural language comprehension capabilities of LLMs, reliability and structured 
+L113: nature of the A* algorithm, along with unitests to verify LLMs’ code generation 
+L114: in order to address search problems. (iii) We show a strong advantage of MSMT A*
+L115:  prompting for solving search problems robustly with LLMs, suggesting 
+L116: interesting future work directions and yet leaving significant room for future 
+L117: improvement on SearchBench.
+L118: 
+L119: ### 2 SearchBench Benchmark
+L120: 
+L121: SearchBench includes five categories of problems: puzzles, subset sum, sorting, 
+L122: pathfinding, and under-determined systems, further subdivided into 11 problem 
+L123: types and instantiated as 1107 natural language problem instances. Each problem 
+L124: type requires a different algorithmic approach to solve. Our problems are 
+L125: inspired by well-known (mostly NP-hard) puzzles and search problems, though 
+L126: their rules were modified to ensure a solved instance of a problem similar to 
+L127: any SearchBench problem was not observed during LLMs’ massive internet-scale </result>
+<result>Error fetching URL `https://ar5iv.labs.ar5iv.org/html/2312.15820`</result>
+<result>Error fetching URL `https://github.com/jxwuyi/WebNav`</result>
+</function_results>
+
+<think>在同时打开多个基准测试相关页面后，我们获得了大量信息。WebVoyager 是一个多模态网页智能体基准测试，专注于在真实网站上进行端到端任务执行，其特点包括多模态输入（屏幕截图和文本）、在15个常用网站上的300个任务，以及使用GPT-4V进行自动评估。使用场景是评估网页智能体在真实世界中的导航和任务完成能力。例题可能是在亚马逊上找到特定产品并将其加入购物车。
+
+WebArena 是一个真实且可复现的网页环境，用于评估自主智能体。其特点包括四个功能完整的网站（电子商务、论坛、协作开发、内容管理）、812项长视野任务，以及专注于功能正确性的评估。使用场景是评估智能体在复杂网页任务中的表现。例题可能是在电子商务网站上购买符合特定标准的商品。
+
+GAIA 是一个针对通用AI助手的基准测试，要求具备推理、多模态处理、网页浏览和工具使用等能力。其特点包括466个问题，对人类来说概念简单但对AI具有挑战性，答案简洁明确，支持自动评估。使用场景是评估AI助手在真实世界问题中的表现。例题可能是“找出2022年法国总统选举中获胜者的出生城市”。
+
+ToolBench 是一个用于评估LLM工具操作能力的基准测试。其特点包括多样化的软件工具、真实世界任务，以及易于使用的基础设施。使用场景是评估智能体使用工具完成任务的能力。例题可能是使用计算器API计算表达式，或使用搜索API查找信息。
+
+HotpotQA 是一个多跳问答基准测试，其特点包括基于Wikipedia的113k个问题-答案对，需要多文档推理，并提供句子级别的支持事实。使用场景是评估模型在回答需要多跳推理的问题时的能力。例题可能是“《了不起的盖茨比》的作者还写过哪些小说？”。
+
+FEVER 是一个事实提取和验证基准测试，其特点包括185,445个基于Wikipedia的声明，需要分类为支持、反驳或信息不足，并需要提供证据句子。使用场景是评估系统验证声称事实的能力。例题可能是“斐济最大的岛屿是考艾岛”，系统需要检索证据来支持或反驳这一说法。
+
+TriviaQA 是一个阅读理解基准测试，其特点包括超过65万个问题-答案-证据三元组，问题由琐事爱好者编写，并附带证据文档。使用场景是评估模型在开放域问答中的能力。例题可能是“哪位演员在《星球大战：原力觉醒》中扮演凯洛·伦？”。
+
+Natural Questions 是一个问答基准测试，其特点包括来自谷歌搜索的真实问题，答案来自Wikipedia，并包含长答案和短答案。使用场景是评估开放域问答系统。例题可能是“谁写了《傲慢与偏见》？”，答案可能是“简·奥斯汀”。
+
+MS MARCO 是一个用于深度学习搜索的基准测试，其特点包括来自Bing的真实搜索查询、人工生成的答案，以及用于段落排序和文档排序的任务。使用场景是训练和评估检索模型。例题可能是“如何冲泡咖啡？”，系统需要检索相关段落来回答。
+
+BEIR 是一个用于零样本评估的异构信息检索基准测试，其特点包括18个数据集、9项任务，涵盖事实核查、问答、对话检索等。使用场景是评估检索模型在零样本设置下的泛化能力。例题可能是给定一个查询，在文档集合中检索相关文档。
+
+MIRACL 是一个多语言信息检索基准测试，其特点涵盖18种语言，专注于ad-hoc检索，并包含人工标注的相关性判断。使用场景是评估多语言检索模型。例题可能是用中文查询“全球变暖的原因”，并检索相关文档。
+
+WebShop 是一个模拟电子商务网站环境，用于评估语言智能体。其特点包括118万个真实产品、12,087条众包文本指令，以及需要导航、搜索和购买的任务。使用场景是评估智能体在电子商务任务中的表现。例题可能是“找到一款价格低于50美元的无线蓝牙耳机”。
+
+ALFWorld 是一个将文本和具身环境对齐的基准测试，用于在文本世界中训练智能体，然后将其转移到具身环境中。其特点包括基于文本的环境，与ALFRED数据集并行，并专注于指令跟随。使用场景是评估智能体在家庭任务中的规划能力。例题可能是“在厨房里找到一个杯子，把它拿到客厅”。
+
+Mind2Web 是一个用于评估网页通用智能体的基准测试，其特点包括来自真实网站的任务、需要多步交互，并专注于语言指令跟随。使用场景是评估智能体在复杂网页任务中的表现。例题可能是“在GitHub上创建一个新的仓库，命名为‘test-repo’”。
+
+VisualWebArena 是一个用于多模态智能体的基准测试，其特点包括真实视觉网页任务、需要理解视觉和文本元素，并评估功能正确性。使用场景是评估多模态智能体在视觉接地网页任务中的表现。例题可能是“在纽约时报网站上找到今天头条新闻的图片”。
+
+SearchBench 是一个用于评估LLM解决搜索问题能力的基准测试，其特点包括11种搜索问题类型、自动生成实例，并评估可行性、正确性和最优性。使用场景是评估LLM在组合搜索问题中的推理能力。例题可能是“在一个迷宫中找到从起点到终点的最短路径”。
+
+WebVLN 是一个网站上的视觉语言导航基准测试，其特点包括基于问题的指令、需要导航网站，并评估导航成功率。使用场景是评估智能体在网站中基于视觉和语言指令进行导航的能力。例题可能是“找到‘联系我们’页面并提取电子邮件地址”。
+
+WebNav 是一个用于目标驱动网页导航的基准测试，其特点包括部分观察环境、需要规划和理解自然语言指令。使用场景是评估智能体在网页导航中的能力。例题可能是“导航到维基百科上关于‘人工智能’的页面”。
+
+现在，我需要整理一份针对搜索智能体的基准测试列表。考虑到用户可能更关注中文资料，我会优先考虑那些有中文描述的基准测试，例如BrowseComp、HSCodeComp、GAIA、WebVoyager、WebArena、Mind2Web、WebShop、ALFWorld、HotpotQA、FEVER、TriviaQA、Natural Questions、MS MARCO、BEIR、MIRACL、SearchBench、WebVLN、WebNav。我会为每个基准测试提供特点、使用场景和例题。同时，我会引用相关来源，例如HSCodeComp论文、BrowseComp博客文章、WebVoyager论文、WebArena论文、GAIA论文、WebShop论文、SearchBench论文等。我的回答将用中文呈现，并确保内容全面、结构清晰。现在开始撰写回答。</think>针对搜索智能体（Search Agent）的评估，已涌现出多个侧重点不同的基准测试。以下将分类介绍这些基准，涵盖其核心特点、典型使用场景及例题。
+
+---
+
+## 🔍 1. 网页浏览/导航类基准
+这类基准要求智能体在真实或模拟的网页环境中执行多步操作，以完成复杂任务。
+
+| 基准 | 核心特点 | 使用场景 | 例题 |
+|------|----------|----------|------|
+| **BrowseComp**（OpenAI）【3†L16-L35】 | - 1,266 个挑战性问题，需持续导航、查找难以直接搜索的信息<br>- 问题设计保证人类在 10 分钟内无法解决，且现有模型（包括带浏览的 ChatGPT）也难以完成<br>- 答案为短字符串，便于自动验证 | 评估智能体在复杂网络浏览任务中的持久性、创造性搜索能力 | “找出 2018‑2023 年间在 EMNLP 会议上发表、第一作者本科毕业于达特茅斯学院、第四作者本科毕业于宾夕法尼亚大学的科学论文标题”【3†L45-L49】 |
+| **WebVoyager**【24†L18-L33】 | - 多模态（视觉+文本）端到端网页智能体基准<br>- 包含 300 个真实任务，覆盖 15 个常用网站（如 Amazon、Wikipedia）<br>- 使用 GPT‑4V 进行自动评估，与人工评估一致性达 85.3% | 评估智能体在真实网站上的端到端任务完成能力（多模态输入） | “在 Amazon 上找到‘无线蓝牙耳机’并按价格从低到高排序，然后选择最便宜的一款加入购物车” |
+| **WebArena**【25†L15-L35】 | - 完全可交互的、高度真实的网页环境（4 个领域：电商、论坛、协作开发、内容管理）<br>- 812 项长视野任务，评估功能正确性而非表面动作匹配<br>- 最佳 GPT‑4 智能体成功率仅 14.41%，远低于人类的 78.24% | 评估自主智能体在真实网页环境中的复杂任务执行能力 | “在电商网站中购买一双‘黑色运动鞋’，价格低于 $80，并将收货地址修改为纽约”【25†L86-L92】 |
+| **Mind2Web**【19†L4-L14】 | - 首个面向通用网页智能体的数据集，任务来自真实网站<br>- 支持在线评估（Online‑Mind2Web），包含 300 个实时任务、136 个流行网站<br>- 强调多步指令跟随与复杂交互 | 评估智能体在任意网站上的通用任务完成能力 | “在 GitHub 上创建一个名为 ‘test‑repo’ 的公开仓库，并添加一个 README.md 文件” |
+| **VisualWebArena**【20†L4-L10】 | - 多模态网页智能体基准，任务依赖视觉信息（截图）<br>- 包含丰富的视觉接地任务，如识别图片、按钮位置等<br>- 评估智能体在真实视觉网页环境中的表现 | 评估多模态智能体在视觉丰富的网页任务中的能力 | “在纽约时报网站找到今天头条新闻的配图，并描述图中主要物体” |
+| **WebVLN**【22†L4-L11】 | - 网站上的视觉‑语言导航任务，智能体需根据问题指令在网站中导航<br>- 数据集 WebVLN‑v1 包含多种网站导航场景<br>- 评估导航成功率与任务完成度 | 评估智能体在网站中基于视觉与语言指令的导航能力 | “找到‘联系我们’页面，并提取其中的邮箱地址” |
+| **WebNav**【23†L4-L11】 | - 目标驱动的网页导航基准，环境为部分观测<br>- 智能体需理解自然语言指令并在网页中规划动作序列<br>- 早期经典任务，常用于测试基础导航能力 | 评估智能体在部分观测网页环境中的规划与指令跟随能力 | “导航到 Wikipedia 上关于‘人工智能’的页面，并摘录第一段内容” |
+
+---
+
+## 🛠️ 2. 工具使用/通用助手类基准
+这类基准测试智能体调用外部工具（如搜索、计算、API）来解决实际问题的能力。
+
+| 基准 | 核心特点 | 使用场景 | 例题 |
+|------|----------|----------|------|
+| **GAIA**【26†L10-L25】 | - 466 个真实世界问题，需要推理、多模态处理、网页浏览、工具使用等综合能力<br>- 对人类简单（92% 准确率），但对最强 GPT‑4 仅 15%<br>- 答案唯一、事实性，支持自动评估 | 评估通用 AI 助手在需要多步工具使用的真实任务中的表现 | “2022 年法国总统选举的获胜者出生在哪个城市？”【26†L10-L25】 |
+| **ToolBench**（OpenBMB）【7†L4-L10】 | - 包含多样软件工具的真实任务基准<br>- 提供易用的基础设施（虚拟 API 服务器等）<br>- 评估 LLM 调用工具完成复杂任务的能力 | 评估智能体在工具调用与组合上的熟练度 | “使用计算器 API 计算 (3.14 * 7.2)^2，然后使用搜索 API 查找‘圆周率’的最新精确值” |
+| **HSCodeComp**【2†L16-L35】 | - 首个针对深度搜索智能体在分层规则应用上的专家级电商基准<br>- 任务：根据嘈杂的商品描述预测 10 位 HS 编码（关税规则）<br>- 632 个商品条目，标注由多位电商专家完成，最佳智能体准确率仅 46.8%（人类 95%） | 评估智能体在复杂规则（如关税分类）下的深层推理与搜索能力 | “给定商品描述‘男士纯棉针织T恤，领口为V领’，预测其 10 位 HS 编码”【2†L24-L32】 |
+
+---
+
+## ❓ 3. 问答/检索类基准
+这类基准主要评估智能体在开放域或特定领域的信息检索与问答能力。
+
+| 基准 | 核心特点 | 使用场景 | 例题 |
+|------|----------|----------|------|
+| **HotpotQA**【8†L4-L10】 | - 113k 个基于 Wikipedia 的多跳问答对<br>- 需要跨多个文档推理，并提供句子级支持事实<br>- 涵盖多样的问题类型（比较、列举、因果等） | 评估模型在需要多跳推理的开放域问答中的能力 | “《了不起的盖茨比》的作者还写过哪些小说？” |
+| **FEVER**【27†L16-L28】 | - 185,445 个基于 Wikipedia 的声明，需分类为 Supported/Refuted/NotEnoughInfo<br>- 要求提供证据句子（可多句、多页面）<br>- 挑战性高（最佳系统仅 31.87% 准确率） | 评估系统在事实核查与证据检索上的能力 | “斐济最大的岛屿是考艾岛。”【27†L48-L50】 |
+| **TriviaQA**【28†L5-L10】 | - 超过 650k 个问答‑证据三元组，问题由琐事爱好者编写<br>- 每个问题平均提供 6 篇证据文档，适合远程监督阅读<br>- 包含阅读理解和开放域 QA 两种任务设置 | 评估模型在开放域琐事问答中的检索与阅读理解能力 | “哪位演员在《星球大战：原力觉醒》中扮演凯洛·伦？” |
+| **Natural Questions**【11†L4-L8】 | - 来自 Google 搜索的真实用户问题，答案来自 Wikipedia<br>- 包含长答案（段落）和短答案（实体/日期等）<br>- 训练集 307k，开发/测试集各 8k | 评估开放域问答系统对真实用户查询的响应能力 | “谁写了《傲慢与偏见》？”（答案：“简·奥斯汀”） |
+| **MS MARCO**【12†L4-L10】 | - 基于 Bing 真实搜索查询的检索/问答基准<br>- 包含段落排序、文档排序、问答等任务<br>- 数据规模大（约 500k 查询，880 万段落） | 训练与评估检索模型在真实搜索场景下的表现 | “如何冲泡咖啡？”（系统需检索相关段落回答） |
+| **BEIR**【13†L4-L10】 | - 异构零样本检索基准，包含 18 个数据集、9 种任务（事实核查、QA、对话检索等）<br>- 评估模型在未见任务上的泛化能力<br>- 已成为检索模型的标准评估套件 | 评估检索模型在零样本设置下的跨任务泛化能力 | 给定查询“全球变暖的主要原因”，在文档集合中检索相关文档 |
+| **MIRACL**【14†L4-L10】 | - 多语言检索基准，覆盖 18 种语言（包括中文、阿拉伯语等）<br>- 专注于 ad‑hoc 检索，所有查询与文档均经过人工标注<br>- 提供多语言检索挑战（WSDM 2023 Cup） | 评估多语言检索模型在不同语言下的检索效果 | 中文查询：“全球变暖的原因”，检索相关中文文档 |
+
+---
+
+## 🧩 4. 组合搜索/推理类基准
+这类基准专门测试智能体在组合搜索问题上的逻辑推理与规划能力。
+
+| 基准 | 核心特点 | 使用场景 | 例题 |
+|------|----------|----------|------|
+| **SearchBench**【30†L22-L36】 | - 11 种独特的搜索问题类型（路径查找、谜题、子集和、排序、欠定系统等）<br>- 自动生成任意数量实例，并评估解决方案的可行性、正确性、最优性<br>- GPT‑4 在纯文本模式下仅解决 1.4%，通过 A* 代码生成提升至 11.7% | 评估 LLM 在组合搜索问题上的推理与规划能力 | “在一个 5x5 迷宫中，从起点 (0,0) 到终点 (4,4) 的最短路径是什么？（障碍物位置已知）”【30†L56-L64】 |
+
+---
+
+## 🛒 5. 特定领域/仿真环境基准
+这类基准针对特定领域（如电商、家庭环境）设计，测试智能体在仿真环境中的交互能力。
+
+| 基准 | 核心特点 | 使用场景 | 例题 |
+|------|----------|----------|------|
+| **WebShop**【29†L15-L30】 | - 模拟电商网站环境，包含 118 万真实商品、12,087 条众包指令<br>- 智能体需根据文本指令搜索、筛选、购买商品<br>- 奖励自动计算，最佳模型成功率 29%（人类 59%） | 评估语言智能体在电商任务中的导航、搜索与决策能力 | “找到一款价格低于 $50 的无线蓝牙耳机，评分至少 4 星，并加入购物车”【29†L20-L25】 |
+| **ALFWorld**【18†L4-L10】 | - 将文本环境（TextWorld）与具身环境（ALFRED）对齐的基准<br>- 智能体先在文本中学习抽象策略，然后在视觉环境中执行<br>- 包含 134 个家庭任务（如“在厨房找到杯子并拿到客厅”） | 评估智能体在文本与具身环境之间的迁移学习与规划能力 | “在厨房找到‘杯子’，把它拿到客厅放在桌子上” |
+
+---
+
+## 📊 总结
+以上基准覆盖了搜索智能体的不同能力维度：从**网页浏览、工具使用、问答检索**，到**组合搜索推理**以及**特定领域仿真**。选择合适的基准取决于您要评估的具体能力（如多模态交互、工具调用、多跳推理等）。这些基准大多提供公开的数据集与评估代码，可直接用于实验。
+
+> 注：本文基于 2025 年 11 月前的公开资料整理，部分基准可能已有更新版本，建议访问对应官网获取最新信息。<｜end▁of▁sentence｜>
\ No newline at end of file
diff --git a/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_output_search_wo_date.txt b/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_output_search_wo_date.txt
new file mode 100644
index 000000000000..bc99497a433d
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_output_search_wo_date.txt
@@ -0,0 +1,1069 @@
+<｜begin▁of▁sentence｜><｜User｜>
+
+## Tools
+
+You have access to a set of tools you can use to answer the user's question.
+You can invoke functions by writing a "<｜DSML｜function_calls>" block like the following as part of your reply to the user:
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="$FUNCTION_NAME">
+<｜DSML｜parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</｜DSML｜parameter>
+...
+</｜DSML｜invoke>
+<｜DSML｜invoke name="$FUNCTION_NAME2">
+...
+</｜DSML｜invoke>
+</｜DSML｜function_calls>
+
+String and scalar parameters should be specified as is without any escaping or quotes, while lists and objects should use JSON format. The "string" attribute should be set to "true" for string type parameters and "false" for other types (numbers, booleans, arrays, objects).
+
+If the thinking_mode is enabled, then after function results you should strongly consider outputting a thinking block. Here is an example:
+
+<｜DSML｜function_calls>
+...
+</｜DSML｜function_calls>
+
+<function_results>
+...
+</function_results>
+
+<think>...thinking about results</think>
+
+Here are the functions available in JSONSchema format:
+<functions>
+{"name": "search", "description": "Searches for information related to query and displays topn results.", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "The search query string"}, "topn": {"type": "integer", "description": "Number of top results to display", "default": 10}, "source": {"type": "string", "description": "Source to search within", "enum": ["web", "news"], "default": "web"}}, "required": ["query"], "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
+{"name": "open", "description": "Opens the link id from the page indicated by cursor starting at line number loc, showing num_lines lines. Valid link ids are displayed with the formatting: 【{id}†.*】. If cursor is not provided, the most recent page is implied. If id is a string, it is treated as a fully qualified URL associated with source. If loc is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available. Use this function without id to scroll to a new location of an opened page.", "parameters": {"type": "object", "properties": {"id": {"description": "Link ID (number) or fully qualified URL (string)", "anyOf": [{"type": "integer"}, {"type": "string"}], "default": -1}, "cursor": {"type": "integer", "description": "Page cursor indicator", "default": -1}, "loc": {"type": "integer", "description": "Starting line number", "default": -1}, "num_lines": {"type": "integer", "description": "Number of lines to show", "default": -1}, "view_source": {"type": "boolean", "description": "Whether to view source", "default": false}, "source": {"type": "string", "description": "Source associated with the URL", "enum": ["web", "news"], "default": "web"}}, "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
+{"name": "find", "description": "Finds exact matches of pattern in the current page, or the page given by cursor.", "parameters": {"type": "object", "properties": {"pattern": {"type": "string", "description": "The exact pattern to search for"}, "cursor": {"type": "integer", "description": "Page cursor to search in", "default": -1}}, "required": ["pattern"], "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
+{"name": "python", "description": "Use this tool to execute Python code. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files). When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds.", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": "The python code to execute in the jupyter environment."}}, "required": ["code"], "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
+</functions>
+
+
+# The user's message is: In a year before 2023, between December 1 and 15 (inclusive), a press release reported that as of the 5th of that month, a country had received a total of 16,201,670 vaccine doses, with 7,583,134 administered. That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services. The following year, the annual report stated that the organization had played a role in rescinding a previous administration’s plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%. Who was the emeritus board member that year? Name only, no titles.<｜Assistant｜><think>The user asks: "In a year before 2023, between December 1 and 15 (inclusive), a press release reported that as of the 5th of that month, a country had received a total of 16,201,670 vaccine doses, with 7,583,134 administered. That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services. The following year, the annual report stated that the organization had played a role in rescinding a previous administration’s plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%. Who was the emeritus board member that year? Name only, no titles."
+
+We need to parse the puzzle.
+
+We have clues about a press release about vaccine doses for a country, dated between Dec 1-15 inclusive, reporting as of 5th of that month total doses received 16,201,670 and administered 7,583,134. That is likely a COVID-19 vaccine update for some country. Which country? Might be a smaller country. Could be something like "Uganda", "Zimbabwe", "Kenya", "Philippines", etc. Let's search.
+
+Search for "16,201,670 vaccine doses" "7,583,134 administered".</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">"16,201,670" vaccine doses</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[0] "16,201,670" vaccine doses (Search_Results/"16,201,670" vaccine doses)
+**viewing lines [0 - 38] of 38**
+
+L0: 
+L1: URL: Search_Results/"16,201,670" vaccine doses
+L2: # Search Results
+L3: 
+L4:   * 【0†Kenya's Economy is Showing Resilience as Output Rises ...; publish_date: 
+L5: none†www.worldbank.org】 Dec 14, 2021 — As of December 5, 2021, Kenya had 
+L6: received a total of 16,201,670 vaccines, with 7,583,134 administered. While 
+L7: vaccine acceptance is ...
+L8:   * 【1†MINISTRY OF HEALTH; publish_date: none†covidhub.mediacouncil.or.ke】 Dec 
+L9: 1, 2021 — Total Doses Received 16,201,670. Table 10 gives the total vaccines 
+L10: received since the start of Covid -19 vaccination exercise in the country.
+L11:   * 【2†Output Result Page; publish_date: none†open.unicef.org】 ... 16,201,670 
+L12: doses of multiple vaccines nationwide and full vaccination of 15.5 per cent with
+L13:  two doses of COVID-19 vaccine as of 31 December 2021.
+L14:   * 【3†rebased GDP; publish_date: none†documents1.worldbank.org】 Dec 7, 2021 — 
+L15: As of December 5, 2021,. Kenya had received a total of 16,201,670 vaccines, 
+L16: with. 7,583,134 administered. Vaccine acceptance is reportedly high.
+L17:   * 【4†Integrated Annual Report; publish_date: none†www.co-opbank.co.ke】 May 27,
+L18:  2022 — ... doses of Covid-19 vaccines and administered close to 17 million ... 
+L19: 16,201,670, huku 7,583,134 zikiwa tayari zimedungwa watu. Bado kuna ...
+L20:   * 【5†World Bank lifts Kenya's growth prospect to 5% in 2021; publish_date: 
+L21: none†www.africa-press.net】 ... 16,201,670 vaccines, with 7,583,134 administered.
+L22:  While vaccine acceptance is reportedly high there is still a long way to go 
+L23: towards the government's ...
+L24:   * 【6†2020 Annual Report United States; publish_date: none†www.hi-us.org】 
+L25: including medicine, vaccines, Covid-19 prevention kits, and kits of ... 
+L26: 16,201,670. 9,630,584. Prepaid expenses. 56,707. 22,601. Total current assets.
+L27:   * 【7†HOSPITAL CENTRAL DE LA DEFENSA; publish_date: 
+L28: none†contrataciondelestado.es】 16201670 X. URPA-URPA. 0. HCD-000393/00. KONTRON.
+L29:  7000 ... LASER SHOT LBP-1120. CNEUR-C. NEUROFISIOLOGÍA. 0 ... VICTOREEN CT HEAD
+L30:  DOSE PHANTON. 43650-014.
+L31:   * 【8†body metal concentrations: Topics by ...; publish_date: 
+L32: none†www.science.gov】 PMID:16201670. Design Characteristics and Tobacco Metal 
+L33: Concentrations in Filtered Cigars. PubMed. Caruso, Rosalie V; O'Connor, Richard 
+L34: J; Travers, Mark J ...
+L35:   * 【9†Public Offices, Personal Demands: Capability in ...; publish_date: 
+L36: none†dokumen.pub】 ... (16201670).9 The Institutiones Politicae consists of two 
+L37: books. Book one deals with the origin, nature and goal of the commonwealth on 
+L38: the one hand, and the ...</result>
+</function_results>
+
+<think>Result shows Kenya. So the press release is about Kenya's vaccine doses as of Dec 5, 2021. The press release likely by World Bank or something. The search result shows "World Bank lifts Kenya's growth prospect..." maybe a press release dated Dec 14, 2021. But the clue says "between December 1 and 15 (inclusive), a press release reported that as of the 5th of that month, a country had received a total of 16,201,670 vaccine doses, with 7,583,134 administered." That matches Kenya on Dec 5, 2021. So the year is 2021.
+
+Now the next clue: "That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services."
+
+This sounds like an environmental or conservation organization. "Nearly 2.2 million members and supporters contributed over $49 million." That might be something like "The Nature Conservancy", "World Wildlife Fund", "Sierra Club", "Environmental Defense Fund", "Natural Resources Defense Council", etc. Let's search.
+
+Search for "nearly 2.2 million members and supporters contributed over $49 million" "12% of total revenue coming from contributed services".</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">"nearly 2.2 million members" "contributed over $49 million"</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[1] "nearly 2.2 million members" "contributed over $49 million" (Search_Results/"nearly 2.2 million members" "contributed over $49 million")
+**viewing lines [0 - 41] of 41**
+
+L0: 
+L1: URL: Search_Results/"nearly 2.2 million members" "contributed over $49 million"
+L2: # Search Results
+L3: 
+L4:   * 【0†The Global Good Fund, Microsoft and Goodwill join forces ...; 
+L5: publish_date: none†globalgoodfund.org】 Sep 28, 2022 — ... more than 2.2 million 
+L6: people. They additionally contribute close to $49 billion in local, state and 
+L7: federal tax revenues. When COVID hit ...
+L8:   * 【1†Almost 22 billion American tax dollars spent to wipe out a ...; 
+L9: publish_date: none†www.facebook.com】 US military funding for Israel's war crimes
+L10:  in Lebanon and Gaza has now cost US taxpayers over $22 billion. When millions 
+L11: struggle to afford the ...
+L12:   * 【2†Corporate America has largely abandoned its post-January ...; 
+L13: publish_date: none†www.citizensforethics.org】 Jul 29, 2025 — Since the January 6
+L14:  insurrection, over 2,000 corporate and industry group PACs have given over $174
+L15:  million to members of the Sedition ...
+L16:   * 【3†Audit shows millions in questionable taxpayer spending at ...; 
+L17: publish_date: none†www.aol.com】 18 hours ago — ... nearly doubled from 1.3 
+L18: million to about 2.2 million. That is more than one in four Washington state 
+L19: residents receiving Medicaid, and the ...
+L20:   * 【4†Incarceration and Poverty in the United States - AAF; publish_date: 
+L21: none†www.americanactionforum.org】 Jun 30, 2020 — The United States currently 
+L22: incarcerates 2.2 million people, nearly half of whom are non-violent drug 
+L23: offenders, accused people held pre ...
+L24:   * 【5†How the NRA Spends Revenue (2023); publish_date: none†paddockpost.com】 
+L25: Apr 25, 2025 — In 2023, the NRA reported total revenue of $176 million (compared
+L26:  to $211 million in 2022, $227 million in 2021, $282 million in 2020, $292 ...
+L27:   * 【6†NCA Newsroom; publish_date: none†www.ncausa.org】 ... 2.2 million U.S. 
+L28: jobs—operating in every U.S. state and territory and contributing nearly $350 
+L29: billion to the U.S. economy every year. For media inquiries ...
+L30:   * 【7†The Early Childhood Educator Workforce; publish_date: 
+L31: none†cscce.berkeley.edu】 About 2.2 million adults are paid to care for and 
+L32: educate more than 9.7 million children between birth and age five in center-
+L33: based and home-based settings.
+L34:   * 【8†Bringing Out the Worst in Congress: CRA By the Numbers ...; publish_date:
+L35:  none†progressivereform.org】 Jul 14, 2025 — In fact, the top ten recipients (out
+L36:  of 25) account for almost $7 million, showing how these contributions are 
+L37: targeted and strategic. Real ...
+L38:   * 【9†Club 100: Over 100 Americans gave more than $100000 to ...; publish_date:
+L39:  none†publicintegrity.org】 Jan 7, 2022 — Through Internet investments during the
+L40:  1990s, he attracted 2.2 million online accounts valued at $174 million. Schwab 
+L41: has been a vocal ...</result>
+</function_results>
+
+<think>Not that. Try search for "2.2 million members and supporters contributed over $49 million".</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">"2.2 million members" "contributed over $49"</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[2] "2.2 million members" "contributed over $49" (Search_Results/"2.2 million members" "contributed over $49")
+**viewing lines [0 - 2] of 2**
+
+L0: 
+L1: URL: Search_Results/"2.2 million members" "contributed over $49"
+L2: # Search Results</result>
+</function_results>
+
+<think>Maybe it's an annual report of a non-profit. Search for "annual report 2.2 million members contributed $49 million".</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">"2.2 million members" "annual report" $49 million</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[3] "2.2 million members" "annual report" $49 million (Search_Results/"2.2 million members" "annual report" $49 million)
+**viewing lines [0 - 36] of 36**
+
+L0: 
+L1: URL: Search_Results/"2.2 million members" "annual report" $49 million
+L2: # Search Results
+L3: 
+L4:   * 【0†20-F; publish_date: none†www.sec.gov】 ANNUAL REPORT PURSUANT TO SECTION 
+L5: ... Our membership grew from 2.1 million members as at December 31, 2023 to 2.2 
+L6: million members as at December 31, 2024.
+L7:   * 【1†Oportun Reports Fourth Quarter and Full Year 2023 ...; publish_date: 
+L8: none†investor.oportun.com】 Mar 12, 2024 — Oportun (Nasdaq: OPRT) is a mission-
+L9: driven fintech that puts its 2.2 million members' financial goals within reach. 
+L10: ... annual report on ...
+L11:   * 【2†2 0 21; publish_date: none†www.annualreports.com】 ANNUAL REPORT. 2. 0. 
+L12: 21. 2. 0. 21. Page 2. 2. DEFENDERS OF WILDLIFE. 2. 0. 21. 2. 0. 21 ... In 2021, 
+L13: Defenders of Wildlife's nearly 2.2 million members and.
+L14:   * 【3†Annual report and accounts 2020; publish_date: none†www.3i.com】 
+L15: Disclaimer. The Annual report and accounts have been prepared solely to provide 
+L16: information to shareholders. ... 2.2 million members. In December 2019, we sold 
+L17: ...
+L18:   * 【4†united states securities and exchange commission; publish_date: 
+L19: none†s21.q4cdn.com】 Dec 14, 2018 — ☒ ANNUAL REPORT ... approximately 5.7 million
+L20:  Members, including approximately 2.2 million Members from the acquisition of 
+L21: Best Doctors.
+L22:   * 【5†securities and exchange commission; publish_date: none†www.sec.gov】 ... 
+L23: Annual Report on Form 10-K filed with the SEC on March 10, 2020. The ... 2.2 
+L24: million members at June 30, 2020. The following table highlights the ...
+L25:   * 【6†2022-23 Annual Comprehensive Financial Report - CalPERS; publish_date: 
+L26: none†www.calpers.ca.gov】 Nov 17, 2023 — ... annual report is consistent with 
+L27: these accounting principles ... 2.2 million members and health benefit services 
+L28: to over 1.5 million ...
+L29:   * 【7†Medical Centers Report; publish_date: 
+L30: none†regents.universityofcalifornia.edu】 Oct 8, 2019 — which will allow its 2.2 
+L31: million members to receive advanced and ... annual report of the University of 
+L32: California. For the Medical ...
+L33:   * 【8†Working TogeTher to make HealtH care better; publish_date: 
+L34: none†www.unitedhealthgroup.com】 Feb 8, 2012 — ANNUAL REPORT PURSUANT TO SECTION 
+L35: 13 OR 15(d) OF THE ... approximately 2.2 million members enrolled in its 
+L36: Medicare. Advantage ...</result>
+</function_results>
+
+<think>Result 2 shows "Defenders of Wildlife". That might be the organization. Let's open.</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">3</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">2</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[4] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf)
+**viewing lines [0 - 53] of 261**
+
+L0: 
+L1: URL: https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-
+L2: wildlife_2021.pdf
+L3: ===== Page 1 ===== 2021 ANNUAL REPORT ===== Page 2 ===== # DEFENDERS OF WILDLIFE
+L4:  made important progress for imperiled species and vital landscapes across the 
+L5: United States in 2021. \--- **LAWYERS** immediately **challenged** the premature
+L6:  and reckless decision to strip **gray wolves** of federal **Endangered Species 
+L7: Act (ESA)** protections. For many decades, Defenders has led the effort to 
+L8: protect and restore the gray wolf, and we will continue to fight the 
+L9: unscientific and hostile anti-wolf policies that impede conservation progress 
+L10: and will carry on our unrelenting battle to restore federal protections for this
+L11:  iconic keystone species. \--- **LOBBYISTS** worked around the clock to keep 
+L12: wildlife and climate priorities in the **Infrastructure Investment and Jobs 
+L13: Act**. We also continue fighting to keep important wildlife and habitat funding 
+L14: in relevant **appropriations bills**. \--- 2 DEFENDERS OF WILDLIFE ===== Page 3 
+L15: ===== POLICY EXPERTS pushed forward on the urgent need for a National 
+L16: Biodiversity Strategy (NBS), an all-of-government approach to address the 
+L17: unprecedented loss of wildlife and habitat we are experiencing. We have coupled 
+L18: this with our new campaign to expand the National Wildlife Refuge System to 
+L19: preserve our nation’s only lands set aside for wildlife. By defending, funding 
+L20: and expanding our national wildlife refuges, we will directly address 
+L21: biodiversity loss and climate change while promoting increased equitable access 
+L22: to nature. FIELD TEAMS were on the ground helping to recover imperiled species. 
+L23: From panthers and sea turtles in Florida to wolves, bison and black-footed 
+L24: ferrets in Montana, Defenders’ conservation experts were in the field saving 
+L25: wildlife all over the country. CONSERVATION INNOVATION EXPERTS provided 
+L26: comprehensive analyses to guide policy and inform conservation strategies to 
+L27: reach the goal of protecting 30% of our terrestrial and marine systems by 2030 
+L28: (“30x30”). Defenders’ Center for Conservation Innovation (CCI) produced a report
+L29:  which details actions we need to take to achieve 30x30 while protecting 
+L30: biodiversity and addressing the climate crisis. DEFENDERS.ORG ===== Page 4 =====
+L31:  WE HAVE ACCOMPLISHED MUCH THIS YEAR WORKING WITH AN ADMINISTRATION THAT VALUES 
+L32: SCIENCE AND CONSERVATION. Our many successes include the return of protections 
+L33: to the Tongass National Forest in Alaska and the suspension of oil and gas 
+L34: leasing permits for the Arctic National Wildlife Refuge. Defenders also played a
+L35:  lead role in successfully defending the Migratory Bird Protection Act from 
+L36: attack and securing critical protections for migratory birds. Throughout 2021 
+L37: Defenders made important progress for the wildlife and wild places we all love, 
+L38: yet our nation’s wildlife still face unprecedented challenges. More and more 
+L39: species, including manatees, Mexican gray wolves, polar bears and North Atlantic
+L40:  right whales, face the very real threat of extinction because of climate 
+L41: change, habitat loss, pollution and inadequate protections. In our work we 
+L42: continue to demonstrate success by following the science, building partnerships,
+L43:  leading with innovative solutions and focusing on collaborative and practical 
+L44: approaches in defense of nature. Together we can make a lasting difference for 
+L45: wildlife and habitats. Your support is essential to our work and we couldn’t do 
+L46: it without you. Thank you for being an important part of our efforts to protect 
+L47: imperiled wildlife across our nation. Jamie Rappaport Clark President and CEO 
+L48: John Dayton Board Chair OUR NEW TELEVISION SERIES, WILDLIFE NATION WITH JEFF 
+L49: CORWIN, premiered in October and airs every weekend on ABC stations nationwide. 
+L50: This groundbreaking series takes Jeff across the country to meet Defenders’ 
+L51: experts along with our science, tribal and community partners to educate and 
+L52: inspire viewers to join with Defenders to protect wildlife. While highlighting 
+L53: some of our important conservation work and raising awareness of important </result>
+</function_results>
+
+<think>Search within PDF for "2.2 million members".</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="find">
+<｜DSML｜parameter name="cursor" string="false">4</｜DSML｜parameter>
+<｜DSML｜parameter name="pattern" string="true">2.2</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[5] Find results for text: `2.2` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf/find?pattern=2.2)
+**viewing lines [0 - 4] of 4**
+
+L0: # 【0†match at L187】
+L1: 2021, Defenders of Wildlife’s nearly 2.2 million members and supporters 
+L2: contributed more than $49 million for wildlife and wildlife habitat. This figure
+L3:  includes all those who generously donated their time and expertise. The 
+L4: steadfast support of our donors allows Defenders to sustain our program and </result>
+</function_results>
+
+<think>Open around L180.</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">4</｜DSML｜parameter>
+<｜DSML｜parameter name="loc" string="false">170</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[6] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf)
+**viewing lines [170 - 215] of 261**
+
+L170: into the wild. **10 DEFENDERS OF WILDLIFE** ===== Page 11 ===== AFTER ANOTHER 
+L171: SUCCESSFUL DEFENDERS LAWSUIT ON BEHALF OF THE CRITICALLY ENDANGERED RED WOLF, 
+L172: FWS reversed its decision to limit the recovery area and committed to a robust 
+L173: release strategy. After years of inaction, FWS released eight wolves to the wild
+L174:  in North Carolina and plan to release nine more wolves in the spring of 2022. 
+L175: This is an incredible win for this critically endangered species whose 
+L176: population has dwindled down to single digits in the wild because of 
+L177: mismanagement, vehicle strikes and poaching. DEFENDERS CONTINUED TO LEAD EFFORTS
+L178:  TO PROTECT THE FLORIDA MANATEE, a beloved species that suffered the deadliest 
+L179: year on record in 2021, tragically surpassing 1,000 deaths because of water 
+L180: pollution and lack of warm water habitat. Defenders led advocacy and education 
+L181: aimed at restoring the natural flow of the dammed Ocklawaha River, which would 
+L182: provide critical warm-water habitat that manatees need to survive. Defenders’ 
+L183: legal team continued to fight for manatees in the courts, holding government 
+L184: agencies accountable for protecting critical habitat and addressing the 
+L185: devastating water pollution that is killing the seagrass and causing manatees to
+L186:  starve. DAVID TES | SAM FRENZY DRAW DEFENDERS.ORG 11 ===== Page 12 ===== In 
+L187: 2021, Defenders of Wildlife’s nearly 2.2 million members and supporters 
+L188: contributed more than $49 million for wildlife and wildlife habitat. This figure
+L189:  includes all those who generously donated their time and expertise. The 
+L190: steadfast support of our donors allows Defenders to sustain our program and 
+L191: public education efforts in the field, the courts and on Capitol Hill. 2021 
+L192: SOURCES OF FUNDS Grants and contributions $29,057 Bequests, trusts and split 
+L193: interests $7,692 Income from investments, annuity reserve funds and trusts 
+L194: $3,354 Royalties and other income $3,576 Contributed services $6,140 Total 
+L195: Revenue $49,819 USES OF FUNDS Biodiversity conservation $22,420 Constituency 
+L196: mobilization $16,324 Fundraising $1,211 Management and general $5,865 Total 
+L197: Expenses $45,820 Change in net assets $3,999 Net assets, start of year $41,145 
+L198: Net Assets, End of the Year $45,144 Dollars are in thousands. 12 DEFENDERS OF 
+L199: WILDLIFE Grants and contributions 58% Income from investments 7% Requests, 
+L200: trusts and split interests 15% Royalties and other income 7% Contributed 
+L201: services 12% Program and support services 97% Fundraising 3% ===== Page 13 =====
+L202:  DEFENDERS.ORG 13 ===== Page 14 ===== John Dayton, Esq.* Chair Texas Sheryl 
+L203: Sachs* Vice Chair Connecticut Whitfield Bailey* Secretary Tennessee Mark Caylor*
+L204:  Treasurer District of Columbia Sylvia Baca* California Donald Barry, Esq. 
+L205: Oregon Dinah Bear, Esq. Arizona Kim O’Keefe Beck, Ph.D California Gabriela 
+L206: Chavarria, Ph.D Washington Jamie Rappaport Clark* President & CEO Virginia 
+L207: Caroline D. Gabel Maryland Keith M. Harper, Esq. Maryland John Donovan, Esq. 
+L208: Montana and California James M. Hecker, Esq. Virginia Judith Ponsikoff, Ph.D* 
+L209: California Terry L. Root, Ph.D Florida BOARD EMERITUS Terry C. Pelster, Esq. New
+L210:  York *Executive Committee member* ST A F F & A D V I S O R S SCIENCE ADVISORS 
+L211: Gabriela Chavarria, Ph.D Burke Museum, University of Washington Leah Gerber, 
+L212: Ph.D Arizona State University Kathayoon Khalil, Ph.D Oregon Zoo Rurik List, Ph.D
+L213:  Universidad Autónoma Metropolitana-Lerma Barry Noon, Ph.D Colorado State 
+L214: University Terry L. Root, Ph.D Stanford University Mark Schwartz, Ph.D 
+L215: University of California-Davis David S. Wilcox, Ph.D Princeton University Rae </result>
+</function_results>
+
+<think>Scrolling further for "Contributed services 12%". Already seen. So the annual report is Defenders of Wildlife 2021 annual report.
+
+Next clue: "The following year, the annual report stated that the organization had played a role in rescinding a previous administration’s plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%."
+
+Thus, the following year (2022) annual report of Defenders of Wildlife includes mention of rescinding plan to open most of 22 million acres of NPR-A to oil and gas drilling. Also contributed services decreased to 10% (from 12% in 2021). So we need to find the 2022 annual report for Defenders of Wildlife.
+
+Search for "Defenders of Wildlife 2022 annual report National Petroleum Reserve Alaska".</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">Defenders of Wildlife 2022 annual report National Petroleum Reserve Alaska</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[7] Defenders of Wildlife 2022 annual report National Petroleum Reserve Alaska (Search_Results/Defenders of Wildlife 2022 annual report National Petroleum Reserve Alaska)
+**viewing lines [0 - 37] of 37**
+
+L0: 
+L1: URL: Search_Results/Defenders of Wildlife 2022 annual report National Petroleum 
+L2: Reserve Alaska
+L3: # Search Results
+L4: 
+L5:   * 【0†CELEBRATING YEARS; publish_date: none†www.annualreports.com】 With less 
+L6: than 340 right whales left in the wild, Defenders is fighting tirelessly to end 
+L7: deadly lobster gear entanglements and vessel strikes that are driving ...
+L8:   * 【1†Financials; publish_date: none†defenders.org】 We invite you to explore 
+L9: the reports below to learn more about our activities and accomplishments, and 
+L10: how we put your money to work for wildlife.
+L11:   * 【2†Alaska Program Looks Back on 2022; publish_date: none†defenders.org】 Feb 
+L12: 9, 2023 — Thanks to a lawsuit joined by Defenders, seven million acres were 
+L13: returned to protection within the National Petroleum Reserve-Alaska (NPR-A), ...
+L14:   * 【3†Defenders-of-Wildlife-2022-Financial-Statement. ...; publish_date: 
+L15: none†defenders.org】 We have audited the accompanying consolidated financial 
+L16: statements of Defenders of Wildlife and Affiliated Defenders of Wildlife Action 
+L17: Fund (collectively, ...
+L18:   * 【4†2022 Annual Report; publish_date: none†alaskaconservation.org】 Jun 13, 
+L19: 2023 — In 2022, we focused on three landscapes: the Arctic. National Wildlife 
+L20: Refuge, Bristol Bay, and the Tongass National Forest. In March 2022,.
+L21:   * 【5†Assessment of ecological and cultural values within the ...; 
+L22: publish_date: none†www.blm.gov】 This document was written to provide technical 
+L23: information regarding the ecological importance of the National Petroleum 
+L24: Reserve – Alaska (NPR-A). Several ...
+L25:   * 【6†Accomplishments Report; publish_date: none†defenders.org】 National 
+L26: Petroleum Reserve-Alaska. Identified for exceptional wildlife and cultural 
+L27: values, including critical habitat for polar bears and other species ...
+L28:   * 【7†2022 annual report; publish_date: none†dory-
+L29: plantain-s2zc.squarespace.com】 These projects are made possible through 
+L30: collaborations with Defenders of. Wildlife, Cook Inletkeeper, Trustees for 
+L31: Alaska,. Environmental Investigation Agency, ...
+L32:   * 【8†23IMPACT REPORT; publish_date: none†www.annualreports.com】 Defenders of 
+L33: Wildlife made incredible progress protecting wildlife and wild places in 2023, 
+L34: helping shape a brighter future for imperiled species and vital ...
+L35:   * 【9†From Leasing to Land Protections; publish_date: none†defenders-cci.org】 
+L36: Abstract. When you hear the name, National Petroleum Reserve - Alaska (NPR-A, or
+L37:  Reserve), you might think of a barren land filled with oil wells and ...</result>
+</function_results>
+
+<think>Open result 0.</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">7</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[8] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf)
+**viewing lines [0 - 53] of 289**
+
+L0: 
+L1: URL: https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-
+L2: wildlife_2022.pdf
+L3: ===== Page 1 ===== 2022 Impact Report C E L E B R A T I N G Y E A R S ===== Page
+L4:  2 ===== 2022 Defenders of Wildlife made important progress for imperiled 
+L5: species and vital landscapes across the United States in 2022. GRAY WOLF | JIM 
+L6: GUMMERAL MAY STOCK PRIOR Lawyers successfully challenged the previous 
+L7: administration’s decision to delist the gray wolf and restored critical federal 
+L8: protections under the Endangered Species Act. This latest triumph in court is 
+L9: part of our ongoing battle to protect and restore gray wolves throughout their 
+L10: historical range and shield them from persecution by extremist legislators in 
+L11: Idaho, Montana and Wyoming. TWO MORE FATALIZED GRAY SWALLETS TO SEA TO SHARE 
+L12: ALLIANCE Lobbyists worked around the clock to expand funding for wildlife 
+L13: conservation in the FY2022 federal spending bill, which included $31 million (a 
+L14: 44% increase) for the Bureau of Land Management’s Threatened and Endangered 
+L15: Species Program, $2.5 million (an 81% increase) for the U.S. Department of 
+L16: Agriculture Wildlife Services’ Nonlethal Initiative to prevent human-wildlife 
+L17: conflicts and $21 million (a 320% increase) for North Atlantic right whale 
+L18: conservation. 2 DEFENDERS OF WILDLIFE ===== Page 3 ===== **Policy Experts** 
+L19: played a crucial role in securing international trade protections for 100 
+L20: species of sharks and rays, all 158 species of glass frogs and 73 species of 
+L21: reptiles, including 21 species of desert horned lizards, at the Convention on 
+L22: International Trade in Endangered Species (CITES) in Panama. \--- **Field 
+L23: Teams** worked tirelessly to protect and restore imperiled species across the 
+L24: country. From Florida manatees and red wolves in the Southeast to belugas and 
+L25: grizzly bears in Alaska, Defenders’ conservation experts were on the ground 
+L26: saving species that need our help to survive and thrive. \--- **Conservation 
+L27: Innovation Experts** published more than 10 peer-reviewed studies on topics that
+L28:  include the Cook Inlet beluga whale, golden-cheeked warbler, global parrot 
+L29: biodiversity, the Endangered Species Act, the effects of mountaintop removal 
+L30: mining on endangered species, the ecological importance of panthers and the 
+L31: implementation of “30x30” – the globally recognized goal to which President 
+L32: Biden committed the U.S. to conserve 30% of our imperiled lands and waters by 
+L33: 2030. \--- **DEFENDERS.ORG** ===== Page 4 ===== THANK YOU Defenders celebrated 
+L34: our 75th anniversary in 2022—an exciting milestone that we attribute to our 
+L35: unwavering dedication to our wildlife conservation mission. From helping to pass
+L36:  the Endangered Species Act and other bedrock conservation laws to leading the 
+L37: advocacy on the reintroduction of wolves to Yellowstone National Park and other 
+L38: parts of the West, Defenders is proud to inspire, lead and ultimately celebrate 
+L39: some of the most consequential conservation wins for wildlife and wild places. 
+L40: It is wonderfully fitting that we also mark 2022 as a year when we successfully 
+L41: defended, yet again, the original focal species and symbol of our organization, 
+L42: the gray wolf. While the future for gray wolves and many other species remains 
+L43: uncertain, we will never stop fighting to protect the imperiled wildlife that 
+L44: need us and to restore vibrant and balanced ecosystems. Our mission is more 
+L45: important than ever due to the escalating biodiversity and climate crises. 
+L46: Defenders is determined to build on our accomplishments and seize new 
+L47: opportunities to address these challenges through our multifaceted approach to 
+L48: conservation. We focus on developing innovative and science-based solutions, 
+L49: coordinating on-the-ground species and habitat restoration, promoting human-
+L50: wildlife coexistence, protecting and strengthening our bedrock environmental 
+L51: laws, investing in impactful collaborations and partnerships, building a more 
+L52: diverse and powerful conservation movement and challenging attacks on wildlife 
+L53: through advocacy, policy and litigation. Defenders’ continued success is made </result>
+</function_results>
+
+<think>Search within PDF for "National Petroleum Reserve".</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="find">
+<｜DSML｜parameter name="cursor" string="false">8</｜DSML｜parameter>
+<｜DSML｜parameter name="pattern" string="true">Petroleum</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[9] Find results for text: `petroleum` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf/find?pattern=petroleum)
+**viewing lines [0 - 4] of 4**
+
+L0: # 【0†match at L201】
+L1: the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas 
+L2: drilling and removed protections for important wildlife habitat. Protections 
+L3: have now been restored for nearly half of the reserve’s pristine lands, which 
+L4: are vital habitat for shorebirds, denning polar bears and tens of thousands of </result>
+</function_results>
+
+<think>Open around L190.</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">8</｜DSML｜parameter>
+<｜DSML｜parameter name="loc" string="false">180</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[10] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf)
+**viewing lines [180 - 227] of 289**
+
+L180: the sixth successful transfer of bison to the Assiniboine and Sioux Tribes of 
+L181: Fort Peck since 2019. \--- **SWIFT FIX KITS | © ROSIMA PAELARINTSKIMMA MADDIAL 
+L182: 200 AND CONSERVATION BIOLOGY INSTITUTE** \--- **Celebrating our third year** of 
+L183: a collaborative program with the Aaniih and Nakoda Tribes and others to restore 
+L184: swift foxes to the Fort Belknap Indian Reservation in Montana, Defenders helped 
+L185: with the release of 28 more swift foxes. With over 100 foxes reintroduced 
+L186: through this program, monitoring efforts show that they are reproducing in the 
+L187: wild—a critical measure of success for a self-sustaining population. \--- 
+L188: **Defenders continued to lead the way** for conserving and recovering the 
+L189: endangered black-footed ferret, supporting the black-footed ferret survey for 
+L190: the Fort Belknap Indian community. Thirty-six ferrets were vaccinated against 
+L191: sylvatic plague and two dozen kits were released in the wild. \--- **10 
+L192: DEFENDERS OF WILDLIFE** ===== Page 11 ===== Defenders helped to bring hope for 
+L193: recovery for the endangered military macaw, adding 11 fledglings to a growing 
+L194: wild population in Puerta Vallarta, Mexico, that is under pressure from habitat 
+L195: loss and poachers for the illegal pet trade. Accord- ing to our recent report, 
+L196: the 2008 parrot trade ban that Defenders fought to achieve is working. 
+L197: Preventing more than 30,000 parrots from being illegally trapped each year, the 
+L198: trade ban has resulted in a 47% decrease in the illegal trade of parrots and an 
+L199: 88% decrease in U.S. seizures of Mexican parrots. As a result of a Defenders 
+L200: lawsuit, BLM rescinded the previous administration’s plan that opened most of 
+L201: the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas 
+L202: drilling and removed protections for important wildlife habitat. Protections 
+L203: have now been restored for nearly half of the reserve’s pristine lands, which 
+L204: are vital habitat for shorebirds, denning polar bears and tens of thousands of 
+L205: caribou in the Teshekpuk caribou herd. Our new storymap illustrates why the 
+L206: reserve is so important to wildlife. Defenders marked the 20th anniversary of 
+L207: our Sea Otter Awareness Week. In celebration of this milestone, we sponsored 
+L208: state legislation, signed by Gov. Gavin Newsom, that formally recognizes the 
+L209: event’s anniversary and acknowledges the vital role that sea otters play in 
+L210: California’s nearshore ecosystems. This annual celebration provides a unique 
+L211: opportunity to raise awareness of the important role sea otters play in 
+L212: maintaining ecosystem health, our need to coexist with sea otters and the 
+L213: efforts of Defenders and others in advancing sea otter conservation. 
+L214: DEFENDERS.ORG ===== Page 12 ===== FINANCIAL REPORT In 2022, Defenders of 
+L215: Wildlife’s nearly 2.2 million members and supporters contributed more than $41 
+L216: million for wildlife and wildlife habitat. This figure includes all those who 
+L217: generously donated their time and expertise. The steadfast support of our donors
+L218:  allows Defenders to sustain our program and public education efforts in the 
+L219: field, the courts and on Capitol Hill. 2022 SOURCES OF FUNDS Grants and 
+L220: contributions $28,946 Bequests, trusts and split interests $13,928 Income from 
+L221: investments, annuity reserve funds and trusts (-$7,495) Royalties and other 
+L222: income $744 Contributed services $5,047 Total Revenue $41,170 USES OF FUNDS 
+L223: Biodiversity conservation $18,970 Constituency mobilization $16,528 Fundraising 
+L224: $1,029 Management and general $6,354 Total Expenses $42,881 Change in net assets
+L225:  (-$1,711) Net assets, start of year $45,144 Net Assets, End of the Year $43,433
+L226:  Dollars are in thousands. 12 DEFENDERS OF WILDLIFE Grants and contributions 59%
+L227:  Contributed services 10% Royalties and other income 2% Bequests, trusts and </result>
+</function_results>
+
+<think>Scrolling a bit more for "Contributed services 10%". Already seen. So indeed the 2022 annual report shows contributed services 10%, down from 12% in 2021.
+
+Thus the organization is Defenders of Wildlife.
+
+The question: "Who was the emeritus board member that year? Name only, no titles."
+
+We need to identify the emeritus board member listed in the 2022 annual report. From the 2021 annual report, we saw "BOARD EMERITUS Terry C. Pelster, Esq." However, that is for 2021. The question asks "Who was the emeritus board member that year?" That year refers to the following year (2022). So we need to check the 2022 annual report for the emeritus board member.
+
+Open the 2022 PDF near the board list. Search for "Emeritus".</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="find">
+<｜DSML｜parameter name="cursor" string="false">8</｜DSML｜parameter>
+<｜DSML｜parameter name="pattern" string="true">Emeritus</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[11] Find results for text: `emeritus` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf/find?pattern=emeritus)
+**viewing lines [0 - 10] of 10**
+
+L0: # 【0†match at L237】
+L1: Sisk, Ph.D British Columbia, Canada BOARD EMERITUS Terry C. Pelster, Esq. New 
+L2: York *Executive Committee member* STAFF & ADVISORS SCIENCE ADVISORS Gabriela 
+L3: Chavarria, Ph.D Burke Museum, University of Washington Leah Gerber, Ph.D Arizona
+L4:  State University Kathayoon Khalil, Ph.D Oregon Zoo Rurik List, Ph.D Universidad
+L5: 
+L6: # 【1†match at L243】
+L7: Davis Thomas D. Sisk, Ph.D Emeritus Professor Northern Arizona University David 
+L8: S. Wilcox, Ph.D Princeton University Rae Wynn-Grant, Ph.D National Geographic 
+L9: Society SENIOR STAFF Jamie Rappaport Clark President & CEO Thu Pham Chief of 
+L10: Staff James Stofan Senior Vice President, Operations McCrystle Adams Vice </result>
+</function_results>
+
+<think>Open around L230.</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">8</｜DSML｜parameter>
+<｜DSML｜parameter name="loc" string="false">220</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[12] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf)
+**viewing lines [220 - 267] of 289**
+
+L220: contributions $28,946 Bequests, trusts and split interests $13,928 Income from 
+L221: investments, annuity reserve funds and trusts (-$7,495) Royalties and other 
+L222: income $744 Contributed services $5,047 Total Revenue $41,170 USES OF FUNDS 
+L223: Biodiversity conservation $18,970 Constituency mobilization $16,528 Fundraising 
+L224: $1,029 Management and general $6,354 Total Expenses $42,881 Change in net assets
+L225:  (-$1,711) Net assets, start of year $45,144 Net Assets, End of the Year $43,433
+L226:  Dollars are in thousands. 12 DEFENDERS OF WILDLIFE Grants and contributions 59%
+L227:  Contributed services 10% Royalties and other income 2% Bequests, trusts and 
+L228: split interests 29% TOTAL REVENUE Income from investments -15% Program and 
+L229: support services 98% USE OF FUNDS Fundraising 2% ===== Page 13 ===== 
+L230: DEFENDERS.ORG 13 ===== Page 14 ===== BOARD OF DIRECTORS John Dayton, Esq.* Chair
+L231:  Texas Sheryl Sachs* Vice Chair Connecticut Dinah Bear, Esq.* Secretary Arizona 
+L232: Mark Caylor* Treasurer District of Columbia Susan Alexander Connecticut Sylvia 
+L233: Baca* California Whitefield Bailey Tennessee Donald Barry, Esq. Oregon Kim 
+L234: O’Keefe Beck, Ph.D California Gabriela Chavarria, Ph.D Washington Jamie 
+L235: Rappaport Clark* President & CEO Virginia John Donovan, Esq.* Montana and 
+L236: California Caroline D. Gabel Maryland Terry L. Root, Ph.D New Mexico Thomas D. 
+L237: Sisk, Ph.D British Columbia, Canada BOARD EMERITUS Terry C. Pelster, Esq. New 
+L238: York *Executive Committee member* STAFF & ADVISORS SCIENCE ADVISORS Gabriela 
+L239: Chavarria, Ph.D Burke Museum, University of Washington Leah Gerber, Ph.D Arizona
+L240:  State University Kathayoon Khalil, Ph.D Oregon Zoo Rurik List, Ph.D Universidad
+L241:  Autónoma Metropolitana-Lerma Barry Noon, Ph.D Colorado State University Terry 
+L242: L. Root, Ph.D Stanford University Mark Schwartz, Ph.D University of California-
+L243: Davis Thomas D. Sisk, Ph.D Emeritus Professor Northern Arizona University David 
+L244: S. Wilcox, Ph.D Princeton University Rae Wynn-Grant, Ph.D National Geographic 
+L245: Society SENIOR STAFF Jamie Rappaport Clark President & CEO Thu Pham Chief of 
+L246: Staff James Stofan Senior Vice President, Operations McCrystle Adams Vice 
+L247: President, Conservation Law Matt Braughler Vice President, Philanthropy Rachel 
+L248: Brittin Vice President, Communications Shawn Cantrell Vice President, Field 
+L249: Conservation Programs Robert L. Dewey Vice President, Government Relations & 
+L250: External Affairs Monica Goldberg Vice President, Landscape Conservation Sharon 
+L251: Morris-Smith Vice President of Human Resources Elizabeth Nichols Vice President,
+L252:  Finance/CFO Anthony Pegues Vice President, Diversity, Equity, Inclusion, 
+L253: Justice Lindsay Rosa, Ph.D Vice President, Conservation Research and Innovation 
+L254: Brandace Swimley Vice President, Integrated Marketing 14 DEFENDERS OF WILDLIFE 
+L255: ===== Page 15 ===== There are many ways to support Defenders of Wildlife and 
+L256: protect our imperiled wildlife. To learn more or donate, contact us at 
+L257: 1-800-385-9712, visit Defenders.org/donate or choose one of the following 
+L258: options. ➤ Transfer appreciated assets directly to Defenders and maximize 
+L259: potential tax benefits. Defenders.org/ donate-stocks-help-save-wildlife ➤ Become
+L260:  a Leadership Council member with a single donation of $1,000 or more and enjoy 
+L261: more personal involvement with Defenders. Defenders.org/leadershipcouncil ➤ If 
+L262: you are 70.5 or older, roll over a portion of your IRA to benefit wildlife. 
+L263: Remember, these gifts must come directly from your IRA account. 
+L264: Defenders.org/planned-giving/gifts-your-retirement-plan ➤ Get started on a 
+L265: contribution from your donor-advised fund Defenders.org/donor-advised-funds ➤ 
+L266: Check with your personnel office or that of your spouse/partner about your 
+L267: company’s Corporate Matching Gifts program or visit us at </result>
+</function_results>
+
+<think>Thus the emeritus board member is Terry C. Pelster. However, the question might ask for "Who was the emeritus board member that year?" Possibly referring to the year of the annual report that mentioned the rescinded plan and contributed services decrease. That is 2022. So the answer is Terry C. Pelster.
+
+But let's verify that the 2022 annual report indeed includes the mention of contributed services decreasing to 10%. We saw that. So answer is Terry C. Pelster.
+
+But maybe the question is tricky: "Who was the emeritus board member that year?" Could be the emeritus board member listed in the 2021 annual report? However, the question says "That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services. The following year, the annual report stated that the organization had played a role in rescinding a previous administration’s plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%."
+
+Thus the "following year" annual report is 2022. The question likely asks: "Who was the emeritus board member that year?" Meaning the emeritus board member listed in that annual report (2022). So answer: Terry C. Pelster.
+
+But let's confirm that the 2021 annual report also shows emeritus board member Terry C. Pelster. Indeed we saw that. But perhaps the 2022 report might have a different emeritus board member. We saw it's the same. So answer is Terry C. Pelster.
+
+However, the question might be about a different organization. Let's verify the details.
+
+First clue: Press release about Kenya vaccine doses on Dec 5, 2021. That is likely a World Bank press release. Search for "World Bank Kenya vaccine doses December 2021 press release".</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">As of December 5, 2021, Kenya had received a total of 16,201,670 vaccines</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[13] As of December 5, 2021, Kenya had received a total of 16,201,670 vaccines (Search_Results/As of December 5, 2021, Kenya had received a total of 16,201,670 vaccines)
+**viewing lines [0 - 41] of 41**
+
+L0: 
+L1: URL: Search_Results/As of December 5, 2021, Kenya had received a total of 
+L2: 16,201,670 vaccines
+L3: # Search Results
+L4: 
+L5:   * 【0†Kenya's Economy is Showing Resilience as Output Rises ...; publish_date: 
+L6: none†www.worldbank.org】 Dec 14, 2021 — As of December 5, 2021, Kenya had 
+L7: received a total of 16,201,670 vaccines, with 7,583,134 administered. While 
+L8: vaccine acceptance is ...
+L9:   * 【1†Unmet need for COVID-19 vaccination coverage in Kenya - PMC; 
+L10: publish_date: none†pmc.ncbi.nlm.nih.gov】 by SK Muchiri · 2022 · Cited by 42 — As
+L11:  of December 2021, six counties had a vaccination coverage of less than 5%. 
+L12: These counties include Garissa, Mandera, Marsabit, Tana River, Turkana, and ...
+L13:   * 【2†MINISTRY OF HEALTH; publish_date: none†covidhub.mediacouncil.or.ke】 Dec 
+L14: 1, 2021 — • Total Covid-19 Vaccines Received to date- 16,201,670 ... Table 10: 
+L15: Vaccine Logistics Received in the Country as at 5th, December 2021.
+L16:   * 【3†COVID-19 vaccination refusal trends in Kenya over 2021 - PMC; 
+L17: publish_date: none†pmc.ncbi.nlm.nih.gov】 by RT Rego · 2023 · Cited by 21 — We 
+L18: assessed vaccine refusal over time in Kenya, and characterized factors 
+L19: associated with changes in vaccination refusal.
+L20:   * 【4†Ciheb-Kenya on the Front Lines of the COVID-19 Vaccine ...; publish_date:
+L21:  none†www.ciheb.org】 Of the 721,509 persons vaccinated, 409,711 are aged 58 
+L22: years and above, 143,050 are healthcare workers, 109,593 are teachers, and 
+L23: 59,155 are security officers.
+L24:   * 【5†proportion of fully vaccinated adults 23.3pc.; publish_date: 
+L25: none†www.facebook.com】 COVID-19: KENYA administers 150632 new vaccinations; 
+L26: total jabs given now 13782963; proportion of fully vaccinated adults 23.3pc.
+L27:   * 【6†National COVID-19 Vaccines Deployment and ...; publish_date: 
+L28: none†www.parliament.go.ke】 Kenya has a well-established immunization program and
+L29:  a robust disease surveillance and response unit (DSRU) which serves as an early
+L30:  warning system, to.
+L31:   * 【7†Kenya: Vaccine Requirements Violate Rights; publish_date: 
+L32: none†www.hrw.org】 Dec 13, 2021 — Kenya, with an estimated adult population of 
+L33: 27.2 million and a total population of 55 million, has received approximately 23
+L34:  million doses as ...
+L35:   * 【8†Government gives 20 million a month to get vaccinated; publish_date: 
+L36: none†www.bbc.com】 Nov 22, 2021 — Although less than 10% of Kenyans are 
+L37: vaccinated, the government wants to avoid a surge over Christmas.
+L38:   * 【9†ministry of health kenya covid-19 immunization status ...; publish_date: 
+L39: none†www.health.go.ke】 May 15, 2022 — By the end of 2021 Kenya had administered 
+L40: 10.1 million doses of 5 types of Covid-19 vaccines. During this period 4.2 
+L41: million people were ...</result>
+</function_results>
+
+<think>Open result 0.</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">13</｜DSML｜parameter>
+<｜DSML｜parameter name="id" string="false">0</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[14] www.worldbank.org (https://www.worldbank.org/en/news/press-release/2021/12/14/kenya-s-economy-is-showing-resilience-as-output-rises-above-pre-pandemic-levels-driven-by-a-rebound-in-the-services-sect)
+**viewing lines [0 - 52] of 61**
+
+L0: 
+L1: URL: https://www.worldbank.org/en/news/press-release/2021/12/14/kenya-s-economy-
+L2: is-showing-resilience-as-output-rises-above-pre-pandemic-levels-driven-by-a-
+L3: rebound-in-the-services-sect
+L4: NAIROBI, December 14, 2021 — Kenya’s economy has demonstrated resilience to the 
+L5: COVID-19 shock, with output in the first half of the year rising above pre-
+L6: pandemic levels. In 2021 as a whole, gross domestic product (GDP) is expected to
+L7:  grow by 5%, one of the faster recoveries among Sub-Saharan African countries. 
+L8: Overall economic performance is expected to be robust at 4.9% per year in 
+L9: 2022-23, similar to the pre-pandemic pace (5% average annual growth from 2010 to
+L10:  2019). According to the 24th edition of the Kenya Economic Update, “From 
+L11: Recovery to Better Jobs,” growth has been supported by rebounds in industry and,
+L12:  especially, services. Agricultural output, however, fell by 0.5% year on year 
+L13: in the first half of 2021 following a particularly strong performance in 2020, 
+L14: partly due to below-average rains. Demand-side recovery has been supported by a 
+L15: revival in private consumption, against a backdrop of improving employment 
+L16: conditions and household incomes. “Kenya’s economy has shown considerable 
+L17: resilience to the enormous shock of the pandemic, and this year is expected to 
+L18: post one of the stronger growth rebounds in the region thanks to diversified 
+L19: sources of growth and sound economic policies and management,” said Keith 
+L20: Hansen, World Bank Country Director for Kenya. “However, poverty has increased, 
+L21: and the buffers and coping mechanisms of households, firms, and the public 
+L22: finances have been depleted.” Economic activity in Kenya has continued to adapt 
+L23: to the pandemic and associated restrictions. A mix of containment measures, such
+L24:  as a nightly curfew, were in effect through most of 2021, while more 
+L25: economically disruptive measures such as lockdowns and travel restrictions were 
+L26: phased, limiting the impact on economic activities. The vaccine rollout, which 
+L27: had a slow start due to supply constraints, has picked up as new shipments of 
+L28: vaccines have arrived, particularly since September. This has supported economic
+L29:  recovery and growth through the third quarter of 2021. As of December 5, 2021, 
+L30: Kenya had received a total of 16,201,670 vaccines, with 7,583,134 administered. 
+L31: While vaccine acceptance is reportedly high there is still a long way to go 
+L32: towards the government’s target of fully inoculating the adult population of 
+L33: about 30 million by the end of 2022.As of December 6, 2021, about 10% of adults 
+L34: (2.9 million people) had been fully vaccinated while another 16% (4.9 million 
+L35: people) had received their first dose. The report also projects robust growth in
+L36:  the medium-term. This outlook takes into account that some sub-sectors, such as
+L37:  education, have bounced back strongly, but others such as international 
+L38: tourism, have only partially and face a much more protracted recovery. The 
+L39: anticipated further recovery of hotels and restaurants, trade, transport, and 
+L40: other services, depends on substantial vaccination progress to help prevent new 
+L41: waves of infections and associated containment measures. “Recent economic 
+L42: performance has been strong and the outlook is positive, but in Kenya, as 
+L43: everywhere, the future course of the pandemic remains a major source of 
+L44: uncertainty, said Alex Sienaert, Senior Economist for Kenya. “To support further
+L45:  recovery, reduce debt distress risks, and rebuild space for social and 
+L46: development spending, it is critical that Kenya continues to implement medium-
+L47: term fiscal consolidation plans.” In addition to pandemic-related risks to the 
+L48: outlook, a second key domestic risk factor stems from the drought conditions 
+L49: which are affecting parts of the country and already causing severe hardship. 
+L50: Should the drought intensify or spread, this would weigh on the near-term 
+L51: economic outlook. Weaker global growth, higher-than-anticipated energy prices, 
+L52: and tighter external financing conditions are the primary external risks. As </result>
+</function_results>
+
+<think>Thus first clue confirmed.
+
+Second clue: Defenders of Wildlife 2021 annual report. Confirmed.
+
+Third clue: Defenders of Wildlife 2022 annual report. Confirmed.
+
+Thus the answer is Terry C. Pelster.
+
+But let's verify that the 2022 annual report indeed shows "Contributed services 10%". We saw that. Also confirm that 2021 report shows 12%. We saw that.
+
+Thus the question: "Who was the emeritus board member that year?" Refers to 2022. Answer: Terry C. Pelster.
+
+However, the question might be asking for the emeritus board member in the 2021 report? Let's read the question carefully:
+
+"In a year before 2023, between December 1 and 15 (inclusive), a press release reported that as of the 5th of that month, a country had received a total of 16,201,670 vaccine doses, with 7,583,134 administered. That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services. The following year, the annual report stated that the organization had played a role in rescinding a previous administration’s plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%. Who was the emeritus board member that year? Name only, no titles."
+
+The phrase "that year" could refer to "the following year" i.e., the year of the annual report that mentioned the rescinded plan. Or could refer to "that same year" (the year of the press release and the first annual report). However, the phrase "that year" appears at the end: "Who was the emeritus board member that year?" It likely refers to the year of the annual report that is being discussed in the preceding sentence. The preceding sentence talks about the following year's annual report. So indeed "that year" is the following year (2022).
+
+Thus answer: Terry C. Pelster.
+
+But to be thorough, we should check the 2022 annual report for any other emeritus board members. The search only showed "BOARD EMERITUS Terry C. Pelster, Esq." Maybe there are multiple emeritus board members. But likely only one.
+
+Check the 2021 annual report for emeritus board member. It also shows Terry C. Pelster. So maybe the question is asking for the emeritus board member in the 2021 report. But why would they ask that? The question is about connecting the clues to identify the organization and then find the emeritus board member. The clues point to Defenders of Wildlife. The question likely asks for the emeritus board member as of the 2022 report.
+
+Nevertheless, we should confirm that the 2022 report is indeed the one that mentions the rescinded plan. We saw that. Also confirm that contributed services decreased from 12% to 10%. We saw that.
+
+Thus answer is Terry C. Pelster.
+
+But let's search for "Defenders of Wildlife emeritus board member 2022 Terry Pelster".</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="query" string="true">Terry C. Pelster Defenders of Wildlife emeritus board member</｜DSML｜parameter>
+<｜DSML｜parameter name="topn" string="false">10</｜DSML｜parameter>
+<｜DSML｜parameter name="source" string="true">web</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[15] Terry C. Pelster Defenders of Wildlife emeritus board member (Search_Results/Terry C. Pelster Defenders of Wildlife emeritus board member)
+**viewing lines [0 - 31] of 31**
+
+L0: 
+L1: URL: Search_Results/Terry C. Pelster Defenders of Wildlife emeritus board member
+L2: # Search Results
+L3: 
+L4:   * 【0†2024 impact report; publish_date: none†defenders.org】 Terry Root, Ph.D. 
+L5: New Mexico. Thomas D. Sisk, Ph.D. British Columbia, Canada. BOARD EMERITUS. 
+L6: Terry C. Pelster, Esq. New York. SCIENCE ADVISORS. Leah Gerber, Ph ...
+L7:   * 【1†Defenders of Wildlife; publish_date: none†www.wrongkindofgreen.org】 
+L8: Directors ; Victor M. Sher*, Chair – California ; Terry C. Pelster*, Vice Chair 
+L9: – New York ; Richard Kopcho*, Treasurer – California ; Adelaide P. Gomer*, 
+L10: Secretary ...
+L11:   * 【2†Exhibit 12; publish_date: none†www.uschamber.com】 DECLARATION OF TERRY 
+L12: PELSTER. I, Terry C. Pelster, declare as follows: 1. I am a current member of 
+L13: Defenders of Wildlife (“Defenders”) and have been a member.
+L14:   * 【3†2020 ANNUAL REPORT; publish_date: none†defenders.org】 Terry L. Root. 
+L15: Florida. BOARD. EMERITUS. Terry C. Pelster. New York. Alan Steinberg. Florida. 
+L16: SCIENCE. ADVISORS. Gabriela Chavarria,. Ph.D. Denver Museum of.
+L17:   * 【4†2019 Annual Report; publish_date: none†defenders.org】 BOARD EMERITUS. 
+L18: Terry C. Pelster. New York. Alan Steinberg. Florida. SCIENCE ADVISORS. Leah 
+L19: Gerber, Ph.D. Arizona State University. Kathayoon Khalil, Ph.D.
+L20:   * 【5†annual report; publish_date: none†defenders.org】 * Executive Committee 
+L21: member. BoARD EMERITuS. Terry C. Pelster. New York. Alan Steinberg. Florida. 
+L22: PRESIDENT. EMERITuS. Rodger Schlickeisen. Virginia. SCIENCE ...
+L23:   * 【6†BEING THERE FOR BELUGAS; publish_date: none†wendeenicole.com】 Mar 2, 2023
+L24:  — Defenders of Wildlife is a national, nonprofit member- ship ... Terry C. 
+L25: Pelster, Esq. New York. *Executive Committee member outfront.
+L26:   * 【7†Annual Reports; publish_date: none†www.annualreports.com】 WAYS TO GIVE 
+L27: There are many ways to support Defenders of Wildlife and protect our imperiled 
+L28: wildlife. ... Terry C. Pelster. New York. Alan Steinberg. Florida.
+L29:   * 【8†2014 Annual Report; publish_date: none†defenders.org】 BOARD OF DIRECTORS 
+L30: defenders of wildlife is a national, nonprofit ... BOARD EMERITUS. Terry C. 
+L31: Pelster. New York. Alan Steinberg. Florida. Page 3. PO.</result>
+</function_results>
+
+<think>Thus Terry C. Pelster is indeed the emeritus board member.
+
+Now the question might want just "Terry C. Pelster". Or maybe "Terry Pelster". Or "Terry C. Pelster, Esq." but instructions say "Name only, no titles." So likely "Terry C. Pelster". However, maybe they want "Terry Pelster". Better to include full name as listed: "Terry C. Pelster".
+
+Let's read the question again: "Who was the emeritus board member that year? Name only, no titles." So we should answer just the name, no "Esq." or "Board Emeritus". So "Terry C. Pelster". Or maybe "Terry Pelster". But the document shows "Terry C. Pelster, Esq." So the name is "Terry C. Pelster". 
+
+Better to verify that the 2022 annual report indeed lists "BOARD EMERITUS Terry C. Pelster, Esq." We saw that line.
+
+Thus answer: Terry C. Pelster.
+
+But let's double-check that the question might refer to the 2021 annual report. The phrase "that year" could be ambiguous. Let's parse the timeline:
+
+- Year 1: 2021. Press release in Dec 2021 about Kenya vaccine doses. Same year (2021) annual report of Defenders of Wildlife shows 12% contributed services.
+
+- Year 2: 2022. Annual report mentions rescinded plan and contributed services decreased to 10%.
+
+The question says "The following year, the annual report stated that the organization had played a role in rescinding a previous administration’s plan ... and that contributed services had decreased by 2% to 10%." This is year 2.
+
+Then asks "Who was the emeritus board member that year?" That likely refers to year 2.
+
+Thus answer: Terry C. Pelster.
+
+However, perhaps the emeritus board member changed between 2021 and 2022. We need to check 2022 annual report for any other emeritus board member. We saw only Terry C. Pelster. Also check 2021 annual report: same. So likely unchanged.
+
+Thus answer is Terry C. Pelster.
+
+Nevertheless, we should confirm that the 2022 annual report is indeed the one that includes the phrase "BLM rescinded the previous administration’s plan that opened most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling". We saw that in line 200-202.
+
+Thus answer is Terry C. Pelster.
+
+Now we need to provide the answer in the final message. Only the name.
+
+Thus final answer: Terry C. Pelster.
+
+But let's be thorough and search for "Defenders of Wildlife 2022 annual report contributed services 10%".</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="find">
+<｜DSML｜parameter name="cursor" string="false">8</｜DSML｜parameter>
+<｜DSML｜parameter name="pattern" string="true">Contributed services</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[16] Find results for text: `contributed services` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf/find?pattern=contributed services)
+**viewing lines [0 - 10] of 10**
+
+L0: # 【0†match at L222】
+L1: income $744 Contributed services $5,047 Total Revenue $41,170 USES OF FUNDS 
+L2: Biodiversity conservation $18,970 Constituency mobilization $16,528 Fundraising 
+L3: $1,029 Management and general $6,354 Total Expenses $42,881 Change in net assets
+L4:  (-$1,711) Net assets, start of year $45,144 Net Assets, End of the Year $43,433
+L5: 
+L6: # 【1†match at L227】
+L7:  Contributed services 10% Royalties and other income 2% Bequests, trusts and 
+L8: split interests 29% TOTAL REVENUE Income from investments -15% Program and 
+L9: support services 98% USE OF FUNDS Fundraising 2% ===== Page 13 ===== 
+L10: DEFENDERS.ORG 13 ===== Page 14 ===== BOARD OF DIRECTORS John Dayton, Esq.* Chair</result>
+</function_results>
+
+<think>Thus confirmed.
+
+Now we should verify that the 2021 annual report shows contributed services 12%.</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="find">
+<｜DSML｜parameter name="cursor" string="false">4</｜DSML｜parameter>
+<｜DSML｜parameter name="pattern" string="true">Contributed services</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[17] Find results for text: `contributed services` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf/find?pattern=contributed services)
+**viewing lines [0 - 4] of 4**
+
+L0: # 【0†match at L194】
+L1: $3,354 Royalties and other income $3,576 Contributed services $6,140 Total 
+L2: Revenue $49,819 USES OF FUNDS Biodiversity conservation $22,420 Constituency 
+L3: mobilization $16,324 Fundraising $1,211 Management and general $5,865 Total 
+L4: Expenses $45,820 Change in net assets $3,999 Net assets, start of year $41,145 </result>
+</function_results>
+
+<think>Open around L190-200 to see percentage.</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="open">
+<｜DSML｜parameter name="cursor" string="false">4</｜DSML｜parameter>
+<｜DSML｜parameter name="loc" string="false">185</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>[18] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf)
+**viewing lines [185 - 232] of 261**
+
+L185: devastating water pollution that is killing the seagrass and causing manatees to
+L186:  starve. DAVID TES | SAM FRENZY DRAW DEFENDERS.ORG 11 ===== Page 12 ===== In 
+L187: 2021, Defenders of Wildlife’s nearly 2.2 million members and supporters 
+L188: contributed more than $49 million for wildlife and wildlife habitat. This figure
+L189:  includes all those who generously donated their time and expertise. The 
+L190: steadfast support of our donors allows Defenders to sustain our program and 
+L191: public education efforts in the field, the courts and on Capitol Hill. 2021 
+L192: SOURCES OF FUNDS Grants and contributions $29,057 Bequests, trusts and split 
+L193: interests $7,692 Income from investments, annuity reserve funds and trusts 
+L194: $3,354 Royalties and other income $3,576 Contributed services $6,140 Total 
+L195: Revenue $49,819 USES OF FUNDS Biodiversity conservation $22,420 Constituency 
+L196: mobilization $16,324 Fundraising $1,211 Management and general $5,865 Total 
+L197: Expenses $45,820 Change in net assets $3,999 Net assets, start of year $41,145 
+L198: Net Assets, End of the Year $45,144 Dollars are in thousands. 12 DEFENDERS OF 
+L199: WILDLIFE Grants and contributions 58% Income from investments 7% Requests, 
+L200: trusts and split interests 15% Royalties and other income 7% Contributed 
+L201: services 12% Program and support services 97% Fundraising 3% ===== Page 13 =====
+L202:  DEFENDERS.ORG 13 ===== Page 14 ===== John Dayton, Esq.* Chair Texas Sheryl 
+L203: Sachs* Vice Chair Connecticut Whitfield Bailey* Secretary Tennessee Mark Caylor*
+L204:  Treasurer District of Columbia Sylvia Baca* California Donald Barry, Esq. 
+L205: Oregon Dinah Bear, Esq. Arizona Kim O’Keefe Beck, Ph.D California Gabriela 
+L206: Chavarria, Ph.D Washington Jamie Rappaport Clark* President & CEO Virginia 
+L207: Caroline D. Gabel Maryland Keith M. Harper, Esq. Maryland John Donovan, Esq. 
+L208: Montana and California James M. Hecker, Esq. Virginia Judith Ponsikoff, Ph.D* 
+L209: California Terry L. Root, Ph.D Florida BOARD EMERITUS Terry C. Pelster, Esq. New
+L210:  York *Executive Committee member* ST A F F & A D V I S O R S SCIENCE ADVISORS 
+L211: Gabriela Chavarria, Ph.D Burke Museum, University of Washington Leah Gerber, 
+L212: Ph.D Arizona State University Kathayoon Khalil, Ph.D Oregon Zoo Rurik List, Ph.D
+L213:  Universidad Autónoma Metropolitana-Lerma Barry Noon, Ph.D Colorado State 
+L214: University Terry L. Root, Ph.D Stanford University Mark Schwartz, Ph.D 
+L215: University of California-Davis David S. Wilcox, Ph.D Princeton University Rae 
+L216: Wynn-Grant, Ph.D National Geographic Society SENIOR STAFF Jamie Rappaport Clark 
+L217: President & CEO Thu Pham Chief of Staff Renee Stone Senior Vice President, 
+L218: Conservation Programs & General Counsel James Stefan Senior Vice President, 
+L219: Operations Matt Braughler Vice President, Philanthropy Rachel Brittin Vice 
+L220: President, Communications Shawn Cantrell Vice President, Field Conservation 
+L221: Programs Robert L. Dewey Vice President, Government Relations & External Affairs
+L222:  Monica Goldberg Vice President, Landscape Conservation Sharon Morris-Smith Vice
+L223:  President of Human Resources Elizabeth Nichols Vice President, Finance/CFO 
+L224: Anthony Pegues Vice President, Diversity, Equity, Inclusion, Justice Lindsay 
+L225: Rosa, Ph.D Director, Center for Conservation Innovation Michael P. Senatore Vice
+L226:  President, Conservation Law Brandace Swimley Vice President, Integrated 
+L227: Marketing 14 DEFENDERS OF WILDLIFE ===== Page 15 ===== THERE ARE MANY WAYS TO 
+L228: SUPPORT DEFENDERS OF WILDLIFE and protect our imperiled wildlife. To learn more 
+L229: or donate, contact us at 1-800-385-9712, visit defenders.org/donate or choose 
+L230: one of the following options. ➤ Transfer appreciated assets directly to 
+L231: Defenders and maximize potential tax benefits. Defenders.org/donate-stocks-help-
+L232: save-wildlife ➤ Become a Leadership Council member with a single donation of </result>
+</function_results>
+
+<think>Thus everything matches.
+
+Thus answer: Terry C. Pelster.
+
+Now we need to output only the name.
+
+Thus final answer: Terry C. Pelster.
+
+But perhaps the question expects "Terry Pelster". However, the document includes middle initial "C." It might be better to include the full name as listed: "Terry C. Pelster". The instruction "Name only, no titles" suggests we should not include "Esq." or "Board Emeritus". But including middle initial is likely okay.
+
+To be safe, we could output "Terry C. Pelster". Let's see similar puzzles: often they expect the full name as appears in the document.
+
+Thus answer: Terry C. Pelster.
+
+Now we will provide final answer.</think>Terry C. Pelster<｜end▁of▁sentence｜>
\ No newline at end of file
diff --git a/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_output_vllm_parity.txt b/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_output_vllm_parity.txt
new file mode 100644
index 000000000000..f377fad6564f
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v32/fixtures/test_output_vllm_parity.txt
@@ -0,0 +1,112 @@
+<｜begin▁of▁sentence｜>
+
+## Tools
+
+You have access to a set of tools you can use to answer the user's question.
+You can invoke functions by writing a "<｜DSML｜function_calls>" block like the following as part of your reply to the user:
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="$FUNCTION_NAME">
+<｜DSML｜parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</｜DSML｜parameter>
+...
+</｜DSML｜invoke>
+<｜DSML｜invoke name="$FUNCTION_NAME2">
+...
+</｜DSML｜invoke>
+</｜DSML｜function_calls>
+
+String and scalar parameters should be specified as is without any escaping or quotes, while lists and objects should use JSON format. The "string" attribute should be set to "true" for string type parameters and "false" for other types (numbers, booleans, arrays, objects).
+
+If the thinking_mode is enabled, then after function results you should strongly consider outputting a thinking block. Here is an example:
+
+<｜DSML｜function_calls>
+...
+</｜DSML｜function_calls>
+
+<function_results>
+...
+</function_results>
+
+<think>...thinking about results</think>
+
+Here are the functions available in JSONSchema format:
+<functions>
+{"name": "get_datetime", "description": "Get the current date and time", "parameters": {"type": "object", "properties": {"timezone": {"type": "string", "description": "The timezone, e.g. Asia/Shanghai, UTC"}}, "required": ["timezone"]}}
+{"name": "get_weather", "description": "Get the weather for a specific date and location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city name, e.g. Beijing, Hangzhou"}, "date": {"type": "string", "description": "The date in YYYY-MM-DD format"}}, "required": ["location", "date"]}}
+</functions>
+You are a helpful Assistant.<｜User｜>明天杭州和北京的天气怎么样？<｜Assistant｜></think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="get_datetime">
+<｜DSML｜parameter name="timezone" string="true">Asia/Shanghai</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>{"current_date": "2024-01-15", "current_time": "14:30:00", "timezone": "Asia/Shanghai"}</result>
+</function_results>
+
+</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="get_weather">
+<｜DSML｜parameter name="location" string="true">Hangzhou</｜DSML｜parameter>
+<｜DSML｜parameter name="date" string="true">2024-01-16</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="get_weather">
+<｜DSML｜parameter name="location" string="true">Beijing</｜DSML｜parameter>
+<｜DSML｜parameter name="date" string="true">2024-01-16</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>{"location": "Hangzhou", "date": "2024-01-16", "temperature_high": "12", "temperature_low": "5", "weather": "多云", "humidity": "65%"}</result>
+<result>{"location": "Beijing", "date": "2024-01-16", "temperature_high": "-2", "temperature_low": "-8", "weather": "晴", "humidity": "30%"}</result>
+</function_results>
+
+</think>根据查询结果，明天（2024年1月16日）的天气情况如下：
+
+**杭州**：
+- 天气：多云
+- 最高温度：12°C
+- 最低温度：5°C
+- 湿度：65%
+
+**北京**：
+- 天气：晴
+- 最高温度：-2°C
+- 最低温度：-8°C
+- 湿度：30%
+
+杭州明天会比较温暖但有些多云，而北京会很冷但是晴天。建议在北京的朋友要注意保暖！<｜end▁of▁sentence｜><｜User｜>谢谢！那后天呢？<｜Assistant｜><think>用户现在问后天的天气，后天是2024-01-17，我可以直接查询（因为已知今天日期）🗓️</think>
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="get_weather">
+<｜DSML｜parameter name="location" string="true">Hangzhou</｜DSML｜parameter>
+<｜DSML｜parameter name="date" string="true">2024-01-17</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="get_weather">
+<｜DSML｜parameter name="location" string="true">Beijing</｜DSML｜parameter>
+<｜DSML｜parameter name="date" string="true">2024-01-17</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+
+<function_results>
+<result>{"location": "Hangzhou", "date": "2024-01-17", "temperature_high": "15", "temperature_low": "8", "weather": "小雨", "humidity": "80%"}</result>
+<result>{"location": "Beijing", "date": "2024-01-17", "temperature_high": "0", "temperature_low": "-6", "weather": "多云", "humidity": "45%"}</result>
+</function_results>
+
+<think>获取到后天的天气数据，整理回复给用户📝</think>后天（2024年1月17日）的天气情况：
+
+**杭州**：
+- 天气：小雨
+- 最高温度：15°C
+- 最低温度：8°C
+- 湿度：80%
+
+**北京**：
+- 天气：多云
+- 最高温度：0°C
+- 最低温度：-6°C
+- 湿度：45%
+
+杭州后天会有小雨，温度略有回升，记得带伞。北京会稍微暖和一点，但依然很冷，请继续做好保暖措施。<｜end▁of▁sentence｜>
\ No newline at end of file
diff --git a/rust/src/chat/src/renderer/deepseek_v32/mod.rs b/rust/src/chat/src/renderer/deepseek_v32/mod.rs
new file mode 100644
index 000000000000..97225bbab09b
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v32/mod.rs
@@ -0,0 +1,31 @@
+mod encoding;
+
+use vllm_text::Prompt;
+
+use super::{ChatRenderer, RenderedPrompt};
+use crate::Result;
+use crate::request::ChatRequest;
+
+/// Dedicated DeepSeek V3.2 renderer.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct DeepSeekV32ChatRenderer;
+
+impl DeepSeekV32ChatRenderer {
+    /// Create the dedicated DeepSeek V3.2 renderer.
+    pub fn new() -> Self {
+        Self
+    }
+}
+
+impl ChatRenderer for DeepSeekV32ChatRenderer {
+    fn render(&self, request: &ChatRequest) -> Result<RenderedPrompt> {
+        request.validate()?;
+
+        Ok(RenderedPrompt {
+            prompt: Prompt::Text(encoding::render_request(request)?),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/rust/src/chat/src/renderer/deepseek_v32/tests.rs b/rust/src/chat/src/renderer/deepseek_v32/tests.rs
new file mode 100644
index 000000000000..0b8f2b09e11c
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v32/tests.rs
@@ -0,0 +1,422 @@
+use std::fs;
+use std::path::PathBuf;
+
+use expect_test::{ExpectFile, expect, expect_file};
+use serde::Deserialize;
+use serde_json::{Value, json};
+use thiserror_ext::AsReport;
+
+use super::DeepSeekV32ChatRenderer;
+use crate::error::Error;
+use crate::event::{AssistantContentBlock, AssistantToolCall};
+use crate::request::{
+    ChatContentPart, ChatMessage, ChatRequest, ChatTool, ChatToolChoice, GenerationPromptMode,
+};
+use crate::{ChatRenderer, ChatRole};
+
+#[derive(Debug, Deserialize)]
+struct FixtureRequest {
+    #[serde(default)]
+    tools: Vec<FixtureTool>,
+    messages: Vec<FixtureMessage>,
+}
+
+#[derive(Debug, Deserialize)]
+struct FixtureTool {
+    function: FixtureToolFunction,
+}
+
+#[derive(Debug, Deserialize)]
+struct FixtureToolFunction {
+    name: String,
+    description: Option<String>,
+    parameters: Value,
+    #[serde(default)]
+    strict: Option<bool>,
+}
+
+#[derive(Debug, Deserialize)]
+#[serde(tag = "role", rename_all = "snake_case")]
+enum FixtureMessage {
+    System {
+        content: String,
+    },
+    Developer {
+        content: String,
+        #[serde(default)]
+        tools: Vec<FixtureTool>,
+    },
+    User {
+        content: String,
+    },
+    Assistant {
+        #[serde(default)]
+        content: String,
+        #[serde(default)]
+        reasoning_content: String,
+        #[serde(default)]
+        tool_calls: Vec<FixtureToolCall>,
+    },
+    Tool {
+        content: String,
+        #[serde(default)]
+        tool_call_id: Option<String>,
+    },
+}
+
+#[derive(Debug, Deserialize)]
+struct FixtureToolCall {
+    #[serde(default)]
+    id: Option<String>,
+    function: FixtureToolCallFunction,
+}
+
+#[derive(Debug, Deserialize)]
+struct FixtureToolCallFunction {
+    name: String,
+    arguments: String,
+}
+
+fn render_request(request: &ChatRequest) -> String {
+    DeepSeekV32ChatRenderer::new()
+        .render(request)
+        .unwrap()
+        .prompt
+        .into_text()
+        .expect("deepseek renderer should return text prompt")
+}
+
+fn render_result(request: &ChatRequest) -> Result<String, Error> {
+    DeepSeekV32ChatRenderer::new().render(request).map(|rendered| {
+        rendered
+            .prompt
+            .into_text()
+            .expect("deepseek renderer should return text prompt")
+    })
+}
+
+fn thinking_request(messages: Vec<ChatMessage>) -> ChatRequest {
+    let mut request = ChatRequest {
+        request_id: "deepseek-v32-small-test".to_string(),
+        messages,
+        ..ChatRequest::for_test()
+    };
+    if matches!(
+        request.messages.last().map(ChatMessage::role),
+        Some(ChatRole::Assistant)
+    ) {
+        request.chat_options.generation_prompt_mode = GenerationPromptMode::NoGenerationPrompt;
+    }
+    request
+        .chat_options
+        .template_kwargs
+        .insert("thinking".to_string(), Value::Bool(true));
+    request
+}
+
+fn fixture_request(input_name: &str) -> ChatRequest {
+    let fixture = fs::read_to_string(fixture_path(input_name)).unwrap();
+    let fixture: FixtureRequest = serde_json::from_str(&fixture).unwrap();
+    let mut request = ChatRequest {
+        request_id: "deepseek-v32-fixture".to_string(),
+        messages: fixture
+            .messages
+            .into_iter()
+            .enumerate()
+            .map(|(index, message)| match message {
+                FixtureMessage::System { content } => ChatMessage::system(content),
+                FixtureMessage::Developer { content, tools } => ChatMessage::developer(
+                    content,
+                    (!tools.is_empty()).then(|| to_chat_tools(&tools)),
+                ),
+                FixtureMessage::User { content } => ChatMessage::user(content),
+                FixtureMessage::Assistant {
+                    content,
+                    reasoning_content,
+                    tool_calls,
+                } => {
+                    let mut blocks = Vec::new();
+                    if !reasoning_content.is_empty() {
+                        blocks.push(AssistantContentBlock::Reasoning {
+                            text: reasoning_content,
+                        });
+                    }
+                    if !content.is_empty() {
+                        blocks.push(AssistantContentBlock::Text { text: content });
+                    }
+                    blocks.extend(tool_calls.into_iter().enumerate().map(
+                        |(tool_index, tool_call)| {
+                            AssistantContentBlock::ToolCall(AssistantToolCall {
+                                id: tool_call.id.unwrap_or_else(|| {
+                                    format!("fixture-tool-call-{index}-{tool_index}")
+                                }),
+                                name: tool_call.function.name,
+                                arguments: tool_call.function.arguments,
+                            })
+                        },
+                    ));
+                    ChatMessage::assistant_blocks(blocks)
+                }
+                FixtureMessage::Tool {
+                    content,
+                    tool_call_id,
+                } => ChatMessage::tool_response(
+                    content,
+                    tool_call_id.unwrap_or_else(|| format!("fixture-tool-response-{index}")),
+                ),
+            })
+            .collect(),
+        tools: to_chat_tools(&fixture.tools),
+        tool_choice: if fixture.tools.is_empty() {
+            ChatToolChoice::None
+        } else {
+            ChatToolChoice::Auto
+        },
+        ..ChatRequest::for_test()
+    };
+    if matches!(
+        request.messages.last().map(ChatMessage::role),
+        Some(ChatRole::Assistant)
+    ) {
+        request.chat_options.generation_prompt_mode = GenerationPromptMode::NoGenerationPrompt;
+    }
+    request
+        .chat_options
+        .template_kwargs
+        .insert("thinking".to_string(), Value::Bool(true));
+    request
+}
+
+fn to_chat_tools(tools: &[FixtureTool]) -> Vec<ChatTool> {
+    tools
+        .iter()
+        .map(|tool| ChatTool {
+            name: tool.function.name.clone(),
+            description: tool.function.description.clone(),
+            parameters: tool.function.parameters.clone(),
+            strict: tool.function.strict,
+        })
+        .collect()
+}
+
+fn fixture_path(name: &str) -> PathBuf {
+    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .join("src/renderer/deepseek_v32")
+        .join("fixtures")
+        .join(name)
+}
+
+fn assert_fixture(input_name: &str, expected: ExpectFile) {
+    let request = fixture_request(input_name);
+    let rendered = render_request(&request);
+    expected.assert_eq(&rendered);
+}
+
+#[test]
+fn renders_vllm_parity_prompt_for_request_level_tools_fixture() {
+    assert_fixture(
+        "test_input.json",
+        expect_file!["fixtures/test_output_vllm_parity.txt"],
+    );
+}
+
+#[test]
+fn renders_official_search_fixture_without_date() {
+    assert_fixture(
+        "test_input_search_wo_date.json",
+        expect_file!["fixtures/test_output_search_wo_date.txt"],
+    );
+}
+
+#[test]
+fn renders_official_search_fixture_with_date() {
+    assert_fixture(
+        "test_input_search_w_date.json",
+        expect_file!["fixtures/test_output_search_w_date.txt"],
+    );
+}
+
+#[test]
+fn request_level_tools_are_lowered_as_synthetic_leading_system_message() {
+    let mut request = ChatRequest {
+        request_id: "deepseek-v32-tools".to_string(),
+        messages: vec![
+            ChatMessage::system("System prompt."),
+            ChatMessage::text(ChatRole::User, "Hello"),
+        ],
+        tools: vec![ChatTool {
+            name: "lookup".to_string(),
+            description: Some("Look things up".to_string()),
+            parameters: json!({
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string"
+                    }
+                },
+                "required": ["query"]
+            }),
+            strict: None,
+        }],
+        tool_choice: ChatToolChoice::Auto,
+        ..ChatRequest::for_test()
+    };
+    request
+        .chat_options
+        .template_kwargs
+        .insert("thinking".to_string(), Value::Bool(true));
+
+    let rendered = render_request(&request);
+
+    assert!(rendered.starts_with("<｜begin▁of▁sentence｜>\n\n## Tools\n"));
+    assert!(rendered.contains("</functions>\nSystem prompt."));
+    assert!(rendered.ends_with("<｜User｜>Hello<｜Assistant｜><think>"));
+}
+
+#[test]
+fn developer_turn_is_treated_as_last_user_like_turn() {
+    let request = thinking_request(vec![ChatMessage::developer("Follow policy.", None)]);
+
+    let rendered = render_request(&request);
+
+    assert!(rendered.contains("# The user's message is: Follow policy."));
+    assert!(rendered.ends_with("<｜Assistant｜><think>"));
+}
+
+#[test]
+fn historical_assistant_reasoning_is_dropped_before_final_user_turn() {
+    let request = thinking_request(vec![
+        ChatMessage::assistant_blocks(vec![
+            AssistantContentBlock::Reasoning {
+                text: "internal reasoning".to_string(),
+            },
+            AssistantContentBlock::Text {
+                text: "Visible answer.".to_string(),
+            },
+        ]),
+        ChatMessage::user("What about the next one?"),
+    ]);
+
+    let rendered = render_request(&request);
+
+    assert!(!rendered.contains("internal reasoning"));
+    assert!(rendered.contains("Visible answer.<｜end▁of▁sentence｜>"));
+    assert!(rendered.ends_with("<｜User｜>What about the next one?<｜Assistant｜><think>"));
+}
+
+#[test]
+fn historical_assistant_reasoning_is_dropped_before_final_developer_turn() {
+    let request = thinking_request(vec![
+        ChatMessage::assistant_blocks(vec![
+            AssistantContentBlock::Reasoning {
+                text: "internal reasoning".to_string(),
+            },
+            AssistantContentBlock::Text {
+                text: "Visible answer.".to_string(),
+            },
+        ]),
+        ChatMessage::developer("Follow the rubric.", None),
+    ]);
+
+    let rendered = render_request(&request);
+
+    assert!(!rendered.contains("internal reasoning"));
+    assert!(rendered.contains("Visible answer.<｜end▁of▁sentence｜>"));
+    assert!(rendered.ends_with(
+        "<｜User｜>\n\n# The user's message is: Follow the rubric.<｜Assistant｜><think>"
+    ));
+}
+
+#[test]
+fn tool_results_after_last_user_resume_thinking() {
+    let request = thinking_request(vec![
+        ChatMessage::user("Check the weather."),
+        ChatMessage::assistant_blocks(vec![AssistantContentBlock::ToolCall(AssistantToolCall {
+            id: "call-weather".to_string(),
+            name: "weather".to_string(),
+            arguments: "{\"city\":\"Hangzhou\"}".to_string(),
+        })]),
+        ChatMessage::tool_response("{\"ok\":true}", "call-weather"),
+    ]);
+
+    let rendered = render_request(&request);
+
+    assert!(rendered.contains(
+        "<｜User｜>Check the weather.<｜Assistant｜><think></think>\n\n<｜DSML｜function_calls>"
+    ));
+    assert!(rendered.ends_with("</function_results>\n\n<think>"));
+}
+
+#[test]
+fn tool_results_follow_assistant_tool_call_id_order() {
+    let request = thinking_request(vec![
+        ChatMessage::user("Check two cities."),
+        ChatMessage::assistant_blocks(vec![
+            AssistantContentBlock::ToolCall(AssistantToolCall {
+                id: "call-hangzhou".to_string(),
+                name: "weather".to_string(),
+                arguments: "{\"city\":\"Hangzhou\"}".to_string(),
+            }),
+            AssistantContentBlock::ToolCall(AssistantToolCall {
+                id: "call-beijing".to_string(),
+                name: "weather".to_string(),
+                arguments: "{\"city\":\"Beijing\"}".to_string(),
+            }),
+        ]),
+        ChatMessage::tool_response("{\"city\":\"Beijing\"}", "call-beijing"),
+        ChatMessage::tool_response("{\"city\":\"Hangzhou\"}", "call-hangzhou"),
+    ]);
+
+    let rendered = render_request(&request);
+
+    assert!(rendered.contains(
+        "<function_results>\n<result>{\"city\":\"Hangzhou\"}</result>\n<result>{\"city\":\"Beijing\"}</result>\n</function_results>"
+    ));
+}
+
+#[test]
+fn tool_results_require_matching_tool_call_ids() {
+    let request = thinking_request(vec![
+        ChatMessage::user("Check the weather."),
+        ChatMessage::assistant_blocks(vec![AssistantContentBlock::ToolCall(AssistantToolCall {
+            id: "call-weather".to_string(),
+            name: "weather".to_string(),
+            arguments: "{\"city\":\"Hangzhou\"}".to_string(),
+        })]),
+        ChatMessage::tool_response("{\"ok\":true}", "call-unknown"),
+    ]);
+
+    let error = render_result(&request).unwrap_err();
+
+    expect!["chat template error: invalid DeepSeek V3.2 tool message: unknown tool_call_id `call-unknown`"]
+        .assert_eq(&error.to_report_string());
+}
+
+#[test]
+fn assistant_after_last_user_requires_reasoning_or_tool_calls() {
+    let request = thinking_request(vec![
+        ChatMessage::user("Hello"),
+        ChatMessage::assistant_text("Hi there."),
+    ]);
+
+    let error = render_result(&request).unwrap_err();
+
+    expect!["chat template error: invalid DeepSeek V3.2 assistant message after last user message: expected reasoning or tool calls"]
+        .assert_eq(&error.to_report_string());
+}
+#[test]
+fn render_rejects_multimodal_input() {
+    let request = ChatRequest {
+        messages: vec![ChatMessage::user(vec![ChatContentPart::image_url(
+            "data:image/png;base64,test",
+        )])],
+        ..ChatRequest::for_test()
+    };
+
+    let error = DeepSeekV32ChatRenderer::new().render(&request).unwrap_err();
+
+    assert!(matches!(
+        error,
+        Error::UnsupportedMultimodalContent("image_url")
+    ));
+}
diff --git a/rust/src/chat/src/renderer/deepseek_v4/encoding.rs b/rust/src/chat/src/renderer/deepseek_v4/encoding.rs
new file mode 100644
index 000000000000..54a69248618c
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v4/encoding.rs
@@ -0,0 +1,558 @@
+//! DeepSeek V4 prompt renderer.
+//!
+//! Original Python implementation:
+//! <https://github.com/vllm-project/vllm/blob/main/vllm/tokenizers/deepseek_v4_encoding.py>
+
+use std::collections::HashMap;
+use std::fmt::Write as _;
+
+use serde::Serialize;
+use serde_json::Value;
+use serde_json_fmt::JsonFormat;
+
+use crate::error::{Error, Result};
+use crate::request::{ChatContent, ChatMessage, ChatRequest, ChatTool, ReasoningEffort};
+use crate::{AssistantContentBlock, AssistantMessageExt, AssistantToolCall};
+
+const BOS_TOKEN: &str = "<｜begin▁of▁sentence｜>";
+const EOS_TOKEN: &str = "<｜end▁of▁sentence｜>";
+const THINKING_START_TOKEN: &str = "<think>";
+const THINKING_END_TOKEN: &str = "</think>";
+const DSML_TOKEN: &str = "｜DSML｜";
+const USER_SP_TOKEN: &str = "<｜User｜>";
+const ASSISTANT_SP_TOKEN: &str = "<｜Assistant｜>";
+const REASONING_EFFORT_MAX: &str = concat!(
+    "Reasoning Effort: Absolute maximum with no shortcuts permitted.\n",
+    "You MUST be very thorough in your thinking and comprehensively decompose the problem to resolve the root cause, rigorously stress-testing your logic against all potential paths, edge cases, and adversarial scenarios.\n",
+    "Explicitly write out your entire deliberation process, documenting every intermediate step, considered alternative, and rejected hypothesis to ensure absolutely no assumption is left unchecked.\n\n",
+);
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum ThinkingMode {
+    Chat,
+    Thinking,
+}
+
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Serialize)]
+struct RenderedToolSchema<'a> {
+    name: &'a str,
+    description: Option<&'a str>,
+    parameters: &'a Value,
+    strict: Option<bool>,
+}
+
+/// Render one chat request into the final prompt string.
+pub(super) fn render_request(request: &ChatRequest) -> Result<String> {
+    let (thinking_mode, max_reasoning_effort) = resolve_thinking_options(request)?;
+    let request_tools = request_tools(request);
+    let synthetic_tool_system = needs_synthetic_tool_system(request, request_tools);
+    let drop_thinking = request.parse_template_bool("drop_thinking")?.unwrap_or(true)
+        && !rendered_tools_present(request, request_tools);
+    let last_user_render_index =
+        find_last_user_render_index(request.messages.as_slice(), synthetic_tool_system);
+    let mut out = String::from(BOS_TOKEN);
+    if thinking_mode == ThinkingMode::Thinking && max_reasoning_effort {
+        out.push_str(REASONING_EFFORT_MAX);
+    }
+
+    let mut request_tools_attached = false;
+    let mut render_index = 0isize;
+    if synthetic_tool_system {
+        render_system_message(&mut out, None, request_tools)?;
+        request_tools_attached = true;
+        render_index += 1;
+    }
+
+    for (message_index, message) in request.messages.iter().enumerate() {
+        if is_following_tool_response(request.messages.as_slice(), message_index) {
+            continue;
+        }
+
+        let current_render_index = render_index;
+        render_index += 1;
+
+        match message {
+            ChatMessage::System { content } => {
+                let tools = if !request_tools_attached {
+                    request_tools_attached = true;
+                    request_tools
+                } else {
+                    &[]
+                };
+                render_system_message(&mut out, Some(content), tools)?;
+            }
+            ChatMessage::Developer { content, tools } => {
+                render_developer_message(&mut out, content, tools.as_deref().unwrap_or(&[]))?;
+            }
+            ChatMessage::User { content } => render_user_message(&mut out, content)?,
+            ChatMessage::Assistant { content } => {
+                // Mirror Python: thinking block (reasoning + </think>) is
+                // emitted whenever thinking is active and reasoning isn't
+                // dropped - i.e. drop_thinking is off OR this turn lies
+                // strictly after the last user turn.
+                let emit_thinking_block = thinking_mode == ThinkingMode::Thinking
+                    && (!drop_thinking || current_render_index > last_user_render_index);
+                let append_eos = !(message_index + 1 == request.messages.len()
+                    && request.chat_options.continue_final_message());
+                render_assistant_message(&mut out, emit_thinking_block, append_eos, content)?;
+            }
+            ChatMessage::ToolResponse { .. } => {
+                render_tool_response_block(&mut out, request.messages.as_slice(), message_index)?;
+            }
+        }
+
+        if is_user_like_entry(message)
+            && next_rendered_entry_is_assistant_or_end(request.messages.as_slice(), message_index)
+        {
+            write_assistant_transition(
+                &mut out,
+                thinking_mode,
+                drop_thinking,
+                current_render_index >= last_user_render_index,
+            );
+        }
+    }
+
+    Ok(out)
+}
+
+/// Resolve DeepSeek V4's thinking controls. Unlike the Python tokenizer
+/// wrapper, the Rust renderer only consumes the typed top-level
+/// `reasoning_effort`; the generic template-kwargs map is left for HF
+/// templates.
+fn resolve_thinking_options(request: &ChatRequest) -> Result<(ThinkingMode, bool)> {
+    let mut thinking_mode = match request.enable_thinking()?.unwrap_or(false) {
+        true => ThinkingMode::Thinking,
+        false => ThinkingMode::Chat,
+    };
+    let mut max_reasoning_effort = false;
+
+    match request.chat_options.reasoning_effort {
+        Some(ReasoningEffort::None) => thinking_mode = ThinkingMode::Chat,
+        Some(ReasoningEffort::Max | ReasoningEffort::XHigh) => max_reasoning_effort = true,
+        Some(_) | None => {}
+    }
+
+    Ok((thinking_mode, max_reasoning_effort))
+}
+
+/// Return request-level tools only when native tool parsing is enabled.
+fn request_tools(request: &ChatRequest) -> &[ChatTool] {
+    if request.tool_parsing_enabled() {
+        request.tools.as_slice()
+    } else {
+        &[]
+    }
+}
+
+/// Return whether request tools need a synthetic leading system entry.
+fn needs_synthetic_tool_system(request: &ChatRequest, request_tools: &[ChatTool]) -> bool {
+    !request_tools.is_empty()
+        && !request
+            .messages
+            .iter()
+            .any(|message| matches!(message, ChatMessage::System { .. }))
+}
+
+/// Return whether any rendered message carries tool schemas.
+fn rendered_tools_present(request: &ChatRequest, request_tools: &[ChatTool]) -> bool {
+    !request_tools.is_empty()
+        || request.messages.iter().any(|message| {
+            matches!(
+                message,
+                ChatMessage::Developer {
+                    tools: Some(tools),
+                    ..
+                } if !tools.is_empty()
+            )
+        })
+}
+
+/// Find the last user-like turn after inline tool-response merging.
+fn find_last_user_render_index(messages: &[ChatMessage], synthetic_tool_system: bool) -> isize {
+    let mut render_index = isize::from(synthetic_tool_system);
+    let mut last_user_index = -1;
+
+    for (message_index, message) in messages.iter().enumerate() {
+        if is_following_tool_response(messages, message_index) {
+            continue;
+        }
+
+        if is_user_like_entry(message) {
+            last_user_index = render_index;
+        }
+        render_index += 1;
+    }
+
+    last_user_index
+}
+
+/// Return whether this tool message is already covered by a previous tool run.
+fn is_following_tool_response(messages: &[ChatMessage], message_index: usize) -> bool {
+    matches!(messages[message_index], ChatMessage::ToolResponse { .. })
+        && message_index > 0
+        && matches!(
+            messages[message_index - 1],
+            ChatMessage::ToolResponse { .. }
+        )
+}
+
+/// Return whether one rendered entry should be treated as user-like.
+fn is_user_like_entry(message: &ChatMessage) -> bool {
+    matches!(
+        message,
+        ChatMessage::Developer { .. } | ChatMessage::User { .. } | ChatMessage::ToolResponse { .. }
+    )
+}
+
+/// Return whether the next rendered entry is assistant, or there is no next
+/// entry.
+fn next_rendered_entry_is_assistant_or_end(messages: &[ChatMessage], message_index: usize) -> bool {
+    let mut next_index = message_index + 1;
+    if matches!(messages[message_index], ChatMessage::ToolResponse { .. }) {
+        while next_index < messages.len()
+            && matches!(messages[next_index], ChatMessage::ToolResponse { .. })
+        {
+            next_index += 1;
+        }
+    }
+
+    messages
+        .get(next_index)
+        .map(|message| matches!(message, ChatMessage::Assistant { .. }))
+        .unwrap_or(true)
+}
+
+/// Render the tool preamble shown to the model, V4 flavor.
+fn render_tools(out: &mut String, tools: &[ChatTool]) -> Result<()> {
+    out.push_str(
+        r#"## Tools
+
+You have access to a set of tools to help answer the user's question. You can invoke tools by writing a "<｜DSML｜tool_calls>" block like the following:
+
+<｜DSML｜tool_calls>
+<｜DSML｜invoke name="$TOOL_NAME">
+<｜DSML｜parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</｜DSML｜parameter>
+...
+</｜DSML｜invoke>
+<｜DSML｜invoke name="$TOOL_NAME2">
+...
+</｜DSML｜invoke>
+</｜DSML｜tool_calls>
+
+String parameters should be specified as is and set `string="true"`. For all other types (numbers, booleans, arrays, objects), pass the value in JSON format and set `string="false"`.
+
+If thinking_mode is enabled (triggered by <think>), you MUST output your complete reasoning inside <think>...</think> BEFORE any tool calls or final response.
+
+Otherwise, output directly after </think> with tool calls or final response.
+
+### Available Tool Schemas
+
+"#,
+    );
+
+    for (index, tool) in tools.iter().enumerate() {
+        if index > 0 {
+            out.push('\n');
+        }
+        render_tool_schema(out, tool)?;
+    }
+
+    out.push_str(
+        "\n\nYou MUST strictly follow the above defined tool name and parameter schemas to invoke tool calls.\n",
+    );
+    Ok(())
+}
+
+/// Serialize one typed tool schema into the JSON shape embedded in the prompt.
+fn render_tool_schema(out: &mut String, tool: &ChatTool) -> Result<()> {
+    out.push_str(&json_dumps(&RenderedToolSchema {
+        name: &tool.name,
+        description: tool.description.as_deref(),
+        parameters: &tool.parameters,
+        strict: tool.strict,
+    })?);
+    Ok(())
+}
+
+/// Render a system turn, optionally followed by the V4 tool preamble.
+fn render_system_message(
+    out: &mut String,
+    content: Option<&ChatContent>,
+    tools: &[ChatTool],
+) -> Result<()> {
+    if let Some(content) = content {
+        write_chat_content(out, content)?;
+    }
+    if !tools.is_empty() {
+        out.push_str("\n\n");
+        render_tools(out, tools)?;
+    }
+    Ok(())
+}
+
+/// Developer messages are rendered as user-like turns with optional tools.
+fn render_developer_message(
+    out: &mut String,
+    content: &ChatContent,
+    tools: &[ChatTool],
+) -> Result<()> {
+    if content.is_empty() {
+        return Err(Error::ChatTemplate(
+            "invalid DeepSeek V4 developer message: empty content".to_string(),
+        ));
+    }
+
+    out.push_str(USER_SP_TOKEN);
+    write_chat_content(out, content)?;
+    if !tools.is_empty() {
+        out.push_str("\n\n");
+        render_tools(out, tools)?;
+    }
+    Ok(())
+}
+
+/// Render one plain user turn.
+fn render_user_message(out: &mut String, content: &ChatContent) -> Result<()> {
+    out.push_str(USER_SP_TOKEN);
+    write_chat_content(out, content)?;
+    Ok(())
+}
+
+/// Render a contiguous tool-response run as one synthetic user turn.
+fn render_tool_response_block(
+    out: &mut String,
+    messages: &[ChatMessage],
+    message_index: usize,
+) -> Result<()> {
+    let (block_start, block_end) = tool_response_block_bounds(messages, message_index);
+    let sorted_indices = sorted_tool_response_indices(messages, block_start, block_end);
+
+    out.push_str(USER_SP_TOKEN);
+    for (offset, message_index) in sorted_indices.iter().enumerate() {
+        if offset > 0 {
+            out.push_str("\n\n");
+        }
+        let ChatMessage::ToolResponse { content, .. } = &messages[*message_index] else {
+            unreachable!("tool response block should only contain tool messages");
+        };
+        write_tool_result(out, content)?;
+    }
+
+    Ok(())
+}
+
+/// Return the contiguous tool-response block containing `actual_index`.
+fn tool_response_block_bounds(messages: &[ChatMessage], actual_index: usize) -> (usize, usize) {
+    let mut block_start = actual_index;
+    while block_start > 0 && matches!(messages[block_start - 1], ChatMessage::ToolResponse { .. }) {
+        block_start -= 1;
+    }
+
+    let mut block_end = actual_index + 1;
+    while block_end < messages.len()
+        && matches!(messages[block_end], ChatMessage::ToolResponse { .. })
+    {
+        block_end += 1;
+    }
+
+    (block_start, block_end)
+}
+
+fn sorted_tool_response_indices(
+    messages: &[ChatMessage],
+    block_start: usize,
+    block_end: usize,
+) -> Vec<usize> {
+    let Some(tool_call_order) = last_tool_call_order_before(messages, block_start) else {
+        return (block_start..block_end).collect();
+    };
+
+    let mut indices = (block_start..block_end).collect::<Vec<_>>();
+    indices.sort_by_key(|index| {
+        let ChatMessage::ToolResponse { tool_call_id, .. } = &messages[*index] else {
+            unreachable!("tool response block should only contain tool messages");
+        };
+        tool_call_order.get(tool_call_id.as_str()).copied().unwrap_or(0)
+    });
+    indices
+}
+
+fn last_tool_call_order_before(
+    messages: &[ChatMessage],
+    message_index: usize,
+) -> Option<HashMap<&str, usize>> {
+    let mut tool_call_order = None;
+    for message in &messages[..message_index] {
+        if let ChatMessage::Assistant { content } = message {
+            let order = content
+                .tool_calls()
+                .enumerate()
+                .map(|(index, tool_call)| (tool_call.id.as_str(), index))
+                .collect::<HashMap<_, _>>();
+            if !order.is_empty() {
+                tool_call_order = Some(order);
+            }
+        }
+    }
+    tool_call_order
+}
+
+/// Render one tool response payload inside a V4 `<tool_result>` block.
+fn write_tool_result(out: &mut String, content: &ChatContent) -> Result<()> {
+    out.push_str("<tool_result>");
+    write_chat_content(out, content)?;
+    out.push_str("</tool_result>");
+    Ok(())
+}
+
+/// Append the assistant transition token after a user-like turn.
+fn write_assistant_transition(
+    out: &mut String,
+    thinking_mode: ThinkingMode,
+    drop_thinking: bool,
+    opens_thinking: bool,
+) {
+    out.push_str(ASSISTANT_SP_TOKEN);
+    if thinking_mode == ThinkingMode::Thinking && (!drop_thinking || opens_thinking) {
+        out.push_str(THINKING_START_TOKEN);
+    } else {
+        out.push_str(THINKING_END_TOKEN);
+    }
+}
+
+/// Render one assistant turn, including optional reasoning, DSML tool calls,
+/// and the trailing EOS marker.
+fn render_assistant_message(
+    out: &mut String,
+    emit_thinking_block: bool,
+    append_eos: bool,
+    content: &[AssistantContentBlock],
+) -> Result<()> {
+    let has_tool_calls = content.has_tool_calls();
+
+    if emit_thinking_block {
+        if content.has_reasoning() {
+            write_assistant_reasoning(out, content);
+        }
+        out.push_str(THINKING_END_TOKEN);
+    }
+
+    write_assistant_text(out, content);
+
+    if has_tool_calls {
+        out.push_str("\n\n<｜DSML｜tool_calls>\n");
+        for (index, tool_call) in content.tool_calls().enumerate() {
+            if index > 0 {
+                out.push('\n');
+            }
+            render_tool_call(out, tool_call)?;
+        }
+        out.push_str("\n</｜DSML｜tool_calls>");
+    }
+
+    if append_eos {
+        out.push_str(EOS_TOKEN);
+    }
+    Ok(())
+}
+
+/// Render one assistant tool call in DSML XML-like format.
+fn render_tool_call(out: &mut String, tool_call: &AssistantToolCall) -> Result<()> {
+    writeln!(out, "<{DSML_TOKEN}invoke name=\"{}\">", tool_call.name)
+        .expect("writing to String cannot fail");
+    encode_arguments_to_dsml(out, tool_call)?;
+    write!(out, "\n</{DSML_TOKEN}invoke>").expect("writing to String cannot fail");
+    Ok(())
+}
+
+/// Convert one assistant tool-call arguments object into DSML parameter form.
+///
+/// String values are emitted raw with `string="true"`, while all other JSON
+/// values are rendered with JSON syntax and `string="false"`.
+fn encode_arguments_to_dsml(out: &mut String, tool_call: &AssistantToolCall) -> Result<()> {
+    let arguments: Value = serde_json::from_str(&tool_call.arguments).map_err(|error| {
+        Error::ChatTemplate(format!(
+            "assistant tool call has invalid JSON arguments for DeepSeek V4: {error}"
+        ))
+    })?;
+    let Some(arguments) = arguments.as_object() else {
+        return Err(Error::ChatTemplate(
+            "assistant tool call arguments for DeepSeek V4 must be a JSON object".to_string(),
+        ));
+    };
+
+    let mut wrote_parameter = false;
+    for (key, value) in arguments {
+        if wrote_parameter {
+            out.push('\n');
+        }
+
+        let is_string = matches!(value, Value::String(_));
+        write!(
+            out,
+            "<{DSML_TOKEN}parameter name=\"{key}\" string=\"{}\">",
+            if is_string { "true" } else { "false" }
+        )
+        .expect("writing to String cannot fail");
+
+        match value {
+            Value::String(value) => out.push_str(value),
+            value => out.push_str(&json_dumps(value)?),
+        }
+
+        write!(out, "</{DSML_TOKEN}parameter>").expect("writing to String cannot fail");
+        wrote_parameter = true;
+    }
+
+    Ok(())
+}
+
+/// Write chat content directly into the destination buffer without flattening
+/// it into an intermediate `String`.
+fn write_chat_content(out: &mut String, content: &ChatContent) -> Result<()> {
+    match content {
+        ChatContent::Text(text) => out.push_str(text),
+        ChatContent::Parts(parts) => {
+            for part in parts {
+                out.push_str(part.as_text()?);
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Write all reasoning blocks in encounter order.
+fn write_assistant_reasoning(out: &mut String, content: &[AssistantContentBlock]) {
+    for block in content {
+        if let AssistantContentBlock::Reasoning { text } = block {
+            out.push_str(text);
+        }
+    }
+}
+
+/// Write all visible assistant text blocks in encounter order.
+fn write_assistant_text(out: &mut String, content: &[AssistantContentBlock]) {
+    for block in content {
+        if let AssistantContentBlock::Text { text } = block {
+            out.push_str(text);
+        }
+    }
+}
+
+/// Compact JSON serialization used by this renderer for exact prompt text.
+fn json_dumps<T: Serialize>(value: &T) -> Result<String> {
+    JsonFormat::new()
+        .comma(", ")
+        .expect("literal comma separator is valid JSON")
+        .colon(": ")
+        .expect("literal colon separator is valid JSON")
+        .ascii(false)
+        .format_to_string(value)
+        .map_err(|error| {
+            Error::ChatTemplate(format!(
+                "failed to serialize DeepSeek V4 JSON payload: {error}"
+            ))
+        })
+}
diff --git a/rust/src/chat/src/renderer/deepseek_v4/fixtures/test_input_1.json b/rust/src/chat/src/renderer/deepseek_v4/fixtures/test_input_1.json
new file mode 100644
index 000000000000..d423b221fa88
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v4/fixtures/test_input_1.json
@@ -0,0 +1,81 @@
+{
+    "tools": [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the weather for a specific location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city name"
+                        },
+                        "unit": {
+                            "type": "string",
+                            "enum": ["celsius", "fahrenheit"],
+                            "description": "Temperature unit"
+                        }
+                    },
+                    "required": ["location"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "search",
+                "description": "Search the web for information",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Search query"
+                        },
+                        "num_results": {
+                            "type": "integer",
+                            "description": "Number of results to return"
+                        }
+                    },
+                    "required": ["query"]
+                }
+            }
+        }
+    ],
+    "messages": [
+        {
+            "role": "system",
+            "content": "You are a helpful assistant."
+        },
+        {
+            "role": "user",
+            "content": "What's the weather in Beijing?"
+        },
+        {
+            "role": "assistant",
+            "reasoning_content": "The user wants to know the weather in Beijing. I should use the get_weather tool.",
+            "tool_calls": [
+                {
+                    "id": "call_001",
+                    "type": "function",
+                    "function": {
+                        "name": "get_weather",
+                        "arguments": "{\"location\": \"Beijing\", \"unit\": \"celsius\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "tool_call_id": "call_001",
+            "content": "{\"temperature\": 22, \"condition\": \"sunny\", \"humidity\": 45}"
+        },
+        {
+            "role": "assistant",
+            "reasoning_content": "Got the weather data. Let me format a nice response.",
+            "content": "The weather in Beijing is currently sunny with a temperature of 22°C and 45% humidity."
+        }
+    ]
+}
diff --git a/rust/src/chat/src/renderer/deepseek_v4/fixtures/test_input_2.json b/rust/src/chat/src/renderer/deepseek_v4/fixtures/test_input_2.json
new file mode 100644
index 000000000000..13b3454a902d
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v4/fixtures/test_input_2.json
@@ -0,0 +1,24 @@
+[
+  {
+    "role": "system",
+    "content": "You are a helpful assistant."
+  },
+  {
+    "role": "user",
+    "content": "Hello"
+  },
+  {
+    "role": "assistant",
+    "reasoning_content": "The user said hello, I should greet back.",
+    "content": "Hi there! How can I help you?"
+  },
+  {
+    "role": "user",
+    "content": "What is the capital of France?"
+  },
+  {
+    "role": "assistant",
+    "reasoning_content": "The user asks about the capital of France. It is Paris.",
+    "content": "The capital of France is Paris."
+  }
+]
\ No newline at end of file
diff --git a/rust/src/chat/src/renderer/deepseek_v4/fixtures/test_output_1.txt b/rust/src/chat/src/renderer/deepseek_v4/fixtures/test_output_1.txt
new file mode 100644
index 000000000000..7e3c9bd5a394
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v4/fixtures/test_output_1.txt
@@ -0,0 +1,36 @@
+<｜begin▁of▁sentence｜>You are a helpful assistant.
+
+## Tools
+
+You have access to a set of tools to help answer the user's question. You can invoke tools by writing a "<｜DSML｜tool_calls>" block like the following:
+
+<｜DSML｜tool_calls>
+<｜DSML｜invoke name="$TOOL_NAME">
+<｜DSML｜parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</｜DSML｜parameter>
+...
+</｜DSML｜invoke>
+<｜DSML｜invoke name="$TOOL_NAME2">
+...
+</｜DSML｜invoke>
+</｜DSML｜tool_calls>
+
+String parameters should be specified as is and set `string="true"`. For all other types (numbers, booleans, arrays, objects), pass the value in JSON format and set `string="false"`.
+
+If thinking_mode is enabled (triggered by <think>), you MUST output your complete reasoning inside <think>...</think> BEFORE any tool calls or final response.
+
+Otherwise, output directly after </think> with tool calls or final response.
+
+### Available Tool Schemas
+
+{"name": "get_weather", "description": "Get the weather for a specific location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city name"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "Temperature unit"}}, "required": ["location"]}}
+{"name": "search", "description": "Search the web for information", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}, "num_results": {"type": "integer", "description": "Number of results to return"}}, "required": ["query"]}}
+
+You MUST strictly follow the above defined tool name and parameter schemas to invoke tool calls.
+<｜User｜>What's the weather in Beijing?<｜Assistant｜><think>The user wants to know the weather in Beijing. I should use the get_weather tool.</think>
+
+<｜DSML｜tool_calls>
+<｜DSML｜invoke name="get_weather">
+<｜DSML｜parameter name="location" string="true">Beijing</｜DSML｜parameter>
+<｜DSML｜parameter name="unit" string="true">celsius</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜tool_calls><｜end▁of▁sentence｜><｜User｜><tool_result>{"temperature": 22, "condition": "sunny", "humidity": 45}</tool_result><｜Assistant｜><think>Got the weather data. Let me format a nice response.</think>The weather in Beijing is currently sunny with a temperature of 22°C and 45% humidity.<｜end▁of▁sentence｜>
\ No newline at end of file
diff --git a/rust/src/chat/src/renderer/deepseek_v4/fixtures/test_output_2.txt b/rust/src/chat/src/renderer/deepseek_v4/fixtures/test_output_2.txt
new file mode 100644
index 000000000000..fc397ef54972
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v4/fixtures/test_output_2.txt
@@ -0,0 +1 @@
+<｜begin▁of▁sentence｜>You are a helpful assistant.<｜User｜>Hello<｜Assistant｜></think>Hi there! How can I help you?<｜end▁of▁sentence｜><｜User｜>What is the capital of France?<｜Assistant｜><think>The user asks about the capital of France. It is Paris.</think>The capital of France is Paris.<｜end▁of▁sentence｜>
\ No newline at end of file
diff --git a/rust/src/chat/src/renderer/deepseek_v4/mod.rs b/rust/src/chat/src/renderer/deepseek_v4/mod.rs
new file mode 100644
index 000000000000..7c3f4631d20e
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v4/mod.rs
@@ -0,0 +1,30 @@
+mod encoding;
+
+use vllm_text::Prompt;
+
+use super::{ChatRenderer, RenderedPrompt};
+use crate::Result;
+use crate::request::ChatRequest;
+
+/// Dedicated DeepSeek V4 renderer.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct DeepSeekV4ChatRenderer;
+
+impl DeepSeekV4ChatRenderer {
+    pub fn new() -> Self {
+        Self
+    }
+}
+
+impl ChatRenderer for DeepSeekV4ChatRenderer {
+    fn render(&self, request: &ChatRequest) -> Result<RenderedPrompt> {
+        request.validate()?;
+
+        Ok(RenderedPrompt {
+            prompt: Prompt::Text(encoding::render_request(request)?),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/rust/src/chat/src/renderer/deepseek_v4/tests.rs b/rust/src/chat/src/renderer/deepseek_v4/tests.rs
new file mode 100644
index 000000000000..78936d8e68e4
--- /dev/null
+++ b/rust/src/chat/src/renderer/deepseek_v4/tests.rs
@@ -0,0 +1,369 @@
+use std::fs;
+use std::path::PathBuf;
+
+use expect_test::{ExpectFile, expect, expect_file};
+use serde::Deserialize;
+use serde_json::Value;
+
+use super::DeepSeekV4ChatRenderer;
+use crate::event::{AssistantContentBlock, AssistantToolCall};
+use crate::request::{
+    ChatMessage, ChatRequest, ChatTool, ChatToolChoice, GenerationPromptMode, ReasoningEffort,
+};
+use crate::{ChatRenderer, ChatRole};
+
+#[derive(Debug, Deserialize)]
+#[serde(untagged)]
+enum FixtureFile {
+    WithTools(FixtureRequest),
+    MessagesOnly(Vec<FixtureMessage>),
+}
+
+#[derive(Debug, Deserialize)]
+struct FixtureRequest {
+    #[serde(default)]
+    tools: Vec<FixtureTool>,
+    messages: Vec<FixtureMessage>,
+}
+
+impl FixtureFile {
+    fn into_parts(self) -> (Vec<FixtureTool>, Vec<FixtureMessage>) {
+        match self {
+            Self::WithTools(req) => (req.tools, req.messages),
+            Self::MessagesOnly(messages) => (Vec::new(), messages),
+        }
+    }
+}
+
+#[derive(Debug, Deserialize)]
+struct FixtureTool {
+    function: FixtureToolFunction,
+}
+
+#[derive(Debug, Deserialize)]
+struct FixtureToolFunction {
+    name: String,
+    description: Option<String>,
+    parameters: Value,
+    #[serde(default)]
+    strict: Option<bool>,
+}
+
+#[derive(Debug, Deserialize)]
+#[serde(tag = "role", rename_all = "snake_case")]
+enum FixtureMessage {
+    System {
+        content: String,
+    },
+    Developer {
+        content: String,
+        #[serde(default)]
+        tools: Vec<FixtureTool>,
+    },
+    User {
+        content: String,
+    },
+    Assistant {
+        #[serde(default)]
+        content: String,
+        #[serde(default)]
+        reasoning_content: String,
+        #[serde(default)]
+        tool_calls: Vec<FixtureToolCall>,
+    },
+    Tool {
+        content: String,
+        #[serde(default)]
+        tool_call_id: Option<String>,
+    },
+}
+
+#[derive(Debug, Deserialize)]
+struct FixtureToolCall {
+    #[serde(default)]
+    id: Option<String>,
+    function: FixtureToolCallFunction,
+}
+
+#[derive(Debug, Deserialize)]
+struct FixtureToolCallFunction {
+    name: String,
+    arguments: String,
+}
+
+fn render_request(request: &ChatRequest) -> String {
+    DeepSeekV4ChatRenderer::new()
+        .render(request)
+        .unwrap()
+        .prompt
+        .into_text()
+        .expect("deepseek v4 renderer should return text prompt")
+}
+
+fn fixture_request(input_name: &str) -> ChatRequest {
+    let fixture = fs::read_to_string(fixture_path(input_name)).unwrap();
+    let fixture: FixtureFile = serde_json::from_str(&fixture).unwrap();
+    let (fixture_tools, fixture_messages) = fixture.into_parts();
+    let mut request = ChatRequest {
+        request_id: "deepseek-v4-fixture".to_string(),
+        messages: fixture_messages
+            .into_iter()
+            .enumerate()
+            .map(|(index, message)| match message {
+                FixtureMessage::System { content } => ChatMessage::system(content),
+                FixtureMessage::Developer { content, tools } => ChatMessage::developer(
+                    content,
+                    (!tools.is_empty()).then(|| to_chat_tools(&tools)),
+                ),
+                FixtureMessage::User { content } => ChatMessage::user(content),
+                FixtureMessage::Assistant {
+                    content,
+                    reasoning_content,
+                    tool_calls,
+                } => {
+                    let mut blocks = Vec::new();
+                    if !reasoning_content.is_empty() {
+                        blocks.push(AssistantContentBlock::Reasoning {
+                            text: reasoning_content,
+                        });
+                    }
+                    if !content.is_empty() {
+                        blocks.push(AssistantContentBlock::Text { text: content });
+                    }
+                    blocks.extend(tool_calls.into_iter().enumerate().map(
+                        |(tool_index, tool_call)| {
+                            AssistantContentBlock::ToolCall(AssistantToolCall {
+                                id: tool_call.id.unwrap_or_else(|| {
+                                    format!("fixture-tool-call-{index}-{tool_index}")
+                                }),
+                                name: tool_call.function.name,
+                                arguments: tool_call.function.arguments,
+                            })
+                        },
+                    ));
+                    ChatMessage::assistant_blocks(blocks)
+                }
+                FixtureMessage::Tool {
+                    content,
+                    tool_call_id,
+                } => ChatMessage::tool_response(
+                    content,
+                    tool_call_id.unwrap_or_else(|| format!("fixture-tool-response-{index}")),
+                ),
+            })
+            .collect(),
+        tools: to_chat_tools(&fixture_tools),
+        tool_choice: if fixture_tools.is_empty() {
+            ChatToolChoice::None
+        } else {
+            ChatToolChoice::Auto
+        },
+        ..ChatRequest::for_test()
+    };
+    if matches!(
+        request.messages.last().map(ChatMessage::role),
+        Some(ChatRole::Assistant)
+    ) {
+        request.chat_options.generation_prompt_mode = GenerationPromptMode::NoGenerationPrompt;
+    }
+    request
+        .chat_options
+        .template_kwargs
+        .insert("thinking".to_string(), Value::Bool(true));
+    request
+}
+
+fn to_chat_tools(tools: &[FixtureTool]) -> Vec<ChatTool> {
+    tools
+        .iter()
+        .map(|tool| ChatTool {
+            name: tool.function.name.clone(),
+            description: tool.function.description.clone(),
+            parameters: tool.function.parameters.clone(),
+            strict: tool.function.strict,
+        })
+        .collect()
+}
+
+fn fixture_path(name: &str) -> PathBuf {
+    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .join("src/renderer/deepseek_v4")
+        .join("fixtures")
+        .join(name)
+}
+
+fn assert_fixture(input_name: &str, expected: ExpectFile) {
+    let request = fixture_request(input_name);
+    let rendered = render_request(&request);
+    expected.assert_eq(&rendered);
+}
+
+#[test]
+fn renders_v4_fixture_1_tool_call_round_trip() {
+    assert_fixture(
+        "test_input_1.json",
+        expect_file!["fixtures/test_output_1.txt"],
+    );
+}
+
+#[test]
+fn renders_v4_fixture_2_multi_turn_drop_thinking() {
+    assert_fixture(
+        "test_input_2.json",
+        expect_file!["fixtures/test_output_2.txt"],
+    );
+}
+
+#[test]
+fn reasoning_effort_max_adds_prefix_when_thinking_is_enabled() {
+    let mut request = ChatRequest {
+        messages: vec![ChatMessage::user("solve it")],
+        ..ChatRequest::for_test()
+    };
+    request
+        .chat_options
+        .template_kwargs
+        .insert("thinking".to_string(), Value::Bool(true));
+    request.chat_options.reasoning_effort = Some(ReasoningEffort::Max);
+
+    let rendered = render_request(&request);
+
+    expect![[r#"
+        <｜begin▁of▁sentence｜>Reasoning Effort: Absolute maximum with no shortcuts permitted.
+        You MUST be very thorough in your thinking and comprehensively decompose the problem to resolve the root cause, rigorously stress-testing your logic against all potential paths, edge cases, and adversarial scenarios.
+        Explicitly write out your entire deliberation process, documenting every intermediate step, considered alternative, and rejected hypothesis to ensure absolutely no assumption is left unchecked.
+
+        <｜User｜>solve it<｜Assistant｜><think>"#]]
+    .assert_eq(&rendered);
+}
+
+#[test]
+fn reasoning_effort_none_disables_thinking() {
+    let mut request = ChatRequest {
+        messages: vec![ChatMessage::user("answer directly")],
+        ..ChatRequest::for_test()
+    };
+    request
+        .chat_options
+        .template_kwargs
+        .insert("thinking".to_string(), Value::Bool(true));
+    request.chat_options.reasoning_effort = Some(ReasoningEffort::None);
+
+    let rendered = render_request(&request);
+
+    expect!["<｜begin▁of▁sentence｜><｜User｜>answer directly<｜Assistant｜></think>"]
+        .assert_eq(&rendered);
+}
+
+#[test]
+fn reasoning_effort_template_kwarg_is_ignored() {
+    let mut request = ChatRequest {
+        messages: vec![ChatMessage::user("solve it")],
+        ..ChatRequest::for_test()
+    };
+    request
+        .chat_options
+        .template_kwargs
+        .insert("thinking".to_string(), Value::Bool(true));
+    request.chat_options.template_kwargs.insert(
+        "reasoning_effort".to_string(),
+        Value::String("max".to_string()),
+    );
+
+    let rendered = render_request(&request);
+
+    expect!["<｜begin▁of▁sentence｜><｜User｜>solve it<｜Assistant｜><think>"].assert_eq(&rendered);
+}
+
+#[test]
+fn tool_results_are_sorted_by_previous_assistant_tool_call_order() {
+    let request = ChatRequest {
+        messages: vec![
+            ChatMessage::assistant_blocks(vec![
+                AssistantContentBlock::ToolCall(AssistantToolCall {
+                    id: "second".to_string(),
+                    name: "second_tool".to_string(),
+                    arguments: "{}".to_string(),
+                }),
+                AssistantContentBlock::ToolCall(AssistantToolCall {
+                    id: "first".to_string(),
+                    name: "first_tool".to_string(),
+                    arguments: "{}".to_string(),
+                }),
+            ]),
+            ChatMessage::tool_response("first result", "first"),
+            ChatMessage::tool_response("second result", "second"),
+        ],
+        ..ChatRequest::for_test()
+    };
+
+    let rendered = render_request(&request);
+
+    expect![[r#"
+        <｜begin▁of▁sentence｜>
+
+        <｜DSML｜tool_calls>
+        <｜DSML｜invoke name="second_tool">
+
+        </｜DSML｜invoke>
+        <｜DSML｜invoke name="first_tool">
+
+        </｜DSML｜invoke>
+        </｜DSML｜tool_calls><｜end▁of▁sentence｜><｜User｜><tool_result>second result</tool_result>
+
+        <tool_result>first result</tool_result><｜Assistant｜></think>"#]]
+    .assert_eq(&rendered);
+}
+
+#[test]
+fn drop_thinking_false_keeps_prior_assistant_reasoning() {
+    let mut request = ChatRequest {
+        messages: vec![
+            ChatMessage::assistant_blocks(vec![
+                AssistantContentBlock::Reasoning {
+                    text: "old reasoning".to_string(),
+                },
+                AssistantContentBlock::Text {
+                    text: "old answer".to_string(),
+                },
+            ]),
+            ChatMessage::user("next"),
+        ],
+        ..ChatRequest::for_test()
+    };
+    request
+        .chat_options
+        .template_kwargs
+        .insert("thinking".to_string(), Value::Bool(true));
+    request
+        .chat_options
+        .template_kwargs
+        .insert("drop_thinking".to_string(), Value::Bool(false));
+
+    let rendered = render_request(&request);
+
+    expect!(
+        "<｜begin▁of▁sentence｜>old reasoning</think>old answer<｜end▁of▁sentence｜><｜User｜>next<｜Assistant｜><think>"
+    )
+    .assert_eq(&rendered);
+}
+
+#[test]
+fn continue_final_assistant_omits_final_eos() {
+    let request = ChatRequest {
+        messages: vec![
+            ChatMessage::user("write"),
+            ChatMessage::assistant_text("partial answer"),
+        ],
+        chat_options: crate::request::ChatOptions {
+            generation_prompt_mode: GenerationPromptMode::ContinueFinalAssistant,
+            ..Default::default()
+        },
+        ..ChatRequest::for_test()
+    };
+
+    let rendered = render_request(&request);
+
+    expect!["<｜begin▁of▁sentence｜><｜User｜>write<｜Assistant｜></think>partial answer"]
+        .assert_eq(&rendered);
+}
diff --git a/rust/src/chat/src/renderer/hf/error.rs b/rust/src/chat/src/renderer/hf/error.rs
new file mode 100644
index 000000000000..fcc48c75aba9
--- /dev/null
+++ b/rust/src/chat/src/renderer/hf/error.rs
@@ -0,0 +1,15 @@
+use thiserror::Error as ThisError;
+
+#[derive(Debug, ThisError)]
+pub(crate) enum TemplateError {
+    #[error("failed to render jinja template")]
+    Jinja(#[from] minijinja::Error),
+    #[error("failed to read chat template file")]
+    ReadTemplateFile(#[source] std::io::Error),
+    #[error("chat template looks like a file path but does not exist")]
+    MissingTemplatePath,
+    #[error("failed to parse chat_template.json")]
+    ParseTemplateJson(#[source] serde_json::Error),
+    #[error("chat_template.json does not contain a valid template")]
+    InvalidTemplateJson,
+}
diff --git a/rust/src/chat/src/renderer/hf/format.rs b/rust/src/chat/src/renderer/hf/format.rs
new file mode 100644
index 000000000000..a9b35d0f41c8
--- /dev/null
+++ b/rust/src/chat/src/renderer/hf/format.rs
@@ -0,0 +1,400 @@
+use std::collections::{HashSet, VecDeque};
+use std::fmt;
+use std::str::FromStr;
+
+use minijinja::machinery::ast::{Expr, ForLoop, Set, Stmt};
+use minijinja::machinery::{WhitespaceConfig, parse};
+use minijinja::syntax::SyntaxConfig;
+use serde_with::DeserializeFromStr;
+
+/// Chat template content format.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum ChatTemplateContentFormat {
+    /// Content is a simple string.
+    #[default]
+    String,
+    /// Content is a list of structured parts (OpenAI format).
+    OpenAi,
+}
+
+/// Configurable chat-template content format selection.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, DeserializeFromStr)]
+pub enum ChatTemplateContentFormatOption {
+    /// Detect the format from the template source.
+    #[default]
+    Auto,
+    /// Always flatten content into plain strings before rendering.
+    String,
+    /// Always pass content through in OpenAI-compatible structured form.
+    OpenAi,
+}
+
+impl ChatTemplateContentFormatOption {
+    pub const AUTO_LITERAL: &str = "auto";
+    pub const OPENAI_LITERAL: &str = "openai";
+    pub const STRING_LITERAL: &str = "string";
+}
+
+impl FromStr for ChatTemplateContentFormatOption {
+    type Err = String;
+
+    fn from_str(value: &str) -> Result<Self, Self::Err> {
+        if value.eq_ignore_ascii_case(Self::AUTO_LITERAL) {
+            Ok(Self::Auto)
+        } else if value.eq_ignore_ascii_case(Self::STRING_LITERAL) {
+            Ok(Self::String)
+        } else if value.eq_ignore_ascii_case(Self::OPENAI_LITERAL) {
+            Ok(Self::OpenAi)
+        } else {
+            Err(format!(
+                "invalid content format `{value}`; expected one of: {}, {}, {}",
+                Self::AUTO_LITERAL,
+                Self::STRING_LITERAL,
+                Self::OPENAI_LITERAL
+            ))
+        }
+    }
+}
+
+impl fmt::Display for ChatTemplateContentFormatOption {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Auto => f.write_str(Self::AUTO_LITERAL),
+            Self::String => f.write_str(Self::STRING_LITERAL),
+            Self::OpenAi => f.write_str(Self::OPENAI_LITERAL),
+        }
+    }
+}
+
+fn is_var_access(expr: &Expr, varname: &str) -> bool {
+    matches!(expr, Expr::Var(v) if v.id == varname)
+}
+
+fn is_const_str(expr: &Expr, value: &str) -> bool {
+    matches!(expr, Expr::Const(c) if c.value.as_str() == Some(value))
+}
+
+fn is_attr_access(expr: &Expr, varname: &str, key: &str) -> bool {
+    match expr {
+        Expr::GetItem(g) => is_var_access(&g.expr, varname) && is_const_str(&g.subscript_expr, key),
+        Expr::GetAttr(g) => is_var_access(&g.expr, varname) && g.name == key,
+        _ => false,
+    }
+}
+
+fn is_var_or_elems_access(expr: &Expr, varname: &str, key: Option<&str>) -> bool {
+    match expr {
+        Expr::Filter(f) => {
+            f.expr.as_ref().is_some_and(|inner| is_var_or_elems_access(inner, varname, key))
+        }
+        Expr::Test(t) => is_var_or_elems_access(&t.expr, varname, key),
+        Expr::Slice(s) => is_var_or_elems_access(&s.expr, varname, key),
+        _ => key.map_or_else(
+            || is_var_access(expr, varname),
+            |key| is_attr_access(expr, varname, key),
+        ),
+    }
+}
+
+fn visit_stmt<'a>(
+    stmt: &'a Stmt<'a>,
+    assignments: &mut Vec<&'a Set<'a>>,
+    loops: &mut Vec<&'a ForLoop<'a>>,
+) {
+    match stmt {
+        Stmt::Template(t) => {
+            for child in &t.children {
+                visit_stmt(child, assignments, loops);
+            }
+        }
+        Stmt::ForLoop(fl) => {
+            loops.push(fl);
+            for child in &fl.body {
+                visit_stmt(child, assignments, loops);
+            }
+            for child in &fl.else_body {
+                visit_stmt(child, assignments, loops);
+            }
+        }
+        Stmt::IfCond(ic) => {
+            for child in &ic.true_body {
+                visit_stmt(child, assignments, loops);
+            }
+            for child in &ic.false_body {
+                visit_stmt(child, assignments, loops);
+            }
+        }
+        Stmt::WithBlock(wb) => {
+            for child in &wb.body {
+                visit_stmt(child, assignments, loops);
+            }
+        }
+        Stmt::Set(set_stmt) => assignments.push(set_stmt),
+        Stmt::SetBlock(sb) => {
+            for child in &sb.body {
+                visit_stmt(child, assignments, loops);
+            }
+        }
+        Stmt::AutoEscape(ae) => {
+            for child in &ae.body {
+                visit_stmt(child, assignments, loops);
+            }
+        }
+        Stmt::FilterBlock(fb) => {
+            for child in &fb.body {
+                visit_stmt(child, assignments, loops);
+            }
+        }
+        Stmt::Block(b) => {
+            for child in &b.body {
+                visit_stmt(child, assignments, loops);
+            }
+        }
+        Stmt::Macro(m) => {
+            for child in &m.body {
+                visit_stmt(child, assignments, loops);
+            }
+        }
+        Stmt::CallBlock(cb) => {
+            for child in &cb.macro_decl.body {
+                visit_stmt(child, assignments, loops);
+            }
+        }
+        _ => {}
+    }
+}
+
+fn collect_assignments_and_loops<'a>(
+    root: &'a Stmt<'a>,
+) -> (Vec<&'a Set<'a>>, Vec<&'a ForLoop<'a>>) {
+    let mut assignments = Vec::new();
+    let mut loops = Vec::new();
+    visit_stmt(root, &mut assignments, &mut loops);
+    (assignments, loops)
+}
+
+fn iter_nodes_assign_var_or_elems(root: &Stmt<'_>, varname: &str) -> Vec<String> {
+    let (assignments, _) = collect_assignments_and_loops(root);
+
+    let mut discovered = vec![varname.to_string()];
+    let mut seen = HashSet::from([varname.to_string()]);
+    let mut related = VecDeque::from([varname.to_string()]);
+
+    while let Some(related_varname) = related.pop_front() {
+        for assign in &assignments {
+            let Expr::Var(lhs) = &assign.target else {
+                continue;
+            };
+
+            if is_var_or_elems_access(&assign.expr, &related_varname, None) {
+                let lhs_name = lhs.id.to_string();
+                if seen.insert(lhs_name.clone()) {
+                    discovered.push(lhs_name.clone());
+                    if lhs_name != related_varname {
+                        related.push_back(lhs_name);
+                    }
+                }
+            }
+        }
+    }
+
+    discovered
+}
+
+fn iter_nodes_assign_messages_item(root: &Stmt<'_>) -> Vec<String> {
+    let message_varnames = iter_nodes_assign_var_or_elems(root, "messages");
+    let (_, loops) = collect_assignments_and_loops(root);
+
+    let mut discovered = Vec::new();
+    let mut seen = HashSet::new();
+
+    for loop_ast in loops {
+        let Expr::Var(target) = &loop_ast.target else {
+            continue;
+        };
+
+        if message_varnames
+            .iter()
+            .any(|varname| is_var_or_elems_access(&loop_ast.iter, varname, None))
+        {
+            let target_name = target.id.to_string();
+            if seen.insert(target_name.clone()) {
+                discovered.push(target_name);
+            }
+        }
+    }
+
+    discovered
+}
+
+fn has_content_item_loop(root: &Stmt<'_>) -> bool {
+    let message_varnames = iter_nodes_assign_messages_item(root);
+    let (_, loops) = collect_assignments_and_loops(root);
+
+    loops.into_iter().any(|loop_ast| {
+        matches!(loop_ast.target, Expr::Var(_))
+            && message_varnames
+                .iter()
+                .any(|varname| is_var_or_elems_access(&loop_ast.iter, varname, Some("content")))
+    })
+}
+
+/// Detect the content format expected by a Jinja2 chat template based on AST
+/// analysis.
+pub fn detect_chat_template_content_format(template: &str) -> ChatTemplateContentFormat {
+    let ast = match parse(
+        template,
+        "template",
+        SyntaxConfig {},
+        WhitespaceConfig::default(),
+    ) {
+        Ok(ast) => ast,
+        Err(_) => return ChatTemplateContentFormat::String,
+    };
+
+    if has_content_item_loop(&ast) {
+        ChatTemplateContentFormat::OpenAi
+    } else {
+        ChatTemplateContentFormat::String
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs;
+    use std::path::{Path, PathBuf};
+
+    use expect_test::expect;
+
+    use super::{ChatTemplateContentFormat, detect_chat_template_content_format};
+
+    fn detect(template: &str) -> ChatTemplateContentFormat {
+        detect_chat_template_content_format(template)
+    }
+
+    fn vllm_examples_dir() -> PathBuf {
+        Path::new(env!("CARGO_MANIFEST_DIR"))
+            .join("tests/templates/vllm_examples")
+            .canonicalize()
+            .expect("vLLM example template directory should exist locally")
+    }
+
+    fn read_vllm_example(relative_path: &str) -> String {
+        fs::read_to_string(vllm_examples_dir().join(relative_path))
+            .unwrap_or_else(|_| panic!("failed to read vLLM example template: {relative_path}"))
+    }
+
+    fn iter_vllm_example_template_paths() -> impl Iterator<Item = PathBuf> {
+        let mut paths = fs::read_dir(vllm_examples_dir())
+            .expect("failed to read vLLM example template directory")
+            .map(|entry| entry.expect("failed to read vLLM example template dir entry").path())
+            .filter(|path| path.extension().is_some_and(|ext| ext == "jinja"))
+            .collect::<Vec<_>>();
+        paths.sort();
+        paths.into_iter()
+    }
+
+    #[test]
+    fn detects_string_template_without_content_loop() {
+        assert_eq!(
+            detect("{% for message in messages %}{{ message.content }}{% endfor %}"),
+            ChatTemplateContentFormat::String
+        );
+    }
+
+    #[test]
+    fn detects_openai_template_with_direct_content_loop() {
+        assert_eq!(
+            detect(
+                "{% for message in messages %}{% for content in message['content'] %}{{ content }}{% endfor %}{% endfor %}"
+            ),
+            ChatTemplateContentFormat::OpenAi
+        );
+    }
+
+    #[test]
+    fn detects_openai_template_with_messages_alias() {
+        assert_eq!(
+            detect(
+                "{% set msgs = messages %}{% for message in msgs %}{% for content in message.content %}{{ content }}{% endfor %}{% endfor %}"
+            ),
+            ChatTemplateContentFormat::OpenAi
+        );
+    }
+
+    #[test]
+    fn does_not_detect_content_alias_loop_as_openai() {
+        assert_eq!(
+            detect(
+                "{% for message in messages %}{% set parts = message.content %}{% for item in parts %}{{ item }}{% endfor %}{% endfor %}"
+            ),
+            ChatTemplateContentFormat::String
+        );
+    }
+
+    #[test]
+    fn does_not_treat_length_or_index_access_as_openai() {
+        assert_eq!(
+            detect("{% for message in messages %}{{ message.content|length }}{% endfor %}"),
+            ChatTemplateContentFormat::String
+        );
+        assert_eq!(
+            detect("{% for message in messages %}{{ message.content[0] }}{% endfor %}"),
+            ChatTemplateContentFormat::String
+        );
+    }
+
+    #[test]
+    fn matches_vllm_example_template_formats() {
+        let snapshot = iter_vllm_example_template_paths()
+            .map(|path| {
+                let file_name = path
+                    .file_name()
+                    .and_then(|name| name.to_str())
+                    .expect("template file name should be valid UTF-8");
+                let template = read_vllm_example(file_name);
+                let format = detect(&template);
+                format!("{file_name:50} => {format:?}")
+            })
+            .collect::<Vec<_>>()
+            .join("\n");
+
+        expect![[r#"
+            template_alpaca.jinja                              => String
+            template_baichuan.jinja                            => String
+            template_chatglm.jinja                             => String
+            template_chatglm2.jinja                            => String
+            template_chatml.jinja                              => String
+            template_falcon.jinja                              => String
+            template_falcon_180b.jinja                         => String
+            template_inkbot.jinja                              => String
+            template_teleflm.jinja                             => String
+            tool_chat_template_deepseekr1.jinja                => String
+            tool_chat_template_deepseekv3.jinja                => String
+            tool_chat_template_deepseekv31.jinja               => String
+            tool_chat_template_functiongemma.jinja             => String
+            tool_chat_template_gemma3_pythonic.jinja           => OpenAi
+            tool_chat_template_gemma4.jinja                    => OpenAi
+            tool_chat_template_glm4.jinja                      => String
+            tool_chat_template_granite.jinja                   => String
+            tool_chat_template_granite_20b_fc.jinja            => String
+            tool_chat_template_hermes.jinja                    => String
+            tool_chat_template_hunyuan_a13b.jinja              => String
+            tool_chat_template_internlm2_tool.jinja            => String
+            tool_chat_template_llama3.1_json.jinja             => OpenAi
+            tool_chat_template_llama3.2_json.jinja             => OpenAi
+            tool_chat_template_llama3.2_pythonic.jinja         => String
+            tool_chat_template_llama4_json.jinja               => OpenAi
+            tool_chat_template_llama4_pythonic.jinja           => OpenAi
+            tool_chat_template_minimax_m1.jinja                => OpenAi
+            tool_chat_template_mistral.jinja                   => String
+            tool_chat_template_mistral3.jinja                  => OpenAi
+            tool_chat_template_mistral_parallel.jinja          => String
+            tool_chat_template_phi4_mini.jinja                 => String
+            tool_chat_template_qwen3coder.jinja                => String
+            tool_chat_template_toolace.jinja                   => String
+            tool_chat_template_xlam_llama.jinja                => String
+            tool_chat_template_xlam_qwen.jinja                 => String"#]]
+        .assert_eq(&snapshot);
+    }
+}
diff --git a/rust/src/chat/src/renderer/hf/mod.rs b/rust/src/chat/src/renderer/hf/mod.rs
new file mode 100644
index 000000000000..851df5068f41
--- /dev/null
+++ b/rust/src/chat/src/renderer/hf/mod.rs
@@ -0,0 +1,970 @@
+use std::collections::HashMap;
+
+use serde::Serialize;
+use serde_json::Value;
+use thiserror_ext::AsReport as _;
+use tracing::{info, trace, warn};
+use vllm_text::Prompt;
+use vllm_text::backend::hf::{
+    HfSpecialTokens, HfTokenizerConfig, ResolvedModelFiles, load_tokenizer_config,
+};
+
+use self::format::{
+    ChatTemplateContentFormat, ChatTemplateContentFormatOption as ContentFormatOption,
+};
+use self::template::{CompiledChatTemplate, TemplateContext};
+use super::{ChatRenderer, RenderedPrompt};
+use crate::error::Result;
+use crate::request::{ChatContent, ChatContentPart, ChatMessage, ChatRequest};
+use crate::{
+    AssistantContentBlock, AssistantMessageExt, ChatTool, Error, LoadModelBackendsOptions,
+};
+
+mod error;
+mod format;
+mod template;
+mod tojson;
+
+pub use template::{load_chat_template, resolve_chat_template};
+
+pub use self::format::ChatTemplateContentFormatOption;
+
+#[derive(Debug, Clone)]
+pub struct MultimodalRenderInfo {
+    pub placeholder_token: String,
+}
+
+/// Hugging Face chat-template renderer backed by the local Jinja chat-template
+/// state.
+pub struct HfChatRenderer {
+    default_template: Option<CompiledChatTemplate>,
+    default_template_kwargs: HashMap<String, Value>,
+    content_format: ContentFormatOption,
+    special_tokens: Option<HfSpecialTokens>,
+    multimodal: Option<MultimodalRenderInfo>,
+}
+
+impl HfChatRenderer {
+    /// Create a renderer from the given template string.
+    pub fn new(
+        template: Option<String>,
+        default_template_kwargs: HashMap<String, Value>,
+        content_format: ContentFormatOption,
+    ) -> Result<Self> {
+        Ok(Self {
+            default_template: template
+                .map(|template| {
+                    CompiledChatTemplate::new(template, content_format)
+                        .map_err(|error| Error::ChatTemplate(error.to_report_string()))
+                })
+                .transpose()?,
+            default_template_kwargs,
+            content_format,
+            special_tokens: None,
+            multimodal: None,
+        })
+    }
+
+    pub fn with_special_tokens(mut self, special_tokens: Option<HfSpecialTokens>) -> Self {
+        self.special_tokens = special_tokens;
+        self
+    }
+
+    pub fn with_multimodal(mut self, multimodal: Option<MultimodalRenderInfo>) -> Self {
+        self.multimodal = multimodal;
+        self
+    }
+
+    /// Create a renderer from the given model files and loading options.
+    pub fn load(
+        files: &ResolvedModelFiles,
+        options: LoadModelBackendsOptions,
+        multimodal: Option<MultimodalRenderInfo>,
+    ) -> Result<Self> {
+        let HfTokenizerConfig {
+            special_tokens,
+            chat_template,
+            ..
+        } = load_tokenizer_config(files.tokenizer_config_path.as_deref())?;
+        let mut template = chat_template;
+        let special_tokens = (!special_tokens.is_empty()).then_some(special_tokens);
+
+        if let Some(configured_template) = options.chat_template.as_deref() {
+            template = Some(
+                resolve_chat_template(configured_template)
+                    .map_err(|error| Error::ChatTemplate(error.to_report_string()))?,
+            );
+            info!("using configured chat template override");
+        } else if let Some(chat_template_path) = files.chat_template_path.as_deref() {
+            // If independent chat template file(s) exist and contain non-empty content,
+            // they take priority over template entries in the tokenizer config
+            let file_template = load_chat_template(chat_template_path)
+                .map_err(|error| Error::ChatTemplate(error.to_report_string()))?;
+
+            if file_template.as_ref().is_some_and(|t| !t.trim().is_empty()) {
+                info!(
+                    path = %chat_template_path.display(),
+                    "loaded dedicated chat template file, overriding tokenizer_config chat_template"
+                );
+                template = file_template;
+            } else {
+                warn!(
+                    path = %chat_template_path.display(),
+                    "ignoring empty dedicated chat template file and falling back to tokenizer_config chat_template"
+                );
+            }
+        }
+
+        Ok(Self::new(
+            template,
+            options.default_chat_template_kwargs,
+            options.chat_template_content_format,
+        )?
+        .with_special_tokens(special_tokens)
+        .with_multimodal(multimodal))
+    }
+
+    /// Apply the chat template to one chat request, rendering the prompt string
+    /// to be tokenized and submitted to the model.
+    ///
+    /// If the request carries a per-request `chat_template` override, a
+    /// temporary template is compiled from that string and used instead of
+    /// the model's default.
+    fn apply_chat_template(&self, request: &ChatRequest) -> Result<RenderedPrompt> {
+        let override_template = request
+            .chat_options
+            .chat_template
+            .as_ref()
+            .map(|template| {
+                CompiledChatTemplate::new(template.clone(), self.content_format)
+                    .map_err(|error| Error::ChatTemplate(error.to_report_string()))
+            })
+            .transpose()?;
+        let template = override_template
+            .as_ref()
+            .or(self.default_template.as_ref())
+            .ok_or(Error::MissingChatTemplate)?;
+
+        self.apply_chat_template_inner(template, request)
+    }
+
+    fn apply_chat_template_inner(
+        &self,
+        effective_template: &CompiledChatTemplate,
+        request: &ChatRequest,
+    ) -> Result<RenderedPrompt> {
+        let messages = to_template_messages(
+            &request.messages,
+            effective_template.content_format(),
+            self.multimodal.as_ref(),
+        )?;
+        let tools = request.tool_parsing_enabled().then(|| to_template_tools(&request.tools));
+        trace!(
+            message_count = messages.len(),
+            content_format = ?effective_template.content_format(),
+            ?messages,
+            ?tools,
+            "applying chat template"
+        );
+
+        let mut merged_template_kwargs = self.default_template_kwargs.clone();
+        merged_template_kwargs.extend(request.chat_options.template_kwargs.clone());
+        let prompt = effective_template
+            .apply(TemplateContext {
+                messages: &messages,
+                add_generation_prompt: request.chat_options.add_generation_prompt(),
+                continue_final_message: request.chat_options.continue_final_message(),
+                tools: tools.as_deref(),
+                documents: request.documents.as_deref(),
+                template_kwargs: Some(&merged_template_kwargs),
+                special_tokens: self.special_tokens.as_ref(),
+                reasoning_effort: request.chat_options.reasoning_effort,
+            })
+            .map_err(|error| Error::ChatTemplate(error.to_report_string()))?;
+
+        trace!(
+            prompt_len = prompt.len(),
+            prompt, "rendered chat template prompt"
+        );
+
+        Ok(RenderedPrompt {
+            prompt: Prompt::Text(prompt),
+        })
+    }
+}
+
+impl ChatRenderer for HfChatRenderer {
+    fn render(&self, request: &ChatRequest) -> Result<RenderedPrompt> {
+        self.apply_chat_template(request)
+    }
+}
+
+/// Chat message in the JSON shape expected by Jinja chat templates.
+// TODO: borrow more fields directly from the original `ChatMessage`.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Serialize)]
+struct TemplateMessage {
+    role: &'static str,
+    content: TemplateContent,
+    // Developer-role messages may provide message-local tools in the same shape
+    // as top-level request tools.
+    tools: Option<Vec<TemplateTool>>,
+    // Reasoning-capable HF templates are inconsistent on the exact field name,
+    // so expose both variants for compatibility.
+    reasoning: Option<String>,
+    reasoning_content: Option<String>,
+    // Function-call-capable templates commonly expect assistant tool calls
+    // under this OpenAI-compatible field name.
+    tool_calls: Option<Vec<TemplateToolCall>>,
+    // Tool-role messages refer back to the assistant call they are answering.
+    tool_call_id: Option<String>,
+}
+
+/// Chat content in the two shapes HF templates commonly expect.
+#[derive(Debug, Serialize)]
+#[serde(untagged)]
+enum TemplateContent {
+    String(String),
+    OpenAi(Vec<TemplateContentPart>),
+}
+
+#[derive(Debug, Serialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+enum TemplateContentPart {
+    Text { text: String },
+    Image,
+}
+
+#[derive(Debug, Serialize)]
+struct TemplateToolCall {
+    id: String,
+    r#type: &'static str, // always "function"
+    function: TemplateToolFunction,
+}
+
+#[derive(Debug, Serialize)]
+struct TemplateToolFunction {
+    name: String,
+    arguments: Value,
+}
+
+#[derive(Debug, Serialize)]
+pub(super) struct TemplateTool {
+    #[serde(rename = "type")]
+    tool_type: &'static str,
+    function: TemplateToolDefinition,
+}
+
+#[derive(Debug, Serialize)]
+struct TemplateToolDefinition {
+    name: String,
+    description: Option<String>,
+    parameters: Value,
+    strict: Option<bool>,
+}
+
+/// Convert chat messages into the JSON shape expected by Jinja chat templates.
+fn to_template_messages(
+    messages: &[ChatMessage],
+    content_format: ChatTemplateContentFormat,
+    multimodal: Option<&MultimodalRenderInfo>,
+) -> Result<Vec<TemplateMessage>> {
+    messages
+        .iter()
+        .map(|message| to_template_message(message, content_format, multimodal))
+        .collect()
+}
+
+fn to_template_message(
+    message: &ChatMessage,
+    content_format: ChatTemplateContentFormat,
+    multimodal: Option<&MultimodalRenderInfo>,
+) -> Result<TemplateMessage> {
+    Ok(match message {
+        ChatMessage::System { content } => TemplateMessage {
+            role: "system",
+            content: to_template_content(content, content_format, multimodal)?,
+            tools: None,
+            reasoning: None,
+            reasoning_content: None,
+            tool_calls: None,
+            tool_call_id: None,
+        },
+        ChatMessage::Developer { content, tools } => TemplateMessage {
+            role: "developer",
+            content: to_template_content(content, content_format, multimodal)?,
+            tools: tools.as_deref().map(to_template_tools),
+            reasoning: None,
+            reasoning_content: None,
+            tool_calls: None,
+            tool_call_id: None,
+        },
+        ChatMessage::User { content } => TemplateMessage {
+            role: "user",
+            content: to_template_content(content, content_format, multimodal)?,
+            tools: None,
+            reasoning: None,
+            reasoning_content: None,
+            tool_calls: None,
+            tool_call_id: None,
+        },
+        ChatMessage::Assistant { content } => {
+            let text = content.text();
+            let reasoning = content.reasoning();
+            let tool_calls = to_template_tool_calls(content)?;
+            let content =
+                to_template_content(&ChatContent::Text(text), content_format, multimodal)?;
+            TemplateMessage {
+                role: "assistant",
+                content,
+                tools: None,
+                reasoning: reasoning.clone(),
+                reasoning_content: reasoning,
+                tool_calls,
+                tool_call_id: None,
+            }
+        }
+        ChatMessage::ToolResponse {
+            content,
+            tool_call_id,
+        } => TemplateMessage {
+            role: "tool",
+            content: to_template_content(content, content_format, multimodal)?,
+            tools: None,
+            reasoning: None,
+            reasoning_content: None,
+            tool_calls: None,
+            tool_call_id: Some(tool_call_id.clone()),
+        },
+    })
+}
+
+fn to_template_tool_calls(
+    content: &[AssistantContentBlock],
+) -> Result<Option<Vec<TemplateToolCall>>> {
+    let mut tool_calls = Vec::new();
+
+    for tool_call in content.tool_calls() {
+        let arguments = serde_json::from_str::<Value>(&tool_call.arguments).map_err(|error| {
+            Error::ChatTemplate(format!(
+                "assistant tool call `{}` has invalid JSON arguments: {}",
+                tool_call.id,
+                error.as_report()
+            ))
+        })?;
+
+        tool_calls.push(TemplateToolCall {
+            id: tool_call.id.clone(),
+            r#type: "function",
+            function: TemplateToolFunction {
+                name: tool_call.name.clone(),
+                arguments,
+            },
+        });
+    }
+
+    Ok((!tool_calls.is_empty()).then_some(tool_calls))
+}
+
+fn to_template_content(
+    content: &ChatContent,
+    content_format: ChatTemplateContentFormat,
+    multimodal: Option<&MultimodalRenderInfo>,
+) -> Result<TemplateContent> {
+    Ok(match content_format {
+        ChatTemplateContentFormat::String => {
+            TemplateContent::String(to_template_string_content(content, multimodal)?)
+        }
+        ChatTemplateContentFormat::OpenAi => {
+            TemplateContent::OpenAi(to_template_openai_content(content, multimodal)?)
+        }
+    })
+}
+
+fn to_template_openai_content(
+    content: &ChatContent,
+    multimodal: Option<&MultimodalRenderInfo>,
+) -> Result<Vec<TemplateContentPart>> {
+    match content {
+        ChatContent::Text(text) => Ok(vec![TemplateContentPart::Text { text: text.clone() }]),
+        ChatContent::Parts(parts) => parts
+            .iter()
+            .map(|part| match part {
+                ChatContentPart::Text { text } => {
+                    Ok(TemplateContentPart::Text { text: text.clone() })
+                }
+                // All multimodal contents are normalized to `{ "type": <modality> }`.
+                ChatContentPart::ImageUrl { .. } => {
+                    multimodal.ok_or(Error::UnsupportedMultimodalContent("image_url"))?;
+                    Ok(TemplateContentPart::Image)
+                }
+            })
+            .collect(),
+    }
+}
+
+fn to_template_string_content(
+    content: &ChatContent,
+    multimodal: Option<&MultimodalRenderInfo>,
+) -> Result<String> {
+    match content {
+        ChatContent::Text(text) => Ok(text.clone()),
+        ChatContent::Parts(parts) => {
+            let mut out = String::new();
+            for part in parts {
+                match part {
+                    ChatContentPart::Text { text } => out.push_str(text),
+                    ChatContentPart::ImageUrl { .. } => {
+                        let multimodal =
+                            multimodal.ok_or(Error::UnsupportedMultimodalContent("image_url"))?;
+                        out.push_str(&multimodal.placeholder_token);
+                    }
+                }
+            }
+            Ok(out)
+        }
+    }
+}
+
+fn to_template_tools(tools: &[ChatTool]) -> Vec<TemplateTool> {
+    tools
+        .iter()
+        .map(|tool| TemplateTool {
+            tool_type: "function",
+            function: TemplateToolDefinition {
+                name: tool.name.clone(),
+                description: tool.description.clone(),
+                parameters: tool.parameters.clone(),
+                strict: tool.strict,
+            },
+        })
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+
+    use expect_test::expect;
+    use serde_json::Value;
+    use vllm_text::Prompt;
+    use vllm_text::backend::hf::{HfSpecialTokens, NamedSpecialToken};
+
+    use super::{ChatTemplateContentFormatOption, HfChatRenderer, MultimodalRenderInfo};
+    use crate::request::{
+        ChatContentPart, ChatMessage, ChatRequest, ChatRole, ChatTool, ChatToolChoice,
+        GenerationPromptMode, ReasoningEffort,
+    };
+    use crate::{AssistantContentBlock, ChatRenderer, Error, Result};
+
+    const QWEN3_0_6B_TEMPLATE: &str = include_str!("../../../tests/templates/qwen3.jinja");
+    const QWEN3_5_0_8B_TEMPLATE: &str = include_str!("../../../tests/templates/qwen35.jinja");
+
+    fn sample_request(messages: Vec<ChatMessage>) -> ChatRequest {
+        ChatRequest {
+            messages,
+            request_id: "render-test".to_string(),
+            ..ChatRequest::for_test()
+        }
+    }
+
+    fn render(template: Option<&str>, request: &ChatRequest) -> Result<String> {
+        HfChatRenderer::new(
+            template.map(str::to_owned),
+            HashMap::new(),
+            ChatTemplateContentFormatOption::Auto,
+        )?
+        .render(request)?
+        .prompt
+        .into_text()
+        .map_err(|_| unreachable!("HF renderer should return text prompt"))
+    }
+
+    fn render_mm(
+        template: &str,
+        request: &ChatRequest,
+        content_format: ChatTemplateContentFormatOption,
+    ) -> Result<crate::RenderedPrompt> {
+        HfChatRenderer::new(Some(template.to_string()), HashMap::new(), content_format)?
+            .with_multimodal(Some(MultimodalRenderInfo {
+                placeholder_token: "<image>".to_string(),
+            }))
+            .render(request)
+    }
+
+    fn image_request() -> ChatRequest {
+        sample_request(vec![ChatMessage::user(vec![
+            ChatContentPart::text("a"),
+            ChatContentPart::image_url("data:image/png;base64,test"),
+            ChatContentPart::text("b"),
+        ])])
+    }
+
+    #[test]
+    fn string_content_format_replaces_image_with_placeholder_text() {
+        let rendered = render_mm(
+            "{{ messages[0].content }}",
+            &image_request(),
+            ChatTemplateContentFormatOption::String,
+        )
+        .unwrap();
+
+        assert_eq!(rendered.prompt, Prompt::Text("a<image>b".to_string()));
+    }
+
+    #[test]
+    fn openai_content_format_normalizes_image_url_for_template() {
+        let rendered = render_mm(
+            "{% for item in messages[0].content %}{% if item.type == 'image' %}<|image_pad|>{% else %}{{ item.text }}{% endif %}{% endfor %}",
+            &image_request(),
+            ChatTemplateContentFormatOption::OpenAi,
+        )
+        .unwrap();
+
+        assert_eq!(rendered.prompt, Prompt::Text("a<|image_pad|>b".to_string()));
+    }
+
+    #[test]
+    fn chat_template_supports_pycompat_templates() {
+        let request = sample_request(vec![ChatMessage::text(ChatRole::User, "<think>hello")]);
+
+        let rendered = render(
+            Some(
+                "{% for message in messages %}{% if message.content.startswith('<think>') %}think{% else %}plain{% endif %}{% endfor %}",
+            ),
+            &request,
+        )
+        .unwrap();
+
+        assert_eq!(rendered, "think");
+    }
+
+    #[test]
+    fn chat_template_passes_continue_final_message_to_template() {
+        let mut request = sample_request(vec![ChatMessage::text(
+            ChatRole::Assistant,
+            "The capital of",
+        )]);
+
+        assert_eq!(
+            render(
+                Some("{% if continue_final_message %}continue{% else %}new{% endif %}"),
+                &request,
+            )
+            .unwrap(),
+            "new"
+        );
+
+        request.chat_options.generation_prompt_mode = GenerationPromptMode::ContinueFinalAssistant;
+
+        assert_eq!(
+            render(
+                Some("{% if continue_final_message %}continue{% else %}new{% endif %}"),
+                &request,
+            )
+            .unwrap(),
+            "continue"
+        );
+    }
+
+    #[test]
+    fn chat_template_flattens_text_parts_for_string_templates() {
+        let request = sample_request(vec![ChatMessage::user(vec![
+            ChatContentPart::text("hello"),
+            ChatContentPart::text(" world"),
+        ])]);
+
+        let rendered = render(Some("{{ messages[0].content }}"), &request).unwrap();
+
+        assert_eq!(rendered, "hello world");
+    }
+
+    #[test]
+    fn chat_template_exposes_developer_tools() {
+        let request = sample_request(vec![ChatMessage::developer(
+            "policy",
+            Some(vec![ChatTool {
+                name: "get_weather".to_string(),
+                description: Some("Get weather".to_string()),
+                parameters: serde_json::json!({
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                    "required": ["city"],
+                }),
+                strict: Some(true),
+            }]),
+        )]);
+
+        let rendered = render(
+            Some("{{ messages[0].role }}|{{ messages[0].content }}|{{ messages[0].tools[0].function.name }}|{{ messages[0].tools[0].function.parameters.required[0] }}"),
+            &request,
+        )
+        .unwrap();
+
+        assert_eq!(rendered, "developer|policy|get_weather|city");
+    }
+
+    #[test]
+    fn chat_template_keeps_string_text_for_openai_detected_templates() {
+        let request = sample_request(vec![ChatMessage::text(ChatRole::User, "hello")]);
+
+        let rendered = render(
+            Some(
+                "{%- for message in messages %}{%- if message.content is string %}{%- set content = message.content %}{{ content }}{%- endif %}{%- endfor %}",
+            ),
+            &request,
+        )
+        .unwrap();
+
+        assert_eq!(rendered, "hello");
+    }
+
+    #[test]
+    fn chat_template_emits_openai_text_blocks_for_structured_templates() {
+        let request = sample_request(vec![ChatMessage::user(vec![
+            ChatContentPart::text("hello"),
+            ChatContentPart::text("world"),
+        ])]);
+
+        let rendered = render(
+            Some(
+                "{%- for message in messages %}{%- for item in message.content %}{{ item.text }}|{%- endfor %}{%- endfor %}",
+            ),
+            &request,
+        )
+        .unwrap();
+
+        assert_eq!(rendered, "hello|world|");
+    }
+
+    #[test]
+    fn chat_template_per_request_override() {
+        let mut request = sample_request(vec![ChatMessage::text(ChatRole::User, "hello")]);
+
+        // Default template renders one way.
+        let default_rendered = render(Some("{{ messages[0].content }}"), &request).unwrap();
+        assert_eq!(default_rendered, "hello");
+
+        // Per-request override replaces the default template entirely.
+        request.chat_options.chat_template = Some("override:{{ messages[0].content }}".to_string());
+        let overridden = render(Some("{{ messages[0].content }}"), &request).unwrap();
+        assert_eq!(overridden, "override:hello");
+    }
+
+    #[test]
+    fn chat_template_per_request_override_without_default_template() {
+        let mut request = sample_request(vec![ChatMessage::text(ChatRole::User, "hello")]);
+        request.chat_options.chat_template = Some("override:{{ messages[0].content }}".to_string());
+
+        let rendered = render(None, &request).unwrap();
+
+        assert_eq!(rendered, "override:hello");
+    }
+
+    #[test]
+    fn chat_template_requires_a_template() {
+        let request = sample_request(vec![ChatMessage::text(ChatRole::User, "hello")]);
+        let error = render(None, &request).unwrap_err();
+
+        assert!(matches!(error, Error::MissingChatTemplate));
+    }
+
+    #[test]
+    fn chat_template_injects_special_tokens_into_context() {
+        let request = sample_request(vec![ChatMessage::text(ChatRole::User, "hello")]);
+        let special_tokens = HfSpecialTokens {
+            bos_token: Some(NamedSpecialToken::Text("<bos>".to_string())),
+            ..Default::default()
+        };
+
+        let rendered = HfChatRenderer::new(
+            Some("{{ bos_token }}|{{ bos_token is defined }}".to_string()),
+            HashMap::new(),
+            ChatTemplateContentFormatOption::Auto,
+        )
+        .unwrap()
+        .with_special_tokens(Some(special_tokens))
+        .apply_chat_template(&request)
+        .unwrap();
+
+        assert_eq!(rendered.prompt, Prompt::Text("<bos>|true".to_string()));
+    }
+
+    #[test]
+    fn chat_template_exposes_assistant_reasoning_separately() {
+        let request = sample_request(vec![ChatMessage::assistant_blocks(vec![
+            AssistantContentBlock::Reasoning {
+                text: "inner".to_string(),
+            },
+            AssistantContentBlock::Text {
+                text: "outer".to_string(),
+            },
+        ])]);
+
+        let rendered = render(
+            Some("{{ messages[0].reasoning_content }}|{{ messages[0].content }}"),
+            &request,
+        )
+        .unwrap();
+
+        assert_eq!(rendered, "inner|outer");
+    }
+
+    #[test]
+    fn chat_template_forces_string_content_format_when_configured() {
+        let request = sample_request(vec![ChatMessage::user(vec![
+            ChatContentPart::text("hello"),
+            ChatContentPart::text(" world"),
+        ])]);
+
+        let rendered = HfChatRenderer::new(
+            Some(
+                "{%- if messages[0].content is string -%}{{ messages[0].content }}{%- else -%}{%- for item in messages[0].content %}{{ item.text }}|{%- endfor -%}{%- endif -%}".to_string(),
+            ),
+            HashMap::new(),
+            ChatTemplateContentFormatOption::String,
+        )
+        .unwrap()
+        .render(&request)
+        .unwrap()
+        .prompt;
+
+        assert_eq!(rendered, Prompt::Text("hello world".to_string()));
+    }
+
+    #[test]
+    fn chat_template_forces_openai_content_format_when_configured() {
+        let request = sample_request(vec![ChatMessage::user(vec![
+            ChatContentPart::text("hello"),
+            ChatContentPart::text(" world"),
+        ])]);
+
+        let rendered = HfChatRenderer::new(
+            Some("{{ messages[0].content[0].text }}{{ messages[0].content[1].text }}".to_string()),
+            HashMap::new(),
+            ChatTemplateContentFormatOption::OpenAi,
+        )
+        .unwrap()
+        .render(&request)
+        .unwrap()
+        .prompt;
+
+        assert_eq!(rendered, Prompt::Text("hello world".to_string()));
+    }
+
+    #[test]
+    fn chat_template_merges_default_template_kwargs_before_request_kwargs() {
+        let mut request = sample_request(vec![ChatMessage::text(ChatRole::User, "hello")]);
+        request
+            .chat_options
+            .template_kwargs
+            .insert("enable_thinking".to_string(), Value::Bool(true));
+
+        let renderer = HfChatRenderer::new(
+            Some("{{ enable_thinking }}|{{ default_only }}".to_string()),
+            HashMap::from([
+                ("enable_thinking".to_string(), Value::Bool(false)),
+                ("default_only".to_string(), Value::String("x".to_string())),
+            ]),
+            ChatTemplateContentFormatOption::Auto,
+        )
+        .unwrap();
+
+        let rendered = renderer.render(&request).unwrap().prompt;
+
+        assert_eq!(rendered, Prompt::Text("true|x".to_string()));
+    }
+
+    #[test]
+    fn chat_template_reasoning_effort_overrides_template_kwargs() {
+        let mut request = sample_request(vec![ChatMessage::text(ChatRole::User, "hello")]);
+        request.chat_options.reasoning_effort = Some(ReasoningEffort::Max);
+        request.chat_options.template_kwargs.insert(
+            "reasoning_effort".to_string(),
+            Value::String("low".to_string()),
+        );
+
+        let renderer = HfChatRenderer::new(
+            Some("{{ reasoning_effort }}".to_string()),
+            HashMap::from([(
+                "reasoning_effort".to_string(),
+                Value::String("medium".to_string()),
+            )]),
+            ChatTemplateContentFormatOption::Auto,
+        )
+        .unwrap();
+
+        let rendered = renderer.render(&request).unwrap().prompt;
+
+        assert_eq!(rendered, Prompt::Text("max".to_string()));
+    }
+
+    #[test]
+    fn qwen3_template_omits_reasoning_for_historical_assistant_messages() {
+        let request = sample_request(vec![
+            ChatMessage::text(
+                ChatRole::User,
+                "Hi. Tell me about the capital of France in short",
+            ),
+            ChatMessage::assistant_blocks(vec![
+                AssistantContentBlock::Reasoning {
+                    text: "\nOkay, the user is asking... I think that's all.\n".to_string(),
+                },
+                AssistantContentBlock::Text {
+                    text: "Paris is the capital of France.".to_string(),
+                },
+            ]),
+            ChatMessage::text(ChatRole::User, "Tell me about Paris more."),
+        ]);
+
+        let rendered = render(Some(QWEN3_0_6B_TEMPLATE), &request).unwrap();
+
+        expect![[r#"
+            <|im_start|>user
+            Hi. Tell me about the capital of France in short<|im_end|>
+            <|im_start|>assistant
+            Paris is the capital of France.<|im_end|>
+            <|im_start|>user
+            Tell me about Paris more.<|im_end|>
+            <|im_start|>assistant
+        "#]]
+        .assert_eq(&rendered);
+    }
+
+    #[test]
+    fn qwen3_template_keeps_reasoning_after_the_last_user_query() {
+        let mut request = sample_request(vec![
+            ChatMessage::text(ChatRole::User, "What is 1 + 1?"),
+            ChatMessage::assistant_blocks(vec![
+                AssistantContentBlock::Reasoning {
+                    text: "need simple arithmetic".to_string(),
+                },
+                AssistantContentBlock::Text {
+                    text: "2".to_string(),
+                },
+            ]),
+        ]);
+        request.chat_options.generation_prompt_mode = GenerationPromptMode::NoGenerationPrompt;
+
+        let rendered = render(Some(QWEN3_0_6B_TEMPLATE), &request).unwrap();
+
+        expect![[r#"
+            <|im_start|>user
+            What is 1 + 1?<|im_end|>
+            <|im_start|>assistant
+            <think>
+            need simple arithmetic
+            </think>
+
+            2<|im_end|>
+        "#]]
+        .assert_eq(&rendered);
+    }
+
+    #[test]
+    fn chat_template_exposes_tools_to_templates_when_auto_enabled() {
+        let mut request = sample_request(vec![ChatMessage::text(ChatRole::User, "hello")]);
+        request.tools = vec![ChatTool {
+            name: "get_weather".to_string(),
+            description: Some("Get weather".to_string()),
+            parameters: serde_json::json!({
+                "type": "object",
+                "properties": {"city": {"type": "string"}},
+                "required": ["city"],
+            }),
+            strict: None,
+        }];
+        request.tool_choice = ChatToolChoice::Auto;
+
+        let rendered = render(
+            Some("{{ tools[0].function.name }}|{{ tools[0].function.parameters.required[0] }}"),
+            &request,
+        )
+        .unwrap();
+
+        assert_eq!(rendered, "get_weather|city");
+    }
+
+    #[test]
+    fn chat_template_exposes_assistant_tool_calls_and_tool_messages() {
+        let request = sample_request(vec![
+            ChatMessage::assistant_blocks(vec![AssistantContentBlock::ToolCall(
+                crate::AssistantToolCall {
+                    id: "call_1".to_string(),
+                    name: "get_weather".to_string(),
+                    arguments: r#"{"city":"Paris"}"#.to_string(),
+                },
+            )]),
+            ChatMessage::tool_response("Sunny", "call_1"),
+        ]);
+
+        let rendered = render(
+            Some(
+                "{{ messages[0].tool_calls[0].function.name }}|{{ messages[0].tool_calls[0].function.arguments.city }}|{{ messages[1].tool_call_id }}|{{ messages[1].content }}",
+            ),
+            &request,
+        )
+        .unwrap();
+
+        assert_eq!(rendered, "get_weather|Paris|call_1|Sunny");
+    }
+
+    #[test]
+    fn qwen35_template_renders_prefilled_reasoning_start_when_thinking_enabled() {
+        let mut request = sample_request(vec![ChatMessage::text(ChatRole::User, "hello")]);
+        request
+            .chat_options
+            .template_kwargs
+            .insert("enable_thinking".to_string(), Value::Bool(true));
+
+        let rendered = render(Some(QWEN3_5_0_8B_TEMPLATE), &request).unwrap();
+
+        expect![[r#"
+            <|im_start|>user
+            hello<|im_end|>
+            <|im_start|>assistant
+            <think>
+        "#]]
+        .assert_eq(&rendered);
+    }
+
+    #[test]
+    fn qwen35_template_renders_closed_empty_reasoning_span_when_thinking_disabled() {
+        let mut request = sample_request(vec![ChatMessage::text(ChatRole::User, "hello")]);
+        request
+            .chat_options
+            .template_kwargs
+            .insert("enable_thinking".to_string(), Value::Bool(false));
+
+        let rendered = render(Some(QWEN3_5_0_8B_TEMPLATE), &request).unwrap();
+
+        expect![[r#"
+            <|im_start|>user
+            hello<|im_end|>
+            <|im_start|>assistant
+            <think>
+
+            </think>
+
+        "#]]
+        .assert_eq(&rendered);
+    }
+
+    #[test]
+    fn qwen35_template_omits_assistant_reasoning_prefill_without_generation_prompt() {
+        let mut request = sample_request(vec![ChatMessage::text(ChatRole::User, "hello")]);
+        request.chat_options.generation_prompt_mode = GenerationPromptMode::NoGenerationPrompt;
+        request
+            .chat_options
+            .template_kwargs
+            .insert("enable_thinking".to_string(), Value::Bool(true));
+
+        let rendered = render(Some(QWEN3_5_0_8B_TEMPLATE), &request).unwrap();
+
+        expect![[r#"
+            <|im_start|>user
+            hello<|im_end|>
+        "#]]
+        .assert_eq(&rendered);
+    }
+}
diff --git a/rust/src/chat/src/renderer/hf/template.rs b/rust/src/chat/src/renderer/hf/template.rs
new file mode 100644
index 000000000000..b71efc1e53e2
--- /dev/null
+++ b/rust/src/chat/src/renderer/hf/template.rs
@@ -0,0 +1,316 @@
+//! Chat template support for tokenizers using Jinja2 templates.
+//!
+//! This module is inlined from SMG's tokenizer crate with local adaptations:
+//! - thinking-related detection/state is removed
+//! - special tokens are wired to `vllm_text::backends::hf::HfSpecialTokens`
+
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+
+use minijinja::Environment;
+use serde::{Deserialize, Serialize};
+use serde_json::{self};
+use vllm_text::backend::hf::HfSpecialTokens;
+
+use super::error::TemplateError;
+use super::format::{
+    ChatTemplateContentFormat, ChatTemplateContentFormatOption, detect_chat_template_content_format,
+};
+use super::tojson::hf_tojson_filter;
+use crate::renderer::hf::{TemplateMessage, TemplateTool};
+use crate::request::ReasoningEffort;
+
+type Result<T> = std::result::Result<T, TemplateError>;
+
+/// Build a pre-configured environment with the given template string.
+fn build_environment(template: String) -> Result<Environment<'static>> {
+    let mut env = Environment::new();
+
+    env.set_trim_blocks(true);
+    env.set_lstrip_blocks(true);
+
+    env.add_template_owned("chat".to_owned(), template)?;
+
+    env.set_unknown_method_callback(minijinja_contrib::pycompat::unknown_method_callback);
+    env.add_filter("tojson", hf_tojson_filter);
+
+    Ok(env)
+}
+
+#[serde_with::skip_serializing_none]
+#[derive(Default, Serialize)]
+pub(super) struct TemplateContext<'a> {
+    pub(super) messages: &'a [TemplateMessage],
+    pub(super) add_generation_prompt: bool,
+    pub(super) continue_final_message: bool,
+    pub(super) tools: Option<&'a [TemplateTool]>,
+    pub(super) documents: Option<&'a [serde_json::Value]>,
+    #[serde(flatten)]
+    pub(super) special_tokens: Option<&'a HfSpecialTokens>,
+    #[serde(flatten)]
+    pub(super) template_kwargs: Option<&'a HashMap<String, serde_json::Value>>,
+    // By putting top-level `reasoning_effort` after `template_kwargs`, this overrides any
+    // `reasoning_effort` value that might be present there.
+    pub(super) reasoning_effort: Option<ReasoningEffort>,
+}
+
+/// Load chat template from a file (`.jinja` or `.json` containing Jinja).
+pub fn load_chat_template(template_path: &Path) -> Result<Option<String>> {
+    let content = fs::read_to_string(template_path).map_err(TemplateError::ReadTemplateFile)?;
+
+    if template_path.extension().is_some_and(|ext| ext == "json") {
+        #[derive(Deserialize)]
+        #[serde(untagged)]
+        enum ChatTemplateFile {
+            String(String),
+            Object { chat_template: String },
+        }
+
+        let json_value =
+            serde_json::from_str(&content).map_err(TemplateError::ParseTemplateJson)?;
+        let json_template =
+            serde_json::from_value(json_value).map_err(|_| TemplateError::InvalidTemplateJson)?;
+
+        return Ok(Some(match json_template {
+            ChatTemplateFile::String(template) => template,
+            ChatTemplateFile::Object { chat_template } => chat_template,
+        }));
+    }
+
+    let template = content.trim().replace("\\n", "\n");
+    Ok(Some(template))
+}
+
+/// Resolve a configured chat template value into a template string.
+pub fn resolve_chat_template(chat_template: &str) -> Result<String> {
+    let path = Path::new(chat_template);
+    if path.exists() {
+        return load_chat_template(path).map(|template| template.unwrap_or_default());
+    }
+
+    const JINJA_CHARS: [char; 3] = ['{', '}', '\n'];
+    if chat_template.chars().any(|c| JINJA_CHARS.contains(&c)) {
+        return Ok(chat_template.to_string());
+    }
+
+    Err(TemplateError::MissingTemplatePath)
+}
+
+/// One compiled chat template with its Jinja environment and detected content
+/// format.
+pub(super) struct CompiledChatTemplate {
+    /// Cached, fully-configured environment for one compiled template.
+    env: Environment<'static>,
+    content_format: ChatTemplateContentFormat,
+}
+
+impl CompiledChatTemplate {
+    /// Compile the given chat template string into a [`CompiledChatTemplate`].
+    pub fn new(template: String, content_format: ChatTemplateContentFormatOption) -> Result<Self> {
+        let content_format = match content_format {
+            ChatTemplateContentFormatOption::Auto => detect_chat_template_content_format(&template),
+            ChatTemplateContentFormatOption::String => ChatTemplateContentFormat::String,
+            ChatTemplateContentFormatOption::OpenAi => ChatTemplateContentFormat::OpenAi,
+        };
+        let env = build_environment(template)?;
+        Ok(Self {
+            env,
+            content_format,
+        })
+    }
+
+    /// Apply the compiled template to the given context and return the rendered
+    /// prompt.
+    pub fn apply(&self, ctx: TemplateContext<'_>) -> Result<String> {
+        let tmpl = self.env.get_template("chat")?;
+        tmpl.render(ctx).map_err(TemplateError::from)
+    }
+
+    pub fn content_format(&self) -> ChatTemplateContentFormat {
+        self.content_format
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs;
+
+    use tempfile::TempDir;
+    use vllm_text::backend::hf::{HfSpecialTokens, NamedSpecialToken};
+
+    use super::*;
+
+    #[test]
+    fn test_chat_template_state_valid_template() {
+        let template = CompiledChatTemplate::new(
+            "{{ messages }}".to_string(),
+            ChatTemplateContentFormatOption::Auto,
+        )
+        .unwrap();
+        assert_eq!(template.content_format(), ChatTemplateContentFormat::String);
+        let result = template.apply(TemplateContext::default()).unwrap();
+        assert_eq!(result, "[]");
+    }
+
+    #[test]
+    fn test_chat_template_state_invalid_template() {
+        let result = CompiledChatTemplate::new(
+            "{% invalid".to_string(),
+            ChatTemplateContentFormatOption::Auto,
+        );
+        assert!(result.is_err());
+        let err = result.err().unwrap().to_string();
+        assert!(
+            err.contains("failed to render jinja template"),
+            "Error should explain parse failure, got: {err}"
+        );
+    }
+
+    #[test]
+    fn test_special_tokens_injected_into_context() {
+        let template = "{{ bos_token }}hello{{ eos_token }}";
+        let template =
+            CompiledChatTemplate::new(template.to_string(), ChatTemplateContentFormatOption::Auto)
+                .unwrap();
+
+        let special_tokens = HfSpecialTokens {
+            bos_token: Some(NamedSpecialToken::Text("<s>".to_string())),
+            eos_token: Some(NamedSpecialToken::Text("</s>".to_string())),
+            ..Default::default()
+        };
+
+        let result = template
+            .apply(TemplateContext {
+                special_tokens: Some(&special_tokens),
+                ..Default::default()
+            })
+            .unwrap();
+
+        assert_eq!(result, "<s>hello</s>");
+    }
+
+    #[test]
+    fn test_special_tokens_undefined_when_not_provided() {
+        let template = "{% if bos_token is defined %}{{ bos_token }}{% endif %}hello";
+        let template =
+            CompiledChatTemplate::new(template.to_string(), ChatTemplateContentFormatOption::Auto)
+                .unwrap();
+
+        let result = template.apply(TemplateContext::default()).unwrap();
+        assert_eq!(result, "hello");
+    }
+
+    #[test]
+    fn test_special_tokens_partial() {
+        let template =
+            "{{ bos_token }}hello{% if eos_token is defined %}{{ eos_token }}{% endif %}";
+        let template =
+            CompiledChatTemplate::new(template.to_string(), ChatTemplateContentFormatOption::Auto)
+                .unwrap();
+
+        let special_tokens = HfSpecialTokens {
+            bos_token: Some(NamedSpecialToken::Text("<s>".to_string())),
+            eos_token: None,
+            ..Default::default()
+        };
+
+        let result = template
+            .apply(TemplateContext {
+                special_tokens: Some(&special_tokens),
+                ..Default::default()
+            })
+            .unwrap();
+
+        assert_eq!(result, "<s>hello");
+    }
+
+    #[test]
+    fn test_tojson_filter_supports_indent_and_sort_keys() {
+        let template = CompiledChatTemplate::new(
+            "{{ payload | tojson(indent=2, sort_keys=true) }}".to_string(),
+            ChatTemplateContentFormatOption::Auto,
+        )
+        .unwrap();
+        let mut kwargs = HashMap::new();
+        kwargs.insert("payload".to_string(), serde_json::json!({"b": 1, "a": 2}));
+
+        let result = template
+            .apply(TemplateContext {
+                template_kwargs: Some(&kwargs),
+                ..Default::default()
+            })
+            .unwrap();
+
+        assert_eq!(result, "{\n  \"a\": 2,\n  \"b\": 1\n}");
+    }
+
+    #[test]
+    fn test_load_chat_template_from_file_jinja() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("chat_template.jinja");
+        fs::write(&path, "{{ messages }}").unwrap();
+
+        let template = load_chat_template(&path).unwrap();
+
+        assert_eq!(template.as_deref(), Some("{{ messages }}"));
+    }
+
+    #[test]
+    fn test_resolve_chat_template_from_inline_literal() {
+        let template = resolve_chat_template("{{ messages }}").unwrap();
+
+        assert_eq!(template, "{{ messages }}");
+    }
+
+    #[test]
+    fn test_resolve_chat_template_from_existing_file() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("chat_template.jinja");
+        fs::write(&path, "{{ messages }}").unwrap();
+
+        let template = resolve_chat_template(path.to_str().unwrap()).unwrap();
+
+        assert_eq!(template, "{{ messages }}");
+    }
+
+    #[test]
+    fn test_resolve_chat_template_rejects_missing_path_like_value() {
+        let error = resolve_chat_template("missing_template.jinja").unwrap_err();
+
+        assert!(matches!(error, TemplateError::MissingTemplatePath));
+    }
+
+    #[test]
+    fn test_chat_template_state_respects_explicit_content_format_override() {
+        let template = CompiledChatTemplate::new(
+            "{% for item in messages[0].content %}{{ item.text }}{% endfor %}".to_string(),
+            ChatTemplateContentFormatOption::String,
+        )
+        .unwrap();
+
+        assert_eq!(template.content_format(), ChatTemplateContentFormat::String);
+    }
+
+    #[test]
+    fn test_load_chat_template_from_file_json_string() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("chat_template.json");
+        fs::write(&path, "\"{{ messages }}\"").unwrap();
+
+        let template = load_chat_template(&path).unwrap();
+
+        assert_eq!(template.as_deref(), Some("{{ messages }}"));
+    }
+
+    #[test]
+    fn test_load_chat_template_from_file_json_object() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("chat_template.json");
+        fs::write(&path, r#"{"chat_template":"{{ messages }}"}"#).unwrap();
+
+        let template = load_chat_template(&path).unwrap();
+
+        assert_eq!(template.as_deref(), Some("{{ messages }}"));
+    }
+}
diff --git a/rust/src/chat/src/renderer/hf/tojson.rs b/rust/src/chat/src/renderer/hf/tojson.rs
new file mode 100644
index 000000000000..1c5c20f47658
--- /dev/null
+++ b/rust/src/chat/src/renderer/hf/tojson.rs
@@ -0,0 +1,277 @@
+use minijinja::value::{Kwargs, ViaDeserialize};
+use minijinja::{Error as MinijinjaError, ErrorKind, Value};
+use serde::Deserialize;
+use serde_json::{self, Value as JsonValue};
+use serde_json_fmt::{JsonFormat, JsonSyntaxError};
+use thiserror_ext::AsReport;
+
+/// Hugging Face-compatible `tojson` filter for chat templates.
+///
+/// We cannot use MiniJinja's built-in filter directly because HF relies on
+/// Python `json.dumps` semantics:
+/// - no HTML escaping
+/// - extra kwargs such as `ensure_ascii`, `separators`, and `sort_keys`
+/// - Python-style `indent` handling
+pub(super) fn hf_tojson_filter(
+    value: Value,
+    kwargs: Kwargs,
+) -> std::result::Result<Value, MinijinjaError> {
+    let ensure_ascii = kwargs.get::<Option<bool>>("ensure_ascii")?.unwrap_or(false);
+    let indent = parse_indent(
+        kwargs.get::<Option<ViaDeserialize<IndentArg>>>("indent")?.map(|value| value.0),
+    );
+    let separators = parse_separators(
+        kwargs
+            .get::<Option<ViaDeserialize<SeparatorsArg>>>("separators")?
+            .map(|value| value.0),
+        indent.is_some(),
+    );
+    let sort_keys = kwargs.get::<Option<bool>>("sort_keys")?.unwrap_or(false);
+
+    kwargs.assert_all_used()?;
+
+    let json_value: serde_json::Value = serde_json::to_value(&value).map_err(|e| {
+        MinijinjaError::new(
+            ErrorKind::InvalidOperation,
+            format!("Failed to convert to JSON value: {e}"),
+        )
+    })?;
+
+    let json_str = {
+        let value_to_serialize = if sort_keys {
+            &sort_json_keys(&json_value)
+        } else {
+            &json_value
+        };
+
+        build_json_format(indent, separators.0, separators.1, ensure_ascii)?
+            .format_to_string(value_to_serialize)
+            .map_err(|e| {
+                MinijinjaError::new(
+                    ErrorKind::InvalidOperation,
+                    format!("Failed to serialize JSON: {}", e.as_report()),
+                )
+            })?
+    };
+
+    Ok(Value::from_safe_string(json_str))
+}
+
+#[derive(Deserialize)]
+#[serde(untagged)]
+enum IndentArg {
+    // Python `json.dumps` accepts bool, int, and string indentation styles.
+    Bool(bool),
+    Integer(i64),
+    String(String),
+}
+
+fn parse_indent(value: Option<IndentArg>) -> Option<String> {
+    match value? {
+        IndentArg::Bool(indent) => Some(if indent {
+            " ".to_owned()
+        } else {
+            String::new()
+        }),
+        IndentArg::Integer(indent) => Some(if indent > 0 {
+            " ".repeat(indent as usize)
+        } else {
+            String::new()
+        }),
+        IndentArg::String(indent) => Some(indent),
+    }
+}
+
+#[derive(Deserialize)]
+struct SeparatorsArg((String, String));
+
+fn parse_separators(value: Option<SeparatorsArg>, pretty: bool) -> (String, String) {
+    let Some(SeparatorsArg((item_separator, key_separator))) = value else {
+        let default_item_separator = if pretty { "," } else { ", " };
+        let default_key_separator = ": ";
+
+        return (
+            default_item_separator.to_owned(),
+            default_key_separator.to_owned(),
+        );
+    };
+
+    (item_separator, key_separator)
+}
+
+fn build_json_format(
+    indent: Option<String>,
+    item_separator: String,
+    key_separator: String,
+    ensure_ascii: bool,
+) -> std::result::Result<JsonFormat, MinijinjaError> {
+    JsonFormat::new()
+        .indent(indent)
+        .map_err(map_json_syntax_error("indent"))?
+        .comma(item_separator)
+        .map_err(map_json_syntax_error("separators (item)"))?
+        .colon(key_separator)
+        .map_err(map_json_syntax_error("separators (key)"))
+        .map(|format| format.ascii(ensure_ascii))
+}
+
+fn map_json_syntax_error(
+    field: &'static str,
+) -> impl FnOnce(JsonSyntaxError) -> MinijinjaError + Copy {
+    move |error| {
+        MinijinjaError::new(
+            ErrorKind::InvalidOperation,
+            format!("invalid {field} value for tojson: {error}"),
+        )
+    }
+}
+
+/// Recursively sort all object keys in a JSON value.
+fn sort_json_keys(value: &JsonValue) -> JsonValue {
+    match value {
+        JsonValue::Object(map) => {
+            let mut sorted: serde_json::Map<String, JsonValue> = serde_json::Map::new();
+            let mut keys: Vec<_> = map.keys().collect();
+            keys.sort();
+            for key in keys {
+                sorted.insert(key.clone(), sort_json_keys(&map[key]));
+            }
+            JsonValue::Object(sorted)
+        }
+        JsonValue::Array(arr) => JsonValue::Array(arr.iter().map(sort_json_keys).collect()),
+        _ => value.clone(),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use expect_test::expect;
+    use minijinja::Environment;
+    use serde_json::json;
+    use thiserror_ext::AsReport;
+
+    use super::hf_tojson_filter;
+
+    fn render(template: &str, payload: serde_json::Value) -> String {
+        let mut env = Environment::new();
+        env.add_filter("tojson", hf_tojson_filter);
+        env.render_str(template, json!({ "payload": payload })).unwrap()
+    }
+
+    fn render_error(template: &str, payload: serde_json::Value) -> minijinja::Error {
+        let mut env = Environment::new();
+        env.add_filter("tojson", hf_tojson_filter);
+        env.render_str(template, json!({ "payload": payload })).unwrap_err()
+    }
+
+    #[test]
+    fn tojson_does_not_html_escape_like_minijinja_builtin() {
+        let rendered = render("{{ payload|tojson }}", json!("<tag>&'"));
+        assert_eq!(rendered, "\"<tag>&'\"");
+    }
+
+    #[test]
+    fn tojson_supports_sort_keys_recursively() {
+        let rendered = render(
+            "{{ payload|tojson(sort_keys=true) }}",
+            json!({
+                "z": {"b": 1, "a": 2},
+                "a": 0
+            }),
+        );
+
+        assert_eq!(rendered, "{\"a\": 0, \"z\": {\"a\": 2, \"b\": 1}}");
+    }
+
+    #[test]
+    fn tojson_supports_indent() {
+        let rendered = render("{{ payload|tojson(indent=2) }}", json!([1, 2]));
+
+        assert_eq!(rendered, "[\n  1,\n  2\n]");
+    }
+
+    #[test]
+    fn tojson_supports_ensure_ascii_false() {
+        let rendered = render("{{ payload|tojson(ensure_ascii=false) }}", json!("中文"));
+        assert_eq!(rendered, "\"中文\"");
+    }
+
+    #[test]
+    fn tojson_supports_ensure_ascii_true() {
+        let rendered = render("{{ payload|tojson(ensure_ascii=true) }}", json!("中文"));
+        assert_eq!(rendered, "\"\\u4e2d\\u6587\"");
+    }
+
+    #[test]
+    fn tojson_supports_separators() {
+        let rendered = render(
+            "{{ payload|tojson(separators=[',', ':']) }}",
+            json!({
+                "x": [1, 2]
+            }),
+        );
+
+        assert_eq!(rendered, "{\"x\":[1,2]}");
+    }
+
+    #[test]
+    fn tojson_supports_negative_indent_as_newline_only() {
+        let rendered = render("{{ payload|tojson(indent=-1) }}", json!([1, 2]));
+        assert_eq!(rendered, "[\n1,\n2\n]");
+    }
+
+    #[test]
+    fn tojson_supports_string_indent() {
+        let rendered = render("{{ payload|tojson(indent='  ') }}", json!([1, 2]));
+        assert_eq!(rendered, "[\n  1,\n  2\n]");
+    }
+
+    #[test]
+    fn tojson_supports_boolean_indent() {
+        let rendered_true = render("{{ payload|tojson(indent=true) }}", json!([1, 2]));
+        assert_eq!(rendered_true, "[\n 1,\n 2\n]");
+
+        let rendered_false = render("{{ payload|tojson(indent=false) }}", json!([1, 2]));
+        assert_eq!(rendered_false, "[\n1,\n2\n]");
+    }
+
+    #[test]
+    fn tojson_combines_indent_sort_keys_separators_and_ensure_ascii() {
+        let rendered = render(
+            "{{ payload|tojson(ensure_ascii=true, sort_keys=true, separators=[',', ':'], indent='  ') }}",
+            json!({
+                "b": "<中>",
+                "a": [1, 2]
+            }),
+        );
+
+        assert_eq!(
+            rendered,
+            "{\n  \"a\":[\n    1,\n    2\n  ],\n  \"b\":\"<\\u4e2d>\"\n}"
+        );
+    }
+
+    #[test]
+    fn tojson_rejects_invalid_indent() {
+        let error = render_error("{{ payload|tojson(indent='-->') }}", json!({"a": 1}));
+        expect!["invalid operation: invalid indent value for tojson: string contains unexpected character '-' (in <string>:1)"]
+            .assert_eq(&error.to_report_string());
+    }
+
+    #[test]
+    fn tojson_rejects_invalid_separator_shape() {
+        let error = render_error("{{ payload|tojson(separators=':,') }}", json!({"a": 1}));
+        expect!["cannot deserialize: invalid type: string \":,\", expected a tuple of size 2 (in <string>:1)"]
+            .assert_eq(&error.to_report_string());
+    }
+
+    #[test]
+    fn tojson_rejects_invalid_key_separator() {
+        let error = render_error(
+            "{{ payload|tojson(separators=[',', '=>']) }}",
+            json!({"a": 1}),
+        );
+        expect!["invalid operation: invalid separators (key) value for tojson: string contains unexpected character '=' (in <string>:1)"]
+            .assert_eq(&error.to_report_string());
+    }
+}
diff --git a/rust/src/chat/src/renderer/mod.rs b/rust/src/chat/src/renderer/mod.rs
new file mode 100644
index 000000000000..07ff5d0b6ddc
--- /dev/null
+++ b/rust/src/chat/src/renderer/mod.rs
@@ -0,0 +1,31 @@
+use std::sync::Arc;
+
+use vllm_text::Prompt;
+
+use crate::error::Result;
+use crate::request::ChatRequest;
+
+pub mod deepseek_v32;
+pub mod deepseek_v4;
+pub mod hf;
+mod selection;
+
+pub use deepseek_v4::DeepSeekV4ChatRenderer;
+pub use deepseek_v32::DeepSeekV32ChatRenderer;
+pub use selection::RendererSelection;
+
+/// Rendered chat prompt submitted to the text backend.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct RenderedPrompt {
+    pub prompt: Prompt,
+}
+
+/// Minimal chat-prompt renderer used by `vllm-chat`.
+pub trait ChatRenderer: Send + Sync {
+    /// Render one chat request into the text prompt submitted to the text
+    /// backend.
+    fn render(&self, request: &ChatRequest) -> Result<RenderedPrompt>;
+}
+
+/// Shared trait-object form of [`ChatRenderer`].
+pub type DynChatRenderer = Arc<dyn ChatRenderer>;
diff --git a/rust/src/chat/src/renderer/selection.rs b/rust/src/chat/src/renderer/selection.rs
new file mode 100644
index 000000000000..f4bd565bafdd
--- /dev/null
+++ b/rust/src/chat/src/renderer/selection.rs
@@ -0,0 +1,109 @@
+use std::fmt;
+use std::str::FromStr;
+
+use serde_with::DeserializeFromStr;
+
+/// Specify which chat renderer implementation to use.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, DeserializeFromStr)]
+pub enum RendererSelection {
+    /// Use model-based auto-detection.
+    #[default]
+    Auto,
+    /// Force the generic Hugging Face chat-template renderer.
+    Hf,
+    /// Force the DeepSeek V3.2 renderer.
+    DeepSeekV32,
+    /// Force the DeepSeek V4 renderer.
+    DeepSeekV4,
+}
+
+impl RendererSelection {
+    pub const AUTO_LITERAL: &str = "auto";
+    pub const DEEPSEEK_V32_LITERAL: &str = "deepseek_v32";
+    pub const DEEPSEEK_V4_LITERAL: &str = "deepseek_v4";
+    pub const HF_LITERAL: &str = "hf";
+
+    /// Resolve the renderer selection using the given model type string, if
+    /// it's `Auto`.
+    pub fn resolve(self, model_type: &str) -> Self {
+        match self {
+            Self::Auto => match model_type {
+                Self::DEEPSEEK_V32_LITERAL => Self::DeepSeekV32,
+                Self::DEEPSEEK_V4_LITERAL => Self::DeepSeekV4,
+                _ => Self::Hf,
+            },
+            selection => selection,
+        }
+    }
+}
+
+impl FromStr for RendererSelection {
+    type Err = String;
+
+    fn from_str(value: &str) -> Result<Self, Self::Err> {
+        if value.eq_ignore_ascii_case(Self::AUTO_LITERAL) {
+            Ok(Self::Auto)
+        } else if value.eq_ignore_ascii_case(Self::HF_LITERAL) {
+            Ok(Self::Hf)
+        } else if value.eq_ignore_ascii_case(Self::DEEPSEEK_V32_LITERAL) {
+            Ok(Self::DeepSeekV32)
+        } else if value.eq_ignore_ascii_case(Self::DEEPSEEK_V4_LITERAL) {
+            Ok(Self::DeepSeekV4)
+        } else {
+            Err(format!(
+                "unknown renderer `{value}` (expected one of: auto, hf, deepseek_v32, deepseek_v4)"
+            ))
+        }
+    }
+}
+
+impl fmt::Display for RendererSelection {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Auto => f.write_str(Self::AUTO_LITERAL),
+            Self::Hf => f.write_str(Self::HF_LITERAL),
+            Self::DeepSeekV32 => f.write_str(Self::DEEPSEEK_V32_LITERAL),
+            Self::DeepSeekV4 => f.write_str(Self::DEEPSEEK_V4_LITERAL),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::RendererSelection;
+
+    #[test]
+    fn renderer_selection_parses_known_values() {
+        assert_eq!(
+            "auto".parse::<RendererSelection>().unwrap(),
+            RendererSelection::Auto
+        );
+        assert_eq!(
+            "hf".parse::<RendererSelection>().unwrap(),
+            RendererSelection::Hf
+        );
+        assert_eq!(
+            "deepseek_v32".parse::<RendererSelection>().unwrap(),
+            RendererSelection::DeepSeekV32
+        );
+        assert_eq!(
+            "deepseek_v4".parse::<RendererSelection>().unwrap(),
+            RendererSelection::DeepSeekV4
+        );
+    }
+
+    #[test]
+    fn renderer_selection_display_round_trips() {
+        for selection in [
+            RendererSelection::Auto,
+            RendererSelection::Hf,
+            RendererSelection::DeepSeekV32,
+            RendererSelection::DeepSeekV4,
+        ] {
+            assert_eq!(
+                selection.to_string().parse::<RendererSelection>().unwrap(),
+                selection
+            );
+        }
+    }
+}
diff --git a/rust/src/chat/src/request.rs b/rust/src/chat/src/request.rs
new file mode 100644
index 000000000000..c1cb83b8dc32
--- /dev/null
+++ b/rust/src/chat/src/request.rs
@@ -0,0 +1,662 @@
+use std::collections::HashMap;
+
+use llm_multimodal::ImageDetail;
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+pub use vllm_text::SamplingParams;
+use vllm_text::TextDecodeOptions;
+pub use vllm_tool_parser::Tool as ChatTool;
+
+use crate::AssistantMessageExt;
+use crate::error::{Error, Result};
+use crate::event::{AssistantContentBlock, AssistantMessage};
+
+/// Role label for one text-only chat message.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ChatRole {
+    System,
+    Developer,
+    User,
+    Assistant,
+    ToolResponse,
+}
+
+/// One text-only chat content part in OpenAI-style block format.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ChatContentPart {
+    /// One plain-text content block.
+    Text { text: String },
+    /// One image URL/data URL content block.
+    ImageUrl {
+        image_url: String,
+        detail: Option<ImageDetail>,
+        uuid: Option<String>,
+    },
+    // ImageData...
+    // ImageEmbeds...
+}
+
+impl ChatContentPart {
+    /// Construct one text content part with plain string content.
+    pub fn text(text: impl Into<String>) -> Self {
+        Self::Text { text: text.into() }
+    }
+
+    /// Construct one image URL content part with the given URL string.
+    pub fn image_url(image_url: impl Into<String>) -> Self {
+        Self::ImageUrl {
+            image_url: image_url.into(),
+            detail: None,
+            uuid: None,
+        }
+    }
+
+    /// Return the text content of this part when it's a text block, or an
+    /// "unsupported multimodal content" error otherwise.
+    pub(crate) fn as_text(&self) -> Result<&str> {
+        match self {
+            Self::Text { text } => Ok(text),
+            Self::ImageUrl { .. } => Err(Error::UnsupportedMultimodalContent("image_url")),
+        }
+    }
+
+    /// Return whether this part is a text block with empty content.
+    pub(crate) fn is_empty_text(&self) -> bool {
+        matches!(self, Self::Text { text } if text.is_empty())
+    }
+
+    /// Return whether this part contains any multimodal content.
+    pub(crate) fn is_multimodal(&self) -> bool {
+        match self {
+            Self::Text { .. } => false,
+            Self::ImageUrl { .. } => true,
+        }
+    }
+}
+
+/// Text-only chat content.
+///
+/// This supports either a simple string or an OpenAI-style list of text blocks.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum ChatContent {
+    /// Simple text content.
+    Text(String),
+    /// OpenAI-style blocks.
+    Parts(Vec<ChatContentPart>),
+}
+
+impl ChatContent {
+    /// Flatten the text content into one plain string without adding
+    /// separators.
+    // TODO: this method will be truly fallible once we add non-text content parts.
+    pub fn try_flatten_to_text(&self) -> Result<String> {
+        Ok(match self {
+            Self::Text(text) => text.clone(),
+            Self::Parts(parts) => {
+                parts.iter().map(ChatContentPart::as_text).collect::<Result<Vec<_>>>()?.concat()
+            }
+        })
+    }
+
+    /// Return whether there's no text content or only empty text blocks.
+    pub fn is_empty(&self) -> bool {
+        match self {
+            Self::Text(text) => text.is_empty(),
+            Self::Parts(parts) => parts.iter().all(ChatContentPart::is_empty_text),
+        }
+    }
+
+    /// Return whether this content contains any multimodal parts.
+    pub fn has_multimodal(&self) -> bool {
+        match self {
+            Self::Text(_) => false,
+            Self::Parts(parts) => parts.iter().any(ChatContentPart::is_multimodal),
+        }
+    }
+}
+
+impl From<String> for ChatContent {
+    fn from(value: String) -> Self {
+        Self::Text(value)
+    }
+}
+
+impl From<&str> for ChatContent {
+    fn from(value: &str) -> Self {
+        Self::Text(value.to_string())
+    }
+}
+
+impl From<Vec<ChatContentPart>> for ChatContent {
+    fn from(value: Vec<ChatContentPart>) -> Self {
+        Self::Parts(value)
+    }
+}
+
+/// One chat message.
+///
+/// Original Python API reference:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/entrypoints/chat_utils.py#L309-L333>
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(tag = "role", rename_all = "snake_case")]
+pub enum ChatMessage {
+    /// System message content.
+    System { content: ChatContent },
+    /// Developer message content plus optional message-local tools.
+    Developer {
+        content: ChatContent,
+        tools: Option<Vec<ChatTool>>,
+    },
+    /// User message content.
+    User { content: ChatContent },
+    /// Assistant history content assembled from structured assistant blocks.
+    Assistant { content: Vec<AssistantContentBlock> },
+    /// Tool response content associated with one prior assistant tool call.
+    ToolResponse {
+        content: ChatContent,
+        tool_call_id: String,
+    },
+}
+
+impl ChatMessage {
+    /// Construct one chat message with plain string content.
+    pub fn text(role: ChatRole, text: impl Into<String>) -> Self {
+        let content: String = text.into();
+
+        match role {
+            ChatRole::System => Self::system(content),
+            ChatRole::Developer => Self::developer(content, None),
+            ChatRole::User => Self::user(content),
+            ChatRole::Assistant => Self::assistant_text(content),
+            ChatRole::ToolResponse => {
+                panic!(
+                    "tool response messages require a tool_call_id; \
+                     use ChatMessage::tool_response() instead"
+                )
+            }
+        }
+    }
+
+    /// Construct one chat message with system role.
+    pub fn system(content: impl Into<ChatContent>) -> Self {
+        Self::System {
+            content: content.into(),
+        }
+    }
+
+    /// Construct one chat message with developer role.
+    pub fn developer(content: impl Into<ChatContent>, tools: Option<Vec<ChatTool>>) -> Self {
+        Self::Developer {
+            content: content.into(),
+            tools,
+        }
+    }
+
+    /// Construct one chat message with user role.
+    pub fn user(content: impl Into<ChatContent>) -> Self {
+        Self::User {
+            content: content.into(),
+        }
+    }
+
+    /// Construct one chat message with assistant role and plain string content.
+    pub fn assistant_text(text: impl Into<String>) -> Self {
+        Self::Assistant {
+            content: vec![AssistantContentBlock::Text { text: text.into() }],
+        }
+    }
+
+    /// Construct one chat message with assistant role and structured content
+    /// blocks.
+    pub fn assistant_blocks(content: Vec<AssistantContentBlock>) -> Self {
+        Self::Assistant { content }
+    }
+
+    /// Construct one tool-role message.
+    pub fn tool_response(content: impl Into<ChatContent>, tool_call_id: impl Into<String>) -> Self {
+        Self::ToolResponse {
+            content: content.into(),
+            tool_call_id: tool_call_id.into(),
+        }
+    }
+
+    /// Return the chat role of this message.
+    pub fn role(&self) -> ChatRole {
+        match self {
+            Self::System { .. } => ChatRole::System,
+            Self::Developer { .. } => ChatRole::Developer,
+            Self::User { .. } => ChatRole::User,
+            Self::Assistant { .. } => ChatRole::Assistant,
+            Self::ToolResponse { .. } => ChatRole::ToolResponse,
+        }
+    }
+
+    /// Concatenate the visible text carried by this message.
+    pub fn text_content(&self) -> Result<String> {
+        match self {
+            Self::System { content }
+            | Self::Developer { content, .. }
+            | Self::User { content }
+            | Self::ToolResponse { content, .. } => content.try_flatten_to_text(),
+            Self::Assistant { content } => Ok(content.text()),
+        }
+    }
+
+    /// Concatenate assistant reasoning text when present.
+    pub fn reasoning_content(&self) -> Option<String> {
+        match self {
+            Self::Assistant { content } => content.reasoning(),
+            Self::System { .. }
+            | Self::Developer { .. }
+            | Self::User { .. }
+            | Self::ToolResponse { .. } => None,
+        }
+    }
+
+    /// Return whether this message contains any multimodal content.
+    pub fn has_multimodal(&self) -> bool {
+        match self {
+            Self::System { content }
+            | Self::Developer { content, .. }
+            | Self::User { content }
+            | Self::ToolResponse { content, .. } => content.has_multimodal(),
+            Self::Assistant { .. } => false,
+        }
+    }
+}
+
+impl From<AssistantMessage> for ChatMessage {
+    fn from(value: AssistantMessage) -> Self {
+        Self::Assistant {
+            content: value.content,
+        }
+    }
+}
+
+/// Controls how prompt rendering should end after the existing chat history.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum GenerationPromptMode {
+    /// Append a generation prompt for a new assistant turn.
+    ///
+    /// Equivalent to `add_generation_prompt = true` and `continue_final_message
+    /// = false`.
+    #[default]
+    StartNewAssistant,
+    /// Leave the final assistant message open so generation continues it.
+    ///
+    /// Equivalent to `add_generation_prompt = false` and
+    /// `continue_final_message = true`.
+    ContinueFinalAssistant,
+    /// Render the existing chat history without adding any trailing generation
+    /// prompt.
+    ///
+    /// Equivalent to `add_generation_prompt = false` and
+    /// `continue_final_message = false`.
+    NoGenerationPrompt,
+}
+
+/// Effort level for reasoning models.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum ReasoningEffort {
+    None,
+    Minimal,
+    Low,
+    Medium,
+    High,
+    XHigh,
+    Max,
+}
+
+impl ReasoningEffort {
+    pub fn as_str(self) -> &'static str {
+        match self {
+            Self::None => "none",
+            Self::Minimal => "minimal",
+            Self::Low => "low",
+            Self::Medium => "medium",
+            Self::High => "high",
+            Self::XHigh => "xhigh",
+            Self::Max => "max",
+        }
+    }
+}
+
+/// Chat-template-related request options.
+///
+/// These are the small subset of chat controls that currently affect prompt
+/// rendering in `vllm-chat`.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct ChatOptions {
+    /// Controls whether rendering starts a new assistant turn, continues the
+    /// final assistant message, or emits no trailing generation prompt at
+    /// all.
+    pub generation_prompt_mode: GenerationPromptMode,
+
+    /// Per-request Jinja chat template override. When set, this template is
+    /// used instead of the model's default chat template.
+    pub chat_template: Option<String>,
+
+    /// Effort level exposed to chat templates for reasoning models.
+    pub reasoning_effort: Option<ReasoningEffort>,
+
+    /// Additional keyword arguments exposed to the chat template.
+    pub template_kwargs: HashMap<String, Value>,
+}
+
+impl Default for ChatOptions {
+    fn default() -> Self {
+        Self {
+            generation_prompt_mode: GenerationPromptMode::StartNewAssistant,
+            chat_template: None,
+            reasoning_effort: None,
+            template_kwargs: HashMap::new(),
+        }
+    }
+}
+
+impl ChatOptions {
+    /// Whether to add a generation prompt for a new assistant turn after the
+    /// existing chat history.
+    pub fn add_generation_prompt(&self) -> bool {
+        matches!(
+            self.generation_prompt_mode,
+            GenerationPromptMode::StartNewAssistant
+        )
+    }
+
+    /// Whether to leave the final assistant message open so generation
+    /// continues it.
+    pub fn continue_final_message(&self) -> bool {
+        matches!(
+            self.generation_prompt_mode,
+            GenerationPromptMode::ContinueFinalAssistant
+        )
+    }
+}
+
+/// Tool-choice semantics supported by `vllm-chat`.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ChatToolChoice {
+    Auto,
+    #[default]
+    None,
+}
+
+/// One chat request ready to be rendered into a prompt and lowered into a
+/// generate request.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct ChatRequest {
+    /// Stable caller-supplied request ID.
+    pub request_id: String,
+    /// Ordered chat history to render.
+    pub messages: Vec<ChatMessage>,
+    /// User-facing sampling parameters accepted by `vllm-chat`.
+    pub sampling_params: SamplingParams,
+    /// Chat-specific rendering options.
+    pub chat_options: ChatOptions,
+    /// Function tools made available to the model for this request.
+    pub tools: Vec<ChatTool>,
+    /// Tool-choice behavior for this request.
+    pub tool_choice: ChatToolChoice,
+    /// Text decode options for incremental detokenization.
+    pub decode_options: TextDecodeOptions,
+    /// Whether to emit intermediate northbound content deltas before the
+    /// terminal result.
+    ///
+    /// If `false`, callers only observe the terminal accumulated assistant
+    /// output. If `true`, callers may receive zero or more incremental
+    /// content events before the final terminal one.
+    pub intermediate: bool,
+    /// Request scheduling priority (lower means earlier handling; default 0).
+    pub priority: i32,
+    /// Documents for RAG (retrieval-augmented generation), passed to the chat
+    /// template.
+    pub documents: Option<Vec<Value>>,
+    /// Salt for prefix cache isolation in multi-user environments.
+    pub cache_salt: Option<String>,
+    /// Whether to add special tokens (e.g. BOS) during prompt tokenization.
+    pub add_special_tokens: bool,
+    /// Override data parallel rank.
+    #[serde(default)]
+    pub data_parallel_rank: Option<u32>,
+}
+
+impl ChatRequest {
+    /// Return one minimal valid request fixture for tests.
+    pub fn for_test() -> Self {
+        Self {
+            request_id: "test-request".to_string(),
+            messages: vec![ChatMessage::text(ChatRole::User, "test")],
+            sampling_params: SamplingParams::default(),
+            chat_options: ChatOptions::default(),
+            tools: Vec::new(),
+            tool_choice: ChatToolChoice::None,
+            decode_options: TextDecodeOptions::default(),
+            intermediate: true,
+            priority: 0,
+            documents: None,
+            cache_salt: None,
+            add_special_tokens: false,
+            data_parallel_rank: None,
+        }
+    }
+
+    /// Validate basic request invariants before rendering.
+    pub fn validate(&self) -> Result<()> {
+        if self.messages.is_empty() {
+            return Err(Error::EmptyMessages);
+        }
+        match (
+            self.chat_options.generation_prompt_mode,
+            self.messages.last().map(ChatMessage::role),
+        ) {
+            (GenerationPromptMode::ContinueFinalAssistant, Some(ChatRole::Assistant)) => {}
+            (GenerationPromptMode::ContinueFinalAssistant, _) => {
+                return Err(Error::ContinueFinalAssistantWithoutFinalAssistant);
+            }
+            (GenerationPromptMode::NoGenerationPrompt, _)
+            | (GenerationPromptMode::StartNewAssistant, _) => {}
+        }
+        Ok(())
+    }
+
+    /// Return true if this request contains any multimodal content in its
+    /// messages.
+    pub fn has_multimodal(&self) -> bool {
+        self.messages.iter().any(ChatMessage::has_multimodal)
+    }
+
+    /// Return true if this request should enable tool parsing based on the tool
+    /// choice and tool list.
+    pub(crate) fn tool_parsing_enabled(&self) -> bool {
+        matches!(self.tool_choice, ChatToolChoice::Auto) && !self.tools.is_empty()
+    }
+
+    /// Return the request-level thinking toggle when explicitly requested.
+    ///
+    /// We currently accept the two request kwargs `thinking` and
+    /// `enable_thinking`. Both must be booleans when present. If both are
+    /// present, they must have the same value. If neither key is provided,
+    /// return `None`.
+    pub(crate) fn enable_thinking(&self) -> Result<Option<bool>> {
+        let thinking = self.parse_template_bool("thinking")?;
+        let enable_thinking = self.parse_template_bool("enable_thinking")?;
+
+        match (thinking, enable_thinking) {
+            (None, None) => Ok(None),
+            (Some(thinking), Some(enable_thinking)) if thinking != enable_thinking => {
+                Err(Error::ChatTemplate(
+                    "template kwargs `thinking` and `enable_thinking` must match when both are set"
+                        .to_string(),
+                ))
+            }
+            (Some(thinking), _) => Ok(Some(thinking)),
+            (None, Some(enable_thinking)) => Ok(Some(enable_thinking)),
+        }
+    }
+
+    pub(crate) fn parse_template_bool(&self, key: &str) -> Result<Option<bool>> {
+        match self.chat_options.template_kwargs.get(key) {
+            None => Ok(None),
+            Some(Value::Bool(value)) => Ok(Some(*value)),
+            Some(other) => Err(Error::ChatTemplate(format!(
+                "template kwarg `{key}` must be a boolean, got {other}"
+            ))),
+        }
+    }
+}
+
+impl ChatRole {
+    /// Return the chat-template role string used by the current text-only chat
+    /// backend.
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::System => "system",
+            Self::Developer => "developer",
+            Self::User => "user",
+            Self::Assistant => "assistant",
+            Self::ToolResponse => "tool_response",
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use serde_json::{json, to_value};
+
+    use super::{ChatContent, ChatContentPart, ChatMessage, ChatRequest, ChatRole, ChatTool};
+    use crate::Error;
+    use crate::event::AssistantContentBlock;
+
+    #[test]
+    fn chat_content_deserializes_from_raw_string() {
+        let content: ChatContent = serde_json::from_value(json!("hello")).unwrap();
+        assert_eq!(content, ChatContent::Text("hello".to_string()));
+    }
+
+    #[test]
+    fn chat_content_deserializes_from_openai_text_blocks() {
+        let content: ChatContent =
+            serde_json::from_value(json!([{ "type": "text", "text": "hello" }])).unwrap();
+        assert_eq!(
+            content,
+            ChatContent::Parts(vec![ChatContentPart::text("hello")])
+        );
+    }
+
+    #[test]
+    fn chat_content_from_string_like_values_builds_text() {
+        assert_eq!(
+            ChatContent::from("hello"),
+            ChatContent::Text("hello".to_string())
+        );
+        assert_eq!(
+            ChatContent::from("hello".to_string()),
+            ChatContent::Text("hello".to_string())
+        );
+    }
+
+    #[test]
+    fn chat_content_try_flattens_text_parts_without_separators() {
+        let content = ChatContent::Parts(vec![
+            ChatContentPart::text("hello"),
+            ChatContentPart::text(" world"),
+        ]);
+        assert_eq!(content.try_flatten_to_text().unwrap(), "hello world");
+    }
+
+    #[test]
+    fn assistant_message_collects_visible_and_reasoning_text() {
+        let message = ChatMessage::assistant_blocks(vec![
+            AssistantContentBlock::Reasoning {
+                text: "inner".to_string(),
+            },
+            AssistantContentBlock::Text {
+                text: "outer".to_string(),
+            },
+        ]);
+
+        assert_eq!(message.role(), ChatRole::Assistant);
+        assert_eq!(message.text_content().unwrap(), "outer");
+        assert_eq!(message.reasoning_content().as_deref(), Some("inner"));
+    }
+
+    #[test]
+    fn developer_message_round_trips_through_serde() {
+        let message = ChatMessage::developer(
+            "hello",
+            Some(vec![ChatTool {
+                name: "get_weather".to_string(),
+                description: Some("Get weather".to_string()),
+                parameters: json!({
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                }),
+                strict: Some(true),
+            }]),
+        );
+
+        let value = to_value(&message).unwrap();
+        let decoded: ChatMessage = serde_json::from_value(value).unwrap();
+        assert_eq!(decoded, message);
+    }
+
+    #[test]
+    fn enable_thinking_is_none_when_no_kwargs_are_present() {
+        let request = ChatRequest::for_test();
+        assert_eq!(request.enable_thinking().unwrap(), None);
+    }
+
+    #[test]
+    fn enable_thinking_accepts_matching_duplicate_kwargs() {
+        let mut request = ChatRequest::for_test();
+        request.chat_options.template_kwargs.insert("thinking".to_string(), json!(true));
+        request
+            .chat_options
+            .template_kwargs
+            .insert("enable_thinking".to_string(), json!(true));
+
+        assert_eq!(request.enable_thinking().unwrap(), Some(true));
+    }
+
+    #[test]
+    fn enable_thinking_rejects_non_boolean_kwargs() {
+        let mut request = ChatRequest::for_test();
+        request
+            .chat_options
+            .template_kwargs
+            .insert("thinking".to_string(), json!("yes"));
+
+        assert!(matches!(
+            request.enable_thinking(),
+            Err(Error::ChatTemplate(message))
+                if message.contains("`thinking` must be a boolean")
+        ));
+    }
+
+    #[test]
+    fn enable_thinking_rejects_conflicting_duplicate_kwargs() {
+        let mut request = ChatRequest::for_test();
+        request
+            .chat_options
+            .template_kwargs
+            .insert("thinking".to_string(), json!(false));
+        request
+            .chat_options
+            .template_kwargs
+            .insert("enable_thinking".to_string(), json!(true));
+
+        assert!(matches!(
+            request.enable_thinking(),
+            Err(Error::ChatTemplate(message))
+                if message.contains("`thinking` and `enable_thinking` must match")
+        ));
+    }
+}
diff --git a/rust/src/chat/src/stream.rs b/rust/src/chat/src/stream.rs
new file mode 100644
index 000000000000..8a8dea46e6c0
--- /dev/null
+++ b/rust/src/chat/src/stream.rs
@@ -0,0 +1,237 @@
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+
+use futures::Stream;
+use trait_set::trait_set;
+use vllm_text::{DecodedLogprobs, DecodedPositionLogprobs, DecodedPromptLogprobs};
+
+use crate::FinishReason;
+use crate::error::{Error, Result};
+use crate::event::{AssistantContentBlock, AssistantMessage, ChatEvent};
+
+/// Final structured assistant message plus terminal stream metadata.
+#[derive(Debug, Clone, PartialEq)]
+pub struct CollectedAssistantMessage {
+    pub message: AssistantMessage,
+    pub prompt_token_count: usize,
+    pub prompt_token_ids: Arc<[u32]>,
+    pub prompt_logprobs: Option<DecodedPromptLogprobs>,
+    pub logprobs: Option<DecodedLogprobs>,
+    pub token_ids: Vec<u32>,
+    pub output_token_count: usize,
+    pub finish_reason: FinishReason,
+    /// Connector-specific KV transfer parameters for disaggregated serving.
+    pub kv_transfer_params: Option<serde_json::Value>,
+}
+
+/// Per-request stream of chat events.
+pub struct ChatEventStream {
+    request_id: String,
+    inner: Pin<Box<dyn Stream<Item = Result<ChatEvent>> + Send>>,
+}
+
+impl ChatEventStream {
+    pub(crate) fn new(request_id: String, inner: impl crate::output::ChatEventStream) -> Self {
+        Self {
+            request_id,
+            inner: Box::pin(inner),
+        }
+    }
+
+    /// Return the request ID associated with this stream.
+    pub fn request_id(&self) -> &str {
+        &self.request_id
+    }
+
+    /// Collect the stream to completion and return the final assembled
+    /// assistant message.
+    pub async fn collect_message(mut self) -> Result<CollectedAssistantMessage> {
+        use futures::StreamExt as _;
+
+        let mut message = AssistantMessage::default();
+        let mut prompt_logprobs = None;
+        let mut prompt_token_ids: Arc<[u32]> = Arc::from([]);
+        let mut logprob_positions: Vec<DecodedPositionLogprobs> = Vec::new();
+        let mut token_ids: Vec<u32> = Vec::new();
+        while let Some(event) = self.next().await.transpose()? {
+            match event {
+                ChatEvent::Start {
+                    prompt_logprobs: start_prompt_logprobs,
+                    prompt_token_ids: start_prompt_token_ids,
+                } => {
+                    prompt_logprobs = start_prompt_logprobs;
+                    prompt_token_ids = start_prompt_token_ids;
+                }
+                ChatEvent::BlockEnd { block, .. } => message.push_block(block),
+                ChatEvent::LogprobsDelta {
+                    logprobs,
+                    token_ids: delta_ids,
+                } => {
+                    if let Some(logprobs) = logprobs {
+                        logprob_positions.extend(logprobs.positions);
+                    }
+                    token_ids.extend(delta_ids);
+                }
+                ChatEvent::Done {
+                    message: done,
+                    prompt_token_count,
+                    output_token_count,
+                    finish_reason,
+                    kv_transfer_params,
+                } => {
+                    return Ok(CollectedAssistantMessage {
+                        message: done,
+                        prompt_token_count,
+                        prompt_token_ids,
+                        prompt_logprobs,
+                        logprobs: (!logprob_positions.is_empty()).then_some(DecodedLogprobs {
+                            positions: logprob_positions,
+                        }),
+                        token_ids,
+                        output_token_count,
+                        finish_reason,
+                        kv_transfer_params,
+                    });
+                }
+                ChatEvent::ToolCallEnd { call, .. } => {
+                    message.push_block(AssistantContentBlock::ToolCall(call));
+                }
+                ChatEvent::BlockStart { .. }
+                | ChatEvent::BlockDelta { .. }
+                | ChatEvent::ToolCallStart { .. }
+                | ChatEvent::ToolCallArgumentsDelta { .. } => {}
+            }
+        }
+
+        // Note: this is actually unreachable, as the underlying stream always emit an
+        // error on unexpected close.
+        Err(Error::StreamClosedBeforeTerminalOutput {
+            request_id: self.request_id,
+        })
+    }
+}
+
+impl Stream for ChatEventStream {
+    type Item = Result<ChatEvent>;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        Pin::new(&mut self.inner).poll_next(cx)
+    }
+}
+
+trait_set! {
+    pub trait ChatEventStreamTrait = Stream<Item = Result<ChatEvent>> + Send + 'static;
+}
+
+#[cfg(test)]
+mod tests {
+
+    use futures::stream;
+    use vllm_llm::FinishReason;
+    use vllm_text::{
+        DecodedLogprobs, DecodedPositionLogprobs, DecodedPromptLogprobs, DecodedTokenLogprob,
+    };
+
+    use super::{ChatEventStream, CollectedAssistantMessage};
+    use crate::error::Error;
+    use crate::event::ChatEvent;
+
+    #[tokio::test]
+    async fn collect_message_requires_terminal_done_event() {
+        let stream = ChatEventStream::new(
+            "chat-missing-done".to_string(),
+            stream::iter([Ok(ChatEvent::Start {
+                prompt_token_ids: vec![].into(),
+                prompt_logprobs: None,
+            })]),
+        );
+
+        let error = stream.collect_message().await.expect_err("missing done");
+        assert!(matches!(
+            error,
+            Error::StreamClosedBeforeTerminalOutput { request_id }
+            if request_id == "chat-missing-done"
+        ));
+    }
+
+    #[tokio::test]
+    async fn collect_message_retains_prompt_and_sample_logprobs() {
+        let stream = ChatEventStream::new(
+            "chat-logprobs".to_string(),
+            stream::iter(vec![
+                Ok(ChatEvent::Start {
+                    prompt_token_ids: vec![10, 11].into(),
+                    prompt_logprobs: Some(DecodedPromptLogprobs {
+                        first_token_id: 0,
+                        first_token: "o".to_string(),
+                        scored_positions: vec![DecodedPositionLogprobs {
+                            entries: vec![DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "p".to_string(),
+                                logprob: -0.1,
+                                rank: 1,
+                            }],
+                        }],
+                    }),
+                }),
+                Ok(ChatEvent::LogprobsDelta {
+                    logprobs: Some(DecodedLogprobs {
+                        positions: vec![DecodedPositionLogprobs {
+                            entries: vec![DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "a".to_string(),
+                                logprob: -0.2,
+                                rank: 1,
+                            }],
+                        }],
+                    }),
+                    token_ids: vec![],
+                }),
+                Ok(ChatEvent::Done {
+                    message: Default::default(),
+                    prompt_token_count: 2,
+                    output_token_count: 1,
+                    finish_reason: FinishReason::stop_eos(),
+                    kv_transfer_params: None,
+                }),
+            ]),
+        );
+
+        let collected = stream.collect_message().await.unwrap();
+        assert_eq!(
+            collected,
+            CollectedAssistantMessage {
+                message: Default::default(),
+                prompt_token_count: 2,
+                prompt_token_ids: vec![10, 11].into(),
+                prompt_logprobs: Some(DecodedPromptLogprobs {
+                    first_token_id: 0,
+                    first_token: "o".to_string(),
+                    scored_positions: vec![DecodedPositionLogprobs {
+                        entries: vec![DecodedTokenLogprob {
+                            token_id: 0,
+                            token: "p".to_string(),
+                            logprob: -0.1,
+                            rank: 1,
+                        }],
+                    }],
+                }),
+                logprobs: Some(DecodedLogprobs {
+                    positions: vec![DecodedPositionLogprobs {
+                        entries: vec![DecodedTokenLogprob {
+                            token_id: 0,
+                            token: "a".to_string(),
+                            logprob: -0.2,
+                            rank: 1,
+                        }],
+                    }],
+                }),
+                token_ids: vec![],
+                output_token_count: 1,
+                finish_reason: FinishReason::stop_eos(),
+                kv_transfer_params: None,
+            }
+        );
+    }
+}
diff --git a/rust/src/chat/tests/chat.rs b/rust/src/chat/tests/chat.rs
new file mode 100644
index 000000000000..7c423561c855
--- /dev/null
+++ b/rust/src/chat/tests/chat.rs
@@ -0,0 +1,1671 @@
+use std::collections::BTreeSet;
+use std::fmt;
+use std::sync::Arc;
+use std::time::Duration;
+
+use futures::StreamExt as _;
+use tokio::time::timeout;
+use vllm_chat::{
+    AssistantBlockKind, AssistantContentBlock, AssistantMessageExt as _, ChatBackend, ChatEvent,
+    ChatLlm, ChatMessage, ChatRenderer, ChatRequest, ChatRole, ChatTextBackend, ChatTool,
+    ChatToolChoice, DefaultChatOutputProcessor, DynChatOutputProcessor, DynChatRenderer,
+    FinishReason, GenerationPromptMode, NewChatOutputProcessorOptions, ParserSelection,
+    RenderedPrompt, SamplingParams,
+};
+use vllm_engine_core_client::protocol::logprobs::{
+    Logprobs, MaybeWireLogprobs, PositionLogprobs, TokenLogprob,
+};
+use vllm_engine_core_client::protocol::{
+    EngineCoreFinishReason, EngineCoreOutput, EngineCoreOutputs, EngineCoreRequest, StopReason,
+};
+use vllm_engine_core_client::test_utils::{IpcNamespace, spawn_mock_engine_task};
+use vllm_engine_core_client::{EngineCoreClient, EngineCoreClientConfig};
+use vllm_llm::Llm;
+use vllm_text::tokenizer::{DynTokenizer, Tokenizer};
+use vllm_text::{
+    DecodedLogprobs, DecodedPositionLogprobs, DecodedPromptLogprobs, DecodedTokenLogprob, Prompt,
+    TextBackend,
+};
+use zeromq::prelude::{SocketRecv, SocketSend};
+use zeromq::{DealerSocket, PushSocket, ZmqMessage};
+
+const SPECIAL_STOP_TOKEN_ID: u32 = 256;
+
+fn request_output(
+    request_id: &str,
+    new_token_ids: Vec<u32>,
+    finish_reason: Option<EngineCoreFinishReason>,
+    stop_reason: Option<StopReason>,
+) -> EngineCoreOutput {
+    EngineCoreOutput {
+        request_id: request_id.to_string(),
+        new_token_ids,
+        new_logprobs: None,
+        new_prompt_logprobs_tensors: None,
+        pooling_output: None,
+        finish_reason,
+        stop_reason,
+        events: None,
+        kv_transfer_params: None,
+        trace_headers: None,
+        prefill_stats: None,
+        routed_experts: None,
+        num_nans_in_logits: 0,
+    }
+}
+
+fn request_output_with_logprobs(
+    request_id: &str,
+    new_token_ids: Vec<u32>,
+    finish_reason: Option<EngineCoreFinishReason>,
+    stop_reason: Option<StopReason>,
+    new_logprobs: Option<Logprobs>,
+    new_prompt_logprobs_tensors: Option<Logprobs>,
+) -> EngineCoreOutput {
+    EngineCoreOutput {
+        request_id: request_id.to_string(),
+        new_token_ids,
+        new_logprobs: new_logprobs.map(MaybeWireLogprobs::Direct),
+        new_prompt_logprobs_tensors: new_prompt_logprobs_tensors.map(MaybeWireLogprobs::Direct),
+        pooling_output: None,
+        finish_reason,
+        stop_reason,
+        events: None,
+        kv_transfer_params: None,
+        trace_headers: None,
+        prefill_stats: None,
+        routed_experts: None,
+        num_nans_in_logits: 0,
+    }
+}
+
+fn sample_logprobs_for_token(token_id: u32, alternate_token_id: u32) -> Logprobs {
+    Logprobs {
+        positions: vec![PositionLogprobs {
+            entries: vec![
+                TokenLogprob {
+                    token_id,
+                    logprob: -0.1,
+                    rank: 1,
+                },
+                TokenLogprob {
+                    token_id: alternate_token_id,
+                    logprob: -0.2,
+                    rank: 2,
+                },
+            ],
+        }],
+    }
+}
+
+fn prompt_logprobs_for_hi() -> Logprobs {
+    Logprobs {
+        positions: vec![PositionLogprobs {
+            entries: vec![
+                TokenLogprob {
+                    token_id: b'i' as u32,
+                    logprob: -0.3,
+                    rank: 1,
+                },
+                TokenLogprob {
+                    token_id: b'!' as u32,
+                    logprob: -0.4,
+                    rank: 2,
+                },
+            ],
+        }],
+    }
+}
+
+fn bytes_to_token_ids(bytes: &[u8]) -> Vec<u32> {
+    bytes.iter().map(|byte| u32::from(*byte)).collect()
+}
+
+fn bytes_with_special_stop_token(bytes: &[u8]) -> Vec<u32> {
+    let mut token_ids = bytes_to_token_ids(bytes);
+    token_ids.push(SPECIAL_STOP_TOKEN_ID);
+    token_ids
+}
+
+async fn send_outputs(push: &mut PushSocket, outputs: EngineCoreOutputs) {
+    push.send(ZmqMessage::from(rmp_serde::to_vec_named(&outputs).unwrap()))
+        .await
+        .unwrap();
+}
+
+async fn recv_engine_message(dealer: &mut DealerSocket) -> Vec<bytes::Bytes> {
+    dealer.recv().await.unwrap().into_vec()
+}
+
+async fn connect_chat_llm_with_ipc(
+    config: EngineCoreClientConfig,
+    ipc: &IpcNamespace,
+    backend: Arc<dyn ChatTextBackend>,
+) -> ChatLlm {
+    let client = EngineCoreClient::connect(config.with_local_input_output_addresses(
+        Some(ipc.input_endpoint()),
+        Some(ipc.output_endpoint()),
+    ))
+    .await
+    .unwrap();
+    ChatLlm::from_shared_backend(
+        Llm::new(client).with_request_id_randomization(false),
+        backend,
+    )
+}
+
+#[derive(Clone)]
+struct FakeChatBackend {
+    has_template: bool,
+    model_id: String,
+}
+
+#[derive(Debug)]
+struct FakeChatTokenizer;
+
+impl Tokenizer for FakeChatTokenizer {
+    fn encode(&self, text: &str, _add_special_tokens: bool) -> vllm_tokenizer::Result<Vec<u32>> {
+        Ok(text.bytes().map(u32::from).collect())
+    }
+
+    fn decode(
+        &self,
+        token_ids: &[u32],
+        skip_special_tokens: bool,
+    ) -> vllm_tokenizer::Result<String> {
+        let bytes = token_ids
+            .iter()
+            .filter_map(|id| {
+                if skip_special_tokens && *id == SPECIAL_STOP_TOKEN_ID {
+                    None
+                } else {
+                    Some(*id as u8)
+                }
+            })
+            .collect::<Vec<_>>();
+        Ok(String::from_utf8_lossy(&bytes).into_owned())
+    }
+
+    fn token_to_id(&self, token: &str) -> Option<u32> {
+        match token {
+            "<think>" => Some(0xF001),
+            "</think>" => Some(0xF002),
+            "<|START_THINKING|>" => Some(0xF003),
+            "<|END_THINKING|>" => Some(0xF004),
+            "◁think▷" => Some(0xF005),
+            "◁/think▷" => Some(0xF006),
+            _ => None,
+        }
+    }
+}
+
+impl fmt::Debug for FakeChatBackend {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FakeChatBackend").finish_non_exhaustive()
+    }
+}
+
+impl FakeChatBackend {
+    fn new() -> Self {
+        Self {
+            has_template: true,
+            model_id: "test-model".to_string(),
+        }
+    }
+
+    fn without_template() -> Self {
+        Self {
+            has_template: false,
+            model_id: "test-model".to_string(),
+        }
+    }
+
+    fn with_model_id(model_id: impl Into<String>) -> Self {
+        Self {
+            has_template: true,
+            model_id: model_id.into(),
+        }
+    }
+}
+
+impl TextBackend for FakeChatBackend {
+    fn tokenizer(&self) -> DynTokenizer {
+        Arc::new(FakeChatTokenizer)
+    }
+
+    fn model_id(&self) -> &str {
+        &self.model_id
+    }
+}
+
+impl ChatBackend for FakeChatBackend {
+    fn chat_renderer(&self) -> DynChatRenderer {
+        Arc::new(self.clone())
+    }
+
+    fn new_chat_output_processor(
+        &self,
+        request: &mut ChatRequest,
+        options: NewChatOutputProcessorOptions<'_>,
+    ) -> vllm_chat::Result<DynChatOutputProcessor> {
+        Ok(Box::new(DefaultChatOutputProcessor::new(
+            request,
+            &self.model_id,
+            self.tokenizer(),
+            options.tool_call_parser,
+            options.reasoning_parser,
+        )?))
+    }
+}
+
+impl ChatRenderer for FakeChatBackend {
+    fn render(&self, request: &ChatRequest) -> vllm_chat::Result<RenderedPrompt> {
+        if !self.has_template {
+            return Err(vllm_chat::Error::MissingChatTemplate);
+        }
+
+        let mut prompt = String::new();
+        for message in &request.messages {
+            prompt.push_str(message.role().as_str());
+            prompt.push_str(": ");
+            prompt.push_str(&message.text_content()?);
+            prompt.push('\n');
+        }
+        if request.chat_options.add_generation_prompt() {
+            prompt.push_str("assistant:");
+        }
+
+        Ok(RenderedPrompt {
+            prompt: Prompt::Text(prompt),
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+struct FailingDecodeBackend {
+    inner: FakeChatBackend,
+}
+
+#[derive(Debug)]
+struct FailingDecodeTokenizer;
+
+impl Tokenizer for FailingDecodeTokenizer {
+    fn encode(&self, text: &str, add_special_tokens: bool) -> vllm_tokenizer::Result<Vec<u32>> {
+        FakeChatTokenizer.encode(text, add_special_tokens)
+    }
+
+    fn decode(
+        &self,
+        token_ids: &[u32],
+        skip_special_tokens: bool,
+    ) -> vllm_tokenizer::Result<String> {
+        if token_ids.contains(&(b'i' as u32)) {
+            return Err(vllm_tokenizer::TokenizerError("decode failed".to_string()));
+        }
+        FakeChatTokenizer.decode(token_ids, skip_special_tokens)
+    }
+
+    fn token_to_id(&self, token: &str) -> Option<u32> {
+        FakeChatTokenizer.token_to_id(token)
+    }
+}
+
+impl TextBackend for FailingDecodeBackend {
+    fn tokenizer(&self) -> DynTokenizer {
+        Arc::new(FailingDecodeTokenizer)
+    }
+
+    fn model_id(&self) -> &str {
+        self.inner.model_id()
+    }
+}
+
+impl ChatBackend for FailingDecodeBackend {
+    fn chat_renderer(&self) -> DynChatRenderer {
+        Arc::new(self.clone())
+    }
+
+    fn new_chat_output_processor(
+        &self,
+        _request: &mut ChatRequest,
+        _options: NewChatOutputProcessorOptions<'_>,
+    ) -> vllm_chat::Result<DynChatOutputProcessor> {
+        Ok(Box::new(DefaultChatOutputProcessor::plain_text_only()))
+    }
+}
+
+impl ChatRenderer for FailingDecodeBackend {
+    fn render(&self, request: &ChatRequest) -> vllm_chat::Result<RenderedPrompt> {
+        self.inner.render(request)
+    }
+}
+
+/// Skip `LogprobsDelta` events that carry only token_ids (no logprobs),
+/// returning the next semantically interesting event.
+async fn next_semantic<S>(stream: &mut S) -> Option<Result<ChatEvent, vllm_chat::Error>>
+where
+    S: futures::Stream<Item = Result<ChatEvent, vllm_chat::Error>> + Unpin,
+{
+    loop {
+        match stream.next().await {
+            Some(Ok(ChatEvent::LogprobsDelta { logprobs: None, .. })) => continue,
+            other => return other,
+        }
+    }
+}
+
+fn sample_request(request_id: &str) -> ChatRequest {
+    ChatRequest {
+        messages: vec![
+            ChatMessage::text(ChatRole::System, "You are terse."),
+            ChatMessage::text(ChatRole::User, "Say hi"),
+        ],
+        sampling_params: SamplingParams {
+            max_tokens: Some(8),
+            ..Default::default()
+        },
+        request_id: request_id.to_string(),
+        ..ChatRequest::for_test()
+    }
+}
+
+fn sample_tool_request(request_id: &str) -> ChatRequest {
+    let mut request = sample_request(request_id);
+    request.tools = vec![ChatTool {
+        name: "get_weather".to_string(),
+        description: Some("Get weather".to_string()),
+        parameters: serde_json::json!({
+            "type": "object",
+            "properties": {"city": {"type": "string"}},
+            "required": ["city"],
+        }),
+        strict: None,
+    }];
+    request.tool_choice = ChatToolChoice::Auto;
+    request
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_streams_text_events() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add = recv_engine_message(dealer).await;
+                assert_eq!(add[0].as_ref(), &[0x00]);
+                let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+                assert_eq!(request.request_id, "chat-1");
+                // more fields here in the future
+                assert_eq!(
+                    String::from_utf8(
+                        request
+                            .prompt_token_ids
+                            .clone()
+                            .unwrap()
+                            .into_iter()
+                            .map(|id| id as u8)
+                            .collect()
+                    )
+                    .unwrap(),
+                    "system: You are terse.\nuser: Say hi\nassistant:"
+                );
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![
+                            request_output("chat-1", vec![b'H' as u32], None, None),
+                            request_output(
+                                "chat-1",
+                                vec![b'i' as u32, b'!' as u32],
+                                Some(EngineCoreFinishReason::Stop),
+                                Some(StopReason::TokenId(b'!' as u32)),
+                            ),
+                        ],
+                        finished_requests: Some(BTreeSet::from(["chat-1".to_string()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let backend: Arc<dyn ChatTextBackend> = Arc::new(FakeChatBackend::new());
+    let chat = connect_chat_llm_with_ipc(
+        EngineCoreClientConfig::new_single(handshake_address).with_model_name("test-model"),
+        &ipc,
+        backend,
+    )
+    .await;
+
+    let mut stream = chat.chat(sample_request("chat-1")).await.unwrap();
+
+    match next_semantic(&mut stream).await.unwrap().unwrap() {
+        ChatEvent::Start {
+            prompt_token_ids,
+            prompt_logprobs: None,
+        } => {
+            assert_eq!(
+                prompt_token_ids.len(),
+                "system: You are terse.\nuser: Say hi\nassistant:".len()
+            );
+            assert!(!prompt_token_ids.is_empty());
+        }
+        other => panic!("expected Start, got {other:?}"),
+    }
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockStart {
+            index: 0,
+            kind: AssistantBlockKind::Text,
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockDelta {
+            index: 0,
+            kind: AssistantBlockKind::Text,
+            delta: "H".to_string(),
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockDelta {
+            index: 0,
+            kind: AssistantBlockKind::Text,
+            delta: "i".to_string(),
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockEnd {
+            index: 0,
+            block: AssistantContentBlock::Text {
+                text: "Hi".to_string(),
+            },
+        }
+    );
+
+    match next_semantic(&mut stream).await {
+        Some(Ok(ChatEvent::Done {
+            message,
+            output_token_count,
+            finish_reason,
+            ..
+        })) => {
+            assert_eq!(message.text(), "Hi");
+            assert_eq!(output_token_count, 3);
+            assert_eq!(
+                finish_reason,
+                FinishReason::Stop(Some(StopReason::TokenId(b'!' as u32)))
+            );
+        }
+        other => panic!("unexpected final event: {other:?}"),
+    }
+    assert!(next_semantic(&mut stream).await.is_none());
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    chat.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_stream_waits_for_complete_utf8_before_emitting() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat-utf8".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let _ = recv_engine_message(dealer).await;
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![
+                            request_output("chat-utf8", bytes_to_token_ids(&[0xe4]), None, None),
+                            request_output(
+                                "chat-utf8",
+                                bytes_to_token_ids(&[0xbd, 0xa0, b'!']),
+                                Some(EngineCoreFinishReason::Stop),
+                                Some(StopReason::TokenId(b'!' as u32)),
+                            ),
+                        ],
+                        finished_requests: Some(BTreeSet::from(["chat-utf8".to_string()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let backend: Arc<dyn ChatTextBackend> = Arc::new(FakeChatBackend::new());
+    let chat = connect_chat_llm_with_ipc(
+        EngineCoreClientConfig::new_single(handshake_address),
+        &ipc,
+        backend,
+    )
+    .await;
+
+    let mut stream = chat.chat(sample_request("chat-utf8")).await.unwrap();
+
+    assert!(matches!(
+        next_semantic(&mut stream).await,
+        Some(Ok(ChatEvent::Start {
+            prompt_logprobs: None,
+            ..
+        }))
+    ));
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockStart {
+            index: 0,
+            kind: AssistantBlockKind::Text,
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockDelta {
+            index: 0,
+            kind: AssistantBlockKind::Text,
+            delta: "你".to_string(),
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockEnd {
+            index: 0,
+            block: AssistantContentBlock::Text {
+                text: "你".to_string(),
+            },
+        }
+    );
+
+    match next_semantic(&mut stream).await {
+        Some(Ok(ChatEvent::Done {
+            message,
+            output_token_count,
+            ..
+        })) => {
+            assert_eq!(message.text(), "你");
+            assert_eq!(output_token_count, 4);
+        }
+        other => panic!("unexpected final event: {other:?}"),
+    }
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    chat.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_stream_flushes_held_text_on_finish() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat-final-flush".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let _ = recv_engine_message(dealer).await;
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![request_output(
+                            "chat-final-flush",
+                            bytes_to_token_ids(b"ok st"),
+                            Some(EngineCoreFinishReason::Length),
+                            None,
+                        )],
+                        finished_requests: Some(BTreeSet::from(["chat-final-flush".to_string()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let backend: Arc<dyn ChatTextBackend> = Arc::new(FakeChatBackend::new());
+    let chat = connect_chat_llm_with_ipc(
+        EngineCoreClientConfig::new_single(handshake_address),
+        &ipc,
+        backend,
+    )
+    .await;
+
+    let mut stream = chat.chat(sample_request("chat-final-flush")).await.unwrap();
+
+    assert!(matches!(
+        next_semantic(&mut stream).await,
+        Some(Ok(ChatEvent::Start {
+            prompt_logprobs: None,
+            ..
+        }))
+    ));
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockStart {
+            index: 0,
+            kind: AssistantBlockKind::Text,
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockDelta {
+            index: 0,
+            kind: AssistantBlockKind::Text,
+            delta: "ok st".to_string(),
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockEnd {
+            index: 0,
+            block: AssistantContentBlock::Text {
+                text: "ok st".to_string(),
+            },
+        }
+    );
+
+    match next_semantic(&mut stream).await {
+        Some(Ok(ChatEvent::Done {
+            message,
+            output_token_count,
+            finish_reason,
+            ..
+        })) => {
+            assert_eq!(message.text(), "ok st");
+            assert_eq!(output_token_count, 5);
+            assert_eq!(finish_reason, FinishReason::Length);
+        }
+        other => panic!("unexpected final event: {other:?}"),
+    }
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    chat.shutdown().await.unwrap();
+}
+
+#[test]
+fn chat_request_rejects_conflicting_generation_modes() {
+    let mut request = sample_request("chat-2");
+    request.chat_options.generation_prompt_mode = GenerationPromptMode::ContinueFinalAssistant;
+    let error = request.validate().unwrap_err();
+
+    assert!(matches!(
+        error,
+        vllm_chat::Error::ContinueFinalAssistantWithoutFinalAssistant
+    ));
+}
+
+#[test]
+fn chat_request_accepts_continue_final_assistant_mode_with_final_assistant() {
+    let mut request = sample_request("chat-2b");
+    request.messages = vec![ChatMessage::assistant_text("hello")];
+    request.chat_options.generation_prompt_mode = GenerationPromptMode::ContinueFinalAssistant;
+
+    request.validate().unwrap();
+}
+
+#[test]
+fn backend_requires_a_template() {
+    let request = sample_request("chat-3");
+    let backend = FakeChatBackend::without_template();
+    let error = backend.chat_renderer().render(&request).unwrap_err();
+    assert!(matches!(error, vllm_chat::Error::MissingChatTemplate));
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_stream_reports_decode_failure_as_error_event() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat-decode-fail".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let _ = recv_engine_message(dealer).await;
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![request_output("chat-4", vec![b'i' as u32], None, None)],
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let backend: Arc<dyn ChatTextBackend> = Arc::new(FailingDecodeBackend {
+        inner: FakeChatBackend::new(),
+    });
+    let chat = connect_chat_llm_with_ipc(
+        EngineCoreClientConfig::new_single(handshake_address),
+        &ipc,
+        backend,
+    )
+    .await;
+
+    let mut stream = chat.chat(sample_request("chat-4")).await.unwrap();
+    assert_eq!(stream.request_id(), "chat-4");
+    assert!(matches!(
+        next_semantic(&mut stream).await,
+        Some(Ok(ChatEvent::Start {
+            prompt_logprobs: None,
+            ..
+        }))
+    ));
+
+    match timeout(Duration::from_secs(2), stream.next()).await.unwrap() {
+        Some(Err(vllm_chat::Error::Text(vllm_text::Error::Tokenizer(message)))) => {
+            assert_eq!(message, "decode failed");
+        }
+        other => panic!("unexpected event after close: {other:?}"),
+    }
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    chat.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_stream_preserves_terminal_stop_token_when_requested() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat-include-stop".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let _ = recv_engine_message(dealer).await;
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![request_output(
+                            "chat-include-stop",
+                            vec![b'H' as u32, b'i' as u32, b'!' as u32],
+                            Some(EngineCoreFinishReason::Stop),
+                            Some(StopReason::TokenId(b'!' as u32)),
+                        )],
+                        finished_requests: Some(BTreeSet::from(["chat-include-stop".to_string()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let backend: Arc<dyn ChatTextBackend> = Arc::new(FakeChatBackend::new());
+    let chat = connect_chat_llm_with_ipc(
+        EngineCoreClientConfig::new_single(handshake_address),
+        &ipc,
+        backend,
+    )
+    .await;
+
+    let mut request = sample_request("chat-include-stop");
+    request.decode_options.include_stop_str_in_output = true;
+    let mut stream = chat.chat(request).await.unwrap();
+
+    assert!(matches!(
+        next_semantic(&mut stream).await,
+        Some(Ok(ChatEvent::Start {
+            prompt_logprobs: None,
+            ..
+        }))
+    ));
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockStart {
+            index: 0,
+            kind: AssistantBlockKind::Text,
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockDelta {
+            index: 0,
+            kind: AssistantBlockKind::Text,
+            delta: "Hi!".to_string(),
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockEnd {
+            index: 0,
+            block: AssistantContentBlock::Text {
+                text: "Hi!".to_string(),
+            },
+        }
+    );
+
+    match next_semantic(&mut stream).await {
+        Some(Ok(ChatEvent::Done {
+            message,
+            output_token_count,
+            ..
+        })) => {
+            assert_eq!(message.text(), "Hi!");
+            assert_eq!(output_token_count, 3);
+        }
+        other => panic!("unexpected final event: {other:?}"),
+    }
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    chat.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_stream_separates_reasoning_blocks_automatically() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat-reasoning".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let _ = recv_engine_message(dealer).await;
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![
+                            request_output(
+                                "chat-reasoning",
+                                bytes_to_token_ids(b"<think>"),
+                                None,
+                                None,
+                            ),
+                            request_output(
+                                "chat-reasoning",
+                                bytes_to_token_ids(b"reason "),
+                                None,
+                                None,
+                            ),
+                            request_output(
+                                "chat-reasoning",
+                                bytes_to_token_ids(b"more</think>"),
+                                None,
+                                None,
+                            ),
+                            request_output(
+                                "chat-reasoning",
+                                bytes_to_token_ids(b"answer"),
+                                Some(EngineCoreFinishReason::Length),
+                                None,
+                            ),
+                        ],
+                        finished_requests: Some(BTreeSet::from(["chat-reasoning".to_string()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let backend: Arc<dyn ChatTextBackend> =
+        Arc::new(FakeChatBackend::with_model_id("Qwen/Qwen3-0.6B"));
+    let chat = connect_chat_llm_with_ipc(
+        EngineCoreClientConfig::new_single(handshake_address),
+        &ipc,
+        backend,
+    )
+    .await;
+
+    let mut stream = chat.chat(sample_request("chat-reasoning")).await.unwrap();
+
+    assert!(matches!(
+        next_semantic(&mut stream).await,
+        Some(Ok(ChatEvent::Start {
+            prompt_logprobs: None,
+            ..
+        }))
+    ));
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockStart {
+            index: 0,
+            kind: AssistantBlockKind::Reasoning,
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockDelta {
+            index: 0,
+            kind: AssistantBlockKind::Reasoning,
+            delta: "reason ".to_string(),
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockDelta {
+            index: 0,
+            kind: AssistantBlockKind::Reasoning,
+            delta: "more".to_string(),
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockEnd {
+            index: 0,
+            block: AssistantContentBlock::Reasoning {
+                text: "reason more".to_string(),
+            },
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockStart {
+            index: 1,
+            kind: AssistantBlockKind::Text,
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockDelta {
+            index: 1,
+            kind: AssistantBlockKind::Text,
+            delta: "answer".to_string(),
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockEnd {
+            index: 1,
+            block: AssistantContentBlock::Text {
+                text: "answer".to_string(),
+            },
+        }
+    );
+
+    match next_semantic(&mut stream).await {
+        Some(Ok(ChatEvent::Done {
+            message,
+            finish_reason,
+            ..
+        })) => {
+            assert_eq!(message.reasoning().unwrap(), "reason more");
+            assert_eq!(message.text(), "answer");
+            assert_eq!(finish_reason, FinishReason::Length);
+        }
+        other => panic!("unexpected final event: {other:?}"),
+    }
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    chat.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_collectors_return_structured_message_and_visible_text() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat-collect".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let _ = recv_engine_message(dealer).await;
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![request_output(
+                            "chat-collect",
+                            bytes_to_token_ids(b"<think>inner</think>outer"),
+                            Some(EngineCoreFinishReason::Length),
+                            None,
+                        )],
+                        finished_requests: Some(BTreeSet::from(["chat-collect".to_string()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let backend: Arc<dyn ChatTextBackend> =
+        Arc::new(FakeChatBackend::with_model_id("Qwen/Qwen3-0.6B"));
+    let chat = connect_chat_llm_with_ipc(
+        EngineCoreClientConfig::new_single(handshake_address.clone()),
+        &ipc,
+        backend.clone(),
+    )
+    .await;
+
+    let message = chat
+        .chat(sample_request("chat-collect"))
+        .await
+        .unwrap()
+        .collect_message()
+        .await
+        .unwrap();
+    assert_eq!(message.message.reasoning().unwrap(), "inner");
+    assert_eq!(message.message.text(), "outer");
+    assert_eq!(message.finish_reason, FinishReason::Length);
+    assert_eq!(
+        message.prompt_token_count,
+        "system: You are terse.\nuser: Say hi\nassistant:".len()
+    );
+    assert_eq!(
+        message.output_token_count,
+        "<think>inner</think>outer".len()
+    );
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    chat.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_explicitly_disables_reasoning_parser() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat-reasoning-disabled".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let _ = recv_engine_message(dealer).await;
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![
+                            request_output(
+                                "chat-reasoning-disabled",
+                                bytes_to_token_ids(b"<think>"),
+                                None,
+                                None,
+                            ),
+                            request_output(
+                                "chat-reasoning-disabled",
+                                bytes_to_token_ids(b"reason "),
+                                None,
+                                None,
+                            ),
+                            request_output(
+                                "chat-reasoning-disabled",
+                                bytes_to_token_ids(b"more</think>"),
+                                None,
+                                None,
+                            ),
+                            request_output(
+                                "chat-reasoning-disabled",
+                                bytes_to_token_ids(b"answer"),
+                                Some(EngineCoreFinishReason::Length),
+                                None,
+                            ),
+                        ],
+                        finished_requests: Some(BTreeSet::from([
+                            "chat-reasoning-disabled".to_string()
+                        ])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let backend: Arc<dyn ChatTextBackend> =
+        Arc::new(FakeChatBackend::with_model_id("Qwen/Qwen3-0.6B"));
+    let chat = connect_chat_llm_with_ipc(
+        EngineCoreClientConfig::new_single(handshake_address),
+        &ipc,
+        backend,
+    )
+    .await
+    .with_reasoning_parser(ParserSelection::None);
+
+    let message = chat
+        .chat(sample_request("chat-reasoning-disabled"))
+        .await
+        .unwrap()
+        .collect_message()
+        .await
+        .unwrap();
+    assert_eq!(message.message.reasoning(), None);
+    assert_eq!(message.message.text(), "<think>reason more</think>answer");
+    assert_eq!(message.finish_reason, FinishReason::Length);
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    chat.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_stream_parses_tool_calls_automatically() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat-tool".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let _ = recv_engine_message(dealer).await;
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![
+                            request_output(
+                                "chat-tool",
+                                bytes_to_token_ids(b"<think>Need tool.</think>"),
+                                None,
+                                None,
+                            ),
+                            request_output(
+                                "chat-tool",
+                                bytes_to_token_ids(b"<tool_call>\n{\"name\":\"get_weather\", "),
+                                None,
+                                None,
+                            ),
+                            request_output(
+                                "chat-tool",
+                                bytes_to_token_ids(
+                                    b"\"arguments\":{\"city\":\"Paris\"}}\n</tool_call>",
+                                ),
+                                Some(EngineCoreFinishReason::Stop),
+                                Some(StopReason::TokenId(SPECIAL_STOP_TOKEN_ID)),
+                            ),
+                        ],
+                        finished_requests: Some(BTreeSet::from(["chat-tool".to_string()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let backend: Arc<dyn ChatTextBackend> =
+        Arc::new(FakeChatBackend::with_model_id("Qwen/Qwen3-0.6B"));
+    let chat = connect_chat_llm_with_ipc(
+        EngineCoreClientConfig::new_single(handshake_address),
+        &ipc,
+        backend,
+    )
+    .await;
+    let mut stream = chat.chat(sample_tool_request("chat-tool")).await.unwrap();
+
+    let mut saw_tool_start = false;
+    let mut saw_tool_args = false;
+    let mut saw_tool_end = false;
+
+    while let Some(event) = stream.next().await {
+        match event.unwrap() {
+            ChatEvent::Start { .. } => {}
+            ChatEvent::LogprobsDelta { .. } => {}
+            ChatEvent::ToolCallStart { name, .. } => {
+                saw_tool_start = true;
+                assert_eq!(name, "get_weather");
+            }
+            ChatEvent::ToolCallArgumentsDelta { delta, .. } => {
+                saw_tool_args = true;
+                assert!(delta.contains("Paris"), "{delta}");
+            }
+            ChatEvent::ToolCallEnd { call, .. } => {
+                saw_tool_end = true;
+                assert_eq!(call.name, "get_weather");
+                assert_eq!(call.arguments, r#"{"city":"Paris"}"#);
+            }
+            ChatEvent::Done {
+                message,
+                finish_reason,
+                ..
+            } => {
+                assert_eq!(
+                    finish_reason,
+                    FinishReason::Stop(Some(StopReason::TokenId(SPECIAL_STOP_TOKEN_ID)))
+                );
+                assert_eq!(message.text(), "");
+                let tool_calls = message.tool_calls().collect::<Vec<_>>();
+                assert_eq!(tool_calls.len(), 1);
+                assert_eq!(tool_calls[0].name, "get_weather");
+                assert_eq!(tool_calls[0].arguments, r#"{"city":"Paris"}"#);
+                break;
+            }
+            ChatEvent::BlockStart { .. }
+            | ChatEvent::BlockDelta { .. }
+            | ChatEvent::BlockEnd { .. } => {}
+        }
+    }
+
+    assert!(saw_tool_start);
+    assert!(saw_tool_args);
+    assert!(saw_tool_end);
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    chat.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_collect_message_preserves_tool_call_arguments_in_final_only_mode() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat-final-only-tool".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let _ = recv_engine_message(dealer).await;
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![
+                            request_output(
+                                "chat-final-only-tool",
+                                bytes_to_token_ids(b"<think>Need tool.</think>"),
+                                None,
+                                None,
+                            ),
+                            request_output(
+                                "chat-final-only-tool",
+                                bytes_to_token_ids(b"<tool_call>\n{\"name\":\"get_weather\", "),
+                                None,
+                                None,
+                            ),
+                            request_output(
+                                "chat-final-only-tool",
+                                bytes_with_special_stop_token(
+                                    b"\"arguments\":{\"city\":\"Paris\"}}\n</tool_call>",
+                                ),
+                                Some(EngineCoreFinishReason::Stop),
+                                Some(StopReason::TokenId(SPECIAL_STOP_TOKEN_ID)),
+                            ),
+                        ],
+                        finished_requests: Some(BTreeSet::from([
+                            "chat-final-only-tool".to_string()
+                        ])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let backend: Arc<dyn ChatTextBackend> =
+        Arc::new(FakeChatBackend::with_model_id("Qwen/Qwen3-0.6B"));
+    let chat = connect_chat_llm_with_ipc(
+        EngineCoreClientConfig::new_single(handshake_address),
+        &ipc,
+        backend,
+    )
+    .await;
+    let mut request = sample_tool_request("chat-final-only-tool");
+    request.intermediate = false;
+
+    let message = chat.chat(request).await.unwrap().collect_message().await.unwrap();
+
+    assert_eq!(
+        message.finish_reason,
+        FinishReason::Stop(Some(StopReason::TokenId(SPECIAL_STOP_TOKEN_ID)))
+    );
+    assert_eq!(message.message.tool_calls().count(), 1);
+    assert_eq!(
+        message.message.tool_calls().next().unwrap().arguments,
+        r#"{"city":"Paris"}"#
+    );
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    chat.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_stream_and_collect_preserve_prompt_and_sample_logprobs() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat-logprobs".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                for _ in 0..2 {
+                    let add = recv_engine_message(dealer).await;
+                    let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+                    send_outputs(
+                        push,
+                        EngineCoreOutputs {
+                            outputs: vec![
+                                request_output_with_logprobs(
+                                    &request.request_id,
+                                    vec![b'H' as u32],
+                                    None,
+                                    None,
+                                    Some(sample_logprobs_for_token(b'H' as u32, b'h' as u32)),
+                                    Some(prompt_logprobs_for_hi()),
+                                ),
+                                request_output_with_logprobs(
+                                    &request.request_id,
+                                    vec![b'i' as u32],
+                                    Some(EngineCoreFinishReason::Length),
+                                    None,
+                                    Some(sample_logprobs_for_token(b'i' as u32, b'I' as u32)),
+                                    None,
+                                ),
+                            ],
+                            finished_requests: Some(BTreeSet::from([request.request_id])),
+                            ..Default::default()
+                        },
+                    )
+                    .await;
+                }
+            })
+        },
+    );
+
+    let backend: Arc<dyn ChatTextBackend> = Arc::new(FakeChatBackend::new());
+    let chat = connect_chat_llm_with_ipc(
+        EngineCoreClientConfig::new_single(handshake_address.clone()),
+        &ipc,
+        backend,
+    )
+    .await;
+
+    let mut request = sample_request("chat-logprobs");
+    request.sampling_params.logprobs = Some(1);
+    request.sampling_params.prompt_logprobs = Some(1);
+
+    let mut stream = chat.chat(request.clone()).await.unwrap();
+    match next_semantic(&mut stream).await.unwrap().unwrap() {
+        ChatEvent::Start {
+            prompt_token_ids,
+            prompt_logprobs,
+        } => {
+            assert_eq!(
+                prompt_token_ids.len(),
+                "system: You are terse.\nuser: Say hi\nassistant:".len()
+            );
+            assert!(!prompt_token_ids.is_empty());
+            assert_eq!(
+                prompt_logprobs,
+                Some(DecodedPromptLogprobs {
+                    first_token_id: b's' as u32,
+                    first_token: "s".to_string(),
+                    scored_positions: vec![DecodedPositionLogprobs {
+                        entries: vec![
+                            DecodedTokenLogprob {
+                                token_id: b'i' as u32,
+                                token: "i".to_string(),
+                                logprob: -0.3,
+                                rank: 1,
+                            },
+                            DecodedTokenLogprob {
+                                token_id: b'!' as u32,
+                                token: "!".to_string(),
+                                logprob: -0.4,
+                                rank: 1,
+                            },
+                        ],
+                    }],
+                })
+            );
+        }
+        other => panic!("expected Start, got {other:?}"),
+    }
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockStart {
+            index: 0,
+            kind: AssistantBlockKind::Text,
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::BlockDelta {
+            index: 0,
+            kind: AssistantBlockKind::Text,
+            delta: "H".to_string(),
+        }
+    );
+    assert_eq!(
+        next_semantic(&mut stream).await.unwrap().unwrap(),
+        ChatEvent::LogprobsDelta {
+            logprobs: Some(DecodedLogprobs {
+                positions: vec![DecodedPositionLogprobs {
+                    entries: vec![
+                        DecodedTokenLogprob {
+                            token_id: b'H' as u32,
+                            token: "H".to_string(),
+                            logprob: -0.1,
+                            rank: 1,
+                        },
+                        DecodedTokenLogprob {
+                            token_id: b'h' as u32,
+                            token: "h".to_string(),
+                            logprob: -0.2,
+                            rank: 1,
+                        },
+                    ],
+                }],
+            }),
+            token_ids: vec![b'H' as u32],
+        }
+    );
+    while !matches!(
+        next_semantic(&mut stream).await,
+        Some(Ok(ChatEvent::Done { .. }))
+    ) {}
+
+    request.request_id = "chat-logprobs-collect".to_string();
+    let collected = chat.chat(request).await.unwrap().collect_message().await.unwrap();
+    assert_eq!(collected.message.text(), "Hi");
+    assert_eq!(
+        collected.prompt_logprobs,
+        Some(DecodedPromptLogprobs {
+            first_token_id: b's' as u32,
+            first_token: "s".to_string(),
+            scored_positions: vec![DecodedPositionLogprobs {
+                entries: vec![
+                    DecodedTokenLogprob {
+                        token_id: b'i' as u32,
+                        token: "i".to_string(),
+                        logprob: -0.3,
+                        rank: 1,
+                    },
+                    DecodedTokenLogprob {
+                        token_id: b'!' as u32,
+                        token: "!".to_string(),
+                        logprob: -0.4,
+                        rank: 1,
+                    },
+                ],
+            }],
+        })
+    );
+    assert_eq!(
+        collected.logprobs,
+        Some(DecodedLogprobs {
+            positions: vec![
+                DecodedPositionLogprobs {
+                    entries: vec![
+                        DecodedTokenLogprob {
+                            token_id: b'H' as u32,
+                            token: "H".to_string(),
+                            logprob: -0.1,
+                            rank: 1,
+                        },
+                        DecodedTokenLogprob {
+                            token_id: b'h' as u32,
+                            token: "h".to_string(),
+                            logprob: -0.2,
+                            rank: 1,
+                        },
+                    ],
+                },
+                DecodedPositionLogprobs {
+                    entries: vec![
+                        DecodedTokenLogprob {
+                            token_id: b'i' as u32,
+                            token: "i".to_string(),
+                            logprob: -0.1,
+                            rank: 1,
+                        },
+                        DecodedTokenLogprob {
+                            token_id: b'I' as u32,
+                            token: "I".to_string(),
+                            logprob: -0.2,
+                            rank: 1,
+                        },
+                    ],
+                },
+            ],
+        })
+    );
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    chat.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_rejects_unknown_tool_parser_before_engine_request() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat-tool-no-model".to_vec();
+    let (shutdown_tx, engine_task) =
+        spawn_mock_engine_task(handshake_address.clone(), engine_id, |dealer, _| {
+            Box::pin(async move {
+                assert!(
+                    timeout(Duration::from_millis(100), recv_engine_message(dealer)).await.is_err(),
+                    "chat request should fail before any engine request is sent"
+                );
+            })
+        });
+
+    let backend: Arc<dyn ChatTextBackend> = Arc::new(FakeChatBackend::new());
+    let chat = connect_chat_llm_with_ipc(
+        EngineCoreClientConfig::new_single(handshake_address),
+        &ipc,
+        backend,
+    )
+    .await
+    .with_tool_call_parser(ParserSelection::Explicit(
+        "definitely_missing_tool_parser".into(),
+    ));
+    let error = match chat.chat(sample_tool_request("chat-tool-no-model")).await {
+        Ok(_) => panic!("unknown explicit tool parser should fail"),
+        Err(error) => error,
+    };
+
+    assert!(matches!(
+        error,
+        vllm_chat::Error::ParserUnavailableByName { name, .. }
+        if name == "definitely_missing_tool_parser"
+    ));
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    chat.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_rejects_unknown_reasoning_parser_before_engine_request() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat-reasoning-no-model".to_vec();
+    let (shutdown_tx, engine_task) =
+        spawn_mock_engine_task(handshake_address.clone(), engine_id, |dealer, _| {
+            Box::pin(async move {
+                assert!(
+                    timeout(Duration::from_millis(100), recv_engine_message(dealer)).await.is_err(),
+                    "chat request should fail before any engine request is sent"
+                );
+            })
+        });
+
+    let backend: Arc<dyn ChatTextBackend> = Arc::new(FakeChatBackend::new());
+    let chat = connect_chat_llm_with_ipc(
+        EngineCoreClientConfig::new_single(handshake_address),
+        &ipc,
+        backend,
+    )
+    .await
+    .with_reasoning_parser(ParserSelection::Explicit(
+        "definitely_missing_reasoning_parser".into(),
+    ));
+    let error = match chat.chat(sample_request("chat-reasoning-no-model")).await {
+        Ok(_) => panic!("unknown explicit reasoning parser should fail"),
+        Err(error) => error,
+    };
+
+    assert!(matches!(
+        error,
+        vllm_chat::Error::ParserUnavailableByName { name, .. }
+        if name == "definitely_missing_reasoning_parser"
+    ));
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    chat.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_rejects_tool_requests_when_tool_parser_is_disabled() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat-tool-parser-disabled".to_vec();
+    let (shutdown_tx, engine_task) =
+        spawn_mock_engine_task(handshake_address.clone(), engine_id, |dealer, _| {
+            Box::pin(async move {
+                assert!(
+                    timeout(Duration::from_millis(100), recv_engine_message(dealer)).await.is_err(),
+                    "chat request should fail before any engine request is sent"
+                );
+            })
+        });
+
+    let backend: Arc<dyn ChatTextBackend> = Arc::new(FakeChatBackend::new());
+    let chat = connect_chat_llm_with_ipc(
+        EngineCoreClientConfig::new_single(handshake_address),
+        &ipc,
+        backend,
+    )
+    .await
+    .with_tool_call_parser(ParserSelection::None);
+    let error = match chat.chat(sample_tool_request("chat-tool-parser-disabled")).await {
+        Ok(_) => panic!("tool requests should fail when tool parsing is disabled"),
+        Err(error) => error,
+    };
+
+    assert!(matches!(
+        error,
+        vllm_chat::Error::ParserDisabled { kind: "tool" }
+    ));
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    chat.shutdown().await.unwrap();
+}
diff --git a/rust/src/chat/tests/templates/qwen3.jinja b/rust/src/chat/tests/templates/qwen3.jinja
new file mode 100644
index 000000000000..9769e5cfde9c
--- /dev/null
+++ b/rust/src/chat/tests/templates/qwen3.jinja
@@ -0,0 +1,89 @@
+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- if message.content is string %}
+        {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}
+    {%- endif %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {%- if loop.last or (not loop.last and reasoning_content) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}
diff --git a/rust/src/chat/tests/templates/qwen35.jinja b/rust/src/chat/tests/templates/qwen35.jinja
new file mode 100644
index 000000000000..ae8ae3649216
--- /dev/null
+++ b/rust/src/chat/tests/templates/qwen35.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+    {%- if content is string %}
+        {{- content }}
+    {%- elif content is iterable and content is not mapping %}
+        {%- for item in content %}
+            {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+                {%- if is_system_content %}
+                    {{- raise_exception('System message cannot contain images.') }}
+                {%- endif %}
+                {%- if do_vision_count %}
+                    {%- set image_count.value = image_count.value + 1 %}
+                {%- endif %}
+                {%- if add_vision_id %}
+                    {{- 'Picture ' ~ image_count.value ~ ': ' }}
+                {%- endif %}
+                {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+            {%- elif 'video' in item or item.type == 'video' %}
+                {%- if is_system_content %}
+                    {{- raise_exception('System message cannot contain videos.') }}
+                {%- endif %}
+                {%- if do_vision_count %}
+                    {%- set video_count.value = video_count.value + 1 %}
+                {%- endif %}
+                {%- if add_vision_id %}
+                    {{- 'Video ' ~ video_count.value ~ ': ' }}
+                {%- endif %}
+                {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+            {%- elif 'text' in item %}
+                {{- item.text }}
+            {%- else %}
+                {{- raise_exception('Unexpected item type in content.') }}
+            {%- endif %}
+        {%- endfor %}
+    {%- elif content is none or content is undefined %}
+        {{- '' }}
+    {%- else %}
+        {{- raise_exception('Unexpected content type.') }}
+    {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+    {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+    {{- '<|im_start|>system\n' }}
+    {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>" }}
+    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {%- if content %}
+            {{- '\n\n' + content }}
+        {%- endif %}
+    {%- endif %}
+    {{- '<|im_end|>\n' }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" %}
+        {%- set content = render_content(message.content, false)|trim %}
+        {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
+            {%- set ns.multi_step_tool = false %}
+            {%- set ns.last_query_index = index %}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+    {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+    {%- set content = render_content(message.content, true)|trim %}
+    {%- if message.role == "system" %}
+        {%- if not loop.first %}
+            {{- raise_exception('System message must be at the beginning.') }}
+        {%- endif %}
+    {%- elif message.role == "user" %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- set reasoning_content = reasoning_content|trim %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function is defined %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {%- if loop.first %}
+                    {%- if content|trim %}
+                        {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- else %}
+                        {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- endif %}
+                {%- else %}
+                    {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                {%- endif %}
+                {%- if tool_call.arguments is defined %}
+                    {%- for args_name, args_value in tool_call.arguments|items %}
+                        {{- '<parameter=' + args_name + '>\n' }}
+                        {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+                        {{- args_value }}
+                        {{- '\n</parameter>\n' }}
+                    {%- endfor %}
+                {%- endif %}
+                {{- '</function>\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.previtem and loop.previtem.role != "tool" %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if not loop.last and loop.nextitem.role != "tool" %}
+            {{- '<|im_end|>\n' }}
+        {%- elif loop.last %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- else %}
+        {{- raise_exception('Unexpected message role.') }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is true %}
+        {{- '<think>\n' }}
+    {%- else %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/README.md b/rust/src/chat/tests/templates/vllm_examples/README.md
new file mode 100644
index 000000000000..7d8a9150f107
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/README.md
@@ -0,0 +1,6 @@
+# vLLM Example Templates
+
+These fixtures are copied from `vllm/examples/`.
+
+They are currently used by `src/chat/src/renderers/hf/format.rs` tests to keep
+our chat-template content format detection aligned with Python vLLM behavior.
diff --git a/rust/src/chat/tests/templates/vllm_examples/template_alpaca.jinja b/rust/src/chat/tests/templates/vllm_examples/template_alpaca.jinja
new file mode 100644
index 000000000000..60667acc3ef9
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/template_alpaca.jinja
@@ -0,0 +1,29 @@
+{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}
+
+{% for message in messages %}
+{% if message['role'] == 'user' %}
+### Instruction:
+{{ message['content']|trim -}}
+{% if not loop.last %}
+
+
+{% endif %}
+{% elif message['role'] == 'assistant' %}
+### Response:
+{{ message['content']|trim -}}
+{% if not loop.last %}
+
+
+{% endif %}
+{% elif message['role'] == 'user_context' %}
+### Input:
+{{ message['content']|trim -}}
+{% if not loop.last %}
+
+
+{% endif %}
+{% endif %}
+{% endfor %}
+{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}
+### Response:
+{% endif %}
\ No newline at end of file
diff --git a/rust/src/chat/tests/templates/vllm_examples/template_baichuan.jinja b/rust/src/chat/tests/templates/vllm_examples/template_baichuan.jinja
new file mode 100644
index 000000000000..42a8d9270a4c
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/template_baichuan.jinja
@@ -0,0 +1,13 @@
+{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}
+
+{%- for message in messages -%}
+    {%- if message['role'] == 'user' -%}
+        {{- '<reserved_106>' + message['content'] -}}
+    {%- elif message['role'] == 'assistant' -%}
+        {{- '<reserved_107>' + message['content'] -}}
+    {%- endif -%}
+{%- endfor -%}
+
+{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%}
+    {{- '<reserved_107>' -}}
+{% endif %}
\ No newline at end of file
diff --git a/rust/src/chat/tests/templates/vllm_examples/template_chatglm.jinja b/rust/src/chat/tests/templates/vllm_examples/template_chatglm.jinja
new file mode 100644
index 000000000000..bf26f27274ef
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/template_chatglm.jinja
@@ -0,0 +1,18 @@
+{%- set counter = namespace(index=0) -%}
+{%- for message in messages -%}
+    {%- if message['role'] == 'user' -%}
+        {{- '[Round ' + counter.index|string + ']\n问：' + message['content'] -}}
+        {%- set counter.index = counter.index + 1 -%}
+    {%- endif -%}
+    {%- if message['role'] == 'assistant' -%}
+        {{- '\n答：' + message['content'] -}}
+        {%- if (loop.last and add_generation_prompt) or not loop.last -%}
+            {{- '\n' -}}
+        {%- endif -%}
+    {%- endif -%}
+{%- endfor -%}
+
+
+{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%}
+    {{- '\n答：' -}}
+{%- endif -%}
\ No newline at end of file
diff --git a/rust/src/chat/tests/templates/vllm_examples/template_chatglm2.jinja b/rust/src/chat/tests/templates/vllm_examples/template_chatglm2.jinja
new file mode 100644
index 000000000000..c155b7c23f64
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/template_chatglm2.jinja
@@ -0,0 +1,18 @@
+{%- set counter = namespace(index=1) -%}
+{%- for message in messages -%}
+    {%- if message['role'] == 'user' -%}
+        {{- '[Round ' + counter.index|string + ']\n\n问：' + message['content'] -}}
+        {%- set counter.index = counter.index + 1 -%}
+    {%- endif -%}
+    {%- if message['role'] == 'assistant' -%}
+        {{- '\n\n答：' + message['content'] -}}
+        {%- if (loop.last and add_generation_prompt) or not loop.last -%}
+            {{- '\n\n' -}}
+        {%- endif -%}
+    {%- endif -%}
+{%- endfor -%}
+
+
+{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%}
+    {{- '\n\n答：' -}}
+{%- endif -%}
\ No newline at end of file
diff --git a/rust/src/chat/tests/templates/vllm_examples/template_chatml.jinja b/rust/src/chat/tests/templates/vllm_examples/template_chatml.jinja
new file mode 100644
index 000000000000..4844e681e1b6
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/template_chatml.jinja
@@ -0,0 +1,2 @@
+{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}
+{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}
\ No newline at end of file
diff --git a/rust/src/chat/tests/templates/vllm_examples/template_falcon.jinja b/rust/src/chat/tests/templates/vllm_examples/template_falcon.jinja
new file mode 100644
index 000000000000..01cf0e2670d0
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/template_falcon.jinja
@@ -0,0 +1,15 @@
+{%- for message in messages -%}
+    {%- if message['role'] == 'user' -%}
+        {{- 'User: ' + message['content'] -}}
+    {%- elif message['role'] == 'assistant' -%}
+        {{- 'Assistant: ' + message['content'] -}}
+    {%- endif -%}
+    {%- if (loop.last and add_generation_prompt) or not loop.last -%}
+        {{- '\n' -}}
+    {%- endif -%}
+{%- endfor -%}
+
+
+{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%}
+    {{- 'Assistant:' -}}
+{% endif %}
\ No newline at end of file
diff --git a/rust/src/chat/tests/templates/vllm_examples/template_falcon_180b.jinja b/rust/src/chat/tests/templates/vllm_examples/template_falcon_180b.jinja
new file mode 100644
index 000000000000..f08f7395b7fd
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/template_falcon_180b.jinja
@@ -0,0 +1,17 @@
+{%- for message in messages -%}
+    {%- if message['role'] == 'system' -%}
+        {{- 'System: ' + message['content'] -}}
+    {%- elif message['role'] == 'user' -%}
+        {{- 'User: ' + message['content'] -}}
+    {%- elif message['role'] == 'assistant' -%}
+        {{- 'Falcon: ' + message['content'] -}}
+    {%- endif -%}
+    {%- if (loop.last and add_generation_prompt) or not loop.last -%}
+        {{- '\n' -}}
+    {%- endif -%}
+{%- endfor -%}
+
+
+{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%}
+    {{- 'Falcon:' -}}
+{% endif %}
\ No newline at end of file
diff --git a/rust/src/chat/tests/templates/vllm_examples/template_inkbot.jinja b/rust/src/chat/tests/templates/vllm_examples/template_inkbot.jinja
new file mode 100644
index 000000000000..33a817454df3
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/template_inkbot.jinja
@@ -0,0 +1,30 @@
+<#meta#>
+- Date: {{ (messages|selectattr('role', 'equalto', 'meta-current_date')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'meta-current_date')|list) else '' }}
+- Task: {{ (messages|selectattr('role', 'equalto', 'meta-task_name')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'meta-task_name')|list) else '' }}
+<#system#>
+{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}
+<#chat#>
+{% for message in messages %}
+{% if message['role'] == 'user' %}
+<#user#>
+{{ message['content']|trim -}}
+{% if not loop.last %}
+
+{% endif %}
+{% elif message['role'] == 'assistant' %}
+<#bot#>
+{{ message['content']|trim -}}
+{% if not loop.last %}
+
+{% endif %}
+{% elif message['role'] == 'user_context' %}
+<#user_context#>
+{{ message['content']|trim -}}
+{% if not loop.last %}
+
+{% endif %}
+{% endif %}
+{% endfor %}
+{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}
+<#bot#>
+{% endif %}
\ No newline at end of file
diff --git a/rust/src/chat/tests/templates/vllm_examples/template_teleflm.jinja b/rust/src/chat/tests/templates/vllm_examples/template_teleflm.jinja
new file mode 100644
index 000000000000..0cb29ccbb841
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/template_teleflm.jinja
@@ -0,0 +1,12 @@
+{%- for message in messages %}
+    {%- if message['role'] == 'user' %}
+        {{- '<_user>' + message['content']|trim }}
+    {%- elif message['role'] == 'system' %}
+        {{- '<_system>' + message['content']|trim }}
+    {%- elif message['role'] == 'assistant' %}
+        {{- '<_bot>' + message['content'] }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<_bot>' }}
+{%- endif %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_deepseekr1.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_deepseekr1.jinja
new file mode 100644
index 000000000000..908574be9df5
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_deepseekr1.jinja
@@ -0,0 +1,92 @@
+{% if not add_generation_prompt is defined %}
+    {% set add_generation_prompt = false %}
+{% endif %}
+{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}
+{%- for message in messages %}
+    {%- if message['role'] == 'system' %}
+        {%- if ns.is_first_sp %}
+            {% set ns.system_prompt = ns.system_prompt + message['content'] %}
+            {% set ns.is_first_sp = false %}
+        {%- else %}
+            {% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
+        {%- endif %}
+    {%- endif %}
+{%- endfor -%}
+
+{#- Adapted from https://github.com/sgl-project/sglang/blob/main/examples/chat_template/tool_chat_template_deepseekr1.jinja #}
+{% if tools is defined and tools is not none %}
+    {% set tool_ns = namespace(text='You are a helpful assistant with tool calling capabilities. '
+        'When a tool call is needed, you MUST use the following format to issue the call:\n'
+        '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>FUNCTION_NAME\n'
+        '```json\n{"param1": "value1", "param2": "value2"}\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>\n\n'
+        'Make sure the JSON is valid.'
+        '## Tools\n\n### Function\n\nYou have the following functions available:\n\n') %}
+    {% for tool in tools %}
+        {% set tool_ns.text = tool_ns.text + '\n```json\n' + (tool | tojson) + '\n```\n' %}
+    {% endfor %}
+    {% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
+{% endif %}
+
+{{- bos_token }}
+{{- ns.system_prompt }}
+{%- for message in messages %}
+    {% set content = message['content'] %}
+    {%- if message['role'] == 'user' %}
+        {%- set ns.is_tool = false -%}
+        {%- set ns.is_first = false -%}
+        {%- set ns.is_last_user = true -%}
+        {{'<｜User｜>' + content + '<｜Assistant｜>'}}
+    {%- endif %}
+    {%- if message['role'] == 'assistant' %}
+        {% if '</think>' in content %}
+            {% set content = content.split('</think>')[-1] %}
+        {% endif %}
+    {% endif %}
+    {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
+        {%- set ns.is_last_user = false -%}
+        {%- if ns.is_tool %}
+            {{- '<｜tool▁outputs▁end｜>'}}
+        {%- endif %}
+        {%- set ns.is_first = false %}
+        {%- set ns.is_tool = false -%}
+        {%- set ns.is_output_first = true %}
+        {%- for tool in message['tool_calls'] %}
+            {%- if not ns.is_first %}
+                {%- if content is none %}
+                    {{- '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+                {%- else %}
+                    {{- content + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+                {%- endif %}
+                {%- set ns.is_first = true -%}
+            {%- else %}
+                {{- '\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+            {%- endif %}
+        {%- endfor %}
+        {{- '<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+    {%- endif %}
+    {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}
+        {%- set ns.is_last_user = false -%}
+        {%- if ns.is_tool %}
+            {{- '<｜tool▁outputs▁end｜>' + content + '<｜end▁of▁sentence｜>'}}
+            {%- set ns.is_tool = false -%}
+        {%- else %}
+            {{- content + '<｜end▁of▁sentence｜>'}}
+        {%- endif %}
+    {%- endif %}
+    {%- if message['role'] == 'tool' %}
+        {%- set ns.is_last_user = false -%}
+        {%- set ns.is_tool = true -%}
+        {%- if ns.is_output_first %}
+            {{- '<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + content + '<｜tool▁output▁end｜>'}}
+            {%- set ns.is_output_first = false %}
+        {%- else %}
+            {{- '\n<｜tool▁output▁begin｜>' + content + '<｜tool▁output▁end｜>'}}
+        {%- endif %}
+    {%- endif %}
+{%- endfor -%}
+{% if ns.is_tool %}
+    {{- '<｜tool▁outputs▁end｜>'}}
+{%- endif %}
+{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}
+    {{- '<｜Assistant｜>'}}
+{%- endif %}
\ No newline at end of file
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_deepseekv3.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_deepseekv3.jinja
new file mode 100644
index 000000000000..36f3781439ed
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_deepseekv3.jinja
@@ -0,0 +1,96 @@
+{% if not add_generation_prompt is defined %}
+    {% set add_generation_prompt = false %}
+{% endif %}
+
+{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}
+
+{%- for message in messages %}
+    {%- if message['role'] == 'system' %}
+        {%- if ns.is_first_sp %}
+            {% set ns.system_prompt = ns.system_prompt + message['content'] %}
+            {% set ns.is_first_sp = false %}
+        {%- else %}
+            {% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+
+{{ bos_token }}
+{{ ns.system_prompt }}
+{%- if tools %}
+    {{"\n\n# Tools\n\nYou may call one or more functions to assist with the user query." }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{"\n</tools>\n\n"}}
+
+    {{"For function call returns, you should first print <｜tool▁calls▁begin｜>"}}
+
+    {{"For each function call, you should return object like:\n" }}
+    {{"<｜tool▁call▁begin｜>function<｜tool▁sep｜><function_name>\n```json\n<function_arguments_in_json_format>\n```<｜tool▁call▁end｜>"}}
+
+    {{"At the end of function call returns, you should print <｜tool▁calls▁end｜><｜end▁of▁sentence｜>"}}
+{%- endif %}
+
+{%- for message in messages %}
+    {%- if message['role'] == 'user' %}
+        {%- set ns.is_tool = false -%}
+        {%- set ns.is_first = false -%}
+        {%- set ns.is_last_user = true -%}
+        {{'<｜User｜>' + message['content'] + '<｜Assistant｜>'}}
+    {%- endif %}
+
+    {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
+        {%- set ns.is_last_user = false -%}
+        {%- if ns.is_tool %}
+            {{'<｜tool▁outputs▁end｜>'}}
+        {%- endif %}
+        {%- set ns.is_first = false %}
+        {%- set ns.is_tool = false -%}
+        {%- set ns.is_output_first = true %}
+        
+        {%- for tool in message['tool_calls'] %}
+            {%- if not ns.is_first %}
+                {%- if message['content'] is none %}
+                    {{'<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+                {%- else %}
+                    {{message['content'] + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+                {%- endif %}
+            {%- set ns.is_first = true -%}
+            {%- else %}
+                {{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+            {%- endif %}
+        {%- endfor %}
+        {{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+    {%- endif %}
+    {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}
+        {%- set ns.is_last_user = false -%}
+        {%- if ns.is_tool %}
+            {{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}
+            {%- set ns.is_tool = false -%}
+        {%- else %}
+            {% set content = message['content'] %}
+            {{content + '<｜end▁of▁sentence｜>'}}
+        {%- endif %}
+    {%- endif %}
+
+    {%- if message['role'] == 'tool' %}
+        {%- set ns.is_last_user = false -%}
+        {%- set ns.is_tool = true -%}
+        {%- if ns.is_output_first %}
+            {{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+            {%- set ns.is_output_first = false %}
+        {%- else %}
+            {{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+        {%- endif %}
+    {%- endif %}
+{%- endfor -%}
+
+{% if ns.is_tool %}
+    {{'<｜tool▁outputs▁end｜>'}}
+{% endif %}
+
+{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}
+    {{'<｜Assistant｜>'}}
+{% endif %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_deepseekv31.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_deepseekv31.jinja
new file mode 100644
index 000000000000..863be69d60b6
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_deepseekv31.jinja
@@ -0,0 +1,91 @@
+{% if not add_generation_prompt is defined %}
+  {% set add_generation_prompt = false %}
+{% endif %}
+{% if not thinking is defined %}
+  {% set thinking = false %}
+{% endif %}
+{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}
+{%- for message in messages %}
+  {%- if message['role'] == 'system' %}
+    {%- if ns.is_first_sp %}
+      {% set ns.system_prompt = ns.system_prompt + message['content'] %}
+      {% set ns.is_first_sp = false %}
+    {%- else %}
+      {% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
+    {%- endif %}
+  {%- endif %}
+{%- endfor %}
+
+{% if tools is defined and tools is not none %}
+  {% set tool_ns = namespace(text='## Tools\nYou have access to the following tools:\n') %}
+  {% for tool in tools %}
+    {% set tool_ns.text = tool_ns.text + '\n### ' + tool.function.name + '\nDescription: ' + tool.function.description + '\n\nParameters: ' + (tool.function.parameters | tojson) + '\n' %}
+  {% endfor %}
+  {% set tool_ns.text = tool_ns.text + "\nIMPORTANT: ALWAYS adhere to this exact format for tool use:\n<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>tool_call_name<｜tool▁sep｜>tool_call_arguments<｜tool▁call▁end｜>{{additional_tool_calls}}<｜tool▁calls▁end｜>\n\nWhere:\n\n- `tool_call_name` must be an exact match to one of the available tools\n- `tool_call_arguments` must be valid JSON that strictly follows the tool's Parameters Schema\n- For multiple tool calls, chain them directly without separators or spaces\n" %}
+  {% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
+{% endif %}
+
+{{ bos_token }}{{ ns.system_prompt }}
+{%- for message in messages %}
+  {%- if message['role'] == 'user' %}
+    {%- set ns.is_tool = false -%}
+    {%- set ns.is_first = false -%}
+    {%- set ns.is_last_user = true -%}
+    {{'<｜User｜>' + message['content']}}
+  {%- endif %}
+  {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
+    {%- if ns.is_last_user %}
+      {{'<｜Assistant｜></think>'}}
+    {%- endif %}
+    {%- set ns.is_last_user = false -%}
+    {%- set ns.is_first = false %}
+    {%- set ns.is_tool = false -%}
+    {%- for tool in message['tool_calls'] %}
+      {%- if not ns.is_first %}
+        {%- if message['content'] is none %}
+          {{'<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments']|tojson + '<｜tool▁call▁end｜>'}}
+        {%- else %}
+          {{message['content'] + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments']|tojson + '<｜tool▁call▁end｜>'}}
+        {%- endif %}
+        {%- set ns.is_first = true -%}
+      {%- else %}
+        {{'<｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments']|tojson + '<｜tool▁call▁end｜>'}}
+      {%- endif %}
+    {%- endfor %}
+    {{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+  {%- endif %}
+  {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}
+    {%- if ns.is_last_user %}
+      {{'<｜Assistant｜>'}}
+      {%- if message['prefix'] is defined and message['prefix'] and thinking %}
+        {{'<think>'}}  
+      {%- else %}
+        {{'</think>'}}
+      {%- endif %}
+    {%- endif %}
+    {%- set ns.is_last_user = false -%}
+    {%- if ns.is_tool %}
+      {{message['content'] + '<｜end▁of▁sentence｜>'}}
+      {%- set ns.is_tool = false -%}
+    {%- else %}
+      {%- set content = message['content'] -%}
+      {%- if '</think>' in content %}
+        {%- set content = content.split('</think>', 1)[1] -%}
+      {%- endif %}
+      {{content + '<｜end▁of▁sentence｜>'}}
+    {%- endif %}
+  {%- endif %}
+  {%- if message['role'] == 'tool' %}
+    {%- set ns.is_last_user = false -%}
+    {%- set ns.is_tool = true -%}
+    {{'<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+  {%- endif %}
+{%- endfor -%}
+{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}
+  {{'<｜Assistant｜>'}}
+  {%- if not thinking %}
+    {{'</think>'}}
+  {%- else %}
+    {{'<think>'}}
+  {%- endif %}
+{% endif %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_functiongemma.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_functiongemma.jinja
new file mode 100644
index 000000000000..63b5d336a76b
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_functiongemma.jinja
@@ -0,0 +1,54 @@
+{%- set ns = namespace(developer_content='', has_tools=false) -%}
+
+{%- if tools is defined and tools | length > 0 -%}
+    {%- set ns.has_tools = true -%}
+{%- endif -%}
+
+{%- for message in messages -%}
+    {%- if message.role == 'developer' or message.role == 'system' -%}
+<start_of_turn>user
+{{ message.content }}
+{%- if ns.has_tools %}
+
+Available functions:
+{%- for tool in tools %}
+{%- if tool.type == 'function' %}
+
+Function: {{ tool.function.name }}
+Description: {{ tool.function.description | default('No description provided') }}
+Parameters: {{ tool.function.parameters | tojson }}
+{%- endif %}
+{%- endfor %}
+{%- endif %}
+<end_of_turn>
+    {%- elif message.role == 'user' -%}
+<start_of_turn>user
+{{ message.content }}<end_of_turn>
+    {%- elif message.role == 'assistant' -%}
+        {%- if message.tool_calls is defined and message.tool_calls | length > 0 -%}
+<start_of_turn>model
+{%- for tool_call in message.tool_calls %}
+<start_function_call>call:{{ tool_call.function.name }}{
+{%- set args = tool_call.function.arguments -%}
+{%- if args is string -%}
+{%- set args = args | fromjson -%}
+{%- endif -%}
+{%- for key, value in args.items() -%}
+{{ key }}:<escape>{{ value }}<escape>{% if not loop.last %},{% endif %}
+{%- endfor -%}
+}<end_function_call>
+{%- endfor %}
+<end_of_turn>
+        {%- else -%}
+<start_of_turn>model
+{{ message.content }}<end_of_turn>
+        {%- endif -%}
+    {%- elif message.role == 'tool' -%}
+<start_of_turn>user
+Function result for {{ message.name | default('function') }}: {{ message.content }}<end_of_turn>
+    {%- endif -%}
+{%- endfor -%}
+
+{%- if add_generation_prompt -%}
+<start_of_turn>model
+{%- endif -%}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_gemma3_pythonic.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_gemma3_pythonic.jinja
new file mode 100644
index 000000000000..5a20b0191129
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_gemma3_pythonic.jinja
@@ -0,0 +1,123 @@
+{#- Begin-of-sequence token to start the model prompt -#}
+{{ bos_token }}
+{#- Extracts the system message. Gemma does not support system messages so it will be prepended to first user message. -#}
+{%- if messages[0]['role'] == 'system' -%}
+    {%- if messages[0]['content'] is string -%}
+        {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}
+    {%- else -%}
+        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {%- set first_user_prefix = "" -%}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+{#- Set tools to none if not defined for this ChatCompletion request (helps avoid errors later) -#}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+{#- Validate alternating user/assistant messages (excluding 'tool' messages and ones with tool_calls) -#}
+{%- for message in loop_messages | rejectattr("role", "equalto", "tool") | selectattr("tool_calls", "undefined") -%}
+    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
+        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif -%}
+{%- endfor -%}
+
+{#- Main loop over all messages in the conversation history -#}
+{%- for message in loop_messages -%}
+    {#- Normalize roles for model prompt formatting -#}
+    {%- if (message['role'] == 'assistant') -%}
+        {%- set role = "model" -%}
+    {%- elif (message['role'] == 'tool') -%}
+        {%- set role = "user" -%}
+    {%- else -%}
+        {%- set role = message['role'] -%}
+    {%- endif -%}
+    {#- Mark the start of a message block with the appropriate role -#}
+    {{ '<start_of_turn>' + role + '\n' -}}
+
+    {#- Insert system message content (if present) at the beginning of the first message. -#}
+    {%- if loop.first -%}
+        {{ first_user_prefix }}
+        {#- Append system message with tool information if using tools in message request. -#}
+        {%- if tools is not none -%}
+            {{- "Tools (functions) are available. If you decide to invoke one or more of the tools, you must respond with a python list of the function calls.\n" -}}
+            {{- "Example Format: [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] \n" -}}
+            {{- "Do not use variables. DO NOT USE MARKDOWN SYNTAX. You SHOULD NOT include any other text in the response if you call a function. If none of the functions can be used, point it out. If you lack the parameters required by the function, also point it out.\n" -}}
+            {{- "Here is a list of functions in JSON format that you can invoke.\n" -}}
+            {{- tools | tojson(indent=4) -}}
+            {{- "\n\n" -}}
+        {%- endif -%}
+    {%- endif -%}
+
+    {#- Format model tool calls (turns where model indicates they want to call a tool) -#}
+    {%- if 'tool_calls' in message -%}
+        {#- Opening bracket for tool call list. -#}
+        {{- '[' -}}
+        {#- For each tool call -#}
+        {%- for tool_call in message.tool_calls -%}
+            {#- Get tool call function. -#}
+            {%- if tool_call.function is defined -%}
+                {%- set tool_call = tool_call.function -%}
+            {%- endif -%}
+            {#- Function name & opening parenthesis. -#}
+            {{- tool_call.name + '(' -}}
+
+            {#-- Handle arguments as list (positional) or dict (named) --#}
+            {#-- Named arguments (dict) --#}
+            {%- if tool_call.arguments is iterable and tool_call.arguments is mapping -%}
+                {%- set first = true -%}
+                {%- for key, val in tool_call.arguments.items() -%}
+                    {%- if not first %}, {% endif -%}
+                    {{ key }}={{ val | tojson }}
+                    {%- set first = false -%}
+                {%- endfor -%}
+            {#-- Positional arguments (list) --#}
+            {%- elif tool_call.arguments is iterable -%}
+                {{- tool_call.arguments | map('tojson') | join(', ') -}}
+            {#-- Fallback: single positional value --#}
+            {%- else -%}
+                {{- tool_call.arguments | tojson -}}
+            {#-- Closing parenthesis. --#}
+            {%- endif -%}
+                {{- ')' -}}
+            {#-- If more than one tool call, place comma and move to formatting next tool call --#}
+            {%- if not loop.last -%}, {% endif -%}
+        {%- endfor -%}
+        {#- Closing bracket for tool call list. -#}
+        {{- ']' -}}
+    {%- endif -%}
+    
+    {#- Tool response start tag (for messages from a tool) -#}
+    {%- if (message['role'] == 'tool') -%}
+        {{ '<tool_response>\n' -}}
+    {%- endif -%}
+
+    {#- Render the message content: handle plain string or multimodal content like image/text -#}
+    {%- if message['content'] is string -%}
+        {{ message['content'] | trim }}
+    {%- elif message['content'] is iterable -%}
+        {%- for item in message['content'] -%}
+            {%- if item['type'] == 'image' -%}
+                {{ '<start_of_image>' }}
+            {%- elif item['type'] == 'text' -%}
+                {{ item['text'] | trim }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{ raise_exception("Invalid content type") }}
+    {%- endif -%}
+
+    {#- Tool response end tag -#}
+    {%- if (message['role'] == 'tool') -%}
+        {{ '</tool_response>' -}}
+    {%- endif -%}
+
+    {#- Mark end of a single turn -#}
+    {{ '<end_of_turn>\n' }}
+{%- endfor -%}
+
+{#- If generation is to be triggered, add model prompt prefix -#}
+{%- if add_generation_prompt -%}
+    {{'<start_of_turn>model\n'}}
+{%- endif -%}
\ No newline at end of file
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_gemma4.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_gemma4.jinja
new file mode 100644
index 000000000000..15c5238ac332
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_gemma4.jinja
@@ -0,0 +1,331 @@
+{%- macro format_parameters(properties, required) -%}
+    {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
+    {%- set ns = namespace(found_first=false) -%}
+    {%- for key, value in properties | dictsort -%}
+        {%- set add_comma = false -%}
+        {%- if key not in standard_keys -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {{ key }}:{
+            {%- if value['description'] -%}
+                description:<|"|>{{ value['description'] }}<|"|>
+                {%- set add_comma = true -%}
+            {%- endif -%}
+            {%- if value['nullable'] %}
+                {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                nullable:true
+            {%- endif -%}
+            {%- if value['type'] | upper == 'STRING' -%}
+                {%- if value['enum'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    enum:{{ format_argument(value['enum']) }}
+                {%- endif -%}
+            {%- elif value['type'] | upper == 'OBJECT' -%}
+                ,properties:{
+                {%- if value['properties'] is defined and value['properties'] is mapping -%}
+                    {{- format_parameters(value['properties'], value['required'] | default([])) -}}
+                {%- elif value is mapping -%}
+                    {{- format_parameters(value, value['required'] | default([])) -}}
+                {%- endif -%}
+                }
+                {%- if value['required'] -%}
+                    ,required:[
+                    {%- for item in value['required'] | default([]) -%}
+                        <|"|>{{- item -}}<|"|>
+                        {%- if not loop.last %},{% endif -%}
+                    {%- endfor -%}
+                    ]
+                {%- endif -%}
+            {%- elif value['type'] | upper == 'ARRAY' -%}
+                {%- if value['items'] is mapping and value['items'] -%}
+                    ,items:{
+                    {%- set ns_items = namespace(found_first=false) -%}
+                    {%- for item_key, item_value in value['items'] | dictsort -%}
+                        {%- if item_value is not none -%}
+                            {%- if ns_items.found_first %},{% endif -%}
+                            {%- set ns_items.found_first = true -%}
+                            {%- if item_key == 'properties' -%}
+                                properties:{
+                                {%- if item_value is mapping -%}
+                                    {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
+                                {%- endif -%}
+                                }
+                            {%- elif item_key == 'required' -%}
+                                required:[
+                                {%- for req_item in item_value -%}
+                                    <|"|>{{- req_item -}}<|"|>
+                                    {%- if not loop.last %},{% endif -%}
+                                {%- endfor -%}
+                                ]
+                            {%- elif item_key == 'type' -%}
+                                {%- if item_value is string -%}
+                                    type:{{ format_argument(item_value | upper) }}
+                                {%- else -%}
+                                    type:{{ format_argument(item_value | map('upper') | list) }}
+                                {%- endif -%}
+                            {%- else -%}
+                                {{ item_key }}:{{ format_argument(item_value) }}
+                            {%- endif -%}
+                        {%- endif -%}
+                    {%- endfor -%}
+                    }
+                {%- endif -%}
+            {%- endif -%}
+            {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+            type:<|"|>{{ value['type'] | upper }}<|"|>}
+        {%- endif -%}
+    {%- endfor -%}
+{%- endmacro -%}
+{%- macro format_function_declaration(tool_data) -%}
+    declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
+    {%- set params = tool_data['function']['parameters'] -%}
+    {%- if params -%}
+        ,parameters:{
+        {%- if params['properties'] -%}
+            properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
+        {%- endif -%}
+        {%- if params['required'] -%}
+            required:[
+            {%- for item in params['required'] -%}
+                <|"|>{{- item -}}<|"|>
+                {{- ',' if not loop.last -}}
+            {%- endfor -%}
+            ],
+        {%- endif -%}
+        {%- if params['type'] -%}
+            type:<|"|>{{- params['type'] | upper -}}<|"|>}
+        {%- endif -%}
+    {%- endif -%}
+    {%- if 'response' in tool_data['function'] -%}
+        {%- set response_declaration = tool_data['function']['response'] -%}
+        ,response:{
+        {%- if response_declaration['description'] -%}
+            description:<|"|>{{- response_declaration['description'] -}}<|"|>,
+        {%- endif -%}
+        {%- if response_declaration['type'] | upper == 'OBJECT' -%}
+            type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>}
+        {%- endif -%}
+    {%- endif -%}
+    }
+{%- endmacro -%}
+{%- macro format_argument(argument, escape_keys=True) -%}
+    {%- if argument is string -%}
+        {{- '<|"|>' + argument + '<|"|>' -}}
+    {%- elif argument is boolean -%}
+        {{- 'true' if argument else 'false' -}}
+    {%- elif argument is mapping -%}
+        {{- '{' -}}
+        {%- set ns = namespace(found_first=false) -%}
+        {%- for key, value in argument | dictsort -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {%- if escape_keys -%}
+                {{- '<|"|>' + key + '<|"|>' -}}
+            {%- else -%}
+                {{- key -}}
+            {%- endif -%}
+            :{{- format_argument(value, escape_keys=escape_keys) -}}
+        {%- endfor -%}
+        {{- '}' -}}
+    {%- elif argument is sequence -%}
+        {{- '[' -}}
+        {%- for item in argument -%}
+            {{- format_argument(item, escape_keys=escape_keys) -}}
+            {%- if not loop.last %},{% endif -%}
+        {%- endfor -%}
+        {{- ']' -}}
+    {%- else -%}
+        {{- argument -}}
+    {%- endif -%}
+{%- endmacro -%}
+{%- macro strip_thinking(text) -%}
+    {%- set ns = namespace(result='') -%}
+    {%- for part in text.split('<channel|>') -%}
+        {%- if '<|channel>' in part -%}
+            {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
+        {%- else -%}
+            {%- set ns.result = ns.result + part -%}
+        {%- endif -%}
+    {%- endfor -%}
+    {{- ns.result | trim -}}
+{%- endmacro -%}
+
+{%- macro format_tool_response_block(tool_name, response) -%}
+    {{- '<|tool_response>' -}}
+    {%- if response is mapping -%}
+        {{- 'response:' + tool_name + '{' -}}
+        {%- for key, value in response | dictsort -%}
+            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+            {%- if not loop.last %},{% endif -%}
+        {%- endfor -%}
+        {{- '}' -}}
+    {%- else -%}
+        {{- 'response:' + tool_name + '{value:' + format_argument(response, escape_keys=False) + '}' -}}
+    {%- endif -%}
+    {{- '<tool_response|>' -}}
+{%- endmacro -%}
+
+{%- set ns = namespace(prev_message_type=None) -%}
+{%- set loop_messages = messages -%}
+{{ bos_token }}
+{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
+    {{- '<|turn>system\n' -}}
+
+    {%- if enable_thinking is defined and enable_thinking -%}
+        {{- '<|think|>' -}}
+        {%- set ns.prev_message_type = 'think' -%}
+    {%- endif -%}
+
+    {%- if messages[0]['role'] in ['system', 'developer'] -%}
+        {{- messages[0]['content'] | trim -}}
+        {%- set loop_messages = messages[1:] -%}
+    {%- endif -%}
+
+    {%- if tools -%}
+        {%- for tool in tools %}
+            {{- '<|tool>' -}}
+            {{- format_function_declaration(tool) | trim -}}
+            {{- '<tool|>' -}}
+        {%- endfor %}
+        {%- set ns.prev_message_type = 'tool' -%}
+    {%- endif -%}
+
+    {{- '<turn|>\n' -}}
+{%- endif %}
+
+{%- set ns_turn = namespace(last_user_idx=-1) -%}
+{%- for i in range(loop_messages | length) -%}
+    {%- if loop_messages[i]['role'] == 'user' -%}
+        {%- set ns_turn.last_user_idx = i -%}
+    {%- endif -%}
+{%- endfor -%}
+
+{%- for message in loop_messages -%}
+    {%- if message['role'] != 'tool' -%}
+    {%- set ns.prev_message_type = None -%}
+    {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
+    {#- OpenAI may emit multiple assistant messages in one tool loop (user → asst → tool → asst → tool).
+        Only the first of those should open <|turn>model; later ones continue the same model turn. -#}
+    {%- set prev_nt = namespace(role=None, found=false) -%}
+    {%- if loop.index0 > 0 -%}
+        {%- for j in range(loop.index0 - 1, -1, -1) -%}
+            {%- if not prev_nt.found -%}
+                {%- if loop_messages[j]['role'] != 'tool' -%}
+                    {%- set prev_nt.role = loop_messages[j]['role'] -%}
+                    {%- set prev_nt.found = true -%}
+                {%- endif -%}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- endif -%}
+    {%- set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}
+    {%- if not continue_same_model_turn -%}
+        {{- '<|turn>' + role + '\n' }}
+    {%- endif -%}
+
+    {%- if message.get('reasoning') and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
+        {{- '<|channel>thought\n' + message['reasoning'] + '\n<channel|>'}}
+    {%- endif -%}
+
+            {%- if message['tool_calls'] -%}
+                {%- for tool_call in message['tool_calls'] -%}
+                    {%- set function = tool_call['function'] -%}
+                    {{- '<|tool_call>call:' + function['name'] + '{' -}}
+                    {%- if function['arguments'] is mapping -%}
+                        {%- set ns_args = namespace(found_first=false) -%}
+                        {%- for key, value in function['arguments'] | dictsort -%}
+                            {%- if ns_args.found_first %},{% endif -%}
+                            {%- set ns_args.found_first = true -%}
+                            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                        {%- endfor -%}
+                    {%- elif function['arguments'] is string -%}
+                        {{- function['arguments'] -}}
+                    {%- endif -%}
+                    {{- '}<tool_call|>' -}}
+                {%- endfor -%}
+                {%- set ns.prev_message_type = 'tool_call' -%}
+            {%- endif -%}
+
+            {%- set ns_tr_out = namespace(flag=false) -%}
+            {%- if message.get('tool_responses') -%}
+                {#- Legacy: tool_responses embedded on the assistant message -#}
+                {%- for tool_response in message['tool_responses'] -%}
+                    {{- format_tool_response_block(tool_response['name'] | default('unknown'), tool_response['response']) -}}
+                    {%- set ns_tr_out.flag = true -%}
+                    {%- set ns.prev_message_type = 'tool_response' -%}
+                {%- endfor -%}
+            {%- elif message.get('tool_calls') -%}
+                {#- OpenAI Chat Completions: consecutive following messages with role "tool" (no break/continue; range scan) -#}
+                {%- set ns_tool_scan = namespace(stopped=false) -%}
+                {%- for k in range(loop.index0 + 1, loop_messages | length) -%}
+                    {%- if ns_tool_scan.stopped -%}
+                    {%- elif loop_messages[k]['role'] != 'tool' -%}
+                        {%- set ns_tool_scan.stopped = true -%}
+                    {%- else -%}
+                        {%- set follow = loop_messages[k] -%}
+                        {%- set ns_tname = namespace(name=follow.get('name') | default('unknown')) -%}
+                        {%- for tc in message['tool_calls'] -%}
+                            {%- if tc.get('id') == follow.get('tool_call_id') -%}
+                                {%- set ns_tname.name = tc['function']['name'] -%}
+                            {%- endif -%}
+                        {%- endfor -%}
+                        {%- set tool_body = follow.get('content') -%}
+                        {%- if tool_body is string -%}
+                            {{- format_tool_response_block(ns_tname.name, tool_body) -}}
+                        {%- elif tool_body is sequence and tool_body is not string -%}
+                            {%- set ns_txt = namespace(s='') -%}
+                            {%- for part in tool_body -%}
+                                {%- if part.get('type') == 'text' -%}
+                                    {%- set ns_txt.s = ns_txt.s + (part.get('text') | default('')) -%}
+                                {%- endif -%}
+                            {%- endfor -%}
+                            {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
+                        {%- else -%}
+                            {{- format_tool_response_block(ns_tname.name, tool_body) -}}
+                        {%- endif -%}
+                        {%- set ns_tr_out.flag = true -%}
+                        {%- set ns.prev_message_type = 'tool_response' -%}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- endif -%}
+
+            {%- if message['content'] is string -%}
+                {%- if role == 'model' -%}
+                    {{- strip_thinking(message['content']) -}}
+                {%- else -%}
+                    {{- message['content'] | trim -}}
+                {%- endif -%}
+            {%- elif message['content'] is sequence -%}
+                {%- for item in message['content'] -%}
+                    {%- if item['type'] == 'text' -%}
+                        {%- if role == 'model' -%}
+                            {{- strip_thinking(item['text']) -}}
+                        {%- else -%}
+                            {{- item['text'] | trim -}}
+                        {%- endif -%}
+                    {%- elif item['type'] == 'image' -%}
+                        {{- '\n\n<|image|>\n\n' -}}
+                        {%- set ns.prev_message_type = 'image' -%}
+                    {%- elif item['type'] == 'audio' -%}
+                        {{- '<|audio|>' -}}
+                        {%- set ns.prev_message_type = 'audio' -%}
+                    {%- elif item['type'] == 'video' -%}
+                        {{- '\n\n<|video|>\n\n' -}}
+                        {%- set ns.prev_message_type = 'video' -%}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- endif -%}
+
+        {%- if not (ns_tr_out.flag and not message.get('content')) -%}
+            {{- '<turn|>\n' -}}
+        {%- endif -%}
+    {%- endif -%}
+{%- endfor -%}
+
+{%- if add_generation_prompt -%}
+    {%- if ns.prev_message_type != 'tool_response' -%}
+        {{- '<|turn>model\n' -}}
+    {%- endif -%}
+    {%- if not enable_thinking | default(false) -%}
+        {{- '<|channel>thought\n<channel|>' -}}
+    {%- endif -%}
+{%- endif -%}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_glm4.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_glm4.jinja
new file mode 100644
index 000000000000..11f76b4d4af4
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_glm4.jinja
@@ -0,0 +1,54 @@
+{%- set counter = namespace(index=0) -%}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+
+{%- if messages and messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content']|trim %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- set system_message = "You are a helpful assistant." %}
+{%- endif %}
+
+{%- if tools is not none %}
+    {%- set tool_instruction %}
+You have access to the following tools. When you need to call a tool, you MUST use the following format:
+
+<tool_call>function_name
+<arg_key>parameter_name</arg_key>
+<arg_value>parameter_value</arg_value>
+</tool_call>
+
+Important rules:
+- Always wrap tool calls with <tool_call>...</tool_call> tags
+- Put the function name on the first line after <tool_call>
+- Use <arg_key> and <arg_value> tags for each parameter
+- If a parameter value is a string, keep it as-is. If it's a number or boolean, convert it appropriately
+- You can make multiple tool calls if needed
+- If no tool is suitable, respond with regular text
+
+Available tools:
+{% endset %}
+    {{- tool_instruction + "\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+
+{%- for message in messages -%}
+    {%- if message['role'] == 'user' -%}
+        {{- '[Round ' + counter.index|string + ']\n问：' + message['content'] -}}
+        {%- set counter.index = counter.index + 1 -%}
+    {%- endif -%}
+    {%- if message['role'] == 'assistant' -%}
+        {{- '\n答：' + message['content'] -}}
+        {%- if (loop.last and add_generation_prompt) or not loop.last -%}
+            {{- '\n' -}}
+        {%- endif -%}
+    {%- endif -%}
+{%- endfor -%}
+
+{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%}
+    {{- '\n答：' -}}
+{%- endif -%}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_granite.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_granite.jinja
new file mode 100644
index 000000000000..467dcb2d1023
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_granite.jinja
@@ -0,0 +1,36 @@
+{%- if tools %}
+    {{- '<|start_of_role|>available_tools<|end_of_role|>
+' }}
+    {%- for tool in tools %}
+    {{- tool | tojson(indent=4) }}
+    {%- if not loop.last %}
+        {{- '
+
+' }}
+    {%- endif %}
+    {%- endfor %}
+    {{- '<|end_of_text|>
+' }}
+{%- endif %}
+
+{%- for message in messages %}
+    {%- if message['role'] == 'system' %}
+    {{- '<|start_of_role|>system<|end_of_role|>' + message['content'] + '<|end_of_text|>
+' }}
+    {%- elif message['role'] == 'user' %}
+    {{- '<|start_of_role|>user<|end_of_role|>' + message['content'] + '<|end_of_text|>
+' }}
+    {%- elif message['role'] == 'assistant_tool_call' or (message['role'] == 'assistant' and message.tool_calls is defined) %}
+    {{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message.tool_calls|map(attribute='function')|list|tojson(indent=4) + '<|end_of_text|>
+' }}
+    {%- elif message['role'] == 'assistant' %}
+    {{- '<|start_of_role|>assistant<|end_of_role|>'  + message['content'] + '<|end_of_text|>
+' }}
+    {%- elif message['role'] == 'tool_response' or  message['role'] == 'tool' %}
+    {{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'] + '<|end_of_text|>
+' }}
+    {%- endif %}
+    {%- if loop.last and add_generation_prompt %}
+    {{- '<|start_of_role|>assistant<|end_of_role|>' }}
+    {%- endif %}
+{%- endfor %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_granite_20b_fc.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_granite_20b_fc.jinja
new file mode 100644
index 000000000000..cb52188ec72d
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_granite_20b_fc.jinja
@@ -0,0 +1,130 @@
+{%- macro json_to_python_type(json_spec) %}
+    {%- set basic_type_map = {
+    "string": "str",
+    "number": "float",
+    "integer": "int",
+    "boolean": "bool"
+} %}
+
+    {%- if basic_type_map[json_spec.type] is defined %}
+        {{- basic_type_map[json_spec.type] }}
+    {%- elif json_spec.type == "array" %}
+        {{- "list[" +  json_to_python_type(json_spec|items) + "]" }}
+    {%- elif json_spec.type == "object" %}
+        {%- if json_spec.additionalProperties is defined %}
+            {{- "dict[str, " + json_to_python_type(json_spec.additionalProperties) + ']' }}
+        {%- else %}
+            {{- "dict" }}
+        {%- endif %}
+    {%- elif json_spec.type is iterable %}
+        {{- "Union[" }}
+        {%- for t in json_spec.type %}
+            {{- json_to_python_type({"type": t}) }}
+            {%- if not loop.last %}
+                {{- "," }}
+            {%- endif %}
+        {%- endfor %}
+        {{- "]" }}
+    {%- else %}
+        {{- "Any" }}
+    {%- endif %}
+{%- endmacro %}
+
+{%- if not full_function_description is defined %}
+    {%- set full_function_description = false %}
+{%- endif %}
+
+{%- macro full_description(tool) %}
+    {{- tool.name + '(' }}
+    {%- if tool.parameters is defined %}
+        {%- for param_name, param_fields in tool.parameters.properties|items %}
+            {{- param_name + ": " + json_to_python_type(param_fields) }}
+            {%- if not loop.last %}
+                {{- ", " }}
+            {%- endif %}
+        {%- endfor %}
+    {%- endif %}
+    {{- ")" }}
+    {%- if tool.return is defined %}
+        {{- " -> " + json_to_python_type(tool.return) }}
+    {%- endif %}
+    {{- " - " + tool.description + "\n\n" }}
+    {%- if tool.parameters is defined %}
+        {%- for param_name, param_fields in tool.parameters.properties|items %}
+            {%- if loop.first %}
+                {{- "    Args:\n" }}
+            {%- endif %}
+            {{- "        " + param_name + "(" + json_to_python_type(param_fields) + "): " + param_fields.description|trim }}
+        {%- endfor %}
+    {%- endif %}
+    {%- if tool.return is defined and tool.return.description is defined %}
+        {{- "\n    Returns:\n        " + tool.return.description }}
+    {%- endif %}
+    {{- '"' }}
+{%- endmacro %}
+
+{%- macro simple_description(tool) %}
+    {{- tool.description }}
+{%- endmacro %}
+
+{%- macro function_description(tool) %}
+    {%- if full_function_description %}
+        {{- full_description(tool) }}
+    {%- else %}
+        {{- simple_description(tool) }}
+    {%- endif %}
+{%- endmacro %}
+
+{%- if messages[0]["role"] == "system" %}
+    {%- set sys_prompt = messages[0]["content"] %}
+    {%- set loop_messages = messages[1:] %}
+{%- else %}
+    {%- set loop_messages = messages %}
+    {% set sys_prompt = 'You are a helpful assistant with access to the following function calls. Your task is to understand the given conversation with function calls and responses and generate natural language response as the ASSISTANT to continue the conversation. You may use the following function calls to understand how to respond to the user query.' %}
+{%- endif %}
+
+{{ 'SYSTEM: ' + sys_prompt }}
+{% if tools is iterable and tools | length > 0 %}
+<|function_call_library|>
+    {%- for tool in tools %}
+        {%- if tool.function is defined %}
+            {%- set tool = tool.function %}
+        {%- endif %}
+        {{- '{"name": "' + tool.name + '", ' }}
+        {{- '"description": "' + function_description(tool) }}
+        {{- ', "parameters": ' }}
+        {%- if not tool.parameters is defined or tool.parameters.properties | length == 0 %}
+            {{- "{}" }}
+        {%- else %}
+            {{- tool.parameters|tojson }}
+        {%- endif %}
+        {{- "}" }}
+        {%- if not loop.last %}
+            {{- "\n" }}
+        {%- endif %}
+    {%- endfor %}
+If none of the functions are relevant or the given question lacks the parameters required by the function, please output \"<function_call> {\"name\": \"no_function\", \"arguments\": {}}\".
+{%- endif %}
+
+
+
+{% for message in messages %}
+    {% if message['role'] == 'user' %}
+        {{- '\nUSER: ' + message['content'] }}
+    {% elif message['role'] == 'assistant' and message.tool_calls is defined %}
+        {{- '\nASSISTANT:'  }}
+        {% for tc in message.tool_calls %}
+            {{- '<function_call> ' + {'name': tc.function.name, 'arguments': tc.function.arguments}|tojson  }}
+        {% endfor %}
+        {{- '<|endoftext|>'  }}
+    {% elif message['role'] == 'assistant' %}
+        {{- '\nASSISTANT: ' + message['content'] + ' <|endoftext|>'  }}
+    {% elif message['role'] == 'tool' %}
+        {{- '<function_response> ' + message['content'] }}
+    {%- else %}
+        {{- raise_exception("Unexpected combination of role and message content") }}
+    {% endif %}
+    {% if loop.last and add_generation_prompt %}
+        {{- '\nASSISTANT: ' }}
+    {% endif %}
+{% endfor %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_hermes.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_hermes.jinja
new file mode 100644
index 000000000000..0b0902c8e749
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_hermes.jinja
@@ -0,0 +1,130 @@
+{%- macro json_to_python_type(json_spec) %}
+    {%- set basic_type_map = {
+    "string": "str",
+    "number": "float",
+    "integer": "int",
+    "boolean": "bool"
+} %}
+
+    {%- if basic_type_map[json_spec.type] is defined %}
+        {{- basic_type_map[json_spec.type] }}
+    {%- elif json_spec.type == "array" %}
+        {{- "list[" +  json_to_python_type(json_spec|items) + "]" }}
+    {%- elif json_spec.type == "object" %}
+        {%- if json_spec.additionalProperties is defined %}
+            {{- "dict[str, " + json_to_python_type(json_spec.additionalProperties) + ']' }}
+        {%- else %}
+            {{- "dict" }}
+        {%- endif %}
+    {%- elif json_spec.type is iterable %}
+        {{- "Union[" }}
+        {%- for t in json_spec.type %}
+            {{- json_to_python_type({"type": t}) }}
+            {%- if not loop.last %}
+                {{- "," }}
+            {%- endif %}
+        {%- endfor %}
+        {{- "]" }}
+    {%- else %}
+        {{- "Any" }}
+    {%- endif %}
+{%- endmacro %}
+
+
+{{- bos_token }}
+{{- "<|im_start|>system\nYou are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools> " }}
+{%- if tools is iterable and tools | length > 0 %}
+    {%- for tool in tools %}
+        {%- if tool.function is defined %}
+            {%- set tool = tool.function %}
+        {%- endif %}
+        {{- '{"type": "function", "function": ' }}
+        {{- '{"name": "' + tool.name + '", ' }}
+        {{- '"description": "' + tool.name + '(' }}
+        {%- for param_name, param_fields in tool.parameters.properties|items %}
+            {{- param_name + ": " + json_to_python_type(param_fields) }}
+            {%- if not loop.last %}
+                {{- ", " }}
+            {%- endif %}
+        {%- endfor %}
+        {{- ")" }}
+        {%- if tool.return is defined %}
+            {{- " -> " + json_to_python_type(tool.return) }}
+        {%- endif %}
+        {{- " - " + tool.description + "\n\n" }}
+        {%- for param_name, param_fields in tool.parameters.properties|items %}
+            {%- if loop.first %}
+                {{- "    Args:\n" }}
+            {%- endif %}
+            {{- "        " + param_name + "(" + json_to_python_type(param_fields) + "): " + param_fields.description|trim }}
+        {%- endfor %}
+        {%- if tool.return is defined and tool.return.description is defined %}
+            {{- "\n    Returns:\n        " + tool.return.description }}
+        {%- endif %}
+        {{- '"' }}
+        {{- ', "parameters": ' }}
+        {%- if tool.parameters.properties | length == 0 %}
+            {{- "{}" }}
+        {%- else %}
+            {{- tool.parameters|tojson }}
+        {%- endif %}
+        {{- "}" }}
+        {%- if not loop.last %}
+            {{- "\n" }}
+        {%- endif %}
+    {%- endfor %}
+{%- endif %}
+{{- " </tools>" }}
+{{- 'Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}}
+' }}
+{{- "For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+" }}
+{{- "<tool_call>
+" }}
+{{- '{"name": <function-name>, "arguments": <args-dict>}
+' }}
+{{- '</tool_call><|im_end|>' }}
+{%- for message in messages %}
+    {%- if message.role == "user" or message.role == "system" or (message.role == "assistant" and message.tool_calls is not defined) %}
+        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" and message.tool_calls is defined %}
+        {{- '<|im_start|>' + message.role }}
+        {%- for tool_call in message.tool_calls %}
+            {{- '\n<tool_call>\n' }}
+            {%- if tool_call.function is defined %}
+                {%- set tool_call = tool_call.function %}
+            {%- endif %}
+            {{- '{' }}
+            {{- '"name": "' }}
+            {{- tool_call.name }}
+            {{- '"' }}
+            {%- if tool_call.arguments is defined %}
+                {{- ', ' }}
+                {{- '"arguments": ' }}
+                {{- tool_call.arguments|tojson }}
+            {%- endif %}
+            {{- '}' }}
+            {{- '\n</tool_call>' }}
+        {%- endfor %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.previtem and loop.previtem.role != "tool" %}
+            {{- '<|im_start|>tool\n' }}
+        {%- endif %}
+        {{- '<tool_response>\n' }}
+        {{- message.content }}
+        {%- if not loop.last %}
+            {{- '\n</tool_response>\n' }}
+        {%- else %}
+            {{- '\n</tool_response>' }}
+        {%- endif %}
+        {%- if not loop.last and loop.nextitem.role != "tool" %}
+            {{- '<|im_end|>' }}
+        {%- elif loop.last %}
+            {{- '<|im_end|>' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+{%- endif %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_hunyuan_a13b.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_hunyuan_a13b.jinja
new file mode 100644
index 000000000000..a0808e44858a
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_hunyuan_a13b.jinja
@@ -0,0 +1,113 @@
+{% set loop_messages = messages %}
+{% if tools %}
+    {% set weekday_map = {'Monday': '星期一', 'Tuesday': '星期二', 'Wednesday': '星期三', 'Thursday': '星期四', 'Friday': '星期五', 'Saturday': '星期六', 'Sunday': '星期日'} %}
+    {% set weekday_cn = weekday_map[strftime_now('%A')] %}
+    {% set datetime_str = strftime_now('%Y-%m-%d %H:%M:%S') %}
+    {% set datetime_str = datetime_str + ' ' + weekday_cn %}
+    {% for message in loop_messages %}
+        {% if 'content' in message %}
+            {% set content = message['content'] %}
+        {% else %}
+            {% set content = '' %}
+        {% endif %}
+        {% if loop.index0 == 0 %}
+            {% set content_tmp = '你是一位函数组合专家。你会得到一个问题和一组可能的函数。根据问题，你需要进行一个或多个函数/工具调用以实现目的。
+如果没有一个函数可以使用，请直接使用自然语言回复用户，以助手：开头。
+如果给定的问题缺少函数所需的参数，请使用自然语言进行提问，向用户询问必要信息，以助手：开头。
+如果调用结果已经足够回答用户问题，请对历史结果进行总结，使用自然语言回复用户，以助手：开头。
+你应该只在工具调用部分返回函数调用。如果你决定调用任何函数，你必须将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>。你不应该在回复中包含任何其他文本。以下是你可以调用的函数列表，格式为JSON。
+' %}
+            {% set content_tmp = content_tmp + '
+' + tools | tojson + '
+' %}
+            {% if message['role'] == 'system' %}
+                {% set content_tmp = content_tmp + '
+额外要求：
+' + content + '
+
+如果你决定返回函数调用，请将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>，不得包含其他文本。如果额外要求里有格式要求，请忽略，以此处为准。
+否则，请参考开头说的三种情况，以助手：开头进行回复。
+
+如果额外要求里有时间信息，就以额外要求里的时间为准，否则，参考当前时间：' + datetime_str %}
+                {% set content = '<|startoftext|>' + content_tmp + '<|extra_4|>' %}
+            {% elif message['role'] == 'user' %}
+                {% set content_tmp = content_tmp + '
+如果你决定返回函数调用，请将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>，不得包含其他文本。
+否则，请参考开头说的三种情况，以助手：开头进行回复。
+
+当前时间：' + datetime_str %}
+                {% set content_tmp = '<|startoftext|>' + content_tmp + '<|extra_4|>'%}
+                {% set content = content_tmp + '用户：' + content + '<|extra_0|>' %}
+            {% endif %}
+        {% else %}
+            {% if message['role'] == 'user' %}
+                {% set content = '用户：' + content + '<|extra_0|>' %}
+            {% elif message['role'] == 'assistant' %}
+                {% if 'tool_calls' in message %}
+                    {% set tool_calls = message['tool_calls'] %}
+                    {% set ns = namespace(tool_calls="[") %}
+                    {% for tool_call in tool_calls %}
+                        {% set function = tool_call['function'] %}
+                        {% set name = function['name'] %}
+                        {% set ns.tool_calls = ns.tool_calls + '{"name": "' + name + '", '%}
+                        {% set arguments = function['arguments'] %}
+                        {% if arguments is not string %}
+                            {% set arguments = arguments | tojson %}
+                        {% endif %}
+                        {% set ns.tool_calls = ns.tool_calls + '"arguments": ' + arguments + '}' %}
+                        {% if not loop.last %}
+                            {% set ns.tool_calls = ns.tool_calls + ', '%}
+                        {% endif %}
+                    {% endfor %}
+                    {% set ns.tool_calls = ns.tool_calls + ']' %}
+                    {% set content = content + '<tool_calls>' + ns.tool_calls + '</tool_calls>' %}
+                {% else %}
+                    {% set content = '助手：' + content %}
+                {% endif %}
+                {% set content = content + '<|eos|>' %}
+            {% elif message['role'] == 'tool' %}
+                {% if content is not string %}
+                    {set content = content | tojson }
+                {% endif %}
+                {% set content = '<tool_response>' + content + '</tool_response>' %}
+                {% set content = content + '<|extra_0|>' %}
+            {% endif %}
+        {% endif %}
+    {{- content -}}
+    {% endfor %}
+{% else %}
+    {% set context = {'has_head': true} %}
+    {% for message in loop_messages %}
+        {% if 'content' in message %}
+            {% set content = message['content'] %}
+        {% else %}
+            {% set content = '' %}
+        {% endif %}
+        {% if loop.index0 == 0 %}
+            {% if content == '' %}
+                {% set _ = context.update({'has_head': false}) %}
+            {% elif message['role'] == 'system' %}
+                {% set content = '<|startoftext|>' + content + '<|extra_4|>' %}
+            {% endif %}
+        {% endif %}
+        {% if message['role'] == 'user' %}
+            {% if loop.index0 == 1 and not context.has_head %}
+                {% set content = '<|startoftext|>' + content %}
+            {% endif %}
+            {% if loop.index0 == 1 and context.has_head %}
+                {% set content = content + '<|extra_0|>' %}
+            {% else %}
+                {% set content = '<|startoftext|>' + content + '<|extra_0|>' %}
+            {% endif %}
+        {% elif message['role'] == 'assistant' %}
+            {% set content = content + '<|eos|>' %}
+        {% elif message['role'] == 'tool' %}
+            {% set content = content + '<|extra_0|>' %}
+        {% endif %}
+        {{- content -}}
+    {% endfor %}
+{% endif %}
+{%- if enable_thinking is defined and enable_thinking is false %}
+    {{- '<think>\n\n</think>\n' }}
+{%- endif %}
+
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_internlm2_tool.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_internlm2_tool.jinja
new file mode 100644
index 000000000000..ac99666e93bc
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_internlm2_tool.jinja
@@ -0,0 +1,60 @@
+{%- if messages[0]["role"] == "system" %}
+    {%- set system_message = messages[0]["content"] %}
+    {%- set loop_messages = messages[1:] %}
+{%- else %}
+    {%- set loop_messages = messages %}
+{%- endif %}
+
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+
+{{- bos_token }}
+{%- if system_message is defined %}
+{{- "<|im_start|>system\n" + system_message + "<|im_end|>\n" }}
+{%- endif %}
+
+{%- if tools is not none %}
+    {{- "<|im_start|>system name=<|plugin|>\n[" }}
+    {%- for tool in tools %}
+        {{- tool.function|tojson }}
+        {%- if not loop.last %}
+            {{- ", " }}
+        {%- else %}
+            {{- "]" }}
+        {%- endif %}
+    {%- endfor %}
+    {{- "<|im_end|>\n" }}
+{%- endif %}
+
+{%- for message in loop_messages %}
+    {%- if message["role"] == "user" %}
+        {{- "<|im_start|>user\n" + message["content"] + "<|im_end|>\n"}}
+    {%- elif message.tool_calls is defined and message.tool_calls is not none %}
+        {%- set content = message["content"] if message["content"] else "" %}
+        {{- "<|im_start|>assistant\n" + content }}
+        {%- for tool_call in message.tool_calls %}
+            {%- set function=tool_call.function %}
+            {{- "<|action_start|><|plugin|>\n" }}
+            {{- '{"name": "' + function.name + '", '}}
+            {{- '"arguments": ' + function.arguments|tojson + '}' }}
+            {{- "<|action_end|>" }}
+        {%- endfor %}
+        {{- "<|im_end|>\n" }}
+    {%- elif message["role"] == "assistant" %}
+        {{- "<|im_start|>assistant\n" + message["content"] + "<|im_end|>\n"}}
+    {%- elif message["role"] == "tool_results" or message["role"] == "tool" or message["role"] == "function" %}
+        {%- if message.content is defined and message.content.content is defined %}
+            {%- set content = message.content.content %}
+        {%- else %}
+            {%- set content = message.content %}
+        {%- endif %}
+        {{- "<|im_start|>environment name=<|plugin|>\n" + content|string + "<|im_end|>\n" }}
+    {%- else %}
+        {{- raise_exception("Only user and assistant and tool_results and tool and function roles are supported, with the exception of an initial optional system message!") }}
+    {%- endif %}
+{%- endfor %}
+
+{%- if add_generation_prompt %}
+{{- '<|im_start|>assistant\n' }}
+{%- endif %}
\ No newline at end of file
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama3.1_json.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama3.1_json.jinja
new file mode 100644
index 000000000000..033830936a56
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama3.1_json.jinja
@@ -0,0 +1,120 @@
+{{- bos_token }}
+{%- if custom_tools is defined %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools_in_user_message is defined %}
+    {#- Llama 3.1 doesn't pass all tests if the tools are in the system prompt #}
+    {%- set tools_in_user_message = true %}
+{%- endif %}
+{%- if not date_string is defined %}
+    {%- if strftime_now is defined %}
+        {%- set date_string = strftime_now("%d %b %Y") %}
+    {%- else %}
+        {%- set date_string = "26 Jul 2024" %}
+    {%- endif %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+
+{#- This block extracts the system message, so we can slot it into the right place. #}
+{%- if messages[0]['role'] == 'system' %}
+    {%- if messages[0]['content'] is string %}
+        {%- set system_message = messages[0]['content']|trim %}
+    {%- else %}
+        {%- set system_message = messages[0]['content'][0]['text']|trim %}
+    {%- endif %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- if tools is not none %}
+        {%- set system_message = "You are a helpful assistant with tool calling capabilities. Only reply with a tool call if the function exists in the library provided by the user. If it doesn't exist, just reply directly in natural language. When you receive a tool call response, use the output to format an answer to the original user question." %}
+    {%- else %}
+        {%- set system_message = "" %}
+    {%- endif %}
+{%- endif %}
+
+{#- System message #}
+{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
+{%- if tools is not none %}
+    {{- "Environment: ipython\n" }}
+{%- endif %}
+{{- "Cutting Knowledge Date: December 2023\n" }}
+{{- "Today Date: " + date_string + "\n\n" }}
+{%- if tools is not none and not tools_in_user_message %}
+    {{- "You have access to the following functions. To call a function, please respond with JSON for a function call. " }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. ' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+{{- system_message }}
+{{- "<|eot_id|>" }}
+
+{#- Custom tools are passed in a user message with some extra guidance #}
+{%- if tools_in_user_message and not tools is none %}
+    {#- Extract the first user message so we can plug it in here #}
+    {%- if messages | length != 0 %}
+        {%- if messages[0]['content'] is string %}
+            {%- set first_user_message = messages[0]['content']|trim %}
+        {%- else %}
+            {%- set first_user_message = messages[0]['content'] | selectattr('type', 'equalto', 'text') | map(attribute='text') | map('trim') | join('\n') %}
+        {%- endif %}
+        {%- set messages = messages[1:] %}
+    {%- else %}
+        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
+    {%- endif %}
+    {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
+    {{- "Given the following functions, please respond with a JSON for a function call " }}
+    {{- "with its proper arguments that best answers the given prompt.\n\n" }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. ' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+    {{- first_user_message + "<|eot_id|>"}}
+{%- endif %}
+
+{%- for message in messages %}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}
+        {%- if message['content'] is string %}
+            {{- message['content'] | trim}}
+        {%- else %}
+            {%- for content in message['content'] %}
+                {%- if content['type'] == 'text' %}
+                    {{- content['text'] | trim }}
+                {%- endif %}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|eot_id|>' }}
+    {%- elif 'tool_calls' in message %}
+        {%- if not message.tool_calls|length == 1 %}
+            {{- raise_exception("This model only supports single tool-calls at once!") }}
+        {%- endif %}
+        {%- set tool_call = message.tool_calls[0].function %}
+        {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+        {{- '{"name": "' + tool_call.name + '", ' }}
+        {{- '"parameters": ' }}
+        {{- tool_call.arguments | tojson }}
+        {{- "}" }}
+        {{- "<|eot_id|>" }}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
+        {%- if message.content is string %}
+            {{- { "output": message.content } | tojson }}
+        {%- else %}
+            {%- for content in message['content']  %}
+                {%- if content['type']  == 'text' %}
+                    {{- { "output": content['text']  } | tojson }}
+                {%- endif %}
+            {%- endfor %}
+        {%- endif %}
+        {{- "<|eot_id|>" }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+{%- endif %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama3.2_json.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama3.2_json.jinja
new file mode 100644
index 000000000000..2b290c0eede0
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama3.2_json.jinja
@@ -0,0 +1,133 @@
+{{- bos_token }}
+{%- if custom_tools is defined %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools_in_user_message is defined %}
+    {%- set tools_in_user_message = false %}
+{%- endif %}
+{%- if not date_string is defined %}
+    {%- if strftime_now is defined %}
+        {%- set date_string = strftime_now("%d %b %Y") %}
+    {%- else %}
+        {%- set date_string = "26 Jul 2024" %}
+    {%- endif %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+
+{#- Find out if there are any images #}
+{% set image_ns = namespace(has_images=false) %}
+{%- for message in messages %}
+    {%- for content in message['content'] %}
+        {%- if content['type'] == 'image' %}
+            {%- set image_ns.has_images = true %}
+        {%- endif %}
+    {%- endfor %}
+{%- endfor %}
+
+{#- This block extracts the system message, so we can slot it into the right place. #}
+{%- if messages[0]['role'] == 'system' %}
+    {%- if messages[0]['content'] is string %}
+        {%- set system_message = messages[0]['content']|trim %}
+    {%- else %}
+        {%- set system_message = messages[0]['content'][0]['text']|trim %}
+    {%- endif %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- if tools is not none %}
+        {%- set system_message = "You are a helpful assistant with tool calling capabilities. Only reply with a tool call if the function exists in the library provided by the user. If it doesn't exist, just reply directly in natural language. When you receive a tool call response, use the output to format an answer to the original user question." %}
+    {%- else %}
+        {%- set system_message = "" %}
+    {%- endif %}
+{%- endif %}
+
+{#- System message if there are no images, if the user supplied one, or if tools are used (default tool system message) #}
+{%- if system_message or not image_ns.has_images %}
+    {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
+    {%- if tools is not none %}
+        {{- "Environment: ipython\n" }}
+    {%- endif %}
+    {{- "Cutting Knowledge Date: December 2023\n" }}
+    {{- "Today Date: " + date_string + "\n\n" }}
+    {%- if tools is not none and not tools_in_user_message %}
+        {{- "You have access to the following functions. To call a function, please respond with JSON for a function call. " }}
+        {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. ' }}
+        {{- "Do not use variables.\n\n" }}
+        {%- for t in tools %}
+            {{- t | tojson(indent=4) }}
+            {{- "\n\n" }}
+        {%- endfor %}
+    {%- endif %}
+    {{- system_message }}
+    {{- "<|eot_id|>" }}
+{%- endif %}
+
+{#- Custom tools are passed in a user message with some extra guidance #}
+{%- if tools_in_user_message and not tools is none %}
+    {#- Extract the first user message so we can plug it in here #}
+    {%- if messages | length != 0 %}
+        {%- if messages[0]['content'] is string %}
+            {%- set first_user_message = messages[0]['content']|trim %}
+        {%- else %}
+            {%- set first_user_message = messages[0]['content'] | selectattr('type', 'equalto', 'text') | map(attribute='text') | map('trim') | join('\n') %}
+        {%- endif %}
+        {%- set messages = messages[1:] %}
+    {%- else %}
+        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
+    {%- endif %}
+    {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
+    {{- "Given the following functions, please respond with a JSON for a function call " }}
+    {{- "with its proper arguments that best answers the given prompt.\n\n" }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. ' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+    {{- first_user_message + "<|eot_id|>"}}
+{%- endif %}
+
+{%- for message in messages %}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}
+        {%- if message['content'] is string %}
+            {{- message['content'] | trim}}
+        {%- else %}
+            {%- for content in message['content'] %}
+                {%- if content['type'] == 'image' %}
+                    {{- '<|image|>' }}
+                {%- elif content['type'] == 'text' %}
+                    {{- content['text'] | trim }}
+                {%- endif %}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|eot_id|>' }}
+    {%- elif 'tool_calls' in message %}
+        {%- if not message.tool_calls|length == 1 %}
+            {{- raise_exception("This model only supports single tool-calls at once!") }}
+        {%- endif %}
+        {%- set tool_call = message.tool_calls[0].function %}
+        {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+        {{- '{"name": "' + tool_call.name + '", ' }}
+        {{- '"parameters": ' }}
+        {{- tool_call.arguments | tojson }}
+        {{- "}" }}
+        {{- "<|eot_id|>" }}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
+        {%- if message.content is string %}
+            {{- { "output": message.content } | tojson }}
+        {%- else %}
+            {%- for content in message['content']  %}
+                {%- if content['type']  == 'text' %}
+                    {{- { "output": content['text']  } | tojson }}
+                {%- endif %}
+            {%- endfor %}
+        {%- endif %}
+        {{- "<|eot_id|>" }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+{%- endif %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama3.2_pythonic.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama3.2_pythonic.jinja
new file mode 100644
index 000000000000..e4ec2353b350
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama3.2_pythonic.jinja
@@ -0,0 +1,98 @@
+{{- bos_token }}
+{%- if custom_tools is defined %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools_in_user_message is defined %}
+    {%- set tools_in_user_message = false %}
+{%- endif %}
+{%- if not date_string is defined %}
+    {%- if strftime_now is defined %}
+        {%- set date_string = strftime_now("%d %b %Y") %}
+    {%- else %}
+        {%- set date_string = "26 Jul 2024" %}
+    {%- endif %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+
+{#- This block extracts the system message, so we can slot it into the right place. #}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content']|trim %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- set system_message = "You are a helpful assistant with tool calling capabilities. Only reply with a tool call if the function exists in the library provided by the user. If it doesn't exist, just reply directly in natural language. When you receive a tool call response, use the output to format an answer to the original user question." %}
+{%- endif %}
+
+{#- System message #}
+{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
+{%- if tools is not none %}
+    {{- "Environment: ipython\n" }}
+{%- endif %}
+{{- "Cutting Knowledge Date: December 2023\n" }}
+{{- "Today Date: " + date_string + "\n\n" }}
+{%- if tools is not none and not tools_in_user_message %}
+    {{- "You have access to the following functions. To call functions, please respond with a python list of the calls. " }}
+    {{- 'Respond in the format [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] ' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+{{- system_message }}
+{{- "<|eot_id|>" }}
+
+{#- Custom tools are passed in a user message with some extra guidance #}
+{%- if tools_in_user_message and not tools is none %}
+    {#- Extract the first user message so we can plug it in here #}
+    {%- if messages | length != 0 %}
+        {%- set first_user_message = messages[0]['content']|trim %}
+        {%- set messages = messages[1:] %}
+    {%- else %}
+        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
+    {%- endif %}
+    {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
+    {{- "Given the following functions, please respond with a python list for function calls " }}
+    {{- "with their proper arguments to best answer the given prompt.\n\n" }}
+    {{- 'Respond in the format [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] ' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+    {{- first_user_message + "<|eot_id|>"}}
+{%- endif %}
+
+{%- for message in messages %}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
+    {%- elif 'tool_calls' in message %}
+        {{- '<|start_header_id|>assistant<|end_header_id|>\n\n[' -}}
+        {%- for tool_call in message.tool_calls %}
+            {%- if tool_call.function is defined %}
+                {%- set tool_call = tool_call.function %}
+            {%- endif %}
+            {{- tool_call.name + '(' -}}
+            {%- for param in tool_call.arguments %}
+                {{- param + '=' -}}
+                {{- "%s" | format(tool_call.arguments[param]) -}}
+                {% if not loop.last %}, {% endif %}
+            {%- endfor %}
+            {{- ')' -}}
+            {% if not loop.last %}, {% endif %}
+        {%- endfor %}
+        {{- ']<|eot_id|>' -}}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
+        {%- if message.content is mapping %}
+            {{- message.content | tojson }}
+        {%- else %}
+            {{- { "output": message.content } | tojson }}
+        {%- endif %}
+        {{- "<|eot_id|>" }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+{%- endif %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama4_json.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama4_json.jinja
new file mode 100644
index 000000000000..759f16554436
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama4_json.jinja
@@ -0,0 +1,116 @@
+{%- macro is_array_of_type_objects(var) -%}
+    {%- if var is iterable and var is not string -%}
+        {%- set valid = true -%}
+        {%- for item in var -%}
+            {%- if 'type' not in item -%}
+                {%- set valid = false -%}
+                {%- break -%}
+            {%- endif -%}
+        {%- endfor -%}
+        {{ valid }}
+    {%- else -%}
+        {{ false }}
+    {%- endif -%}
+{%- endmacro %}
+
+{%- macro render_message(message) %}
+    {%- if message['content'] is string %}
+        {{- message['content']|trim }}
+    {%- elif is_array_of_type_objects(data) == 'True' %}
+        {%- for content in message['content'] %}
+            {%- if content['type'] == 'image' %}
+                {{- '<|image|>' }}
+            {%- elif content['type'] == 'text' %}
+                {{- content['text']|trim }}
+            {%- endif %}
+        {%- endfor %}
+    {%- else %}
+        {{- message['content']|tojson }}
+    {%- endif %}
+{%- endmacro %}
+
+{{- bos_token }}
+{%- if custom_tools is defined %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools_in_user_message is defined %}
+    {%- set tools_in_user_message = true %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+
+{#- This block extracts the system message, so we can slot it into the right place. #}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0] %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- set system_message = ({ "content": "You are a helpful assistant with tool calling "
+        "capabilities. Only reply with a tool call if the function exists in the "
+        "library provided by the user. If it doesn't exist, just reply directly in "
+        "natural language. When you receive a tool call response, use the output to "
+        "format an answer to the original user question."}) %}
+{%- endif %}
+
+{%- set tool_lib_preamble = 'Tools: You have access to the following tools. You might need to use one '
+    'or more function/tool calls to fulfill the task. \n'
+    'If none are needed, then proceed to the response.\n\n'
+    'Tool Call Syntax: You can call tools using the following syntax:\n'
+    '{"name": function name, "parameters": dictionary of argument name and its value}.\n'
+    'Separate multiple function calls by "; ". Do not use variables.\n'
+    'Do not include anything else when calling the tools with the syntax above.\n\n'
+    'Here is a list of functions in JSON format that you can invoke.\n' %}
+
+{{- "<|header_start|>system<|header_end|>\n\n" }}
+{%- if tools is not none and not tools_in_user_message %}
+    {{- tool_lib_preamble }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+{{- render_message(system_message) }}
+{{ "<|eot|>\n" }}
+
+{#- Custom tools are passed in a user message with some extra guidance #}
+{%- if tools_in_user_message and not tools is none %}
+    {#- Extract the first user message so we can plug it in here #}
+    {%- if messages | length != 0 %}
+        {%- set first_user_message = messages[0] %}
+        {%- set messages = messages[1:] %}
+    {%- else %}
+        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
+    {%- endif %}
+    {{- '<|header_start|>user<|header_end|>\n\n' }}
+    {{- tool_lib_preamble }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+    {{- render_message(first_user_message) + "\n<|eot|>"}}
+{%- endif %}
+
+{%- for message in messages %}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+        {{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }}
+        {{- render_message(message) }}
+        {{- "\n<|eot|>" }}
+    {%- elif 'tool_calls' in message and message.tool_calls|length > 0 %}
+        {{- '\n<|header_start|>assistant<|header_end|>\n\n' -}}
+        {{- render_message(message) }}
+        {%- for tool_call in message.tool_calls %}
+           {{- '{"name": "' + tool_call.function.name + '", ' }}
+           {{- '"parameters": ' }}
+           {{- tool_call.function.arguments | tojson }}
+           {{- "}" }}
+        {%- endfor %}
+       {{- "\n<|eot|>" }}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "\n<|header_start|>ipython<|header_end|>\n\n" }}
+        {{- render_message(message) }}
+        {{- "\n<|eom|>" }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '\n<|header_start|>assistant<|header_end|>\n\n' }}
+{%- endif %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama4_pythonic.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama4_pythonic.jinja
new file mode 100644
index 000000000000..bbed3d8205e0
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_llama4_pythonic.jinja
@@ -0,0 +1,111 @@
+{{- bos_token }}
+{%- if custom_tools is defined and custom_tools%}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if tools is defined and tools %}
+    {%- set tool_definition = tool_definition ~ (tools | tojson(indent=4)) %}
+{%- else %}
+    {%- set tools = none %}
+{%- endif %}
+
+
+{#- This block extracts the system message, so we can slot it into the right place. #}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set user_provided_system_message = true %}
+    {%- if messages[0]['content'] is string %}
+        {%- set system_message = messages[0]['content']|trim %}
+    {%- else %}
+        {%- set system_message = messages[0]['content'][0]['text']|trim %}
+    {%- endif %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- if tools is not none  %}
+        {#- Since not system_message was provided by user, if tool is provided, system_message is now default tool system message #}
+        {#- This system message is from llama website:https://www.llama.com/docs/model-cards-and-prompt-formats/llama4/  #}
+        {%- set system_message = "You are a helpful assistant and an expert in function composition. You can answer general questions using your internal knowledge OR invoke functions when necessary. Follow these strict guidelines:\n\n1. FUNCTION CALLS:\n- ONLY use functions that are EXPLICITLY listed in the function list below\n- If NO functions are listed (empty function list []), respond ONLY with internal knowledge or \"I don't have access to [Unavailable service] information\"\n- If a function is not in the list, respond ONLY with internal knowledge or \"I don't have access to [Unavailable service] information\"\n- If ALL required parameters are present AND the query EXACTLY matches a listed function's purpose: output ONLY the function call(s)\n- Use exact format: [func_name1(param1=value1, param2=value2), func_name2(...)]\nExamples:\nCORRECT: [get_weather(location=\"Vancouver\"), calculate_route(start=\"Boston\", end=\"New York\")] <- Only if get_weather and calculate_route are in function list\nINCORRECT: get_weather(location=\"New York\")\nINCORRECT: Let me check the weather: [get_weather(location=\"New York\")]\nINCORRECT: [get_events(location=\"Singapore\")] <- If function not in list\n\n2. RESPONSE RULES:\n- For pure function requests matching a listed function: ONLY output the function call(s)\n- For knowledge questions: ONLY output text\n- For missing parameters: ONLY request the specific missing parameters\n- For unavailable services (not in function list): output ONLY with internal knowledge or \"I don't have access to [Unavailable service] information\". Do NOT execute a function call.\n- If the query asks for information beyond what a listed function provides: output ONLY with internal knowledge about your limitations\n- NEVER combine text and function calls in the same response\n- NEVER suggest alternative functions when the requested service is unavailable\n- NEVER create or invent new functions not listed below\n\n3. STRICT BOUNDARIES:\n- ONLY use functions from the list below - no exceptions\n- NEVER use a function as an alternative to unavailable information\n- NEVER call functions not present in the function list\n- NEVER add explanatory text to function calls\n- NEVER respond with empty brackets\n- Use proper Python/JSON syntax for function calls\n- Check the function list carefully before responding\n\n4. TOOL RESPONSE HANDLING:\n- When receiving tool responses: provide concise, natural language responses\n- Don't repeat tool response verbatim\n- Don't add supplementary information\n\nHere is a list of functions in JSON format that you can invoke:\n" %}
+    {%- else %}
+        {%- set system_message = "" %}
+    {%- endif %}
+{%- endif %}
+{#- Now writing the system message: use the user provided system message if user_provided_system_message, else default tool system message if tools presented #}
+{%- if system_message %}
+    {#- always use user provided system message to override default tool system message #}
+    {{- "<|header_start|>system<|header_end|>\n\n" }}
+    {{- system_message }}
+    {%- if user_provided_system_message and tools %}
+        {{- "\nHere is a list of functions in JSON format that you can invoke. Use exact format: [func_name1(param1=value1, param2=value2), func_name2(...)]\n" }}
+        {{- tool_definition -}}
+        {%- elif tool_definition %}
+        {{- tool_definition -}}
+    {%- endif %}
+    {{- "<|eot|>" }}
+{%- endif %}
+
+{#- Now deal with all other messages #}
+{%- for message in messages %}
+    {#- Base case: messages that are not from tool role and has empty tool_call list  #}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or ('tool_calls' in message and  message.tool_calls|length != 0 )) %}
+        {{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }}
+        {%- if message['content'] is string %}
+            {{- message['content'] }}
+        {%- else %}
+            {%- for content in message['content'] %}
+                {%- if content['type'] == 'image' %}
+                    {{- '<|image|>' }}
+                {%- elif content['type'] == 'text' %}
+                    {{- content['text'] | trim }}
+                {%- endif %}
+            {%- endfor %}
+        {%- endif %}
+    {{- "<|eot|>" }}
+    {#- Tool case: messages has non-empty tool_call list, must from assistant #}
+    {%- elif 'tool_calls' in message %}
+        {#- assume tool_calls are always coming from assistant #}
+        {%- if message.role == 'assistant' %}
+            {{- '<|header_start|>assistant<|header_end|>\n\n' -}}
+        {%- if message['content'] is string %}
+            {{- message['content'] }}
+        {%- else %}
+            {%- for content in message['content'] %}
+                {%- if content['type'] == 'image' %}
+                    {{- '<|image|>' }}
+                {%- elif content['type'] == 'text' %}
+                    {{- content['text'] }}
+                {%- endif %}
+            {%- endfor %}
+        {%- endif %}
+            {{- "[" }}
+        {%- for tool_call in message.tool_calls %}
+            {%- if tool_call.function is defined %}
+                {%- set tool_call = tool_call.function %}
+            {%- endif %}
+                {{-  tool_call.name + '(' -}}
+            {%- for param in tool_call.arguments %}
+                {{- param + '="' -}}
+                {{- "%s" | format(tool_call.arguments[param]) -}}
+                {{- '"' -}}
+                {% if not loop.last %}, {% endif %}
+            {%- endfor %}
+            {{- ')' -}}
+            {% if not loop.last %}, {% endif %}
+        {%- endfor %}
+        {{- "]<|eot|>" }}
+{%- endif %}
+{#- Tool_response case: messages are from tool_response  #}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "<|header_start|>ipython<|header_end|>\n\n" }}
+        {%- if message.content is string %}
+            {{-  message.content  | tojson }}
+        {%- else %}
+            {%- for content in message['content']  %}
+                {%- if content['type']  == 'text' %}
+                    {{-  content['text'] | tojson }}
+                {%- endif %}
+            {%- endfor %}
+        {%- endif %}
+        {{- "<|eot|>" }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|header_start|>assistant<|header_end|>\n\n' }}
+{%- endif %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_minimax_m1.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_minimax_m1.jinja
new file mode 100644
index 000000000000..2d5bbf4de56f
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_minimax_m1.jinja
@@ -0,0 +1,91 @@
+{{ '<begin_of_document>' -}}
+{%- if custom_tools is defined %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+
+{#- Extract system message #}
+{% set ns = namespace(system_prompt='') -%}
+{%- if messages[0]['role'] == 'system' %}
+    {%- if messages[0]['content'] is string %}
+        {%- set ns.system_prompt = messages[0]['content']|trim %}
+    {%- else %}
+        {%- set ns.system_prompt = messages[0]['content'][0]['text']|trim %}
+    {%- endif %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- if tools is not none %}
+        {%- set ns.system_prompt = "You are a helpful assistant created by Minimax based on MiniMax-M1 model." %}
+    {%- else %}
+        {%- set ns.system_prompt = "You are a helpful assistant created by Minimax based on MiniMax-M1 model." %}
+    {%- endif %}
+{%- endif %}
+
+{#- System message #}
+{%- if ns.system_prompt != '' %}
+{{ '<beginning_of_sentence>system ai_setting=assistant\n' + ns.system_prompt + '<end_of_sentence>\n' -}}
+{%- endif %}
+
+{#- Tools configuration #}
+{%- if tools is not none %}
+{{ '<beginning_of_sentence>system tool_setting=tools\nYou are provided with these tools:\n<tools>\n' -}}
+{%- for tool in tools %}
+{{ tool | tojson ~ '\n' -}}
+{%- endfor %}
+{{ '</tools>\n\nIf you need to call tools, please respond with <tool_calls></tool_calls> XML tags, and provide tool-name and json-object of arguments, following the format below:\n<tool_calls>\n{"name": <tool-name>, "arguments": <args-json-object>}\n...\n</tool_calls><end_of_sentence>\n' -}}
+{%- endif %}
+
+{#- Process messages #}
+{%- for message in messages %}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+        {%- if message['role'] == 'user' %}
+{{ '<beginning_of_sentence>user name=user\n' -}}
+{%- if message['content'] is string %}
+{{ message['content']|trim -}}
+{%- else %}
+{%- for content in message['content'] %}
+{%- if content['type'] == 'text' %}
+{{ content['text']|trim -}}
+{%- endif %}
+{%- endfor %}
+{%- endif %}
+{{ '<end_of_sentence>\n' -}}
+        {%- elif message['role'] == 'assistant' %}
+{{ '<beginning_of_sentence>ai name=assistant\n' -}}
+{%- if message['content'] is string %}
+{{ message['content']|trim -}}
+{%- else %}
+{%- for content in message['content'] | selectattr('type', 'equalto', 'text') %}
+{{ content['text']|trim -}}
+{%- endfor %}
+{%- endif %}
+{{ '<end_of_sentence>\n' -}}
+        {%- endif %}
+    {%- elif 'tool_calls' in message %}
+{{ '<beginning_of_sentence>ai name=assistant\n<tool_calls>\n' -}}
+{%- for tool_call in message.tool_calls %}
+{{ '{"name": "' + tool_call.function.name + '", "arguments": ' + tool_call.function.arguments | tojson + '}\n' -}}
+{%- endfor %}
+{{ '</tool_calls><end_of_sentence>\n' -}}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+{{ '<beginning_of_sentence>tool name=tools\n' -}}
+{%- if message.content is string %}
+{{ 'tool result: ' + message.content + '\n\n' -}}
+{%- else %}
+{%- for content in message['content'] %}
+{%- if content['type'] == 'text' %}
+{{ 'tool result: ' + content['text'] + '\n\n' -}}
+{%- elif content.get('name') %}
+{{ 'tool name: ' + content['name'] + '\ntool result: ' + content['text'] + '\n\n' -}}
+{%- endif %}
+{%- endfor %}
+{%- endif %}
+{{ '<end_of_sentence>\n' -}}
+    {%- endif %}
+{%- endfor %}
+
+{%- if add_generation_prompt %}
+{{ '<beginning_of_sentence>ai name=assistant\n' -}}
+{%- endif %}
\ No newline at end of file
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_mistral.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_mistral.jinja
new file mode 100644
index 000000000000..49691f59c2f2
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_mistral.jinja
@@ -0,0 +1,86 @@
+{%- if messages[0]["role"] == "system" %}
+    {%- set system_message = messages[0]["content"] %}
+    {%- set loop_messages = messages[1:] %}
+{%- else %}
+    {%- set loop_messages = messages %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}
+
+{%- for message in loop_messages | rejectattr("role", "equalto", "tool") | rejectattr("role", "equalto", "tool_results") | selectattr("tool_calls", "undefined") %}
+    {%- if (message["role"] == "user") != (loop.index0 % 2 == 0) %}
+        {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif %}
+{%- endfor %}
+
+{{- bos_token }}
+{%- for message in loop_messages %}
+    {%- if message["role"] == "user" %}
+        {%- if tools is not none and (message == user_messages[-1]) %}
+            {{- "[AVAILABLE_TOOLS] [" }}
+            {%- for tool in tools %}
+                {%- set tool = tool.function %}
+                {{- '{"type": "function", "function": {' }}
+                {%- for key, val in tool.items() if key != "return" %}
+                    {%- if val is string %}
+                        {{- '"' + key + '": "' + val + '"' }}
+                    {%- else %}
+                        {{- '"' + key + '": ' + val|tojson }}
+                    {%- endif %}
+                    {%- if not loop.last %}
+                        {{- ", " }}
+                    {%- endif %}
+                {%- endfor %}
+                {{- "}}" }}
+                {%- if not loop.last %}
+                    {{- ", " }}
+                {%- else %}
+                    {{- "]" }}
+                {%- endif %}
+            {%- endfor %}
+            {{- "[/AVAILABLE_TOOLS]" }}
+        {%- endif %}
+        {%- if loop.last and system_message is defined %}
+            {{- "[INST] " + system_message + "\n\n" + message["content"] + "[/INST]" }}
+        {%- else %}
+            {{- "[INST] " + message["content"] + "[/INST]" }}
+        {%- endif %}
+    {%- elif message["role"] == "tool_calls" or message.tool_calls is defined %}
+        {%- if message.tool_calls is defined %}
+            {%- set tool_calls = message.tool_calls %}
+        {%- else %}
+            {%- set tool_calls = message.content %}
+        {%- endif %}
+        {{- "[TOOL_CALLS] [" }}
+        {%- for tool_call in tool_calls %}
+            {%- set out = tool_call.function|tojson %}
+            {{- out[:-1] }}
+            {%- if not tool_call.id is defined or tool_call.id|length < 9 %}
+                {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (1)" + tool_call.id) }}
+            {%- endif %}
+            {{- ', "id": "' + tool_call.id[-9:] + '"}' }}
+            {%- if not loop.last %}
+                {{- ", " }}
+            {%- else %}
+                {{- "]" + eos_token }}
+            {%- endif %}
+        {%- endfor %}
+    {%- elif message["role"] == "assistant" %}
+        {{- " " + message["content"] + eos_token }}
+    {%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
+        {%- if message.content is defined and message.content.content is defined %}
+            {%- set content = message.content.content %}
+        {%- else %}
+            {%- set content = message.content %}
+        {%- endif %}
+        {{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }}
+        {%- if not message.tool_call_id is defined or message.tool_call_id|length < 9 %}
+            {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (2)" + message.tool_call_id) }}
+        {%- endif %}
+        {{- '"call_id": "' + message.tool_call_id[-9:] + '"}[/TOOL_RESULTS]' }}
+    {%- else %}
+        {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }}
+    {%- endif %}
+{%- endfor %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_mistral3.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_mistral3.jinja
new file mode 100644
index 000000000000..7c4249ec44c5
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_mistral3.jinja
@@ -0,0 +1,126 @@
+{%- set today = strftime_now("%Y-%m-%d") %}
+{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\")" %}
+
+{{- bos_token }}
+
+{%- if messages[0]['role'] == 'system' %}
+    {%- if messages[0]['content'] is string %}
+        {%- set system_message = messages[0]['content'] %}
+        {%- set loop_messages = messages[1:] %}
+    {%- else %}
+        {%- set system_message = messages[0]['content'][0]['text'] %}
+        {%- set loop_messages = messages[1:] %}
+    {%- endif %}
+{%- else %}
+    {%- set system_message = default_system_message %}
+    {%- set loop_messages = messages %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- elif tools is not none %}
+    {%- set parallel_tool_prompt = "You are a helpful assistant that can call tools. If you call one or more tools, format them in a single JSON array or objects, where each object is a tool call, not as separate objects outside of an array or multiple arrays. Use the format [{\"name\": tool call name, \"arguments\": tool call arguments}, additional tool calls] if you call more than one tool. If you call tools, do not attempt to interpret them or otherwise provide a response until you receive a tool call result that you can interpret for the user." %}
+    {%- if system_message is defined %}
+        {%- set system_message = parallel_tool_prompt + "\n\n" + system_message %}
+    {%- else %}
+        {%- set system_message = parallel_tool_prompt %}
+    {%- endif %}
+{%- endif %}
+{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
+
+{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}
+
+{%- set filtered_messages = [] %}
+{%- for message in loop_messages %}
+    {%- if message["role"] not in ["tool", "tool_results"] and not message.get("tool_calls") %}
+        {%- set filtered_messages = filtered_messages + [message] %}
+    {%- endif %}
+{%- endfor %}
+
+{%- for message in filtered_messages %}
+    {%- if (message["role"] == "user") != (loop.index0 % 2 == 0) %}
+        {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif %}
+{%- endfor %}
+
+{%- for message in loop_messages %}
+    {%- if message["role"] == "user" %}
+        {%- if tools is not none and (message == user_messages[-1]) %}
+            {{- "[AVAILABLE_TOOLS] [" }}
+            {%- for tool in tools %}
+                {%- set tool = tool.function %}
+                {{- '{"type": "function", "function": {' }}
+                {%- for key, val in tool.items() if key != "return" %}
+                    {%- if val is string %}
+                        {{- '"' + key + '": "' + val + '"' }}
+                    {%- else %}
+                        {{- '"' + key + '": ' + val|tojson }}
+                    {%- endif %}
+                    {%- if not loop.last %}
+                        {{- ", " }}
+                    {%- endif %}
+                {%- endfor %}
+                {{- "}}" }}
+                {%- if not loop.last %}
+                    {{- ", " }}
+                {%- else %}
+                    {{- "]" }}
+                {%- endif %}
+            {%- endfor %}
+            {{- "[/AVAILABLE_TOOLS]" }}
+        {%- endif %}
+        {%- if message['content'] is string %}
+        {{- '[INST]' + message['content'] + '[/INST]' }}
+        {%- else %}
+                {{- '[INST]' }}
+                {%- for block in message['content'] %}
+                        {%- if block['type'] == 'text' %}
+                                {{- block['text'] }}
+                        {%- elif block['type'] == 'image' or block['type'] == 'image_url' %}
+                                {{- '[IMG]' }}
+                            {%- else %}
+                                {{- raise_exception('Only text and image blocks are supported in message content!') }}
+                            {%- endif %}
+                    {%- endfor %}
+                {{- '[/INST]' }}
+            {%- endif %}
+    {%- elif message["role"] == "tool_calls" or message.tool_calls is defined %}
+        {%- if message.tool_calls is defined %}
+            {%- set tool_calls = message.tool_calls %}
+        {%- else %}
+            {%- set tool_calls = message.content %}
+        {%- endif %}
+        {{- "[TOOL_CALLS] [" }}
+        {%- for tool_call in tool_calls %}
+            {%- set out = tool_call.function|tojson %}
+            {{- out[:-1] }}
+            {%- if not tool_call.id is defined or tool_call.id|length < 9 %}
+                {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (1)" + tool_call.id) }}
+            {%- endif %}
+            {{- ', "id": "' + tool_call.id[-9:] + '"}' }}
+            {%- if not loop.last %}
+                {{- ", " }}
+            {%- else %}
+                {{- "]" + eos_token }}
+            {%- endif %}
+        {%- endfor %}
+    {%- elif message['role'] == 'assistant' %}
+        {%- if message['content'] is string %}
+            {{- message['content'] + eos_token }}
+        {%- else %}
+            {{- message['content'][0]['text'] + eos_token }}
+        {%- endif %}
+    {%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
+        {%- if message.content is defined and message.content.content is defined %}
+            {%- set content = message.content.content %}
+        {%- else %}
+            {%- set content = message.content %}
+        {%- endif %}
+        {{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }}
+        {%- if not message.tool_call_id is defined or message.tool_call_id|length < 9 %}
+            {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (2)" + message.tool_call_id) }}
+        {%- endif %}
+        {{- '"call_id": "' + message.tool_call_id[-9:] + '"}[/TOOL_RESULTS]' }}
+    {%- else %}
+        {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }}
+    {%- endif %}
+{%- endfor %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_mistral_parallel.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_mistral_parallel.jinja
new file mode 100644
index 000000000000..2ef4bedf8621
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_mistral_parallel.jinja
@@ -0,0 +1,93 @@
+{%- if messages[0]["role"] == "system" %}
+    {%- set system_message = messages[0]["content"] %}
+    {%- set loop_messages = messages[1:] %}
+{%- else %}
+    {%- set loop_messages = messages %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- elif tools is not none %}
+    {%- set parallel_tool_prompt = "You are a helpful assistant that can call tools. If you call one or more tools, format them in a single JSON array or objects, where each object is a tool call, not as separate objects outside of an array or multiple arrays. Use the format [{\"name\": tool call name, \"arguments\": tool call arguments}, additional tool calls] if you call more than one tool. If you call tools, do not attempt to interpret them or otherwise provide a response until you receive a tool call result that you can interpret for the user." %}
+    {%- if system_message is defined %}
+        {%- set system_message = parallel_tool_prompt + "\n\n" + system_message %}
+    {%- else %}
+        {%- set system_message = parallel_tool_prompt %}
+    {%- endif %}
+{%- endif %}
+{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}
+
+{%- for message in loop_messages | rejectattr("role", "equalto", "tool") | rejectattr("role", "equalto", "tool_results") | selectattr("tool_calls", "undefined") %}
+    {%- if (message["role"] == "user") != (loop.index0 % 2 == 0) %}
+        {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif %}
+{%- endfor %}
+
+{{- bos_token }}
+{%- for message in loop_messages %}
+    {%- if message["role"] == "user" %}
+        {%- if tools is not none and (message == user_messages[-1]) %}
+            {{- "[AVAILABLE_TOOLS] [" }}
+            {%- for tool in tools %}
+                {%- set tool = tool.function %}
+                {{- '{"type": "function", "function": {' }}
+                {%- for key, val in tool.items() if key != "return" %}
+                    {%- if val is string %}
+                        {{- '"' + key + '": "' + val + '"' }}
+                    {%- else %}
+                        {{- '"' + key + '": ' + val|tojson }}
+                    {%- endif %}
+                    {%- if not loop.last %}
+                        {{- ", " }}
+                    {%- endif %}
+                {%- endfor %}
+                {{- "}}" }}
+                {%- if not loop.last %}
+                    {{- ", " }}
+                {%- else %}
+                    {{- "]" }}
+                {%- endif %}
+            {%- endfor %}
+            {{- "[/AVAILABLE_TOOLS]" }}
+        {%- endif %}
+        {%- if loop.last and system_message is defined %}
+            {{- "[INST] " + system_message + "\n\n" + message["content"] + "[/INST]" }}
+        {%- else %}
+            {{- "[INST] " + message["content"] + "[/INST]" }}
+        {%- endif %}
+    {%- elif message["role"] == "tool_calls" or message.tool_calls is defined %}
+        {%- if message.tool_calls is defined %}
+            {%- set tool_calls = message.tool_calls %}
+        {%- else %}
+            {%- set tool_calls = message.content %}
+        {%- endif %}
+        {{- "[TOOL_CALLS] [" }}
+        {%- for tool_call in tool_calls %}
+            {%- set out = tool_call.function|tojson %}
+            {{- out[:-1] }}
+            {%- if not tool_call.id is defined or tool_call.id|length < 9 %}
+                {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (1)" + tool_call.id) }}
+            {%- endif %}
+            {{- ', "id": "' + tool_call.id[-9:] + '"}' }}
+            {%- if not loop.last %}
+                {{- ", " }}
+            {%- else %}
+                {{- "]" + eos_token }}
+            {%- endif %}
+        {%- endfor %}
+    {%- elif message["role"] == "assistant" %}
+        {{- " " + message["content"] + eos_token }}
+    {%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
+        {%- if message.content is defined and message.content.content is defined %}
+            {%- set content = message.content.content %}
+        {%- else %}
+            {%- set content = message.content %}
+        {%- endif %}
+        {{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }}
+        {%- if not message.tool_call_id is defined or message.tool_call_id|length < 9 %}
+            {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (2)" + message.tool_call_id) }}
+        {%- endif %}
+        {{- '"call_id": "' + message.tool_call_id[-9:] + '"}[/TOOL_RESULTS]' }}
+    {%- else %}
+        {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }}
+    {%- endif %}
+{%- endfor %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_phi4_mini.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_phi4_mini.jinja
new file mode 100644
index 000000000000..6f40c38c2064
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_phi4_mini.jinja
@@ -0,0 +1,62 @@
+{%- if messages and messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content']|trim %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- set system_message = "You are a helpful assistant." %}
+{%- endif %}
+
+{%- if messages %}
+<|system|>
+{{ system_message }}
+{%- if tools %}
+In addition to plain text responses, you can choose to call one or more of the provided functions.
+
+Use the following rule to decide when to call a function:
+  * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so
+  * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls
+
+If you decide to call functions:
+  * prefix function calls with functools marker (no closing marker required)
+  * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...]
+  * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples
+  * respect the argument type formatting. E.g., if the type is number and format is float, write value 7 as 7.0
+  * make sure you pick the right functions that match the user intent
+
+
+        {%- for t in tools %}
+            {{- t | tojson(indent=4) }}
+            {{- "\n\n" }}
+        {%- endfor %}
+{%- endif %}<|end|>
+
+    {%- for message in messages %}
+        {%- if message.role != "system" %}
+<|{{ message.role }}|>
+            {%- if message.content and message.role == "tools" %}
+{"result": {{ message.content }}}
+            {%- elif message.content %}
+{{ message.content }}
+            {%- elif message.tool_calls %}
+                {%- for call in message.tool_calls %}
+{"name": "{{ call.function.name }}", "arguments": {{ call.function.arguments }}}
+                    {%- if not loop.last %},{% endif %}
+                {%- endfor %}
+            {%- endif %}<|end|>
+        {%- endif %}
+    {%- endfor %}<|assistant|>
+
+{%- else %}
+    {%- if system_message %}
+<|system|>
+
+{{ system_message }}<|end|>
+    {%- endif %}
+    {%- if prompt %}
+<|user|>
+
+{{ prompt }}<|end|>
+    {%- endif %}<|assistant|>
+
+{%- endif %}
+{{ response }}
+{%- if response %}<|user|>{% endif %}
\ No newline at end of file
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_qwen3coder.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_qwen3coder.jinja
new file mode 100644
index 000000000000..49b0e8d0ee7e
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_qwen3coder.jinja
@@ -0,0 +1,117 @@
+{% macro render_extra_keys(json_dict, handled_keys) %}
+    {%- if json_dict is mapping %}
+        {%- for json_key in json_dict if json_key not in handled_keys %}
+            {%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}
+                {{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}
+            {%- else %}
+                {{-'\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}
+            {%- endif %}
+        {%- endfor %}
+    {%- endif %}
+{% endmacro %}
+
+{%- if messages[0]["role"] == "system" %}
+    {%- set system_message = messages[0]["content"] %}
+    {%- set loop_messages = messages[1:] %}
+{%- else %}
+    {%- set loop_messages = messages %}
+{%- endif %}
+
+{%- if not tools is defined %}
+    {%- set tools = [] %}
+{%- endif %}
+
+{%- if system_message is defined %}
+    {{- "<|im_start|>system\n" + system_message }}
+{%- else %}
+    {%- if tools is iterable and tools | length > 0 %}
+        {{- "<|im_start|>system\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks." }}
+    {%- endif %}
+{%- endif %}
+{%- if tools is iterable and tools | length > 0 %}
+    {{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
+    {{- "<tools>" }}
+    {%- for tool in tools %}
+        {%- if tool.function is defined %}
+            {%- set tool = tool.function %}
+        {%- endif %}
+        {{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
+        {%- if tool.description is defined %}
+            {{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
+        {%- endif %}
+        {{- '\n<parameters>' }}
+        {%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}
+            {%- for param_name, param_fields in tool.parameters.properties|items %}
+                {{- '\n<parameter>' }}
+                {{- '\n<name>' ~ param_name ~ '</name>' }}
+                {%- if param_fields.type is defined %}
+                    {{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
+                {%- endif %}
+                {%- if param_fields.description is defined %}
+                    {{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
+                {%- endif %}
+                {%- set handled_keys = ['name', 'type', 'description'] %}
+                {{- render_extra_keys(param_fields, handled_keys) }}
+                {{- '\n</parameter>' }}
+            {%- endfor %}
+        {%- endif %}
+        {% set handled_keys = ['type', 'properties'] %}
+        {{- render_extra_keys(tool.parameters, handled_keys) }}
+        {{- '\n</parameters>' }}
+        {%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}
+        {{- render_extra_keys(tool, handled_keys) }}
+        {{- '\n</function>' }}
+    {%- endfor %}
+    {{- "\n</tools>" }}
+    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+{%- endif %}
+{%- if system_message is defined %}
+    {{- '<|im_end|>\n' }}
+{%- else %}
+    {%- if tools is iterable and tools | length > 0 %}
+        {{- '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- for message in loop_messages %}
+    {%- if message.role == "assistant" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}
+        {{- '<|im_start|>' + message.role }}
+        {%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}
+            {{- '\n' + message.content | trim + '\n' }}
+        {%- endif %}
+        {%- for tool_call in message.tool_calls %}
+            {%- if tool_call.function is defined %}
+                {%- set tool_call = tool_call.function %}
+            {%- endif %}
+            {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+            {%- if tool_call.arguments is defined %}
+                {%- for args_name, args_value in tool_call.arguments|items %}
+                    {{- '<parameter=' + args_name + '>\n' }}
+                    {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+                    {{- args_value }}
+                    {{- '\n</parameter>\n' }}
+                {%- endfor %}
+            {%- endif %}
+            {{- '</function>\n</tool_call>' }}
+        {%- endfor %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "user" or message.role == "system" or message.role == "assistant" %}
+        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.previtem and loop.previtem.role != "tool" %}
+            {{- '<|im_start|>user\n' }}
+        {%- endif %}
+        {{- '<tool_response>\n' }}
+        {{- message.content }}
+        {{- '\n</tool_response>\n' }}
+        {%- if not loop.last and loop.nextitem.role != "tool" %}
+            {{- '<|im_end|>\n' }}
+        {%- elif loop.last %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- else %}
+        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+{%- endif %}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_toolace.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_toolace.jinja
new file mode 100644
index 000000000000..da0f25cdcb33
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_toolace.jinja
@@ -0,0 +1,65 @@
+{{- bos_token }}
+
+{%- if custom_tools is defined %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+
+{#- This block extracts the system message, so we can slot it into the right place. #}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content']|trim %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- set system_message = "You are a helpful assistant with tool calling capabilities. Only reply with a tool call if the function exists in the library provided by the user. If it doesn't exist, just reply directly in natural language." %}
+{%- endif %}
+
+{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
+{%- if tools is not none and not tools_in_user_message %}
+    {{- "You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose.\n" }}
+    {{- "If none of the function can be used, point it out. If the given question lacks the parameters required by the function, also point it out.\n" }}
+    {{- "You should only return the function call in tools call sections.\n\n" }}
+    {{- "If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\n" }}
+    {{- "You SHOULD NOT include any other text in the response.\n" }}
+    {{- "Here is a list of functions in JSON format that you can invoke.\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+    {{- "\n" }}
+{%- endif %}
+{{- system_message }}
+{{- "<|eot_id|>" }}
+
+{%- for message in messages %}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
+    {%- elif 'tool_calls' in message %}
+        {{- '<|start_header_id|>assistant<|end_header_id|>\n\n[' -}}
+        {%- for tool_call in message.tool_calls %}
+            {%- if tool_call.function is defined %}
+                {%- set tool_call = tool_call.function %}
+            {%- endif %}
+            {{- tool_call.name + '(' -}}
+            {%- for param in tool_call.arguments %}
+                {{- param + '=' -}}
+                {{- "%s" | format(tool_call.arguments[param]) -}}
+                {% if not loop.last %}, {% endif %}
+            {%- endfor %}
+            {{- ')' -}}
+            {% if not loop.last %}, {% endif %}
+        {%- endfor %}
+        {{- ']<|eot_id|>' -}}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
+        {%- if message.content is mapping %}
+            {{- message.content | tojson }}
+        {%- else %}
+            {{- { "output": message.content } | tojson }}
+        {%- endif %}
+        {{- "<|eot_id|>" }}
+    {%- endif %}
+{%- endfor %}
+
+{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_xlam_llama.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_xlam_llama.jinja
new file mode 100644
index 000000000000..f97de4004f1c
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_xlam_llama.jinja
@@ -0,0 +1,77 @@
+{{- bos_token }}
+{%- if custom_tools is defined %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools_in_user_message is defined %}
+    {%- set tools_in_user_message = true %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+
+{#- Extract system message #}
+{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content'] | trim %}
+    {%- set messages = messages[1:] %}
+    {{- system_message + "\n" }}
+{%- else %}
+    {%- set system_message = "You are a helpful assistant. You are developed by Salesforce xLAM team." %}
+    {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: 
+
+[{"name": "tool_call_name", "arguments": {"arg1": "value1", "arg2": "value2"}}, ... (additional parallel tool calls as needed)]
+
+If no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}
+    {{- system_message + "\n" }}
+    {%- if tools is not none %}
+        {{- format_instruction + "\n\n" }}
+    {%- endif %}
+{%- endif %}
+
+
+{%- if tools is not none %}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+{{- "<|eot_id|>" }}
+
+{%- for message in messages %}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
+    {%- elif 'tool_calls' in message %}
+        {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+        {%- if message['tool_calls'] %}
+            {{- "[" }}
+            {%- for tool_call_function in message.tool_calls %}
+                {%- set tool_call = tool_call_function.function %}
+                {{- '{"name": "' + tool_call.name + '", ' }}
+                {{- '"arguments": ' }}
+                {{- tool_call.arguments | tojson }}
+                {{- "}" }}
+                {%- if not loop.last %}
+                    {{- ", " }}
+                {%- endif %}
+            {%- endfor %}
+            {{- "]" }}
+            {{- "<|eot_id|>" }}
+        {%- elif message['content'] %}
+            {{- message['content'] | trim + '<|eot_id|>' }}
+        {%- else %}
+            {{- "[]\n" + '<|eot_id|>' }}
+        {%- endif %}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "<|start_header_id|>" + "ipython" + "<|end_header_id|>\n\n" }}
+        {%- set content = message["content"] %}
+        {%- if content is mapping or (content is iterable and content is not string) %}
+            {{- content | tojson }}
+        {%- else %}
+            {{- content }}
+        {%- endif %}
+        {{- "<|eot_id|>" }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+{%- endif %}
\ No newline at end of file
diff --git a/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_xlam_qwen.jinja b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_xlam_qwen.jinja
new file mode 100644
index 000000000000..acf57cc4b2c1
--- /dev/null
+++ b/rust/src/chat/tests/templates/vllm_examples/tool_chat_template_xlam_qwen.jinja
@@ -0,0 +1,66 @@
+{# System message #}
+{{- "<|im_start|>system\n" }}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content'] | trim %}
+    {%- set messages = messages[1:] %}
+    {{- system_message + "\n" }}
+{%- else %}
+    {%- set system_message = "You are a helpful assistant. You are developed by Salesforce xLAM team." %}
+    {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: 
+
+[{"name": "tool_call_name", "arguments": {"arg1": "value1", "arg2": "value2"}}, ... (additional parallel tool calls as needed)]
+
+If no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}
+    {{- system_message + "\n" }}
+    {%- if tools is not none %}
+        {{- format_instruction + "\n\n" }}
+    {%- endif %}
+{%- endif %}
+
+{%- if tools is not none %}
+    {%- for func in tools %}
+        {{- func | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+{{- "<|im_end|>\n" }}
+{%- for message in messages %}
+    {%- if message['role'] == 'tool' %}
+        {{- "<|im_start|>tool\n" }}
+        {%- if message.content is defined and message.content.content is defined %}
+            {%- set content = message.content.content %}
+        {%- else %}
+            {%- set content = message.content %}
+        {%- endif %}
+        {%- if content is mapping or content is iterable and content is not string %}
+            {{- content | tojson }}
+        {%- else %}
+            {{- content }}
+        {%- endif %}
+        {{- "<|im_end|>\n" }}
+    {%- elif 'tool_calls' in message %}
+        {{- "<|im_start|>assistant\n" }}
+        {%- if message['tool_calls'] %}
+            {{- "[" }}
+            {%- for tool_call in message.tool_calls %}
+                {%- set out = tool_call.function | tojson %}
+                {{- out }}
+                {%- if not loop.last %}
+                    {{- ", " }}
+                {%- endif %}
+            {%- endfor %}
+            {{- "]"}}
+        {%- elif message['content'] %}
+            {{- message['content'] | trim }}
+        {%- else %}
+            {{- "[]\n" }}
+        {%- endif %}
+        {{- "<|im_end|>\n" }}
+    {%- else %}
+        {{- "<|im_start|>" + message['role'] + "\n" + message['content'] | trim + "<|im_end|>\n" }}
+    {%- endif %}
+{%- endfor %}
+
+{%- if add_generation_prompt %}
+    {{- "<|im_start|>assistant\n" }}
+{%- endif %}
diff --git a/rust/src/cmd/Cargo.toml b/rust/src/cmd/Cargo.toml
new file mode 100644
index 000000000000..b684d0722028
--- /dev/null
+++ b/rust/src/cmd/Cargo.toml
@@ -0,0 +1,39 @@
+[package]
+name = "vllm-cmd"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[[bin]]
+name = "vllm-rs"
+path = "src/main.rs"
+
+[features]
+default = []
+native-tls-vendored = ["dep:native-tls-vendored"]
+
+[dependencies]
+anyhow.workspace = true
+clap.workspace = true
+educe.workspace = true
+itertools.workspace = true
+native-tls-vendored = { workspace = true, optional = true }
+serde.workspace = true
+serde_json.workspace = true
+serde_with.workspace = true
+thiserror-ext.workspace = true
+time.workspace = true
+tokio = { workspace = true, features = ["signal"] }
+tokio-util.workspace = true
+tracing.workspace = true
+tracing-subscriber.workspace = true
+uuid.workspace = true
+vllm-engine-core-client.workspace = true
+vllm-managed-engine.workspace = true
+vllm-server.workspace = true
+
+[dev-dependencies]
+expect-test.workspace = true
+
+[lints]
+workspace = true
diff --git a/rust/src/cmd/examples/README.md b/rust/src/cmd/examples/README.md
new file mode 100644
index 000000000000..ad328a731833
--- /dev/null
+++ b/rust/src/cmd/examples/README.md
@@ -0,0 +1,44 @@
+# `vllm-rs` CLI Quick Start
+
+Start Qwen3 with one managed `vllm-rs serve` command from the repo root:
+
+```bash
+HF_HUB_OFFLINE=1 \
+VLLM_CPU_KVCACHE_SPACE=2 \
+VLLM_HOST_IP=127.0.0.1 \
+VLLM_LOOPBACK_IP=127.0.0.1 \
+cargo run --bin vllm-rs -- serve \
+  Qwen/Qwen3-0.6B \
+  --python ../vllm/.venv/bin/python \
+  --max-model-len 512 \
+  -- \
+  --dtype float16
+```
+
+This launches:
+
+- a managed headless Python `vllm` engine
+- the Rust OpenAI-compatible frontend on `127.0.0.1:8000`
+
+All Python engine arguments must be placed after `--`. Arguments before `--` are parsed by the Rust
+frontend itself.
+
+You can then send OpenAI-style requests to the Rust frontend:
+
+```bash
+curl http://127.0.0.1:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Qwen/Qwen3-0.6B",
+    "messages": [{"role": "user", "content": "What is the capital of France?"}],
+    "stream": true
+  }'
+```
+
+If you already started headless `vllm` yourself, use `frontend` instead:
+
+```bash
+cargo run --bin vllm-rs -- frontend \
+  --handshake-address tcp://127.0.0.1:62100 \
+  Qwen/Qwen3-0.6B
+```
diff --git a/rust/src/cmd/src/cli.rs b/rust/src/cmd/src/cli.rs
new file mode 100644
index 000000000000..70ac8440453b
--- /dev/null
+++ b/rust/src/cmd/src/cli.rs
@@ -0,0 +1,434 @@
+//! CLI argument definitions for the `vllm-rs` binary.
+//!
+//! Python vLLM references:
+//! - Engine args: <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/engine/arg_utils.py#L657-L1311>
+//! - Environment variables: <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/envs.py#L472>
+
+mod unsupported;
+
+use std::collections::HashMap;
+use std::ffi::{OsStr, OsString};
+use std::path::PathBuf;
+use std::time::Duration;
+
+use clap::{Args, Parser, Subcommand};
+use educe::Educe;
+use serde::Deserialize;
+use serde::de::DeserializeOwned;
+use serde_json::Value;
+use thiserror_ext::AsReport as _;
+use uuid::Uuid;
+use vllm_engine_core_client::TransportMode;
+use vllm_managed_engine::ManagedEngineConfig;
+use vllm_managed_engine::cli::{ManagedEngineArgs, repartition_managed_engine_args};
+use vllm_server::{
+    ChatTemplateContentFormatOption, Config, CoordinatorMode, HttpListenerMode, ParserSelection,
+    RendererSelection,
+};
+
+use crate::cli::unsupported::UnsupportedArgs;
+
+/// Top-level parser for the `vllm-rs` binary.
+#[derive(Debug, Parser)]
+#[command(
+    name = "vllm-rs",
+    about = "Rust frontend and managed-engine CLI for vLLM."
+)]
+pub struct Cli {
+    #[command(subcommand)]
+    pub command: Command,
+}
+
+impl Cli {
+    pub fn parse() -> Self {
+        Self::try_parse_from(std::env::args_os()).unwrap_or_else(|error| error.exit())
+    }
+
+    pub fn try_parse_from<I, T>(itr: I) -> Result<Self, clap::Error>
+    where
+        I: IntoIterator<Item = T>,
+        T: Into<OsString>,
+    {
+        let args: Vec<OsString> = itr.into_iter().map(Into::into).collect();
+        let repartitioned_args = repartition_managed_engine_args::<Self>(&args, Some("serve"))?;
+        <Self as Parser>::try_parse_from(&repartitioned_args).inspect(|cli| {
+            if let Command::Serve(serve) = &cli.command
+                && serve.debug_cli
+            {
+                println!(
+                    "Original CLI args: {}\n",
+                    args.join(OsStr::new(" ")).display()
+                );
+                println!(
+                    "Repartitioned CLI args: {}\n",
+                    repartitioned_args.join(OsStr::new(" ")).display()
+                );
+                println!(
+                    "Passthrough Python args: {}",
+                    serve.managed_engine.python_args.join(" ")
+                );
+                std::process::exit(0);
+            }
+        })
+    }
+}
+
+/// Supported top-level CLI commands.
+#[derive(Debug, Subcommand, PartialEq, Eq)]
+pub enum Command {
+    /// Run the Rust OpenAI frontend as a Python-supervised worker.
+    Frontend(FrontendArgs),
+    /// Launch a managed Python headless engine, then run the Rust OpenAI
+    /// frontend.
+    Serve(ServeArgs),
+}
+
+/// Runtime arguments shared by the external-engine and managed-engine paths.
+#[derive(Educe, Clone, Args, PartialEq, Eq, Deserialize)]
+#[educe(Debug)]
+pub struct SharedRuntimeArgs {
+    #[serde(rename = "model_tag")]
+    /// Model identifier or local model directory used for backend loading and
+    /// public model ID.
+    pub model: String,
+
+    /// Maximum time to wait for the expected engines to register on the
+    /// frontend transport.
+    #[arg(
+        long = "engine-ready-timeout-secs",
+        env = "VLLM_ENGINE_READY_TIMEOUT_S",
+        default_value_t = default_engine_ready_timeout_secs()
+    )]
+    #[serde(default = "default_engine_ready_timeout_secs")]
+    pub engine_ready_timeout_secs: u64,
+
+    /// Select the tool call parser depending on the model that you're using.
+    /// Use `auto` to infer from the model or `none` to disable parsing.
+    #[arg(long, default_value_t)]
+    #[serde(default)]
+    pub tool_call_parser: ParserSelection,
+    /// Select the reasoning parser depending on the model that you're using.
+    /// Use `auto` to infer from the model or `none` to disable parsing.
+    #[arg(long, default_value_t)]
+    #[serde(default)]
+    pub reasoning_parser: ParserSelection,
+    /// Select the chat renderer implementation.
+    #[arg(long = "tokenizer-mode", default_value_t)]
+    #[serde(default, rename = "tokenizer_mode")]
+    pub renderer: RendererSelection,
+    /// Override the maximum model context length. When set, the frontend uses
+    /// this value instead of the model's `max_position_embeddings` from
+    /// `config.json`.
+    #[arg(long)]
+    pub max_model_len: Option<u32>,
+    /// TCP port for the gRPC Generate service. When not set, no gRPC server is
+    /// started.
+    #[arg(long)]
+    #[serde(default)]
+    pub grpc_port: Option<u16>,
+    /// Maximum time to wait for active requests to drain during shutdown.
+    #[arg(long, default_value_t = 0)]
+    #[serde(default)]
+    pub shutdown_timeout: u64,
+
+    /// The file path to the chat template, or the template in single-line form
+    /// for the specified model.
+    #[arg(long)]
+    #[serde(default)]
+    pub chat_template: Option<String>,
+
+    /// Default keyword arguments to pass to the chat template renderer.
+    ///
+    /// These will be merged with request-level chat_template_kwargs, with
+    /// request values taking precedence. Useful for setting default
+    /// behavior for reasoning models.
+    ///
+    /// Example: `{"enable_thinking": false}` to disable thinking mode by
+    /// default for Qwen3/DeepSeek models.
+    #[arg(long, value_parser = parse_json::<HashMap<String, Value>>, value_name = "JSON")]
+    #[serde(default)]
+    pub default_chat_template_kwargs: Option<HashMap<String, Value>>,
+
+    /// The format to render message content within a chat template.
+    ///
+    /// * "auto" detects the format from the template
+    /// * "string" renders content as a string. Example: `"Hello World"`
+    /// * "openai" renders content as a list of dictionaries, similar to OpenAI schema. Example:
+    ///   `[{"type": "text", "text": "Hello world!"}]`
+    #[arg(long, default_value_t)]
+    #[serde(default)]
+    pub chat_template_content_format: ChatTemplateContentFormatOption,
+
+    /// Log a summary line for each completed request, including prompt/output
+    /// token counts and finish reason.
+    #[arg(long)]
+    #[serde(default)]
+    pub enable_log_requests: bool,
+
+    /// Disable periodic logging of engine statistics (throughput, queue depth,
+    /// cache usage).
+    #[arg(long)]
+    #[serde(default)]
+    pub disable_log_stats: bool,
+
+    /// The model name(s) used in the API. If multiple names are provided, the
+    /// server will respond to any of the provided names. The model name in the
+    /// model field of a response will be the first name in this list. If not
+    /// specified, the model name will be the same as the `--model` argument.
+    /// Noted that this name(s) will also be used in `model_name` tag
+    /// content of prometheus metrics, if multiple names provided, metrics
+    /// tag will take the first one.
+    #[arg(long, num_args = 0..)]
+    #[serde(default)]
+    pub served_model_name: Vec<String>,
+
+    /// Unsupported Python vLLM frontend arguments recognized but not yet
+    /// implemented in Rust.
+    #[educe(Debug(ignore))]
+    #[command(flatten)]
+    #[serde(default, flatten)]
+    pub unsupported: UnsupportedArgs,
+}
+
+impl SharedRuntimeArgs {
+    /// Maximum time to wait for the expected engines to register on the
+    /// frontend transport.
+    pub fn ready_timeout(&self) -> Duration {
+        Duration::from_secs(self.engine_ready_timeout_secs)
+    }
+
+    /// Maximum time to wait for active requests to drain during shutdown.
+    pub fn shutdown_timeout(&self) -> Duration {
+        Duration::from_secs(self.shutdown_timeout)
+    }
+
+    /// Build the OpenAI-server config for the Python-bootstrap worker contract.
+    ///
+    /// The resulting config binds the Python-supplied transport addresses and
+    /// inherits an already open HTTP listener from the supervisor process.
+    fn into_bootstrapped_config(
+        self,
+        listen_fd: i32,
+        input_address: String,
+        output_address: String,
+        coordinator_address: Option<String>,
+        engine_count: usize,
+    ) -> Config {
+        let ready_timeout = self.ready_timeout();
+        let shutdown_timeout = self.shutdown_timeout();
+
+        Config {
+            transport_mode: TransportMode::Bootstrapped {
+                input_address,
+                output_address,
+                engine_count,
+                ready_timeout,
+            },
+            coordinator_mode: match coordinator_address {
+                Some(address) => CoordinatorMode::External { address },
+                None => CoordinatorMode::None,
+            },
+            model: self.model,
+            served_model_name: self.served_model_name,
+            listener_mode: HttpListenerMode::InheritedFd { fd: listen_fd },
+            tool_call_parser: self.tool_call_parser,
+            reasoning_parser: self.reasoning_parser,
+            renderer: self.renderer,
+            chat_template: self.chat_template,
+            default_chat_template_kwargs: self.default_chat_template_kwargs,
+            chat_template_content_format: self.chat_template_content_format,
+            enable_log_requests: self.enable_log_requests,
+            disable_log_stats: self.disable_log_stats,
+            grpc_port: self.grpc_port,
+            shutdown_timeout,
+        }
+    }
+
+    /// Build the OpenAI-server config for the managed `serve` path that still
+    /// owns the startup handshake and binds its own HTTP listener.
+    fn into_managed_config(
+        self,
+        listener_mode: HttpListenerMode,
+        handshake_address: String,
+        advertised_host: String,
+        engine_count: usize,
+        local_input_address: Option<String>,
+        local_output_address: Option<String>,
+    ) -> Config {
+        let ready_timeout = self.ready_timeout();
+        let shutdown_timeout = self.shutdown_timeout();
+
+        Config {
+            transport_mode: TransportMode::HandshakeOwner {
+                handshake_address,
+                advertised_host,
+                engine_count,
+                ready_timeout,
+                local_input_address,
+                local_output_address,
+            },
+            coordinator_mode: CoordinatorMode::MaybeInProc,
+            model: self.model,
+            served_model_name: self.served_model_name,
+            listener_mode,
+            tool_call_parser: self.tool_call_parser,
+            reasoning_parser: self.reasoning_parser,
+            renderer: self.renderer,
+            chat_template: self.chat_template,
+            default_chat_template_kwargs: self.default_chat_template_kwargs,
+            chat_template_content_format: self.chat_template_content_format,
+            enable_log_requests: self.enable_log_requests,
+            disable_log_stats: self.disable_log_stats,
+            grpc_port: self.grpc_port,
+            shutdown_timeout,
+        }
+    }
+}
+
+fn default_engine_ready_timeout_secs() -> u64 {
+    600
+}
+
+fn parse_json<T: DeserializeOwned>(value: &str) -> Result<T, String> {
+    serde_json::from_str(value).map_err(|e| format!("invalid JSON object: {}", e.as_report()))
+}
+
+fn parse_runtime_args_json(value: &str) -> Result<SharedRuntimeArgs, String> {
+    let args: SharedRuntimeArgs = serde_json::from_str(value)
+        .map_err(|e| format!("invalid JSON arguments: {}", e.as_report()))?;
+    args.unsupported.check()?;
+    Ok(args)
+}
+
+/// Arguments for running the Rust frontend as a Python-bootstrapped worker.
+#[derive(Educe, Clone, Args, PartialEq, Eq)]
+#[educe(Debug)]
+pub struct FrontendArgs {
+    /// Inherited listening socket file descriptor passed by the Python
+    /// supervisor.
+    #[arg(long)]
+    pub listen_fd: i32,
+    /// Frontend input ROUTER socket address that the Python engines will
+    /// connect to.
+    #[arg(long)]
+    pub input_address: String,
+    /// Frontend output PULL socket address that the Python engines will push
+    /// responses to.
+    #[arg(long)]
+    pub output_address: String,
+    /// Optional Python-owned frontend-side DP coordinator socket address for
+    /// external coordinator mode in the bootstrapped frontend path, i.e.,
+    /// `stats_update_address`.
+    #[arg(long)]
+    pub coordinator_address: Option<String>,
+    /// Total number of data-parallel engines expected for this frontend.
+    #[arg(long, default_value_t = 1)]
+    pub engine_count: usize,
+
+    /// Shared frontend arguments as one JSON object.
+    #[arg(long = "args-json", value_parser = parse_runtime_args_json, value_name = "JSON")]
+    pub runtime: SharedRuntimeArgs,
+}
+
+impl FrontendArgs {
+    /// Convert the CLI arguments into the OpenAI server's runtime config.
+    pub fn into_config(self) -> Config {
+        self.runtime.into_bootstrapped_config(
+            self.listen_fd,
+            self.input_address,
+            self.output_address,
+            self.coordinator_address,
+            self.engine_count,
+        )
+    }
+}
+
+/// Arguments for the managed-engine mode that spawns Python on behalf of the
+/// user.
+#[derive(Educe, Clone, Args, PartialEq, Eq)]
+#[educe(Debug)]
+#[command(override_usage = "vllm-rs serve <MODEL> [OPTIONS] [-- <PYTHON_ARGS>...]")]
+pub struct ServeArgs {
+    /// Only launch the managed Python headless engine and do not start the Rust
+    /// frontend.
+    #[arg(long)]
+    pub headless: bool,
+    /// HTTP bind host for the OpenAI-compatible server.
+    #[arg(long, default_value = "127.0.0.1")]
+    pub host: String,
+    /// HTTP bind port for the OpenAI-compatible server.
+    #[arg(long, default_value_t = 8000)]
+    pub port: u16,
+    /// Unix domain socket path. If set, host and port arguments are ignored.
+    #[arg(long)]
+    pub uds: Option<String>,
+
+    /// Flag to print debug information about CLI argument parsing and exit.
+    #[educe(Debug(ignore))]
+    #[arg(long, hide = true, env = "VLLM_RS_DEBUG_CLI")]
+    pub debug_cli: bool,
+
+    /// Shared frontend arguments.
+    #[command(flatten)]
+    pub runtime: SharedRuntimeArgs,
+
+    /// Managed Python headless-engine arguments.
+    #[command(flatten)]
+    pub managed_engine: ManagedEngineArgs,
+}
+
+impl ServeArgs {
+    /// Build the OpenAI-server runtime config used after the managed Python
+    /// engine starts.
+    pub fn to_frontend_config(&self, handshake_address: String) -> Config {
+        // Prefer IPC sockets for local engine input/output.
+        let (local_input_address, local_output_address) =
+            self.managed_engine.frontend_local_only().then(frontend_ipc_addresses).unzip();
+        let listener_mode = match &self.uds {
+            Some(path) => HttpListenerMode::BindUnix { path: path.clone() },
+            None => HttpListenerMode::BindTcp {
+                host: self.host.clone(),
+                port: self.port,
+            },
+        };
+
+        self.runtime.clone().into_managed_config(
+            listener_mode,
+            handshake_address,
+            self.managed_engine.handshake_host.clone(),
+            self.managed_engine.data_parallel_size,
+            local_input_address,
+            local_output_address,
+        )
+    }
+
+    /// Build the managed Python-engine spawn configuration with the given
+    /// handshake port.
+    pub fn to_managed_engine_config(&self, handshake_port: u16) -> ManagedEngineConfig {
+        self.managed_engine.clone().into_config(
+            self.runtime.model.clone(),
+            self.runtime.max_model_len,
+            handshake_port,
+        )
+    }
+}
+
+/// Allocate fresh IPC endpoints for one managed frontend instance.
+fn frontend_ipc_addresses() -> (String, String) {
+    let preferred_base_path = std::env::var_os("VLLM_RPC_BASE_PATH")
+        .map(PathBuf::from)
+        .unwrap_or_else(std::env::temp_dir);
+    let input_name = format!("vllm-rs-i-{}", Uuid::new_v4().simple());
+    let output_name = format!("vllm-rs-o-{}", Uuid::new_v4().simple());
+
+    let input = preferred_base_path.join(input_name);
+    let output = preferred_base_path.join(output_name);
+
+    (
+        format!("ipc://{}", input.to_string_lossy()),
+        format!("ipc://{}", output.to_string_lossy()),
+    )
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/rust/src/cmd/src/cli/tests.rs b/rust/src/cmd/src/cli/tests.rs
new file mode 100644
index 000000000000..0762468456e3
--- /dev/null
+++ b/rust/src/cmd/src/cli/tests.rs
@@ -0,0 +1,905 @@
+use expect_test::expect;
+use vllm_engine_core_client::TransportMode;
+use vllm_server::{Config, HttpListenerMode, ParserSelection, RendererSelection};
+
+use super::{Cli, Command};
+
+#[test]
+fn serve_args_forward_python_flags_with_separator() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--python",
+        "../vllm/.venv/bin/python",
+        "--max-model-len",
+        "512",
+        "--",
+        "--dtype",
+        "float16",
+    ])
+    .unwrap();
+
+    expect![[r#"
+        Cli {
+            command: Serve(
+                ServeArgs {
+                    headless: false,
+                    host: "127.0.0.1",
+                    port: 8000,
+                    uds: None,
+                    runtime: SharedRuntimeArgs {
+                        model: "Qwen/Qwen3-0.6B",
+                        engine_ready_timeout_secs: 600,
+                        tool_call_parser: Auto,
+                        reasoning_parser: Auto,
+                        renderer: Auto,
+                        max_model_len: Some(
+                            512,
+                        ),
+                        grpc_port: None,
+                        shutdown_timeout: 0,
+                        chat_template: None,
+                        default_chat_template_kwargs: None,
+                        chat_template_content_format: Auto,
+                        enable_log_requests: false,
+                        disable_log_stats: false,
+                        served_model_name: [],
+                    },
+                    managed_engine: ManagedEngineArgs {
+                        python: "../vllm/.venv/bin/python",
+                        handshake_host: "127.0.0.1",
+                        handshake_port: None,
+                        data_parallel_size: 1,
+                        data_parallel_size_local: None,
+                        python_args: [
+                            "--dtype",
+                            "float16",
+                        ],
+                    },
+                },
+            ),
+        }
+    "#]]
+    .assert_debug_eq(&cli);
+}
+
+#[test]
+fn serve_args_auto_forward_python_flags_without_separator() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--python",
+        "python3",
+        "--quantization",
+        "awq",
+    ])
+    .unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    assert_eq!(
+        args.managed_engine.python_args,
+        vec!["--quantization", "awq"]
+    );
+}
+
+#[test]
+fn serve_args_auto_forward_python_multi_char_alias_without_separator() {
+    let cli = Cli::try_parse_from(["vllm-rs", "serve", "Qwen/Qwen3-0.6B", "-tp", "2"]).unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    assert_eq!(
+        args.managed_engine.python_args,
+        vec!["--tensor-parallel-size", "2"]
+    );
+}
+
+#[test]
+fn serve_args_accept_explicit_deepseek_v32_renderer() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--tokenizer-mode",
+        "deepseek_v32",
+    ])
+    .unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    assert_eq!(args.runtime.renderer, RendererSelection::DeepSeekV32);
+}
+
+#[test]
+fn serve_args_reject_unknown_renderer_value() {
+    let error = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--tokenizer-mode",
+        "definitely_missing",
+    ])
+    .unwrap_err();
+
+    expect![[r#"
+        error: invalid value 'definitely_missing' for '--tokenizer-mode <RENDERER>': unknown renderer `definitely_missing` (expected one of: auto, hf, deepseek_v32, deepseek_v4)
+
+        For more information, try '--help'.
+    "#]]
+    .assert_eq(&error.to_string());
+}
+
+#[test]
+fn serve_args_reject_unsupported_flag_arg() {
+    let error = Cli::try_parse_from(["vllm-rs", "serve", "Qwen/Qwen3-0.6B", "--allow-credentials"])
+        .unwrap_err();
+
+    expect![[r#"
+        error: invalid value 'true' for '--allow-credentials [<ALLOW_CREDENTIALS>]': argument is not implemented in Rust frontend yet
+
+        Remove this unsupported argument to continue.
+
+        Alternatively, if you intend to pass it only to the Python engine, put it after `--` (e.g., `-- <arg>`).
+        This may lead to unexpected behavior as the Rust frontend will completely ignore that argument.
+
+        For more information, try '--help'.
+    "#]]
+    .assert_eq(&error.to_string());
+}
+
+#[test]
+fn serve_args_reject_unsupported_no_flag_alias() {
+    let error = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--no-enable-log-deltas",
+    ])
+    .unwrap_err();
+
+    expect![[r#"
+        error: invalid value 'true' for '--enable-log-deltas [<ENABLE_LOG_DELTAS>]': argument is not implemented in Rust frontend yet
+
+        Remove this unsupported argument to continue.
+
+        Alternatively, if you intend to pass it only to the Python engine, put it after `--` (e.g., `-- <arg>`).
+        This may lead to unexpected behavior as the Rust frontend will completely ignore that argument.
+
+        For more information, try '--help'.
+    "#]]
+    .assert_eq(&error.to_string());
+}
+
+#[test]
+fn frontend_args_accept_json() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "frontend",
+        "--listen-fd",
+        "3",
+        "--input-address",
+        "ipc:///tmp/input.sock",
+        "--output-address",
+        "ipc:///tmp/output.sock",
+        "--coordinator-address",
+        "tcp://127.0.0.1:7000",
+        "--args-json",
+        r#"{"model_tag":"Qwen/Qwen3-0.6B","engine_count":2}"#,
+    ])
+    .unwrap();
+
+    expect![[r#"
+        Cli {
+            command: Frontend(
+                FrontendArgs {
+                    listen_fd: 3,
+                    input_address: "ipc:///tmp/input.sock",
+                    output_address: "ipc:///tmp/output.sock",
+                    coordinator_address: Some(
+                        "tcp://127.0.0.1:7000",
+                    ),
+                    engine_count: 1,
+                    runtime: SharedRuntimeArgs {
+                        model: "Qwen/Qwen3-0.6B",
+                        engine_ready_timeout_secs: 600,
+                        tool_call_parser: Auto,
+                        reasoning_parser: Auto,
+                        renderer: Auto,
+                        max_model_len: None,
+                        grpc_port: None,
+                        shutdown_timeout: 0,
+                        chat_template: None,
+                        default_chat_template_kwargs: None,
+                        chat_template_content_format: Auto,
+                        enable_log_requests: false,
+                        disable_log_stats: false,
+                        served_model_name: [],
+                    },
+                },
+            ),
+        }
+    "#]]
+    .assert_debug_eq(&cli);
+}
+
+#[test]
+fn frontend_args_json_applies_defaults() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "frontend",
+        "--listen-fd",
+        "3",
+        "--input-address",
+        "ipc:///tmp/input.sock",
+        "--output-address",
+        "ipc:///tmp/output.sock",
+        "--args-json",
+        r#"{"model_tag":"Qwen/Qwen3-0.6B"}"#,
+    ])
+    .unwrap();
+
+    let Command::Frontend(args) = cli.command else {
+        panic!("expected frontend args");
+    };
+    assert_eq!(args.runtime.model, "Qwen/Qwen3-0.6B");
+    assert_eq!(args.runtime.engine_ready_timeout_secs, 600);
+    assert_eq!(args.runtime.tool_call_parser, ParserSelection::Auto);
+    assert_eq!(args.runtime.reasoning_parser, ParserSelection::Auto);
+    assert_eq!(args.runtime.renderer, RendererSelection::Auto);
+    assert_eq!(args.runtime.max_model_len, None);
+    assert_eq!(args.runtime.shutdown_timeout, 0);
+}
+
+#[test]
+fn frontend_args_json_accepts_supported_non_default_fields() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "frontend",
+        "--listen-fd",
+        "3",
+        "--input-address",
+        "ipc:///tmp/input.sock",
+        "--output-address",
+        "ipc:///tmp/output.sock",
+        "--args-json",
+        r#"{"model_tag":"Qwen/Qwen3-0.6B","engine_ready_timeout_secs":42,"tool_call_parser":"hermes","reasoning_parser":"qwen3_thinking","tokenizer_mode":"deepseek_v32","max_model_len":8192,"shutdown_timeout":3}"#,
+    ])
+    .unwrap();
+
+    let Command::Frontend(args) = cli.command else {
+        panic!("expected frontend args");
+    };
+    assert_eq!(args.runtime.engine_ready_timeout_secs, 42);
+    assert_eq!(
+        args.runtime.tool_call_parser,
+        ParserSelection::Explicit("hermes".to_string())
+    );
+    assert_eq!(
+        args.runtime.reasoning_parser,
+        ParserSelection::Explicit("qwen3_thinking".to_string())
+    );
+    assert_eq!(args.runtime.renderer, RendererSelection::DeepSeekV32);
+    assert_eq!(args.runtime.max_model_len, Some(8192));
+    assert_eq!(args.runtime.shutdown_timeout, 3);
+}
+
+#[test]
+fn serve_args_accept_none_reasoning_parser() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--reasoning-parser",
+        "none",
+    ])
+    .unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    assert_eq!(args.runtime.reasoning_parser, ParserSelection::None);
+    assert_eq!(args.runtime.tool_call_parser, ParserSelection::Auto);
+}
+
+#[test]
+fn frontend_args_json_ignores_unknown_fields() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "frontend",
+        "--listen-fd",
+        "3",
+        "--input-address",
+        "ipc:///tmp/input.sock",
+        "--output-address",
+        "ipc:///tmp/output.sock",
+        "--args-json",
+        r#"{"model_tag":"Qwen/Qwen3-0.6B","uds":"/tmp/vllm.sock","nested_unknown":{"x":1}}"#,
+    ])
+    .unwrap();
+
+    let Command::Frontend(args) = cli.command else {
+        panic!("expected frontend args");
+    };
+    assert_eq!(args.runtime.model, "Qwen/Qwen3-0.6B");
+}
+
+#[test]
+fn frontend_args_json_accepts_noop_fields() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "frontend",
+        "--listen-fd",
+        "3",
+        "--input-address",
+        "ipc:///tmp/input.sock",
+        "--output-address",
+        "ipc:///tmp/output.sock",
+        "--args-json",
+        r#"{"model_tag":"Qwen/Qwen3-0.6B","api_server_count":2}"#,
+    ])
+    .unwrap();
+
+    let Command::Frontend(args) = cli.command else {
+        panic!("expected frontend args");
+    };
+    assert_eq!(args.runtime.model, "Qwen/Qwen3-0.6B");
+}
+
+#[test]
+fn frontend_args_json_rejects_unsupported_fields() {
+    let error = Cli::try_parse_from([
+        "vllm-rs",
+        "frontend",
+        "--listen-fd",
+        "3",
+        "--input-address",
+        "ipc:///tmp/input.sock",
+        "--output-address",
+        "ipc:///tmp/output.sock",
+        "--args-json",
+        r#"{"model_tag":"Qwen/Qwen3-0.6B","allow_credentials":true}"#,
+    ])
+    .unwrap_err();
+
+    expect![[r#"
+        error: invalid value '{"model_tag":"Qwen/Qwen3-0.6B","allow_credentials":true}' for '--args-json <JSON>': 
+        The following arguments are not implemented in Rust frontend yet:
+        - allow_credentials
+
+        Remove these arguments to continue.
+
+        For more information, try '--help'.
+    "#]].assert_eq(&error.to_string());
+}
+
+#[test]
+fn frontend_args_json_aggregates_multiple_unsupported_fields() {
+    let error = Cli::try_parse_from([
+        "vllm-rs",
+        "frontend",
+        "--listen-fd",
+        "3",
+        "--input-address",
+        "ipc:///tmp/input.sock",
+        "--output-address",
+        "ipc:///tmp/output.sock",
+        "--args-json",
+        r#"{"model_tag":"Qwen/Qwen3-0.6B","allow_credentials":true,"api_key":"secret"}"#,
+    ])
+    .unwrap_err();
+
+    expect![[r#"
+        error: invalid value '{"model_tag":"Qwen/Qwen3-0.6B","allow_credentials":true,"api_key":"secret"}' for '--args-json <JSON>': 
+        The following arguments are not implemented in Rust frontend yet:
+        - allow_credentials
+        - api_key
+
+        Remove these arguments to continue.
+
+        For more information, try '--help'.
+    "#]].assert_eq(&error.to_string());
+}
+
+#[test]
+fn frontend_args_json_rejects_malformed_json() {
+    let error = Cli::try_parse_from([
+        "vllm-rs",
+        "frontend",
+        "--listen-fd",
+        "3",
+        "--input-address",
+        "ipc:///tmp/input.sock",
+        "--output-address",
+        "ipc:///tmp/output.sock",
+        "--args-json",
+        r#"{"model_tag":"Qwen/Qwen3-0.6B""#,
+    ])
+    .unwrap_err();
+
+    expect![[r#"
+        error: invalid value '{"model_tag":"Qwen/Qwen3-0.6B"' for '--args-json <JSON>': invalid JSON arguments: EOF while parsing an object at line 1 column 30
+
+        For more information, try '--help'.
+    "#]].assert_eq(&error.to_string());
+}
+
+#[test]
+fn serve_args_reject_flags_before_model() {
+    let error = Cli::try_parse_from(["vllm-rs", "serve", "--python", "python3", "Qwen/Qwen3-0.6B"])
+        .unwrap_err();
+
+    expect![[r#"
+            error: the model must appear immediately after the command
+
+            Usage: vllm-rs serve <MODEL> [OPTIONS] [-- <PYTHON_ARGS>...]
+
+            For more information, try '--help'.
+        "#]]
+    .assert_eq(&error.to_string());
+}
+
+#[test]
+fn serve_args_accept_headless_mode() {
+    let cli = Cli::try_parse_from(["vllm-rs", "serve", "Qwen/Qwen3-0.6B", "--headless"]).unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    assert!(args.headless);
+}
+
+#[test]
+fn serve_args_keep_python_passthrough_flags_after_separator() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--python",
+        "python3",
+        "--",
+        "--tensor-parallel-size",
+        "2",
+        "--dtype",
+        "float16",
+    ])
+    .unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    assert_eq!(
+        args.managed_engine.python_args,
+        vec!["--tensor-parallel-size", "2", "--dtype", "float16"]
+    );
+}
+
+#[test]
+fn serve_args_keep_python_multi_char_alias_after_separator() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--python",
+        "python3",
+        "--",
+        "-tp",
+        "2",
+        "--dtype",
+        "float16",
+    ])
+    .unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    assert_eq!(
+        args.managed_engine.python_args,
+        vec!["-tp", "2", "--dtype", "float16"]
+    );
+}
+
+#[test]
+fn serve_args_keep_frontend_arg_after_separator() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--",
+        "--uds",
+        "/tmp/vllm.sock",
+    ])
+    .unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    assert_eq!(
+        args.managed_engine.python_args,
+        vec!["--uds", "/tmp/vllm.sock"]
+    );
+}
+
+#[test]
+fn serve_args_keep_python_multi_char_engine_aliases_after_separator() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--",
+        "-dpr",
+        "1",
+        "-dpl",
+        "2",
+    ])
+    .unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    assert_eq!(
+        args.managed_engine.python_args,
+        vec!["-dpr", "1", "-dpl", "2"]
+    );
+}
+
+#[test]
+fn serve_args_auto_forward_unknown_flags_without_separator() {
+    let cli = Cli::try_parse_from(["vllm-rs", "serve", "Qwen/Qwen3-0.6B", "--foo", "bar"]).unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    assert_eq!(args.managed_engine.python_args, vec!["--foo", "bar"]);
+}
+
+#[test]
+fn serve_args_auto_forward_negative_value_without_separator() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--num-gpu-blocks-override",
+        "-1",
+    ])
+    .unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    assert_eq!(
+        args.managed_engine.python_args,
+        vec!["--num-gpu-blocks-override", "-1"]
+    );
+}
+
+#[test]
+fn serve_args_accept_handshake_aliases() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--python",
+        "python3",
+        "--handshake-host",
+        "10.99.48.128",
+        "--handshake-port",
+        "13345",
+        "--data-parallel-size",
+        "4",
+    ])
+    .unwrap();
+
+    expect![[r#"
+        Cli {
+            command: Serve(
+                ServeArgs {
+                    headless: false,
+                    host: "127.0.0.1",
+                    port: 8000,
+                    uds: None,
+                    runtime: SharedRuntimeArgs {
+                        model: "Qwen/Qwen3-0.6B",
+                        engine_ready_timeout_secs: 600,
+                        tool_call_parser: Auto,
+                        reasoning_parser: Auto,
+                        renderer: Auto,
+                        max_model_len: None,
+                        grpc_port: None,
+                        shutdown_timeout: 0,
+                        chat_template: None,
+                        default_chat_template_kwargs: None,
+                        chat_template_content_format: Auto,
+                        enable_log_requests: false,
+                        disable_log_stats: false,
+                        served_model_name: [],
+                    },
+                    managed_engine: ManagedEngineArgs {
+                        python: "python3",
+                        handshake_host: "10.99.48.128",
+                        handshake_port: Some(
+                            13345,
+                        ),
+                        data_parallel_size: 4,
+                        data_parallel_size_local: None,
+                        python_args: [],
+                    },
+                },
+            ),
+        }
+    "#]]
+    .assert_debug_eq(&cli);
+}
+
+#[test]
+fn serve_args_accept_data_parallel_primary_flags() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--data-parallel-address",
+        "10.99.48.128",
+        "--data-parallel-rpc-port",
+        "13345",
+        "--data-parallel-size",
+        "4",
+    ])
+    .unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    assert!(!args.headless);
+    assert_eq!(args.managed_engine.handshake_host, "10.99.48.128");
+    assert_eq!(args.managed_engine.handshake_port, Some(13345));
+    assert_eq!(args.managed_engine.data_parallel_size, 4);
+}
+
+#[test]
+fn serve_frontend_config_uses_dp_address_as_advertised_host() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--handshake-host",
+        "10.99.48.128",
+        "--data-parallel-size",
+        "4",
+    ])
+    .unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    let config = args.to_frontend_config("tcp://10.99.48.128:29550".to_string());
+
+    let TransportMode::HandshakeOwner {
+        handshake_address,
+        advertised_host,
+        engine_count,
+        ready_timeout,
+        local_input_address,
+        local_output_address,
+    } = &config.transport_mode
+    else {
+        panic!("expected handshake-owned transport");
+    };
+
+    assert_eq!(handshake_address, "tcp://10.99.48.128:29550");
+    assert_eq!(advertised_host, "10.99.48.128");
+    assert_eq!(*engine_count, 4);
+    assert!(
+        local_input_address
+            .as_deref()
+            .is_some_and(|address| address.starts_with("ipc://"))
+    );
+    assert!(
+        local_output_address
+            .as_deref()
+            .is_some_and(|address| address.starts_with("ipc://"))
+    );
+    assert_ne!(local_input_address, local_output_address);
+
+    expect![[r#"
+        Config {
+            transport_mode: HandshakeOwner {
+                handshake_address: "tcp://10.99.48.128:29550",
+                advertised_host: "10.99.48.128",
+                engine_count: 4,
+                ready_timeout: 600s,
+                local_input_address: Some(
+                    "<ipc input>",
+                ),
+                local_output_address: Some(
+                    "<ipc output>",
+                ),
+            },
+            coordinator_mode: MaybeInProc,
+            model: "Qwen/Qwen3-0.6B",
+            served_model_name: [],
+            listener_mode: BindTcp {
+                host: "127.0.0.1",
+                port: 8000,
+            },
+            tool_call_parser: Auto,
+            reasoning_parser: Auto,
+            renderer: Auto,
+            chat_template: None,
+            default_chat_template_kwargs: None,
+            chat_template_content_format: Auto,
+            enable_log_requests: false,
+            disable_log_stats: false,
+            grpc_port: None,
+            shutdown_timeout: 0ns,
+        }
+    "#]]
+    .assert_debug_eq(&Config {
+        transport_mode: TransportMode::HandshakeOwner {
+            handshake_address: handshake_address.clone(),
+            advertised_host: advertised_host.clone(),
+            engine_count: *engine_count,
+            ready_timeout: *ready_timeout,
+            local_input_address: Some("<ipc input>".to_string()),
+            local_output_address: Some("<ipc output>".to_string()),
+        },
+        ..config.clone()
+    });
+}
+
+#[test]
+fn serve_frontend_config_keeps_tcp_transport_for_non_local_only_topology() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--data-parallel-address",
+        "10.99.48.128",
+        "--data-parallel-size",
+        "4",
+        "--data-parallel-size-local",
+        "2",
+    ])
+    .unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    let config = args.to_frontend_config("tcp://10.99.48.128:29550".to_string());
+
+    expect![[r#"
+        Config {
+            transport_mode: HandshakeOwner {
+                handshake_address: "tcp://10.99.48.128:29550",
+                advertised_host: "10.99.48.128",
+                engine_count: 4,
+                ready_timeout: 600s,
+                local_input_address: None,
+                local_output_address: None,
+            },
+            coordinator_mode: MaybeInProc,
+            model: "Qwen/Qwen3-0.6B",
+            served_model_name: [],
+            listener_mode: BindTcp {
+                host: "127.0.0.1",
+                port: 8000,
+            },
+            tool_call_parser: Auto,
+            reasoning_parser: Auto,
+            renderer: Auto,
+            chat_template: None,
+            default_chat_template_kwargs: None,
+            chat_template_content_format: Auto,
+            enable_log_requests: false,
+            disable_log_stats: false,
+            grpc_port: None,
+            shutdown_timeout: 0ns,
+        }
+    "#]]
+    .assert_debug_eq(&config);
+}
+
+#[test]
+fn frontend_args_reject_legacy_handshake_flags() {
+    let error = Cli::try_parse_from([
+        "vllm-rs",
+        "frontend",
+        "--listen-fd",
+        "3",
+        "--input-address",
+        "ipc:///tmp/input.sock",
+        "--output-address",
+        "ipc:///tmp/output.sock",
+        "--args-json",
+        r#"{"model_tag":"Qwen/Qwen3-0.6B"}"#,
+        "--handshake-address",
+        "tcp://127.0.0.1:62100",
+    ])
+    .unwrap_err();
+
+    assert!(error.to_string().contains("--handshake-address"));
+}
+
+#[test]
+fn frontend_config_uses_external_coordinator_when_coordinator_address_is_present() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "frontend",
+        "--listen-fd",
+        "3",
+        "--input-address",
+        "ipc:///tmp/input.sock",
+        "--output-address",
+        "ipc:///tmp/output.sock",
+        "--coordinator-address",
+        "tcp://127.0.0.1:7000",
+        "--engine-count",
+        "2",
+        "--args-json",
+        r#"{"model_tag":"Qwen/Qwen3-0.6B"}"#,
+    ])
+    .unwrap();
+
+    let Command::Frontend(args) = cli.command else {
+        panic!("expected frontend args");
+    };
+    let config = args.into_config();
+
+    expect![[r#"
+        Config {
+            transport_mode: Bootstrapped {
+                input_address: "ipc:///tmp/input.sock",
+                output_address: "ipc:///tmp/output.sock",
+                engine_count: 2,
+                ready_timeout: 600s,
+            },
+            coordinator_mode: External {
+                address: "tcp://127.0.0.1:7000",
+            },
+            model: "Qwen/Qwen3-0.6B",
+            served_model_name: [],
+            listener_mode: InheritedFd {
+                fd: 3,
+            },
+            tool_call_parser: Auto,
+            reasoning_parser: Auto,
+            renderer: Auto,
+            chat_template: None,
+            default_chat_template_kwargs: None,
+            chat_template_content_format: Auto,
+            enable_log_requests: false,
+            disable_log_stats: false,
+            grpc_port: None,
+            shutdown_timeout: 0ns,
+        }
+    "#]]
+    .assert_debug_eq(&config);
+}
+
+#[test]
+fn serve_frontend_config_uses_unix_listener_when_uds_is_present() {
+    let cli = Cli::try_parse_from([
+        "vllm-rs",
+        "serve",
+        "Qwen/Qwen3-0.6B",
+        "--uds",
+        "/tmp/vllm.sock",
+    ])
+    .unwrap();
+
+    let Command::Serve(args) = cli.command else {
+        panic!("expected serve args");
+    };
+    let config = args.to_frontend_config("tcp://127.0.0.1:29550".to_string());
+
+    assert_eq!(
+        config.listener_mode,
+        HttpListenerMode::BindUnix {
+            path: "/tmp/vllm.sock".to_string(),
+        }
+    );
+}
diff --git a/rust/src/cmd/src/cli/unsupported.rs b/rust/src/cmd/src/cli/unsupported.rs
new file mode 100644
index 000000000000..eeaa08328889
--- /dev/null
+++ b/rust/src/cmd/src/cli/unsupported.rs
@@ -0,0 +1,660 @@
+#![allow(clippy::doc_lazy_continuation)]
+
+use std::fmt::Display;
+use std::str::FromStr;
+
+use clap::Args;
+use clap::builder::{TypedValueParser, ValueParserFactory};
+use itertools::Itertools;
+use serde::{Deserialize, Deserializer, Serialize};
+
+/// Marker type for frontend-owned `serve` arguments that `vllm-rs` recognizes
+/// but does not support yet.
+///
+/// When passed as JSON args, it can be deserialized from any value, and
+/// serializes back to the original value.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(transparent)]
+pub struct Unsupported(pub serde_json::Value);
+
+impl FromStr for Unsupported {
+    type Err = String;
+
+    fn from_str(_s: &str) -> Result<Self, Self::Err> {
+        Err("argument is not implemented in Rust frontend yet
+
+Remove this unsupported argument to continue.
+
+Alternatively, if you intend to pass it only to the Python engine, put it after `--` (e.g., `-- <arg>`).
+This may lead to unexpected behavior as the Rust frontend will completely ignore that argument."
+            .to_string())
+    }
+}
+
+/// Marker type for no-op arguments that are accepted by the Rust frontend but
+/// have no effect.
+///
+/// When passed as JSON args, it can be deserialized from any value, but always
+/// serializes back to `null`.
+#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
+pub struct Noop;
+
+impl<'de> Deserialize<'de> for Noop {
+    fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        Ok(Noop)
+    }
+}
+
+impl ValueParserFactory for Noop {
+    type Parser = NoopValueParser;
+
+    fn value_parser() -> Self::Parser {
+        NoopValueParser
+    }
+}
+
+#[derive(Copy, Clone, Debug)]
+pub struct NoopValueParser;
+
+#[track_caller]
+fn noop_warn(arg: impl Display) {
+    tracing::warn!("argument '{arg}' currently has no effect in Rust frontend, ignoring");
+}
+
+impl TypedValueParser for NoopValueParser {
+    type Value = Noop;
+
+    fn parse_ref(
+        &self,
+        _cmd: &clap::Command,
+        arg: Option<&clap::Arg>,
+        _value: &std::ffi::OsStr,
+    ) -> Result<Self::Value, clap::Error> {
+        if let Some(arg) = arg {
+            noop_warn(arg);
+        }
+        Ok(Noop)
+    }
+}
+
+/// Frontend-owned Python `serve` arguments that `vllm-rs` recognizes but does
+/// not support yet.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, PartialEq, Eq, Default, Args, Serialize, Deserialize)]
+#[command(next_help_heading = "Options not implemented in Rust frontend yet")]
+pub struct UnsupportedArgs {
+    #[command(flatten)]
+    #[serde(default, flatten)]
+    top_level: TopLevelUnsupportedArgs,
+    #[command(flatten)]
+    #[serde(default, flatten)]
+    engine: EngineUnsupportedArgs,
+    #[command(flatten)]
+    #[serde(default, flatten)]
+    server: ServerUnsupportedArgs,
+}
+
+impl UnsupportedArgs {
+    /// Check whether any unsupported arguments are set, and if so, return an
+    /// error listing them. Also warn about any no-op arguments that are set
+    /// but will be ignored.
+    pub(crate) fn check(&self) -> Result<(), String> {
+        let value = serde_json::to_value(self).unwrap();
+        let map = value.as_object().unwrap();
+        let mut unsupported = Vec::new();
+
+        for (key, value) in map {
+            if value.is_null() {
+                noop_warn(key);
+            } else {
+                unsupported.push(key.as_str());
+            }
+        }
+
+        if !unsupported.is_empty() {
+            unsupported.sort_unstable();
+            let bullets = unsupported.into_iter().map(|key| format!("- {key}")).join("\n");
+            return Err(format!(
+                "
+The following arguments are not implemented in Rust frontend yet:
+{bullets}
+
+Remove these arguments to continue."
+            ));
+        }
+
+        Ok(())
+    }
+}
+
+/// Frontend-owned Python `vllm serve` top-level arguments that `vllm-rs`
+/// recognizes but does not support yet.
+///
+/// Source of truth in Python vLLM:
+/// - `vllm.entrypoints.openai.cli_args.make_arg_parser(...)`
+/// - `vllm.entrypoints.cli.serve.ServeSubcommand.subparser_init(...)`
+///
+/// These are not part of `EngineArgs`, `AsyncEngineArgs`, `BaseFrontendArgs`,
+/// or `FrontendArgs`. They live on the `serve` command itself and control
+/// managed-engine / multi-process orchestration rather than the shared frontend
+/// runtime config.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, PartialEq, Eq, Default, Args, Serialize, Deserialize)]
+pub struct TopLevelUnsupportedArgs {
+    /// How many API server processes to run. Defaults to data_parallel_size if
+    /// not specified.
+    #[arg(long, hide = true)]
+    pub api_server_count: Option<Noop>,
+
+    /// Read CLI options from a config file. Must be a YAML with the following
+    /// options: https://docs.vllm.ai/en/latest/configuration/serve_args.html
+    #[arg(long)]
+    pub config: Option<Unsupported>,
+
+    /// Launch a gRPC server instead of the HTTP OpenAI-compatible server.
+    /// Requires: pip install vllm[grpc].
+    #[arg(long, default_missing_value = "true", num_args = 0..=1)]
+    pub grpc: Option<Unsupported>,
+}
+
+/// Frontend-owned Python engine arguments that `vllm-rs` recognizes but does
+/// not support yet.
+///
+/// Source of truth in Python vLLM:
+/// - `vllm.engine.arg_utils.EngineArgs.add_cli_args(...)`
+/// - `vllm.engine.arg_utils.AsyncEngineArgs.add_cli_args(...)`
+///
+/// These arguments are declared through the Python engine-args surface, but
+/// they are still frontend-owned: the API server / AsyncLLM layer reads them
+/// for tokenizer setup, request validation, routing, logging, and other
+/// frontend behavior, so Rust must recognize them rather than treating them as
+/// pure engine passthrough.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, PartialEq, Eq, Default, Args, Serialize, Deserialize)]
+pub struct EngineUnsupportedArgs {
+    /// Name or path of the Hugging Face tokenizer to use. If unspecified, model
+    /// name or path will be used.
+    #[arg(long)]
+    pub tokenizer: Option<Unsupported>,
+
+    /// Name or path of the Hugging Face config to use. If unspecified, model
+    /// name or path will be used.
+    #[arg(long)]
+    pub hf_config_path: Option<Unsupported>,
+
+    /// Allowing API requests to read local images or videos from directories
+    /// specified by the server file system. This is a security risk. Should
+    /// only be enabled in trusted environments.
+    #[arg(long)]
+    pub allowed_local_media_path: Option<Unsupported>,
+
+    /// If set, only media URLs that belong to this domain can be used for
+    /// multi-modal inputs.
+    #[arg(long)]
+    pub allowed_media_domains: Option<Unsupported>,
+
+    /// The specific revision to use for the tokenizer on the Hugging Face Hub.
+    /// It can be a branch name, a tag name, or a commit id. If unspecified,
+    /// will use the default version.
+    #[arg(long)]
+    pub tokenizer_revision: Option<Unsupported>,
+
+    /// Maximum number of log probabilities to return when `logprobs` is
+    /// specified in `SamplingParams`. The default value comes the default for
+    /// the OpenAI Chat Completions API. -1 means no cap, i.e. all
+    /// (output_length * vocab_size) logprobs are allowed to be returned and
+    /// it may cause OOM.
+    #[arg(long)]
+    pub max_logprobs: Option<Unsupported>,
+
+    /// Skip initialization of tokenizer and detokenizer. Expects valid
+    /// `prompt_token_ids` and `None` for prompt from the input. The generated
+    /// output will contain token ids.
+    #[arg(
+        long,
+        visible_alias = "no-skip-tokenizer-init",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub skip_tokenizer_init: Option<Unsupported>,
+
+    /// If `True`, enables passing text embeddings as inputs via the
+    /// `prompt_embeds` key.
+    ///
+    /// WARNING: The vLLM engine may crash if incorrect shape of embeddings is
+    /// passed. Only enable this flag for trusted users!
+    #[arg(
+        long,
+        visible_alias = "no-enable-prompt-embeds",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub enable_prompt_embeds: Option<Unsupported>,
+
+    /// The token to use as HTTP bearer authorization for remote files. If
+    /// `True`, will use the token generated when running `hf auth login`
+    /// (stored in `~/.cache/huggingface/token`).
+    #[arg(long, default_missing_value = "true", num_args = 0..=1)]
+    pub hf_token: Option<Unsupported>,
+
+    /// If a dictionary, contains arguments to be forwarded to the Hugging Face
+    /// config. If a callable, it is called to update the HuggingFace config.
+    #[arg(long)]
+    pub hf_overrides: Option<Unsupported>,
+
+    /// The folder path to the generation config. Defaults to `"auto"`, the
+    /// generation config will be loaded from model path. If set to `"vllm"`, no
+    /// generation config is loaded, vLLM defaults will be used. If set to a
+    /// folder path, the generation config will be loaded from the specified
+    /// folder path. If `max_new_tokens` is specified in generation config,
+    /// then it sets a server-wide limit on the number of output tokens for
+    /// all requests.
+    #[arg(long)]
+    pub generation_config: Option<Unsupported>,
+
+    /// IOProcessor plugin name to load at model startup
+    #[arg(long)]
+    pub io_processor_plugin: Option<Unsupported>,
+
+    /// Path to a dynamically reasoning parser plugin that can be dynamically
+    /// loaded and registered.
+    #[arg(long)]
+    pub reasoning_parser_plugin: Option<Unsupported>,
+
+    /// Rank of the data parallel group.
+    #[arg(long, env = "VLLM_DP_RANK")]
+    pub data_parallel_rank: Option<Unsupported>,
+
+    /// Whether to use "hybrid" DP LB mode. Applies only to online serving
+    /// and when data_parallel_size > 0. Enables running an AsyncLLM
+    /// and API server on a "per-node" basis where vLLM load balances
+    /// between local data parallel ranks, but an external LB balances
+    /// between vLLM nodes/replicas. Set explicitly in conjunction with
+    /// --data-parallel-start-rank.
+    #[arg(
+        long,
+        visible_alias = "no-data-parallel-hybrid-lb",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub data_parallel_hybrid_lb: Option<Unsupported>,
+
+    /// Whether to use "external" DP LB mode. Applies only to online serving
+    /// and when data_parallel_size > 0. This is useful for a "one-pod-per-rank"
+    /// wide-EP setup in Kubernetes. Set implicitly when --data-parallel-rank
+    /// is provided explicitly to vllm serve.
+    #[arg(
+        long,
+        visible_alias = "no-data-parallel-external-lb",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub data_parallel_external_lb: Option<Unsupported>,
+
+    /// This feature is work in progress and no prefill optimization takes place
+    /// with this flag enabled currently.
+    #[arg(
+        long,
+        visible_alias = "no-kv-sharing-fast-prefill",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub kv_sharing_fast_prefill: Option<Unsupported>,
+
+    /// The maximum number of input items and options allowed per
+    /// prompt for each modality.
+    #[arg(long)]
+    pub limit_mm_per_prompt: Option<Unsupported>,
+
+    /// Additional args passed to process media inputs, keyed by modalities.
+    #[arg(long)]
+    pub media_io_kwargs: Option<Unsupported>,
+
+    /// Arguments to be forwarded to the model's processor for multi-modal data,
+    /// e.g., image processor.
+    #[arg(long)]
+    pub mm_processor_kwargs: Option<Unsupported>,
+
+    /// The size (in GiB) of the multi-modal processor cache.
+    #[arg(long)]
+    pub mm_processor_cache_gb: Option<Unsupported>,
+
+    /// Type of cache to use for the multi-modal preprocessor/mapper.
+    #[arg(long)]
+    pub mm_processor_cache_type: Option<Unsupported>,
+
+    /// If True, enable handling of LoRA adapters.
+    #[arg(
+        long,
+        visible_alias = "no-enable-lora",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub enable_lora: Option<Unsupported>,
+
+    /// Dictionary mapping specific modalities to LoRA model paths.
+    #[arg(long)]
+    pub default_mm_loras: Option<Unsupported>,
+
+    /// Target URL to which OpenTelemetry traces will be sent.
+    #[arg(long)]
+    pub otlp_traces_endpoint: Option<Unsupported>,
+
+    /// It makes sense to set this only if `--otlp-traces-endpoint` is set.
+    #[arg(long)]
+    pub collect_detailed_traces: Option<Unsupported>,
+
+    /// The interval (or buffer size) for streaming in terms of token length.
+    #[arg(long)]
+    pub stream_interval: Option<Unsupported>,
+
+    /// Structured outputs configuration.
+    #[arg(long)]
+    pub structured_outputs_config: Option<Noop>,
+
+    /// Log aggregate rather than per-engine statistics when using data
+    /// parallelism.
+    #[arg(long, default_missing_value = "true", num_args = 0..=1)]
+    pub aggregate_engine_logging: Option<Unsupported>,
+}
+
+/// Frontend-owned Python OpenAI server arguments that `vllm-rs` recognizes but
+/// does not support yet.
+///
+/// Source of truth in Python vLLM:
+/// - `vllm.entrypoints.openai.cli_args.BaseFrontendArgs`
+/// - `vllm.entrypoints.openai.cli_args.FrontendArgs`
+///
+/// These are not engine args. They belong to the Python OpenAI-compatible
+/// frontend / API-server layer itself, for example chat-template configuration,
+/// tool/frontend behavior, TLS / CORS / HTTP server settings, and other
+/// northbound server knobs.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, PartialEq, Eq, Default, Args, Serialize, Deserialize)]
+pub struct ServerUnsupportedArgs {
+    /// LoRA modules configurations in either 'name=path' format or JSON format
+    /// or JSON list format. Example (old format): `'name=path'` Example (new
+    /// format): `{"name": "name", "path": "lora_path",
+    /// "base_model_name": "id"}`
+    #[arg(long)]
+    pub lora_modules: Option<Unsupported>,
+
+    /// Whether to trust the chat template provided in the request. If False,
+    /// the server will always use the chat template specified by
+    /// `--chat-template` or the ones from tokenizer.
+    #[arg(
+        long,
+        visible_alias = "no-trust-request-chat-template",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub trust_request_chat_template: Option<Unsupported>,
+
+    /// The role name to return if `request.add_generation_prompt=true`.
+    #[arg(long)]
+    pub response_role: Option<Unsupported>,
+
+    /// When `--max-logprobs` is specified, represents single tokens as
+    /// strings of the form 'token_id:{token_id}' so that tokens that are not
+    /// JSON-encodable can be identified.
+    #[arg(
+        long,
+        visible_alias = "no-return-tokens-as-token-ids",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub return_tokens_as_token_ids: Option<Unsupported>,
+
+    /// Enable auto tool choice for supported models. Use `--tool-call-parser`
+    /// to specify which parser to use.
+    #[arg(
+        long,
+        visible_alias = "no-enable-auto-tool-choice",
+        default_missing_value = "true",
+        num_args = 0..=1,
+        hide = true
+    )]
+    pub enable_auto_tool_choice: Option<Noop>,
+
+    /// If specified, exclude tool definitions in prompts when
+    /// tool_choice='none'.
+    #[arg(
+        long,
+        visible_alias = "no-exclude-tools-when-tool-choice-none",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub exclude_tools_when_tool_choice_none: Option<Unsupported>,
+
+    /// Special the tool parser plugin write to parse the model-generated tool
+    /// into OpenAI API format, the name register in this plugin can be used in
+    /// `--tool-call-parser`.
+    #[arg(long)]
+    pub tool_parser_plugin: Option<Unsupported>,
+
+    /// Comma-separated list of host:port pairs (IPv4, IPv6, or hostname).
+    /// Examples: 127.0.0.1:8000, [::1]:8000, localhost:1234. Or `demo` for
+    /// built-in demo tools (browser and Python code interpreter). WARNING:
+    /// The `demo` Python tool executes model-generated code in Docker without
+    /// network isolation by default. See the security guide for more
+    /// information.
+    #[arg(long)]
+    pub tool_server: Option<Unsupported>,
+
+    /// Path to logging config JSON file for both vllm and uvicorn
+    #[arg(long, /* env = "VLLM_LOGGING_CONFIG_PATH" */)]
+    pub log_config_file: Option<Unsupported>,
+
+    /// Max number of prompt characters or prompt ID numbers being printed in
+    /// log. The default of None means unlimited.
+    #[arg(long)]
+    pub max_log_len: Option<Unsupported>,
+
+    /// If set to True, enable prompt_tokens_details in usage.
+    #[arg(
+        long,
+        visible_alias = "no-enable-prompt-tokens-details",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub enable_prompt_tokens_details: Option<Unsupported>,
+
+    /// If set to True, enable tracking server_load_metrics in the app state.
+    #[arg(
+        long,
+        visible_alias = "no-enable-server-load-tracking",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub enable_server_load_tracking: Option<Noop>,
+
+    /// If set to True, including usage on every request.
+    #[arg(
+        long,
+        visible_alias = "no-enable-force-include-usage",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub enable_force_include_usage: Option<Unsupported>,
+
+    /// Enable the `/tokenizer_info` endpoint. May expose chat
+    /// templates and other tokenizer configuration.
+    #[arg(
+        long,
+        visible_alias = "no-enable-tokenizer-info-endpoint",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub enable_tokenizer_info_endpoint: Option<Unsupported>,
+
+    /// If set to True, log model outputs (generations).
+    /// Requires `--enable-log-requests`. As with `--enable-log-requests`,
+    /// information is only logged at INFO level at maximum.
+    #[arg(
+        long,
+        visible_alias = "no-enable-log-outputs",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub enable_log_outputs: Option<Unsupported>,
+
+    /// If set to False, output deltas will not be logged. Relevant only if
+    /// --enable-log-outputs is set.
+    #[arg(
+        long,
+        visible_alias = "no-enable-log-deltas",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub enable_log_deltas: Option<Unsupported>,
+
+    /// If set to True, log the stack trace of error responses
+    #[arg(
+        long,
+        // env = "VLLM_SERVER_DEV_MODE",
+        visible_alias = "no-log-error-stack",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub log_error_stack: Option<Unsupported>,
+
+    /// If set to True, only enable the Tokens In<>Out endpoint.
+    /// This is intended for use in a Disaggregated Everything setup.
+    #[arg(
+        long,
+        visible_alias = "no-tokens-only",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub tokens_only: Option<Unsupported>,
+
+    /// Log level for uvicorn.
+    #[arg(long)]
+    pub uvicorn_log_level: Option<Unsupported>,
+
+    /// Disable uvicorn access log.
+    #[arg(
+        long,
+        visible_alias = "no-disable-uvicorn-access-log",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub disable_uvicorn_access_log: Option<Noop>,
+
+    /// Comma-separated list of endpoint paths to exclude from uvicorn access
+    /// logs. This is useful to reduce log noise from high-frequency endpoints
+    /// like health checks. Example: "/health,/metrics,/ping".
+    /// When set, access logs for requests to these paths will be suppressed
+    /// while keeping logs for other endpoints.
+    #[arg(long)]
+    pub disable_access_log_for_endpoints: Option<Noop>,
+
+    /// Allow credentials.
+    #[arg(
+        long,
+        visible_alias = "no-allow-credentials",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub allow_credentials: Option<Unsupported>,
+
+    /// Allowed origins.
+    #[arg(long)]
+    pub allowed_origins: Option<Unsupported>,
+
+    /// Allowed methods.
+    #[arg(long)]
+    pub allowed_methods: Option<Unsupported>,
+
+    /// Allowed headers.
+    #[arg(long)]
+    pub allowed_headers: Option<Unsupported>,
+
+    /// If provided, the server will require one of these keys to be presented
+    /// in the header.
+    #[arg(long)]
+    pub api_key: Option<Unsupported>,
+
+    /// The file path to the SSL key file.
+    #[arg(long)]
+    pub ssl_keyfile: Option<Unsupported>,
+
+    /// The file path to the SSL cert file.
+    #[arg(long)]
+    pub ssl_certfile: Option<Unsupported>,
+
+    /// The CA certificates file.
+    #[arg(long)]
+    pub ssl_ca_certs: Option<Unsupported>,
+
+    /// Refresh SSL Context when SSL certificate files change
+    #[arg(
+        long,
+        visible_alias = "no-enable-ssl-refresh",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub enable_ssl_refresh: Option<Unsupported>,
+
+    /// Whether client certificate is required (see stdlib ssl module's).
+    #[arg(long)]
+    pub ssl_cert_reqs: Option<Unsupported>,
+
+    /// SSL cipher suites for HTTPS (TLS 1.2 and below only).
+    /// Example: 'ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-CHACHA20-POLY1305'
+    #[arg(long)]
+    pub ssl_ciphers: Option<Unsupported>,
+
+    /// FastAPI root_path when app is behind a path based routing proxy.
+    #[arg(long)]
+    pub root_path: Option<Unsupported>,
+
+    /// Additional ASGI middleware to apply to the app. We accept multiple
+    /// --middleware arguments. The value should be an import path. If a
+    /// function is provided, vLLM will add it to the server using
+    /// `@app.middleware('http')`. If a class is provided, vLLM will
+    /// add it to the server using `app.add_middleware()`.
+    #[arg(long)]
+    pub middleware: Option<Unsupported>,
+
+    /// If specified, API server will add X-Request-Id header to responses.
+    #[arg(
+        long,
+        visible_alias = "no-enable-request-id-headers",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub enable_request_id_headers: Option<Unsupported>,
+
+    /// Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint.
+    #[arg(
+        long,
+        visible_alias = "no-disable-fastapi-docs",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub disable_fastapi_docs: Option<Unsupported>,
+
+    /// Maximum size (bytes) of an incomplete HTTP event (header or body) for
+    /// h11 parser. Helps mitigate header abuse. Default: 4194304 (4 MB).
+    #[arg(long)]
+    pub h11_max_incomplete_event_size: Option<Unsupported>,
+
+    /// Maximum number of HTTP headers allowed in a request for h11 parser.
+    /// Helps mitigate header abuse. Default: 256.
+    #[arg(long)]
+    pub h11_max_header_count: Option<Unsupported>,
+
+    /// Enable offline FastAPI documentation for air-gapped environments.
+    /// Uses vendored static assets bundled with vLLM.
+    #[arg(
+        long,
+        visible_alias = "no-enable-offline-docs",
+        default_missing_value = "true",
+        num_args = 0..=1
+    )]
+    pub enable_offline_docs: Option<Unsupported>,
+}
diff --git a/rust/src/cmd/src/logging.rs b/rust/src/cmd/src/logging.rs
new file mode 100644
index 000000000000..aab080c93986
--- /dev/null
+++ b/rust/src/cmd/src/logging.rs
@@ -0,0 +1,328 @@
+use std::{env, fmt, process};
+
+use time::UtcOffset;
+use time::macros::format_description;
+use tracing::level_filters::LevelFilter;
+use tracing::{Event, Level, Subscriber};
+use tracing_subscriber::Layer as _;
+use tracing_subscriber::filter::Targets;
+use tracing_subscriber::fmt::format::{FormatEvent, FormatFields, Writer};
+use tracing_subscriber::fmt::time::FormatTime;
+use tracing_subscriber::fmt::{FmtContext, FormattedFields};
+use tracing_subscriber::layer::SubscriberExt as _;
+use tracing_subscriber::registry::LookupSpan;
+use tracing_subscriber::util::SubscriberInitExt as _;
+
+const CYAN: &str = "\x1b[0;36m";
+const GREY: &str = "\x1b[90m";
+const GREEN: &str = "\x1b[32m";
+const YELLOW: &str = "\x1b[33m";
+const RED: &str = "\x1b[31m";
+const WHITE: &str = "\x1b[37m";
+const RESET: &str = "\x1b[0m";
+const VLLM_TIME_FORMAT: &[time::format_description::FormatItem<'static>] =
+    format_description!("[month]-[day] [hour]:[minute]:[second]");
+
+const PROCESS_LABEL: &str = "RustFrontend";
+
+/// Install the process-wide vLLM-style tracing subscriber for the CLI binary.
+pub(crate) fn init_tracing() {
+    let filter = build_targets_filter(
+        env::var("VLLM_LOGGING_LEVEL").ok().as_deref(),
+        env::var("RUST_LOG").ok().as_deref(),
+    );
+    let formatter = VllmEventFormatter::new();
+
+    let _ = tracing_subscriber::registry()
+        .with(tracing_subscriber::fmt::layer().event_format(formatter).with_filter(filter))
+        .try_init();
+}
+
+/// Build the CLI log filter by merging the vLLM-style default level with
+/// Rust-style target overrides.
+///
+/// Precedence:
+/// - Start from `VLLM_LOGGING_LEVEL` as the default level for all targets.
+/// - If `RUST_LOG` contains a global default level such as `warn`, it overrides
+///   `VLLM_LOGGING_LEVEL`.
+/// - Any explicit target directives in `RUST_LOG`, such as `hyper=info`, override whichever default
+///   level is active for those targets only.
+fn build_targets_filter(vllm_logging_level: Option<&str>, rust_log: Option<&str>) -> Targets {
+    let mut filter =
+        Targets::new().with_default(map_python_log_level(vllm_logging_level.unwrap_or("INFO")));
+
+    if let Some(rust_log) = rust_log
+        && !rust_log.is_empty()
+    {
+        let rust_log_targets: Targets = rust_log.parse().expect("failed to parse `RUST_LOG`");
+        if let Some(default_level) = rust_log_targets.default_level() {
+            filter = filter.with_default(default_level);
+        }
+        filter = filter.with_targets(rust_log_targets);
+    }
+
+    filter
+}
+
+#[derive(Debug, Clone, Copy)]
+struct VllmLocalTimer {
+    local_offset: UtcOffset,
+}
+
+impl Default for VllmLocalTimer {
+    fn default() -> Self {
+        let local_offset = UtcOffset::current_local_offset().unwrap_or(UtcOffset::UTC);
+        Self { local_offset }
+    }
+}
+
+impl FormatTime for VllmLocalTimer {
+    fn format_time(&self, w: &mut Writer<'_>) -> fmt::Result {
+        let now = time::OffsetDateTime::now_utc().to_offset(self.local_offset);
+        let formatted = now.format(VLLM_TIME_FORMAT).map_err(|_| fmt::Error)?;
+        w.write_str(&formatted)
+    }
+}
+
+#[derive(Debug, Clone)]
+struct VllmEventFormatter {
+    prefix: String,
+    timer: VllmLocalTimer,
+}
+
+impl VllmEventFormatter {
+    fn new() -> Self {
+        Self {
+            prefix: format!("({} pid={})", PROCESS_LABEL, process::id()),
+            timer: VllmLocalTimer::default(),
+        }
+    }
+
+    fn write_process_prefix(&self, writer: &mut Writer<'_>, ansi: bool) -> fmt::Result {
+        write_colored(writer, ansi, Some(CYAN), &self.prefix)?;
+        writer.write_char(' ')
+    }
+
+    fn write_level(&self, writer: &mut Writer<'_>, level: &Level, ansi: bool) -> fmt::Result {
+        let (text, color) = match *level {
+            Level::TRACE => ("TRACE", WHITE),
+            Level::DEBUG => ("DEBUG", WHITE),
+            Level::INFO => ("INFO", GREEN),
+            Level::WARN => ("WARNING", YELLOW),
+            Level::ERROR => ("ERROR", RED),
+        };
+        write_colored(writer, ansi, Some(color), text)
+    }
+
+    fn write_timestamp(&self, writer: &mut Writer<'_>, ansi: bool) -> fmt::Result {
+        if ansi {
+            writer.write_str(GREY)?;
+        }
+        if self.timer.format_time(writer).is_err() {
+            writer.write_str("<unknown time>")?;
+        }
+        if ansi {
+            writer.write_str(RESET)?;
+        }
+        Ok(())
+    }
+
+    fn write_location(
+        &self,
+        writer: &mut Writer<'_>,
+        file: Option<&str>,
+        line: Option<u32>,
+        full_path: bool,
+        ansi: bool,
+    ) -> fmt::Result {
+        let Some(file) = file else {
+            return Ok(());
+        };
+        let file = if full_path {
+            file
+        } else {
+            shorten_file_path(file)
+        };
+        if ansi {
+            writer.write_str(GREY)?;
+        }
+        match line {
+            Some(line) => write!(writer, "[{file}:{line}]")?,
+            None => write!(writer, "[{file}]")?,
+        }
+        if ansi {
+            writer.write_str(RESET)?;
+        }
+        Ok(())
+    }
+
+    fn write_scope<S, N>(&self, ctx: &FmtContext<'_, S, N>, writer: &mut Writer<'_>) -> fmt::Result
+    where
+        S: Subscriber + for<'lookup> LookupSpan<'lookup>,
+        N: for<'writer> FormatFields<'writer> + 'static,
+    {
+        let Some(scope) = ctx.event_scope() else {
+            return Ok(());
+        };
+
+        let mut seen = false;
+        for span in scope.from_root() {
+            if seen {
+                writer.write_str(":")?;
+            }
+            seen = true;
+            writer.write_str(span.metadata().name())?;
+
+            let ext = span.extensions();
+            if let Some(fields) = ext.get::<FormattedFields<N>>()
+                && !fields.is_empty()
+            {
+                write!(writer, "{{{fields}}}")?;
+            }
+        }
+
+        if seen {
+            writer.write_str(": ")?;
+        }
+
+        Ok(())
+    }
+}
+
+impl<S, N> FormatEvent<S, N> for VllmEventFormatter
+where
+    S: Subscriber + for<'lookup> LookupSpan<'lookup>,
+    N: for<'writer> FormatFields<'writer> + 'static,
+{
+    fn format_event(
+        &self,
+        ctx: &FmtContext<'_, S, N>,
+        mut writer: Writer<'_>,
+        event: &Event<'_>,
+    ) -> fmt::Result {
+        let meta = event.metadata();
+        let ansi = writer.has_ansi_escapes();
+
+        self.write_process_prefix(&mut writer, ansi)?;
+        self.write_level(&mut writer, meta.level(), ansi)?;
+        writer.write_char(' ')?;
+        self.write_timestamp(&mut writer, ansi)?;
+        writer.write_char(' ')?;
+        // Use the full file path only when DEBUG (or more verbose) is enabled anywhere,
+        // independent of the level of this particular event. Filenames alone are often
+        // ambiguous, but full paths are too noisy for normal INFO-level operation.
+        let full_path = LevelFilter::current() >= LevelFilter::DEBUG;
+        self.write_location(&mut writer, meta.file(), meta.line(), full_path, ansi)?;
+        writer.write_char(' ')?;
+        self.write_scope(ctx, &mut writer)?;
+        ctx.format_fields(writer.by_ref(), event)?;
+        writer.write_char('\n')
+    }
+}
+
+/// Shorten a source file path for log output while preserving enough context
+/// for common Rust entrypoint and module filenames.
+///
+/// - For `mod.rs`, keep the parent directory as `parent/mod.rs`.
+/// - For `src/lib.rs` and `src/main.rs`, keep one additional component as `crate/src/lib.rs` or
+///   `crate/src/main.rs` when available.
+/// - Other files are displayed as just the basename.
+fn shorten_file_path(file: &str) -> &str {
+    let mut parts = file.rsplit('/');
+    let name = parts.next().unwrap_or(file);
+    let parent = parts.next();
+    let grandparent = parts.next();
+
+    let Some(parent) = parent else {
+        return file;
+    };
+
+    if name == "mod.rs" {
+        return &file[file.len() - parent.len() - 1 - name.len()..];
+    }
+
+    if !matches!(name, "lib.rs" | "main.rs") || parent != "src" {
+        return name;
+    }
+    let Some(grandparent) = grandparent else {
+        return file;
+    };
+
+    &file[file.len() - grandparent.len() - 1 - parent.len() - 1 - name.len()..]
+}
+
+fn write_colored(
+    writer: &mut Writer<'_>,
+    ansi: bool,
+    color: Option<&str>,
+    text: &str,
+) -> fmt::Result {
+    if ansi {
+        if let Some(color) = color {
+            writer.write_str(color)?;
+        }
+        writer.write_str(text)?;
+        if color.is_some() {
+            writer.write_str(RESET)?;
+        }
+        return Ok(());
+    }
+
+    writer.write_str(text)
+}
+
+/// Map a Python logging level name to the corresponding Rust tracing level.
+fn map_python_log_level(level: &str) -> LevelFilter {
+    match level.to_ascii_uppercase().as_str() {
+        "CRITICAL" | "FATAL" => LevelFilter::ERROR,
+        "ERROR" => LevelFilter::ERROR,
+        "WARNING" | "WARN" => LevelFilter::WARN,
+        "INFO" => LevelFilter::INFO,
+        "DEBUG" => LevelFilter::DEBUG,
+        "NOTSET" => LevelFilter::TRACE,
+        _ => LevelFilter::INFO,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn rust_log_target_overrides_are_merged_with_vllm_default_level() {
+        let filter = build_targets_filter(Some("DEBUG"), Some("hyper=warn,tower=error"));
+
+        assert_eq!(filter.to_string(), "tower=error,hyper=warn,debug");
+    }
+
+    #[test]
+    fn rust_log_default_level_overrides_vllm_default_level() {
+        let filter = build_targets_filter(Some("DEBUG"), Some("warn,hyper=info"));
+
+        assert_eq!(filter.to_string(), "hyper=info,warn");
+    }
+
+    #[test]
+    fn invalid_vllm_level_falls_back_to_info() {
+        let filter = build_targets_filter(Some("bogus"), None);
+
+        assert_eq!(filter.to_string(), "info");
+    }
+
+    #[test]
+    fn location_path_uses_filename_for_non_ambiguous_files() {
+        assert_eq!(shorten_file_path("src/cmd/src/logging.rs"), "logging.rs");
+        assert_eq!(shorten_file_path("src/chat/lib.rs"), "lib.rs");
+        assert_eq!(shorten_file_path("src/chat/main.rs"), "main.rs");
+        assert_eq!(shorten_file_path("src/chat/src/xmod.rs"), "xmod.rs");
+    }
+
+    #[test]
+    fn location_path_keeps_more_context_for_common_entrypoint_filenames() {
+        assert_eq!(shorten_file_path("src/lib.rs"), "src/lib.rs");
+        assert_eq!(shorten_file_path("src/chat/src/lib.rs"), "chat/src/lib.rs");
+        assert_eq!(shorten_file_path("src/cmd/src/main.rs"), "cmd/src/main.rs");
+        assert_eq!(shorten_file_path("mod.rs"), "mod.rs");
+        assert_eq!(shorten_file_path("src/chat/src/tool/mod.rs"), "tool/mod.rs");
+    }
+}
diff --git a/rust/src/cmd/src/main.rs b/rust/src/cmd/src/main.rs
new file mode 100644
index 000000000000..ce4e37e09bd0
--- /dev/null
+++ b/rust/src/cmd/src/main.rs
@@ -0,0 +1,189 @@
+mod cli;
+mod logging;
+
+use std::env;
+use std::process::ExitStatus;
+
+use anyhow::{Context, Result, anyhow, bail};
+use tokio_util::sync::CancellationToken;
+use tracing::{info, warn};
+use vllm_managed_engine::ManagedEngineHandle;
+
+use crate::cli::{Cli, Command};
+
+const TOKIO_WORKER_THREADS_ENV: &str = "TOKIO_WORKER_THREADS";
+const DEFAULT_MAX_TOKIO_WORKER_THREADS: usize = 32;
+
+/// Cap the default number of Tokio worker threads if the user did not
+/// explicitly set `TOKIO_WORKER_THREADS` to avoid spawning too many threads on
+/// machines with a large number of CPUs, which may lead to excessive context
+/// switching and degraded performance.
+fn tokio_worker_threads() -> Option<usize> {
+    if env::var_os(TOKIO_WORKER_THREADS_ENV).is_some() {
+        return None;
+    }
+
+    std::thread::available_parallelism()
+        .map(|parallelism| {
+            let available = parallelism.get();
+            let worker_threads = available.min(DEFAULT_MAX_TOKIO_WORKER_THREADS);
+            if worker_threads < available {
+                info!(
+                    available_parallelism = available,
+                    capped_worker_threads = worker_threads,
+                    "capping tokio worker threads, set {TOKIO_WORKER_THREADS_ENV} to override"
+                );
+            }
+            worker_threads
+        })
+        .ok()
+}
+
+/// Reason that caused a managed `serve` session to stop.
+#[derive(Debug)]
+enum ShutdownReason {
+    Signal,
+    Server(anyhow::Error),
+    EngineExited(ExitStatus),
+}
+
+/// Cancellation token tripped by Ctrl-C or SIGTERM.
+fn shutdown_signal() -> CancellationToken {
+    let token = CancellationToken::new();
+    let shutdown = token.clone();
+
+    tokio::spawn(async move {
+        let ctrl_c = async {
+            tokio::signal::ctrl_c().await.expect("failed to install Ctrl-C signal handler");
+        };
+
+        let sigterm = async {
+            tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
+                .expect("failed to install SIGTERM signal handler")
+                .recv()
+                .await;
+        };
+
+        tokio::select! {
+            _ = ctrl_c => info!("received shutdown signal (Ctrl-C), shutting down..."),
+            _ = sigterm => info!("received shutdown signal (SIGTERM), shutting down..."),
+        }
+
+        shutdown.cancel();
+    });
+
+    token
+}
+
+fn main() -> Result<()> {
+    logging::init_tracing();
+    let cli = Cli::parse();
+
+    let mut runtime = tokio::runtime::Builder::new_multi_thread();
+    runtime.enable_all();
+    if let Some(worker_threads) = tokio_worker_threads() {
+        runtime.worker_threads(worker_threads);
+    }
+
+    runtime
+        .build()
+        .context("failed to build Tokio runtime")?
+        .block_on(async_main(cli))
+}
+
+async fn async_main(cli: Cli) -> Result<()> {
+    match cli.command {
+        Command::Frontend(args) => vllm_server::serve(args.into_config(), shutdown_signal()).await,
+        Command::Serve(args) => {
+            let handshake_port = args.managed_engine.resolve_handshake_port()?;
+
+            if args.managed_engine.data_parallel_size_local == Some(0) {
+                if args.headless {
+                    bail!("cannot combine `--headless` with `--data-parallel-size-local 0`");
+                }
+
+                let handshake_address = args.managed_engine.handshake_address(handshake_port);
+                info!(
+                    %handshake_address,
+                    engine_count = args.managed_engine.data_parallel_size,
+                    "running Rust frontend without a managed local Python engine"
+                );
+                let config = args.to_frontend_config(handshake_address);
+                return vllm_server::serve(config, shutdown_signal()).await;
+            }
+
+            let shutdown_timeout = args.runtime.shutdown_timeout();
+            let engine_config = args.to_managed_engine_config(handshake_port);
+            let handshake_address = engine_config.handshake_address();
+
+            let engine = ManagedEngineHandle::spawn(engine_config)
+                .await
+                .context("failed to start managed Python headless engine")?;
+
+            let shutdown = shutdown_signal();
+
+            let mut serve_task = if args.headless {
+                info!("running managed Python headless engine without Rust frontend");
+                let shutdown = shutdown.clone();
+                tokio::spawn(async move {
+                    shutdown.cancelled().await;
+                    Ok(())
+                })
+            } else {
+                let config = args.to_frontend_config(handshake_address);
+                let shutdown = shutdown.clone();
+                tokio::spawn(async move {
+                    let result = vllm_server::serve(config, shutdown).await;
+                    if result.is_ok() {
+                        info!("OpenAI server shut down gracefully");
+                    }
+                    result
+                })
+            };
+
+            let shutdown_reason = tokio::select! {
+                biased;
+
+                // Received shutdown signal via Ctrl-C or SIGTERM.
+                _ = shutdown.cancelled() => ShutdownReason::Signal,
+
+                // Engine process exited unexpectedly.
+                status = engine.wait_for_exit() => {
+                    warn!(%status, "managed Python headless engine exited, shutting down...");
+                    ShutdownReason::EngineExited(status)
+                }
+
+                // Serve task exited unexpectedly.
+                serve_result = &mut serve_task => {
+                    let serve_result = serve_result.context("serve task join failed")?;
+                    match serve_result {
+                        Ok(()) => ShutdownReason::Server(anyhow!("OpenAI server shut down unexpectedly without error")),
+                        Err(error) => ShutdownReason::Server(error),
+                    }
+                }
+            };
+            // Regardless of the shutdown reason, broadcast shutdown signal here to ensure
+            // that all serving tasks are notified.
+            shutdown.cancel();
+
+            // Shutdown begins. Terminate the managed engine first.
+            engine.shutdown(shutdown_timeout).await?;
+            info!("managed engine shut down gracefully");
+            // Wait for the API server to shut down gracefully by draining in-flight
+            // requests.
+            if !matches!(shutdown_reason, ShutdownReason::Server(_)) {
+                serve_task.await.context("serve task join failed")??;
+            }
+
+            match shutdown_reason {
+                ShutdownReason::Signal => Ok(()),
+                ShutdownReason::Server(error) => {
+                    Err(error.context("OpenAI server shut down unexpectedly"))
+                }
+                ShutdownReason::EngineExited(status) => Err(anyhow!(
+                    "managed Python headless engine exited unexpectedly with status {status}"
+                )),
+            }
+        }
+    }
+}
diff --git a/rust/src/engine-core-client/Cargo.toml b/rust/src/engine-core-client/Cargo.toml
new file mode 100644
index 000000000000..14b9fe142349
--- /dev/null
+++ b/rust/src/engine-core-client/Cargo.toml
@@ -0,0 +1,49 @@
+[package]
+name = "vllm-engine-core-client"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[features]
+test-util = ["dep:tempfile"]
+
+[dependencies]
+arc-swap.workspace = true
+bytemuck.workspace = true
+byteorder.workspace = true
+bytes.workspace = true
+easy-ext.workspace = true
+enum-as-inner.workspace = true
+futures.workspace = true
+half.workspace = true
+hex.workspace = true
+itertools.workspace = true
+parking_lot.workspace = true
+rmp-serde.workspace = true
+rmpv.workspace = true
+serde.workspace = true
+serde_default.workspace = true
+serde_json.workspace = true
+serde_repr.workspace = true
+serde_tuple.workspace = true
+serde_with.workspace = true
+task-local.workspace = true
+tempfile = { workspace = true, optional = true }
+thiserror.workspace = true
+thiserror-ext.workspace = true
+tokio.workspace = true
+tokio-util.workspace = true
+tracing.workspace = true
+vllm-metrics.workspace = true
+zeromq.workspace = true
+
+[dev-dependencies]
+anyhow.workspace = true
+clap.workspace = true
+expect-test.workspace = true
+hex.workspace = true
+tempfile.workspace = true
+tracing-subscriber.workspace = true
+
+[lints]
+workspace = true
diff --git a/rust/src/engine-core-client/examples/README.md b/rust/src/engine-core-client/examples/README.md
new file mode 100644
index 000000000000..4fa13133d473
--- /dev/null
+++ b/rust/src/engine-core-client/examples/README.md
@@ -0,0 +1,53 @@
+# Engine-Core Smoke Tests
+
+Start headless `vllm`:
+
+```bash
+source ../vllm/.venv/bin/activate
+HF_HUB_OFFLINE=1 \
+VLLM_LOGGING_LEVEL=DEBUG \
+VLLM_CPU_KVCACHE_SPACE=2 \
+VLLM_HOST_IP=127.0.0.1 \
+VLLM_LOOPBACK_IP=127.0.0.1 \
+python3 -m vllm.entrypoints.cli.main serve Qwen/Qwen3-0.6B \
+  --headless \
+  --enable-sleep-mode \
+  --data-parallel-address 127.0.0.1 \
+  --data-parallel-rpc-port 62100 \
+  --data-parallel-size-local 1 \
+  --max-model-len 512 \
+  --dtype float16
+```
+
+Run the Rust smoke test through the `vllm-engine-core-client` utility interface:
+
+```bash
+cargo run -p vllm-engine-core-client --example external_engine_utility_call -- \
+  --handshake-address tcp://127.0.0.1:62100 \
+  --host 127.0.0.1
+```
+
+If your current engine setup does not support sleep mode, skip the `sleep` / `wake_up` part of the
+smoke:
+
+```bash
+cargo run -p vllm-engine-core-client --example external_engine_utility_call -- \
+  --handshake-address tcp://127.0.0.1:62100 \
+  --host 127.0.0.1 \
+  --skip-sleep-wake
+```
+
+Run the Rust smoke test for sample logprobs decoding through the raw engine-core request path:
+
+```bash
+cargo run -p vllm-engine-core-client --example external_engine_logprobs -- \
+  --handshake-address tcp://127.0.0.1:62100 \
+  --host 127.0.0.1
+```
+
+This smoke requests a small generated-token `logprobs` payload plus prompt logprobs over a much
+longer prompt, so it exercises both the inline and aux-frame decode paths against a real engine.
+The Rust client decodes those payloads into semantic per-position records rather than exposing the
+raw ndarray/tensor wire shape.
+
+IMPORTANT: You must restart `vllm` each time you run the smoke test, as the vLLM engine cannot manage frontend closures and subsequent reconnects. In other words, do not reuse existing `vllm` instances, if any.
diff --git a/rust/src/engine-core-client/examples/external_engine_logprobs.rs b/rust/src/engine-core-client/examples/external_engine_logprobs.rs
new file mode 100644
index 000000000000..08290c69bf5b
--- /dev/null
+++ b/rust/src/engine-core-client/examples/external_engine_logprobs.rs
@@ -0,0 +1,190 @@
+use std::time::Duration;
+
+use anyhow::{Context, Result, bail};
+use clap::Parser;
+use futures::StreamExt as _;
+use tokio::time::timeout;
+use tracing_subscriber::EnvFilter;
+use vllm_engine_core_client::protocol::{
+    EngineCoreFinishReason, EngineCoreRequest, EngineCoreSamplingParams,
+};
+use vllm_engine_core_client::{
+    EngineCoreClient, EngineCoreClientConfig, EngineCoreStreamOutput, TransportMode,
+};
+
+const BASE_PROMPT_TOKEN_IDS: &[u32] = &[20841, 448, 6896, 25, 23811];
+
+#[derive(Debug, Parser)]
+#[command(about = "Smoke-test engine-core sample logprobs against an external vLLM engine.")]
+struct Args {
+    #[arg(long)]
+    handshake_address: String,
+    #[arg(long, default_value_t = 1)]
+    engine_count: usize,
+    #[arg(long, default_value = "Qwen/Qwen3-0.6B")]
+    model: String,
+    #[arg(long, default_value = "127.0.0.1")]
+    host: String,
+    #[arg(long, default_value_t = 0)]
+    client_index: u32,
+    #[arg(long, default_value_t = 30)]
+    ready_timeout_secs: u64,
+    #[arg(long, default_value_t = 120)]
+    output_timeout_secs: u64,
+    #[arg(long, default_value_t = 1)]
+    max_tokens: u32,
+    #[arg(long, default_value_t = 2)]
+    logprobs: i32,
+    #[arg(long, default_value_t = 1)]
+    prompt_logprobs: i32,
+    #[arg(long, default_value_t = 96)]
+    prompt_repeats: usize,
+}
+
+fn init_tracing() {
+    let filter = EnvFilter::try_from_default_env()
+        .unwrap_or_else(|_| EnvFilter::new("vllm_engine_core_client=debug"));
+    let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init();
+}
+
+fn unique_request_id() -> String {
+    let nanos = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .expect("system clock should be after unix epoch")
+        .as_nanos();
+    format!("rust-engine-core-logprobs-{nanos}")
+}
+
+fn build_prompt_token_ids(prompt_repeats: usize) -> Vec<u32> {
+    let repeats = prompt_repeats.max(1);
+    BASE_PROMPT_TOKEN_IDS.repeat(repeats)
+}
+
+fn build_request(
+    request_id: String,
+    prompt_token_ids: Vec<u32>,
+    max_tokens: u32,
+    logprobs: i32,
+    prompt_logprobs: i32,
+    client_index: u32,
+) -> EngineCoreRequest {
+    EngineCoreRequest {
+        request_id,
+        prompt_token_ids: Some(prompt_token_ids),
+        sampling_params: Some(EngineCoreSamplingParams {
+            max_tokens,
+            logprobs: Some(logprobs),
+            prompt_logprobs: Some(prompt_logprobs),
+            ..EngineCoreSamplingParams::for_test()
+        }),
+        arrival_time: 0.0,
+        client_index,
+        ..EngineCoreRequest::default()
+    }
+}
+
+async fn wait_for_final_output(
+    mut stream: vllm_engine_core_client::EngineCoreOutputStream,
+) -> Result<EngineCoreStreamOutput> {
+    while let Some(output) = stream.next().await {
+        let output = output.context("failed to receive engine-core output")?;
+        if output.finished() {
+            return Ok(output);
+        }
+    }
+    bail!("request stream ended without a final output")
+}
+
+#[tokio::main(flavor = "multi_thread")]
+async fn main() -> Result<()> {
+    init_tracing();
+    let args = Args::parse();
+    let ready_timeout = Duration::from_secs(args.ready_timeout_secs);
+    let output_timeout = Duration::from_secs(args.output_timeout_secs);
+    let request_id = unique_request_id();
+    let prompt_token_ids = build_prompt_token_ids(args.prompt_repeats);
+    let client = EngineCoreClient::connect(EngineCoreClientConfig {
+        transport_mode: TransportMode::HandshakeOwner {
+            handshake_address: args.handshake_address.clone(),
+            advertised_host: args.host.clone(),
+            engine_count: args.engine_count,
+            ready_timeout,
+            local_input_address: None,
+            local_output_address: None,
+        },
+        coordinator_mode: None,
+        model_name: args.model.clone(),
+        client_index: args.client_index,
+    })
+    .await
+    .context("failed to connect to external vLLM engine")?;
+
+    println!("model={}", args.model);
+    println!("handshake_address={}", args.handshake_address);
+    println!("engine_count={}", args.engine_count);
+    println!("input_address={}", client.input_address());
+    println!("output_address={}", client.output_address());
+    println!("engine_identities={:x?}", client.engine_identities());
+
+    let request = build_request(
+        request_id.clone(),
+        prompt_token_ids.clone(),
+        args.max_tokens,
+        args.logprobs,
+        args.prompt_logprobs,
+        args.client_index,
+    );
+    println!("request_id={request_id}");
+    println!("prompt_len={}", prompt_token_ids.len());
+    println!("base_prompt_len={}", BASE_PROMPT_TOKEN_IDS.len());
+    println!("prompt_repeats={}", args.prompt_repeats);
+    println!("requested_logprobs={}", args.logprobs);
+    println!("requested_prompt_logprobs={}", args.prompt_logprobs);
+
+    let stream = client.call(request).await.context("failed to submit engine-core request")?;
+    let output = timeout(output_timeout, wait_for_final_output(stream))
+        .await
+        .context("timed out waiting for final output")??;
+
+    let finish_reason = output.finish_reason;
+    let token_ids = output.new_token_ids.clone();
+    let logprobs = output
+        .new_logprobs
+        .as_ref()
+        .and_then(|value| value.as_direct())
+        .context("engine output did not include decoded sample logprobs")?;
+    let prompt_logprobs = output
+        .new_prompt_logprobs_tensors
+        .as_ref()
+        .and_then(|value| value.as_direct())
+        .context("engine output did not include decoded prompt logprobs")?;
+
+    println!("token_ids={token_ids:?}");
+    println!("finish_reason={finish_reason:?}");
+    println!("new_logprobs={logprobs:#?}");
+    println!("new_prompt_logprobs_tensors={prompt_logprobs:#?}");
+
+    client.shutdown().await.context("failed to shut down engine-core client")?;
+
+    if finish_reason != Some(EngineCoreFinishReason::Length) {
+        bail!("unexpected finish_reason: expected Length, got {finish_reason:?}");
+    }
+    if token_ids.is_empty() {
+        bail!("engine returned no generated token ids");
+    }
+    if logprobs.is_empty() {
+        bail!("decoded logprobs payload is unexpectedly empty");
+    }
+    if prompt_logprobs.is_empty() {
+        bail!("decoded prompt logprobs payload is unexpectedly empty");
+    }
+    if prompt_logprobs.len() + 1 < prompt_token_ids.len() {
+        bail!(
+            "prompt logprobs rows look too short: prompt_len={}, rows={}",
+            prompt_token_ids.len(),
+            prompt_logprobs.len()
+        );
+    }
+
+    Ok(())
+}
diff --git a/rust/src/engine-core-client/examples/external_engine_utility_call.rs b/rust/src/engine-core-client/examples/external_engine_utility_call.rs
new file mode 100644
index 000000000000..ee2a4e57b7aa
--- /dev/null
+++ b/rust/src/engine-core-client/examples/external_engine_utility_call.rs
@@ -0,0 +1,143 @@
+use std::time::Duration;
+
+use anyhow::{Context, Result, bail};
+use clap::Parser;
+use tracing_subscriber::EnvFilter;
+use vllm_engine_core_client::{EngineCoreClient, EngineCoreClientConfig, TransportMode};
+
+#[derive(Debug, Parser)]
+#[command(about = "Smoke-test EngineCoreClient utility calls against an external vLLM engine.")]
+struct Args {
+    #[arg(long)]
+    handshake_address: String,
+    #[arg(long, default_value_t = 1)]
+    engine_count: usize,
+    #[arg(long, default_value = "Qwen/Qwen3-0.6B")]
+    model: String,
+    #[arg(long, default_value = "127.0.0.1")]
+    host: String,
+    #[arg(long, default_value_t = 0)]
+    client_index: u32,
+    #[arg(long, default_value_t = 30)]
+    ready_timeout_secs: u64,
+    #[arg(
+        long,
+        default_value_t = false,
+        help = "Expected initial result of is_sleeping() before running the smoke steps."
+    )]
+    expected_is_sleeping: bool,
+    #[arg(long, default_value_t = false)]
+    reset_running_requests: bool,
+    #[arg(long, default_value_t = false)]
+    reset_external: bool,
+    #[arg(long, default_value_t = 1)]
+    sleep_level: u32,
+    #[arg(long, default_value = "abort")]
+    sleep_mode: String,
+    #[arg(
+        long,
+        default_value_t = false,
+        help = "Skip sleep/wake_up calls when the engine was not started with sleep-mode support."
+    )]
+    skip_sleep_wake: bool,
+}
+
+fn init_tracing() {
+    let filter = EnvFilter::try_from_default_env()
+        .unwrap_or_else(|_| EnvFilter::new("vllm_engine_core_client=debug"));
+    let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init();
+}
+
+#[tokio::main(flavor = "multi_thread")]
+async fn main() -> Result<()> {
+    init_tracing();
+    let args = Args::parse();
+    let client = EngineCoreClient::connect(EngineCoreClientConfig {
+        transport_mode: TransportMode::HandshakeOwner {
+            handshake_address: args.handshake_address.clone(),
+            advertised_host: args.host.clone(),
+            engine_count: args.engine_count,
+            ready_timeout: Duration::from_secs(args.ready_timeout_secs),
+            local_input_address: None,
+            local_output_address: None,
+        },
+        coordinator_mode: None,
+        model_name: args.model.clone(),
+        client_index: args.client_index,
+    })
+    .await
+    .context("failed to connect to external vLLM engine")?;
+
+    println!("model={}", args.model);
+    println!("handshake_address={}", args.handshake_address);
+    println!("engine_count={}", args.engine_count);
+    println!("input_address={}", client.input_address());
+    println!("output_address={}", client.output_address());
+    println!("engine_identities={:x?}", client.engine_identities());
+
+    let initial_is_sleeping =
+        client.is_sleeping().await.context("failed to call is_sleeping utility")?;
+
+    println!("is_sleeping(initial)={initial_is_sleeping}");
+
+    if initial_is_sleeping != args.expected_is_sleeping {
+        bail!(
+            "unexpected initial is_sleeping state: expected {}, got {}",
+            args.expected_is_sleeping,
+            initial_is_sleeping
+        );
+    }
+
+    let reset_prefix_cache = client
+        .reset_prefix_cache(args.reset_running_requests, args.reset_external)
+        .await
+        .context("failed to call reset_prefix_cache utility")?;
+    println!("reset_prefix_cache={reset_prefix_cache}");
+
+    client.reset_mm_cache().await.context("failed to call reset_mm_cache utility")?;
+    println!("reset_mm_cache=ok");
+
+    client
+        .reset_encoder_cache()
+        .await
+        .context("failed to call reset_encoder_cache utility")?;
+    println!("reset_encoder_cache=ok");
+
+    if args.skip_sleep_wake {
+        println!("sleep_wake=skipped");
+    } else {
+        client.sleep(args.sleep_level, &args.sleep_mode).await.with_context(|| {
+            format!(
+                "failed to call sleep utility with level={} mode={}",
+                args.sleep_level, args.sleep_mode
+            )
+        })?;
+        println!(
+            "sleep=ok level={} mode={}",
+            args.sleep_level, args.sleep_mode
+        );
+
+        let sleeping_after_sleep =
+            client.is_sleeping().await.context("failed to call is_sleeping after sleep")?;
+        println!("is_sleeping(after_sleep)={sleeping_after_sleep}");
+
+        if !sleeping_after_sleep {
+            bail!("engine should report sleeping=true after sleep()");
+        }
+
+        client.wake_up(None).await.context("failed to call wake_up utility")?;
+        println!("wake_up=ok");
+
+        let sleeping_after_wake =
+            client.is_sleeping().await.context("failed to call is_sleeping after wake_up")?;
+        println!("is_sleeping(after_wake)={sleeping_after_wake}");
+
+        if sleeping_after_wake {
+            bail!("engine should report sleeping=false after wake_up()");
+        }
+    }
+
+    client.shutdown().await.context("failed to shut down engine-core client")?;
+
+    Ok(())
+}
diff --git a/rust/src/engine-core-client/src/client.rs b/rust/src/engine-core-client/src/client.rs
new file mode 100644
index 000000000000..4e7140525353
--- /dev/null
+++ b/rust/src/engine-core-client/src/client.rs
@@ -0,0 +1,672 @@
+use std::sync::Arc;
+use std::time::Duration;
+
+use futures::future::{join_all, try_join_all};
+use tokio::sync::mpsc;
+use tokio_util::task::AbortOnDropHandle;
+use tracing::{debug, info, trace};
+
+use crate::client::imp::{ClientInner, run_abort_loop, run_output_dispatcher_loop};
+use crate::coordinator::CoordinatorHandle;
+use crate::error::{Error, Result};
+use crate::protocol::handshake::EngineCoreReadyResponse;
+use crate::protocol::utility::EngineCoreUtilityRequest;
+use crate::protocol::{EngineCoreRequest, EngineCoreRequestType, ModelDtype};
+use crate::transport::{self, ConnectedEngine};
+
+pub(crate) mod imp;
+mod state;
+mod stream;
+
+pub use stream::{EngineCoreOutputStream, EngineCoreStreamOutput};
+
+/// How the frontend acquires its request/response transport with Python
+/// `EngineCoreProc`s.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum TransportMode {
+    /// The Rust process owns the startup handshake and allocates or binds the
+    /// frontend transport addresses itself before replying to engine
+    /// `HELLO` messages.
+    HandshakeOwner {
+        /// Shared handshake endpoint that engines dial during startup.
+        handshake_address: String,
+        /// Host/IP that engines should use to connect back to the frontend
+        /// transport sockets.
+        advertised_host: String,
+        /// Total number of engines expected to join this transport.
+        engine_count: usize,
+        /// Maximum time to wait for each startup phase to complete.
+        ready_timeout: Duration,
+        /// Optional explicit bind address for the input ROUTER socket.
+        local_input_address: Option<String>,
+        /// Optional explicit bind address for the output PULL socket.
+        local_output_address: Option<String>,
+    },
+
+    /// The Python supervisor has already chosen the frontend transport
+    /// addresses, and the Rust process only needs to bind them and wait for
+    /// engine registration frames.
+    Bootstrapped {
+        /// Input ROUTER socket address that engines will connect to for
+        /// requests.
+        input_address: String,
+        /// Output PULL socket address that engines will connect to for
+        /// responses.
+        output_address: String,
+        /// Total number of engines expected to register on this transport.
+        engine_count: usize,
+        /// Maximum time to wait for all expected engines to register.
+        ready_timeout: Duration,
+    },
+}
+
+/// Which coordinator implementation should be active when one is present for a
+/// frontend client.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum CoordinatorMode {
+    /// Run the Rust in-process coordinator for managed `serve` deployments.
+    InProc,
+    /// Connect to an external coordinator owned by another process.
+    External { address: String },
+}
+
+/// Configuration for connecting a Rust frontend client to an already running
+/// Python `EngineCoreProc`.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct EngineCoreClientConfig {
+    /// Frontend-to-engine transport setup.
+    pub transport_mode: TransportMode,
+    /// Frontend-side coordinator behavior, or `None` when requests should flow
+    /// directly to engines without any coordinator involvement.
+    pub coordinator_mode: Option<CoordinatorMode>,
+    /// Model name used for frontend-side metrics labels.
+    pub model_name: String,
+    /// Frontend client index stamped onto every request.
+    pub client_index: u32,
+}
+
+impl EngineCoreClientConfig {
+    /// Create a new client config with the given handshake address, expecting a
+    /// single engine, and default values for all other fields.
+    pub fn new_single(handshake_address: impl Into<String>) -> Self {
+        Self {
+            transport_mode: TransportMode::HandshakeOwner {
+                handshake_address: handshake_address.into(),
+                advertised_host: "127.0.0.1".to_string(),
+                engine_count: 1,
+                ready_timeout: Duration::from_secs(30),
+                local_input_address: None,
+                local_output_address: None,
+            },
+            coordinator_mode: None,
+            model_name: String::new(),
+            client_index: 0,
+        }
+    }
+
+    /// Set the model name used by frontend-side metrics and diagnostics.
+    pub fn with_model_name(mut self, model_name: impl Into<String>) -> Self {
+        self.model_name = model_name.into();
+        self
+    }
+
+    /// Override the client index stamped onto every outgoing request.
+    pub fn with_client_index(mut self, client_index: u32) -> Self {
+        self.client_index = client_index;
+        self
+    }
+
+    /// Override the optional coordinator mode for this client config.
+    pub fn with_coordinator_mode(mut self, coordinator_mode: Option<CoordinatorMode>) -> Self {
+        self.coordinator_mode = coordinator_mode;
+        self
+    }
+
+    /// Override the locally bound input/output addresses for handshake-owned
+    /// transport mode.
+    ///
+    /// This is primarily used by tests that want deterministic IPC endpoints
+    /// while still exercising the handshake-owned startup path.
+    pub fn with_local_input_output_addresses(
+        mut self,
+        local_input_address: Option<String>,
+        local_output_address: Option<String>,
+    ) -> Self {
+        let TransportMode::HandshakeOwner {
+            local_input_address: current_input,
+            local_output_address: current_output,
+            ..
+        } = &mut self.transport_mode
+        else {
+            panic!("local input/output overrides are only valid in handshake-owned mode");
+        };
+        *current_input = local_input_address;
+        *current_output = local_output_address;
+        self
+    }
+}
+
+/// The reason a request stream is being aborted when its output stream is
+/// dropped.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum AbortCause {
+    /// The consumer dropped the stream before the request reached a terminal
+    /// engine output.
+    #[default]
+    DroppedStream,
+    /// The frontend matched a stop string locally and intentionally stopped
+    /// consuming the stream.
+    StopStringMatched,
+}
+
+task_local::task_local! {
+    static ABORT_CAUSE: AbortCause;
+}
+
+impl AbortCause {
+    /// Return the abort cause currently associated with this task, or
+    /// [`AbortCause::DroppedStream`] by default.
+    pub fn current() -> Self {
+        ABORT_CAUSE.try_get().unwrap_or_default()
+    }
+
+    /// Drop one value while marking the drop as happening for this abort cause.
+    pub fn drop_as<T>(self, value: T) {
+        ABORT_CAUSE.sync_scope(self, move || drop(value));
+    }
+}
+
+/// Internal auto-abort work item sent from stream `Drop` handlers to the abort
+/// worker.
+#[derive(Debug, Clone)]
+pub(crate) struct AbortRequest {
+    request_id: String,
+    cause: AbortCause,
+}
+
+/// Default ZMQ-based implementation that talks directly to a Python
+/// `EngineCoreProc`.
+pub struct EngineCoreClient {
+    config: EngineCoreClientConfig,
+    input_address: String,
+    output_address: String,
+    engines: Vec<ConnectedEngine>,
+    inner: Arc<ClientInner>,
+    coordinator: Option<CoordinatorHandle>,
+    abort_tx: mpsc::UnboundedSender<AbortRequest>,
+
+    // Background tasks
+    output_task: AbortOnDropHandle<()>,
+    dispatcher_task: AbortOnDropHandle<()>,
+    abort_task: AbortOnDropHandle<()>,
+    coordinator_output_task: Option<AbortOnDropHandle<()>>,
+    coordinator_task: Option<AbortOnDropHandle<()>>,
+}
+
+impl EngineCoreClient {
+    /// Connect to Python `EngineCoreProc`s using the configured
+    /// transport/coordinator modes.
+    ///
+    /// In handshake-owned mode this method drives the full engine startup
+    /// handshake. In bootstrapped mode it binds the provided frontend
+    /// sockets and waits for the expected engine registration frames.
+    pub async fn connect(config: EngineCoreClientConfig) -> Result<Self> {
+        let connected = match &config.transport_mode {
+            TransportMode::HandshakeOwner {
+                handshake_address,
+                advertised_host,
+                engine_count,
+                ready_timeout,
+                local_input_address,
+                local_output_address,
+            } => {
+                let enable_inproc_coordinator = match config.coordinator_mode {
+                    None => false,
+                    Some(CoordinatorMode::InProc) => true,
+                    Some(CoordinatorMode::External { .. }) => {
+                        return Err(Error::UnsupportedExternalCoordinator);
+                    }
+                };
+
+                transport::connect_handshake(
+                    handshake_address,
+                    *engine_count,
+                    advertised_host,
+                    local_input_address.as_deref(),
+                    local_output_address.as_deref(),
+                    enable_inproc_coordinator,
+                    *ready_timeout,
+                )
+                .await?
+            }
+
+            TransportMode::Bootstrapped {
+                input_address,
+                output_address,
+                engine_count,
+                ready_timeout,
+            } => {
+                if let Some(CoordinatorMode::InProc) = config.coordinator_mode {
+                    panic!("cannot use in-process coordinator with bootstrapped transport mode")
+                }
+
+                transport::connect_bootstrapped(
+                    input_address,
+                    output_address,
+                    *engine_count,
+                    *ready_timeout,
+                )
+                .await?
+            }
+        };
+
+        Self::from_connected(config, connected).await
+    }
+
+    /// Create a new client instance from the connected transport state after
+    /// the startup handshake completes.
+    async fn from_connected(
+        config: EngineCoreClientConfig,
+        connected: transport::ConnectedTransport,
+    ) -> Result<Self> {
+        let (output_tx, output_rx) = mpsc::channel(64);
+        let (abort_tx, abort_rx) = mpsc::unbounded_channel();
+        let engines = connected.engines;
+        let inner = Arc::new(ClientInner::new(
+            connected.input_send,
+            config.model_name.clone(),
+            &engines,
+        ));
+        let output_task = AbortOnDropHandle::new(tokio::spawn(transport::run_output_loop(
+            connected.output_socket,
+            output_tx,
+        )));
+        let dispatcher_task = AbortOnDropHandle::new(tokio::spawn(run_output_dispatcher_loop(
+            inner.clone(),
+            output_rx,
+        )));
+        let abort_task =
+            AbortOnDropHandle::new(tokio::spawn(run_abort_loop(inner.clone(), abort_rx)));
+
+        // If any engine reported a dp_stats_address in its ready response, use it
+        // as the external coordinator address.
+        let dp_stats_address: Option<String> = engines
+            .iter()
+            .filter_map(|e| e.ready_response.as_ref())
+            .find_map(|r| r.dp_stats_address.clone());
+
+        let (coordinator, coordinator_output_task, coordinator_task) =
+            if let Some(coordinator_transport) = connected.coordinator {
+                let (handle, runner) =
+                    CoordinatorHandle::new_inproc(coordinator_transport.input_socket);
+                let (coordinator_output_tx, coordinator_output_rx) = mpsc::channel(64);
+                let coordinator_output_task =
+                    AbortOnDropHandle::new(tokio::spawn(transport::run_output_loop(
+                        coordinator_transport.output_socket,
+                        coordinator_output_tx,
+                    )));
+                let coordinator_task = AbortOnDropHandle::new(tokio::spawn(
+                    runner.run(coordinator_output_rx, inner.clone()),
+                ));
+                (
+                    Some(handle),
+                    Some(coordinator_output_task),
+                    Some(coordinator_task),
+                )
+            } else if let Some(address) =
+                dp_stats_address.as_deref().or(match config.coordinator_mode.as_ref() {
+                    Some(CoordinatorMode::External { address }) => Some(address.as_str()),
+                    _ => None,
+                })
+            {
+                let (handle, service) = CoordinatorHandle::connect_external(address).await?;
+                let coordinator_task =
+                    AbortOnDropHandle::new(tokio::spawn(service.run(inner.clone())));
+                (Some(handle), None, Some(coordinator_task))
+            } else {
+                (None, None, None)
+            };
+
+        Ok(Self {
+            config,
+            input_address: connected.input_address,
+            output_address: connected.output_address,
+            engines,
+            inner,
+            coordinator,
+            abort_tx,
+            output_task,
+            dispatcher_task,
+            abort_task,
+            coordinator_output_task,
+            coordinator_task,
+        })
+    }
+
+    /// Return the address of the input socket that the client uses to send
+    /// requests to the engine.
+    pub fn input_address(&self) -> &str {
+        &self.input_address
+    }
+
+    /// Return the address of the output socket that the client listens on for
+    /// engine responses.
+    pub fn output_address(&self) -> &str {
+        &self.output_address
+    }
+
+    /// Return the number of engines connected to this client.
+    pub fn engine_count(&self) -> usize {
+        self.engines.len()
+    }
+
+    /// Return the engine identities of all engines connected to this client.
+    pub fn engine_identities(&self) -> Vec<&[u8]> {
+        self.engines.iter().map(|engine| &*engine.engine_id).collect()
+    }
+
+    /// Return the ready responses received from all engines on the input
+    /// socket.
+    pub fn ready_responses(&self) -> Vec<&EngineCoreReadyResponse> {
+        self.engines
+            .iter()
+            .filter_map(|engine| engine.ready_response.as_ref())
+            .collect()
+    }
+
+    /// Return the engine-reported effective model dtype, when available.
+    pub fn model_dtype(&self) -> Option<ModelDtype> {
+        self.engines
+            .iter()
+            .filter_map(|engine| engine.ready_response.as_ref())
+            .find_map(|response| response.dtype)
+    }
+
+    /// Return the total number of GPU blocks summed across all connected
+    /// engines.
+    pub fn total_num_gpu_blocks(&self) -> u64 {
+        self.engines
+            .iter()
+            .filter_map(|engine| engine.ready_response.as_ref())
+            .map(|r| r.num_gpu_blocks)
+            .sum()
+    }
+
+    /// Return the minimum engine-reported `max_model_len` across all engines.
+    ///
+    /// This is the auto-fitted value after KV cache profiling and may differ
+    /// from the originally configured value.
+    pub fn max_model_len(&self) -> Option<u32> {
+        self.engines
+            .iter()
+            .filter_map(|e| e.ready_response.as_ref())
+            .map(|r| r.max_model_len as u32)
+            .min()
+    }
+
+    /// Get the model name associated with this client used for metrics
+    /// labeling.
+    pub fn model_name(&self) -> &str {
+        self.inner.model_name()
+    }
+
+    /// Return whether the client still considers the engine healthy.
+    pub fn is_healthy(&self) -> bool {
+        self.inner.is_healthy()
+    }
+
+    /// Return the first persistent health error observed by the client, if any.
+    pub fn health_error(&self) -> Option<Arc<Error>> {
+        self.inner.health_error()
+    }
+}
+
+// Client API implementation.
+impl EngineCoreClient {
+    /// Add a new request to the engine and return a per-request raw output
+    /// stream.
+    pub async fn call(&self, mut req: EngineCoreRequest) -> Result<EngineCoreOutputStream> {
+        req.client_index = self.config.client_index;
+        req.validate()?;
+        trace!(
+            request_id = %req.request_id,
+            client_index = req.client_index,
+            current_wave = req.current_wave,
+            request = ?req,
+            "sending add request"
+        );
+
+        let request_id = req.request_id.clone();
+        let data_parallel_rank = req.data_parallel_rank;
+        let (engine_id, rx) =
+            self.inner.register_request(request_id.clone(), data_parallel_rank)?;
+
+        let result: Result<()> = async {
+            if let Some(coordinator) = self.coordinator.as_ref() {
+                let snapshot = coordinator.snapshot();
+                req.current_wave = snapshot.current_wave;
+                if !snapshot.engines_running {
+                    coordinator.notify_first_request(engine_id.clone())?;
+                }
+            }
+
+            debug!(
+                request_id = req.request_id,
+                ?engine_id,
+                "registered request to engine"
+            );
+
+            self.inner.send_to_engine(&engine_id, EngineCoreRequestType::Add, &req).await?;
+            Ok(())
+        }
+        .await;
+
+        // Failed to send the request to the engine, roll back the registration.
+        if let Err(error) = result {
+            self.inner.rollback_request(&request_id);
+            return Err(error);
+        }
+
+        Ok(EngineCoreOutputStream::new(
+            request_id,
+            self.abort_tx.clone(),
+            rx,
+        ))
+    }
+
+    /// Abort currently in-flight requests by request ID.
+    pub async fn abort(&self, ids: &[String]) -> Result<()> {
+        let abortable = self.inner.abortable_request_ids(ids)?;
+
+        trace!(request_ids = ?ids, abortable_request_ids = ?abortable, "sending abort request ids");
+
+        if abortable.is_empty() {
+            return Ok(());
+        }
+
+        for (engine_id, request_ids) in abortable {
+            self.inner.do_abort_requests(&engine_id, &request_ids).await?;
+        }
+        Ok(())
+    }
+
+    /// Call a typed utility method on all connected engines, returning one
+    /// decoded result per connected engine if all calls succeed or an error
+    /// if any call fails.
+    ///
+    /// Callers should pass utility arguments using Rust tuple semantics so the
+    /// encoded payload matches Python's `(client_index, call_id,
+    /// method_name, args)` contract: `()`, `(arg,)`, `(arg1, arg2)`, etc.
+    pub async fn call_utility<T, A>(&self, method: &str, args: A) -> Result<Vec<T>>
+    where
+        T: serde::de::DeserializeOwned,
+        A: serde::Serialize + std::fmt::Debug,
+    {
+        trace!(
+            method,
+            client_index = self.config.client_index,
+            engine_count = self.engines.len(),
+            "sending utility request"
+        );
+
+        // Phase 1: allocate one call id per engine and build the per-engine
+        // request payloads up-front. Any failure here (registry closed, encode
+        // error) must roll back the call ids already allocated so they do not
+        // leak in the utility registry until shutdown.
+        let mut pending_calls = Vec::with_capacity(self.engines.len());
+        let mut prepared_sends = Vec::with_capacity(self.engines.len());
+        for engine in &self.engines {
+            let (call_id, rx) = match self.inner.allocate_and_register_utility_call() {
+                Ok(pair) => pair,
+                Err(err) => {
+                    self.inner.unregister_utility_calls(pending_calls.iter().map(|(id, _)| *id));
+                    return Err(err);
+                }
+            };
+            let request = match EngineCoreUtilityRequest::new(
+                self.config.client_index,
+                call_id,
+                method,
+                &args,
+            ) {
+                Ok(request) => request,
+                Err(err) => {
+                    self.inner.unregister_utility_calls(
+                        pending_calls.iter().map(|(id, _)| *id).chain(std::iter::once(call_id)),
+                    );
+                    return Err(err);
+                }
+            };
+            pending_calls.push((call_id, rx));
+            prepared_sends.push((&engine.engine_id, request));
+        }
+
+        // Phase 2: dispatch every utility request concurrently. `try_join_all`
+        // fails fast on the first transport error and drops the remaining send
+        // futures; any engines that already received the request will reply,
+        // but those replies are simply dropped because we roll back the call
+        // ids below.
+        let send_futures = prepared_sends.iter().map(|(engine_id, request)| {
+            self.inner.send_to_engine(engine_id, EngineCoreRequestType::Utility, request)
+        });
+        if let Err(err) = try_join_all(send_futures).await {
+            self.inner.unregister_utility_calls(pending_calls.iter().map(|(id, _)| *id));
+            return Err(err);
+        }
+
+        // Phase 3: wait for all engines to respond and preserve the per-engine
+        // result list.
+        let futures = pending_calls.into_iter().map(|(call_id, rx)| async move {
+            rx.await
+                .map_err(|_| Error::UtilityCallClosed {
+                    method: method.to_string(),
+                    call_id,
+                })??
+                .into_typed_result(method)
+        });
+        try_join_all(futures).await
+    }
+
+    /// Execute `collective_rpc` on all engines and flatten all engine results
+    /// into one list.
+    pub async fn collective_rpc<A, K>(
+        &self,
+        method: &str,
+        timeout: Option<f64>,
+        args: A,
+        kwargs: K,
+    ) -> Result<Vec<rmpv::Value>>
+    where
+        A: serde::Serialize + std::fmt::Debug,
+        K: serde::Serialize + std::fmt::Debug,
+    {
+        let results = self
+            .call_utility::<rmpv::Value, _>("collective_rpc", (method, timeout, args, kwargs))
+            .await?;
+
+        Ok(results
+            .into_iter()
+            .flat_map(|result| match result {
+                // Each engine's `collective_rpc` result is itself the worker-level result list.
+                rmpv::Value::Array(results) => results,
+                other => vec![other],
+            })
+            .collect())
+    }
+
+    /// Return whether the engine is currently sleeping at any level.
+    pub async fn is_sleeping(&self) -> Result<bool> {
+        // TODO: we only return the result of the first engine here.
+        Ok(self.call_utility("is_sleeping", ()).await?[0])
+    }
+
+    /// Reset the multi-modal cache.
+    pub async fn reset_mm_cache(&self) -> Result<()> {
+        self.call_utility::<(), _>("reset_mm_cache", ()).await?;
+        Ok(())
+    }
+
+    /// Reset the encoder cache.
+    pub async fn reset_encoder_cache(&self) -> Result<()> {
+        self.call_utility::<(), _>("reset_encoder_cache", ()).await?;
+        Ok(())
+    }
+
+    /// Reset the prefix cache and optionally the external connector cache.
+    pub async fn reset_prefix_cache(
+        &self,
+        reset_running_requests: bool,
+        reset_connector: bool,
+    ) -> Result<bool> {
+        // TODO: we only return the result of the first engine here.
+        Ok(self
+            .call_utility(
+                "reset_prefix_cache",
+                (reset_running_requests, reset_connector),
+            )
+            .await?[0])
+    }
+
+    /// Put the engine to sleep.
+    pub async fn sleep(&self, level: u32, mode: &str) -> Result<()> {
+        self.call_utility::<(), _>("sleep", (level, mode)).await?;
+        Ok(())
+    }
+
+    /// Wake the engine from sleep, optionally limiting the wake-up to specific
+    /// tags.
+    pub async fn wake_up(&self, tags: Option<Vec<String>>) -> Result<()> {
+        self.call_utility::<(), _>("wake_up", (tags,)).await?;
+        Ok(())
+    }
+
+    /// Shut down local client tasks and close transport state.
+    pub async fn shutdown(self) -> Result<()> {
+        let Self {
+            inner,
+            abort_tx,
+            output_task,
+            dispatcher_task,
+            abort_task,
+            coordinator_output_task,
+            coordinator_task,
+            ..
+        } = self;
+
+        info!("shutting down engine-core client");
+        inner.shutdown();
+        drop(abort_tx);
+
+        // Abort all client tasks first, then await them.
+        // Note the aborting orders here.
+        let mut tasks = vec![abort_task, dispatcher_task, output_task];
+        tasks.extend(coordinator_task);
+        tasks.extend(coordinator_output_task);
+
+        tasks.iter().for_each(|t| t.abort());
+        join_all(tasks).await;
+
+        info!("engine-core client shut down");
+        Ok(())
+    }
+}
diff --git a/rust/src/engine-core-client/src/client/imp.rs b/rust/src/engine-core-client/src/client/imp.rs
new file mode 100644
index 000000000000..038053882430
--- /dev/null
+++ b/rust/src/engine-core-client/src/client/imp.rs
@@ -0,0 +1,416 @@
+use std::collections::BTreeMap;
+use std::slice;
+use std::sync::Arc;
+
+use arc_swap::ArcSwapOption;
+use parking_lot::Mutex;
+use thiserror_ext::AsReport as _;
+use tokio::sync::mpsc;
+use tracing::{debug, info, trace, warn};
+use vllm_metrics::METRICS;
+use zeromq::RouterSendHalf;
+
+use crate::client::state::{OutputReceiver, RequestRegistry, UtilityReceiver, UtilityRegistry};
+use crate::client::stream::EngineCoreStreamOutput;
+use crate::client::{AbortCause, AbortRequest};
+use crate::error::{client_closed, dispatcher_closed, unexpected_dispatcher_output};
+use crate::metrics::record_scheduler_stats;
+use crate::protocol::stats::SchedulerStats;
+use crate::protocol::utility::UtilityOutput;
+use crate::protocol::{
+    ClassifiedEngineCoreOutputs, EngineCoreOutput, EngineCoreOutputs, EngineCoreRequestType,
+    encode_msgpack,
+};
+use crate::transport::{ConnectedEngine, EngineId};
+use crate::{Error, Result, transport};
+
+pub(crate) struct ClientInner {
+    input_send: RouterSendHalf,
+    model_name: String,
+    request_reg: Mutex<RequestRegistry>,
+    utility_reg: Mutex<UtilityRegistry>,
+    health_error: ArcSwapOption<Error>,
+}
+
+impl ClientInner {
+    /// Create a new instance with the given input send half after the startup
+    /// handshake completes.
+    pub fn new(
+        input_send: RouterSendHalf,
+        model_name: String,
+        engines: &[ConnectedEngine],
+    ) -> Self {
+        Self {
+            input_send,
+            model_name,
+            request_reg: Mutex::new(RequestRegistry::new(engines)),
+            utility_reg: Mutex::new(UtilityRegistry::default()),
+            health_error: ArcSwapOption::empty(),
+        }
+    }
+
+    /// Get the model name associated with this client used for metrics
+    /// labeling.
+    pub fn model_name(&self) -> &str {
+        &self.model_name
+    }
+
+    /// Register a newly added request. Return the selected engine id and the
+    /// per-request output channel bound to its `request_id`.
+    ///
+    /// When `data_parallel_rank` is provided, the request is routed to that
+    /// specific engine rank, bypassing load balancing.
+    pub fn register_request(
+        &self,
+        request_id: String,
+        data_parallel_rank: Option<u32>,
+    ) -> Result<(EngineId, OutputReceiver)> {
+        let mut registry = self.request_reg.lock();
+        if registry.is_closed() {
+            return Err(self.closed_error());
+        }
+        registry.register(request_id, data_parallel_rank)
+    }
+
+    /// Allocate the next utility `call_id` and register its waiting receiver.
+    pub fn allocate_and_register_utility_call(&self) -> Result<(u64, UtilityReceiver)> {
+        let mut registry = self.utility_reg.lock();
+        if registry.is_closed() {
+            return Err(self.closed_error());
+        }
+        Ok(registry.allocate_and_register())
+    }
+
+    /// Undo a batch of utility call allocations when the fan-out send fails
+    /// partway through. Silently ignores unknown call ids so callers can pass
+    /// the full set without first filtering successful sends.
+    pub fn unregister_utility_calls(&self, call_ids: impl IntoIterator<Item = u64>) {
+        self.utility_reg.lock().unregister_many(call_ids);
+    }
+
+    /// Undo a request registration when `add_request()` fails.
+    pub fn rollback_request(&self, request_id: &str) {
+        let _ = self.request_reg.lock().remove(request_id);
+    }
+
+    /// Filter the given request IDs to the subset that are still tracked as
+    /// active and can be aborted, grouped by the engine that originally
+    /// accepted them.
+    pub fn abortable_request_ids(
+        &self,
+        request_ids: &[String],
+    ) -> Result<BTreeMap<EngineId, Vec<String>>> {
+        let registry = self.request_reg.lock();
+        if registry.is_closed() {
+            return Err(self.closed_error());
+        }
+        Ok(registry.abortable_request_ids(request_ids))
+    }
+
+    /// Obtain the stream sender for one output. If it indicates the request is
+    /// finished, it will be removed from the registry.
+    pub fn take_sender_for_output(
+        &self,
+        output: &EngineCoreOutput,
+    ) -> Option<mpsc::UnboundedSender<Result<EngineCoreStreamOutput>>> {
+        self.request_reg.lock().sender_for_output(output)
+    }
+
+    /// Remove a batch of requests that have finished or aborted, returning
+    /// their stream senders.
+    pub fn finish_requests<'a>(
+        &self,
+        request_ids: impl IntoIterator<Item = &'a String>,
+    ) -> Vec<mpsc::UnboundedSender<Result<EngineCoreStreamOutput>>> {
+        self.request_reg.lock().finish_many(request_ids)
+    }
+
+    /// Apply one scheduler stats update for the given engine to the local
+    /// routing state. Returns `false` if the engine is unknown to the
+    /// client.
+    pub fn apply_scheduler_stats(&self, engine_index: u32, stats: &SchedulerStats) -> bool {
+        self.request_reg.lock().apply_scheduler_stats(engine_index, stats)
+    }
+
+    /// Close all active request streams and utility calls with the first
+    /// persistent health error.
+    pub fn close_registries(&self, error: Arc<Error>) {
+        let persistent_error = self.record_health_error(error);
+        let request_senders = self.request_reg.lock().close();
+        let utility_senders = self.utility_reg.lock().close();
+
+        // Notify all ongoing requests that the client is closed.
+        for sender in request_senders {
+            let _ = sender.send(Err(Error::Shared(persistent_error.clone())));
+        }
+        for sender in utility_senders {
+            let _ = sender.send(Err(Error::Shared(persistent_error.clone())));
+        }
+    }
+
+    /// Return the first persistent health error observed by the client, if any.
+    pub fn health_error(&self) -> Option<Arc<Error>> {
+        self.health_error.load_full()
+    }
+
+    /// Return whether the client still considers the engine healthy.
+    pub fn is_healthy(&self) -> bool {
+        self.health_error.load().is_none()
+    }
+
+    /// Resolve one utility output to the waiting caller. Returns `true` if a
+    /// waiting caller existed.
+    pub fn resolve_utility_output(&self, output: UtilityOutput) -> bool {
+        let Some(call_id) = output.call_id.as_u64() else {
+            // Currently, all utility call issued by the client should have unsigned call IDs.
+            return false;
+        };
+
+        match self.utility_reg.lock().resolve(&call_id) {
+            Some(sender) => {
+                sender.send(Ok(output)).unwrap_or_default();
+                true
+            }
+            None => false,
+        }
+    }
+
+    /// Send the given message to the engine. The request should be first
+    /// registered via `register_request()` to ensure the request stream is
+    /// tracked.
+    pub async fn send_to_engine<T>(
+        &self,
+        engine_id: &EngineId,
+        request_type: EngineCoreRequestType,
+        payload: &T,
+    ) -> Result<()>
+    where
+        T: serde::Serialize + std::fmt::Debug,
+    {
+        // TODO: for `EngineCoreRequest`, split outbound tensor raw views into aux
+        // frames instead of always producing a single msgpack frame.
+        let payload = encode_msgpack(payload)?;
+        let mut input_send = self.input_send.clone();
+        transport::send_message(&mut input_send, engine_id, request_type.to_frame(), payload)
+            .await?;
+        Ok(())
+    }
+
+    /// Handle an abort request by sending the abort message to the engine.
+    pub async fn do_abort_requests(
+        &self,
+        engine_id: &EngineId,
+        request_ids: &[String],
+    ) -> Result<()> {
+        self.send_to_engine(engine_id, EngineCoreRequestType::Abort, &request_ids).await
+    }
+
+    /// Shut down by closing all active request streams and utility calls with a
+    /// sticky client closed error.
+    pub fn shutdown(&self) {
+        self.close_registries(Arc::new(client_closed!("engine-core client shut down")));
+    }
+
+    /// Remove the request from the active registry for auto-abort and return
+    /// the engine that the request was originally routed to, if it is still
+    /// active.
+    pub fn take_auto_abort_target(&self, request_id: &str) -> Option<EngineId> {
+        let mut registry = self.request_reg.lock();
+        let (_, engine_id) = registry.remove(request_id)?;
+        if registry.is_closed() {
+            return None;
+        }
+        Some(engine_id)
+    }
+
+    /// Publish the first persistent health error and return the sticky error
+    /// recorded for this client. Later failures do not overwrite the first
+    /// one so `/health` and post-close callers observe a stable cause.
+    fn record_health_error(&self, error: Arc<Error>) -> Arc<Error> {
+        if let Some(existing) = self.health_error.load_full() {
+            return existing;
+        }
+        self.health_error
+            .rcu(|current| current.clone().unwrap_or_else(|| error.clone()));
+        self.health_error
+            .load_full()
+            .expect("health error must be recorded before registries close")
+    }
+
+    /// Assert there is a recorded health error and return a `Shared` variant
+    /// wrapping it for error returns when the client is already closed.
+    fn closed_error(&self) -> Error {
+        Error::Shared(self.health_error.load_full().expect(
+            "closed registry must have a recorded health error before rejecting new operations",
+        ))
+    }
+}
+
+/// Background loop that listens for request IDs to abort and sends abort
+/// messages to the engine. This is used to implement the auto-abort behavior
+/// when a request stream is dropped without being properly terminated.
+pub(crate) async fn run_abort_loop(
+    inner: Arc<ClientInner>,
+    mut abort_rx: mpsc::UnboundedReceiver<AbortRequest>,
+) {
+    // TODO: receive and abort requests in batch
+    while let Some(AbortRequest { request_id, cause }) = abort_rx.recv().await {
+        let Some(engine_id) = inner.take_auto_abort_target(&request_id) else {
+            debug!(request_id, "skip auto-abort for inactive request");
+            continue;
+        };
+
+        match cause {
+            AbortCause::DroppedStream => {
+                info!(request_id, "auto-aborting request due to dropped stream")
+            }
+            AbortCause::StopStringMatched => {
+                debug!(
+                    request_id,
+                    "auto-aborting request due to stop string matched"
+                )
+            }
+        }
+
+        if let Err(error) = inner.do_abort_requests(&engine_id, slice::from_ref(&request_id)).await
+        {
+            warn!(
+                request_id,
+                ?engine_id,
+                error = %error.as_report(),
+                "failed to auto-abort dropped request stream"
+            );
+        }
+    }
+}
+
+/// Background loop that listens for engine-core outputs and dispatches them to
+/// the corresponding request streams based on their `request_id`.
+pub(crate) async fn run_output_dispatcher_loop(
+    inner: Arc<ClientInner>,
+    mut output_rx: mpsc::Receiver<Result<EngineCoreOutputs>>,
+) {
+    let result: Result<()> = async {
+        loop {
+            let outputs = match output_rx.recv().await {
+                Some(outputs) => outputs,
+                None => Err(dispatcher_closed!(
+                    "engine-core output dispatcher channel closed"
+                )),
+            }?;
+
+            match outputs.classify() {
+                ClassifiedEngineCoreOutputs::RequestBatch(batch) => {
+                    for output in batch.outputs {
+                        let request_id = output.request_id.clone();
+                        let Some(sender) = inner.take_sender_for_output(&output) else {
+                            debug!(request_id, "dropping output for inactive request");
+                            continue;
+                        };
+
+                        let wrapped_output = EngineCoreStreamOutput {
+                            engine_index: batch.engine_index,
+                            timestamp: batch.timestamp,
+                            output,
+                        };
+                        if sender.send(Ok(wrapped_output)).is_err() {
+                            debug!(request_id, "request output stream receiver dropped");
+                        }
+                    }
+
+                    // The sender for normally-finished requests should have already been removed
+                    // from the registry when their final output was dispatched
+                    // above. This serves as a safety net to capture any
+                    // requests marked as finished by the engine.
+                    if let Some(finished_requests) = batch.finished_requests.as_ref() {
+                        for request_id in finished_requests {
+                            trace!(request_id, "request completed via finished_requests");
+                        }
+                        drop(inner.finish_requests(finished_requests));
+                    }
+
+                    if let Some(scheduler_stats) = batch.scheduler_stats.as_ref() {
+                        if !inner.apply_scheduler_stats(batch.engine_index, scheduler_stats) {
+                            debug!(
+                                engine_index = batch.engine_index,
+                                "dropping scheduler stats for unknown engine"
+                            );
+                        }
+                        record_scheduler_stats(
+                            &METRICS.scheduler,
+                            inner.model_name(),
+                            batch.engine_index,
+                            scheduler_stats,
+                        );
+                    }
+                }
+                ClassifiedEngineCoreOutputs::Utility(utility) => {
+                    let call_id = utility.output.call_id;
+                    if inner.resolve_utility_output(utility.output) {
+                        trace!(
+                            %call_id,
+                            engine_index = utility.engine_index,
+                            "resolved utility output"
+                        );
+                    } else {
+                        warn!(
+                            %call_id,
+                            engine_index = utility.engine_index,
+                            "dropping output for unexpected utility call"
+                        );
+                    }
+                }
+                other @ (ClassifiedEngineCoreOutputs::DpControl { .. }
+                | ClassifiedEngineCoreOutputs::Other(_)) => {
+                    Err::<(), _>(unexpected_dispatcher_output!(
+                        "received unexpected output on main dispatcher path: {other:?}"
+                    ))?;
+                }
+            }
+        }
+    }
+    .await;
+    let Err(error) = result else { return };
+
+    warn!(error = %error.as_report(), "output dispatcher exiting with error");
+    inner.close_registries(Arc::new(error));
+}
+
+#[cfg(test)]
+mod tests {
+    use zeromq::{RouterSocket, Socket};
+
+    use super::*;
+
+    async fn test_inner() -> ClientInner {
+        let mut socket = RouterSocket::new();
+        socket.bind("tcp://127.0.0.1:0").await.unwrap();
+        let (send, _) = socket.split();
+        ClientInner::new(
+            send,
+            "test-model".to_string(),
+            &[ConnectedEngine {
+                engine_id: EngineId::from(b"engine-0"),
+                ready_response: None,
+            }],
+        )
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+    async fn close_registries_records_first_health_error_only() {
+        let inner = test_inner().await;
+
+        inner.close_registries(Arc::new(Error::EngineCoreDead));
+        assert!(!inner.is_healthy());
+        assert!(matches!(
+            inner.health_error().as_deref(),
+            Some(Error::EngineCoreDead)
+        ));
+
+        inner.close_registries(Arc::new(client_closed!("shutdown")));
+        assert!(matches!(
+            inner.health_error().as_deref(),
+            Some(Error::EngineCoreDead)
+        ));
+    }
+}
diff --git a/rust/src/engine-core-client/src/client/state.rs b/rust/src/engine-core-client/src/client/state.rs
new file mode 100644
index 000000000000..0dd0bd3e968d
--- /dev/null
+++ b/rust/src/engine-core-client/src/client/state.rs
@@ -0,0 +1,671 @@
+use std::collections::BTreeMap;
+use std::sync::atomic::{AtomicU64, Ordering};
+
+use tokio::sync::{mpsc, oneshot};
+use tracing::trace;
+
+use crate::EngineId;
+use crate::client::stream::EngineCoreStreamOutput;
+use crate::error::{Error, Result};
+use crate::protocol::EngineCoreOutput;
+use crate::protocol::stats::SchedulerStats;
+use crate::protocol::utility::UtilityOutput;
+use crate::transport::ConnectedEngine;
+
+pub type OutputSender = mpsc::UnboundedSender<Result<EngineCoreStreamOutput>>;
+pub type OutputReceiver = mpsc::UnboundedReceiver<Result<EngineCoreStreamOutput>>;
+pub type UtilitySender = oneshot::Sender<Result<UtilityOutput>>;
+pub type UtilityReceiver = oneshot::Receiver<Result<UtilityOutput>>;
+
+#[derive(Debug)]
+struct TrackedRequest {
+    sender: OutputSender,
+    engine_id: EngineId,
+}
+
+/// The latest real scheduler-side load snapshot observed from one engine.
+///
+/// These counters come from `scheduler_stats` on the normal engine output path
+/// and are the preferred routing signal once available.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+struct EngineLoadSnapshot {
+    /// Requests still counted on the scheduler's waiting side.
+    waiting: usize,
+    /// Requests currently counted on the scheduler's running side.
+    running: usize,
+}
+
+#[derive(Debug, Default)]
+struct EngineRoutingState {
+    /// Requests admitted by this frontend that have not finished yet.
+    ///
+    /// This is used both as the bootstrap fallback before real scheduler stats
+    /// exist and as a lower bound afterwards so asynchronous scheduler
+    /// snapshots cannot erase frontend admission history.
+    inflight: usize,
+    /// The latest real scheduler snapshot received from this engine, if any.
+    last_scheduler_stats: Option<EngineLoadSnapshot>,
+}
+
+impl EngineRoutingState {
+    /// Compute the routing score used to pick the least-loaded engine.
+    ///
+    /// Scheduler stats can raise the load estimate above the frontend-local
+    /// view, but they should not lower it below requests this frontend has
+    /// already admitted. Waiting requests still get the same extra penalty
+    /// as the original `waiting * 4 + running` score.
+    fn routing_score(&self) -> usize {
+        const WAITING_WEIGHT: usize = 4;
+
+        let Some(stats) = self.last_scheduler_stats else {
+            return self.inflight;
+        };
+
+        let scheduler_total = stats.running + stats.waiting;
+        self.inflight.max(scheduler_total) + stats.waiting * (WAITING_WEIGHT - 1)
+    }
+
+    /// Replace the local routing view with a fresh real scheduler snapshot.
+    fn apply_scheduler_counts(&mut self, next: EngineLoadSnapshot) {
+        self.last_scheduler_stats = Some(next);
+    }
+}
+
+/// Internal registry for tracking active requests and their output stream
+/// senders.
+///
+/// This is used to route incoming outputs to the correct request stream, and to
+/// ensure proper cleanup of senders when requests finish or the client shuts
+/// down.
+#[derive(Debug)]
+pub struct RequestRegistry {
+    closed: bool,
+    requests: BTreeMap<String, TrackedRequest>,
+    routing_per_engine: BTreeMap<EngineId, EngineRoutingState>,
+}
+
+impl RequestRegistry {
+    pub fn new(engines: &[ConnectedEngine]) -> Self {
+        Self {
+            closed: false,
+            requests: BTreeMap::default(),
+            routing_per_engine: engines
+                .iter()
+                .map(|engine| (engine.engine_id.clone(), EngineRoutingState::default()))
+                .collect(),
+        }
+    }
+
+    /// Register a newly added request. Create the per-request output channel
+    /// bound to its `request_id` and return the selected engine id.
+    ///
+    /// When `data_parallel_rank` is provided, the request is routed directly to
+    /// the engine at that rank index, bypassing load balancing. Otherwise
+    /// the engine with the fewest in-flight requests is chosen.
+    pub fn register(
+        &mut self,
+        request_id: String,
+        data_parallel_rank: Option<u32>,
+    ) -> Result<(EngineId, OutputReceiver)> {
+        if self.requests.contains_key(&request_id) {
+            return Err(Error::DuplicateRequestId { request_id });
+        }
+
+        let engine_id = self.choose_engine_for_request(data_parallel_rank)?;
+        let (tx, rx) = mpsc::unbounded_channel();
+        self.requests.insert(
+            request_id,
+            TrackedRequest {
+                sender: tx,
+                engine_id: engine_id.clone(),
+            },
+        );
+
+        let state = self
+            .routing_per_engine
+            .get_mut(&engine_id)
+            .expect("request registry must track all known engines");
+        state.inflight += 1;
+
+        Ok((engine_id, rx))
+    }
+
+    fn choose_engine_for_request(&mut self, data_parallel_rank: Option<u32>) -> Result<EngineId> {
+        if let Some(rank) = data_parallel_rank {
+            // Route to the engine at the specified rank index.
+            let engine_id = EngineId::from_engine_index(rank);
+            return self
+                .routing_per_engine
+                .contains_key(&engine_id)
+                .then_some(engine_id)
+                .ok_or_else(|| Error::InvalidDataParallelRank {
+                    rank,
+                    num_engines: self.routing_per_engine.len() as u32,
+                });
+        }
+
+        Ok(self
+            .routing_per_engine
+            .iter()
+            .min_by_key(|(_, state)| state.routing_score())
+            .map(|(engine_id, _)| engine_id.clone())
+            .expect("request registry must contain at least one engine"))
+    }
+
+    /// Filter the given request IDs to the subset that are still tracked as
+    /// active and can be aborted, grouped by engine.
+    pub fn abortable_request_ids(&self, request_ids: &[String]) -> BTreeMap<EngineId, Vec<String>> {
+        let mut by_engine = BTreeMap::new();
+        for request_id in request_ids {
+            let Some(tracked) = self.requests.get(request_id.as_str()) else {
+                continue;
+            };
+            by_engine
+                .entry(tracked.engine_id.clone())
+                .or_insert_with(Vec::new)
+                .push(request_id.clone());
+        }
+        by_engine
+    }
+
+    /// Obtain the stream sender for one output. If it indicates the request is
+    /// finished, it will be removed from the registry.
+    pub fn sender_for_output(&mut self, output: &EngineCoreOutput) -> Option<OutputSender> {
+        if output.finished() {
+            self.remove(output.request_id.as_str()).map(|tracked| tracked.0)
+        } else {
+            self.requests
+                .get(output.request_id.as_str())
+                .map(|tracked| tracked.sender.clone())
+        }
+    }
+
+    /// Remove a batch of requests that have finished or aborted, returning
+    /// their stream senders.
+    pub fn finish_many<'a>(
+        &mut self,
+        request_ids: impl IntoIterator<Item = &'a String>,
+    ) -> Vec<OutputSender> {
+        request_ids
+            .into_iter()
+            .filter_map(|request_id| self.remove(request_id.as_str()).map(|tracked| tracked.0))
+            .collect()
+    }
+
+    /// Apply one scheduler stats update for the given engine to the local
+    /// routing state. Returns `false` if the engine is unknown to the
+    /// client.
+    pub fn apply_scheduler_stats(&mut self, engine_index: u32, stats: &SchedulerStats) -> bool {
+        self.apply_scheduler_counts(
+            engine_index,
+            EngineLoadSnapshot {
+                waiting: stats.num_waiting_reqs as usize,
+                running: stats.num_running_reqs as usize,
+            },
+        )
+    }
+
+    /// Mark the registry as closed, detach and return all tracked senders.
+    pub fn close(&mut self) -> Vec<OutputSender> {
+        if self.closed {
+            return Vec::new();
+        }
+
+        self.closed = true;
+        std::mem::take(&mut self.requests)
+            .into_values()
+            .map(|tracked| tracked.sender)
+            .collect()
+    }
+
+    /// Remove one request from the local registry. Returns the tracked entry if
+    /// it exists.
+    #[must_use]
+    pub fn remove(&mut self, request_id: &str) -> Option<(OutputSender, EngineId)> {
+        let tracked = self.requests.remove(request_id)?;
+        self.routing_per_engine
+            .get_mut(&tracked.engine_id)
+            .expect("request registry must track all known engines")
+            .inflight -= 1;
+        Some((tracked.sender, tracked.engine_id))
+    }
+
+    fn apply_scheduler_counts(&mut self, engine_index: u32, next: EngineLoadSnapshot) -> bool {
+        let engine_id = EngineId::from_engine_index(engine_index);
+        let Some(state) = self.routing_per_engine.get_mut(&engine_id) else {
+            return false;
+        };
+
+        let previous = state.last_scheduler_stats;
+        if previous != Some(next) {
+            trace!(
+                ?engine_id,
+                previous_waiting = previous.map(|stats| stats.waiting),
+                previous_running = previous.map(|stats| stats.running),
+                waiting = next.waiting,
+                running = next.running,
+                "updated scheduler routing counts",
+            );
+        }
+
+        state.apply_scheduler_counts(next);
+        true
+    }
+
+    #[cfg(test)]
+    pub fn contains(&self, request_id: &str) -> bool {
+        self.requests.contains_key(request_id)
+    }
+
+    pub fn is_closed(&self) -> bool {
+        self.closed
+    }
+}
+
+/// Internal registry for tracking active utility calls and their waiting
+/// receivers.
+#[derive(Debug)]
+pub struct UtilityRegistry {
+    closed: bool,
+    next_call_id: AtomicU64,
+    utility_calls: BTreeMap<u64, UtilitySender>,
+}
+
+impl Default for UtilityRegistry {
+    fn default() -> Self {
+        Self {
+            closed: false,
+            next_call_id: AtomicU64::new(1),
+            utility_calls: BTreeMap::default(),
+        }
+    }
+}
+
+impl UtilityRegistry {
+    /// Allocate the next utility `call_id` and register a newly added utility
+    /// call.
+    pub fn allocate_and_register(&mut self) -> (u64, UtilityReceiver) {
+        let call_id = self.next_call_id.fetch_add(1, Ordering::Relaxed);
+        let (tx, rx) = oneshot::channel();
+        self.utility_calls.insert(call_id, tx);
+        (call_id, rx)
+    }
+
+    /// Resolve a utility output to its waiting receiver.
+    pub fn resolve(&mut self, call_id: &u64) -> Option<UtilitySender> {
+        self.utility_calls.remove(call_id)
+    }
+
+    /// Drop a batch of registered utility calls without delivering a result.
+    /// Used to roll back allocations when the dispatch fan-out fails before
+    /// every engine could accept the request.
+    pub fn unregister_many(&mut self, call_ids: impl IntoIterator<Item = u64>) {
+        for call_id in call_ids {
+            self.utility_calls.remove(&call_id);
+        }
+    }
+
+    /// Mark the registry as closed, detach and return all tracked senders.
+    pub fn close(&mut self) -> Vec<UtilitySender> {
+        if self.closed {
+            return Vec::new();
+        }
+
+        self.closed = true;
+        std::mem::take(&mut self.utility_calls).into_values().collect()
+    }
+
+    #[cfg(test)]
+    pub fn contains(&self, call_id: u64) -> bool {
+        self.utility_calls.contains_key(&call_id)
+    }
+
+    pub fn is_closed(&self) -> bool {
+        self.closed
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{EngineRoutingState, RequestRegistry, UtilityRegistry};
+    use crate::EngineId;
+    use crate::client::state::EngineLoadSnapshot;
+    use crate::protocol::{EngineCoreFinishReason, EngineCoreOutput};
+    use crate::transport::ConnectedEngine;
+
+    #[test]
+    fn registry_rejects_duplicate_request_ids() {
+        let mut registry = RequestRegistry::new(&[ConnectedEngine {
+            engine_id: EngineId::from(b"engine-0"),
+            ready_response: None,
+        }]);
+        registry.register("req-1".to_string(), None).unwrap();
+        let error = registry.register("req-1".to_string(), None).unwrap_err();
+        assert!(matches!(
+            error,
+            crate::error::Error::DuplicateRequestId { request_id } if request_id == "req-1"
+        ));
+    }
+
+    #[test]
+    fn registry_removes_finished_request_on_output() {
+        let mut registry = RequestRegistry::new(&[ConnectedEngine {
+            engine_id: EngineId::from(b"engine-0"),
+            ready_response: None,
+        }]);
+        registry.register("req-1".to_string(), None).unwrap();
+
+        let sender = registry.sender_for_output(&EngineCoreOutput {
+            request_id: "req-1".to_string(),
+            finish_reason: Some(EngineCoreFinishReason::Length),
+            ..Default::default()
+        });
+
+        assert!(sender.is_some());
+        assert!(!registry.contains("req-1"));
+    }
+
+    #[test]
+    fn registry_closes_all_requests_on_failure() {
+        let mut registry = RequestRegistry::new(&[ConnectedEngine {
+            engine_id: EngineId::from(b"engine-0"),
+            ready_response: None,
+        }]);
+        registry.register("req-1".to_string(), None).unwrap();
+        registry.register("req-2".to_string(), None).unwrap();
+
+        let senders = registry.close();
+
+        assert_eq!(senders.len(), 2);
+        assert!(registry.is_closed());
+    }
+
+    #[test]
+    fn registry_tracks_engine_id_per_request() {
+        let engine_0 = EngineId::from_engine_index(0);
+        let engine_1 = EngineId::from_engine_index(1);
+        let mut registry = RequestRegistry::new(&[
+            ConnectedEngine {
+                engine_id: engine_0.clone(),
+                ready_response: None,
+            },
+            ConnectedEngine {
+                engine_id: engine_1.clone(),
+                ready_response: None,
+            },
+        ]);
+        let (chosen_0, _) = registry.register("req-1".to_string(), None).unwrap();
+        let (chosen_1, _) = registry.register("req-2".to_string(), None).unwrap();
+        let (chosen_0_again, _) = registry.register("req-3".to_string(), None).unwrap();
+
+        assert_eq!(chosen_0, engine_0);
+        assert_eq!(chosen_1, engine_1);
+        assert_eq!(chosen_0_again, engine_0);
+
+        let grouped = registry.abortable_request_ids(&[
+            "req-1".to_string(),
+            "req-2".to_string(),
+            "req-3".to_string(),
+        ]);
+        assert_eq!(
+            grouped.get(&engine_0).unwrap(),
+            &vec!["req-1".to_string(), "req-3".to_string()]
+        );
+        assert_eq!(grouped.get(&engine_1).unwrap(), &vec!["req-2".to_string()]);
+    }
+
+    #[test]
+    fn registry_uses_inflight_as_waiting_fallback_before_stats_arrive() {
+        let engine_0 = EngineId::from_engine_index(0);
+        let engine_1 = EngineId::from_engine_index(1);
+        let mut registry = RequestRegistry::new(&[
+            ConnectedEngine {
+                engine_id: engine_0.clone(),
+                ready_response: None,
+            },
+            ConnectedEngine {
+                engine_id: engine_1.clone(),
+                ready_response: None,
+            },
+        ]);
+
+        let (chosen_0, _) = registry.register("req-1".to_string(), None).unwrap();
+        let (chosen_1, _) = registry.register("req-2".to_string(), None).unwrap();
+        let (chosen_0_again, _) = registry.register("req-3".to_string(), None).unwrap();
+
+        assert_eq!(chosen_0, engine_0);
+        assert_eq!(chosen_1, engine_1);
+        assert_eq!(chosen_0_again, engine_0);
+    }
+
+    #[test]
+    fn routing_score_uses_inflight_before_stats_arrive() {
+        let state = EngineRoutingState {
+            inflight: 3,
+            last_scheduler_stats: None,
+        };
+
+        assert_eq!(state.routing_score(), 3);
+    }
+
+    #[test]
+    fn routing_score_uses_inflight_as_scheduler_stats_lower_bound() {
+        let state = EngineRoutingState {
+            inflight: 7,
+            last_scheduler_stats: Some(EngineLoadSnapshot {
+                waiting: 0,
+                running: 2,
+            }),
+        };
+
+        assert_eq!(state.routing_score(), 7);
+    }
+
+    #[test]
+    fn routing_score_keeps_extra_waiting_penalty() {
+        let state = EngineRoutingState {
+            inflight: 1,
+            last_scheduler_stats: Some(EngineLoadSnapshot {
+                waiting: 3,
+                running: 2,
+            }),
+        };
+
+        assert_eq!(state.routing_score(), 14);
+    }
+
+    #[test]
+    fn registry_prefers_real_scheduler_stats_over_inflight() {
+        let engine_0 = EngineId::from_engine_index(0);
+        let engine_1 = EngineId::from_engine_index(1);
+        let mut registry = RequestRegistry::new(&[
+            ConnectedEngine {
+                engine_id: engine_0.clone(),
+                ready_response: None,
+            },
+            ConnectedEngine {
+                engine_id: engine_1.clone(),
+                ready_response: None,
+            },
+        ]);
+
+        assert!(registry.apply_scheduler_counts(
+            0,
+            EngineLoadSnapshot {
+                waiting: 3,
+                running: 2
+            }
+        ));
+        assert!(registry.apply_scheduler_counts(
+            1,
+            EngineLoadSnapshot {
+                waiting: 0,
+                running: 1
+            }
+        ));
+
+        let (chosen, _) = registry.register("req-stats".to_string(), None).unwrap();
+        assert_eq!(chosen, engine_1);
+    }
+
+    #[test]
+    fn register_with_data_parallel_rank_routes_to_specified_engine() {
+        let engine_0 = EngineId::from_engine_index(0);
+        let engine_1 = EngineId::from_engine_index(1);
+        let engine_2 = EngineId::from_engine_index(2);
+        let mut registry = RequestRegistry::new(&[
+            ConnectedEngine {
+                engine_id: engine_0.clone(),
+                ready_response: None,
+            },
+            ConnectedEngine {
+                engine_id: engine_1.clone(),
+                ready_response: None,
+            },
+            ConnectedEngine {
+                engine_id: engine_2.clone(),
+                ready_response: None,
+            },
+        ]);
+
+        // Explicitly target rank 2 (third engine).
+        let (chosen, _) = registry.register("req-1".to_string(), Some(2)).unwrap();
+        assert_eq!(chosen, engine_2);
+
+        // Explicitly target rank 0 (first engine).
+        let (chosen, _) = registry.register("req-2".to_string(), Some(0)).unwrap();
+        assert_eq!(chosen, engine_0);
+
+        // Explicitly target rank 1.
+        let (chosen, _) = registry.register("req-3".to_string(), Some(1)).unwrap();
+        assert_eq!(chosen, engine_1);
+    }
+
+    #[test]
+    fn register_with_data_parallel_rank_bypasses_load_balancing() {
+        let engine_0 = EngineId::from_engine_index(0);
+        let engine_1 = EngineId::from_engine_index(1);
+        let mut registry = RequestRegistry::new(&[
+            ConnectedEngine {
+                engine_id: engine_0.clone(),
+                ready_response: None,
+            },
+            ConnectedEngine {
+                engine_id: engine_1.clone(),
+                ready_response: None,
+            },
+        ]);
+
+        // Load-balance: first two go to engine_0 and engine_1.
+        registry.register("req-lb-0".to_string(), None).unwrap();
+
+        // Now engine_0 has 1 in-flight. Without dp_rank, next would go to engine_1.
+        // But with dp_rank=0, it should still go to engine_0.
+        let (chosen, _) = registry.register("req-dp".to_string(), Some(0)).unwrap();
+        assert_eq!(chosen, engine_0);
+    }
+
+    #[test]
+    fn register_with_out_of_range_rank_returns_error() {
+        let mut registry = RequestRegistry::new(&[
+            ConnectedEngine {
+                engine_id: EngineId::from_engine_index(0),
+                ready_response: None,
+            },
+            ConnectedEngine {
+                engine_id: EngineId::from_engine_index(1),
+                ready_response: None,
+            },
+        ]);
+
+        let error = registry.register("req-1".to_string(), Some(2)).unwrap_err();
+        assert!(matches!(
+            error,
+            crate::error::Error::InvalidDataParallelRank {
+                rank: 2,
+                num_engines: 2,
+            }
+        ));
+    }
+
+    #[test]
+    fn register_with_rank_on_single_engine_only_accepts_zero() {
+        let engine_0 = EngineId::from_engine_index(0);
+        let mut registry = RequestRegistry::new(&[ConnectedEngine {
+            engine_id: engine_0.clone(),
+            ready_response: None,
+        }]);
+
+        let (chosen, _) = registry.register("req-ok".to_string(), Some(0)).unwrap();
+        assert_eq!(chosen, engine_0);
+
+        let error = registry.register("req-bad".to_string(), Some(1)).unwrap_err();
+        assert!(matches!(
+            error,
+            crate::error::Error::InvalidDataParallelRank {
+                rank: 1,
+                num_engines: 1,
+            }
+        ));
+    }
+
+    #[test]
+    fn utility_registry_tracks_and_removes_call_ids() {
+        let mut registry = UtilityRegistry::default();
+        let (call_id_1, _) = registry.allocate_and_register();
+        let (call_id_2, _) = registry.allocate_and_register();
+
+        assert_eq!(call_id_1, 1);
+        assert_eq!(call_id_2, 2);
+        assert!(registry.contains(1));
+        assert!(registry.contains(2));
+        assert!(registry.resolve(&1).is_some());
+        assert!(!registry.contains(1));
+        assert!(registry.contains(2));
+    }
+
+    #[test]
+    fn utility_registry_closes_all_waiters_on_failure() {
+        let mut registry = UtilityRegistry::default();
+        registry.allocate_and_register();
+        registry.allocate_and_register();
+
+        let senders = registry.close();
+
+        assert_eq!(senders.len(), 2);
+        assert!(!registry.contains(1));
+        assert!(!registry.contains(2));
+        assert!(registry.is_closed());
+    }
+
+    #[test]
+    fn utility_registry_unregister_many_drops_pending_calls() {
+        use tokio::sync::oneshot::error::TryRecvError;
+
+        let mut registry = UtilityRegistry::default();
+        let (call_id_1, mut rx_1) = registry.allocate_and_register();
+        let (call_id_2, mut rx_2) = registry.allocate_and_register();
+        let (call_id_3, _rx_3) = registry.allocate_and_register();
+
+        // Drop two of the three allocated calls; the third stays pending.
+        registry.unregister_many([call_id_1, call_id_2]);
+
+        assert!(!registry.contains(call_id_1));
+        assert!(!registry.contains(call_id_2));
+        assert!(registry.contains(call_id_3));
+        // The receivers must observe the sender being dropped (channel closed).
+        assert!(matches!(rx_1.try_recv(), Err(TryRecvError::Closed)));
+        assert!(matches!(rx_2.try_recv(), Err(TryRecvError::Closed)));
+    }
+
+    #[test]
+    fn utility_registry_unregister_many_ignores_unknown_call_ids() {
+        let mut registry = UtilityRegistry::default();
+        let (call_id, _rx) = registry.allocate_and_register();
+
+        // Unknown call ids are silently ignored — caller doesn't care which were live.
+        registry.unregister_many([call_id, 42, 9999]);
+
+        assert!(!registry.contains(call_id));
+    }
+}
diff --git a/rust/src/engine-core-client/src/client/stream.rs b/rust/src/engine-core-client/src/client/stream.rs
new file mode 100644
index 000000000000..3cbb215b0ef0
--- /dev/null
+++ b/rust/src/engine-core-client/src/client/stream.rs
@@ -0,0 +1,153 @@
+use std::ops::Deref;
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+use futures::Stream;
+use futures::stream::FusedStream;
+use thiserror_ext::AsReport as _;
+use tokio::sync::mpsc;
+use tracing::{debug, error, warn};
+
+use crate::client::AbortRequest;
+use crate::client::state::OutputReceiver;
+use crate::protocol::{EngineCoreFinishReason, EngineCoreOutput};
+use crate::{AbortCause, Error, Result};
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum State {
+    Running,
+    Finished,
+    ClosedWithError,
+    UnexpectedClose,
+}
+
+/// One request-scoped engine-core output plus the enclosing batch metadata.
+#[derive(Debug, Clone, PartialEq)]
+pub struct EngineCoreStreamOutput {
+    pub engine_index: u32,
+    pub timestamp: f64,
+    pub output: EngineCoreOutput,
+}
+
+impl Deref for EngineCoreStreamOutput {
+    type Target = EngineCoreOutput;
+
+    fn deref(&self) -> &Self::Target {
+        &self.output
+    }
+}
+
+/// Stream of raw engine-core outputs for one request.
+///
+/// The stream yields only [`EngineCoreStreamOutput`] values whose embedded
+/// output `request_id` matches the originating `add_request()` call. Normal
+/// request completion is expected to include a final output object whose
+/// `finish_reason` is non-`None`.
+pub struct EngineCoreOutputStream {
+    request_id: String,
+    abort_tx: mpsc::UnboundedSender<AbortRequest>,
+    state: State,
+    rx: OutputReceiver,
+}
+
+impl EngineCoreOutputStream {
+    pub(crate) fn new(
+        request_id: String,
+        abort_tx: mpsc::UnboundedSender<AbortRequest>,
+        rx: OutputReceiver,
+    ) -> Self {
+        Self {
+            request_id,
+            abort_tx,
+            state: State::Running,
+            rx,
+        }
+    }
+
+    /// Return the engine-core `request_id` bound to this stream.
+    pub fn request_id(&self) -> &str {
+        &self.request_id
+    }
+}
+
+impl Stream for EngineCoreOutputStream {
+    type Item = Result<EngineCoreStreamOutput>;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        if self.is_terminated() {
+            return Poll::Ready(None);
+        }
+
+        match Pin::new(&mut self.rx).poll_recv(cx) {
+            Poll::Pending => Poll::Pending,
+            Poll::Ready(Some(item)) => {
+                match &item {
+                    Ok(output) => {
+                        // If the output indicates the request is finished, mark the stream as
+                        // terminated with cleanly-finished state and expect no more outputs to
+                        // come.
+                        if output.finished() {
+                            if output.finish_reason == Some(EngineCoreFinishReason::Error) {
+                                error!(
+                                    self.request_id,
+                                    "request failed with an internal error during generation"
+                                );
+                            }
+                            debug!(self.request_id, "request completed via final output");
+                            self.state = State::Finished;
+                        }
+                    }
+                    Err(error) => {
+                        // If we get an error from the output stream, mark the stream as terminated
+                        // with an error.
+                        warn!(self.request_id, error = %error.as_report(), "request encountered an error");
+                        self.state = State::ClosedWithError;
+                    }
+                }
+                Poll::Ready(Some(item))
+            }
+            Poll::Ready(None) => {
+                // If we get a `None` without seeing a finished output, this is an unexpected
+                // close from the engine side. Mark the stream as terminated
+                // with an unexpected close state and send an error down the
+                // stream to notify the caller.
+                warn!(self.request_id, "request stream closed unexpectedly");
+                self.state = State::UnexpectedClose;
+
+                Poll::Ready(Some(Err(Error::RequestStreamClosed {
+                    request_id: self.request_id.clone(),
+                })))
+            }
+        }
+    }
+}
+
+impl FusedStream for EngineCoreOutputStream {
+    fn is_terminated(&self) -> bool {
+        !matches!(self.state, State::Running)
+    }
+}
+
+impl Drop for EngineCoreOutputStream {
+    fn drop(&mut self) {
+        if self.is_terminated() {
+            // If it's terminated, it means that the request either finished cleanly, or
+            // encountered an error or unexpected close from the engine. In any
+            // case, the request stream is already considered inactive and
+            // there's no need to abort it on the engine side.
+            return;
+        }
+
+        let abort_req = AbortRequest {
+            request_id: self.request_id.clone(),
+            cause: AbortCause::current(),
+        };
+
+        if self.abort_tx.send(abort_req).is_err() {
+            warn!(
+                request_id = self.request_id,
+                "auto-abort worker already shut down; skip auto-abort"
+            );
+        }
+    }
+}
diff --git a/rust/src/engine-core-client/src/coordinator/bootstrap.rs b/rust/src/engine-core-client/src/coordinator/bootstrap.rs
new file mode 100644
index 000000000000..8c6855bfe77a
--- /dev/null
+++ b/rust/src/engine-core-client/src/coordinator/bootstrap.rs
@@ -0,0 +1,91 @@
+use std::time::Duration;
+
+use bytes::Bytes;
+use zeromq::prelude::{Socket, SocketRecv, SocketSend};
+use zeromq::{PullSocket, XPubSocket, ZmqMessage};
+
+use crate::error::{Error, Result, bail_unexpected_handshake_message};
+
+/// Engine-facing sockets owned by the in-process coordinator.
+pub(crate) struct CoordinatorBootstrap {
+    pub input_address: String,
+    pub output_address: String,
+    pub input_socket: XPubSocket,
+    pub output_socket: PullSocket,
+}
+
+impl CoordinatorBootstrap {
+    /// Bind the engine-facing coordinator sockets on the given host.
+    pub(crate) async fn bind(local_host: &str) -> Result<Self> {
+        let mut input_socket = XPubSocket::new();
+        let input_address = input_socket.bind(&format!("tcp://{local_host}:0")).await?.to_string();
+
+        let mut output_socket = PullSocket::new();
+        let output_address =
+            output_socket.bind(&format!("tcp://{local_host}:0")).await?.to_string();
+
+        Ok(Self {
+            input_address,
+            output_address,
+            input_socket,
+            output_socket,
+        })
+    }
+
+    /// Complete the engine-facing startup gate before engines are allowed to
+    /// send handshake READY.
+    pub(crate) async fn wait_for_startup_gate(
+        &mut self,
+        engine_count: usize,
+        ready_timeout: Duration,
+    ) -> Result<()> {
+        wait_for_engine_subscriptions(&mut self.input_socket, engine_count, ready_timeout).await?;
+        send_ready_to_engines(&mut self.input_socket).await?;
+        Ok(())
+    }
+}
+
+/// Wait until all engines subscribe to the coordinator broadcast socket.
+async fn wait_for_engine_subscriptions(
+    input_socket: &mut XPubSocket,
+    engine_count: usize,
+    ready_timeout: Duration,
+) -> Result<()> {
+    let mut received = 0;
+    while received < engine_count {
+        let message =
+            tokio::time::timeout(ready_timeout, input_socket.recv()).await.map_err(|_| {
+                Error::HandshakeTimeout {
+                    stage: "coordinator engine subscriptions",
+                    timeout: ready_timeout,
+                }
+            })??;
+        if message.len() != 1 {
+            bail_unexpected_handshake_message!(
+                "expected 1 frame for coordinator subscription, got {}",
+                message.len()
+            );
+        }
+
+        let frame = message
+            .into_vec()
+            .into_iter()
+            .next()
+            .expect("single-frame coordinator subscription message");
+        if frame.as_ref() != [0x01] {
+            bail_unexpected_handshake_message!(
+                "expected coordinator subscription frame [0x01], got {:?}",
+                frame.as_ref()
+            );
+        }
+        received += 1;
+    }
+
+    Ok(())
+}
+
+/// Send the coordinator READY marker to all subscribed engines.
+async fn send_ready_to_engines(input_socket: &mut XPubSocket) -> Result<()> {
+    input_socket.send(ZmqMessage::from(Bytes::from_static(b"READY"))).await?;
+    Ok(())
+}
diff --git a/rust/src/engine-core-client/src/coordinator/external.rs b/rust/src/engine-core-client/src/coordinator/external.rs
new file mode 100644
index 000000000000..eb447c4809e3
--- /dev/null
+++ b/rust/src/engine-core-client/src/coordinator/external.rs
@@ -0,0 +1,167 @@
+use std::sync::Arc;
+
+use serde_tuple::{Deserialize_tuple, Serialize_tuple};
+use thiserror_ext::AsReport;
+use tokio::sync::mpsc;
+use tracing::{debug, warn};
+use zeromq::prelude::{SocketRecv, SocketSend};
+use zeromq::{XSubSocket, ZmqMessage};
+
+use crate::client::imp::ClientInner;
+use crate::coordinator::handle::{CoordinatorCommand, CoordinatorState};
+use crate::error::{Error, Result, bail_unexpected_coordinator_output};
+use crate::protocol::{OpaqueValue, decode_msgpack, encode_msgpack};
+
+/// Frontend-to-coordinator wakeup message sent when the first request arrives
+/// while all engines are paused.
+///
+/// This matches the frontend-side msgpack tuple sent by Python
+/// `DPAsyncMPClient._ensure_stats_update_task` to the coordinator front socket.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/694449050f8dac3d9853e97e518b4a43ec52106a/vllm/v1/engine/core_client.py#L1230-L1236>
+#[derive(Debug, Clone, PartialEq, Eq, Serialize_tuple, Deserialize_tuple)]
+struct CoordinatorWakeupMessage {
+    /// Engine index that already has the triggering request and should be
+    /// excluded from the coordinator's `START_DP_WAVE` rebroadcast.
+    exclude_engine_index: u32,
+    /// DP wave number observed by the frontend when the request was admitted.
+    wave: u32,
+}
+
+/// Coordinator-to-frontend state publish received on the front-side coordinator
+/// socket.
+///
+/// This matches the msgpack tuple periodically published by Python
+/// `DPCoordinatorProc.run_coordinator` to all connected frontends.
+///
+/// Original Python definitions:
+/// <https://github.com/vllm-project/vllm/blob/694449050f8dac3d9853e97e518b4a43ec52106a/vllm/v1/engine/coordinator.py#L282-L283>
+/// <https://github.com/vllm-project/vllm/blob/694449050f8dac3d9853e97e518b4a43ec52106a/vllm/v1/engine/coordinator.py#L445-L447>
+#[derive(Debug, Clone, PartialEq, Deserialize_tuple)]
+struct CoordinatorStateUpdate {
+    /// Global per-engine request counts published by the coordinator.
+    ///
+    /// The Rust bootstrapped external-coordinator path preserves this field for
+    /// wire compatibility but intentionally ignores it for routing decisions.
+    counts: OpaqueValue,
+    /// Current global DP wave number stamped onto newly admitted requests.
+    wave: u32,
+    /// Whether engines are currently running (`true`) or paused (`false`).
+    engines_running: bool,
+}
+
+/// Background half of an external Python-owned coordinator connection.
+///
+/// This owns the command receiver and one frontend-facing XSUB socket. It
+/// mirrors the subset of Python's coordinator protocol needed by the Rust
+/// bootstrapped frontend: receive `(counts, wave, running)` publishes, ignore
+/// `counts`, and send `(exclude_engine_index, wave)` wakeup messages when the
+/// first request arrives while engines are paused.
+pub(crate) struct ExternalCoordinatorService {
+    state: Arc<CoordinatorState>,
+    command_rx: mpsc::UnboundedReceiver<CoordinatorCommand>,
+    socket: XSubSocket,
+}
+
+impl ExternalCoordinatorService {
+    pub(super) fn new(
+        state: Arc<CoordinatorState>,
+        command_rx: mpsc::UnboundedReceiver<CoordinatorCommand>,
+        socket: XSubSocket,
+    ) -> Self {
+        Self {
+            state,
+            command_rx,
+            socket,
+        }
+    }
+
+    /// Apply one frontend-originated command to the external coordinator state
+    /// machine.
+    async fn handle_command(&mut self, command: CoordinatorCommand) -> Result<()> {
+        match command {
+            CoordinatorCommand::FirstRequest {
+                target_engine_id,
+                wave,
+            } => {
+                let target_engine_index = target_engine_id.engine_index().ok_or_else(|| {
+                    Error::UnsupportedCoordinatorEngineId {
+                        engine_id: target_engine_id.to_vec(),
+                    }
+                })?;
+                debug!(
+                    wave,
+                    exclude_engine_index = target_engine_index,
+                    "notifying external coordinator about first request while engines were paused"
+                );
+                let payload = encode_msgpack(&CoordinatorWakeupMessage {
+                    exclude_engine_index: target_engine_index,
+                    wave,
+                })?;
+                self.socket.send(ZmqMessage::from(payload)).await?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Apply one publish received from the xsub socket containing a coordinator
+    /// state update.
+    async fn handle_publish(&mut self, message: ZmqMessage) -> Result<()> {
+        let frames = message.into_vec();
+        if frames.len() != 1 {
+            bail_unexpected_coordinator_output!(
+                "received malformed external coordinator publish with {} frame(s)",
+                frames.len()
+            );
+        }
+
+        let update: CoordinatorStateUpdate = decode_msgpack(&frames[0])?;
+
+        let mut state = self.state.lock();
+        let previous_wave = state.current_wave;
+        let previous_engines_running = state.engines_running;
+        state.current_wave = update.wave;
+        state.engines_running = update.engines_running;
+        debug!(
+            previous_wave,
+            wave = update.wave,
+            previous_engines_running,
+            engines_running = update.engines_running,
+            "applied external coordinator state update"
+        );
+        Ok(())
+    }
+
+    /// Drive the coordinator event loop until either side of the control plane
+    /// is closed or a fatal error is observed.
+    pub(crate) async fn run(mut self, inner: Arc<ClientInner>) {
+        let result: Result<()> = async {
+            loop {
+                tokio::select! {
+                    // Received frontend-originated command from the handle.
+                    command = self.command_rx.recv() => {
+                        let Some(command) = command else {
+                            warn!("external coordinator command channel closed, shutting down service");
+                            return Ok(());
+                        };
+                        self.handle_command(command).await?;
+                    }
+                    // Received publish from the external coordinator socket.
+                    publish = self.socket.recv() => {
+                        let publish = publish.map_err(Error::from)?;
+                        self.handle_publish(publish).await?;
+                    }
+                }
+            }
+        }
+        .await;
+        let Err(error) = result else { return };
+
+        warn!(
+            error = %error.as_report(),
+            "external coordinator service exiting with error"
+        );
+        inner.close_registries(Arc::new(error));
+    }
+}
diff --git a/rust/src/engine-core-client/src/coordinator/handle.rs b/rust/src/engine-core-client/src/coordinator/handle.rs
new file mode 100644
index 000000000000..dca6f70de1f8
--- /dev/null
+++ b/rust/src/engine-core-client/src/coordinator/handle.rs
@@ -0,0 +1,123 @@
+use std::sync::Arc;
+
+use parking_lot::Mutex;
+use tokio::sync::mpsc;
+use zeromq::prelude::Socket;
+use zeromq::{XPubSocket, XSubSocket};
+
+use crate::coordinator::external::ExternalCoordinatorService;
+use crate::coordinator::inproc::InProcCoordinatorRunner;
+use crate::error::{Error, Result, bail_control_closed};
+use crate::transport::EngineId;
+
+/// Snapshot to the coordinator state for request routing and stamping.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) struct CoordinatorStateSnapshot {
+    /// The current DP wave, which will be stamped on outgoing requests.
+    pub current_wave: u32,
+    /// Whether the engines are currently running or paused, which determines if
+    /// the frontend must trigger a new wave on the next request.
+    pub engines_running: bool,
+}
+
+/// Shared in-process coordinator state.
+pub(crate) type CoordinatorState = Mutex<CoordinatorStateSnapshot>;
+
+/// Commands sent from the frontend request path into the background runner.
+#[derive(Debug)]
+pub(crate) enum CoordinatorCommand {
+    /// The first request arrived while all engines were paused.
+    ///
+    /// The coordinator should broadcast `START_DP_WAVE` with the current wave
+    /// and the target engine index as the excluded engine.
+    FirstRequest {
+        target_engine_id: EngineId,
+        wave: u32,
+    },
+}
+
+/// Frontend-facing coordinator handle used by `EngineCoreClient::call()`.
+///
+/// This side stays intentionally small: it can read the latest wave snapshot
+/// and enqueue a `FirstRequest` transition when the request path observes the
+/// system in the paused state.
+#[derive(Clone)]
+pub(crate) struct CoordinatorHandle {
+    state: Arc<CoordinatorState>,
+    command_tx: mpsc::UnboundedSender<CoordinatorCommand>,
+}
+
+impl CoordinatorHandle {
+    fn new_parts() -> (
+        Self,
+        Arc<CoordinatorState>,
+        mpsc::UnboundedReceiver<CoordinatorCommand>,
+    ) {
+        let state = Arc::new(Mutex::new(CoordinatorStateSnapshot {
+            current_wave: 0,
+            engines_running: false,
+        }));
+        let (command_tx, command_rx) = mpsc::unbounded_channel();
+        (
+            Self {
+                state: state.clone(),
+                command_tx,
+            },
+            state,
+            command_rx,
+        )
+    }
+
+    /// Build the paired frontend handle and background runner around one
+    /// engine-facing coordinator broadcast socket.
+    pub(crate) fn new_inproc(coordinator_input: XPubSocket) -> (Self, InProcCoordinatorRunner) {
+        let (handle, state, command_rx) = Self::new_parts();
+        (
+            handle,
+            InProcCoordinatorRunner::new(state, command_rx, coordinator_input),
+        )
+    }
+
+    /// Build the paired frontend handle and background service around an
+    /// external Python-owned frontend-side coordinator socket.
+    pub(crate) async fn connect_external(
+        coordinator_address: &str,
+    ) -> Result<(Self, ExternalCoordinatorService)> {
+        let (handle, state, command_rx) = Self::new_parts();
+        let mut socket = XSubSocket::new();
+        socket.connect(coordinator_address).await?;
+        socket.subscribe("").await?;
+        Ok((
+            handle,
+            ExternalCoordinatorService::new(state, command_rx, socket),
+        ))
+    }
+
+    /// Snapshot the coordinator state for request routing and stamping.
+    pub(crate) fn snapshot(&self) -> CoordinatorStateSnapshot {
+        *self.state.lock()
+    }
+
+    /// Notify the runner that a new request arrived while engines were paused.
+    ///
+    /// The handle flips `engines_running` optimistically so concurrent request
+    /// submissions coalesce behind one `START_DP_WAVE` broadcast instead of all
+    /// trying to trigger the wave independently.
+    pub(crate) fn notify_first_request(&self, target_engine_id: EngineId) -> Result<()> {
+        let mut state = self.state.lock();
+        if state.engines_running {
+            return Ok(());
+        }
+
+        let command = CoordinatorCommand::FirstRequest {
+            target_engine_id,
+            wave: state.current_wave,
+        };
+        if self.command_tx.send(command).is_err() {
+            bail_control_closed!("in-process coordinator command channel already shut down");
+        }
+
+        state.engines_running = true;
+        Ok(())
+    }
+}
diff --git a/rust/src/engine-core-client/src/coordinator/inproc.rs b/rust/src/engine-core-client/src/coordinator/inproc.rs
new file mode 100644
index 000000000000..26ab0c5a0e7c
--- /dev/null
+++ b/rust/src/engine-core-client/src/coordinator/inproc.rs
@@ -0,0 +1,204 @@
+use std::sync::Arc;
+
+use serde_tuple::{Deserialize_tuple, Serialize_tuple};
+use thiserror_ext::AsReport;
+use tokio::sync::mpsc;
+use tracing::{debug, warn};
+use zeromq::prelude::SocketSend;
+use zeromq::{XPubSocket, ZmqMessage};
+
+use crate::client::imp::ClientInner;
+use crate::coordinator::handle::{CoordinatorCommand, CoordinatorState};
+use crate::error::{Error, Result, bail_unexpected_coordinator_output};
+use crate::protocol::{
+    ClassifiedEngineCoreOutputs, DpControlMessage, EngineCoreOutputs, EngineCoreRequestType,
+    encode_msgpack,
+};
+
+/// Coordinator-to-engine `START_DP_WAVE` control payload encoded on the
+/// engine-facing coordinator socket.
+///
+/// This matches the msgpack tuple broadcast by Python
+/// `DPCoordinatorProc._send_start_wave`.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/694449050f8dac3d9853e97e518b4a43ec52106a/vllm/v1/engine/coordinator.py#L453-L459>
+#[derive(Debug, Clone, PartialEq, Eq, Serialize_tuple, Deserialize_tuple)]
+struct StartDpWaveMessage {
+    /// DP wave number that all engines should start processing.
+    wave: u32,
+    /// Engine index that already received the triggering request and should not
+    /// receive an extra wakeup notification.
+    exclude_engine_index: u32,
+}
+
+/// Background half of the in-process coordinator.
+///
+/// This owns the command receiver and the engine-facing coordinator input
+/// socket. It is the single place where wave transitions are serialized and
+/// where `START_DP_WAVE` broadcasts are emitted.
+pub(crate) struct InProcCoordinatorRunner {
+    state: Arc<CoordinatorState>,
+    command_rx: mpsc::UnboundedReceiver<CoordinatorCommand>,
+    coordinator_input: XPubSocket,
+}
+
+impl InProcCoordinatorRunner {
+    pub(super) fn new(
+        state: Arc<CoordinatorState>,
+        command_rx: mpsc::UnboundedReceiver<CoordinatorCommand>,
+        coordinator_input: XPubSocket,
+    ) -> Self {
+        Self {
+            state,
+            command_rx,
+            coordinator_input,
+        }
+    }
+
+    /// Broadcast Python-compatible `START_DP_WAVE` to all connected engines.
+    async fn broadcast_start_wave(&mut self, wave: u32, exclude_engine_index: u32) -> Result<()> {
+        let payload = encode_msgpack(&StartDpWaveMessage {
+            wave,
+            exclude_engine_index,
+        })?;
+        self.coordinator_input
+            .send(
+                ZmqMessage::try_from(vec![
+                    EngineCoreRequestType::StartDpWave.to_frame(),
+                    payload.into(),
+                ])
+                .expect("coordinator START_DP_WAVE message must contain two frames"),
+            )
+            .await?;
+        Ok(())
+    }
+
+    /// Apply one frontend-originated command to the coordinator state machine.
+    async fn handle_command(&mut self, command: CoordinatorCommand) -> Result<()> {
+        match command {
+            CoordinatorCommand::FirstRequest {
+                target_engine_id,
+                wave,
+            } => {
+                let target_engine_index = target_engine_id.engine_index().ok_or_else(|| {
+                    Error::UnsupportedCoordinatorEngineId {
+                        engine_id: target_engine_id.to_vec(),
+                    }
+                })?;
+                self.state.lock().current_wave = wave;
+                debug!(
+                    wave,
+                    exclude_engine_index = target_engine_index,
+                    "starting DP wave after first request while engines were paused"
+                );
+                self.broadcast_start_wave(wave, target_engine_index).await?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Apply one engine-originated control output to the coordinator state
+    /// machine.
+    async fn handle_outputs(&mut self, outputs: EngineCoreOutputs) -> Result<()> {
+        match outputs.classify() {
+            ClassifiedEngineCoreOutputs::RequestBatch(batch)
+                if batch.outputs.is_empty() && batch.finished_requests.is_none() =>
+            {
+                // Stats-only output for coordinator.
+                // Ignore since the Rust coordinator doesn't track stats for
+                // routing decisions.
+            }
+            ClassifiedEngineCoreOutputs::DpControl {
+                engine_index,
+                control,
+                ..
+            } => match control {
+                // The engines signals they completed the current wave and are now paused.
+                // Advance the current wave and mark the state as paused.
+                DpControlMessage::WaveComplete(wave) => {
+                    let mut state = self.state.lock();
+                    if wave >= state.current_wave {
+                        let next_wave = wave + 1;
+                        debug!(
+                            wave,
+                            next_wave,
+                            "DP wave finished; pausing engines and advancing coordinator state"
+                        );
+                        state.current_wave = wave + 1;
+                        state.engines_running = false;
+                    }
+                }
+                // An engine requests to start the wave.
+                // Rebroadcast the wave to all engines except for the originated one.
+                DpControlMessage::StartWave(wave) => {
+                    let should_broadcast = {
+                        let mut state = self.state.lock();
+                        if wave > state.current_wave
+                            || (wave == state.current_wave && !state.engines_running)
+                        {
+                            state.current_wave = wave;
+                            state.engines_running = true;
+                            true
+                        } else {
+                            false
+                        }
+                    };
+                    if should_broadcast {
+                        debug!(
+                            wave,
+                            exclude_engine_index = engine_index,
+                            "starting DP wave after stale-wave notification from engine"
+                        );
+                        self.broadcast_start_wave(wave, engine_index).await?;
+                    }
+                }
+            },
+            other => {
+                bail_unexpected_coordinator_output!(
+                    "received non-control output on coordinator path: {other:?}"
+                );
+            }
+        }
+        Ok(())
+    }
+
+    /// Drive the coordinator event loop until either side of the control plane
+    /// is closed or a fatal error is observed.
+    ///
+    /// Any fatal error closes the main client registries so request streams and
+    /// future calls observe a stable shutdown cause.
+    pub(crate) async fn run(
+        mut self,
+        mut output_rx: mpsc::Receiver<Result<EngineCoreOutputs>>,
+        inner: Arc<ClientInner>,
+    ) {
+        let result: Result<()> = async {
+            loop {
+                tokio::select! {
+                    // Received frontend-originated command from the handle.
+                    command = self.command_rx.recv() => {
+                        let Some(command) = command else {
+                            warn!("coordinator command channel closed, shutting down coordinator runner");
+                            return Ok(());
+                        };
+                        self.handle_command(command).await?;
+                    }
+                    // Received engine-originated control output from the coordinator socket.
+                    outputs = output_rx.recv() => {
+                        let Some(outputs) = outputs else {
+                            warn!("coordinator output channel closed, shutting down coordinator runner");
+                            return Ok(());
+                        };
+                        self.handle_outputs(outputs?).await?;
+                    }
+                }
+            }
+        }
+        .await;
+        let Err(error) = result else { return };
+
+        warn!(error = %error.as_report(), "coordinator runner exiting with error");
+        inner.close_registries(Arc::new(error));
+    }
+}
diff --git a/rust/src/engine-core-client/src/coordinator/mod.rs b/rust/src/engine-core-client/src/coordinator/mod.rs
new file mode 100644
index 000000000000..e4740a3739fd
--- /dev/null
+++ b/rust/src/engine-core-client/src/coordinator/mod.rs
@@ -0,0 +1,7 @@
+mod bootstrap;
+mod external;
+mod handle;
+mod inproc;
+
+pub(crate) use bootstrap::CoordinatorBootstrap;
+pub(crate) use handle::CoordinatorHandle;
diff --git a/rust/src/engine-core-client/src/error.rs b/rust/src/engine-core-client/src/error.rs
new file mode 100644
index 000000000000..1ce03c407869
--- /dev/null
+++ b/rust/src/engine-core-client/src/error.rs
@@ -0,0 +1,90 @@
+use std::sync::Arc;
+use std::time::Duration;
+
+use thiserror::Error;
+use thiserror_ext::Macro;
+
+use crate::protocol::utility::UtilityCallId;
+
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// Public error type for the Rust engine-core client.
+#[derive(Debug, Error, Macro)]
+pub enum Error {
+    #[error("messagepack encode failed for {target_type}: {message}")]
+    Encode {
+        target_type: &'static str,
+        message: String,
+    },
+    #[error("messagepack decode failed for {target_type}: {message}")]
+    Decode {
+        target_type: &'static str,
+        message: String,
+    },
+    #[error("messagepack value decode failed")]
+    ValueDecode(#[from] rmpv::decode::Error),
+    #[error("messagepack ext value decode failed: {message}")]
+    ExtValueDecode { message: String },
+    #[error("io error")]
+    Io(#[from] std::io::Error),
+    #[error("transport error")]
+    Transport(#[from] zeromq::ZmqError),
+    #[error("engine core reported fatal failure")]
+    EngineCoreDead,
+    #[error("startup handshake timed out while waiting for {stage} after {timeout:?}")]
+    HandshakeTimeout {
+        stage: &'static str,
+        timeout: Duration,
+    },
+    #[error("engine input registration timed out after {timeout:?}")]
+    InputRegistrationTimeout { timeout: Duration },
+    #[error("unexpected engine id in startup handshake: expected {expected:?}, got {actual:?}")]
+    UnexpectedHandshakeIdentity { expected: Vec<u8>, actual: Vec<u8> },
+    #[error("unexpected startup handshake message: {message}")]
+    UnexpectedHandshakeMessage { message: String },
+    #[error("unexpected non-control output on coordinator path: {message}")]
+    UnexpectedCoordinatorOutput { message: String },
+    #[error("unexpected output on main dispatcher path: {message}")]
+    UnexpectedDispatcherOutput { message: String },
+    #[error("coordinator requires a Python-compatible two-byte engine id, got {engine_id:?}")]
+    UnsupportedCoordinatorEngineId { engine_id: Vec<u8> },
+    #[error("unsupported auxiliary frame(s): expected 1 frame, got {frame_count}")]
+    UnsupportedAuxFrames { frame_count: usize },
+    #[error("external coordinator mode is not implemented yet")]
+    UnsupportedExternalCoordinator,
+    #[error("unsupported field `{field}` in {context}")]
+    UnsupportedField {
+        context: &'static str,
+        field: &'static str,
+    },
+    #[error("engine control channel closed unexpectedly: {message}")]
+    ControlClosed { message: String },
+    #[error("request `{request_id}` is already in flight")]
+    DuplicateRequestId { request_id: String },
+    #[error("data parallel rank {rank} is out of range for {num_engines} engine(s)")]
+    InvalidDataParallelRank { rank: u32, num_engines: u32 },
+    #[error("engine-core output dispatcher closed: {message}")]
+    DispatcherClosed { message: String },
+    #[error("engine-core client is closed: {message}")]
+    ClientClosed { message: String },
+    #[error("request output stream for `{request_id}` closed unexpectedly")]
+    RequestStreamClosed { request_id: String },
+    #[error("utility call `{method}` failed (call_id={call_id}): {message}")]
+    UtilityCallFailed {
+        method: String,
+        call_id: UtilityCallId,
+        message: String,
+    },
+    #[error("utility call `{method}` returned an invalid result (call_id={call_id}): {message}")]
+    UtilityResultDecode {
+        method: String,
+        call_id: UtilityCallId,
+        message: String,
+    },
+    #[error("utility call `{method}` closed unexpectedly (call_id={call_id})")]
+    UtilityCallClosed { method: String, call_id: u64 },
+
+    /// A special variant to allow cloning the same error.
+    #[error(transparent)]
+    Shared(Arc<Self>),
+}
diff --git a/rust/src/engine-core-client/src/lib.rs b/rust/src/engine-core-client/src/lib.rs
new file mode 100644
index 000000000000..914ce874e9e2
--- /dev/null
+++ b/rust/src/engine-core-client/src/lib.rs
@@ -0,0 +1,18 @@
+mod client;
+mod coordinator;
+mod error;
+mod metrics;
+pub mod protocol;
+#[cfg(any(test, feature = "test-util"))]
+pub mod test_utils;
+mod transport;
+
+pub use client::{
+    AbortCause, CoordinatorMode, EngineCoreClient, EngineCoreClientConfig, EngineCoreOutputStream,
+    EngineCoreStreamOutput, TransportMode,
+};
+pub use error::{Error, Result};
+pub use transport::{ENGINE_CORE_DEAD_SENTINEL, EngineId};
+
+#[cfg(test)]
+mod tests;
diff --git a/rust/src/engine-core-client/src/metrics.rs b/rust/src/engine-core-client/src/metrics.rs
new file mode 100644
index 000000000000..8f4593961982
--- /dev/null
+++ b/rust/src/engine-core-client/src/metrics.rs
@@ -0,0 +1,131 @@
+use vllm_metrics::{EngineLabels, EnginePositionLabels, SchedulerMetrics, WaitingReasonLabels};
+
+use crate::protocol::stats::SchedulerStats;
+
+const WAITING_REASON_CAPACITY: &str = "capacity";
+const WAITING_REASON_DEFERRED: &str = "deferred";
+
+/// Record the scheduler-stats-backed metrics for one engine at one point in
+/// time.
+pub(crate) fn record_scheduler_stats(
+    metrics: &SchedulerMetrics,
+    model_name: impl Into<String>,
+    engine: u32,
+    stats: &SchedulerStats,
+) {
+    let model_name = model_name.into();
+    let labels = EngineLabels {
+        model_name: model_name.clone(),
+        engine,
+    };
+
+    // Scheduler state gauges.
+    metrics.scheduler_running.get_or_create(&labels).set(stats.num_running_reqs);
+    metrics
+        .scheduler_waiting
+        .get_or_create(&labels)
+        .set(stats.num_waiting_reqs + stats.num_skipped_waiting_reqs);
+    metrics
+        .scheduler_waiting_by_reason
+        .get_or_create(&WaitingReasonLabels {
+            model_name: model_name.clone(),
+            engine,
+            reason: WAITING_REASON_CAPACITY,
+        })
+        .set(stats.num_waiting_reqs);
+    metrics
+        .scheduler_waiting_by_reason
+        .get_or_create(&WaitingReasonLabels {
+            model_name: model_name.clone(),
+            engine,
+            reason: WAITING_REASON_DEFERRED,
+        })
+        .set(stats.num_skipped_waiting_reqs);
+    metrics.kv_cache_usage.get_or_create(&labels).set(stats.kv_cache_usage);
+
+    // Prefix-cache counters, including the connector-backed external cache path.
+    metrics
+        .prefix_cache_queries
+        .get_or_create(&labels)
+        .inc_by(stats.prefix_cache_stats.base.queries);
+    metrics
+        .prefix_cache_hits
+        .get_or_create(&labels)
+        .inc_by(stats.prefix_cache_stats.base.hits);
+
+    if let Some(connector_prefix_cache_stats) = &stats.connector_prefix_cache_stats {
+        metrics
+            .external_prefix_cache_queries
+            .get_or_create(&labels)
+            .inc_by(connector_prefix_cache_stats.base.queries);
+        metrics
+            .external_prefix_cache_hits
+            .get_or_create(&labels)
+            .inc_by(connector_prefix_cache_stats.base.hits);
+    }
+
+    // Speculative decoding counters.
+    if let Some(spec_decoding_stats) = &stats.spec_decoding_stats {
+        metrics
+            .spec_decode_num_drafts
+            .get_or_create(&labels)
+            .inc_by(spec_decoding_stats.num_drafts);
+        metrics
+            .spec_decode_num_draft_tokens
+            .get_or_create(&labels)
+            .inc_by(spec_decoding_stats.num_draft_tokens);
+        metrics
+            .spec_decode_num_accepted_tokens
+            .get_or_create(&labels)
+            .inc_by(spec_decoding_stats.num_accepted_tokens);
+
+        for (position, accepted_tokens) in
+            spec_decoding_stats.num_accepted_tokens_per_pos.iter().copied().enumerate()
+        {
+            metrics
+                .spec_decode_num_accepted_tokens_per_pos
+                .get_or_create(&EnginePositionLabels {
+                    model_name: model_name.clone(),
+                    engine,
+                    position: position as u32,
+                })
+                .inc_by(accepted_tokens);
+        }
+    }
+
+    // Per-engine performance / MFU counters.
+    if let Some(perf_stats) = &stats.perf_stats
+        && (perf_stats.num_flops_per_gpu != 0
+            || perf_stats.num_read_bytes_per_gpu != 0
+            || perf_stats.num_write_bytes_per_gpu != 0)
+    {
+        metrics
+            .estimated_flops_per_gpu
+            .get_or_create(&labels)
+            .inc_by(perf_stats.num_flops_per_gpu);
+        metrics
+            .estimated_read_bytes_per_gpu
+            .get_or_create(&labels)
+            .inc_by(perf_stats.num_read_bytes_per_gpu);
+        metrics
+            .estimated_write_bytes_per_gpu
+            .get_or_create(&labels)
+            .inc_by(perf_stats.num_write_bytes_per_gpu);
+    }
+
+    // Sampled KV-cache residency histograms.
+    if !stats.kv_cache_eviction_events.is_empty() {
+        let kv_block_lifetime_seconds = metrics.kv_block_lifetime_seconds.get_or_create(&labels);
+        let kv_block_idle_before_evict_seconds =
+            metrics.kv_block_idle_before_evict_seconds.get_or_create(&labels);
+        let kv_block_reuse_gap_seconds = metrics.kv_block_reuse_gap_seconds.get_or_create(&labels);
+
+        for event in &stats.kv_cache_eviction_events {
+            kv_block_lifetime_seconds.observe(event.lifetime_seconds);
+            kv_block_idle_before_evict_seconds.observe(event.idle_seconds);
+            for reuse_gap_seconds in &event.reuse_gaps_seconds {
+                kv_block_reuse_gap_seconds.observe(*reuse_gap_seconds);
+            }
+        }
+    }
+}
diff --git a/rust/src/engine-core-client/src/protocol/classified_outputs.rs b/rust/src/engine-core-client/src/protocol/classified_outputs.rs
new file mode 100644
index 000000000000..d572f8f925b3
--- /dev/null
+++ b/rust/src/engine-core-client/src/protocol/classified_outputs.rs
@@ -0,0 +1,252 @@
+use std::collections::BTreeSet;
+
+use enum_as_inner::EnumAsInner;
+
+use super::utility::UtilityOutput;
+use super::{EngineCoreOutput, EngineCoreOutputs};
+use crate::protocol::stats::SchedulerStats;
+
+/// Data-parallel control notifications multiplexed through `EngineCoreOutputs`.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum DpControlMessage {
+    WaveComplete(u32),
+    StartWave(u32),
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub struct RequestBatchOutputs {
+    pub engine_index: u32,
+    pub outputs: Vec<EngineCoreOutput>,
+    pub scheduler_stats: Option<Box<SchedulerStats>>,
+    pub timestamp: f64,
+    pub finished_requests: Option<BTreeSet<String>>,
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub struct UtilityCallOutput {
+    pub engine_index: u32,
+    pub timestamp: f64,
+    pub output: UtilityOutput,
+}
+
+/// Semantic classification of a raw `EngineCoreOutputs` message.
+///
+/// Python currently uses one product-shaped wire struct for several distinct
+/// output families. This enum exposes those families more explicitly without
+/// changing the wire format.
+#[derive(Debug, Clone, PartialEq, EnumAsInner)]
+pub enum ClassifiedEngineCoreOutputs {
+    RequestBatch(RequestBatchOutputs),
+    Utility(UtilityCallOutput),
+    DpControl {
+        engine_index: u32,
+        timestamp: f64,
+        control: DpControlMessage,
+    },
+    /// Fallback for wire-shape combinations that do not map cleanly onto the
+    /// current semantic families.
+    Other(EngineCoreOutputs),
+}
+
+impl EngineCoreOutputs {
+    /// Classify the raw wire message into a more semantic Rust enum.
+    pub fn classify(self) -> ClassifiedEngineCoreOutputs {
+        let has_request_payload = !self.outputs.is_empty()
+            || self.scheduler_stats.is_some()
+            || self.finished_requests.is_some();
+
+        match (
+            has_request_payload,
+            &self.utility_output,
+            &self.wave_complete,
+            &self.start_wave,
+        ) {
+            (true, None, None, None) => {
+                ClassifiedEngineCoreOutputs::RequestBatch(RequestBatchOutputs {
+                    engine_index: self.engine_index,
+                    outputs: self.outputs,
+                    scheduler_stats: self.scheduler_stats,
+                    timestamp: self.timestamp,
+                    finished_requests: self.finished_requests,
+                })
+            }
+            (false, Some(_), None, None) => {
+                ClassifiedEngineCoreOutputs::Utility(UtilityCallOutput {
+                    engine_index: self.engine_index,
+                    timestamp: self.timestamp,
+                    output: self.utility_output.unwrap(),
+                })
+            }
+            (false, None, Some(_), None) => ClassifiedEngineCoreOutputs::DpControl {
+                engine_index: self.engine_index,
+                timestamp: self.timestamp,
+                control: DpControlMessage::WaveComplete(self.wave_complete.unwrap()),
+            },
+            (false, None, None, Some(_)) => ClassifiedEngineCoreOutputs::DpControl {
+                engine_index: self.engine_index,
+                timestamp: self.timestamp,
+                control: DpControlMessage::StartWave(self.start_wave.unwrap()),
+            },
+            _ => ClassifiedEngineCoreOutputs::Other(self),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::BTreeSet;
+
+    use super::*;
+    use crate::protocol::EngineCoreOutput;
+
+    #[test]
+    fn engine_core_outputs_classify_request_batch() {
+        let outputs = EngineCoreOutputs {
+            outputs: vec![EngineCoreOutput {
+                request_id: "req-1".to_string(),
+                new_token_ids: vec![7],
+                ..Default::default()
+            }],
+            finished_requests: Some(BTreeSet::from(["req-1".to_string()])),
+            ..Default::default()
+        };
+
+        expect_test::expect![[r#"
+            RequestBatch(
+                RequestBatchOutputs {
+                    engine_index: 0,
+                    outputs: [
+                        EngineCoreOutput {
+                            request_id: "req-1",
+                            new_token_ids: [
+                                7,
+                            ],
+                            new_logprobs: None,
+                            new_prompt_logprobs_tensors: None,
+                            pooling_output: None,
+                            finish_reason: None,
+                            stop_reason: None,
+                            events: None,
+                            kv_transfer_params: None,
+                            trace_headers: None,
+                            prefill_stats: None,
+                            routed_experts: None,
+                            num_nans_in_logits: 0,
+                        },
+                    ],
+                    scheduler_stats: None,
+                    timestamp: 0.0,
+                    finished_requests: Some(
+                        {
+                            "req-1",
+                        },
+                    ),
+                },
+            )
+        "#]]
+        .assert_debug_eq(&outputs.classify());
+    }
+
+    #[test]
+    fn engine_core_outputs_classify_utility() {
+        let outputs = EngineCoreOutputs {
+            utility_output: Some(UtilityOutput {
+                call_id: 42_u64.into(),
+                failure_message: None,
+                result: None,
+            }),
+            ..Default::default()
+        };
+
+        expect_test::expect![[r#"
+            Utility(
+                UtilityCallOutput {
+                    engine_index: 0,
+                    timestamp: 0.0,
+                    output: UtilityOutput {
+                        call_id: 42,
+                        failure_message: None,
+                        result: None,
+                    },
+                },
+            )
+        "#]]
+        .assert_debug_eq(&outputs.classify());
+    }
+
+    #[test]
+    fn engine_core_outputs_classify_control() {
+        let outputs = EngineCoreOutputs {
+            start_wave: Some(3),
+            ..Default::default()
+        };
+
+        expect_test::expect![[r#"
+            DpControl {
+                engine_index: 0,
+                timestamp: 0.0,
+                control: StartWave(
+                    3,
+                ),
+            }
+        "#]]
+        .assert_debug_eq(&outputs.classify());
+    }
+
+    #[test]
+    fn engine_core_outputs_classify_mixed_shape_as_raw() {
+        let outputs = EngineCoreOutputs {
+            outputs: vec![EngineCoreOutput {
+                request_id: "req-1".to_string(),
+                new_token_ids: vec![7],
+                ..Default::default()
+            }],
+            utility_output: Some(UtilityOutput {
+                call_id: 1_u64.into(),
+                failure_message: None,
+                result: None,
+            }),
+            ..Default::default()
+        };
+
+        expect_test::expect![[r#"
+            Other(
+                EngineCoreOutputs {
+                    engine_index: 0,
+                    outputs: [
+                        EngineCoreOutput {
+                            request_id: "req-1",
+                            new_token_ids: [
+                                7,
+                            ],
+                            new_logprobs: None,
+                            new_prompt_logprobs_tensors: None,
+                            pooling_output: None,
+                            finish_reason: None,
+                            stop_reason: None,
+                            events: None,
+                            kv_transfer_params: None,
+                            trace_headers: None,
+                            prefill_stats: None,
+                            routed_experts: None,
+                            num_nans_in_logits: 0,
+                        },
+                    ],
+                    scheduler_stats: None,
+                    timestamp: 0.0,
+                    utility_output: Some(
+                        UtilityOutput {
+                            call_id: 1,
+                            failure_message: None,
+                            result: None,
+                        },
+                    ),
+                    finished_requests: None,
+                    wave_complete: None,
+                    start_wave: None,
+                },
+            )
+        "#]]
+        .assert_debug_eq(&outputs.classify());
+    }
+}
diff --git a/rust/src/engine-core-client/src/protocol/dtype.rs b/rust/src/engine-core-client/src/protocol/dtype.rs
new file mode 100644
index 000000000000..081d1ce921cf
--- /dev/null
+++ b/rust/src/engine-core-client/src/protocol/dtype.rs
@@ -0,0 +1,40 @@
+use serde::{Deserialize, Serialize};
+
+/// Effective model dtype reported by the engine after config resolution.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+pub enum ModelDtype {
+    #[serde(rename = "float16")]
+    Float16,
+    #[serde(rename = "bfloat16")]
+    BFloat16,
+    #[serde(rename = "float32")]
+    Float32,
+}
+
+impl ModelDtype {
+    pub fn as_str(self) -> &'static str {
+        match self {
+            Self::Float16 => "float16",
+            Self::BFloat16 => "bfloat16",
+            Self::Float32 => "float32",
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::ModelDtype;
+
+    #[test]
+    fn serde_uses_protocol_dtype_strings() {
+        assert_eq!(
+            serde_json::to_value(ModelDtype::Float16).unwrap(),
+            serde_json::json!("float16")
+        );
+        assert_eq!(
+            serde_json::from_value::<ModelDtype>(serde_json::json!("bfloat16")).unwrap(),
+            ModelDtype::BFloat16
+        );
+        assert_eq!(ModelDtype::Float32.as_str(), "float32");
+    }
+}
diff --git a/rust/src/engine-core-client/src/protocol/handshake.rs b/rust/src/engine-core-client/src/protocol/handshake.rs
new file mode 100644
index 000000000000..622a032772e7
--- /dev/null
+++ b/rust/src/engine-core-client/src/protocol/handshake.rs
@@ -0,0 +1,90 @@
+use std::collections::BTreeMap;
+
+use serde::{Deserialize, Serialize};
+
+use crate::protocol::{ModelDtype, OpaqueValue};
+
+/// Decoded engine startup-handshake payload sent on the handshake socket.
+///
+/// Original Python payload construction:
+/// <https://github.com/vllm-project/vllm/blob/c8d98f81f6/vllm/v1/engine/core.py#L1000-L1035>
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct ReadyMessage {
+    #[serde(default)]
+    pub status: Option<String>,
+    #[serde(default)]
+    pub local: Option<bool>,
+    #[serde(default)]
+    pub headless: Option<bool>,
+    #[serde(default)]
+    pub parallel_config_hash: Option<String>,
+}
+
+/// Post-initialization configuration sent from each engine on the input socket
+/// registration message, after the handshake completes.
+///
+/// Contains values that may differ from the original config (e.g.
+/// `max_model_len` after KV cache auto-fitting, `num_gpu_blocks` after
+/// profiling).
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/c8d98f81f6/vllm/v1/engine/__init__.py#L67-L77>
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EngineCoreReadyResponse {
+    /// Engine-reported maximum model context length (auto-fitted after
+    /// KV cache profiling and may differ from the original config value).
+    pub max_model_len: u64,
+    /// Number of GPU blocks available for KV cache on this engine.
+    pub num_gpu_blocks: u64,
+    /// DP coordinator stats publish address, if applicable.
+    pub dp_stats_address: Option<String>,
+    /// Effective model dtype after Python vLLM resolves `--dtype`.
+    // TODO: This is currently not wired up on the engine side. After it's added, remove `Option`
+    // and `serde(default)`.
+    #[serde(default)]
+    pub dtype: Option<ModelDtype>,
+}
+
+/// Frontend-owned ZMQ addresses that are sent to the engine during startup
+/// handshake initialization.
+///
+/// Original Python definition (`EngineZmqAddresses`):
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/v1/engine/utils.py#L53-L67>
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct HandshakeAddresses {
+    pub inputs: Vec<String>,
+    pub outputs: Vec<String>,
+    pub coordinator_input: Option<String>,
+    pub coordinator_output: Option<String>,
+    pub frontend_stats_publish_address: Option<String>,
+}
+
+/// Startup handshake payload sent from the frontend to initialize an engine
+/// after receiving `HELLO`.
+///
+/// Original Python definition (`EngineHandshakeMetadata`):
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/v1/engine/utils.py#L69-L77>
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct HandshakeInitMessage {
+    pub addresses: HandshakeAddresses,
+    pub parallel_config: BTreeMap<String, OpaqueValue>,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::EngineCoreReadyResponse;
+    use crate::protocol::ModelDtype;
+
+    #[test]
+    fn ready_response_accepts_effective_dtype() {
+        let response: EngineCoreReadyResponse = serde_json::from_value(serde_json::json!({
+            "max_model_len": 4096,
+            "num_gpu_blocks": 2,
+            "dp_stats_address": null,
+            "dtype": "bfloat16"
+        }))
+        .unwrap();
+
+        assert_eq!(response.dtype, Some(ModelDtype::BFloat16));
+    }
+}
diff --git a/rust/src/engine-core-client/src/protocol/logprobs.rs b/rust/src/engine-core-client/src/protocol/logprobs.rs
new file mode 100644
index 000000000000..00c01df671cb
--- /dev/null
+++ b/rust/src/engine-core-client/src/protocol/logprobs.rs
@@ -0,0 +1,330 @@
+mod array;
+#[cfg(test)]
+mod tests;
+mod wire;
+
+use std::ops::{Deref, DerefMut};
+
+use enum_as_inner::EnumAsInner;
+use serde::{Deserialize, Deserializer, Serialize};
+
+use self::wire::*;
+use super::{EngineCoreOutput, EngineCoreOutputs, decode_msgpack};
+use crate::error::{Error, Result, bail_ext_value_decode, ext_value_decode};
+use crate::protocol::tensor::{WireArrayData, WireNdArray};
+
+/// One token candidate and its logprob metadata for a single sequence position.
+///
+/// The first entry in a [`PositionLogprobs`] is always the sampled/selected
+/// token for that position. Any remaining entries follow the engine's returned
+/// top-k candidate order.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct TokenLogprob {
+    pub token_id: u32,
+    pub logprob: f32,
+    /// The sampled/selected token uses its actual vocab rank. Remaining entries
+    /// use 1-based top-k ranks matching the engine's returned candidate
+    /// order.
+    pub rank: u32,
+}
+
+/// Logprob payload for one sequence position.
+///
+/// This is the semantic Rust representation used by the public client API after
+/// the lower-level ndarray/tensor wire payload has been decoded.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct PositionLogprobs {
+    pub entries: Vec<TokenLogprob>,
+}
+
+impl PositionLogprobs {
+    /// Convert one decoded logprobs row into this per-position form by grouping
+    /// each token/logprob pair together with the sampled/selected token's
+    /// actual vocab rank.
+    fn from_decoded_row(token_ids: &[u32], logprobs: &[f32], sampled_rank: u32) -> Result<Self> {
+        if token_ids.len() != logprobs.len() {
+            bail_ext_value_decode!(
+                "logprobs row length mismatch: token_ids={}, logprobs={}",
+                token_ids.len(),
+                logprobs.len()
+            );
+        }
+        if sampled_rank == 0 {
+            bail_ext_value_decode!("token_ranks must be >= 1 for decoded engine-core logprobs");
+        }
+
+        let mut entries = Vec::with_capacity(token_ids.len());
+        for (index, (&token_id, &logprob)) in token_ids.iter().zip(logprobs.iter()).enumerate() {
+            let rank = if index == 0 {
+                sampled_rank
+            } else {
+                index as u32
+            };
+            entries.push(TokenLogprob {
+                token_id,
+                logprob,
+                rank,
+            });
+        }
+        Ok(Self { entries })
+    }
+}
+
+/// Decoded per-request logprobs payload for one engine-core output.
+///
+/// Unlike the Python wire payload, this public Rust type is already fully
+/// semantic: one [`PositionLogprobs`] per scored position, each containing the
+/// sampled/selected token plus any returned top-k alternatives for that same
+/// position.
+///
+/// The Python engine still sends logprobs as ndarray/tensor-shaped wire tuples.
+/// Rust resolves that lower-level representation during decode and exposes only
+/// this per-position form to callers.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct Logprobs {
+    /// One decoded logprobs record per scored position in this engine-core
+    /// output.
+    pub positions: Vec<PositionLogprobs>,
+}
+
+impl Logprobs {
+    /// Returns the number of scored positions in this payload.
+    pub fn len(&self) -> usize {
+        self.positions.len()
+    }
+
+    /// Returns whether the payload contains no scored positions.
+    pub fn is_empty(&self) -> bool {
+        self.positions.is_empty()
+    }
+}
+
+/// Output field wrapper that is initially deserialized from the Python wire
+/// shape, then resolved into [`Logprobs`] before the decoded message is
+/// returned to callers.
+#[derive(Clone, PartialEq, Debug, EnumAsInner)]
+pub enum MaybeWireLogprobs {
+    /// The logprobs are still in the wire format and need to be resolved by
+    /// looking up aux frames and decoding raw views. Should only be used
+    /// internally during deserialization.
+    Wire(Box<WireLogprobs>),
+    /// The actual decoded logprobs value,
+    Direct(Logprobs),
+}
+
+impl Deref for MaybeWireLogprobs {
+    type Target = Logprobs;
+
+    fn deref(&self) -> &Self::Target {
+        match self {
+            Self::Wire(_) => panic!("Logprobs is still in wire format"),
+            Self::Direct(value) => value,
+        }
+    }
+}
+
+impl DerefMut for MaybeWireLogprobs {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        match self {
+            Self::Wire(_) => panic!("Logprobs is still in wire format"),
+            Self::Direct(value) => value,
+        }
+    }
+}
+
+impl<'de> Deserialize<'de> for MaybeWireLogprobs {
+    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        // When deserializing, it's always in the wire form.
+        WireLogprobs::deserialize(deserializer).map(|v| Self::Wire(Box::new(v)))
+    }
+}
+
+impl Serialize for MaybeWireLogprobs {
+    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        // For testing purposes only. We don't actually serialize it into aux frames.
+        match self {
+            Self::Wire(value) => value.serialize(serializer),
+            Self::Direct(value) => WireLogprobs::from_direct(value)
+                .map_err(serde::ser::Error::custom)?
+                .serialize(serializer),
+        }
+    }
+}
+
+impl MaybeWireLogprobs {
+    /// Resolve the wire representation into decoded logprobs by looking up aux
+    /// frames and decoding raw views as needed.
+    fn resolve<Frame>(self, frames: &[Frame], field_prefix: &str) -> Result<Self>
+    where
+        Frame: AsRef<[u8]>,
+    {
+        match self {
+            Self::Direct(value) => Ok(Self::Direct(value)),
+            Self::Wire(value) => value.resolve(frames, field_prefix).map(Self::Direct),
+        }
+    }
+}
+
+impl EngineCoreOutputs {
+    /// Resolve all wire-format fields in-place by looking up aux frames and
+    /// decoding raw-view payloads as needed.
+    fn resolve_in_place<Frame>(&mut self, frames: &[Frame]) -> Result<()>
+    where
+        Frame: AsRef<[u8]>,
+    {
+        for output in &mut self.outputs {
+            output.resolve_in_place(frames)?;
+        }
+        Ok(())
+    }
+}
+
+impl EngineCoreOutput {
+    /// Resolve all wire-format fields in-place by looking up aux frames and
+    /// decoding raw-view payloads as needed.
+    fn resolve_in_place<Frame>(&mut self, frames: &[Frame]) -> Result<()>
+    where
+        Frame: AsRef<[u8]>,
+    {
+        self.new_logprobs = (self.new_logprobs.take())
+            .map(|value| value.resolve(frames, "new_logprobs"))
+            .transpose()?;
+        self.new_prompt_logprobs_tensors = (self.new_prompt_logprobs_tensors.take())
+            .map(|value| value.resolve(frames, "new_prompt_logprobs_tensors"))
+            .transpose()?;
+        Ok(())
+    }
+}
+
+impl WireLogprobs {
+    /// Convert semantic per-position logprobs into the Python wire tuple shape.
+    ///
+    /// This exists mainly so Rust-side tests can inject semantic logprobs into
+    /// mocked engine-core outputs without manually building ndarray
+    /// raw-view tuples.
+    fn from_direct(value: &Logprobs) -> std::result::Result<Self, String> {
+        let rows = value.positions.len();
+        let cols = value.positions.first().map(|position| position.entries.len()).unwrap_or(0);
+
+        let mut token_ids = Vec::with_capacity(rows.saturating_mul(cols).saturating_mul(8));
+        let mut logprobs = Vec::with_capacity(rows.saturating_mul(cols).saturating_mul(4));
+        let mut token_ranks = Vec::with_capacity(rows.saturating_mul(8));
+
+        for (row_index, position) in value.positions.iter().enumerate() {
+            if position.entries.len() != cols {
+                return Err(format!(
+                    "logprobs row {row_index} length mismatch: expected {cols}, got {}",
+                    position.entries.len()
+                ));
+            }
+            let Some((sampled, _)) = position.entries.split_first() else {
+                return Err(format!("logprobs row {row_index} is empty"));
+            };
+
+            token_ranks.extend_from_slice(&(sampled.rank as i64).to_le_bytes());
+            for entry in &position.entries {
+                token_ids.extend_from_slice(&(entry.token_id as i64).to_le_bytes());
+                logprobs.extend_from_slice(&entry.logprob.to_le_bytes());
+            }
+        }
+
+        Ok(Self {
+            logprob_token_ids: WireNdArray {
+                dtype: "<i8".to_string(),
+                shape: vec![rows, cols],
+                data: WireArrayData::RawView(token_ids),
+            },
+            logprobs: WireNdArray {
+                dtype: "<f4".to_string(),
+                shape: vec![rows, cols],
+                data: WireArrayData::RawView(logprobs),
+            },
+            token_ranks: WireNdArray {
+                dtype: "<i8".to_string(),
+                shape: vec![rows],
+                data: WireArrayData::RawView(token_ranks),
+            },
+            cu_num_generated_tokens: None,
+        })
+    }
+
+    /// Resolve the wire-format logprobs into semantic [`Logprobs`] records by
+    /// looking up aux frames, decoding raw views, and grouping each row
+    /// into one [`PositionLogprobs`].
+    fn resolve<Frame>(self, frames: &[Frame], field_prefix: &str) -> Result<Logprobs>
+    where
+        Frame: AsRef<[u8]>,
+    {
+        if let Some(indices) = self.cu_num_generated_tokens {
+            bail_ext_value_decode!(
+                "{field_prefix}.cu_num_generated_tokens: \
+                 expected None for per-request engine-core logprobs payload, got {indices:?}"
+            );
+        }
+
+        let token_ids = array::decode_array2_u32(
+            self.logprob_token_ids,
+            &format!("{field_prefix}.logprob_token_ids"),
+            frames,
+        )?;
+        let logprobs =
+            array::decode_array2_f32(self.logprobs, &format!("{field_prefix}.logprobs"), frames)?;
+        let token_ranks = array::decode_array1_u32(
+            self.token_ranks,
+            &format!("{field_prefix}.token_ranks"),
+            frames,
+        )?;
+
+        if token_ids.rows != logprobs.rows || token_ids.cols != logprobs.cols {
+            bail_ext_value_decode!(
+                "{field_prefix}: row shape mismatch between token ids ({}, {}) and logprobs ({}, {})",
+                token_ids.rows,
+                token_ids.cols,
+                logprobs.rows,
+                logprobs.cols
+            );
+        }
+        if token_ids.rows != token_ranks.len() {
+            bail_ext_value_decode!(
+                "{field_prefix}: token_ranks length {} does not match row count {}",
+                token_ranks.len(),
+                token_ids.rows
+            );
+        }
+
+        let mut positions = Vec::with_capacity(token_ids.rows);
+        for ((token_ids_row, logprobs_row), sampled_rank) in token_ids
+            .data
+            .chunks(token_ids.cols)
+            .zip(logprobs.data.chunks(logprobs.cols))
+            .zip(token_ranks)
+        {
+            positions.push(PositionLogprobs::from_decoded_row(
+                token_ids_row,
+                logprobs_row,
+                sampled_rank,
+            )?);
+        }
+
+        Ok(Logprobs { positions })
+    }
+}
+
+/// Decode one ordinary or multipart engine-core output message into the strong
+/// typed public protocol shape.
+pub fn decode_engine_core_outputs<Frame>(frames: &[Frame]) -> Result<EngineCoreOutputs>
+where
+    Frame: AsRef<[u8]>,
+{
+    let first_frame = frames.first().ok_or_else(|| ext_value_decode!("missing output frame"))?;
+
+    let mut outputs: EngineCoreOutputs = decode_msgpack(first_frame.as_ref())?;
+    outputs.resolve_in_place(frames)?;
+    Ok(outputs)
+}
diff --git a/rust/src/engine-core-client/src/protocol/logprobs/array.rs b/rust/src/engine-core-client/src/protocol/logprobs/array.rs
new file mode 100644
index 000000000000..132428ffcd85
--- /dev/null
+++ b/rust/src/engine-core-client/src/protocol/logprobs/array.rs
@@ -0,0 +1,306 @@
+use std::io::Cursor;
+
+use byteorder::{BigEndian, LittleEndian, NativeEndian, ReadBytesExt};
+use itertools::Itertools as _;
+
+use crate::error::{Error, Result, ext_value_decode};
+use crate::protocol::tensor::{ShapeExt as _, WireArrayData, WireNdArray};
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(super) enum ScalarType {
+    I32,
+    I64,
+    F32,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(super) enum Endianness {
+    Little,
+    Big,
+    Native,
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub(super) struct DecodedArray2<T> {
+    pub rows: usize,
+    pub cols: usize,
+    pub data: Vec<T>,
+}
+
+pub(super) fn decode_array2_u32<Frame>(
+    value: WireNdArray,
+    field: &str,
+    frames: &[Frame],
+) -> Result<DecodedArray2<u32>>
+where
+    Frame: AsRef<[u8]>,
+{
+    let (shape, bytes, scalar, endianness) =
+        decode_array_metadata(value, field, frames, &[ScalarType::I32, ScalarType::I64])?;
+    if shape.len() != 2 {
+        return Err(decode_error(
+            field,
+            &format!("expected rank-2 array, got rank {}", shape.len()),
+        ));
+    }
+
+    let data = match scalar {
+        ScalarType::I32 => decode_i32_vec(&bytes, endianness, field)?
+            .into_iter()
+            .map(|value| convert_to_u32(value, field))
+            .try_collect()?,
+        ScalarType::I64 => decode_i64_vec(&bytes, endianness, field)?
+            .into_iter()
+            .map(|value| convert_to_u32(value, field))
+            .try_collect()?,
+        ScalarType::F32 => unreachable!("scalar validation should reject f32"),
+    };
+    Ok(DecodedArray2 {
+        rows: shape[0],
+        cols: shape[1],
+        data,
+    })
+}
+
+pub(super) fn decode_array1_u32<Frame>(
+    value: WireNdArray,
+    field: &str,
+    frames: &[Frame],
+) -> Result<Vec<u32>>
+where
+    Frame: AsRef<[u8]>,
+{
+    let (shape, bytes, scalar, endianness) =
+        decode_array_metadata(value, field, frames, &[ScalarType::I32, ScalarType::I64])?;
+    if shape.len() != 1 {
+        return Err(decode_error(
+            field,
+            &format!("expected rank-1 array, got rank {}", shape.len()),
+        ));
+    }
+
+    let data = match scalar {
+        ScalarType::I32 => decode_i32_vec(&bytes, endianness, field)?
+            .into_iter()
+            .map(|value| convert_to_u32(value, field))
+            .try_collect()?,
+        ScalarType::I64 => decode_i64_vec(&bytes, endianness, field)?
+            .into_iter()
+            .map(|value| convert_to_u32(value, field))
+            .try_collect()?,
+        ScalarType::F32 => unreachable!("scalar validation should reject f32"),
+    };
+    Ok(data)
+}
+
+pub(super) fn decode_array2_f32<Frame>(
+    value: WireNdArray,
+    field: &str,
+    frames: &[Frame],
+) -> Result<DecodedArray2<f32>>
+where
+    Frame: AsRef<[u8]>,
+{
+    let (shape, bytes, _, endianness) =
+        decode_array_metadata(value, field, frames, &[ScalarType::F32])?;
+    if shape.len() != 2 {
+        return Err(decode_error(
+            field,
+            &format!("expected rank-2 array, got rank {}", shape.len()),
+        ));
+    }
+
+    let data = decode_f32_vec(&bytes, endianness, field)?;
+    Ok(DecodedArray2 {
+        rows: shape[0],
+        cols: shape[1],
+        data,
+    })
+}
+
+pub(super) fn decode_array_metadata<Frame>(
+    value: WireNdArray,
+    field: &str,
+    frames: &[Frame],
+    expected_scalars: &[ScalarType],
+) -> Result<(Vec<usize>, Vec<u8>, ScalarType, Endianness)>
+where
+    Frame: AsRef<[u8]>,
+{
+    let WireNdArray { dtype, shape, data } = value;
+    let (scalar, endianness) = parse_dtype(&dtype, field)?;
+    if !expected_scalars.contains(&scalar) {
+        return Err(decode_error(
+            field,
+            &format!("expected dtype in {:?}, got {}", expected_scalars, dtype),
+        ));
+    }
+
+    let bytes = resolve_array_bytes(data, field, frames)?;
+    validate_byte_length(shape.as_slice(), bytes.len(), field, scalar)?;
+    Ok((shape, bytes, scalar, endianness))
+}
+
+pub(super) fn parse_dtype(dtype: &str, field: &str) -> Result<(ScalarType, Endianness)> {
+    let (endianness, body) = match dtype.as_bytes().first().copied() {
+        Some(b'<') => (Endianness::Little, &dtype[1..]),
+        Some(b'>') => (Endianness::Big, &dtype[1..]),
+        Some(b'=') => (Endianness::Native, &dtype[1..]),
+        Some(b'|') => (Endianness::Native, &dtype[1..]),
+        _ => (Endianness::Native, dtype),
+    };
+
+    let scalar = match body {
+        "i4" | "int32" => ScalarType::I32,
+        "i8" | "int64" => ScalarType::I64,
+        "f4" | "float32" => ScalarType::F32,
+        _ => {
+            return Err(decode_error(
+                field,
+                &format!("unsupported dtype string {dtype:?}"),
+            ));
+        }
+    };
+    Ok((scalar, endianness))
+}
+
+pub(super) fn resolve_array_bytes<Frame>(
+    value: WireArrayData,
+    field: &str,
+    frames: &[Frame],
+) -> Result<Vec<u8>>
+where
+    Frame: AsRef<[u8]>,
+{
+    match value {
+        WireArrayData::RawView(bytes) => Ok(bytes),
+        WireArrayData::AuxIndex(index) => {
+            let frame = frames.get(index).ok_or_else(|| {
+                decode_error(
+                    field,
+                    &format!(
+                        "aux frame index {index} out of range for {} frames",
+                        frames.len()
+                    ),
+                )
+            })?;
+            Ok(frame.as_ref().to_vec())
+        }
+    }
+}
+
+pub(super) fn validate_byte_length(
+    shape: &[usize],
+    byte_len: usize,
+    field: &str,
+    scalar: ScalarType,
+) -> Result<()> {
+    let element_count = shape
+        .checked_numel()
+        .ok_or_else(|| decode_error(field, "shape element count overflowed usize"))?;
+    let element_size = match scalar {
+        ScalarType::I32 | ScalarType::F32 => 4,
+        ScalarType::I64 => 8,
+    };
+    let expected = element_count
+        .checked_mul(element_size)
+        .ok_or_else(|| decode_error(field, "byte length overflowed usize"))?;
+    if expected != byte_len {
+        return Err(decode_error(
+            field,
+            &format!("byte length mismatch: expected {expected}, got {byte_len}"),
+        ));
+    }
+    Ok(())
+}
+
+pub(super) fn decode_i32_vec(
+    bytes: &[u8],
+    endianness: Endianness,
+    field: &str,
+) -> Result<Vec<i32>> {
+    if !bytes.len().is_multiple_of(4) {
+        return Err(decode_error(
+            field,
+            &format!("byte length {} is not divisible by 4", bytes.len()),
+        ));
+    }
+    let mut cursor = Cursor::new(bytes);
+    let mut values = Vec::with_capacity(bytes.len() / 4);
+    while (cursor.position() as usize) < bytes.len() {
+        let value = match endianness {
+            Endianness::Little => cursor.read_i32::<LittleEndian>(),
+            Endianness::Big => cursor.read_i32::<BigEndian>(),
+            Endianness::Native => cursor.read_i32::<NativeEndian>(),
+        }
+        .map_err(|error| decode_error(field, &format!("failed to read i32 payload: {error}")))?;
+        values.push(value);
+    }
+    Ok(values)
+}
+
+pub(super) fn decode_f32_vec(
+    bytes: &[u8],
+    endianness: Endianness,
+    field: &str,
+) -> Result<Vec<f32>> {
+    if !bytes.len().is_multiple_of(4) {
+        return Err(decode_error(
+            field,
+            &format!("byte length {} is not divisible by 4", bytes.len()),
+        ));
+    }
+    let mut cursor = Cursor::new(bytes);
+    let mut values = Vec::with_capacity(bytes.len() / 4);
+    while (cursor.position() as usize) < bytes.len() {
+        let value = match endianness {
+            Endianness::Little => cursor.read_f32::<LittleEndian>(),
+            Endianness::Big => cursor.read_f32::<BigEndian>(),
+            Endianness::Native => cursor.read_f32::<NativeEndian>(),
+        }
+        .map_err(|error| decode_error(field, &format!("failed to read f32 payload: {error}")))?;
+        values.push(value);
+    }
+    Ok(values)
+}
+
+pub(super) fn decode_i64_vec(
+    bytes: &[u8],
+    endianness: Endianness,
+    field: &str,
+) -> Result<Vec<i64>> {
+    if !bytes.len().is_multiple_of(8) {
+        return Err(decode_error(
+            field,
+            &format!("byte length {} is not divisible by 8", bytes.len()),
+        ));
+    }
+    let mut cursor = Cursor::new(bytes);
+    let mut values = Vec::with_capacity(bytes.len() / 8);
+    while (cursor.position() as usize) < bytes.len() {
+        let value = match endianness {
+            Endianness::Little => cursor.read_i64::<LittleEndian>(),
+            Endianness::Big => cursor.read_i64::<BigEndian>(),
+            Endianness::Native => cursor.read_i64::<NativeEndian>(),
+        }
+        .map_err(|error| decode_error(field, &format!("failed to read i64 payload: {error}")))?;
+        values.push(value);
+    }
+    Ok(values)
+}
+
+fn convert_to_u32<I>(value: I, field: &str) -> Result<u32>
+where
+    I: TryInto<u32> + std::fmt::Display + Copy,
+{
+    value.try_into().map_err(|_| {
+        decode_error(
+            field,
+            &format!("expected non-negative token id/rank that fits in u32, got {value}"),
+        )
+    })
+}
+
+pub(super) fn decode_error(field: &str, reason: &str) -> Error {
+    ext_value_decode!("{field}: {reason}")
+}
diff --git a/rust/src/engine-core-client/src/protocol/logprobs/tests.rs b/rust/src/engine-core-client/src/protocol/logprobs/tests.rs
new file mode 100644
index 000000000000..7408b98f50c7
--- /dev/null
+++ b/rust/src/engine-core-client/src/protocol/logprobs/tests.rs
@@ -0,0 +1,302 @@
+use std::collections::BTreeSet;
+
+use bytes::Bytes;
+use rmpv::Value;
+
+use super::{Logprobs, PositionLogprobs, TokenLogprob, decode_engine_core_outputs};
+use crate::protocol::EngineCoreFinishReason;
+
+fn encode_value(value: &Value) -> Vec<u8> {
+    let mut out = Vec::new();
+    rmpv::encode::write_value(&mut out, value).unwrap();
+    out
+}
+
+fn output_wire_with_custom_fields(
+    new_logprobs: Option<Value>,
+    prompt_logprobs: Option<Value>,
+) -> Value {
+    Value::Array(vec![
+        Value::from(0),
+        Value::Array(vec![Value::Array(vec![
+            Value::from("req-1"),
+            Value::Array(vec![Value::from(7), Value::from(8)]),
+            new_logprobs.unwrap_or(Value::Nil),
+            prompt_logprobs.unwrap_or(Value::Nil),
+            Value::Nil,
+            Value::from(EngineCoreFinishReason::Length as u8),
+        ])]),
+        Value::Nil,
+        Value::from(0.0),
+        Value::Nil,
+        Value::Array(vec![Value::from("req-1")]),
+    ])
+}
+
+fn ndarray_value(dtype: &str, shape: &[usize], data: Value) -> Value {
+    Value::Array(vec![
+        Value::from(dtype),
+        Value::Array(shape.iter().copied().map(Value::from).collect()),
+        data,
+    ])
+}
+
+fn inline_logprobs_value() -> Value {
+    let ids = Value::Ext(
+        3,
+        vec![
+            1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0,
+            0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0,
+        ],
+    );
+    let probs = Value::Ext(
+        3,
+        vec![
+            0, 0, 128, 63, 0, 0, 0, 64, 0, 0, 64, 64, 0, 0, 128, 64, 0, 0, 160, 64, 0, 0, 192, 64,
+        ],
+    );
+    let ranks = Value::Ext(3, vec![1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0]);
+    Value::Array(vec![
+        ndarray_value("<i8", &[2, 3], ids),
+        ndarray_value("<f4", &[2, 3], probs),
+        ndarray_value("<i8", &[2], ranks),
+        Value::Nil,
+    ])
+}
+
+fn inline_prompt_logprobs_value() -> Value {
+    let ids = Value::Ext(
+        3,
+        vec![
+            10, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0,
+            0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0,
+        ],
+    );
+    let probs = Value::Ext(
+        3,
+        vec![
+            0, 0, 32, 65, 0, 0, 48, 65, 0, 0, 64, 65, 0, 0, 80, 65, 0, 0, 96, 65, 0, 0, 112, 65,
+        ],
+    );
+    let ranks = Value::Ext(3, vec![3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0]);
+    Value::Array(vec![
+        ndarray_value("int64", &[2, 3], ids),
+        ndarray_value("float32", &[2, 3], probs),
+        ndarray_value("int64", &[2], ranks),
+        Value::Nil,
+    ])
+}
+
+fn expected_sample_logprobs() -> Logprobs {
+    Logprobs {
+        positions: vec![
+            PositionLogprobs {
+                entries: vec![
+                    TokenLogprob {
+                        token_id: 1,
+                        logprob: 1.0,
+                        rank: 1,
+                    },
+                    TokenLogprob {
+                        token_id: 2,
+                        logprob: 2.0,
+                        rank: 1,
+                    },
+                    TokenLogprob {
+                        token_id: 3,
+                        logprob: 3.0,
+                        rank: 2,
+                    },
+                ],
+            },
+            PositionLogprobs {
+                entries: vec![
+                    TokenLogprob {
+                        token_id: 4,
+                        logprob: 4.0,
+                        rank: 2,
+                    },
+                    TokenLogprob {
+                        token_id: 5,
+                        logprob: 5.0,
+                        rank: 1,
+                    },
+                    TokenLogprob {
+                        token_id: 6,
+                        logprob: 6.0,
+                        rank: 2,
+                    },
+                ],
+            },
+        ],
+    }
+}
+
+fn expected_prompt_logprobs() -> Logprobs {
+    Logprobs {
+        positions: vec![
+            PositionLogprobs {
+                entries: vec![
+                    TokenLogprob {
+                        token_id: 10,
+                        logprob: 10.0,
+                        rank: 3,
+                    },
+                    TokenLogprob {
+                        token_id: 11,
+                        logprob: 11.0,
+                        rank: 1,
+                    },
+                    TokenLogprob {
+                        token_id: 12,
+                        logprob: 12.0,
+                        rank: 2,
+                    },
+                ],
+            },
+            PositionLogprobs {
+                entries: vec![
+                    TokenLogprob {
+                        token_id: 13,
+                        logprob: 13.0,
+                        rank: 4,
+                    },
+                    TokenLogprob {
+                        token_id: 14,
+                        logprob: 14.0,
+                        rank: 1,
+                    },
+                    TokenLogprob {
+                        token_id: 15,
+                        logprob: 15.0,
+                        rank: 2,
+                    },
+                ],
+            },
+        ],
+    }
+}
+
+#[test]
+fn decodes_inline_new_logprobs() {
+    let frames = vec![Bytes::from(encode_value(&output_wire_with_custom_fields(
+        Some(inline_logprobs_value()),
+        None,
+    )))];
+    let decoded = decode_engine_core_outputs(&frames).unwrap();
+
+    let logprobs = decoded.outputs[0].new_logprobs.clone().unwrap().into_direct().unwrap();
+    assert_eq!(logprobs, expected_sample_logprobs());
+    assert_eq!(
+        decoded.finished_requests,
+        Some(BTreeSet::from(["req-1".to_string()]))
+    );
+}
+
+#[test]
+fn decodes_multipart_new_logprobs() {
+    let frames = vec![
+        Bytes::from(encode_value(&output_wire_with_custom_fields(
+            Some(Value::Array(vec![
+                ndarray_value("<i8", &[2, 3], Value::from(1)),
+                ndarray_value("<f4", &[2, 3], Value::from(2)),
+                ndarray_value("<i8", &[2], Value::from(3)),
+                Value::Nil,
+            ])),
+            None,
+        ))),
+        Bytes::from_static(&[
+            1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0,
+            0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0,
+        ]),
+        Bytes::from_static(&[
+            0, 0, 128, 63, 0, 0, 0, 64, 0, 0, 64, 64, 0, 0, 128, 64, 0, 0, 160, 64, 0, 0, 192, 64,
+        ]),
+        Bytes::from_static(&[1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0]),
+    ];
+    let decoded = decode_engine_core_outputs(&frames).unwrap();
+
+    let logprobs = decoded.outputs[0].new_logprobs.clone().unwrap().into_direct().unwrap();
+    assert_eq!(logprobs, expected_sample_logprobs());
+}
+
+#[test]
+fn decodes_inline_prompt_logprobs() {
+    let frames = vec![Bytes::from(encode_value(&output_wire_with_custom_fields(
+        None,
+        Some(inline_prompt_logprobs_value()),
+    )))];
+    let decoded = decode_engine_core_outputs(&frames).unwrap();
+
+    let logprobs = decoded.outputs[0]
+        .new_prompt_logprobs_tensors
+        .clone()
+        .unwrap()
+        .into_direct()
+        .unwrap();
+    assert_eq!(logprobs, expected_prompt_logprobs());
+}
+
+#[test]
+fn decodes_big_endian_payloads() {
+    let frames = vec![Bytes::from(encode_value(&output_wire_with_custom_fields(
+        Some(Value::Array(vec![
+            ndarray_value(">i4", &[1, 2], Value::Ext(3, vec![0, 0, 0, 1, 0, 0, 0, 2])),
+            ndarray_value(
+                ">f4",
+                &[1, 2],
+                Value::Ext(3, vec![63, 128, 0, 0, 64, 0, 0, 0]),
+            ),
+            ndarray_value(">i4", &[1], Value::Ext(3, vec![0, 0, 0, 3])),
+            Value::Nil,
+        ])),
+        None,
+    )))];
+    let decoded = decode_engine_core_outputs(&frames).unwrap();
+    let logprobs = decoded.outputs[0].new_logprobs.clone().unwrap().into_direct().unwrap();
+    assert_eq!(
+        logprobs,
+        Logprobs {
+            positions: vec![PositionLogprobs {
+                entries: vec![
+                    TokenLogprob {
+                        token_id: 1,
+                        logprob: 1.0,
+                        rank: 3,
+                    },
+                    TokenLogprob {
+                        token_id: 2,
+                        logprob: 2.0,
+                        rank: 1,
+                    },
+                ],
+            }],
+        }
+    );
+}
+
+#[test]
+fn rejects_non_none_cu_num_generated_tokens() {
+    let frames = vec![Bytes::from(encode_value(&output_wire_with_custom_fields(
+        Some(Value::Array(vec![
+            ndarray_value("<i8", &[1, 1], Value::Ext(3, vec![1, 0, 0, 0, 0, 0, 0, 0])),
+            ndarray_value("<f4", &[1, 1], Value::Ext(3, vec![0, 0, 128, 63])),
+            ndarray_value("<i8", &[1], Value::Ext(3, vec![1, 0, 0, 0, 0, 0, 0, 0])),
+            Value::Array(vec![Value::from(0usize), Value::from(1usize)]),
+        ])),
+        None,
+    )))];
+
+    let error = decode_engine_core_outputs(&frames).unwrap_err();
+    let crate::error::Error::ExtValueDecode { message } = &error else {
+        panic!("expected ValueDecodeExt");
+    };
+    assert_eq!(
+        message,
+        "new_logprobs.cu_num_generated_tokens: expected None for per-request engine-core logprobs payload, got [0, 1]"
+    );
+    assert_eq!(
+        error.to_string(),
+        "messagepack ext value decode failed: new_logprobs.cu_num_generated_tokens: expected None for per-request engine-core logprobs payload, got [0, 1]"
+    );
+}
diff --git a/rust/src/engine-core-client/src/protocol/logprobs/wire.rs b/rust/src/engine-core-client/src/protocol/logprobs/wire.rs
new file mode 100644
index 000000000000..5c4a5a8e76f0
--- /dev/null
+++ b/rust/src/engine-core-client/src/protocol/logprobs/wire.rs
@@ -0,0 +1,31 @@
+use serde_tuple::{Deserialize_tuple, Serialize_tuple};
+
+use crate::protocol::tensor::WireNdArray;
+
+/// Python wire representation of `LogprobsLists` / `LogprobsTensors` before
+/// aux-frame references and raw-view payloads are resolved.
+///
+/// This mirrors the tuple shape emitted by Python engine-core so serde can
+/// first deserialize the raw wire payload before the Rust client converts it
+/// into semantic per-position logprobs records.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/v1/outputs.py#L23-L56>
+#[derive(Debug, Clone, PartialEq, Serialize_tuple, Deserialize_tuple)]
+pub struct WireLogprobs {
+    /// Wire array with shape `[num_positions, max_num_logprobs + 1]`.
+    pub logprob_token_ids: WireNdArray,
+    /// Wire array with shape `[num_positions, max_num_logprobs + 1]`.
+    pub logprobs: WireNdArray,
+    /// Wire array with shape `[num_positions]`.
+    ///
+    /// Python uses the field name `sampled_token_ranks` for sample logprobs and
+    /// `selected_token_ranks` for prompt logprobs. Rust keeps one neutral field
+    /// because both payloads share the same wire representation.
+    pub token_ranks: WireNdArray,
+    /// Preserved only for wire compatibility with batch-level Python tensors.
+    /// Scheduler-sliced per-request outputs should emit `None` here, and
+    /// the semantic Rust decoder rejects any other value.
+    #[serde(default)]
+    pub cu_num_generated_tokens: Option<Vec<usize>>,
+}
diff --git a/rust/src/engine-core-client/src/protocol/mod.rs b/rust/src/engine-core-client/src/protocol/mod.rs
new file mode 100644
index 000000000000..009311eae418
--- /dev/null
+++ b/rust/src/engine-core-client/src/protocol/mod.rs
@@ -0,0 +1,571 @@
+use std::any::type_name;
+use std::collections::{BTreeMap, BTreeSet, HashMap};
+use std::io::Cursor;
+
+use bytes::Bytes;
+use rmpv::Value;
+use serde::{Deserialize, Serialize};
+use serde_default::DefaultFromSerde;
+use serde_repr::{Deserialize_repr, Serialize_repr};
+use serde_tuple::{Deserialize_tuple, Serialize_tuple};
+use thiserror_ext::AsReport;
+
+use crate::error::{Error, Result};
+use crate::protocol::logprobs::MaybeWireLogprobs;
+use crate::protocol::multimodal::MmFeatures;
+use crate::protocol::stats::{PrefillStats, SchedulerStats};
+use crate::protocol::utility::UtilityOutput;
+
+// TODO: This module currently mixes reusable frontend-facing semantic types
+// (for example `FinishReason`, `StopReason`, `RequestOutputKind`, and future
+// cleaned-up frontend sampling types) with engine-core-specific wire DTOs and
+// handshake/control messages. While the Rust frontend is still evolving
+// quickly, keep them co-located here for iteration speed. Once the higher-level
+// API boundary stabilizes, move the truly reusable semantic types into a
+// lower-level common crate and keep the engine transport/wire messages here.
+
+/// Dynamic msgpack value used for schema positions that are preserved but not
+/// yet strongly typed in the early-stage Rust client.
+pub type OpaqueValue = Value;
+
+fn default_opaque_value_nil() -> OpaqueValue {
+    Value::Nil
+}
+
+fn is_false(v: &bool) -> bool {
+    !v
+}
+
+mod classified_outputs;
+pub mod dtype;
+pub mod handshake;
+pub mod logprobs;
+pub mod multimodal;
+pub mod stats;
+pub mod tensor;
+pub mod utility;
+pub use classified_outputs::{
+    ClassifiedEngineCoreOutputs, DpControlMessage, RequestBatchOutputs, UtilityCallOutput,
+};
+pub use dtype::ModelDtype;
+pub use logprobs::decode_engine_core_outputs;
+
+/// Request types are encoded as single-byte protocol constants so they can be
+/// sent over the ZMQ socket without an extra encoding step.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/v1/engine/__init__.py#L217-L228>
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[repr(u8)]
+pub enum EngineCoreRequestType {
+    Add = 0,
+    Abort = 1,
+    StartDpWave = 2,
+    Utility = 3,
+}
+
+impl EngineCoreRequestType {
+    pub fn to_frame(self) -> Bytes {
+        Bytes::from_static(match self {
+            Self::Add => b"\x00",
+            Self::Abort => b"\x01",
+            Self::StartDpWave => b"\x02",
+            Self::Utility => b"\x03",
+        })
+    }
+}
+
+/// Reason a request finished: stop, length, abort, error, or repetition.
+///
+/// This mirrors the Python enum and uses integer encoding for compact wire
+/// representation.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/v1/engine/__init__.py#L41-L63>
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize_repr, Deserialize_repr)]
+#[repr(u8)]
+pub enum EngineCoreFinishReason {
+    /// A stop string was emitted.
+    Stop = 0,
+    /// `max_tokens` or `max_model_len` was reached.
+    Length = 1,
+    /// The request was aborted by the client.
+    Abort = 2,
+    /// A retryable request-level internal error occurred.
+    Error = 3,
+    /// A repetitive token pattern was detected.
+    Repetition = 4,
+}
+
+/// Event types emitted by engine-core for one request.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/v1/engine/__init__.py#L113-L118>
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize_repr, Deserialize_repr)]
+#[repr(u8)]
+pub enum EngineCoreEventType {
+    Queued = 1,
+    Scheduled = 2,
+    Preempted = 3,
+}
+
+/// A timestamped engine-core event associated with one request.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/v1/engine/__init__.py#L121-L130>
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct EngineCoreEvent {
+    pub r#type: EngineCoreEventType,
+    pub timestamp: f64,
+}
+
+/// Controls how intermediate outputs are returned to the frontend.
+///
+/// `Cumulative = 0` is intentionally not supported in Rust frontend.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/sampling_params.py#L146-L152>
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize_repr, Deserialize_repr)]
+#[repr(u8)]
+pub enum RequestOutputKind {
+    /// Return only token deltas in each update.
+    #[default]
+    Delta = 1,
+    /// Suppress intermediate updates and return only the final output.
+    FinalOnly = 2,
+}
+
+/// The stop reason associated with a finished output.
+///
+/// Python models this as the union-typed `stop_reason: int | str | None`
+/// field on `EngineCoreOutput`; the Rust client narrows it into a tagged enum.
+///
+/// Original Python field:
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/v1/engine/__init__.py#L155>
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum StopReason {
+    TokenId(u32),
+    Text(String),
+}
+
+/// Parameters for configuring structured outputs (guided decoding).
+///
+/// Exactly one constraint field (`json`, `regex`, `choice`, `grammar`,
+/// `json_object`, or `structural_tag`) should be set. The engine-core
+/// backend selects the appropriate grammar compiler based on which field
+/// is present.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/sampling_params.py#L36-L107>
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
+#[serde(default)]
+pub struct StructuredOutputsParams {
+    /// JSON schema (as a dict/object or JSON string) constraining the output.
+    pub json: Option<serde_json::Value>,
+    /// Regular expression the output must match.
+    pub regex: Option<String>,
+    /// List of allowed output strings (the model must produce one of these).
+    pub choice: Option<Vec<String>>,
+    /// Context-free grammar (in EBNF-like notation) the output must conform to.
+    pub grammar: Option<String>,
+    /// When `true`, output must be valid JSON (free-form, no schema).
+    pub json_object: Option<bool>,
+    /// Disable any additional whitespace in guided JSON output.
+    #[serde(skip_serializing_if = "crate::protocol::is_false")]
+    pub disable_any_whitespace: bool,
+    /// Disable `additionalProperties` in JSON schema output.
+    #[serde(skip_serializing_if = "crate::protocol::is_false")]
+    pub disable_additional_properties: bool,
+    /// Custom whitespace pattern for guided JSON output.
+    pub whitespace_pattern: Option<String>,
+    /// Structural tag configuration (JSON-encoded string).
+    pub structural_tag: Option<String>,
+}
+
+/// Engine-core-facing sampling parameters for text generation.
+///
+/// This is the normalized southbound subset used by the Rust frontend when it
+/// talks to Python engine-core over the wire. User-facing request semantics
+/// such as `stop` strings, `n`, `ignore_eos`, and output aggregation mode are
+/// intentionally handled by higher layers before values reach this DTO.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/sampling_params.py#L155-L291>
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct EngineCoreSamplingParams {
+    /// Controls randomness. Lower values are more deterministic; zero means
+    /// greedy sampling.
+    pub temperature: f32,
+    /// Cumulative probability threshold for nucleus sampling.
+    pub top_p: f32,
+    /// Maximum number of top tokens to consider. `0` means all tokens.
+    pub top_k: u32,
+    /// Random seed used by the sampler when present.
+    pub seed: Option<i64>,
+    /// Maximum number of tokens to generate per output sequence.
+    pub max_tokens: u32,
+    /// Minimum number of tokens to generate before EOS or stop-token handling.
+    pub min_tokens: u32,
+    /// Number of log probabilities to return per generated token.
+    ///
+    /// `None` disables sample logprobs. `-1` requests the full vocabulary.
+    pub logprobs: Option<i32>,
+    /// Number of log probabilities to return per prompt token.
+    ///
+    /// `None` disables prompt logprobs. `-1` requests the full vocabulary.
+    pub prompt_logprobs: Option<i32>,
+    /// Minimum probability threshold for token sampling.
+    pub min_p: f32,
+    /// Frequency penalty applied by the sampler.
+    pub frequency_penalty: f32,
+    /// Presence penalty applied by the sampler.
+    pub presence_penalty: f32,
+    /// Repetition penalty applied by the sampler.
+    pub repetition_penalty: f32,
+    /// Token IDs that stop generation.
+    pub stop_token_ids: Vec<u32>,
+    /// Primary EOS token ID used by engine-core's dedicated EOS stop path.
+    ///
+    /// This mirrors Python's internal `_eos_token_id` field and is derived by
+    /// the frontend from tokenizer/model metadata rather than supplied directly
+    /// by end users.
+    #[serde(rename = "_eos_token_id")]
+    pub eos_token_id: Option<u32>,
+    /// Complete stop-token set used by engine-core for `min_tokens` masking.
+    ///
+    /// This mirrors Python's internal `_all_stop_token_ids` field and should
+    /// contain explicit `stop_token_ids` plus any frontend-derived EOS token
+    /// IDs.
+    #[serde(rename = "_all_stop_token_ids")]
+    pub all_stop_token_ids: BTreeSet<u32>,
+    /// Logit biases to apply during sampling.
+    /// Keys are token IDs
+    #[serde(default)]
+    pub logit_bias: Option<HashMap<u32, f32>>,
+    /// Restrict output to these token IDs only.
+    #[serde(default)]
+    pub allowed_token_ids: Option<Vec<u32>>,
+    /// Tokenized bad words to avoid during generation.
+    #[serde(default, rename = "_bad_words_token_ids")]
+    pub bad_words_token_ids: Option<Vec<Vec<u32>>>,
+    /// Parameters for configuring structured outputs (guided decoding).
+    #[serde(default)]
+    pub structured_outputs: Option<StructuredOutputsParams>,
+    /// Specific token IDs for which log probabilities should be returned at
+    /// each position.
+    ///
+    /// When set, the engine returns logprobs for exactly these tokens in
+    /// addition to the sampled/scored token. Mutually exclusive with the
+    /// `logprobs` count field in practice.
+    #[serde(default)]
+    pub logprob_token_ids: Option<Vec<u32>>,
+    /// If `Some(true)`, the request will not attempt to read from the prefix
+    /// cache; newly computed blocks may still populate the cache. `None`
+    /// defers to engine-core defaults.
+    #[serde(default)]
+    pub skip_reading_prefix_cache: Option<bool>,
+    /// Additional request parameters for custom extensions (from `vllm_xargs`).
+    #[serde(default)]
+    pub extra_args: Option<HashMap<String, serde_json::Value>>,
+}
+
+impl EngineCoreSamplingParams {
+    /// Constructs a default sampling params for testing purposes only.
+    pub fn for_test() -> Self {
+        Self {
+            temperature: 1.0,
+            top_p: 1.0,
+            top_k: 0,
+            seed: None,
+            max_tokens: 65536,
+            min_tokens: 0,
+            logprobs: None,
+            prompt_logprobs: None,
+            min_p: 0.0,
+            frequency_penalty: 0.0,
+            presence_penalty: 0.0,
+            repetition_penalty: 1.0,
+            stop_token_ids: Vec::new(),
+            eos_token_id: None,
+            all_stop_token_ids: BTreeSet::new(),
+            logit_bias: None,
+            allowed_token_ids: None,
+            bad_words_token_ids: None,
+            structured_outputs: None,
+            logprob_token_ids: None,
+            skip_reading_prefix_cache: None,
+            extra_args: None,
+        }
+    }
+}
+
+/// Engine-core add-request payload sent from frontend to engine.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/3f5bd482f5c1a5dbdffbbf68d624e20bb7032013/vllm/v1/engine/__init__.py#L80-L129>
+#[derive(Debug, Clone, PartialEq, Serialize_tuple, Deserialize_tuple, DefaultFromSerde)]
+pub struct EngineCoreRequest {
+    pub request_id: String,
+    pub prompt_token_ids: Option<Vec<u32>>,
+    /// Multimodal features attached to the request.
+    pub mm_features: Option<MmFeatures>,
+    pub sampling_params: Option<EngineCoreSamplingParams>,
+    /// Pooling parameters are preserved in the schema but not yet strongly
+    /// typed.
+    pub pooling_params: Option<OpaqueValue>,
+    pub arrival_time: f64,
+    #[serde(default)]
+    pub lora_request: Option<OpaqueValue>,
+    #[serde(default)]
+    pub cache_salt: Option<String>,
+    #[serde(default)]
+    pub data_parallel_rank: Option<u32>,
+    /// Unsupported in the first-stage Rust client because Python uses a custom
+    /// tensor/aux-frame encoding path for this field.
+    #[serde(default)]
+    pub prompt_embeds: Option<OpaqueValue>,
+    /// Per-position mask for mixed-mode inputs (e.g. chat completion with
+    /// `prompt_embeds` content parts). `Some(true)` means real token id;
+    /// `Some(false)` means the position uses a pre-computed entry from
+    /// `prompt_embeds`. `None` for pure-tokens and pure-embeds requests.
+    #[serde(default)]
+    pub prompt_is_token_ids: Option<Vec<bool>>,
+    /// Index of the client, used to ensure outputs are sent back to the same
+    /// client when scaling out the frontend.
+    #[serde(default)]
+    pub client_index: u32,
+    /// In DP mode, indicates which wave this request is expected to belong to.
+    #[serde(default)]
+    pub current_wave: u32,
+    #[serde(default)]
+    pub priority: i32,
+    #[serde(default)]
+    pub trace_headers: Option<BTreeMap<String, String>>,
+    #[serde(default)]
+    pub resumable: bool,
+    /// Original user-provided request ID, used for output reporting and aborts.
+    #[serde(default)]
+    pub external_req_id: Option<String>,
+    #[serde(default)]
+    pub reasoning_ended: Option<bool>,
+    /// Opaque reasoning-parser kwargs forwarded from the frontend to the
+    /// structured-output backend.
+    #[serde(default)]
+    pub reasoning_parser_kwargs: Option<OpaqueValue>,
+    /// If `true`, the request should be added to the scheduler's waiting queue
+    /// and immediately aborted, so connector-side cleanup runs via the
+    /// standard `request_finished` hook.
+    #[serde(default)]
+    pub abort_immediately: bool,
+}
+
+impl EngineCoreRequest {
+    /// Validate fields intentionally not supported in the first-stage client.
+    pub fn validate(&self) -> Result<()> {
+        if self.prompt_embeds.is_some() {
+            return Err(Error::UnsupportedField {
+                context: "EngineCoreRequest",
+                field: "prompt_embeds",
+            });
+        }
+        Ok(())
+    }
+}
+
+/// Engine-core output for a single request.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/d3af8c18317c0dc008d42e4367fbb9045cfb7bf6/vllm/v1/engine/__init__.py#L154-L184>
+#[derive(Debug, Clone, PartialEq, Serialize_tuple, Deserialize_tuple, DefaultFromSerde)]
+pub struct EngineCoreOutput {
+    pub request_id: String,
+    pub new_token_ids: Vec<u32>,
+    /// Decoded sample logprobs for the newly generated positions in this
+    /// output.
+    #[serde(default)]
+    pub new_logprobs: Option<MaybeWireLogprobs>,
+    /// Decoded prompt logprobs for the scored prompt positions emitted in this
+    /// output.
+    #[serde(default)]
+    pub new_prompt_logprobs_tensors: Option<MaybeWireLogprobs>,
+    #[serde(default)]
+    pub pooling_output: Option<OpaqueValue>,
+    #[serde(default)]
+    pub finish_reason: Option<EngineCoreFinishReason>,
+    #[serde(default)]
+    pub stop_reason: Option<StopReason>,
+    #[serde(default)]
+    pub events: Option<Vec<EngineCoreEvent>>,
+    #[serde(default)]
+    pub kv_transfer_params: Option<serde_json::Value>,
+    #[serde(default)]
+    pub trace_headers: Option<OpaqueValue>,
+    /// Breakdown of the scheduled prefill computation, set on the first output
+    /// of a newly scheduled prefill and elided for subsequent decode outputs.
+    #[serde(default)]
+    pub prefill_stats: Option<PrefillStats>,
+    #[serde(default)]
+    pub routed_experts: Option<OpaqueValue>,
+    /// Number of NaNs seen in logits. Values above zero indicate corruption.
+    #[serde(default)]
+    pub num_nans_in_logits: u32,
+}
+
+impl EngineCoreOutput {
+    /// Returns whether this output is terminal for the request.
+    pub fn finished(&self) -> bool {
+        self.finish_reason.is_some()
+    }
+}
+
+/// Batch of engine-core outputs returned to a frontend client.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/v1/engine/__init__.py#L186-L214>
+#[derive(Debug, Clone, PartialEq, Serialize_tuple, Deserialize_tuple, DefaultFromSerde)]
+pub struct EngineCoreOutputs {
+    #[serde(default)]
+    pub engine_index: u32,
+    /// Outputs grouped for this client in the current engine tick.
+    #[serde(default)]
+    pub outputs: Vec<EngineCoreOutput>,
+    #[serde(default)]
+    pub scheduler_stats: Option<Box<SchedulerStats>>,
+    #[serde(default)]
+    pub timestamp: f64,
+    #[serde(default)]
+    pub utility_output: Option<UtilityOutput>,
+    #[serde(default)]
+    pub finished_requests: Option<BTreeSet<String>>,
+    /// In DP mode, signals that the current wave finished and engines are
+    /// paused.
+    #[serde(default)]
+    pub wave_complete: Option<u32>,
+    /// In DP mode, signals that a request arrived for an old wave and the next
+    /// wave needs to start in other engines.
+    #[serde(default)]
+    pub start_wave: Option<u32>,
+}
+
+/// Encode a Rust value into msgpack using the protocol crate's serde model.
+pub fn encode_msgpack<T>(value: &T) -> Result<Vec<u8>>
+where
+    T: Serialize + std::fmt::Debug,
+{
+    rmp_serde::to_vec_named(value).map_err(|error| Error::Encode {
+        target_type: type_name::<T>(),
+        message: format!(
+            "failed to encode value `{:?}`: {}",
+            value,
+            error.to_report_string()
+        ),
+    })
+}
+
+/// Decode a msgpack payload into a strongly typed protocol value, with enhanced
+/// error reporting.
+pub fn decode_msgpack<T>(bytes: &[u8]) -> Result<T>
+where
+    T: for<'de> Deserialize<'de>,
+{
+    fn decode_value_preview(bytes: &[u8]) -> String {
+        match decode_value(bytes) {
+            Ok(value) => format!("{value}"),
+            Err(error) => format!("<value decode failed: {error}>"),
+        }
+    }
+
+    rmp_serde::from_slice(bytes).map_err(|error| Error::Decode {
+        target_type: type_name::<T>(),
+        message: format!("{error}; value fallback: {}", decode_value_preview(bytes)),
+    })
+}
+
+pub fn decode_value(bytes: &[u8]) -> Result<Value> {
+    Ok(rmpv::decode::read_value(&mut Cursor::new(bytes))?)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::BTreeSet;
+
+    use super::*;
+
+    #[test]
+    fn engine_core_request_serializes_as_full_array() {
+        let request = EngineCoreRequest {
+            request_id: "req-1".to_string(),
+            prompt_token_ids: Some(vec![1, 2, 3]),
+            sampling_params: Some(EngineCoreSamplingParams {
+                max_tokens: 8,
+                ..EngineCoreSamplingParams::for_test()
+            }),
+            arrival_time: 1234.5,
+            client_index: 7,
+            ..EngineCoreRequest::default()
+        };
+
+        let encoded = encode_msgpack(&request).unwrap();
+        let value = decode_value(&encoded).unwrap();
+        let array = match value {
+            Value::Array(array) => array,
+            other => panic!("expected array, got {other:?}"),
+        };
+
+        assert_eq!(array.len(), 20);
+        assert_eq!(array[0], Value::from("req-1"));
+        assert_eq!(array[2], Value::Nil);
+        assert_eq!(array[4], Value::Nil);
+        assert_eq!(array[10], Value::Nil);
+        assert_eq!(array[11], Value::from(7));
+    }
+
+    #[test]
+    fn engine_core_outputs_roundtrip_finished_fields() {
+        let outputs = EngineCoreOutputs {
+            outputs: vec![EngineCoreOutput {
+                request_id: "req-1".to_string(),
+                new_token_ids: vec![42],
+                new_logprobs: None,
+                new_prompt_logprobs_tensors: None,
+                pooling_output: None,
+                finish_reason: Some(EngineCoreFinishReason::Length),
+                stop_reason: Some(StopReason::Text("stop".to_string())),
+                events: None,
+                kv_transfer_params: None,
+                trace_headers: None,
+                prefill_stats: None,
+                routed_experts: None,
+                num_nans_in_logits: 0,
+            }],
+            finished_requests: Some(BTreeSet::from(["req-1".to_string()])),
+            ..Default::default()
+        };
+
+        let encoded = encode_msgpack(&outputs).unwrap();
+        let decoded: EngineCoreOutputs = decode_msgpack(&encoded).unwrap();
+
+        assert_eq!(decoded.outputs.len(), 1);
+        assert_eq!(
+            decoded.outputs[0].finish_reason,
+            Some(EngineCoreFinishReason::Length)
+        );
+        assert_eq!(
+            decoded.finished_requests,
+            Some(BTreeSet::from(["req-1".to_string()]))
+        );
+    }
+
+    #[test]
+    fn decode_msgpack_includes_type_name_and_value_fallback() {
+        let error = decode_msgpack::<u64>(
+            &rmp_serde::to_vec_named(&BTreeMap::from([("status", "READY")])).unwrap(),
+        )
+        .unwrap_err();
+
+        expect_test::expect![[r#"messagepack decode failed for u64: wrong msgpack marker FixMap(1); value fallback: {"status": "READY"}"#]].assert_eq(&error.to_report_string());
+    }
+}
diff --git a/rust/src/engine-core-client/src/protocol/multimodal.rs b/rust/src/engine-core-client/src/protocol/multimodal.rs
new file mode 100644
index 000000000000..78d84740e2d2
--- /dev/null
+++ b/rust/src/engine-core-client/src/protocol/multimodal.rs
@@ -0,0 +1,282 @@
+use std::collections::BTreeMap;
+
+use serde::{Deserialize, Serialize};
+use serde_tuple::{Deserialize_tuple, Serialize_tuple};
+
+use super::tensor::WireTensor;
+
+/// Multimodal feature payload accepted from higher-level frontend code.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/v1/engine/__init__.py#L88>
+pub type MmFeatures = Vec<MmFeatureSpec>;
+
+/// Represents a single multimodal input with its processed data and metadata.
+///
+/// Used to track multimodal data through processing and caching. A request
+/// containing multiple multimodal items will have one `MmFeatureSpec`
+/// per item.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/multimodal/inputs.py#L301-L332>
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct MmFeatureSpec {
+    /// Represents multimodal data for this feature.
+    ///
+    /// Can be `None` if the item is cached, to skip IPC between API server
+    /// and engine core processes.
+    pub data: Option<MmKwargsItem>,
+
+    /// The input modality, e.g., `"image"`, `"audio"`, `"video"`.
+    pub modality: String,
+
+    /// The hash for caching encoder outputs (with LoRA prefix if applicable).
+    pub identifier: String,
+
+    /// The location of the `modality` tokens corresponding to this item
+    /// in the prompt, e.g., `PlaceholderRange(offset=2, length=336)`.
+    pub mm_position: PlaceholderRange,
+
+    /// The hash for caching processor outputs (without LoRA prefix).
+    #[serde(default)]
+    pub mm_hash: Option<String>,
+}
+
+/// Placeholder location information for multi-modal data.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/multimodal/inputs.py#L118-L145>
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct PlaceholderRange {
+    /// The start index of the placeholder in the prompt.
+    pub offset: usize,
+
+    /// The length of the placeholder.
+    pub length: usize,
+
+    /// A boolean mask of shape `(length,)` indicating which positions
+    /// between `offset` and `offset + length` to assign embeddings to.
+    /// `None` means all positions.
+    #[serde(default)]
+    pub is_embed: Option<WireTensor>,
+}
+
+/// A dictionary of processed keyword arguments to pass to the model,
+/// corresponding to a single item in `MultiModalDataItems`.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/multimodal/inputs.py#L854-L871>
+pub type MmKwargsItem = BTreeMap<String, MmFieldElem>;
+
+/// Represents a processed keyword argument to pass to a model for a
+/// `MmKwargsItem`.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/multimodal/inputs.py#L348-L369>
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct MmFieldElem {
+    /// The processed value of this field in `MmKwargsItem`, i.e. the
+    /// keyword argument value to be passed to the model.
+    ///
+    /// It may be set to `None` if it is determined that the item is cached
+    /// in `EngineCore`.
+    pub data: Option<MmKwargValue>,
+
+    /// Defines how to combine this field's processed values with others in
+    /// order to batch multi-modal items together for model inference.
+    pub field: MmField,
+}
+
+/// Processed multimodal keyword argument value.
+///
+/// Original Python definition (`NestedTensors`) and wire encoding:
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/multimodal/inputs.py#L218-L226>
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/v1/serial_utils.py#L292-L299>
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/v1/serial_utils.py#L456-L465>
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum MmKwargValue {
+    Tensor(WireTensor),
+    Int(i64),
+    Float(f64),
+    List(Vec<MmKwargValue>),
+}
+
+/// Defines how to interpret tensor data belonging to a keyword argument for
+/// `MultiModalKwargsItems`, and vice versa.
+///
+/// Original Python definitions and wire encoding:
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/multimodal/inputs.py#L385-L630>
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/v1/serial_utils.py#L301-L310>
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/v1/serial_utils.py#L440-L454>
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(try_from = "MmFieldWire", into = "MmFieldWire")]
+pub enum MmField {
+    Batched(MmBatchedField),
+    Flat(MmFlatField),
+    Shared(MmSharedField),
+}
+
+/// Info: `MultiModalFieldConfig.batched`.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/multimodal/inputs.py#L385-L502>
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct MmBatchedField {
+    /// If `True`, then this field is excluded from being moved to the
+    /// accelerator when multimodal items are grouped and batched.
+    pub keep_on_cpu: bool,
+}
+
+/// Info: `MultiModalFieldConfig.flat` and
+/// `MultiModalFieldConfig.flat_from_sizes`.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/multimodal/inputs.py#L385-L397>
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/multimodal/inputs.py#L505-L603>
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct MmFlatField {
+    /// For each multi-modal item, a slice (`dim=0`) or a tuple of slices
+    /// (`dim>0`) that is used to extract the data corresponding to it.
+    pub slices: Vec<MmSlice>,
+
+    /// The dimension to extract data, default to 0.
+    pub dim: i32,
+
+    /// If `True`, then this field is excluded from being moved to the
+    /// accelerator when multimodal items are grouped and batched.
+    pub keep_on_cpu: bool,
+}
+
+/// Info: `MultiModalFieldConfig.shared`.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/multimodal/inputs.py#L385-L397>
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/multimodal/inputs.py#L606-L630>
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct MmSharedField {
+    pub batch_size: usize,
+
+    /// If `True`, then this field is excluded from being moved to the
+    /// accelerator when multimodal items are grouped and batched.
+    pub keep_on_cpu: bool,
+}
+
+/// Python slice encoded as `(start, stop, step)`.
+///
+/// Original Python wire encoding:
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/v1/serial_utils.py#L199-L204>
+#[derive(Debug, Clone, PartialEq, Eq, Serialize_tuple, Deserialize_tuple)]
+pub struct SliceSpec {
+    pub start: Option<isize>,
+    pub stop: Option<isize>,
+    pub step: Option<isize>,
+}
+
+/// A single slice or a tuple of slices used by `MmFlatField`.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum MmSlice {
+    Slice(SliceSpec),
+    Slices(Vec<SliceSpec>),
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize_tuple, Deserialize_tuple)]
+struct MmFieldWire {
+    name: String,
+    inner: MmFieldWireInner,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(untagged)]
+enum MmFieldWireInner {
+    Batched(MmBatchedField),
+    Flat(MmFlatField),
+    Shared(MmSharedField),
+}
+
+impl TryFrom<MmFieldWire> for MmField {
+    type Error = String;
+
+    fn try_from(value: MmFieldWire) -> Result<Self, Self::Error> {
+        match (value.name.as_str(), value.inner) {
+            ("batched", MmFieldWireInner::Batched(kwargs)) => Ok(Self::Batched(kwargs)),
+            ("flat", MmFieldWireInner::Flat(kwargs)) => Ok(Self::Flat(kwargs)),
+            ("shared", MmFieldWireInner::Shared(kwargs)) => Ok(Self::Shared(kwargs)),
+            (name, _) => Err(format!(
+                "mismatched or unknown multimodal field factory {name:?}"
+            )),
+        }
+    }
+}
+
+impl From<MmField> for MmFieldWire {
+    fn from(value: MmField) -> Self {
+        match value {
+            MmField::Batched(kwargs) => Self {
+                name: "batched".to_string(),
+                inner: MmFieldWireInner::Batched(kwargs),
+            },
+            MmField::Flat(kwargs) => Self {
+                name: "flat".to_string(),
+                inner: MmFieldWireInner::Flat(kwargs),
+            },
+            MmField::Shared(kwargs) => Self {
+                name: "shared".to_string(),
+                inner: MmFieldWireInner::Shared(kwargs),
+            },
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::io::Cursor;
+
+    use rmpv::Value;
+
+    use super::*;
+
+    fn encode_value<T: Serialize>(value: &T) -> Value {
+        let bytes = rmp_serde::to_vec_named(value).expect("encode value");
+        rmpv::decode::read_value(&mut Cursor::new(bytes)).expect("decode value")
+    }
+
+    #[test]
+    fn multimodal_field_serializes_to_python_factory_tuple() {
+        let field = MmField::Flat(MmFlatField {
+            slices: vec![MmSlice::Slice(SliceSpec {
+                start: Some(0),
+                stop: Some(1200),
+                step: None,
+            })],
+            dim: 0,
+            keep_on_cpu: false,
+        });
+
+        let value = encode_value(&field);
+        let Value::Array(items) = value else {
+            panic!("field should encode as tuple array");
+        };
+        assert_eq!(items.len(), 2);
+        assert_eq!(items[0].as_str(), Some("flat"));
+
+        let Value::Map(kwargs) = &items[1] else {
+            panic!("field kwargs should encode as map");
+        };
+        assert!(kwargs.iter().any(|(key, _)| key.as_str() == Some("slices")));
+        assert!(kwargs.iter().any(|(key, _)| key.as_str() == Some("dim")));
+        assert!(kwargs.iter().any(|(key, _)| key.as_str() == Some("keep_on_cpu")));
+    }
+
+    #[test]
+    fn multimodal_field_round_trips_python_factory_tuple() {
+        let field = MmField::Batched(MmBatchedField { keep_on_cpu: true });
+        let encoded = rmp_serde::to_vec_named(&field).expect("encode field");
+        let decoded: MmField = rmp_serde::from_slice(&encoded).expect("decode field");
+        assert_eq!(decoded, field);
+    }
+}
diff --git a/rust/src/engine-core-client/src/protocol/stats.rs b/rust/src/engine-core-client/src/protocol/stats.rs
new file mode 100644
index 000000000000..254efc31b242
--- /dev/null
+++ b/rust/src/engine-core-client/src/protocol/stats.rs
@@ -0,0 +1,192 @@
+use std::collections::BTreeMap;
+
+use serde::{Deserialize, Serialize};
+
+use crate::protocol::OpaqueValue;
+
+/// Stores cache hit statistics.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/v1/metrics/stats.py#L18-L32>
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
+pub struct BaseCacheStats {
+    /// Whether the cache was reset.
+    pub reset: bool,
+    /// The number of requests in this update.
+    pub requests: u64,
+    /// The number of queries in these requests.
+    pub queries: u64,
+    /// The number of hits in these requests.
+    pub hits: u64,
+}
+
+/// Stores prefix cache hit statistics.
+/// - `reset`: Whether `reset_prefix_cache` was invoked.
+/// - `queries`: Refers to the number of tokens that were queried.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/v1/metrics/stats.py#L114-L143>
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
+pub struct PrefixCacheStats {
+    /// Embedded base cache counters and reset flag.
+    #[serde(flatten)]
+    pub base: BaseCacheStats,
+    /// The number of previously preempted requests in this update.
+    pub preempted_requests: u64,
+    /// The `queries` number for preempted requests.
+    pub preempted_queries: u64,
+    /// The `hits` number for preempted requests.
+    pub preempted_hits: u64,
+}
+
+/// Single KV cache block eviction sample.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/v1/metrics/stats.py#L161-L167>
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
+pub struct KvCacheEvictionEvent {
+    /// Lifetime from allocation to eviction.
+    pub lifetime_seconds: f64,
+    /// Idle time observed before eviction.
+    pub idle_seconds: f64,
+    /// Time gaps between consecutive accesses before eviction.
+    pub reuse_gaps_seconds: Vec<f64>,
+}
+
+/// Per-step iteration decoding stats from scheduler.
+///
+/// Each scheduler step, statistics on spec decoding performance are aggregated
+/// across requests by the scheduler and returned to the frontend in
+/// `EngineCoreOutputs -> SchedulerStats`.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/v1/spec_decode/metrics.py#L16-L44>
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
+pub struct SpecDecodingStats {
+    /// Configured speculative token count for this scheduler.
+    pub num_spec_tokens: u64,
+    /// Number of drafted speculative decoding attempts.
+    pub num_drafts: u64,
+    /// Number of drafted tokens.
+    pub num_draft_tokens: u64,
+    /// Number of accepted drafted tokens.
+    pub num_accepted_tokens: u64,
+    /// Accepted drafted tokens counted by draft position.
+    pub num_accepted_tokens_per_pos: Vec<u64>,
+}
+
+/// Breakdown of a scheduled prefill computation.
+///
+/// Python models this as a plain `@dataclass`, so it is serialized by msgspec
+/// as a map (named fields) rather than in the array-like form used by
+/// `EngineCoreOutput` itself.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/d3af8c18317c0dc008d42e4367fbb9045cfb7bf6/vllm/v1/metrics/stats.py#L242-L273>
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct PrefillStats {
+    /// Total number of tokens to be prefilled.
+    #[serde(default)]
+    pub num_prompt_tokens: u32,
+    /// Tokens to be prefilled locally (actual compute work).
+    #[serde(default)]
+    pub num_computed_tokens: u32,
+    /// Tokens to be prefilled without actual compute work.
+    #[serde(default)]
+    pub num_cached_tokens: u32,
+    /// Tokens to be prefilled from local prefix cache.
+    #[serde(default)]
+    pub num_local_cached_tokens: u32,
+    /// Tokens to be prefilled from external KV transfer.
+    #[serde(default)]
+    pub num_external_cached_tokens: u32,
+}
+
+/// Stats for debugging the metrics calculation.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/v1/metrics/perf.py#L46-L55>
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
+pub struct DebugPerfStats {
+    /// Time spent calculating these stats.
+    pub calc_duration: f64,
+    /// Number of prefill requests included in the sampled batch.
+    pub num_prefill_requests: u64,
+    /// Number of decode requests included in the sampled batch.
+    pub num_decode_requests: u64,
+    /// Optional execution-context breakdown used for debugging.
+    pub context_breakdown: Option<BTreeMap<String, u64>>,
+    /// Optional per-component FLOPs breakdown.
+    pub num_flops_per_gpu_breakdown: Option<BTreeMap<String, u64>>,
+    /// Optional per-component memory-read breakdown.
+    pub num_read_bytes_per_gpu_breakdown: Option<BTreeMap<String, u64>>,
+    /// Optional per-component memory-write breakdown.
+    pub num_write_bytes_per_gpu_breakdown: Option<BTreeMap<String, u64>>,
+}
+
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/v1/metrics/perf.py#L58-L63>
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
+pub struct PerfStats {
+    /// Estimated floating point operations per GPU.
+    pub num_flops_per_gpu: u64,
+    /// Estimated bytes read from memory per GPU.
+    pub num_read_bytes_per_gpu: u64,
+    /// Estimated bytes written to memory per GPU.
+    pub num_write_bytes_per_gpu: u64,
+    /// Optional debug-only perf derivation details.
+    pub debug_stats: Option<DebugPerfStats>,
+}
+
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/compilation/cuda_graph.py#L28-L33>
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
+pub struct CudagraphStat {
+    /// Number of real tokens in the captured batch before padding.
+    pub num_unpadded_tokens: u64,
+    /// Number of padded tokens in the captured batch.
+    pub num_padded_tokens: u64,
+    /// Number of padding positions added for capture/runtime shape alignment.
+    pub num_paddings: u64,
+    /// Runtime mode string associated with this CUDA graph sample.
+    pub runtime_mode: String,
+}
+
+/// Stats associated with the scheduler.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/906a8c15d0beded2cca8b357877266c631340f74/vllm/v1/metrics/stats.py#L170-L196>
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
+pub struct SchedulerStats {
+    /// Number of requests in model execution batches.
+    pub num_running_reqs: u64,
+    /// Length of the "waiting" request queue.
+    pub num_waiting_reqs: u64,
+    /// Length of the "skipped waiting" queue.
+    #[serde(default)]
+    pub num_skipped_waiting_reqs: u64,
+    /// Internal DP load-balancing step counter.
+    pub step_counter: u64,
+    /// Internal DP load-balancing wave number.
+    pub current_wave: u64,
+    /// KV-cache usage. `1.0` means 100% usage.
+    pub kv_cache_usage: f64,
+    /// Local prefix cache statistics.
+    pub prefix_cache_stats: PrefixCacheStats,
+    /// External connector prefix cache statistics, when configured.
+    pub connector_prefix_cache_stats: Option<PrefixCacheStats>,
+    /// Sampled KV cache eviction events for residency metrics.
+    pub kv_cache_eviction_events: Vec<KvCacheEvictionEvent>,
+    /// Speculative decoding scheduler stats, when enabled.
+    pub spec_decoding_stats: Option<SpecDecodingStats>,
+    /// Connector-specific KV transfer stats, kept opaque for now.
+    pub kv_connector_stats: Option<BTreeMap<String, OpaqueValue>>,
+    /// Waiting request counts per LoRA adapter.
+    pub waiting_lora_adapters: BTreeMap<String, u64>,
+    /// Running request counts per LoRA adapter.
+    pub running_lora_adapters: BTreeMap<String, u64>,
+    /// CUDA graph runtime stats when graph metrics are enabled.
+    pub cudagraph_stats: Option<CudagraphStat>,
+    /// Estimated MFU/performance stats, when enabled.
+    pub perf_stats: Option<PerfStats>,
+}
diff --git a/rust/src/engine-core-client/src/protocol/tensor.rs b/rust/src/engine-core-client/src/protocol/tensor.rs
new file mode 100644
index 000000000000..b67112154817
--- /dev/null
+++ b/rust/src/engine-core-client/src/protocol/tensor.rs
@@ -0,0 +1,255 @@
+use bytemuck::allocation::pod_collect_to_vec;
+use enum_as_inner::EnumAsInner;
+use half::{bf16, f16};
+use rmpv::Value;
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use serde_tuple::{Deserialize_tuple, Serialize_tuple};
+
+/// Tensors and ndarrays are encoded with this extension type in Python.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/v1/serial_utils.py#L41-L43>
+const CUSTOM_TYPE_RAW_VIEW: i8 = 3;
+
+#[easy_ext::ext(ShapeExt)]
+impl [usize] {
+    /// Returned the total number of elements implied by this shape, or `None`
+    /// if the product of the dimensions overflows `usize`.
+    pub fn checked_numel(&self) -> Option<usize> {
+        self.iter().try_fold(1usize, |acc, dim| acc.checked_mul(*dim))
+    }
+}
+
+/// Python ndarray/tensor wire tuple encoded as `(dtype, shape, data)`.
+///
+/// This matches the custom msgpack representation built by Python
+/// `serial_utils.encode_ndarray` / `encode_tensor`.
+///
+/// Original Python wire encoders:
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/v1/serial_utils.py#L237-L273>
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/v1/serial_utils.py#L389-L425>
+#[derive(Debug, Clone, PartialEq, Serialize_tuple, Deserialize_tuple)]
+pub struct WireNdArray {
+    pub dtype: String,
+    pub shape: Vec<usize>,
+    pub data: WireArrayData,
+}
+
+impl WireNdArray {
+    /// Build a float32 tensor/ndarray backed by native-endian raw-view bytes.
+    pub fn from_f32(shape: Vec<usize>, data: Vec<f32>) -> Result<Self, String> {
+        validate_element_count(&shape, data.len())?;
+        Ok(Self {
+            dtype: "float32".to_string(),
+            shape,
+            data: WireArrayData::RawView(pod_collect_to_vec::<f32, u8>(&data)),
+        })
+    }
+
+    /// Build a float16 tensor/ndarray backed by native-endian raw-view bytes.
+    pub fn from_f16(shape: Vec<usize>, data: Vec<f16>) -> Result<Self, String> {
+        validate_element_count(&shape, data.len())?;
+        Ok(Self {
+            dtype: "float16".to_string(),
+            shape,
+            data: WireArrayData::RawView(pod_collect_to_vec::<f16, u8>(&data)),
+        })
+    }
+
+    /// Build a bfloat16 tensor/ndarray backed by native-endian raw-view bytes.
+    pub fn from_bf16(shape: Vec<usize>, data: Vec<bf16>) -> Result<Self, String> {
+        validate_element_count(&shape, data.len())?;
+        Ok(Self {
+            dtype: "bfloat16".to_string(),
+            shape,
+            data: WireArrayData::RawView(pod_collect_to_vec::<bf16, u8>(&data)),
+        })
+    }
+
+    /// Build an int64 tensor/ndarray backed by native-endian raw-view bytes.
+    pub fn from_i64(shape: Vec<usize>, data: Vec<i64>) -> Result<Self, String> {
+        validate_element_count(&shape, data.len())?;
+        Ok(Self {
+            dtype: "int64".to_string(),
+            shape,
+            data: WireArrayData::RawView(pod_collect_to_vec::<i64, u8>(&data)),
+        })
+    }
+
+    /// Build a uint32 tensor/ndarray backed by native-endian raw-view bytes.
+    pub fn from_u32(shape: Vec<usize>, data: Vec<u32>) -> Result<Self, String> {
+        validate_element_count(&shape, data.len())?;
+        Ok(Self {
+            dtype: "uint32".to_string(),
+            shape,
+            data: WireArrayData::RawView(pod_collect_to_vec::<u32, u8>(&data)),
+        })
+    }
+
+    /// Build a bool tensor/ndarray backed by raw-view bytes.
+    ///
+    /// This matches `torch.bool` storage: one byte per element, not a packed
+    /// bitmap. Values are canonicalized as `false -> 0` and `true -> 1`.
+    pub fn from_bool(shape: Vec<usize>, data: Vec<bool>) -> Result<Self, String> {
+        validate_element_count(&shape, data.len())?;
+        Ok(Self {
+            dtype: "bool".to_string(),
+            shape,
+            data: WireArrayData::RawView(data.iter().map(|value| u8::from(*value)).collect()),
+        })
+    }
+
+    /// Build a tensor/ndarray from already-encoded raw-view bytes.
+    ///
+    /// Use this as an escape hatch when the caller already owns bytes that
+    /// match the requested `dtype` and `shape`.
+    pub fn from_raw(dtype: impl Into<String>, shape: Vec<usize>, data: Vec<u8>) -> Self {
+        Self {
+            dtype: dtype.into(),
+            shape,
+            data: WireArrayData::RawView(data),
+        }
+    }
+}
+
+/// Validate that the number of elements implied by the shape matches the length
+/// of the data.
+fn validate_element_count(shape: &[usize], len: usize) -> Result<(), String> {
+    let expected = shape
+        .checked_numel()
+        .ok_or_else(|| format!("tensor shape product overflows usize: {shape:?}"))?;
+    if expected == len {
+        Ok(())
+    } else {
+        Err(format!(
+            "tensor data length {len} does not match shape {shape:?} product {expected}"
+        ))
+    }
+}
+
+/// Python tensor wire tuple encoded as `(dtype, shape, data)`.
+///
+/// This is the same wire shape as [`WireNdArray`]; multimodal request payloads
+/// use it for `torch.Tensor` values.
+pub type WireTensor = WireNdArray;
+
+/// Python array/tensor payload reference inside [`WireNdArray`].
+///
+/// The data can be either an inline msgpack raw-view extension or an index into
+/// the multipart aux-frame list carried alongside the primary msgpack frame.
+///
+/// Original Python wire encoders:
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/v1/serial_utils.py#L237-L273>
+/// <https://github.com/vllm-project/vllm/blob/5a0a8fc1ea7542394ff315138bd5677b7b53bca1/vllm/v1/serial_utils.py#L389-L425>
+#[derive(Debug, Clone, PartialEq, EnumAsInner)]
+pub enum WireArrayData {
+    /// The index of the aux frame where the raw bytes of this array/tensor are
+    /// stored.
+    AuxIndex(usize),
+    /// The raw bytes of this array/tensor.
+    RawView(Vec<u8>),
+}
+
+impl<'de> Deserialize<'de> for WireArrayData {
+    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let value = Value::deserialize(deserializer)?;
+        match value {
+            Value::Ext(tag, bytes) if tag == CUSTOM_TYPE_RAW_VIEW => Ok(Self::RawView(bytes)),
+            Value::Ext(tag, _) => Err(serde::de::Error::custom(format!(
+                "unsupported extension type code {tag}"
+            ))),
+            Value::Integer(index) => {
+                index.as_u64().map(|index| Self::AuxIndex(index as usize)).ok_or_else(|| {
+                    serde::de::Error::custom("aux frame index must be a non-negative integer")
+                })
+            }
+            other => Err(serde::de::Error::custom(format!(
+                "expected raw-view ext or aux frame index, got {other:?}"
+            ))),
+        }
+    }
+}
+
+impl Serialize for WireArrayData {
+    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        // TODO: outbound request serialization currently only supports inline
+        // raw-view bytes. Emitting aux frames needs transport-level plumbing;
+        // serializing `AuxIndex` here only preserves an already-built reference.
+        match self {
+            Self::AuxIndex(index) => serializer.serialize_u64(*index as u64),
+            Self::RawView(bytes) => {
+                Value::Ext(CUSTOM_TYPE_RAW_VIEW, bytes.clone()).serialize(serializer)
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn constructors_build_raw_view_tensors() {
+        let f32_tensor = WireNdArray::from_f32(vec![2], vec![1.0, 2.5]).unwrap();
+        assert_eq!(f32_tensor.dtype, "float32");
+        assert_eq!(f32_tensor.shape, vec![2]);
+        assert_eq!(
+            f32_tensor.data.into_raw_view().expect("raw view"),
+            [1.0_f32, 2.5].into_iter().flat_map(f32::to_ne_bytes).collect::<Vec<_>>()
+        );
+
+        let f16_tensor =
+            WireNdArray::from_f16(vec![2], vec![f16::from_f32(1.0), f16::from_f32(2.5)]).unwrap();
+        assert_eq!(f16_tensor.dtype, "float16");
+        assert_eq!(f16_tensor.shape, vec![2]);
+        assert_eq!(f16_tensor.data.into_raw_view().expect("raw view").len(), 4);
+
+        let bf16_tensor =
+            WireNdArray::from_bf16(vec![2], vec![bf16::from_f32(1.0), bf16::from_f32(2.5)])
+                .unwrap();
+        assert_eq!(bf16_tensor.dtype, "bfloat16");
+        assert_eq!(bf16_tensor.shape, vec![2]);
+        assert_eq!(bf16_tensor.data.into_raw_view().expect("raw view").len(), 4);
+
+        let i64_tensor = WireNdArray::from_i64(vec![1], vec![-7]).unwrap();
+        assert_eq!(i64_tensor.dtype, "int64");
+        assert_eq!(
+            i64_tensor.data.into_raw_view().expect("raw view"),
+            (-7_i64).to_ne_bytes()
+        );
+
+        let u32_tensor = WireNdArray::from_u32(vec![1], vec![42]).unwrap();
+        assert_eq!(u32_tensor.dtype, "uint32");
+        assert_eq!(
+            u32_tensor.data.into_raw_view().expect("raw view"),
+            42_u32.to_ne_bytes()
+        );
+
+        let bool_tensor = WireNdArray::from_bool(vec![2], vec![false, true]).unwrap();
+        assert_eq!(bool_tensor.dtype, "bool");
+        assert_eq!(
+            bool_tensor.data.into_raw_view().expect("raw view"),
+            vec![0, 1]
+        );
+
+        let raw_tensor = WireNdArray::from_raw("custom", vec![3], vec![1, 2, 3]);
+        assert_eq!(raw_tensor.dtype, "custom");
+        assert_eq!(raw_tensor.shape, vec![3]);
+        assert_eq!(
+            raw_tensor.data.into_raw_view().expect("raw view"),
+            vec![1, 2, 3]
+        );
+    }
+
+    #[test]
+    fn constructors_validate_shape_product() {
+        let err = WireNdArray::from_f32(vec![2, 2], vec![1.0, 2.0]).unwrap_err();
+        assert!(err.contains("does not match shape"));
+    }
+}
diff --git a/rust/src/engine-core-client/src/protocol/utility.rs b/rust/src/engine-core-client/src/protocol/utility.rs
new file mode 100644
index 000000000000..b46caef49cc2
--- /dev/null
+++ b/rust/src/engine-core-client/src/protocol/utility.rs
@@ -0,0 +1,330 @@
+use std::any::type_name;
+use std::fmt;
+
+use rmpv::Value;
+use serde::{Deserialize, Serialize};
+use serde_default::DefaultFromSerde;
+use serde_tuple::{Deserialize_tuple, Serialize_tuple};
+use thiserror_ext::AsReport;
+
+use super::{OpaqueValue, default_opaque_value_nil};
+use crate::error::{Error, Result};
+
+/// Utility call id as carried on the engine-core MessagePack wire.
+///
+/// Python emits utility ids as MessagePack integers, including values that may
+/// require unsigned 64-bit encoding. Keep MessagePack's signed/unsigned
+/// integer distinction instead of flattening to `i64` or `u64` at decode time.
+#[derive(Clone, Copy, PartialEq)]
+pub struct UtilityCallId(rmpv::Integer);
+
+impl UtilityCallId {
+    /// Returns the integer represented as `u64` if possible, or else `None`.
+    /// This is the typical case for utility calls.
+    pub fn as_u64(self) -> Option<u64> {
+        self.0.as_u64()
+    }
+
+    /// Returns the integer represented as `i64` if possible, or else `None`.
+    pub fn as_i64(self) -> Option<i64> {
+        self.0.as_i64()
+    }
+}
+
+impl Default for UtilityCallId {
+    fn default() -> Self {
+        Self(0_u64.into())
+    }
+}
+
+impl From<u64> for UtilityCallId {
+    fn from(value: u64) -> Self {
+        Self(value.into())
+    }
+}
+
+impl TryFrom<Value> for UtilityCallId {
+    type Error = String;
+
+    fn try_from(value: Value) -> std::result::Result<Self, Self::Error> {
+        match value {
+            Value::Integer(value) => Ok(UtilityCallId(value)),
+            other => Err(format!(
+                "expected a MessagePack integer utility call id, got {other}"
+            )),
+        }
+    }
+}
+
+impl PartialEq<u64> for UtilityCallId {
+    fn eq(&self, other: &u64) -> bool {
+        self.as_u64() == Some(*other)
+    }
+}
+
+impl fmt::Debug for UtilityCallId {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Display::fmt(&self.0, f)
+    }
+}
+
+impl fmt::Display for UtilityCallId {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Display::fmt(&self.0, f)
+    }
+}
+
+impl Serialize for UtilityCallId {
+    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        Value::Integer(self.0).serialize(serializer)
+    }
+}
+
+impl<'de> Deserialize<'de> for UtilityCallId {
+    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        let value = Value::deserialize(deserializer)?;
+        match value {
+            Value::Integer(value) => Ok(UtilityCallId(value)),
+            other => Err(serde::de::Error::custom(format!(
+                "expected a MessagePack integer utility call id, got {other}"
+            ))),
+        }
+    }
+}
+
+/// Engine-core utility call payload sent from frontend to engine.
+///
+/// Original Python payload shape:
+/// `(client_index, call_id, method_name, args)`
+#[derive(Debug, Clone, PartialEq, Serialize_tuple)]
+pub struct EngineCoreUtilityRequest {
+    pub client_index: u32,
+    pub call_id: UtilityCallId,
+    pub method_name: String,
+    pub args: OpaqueValue,
+}
+
+impl EngineCoreUtilityRequest {
+    /// Create a new utility request with the given strongly typed arguments,
+    /// encoding them into the expected msgpack value format.
+    pub fn new<T>(
+        client_index: u32,
+        call_id: u64,
+        method_name: impl Into<String>,
+        args: T,
+    ) -> Result<Self>
+    where
+        T: Serialize + std::fmt::Debug,
+    {
+        let args = rmpv::ext::to_value(&args).map_err(|error| Error::Encode {
+            target_type: type_name::<T>(),
+            message: format!(
+                "failed to encode utility args `{args:?}`: {}",
+                error.to_report_string()
+            ),
+        })?;
+        let args = match args {
+            Value::Nil => Value::Array(Vec::new()),
+            other => other,
+        };
+
+        Ok(Self {
+            client_index,
+            call_id: UtilityCallId::from(call_id),
+            method_name: method_name.into(),
+            args,
+        })
+    }
+}
+
+/// Result of a utility call.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/v1/engine/__init__.py#L174-L183>
+#[derive(Debug, Clone, PartialEq, Serialize_tuple, Deserialize_tuple, DefaultFromSerde)]
+pub struct UtilityOutput {
+    pub call_id: UtilityCallId,
+    /// Non-`None` implies the call failed and `result` should be ignored.
+    #[serde(default)]
+    pub failure_message: Option<String>,
+    #[serde(default)]
+    pub result: Option<UtilityResultEnvelope>,
+}
+
+/// Python `UtilityResult` wrapper carried inside `UtilityOutput.result`.
+///
+/// Upstream reference:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/v1/serial_utils.py#L178-L185>
+#[derive(Debug, Clone, PartialEq, Serialize_tuple, Deserialize_tuple)]
+pub struct UtilityResultEnvelope {
+    /// Recursive type information encoded on Python side, serving as the hint
+    /// for deserialization. We don't care it here as in Rust frontend all
+    /// utility calls are strongly-typed.
+    #[serde(default)]
+    type_info: Option<OpaqueValue>,
+    /// The actual utility result.
+    #[serde(default = "default_opaque_value_nil")]
+    result: OpaqueValue,
+}
+
+impl UtilityResultEnvelope {
+    /// Create a utility result envelope without type information.
+    pub fn without_type_info(result: OpaqueValue) -> Self {
+        Self {
+            type_info: None,
+            result,
+        }
+    }
+}
+
+impl UtilityOutput {
+    /// Decode the typed result of a utility call.
+    pub fn into_typed_result<T>(self, method: &str) -> Result<T>
+    where
+        T: serde::de::DeserializeOwned,
+    {
+        if let Some(message) = self.failure_message {
+            return Err(Error::UtilityCallFailed {
+                method: method.to_string(),
+                call_id: self.call_id,
+                message,
+            });
+        }
+
+        let result = self.result.map(|e| e.result).unwrap_or(Value::Nil);
+
+        rmpv::ext::from_value(result).map_err(|error| Error::UtilityResultDecode {
+            method: method.to_string(),
+            call_id: self.call_id,
+            message: error.to_report_string(),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use rmpv::Value;
+    use serde::Serialize;
+
+    use super::{EngineCoreUtilityRequest, UtilityOutput, UtilityResultEnvelope};
+    use crate::Error;
+    use crate::protocol::{decode_msgpack, decode_value, encode_msgpack};
+
+    fn utility_result_value<T>(value: T) -> UtilityResultEnvelope
+    where
+        T: Serialize,
+    {
+        UtilityResultEnvelope::without_type_info(rmpv::ext::to_value(value).unwrap())
+    }
+
+    #[test]
+    fn utility_request_serializes_as_tuple_payload() {
+        let request = EngineCoreUtilityRequest::new(7, 42, "is_sleeping", ()).unwrap();
+
+        let encoded = encode_msgpack(&request).unwrap();
+        let value = decode_value(&encoded).unwrap();
+        let array = match value {
+            Value::Array(array) => array,
+            other => panic!("expected utility request array, got {other:?}"),
+        };
+
+        assert_eq!(array.len(), 4);
+        assert_eq!(array[0], Value::from(7));
+        assert_eq!(array[1], Value::from(42));
+        assert_eq!(array[2], Value::from("is_sleeping"));
+        assert_eq!(array[3], Value::Array(Vec::new()));
+    }
+
+    #[test]
+    fn utility_output_decodes_typed_result() {
+        let output = UtilityOutput {
+            call_id: 9_u64.into(),
+            failure_message: None,
+            result: Some(utility_result_value(true)),
+        };
+
+        assert!(output.into_typed_result::<bool>("is_sleeping").unwrap());
+    }
+
+    #[test]
+    fn utility_output_decodes_unsigned_64_bit_call_id() {
+        let value = Value::Array(vec![Value::from(u64::MAX), Value::Nil, Value::Nil]);
+        let mut encoded = Vec::new();
+        rmpv::encode::write_value(&mut encoded, &value).unwrap();
+
+        let output: UtilityOutput = decode_msgpack(&encoded).unwrap();
+
+        assert_eq!(output.call_id.as_u64(), Some(u64::MAX));
+    }
+
+    #[test]
+    fn utility_output_decodes_signed_negative_call_id() {
+        let value = Value::Array(vec![Value::from(-1), Value::Nil, Value::Nil]);
+        let mut encoded = Vec::new();
+        rmpv::encode::write_value(&mut encoded, &value).unwrap();
+
+        let output: UtilityOutput = decode_msgpack(&encoded).unwrap();
+
+        assert_eq!(output.call_id.as_u64(), None);
+    }
+
+    #[test]
+    fn utility_output_decodes_other_negative_call_id() {
+        let value = Value::Array(vec![Value::from(-2), Value::Nil, Value::Nil]);
+        let mut encoded = Vec::new();
+        rmpv::encode::write_value(&mut encoded, &value).unwrap();
+
+        let output: UtilityOutput = decode_msgpack(&encoded).unwrap();
+
+        assert_eq!(output.call_id.as_u64(), None);
+        assert_eq!(output.call_id.as_i64(), Some(-2));
+    }
+
+    #[test]
+    fn utility_output_reports_failure_message() {
+        let error = UtilityOutput {
+            call_id: 9_u64.into(),
+            failure_message: Some("boom".to_string()),
+            result: None,
+        }
+        .into_typed_result::<bool>("is_sleeping")
+        .unwrap_err();
+
+        assert!(matches!(
+            error,
+            Error::UtilityCallFailed {
+                method,
+                call_id,
+                message
+            } if method == "is_sleeping" && call_id == 9 && message == "boom"
+        ));
+    }
+
+    #[test]
+    fn utility_output_decodes_missing_result_as_unit() {
+        UtilityOutput {
+            call_id: 3_u64.into(),
+            failure_message: None,
+            result: None,
+        }
+        .into_typed_result::<()>("reset_mm_cache")
+        .unwrap();
+    }
+
+    #[test]
+    fn utility_output_decodes_nil_result_as_unit() {
+        UtilityOutput {
+            call_id: 4_u64.into(),
+            failure_message: None,
+            result: Some(UtilityResultEnvelope::without_type_info(Value::Nil)),
+        }
+        .into_typed_result::<()>("sleep")
+        .unwrap();
+    }
+}
diff --git a/rust/src/engine-core-client/src/test_utils.rs b/rust/src/engine-core-client/src/test_utils.rs
new file mode 100644
index 000000000000..99c266dc626f
--- /dev/null
+++ b/rust/src/engine-core-client/src/test_utils.rs
@@ -0,0 +1,295 @@
+use std::future::Future;
+use std::path::Path;
+use std::pin::Pin;
+use std::time::Duration;
+
+use tempfile::TempDir;
+use tokio::sync::oneshot;
+use zeromq::prelude::{Socket, SocketRecv, SocketSend};
+use zeromq::util::PeerIdentity;
+use zeromq::{DealerSocket, PushSocket, SocketOptions, SubSocket, ZmqMessage};
+
+use crate::EngineId;
+use crate::protocol::ModelDtype;
+use crate::protocol::handshake::{EngineCoreReadyResponse, HandshakeInitMessage, ReadyMessage};
+
+/// Per-test IPC endpoint namespace backed by a unique temporary directory.
+///
+/// Using one directory per test avoids endpoint collisions without requiring
+/// ad-hoc unique-name generation at each call site.
+#[derive(Debug)]
+pub struct IpcNamespace {
+    dir: TempDir,
+}
+
+impl IpcNamespace {
+    /// Create a fresh namespace for one test case.
+    pub fn new() -> std::io::Result<Self> {
+        Ok(Self {
+            dir: TempDir::new()?,
+        })
+    }
+
+    /// Build one `ipc://...` endpoint under this namespace.
+    pub fn endpoint(&self, name: impl AsRef<Path>) -> String {
+        let path = self.dir.path().join(name);
+        format!("ipc://{}", path.to_string_lossy())
+    }
+
+    /// Endpoint used for the initial READY/HELLO handshake.
+    pub fn handshake_endpoint(&self) -> String {
+        self.endpoint("handshake.sock")
+    }
+
+    /// Endpoint used for engine-core request traffic.
+    pub fn input_endpoint(&self) -> String {
+        self.endpoint("input.sock")
+    }
+
+    /// Endpoint used for engine-core output traffic.
+    pub fn output_endpoint(&self) -> String {
+        self.endpoint("output.sock")
+    }
+}
+
+/// Construct a standard local READY message used by mock engines in tests.
+fn ready_message(status: &str) -> ReadyMessage {
+    ReadyMessage {
+        status: Some(status.to_string()),
+        local: Some(true),
+        headless: Some(true),
+        parallel_config_hash: None,
+    }
+}
+
+/// Construct a default ready response payload for mock engine input
+/// registration.
+fn ready_response_payload() -> Vec<u8> {
+    rmp_serde::to_vec_named(&EngineCoreReadyResponse {
+        max_model_len: 4096,
+        num_gpu_blocks: 0,
+        dp_stats_address: None,
+        dtype: Some(ModelDtype::Float32),
+    })
+    .expect("encode ready response payload")
+}
+
+/// Coordinator-side sockets connected by one mock engine when coordinator mode
+/// is enabled.
+pub struct MockCoordinatorConnections {
+    /// Subscription socket that receives coordinator broadcasts such as
+    /// `START_DP_WAVE`.
+    pub input_sub: SubSocket,
+    /// Push socket used to send coordinator-only `EngineCoreOutputs` back to
+    /// the frontend.
+    pub output_push: PushSocket,
+}
+
+/// Fully connected mock engine transport state used by tests.
+pub struct MockEngineConnections {
+    /// Decoded INIT message sent by the frontend during handshake.
+    pub init: HandshakeInitMessage,
+    /// Socket used to receive frontend requests.
+    pub dealer: DealerSocket,
+    /// Socket used to publish normal request outputs back to the frontend.
+    pub push: PushSocket,
+    /// Optional coordinator sockets when the client enabled the in-process
+    /// coordinator.
+    pub coordinator: Option<MockCoordinatorConnections>,
+}
+
+/// Complete the engine-core handshake and connect mock input/output sockets
+/// plus optional coordinator sockets.
+pub async fn setup_mock_engine_connections(
+    engine_handshake: String,
+    engine_id: impl Into<EngineId>,
+) -> MockEngineConnections {
+    // Wait for the client to bind the handshake socket before connecting.
+    // A fixed sleep is racy under CI load; instead poll for the socket file.
+    let socket_path = engine_handshake
+        .strip_prefix("ipc://")
+        .expect("handshake address must be ipc://");
+    for _ in 0..100 {
+        if Path::new(socket_path).exists() {
+            break;
+        }
+        tokio::time::sleep(Duration::from_millis(20)).await;
+    }
+
+    let peer_identity = PeerIdentity::try_from(engine_id.into()).expect("peer id");
+
+    let mut options = SocketOptions::default();
+    options.peer_identity(peer_identity.clone());
+    let mut handshake = DealerSocket::with_options(options);
+    handshake
+        .connect(&engine_handshake)
+        .await
+        .expect("connect mock engine handshake socket");
+    handshake
+        .send(ZmqMessage::from(
+            rmp_serde::to_vec_named(&ready_message("HELLO")).expect("encode HELLO ready message"),
+        ))
+        .await
+        .expect("send HELLO ready message");
+
+    let init_frames = handshake.recv().await.expect("receive handshake init message").into_vec();
+    assert_eq!(init_frames.len(), 1);
+    let init: HandshakeInitMessage =
+        rmp_serde::from_slice(init_frames[0].as_ref()).expect("decode handshake init message");
+
+    let mut input_options = SocketOptions::default();
+    input_options.peer_identity(peer_identity);
+    let mut dealer = DealerSocket::with_options(input_options);
+    dealer
+        .connect(&init.addresses.inputs[0])
+        .await
+        .expect("connect mock engine input socket");
+    dealer
+        .send(ZmqMessage::from(ready_response_payload()))
+        .await
+        .expect("send mock engine input ready frame");
+
+    let mut push = PushSocket::new();
+    push.connect(&init.addresses.outputs[0])
+        .await
+        .expect("connect mock engine output socket");
+
+    let coordinator = match (
+        init.addresses.coordinator_input.as_deref(),
+        init.addresses.coordinator_output.as_deref(),
+    ) {
+        (Some(coordinator_input), Some(coordinator_output)) => {
+            let mut input_sub = SubSocket::new();
+            input_sub
+                .connect(coordinator_input)
+                .await
+                .expect("connect mock engine coordinator input socket");
+            input_sub
+                .subscribe("")
+                .await
+                .expect("subscribe mock engine coordinator input socket");
+
+            let mut output_push = PushSocket::new();
+            output_push
+                .connect(coordinator_output)
+                .await
+                .expect("connect mock engine coordinator output socket");
+
+            let ready =
+                input_sub.recv().await.expect("receive coordinator READY marker").into_vec();
+            assert_eq!(ready.len(), 1);
+            assert_eq!(ready[0].as_ref(), b"READY");
+
+            Some(MockCoordinatorConnections {
+                input_sub,
+                output_push,
+            })
+        }
+        (None, None) => None,
+        _ => panic!("coordinator handshake addresses must be both present or both absent"),
+    };
+
+    handshake
+        .send(ZmqMessage::from(
+            rmp_serde::to_vec_named(&ready_message("READY")).expect("encode READY ready message"),
+        ))
+        .await
+        .expect("send READY ready message");
+
+    MockEngineConnections {
+        init,
+        dealer,
+        push,
+        coordinator,
+    }
+}
+
+/// Connect one mock engine directly to already-bootstrapped frontend
+/// input/output sockets.
+pub async fn setup_bootstrapped_mock_engine(
+    input_address: String,
+    output_address: String,
+    engine_id: impl Into<EngineId>,
+) -> (DealerSocket, PushSocket) {
+    for endpoint in [&input_address, &output_address] {
+        if let Some(socket_path) = endpoint.strip_prefix("ipc://") {
+            for _ in 0..100 {
+                if Path::new(socket_path).exists() {
+                    break;
+                }
+                tokio::time::sleep(Duration::from_millis(20)).await;
+            }
+        }
+    }
+
+    let peer_identity = PeerIdentity::try_from(engine_id.into()).expect("peer id");
+    let mut input_options = SocketOptions::default();
+    input_options.peer_identity(peer_identity);
+    let mut dealer = DealerSocket::with_options(input_options);
+    dealer.connect(&input_address).await.expect("connect mock engine input socket");
+    dealer
+        .send(ZmqMessage::from(ready_response_payload()))
+        .await
+        .expect("send mock engine input ready frame");
+
+    let mut push = PushSocket::new();
+    push.connect(&output_address).await.expect("connect mock engine output socket");
+
+    (dealer, push)
+}
+
+/// Complete the engine-core handshake and connect mock input/output sockets.
+///
+/// This returns the decoded handshake init message plus the `DealerSocket` used
+/// to receive client requests and the `PushSocket` used to send engine outputs
+/// back to the client.
+pub async fn setup_mock_engine_with_init(
+    engine_handshake: String,
+    engine_id: impl Into<EngineId>,
+) -> (HandshakeInitMessage, DealerSocket, PushSocket) {
+    let MockEngineConnections {
+        init, dealer, push, ..
+    } = setup_mock_engine_connections(engine_handshake, engine_id).await;
+    (init, dealer, push)
+}
+
+/// Complete the engine-core handshake and connect mock input/output sockets.
+///
+/// This returns the `DealerSocket` used to receive client requests and the
+/// `PushSocket` used to send engine outputs back to the client.
+pub async fn setup_mock_engine(
+    engine_handshake: String,
+    engine_id: impl Into<EngineId>,
+) -> (DealerSocket, PushSocket) {
+    let (_, dealer, push) = setup_mock_engine_with_init(engine_handshake, engine_id).await;
+    (dealer, push)
+}
+
+/// Spawn a mock engine task and keep its sockets alive until the returned
+/// shutdown sender is triggered by the test.
+///
+/// The script borrows the connected sockets mutably while it runs. After the
+/// script completes, this helper keeps the sockets alive until the test
+/// explicitly signals shutdown.
+pub fn spawn_mock_engine_task<F>(
+    engine_handshake: String,
+    engine_id: impl Into<EngineId>,
+    run: F,
+) -> (oneshot::Sender<()>, tokio::task::JoinHandle<()>)
+where
+    F: for<'a> FnOnce(
+            &'a mut DealerSocket,
+            &'a mut PushSocket,
+        ) -> Pin<Box<dyn Future<Output = ()> + Send + 'a>>
+        + Send
+        + 'static,
+{
+    let (shutdown_tx, shutdown_rx) = oneshot::channel();
+    let engine_id = engine_id.into();
+    let engine_task = tokio::spawn(async move {
+        let (mut dealer, mut push) = setup_mock_engine(engine_handshake, engine_id).await;
+        run(&mut dealer, &mut push).await;
+        let _ = shutdown_rx.await;
+    });
+    (shutdown_tx, engine_task)
+}
diff --git a/rust/src/engine-core-client/src/tests/client.rs b/rust/src/engine-core-client/src/tests/client.rs
new file mode 100644
index 000000000000..54128330bb02
--- /dev/null
+++ b/rust/src/engine-core-client/src/tests/client.rs
@@ -0,0 +1,2559 @@
+use std::collections::{BTreeMap, BTreeSet};
+use std::convert::TryFrom;
+use std::io::Cursor;
+use std::path::PathBuf;
+use std::process::Command;
+use std::sync::Once;
+use std::time::Duration;
+
+use futures::StreamExt;
+use rmpv::Value;
+use thiserror_ext::AsReport as _;
+use tokio::sync::{mpsc, oneshot};
+use tokio::time::timeout;
+use tracing_subscriber::EnvFilter;
+use zeromq::prelude::{Socket, SocketRecv, SocketSend};
+use zeromq::util::PeerIdentity;
+use zeromq::{DealerSocket, PushSocket, SocketOptions, SubSocket, XPubSocket, ZmqMessage};
+
+use crate::protocol::handshake::{HandshakeInitMessage, ReadyMessage};
+use crate::protocol::logprobs::MaybeWireLogprobs;
+use crate::protocol::multimodal::{
+    MmFeatureSpec, MmField, MmFieldElem, MmFlatField, MmKwargValue, MmSlice, PlaceholderRange,
+    SliceSpec,
+};
+use crate::protocol::stats::SchedulerStats;
+use crate::protocol::tensor::WireTensor;
+use crate::protocol::utility::{UtilityOutput, UtilityResultEnvelope};
+use crate::protocol::{
+    EngineCoreFinishReason, EngineCoreOutput, EngineCoreOutputs, EngineCoreRequest,
+    EngineCoreRequestType, EngineCoreSamplingParams, decode_engine_core_outputs,
+};
+use crate::test_utils::{
+    IpcNamespace, setup_bootstrapped_mock_engine, setup_mock_engine_connections,
+    setup_mock_engine_with_init, spawn_mock_engine_task,
+};
+use crate::{
+    CoordinatorMode, ENGINE_CORE_DEAD_SENTINEL, EngineCoreClient, EngineCoreClientConfig, EngineId,
+    Error, TransportMode,
+};
+
+static TRACING: Once = Once::new();
+
+fn expect_sample_logprobs(actual: &MaybeWireLogprobs) {
+    expect_test::expect![[r#"
+        Logprobs {
+            positions: [
+                PositionLogprobs {
+                    entries: [
+                        TokenLogprob {
+                            token_id: 1,
+                            logprob: 1.0,
+                            rank: 1,
+                        },
+                        TokenLogprob {
+                            token_id: 2,
+                            logprob: 2.0,
+                            rank: 1,
+                        },
+                        TokenLogprob {
+                            token_id: 3,
+                            logprob: 3.0,
+                            rank: 2,
+                        },
+                    ],
+                },
+                PositionLogprobs {
+                    entries: [
+                        TokenLogprob {
+                            token_id: 4,
+                            logprob: 4.0,
+                            rank: 2,
+                        },
+                        TokenLogprob {
+                            token_id: 5,
+                            logprob: 5.0,
+                            rank: 1,
+                        },
+                        TokenLogprob {
+                            token_id: 6,
+                            logprob: 6.0,
+                            rank: 2,
+                        },
+                    ],
+                },
+            ],
+        }
+    "#]]
+    .assert_debug_eq(actual.as_direct().expect("logprobs resolved"));
+}
+
+fn expect_prompt_logprobs(actual: &MaybeWireLogprobs) {
+    expect_test::expect![[r#"
+        Logprobs {
+            positions: [
+                PositionLogprobs {
+                    entries: [
+                        TokenLogprob {
+                            token_id: 10,
+                            logprob: 10.0,
+                            rank: 3,
+                        },
+                        TokenLogprob {
+                            token_id: 11,
+                            logprob: 11.0,
+                            rank: 1,
+                        },
+                        TokenLogprob {
+                            token_id: 12,
+                            logprob: 12.0,
+                            rank: 2,
+                        },
+                    ],
+                },
+                PositionLogprobs {
+                    entries: [
+                        TokenLogprob {
+                            token_id: 13,
+                            logprob: 13.0,
+                            rank: 4,
+                        },
+                        TokenLogprob {
+                            token_id: 14,
+                            logprob: 14.0,
+                            rank: 1,
+                        },
+                        TokenLogprob {
+                            token_id: 15,
+                            logprob: 15.0,
+                            rank: 2,
+                        },
+                    ],
+                },
+            ],
+        }
+    "#]]
+    .assert_debug_eq(actual.as_direct().expect("prompt logprobs resolved"));
+}
+
+fn sample_request() -> EngineCoreRequest {
+    sample_request_with_id("req-1")
+}
+
+fn sample_request_with_id(request_id: &str) -> EngineCoreRequest {
+    EngineCoreRequest {
+        request_id: request_id.to_string(),
+        prompt_token_ids: Some(vec![11, 22]),
+        sampling_params: Some(EngineCoreSamplingParams {
+            temperature: 0.8,
+            top_p: 0.9,
+            top_k: 8,
+            max_tokens: 32,
+            min_tokens: 1,
+            stop_token_ids: vec![151643],
+            eos_token_id: Some(151645),
+            all_stop_token_ids: BTreeSet::from([151643, 151645]),
+            ..EngineCoreSamplingParams::for_test()
+        }),
+        arrival_time: 42.5,
+        ..EngineCoreRequest::default()
+    }
+}
+
+fn sample_multimodal_request() -> EngineCoreRequest {
+    EngineCoreRequest {
+        request_id: "req-mm".to_string(),
+        prompt_token_ids: Some(vec![101, 102, 103, 104]),
+        mm_features: Some(vec![MmFeatureSpec {
+            data: Some(BTreeMap::from([(
+                "pixel_values".to_string(),
+                MmFieldElem {
+                    data: Some(MmKwargValue::Tensor(
+                        WireTensor::from_f32(vec![2, 2], vec![1.0, 2.0, 3.5, 4.25])
+                            .expect("valid tensor shape"),
+                    )),
+                    field: MmField::Flat(MmFlatField {
+                        slices: vec![MmSlice::Slice(SliceSpec {
+                            start: Some(0),
+                            stop: Some(2),
+                            step: None,
+                        })],
+                        dim: 0,
+                        keep_on_cpu: false,
+                    }),
+                },
+            )])),
+            modality: "image".to_string(),
+            identifier: "mm-cache-key".to_string(),
+            mm_position: PlaceholderRange {
+                offset: 1,
+                length: 2,
+                is_embed: None,
+            },
+            mm_hash: Some("processor-hash".to_string()),
+        }]),
+        sampling_params: None,
+        pooling_params: None,
+        arrival_time: 43.5,
+        ..EngineCoreRequest::default()
+    }
+}
+
+fn ready_message(status: &str) -> ReadyMessage {
+    ReadyMessage {
+        status: Some(status.to_string()),
+        local: Some(true),
+        headless: Some(true),
+        parallel_config_hash: None,
+    }
+}
+
+fn request_output(
+    request_id: &str,
+    new_token_ids: Vec<u32>,
+    finish_reason: Option<EngineCoreFinishReason>,
+) -> EngineCoreOutput {
+    EngineCoreOutput {
+        request_id: request_id.to_string(),
+        new_token_ids,
+        new_logprobs: None,
+        new_prompt_logprobs_tensors: None,
+        pooling_output: None,
+        finish_reason,
+        stop_reason: None,
+        events: None,
+        kv_transfer_params: None,
+        trace_headers: None,
+        prefill_stats: None,
+        routed_experts: None,
+        num_nans_in_logits: 0,
+    }
+}
+
+async fn send_outputs(push: &mut PushSocket, outputs: EngineCoreOutputs) {
+    push.send(ZmqMessage::from(rmp_serde::to_vec_named(&outputs).unwrap()))
+        .await
+        .unwrap();
+}
+
+async fn send_output_frames(push: &mut PushSocket, frames: Vec<bytes::Bytes>) {
+    push.send(ZmqMessage::try_from(frames).unwrap()).await.unwrap();
+}
+
+async fn recv_engine_message(dealer: &mut DealerSocket) -> Vec<bytes::Bytes> {
+    dealer.recv().await.unwrap().into_vec()
+}
+
+async fn recv_start_dp_wave(sub: &mut SubSocket) -> (u32, u32) {
+    let frames = sub.recv().await.unwrap().into_vec();
+    assert_eq!(frames.len(), 2);
+    assert_eq!(
+        frames[0].as_ref(),
+        EngineCoreRequestType::StartDpWave.to_frame().as_ref()
+    );
+    rmp_serde::from_slice(&frames[1]).expect("decode START_DP_WAVE payload")
+}
+
+async fn connect_client_with_ipc(
+    config: EngineCoreClientConfig,
+    ipc: &IpcNamespace,
+) -> EngineCoreClient {
+    EngineCoreClient::connect(
+        config.with_local_input_output_addresses(
+            Some(ipc.input_endpoint()),
+            Some(ipc.output_endpoint()),
+        ),
+    )
+    .await
+    .unwrap()
+}
+
+fn handshake_test_config(
+    handshake_address: String,
+    engine_count: usize,
+    model_name: &str,
+    ready_timeout: Duration,
+    client_index: u32,
+    coordinator_mode: Option<CoordinatorMode>,
+) -> EngineCoreClientConfig {
+    EngineCoreClientConfig {
+        transport_mode: TransportMode::HandshakeOwner {
+            handshake_address,
+            advertised_host: "127.0.0.1".to_string(),
+            engine_count,
+            ready_timeout,
+            local_input_address: None,
+            local_output_address: None,
+        },
+        coordinator_mode,
+        model_name: model_name.to_string(),
+        client_index,
+    }
+}
+
+fn bootstrapped_test_config(
+    input_address: String,
+    output_address: String,
+    engine_count: usize,
+    ready_timeout: Duration,
+    client_index: u32,
+    coordinator_mode: Option<CoordinatorMode>,
+) -> EngineCoreClientConfig {
+    EngineCoreClientConfig {
+        transport_mode: TransportMode::Bootstrapped {
+            input_address,
+            output_address,
+            engine_count,
+            ready_timeout,
+        },
+        coordinator_mode,
+        model_name: "test-model".to_string(),
+        client_index,
+    }
+}
+
+async fn recv_xpub_message(xpub: &mut XPubSocket) -> Vec<bytes::Bytes> {
+    xpub.recv().await.unwrap().into_vec()
+}
+
+async fn recv_xpub_subscription(xpub: &mut XPubSocket) {
+    let frames = recv_xpub_message(xpub).await;
+    assert_eq!(frames.len(), 1);
+    assert_eq!(frames[0].as_ref(), b"\x01");
+}
+
+async fn recv_external_coordinator_wakeup(xpub: &mut XPubSocket) -> (u32, u32) {
+    let frames = recv_xpub_message(xpub).await;
+    assert_eq!(frames.len(), 1);
+    rmp_serde::from_slice(&frames[0]).expect("decode external coordinator wakeup")
+}
+
+async fn send_external_coordinator_publish<T: serde::Serialize>(
+    xpub: &mut XPubSocket,
+    payload: &T,
+) {
+    xpub.send(ZmqMessage::from(rmp_serde::to_vec_named(payload).unwrap()))
+        .await
+        .unwrap();
+}
+
+fn spawn_mock_engine_task_with_init<F>(
+    engine_handshake: String,
+    engine_id: impl Into<EngineId>,
+    run: F,
+) -> (
+    oneshot::Receiver<HandshakeInitMessage>,
+    oneshot::Sender<()>,
+    tokio::task::JoinHandle<()>,
+)
+where
+    F: for<'a> FnOnce(
+            &'a mut DealerSocket,
+            &'a mut PushSocket,
+        )
+            -> std::pin::Pin<Box<dyn std::future::Future<Output = ()> + Send + 'a>>
+        + Send
+        + 'static,
+{
+    let (shutdown_tx, shutdown_rx) = oneshot::channel();
+    let (init_tx, init_rx) = oneshot::channel();
+    let engine_id = engine_id.into();
+    let engine_task = tokio::spawn(async move {
+        let (init, mut dealer, mut push) =
+            setup_mock_engine_with_init(engine_handshake, engine_id).await;
+        let _ = init_tx.send(init);
+        run(&mut dealer, &mut push).await;
+        let _ = shutdown_rx.await;
+    });
+    (init_rx, shutdown_tx, engine_task)
+}
+
+fn init_tracing() {
+    TRACING.call_once(|| {
+        let filter = EnvFilter::try_from_default_env()
+            .unwrap_or_else(|_| EnvFilter::new("vllm_engine_core_client=debug"));
+        let _ = tracing_subscriber::fmt().with_test_writer().with_env_filter(filter).try_init();
+    });
+}
+
+fn is_dispatcher_closed(error: &Error) -> bool {
+    match error {
+        Error::DispatcherClosed { .. } => true,
+        Error::Shared(error) => is_dispatcher_closed(error),
+        _ => false,
+    }
+}
+
+fn is_engine_core_dead(error: &Error) -> bool {
+    match error {
+        Error::EngineCoreDead => true,
+        Error::Shared(error) => is_engine_core_dead(error),
+        _ => false,
+    }
+}
+
+fn is_decode_error(error: &Error) -> bool {
+    match error {
+        Error::Decode { .. } | Error::ExtValueDecode { .. } => true,
+        Error::Shared(error) => is_decode_error(error),
+        _ => false,
+    }
+}
+
+fn is_unexpected_dispatcher_output(error: &Error) -> bool {
+    match error {
+        Error::UnexpectedDispatcherOutput { .. } => true,
+        Error::Shared(error) => is_unexpected_dispatcher_output(error),
+        _ => false,
+    }
+}
+
+fn decode_value(bytes: &[u8]) -> Value {
+    rmpv::decode::read_value(&mut Cursor::new(bytes)).unwrap()
+}
+
+fn encode_value(value: &Value) -> Vec<u8> {
+    let mut out = Vec::new();
+    rmpv::encode::write_value(&mut out, value).unwrap();
+    out
+}
+
+fn ndarray_value(dtype: &str, shape: &[usize], data: Value) -> Value {
+    Value::Array(vec![
+        Value::from(dtype),
+        Value::Array(shape.iter().copied().map(Value::from).collect()),
+        data,
+    ])
+}
+
+fn multipart_logprob_output_frames(request_id: &str) -> Vec<bytes::Bytes> {
+    let main = Value::Array(vec![
+        Value::from(0),
+        Value::Array(vec![Value::Array(vec![
+            Value::from(request_id),
+            Value::Array(vec![Value::from(7), Value::from(8)]),
+            Value::Array(vec![
+                ndarray_value("<i4", &[2, 3], Value::from(1)),
+                ndarray_value("<f4", &[2, 3], Value::from(2)),
+                ndarray_value("<i4", &[2], Value::from(3)),
+                Value::Nil,
+            ]),
+            Value::Nil,
+            Value::Nil,
+            Value::from(EngineCoreFinishReason::Length as u8),
+        ])]),
+        Value::Nil,
+        Value::from(0.0),
+        Value::Nil,
+        Value::Array(vec![Value::from(request_id)]),
+    ]);
+
+    vec![
+        bytes::Bytes::from(encode_value(&main)),
+        bytes::Bytes::from_static(&[
+            1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0,
+        ]),
+        bytes::Bytes::from_static(&[
+            0, 0, 128, 63, 0, 0, 0, 64, 0, 0, 64, 64, 0, 0, 128, 64, 0, 0, 160, 64, 0, 0, 192, 64,
+        ]),
+        bytes::Bytes::from_static(&[1, 0, 0, 0, 2, 0, 0, 0]),
+    ]
+}
+
+fn utility_result_value<T>(value: T) -> UtilityResultEnvelope
+where
+    T: serde::Serialize,
+{
+    UtilityResultEnvelope::without_type_info(rmpv::ext::to_value(value).unwrap())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn coordinator_handshake_includes_engine_control_addresses() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = [0x00, 0x00];
+
+    let (init_tx, init_rx) = oneshot::channel();
+    let (shutdown_tx, shutdown_rx) = oneshot::channel();
+    let engine_task = tokio::spawn(async move {
+        let connections = setup_mock_engine_connections(handshake_address, &engine_id).await;
+        let _ = init_tx.send(connections.init.clone());
+        let _ = shutdown_rx.await;
+    });
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            ipc.handshake_endpoint(),
+            1,
+            "test-model",
+            Duration::from_secs(2),
+            0,
+            Some(CoordinatorMode::InProc),
+        ),
+        &ipc,
+    )
+    .await;
+
+    let init = init_rx.await.unwrap();
+    assert!(init.addresses.coordinator_input.is_some());
+    assert!(init.addresses.coordinator_output.is_some());
+    assert!(init.addresses.frontend_stats_publish_address.is_none());
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn coordinator_wave_control_tracks_pause_running_and_rebroadcasts() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+
+    let (shutdown0_tx, shutdown0_rx) = oneshot::channel();
+    let engine0_task = tokio::spawn({
+        let handshake_address = handshake_address.clone();
+        async move {
+            let mut engine = setup_mock_engine_connections(handshake_address, &[0x00, 0x00]).await;
+            let mut coordinator =
+                engine.coordinator.take().expect("coordinator sockets should be present");
+
+            let (wave, exclude_engine) = recv_start_dp_wave(&mut coordinator.input_sub).await;
+            assert_eq!((wave, exclude_engine), (0, 0));
+
+            let add = recv_engine_message(&mut engine.dealer).await;
+            assert_eq!(add[0].as_ref(), &[0x00]);
+            let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+            assert_eq!(request.request_id, "req-1");
+            assert_eq!(request.current_wave, 0);
+
+            assert!(
+                timeout(
+                    Duration::from_millis(200),
+                    recv_start_dp_wave(&mut coordinator.input_sub)
+                )
+                .await
+                .is_err()
+            );
+
+            send_outputs(
+                &mut engine.push,
+                EngineCoreOutputs {
+                    engine_index: 0,
+                    outputs: vec![request_output(
+                        "req-1",
+                        vec![],
+                        Some(EngineCoreFinishReason::Length),
+                    )],
+                    finished_requests: Some(BTreeSet::from(["req-1".to_string()])),
+                    ..Default::default()
+                },
+            )
+            .await;
+
+            send_outputs(
+                &mut coordinator.output_push,
+                EngineCoreOutputs {
+                    engine_index: 0,
+                    wave_complete: Some(0),
+                    ..Default::default()
+                },
+            )
+            .await;
+
+            let (wave, exclude_engine) = recv_start_dp_wave(&mut coordinator.input_sub).await;
+            assert_eq!((wave, exclude_engine), (1, 0));
+
+            let add = recv_engine_message(&mut engine.dealer).await;
+            assert_eq!(add[0].as_ref(), &[0x00]);
+            let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+            assert_eq!(request.request_id, "req-3");
+            assert_eq!(request.current_wave, 1);
+
+            send_outputs(
+                &mut engine.push,
+                EngineCoreOutputs {
+                    engine_index: 0,
+                    outputs: vec![request_output(
+                        "req-3",
+                        vec![],
+                        Some(EngineCoreFinishReason::Length),
+                    )],
+                    finished_requests: Some(BTreeSet::from(["req-3".to_string()])),
+                    ..Default::default()
+                },
+            )
+            .await;
+
+            let _ = shutdown0_rx.await;
+        }
+    });
+
+    let (shutdown1_tx, shutdown1_rx) = oneshot::channel();
+    let engine1_task = tokio::spawn({
+        let handshake_address = handshake_address.clone();
+        async move {
+            let mut engine = setup_mock_engine_connections(handshake_address, &[0x01, 0x00]).await;
+            let mut coordinator =
+                engine.coordinator.take().expect("coordinator sockets should be present");
+
+            let (wave, exclude_engine) = recv_start_dp_wave(&mut coordinator.input_sub).await;
+            assert_eq!((wave, exclude_engine), (0, 0));
+
+            let add = recv_engine_message(&mut engine.dealer).await;
+            assert_eq!(add[0].as_ref(), &[0x00]);
+            let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+            assert_eq!(request.request_id, "req-2");
+            assert_eq!(request.current_wave, 0);
+
+            assert!(
+                timeout(
+                    Duration::from_millis(200),
+                    recv_start_dp_wave(&mut coordinator.input_sub)
+                )
+                .await
+                .is_err()
+            );
+
+            send_outputs(
+                &mut engine.push,
+                EngineCoreOutputs {
+                    engine_index: 1,
+                    outputs: vec![request_output(
+                        "req-2",
+                        vec![],
+                        Some(EngineCoreFinishReason::Length),
+                    )],
+                    finished_requests: Some(BTreeSet::from(["req-2".to_string()])),
+                    ..Default::default()
+                },
+            )
+            .await;
+
+            let (wave, exclude_engine) = recv_start_dp_wave(&mut coordinator.input_sub).await;
+            assert_eq!((wave, exclude_engine), (1, 0));
+
+            assert!(
+                timeout(
+                    Duration::from_millis(200),
+                    recv_engine_message(&mut engine.dealer)
+                )
+                .await
+                .is_err()
+            );
+
+            let _ = shutdown1_rx.await;
+        }
+    });
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            2,
+            "test-model",
+            Duration::from_secs(2),
+            0,
+            Some(CoordinatorMode::InProc),
+        ),
+        &ipc,
+    )
+    .await;
+
+    let mut stream_1 = client.call(sample_request_with_id("req-1")).await.unwrap();
+    let mut stream_2 = client.call(sample_request_with_id("req-2")).await.unwrap();
+
+    let final_1 = timeout(Duration::from_secs(1), stream_1.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap();
+    assert_eq!(final_1.request_id, "req-1");
+    assert_eq!(final_1.finish_reason, Some(EngineCoreFinishReason::Length));
+    assert!(timeout(Duration::from_secs(1), stream_1.next()).await.unwrap().is_none());
+
+    let final_2 = timeout(Duration::from_secs(1), stream_2.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap();
+    assert_eq!(final_2.request_id, "req-2");
+    assert_eq!(final_2.finish_reason, Some(EngineCoreFinishReason::Length));
+    assert!(timeout(Duration::from_secs(1), stream_2.next()).await.unwrap().is_none());
+
+    tokio::time::sleep(Duration::from_millis(100)).await;
+
+    let mut stream_3 = client.call(sample_request_with_id("req-3")).await.unwrap();
+    let final_3 = timeout(Duration::from_secs(1), stream_3.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap();
+    assert_eq!(final_3.request_id, "req-3");
+    assert_eq!(final_3.finish_reason, Some(EngineCoreFinishReason::Length));
+    assert!(timeout(Duration::from_secs(1), stream_3.next()).await.unwrap().is_none());
+
+    tokio::time::sleep(Duration::from_millis(100)).await;
+
+    let _ = shutdown0_tx.send(());
+    let _ = shutdown1_tx.send(());
+    engine0_task.await.unwrap();
+    engine1_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn coordinator_rebroadcasts_engine_start_wave_control() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+
+    let (shutdown0_tx, shutdown0_rx) = oneshot::channel();
+    let engine0_task = tokio::spawn({
+        let handshake_address = handshake_address.clone();
+        async move {
+            let mut engine = setup_mock_engine_connections(handshake_address, &[0x00, 0x00]).await;
+            let mut coordinator =
+                engine.coordinator.take().expect("coordinator sockets should be present");
+
+            let (wave, exclude_engine) = recv_start_dp_wave(&mut coordinator.input_sub).await;
+            assert_eq!((wave, exclude_engine), (4, 1));
+
+            let _ = shutdown0_rx.await;
+        }
+    });
+
+    let (shutdown1_tx, shutdown1_rx) = oneshot::channel();
+    let engine1_task = tokio::spawn({
+        let handshake_address = handshake_address.clone();
+        async move {
+            let mut engine = setup_mock_engine_connections(handshake_address, &[0x01, 0x00]).await;
+            let mut coordinator =
+                engine.coordinator.take().expect("coordinator sockets should be present");
+
+            send_outputs(
+                &mut coordinator.output_push,
+                EngineCoreOutputs {
+                    engine_index: 1,
+                    start_wave: Some(4),
+                    ..Default::default()
+                },
+            )
+            .await;
+
+            let (wave, exclude_engine) = recv_start_dp_wave(&mut coordinator.input_sub).await;
+            assert_eq!((wave, exclude_engine), (4, 1));
+
+            let _ = shutdown1_rx.await;
+        }
+    });
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            2,
+            "test-model",
+            Duration::from_secs(2),
+            0,
+            Some(CoordinatorMode::InProc),
+        ),
+        &ipc,
+    )
+    .await;
+
+    tokio::time::sleep(Duration::from_millis(200)).await;
+
+    let _ = shutdown0_tx.send(());
+    let _ = shutdown1_tx.send(());
+    engine0_task.await.unwrap();
+    engine1_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn coordinator_accepts_stats_only_outputs() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+
+    let (shutdown_tx, shutdown_rx) = oneshot::channel();
+    let engine_task = tokio::spawn(async move {
+        let mut engine = setup_mock_engine_connections(handshake_address, &[0x00, 0x00]).await;
+        let mut coordinator =
+            engine.coordinator.take().expect("coordinator sockets should be present");
+
+        let (wave, exclude_engine) = recv_start_dp_wave(&mut coordinator.input_sub).await;
+        assert_eq!((wave, exclude_engine), (0, 0));
+
+        send_outputs(
+            &mut coordinator.output_push,
+            EngineCoreOutputs {
+                engine_index: 0,
+                scheduler_stats: Some(Box::new(SchedulerStats {
+                    num_running_reqs: 1,
+                    current_wave: 0,
+                    ..Default::default()
+                })),
+                ..Default::default()
+            },
+        )
+        .await;
+
+        let add = recv_engine_message(&mut engine.dealer).await;
+        assert_eq!(add[0].as_ref(), &[0x00]);
+        let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+        assert_eq!(request.request_id, "req-stats");
+
+        send_outputs(
+            &mut engine.push,
+            EngineCoreOutputs {
+                engine_index: 0,
+                outputs: vec![request_output(
+                    "req-stats",
+                    vec![],
+                    Some(EngineCoreFinishReason::Length),
+                )],
+                finished_requests: Some(BTreeSet::from(["req-stats".to_string()])),
+                ..Default::default()
+            },
+        )
+        .await;
+
+        let _ = shutdown_rx.await;
+    });
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            ipc.handshake_endpoint(),
+            1,
+            "test-model",
+            Duration::from_secs(2),
+            0,
+            Some(CoordinatorMode::InProc),
+        ),
+        &ipc,
+    )
+    .await;
+
+    let mut stream = client.call(sample_request_with_id("req-stats")).await.unwrap();
+    let final_output =
+        timeout(Duration::from_secs(1), stream.next()).await.unwrap().unwrap().unwrap();
+    assert_eq!(final_output.request_id, "req-stats");
+    assert_eq!(
+        final_output.finish_reason,
+        Some(EngineCoreFinishReason::Length)
+    );
+    assert!(client.is_healthy());
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn client_fail_closes_when_main_output_path_receives_dp_control() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-0".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add_1 = recv_engine_message(dealer).await;
+                assert_eq!(add_1[0].as_ref(), &[0x00]);
+                let request_1: EngineCoreRequest = rmp_serde::from_slice(&add_1[1]).unwrap();
+                assert_eq!(request_1.client_index, 7);
+                assert_eq!(request_1.request_id, "req-1");
+
+                let add_2 = recv_engine_message(dealer).await;
+                assert_eq!(add_2[0].as_ref(), &[0x00]);
+                let request_2: EngineCoreRequest = rmp_serde::from_slice(&add_2[1]).unwrap();
+                assert_eq!(request_2.client_index, 7);
+                assert_eq!(request_2.request_id, "req-2");
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        utility_output: Some(UtilityOutput {
+                            call_id: 1_u64.into(),
+                            failure_message: None,
+                            result: None,
+                        }),
+                        ..Default::default()
+                    },
+                )
+                .await;
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        start_wave: Some(3),
+                        ..Default::default()
+                    },
+                )
+                .await;
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![request_output("req-1", vec![999], None)],
+                        ..Default::default()
+                    },
+                )
+                .await;
+
+                tokio::time::sleep(Duration::from_millis(50)).await;
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            1,
+            "test-model",
+            Duration::from_secs(2),
+            7,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+    assert_eq!(client.engine_identities()[0], b"engine-0");
+    assert!(client.ready_responses()[0].max_model_len > 0);
+
+    let mut stream_1 = client.call(sample_request_with_id("req-1")).await.unwrap();
+    let mut stream_2 = client.call(sample_request_with_id("req-2")).await.unwrap();
+
+    let error_2 = timeout(Duration::from_secs(1), stream_2.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap_err();
+    assert!(is_unexpected_dispatcher_output(&error_2));
+
+    let error_1 = timeout(Duration::from_secs(1), stream_1.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap_err();
+    assert!(is_unexpected_dispatcher_output(&error_1));
+
+    assert!(matches!(
+        client.health_error().as_deref(),
+        Some(error) if is_unexpected_dispatcher_output(error)
+    ));
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn client_fail_closes_when_main_output_path_receives_mixed_shape_output() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-0".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add_1 = recv_engine_message(dealer).await;
+                assert_eq!(add_1[0].as_ref(), &[0x00]);
+                let request_1: EngineCoreRequest = rmp_serde::from_slice(&add_1[1]).unwrap();
+                assert_eq!(request_1.client_index, 7);
+                assert_eq!(request_1.request_id, "req-1");
+
+                let add_2 = recv_engine_message(dealer).await;
+                assert_eq!(add_2[0].as_ref(), &[0x00]);
+                let request_2: EngineCoreRequest = rmp_serde::from_slice(&add_2[1]).unwrap();
+                assert_eq!(request_2.client_index, 7);
+                assert_eq!(request_2.request_id, "req-2");
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        utility_output: Some(UtilityOutput {
+                            call_id: 1_u64.into(),
+                            failure_message: None,
+                            result: None,
+                        }),
+                        outputs: vec![request_output("req-1", vec![999], None)],
+                        ..Default::default()
+                    },
+                )
+                .await;
+
+                tokio::time::sleep(Duration::from_millis(50)).await;
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            1,
+            "test-model",
+            Duration::from_secs(2),
+            7,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+    assert_eq!(client.engine_identities()[0], b"engine-0");
+    assert!(client.ready_responses()[0].max_model_len > 0);
+
+    let mut stream_1 = client.call(sample_request_with_id("req-1")).await.unwrap();
+    let mut stream_2 = client.call(sample_request_with_id("req-2")).await.unwrap();
+
+    let error_2 = timeout(Duration::from_secs(1), stream_2.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap_err();
+    assert!(is_unexpected_dispatcher_output(&error_2));
+
+    let error_1 = timeout(Duration::from_secs(1), stream_1.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap_err();
+    assert!(is_unexpected_dispatcher_output(&error_1));
+
+    assert!(matches!(
+        client.health_error().as_deref(),
+        Some(error) if is_unexpected_dispatcher_output(error)
+    ));
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn duplicate_request_ids_are_rejected_without_sending_a_second_add() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-dup".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add_1 = recv_engine_message(dealer).await;
+                assert_eq!(add_1[0].as_ref(), &[0x00]);
+                let request_1: EngineCoreRequest = rmp_serde::from_slice(&add_1[1]).unwrap();
+                assert_eq!(request_1.request_id, "req-1");
+
+                assert!(timeout(Duration::from_millis(200), dealer.recv()).await.is_err());
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![request_output(
+                            "req-1",
+                            vec![],
+                            Some(EngineCoreFinishReason::Length),
+                        )],
+                        finished_requests: Some(BTreeSet::from(["req-1".to_string()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            1,
+            "test-model",
+            Duration::from_secs(2),
+            0,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+
+    let mut stream = client.call(sample_request()).await.unwrap();
+    let error = match client.call(sample_request()).await {
+        Ok(_) => panic!("expected duplicate request error"),
+        Err(error) => error,
+    };
+    assert!(matches!(
+        error,
+        Error::DuplicateRequestId { request_id } if request_id == "req-1"
+    ));
+
+    let final_output =
+        timeout(Duration::from_secs(1), stream.next()).await.unwrap().unwrap().unwrap();
+    assert_eq!(
+        final_output.finish_reason,
+        Some(EngineCoreFinishReason::Length)
+    );
+    assert!(timeout(Duration::from_secs(1), stream.next()).await.unwrap().is_none());
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn finished_requests_without_final_output_is_treated_as_unexpected_close() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-finished-only".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add = recv_engine_message(dealer).await;
+                assert_eq!(add[0].as_ref(), &[0x00]);
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        finished_requests: Some(BTreeSet::from(["req-1".to_string()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+
+                assert!(timeout(Duration::from_millis(200), dealer.recv()).await.is_err());
+                let _ = push;
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            1,
+            "test-model",
+            Duration::from_secs(2),
+            0,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+
+    let mut stream = client.call(sample_request()).await.unwrap();
+    let error = timeout(Duration::from_secs(1), stream.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap_err();
+    assert!(matches!(
+        error,
+        Error::RequestStreamClosed { request_id } if request_id == "req-1"
+    ));
+    assert!(timeout(Duration::from_secs(1), stream.next()).await.unwrap().is_none());
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn dropping_a_live_stream_triggers_abort() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-drop".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add = recv_engine_message(dealer).await;
+                assert_eq!(add[0].as_ref(), &[0x00]);
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![request_output("req-1", vec![99], None)],
+                        ..Default::default()
+                    },
+                )
+                .await;
+
+                let abort =
+                    timeout(Duration::from_secs(1), recv_engine_message(dealer)).await.unwrap();
+                assert_eq!(abort[0].as_ref(), &[0x01]);
+                let aborted_ids: Vec<String> = rmp_serde::from_slice(&abort[1]).unwrap();
+                assert_eq!(aborted_ids, vec!["req-1".to_string()]);
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            1,
+            "test-model",
+            Duration::from_secs(2),
+            0,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+
+    let mut stream = client.call(sample_request()).await.unwrap();
+    let first = timeout(Duration::from_secs(1), stream.next()).await.unwrap().unwrap().unwrap();
+    assert_eq!(first.new_token_ids, vec![99]);
+    drop(stream);
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn dispatcher_failure_propagates_to_streams_and_future_calls() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-fail".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let _ = recv_engine_message(dealer).await;
+                let _ = recv_engine_message(dealer).await;
+
+                push.send(ZmqMessage::from(vec![0xc1])).await.unwrap();
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            1,
+            "test-model",
+            Duration::from_secs(2),
+            0,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+
+    let mut stream_1 = client.call(sample_request_with_id("req-1")).await.unwrap();
+    let mut stream_2 = client.call(sample_request_with_id("req-2")).await.unwrap();
+
+    let error_1 = timeout(Duration::from_secs(1), stream_1.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap_err();
+    let error_2 = timeout(Duration::from_secs(1), stream_2.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap_err();
+    assert!(is_decode_error(&error_1));
+    assert!(is_decode_error(&error_2));
+    assert!(is_decode_error(
+        client.health_error().as_deref().expect("health error recorded")
+    ));
+
+    let abort_error = client.abort(&["req-1".to_string()]).await.unwrap_err();
+    assert!(is_decode_error(&abort_error));
+
+    let add_error = match client.call(sample_request_with_id("req-3")).await {
+        Ok(_) => panic!("expected dispatcher closed error"),
+        Err(error) => error,
+    };
+    assert!(is_decode_error(&add_error));
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn is_sleeping_wrapper_sends_typed_request_and_returns_typed_response() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-utility-success".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let utility = recv_engine_message(dealer).await;
+                assert_eq!(utility[0].as_ref(), &[0x03]);
+
+                let payload = decode_value(&utility[1]);
+                let array = match payload {
+                    Value::Array(array) => array,
+                    other => panic!("expected utility payload array, got {other:?}"),
+                };
+                assert_eq!(array.len(), 4);
+                assert_eq!(array[0], Value::from(5));
+                let call_id = array[1].as_u64().expect("call_id");
+                assert_eq!(array[2], Value::from("is_sleeping"));
+                assert_eq!(array[3], Value::Array(Vec::new()));
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        utility_output: Some(UtilityOutput {
+                            call_id: call_id.into(),
+                            failure_message: None,
+                            result: Some(utility_result_value(true)),
+                        }),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            1,
+            "test-model",
+            Duration::from_secs(2),
+            5,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+
+    let result = client.is_sleeping().await.unwrap();
+    assert!(result);
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn call_utility_failure_message_surfaces_as_error() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-utility-fail".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let utility = recv_engine_message(dealer).await;
+                assert_eq!(utility[0].as_ref(), &[0x03]);
+                let payload = decode_value(&utility[1]);
+                let call_id =
+                    payload.as_array().and_then(|array| array[1].as_u64()).expect("call_id");
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        utility_output: Some(UtilityOutput {
+                            call_id: call_id.into(),
+                            failure_message: Some("boom".to_string()),
+                            result: None,
+                        }),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            1,
+            "test-model",
+            Duration::from_secs(2),
+            0,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+
+    let error = client.call_utility::<bool, _>("is_sleeping", ()).await.unwrap_err();
+    assert!(matches!(
+        error,
+        Error::UtilityCallFailed {
+            method,
+            message,
+            ..
+        } if method == "is_sleeping" && message == "boom"
+    ));
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn dispatcher_failure_propagates_to_waiting_utility_calls() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-utility-dispatcher-fail".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let utility = recv_engine_message(dealer).await;
+                assert_eq!(utility[0].as_ref(), &[0x03]);
+
+                push.send(ZmqMessage::from(vec![0xc1])).await.unwrap();
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            1,
+            "test-model",
+            Duration::from_secs(2),
+            0,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+
+    let error = client.call_utility::<bool, _>("is_sleeping", ()).await.unwrap_err();
+    assert!(is_decode_error(&error));
+    assert!(is_decode_error(
+        client.health_error().as_deref().expect("health error recorded")
+    ));
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn connect_times_out_without_ready_message() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_handshake = handshake_address.clone();
+    let engine_task = tokio::spawn(async move {
+        tokio::time::sleep(Duration::from_millis(50)).await;
+
+        let mut options = SocketOptions::default();
+        options.peer_identity(PeerIdentity::try_from(b"engine-timeout".to_vec()).unwrap());
+        let mut handshake = DealerSocket::with_options(options);
+        handshake.connect(&engine_handshake).await.unwrap();
+        handshake
+            .send(ZmqMessage::from(
+                rmp_serde::to_vec_named(&ready_message("HELLO")).unwrap(),
+            ))
+            .await
+            .unwrap();
+
+        let _ = handshake.recv().await.unwrap();
+    });
+
+    let result = EngineCoreClient::connect(
+        handshake_test_config(
+            handshake_address,
+            1,
+            "test-model",
+            Duration::from_millis(100),
+            0,
+            None,
+        )
+        .with_local_input_output_addresses(Some(ipc.input_endpoint()), Some(ipc.output_endpoint())),
+    )
+    .await;
+
+    let error = match result {
+        Ok(_) => panic!("expected ready timeout"),
+        Err(error) => error,
+    };
+
+    let message = error.to_report_string();
+    assert!(message.contains("timed out"));
+    assert!(message.contains("READY"));
+    engine_task.await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn engine_core_dead_sentinel_marks_client_unhealthy_and_sticks() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-dead".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |_dealer, push| {
+            Box::pin(async move {
+                push.send(ZmqMessage::from(ENGINE_CORE_DEAD_SENTINEL.to_vec())).await.unwrap();
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            1,
+            "test-model",
+            Duration::from_secs(2),
+            0,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+
+    timeout(Duration::from_secs(2), async {
+        while client.is_healthy() {
+            tokio::task::yield_now().await;
+        }
+    })
+    .await
+    .expect("wait for unhealthy client");
+
+    assert!(!client.is_healthy());
+    assert!(matches!(
+        client.health_error().as_deref(),
+        Some(Error::EngineCoreDead)
+    ));
+
+    let error = client.call_utility::<bool, _>("is_sleeping", ()).await.unwrap_err();
+    assert!(
+        is_dispatcher_closed(&error) || is_engine_core_dead(&error),
+        "unexpected error: {error:?}"
+    );
+    assert!(matches!(
+        client.health_error().as_deref(),
+        Some(Error::EngineCoreDead)
+    ));
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn output_loop_failure_marks_client_unhealthy_and_records_first_error() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-output-failure".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |_dealer, push| {
+            Box::pin(async move {
+                send_output_frames(
+                    push,
+                    vec![
+                        bytes::Bytes::from_static(b"frame-1"),
+                        bytes::Bytes::from_static(b"frame-2"),
+                    ],
+                )
+                .await;
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            1,
+            "test-model",
+            Duration::from_secs(2),
+            0,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+
+    timeout(Duration::from_secs(2), async {
+        while client.is_healthy() {
+            let _ = client.call_utility::<bool, _>("is_sleeping", ()).await;
+            tokio::task::yield_now().await;
+        }
+    })
+    .await
+    .expect("wait for unhealthy client");
+
+    assert!(!client.is_healthy());
+    assert!(is_decode_error(
+        client.health_error().as_deref().expect("health error recorded")
+    ));
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn client_decodes_multipart_logprob_outputs() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-multipart-logprobs".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add = recv_engine_message(dealer).await;
+                assert_eq!(add[0].as_ref(), &[0x00]);
+                let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+                assert_eq!(request.request_id, "req-1");
+
+                send_output_frames(push, multipart_logprob_output_frames("req-1")).await;
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            1,
+            "test-model",
+            Duration::from_secs(2),
+            0,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+
+    let stream = client.call(sample_request()).await.unwrap();
+    let outputs = stream.collect::<Vec<_>>().await;
+    assert_eq!(outputs.len(), 1);
+
+    let output = outputs.into_iter().next().unwrap().unwrap();
+    assert_eq!(output.output.new_token_ids, vec![7, 8]);
+    assert_eq!(
+        output.output.finish_reason,
+        Some(EngineCoreFinishReason::Length)
+    );
+    expect_sample_logprobs(output.output.new_logprobs.as_ref().expect("logprobs decoded"));
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn multi_engine_client_shares_transport_and_routes_by_inflight_count() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let (engine_0_seen_tx, mut engine_0_seen_rx) = mpsc::unbounded_channel();
+    let (engine_1_seen_tx, engine_1_seen_rx) = oneshot::channel();
+    let (finish_req_1_tx, finish_req_1_rx) = oneshot::channel();
+    let (finish_req_2_tx, finish_req_2_rx) = oneshot::channel();
+    let (finish_req_3_tx, finish_req_3_rx) = oneshot::channel();
+
+    let (init_rx_0, shutdown_tx_0, engine_task_0) = spawn_mock_engine_task_with_init(
+        handshake_address.clone(),
+        b"engine-0".to_vec(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add_1 = recv_engine_message(dealer).await;
+                assert_eq!(add_1[0].as_ref(), &[0x00]);
+                let request_1: EngineCoreRequest = rmp_serde::from_slice(&add_1[1]).unwrap();
+                assert_eq!(request_1.request_id, "req-1");
+                engine_0_seen_tx.send(request_1.request_id.clone()).unwrap();
+                finish_req_1_rx.await.unwrap();
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 0,
+                        outputs: vec![request_output(
+                            &request_1.request_id,
+                            vec![10],
+                            Some(EngineCoreFinishReason::Length),
+                        )],
+                        finished_requests: Some(BTreeSet::from([request_1.request_id.clone()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+
+                let add_3 = recv_engine_message(dealer).await;
+                assert_eq!(add_3[0].as_ref(), &[0x00]);
+                let request_3: EngineCoreRequest = rmp_serde::from_slice(&add_3[1]).unwrap();
+                assert_eq!(request_3.request_id, "req-3");
+                engine_0_seen_tx.send(request_3.request_id.clone()).unwrap();
+                finish_req_3_rx.await.unwrap();
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 0,
+                        outputs: vec![request_output(
+                            &request_3.request_id,
+                            vec![30],
+                            Some(EngineCoreFinishReason::Length),
+                        )],
+                        finished_requests: Some(BTreeSet::from([request_3.request_id.clone()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+    let (init_rx_1, shutdown_tx_1, engine_task_1) = spawn_mock_engine_task_with_init(
+        handshake_address.clone(),
+        b"engine-1".to_vec(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add_2 = recv_engine_message(dealer).await;
+                assert_eq!(add_2[0].as_ref(), &[0x00]);
+                let request_2: EngineCoreRequest = rmp_serde::from_slice(&add_2[1]).unwrap();
+                assert_eq!(request_2.request_id, "req-2");
+                let _ = engine_1_seen_tx.send(request_2.request_id.clone());
+                finish_req_2_rx.await.unwrap();
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 1,
+                        outputs: vec![request_output(
+                            &request_2.request_id,
+                            vec![20],
+                            Some(EngineCoreFinishReason::Length),
+                        )],
+                        finished_requests: Some(BTreeSet::from([request_2.request_id.clone()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address.clone(),
+            2,
+            "test-model",
+            Duration::from_secs(2),
+            0,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+
+    let init_0 = timeout(Duration::from_secs(1), init_rx_0).await.unwrap().unwrap();
+    let init_1 = timeout(Duration::from_secs(1), init_rx_1).await.unwrap().unwrap();
+    assert_eq!(init_0.addresses.inputs, vec![ipc.input_endpoint()]);
+    assert_eq!(init_1.addresses.inputs, vec![ipc.input_endpoint()]);
+    assert_eq!(init_0.addresses.outputs, vec![ipc.output_endpoint()]);
+    assert_eq!(init_1.addresses.outputs, vec![ipc.output_endpoint()]);
+
+    assert_eq!(client.input_address(), ipc.input_endpoint());
+    assert_eq!(client.output_address(), ipc.output_endpoint());
+    assert_eq!(client.engine_count(), 2);
+    assert_eq!(
+        client.engine_identities(),
+        vec![b"engine-0".as_slice(), b"engine-1".as_slice()]
+    );
+    assert_eq!(client.ready_responses().len(), 2);
+    assert_eq!(client.engine_identities()[0], b"engine-0");
+
+    let mut stream_1 = client.call(sample_request_with_id("req-1")).await.unwrap();
+    let mut stream_2 = client.call(sample_request_with_id("req-2")).await.unwrap();
+    assert_eq!(
+        timeout(Duration::from_secs(1), engine_0_seen_rx.recv()).await.unwrap().unwrap(),
+        "req-1"
+    );
+    assert_eq!(
+        timeout(Duration::from_secs(1), engine_1_seen_rx).await.unwrap().unwrap(),
+        "req-2"
+    );
+
+    let _ = finish_req_1_tx.send(());
+    let final_1 = timeout(Duration::from_secs(1), stream_1.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap();
+    assert_eq!(final_1.engine_index, 0);
+    assert_eq!(final_1.new_token_ids, vec![10]);
+    assert_eq!(final_1.finish_reason, Some(EngineCoreFinishReason::Length));
+
+    let mut stream_3 = client.call(sample_request_with_id("req-3")).await.unwrap();
+    assert_eq!(
+        timeout(Duration::from_secs(1), engine_0_seen_rx.recv()).await.unwrap().unwrap(),
+        "req-3"
+    );
+
+    let _ = finish_req_3_tx.send(());
+    let final_3 = timeout(Duration::from_secs(1), stream_3.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap();
+    assert_eq!(final_3.engine_index, 0);
+    assert_eq!(final_3.new_token_ids, vec![30]);
+    assert_eq!(final_3.finish_reason, Some(EngineCoreFinishReason::Length));
+
+    let _ = finish_req_2_tx.send(());
+    let final_2 = timeout(Duration::from_secs(1), stream_2.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap();
+    assert_eq!(final_2.engine_index, 1);
+    assert_eq!(final_2.new_token_ids, vec![20]);
+    assert_eq!(final_2.finish_reason, Some(EngineCoreFinishReason::Length));
+
+    assert!(timeout(Duration::from_secs(1), stream_1.next()).await.unwrap().is_none());
+    assert!(timeout(Duration::from_secs(1), stream_2.next()).await.unwrap().is_none());
+    assert!(timeout(Duration::from_secs(1), stream_3.next()).await.unwrap().is_none());
+
+    let _ = shutdown_tx_0.send(());
+    let _ = shutdown_tx_1.send(());
+    engine_task_0.await.unwrap();
+    engine_task_1.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn multi_engine_abort_is_grouped_and_utility_fans_out_to_all_engines() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+
+    let (shutdown_tx_0, engine_task_0) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        b"engine-0".to_vec(),
+        |dealer, push| {
+            Box::pin(async move {
+                let utility = recv_engine_message(dealer).await;
+                assert_eq!(utility[0].as_ref(), &[0x03]);
+                let payload = decode_value(&utility[1]);
+                let array = match payload {
+                    Value::Array(array) => array,
+                    other => panic!("expected utility payload array, got {other:?}"),
+                };
+                let call_id = array[1].as_u64().expect("call_id");
+                assert_eq!(array[2], Value::from("is_sleeping"));
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        utility_output: Some(UtilityOutput {
+                            call_id: call_id.into(),
+                            failure_message: None,
+                            result: Some(utility_result_value(true)),
+                        }),
+                        ..Default::default()
+                    },
+                )
+                .await;
+
+                let add_1 = recv_engine_message(dealer).await;
+                assert_eq!(add_1[0].as_ref(), &[0x00]);
+                let request_1: EngineCoreRequest = rmp_serde::from_slice(&add_1[1]).unwrap();
+                assert_eq!(request_1.request_id, "req-1");
+
+                let abort = recv_engine_message(dealer).await;
+                assert_eq!(abort[0].as_ref(), &[0x01]);
+                let aborted_ids: Vec<String> = rmp_serde::from_slice(&abort[1]).unwrap();
+                assert_eq!(aborted_ids, vec!["req-1".to_string()]);
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 0,
+                        outputs: vec![request_output(
+                            "req-1",
+                            vec![],
+                            Some(EngineCoreFinishReason::Abort),
+                        )],
+                        finished_requests: Some(BTreeSet::from(["req-1".to_string()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+    tokio::time::sleep(Duration::from_millis(50)).await;
+    let (shutdown_tx_1, engine_task_1) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        b"engine-1".to_vec(),
+        |dealer, push| {
+            Box::pin(async move {
+                let utility = recv_engine_message(dealer).await;
+                assert_eq!(utility[0].as_ref(), &[0x03]);
+                let payload = decode_value(&utility[1]);
+                let array = match payload {
+                    Value::Array(array) => array,
+                    other => panic!("expected utility payload array, got {other:?}"),
+                };
+                let call_id = array[1].as_u64().expect("call_id");
+                assert_eq!(array[2], Value::from("is_sleeping"));
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        utility_output: Some(UtilityOutput {
+                            call_id: call_id.into(),
+                            failure_message: None,
+                            result: Some(utility_result_value(true)),
+                        }),
+                        ..Default::default()
+                    },
+                )
+                .await;
+
+                let add_2 = recv_engine_message(dealer).await;
+                assert_eq!(add_2[0].as_ref(), &[0x00]);
+                let request_2: EngineCoreRequest = rmp_serde::from_slice(&add_2[1]).unwrap();
+                assert_eq!(request_2.request_id, "req-2");
+
+                let abort = recv_engine_message(dealer).await;
+                assert_eq!(abort[0].as_ref(), &[0x01]);
+                let aborted_ids: Vec<String> = rmp_serde::from_slice(&abort[1]).unwrap();
+                assert_eq!(aborted_ids, vec!["req-2".to_string()]);
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 1,
+                        outputs: vec![request_output(
+                            "req-2",
+                            vec![],
+                            Some(EngineCoreFinishReason::Abort),
+                        )],
+                        finished_requests: Some(BTreeSet::from(["req-2".to_string()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            2,
+            "test-model",
+            Duration::from_secs(2),
+            5,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+
+    assert!(client.is_sleeping().await.unwrap());
+
+    let mut stream_1 = client.call(sample_request_with_id("req-1")).await.unwrap();
+    let mut stream_2 = client.call(sample_request_with_id("req-2")).await.unwrap();
+
+    client
+        .abort(&[
+            "req-2".to_string(),
+            "req-1".to_string(),
+            "unknown".to_string(),
+        ])
+        .await
+        .unwrap();
+
+    let final_1 = timeout(Duration::from_secs(1), stream_1.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap();
+    assert_eq!(final_1.engine_index, 0);
+    assert_eq!(final_1.finish_reason, Some(EngineCoreFinishReason::Abort));
+
+    let final_2 = timeout(Duration::from_secs(1), stream_2.next())
+        .await
+        .unwrap()
+        .unwrap()
+        .unwrap();
+    assert_eq!(final_2.engine_index, 1);
+    assert_eq!(final_2.finish_reason, Some(EngineCoreFinishReason::Abort));
+
+    let _ = shutdown_tx_0.send(());
+    let _ = shutdown_tx_1.send(());
+    engine_task_0.await.unwrap();
+    engine_task_1.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn collective_rpc_flattens_results_from_all_engines() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+
+    let (shutdown_tx_0, engine_task_0) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        b"engine-0".to_vec(),
+        |dealer, push| {
+            Box::pin(async move {
+                let utility = recv_engine_message(dealer).await;
+                assert_eq!(utility[0].as_ref(), &[0x03]);
+                let payload = decode_value(&utility[1]);
+                let array = match payload {
+                    Value::Array(array) => array,
+                    other => panic!("expected utility payload array, got {other:?}"),
+                };
+                let call_id = array[1].as_u64().expect("call_id");
+                assert_eq!(array[2], Value::from("collective_rpc"));
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        utility_output: Some(UtilityOutput {
+                            call_id: call_id.into(),
+                            failure_message: None,
+                            result: Some(utility_result_value(vec!["engine-0-worker"])),
+                        }),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+    tokio::time::sleep(Duration::from_millis(50)).await;
+    let (shutdown_tx_1, engine_task_1) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        b"engine-1".to_vec(),
+        |dealer, push| {
+            Box::pin(async move {
+                let utility = recv_engine_message(dealer).await;
+                assert_eq!(utility[0].as_ref(), &[0x03]);
+                let payload = decode_value(&utility[1]);
+                let array = match payload {
+                    Value::Array(array) => array,
+                    other => panic!("expected utility payload array, got {other:?}"),
+                };
+                let call_id = array[1].as_u64().expect("call_id");
+                assert_eq!(array[2], Value::from("collective_rpc"));
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        utility_output: Some(UtilityOutput {
+                            call_id: call_id.into(),
+                            failure_message: None,
+                            result: Some(utility_result_value(vec!["engine-1-worker"])),
+                        }),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let client = connect_client_with_ipc(
+        handshake_test_config(
+            handshake_address,
+            2,
+            "test-model",
+            Duration::from_secs(2),
+            5,
+            None,
+        ),
+        &ipc,
+    )
+    .await;
+
+    let results = client
+        .collective_rpc(
+            "get_model_name",
+            Option::<f64>::None,
+            Vec::<String>::new(),
+            BTreeMap::<String, String>::new(),
+        )
+        .await
+        .unwrap();
+    assert_eq!(
+        results,
+        vec![
+            Value::from("engine-0-worker"),
+            Value::from("engine-1-worker")
+        ]
+    );
+
+    let _ = shutdown_tx_0.send(());
+    let _ = shutdown_tx_1.send(());
+    engine_task_0.await.unwrap();
+    engine_task_1.await.unwrap();
+    client.shutdown().await.unwrap();
+}
+
+#[test]
+fn python_msgpack_fixtures_match_rust_encoding() {
+    init_tracing();
+    let script = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("src/tests/python_compat.py");
+    let output = Command::new(&script)
+        .output()
+        .unwrap_or_else(|error| panic!("failed to execute {:?}: {error}", script));
+    assert!(
+        output.status.success(),
+        "python fixture script failed: status={:?}\nstdout:\n{}\nstderr:\n{}",
+        output.status.code(),
+        String::from_utf8_lossy(&output.stdout),
+        String::from_utf8_lossy(&output.stderr),
+    );
+
+    let stdout = String::from_utf8(output.stdout).unwrap();
+    let mut lines = stdout.lines();
+    let request_hex = lines.next().expect("missing request fixture line");
+    let multimodal_request_hex = lines.next().expect("missing multimodal request fixture line");
+    let outputs_hex = lines.next().expect("missing outputs fixture line");
+    let inline_logprobs_frames = lines.next().expect("missing inline logprobs fixture line");
+    let multipart_logprobs_frames = lines.next().expect("missing multipart logprobs fixture line");
+    let inline_prompt_frames = lines.next().expect("missing inline prompt logprobs fixture line");
+    let multipart_prompt_frames =
+        lines.next().expect("missing multipart prompt logprobs fixture line");
+
+    let request_bytes = hex::decode(request_hex).unwrap();
+    let multimodal_request_bytes = hex::decode(multimodal_request_hex).unwrap();
+    let outputs_bytes = hex::decode(outputs_hex).unwrap();
+
+    let decoded_request: EngineCoreRequest = rmp_serde::from_slice(&request_bytes).unwrap();
+    let expected_request = sample_request();
+    assert_eq!(decoded_request, expected_request);
+
+    let decoded_multimodal_request: EngineCoreRequest =
+        rmp_serde::from_slice(&multimodal_request_bytes).unwrap();
+    assert_eq!(decoded_multimodal_request, sample_multimodal_request());
+
+    // The decode assertion above proves Python wire -> Rust struct. Also compare
+    // Rust struct -> wire for the multimodal subtree, which is the frontend's
+    // production direction when sending requests to Python EngineCore.
+    let expected_multimodal_request = sample_multimodal_request();
+    let decode_value = |bytes: &[u8]| {
+        rmpv::decode::read_value(&mut Cursor::new(bytes)).expect("decode msgpack value")
+    };
+    let extract_mm_features = |value: Value| match value {
+        Value::Array(items) => items.get(2).cloned().expect("request mm_features slot"),
+        other => panic!("request should encode as tuple array, got {other:?}"),
+    };
+    let python_mm_features = extract_mm_features(decode_value(&multimodal_request_bytes));
+    let rust_mm_features =
+        decode_value(&rmp_serde::to_vec_named(&expected_multimodal_request.mm_features).unwrap());
+    assert_eq!(python_mm_features, rust_mm_features);
+
+    let decoded_outputs: EngineCoreOutputs = rmp_serde::from_slice(&outputs_bytes).unwrap();
+    expect_test::expect![[r#"
+        EngineCoreOutputs {
+            engine_index: 0,
+            outputs: [
+                EngineCoreOutput {
+                    request_id: "req-1",
+                    new_token_ids: [
+                        7,
+                        8,
+                    ],
+                    new_logprobs: None,
+                    new_prompt_logprobs_tensors: None,
+                    pooling_output: None,
+                    finish_reason: Some(
+                        Length,
+                    ),
+                    stop_reason: None,
+                    events: None,
+                    kv_transfer_params: None,
+                    trace_headers: None,
+                    prefill_stats: None,
+                    routed_experts: None,
+                    num_nans_in_logits: 0,
+                },
+            ],
+            scheduler_stats: None,
+            timestamp: 0.0,
+            utility_output: None,
+            finished_requests: Some(
+                {
+                    "req-1",
+                },
+            ),
+            wave_complete: None,
+            start_wave: None,
+        }
+    "#]]
+    .assert_debug_eq(&decoded_outputs);
+
+    let decode_frames = |line: &str| {
+        line.split_whitespace()
+            .map(|frame| bytes::Bytes::from(hex::decode(frame).unwrap()))
+            .collect::<Vec<_>>()
+    };
+
+    let inline_logprobs =
+        decode_engine_core_outputs(&decode_frames(inline_logprobs_frames)).unwrap();
+    expect_sample_logprobs(
+        inline_logprobs.outputs[0]
+            .new_logprobs
+            .as_ref()
+            .expect("inline logprobs decoded"),
+    );
+
+    let multipart_logprobs =
+        decode_engine_core_outputs(&decode_frames(multipart_logprobs_frames)).unwrap();
+    expect_sample_logprobs(
+        multipart_logprobs.outputs[0]
+            .new_logprobs
+            .as_ref()
+            .expect("multipart logprobs decoded"),
+    );
+
+    let inline_prompt = decode_engine_core_outputs(&decode_frames(inline_prompt_frames)).unwrap();
+    expect_prompt_logprobs(
+        inline_prompt.outputs[0]
+            .new_prompt_logprobs_tensors
+            .as_ref()
+            .expect("inline prompt logprobs decoded"),
+    );
+
+    let multipart_prompt =
+        decode_engine_core_outputs(&decode_frames(multipart_prompt_frames)).unwrap();
+    expect_prompt_logprobs(
+        multipart_prompt.outputs[0]
+            .new_prompt_logprobs_tensors
+            .as_ref()
+            .expect("multipart prompt logprobs decoded"),
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn bootstrapped_connects_after_single_engine_registration() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let input_address = ipc.input_endpoint();
+    let output_address = ipc.output_endpoint();
+
+    let client_task = tokio::spawn({
+        let input_address = input_address.clone();
+        let output_address = output_address.clone();
+        async move {
+            EngineCoreClient::connect(bootstrapped_test_config(
+                input_address,
+                output_address,
+                1,
+                Duration::from_secs(2),
+                0,
+                None,
+            ))
+            .await
+            .unwrap()
+        }
+    });
+
+    let (_dealer, _push) =
+        setup_bootstrapped_mock_engine(input_address, output_address, &[0x00, 0x00]).await;
+    let client = client_task.await.unwrap();
+
+    assert_eq!(client.engine_count(), 1);
+    let engine_ids =
+        client.engine_identities().into_iter().map(|id| id.to_vec()).collect::<Vec<_>>();
+    assert_eq!(engine_ids, vec![vec![0x00, 0x00]]);
+
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn bootstrapped_connects_with_contiguous_engine_ids() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let input_address = ipc.input_endpoint();
+    let output_address = ipc.output_endpoint();
+
+    let client_task = tokio::spawn({
+        let input_address = input_address.clone();
+        let output_address = output_address.clone();
+        async move {
+            EngineCoreClient::connect(bootstrapped_test_config(
+                input_address,
+                output_address,
+                2,
+                Duration::from_secs(2),
+                0,
+                None,
+            ))
+            .await
+            .unwrap()
+        }
+    });
+
+    let (_dealer0, _push0) = setup_bootstrapped_mock_engine(
+        input_address.clone(),
+        output_address.clone(),
+        &[0x00, 0x00],
+    )
+    .await;
+    let (_dealer1, _push1) =
+        setup_bootstrapped_mock_engine(input_address, output_address, &[0x01, 0x00]).await;
+    let client = client_task.await.unwrap();
+
+    assert_eq!(client.engine_count(), 2);
+    let engine_ids =
+        client.engine_identities().into_iter().map(|id| id.to_vec()).collect::<Vec<_>>();
+    assert_eq!(engine_ids, vec![vec![0x00, 0x00], vec![0x01, 0x00]]);
+
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn bootstrapped_connect_times_out_without_registration() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let result = EngineCoreClient::connect(bootstrapped_test_config(
+        ipc.input_endpoint(),
+        ipc.output_endpoint(),
+        1,
+        Duration::from_millis(100),
+        0,
+        None,
+    ))
+    .await;
+
+    let error = match result {
+        Ok(_) => panic!("bootstrapped connect should time out"),
+        Err(error) => error,
+    };
+    assert!(matches!(error, Error::InputRegistrationTimeout { .. }));
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn bootstrapped_external_coordinator_connects_and_subscribes() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let input_address = ipc.input_endpoint();
+    let output_address = ipc.output_endpoint();
+    let coordinator_address = ipc.endpoint("stats.sock");
+
+    let mut stats_socket = XPubSocket::new();
+    stats_socket.bind(&coordinator_address).await.unwrap();
+
+    let client_task = tokio::spawn({
+        let input_address = input_address.clone();
+        let output_address = output_address.clone();
+        let coordinator_address = coordinator_address.clone();
+        async move {
+            EngineCoreClient::connect(bootstrapped_test_config(
+                input_address,
+                output_address,
+                1,
+                Duration::from_secs(2),
+                0,
+                Some(CoordinatorMode::External {
+                    address: coordinator_address,
+                }),
+            ))
+            .await
+            .unwrap()
+        }
+    });
+
+    let (_dealer, _push) =
+        setup_bootstrapped_mock_engine(input_address, output_address, &[0x00, 0x00]).await;
+    let client = client_task.await.unwrap();
+
+    timeout(
+        Duration::from_secs(1),
+        recv_xpub_subscription(&mut stats_socket),
+    )
+    .await
+    .unwrap();
+
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn bootstrapped_external_coordinator_updates_wave_ignores_counts_and_sends_one_wakeup() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let input_address = ipc.input_endpoint();
+    let output_address = ipc.output_endpoint();
+    let coordinator_address = ipc.endpoint("stats.sock");
+
+    let mut stats_socket = XPubSocket::new();
+    stats_socket.bind(&coordinator_address).await.unwrap();
+
+    let client_task = tokio::spawn({
+        let input_address = input_address.clone();
+        let output_address = output_address.clone();
+        let coordinator_address = coordinator_address.clone();
+        async move {
+            EngineCoreClient::connect(bootstrapped_test_config(
+                input_address,
+                output_address,
+                1,
+                Duration::from_secs(2),
+                0,
+                Some(CoordinatorMode::External {
+                    address: coordinator_address,
+                }),
+            ))
+            .await
+            .unwrap()
+        }
+    });
+
+    let (mut dealer, mut push) =
+        setup_bootstrapped_mock_engine(input_address, output_address, &[0x00, 0x00]).await;
+    let client = client_task.await.unwrap();
+    recv_xpub_subscription(&mut stats_socket).await;
+
+    send_external_coordinator_publish(&mut stats_socket, &(vec![(11_u32, 3_u32)], 7_u32, false))
+        .await;
+    tokio::time::sleep(Duration::from_millis(50)).await;
+
+    let mut stream = client.call(sample_request()).await.unwrap();
+
+    let wakeup = timeout(
+        Duration::from_secs(1),
+        recv_external_coordinator_wakeup(&mut stats_socket),
+    )
+    .await
+    .unwrap();
+    assert_eq!(wakeup, (0, 7));
+
+    assert!(
+        timeout(
+            Duration::from_millis(200),
+            recv_external_coordinator_wakeup(&mut stats_socket)
+        )
+        .await
+        .is_err()
+    );
+
+    let add = recv_engine_message(&mut dealer).await;
+    assert_eq!(add[0].as_ref(), &[0x00]);
+    let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+    assert_eq!(request.request_id, "req-1");
+    assert_eq!(request.current_wave, 7);
+    assert!(client.is_healthy());
+
+    send_outputs(
+        &mut push,
+        EngineCoreOutputs {
+            engine_index: 0,
+            outputs: vec![request_output(
+                "req-1",
+                vec![],
+                Some(EngineCoreFinishReason::Length),
+            )],
+            finished_requests: Some(BTreeSet::from(["req-1".to_string()])),
+            ..Default::default()
+        },
+    )
+    .await;
+
+    let final_output = timeout(Duration::from_secs(1), stream.next()).await.unwrap();
+    assert!(final_output.is_some());
+
+    client.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn bootstrapped_external_coordinator_running_state_suppresses_wakeup() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let input_address = ipc.input_endpoint();
+    let output_address = ipc.output_endpoint();
+    let coordinator_address = ipc.endpoint("stats.sock");
+
+    let mut stats_socket = XPubSocket::new();
+    stats_socket.bind(&coordinator_address).await.unwrap();
+
+    let client_task = tokio::spawn({
+        let input_address = input_address.clone();
+        let output_address = output_address.clone();
+        let coordinator_address = coordinator_address.clone();
+        async move {
+            EngineCoreClient::connect(bootstrapped_test_config(
+                input_address,
+                output_address,
+                1,
+                Duration::from_secs(2),
+                0,
+                Some(CoordinatorMode::External {
+                    address: coordinator_address,
+                }),
+            ))
+            .await
+            .unwrap()
+        }
+    });
+
+    let (mut dealer, mut push) =
+        setup_bootstrapped_mock_engine(input_address, output_address, &[0x00, 0x00]).await;
+    let client = client_task.await.unwrap();
+    recv_xpub_subscription(&mut stats_socket).await;
+
+    send_external_coordinator_publish(&mut stats_socket, &(Value::Nil, 5_u32, true)).await;
+    tokio::time::sleep(Duration::from_millis(50)).await;
+
+    let mut stream = client.call(sample_request()).await.unwrap();
+
+    assert!(
+        timeout(
+            Duration::from_millis(200),
+            recv_external_coordinator_wakeup(&mut stats_socket)
+        )
+        .await
+        .is_err()
+    );
+
+    let add = recv_engine_message(&mut dealer).await;
+    let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+    assert_eq!(request.current_wave, 5);
+
+    send_outputs(
+        &mut push,
+        EngineCoreOutputs {
+            engine_index: 0,
+            outputs: vec![request_output(
+                "req-1",
+                vec![],
+                Some(EngineCoreFinishReason::Length),
+            )],
+            finished_requests: Some(BTreeSet::from(["req-1".to_string()])),
+            ..Default::default()
+        },
+    )
+    .await;
+
+    let final_output = timeout(Duration::from_secs(1), stream.next()).await.unwrap();
+    assert!(final_output.is_some());
+
+    client.shutdown().await.unwrap();
+}
diff --git a/rust/src/engine-core-client/src/tests/mod.rs b/rust/src/engine-core-client/src/tests/mod.rs
new file mode 100644
index 000000000000..b79c47fca368
--- /dev/null
+++ b/rust/src/engine-core-client/src/tests/mod.rs
@@ -0,0 +1 @@
+mod client;
diff --git a/rust/src/engine-core-client/src/tests/python_compat.py b/rust/src/engine-core-client/src/tests/python_compat.py
new file mode 100755
index 000000000000..bb81a6df1ada
--- /dev/null
+++ b/rust/src/engine-core-client/src/tests/python_compat.py
@@ -0,0 +1,356 @@
+#!/usr/bin/env -S uv run
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# /// script
+# requires-python = ">=3.11"
+# dependencies = [
+#   "msgspec>=0.19,<1",
+#   "msgpack>=1,<2",
+#   "numpy>=2,<3",
+# ]
+# ///
+
+from enum import Enum, IntEnum
+
+import msgpack
+import msgspec
+import numpy as np
+
+
+class RequestOutputKind(Enum):
+    DELTA = 1
+    FINAL_ONLY = 2
+
+
+class FinishReason(IntEnum):
+    STOP = 0
+    LENGTH = 1
+    ABORT = 2
+    ERROR = 3
+    REPETITION = 4
+
+
+class EngineCoreSamplingParams(msgspec.Struct, dict=True):
+    temperature: float = 1.0
+    top_p: float = 1.0
+    top_k: int = 0
+    seed: int | None = None
+    max_tokens: int = 65536
+    min_tokens: int = 0
+    min_p: float = 0.0
+    frequency_penalty: float = 0.0
+    presence_penalty: float = 0.0
+    repetition_penalty: float = 1.0
+    stop_token_ids: list[int] = []
+    _eos_token_id: int | None = None
+    _all_stop_token_ids: set[int] = set()
+    output_kind: RequestOutputKind = RequestOutputKind.DELTA
+
+
+class EngineCoreRequest(
+    msgspec.Struct,
+    array_like=True,
+    omit_defaults=True,
+):
+    request_id: str
+    prompt_token_ids: list[int] | None
+    mm_features: object | None
+    sampling_params: EngineCoreSamplingParams | None
+    pooling_params: object | None
+    arrival_time: float
+    lora_request: object | None = None
+    cache_salt: str | None = None
+    data_parallel_rank: int | None = None
+    prompt_embeds: object | None = None
+    prompt_is_token_ids: list[bool] | None = None
+    client_index: int = 0
+    current_wave: int = 0
+    priority: int = 0
+    trace_headers: dict[str, str] | None = None
+    resumable: bool = False
+    external_req_id: str | None = None
+    reasoning_ended: bool | None = None
+    reasoning_parser_kwargs: dict[str, object] | None = None
+    abort_immediately: bool = False
+
+
+class EngineCoreOutput(
+    msgspec.Struct,
+    array_like=True,
+    omit_defaults=True,
+):
+    request_id: str
+    new_token_ids: list[int]
+    new_logprobs: object | None = None
+    new_prompt_logprobs_tensors: object | None = None
+    pooling_output: object | None = None
+    finish_reason: FinishReason | None = None
+    stop_reason: int | str | None = None
+    events: object | None = None
+    kv_transfer_params: object | None = None
+    trace_headers: object | None = None
+    prefill_stats: object | None = None
+    routed_experts: object | None = None
+    num_nans_in_logits: int = 0
+
+
+class EngineCoreOutputs(
+    msgspec.Struct,
+    array_like=True,
+    omit_defaults=True,
+):
+    engine_index: int = 0
+    outputs: list[EngineCoreOutput] = []
+    scheduler_stats: object | None = None
+    timestamp: float = 0.0
+    utility_output: object | None = None
+    finished_requests: set[str] | None = None
+    wave_complete: int | None = None
+    start_wave: int | None = None
+
+
+request = EngineCoreRequest(
+    request_id="req-1",
+    prompt_token_ids=[11, 22],
+    mm_features=None,
+    sampling_params=EngineCoreSamplingParams(
+        temperature=0.8,
+        top_p=0.9,
+        top_k=8,
+        seed=None,
+        max_tokens=32,
+        min_tokens=1,
+        min_p=0.0,
+        frequency_penalty=0.0,
+        presence_penalty=0.0,
+        repetition_penalty=1.0,
+        stop_token_ids=[151643],
+        _eos_token_id=151645,
+        _all_stop_token_ids={151643, 151645},
+        output_kind=RequestOutputKind.FINAL_ONLY,
+    ),
+    pooling_params=None,
+    arrival_time=42.5,
+    client_index=0,
+)
+
+multimodal_tensor = np.array([[1.0, 2.0], [3.5, 4.25]], dtype=np.float32)
+multimodal_features = [
+    {
+        "data": {
+            "pixel_values": {
+                "data": [
+                    "float32",
+                    [2, 2],
+                    msgpack.ExtType(3, multimodal_tensor.tobytes()),
+                ],
+                "field": [
+                    "flat",
+                    {
+                        "slices": [[0, 2, None]],
+                        "dim": 0,
+                        "keep_on_cpu": False,
+                    },
+                ],
+            }
+        },
+        "modality": "image",
+        "identifier": "mm-cache-key",
+        "mm_position": {
+            "offset": 1,
+            "length": 2,
+            "is_embed": None,
+        },
+        "mm_hash": "processor-hash",
+    }
+]
+multimodal_request_wire = [
+    "req-mm",
+    [101, 102, 103, 104],
+    multimodal_features,
+    None,
+    None,
+    43.5,
+]
+
+outputs = EngineCoreOutputs(
+    outputs=[
+        EngineCoreOutput(
+            request_id="req-1",
+            new_token_ids=[7, 8],
+            finish_reason=FinishReason.LENGTH,
+        )
+    ],
+    finished_requests={"req-1"},
+)
+
+
+def encode_ndarray(
+    array: np.ndarray,
+    buffers: list[bytes],
+    *,
+    size_threshold: int = 256,
+):
+    arr_data = array.data if array.flags.c_contiguous else array.tobytes()
+    if not array.shape or array.nbytes < size_threshold:
+        data = msgpack.ExtType(3, bytes(arr_data))
+    else:
+        data = len(buffers)
+        buffers.append(bytes(arr_data))
+    return [array.dtype.str, list(array.shape), data]
+
+
+def encode_tensor_like(
+    dtype: str,
+    shape: list[int],
+    payload: bytes,
+    buffers: list[bytes],
+    *,
+    size_threshold: int = 256,
+):
+    if len(payload) < size_threshold:
+        data = msgpack.ExtType(3, payload)
+    else:
+        data = len(buffers)
+        buffers.append(payload)
+    return [dtype, shape, data]
+
+
+def encode_output_frames(obj, *, size_threshold: int = 256) -> list[bytes]:
+    buffers = [b""]
+
+    def transform(value):
+        if isinstance(value, np.ndarray):
+            return encode_ndarray(value, buffers, size_threshold=size_threshold)
+        if (
+            isinstance(value, tuple)
+            and len(value) == 3
+            and value[0] in ("int32", "int64", "float32")
+        ):
+            dtype, shape, payload = value
+            return encode_tensor_like(
+                dtype,
+                shape,
+                payload,
+                buffers,
+                size_threshold=size_threshold,
+            )
+        if type(value) is list:
+            return [transform(v) for v in value]
+        if type(value) is tuple:
+            return [transform(v) for v in value]
+        if type(value) is dict:
+            return {k: transform(v) for k, v in value.items()}
+        return value
+
+    buffers[0] = msgpack.packb(transform(obj), use_bin_type=True)
+    return buffers
+
+
+def engine_output_wire(
+    request_id: str,
+    *,
+    new_logprobs=None,
+    new_prompt_logprobs_tensors=None,
+):
+    return [
+        request_id,
+        [7, 8],
+        new_logprobs,
+        new_prompt_logprobs_tensors,
+        None,
+        int(FinishReason.LENGTH),
+    ]
+
+
+def engine_outputs_wire(output):
+    return [0, [output], None, 0.0, None, ["req-1"]]
+
+
+inline_logprobs = engine_outputs_wire(
+    engine_output_wire(
+        "req-1",
+        new_logprobs=(
+            np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int64),
+            np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32),
+            np.array([1, 2], dtype=np.int64),
+            None,
+        ),
+    )
+)
+
+multipart_logprobs = engine_outputs_wire(
+    engine_output_wire(
+        "req-1",
+        new_logprobs=(
+            np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int64),
+            np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32),
+            np.array([1, 2], dtype=np.int64),
+            None,
+        ),
+    )
+)
+
+inline_prompt_logprobs = engine_outputs_wire(
+    engine_output_wire(
+        "req-1",
+        new_prompt_logprobs_tensors=(
+            (
+                "int64",
+                [2, 3],
+                np.array([[10, 11, 12], [13, 14, 15]], dtype=np.int64).tobytes(),
+            ),
+            (
+                "float32",
+                [2, 3],
+                np.array(
+                    [[10, 11, 12], [13, 14, 15]],
+                    dtype=np.float32,
+                ).tobytes(),
+            ),
+            ("int64", [2], np.array([3, 4], dtype=np.int64).tobytes()),
+            None,
+        ),
+    )
+)
+
+multipart_prompt_logprobs = engine_outputs_wire(
+    engine_output_wire(
+        "req-1",
+        new_prompt_logprobs_tensors=(
+            (
+                "int64",
+                [2, 3],
+                np.array([[10, 11, 12], [13, 14, 15]], dtype=np.int64).tobytes(),
+            ),
+            (
+                "float32",
+                [2, 3],
+                np.array(
+                    [[10, 11, 12], [13, 14, 15]],
+                    dtype=np.float32,
+                ).tobytes(),
+            ),
+            ("int64", [2], np.array([3, 4], dtype=np.int64).tobytes()),
+            None,
+        ),
+    )
+)
+
+print(msgspec.msgpack.encode(request).hex())
+print(msgpack.packb(multimodal_request_wire, use_bin_type=True).hex())
+print(msgspec.msgpack.encode(outputs).hex())
+print(" ".join(frame.hex() for frame in encode_output_frames(inline_logprobs)))
+print(
+    " ".join(
+        frame.hex()
+        for frame in encode_output_frames(multipart_logprobs, size_threshold=1)
+    )
+)
+print(" ".join(frame.hex() for frame in encode_output_frames(inline_prompt_logprobs)))
+print(
+    " ".join(
+        frame.hex()
+        for frame in encode_output_frames(multipart_prompt_logprobs, size_threshold=1)
+    )
+)
diff --git a/rust/src/engine-core-client/src/transport.rs b/rust/src/engine-core-client/src/transport.rs
new file mode 100644
index 000000000000..0d6c49340af2
--- /dev/null
+++ b/rust/src/engine-core-client/src/transport.rs
@@ -0,0 +1,606 @@
+use std::collections::{BTreeMap, BTreeSet};
+use std::fmt::Debug;
+use std::ops::Deref;
+use std::time::Duration;
+
+use bytes::Bytes;
+use enum_as_inner::EnumAsInner;
+use thiserror_ext::AsReport;
+use tokio::sync::mpsc;
+use tokio::time::timeout;
+use tracing::{debug, error, info, trace, warn};
+use zeromq::prelude::{Socket, SocketRecv, SocketSend};
+use zeromq::util::PeerIdentity;
+use zeromq::{PullSocket, RouterSendHalf, RouterSocket, ZmqError, ZmqMessage};
+
+use crate::coordinator::CoordinatorBootstrap;
+use crate::error::{Error, Result, bail_unexpected_handshake_message};
+use crate::protocol::handshake::{
+    EngineCoreReadyResponse, HandshakeAddresses, HandshakeInitMessage, ReadyMessage,
+};
+use crate::protocol::{
+    EngineCoreOutputs, decode_engine_core_outputs, decode_msgpack, encode_msgpack,
+};
+
+/// Dedicated single-frame sentinel emitted by Python `EngineCoreProc` when the
+/// engine dies.
+pub const ENGINE_CORE_DEAD_SENTINEL: &[u8] = b"ENGINE_CORE_DEAD";
+
+/// Opaque routing identity of one engine on the frontend transport.
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct EngineId(Bytes);
+
+impl Debug for EngineId {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Display the engine id as a hex string for easier debugging.
+        write!(f, "EngineId({})", hex::encode(&self.0))
+    }
+}
+
+impl EngineId {
+    /// Convert the engine id into a ZMQ frame for sending.
+    pub fn to_frame(&self) -> Bytes {
+        self.0.clone()
+    }
+
+    /// Convert the engine id into a ZMQ frame for sending.
+    pub fn into_frame(self) -> Bytes {
+        self.0
+    }
+
+    /// Parse the Python-compatible engine index encoded in the routing
+    /// identity.
+    ///
+    /// Python `EngineCoreProc` currently uses a two-byte little-endian engine
+    /// index as its ROUTER/DEALER identity. Coordinator control messages
+    /// such as `START_DP_WAVE(exclude_engine_index)` need that engine-side
+    /// index rather than any frontend-local ordering.
+    pub fn engine_index(&self) -> Option<u32> {
+        if self.len() != 2 {
+            return None;
+        }
+        Some(u16::from_le_bytes([self[0], self[1]]) as u32)
+    }
+
+    /// Construct an engine id from the Python-compatible engine index encoding
+    /// (two-byte little-endian).
+    pub fn from_engine_index(value: u32) -> Self {
+        Self(Bytes::copy_from_slice(&(value as u16).to_le_bytes()))
+    }
+}
+
+impl Deref for EngineId {
+    type Target = [u8];
+
+    fn deref(&self) -> &Self::Target {
+        self.0.as_ref()
+    }
+}
+
+impl From<Vec<u8>> for EngineId {
+    fn from(value: Vec<u8>) -> Self {
+        Self(Bytes::from(value))
+    }
+}
+
+impl<const N: usize> From<&[u8; N]> for EngineId {
+    fn from(value: &[u8; N]) -> Self {
+        Self(Bytes::copy_from_slice(value))
+    }
+}
+
+impl TryFrom<EngineId> for PeerIdentity {
+    type Error = ZmqError;
+
+    fn try_from(value: EngineId) -> std::result::Result<Self, Self::Error> {
+        PeerIdentity::try_from(value.into_frame())
+    }
+}
+
+/// Per-engine handshake result collected while bootstrapping one shared
+/// transport.
+#[derive(Clone, Debug)]
+pub struct ConnectedEngine {
+    /// The identity of the connected engine.
+    pub engine_id: EngineId,
+    /// Post-initialization configuration received from the engine on the input
+    /// socket registration message. `None` until the registration is received.
+    pub ready_response: Option<EngineCoreReadyResponse>,
+}
+
+/// Represents the connected shared transport plus all registered engines after
+/// a successful multi-engine startup handshake.
+pub struct ConnectedTransport {
+    /// The local address of the shared input socket that all engines connect to
+    /// for receiving requests.
+    pub input_address: String,
+    /// The local address of the shared output socket that all engines connect
+    /// to for sending responses.
+    pub output_address: String,
+    /// All engines connected through the startup handshake.
+    pub engines: Vec<ConnectedEngine>,
+    /// Optional engine-facing coordinator transport used for in-process wave
+    /// coordination.
+    pub coordinator: Option<CoordinatorBootstrap>,
+
+    /// The sending half of the shared input socket.
+    pub input_send: RouterSendHalf,
+    /// The shared output socket for receiving responses from all engines.
+    pub output_socket: PullSocket,
+}
+
+#[derive(Clone, Debug, EnumAsInner)]
+enum EngineStartupState {
+    HelloReceived,
+    ReadyReceived,
+}
+
+/// Connect to one or more engines through the startup handshake protocol,
+/// returning the shared data-plane transport plus the registered engines.
+pub async fn connect_handshake(
+    handshake_address: &str,
+    engine_count: usize,
+    local_host: &str,
+    local_input_address: Option<&str>,
+    local_output_address: Option<&str>,
+    enable_inproc_coordinator: bool,
+    ready_timeout: Duration,
+) -> Result<ConnectedTransport> {
+    if engine_count == 0 {
+        bail_unexpected_handshake_message!("expected engine_count >= 1");
+    }
+
+    info!(
+        engine_count,
+        handshake_address, "waiting for engines to connect"
+    );
+
+    // 1. Bind shared local input/output sockets first so every engine receives the same data-plane
+    //    addresses during handshake.
+    debug!(
+        local_host,
+        ?ready_timeout,
+        engine_count,
+        "binding shared transport sockets"
+    );
+    let (input_address, mut input_socket, output_address, output_socket) =
+        bind_local_sockets(local_host, local_input_address, local_output_address).await?;
+    info!(%input_address, %output_address, "bound local transport sockets");
+
+    let mut coordinator = if enable_inproc_coordinator {
+        Some(CoordinatorBootstrap::bind(local_host).await?)
+    } else {
+        None
+    };
+
+    // 2. Bind the shared handshake socket once. All engines connect to this socket with their own
+    //    identities, and startup order does not matter.
+    let mut handshake_socket = RouterSocket::new();
+    handshake_socket.bind(handshake_address).await?;
+
+    let mut engines = BTreeMap::new();
+
+    // 3. Receive HELLO from every engine and send a matching INIT. When coordinator mode is
+    //    enabled, the engines will not emit READY until the coordinator barrier below completes.
+    while engines.len() < engine_count {
+        debug!(
+            handshake_address,
+            connected = engines.len(),
+            waiting_for = engine_count,
+            "waiting for engine HELLO"
+        );
+        let message = timeout(ready_timeout, handshake_socket.recv()).await.map_err(|_| {
+            Error::HandshakeTimeout {
+                stage: "HELLO",
+                timeout: ready_timeout,
+            }
+        })??;
+        let (engine_id, handshake_message) = decode_handshake_message(message, None)?;
+        match handshake_message.status.as_deref() {
+            Some("HELLO") => {
+                if engines.contains_key(&engine_id) {
+                    bail_unexpected_handshake_message!(
+                        "duplicate engine id {engine_id:?} observed during startup handshake"
+                    );
+                }
+                debug!(handshake_address, ?engine_id, "received HELLO from engine");
+
+                send_init_message(
+                    &mut handshake_socket,
+                    &engine_id,
+                    &input_address,
+                    &output_address,
+                    coordinator.as_ref(),
+                )
+                .await?;
+                debug!(handshake_address, ?engine_id, "sent INIT to engine");
+
+                engines.insert(engine_id.clone(), EngineStartupState::HelloReceived);
+            }
+            Some("READY") => {
+                if coordinator.is_some() {
+                    bail_unexpected_handshake_message!(
+                        "received READY for engine id {engine_id:?} before coordinator startup gate completed"
+                    );
+                }
+                let state = match engines.get_mut(&engine_id) {
+                    Some(state) if !state.is_ready_received() => state,
+                    _ => {
+                        bail_unexpected_handshake_message!(
+                            "received READY for unexpected or duplicate engine id {engine_id:?}"
+                        );
+                    }
+                };
+                debug!(
+                    handshake_address,
+                    ?engine_id,
+                    ?handshake_message,
+                    "received overlapping READY from engine during HELLO phase"
+                );
+                *state = EngineStartupState::ReadyReceived;
+            }
+            other => {
+                bail_unexpected_handshake_message!("unexpected handshake status {other:?}");
+            }
+        }
+    }
+
+    // 4. Optional coordinator startup gate. Without coordinator there is nothing to do.
+    if let Some(coordinator) = coordinator.as_mut() {
+        coordinator.wait_for_startup_gate(engine_count, ready_timeout).await?;
+    }
+
+    // 5. After the optional gate has opened, every engine may now send READY.
+    while engines.values().any(|state| !state.is_ready_received()) {
+        debug!(
+            handshake_address,
+            connected = engines.len(),
+            ready = engines.values().filter(|state| state.is_ready_received()).count(),
+            waiting_for = engine_count,
+            "waiting for engine READY"
+        );
+        let message = timeout(ready_timeout, handshake_socket.recv()).await.map_err(|_| {
+            Error::HandshakeTimeout {
+                stage: "READY",
+                timeout: ready_timeout,
+            }
+        })??;
+        let (engine_id, handshake_message) = decode_handshake_message(message, None)?;
+        match handshake_message.status.as_deref() {
+            Some("READY") => {
+                let state = match engines.get_mut(&engine_id) {
+                    Some(state) if !state.is_ready_received() => state,
+                    _ => {
+                        bail_unexpected_handshake_message!(
+                            "received READY for unexpected or duplicate engine id {engine_id:?}"
+                        );
+                    }
+                };
+                debug!(
+                    handshake_address,
+                    ?engine_id,
+                    ?handshake_message,
+                    "received READY from engine"
+                );
+                *state = EngineStartupState::ReadyReceived;
+            }
+            Some("HELLO") => {
+                bail_unexpected_handshake_message!(
+                    "received duplicate HELLO for engine id {engine_id:?} after INIT phase completed"
+                );
+            }
+            other => {
+                bail_unexpected_handshake_message!("unexpected handshake status {other:?}");
+            }
+        }
+    }
+
+    // 4. Wait for every engine to connect to the shared input socket and register itself. The
+    //    `ready_response` is a placeholder; it is populated for each engine by
+    //    `wait_for_input_registrations` below.
+    let mut engines: Vec<_> = engines
+        .into_keys()
+        .map(|engine_id| ConnectedEngine {
+            engine_id,
+            ready_response: None,
+        })
+        .collect();
+
+    wait_for_input_registrations(&mut input_socket, &mut engines, ready_timeout).await?;
+    debug!(
+        engine_count = engines.len(),
+        "all engines registered on shared input socket"
+    );
+
+    info!(engine_count = engines.len(), "engines connected");
+
+    let (input_send, _) = input_socket.split();
+
+    Ok(ConnectedTransport {
+        input_address,
+        output_address,
+        input_send,
+        output_socket,
+        engines,
+        coordinator,
+    })
+}
+
+/// Bind to Python-supplied frontend transport addresses and wait for
+/// already-initialized engines to register themselves on the input socket.
+///
+/// This path mirrors Python's externally managed `AsyncMPClient` bootstrap
+/// model: the addresses are already fixed by the supervisor, and engine
+/// identities are synthesized from contiguous rank order instead of being
+/// discovered through a Rust-owned handshake.
+pub async fn connect_bootstrapped(
+    input_address: &str,
+    output_address: &str,
+    engine_count: usize,
+    ready_timeout: Duration,
+) -> Result<ConnectedTransport> {
+    if engine_count == 0 {
+        bail_unexpected_handshake_message!("expected engine_count >= 1");
+    }
+
+    let mut input_socket = RouterSocket::new();
+    let input_address = input_socket.bind(input_address).await?.to_string();
+
+    let mut output_socket = PullSocket::new();
+    let output_address = output_socket.bind(output_address).await?.to_string();
+
+    // TODO: follow start rank
+    let mut engines = (0..engine_count)
+        .map(|index| ConnectedEngine {
+            engine_id: EngineId::from((index as u16).to_le_bytes().to_vec()),
+            ready_response: None,
+        })
+        .collect::<Vec<_>>();
+
+    wait_for_input_registrations(&mut input_socket, &mut engines, ready_timeout).await?;
+    info!(
+        engine_count = engines.len(),
+        "bootstrapped engines connected"
+    );
+
+    let (input_send, _) = input_socket.split();
+
+    Ok(ConnectedTransport {
+        input_address,
+        output_address,
+        engines,
+        coordinator: None,
+        input_send,
+        output_socket,
+    })
+}
+
+/// Bind new input and output sockets.
+async fn bind_local_sockets(
+    local_host: &str,
+    local_input_address: Option<&str>,
+    local_output_address: Option<&str>,
+) -> Result<(String, RouterSocket, String, PullSocket)> {
+    let mut input_socket = RouterSocket::new();
+    let input_bind_address = local_input_address
+        .map(str::to_owned)
+        .unwrap_or_else(|| format!("tcp://{local_host}:0"));
+    let input_address = input_socket.bind(&input_bind_address).await?.to_string();
+
+    let mut output_socket = PullSocket::new();
+    let output_bind_address = local_output_address
+        .map(str::to_owned)
+        .unwrap_or_else(|| format!("tcp://{local_host}:0"));
+    let output_address = output_socket.bind(&output_bind_address).await?.to_string();
+
+    Ok((input_address, input_socket, output_address, output_socket))
+}
+
+/// Decode a handshake message and validate its structure and identity.
+fn decode_handshake_message(
+    message: ZmqMessage,
+    expected_id: Option<&EngineId>,
+) -> Result<(EngineId, ReadyMessage)> {
+    if message.len() != 2 {
+        bail_unexpected_handshake_message!("expected 2 frames, got {}", message.len());
+    }
+
+    let frames = message.into_vec();
+    let actual_id = EngineId(frames[0].clone());
+    if let Some(expected_id) = expected_id
+        && actual_id != *expected_id
+    {
+        return Err(Error::UnexpectedHandshakeIdentity {
+            expected: expected_id.to_vec(),
+            actual: actual_id.to_vec(),
+        });
+    }
+
+    let handshake_message: ReadyMessage = decode_msgpack(&frames[1])?;
+    Ok((actual_id, handshake_message))
+}
+
+/// Send an INIT message to the engine with the local socket addresses for the
+/// engine to connect to, using the handshake socket.
+async fn send_init_message(
+    handshake_socket: &mut RouterSocket,
+    engine_id: &EngineId,
+    input_address: &str,
+    output_address: &str,
+    coordinator: Option<&CoordinatorBootstrap>,
+) -> Result<()> {
+    let init_message = HandshakeInitMessage {
+        addresses: HandshakeAddresses {
+            inputs: vec![input_address.to_string()],
+            outputs: vec![output_address.to_string()],
+            coordinator_input: coordinator.map(|c| c.input_address.clone()),
+            coordinator_output: coordinator.map(|c| c.output_address.clone()),
+            frontend_stats_publish_address: None,
+        },
+        parallel_config: Default::default(),
+    };
+    let payload = encode_msgpack(&init_message)?;
+    let message = ZmqMessage::try_from(vec![engine_id.to_frame(), Bytes::from(payload)])
+        .expect("handshake router messages must contain identity and payload");
+    handshake_socket.send(message).await?;
+    Ok(())
+}
+
+/// Receive the input registration message from each engine and validate its
+/// identity.
+///
+/// Each registration contains 2 frames: `[identity, ready-payload]`.
+///
+/// Since vLLM commit `c8d98f81f676552c263f35bbde55e6edbe81b4e8` ("[Core]
+/// Simplify API server handshake"), the payload is a msgpack-encoded
+/// [`EngineCoreReadyResponse`] carrying post-initialization values such as
+/// `max_model_len`.
+///
+/// Older engines sent an empty second frame here just to establish the
+/// ROUTER/DEALER backchannel, with no structured payload on the input socket.
+/// We continue to tolerate that legacy shape so the frontend can still connect
+/// to slightly older local engine checkouts.
+async fn wait_for_input_registrations(
+    input_socket: &mut RouterSocket,
+    engines: &mut [ConnectedEngine],
+    ready_timeout: Duration,
+) -> Result<()> {
+    let mut pending = engines.iter().map(|e| e.engine_id.clone()).collect::<BTreeSet<_>>();
+
+    while !pending.is_empty() {
+        let registration = timeout(ready_timeout, input_socket.recv()).await.map_err(|_| {
+            Error::InputRegistrationTimeout {
+                timeout: ready_timeout,
+            }
+        })??;
+
+        if registration.len() != 2 {
+            bail_unexpected_handshake_message!(
+                "expected 2 frames for engine input registration, got {}",
+                registration.len()
+            );
+        }
+
+        let frames = registration.into_vec();
+        let actual_id = EngineId(frames[0].clone());
+        if !pending.remove(&actual_id) {
+            bail_unexpected_handshake_message!(
+                "received input registration for unexpected engine id {actual_id:?}"
+            );
+        }
+
+        let ready_response = if frames[1].is_empty() {
+            debug!(
+                ?actual_id,
+                "received legacy empty input registration from engine"
+            );
+            None
+        } else {
+            let ready_response: EngineCoreReadyResponse = decode_msgpack(&frames[1])?;
+            debug!(
+                ?actual_id,
+                ?ready_response,
+                "received input registration from engine"
+            );
+            Some(ready_response)
+        };
+
+        // Store the ready response in the corresponding engine entry.
+        if let Some(engine) = engines.iter_mut().find(|e| e.engine_id == actual_id) {
+            engine.ready_response = ready_response;
+        }
+    }
+
+    Ok(())
+}
+
+/// Send an encoded message to the engine through the input socket.
+pub async fn send_message(
+    input_send: &mut RouterSendHalf,
+    engine_id: &EngineId,
+    request_type: Bytes,
+    payload: Vec<u8>,
+) -> Result<()> {
+    let message = ZmqMessage::try_from(vec![
+        engine_id.to_frame(),
+        request_type,
+        Bytes::from(payload),
+    ])
+    .expect("router messages must contain identity and payload");
+
+    trace!(
+        ?engine_id,
+        frame_count = message.len(),
+        "sending ZMQ message"
+    );
+    input_send.send(message).await?;
+    Ok(())
+}
+
+/// Run the output loop to receive messages from the engine and send them to the
+/// provided channel.
+pub async fn run_output_loop(
+    mut output_socket: PullSocket,
+    tx: mpsc::Sender<Result<EngineCoreOutputs>>,
+) {
+    loop {
+        let message = match output_socket.recv().await {
+            Ok(message) => message,
+            Err(error) => {
+                // If we fail to receive a message from the engine, it's likely that the engine
+                // has crashed or become unreachable, so we should notify the
+                // client and shut down the output loop.
+                error!(error = %error.as_report(), "failed to receive output message");
+                let _ = tx.send(Err(Error::Transport(error))).await;
+                return;
+            }
+        };
+
+        let frame_count = message.len();
+        trace!(frame_count, "received output message");
+        let frames = message.into_vec();
+        let frame = frames.first().expect("output message must have at least one frame");
+        let frame_len = frame.len();
+        if frame.as_ref() == ENGINE_CORE_DEAD_SENTINEL {
+            warn!("received ENGINE_CORE_DEAD sentinel from engine");
+            let _ = tx.send(Err(Error::EngineCoreDead)).await;
+            return;
+        }
+        let decoded = match decode_engine_core_outputs(&frames) {
+            Ok(decoded) => {
+                trace!(frame_len, outputs = ?decoded, "decoded output message");
+                Ok(decoded)
+            }
+            Err(error) => {
+                // If we fail to decode the message from the engine, notify the client but keep
+                // the output loop running to continue processing future
+                // messages from the engine.
+                warn!(frame_len, error = %error.as_report(), "failed to decode output message");
+                Err(error)
+            }
+        };
+
+        if tx.send(decoded).await.is_err() {
+            // If we fail to send the decoded message to the client, it's likely that the
+            // client has shut down, so we should shut down the output loop as
+            // well.
+            warn!("output loop rx dropped, shutting down output loop");
+            return;
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::bind_local_sockets;
+
+    #[tokio::test]
+    async fn bind_local_sockets_resolves_zero_port_bindings() {
+        let (input_address, _input_socket, output_address, _output_socket) =
+            bind_local_sockets("127.0.0.1", None, None).await.expect("bind local sockets");
+
+        assert!(input_address.starts_with("tcp://127.0.0.1:"));
+        assert!(output_address.starts_with("tcp://127.0.0.1:"));
+        assert_ne!(input_address, output_address);
+    }
+}
diff --git a/rust/src/llm/Cargo.toml b/rust/src/llm/Cargo.toml
new file mode 100644
index 000000000000..c7924b85db7e
--- /dev/null
+++ b/rust/src/llm/Cargo.toml
@@ -0,0 +1,38 @@
+[package]
+name = "vllm-llm"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[features]
+test-util = []
+
+[dependencies]
+easy-ext.workspace = true
+enum-as-inner.workspace = true
+futures.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+thiserror.workspace = true
+tokio.workspace = true
+tokio-util.workspace = true
+tracing.workspace = true
+uuid.workspace = true
+vllm-engine-core-client.workspace = true
+vllm-metrics.workspace = true
+
+[dev-dependencies]
+anyhow.workspace = true
+bytes.workspace = true
+clap.workspace = true
+expect-test.workspace = true
+rmp-serde.workspace = true
+tokio.workspace = true
+tracing-subscriber.workspace = true
+uuid.workspace = true
+vllm-engine-core-client = { workspace = true, features = ["test-util"] }
+vllm-metrics.workspace = true
+zeromq.workspace = true
+
+[lints]
+workspace = true
diff --git a/rust/src/llm/examples/README.md b/rust/src/llm/examples/README.md
new file mode 100644
index 000000000000..4764dba3a498
--- /dev/null
+++ b/rust/src/llm/examples/README.md
@@ -0,0 +1,29 @@
+# LLM Smoke Test
+
+Start headless `vllm`:
+
+```bash
+source ../vllm/.venv/bin/activate
+HF_HUB_OFFLINE=1 \
+VLLM_LOGGING_LEVEL=DEBUG \
+VLLM_CPU_KVCACHE_SPACE=2 \
+VLLM_HOST_IP=127.0.0.1 \
+VLLM_LOOPBACK_IP=127.0.0.1 \
+python3 -m vllm.entrypoints.cli.main serve Qwen/Qwen3-0.6B \
+  --headless \
+  --data-parallel-address 127.0.0.1 \
+  --data-parallel-rpc-port 62100 \
+  --data-parallel-size-local 1 \
+  --max-model-len 512 \
+  --dtype float16
+```
+
+Run the Rust smoke test through the `vllm-llm` generate interface:
+
+```bash
+cargo run -p vllm-llm --example external_engine_smoke -- \
+  --handshake-address tcp://127.0.0.1:62100 \
+  --host 127.0.0.1
+```
+
+IMPORTANT: You must restart `vllm` each time you run the smoke test, as the vLLM engine cannot manage frontend closures and subsequent reconnects. In other words, do not reuse existing `vllm` instances, if any.
diff --git a/rust/src/llm/examples/external_engine_smoke.rs b/rust/src/llm/examples/external_engine_smoke.rs
new file mode 100644
index 000000000000..c2d0e6bdfa82
--- /dev/null
+++ b/rust/src/llm/examples/external_engine_smoke.rs
@@ -0,0 +1,144 @@
+use std::time::Duration;
+
+use anyhow::{Context, Result, bail};
+use clap::Parser;
+use futures::StreamExt as _;
+use tokio::time::timeout;
+use tracing_subscriber::EnvFilter;
+use vllm_engine_core_client::protocol::EngineCoreSamplingParams;
+use vllm_engine_core_client::{EngineCoreClient, EngineCoreClientConfig, TransportMode};
+use vllm_llm::{FinishReason, GenerateOutputStream, GenerateRequest, Llm};
+
+const PROMPT_TOKEN_IDS: &[u32] = &[20841, 448, 6896, 25, 23811];
+
+#[derive(Debug, Parser)]
+#[command(about = "Smoke-test the Rust LLM facade against an external vLLM engine.")]
+struct Args {
+    #[arg(long)]
+    handshake_address: String,
+    #[arg(long, default_value_t = 1)]
+    engine_count: usize,
+    #[arg(long, default_value = "Qwen/Qwen3-0.6B")]
+    model: String,
+    #[arg(long, default_value = "127.0.0.1")]
+    host: String,
+    #[arg(long, default_value_t = 0)]
+    client_index: u32,
+    #[arg(long, default_value_t = 30)]
+    ready_timeout_secs: u64,
+    #[arg(long, default_value_t = 120)]
+    output_timeout_secs: u64,
+    #[arg(long, default_value_t = 5)]
+    max_tokens: u32,
+}
+
+fn unique_request_id() -> String {
+    format!("rust-llm-smoke-{}", uuid::Uuid::new_v4())
+}
+
+fn init_tracing() {
+    let filter = EnvFilter::try_from_default_env()
+        .unwrap_or_else(|_| EnvFilter::new("vllm_engine_core_client=debug"));
+    let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init();
+}
+
+fn build_request(request_id: String, max_tokens: u32) -> GenerateRequest {
+    GenerateRequest {
+        request_id,
+        prompt_token_ids: PROMPT_TOKEN_IDS.to_vec(),
+        sampling_params: EngineCoreSamplingParams {
+            max_tokens,
+            ..EngineCoreSamplingParams::for_test()
+        },
+        mm_features: None,
+        arrival_time: None,
+        cache_salt: None,
+        trace_headers: None,
+        priority: 0,
+        data_parallel_rank: None,
+        reasoning_ended: None,
+        lora_request: None,
+    }
+}
+
+#[derive(Debug)]
+struct CompletedRequest {
+    token_ids: Vec<u32>,
+    finish_reason: FinishReason,
+}
+
+async fn wait_for_request_completion(mut stream: GenerateOutputStream) -> Result<CompletedRequest> {
+    let output = match stream.next().await {
+        Some(output) => output.context("failed to receive request output")?,
+        None => bail!("request stream ended without a final output"),
+    };
+
+    let none = stream.next().await;
+    assert!(
+        none.is_none(),
+        "expected final-only stream to end after the final output"
+    );
+
+    let finish_reason = output.finish_reason.expect("final-only output must have a finish reason");
+    let token_ids = output.token_ids;
+
+    Ok(CompletedRequest {
+        token_ids,
+        finish_reason,
+    })
+}
+
+async fn wait_for_timeout(
+    stream: GenerateOutputStream,
+    output_timeout: Duration,
+) -> Result<CompletedRequest> {
+    timeout(output_timeout, wait_for_request_completion(stream))
+        .await
+        .context("timed out waiting for request output")?
+}
+
+#[tokio::main(flavor = "multi_thread")]
+async fn main() -> Result<()> {
+    init_tracing();
+    let args = Args::parse();
+    let ready_timeout = Duration::from_secs(args.ready_timeout_secs);
+    let output_timeout = Duration::from_secs(args.output_timeout_secs);
+    let request_id = unique_request_id();
+    let client = EngineCoreClient::connect(EngineCoreClientConfig {
+        transport_mode: TransportMode::HandshakeOwner {
+            handshake_address: args.handshake_address.clone(),
+            advertised_host: args.host.clone(),
+            engine_count: args.engine_count,
+            ready_timeout,
+            local_input_address: None,
+            local_output_address: None,
+        },
+        coordinator_mode: None,
+        model_name: args.model.clone(),
+        client_index: args.client_index,
+    })
+    .await
+    .context("failed to connect to external vLLM engine")?;
+
+    println!("model={}", args.model);
+    println!("handshake_address={}", args.handshake_address);
+    println!("engine_count={}", args.engine_count);
+    println!("input_address={}", client.input_address());
+    println!("output_address={}", client.output_address());
+    println!("engine_identities={:x?}", client.engine_identities());
+
+    let llm = Llm::new(client);
+    let request = build_request(request_id.clone(), args.max_tokens);
+    println!("request_id={request_id}");
+    println!("prompt_token_ids={PROMPT_TOKEN_IDS:?}");
+
+    let stream = llm.generate(request).await.context("failed to submit generate request")?;
+    let output = wait_for_timeout(stream, output_timeout).await?;
+
+    llm.shutdown().await.context("failed to shut down llm client")?;
+
+    println!("token_ids={:?}", output.token_ids);
+    println!("finish_reason={:?}", output.finish_reason);
+
+    Ok(())
+}
diff --git a/rust/src/llm/src/error.rs b/rust/src/llm/src/error.rs
new file mode 100644
index 000000000000..5865e1fcc39e
--- /dev/null
+++ b/rust/src/llm/src/error.rs
@@ -0,0 +1,12 @@
+use thiserror::Error;
+
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// Public error type for the Rust `llm` facade.
+#[derive(Debug, Error)]
+pub enum Error {
+    #[error("generate request `{request_id}` has an empty prompt_token_ids")]
+    EmptyPromptTokenIds { request_id: String },
+    #[error("engine-core error")]
+    EngineCoreClient(#[from] vllm_engine_core_client::Error),
+}
diff --git a/rust/src/llm/src/lib.rs b/rust/src/llm/src/lib.rs
new file mode 100644
index 000000000000..d47935259b59
--- /dev/null
+++ b/rust/src/llm/src/lib.rs
@@ -0,0 +1,100 @@
+use tracing::Span;
+use vllm_engine_core_client::EngineCoreClient;
+
+mod error;
+mod log_stats;
+mod output;
+mod request;
+mod request_metrics;
+
+pub use error::{Error, Result};
+pub use output::{
+    CollectedGenerateOutput, FinishReason, GenerateOutput, GenerateOutputStream,
+    GenerateOutputStreamExt, GeneratePromptInfo,
+};
+pub use request::GenerateRequest;
+pub use vllm_engine_core_client::protocol::logprobs::{Logprobs, PositionLogprobs, TokenLogprob};
+
+use crate::log_stats::StatsLogger;
+use crate::request_metrics::RequestMetricsTracker;
+
+/// Thin generate-only facade over [`EngineCoreClient`].
+///
+/// This mirrors the narrow public shape of Python `AsyncLLM.generate()` and
+/// `abort()`, but keeps the boundary close to raw engine-core requests and
+/// outputs.
+pub struct Llm {
+    client: EngineCoreClient,
+    randomize_request_id: bool,
+    stats_logger: Option<StatsLogger>,
+}
+
+impl Llm {
+    /// Create a new minimal LLM facade from an already connected engine-core
+    /// client.
+    pub fn new(client: EngineCoreClient) -> Self {
+        Self {
+            client,
+            randomize_request_id: true,
+            stats_logger: None,
+        }
+    }
+
+    /// Enable or disable periodic stats logging.
+    pub fn with_log_stats(mut self, enabled: bool) -> Self {
+        if enabled {
+            let stats_logger = StatsLogger::start(
+                self.client.model_name().to_string(),
+                self.client.engine_count(),
+            );
+            self.stats_logger = Some(stats_logger);
+        } else {
+            self.stats_logger = None;
+        }
+        self
+    }
+
+    /// Control whether external request ids are randomized before reaching
+    /// engine-core.
+    pub fn with_request_id_randomization(mut self, enabled: bool) -> Self {
+        self.randomize_request_id = enabled;
+        self
+    }
+
+    /// Expose the underlying engine-core client for low-level utility/admin
+    /// calls.
+    pub fn engine_core_client(&self) -> &EngineCoreClient {
+        &self.client
+    }
+
+    /// Submit one tokenized generate request and return a per-request output
+    /// stream.
+    pub async fn generate(&self, req: GenerateRequest) -> Result<GenerateOutputStream> {
+        let prepared = req.prepare(self.randomize_request_id)?;
+        let prompt_token_ids = prepared.prompt_token_ids().into();
+
+        // Record internal engine-core request ID in the current tracing span.
+        Span::current().record("engine_request_id", &prepared.engine_request.request_id);
+
+        let request_metrics = RequestMetricsTracker::new(
+            self.client.model_name().to_string(),
+            prepared.engine_request.arrival_time,
+            prepared.prompt_token_ids().len() as u32,
+            (prepared.engine_request.sampling_params.as_ref()).map(|p| p.max_tokens),
+            1,
+        );
+        let stream = self.client.call(prepared.engine_request).await?;
+
+        Ok(GenerateOutputStream::new(
+            prompt_token_ids,
+            stream,
+            request_metrics,
+        ))
+    }
+
+    /// Shut down the underlying engine-core client and its background tasks.
+    pub async fn shutdown(self) -> Result<()> {
+        self.client.shutdown().await?;
+        Ok(())
+    }
+}
diff --git a/rust/src/llm/src/log_stats.rs b/rust/src/llm/src/log_stats.rs
new file mode 100644
index 000000000000..7d3a149731b3
--- /dev/null
+++ b/rust/src/llm/src/log_stats.rs
@@ -0,0 +1,199 @@
+use std::fmt::Write;
+use std::time::{Duration, Instant};
+
+use tokio_util::task::AbortOnDropHandle;
+use tracing::{debug, info};
+use vllm_metrics::{
+    EngineLabels, F64Gauge, METRICS, PromptTokenSourceLabels, U64Counter, U64Gauge,
+};
+
+const LOG_STATS_INTERVAL: Duration = Duration::from_secs(10);
+
+/// Cached, cloned metric handles for one engine. Each clone shares the same
+/// underlying `Arc<Atomic*>` as the prometheus `Family` entry, so reads go
+/// straight to the atomic with no lock.
+struct EngineMetrics {
+    // Counters for throughput deltas.
+    prompt_tokens_computed: U64Counter,
+    generation_tokens: U64Counter,
+    prefix_cache_queries: U64Counter,
+    prefix_cache_hits: U64Counter,
+
+    // Gauges for instantaneous scheduler state.
+    scheduler_running: U64Gauge,
+    scheduler_waiting: U64Gauge,
+    kv_cache_usage: F64Gauge,
+}
+
+/// Accumulated snapshot values from the last logging interval, used to compute
+/// deltas.
+struct CounterSnapshot {
+    prompt_tokens: u64,
+    generation_tokens: u64,
+    prefix_cache_queries: u64,
+    prefix_cache_hits: u64,
+}
+
+/// Periodic stats logger that mirrors Python vLLM's `LoggingStatLogger`.
+///
+/// Spawns a background task that logs throughput and scheduler state at a fixed
+/// interval. When idle (both current and previous throughputs are zero), logs
+/// at DEBUG level. When load drops to zero, emits one final INFO-level line
+/// before going quiet.
+pub(crate) struct StatsLogger {
+    _task: AbortOnDropHandle<()>,
+}
+
+impl StatsLogger {
+    /// Start the background stats logging task.
+    pub(crate) fn start(model_name: String, engine_count: usize) -> Self {
+        let task = AbortOnDropHandle::new(tokio::spawn(async move {
+            run_stats_logger(model_name, engine_count).await;
+        }));
+        Self { _task: task }
+    }
+}
+
+/// Resolve and clone all metric handles once so the hot path is lock-free.
+fn resolve_engine_metrics(model_name: &str, engine_count: usize) -> Vec<EngineMetrics> {
+    let m = &METRICS;
+    (0..engine_count as u32)
+        .map(|engine| {
+            let el = EngineLabels {
+                model_name: model_name.to_string(),
+                engine,
+            };
+            let pt = PromptTokenSourceLabels {
+                model_name: model_name.to_string(),
+                engine,
+                source: "local_compute",
+            };
+            EngineMetrics {
+                // Use "local_compute" source for prompt throughput (excludes
+                // cached/transferred tokens), matching Python's
+                // `iteration_stats.prompt_token_stats.computed`.
+                prompt_tokens_computed: m.request.prompt_tokens_by_source.get_or_create_owned(&pt),
+                generation_tokens: m.request.generation_tokens.get_or_create_owned(&el),
+                prefix_cache_queries: m.scheduler.prefix_cache_queries.get_or_create_owned(&el),
+                prefix_cache_hits: m.scheduler.prefix_cache_hits.get_or_create_owned(&el),
+                scheduler_running: m.scheduler.scheduler_running.get_or_create_owned(&el),
+                scheduler_waiting: m.scheduler.scheduler_waiting.get_or_create_owned(&el),
+                kv_cache_usage: m.scheduler.kv_cache_usage.get_or_create_owned(&el),
+            }
+        })
+        .collect()
+}
+
+async fn run_stats_logger(model_name: String, engine_count: usize) {
+    let engines = resolve_engine_metrics(&model_name, engine_count);
+
+    let mut interval = tokio::time::interval(LOG_STATS_INTERVAL);
+    interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
+    // The first tick fires immediately; skip it so the first log is after one full
+    // interval.
+    interval.tick().await;
+
+    let mut prev = read_counters(&engines);
+    let mut last_log_time = Instant::now();
+    let mut last_prompt_throughput: f64 = 0.0;
+    let mut last_generation_throughput: f64 = 0.0;
+
+    let mut msg = String::new();
+    loop {
+        interval.tick().await;
+
+        let now = Instant::now();
+        let elapsed = now.duration_since(last_log_time).as_secs_f64();
+        if elapsed <= 0.0 {
+            continue;
+        }
+
+        let curr = read_counters(&engines);
+
+        let prompt_throughput =
+            curr.prompt_tokens.wrapping_sub(prev.prompt_tokens) as f64 / elapsed;
+        let generation_throughput =
+            curr.generation_tokens.wrapping_sub(prev.generation_tokens) as f64 / elapsed;
+
+        // Idle = both current and previous throughputs are zero.
+        let is_idle = prompt_throughput == 0.0
+            && generation_throughput == 0.0
+            && last_prompt_throughput == 0.0
+            && last_generation_throughput == 0.0;
+
+        // Read scheduler gauges (aggregate across engines).
+        let (num_running, num_waiting, kv_cache_usage) = read_scheduler_gauges(&engines);
+
+        // Compute prefix cache hit rate over this interval.
+        let delta_queries = curr.prefix_cache_queries.wrapping_sub(prev.prefix_cache_queries);
+        let prefix_cache_hit_rate = if delta_queries > 0 {
+            let delta_hits = curr.prefix_cache_hits.wrapping_sub(prev.prefix_cache_hits);
+            delta_hits as f64 / delta_queries as f64 * 100.0
+        } else {
+            0.0
+        };
+
+        // Build the log line.
+        msg.clear();
+        write!(
+            msg,
+            "Avg prompt tput: {prompt_throughput:.1} toks/s, \
+             Avg generation tput: {generation_throughput:.1} toks/s, \
+             Reqs Running: {num_running}, \
+             Waiting: {num_waiting}, \
+             GPU KV cache used: {:.1}%, \
+             Prefix cache hit rate: {prefix_cache_hit_rate:.1}%",
+            kv_cache_usage * 100.0,
+        )
+        .unwrap();
+
+        if is_idle {
+            debug!("{msg}");
+        } else {
+            info!("{msg}");
+        }
+
+        last_prompt_throughput = prompt_throughput;
+        last_generation_throughput = generation_throughput;
+        last_log_time = now;
+        prev = curr;
+    }
+}
+
+/// Read the current cumulative counter values for throughput computation.
+fn read_counters(engines: &[EngineMetrics]) -> CounterSnapshot {
+    let mut snap = CounterSnapshot {
+        prompt_tokens: 0,
+        generation_tokens: 0,
+        prefix_cache_queries: 0,
+        prefix_cache_hits: 0,
+    };
+    for e in engines {
+        snap.prompt_tokens += e.prompt_tokens_computed.get();
+        snap.generation_tokens += e.generation_tokens.get();
+        snap.prefix_cache_queries += e.prefix_cache_queries.get();
+        snap.prefix_cache_hits += e.prefix_cache_hits.get();
+    }
+    snap
+}
+
+/// Read the current scheduler gauge values, aggregated across engines.
+fn read_scheduler_gauges(engines: &[EngineMetrics]) -> (u64, u64, f64) {
+    let mut num_running = 0u64;
+    let mut num_waiting = 0u64;
+    let mut kv_cache_usage_sum = 0.0f64;
+
+    for e in engines {
+        num_running += e.scheduler_running.get();
+        num_waiting += e.scheduler_waiting.get();
+        kv_cache_usage_sum += e.kv_cache_usage.get();
+    }
+
+    let kv_cache_usage = if !engines.is_empty() {
+        kv_cache_usage_sum / engines.len() as f64
+    } else {
+        0.0
+    };
+
+    (num_running, num_waiting, kv_cache_usage)
+}
diff --git a/rust/src/llm/src/output.rs b/rust/src/llm/src/output.rs
new file mode 100644
index 000000000000..94d9acb3fe8a
--- /dev/null
+++ b/rust/src/llm/src/output.rs
@@ -0,0 +1,346 @@
+use std::future::Future;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{Context, Poll, ready};
+
+use enum_as_inner::EnumAsInner;
+use futures::stream::FusedStream;
+use futures::{Stream, StreamExt as _, pin_mut};
+use serde::{Deserialize, Serialize};
+use vllm_engine_core_client::protocol::logprobs::Logprobs;
+use vllm_engine_core_client::protocol::{EngineCoreFinishReason, StopReason};
+use vllm_engine_core_client::{AbortCause, EngineCoreOutputStream};
+
+use crate::error::Result;
+use crate::request_metrics::{RequestMetricsTracker, current_unix_timestamp_secs};
+
+/// Final raw token output plus terminal stream metadata.
+#[derive(Debug, Clone, PartialEq)]
+pub struct CollectedGenerateOutput {
+    pub request_id: String,
+    pub prompt_token_ids: Vec<u32>,
+    pub prompt_logprobs: Option<Logprobs>,
+    pub token_ids: Vec<u32>,
+    pub logprobs: Option<Logprobs>,
+    pub finish_reason: FinishReason,
+    /// Connector-specific KV transfer parameters for disaggregated serving.
+    pub kv_transfer_params: Option<serde_json::Value>,
+}
+
+/// Prompt-scoped metadata emitted only once on the first [`GenerateOutput`] for
+/// one request.
+#[derive(Debug, Clone, PartialEq)]
+pub struct GeneratePromptInfo {
+    /// Original prompt token IDs for this request.
+    pub prompt_token_ids: Arc<[u32]>,
+    /// Prompt logprobs returned by engine-core for scored prompt positions,
+    /// when requested.
+    pub prompt_logprobs: Option<Logprobs>,
+}
+
+/// The reason a request finished.
+///
+/// This is a higher-level abstraction over engine-core's finish and stop
+/// reasons.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, EnumAsInner)]
+pub enum FinishReason {
+    /// Generation stopped for a stop string, stop token, or EOS.
+    ///
+    /// The inner stop reason is present for explicit stop strings or stop
+    /// tokens, and absent for EOS-driven stops.
+    Stop(Option<StopReason>),
+    /// `max_tokens` or `max_model_len` was reached.
+    Length,
+    /// The request was aborted by the client.
+    Abort,
+    /// A retryable request-level internal error occurred.
+    Error,
+    /// A repetitive token pattern was detected.
+    Repetition,
+}
+
+impl FinishReason {
+    /// Construct a stop finish reason caused by EOS rather than an explicit
+    /// stop string/token.
+    pub fn stop_eos() -> Self {
+        Self::Stop(None)
+    }
+
+    /// Returns a human-readable string for this finish reason, used for metrics
+    /// and reporting.
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::Stop(_) => "stop",
+            Self::Length => "length",
+            Self::Abort => "abort",
+            Self::Error => "error",
+            Self::Repetition => "repetition",
+        }
+    }
+
+    /// If this is a stop finish reason, returns the inner stop reason if it
+    /// exists.
+    pub fn as_stop_reason(&self) -> Option<&StopReason> {
+        match self {
+            Self::Stop(stop_reason) => stop_reason.as_ref(),
+            _ => None,
+        }
+    }
+
+    /// If this is a stop finish reason, returns the inner stop reason if it
+    /// exists.
+    pub fn into_stop_reason(self) -> Option<StopReason> {
+        match self {
+            Self::Stop(stop_reason) => stop_reason,
+            _ => None,
+        }
+    }
+}
+
+fn finish_reason_from_engine(
+    finish_reason: Option<EngineCoreFinishReason>,
+    stop_reason: Option<StopReason>,
+) -> Option<FinishReason> {
+    finish_reason.map(|reason| match reason {
+        EngineCoreFinishReason::Stop => FinishReason::Stop(stop_reason),
+        EngineCoreFinishReason::Length => FinishReason::Length,
+        EngineCoreFinishReason::Abort => FinishReason::Abort,
+        EngineCoreFinishReason::Error => FinishReason::Error,
+        EngineCoreFinishReason::Repetition => FinishReason::Repetition,
+    })
+}
+
+/// Token and logprob output item returned by [`GenerateOutputStream`].
+///
+/// Original Python output reference:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/outputs.py#L85-L143>
+#[derive(Debug, Clone, PartialEq)]
+pub struct GenerateOutput {
+    /// Unique ID of the request that produced this output.
+    pub request_id: String,
+    /// One-time prompt metadata emitted only on the first output for this
+    /// request.
+    pub prompt_info: Option<GeneratePromptInfo>,
+    /// Newly produced token IDs for this step.
+    pub token_ids: Vec<u32>,
+    /// Sample logprobs for the generated positions in this step.
+    pub logprobs: Option<Logprobs>,
+    /// Terminal finish reason, when this is the final output for the request.
+    pub finish_reason: Option<FinishReason>,
+    /// Connector-specific KV transfer parameters for disaggregated serving.
+    pub kv_transfer_params: Option<serde_json::Value>,
+}
+
+impl GenerateOutput {
+    /// Returns the prompt token IDs when this output carries
+    /// [`GeneratePromptInfo`].
+    ///
+    /// Only the first output for a request can return `Some`; all later outputs
+    /// return `None`.
+    pub fn prompt_token_ids(&self) -> Option<&Arc<[u32]>> {
+        self.prompt_info.as_ref().map(|info| &info.prompt_token_ids)
+    }
+
+    /// Returns the prompt logprobs when this output carries
+    /// [`GeneratePromptInfo`].
+    ///
+    /// Only the first output for a request can return `Some`; all later outputs
+    /// return `None`.
+    pub fn prompt_logprobs(&self) -> Option<&Logprobs> {
+        self.prompt_info.as_ref().and_then(|info| info.prompt_logprobs.as_ref())
+    }
+
+    /// Returns whether this output is terminal for the request.
+    pub fn finished(&self) -> bool {
+        self.finish_reason.is_some()
+    }
+}
+
+#[cfg(any(test, feature = "test-util"))]
+impl GenerateOutput {
+    /// Build a [`GenerateOutput`] for tests.
+    pub fn for_test(
+        prompt_token_ids: Option<Arc<[u32]>>,
+        token_ids: Vec<u32>,
+        finish_reason: Option<FinishReason>,
+    ) -> Self {
+        Self {
+            request_id: String::new(),
+            prompt_info: prompt_token_ids.map(|ids| GeneratePromptInfo {
+                prompt_token_ids: ids,
+                prompt_logprobs: None,
+            }),
+            token_ids,
+            logprobs: None,
+            finish_reason,
+            kv_transfer_params: None,
+        }
+    }
+}
+
+/// Stream of per-request generate outputs for one request.
+///
+/// - A normal termination of the stream represents a clean completion of the request.
+/// - For errors, unexpected closes, or explicit aborts, the stream terminates with an error.
+pub struct GenerateOutputStream {
+    pending_prompt_info: Option<GeneratePromptInfo>,
+    raw_stream: EngineCoreOutputStream,
+    request_metrics: RequestMetricsTracker,
+}
+
+impl GenerateOutputStream {
+    /// Create a new generate output stream by adapting one raw engine-core
+    /// output stream.
+    pub(crate) fn new(
+        prompt_token_ids: Arc<[u32]>,
+        raw_stream: EngineCoreOutputStream,
+        request_metrics: RequestMetricsTracker,
+    ) -> Self {
+        Self {
+            pending_prompt_info: Some(GeneratePromptInfo {
+                prompt_token_ids,
+                prompt_logprobs: None,
+            }),
+            raw_stream,
+            request_metrics,
+        }
+    }
+
+    /// Return the internal engine request ID bound to this stream.
+    pub fn request_id(&self) -> &str {
+        self.raw_stream.request_id()
+    }
+}
+
+impl Stream for GenerateOutputStream {
+    type Item = Result<GenerateOutput>;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        let raw = match ready!(Pin::new(&mut self.raw_stream).poll_next(cx)) {
+            Some(Ok(raw)) => raw,
+            Some(Err(error)) => return Poll::Ready(Some(Err(error.into()))),
+            None => return Poll::Ready(None),
+        };
+
+        let received_at = current_unix_timestamp_secs();
+        self.request_metrics.observe_output(
+            raw.engine_index,
+            raw.timestamp,
+            received_at,
+            &raw.output,
+        );
+
+        let raw = raw.output;
+
+        // Populate the one-time prompt info on the first output.
+        if let Some(info) = &mut self.pending_prompt_info
+            && info.prompt_logprobs.is_none()
+        {
+            info.prompt_logprobs =
+                raw.new_prompt_logprobs_tensors.map(|value| value.into_direct().unwrap());
+        }
+
+        let logprobs = raw.new_logprobs.map(|value| value.into_direct().unwrap());
+
+        let finish_reason = finish_reason_from_engine(raw.finish_reason, raw.stop_reason);
+        if let Some(finish_reason) = finish_reason.as_ref() {
+            self.request_metrics.record_finished(received_at, finish_reason.clone());
+        }
+
+        let output = GenerateOutput {
+            request_id: raw.request_id,
+            prompt_info: self.pending_prompt_info.take(),
+            token_ids: raw.new_token_ids,
+            logprobs,
+            finish_reason,
+            kv_transfer_params: raw.kv_transfer_params,
+        };
+
+        Poll::Ready(Some(Ok(output)))
+    }
+}
+
+impl FusedStream for GenerateOutputStream {
+    fn is_terminated(&self) -> bool {
+        self.raw_stream.is_terminated()
+    }
+}
+
+impl Drop for GenerateOutputStream {
+    fn drop(&mut self) {
+        if self.raw_stream.is_terminated() {
+            // Already terminated cleanly, no need to record abort metrics.
+            return;
+        }
+
+        // If the user or the upper layer drops a live generate stream,
+        // `EngineCoreOutputStream::Drop` will trigger an engine-side abort. Record the
+        // matching terminal request metrics here so frontend-driven aborts are still
+        // visible as `finished_reason=...` instead of disappearing from observability
+        // entirely.
+        let finish_reason = match AbortCause::current() {
+            AbortCause::DroppedStream => FinishReason::Abort,
+            AbortCause::StopStringMatched => FinishReason::Stop(None),
+        };
+
+        self.request_metrics
+            .record_finished(current_unix_timestamp_secs(), finish_reason);
+    }
+}
+
+#[allow(clippy::manual_async_fn, reason = "specify `Send` bound")]
+#[easy_ext::ext(GenerateOutputStreamExt)]
+impl<T: Stream<Item = Result<GenerateOutput>> + Send> T {
+    /// Collect the raw generate stream to completion and return the final token
+    /// output.
+    pub fn collect_output(self) -> impl Future<Output = Result<CollectedGenerateOutput>> + Send {
+        async move {
+            let stream = self;
+            pin_mut!(stream);
+            let mut prompt_token_ids = None;
+            let mut prompt_logprobs = None;
+            let mut collected: Option<CollectedGenerateOutput> = None;
+
+            while let Some(output) = stream.next().await.transpose()? {
+                if let Some(info) = output.prompt_info {
+                    if prompt_token_ids.is_none() {
+                        prompt_token_ids = Some(info.prompt_token_ids.to_vec());
+                    }
+                    if prompt_logprobs.is_none() {
+                        prompt_logprobs = info.prompt_logprobs;
+                    }
+                }
+
+                if let Some(existing) = collected.as_mut() {
+                    existing.token_ids.extend(output.token_ids);
+                    if let Some(step_logprobs) = output.logprobs {
+                        if let Some(collected_logprobs) = existing.logprobs.as_mut() {
+                            collected_logprobs.positions.extend(step_logprobs.positions);
+                        } else {
+                            existing.logprobs = Some(step_logprobs);
+                        }
+                    }
+                } else {
+                    collected = Some(CollectedGenerateOutput {
+                        request_id: output.request_id,
+                        prompt_token_ids: prompt_token_ids.take().unwrap_or_default(),
+                        prompt_logprobs: prompt_logprobs.take(),
+                        token_ids: output.token_ids,
+                        logprobs: output.logprobs,
+                        finish_reason: FinishReason::Error,
+                        kv_transfer_params: None,
+                    });
+                }
+
+                if let Some(finish_reason) = output.finish_reason {
+                    let mut collected = collected.expect("terminal output must exist");
+                    collected.finish_reason = finish_reason;
+                    collected.kv_transfer_params = output.kv_transfer_params;
+                    return Ok(collected);
+                }
+            }
+
+            unreachable!("generate stream should yield an error instead of closing early")
+        }
+    }
+}
diff --git a/rust/src/llm/src/request.rs b/rust/src/llm/src/request.rs
new file mode 100644
index 000000000000..b17035b0512f
--- /dev/null
+++ b/rust/src/llm/src/request.rs
@@ -0,0 +1,201 @@
+use std::collections::BTreeMap;
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use uuid::Uuid;
+use vllm_engine_core_client::protocol::multimodal::MmFeatures;
+use vllm_engine_core_client::protocol::{EngineCoreRequest, EngineCoreSamplingParams, OpaqueValue};
+
+use crate::error::{Error, Result};
+
+/// Tokenized decoder-only generate request accepted by [`crate::Llm`].
+///
+/// This is the first-stage Rust subset of the inputs that eventually flow into
+/// Python `AsyncLLM.generate()`. The boundary is intentionally above
+/// [`EngineCoreRequest`], but below higher-level text and multimodal
+/// preprocessing.
+///
+/// Original Python API reference:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/engine/protocol.py#L67-L84>
+#[derive(Debug, Clone, PartialEq)]
+pub struct GenerateRequest {
+    /// Unique ID of the request.
+    pub request_id: String,
+    /// Token IDs of the prompt.
+    pub prompt_token_ids: Vec<u32>,
+    /// Sampling parameters forwarded to engine-core.
+    pub sampling_params: EngineCoreSamplingParams,
+    /// Optional multimodal features already prepared by `vllm-chat`.
+    pub mm_features: Option<MmFeatures>,
+
+    // Fields below are currently likely unused by callers.
+    pub arrival_time: Option<f64>,
+    pub cache_salt: Option<String>,
+    pub trace_headers: Option<BTreeMap<String, String>>,
+    pub priority: i32,
+    pub data_parallel_rank: Option<u32>,
+    pub reasoning_ended: Option<bool>,
+    pub lora_request: Option<OpaqueValue>,
+}
+
+#[derive(Debug)]
+pub(crate) struct PreparedGenerateRequest {
+    pub engine_request: EngineCoreRequest,
+}
+
+impl GenerateRequest {
+    /// Validate and lower this request into the raw engine-core request format.
+    pub(crate) fn prepare(self, randomize_request_id: bool) -> Result<PreparedGenerateRequest> {
+        if self.prompt_token_ids.is_empty() {
+            return Err(Error::EmptyPromptTokenIds {
+                request_id: self.request_id,
+            });
+        }
+        let GenerateRequest {
+            request_id,
+            prompt_token_ids,
+            sampling_params,
+            mm_features,
+            arrival_time,
+            cache_salt,
+            trace_headers,
+            priority,
+            data_parallel_rank,
+            reasoning_ended,
+            lora_request,
+        } = self;
+
+        let external_request_id = request_id;
+        let engine_request_id = if randomize_request_id {
+            let random_suffix = Uuid::new_v4().simple().to_string();
+            format!("{external_request_id}-{}", &random_suffix[..8])
+        } else {
+            external_request_id.clone()
+        };
+
+        Ok(PreparedGenerateRequest {
+            engine_request: EngineCoreRequest {
+                request_id: engine_request_id,
+                prompt_token_ids: Some(prompt_token_ids),
+                mm_features,
+                sampling_params: Some(sampling_params),
+                pooling_params: None,
+                arrival_time: arrival_time.unwrap_or_else(current_unix_timestamp_secs),
+                lora_request,
+                cache_salt,
+                data_parallel_rank,
+                prompt_embeds: None,
+                prompt_is_token_ids: None,
+                client_index: 0,
+                current_wave: 0,
+                priority,
+                trace_headers,
+                resumable: false,
+                external_req_id: Some(external_request_id),
+                reasoning_ended,
+                reasoning_parser_kwargs: None,
+                abort_immediately: false,
+            },
+        })
+    }
+}
+
+impl PreparedGenerateRequest {
+    /// Return the original prompt token IDs copied into the raw engine request.
+    pub fn prompt_token_ids(&self) -> &[u32] {
+        self.engine_request
+            .prompt_token_ids
+            .as_ref()
+            .expect("prepared request must have prompt token ids")
+    }
+}
+
+fn current_unix_timestamp_secs() -> f64 {
+    SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .expect("system clock is before unix epoch")
+        .as_secs_f64()
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::BTreeMap;
+
+    use vllm_engine_core_client::protocol::EngineCoreSamplingParams;
+
+    use super::GenerateRequest;
+    use crate::error::Error;
+
+    fn sample_request() -> GenerateRequest {
+        GenerateRequest {
+            request_id: "req-1".to_string(),
+            prompt_token_ids: vec![11, 22, 33],
+            sampling_params: EngineCoreSamplingParams::for_test(),
+            mm_features: None,
+            arrival_time: Some(42.5),
+            cache_salt: Some("salt".to_string()),
+            trace_headers: Some(BTreeMap::from([(
+                "x-trace-id".to_string(),
+                "abc".to_string(),
+            )])),
+            priority: 3,
+            data_parallel_rank: Some(2),
+            reasoning_ended: Some(true),
+            lora_request: None,
+        }
+    }
+
+    #[test]
+    fn prepare_builds_engine_core_request() {
+        let prepared = sample_request().prepare(true).unwrap();
+
+        assert_eq!(prepared.prompt_token_ids(), &[11, 22, 33]);
+
+        let request = prepared.engine_request;
+        assert_eq!(request.external_req_id.as_deref(), Some("req-1"));
+        assert!(request.request_id.starts_with("req-1-"));
+        assert_ne!(request.request_id, "req-1");
+        assert_eq!(request.prompt_token_ids.as_deref(), Some(&[11, 22, 33][..]));
+        assert_eq!(request.arrival_time, 42.5);
+        assert_eq!(request.cache_salt.as_deref(), Some("salt"));
+        assert_eq!(request.data_parallel_rank, Some(2));
+        assert_eq!(
+            request.trace_headers,
+            Some(BTreeMap::from([(
+                "x-trace-id".to_string(),
+                "abc".to_string(),
+            )]))
+        );
+        assert_eq!(request.reasoning_ended, Some(true));
+    }
+
+    #[test]
+    fn prepare_rejects_empty_prompt_tokens() {
+        let mut request = sample_request();
+        request.prompt_token_ids.clear();
+
+        let error = request.prepare(true).unwrap_err();
+        assert!(matches!(
+            error,
+            Error::EmptyPromptTokenIds { request_id } if request_id == "req-1"
+        ));
+    }
+
+    #[test]
+    fn prepare_can_preserve_external_request_id() {
+        let prepared = sample_request().prepare(false).unwrap();
+
+        let request = prepared.engine_request;
+        assert_eq!(request.external_req_id.as_deref(), Some("req-1"));
+        assert_eq!(request.request_id, "req-1");
+    }
+
+    #[test]
+    fn prepare_forwards_multimodal_features() {
+        let mut request = sample_request();
+        request.mm_features = Some(Vec::new());
+
+        let prepared = request.prepare(false).unwrap();
+
+        assert_eq!(prepared.engine_request.mm_features, Some(Vec::new()));
+    }
+}
diff --git a/rust/src/llm/src/request_metrics.rs b/rust/src/llm/src/request_metrics.rs
new file mode 100644
index 000000000000..d28b83be816b
--- /dev/null
+++ b/rust/src/llm/src/request_metrics.rs
@@ -0,0 +1,391 @@
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use vllm_engine_core_client::protocol::stats::PrefillStats;
+use vllm_engine_core_client::protocol::{EngineCoreEvent, EngineCoreEventType, EngineCoreOutput};
+use vllm_metrics::{
+    EngineLabels, FinishedReasonLabels, METRICS, PromptTokenSourceLabels, RequestMetrics,
+};
+
+use crate::FinishReason;
+
+fn metrics() -> &'static RequestMetrics {
+    &METRICS.request
+}
+
+const PROMPT_TOKEN_SOURCE_LOCAL_COMPUTE: &str = "local_compute";
+const PROMPT_TOKEN_SOURCE_LOCAL_CACHE_HIT: &str = "local_cache_hit";
+const PROMPT_TOKEN_SOURCE_EXTERNAL_KV_TRANSFER: &str = "external_kv_transfer";
+
+/// Request-scoped metrics state tracked across streamed engine-core updates.
+///
+/// This is the Rust-side counterpart of the Python frontend's request-lifecycle
+/// bookkeeping, centered on `RequestStateStats` and the per-output/per-finished
+/// update flow.
+///
+/// Original Python definitions:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/v1/metrics/stats.py#L200-L237>
+///
+/// Original Python update flow:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/v1/engine/output_processor.py#L600-L677>
+#[derive(Debug, Clone)]
+pub(crate) struct RequestMetricsTracker {
+    model_name: String,
+    arrival_time: f64,
+    prompt_len: u32,
+    max_tokens_param: Option<u32>,
+    n_param: u32,
+    is_prefilling: bool,
+    queued_ts: f64,
+    scheduled_ts: f64,
+    first_token_ts: f64,
+    last_token_ts: f64,
+    first_token_latency: f64,
+    num_generation_tokens: u32,
+    latest_num_cached_tokens: u32,
+    last_seen_engine_index: u32,
+}
+
+impl RequestMetricsTracker {
+    /// Create the per-request tracker from the normalized `llm`-layer request
+    /// context.
+    pub(crate) fn new(
+        model_name: String,
+        arrival_time: f64,
+        prompt_len: u32,
+        max_tokens_param: Option<u32>,
+        n_param: u32,
+    ) -> Self {
+        Self {
+            model_name,
+            arrival_time,
+            prompt_len,
+            max_tokens_param,
+            n_param,
+            is_prefilling: true,
+            queued_ts: 0.0,
+            scheduled_ts: 0.0,
+            first_token_ts: 0.0,
+            last_token_ts: 0.0,
+            first_token_latency: 0.0,
+            num_generation_tokens: 0,
+            latest_num_cached_tokens: 0,
+            last_seen_engine_index: 0,
+        }
+    }
+
+    /// Update request-lifecycle state from one engine-core output item.
+    ///
+    /// Original Python stats logic:
+    /// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/v1/metrics/stats.py#L331-L384>
+    pub(crate) fn observe_output(
+        &mut self,
+        engine_index: u32,
+        batch_timestamp: f64,
+        received_at: f64,
+        output: &EngineCoreOutput,
+    ) {
+        self.last_seen_engine_index = engine_index;
+        if let Some(prefill_stats) = &output.prefill_stats {
+            self.latest_num_cached_tokens = prefill_stats.num_cached_tokens;
+        }
+        self.num_generation_tokens += output.new_token_ids.len() as u32;
+        metrics()
+            .generation_tokens
+            .get_or_create(&engine_labels(&self.model_name, engine_index))
+            .inc_by(output.new_token_ids.len() as u64);
+
+        if let Some(events) = &output.events {
+            self.observe_events(engine_index, events);
+        }
+
+        if self.is_prefilling {
+            if let Some(prefill_stats) = &output.prefill_stats {
+                record_prompt_tokens(&self.model_name, engine_index, prefill_stats);
+            }
+            self.first_token_latency = received_at - self.arrival_time;
+            observe_time_to_first_token_seconds(
+                &self.model_name,
+                engine_index,
+                self.first_token_latency,
+            );
+            self.first_token_ts = batch_timestamp;
+            self.is_prefilling = false;
+        } else if self.last_token_ts > 0.0 {
+            observe_inter_token_latency_seconds(
+                &self.model_name,
+                engine_index,
+                batch_timestamp - self.last_token_ts,
+            );
+        }
+
+        self.last_token_ts = batch_timestamp;
+    }
+
+    /// Emit the terminal request metrics once a finished output has been
+    /// observed.
+    ///
+    /// Original Python finished-request stats:
+    /// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/v1/metrics/stats.py#L222-L237>
+    pub(crate) fn record_finished(&self, received_at: f64, finish_reason: FinishReason) {
+        let labels = engine_labels(&self.model_name, self.last_seen_engine_index);
+        let prefill_kv_computed_tokens =
+            self.prompt_len.saturating_sub(self.latest_num_cached_tokens);
+        let e2e_latency_seconds = received_at - self.arrival_time;
+        let queue_time_seconds = diff_or_zero(self.scheduled_ts, self.queued_ts);
+        let prefill_time_seconds = diff_or_zero(self.first_token_ts, self.scheduled_ts);
+        let decode_time_seconds = diff_or_zero(self.last_token_ts, self.first_token_ts);
+        let inference_time_seconds = diff_or_zero(self.last_token_ts, self.scheduled_ts);
+        let time_per_output_token_seconds = if self.num_generation_tokens > 1 {
+            diff_or_zero(self.last_token_ts, self.first_token_ts)
+                / (self.num_generation_tokens - 1) as f64
+        } else {
+            0.0
+        };
+
+        record_request_success(&self.model_name, self.last_seen_engine_index, finish_reason);
+        metrics()
+            .request_prompt_tokens
+            .get_or_create(&labels)
+            .observe(self.prompt_len as f64);
+        metrics()
+            .request_generation_tokens
+            .get_or_create(&labels)
+            .observe(self.num_generation_tokens as f64);
+        metrics()
+            .request_max_num_generation_tokens
+            .get_or_create(&labels)
+            .observe(self.num_generation_tokens as f64);
+        if let Some(max_tokens_param) = self.max_tokens_param {
+            metrics()
+                .request_params_max_tokens
+                .get_or_create(&labels)
+                .observe(max_tokens_param as f64);
+        }
+        metrics().request_params_n.get_or_create(&labels).observe(self.n_param as f64);
+        metrics()
+            .request_prefill_kv_computed_tokens
+            .get_or_create(&labels)
+            .observe(prefill_kv_computed_tokens as f64);
+        metrics()
+            .e2e_request_latency_seconds
+            .get_or_create(&labels)
+            .observe(e2e_latency_seconds);
+        metrics()
+            .request_queue_time_seconds
+            .get_or_create(&labels)
+            .observe(queue_time_seconds);
+        metrics()
+            .request_prefill_time_seconds
+            .get_or_create(&labels)
+            .observe(prefill_time_seconds);
+        metrics()
+            .request_decode_time_seconds
+            .get_or_create(&labels)
+            .observe(decode_time_seconds);
+        metrics()
+            .request_inference_time_seconds
+            .get_or_create(&labels)
+            .observe(inference_time_seconds);
+        metrics()
+            .request_time_per_output_token_seconds
+            .get_or_create(&labels)
+            .observe(time_per_output_token_seconds);
+    }
+
+    fn observe_events(&mut self, engine_index: u32, events: &[EngineCoreEvent]) {
+        for event in events {
+            match event.r#type {
+                EngineCoreEventType::Queued => {
+                    self.queued_ts = event.timestamp;
+                }
+                EngineCoreEventType::Scheduled => {
+                    if self.scheduled_ts == 0.0 {
+                        self.scheduled_ts = event.timestamp;
+                    }
+                }
+                EngineCoreEventType::Preempted => {
+                    metrics()
+                        .num_preemptions
+                        .get_or_create(&engine_labels(&self.model_name, engine_index))
+                        .inc();
+                }
+            }
+        }
+    }
+}
+
+fn engine_labels(model_name: &str, engine: u32) -> EngineLabels {
+    EngineLabels {
+        model_name: model_name.to_string(),
+        engine,
+    }
+}
+
+fn observe_time_to_first_token_seconds(model_name: &str, engine: u32, seconds: f64) {
+    metrics()
+        .time_to_first_token_seconds
+        .get_or_create(&engine_labels(model_name, engine))
+        .observe(seconds);
+}
+
+fn observe_inter_token_latency_seconds(model_name: &str, engine: u32, seconds: f64) {
+    metrics()
+        .inter_token_latency_seconds
+        .get_or_create(&engine_labels(model_name, engine))
+        .observe(seconds);
+}
+
+fn record_request_success(model_name: &str, engine: u32, finish_reason: FinishReason) {
+    metrics()
+        .request_success
+        .get_or_create(&FinishedReasonLabels {
+            model_name: model_name.to_string(),
+            engine,
+            finished_reason: finish_reason.as_str(),
+        })
+        .inc();
+}
+
+fn prompt_token_source_labels(
+    model_name: &str,
+    engine: u32,
+    source: &'static str,
+) -> PromptTokenSourceLabels {
+    PromptTokenSourceLabels {
+        model_name: model_name.to_string(),
+        engine,
+        source,
+    }
+}
+
+fn record_prompt_tokens(model_name: &str, engine: u32, prefill_stats: &PrefillStats) {
+    let computed = prefill_stats.num_computed_tokens as u64;
+    let local_cache_hit = prefill_stats.num_local_cached_tokens as u64;
+    let external_kv_transfer = prefill_stats.num_external_cached_tokens as u64;
+
+    metrics()
+        .prompt_tokens
+        .get_or_create(&engine_labels(model_name, engine))
+        .inc_by(prefill_stats.num_prompt_tokens as u64);
+    metrics()
+        .prompt_tokens_by_source
+        .get_or_create(&prompt_token_source_labels(
+            model_name,
+            engine,
+            PROMPT_TOKEN_SOURCE_LOCAL_COMPUTE,
+        ))
+        .inc_by(computed);
+    metrics()
+        .prompt_tokens_by_source
+        .get_or_create(&prompt_token_source_labels(
+            model_name,
+            engine,
+            PROMPT_TOKEN_SOURCE_LOCAL_CACHE_HIT,
+        ))
+        .inc_by(local_cache_hit);
+    metrics()
+        .prompt_tokens_by_source
+        .get_or_create(&prompt_token_source_labels(
+            model_name,
+            engine,
+            PROMPT_TOKEN_SOURCE_EXTERNAL_KV_TRANSFER,
+        ))
+        .inc_by(external_kv_transfer);
+    metrics()
+        .prompt_tokens_cached
+        .get_or_create(&engine_labels(model_name, engine))
+        .inc_by(prefill_stats.num_cached_tokens as u64);
+}
+
+fn diff_or_zero(end: f64, start: f64) -> f64 {
+    if end > 0.0 && start > 0.0 && end >= start {
+        end - start
+    } else {
+        0.0
+    }
+}
+
+/// Return the current wall-clock time in seconds since the Unix epoch.
+///
+/// This is used for frontend-side latency measurements such as TTFT and E2E,
+/// matching the Python frontend's use of wall-clock request arrival/iteration
+/// timestamps rather than engine-core's monotonic scheduler timestamps.
+///
+/// Original Python request timestamp source:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/v1/metrics/stats.py#L206-L216>
+pub(crate) fn current_unix_timestamp_secs() -> f64 {
+    SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .expect("system clock is before unix epoch")
+        .as_secs_f64()
+}
+
+#[cfg(test)]
+mod tests {
+    use vllm_engine_core_client::protocol::stats::PrefillStats;
+    use vllm_engine_core_client::protocol::{EngineCoreEvent, EngineCoreEventType};
+
+    use super::{RequestMetricsTracker, diff_or_zero};
+
+    #[test]
+    fn tracker_updates_timing_state_across_prefill_decode_and_finish() {
+        let mut tracker = RequestMetricsTracker::new("model".to_string(), 100.0, 64, Some(128), 1);
+
+        tracker.observe_output(
+            2,
+            10.0,
+            100.2,
+            &vllm_engine_core_client::protocol::EngineCoreOutput {
+                request_id: "req-1".to_string(),
+                new_token_ids: vec![1],
+                finish_reason: None,
+                events: Some(vec![
+                    EngineCoreEvent {
+                        r#type: EngineCoreEventType::Queued,
+                        timestamp: 8.0,
+                    },
+                    EngineCoreEvent {
+                        r#type: EngineCoreEventType::Scheduled,
+                        timestamp: 9.0,
+                    },
+                ]),
+                prefill_stats: Some(PrefillStats {
+                    num_prompt_tokens: 64,
+                    num_computed_tokens: 60,
+                    num_cached_tokens: 4,
+                    num_local_cached_tokens: 4,
+                    num_external_cached_tokens: 0,
+                }),
+                ..Default::default()
+            },
+        );
+        tracker.observe_output(
+            2,
+            11.5,
+            100.4,
+            &vllm_engine_core_client::protocol::EngineCoreOutput {
+                request_id: "req-1".to_string(),
+                new_token_ids: vec![2, 3],
+                finish_reason: None,
+                events: Some(vec![EngineCoreEvent {
+                    r#type: EngineCoreEventType::Preempted,
+                    timestamp: 10.5,
+                }]),
+                ..Default::default()
+            },
+        );
+
+        assert!(!tracker.is_prefilling);
+        assert_eq!(tracker.last_seen_engine_index, 2);
+        assert_eq!(tracker.num_generation_tokens, 3);
+        assert_eq!(tracker.queued_ts, 8.0);
+        assert_eq!(tracker.scheduled_ts, 9.0);
+        assert_eq!(tracker.first_token_ts, 10.0);
+        assert_eq!(tracker.last_token_ts, 11.5);
+        assert!((tracker.first_token_latency - 0.2).abs() < 1e-9);
+        assert_eq!(
+            diff_or_zero(tracker.last_token_ts, tracker.first_token_ts),
+            1.5
+        );
+    }
+}
diff --git a/rust/src/llm/tests/generate.rs b/rust/src/llm/tests/generate.rs
new file mode 100644
index 000000000000..8b1b98bdc485
--- /dev/null
+++ b/rust/src/llm/tests/generate.rs
@@ -0,0 +1,749 @@
+use std::collections::BTreeSet;
+use std::sync::Once;
+use std::time::Duration;
+
+use futures::StreamExt as _;
+use tokio::time::timeout;
+use tracing_subscriber::EnvFilter;
+use uuid::Uuid;
+use vllm_engine_core_client::protocol::logprobs::{
+    Logprobs, MaybeWireLogprobs, PositionLogprobs, TokenLogprob,
+};
+use vllm_engine_core_client::protocol::stats::PrefillStats;
+use vllm_engine_core_client::protocol::{
+    EngineCoreEvent, EngineCoreEventType, EngineCoreFinishReason, EngineCoreOutput,
+    EngineCoreOutputs, EngineCoreRequest, EngineCoreSamplingParams,
+};
+use vllm_engine_core_client::test_utils::{IpcNamespace, spawn_mock_engine_task};
+use vllm_engine_core_client::{EngineCoreClient, EngineCoreClientConfig};
+use vllm_llm::{
+    Error, FinishReason, GenerateOutputStreamExt as _, GeneratePromptInfo, GenerateRequest, Llm,
+};
+use vllm_metrics::METRICS;
+use zeromq::prelude::{SocketRecv, SocketSend};
+use zeromq::{DealerSocket, PushSocket, ZmqMessage};
+
+static TRACING: Once = Once::new();
+
+fn request_output(
+    request_id: &str,
+    new_token_ids: Vec<u32>,
+    finish_reason: Option<EngineCoreFinishReason>,
+) -> EngineCoreOutput {
+    request_output_with_events(request_id, new_token_ids, finish_reason, None)
+}
+
+fn request_output_with_events(
+    request_id: &str,
+    new_token_ids: Vec<u32>,
+    finish_reason: Option<EngineCoreFinishReason>,
+    events: Option<Vec<EngineCoreEvent>>,
+) -> EngineCoreOutput {
+    EngineCoreOutput {
+        request_id: request_id.to_string(),
+        new_token_ids,
+        new_logprobs: None,
+        new_prompt_logprobs_tensors: None,
+        pooling_output: None,
+        finish_reason,
+        stop_reason: None,
+        events,
+        kv_transfer_params: None,
+        trace_headers: None,
+        prefill_stats: None,
+        routed_experts: None,
+        num_nans_in_logits: 0,
+    }
+}
+
+fn request_output_with_logprobs(
+    request_id: &str,
+    new_token_ids: Vec<u32>,
+    finish_reason: Option<EngineCoreFinishReason>,
+    new_logprobs: Option<Logprobs>,
+    prompt_logprobs: Option<Logprobs>,
+) -> EngineCoreOutput {
+    EngineCoreOutput {
+        request_id: request_id.to_string(),
+        new_token_ids,
+        new_logprobs: new_logprobs.map(MaybeWireLogprobs::Direct),
+        new_prompt_logprobs_tensors: prompt_logprobs.map(MaybeWireLogprobs::Direct),
+        pooling_output: None,
+        finish_reason,
+        stop_reason: None,
+        events: None,
+        kv_transfer_params: None,
+        trace_headers: None,
+        prefill_stats: None,
+        routed_experts: None,
+        num_nans_in_logits: 0,
+    }
+}
+
+fn request_output_with_logprobs_and_kv(
+    request_id: &str,
+    new_token_ids: Vec<u32>,
+    finish_reason: Option<EngineCoreFinishReason>,
+    new_logprobs: Option<Logprobs>,
+    prompt_logprobs: Option<Logprobs>,
+    kv_transfer_params: Option<serde_json::Value>,
+) -> EngineCoreOutput {
+    EngineCoreOutput {
+        request_id: request_id.to_string(),
+        new_token_ids,
+        new_logprobs: new_logprobs.map(MaybeWireLogprobs::Direct),
+        new_prompt_logprobs_tensors: prompt_logprobs.map(MaybeWireLogprobs::Direct),
+        pooling_output: None,
+        finish_reason,
+        stop_reason: None,
+        events: None,
+        kv_transfer_params,
+        trace_headers: None,
+        prefill_stats: None,
+        routed_experts: None,
+        num_nans_in_logits: 0,
+    }
+}
+
+fn logprobs_for_position(
+    sampled_token_id: u32,
+    sampled_logprob: f32,
+    sampled_rank: u32,
+    top_token_id: u32,
+    top_logprob: f32,
+) -> Logprobs {
+    Logprobs {
+        positions: vec![PositionLogprobs {
+            entries: vec![
+                TokenLogprob {
+                    token_id: sampled_token_id,
+                    logprob: sampled_logprob,
+                    rank: sampled_rank,
+                },
+                TokenLogprob {
+                    token_id: top_token_id,
+                    logprob: top_logprob,
+                    rank: 1,
+                },
+            ],
+        }],
+    }
+}
+
+fn prompt_logprobs() -> Logprobs {
+    Logprobs {
+        positions: vec![
+            PositionLogprobs {
+                entries: vec![
+                    TokenLogprob {
+                        token_id: 11,
+                        logprob: -0.1,
+                        rank: 2,
+                    },
+                    TokenLogprob {
+                        token_id: 7,
+                        logprob: -0.05,
+                        rank: 1,
+                    },
+                ],
+            },
+            PositionLogprobs {
+                entries: vec![
+                    TokenLogprob {
+                        token_id: 22,
+                        logprob: -0.2,
+                        rank: 3,
+                    },
+                    TokenLogprob {
+                        token_id: 8,
+                        logprob: -0.1,
+                        rank: 1,
+                    },
+                ],
+            },
+        ],
+    }
+}
+
+fn sample_generate_request(request_id: &str, max_tokens: u32) -> GenerateRequest {
+    GenerateRequest {
+        request_id: request_id.to_string(),
+        prompt_token_ids: vec![11, 22],
+        sampling_params: EngineCoreSamplingParams {
+            max_tokens,
+            ..EngineCoreSamplingParams::for_test()
+        },
+        mm_features: None,
+        arrival_time: Some(42.5),
+        cache_salt: None,
+        trace_headers: None,
+        priority: 0,
+        data_parallel_rank: None,
+        reasoning_ended: None,
+        lora_request: None,
+    }
+}
+
+async fn send_outputs(push: &mut PushSocket, outputs: EngineCoreOutputs) {
+    push.send(ZmqMessage::from(rmp_serde::to_vec_named(&outputs).unwrap()))
+        .await
+        .unwrap();
+}
+
+async fn recv_engine_message(dealer: &mut DealerSocket) -> Vec<bytes::Bytes> {
+    dealer.recv().await.unwrap().into_vec()
+}
+
+async fn connect_async_llm_with_ipc(
+    handshake_address: String,
+    client_index: u32,
+    model_name: &str,
+    ipc: &IpcNamespace,
+) -> Llm {
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name(model_name)
+            .with_client_index(client_index)
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .unwrap();
+    Llm::new(client)
+}
+
+fn request_metrics_model_name(prefix: &str) -> String {
+    format!("{prefix}-{}", Uuid::new_v4().simple())
+}
+
+fn init_tracing() {
+    TRACING.call_once(|| {
+        let filter = EnvFilter::try_from_default_env()
+            .unwrap_or_else(|_| EnvFilter::new("vllm_engine_core_client=debug"));
+        let _ = tracing_subscriber::fmt().with_test_writer().with_env_filter(filter).try_init();
+    });
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn generate_streams_outputs() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-delta".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add = recv_engine_message(dealer).await;
+                assert_eq!(add[0].as_ref(), &[0x00]);
+                let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+                assert_eq!(request.external_req_id.as_deref(), Some("req-delta"));
+                assert!(request.request_id.starts_with("req-delta-"));
+                assert_ne!(request.request_id, "req-delta");
+                assert_eq!(request.client_index, 7);
+                assert_eq!(request.prompt_token_ids, Some(vec![11, 22]));
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![
+                            request_output_with_logprobs(
+                                &request.request_id,
+                                vec![1, 2],
+                                None,
+                                Some(logprobs_for_position(1, -0.3, 4, 9, -0.1)),
+                                Some(prompt_logprobs()),
+                            ),
+                            request_output_with_logprobs(
+                                &request.request_id,
+                                vec![3],
+                                Some(EngineCoreFinishReason::Length),
+                                Some(logprobs_for_position(3, -0.4, 5, 10, -0.2)),
+                                None,
+                            ),
+                        ],
+                        finished_requests: Some(BTreeSet::from([request.request_id.clone()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let llm = connect_async_llm_with_ipc(handshake_address, 7, "test-model", &ipc).await;
+    let mut stream = llm.generate(sample_generate_request("req-delta", 3)).await.unwrap();
+    let internal_id = stream.request_id().to_string();
+
+    let first = stream.next().await.unwrap().unwrap();
+    assert_eq!(first.request_id, internal_id);
+    assert_eq!(
+        first.prompt_info,
+        Some(GeneratePromptInfo {
+            prompt_token_ids: vec![11, 22].into(),
+            prompt_logprobs: Some(prompt_logprobs()),
+        })
+    );
+    assert_eq!(first.token_ids, vec![1, 2]);
+    assert_eq!(
+        first.logprobs,
+        Some(logprobs_for_position(1, -0.3, 4, 9, -0.1))
+    );
+    assert_eq!(first.finish_reason, None);
+
+    let second = stream.next().await.unwrap().unwrap();
+    assert_eq!(second.prompt_info, None);
+    assert_eq!(second.token_ids, vec![3]);
+    assert_eq!(
+        second.logprobs,
+        Some(logprobs_for_position(3, -0.4, 5, 10, -0.2))
+    );
+    assert_eq!(second.finish_reason, Some(FinishReason::Length));
+    assert!(stream.next().await.is_none());
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    llm.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn collect_output_aggregates_raw_tokens_logprobs_and_terminal_metadata() {
+    init_tracing();
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-collect-output".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add = recv_engine_message(dealer).await;
+                let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+                assert_eq!(request.external_req_id.as_deref(), Some("req-collect"));
+                assert!(request.request_id.starts_with("req-collect-"));
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 0,
+                        outputs: vec![
+                            request_output_with_logprobs(
+                                &request.request_id,
+                                vec![33],
+                                None,
+                                Some(logprobs_for_position(33, -0.1, 1, 99, -0.2)),
+                                Some(prompt_logprobs()),
+                            ),
+                            request_output_with_logprobs_and_kv(
+                                &request.request_id,
+                                vec![44],
+                                Some(EngineCoreFinishReason::Stop),
+                                Some(logprobs_for_position(44, -0.3, 1, 88, -0.4)),
+                                None,
+                                Some(serde_json::json!({"connector": "x"})),
+                            ),
+                        ],
+                        scheduler_stats: None,
+                        timestamp: 0.0,
+                        utility_output: None,
+                        finished_requests: None,
+                        wave_complete: None,
+                        start_wave: None,
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let llm = connect_async_llm_with_ipc(handshake_address, 7, "test-model", &ipc).await;
+    let stream = llm.generate(sample_generate_request("req-collect", 4)).await.unwrap();
+    let internal_id = stream.request_id().to_string();
+    let collected = stream.collect_output().await.unwrap();
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+
+    assert_eq!(collected.request_id, internal_id);
+    assert_eq!(collected.prompt_token_ids, vec![11, 22]);
+    assert_eq!(collected.token_ids, vec![33, 44]);
+    assert_eq!(collected.finish_reason, FinishReason::stop_eos());
+    assert_eq!(collected.prompt_logprobs, Some(prompt_logprobs()));
+    assert_eq!(
+        collected.logprobs.as_ref().map(|lp| lp.positions.len()),
+        Some(2)
+    );
+    assert_eq!(
+        collected.kv_transfer_params,
+        Some(serde_json::json!({"connector": "x"}))
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn generate_propagates_unexpected_close_errors() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-close".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add = recv_engine_message(dealer).await;
+                assert_eq!(add[0].as_ref(), &[0x00]);
+                let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        finished_requests: Some(BTreeSet::from([request.request_id])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let llm = connect_async_llm_with_ipc(handshake_address, 0, "test-model", &ipc).await;
+    let mut stream = llm.generate(sample_generate_request("req-close", 1)).await.unwrap();
+    let internal_id = stream.request_id().to_string();
+
+    let error = stream.next().await.unwrap().unwrap_err();
+    assert!(matches!(
+        error,
+        Error::EngineCoreClient(vllm_engine_core_client::Error::RequestStreamClosed {
+            request_id
+        }) if request_id == internal_id
+    ));
+    assert!(stream.next().await.is_none());
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    llm.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn dropping_a_live_generate_stream_triggers_abort() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-drop".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add = recv_engine_message(dealer).await;
+                assert_eq!(add[0].as_ref(), &[0x00]);
+                let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+                assert_eq!(request.external_req_id.as_deref(), Some("req-drop"));
+                assert!(request.request_id.starts_with("req-drop-"));
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![request_output(&request.request_id, vec![99], None)],
+                        ..Default::default()
+                    },
+                )
+                .await;
+
+                let abort =
+                    timeout(Duration::from_secs(1), recv_engine_message(dealer)).await.unwrap();
+                assert_eq!(abort[0].as_ref(), &[0x01]);
+                let aborted_ids: Vec<String> = rmp_serde::from_slice(&abort[1]).unwrap();
+                assert_eq!(aborted_ids, vec![request.request_id]);
+            })
+        },
+    );
+
+    let llm = connect_async_llm_with_ipc(handshake_address, 0, "test-model", &ipc).await;
+    let mut stream = llm.generate(sample_generate_request("req-drop", 4)).await.unwrap();
+
+    let output = stream.next().await.unwrap().unwrap();
+    assert_eq!(output.token_ids, vec![99]);
+    drop(stream);
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    llm.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn duplicate_external_request_ids_are_randomized_before_reaching_engine_core_client() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-dup".to_vec();
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add_1 = recv_engine_message(dealer).await;
+                assert_eq!(add_1[0].as_ref(), &[0x00]);
+                let request_1: EngineCoreRequest = rmp_serde::from_slice(&add_1[1]).unwrap();
+                assert_eq!(request_1.external_req_id.as_deref(), Some("req-dup"));
+                assert!(request_1.request_id.starts_with("req-dup-"));
+
+                let add_2 = recv_engine_message(dealer).await;
+                assert_eq!(add_2[0].as_ref(), &[0x00]);
+                let request_2: EngineCoreRequest = rmp_serde::from_slice(&add_2[1]).unwrap();
+                assert_eq!(request_2.external_req_id.as_deref(), Some("req-dup"));
+                assert!(request_2.request_id.starts_with("req-dup-"));
+                assert_ne!(request_1.request_id, request_2.request_id);
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![request_output(
+                            &request_1.request_id,
+                            vec![],
+                            Some(EngineCoreFinishReason::Length),
+                        )],
+                        finished_requests: Some(BTreeSet::from([request_1.request_id.clone()])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        outputs: vec![request_output(
+                            &request_2.request_id,
+                            vec![],
+                            Some(EngineCoreFinishReason::Length),
+                        )],
+                        finished_requests: Some(BTreeSet::from([request_2.request_id])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let llm = connect_async_llm_with_ipc(handshake_address, 0, "test-model", &ipc).await;
+    let stream_1 = llm.generate(sample_generate_request("req-dup", 1)).await.unwrap();
+    let stream_2 = llm.generate(sample_generate_request("req-dup", 1)).await.unwrap();
+    let internal_id_1 = stream_1.request_id().to_string();
+    let internal_id_2 = stream_2.request_id().to_string();
+    let collected_1 = stream_1.collect_output().await.unwrap();
+    let collected_2 = stream_2.collect_output().await.unwrap();
+    assert_eq!(collected_1.request_id, internal_id_1);
+    assert_eq!(collected_2.request_id, internal_id_2);
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    llm.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn generate_records_request_metrics_in_prometheus_output() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-metrics".to_vec();
+    let model_name = request_metrics_model_name("metrics-model");
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add = recv_engine_message(dealer).await;
+                assert_eq!(add[0].as_ref(), &[0x00]);
+                let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 4,
+                        timestamp: 10.0,
+                        outputs: vec![EngineCoreOutput {
+                            prefill_stats: Some(PrefillStats {
+                                num_prompt_tokens: 2,
+                                num_computed_tokens: 2,
+                                ..Default::default()
+                            }),
+                            ..request_output_with_events(
+                                &request.request_id,
+                                vec![1],
+                                None,
+                                Some(vec![
+                                    EngineCoreEvent {
+                                        r#type: EngineCoreEventType::Queued,
+                                        timestamp: 8.0,
+                                    },
+                                    EngineCoreEvent {
+                                        r#type: EngineCoreEventType::Scheduled,
+                                        timestamp: 9.0,
+                                    },
+                                ]),
+                            )
+                        }],
+                        ..Default::default()
+                    },
+                )
+                .await;
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 4,
+                        timestamp: 11.5,
+                        outputs: vec![request_output_with_events(
+                            &request.request_id,
+                            vec![2, 3],
+                            Some(EngineCoreFinishReason::Length),
+                            Some(vec![EngineCoreEvent {
+                                r#type: EngineCoreEventType::Preempted,
+                                timestamp: 10.5,
+                            }]),
+                        )],
+                        finished_requests: Some(BTreeSet::from([request.request_id])),
+                        ..Default::default()
+                    },
+                )
+                .await;
+            })
+        },
+    );
+
+    let llm = connect_async_llm_with_ipc(handshake_address, 0, &model_name, &ipc).await;
+    let mut request = sample_generate_request("req-metrics", 8);
+    request.arrival_time = None;
+    let mut stream = llm.generate(request).await.unwrap();
+
+    assert_eq!(stream.next().await.unwrap().unwrap().token_ids, vec![1]);
+    let final_output = stream.next().await.unwrap().unwrap();
+    assert_eq!(final_output.token_ids, vec![2, 3]);
+    assert_eq!(final_output.finish_reason, Some(FinishReason::Length));
+    assert!(stream.next().await.is_none());
+
+    let rendered = METRICS.render().unwrap();
+    assert!(rendered.contains(&format!(
+        "vllm:request_success_total{{model_name=\"{model_name}\",engine=\"4\",finished_reason=\"length\"}} 1"
+    )));
+    assert!(rendered.contains(&format!(
+        "vllm:prompt_tokens_total{{model_name=\"{model_name}\",engine=\"4\"}} 2"
+    )));
+    assert!(rendered.contains(&format!(
+        "vllm:prompt_tokens_by_source_total{{model_name=\"{model_name}\",engine=\"4\",source=\"local_compute\"}} 2"
+    )));
+    assert!(rendered.contains(&format!(
+        "vllm:prompt_tokens_by_source_total{{model_name=\"{model_name}\",engine=\"4\",source=\"local_cache_hit\"}} 0"
+    )));
+    assert!(rendered.contains(&format!(
+        "vllm:prompt_tokens_by_source_total{{model_name=\"{model_name}\",engine=\"4\",source=\"external_kv_transfer\"}} 0"
+    )));
+    assert!(rendered.contains(&format!(
+        "vllm:prompt_tokens_cached_total{{model_name=\"{model_name}\",engine=\"4\"}} 0"
+    )));
+    assert!(rendered.contains(&format!(
+        "vllm:generation_tokens_total{{model_name=\"{model_name}\",engine=\"4\"}} 3"
+    )));
+    assert!(rendered.contains(&format!(
+        "vllm:num_preemptions_total{{model_name=\"{model_name}\",engine=\"4\"}} 1"
+    )));
+    assert!(rendered.contains(&format!(
+        "vllm:time_to_first_token_seconds_count{{model_name=\"{model_name}\",engine=\"4\"}} 1"
+    )));
+    assert!(rendered.contains(&format!(
+        "vllm:inter_token_latency_seconds_count{{model_name=\"{model_name}\",engine=\"4\"}} 1"
+    )));
+    assert!(rendered.contains(&format!(
+        "vllm:e2e_request_latency_seconds_count{{model_name=\"{model_name}\",engine=\"4\"}} 1"
+    )));
+    assert!(rendered.contains(&format!(
+        "vllm:request_prompt_tokens_count{{model_name=\"{model_name}\",engine=\"4\"}} 1"
+    )));
+    assert!(rendered.contains(&format!(
+        "vllm:request_generation_tokens_count{{model_name=\"{model_name}\",engine=\"4\"}} 1"
+    )));
+    assert!(rendered.contains(&format!(
+        "vllm:request_prefill_kv_computed_tokens_count{{model_name=\"{model_name}\",engine=\"4\"}} 1"
+    )));
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    llm.shutdown().await.unwrap();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn dropping_stream_records_abort_terminal_request_metrics() {
+    let ipc = IpcNamespace::new().unwrap();
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-metrics-drop".to_vec();
+    let model_name = request_metrics_model_name("metrics-drop-model");
+
+    let (shutdown_tx, engine_task) = spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add = recv_engine_message(dealer).await;
+                assert_eq!(add[0].as_ref(), &[0x00]);
+                let request: EngineCoreRequest = rmp_serde::from_slice(&add[1]).unwrap();
+                assert_eq!(request.external_req_id.as_deref(), Some("req-metrics-drop"));
+                assert!(request.request_id.starts_with("req-metrics-drop-"));
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 5,
+                        timestamp: 10.0,
+                        outputs: vec![request_output_with_events(
+                            &request.request_id,
+                            vec![99],
+                            None,
+                            Some(vec![
+                                EngineCoreEvent {
+                                    r#type: EngineCoreEventType::Queued,
+                                    timestamp: 8.0,
+                                },
+                                EngineCoreEvent {
+                                    r#type: EngineCoreEventType::Scheduled,
+                                    timestamp: 9.0,
+                                },
+                            ]),
+                        )],
+                        ..Default::default()
+                    },
+                )
+                .await;
+
+                let abort =
+                    timeout(Duration::from_secs(1), recv_engine_message(dealer)).await.unwrap();
+                assert_eq!(abort[0].as_ref(), &[0x01]);
+                let aborted_ids: Vec<String> = rmp_serde::from_slice(&abort[1]).unwrap();
+                assert_eq!(aborted_ids, vec![request.request_id]);
+            })
+        },
+    );
+
+    let llm = connect_async_llm_with_ipc(handshake_address, 0, &model_name, &ipc).await;
+    let mut request = sample_generate_request("req-metrics-drop", 8);
+    request.arrival_time = None;
+    let mut stream = llm.generate(request).await.unwrap();
+    assert_eq!(stream.next().await.unwrap().unwrap().token_ids, vec![99]);
+    drop(stream);
+
+    let _ = shutdown_tx.send(());
+    engine_task.await.unwrap();
+    let rendered = METRICS.render().unwrap();
+    assert!(rendered.contains(&format!(
+        "vllm:request_success_total{{model_name=\"{model_name}\",engine=\"5\",finished_reason=\"abort\"}} 1"
+    )));
+    assert!(rendered.contains(&format!(
+        "vllm:e2e_request_latency_seconds_count{{model_name=\"{model_name}\",engine=\"5\"}} 1"
+    )));
+
+    llm.shutdown().await.unwrap();
+}
diff --git a/rust/src/managed-engine/Cargo.toml b/rust/src/managed-engine/Cargo.toml
new file mode 100644
index 000000000000..2bc108b2c530
--- /dev/null
+++ b/rust/src/managed-engine/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "vllm-managed-engine"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+clap.workspace = true
+libc.workspace = true
+tokio = { workspace = true, features = ["process"] }
+tracing.workspace = true
+
+[dev-dependencies]
+expect-test.workspace = true
+
+[lints]
+workspace = true
diff --git a/rust/src/managed-engine/src/cli.rs b/rust/src/managed-engine/src/cli.rs
new file mode 100644
index 000000000000..302737dbd885
--- /dev/null
+++ b/rust/src/managed-engine/src/cli.rs
@@ -0,0 +1,349 @@
+use std::collections::HashSet;
+use std::ffi::OsString;
+
+use clap::error::ErrorKind;
+use clap::{Args, CommandFactory};
+
+use crate::{ManagedEngineConfig, allocate_handshake_port};
+
+/// Managed Python headless-engine CLI arguments.
+#[derive(Debug, Clone, Args, PartialEq, Eq)]
+pub struct ManagedEngineArgs {
+    /// Python executable used to launch the managed headless vLLM engine.
+    #[arg(long, env = "VLLM_RS_PYTHON", default_value = "python3")]
+    pub python: String,
+    /// Host/IP used both for the managed-engine handshake endpoint and the
+    /// frontend-advertised input/output ZMQ socket addresses.
+    #[arg(
+        long = "data-parallel-address",
+        visible_alias = "handshake-host",
+        default_value = "127.0.0.1"
+    )]
+    pub handshake_host: String,
+    /// Optional TCP port for the managed-engine handshake / data-parallel RPC
+    /// endpoint.
+    ///
+    /// When omitted, the CLI allocates an ephemeral port automatically.
+    #[arg(
+        long = "data-parallel-rpc-port",
+        visible_alias = "handshake-port",
+        value_parser = clap::value_parser!(u16).range(1..)
+    )]
+    pub handshake_port: Option<u16>,
+    /// Number of data parallel replicas across the whole deployment.
+    #[arg(long, default_value_t = 1)]
+    pub data_parallel_size: usize,
+    /// Number of data parallel replicas to run on this node.
+    #[arg(long)]
+    pub data_parallel_size_local: Option<usize>,
+
+    /// Additional arguments forwarded to `python -m vllm.entrypoints.cli.main
+    /// serve ...`.
+    ///
+    /// Arguments after an explicit `--` are forwarded verbatim. Before `--`,
+    /// `vllm-rs serve` automatically keeps recognized frontend options on
+    /// the Rust side and forwards everything else to Python.
+    #[arg(
+        last = true,
+        allow_hyphen_values = true,
+        help_heading = "Passthrough arguments"
+    )]
+    pub python_args: Vec<String>,
+}
+
+impl ManagedEngineArgs {
+    /// Build the handshake address shared by the Rust frontend and managed
+    /// Python engine.
+    pub fn handshake_address(&self, handshake_port: u16) -> String {
+        format!("tcp://{}:{}", self.handshake_host, handshake_port)
+    }
+
+    /// Resolve the handshake port, either from the CLI argument (if specified)
+    /// or by allocating a fresh port.
+    pub fn resolve_handshake_port(&self) -> anyhow::Result<u16> {
+        self.handshake_port
+            .map(Ok)
+            .unwrap_or_else(|| allocate_handshake_port(&self.handshake_host))
+    }
+
+    /// Build the managed Python-engine spawn configuration.
+    pub fn into_config(
+        self,
+        model: String,
+        max_model_len: Option<u32>,
+        handshake_port: u16,
+    ) -> ManagedEngineConfig {
+        let mut python_args = self.python_args;
+        // Manually forward some args to the Python engine.
+        if let Some(max_model_len) = max_model_len {
+            python_args.push("--max-model-len".to_string());
+            python_args.push(max_model_len.to_string());
+        }
+        if let Some(data_parallel_size_local) = self.data_parallel_size_local {
+            python_args.push("--data-parallel-size-local".to_string());
+            python_args.push(data_parallel_size_local.to_string());
+        }
+
+        ManagedEngineConfig {
+            python: self.python,
+            model,
+            handshake_host: self.handshake_host,
+            handshake_port,
+            data_parallel_size: self.data_parallel_size,
+            python_args,
+        }
+    }
+
+    /// Return the number of engines that the Rust frontend should expect to
+    /// coordinate with.
+    fn local_engine_count(&self) -> usize {
+        self.data_parallel_size_local.unwrap_or(self.data_parallel_size)
+    }
+
+    /// Return whether the managed Rust frontend only needs to communicate with
+    /// colocated engines.
+    pub fn frontend_local_only(&self) -> bool {
+        self.data_parallel_size_local != Some(0)
+            && self.local_engine_count() == self.data_parallel_size
+    }
+}
+
+/// Python `argparse` accepts these multi-character single-dash aliases, but
+/// `clap` cannot model them directly.
+const PYTHON_MULTI_CHAR_ALIASES: &[(&str, &str)] = &[
+    ("-asc", "--api-server-count"),
+    ("-pp", "--pipeline-parallel-size"),
+    ("-tp", "--tensor-parallel-size"),
+    ("-dcp", "--decode-context-parallel-size"),
+    ("-pcp", "--prefill-context-parallel-size"),
+    ("-dp", "--data-parallel-size"),
+    ("-dpn", "--data-parallel-rank"),
+    ("-dpr", "--data-parallel-start-rank"),
+    ("-dpl", "--data-parallel-size-local"),
+    ("-dpa", "--data-parallel-address"),
+    ("-dpp", "--data-parallel-rpc-port"),
+    ("-dpb", "--data-parallel-backend"),
+    ("-dph", "--data-parallel-hybrid-lb"),
+    ("-dpe", "--data-parallel-external-lb"),
+    ("-ep", "--enable-expert-parallel"),
+    ("-cc", "--compilation-config"),
+    ("-ac", "--attention-config"),
+];
+
+/// Repartition managed-engine argv so Rust-owned flags stay before `--`, while
+/// everything else is forwarded to Python.
+pub fn repartition_managed_engine_args<C>(
+    args: &[OsString],
+    subcommand: Option<&str>,
+) -> Result<Vec<OsString>, clap::Error>
+where
+    C: CommandFactory,
+{
+    let command = C::command();
+    let (prefix, real_args, command) = match subcommand {
+        Some(subcommand) => {
+            if !matches_subcommand(args, subcommand) {
+                return Ok(args.to_vec());
+            };
+
+            let subcommand = command
+                .find_subcommand(subcommand)
+                .expect("managed-engine subcommand should exist");
+
+            (args[..2].to_vec(), &args[2..], subcommand)
+        }
+        None => {
+            let Some(program) = args.first() else {
+                return Ok(args.to_vec());
+            };
+
+            (vec![program.clone()], &args[1..], &command)
+        }
+    };
+
+    let mut repartitioned = prefix;
+    repartitioned.extend(repartition_real_managed_engine_args(real_args, command)?);
+    Ok(repartitioned)
+}
+
+fn repartition_real_managed_engine_args(
+    args: &[OsString],
+    command: &clap::Command,
+) -> Result<Vec<OsString>, clap::Error> {
+    let Some(model) = args.first() else {
+        return Ok(args.to_vec());
+    };
+
+    let model = model.to_string_lossy();
+    if is_help_flag(&model) {
+        return Ok(args.to_vec());
+    }
+    if model == "--" || is_option_like(&model) {
+        return Err(build_missing_model_error(command));
+    }
+
+    let (long_flags, short_flags) = collect_option_names(command);
+    let (front_args, explicit_passthrough, had_separator) = split_managed_engine_args(&args[1..]);
+    let normalized_front_args = normalize_python_arg_aliases(front_args);
+
+    let mut frontend_chunks = Vec::new();
+    let mut python_chunks = Vec::new();
+    let mut current_chunk = Vec::new();
+
+    for arg in normalized_front_args {
+        let text = arg.to_string_lossy();
+        if is_option_like(&text) && !current_chunk.is_empty() {
+            push_chunk(
+                &mut frontend_chunks,
+                &mut python_chunks,
+                std::mem::take(&mut current_chunk),
+                &long_flags,
+                &short_flags,
+            );
+        }
+        current_chunk.push(arg);
+    }
+    if !current_chunk.is_empty() {
+        push_chunk(
+            &mut frontend_chunks,
+            &mut python_chunks,
+            current_chunk,
+            &long_flags,
+            &short_flags,
+        );
+    }
+
+    let mut repartitioned = vec![args[0].clone()];
+    repartitioned.extend(frontend_chunks);
+    if had_separator || !python_chunks.is_empty() || !explicit_passthrough.is_empty() {
+        repartitioned.push("--".into());
+        repartitioned.extend(python_chunks);
+        repartitioned.extend(explicit_passthrough.iter().cloned());
+    }
+
+    Ok(repartitioned)
+}
+
+fn matches_subcommand(args: &[OsString], subcommand: &str) -> bool {
+    args.get(1)
+        .and_then(|arg| arg.to_str())
+        .is_some_and(|candidate| candidate == subcommand)
+}
+
+fn split_managed_engine_args(args: &[OsString]) -> (&[OsString], &[OsString], bool) {
+    if let Some(index) = args.iter().position(|arg| arg == "--") {
+        (&args[..index], &args[index + 1..], true)
+    } else {
+        (args, &[], false)
+    }
+}
+
+fn normalize_python_arg_aliases(args: &[OsString]) -> Vec<OsString> {
+    args.iter()
+        .map(|arg| {
+            let text = arg.to_string_lossy();
+            normalize_python_multi_char_alias(&text)
+                .map(Into::into)
+                .unwrap_or_else(|| arg.clone())
+        })
+        .collect()
+}
+
+fn normalize_python_multi_char_alias(arg: &str) -> Option<String> {
+    find_python_multi_char_alias(arg).map(|canonical| match arg.split_once('=') {
+        Some((_, value)) => format!("{canonical}={value}"),
+        None => canonical.to_string(),
+    })
+}
+
+fn find_python_multi_char_alias(arg: &str) -> Option<&'static str> {
+    PYTHON_MULTI_CHAR_ALIASES.iter().find_map(|&(alias, canonical)| {
+        (arg == alias || arg.starts_with(&format!("{alias}="))).then_some(canonical)
+    })
+}
+
+fn push_chunk(
+    frontend_chunks: &mut Vec<OsString>,
+    python_chunks: &mut Vec<OsString>,
+    chunk: Vec<OsString>,
+    long_flags: &HashSet<String>,
+    short_flags: &HashSet<char>,
+) {
+    if chunk_head_is_frontend_owned(&chunk, long_flags, short_flags) {
+        frontend_chunks.extend(chunk);
+    } else {
+        python_chunks.extend(chunk);
+    }
+}
+
+fn chunk_head_is_frontend_owned(
+    chunk: &[OsString],
+    long_flags: &HashSet<String>,
+    short_flags: &HashSet<char>,
+) -> bool {
+    let Some(head) = chunk.first() else {
+        return false;
+    };
+    let head = head.to_string_lossy();
+
+    if let Some(rest) = head.strip_prefix("--") {
+        let name = rest.split_once('=').map_or(rest, |(name, _)| name);
+        return long_flags.contains(name);
+    }
+
+    let Some(rest) = head.strip_prefix('-') else {
+        return false;
+    };
+    let Some(short) = rest.chars().next() else {
+        return false;
+    };
+    short_flags.contains(&short)
+}
+
+fn collect_option_names(command: &clap::Command) -> (HashSet<String>, HashSet<char>) {
+    let mut long_flags = HashSet::new();
+    let mut short_flags = HashSet::new();
+    for arg in command.get_arguments() {
+        if let Some(names) = arg.get_long_and_visible_aliases() {
+            long_flags.extend(names.into_iter().map(str::to_owned));
+        }
+        if let Some(short) = arg.get_short() {
+            short_flags.insert(short);
+        }
+        if let Some(short_aliases) = arg.get_visible_short_aliases() {
+            short_flags.extend(short_aliases);
+        }
+    }
+
+    long_flags.insert("help".to_string());
+    short_flags.insert('h');
+
+    (long_flags, short_flags)
+}
+
+fn is_option_like(arg: &str) -> bool {
+    if arg == "--" {
+        return false;
+    }
+
+    if let Some(rest) = arg.strip_prefix("--") {
+        return rest.chars().next().is_some_and(char::is_alphabetic);
+    }
+
+    if let Some(rest) = arg.strip_prefix('-') {
+        return rest.chars().next().is_some_and(char::is_alphabetic);
+    }
+
+    false
+}
+
+fn is_help_flag(arg: &str) -> bool {
+    arg == "-h" || arg == "--help"
+}
+
+fn build_missing_model_error(command: &clap::Command) -> clap::Error {
+    command.clone().error(
+        ErrorKind::MissingRequiredArgument,
+        "the model must appear immediately after the command",
+    )
+}
diff --git a/rust/src/managed-engine/src/lib.rs b/rust/src/managed-engine/src/lib.rs
new file mode 100644
index 000000000000..e9812104cb4c
--- /dev/null
+++ b/rust/src/managed-engine/src/lib.rs
@@ -0,0 +1,4 @@
+pub mod cli;
+mod process;
+
+pub use process::{ManagedEngineConfig, ManagedEngineHandle, allocate_handshake_port};
diff --git a/rust/src/managed-engine/src/process.rs b/rust/src/managed-engine/src/process.rs
new file mode 100644
index 000000000000..0a506244dc3f
--- /dev/null
+++ b/rust/src/managed-engine/src/process.rs
@@ -0,0 +1,263 @@
+use std::io;
+use std::net::TcpListener;
+use std::process::{Command as StdCommand, ExitStatus, Stdio};
+use std::sync::Arc;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::time::Duration;
+
+use anyhow::{Context, Result};
+use tokio::process::{Child, Command};
+use tokio::sync::Mutex;
+use tokio::time::interval;
+use tracing::info;
+
+const CHILD_POLL_INTERVAL: Duration = Duration::from_millis(200);
+const MIN_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(5);
+
+/// Allocate one ephemeral TCP port for the managed headless-engine handshake on
+/// the given host.
+pub fn allocate_handshake_port(host: &str) -> Result<u16> {
+    let listener = TcpListener::bind((host, 0)).context("failed to allocate handshake port")?;
+    let port = listener
+        .local_addr()
+        .context("failed to inspect allocated handshake listener address")?
+        .port();
+    Ok(port)
+}
+
+/// Spawn configuration for one managed headless Python vLLM engine.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ManagedEngineConfig {
+    /// Python executable used to launch `vllm.entrypoints.cli.main`.
+    pub python: String,
+    /// Model identifier passed to `vllm ... serve <model>`.
+    pub model: String,
+    /// Host portion of the headless-engine handshake endpoint.
+    pub handshake_host: String,
+    /// Port portion of the headless-engine handshake endpoint.
+    pub handshake_port: u16,
+    /// Number of data parallel replicas across the whole deployment.
+    ///
+    /// The per-node replica count is forwarded separately in `python_args` as
+    /// `--data-parallel-size-local`.
+    pub data_parallel_size: usize,
+    /// Extra CLI arguments forwarded verbatim to Python vLLM.
+    pub python_args: Vec<String>,
+}
+
+impl ManagedEngineConfig {
+    /// Render the handshake address that the Rust frontend should dial.
+    pub fn handshake_address(&self) -> String {
+        format!("tcp://{}:{}", self.handshake_host, self.handshake_port)
+    }
+
+    /// Build the concrete Python command line for the managed headless engine.
+    pub fn to_command(&self) -> StdCommand {
+        let mut command = StdCommand::new(&self.python);
+        command
+            .arg("-m")
+            .arg("vllm.entrypoints.cli.main")
+            .arg("serve")
+            .arg(&self.model)
+            .arg("--headless")
+            .arg("--data-parallel-address")
+            .arg(&self.handshake_host)
+            .arg("--data-parallel-rpc-port")
+            .arg(self.handshake_port.to_string())
+            .arg("--data-parallel-size")
+            .arg(self.data_parallel_size.to_string())
+            .args(&self.python_args);
+        command
+    }
+}
+
+/// RAII-style handle for one managed Python headless engine subprocess.
+#[derive(Clone)]
+pub struct ManagedEngineHandle {
+    child: Arc<Mutex<Child>>,
+    shutdown_started: Arc<AtomicBool>,
+}
+
+impl ManagedEngineHandle {
+    /// Spawn one managed Python headless engine and return a handle for
+    /// monitoring it.
+    pub async fn spawn(config: ManagedEngineConfig) -> Result<Self> {
+        let command = config.to_command();
+        info!(
+            handshake_address = %config.handshake_address(),
+            ?command,
+            "starting managed Python headless engine"
+        );
+
+        let mut command = Command::from(command);
+        command.stdin(Stdio::null()).stdout(Stdio::inherit()).stderr(Stdio::inherit());
+
+        process_group::configure(&mut command);
+
+        let child = command.spawn().context("failed to spawn managed engine")?;
+
+        Ok(Self {
+            child: Arc::new(Mutex::new(child)),
+            shutdown_started: Arc::new(AtomicBool::new(false)),
+        })
+    }
+
+    /// Poll whether the managed engine has exited yet.
+    pub async fn try_wait(&self) -> Option<ExitStatus> {
+        let mut child = self.child.lock().await;
+        child.try_wait().expect("failed to poll the status of managed engine")
+    }
+
+    /// Wait until the managed engine exits.
+    pub async fn wait_for_exit(&self) -> ExitStatus {
+        let mut interval = interval(CHILD_POLL_INTERVAL);
+        loop {
+            interval.tick().await;
+            if let Some(status) = self.try_wait().await {
+                return status;
+            }
+        }
+    }
+
+    /// Terminate the managed engine process group and wait for it to stop.
+    pub async fn shutdown(&self, timeout: Duration) -> Result<()> {
+        if self.shutdown_started.swap(true, Ordering::SeqCst) {
+            return Ok(());
+        }
+
+        let Some(pid) = self.child.lock().await.id() else {
+            return Ok(());
+        };
+
+        // Enforce a minimum shutdown timeout to give the engine process enough time to
+        // clean up.
+        let shutdown_timeout = std::cmp::max(timeout, MIN_SHUTDOWN_TIMEOUT);
+
+        // First, try to gracefully terminate.
+        info!(
+            pid,
+            ?shutdown_timeout,
+            "shutting down managed engine with SIGTERM"
+        );
+        process_group::terminate(pid)?;
+
+        // Wait for the process to exit on its own.
+        if tokio::time::timeout(shutdown_timeout, self.wait_for_exit()).await.is_ok() {
+            return Ok(());
+        }
+
+        // If it doesn't exit within the timeout, force kill it.
+        info!(
+            pid,
+            "managed engine did not exit within timeout, sending SIGKILL"
+        );
+        process_group::kill(pid)?;
+
+        let _ = self.wait_for_exit().await;
+        Ok(())
+    }
+}
+
+/// Process group helper functions for managing the Python subprocess and its
+/// potential children in a platform-aware way.
+mod process_group {
+    use super::*;
+
+    /// Place the Python child into its own process group so `serve` can tear
+    /// down the whole subtree rather than just the immediate shell process.
+    pub fn configure(command: &mut Command) {
+        unsafe {
+            command.pre_exec(|| {
+                if libc::setpgid(0, 0) != 0 {
+                    return Err(io::Error::last_os_error());
+                }
+                Ok(())
+            });
+        }
+    }
+
+    /// Send SIGTERM to the managed Python process group.
+    pub fn terminate(pid: u32) -> Result<()> {
+        signal(pid, libc::SIGTERM)
+    }
+
+    /// Send SIGKILL to the managed Python process group.
+    pub fn kill(pid: u32) -> Result<()> {
+        signal(pid, libc::SIGKILL)
+    }
+
+    /// Deliver one signal to the managed Python process group.
+    fn signal(pid: u32, signal: i32) -> Result<()> {
+        let rc = unsafe { libc::kill(-(pid as i32), signal) };
+        if rc == 0 {
+            return Ok(());
+        }
+
+        let error = io::Error::last_os_error();
+        if matches!(error.raw_os_error(), Some(code) if code == libc::ESRCH) {
+            return Ok(());
+        }
+        Err(error).context("failed to signal managed engine process group")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use expect_test::expect;
+
+    use super::{ManagedEngineConfig, allocate_handshake_port};
+
+    #[test]
+    fn command_snapshot() {
+        let config = ManagedEngineConfig {
+            python: "python3".to_string(),
+            model: "Qwen/Qwen3-0.6B".to_string(),
+            handshake_host: "127.0.0.1".to_string(),
+            handshake_port: 62100,
+            data_parallel_size: 4,
+            python_args: vec![
+                "--data-parallel-size-local".to_string(),
+                "2".to_string(),
+                "--data-parallel-start-rank".to_string(),
+                "2".to_string(),
+                "--dtype".to_string(),
+                "float16".to_string(),
+                "--max-model-len".to_string(),
+                "512".to_string(),
+            ],
+        };
+        let command = config.to_command();
+        let args = command.get_args().collect::<Vec<_>>();
+
+        expect![[r#"
+            [
+                "-m",
+                "vllm.entrypoints.cli.main",
+                "serve",
+                "Qwen/Qwen3-0.6B",
+                "--headless",
+                "--data-parallel-address",
+                "127.0.0.1",
+                "--data-parallel-rpc-port",
+                "62100",
+                "--data-parallel-size",
+                "4",
+                "--data-parallel-size-local",
+                "2",
+                "--data-parallel-start-rank",
+                "2",
+                "--dtype",
+                "float16",
+                "--max-model-len",
+                "512",
+            ]
+        "#]]
+        .assert_debug_eq(&args);
+    }
+
+    #[test]
+    fn allocate_handshake_port_returns_non_zero_port() {
+        let port = allocate_handshake_port("127.0.0.1").unwrap();
+        assert_ne!(port, 0);
+    }
+}
diff --git a/rust/src/metrics/Cargo.toml b/rust/src/metrics/Cargo.toml
new file mode 100644
index 000000000000..e6b579b97a47
--- /dev/null
+++ b/rust/src/metrics/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "vllm-metrics"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+prometheus-client.workspace = true
+
+[lints]
+workspace = true
diff --git a/rust/src/metrics/src/api_server.rs b/rust/src/metrics/src/api_server.rs
new file mode 100644
index 000000000000..5ef6600a385d
--- /dev/null
+++ b/rust/src/metrics/src/api_server.rs
@@ -0,0 +1,78 @@
+use prometheus_client::encoding::EncodeLabelSet;
+use prometheus_client::metrics::family::Family;
+use prometheus_client::metrics::histogram::Histogram;
+use prometheus_client::registry::Registry;
+
+use crate::U64Counter;
+
+const HTTP_REQUEST_DURATION_BUCKETS: [f64; 3] = [0.1, 0.5, 1.0];
+const HTTP_REQUEST_DURATION_HIGHR_BUCKETS: [f64; 21] = [
+    0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0,
+    7.5, 10.0, 30.0, 60.0,
+];
+
+fn http_request_duration_histogram() -> Histogram {
+    Histogram::new(HTTP_REQUEST_DURATION_BUCKETS.iter().copied())
+}
+
+fn http_request_duration_highr_histogram() -> Histogram {
+    Histogram::new(HTTP_REQUEST_DURATION_HIGHR_BUCKETS.iter().copied())
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
+pub struct HttpRequestLabels {
+    pub method: String,
+    pub status: &'static str,
+    pub handler: String,
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
+pub struct HttpHandlerLabels {
+    pub method: String,
+    pub handler: String,
+}
+
+pub(crate) type HttpRequestCounterFamily = Family<HttpRequestLabels, U64Counter>;
+pub(crate) type HttpHandlerHistogramFamily =
+    Family<HttpHandlerLabels, Histogram, fn() -> Histogram>;
+
+/// API-server Prometheus families exported from the HTTP middleware layer.
+pub struct ApiServerMetrics {
+    pub http_requests: HttpRequestCounterFamily,
+    pub http_request_duration_seconds: HttpHandlerHistogramFamily,
+    pub http_request_duration_highr_seconds: Histogram,
+}
+
+impl ApiServerMetrics {
+    /// Register the API-server metric families into the shared registry.
+    pub(crate) fn register(registry: &mut Registry) -> Self {
+        let http_requests = HttpRequestCounterFamily::default();
+        registry.register(
+            "http_requests",
+            "Total number of HTTP requests by method, status, and handler.",
+            http_requests.clone(),
+        );
+
+        let http_request_duration_seconds = HttpHandlerHistogramFamily::new_with_constructor(
+            http_request_duration_histogram as fn() -> Histogram,
+        );
+        registry.register(
+            "http_request_duration_seconds",
+            "Duration of HTTP requests in seconds grouped by method and handler.",
+            http_request_duration_seconds.clone(),
+        );
+
+        let http_request_duration_highr_seconds = http_request_duration_highr_histogram();
+        registry.register(
+            "http_request_duration_highr_seconds",
+            "High-resolution duration of HTTP requests in seconds.",
+            http_request_duration_highr_seconds.clone(),
+        );
+
+        Self {
+            http_requests,
+            http_request_duration_seconds,
+            http_request_duration_highr_seconds,
+        }
+    }
+}
diff --git a/rust/src/metrics/src/lib.rs b/rust/src/metrics/src/lib.rs
new file mode 100644
index 000000000000..8f0db53d3ff9
--- /dev/null
+++ b/rust/src/metrics/src/lib.rs
@@ -0,0 +1,76 @@
+use std::fmt;
+use std::sync::LazyLock;
+use std::sync::atomic::AtomicU64;
+
+use prometheus_client::encoding::text::encode;
+use prometheus_client::metrics::counter::Counter;
+use prometheus_client::metrics::family::Family;
+use prometheus_client::metrics::gauge::Gauge;
+use prometheus_client::metrics::histogram::Histogram;
+use prometheus_client::registry::Registry;
+
+mod api_server;
+mod request;
+mod scheduler;
+
+pub use api_server::*;
+pub use request::*;
+pub use scheduler::*;
+
+// Note: `prometheus-client` appends the `_total` suffix automatically when
+// encoding counters, so all counter family registration names in this crate
+// must use the base metric name without a trailing `_total`.
+pub type U64Counter = Counter<u64, AtomicU64>;
+pub type U64Gauge = Gauge<u64, AtomicU64>;
+pub type F64Gauge = Gauge<f64, AtomicU64>;
+pub(crate) type HistogramFamily = Family<EngineLabels, Histogram, fn() -> Histogram>;
+
+/// Shared Prometheus registry for frontend metrics.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/v1/metrics/loggers.py#L389-L1004>
+pub struct Metrics {
+    registry: Registry,
+    pub scheduler: SchedulerMetrics,
+    pub request: RequestMetrics,
+    pub api_server: ApiServerMetrics,
+}
+
+impl Metrics {
+    /// Construct a new metrics registry.
+    pub fn new() -> Self {
+        let mut registry = Registry::default();
+        let scheduler = SchedulerMetrics::register(&mut registry);
+        let request = RequestMetrics::register(&mut registry);
+        let api_server = ApiServerMetrics::register(&mut registry);
+
+        Self {
+            registry,
+            scheduler,
+            request,
+            api_server,
+        }
+    }
+
+    /// Render the current metrics registry into Prometheus/OpenMetrics text
+    /// format.
+    pub fn render(&self) -> Result<String, fmt::Error> {
+        let mut output = String::new();
+        encode(&mut output, &self.registry)?;
+        Ok(output)
+    }
+
+    /// Return the registry owned by this metrics object.
+    pub fn registry(&self) -> &Registry {
+        &self.registry
+    }
+}
+
+impl Default for Metrics {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Process-global metrics registry shared by the frontend crates.
+pub static METRICS: LazyLock<Metrics> = LazyLock::new(Metrics::new);
diff --git a/rust/src/metrics/src/request.rs b/rust/src/metrics/src/request.rs
new file mode 100644
index 000000000000..421ff3034910
--- /dev/null
+++ b/rust/src/metrics/src/request.rs
@@ -0,0 +1,298 @@
+use prometheus_client::encoding::EncodeLabelSet;
+use prometheus_client::metrics::family::Family;
+use prometheus_client::metrics::histogram::Histogram;
+use prometheus_client::registry::Registry;
+
+use crate::{EngineLabels, HistogramFamily, U64Counter};
+
+const TTFT_BUCKETS: [f64; 22] = [
+    0.001, 0.005, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0,
+    20.0, 40.0, 80.0, 160.0, 640.0, 2560.0,
+];
+const ITL_BUCKETS: [f64; 19] = [
+    0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 20.0,
+    40.0, 80.0,
+];
+const REQUEST_LATENCY_BUCKETS: [f64; 21] = [
+    0.3, 0.5, 0.8, 1.0, 1.5, 2.0, 2.5, 5.0, 10.0, 15.0, 20.0, 30.0, 40.0, 50.0, 60.0, 120.0, 240.0,
+    480.0, 960.0, 1920.0, 7680.0,
+];
+const REQUEST_PARAMS_N_BUCKETS: [f64; 5] = [1.0, 2.0, 5.0, 10.0, 20.0];
+
+fn build_1_2_5_buckets(max_value: u32) -> Vec<f64> {
+    let mut buckets = Vec::new();
+    let mut exponent = 0;
+    loop {
+        for mantissa in [1_u32, 2, 5] {
+            let value = mantissa * 10_u32.pow(exponent);
+            if value <= max_value {
+                buckets.push(value as f64);
+            } else {
+                if buckets.last().copied() != Some(max_value as f64) {
+                    buckets.push(max_value as f64);
+                }
+                return buckets;
+            }
+        }
+        exponent += 1;
+    }
+}
+
+fn time_to_first_token_histogram() -> Histogram {
+    Histogram::new(TTFT_BUCKETS.iter().copied())
+}
+
+fn inter_token_latency_histogram() -> Histogram {
+    Histogram::new(ITL_BUCKETS.iter().copied())
+}
+
+fn request_time_per_output_token_histogram() -> Histogram {
+    Histogram::new(ITL_BUCKETS.iter().copied())
+}
+
+fn request_latency_histogram() -> Histogram {
+    Histogram::new(REQUEST_LATENCY_BUCKETS.iter().copied())
+}
+
+fn request_token_count_histogram() -> Histogram {
+    // TODO: determine max value based on `max_model_len`.
+    Histogram::new(build_1_2_5_buckets(131_072))
+}
+
+fn request_params_n_histogram() -> Histogram {
+    Histogram::new(REQUEST_PARAMS_N_BUCKETS.iter().copied())
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
+pub struct FinishedReasonLabels {
+    pub model_name: String,
+    pub engine: u32,
+    pub finished_reason: &'static str,
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
+pub struct PromptTokenSourceLabels {
+    pub model_name: String,
+    pub engine: u32,
+    pub source: &'static str,
+}
+
+pub(crate) type FinishedReasonCounterFamily = Family<FinishedReasonLabels, U64Counter>;
+pub(crate) type PromptTokenSourceCounterFamily = Family<PromptTokenSourceLabels, U64Counter>;
+
+/// Request-lifecycle Prometheus families exported from the `llm` layer.
+pub struct RequestMetrics {
+    // Request-derived counters.
+    pub num_preemptions: Family<EngineLabels, U64Counter>,
+    pub prompt_tokens: Family<EngineLabels, U64Counter>,
+    pub prompt_tokens_by_source: PromptTokenSourceCounterFamily,
+    pub prompt_tokens_cached: Family<EngineLabels, U64Counter>,
+    pub generation_tokens: Family<EngineLabels, U64Counter>,
+
+    // We intentionally don't support iteration-level histograms for now, since it seems to make
+    // more sense if the engine maintains these metrics and frontend simply forwards.
+    //
+    // pub iteration_tokens_total: HistogramFamily,
+
+    // Request lifecycle counters and histograms.
+    pub request_success: FinishedReasonCounterFamily,
+    pub request_prompt_tokens: HistogramFamily,
+    pub request_generation_tokens: HistogramFamily,
+    pub request_max_num_generation_tokens: HistogramFamily,
+    pub request_params_max_tokens: HistogramFamily,
+    pub request_params_n: HistogramFamily,
+    pub request_prefill_kv_computed_tokens: HistogramFamily,
+    pub time_to_first_token_seconds: HistogramFamily,
+    pub inter_token_latency_seconds: HistogramFamily,
+    pub e2e_request_latency_seconds: HistogramFamily,
+    pub request_queue_time_seconds: HistogramFamily,
+    pub request_prefill_time_seconds: HistogramFamily,
+    pub request_decode_time_seconds: HistogramFamily,
+    pub request_inference_time_seconds: HistogramFamily,
+    pub request_time_per_output_token_seconds: HistogramFamily,
+}
+
+impl RequestMetrics {
+    /// Register the request-oriented metric families into the shared registry.
+    pub(crate) fn register(registry: &mut Registry) -> Self {
+        // Request-derived counters.
+        let num_preemptions = Family::default();
+        registry.register(
+            "vllm:num_preemptions",
+            "Cumulative number of preemption events.",
+            num_preemptions.clone(),
+        );
+
+        let prompt_tokens = Family::default();
+        registry.register(
+            "vllm:prompt_tokens",
+            "Number of prefill tokens processed.",
+            prompt_tokens.clone(),
+        );
+
+        let prompt_tokens_by_source = Family::default();
+        registry.register(
+            "vllm:prompt_tokens_by_source",
+            "Number of prompt tokens by source.",
+            prompt_tokens_by_source.clone(),
+        );
+
+        let prompt_tokens_cached = Family::default();
+        registry.register(
+            "vllm:prompt_tokens_cached",
+            "Number of prompt tokens with prefix cache hits.",
+            prompt_tokens_cached.clone(),
+        );
+
+        let generation_tokens = Family::default();
+        registry.register(
+            "vllm:generation_tokens",
+            "Number of generation tokens processed.",
+            generation_tokens.clone(),
+        );
+
+        // Request lifecycle counters and histograms.
+        let request_success = Family::default();
+        registry.register(
+            "vllm:request_success",
+            "Count of successfully processed requests.",
+            request_success.clone(),
+        );
+
+        let request_prompt_tokens =
+            Family::new_with_constructor(request_token_count_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:request_prompt_tokens",
+            "Number of prefill tokens processed.",
+            request_prompt_tokens.clone(),
+        );
+
+        let request_generation_tokens =
+            Family::new_with_constructor(request_token_count_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:request_generation_tokens",
+            "Number of generation tokens processed.",
+            request_generation_tokens.clone(),
+        );
+
+        let request_max_num_generation_tokens =
+            Family::new_with_constructor(request_token_count_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:request_max_num_generation_tokens",
+            "Histogram of maximum number of requested generation tokens.",
+            request_max_num_generation_tokens.clone(),
+        );
+
+        let request_params_max_tokens =
+            Family::new_with_constructor(request_token_count_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:request_params_max_tokens",
+            "Histogram of the max_tokens request parameter.",
+            request_params_max_tokens.clone(),
+        );
+
+        let request_params_n =
+            Family::new_with_constructor(request_params_n_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:request_params_n",
+            "Histogram of the n request parameter.",
+            request_params_n.clone(),
+        );
+
+        let request_prefill_kv_computed_tokens =
+            Family::new_with_constructor(request_token_count_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:request_prefill_kv_computed_tokens",
+            "Histogram of new KV tokens computed during prefill (excluding cached tokens).",
+            request_prefill_kv_computed_tokens.clone(),
+        );
+
+        let time_to_first_token_seconds =
+            Family::new_with_constructor(time_to_first_token_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:time_to_first_token_seconds",
+            "Histogram of time to first token in seconds.",
+            time_to_first_token_seconds.clone(),
+        );
+
+        let inter_token_latency_seconds =
+            Family::new_with_constructor(inter_token_latency_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:inter_token_latency_seconds",
+            "Histogram of inter-token latency in seconds.",
+            inter_token_latency_seconds.clone(),
+        );
+
+        let e2e_request_latency_seconds =
+            Family::new_with_constructor(request_latency_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:e2e_request_latency_seconds",
+            "Histogram of e2e request latency in seconds.",
+            e2e_request_latency_seconds.clone(),
+        );
+
+        let request_queue_time_seconds =
+            Family::new_with_constructor(request_latency_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:request_queue_time_seconds",
+            "Histogram of time spent in WAITING phase for request.",
+            request_queue_time_seconds.clone(),
+        );
+
+        let request_prefill_time_seconds =
+            Family::new_with_constructor(request_latency_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:request_prefill_time_seconds",
+            "Histogram of time spent in PREFILL phase for request.",
+            request_prefill_time_seconds.clone(),
+        );
+
+        let request_decode_time_seconds =
+            Family::new_with_constructor(request_latency_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:request_decode_time_seconds",
+            "Histogram of time spent in DECODE phase for request.",
+            request_decode_time_seconds.clone(),
+        );
+
+        let request_inference_time_seconds =
+            Family::new_with_constructor(request_latency_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:request_inference_time_seconds",
+            "Histogram of time spent in RUNNING phase for request.",
+            request_inference_time_seconds.clone(),
+        );
+
+        let request_time_per_output_token_seconds = Family::new_with_constructor(
+            request_time_per_output_token_histogram as fn() -> Histogram,
+        );
+        registry.register(
+            "vllm:request_time_per_output_token_seconds",
+            "Histogram of time_per_output_token_seconds per request.",
+            request_time_per_output_token_seconds.clone(),
+        );
+
+        Self {
+            num_preemptions,
+            prompt_tokens,
+            prompt_tokens_by_source,
+            prompt_tokens_cached,
+            generation_tokens,
+            request_success,
+            request_prompt_tokens,
+            request_generation_tokens,
+            request_max_num_generation_tokens,
+            request_params_max_tokens,
+            request_params_n,
+            request_prefill_kv_computed_tokens,
+            time_to_first_token_seconds,
+            inter_token_latency_seconds,
+            e2e_request_latency_seconds,
+            request_queue_time_seconds,
+            request_prefill_time_seconds,
+            request_decode_time_seconds,
+            request_inference_time_seconds,
+            request_time_per_output_token_seconds,
+        }
+    }
+}
diff --git a/rust/src/metrics/src/scheduler.rs b/rust/src/metrics/src/scheduler.rs
new file mode 100644
index 000000000000..0acbdf0fa753
--- /dev/null
+++ b/rust/src/metrics/src/scheduler.rs
@@ -0,0 +1,272 @@
+use prometheus_client::encoding::EncodeLabelSet;
+use prometheus_client::metrics::family::Family;
+use prometheus_client::metrics::histogram::Histogram;
+use prometheus_client::registry::Registry;
+
+use crate::{F64Gauge, HistogramFamily, U64Counter, U64Gauge};
+
+const KV_CACHE_RESIDENCY_BUCKETS: [f64; 21] = [
+    0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0, 30.0, 60.0,
+    120.0, 300.0, 600.0, 1200.0, 1800.0,
+];
+
+fn kv_block_lifetime_histogram() -> Histogram {
+    Histogram::new(KV_CACHE_RESIDENCY_BUCKETS.iter().copied())
+}
+
+fn kv_block_idle_before_evict_histogram() -> Histogram {
+    Histogram::new(KV_CACHE_RESIDENCY_BUCKETS.iter().copied())
+}
+
+fn kv_block_reuse_gap_histogram() -> Histogram {
+    Histogram::new(KV_CACHE_RESIDENCY_BUCKETS.iter().copied())
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
+pub struct EngineLabels {
+    pub model_name: String,
+    pub engine: u32,
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
+pub struct EnginePositionLabels {
+    pub model_name: String,
+    pub engine: u32,
+    pub position: u32,
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
+pub struct WaitingReasonLabels {
+    pub model_name: String,
+    pub engine: u32,
+    pub reason: &'static str,
+}
+
+/// Scheduler/batch-scoped Prometheus families exported from `SchedulerStats`.
+pub struct SchedulerMetrics {
+    // Scheduler state gauges.
+    pub scheduler_running: Family<EngineLabels, U64Gauge>,
+    pub scheduler_waiting: Family<EngineLabels, U64Gauge>,
+    pub scheduler_waiting_by_reason: Family<WaitingReasonLabels, U64Gauge>,
+    pub kv_cache_usage: Family<EngineLabels, F64Gauge>,
+
+    // Prefix-cache counters, including the connector-backed external cache path.
+    pub prefix_cache_queries: Family<EngineLabels, U64Counter>,
+    pub prefix_cache_hits: Family<EngineLabels, U64Counter>,
+    pub external_prefix_cache_queries: Family<EngineLabels, U64Counter>,
+    pub external_prefix_cache_hits: Family<EngineLabels, U64Counter>,
+
+    // Speculative decoding counters.
+    pub spec_decode_num_drafts: Family<EngineLabels, U64Counter>,
+    pub spec_decode_num_draft_tokens: Family<EngineLabels, U64Counter>,
+    pub spec_decode_num_accepted_tokens: Family<EngineLabels, U64Counter>,
+    pub spec_decode_num_accepted_tokens_per_pos: Family<EnginePositionLabels, U64Counter>,
+
+    // Per-engine performance / MFU counters.
+    pub estimated_flops_per_gpu: Family<EngineLabels, U64Counter>,
+    pub estimated_read_bytes_per_gpu: Family<EngineLabels, U64Counter>,
+    pub estimated_write_bytes_per_gpu: Family<EngineLabels, U64Counter>,
+
+    // Sampled KV-cache residency histograms.
+    pub kv_block_lifetime_seconds: HistogramFamily,
+    pub kv_block_idle_before_evict_seconds: HistogramFamily,
+    pub kv_block_reuse_gap_seconds: HistogramFamily,
+}
+
+impl SchedulerMetrics {
+    /// Register the scheduler-oriented metric families into the shared
+    /// registry.
+    pub(crate) fn register(registry: &mut Registry) -> Self {
+        // Scheduler state gauges.
+        let scheduler_running = Family::default();
+        registry.register(
+            "vllm:num_requests_running",
+            "Number of requests in model execution batches",
+            scheduler_running.clone(),
+        );
+
+        let scheduler_waiting = Family::default();
+        registry.register(
+            "vllm:num_requests_waiting",
+            "Number of requests waiting to be processed",
+            scheduler_waiting.clone(),
+        );
+
+        let scheduler_waiting_by_reason = Family::default();
+        registry.register(
+            "vllm:num_requests_waiting_by_reason",
+            "Number of waiting requests by reason. \
+             Reason labels: 'capacity' = waiting for scheduling capacity; \
+             'deferred' = deferred by transient constraints (LoRA budget, KV transfer, \
+             blocked status). Sum of all reasons equals vllm:num_requests_waiting.",
+            scheduler_waiting_by_reason.clone(),
+        );
+
+        let kv_cache_usage = Family::default();
+        registry.register(
+            "vllm:kv_cache_usage_perc",
+            "KV-cache usage. 1 means 100 percent usage",
+            kv_cache_usage.clone(),
+        );
+
+        // Prefix-cache counters, including the connector-backed external cache path.
+        let prefix_cache_queries = Family::default();
+        registry.register(
+            "vllm:prefix_cache_queries",
+            "Prefix cache queries, in terms of number of queried tokens",
+            prefix_cache_queries.clone(),
+        );
+
+        let prefix_cache_hits = Family::default();
+        registry.register(
+            "vllm:prefix_cache_hits",
+            "Prefix cache hits, in terms of number of cached tokens.",
+            prefix_cache_hits.clone(),
+        );
+
+        let external_prefix_cache_queries = Family::default();
+        registry.register(
+            "vllm:external_prefix_cache_queries",
+            "External prefix cache queries from KV connector cross-instance cache sharing, in terms of number of queried tokens.",
+            external_prefix_cache_queries.clone(),
+        );
+
+        let external_prefix_cache_hits = Family::default();
+        registry.register(
+            "vllm:external_prefix_cache_hits",
+            "External prefix cache hits from KV connector cross-instance cache sharing, in terms of number of cached tokens.",
+            external_prefix_cache_hits.clone(),
+        );
+
+        // Speculative decoding counters.
+        let spec_decode_num_drafts = Family::default();
+        registry.register(
+            "vllm:spec_decode_num_drafts",
+            "Number of spec decoding drafts.",
+            spec_decode_num_drafts.clone(),
+        );
+
+        let spec_decode_num_draft_tokens = Family::default();
+        registry.register(
+            "vllm:spec_decode_num_draft_tokens",
+            "Number of draft tokens.",
+            spec_decode_num_draft_tokens.clone(),
+        );
+
+        let spec_decode_num_accepted_tokens = Family::default();
+        registry.register(
+            "vllm:spec_decode_num_accepted_tokens",
+            "Number of accepted tokens.",
+            spec_decode_num_accepted_tokens.clone(),
+        );
+
+        let spec_decode_num_accepted_tokens_per_pos = Family::default();
+        registry.register(
+            "vllm:spec_decode_num_accepted_tokens_per_pos",
+            "Accepted tokens per draft position.",
+            spec_decode_num_accepted_tokens_per_pos.clone(),
+        );
+
+        // Per-engine performance / MFU counters.
+        let estimated_flops_per_gpu = Family::default();
+        registry.register(
+            "vllm:estimated_flops_per_gpu",
+            "Estimated number of floating point operations per GPU (for Model Flops Utilization calculations).",
+            estimated_flops_per_gpu.clone(),
+        );
+
+        let estimated_read_bytes_per_gpu = Family::default();
+        registry.register(
+            "vllm:estimated_read_bytes_per_gpu",
+            "Estimated number of bytes read from memory per GPU (for Model Flops Utilization calculations).",
+            estimated_read_bytes_per_gpu.clone(),
+        );
+
+        let estimated_write_bytes_per_gpu = Family::default();
+        registry.register(
+            "vllm:estimated_write_bytes_per_gpu",
+            "Estimated number of bytes written to memory per GPU (for Model Flops Utilization calculations).",
+            estimated_write_bytes_per_gpu.clone(),
+        );
+
+        // Sampled KV-cache residency histograms.
+        let kv_block_lifetime_seconds =
+            Family::new_with_constructor(kv_block_lifetime_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:kv_block_lifetime_seconds",
+            "Histogram of KV cache block lifetime from allocation to eviction. Sampled metrics (controlled by --kv-cache-metrics-sample).",
+            kv_block_lifetime_seconds.clone(),
+        );
+
+        let kv_block_idle_before_evict_seconds =
+            Family::new_with_constructor(kv_block_idle_before_evict_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:kv_block_idle_before_evict_seconds",
+            "Histogram of idle time before KV cache block eviction. Sampled metrics (controlled by --kv-cache-metrics-sample).",
+            kv_block_idle_before_evict_seconds.clone(),
+        );
+
+        let kv_block_reuse_gap_seconds =
+            Family::new_with_constructor(kv_block_reuse_gap_histogram as fn() -> Histogram);
+        registry.register(
+            "vllm:kv_block_reuse_gap_seconds",
+            "Histogram of time gaps between consecutive KV cache block accesses. Only the most recent accesses are recorded (ring buffer). Sampled metrics (controlled by --kv-cache-metrics-sample).",
+            kv_block_reuse_gap_seconds.clone(),
+        );
+
+        Self {
+            scheduler_running,
+            scheduler_waiting,
+            scheduler_waiting_by_reason,
+            kv_cache_usage,
+            prefix_cache_queries,
+            prefix_cache_hits,
+            external_prefix_cache_queries,
+            external_prefix_cache_hits,
+            spec_decode_num_drafts,
+            spec_decode_num_draft_tokens,
+            spec_decode_num_accepted_tokens,
+            spec_decode_num_accepted_tokens_per_pos,
+            estimated_flops_per_gpu,
+            estimated_read_bytes_per_gpu,
+            estimated_write_bytes_per_gpu,
+            kv_block_lifetime_seconds,
+            kv_block_idle_before_evict_seconds,
+            kv_block_reuse_gap_seconds,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::{EngineLabels, Metrics};
+
+    #[test]
+    fn perf_counters_render_with_a_single_total_suffix() {
+        let metrics = Metrics::new();
+        let labels = EngineLabels {
+            model_name: "model".to_string(),
+            engine: 0,
+        };
+
+        metrics.scheduler.estimated_flops_per_gpu.get_or_create(&labels).inc();
+        metrics.scheduler.estimated_read_bytes_per_gpu.get_or_create(&labels).inc();
+        metrics.scheduler.estimated_write_bytes_per_gpu.get_or_create(&labels).inc();
+
+        let rendered = metrics.render().unwrap();
+        assert!(
+            rendered.contains(
+                "vllm:estimated_flops_per_gpu_total{model_name=\"model\",engine=\"0\"} 1"
+            )
+        );
+        assert!(rendered.contains(
+            "vllm:estimated_read_bytes_per_gpu_total{model_name=\"model\",engine=\"0\"} 1"
+        ));
+        assert!(rendered.contains(
+            "vllm:estimated_write_bytes_per_gpu_total{model_name=\"model\",engine=\"0\"} 1"
+        ));
+        assert!(!rendered.contains("vllm:estimated_flops_per_gpu_total_total"));
+        assert!(!rendered.contains("vllm:estimated_read_bytes_per_gpu_total_total"));
+        assert!(!rendered.contains("vllm:estimated_write_bytes_per_gpu_total_total"));
+    }
+}
diff --git a/rust/src/reasoning-parser/Cargo.toml b/rust/src/reasoning-parser/Cargo.toml
new file mode 100644
index 000000000000..d6500a7b0c1d
--- /dev/null
+++ b/rust/src/reasoning-parser/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "vllm-reasoning-parser"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+thiserror.workspace = true
+vllm-tokenizer.workspace = true
+
+[lints]
+workspace = true
diff --git a/rust/src/reasoning-parser/src/cohere_cmd.rs b/rust/src/reasoning-parser/src/cohere_cmd.rs
new file mode 100644
index 000000000000..9acaf3c5f1e3
--- /dev/null
+++ b/rust/src/reasoning-parser/src/cohere_cmd.rs
@@ -0,0 +1,45 @@
+use vllm_tokenizer::DynTokenizer;
+
+use super::{DelimitedReasoningParser, ReasoningDelta, ReasoningParser, Result};
+
+/// Reasoning parser for Cohere Command models that use explicit START/END tags.
+pub struct CohereCmdReasoningParser {
+    inner: DelimitedReasoningParser,
+}
+
+impl CohereCmdReasoningParser {
+    /// Create a Cohere Command parser backed by the shared delimited state
+    /// machine.
+    pub fn new(tokenizer: DynTokenizer) -> Result<Self> {
+        Ok(Self {
+            inner: DelimitedReasoningParser::new(
+                tokenizer,
+                "<|START_THINKING|>",
+                "<|END_THINKING|>",
+                false,
+            )?,
+        })
+    }
+}
+
+impl ReasoningParser for CohereCmdReasoningParser {
+    fn create(tokenizer: DynTokenizer) -> Result<Box<dyn ReasoningParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tokenizer)?))
+    }
+
+    fn initialize(&mut self, prompt_token_ids: &[u32]) -> Result<()> {
+        self.inner.initialize(prompt_token_ids);
+        Ok(())
+    }
+
+    fn push(&mut self, delta: &str) -> Result<ReasoningDelta> {
+        Ok(self.inner.push(delta))
+    }
+
+    fn finish(&mut self) -> Result<ReasoningDelta> {
+        Ok(self.inner.finish())
+    }
+}
diff --git a/rust/src/reasoning-parser/src/deepseek_r1.rs b/rust/src/reasoning-parser/src/deepseek_r1.rs
new file mode 100644
index 000000000000..069de478b7a7
--- /dev/null
+++ b/rust/src/reasoning-parser/src/deepseek_r1.rs
@@ -0,0 +1,44 @@
+use vllm_tokenizer::DynTokenizer;
+
+use super::{DelimitedReasoningParser, ReasoningDelta, ReasoningParser, Result};
+
+/// Reasoning parser for DeepSeek R1 style outputs.
+///
+/// DeepSeek R1 may begin generating directly inside a reasoning span and only
+/// emit the closing `</think>` delimiter, so the no-boundary fallback defaults
+/// to `in_reasoning = true`.
+pub struct DeepSeekR1ReasoningParser {
+    inner: DelimitedReasoningParser,
+}
+
+impl DeepSeekR1ReasoningParser {
+    /// Create a DeepSeek R1 parser backed by the shared delimited state
+    /// machine.
+    pub fn new(tokenizer: DynTokenizer) -> Result<Self> {
+        Ok(Self {
+            inner: DelimitedReasoningParser::new(tokenizer, "<think>", "</think>", true)?,
+        })
+    }
+}
+
+impl ReasoningParser for DeepSeekR1ReasoningParser {
+    fn create(tokenizer: DynTokenizer) -> Result<Box<dyn ReasoningParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tokenizer)?))
+    }
+
+    fn initialize(&mut self, prompt_token_ids: &[u32]) -> Result<()> {
+        self.inner.initialize(prompt_token_ids);
+        Ok(())
+    }
+
+    fn push(&mut self, delta: &str) -> Result<ReasoningDelta> {
+        Ok(self.inner.push(delta))
+    }
+
+    fn finish(&mut self) -> Result<ReasoningDelta> {
+        Ok(self.inner.finish())
+    }
+}
diff --git a/rust/src/reasoning-parser/src/delimited.rs b/rust/src/reasoning-parser/src/delimited.rs
new file mode 100644
index 000000000000..485202e3e2ef
--- /dev/null
+++ b/rust/src/reasoning-parser/src/delimited.rs
@@ -0,0 +1,161 @@
+use vllm_tokenizer::{DynTokenizer, Tokenizer};
+
+use super::{ReasoningDelta, ReasoningError, Result};
+
+/// Shared incremental state machine for tag-delimited reasoning protocols.
+///
+/// This helper is intentionally not a public parser type. Model-family parser
+/// wrappers own one `DelimitedReasoningParser` internally and expose the
+/// request-facing [`super::ReasoningParser`] trait.
+///
+/// The shared state machine stays generic by deriving its initial
+/// `current_in_reasoning` state from the prompt token boundary instead of
+/// hardcoding model-family conventions. That means families with the same
+/// delimiters can often reuse this implementation even if their chat templates
+/// prefill different prompts.
+pub(crate) struct DelimitedReasoningParser {
+    tokenizer: DynTokenizer,
+    current_in_reasoning: bool,
+    buffer: String,
+    start_token: String,
+    end_token: String,
+    start_token_id: u32,
+    end_token_id: u32,
+    default_in_reasoning: bool,
+}
+
+impl DelimitedReasoningParser {
+    /// Create one delimited parser state machine.
+    ///
+    /// `default_in_reasoning` is only used when prompt initialization sees no
+    /// reasoning boundary token at all. If the prompt contains either the
+    /// start or end delimiter, that prompt boundary always wins.
+    pub(crate) fn new(
+        tokenizer: DynTokenizer,
+        start_token: &'static str,
+        end_token: &'static str,
+        default_in_reasoning: bool,
+    ) -> Result<Self> {
+        let start_token_id =
+            tokenizer.token_to_id(start_token).ok_or_else(|| ReasoningError::MissingToken {
+                token: start_token.to_string(),
+            })?;
+        let end_token_id =
+            tokenizer.token_to_id(end_token).ok_or_else(|| ReasoningError::MissingToken {
+                token: end_token.to_string(),
+            })?;
+
+        Ok(Self {
+            tokenizer,
+            current_in_reasoning: default_in_reasoning,
+            buffer: String::new(),
+            start_token: start_token.to_string(),
+            end_token: end_token.to_string(),
+            start_token_id,
+            end_token_id,
+            default_in_reasoning,
+        })
+    }
+
+    /// Initialize the starting state from prompt token IDs.
+    pub(crate) fn initialize(&mut self, prompt_token_ids: &[u32]) {
+        self.current_in_reasoning = last_reasoning_boundary(
+            prompt_token_ids,
+            self.start_token_id,
+            self.end_token_id,
+            self.tokenizer.as_ref(),
+        )
+        .unwrap_or(self.default_in_reasoning);
+    }
+
+    /// Parse one decoded text delta and return its reasoning/content split.
+    pub(crate) fn push(&mut self, delta: &str) -> ReasoningDelta {
+        self.buffer.push_str(delta);
+
+        let partial_suffix_len = self.partial_suffix_len(&self.buffer);
+        let stable_len = self.buffer.len() - partial_suffix_len;
+        let pending_suffix = self.buffer.split_off(stable_len);
+        let stable_text = std::mem::replace(&mut self.buffer, pending_suffix);
+
+        self.parse_stable_text(&stable_text)
+    }
+
+    /// Flush any buffered partial delimiter suffix at end of stream.
+    pub(crate) fn finish(&mut self) -> ReasoningDelta {
+        let stable_text = std::mem::take(&mut self.buffer);
+        self.parse_stable_text(&stable_text)
+    }
+
+    /// Parse text that is known not to end with a partial delimiter suffix.
+    fn parse_stable_text(&mut self, mut stable: &str) -> ReasoningDelta {
+        let mut delta = ReasoningDelta::default();
+
+        while !stable.is_empty() {
+            if self.current_in_reasoning {
+                if let Some(end_idx) = stable.find(&self.end_token) {
+                    delta.push_reasoning(&stable[..end_idx]);
+                    stable = &stable[end_idx + self.end_token.len()..];
+                    self.current_in_reasoning = false;
+                } else {
+                    delta.push_reasoning(stable);
+                    break;
+                }
+            } else if let Some(start_idx) = stable.find(&self.start_token) {
+                delta.push_content(&stable[..start_idx]);
+                stable = &stable[start_idx + self.start_token.len()..];
+                self.current_in_reasoning = true;
+            } else {
+                delta.push_content(stable);
+                break;
+            }
+        }
+
+        delta
+    }
+
+    /// Return the longest trailing suffix that could still complete a
+    /// delimiter.
+    fn partial_suffix_len(&self, text: &str) -> usize {
+        let mut best = 0;
+        for idx in text.char_indices().map(|(idx, _)| idx).skip(1) {
+            let suffix = &text[idx..];
+            if self.start_token.starts_with(suffix) && self.start_token != suffix {
+                best = best.max(text.len() - idx);
+            }
+            if self.end_token.starts_with(suffix) && self.end_token != suffix {
+                best = best.max(text.len() - idx);
+            }
+        }
+
+        if self.start_token.starts_with(text) && self.start_token != text {
+            best = best.max(text.len());
+        }
+        if self.end_token.starts_with(text) && self.end_token != text {
+            best = best.max(text.len());
+        }
+
+        best
+    }
+}
+
+/// Determine the reasoning state implied by the last prompt boundary, if any.
+fn last_reasoning_boundary(
+    prompt_token_ids: &[u32],
+    start_token_id: u32,
+    end_token_id: u32,
+    tokenizer: &dyn Tokenizer,
+) -> Option<bool> {
+    for token_id in prompt_token_ids.iter().rev() {
+        if *token_id == start_token_id {
+            return Some(true);
+        }
+        if *token_id == end_token_id {
+            return Some(false);
+        }
+        if tokenizer.is_special_id(*token_id) {
+            return None;
+        }
+    }
+
+    None
+}
diff --git a/rust/src/reasoning-parser/src/gemma4.rs b/rust/src/reasoning-parser/src/gemma4.rs
new file mode 100644
index 000000000000..86824f2ad407
--- /dev/null
+++ b/rust/src/reasoning-parser/src/gemma4.rs
@@ -0,0 +1,273 @@
+use vllm_tokenizer::DynTokenizer;
+
+use super::{DelimitedReasoningParser, ReasoningDelta, ReasoningParser, Result};
+
+const THOUGHT_PREFIX: &str = "thought\n";
+
+/// Reasoning parser for Google Gemma4 thinking models.
+///
+/// Gemma4 emits reasoning inside `<|channel> ... <channel|>` spans and adds a
+/// structural `thought\n` label at the beginning of the reasoning channel.
+/// This parser keeps the delimiter handling in the shared delimited parser and
+/// only layers on Gemma4-specific request adjustment plus prefix stripping.
+///
+/// Original Python implementation:
+/// <https://github.com/vllm-project/vllm/blob/18b1c77211d8f6fe800bcfb89524d2b598708032/vllm/reasoning/gemma4_reasoning_parser.py#L23>
+pub struct Gemma4ReasoningParser {
+    inner: DelimitedReasoningParser,
+    reasoning_text: String,
+    prefix_stripped: bool,
+}
+
+impl Gemma4ReasoningParser {
+    /// Create a Gemma4 parser.
+    pub fn new(tokenizer: DynTokenizer) -> Result<Self> {
+        Ok(Self {
+            inner: DelimitedReasoningParser::new(tokenizer, "<|channel>", "<channel|>", false)?,
+            reasoning_text: String::new(),
+            prefix_stripped: false,
+        })
+    }
+
+    /// Apply Gemma4's `thought\n` stripping rule to one reasoning delta.
+    ///
+    /// Early reasoning text is buffered until we can decide whether it begins
+    /// with the structural channel label.
+    fn strip_thought_prefix(&mut self, reasoning: &str) -> Option<String> {
+        if self.prefix_stripped {
+            return Some(reasoning.to_string());
+        }
+
+        self.reasoning_text.push_str(reasoning);
+
+        if self.reasoning_text.starts_with(THOUGHT_PREFIX) {
+            let prefix_len = THOUGHT_PREFIX.len();
+            let previous_len = self.reasoning_text.len() - reasoning.len();
+            if previous_len >= prefix_len {
+                self.reasoning_text.clear();
+                self.prefix_stripped = true;
+                return Some(reasoning.to_string());
+            }
+
+            let prefix_chars_in_delta = prefix_len - previous_len;
+            let stripped = &reasoning[prefix_chars_in_delta.min(reasoning.len())..];
+            if stripped.is_empty() {
+                if self.reasoning_text.len() >= prefix_len {
+                    self.reasoning_text.clear();
+                    self.prefix_stripped = true;
+                }
+                return None;
+            }
+
+            self.reasoning_text.clear();
+            self.prefix_stripped = true;
+            return Some(stripped.to_string());
+        }
+
+        if THOUGHT_PREFIX.starts_with(&self.reasoning_text) {
+            return None;
+        }
+
+        self.prefix_stripped = true;
+        Some(std::mem::take(&mut self.reasoning_text))
+    }
+
+    /// Apply Gemma4-specific reasoning post-processing to one parsed delta.
+    fn post_process(&mut self, mut result: ReasoningDelta) -> ReasoningDelta {
+        if let Some(reasoning) = result.reasoning.take() {
+            result.reasoning =
+                self.strip_thought_prefix(&reasoning).filter(|text| !text.is_empty());
+        }
+        result
+    }
+}
+
+impl ReasoningParser for Gemma4ReasoningParser {
+    fn create(tokenizer: DynTokenizer) -> Result<Box<dyn ReasoningParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tokenizer)?))
+    }
+
+    fn preserve_special_tokens(&self) -> bool {
+        true
+    }
+
+    fn initialize(&mut self, prompt_token_ids: &[u32]) -> Result<()> {
+        self.inner.initialize(prompt_token_ids);
+        self.reasoning_text.clear();
+        self.prefix_stripped = false;
+        Ok(())
+    }
+
+    fn push(&mut self, delta: &str) -> Result<ReasoningDelta> {
+        let result = self.inner.push(delta);
+        Ok(self.post_process(result))
+    }
+
+    fn finish(&mut self) -> Result<ReasoningDelta> {
+        let result = self.inner.finish();
+        Ok(self.post_process(result))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use vllm_tokenizer::Tokenizer;
+
+    use super::Gemma4ReasoningParser;
+    use crate::ReasoningParser;
+
+    struct FakeTokenizer;
+
+    impl Tokenizer for FakeTokenizer {
+        fn encode(
+            &self,
+            text: &str,
+            _add_special_tokens: bool,
+        ) -> vllm_tokenizer::Result<Vec<u32>> {
+            Ok(text.chars().map(u32::from).collect())
+        }
+
+        fn decode(
+            &self,
+            token_ids: &[u32],
+            _skip_special_tokens: bool,
+        ) -> vllm_tokenizer::Result<String> {
+            Ok(token_ids
+                .iter()
+                .map(|token_id| char::from_u32(*token_id).unwrap_or('\u{FFFD}'))
+                .collect())
+        }
+
+        fn token_to_id(&self, token: &str) -> Option<u32> {
+            match token {
+                "<|channel>" => Some(1000),
+                "<channel|>" => Some(1001),
+                _ => None,
+            }
+        }
+    }
+
+    fn run_streaming(output: &[&str]) -> (Option<String>, Option<String>) {
+        let tokenizer = Arc::new(FakeTokenizer);
+        let mut parser = Gemma4ReasoningParser::new(tokenizer).unwrap();
+        let mut reasoning = String::new();
+        let mut content = String::new();
+
+        for delta in output {
+            let result = parser.push(delta).unwrap();
+            if let Some(next) = result.reasoning {
+                reasoning.push_str(&next);
+            }
+            if let Some(next) = result.content {
+                content.push_str(&next);
+            }
+        }
+
+        let final_delta = parser.finish().unwrap();
+        if let Some(next) = final_delta.reasoning {
+            reasoning.push_str(&next);
+        }
+        if let Some(next) = final_delta.content {
+            content.push_str(&next);
+        }
+
+        (
+            (!reasoning.is_empty()).then_some(reasoning),
+            (!content.is_empty()).then_some(content),
+        )
+    }
+
+    #[test]
+    fn gemma4_reasoning_streaming_handles_channel_delimited_outputs() {
+        let cases = [
+            (
+                "no_reasoning",
+                vec!["This is content"],
+                None,
+                Some("This is content"),
+            ),
+            (
+                "reasoning_and_content",
+                vec!["<|channel>This is a reasoning section<channel|>This is the rest"],
+                Some("This is a reasoning section"),
+                Some("This is the rest"),
+            ),
+            (
+                "complete_reasoning",
+                vec!["<|channel>This is a reasoning section<channel|>"],
+                Some("This is a reasoning section"),
+                None,
+            ),
+            (
+                "multiple_lines",
+                vec!["<|channel>This\nThat<channel|>This is the rest\nThat"],
+                Some("This\nThat"),
+                Some("This is the rest\nThat"),
+            ),
+            (
+                "no_end",
+                vec!["<|channel>This is a reasoning section"],
+                Some("This is a reasoning section"),
+                None,
+            ),
+            ("empty", vec![""], None, None),
+            (
+                "newline_around_reasoning",
+                vec!["Before\n<|channel>This is a reasoning section<channel|>\nThis is the rest"],
+                Some("This is a reasoning section"),
+                Some("Before\n\nThis is the rest"),
+            ),
+            (
+                "thought_prefix",
+                vec!["<|channel>thought\nActual reasoning here<channel|>Final answer"],
+                Some("Actual reasoning here"),
+                Some("Final answer"),
+            ),
+            (
+                "thought_prefix_only",
+                vec!["<|channel>thought\n<channel|>"],
+                None,
+                None,
+            ),
+            (
+                "thought_prefix_multiline",
+                vec!["<|channel>thought\nLine1\nLine2<channel|>Answer"],
+                Some("Line1\nLine2"),
+                Some("Answer"),
+            ),
+            (
+                "thought_prefix_diverge",
+                vec!["<|channel>thousand reasons<channel|>Done"],
+                Some("thousand reasons"),
+                Some("Done"),
+            ),
+        ];
+
+        for (name, output, expected_reasoning, expected_content) in cases {
+            let (reasoning, content) = run_streaming(&output);
+            assert_eq!(reasoning.as_deref(), expected_reasoning, "{name}");
+            assert_eq!(content.as_deref(), expected_content, "{name}");
+        }
+    }
+
+    #[test]
+    fn gemma4_strips_thought_prefix_even_when_split_across_deltas() {
+        let (reasoning, content) =
+            run_streaming(&["<|channel>thou", "ght", "\nabc", "<channel|>done"]);
+        assert_eq!(reasoning.as_deref(), Some("abc"));
+        assert_eq!(content.as_deref(), Some("done"));
+    }
+
+    #[test]
+    fn gemma4_preserves_special_tokens() {
+        let tokenizer = Arc::new(FakeTokenizer);
+        let parser = Gemma4ReasoningParser::new(tokenizer).unwrap();
+
+        assert!(parser.preserve_special_tokens());
+    }
+}
diff --git a/rust/src/reasoning-parser/src/kimi.rs b/rust/src/reasoning-parser/src/kimi.rs
new file mode 100644
index 000000000000..6c042d0d8f1a
--- /dev/null
+++ b/rust/src/reasoning-parser/src/kimi.rs
@@ -0,0 +1,39 @@
+use vllm_tokenizer::DynTokenizer;
+
+use super::{DelimitedReasoningParser, ReasoningDelta, ReasoningParser, Result};
+
+/// Reasoning parser for legacy Kimi models that use Unicode thinking tags.
+pub struct KimiReasoningParser {
+    inner: DelimitedReasoningParser,
+}
+
+impl KimiReasoningParser {
+    /// Create a Kimi parser backed by the shared delimited state machine.
+    pub fn new(tokenizer: DynTokenizer) -> Result<Self> {
+        Ok(Self {
+            inner: DelimitedReasoningParser::new(tokenizer, "◁think▷", "◁/think▷", false)?,
+        })
+    }
+}
+
+impl ReasoningParser for KimiReasoningParser {
+    fn create(tokenizer: DynTokenizer) -> Result<Box<dyn ReasoningParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tokenizer)?))
+    }
+
+    fn initialize(&mut self, prompt_token_ids: &[u32]) -> Result<()> {
+        self.inner.initialize(prompt_token_ids);
+        Ok(())
+    }
+
+    fn push(&mut self, delta: &str) -> Result<ReasoningDelta> {
+        Ok(self.inner.push(delta))
+    }
+
+    fn finish(&mut self) -> Result<ReasoningDelta> {
+        Ok(self.inner.finish())
+    }
+}
diff --git a/rust/src/reasoning-parser/src/lib.rs b/rust/src/reasoning-parser/src/lib.rs
new file mode 100644
index 000000000000..084168ab2f14
--- /dev/null
+++ b/rust/src/reasoning-parser/src/lib.rs
@@ -0,0 +1,129 @@
+//! Streaming reasoning parsers for chat completions.
+//!
+//! The key design choice here is that parser initialization prefers the
+//! *actual rendered prompt state* over model-family conventions. When a stream
+//! starts, each parser receives the prompt token IDs and inspects the last
+//! reasoning boundary that is already present in the prompt. In practice this
+//! is a more faithful signal than hardcoding assumptions such as "this model
+//! always starts in reasoning" or "this model always emits `<think>` itself".
+//!
+//! That prompt-first initialization lets multiple model families share the
+//! same incremental parser implementation even when older Python parsers split
+//! them apart. If two families use the same textual delimiters and differ
+//! mostly in how their chat templates prefill `<think>` / `</think>`, they can
+//! usually reuse one parser here because the prompt token IDs already tell us
+//! which state the stream is entering with.
+
+mod cohere_cmd;
+mod deepseek_r1;
+mod delimited;
+mod gemma4;
+mod kimi;
+mod qwen3;
+
+use thiserror::Error;
+use vllm_tokenizer::DynTokenizer;
+
+pub use self::cohere_cmd::CohereCmdReasoningParser;
+pub use self::deepseek_r1::DeepSeekR1ReasoningParser;
+pub(crate) use self::delimited::DelimitedReasoningParser;
+pub use self::gemma4::Gemma4ReasoningParser;
+pub use self::kimi::KimiReasoningParser;
+pub use self::qwen3::Qwen3ReasoningParser;
+
+/// DeepSeek V3 currently shares the standard `<think>...</think>` parser.
+pub type DeepSeekV3ReasoningParser = Qwen3ReasoningParser;
+/// DeepSeek V4 currently shares the standard `<think>...</think>` parser.
+pub type DeepSeekV4ReasoningParser = Qwen3ReasoningParser;
+/// GLM45 currently shares the standard `<think>...</think>` parser.
+pub type Glm45ReasoningParser = Qwen3ReasoningParser;
+/// Kimi K2 currently shares the standard `<think>...</think>` parser.
+// TODO: kimi k2 may implicitly end reasoning by starting a tool call section
+// using <|tool_calls_section_begin|>, we should support that.
+pub type KimiK2ReasoningParser = Qwen3ReasoningParser;
+/// MiniMax M2 currently shares the standard `<think>...</think>` parser.
+pub type MiniMaxM2ReasoningParser = Qwen3ReasoningParser;
+/// Nemotron V3 currently shares the standard `<think>...</think>` parser.
+pub type NemotronV3ReasoningParser = Qwen3ReasoningParser;
+/// Step3 currently shares the standard `<think>...</think>` parser.
+pub type Step3ReasoningParser = Qwen3ReasoningParser;
+
+/// Result alias for reasoning parser operations.
+pub type Result<T> = std::result::Result<T, ReasoningError>;
+
+/// One parsed streaming delta split into reasoning and visible content.
+#[derive(Debug, Default, Clone, PartialEq, Eq)]
+pub struct ReasoningDelta {
+    pub reasoning: Option<String>,
+    pub content: Option<String>,
+}
+
+impl ReasoningDelta {
+    /// Return true when this delta carries neither reasoning nor content text.
+    pub fn is_empty(&self) -> bool {
+        self.reasoning.is_none() && self.content.is_none()
+    }
+
+    /// Append text to the reasoning portion, creating it on first use.
+    pub(crate) fn push_reasoning(&mut self, text: &str) {
+        if text.is_empty() {
+            return;
+        }
+        match &mut self.reasoning {
+            Some(existing) => existing.push_str(text),
+            None => self.reasoning = Some(text.to_string()),
+        }
+    }
+
+    /// Append text to the visible content portion, creating it on first use.
+    pub(crate) fn push_content(&mut self, text: &str) {
+        if text.is_empty() {
+            return;
+        }
+        match &mut self.content {
+            Some(existing) => existing.push_str(text),
+            None => self.content = Some(text.to_string()),
+        }
+    }
+}
+
+/// Incremental parser that splits decoded text deltas into reasoning and
+/// content.
+pub trait ReasoningParser: Send {
+    /// Construct a boxed parser instance for one request stream.
+    fn create(tokenizer: DynTokenizer) -> Result<Box<dyn ReasoningParser>>
+    where
+        Self: Sized + 'static;
+
+    /// Initialize parser state from prompt token IDs before output deltas
+    /// arrive.
+    fn initialize(&mut self, _prompt_token_ids: &[u32]) -> Result<()> {
+        Ok(())
+    }
+
+    /// Return whether decoded output must preserve tokenizer special tokens.
+    ///
+    /// Some model families emit reasoning sentinels as special tokens. Those
+    /// parsers need `skip_special_tokens = false` while parsing is enabled.
+    fn preserve_special_tokens(&self) -> bool {
+        false
+    }
+
+    /// Feed one decoded text delta into the parser.
+    fn push(&mut self, delta: &str) -> Result<ReasoningDelta>;
+
+    /// Flush any buffered partial delimiter state at end of stream.
+    fn finish(&mut self) -> Result<ReasoningDelta> {
+        Ok(ReasoningDelta::default())
+    }
+}
+
+/// Errors produced while creating or running reasoning parsers.
+#[derive(Debug, Error)]
+pub enum ReasoningError {
+    #[error("tokenizer is missing reasoning delimiter token `{token}`")]
+    MissingToken { token: String },
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/rust/src/reasoning-parser/src/qwen3.rs b/rust/src/reasoning-parser/src/qwen3.rs
new file mode 100644
index 000000000000..c50ab2d0fbdd
--- /dev/null
+++ b/rust/src/reasoning-parser/src/qwen3.rs
@@ -0,0 +1,43 @@
+use vllm_tokenizer::DynTokenizer;
+
+use super::{DelimitedReasoningParser, ReasoningDelta, ReasoningParser, Result};
+
+/// Reasoning parser for the Qwen3/Qwen3.5 family.
+///
+/// This parser uses standard `<think>...</think>` delimiters and defaults to
+/// waiting for an explicit start token when prompt initialization finds no
+/// reasoning boundary.
+pub struct Qwen3ReasoningParser {
+    inner: DelimitedReasoningParser,
+}
+
+impl Qwen3ReasoningParser {
+    /// Create a Qwen3 parser backed by the shared delimited state machine.
+    pub fn new(tokenizer: DynTokenizer) -> Result<Self> {
+        Ok(Self {
+            inner: DelimitedReasoningParser::new(tokenizer, "<think>", "</think>", false)?,
+        })
+    }
+}
+
+impl ReasoningParser for Qwen3ReasoningParser {
+    fn create(tokenizer: DynTokenizer) -> Result<Box<dyn ReasoningParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tokenizer)?))
+    }
+
+    fn initialize(&mut self, prompt_token_ids: &[u32]) -> Result<()> {
+        self.inner.initialize(prompt_token_ids);
+        Ok(())
+    }
+
+    fn push(&mut self, delta: &str) -> Result<ReasoningDelta> {
+        Ok(self.inner.push(delta))
+    }
+
+    fn finish(&mut self) -> Result<ReasoningDelta> {
+        Ok(self.inner.finish())
+    }
+}
diff --git a/rust/src/reasoning-parser/src/tests.rs b/rust/src/reasoning-parser/src/tests.rs
new file mode 100644
index 000000000000..da602d9fdddd
--- /dev/null
+++ b/rust/src/reasoning-parser/src/tests.rs
@@ -0,0 +1,161 @@
+use std::sync::Arc;
+
+use vllm_tokenizer::Tokenizer;
+
+use super::{
+    DeepSeekR1ReasoningParser, DelimitedReasoningParser, Qwen3ReasoningParser, ReasoningParser,
+};
+
+struct FakeTokenizer;
+
+impl Tokenizer for FakeTokenizer {
+    fn encode(&self, text: &str, _add_special_tokens: bool) -> vllm_tokenizer::Result<Vec<u32>> {
+        Ok(text.chars().map(u32::from).collect())
+    }
+
+    fn decode(
+        &self,
+        token_ids: &[u32],
+        _skip_special_tokens: bool,
+    ) -> vllm_tokenizer::Result<String> {
+        Ok(token_ids
+            .iter()
+            .map(|token_id| char::from_u32(*token_id).unwrap_or('\u{FFFD}'))
+            .collect())
+    }
+
+    fn token_to_id(&self, token: &str) -> Option<u32> {
+        match token {
+            "<think>" => Some(1),
+            "</think>" => Some(2),
+            "<|START_THINKING|>" => Some(3),
+            "<|END_THINKING|>" => Some(4),
+            "◁think▷" => Some(5),
+            "◁/think▷" => Some(6),
+            _ => None,
+        }
+    }
+
+    fn is_special_id(&self, token_id: u32) -> bool {
+        token_id == 7
+    }
+}
+
+#[test]
+fn delimited_content_only_stream() {
+    let tokenizer = Arc::new(FakeTokenizer);
+    let mut parser =
+        DelimitedReasoningParser::new(tokenizer, "<think>", "</think>", false).unwrap();
+
+    assert_eq!(
+        parser.push("plain content").content.as_deref(),
+        Some("plain content")
+    );
+}
+
+#[test]
+fn delimited_single_chunk_with_reasoning_and_content() {
+    let tokenizer = Arc::new(FakeTokenizer);
+    let mut parser =
+        DelimitedReasoningParser::new(tokenizer, "<think>", "</think>", false).unwrap();
+
+    let delta = parser.push("<think>reason</think>answer");
+    assert_eq!(delta.reasoning.as_deref(), Some("reason"));
+    assert_eq!(delta.content.as_deref(), Some("answer"));
+}
+
+#[test]
+fn delimited_partial_tokens_across_chunks() {
+    let tokenizer = Arc::new(FakeTokenizer);
+    let mut parser =
+        DelimitedReasoningParser::new(tokenizer, "<think>", "</think>", false).unwrap();
+
+    assert!(parser.push("<thi").is_empty());
+    let delta = parser.push("nk>reason</think>answer");
+    assert_eq!(delta.reasoning.as_deref(), Some("reason"));
+    assert_eq!(delta.content.as_deref(), Some("answer"));
+}
+
+#[test]
+fn delimited_finish_flushes_buffer() {
+    let tokenizer = Arc::new(FakeTokenizer);
+    let mut parser =
+        DelimitedReasoningParser::new(tokenizer, "<think>", "</think>", false).unwrap();
+    parser.initialize(&[1]);
+
+    let delta = parser.push("unfinished</thi");
+    assert_eq!(delta.reasoning.as_deref(), Some("unfinished"));
+    let final_delta = parser.finish();
+    assert_eq!(final_delta.reasoning.as_deref(), Some("</thi"));
+}
+
+#[test]
+fn qwen3_without_prompt_markers_expects_start_token() {
+    let tokenizer = Arc::new(FakeTokenizer);
+    let mut parser = Qwen3ReasoningParser::new(tokenizer).unwrap();
+
+    let delta = parser.push("reason</think>answer").unwrap();
+    assert_eq!(delta.reasoning, None);
+    assert_eq!(delta.content.as_deref(), Some("reason</think>answer"));
+}
+
+#[test]
+fn qwen3_prompt_end_marker_starts_in_content() {
+    let tokenizer = Arc::new(FakeTokenizer);
+    let mut parser = Qwen3ReasoningParser::new(tokenizer).unwrap();
+    parser.initialize(&[2]).unwrap();
+
+    let delta = parser.push("answer").unwrap();
+    assert_eq!(delta.reasoning, None);
+    assert_eq!(delta.content.as_deref(), Some("answer"));
+}
+
+#[test]
+fn qwen3_tolerates_old_and_new_formats() {
+    let tokenizer = Arc::new(FakeTokenizer);
+
+    let mut old_parser = Qwen3ReasoningParser::new(tokenizer.clone()).unwrap();
+    let old = old_parser.push("<think>reason</think>answer").unwrap();
+    assert_eq!(old.reasoning.as_deref(), Some("reason"));
+    assert_eq!(old.content.as_deref(), Some("answer"));
+
+    let mut new_parser = Qwen3ReasoningParser::new(tokenizer).unwrap();
+    new_parser.initialize(&[1]).unwrap();
+    let new = new_parser.push("reason</think>answer").unwrap();
+    assert_eq!(new.reasoning.as_deref(), Some("reason"));
+    assert_eq!(new.content.as_deref(), Some("answer"));
+}
+
+#[test]
+fn qwen3_stops_scanning_at_last_special_token() {
+    let tokenizer = Arc::new(FakeTokenizer);
+    let mut parser = Qwen3ReasoningParser::new(tokenizer).unwrap();
+
+    parser.initialize(&[1, 7]).unwrap();
+
+    let delta = parser.push("answer").unwrap();
+    assert_eq!(delta.reasoning, None);
+    assert_eq!(delta.content.as_deref(), Some("answer"));
+}
+
+#[test]
+fn deepseek_r1_defaults_to_reasoning_without_prompt_boundary() {
+    let tokenizer = Arc::new(FakeTokenizer);
+    let mut parser = DeepSeekR1ReasoningParser::new(tokenizer).unwrap();
+
+    let delta = parser.push("reason</think>answer").unwrap();
+    assert_eq!(delta.reasoning.as_deref(), Some("reason"));
+    assert_eq!(delta.content.as_deref(), Some("answer"));
+}
+
+#[test]
+fn deepseek_r1_stops_scanning_at_last_special_token() {
+    let tokenizer = Arc::new(FakeTokenizer);
+    let mut parser = DeepSeekR1ReasoningParser::new(tokenizer).unwrap();
+
+    parser.initialize(&[2, 7]).unwrap();
+
+    let delta = parser.push("reason</think>answer").unwrap();
+    assert_eq!(delta.reasoning.as_deref(), Some("reason"));
+    assert_eq!(delta.content.as_deref(), Some("answer"));
+}
diff --git a/rust/src/server/Cargo.toml b/rust/src/server/Cargo.toml
new file mode 100644
index 000000000000..6030f972a9f2
--- /dev/null
+++ b/rust/src/server/Cargo.toml
@@ -0,0 +1,57 @@
+[package]
+name = "vllm-server"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+asynk-strim-attr.workspace = true
+axum.workspace = true
+futures.workspace = true
+http-body.workspace = true
+itertools.workspace = true
+libc.workspace = true
+llm-multimodal.workspace = true
+prost.workspace = true
+prost-types.workspace = true
+rmpv.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+serde_with.workspace = true
+socket2.workspace = true
+thiserror-ext.workspace = true
+tokio.workspace = true
+tokio-stream.workspace = true
+tokio-util.workspace = true
+tonic.workspace = true
+tonic-prost.workspace = true
+tower-http.workspace = true
+tracing.workspace = true
+tracing-futures.workspace = true
+tracing-subscriber.workspace = true
+uuid.workspace = true
+validator.workspace = true
+vllm-chat.workspace = true
+vllm-engine-core-client.workspace = true
+vllm-llm.workspace = true
+vllm-metrics.workspace = true
+vllm-text.workspace = true
+
+[build-dependencies]
+tonic-prost-build.workspace = true
+
+[dev-dependencies]
+anyhow.workspace = true
+async-openai = { workspace = true, features = ["full"] }
+bytes.workspace = true
+clap.workspace = true
+expect-test.workspace = true
+rmp-serde.workspace = true
+serial_test.workspace = true
+tower.workspace = true
+vllm-engine-core-client = { workspace = true, features = ["test-util"] }
+zeromq.workspace = true
+
+[lints]
+workspace = true
diff --git a/rust/src/server/build.rs b/rust/src/server/build.rs
new file mode 100644
index 000000000000..86a44aac7bb2
--- /dev/null
+++ b/rust/src/server/build.rs
@@ -0,0 +1,12 @@
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap();
+    let proto_dir = format!("{manifest_dir}/../../proto");
+
+    tonic_prost_build::configure()
+        .build_server(true)
+        .build_client(true)
+        .protoc_arg("--experimental_allow_proto3_optional") // be compatible with old compilers
+        .compile_protos(&[format!("{proto_dir}/vllm_grpc.proto")], &[proto_dir])?;
+
+    Ok(())
+}
diff --git a/rust/src/server/examples/README.md b/rust/src/server/examples/README.md
new file mode 100644
index 000000000000..f4f152082d9b
--- /dev/null
+++ b/rust/src/server/examples/README.md
@@ -0,0 +1,39 @@
+# Server Smoke Test
+
+Start a fresh headless `vllm` engine:
+
+```bash
+source ../vllm/.venv/bin/activate
+HF_HUB_OFFLINE=1 \
+VLLM_LOGGING_LEVEL=DEBUG \
+VLLM_CPU_KVCACHE_SPACE=2 \
+VLLM_HOST_IP=127.0.0.1 \
+VLLM_LOOPBACK_IP=127.0.0.1 \
+python3 -m vllm.entrypoints.cli.main serve Qwen/Qwen3-0.6B \
+  --headless \
+  --data-parallel-address 127.0.0.1 \
+  --data-parallel-rpc-port 62100 \
+  --data-parallel-size-local 1 \
+  --max-model-len 512 \
+  --dtype float16
+```
+
+Run the Rust server smoke test:
+
+```bash
+cargo run -p vllm-server --example external_engine_openai_qwen -- \
+  --handshake-address tcp://127.0.0.1:62100
+```
+
+The example starts the Rust OpenAI-compatible server on an ephemeral local port,
+connects to it via the `async-openai` Rust client, lists models, and then checks
+that a streamed chat completion yields the assistant role chunk, final-answer
+content chunks, and a terminal finish chunk. This example intentionally uses
+`async-openai`'s standard typed `create_stream` API instead of BYOT, so it does
+not inspect the nonstandard `reasoning_content` field even though the Rust
+server may emit it for reasoning-capable models such as Qwen3. For reasoning
+behavior itself, use the `vllm-chat` smoke test or the `vllm-server`
+route tests.
+
+IMPORTANT: Restart `vllm` each time you run the smoke test. The current headless
+engine cannot safely handle frontend reconnects after the client shuts down.
diff --git a/rust/src/server/examples/external_engine_openai_qwen.rs b/rust/src/server/examples/external_engine_openai_qwen.rs
new file mode 100644
index 000000000000..6ef2e1a883e9
--- /dev/null
+++ b/rust/src/server/examples/external_engine_openai_qwen.rs
@@ -0,0 +1,215 @@
+use std::time::Duration;
+
+use anyhow::{Context, Result, bail};
+use async_openai::Client;
+use async_openai::config::OpenAIConfig;
+use async_openai::types::chat::{
+    ChatCompletionRequestUserMessageArgs, CreateChatCompletionRequest,
+    CreateChatCompletionRequestArgs,
+};
+use async_openai::types::models::ListModelResponse;
+use clap::Parser;
+use futures::StreamExt as _;
+use tokio_util::sync::CancellationToken;
+use tracing_subscriber::EnvFilter;
+use vllm_engine_core_client::TransportMode;
+use vllm_server::{
+    ChatTemplateContentFormatOption, Config, CoordinatorMode, HttpListenerMode, ParserSelection,
+    RendererSelection, serve,
+};
+
+#[derive(Debug, Parser)]
+#[command(
+    about = "Smoke-test the Rust OpenAI server with async-openai against an external Qwen vLLM engine."
+)]
+struct Args {
+    #[arg(long)]
+    handshake_address: String,
+    #[arg(long, default_value_t = 1)]
+    engine_count: usize,
+    #[arg(long, default_value = "Qwen/Qwen3-0.6B")]
+    model: String,
+    #[arg(long, default_value = "127.0.0.1")]
+    host: String,
+    #[arg(long, default_value_t = 30)]
+    ready_timeout_secs: u64,
+    #[arg(
+        long,
+        default_value = "What is the capital of France? Answer with one word."
+    )]
+    prompt: String,
+}
+
+#[tokio::main(flavor = "multi_thread")]
+async fn main() -> Result<()> {
+    init_tracing();
+    let args = Args::parse();
+    let port = unique_local_port()?;
+    let config = Config {
+        transport_mode: TransportMode::HandshakeOwner {
+            handshake_address: args.handshake_address,
+            advertised_host: args.host,
+            engine_count: args.engine_count,
+            ready_timeout: Duration::from_secs(args.ready_timeout_secs),
+            local_input_address: None,
+            local_output_address: None,
+        },
+        coordinator_mode: CoordinatorMode::MaybeInProc,
+        model: args.model,
+        served_model_name: vec![],
+        listener_mode: HttpListenerMode::BindTcp {
+            host: "127.0.0.1".to_string(),
+            port,
+        },
+        tool_call_parser: ParserSelection::Auto,
+        reasoning_parser: ParserSelection::Auto,
+        renderer: RendererSelection::Auto,
+        chat_template: None,
+        default_chat_template_kwargs: None,
+        chat_template_content_format: ChatTemplateContentFormatOption::Auto,
+        enable_log_requests: false,
+        disable_log_stats: false,
+        grpc_port: None,
+        shutdown_timeout: Duration::ZERO,
+    };
+
+    let bind_address = format!("127.0.0.1:{port}");
+    let shutdown = CancellationToken::new();
+    let server_config = config.clone();
+    let server_shutdown = shutdown.clone();
+    let server_task = tokio::spawn(async move { serve(server_config, server_shutdown).await });
+
+    let client = Client::with_config(
+        OpenAIConfig::new()
+            .with_api_key("unused")
+            .with_api_base(format!("http://{bind_address}/v1")),
+    );
+
+    print_models(&client).await?;
+    let final_text = stream_completion(&client, &config.model, &args.prompt).await?;
+
+    println!();
+    println!("final_text={final_text:?}");
+
+    shutdown_server(server_task, shutdown).await
+}
+
+fn init_tracing() {
+    let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
+    let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init();
+}
+
+fn unique_local_port() -> Result<u16> {
+    let listener = std::net::TcpListener::bind("127.0.0.1:0")
+        .context("failed to allocate local smoke-test port")?;
+    let port = listener.local_addr().context("failed to read local smoke-test port")?.port();
+    drop(listener);
+    Ok(port)
+}
+
+async fn print_models(client: &Client<OpenAIConfig>) -> Result<()> {
+    let models = wait_for_models(client).await?;
+    let model_ids = models.data.into_iter().map(|model| model.id).collect::<Vec<_>>();
+    println!("models={model_ids:?}");
+    Ok(())
+}
+
+async fn wait_for_models(client: &Client<OpenAIConfig>) -> Result<ListModelResponse> {
+    let mut last_error = None;
+    for _ in 0..240 {
+        match client.models().list().await {
+            Ok(models) => return Ok(models),
+            Err(error) => {
+                last_error = Some(error);
+                tokio::time::sleep(Duration::from_millis(500)).await;
+            }
+        }
+    }
+
+    match last_error {
+        Some(error) => Err(error).context("OpenAI server did not become ready in time"),
+        None => bail!("OpenAI server readiness loop finished without a result"),
+    }
+}
+
+async fn stream_completion(
+    client: &Client<OpenAIConfig>,
+    model: &str,
+    prompt: &str,
+) -> Result<String> {
+    // Keep this smoke test on async-openai's standard `create_stream` path so it
+    // exercises the ordinary typed chat-completions client without BYOT
+    // request/response types.
+    //
+    // The current async-openai chat-completions stream delta type does not expose
+    // our OpenAI-compatible `reasoning_content` extension field, so this
+    // example only validates the assistant role chunk, visible `content`
+    // deltas, and terminal finish chunk. Reasoning coverage lives in our own
+    // route tests and in the `vllm-chat` smoke example.
+    let request: CreateChatCompletionRequest = CreateChatCompletionRequestArgs::default()
+        .model(model)
+        .stream(true)
+        .temperature(0.0)
+        .max_completion_tokens(128u32)
+        .messages([ChatCompletionRequestUserMessageArgs::default()
+            .content(prompt)
+            .build()
+            .context("failed to build user chat message")?
+            .into()])
+        .build()
+        .context("failed to build chat completion request")?;
+
+    let mut stream = client
+        .chat()
+        .create_stream(request)
+        .await
+        .context("failed to create streaming chat completion")?;
+
+    let mut final_text = String::new();
+    let mut saw_role = false;
+    let mut saw_finish_reason = false;
+    let mut saw_text = false;
+
+    while let Some(chunk) = stream.next().await {
+        let chunk = chunk.context("streaming chat completion failed")?;
+        for choice in chunk.choices {
+            if choice.delta.role.is_some() {
+                saw_role = true;
+            }
+            if let Some(delta) = choice.delta.content {
+                if !saw_text {
+                    print!("[answer] ");
+                }
+                print!("{delta}");
+                final_text.push_str(&delta);
+                saw_text = true;
+            }
+            if choice.finish_reason.is_some() {
+                saw_finish_reason = true;
+            }
+        }
+    }
+
+    if !saw_role {
+        bail!("stream ended without an assistant role chunk");
+    }
+    if !saw_finish_reason {
+        bail!("stream ended without a terminal finish reason");
+    }
+    if final_text.is_empty() {
+        bail!("stream ended without any content deltas");
+    }
+
+    Ok(final_text)
+}
+
+async fn shutdown_server(
+    server_task: tokio::task::JoinHandle<anyhow::Result<()>>,
+    shutdown: CancellationToken,
+) -> Result<()> {
+    shutdown.cancel();
+    server_task
+        .await
+        .context("server task join failed")?
+        .context("server task failed")
+}
diff --git a/rust/src/server/src/config.rs b/rust/src/server/src/config.rs
new file mode 100644
index 000000000000..522133427f4d
--- /dev/null
+++ b/rust/src/server/src/config.rs
@@ -0,0 +1,110 @@
+use std::collections::HashMap;
+use std::time::Duration;
+
+use anyhow::Result;
+use serde_json::Value;
+use vllm_chat::{ChatTemplateContentFormatOption, ParserSelection, RendererSelection};
+use vllm_engine_core_client::{CoordinatorMode as EngineCoreCoordinatorMode, TransportMode};
+
+/// How the HTTP server obtains its listening socket.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum HttpListenerMode {
+    /// Bind a fresh TCP listener on the given host/port.
+    BindTcp { host: String, port: u16 },
+    /// Bind a fresh Unix domain listener on the given filesystem path.
+    BindUnix { path: String },
+    /// Adopt an already-open listening socket inherited from a supervisor
+    /// process.
+    InheritedFd { fd: i32 },
+}
+
+/// Which coordinator implementation should be active when one is present for a
+/// frontend client.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum CoordinatorMode {
+    /// Do not run a coordinator at all.
+    None,
+    /// Run the Rust in-process coordinator for managed `serve` deployments, if
+    /// there are multiple engines and the model is MoE.
+    MaybeInProc,
+    /// Connect to an external coordinator owned by another process.
+    External { address: String },
+}
+
+/// Normalized runtime configuration for the minimal OpenAI-compatible server.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Config {
+    /// Frontend-to-engine transport setup.
+    pub transport_mode: TransportMode,
+    /// Requested frontend-side coordinator behavior.
+    pub coordinator_mode: CoordinatorMode,
+    /// Backend model identifier used for engine-core loading.
+    pub model: String,
+    /// Model name(s) exposed to clients via the OpenAI API. When non-empty,
+    /// the first entry is used as the primary ID in responses and all entries
+    /// are accepted in requests. When empty, falls back to `model`.
+    pub served_model_name: Vec<String>,
+    /// HTTP listener setup.
+    pub listener_mode: HttpListenerMode,
+    /// Tool-call parser selection.
+    pub tool_call_parser: ParserSelection,
+    /// Reasoning parser selection.
+    pub reasoning_parser: ParserSelection,
+    /// Chat renderer selection.
+    pub renderer: RendererSelection,
+    /// Server-default chat template override, as a file path or inline
+    /// template.
+    pub chat_template: Option<String>,
+    /// Server-default keyword arguments merged into every chat-template render.
+    pub default_chat_template_kwargs: Option<HashMap<String, Value>>,
+    /// How to serialize `message.content` for chat-template rendering.
+    pub chat_template_content_format: ChatTemplateContentFormatOption,
+    /// Log a summary line for each completed request.
+    pub enable_log_requests: bool,
+    /// When `true`, suppress periodic stats logging (throughput, queue depth,
+    /// cache usage).
+    pub disable_log_stats: bool,
+    /// TCP port for the gRPC Generate service. When `None`, no gRPC server is
+    /// started.
+    pub grpc_port: Option<u16>,
+    /// Maximum time to wait for active HTTP/gRPC requests to drain on shutdown.
+    pub shutdown_timeout: Duration,
+}
+
+impl Config {
+    /// Validate frontend configuration that can be checked before engine
+    /// startup.
+    pub fn validate(&self) -> Result<()> {
+        vllm_chat::validate_parser_overrides(&self.tool_call_parser, &self.reasoning_parser)?;
+
+        Ok(())
+    }
+
+    /// Return the number of engines implied by the configured transport mode.
+    pub fn engine_count(&self) -> usize {
+        match &self.transport_mode {
+            TransportMode::HandshakeOwner { engine_count, .. }
+            | TransportMode::Bootstrapped { engine_count, .. } => *engine_count,
+        }
+    }
+
+    /// Resolve the effective coordinator mode.
+    pub fn effective_coordinator_mode(
+        &self,
+        model_is_moe: bool,
+    ) -> Option<EngineCoreCoordinatorMode> {
+        match &self.coordinator_mode {
+            CoordinatorMode::None => None,
+            CoordinatorMode::MaybeInProc => {
+                if model_is_moe && self.engine_count() > 1 {
+                    Some(EngineCoreCoordinatorMode::InProc)
+                } else {
+                    None
+                }
+            }
+            CoordinatorMode::External { address } => Some(EngineCoreCoordinatorMode::External {
+                address: address.clone(),
+            }),
+        }
+    }
+}
diff --git a/rust/src/server/src/error.rs b/rust/src/server/src/error.rs
new file mode 100644
index 000000000000..cc425ca076f6
--- /dev/null
+++ b/rust/src/server/src/error.rs
@@ -0,0 +1,74 @@
+use axum::Json;
+use axum::http::StatusCode;
+use axum::response::{IntoResponse, Response};
+use thiserror_ext::{Construct, Macro};
+
+use crate::routes::openai::utils::types::{ErrorDetail, ErrorResponse};
+
+/// Small OpenAI-style error family used by the minimal HTTP layer.
+#[derive(Debug, Construct, Macro)]
+pub enum ApiError {
+    /// The request is syntactically valid OpenAI JSON but asks for unsupported
+    /// behavior.
+    InvalidRequest {
+        message: String,
+        param: Option<&'static str>,
+    },
+    /// The requested model name does not match the single configured model.
+    ModelNotFound { model: String },
+    /// The request body could not be parsed as valid JSON.
+    JsonParseError { message: String },
+    /// An unexpected internal failure happened before streaming started.
+    ServerError { message: String },
+}
+
+impl ApiError {
+    /// Return the HTTP status code associated with this API error.
+    pub fn status_code(&self) -> StatusCode {
+        match self {
+            Self::InvalidRequest { .. } => StatusCode::BAD_REQUEST,
+            Self::ModelNotFound { .. } => StatusCode::NOT_FOUND,
+            Self::ServerError { .. } => StatusCode::INTERNAL_SERVER_ERROR,
+            Self::JsonParseError { .. } => StatusCode::BAD_REQUEST,
+        }
+    }
+
+    /// Convert this error into the standard OpenAI-compatible JSON error
+    /// payload.
+    pub fn to_error_response(&self) -> ErrorResponse {
+        let error = match self {
+            Self::InvalidRequest { message, param } => ErrorDetail {
+                message: message.clone(),
+                error_type: "invalid_request_error".to_string(),
+                param: param.map(|p| p.to_string()),
+                code: Some("invalid_request_error".to_string()),
+            },
+            Self::ModelNotFound { model } => ErrorDetail {
+                message: format!("The model `{model}` does not exist."),
+                error_type: "invalid_request_error".to_string(),
+                param: Some("model".to_string()),
+                code: Some("model_not_found".to_string()),
+            },
+            Self::ServerError { message } => ErrorDetail {
+                message: message.clone(),
+                error_type: "server_error".to_string(),
+                param: None,
+                code: Some("server_error".to_string()),
+            },
+            Self::JsonParseError { message } => ErrorDetail {
+                message: message.clone(),
+                error_type: "invalid_request_error".to_string(),
+                param: None,
+                code: Some("json_parse_error".to_string()),
+            },
+        };
+
+        ErrorResponse { error }
+    }
+}
+
+impl IntoResponse for ApiError {
+    fn into_response(self) -> Response {
+        (self.status_code(), Json(self.to_error_response())).into_response()
+    }
+}
diff --git a/rust/src/server/src/grpc/convert.rs b/rust/src/server/src/grpc/convert.rs
new file mode 100644
index 000000000000..ed21dd3d3396
--- /dev/null
+++ b/rust/src/server/src/grpc/convert.rs
@@ -0,0 +1,679 @@
+//! Conversion between gRPC protobuf types and internal `vllm-text`
+//! request/response types.
+
+use tonic::Status;
+use uuid::Uuid;
+use vllm_engine_core_client::protocol::{StopReason, StructuredOutputsParams};
+use vllm_text::{
+    DecodedLogprobs, DecodedPromptLogprobs, FinishReason, Finished, Prompt, SamplingParams,
+    TextDecodeOptions, TextRequest,
+};
+
+use super::pb;
+
+// ========================================================================================
+// Request conversion
+// ========================================================================================
+
+/// Convert a gRPC `GenerateRequest` into the internal `TextRequest`.
+///
+/// If `req.model` is non-empty, it must match one of `served_model_names`;
+/// otherwise the request is rejected with `NotFound`. An empty string is
+/// treated as "unset" (proto3 default) and accepted.
+pub fn to_text_request(
+    req: pb::GenerateRequest,
+    stream: bool,
+    served_model_names: &[String],
+) -> Result<TextRequest, Status> {
+    if !req.model.is_empty() && !served_model_names.iter().any(|n| n == &req.model) {
+        return Err(Status::not_found(format!(
+            "model `{}` not found",
+            req.model
+        )));
+    }
+
+    if req.truncate_prompt_tokens != 0 {
+        return Err(Status::invalid_argument(
+            "truncate_prompt_tokens is not supported",
+        ));
+    }
+
+    let prompt = match req.prompt {
+        Some(pb::generate_request::Prompt::Text(text)) => Prompt::Text(text),
+        Some(pb::generate_request::Prompt::TokenIds(ids)) => Prompt::TokenIds(ids.ids),
+        None => return Err(Status::invalid_argument("prompt is required")),
+    };
+
+    let request_id = if req.request_id.is_empty() {
+        Uuid::new_v4().to_string()
+    } else {
+        req.request_id
+    };
+
+    let sampling = req.sampling.as_ref();
+    let decoding = req.decoding.as_ref();
+    let stopping = req.stopping.as_ref();
+    let response = req.response.as_ref();
+    let kv = req.kv.as_ref();
+
+    let mut sampling_params =
+        build_sampling_params(req.temperature, sampling, decoding, stopping, response)?;
+
+    // Thread KVCacheParameters → SamplingParams fields.
+    if let Some(kv) = kv {
+        // Thread kv_transfer_params through vllm_xargs, matching the HTTP route
+        // convention.
+        if let Some(kv_struct) = kv.kv_transfer_params.as_ref() {
+            let kv_json = proto_struct_to_json(kv_struct);
+            let map = sampling_params.vllm_xargs.get_or_insert_with(Default::default);
+            map.insert("kv_transfer_params".to_string(), kv_json);
+        }
+        if kv.bypass_prefix_cache {
+            sampling_params.skip_reading_prefix_cache = Some(true);
+        }
+    }
+
+    let decode_options = TextDecodeOptions {
+        skip_special_tokens: true,
+        include_stop_str_in_output: stopping.is_some_and(|s| s.include_stop_strings),
+        stop_strings: stopping.map(|s| &s.stop_strings).filter(|ss| !ss.is_empty()).cloned(),
+        min_tokens: stopping.map_or(0, |s| s.min_new_tokens),
+    };
+
+    Ok(TextRequest {
+        request_id,
+        prompt,
+        mm_features: None,
+        sampling_params,
+        decode_options,
+        intermediate: stream,
+        priority: req.priority,
+        cache_salt: kv.map(|k| &k.cache_salt).filter(|s| !s.is_empty()).cloned(),
+        add_special_tokens: true,
+        data_parallel_rank: None,
+    })
+}
+
+fn build_sampling_params(
+    temperature: Option<f32>,
+    sampling: Option<&pb::RandomSampling>,
+    decoding: Option<&pb::DecodingParameters>,
+    stopping: Option<&pb::StoppingCriteria>,
+    response: Option<&pb::ResponseOptions>,
+) -> Result<SamplingParams, Status> {
+    // Temperature is a top-level GenerateRequest field. Default to greedy (0.0) for
+    // the gRPC API when the caller does not specify a value. This differs from
+    // the HTTP/OpenAI API (which defaults to 1.0) and matches the convention of
+    // programmatic generation APIs.
+    let temperature = temperature.or(Some(0.0));
+    let mut params = SamplingParams {
+        temperature,
+        ..SamplingParams::default()
+    };
+
+    // RandomSampling: for every remaining sampling field the protobuf default (`0`)
+    // is treated as "unset" and leaves the resolved value to the lowering
+    // stage, which falls back to the model-provided default or a
+    // neutral/disabled value otherwise.
+    if let Some(s) = sampling {
+        // num_sequences (n > 1) is not supported yet by the TextLlm layer; the response
+        // path also hardcodes SequenceOutput.index = 0, so accepting >1 would silently
+        // truncate output cardinality. Reject explicitly.
+        if s.num_sequences > 1 {
+            return Err(Status::invalid_argument(
+                "num_sequences > 1 is not supported",
+            ));
+        }
+        if s.top_k != 0 {
+            params.top_k = Some(s.top_k);
+        }
+        if s.top_p != 0.0 {
+            params.top_p = Some(s.top_p);
+        }
+        if s.min_p != 0.0 {
+            params.min_p = Some(s.min_p);
+        }
+        params.seed = s.seed;
+    }
+
+    // DecodingParameters
+    if let Some(d) = decoding {
+        if d.presence_penalty != 0.0 {
+            params.presence_penalty = Some(d.presence_penalty);
+        }
+        if d.frequency_penalty != 0.0 {
+            params.frequency_penalty = Some(d.frequency_penalty);
+        }
+        if d.repetition_penalty != 0.0 {
+            params.repetition_penalty = Some(d.repetition_penalty);
+        }
+        if !d.logit_bias.is_empty() {
+            params.logit_bias = Some(d.logit_bias.clone());
+        }
+        if !d.allowed_token_ids.is_empty() {
+            params.allowed_token_ids = Some(d.allowed_token_ids.clone());
+        }
+        params.structured_outputs = convert_structured_output(d)?;
+    }
+
+    // StoppingCriteria
+    if let Some(s) = stopping {
+        if s.max_new_tokens != 0 {
+            params.max_tokens = Some(s.max_new_tokens);
+        }
+        if s.min_new_tokens != 0 {
+            params.min_tokens = Some(s.min_new_tokens);
+        }
+        if !s.stop_token_ids.is_empty() {
+            params.stop_token_ids = Some(s.stop_token_ids.clone());
+        }
+        params.ignore_eos = s.ignore_eos;
+    }
+
+    // ResponseOptions → logprobs
+    if let Some(r) = response {
+        if r.output_logprobs {
+            let (count, token_ids) = candidate_logprob_spec(r.output_candidates.as_ref());
+            params.logprobs = Some(count);
+            params.logprob_token_ids = token_ids;
+        }
+        if r.prompt_logprobs {
+            // The engine-core protocol has only one shared `logprob_token_ids` field
+            // for output and prompt logprobs, so a per-token-id selector for prompt
+            // candidates can't be honored independently. Reject it instead of silently
+            // dropping the list.
+            if matches!(
+                r.prompt_candidates.as_ref().and_then(|c| c.select.as_ref()),
+                Some(pb::candidate_tokens::Select::TokenIds(_))
+            ) {
+                return Err(Status::invalid_argument(
+                    "prompt_candidates token_ids selector is not supported",
+                ));
+            }
+            let (count, _) = candidate_logprob_spec(r.prompt_candidates.as_ref());
+            params.prompt_logprobs = Some(count);
+        }
+    }
+
+    Ok(params)
+}
+
+/// Map the proto `CandidateTokens` selector to a `(logprobs_count,
+/// logprob_token_ids)` pair.
+///
+/// - `top_n(k)` → `(k, None)` — return top-k candidates by probability
+/// - `all` → `(-1, None)` — return the full vocabulary
+/// - `token_ids(n)` → `(1, Some(vec of n token ids))` — return logprobs for specific tokens (the
+///   count `n` is stored in the proto as the number of token IDs that follow, but the actual IDs
+///   are carried via `logprob_token_ids` on `SamplingParams`)
+/// - absent → `(1, None)` — just the sampled/scored token
+fn candidate_logprob_spec(candidates: Option<&pb::CandidateTokens>) -> (i32, Option<Vec<u32>>) {
+    match candidates.and_then(|c| c.select.as_ref()) {
+        Some(pb::candidate_tokens::Select::TopN(n)) => (*n as i32, None),
+        Some(pb::candidate_tokens::Select::All(true)) => (-1, None),
+        Some(pb::candidate_tokens::Select::TokenIds(ids)) => (1, Some(ids.ids.clone())),
+        _ => (1, None),
+    }
+}
+
+fn convert_structured_output(
+    d: &pb::DecodingParameters,
+) -> Result<Option<StructuredOutputsParams>, Status> {
+    let so = match d.structured_output.as_ref() {
+        None => return Ok(None),
+        Some(so) => so,
+    };
+    use pb::decoding_parameters::StructuredOutput;
+    let params = match so {
+        StructuredOutput::Json(schema) => {
+            let json: serde_json::Value = serde_json::from_str(schema)
+                .map_err(|e| Status::invalid_argument(format!("invalid json schema: {e}")))?;
+            StructuredOutputsParams {
+                json: Some(json),
+                ..Default::default()
+            }
+        }
+        StructuredOutput::Regex(regex) => StructuredOutputsParams {
+            regex: Some(regex.clone()),
+            ..Default::default()
+        },
+        StructuredOutput::Choice(choices) => StructuredOutputsParams {
+            choice: Some(choices.choices.clone()),
+            ..Default::default()
+        },
+        StructuredOutput::Grammar(grammar) => StructuredOutputsParams {
+            grammar: Some(grammar.clone()),
+            ..Default::default()
+        },
+        StructuredOutput::JsonObject(true) => StructuredOutputsParams {
+            json_object: Some(true),
+            ..Default::default()
+        },
+        StructuredOutput::JsonObject(false) => return Ok(None),
+        StructuredOutput::StructuralTag(tag) => StructuredOutputsParams {
+            structural_tag: Some(tag.clone()),
+            ..Default::default()
+        },
+    };
+    Ok(Some(params))
+}
+
+// ========================================================================================
+// Response conversion
+// ========================================================================================
+
+/// Convert a `DecodedTextEvent::Start` into the prompt info portion of a gRPC
+/// response.
+pub fn to_prompt_info(
+    prompt_token_ids: &[u32],
+    prompt_logprobs: Option<&DecodedPromptLogprobs>,
+    opts: &ResponseOpts,
+) -> pb::PromptInfo {
+    let token_ids = if opts.prompt_token_ids {
+        prompt_token_ids.to_vec()
+    } else {
+        vec![]
+    };
+
+    let (logprobs, ranks, candidate_tokens) = match prompt_logprobs {
+        Some(plp) if opts.prompt_logprobs => prompt_logprobs_to_proto(plp),
+        _ => (vec![], vec![], vec![]),
+    };
+
+    pb::PromptInfo {
+        num_prompt_tokens: prompt_token_ids.len() as u32,
+        token_ids,
+        logprobs,
+        ranks,
+        candidate_tokens,
+    }
+}
+
+/// Convert a `DecodedTextEvent::TextDelta` into a gRPC `SequenceOutput`.
+pub fn to_sequence_output(
+    delta: &str,
+    token_ids: &[u32],
+    logprobs: Option<&DecodedLogprobs>,
+    finished: Option<&Finished>,
+    opts: &ResponseOpts,
+) -> pb::SequenceOutput {
+    let (lp_values, rank_values, candidates) = match logprobs {
+        Some(lp) if opts.output_logprobs => output_logprobs_to_proto(lp),
+        _ => (vec![], vec![], vec![]),
+    };
+
+    pb::SequenceOutput {
+        index: 0, // TODO: multi-sequence (n > 1) not supported
+        text: if opts.output_text {
+            delta.to_string()
+        } else {
+            String::new()
+        },
+        num_tokens: token_ids.len() as u32,
+        token_ids: if opts.output_token_ids {
+            token_ids.to_vec()
+        } else {
+            vec![]
+        },
+        logprobs: lp_values,
+        ranks: rank_values,
+        candidate_tokens: candidates,
+        finish_info: finished.map(|f| to_finish_info(f, token_ids)),
+    }
+}
+
+fn to_finish_info(finished: &Finished, token_ids: &[u32]) -> pb::FinishInfo {
+    use pb::finish_info::FinishReason as PbFinishReason;
+
+    let (finish_reason, stop_reason) = match &finished.finish_reason {
+        FinishReason::Stop(reason) => {
+            let sr = match reason {
+                Some(StopReason::TokenId(id)) => {
+                    Some(pb::finish_info::StopReason::StopTokenId(*id))
+                }
+                Some(StopReason::Text(s)) => {
+                    Some(pb::finish_info::StopReason::StopString(s.clone()))
+                }
+                // EOS-driven stop: engine-core matched the primary EOS token id but did not
+                // echo it back as a `stop_reason`. The matched token is, by construction, the
+                // last token of the terminal output batch (see vllm's `check_stop` in
+                // vllm/v1/core/sched/utils.py), so we recover it from there.
+                None => token_ids.last().copied().map(pb::finish_info::StopReason::EosTokenId),
+            };
+            (PbFinishReason::Stop as i32, sr)
+        }
+        FinishReason::Length => (PbFinishReason::Length as i32, None),
+        FinishReason::Abort | FinishReason::Error | FinishReason::Repetition => {
+            (PbFinishReason::Aborted as i32, None)
+        }
+    };
+
+    pb::FinishInfo {
+        num_output_tokens: finished.output_token_count as u32,
+        finish_reason,
+        stop_reason,
+        kv_transfer_params: finished.kv_transfer_params.as_ref().and_then(json_to_proto_struct),
+    }
+}
+
+// ========================================================================================
+// Logprobs helpers
+// ========================================================================================
+
+/// Convert output logprobs to the flat proto representation.
+///
+/// Returns (logprob_values, ranks, candidate_tokens) — all parallel arrays
+/// indexed by position.
+fn output_logprobs_to_proto(
+    lp: &DecodedLogprobs,
+) -> (Vec<f32>, Vec<u32>, Vec<pb::CandidateTokenInfo>) {
+    positions_to_proto(&lp.positions)
+}
+
+/// Convert prompt logprobs to the flat proto representation.
+fn prompt_logprobs_to_proto(
+    plp: &DecodedPromptLogprobs,
+) -> (Vec<f32>, Vec<u32>, Vec<pb::CandidateTokenInfo>) {
+    // The proto PromptInfo has flat parallel arrays covering all prompt positions.
+    // DecodedPromptLogprobs has first_token separately + scored_positions for the
+    // rest. The first prompt position has no scores, so we emit zeros for it.
+    let (mut logprobs, mut ranks, mut candidates) = positions_to_proto(&plp.scored_positions);
+    logprobs.insert(0, 0.0);
+    ranks.insert(0, 0);
+    candidates.insert(0, pb::CandidateTokenInfo { tokens: vec![] });
+    (logprobs, ranks, candidates)
+}
+
+/// Shared helper: convert a slice of decoded position logprobs to flat proto
+/// arrays.
+fn positions_to_proto(
+    positions: &[vllm_text::DecodedPositionLogprobs],
+) -> (Vec<f32>, Vec<u32>, Vec<pb::CandidateTokenInfo>) {
+    let mut logprobs = Vec::with_capacity(positions.len());
+    let mut ranks = Vec::with_capacity(positions.len());
+    let mut candidates = Vec::with_capacity(positions.len());
+
+    for pos in positions {
+        // First entry is the sampled/scored token.
+        if let Some(first) = pos.entries.first() {
+            logprobs.push(first.logprob);
+            ranks.push(first.rank);
+        }
+
+        // Extra candidates beyond the first.
+        let entries = pos.entries.iter().skip(1);
+        candidates.push(pb::CandidateTokenInfo {
+            tokens: entries
+                .map(|e| pb::candidate_token_info::TokenInfo {
+                    id: e.token_id,
+                    logprob: e.logprob,
+                    rank: e.rank,
+                })
+                .collect(),
+        });
+    }
+
+    (logprobs, ranks, candidates)
+}
+
+// ========================================================================================
+// KV transfer params conversion (serde_json::Value ↔ prost_types::Struct)
+// ========================================================================================
+
+fn proto_struct_to_json(s: &prost_types::Struct) -> serde_json::Value {
+    serde_json::Value::Object(
+        s.fields.iter().map(|(k, v)| (k.clone(), proto_value_to_json(v))).collect(),
+    )
+}
+
+fn proto_value_to_json(v: &prost_types::Value) -> serde_json::Value {
+    use prost_types::value::Kind;
+    match v.kind.as_ref() {
+        None | Some(Kind::NullValue(_)) => serde_json::Value::Null,
+        Some(Kind::BoolValue(b)) => serde_json::Value::Bool(*b),
+        Some(Kind::NumberValue(n)) => serde_json::json!(*n),
+        Some(Kind::StringValue(s)) => serde_json::Value::String(s.clone()),
+        Some(Kind::ListValue(list)) => {
+            serde_json::Value::Array(list.values.iter().map(proto_value_to_json).collect())
+        }
+        Some(Kind::StructValue(s)) => proto_struct_to_json(s),
+    }
+}
+
+fn json_to_proto_struct(value: &serde_json::Value) -> Option<prost_types::Struct> {
+    match value {
+        serde_json::Value::Object(map) => Some(prost_types::Struct {
+            fields: map.iter().map(|(k, v)| (k.clone(), json_to_proto_value(v))).collect(),
+        }),
+        _ => None,
+    }
+}
+
+fn json_to_proto_value(v: &serde_json::Value) -> prost_types::Value {
+    use prost_types::value::Kind;
+    let kind = match v {
+        serde_json::Value::Null => Kind::NullValue(0),
+        serde_json::Value::Bool(b) => Kind::BoolValue(*b),
+        serde_json::Value::Number(n) => Kind::NumberValue(n.as_f64().unwrap_or(0.0)),
+        serde_json::Value::String(s) => Kind::StringValue(s.clone()),
+        serde_json::Value::Array(arr) => Kind::ListValue(prost_types::ListValue {
+            values: arr.iter().map(json_to_proto_value).collect(),
+        }),
+        serde_json::Value::Object(map) => Kind::StructValue(prost_types::Struct {
+            fields: map.iter().map(|(k, v)| (k.clone(), json_to_proto_value(v))).collect(),
+        }),
+    };
+    prost_types::Value { kind: Some(kind) }
+}
+
+// ========================================================================================
+// Options extracted from the request for response building
+// ========================================================================================
+
+/// Response-shaping options extracted from the proto `ResponseOptions`.
+#[derive(Default)]
+pub struct ResponseOpts {
+    pub prompt_token_ids: bool,
+    pub prompt_logprobs: bool,
+    pub output_text: bool,
+    pub output_token_ids: bool,
+    pub output_logprobs: bool,
+}
+
+impl ResponseOpts {
+    pub fn from_proto(r: Option<&pb::ResponseOptions>) -> Self {
+        match r {
+            Some(r) => Self {
+                prompt_token_ids: r.prompt_token_ids,
+                prompt_logprobs: r.prompt_logprobs,
+                output_text: r.output_text.unwrap_or(true),
+                output_token_ids: r.output_token_ids,
+                output_logprobs: r.output_logprobs,
+            },
+            None => Self {
+                output_text: true,
+                ..Default::default()
+            },
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use vllm_engine_core_client::protocol::StopReason;
+    use vllm_text::{FinishReason, Finished, Prompt};
+
+    use super::pb::finish_info::{FinishReason as PbFinishReason, StopReason as PbStopReason};
+    use super::{ResponseOpts, pb, to_finish_info, to_sequence_output, to_text_request};
+
+    fn base_request() -> pb::GenerateRequest {
+        pb::GenerateRequest {
+            request_id: "req".to_string(),
+            model: "test-model".to_string(),
+            prompt: Some(pb::generate_request::Prompt::Text("hi".to_string())),
+            ..Default::default()
+        }
+    }
+
+    #[test]
+    fn temperature_propagates_from_top_level_request_field() {
+        let req = pb::GenerateRequest {
+            temperature: Some(0.7),
+            ..base_request()
+        };
+        let text = to_text_request(req, false, &["test-model".to_string()]).expect("convert ok");
+        assert_eq!(text.sampling_params.temperature, Some(0.7));
+    }
+
+    #[test]
+    fn unset_temperature_defaults_to_greedy() {
+        let text = to_text_request(base_request(), false, &["test-model".to_string()])
+            .expect("convert ok");
+        // The gRPC API defaults to greedy (0.0) when temperature is not specified.
+        assert_eq!(text.sampling_params.temperature, Some(0.0));
+    }
+
+    #[test]
+    fn absent_seed_is_none() {
+        let req = pb::GenerateRequest {
+            sampling: Some(pb::RandomSampling {
+                seed: None,
+                ..Default::default()
+            }),
+            ..base_request()
+        };
+        let text = to_text_request(req, false, &["test-model".to_string()]).expect("convert ok");
+        assert_eq!(text.sampling_params.seed, None);
+    }
+
+    #[test]
+    fn zero_seed_is_valid() {
+        let req = pb::GenerateRequest {
+            sampling: Some(pb::RandomSampling {
+                seed: Some(0),
+                ..Default::default()
+            }),
+            ..base_request()
+        };
+        let text = to_text_request(req, false, &["test-model".to_string()]).expect("convert ok");
+        assert_eq!(text.sampling_params.seed, Some(0));
+    }
+
+    #[test]
+    fn bypass_prefix_cache_maps_to_skip_reading_prefix_cache() {
+        let req = pb::GenerateRequest {
+            kv: Some(pb::KvCacheParameters {
+                bypass_prefix_cache: true,
+                ..Default::default()
+            }),
+            ..base_request()
+        };
+        let text = to_text_request(req, false, &["test-model".to_string()]).expect("convert ok");
+        assert_eq!(text.sampling_params.skip_reading_prefix_cache, Some(true));
+    }
+
+    #[test]
+    fn bypass_prefix_cache_false_leaves_field_unset() {
+        let req = pb::GenerateRequest {
+            kv: Some(pb::KvCacheParameters {
+                bypass_prefix_cache: false,
+                ..Default::default()
+            }),
+            ..base_request()
+        };
+        let text = to_text_request(req, false, &["test-model".to_string()]).expect("convert ok");
+        assert_eq!(text.sampling_params.skip_reading_prefix_cache, None);
+        // Prompt conversion still succeeds and reaches the expected variant.
+        assert!(matches!(text.prompt, Prompt::Text(s) if s == "hi"));
+    }
+
+    fn finished(reason: FinishReason) -> Finished {
+        Finished {
+            prompt_token_count: 0,
+            output_token_count: 0,
+            finish_reason: reason,
+            kv_transfer_params: None,
+        }
+    }
+
+    #[test]
+    fn eos_stop_reports_last_output_token_as_eos_id() {
+        let fin = finished(FinishReason::Stop(None));
+        let token_ids = [1_u32, 2, 3, 151643];
+
+        let info = to_finish_info(&fin, &token_ids);
+
+        assert_eq!(info.finish_reason, PbFinishReason::Stop as i32);
+        assert_eq!(info.stop_reason, Some(PbStopReason::EosTokenId(151643)));
+    }
+
+    #[test]
+    fn eos_stop_with_empty_token_ids_leaves_stop_reason_unset() {
+        let fin = finished(FinishReason::Stop(None));
+
+        let info = to_finish_info(&fin, &[]);
+
+        assert_eq!(info.finish_reason, PbFinishReason::Stop as i32);
+        assert_eq!(info.stop_reason, None);
+    }
+
+    #[test]
+    fn explicit_stop_token_id_is_preserved() {
+        let fin = finished(FinishReason::Stop(Some(StopReason::TokenId(42))));
+        // Terminal token list should be ignored when an explicit stop reason is
+        // present.
+        let info = to_finish_info(&fin, &[7, 42]);
+
+        assert_eq!(info.finish_reason, PbFinishReason::Stop as i32);
+        assert_eq!(info.stop_reason, Some(PbStopReason::StopTokenId(42)));
+    }
+
+    #[test]
+    fn explicit_stop_string_is_preserved() {
+        let fin = finished(FinishReason::Stop(Some(StopReason::Text("</stop>".into()))));
+
+        let info = to_finish_info(&fin, &[1, 2, 3]);
+
+        assert_eq!(info.finish_reason, PbFinishReason::Stop as i32);
+        assert_eq!(
+            info.stop_reason,
+            Some(PbStopReason::StopString("</stop>".into()))
+        );
+    }
+
+    #[test]
+    fn length_finish_has_no_stop_reason() {
+        let fin = finished(FinishReason::Length);
+
+        let info = to_finish_info(&fin, &[1, 2, 3]);
+
+        assert_eq!(info.finish_reason, PbFinishReason::Length as i32);
+        assert_eq!(info.stop_reason, None);
+    }
+
+    #[test]
+    fn abort_finish_is_mapped_to_aborted() {
+        let fin = finished(FinishReason::Abort);
+
+        let info = to_finish_info(&fin, &[]);
+
+        assert_eq!(info.finish_reason, PbFinishReason::Aborted as i32);
+        assert_eq!(info.stop_reason, None);
+    }
+
+    #[test]
+    fn to_sequence_output_threads_token_ids_into_eos_id() {
+        let fin = finished(FinishReason::Stop(None));
+        let opts = ResponseOpts {
+            output_text: true,
+            output_token_ids: true,
+            ..Default::default()
+        };
+
+        let out = to_sequence_output("hello", &[10, 20, 30], None, Some(&fin), &opts);
+
+        let finish = out.finish_info.expect("finish_info should be present");
+        assert_eq!(finish.finish_reason, PbFinishReason::Stop as i32);
+        assert_eq!(finish.stop_reason, Some(PbStopReason::EosTokenId(30)));
+    }
+}
diff --git a/rust/src/server/src/grpc/mod.rs b/rust/src/server/src/grpc/mod.rs
new file mode 100644
index 000000000000..2f648aa6ce0e
--- /dev/null
+++ b/rust/src/server/src/grpc/mod.rs
@@ -0,0 +1,158 @@
+//! gRPC Generate service backed by the shared [`vllm_text::TextLlm`] facade.
+
+mod convert;
+
+use std::pin::Pin;
+use std::sync::Arc;
+
+use futures::{Stream, StreamExt as _};
+use thiserror_ext::AsReport as _;
+use tokio::sync::mpsc;
+use tokio_stream::wrappers::ReceiverStream;
+use tonic::{Request, Response, Status};
+use tracing::info;
+use vllm_text::{DecodedTextEvent, TextOutputStreamExt as _};
+
+use self::convert::ResponseOpts;
+use crate::state::AppState;
+
+/// Generated protobuf/gRPC types for the `vllm` package.
+pub mod pb {
+    tonic::include_proto!("vllm");
+}
+
+pub use pb::generate_server::GenerateServer;
+
+#[cfg(test)]
+mod tests;
+
+/// gRPC Generate service implementation backed by the shared application state.
+pub struct GenerateServiceImpl {
+    state: Arc<AppState>,
+}
+
+impl GenerateServiceImpl {
+    pub fn new(state: Arc<AppState>) -> Self {
+        Self { state }
+    }
+}
+
+#[tonic::async_trait]
+impl pb::generate_server::Generate for GenerateServiceImpl {
+    type GenerateStreamStream =
+        Pin<Box<dyn Stream<Item = Result<pb::GenerateResponse, Status>> + Send>>;
+
+    /// Unary generate: collect all output and return a single response.
+    async fn generate(
+        &self,
+        request: Request<pb::GenerateRequest>,
+    ) -> Result<Response<pb::GenerateResponse>, Status> {
+        let proto_req = request.into_inner();
+        let response_opts = ResponseOpts::from_proto(proto_req.response.as_ref());
+        let text_request =
+            convert::to_text_request(proto_req, false, self.state.served_model_names())?;
+
+        let request_id = text_request.request_id.clone();
+        info!(%request_id, "grpc generate (unary)");
+
+        let stream = self.state.chat.text().generate(text_request).await;
+        let stream = stream.map_err(|e| Status::internal(e.to_report_string()))?;
+
+        let collected = stream
+            .collect_output()
+            .await
+            .map_err(|e| Status::internal(e.to_report_string()))?;
+
+        // Build the single aggregated response.
+        let prompt_info = convert::to_prompt_info(
+            &collected.prompt_token_ids,
+            collected.prompt_logprobs.as_ref(),
+            &response_opts,
+        );
+
+        let finish_info = vllm_text::Finished {
+            prompt_token_count: collected.prompt_token_ids.len(),
+            output_token_count: collected.token_ids.len(),
+            finish_reason: collected.finish_reason,
+            kv_transfer_params: collected.kv_transfer_params,
+        };
+
+        let outputs = convert::to_sequence_output(
+            &collected.text,
+            &collected.token_ids,
+            collected.logprobs.as_ref(),
+            Some(&finish_info),
+            &response_opts,
+        );
+
+        Ok(Response::new(pb::GenerateResponse {
+            prompt_info: Some(prompt_info),
+            outputs: Some(outputs),
+        }))
+    }
+
+    /// Streaming generate: yield incremental responses as tokens are produced.
+    async fn generate_stream(
+        &self,
+        request: Request<pb::GenerateRequest>,
+    ) -> Result<Response<Self::GenerateStreamStream>, Status> {
+        let proto_req = request.into_inner();
+        let response_opts = ResponseOpts::from_proto(proto_req.response.as_ref());
+        let text_request =
+            convert::to_text_request(proto_req, true, self.state.served_model_names())?;
+
+        let request_id = text_request.request_id.clone();
+        info!(%request_id, "grpc generate (stream)");
+
+        let stream = self.state.chat.text().generate(text_request).await;
+        let stream = stream.map_err(|e| Status::internal(e.to_report_string()))?;
+
+        let (tx, rx) = mpsc::channel(32);
+
+        tokio::spawn(async move {
+            futures::pin_mut!(stream);
+            while let Some(event) = stream.next().await {
+                let response = match event {
+                    Err(e) => Err(Status::internal(e.to_report_string())),
+                    Ok(DecodedTextEvent::Start {
+                        prompt_token_ids,
+                        prompt_logprobs,
+                    }) => {
+                        let prompt_info = convert::to_prompt_info(
+                            &prompt_token_ids,
+                            prompt_logprobs.as_ref(),
+                            &response_opts,
+                        );
+                        Ok(pb::GenerateResponse {
+                            prompt_info: Some(prompt_info),
+                            outputs: None,
+                        })
+                    }
+                    Ok(DecodedTextEvent::TextDelta {
+                        delta,
+                        token_ids,
+                        logprobs,
+                        finished,
+                    }) => Ok(pb::GenerateResponse {
+                        prompt_info: None,
+                        outputs: Some(convert::to_sequence_output(
+                            &delta,
+                            &token_ids,
+                            logprobs.as_ref(),
+                            finished.as_ref(),
+                            &response_opts,
+                        )),
+                    }),
+                };
+
+                if tx.send(response).await.is_err() {
+                    // Client disconnected.
+                    break;
+                }
+            }
+        });
+
+        let response_stream = ReceiverStream::new(rx);
+        Ok(Response::new(Box::pin(response_stream)))
+    }
+}
diff --git a/rust/src/server/src/grpc/tests.rs b/rust/src/server/src/grpc/tests.rs
new file mode 100644
index 000000000000..17361ae0e86f
--- /dev/null
+++ b/rust/src/server/src/grpc/tests.rs
@@ -0,0 +1,662 @@
+use std::future::Future;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+
+use futures::StreamExt as _;
+use serial_test::serial;
+use tonic::transport::Server as TonicServer;
+use vllm_chat::{
+    ChatBackend, ChatLlm, ChatRenderer, ChatRequest, ChatTextBackend, DefaultChatOutputProcessor,
+    DynChatOutputProcessor, DynChatRenderer, NewChatOutputProcessorOptions, RenderedPrompt,
+};
+use vllm_engine_core_client::protocol::{
+    EngineCoreFinishReason, EngineCoreOutput, EngineCoreOutputs, EngineCoreRequest,
+};
+use vllm_engine_core_client::test_utils::{IpcNamespace, spawn_mock_engine_task};
+use vllm_engine_core_client::{EngineCoreClient, EngineCoreClientConfig, EngineId};
+use vllm_llm::Llm;
+use vllm_text::tokenizer::{DynTokenizer, Tokenizer};
+use vllm_text::{Prompt, TextBackend};
+use zeromq::prelude::{SocketRecv, SocketSend};
+use zeromq::{DealerSocket, PushSocket, ZmqMessage};
+
+use super::pb::generate_client::GenerateClient;
+use super::{GenerateServer, GenerateServiceImpl, pb};
+use crate::state::AppState;
+
+// ========================================================================================
+// Helpers (mirrors the patterns in routes/tests.rs)
+// ========================================================================================
+
+type TestFuture<'a> = Pin<Box<dyn Future<Output = ()> + Send + 'a>>;
+
+fn boxed_test_future<'a>(future: impl Future<Output = ()> + Send + 'a) -> TestFuture<'a> {
+    Box::pin(future)
+}
+
+struct MockEngineTask {
+    shutdown_tx: Option<tokio::sync::oneshot::Sender<()>>,
+    join_handle: Option<tokio::task::JoinHandle<()>>,
+}
+
+impl MockEngineTask {
+    fn new(
+        (shutdown_tx, join_handle): (
+            tokio::sync::oneshot::Sender<()>,
+            tokio::task::JoinHandle<()>,
+        ),
+    ) -> Self {
+        Self {
+            shutdown_tx: Some(shutdown_tx),
+            join_handle: Some(join_handle),
+        }
+    }
+}
+
+impl Future for MockEngineTask {
+    type Output = Result<(), tokio::task::JoinError>;
+
+    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        if let Some(shutdown_tx) = self.shutdown_tx.take() {
+            let _ = shutdown_tx.send(());
+        }
+        match self.join_handle.as_mut() {
+            Some(join_handle) => Pin::new(join_handle).poll(cx),
+            None => Poll::Ready(Ok(())),
+        }
+    }
+}
+
+impl Drop for MockEngineTask {
+    fn drop(&mut self) {
+        if let Some(join_handle) = &self.join_handle {
+            join_handle.abort();
+        }
+    }
+}
+
+fn request_output(
+    request_id: &str,
+    new_token_ids: Vec<u32>,
+    finish_reason: Option<EngineCoreFinishReason>,
+) -> EngineCoreOutput {
+    EngineCoreOutput {
+        request_id: request_id.to_string(),
+        new_token_ids,
+        new_logprobs: None,
+        new_prompt_logprobs_tensors: None,
+        pooling_output: None,
+        finish_reason,
+        stop_reason: None,
+        events: None,
+        kv_transfer_params: None,
+        trace_headers: None,
+        prefill_stats: None,
+        routed_experts: None,
+        num_nans_in_logits: 0,
+    }
+}
+
+fn engine_outputs_for_request(
+    request_id: &str,
+    output_specs: Vec<(Vec<u32>, Option<EngineCoreFinishReason>)>,
+) -> EngineCoreOutputs {
+    EngineCoreOutputs {
+        engine_index: 0,
+        outputs: output_specs
+            .into_iter()
+            .map(|(token_ids, finish_reason)| request_output(request_id, token_ids, finish_reason))
+            .collect(),
+        scheduler_stats: None,
+        timestamp: 0.0,
+        utility_output: None,
+        finished_requests: None,
+        wave_complete: None,
+        start_wave: None,
+    }
+}
+
+fn default_stream_output_specs() -> Vec<(Vec<u32>, Option<EngineCoreFinishReason>)> {
+    vec![
+        (vec![b'h' as u32], None),
+        (vec![b'i' as u32], None),
+        (vec![b'!' as u32], Some(EngineCoreFinishReason::Stop)),
+    ]
+}
+
+async fn send_outputs(push: &mut PushSocket, outputs: EngineCoreOutputs) {
+    push.send(ZmqMessage::from(
+        rmp_serde::to_vec_named(&outputs).expect("encode outputs"),
+    ))
+    .await
+    .expect("send outputs");
+}
+
+async fn recv_engine_message(dealer: &mut DealerSocket) -> Vec<bytes::Bytes> {
+    dealer.recv().await.expect("recv engine message").into_vec()
+}
+
+fn test_llm(client: EngineCoreClient) -> Llm {
+    Llm::new(client).with_request_id_randomization(false)
+}
+
+#[derive(Clone, Debug)]
+struct FakeTextBackend;
+
+#[derive(Debug)]
+struct FakeTokenizer;
+
+impl Tokenizer for FakeTokenizer {
+    fn encode(
+        &self,
+        text: &str,
+        _add_special_tokens: bool,
+    ) -> vllm_text::tokenizer::Result<Vec<u32>> {
+        Ok(text.bytes().map(u32::from).collect())
+    }
+
+    fn decode(
+        &self,
+        token_ids: &[u32],
+        _skip_special_tokens: bool,
+    ) -> vllm_text::tokenizer::Result<String> {
+        Ok(
+            String::from_utf8_lossy(&token_ids.iter().map(|id| *id as u8).collect::<Vec<_>>())
+                .into_owned(),
+        )
+    }
+
+    fn token_to_id(&self, token: &str) -> Option<u32> {
+        token.bytes().next().map(u32::from)
+    }
+}
+
+impl TextBackend for FakeTextBackend {
+    fn tokenizer(&self) -> DynTokenizer {
+        Arc::new(FakeTokenizer)
+    }
+
+    fn model_id(&self) -> &str {
+        "test-model"
+    }
+}
+
+impl ChatBackend for FakeTextBackend {
+    fn chat_renderer(&self) -> DynChatRenderer {
+        Arc::new(self.clone())
+    }
+
+    fn new_chat_output_processor(
+        &self,
+        request: &mut ChatRequest,
+        options: NewChatOutputProcessorOptions<'_>,
+    ) -> vllm_chat::Result<DynChatOutputProcessor> {
+        Ok(Box::new(DefaultChatOutputProcessor::new(
+            request,
+            self.model_id(),
+            self.tokenizer(),
+            options.tool_call_parser,
+            options.reasoning_parser,
+        )?))
+    }
+}
+
+impl ChatRenderer for FakeTextBackend {
+    fn render(&self, _request: &ChatRequest) -> vllm_chat::Result<RenderedPrompt> {
+        Ok(RenderedPrompt {
+            prompt: Prompt::Text(String::new()),
+        })
+    }
+}
+
+/// Spin up a gRPC server backed by a mock engine that serves a single request
+/// with the given output specs. Returns the client, the gRPC server task, and
+/// the mock engine task.
+async fn grpc_test_server(
+    engine_id: impl Into<EngineId>,
+    output_specs: Vec<(Vec<u32>, Option<EngineCoreFinishReason>)>,
+) -> (
+    GenerateClient<tonic::transport::Channel>,
+    tokio::task::JoinHandle<()>,
+    MockEngineTask,
+) {
+    let ipc = IpcNamespace::new().expect("create ipc namespace");
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = engine_id.into();
+
+    let engine_task = MockEngineTask::new(spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        move |dealer, push| {
+            boxed_test_future(async move {
+                let add = recv_engine_message(dealer).await;
+                let request: EngineCoreRequest =
+                    rmp_serde::from_slice(&add[1]).expect("decode request");
+                send_outputs(
+                    push,
+                    engine_outputs_for_request(&request.request_id, output_specs),
+                )
+                .await;
+            })
+        },
+    ));
+
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name("test-model")
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .expect("connect client");
+
+    let chat = ChatLlm::from_shared_backend(
+        test_llm(client),
+        Arc::new(FakeTextBackend) as Arc<dyn ChatTextBackend>,
+    );
+    let state = Arc::new(AppState::new(vec!["test-model".to_string()], chat));
+    let svc = GenerateServer::new(GenerateServiceImpl::new(state));
+
+    // Bind to an OS-assigned port.
+    let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.expect("bind grpc listener");
+    let addr = listener.local_addr().expect("local addr");
+
+    let server_task = tokio::spawn(async move {
+        let incoming = tokio_stream::wrappers::TcpListenerStream::new(listener);
+        TonicServer::builder()
+            .add_service(svc)
+            .serve_with_incoming(incoming)
+            .await
+            .expect("grpc server");
+    });
+
+    // Connect the client.
+    let grpc_client = GenerateClient::connect(format!("http://{addr}"))
+        .await
+        .expect("connect grpc client");
+
+    (grpc_client, server_task, engine_task)
+}
+
+// ========================================================================================
+// Tests
+// ========================================================================================
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn unary_generate_returns_collected_text() {
+    let (mut client, server_task, engine_task) =
+        grpc_test_server(b"engine-grpc-unary", default_stream_output_specs()).await;
+
+    let response = client
+        .generate(pb::GenerateRequest {
+            request_id: "test-unary-1".to_string(),
+            model: "test-model".to_string(),
+            prompt: Some(pb::generate_request::Prompt::Text("hello".to_string())),
+            stopping: Some(pb::StoppingCriteria {
+                max_new_tokens: 10,
+                ..Default::default()
+            }),
+            response: Some(pb::ResponseOptions {
+                output_text: Some(true),
+                ..Default::default()
+            }),
+            ..Default::default()
+        })
+        .await
+        .expect("unary generate")
+        .into_inner();
+
+    // Unary collects all tokens into one response.
+    let outputs = response.outputs.expect("outputs present");
+    assert_eq!(outputs.text, "hi");
+
+    let finish = outputs.finish_info.expect("finish_info present");
+    assert_eq!(
+        finish.finish_reason,
+        pb::finish_info::FinishReason::Stop as i32
+    );
+    assert_eq!(finish.num_output_tokens, 3);
+
+    let prompt = response.prompt_info.expect("prompt_info present");
+    assert_eq!(prompt.num_prompt_tokens, 5); // "hello" = 5 bytes
+
+    engine_task.await.expect("mock engine task");
+    server_task.abort();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn unary_generate_with_token_ids_prompt() {
+    let (mut client, server_task, engine_task) =
+        grpc_test_server(b"engine-grpc-token-ids", default_stream_output_specs()).await;
+
+    let response = client
+        .generate(pb::GenerateRequest {
+            request_id: "test-token-ids".to_string(),
+            model: "test-model".to_string(),
+            prompt: Some(pb::generate_request::Prompt::TokenIds(pb::TokenIds {
+                ids: vec![1, 2, 3],
+            })),
+            stopping: Some(pb::StoppingCriteria {
+                max_new_tokens: 10,
+                ..Default::default()
+            }),
+            ..Default::default()
+        })
+        .await
+        .expect("unary generate with token ids")
+        .into_inner();
+
+    let outputs = response.outputs.expect("outputs present");
+    assert_eq!(outputs.text, "hi");
+    assert_eq!(
+        response.prompt_info.expect("prompt_info").num_prompt_tokens,
+        3
+    );
+
+    engine_task.await.expect("mock engine task");
+    server_task.abort();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn unary_generate_returns_token_ids_when_requested() {
+    let (mut client, server_task, engine_task) =
+        grpc_test_server(b"engine-grpc-tok-resp", default_stream_output_specs()).await;
+
+    let response = client
+        .generate(pb::GenerateRequest {
+            request_id: "test-tok-resp".to_string(),
+            model: "test-model".to_string(),
+            prompt: Some(pb::generate_request::Prompt::Text("hi".to_string())),
+            stopping: Some(pb::StoppingCriteria {
+                max_new_tokens: 10,
+                ..Default::default()
+            }),
+            response: Some(pb::ResponseOptions {
+                output_text: Some(true),
+                output_token_ids: true,
+                prompt_token_ids: true,
+                ..Default::default()
+            }),
+            ..Default::default()
+        })
+        .await
+        .expect("unary generate")
+        .into_inner();
+
+    let outputs = response.outputs.expect("outputs present");
+    assert_eq!(
+        outputs.token_ids,
+        vec![b'h' as u32, b'i' as u32, b'!' as u32]
+    );
+
+    let prompt = response.prompt_info.expect("prompt_info present");
+    assert_eq!(prompt.token_ids, vec![b'h' as u32, b'i' as u32]);
+
+    engine_task.await.expect("mock engine task");
+    server_task.abort();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn unary_generate_missing_prompt_returns_invalid_argument() {
+    let (mut client, server_task, _engine_task) =
+        grpc_test_server(b"engine-grpc-no-prompt", default_stream_output_specs()).await;
+
+    let status = client
+        .generate(pb::GenerateRequest {
+            request_id: "test-no-prompt".to_string(),
+            model: "test-model".to_string(),
+            prompt: None,
+            ..Default::default()
+        })
+        .await
+        .expect_err("should fail without prompt");
+
+    assert_eq!(status.code(), tonic::Code::InvalidArgument);
+    assert!(status.message().contains("prompt"));
+
+    server_task.abort();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn streaming_generate_yields_incremental_responses() {
+    let (mut client, server_task, engine_task) =
+        grpc_test_server(b"engine-grpc-stream", default_stream_output_specs()).await;
+
+    let stream = client
+        .generate_stream(pb::GenerateRequest {
+            request_id: "test-stream-1".to_string(),
+            model: "test-model".to_string(),
+            prompt: Some(pb::generate_request::Prompt::Text("hello".to_string())),
+            stopping: Some(pb::StoppingCriteria {
+                max_new_tokens: 10,
+                ..Default::default()
+            }),
+            response: Some(pb::ResponseOptions {
+                output_text: Some(true),
+                ..Default::default()
+            }),
+            ..Default::default()
+        })
+        .await
+        .expect("streaming generate")
+        .into_inner();
+
+    let responses: Vec<pb::GenerateResponse> =
+        stream.map(|r| r.expect("stream item")).collect().await;
+
+    // First response carries prompt info, subsequent ones carry output deltas.
+    assert!(
+        responses.len() >= 2,
+        "expected at least 2 streamed responses, got {}",
+        responses.len()
+    );
+
+    // First message should have prompt info.
+    let first = &responses[0];
+    let prompt_info = first.prompt_info.as_ref().expect("first response has prompt_info");
+    assert_eq!(prompt_info.num_prompt_tokens, 5); // "hello"
+
+    // Collect all text deltas.
+    let full_text: String = responses
+        .iter()
+        .filter_map(|r| r.outputs.as_ref())
+        .map(|o| o.text.as_str())
+        .collect();
+    assert_eq!(full_text, "hi");
+
+    // Last output response should have finish info.
+    let last_output = responses
+        .iter()
+        .rev()
+        .find_map(|r| r.outputs.as_ref())
+        .expect("at least one output");
+    let finish = last_output.finish_info.as_ref().expect("finish_info on last output");
+    assert_eq!(
+        finish.finish_reason,
+        pb::finish_info::FinishReason::Stop as i32
+    );
+
+    engine_task.await.expect("mock engine task");
+    server_task.abort();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn streaming_generate_missing_prompt_returns_invalid_argument() {
+    let (mut client, server_task, _engine_task) = grpc_test_server(
+        b"engine-grpc-stream-no-prompt",
+        default_stream_output_specs(),
+    )
+    .await;
+
+    let status = client
+        .generate_stream(pb::GenerateRequest {
+            request_id: "test-stream-no-prompt".to_string(),
+            model: "test-model".to_string(),
+            prompt: None,
+            ..Default::default()
+        })
+        .await
+        .expect_err("should fail without prompt");
+
+    assert_eq!(status.code(), tonic::Code::InvalidArgument);
+
+    server_task.abort();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn unary_generate_with_sampling_params() {
+    let (mut client, server_task, engine_task) =
+        grpc_test_server(b"engine-grpc-sampling", default_stream_output_specs()).await;
+
+    let response = client
+        .generate(pb::GenerateRequest {
+            request_id: "test-sampling".to_string(),
+            model: "test-model".to_string(),
+            prompt: Some(pb::generate_request::Prompt::Text("test".to_string())),
+            temperature: Some(0.7),
+            sampling: Some(pb::RandomSampling {
+                top_k: 50,
+                top_p: 0.9,
+                seed: Some(42),
+                ..Default::default()
+            }),
+            stopping: Some(pb::StoppingCriteria {
+                max_new_tokens: 5,
+                ..Default::default()
+            }),
+            ..Default::default()
+        })
+        .await
+        .expect("generate with sampling params")
+        .into_inner();
+
+    // Verify the request was accepted and produced output.
+    let outputs = response.outputs.expect("outputs present");
+    assert_eq!(outputs.text, "hi");
+
+    engine_task.await.expect("mock engine task");
+    server_task.abort();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn unary_generate_rejects_wrong_model() {
+    let (mut client, server_task, _engine_task) =
+        grpc_test_server(b"engine-grpc-wrong-model", default_stream_output_specs()).await;
+
+    let status = client
+        .generate(pb::GenerateRequest {
+            request_id: "test-wrong-model".to_string(),
+            model: "other-model".to_string(),
+            prompt: Some(pb::generate_request::Prompt::Text("hi".to_string())),
+            stopping: Some(pb::StoppingCriteria {
+                max_new_tokens: 10,
+                ..Default::default()
+            }),
+            ..Default::default()
+        })
+        .await
+        .expect_err("should fail with wrong model");
+
+    assert_eq!(status.code(), tonic::Code::NotFound);
+    assert!(status.message().contains("other-model"));
+
+    server_task.abort();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn streaming_generate_rejects_wrong_model() {
+    let (mut client, server_task, _engine_task) = grpc_test_server(
+        b"engine-grpc-stream-wrong-model",
+        default_stream_output_specs(),
+    )
+    .await;
+
+    let status = client
+        .generate_stream(pb::GenerateRequest {
+            request_id: "test-stream-wrong-model".to_string(),
+            model: "other-model".to_string(),
+            prompt: Some(pb::generate_request::Prompt::Text("hi".to_string())),
+            stopping: Some(pb::StoppingCriteria {
+                max_new_tokens: 10,
+                ..Default::default()
+            }),
+            ..Default::default()
+        })
+        .await
+        .expect_err("should fail with wrong model");
+
+    assert_eq!(status.code(), tonic::Code::NotFound);
+    assert!(status.message().contains("other-model"));
+
+    server_task.abort();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn unary_generate_accepts_empty_model() {
+    let (mut client, server_task, engine_task) =
+        grpc_test_server(b"engine-grpc-empty-model", default_stream_output_specs()).await;
+
+    // Empty `model` (proto3 default) is treated as "unset" and should be accepted.
+    let response = client
+        .generate(pb::GenerateRequest {
+            request_id: "test-empty-model".to_string(),
+            model: String::new(),
+            prompt: Some(pb::generate_request::Prompt::Text("hi".to_string())),
+            stopping: Some(pb::StoppingCriteria {
+                max_new_tokens: 10,
+                ..Default::default()
+            }),
+            ..Default::default()
+        })
+        .await
+        .expect("unary generate with empty model")
+        .into_inner();
+
+    let outputs = response.outputs.expect("outputs present");
+    assert_eq!(outputs.text, "hi");
+
+    engine_task.await.expect("mock engine task");
+    server_task.abort();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn unary_generate_output_text_defaults_to_true() {
+    let (mut client, server_task, engine_task) =
+        grpc_test_server(b"engine-grpc-default-text", default_stream_output_specs()).await;
+
+    // No response options at all — output_text should default to true.
+    let response = client
+        .generate(pb::GenerateRequest {
+            request_id: "test-default-text".to_string(),
+            model: "test-model".to_string(),
+            prompt: Some(pb::generate_request::Prompt::Text("x".to_string())),
+            stopping: Some(pb::StoppingCriteria {
+                max_new_tokens: 10,
+                ..Default::default()
+            }),
+            ..Default::default()
+        })
+        .await
+        .expect("unary generate")
+        .into_inner();
+
+    let outputs = response.outputs.expect("outputs present");
+    assert_eq!(outputs.text, "hi");
+
+    engine_task.await.expect("mock engine task");
+    server_task.abort();
+}
diff --git a/rust/src/server/src/lib.rs b/rust/src/server/src/lib.rs
new file mode 100644
index 000000000000..2b684287ba23
--- /dev/null
+++ b/rust/src/server/src/lib.rs
@@ -0,0 +1,237 @@
+//! Minimal OpenAI-compatible HTTP server above [`vllm_chat`].
+
+mod config;
+mod error;
+mod grpc;
+mod listener;
+mod middleware;
+mod routes;
+mod state;
+mod utils;
+
+use std::sync::{Arc, OnceLock};
+
+use anyhow::{Context as _, Result};
+use axum::serve::ListenerExt as _;
+pub use config::{Config, CoordinatorMode, HttpListenerMode};
+use tokio::net::TcpListener;
+use tokio::time::{Instant, sleep_until};
+use tokio_stream::wrappers::TcpListenerStream;
+use tokio_util::either::Either;
+use tokio_util::sync::CancellationToken;
+use tonic::transport::Server as TonicServer;
+use tracing::{info, trace, warn};
+use vllm_chat::{ChatLlm, LoadModelBackendsOptions, load_model_backends};
+pub use vllm_chat::{ChatTemplateContentFormatOption, ParserSelection, RendererSelection};
+use vllm_engine_core_client::{EngineCoreClient, EngineCoreClientConfig};
+use vllm_llm::Llm;
+use vllm_text::TextLlm;
+
+use crate::listener::Listener;
+use crate::routes::build_router;
+use crate::state::AppState;
+
+/// Build the shared application state for one configured model and one engine
+/// client.
+async fn build_state(config: &Config) -> Result<Arc<AppState>> {
+    // Load both backends from the same model metadata so they stay in sync.
+    let loaded = load_model_backends(
+        &config.model,
+        LoadModelBackendsOptions {
+            renderer: config.renderer,
+            chat_template: config.chat_template.clone(),
+            chat_template_content_format: config.chat_template_content_format,
+            default_chat_template_kwargs: config
+                .default_chat_template_kwargs
+                .clone()
+                .unwrap_or_default(),
+        },
+    )
+    .await
+    .context("failed to create chat/text backends")?;
+    let text_backend = loaded.text_backend;
+    let chat_backend = loaded.chat_backend;
+
+    let coordinator_mode = config.effective_coordinator_mode(text_backend.is_moe());
+    info!(
+        engine_count = config.engine_count(),
+        model_is_moe = text_backend.is_moe(),
+        ?coordinator_mode,
+        "resolved coordinator mode"
+    );
+
+    let client = EngineCoreClient::connect(EngineCoreClientConfig {
+        transport_mode: config.transport_mode.clone(),
+        coordinator_mode,
+        model_name: config.model.clone(),
+        client_index: 0,
+    })
+    .await
+    .context("failed to connect to engine core")?;
+
+    let llm = Llm::new(client).with_log_stats(!config.disable_log_stats);
+    let text = TextLlm::new(llm, text_backend);
+
+    let chat = ChatLlm::new(text, chat_backend)
+        .with_tool_call_parser(config.tool_call_parser.clone())
+        .with_reasoning_parser(config.reasoning_parser.clone());
+
+    // If no served names are specified, fall back to the backend model path so
+    // that the API always has at least one valid model ID.
+    let served_model_names = if config.served_model_name.is_empty() {
+        vec![config.model.clone()]
+    } else {
+        config.served_model_name.clone()
+    };
+
+    Ok(Arc::new(
+        AppState::new(served_model_names, chat).with_log_requests(config.enable_log_requests),
+    ))
+}
+
+/// Run the OpenAI-compatible HTTP server until the supplied shutdown token is
+/// cancelled.
+///
+/// The server owns one `vllm-chat` facade, which in turn owns the lower
+/// `vllm-text` and `vllm-llm` layers, and shuts them down before returning.
+pub async fn serve(config: Config, shutdown: CancellationToken) -> Result<()> {
+    config.validate().context("invalid OpenAI frontend configuration")?;
+
+    // Also check shutdown during the (potentially long) startup handshake.
+    let state = tokio::select! {
+        result = build_state(&config) => result?,
+        _ = shutdown.cancelled() => return Ok(()),
+    };
+    let listener = Listener::bind(&config.listener_mode)
+        .await
+        .context("failed to bind listener for OpenAI server")?;
+    let bind_address = listener.local_addr()?;
+    let model = state.primary_model_name().to_owned();
+    let app = build_router(state.clone());
+
+    // Optionally bind the gRPC Generate server on a separate port. Bind
+    // synchronously here so bind errors (port in use, permission denied, ...)
+    // surface before we start serving, rather than being deferred until
+    // shutdown. The gRPC listener follows the same host as the HTTP listener so
+    // that enabling --grpc-port does not accidentally expose the service on all
+    // interfaces when HTTP is intentionally local-only.
+    let grpc_setup = if let Some(grpc_port) = config.grpc_port {
+        let grpc_host = match &config.listener_mode {
+            HttpListenerMode::BindTcp { host, .. } => host.as_str(),
+            HttpListenerMode::BindUnix { .. } | HttpListenerMode::InheritedFd { .. } => "0.0.0.0",
+        };
+        let grpc_listener = TcpListener::bind((grpc_host, grpc_port))
+            .await
+            .with_context(|| format!("failed to bind gRPC listener on {grpc_host}:{grpc_port}"))?;
+        let addr = grpc_listener.local_addr()?;
+        let svc = grpc::GenerateServer::new(grpc::GenerateServiceImpl::new(state.clone()));
+        info!(%addr, "starting gRPC server");
+        Some((grpc_listener, svc))
+    } else {
+        None
+    };
+
+    info!(%bind_address, %model, "starting OpenAI server");
+
+    // Set TCP_NODELAY on accepted connections to reduce latency.
+    // By `tap_io` we will do this on every accepted connection.
+    let listener = listener.tap_io(|io| {
+        if let Either::Left(tcp_stream) = io
+            && let Err(err) = tcp_stream.set_nodelay(true)
+        {
+            trace!(error = %err, "failed to enable TCP_NODELAY on accepted HTTP connection");
+        }
+    });
+
+    // Run HTTP and gRPC concurrently under a child token of the caller's shutdown
+    // token. Caller cancellation propagates into both protocols; if either
+    // protocol exits first, we cancel this child token so its sibling also
+    // begins a graceful drain.
+    let server_shutdown = shutdown.child_token();
+    let force_shutdown = CancellationToken::new();
+    let shutdown_deadline = Arc::new(OnceLock::new());
+
+    // Spawn a task to trigger `force_shutdown` after shutdown deadline elapses.
+    tokio::spawn({
+        let shutdown = server_shutdown.clone();
+        let force_shutdown = force_shutdown.clone();
+        let shutdown_deadline = shutdown_deadline.clone();
+        let shutdown_timeout = config.shutdown_timeout;
+
+        async move {
+            shutdown.cancelled().await;
+            let deadline = Instant::now() + shutdown_timeout;
+            let _ = shutdown_deadline.set(deadline);
+
+            if shutdown_timeout.is_zero() {
+                force_shutdown.cancel();
+            } else {
+                sleep_until(deadline).await;
+                force_shutdown.cancel();
+            }
+        }
+    });
+
+    let http_fut = {
+        let shutdown = server_shutdown.child_token();
+        let server_shutdown = server_shutdown.clone();
+        let force_shutdown = force_shutdown.clone();
+        async move {
+            let server =
+                axum::serve(listener, app).with_graceful_shutdown(shutdown.cancelled_owned());
+
+            let result = tokio::select! {
+                result = server => {
+                    result.context("HTTP server failed")
+                }
+                _ = force_shutdown.cancelled() => {
+                    warn!("HTTP graceful shutdown deadline elapsed; aborting server");
+                    Ok(())
+                }
+            };
+
+            server_shutdown.cancel();
+            result
+        }
+    };
+
+    let grpc_fut = {
+        let shutdown = server_shutdown.child_token();
+        let server_shutdown = server_shutdown.clone();
+        let force_shutdown = force_shutdown.clone();
+        async move {
+            let Some((grpc_listener, svc)) = grpc_setup else {
+                // No gRPC configured: just wait for shutdown so we do not race the
+                // join! by resolving early and tripping the cancellation token.
+                shutdown.cancelled().await;
+                return Ok(());
+            };
+            let server = TonicServer::builder().add_service(svc).serve_with_incoming_shutdown(
+                TcpListenerStream::new(grpc_listener),
+                shutdown.cancelled_owned(),
+            );
+
+            let result = tokio::select! {
+                result = server => {
+                    result.context("gRPC server failed")
+                }
+                _ = force_shutdown.cancelled() => {
+                    warn!("gRPC graceful shutdown deadline elapsed; aborting server");
+                    Ok(())
+                }
+            };
+
+            server_shutdown.cancel();
+            result
+        }
+    };
+
+    let (http_res, grpc_res) = tokio::join!(http_fut, grpc_fut);
+    http_res.and(grpc_res)?;
+
+    let shutdown_deadline = shutdown_deadline
+        .get()
+        .copied()
+        .unwrap_or_else(|| Instant::now() + config.shutdown_timeout);
+    state.shutdown(shutdown_deadline).await
+}
diff --git a/rust/src/server/src/listener.rs b/rust/src/server/src/listener.rs
new file mode 100644
index 000000000000..b7b715b0ebd7
--- /dev/null
+++ b/rust/src/server/src/listener.rs
@@ -0,0 +1,135 @@
+//! Unified HTTP listener wrapper for the Rust frontend.
+//!
+//! This module hides the difference between TCP and Unix-domain listeners so
+//! the rest of the server can bind or inherit one socket and pass it to
+//! `axum::serve(...)` through a single type.
+
+use std::io::Result;
+use std::net::TcpListener as StdTcpListener;
+use std::os::fd::{FromRawFd, IntoRawFd, OwnedFd};
+use std::os::unix::net::UnixListener as StdUnixListener;
+
+use socket2::Socket;
+use tokio::net::{TcpListener, TcpStream, UnixListener, UnixStream};
+use tokio_util::either::Either;
+
+use crate::HttpListenerMode;
+
+/// Runtime listener type used by the OpenAI-compatible HTTP server, which is
+/// either a TCP listener or a Unix-domain listener.
+#[derive(Debug)]
+pub enum Listener {
+    Tcp(TcpListener),
+    Unix(UnixListener),
+}
+
+impl Listener {
+    /// Bind or adopt the listener described by the frontend configuration.
+    ///
+    /// For inherited sockets, the concrete listener kind is detected from the
+    /// socket family of the supplied file descriptor.
+    pub async fn bind(mode: &HttpListenerMode) -> Result<Self> {
+        match mode {
+            HttpListenerMode::BindTcp { host, port } => {
+                Ok(Self::Tcp(TcpListener::bind((host.as_str(), *port)).await?))
+            }
+            HttpListenerMode::BindUnix { path } => Ok(Self::Unix(UnixListener::bind(path)?)),
+            HttpListenerMode::InheritedFd { fd } => Self::from_inherited_fd(*fd),
+        }
+    }
+
+    /// Return a log-friendly local address string for either TCP or Unix
+    /// sockets.
+    pub fn local_addr(&self) -> Result<String> {
+        match self {
+            Self::Tcp(listener) => Ok(listener.local_addr()?.to_string()),
+            Self::Unix(listener) => Ok(match listener.local_addr()?.as_pathname() {
+                Some(path) => format!("unix:{}", path.display()),
+                None => "unix:<unnamed>".to_string(),
+            }),
+        }
+    }
+
+    fn from_inherited_fd(fd: i32) -> Result<Self> {
+        // SAFETY: We trust the caller to only pass valid listener fds, and we only use
+        // this fd once to create a single listener.
+        let owned_fd = unsafe { OwnedFd::from_raw_fd(fd) };
+        let socket = Socket::from(owned_fd);
+
+        // The Python supervisor pre-binds the socket to reserve the endpoint early, but
+        // Rust is responsible for transitioning inherited stream sockets into
+        // the listening state before accepting connections.
+        socket.listen(libc::SOMAXCONN)?;
+        socket.set_nonblocking(true)?;
+
+        if socket.local_addr()?.is_unix() {
+            let std_listener = unsafe { StdUnixListener::from_raw_fd(socket.into_raw_fd()) };
+            Ok(Self::Unix(UnixListener::from_std(std_listener)?))
+        } else {
+            let std_listener = unsafe { StdTcpListener::from_raw_fd(socket.into_raw_fd()) };
+            Ok(Self::Tcp(TcpListener::from_std(std_listener)?))
+        }
+    }
+}
+
+/// Allow the unified listener to plug directly into `axum::serve(...)`.
+impl axum::serve::Listener for Listener {
+    type Addr = Either<std::net::SocketAddr, tokio::net::unix::SocketAddr>;
+    type Io = Either<TcpStream, UnixStream>;
+
+    async fn accept(&mut self) -> (Self::Io, Self::Addr) {
+        match self {
+            Self::Tcp(listener) => {
+                let (io, addr) = listener.accept().await;
+                (Either::Left(io), Either::Left(addr))
+            }
+            Self::Unix(listener) => {
+                let (io, addr) = listener.accept().await;
+                (Either::Right(io), Either::Right(addr))
+            }
+        }
+    }
+
+    fn local_addr(&self) -> Result<Self::Addr> {
+        match self {
+            Self::Tcp(listener) => listener.local_addr().map(Either::Left),
+            Self::Unix(listener) => listener.local_addr().map(Either::Right),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::net::{Ipv4Addr, SocketAddrV4};
+    use std::os::fd::IntoRawFd;
+
+    use socket2::{Domain, SockAddr, Socket, Type};
+    use uuid::Uuid;
+
+    use super::Listener;
+    use crate::HttpListenerMode;
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn inherited_fd_detects_tcp_listener_without_uds_hint() {
+        let socket = Socket::new(Domain::IPV4, Type::STREAM, None).unwrap();
+        socket.bind(&SockAddr::from(SocketAddrV4::new(Ipv4Addr::LOCALHOST, 0))).unwrap();
+        let fd = socket.into_raw_fd();
+
+        let listener = Listener::bind(&HttpListenerMode::InheritedFd { fd }).await.unwrap();
+
+        assert!(matches!(listener, Listener::Tcp(_)));
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn inherited_fd_detects_unix_listener_from_fd() {
+        let path = std::env::temp_dir().join(format!("vllm-rs-{}.sock", Uuid::new_v4()));
+        let socket = Socket::new(Domain::UNIX, Type::STREAM, None).unwrap();
+        socket.bind(&SockAddr::unix(&path).unwrap()).unwrap();
+        let fd = socket.into_raw_fd();
+
+        let listener = Listener::bind(&HttpListenerMode::InheritedFd { fd }).await.unwrap();
+
+        assert!(matches!(listener, Listener::Unix(_)));
+        let _ = std::fs::remove_file(path);
+    }
+}
diff --git a/rust/src/server/src/middleware/load.rs b/rust/src/server/src/middleware/load.rs
new file mode 100644
index 000000000000..d03b36fb5fec
--- /dev/null
+++ b/rust/src/server/src/middleware/load.rs
@@ -0,0 +1,110 @@
+use std::pin::Pin;
+use std::sync::{Arc, Weak};
+use std::task::{Context, Poll};
+
+use axum::body::{Body, Bytes, HttpBody};
+use axum::extract::{MatchedPath, Request, State};
+use axum::middleware::Next;
+use axum::response::Response;
+use http_body::{Frame, SizeHint};
+
+use crate::state::AppState;
+
+/// Endpoints that will be tracked for server load.
+///
+/// Derived from the Python frontend's actual `@load_aware_call` coverage. This
+/// includes alias paths that delegate into decorated handlers, such as
+/// `/v1/rerank` and `/v2/rerank`.
+const TRACKED_HANDLERS: &[&str] = &[
+    "/v1/responses",
+    "/v1/responses/{response_id}",
+    "/v1/responses/{response_id}/cancel",
+    "/v1/messages",
+    "/v1/messages/count_tokens",
+    "/v1/chat/completions",
+    "/v1/completions",
+    "/v1/audio/transcriptions",
+    "/v1/audio/translations",
+    "/v1/embeddings",
+    "/pooling",
+    "/classify",
+    "/score",
+    "/v1/score",
+    "/rerank",
+    "/v1/rerank",
+    "/v2/rerank",
+    "/inference/v1/generate",
+];
+
+/// Track frontend-local in-flight inference requests for the `/load` endpoint.
+pub async fn track_server_load(
+    State(state): State<Arc<AppState>>,
+    req: Request,
+    next: Next,
+) -> Response {
+    let handler = req
+        .extensions()
+        .get::<MatchedPath>()
+        .map_or_else(|| "none", |path| path.as_str());
+
+    if !TRACKED_HANDLERS.contains(&handler) {
+        return next.run(req).await;
+    }
+
+    state.increment_server_load();
+    let guard = ServerLoadGuard {
+        state: Arc::downgrade(&state),
+    };
+    let response = next.run(req).await;
+
+    let (parts, body) = response.into_parts();
+    Response::from_parts(
+        parts,
+        Body::new(LoadTrackedBody {
+            inner: body,
+            _guard: guard,
+        }),
+    )
+}
+
+/// A guard that decrements the server load when dropped.
+struct ServerLoadGuard {
+    state: Weak<AppState>,
+}
+
+impl Drop for ServerLoadGuard {
+    fn drop(&mut self) {
+        if let Some(state) = self.state.upgrade() {
+            state.decrement_server_load();
+        }
+    }
+}
+
+/// A wrapper around response bodies that tracks server load by holding a
+/// `ServerLoadGuard`, which will decrement the load when the body is fully
+/// consumed and dropped.
+struct LoadTrackedBody {
+    inner: Body,
+    _guard: ServerLoadGuard,
+}
+
+// Simply delegate all `HttpBody` methods to the inner body.
+impl HttpBody for LoadTrackedBody {
+    type Data = Bytes;
+    type Error = axum::Error;
+
+    fn poll_frame(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+    ) -> Poll<Option<Result<Frame<Self::Data>, Self::Error>>> {
+        Pin::new(&mut self.inner).poll_frame(cx)
+    }
+
+    fn is_end_stream(&self) -> bool {
+        self.inner.is_end_stream()
+    }
+
+    fn size_hint(&self) -> SizeHint {
+        self.inner.size_hint()
+    }
+}
diff --git a/rust/src/server/src/middleware/metrics.rs b/rust/src/server/src/middleware/metrics.rs
new file mode 100644
index 000000000000..366d73dd3dde
--- /dev/null
+++ b/rust/src/server/src/middleware/metrics.rs
@@ -0,0 +1,79 @@
+use std::time::Instant;
+
+use axum::extract::{MatchedPath, Request};
+use axum::middleware::Next;
+use axum::response::Response;
+use vllm_metrics::{HttpHandlerLabels, HttpRequestLabels, METRICS};
+
+/// Endpoints that will be excluded from HTTP metrics tracking.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/entrypoints/serve/instrumentator/metrics.py#L28-L38>
+const EXCLUDED_HANDLERS: &[&str] = &[
+    "/metrics",
+    "/health",
+    "/load",
+    "/ping",
+    "/version",
+    "/server_info",
+    // Rust frontend extra:
+    "/reset_prefix_cache",
+    "/reset_mm_cache",
+    "/reset_encoder_cache",
+    "/collective_rpc",
+    "/sleep",
+    "/wake_up",
+    "/is_sleeping",
+];
+
+/// Record API-server HTTP metrics with Python-compatible
+/// (`PrometheusFastApiInstrumentator` style) family names and labels.
+pub async fn track_http_metrics(req: Request, next: Next) -> Response {
+    let method = req.method().as_str().to_string();
+    let handler = req
+        .extensions()
+        .get::<MatchedPath>()
+        .map_or_else(|| "none".to_string(), |path| path.as_str().to_string());
+    let excluded = EXCLUDED_HANDLERS.contains(&handler.as_str());
+    let started_at = Instant::now();
+
+    let response = next.run(req).await;
+
+    if excluded {
+        return response;
+    }
+
+    let elapsed = started_at.elapsed().as_secs_f64();
+    let status = status_group(response.status().as_u16());
+
+    let metrics = &METRICS.api_server;
+
+    metrics
+        .http_requests
+        .get_or_create(&HttpRequestLabels {
+            method: method.clone(),
+            status,
+            handler: handler.clone(),
+        })
+        .inc();
+
+    metrics
+        .http_request_duration_seconds
+        .get_or_create(&HttpHandlerLabels { method, handler })
+        .observe(elapsed);
+
+    metrics.http_request_duration_highr_seconds.observe(elapsed);
+
+    response
+}
+
+fn status_group(status: u16) -> &'static str {
+    match status / 100 {
+        1 => "1xx",
+        2 => "2xx",
+        3 => "3xx",
+        4 => "4xx",
+        5 => "5xx",
+        _ => "unknown",
+    }
+}
diff --git a/rust/src/server/src/middleware/mod.rs b/rust/src/server/src/middleware/mod.rs
new file mode 100644
index 000000000000..acb3dd1fdb7f
--- /dev/null
+++ b/rust/src/server/src/middleware/mod.rs
@@ -0,0 +1,5 @@
+mod load;
+mod metrics;
+
+pub use load::track_server_load;
+pub use metrics::track_http_metrics;
diff --git a/rust/src/server/src/routes.rs b/rust/src/server/src/routes.rs
new file mode 100644
index 000000000000..ccf90db9aa81
--- /dev/null
+++ b/rust/src/server/src/routes.rs
@@ -0,0 +1,68 @@
+mod cache;
+mod collective_rpc;
+mod health;
+mod inference;
+mod load;
+mod metrics;
+pub(crate) mod openai;
+mod sleep;
+
+use std::sync::Arc;
+
+use axum::Router;
+use axum::middleware::{from_fn, from_fn_with_state};
+use axum::routing::{get, post};
+use tower_http::trace::TraceLayer;
+
+use crate::middleware;
+use crate::state::AppState;
+
+fn server_dev_mode_enabled() -> bool {
+    std::env::var("VLLM_SERVER_DEV_MODE")
+        .ok()
+        .and_then(|value| value.parse::<i64>().ok())
+        .is_some_and(|value| value != 0)
+}
+
+/// Build the minimal OpenAI-compatible router for one configured model.
+pub fn build_router(state: Arc<AppState>) -> Router {
+    build_router_with_dev_mode(state, server_dev_mode_enabled())
+}
+
+fn build_router_with_dev_mode(state: Arc<AppState>, dev_mode_enabled: bool) -> Router {
+    let mut router = Router::new()
+        // Health & monitoring
+        .route("/health", get(health::health))
+        .route("/metrics", get(metrics::scrape))
+        .route("/load", get(load::load))
+        // OpenAI-compatible endpoints
+        .route("/v1/models", get(openai::list_models))
+        .route("/v1/completions", post(openai::completions))
+        .route("/v1/chat/completions", post(openai::chat_completions))
+        // vLLM specific inference endpoints
+        .route("/inference/v1/generate", post(inference::generate));
+
+    if dev_mode_enabled {
+        // Development-only
+        router = router
+            .route("/reset_prefix_cache", post(cache::reset_prefix_cache))
+            .route("/reset_mm_cache", post(cache::reset_mm_cache))
+            .route("/reset_encoder_cache", post(cache::reset_encoder_cache))
+            .route("/collective_rpc", post(collective_rpc::collective_rpc))
+            .route("/sleep", post(sleep::sleep))
+            .route("/wake_up", post(sleep::wake_up))
+            .route("/is_sleeping", get(sleep::is_sleeping))
+    }
+
+    router
+        .with_state(state.clone())
+        .layer(from_fn_with_state(state, middleware::track_server_load))
+        .layer(from_fn(middleware::track_http_metrics))
+        .layer(TraceLayer::new_for_http())
+}
+
+#[cfg(test)]
+mod tests;
+
+#[cfg(test)]
+mod http_client_tests;
diff --git a/rust/src/server/src/routes/cache.rs b/rust/src/server/src/routes/cache.rs
new file mode 100644
index 000000000000..580b91d41314
--- /dev/null
+++ b/rust/src/server/src/routes/cache.rs
@@ -0,0 +1,56 @@
+use std::sync::Arc;
+
+use axum::extract::{Query, State};
+use axum::http::StatusCode;
+use serde::Deserialize;
+
+use crate::error::ApiError;
+use crate::state::AppState;
+use crate::utils::utility_call_error;
+
+#[derive(Debug, Default, Deserialize)]
+pub(crate) struct ResetPrefixCacheParams {
+    #[serde(default)]
+    reset_running_requests: bool,
+    #[serde(default)]
+    reset_external: bool,
+}
+
+/// Reset the local prefix cache and optionally the connector-managed external
+/// cache.
+pub async fn reset_prefix_cache(
+    State(state): State<Arc<AppState>>,
+    Query(params): Query<ResetPrefixCacheParams>,
+) -> Result<StatusCode, ApiError> {
+    state
+        .engine_core_client()
+        .reset_prefix_cache(params.reset_running_requests, params.reset_external)
+        .await
+        .map_err(|error| utility_call_error("reset_prefix_cache", error))?;
+
+    Ok(StatusCode::OK)
+}
+
+/// Reset the multi-modal cache.
+pub async fn reset_mm_cache(State(state): State<Arc<AppState>>) -> Result<StatusCode, ApiError> {
+    state
+        .engine_core_client()
+        .reset_mm_cache()
+        .await
+        .map_err(|error| utility_call_error("reset_mm_cache", error))?;
+
+    Ok(StatusCode::OK)
+}
+
+/// Reset the encoder cache.
+pub async fn reset_encoder_cache(
+    State(state): State<Arc<AppState>>,
+) -> Result<StatusCode, ApiError> {
+    state
+        .engine_core_client()
+        .reset_encoder_cache()
+        .await
+        .map_err(|error| utility_call_error("reset_encoder_cache", error))?;
+
+    Ok(StatusCode::OK)
+}
diff --git a/rust/src/server/src/routes/collective_rpc.rs b/rust/src/server/src/routes/collective_rpc.rs
new file mode 100644
index 000000000000..965797f3019f
--- /dev/null
+++ b/rust/src/server/src/routes/collective_rpc.rs
@@ -0,0 +1,51 @@
+use std::collections::BTreeMap;
+use std::sync::Arc;
+
+use axum::Json;
+use axum::extract::State;
+use axum::extract::rejection::JsonRejection;
+use rmpv::Value as MsgpackValue;
+use serde::{Deserialize, Serialize};
+use serde_json::Value as JsonValue;
+
+use crate::error::ApiError;
+use crate::state::AppState;
+use crate::utils::utility_call_error;
+
+#[derive(Debug, Deserialize)]
+pub(crate) struct CollectiveRpcRequest {
+    method: Option<String>,
+    #[serde(default)]
+    timeout: Option<f64>,
+    #[serde(default)]
+    args: Vec<JsonValue>,
+    #[serde(default)]
+    kwargs: BTreeMap<String, JsonValue>,
+}
+
+#[derive(Debug, Serialize)]
+pub(crate) struct CollectiveRpcResponse {
+    results: Vec<MsgpackValue>,
+}
+
+/// Execute a development-only collective RPC on the connected engine(s).
+pub async fn collective_rpc(
+    State(state): State<Arc<AppState>>,
+    body: Result<Json<CollectiveRpcRequest>, JsonRejection>,
+) -> Result<Json<CollectiveRpcResponse>, ApiError> {
+    let Json(body) = body.map_err(|error| ApiError::json_parse_error(error.body_text()))?;
+    let method = body.method.ok_or_else(|| {
+        ApiError::invalid_request(
+            "Missing 'method' in request body".to_string(),
+            Some("method"),
+        )
+    })?;
+
+    let results = state
+        .engine_core_client()
+        .collective_rpc(&method, body.timeout, body.args, body.kwargs)
+        .await
+        .map_err(|error| utility_call_error("collective_rpc", error))?;
+
+    Ok(Json(CollectiveRpcResponse { results }))
+}
diff --git a/rust/src/server/src/routes/health.rs b/rust/src/server/src/routes/health.rs
new file mode 100644
index 000000000000..ff91f8570076
--- /dev/null
+++ b/rust/src/server/src/routes/health.rs
@@ -0,0 +1,14 @@
+use std::sync::Arc;
+
+use axum::extract::State;
+use axum::http::StatusCode;
+
+use crate::state::AppState;
+
+pub async fn health(State(state): State<Arc<AppState>>) -> StatusCode {
+    if state.chat.engine_core_client().is_healthy() {
+        StatusCode::OK
+    } else {
+        StatusCode::SERVICE_UNAVAILABLE
+    }
+}
diff --git a/rust/src/server/src/routes/http_client_tests.rs b/rust/src/server/src/routes/http_client_tests.rs
new file mode 100644
index 000000000000..8055ada9794f
--- /dev/null
+++ b/rust/src/server/src/routes/http_client_tests.rs
@@ -0,0 +1,392 @@
+//! Integration tests that exercise the OpenAI-compatible HTTP API through a
+//! real TCP connection using the `async-openai` client library, backed by a
+//! mock engine.
+
+use std::future::Future;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+
+use async_openai::Client;
+use async_openai::config::OpenAIConfig;
+use async_openai::types::chat::{
+    ChatCompletionRequestUserMessageArgs, CreateChatCompletionRequestArgs,
+};
+use futures::StreamExt as _;
+use serial_test::serial;
+use vllm_chat::{
+    ChatBackend, ChatLlm, ChatRenderer, ChatRequest, ChatTextBackend, DefaultChatOutputProcessor,
+    DynChatOutputProcessor, DynChatRenderer, NewChatOutputProcessorOptions, RenderedPrompt,
+};
+use vllm_engine_core_client::protocol::{
+    EngineCoreFinishReason, EngineCoreOutput, EngineCoreOutputs, EngineCoreRequest,
+};
+use vllm_engine_core_client::test_utils::{IpcNamespace, spawn_mock_engine_task};
+use vllm_engine_core_client::{EngineCoreClient, EngineCoreClientConfig, EngineId};
+use vllm_llm::Llm;
+use vllm_text::tokenizer::{DynTokenizer, Tokenizer};
+use vllm_text::{Prompt, TextBackend};
+use zeromq::prelude::{SocketRecv, SocketSend};
+use zeromq::{DealerSocket, PushSocket, ZmqMessage};
+
+use crate::routes::build_router;
+use crate::state::AppState;
+
+// ========================================================================================
+// Test infrastructure (mirrors routes/tests.rs helpers)
+// ========================================================================================
+
+type TestFuture<'a> = Pin<Box<dyn Future<Output = ()> + Send + 'a>>;
+
+fn boxed_test_future<'a>(future: impl Future<Output = ()> + Send + 'a) -> TestFuture<'a> {
+    Box::pin(future)
+}
+
+struct MockEngineTask {
+    shutdown_tx: Option<tokio::sync::oneshot::Sender<()>>,
+    join_handle: Option<tokio::task::JoinHandle<()>>,
+}
+
+impl MockEngineTask {
+    fn new(
+        (shutdown_tx, join_handle): (
+            tokio::sync::oneshot::Sender<()>,
+            tokio::task::JoinHandle<()>,
+        ),
+    ) -> Self {
+        Self {
+            shutdown_tx: Some(shutdown_tx),
+            join_handle: Some(join_handle),
+        }
+    }
+}
+
+impl Future for MockEngineTask {
+    type Output = Result<(), tokio::task::JoinError>;
+
+    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        if let Some(shutdown_tx) = self.shutdown_tx.take() {
+            let _ = shutdown_tx.send(());
+        }
+        match self.join_handle.as_mut() {
+            Some(join_handle) => Pin::new(join_handle).poll(cx),
+            None => Poll::Ready(Ok(())),
+        }
+    }
+}
+
+impl Drop for MockEngineTask {
+    fn drop(&mut self) {
+        if let Some(join_handle) = &self.join_handle {
+            join_handle.abort();
+        }
+    }
+}
+
+fn request_output(
+    request_id: &str,
+    new_token_ids: Vec<u32>,
+    finish_reason: Option<EngineCoreFinishReason>,
+) -> EngineCoreOutput {
+    EngineCoreOutput {
+        request_id: request_id.to_string(),
+        new_token_ids,
+        new_logprobs: None,
+        new_prompt_logprobs_tensors: None,
+        pooling_output: None,
+        finish_reason,
+        stop_reason: None,
+        events: None,
+        kv_transfer_params: None,
+        trace_headers: None,
+        prefill_stats: None,
+        routed_experts: None,
+        num_nans_in_logits: 0,
+    }
+}
+
+fn engine_outputs_for_request(
+    request_id: &str,
+    output_specs: Vec<(Vec<u32>, Option<EngineCoreFinishReason>)>,
+) -> EngineCoreOutputs {
+    EngineCoreOutputs {
+        engine_index: 0,
+        outputs: output_specs
+            .into_iter()
+            .map(|(token_ids, finish_reason)| request_output(request_id, token_ids, finish_reason))
+            .collect(),
+        scheduler_stats: None,
+        timestamp: 0.0,
+        utility_output: None,
+        finished_requests: None,
+        wave_complete: None,
+        start_wave: None,
+    }
+}
+
+fn default_stream_output_specs() -> Vec<(Vec<u32>, Option<EngineCoreFinishReason>)> {
+    vec![
+        (vec![b'h' as u32], None),
+        (vec![b'i' as u32], None),
+        (vec![b'!' as u32], Some(EngineCoreFinishReason::Stop)),
+    ]
+}
+
+async fn send_outputs(push: &mut PushSocket, outputs: EngineCoreOutputs) {
+    push.send(ZmqMessage::from(
+        rmp_serde::to_vec_named(&outputs).expect("encode outputs"),
+    ))
+    .await
+    .expect("send outputs");
+}
+
+async fn recv_engine_message(dealer: &mut DealerSocket) -> Vec<bytes::Bytes> {
+    dealer.recv().await.expect("recv engine message").into_vec()
+}
+
+fn test_llm(client: EngineCoreClient) -> Llm {
+    Llm::new(client).with_request_id_randomization(false)
+}
+
+#[derive(Clone, Debug)]
+struct FakeChatBackend;
+
+#[derive(Debug)]
+struct FakeChatTokenizer;
+
+impl Tokenizer for FakeChatTokenizer {
+    fn encode(
+        &self,
+        text: &str,
+        _add_special_tokens: bool,
+    ) -> vllm_text::tokenizer::Result<Vec<u32>> {
+        Ok(text.bytes().map(u32::from).collect())
+    }
+
+    fn decode(
+        &self,
+        token_ids: &[u32],
+        _skip_special_tokens: bool,
+    ) -> vllm_text::tokenizer::Result<String> {
+        Ok(
+            String::from_utf8_lossy(&token_ids.iter().map(|id| *id as u8).collect::<Vec<_>>())
+                .into_owned(),
+        )
+    }
+
+    fn token_to_id(&self, token: &str) -> Option<u32> {
+        token.bytes().next().map(u32::from)
+    }
+}
+
+impl TextBackend for FakeChatBackend {
+    fn tokenizer(&self) -> DynTokenizer {
+        Arc::new(FakeChatTokenizer)
+    }
+
+    fn model_id(&self) -> &str {
+        "test-model"
+    }
+}
+
+impl ChatBackend for FakeChatBackend {
+    fn chat_renderer(&self) -> DynChatRenderer {
+        Arc::new(self.clone())
+    }
+
+    fn new_chat_output_processor(
+        &self,
+        request: &mut ChatRequest,
+        options: NewChatOutputProcessorOptions<'_>,
+    ) -> vllm_chat::Result<DynChatOutputProcessor> {
+        Ok(Box::new(DefaultChatOutputProcessor::new(
+            request,
+            self.model_id(),
+            self.tokenizer(),
+            options.tool_call_parser,
+            options.reasoning_parser,
+        )?))
+    }
+}
+
+impl ChatRenderer for FakeChatBackend {
+    fn render(&self, request: &ChatRequest) -> vllm_chat::Result<RenderedPrompt> {
+        let mut prompt = String::new();
+        for message in &request.messages {
+            prompt.push_str(message.role().as_str());
+            prompt.push_str(": ");
+            prompt.push_str(&message.text_content()?);
+            prompt.push('\n');
+        }
+        if request.chat_options.add_generation_prompt() {
+            prompt.push_str("assistant:");
+        }
+        Ok(RenderedPrompt {
+            prompt: Prompt::Text(prompt),
+        })
+    }
+}
+
+/// Spin up an HTTP server on a random port backed by a mock engine.
+/// Returns the `async-openai` client, the HTTP server task, and the mock engine
+/// task.
+async fn http_test_server(
+    engine_id: impl Into<EngineId>,
+    output_specs: Vec<(Vec<u32>, Option<EngineCoreFinishReason>)>,
+) -> (
+    Client<OpenAIConfig>,
+    tokio::task::JoinHandle<()>,
+    MockEngineTask,
+) {
+    let ipc = IpcNamespace::new().expect("create ipc namespace");
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = engine_id.into();
+
+    let engine_task = MockEngineTask::new(spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        move |dealer, push| {
+            boxed_test_future(async move {
+                let add = recv_engine_message(dealer).await;
+                let request: EngineCoreRequest =
+                    rmp_serde::from_slice(&add[1]).expect("decode request");
+                send_outputs(
+                    push,
+                    engine_outputs_for_request(&request.request_id, output_specs),
+                )
+                .await;
+            })
+        },
+    ));
+
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name("test-model")
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .expect("connect client");
+
+    let chat = ChatLlm::from_shared_backend(
+        test_llm(client),
+        Arc::new(FakeChatBackend) as Arc<dyn ChatTextBackend>,
+    );
+    let state = Arc::new(AppState::new(vec!["test-model".to_string()], chat));
+    let app = build_router(state);
+
+    let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.expect("bind http listener");
+    let addr = listener.local_addr().expect("local addr");
+
+    let server_task = tokio::spawn(async move {
+        axum::serve(listener, app).await.expect("http server");
+    });
+
+    let openai_client = Client::with_config(
+        OpenAIConfig::new()
+            .with_api_key("unused")
+            .with_api_base(format!("http://{addr}/v1")),
+    );
+
+    (openai_client, server_task, engine_task)
+}
+
+// ========================================================================================
+// Tests
+// ========================================================================================
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn list_models_via_http_client() {
+    let (client, server_task, _engine_task) =
+        http_test_server(b"engine-http-models", default_stream_output_specs()).await;
+
+    let models = client.models().list().await.expect("list models");
+    let model_ids: Vec<&str> = models.data.iter().map(|m| m.id.as_str()).collect();
+    assert_eq!(model_ids, vec!["test-model"]);
+
+    server_task.abort();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_streaming_chat_via_http_client() {
+    let (client, server_task, engine_task) =
+        http_test_server(b"engine-http-chat", default_stream_output_specs()).await;
+
+    let request = CreateChatCompletionRequestArgs::default()
+        .model("test-model")
+        .stream(false)
+        .max_completion_tokens(10u32)
+        .messages([ChatCompletionRequestUserMessageArgs::default()
+            .content("hello")
+            .build()
+            .expect("build user message")
+            .into()])
+        .build()
+        .expect("build request");
+
+    let response = client.chat().create(request).await.expect("chat completion");
+
+    assert_eq!(response.model, "test-model");
+    assert_eq!(response.choices.len(), 1);
+    let choice = &response.choices[0];
+    // The stop token `!` is suppressed from text.
+    assert_eq!(choice.message.content.as_deref(), Some("hi"));
+    assert_eq!(
+        choice.finish_reason,
+        Some(async_openai::types::chat::FinishReason::Stop)
+    );
+
+    engine_task.await.expect("mock engine task");
+    server_task.abort();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn streaming_chat_via_http_client() {
+    let (client, server_task, engine_task) =
+        http_test_server(b"engine-http-stream", default_stream_output_specs()).await;
+
+    let request = CreateChatCompletionRequestArgs::default()
+        .model("test-model")
+        .stream(true)
+        .max_completion_tokens(10u32)
+        .messages([ChatCompletionRequestUserMessageArgs::default()
+            .content("hello")
+            .build()
+            .expect("build user message")
+            .into()])
+        .build()
+        .expect("build request");
+
+    let mut stream = client.chat().create_stream(request).await.expect("streaming chat completion");
+
+    let mut full_text = String::new();
+    let mut saw_role = false;
+    let mut saw_finish_reason = false;
+
+    while let Some(chunk) = stream.next().await {
+        let chunk = chunk.expect("stream chunk");
+        for choice in &chunk.choices {
+            if choice.delta.role.is_some() {
+                saw_role = true;
+            }
+            if let Some(ref delta) = choice.delta.content {
+                full_text.push_str(delta);
+            }
+            if choice.finish_reason.is_some() {
+                saw_finish_reason = true;
+            }
+        }
+    }
+
+    assert!(saw_role, "expected an assistant role chunk");
+    assert!(saw_finish_reason, "expected a terminal finish reason");
+    assert_eq!(full_text, "hi");
+
+    engine_task.await.expect("mock engine task");
+    server_task.abort();
+}
diff --git a/rust/src/server/src/routes/inference/generate.rs b/rust/src/server/src/routes/inference/generate.rs
new file mode 100644
index 000000000000..ff7ea3c6302c
--- /dev/null
+++ b/rust/src/server/src/routes/inference/generate.rs
@@ -0,0 +1,215 @@
+mod convert;
+mod types;
+mod validate;
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use axum::Json;
+use axum::extract::State;
+use axum::http::HeaderMap;
+use axum::response::{IntoResponse, Response};
+use thiserror_ext::AsReport as _;
+use tracing::info;
+use tracing_futures::Instrument as _;
+use vllm_engine_core_client::protocol::logprobs::{Logprobs, PositionLogprobs};
+use vllm_llm::{CollectedGenerateOutput, GenerateOutputStreamExt as _};
+
+use self::convert::prepare_generate_request;
+use self::types::{GenerateLogprob, GenerateRequest, GenerateResponse, GenerateResponseChoice};
+use crate::error::{ApiError, server_error};
+use crate::routes::openai::utils::logprobs::clamp_logprob;
+use crate::routes::openai::utils::types::{ChatLogProbs, ChatLogProbsContent, TopLogProb};
+use crate::routes::openai::utils::validated_json::ValidatedJson;
+use crate::state::AppState;
+use crate::utils::resolve_request_context;
+
+/// Validate one token-in/token-out request and proxy it into the shared
+/// `vllm-text` stack.
+pub async fn generate(
+    State(state): State<Arc<AppState>>,
+    headers: HeaderMap,
+    ValidatedJson(body): ValidatedJson<GenerateRequest>,
+) -> Response {
+    let request_context = resolve_request_context(&headers, body.request_id.as_deref());
+    let prepared = match prepare_generate_request(body, state.served_model_names(), request_context)
+    {
+        Ok(prepared) => prepared,
+        Err(error) => return error.into_response(),
+    };
+    let request_span = tracing::info_span!(
+        "generate",
+        request_id = %prepared.request_id,
+        engine_request_id = tracing::field::Empty,
+    );
+
+    let log_request = state.enable_log_requests;
+    let include_logprobs = prepared.include_logprobs;
+    let include_prompt_logprobs = prepared.include_prompt_logprobs;
+
+    let raw_stream = match state
+        .chat
+        .text()
+        .generate_raw(prepared.text_request)
+        .instrument(request_span.clone())
+        .await
+    {
+        Ok(stream) => stream,
+        Err(error) => {
+            return server_error!(
+                "failed to submit raw generate request: {}",
+                error.to_report_string()
+            )
+            .into_response();
+        }
+    };
+
+    let collected = match raw_stream.collect_output().instrument(request_span.clone()).await {
+        Ok(collected) => collected,
+        Err(error) => {
+            return server_error!(
+                "failed to collect raw generate response: {}",
+                error.to_report_string()
+            )
+            .into_response();
+        }
+    };
+
+    if log_request {
+        info!(
+            parent: &request_span,
+            prompt_tokens = collected.prompt_token_ids.len(),
+            output_tokens = collected.token_ids.len(),
+            finish_reason = collected.finish_reason.as_str(),
+            "generate finished"
+        );
+    }
+
+    let response = match collect_generate(
+        collected,
+        prepared.request_id,
+        include_logprobs,
+        include_prompt_logprobs,
+    ) {
+        Ok(response) => response,
+        Err(error) => return error.into_response(),
+    };
+
+    Json(response).into_response()
+}
+
+fn collect_generate(
+    collected: CollectedGenerateOutput,
+    request_id: String,
+    include_logprobs: bool,
+    include_prompt_logprobs: bool,
+) -> Result<GenerateResponse, ApiError> {
+    let logprobs = if include_logprobs {
+        let logprobs = collected.logprobs.as_ref().ok_or_else(|| {
+            ApiError::server_error(
+                "raw generate response requested logprobs but generation returned none".to_string(),
+            )
+        })?;
+        Some(raw_logprobs_to_openai_chat(logprobs)?)
+    } else {
+        None
+    };
+    let prompt_logprobs = if include_prompt_logprobs {
+        let prompt_logprobs = collected.prompt_logprobs.as_ref().ok_or_else(|| {
+            ApiError::server_error(
+                "raw generate response requested prompt_logprobs but generation returned none"
+                    .to_string(),
+            )
+        })?;
+        Some(raw_prompt_logprobs_to_maps(prompt_logprobs))
+    } else {
+        None
+    };
+
+    Ok(GenerateResponse {
+        request_id,
+        choices: vec![GenerateResponseChoice {
+            index: 0,
+            logprobs,
+            finish_reason: Some(collected.finish_reason.as_str().to_string()),
+            token_ids: collected.token_ids,
+        }],
+        prompt_logprobs,
+        kv_transfer_params: collected.kv_transfer_params,
+    })
+}
+
+fn raw_logprobs_to_openai_chat(logprobs: &Logprobs) -> Result<ChatLogProbs, ApiError> {
+    let content = logprobs
+        .positions
+        .iter()
+        .map(position_to_chat_logprobs_content)
+        .collect::<Result<Vec<_>, _>>()?;
+
+    Ok(ChatLogProbs {
+        content: Some(content),
+    })
+}
+
+fn raw_prompt_logprobs_to_maps(
+    prompt_logprobs: &Logprobs,
+) -> Vec<Option<HashMap<u32, GenerateLogprob>>> {
+    std::iter::once(None)
+        .chain(
+            prompt_logprobs
+                .positions
+                .iter()
+                .map(|position| Some(position_to_logprob_map(position))),
+        )
+        .collect()
+}
+
+fn position_to_chat_logprobs_content(
+    position: &PositionLogprobs,
+) -> Result<ChatLogProbsContent, ApiError> {
+    let chosen = position.entries.first().ok_or_else(|| {
+        ApiError::server_error(
+            "raw generate logprobs position unexpectedly had no token candidates".to_string(),
+        )
+    })?;
+    let token = format_token_id(chosen.token_id);
+
+    Ok(ChatLogProbsContent {
+        token: token.clone(),
+        logprob: clamp_logprob(chosen.logprob),
+        bytes: Some(token.as_bytes().to_vec()),
+        top_logprobs: position
+            .entries
+            .iter()
+            .map(|entry| {
+                let token = format_token_id(entry.token_id);
+                TopLogProb {
+                    token: token.clone(),
+                    logprob: clamp_logprob(entry.logprob),
+                    bytes: Some(token.into_bytes()),
+                }
+            })
+            .collect(),
+    })
+}
+
+fn position_to_logprob_map(position: &PositionLogprobs) -> HashMap<u32, GenerateLogprob> {
+    position
+        .entries
+        .iter()
+        .map(|entry| {
+            (
+                entry.token_id,
+                GenerateLogprob {
+                    logprob: clamp_logprob(entry.logprob),
+                    rank: Some(entry.rank),
+                    decoded_token: Some(format_token_id(entry.token_id)),
+                },
+            )
+        })
+        .collect()
+}
+
+fn format_token_id(token_id: u32) -> String {
+    format!("token_id:{token_id}")
+}
diff --git a/rust/src/server/src/routes/inference/generate/convert.rs b/rust/src/server/src/routes/inference/generate/convert.rs
new file mode 100644
index 000000000000..70374c2f1eb8
--- /dev/null
+++ b/rust/src/server/src/routes/inference/generate/convert.rs
@@ -0,0 +1,112 @@
+use vllm_text::{Prompt, TextDecodeOptions, TextRequest};
+
+use super::types::GenerateRequest;
+use super::validate;
+use crate::error::ApiError;
+use crate::utils::{ResolvedRequestContext, merge_kv_transfer_params};
+
+/// Lowered generate request plus the response request ID.
+#[derive(Debug, Clone, PartialEq)]
+pub struct PreparedRequest {
+    pub request_id: String,
+    pub text_request: TextRequest,
+    pub include_logprobs: bool,
+    pub include_prompt_logprobs: bool,
+}
+
+/// Validate and lower one raw generate request into the internal
+/// text-generation format.
+pub fn prepare_generate_request(
+    request: GenerateRequest,
+    served_model_names: &[String],
+    ctx: ResolvedRequestContext,
+) -> Result<PreparedRequest, ApiError> {
+    validate::validate_request_compat(&request, served_model_names)?;
+
+    let include_logprobs = request.sampling_params.logprobs.is_some();
+    let include_prompt_logprobs = request.sampling_params.prompt_logprobs.is_some();
+    let mut sampling_params = request.sampling_params;
+    sampling_params.vllm_xargs = merge_kv_transfer_params(
+        sampling_params.vllm_xargs,
+        request.kv_transfer_params.as_ref(),
+    );
+
+    let text_request = TextRequest {
+        request_id: ctx.request_id.clone(),
+        prompt: Prompt::TokenIds(request.token_ids),
+        mm_features: None,
+        sampling_params,
+        decode_options: TextDecodeOptions::default(),
+        intermediate: false,
+        priority: request.priority,
+        cache_salt: request.cache_salt,
+        add_special_tokens: false,
+        data_parallel_rank: ctx.data_parallel_rank,
+    };
+
+    Ok(PreparedRequest {
+        request_id: ctx.request_id,
+        text_request,
+        include_logprobs,
+        include_prompt_logprobs,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use serde_json::json;
+    use vllm_text::Prompt;
+
+    use super::prepare_generate_request;
+    use crate::routes::inference::generate::types::GenerateRequest;
+    use crate::utils::ResolvedRequestContext;
+
+    #[test]
+    fn prepare_generate_request_maps_token_prompt_and_sampling_params() {
+        let request: GenerateRequest = serde_json::from_value(json!({
+            "model": "Qwen/Qwen1.5-0.5B-Chat",
+            "token_ids": [11, 22, 33],
+            "priority": -3,
+            "cache_salt": "salt",
+            "sampling_params": {
+                "max_tokens": 7,
+                "logprobs": 2,
+                "prompt_logprobs": 1,
+                "ignore_eos": true
+            },
+            "kv_transfer_params": {
+                "connector": "x"
+            }
+        }))
+        .expect("parse request");
+
+        let prepared = prepare_generate_request(
+            request,
+            &["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+            ResolvedRequestContext::default(),
+        )
+        .expect("prepare");
+
+        assert_eq!(
+            prepared.text_request.prompt,
+            Prompt::TokenIds(vec![11, 22, 33])
+        );
+        assert_eq!(prepared.text_request.sampling_params.max_tokens, Some(7));
+        assert_eq!(prepared.text_request.sampling_params.logprobs, Some(2));
+        assert_eq!(
+            prepared.text_request.sampling_params.prompt_logprobs,
+            Some(1)
+        );
+        assert!(prepared.text_request.sampling_params.ignore_eos);
+        assert_eq!(prepared.text_request.priority, -3);
+        assert_eq!(prepared.text_request.cache_salt.as_deref(), Some("salt"));
+        assert_eq!(
+            prepared
+                .text_request
+                .sampling_params
+                .vllm_xargs
+                .and_then(|mut xargs| xargs.remove("kv_transfer_params")),
+            Some(json!({"connector": "x"}))
+        );
+    }
+}
diff --git a/rust/src/server/src/routes/inference/generate/types.rs b/rust/src/server/src/routes/inference/generate/types.rs
new file mode 100644
index 000000000000..de7a196c3c6b
--- /dev/null
+++ b/rust/src/server/src/routes/inference/generate/types.rs
@@ -0,0 +1,57 @@
+use std::collections::HashMap;
+
+use serde::{Deserialize, Serialize};
+use serde_json::{Map, Value};
+use validator::Validate;
+use vllm_text::SamplingParams;
+
+use crate::routes::openai::utils::types::{ChatLogProbs, Normalizable};
+
+/// vLLM-compatible request type for the token-in/token-out generate API.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Deserialize, Serialize, Validate)]
+pub struct GenerateRequest {
+    pub request_id: Option<String>,
+    pub model: Option<String>,
+    pub token_ids: Vec<u32>,
+    pub sampling_params: SamplingParams,
+    #[serde(default)]
+    pub stream: bool,
+    pub cache_salt: Option<String>,
+    #[serde(default)]
+    pub priority: i32,
+    pub kv_transfer_params: Option<HashMap<String, Value>>,
+    #[serde(flatten)]
+    pub other: Map<String, Value>,
+}
+
+impl Normalizable for GenerateRequest {}
+
+/// Mirrors the Python vLLM `GenerateResponseChoice` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub(super) struct GenerateResponseChoice {
+    pub index: u32,
+    pub logprobs: Option<ChatLogProbs>,
+    pub finish_reason: Option<String>,
+    pub token_ids: Vec<u32>,
+}
+
+/// Mirrors the Python vLLM `GenerateResponse` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub(super) struct GenerateResponse {
+    pub request_id: String,
+    pub choices: Vec<GenerateResponseChoice>,
+    pub prompt_logprobs: Option<Vec<Option<HashMap<u32, GenerateLogprob>>>>,
+    pub kv_transfer_params: Option<Value>,
+}
+
+/// Mirrors the Python vLLM `Logprob` class used in prompt-logprobs payloads.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub(super) struct GenerateLogprob {
+    pub logprob: f32,
+    pub rank: Option<u32>,
+    pub decoded_token: Option<String>,
+}
diff --git a/rust/src/server/src/routes/inference/generate/validate.rs b/rust/src/server/src/routes/inference/generate/validate.rs
new file mode 100644
index 000000000000..74a5bbb690ae
--- /dev/null
+++ b/rust/src/server/src/routes/inference/generate/validate.rs
@@ -0,0 +1,84 @@
+use super::types::GenerateRequest;
+use crate::error::{ApiError, bail_invalid_request};
+
+/// Enforce the minimal compatibility contract for the Rust token generate
+/// route.
+pub(super) fn validate_request_compat(
+    request: &GenerateRequest,
+    served_model_names: &[String],
+) -> Result<(), ApiError> {
+    if let Some(model) = request.model.as_ref()
+        && !served_model_names.iter().any(|n| n == model)
+    {
+        return Err(ApiError::model_not_found(model.clone()));
+    }
+
+    if request.stream {
+        bail_invalid_request!(param = "stream", "stream=true is not supported.");
+    }
+
+    if request.token_ids.is_empty() {
+        bail_invalid_request!(
+            param = "token_ids",
+            "token_ids must contain at least one token ID."
+        );
+    }
+
+    if request.sampling_params.max_tokens == Some(0) {
+        bail_invalid_request!(
+            param = "sampling_params",
+            "max_tokens must be greater than 0."
+        );
+    }
+
+    if let Some(prompt_logprobs) = request.sampling_params.prompt_logprobs
+        && prompt_logprobs < 0
+        && prompt_logprobs != -1
+    {
+        bail_invalid_request!(
+            param = "sampling_params",
+            "`prompt_logprobs` must be a non-negative value or -1."
+        );
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use serde_json::json;
+
+    use super::validate_request_compat;
+    use crate::routes::inference::generate::types::GenerateRequest;
+
+    fn base_request() -> GenerateRequest {
+        serde_json::from_value(json!({
+            "model": "Qwen/Qwen1.5-0.5B-Chat",
+            "token_ids": [11, 22],
+            "sampling_params": {}
+        }))
+        .expect("parse request")
+    }
+
+    fn served(names: &[&str]) -> Vec<String> {
+        names.iter().map(|s| s.to_string()).collect()
+    }
+
+    #[test]
+    fn validate_request_compat_rejects_streaming() {
+        let request = GenerateRequest {
+            stream: true,
+            ..base_request()
+        };
+        assert!(validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"])).is_err());
+    }
+
+    #[test]
+    fn validate_request_compat_rejects_empty_token_ids() {
+        let request = GenerateRequest {
+            token_ids: Vec::new(),
+            ..base_request()
+        };
+        assert!(validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"])).is_err());
+    }
+}
diff --git a/rust/src/server/src/routes/inference/mod.rs b/rust/src/server/src/routes/inference/mod.rs
new file mode 100644
index 000000000000..d601d038745b
--- /dev/null
+++ b/rust/src/server/src/routes/inference/mod.rs
@@ -0,0 +1,3 @@
+pub mod generate;
+
+pub use generate::generate;
diff --git a/rust/src/server/src/routes/load.rs b/rust/src/server/src/routes/load.rs
new file mode 100644
index 000000000000..0f666b24e408
--- /dev/null
+++ b/rust/src/server/src/routes/load.rs
@@ -0,0 +1,18 @@
+use std::sync::Arc;
+
+use axum::Json;
+use axum::extract::State;
+use serde::Serialize;
+
+use crate::state::AppState;
+
+#[derive(Serialize)]
+pub(crate) struct ServerLoadResponse {
+    server_load: u64,
+}
+
+pub async fn load(State(state): State<Arc<AppState>>) -> Json<ServerLoadResponse> {
+    Json(ServerLoadResponse {
+        server_load: state.server_load(),
+    })
+}
diff --git a/rust/src/server/src/routes/metrics.rs b/rust/src/server/src/routes/metrics.rs
new file mode 100644
index 000000000000..f7017fadccba
--- /dev/null
+++ b/rust/src/server/src/routes/metrics.rs
@@ -0,0 +1,26 @@
+use axum::http::header::CONTENT_TYPE;
+use axum::http::{HeaderValue, StatusCode};
+use axum::response::{IntoResponse, Response};
+use thiserror_ext::AsReport;
+use vllm_metrics::METRICS;
+
+const OPENMETRICS_CONTENT_TYPE: &str = "application/openmetrics-text; version=1.0.0; charset=utf-8";
+
+pub async fn scrape() -> Response {
+    match METRICS.render() {
+        Ok(body) => (
+            [(
+                CONTENT_TYPE,
+                HeaderValue::from_static(OPENMETRICS_CONTENT_TYPE),
+            )],
+            body,
+        )
+            .into_response(),
+
+        Err(error) => (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            format!("failed to render metrics: {}", error.as_report()),
+        )
+            .into_response(),
+    }
+}
diff --git a/rust/src/server/src/routes/openai/chat_completions.rs b/rust/src/server/src/routes/openai/chat_completions.rs
new file mode 100644
index 000000000000..c0894bb70c9a
--- /dev/null
+++ b/rust/src/server/src/routes/openai/chat_completions.rs
@@ -0,0 +1,1046 @@
+pub mod convert;
+mod types;
+mod validate;
+
+use std::convert::Infallible;
+use std::result::Result;
+use std::sync::Arc;
+
+use asynk_strim_attr::{TryYielder, try_stream};
+use axum::Json;
+use axum::extract::State;
+use axum::http::HeaderMap;
+use axum::response::sse::{Event, Sse};
+use axum::response::{IntoResponse, Response};
+use futures::{Stream, StreamExt as _, pin_mut};
+use serde_json::Value;
+use thiserror_ext::AsReport as _;
+use tracing::{debug, error, info, trace};
+use tracing_futures::Instrument as _;
+use vllm_chat::{
+    AssistantBlockKind, AssistantMessageExt as _, ChatEvent, ChatEventStream, ChatEventStreamTrait,
+    CollectedAssistantMessage, FinishReason,
+};
+use vllm_engine_core_client::protocol::StopReason;
+
+use crate::error::{ApiError, bail_server_error, server_error};
+use crate::routes::openai::chat_completions::convert::prepare_chat_request;
+use crate::routes::openai::chat_completions::types::{
+    AssistantRole, ChatCompletionChoice, ChatCompletionMessage, ChatCompletionRequest,
+    ChatCompletionResponse, ChatCompletionStreamChoice, ChatCompletionStreamResponse,
+    ChatMessageDelta,
+};
+use crate::routes::openai::utils::logprobs::{
+    decoded_logprobs_to_openai_chat, decoded_prompt_logprobs_to_maps,
+};
+use crate::routes::openai::utils::types::{
+    ChatLogProbs, FunctionCallDelta, FunctionCallResponse, ToolCall, ToolCallDelta, Usage,
+};
+use crate::routes::openai::utils::validated_json::ValidatedJson;
+use crate::state::AppState;
+use crate::utils::{resolve_request_context, unix_timestamp};
+
+/// Validate one chat completion request and proxy it into the shared
+/// `vllm-chat` stack.
+pub async fn chat_completions(
+    State(state): State<Arc<AppState>>,
+    headers: HeaderMap,
+    ValidatedJson(body): ValidatedJson<ChatCompletionRequest>,
+) -> Response {
+    let stream = body.stream;
+    let request_context = resolve_request_context(&headers, body.request_id.as_deref());
+
+    let prepared = match prepare_chat_request(body, state.served_model_names(), request_context) {
+        Ok(prepared) => prepared,
+        Err(error) => return error.into_response(),
+    };
+    let request_span = tracing::info_span!(
+        "chat_completions",
+        request_id = %prepared.request_id,
+        engine_request_id = tracing::field::Empty,
+    );
+
+    let created = unix_timestamp();
+    let log_request = state.enable_log_requests;
+
+    let chat_stream =
+        match state.chat.chat(prepared.chat_request).instrument(request_span.clone()).await {
+            Ok(stream) => stream,
+            Err(error) => {
+                return server_error!(
+                    "failed to submit chat request: {}",
+                    error.to_report_string()
+                )
+                .into_response();
+            }
+        };
+
+    if stream {
+        let chunk_stream = chat_completion_chunk_stream(
+            chat_stream,
+            prepared.request_id,
+            prepared.response_model,
+            created,
+            log_request,
+            prepared.include_usage,
+            prepared.requested_logprobs,
+            prepared.echo,
+            prepared.return_token_ids,
+            prepared.return_tokens_as_token_ids,
+        );
+        let sse_stream = chat_completion_sse_stream(chunk_stream).instrument(request_span);
+
+        Sse::new(sse_stream).into_response()
+    } else {
+        let response = match collect_chat_completion(
+            chat_stream,
+            prepared.request_id,
+            prepared.response_model,
+            created,
+            prepared.requested_logprobs,
+            prepared.include_prompt_logprobs,
+            prepared.echo,
+            prepared.return_token_ids,
+            prepared.return_tokens_as_token_ids,
+        )
+        .instrument(request_span.clone())
+        .await
+        {
+            Ok(response) => response,
+            Err(error) => return error.into_response(),
+        };
+
+        if log_request {
+            let usage = response.usage.as_ref();
+            info!(
+                parent: &request_span,
+                model = %response.model,
+                prompt_tokens = usage.map_or(0, |u| u.prompt_tokens),
+                output_tokens = usage.and_then(|u| u.completion_tokens).unwrap_or(0),
+                finish_reason = response.choices.first().and_then(|c| c.finish_reason.as_deref()).unwrap_or("unknown"),
+                "chat completion finished"
+            );
+        }
+
+        Json(response).into_response()
+    }
+}
+
+async fn collect_chat_completion(
+    stream: ChatEventStream,
+    request_id: String,
+    response_model: String,
+    created: u64,
+    requested_logprobs: bool,
+    include_prompt_logprobs: bool,
+    echo: Option<String>,
+    return_token_ids: bool,
+    return_tokens_as_token_ids: bool,
+) -> Result<ChatCompletionResponse, ApiError> {
+    let collected = stream.collect_message().await.map_err(|error| {
+        server_error!(
+            "failed to collect chat completion response: {}",
+            error.to_report_string()
+        )
+    })?;
+    let CollectedAssistantMessage {
+        message,
+        prompt_token_count,
+        prompt_token_ids,
+        prompt_logprobs,
+        logprobs,
+        token_ids,
+        output_token_count,
+        finish_reason,
+        kv_transfer_params,
+    } = collected;
+    let stop_reason = finish_reason.as_stop_reason().map(stop_reason_to_json);
+    let saw_tool_calls = message.tool_calls().next().is_some();
+    let finish_reason = chat_finish_reason_to_openai(&finish_reason, saw_tool_calls)?.to_string();
+    let tool_calls = message
+        .tool_calls()
+        .map(|call| ToolCall {
+            id: call.id.clone(),
+            tool_type: "function".to_string(),
+            function: FunctionCallResponse {
+                name: call.name.clone(),
+                arguments: Some(call.arguments.clone()),
+            },
+        })
+        .collect::<Vec<_>>();
+    let logprobs = if requested_logprobs {
+        Some(decoded_logprobs_to_openai_chat(
+            logprobs.as_ref().ok_or_else(|| {
+                server_error!("chat response requested logprobs but generation returned none")
+            })?,
+            return_tokens_as_token_ids,
+        )?)
+    } else {
+        None
+    };
+    let prompt_logprobs = if include_prompt_logprobs {
+        Some(decoded_prompt_logprobs_to_maps(
+            prompt_logprobs.as_ref().ok_or_else(|| {
+                server_error!(
+                    "chat response requested prompt_logprobs but generation returned none"
+                )
+            })?,
+            return_tokens_as_token_ids,
+        ))
+    } else {
+        None
+    };
+    let usage = Usage::from_counts(prompt_token_count as u32, output_token_count as u32);
+
+    Ok(ChatCompletionResponse {
+        id: request_id,
+        object: "chat.completion".to_string(),
+        created,
+        model: response_model,
+        choices: vec![ChatCompletionChoice {
+            index: 0,
+            message: ChatCompletionMessage {
+                role: AssistantRole,
+                content: match &echo {
+                    Some(prefix) => Some(format!("{prefix}{}", message.text())),
+                    None => Some(message.text()).filter(|t| !t.is_empty()),
+                },
+                tool_calls: Some(tool_calls).filter(|calls| !calls.is_empty()),
+                reasoning: message.reasoning(),
+            },
+            logprobs,
+            finish_reason: Some(finish_reason),
+            stop_reason,
+            token_ids: return_token_ids.then_some(token_ids),
+        }],
+        usage: Some(usage),
+        system_fingerprint: None,
+        prompt_logprobs,
+        prompt_token_ids: return_token_ids.then(|| prompt_token_ids.to_vec()),
+        kv_transfer_params,
+    })
+}
+
+/// Convert one internal chat event stream into OpenAI chat-completion chunks.
+#[try_stream]
+async fn chat_completion_chunk_stream(
+    mut stream: impl ChatEventStreamTrait + Unpin,
+    request_id: String,
+    response_model: String,
+    created: u64,
+    log_request: bool,
+    include_usage: bool,
+    requested_logprobs: bool,
+    echo: Option<String>,
+    return_token_ids: bool,
+    return_tokens_as_token_ids: bool,
+    mut y: TryYielder<ChatCompletionStreamResponse, ApiError>,
+) -> Result<(), ApiError> {
+    let mut saw_tool_calls = false;
+
+    // If the client requested logprobs or token_ids, we need to buffer chunks until
+    // we receive the separate `LogprobsDelta` event, so that we can emit one
+    // combined chunk with both the semantic delta and its per-update metadata.
+    let mut pending_chunk =
+        (requested_logprobs || return_token_ids).then(PendingChatChunk::default);
+
+    while let Some(next) = stream.next().await {
+        match next {
+            Ok(ChatEvent::Start {
+                prompt_token_ids, ..
+            }) => {
+                let mut chunk = start_chunk(&request_id, &response_model, created);
+                if return_token_ids {
+                    chunk.prompt_token_ids = Some(prompt_token_ids.to_vec());
+                }
+                y.yield_ok(chunk).await;
+                // When echo=true, emit the last assistant message content as a delta chunk.
+                if let Some(echo_text) = &echo {
+                    y.yield_ok(block_delta_chunk(
+                        &request_id,
+                        &response_model,
+                        created,
+                        AssistantBlockKind::Text,
+                        echo_text.clone(),
+                    ))
+                    .await;
+                }
+            }
+            Ok(ChatEvent::BlockDelta { kind, delta, .. }) => {
+                if let Some(pending_chunk) = pending_chunk.as_mut() {
+                    pending_chunk.push_block_delta(kind, delta);
+                } else {
+                    y.yield_ok(block_delta_chunk(
+                        &request_id,
+                        &response_model,
+                        created,
+                        kind,
+                        delta,
+                    ))
+                    .await;
+                }
+            }
+            Ok(ChatEvent::LogprobsDelta {
+                logprobs,
+                token_ids,
+            }) => {
+                let openai_logprobs = logprobs
+                    .as_ref()
+                    .map(|lp| decoded_logprobs_to_openai_chat(lp, return_tokens_as_token_ids))
+                    .transpose()?;
+                let openai_token_ids =
+                    return_token_ids.then_some(token_ids).filter(|t| !t.is_empty());
+                if let Some(pending_chunk) = pending_chunk.as_mut() {
+                    pending_chunk.logprobs = openai_logprobs;
+                    pending_chunk.token_ids = openai_token_ids;
+                    if let Some(chunk) =
+                        pending_chunk.take_chunk(&request_id, &response_model, created)
+                    {
+                        y.yield_ok(chunk).await;
+                    }
+                } else if let Some(logprobs) = openai_logprobs {
+                    y.yield_ok(logprobs_only_chunk(
+                        &request_id,
+                        &response_model,
+                        created,
+                        logprobs,
+                    ))
+                    .await;
+                }
+            }
+            Ok(ChatEvent::BlockStart { kind, .. }) => {
+                debug!(?kind, "starting new block");
+            }
+            Ok(ChatEvent::BlockEnd { .. }) => {
+                debug!("ending current block");
+            }
+            Ok(ChatEvent::ToolCallStart { index, id, name }) => {
+                let tool_index = index as u32;
+                saw_tool_calls = true;
+                debug!(
+                    tool_call_id = %id,
+                    tool_call_name = %name,
+                    "starting new tool call"
+                );
+                if let Some(pending_chunk) = pending_chunk.as_mut() {
+                    pending_chunk.push_tool_call_start(tool_index, id, name);
+                } else {
+                    y.yield_ok(tool_call_start_chunk(
+                        &request_id,
+                        &response_model,
+                        created,
+                        tool_index,
+                        id,
+                        name,
+                    ))
+                    .await;
+                }
+            }
+            Ok(ChatEvent::ToolCallArgumentsDelta { index, delta }) => {
+                let tool_index = index as u32;
+                if let Some(pending_chunk) = pending_chunk.as_mut() {
+                    pending_chunk.push_tool_call_arguments(tool_index, delta);
+                } else {
+                    y.yield_ok(tool_call_arguments_chunk(
+                        &request_id,
+                        &response_model,
+                        created,
+                        tool_index,
+                        delta,
+                    ))
+                    .await;
+                }
+            }
+            Ok(ChatEvent::ToolCallEnd { .. }) => {
+                debug!("ending current tool call");
+            }
+            Ok(ChatEvent::Done {
+                prompt_token_count,
+                finish_reason,
+                output_token_count,
+                ..
+            }) => {
+                if log_request {
+                    info!(
+                        stream = true,
+                        model = %response_model,
+                        prompt_tokens = prompt_token_count,
+                        output_tokens = output_token_count,
+                        finish_reason = finish_reason.as_str(),
+                        "chat completion finished"
+                    );
+                }
+
+                if let Some(pending_chunk) = pending_chunk.as_mut()
+                    && let Some(chunk) =
+                        pending_chunk.take_chunk(&request_id, &response_model, created)
+                {
+                    y.yield_ok(chunk).await;
+                }
+
+                match final_chunk(
+                    &request_id,
+                    &response_model,
+                    created,
+                    finish_reason,
+                    saw_tool_calls,
+                ) {
+                    Ok(chunk) => y.yield_ok(chunk).await,
+                    Err(error) => {
+                        error!(
+                            error = %error.to_error_response().error.message,
+                            "invalid terminal finish reason"
+                        );
+                        return Err(error);
+                    }
+                }
+
+                if include_usage {
+                    y.yield_ok(usage_chunk(
+                        &request_id,
+                        &response_model,
+                        created,
+                        Usage::from_counts(prompt_token_count as u32, output_token_count as u32),
+                    ))
+                    .await;
+                }
+
+                return Ok(());
+            }
+            Err(error) => {
+                error!(
+                    error = %error.as_report(),
+                    "chat stream failed"
+                );
+                bail_server_error!("{}", error.to_report_string());
+            }
+        }
+    }
+    Ok(())
+}
+
+fn usage_chunk(
+    request_id: &str,
+    response_model: &str,
+    created: u64,
+    usage: Usage,
+) -> ChatCompletionStreamResponse {
+    let mut chunk = ChatCompletionStreamResponse::new(request_id, response_model, created);
+    chunk.usage = Some(usage);
+    chunk
+}
+
+/// One in-flight chat-completions SSE chunk being assembled at the route layer.
+///
+/// `vllm-chat` emits semantic chat events first and `LogprobsDelta` separately,
+/// because one decoded update may be rewritten into multiple chat events.
+/// The OpenAI chat API, though, wants one streamed chunk to optionally carry
+/// both the delta and its logprobs.
+///
+/// This small buffer accumulates the semantic delta first, then attaches the
+/// following `LogprobsDelta` and flushes one combined chunk. It relies on the
+/// current `vllm-chat` invariant that all semantic events from one decoded
+/// update are emitted before that update's `LogprobsDelta`.
+#[derive(Debug, Default)]
+struct PendingChatChunk {
+    /// The currently buffered OpenAI delta payload assembled from one or more
+    /// chat semantic events belonging to the same decoded update.
+    delta: ChatMessageDelta,
+    /// The token-aligned logprobs for that same decoded update.
+    logprobs: Option<ChatLogProbs>,
+    /// Per-update output token IDs for the same decoded update.
+    token_ids: Option<Vec<u32>>,
+}
+
+impl PendingChatChunk {
+    /// Append one assistant text/reasoning block delta to the buffered OpenAI
+    /// delta payload.
+    fn push_block_delta(&mut self, kind: AssistantBlockKind, delta: String) {
+        match kind {
+            AssistantBlockKind::Text => append_delta_text(&mut self.delta.content, delta),
+            AssistantBlockKind::Reasoning => append_delta_text(&mut self.delta.reasoning, delta),
+            AssistantBlockKind::ToolCall => {
+                unreachable!("tool calls must flow through dedicated tool-call chunks")
+            }
+        }
+    }
+
+    /// Append the OpenAI tool-call-start representation to the buffered delta.
+    fn push_tool_call_start(&mut self, index: u32, id: String, name: String) {
+        self.delta.tool_calls.get_or_insert_with(Vec::new).push(ToolCallDelta {
+            index,
+            id: Some(id),
+            tool_type: Some("function".to_string()),
+            function: Some(FunctionCallDelta {
+                name: Some(name),
+                arguments: None,
+            }),
+        });
+    }
+
+    /// Append one incremental tool-call arguments update to the buffered delta.
+    fn push_tool_call_arguments(&mut self, index: u32, delta: String) {
+        self.delta.tool_calls.get_or_insert_with(Vec::new).push(ToolCallDelta {
+            index,
+            id: None,
+            tool_type: None,
+            function: Some(FunctionCallDelta {
+                name: None,
+                arguments: Some(delta),
+            }),
+        });
+    }
+
+    /// Finalize the currently buffered SSE chunk, if it contains either a
+    /// semantic delta or a logprobs payload.
+    ///
+    /// This may produce:
+    /// - a combined delta + logprobs chunk
+    /// - a delta-only chunk
+    /// - a logprobs-only chunk
+    ///
+    /// The logprobs-only case is intentional: token-level metadata in one
+    /// decoded update is correlated with the same update boundary, not
+    /// necessarily with a visible/chat-semantic delta.
+    fn take_chunk(
+        &mut self,
+        request_id: &str,
+        response_model: &str,
+        created: u64,
+    ) -> Option<ChatCompletionStreamResponse> {
+        let has_delta = self.delta.content.is_some()
+            || self.delta.reasoning.is_some()
+            || self.delta.tool_calls.is_some();
+        let logprobs = self.logprobs.take();
+        let token_ids = self.token_ids.take();
+        if !has_delta && logprobs.is_none() && token_ids.is_none() {
+            return None;
+        }
+
+        let mut chunk = ChatCompletionStreamResponse::new(request_id, response_model, created);
+        chunk.choices.push(ChatCompletionStreamChoice {
+            delta: self.take_delta(),
+            logprobs,
+            token_ids,
+            ..Default::default()
+        });
+        Some(chunk)
+    }
+
+    /// Take the currently buffered OpenAI delta payload and leave this pending
+    /// chunk empty for the next decoded update.
+    fn take_delta(&mut self) -> ChatMessageDelta {
+        ChatMessageDelta {
+            role: self.delta.role.take(),
+            content: self.delta.content.take(),
+            tool_calls: self.delta.tool_calls.take(),
+            reasoning: self.delta.reasoning.take(),
+        }
+    }
+}
+
+/// Append one text fragment to an optional OpenAI delta string field.
+fn append_delta_text(slot: &mut Option<String>, delta: String) {
+    match slot {
+        Some(existing) => existing.push_str(&delta),
+        None => *slot = Some(delta),
+    }
+}
+
+/// Convert one chunk stream into OpenAI-style SSE events.
+///
+/// OpenAI-style streaming errors are encoded as ordinary `data: {"error": ...}`
+/// events followed by `data: [DONE]`, so the transport stream itself stays
+/// infallible even when generation fails after the HTTP response has started.
+#[try_stream]
+async fn chat_completion_sse_stream(
+    stream: impl Stream<Item = Result<ChatCompletionStreamResponse, ApiError>>,
+    mut y: TryYielder<Event, Infallible>,
+) -> Result<(), Infallible> {
+    pin_mut!(stream);
+
+    while let Some(next) = stream.next().await {
+        match next {
+            Ok(chunk) => y.yield_ok(to_sse_event(&chunk)).await,
+            Err(error) => {
+                y.yield_ok(to_error_sse_event(&error)).await;
+                break;
+            }
+        }
+    }
+
+    y.yield_ok(done_sse_event()).await;
+    Ok(())
+}
+
+/// Serialize one OpenAI chunk payload into one SSE `data:` event.
+fn to_sse_event(chunk: &ChatCompletionStreamResponse) -> Event {
+    let payload =
+        serde_json::to_string(chunk).expect("ChatCompletionStreamResponse must serialize to JSON");
+    trace!(payload, "chat completion emitting chunk");
+    Event::default().data(payload)
+}
+
+/// Serialize one OpenAI error payload into one SSE `data:` event.
+fn to_error_sse_event(error: &ApiError) -> Event {
+    let payload = serde_json::to_string(&error.to_error_response())
+        .expect("ErrorResponse must serialize to JSON");
+    trace!(payload, "chat completion emitting error");
+    Event::default().data(payload)
+}
+
+/// Build the terminal OpenAI SSE sentinel event.
+fn done_sse_event() -> Event {
+    trace!("chat completion emitting done");
+    Event::default().data("[DONE]")
+}
+
+/// Build the initial assistant-role SSE chunk required by the OpenAI streaming
+/// protocol.
+fn start_chunk(
+    request_id: &str,
+    response_model: &str,
+    created: u64,
+) -> ChatCompletionStreamResponse {
+    let mut chunk = ChatCompletionStreamResponse::new(request_id, response_model, created);
+    chunk.choices.push(ChatCompletionStreamChoice {
+        delta: ChatMessageDelta {
+            role: Some(AssistantRole),
+            ..Default::default()
+        },
+        ..Default::default()
+    });
+    chunk
+}
+
+/// Build one content-delta SSE chunk from one internal assistant block delta.
+fn block_delta_chunk(
+    request_id: &str,
+    response_model: &str,
+    created: u64,
+    kind: AssistantBlockKind,
+    delta: String,
+) -> ChatCompletionStreamResponse {
+    let delta = match kind {
+        AssistantBlockKind::Text => ChatMessageDelta {
+            content: Some(delta),
+            ..Default::default()
+        },
+        AssistantBlockKind::Reasoning => ChatMessageDelta {
+            reasoning: Some(delta),
+            ..Default::default()
+        },
+        AssistantBlockKind::ToolCall => {
+            unreachable!("tool calls must flow through dedicated tool-call chunks")
+        }
+    };
+
+    let mut chunk = ChatCompletionStreamResponse::new(request_id, response_model, created);
+    chunk.choices.push(ChatCompletionStreamChoice {
+        delta,
+        ..Default::default()
+    });
+    chunk
+}
+
+fn tool_call_start_chunk(
+    request_id: &str,
+    response_model: &str,
+    created: u64,
+    tool_index: u32,
+    id: String,
+    name: String,
+) -> ChatCompletionStreamResponse {
+    let mut chunk = ChatCompletionStreamResponse::new(request_id, response_model, created);
+    chunk.choices.push(ChatCompletionStreamChoice {
+        delta: ChatMessageDelta {
+            tool_calls: Some(vec![ToolCallDelta {
+                index: tool_index,
+                id: Some(id),
+                tool_type: Some("function".to_string()),
+                function: Some(FunctionCallDelta {
+                    name: Some(name),
+                    arguments: None,
+                }),
+            }]),
+            ..Default::default()
+        },
+        ..Default::default()
+    });
+    chunk
+}
+
+fn tool_call_arguments_chunk(
+    request_id: &str,
+    response_model: &str,
+    created: u64,
+    tool_index: u32,
+    delta: String,
+) -> ChatCompletionStreamResponse {
+    let mut chunk = ChatCompletionStreamResponse::new(request_id, response_model, created);
+    chunk.choices.push(ChatCompletionStreamChoice {
+        delta: ChatMessageDelta {
+            tool_calls: Some(vec![ToolCallDelta {
+                index: tool_index,
+                id: None,
+                tool_type: None,
+                function: Some(FunctionCallDelta {
+                    name: None,
+                    arguments: Some(delta),
+                }),
+            }]),
+            ..Default::default()
+        },
+        ..Default::default()
+    });
+    chunk
+}
+
+fn logprobs_only_chunk(
+    request_id: &str,
+    response_model: &str,
+    created: u64,
+    logprobs: ChatLogProbs,
+) -> ChatCompletionStreamResponse {
+    let mut chunk = ChatCompletionStreamResponse::new(request_id, response_model, created);
+    chunk.choices.push(ChatCompletionStreamChoice {
+        logprobs: Some(logprobs),
+        ..Default::default()
+    });
+    chunk
+}
+
+/// Build the terminal SSE chunk carrying the OpenAI finish reason.
+fn final_chunk(
+    request_id: &str,
+    response_model: &str,
+    created: u64,
+    finish_reason: FinishReason,
+    saw_tool_calls: bool,
+) -> Result<ChatCompletionStreamResponse, ApiError> {
+    let stop_reason = finish_reason.as_stop_reason().map(stop_reason_to_json);
+    let finish_reason = chat_finish_reason_to_openai(&finish_reason, saw_tool_calls)?;
+
+    debug!(
+        finish_reason = %finish_reason,
+        stop_reason = ?stop_reason,
+        "chat stream finished"
+    );
+
+    let mut chunk = ChatCompletionStreamResponse::new(request_id, response_model, created);
+    chunk.choices.push(ChatCompletionStreamChoice {
+        finish_reason: Some(finish_reason.to_string()),
+        stop_reason,
+        ..Default::default()
+    });
+    Ok(chunk)
+}
+
+fn chat_finish_reason_to_openai(
+    finish_reason: &FinishReason,
+    saw_tool_calls: bool,
+) -> Result<&'static str, ApiError> {
+    match finish_reason {
+        FinishReason::Stop(_) if saw_tool_calls => Ok("tool_calls"),
+        FinishReason::Stop(_) => Ok("stop"),
+        FinishReason::Length => Ok("length"),
+        FinishReason::Abort => Ok("abort"),
+        FinishReason::Repetition => Ok("stop"),
+        FinishReason::Error => {
+            bail_server_error!("Internal server error");
+        }
+    }
+}
+
+/// Convert one internal stop reason into the OpenAI-compatible `stop_reason`
+/// JSON shape.
+fn stop_reason_to_json(stop_reason: &StopReason) -> Value {
+    serde_json::to_value(stop_reason).expect("StopReason must serialize to JSON")
+}
+
+#[cfg(test)]
+mod tests {
+    use futures::{StreamExt as _, stream};
+    use serde_json::json;
+    use vllm_chat::{AssistantBlockKind, AssistantToolCall, ChatEvent, FinishReason};
+    use vllm_engine_core_client::protocol::StopReason;
+    use vllm_text::{DecodedLogprobs, DecodedPositionLogprobs, DecodedTokenLogprob};
+
+    use super::{block_delta_chunk, chat_completion_chunk_stream, final_chunk};
+
+    #[test]
+    fn text_chunk_uses_content_only_delta() {
+        let chunk = block_delta_chunk(
+            "chatcmpl-1",
+            "model",
+            1,
+            AssistantBlockKind::Text,
+            "hello".to_string(),
+        );
+        assert_eq!(chunk.choices[0].delta.role, None);
+        assert_eq!(chunk.choices[0].delta.content.as_deref(), Some("hello"));
+        assert_eq!(chunk.choices[0].delta.reasoning, None);
+    }
+
+    #[test]
+    fn reasoning_chunk_uses_reasoning_only_delta() {
+        let chunk = block_delta_chunk(
+            "chatcmpl-1",
+            "model",
+            1,
+            AssistantBlockKind::Reasoning,
+            "thinking".to_string(),
+        );
+        assert_eq!(chunk.choices[0].delta.role, None);
+        assert_eq!(chunk.choices[0].delta.content, None);
+        assert_eq!(
+            chunk.choices[0].delta.reasoning.as_deref(),
+            Some("thinking")
+        );
+    }
+
+    #[test]
+    fn final_chunk_maps_stop_finish_reason_and_stop_reason() {
+        let chunk = final_chunk(
+            "chatcmpl-1",
+            "model",
+            1,
+            FinishReason::Stop(Some(StopReason::Text("stop".to_string()))),
+            false,
+        )
+        .expect("finish reason is valid");
+
+        assert_eq!(chunk.choices[0].finish_reason.as_deref(), Some("stop"));
+        assert_eq!(chunk.choices[0].stop_reason, Some(json!("stop")));
+    }
+
+    #[test]
+    fn final_chunk_maps_length_finish_reason() {
+        let chunk = final_chunk("chatcmpl-1", "model", 1, FinishReason::Length, false)
+            .expect("finish reason is valid");
+
+        assert_eq!(chunk.choices[0].finish_reason.as_deref(), Some("length"));
+        assert_eq!(chunk.choices[0].stop_reason, None);
+    }
+
+    #[test]
+    fn final_chunk_maps_abort_finish_reason() {
+        let chunk = final_chunk("chatcmpl-1", "model", 1, FinishReason::Abort, false)
+            .expect("abort is a valid finish reason");
+
+        assert_eq!(chunk.choices[0].finish_reason.as_deref(), Some("abort"));
+        assert_eq!(chunk.choices[0].stop_reason, None);
+    }
+
+    #[test]
+    fn final_chunk_rejects_error_finish_reason() {
+        assert!(final_chunk("chatcmpl-1", "model", 1, FinishReason::Error, false).is_err());
+    }
+
+    #[test]
+    fn final_chunk_maps_stop_to_tool_calls_when_tool_calls_were_streamed() {
+        let chunk = final_chunk("chatcmpl-1", "model", 1, FinishReason::stop_eos(), true)
+            .expect("finish reason is valid");
+
+        assert_eq!(
+            chunk.choices[0].finish_reason.as_deref(),
+            Some("tool_calls")
+        );
+    }
+
+    #[tokio::test]
+    async fn chunk_stream_coalesces_text_delta_with_logprobs() {
+        let stream = stream::iter(vec![
+            Ok(ChatEvent::Start {
+                prompt_token_ids: vec![].into(),
+                prompt_logprobs: None,
+            }),
+            Ok(ChatEvent::BlockStart {
+                index: 0,
+                kind: AssistantBlockKind::Text,
+            }),
+            Ok(ChatEvent::BlockDelta {
+                index: 0,
+                kind: AssistantBlockKind::Text,
+                delta: "hi".to_string(),
+            }),
+            Ok(ChatEvent::LogprobsDelta {
+                logprobs: Some(DecodedLogprobs {
+                    positions: vec![DecodedPositionLogprobs {
+                        entries: vec![DecodedTokenLogprob {
+                            token_id: 0,
+                            token: "hi".to_string(),
+                            logprob: -0.1,
+                            rank: 1,
+                        }],
+                    }],
+                }),
+                token_ids: vec![],
+            }),
+            Ok(ChatEvent::Done {
+                message: Default::default(),
+                prompt_token_count: 1,
+                output_token_count: 1,
+                finish_reason: FinishReason::stop_eos(),
+                kv_transfer_params: None,
+            }),
+        ]);
+
+        let chunks = chat_completion_chunk_stream(
+            stream,
+            "chatcmpl-1".to_string(),
+            "model".to_string(),
+            1,
+            false,
+            false,
+            true,
+            None,
+            false,
+            false,
+        )
+        .collect::<Vec<_>>()
+        .await
+        .into_iter()
+        .collect::<Result<Vec<_>, _>>()
+        .expect("stream chunks");
+
+        assert_eq!(chunks.len(), 3);
+        assert_eq!(chunks[1].choices[0].delta.content.as_deref(), Some("hi"));
+        let logprobs = chunks[1].choices[0].logprobs.as_ref().expect("logprobs");
+        let content = logprobs.content.as_ref().expect("logprobs content");
+        assert_eq!(content[0].token, "hi");
+    }
+
+    #[tokio::test]
+    async fn chunk_stream_coalesces_reasoning_delta_with_logprobs() {
+        let stream = stream::iter(vec![
+            Ok(ChatEvent::Start {
+                prompt_token_ids: vec![].into(),
+                prompt_logprobs: None,
+            }),
+            Ok(ChatEvent::BlockStart {
+                index: 0,
+                kind: AssistantBlockKind::Reasoning,
+            }),
+            Ok(ChatEvent::BlockDelta {
+                index: 0,
+                kind: AssistantBlockKind::Reasoning,
+                delta: "think".to_string(),
+            }),
+            Ok(ChatEvent::LogprobsDelta {
+                logprobs: Some(DecodedLogprobs {
+                    positions: vec![DecodedPositionLogprobs {
+                        entries: vec![DecodedTokenLogprob {
+                            token_id: 0,
+                            token: "think".to_string(),
+                            logprob: -0.1,
+                            rank: 1,
+                        }],
+                    }],
+                }),
+                token_ids: vec![],
+            }),
+            Ok(ChatEvent::Done {
+                message: Default::default(),
+                prompt_token_count: 1,
+                output_token_count: 1,
+                finish_reason: FinishReason::stop_eos(),
+                kv_transfer_params: None,
+            }),
+        ]);
+
+        let chunks = chat_completion_chunk_stream(
+            stream,
+            "chatcmpl-1".to_string(),
+            "model".to_string(),
+            1,
+            false,
+            false,
+            true,
+            None,
+            false,
+            false,
+        )
+        .collect::<Vec<_>>()
+        .await
+        .into_iter()
+        .collect::<Result<Vec<_>, _>>()
+        .expect("stream chunks");
+
+        assert_eq!(chunks.len(), 3);
+        assert_eq!(
+            chunks[1].choices[0].delta.reasoning.as_deref(),
+            Some("think")
+        );
+        assert!(chunks[1].choices[0].logprobs.is_some());
+    }
+
+    #[tokio::test]
+    async fn chunk_stream_preserves_tool_call_index_and_omits_id_from_arguments_delta() {
+        let stream = stream::iter(vec![
+            Ok(ChatEvent::Start {
+                prompt_token_ids: vec![].into(),
+                prompt_logprobs: None,
+            }),
+            Ok(ChatEvent::ToolCallStart {
+                index: 3,
+                id: "call_1".to_string(),
+                name: "get_weather".to_string(),
+            }),
+            Ok(ChatEvent::ToolCallArgumentsDelta {
+                index: 3,
+                delta: r#"{"city":"Paris"}"#.to_string(),
+            }),
+            Ok(ChatEvent::ToolCallEnd {
+                index: 3,
+                call: AssistantToolCall {
+                    id: "call_1".to_string(),
+                    name: "get_weather".to_string(),
+                    arguments: r#"{"city":"Paris"}"#.to_string(),
+                },
+            }),
+            Ok(ChatEvent::Done {
+                message: Default::default(),
+                prompt_token_count: 1,
+                output_token_count: 1,
+                finish_reason: FinishReason::stop_eos(),
+                kv_transfer_params: None,
+            }),
+        ]);
+
+        let chunks = chat_completion_chunk_stream(
+            stream,
+            "chatcmpl-1".to_string(),
+            "model".to_string(),
+            1,
+            false,
+            false,
+            false,
+            None,
+            false,
+            false,
+        )
+        .collect::<Vec<_>>()
+        .await
+        .into_iter()
+        .collect::<Result<Vec<_>, _>>()
+        .expect("stream chunks");
+
+        assert_eq!(
+            chunks[1].choices[0].delta.tool_calls.as_ref().unwrap()[0].index,
+            3
+        );
+        assert_eq!(
+            chunks[1].choices[0].delta.tool_calls.as_ref().unwrap()[0].id,
+            Some("call_1".to_string())
+        );
+        assert_eq!(
+            chunks[2].choices[0].delta.tool_calls.as_ref().unwrap()[0].index,
+            3
+        );
+        assert_eq!(
+            chunks[2].choices[0].delta.tool_calls.as_ref().unwrap()[0].id,
+            None
+        );
+    }
+}
diff --git a/rust/src/server/src/routes/openai/chat_completions/convert.rs b/rust/src/server/src/routes/openai/chat_completions/convert.rs
new file mode 100644
index 000000000000..a7884b11e52c
--- /dev/null
+++ b/rust/src/server/src/routes/openai/chat_completions/convert.rs
@@ -0,0 +1,992 @@
+use itertools::Itertools as _;
+use vllm_chat::{
+    AssistantContentBlock, AssistantToolCall, ChatContent, ChatContentPart,
+    ChatMessage as VllmChatMessage, ChatOptions, ChatRequest, ChatTool, ChatToolChoice,
+    GenerationPromptMode, SamplingParams,
+};
+
+use super::types::ChatCompletionRequest;
+use super::validate;
+use crate::error::{ApiError, bail_invalid_request};
+use crate::routes::openai::utils::structured_outputs::convert_from_response_format;
+use crate::routes::openai::utils::types::{
+    ChatMessage, ContentPart, MessageContent, Tool, ToolChoice, ToolChoiceValue,
+};
+use crate::utils::{ResolvedRequestContext, convert_logit_bias, merge_kv_transfer_params};
+
+/// Lowered chat request plus the public response metadata carried by every SSE
+/// chunk.
+#[derive(Debug, Clone, PartialEq)]
+pub struct PreparedRequest {
+    /// Stable OpenAI-style request ID, reused as the external chat request ID.
+    pub request_id: String,
+    /// Public model ID echoed back to the client.
+    pub response_model: String,
+    /// Whether the caller asked for the final streamed usage chunk.
+    pub include_usage: bool,
+    /// Whether the caller requested output logprobs on chat choices.
+    pub requested_logprobs: bool,
+    /// Whether the caller requested top-level prompt logprobs.
+    pub include_prompt_logprobs: bool,
+    /// Lowered chat request for `vllm-chat`.
+    pub chat_request: ChatRequest,
+    /// Last assistant-role message content to echo back when `echo=true`.
+    pub echo: Option<String>,
+    /// Whether to include token IDs alongside generated text.
+    pub return_token_ids: bool,
+    /// Whether to format logprob tokens as `token_id:{id}`.
+    pub return_tokens_as_token_ids: bool,
+}
+
+/// Validate and lower one OpenAI chat completion request into the internal chat
+/// format.
+///
+/// `served_model_names` must be non-empty; the first entry is used as the
+/// `model` field in responses.
+pub(crate) fn prepare_chat_request(
+    request: ChatCompletionRequest,
+    served_model_names: &[String],
+    ctx: ResolvedRequestContext,
+) -> Result<PreparedRequest, ApiError> {
+    validate::validate_request_compat(&request, served_model_names)?;
+
+    let request_id = format!("chatcmpl-{}", ctx.request_id);
+    let echo = request
+        .echo
+        .then(|| extract_last_assistant_content(&request.messages))
+        .flatten();
+    let messages: Vec<_> = request.messages.into_iter().map(convert_message).try_collect()?;
+    let generation_prompt_mode = normalize_generation_prompt_mode(
+        request.add_generation_prompt,
+        request.continue_final_message,
+        &messages,
+    )?;
+
+    let template_kwargs = request.chat_template_kwargs.unwrap_or_default();
+
+    let include_usage = (request.stream_options.as_ref())
+        .and_then(|options| options.include_usage)
+        .unwrap_or(false);
+    let requested_logprobs = request.logprobs;
+
+    // Auto-enable prompt logprobs for non-streaming echo, matching Python vLLM's
+    // behavior.
+    let top_logprobs = request.top_logprobs.unwrap_or(0);
+    let prompt_logprobs = request
+        .prompt_logprobs
+        .or((request.echo && !request.stream).then_some(top_logprobs));
+    let include_prompt_logprobs = prompt_logprobs.is_some();
+
+    let structured_outputs = convert_from_response_format(
+        request.response_format.as_ref(),
+        &request.structured_outputs,
+    )?;
+
+    let chat_request = ChatRequest {
+        request_id: request_id.clone(),
+        messages,
+        sampling_params: SamplingParams {
+            temperature: request.temperature,
+            top_p: request.top_p,
+            top_k: request.top_k,
+            seed: request.seed,
+            max_tokens: request.max_completion_tokens,
+            min_tokens: request.min_tokens,
+            logprobs: request.logprobs.then_some(top_logprobs),
+            prompt_logprobs,
+            min_p: request.min_p,
+            frequency_penalty: request.frequency_penalty,
+            presence_penalty: request.presence_penalty,
+            repetition_penalty: request.repetition_penalty,
+            stop_token_ids: request.stop_token_ids,
+            ignore_eos: request.ignore_eos,
+            logit_bias: convert_logit_bias(request.logit_bias)?,
+            allowed_token_ids: request.allowed_token_ids,
+            bad_words: request.bad_words,
+            logprob_token_ids: None,
+            structured_outputs,
+            skip_reading_prefix_cache: None,
+            vllm_xargs: merge_kv_transfer_params(
+                request.vllm_xargs,
+                request.kv_transfer_params.as_ref(),
+            ),
+        },
+        chat_options: ChatOptions {
+            generation_prompt_mode,
+            chat_template: request.chat_template,
+            reasoning_effort: request.reasoning_effort,
+            template_kwargs,
+        },
+        tools: convert_tools(request.tools)?,
+        tool_choice: convert_tool_choice(request.tool_choice.as_ref())?,
+        decode_options: vllm_text::output::TextDecodeOptions {
+            skip_special_tokens: request.skip_special_tokens,
+            include_stop_str_in_output: request.include_stop_str_in_output,
+            stop_strings: request.stop.map(|stop| stop.into_vec()),
+            min_tokens: request.min_tokens.unwrap_or(0),
+        },
+        intermediate: request.stream,
+        priority: request.priority.unwrap_or(0),
+        documents: request.documents,
+        cache_salt: request.cache_salt,
+        add_special_tokens: request.add_special_tokens,
+        data_parallel_rank: ctx.data_parallel_rank,
+    };
+
+    Ok(PreparedRequest {
+        request_id,
+        response_model: served_model_names.first().cloned().unwrap_or_default(),
+        include_usage,
+        requested_logprobs,
+        include_prompt_logprobs,
+        chat_request,
+        echo,
+        return_token_ids: request.return_token_ids.unwrap_or(false),
+        return_tokens_as_token_ids: request.return_tokens_as_token_ids.unwrap_or(false),
+    })
+}
+
+fn normalize_generation_prompt_mode(
+    add_generation_prompt: Option<bool>,
+    continue_final_message: bool,
+    messages: &[VllmChatMessage],
+) -> Result<GenerationPromptMode, ApiError> {
+    if add_generation_prompt == Some(true) && continue_final_message {
+        bail_invalid_request!(
+            "Cannot set both `continue_final_message` and `add_generation_prompt` to True."
+        );
+    }
+
+    let last_role = messages.last().map(VllmChatMessage::role);
+    match (add_generation_prompt, continue_final_message, last_role) {
+        (Some(true), true, _) => unreachable!("rejected above"),
+        (_, true, Some(vllm_chat::ChatRole::Assistant)) => {
+            Ok(GenerationPromptMode::ContinueFinalAssistant)
+        }
+        (_, true, _) => {
+            bail_invalid_request!(
+                "Cannot set `continue_final_message` to True when the last message is not from the assistant."
+            );
+        }
+        (Some(false), false, _) => Ok(GenerationPromptMode::NoGenerationPrompt),
+        (None | Some(true), false, _) => Ok(GenerationPromptMode::StartNewAssistant),
+    }
+}
+
+/// Extract the text content of the last message if it has the assistant role.
+fn extract_last_assistant_content(messages: &[ChatMessage]) -> Option<String> {
+    let ChatMessage::Assistant { content, .. } = messages.last()? else {
+        return None;
+    };
+    let text = match content.as_ref()? {
+        MessageContent::Text(text) => text.clone(),
+        MessageContent::Parts(parts) => parts
+            .iter()
+            .filter_map(|p| match p {
+                ContentPart::Text { text } => Some(text.as_str()),
+                _ => None,
+            })
+            .collect::<Vec<_>>()
+            .join("\n"),
+    };
+    (!text.is_empty()).then_some(text)
+}
+
+/// Lower one OpenAI chat message into the `vllm-chat` message shape.
+fn convert_message(message: ChatMessage) -> Result<VllmChatMessage, ApiError> {
+    match message {
+        ChatMessage::System { content, .. } => {
+            Ok(VllmChatMessage::system(convert_content(content)?))
+        }
+        ChatMessage::User { content, .. } => Ok(VllmChatMessage::user(convert_content(content)?)),
+        ChatMessage::Assistant {
+            content,
+            tool_calls,
+            reasoning,
+            name: _,
+        } => {
+            let mut blocks = Vec::new();
+            if let Some(reasoning) = reasoning
+                && !reasoning.is_empty()
+            {
+                blocks.push(AssistantContentBlock::Reasoning { text: reasoning });
+            }
+            if let Some(content) = content {
+                blocks.extend(convert_assistant_text_blocks(content)?);
+            }
+            if let Some(tool_calls) = tool_calls {
+                blocks.extend(convert_assistant_tool_calls(tool_calls)?);
+            }
+            if blocks.is_empty() {
+                bail_invalid_request!(
+                    "Assistant messages must contain text, reasoning content, or tool_calls."
+                );
+            }
+
+            Ok(VllmChatMessage::assistant_blocks(blocks))
+        }
+        ChatMessage::Tool {
+            content,
+            tool_call_id,
+        } => Ok(VllmChatMessage::tool_response(
+            convert_content(content)?,
+            tool_call_id,
+        )),
+        ChatMessage::Function { .. } => {
+            bail_invalid_request!("Function messages are not supported.")
+        }
+        ChatMessage::Developer {
+            content,
+            tools,
+            name: _,
+        } => Ok(VllmChatMessage::developer(
+            convert_content(content)?,
+            convert_message_tools(tools)?,
+        )),
+    }
+}
+
+/// Convert the given OpenAI message content value into the internal format in
+/// `vllm-chat`.
+fn convert_content(content: MessageContent) -> Result<ChatContent, ApiError> {
+    match content {
+        MessageContent::Text(text) => Ok(ChatContent::Text(text)),
+        MessageContent::Parts(parts) => parts
+            .into_iter()
+            .map(|part| match part {
+                ContentPart::Text { text } => Ok(ChatContentPart::text(text)),
+                ContentPart::ImageUrl { image_url, uuid } => Ok(ChatContentPart::ImageUrl {
+                    image_url: image_url.url,
+                    detail: image_url.detail,
+                    uuid,
+                }),
+                _ => bail_invalid_request!("Only text and image_url content parts are supported."),
+            })
+            .try_collect()
+            .map(ChatContent::Parts),
+    }
+}
+
+/// Convert the given OpenAI assistant message content into the internal format
+/// in `vllm-chat`.
+fn convert_assistant_text_blocks(
+    content: MessageContent,
+) -> Result<Vec<AssistantContentBlock>, ApiError> {
+    match content {
+        MessageContent::Text(text) => Ok(vec![AssistantContentBlock::Text { text }]),
+        MessageContent::Parts(parts) => parts
+            .into_iter()
+            .map(|part| match part {
+                ContentPart::Text { text } => Ok(AssistantContentBlock::Text { text }),
+                _ => bail_invalid_request!(
+                    "Only text content parts are supported for assistant messages."
+                ),
+            })
+            .try_collect(),
+    }
+}
+
+fn convert_assistant_tool_calls(
+    tool_calls: Vec<crate::routes::openai::utils::types::ToolCall>,
+) -> Result<Vec<AssistantContentBlock>, ApiError> {
+    tool_calls
+        .into_iter()
+        .map(|tool_call| {
+            if tool_call.tool_type != "function" {
+                bail_invalid_request!("Only function tool calls are supported.");
+            }
+
+            Ok(AssistantContentBlock::ToolCall(AssistantToolCall {
+                id: tool_call.id,
+                name: tool_call.function.name,
+                arguments: tool_call.function.arguments.unwrap_or_else(|| "{}".to_string()),
+            }))
+        })
+        .collect()
+}
+
+fn convert_tools(tools: Option<Vec<Tool>>) -> Result<Vec<ChatTool>, ApiError> {
+    tools
+        .unwrap_or_default()
+        .into_iter()
+        .map(|tool| {
+            if tool.tool_type != "function" {
+                bail_invalid_request!("Only function tools are supported.");
+            }
+            Ok(ChatTool {
+                name: tool.function.name,
+                description: tool.function.description,
+                parameters: tool.function.parameters,
+                strict: tool.function.strict,
+            })
+        })
+        .collect()
+}
+
+fn convert_message_tools(tools: Option<Vec<Tool>>) -> Result<Option<Vec<ChatTool>>, ApiError> {
+    let tools = convert_tools(tools)?;
+    Ok((!tools.is_empty()).then_some(tools))
+}
+
+fn convert_tool_choice(tool_choice: Option<&ToolChoice>) -> Result<ChatToolChoice, ApiError> {
+    match tool_choice {
+        None | Some(ToolChoice::Value(ToolChoiceValue::Auto)) => Ok(ChatToolChoice::Auto),
+        Some(ToolChoice::Value(ToolChoiceValue::None)) => Ok(ChatToolChoice::None),
+        _ => bail_invalid_request!("tool_choice={:?} is not supported yet.", tool_choice),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+
+    use axum::http::HeaderMap;
+    use expect_test::expect;
+    use llm_multimodal::ImageDetail;
+    use serde_json::json;
+    use vllm_chat::{
+        AssistantContentBlock, AssistantToolCall, ChatContentPart, ChatMessage as VllmChatMessage,
+        ChatTool as VllmChatTool, ChatToolChoice, GenerationPromptMode,
+        SamplingParams as VllmSamplingParams,
+    };
+    use vllm_text::output::TextDecodeOptions;
+
+    use super::prepare_chat_request;
+    use crate::routes::openai::chat_completions::types::{
+        AssistantRole, ChatCompletionMessage, ChatCompletionRequest,
+    };
+    use crate::routes::openai::utils::types::{
+        ChatMessage, ContentPart, Function, FunctionCallResponse, ImageUrl, MessageContent, Tool,
+        ToolCall, ToolChoice, ToolChoiceValue, VideoUrl,
+    };
+    use crate::utils::{ResolvedRequestContext, resolve_request_context};
+
+    fn request_context(headers: &HeaderMap, request_id: Option<&str>) -> ResolvedRequestContext {
+        resolve_request_context(headers, request_id)
+    }
+
+    fn served(names: &[&str]) -> Vec<String> {
+        names.iter().map(|s| s.to_string()).collect()
+    }
+
+    fn base_request() -> ChatCompletionRequest {
+        ChatCompletionRequest {
+            model: "Qwen/Qwen1.5-0.5B-Chat".to_string(),
+            messages: vec![ChatMessage::User {
+                content: MessageContent::Text("hello".to_string()),
+                name: None,
+            }],
+            stream: true,
+            ..Default::default()
+        }
+    }
+
+    #[test]
+    fn prepare_chat_request_maps_text_parts() {
+        let mut request = base_request();
+        request.messages = vec![ChatMessage::Assistant {
+            content: Some(MessageContent::Parts(vec![ContentPart::Text {
+                text: "hello".to_string(),
+            }])),
+            name: None,
+            tool_calls: None,
+            reasoning: None,
+        }];
+        request.add_generation_prompt = Some(false);
+        request.continue_final_message = true;
+        request.skip_special_tokens = false;
+        request.chat_template_kwargs = Some(HashMap::from([("foo".to_string(), json!("bar"))]));
+
+        let prepared = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+
+        assert!(prepared.request_id.starts_with("chatcmpl-"));
+        assert_eq!(
+            prepared.chat_request.messages,
+            vec![VllmChatMessage::assistant_text("hello")]
+        );
+        assert_eq!(
+            prepared.chat_request.sampling_params,
+            VllmSamplingParams::default()
+        );
+        assert_eq!(
+            prepared.chat_request.chat_options.generation_prompt_mode,
+            GenerationPromptMode::ContinueFinalAssistant
+        );
+        assert_eq!(
+            prepared.chat_request.chat_options.template_kwargs,
+            HashMap::from([("foo".to_string(), json!("bar"))])
+        );
+        assert_eq!(
+            prepared.chat_request.decode_options,
+            TextDecodeOptions {
+                skip_special_tokens: false,
+                include_stop_str_in_output: false,
+                stop_strings: None,
+                min_tokens: 0,
+            }
+        );
+        assert!(prepared.chat_request.tools.is_empty());
+        assert_eq!(prepared.chat_request.tool_choice, ChatToolChoice::Auto);
+    }
+
+    #[test]
+    fn prepare_chat_request_keeps_optional_sampling_fields_unset() {
+        let prepared = prepare_chat_request(
+            base_request(),
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+
+        assert!(prepared.request_id.starts_with("chatcmpl-"));
+        assert_eq!(
+            prepared.chat_request.messages,
+            vec![VllmChatMessage::user("hello")]
+        );
+        assert_eq!(
+            prepared.chat_request.sampling_params,
+            VllmSamplingParams::default()
+        );
+        assert_eq!(
+            prepared.chat_request.chat_options.generation_prompt_mode,
+            GenerationPromptMode::StartNewAssistant
+        );
+        assert_eq!(
+            prepared.chat_request.decode_options,
+            TextDecodeOptions {
+                skip_special_tokens: true,
+                include_stop_str_in_output: false,
+                stop_strings: None,
+                min_tokens: 0,
+            }
+        );
+        assert!(prepared.chat_request.tools.is_empty());
+        assert_eq!(prepared.chat_request.tool_choice, ChatToolChoice::Auto);
+    }
+
+    #[test]
+    fn prepare_chat_request_preserves_sampling_passthrough_fields() {
+        let request = ChatCompletionRequest {
+            seed: Some(42),
+            min_p: Some(0.2),
+            frequency_penalty: Some(0.3),
+            presence_penalty: Some(0.4),
+            repetition_penalty: Some(1.1),
+            ..base_request()
+        };
+
+        let prepared = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+        let expected = VllmSamplingParams {
+            seed: Some(42),
+            min_p: Some(0.2),
+            frequency_penalty: Some(0.3),
+            presence_penalty: Some(0.4),
+            repetition_penalty: Some(1.1),
+            ..VllmSamplingParams::default()
+        };
+        assert_eq!(prepared.chat_request.sampling_params, expected);
+    }
+
+    #[test]
+    fn prepare_chat_request_accepts_developer_messages() {
+        let request = ChatCompletionRequest {
+            messages: vec![ChatMessage::Developer {
+                content: MessageContent::Text("hello".to_string()),
+                tools: Some(vec![Tool {
+                    tool_type: "function".to_string(),
+                    function: Function {
+                        name: "get_weather".to_string(),
+                        description: Some("Get weather".to_string()),
+                        parameters: json!({
+                            "type": "object",
+                            "properties": {"city": {"type": "string"}},
+                        }),
+                        strict: Some(true),
+                    },
+                }]),
+                name: None,
+            }],
+            ..base_request()
+        };
+
+        let prepared = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+
+        assert_eq!(
+            prepared.chat_request.messages,
+            vec![VllmChatMessage::developer(
+                "hello",
+                Some(vec![VllmChatTool {
+                    name: "get_weather".to_string(),
+                    description: Some("Get weather".to_string()),
+                    parameters: json!({
+                        "type": "object",
+                        "properties": {"city": {"type": "string"}},
+                    }),
+                    strict: Some(true),
+                }]),
+            )]
+        );
+    }
+
+    #[test]
+    fn prepare_chat_request_maps_image_url_content_parts() {
+        let request = ChatCompletionRequest {
+            messages: vec![ChatMessage::User {
+                content: MessageContent::Parts(vec![
+                    ContentPart::Text {
+                        text: "describe ".to_string(),
+                    },
+                    ContentPart::ImageUrl {
+                        image_url: ImageUrl {
+                            url: "https://example.com/image.png".to_string(),
+                            detail: Some(ImageDetail::Low),
+                        },
+                        uuid: Some("image-1".to_string()),
+                    },
+                    ContentPart::Text {
+                        text: " briefly".to_string(),
+                    },
+                ]),
+                name: None,
+            }],
+            ..base_request()
+        };
+
+        let prepared = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+
+        assert_eq!(
+            prepared.chat_request.messages,
+            vec![VllmChatMessage::user(vec![
+                ChatContentPart::text("describe "),
+                ChatContentPart::ImageUrl {
+                    image_url: "https://example.com/image.png".to_string(),
+                    detail: Some(ImageDetail::Low),
+                    uuid: Some("image-1".to_string()),
+                },
+                ChatContentPart::text(" briefly"),
+            ])]
+        );
+    }
+
+    #[test]
+    fn prepare_chat_request_maps_developer_image_url_content_parts() {
+        let request = ChatCompletionRequest {
+            messages: vec![ChatMessage::Developer {
+                content: MessageContent::Parts(vec![ContentPart::ImageUrl {
+                    image_url: ImageUrl {
+                        url: "https://example.com/image.png".to_string(),
+                        detail: None,
+                    },
+                    uuid: None,
+                }]),
+                tools: None,
+                name: None,
+            }],
+            ..base_request()
+        };
+
+        let prepared = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+
+        assert_eq!(
+            prepared.chat_request.messages,
+            vec![VllmChatMessage::developer(
+                vec![ChatContentPart::image_url("https://example.com/image.png")],
+                None,
+            )]
+        );
+    }
+
+    #[test]
+    fn prepare_chat_request_rejects_video_content_parts() {
+        let request = ChatCompletionRequest {
+            messages: vec![ChatMessage::User {
+                content: MessageContent::Parts(vec![ContentPart::VideoUrl {
+                    video_url: VideoUrl {
+                        url: "https://example.com/video.mp4".to_string(),
+                    },
+                }]),
+                name: None,
+            }],
+            ..base_request()
+        };
+
+        let error = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .unwrap_err();
+
+        expect!["Only text and image_url content parts are supported."]
+            .assert_eq(&error.to_error_response().error.message);
+    }
+
+    #[test]
+    fn prepare_chat_request_rejects_assistant_image_url_content_parts() {
+        let request = ChatCompletionRequest {
+            messages: vec![ChatMessage::Assistant {
+                content: Some(MessageContent::Parts(vec![ContentPart::ImageUrl {
+                    image_url: ImageUrl {
+                        url: "https://example.com/image.png".to_string(),
+                        detail: None,
+                    },
+                    uuid: None,
+                }])),
+                name: None,
+                tool_calls: None,
+                reasoning: None,
+            }],
+            ..base_request()
+        };
+
+        let error = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .unwrap_err();
+
+        expect!["Only text content parts are supported for assistant messages."]
+            .assert_eq(&error.to_error_response().error.message);
+    }
+
+    #[test]
+    fn prepare_chat_request_accepts_assistant_reasoning_history() {
+        let message = ChatCompletionMessage {
+            role: AssistantRole,
+            content: Some("answer".to_string()),
+            tool_calls: None,
+            reasoning: Some("inner".to_string()),
+        };
+        let message_json = serde_json::to_value(message).expect("message serializes");
+
+        let request = ChatCompletionRequest {
+            messages: vec![
+                serde_json::from_value(message_json).expect("response message is valid history"),
+            ],
+            add_generation_prompt: Some(false),
+            ..base_request()
+        };
+
+        let prepared = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+        assert_eq!(
+            prepared.chat_request.messages,
+            vec![VllmChatMessage::assistant_blocks(vec![
+                AssistantContentBlock::Reasoning {
+                    text: "inner".to_string(),
+                },
+                AssistantContentBlock::Text {
+                    text: "answer".to_string(),
+                },
+            ])]
+        );
+        assert!(prepared.chat_request.tools.is_empty());
+        assert_eq!(prepared.chat_request.tool_choice, ChatToolChoice::Auto);
+    }
+
+    #[test]
+    fn prepare_chat_request_accepts_legacy_reasoning_content_alias() {
+        let request = ChatCompletionRequest {
+            messages: vec![
+                serde_json::from_value(json!({
+                    "role": "assistant",
+                    "content": "answer",
+                    "reasoning_content": "inner",
+                }))
+                .expect("legacy reasoning_content alias is accepted"),
+            ],
+            add_generation_prompt: Some(false),
+            ..base_request()
+        };
+
+        let prepared = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+        assert_eq!(
+            prepared.chat_request.messages,
+            vec![VllmChatMessage::assistant_blocks(vec![
+                AssistantContentBlock::Reasoning {
+                    text: "inner".to_string(),
+                },
+                AssistantContentBlock::Text {
+                    text: "answer".to_string(),
+                },
+            ])]
+        );
+    }
+
+    #[test]
+    fn prepare_chat_request_accepts_tools_and_tool_history() {
+        let request = ChatCompletionRequest {
+            messages: vec![
+                ChatMessage::Assistant {
+                    content: None,
+                    name: None,
+                    tool_calls: Some(vec![ToolCall {
+                        id: "call_1".to_string(),
+                        tool_type: "function".to_string(),
+                        function: FunctionCallResponse {
+                            name: "get_weather".to_string(),
+                            arguments: Some(r#"{"city":"Paris"}"#.to_string()),
+                        },
+                    }]),
+                    reasoning: None,
+                },
+                ChatMessage::Tool {
+                    content: MessageContent::Text("Sunny".to_string()),
+                    tool_call_id: "call_1".to_string(),
+                },
+            ],
+            tools: Some(vec![Tool {
+                tool_type: "function".to_string(),
+                function: Function {
+                    name: "get_weather".to_string(),
+                    description: Some("Get weather".to_string()),
+                    parameters: json!({
+                        "type": "object",
+                        "properties": {"city": {"type": "string"}},
+                    }),
+                    strict: None,
+                },
+            }]),
+            tool_choice: Some(ToolChoice::Value(ToolChoiceValue::None)),
+            ..base_request()
+        };
+
+        let prepared = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+        assert_eq!(
+            prepared.chat_request.messages,
+            vec![
+                VllmChatMessage::assistant_blocks(vec![AssistantContentBlock::ToolCall(
+                    AssistantToolCall {
+                        id: "call_1".to_string(),
+                        name: "get_weather".to_string(),
+                        arguments: r#"{"city":"Paris"}"#.to_string(),
+                    },
+                )]),
+                VllmChatMessage::tool_response("Sunny", "call_1"),
+            ]
+        );
+        assert_eq!(
+            prepared.chat_request.tools,
+            vec![VllmChatTool {
+                name: "get_weather".to_string(),
+                description: Some("Get weather".to_string()),
+                parameters: json!({
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                }),
+                strict: None,
+            }]
+        );
+        assert_eq!(prepared.chat_request.tool_choice, ChatToolChoice::None);
+    }
+
+    #[test]
+    fn prepare_chat_request_lowers_logprobs_fields() {
+        let request = ChatCompletionRequest {
+            stream: false,
+            logprobs: true,
+            prompt_logprobs: Some(2),
+            ..base_request()
+        };
+
+        let prepared = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+
+        assert!(prepared.requested_logprobs);
+        assert!(prepared.include_prompt_logprobs);
+        assert_eq!(prepared.chat_request.sampling_params.logprobs, Some(0));
+        assert_eq!(
+            prepared.chat_request.sampling_params.prompt_logprobs,
+            Some(2)
+        );
+    }
+
+    #[test]
+    fn prepare_chat_request_keeps_prompt_logprobs_independent_from_echo() {
+        let request = ChatCompletionRequest {
+            logprobs: true,
+            top_logprobs: Some(3),
+            echo: true,
+            ..base_request()
+        };
+
+        let prepared = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+
+        assert_eq!(prepared.chat_request.sampling_params.logprobs, Some(3));
+        assert_eq!(prepared.chat_request.sampling_params.prompt_logprobs, None);
+        assert!(!prepared.include_prompt_logprobs);
+    }
+
+    #[test]
+    fn prepare_chat_request_threads_data_parallel_rank() {
+        let mut headers = HeaderMap::new();
+        headers.insert("X-data-parallel-rank", "7".parse().unwrap());
+        let prepared = prepare_chat_request(
+            base_request(),
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            request_context(&headers, None),
+        )
+        .expect("request is valid");
+        assert_eq!(prepared.chat_request.data_parallel_rank, Some(7));
+    }
+
+    #[test]
+    fn prepare_chat_request_leaves_data_parallel_rank_none_when_absent() {
+        let prepared = prepare_chat_request(
+            base_request(),
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+        assert_eq!(prepared.chat_request.data_parallel_rank, None);
+    }
+
+    #[test]
+    fn prepare_chat_request_maps_no_generation_prompt_mode() {
+        let mut request = base_request();
+        request.add_generation_prompt = Some(false);
+
+        let prepared = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+
+        assert_eq!(
+            prepared.chat_request.chat_options.generation_prompt_mode,
+            GenerationPromptMode::NoGenerationPrompt
+        );
+    }
+
+    #[test]
+    fn prepare_chat_request_rejects_conflicting_explicit_generation_prompt_flags() {
+        let mut request = base_request();
+        request.add_generation_prompt = Some(true);
+        request.continue_final_message = true;
+
+        let error = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .unwrap_err();
+
+        expect!["Cannot set both `continue_final_message` and `add_generation_prompt` to True."]
+            .assert_eq(&error.to_error_response().error.message);
+    }
+
+    #[test]
+    fn prepare_chat_request_accepts_continue_final_message_with_implicit_add_generation_prompt() {
+        let mut request = base_request();
+        request.messages = vec![ChatMessage::Assistant {
+            content: Some(MessageContent::Text("hello".to_string())),
+            name: None,
+            tool_calls: None,
+            reasoning: None,
+        }];
+        request.continue_final_message = true;
+
+        let prepared = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+
+        assert_eq!(
+            prepared.chat_request.chat_options.generation_prompt_mode,
+            GenerationPromptMode::ContinueFinalAssistant
+        );
+    }
+
+    #[test]
+    fn prepare_chat_request_rejects_continue_final_message_without_final_assistant() {
+        let mut request = base_request();
+        request.continue_final_message = true;
+
+        let error = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .unwrap_err();
+
+        expect!["Cannot set `continue_final_message` to True when the last message is not from the assistant."]
+            .assert_eq(&error.to_error_response().error.message);
+    }
+
+    #[test]
+    fn prepare_chat_request_allows_new_assistant_mode_after_final_assistant() {
+        let request = ChatCompletionRequest {
+            messages: vec![ChatMessage::Assistant {
+                content: Some(MessageContent::Text("hello".to_string())),
+                name: None,
+                tool_calls: None,
+                reasoning: None,
+            }],
+            ..base_request()
+        };
+
+        let prepared = prepare_chat_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("request is valid");
+
+        assert_eq!(
+            prepared.chat_request.chat_options.generation_prompt_mode,
+            GenerationPromptMode::StartNewAssistant
+        );
+    }
+}
diff --git a/rust/src/server/src/routes/openai/chat_completions/types.rs b/rust/src/server/src/routes/openai/chat_completions/types.rs
new file mode 100644
index 000000000000..00557ad53d24
--- /dev/null
+++ b/rust/src/server/src/routes/openai/chat_completions/types.rs
@@ -0,0 +1,600 @@
+use std::collections::HashMap;
+use std::fmt;
+
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use serde_with::SerializeDisplay;
+use validator::Validate;
+use vllm_chat::ReasoningEffort;
+
+use crate::routes::openai::utils::structured_outputs::ResponseFormat;
+use crate::routes::openai::utils::types::{
+    ChatLogProbs, ChatMessage, MessageContent, Normalizable, StreamOptions, StringOrArray, Tool,
+    ToolCall, ToolCallDelta, ToolChoice, ToolChoiceValue, ToolReference, UNKNOWN_MODEL_ID, Usage,
+    default_true, validate_stop, validate_top_p_value,
+};
+
+/// vLLM-compatible request type for the Chat Completions API.
+///
+/// Mirrors the Python vLLM `ChatCompletionRequest` class. The local copy keeps
+/// the request type route-owned so we can add vLLM-only fields directly instead
+/// of layering wrapper deserializers on top.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Deserialize, Serialize, Validate)]
+#[validate(schema(function = "validate_chat_cross_parameters"))]
+pub struct ChatCompletionRequest {
+    // -------- Standard OpenAI API Parameters --------
+    /// A list of messages comprising the conversation so far
+    #[validate(custom(function = "validate_messages"))]
+    pub messages: Vec<ChatMessage>,
+
+    /// ID of the model to use
+    #[serde(default = "default_model")]
+    pub model: String,
+
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens based
+    /// on their existing frequency in the text so far
+    #[validate(range(min = -2.0, max = 2.0))]
+    pub frequency_penalty: Option<f32>,
+
+    /// Modify the likelihood of specified tokens appearing in the completion
+    pub logit_bias: Option<HashMap<String, f32>>,
+
+    /// Whether to return log probabilities of the output tokens
+    #[serde(default)]
+    pub logprobs: bool,
+
+    /// An integer specifying the number of most likely tokens to return
+    /// -1 means return all
+    #[validate(range(min = -1))]
+    pub top_logprobs: Option<i32>,
+
+    /// Deprecated: Replaced by max_completion_tokens
+    #[deprecated(note = "Use max_completion_tokens instead")]
+    #[validate(range(min = 1))]
+    pub max_tokens: Option<u32>,
+
+    /// An upper bound for the number of tokens that can be generated for a
+    /// completion
+    #[validate(range(min = 1))]
+    pub max_completion_tokens: Option<u32>,
+
+    /// How many chat completion choices to generate for each input message
+    #[validate(range(min = 1, max = 10))]
+    pub n: Option<u32>,
+
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens based
+    /// on whether they appear in the text so far
+    #[validate(range(min = -2.0, max = 2.0))]
+    pub presence_penalty: Option<f32>,
+
+    /// An object specifying the format that the model must output
+    pub response_format: Option<ResponseFormat>,
+
+    /// If specified, our system will make a best effort to sample
+    /// deterministically
+    pub seed: Option<i64>,
+
+    /// Up to 4 sequences where the API will stop generating further tokens
+    #[validate(custom(function = "validate_stop"))]
+    pub stop: Option<StringOrArray>,
+
+    /// If set, partial message deltas will be sent
+    #[serde(default)]
+    pub stream: bool,
+
+    /// Options for streaming response
+    pub stream_options: Option<StreamOptions>,
+
+    /// What sampling temperature to use, between 0 and 2
+    #[validate(range(min = 0.0, max = 2.0))]
+    pub temperature: Option<f32>,
+
+    /// An alternative to sampling with temperature
+    #[validate(custom(function = "validate_top_p_value"))]
+    pub top_p: Option<f32>,
+
+    /// A list of tools the model may call
+    pub tools: Option<Vec<Tool>>,
+
+    /// Controls which (if any) tool is called by the model
+    pub tool_choice: Option<ToolChoice>,
+
+    /// Effort level for reasoning models (none, minimal, low, medium, high,
+    /// xhigh, max)
+    pub reasoning_effort: Option<ReasoningEffort>,
+
+    /// Whether to enable parallel function calling during tool use
+    pub parallel_tool_calls: Option<bool>,
+
+    /// A unique identifier representing your end-user
+    pub user: Option<String>,
+
+    // -------- vLLM Sampling Parameters --------
+    /// Use beam search instead of sampling
+    #[serde(default)]
+    pub use_beam_search: bool,
+
+    /// Top-k sampling parameter
+    pub top_k: Option<u32>,
+
+    /// Min-p nucleus sampling parameter
+    #[validate(range(min = 0.0, max = 1.0))]
+    pub min_p: Option<f32>,
+
+    /// Repetition penalty for reducing repetitive text
+    #[validate(range(min = 0.0, max = 2.0))]
+    pub repetition_penalty: Option<f32>,
+
+    /// Length penalty for beam search
+    pub length_penalty: Option<f32>,
+
+    /// Specific token IDs to use as stop conditions
+    pub stop_token_ids: Option<Vec<u32>>,
+
+    /// Include stop string in output
+    #[serde(default)]
+    pub include_stop_str_in_output: bool,
+
+    /// Ignore end-of-sequence tokens during generation
+    #[serde(default)]
+    pub ignore_eos: bool,
+
+    /// Minimum number of tokens to generate
+    #[validate(range(min = 1))]
+    pub min_tokens: Option<u32>,
+
+    /// Skip special tokens during detokenization
+    #[serde(default = "default_true")]
+    pub skip_special_tokens: bool,
+
+    /// Add spaces between special tokens during detokenization
+    #[serde(default = "default_true")]
+    pub spaces_between_special_tokens: bool,
+
+    /// Truncate prompt tokens to this length
+    pub truncate_prompt_tokens: Option<i64>,
+
+    /// Number of prompt logprobs to return
+    pub prompt_logprobs: Option<i32>,
+
+    /// Restrict output to these token IDs only
+    pub allowed_token_ids: Option<Vec<u32>>,
+
+    /// List of bad words to avoid during generation
+    pub bad_words: Option<Vec<String>>,
+
+    // -------- Extra vLLM Parameters --------
+    /// Token budget for reasoning/thinking
+    pub thinking_token_budget: Option<u32>,
+
+    /// Whether to include reasoning content in the response
+    #[serde(default = "default_true")]
+    pub include_reasoning: bool,
+
+    /// If true, the new message will be prepended with the last message if they
+    /// belong to the same role.
+    #[serde(default)]
+    pub echo: bool,
+
+    /// Whether to add the generation prompt to the chat template.
+    ///
+    /// When omitted, the request follows the API default behavior, which is
+    /// equivalent to `true` unless `continue_final_message=true` selects
+    /// final assistant continuation instead.
+    pub add_generation_prompt: Option<bool>,
+
+    /// Continue generating from final assistant message
+    #[serde(default)]
+    pub continue_final_message: bool,
+
+    /// Whether to add special tokens (e.g. BOS) to the prompt
+    #[serde(default)]
+    pub add_special_tokens: bool,
+
+    /// Documents for RAG (retrieval-augmented generation)
+    pub documents: Option<Vec<Value>>,
+
+    /// Jinja chat template override
+    pub chat_template: Option<String>,
+
+    /// Additional keyword args passed to the chat template renderer
+    pub chat_template_kwargs: Option<HashMap<String, Value>>,
+
+    /// Additional kwargs for media IO connectors, keyed by modality
+    pub media_io_kwargs: Option<HashMap<String, Value>>,
+
+    /// Additional kwargs for the HF processor
+    pub mm_processor_kwargs: Option<HashMap<String, Value>>,
+
+    /// Additional kwargs for structured outputs
+    pub structured_outputs: Option<Value>,
+
+    /// Request scheduling priority (lower means earlier; default 0)
+    pub priority: Option<i32>,
+
+    /// External request ID used for response correlation.
+    pub request_id: Option<String>,
+
+    /// Tokens represented as strings of the form 'token_id:{token_id}' in
+    /// logprobs
+    pub return_tokens_as_token_ids: Option<bool>,
+
+    /// Include token IDs alongside generated text
+    pub return_token_ids: Option<bool>,
+
+    /// Salt for prefix cache isolation in multi-user environments
+    pub cache_salt: Option<String>,
+
+    /// KV transfer parameters for disaggregated serving
+    pub kv_transfer_params: Option<HashMap<String, Value>>,
+
+    /// Additional request parameters with string or numeric values for custom
+    /// extensions
+    pub vllm_xargs: Option<HashMap<String, Value>>,
+
+    /// Parameters for detecting repetitive N-gram patterns in output tokens
+    pub repetition_detection: Option<Value>,
+}
+
+impl Default for ChatCompletionRequest {
+    #[expect(deprecated)]
+    fn default() -> Self {
+        Self {
+            messages: Vec::new(),
+            model: default_model(),
+            frequency_penalty: None,
+            logit_bias: None,
+            logprobs: false,
+            top_logprobs: None,
+            max_tokens: None,
+            max_completion_tokens: None,
+            n: None,
+            presence_penalty: None,
+            response_format: None,
+            seed: None,
+            stop: None,
+            stream: false,
+            stream_options: None,
+            temperature: None,
+            top_p: None,
+            tools: None,
+            tool_choice: None,
+            reasoning_effort: None,
+            thinking_token_budget: None,
+            include_reasoning: true,
+            parallel_tool_calls: None,
+            user: None,
+            use_beam_search: false,
+            top_k: None,
+            min_p: None,
+            repetition_penalty: None,
+            length_penalty: None,
+            stop_token_ids: None,
+            include_stop_str_in_output: false,
+            ignore_eos: false,
+            min_tokens: None,
+            skip_special_tokens: true,
+            spaces_between_special_tokens: true,
+            truncate_prompt_tokens: None,
+            prompt_logprobs: None,
+            allowed_token_ids: None,
+            bad_words: None,
+            echo: false,
+            add_generation_prompt: None,
+            continue_final_message: false,
+            add_special_tokens: false,
+            documents: None,
+            chat_template: None,
+            chat_template_kwargs: None,
+            media_io_kwargs: None,
+            mm_processor_kwargs: None,
+            structured_outputs: None,
+            priority: None,
+            request_id: None,
+            return_tokens_as_token_ids: None,
+            return_token_ids: None,
+            cache_salt: None,
+            kv_transfer_params: None,
+            vllm_xargs: None,
+            repetition_detection: None,
+        }
+    }
+}
+
+impl Normalizable for ChatCompletionRequest {
+    /// Normalize the request by applying migrations and defaults.
+    fn normalize(&mut self) {
+        // Migrate deprecated max_tokens → max_completion_tokens
+        #[expect(deprecated)]
+        if self.max_completion_tokens.is_none() && self.max_tokens.is_some() {
+            self.max_completion_tokens = self.max_tokens;
+            self.max_tokens = None;
+        }
+
+        // Apply tool_choice defaults
+        // If tools is None, leave tool_choice as None (don't set it)
+        if self.tool_choice.is_none()
+            && let Some(tools) = &self.tools
+        {
+            let choice_value = if tools.is_empty() {
+                ToolChoiceValue::None
+            } else {
+                ToolChoiceValue::Auto
+            };
+            self.tool_choice = Some(ToolChoice::Value(choice_value));
+        }
+    }
+}
+
+/// Mirrors the Python vLLM `ChatCompletionResponse` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub(super) struct ChatCompletionResponse {
+    pub id: String,
+    pub object: String,
+    pub created: u64,
+    pub model: String,
+    pub choices: Vec<ChatCompletionChoice>,
+    pub usage: Option<Usage>,
+    pub system_fingerprint: Option<String>,
+    pub prompt_logprobs: Option<Vec<Option<HashMap<String, f32>>>>,
+    pub prompt_token_ids: Option<Vec<u32>>,
+    pub kv_transfer_params: Option<Value>,
+}
+
+/// Mirrors the Python vLLM `ChatCompletionResponseChoice` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub(super) struct ChatCompletionChoice {
+    pub index: u32,
+    pub message: ChatCompletionMessage,
+    pub logprobs: Option<ChatLogProbs>,
+    pub finish_reason: Option<String>,
+    pub stop_reason: Option<Value>,
+    pub token_ids: Option<Vec<u32>>,
+}
+
+/// A literal type for the "assistant" role, since the API only allows that
+/// specific value in responses.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, SerializeDisplay)]
+pub(super) struct AssistantRole;
+
+impl fmt::Display for AssistantRole {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str("assistant")
+    }
+}
+
+/// Mirrors the Python vLLM response `ChatMessage` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub(super) struct ChatCompletionMessage {
+    pub role: AssistantRole,
+    pub content: Option<String>,
+    pub tool_calls: Option<Vec<ToolCall>>,
+    pub reasoning: Option<String>,
+}
+
+/// Mirrors the Python vLLM `ChatCompletionStreamResponse` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub(super) struct ChatCompletionStreamResponse {
+    pub id: String,
+    pub object: String,
+    pub created: u64,
+    pub model: String,
+    pub choices: Vec<ChatCompletionStreamChoice>,
+    pub usage: Option<Usage>,
+    pub prompt_token_ids: Option<Vec<u32>>,
+}
+
+impl ChatCompletionStreamResponse {
+    /// Create a stream response with the standard envelope fields pre-filled.
+    pub fn new(id: &str, model: &str, created: u64) -> Self {
+        Self {
+            id: id.to_string(),
+            object: "chat.completion.chunk".to_string(),
+            created,
+            model: model.to_string(),
+            choices: Vec::new(),
+            usage: None,
+            prompt_token_ids: None,
+        }
+    }
+}
+
+/// Mirrors the Python vLLM `ChatCompletionResponseStreamChoice` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Default, Serialize)]
+pub(super) struct ChatCompletionStreamChoice {
+    pub index: u32,
+    pub delta: ChatMessageDelta,
+    pub logprobs: Option<ChatLogProbs>,
+    pub finish_reason: Option<String>,
+    pub stop_reason: Option<Value>,
+    pub token_ids: Option<Vec<u32>>,
+}
+
+/// Mirrors the Python vLLM `DeltaMessage` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Default, Serialize)]
+pub(super) struct ChatMessageDelta {
+    pub role: Option<AssistantRole>,
+    pub content: Option<String>,
+    pub tool_calls: Option<Vec<ToolCallDelta>>,
+    pub reasoning: Option<String>,
+}
+
+fn default_model() -> String {
+    UNKNOWN_MODEL_ID.to_string()
+}
+
+/// Validates messages array is not empty and has valid content
+fn validate_messages(messages: &[ChatMessage]) -> Result<(), validator::ValidationError> {
+    if messages.is_empty() {
+        return Err(validator::ValidationError::new("messages cannot be empty"));
+    }
+
+    for msg in messages {
+        if let ChatMessage::User { content, .. } = msg {
+            match content {
+                MessageContent::Text(text) if text.is_empty() => {
+                    return Err(validator::ValidationError::new(
+                        "message content cannot be empty",
+                    ));
+                }
+                MessageContent::Parts(parts) if parts.is_empty() => {
+                    return Err(validator::ValidationError::new(
+                        "message content parts cannot be empty",
+                    ));
+                }
+                _ => {}
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Schema-level validation for cross-field dependencies
+fn validate_chat_cross_parameters(
+    req: &ChatCompletionRequest,
+) -> Result<(), validator::ValidationError> {
+    // 1. Validate logprobs dependency
+    if req.top_logprobs.is_some() && !req.logprobs {
+        let mut e = validator::ValidationError::new("top_logprobs_requires_logprobs");
+        e.message = Some("top_logprobs is only allowed when logprobs is enabled".into());
+        return Err(e);
+    }
+
+    // 2. Validate stream_options dependency
+    if req.stream_options.is_some() && !req.stream {
+        let mut e = validator::ValidationError::new("stream_options_requires_stream");
+        e.message = Some("stream_options can only be used when stream is true".into());
+        return Err(e);
+    }
+
+    // 3. Validate token limits - min <= max
+    if let (Some(min_tokens), Some(max_completion_tokens)) =
+        (req.min_tokens, req.max_completion_tokens)
+        && min_tokens > max_completion_tokens
+    {
+        let mut e = validator::ValidationError::new("min_tokens_exceeds_max_completion_tokens");
+        e.message = Some("min_tokens cannot be greater than max_completion_tokens".into());
+        return Err(e);
+    }
+
+    #[expect(deprecated, reason = "Local type still mirrors legacy upstream field")]
+    if let (Some(min_tokens), Some(max_tokens)) = (req.min_tokens, req.max_tokens)
+        && min_tokens > max_tokens
+    {
+        let mut e = validator::ValidationError::new("min_tokens_exceeds_max_tokens");
+        e.message = Some("min_tokens cannot be greater than max_tokens".into());
+        return Err(e);
+    }
+
+    // 4. Validate response format JSON schema name
+    if let Some(ResponseFormat::JsonSchema { json_schema }) = &req.response_format
+        && json_schema.name.is_empty()
+    {
+        let mut e = validator::ValidationError::new("json_schema_name_empty");
+        e.message = Some("JSON schema name cannot be empty".into());
+        return Err(e);
+    }
+
+    // 5. Validate tool_choice requires tools (except for "none")
+    if let Some(ref tool_choice) = req.tool_choice {
+        let has_tools = req.tools.as_ref().is_some_and(|t| !t.is_empty());
+
+        // Check if tool_choice is anything other than "none"
+        let is_some_choice = !matches!(tool_choice, ToolChoice::Value(ToolChoiceValue::None));
+
+        if is_some_choice && !has_tools {
+            let mut e = validator::ValidationError::new("tool_choice_requires_tools");
+            e.message = Some("Invalid value for 'tool_choice': 'tool_choice' is only allowed when 'tools' are specified.".into());
+            return Err(e);
+        }
+
+        // Additional validation when tools are present
+        if let Some(tools) = req.tools.as_ref().filter(|t| !t.is_empty()) {
+            match tool_choice {
+                ToolChoice::Function { function, .. } => {
+                    // Validate that the specified function name exists in tools
+                    let function_exists = tools.iter().any(|tool| {
+                        tool.tool_type == "function" && tool.function.name == function.name
+                    });
+
+                    if !function_exists {
+                        let mut e =
+                            validator::ValidationError::new("tool_choice_function_not_found");
+                        e.message = Some(
+                            format!(
+                            "Invalid value for 'tool_choice': function '{}' not found in 'tools'.",
+                            function.name
+                        )
+                            .into(),
+                        );
+                        return Err(e);
+                    }
+                }
+                ToolChoice::AllowedTools {
+                    mode,
+                    tools: allowed_tools,
+                    ..
+                } => {
+                    // Validate mode is "auto" or "required"
+                    if mode != "auto" && mode != "required" {
+                        let mut e = validator::ValidationError::new("tool_choice_invalid_mode");
+                        e.message = Some(format!(
+                            "Invalid value for 'tool_choice.mode': must be 'auto' or 'required', got '{mode}'."
+                        ).into());
+                        return Err(e);
+                    }
+
+                    // Validate that all ToolReferences are Function type (Chat API only supports
+                    // function tools)
+                    for tool_ref in allowed_tools {
+                        match tool_ref {
+                            ToolReference::Function { name } => {
+                                // Validate that the function exists in tools array
+                                let tool_exists = tools.iter().any(|tool| {
+                                    tool.tool_type == "function" && tool.function.name == *name
+                                });
+
+                                if !tool_exists {
+                                    let mut e = validator::ValidationError::new(
+                                        "tool_choice_tool_not_found",
+                                    );
+                                    e.message = Some(
+                                        format!(
+                                            "Invalid value for 'tool_choice.tools': tool '{name}' not found in 'tools'."
+                                        )
+                                        .into(),
+                                    );
+                                    return Err(e);
+                                }
+                            }
+                            _ => {
+                                // Chat Completion API only supports function tools in tool_choice
+                                let mut e = validator::ValidationError::new(
+                                    "tool_choice_invalid_tool_type",
+                                );
+                                e.message = Some(
+                                    format!(
+                                        "Invalid value for 'tool_choice.tools': Chat Completion API only supports function tools, got '{}'.",
+                                        tool_ref.identifier()
+                                    )
+                                    .into(),
+                                );
+                                return Err(e);
+                            }
+                        }
+                    }
+                }
+                ToolChoice::Value(_) => {}
+            }
+        }
+    }
+
+    Ok(())
+}
diff --git a/rust/src/server/src/routes/openai/chat_completions/validate.rs b/rust/src/server/src/routes/openai/chat_completions/validate.rs
new file mode 100644
index 000000000000..fbd10eea0cb0
--- /dev/null
+++ b/rust/src/server/src/routes/openai/chat_completions/validate.rs
@@ -0,0 +1,410 @@
+use super::types::ChatCompletionRequest;
+use crate::error::{ApiError, bail_invalid_request};
+use crate::routes::openai::utils::types::{ChatMessage, Tool, ToolChoice, ToolChoiceValue};
+
+/// Enforce the minimal compatibility contract for the Rust OpenAI server.
+pub(super) fn validate_request_compat(
+    request: &ChatCompletionRequest,
+    served_model_names: &[String],
+) -> Result<(), ApiError> {
+    if !served_model_names.iter().any(|n| n == &request.model) {
+        return Err(ApiError::model_not_found(request.model.clone()));
+    }
+
+    if request.stream_options.is_some() && !request.stream {
+        bail_invalid_request!(
+            param = "stream_options",
+            "stream_options are only supported when stream=true."
+        );
+    }
+
+    if request.n.unwrap_or(1) > 1 {
+        bail_invalid_request!(param = "n", "Only n=1 is supported.");
+    }
+
+    if request.top_logprobs.is_some() && !request.logprobs {
+        bail_invalid_request!(
+            param = "top_logprobs",
+            "top_logprobs can only be used when logprobs=true."
+        );
+    }
+
+    if let Some(prompt_logprobs) = request.prompt_logprobs {
+        if prompt_logprobs < 0 && prompt_logprobs != -1 {
+            bail_invalid_request!(
+                param = "prompt_logprobs",
+                "prompt_logprobs must be a non-negative value or -1."
+            );
+        }
+
+        if request.stream && (prompt_logprobs > 0 || prompt_logprobs == -1) {
+            bail_invalid_request!(
+                param = "prompt_logprobs",
+                "prompt_logprobs are not available when stream=true."
+            );
+        }
+    }
+
+    if let Some(tools) = request.tools.as_ref() {
+        validate_function_tools(tools, "tools")?;
+    }
+
+    for message in &request.messages {
+        if let ChatMessage::Developer {
+            tools: Some(tools), ..
+        } = message
+        {
+            validate_function_tools(tools, "messages[].tools")?;
+        }
+    }
+
+    if let Some(tool_choice) = &request.tool_choice {
+        match tool_choice {
+            ToolChoice::Value(ToolChoiceValue::Auto | ToolChoiceValue::None) => {}
+            ToolChoice::Value(ToolChoiceValue::Required) => {
+                bail_invalid_request!(
+                    param = "tool_choice",
+                    "tool_choice=required is not supported yet."
+                );
+            }
+            ToolChoice::Function { .. } => {
+                bail_invalid_request!(
+                    param = "tool_choice",
+                    "Named function tool_choice is not supported yet."
+                );
+            }
+            ToolChoice::AllowedTools { .. } => {
+                bail_invalid_request!(
+                    param = "tool_choice",
+                    "allowed_tools tool_choice is not supported yet."
+                );
+            }
+        }
+    }
+
+    if request.use_beam_search {
+        bail_invalid_request!(
+            param = "use_beam_search",
+            "use_beam_search is not supported."
+        );
+    }
+
+    // ---- Reject parameters that are accepted for deserialization but not yet
+    // implemented ----
+
+    if request.parallel_tool_calls.is_some() {
+        bail_invalid_request!(
+            param = "parallel_tool_calls",
+            "parallel_tool_calls is not supported."
+        );
+    }
+
+    reject_non_default(
+        request.length_penalty.as_ref(),
+        "length_penalty",
+        "length_penalty is not supported.",
+    )?;
+    if !request.spaces_between_special_tokens {
+        bail_invalid_request!(
+            param = "spaces_between_special_tokens",
+            "spaces_between_special_tokens is not supported."
+        );
+    }
+    reject_non_default(
+        request.truncate_prompt_tokens.as_ref(),
+        "truncate_prompt_tokens",
+        "truncate_prompt_tokens is not supported.",
+    )?;
+    reject_non_default(
+        request.thinking_token_budget.as_ref(),
+        "thinking_token_budget",
+        "thinking_token_budget is not supported.",
+    )?;
+    if !request.include_reasoning {
+        bail_invalid_request!(
+            param = "include_reasoning",
+            "include_reasoning is not supported."
+        );
+    }
+    reject_non_default(
+        request.media_io_kwargs.as_ref(),
+        "media_io_kwargs",
+        "media_io_kwargs is not supported.",
+    )?;
+    reject_non_default(
+        request.mm_processor_kwargs.as_ref(),
+        "mm_processor_kwargs",
+        "mm_processor_kwargs is not supported.",
+    )?;
+    reject_non_default(
+        request.repetition_detection.as_ref(),
+        "repetition_detection",
+        "repetition_detection is not supported.",
+    )?;
+
+    if let Some(options) = &request.stream_options
+        && options.continuous_usage_stats.is_some()
+    {
+        bail_invalid_request!(
+            param = "stream_options",
+            "continuous_usage_stats is not supported."
+        );
+    }
+
+    Ok(())
+}
+
+/// Reject one option unless it is entirely absent.
+fn reject_non_default<T>(
+    value: Option<&T>,
+    param: &'static str,
+    message: &str,
+) -> Result<(), ApiError> {
+    if value.is_some() {
+        bail_invalid_request!(param = param, "{}", message);
+    }
+    Ok(())
+}
+
+fn validate_function_tools(tools: &[Tool], param: &'static str) -> Result<(), ApiError> {
+    for tool in tools {
+        if tool.tool_type != "function" {
+            bail_invalid_request!(param = param, "Only function tools are supported.");
+        }
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+
+    use serde_json::json;
+    use vllm_chat::ReasoningEffort;
+
+    use super::validate_request_compat;
+    use crate::routes::openai::chat_completions::types::ChatCompletionRequest;
+    use crate::routes::openai::utils::structured_outputs::ResponseFormat;
+    use crate::routes::openai::utils::types::{
+        ChatMessage, Function, FunctionChoice, MessageContent, StringOrArray, Tool, ToolChoice,
+        ToolChoiceValue, ToolReference,
+    };
+
+    fn served(names: &[&str]) -> Vec<String> {
+        names.iter().map(|s| s.to_string()).collect()
+    }
+
+    fn base_request() -> ChatCompletionRequest {
+        ChatCompletionRequest {
+            model: "Qwen/Qwen1.5-0.5B-Chat".to_string(),
+            messages: vec![ChatMessage::User {
+                content: MessageContent::Text("hello".to_string()),
+                name: None,
+            }],
+            stream: true,
+            ..Default::default()
+        }
+    }
+
+    #[test]
+    fn validate_request_compat_accepts_stop() {
+        let request = ChatCompletionRequest {
+            stop: Some(StringOrArray::String("stop".to_string())),
+            ..base_request()
+        };
+
+        validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"]))
+            .expect("stop strings should be accepted");
+    }
+
+    #[test]
+    fn validate_request_compat_accepts_non_zero_penalties_and_function_tools() {
+        let request = ChatCompletionRequest {
+            frequency_penalty: Some(0.5),
+            presence_penalty: Some(0.25),
+            min_p: Some(0.2),
+            repetition_penalty: Some(1.1),
+            seed: Some(7),
+            ..base_request()
+        };
+        validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"]))
+            .expect("sampling fields should be accepted");
+
+        let request = ChatCompletionRequest {
+            tools: Some(vec![Tool {
+                tool_type: "function".to_string(),
+                function: Function {
+                    name: "tool".to_string(),
+                    description: None,
+                    parameters: json!({}),
+                    strict: None,
+                },
+            }]),
+            ..base_request()
+        };
+        validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"]))
+            .expect("function tools should be accepted");
+
+        let request = ChatCompletionRequest {
+            messages: vec![ChatMessage::Developer {
+                content: MessageContent::Text("policy".to_string()),
+                tools: Some(vec![Tool {
+                    tool_type: "function".to_string(),
+                    function: Function {
+                        name: "tool".to_string(),
+                        description: None,
+                        parameters: json!({}),
+                        strict: None,
+                    },
+                }]),
+                name: None,
+            }],
+            ..base_request()
+        };
+        validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"]))
+            .expect("developer function tools should be accepted");
+    }
+
+    #[test]
+    fn validate_request_compat_rejects_non_function_developer_tools() {
+        let request = ChatCompletionRequest {
+            messages: vec![ChatMessage::Developer {
+                content: MessageContent::Text("policy".to_string()),
+                tools: Some(vec![Tool {
+                    tool_type: "mcp".to_string(),
+                    function: Function {
+                        name: "tool".to_string(),
+                        description: None,
+                        parameters: json!({}),
+                        strict: None,
+                    },
+                }]),
+                name: None,
+            }],
+            ..base_request()
+        };
+
+        assert!(validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"])).is_err());
+    }
+
+    #[test]
+    fn validate_request_compat_accepts_output_logprobs() {
+        let request = ChatCompletionRequest {
+            logprobs: true,
+            ..base_request()
+        };
+        validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"]))
+            .expect("logprobs should be accepted");
+    }
+
+    #[test]
+    fn validate_request_compat_accepts_reasoning_effort() {
+        let request = ChatCompletionRequest {
+            reasoning_effort: Some(ReasoningEffort::Max),
+            chat_template_kwargs: Some(HashMap::from([(
+                "reasoning_effort".to_string(),
+                json!("low"),
+            )])),
+            ..base_request()
+        };
+
+        validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"]))
+            .expect("reasoning_effort should be accepted");
+    }
+
+    #[test]
+    fn validate_request_compat_rejects_top_logprobs_without_logprobs() {
+        let request = ChatCompletionRequest {
+            top_logprobs: Some(0),
+            ..base_request()
+        };
+        assert!(validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"])).is_err());
+    }
+
+    #[test]
+    fn validate_request_compat_rejects_streaming_prompt_logprobs_requests() {
+        let request = ChatCompletionRequest {
+            prompt_logprobs: Some(1),
+            ..base_request()
+        };
+        assert!(validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"])).is_err());
+
+        let request = ChatCompletionRequest {
+            prompt_logprobs: Some(-1),
+            ..base_request()
+        };
+        assert!(validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"])).is_err());
+    }
+
+    #[test]
+    fn validate_request_compat_rejects_invalid_prompt_logprobs_value() {
+        let request = ChatCompletionRequest {
+            stream: false,
+            prompt_logprobs: Some(-2),
+            ..base_request()
+        };
+        assert!(validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"])).is_err());
+    }
+
+    #[test]
+    fn validate_request_compat_accepts_response_format() {
+        let request = ChatCompletionRequest {
+            response_format: Some(ResponseFormat::Text),
+            ..base_request()
+        };
+        validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"]))
+            .expect("response_format=text should be accepted");
+
+        let request = ChatCompletionRequest {
+            response_format: Some(ResponseFormat::JsonObject),
+            ..base_request()
+        };
+        validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"]))
+            .expect("response_format=json_object should be accepted");
+    }
+
+    #[test]
+    fn validate_request_compat_accepts_noop_tool_choice_none() {
+        let request = ChatCompletionRequest {
+            tool_choice: Some(ToolChoice::Value(ToolChoiceValue::None)),
+            ..base_request()
+        };
+
+        validate_request_compat(&request, &served(&["Qwen/Qwen1.5-0.5B-Chat"]))
+            .expect("tool_choice=none is ok");
+    }
+
+    #[test]
+    fn validate_request_compat_rejects_required_and_named_tool_choices() {
+        let required = ChatCompletionRequest {
+            tool_choice: Some(ToolChoice::Value(ToolChoiceValue::Required)),
+            ..base_request()
+        };
+        assert!(validate_request_compat(&required, &served(&["Qwen/Qwen1.5-0.5B-Chat"])).is_err());
+
+        let named = ChatCompletionRequest {
+            tool_choice: Some(ToolChoice::Function {
+                tool_type: "function".to_string(),
+                function: FunctionChoice {
+                    name: "tool".to_string(),
+                },
+            }),
+            ..base_request()
+        };
+        assert!(validate_request_compat(&named, &served(&["Qwen/Qwen1.5-0.5B-Chat"])).is_err());
+
+        let allowed_tools = ChatCompletionRequest {
+            tool_choice: Some(ToolChoice::AllowedTools {
+                tool_type: "allowed_tools".to_string(),
+                mode: "auto".to_string(),
+                tools: vec![ToolReference::Function {
+                    name: "tool".to_string(),
+                }],
+            }),
+            ..base_request()
+        };
+        assert!(
+            validate_request_compat(&allowed_tools, &served(&["Qwen/Qwen1.5-0.5B-Chat"])).is_err()
+        );
+    }
+}
diff --git a/rust/src/server/src/routes/openai/completions.rs b/rust/src/server/src/routes/openai/completions.rs
new file mode 100644
index 000000000000..33813e67687f
--- /dev/null
+++ b/rust/src/server/src/routes/openai/completions.rs
@@ -0,0 +1,571 @@
+mod convert;
+mod types;
+mod validate;
+
+use std::convert::Infallible;
+use std::result::Result;
+use std::sync::Arc;
+
+use asynk_strim_attr::{TryYielder, try_stream};
+use axum::Json;
+use axum::extract::State;
+use axum::http::HeaderMap;
+use axum::response::sse::{Event, Sse};
+use axum::response::{IntoResponse, Response};
+use futures::{Stream, StreamExt as _, pin_mut};
+use thiserror_ext::AsReport as _;
+use tracing::{debug, error, info, trace};
+use tracing_futures::Instrument as _;
+use vllm_text::{DecodedTextEvent, FinishReason, TextOutputStream, TextOutputStreamExt as _};
+
+use super::utils::logprobs::{
+    collected_logprobs_to_openai, decoded_logprobs_to_openai, decoded_prompt_logprobs_to_maps,
+    text_len,
+};
+use super::utils::types::Usage;
+use crate::error::{ApiError, bail_server_error, server_error};
+use crate::routes::openai::completions::convert::prepare_completion_request;
+use crate::routes::openai::completions::types::{
+    CompletionChoice, CompletionRequest, CompletionResponse, CompletionSseChunk,
+    CompletionStreamChoice, CompletionStreamResponse,
+};
+use crate::routes::openai::utils::types::LogProbs;
+use crate::routes::openai::utils::validated_json::ValidatedJson;
+use crate::state::AppState;
+use crate::utils::{resolve_request_context, unix_timestamp};
+
+/// Validate one completions request and proxy it into the shared `vllm-text`
+/// stack.
+pub async fn completions(
+    State(state): State<Arc<AppState>>,
+    headers: HeaderMap,
+    ValidatedJson(body): ValidatedJson<CompletionRequest>,
+) -> Response {
+    let stream = body.stream;
+    let logprobs = body.logprobs;
+    let request_context = resolve_request_context(&headers, body.request_id.as_deref());
+
+    let prepared =
+        match prepare_completion_request(body, state.served_model_names(), request_context) {
+            Ok(prepared) => prepared,
+            Err(error) => return error.into_response(),
+        };
+    let request_span = tracing::info_span!(
+        "completions",
+        request_id = %prepared.request_id,
+        engine_request_id = tracing::field::Empty,
+    );
+
+    let created = unix_timestamp();
+    let include_prompt_logprobs = prepared.text_request.sampling_params.prompt_logprobs.is_some();
+    let log_request = state.enable_log_requests;
+
+    let text_stream = match state
+        .chat
+        .text()
+        .generate(prepared.text_request)
+        .instrument(request_span.clone())
+        .await
+    {
+        Ok(stream) => stream,
+        Err(error) => {
+            return server_error!(
+                "failed to submit completion request: {}",
+                error.to_report_string()
+            )
+            .into_response();
+        }
+    };
+
+    if stream {
+        let chunk_stream = completion_chunk_stream(
+            text_stream,
+            prepared.request_id,
+            prepared.response_model,
+            created,
+            log_request,
+            prepared.include_usage,
+            prepared.echo,
+            logprobs,
+            prepared.return_token_ids,
+            prepared.return_tokens_as_token_ids,
+        );
+        let sse_stream = completion_sse_stream(chunk_stream).instrument(request_span);
+
+        Sse::new(sse_stream).into_response()
+    } else {
+        let response = match collect_completion(
+            text_stream,
+            prepared.request_id,
+            prepared.response_model,
+            created,
+            prepared.echo,
+            logprobs,
+            include_prompt_logprobs,
+            prepared.return_token_ids,
+            prepared.return_tokens_as_token_ids,
+        )
+        .instrument(request_span.clone())
+        .await
+        {
+            Ok(response) => response,
+            Err(error) => return error.into_response(),
+        };
+
+        if log_request {
+            let usage = response.usage.as_ref();
+            info!(
+                parent: &request_span,
+                model = %response.model,
+                prompt_tokens = usage.map_or(0, |u| u.prompt_tokens),
+                output_tokens = usage.and_then(|u| u.completion_tokens).unwrap_or(0),
+                finish_reason = response.choices.first().and_then(|c| c.finish_reason.as_deref()).unwrap_or("unknown"),
+                "completion finished"
+            );
+        }
+
+        Json(response).into_response()
+    }
+}
+
+async fn collect_completion(
+    stream: impl TextOutputStream,
+    request_id: String,
+    response_model: String,
+    created: u64,
+    echo: Option<String>,
+    requested_logprobs: Option<u32>,
+    include_prompt_logprobs: bool,
+    return_token_ids: bool,
+    return_tokens_as_token_ids: bool,
+) -> Result<CompletionResponse, ApiError> {
+    let collected = stream
+        .collect_output()
+        .await
+        .map_err(|error| server_error!("completion stream failed: {}", error.to_report_string()))?;
+    let finish_reason = collected.finish_reason.clone();
+    let stop_reason = finish_reason
+        .as_stop_reason()
+        .map(|sr| serde_json::to_value(sr).expect("StopReason must serialize to JSON"));
+
+    let prompt_char_count = echo.as_ref().map(|prompt| text_len(prompt)).unwrap_or_default();
+    let prompt_logprobs = if include_prompt_logprobs {
+        let prompt_logprobs = collected.prompt_logprobs.as_ref().ok_or_else(|| {
+            server_error!(
+                "completion response requested prompt_logprobs but generation returned none"
+            )
+        })?;
+        Some(prompt_logprobs)
+    } else {
+        None
+    };
+    let logprobs = if requested_logprobs.is_some() {
+        Some(collected_logprobs_to_openai(
+            &collected,
+            echo.is_some(),
+            prompt_char_count,
+            return_tokens_as_token_ids,
+        )?)
+    } else {
+        None
+    };
+    let prompt_logprobs =
+        prompt_logprobs.map(|lp| decoded_prompt_logprobs_to_maps(lp, return_tokens_as_token_ids));
+    let text = match &echo {
+        None => collected.text,
+        Some(prompt) => format!("{prompt}{}", collected.text),
+    };
+
+    Ok(CompletionResponse {
+        id: request_id,
+        object: "text_completion".to_string(),
+        created,
+        model: response_model,
+        choices: vec![CompletionChoice {
+            index: 0,
+            text,
+            logprobs,
+            finish_reason: Some(completion_finish_reason_to_openai(finish_reason)?.into()),
+            stop_reason,
+            prompt_logprobs,
+            token_ids: return_token_ids.then(|| collected.token_ids.clone()),
+            prompt_token_ids: return_token_ids.then(|| collected.prompt_token_ids.to_vec()),
+        }],
+        usage: Some(Usage::from_counts(
+            collected.prompt_token_ids.len() as u32,
+            collected.token_ids.len() as u32,
+        )),
+        system_fingerprint: None,
+        kv_transfer_params: collected.kv_transfer_params,
+    })
+}
+
+/// Convert one internal decoded-text stream into OpenAI completions chunks.
+#[try_stream]
+async fn completion_chunk_stream(
+    stream: impl TextOutputStream,
+    request_id: String,
+    response_model: String,
+    created: u64,
+    log_request: bool,
+    include_usage: bool,
+    echo: Option<String>,
+    requested_logprobs: Option<u32>,
+    return_token_ids: bool,
+    return_tokens_as_token_ids: bool,
+    mut y: TryYielder<CompletionSseChunk, ApiError>,
+) -> Result<(), ApiError> {
+    pin_mut!(stream);
+    let mut visible_text_len = 0_u32;
+    let mut first_chunk = true;
+
+    while let Some(next) = stream.next().await {
+        match next {
+            Ok(DecodedTextEvent::Start {
+                prompt_token_ids, ..
+            }) => {
+                debug!("completion stream started");
+                if let Some(prompt) = echo.as_ref() {
+                    visible_text_len = text_len(prompt);
+                    let mut chunk =
+                        delta_chunk(&request_id, &response_model, created, prompt.clone(), None);
+                    if return_token_ids && first_chunk {
+                        if let Some(choice) = chunk.choices.first_mut() {
+                            choice.prompt_token_ids = Some(prompt_token_ids.to_vec());
+                        }
+                        first_chunk = false;
+                    }
+                    y.yield_ok(CompletionSseChunk::Chunk(chunk)).await;
+                } else if return_token_ids {
+                    // Emit a chunk with prompt_token_ids in the first streaming response
+                    let mut chunk =
+                        delta_chunk(&request_id, &response_model, created, String::new(), None);
+                    if let Some(choice) = chunk.choices.first_mut() {
+                        choice.prompt_token_ids = Some(prompt_token_ids.to_vec());
+                    }
+                    first_chunk = false;
+                    y.yield_ok(CompletionSseChunk::Chunk(chunk)).await;
+                }
+            }
+            Ok(DecodedTextEvent::TextDelta {
+                delta,
+                token_ids,
+                logprobs,
+                finished,
+            }) => {
+                let delta_text_len = text_len(&delta);
+                let logprobs = if requested_logprobs.is_some() {
+                    let decoded_logprobs = logprobs.as_ref().ok_or_else(|| {
+                        server_error!(
+                            "completion stream requested logprobs but generation returned none"
+                        )
+                    })?;
+                    Some(decoded_logprobs_to_openai(
+                        decoded_logprobs,
+                        visible_text_len,
+                        return_tokens_as_token_ids,
+                    )?)
+                } else {
+                    None
+                };
+                let mut chunk = delta_chunk(&request_id, &response_model, created, delta, logprobs);
+                if return_token_ids && let Some(choice) = chunk.choices.first_mut() {
+                    choice.token_ids = Some(token_ids);
+                }
+                y.yield_ok(CompletionSseChunk::Chunk(chunk)).await;
+                visible_text_len = visible_text_len.saturating_add(delta_text_len);
+
+                if let Some(finished) = finished {
+                    if log_request {
+                        info!(
+                            stream = true,
+                            model = %response_model,
+                            prompt_tokens = finished.prompt_token_count,
+                            output_tokens = finished.output_token_count,
+                            finish_reason = finished.finish_reason.as_str(),
+                            "completion finished"
+                        );
+                    }
+                    y.yield_ok(CompletionSseChunk::Chunk(final_chunk(
+                        &request_id,
+                        &response_model,
+                        created,
+                        finished.finish_reason,
+                    )?))
+                    .await;
+
+                    if include_usage {
+                        y.yield_ok(CompletionSseChunk::Usage(usage_chunk(
+                            &request_id,
+                            &response_model,
+                            created,
+                            Usage::from_counts(
+                                finished.prompt_token_count as u32,
+                                finished.output_token_count as u32,
+                            ),
+                        )))
+                        .await;
+                    }
+                }
+            }
+            Err(error) => {
+                error!(
+                    error = %error.as_report(),
+                    "completion stream failed"
+                );
+                bail_server_error!("{}", error.to_report_string());
+            }
+        }
+    }
+    Ok(())
+}
+
+fn delta_chunk(
+    request_id: &str,
+    response_model: &str,
+    created: u64,
+    text: String,
+    logprobs: Option<LogProbs>,
+) -> CompletionStreamResponse {
+    let mut chunk = CompletionStreamResponse::new(request_id, response_model, created);
+    chunk.choices.push(CompletionStreamChoice {
+        text,
+        logprobs,
+        ..Default::default()
+    });
+    chunk
+}
+
+fn final_chunk(
+    request_id: &str,
+    response_model: &str,
+    created: u64,
+    finish_reason: FinishReason,
+) -> Result<CompletionStreamResponse, ApiError> {
+    let finish_reason = completion_finish_reason_to_openai(finish_reason)?;
+
+    let mut chunk = CompletionStreamResponse::new(request_id, response_model, created);
+    chunk.choices.push(CompletionStreamChoice {
+        finish_reason: Some(finish_reason.to_string()),
+        ..Default::default()
+    });
+    Ok(chunk)
+}
+
+fn completion_finish_reason_to_openai(
+    finish_reason: FinishReason,
+) -> Result<&'static str, ApiError> {
+    match finish_reason {
+        FinishReason::Stop(_) | FinishReason::Repetition => Ok("stop"),
+        FinishReason::Length => Ok("length"),
+        FinishReason::Abort => Ok("abort"),
+        FinishReason::Error => {
+            bail_server_error!("Internal server error");
+        }
+    }
+}
+
+fn usage_chunk(
+    request_id: &str,
+    response_model: &str,
+    created: u64,
+    usage: Usage,
+) -> CompletionStreamResponse {
+    let mut chunk = CompletionStreamResponse::new(request_id, response_model, created);
+    chunk.usage = Some(usage);
+    chunk
+}
+
+/// Convert one chunk stream into OpenAI-style SSE events.
+///
+/// OpenAI-style streaming errors are encoded as ordinary `data: {"error": ...}`
+/// events followed by `data: [DONE]`, so the transport stream itself stays
+/// infallible even when generation fails after the HTTP response has started.
+#[try_stream]
+async fn completion_sse_stream(
+    stream: impl Stream<Item = Result<CompletionSseChunk, ApiError>>,
+    mut y: TryYielder<Event, Infallible>,
+) -> Result<(), Infallible> {
+    pin_mut!(stream);
+
+    while let Some(next) = stream.next().await {
+        match next {
+            Ok(chunk) => y.yield_ok(to_sse_event(&chunk)).await,
+            Err(error) => {
+                y.yield_ok(to_error_sse_event(&error)).await;
+                break;
+            }
+        }
+    }
+
+    y.yield_ok(done_sse_event()).await;
+    Ok(())
+}
+
+/// Serialize one OpenAI chunk payload into one SSE `data:` event.
+fn to_sse_event(chunk: &CompletionSseChunk) -> Event {
+    let payload = serde_json::to_string(chunk).expect("completion chunk must serialize to JSON");
+    trace!(payload, "completion emitting chunk");
+    Event::default().data(payload)
+}
+
+/// Serialize one OpenAI error payload into one SSE `data:` event.
+fn to_error_sse_event(error: &ApiError) -> Event {
+    let payload = serde_json::to_string(&error.to_error_response())
+        .expect("ErrorResponse must serialize to JSON");
+    trace!(payload, "completion emitting error");
+    Event::default().data(payload)
+}
+
+/// Build the terminal OpenAI SSE sentinel event.
+fn done_sse_event() -> Event {
+    trace!("completion emitting done");
+    Event::default().data("[DONE]")
+}
+
+#[cfg(test)]
+mod tests {
+    use futures::{StreamExt as _, stream};
+    use itertools::Itertools as _;
+    use vllm_text::{
+        DecodedLogprobs, DecodedPositionLogprobs, DecodedTextEvent, DecodedTokenLogprob,
+        FinishReason, Finished,
+    };
+
+    use super::{CompletionSseChunk, completion_chunk_stream, final_chunk};
+
+    #[test]
+    fn final_chunk_maps_stop_finish_reason() {
+        let chunk = final_chunk("cmpl-1", "model", 1, FinishReason::stop_eos())
+            .expect("finish reason valid");
+        assert_eq!(chunk.choices[0].finish_reason.as_deref(), Some("stop"));
+        assert_eq!(chunk.choices[0].text, "");
+    }
+
+    #[test]
+    fn final_chunk_maps_length_finish_reason() {
+        let chunk =
+            final_chunk("cmpl-1", "model", 1, FinishReason::Length).expect("finish reason valid");
+        assert_eq!(chunk.choices[0].finish_reason.as_deref(), Some("length"));
+    }
+
+    #[test]
+    fn final_chunk_maps_abort_finish_reason() {
+        let chunk =
+            final_chunk("cmpl-1", "model", 1, FinishReason::Abort).expect("finish reason valid");
+        assert_eq!(chunk.choices[0].finish_reason.as_deref(), Some("abort"));
+    }
+
+    #[test]
+    fn final_chunk_rejects_error_finish_reason() {
+        assert!(final_chunk("cmpl-1", "model", 1, FinishReason::Error).is_err());
+    }
+
+    #[tokio::test]
+    async fn completion_chunk_stream_maps_streaming_logprobs() {
+        let stream = stream::iter(vec![
+            Ok(DecodedTextEvent::Start {
+                prompt_token_ids: vec![1, 2, 3, 4, 5].into(),
+                prompt_logprobs: None,
+            }),
+            Ok(DecodedTextEvent::TextDelta {
+                delta: "h".to_string(),
+                token_ids: vec![b'h' as u32],
+                logprobs: Some(DecodedLogprobs {
+                    positions: vec![DecodedPositionLogprobs {
+                        entries: vec![
+                            DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "h".to_string(),
+                                logprob: -0.1,
+                                rank: 1,
+                            },
+                            DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "H".to_string(),
+                                logprob: -0.2,
+                                rank: 1,
+                            },
+                        ],
+                    }],
+                }),
+                finished: None,
+            }),
+            Ok(DecodedTextEvent::TextDelta {
+                delta: String::new(),
+                token_ids: vec![b'!' as u32],
+                logprobs: Some(DecodedLogprobs {
+                    positions: vec![DecodedPositionLogprobs {
+                        entries: vec![
+                            DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "!".to_string(),
+                                logprob: -0.3,
+                                rank: 1,
+                            },
+                            DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "?".to_string(),
+                                logprob: -0.4,
+                                rank: 1,
+                            },
+                        ],
+                    }],
+                }),
+                finished: Some(Finished {
+                    prompt_token_count: 5,
+                    output_token_count: 2,
+                    finish_reason: FinishReason::stop_eos(),
+                    kv_transfer_params: None,
+                }),
+            }),
+        ]);
+
+        let chunks = completion_chunk_stream(
+            stream,
+            "cmpl-1".to_string(),
+            "model".to_string(),
+            1,
+            false,
+            false,
+            None,
+            Some(1),
+            false,
+            false,
+        )
+        .collect::<Vec<_>>()
+        .await;
+
+        let chunks: Vec<_> = chunks.into_iter().try_collect().expect("stream should succeed");
+
+        match &chunks[0] {
+            CompletionSseChunk::Chunk(chunk) => {
+                assert_eq!(chunk.choices[0].text, "h");
+                assert_eq!(
+                    chunk.choices[0].logprobs.as_ref().expect("logprobs").tokens,
+                    vec!["h".to_string()]
+                );
+                assert_eq!(
+                    chunk.choices[0].logprobs.as_ref().expect("logprobs").text_offset,
+                    vec![0]
+                );
+            }
+            CompletionSseChunk::Usage(_) => panic!("expected regular chunk"),
+        }
+
+        match &chunks[1] {
+            CompletionSseChunk::Chunk(chunk) => {
+                assert_eq!(chunk.choices[0].text, "");
+                assert_eq!(
+                    chunk.choices[0].logprobs.as_ref().expect("logprobs").tokens,
+                    vec!["!".to_string()]
+                );
+                assert_eq!(
+                    chunk.choices[0].logprobs.as_ref().expect("logprobs").text_offset,
+                    vec![1]
+                );
+            }
+            CompletionSseChunk::Usage(_) => panic!("expected regular chunk"),
+        }
+    }
+}
diff --git a/rust/src/server/src/routes/openai/completions/convert.rs b/rust/src/server/src/routes/openai/completions/convert.rs
new file mode 100644
index 000000000000..066c4c046f4f
--- /dev/null
+++ b/rust/src/server/src/routes/openai/completions/convert.rs
@@ -0,0 +1,352 @@
+use vllm_text::{SamplingParams, TextDecodeOptions, TextRequest};
+
+use super::types::CompletionRequest;
+use crate::error::ApiError;
+use crate::routes::openai::completions::validate;
+use crate::routes::openai::utils::structured_outputs::convert_from_response_format_value;
+use crate::utils::{ResolvedRequestContext, convert_logit_bias, merge_kv_transfer_params};
+
+/// Lowered completion request plus the public response metadata carried by
+/// every SSE chunk.
+#[derive(Debug, Clone, PartialEq)]
+pub struct PreparedRequest {
+    /// Stable OpenAI-style request ID, reused as the external text request ID.
+    pub request_id: String,
+    /// Public model ID echoed back to the client.
+    pub response_model: String,
+    /// Whether the caller asked for the final streamed usage chunk.
+    pub include_usage: bool,
+    /// Lowered text request for the shared `vllm-text` facade.
+    pub text_request: TextRequest,
+    /// Original text prompt that should be echoed back northbound when
+    /// `echo=true`.
+    pub echo: Option<String>,
+    /// Whether to include token IDs alongside generated text.
+    pub return_token_ids: bool,
+    /// Whether to format logprob tokens as `token_id:{id}`.
+    pub return_tokens_as_token_ids: bool,
+}
+
+/// Validate and lower one OpenAI completions request into the internal
+/// text-generation format.
+///
+/// `served_model_names` must be non-empty; the first entry is used as the
+/// `model` field in responses.
+pub(crate) fn prepare_completion_request(
+    request: CompletionRequest,
+    served_model_names: &[String],
+    ctx: ResolvedRequestContext,
+) -> Result<PreparedRequest, ApiError> {
+    validate::validate_request_compat(&request, served_model_names)?;
+
+    let request_id = format!("cmpl-{}", ctx.request_id);
+
+    let logprobs = match request.logprobs {
+        Some(logprobs) => Some(i32::try_from(logprobs).map_err(|_| {
+            ApiError::invalid_request(
+                "`logprobs` must fit within a signed 32-bit integer.".to_string(),
+                Some("logprobs"),
+            )
+        })?),
+        None => None,
+    };
+    let prompt_logprobs = request.prompt_logprobs.or(if request.echo && !request.stream {
+        logprobs
+    } else {
+        None
+    });
+    let include_usage = (request.stream_options.as_ref())
+        .and_then(|options| options.include_usage)
+        .unwrap_or(false);
+    let echo = request.echo.then(|| request.prompt.as_text().cloned()).flatten();
+
+    let structured_outputs =
+        convert_from_response_format_value(&request.response_format, &request.structured_outputs)?;
+
+    let text_request = TextRequest {
+        request_id: request_id.clone(),
+        prompt: request.prompt,
+        mm_features: None,
+        sampling_params: SamplingParams {
+            temperature: request.temperature,
+            top_p: request.top_p,
+            top_k: request.top_k,
+            seed: request.seed,
+            max_tokens: request.max_tokens,
+            min_tokens: request.min_tokens,
+            logprobs,
+            prompt_logprobs,
+            min_p: request.min_p,
+            frequency_penalty: request.frequency_penalty,
+            presence_penalty: request.presence_penalty,
+            repetition_penalty: request.repetition_penalty,
+            stop_token_ids: request.stop_token_ids,
+            ignore_eos: request.ignore_eos,
+            logit_bias: convert_logit_bias(request.logit_bias)?,
+            allowed_token_ids: request.allowed_token_ids,
+            bad_words: None,
+            logprob_token_ids: None,
+            structured_outputs,
+            skip_reading_prefix_cache: None,
+            vllm_xargs: merge_kv_transfer_params(
+                request.vllm_xargs,
+                request.kv_transfer_params.as_ref(),
+            ),
+        },
+        decode_options: TextDecodeOptions {
+            skip_special_tokens: request.skip_special_tokens,
+            include_stop_str_in_output: request.include_stop_str_in_output,
+            stop_strings: request.stop.map(|stop| stop.into_vec()),
+            min_tokens: request.min_tokens.unwrap_or(0),
+        },
+        intermediate: request.stream,
+        priority: request.priority.unwrap_or(0),
+        cache_salt: request.cache_salt,
+        add_special_tokens: request.add_special_tokens,
+        data_parallel_rank: ctx.data_parallel_rank,
+    };
+
+    Ok(PreparedRequest {
+        request_id,
+        response_model: served_model_names.first().cloned().unwrap_or_default(),
+        include_usage,
+        text_request,
+        echo,
+        return_token_ids: request.return_token_ids.unwrap_or(false),
+        return_tokens_as_token_ids: request.return_tokens_as_token_ids.unwrap_or(false),
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use axum::http::HeaderMap;
+    use serde_json::json;
+    use vllm_text::Prompt;
+
+    use super::prepare_completion_request;
+    use crate::routes::openai::completions::types::CompletionRequest;
+    use crate::utils::{ResolvedRequestContext, resolve_request_context};
+
+    fn request_context(headers: &HeaderMap, request_id: Option<&str>) -> ResolvedRequestContext {
+        resolve_request_context(headers, request_id)
+    }
+
+    fn served(names: &[&str]) -> Vec<String> {
+        names.iter().map(|s| s.to_string()).collect()
+    }
+
+    fn base_request_json() -> serde_json::Value {
+        json!({
+            "model": "Qwen/Qwen1.5-0.5B-Chat",
+            "prompt": "hello",
+            "stream": true
+        })
+    }
+
+    #[test]
+    fn completion_http_request_deserializes_text_prompt() {
+        let request: CompletionRequest =
+            serde_json::from_value(base_request_json()).expect("parse request");
+
+        assert_eq!(request.prompt, Prompt::Text("hello".to_string()));
+        assert_eq!(request.model, "Qwen/Qwen1.5-0.5B-Chat");
+    }
+
+    #[test]
+    fn completion_http_request_deserializes_token_id_prompt() {
+        let request: CompletionRequest = serde_json::from_value(json!({
+            "model": "Qwen/Qwen1.5-0.5B-Chat",
+            "prompt": [11, 22, 33],
+            "stream": true,
+            "ignore_eos": true,
+            "max_tokens": 7
+        }))
+        .expect("parse request");
+
+        assert_eq!(request.prompt, Prompt::TokenIds(vec![11, 22, 33]));
+        assert_eq!(request.max_tokens, Some(7));
+        assert!(request.ignore_eos);
+    }
+
+    #[test]
+    fn prepare_completion_request_maps_sampling_fields() {
+        let request: CompletionRequest = serde_json::from_value(json!({
+            "model": "Qwen/Qwen1.5-0.5B-Chat",
+            "prompt": [11, 22, 33],
+            "stream": true,
+            "stream_options": {"include_usage": true},
+            "max_tokens": 7,
+            "logprobs": 2,
+            "top_p": 0.9,
+            "top_k": 42,
+            "min_p": 0.1,
+            "frequency_penalty": 0.2,
+            "presence_penalty": 0.3,
+            "repetition_penalty": 1.1,
+            "ignore_eos": true,
+            "skip_special_tokens": false
+        }))
+        .expect("parse request");
+
+        let prepared = prepare_completion_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("prepare");
+
+        assert!(prepared.include_usage);
+        assert_eq!(
+            prepared.text_request.prompt,
+            Prompt::TokenIds(vec![11, 22, 33])
+        );
+        assert_eq!(prepared.text_request.sampling_params.max_tokens, Some(7));
+        assert_eq!(prepared.text_request.sampling_params.logprobs, Some(2));
+        assert_eq!(prepared.text_request.sampling_params.top_p, Some(0.9));
+        assert_eq!(prepared.text_request.sampling_params.top_k, Some(42));
+        assert_eq!(prepared.text_request.sampling_params.min_p, Some(0.1));
+        assert_eq!(
+            prepared.text_request.sampling_params.frequency_penalty,
+            Some(0.2)
+        );
+        assert_eq!(
+            prepared.text_request.sampling_params.presence_penalty,
+            Some(0.3)
+        );
+        assert_eq!(
+            prepared.text_request.sampling_params.repetition_penalty,
+            Some(1.1)
+        );
+        assert!(prepared.text_request.sampling_params.ignore_eos);
+        assert!(!prepared.text_request.decode_options.skip_special_tokens);
+    }
+
+    #[test]
+    fn prepare_completion_request_accepts_text_echo() {
+        let request: CompletionRequest = serde_json::from_value(json!({
+            "model": "Qwen/Qwen1.5-0.5B-Chat",
+            "prompt": "hello",
+            "stream": true,
+            "echo": true,
+            "max_tokens": 7
+        }))
+        .expect("parse request");
+
+        let prepared = prepare_completion_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("prepare");
+
+        assert_eq!(prepared.echo, Some("hello".to_string()));
+        assert_eq!(prepared.text_request.sampling_params.max_tokens, Some(7));
+    }
+
+    #[test]
+    fn prepare_completion_request_enables_prompt_logprobs_for_non_stream_echo() {
+        let request: CompletionRequest = serde_json::from_value(json!({
+            "model": "Qwen/Qwen1.5-0.5B-Chat",
+            "prompt": "hello",
+            "echo": true,
+            "stream": false,
+            "logprobs": 3
+        }))
+        .expect("parse request");
+
+        let prepared = prepare_completion_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("prepare");
+
+        assert_eq!(prepared.text_request.sampling_params.logprobs, Some(3));
+        assert_eq!(
+            prepared.text_request.sampling_params.prompt_logprobs,
+            Some(3)
+        );
+    }
+
+    #[test]
+    fn prepare_completion_request_rejects_token_id_prompt_echo() {
+        let request: CompletionRequest = serde_json::from_value(json!({
+            "model": "Qwen/Qwen1.5-0.5B-Chat",
+            "prompt": [11, 22, 33],
+            "stream": true,
+            "echo": true
+        }))
+        .expect("parse request");
+
+        assert!(
+            prepare_completion_request(
+                request,
+                &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+                ResolvedRequestContext::default(),
+            )
+            .is_err()
+        );
+    }
+
+    #[test]
+    fn prepare_completion_request_accepts_logprobs_fields() {
+        let request: CompletionRequest = serde_json::from_value(json!({
+            "model": "Qwen/Qwen1.5-0.5B-Chat",
+            "prompt": "hello",
+            "stream": false,
+            "logprobs": 1,
+            "prompt_logprobs": 2
+        }))
+        .expect("parse request");
+
+        let prepared = prepare_completion_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("prepare");
+        assert_eq!(prepared.text_request.sampling_params.logprobs, Some(1));
+        assert_eq!(
+            prepared.text_request.sampling_params.prompt_logprobs,
+            Some(2)
+        );
+    }
+
+    #[test]
+    fn prepare_completion_request_threads_data_parallel_rank() {
+        let request: CompletionRequest = serde_json::from_value(json!({
+            "model": "Qwen/Qwen1.5-0.5B-Chat",
+            "prompt": "hello",
+            "stream": false,
+        }))
+        .expect("parse request");
+
+        let mut headers = HeaderMap::new();
+        headers.insert("X-data-parallel-rank", "3".parse().unwrap());
+        let prepared = prepare_completion_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            request_context(&headers, None),
+        )
+        .expect("prepare");
+        assert_eq!(prepared.text_request.data_parallel_rank, Some(3));
+    }
+
+    #[test]
+    fn prepare_completion_request_leaves_data_parallel_rank_none_when_absent() {
+        let request: CompletionRequest = serde_json::from_value(json!({
+            "model": "Qwen/Qwen1.5-0.5B-Chat",
+            "prompt": "hello",
+            "stream": false,
+        }))
+        .expect("parse request");
+
+        let prepared = prepare_completion_request(
+            request,
+            &served(&["Qwen/Qwen1.5-0.5B-Chat"]),
+            ResolvedRequestContext::default(),
+        )
+        .expect("prepare");
+        assert_eq!(prepared.text_request.data_parallel_rank, None);
+    }
+}
diff --git a/rust/src/server/src/routes/openai/completions/types.rs b/rust/src/server/src/routes/openai/completions/types.rs
new file mode 100644
index 000000000000..adc8a7ba7cbd
--- /dev/null
+++ b/rust/src/server/src/routes/openai/completions/types.rs
@@ -0,0 +1,253 @@
+use std::collections::HashMap;
+
+use serde::{Deserialize, Serialize};
+use serde_json::{Map, Value};
+use validator::Validate;
+use vllm_text::Prompt;
+
+use crate::routes::openai::utils::types::{
+    LogProbs, Normalizable, StreamOptions, StringOrArray, Usage, default_true, validate_stop,
+};
+
+/// Serde default for `CompletionRequest::max_tokens`, matching the Python vLLM
+/// / OpenAI default.
+fn default_completion_max_tokens() -> Option<u32> {
+    Some(16)
+}
+
+/// vLLM-compatible request type for the Completions API.
+///
+/// Mirrors the Python vLLM `CompletionRequest` class. The local copy keeps the
+/// request type route-owned so we can accept token-id prompts via
+/// [`vllm_text::Prompt`] and add vLLM-only fields directly instead of layering
+/// wrapper deserializers on top.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Deserialize, Serialize, Validate)]
+pub struct CompletionRequest {
+    // -------- Standard OpenAI API Parameters --------
+    /// ID of the model to use
+    pub model: String,
+
+    /// The prompt(s) to generate completions for.
+    ///
+    /// We use [`vllm_text::Prompt`] here to support token-id input.
+    pub prompt: Prompt,
+
+    /// Echo back the prompt in addition to the completion
+    #[serde(default)]
+    pub echo: bool,
+
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens based
+    /// on their existing frequency in the text so far
+    pub frequency_penalty: Option<f32>,
+
+    /// Modify the likelihood of specified tokens appearing in the completion
+    pub logit_bias: Option<HashMap<String, f32>>,
+
+    /// Include the log probabilities on the logprobs most likely tokens
+    pub logprobs: Option<u32>,
+
+    /// The maximum number of tokens to generate (defaults to 16 when absent,
+    /// matching the Python vLLM / OpenAI API convention)
+    #[serde(default = "default_completion_max_tokens")]
+    pub max_tokens: Option<u32>,
+
+    /// How many completions to generate for each prompt
+    pub n: Option<u32>,
+
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens based
+    /// on whether they appear in the text so far
+    pub presence_penalty: Option<f32>,
+
+    /// If specified, our system will make a best effort to sample
+    /// deterministically
+    pub seed: Option<i64>,
+
+    /// Up to 4 sequences where the API will stop generating further tokens
+    #[validate(custom(function = "validate_stop"))]
+    pub stop: Option<StringOrArray>,
+
+    /// Whether to stream back partial progress
+    #[serde(default)]
+    pub stream: bool,
+
+    /// The suffix that comes after a completion of inserted text
+    pub suffix: Option<String>,
+
+    /// What sampling temperature to use, between 0 and 2
+    pub temperature: Option<f32>,
+
+    /// An alternative to sampling with temperature (nucleus sampling)
+    pub top_p: Option<f32>,
+
+    /// A unique identifier representing your end-user
+    pub user: Option<String>,
+
+    // -------- vLLM Sampling Parameters --------
+    /// Options for streaming response
+    pub stream_options: Option<StreamOptions>,
+
+    /// Use beam search instead of sampling
+    #[serde(default)]
+    pub use_beam_search: bool,
+
+    /// Top-k sampling parameter
+    pub top_k: Option<u32>,
+
+    /// Min-p nucleus sampling parameter
+    pub min_p: Option<f32>,
+
+    /// Repetition penalty for reducing repetitive text
+    pub repetition_penalty: Option<f32>,
+
+    /// Length penalty for beam search
+    pub length_penalty: Option<f32>,
+
+    /// Specific token IDs to use as stop conditions
+    pub stop_token_ids: Option<Vec<u32>>,
+
+    /// Include stop string in output
+    #[serde(default)]
+    pub include_stop_str_in_output: bool,
+
+    /// Ignore end-of-sequence tokens during generation
+    #[serde(default)]
+    pub ignore_eos: bool,
+
+    /// Minimum number of tokens to generate
+    pub min_tokens: Option<u32>,
+
+    /// Skip special tokens during detokenization
+    #[serde(default = "default_true")]
+    pub skip_special_tokens: bool,
+
+    /// Add spaces between special tokens during detokenization
+    #[serde(default = "default_true")]
+    pub spaces_between_special_tokens: bool,
+
+    /// Truncate prompt tokens to this length
+    pub truncate_prompt_tokens: Option<i64>,
+
+    /// Restrict output to these token IDs only
+    pub allowed_token_ids: Option<Vec<u32>>,
+
+    /// Number of prompt logprobs to return
+    pub prompt_logprobs: Option<i32>,
+
+    // -------- Extra vLLM Parameters --------
+    /// Whether to add special tokens (e.g. BOS) to the prompt
+    #[serde(default = "default_true")]
+    pub add_special_tokens: bool,
+
+    /// Format specification for structured output (JSON mode, JSON schema,
+    /// etc.)
+    pub response_format: Option<Value>,
+
+    /// Additional kwargs for structured outputs
+    pub structured_outputs: Option<Value>,
+
+    /// Request scheduling priority (lower means earlier; default 0)
+    pub priority: Option<i32>,
+
+    /// External request ID used for response correlation.
+    pub request_id: Option<String>,
+
+    /// Tokens represented as strings of the form 'token_id:{token_id}' in
+    /// logprobs
+    pub return_tokens_as_token_ids: Option<bool>,
+
+    /// Include token IDs alongside generated text
+    pub return_token_ids: Option<bool>,
+
+    /// Salt for prefix cache isolation in multi-user environments
+    pub cache_salt: Option<String>,
+
+    /// KV transfer parameters for disaggregated serving
+    pub kv_transfer_params: Option<HashMap<String, Value>>,
+
+    /// Additional request parameters with string or numeric values for custom
+    /// extensions
+    pub vllm_xargs: Option<HashMap<String, Value>>,
+
+    /// Additional fields
+    #[serde(flatten)]
+    pub other: Map<String, Value>,
+}
+
+impl Normalizable for CompletionRequest {}
+
+/// Mirrors the Python vLLM `CompletionResponse` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub(super) struct CompletionResponse {
+    pub id: String,
+    pub object: String,
+    pub created: u64,
+    pub model: String,
+    pub choices: Vec<CompletionChoice>,
+    pub usage: Option<Usage>,
+    pub system_fingerprint: Option<String>,
+    pub kv_transfer_params: Option<Value>,
+}
+
+/// Mirrors the Python vLLM `CompletionResponseChoice` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub(super) struct CompletionChoice {
+    pub index: u32,
+    pub text: String,
+    pub logprobs: Option<LogProbs>,
+    pub finish_reason: Option<String>,
+    pub stop_reason: Option<Value>,
+    pub prompt_logprobs: Option<Vec<Option<HashMap<String, f32>>>>,
+    pub token_ids: Option<Vec<u32>>,
+    pub prompt_token_ids: Option<Vec<u32>>,
+}
+
+/// Mirrors the Python vLLM `CompletionStreamResponse` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub(super) struct CompletionStreamResponse {
+    pub id: String,
+    pub object: String,
+    pub created: u64,
+    pub model: String,
+    pub choices: Vec<CompletionStreamChoice>,
+    pub usage: Option<Usage>,
+}
+
+impl CompletionStreamResponse {
+    /// Create a stream response with the standard envelope fields pre-filled.
+    pub fn new(id: &str, model: &str, created: u64) -> Self {
+        Self {
+            id: id.to_string(),
+            object: "text_completion".to_string(),
+            created,
+            model: model.to_string(),
+            choices: Vec::new(),
+            usage: None,
+        }
+    }
+}
+
+/// Mirrors the Python vLLM `CompletionResponseStreamChoice` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Default, Serialize)]
+pub(super) struct CompletionStreamChoice {
+    pub index: u32,
+    pub text: String,
+    pub logprobs: Option<LogProbs>,
+    pub finish_reason: Option<String>,
+    pub stop_reason: Option<Value>,
+    pub token_ids: Option<Vec<u32>>,
+    pub prompt_token_ids: Option<Vec<u32>>,
+}
+
+#[derive(Debug, Clone, Serialize)]
+#[serde(untagged)]
+pub(super) enum CompletionSseChunk {
+    /// Ordinary OpenAI completions delta/final chunk.
+    Chunk(CompletionStreamResponse),
+    /// Final usage chunk emitted before `[DONE]` when `include_usage=true`.
+    Usage(CompletionStreamResponse),
+}
diff --git a/rust/src/server/src/routes/openai/completions/validate.rs b/rust/src/server/src/routes/openai/completions/validate.rs
new file mode 100644
index 000000000000..a53609234b60
--- /dev/null
+++ b/rust/src/server/src/routes/openai/completions/validate.rs
@@ -0,0 +1,178 @@
+use vllm_text::Prompt;
+
+use super::types::CompletionRequest;
+use crate::error::{ApiError, bail_invalid_request};
+
+/// Enforce the minimal compatibility contract for the Rust OpenAI server.
+pub(super) fn validate_request_compat(
+    request: &CompletionRequest,
+    served_model_names: &[String],
+) -> Result<(), ApiError> {
+    // This path is intentionally scoped to the minimum surface needed by
+    // `vllm-bench` random workload compatibility, so unsupported legacy
+    // completions features fail early here.
+    if !served_model_names.iter().any(|n| n == &request.model) {
+        return Err(ApiError::model_not_found(request.model.clone()));
+    }
+
+    if request.stream_options.is_some() && !request.stream {
+        bail_invalid_request!(
+            param = "stream_options",
+            "stream_options are only supported when stream=true."
+        );
+    }
+
+    if request.n.unwrap_or(1) > 1 {
+        bail_invalid_request!(param = "n", "Only n=1 is supported.");
+    }
+
+    if request.max_tokens == Some(0) {
+        bail_invalid_request!(param = "max_tokens", "max_tokens must be greater than 0.");
+    }
+
+    if request.echo && matches!(request.prompt, Prompt::TokenIds(_)) {
+        bail_invalid_request!(
+            param = "echo",
+            "echo is not supported with token-ID prompts."
+        );
+    }
+
+    if request.suffix.is_some() {
+        bail_invalid_request!(param = "suffix", "suffix is not supported.");
+    }
+
+    if let Some(logprobs) = request.logprobs
+        && logprobs > i32::MAX as u32
+    {
+        bail_invalid_request!(
+            param = "logprobs",
+            "`logprobs` must fit within a signed 32-bit integer."
+        );
+    }
+
+    if let Some(prompt_logprobs) = request.prompt_logprobs {
+        if request.stream && (prompt_logprobs > 0 || prompt_logprobs == -1) {
+            bail_invalid_request!(
+                param = "prompt_logprobs",
+                "`prompt_logprobs` are not available when `stream=true`."
+            );
+        }
+
+        if prompt_logprobs < 0 && prompt_logprobs != -1 {
+            bail_invalid_request!(
+                param = "prompt_logprobs",
+                "`prompt_logprobs` must be a non-negative value or -1."
+            );
+        }
+    }
+
+    if request.use_beam_search {
+        bail_invalid_request!(
+            param = "use_beam_search",
+            "use_beam_search is not supported."
+        );
+    }
+
+    // ---- Reject parameters that are accepted for deserialization but not yet
+    // implemented ----
+
+    if request.length_penalty.is_some() {
+        bail_invalid_request!(param = "length_penalty", "length_penalty is not supported.");
+    }
+    if !request.spaces_between_special_tokens {
+        bail_invalid_request!(
+            param = "spaces_between_special_tokens",
+            "spaces_between_special_tokens is not supported."
+        );
+    }
+    if request.truncate_prompt_tokens.is_some() {
+        bail_invalid_request!(
+            param = "truncate_prompt_tokens",
+            "truncate_prompt_tokens is not supported."
+        );
+    }
+
+    if let Some(options) = &request.stream_options
+        && options.continuous_usage_stats.is_some()
+    {
+        bail_invalid_request!(
+            param = "stream_options",
+            "continuous_usage_stats is not supported."
+        );
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use serde_json::json;
+
+    use super::validate_request_compat;
+    use crate::routes::openai::completions::types::CompletionRequest;
+
+    fn base_request() -> CompletionRequest {
+        serde_json::from_value(json!({
+            "model": "Qwen/Qwen1.5-0.5B-Chat",
+            "prompt": "hello",
+            "stream": true,
+        }))
+        .expect("parse request")
+    }
+
+    fn served_names(names: &[&str]) -> Vec<String> {
+        names.iter().map(|s| s.to_string()).collect()
+    }
+
+    #[test]
+    fn validate_request_compat_accepts_logprobs() {
+        let request = CompletionRequest {
+            logprobs: Some(1),
+            ..base_request()
+        };
+        assert!(
+            validate_request_compat(&request, &served_names(&["Qwen/Qwen1.5-0.5B-Chat"])).is_ok()
+        );
+    }
+
+    #[test]
+    fn validate_request_compat_accepts_any_served_name() {
+        let request = base_request();
+        assert!(
+            validate_request_compat(
+                &request,
+                &served_names(&["other-alias", "Qwen/Qwen1.5-0.5B-Chat"])
+            )
+            .is_ok()
+        );
+    }
+
+    #[test]
+    fn validate_request_compat_rejects_unknown_model() {
+        let request = base_request();
+        assert!(validate_request_compat(&request, &served_names(&["other-model"])).is_err());
+    }
+
+    #[test]
+    fn validate_request_compat_rejects_streaming_prompt_logprobs() {
+        let request = CompletionRequest {
+            prompt_logprobs: Some(1),
+            ..base_request()
+        };
+        assert!(
+            validate_request_compat(&request, &served_names(&["Qwen/Qwen1.5-0.5B-Chat"])).is_err()
+        );
+    }
+
+    #[test]
+    fn validate_request_compat_accepts_non_stream_prompt_logprobs() {
+        let request = CompletionRequest {
+            stream: false,
+            prompt_logprobs: Some(-1),
+            ..base_request()
+        };
+        assert!(
+            validate_request_compat(&request, &served_names(&["Qwen/Qwen1.5-0.5B-Chat"])).is_ok()
+        );
+    }
+}
diff --git a/rust/src/server/src/routes/openai/mod.rs b/rust/src/server/src/routes/openai/mod.rs
new file mode 100644
index 000000000000..0cc8887dfab6
--- /dev/null
+++ b/rust/src/server/src/routes/openai/mod.rs
@@ -0,0 +1,8 @@
+pub mod chat_completions;
+mod completions;
+mod models;
+pub(crate) mod utils;
+
+pub use chat_completions::chat_completions;
+pub use completions::completions;
+pub use models::list_models;
diff --git a/rust/src/server/src/routes/openai/models.rs b/rust/src/server/src/routes/openai/models.rs
new file mode 100644
index 000000000000..42e3098fc9e7
--- /dev/null
+++ b/rust/src/server/src/routes/openai/models.rs
@@ -0,0 +1,24 @@
+use std::sync::Arc;
+
+use axum::Json;
+use axum::extract::State;
+
+use crate::routes::openai::utils::types::{ListModelsResponse, ModelObject};
+use crate::state::AppState;
+
+/// Return all configured served model names in OpenAI `list models` format.
+pub async fn list_models(State(state): State<Arc<AppState>>) -> Json<ListModelsResponse> {
+    Json(ListModelsResponse {
+        object: "list".to_string(),
+        data: state
+            .served_model_names()
+            .iter()
+            .map(|name| ModelObject {
+                id: name.clone(),
+                object: "model".to_string(),
+                created: 0,
+                owned_by: "vllm-frontend-rs".to_string(),
+            })
+            .collect(),
+    })
+}
diff --git a/rust/src/server/src/routes/openai/utils/logprobs.rs b/rust/src/server/src/routes/openai/utils/logprobs.rs
new file mode 100644
index 000000000000..aab02e46ec60
--- /dev/null
+++ b/rust/src/server/src/routes/openai/utils/logprobs.rs
@@ -0,0 +1,256 @@
+use std::collections::HashMap;
+
+use itertools::Itertools as _;
+use vllm_text::{
+    CollectedTextOutput, DecodedLogprobs, DecodedPositionLogprobs, DecodedPromptLogprobs,
+    DecodedTokenLogprob,
+};
+
+use super::types::{ChatLogProbs, ChatLogProbsContent, LogProbs, TopLogProb};
+use crate::error::{ApiError, server_error};
+
+/// Convert decoded token-position logprobs into the OpenAI completions
+/// `logprobs` shape.
+pub fn decoded_logprobs_to_openai(
+    logprobs: &DecodedLogprobs,
+    initial_text_offset: u32,
+    return_tokens_as_token_ids: bool,
+) -> Result<LogProbs, ApiError> {
+    let mut text_offset = Vec::with_capacity(logprobs.positions.len());
+    let mut token_logprobs = Vec::with_capacity(logprobs.positions.len());
+    let mut tokens = Vec::with_capacity(logprobs.positions.len());
+    let mut top_logprobs = Vec::with_capacity(logprobs.positions.len());
+    let mut current_offset = initial_text_offset;
+
+    for position in &logprobs.positions {
+        let chosen = position.entries.first().ok_or_else(|| {
+            server_error!("decoded logprobs position unexpectedly had no token candidates")
+        })?;
+
+        let token_str = format_token(chosen, return_tokens_as_token_ids);
+        text_offset.push(current_offset);
+        token_logprobs.push(Some(clamp_logprob(chosen.logprob)));
+        current_offset = current_offset.saturating_add(text_len(&token_str));
+        tokens.push(token_str);
+        top_logprobs.push(Some(position_top_logprobs_map(
+            position,
+            return_tokens_as_token_ids,
+        )));
+    }
+
+    Ok(LogProbs {
+        tokens,
+        token_logprobs,
+        top_logprobs,
+        text_offset,
+    })
+}
+
+/// Convert decoded prompt logprobs into the OpenAI completions `logprobs`
+/// shape.
+///
+/// The first prompt token is included with `None` logprob metadata, matching
+/// Python vLLM's echoed completions behavior.
+pub fn decoded_prompt_logprobs_to_openai(
+    prompt_logprobs: &DecodedPromptLogprobs,
+    initial_text_offset: u32,
+    return_tokens_as_token_ids: bool,
+) -> Result<LogProbs, ApiError> {
+    let mut text_offset = Vec::with_capacity(prompt_logprobs.scored_positions.len() + 1);
+    let mut token_logprobs = Vec::with_capacity(prompt_logprobs.scored_positions.len() + 1);
+    let mut tokens = Vec::with_capacity(prompt_logprobs.scored_positions.len() + 1);
+    let mut top_logprobs = Vec::with_capacity(prompt_logprobs.scored_positions.len() + 1);
+    let mut current_offset = initial_text_offset;
+
+    let first_token_str = if return_tokens_as_token_ids {
+        format!("token_id:{}", prompt_logprobs.first_token_id)
+    } else {
+        prompt_logprobs.first_token.clone()
+    };
+    text_offset.push(current_offset);
+    token_logprobs.push(None);
+    current_offset = current_offset.saturating_add(text_len(&first_token_str));
+    tokens.push(first_token_str);
+    top_logprobs.push(None);
+
+    for position in &prompt_logprobs.scored_positions {
+        let chosen = position.entries.first().ok_or_else(|| {
+            server_error!("decoded prompt logprobs position unexpectedly had no token candidates")
+        })?;
+
+        let token_str = format_token(chosen, return_tokens_as_token_ids);
+        text_offset.push(current_offset);
+        token_logprobs.push(Some(clamp_logprob(chosen.logprob)));
+        current_offset = current_offset.saturating_add(text_len(&token_str));
+        tokens.push(token_str);
+        top_logprobs.push(Some(position_top_logprobs_map(
+            position,
+            return_tokens_as_token_ids,
+        )));
+    }
+
+    Ok(LogProbs {
+        tokens,
+        token_logprobs,
+        top_logprobs,
+        text_offset,
+    })
+}
+
+/// Convert decoded prompt logprobs into the vLLM-style prompt-logprobs response
+/// shape.
+pub fn decoded_prompt_logprobs_to_maps(
+    prompt_logprobs: &DecodedPromptLogprobs,
+    return_tokens_as_token_ids: bool,
+) -> Vec<Option<HashMap<String, f32>>> {
+    std::iter::once(None)
+        .chain(prompt_logprobs.scored_positions.iter().map(|position| {
+            Some(position_top_logprobs_map(
+                position,
+                return_tokens_as_token_ids,
+            ))
+        }))
+        .collect()
+}
+
+/// Convert decoded token-position logprobs into the OpenAI chat `logprobs`
+/// shape.
+pub fn decoded_logprobs_to_openai_chat(
+    logprobs: &DecodedLogprobs,
+    return_tokens_as_token_ids: bool,
+) -> Result<ChatLogProbs, ApiError> {
+    let content = logprobs
+        .positions
+        .iter()
+        .map(|pos| position_to_chat_logprobs_content(pos, return_tokens_as_token_ids))
+        .try_collect()?;
+
+    Ok(ChatLogProbs {
+        content: Some(content),
+    })
+}
+
+/// Count visible text positions using OpenAI completions' character-offset
+/// convention.
+pub fn text_len(text: &str) -> u32 {
+    u32::try_from(text.chars().count()).unwrap_or(u32::MAX)
+}
+
+/// Concatenate two OpenAI-style completion logprobs payloads in token order.
+pub fn append_openai_logprobs(mut prefix: LogProbs, suffix: LogProbs) -> LogProbs {
+    prefix.tokens.extend(suffix.tokens);
+    prefix.token_logprobs.extend(suffix.token_logprobs);
+    prefix.top_logprobs.extend(suffix.top_logprobs);
+    prefix.text_offset.extend(suffix.text_offset);
+    prefix
+}
+
+/// Build the non-stream completions `logprobs` payload from collected text
+/// output.
+///
+/// When `echoed_prompt` is true, the returned payload matches Python vLLM's
+/// echoed completions behavior by concatenating prompt and completion logprobs
+/// into one OpenAI `LogProbs` object.
+pub fn collected_logprobs_to_openai(
+    collected: &CollectedTextOutput,
+    echoed_prompt: bool,
+    initial_completion_offset: u32,
+    return_tokens_as_token_ids: bool,
+) -> Result<LogProbs, ApiError> {
+    if echoed_prompt {
+        let prompt_logprobs = collected.prompt_logprobs.as_ref().ok_or_else(|| {
+            server_error!(
+                "echoed completion logprobs require prompt logprobs but generation returned none"
+            )
+        })?;
+        let prompt_logprobs =
+            decoded_prompt_logprobs_to_openai(prompt_logprobs, 0, return_tokens_as_token_ids)?;
+        let completion_start = prompt_logprobs
+            .text_offset
+            .last()
+            .zip(prompt_logprobs.tokens.last())
+            .map(|(&offset, token)| offset.saturating_add(text_len(token)))
+            .unwrap_or(0);
+        return match collected.logprobs.as_ref() {
+            Some(completion_logprobs) => Ok(append_openai_logprobs(
+                prompt_logprobs,
+                decoded_logprobs_to_openai(
+                    completion_logprobs,
+                    completion_start,
+                    return_tokens_as_token_ids,
+                )?,
+            )),
+            None => Ok(prompt_logprobs),
+        };
+    }
+
+    let completion_logprobs = collected.logprobs.as_ref().ok_or_else(|| {
+        server_error!("completion response requested logprobs but generation returned none")
+    })?;
+    decoded_logprobs_to_openai(
+        completion_logprobs,
+        initial_completion_offset,
+        return_tokens_as_token_ids,
+    )
+}
+
+/// Format a token entry as either its decoded string or `token_id:{id}`.
+fn format_token(entry: &DecodedTokenLogprob, as_token_id: bool) -> String {
+    if as_token_id {
+        format!("token_id:{}", entry.token_id)
+    } else {
+        entry.token.clone()
+    }
+}
+
+fn position_top_logprobs_map(
+    position: &DecodedPositionLogprobs,
+    return_tokens_as_token_ids: bool,
+) -> HashMap<String, f32> {
+    position
+        .entries
+        .iter()
+        .map(|entry| {
+            (
+                format_token(entry, return_tokens_as_token_ids),
+                clamp_logprob(entry.logprob),
+            )
+        })
+        .collect()
+}
+
+fn position_to_chat_logprobs_content(
+    position: &DecodedPositionLogprobs,
+    return_tokens_as_token_ids: bool,
+) -> Result<ChatLogProbsContent, ApiError> {
+    let chosen = position.entries.first().ok_or_else(|| {
+        server_error!("decoded chat logprobs position unexpectedly had no token candidates")
+    })?;
+
+    let token_str = format_token(chosen, return_tokens_as_token_ids);
+    Ok(ChatLogProbsContent {
+        token: token_str.clone(),
+        logprob: clamp_logprob(chosen.logprob),
+        bytes: Some(token_bytes(&token_str)),
+        top_logprobs: position
+            .entries
+            .iter()
+            .map(|entry| {
+                let t = format_token(entry, return_tokens_as_token_ids);
+                TopLogProb {
+                    logprob: clamp_logprob(entry.logprob),
+                    bytes: Some(token_bytes(&t)),
+                    token: t,
+                }
+            })
+            .collect(),
+    })
+}
+
+fn token_bytes(token: &str) -> Vec<u8> {
+    token.as_bytes().to_vec()
+}
+
+pub fn clamp_logprob(logprob: f32) -> f32 {
+    logprob.max(-9999.0)
+}
diff --git a/rust/src/server/src/routes/openai/utils/mod.rs b/rust/src/server/src/routes/openai/utils/mod.rs
new file mode 100644
index 000000000000..57b1d99690d4
--- /dev/null
+++ b/rust/src/server/src/routes/openai/utils/mod.rs
@@ -0,0 +1,4 @@
+pub mod logprobs;
+pub mod structured_outputs;
+pub mod types;
+pub mod validated_json;
diff --git a/rust/src/server/src/routes/openai/utils/structured_outputs.rs b/rust/src/server/src/routes/openai/utils/structured_outputs.rs
new file mode 100644
index 000000000000..e974c836bb5e
--- /dev/null
+++ b/rust/src/server/src/routes/openai/utils/structured_outputs.rs
@@ -0,0 +1,132 @@
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use vllm_engine_core_client::protocol::StructuredOutputsParams;
+
+use crate::error::ApiError;
+
+/// JSON schema specification nested inside a `json_schema` response format.
+///
+/// Mirrors the Python vLLM `JsonSchemaResponseFormat` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct JsonSchemaFormat {
+    pub name: String,
+    #[serde(default)]
+    pub description: Option<String>,
+    /// The actual JSON schema object.
+    #[serde(alias = "json_schema")]
+    pub schema: Value,
+    #[serde(default)]
+    pub strict: Option<bool>,
+}
+
+/// Supported `response_format` types for chat and completion requests.
+///
+/// This is our own definition (rather than the `openai-protocol` crate's) so
+/// that we can support the vLLM-specific `structural_tag` variant.
+///
+/// Original Python definitions:
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026/vllm/entrypoints/openai/engine/protocol.py#L116-L157>
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ResponseFormat {
+    Text,
+    JsonObject,
+    JsonSchema {
+        json_schema: JsonSchemaFormat,
+    },
+    /// vLLM-specific structural tag format. The entire object (including the
+    /// `type` field) is JSON-serialized and passed as
+    /// `StructuredOutputsParams.structural_tag`.
+    ///
+    /// We capture the payload as a catch-all map so both the legacy
+    /// (`structures`/`triggers`) and current (`format`) shapes are
+    /// preserved without needing typed structs.
+    StructuralTag {
+        #[serde(flatten)]
+        extra: serde_json::Map<String, Value>,
+    },
+}
+
+/// Convert an explicit `structured_outputs` JSON blob into
+/// [`StructuredOutputsParams`].
+fn deserialize_structured_outputs(
+    raw: &serde_json::Value,
+) -> Result<StructuredOutputsParams, ApiError> {
+    serde_json::from_value(raw.clone()).map_err(|e| {
+        ApiError::invalid_request(
+            format!("invalid structured_outputs: {e}"),
+            Some("structured_outputs"),
+        )
+    })
+}
+
+/// Convert a typed [`ResponseFormat`] and/or raw `structured_outputs` blob into
+/// engine-core [`StructuredOutputsParams`].
+///
+/// Mirrors the Python vLLM conversion in
+/// `ChatCompletionRequest.to_sampling_params()`: <https://github.com/vllm-project/vllm/blob/f22d6e026/vllm/entrypoints/openai/chat_completion/protocol.py#L457-L487>
+pub fn convert_from_response_format(
+    response_format: Option<&ResponseFormat>,
+    structured_outputs: &Option<serde_json::Value>,
+) -> Result<Option<StructuredOutputsParams>, ApiError> {
+    if let Some(raw) = structured_outputs {
+        return Ok(Some(deserialize_structured_outputs(raw)?));
+    }
+
+    let Some(fmt) = response_format else {
+        return Ok(None);
+    };
+    match fmt {
+        ResponseFormat::Text => Ok(None),
+        ResponseFormat::JsonObject => Ok(Some(StructuredOutputsParams {
+            json_object: Some(true),
+            ..Default::default()
+        })),
+        ResponseFormat::JsonSchema { json_schema } => Ok(Some(StructuredOutputsParams {
+            json: Some(json_schema.schema.clone()),
+            ..Default::default()
+        })),
+        ResponseFormat::StructuralTag { .. } => {
+            // The Python frontend dumps the entire response_format object (including the
+            // `type` field) as a JSON string for the engine-core backend.
+            let tag_json = serde_json::to_string(fmt).map_err(|e| {
+                ApiError::invalid_request(
+                    format!("failed to serialize structural_tag: {e}"),
+                    Some("response_format"),
+                )
+            })?;
+            Ok(Some(StructuredOutputsParams {
+                structural_tag: Some(tag_json),
+                ..Default::default()
+            }))
+        }
+    }
+}
+
+/// Convert raw `response_format` and/or `structured_outputs` JSON blobs into
+/// engine-core [`StructuredOutputsParams`].
+///
+/// Used by the completions endpoint which keeps both fields as opaque
+/// `serde_json::Value`.
+pub fn convert_from_response_format_value(
+    response_format: &Option<serde_json::Value>,
+    structured_outputs: &Option<serde_json::Value>,
+) -> Result<Option<StructuredOutputsParams>, ApiError> {
+    if let Some(raw) = structured_outputs {
+        return Ok(Some(deserialize_structured_outputs(raw)?));
+    }
+
+    let Some(raw) = response_format else {
+        return Ok(None);
+    };
+
+    // Deserialize into our typed enum and delegate.
+    let fmt: ResponseFormat = serde_json::from_value(raw.clone()).map_err(|e| {
+        ApiError::invalid_request(
+            format!("invalid response_format: {e}"),
+            Some("response_format"),
+        )
+    })?;
+    convert_from_response_format(Some(&fmt), &None)
+}
diff --git a/rust/src/server/src/routes/openai/utils/types.rs b/rust/src/server/src/routes/openai/utils/types.rs
new file mode 100644
index 000000000000..ff747a5daf55
--- /dev/null
+++ b/rust/src/server/src/routes/openai/utils/types.rs
@@ -0,0 +1,425 @@
+use std::collections::HashMap;
+use std::slice;
+
+use llm_multimodal::ImageDetail;
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+
+// ============================================================================
+// Constants
+// ============================================================================
+
+/// Default model identifier used when no model is specified.
+pub const UNKNOWN_MODEL_ID: &str = "unknown";
+
+// ============================================================================
+// Default value helpers
+// ============================================================================
+
+/// Helper function for serde default value (returns true).
+pub fn default_true() -> bool {
+    true
+}
+
+// ============================================================================
+// String/Array Utilities
+// ============================================================================
+
+/// A type that can be either a single string or an array of strings.
+#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
+#[serde(untagged)]
+pub enum StringOrArray {
+    String(String),
+    Array(Vec<String>),
+}
+
+impl StringOrArray {
+    pub fn as_slice(&self) -> &[String] {
+        match self {
+            StringOrArray::String(s) => slice::from_ref(s),
+            StringOrArray::Array(arr) => arr,
+        }
+    }
+
+    #[allow(unused)]
+    pub fn into_vec(self) -> Vec<String> {
+        match self {
+            StringOrArray::String(s) => vec![s],
+            StringOrArray::Array(arr) => arr,
+        }
+    }
+}
+
+/// Validates stop sequences (non-empty strings)
+pub fn validate_stop(stop: &StringOrArray) -> Result<(), validator::ValidationError> {
+    if stop.as_slice().iter().any(|s| s.is_empty()) {
+        return Err(validator::ValidationError::new(
+            "stop strings cannot be empty",
+        ));
+    }
+    Ok(())
+}
+
+// ============================================================================
+// Validation helpers
+// ============================================================================
+
+/// Validates top_p: 0.0 < top_p <= 1.0.
+pub fn validate_top_p_value(top_p: f32) -> Result<(), validator::ValidationError> {
+    if !(top_p > 0.0 && top_p <= 1.0) {
+        return Err(validator::ValidationError::new(
+            "top_p must be in (0, 1] - greater than 0.0 and at most 1.0",
+        ));
+    }
+    Ok(())
+}
+
+// ============================================================================
+// Content Parts (for multimodal messages)
+// ============================================================================
+
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
+#[serde(tag = "type")]
+pub enum ContentPart {
+    #[serde(rename = "text")]
+    Text { text: String },
+    #[serde(rename = "image_url")]
+    ImageUrl {
+        image_url: ImageUrl,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        uuid: Option<String>,
+    },
+    #[serde(rename = "video_url")]
+    VideoUrl { video_url: VideoUrl },
+}
+
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
+pub struct ImageUrl {
+    pub url: String,
+    pub detail: Option<ImageDetail>,
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
+pub struct VideoUrl {
+    pub url: String,
+}
+
+// ============================================================================
+// Streaming
+// ============================================================================
+
+/// Mirrors the Python vLLM `StreamOptions` class.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct StreamOptions {
+    pub include_usage: Option<bool>,
+    pub continuous_usage_stats: Option<bool>,
+}
+
+// ============================================================================
+// Tools and Function Calling
+// ============================================================================
+
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct Tool {
+    #[serde(rename = "type")]
+    pub tool_type: String,
+    pub function: Function,
+}
+
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct Function {
+    pub name: String,
+    pub description: Option<String>,
+    pub parameters: Value,
+    /// Whether to enable strict schema adherence (OpenAI structured outputs).
+    pub strict: Option<bool>,
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct ToolCall {
+    pub id: String,
+    #[serde(rename = "type")]
+    pub tool_type: String,
+    pub function: FunctionCallResponse,
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct FunctionCallResponse {
+    pub name: String,
+    #[serde(default)]
+    pub arguments: Option<String>,
+}
+
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct ToolCallDelta {
+    pub index: u32,
+    pub id: Option<String>,
+    #[serde(rename = "type")]
+    pub tool_type: Option<String>,
+    pub function: Option<FunctionCallDelta>,
+}
+
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct FunctionCallDelta {
+    pub name: Option<String>,
+    pub arguments: Option<String>,
+}
+
+/// Tool choice value for simple string options.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ToolChoiceValue {
+    Auto,
+    Required,
+    None,
+}
+
+/// Tool choice for the Chat Completion API.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+#[serde(untagged)]
+pub enum ToolChoice {
+    Value(ToolChoiceValue),
+    Function {
+        #[serde(rename = "type")]
+        tool_type: String,
+        function: FunctionChoice,
+    },
+    AllowedTools {
+        #[serde(rename = "type")]
+        tool_type: String,
+        mode: String,
+        tools: Vec<ToolReference>,
+    },
+}
+
+impl Default for ToolChoice {
+    fn default() -> Self {
+        Self::Value(ToolChoiceValue::Auto)
+    }
+}
+
+/// Function choice specification for `ToolChoice::Function`.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct FunctionChoice {
+    pub name: String,
+}
+
+/// Tool reference for `ToolChoice::AllowedTools`.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+#[serde(tag = "type")]
+#[serde(rename_all = "snake_case")]
+pub enum ToolReference {
+    #[serde(rename = "function")]
+    Function { name: String },
+    #[serde(rename = "mcp")]
+    Mcp {
+        server_label: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        name: Option<String>,
+    },
+    #[serde(rename = "file_search")]
+    FileSearch,
+    #[serde(rename = "web_search_preview")]
+    WebSearchPreview,
+    #[serde(rename = "computer_use_preview")]
+    ComputerUsePreview,
+    #[serde(rename = "code_interpreter")]
+    CodeInterpreter,
+    #[serde(rename = "image_generation")]
+    ImageGeneration,
+}
+
+impl ToolReference {
+    /// Get a unique identifier for this tool reference.
+    pub fn identifier(&self) -> String {
+        match self {
+            ToolReference::Function { name } => format!("function:{name}"),
+            ToolReference::Mcp {
+                server_label,
+                name: Some(n),
+            } => format!("mcp:{server_label}:{n}"),
+            ToolReference::Mcp {
+                server_label,
+                name: _,
+            } => format!("mcp:{server_label}"),
+            ToolReference::FileSearch => "file_search".to_string(),
+            ToolReference::WebSearchPreview => "web_search_preview".to_string(),
+            ToolReference::ComputerUsePreview => "computer_use_preview".to_string(),
+            ToolReference::CodeInterpreter => "code_interpreter".to_string(),
+            ToolReference::ImageGeneration => "image_generation".to_string(),
+        }
+    }
+}
+
+// ============================================================================
+// Chat Messages
+// ============================================================================
+
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Deserialize, Serialize)]
+#[serde(tag = "role")]
+pub enum ChatMessage {
+    #[serde(rename = "system")]
+    System {
+        content: MessageContent,
+        name: Option<String>,
+    },
+    #[serde(rename = "user")]
+    User {
+        content: MessageContent,
+        name: Option<String>,
+    },
+    #[serde(rename = "assistant")]
+    Assistant {
+        content: Option<MessageContent>,
+        name: Option<String>,
+        tool_calls: Option<Vec<ToolCall>>,
+        /// Reasoning content for reasoning-capable models.
+        #[serde(alias = "reasoning_content")]
+        #[serde(alias = "thinking")]
+        reasoning: Option<String>,
+    },
+    #[serde(rename = "tool")]
+    Tool {
+        content: MessageContent,
+        tool_call_id: String,
+    },
+    #[serde(rename = "function")]
+    Function { content: String, name: String },
+    #[serde(rename = "developer")]
+    Developer {
+        content: MessageContent,
+        tools: Option<Vec<Tool>>,
+        name: Option<String>,
+    },
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
+#[serde(untagged)]
+pub enum MessageContent {
+    Text(String),
+    Parts(Vec<ContentPart>),
+}
+
+// ============================================================================
+// Usage and Logging
+// ============================================================================
+
+/// Mirrors the Python vLLM `UsageInfo` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub struct Usage {
+    pub prompt_tokens: u32,
+    pub total_tokens: u32,
+    pub completion_tokens: Option<u32>,
+    pub prompt_tokens_details: Option<PromptTokenUsageInfo>,
+}
+
+impl Usage {
+    /// Create a Usage from prompt and completion token counts.
+    pub fn from_counts(prompt_tokens: u32, completion_tokens: u32) -> Self {
+        Self {
+            prompt_tokens,
+            total_tokens: prompt_tokens + completion_tokens,
+            completion_tokens: Some(completion_tokens),
+            prompt_tokens_details: None,
+        }
+    }
+}
+
+/// Mirrors the Python vLLM `PromptTokenUsageInfo` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub struct PromptTokenUsageInfo {
+    pub cached_tokens: Option<u32>,
+}
+
+/// OpenAI completions-style logprobs.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct LogProbs {
+    pub tokens: Vec<String>,
+    pub token_logprobs: Vec<Option<f32>>,
+    pub top_logprobs: Vec<Option<HashMap<String, f32>>>,
+    pub text_offset: Vec<u32>,
+}
+
+/// Mirrors the Python vLLM `ChatCompletionLogProbs` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub struct ChatLogProbs {
+    pub content: Option<Vec<ChatLogProbsContent>>,
+}
+
+/// Mirrors the Python vLLM `ChatCompletionLogProbsContent` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub struct ChatLogProbsContent {
+    pub token: String,
+    pub logprob: f32,
+    pub bytes: Option<Vec<u8>>,
+    pub top_logprobs: Vec<TopLogProb>,
+}
+
+/// Mirrors the Python vLLM `ChatCompletionLogProb` class.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Serialize)]
+pub struct TopLogProb {
+    pub token: String,
+    pub logprob: f32,
+    pub bytes: Option<Vec<u8>>,
+}
+
+// ============================================================================
+// Error Types
+// ============================================================================
+
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct ErrorResponse {
+    pub error: ErrorDetail,
+}
+
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct ErrorDetail {
+    pub message: String,
+    #[serde(rename = "type")]
+    pub error_type: String,
+    pub param: Option<String>,
+    pub code: Option<String>,
+}
+
+// ============================================================================
+// Model types
+// ============================================================================
+
+/// A single model entry in the `/v1/models` response.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ModelObject {
+    pub id: String,
+    pub object: String,
+    pub created: i64,
+    pub owned_by: String,
+}
+
+/// Response body for `GET /v1/models`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ListModelsResponse {
+    pub object: String,
+    pub data: Vec<ModelObject>,
+}
+
+// ============================================================================
+// Normalizable trait
+// ============================================================================
+
+/// Trait for request types that need post-deserialization normalization.
+pub trait Normalizable {
+    /// Normalize the request by applying defaults and transformations.
+    fn normalize(&mut self) {
+        // Default: no-op
+    }
+}
diff --git a/rust/src/server/src/routes/openai/utils/validated_json.rs b/rust/src/server/src/routes/openai/utils/validated_json.rs
new file mode 100644
index 000000000000..d07158cc2e0c
--- /dev/null
+++ b/rust/src/server/src/routes/openai/utils/validated_json.rs
@@ -0,0 +1,55 @@
+//! Validated JSON extractor for automatic request validation.
+//! Variation of https://github.com/lightseekorg/smg/blob/main/crates/protocols/src/validated.rs
+
+use axum::Json;
+use axum::extract::rejection::JsonRejection;
+use axum::extract::{FromRequest, Request};
+use serde::de::DeserializeOwned;
+use validator::Validate;
+
+use super::types::Normalizable;
+use crate::error::{ApiError, invalid_request};
+
+/// A JSON extractor that automatically validates and normalizes the request
+/// body.
+///
+/// This extractor deserializes the request body and automatically calls
+/// `.validate()` on types that implement the `Validate` trait. If validation
+/// fails, it returns [`ApiError::InvalidRequest`] with details about the
+/// validation errors.
+pub struct ValidatedJson<T>(pub T);
+
+impl<S, T> FromRequest<S> for ValidatedJson<T>
+where
+    T: DeserializeOwned + Validate + Normalizable + Send,
+    S: Send + Sync,
+{
+    type Rejection = ApiError;
+
+    async fn from_request(req: Request, state: &S) -> Result<Self, Self::Rejection> {
+        let Json(mut data) = Json::<T>::from_request(req, state)
+            .await
+            .map_err(|err: JsonRejection| ApiError::json_parse_error(err.body_text()))?;
+
+        data.normalize();
+
+        data.validate()
+            .map_err(|validation_errors| invalid_request!("{}", validation_errors))?;
+
+        Ok(ValidatedJson(data))
+    }
+}
+
+impl<T> std::ops::Deref for ValidatedJson<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl<T> std::ops::DerefMut for ValidatedJson<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
diff --git a/rust/src/server/src/routes/sleep.rs b/rust/src/server/src/routes/sleep.rs
new file mode 100644
index 000000000000..d7b279699b30
--- /dev/null
+++ b/rust/src/server/src/routes/sleep.rs
@@ -0,0 +1,78 @@
+use std::sync::Arc;
+
+use axum::Json;
+use axum::extract::{Query, State};
+use axum::http::StatusCode;
+use serde::{Deserialize, Serialize};
+
+use crate::error::ApiError;
+use crate::state::AppState;
+use crate::utils::utility_call_error;
+
+#[derive(Serialize)]
+pub(crate) struct IsSleepingResponse {
+    is_sleeping: bool,
+}
+
+#[derive(Debug, Deserialize)]
+pub(crate) struct SleepParams {
+    #[serde(default = "default_sleep_level")]
+    level: u32,
+    #[serde(default = "default_sleep_mode")]
+    mode: String,
+}
+
+#[derive(Debug, Default, Deserialize)]
+pub(crate) struct WakeUpParams {
+    #[serde(default)]
+    tags: Option<Vec<String>>,
+}
+
+const fn default_sleep_level() -> u32 {
+    1
+}
+
+fn default_sleep_mode() -> String {
+    "abort".to_string()
+}
+
+/// Put the engine to sleep.
+pub async fn sleep(
+    State(state): State<Arc<AppState>>,
+    Query(params): Query<SleepParams>,
+) -> Result<StatusCode, ApiError> {
+    state
+        .engine_core_client()
+        .sleep(params.level, &params.mode)
+        .await
+        .map_err(|error| utility_call_error("sleep", error))?;
+
+    Ok(StatusCode::OK)
+}
+
+/// Wake the engine from sleep mode.
+pub async fn wake_up(
+    State(state): State<Arc<AppState>>,
+    Query(params): Query<WakeUpParams>,
+) -> Result<StatusCode, ApiError> {
+    state
+        .engine_core_client()
+        .wake_up(params.tags)
+        .await
+        .map_err(|error| utility_call_error("wake_up", error))?;
+
+    Ok(StatusCode::OK)
+}
+
+/// Return whether the engine is currently sleeping at any level.
+pub async fn is_sleeping(
+    State(state): State<Arc<AppState>>,
+) -> Result<Json<IsSleepingResponse>, ApiError> {
+    let is_sleeping = state
+        .engine_core_client()
+        .is_sleeping()
+        .await
+        .map_err(|error| utility_call_error("is_sleeping", error))?;
+
+    Ok(Json(IsSleepingResponse { is_sleeping }))
+}
diff --git a/rust/src/server/src/routes/tests.rs b/rust/src/server/src/routes/tests.rs
new file mode 100644
index 000000000000..e1a16abcda57
--- /dev/null
+++ b/rust/src/server/src/routes/tests.rs
@@ -0,0 +1,3680 @@
+// Route tests should use `Service::call` rather than `ServiceExt::oneshot`.
+// `oneshot` consumes the router and can drop `AppState` before a streaming
+// response body is fully drained, which closes the mock engine connection too
+// early and causes flaky `closed unexpectedly` failures.
+
+use std::collections::BTreeSet;
+use std::future::Future;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+use std::time::Duration;
+use std::{fmt, fs};
+
+use axum::body::{Body, to_bytes};
+use axum::http::{Request, StatusCode};
+use bytes::Bytes;
+use futures::StreamExt as _;
+use rmpv::Value;
+use serde_json::json;
+use serial_test::serial;
+use tower::{Service as _, ServiceExt as _};
+use vllm_chat::{
+    ChatBackend, ChatContent, ChatContentPart, ChatEvent, ChatLlm, ChatMessage, ChatRenderer,
+    ChatRequest, ChatRole, ChatTextBackend, DefaultChatOutputProcessor, DynChatOutputProcessor,
+    DynChatRenderer, NewChatOutputProcessorOptions, SamplingParams,
+};
+use vllm_engine_core_client::protocol::logprobs::{
+    Logprobs, MaybeWireLogprobs, PositionLogprobs, TokenLogprob,
+};
+use vllm_engine_core_client::protocol::utility::{UtilityOutput, UtilityResultEnvelope};
+use vllm_engine_core_client::protocol::{
+    EngineCoreFinishReason, EngineCoreOutput, EngineCoreOutputs, EngineCoreRequest, StopReason,
+    decode_value,
+};
+use vllm_engine_core_client::test_utils::{IpcNamespace, spawn_mock_engine_task};
+use vllm_engine_core_client::{
+    ENGINE_CORE_DEAD_SENTINEL, EngineCoreClient, EngineCoreClientConfig, EngineId,
+};
+use vllm_llm::Llm;
+use vllm_metrics::METRICS;
+use vllm_text::tokenizer::{DynTokenizer, Tokenizer};
+use vllm_text::{Prompt, TextBackend};
+use zeromq::prelude::{SocketRecv, SocketSend};
+use zeromq::{DealerSocket, PushSocket, ZmqMessage};
+
+use super::{build_router, build_router_with_dev_mode};
+use crate::routes::openai::chat_completions::convert::prepare_chat_request;
+use crate::state::AppState;
+
+fn request_output(
+    request_id: &str,
+    new_token_ids: Vec<u32>,
+    finish_reason: Option<EngineCoreFinishReason>,
+) -> EngineCoreOutput {
+    request_output_with_stop_reason(request_id, new_token_ids, finish_reason, None)
+}
+
+fn request_output_with_stop_reason(
+    request_id: &str,
+    new_token_ids: Vec<u32>,
+    finish_reason: Option<EngineCoreFinishReason>,
+    stop_reason: Option<StopReason>,
+) -> EngineCoreOutput {
+    EngineCoreOutput {
+        request_id: request_id.to_string(),
+        new_token_ids,
+        new_logprobs: None,
+        new_prompt_logprobs_tensors: None,
+        pooling_output: None,
+        finish_reason,
+        stop_reason,
+        events: None,
+        kv_transfer_params: None,
+        trace_headers: None,
+        prefill_stats: None,
+        routed_experts: None,
+        num_nans_in_logits: 0,
+    }
+}
+
+fn request_output_with_logprobs(
+    request_id: &str,
+    new_token_ids: Vec<u32>,
+    finish_reason: Option<EngineCoreFinishReason>,
+    stop_reason: Option<StopReason>,
+    new_logprobs: Option<Logprobs>,
+    new_prompt_logprobs_tensors: Option<Logprobs>,
+) -> EngineCoreOutput {
+    EngineCoreOutput {
+        request_id: request_id.to_string(),
+        new_token_ids,
+        new_logprobs: new_logprobs.map(MaybeWireLogprobs::Direct),
+        new_prompt_logprobs_tensors: new_prompt_logprobs_tensors.map(MaybeWireLogprobs::Direct),
+        pooling_output: None,
+        finish_reason,
+        stop_reason,
+        events: None,
+        kv_transfer_params: None,
+        trace_headers: None,
+        prefill_stats: None,
+        routed_experts: None,
+        num_nans_in_logits: 0,
+    }
+}
+
+fn request_output_with_logprobs_and_kv(
+    request_id: &str,
+    new_token_ids: Vec<u32>,
+    finish_reason: Option<EngineCoreFinishReason>,
+    stop_reason: Option<StopReason>,
+    new_logprobs: Option<Logprobs>,
+    new_prompt_logprobs_tensors: Option<Logprobs>,
+    kv_transfer_params: Option<serde_json::Value>,
+) -> EngineCoreOutput {
+    EngineCoreOutput {
+        request_id: request_id.to_string(),
+        new_token_ids,
+        new_logprobs: new_logprobs.map(MaybeWireLogprobs::Direct),
+        new_prompt_logprobs_tensors: new_prompt_logprobs_tensors.map(MaybeWireLogprobs::Direct),
+        pooling_output: None,
+        finish_reason,
+        stop_reason,
+        events: None,
+        kv_transfer_params,
+        trace_headers: None,
+        prefill_stats: None,
+        routed_experts: None,
+        num_nans_in_logits: 0,
+    }
+}
+
+fn bytes_to_token_ids(bytes: &[u8]) -> Vec<u32> {
+    bytes.iter().map(|byte| u32::from(*byte)).collect()
+}
+
+fn default_stream_output_specs() -> Vec<(Vec<u32>, Option<EngineCoreFinishReason>)> {
+    vec![
+        (vec![b'h' as u32], None),
+        (vec![b'i' as u32], None),
+        (vec![b'!' as u32], Some(EngineCoreFinishReason::Stop)),
+    ]
+}
+
+fn sse_data_payloads(text: &str) -> Vec<&str> {
+    text.lines().filter_map(|line| line.strip_prefix("data: ")).collect()
+}
+
+type TestFuture<'a> = Pin<Box<dyn Future<Output = ()> + Send + 'a>>;
+
+fn boxed_test_future<'a>(future: impl Future<Output = ()> + Send + 'a) -> TestFuture<'a> {
+    Box::pin(future)
+}
+
+struct MockEngineTask {
+    shutdown_tx: Option<tokio::sync::oneshot::Sender<()>>,
+    join_handle: Option<tokio::task::JoinHandle<()>>,
+}
+
+impl MockEngineTask {
+    fn new(
+        (shutdown_tx, join_handle): (
+            tokio::sync::oneshot::Sender<()>,
+            tokio::task::JoinHandle<()>,
+        ),
+    ) -> Self {
+        Self {
+            shutdown_tx: Some(shutdown_tx),
+            join_handle: Some(join_handle),
+        }
+    }
+
+    async fn finish(self) {
+        self.await.expect("mock engine task");
+    }
+
+    fn abort(&self) {
+        if let Some(join_handle) = &self.join_handle {
+            join_handle.abort();
+        }
+    }
+
+    async fn abort_and_join(mut self) {
+        if let Some(join_handle) = self.join_handle.take() {
+            join_handle.abort();
+            let _ = join_handle.await;
+        }
+    }
+}
+
+impl Future for MockEngineTask {
+    type Output = Result<(), tokio::task::JoinError>;
+
+    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        if let Some(shutdown_tx) = self.shutdown_tx.take() {
+            let _ = shutdown_tx.send(());
+        }
+        match self.join_handle.as_mut() {
+            Some(join_handle) => Pin::new(join_handle).poll(cx),
+            None => Poll::Ready(Ok(())),
+        }
+    }
+}
+
+impl Drop for MockEngineTask {
+    fn drop(&mut self) {
+        if let Some(join_handle) = &self.join_handle {
+            join_handle.abort();
+        }
+    }
+}
+
+fn engine_outputs_for_request(
+    request_id: &str,
+    output_specs: Vec<(Vec<u32>, Option<EngineCoreFinishReason>)>,
+) -> EngineCoreOutputs {
+    EngineCoreOutputs {
+        engine_index: 0,
+        outputs: output_specs
+            .into_iter()
+            .map(|(token_ids, finish_reason)| request_output(request_id, token_ids, finish_reason))
+            .collect(),
+        scheduler_stats: None,
+        timestamp: 0.0,
+        utility_output: None,
+        finished_requests: None,
+        wave_complete: None,
+        start_wave: None,
+    }
+}
+
+fn test_llm(client: EngineCoreClient) -> Llm {
+    Llm::new(client).with_request_id_randomization(false)
+}
+
+fn sample_logprobs_for_token(token_id: u32, alternate_token_id: u32) -> Logprobs {
+    Logprobs {
+        positions: vec![PositionLogprobs {
+            entries: vec![
+                TokenLogprob {
+                    token_id,
+                    logprob: -0.1,
+                    rank: 1,
+                },
+                TokenLogprob {
+                    token_id: alternate_token_id,
+                    logprob: -0.2,
+                    rank: 1,
+                },
+            ],
+        }],
+    }
+}
+
+fn sample_logprobs_for_tokens(token_ids: &[u32]) -> Logprobs {
+    Logprobs {
+        positions: token_ids
+            .iter()
+            .map(|&token_id| PositionLogprobs {
+                entries: vec![
+                    TokenLogprob {
+                        token_id,
+                        logprob: -0.1,
+                        rank: 1,
+                    },
+                    TokenLogprob {
+                        token_id: token_id.saturating_add(1),
+                        logprob: -0.2,
+                        rank: 2,
+                    },
+                ],
+            })
+            .collect(),
+    }
+}
+
+fn prompt_logprobs_for_hello() -> Logprobs {
+    Logprobs {
+        positions: vec![
+            PositionLogprobs {
+                entries: vec![
+                    TokenLogprob {
+                        token_id: b'e' as u32,
+                        logprob: -0.3,
+                        rank: 1,
+                    },
+                    TokenLogprob {
+                        token_id: b'a' as u32,
+                        logprob: -0.5,
+                        rank: 1,
+                    },
+                ],
+            },
+            PositionLogprobs {
+                entries: vec![
+                    TokenLogprob {
+                        token_id: b'l' as u32,
+                        logprob: -0.4,
+                        rank: 1,
+                    },
+                    TokenLogprob {
+                        token_id: b'r' as u32,
+                        logprob: -0.6,
+                        rank: 1,
+                    },
+                ],
+            },
+            PositionLogprobs {
+                entries: vec![
+                    TokenLogprob {
+                        token_id: b'l' as u32,
+                        logprob: -0.45,
+                        rank: 1,
+                    },
+                    TokenLogprob {
+                        token_id: b'i' as u32,
+                        logprob: -0.65,
+                        rank: 1,
+                    },
+                ],
+            },
+            PositionLogprobs {
+                entries: vec![
+                    TokenLogprob {
+                        token_id: b'o' as u32,
+                        logprob: -0.5,
+                        rank: 1,
+                    },
+                    TokenLogprob {
+                        token_id: b'u' as u32,
+                        logprob: -0.7,
+                        rank: 1,
+                    },
+                ],
+            },
+        ],
+    }
+}
+
+fn prompt_logprobs_for_tokens(token_ids: &[u32]) -> Logprobs {
+    Logprobs {
+        positions: token_ids
+            .iter()
+            .skip(1)
+            .map(|&token_id| PositionLogprobs {
+                entries: vec![
+                    TokenLogprob {
+                        token_id,
+                        logprob: -0.3,
+                        rank: 1,
+                    },
+                    TokenLogprob {
+                        token_id: token_id.saturating_add(1),
+                        logprob: -0.5,
+                        rank: 2,
+                    },
+                ],
+            })
+            .collect(),
+    }
+}
+
+fn utility_result_value<T>(value: T) -> UtilityResultEnvelope
+where
+    T: serde::Serialize,
+{
+    UtilityResultEnvelope::without_type_info(rmpv::ext::to_value(value).expect("encode result"))
+}
+
+fn utility_none_result() -> UtilityResultEnvelope {
+    UtilityResultEnvelope::without_type_info(Value::Nil)
+}
+
+fn utility_outputs(call_id: u64, result: UtilityResultEnvelope) -> EngineCoreOutputs {
+    EngineCoreOutputs {
+        utility_output: Some(UtilityOutput {
+            call_id: call_id.into(),
+            failure_message: None,
+            result: Some(result),
+        }),
+        ..Default::default()
+    }
+}
+
+async fn send_outputs(push: &mut PushSocket, outputs: EngineCoreOutputs) {
+    push.send(ZmqMessage::from(
+        rmp_serde::to_vec_named(&outputs).expect("encode outputs"),
+    ))
+    .await
+    .expect("send outputs");
+}
+
+async fn recv_engine_message(dealer: &mut DealerSocket) -> Vec<Bytes> {
+    dealer.recv().await.expect("recv engine message").into_vec()
+}
+
+#[derive(Clone)]
+struct FakeChatBackend {
+    model_id: String,
+    multimodal_model_info: Option<vllm_chat::multimodal::MultimodalModelInfo>,
+}
+
+#[derive(Debug)]
+struct FakeChatTokenizer;
+
+impl Tokenizer for FakeChatTokenizer {
+    fn encode(
+        &self,
+        text: &str,
+        _add_special_tokens: bool,
+    ) -> vllm_text::tokenizer::Result<Vec<u32>> {
+        let mut token_ids = Vec::new();
+        let mut rest = text;
+        while !rest.is_empty() {
+            if let Some(stripped) = rest.strip_prefix("<image>") {
+                token_ids.push(999);
+                rest = stripped;
+                continue;
+            }
+
+            let ch = rest.chars().next().expect("rest is not empty");
+            let mut buf = [0; 4];
+            token_ids.extend(ch.encode_utf8(&mut buf).bytes().map(u32::from));
+            rest = &rest[ch.len_utf8()..];
+        }
+        Ok(token_ids)
+    }
+
+    fn decode(
+        &self,
+        token_ids: &[u32],
+        _skip_special_tokens: bool,
+    ) -> vllm_text::tokenizer::Result<String> {
+        Ok(
+            String::from_utf8_lossy(&token_ids.iter().map(|id| *id as u8).collect::<Vec<_>>())
+                .into_owned(),
+        )
+    }
+
+    fn token_to_id(&self, token: &str) -> Option<u32> {
+        match token {
+            "<image>" => Some(999),
+            "<|image_pad|>" => Some(151655),
+            "<think>" => Some(0xF001),
+            "</think>" => Some(0xF002),
+            "<|START_THINKING|>" => Some(0xF003),
+            "<|END_THINKING|>" => Some(0xF004),
+            "◁think▷" => Some(0xF005),
+            "◁/think▷" => Some(0xF006),
+            _ => None,
+        }
+    }
+
+    fn id_to_token(&self, id: u32) -> Option<String> {
+        match id {
+            999 => Some("<image>".to_string()),
+            151655 => Some("<|image_pad|>".to_string()),
+            0xF001 => Some("<think>".to_string()),
+            0xF002 => Some("</think>".to_string()),
+            0xF003 => Some("<|START_THINKING|>".to_string()),
+            0xF004 => Some("<|END_THINKING|>".to_string()),
+            0xF005 => Some("◁think▷".to_string()),
+            0xF006 => Some("◁/think▷".to_string()),
+            _ => None,
+        }
+    }
+}
+
+impl FakeChatBackend {
+    fn new() -> Self {
+        Self {
+            model_id: "test-model".to_string(),
+            multimodal_model_info: None,
+        }
+    }
+
+    fn with_model_id(model_id: impl Into<String>) -> Self {
+        Self {
+            model_id: model_id.into(),
+            multimodal_model_info: None,
+        }
+    }
+
+    fn with_multimodal_model_info(
+        multimodal_model_info: vllm_chat::multimodal::MultimodalModelInfo,
+    ) -> Self {
+        Self {
+            model_id: "test-model".to_string(),
+            multimodal_model_info: Some(multimodal_model_info),
+        }
+    }
+}
+
+impl fmt::Debug for FakeChatBackend {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FakeChatBackend")
+            .field("model_id", &self.model_id)
+            .finish_non_exhaustive()
+    }
+}
+
+impl TextBackend for FakeChatBackend {
+    fn tokenizer(&self) -> DynTokenizer {
+        Arc::new(FakeChatTokenizer)
+    }
+
+    fn model_id(&self) -> &str {
+        &self.model_id
+    }
+}
+
+impl ChatBackend for FakeChatBackend {
+    fn chat_renderer(&self) -> DynChatRenderer {
+        Arc::new(self.clone())
+    }
+
+    fn multimodal_model_info(&self) -> Option<&vllm_chat::multimodal::MultimodalModelInfo> {
+        self.multimodal_model_info.as_ref()
+    }
+
+    fn new_chat_output_processor(
+        &self,
+        request: &mut ChatRequest,
+        options: NewChatOutputProcessorOptions<'_>,
+    ) -> vllm_chat::Result<DynChatOutputProcessor> {
+        Ok(Box::new(DefaultChatOutputProcessor::new(
+            request,
+            &self.model_id,
+            self.tokenizer(),
+            options.tool_call_parser,
+            options.reasoning_parser,
+        )?))
+    }
+}
+
+impl ChatRenderer for FakeChatBackend {
+    fn render(&self, request: &ChatRequest) -> vllm_chat::Result<vllm_chat::RenderedPrompt> {
+        let mut prompt = String::new();
+        for message in &request.messages {
+            prompt.push_str(message.role().as_str());
+            prompt.push_str(": ");
+            prompt.push_str(&render_fake_message_content(message)?);
+            prompt.push('\n');
+        }
+        if request.chat_options.add_generation_prompt() {
+            prompt.push_str("assistant:");
+        }
+        Ok(vllm_chat::RenderedPrompt {
+            prompt: Prompt::Text(prompt),
+        })
+    }
+}
+
+fn render_fake_message_content(message: &ChatMessage) -> vllm_chat::Result<String> {
+    match message {
+        ChatMessage::System { content }
+        | ChatMessage::Developer { content, .. }
+        | ChatMessage::User { content }
+        | ChatMessage::ToolResponse { content, .. } => render_fake_content(content),
+        ChatMessage::Assistant { .. } => message.text_content(),
+    }
+}
+
+fn render_fake_content(content: &ChatContent) -> vllm_chat::Result<String> {
+    Ok(match content {
+        ChatContent::Text(text) => text.clone(),
+        ChatContent::Parts(parts) => {
+            let mut out = String::new();
+            for part in parts {
+                match part {
+                    ChatContentPart::Text { text } => out.push_str(text),
+                    ChatContentPart::ImageUrl { .. } => out.push_str("<image>"),
+                }
+            }
+            out
+        }
+    })
+}
+
+fn qwen_multimodal_model_info() -> vllm_chat::multimodal::MultimodalModelInfo {
+    let config_path = std::env::temp_dir().join(format!(
+        "vllm-server-qwen-config-{}.json",
+        uuid::Uuid::new_v4()
+    ));
+    fs::write(
+        &config_path,
+        r#"{"model_type":"qwen2_vl","vision_token_id":151655}"#,
+    )
+    .expect("write qwen test config");
+    let info = vllm_chat::multimodal::MultimodalModelInfo::from_paths(
+        "qwen2-vl-test".to_string(),
+        Some("qwen2_vl".to_string()),
+        Some(&config_path),
+        None,
+        Arc::new(FakeChatTokenizer),
+    )
+    .expect("load multimodal info")
+    .expect("qwen multimodal info is registered");
+    let _ = fs::remove_file(config_path);
+    info
+}
+
+#[derive(Clone, Debug)]
+struct FailingDecodeChatBackend;
+
+#[derive(Debug)]
+struct FailingDecodeTokenizer;
+
+impl Tokenizer for FailingDecodeTokenizer {
+    fn encode(
+        &self,
+        text: &str,
+        add_special_tokens: bool,
+    ) -> vllm_text::tokenizer::Result<Vec<u32>> {
+        FakeChatTokenizer.encode(text, add_special_tokens)
+    }
+
+    fn decode(
+        &self,
+        token_ids: &[u32],
+        skip_special_tokens: bool,
+    ) -> vllm_text::tokenizer::Result<String> {
+        if token_ids.contains(&(b'i' as u32)) {
+            return Err(vllm_text::tokenizer::TokenizerError(
+                "forced decode failure for streaming test".to_string(),
+            ));
+        }
+
+        FakeChatTokenizer.decode(token_ids, skip_special_tokens)
+    }
+
+    fn token_to_id(&self, token: &str) -> Option<u32> {
+        FakeChatTokenizer.token_to_id(token)
+    }
+}
+
+impl TextBackend for FailingDecodeChatBackend {
+    fn tokenizer(&self) -> DynTokenizer {
+        Arc::new(FailingDecodeTokenizer)
+    }
+
+    fn model_id(&self) -> &str {
+        "test-model"
+    }
+}
+
+impl ChatBackend for FailingDecodeChatBackend {
+    fn chat_renderer(&self) -> DynChatRenderer {
+        Arc::new(self.clone())
+    }
+
+    fn new_chat_output_processor(
+        &self,
+        _request: &mut ChatRequest,
+        _options: NewChatOutputProcessorOptions<'_>,
+    ) -> vllm_chat::Result<DynChatOutputProcessor> {
+        Ok(Box::new(DefaultChatOutputProcessor::plain_text_only()))
+    }
+}
+
+impl ChatRenderer for FailingDecodeChatBackend {
+    fn render(&self, request: &ChatRequest) -> vllm_chat::Result<vllm_chat::RenderedPrompt> {
+        FakeChatBackend::new().render(request)
+    }
+}
+
+async fn test_models_with_engine_outputs_and_backend_inner(
+    engine_id: impl Into<EngineId>,
+    output_specs: Vec<(Vec<u32>, Option<EngineCoreFinishReason>)>,
+    expected_prompt_token_ids: Option<Vec<u32>>,
+    backend: Arc<dyn ChatTextBackend>,
+) -> (ChatLlm, MockEngineTask) {
+    let ipc = IpcNamespace::new().expect("create ipc namespace");
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = engine_id.into();
+
+    let engine_task = MockEngineTask::new(spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        move |dealer, push| {
+            boxed_test_future(async move {
+                let add = recv_engine_message(dealer).await;
+                let request: EngineCoreRequest =
+                    rmp_serde::from_slice(&add[1]).expect("decode request");
+                if let Some(expected_prompt_token_ids) = expected_prompt_token_ids {
+                    assert_eq!(
+                        request.prompt_token_ids.as_deref(),
+                        Some(expected_prompt_token_ids.as_slice())
+                    );
+                }
+                send_outputs(
+                    push,
+                    engine_outputs_for_request(&request.request_id, output_specs),
+                )
+                .await;
+            })
+        },
+    ));
+
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name("test-model")
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .expect("connect client");
+
+    (
+        ChatLlm::from_shared_backend(test_llm(client), backend),
+        engine_task,
+    )
+}
+
+async fn test_models_with_engine_outputs_and_backend(
+    engine_id: impl Into<EngineId>,
+    output_specs: Vec<(Vec<u32>, Option<EngineCoreFinishReason>)>,
+    backend: Arc<dyn ChatTextBackend>,
+) -> (ChatLlm, MockEngineTask) {
+    test_models_with_engine_outputs_and_backend_inner(engine_id, output_specs, None, backend).await
+}
+
+async fn test_chat_with_engine_outputs(
+    engine_id: impl Into<EngineId>,
+    output_specs: Vec<(Vec<u32>, Option<EngineCoreFinishReason>)>,
+) -> (ChatLlm, MockEngineTask) {
+    test_models_with_engine_outputs_and_backend(
+        engine_id,
+        output_specs,
+        Arc::new(FakeChatBackend::new()),
+    )
+    .await
+}
+
+async fn test_app() -> axum::Router {
+    let (chat, _engine_task) = test_models_with_engine_outputs_and_backend(
+        b"engine-openai",
+        default_stream_output_specs(),
+        Arc::new(FakeChatBackend::new()),
+    )
+    .await;
+    build_router(Arc::new(AppState::new(
+        vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+        chat,
+    )))
+}
+
+async fn test_health_app_with_engine_script<F>(
+    script: F,
+) -> (axum::Router, Arc<AppState>, MockEngineTask)
+where
+    F: for<'a> FnOnce(&'a mut PushSocket) -> TestFuture<'a> + Send + 'static,
+{
+    let ipc = IpcNamespace::new().expect("create ipc namespace");
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-openai-health".to_vec();
+
+    let engine_task = MockEngineTask::new(spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        move |_dealer, push| script(push),
+    ));
+
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name("test-model")
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .expect("connect client");
+
+    let chat = ChatLlm::from_shared_backend(test_llm(client), Arc::new(FakeChatBackend::new()));
+    let state = Arc::new(AppState::new(
+        vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+        chat,
+    ));
+    (build_router(state.clone()), state, engine_task)
+}
+
+async fn test_admin_app_with_engine_script<F>(script: F) -> (axum::Router, MockEngineTask)
+where
+    F: for<'a> FnOnce(&'a mut DealerSocket, &'a mut PushSocket) -> TestFuture<'a> + Send + 'static,
+{
+    let ipc = IpcNamespace::new().expect("create ipc namespace");
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-openai-admin".to_vec();
+
+    let engine_task = MockEngineTask::new(spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        move |dealer, push| script(dealer, push),
+    ));
+
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name("test-model")
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .expect("connect client");
+
+    let chat = ChatLlm::from_shared_backend(test_llm(client), Arc::new(FakeChatBackend::new()));
+    (
+        build_router_with_dev_mode(
+            Arc::new(AppState::new(
+                vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+                chat,
+            )),
+            true,
+        ),
+        engine_task,
+    )
+}
+
+async fn test_app_with_engine_handle() -> (axum::Router, MockEngineTask) {
+    test_app_with_stream_output_specs(default_stream_output_specs()).await
+}
+
+async fn test_app_with_stream_output_specs(
+    output_specs: Vec<(Vec<u32>, Option<EngineCoreFinishReason>)>,
+) -> (axum::Router, MockEngineTask) {
+    let (chat, engine_task) = test_models_with_engine_outputs_and_backend(
+        b"engine-openai",
+        output_specs,
+        Arc::new(FakeChatBackend::new()),
+    )
+    .await;
+    (
+        build_router(Arc::new(AppState::new(
+            vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+            chat,
+        ))),
+        engine_task,
+    )
+}
+
+async fn test_app_with_backend_and_stream_output_specs(
+    backend: Arc<dyn ChatTextBackend>,
+    output_specs: Vec<(Vec<u32>, Option<EngineCoreFinishReason>)>,
+) -> (axum::Router, MockEngineTask) {
+    let (chat, engine_task) =
+        test_models_with_engine_outputs_and_backend(b"engine-openai", output_specs, backend).await;
+    (
+        build_router(Arc::new(AppState::new(
+            vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+            chat,
+        ))),
+        engine_task,
+    )
+}
+
+async fn test_app_with_backend_and_engine_request_check<F>(
+    backend: Arc<dyn ChatTextBackend>,
+    check_request: F,
+) -> (axum::Router, MockEngineTask)
+where
+    F: FnOnce(&EngineCoreRequest) + Send + 'static,
+{
+    let ipc = IpcNamespace::new().expect("create ipc namespace");
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-openai-check-request".to_vec();
+
+    let engine_task = MockEngineTask::new(spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        move |dealer, push| {
+            boxed_test_future(async move {
+                let add = recv_engine_message(dealer).await;
+                let request: EngineCoreRequest =
+                    rmp_serde::from_slice(&add[1]).expect("decode request");
+                check_request(&request);
+                send_outputs(
+                    push,
+                    engine_outputs_for_request(&request.request_id, default_stream_output_specs()),
+                )
+                .await;
+            })
+        },
+    ));
+
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name("test-model")
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .expect("connect client");
+
+    let chat = ChatLlm::from_shared_backend(test_llm(client), backend);
+    (
+        build_router(Arc::new(AppState::new(
+            vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+            chat,
+        ))),
+        engine_task,
+    )
+}
+
+async fn test_chat_with_engine_handle() -> (ChatLlm, MockEngineTask) {
+    test_chat_with_engine_outputs(b"engine-openai-chat", default_stream_output_specs()).await
+}
+
+async fn server_load(app: &axum::Router) -> u64 {
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("GET")
+                .uri("/load")
+                .body(Body::empty())
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    let value: serde_json::Value = serde_json::from_slice(&body).expect("json body");
+    value["server_load"].as_u64().expect("server_load")
+}
+
+async fn health_status(app: &axum::Router) -> (StatusCode, Bytes) {
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("GET")
+                .uri("/health")
+                .body(Body::empty())
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    let status = response.status();
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    (status, body)
+}
+
+fn metric_value(rendered: &str, metric: &str, labels: Option<&str>) -> Option<f64> {
+    rendered.lines().find_map(|line| {
+        let rest = line.strip_prefix(metric)?;
+
+        match labels {
+            Some(labels) => {
+                let (encoded_labels, value) = rest.split_once("} ")?;
+                if !encoded_labels.starts_with('{') {
+                    return None;
+                }
+                let expected_parts = labels.split(',');
+                if expected_parts.into_iter().all(|part| encoded_labels.contains(part)) {
+                    value.parse::<f64>().ok()
+                } else {
+                    None
+                }
+            }
+            None => rest.strip_prefix(' ').and_then(|value| value.parse::<f64>().ok()),
+        }
+    })
+}
+
+fn metric_delta(
+    rendered_before: &str,
+    rendered_after: &str,
+    metric: &str,
+    labels: Option<&str>,
+) -> f64 {
+    metric_value(rendered_after, metric, labels).unwrap_or(0.0)
+        - metric_value(rendered_before, metric, labels).unwrap_or(0.0)
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn list_models_returns_configured_model() {
+    let mut app = test_app().await;
+    let response = app
+        .call(Request::builder().uri("/v1/models").body(Body::empty()).expect("build request"))
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+    assert_eq!(json["data"][0]["id"], "Qwen/Qwen1.5-0.5B-Chat");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn http_metrics_record_list_models_requests() {
+    let mut app = test_app().await;
+    let before = METRICS.render().unwrap();
+
+    let response = app
+        .call(
+            Request::builder()
+                .method("GET")
+                .uri("/v1/models")
+                .body(Body::empty())
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    let after = METRICS.render().unwrap();
+    assert_eq!(
+        metric_delta(
+            &before,
+            &after,
+            "http_requests_total",
+            Some("method=\"GET\",status=\"2xx\",handler=\"/v1/models\""),
+        ),
+        1.0
+    );
+    assert_eq!(
+        metric_delta(
+            &before,
+            &after,
+            "http_request_duration_seconds_count",
+            Some("method=\"GET\",handler=\"/v1/models\""),
+        ),
+        1.0
+    );
+    assert_eq!(
+        metric_delta(
+            &before,
+            &after,
+            "http_request_duration_highr_seconds_count",
+            None,
+        ),
+        1.0
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn wrong_model_returns_not_found() {
+    let mut app = test_app().await;
+    let response = app
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "wrong-model",
+                        "stream": true,
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::NOT_FOUND);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn invalid_request_returns_openai_error() {
+    let mut app = test_app().await;
+    let response = app
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": false,
+                        "stream_options": {"include_usage": true},
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::BAD_REQUEST);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+    assert_eq!(json["error"]["type"], "invalid_request_error");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_stream_chat_returns_json_response() {
+    let (app, engine_task) = test_app_with_engine_handle().await;
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": false,
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    assert!(
+        response
+            .headers()
+            .get("content-type")
+            .and_then(|value| value.to_str().ok())
+            .is_some_and(|value| value.starts_with("application/json"))
+    );
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+
+    assert_eq!(json["object"], "chat.completion");
+    assert_eq!(json["choices"][0]["message"]["role"], "assistant");
+    assert_eq!(json["choices"][0]["message"]["content"], "hi");
+    assert_eq!(json["choices"][0]["finish_reason"], "stop");
+    assert_eq!(json["usage"]["prompt_tokens"], 22);
+    assert_eq!(json["usage"]["completion_tokens"], 3);
+    assert_eq!(json["usage"]["total_tokens"], 25);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_stream_chat_image_url_reaches_engine_mm_features() {
+    let (app, engine_task) = test_app_with_backend_and_engine_request_check(
+        Arc::new(FakeChatBackend::with_multimodal_model_info(
+            qwen_multimodal_model_info(),
+        )),
+        |request| {
+            let prompt_token_ids = request.prompt_token_ids.as_ref().expect("prompt token ids");
+            assert!(prompt_token_ids.contains(&151655));
+
+            let features = request.mm_features.as_ref().expect("multimodal features");
+            assert_eq!(features.len(), 1);
+            assert_eq!(features[0].modality, "image");
+            assert_eq!(features[0].identifier, "image-1");
+            assert!(features[0].mm_position.length > 0);
+            assert!(features[0].mm_position.is_embed.is_some());
+
+            let data = features[0].data.as_ref().expect("feature data");
+            assert!(data.contains_key("pixel_values"));
+            assert!(data.contains_key("image_grid_thw"));
+        },
+    )
+    .await;
+
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": false,
+                        "messages": [{
+                            "role": "user",
+                            "content": [
+                                {"type": "text", "text": "describe "},
+                                {
+                                    "type": "image_url",
+                                    "image_url": {
+                                        "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII="
+                                    },
+                                    "uuid": "image-1"
+                                }
+                            ]
+                        }]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+
+    assert_eq!(json["object"], "chat.completion");
+    assert_eq!(json["choices"][0]["message"]["content"], "hi");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_stream_chat_includes_logprobs_and_prompt_logprobs() {
+    let ipc = IpcNamespace::new().expect("create ipc namespace");
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-openai-chat-logprobs".to_vec();
+
+    let engine_task = MockEngineTask::new(spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            boxed_test_future(async move {
+                let add = recv_engine_message(dealer).await;
+                let request: EngineCoreRequest =
+                    rmp_serde::from_slice(&add[1]).expect("decode request");
+                let prompt_token_ids = request.prompt_token_ids.clone().expect("prompt token ids");
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 0,
+                        outputs: vec![request_output_with_logprobs(
+                            &request.request_id,
+                            bytes_to_token_ids(b"hi"),
+                            Some(EngineCoreFinishReason::Stop),
+                            None,
+                            Some(sample_logprobs_for_tokens(&bytes_to_token_ids(b"hi"))),
+                            Some(prompt_logprobs_for_tokens(&prompt_token_ids)),
+                        )],
+                        scheduler_stats: None,
+                        timestamp: 0.0,
+                        utility_output: None,
+                        finished_requests: None,
+                        wave_complete: None,
+                        start_wave: None,
+                    },
+                )
+                .await;
+            })
+        },
+    ));
+
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name("test-model")
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .expect("connect client");
+    let chat = ChatLlm::from_shared_backend(test_llm(client), Arc::new(FakeChatBackend::new()));
+    let mut app = build_router(Arc::new(AppState::new(
+        vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+        chat,
+    )));
+
+    let response = app
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": false,
+                        "logprobs": true,
+                        "prompt_logprobs": 1,
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+
+    assert_eq!(
+        json["choices"][0]["logprobs"]["content"][0]["token"],
+        json!("h")
+    );
+    assert_eq!(
+        json["choices"][0]["logprobs"]["content"][1]["token"],
+        json!("i")
+    );
+    assert_eq!(json["prompt_logprobs"][0], serde_json::Value::Null);
+    assert!(json["prompt_logprobs"][1].is_object());
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn happy_path_returns_sse_stream() {
+    let (app, engine_task) = test_app_with_engine_handle().await;
+    let before = METRICS.render().unwrap();
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": true,
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    assert_eq!(
+        response.headers().get("content-type").and_then(|value| value.to_str().ok()),
+        Some("text/event-stream")
+    );
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let text = String::from_utf8(body.to_vec()).expect("utf8 body");
+    let after = METRICS.render().unwrap();
+
+    assert!(text.contains("\"role\":\"assistant\""), "{text}");
+    assert!(text.starts_with("data: "), "{text}");
+    assert_eq!(
+        metric_delta(
+            &before,
+            &after,
+            "http_requests_total",
+            Some("method=\"POST\",status=\"2xx\",handler=\"/v1/chat/completions\""),
+        ),
+        1.0
+    );
+    assert_eq!(
+        metric_delta(
+            &before,
+            &after,
+            "http_request_duration_seconds_count",
+            Some("method=\"POST\",handler=\"/v1/chat/completions\""),
+        ),
+        1.0
+    );
+    assert_eq!(
+        metric_delta(
+            &before,
+            &after,
+            "http_request_duration_highr_seconds_count",
+            None,
+        ),
+        1.0
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn http_metrics_exclude_metrics_route() {
+    let mut app = test_app().await;
+    let before = METRICS.render().unwrap();
+
+    let response = app
+        .call(
+            Request::builder()
+                .method("GET")
+                .uri("/metrics")
+                .body(Body::empty())
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    let after = METRICS.render().unwrap();
+    assert_eq!(
+        metric_delta(
+            &before,
+            &after,
+            "http_request_duration_highr_seconds_count",
+            None,
+        ),
+        0.0
+    );
+    assert_eq!(
+        metric_value(
+            &after,
+            "http_requests_total",
+            Some("method=\"GET\",status=\"2xx\",handler=\"/metrics\""),
+        ),
+        None
+    );
+    assert_eq!(
+        metric_value(
+            &after,
+            "http_request_duration_seconds_count",
+            Some("method=\"GET\",handler=\"/metrics\""),
+        ),
+        None
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn http_metrics_group_error_statuses() {
+    let mut app = test_app().await;
+    let before = METRICS.render().unwrap();
+
+    let response = app
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": false,
+                        "stream_options": {"include_usage": true},
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::BAD_REQUEST);
+
+    let after = METRICS.render().unwrap();
+    assert_eq!(
+        metric_delta(
+            &before,
+            &after,
+            "http_requests_total",
+            Some("method=\"POST\",status=\"4xx\",handler=\"/v1/chat/completions\""),
+        ),
+        1.0
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn load_endpoint_tracks_chat_stream_lifecycle() {
+    let (app, engine_task) = test_app_with_engine_handle().await;
+
+    assert_eq!(server_load(&app).await, 0);
+
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": true,
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    assert_eq!(server_load(&app).await, 1);
+
+    let _body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+
+    assert_eq!(server_load(&app).await, 0);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn health_endpoint_returns_ok_with_empty_body_when_client_is_healthy() {
+    let (app, _state, engine_task) =
+        test_health_app_with_engine_script(|_push| boxed_test_future(async move {})).await;
+
+    let (status, body) = health_status(&app).await;
+    assert_eq!(status, StatusCode::OK);
+    assert!(body.is_empty(), "expected empty body, got {:?}", body);
+
+    engine_task.await.expect("mock engine task");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn health_endpoint_returns_503_after_engine_core_dead_sentinel() {
+    let (app, state, engine_task) = test_health_app_with_engine_script(|push| {
+        boxed_test_future(async move {
+            push.send(ZmqMessage::from(ENGINE_CORE_DEAD_SENTINEL.to_vec()))
+                .await
+                .expect("send sentinel");
+        })
+    })
+    .await;
+
+    tokio::time::timeout(Duration::from_secs(2), async {
+        while state.chat.engine_core_client().is_healthy() {
+            tokio::task::yield_now().await;
+        }
+    })
+    .await
+    .expect("wait for unhealthy client");
+
+    let (status, body) = health_status(&app).await;
+    assert_eq!(status, StatusCode::SERVICE_UNAVAILABLE);
+    assert!(body.is_empty(), "expected empty body, got {:?}", body);
+    assert!(matches!(
+        state.chat.engine_core_client().health_error().as_deref(),
+        Some(vllm_engine_core_client::Error::EngineCoreDead)
+    ));
+
+    engine_task.await.expect("mock engine task");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn load_endpoint_resets_when_stream_response_is_dropped() {
+    let (app, engine_task) = test_app_with_engine_handle().await;
+
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "stream": true
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    assert_eq!(server_load(&app).await, 1);
+
+    drop(response);
+    tokio::task::yield_now().await;
+    engine_task.await.expect("mock engine task");
+
+    assert_eq!(server_load(&app).await, 0);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn stream_error_is_returned_as_openai_error_sse() {
+    let (app, engine_task) = test_app_with_backend_and_stream_output_specs(
+        Arc::new(FailingDecodeChatBackend),
+        default_stream_output_specs(),
+    )
+    .await;
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": true,
+                        "stream_options": {"include_usage": true},
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let text = String::from_utf8(body.to_vec()).expect("utf8 body");
+
+    assert!(text.contains("\"role\":\"assistant\""), "{text}");
+    assert!(text.contains("\"type\":\"server_error\""), "{text}");
+    assert!(
+        text.contains("forced decode failure for streaming test"),
+        "{text}"
+    );
+    assert!(!text.contains("\"usage\":"), "{text}");
+    assert!(text.trim_end().ends_with("data: [DONE]"), "{text}");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn invalid_terminal_finish_reason_is_returned_as_openai_error_sse() {
+    let (app, engine_task) =
+        test_app_with_stream_output_specs(vec![(vec![], Some(EngineCoreFinishReason::Error))])
+            .await;
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": true,
+                        "stream_options": {"include_usage": true},
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let text = String::from_utf8(body.to_vec()).expect("utf8 body");
+
+    assert!(text.contains("\"role\":\"assistant\""), "{text}");
+    assert!(text.contains("\"type\":\"server_error\""), "{text}");
+    assert!(text.contains("Internal server error"), "{text}");
+    assert!(!text.contains("\"usage\":"), "{text}");
+    assert!(text.trim_end().ends_with("data: [DONE]"), "{text}");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn include_usage_adds_final_usage_chunk_before_done() {
+    let (app, engine_task) = test_app_with_stream_output_specs(default_stream_output_specs()).await;
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": true,
+                        "stream_options": {"include_usage": true},
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let text = String::from_utf8(body.to_vec()).expect("utf8 body");
+
+    let payloads = sse_data_payloads(&text);
+    let finish_index = payloads
+        .iter()
+        .position(|payload| payload.contains("\"finish_reason\":\"stop\""))
+        .expect("finish chunk");
+    let usage_index = payloads
+        .iter()
+        .position(|payload| payload.contains("\"usage\":"))
+        .expect("usage chunk");
+    let done_index =
+        payloads.iter().position(|payload| *payload == "[DONE]").expect("done sentinel");
+
+    assert!(finish_index < usage_index, "{text}");
+    assert!(usage_index < done_index, "{text}");
+
+    let usage_chunk: serde_json::Value =
+        serde_json::from_str(payloads[usage_index]).expect("usage chunk json");
+    assert_eq!(usage_chunk["choices"], json!([]));
+    assert_eq!(usage_chunk["usage"]["prompt_tokens"], 22);
+    assert_eq!(usage_chunk["usage"]["completion_tokens"], 3);
+    assert_eq!(usage_chunk["usage"]["total_tokens"], 25);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn stream_without_include_usage_keeps_existing_shape() {
+    let (app, engine_task) = test_app_with_stream_output_specs(default_stream_output_specs()).await;
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": true,
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let text = String::from_utf8(body.to_vec()).expect("utf8 body");
+
+    assert!(!text.contains("\"usage\":"), "{text}");
+    assert!(text.contains("\"finish_reason\":\"stop\""), "{text}");
+    assert!(text.trim_end().ends_with("data: [DONE]"), "{text}");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn completions_invalid_request_returns_openai_error() {
+    let mut app = test_app().await;
+    let response = app
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "stream": false,
+                        "stream_options": {"include_usage": true}
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::BAD_REQUEST);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+    assert_eq!(json["error"]["type"], "invalid_request_error");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_stream_completions_return_json_response() {
+    let (app, engine_task) = test_app_with_engine_handle().await;
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "stream": false
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    assert!(
+        response
+            .headers()
+            .get("content-type")
+            .and_then(|value| value.to_str().ok())
+            .is_some_and(|value| value.starts_with("application/json"))
+    );
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+
+    assert_eq!(json["object"], "text_completion");
+    assert_eq!(json["choices"][0]["text"], "hi");
+    assert_eq!(json["choices"][0]["finish_reason"], "stop");
+    assert_eq!(json["usage"]["completion_tokens"], 3);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_stream_completions_echo_prepends_prompt_text() {
+    let (app, engine_task) = test_app_with_engine_handle().await;
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "echo": true,
+                        "stream": false
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+
+    assert_eq!(json["choices"][0]["text"], "hellohi");
+    assert_eq!(json["usage"]["prompt_tokens"], 5);
+    assert_eq!(json["usage"]["completion_tokens"], 3);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_stream_completions_include_logprobs() {
+    let ipc = IpcNamespace::new().expect("create ipc namespace");
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-openai-completion-logprobs".to_vec();
+
+    let engine_task = MockEngineTask::new(spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            boxed_test_future(async move {
+                let add = recv_engine_message(dealer).await;
+                let request: EngineCoreRequest =
+                    rmp_serde::from_slice(&add[1]).expect("decode request");
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 0,
+                        outputs: vec![
+                            request_output_with_logprobs(
+                                &request.request_id,
+                                vec![b'h' as u32],
+                                None,
+                                None,
+                                Some(sample_logprobs_for_token(b'h' as u32, b'H' as u32)),
+                                None,
+                            ),
+                            request_output_with_logprobs(
+                                &request.request_id,
+                                vec![b'i' as u32],
+                                Some(EngineCoreFinishReason::Stop),
+                                None,
+                                Some(sample_logprobs_for_token(b'i' as u32, b'I' as u32)),
+                                None,
+                            ),
+                        ],
+                        scheduler_stats: None,
+                        timestamp: 0.0,
+                        utility_output: None,
+                        finished_requests: Some(BTreeSet::from([request.request_id.clone()])),
+                        wave_complete: None,
+                        start_wave: None,
+                    },
+                )
+                .await;
+            })
+        },
+    ));
+
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name("test-model")
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .expect("connect client");
+    let chat = ChatLlm::from_shared_backend(test_llm(client), Arc::new(FakeChatBackend::new()));
+    let mut app = build_router(Arc::new(AppState::new(
+        vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+        chat,
+    )));
+
+    let response = app
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "stream": false,
+                        "logprobs": 1
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+
+    assert_eq!(json["choices"][0]["logprobs"]["tokens"], json!(["h", "i"]));
+    assert_eq!(
+        json["choices"][0]["logprobs"]["token_logprobs"],
+        json!([-0.1, -0.1])
+    );
+    assert_eq!(json["choices"][0]["logprobs"]["text_offset"], json!([0, 1]));
+    assert_eq!(
+        json["choices"][0]["logprobs"]["top_logprobs"][0],
+        json!({"h": -0.1, "H": -0.2})
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_stream_completions_include_prompt_logprobs() {
+    let ipc = IpcNamespace::new().expect("create ipc namespace");
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-openai-completion-prompt-logprobs".to_vec();
+
+    let engine_task = MockEngineTask::new(spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            boxed_test_future(async move {
+                let add = recv_engine_message(dealer).await;
+                let request: EngineCoreRequest =
+                    rmp_serde::from_slice(&add[1]).expect("decode request");
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 0,
+                        outputs: vec![request_output_with_logprobs(
+                            &request.request_id,
+                            vec![b'h' as u32, b'i' as u32, b'!' as u32],
+                            Some(EngineCoreFinishReason::Stop),
+                            None,
+                            Some(Logprobs {
+                                positions: vec![
+                                    sample_logprobs_for_token(b'h' as u32, b'H' as u32).positions
+                                        [0]
+                                    .clone(),
+                                    sample_logprobs_for_token(b'i' as u32, b'I' as u32).positions
+                                        [0]
+                                    .clone(),
+                                    sample_logprobs_for_token(b'!' as u32, b'?' as u32).positions
+                                        [0]
+                                    .clone(),
+                                ],
+                            }),
+                            Some(prompt_logprobs_for_hello()),
+                        )],
+                        scheduler_stats: None,
+                        timestamp: 0.0,
+                        utility_output: None,
+                        finished_requests: None,
+                        wave_complete: None,
+                        start_wave: None,
+                    },
+                )
+                .await;
+            })
+        },
+    ));
+
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name("test-model")
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .expect("connect client");
+    let chat = ChatLlm::from_shared_backend(test_llm(client), Arc::new(FakeChatBackend::new()));
+    let mut app = build_router(Arc::new(AppState::new(
+        vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+        chat,
+    )));
+
+    let response = app
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "stream": false,
+                        "echo": true,
+                        "logprobs": 1
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+
+    assert_eq!(json["choices"][0]["text"], "hellohi");
+    assert_eq!(
+        json["choices"][0]["logprobs"]["tokens"],
+        json!(["h", "e", "l", "l", "o", "h", "i", "!"])
+    );
+    assert_eq!(
+        json["choices"][0]["logprobs"]["text_offset"],
+        json!([0, 1, 2, 3, 4, 5, 6, 7])
+    );
+    assert_eq!(
+        json["choices"][0]["logprobs"]["token_logprobs"],
+        json!([null, -0.3, -0.4, -0.45, -0.5, -0.1, -0.1, -0.1])
+    );
+    assert_eq!(
+        json["choices"][0]["prompt_logprobs"][0],
+        serde_json::Value::Null
+    );
+    assert_eq!(
+        json["choices"][0]["prompt_logprobs"][1],
+        json!({"a": -0.5, "e": -0.3})
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_stream_chat_completions_still_succeed() {
+    let ipc = IpcNamespace::new().expect("create ipc namespace");
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-openai-chat-non-stream".to_vec();
+
+    let engine_task = MockEngineTask::new(spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            boxed_test_future(async move {
+                let add = recv_engine_message(dealer).await;
+                let request: EngineCoreRequest =
+                    rmp_serde::from_slice(&add[1]).expect("decode request");
+                send_outputs(
+                    push,
+                    engine_outputs_for_request(&request.request_id, default_stream_output_specs()),
+                )
+                .await;
+            })
+        },
+    ));
+
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name("test-model")
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .expect("connect client");
+    let chat = ChatLlm::from_shared_backend(test_llm(client), Arc::new(FakeChatBackend::new()));
+    let mut app = build_router(Arc::new(AppState::new(
+        vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+        chat,
+    )));
+
+    let response = app
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": false,
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    engine_task.await.expect("mock engine task");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_stream_completions_still_succeed() {
+    let ipc = IpcNamespace::new().expect("create ipc namespace");
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-openai-completion-non-stream".to_vec();
+
+    let engine_task = MockEngineTask::new(spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            boxed_test_future(async move {
+                let add = recv_engine_message(dealer).await;
+                let request: EngineCoreRequest =
+                    rmp_serde::from_slice(&add[1]).expect("decode request");
+                send_outputs(
+                    push,
+                    engine_outputs_for_request(&request.request_id, default_stream_output_specs()),
+                )
+                .await;
+            })
+        },
+    ));
+
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name("test-model")
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .expect("connect client");
+    let chat = ChatLlm::from_shared_backend(test_llm(client), Arc::new(FakeChatBackend::new()));
+    let mut app = build_router(Arc::new(AppState::new(
+        vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+        chat,
+    )));
+
+    let response = app
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "stream": false
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    engine_task.await.expect("mock engine task");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn chat_completions_header_request_id_takes_precedence() {
+    let ipc = IpcNamespace::new().expect("create ipc namespace");
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-chat-request-id-precedence".to_vec();
+
+    let engine_task = MockEngineTask::new(spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            boxed_test_future(async move {
+                let add = recv_engine_message(dealer).await;
+                let request: EngineCoreRequest =
+                    rmp_serde::from_slice(&add[1]).expect("decode request");
+                assert_eq!(
+                    request.external_req_id.as_deref(),
+                    Some("chatcmpl-header-req")
+                );
+                assert!(request.request_id.starts_with("chatcmpl-header-req-"));
+                assert_ne!(request.request_id, "chatcmpl-header-req");
+
+                send_outputs(
+                    push,
+                    engine_outputs_for_request(&request.request_id, default_stream_output_specs()),
+                )
+                .await;
+            })
+        },
+    ));
+
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name("test-model")
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .expect("connect client");
+    let chat = ChatLlm::from_shared_backend(Llm::new(client), Arc::new(FakeChatBackend::new()));
+    let mut app = build_router(Arc::new(AppState::new(
+        vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+        chat,
+    )));
+
+    let response = app
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .header("X-Request-Id", "header-req")
+                .body(Body::from(
+                    json!({
+                        "request_id": "body-req",
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+
+    assert_eq!(json["id"], "chatcmpl-header-req");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_stream_raw_generate_returns_token_output_envelope() {
+    let ipc = IpcNamespace::new().expect("create ipc namespace");
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-raw-generate-non-stream".to_vec();
+
+    let engine_task = MockEngineTask::new(spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            boxed_test_future(async move {
+                let add = recv_engine_message(dealer).await;
+                let request: EngineCoreRequest =
+                    rmp_serde::from_slice(&add[1]).expect("decode request");
+                assert_eq!(request.prompt_token_ids.as_deref(), Some(&[11, 22][..]));
+                assert_eq!(request.external_req_id.as_deref(), Some("raw-req"));
+                assert!(request.request_id.starts_with("raw-req-"));
+                assert_ne!(request.request_id, "raw-req");
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 0,
+                        outputs: vec![
+                            request_output_with_logprobs(
+                                &request.request_id,
+                                vec![33],
+                                None,
+                                None,
+                                Some(sample_logprobs_for_token(33, 34)),
+                                Some(prompt_logprobs_for_tokens(&[11, 22])),
+                            ),
+                            request_output_with_logprobs_and_kv(
+                                &request.request_id,
+                                vec![44],
+                                Some(EngineCoreFinishReason::Stop),
+                                None,
+                                Some(sample_logprobs_for_token(44, 45)),
+                                None,
+                                Some(json!({"connector": "x"})),
+                            ),
+                        ],
+                        scheduler_stats: None,
+                        timestamp: 0.0,
+                        utility_output: None,
+                        finished_requests: None,
+                        wave_complete: None,
+                        start_wave: None,
+                    },
+                )
+                .await;
+            })
+        },
+    ));
+
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name("test-model")
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .expect("connect client");
+    let chat = ChatLlm::from_shared_backend(Llm::new(client), Arc::new(FakeChatBackend::new()));
+    let mut app = build_router(Arc::new(AppState::new(
+        vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+        chat,
+    )));
+
+    let response = app
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/inference/v1/generate")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "request_id": "raw-req",
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "token_ids": [11, 22],
+                        "stream": false,
+                        "sampling_params": {
+                            "max_tokens": 2,
+                            "logprobs": 1,
+                            "prompt_logprobs": 1
+                        }
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+
+    assert_eq!(json["request_id"], "raw-req");
+    assert_eq!(json["choices"][0]["index"], 0);
+    assert_eq!(json["choices"][0]["token_ids"], json!([33, 44]));
+    assert_eq!(json["choices"][0]["finish_reason"], "stop");
+    assert_eq!(
+        json["choices"][0]["logprobs"]["content"][0]["token"],
+        "token_id:33"
+    );
+    assert_eq!(
+        json["choices"][0]["logprobs"]["content"][1]["top_logprobs"][0]["token"],
+        "token_id:44"
+    );
+    assert_eq!(json["prompt_logprobs"][0], serde_json::Value::Null);
+    assert_eq!(
+        json["prompt_logprobs"][1]["22"]["decoded_token"],
+        "token_id:22"
+    );
+    assert_eq!(json["kv_transfer_params"], json!({"connector": "x"}));
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn raw_generate_rejects_streaming() {
+    let mut app = test_app().await;
+
+    let response = app
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/inference/v1/generate")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "token_ids": [11, 22],
+                        "stream": true,
+                        "sampling_params": {}
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::BAD_REQUEST);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+    assert_eq!(json["error"]["param"], "stream");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn raw_generate_rejects_empty_token_ids() {
+    let mut app = test_app().await;
+
+    let response = app
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/inference/v1/generate")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "token_ids": [],
+                        "sampling_params": {}
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::BAD_REQUEST);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+    assert_eq!(json["error"]["param"], "token_ids");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn raw_generate_rejects_wrong_model() {
+    let mut app = test_app().await;
+
+    let response = app
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/inference/v1/generate")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "wrong-model",
+                        "token_ids": [11, 22],
+                        "sampling_params": {}
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::NOT_FOUND);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn completions_happy_path_returns_sse_stream() {
+    let (app, engine_task) = test_app_with_engine_handle().await;
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "stream": true,
+                        "stream_options": {"include_usage": true}
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    assert_eq!(
+        response.headers().get("content-type").and_then(|value| value.to_str().ok()),
+        Some("text/event-stream")
+    );
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let text = String::from_utf8(body.to_vec()).expect("utf8 body");
+    let payloads = sse_data_payloads(&text);
+    let usage_index = payloads
+        .iter()
+        .position(|payload| payload.contains("\"usage\":"))
+        .expect("usage chunk");
+    let done_index =
+        payloads.iter().position(|payload| *payload == "[DONE]").expect("done sentinel");
+
+    assert!(
+        payloads.iter().any(|payload| payload.contains("\"text\":\"h\"")),
+        "{text}"
+    );
+    assert!(
+        payloads.iter().any(|payload| payload.contains("\"finish_reason\":\"stop\"")),
+        "{text}"
+    );
+    assert!(usage_index < done_index, "{text}");
+
+    let usage_chunk: serde_json::Value =
+        serde_json::from_str(payloads[usage_index]).expect("usage chunk json");
+    assert_eq!(usage_chunk["choices"], json!([]));
+    assert_eq!(usage_chunk["usage"]["completion_tokens"], 3);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn completions_echo_stream_emits_separate_prompt_chunk() {
+    let (app, engine_task) = test_app_with_engine_handle().await;
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "echo": true,
+                        "stream": true,
+                        "stream_options": {"include_usage": true}
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let text = String::from_utf8(body.to_vec()).expect("utf8 body");
+    let payloads = sse_data_payloads(&text);
+    let hello_index = payloads
+        .iter()
+        .position(|payload| payload.contains("\"text\":\"hello\""))
+        .expect("prompt echo chunk");
+    let h_index = payloads
+        .iter()
+        .position(|payload| payload.contains("\"text\":\"h\""))
+        .expect("first generation chunk");
+
+    assert!(hello_index < h_index, "{text}");
+    assert!(
+        payloads.iter().any(|payload| payload.contains("\"text\":\"i\"")),
+        "{text}"
+    );
+
+    let usage_chunk: serde_json::Value = serde_json::from_str(
+        payloads
+            .iter()
+            .find(|payload| payload.contains("\"usage\":"))
+            .expect("usage chunk"),
+    )
+    .expect("usage chunk json");
+    assert_eq!(usage_chunk["usage"]["prompt_tokens"], 5);
+    assert_eq!(usage_chunk["usage"]["completion_tokens"], 3);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn chat_harness_streams_text_events() {
+    let (chat, engine_task) = test_chat_with_engine_handle().await;
+    let mut stream = chat
+        .chat(ChatRequest {
+            messages: vec![ChatMessage::text(ChatRole::User, "hello")],
+            sampling_params: SamplingParams {
+                max_tokens: Some(8),
+                ..Default::default()
+            },
+            request_id: "chat-harness".to_string(),
+            ..ChatRequest::for_test()
+        })
+        .await
+        .expect("submit chat request");
+
+    let mut saw_text = false;
+    let mut saw_done = false;
+    while let Some(event) = stream.next().await {
+        match event.expect("chat event") {
+            ChatEvent::BlockDelta { .. } => saw_text = true,
+            ChatEvent::Done { .. } => {
+                saw_done = true;
+                break;
+            }
+            ChatEvent::Start { .. }
+            | ChatEvent::LogprobsDelta { .. }
+            | ChatEvent::BlockStart { .. }
+            | ChatEvent::BlockEnd { .. }
+            | ChatEvent::ToolCallStart { .. }
+            | ChatEvent::ToolCallArgumentsDelta { .. }
+            | ChatEvent::ToolCallEnd { .. } => {}
+        }
+    }
+    engine_task.await.expect("mock engine task");
+
+    assert!(saw_text);
+    assert!(saw_done);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn prepared_openai_request_streams_text_events() {
+    let (chat, engine_task) = test_chat_with_engine_handle().await;
+    let prepared = prepare_chat_request(
+        serde_json::from_value(json!({
+            "model": "Qwen/Qwen1.5-0.5B-Chat",
+            "stream": true,
+            "messages": [{"role": "user", "content": "hello"}]
+        }))
+        .expect("decode request"),
+        &["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+        crate::utils::ResolvedRequestContext::default(),
+    )
+    .expect("prepare request");
+
+    let mut stream = chat.chat(prepared.chat_request).await.expect("submit chat request");
+
+    let mut saw_text = false;
+    let mut saw_done = false;
+    while let Some(event) = stream.next().await {
+        match event.expect("chat event") {
+            ChatEvent::BlockDelta { .. } => saw_text = true,
+            ChatEvent::Done { .. } => {
+                saw_done = true;
+                break;
+            }
+            ChatEvent::Start { .. }
+            | ChatEvent::LogprobsDelta { .. }
+            | ChatEvent::BlockStart { .. }
+            | ChatEvent::BlockEnd { .. }
+            | ChatEvent::ToolCallStart { .. }
+            | ChatEvent::ToolCallArgumentsDelta { .. }
+            | ChatEvent::ToolCallEnd { .. } => {}
+        }
+    }
+    engine_task.await.expect("mock engine task");
+
+    assert!(saw_text);
+    assert!(saw_done);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn reasoning_blocks_are_mapped_to_reasoning_sse_chunks() {
+    let (app, engine_task) = test_app_with_backend_and_stream_output_specs(
+        Arc::new(FakeChatBackend::with_model_id("Qwen/Qwen3-0.6B")),
+        vec![
+            (bytes_to_token_ids(b"<think>"), None),
+            (bytes_to_token_ids(b"think "), None),
+            (bytes_to_token_ids(b"more</think>"), None),
+            (
+                bytes_to_token_ids(b"answer"),
+                Some(EngineCoreFinishReason::Length),
+            ),
+        ],
+    )
+    .await;
+
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": true,
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let text = String::from_utf8(body.to_vec()).expect("utf8 body");
+
+    assert!(text.contains("\"reasoning\":\"think \""), "{text}");
+    assert!(text.contains("\"reasoning\":\"more\""), "{text}");
+    assert!(text.contains("\"content\":\"answer\""), "{text}");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn tool_calls_are_mapped_to_tool_call_sse_chunks() {
+    let (app, engine_task) = test_app_with_backend_and_stream_output_specs(
+        Arc::new(FakeChatBackend::with_model_id("Qwen/Qwen3-0.6B")),
+        vec![
+            (bytes_to_token_ids(b"<think>Need tool.</think>"), None),
+            (
+                bytes_to_token_ids(b"<tool_call>\n{\"name\":\"get_weather\", "),
+                None,
+            ),
+            (
+                bytes_to_token_ids(b"\"arguments\":{\"city\":\"Paris\"}}\n</tool_call>"),
+                Some(EngineCoreFinishReason::Stop),
+            ),
+        ],
+    )
+    .await;
+
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": true,
+                        "messages": [{"role": "user", "content": "hello"}],
+                        "tools": [{
+                            "type": "function",
+                            "function": {
+                                "name": "get_weather",
+                                "description": "Get weather",
+                                "parameters": {
+                                    "type": "object",
+                                    "properties": {"city": {"type": "string"}}
+                                }
+                            }
+                        }]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let text = String::from_utf8(body.to_vec()).expect("utf8 body");
+
+    assert!(text.contains("\"tool_calls\":"), "{text}");
+    assert!(text.contains("\"name\":\"get_weather\""), "{text}");
+    assert!(
+        text.contains("\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\""),
+        "{text}"
+    );
+    assert!(text.contains("\"finish_reason\":\"tool_calls\""), "{text}");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn tool_call_sse_chunks_can_carry_logprobs() {
+    let ipc = IpcNamespace::new().expect("create ipc namespace");
+    let handshake_address = ipc.handshake_endpoint();
+    let engine_id = b"engine-openai-chat-tools-logprobs".to_vec();
+
+    let engine_task = MockEngineTask::new(spawn_mock_engine_task(
+        handshake_address.clone(),
+        engine_id.clone(),
+        |dealer, push| {
+            Box::pin(async move {
+                let add = recv_engine_message(dealer).await;
+                let request: EngineCoreRequest =
+                    rmp_serde::from_slice(&add[1]).expect("decode request");
+
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 0,
+                        outputs: vec![request_output_with_logprobs(
+                            &request.request_id,
+                            bytes_to_token_ids(b"<think>Need tool.</think>"),
+                            None,
+                            None,
+                            Some(sample_logprobs_for_tokens(&bytes_to_token_ids(
+                                b"<think>Need tool.</think>",
+                            ))),
+                            None,
+                        )],
+                        scheduler_stats: None,
+                        timestamp: 0.0,
+                        utility_output: None,
+                        finished_requests: None,
+                        wave_complete: None,
+                        start_wave: None,
+                    },
+                )
+                .await;
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 0,
+                        outputs: vec![request_output_with_logprobs(
+                            &request.request_id,
+                            bytes_to_token_ids(b"<tool_call>\n{\"name\":\"get_weather\", "),
+                            None,
+                            None,
+                            Some(sample_logprobs_for_tokens(&bytes_to_token_ids(
+                                b"<tool_call>\n{\"name\":\"get_weather\", ",
+                            ))),
+                            None,
+                        )],
+                        scheduler_stats: None,
+                        timestamp: 0.0,
+                        utility_output: None,
+                        finished_requests: None,
+                        wave_complete: None,
+                        start_wave: None,
+                    },
+                )
+                .await;
+                send_outputs(
+                    push,
+                    EngineCoreOutputs {
+                        engine_index: 0,
+                        outputs: vec![request_output_with_logprobs(
+                            &request.request_id,
+                            bytes_to_token_ids(
+                                b"\"arguments\":{\"city\":\"Paris\"}}\n</tool_call>",
+                            ),
+                            Some(EngineCoreFinishReason::Stop),
+                            None,
+                            Some(sample_logprobs_for_tokens(&bytes_to_token_ids(
+                                b"\"arguments\":{\"city\":\"Paris\"}}\n</tool_call>",
+                            ))),
+                            None,
+                        )],
+                        scheduler_stats: None,
+                        timestamp: 0.0,
+                        utility_output: None,
+                        finished_requests: Some(BTreeSet::from([request.request_id.clone()])),
+                        wave_complete: None,
+                        start_wave: None,
+                    },
+                )
+                .await;
+            })
+        },
+    ));
+
+    let client = EngineCoreClient::connect(
+        EngineCoreClientConfig::new_single(handshake_address)
+            .with_model_name("test-model")
+            .with_local_input_output_addresses(
+                Some(ipc.input_endpoint()),
+                Some(ipc.output_endpoint()),
+            ),
+    )
+    .await
+    .expect("connect client");
+    let chat = ChatLlm::from_shared_backend(
+        test_llm(client),
+        Arc::new(FakeChatBackend::with_model_id("Qwen/Qwen3-0.6B")),
+    );
+    let app = build_router(Arc::new(AppState::new(
+        vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+        chat,
+    )));
+
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": true,
+                        "logprobs": true,
+                        "messages": [{"role": "user", "content": "hello"}],
+                        "tools": [{
+                            "type": "function",
+                            "function": {
+                                "name": "get_weather",
+                                "description": "Get weather",
+                                "parameters": {
+                                    "type": "object",
+                                    "properties": {"city": {"type": "string"}}
+                                }
+                            }
+                        }]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.finish().await;
+    let text = String::from_utf8(body.to_vec()).expect("utf8 body");
+
+    assert!(text.contains("\"tool_calls\":"), "{text}");
+    assert!(text.contains("\"logprobs\":{\"content\":"), "{text}");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn streaming_chat_prompt_logprobs_are_rejected() {
+    let (app, engine_task) = test_app_with_engine_handle().await;
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/chat/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "stream": true,
+                        "prompt_logprobs": 1,
+                        "messages": [{"role": "user", "content": "hello"}]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::BAD_REQUEST);
+    engine_task.abort();
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn reset_prefix_cache_route_sends_expected_utility_call() {
+    let (app, engine_task) = test_admin_app_with_engine_script(|dealer, push| {
+        boxed_test_future(async move {
+            let utility = recv_engine_message(dealer).await;
+            assert_eq!(utility[0].as_ref(), &[0x03]);
+
+            let payload = decode_value(&utility[1]).expect("decode utility payload");
+            let array = payload.as_array().expect("utility payload array");
+            let call_id = array[1].as_u64().expect("call id");
+
+            assert_eq!(array[2], Value::from("reset_prefix_cache"));
+            assert_eq!(
+                array[3],
+                Value::Array(vec![Value::from(true), Value::from(true)])
+            );
+
+            send_outputs(push, utility_outputs(call_id, utility_result_value(true))).await;
+        })
+    })
+    .await;
+
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/reset_prefix_cache?reset_running_requests=true&reset_external=true")
+                .body(Body::empty())
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    let status = response.status();
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    assert_eq!(status, StatusCode::OK, "{}", String::from_utf8_lossy(&body));
+    assert!(body.is_empty());
+    engine_task.await.expect("mock engine task");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn reset_mm_cache_route_sends_expected_utility_call() {
+    let (app, engine_task) = test_admin_app_with_engine_script(|dealer, push| {
+        boxed_test_future(async move {
+            let utility = recv_engine_message(dealer).await;
+            assert_eq!(utility[0].as_ref(), &[0x03]);
+
+            let payload = decode_value(&utility[1]).expect("decode utility payload");
+            let array = payload.as_array().expect("utility payload array");
+            let call_id = array[1].as_u64().expect("call id");
+
+            assert_eq!(array[2], Value::from("reset_mm_cache"));
+            assert_eq!(array[3], Value::Array(Vec::new()));
+
+            send_outputs(push, utility_outputs(call_id, utility_none_result())).await;
+        })
+    })
+    .await;
+
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/reset_mm_cache")
+                .body(Body::empty())
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    let status = response.status();
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    assert_eq!(status, StatusCode::OK, "{}", String::from_utf8_lossy(&body));
+    assert!(body.is_empty());
+    engine_task.await.expect("mock engine task");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn reset_encoder_cache_route_sends_expected_utility_call() {
+    let (app, engine_task) = test_admin_app_with_engine_script(|dealer, push| {
+        boxed_test_future(async move {
+            let utility = recv_engine_message(dealer).await;
+            assert_eq!(utility[0].as_ref(), &[0x03]);
+
+            let payload = decode_value(&utility[1]).expect("decode utility payload");
+            let array = payload.as_array().expect("utility payload array");
+            let call_id = array[1].as_u64().expect("call id");
+
+            assert_eq!(array[2], Value::from("reset_encoder_cache"));
+            assert_eq!(array[3], Value::Array(Vec::new()));
+
+            send_outputs(push, utility_outputs(call_id, utility_none_result())).await;
+        })
+    })
+    .await;
+
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/reset_encoder_cache")
+                .body(Body::empty())
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    let status = response.status();
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    assert_eq!(status, StatusCode::OK, "{}", String::from_utf8_lossy(&body));
+    assert!(body.is_empty());
+    engine_task.await.expect("mock engine task");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn collective_rpc_route_sends_expected_utility_call_and_returns_results() {
+    let (app, engine_task) = test_admin_app_with_engine_script(|dealer, push| {
+        boxed_test_future(async move {
+            let utility = recv_engine_message(dealer).await;
+            assert_eq!(utility[0].as_ref(), &[0x03]);
+
+            let payload = decode_value(&utility[1]).expect("decode utility payload");
+            let array = payload.as_array().expect("utility payload array");
+            let call_id = array[1].as_u64().expect("call id");
+
+            assert_eq!(array[2], Value::from("collective_rpc"));
+            assert_eq!(
+                array[3],
+                Value::Array(vec![
+                    Value::from("echo_args_kwargs"),
+                    Value::from(1.5_f64),
+                    Value::Array(vec![Value::from("arg1"), Value::from("arg2")]),
+                    Value::Map(vec![
+                        (Value::from("key1"), Value::from("value1")),
+                        (Value::from("key2"), Value::from("value2")),
+                    ]),
+                ])
+            );
+
+            send_outputs(
+                push,
+                utility_outputs(
+                    call_id,
+                    UtilityResultEnvelope::without_type_info(Value::Array(vec![Value::Map(vec![
+                        (
+                            Value::from("args"),
+                            Value::Array(vec![Value::from("arg1"), Value::from("arg2")]),
+                        ),
+                        (
+                            Value::from("kwargs"),
+                            Value::Map(vec![
+                                (Value::from("key1"), Value::from("value1")),
+                                (Value::from("key2"), Value::from("value2")),
+                            ]),
+                        ),
+                        (Value::from("total_items"), Value::from(4_u64)),
+                    ])])),
+                ),
+            )
+            .await;
+        })
+    })
+    .await;
+
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/collective_rpc")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    r#"{"method":"echo_args_kwargs","args":["arg1","arg2"],"kwargs":{"key1":"value1","key2":"value2"},"timeout":1.5}"#,
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+
+    assert_eq!(
+        serde_json::from_slice::<serde_json::Value>(&body).expect("decode json"),
+        json!({
+            "results": [{
+                "args": ["arg1", "arg2"],
+                "kwargs": {
+                    "key1": "value1",
+                    "key2": "value2"
+                },
+                "total_items": 4
+            }]
+        })
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn sleep_route_uses_python_compatible_default_query_values() {
+    let (app, engine_task) = test_admin_app_with_engine_script(|dealer, push| {
+        boxed_test_future(async move {
+            let utility = recv_engine_message(dealer).await;
+            assert_eq!(utility[0].as_ref(), &[0x03]);
+
+            let payload = decode_value(&utility[1]).expect("decode utility payload");
+            let array = payload.as_array().expect("utility payload array");
+            let call_id = array[1].as_u64().expect("call id");
+
+            assert_eq!(array[2], Value::from("sleep"));
+            assert_eq!(
+                array[3],
+                Value::Array(vec![Value::from(1_u64), Value::from("abort")])
+            );
+
+            send_outputs(push, utility_outputs(call_id, utility_none_result())).await;
+        })
+    })
+    .await;
+
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/sleep")
+                .body(Body::empty())
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    let status = response.status();
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    assert_eq!(status, StatusCode::OK, "{}", String::from_utf8_lossy(&body));
+    assert!(body.is_empty());
+    engine_task.await.expect("mock engine task");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn wake_up_route_without_tags_sends_none() {
+    let (app, engine_task) = test_admin_app_with_engine_script(|dealer, push| {
+        boxed_test_future(async move {
+            let utility = recv_engine_message(dealer).await;
+            assert_eq!(utility[0].as_ref(), &[0x03]);
+
+            let payload = decode_value(&utility[1]).expect("decode utility payload");
+            let array = payload.as_array().expect("utility payload array");
+            let call_id = array[1].as_u64().expect("call id");
+
+            assert_eq!(array[2], Value::from("wake_up"));
+            assert_eq!(array[3], Value::Array(vec![Value::Nil]));
+
+            send_outputs(push, utility_outputs(call_id, utility_none_result())).await;
+        })
+    })
+    .await;
+
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("POST")
+                .uri("/wake_up")
+                .body(Body::empty())
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    let status = response.status();
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    assert_eq!(status, StatusCode::OK, "{}", String::from_utf8_lossy(&body));
+    assert!(body.is_empty());
+    engine_task.await.expect("mock engine task");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn is_sleeping_route_returns_json_payload() {
+    let (app, engine_task) = test_admin_app_with_engine_script(|dealer, push| {
+        boxed_test_future(async move {
+            let utility = recv_engine_message(dealer).await;
+            assert_eq!(utility[0].as_ref(), &[0x03]);
+
+            let payload = decode_value(&utility[1]).expect("decode utility payload");
+            let array = payload.as_array().expect("utility payload array");
+            let call_id = array[1].as_u64().expect("call id");
+
+            assert_eq!(array[2], Value::from("is_sleeping"));
+            assert_eq!(array[3], Value::Array(Vec::new()));
+
+            send_outputs(push, utility_outputs(call_id, utility_result_value(true))).await;
+        })
+    })
+    .await;
+
+    let response = app
+        .clone()
+        .call(
+            Request::builder()
+                .method("GET")
+                .uri("/is_sleeping")
+                .body(Body::empty())
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+
+    assert_eq!(
+        serde_json::from_slice::<serde_json::Value>(&body).expect("decode json"),
+        json!({ "is_sleeping": true })
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn admin_routes_are_hidden_when_dev_mode_is_disabled() {
+    let (chat, engine_task) = test_chat_with_engine_handle().await;
+    let app = build_router_with_dev_mode(
+        Arc::new(AppState::new(
+            vec!["Qwen/Qwen1.5-0.5B-Chat".to_string()],
+            chat,
+        )),
+        false,
+    );
+
+    for (method, uri) in [
+        ("GET", "/is_sleeping"),
+        ("POST", "/sleep"),
+        ("POST", "/wake_up"),
+        ("POST", "/collective_rpc"),
+        ("POST", "/reset_prefix_cache"),
+        ("POST", "/reset_mm_cache"),
+        ("POST", "/reset_encoder_cache"),
+    ] {
+        let response = app
+            .clone()
+            .call(
+                Request::builder()
+                    .method(method)
+                    .uri(uri)
+                    .body(Body::empty())
+                    .expect("build request"),
+            )
+            .await
+            .expect("call app");
+
+        assert_eq!(response.status(), StatusCode::NOT_FOUND, "{method} {uri}");
+    }
+
+    engine_task.abort_and_join().await;
+}
+
+// ========================= Stop string tests =========================
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_stream_completions_stop_string_excluded_from_output() {
+    // Engine generates "say world" but stop string "wor" truncates output to "say
+    // ".
+    let output_specs = vec![
+        (bytes_to_token_ids(b"say"), None),
+        (
+            bytes_to_token_ids(b" world"),
+            Some(EngineCoreFinishReason::Length),
+        ),
+    ];
+    let (app, engine_task) = test_app_with_stream_output_specs(output_specs).await;
+
+    let response = app
+        .clone()
+        .oneshot(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "stream": false,
+                        "stop": ["wor"]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+
+    assert_eq!(json["choices"][0]["text"], "say ");
+    assert_eq!(json["choices"][0]["finish_reason"], "stop");
+    assert_eq!(json["choices"][0]["stop_reason"], "wor");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_stream_completions_stop_string_included_in_output() {
+    // Same tokens but include_stop_str_in_output=true includes the stop string in
+    // the output.
+    let output_specs = vec![
+        (bytes_to_token_ids(b"say"), None),
+        (
+            bytes_to_token_ids(b" world"),
+            Some(EngineCoreFinishReason::Length),
+        ),
+    ];
+    let (app, engine_task) = test_app_with_stream_output_specs(output_specs).await;
+
+    let response = app
+        .clone()
+        .oneshot(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "stream": false,
+                        "stop": ["wor"],
+                        "include_stop_str_in_output": true
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+
+    assert_eq!(json["choices"][0]["text"], "say wor");
+    assert_eq!(json["choices"][0]["finish_reason"], "stop");
+    assert_eq!(json["choices"][0]["stop_reason"], "wor");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn stream_completions_stop_string_excluded_from_output() {
+    let output_specs = vec![
+        (bytes_to_token_ids(b"say"), None),
+        (
+            bytes_to_token_ids(b" world"),
+            Some(EngineCoreFinishReason::Length),
+        ),
+    ];
+    let (app, engine_task) = test_app_with_stream_output_specs(output_specs).await;
+
+    let response = app
+        .clone()
+        .oneshot(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "stream": true,
+                        "stop": ["wor"]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let text = String::from_utf8(body.to_vec()).expect("utf8 body");
+    let payloads = sse_data_payloads(&text);
+
+    // Collect all text deltas from the SSE chunks.
+    let mut full_text = String::new();
+    for payload in &payloads {
+        if *payload == "[DONE]" {
+            continue;
+        }
+        let chunk: serde_json::Value = serde_json::from_str(payload).expect("json chunk");
+        if let Some(text) = chunk["choices"][0]["text"].as_str() {
+            full_text.push_str(text);
+        }
+    }
+
+    // The concatenated text deltas should equal "say " (stop string excluded).
+    assert_eq!(full_text, "say ", "full streamed text: {text}");
+
+    // The final chunk should have finish_reason "stop".
+    assert!(
+        payloads.iter().any(|p| p.contains("\"finish_reason\":\"stop\"")),
+        "{text}"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn stream_completions_stop_string_included_in_output() {
+    let output_specs = vec![
+        (bytes_to_token_ids(b"say"), None),
+        (
+            bytes_to_token_ids(b" world"),
+            Some(EngineCoreFinishReason::Length),
+        ),
+    ];
+    let (app, engine_task) = test_app_with_stream_output_specs(output_specs).await;
+
+    let response = app
+        .clone()
+        .oneshot(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "stream": true,
+                        "stop": ["wor"],
+                        "include_stop_str_in_output": true
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let text = String::from_utf8(body.to_vec()).expect("utf8 body");
+    let payloads = sse_data_payloads(&text);
+
+    let mut full_text = String::new();
+    for payload in &payloads {
+        if *payload == "[DONE]" {
+            continue;
+        }
+        let chunk: serde_json::Value = serde_json::from_str(payload).expect("json chunk");
+        if let Some(text) = chunk["choices"][0]["text"].as_str() {
+            full_text.push_str(text);
+        }
+    }
+
+    // With include_stop_str_in_output, the stop string "wor" should be included.
+    assert_eq!(full_text, "say wor", "full streamed text: {text}");
+
+    assert!(
+        payloads.iter().any(|p| p.contains("\"finish_reason\":\"stop\"")),
+        "{text}"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_stream_completions_no_stop_string_match_preserves_original_finish_reason() {
+    // Stop string "xyz" does not appear in "hi!" so the original finish reason is
+    // preserved.
+    let (app, engine_task) = test_app_with_engine_handle().await;
+
+    let response = app
+        .clone()
+        .oneshot(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "stream": false,
+                        "stop": ["xyz"]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+
+    // Default output is "hi" (stop token '!' suppressed), finish_reason remains
+    // "stop" from EOS.
+    assert_eq!(json["choices"][0]["text"], "hi");
+    assert_eq!(json["choices"][0]["finish_reason"], "stop");
+    // No text stop string matched — stop_reason should be absent.
+    assert!(json["choices"][0]["stop_reason"].is_null());
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn non_stream_completions_stop_string_array_matches_first_occurrence() {
+    // Multiple stop strings: "rl" appears in "world" but " wo" appears earlier.
+    let output_specs = vec![(
+        bytes_to_token_ids(b"say world"),
+        Some(EngineCoreFinishReason::Length),
+    )];
+    let (app, engine_task) = test_app_with_stream_output_specs(output_specs).await;
+
+    let response = app
+        .clone()
+        .oneshot(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "stream": false,
+                        "stop": [" wo", "rl"]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    let body = to_bytes(response.into_body(), usize::MAX).await.expect("read body");
+    engine_task.await.expect("mock engine task");
+    let json: serde_json::Value = serde_json::from_slice(&body).expect("decode json");
+
+    // " wo" is detected first (at byte 3), so output is truncated to "say".
+    assert_eq!(json["choices"][0]["text"], "say");
+    assert_eq!(json["choices"][0]["finish_reason"], "stop");
+    assert_eq!(json["choices"][0]["stop_reason"], " wo");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[serial]
+async fn completions_empty_stop_string_returns_validation_error() {
+    let (app, _engine_task) = test_app_with_engine_handle().await;
+
+    let response = app
+        .clone()
+        .oneshot(
+            Request::builder()
+                .method("POST")
+                .uri("/v1/completions")
+                .header("content-type", "application/json")
+                .body(Body::from(
+                    json!({
+                        "model": "Qwen/Qwen1.5-0.5B-Chat",
+                        "prompt": "hello",
+                        "stream": false,
+                        "stop": [""]
+                    })
+                    .to_string(),
+                ))
+                .expect("build request"),
+        )
+        .await
+        .expect("call app");
+
+    assert_eq!(response.status(), StatusCode::BAD_REQUEST);
+}
diff --git a/rust/src/server/src/state.rs b/rust/src/server/src/state.rs
new file mode 100644
index 000000000000..04d37f1a5d44
--- /dev/null
+++ b/rust/src/server/src/state.rs
@@ -0,0 +1,120 @@
+use std::sync::Arc;
+use std::sync::atomic::{AtomicU64, Ordering};
+
+use tokio::time::{Duration, Instant, sleep_until};
+use tracing::warn;
+use vllm_chat::ChatLlm;
+use vllm_engine_core_client::EngineCoreClient;
+
+const SHUTDOWN_REFCOUNT_POLL_INTERVAL: Duration = Duration::from_millis(100);
+
+/// Shared router state for the minimal single-model OpenAI server.
+pub struct AppState {
+    /// All public model IDs served by this frontend. The first entry is the
+    /// primary ID used in responses; all entries are valid in requests.
+    served_model_names: Vec<String>,
+    /// Shared chat facade used by all requests.
+    pub chat: ChatLlm,
+    /// Whether to log a summary line for each completed request.
+    pub enable_log_requests: bool,
+    /// Number of in-flight inference requests currently owned by this frontend.
+    server_load: AtomicU64,
+}
+
+impl AppState {
+    /// Construct one application state instance.
+    ///
+    /// `served_model_names` must be non-empty; the first entry is the primary
+    /// model ID returned in API responses.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `served_model_names` is empty.
+    pub fn new(served_model_names: Vec<String>, chat: ChatLlm) -> Self {
+        assert!(
+            !served_model_names.is_empty(),
+            "served_model_names must not be empty"
+        );
+        Self {
+            served_model_names,
+            chat,
+            enable_log_requests: false,
+            server_load: AtomicU64::new(0),
+        }
+    }
+
+    /// Enable per-request completion logging.
+    pub fn with_log_requests(mut self, enabled: bool) -> Self {
+        self.enable_log_requests = enabled;
+        self
+    }
+
+    /// The primary model name echoed back in API responses (the first served
+    /// name).
+    pub fn primary_model_name(&self) -> &str {
+        self.served_model_names.first().map(String::as_str).unwrap_or_default()
+    }
+
+    /// All model names served by this frontend.
+    pub fn served_model_names(&self) -> &[String] {
+        &self.served_model_names
+    }
+
+    /// Return a reference to the underlying engine core client for utility
+    /// calls.
+    pub(crate) fn engine_core_client(&self) -> &EngineCoreClient {
+        self.chat.engine_core_client()
+    }
+
+    /// Return the current in-flight inference request count for the `/load`
+    /// endpoint.
+    pub fn server_load(&self) -> u64 {
+        self.server_load.load(Ordering::Relaxed)
+    }
+
+    /// Increment the in-flight inference request count, called by the load
+    /// tracking middleware.
+    pub(crate) fn increment_server_load(&self) {
+        self.server_load.fetch_add(1, Ordering::Relaxed);
+    }
+
+    /// Decrement the in-flight inference request count, called by the load
+    /// tracking middleware.
+    pub(crate) fn decrement_server_load(&self) {
+        self.server_load.fetch_sub(1, Ordering::Relaxed);
+    }
+
+    /// Wait until all request-owned references are dropped, then shut down the
+    /// engine client.
+    ///
+    /// If the deadline elapses while request/connection tasks still hold state
+    /// references, skip the clean engine-client shutdown and let process
+    /// teardown reclaim the remaining resources.
+    pub async fn shutdown(mut self: Arc<Self>, deadline: Instant) -> anyhow::Result<()> {
+        loop {
+            match Arc::try_unwrap(self) {
+                Ok(state) => {
+                    state.chat.shutdown().await?;
+                    return Ok(());
+                }
+                Err(state) => self = state,
+            }
+            let ref_count = Arc::strong_count(&self);
+
+            let now = Instant::now();
+            if now >= deadline {
+                warn!(
+                    ref_count,
+                    "shutdown deadline elapsed before app state became idle; skipping engine-client shutdown"
+                );
+                return Ok(());
+            }
+
+            sleep_until(std::cmp::min(
+                deadline,
+                now + SHUTDOWN_REFCOUNT_POLL_INTERVAL,
+            ))
+            .await;
+        }
+    }
+}
diff --git a/rust/src/server/src/utils.rs b/rust/src/server/src/utils.rs
new file mode 100644
index 000000000000..13fa0dfaeecb
--- /dev/null
+++ b/rust/src/server/src/utils.rs
@@ -0,0 +1,106 @@
+use std::collections::HashMap;
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use axum::http::HeaderMap;
+use serde_json::Value;
+use thiserror_ext::AsReport;
+use uuid::Uuid;
+
+use crate::error::ApiError;
+
+#[derive(Debug, Clone, PartialEq, Eq, Default)]
+pub struct ResolvedRequestContext {
+    pub request_id: String,
+    pub data_parallel_rank: Option<u32>,
+}
+
+/// Return the current Unix timestamp in seconds for OpenAI response objects.
+pub fn unix_timestamp() -> u64 {
+    SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .map(|duration| duration.as_secs())
+        .unwrap_or_default()
+}
+
+/// Construct an API error for a failed utility call to the engine core.
+pub fn utility_call_error(method: &str, error: impl AsReport) -> ApiError {
+    ApiError::server_error(format!("failed to call {method}: {}", error.as_report()))
+}
+
+/// Merge `kv_transfer_params` into the `vllm_xargs` map, mirroring the Python
+/// vLLM behavior where `kv_transfer_params` is injected into `extra_args` for
+/// engine-core consumption.
+pub fn merge_kv_transfer_params(
+    mut xargs: Option<HashMap<String, Value>>,
+    kv_transfer_params: Option<&HashMap<String, Value>>,
+) -> Option<HashMap<String, Value>> {
+    if let Some(kv_params) = kv_transfer_params {
+        let map = xargs.get_or_insert_with(HashMap::new);
+        map.insert(
+            "kv_transfer_params".to_string(),
+            // This is safe because we know that `kv_params` is already valid JSON.
+            serde_json::to_value(kv_params).unwrap(),
+        );
+    }
+    xargs
+}
+
+/// Convert OpenAI-style `logit_bias` with string token-ID keys into the
+/// internal `HashMap<u32, f32>` representation, validating that every key
+/// parses as a `u32`.
+pub fn convert_logit_bias(
+    logit_bias: Option<HashMap<String, f32>>,
+) -> Result<Option<HashMap<u32, f32>>, ApiError> {
+    logit_bias
+        .map(|bias| {
+            bias.into_iter()
+                .map(|(key, value)| {
+                    key.parse().map(|k| (k, value)).map_err(|_| {
+                        ApiError::invalid_request(
+                            format!(
+                                "Invalid key in 'logit_bias': '{key}' is not a valid token ID. \
+                                 Token IDs must be non-negative integers."
+                            ),
+                            Some("logit_bias"),
+                        )
+                    })
+                })
+                .collect()
+        })
+        .transpose()
+}
+
+/// Extract common request metadata from HTTP headers: the external request ID
+/// and the optional data-parallel rank used for engine routing.
+pub fn resolve_request_context(
+    headers: &HeaderMap,
+    request_id: Option<&str>,
+) -> ResolvedRequestContext {
+    // `None` when the header is absent or cannot be parsed as a `u32`.
+    let data_parallel_rank = headers
+        .get("X-data-parallel-rank")
+        .and_then(|v| v.to_str().ok())
+        .and_then(|s| s.trim().parse().ok());
+
+    // Extract request id from header.
+    let request_id_header = headers.get("X-Request-Id").and_then(|value| value.to_str().ok());
+    let request_id = resolve_base_request_id(request_id_header, request_id);
+
+    ResolvedRequestContext {
+        request_id,
+        data_parallel_rank,
+    }
+}
+
+/// Resolve the base external request ID before API-specific prefixes such as
+/// `chatcmpl-`.
+pub fn resolve_base_request_id(
+    request_id_header: Option<&str>,
+    request_id: Option<&str>,
+) -> String {
+    request_id_header.or(request_id).map(ToOwned::to_owned).unwrap_or_else(|| {
+        let mut id = Uuid::new_v4().simple().to_string();
+        id.truncate(8);
+        id
+    })
+}
diff --git a/rust/src/text/Cargo.toml b/rust/src/text/Cargo.toml
new file mode 100644
index 000000000000..042023481825
--- /dev/null
+++ b/rust/src/text/Cargo.toml
@@ -0,0 +1,34 @@
+[package]
+name = "vllm-text"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+asynk-strim-attr.workspace = true
+easy-ext.workspace = true
+enum-as-inner.workspace = true
+futures.workspace = true
+hf-hub.workspace = true
+itertools.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+serde_with.workspace = true
+thiserror.workspace = true
+thiserror-ext.workspace = true
+tracing.workspace = true
+trait-set.workspace = true
+vllm-engine-core-client.workspace = true
+vllm-llm.workspace = true
+vllm-tokenizer.workspace = true
+
+[dev-dependencies]
+expect-test.workspace = true
+futures.workspace = true
+tempfile.workspace = true
+tokio.workspace = true
+vllm-llm = { workspace = true, features = ["test-util"] }
+
+[lints]
+workspace = true
diff --git a/rust/src/text/src/backend/hf/config.rs b/rust/src/text/src/backend/hf/config.rs
new file mode 100644
index 000000000000..5f2ecf8ba603
--- /dev/null
+++ b/rust/src/text/src/backend/hf/config.rs
@@ -0,0 +1,373 @@
+use std::collections::BTreeSet;
+use std::fs;
+use std::path::Path;
+
+use serde::{Deserialize, Serialize};
+use thiserror_ext::AsReport as _;
+
+use crate::error::{Error, Result};
+
+/// Minimal subset of `tokenizer_config.json` needed by chat/EOS handling.
+#[derive(Debug, Default, Deserialize)]
+#[serde(default)]
+pub struct HfTokenizerConfig {
+    #[serde(flatten)]
+    pub special_tokens: HfSpecialTokens,
+    pub chat_template: Option<String>,
+    /// The `tokenizer_class` field from HuggingFace tokenizer configs. Some
+    /// tiktoken-based models (e.g. DeepSeek, Kimi K2) set this to a value
+    /// containing "Tiktoken" which can be used as a hint for backend
+    /// selection.
+    pub tokenizer_class: Option<String>,
+}
+
+/// Hugging Face named special tokens may be serialized as a string or an
+/// object carrying the token content.
+#[derive(Debug, Clone, Deserialize)]
+#[serde(untagged)]
+pub enum NamedSpecialToken {
+    Text(String),
+    WithContent { content: String },
+}
+
+impl Serialize for NamedSpecialToken {
+    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        serializer.serialize_str(self.as_str())
+    }
+}
+
+impl From<NamedSpecialToken> for String {
+    fn from(value: NamedSpecialToken) -> Self {
+        match value {
+            NamedSpecialToken::Text(string) => string,
+            NamedSpecialToken::WithContent { content } => content,
+        }
+    }
+}
+
+impl NamedSpecialToken {
+    pub fn as_str(&self) -> &str {
+        match self {
+            Self::Text(value) => value,
+            Self::WithContent { content } => content,
+        }
+    }
+}
+
+/// Minimal set of special-token entries needed by chat/EOS handling.
+#[serde_with::skip_serializing_none]
+#[derive(Debug, Clone, Default, Deserialize, Serialize)]
+#[serde(default)]
+pub struct HfSpecialTokens {
+    pub bos_token: Option<NamedSpecialToken>,
+    pub eos_token: Option<NamedSpecialToken>,
+    pub unk_token: Option<NamedSpecialToken>,
+    pub pad_token: Option<NamedSpecialToken>,
+}
+
+impl HfSpecialTokens {
+    /// Returns true if we don't discover any special tokens in the config.
+    pub fn is_empty(&self) -> bool {
+        self.bos_token.is_none()
+            && self.eos_token.is_none()
+            && self.unk_token.is_none()
+            && self.pad_token.is_none()
+    }
+}
+
+/// Minimal subset of `config.json` (the model's main HF config).
+///
+/// This intentionally supports only the two layouts we currently care about in
+/// the Rust frontend:
+/// - pure text models that keep text metadata at the top level
+/// - composite models that expose a single nested `text_config`
+///
+/// We do not support additional entry points such as `decoder`, `generator`, or
+/// `text_encoder`.
+#[derive(Debug, Default, Deserialize)]
+#[serde(default)]
+pub struct ModelConfig {
+    model_type: Option<String>,
+    max_position_embeddings: Option<u32>,
+    num_attention_heads: Option<u32>,
+    num_experts: Option<OneOrManyExpertCount>,
+    moe_num_experts: Option<OneOrManyExpertCount>,
+    n_routed_experts: Option<OneOrManyExpertCount>,
+    num_local_experts: Option<OneOrManyExpertCount>,
+    block_configs: Vec<BlockConfig>,
+    text_config: Option<Box<ModelConfig>>,
+}
+
+/// Minimal subset of `generation_config.json`.
+#[derive(Debug, Default, Deserialize)]
+#[serde(default)]
+pub(super) struct GenerationConfig {
+    pub eos_token_id: Option<OneOrManyTokenIds>,
+    pub temperature: Option<f32>,
+    pub top_p: Option<f32>,
+    pub top_k: Option<u32>,
+    pub min_p: Option<f32>,
+    pub repetition_penalty: Option<f32>,
+    pub max_new_tokens: Option<u32>,
+}
+
+/// HF generation configs allow either one EOS id or a list of EOS ids.
+#[derive(Debug, Clone, Deserialize)]
+#[serde(untagged)]
+pub(super) enum OneOrManyTokenIds {
+    One(u32),
+    Many(Vec<u32>),
+}
+
+impl OneOrManyTokenIds {
+    pub(super) fn into_set(self) -> BTreeSet<u32> {
+        match self {
+            Self::One(id) => BTreeSet::from([id]),
+            Self::Many(ids) => ids.into_iter().collect(),
+        }
+    }
+}
+
+/// Hugging Face configs may expose the expert count either as one integer or
+/// as a list of repeated integers.
+#[derive(Debug, Clone, Deserialize)]
+#[serde(untagged)]
+pub(super) enum OneOrManyExpertCount {
+    One(u32),
+    Many(Vec<u32>),
+}
+
+impl OneOrManyExpertCount {
+    fn first_value(&self) -> u32 {
+        match self {
+            Self::One(value) => *value,
+            // Python currently takes the first value for list[int] expert
+            // counts in remote-code configs.
+            Self::Many(values) => values.first().copied().unwrap_or(0),
+        }
+    }
+}
+
+/// Heterogeneous block-level MoE metadata used as a fallback when no top-level
+/// expert-count field is available.
+#[derive(Debug, Default, Deserialize)]
+#[serde(default)]
+pub(super) struct BlockConfig {
+    pub block_type: String,
+    pub n_routed_experts: u32,
+}
+
+impl ModelConfig {
+    /// Return the config that the Rust frontend treats as the text/LLM config.
+    ///
+    /// This is deliberately narrower than Python/transformers: we only support
+    /// either the top-level config itself or a single nested `text_config`.
+    fn effective_text_config(&self) -> &Self {
+        self.text_config.as_deref().unwrap_or(self)
+    }
+
+    /// Return the effective Hugging Face `model_type` used by the Rust
+    /// frontend.
+    ///
+    /// This follows the same simplified text-config selection as the rest of
+    /// this type: the top-level config wins, otherwise a single nested
+    /// `text_config` may provide the value.
+    pub fn model_type(&self) -> Option<&str> {
+        self.model_type.as_deref().or_else(|| self.text_config.as_deref()?.model_type())
+    }
+
+    /// Reject partially nested `text_config` payloads that are unlikely to be
+    /// valid LLM configs for our current use.
+    ///
+    /// This keeps the simplified Rust-side parsing honest: if a model declares
+    /// `text_config`, it must at least look like a real text model config.
+    fn validate_text_config_selection(&self) -> Result<()> {
+        if let Some(text_config) = self.text_config.as_deref()
+            && text_config.num_attention_heads.is_none()
+        {
+            return Err(Error::Tokenizer(
+                "the text config extracted from the model config does not have `num_attention_heads`"
+                    .to_string(),
+            ));
+        }
+
+        Ok(())
+    }
+
+    /// Match Python's current expert-count priority on the selected text
+    /// config.
+    ///
+    /// The only intentional simplification here is how we pick the text config:
+    /// Rust only looks at the top level or `text_config`, not the broader
+    /// transformers composite-config surface.
+    fn num_experts_from_block_configs(&self) -> u32 {
+        self.effective_text_config()
+            .block_configs
+            .iter()
+            .filter(|block| block.block_type == "moe")
+            .map(|block| block.n_routed_experts)
+            .max()
+            .unwrap_or(0)
+    }
+
+    pub(super) fn num_experts(&self) -> u32 {
+        let config = self.effective_text_config();
+        let direct = [
+            config.num_experts.as_ref(),
+            config.moe_num_experts.as_ref(),
+            config.n_routed_experts.as_ref(),
+            config.num_local_experts.as_ref(),
+        ]
+        .into_iter()
+        .flatten()
+        .map(OneOrManyExpertCount::first_value)
+        .next()
+        .unwrap_or(0);
+
+        if direct > 0 {
+            direct
+        } else {
+            self.num_experts_from_block_configs()
+        }
+    }
+
+    pub(super) fn is_moe(&self) -> bool {
+        self.num_experts() > 0
+    }
+
+    pub(super) fn max_position_embeddings(&self) -> Option<u32> {
+        self.effective_text_config().max_position_embeddings
+    }
+}
+
+/// Load the tokenizer-side EOS metadata if a config file is present.
+pub fn load_tokenizer_config(path: Option<&Path>) -> Result<HfTokenizerConfig> {
+    read_json_file(path)
+}
+
+/// Load the generation-side EOS metadata if a config file is present.
+pub(super) fn load_generation_config(path: Option<&Path>) -> Result<GenerationConfig> {
+    read_json_file(path)
+}
+
+/// Load the model-side config (`config.json`) if present.
+pub fn load_model_config(path: Option<&Path>) -> Result<ModelConfig> {
+    let config: ModelConfig = read_json_file(path)?;
+    config.validate_text_config_selection()?;
+    Ok(config)
+}
+
+fn read_json_file<T>(path: Option<&Path>) -> Result<T>
+where
+    T: for<'de> Deserialize<'de> + Default,
+{
+    let Some(path) = path else {
+        return Ok(T::default());
+    };
+    let content = fs::read_to_string(path).map_err(|error| {
+        Error::Tokenizer(format!(
+            "failed to read {}: {}",
+            path.display(),
+            error.as_report()
+        ))
+    })?;
+    serde_json::from_str(&content).map_err(|error| {
+        Error::Tokenizer(format!(
+            "failed to parse {}: {}",
+            path.display(),
+            error.as_report()
+        ))
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::ModelConfig;
+
+    #[test]
+    fn model_config_detects_moe_from_named_expert_fields() {
+        let field_names = [
+            "num_experts",
+            "moe_num_experts",
+            "n_routed_experts",
+            "num_local_experts",
+        ];
+
+        for field_name in field_names {
+            let config: ModelConfig =
+                serde_json::from_str(&format!(r#"{{"{field_name}": 64}}"#)).unwrap();
+            assert_eq!(config.num_experts(), 64, "field_name={field_name}");
+            assert!(config.is_moe(), "field_name={field_name}");
+        }
+    }
+
+    #[test]
+    fn model_config_uses_first_value_for_list_expert_counts() {
+        let config: ModelConfig = serde_json::from_str(r#"{"num_experts":[16,16]}"#).unwrap();
+
+        assert_eq!(config.num_experts(), 16);
+        assert!(config.is_moe());
+    }
+
+    #[test]
+    fn model_config_falls_back_to_block_configs_maximum() {
+        let config: ModelConfig = serde_json::from_str(
+            r#"{
+                "block_configs": [
+                    {"block_type":"attention","n_routed_experts":9},
+                    {"block_type":"moe","n_routed_experts":32},
+                    {"block_type":"moe","n_routed_experts":64}
+                ]
+            }"#,
+        )
+        .unwrap();
+
+        assert_eq!(config.num_experts(), 64);
+        assert!(config.is_moe());
+    }
+
+    #[test]
+    fn model_config_prefers_nested_text_config_like_python_hf_text_config() {
+        let config: ModelConfig = serde_json::from_str(
+            r#"{
+                "model_type": "top_level",
+                "num_experts": 64,
+                "max_position_embeddings": 8192,
+                "text_config": {
+                    "model_type": "nested",
+                    "num_attention_heads": 32,
+                    "num_local_experts": 8,
+                    "max_position_embeddings": 4096
+                }
+            }"#,
+        )
+        .unwrap();
+
+        assert_eq!(config.num_experts(), 8);
+        assert_eq!(config.model_type(), Some("top_level"));
+        assert_eq!(config.max_position_embeddings(), Some(4096));
+        assert!(config.is_moe());
+    }
+
+    #[test]
+    fn model_config_defaults_to_non_moe_when_no_expert_metadata_exists() {
+        let config: ModelConfig =
+            serde_json::from_str(r#"{"max_position_embeddings":4096}"#).unwrap();
+
+        assert_eq!(config.num_experts(), 0);
+        assert!(!config.is_moe());
+        assert_eq!(config.max_position_embeddings(), Some(4096));
+    }
+
+    #[test]
+    fn model_config_rejects_nested_text_config_without_attention_heads() {
+        let config: ModelConfig =
+            serde_json::from_str(r#"{"text_config":{"max_position_embeddings":4096}}"#).unwrap();
+
+        let error = config.validate_text_config_selection().unwrap_err();
+        assert!(error.to_string().contains("does not have `num_attention_heads`"),);
+    }
+}
diff --git a/rust/src/text/src/backend/hf/mod.rs b/rust/src/text/src/backend/hf/mod.rs
new file mode 100644
index 000000000000..a5d07dd8fc0b
--- /dev/null
+++ b/rust/src/text/src/backend/hf/mod.rs
@@ -0,0 +1,120 @@
+mod config;
+mod model_files;
+
+use std::collections::BTreeSet;
+use std::sync::Arc;
+
+use tracing::info;
+use vllm_tokenizer::{DynTokenizer, HuggingFaceTokenizer, TekkenTokenizer, TiktokenTokenizer};
+
+use self::config::{GenerationConfig, load_generation_config};
+pub use self::config::{
+    HfSpecialTokens, HfTokenizerConfig, ModelConfig, NamedSpecialToken, load_model_config,
+    load_tokenizer_config,
+};
+pub use self::model_files::{ResolvedModelFiles, TokenizerSource};
+use crate::backend::{SamplingHints, TextBackend};
+use crate::error::Result;
+
+fn load_tokenizer(tokenizer: &TokenizerSource) -> Result<DynTokenizer> {
+    match tokenizer {
+        TokenizerSource::HuggingFace(path) => Ok(Arc::new(HuggingFaceTokenizer::new(path)?)),
+        TokenizerSource::Tiktoken(path) => Ok(Arc::new(TiktokenTokenizer::new(path)?)),
+        TokenizerSource::Tekken(path) => Ok(Arc::new(TekkenTokenizer::new(path)?)),
+    }
+}
+
+/// [`TextBackend`] implementation built on Hugging Face model files.
+pub struct HfTextBackend {
+    model_id: String,
+    files: ResolvedModelFiles,
+    tokenizer: DynTokenizer,
+    /// Primary EOS handled by engine-core's dedicated EOS path.
+    primary_eos_token_id: Option<u32>,
+    /// Additional EOS ids that should flow through stop-token handling.
+    extra_eos_token_ids: BTreeSet<u32>,
+    /// Generation-config for sampling defaults that may be inherited when the
+    /// user does not explicitly override them.
+    generation_config: GenerationConfig,
+    /// Model config (`config.json`).
+    model_config: ModelConfig,
+}
+
+impl HfTextBackend {
+    /// Load the text backend with the given model id.
+    pub async fn from_model(model_id: &str) -> Result<Self> {
+        let files = ResolvedModelFiles::new(model_id).await?;
+        Self::from_resolved_model_files(files, model_id.to_string())
+    }
+
+    /// Load the text backend from resolved Hugging Face model files.
+    pub fn from_resolved_model_files(files: ResolvedModelFiles, model_id: String) -> Result<Self> {
+        let tokenizer_config = load_tokenizer_config(files.tokenizer_config_path.as_deref())?;
+        let tokenizer = load_tokenizer(&files.tokenizer)?;
+        let primary_eos_token_id = tokenizer_config
+            .special_tokens
+            .eos_token
+            .as_ref()
+            .and_then(|token| tokenizer.token_to_id(token.as_str()));
+
+        let model_config = load_model_config(files.config_path.as_deref())?;
+        let generation_config = load_generation_config(files.generation_config_path.as_deref())?;
+        let mut extra_eos_token_ids = generation_config
+            .eos_token_id
+            .clone()
+            .map(|value| value.into_set())
+            .unwrap_or_default();
+        if let Some(primary_eos_token_id) = primary_eos_token_id {
+            extra_eos_token_ids.remove(&primary_eos_token_id);
+        }
+
+        info!(
+            model_id,
+            "loaded text backend with Hugging Face model files"
+        );
+
+        Ok(Self {
+            model_id,
+            files,
+            tokenizer,
+            primary_eos_token_id,
+            extra_eos_token_ids,
+            generation_config,
+            model_config,
+        })
+    }
+
+    /// Expose the resolved model files for use by the chat backend to load the
+    /// chat template.
+    pub fn resolved_model_files(&self) -> &ResolvedModelFiles {
+        &self.files
+    }
+}
+
+impl TextBackend for HfTextBackend {
+    fn tokenizer(&self) -> DynTokenizer {
+        self.tokenizer.clone()
+    }
+
+    fn is_moe(&self) -> bool {
+        self.model_config.is_moe()
+    }
+
+    fn model_id(&self) -> &str {
+        &self.model_id
+    }
+
+    fn sampling_hints(&self) -> Result<SamplingHints> {
+        Ok(SamplingHints {
+            primary_eos_token_id: self.primary_eos_token_id,
+            extra_eos_token_ids: self.extra_eos_token_ids.clone(),
+            default_temperature: self.generation_config.temperature,
+            default_top_p: self.generation_config.top_p,
+            default_top_k: self.generation_config.top_k,
+            default_min_p: self.generation_config.min_p,
+            default_repetition_penalty: self.generation_config.repetition_penalty,
+            default_max_tokens: self.generation_config.max_new_tokens,
+            max_model_len: self.model_config.max_position_embeddings(),
+        })
+    }
+}
diff --git a/rust/src/text/src/backend/hf/model_files.rs b/rust/src/text/src/backend/hf/model_files.rs
new file mode 100644
index 000000000000..f4a66d30dae5
--- /dev/null
+++ b/rust/src/text/src/backend/hf/model_files.rs
@@ -0,0 +1,459 @@
+use std::path::{Path, PathBuf};
+
+use hf_hub::Cache;
+use hf_hub::api::tokio::{Api, ApiBuilder, ApiRepo};
+use thiserror_ext::AsReport as _;
+
+use super::config::{HfTokenizerConfig, load_tokenizer_config};
+use crate::error::{Error, Result};
+
+const HF_TOKEN_ENV: &str = "HF_TOKEN";
+
+/// The tokenizer source selected for a model.
+#[derive(Debug, Clone)]
+pub enum TokenizerSource {
+    /// Path to `tokenizer.json` in HuggingFace format.
+    HuggingFace(PathBuf),
+    /// Path to `tiktoken.model` or `*.tiktoken` file for tiktoken-based models.
+    Tiktoken(PathBuf),
+    /// Path to `tekken.json` when present (Mistral native tokenizer format).
+    ///
+    /// When set, the Tekken tokenizer should be preferred over the Hugging Face
+    /// tokenizer because the HuggingFace `tokenizer.json` for Mistral
+    /// models has a known regex bug that produces incorrect token IDs for
+    /// some inputs.
+    Tekken(PathBuf),
+}
+
+impl TokenizerSource {
+    pub fn path(&self) -> &Path {
+        match self {
+            Self::HuggingFace(path) | Self::Tiktoken(path) | Self::Tekken(path) => path,
+        }
+    }
+}
+
+/// Concrete tokenizer/config file locations resolved for one HF model id.
+#[derive(Debug, Clone)]
+pub struct ResolvedModelFiles {
+    /// The selected tokenizer source for this model.
+    pub tokenizer: TokenizerSource,
+    pub tokenizer_config_path: Option<PathBuf>,
+    pub generation_config_path: Option<PathBuf>,
+    pub preprocessor_config_path: Option<PathBuf>,
+    pub chat_template_path: Option<PathBuf>,
+    pub config_path: Option<PathBuf>,
+}
+
+impl ResolvedModelFiles {
+    /// Resolve tokenizer/config files from a local model directory first when
+    /// `model_id` points to one, otherwise consult the local HF cache and
+    /// finally the Hub.
+    pub async fn new(model_id: &str) -> Result<Self> {
+        if Path::new(model_id).is_dir() {
+            return resolve_local_model_files(Path::new(model_id));
+        }
+        if let Some(files) = resolve_cached_model_files(model_id)? {
+            return Ok(files);
+        }
+        resolve_remote_model_files(model_id).await
+    }
+}
+
+fn resolve_local_model_files(model_dir: &Path) -> Result<ResolvedModelFiles> {
+    let tokenizer_config_path = local_file_if_exists(model_dir, "tokenizer_config.json");
+    let tokenizer_config = load_tokenizer_config(tokenizer_config_path.as_deref())?;
+    let tokenizer = resolve_local_tokenizer_source(model_dir, &tokenizer_config)?;
+
+    Ok(ResolvedModelFiles {
+        tokenizer,
+        tokenizer_config_path,
+        generation_config_path: local_file_if_exists(model_dir, "generation_config.json"),
+        preprocessor_config_path: local_file_if_exists(model_dir, "preprocessor_config.json"),
+        chat_template_path: discover_chat_template_in_dir(model_dir),
+        config_path: local_file_if_exists(model_dir, "config.json"),
+    })
+}
+
+async fn resolve_remote_model_files(model_id: &str) -> Result<ResolvedModelFiles> {
+    let api = build_api().map_err(|error| Error::Tokenizer(error.to_report_string()))?;
+    let repo = api.model(model_id.to_string());
+    let info = repo.info().await.map_err(|error| {
+        Error::Tokenizer(format!(
+            "failed to fetch model '{model_id}': {}",
+            error.as_report()
+        ))
+    })?;
+
+    let siblings = info
+        .siblings
+        .iter()
+        .map(|sibling| sibling.rfilename.as_str())
+        .collect::<std::collections::BTreeSet<_>>();
+
+    let tokenizer_config_path =
+        download_if_present(&repo, model_id, &siblings, "tokenizer_config.json").await?;
+    let tokenizer_config = load_tokenizer_config(tokenizer_config_path.as_deref())?;
+
+    let tokenizer = resolve_remote_tokenizer_source(
+        &repo,
+        model_id,
+        &siblings,
+        tokenizer_config.tokenizer_class.as_deref(),
+    )
+    .await?;
+
+    let generation_config_path =
+        download_if_present(&repo, model_id, &siblings, "generation_config.json").await?;
+    let preprocessor_config_path =
+        download_if_present(&repo, model_id, &siblings, "preprocessor_config.json").await?;
+    let chat_template_name = siblings
+        .contains("chat_template.json")
+        .then_some("chat_template.json")
+        .or_else(|| siblings.contains("chat_template.jinja").then_some("chat_template.jinja"))
+        .or_else(|| siblings.iter().copied().find(|name| name.ends_with(".jinja")));
+    let chat_template_path = match chat_template_name {
+        Some(name) => Some(download_known_file(&repo, model_id, name).await?),
+        None => None,
+    };
+    let config_path = download_if_present(&repo, model_id, &siblings, "config.json").await?;
+
+    Ok(ResolvedModelFiles {
+        tokenizer,
+        tokenizer_config_path,
+        generation_config_path,
+        preprocessor_config_path,
+        chat_template_path,
+        config_path,
+    })
+}
+
+fn resolve_cached_model_files(model_id: &str) -> Result<Option<ResolvedModelFiles>> {
+    let cache_repo = Cache::from_env().model(model_id.to_string());
+
+    let tokenizer_config_path = cache_repo.get("tokenizer_config.json");
+    let tokenizer_config = load_tokenizer_config(tokenizer_config_path.as_deref())?;
+    let tokenizer = match resolve_cached_tokenizer_source(&cache_repo, &tokenizer_config)? {
+        Some(tokenizer) => tokenizer,
+        None => return Ok(None),
+    };
+
+    let model_dir = tokenizer.path().parent().ok_or_else(|| {
+        Error::Tokenizer("resolved tokenizer file has no parent directory".to_string())
+    })?;
+    let generation_config_path = cache_repo.get("generation_config.json");
+    let preprocessor_config_path = cache_repo.get("preprocessor_config.json");
+    let chat_template_path = discover_chat_template_in_dir(model_dir);
+    let config_path = cache_repo.get("config.json");
+
+    Ok(Some(ResolvedModelFiles {
+        tokenizer,
+        tokenizer_config_path,
+        generation_config_path,
+        preprocessor_config_path,
+        chat_template_path,
+        config_path,
+    }))
+}
+
+async fn resolve_remote_tokenizer_source(
+    repo: &ApiRepo,
+    model_id: &str,
+    siblings: &std::collections::BTreeSet<&str>,
+    tokenizer_class: Option<&str>,
+) -> Result<TokenizerSource> {
+    if let Some(tekken_path) = download_if_present(repo, model_id, siblings, "tekken.json").await? {
+        return Ok(TokenizerSource::Tekken(tekken_path));
+    }
+
+    let tokenizer_path = if siblings.contains("tokenizer.json") {
+        download_known_file(repo, model_id, "tokenizer.json").await?
+    } else if let Some(tiktoken_name) = find_tiktoken_sibling(siblings) {
+        download_known_file(repo, model_id, tiktoken_name).await?
+    } else {
+        return Err(Error::Tokenizer(format!(
+            "model '{model_id}' does not expose a supported tokenizer file \
+             (tokenizer.json, tiktoken.model, or *.tiktoken) on Hugging Face"
+        )));
+    };
+
+    Ok(resolve_tokenizer_source(
+        tokenizer_path,
+        tokenizer_class,
+        None,
+    ))
+}
+
+fn resolve_cached_tokenizer_source(
+    cache_repo: &hf_hub::CacheRepo,
+    tokenizer_config: &HfTokenizerConfig,
+) -> Result<Option<TokenizerSource>> {
+    let tekken_path = cache_repo.get("tekken.json");
+
+    if let Some(tekken_path) = tekken_path {
+        return Ok(Some(TokenizerSource::Tekken(tekken_path)));
+    }
+
+    let Some(tokenizer_path) = cache_repo.get("tokenizer.json").or_else(|| {
+        // tiktoken.model is the most common name, try it first.
+        cache_repo.get("tiktoken.model").or_else(|| {
+            // Scan for any *.tiktoken file in the cache snapshot directory.
+            let snapshot_dir = cache_repo.get("config.json")?.parent()?.to_path_buf();
+            discover_tiktoken_in_dir(&snapshot_dir)
+        })
+    }) else {
+        return Ok(None);
+    };
+
+    Ok(Some(resolve_tokenizer_source(
+        tokenizer_path,
+        tokenizer_config.tokenizer_class.as_deref(),
+        None,
+    )))
+}
+
+fn resolve_local_tokenizer_source(
+    model_dir: &Path,
+    tokenizer_config: &HfTokenizerConfig,
+) -> Result<TokenizerSource> {
+    let tekken_path = local_file_if_exists(model_dir, "tekken.json");
+    if let Some(tekken_path) = tekken_path {
+        return Ok(TokenizerSource::Tekken(tekken_path));
+    }
+
+    let tokenizer_path = local_file_if_exists(model_dir, "tokenizer.json")
+        .or_else(|| local_file_if_exists(model_dir, "tiktoken.model"))
+        .or_else(|| discover_tiktoken_in_dir(model_dir))
+        .ok_or_else(|| {
+            Error::Tokenizer(format!(
+                "local model directory '{}' does not contain a supported tokenizer file \
+                 (tokenizer.json, tiktoken.model, or *.tiktoken)",
+                model_dir.display()
+            ))
+        })?;
+
+    Ok(resolve_tokenizer_source(
+        tokenizer_path,
+        tokenizer_config.tokenizer_class.as_deref(),
+        None,
+    ))
+}
+
+/// Choose the tokenizer.
+///
+/// Selection order:
+/// 1. `tekken.json` — Mistral native tokenizer (preferred over HF `tokenizer.json` because the HF
+///    version has a known regex bug for Mistral models).
+/// 2. File extension — `.tiktoken` / `tiktoken.model` files use tiktoken from BPE data.
+/// 3. `tokenizer_class` in `tokenizer_config.json` — classes containing "Tiktoken" (case-
+///    insensitive) trigger tiktoken loading from a sibling BPE file.
+/// 4. Default — `tokenizer.json` in HuggingFace format.
+fn resolve_tokenizer_source(
+    tokenizer_path: PathBuf,
+    tokenizer_class: Option<&str>,
+    tekken_path: Option<PathBuf>,
+) -> TokenizerSource {
+    if let Some(tekken_path) = tekken_path {
+        return TokenizerSource::Tekken(tekken_path);
+    }
+
+    if is_tiktoken_file(&tokenizer_path) {
+        return TokenizerSource::Tiktoken(tokenizer_path);
+    }
+
+    if tokenizer_class.is_some_and(|cls| cls.to_ascii_lowercase().contains("tiktoken"))
+        && let Some(dir) = tokenizer_path.parent()
+        && let Some(tiktoken_path) = discover_tiktoken_in_dir(dir)
+    {
+        return TokenizerSource::Tiktoken(tiktoken_path);
+    }
+
+    TokenizerSource::HuggingFace(tokenizer_path)
+}
+
+/// Download `filename` only if it exists in `siblings`.
+async fn download_if_present(
+    repo: &ApiRepo,
+    model_id: &str,
+    siblings: &std::collections::BTreeSet<&str>,
+    filename: &str,
+) -> Result<Option<PathBuf>> {
+    match siblings.contains(filename) {
+        true => download_known_file(repo, model_id, filename).await.map(Some),
+        false => Ok(None),
+    }
+}
+
+async fn download_known_file(repo: &ApiRepo, model_id: &str, filename: &str) -> Result<PathBuf> {
+    repo.get(filename).await.map_err(|error| {
+        Error::Tokenizer(format!(
+            "failed to download '{filename}' for model '{model_id}': {}",
+            error.as_report()
+        ))
+    })
+}
+
+fn build_api() -> anyhow::Result<Api> {
+    let mut builder = ApiBuilder::from_env().with_progress(true);
+    if let Ok(token) = std::env::var(HF_TOKEN_ENV)
+        && !token.is_empty()
+    {
+        builder = builder.with_token(Some(token));
+    }
+    Ok(builder.build()?)
+}
+
+fn local_file_if_exists(dir: &Path, filename: &str) -> Option<PathBuf> {
+    let path = dir.join(filename);
+    path.is_file().then_some(path)
+}
+
+/// Find a tiktoken file name among repo siblings, preferring `tiktoken.model`.
+fn find_tiktoken_sibling<'a>(siblings: &std::collections::BTreeSet<&'a str>) -> Option<&'a str> {
+    if siblings.contains("tiktoken.model") {
+        return Some("tiktoken.model");
+    }
+    siblings.iter().copied().find(|name| name.ends_with(".tiktoken"))
+}
+
+/// Discover a tiktoken model file in a local directory.
+pub(super) fn discover_tiktoken_in_dir(dir: &std::path::Path) -> Option<PathBuf> {
+    let tiktoken_model = dir.join("tiktoken.model");
+    if tiktoken_model.exists() {
+        return Some(tiktoken_model);
+    }
+    std::fs::read_dir(dir).ok()?.flatten().find_map(|entry| {
+        let path = entry.path();
+        if path
+            .file_name()
+            .and_then(|n| n.to_str())
+            .is_some_and(|n| n.ends_with(".tiktoken"))
+        {
+            Some(path)
+        } else {
+            None
+        }
+    })
+}
+
+/// Returns `true` if `path` points to a tiktoken-format file (by name).
+pub(super) fn is_tiktoken_file(path: &std::path::Path) -> bool {
+    path.file_name()
+        .and_then(|n| n.to_str())
+        .is_some_and(|name| name == "tiktoken.model" || name.ends_with(".tiktoken"))
+}
+
+/// Chat templates are sometimes stored as dedicated .jinja files rather than as
+/// a fixed-name config entry, so we scan the cached model dir.
+fn discover_chat_template_in_dir(dir: &std::path::Path) -> Option<PathBuf> {
+    let json_template_path = dir.join("chat_template.json");
+    if json_template_path.exists() {
+        return Some(json_template_path);
+    }
+
+    let jinja_path = dir.join("chat_template.jinja");
+    if jinja_path.exists() {
+        return Some(jinja_path);
+    }
+
+    std::fs::read_dir(dir).ok()?.flatten().map(|entry| entry.path()).find(|path| {
+        path.file_name()
+            .and_then(|name| name.to_str())
+            .is_some_and(|name| name.ends_with(".jinja"))
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs;
+
+    use tempfile::tempdir;
+    use vllm_tokenizer::{TiktokenTokenizer, Tokenizer};
+
+    use super::{ResolvedModelFiles, TokenizerSource};
+
+    #[tokio::test]
+    async fn resolved_model_files_prefers_absolute_local_model_dir() {
+        let dir = tempdir().expect("create temp dir");
+        fs::write(dir.path().join("tokenizer.json"), "{}").expect("write tokenizer");
+        fs::write(
+            dir.path().join("tokenizer_config.json"),
+            r#"{"tokenizer_class":"PreTrainedTokenizerFast"}"#,
+        )
+        .expect("write tokenizer config");
+        fs::write(dir.path().join("config.json"), "{}").expect("write config");
+
+        let files = ResolvedModelFiles::new(dir.path().to_str().expect("utf8 path"))
+            .await
+            .expect("resolve local model files");
+
+        match files.tokenizer {
+            TokenizerSource::HuggingFace(path) => {
+                assert_eq!(path, dir.path().join("tokenizer.json"));
+            }
+            other => panic!("expected HuggingFace tokenizer, got {other:?}"),
+        }
+        assert_eq!(files.config_path, Some(dir.path().join("config.json")));
+        assert_eq!(
+            files.tokenizer_config_path,
+            Some(dir.path().join("tokenizer_config.json"))
+        );
+    }
+
+    #[tokio::test]
+    #[ignore = "requires network access to Hugging Face and downloads the real Kimi K2.5 tokenizer"]
+    async fn tiktoken_real_kimi_k25_tokenizer_files_load_and_handle_special_tokens() {
+        let files = ResolvedModelFiles::new("moonshotai/Kimi-K2.5")
+            .await
+            .expect("resolve real Kimi K2.5 model files");
+
+        let tokenizer_path = match &files.tokenizer {
+            TokenizerSource::Tiktoken(path) => path.clone(),
+            other => panic!("expected tiktoken tokenizer source, got {other:?}"),
+        };
+
+        for backend in [
+            TiktokenTokenizer::new_riptoken(&tokenizer_path).expect("load riptoken backend"),
+            TiktokenTokenizer::new_tiktoken_rs(&tokenizer_path).expect("load tiktoken-rs backend"),
+        ] {
+            let think_id = backend.token_to_id("<think>").expect("resolve <think>");
+            let end_think_id = backend.token_to_id("</think>").expect("resolve </think>");
+            let tool_section_id = backend
+                .token_to_id("<|tool_calls_section_begin|>")
+                .expect("resolve tool call section marker");
+            let contraction_heavy_text =
+                "I'm sure it's fine, but I can't say I'd trust that it's what we'd ship.";
+            let contraction_heavy_ids = backend.encode(contraction_heavy_text, false).unwrap();
+
+            assert_eq!(
+                (think_id, end_think_id, tool_section_id),
+                (163606, 163607, 163595)
+            );
+            assert_eq!(backend.decode(&[think_id], true).unwrap(), "<think>");
+            assert_eq!(backend.decode(&[end_think_id], true).unwrap(), "</think>");
+            assert_eq!(
+                backend.decode(&[tool_section_id], true).unwrap(),
+                "<|tool_calls_section_begin|>"
+            );
+
+            // This demonstrates that we're using Kimi's custom BPE pattern.
+            // With CL100K this will be 23 tokens instead.
+            assert_eq!(
+                contraction_heavy_ids,
+                vec![
+                    17172, 3287, 4643, 8201, 11, 996, 374, 8971, 3637, 20020, 8173, 473, 4643,
+                    1573, 56229, 13922, 13,
+                ]
+            );
+            assert_eq!(contraction_heavy_ids.len(), 17);
+            assert_eq!(
+                backend.decode(&contraction_heavy_ids, false).unwrap(),
+                contraction_heavy_text
+            );
+
+            // Special-looking text that is not actually registered should fail gracefully.
+            assert_eq!(backend.token_to_id("◁think▷"), None);
+            assert_eq!(backend.token_to_id("<|definitely_not_registered|>"), None);
+        }
+    }
+}
diff --git a/rust/src/text/src/backend/mod.rs b/rust/src/text/src/backend/mod.rs
new file mode 100644
index 000000000000..4f2d7093a757
--- /dev/null
+++ b/rust/src/text/src/backend/mod.rs
@@ -0,0 +1,47 @@
+pub mod hf;
+
+use std::sync::Arc;
+
+use vllm_tokenizer::DynTokenizer;
+
+use crate::error::Result;
+
+/// Tokenizer/model-derived hints used to enrich text-generation requests before
+/// they are lowered into engine-core.
+#[derive(Debug, Clone, Default, PartialEq)]
+pub struct SamplingHints {
+    pub primary_eos_token_id: Option<u32>,
+    pub extra_eos_token_ids: std::collections::BTreeSet<u32>,
+    pub default_temperature: Option<f32>,
+    pub default_top_p: Option<f32>,
+    pub default_top_k: Option<u32>,
+    pub default_min_p: Option<f32>,
+    pub default_repetition_penalty: Option<f32>,
+    pub default_max_tokens: Option<u32>,
+    /// Model context window size (`max_position_embeddings` from
+    /// `config.json`).
+    pub max_model_len: Option<u32>,
+}
+
+/// Minimal text-processing backend needed by `vllm-text`.
+pub trait TextBackend: Send + Sync {
+    /// Return the tokenizer used by this backend.
+    fn tokenizer(&self) -> DynTokenizer;
+
+    /// Return whether the loaded model is a mixture-of-experts model.
+    fn is_moe(&self) -> bool {
+        false
+    }
+
+    /// Return the backend model ID.
+    fn model_id(&self) -> &str;
+
+    /// Return tokenizer/model-derived hints used to enrich southbound sampling
+    /// parameters.
+    fn sampling_hints(&self) -> Result<SamplingHints> {
+        Ok(SamplingHints::default())
+    }
+}
+
+/// Shared trait-object form of [`TextBackend`].
+pub type DynTextBackend = Arc<dyn TextBackend>;
diff --git a/rust/src/text/src/error.rs b/rust/src/text/src/error.rs
new file mode 100644
index 000000000000..62e8e2ae98a5
--- /dev/null
+++ b/rust/src/text/src/error.rs
@@ -0,0 +1,30 @@
+use thiserror::Error;
+use vllm_engine_core_client::Error as EngineCoreError;
+use vllm_llm::Error as LlmError;
+
+#[derive(Debug, Error)]
+pub enum Error {
+    #[error("tokenizer error: {0}")]
+    Tokenizer(String),
+    #[error("text request `{request_id}` must contain at least one prompt token ID")]
+    EmptyPromptTokenIds { request_id: String },
+    #[error(
+        "this model's maximum context length is {max_model_len} tokens, \
+         but the prompt contains {prompt_len} input tokens"
+    )]
+    PromptTooLong { max_model_len: u32, prompt_len: u32 },
+    #[error("text request stream `{request_id}` closed before terminal output")]
+    StreamClosedBeforeTerminalOutput { request_id: String },
+    #[error(transparent)]
+    Llm(#[from] LlmError),
+    #[error(transparent)]
+    EngineCore(#[from] EngineCoreError),
+}
+
+pub type Result<T> = std::result::Result<T, Error>;
+
+impl From<vllm_tokenizer::TokenizerError> for Error {
+    fn from(error: vllm_tokenizer::TokenizerError) -> Self {
+        Self::Tokenizer(error.0)
+    }
+}
diff --git a/rust/src/text/src/lib.rs b/rust/src/text/src/lib.rs
new file mode 100644
index 000000000000..ef5615ec6d42
--- /dev/null
+++ b/rust/src/text/src/lib.rs
@@ -0,0 +1,149 @@
+//! Shared text-generation support used by chat and future raw completions.
+//!
+//! This crate intentionally stays below chat semantics:
+//! prompt text handling, tokenizer/model loading, incremental detokenization,
+//! and the thin generate-facing backend interface live here.
+
+use std::mem::take;
+
+pub use backend::{DynTextBackend, SamplingHints, TextBackend};
+pub use error::{Error, Result};
+use futures::Stream;
+pub use lower::{
+    PreparedTextRequest, lower_sampling_params, lower_text_request, resolve_max_tokens,
+};
+pub use output::{
+    CollectedTextOutput, DecodedLogprobs, DecodedPositionLogprobs, DecodedPromptLogprobs,
+    DecodedTextEvent, DecodedTokenLogprob, Finished, TextDecodeOptions, TextOutputStreamExt,
+};
+pub use request::{Prompt, SamplingParams, TextRequest};
+use trait_set::trait_set;
+use vllm_engine_core_client::EngineCoreClient;
+pub use vllm_llm::FinishReason;
+use vllm_llm::{GenerateOutputStream, Llm};
+use vllm_tokenizer::DynTokenizer;
+
+pub mod backend;
+mod error;
+mod lower;
+pub mod output;
+mod request;
+pub use vllm_tokenizer as tokenizer;
+
+trait_set! {
+    /// Shared streamed text output type used by raw completions and other text-only northbound paths.
+    pub trait TextOutputStream = Stream<Item = Result<DecodedTextEvent>> + Send + 'static;
+}
+
+/// Raw text facade above [`Llm`].
+///
+/// This layer stays below chat semantics: prompt text or prompt token IDs flow
+/// in, decoded text deltas and terminal metadata flow out.
+pub struct TextLlm {
+    /// Generate-only client owned by this text facade.
+    llm: Llm,
+    /// Tokenizer/model metadata backend responsible for prompt encode/decode
+    /// and sampling hints.
+    backend: DynTextBackend,
+    /// Context window size derived by the backend or from engine startup
+    /// handshake, with optional override from config.
+    max_model_len: Option<u32>,
+}
+
+impl TextLlm {
+    /// Create a new text-generation facade from a shared LLM client plus a text
+    /// backend.
+    pub fn new(llm: Llm, backend: DynTextBackend) -> Self {
+        // Prefer the engine-reported max_model_len because it reflects the
+        // post-profiling, auto-fitted KV cache limit rather than static
+        // frontend metadata.
+        let max_model_len = llm.engine_core_client().max_model_len();
+
+        Self {
+            llm,
+            backend,
+            max_model_len,
+        }
+    }
+
+    /// Override the maximum model context length explicitly.
+    ///
+    /// This takes priority over both the engine-reported default and any
+    /// tokenizer/model metadata exposed by the backend.
+    pub fn with_max_model_len(mut self, max_model_len: u32) -> Self {
+        self.max_model_len = Some(max_model_len);
+        self
+    }
+
+    /// Return the backend model ID.
+    pub fn model_id(&self) -> &str {
+        self.backend.model_id()
+    }
+
+    /// Expose the underlying engine-core client for low-level utility/admin
+    /// calls.
+    pub fn engine_core_client(&self) -> &EngineCoreClient {
+        self.llm.engine_core_client()
+    }
+
+    /// Return the tokenizer used by this text backend.
+    pub fn tokenizer(&self) -> DynTokenizer {
+        self.backend.tokenizer()
+    }
+
+    /// Tokenize if needed, lower to a generate request, and return the raw
+    /// token stream.
+    pub async fn generate_raw(&self, request: TextRequest) -> Result<GenerateOutputStream> {
+        let (_, raw_stream) = self.generate_inner(request).await?;
+        Ok(raw_stream)
+    }
+
+    /// Tokenize if needed, lower to a generate request, and stream
+    /// incrementally decoded text.
+    pub async fn generate(&self, request: TextRequest) -> Result<impl TextOutputStream> {
+        let (text_request, raw_stream) = self.generate_inner(request).await?;
+        let tokenizer = self.backend.tokenizer();
+        let decoded_stream = output::decoded_text_event_stream(
+            text_request.request_id,
+            tokenizer,
+            raw_stream,
+            text_request.decode_options,
+            text_request.intermediate,
+        );
+
+        Ok(decoded_stream)
+    }
+
+    async fn generate_inner(
+        &self,
+        mut request: TextRequest,
+    ) -> Result<(TextRequest, GenerateOutputStream)> {
+        request.validate()?;
+
+        let tokenizer = self.backend.tokenizer();
+        let prompt_token_ids = match take(&mut request.prompt) {
+            Prompt::Text(text) => tokenizer.encode(&text, request.add_special_tokens)?,
+            // Pre-tokenized prompts are the main completions-side escape hatch that lets benchmark
+            // and infra workloads bypass chat rendering and tokenizer overhead entirely.
+            Prompt::TokenIds(token_ids) => token_ids,
+        };
+
+        let mut sampling_hints = self.backend.sampling_hints()?;
+        if let Some(max_model_len) = self.max_model_len {
+            sampling_hints.max_model_len = Some(max_model_len);
+        }
+        let PreparedTextRequest {
+            text_request,
+            generate_request,
+        } = lower_text_request(request, prompt_token_ids, sampling_hints, &*tokenizer)?;
+
+        let raw_stream = self.llm.generate(generate_request).await?;
+        Ok((text_request, raw_stream))
+    }
+
+    /// Shut down the underlying LLM client and its background tasks.
+    pub async fn shutdown(self) -> Result<()> {
+        self.llm.shutdown().await?;
+        Ok(())
+    }
+}
diff --git a/rust/src/text/src/lower.rs b/rust/src/text/src/lower.rs
new file mode 100644
index 000000000000..7cbbd53dc6aa
--- /dev/null
+++ b/rust/src/text/src/lower.rs
@@ -0,0 +1,739 @@
+use std::collections::BTreeSet;
+
+use vllm_engine_core_client::protocol::EngineCoreSamplingParams;
+use vllm_llm::GenerateRequest;
+use vllm_tokenizer::Tokenizer;
+
+use crate::backend::SamplingHints;
+use crate::error::{Error, Result};
+use crate::request::{SamplingParams, TextRequest};
+
+/// One text request after it has been lowered into the raw generate boundary.
+#[derive(Debug)]
+pub struct PreparedTextRequest {
+    /// The original high-level request, preserved for response-side metadata
+    /// and decoding options.
+    pub text_request: TextRequest,
+    /// The southbound request ready to be sent to `vllm-llm`.
+    pub generate_request: GenerateRequest,
+}
+
+/// Convert a high-level [`TextRequest`] into one lower-level
+/// [`GenerateRequest`] ready for the `llm` crate.
+pub fn lower_text_request(
+    request: TextRequest,
+    prompt_token_ids: Vec<u32>,
+    sampling_hints: SamplingHints,
+    tokenizer: &dyn Tokenizer,
+) -> Result<PreparedTextRequest> {
+    let prompt_len = prompt_token_ids.len() as u32;
+    let generate_request = GenerateRequest {
+        request_id: request.request_id.clone(),
+        prompt_token_ids,
+        mm_features: request.mm_features.clone(),
+        sampling_params: lower_sampling_params(
+            request.sampling_params.clone(),
+            sampling_hints,
+            prompt_len,
+            tokenizer,
+        )?,
+        cache_salt: request.cache_salt.clone(),
+        priority: request.priority,
+        data_parallel_rank: request.data_parallel_rank,
+        // Fields below are currently placeholders.
+        arrival_time: None,
+        trace_headers: None,
+        reasoning_ended: None,
+        lora_request: None,
+    };
+
+    Ok(PreparedTextRequest {
+        text_request: request,
+        generate_request,
+    })
+}
+
+/// Convert [`SamplingParams`] into [`EngineCoreSamplingParams`], enriching
+/// omitted user values with tokenizer/model-derived hints when available.
+pub fn lower_sampling_params(
+    sampling_params: SamplingParams,
+    SamplingHints {
+        primary_eos_token_id,
+        extra_eos_token_ids,
+        default_temperature,
+        default_top_p,
+        default_top_k,
+        default_min_p,
+        default_repetition_penalty,
+        default_max_tokens,
+        max_model_len,
+    }: SamplingHints,
+    prompt_len: u32,
+    tokenizer: &dyn Tokenizer,
+) -> Result<EngineCoreSamplingParams> {
+    let SamplingParams {
+        temperature,
+        top_p,
+        top_k,
+        seed,
+        max_tokens,
+        min_tokens,
+        logprobs,
+        prompt_logprobs,
+        min_p,
+        frequency_penalty,
+        presence_penalty,
+        repetition_penalty,
+        stop_token_ids,
+        ignore_eos,
+        logit_bias,
+        allowed_token_ids,
+        bad_words,
+        logprob_token_ids,
+        structured_outputs,
+        skip_reading_prefix_cache,
+        vllm_xargs,
+    } = sampling_params;
+
+    // Mirrors the model-generation-config inheritance used by vLLM's OpenAI chat
+    // path: https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/entrypoints/openai/chat_completion/protocol.py#L424-L450
+    // If neither the caller nor the model provides a value, fall back to 1.0 — the
+    // default used by the Python vLLM OpenAI-compatible API (via
+    // `_DEFAULT_SAMPLING_PARAMS`).
+    let temperature = temperature.or(default_temperature).unwrap_or(1.0);
+    let top_p = top_p.or(default_top_p).unwrap_or(1.0);
+    let top_k = top_k.or(default_top_k).unwrap_or(0);
+    let min_p = min_p.or(default_min_p).unwrap_or(0.0);
+    let repetition_penalty = repetition_penalty.or(default_repetition_penalty).unwrap_or(1.0);
+    let max_tokens = resolve_max_tokens(max_tokens, default_max_tokens, max_model_len, prompt_len)?;
+    let min_tokens = min_tokens.unwrap_or(0);
+    let frequency_penalty = frequency_penalty.unwrap_or(0.0);
+    let presence_penalty = presence_penalty.unwrap_or(0.0);
+
+    let mut stop_token_ids = stop_token_ids.unwrap_or_default();
+    let mut all_stop_token_ids = BTreeSet::from_iter(stop_token_ids.iter().copied());
+    if let Some(primary_eos_token_id) = primary_eos_token_id {
+        all_stop_token_ids.insert(primary_eos_token_id);
+    }
+    all_stop_token_ids.extend(extra_eos_token_ids.iter().copied());
+
+    if !ignore_eos {
+        merge_unique_token_ids(&mut stop_token_ids, extra_eos_token_ids.iter().copied());
+    }
+
+    Ok(EngineCoreSamplingParams {
+        temperature,
+        top_p,
+        top_k,
+        seed,
+        max_tokens,
+        min_tokens,
+        logprobs,
+        prompt_logprobs,
+        min_p,
+        frequency_penalty,
+        presence_penalty,
+        repetition_penalty,
+        stop_token_ids,
+        eos_token_id: (!ignore_eos).then_some(primary_eos_token_id).flatten(),
+        all_stop_token_ids,
+        logit_bias,
+        allowed_token_ids,
+        bad_words_token_ids: tokenize_bad_words(bad_words.as_deref(), tokenizer)?,
+        structured_outputs,
+        logprob_token_ids,
+        skip_reading_prefix_cache,
+        extra_args: vllm_xargs,
+    })
+}
+
+/// Convert bad-word strings into token-ID sequences, following the Python vLLM
+/// logic in `SamplingParams.update_from_tokenizer()`.
+///
+/// Each word is encoded both with and without a leading space so that the ban
+/// applies regardless of whether the word appears at the beginning or in the
+/// middle of generated text (this accounts for tokenizers that use an
+/// `add_prefix_space` convention).
+///
+/// Reference: <https://github.com/vllm-project/vllm/blob/f22d6e026/vllm/sampling_params.py#L555-L594>
+fn tokenize_bad_words(
+    bad_words: Option<&[String]>,
+    tokenizer: &dyn Tokenizer,
+) -> Result<Option<Vec<Vec<u32>>>> {
+    let bad_words = bad_words.filter(|w| !w.is_empty());
+    let mut all_token_ids = Vec::new();
+
+    for bad_word in bad_words.into_iter().flatten() {
+        // Without a leading space we always keep the encoding.
+        // With a leading space we only keep it when the prefix-space variant produces a
+        // distinct first token but the same sequence length — this mirrors the Python
+        // dedup condition that avoids redundant entries.
+        let without_space = tokenizer.encode(bad_word, false)?;
+        let with_space = tokenizer.encode(&format!(" {}", bad_word.trim_start()), false)?;
+
+        if !without_space.is_empty() {
+            all_token_ids.push(without_space);
+        }
+        if !with_space.is_empty()
+            && all_token_ids.last().is_some_and(|prev: &Vec<u32>| {
+                with_space[0] != prev[0] && with_space.len() == prev.len()
+            })
+        {
+            all_token_ids.push(with_space);
+        }
+    }
+
+    Ok((!all_token_ids.is_empty()).then_some(all_token_ids))
+}
+
+/// Resolve the effective `max_tokens` for generation, mirroring vLLM Python's
+/// `get_max_tokens()` in `vllm/entrypoints/utils.py`.
+///
+/// Takes the minimum of all available limits (user-specified, generation-config
+/// default, and `max_model_len - prompt_len`). When nothing is known, falls
+/// back to `u32::MAX` so the engine-core can apply its own context-window
+/// limit.
+pub fn resolve_max_tokens(
+    user_max_tokens: Option<u32>,
+    default_max_tokens: Option<u32>,
+    max_model_len: Option<u32>,
+    prompt_len: u32,
+) -> Result<u32> {
+    let model_max_tokens = match max_model_len {
+        Some(max_model_len) if prompt_len >= max_model_len => {
+            return Err(Error::PromptTooLong {
+                max_model_len,
+                prompt_len,
+            });
+        }
+        Some(max_model_len) => Some(max_model_len - prompt_len),
+        None => None,
+    };
+
+    let fallback_max_tokens = user_max_tokens.or(default_max_tokens);
+    Ok([fallback_max_tokens, model_max_tokens]
+        .into_iter()
+        .flatten()
+        .min()
+        .unwrap_or(u32::MAX /* TODO: a reasonable fallback? */))
+}
+
+fn merge_unique_token_ids(
+    stop_token_ids: &mut Vec<u32>,
+    extra_token_ids: impl Iterator<Item = u32>,
+) {
+    // Keep user-provided ordering stable while still folding in backend-derived EOS
+    // aliases.
+    for token_id in extra_token_ids {
+        if !stop_token_ids.contains(&token_id) {
+            stop_token_ids.push(token_id);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::BTreeSet;
+
+    use super::*;
+    use crate::backend::hf::HfTextBackend;
+    use crate::backend::{SamplingHints, TextBackend as _};
+    use crate::request::{Prompt, TextRequest};
+
+    /// Stub tokenizer that returns empty token IDs — sufficient for tests that
+    /// don't exercise bad-words tokenization.
+    struct StubTokenizer;
+
+    impl Tokenizer for StubTokenizer {
+        fn encode(
+            &self,
+            _text: &str,
+            _add_special_tokens: bool,
+        ) -> vllm_tokenizer::Result<Vec<u32>> {
+            Ok(vec![])
+        }
+
+        fn decode(
+            &self,
+            _token_ids: &[u32],
+            _skip_special_tokens: bool,
+        ) -> vllm_tokenizer::Result<String> {
+            Ok(String::new())
+        }
+
+        fn token_to_id(&self, _token: &str) -> Option<u32> {
+            None
+        }
+    }
+
+    fn stub_tokenizer() -> StubTokenizer {
+        StubTokenizer
+    }
+
+    fn sample_request() -> TextRequest {
+        TextRequest {
+            prompt: Prompt::TokenIds(vec![1, 2, 3]),
+            request_id: "text-1".to_string(),
+            ..TextRequest::for_test()
+        }
+    }
+
+    fn sample_sampling_hints() -> SamplingHints {
+        SamplingHints {
+            primary_eos_token_id: Some(99),
+            extra_eos_token_ids: BTreeSet::from([77]),
+            default_temperature: None,
+            default_top_p: None,
+            default_top_k: None,
+            default_min_p: None,
+            default_repetition_penalty: None,
+            default_max_tokens: None,
+            max_model_len: None,
+        }
+    }
+
+    #[test]
+    fn lower_text_request_applies_python_style_eos_hints() {
+        let prepared = lower_text_request(
+            sample_request(),
+            vec![1, 2, 3],
+            sample_sampling_hints(),
+            &stub_tokenizer(),
+        )
+        .unwrap();
+
+        let params = prepared.generate_request.sampling_params;
+        expect_test::expect![[r#"
+            EngineCoreSamplingParams {
+                temperature: 1.0,
+                top_p: 1.0,
+                top_k: 0,
+                seed: None,
+                max_tokens: 4294967295,
+                min_tokens: 0,
+                logprobs: None,
+                prompt_logprobs: None,
+                min_p: 0.0,
+                frequency_penalty: 0.0,
+                presence_penalty: 0.0,
+                repetition_penalty: 1.0,
+                stop_token_ids: [
+                    77,
+                ],
+                eos_token_id: Some(
+                    99,
+                ),
+                all_stop_token_ids: {
+                    77,
+                    99,
+                },
+                logit_bias: None,
+                allowed_token_ids: None,
+                bad_words_token_ids: None,
+                structured_outputs: None,
+                logprob_token_ids: None,
+                skip_reading_prefix_cache: None,
+                extra_args: None,
+            }
+        "#]]
+        .assert_debug_eq(&params);
+    }
+
+    #[test]
+    fn lower_text_request_respects_ignore_eos_for_stop_token_ids() {
+        let mut request = sample_request();
+        request.sampling_params.ignore_eos = true;
+
+        let prepared = lower_text_request(
+            request,
+            vec![1, 2, 3],
+            sample_sampling_hints(),
+            &stub_tokenizer(),
+        )
+        .unwrap();
+
+        let params = prepared.generate_request.sampling_params;
+        expect_test::expect![[r#"
+            EngineCoreSamplingParams {
+                temperature: 1.0,
+                top_p: 1.0,
+                top_k: 0,
+                seed: None,
+                max_tokens: 4294967295,
+                min_tokens: 0,
+                logprobs: None,
+                prompt_logprobs: None,
+                min_p: 0.0,
+                frequency_penalty: 0.0,
+                presence_penalty: 0.0,
+                repetition_penalty: 1.0,
+                stop_token_ids: [],
+                eos_token_id: None,
+                all_stop_token_ids: {
+                    77,
+                    99,
+                },
+                logit_bias: None,
+                allowed_token_ids: None,
+                bad_words_token_ids: None,
+                structured_outputs: None,
+                logprob_token_ids: None,
+                skip_reading_prefix_cache: None,
+                extra_args: None,
+            }
+        "#]]
+        .assert_debug_eq(&params);
+    }
+
+    #[tokio::test]
+    #[ignore = "requires network access to Hugging Face"]
+    async fn lower_text_request_uses_real_qwen_generation_defaults() {
+        let backend = HfTextBackend::from_model("Qwen/Qwen3-0.6B")
+            .await
+            .expect("load qwen tokenizer and generation config");
+        let hints = backend.sampling_hints().expect("collect sampling hints");
+
+        expect_test::expect![[r#"
+            SamplingHints {
+                primary_eos_token_id: Some(
+                    151645,
+                ),
+                extra_eos_token_ids: {
+                    151643,
+                },
+                default_temperature: Some(
+                    0.6,
+                ),
+                default_top_p: Some(
+                    0.95,
+                ),
+                default_top_k: Some(
+                    20,
+                ),
+                default_min_p: Some(
+                    0.1,
+                ),
+                default_repetition_penalty: Some(
+                    1.2,
+                ),
+                default_max_tokens: None,
+                max_model_len: Some(
+                    40960,
+                ),
+            }
+        "#]]
+        .assert_debug_eq(&hints);
+
+        let prepared =
+            lower_text_request(sample_request(), vec![1, 2, 3], hints, &stub_tokenizer())
+                .expect("lower request");
+        let params = prepared.generate_request.sampling_params;
+
+        expect_test::expect![[r#"
+            EngineCoreSamplingParams {
+                temperature: 0.6,
+                top_p: 0.95,
+                top_k: 20,
+                seed: None,
+                max_tokens: 40957,
+                min_tokens: 0,
+                logprobs: None,
+                prompt_logprobs: None,
+                min_p: 0.1,
+                frequency_penalty: 0.0,
+                presence_penalty: 0.0,
+                repetition_penalty: 1.2,
+                stop_token_ids: [
+                    151643,
+                ],
+                eos_token_id: Some(
+                    151645,
+                ),
+                all_stop_token_ids: {
+                    151643,
+                    151645,
+                },
+            }
+        "#]]
+        .assert_debug_eq(&params);
+    }
+
+    #[test]
+    fn lower_sampling_params_preserves_explicit_stop_token_ids_in_all_stop_set() {
+        let sampling_params = SamplingParams {
+            stop_token_ids: Some(vec![11, 77]),
+            ..SamplingParams::default()
+        };
+
+        let params = lower_sampling_params(
+            sampling_params,
+            SamplingHints {
+                primary_eos_token_id: Some(99),
+                extra_eos_token_ids: BTreeSet::from([77, 88]),
+                default_temperature: None,
+                default_top_p: None,
+                default_top_k: None,
+                default_min_p: None,
+                default_repetition_penalty: None,
+                default_max_tokens: None,
+                max_model_len: None,
+            },
+            3,
+            &stub_tokenizer(),
+        )
+        .unwrap();
+
+        expect_test::expect![[r#"
+            EngineCoreSamplingParams {
+                temperature: 1.0,
+                top_p: 1.0,
+                top_k: 0,
+                seed: None,
+                max_tokens: 4294967295,
+                min_tokens: 0,
+                logprobs: None,
+                prompt_logprobs: None,
+                min_p: 0.0,
+                frequency_penalty: 0.0,
+                presence_penalty: 0.0,
+                repetition_penalty: 1.0,
+                stop_token_ids: [
+                    11,
+                    77,
+                    88,
+                ],
+                eos_token_id: Some(
+                    99,
+                ),
+                all_stop_token_ids: {
+                    11,
+                    77,
+                    88,
+                    99,
+                },
+                logit_bias: None,
+                allowed_token_ids: None,
+                bad_words_token_ids: None,
+                structured_outputs: None,
+                logprob_token_ids: None,
+                skip_reading_prefix_cache: None,
+                extra_args: None,
+            }
+        "#]]
+        .assert_debug_eq(&params);
+    }
+
+    #[test]
+    fn lower_sampling_params_prefers_user_values_over_generation_defaults() {
+        let sampling_params = SamplingParams {
+            temperature: Some(0.2),
+            top_p: Some(0.3),
+            top_k: Some(4),
+            max_tokens: Some(32),
+            min_tokens: Some(2),
+            ..Default::default()
+        };
+
+        let params = lower_sampling_params(
+            sampling_params,
+            SamplingHints {
+                primary_eos_token_id: None,
+                extra_eos_token_ids: BTreeSet::new(),
+                default_temperature: Some(0.8),
+                default_top_p: Some(0.9),
+                default_top_k: Some(12),
+                default_min_p: Some(0.1),
+                default_repetition_penalty: Some(1.2),
+                default_max_tokens: Some(128),
+                max_model_len: None,
+            },
+            3,
+            &stub_tokenizer(),
+        )
+        .unwrap();
+
+        expect_test::expect![[r#"
+            EngineCoreSamplingParams {
+                temperature: 0.2,
+                top_p: 0.3,
+                top_k: 4,
+                seed: None,
+                max_tokens: 32,
+                min_tokens: 2,
+                logprobs: None,
+                prompt_logprobs: None,
+                min_p: 0.1,
+                frequency_penalty: 0.0,
+                presence_penalty: 0.0,
+                repetition_penalty: 1.2,
+                stop_token_ids: [],
+                eos_token_id: None,
+                all_stop_token_ids: {},
+                logit_bias: None,
+                allowed_token_ids: None,
+                bad_words_token_ids: None,
+                structured_outputs: None,
+                logprob_token_ids: None,
+                skip_reading_prefix_cache: None,
+                extra_args: None,
+            }
+        "#]]
+        .assert_debug_eq(&params);
+    }
+
+    #[test]
+    fn lower_sampling_params_passes_logprobs_fields_through() {
+        let sampling_params = SamplingParams {
+            logprobs: Some(3),
+            prompt_logprobs: Some(-1),
+            ..Default::default()
+        };
+
+        let params = lower_sampling_params(
+            sampling_params,
+            SamplingHints {
+                primary_eos_token_id: None,
+                extra_eos_token_ids: BTreeSet::new(),
+                default_temperature: None,
+                default_top_p: None,
+                default_top_k: None,
+                default_min_p: None,
+                default_repetition_penalty: None,
+                default_max_tokens: None,
+                max_model_len: None,
+            },
+            3,
+            &stub_tokenizer(),
+        )
+        .unwrap();
+
+        assert_eq!(params.logprobs, Some(3));
+        assert_eq!(params.prompt_logprobs, Some(-1));
+    }
+
+    #[test]
+    fn lower_sampling_params_uses_generation_defaults_when_user_omits_values() {
+        let params = lower_sampling_params(
+            SamplingParams::default(),
+            SamplingHints {
+                primary_eos_token_id: None,
+                extra_eos_token_ids: BTreeSet::new(),
+                default_temperature: Some(0.8),
+                default_top_p: Some(0.9),
+                default_top_k: Some(12),
+                default_min_p: Some(0.1),
+                default_repetition_penalty: Some(1.2),
+                default_max_tokens: Some(128),
+                max_model_len: None,
+            },
+            3,
+            &stub_tokenizer(),
+        )
+        .unwrap();
+
+        expect_test::expect![[r#"
+            EngineCoreSamplingParams {
+                temperature: 0.8,
+                top_p: 0.9,
+                top_k: 12,
+                seed: None,
+                max_tokens: 128,
+                min_tokens: 0,
+                logprobs: None,
+                prompt_logprobs: None,
+                min_p: 0.1,
+                frequency_penalty: 0.0,
+                presence_penalty: 0.0,
+                repetition_penalty: 1.2,
+                stop_token_ids: [],
+                eos_token_id: None,
+                all_stop_token_ids: {},
+                logit_bias: None,
+                allowed_token_ids: None,
+                bad_words_token_ids: None,
+                structured_outputs: None,
+                logprob_token_ids: None,
+                skip_reading_prefix_cache: None,
+                extra_args: None,
+            }
+        "#]]
+        .assert_debug_eq(&params);
+    }
+
+    #[test]
+    fn resolve_max_tokens_caps_by_model_len() {
+        let result = resolve_max_tokens(Some(150), None, Some(200), 100);
+        assert_eq!(result.unwrap(), 100);
+    }
+
+    #[test]
+    fn lower_text_request_preserves_non_streaming_request_metadata() {
+        let mut request = sample_request();
+        request.intermediate = false;
+
+        let prepared = lower_text_request(
+            request,
+            vec![1, 2, 3],
+            sample_sampling_hints(),
+            &stub_tokenizer(),
+        )
+        .unwrap();
+
+        assert!(!prepared.text_request.intermediate);
+        assert_eq!(prepared.generate_request.request_id, "text-1");
+    }
+
+    #[test]
+    fn resolve_max_tokens_user_smaller_than_model_limit() {
+        let result = resolve_max_tokens(Some(50), None, Some(200), 100);
+        assert_eq!(result.unwrap(), 50);
+    }
+
+    #[test]
+    fn resolve_max_tokens_uses_default_when_user_omits() {
+        let result = resolve_max_tokens(None, Some(64), Some(200), 100);
+        assert_eq!(result.unwrap(), 64);
+    }
+
+    #[test]
+    fn resolve_max_tokens_default_capped_by_model_len() {
+        let result = resolve_max_tokens(None, Some(256), Some(200), 100);
+        assert_eq!(result.unwrap(), 100);
+    }
+
+    #[test]
+    fn resolve_max_tokens_no_model_len_falls_back() {
+        let result = resolve_max_tokens(Some(9999), None, None, 100);
+        assert_eq!(result.unwrap(), 9999);
+    }
+
+    #[test]
+    fn resolve_max_tokens_no_limits_known_falls_back_to_u32_max() {
+        let result = resolve_max_tokens(None, None, None, 100);
+        assert_eq!(result.unwrap(), u32::MAX);
+    }
+
+    #[test]
+    fn resolve_max_tokens_prompt_too_long() {
+        let result = resolve_max_tokens(Some(10), None, Some(100), 100);
+        assert!(matches!(
+            result,
+            Err(Error::PromptTooLong {
+                max_model_len: 100,
+                prompt_len: 100,
+            })
+        ));
+    }
+
+    #[test]
+    fn resolve_max_tokens_prompt_exceeds_model_len() {
+        let result = resolve_max_tokens(Some(10), None, Some(100), 200);
+        assert!(matches!(
+            result,
+            Err(Error::PromptTooLong {
+                max_model_len: 100,
+                prompt_len: 200,
+            })
+        ));
+    }
+}
diff --git a/rust/src/text/src/output/decoded.rs b/rust/src/text/src/output/decoded.rs
new file mode 100644
index 000000000000..2ebc6f385321
--- /dev/null
+++ b/rust/src/text/src/output/decoded.rs
@@ -0,0 +1,607 @@
+use std::sync::Arc;
+
+use asynk_strim_attr::{TryYielder, try_stream};
+use futures::{Stream, StreamExt};
+use serde::{Deserialize, Serialize};
+use tracing::{Level, debug, trace};
+use vllm_engine_core_client::AbortCause;
+use vllm_engine_core_client::protocol::StopReason;
+use vllm_llm::{FinishReason, GenerateOutput};
+use vllm_tokenizer::{DynTokenizer, IncrementalDecoder};
+
+use super::logprobs::{
+    DecodedLogprobs, DecodedPromptLogprobs, decode_logprobs, decode_prompt_logprobs,
+};
+use crate::error::Error;
+
+/// Request-neutral options for incremental text decoding.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct TextDecodeOptions {
+    pub skip_special_tokens: bool,
+    pub include_stop_str_in_output: bool,
+    pub stop_strings: Option<Vec<String>>,
+    /// Minimum number of tokens to generate before stop-string checking kicks
+    /// in. Stop strings found within the first `min_tokens` tokens are
+    /// ignored.
+    pub min_tokens: u32,
+}
+
+impl Default for TextDecodeOptions {
+    fn default() -> Self {
+        Self {
+            skip_special_tokens: true,
+            include_stop_str_in_output: false,
+            stop_strings: None,
+            min_tokens: 0,
+        }
+    }
+}
+
+/// Terminal metadata carried on the final [`DecodedTextEvent`].
+#[derive(Debug, Clone, PartialEq)]
+pub struct Finished {
+    pub prompt_token_count: usize,
+    pub output_token_count: usize,
+    pub finish_reason: FinishReason,
+    /// Connector-specific KV transfer parameters for disaggregated serving.
+    pub kv_transfer_params: Option<serde_json::Value>,
+}
+
+/// Internal decoded-text event emitted before higher-level assistant
+/// adaptation.
+#[derive(Debug, Clone, PartialEq)]
+pub enum DecodedTextEvent {
+    /// The request has reached the point where prompt-scoped decoding metadata
+    /// is ready.
+    Start {
+        /// The actual prompt token IDs for this request.
+        prompt_token_ids: Arc<[u32]>,
+        /// Once-only prompt logprobs metadata, when requested.
+        ///
+        /// The first prompt token is carried separately because it has no left
+        /// context to score against; `scored_positions` covers the
+        /// remaining prompt positions.
+        prompt_logprobs: Option<DecodedPromptLogprobs>,
+    },
+    /// A delta of text has been decoded, optionally alongside token-position
+    /// logprobs.
+    ///
+    /// `delta` is the newly visible decoded text fragment for this update.
+    ///
+    /// `logprobs` covers the newly generated token positions from the same
+    /// update, but is not guaranteed to align with `delta` by character
+    /// span. One update may carry token logprobs but no newly visible text
+    /// yet, and one visible text fragment may reflect multiple token
+    /// positions becoming decodable together.
+    ///
+    /// Upper-level may further parse `delta` as reasoning or tool calls.
+    ///
+    /// When `finished` is `Some`, this is the terminal event for the request.
+    TextDelta {
+        delta: String,
+        token_ids: Vec<u32>,
+        logprobs: Option<DecodedLogprobs>,
+        finished: Option<Finished>,
+    },
+}
+
+/// Convert the output token stream from the `vllm_llm` layer into incrementally
+/// decoded text.
+#[try_stream]
+pub async fn decoded_text_event_stream(
+    request_id: String,
+    tokenizer: DynTokenizer,
+    mut raw_stream: impl Stream<Item = vllm_llm::Result<GenerateOutput>> + Unpin,
+    mut decode_options: TextDecodeOptions,
+    intermediate: bool,
+    mut y: TryYielder<DecodedTextEvent, Error>,
+) -> crate::Result<()> {
+    let mut decoder: Option<Box<dyn IncrementalDecoder>> = None;
+    let mut prompt_token_count = 0_usize;
+    let mut token_ids = Vec::new();
+    let mut output_token_count: usize = 0;
+    let mut logprobs: Option<DecodedLogprobs> = None;
+
+    while let Some(next) = raw_stream.next().await {
+        let output = next?;
+
+        // If it's the first output, init states and yield `Start` event.
+        if decoder.is_none() {
+            let prompt_token_ids =
+                output.prompt_token_ids().expect("first llm output must carry prompt token ids");
+            prompt_token_count = prompt_token_ids.len();
+
+            let dec = tokenizer.create_decode_stream(
+                prompt_token_ids,
+                decode_options.skip_special_tokens,
+                // If we are excluding stop strings from output, we need to buffer
+                // the output so that we don't return the beginning of a stop string
+                // when streaming the outputs.
+                match decode_options.include_stop_str_in_output {
+                    true => 0,
+                    false => {
+                        decode_options
+                            .stop_strings
+                            .as_ref()
+                            .and_then(|stops| stops.iter().map(|ss| ss.len()).max())
+                            .unwrap_or(1)
+                            - 1
+                    }
+                },
+            );
+            decoder = Some(dec);
+
+            y.yield_ok(DecodedTextEvent::Start {
+                prompt_token_ids: prompt_token_ids.clone(),
+                prompt_logprobs: output
+                    .prompt_logprobs()
+                    .map(|logprobs| {
+                        decode_prompt_logprobs(
+                            tokenizer.as_ref(),
+                            prompt_token_ids,
+                            logprobs,
+                            decode_options.skip_special_tokens,
+                        )
+                    })
+                    .transpose()?,
+            })
+            .await;
+        };
+        let decoder = decoder.as_mut().unwrap();
+
+        let kv_transfer_params = output.kv_transfer_params;
+        let mut finish_reason = output.finish_reason;
+        let mut stop_str_matched = false;
+        let suppress_terminal_stop_token = finish_reason.as_ref().is_some_and(|r| r.is_stop())
+            && !decode_options.include_stop_str_in_output;
+        let decodable_token_ids = if suppress_terminal_stop_token {
+            // Match Python V1 token-stop detokenization by keeping the stop token
+            // in metadata while excluding it from user-visible text.
+            output.token_ids.split_last().map(|(_, rest)| rest).unwrap_or(&[])
+        } else {
+            &output.token_ids
+        };
+
+        let mut delta: Option<String> = None;
+        let mut truncate_output_to = None;
+        let mut truncate_tokens_to = None;
+        for (tok_idx, &token_id) in decodable_token_ids.iter().enumerate() {
+            let new_bytes = decoder.push_token(token_id)?;
+            if output_token_count + tok_idx + 1 > decode_options.min_tokens as usize
+                && let Some(stops) = decode_options.stop_strings.as_mut()
+                && let Some((idx, off)) = matches_stop_string(stops, decoder.output(), new_bytes)
+            {
+                let stop_str = stops.swap_remove(idx);
+                truncate_output_to = match decode_options.include_stop_str_in_output {
+                    true => Some(off + stop_str.len()),
+                    false => Some(off),
+                };
+                finish_reason = Some(FinishReason::Stop(Some(StopReason::Text(stop_str))));
+                truncate_tokens_to = Some(tok_idx + 1);
+                stop_str_matched = true;
+
+                break;
+            }
+
+            if intermediate && let Some(chunk) = decoder.next_chunk() {
+                if let Some(delta_str) = delta.as_mut() {
+                    delta_str.push_str(&chunk);
+                } else {
+                    delta = Some(chunk);
+                }
+            }
+        }
+
+        let mut new_token_ids = output.token_ids;
+        let mut new_logprobs = output.logprobs;
+
+        // Trim tokens and logprobs if we matched stop string.
+        if let Some(num_tokens) = truncate_tokens_to {
+            new_token_ids.truncate(num_tokens);
+            if let Some(logprobs) = &mut new_logprobs {
+                logprobs.positions.truncate(num_tokens);
+            }
+        }
+
+        output_token_count += new_token_ids.len();
+
+        let decoded_logprobs = new_logprobs
+            .as_ref()
+            .map(|logprobs| {
+                decode_logprobs(
+                    tokenizer.as_ref(),
+                    logprobs,
+                    decode_options.skip_special_tokens,
+                )
+            })
+            .transpose()?;
+
+        if !intermediate {
+            token_ids.extend(&new_token_ids);
+            if let Some(dlp) = decoded_logprobs.as_ref() {
+                logprobs
+                    .get_or_insert_with(|| DecodedLogprobs { positions: vec![] })
+                    .positions
+                    .extend_from_slice(&dlp.positions);
+            }
+        }
+
+        if let Some(reason) = finish_reason {
+            // Flush any remaining buffered text.
+            let (last_chunk, mut text) = decoder.flush(truncate_output_to)?;
+            let text_len = text.len();
+            let full_text = tracing::enabled!(Level::TRACE).then(|| text.clone());
+
+            if intermediate {
+                if let Some(chunk) = last_chunk {
+                    if let Some(delta_str) = delta.as_mut() {
+                        delta_str.push_str(&chunk);
+                    } else {
+                        delta = Some(chunk);
+                    }
+                }
+                token_ids = new_token_ids;
+                logprobs = decoded_logprobs;
+                text = delta.unwrap_or_default();
+            }
+
+            debug!(
+                finish_reason = ?reason,
+                text_length_bytes = text_len,
+                output_token_count = output_token_count,
+                "request finished with terminal output"
+            );
+            if let Some(full_text) = full_text {
+                trace!(full_text, "request finished with terminal decoded text");
+            }
+
+            // Intentionally drop the stream with explicit cause, so that the engine core
+            // can distinguish between such normal completion vs an unexpected
+            // early drop.
+            if stop_str_matched {
+                AbortCause::StopStringMatched.drop_as(raw_stream);
+            }
+
+            y.yield_ok(DecodedTextEvent::TextDelta {
+                delta: text,
+                token_ids,
+                logprobs,
+                finished: Some(Finished {
+                    prompt_token_count,
+                    output_token_count,
+                    finish_reason: reason,
+                    kv_transfer_params,
+                }),
+            })
+            .await;
+            return Ok(());
+        }
+
+        if intermediate {
+            y.yield_ok(DecodedTextEvent::TextDelta {
+                delta: delta.unwrap_or_default(),
+                token_ids: new_token_ids,
+                logprobs: decoded_logprobs,
+                finished: None,
+            })
+            .await;
+        }
+    }
+
+    Err(Error::StreamClosedBeforeTerminalOutput { request_id })
+}
+
+/// If stop string matches, returns tuple
+/// (index into stop string vec, byte index of first byte of stop string in
+/// output)
+fn matches_stop_string(stops: &[String], output: &str, new_bytes: usize) -> Option<(usize, usize)> {
+    // We compare byte subslices to avoid utf8 boundary problem
+    let output = output.as_bytes();
+    let next_off = (output.len() + 1) - new_bytes;
+    stops
+        .iter()
+        .map(|ss| (ss.as_bytes(), ss.len(), next_off.saturating_sub(ss.len())))
+        .enumerate()
+        .find_map(|(ss_idx, (ss, len, start_off))| {
+            output[start_off..]
+                .windows(len)
+                .rposition(|w| w == ss)
+                .map(|pos| (ss_idx, start_off + pos))
+        })
+}
+
+#[cfg(test)]
+mod tests {
+    use std::pin::Pin;
+    use std::sync::{Arc, Mutex};
+    use std::task::{Context, Poll};
+
+    use futures::{Stream, stream};
+    use vllm_engine_core_client::AbortCause;
+    use vllm_llm::GenerateOutput;
+    use vllm_tokenizer::Tokenizer;
+
+    use super::*;
+    use crate::output::TextOutputStreamExt as _;
+
+    /// Backend that treats each token ID as a raw byte, producing lossy UTF-8.
+    struct ByteTokenizer;
+
+    impl Tokenizer for ByteTokenizer {
+        fn encode(
+            &self,
+            _text: &str,
+            _add_special_tokens: bool,
+        ) -> vllm_tokenizer::Result<Vec<u32>> {
+            unreachable!()
+        }
+
+        fn decode(
+            &self,
+            token_ids: &[u32],
+            _skip_special_tokens: bool,
+        ) -> vllm_tokenizer::Result<String> {
+            let bytes = token_ids.iter().map(|id| *id as u8).collect::<Vec<_>>();
+            Ok(String::from_utf8_lossy(&bytes).into_owned())
+        }
+
+        fn token_to_id(&self, _token: &str) -> Option<u32> {
+            unreachable!()
+        }
+    }
+
+    /// Helper: run `decoded_text_event_stream` to completion and return the
+    /// collected output.
+    async fn run_to_completion(
+        token_ids: Vec<u32>,
+        decode_options: TextDecodeOptions,
+    ) -> crate::output::CollectedTextOutput {
+        let prompt: Arc<[u32]> = Arc::from([]);
+        let raw_stream = stream::iter(vec![Ok(GenerateOutput::for_test(
+            Some(prompt),
+            token_ids,
+            Some(FinishReason::Length),
+        ))]);
+        let tokenizer: DynTokenizer = Arc::new(ByteTokenizer);
+        decoded_text_event_stream("test".into(), tokenizer, raw_stream, decode_options, false)
+            .collect_output()
+            .await
+            .unwrap()
+    }
+
+    /// Convert ASCII string to token IDs (one byte per token).
+    fn ascii_tokens(s: &str) -> Vec<u32> {
+        s.bytes().map(u32::from).collect()
+    }
+
+    fn opts(stop: &[&str], min_tokens: u32) -> TextDecodeOptions {
+        TextDecodeOptions {
+            stop_strings: Some(stop.iter().map(|s| s.to_string()).collect()),
+            min_tokens,
+            ..Default::default()
+        }
+    }
+
+    struct DropRecordingStream {
+        next: Option<vllm_llm::Result<GenerateOutput>>,
+        dropped_cause: Arc<Mutex<Option<AbortCause>>>,
+    }
+
+    impl Stream for DropRecordingStream {
+        type Item = vllm_llm::Result<GenerateOutput>;
+
+        fn poll_next(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+            Poll::Ready(self.next.take())
+        }
+    }
+
+    impl Drop for DropRecordingStream {
+        fn drop(&mut self) {
+            *self.dropped_cause.lock().unwrap() = Some(AbortCause::current());
+        }
+    }
+
+    // --- stop string stream tests ---
+
+    #[tokio::test]
+    async fn stream_stop_string_sets_task_local_abort_cause_on_raw_stream_drop() {
+        let prompt: Arc<[u32]> = Arc::from([]);
+        let dropped_cause = Arc::new(Mutex::new(None));
+        let raw_stream = DropRecordingStream {
+            next: Some(Ok(GenerateOutput::for_test(
+                Some(prompt),
+                ascii_tokens("hello"),
+                Some(FinishReason::Length),
+            ))),
+            dropped_cause: Arc::clone(&dropped_cause),
+        };
+        let tokenizer: DynTokenizer = Arc::new(ByteTokenizer);
+
+        let output = decoded_text_event_stream(
+            "test".into(),
+            tokenizer,
+            raw_stream,
+            opts(&["ll"], 0),
+            false,
+        )
+        .collect_output()
+        .await
+        .unwrap();
+
+        assert_eq!(output.text, "he");
+        assert!(output.finish_reason.is_stop());
+        assert_eq!(
+            *dropped_cause.lock().unwrap(),
+            Some(AbortCause::StopStringMatched)
+        );
+    }
+
+    #[tokio::test]
+    async fn stream_stop_string_truncates_at_match() {
+        let output = run_to_completion(ascii_tokens("hello"), opts(&["e"], 0)).await;
+        assert_eq!(output.text, "h");
+        assert!(output.finish_reason.is_stop());
+    }
+
+    #[tokio::test]
+    async fn stream_stop_string_at_end() {
+        let output = run_to_completion(ascii_tokens("abcxyz"), opts(&["xyz"], 0)).await;
+        assert_eq!(output.text, "abc");
+        assert!(output.finish_reason.is_stop());
+    }
+
+    #[tokio::test]
+    async fn stream_stop_string_first_token() {
+        let output = run_to_completion(ascii_tokens("xhello"), opts(&["x"], 0)).await;
+        assert_eq!(output.text, "");
+        assert!(output.finish_reason.is_stop());
+    }
+
+    #[tokio::test]
+    async fn stream_stop_string_no_match_runs_to_completion() {
+        let output = run_to_completion(ascii_tokens("hello"), opts(&["z"], 0)).await;
+        assert_eq!(output.text, "hello");
+        assert_eq!(output.finish_reason, FinishReason::Length);
+    }
+
+    #[tokio::test]
+    async fn stream_stop_string_multi_char() {
+        let output = run_to_completion(ascii_tokens("say hello world"), opts(&["lo"], 0)).await;
+        assert_eq!(output.text, "say hel");
+        assert!(output.finish_reason.is_stop());
+    }
+
+    #[tokio::test]
+    async fn stream_stop_string_first_of_multiple_wins() {
+        // Both "ll" and "lo" are present; "ll" appears first in the output.
+        let output = run_to_completion(ascii_tokens("hello"), opts(&["ll", "lo"], 0)).await;
+        assert_eq!(output.text, "he");
+        assert!(output.finish_reason.is_stop());
+    }
+
+    #[tokio::test]
+    async fn stream_stop_string_include_in_output() {
+        let output = run_to_completion(
+            ascii_tokens("hello"),
+            TextDecodeOptions {
+                stop_strings: Some(vec!["ll".to_string()]),
+                include_stop_str_in_output: true,
+                ..Default::default()
+            },
+        )
+        .await;
+        assert_eq!(output.text, "hell");
+        assert!(output.finish_reason.is_stop());
+    }
+
+    // --- min_tokens + stop string interaction ---
+
+    #[tokio::test]
+    async fn min_tokens_suppresses_early_stop_string() {
+        // stop="e", min_tokens=3: the 'e' at token 2 is within the first 3 tokens,
+        // so it should be skipped. No later 'e' exists, so output runs to completion.
+        let output = run_to_completion(ascii_tokens("hello"), opts(&["e"], 3)).await;
+        assert_eq!(output.text, "hello");
+        assert_eq!(output.finish_reason, FinishReason::Length);
+    }
+
+    #[tokio::test]
+    async fn min_tokens_allows_stop_string_after_threshold() {
+        // stop="e", min_tokens=2: the first 'e' at token 3 is past the threshold.
+        let output = run_to_completion(ascii_tokens("greet"), opts(&["e"], 2)).await;
+        assert_eq!(output.text, "gr");
+        assert!(output.finish_reason.is_stop());
+    }
+
+    #[tokio::test]
+    async fn min_tokens_zero_behaves_like_absent() {
+        let output = run_to_completion(ascii_tokens("hello"), opts(&["e"], 0)).await;
+        assert_eq!(output.text, "h");
+        assert!(output.finish_reason.is_stop());
+    }
+
+    #[test]
+    fn stop_string_matches_at_end() {
+        let stops = vec!["wor".to_string()];
+        // Output: "say wor", last byte 'r' was just added (new_bytes=1)
+        let result = matches_stop_string(&stops, "say wor", 1);
+        assert_eq!(result, Some((0, 4)));
+    }
+
+    #[test]
+    fn stop_string_no_match() {
+        let stops = vec!["xyz".to_string()];
+        let result = matches_stop_string(&stops, "say wor", 1);
+        assert_eq!(result, None);
+    }
+
+    #[test]
+    fn stop_string_matches_first_of_multiple() {
+        let stops = vec!["wor".to_string(), "say".to_string()];
+        // "say" appears earlier but "wor" is checked first (index 0)
+        let result = matches_stop_string(&stops, "say wor", 1);
+        assert_eq!(result, Some((0, 4)));
+    }
+
+    #[test]
+    fn stop_string_matches_second_of_multiple() {
+        let stops = vec!["xyz".to_string(), "wor".to_string()];
+        let result = matches_stop_string(&stops, "say wor", 1);
+        assert_eq!(result, Some((1, 4)));
+    }
+
+    #[test]
+    fn stop_string_matches_with_multiple_new_bytes() {
+        let stops = vec!["wor".to_string()];
+        // "say wor" where last 3 bytes "wor" were added at once
+        let result = matches_stop_string(&stops, "say wor", 3);
+        assert_eq!(result, Some((0, 4)));
+    }
+
+    #[test]
+    fn stop_string_matches_at_beginning() {
+        let stops = vec!["say".to_string()];
+        let result = matches_stop_string(&stops, "say wor", 7);
+        assert_eq!(result, Some((0, 0)));
+    }
+
+    #[test]
+    fn stop_string_exact_output() {
+        let stops = vec!["abc".to_string()];
+        let result = matches_stop_string(&stops, "abc", 3);
+        assert_eq!(result, Some((0, 0)));
+    }
+
+    #[test]
+    fn stop_string_single_char() {
+        let stops = vec!["!".to_string()];
+        let result = matches_stop_string(&stops, "hello!", 1);
+        assert_eq!(result, Some((0, 5)));
+    }
+
+    #[test]
+    fn stop_string_not_in_new_bytes_region() {
+        let stops = vec!["say".to_string()];
+        // "say" is in the output but before the new byte region.
+        // new_bytes=1 means only 'r' was added; "say" ended at byte 3,
+        // but the search window starts at next_off - stop_len = 7+1-1 - 3 = 4.
+        let result = matches_stop_string(&stops, "say wor", 1);
+        assert_eq!(result, None);
+    }
+
+    #[test]
+    fn stop_string_empty_list() {
+        let stops: Vec<String> = vec![];
+        let result = matches_stop_string(&stops, "hello", 1);
+        assert_eq!(result, None);
+    }
+
+    #[test]
+    fn stop_string_multibyte_utf8() {
+        let stops = vec!["世界".to_string()];
+        // "你好世界" is 12 bytes: 你(3) + 好(3) + 世(3) + 界(3)
+        // "世界" starts at byte 6
+        let result = matches_stop_string(&stops, "你好世界", 3);
+        assert_eq!(result, Some((0, 6)));
+    }
+}
diff --git a/rust/src/text/src/output/logprobs.rs b/rust/src/text/src/output/logprobs.rs
new file mode 100644
index 000000000000..7024c52b779d
--- /dev/null
+++ b/rust/src/text/src/output/logprobs.rs
@@ -0,0 +1,236 @@
+use itertools::Itertools as _;
+use serde::{Deserialize, Serialize};
+use vllm_llm::{Logprobs, PositionLogprobs};
+use vllm_tokenizer::Tokenizer;
+
+use crate::error::Error;
+
+/// One decoded token candidate and its logprob metadata.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct DecodedTokenLogprob {
+    /// Original vocabulary token ID for this candidate.
+    pub token_id: u32,
+    /// Best-effort decoded token string for this candidate.
+    pub token: String,
+    /// Log probability of this token candidate.
+    pub logprob: f32,
+    /// Vocabulary rank of this token candidate.
+    pub rank: u32,
+}
+
+/// One position's decoded token candidates and their logprobs.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct DecodedPositionLogprobs {
+    /// Candidate tokens for this position.
+    pub entries: Vec<DecodedTokenLogprob>,
+}
+
+/// Decoded sample logprobs for generated token positions.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct DecodedLogprobs {
+    /// Generated token positions covered by this payload.
+    pub positions: Vec<DecodedPositionLogprobs>,
+}
+
+/// Decoded prompt logprobs for prompt token positions.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct DecodedPromptLogprobs {
+    /// Original vocabulary token ID for the first prompt token.
+    pub first_token_id: u32,
+    /// Best-effort decoded string for the first prompt token.
+    ///
+    /// The first prompt token has no left context to score against, so it is
+    /// stored separately instead of appearing in `scored_positions`.
+    pub first_token: String,
+    /// Scored prompt positions after the first prompt token.
+    ///
+    /// `scored_positions[i]` corresponds to the prompt token at position `i +
+    /// 1`.
+    pub scored_positions: Vec<DecodedPositionLogprobs>,
+}
+
+/// Decode generated-token logprobs from the raw `llm` token-ID shape into the
+/// text-layer decoded-token representation.
+///
+/// Each returned position corresponds to one generated token position from the
+/// same `llm` update.
+pub(super) fn decode_logprobs<T: Tokenizer + ?Sized>(
+    tokenizer: &T,
+    logprobs: &Logprobs,
+    skip_special_tokens: bool,
+) -> Result<DecodedLogprobs, Error> {
+    Ok(DecodedLogprobs {
+        positions: logprobs
+            .positions
+            .iter()
+            .map(|position| decode_position_logprobs(tokenizer, position, skip_special_tokens))
+            .try_collect()?,
+    })
+}
+
+/// Decode prompt logprobs from the raw `llm` token-ID shape into the text-layer
+/// decoded-token representation.
+///
+/// The returned payload stores the first prompt token separately and decodes
+/// the remaining scored prompt positions into `scored_positions`, matching
+/// vLLM's prompt-logprobs semantics.
+pub(super) fn decode_prompt_logprobs<T: Tokenizer + ?Sized>(
+    tokenizer: &T,
+    prompt_token_ids: &[u32],
+    logprobs: &Logprobs,
+    skip_special_tokens: bool,
+) -> Result<DecodedPromptLogprobs, Error> {
+    let first_token_id = prompt_token_ids
+        .first()
+        .copied()
+        .expect("prompt logprobs require at least one prompt token");
+    let first_token = tokenizer.decode(&[first_token_id], skip_special_tokens)?;
+    let scored_positions = logprobs
+        .positions
+        .iter()
+        .map(|position| decode_position_logprobs(tokenizer, position, skip_special_tokens))
+        .try_collect()?;
+
+    Ok(DecodedPromptLogprobs {
+        first_token_id,
+        first_token,
+        scored_positions,
+    })
+}
+
+/// Decode one token position's raw candidate set into decoded token strings
+/// plus logprob metadata.
+///
+/// This decodes every candidate token ID independently through the active text
+/// backend.
+fn decode_position_logprobs<T: Tokenizer + ?Sized>(
+    tokenizer: &T,
+    position: &PositionLogprobs,
+    skip_special_tokens: bool,
+) -> Result<DecodedPositionLogprobs, Error> {
+    Ok(DecodedPositionLogprobs {
+        entries: position
+            .entries
+            .iter()
+            .map(|entry| {
+                tokenizer.decode(&[entry.token_id], skip_special_tokens).map(|token| {
+                    DecodedTokenLogprob {
+                        token_id: entry.token_id,
+                        token,
+                        logprob: entry.logprob,
+                        rank: entry.rank,
+                    }
+                })
+            })
+            .try_collect()?,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use vllm_llm::{Logprobs, PositionLogprobs, TokenLogprob};
+
+    use super::*;
+
+    #[derive(Debug)]
+    struct ByteTokenizer;
+
+    impl vllm_tokenizer::Tokenizer for ByteTokenizer {
+        fn encode(
+            &self,
+            _text: &str,
+            _add_special_tokens: bool,
+        ) -> vllm_tokenizer::Result<Vec<u32>> {
+            unreachable!()
+        }
+
+        fn decode(
+            &self,
+            token_ids: &[u32],
+            _skip_special_tokens: bool,
+        ) -> vllm_tokenizer::Result<String> {
+            Ok(String::from_utf8_lossy(
+                &token_ids.iter().map(|token_id| *token_id as u8).collect::<Vec<_>>(),
+            )
+            .into_owned())
+        }
+
+        fn token_to_id(&self, _token: &str) -> Option<u32> {
+            unreachable!()
+        }
+    }
+
+    #[test]
+    fn decode_logprobs_decodes_every_candidate_token() {
+        let tokenizer = ByteTokenizer;
+        let logprobs = Logprobs {
+            positions: vec![PositionLogprobs {
+                entries: vec![
+                    TokenLogprob {
+                        token_id: b'a' as u32,
+                        logprob: -0.1,
+                        rank: 3,
+                    },
+                    TokenLogprob {
+                        token_id: b'b' as u32,
+                        logprob: -0.2,
+                        rank: 1,
+                    },
+                ],
+            }],
+        };
+
+        assert_eq!(
+            decode_logprobs(&tokenizer, &logprobs, false).unwrap(),
+            DecodedLogprobs {
+                positions: vec![DecodedPositionLogprobs {
+                    entries: vec![
+                        DecodedTokenLogprob {
+                            token_id: b'a' as u32,
+                            token: "a".to_string(),
+                            logprob: -0.1,
+                            rank: 3,
+                        },
+                        DecodedTokenLogprob {
+                            token_id: b'b' as u32,
+                            token: "b".to_string(),
+                            logprob: -0.2,
+                            rank: 1,
+                        },
+                    ],
+                }],
+            }
+        );
+    }
+
+    #[test]
+    fn decode_prompt_logprobs_separates_first_prompt_token() {
+        let tokenizer = ByteTokenizer;
+        let logprobs = Logprobs {
+            positions: vec![PositionLogprobs {
+                entries: vec![TokenLogprob {
+                    token_id: b'x' as u32,
+                    logprob: -0.4,
+                    rank: 1,
+                }],
+            }],
+        };
+
+        assert_eq!(
+            decode_prompt_logprobs(&tokenizer, &[b'p' as u32, b'x' as u32], &logprobs, false)
+                .unwrap(),
+            DecodedPromptLogprobs {
+                first_token_id: b'p' as u32,
+                first_token: "p".to_string(),
+                scored_positions: vec![DecodedPositionLogprobs {
+                    entries: vec![DecodedTokenLogprob {
+                        token_id: b'x' as u32,
+                        token: "x".to_string(),
+                        logprob: -0.4,
+                        rank: 1,
+                    }],
+                }],
+            }
+        );
+    }
+}
diff --git a/rust/src/text/src/output/mod.rs b/rust/src/text/src/output/mod.rs
new file mode 100644
index 000000000000..064b820d57f1
--- /dev/null
+++ b/rust/src/text/src/output/mod.rs
@@ -0,0 +1,323 @@
+//! Output processing helpers shared by text and chat layers.
+
+pub use decoded::{DecodedTextEvent, Finished, TextDecodeOptions, decoded_text_event_stream};
+pub use logprobs::{
+    DecodedLogprobs, DecodedPositionLogprobs, DecodedPromptLogprobs, DecodedTokenLogprob,
+};
+
+mod decoded;
+mod logprobs;
+
+use std::sync::Arc;
+
+use futures::{StreamExt as _, pin_mut};
+
+use crate::{Error, FinishReason, Result, TextOutputStream};
+
+/// Final decoded text plus terminal stream metadata.
+#[derive(Debug, Clone, PartialEq)]
+pub struct CollectedTextOutput {
+    pub text: String,
+    pub prompt_token_ids: Arc<[u32]>,
+    pub prompt_logprobs: Option<DecodedPromptLogprobs>,
+    pub logprobs: Option<DecodedLogprobs>,
+    pub token_ids: Vec<u32>,
+    pub finish_reason: FinishReason,
+    /// Connector-specific KV transfer parameters for disaggregated serving.
+    pub kv_transfer_params: Option<serde_json::Value>,
+}
+
+#[allow(clippy::manual_async_fn, reason = "specify `Send` bound")]
+#[easy_ext::ext(TextOutputStreamExt)]
+impl<T: TextOutputStream> T {
+    /// Collect the stream to completion and return the final decoded text plus
+    /// terminal metadata.
+    pub fn collect_output(self) -> impl Future<Output = Result<CollectedTextOutput>> + Send {
+        async move {
+            let stream = self;
+            pin_mut!(stream);
+            let mut prompt_logprobs = None;
+            let mut prompt_token_ids: Arc<[u32]> = Arc::from([]);
+            let mut collected: Option<CollectedTextOutput> = None;
+
+            while let Some(event) = stream.next().await.transpose()? {
+                match event {
+                    DecodedTextEvent::Start {
+                        prompt_logprobs: start_prompt_logprobs,
+                        prompt_token_ids: start_prompt_token_ids,
+                        ..
+                    } => {
+                        prompt_logprobs = start_prompt_logprobs;
+                        prompt_token_ids = start_prompt_token_ids;
+                    }
+                    DecodedTextEvent::TextDelta {
+                        delta,
+                        token_ids: delta_token_ids,
+                        logprobs: mut delta_logprobs,
+                        finished,
+                    } => {
+                        if let Some(c) = collected.as_mut() {
+                            c.text.push_str(&delta);
+                            c.token_ids.extend(delta_token_ids);
+                            if let Some(dlp) = delta_logprobs.as_mut() {
+                                if let Some(lp) = c.logprobs.as_mut() {
+                                    lp.positions.extend_from_slice(&dlp.positions);
+                                } else {
+                                    c.logprobs = delta_logprobs;
+                                }
+                            }
+                        } else {
+                            collected = Some(CollectedTextOutput {
+                                text: delta,
+                                prompt_token_ids: Arc::clone(&prompt_token_ids),
+                                prompt_logprobs: prompt_logprobs.take(),
+                                logprobs: delta_logprobs,
+                                token_ids: delta_token_ids,
+                                finish_reason: FinishReason::Error,
+                                kv_transfer_params: None,
+                            })
+                        };
+
+                        if let Some(finished) = finished {
+                            let mut collected = collected.unwrap();
+                            collected.finish_reason = finished.finish_reason;
+                            collected.kv_transfer_params = finished.kv_transfer_params;
+                            return Ok(collected);
+                        }
+                    }
+                }
+            }
+
+            // Note: this is actually unreachable, as the underlying stream always emit an
+            // error on unexpected close.
+            Err(Error::StreamClosedBeforeTerminalOutput {
+                request_id: "unknown".to_string(),
+            })
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use futures::stream;
+    use vllm_llm::FinishReason;
+
+    use super::*;
+
+    #[tokio::test]
+    async fn collect_output_retains_prompt_and_sample_logprobs() {
+        let stream = stream::iter(vec![
+            Ok(DecodedTextEvent::Start {
+                prompt_token_ids: vec![10, 11].into(),
+                prompt_logprobs: Some(DecodedPromptLogprobs {
+                    first_token_id: 0,
+                    first_token: "o".to_string(),
+                    scored_positions: vec![DecodedPositionLogprobs {
+                        entries: vec![DecodedTokenLogprob {
+                            token_id: 0,
+                            token: "p".to_string(),
+                            logprob: -0.1,
+                            rank: 1,
+                        }],
+                    }],
+                }),
+            }),
+            Ok(DecodedTextEvent::TextDelta {
+                delta: "bc".to_string(),
+                token_ids: vec![1, 2],
+                logprobs: Some(DecodedLogprobs {
+                    positions: vec![
+                        DecodedPositionLogprobs {
+                            entries: vec![DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "a".to_string(),
+                                logprob: -0.2,
+                                rank: 1,
+                            }],
+                        },
+                        DecodedPositionLogprobs {
+                            entries: vec![DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "bc".to_string(),
+                                logprob: -0.3,
+                                rank: 1,
+                            }],
+                        },
+                    ],
+                }),
+                finished: Some(Finished {
+                    prompt_token_count: 2,
+                    output_token_count: 2,
+                    finish_reason: FinishReason::stop_eos(),
+                    kv_transfer_params: None,
+                }),
+            }),
+        ]);
+
+        let collected = stream.collect_output().await.unwrap();
+        assert_eq!(collected.text, "bc");
+        assert_eq!(
+            collected.prompt_logprobs,
+            Some(DecodedPromptLogprobs {
+                first_token_id: 0,
+                first_token: "o".to_string(),
+                scored_positions: vec![DecodedPositionLogprobs {
+                    entries: vec![DecodedTokenLogprob {
+                        token_id: 0,
+                        token: "p".to_string(),
+                        logprob: -0.1,
+                        rank: 1,
+                    }],
+                }],
+            })
+        );
+        assert_eq!(
+            collected.logprobs,
+            Some(DecodedLogprobs {
+                positions: vec![
+                    DecodedPositionLogprobs {
+                        entries: vec![DecodedTokenLogprob {
+                            token_id: 0,
+                            token: "a".to_string(),
+                            logprob: -0.2,
+                            rank: 1,
+                        }],
+                    },
+                    DecodedPositionLogprobs {
+                        entries: vec![DecodedTokenLogprob {
+                            token_id: 0,
+                            token: "bc".to_string(),
+                            logprob: -0.3,
+                            rank: 1,
+                        }],
+                    },
+                ],
+            })
+        );
+    }
+
+    #[tokio::test]
+    async fn collect_output_accumulates_intermediate_deltas() {
+        let stream = stream::iter(vec![
+            Ok(DecodedTextEvent::Start {
+                prompt_token_ids: vec![10, 11].into(),
+                prompt_logprobs: None,
+            }),
+            Ok(DecodedTextEvent::TextDelta {
+                delta: "he".to_string(),
+                token_ids: vec![1, 2],
+                logprobs: Some(DecodedLogprobs {
+                    positions: vec![
+                        DecodedPositionLogprobs {
+                            entries: vec![DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "h".to_string(),
+                                logprob: -0.1,
+                                rank: 1,
+                            }],
+                        },
+                        DecodedPositionLogprobs {
+                            entries: vec![DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "e".to_string(),
+                                logprob: -0.2,
+                                rank: 1,
+                            }],
+                        },
+                    ],
+                }),
+                finished: None,
+            }),
+            Ok(DecodedTextEvent::TextDelta {
+                delta: "llo".to_string(),
+                token_ids: vec![3, 4, 5],
+                logprobs: Some(DecodedLogprobs {
+                    positions: vec![
+                        DecodedPositionLogprobs {
+                            entries: vec![DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "l".to_string(),
+                                logprob: -0.3,
+                                rank: 1,
+                            }],
+                        },
+                        DecodedPositionLogprobs {
+                            entries: vec![DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "l".to_string(),
+                                logprob: -0.4,
+                                rank: 1,
+                            }],
+                        },
+                        DecodedPositionLogprobs {
+                            entries: vec![DecodedTokenLogprob {
+                                token_id: 0,
+                                token: "o".to_string(),
+                                logprob: -0.5,
+                                rank: 1,
+                            }],
+                        },
+                    ],
+                }),
+                finished: Some(Finished {
+                    prompt_token_count: 2,
+                    output_token_count: 5,
+                    finish_reason: FinishReason::stop_eos(),
+                    kv_transfer_params: None,
+                }),
+            }),
+        ]);
+
+        let collected = stream.collect_output().await.unwrap();
+        assert_eq!(collected.text, "hello");
+        assert_eq!(collected.prompt_logprobs, None);
+        assert_eq!(collected.token_ids, vec![1, 2, 3, 4, 5]);
+        assert_eq!(
+            collected.logprobs,
+            Some(DecodedLogprobs {
+                positions: vec![
+                    DecodedPositionLogprobs {
+                        entries: vec![DecodedTokenLogprob {
+                            token_id: 0,
+                            token: "h".to_string(),
+                            logprob: -0.1,
+                            rank: 1,
+                        }],
+                    },
+                    DecodedPositionLogprobs {
+                        entries: vec![DecodedTokenLogprob {
+                            token_id: 0,
+                            token: "e".to_string(),
+                            logprob: -0.2,
+                            rank: 1,
+                        }],
+                    },
+                    DecodedPositionLogprobs {
+                        entries: vec![DecodedTokenLogprob {
+                            token_id: 0,
+                            token: "l".to_string(),
+                            logprob: -0.3,
+                            rank: 1,
+                        }],
+                    },
+                    DecodedPositionLogprobs {
+                        entries: vec![DecodedTokenLogprob {
+                            token_id: 0,
+                            token: "l".to_string(),
+                            logprob: -0.4,
+                            rank: 1,
+                        }],
+                    },
+                    DecodedPositionLogprobs {
+                        entries: vec![DecodedTokenLogprob {
+                            token_id: 0,
+                            token: "o".to_string(),
+                            logprob: -0.5,
+                            rank: 1,
+                        }],
+                    },
+                ],
+            })
+        );
+    }
+}
diff --git a/rust/src/text/src/request.rs b/rust/src/text/src/request.rs
new file mode 100644
index 000000000000..9e2464f14af5
--- /dev/null
+++ b/rust/src/text/src/request.rs
@@ -0,0 +1,197 @@
+use std::collections::HashMap;
+
+use enum_as_inner::EnumAsInner;
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use vllm_engine_core_client::protocol::StructuredOutputsParams;
+use vllm_engine_core_client::protocol::multimodal::MmFeatures;
+
+use crate::error::{Error, Result};
+use crate::output::TextDecodeOptions;
+
+/// One raw text-generation prompt.
+///
+/// This supports either ordinary text that still needs tokenization or
+/// already-tokenized prompt IDs that should bypass tokenizer work entirely.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, EnumAsInner)]
+#[serde(untagged)]
+pub enum Prompt {
+    /// Untokenized prompt text that still needs tokenizer work before
+    /// generation.
+    Text(String),
+    /// Pre-tokenized prompt IDs that should be forwarded southbound without
+    /// re-encoding.
+    TokenIds(Vec<u32>),
+}
+
+impl Default for Prompt {
+    fn default() -> Self {
+        Self::Text(String::new()) // placeholder
+    }
+}
+
+/// User-facing sampling parameters accepted by `vllm-text`.
+///
+/// This intentionally keeps only the subset that the current Rust text layer
+/// supports as northbound request semantics. Engine-core-specific normalized
+/// fields are derived later during lowering.
+///
+/// Original Python definition:
+/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/sampling_params.py#L155-L291>
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(default)]
+pub struct SamplingParams {
+    /// Controls randomness. Lower values are more deterministic; zero means
+    /// greedy sampling. `None` means no explicit user override.
+    pub temperature: Option<f32>,
+    /// Cumulative probability threshold for nucleus sampling.
+    pub top_p: Option<f32>,
+    /// Maximum number of top tokens to consider. `Some(0)` means all tokens.
+    pub top_k: Option<u32>,
+    /// Random seed used by the sampler when present.
+    pub seed: Option<i64>,
+    /// Maximum number of tokens to generate. `None` means no explicit user
+    /// override.
+    pub max_tokens: Option<u32>,
+    /// Minimum number of tokens to generate before EOS or stop-token handling.
+    pub min_tokens: Option<u32>,
+    /// Number of log probabilities to return per generated token.
+    ///
+    /// `None` disables sample logprobs. `-1` requests the full vocabulary.
+    pub logprobs: Option<i32>,
+    /// Number of log probabilities to return per prompt token.
+    ///
+    /// `None` disables prompt logprobs. `-1` requests the full vocabulary.
+    pub prompt_logprobs: Option<i32>,
+    /// Minimum probability threshold for token sampling. `None` means no
+    /// explicit user override.
+    pub min_p: Option<f32>,
+    /// Frequency penalty applied by the sampler. `None` means no explicit user
+    /// override.
+    pub frequency_penalty: Option<f32>,
+    /// Presence penalty applied by the sampler. `None` means no explicit user
+    /// override.
+    pub presence_penalty: Option<f32>,
+    /// Repetition penalty applied by the sampler. `None` means no explicit user
+    /// override.
+    pub repetition_penalty: Option<f32>,
+    /// Explicit stop token IDs provided by the caller. `None` means no explicit
+    /// user override.
+    pub stop_token_ids: Option<Vec<u32>>,
+    /// If true, do not stop on the model's primary EOS token.
+    pub ignore_eos: bool,
+    /// Modify the likelihood of specified tokens appearing in the completion.
+    /// Keys are token IDs.
+    pub logit_bias: Option<HashMap<u32, f32>>,
+    /// Restrict output to these token IDs only.
+    pub allowed_token_ids: Option<Vec<u32>>,
+    /// Words to avoid during generation (tokenized to IDs during lowering).
+    pub bad_words: Option<Vec<String>>,
+    /// Specific token IDs for which log probabilities should be returned at
+    /// each position.
+    ///
+    /// When set, the engine returns logprobs for exactly these tokens in
+    /// addition to the sampled/scored token. Mutually exclusive with
+    /// `logprobs` in practice.
+    pub logprob_token_ids: Option<Vec<u32>>,
+    /// Parameters for configuring structured outputs (guided decoding).
+    pub structured_outputs: Option<StructuredOutputsParams>,
+    /// If true, bypass reads from the prefix cache for this request (the prompt
+    /// will not reuse cached KV blocks from earlier requests, though newly
+    /// computed blocks may still populate the cache). `None` defers to
+    /// engine-core defaults.
+    pub skip_reading_prefix_cache: Option<bool>,
+    /// Additional request parameters for custom extensions.
+    pub vllm_xargs: Option<HashMap<String, Value>>,
+}
+
+#[allow(clippy::derivable_impls)] // more explicit
+impl Default for SamplingParams {
+    fn default() -> Self {
+        Self {
+            temperature: None,
+            top_p: None,
+            top_k: None,
+            seed: None,
+            max_tokens: None,
+            min_tokens: None,
+            logprobs: None,
+            prompt_logprobs: None,
+            min_p: None,
+            frequency_penalty: None,
+            presence_penalty: None,
+            repetition_penalty: None,
+            stop_token_ids: None,
+            ignore_eos: false,
+            logit_bias: None,
+            allowed_token_ids: None,
+            bad_words: None,
+            logprob_token_ids: None,
+            structured_outputs: None,
+            skip_reading_prefix_cache: None,
+            vllm_xargs: None,
+        }
+    }
+}
+
+/// One raw text-generation request ready to be tokenized or sent directly to
+/// the engine.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct TextRequest {
+    /// Stable caller-supplied request ID.
+    pub request_id: String,
+    /// Prompt text or prompt token IDs for this request.
+    pub prompt: Prompt,
+    /// Multimodal features prepared by a higher-level frontend. Raw text
+    /// requests keep this empty; multimodal chat uses it with pre-tokenized
+    /// prompt IDs.
+    pub mm_features: Option<MmFeatures>,
+    /// User-facing sampling parameters accepted by `vllm-text`.
+    pub sampling_params: SamplingParams,
+    /// Incremental detokenization options for the response path.
+    pub decode_options: TextDecodeOptions,
+    /// Whether to emit intermediate northbound deltas before the terminal
+    /// result.
+    ///
+    /// If `false`, callers only observe the terminal accumulated output. If
+    /// `true`, callers may receive zero or more incremental decoded updates
+    /// before the final terminal event.
+    pub intermediate: bool,
+    /// Request scheduling priority (lower means earlier handling; default 0).
+    pub priority: i32,
+    /// Salt for prefix cache isolation in multi-user environments.
+    pub cache_salt: Option<String>,
+    /// Whether to add special tokens (e.g. BOS) during prompt tokenization.
+    pub add_special_tokens: bool,
+    /// Override data parallel rank.
+    #[serde(default)]
+    pub data_parallel_rank: Option<u32>,
+}
+
+impl TextRequest {
+    /// Return one minimal valid request fixture for tests.
+    pub fn for_test() -> Self {
+        Self {
+            request_id: "test-request".to_string(),
+            prompt: Prompt::Text("test".to_string()),
+            mm_features: None,
+            sampling_params: SamplingParams::default(),
+            decode_options: TextDecodeOptions::default(),
+            intermediate: true,
+            priority: 0,
+            cache_salt: None,
+            add_special_tokens: false,
+            data_parallel_rank: None,
+        }
+    }
+
+    /// Validate the minimum invariants before tokenization or request lowering.
+    pub fn validate(&self) -> Result<()> {
+        if matches!(&self.prompt, Prompt::TokenIds(ids) if ids.is_empty()) {
+            return Err(Error::EmptyPromptTokenIds {
+                request_id: self.request_id.clone(),
+            });
+        }
+        Ok(())
+    }
+}
diff --git a/rust/src/tokenizer/Cargo.toml b/rust/src/tokenizer/Cargo.toml
new file mode 100644
index 000000000000..786c46f40316
--- /dev/null
+++ b/rust/src/tokenizer/Cargo.toml
@@ -0,0 +1,35 @@
+[package]
+name = "vllm-tokenizer"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+base64.workspace = true
+fastokens.workspace = true
+riptoken.workspace = true
+rustc-hash.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+tekken.workspace = true
+thiserror.workspace = true
+thiserror-ext.workspace = true
+tiktoken-rs.workspace = true
+tokenizers.workspace = true
+tracing.workspace = true
+
+[dev-dependencies]
+criterion.workspace = true
+hf-hub.workspace = true
+tempfile.workspace = true
+
+[[bench]]
+name = "hf"
+harness = false
+
+[[bench]]
+name = "tiktoken"
+harness = false
+
+[lints]
+workspace = true
diff --git a/rust/src/tokenizer/benches/hf.rs b/rust/src/tokenizer/benches/hf.rs
new file mode 100644
index 000000000000..9bf37778089c
--- /dev/null
+++ b/rust/src/tokenizer/benches/hf.rs
@@ -0,0 +1,118 @@
+use criterion::{Criterion, Throughput, black_box, criterion_group, criterion_main};
+use hf_hub::api::sync::ApiBuilder;
+use vllm_tokenizer::{HuggingFaceTokenizer, Tokenizer};
+
+const MODEL_ID: &str = "Qwen/Qwen3.5-0.8B";
+const SAMPLE_TEXT: &str = "\
+<|im_start|>system
+You are Qwen3.5, a helpful assistant.
+<|im_end|>
+<|im_start|>user
+请用中英混合总结以下需求，并给出一个简短的 JSON 示例。
+The service should stop cleanly at EOS, avoid leaking the next template turn, and keep decode latency low.
+Input: 4 concurrent requests, 10240 prompt tokens, 16 generated tokens.
+<|im_end|>
+<|im_start|>assistant
+";
+
+struct BenchFixture {
+    fastokens: HuggingFaceTokenizer,
+    hf: HuggingFaceTokenizer,
+    text: String,
+    token_ids: Vec<u32>,
+}
+
+impl BenchFixture {
+    fn load() -> Self {
+        let path = tokenizer_json();
+        let fastokens =
+            HuggingFaceTokenizer::new_fastokens(&path).expect("load fastokens tokenizer");
+        let hf = HuggingFaceTokenizer::new_hf(&path).expect("load huggingface tokenizer");
+
+        let text = SAMPLE_TEXT.repeat(32);
+        let hf_token_ids =
+            hf.encode(text.as_str(), false).expect("encode sample text with hf tokenizer");
+        let fastokens_token_ids = fastokens
+            .encode(text.as_str(), false)
+            .expect("encode sample text with fastokens");
+        assert_eq!(fastokens_token_ids, hf_token_ids);
+
+        let hf_decoded = hf
+            .decode(hf_token_ids.as_slice(), false)
+            .expect("decode sample token ids with hf tokenizer");
+        let fastokens_decoded = fastokens
+            .decode(hf_token_ids.as_slice(), false)
+            .expect("decode sample token ids with fastokens");
+        assert_eq!(fastokens_decoded, hf_decoded);
+
+        Self {
+            fastokens,
+            hf,
+            text,
+            token_ids: hf_token_ids,
+        }
+    }
+}
+
+fn tokenizer_json() -> std::path::PathBuf {
+    ApiBuilder::from_env()
+        .with_progress(false)
+        .build()
+        .expect("build hf-hub api")
+        .model(MODEL_ID.to_string())
+        .get("tokenizer.json")
+        .expect("fetch tokenizer.json from hf-hub")
+}
+
+fn bench_encode(c: &mut Criterion) {
+    let fixture = BenchFixture::load();
+    let mut group = c.benchmark_group("tokenizer_encode");
+    group.throughput(Throughput::Bytes(fixture.text.len() as u64));
+
+    group.bench_function("fastokens", |b| {
+        b.iter(|| {
+            fixture
+                .fastokens
+                .encode(black_box(fixture.text.as_str()), black_box(false))
+                .expect("encode sample text with fastokens")
+        })
+    });
+    group.bench_function("hf_tokenizers", |b| {
+        b.iter(|| {
+            fixture
+                .hf
+                .encode(black_box(fixture.text.as_str()), black_box(false))
+                .expect("encode sample text with hf tokenizer")
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_decode(c: &mut Criterion) {
+    let fixture = BenchFixture::load();
+    let mut group = c.benchmark_group("tokenizer_decode");
+    group.throughput(Throughput::Elements(fixture.token_ids.len() as u64));
+
+    group.bench_function("fastokens", |b| {
+        b.iter(|| {
+            fixture
+                .fastokens
+                .decode(black_box(fixture.token_ids.as_slice()), black_box(false))
+                .expect("decode sample token ids with fastokens")
+        })
+    });
+    group.bench_function("hf_tokenizers", |b| {
+        b.iter(|| {
+            fixture
+                .hf
+                .decode(black_box(fixture.token_ids.as_slice()), black_box(false))
+                .expect("decode sample token ids with hf tokenizer")
+        })
+    });
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_encode, bench_decode);
+criterion_main!(benches);
diff --git a/rust/src/tokenizer/benches/tiktoken.rs b/rust/src/tokenizer/benches/tiktoken.rs
new file mode 100644
index 000000000000..54b9805f01a6
--- /dev/null
+++ b/rust/src/tokenizer/benches/tiktoken.rs
@@ -0,0 +1,117 @@
+use criterion::{Criterion, Throughput, black_box, criterion_group, criterion_main};
+use hf_hub::api::sync::ApiBuilder;
+use vllm_tokenizer::{TiktokenTokenizer, Tokenizer};
+
+const MODEL_ID: &str = "moonshotai/Kimi-K2.5";
+const SAMPLE_TEXT: &str = "\
+<think>
+I'm sure it's fine, but I can't say I'd trust that it's what we'd ship.
+</think>
+请用中英混合总结以下需求，并保留 tool-call marker:
+<|tool_calls_section_begin|>{\"name\":\"summarize\",\"arguments\":{\"style\":\"brief\"}}<|tool_calls_section_end|>
+The service should stop cleanly at EOS, avoid leaking the next template turn, and keep decode latency low.
+";
+
+struct BenchFixture {
+    riptoken: TiktokenTokenizer,
+    tiktoken_rs: TiktokenTokenizer,
+    text: String,
+    token_ids: Vec<u32>,
+}
+
+impl BenchFixture {
+    fn load() -> Self {
+        let path = tiktoken_model();
+        let riptoken = TiktokenTokenizer::new_riptoken(&path).expect("load riptoken tokenizer");
+        let tiktoken_rs =
+            TiktokenTokenizer::new_tiktoken_rs(&path).expect("load tiktoken-rs tokenizer");
+
+        let text = SAMPLE_TEXT.repeat(32);
+        let riptoken_token_ids =
+            riptoken.encode(text.as_str(), false).expect("encode sample text with riptoken");
+        let tiktoken_rs_token_ids = tiktoken_rs
+            .encode(text.as_str(), false)
+            .expect("encode sample text with tiktoken-rs");
+        assert_eq!(riptoken_token_ids, tiktoken_rs_token_ids);
+
+        let riptoken_decoded = riptoken
+            .decode(riptoken_token_ids.as_slice(), false)
+            .expect("decode sample token ids with riptoken");
+        let tiktoken_rs_decoded = tiktoken_rs
+            .decode(riptoken_token_ids.as_slice(), false)
+            .expect("decode sample token ids with tiktoken-rs");
+        assert_eq!(riptoken_decoded, tiktoken_rs_decoded);
+
+        Self {
+            riptoken,
+            tiktoken_rs,
+            text,
+            token_ids: riptoken_token_ids,
+        }
+    }
+}
+
+fn tiktoken_model() -> std::path::PathBuf {
+    let repo = ApiBuilder::from_env()
+        .with_progress(false)
+        .build()
+        .expect("build hf-hub api")
+        .model(MODEL_ID.to_string());
+    repo.get("config.json").expect("fetch config.json from hf-hub");
+    repo.get("tokenizer_config.json")
+        .expect("fetch tokenizer_config.json from hf-hub");
+    repo.get("tiktoken.model").expect("fetch tiktoken.model from hf-hub")
+}
+
+fn bench_encode(c: &mut Criterion) {
+    let fixture = BenchFixture::load();
+    let mut group = c.benchmark_group("tiktoken_encode");
+    group.throughput(Throughput::Bytes(fixture.text.len() as u64));
+
+    group.bench_function("riptoken", |b| {
+        b.iter(|| {
+            fixture
+                .riptoken
+                .encode(black_box(fixture.text.as_str()), black_box(false))
+                .expect("encode sample text with riptoken")
+        })
+    });
+    group.bench_function("tiktoken_rs", |b| {
+        b.iter(|| {
+            fixture
+                .tiktoken_rs
+                .encode(black_box(fixture.text.as_str()), black_box(false))
+                .expect("encode sample text with tiktoken-rs")
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_decode(c: &mut Criterion) {
+    let fixture = BenchFixture::load();
+    let mut group = c.benchmark_group("tiktoken_decode");
+    group.throughput(Throughput::Elements(fixture.token_ids.len() as u64));
+
+    group.bench_function("riptoken", |b| {
+        b.iter(|| {
+            fixture
+                .riptoken
+                .decode(black_box(fixture.token_ids.as_slice()), black_box(false))
+                .expect("decode sample token ids with riptoken")
+        })
+    });
+    group.bench_function("tiktoken_rs", |b| {
+        b.iter(|| {
+            fixture
+                .tiktoken_rs
+                .decode(black_box(fixture.token_ids.as_slice()), black_box(false))
+                .expect("decode sample token ids with tiktoken-rs")
+        })
+    });
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_encode, bench_decode);
+criterion_main!(benches);
diff --git a/rust/src/tokenizer/src/byte_level_decode.rs b/rust/src/tokenizer/src/byte_level_decode.rs
new file mode 100644
index 000000000000..8208e8e8d48b
--- /dev/null
+++ b/rust/src/tokenizer/src/byte_level_decode.rs
@@ -0,0 +1,119 @@
+//! Fast GPT-2 byte-level detokenization that writes into a single `Vec<u8>`,
+//! avoiding the `Vec<String>` / `String::join` assembly in fastokens' generic
+//! `Decoder::decode` pipeline.
+
+/// Reverse GPT-2 byte-to-unicode mapping: codepoint → original byte. The GPT-2
+/// table only emits codepoints in U+0000..U+0143, so a flat array suffices.
+const CHAR_TO_BYTE: [u8; 324] = build_char_to_byte();
+
+const fn is_nice(b: u8) -> bool {
+    (b >= b'!' && b <= b'~') || (b >= 0xA1 && b <= 0xAC) || b >= 0xAE
+}
+
+const fn build_char_to_byte() -> [u8; 324] {
+    let mut table = [0u8; 324];
+    let mut b: u16 = 0;
+    while b < 256 {
+        let cp = if is_nice(b as u8) {
+            b as u32
+        } else {
+            256 + nice_offset(b as u8)
+        };
+        table[cp as usize] = b as u8;
+        b += 1;
+    }
+    table
+}
+
+const fn nice_offset(b: u8) -> u32 {
+    let mut i: u16 = 0;
+    let mut n: u32 = 0;
+    while i < b as u16 {
+        if !is_nice(i as u8) {
+            n += 1;
+        }
+        i += 1;
+    }
+    n
+}
+
+/// Decode byte-level encoded token strings into a single UTF-8 string,
+/// matching `fastokens::decoders::ByteLevelDecoder`.
+pub fn decode_byte_level<'a, I: IntoIterator<Item = &'a str>>(tokens: I) -> String {
+    let iter = tokens.into_iter();
+    let (lower, _) = iter.size_hint();
+    let mut bytes: Vec<u8> = Vec::with_capacity(lower.saturating_mul(4));
+    for token in iter {
+        for c in token.chars() {
+            let cp = c as usize;
+            if cp < CHAR_TO_BYTE.len() {
+                bytes.push(CHAR_TO_BYTE[cp]);
+            } else {
+                // Non-GPT2 codepoints (e.g. DeepSeek's U+FF5C, U+2581) pass through.
+                let mut buf = [0u8; 4];
+                bytes.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
+            }
+        }
+    }
+    String::from_utf8(bytes).unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn build_byte_to_char_ref() -> [char; 256] {
+        let mut table = ['\0'; 256];
+        let mut next: u32 = 256;
+        for b in 0..=255u8 {
+            let cp = if is_nice(b) {
+                b as u32
+            } else {
+                let cp = next;
+                next += 1;
+                cp
+            };
+            table[b as usize] = char::from_u32(cp).unwrap();
+        }
+        table
+    }
+
+    #[test]
+    fn char_to_byte_roundtrips_every_byte() {
+        let byte_to_char = build_byte_to_char_ref();
+        for b in 0..=255u8 {
+            let cp = byte_to_char[b as usize] as usize;
+            assert!(cp < CHAR_TO_BYTE.len());
+            assert_eq!(CHAR_TO_BYTE[cp], b, "mismatch for byte {b:#x}");
+        }
+    }
+
+    #[test]
+    fn decode_ascii() {
+        assert_eq!(decode_byte_level(["Hello"]), "Hello");
+    }
+
+    #[test]
+    fn decode_space_marker() {
+        // GPT-2 maps 0x20 → Ġ (U+0120).
+        assert_eq!(
+            decode_byte_level(["\u{120}Hello", "\u{120}world"]),
+            " Hello world",
+        );
+    }
+
+    #[test]
+    fn decode_multibyte_euro() {
+        // € → 0xE2 0x82 0xAC, each mapped to a specific GPT-2 char.
+        let byte_to_char = build_byte_to_char_ref();
+        let encoded: String =
+            [0xE2u8, 0x82, 0xAC].iter().map(|&b| byte_to_char[b as usize]).collect();
+        assert_eq!(decode_byte_level([encoded.as_str()]), "€");
+    }
+
+    #[test]
+    fn decode_preserves_non_gpt2_chars() {
+        let tok = "<\u{FF5C}begin\u{2581}of\u{2581}sentence\u{FF5C}>";
+        assert_eq!(decode_byte_level([tok]), "<｜begin▁of▁sentence｜>");
+    }
+}
diff --git a/rust/src/tokenizer/src/error.rs b/rust/src/tokenizer/src/error.rs
new file mode 100644
index 000000000000..cfe8253ffe27
--- /dev/null
+++ b/rust/src/tokenizer/src/error.rs
@@ -0,0 +1,9 @@
+use thiserror::Error;
+use thiserror_ext::Macro;
+
+pub type Result<T> = std::result::Result<T, TokenizerError>;
+
+#[derive(Debug, Error, Macro)]
+#[thiserror_ext(macro(path = "crate::error"))]
+#[error("tokenizer error: {0}")]
+pub struct TokenizerError(#[message] pub String);
diff --git a/rust/src/tokenizer/src/hf.rs b/rust/src/tokenizer/src/hf.rs
new file mode 100644
index 000000000000..93b48545a249
--- /dev/null
+++ b/rust/src/tokenizer/src/hf.rs
@@ -0,0 +1,344 @@
+use std::path::Path;
+use std::sync::Arc;
+
+use fastokens::Tokenizer as FastokensTokenizer;
+use fastokens::decoders::Decoder as FastokensDecoder;
+use thiserror_ext::AsReport as _;
+use tokenizers::Tokenizer as HfTokenizer;
+use tracing::{info, warn};
+
+use crate::byte_level_decode::decode_byte_level;
+use crate::{Result, Tokenizer};
+
+enum Backend {
+    Hf(Box<HfTokenizer>),
+    Fastokens(Box<FastokensTokenizer>),
+    /// Fastokens tokenizer whose decoder is pure GPT-2 byte-level, so we can
+    /// bypass `Decoder::decode`'s `Vec<String>`/`join("")` assembly.
+    FastokensByteLevel(Box<FastokensTokenizer>),
+}
+
+/// True if `dec` is effectively a single `ByteLevel` stage — one `ByteLevel`
+/// leaf in a tree of `Sequence`s (fastokens represents `Fuse` as an empty
+/// `Sequence`, which is a no-op for our purposes).
+fn is_byte_level_only(dec: &FastokensDecoder) -> bool {
+    fn count_byte_level(dec: &FastokensDecoder) -> usize {
+        match dec {
+            FastokensDecoder::ByteLevel(_) => 1,
+            FastokensDecoder::Sequence(steps) => steps.iter().map(count_byte_level).sum(),
+        }
+    }
+    count_byte_level(dec) == 1
+}
+
+fn decode_fastokens_byte_level(
+    t: &FastokensTokenizer,
+    token_ids: &[u32],
+    skip_special_tokens: bool,
+) -> Result<String> {
+    let tokens: Vec<&str> = token_ids
+        .iter()
+        .filter(|&&id| !(skip_special_tokens && t.is_special_token(id)))
+        .map(|&id| {
+            t.id_to_token(id)
+                .ok_or_else(|| tokenizer_error!("decoding failed: unknown token ID: {id}"))
+        })
+        .collect::<Result<_>>()?;
+    Ok(decode_byte_level(tokens))
+}
+
+/// Tokenizer from `tokenizer.json` in HuggingFace format.
+///
+/// This tries to load with `fastokens` first for better performance, then falls
+/// back to HuggingFace's `tokenizers` if the former fails (e.g. due to
+/// unsupported tokenizer features or file formats).
+pub struct HuggingFaceTokenizer {
+    backend: Backend,
+    special_token_ids: Arc<[u32]>,
+}
+
+impl HuggingFaceTokenizer {
+    fn from_hf_backend(tokenizer: HfTokenizer) -> Self {
+        let special_token_ids = {
+            let mut ids: Vec<u32> = tokenizer
+                .get_added_tokens_decoder()
+                .iter()
+                .filter(|(_id, token)| token.special)
+                .map(|(id, _token)| *id)
+                .collect();
+            ids.sort_unstable();
+            ids.dedup();
+            Arc::from(ids)
+        };
+        Self {
+            backend: Backend::Hf(Box::new(tokenizer)),
+            special_token_ids,
+        }
+    }
+
+    fn from_fastokens_backend(tokenizer: FastokensTokenizer) -> Self {
+        let special_token_ids = {
+            let mut ids: Vec<u32> = tokenizer
+                .added_tokens()
+                .into_iter()
+                .flat_map(|added_tokens| added_tokens.iter())
+                .filter(|token| token.special)
+                .map(|token| token.id)
+                .collect();
+            ids.sort_unstable();
+            ids.dedup();
+            Arc::from(ids)
+        };
+        let byte_level = tokenizer.decoder().is_some_and(is_byte_level_only);
+        let backend = if byte_level {
+            Backend::FastokensByteLevel(Box::new(tokenizer))
+        } else {
+            Backend::Fastokens(Box::new(tokenizer))
+        };
+        Self {
+            backend,
+            special_token_ids,
+        }
+    }
+
+    /// Load from `tokenizer.json` with `fastokens`.
+    pub fn new_fastokens(path: &Path) -> Result<Self> {
+        info!(path = %path.display(), "loading tokenizer with fastokens");
+        let t = FastokensTokenizer::from_file(path)
+            .map_err(|error| tokenizer_error!("failed to load tokenizer: {}", error.as_report()))?;
+        Ok(Self::from_fastokens_backend(t))
+    }
+
+    /// Load from `tokenizer.json` with Hugging Face `tokenizers`.
+    pub fn new_hf(path: &Path) -> Result<Self> {
+        info!(path = %path.display(), "loading tokenizer with huggingface tokenizers");
+        let t = HfTokenizer::from_file(path)
+            .map_err(|error| tokenizer_error!("failed to load tokenizer: {}", error.as_report()))?;
+        Ok(Self::from_hf_backend(t))
+    }
+
+    /// Load from `tokenizer.json` via fastokens or HuggingFace tokenizers.
+    pub fn new(path: &Path) -> Result<Self> {
+        match Self::new_fastokens(path) {
+            Ok(tokenizer) => Ok(tokenizer),
+            Err(error) => {
+                warn!(
+                    path = %path.display(),
+                    error = %error.as_report(),
+                    "failed to load tokenizer with fastokens; falling back to HuggingFace tokenizers"
+                );
+                Self::new_hf(path)
+            }
+        }
+    }
+}
+
+impl Tokenizer for HuggingFaceTokenizer {
+    fn encode(&self, text: &str, add_special_tokens: bool) -> Result<Vec<u32>> {
+        match &self.backend {
+            Backend::Hf(t) => {
+                let encoding = t
+                    .encode(text, add_special_tokens)
+                    .map_err(|error| tokenizer_error!("encoding failed: {}", error.as_report()))?;
+                Ok(encoding.get_ids().to_vec())
+            }
+            Backend::Fastokens(t) | Backend::FastokensByteLevel(t) => t
+                .encode_with_special_tokens(text, add_special_tokens)
+                .map_err(|error| tokenizer_error!("encoding failed: {}", error.as_report())),
+        }
+    }
+
+    fn decode(&self, token_ids: &[u32], skip_special_tokens: bool) -> Result<String> {
+        match &self.backend {
+            Backend::Hf(t) => t
+                .decode(token_ids, skip_special_tokens)
+                .map_err(|error| tokenizer_error!("decoding failed: {}", error.as_report())),
+            Backend::Fastokens(t) => t
+                .decode(token_ids, skip_special_tokens)
+                .map_err(|error| tokenizer_error!("decoding failed: {}", error.as_report())),
+            Backend::FastokensByteLevel(t) => {
+                decode_fastokens_byte_level(t, token_ids, skip_special_tokens)
+            }
+        }
+    }
+
+    fn token_to_id(&self, token: &str) -> Option<u32> {
+        match &self.backend {
+            Backend::Hf(t) => t.token_to_id(token),
+            Backend::Fastokens(t) | Backend::FastokensByteLevel(t) => t.token_to_id(token),
+        }
+    }
+
+    fn id_to_token(&self, id: u32) -> Option<String> {
+        match &self.backend {
+            Backend::Hf(t) => t.id_to_token(id),
+            Backend::Fastokens(t) | Backend::FastokensByteLevel(t) => {
+                t.id_to_token(id).map(ToOwned::to_owned)
+            }
+        }
+    }
+
+    fn is_special_id(&self, token_id: u32) -> bool {
+        self.special_token_ids.binary_search(&token_id).is_ok()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use tempfile::tempdir;
+    use tokenizers::models::bpe::BPE;
+    use tokenizers::{AddedToken, Tokenizer as HfTokenizer};
+
+    use super::{HuggingFaceTokenizer, Tokenizer};
+
+    fn tiny_bpe_tokenizer() -> HfTokenizer {
+        let vocab = [
+            ("<unk>".to_string(), 0),
+            ("h".to_string(), 1),
+            ("e".to_string(), 2),
+            ("l".to_string(), 3),
+            ("o".to_string(), 4),
+            ("he".to_string(), 5),
+            ("ll".to_string(), 6),
+            ("hell".to_string(), 7),
+            ("hello".to_string(), 8),
+        ];
+        let merges = vec![
+            ("h".to_string(), "e".to_string()),
+            ("l".to_string(), "l".to_string()),
+            ("he".to_string(), "ll".to_string()),
+            ("hell".to_string(), "o".to_string()),
+        ];
+        let model = BPE::builder()
+            .vocab_and_merges(vocab, merges)
+            .unk_token("<unk>".to_string())
+            .build()
+            .expect("build bpe tokenizer");
+        HfTokenizer::new(model)
+    }
+
+    #[test]
+    fn hf_constructor_resolves_added_token_ids() {
+        let mut tokenizer = tiny_bpe_tokenizer();
+        tokenizer.add_special_tokens(&[AddedToken::from("<|im_end|>", true)]);
+
+        let dir = tempdir().expect("create temp dir");
+        let path = dir.path().join("tokenizer.json");
+        tokenizer.save(&path, false).expect("save tokenizer json");
+
+        let wrapper = HuggingFaceTokenizer::new_hf(&path).expect("load hf wrapper");
+        let special_id = wrapper.token_to_id("<|im_end|>").expect("resolve added special token id");
+        assert!(wrapper.is_special_id(special_id));
+    }
+
+    #[test]
+    fn new_fastokens_preserves_special_ids_from_fastokens_metadata() {
+        let mut tokenizer = tiny_bpe_tokenizer();
+        tokenizer.add_special_tokens(&[AddedToken::from("<|im_end|>", true)]);
+
+        let dir = tempdir().expect("create temp dir");
+        let path = dir.path().join("tokenizer.json");
+        tokenizer.save(&path, false).expect("save tokenizer json");
+
+        let wrapper = HuggingFaceTokenizer::new_fastokens(&path)
+            .expect("load wrapper with fastokens backend");
+        assert!(matches!(
+            wrapper.backend,
+            super::Backend::Fastokens(_) | super::Backend::FastokensByteLevel(_),
+        ));
+        let special_id = wrapper.token_to_id("<|im_end|>").expect("resolve added special token id");
+        assert!(wrapper.is_special_id(special_id));
+    }
+
+    /// BPE tokenizer that round-trips through fastokens with a genuine
+    /// `ByteLevel` decoder; vocab covers both GPT-2 (Ġ U+0120) and non-GPT-2
+    /// (｜ U+FF5C) codepoints.
+    fn tiny_byte_level_bpe() -> fastokens::Tokenizer {
+        let raw = r#"{
+            "version": "1.0",
+            "truncation": null,
+            "padding": null,
+            "added_tokens": [
+                {"id": 0, "content": "<|endoftext|>", "single_word": false,
+                 "lstrip": false, "rstrip": false, "normalized": false, "special": true}
+            ],
+            "normalizer": null,
+            "pre_tokenizer": {"type": "ByteLevel", "add_prefix_space": false,
+                              "trim_offsets": true, "use_regex": true},
+            "post_processor": null,
+            "decoder": {"type": "ByteLevel", "add_prefix_space": false,
+                        "trim_offsets": true, "use_regex": true},
+            "model": {
+                "type": "BPE",
+                "dropout": null,
+                "unk_token": null,
+                "continuing_subword_prefix": null,
+                "end_of_word_suffix": null,
+                "fuse_unk": false,
+                "byte_fallback": false,
+                "ignore_merges": false,
+                "vocab": {
+                    "<|endoftext|>": 0,
+                    "H": 1, "e": 2, "l": 3, "o": 4, "w": 5, "r": 6, "d": 7,
+                    "Ġ": 8, "!": 9,
+                    "｜": 10
+                },
+                "merges": []
+            }
+        }"#;
+        let value: serde_json::Value = serde_json::from_str(raw).expect("parse tokenizer json");
+        fastokens::Tokenizer::from_json(value).expect("build fastokens tokenizer")
+    }
+
+    #[test]
+    fn byte_level_detected_direct() {
+        let t = tiny_byte_level_bpe();
+        assert!(super::is_byte_level_only(t.decoder().expect("decoder")));
+    }
+
+    #[test]
+    fn byte_level_detected_inside_sequence() {
+        let raw = r#"{
+            "type": "Sequence",
+            "decoders": [
+                {"type": "ByteLevel", "add_prefix_space": false,
+                 "trim_offsets": true, "use_regex": true},
+                {"type": "Fuse"}
+            ]
+        }"#;
+        let config: fastokens::DecoderConfig =
+            serde_json::from_str(raw).expect("parse decoder config");
+        let dec =
+            fastokens::decoders::Decoder::from_config(config).expect("build decoder from config");
+        assert!(super::is_byte_level_only(&dec));
+    }
+
+    /// Fast path must produce byte-identical output to fastokens' own decode.
+    #[test]
+    fn fast_byte_level_matches_fastokens_decode() {
+        let t = tiny_byte_level_bpe();
+        let cases: &[&[u32]] = &[
+            &[],
+            &[1, 2, 3, 3, 4],                   // "Hello"
+            &[1, 2, 3, 3, 4, 8, 5, 4, 6, 3, 7], // "Hello world"
+            &[0, 1, 2, 3, 3, 4, 0, 9, 0],       // specials interleaved
+            &[10, 1, 2, 3, 3, 4, 10],           // ｜Hello｜ (non-GPT2 chars)
+        ];
+        for ids in cases {
+            for &skip in &[false, true] {
+                let expected = t.decode(ids, skip).expect("fastokens decode");
+                let got =
+                    super::decode_fastokens_byte_level(&t, ids, skip).expect("fast-path decode");
+                assert_eq!(got, expected, "ids={ids:?} skip={skip}");
+            }
+        }
+    }
+
+    #[test]
+    fn fast_byte_level_errors_on_unknown_id() {
+        let t = tiny_byte_level_bpe();
+        let err = super::decode_fastokens_byte_level(&t, &[999], false)
+            .expect_err("unknown id must error");
+        assert!(format!("{err:?}").contains("999"));
+    }
+}
diff --git a/rust/src/tokenizer/src/incremental.rs b/rust/src/tokenizer/src/incremental.rs
new file mode 100644
index 000000000000..7a025d35e5cd
--- /dev/null
+++ b/rust/src/tokenizer/src/incremental.rs
@@ -0,0 +1,359 @@
+use std::mem::take;
+
+use crate::{Result, Tokenizer};
+
+/// Stateful incremental decoder that emits text chunks one token at a time.
+pub trait IncrementalDecoder: Send {
+    /// Push one generated token and return how many new string bytes were
+    /// added.
+    fn push_token(&mut self, token_id: u32) -> Result<usize>;
+
+    /// Consume any text which is currently ready.
+    fn next_chunk(&mut self) -> Option<String>;
+
+    /// Flush any remaining buffered text that has not yet been emitted.
+    ///
+    /// Called after the final generated token to force out buffered/incomplete
+    /// fragments.
+    fn flush(&mut self, truncate_output_to: Option<usize>) -> Result<(Option<String>, String)>;
+
+    /// Return cumulative decoded text so far.
+    fn output(&self) -> &str;
+}
+
+/// [`IncrementalDecoder`] built on [`Tokenizer::decode()`] with prefix-diffing.
+///
+/// This is the same sliding-window algorithm used by `tokenizers::DecodeStream`
+pub(crate) struct DecodeStream<'a, T: Tokenizer + ?Sized> {
+    tokenizer: &'a T,
+    skip_special_tokens: bool,
+    min_bytes_to_buffer: usize,
+    // mutated state
+    ids: Vec<u32>,
+    prefix: String,
+    prefix_index: usize,
+    cumulative_output: String,
+    output_index: usize,
+}
+
+impl<'a, T: Tokenizer + ?Sized> DecodeStream<'a, T> {
+    pub(crate) fn new(
+        tokenizer: &'a T,
+        prompt_token_ids: &[u32],
+        skip_special_tokens: bool,
+        min_bytes_to_buffer: usize,
+    ) -> Self {
+        Self {
+            tokenizer,
+            skip_special_tokens,
+            min_bytes_to_buffer,
+            ids: prompt_token_ids.to_vec(),
+            prefix: String::new(),
+            prefix_index: 0,
+            cumulative_output: String::new(),
+            output_index: 0,
+        }
+    }
+}
+
+/// Try a short tail suffix first (covers a CJK glyph straddling 1-2 token
+/// boundaries); beyond 6 tokens the fallback full-prompt decode is no worse
+/// than baseline so widening the sweep just adds overhead.
+const SAFE_SUFFIX_MIN: usize = 4;
+const SAFE_SUFFIX_MAX: usize = 6;
+
+impl<T: Tokenizer + ?Sized> DecodeStream<'_, T> {
+    /// Seed `self.prefix` from the shortest trailing suffix whose decoded text
+    /// has no U+FFFD — a clean decode means the suffix starts and ends at
+    /// valid UTF-8/token boundaries, so priming from it is equivalent to
+    /// priming from the full prompt.
+    fn seed_prefix(&mut self) -> Result<()> {
+        let prompt_len = self.ids.len();
+        if prompt_len > SAFE_SUFFIX_MIN {
+            let max_try = SAFE_SUFFIX_MAX.min(prompt_len - 1);
+            for suffix_len in SAFE_SUFFIX_MIN..=max_try {
+                let start = prompt_len - suffix_len;
+                let decoded =
+                    self.tokenizer.decode(&self.ids[start..], self.skip_special_tokens)?;
+                if !decoded.contains('\u{FFFD}') {
+                    self.prefix = decoded;
+                    self.ids.drain(..start);
+                    self.prefix_index = self.ids.len();
+                    return Ok(());
+                }
+            }
+        }
+        let decoded = self.tokenizer.decode(&self.ids, self.skip_special_tokens)?;
+        if !decoded.ends_with('\u{FFFD}') {
+            self.prefix = decoded;
+            self.prefix_index = self.ids.len();
+        }
+        Ok(())
+    }
+}
+
+impl<T: Tokenizer + ?Sized> IncrementalDecoder for DecodeStream<'_, T> {
+    fn push_token(&mut self, token_id: u32) -> Result<usize> {
+        if self.prefix.is_empty() && !self.ids.is_empty() {
+            self.seed_prefix()?;
+        }
+
+        self.ids.push(token_id);
+        let string = self.tokenizer.decode(&self.ids, self.skip_special_tokens)?;
+        let prefix_len = self.prefix.len();
+        if string.len() <= prefix_len || string.ends_with('\u{FFFD}') {
+            return Ok(0);
+        }
+        // Ensure we split at a utf-8 char boundary.
+        let new_chunk = &string[string.floor_char_boundary(prefix_len)..];
+        self.cumulative_output.push_str(new_chunk);
+        self.ids.drain(..self.prefix_index);
+        self.prefix = self.tokenizer.decode(&self.ids, self.skip_special_tokens)?;
+        self.prefix_index = self.ids.len();
+        Ok(new_chunk.len())
+    }
+
+    fn next_chunk(&mut self) -> Option<String> {
+        let cutoff = self.cumulative_output.len().saturating_sub(self.min_bytes_to_buffer);
+        (cutoff > self.output_index).then(|| {
+            let chunk = self.cumulative_output[self.output_index..cutoff].to_string();
+            self.output_index = cutoff;
+            chunk
+        })
+    }
+
+    fn flush(&mut self, truncate_output_to: Option<usize>) -> Result<(Option<String>, String)> {
+        if !self.ids.is_empty() {
+            let string = self.tokenizer.decode(&self.ids, self.skip_special_tokens)?;
+            let prefix_len = self.prefix.len();
+            self.ids.clear();
+            self.prefix.clear();
+            self.prefix_index = 0;
+            // Ensure we split at a utf-8 char boundary.
+            self.cumulative_output
+                .push_str(&string[string.floor_char_boundary(prefix_len)..]);
+        }
+        if let Some(truncate_output_to) = truncate_output_to {
+            self.cumulative_output.truncate(truncate_output_to);
+        }
+        let last_chunk = (self.output_index < self.cumulative_output.len())
+            .then(|| self.cumulative_output[self.output_index..].to_string());
+        self.output_index = 0;
+        Ok((last_chunk, take(&mut self.cumulative_output)))
+    }
+
+    fn output(&self) -> &str {
+        &self.cumulative_output
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Backend that treats each token ID as a raw byte, producing lossy UTF-8.
+    #[derive(Debug)]
+    struct Utf8Backend;
+
+    impl Tokenizer for Utf8Backend {
+        fn encode(&self, _text: &str, _add_special_tokens: bool) -> Result<Vec<u32>> {
+            unreachable!()
+        }
+
+        fn decode(&self, token_ids: &[u32], _skip_special_tokens: bool) -> Result<String> {
+            let bytes = token_ids.iter().map(|id| *id as u8).collect::<Vec<_>>();
+            Ok(String::from_utf8_lossy(&bytes).into_owned())
+        }
+
+        fn token_to_id(&self, _token: &str) -> Option<u32> {
+            unreachable!()
+        }
+    }
+
+    #[test]
+    fn holds_incomplete_utf8_until_complete() {
+        let backend = Utf8Backend;
+        let mut decoder = backend.create_decode_stream(&[], false, 0);
+
+        // 你 = U+4F60 = 0xE4 0xBD 0xA0
+        assert_eq!(decoder.push_token(0xe4).unwrap(), 0);
+        assert_eq!(decoder.push_token(0xbd).unwrap(), 0);
+        assert_eq!(decoder.push_token(0xa0).unwrap(), 3); // "你" is 3 bytes
+        assert_eq!(decoder.output(), "你");
+    }
+
+    #[test]
+    fn emits_ascii_immediately() {
+        let backend = Utf8Backend;
+        let mut decoder = backend.create_decode_stream(&[], false, 0);
+
+        assert_eq!(decoder.push_token(b'o' as u32).unwrap(), 1);
+        assert_eq!(decoder.push_token(b'k' as u32).unwrap(), 1);
+        assert_eq!(decoder.output(), "ok");
+    }
+
+    #[test]
+    fn flush_returns_none_when_fully_consumed() {
+        let backend = Utf8Backend;
+        let mut decoder = backend.create_decode_stream(&[], false, 0);
+
+        assert_eq!(decoder.push_token(b'o' as u32).unwrap(), 1);
+        assert_eq!(decoder.next_chunk().as_deref(), Some("o"));
+        assert_eq!(decoder.push_token(b'k' as u32).unwrap(), 1);
+        assert_eq!(decoder.next_chunk().as_deref(), Some("k"));
+        // All text already consumed via next_chunk
+        let (last_chunk, full_text) = decoder.flush(None).unwrap();
+        assert_eq!(last_chunk, None);
+        assert_eq!(full_text, "ok");
+    }
+
+    #[test]
+    fn flush_emits_buffered_incomplete_utf8() {
+        let backend = Utf8Backend;
+        let mut decoder = backend.create_decode_stream(&[], false, 0);
+
+        // Push incomplete multi-byte sequence — step returns 0 bytes.
+        assert_eq!(decoder.push_token(0xe4).unwrap(), 0);
+        assert_eq!(decoder.push_token(0xbd).unwrap(), 0);
+
+        // Flush forces out whatever the decoder can produce (lossy replacement).
+        let (last_chunk, _full_text) = decoder.flush(None).unwrap();
+        assert!(last_chunk.is_some());
+    }
+
+    /// Backend where token 0 is a special token.
+    #[derive(Debug)]
+    struct SpecialTokenBackend;
+
+    impl Tokenizer for SpecialTokenBackend {
+        fn encode(&self, _text: &str, _add_special_tokens: bool) -> Result<Vec<u32>> {
+            unreachable!()
+        }
+
+        fn decode(&self, token_ids: &[u32], skip_special_tokens: bool) -> Result<String> {
+            let mut text = String::new();
+            for &token_id in token_ids {
+                match token_id {
+                    0 if !skip_special_tokens => text.push_str("<special>"),
+                    0 => {}
+                    1 => text.push('a'),
+                    _ => {}
+                }
+            }
+            Ok(text)
+        }
+
+        fn token_to_id(&self, _token: &str) -> Option<u32> {
+            unreachable!()
+        }
+    }
+
+    #[test]
+    fn respects_skip_special_tokens() {
+        let backend = SpecialTokenBackend;
+        let mut skip_decoder = backend.create_decode_stream(&[], true, 0);
+        let mut keep_decoder = backend.create_decode_stream(&[], false, 0);
+
+        assert_eq!(skip_decoder.push_token(0).unwrap(), 0);
+        assert_eq!(keep_decoder.push_token(0).unwrap(), 9); // "<special>" is 9 bytes
+        assert_eq!(keep_decoder.output(), "<special>");
+    }
+
+    #[test]
+    fn prompt_tokens_provide_context_without_re_emission() {
+        let backend = Utf8Backend;
+        let prompt = &[b'H' as u32, b'i' as u32];
+        let mut decoder = backend.create_decode_stream(prompt, false, 0);
+
+        // First generated token should not re-emit "Hi".
+        let added = decoder.push_token(b'!' as u32).unwrap();
+        assert_eq!(added, 1);
+        assert_eq!(decoder.output(), "!");
+    }
+
+    #[test]
+    fn chunks_concatenate_to_full_text() {
+        let backend = Utf8Backend;
+        let mut decoder = backend.create_decode_stream(&[], false, 0);
+
+        let input = b"Hello, world!";
+        let mut full = String::new();
+        for &byte in input {
+            decoder.push_token(byte as u32).unwrap();
+            if let Some(chunk) = decoder.next_chunk() {
+                full.push_str(&chunk);
+            }
+        }
+        let (last_chunk, full_text) = decoder.flush(None).unwrap();
+        assert_eq!(last_chunk, None); // all consumed via next_chunk
+        assert_eq!(full, "Hello, world!");
+        assert_eq!(full_text, "Hello, world!");
+    }
+
+    /// Backend simulating non-monotonic decode where adding a token changes how
+    /// earlier tokens decode (context-dependent normalization), causing
+    /// prefix_len to land mid-UTF-8. Reproduces the class of bug from
+    /// vllm-project/vllm#17448.
+    #[derive(Debug)]
+    struct NonMonotonicBackend;
+
+    impl Tokenizer for NonMonotonicBackend {
+        fn encode(&self, _text: &str, _add_special_tokens: bool) -> Result<Vec<u32>> {
+            unreachable!()
+        }
+
+        fn decode(&self, token_ids: &[u32], _skip_special_tokens: bool) -> Result<String> {
+            match token_ids {
+                [1] => Ok("abc".into()),
+                [1, 2] => Ok("ab".into()),
+                // Token 3 triggers a normalization change: "ab" becomes emoji + "d".
+                // prefix_len=3 ("abc") lands inside the 4-byte emoji 🎉.
+                [1, 2, 3] => Ok("🎉d".into()), // 🎉 is 4 bytes + d = 5 bytes
+                [2, 3] => Ok("🎉d".into()),    // prefix recompute after drain
+                [3] => Ok("d".into()),         // after drain
+                _ => panic!("unexpected decode: {:?}", token_ids),
+            }
+        }
+
+        fn token_to_id(&self, _token: &str) -> Option<u32> {
+            unreachable!()
+        }
+    }
+
+    /// Without the char-boundary fix, this panics slicing mid-emoji.
+    #[test]
+    fn non_monotonic_decode_does_not_panic() {
+        let backend = NonMonotonicBackend;
+        let mut decoder = backend.create_decode_stream(&[], false, 0);
+
+        // Token 1: "abc", prefix="abc"
+        assert_eq!(decoder.push_token(1).unwrap(), 3);
+        // Token 2: "ab" (shorter), no emit
+        assert_eq!(decoder.push_token(2).unwrap(), 0);
+        // Token 3: "🎉d" — prefix_len=3 is mid-emoji. Without fix this panics.
+        let added = decoder.push_token(3).unwrap();
+        assert!(added > 0);
+    }
+
+    #[test]
+    fn next_chunk_with_hold_back() {
+        let backend = Utf8Backend;
+        // hold_back_bytes: 3 means we buffer the last 3 bytes
+        let mut decoder = backend.create_decode_stream(&[], false, 3);
+
+        let input = b"Hello!";
+        let mut chunks = String::new();
+        for &byte in input {
+            decoder.push_token(byte as u32).unwrap();
+            if let Some(chunk) = decoder.next_chunk() {
+                chunks.push_str(&chunk);
+            }
+        }
+        // With hold_back_bytes=3, last 3 bytes ("lo!") are held back
+        assert_eq!(chunks, "Hel");
+        // Flush returns the rest
+        let (last_chunk, full_text) = decoder.flush(None).unwrap();
+        assert_eq!(last_chunk.as_deref(), Some("lo!"));
+        assert_eq!(full_text, "Hello!");
+    }
+}
diff --git a/rust/src/tokenizer/src/lib.rs b/rust/src/tokenizer/src/lib.rs
new file mode 100644
index 000000000000..6a512a5a620e
--- /dev/null
+++ b/rust/src/tokenizer/src/lib.rs
@@ -0,0 +1,62 @@
+use std::sync::Arc;
+
+use crate::incremental::DecodeStream;
+
+mod byte_level_decode;
+#[macro_use]
+mod error;
+mod hf;
+mod incremental;
+mod tekken;
+mod tiktoken;
+
+pub use error::{Result, TokenizerError};
+pub use hf::HuggingFaceTokenizer;
+pub use incremental::IncrementalDecoder;
+pub use tekken::TekkenTokenizer;
+pub use tiktoken::TiktokenTokenizer;
+
+pub trait Tokenizer: Send + Sync {
+    /// Encode one prompt string into token IDs.
+    fn encode(&self, text: &str, add_special_tokens: bool) -> Result<Vec<u32>>;
+
+    /// Decode one token sequence into text.
+    fn decode(&self, token_ids: &[u32], skip_special_tokens: bool) -> Result<String>;
+
+    /// Convert one token string into a token ID, returning `None` if the token
+    /// is not in the tokenizer vocabulary.
+    fn token_to_id(&self, token: &str) -> Option<u32>;
+
+    /// Convert one token ID into the tokenizer's raw token string.
+    fn id_to_token(&self, _id: u32) -> Option<String> {
+        // TODO: remove default impl and require this to be implemented by all
+        // tokenizers
+        None
+    }
+
+    /// Return whether the given token ID is special.
+    fn is_special_id(&self, _token_id: u32) -> bool {
+        false
+    }
+
+    /// Create a stateful incremental decoder primed with the given prompt
+    /// tokens.
+    ///
+    /// The prompt tokens provide left context for the first generated token;
+    /// the decoder does not re-emit prompt text.
+    fn create_decode_stream(
+        &self,
+        prompt_token_ids: &[u32],
+        skip_special_tokens: bool,
+        min_bytes_to_buffer: usize,
+    ) -> Box<dyn IncrementalDecoder + '_> {
+        Box::new(DecodeStream::new(
+            self,
+            prompt_token_ids,
+            skip_special_tokens,
+            min_bytes_to_buffer,
+        ))
+    }
+}
+
+pub type DynTokenizer = Arc<dyn Tokenizer>;
diff --git a/rust/src/tokenizer/src/tekken.rs b/rust/src/tokenizer/src/tekken.rs
new file mode 100644
index 000000000000..e8560c65a30b
--- /dev/null
+++ b/rust/src/tokenizer/src/tekken.rs
@@ -0,0 +1,62 @@
+use std::path::Path;
+
+use tekken::Tekkenizer;
+use tracing::info;
+
+use crate::{Result, Tokenizer};
+
+/// Mistral Tekken tokenizer from a `tekken.json` file.
+pub struct TekkenTokenizer {
+    inner: Tekkenizer,
+}
+
+impl TekkenTokenizer {
+    /// Load a Mistral Tekken tokenizer from a `tekken.json` file.
+    pub fn new(path: &Path) -> Result<Self> {
+        info!(path = %path.display(), "loading tokenizer with Mistral Tekken");
+
+        let inner = Tekkenizer::from_file(path).map_err(|error| {
+            tokenizer_error!(
+                "failed to load tekken tokenizer from {}: {error}",
+                path.display()
+            )
+        })?;
+        Ok(Self { inner })
+    }
+}
+
+impl Tokenizer for TekkenTokenizer {
+    fn encode(&self, text: &str, add_special_tokens: bool) -> Result<Vec<u32>> {
+        self.inner
+            .encode(text, add_special_tokens, false)
+            .map_err(|error| tokenizer_error!("encoding failed: {error}"))
+    }
+
+    fn decode(&self, token_ids: &[u32], skip_special_tokens: bool) -> Result<String> {
+        let policy = if skip_special_tokens {
+            tekken::SpecialTokenPolicy::Ignore
+        } else {
+            tekken::SpecialTokenPolicy::Keep
+        };
+        self.inner
+            .decode(token_ids, policy)
+            .map_err(|error| tokenizer_error!("decoding failed: {error}"))
+    }
+
+    fn token_to_id(&self, token: &str) -> Option<u32> {
+        // tekken-rs exposes `get_control_token` for special tokens. Try that first,
+        // then fall back to encoding.
+        self.inner.get_control_token(token).ok().or_else(|| {
+            let ids = self.inner.encode(token, false, false).ok()?;
+            if ids.len() == 1 { Some(ids[0]) } else { None }
+        })
+    }
+
+    fn id_to_token(&self, id: u32) -> Option<String> {
+        self.inner.id_to_piece(id).ok()
+    }
+
+    fn is_special_id(&self, token_id: u32) -> bool {
+        self.inner.is_special_token(token_id)
+    }
+}
diff --git a/rust/src/tokenizer/src/tiktoken.rs b/rust/src/tokenizer/src/tiktoken.rs
new file mode 100644
index 000000000000..0c57ff5f6b69
--- /dev/null
+++ b/rust/src/tokenizer/src/tiktoken.rs
@@ -0,0 +1,1013 @@
+use std::collections::HashSet;
+use std::path::Path;
+use std::sync::Mutex;
+
+use base64::Engine as _;
+use rustc_hash::{FxHashMap, FxHashSet};
+use serde::Deserialize;
+use thiserror_ext::AsReport as _;
+use tracing::{info, warn};
+
+use crate::{Result, Tokenizer};
+
+/// Default regex pattern used when loading tiktoken from a BPE file. This is
+/// the same `cl100k_base` pattern that HuggingFace transformers uses as its
+/// default in `TikTokenConverter`.
+const CL100K_BASE_PATTERN: &str = r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+";
+
+/// Kimi BPE pattern from `moonshotai/Kimi-K2-Instruct/tokenization_kimi.py`.
+const KIMI_PATTERN: &str = r"[\p{Han}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+";
+
+/// Fallback number of reserved special-token slots to assume when the model's
+/// `config.json` is not available (so we cannot read `vocab_size` directly).
+///
+/// 256 is the value used by Kimi K2 / K2.5 (`tokenization_kimi.py`'s
+/// `num_reserved_special_tokens`) and by Llama 3, and it appears to be the most
+/// common convention among modern tiktoken-based HF tokenizers. When
+/// `config.json` *is* present we honour the model's actual `vocab_size` instead
+/// of this fallback — see `Self::new`.
+const FALLBACK_NUM_RESERVED_SPECIAL_TOKENS: u32 = 256;
+const DISABLE_RIPTOKEN_ENV: &str = "VLLM_RS_DISABLE_RIPTOKEN";
+
+/// Parsed entry from `tokenizer_config.json`'s `added_tokens_decoder`.
+#[derive(Debug, Clone, Deserialize)]
+struct AddedToken {
+    content: String,
+    /// HuggingFace `added_tokens_decoder` entries can be marked `"special":
+    /// true|false`. Special tokens are dropped from output when `decode` is
+    /// called with `skip_special_tokens = true`. Defaults to `false` when
+    /// the field is omitted, matching HuggingFace's `AddedToken` default —
+    /// so only tokens explicitly marked special are stripped during normal
+    /// decode (where `skip_special_tokens` itself defaults to true).
+    #[serde(default)]
+    special: bool,
+}
+
+/// Minimal subset of `tokenizer_config.json` needed by the tiktoken loader.
+#[derive(Debug, Default, Deserialize)]
+#[serde(default)]
+struct TiktokenTokenizerConfig {
+    /// Format:
+    /// `{ "added_tokens_decoder": { "163584": { "content": "[BOS]", "special":
+    /// true }, ... } }`
+    #[serde(default)]
+    added_tokens_decoder: FxHashMap<u32, AddedToken>,
+}
+
+/// Minimal subset of model `config.json` needed by the tiktoken loader.
+#[derive(Debug, Default, Deserialize)]
+#[serde(default)]
+struct TiktokenModelConfig {
+    model_type: Option<String>,
+    vocab_size: Option<u32>,
+    text_config: Option<Box<TiktokenModelConfig>>,
+}
+
+impl TiktokenModelConfig {
+    /// Read `model_type` from a model `config.json` value, falling back to a
+    /// single-level nested `text_config.model_type` for composite (e.g.
+    /// multimodal) configs that keep text metadata under a `text_config`
+    /// object.
+    fn effective_model_type(&self) -> Option<&str> {
+        self.model_type
+            .as_deref()
+            .or_else(|| self.text_config.as_deref()?.effective_model_type())
+    }
+
+    /// Read `vocab_size` from a model `config.json` value, falling back to a
+    /// single-level nested `text_config.vocab_size` for composite (e.g.
+    /// multimodal) configs that keep text metadata under a `text_config`
+    /// object — matching the same shape `ModelConfig` parses.
+    fn effective_vocab_size(&self) -> Option<u32> {
+        self.vocab_size.or_else(|| self.text_config.as_deref()?.effective_vocab_size())
+    }
+}
+
+/// Tiktoken tokenizer from `tiktoken.model` or `*.tiktoken` BPE files.
+pub struct TiktokenTokenizer {
+    backend: Backend,
+    metadata: TokenMetadata,
+}
+
+enum Backend {
+    Riptoken(RiptokenBackend),
+    TiktokenRs(TiktokenRsBackend),
+}
+
+struct RiptokenBackend {
+    inner: Box<riptoken::CoreBPE>,
+    allowed_special_tokens: Vec<String>,
+}
+
+struct TiktokenRsBackend {
+    inner: Box<tiktoken_rs::CoreBPE>,
+    /// Reverse map for special / added token strings populated from the
+    /// reserved range. This lets `token_to_id` answer special-token lookups
+    /// directly without round-tripping through `tiktoken-rs`'s encoder,
+    /// which can panic for unknown special-looking strings.
+    special_token_ids_by_text: FxHashMap<String, u32>,
+    /// Set of out-of-vocab token IDs we have already warned about. The
+    /// reserved-slot population in the constructor should keep this empty
+    /// under normal operation; it only fills up if a model emits ids at or
+    /// above `vocab_upper_bound` (e.g. an engine sampling bug). We dedupe
+    /// so streaming decode (which calls `decode` repeatedly on the same prefix)
+    /// does not spam.
+    warned_unknown_ids: Mutex<FxHashSet<u32>>,
+}
+
+struct TokenMetadata {
+    /// Number of regular BPE tokens. Token ids in `[0, num_base_tokens)` are
+    /// BPE tokens that always decode to text; ids in `[num_base_tokens,
+    /// vocab_upper_bound)` live in the special-token slots and are subject
+    /// to `skip_special_tokens` filtering.
+    num_base_tokens: u32,
+    /// Exclusive upper bound on token IDs that `inner` is guaranteed to know
+    /// how to decode.
+    ///
+    /// The constructor registers every id in `[num_base_tokens,
+    /// vocab_upper_bound)` with the inner `CoreBPE` as a (named or
+    /// `<|reserved_token_{id}|>`) special token, and the BPE
+    /// encoder densely covers `[0, num_base_tokens)`. So any id below this
+    /// bound is in one of the inner `CoreBPE`'s decoder maps and
+    /// `_decode_native_and_split` will not panic on it. `decode` filters
+    /// out ids at or above this bound to keep that guarantee.
+    vocab_upper_bound: u32,
+    /// Ids in `[num_base_tokens, vocab_upper_bound)` whose
+    /// `added_tokens_decoder` entry was explicitly marked `"special":
+    /// false` — i.e. tokens that should still appear in output
+    /// even when `skip_special_tokens = true`. For Kimi K2 / K2.5 this
+    /// typically holds the tool-call markers and `<think>` / `</think>`.
+    /// Reserved-slot placeholders are not in this set (they default to
+    /// special and get skipped).
+    non_special_added_ids: FxHashSet<u32>,
+    /// Raw token string by token id. Base BPE tokens are represented with
+    /// lossy UTF-8, matching decode behavior for byte sequences that are not
+    /// valid UTF-8 on their own.
+    token_by_id: FxHashMap<u32, String>,
+}
+
+impl TokenMetadata {
+    fn filter_special_tokens(&self, token_ids: &[u32]) -> Vec<u32> {
+        token_ids
+            .iter()
+            .copied()
+            .filter(|&id| {
+                id < self.num_base_tokens
+                    || id >= self.vocab_upper_bound
+                    || self.non_special_added_ids.contains(&id)
+            })
+            .collect()
+    }
+
+    fn is_special_id(&self, token_id: u32) -> bool {
+        token_id >= self.num_base_tokens
+            && token_id < self.vocab_upper_bound
+            && !self.non_special_added_ids.contains(&token_id)
+    }
+
+    fn id_to_token(&self, token_id: u32) -> Option<String> {
+        self.token_by_id.get(&token_id).cloned()
+    }
+}
+
+impl RiptokenBackend {
+    fn encode(&self, text: &str) -> Vec<u32> {
+        // TODO: avoid collecting `allowed_special` every time this method is called.
+        let allowed_special: HashSet<&str> =
+            self.allowed_special_tokens.iter().map(String::as_str).collect();
+        self.inner.encode(text, &allowed_special)
+    }
+
+    fn decode(&self, token_ids: &[u32]) -> String {
+        let bytes = self.inner.decode_bytes(token_ids);
+        // TODO: use `from_utf8_lossy_owned` once it's stabilized.
+        String::from_utf8_lossy(&bytes).into_owned()
+    }
+
+    fn token_to_id(&self, token: &str) -> Option<u32> {
+        self.inner.encode_single_token(token.as_bytes())
+    }
+}
+
+impl TiktokenRsBackend {
+    fn encode(&self, text: &str) -> Vec<u32> {
+        self.inner.encode_with_special_tokens(text)
+    }
+
+    fn decode(&self, token_ids: &[u32], metadata: &TokenMetadata) -> String {
+        let safe_ids: Vec<u32> = token_ids
+            .iter()
+            .copied()
+            .filter(|&id| {
+                if id >= metadata.vocab_upper_bound {
+                    self.warn_unknown_id(id);
+                    return false;
+                }
+                true
+            })
+            .collect();
+        let bytes: Vec<u8> = self.inner._decode_native_and_split(safe_ids).flatten().collect();
+        // TODO: use `from_utf8_lossy_owned` once it's stabilized.
+        String::from_utf8_lossy(&bytes).into_owned()
+    }
+
+    fn token_to_id(&self, token: &str) -> Option<u32> {
+        if let Some(&token_id) = self.special_token_ids_by_text.get(token) {
+            return Some(token_id);
+        }
+
+        // Fall back to ordinary encoding for regular vocabulary items. This
+        // deliberately avoids `encode_with_special_tokens`: older `tiktoken-rs`
+        // versions can panic if the input text merely *looks* like a special
+        // token but is not registered in `special_tokens_encoder`.
+        let ids = self.inner.encode_ordinary(token);
+        if ids.len() == 1 { Some(ids[0]) } else { None }
+    }
+
+    /// Log a warning the first time an unknown token id is seen during decode,
+    /// deduped across calls so streaming decode does not spam the log for
+    /// the same id.
+    fn warn_unknown_id(&self, token_id: u32) {
+        let newly_inserted = self
+            .warned_unknown_ids
+            .lock()
+            .map(|mut set| set.insert(token_id))
+            .unwrap_or(false);
+        if newly_inserted {
+            warn!(
+                token_id,
+                "tiktoken-rs decode encountered token id not in the vocabulary; skipping. \
+                 This typically indicates a sparse-vocab model whose `added_tokens_decoder` \
+                 does not list every reserved id in the special-token range."
+            );
+        }
+    }
+}
+
+impl TiktokenTokenizer {
+    /// Load a tiktoken tokenizer from a `.tiktoken` / `tiktoken.model` BPE
+    /// file.
+    ///
+    /// The BPE file format is one `<base64-token-bytes> <rank>` pair per line,
+    /// the same format used by OpenAI's tiktoken and by HuggingFace model
+    /// repos that ship tiktoken files (e.g. DeepSeek, Kimi K2).
+    ///
+    /// Special / added tokens are read from `tokenizer_config.json` in the same
+    /// directory when present. The `cl100k_base` regex pattern is used as a
+    /// reasonable default.
+    pub fn new(path: &Path) -> Result<Self> {
+        if std::env::var_os(DISABLE_RIPTOKEN_ENV).is_some() {
+            return Self::new_tiktoken_rs(path);
+        }
+
+        match Self::new_riptoken(path) {
+            Ok(tokenizer) => Ok(tokenizer),
+            Err(error) => {
+                warn!(
+                    path = %path.display(),
+                    error = %error.as_report(),
+                    "failed to load tokenizer with riptoken; falling back to tiktoken-rs"
+                );
+                Self::new_tiktoken_rs(path)
+            }
+        }
+    }
+
+    /// Load from `tiktoken.model` / `*.tiktoken` with riptoken.
+    pub fn new_riptoken(path: &Path) -> Result<Self> {
+        info!(path = %path.display(), "loading tokenizer with riptoken (BPE file)");
+
+        let config = LoadedTiktokenConfig::load(path)?;
+        let allowed_special_tokens = config.special_tokens_encoder.keys().cloned().collect();
+        let inner = riptoken::CoreBPE::new(
+            config.encoder.into_iter().collect(),
+            config.special_tokens_encoder.into_iter().collect(),
+            config.pattern,
+        )
+        .map_err(|error| {
+            tokenizer_error!(
+                "failed to create riptoken tokenizer from {}: {error}",
+                path.display()
+            )
+        })?;
+
+        Ok(Self {
+            backend: Backend::Riptoken(RiptokenBackend {
+                inner: Box::new(inner),
+                allowed_special_tokens,
+            }),
+            metadata: config.metadata,
+        })
+    }
+
+    /// Load from `tiktoken.model` / `*.tiktoken` with tiktoken-rs.
+    pub fn new_tiktoken_rs(path: &Path) -> Result<Self> {
+        info!(path = %path.display(), "loading tokenizer with tiktoken-rs (BPE file)");
+
+        let config = LoadedTiktokenConfig::load(path)?;
+        let special_token_ids_by_text = config.special_tokens_encoder.clone();
+        let inner = tiktoken_rs::CoreBPE::new(
+            config.encoder,
+            config.special_tokens_encoder,
+            config.pattern,
+        )
+        .map_err(|error| {
+            tokenizer_error!(
+                "failed to create tiktoken-rs tokenizer from {}: {error}",
+                path.display()
+            )
+        })?;
+
+        Ok(Self {
+            backend: Backend::TiktokenRs(TiktokenRsBackend {
+                inner: Box::new(inner),
+                special_token_ids_by_text,
+                warned_unknown_ids: Mutex::new(FxHashSet::default()),
+            }),
+            metadata: config.metadata,
+        })
+    }
+}
+
+struct LoadedTiktokenConfig {
+    encoder: FxHashMap<Vec<u8>, u32>,
+    special_tokens_encoder: FxHashMap<String, u32>,
+    metadata: TokenMetadata,
+    pattern: &'static str,
+}
+
+impl LoadedTiktokenConfig {
+    fn load(path: &Path) -> Result<Self> {
+        let content = std::fs::read_to_string(path).map_err(|error| {
+            tokenizer_error!(
+                "failed to read tiktoken file {}: {}",
+                path.display(),
+                error.as_report()
+            )
+        })?;
+        let mut encoder: FxHashMap<Vec<u8>, u32> =
+            FxHashMap::with_capacity_and_hasher(content.lines().count(), Default::default());
+        for line in content.lines() {
+            if line.is_empty() {
+                continue;
+            }
+            let mut parts = line.split_whitespace();
+            let token_b64 =
+                parts.next().ok_or_else(|| tokenizer_error!("missing token in tiktoken file"))?;
+            let rank_str =
+                parts.next().ok_or_else(|| tokenizer_error!("missing rank in tiktoken file"))?;
+            let token_bytes = base64::engine::general_purpose::STANDARD
+                .decode(token_b64)
+                .map_err(|error| tokenizer_error!("invalid base64 in tiktoken file: {error}"))?;
+            let rank: u32 = rank_str
+                .parse()
+                .map_err(|error| tokenizer_error!("invalid rank in tiktoken file: {error}"))?;
+            encoder.insert(token_bytes, rank);
+        }
+
+        let parent_dir = path.parent();
+
+        // Read added/special tokens (id -> {name, special}) from
+        // tokenizer_config.json in the same dir.
+        let added_tokens_by_id = parent_dir
+            .map(|dir| dir.join("tokenizer_config.json"))
+            .filter(|p| p.exists())
+            .and_then(|config_path| {
+                let content = std::fs::read_to_string(&config_path).ok()?;
+                serde_json::from_str(&content).ok()
+            })
+            .map(|config: TiktokenTokenizerConfig| config.added_tokens_decoder)
+            .unwrap_or_default();
+
+        let model_config: Option<TiktokenModelConfig> = parent_dir
+            .map(|dir| dir.join("config.json"))
+            .filter(|p| p.exists())
+            .and_then(|config_path| {
+                let content = std::fs::read_to_string(&config_path).ok()?;
+                serde_json::from_str(&content).ok()
+            });
+        let vocab_size_from_config = model_config.as_ref().and_then(|c| c.effective_vocab_size());
+
+        // Build the full special-tokens encoder by populating the reserved
+        // range that follows the BPE vocabulary. Unknown reserved slots get
+        // Python-compatible placeholder names so sampled ids can still decode.
+        //
+        // Note: `*.tiktoken` ranks are token ids, and they are not guaranteed
+        // to be contiguous. The base-vocab boundary is therefore `max_rank + 1`,
+        // not `encoder.len()`.
+        let num_base_tokens =
+            encoder.values().copied().max().map_or(0, |max_rank| max_rank.saturating_add(1));
+        let max_added_id = added_tokens_by_id.keys().copied().max().unwrap_or(0);
+        let reserved_end = vocab_size_from_config
+            .unwrap_or_else(|| num_base_tokens.saturating_add(FALLBACK_NUM_RESERVED_SPECIAL_TOKENS))
+            .max(num_base_tokens)
+            .max(max_added_id.saturating_add(1));
+
+        let mut special_tokens_encoder: FxHashMap<String, u32> =
+            FxHashMap::with_capacity_and_hasher(
+                (reserved_end - num_base_tokens) as usize,
+                Default::default(),
+            );
+        let mut non_special_added_ids: FxHashSet<u32> = FxHashSet::default();
+        for id in num_base_tokens..reserved_end {
+            let name = match added_tokens_by_id.get(&id) {
+                Some(token) => {
+                    if !token.special {
+                        non_special_added_ids.insert(id);
+                    }
+                    token.content.clone()
+                }
+                None => format!("<|reserved_token_{id}|>"),
+            };
+            special_tokens_encoder.insert(name, id);
+        }
+
+        let mut token_by_id: FxHashMap<u32, String> = FxHashMap::with_capacity_and_hasher(
+            encoder.len() + special_tokens_encoder.len(),
+            Default::default(),
+        );
+        for (token_bytes, &id) in &encoder {
+            token_by_id.insert(id, String::from_utf8_lossy(token_bytes).into_owned());
+        }
+        for (token, &id) in &special_tokens_encoder {
+            token_by_id.insert(id, token.clone());
+        }
+
+        let pattern = model_config.as_ref().map_or(CL100K_BASE_PATTERN, detect_bpe_pattern);
+
+        Ok(Self {
+            encoder,
+            special_tokens_encoder,
+            metadata: TokenMetadata {
+                num_base_tokens,
+                vocab_upper_bound: reserved_end,
+                non_special_added_ids,
+                token_by_id,
+            },
+            pattern,
+        })
+    }
+}
+
+impl Tokenizer for TiktokenTokenizer {
+    fn encode(&self, text: &str, _add_special_tokens: bool) -> Result<Vec<u32>> {
+        // Tiktoken does not have a separate add_special_tokens toggle; both
+        // backends recognize registered special tokens in the input.
+        Ok(match &self.backend {
+            Backend::Riptoken(backend) => backend.encode(text),
+            Backend::TiktokenRs(backend) => backend.encode(text),
+        })
+    }
+
+    fn decode(&self, token_ids: &[u32], skip_special_tokens: bool) -> Result<String> {
+        // Filter passes:
+        //
+        // 1. The constructor registers every id in `[num_base_tokens, vocab_upper_bound)` as a
+        //    special token (named or `<|reserved_token_{id}|>` placeholder, matching
+        //    `tokenization_kimi.py`). The tiktoken-rs backend additionally drops ids at or above
+        //    that bound so `_decode_native_and_split` cannot panic; riptoken's `decode_bytes`
+        //    already skips unknown ids.
+        //
+        // 2. When `skip_special_tokens = true`, ids in `[num_base_tokens, vocab_upper_bound)` are
+        //    dropped *unless* they were marked `"special": false` in `added_tokens_decoder`. This
+        //    matches HuggingFace's tokenizer semantics: tool-call markers and `<think>` /
+        //    `</think>` (which Kimi K2 / K2.5 declare as non-special) stay in the output, while
+        //    BOS/EOS/header tokens and reserved-slot placeholders are stripped.
+        //
+        // Lossy UTF-8 decoding (instead of strict `String::from_utf8`) is used so
+        // partial multi-byte sequences become `\u{FFFD}`, which `DecodeStream`
+        // relies on to detect incomplete characters during streaming.
+        let ids = if skip_special_tokens {
+            &self.metadata.filter_special_tokens(token_ids)
+        } else {
+            token_ids
+        };
+
+        Ok(match &self.backend {
+            Backend::Riptoken(backend) => backend.decode(ids),
+            Backend::TiktokenRs(backend) => backend.decode(ids, &self.metadata),
+        })
+    }
+
+    fn token_to_id(&self, token: &str) -> Option<u32> {
+        match &self.backend {
+            Backend::Riptoken(backend) => backend.token_to_id(token),
+            Backend::TiktokenRs(backend) => backend.token_to_id(token),
+        }
+    }
+
+    fn id_to_token(&self, id: u32) -> Option<String> {
+        self.metadata.id_to_token(id)
+    }
+
+    fn is_special_id(&self, token_id: u32) -> bool {
+        self.metadata.is_special_id(token_id)
+    }
+}
+
+/// Select the BPE regex pattern for a tiktoken model based on `config.json`.
+///
+/// Most tiktoken models use the `cl100k_base` regex. Kimi models ship a custom
+/// regex in their Python tokenizer implementation; we mirror the explicit
+/// `model_type` switch used by Dynamo instead of heuristically parsing Python
+/// source files.
+fn detect_bpe_pattern(config: &TiktokenModelConfig) -> &'static str {
+    let model_type = config.effective_model_type();
+
+    match model_type {
+        Some("kimi" | "kimi_k2" | "kimi_k25" | "deepseek_v3") => KIMI_PATTERN,
+        _ => CL100K_BASE_PATTERN,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs;
+    use std::path::{Path, PathBuf};
+
+    use base64::Engine as _;
+    use tempfile::TempDir;
+
+    use super::{
+        CL100K_BASE_PATTERN, KIMI_PATTERN, TiktokenModelConfig, TiktokenTokenizer,
+        TiktokenTokenizerConfig, detect_bpe_pattern,
+    };
+    use crate::Tokenizer;
+
+    macro_rules! config_json {
+        ($($json:tt)+) => {
+            serde_json::from_value::<TiktokenModelConfig>(serde_json::json!($($json)+)).unwrap()
+        };
+    }
+
+    /// Write a minimal `*.tiktoken` BPE file (one token per byte 0..=255) into
+    /// `dir` and return its path. The single-byte vocab is enough to
+    /// exercise the multi-byte / streaming UTF-8 paths without depending on
+    /// any pretrained tokenizer asset.
+    fn write_synthetic_bpe_file(dir: &std::path::Path) -> PathBuf {
+        let mut content = String::new();
+        for byte in 0u8..=255 {
+            let b64 = base64::engine::general_purpose::STANDARD.encode([byte]);
+            content.push_str(&format!("{b64} {}\n", byte as u32));
+        }
+        let path = dir.join("test.tiktoken");
+        fs::write(&path, content).expect("write tiktoken file");
+        path
+    }
+
+    /// Write a synthetic `*.tiktoken` file whose base-vocab ranks are
+    /// sparse/non-contiguous.
+    ///
+    /// This reproduces the important edge case for `num_base_tokens`: it must
+    /// be derived from `max_rank + 1`, not `encoder.len()`, otherwise
+    /// high-rank base tokens get misclassified as reserved/special ids.
+    fn write_sparse_rank_bpe_file(dir: &std::path::Path) -> PathBuf {
+        let mut content = String::new();
+        for byte in 0u8..=255 {
+            let b64 = base64::engine::general_purpose::STANDARD.encode([byte]);
+            content.push_str(&format!("{b64} {}\n", byte as u32));
+        }
+
+        let high_rank_token = base64::engine::general_purpose::STANDARD.encode(b"SPARSE");
+        content.push_str(&format!("{high_rank_token} 1000\n"));
+
+        let path = dir.join("sparse-rank.tiktoken");
+        fs::write(&path, content).expect("write sparse-rank tiktoken file");
+        path
+    }
+
+    /// Build a `TiktokenTokenizer` from the synthetic BPE file with no sibling
+    /// config files, so the constructor takes the
+    /// `FALLBACK_NUM_RESERVED_SPECIAL_TOKENS` (256) path.
+    fn explicit_backends(path: &Path) -> Vec<TiktokenTokenizer> {
+        vec![
+            TiktokenTokenizer::new_riptoken(path).expect("load riptoken backend"),
+            TiktokenTokenizer::new_tiktoken_rs(path).expect("load tiktoken-rs backend"),
+        ]
+    }
+
+    fn tiktoken_backends() -> (Vec<TiktokenTokenizer>, TempDir) {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let path = write_synthetic_bpe_file(dir.path());
+        (explicit_backends(&path), dir)
+    }
+
+    /// Verify that tiktoken decode uses lossy UTF-8 (producing `\u{FFFD}`)
+    /// rather than returning an error for incomplete multi-byte sequences.
+    /// This is critical for streaming decode — `DecodeStream` relies on
+    /// `\u{FFFD}` to detect incomplete characters.
+    #[test]
+    fn tiktoken_decode_incomplete_utf8_produces_replacement_char() {
+        let (backends, _dir) = tiktoken_backends();
+
+        for backend in backends {
+            let ids = backend.encode("你", false).unwrap();
+            let full = backend.decode(&ids, false).unwrap();
+            assert_eq!(full, "你");
+
+            let text_with_multibyte = "Hello你好World";
+            let all_ids = backend.encode(text_with_multibyte, false).unwrap();
+            for &id in &all_ids {
+                let result = backend.decode(&[id], false);
+                assert!(result.is_ok(), "decode of token {id} should not error");
+            }
+        }
+    }
+
+    /// When `config.json` exposes a `vocab_size`, the reserved-token range must
+    /// be sized to it rather than to the 256-slot fallback. This is the
+    /// general (non-Kimi-specific) path: any tiktoken model whose own
+    /// `config.json` says e.g. `vocab_size = 280` should populate
+    /// reserved slots for `[num_base_tokens, 280)` and nothing beyond.
+    #[test]
+    fn tiktoken_reserved_range_uses_vocab_size_from_config_json() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let bpe_path = write_synthetic_bpe_file(dir.path());
+        // num_base_tokens = 256, vocab_size = 280 → reserved range = [256, 280) (24
+        // slots, smaller than the 256 fallback so we can prove the config value
+        // is honoured).
+        fs::write(dir.path().join("config.json"), r#"{"vocab_size": 280}"#)
+            .expect("write config.json");
+
+        for backend in explicit_backends(&bpe_path) {
+            // Inside the configured range: reserved placeholder, round-trips both ways.
+            let in_range_id: u32 = 270;
+            let placeholder = format!("<|reserved_token_{in_range_id}|>");
+            assert_eq!(backend.decode(&[in_range_id], false).unwrap(), placeholder);
+            assert_eq!(
+                backend.encode(&placeholder, false).unwrap(),
+                vec![in_range_id]
+            );
+            assert_eq!(
+                backend.id_to_token(in_range_id).as_deref(),
+                Some(placeholder.as_str())
+            );
+
+            // Outside the configured range: not registered as a reserved slot — falls
+            // through to the backend's unknown-id behavior. The point is that we *don't*
+            // over-populate beyond what the model actually exposes.
+            let out_of_range_id: u32 = 290;
+            let out_of_range_placeholder = format!("<|reserved_token_{out_of_range_id}|>");
+            assert_eq!(backend.decode(&[out_of_range_id], false).unwrap(), "");
+            assert_eq!(backend.token_to_id(&out_of_range_placeholder), None);
+            assert_eq!(backend.id_to_token(out_of_range_id), None);
+        }
+    }
+
+    /// Sparse/non-contiguous BPE ranks must still count as base-vocab ids.
+    ///
+    /// Regression shape:
+    /// - base vocabulary contains ids 0..=255 and also a normal BPE token at id 1000
+    /// - if `num_base_tokens` were computed as `encoder.len()` (257), id 1000 would be
+    ///   misclassified as special/reserved and disappear under `skip_special_tokens = true`
+    #[test]
+    fn tiktoken_sparse_base_ranks_are_not_misclassified_as_special() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let bpe_path = write_sparse_rank_bpe_file(dir.path());
+        fs::write(dir.path().join("config.json"), r#"{"vocab_size": 1002}"#)
+            .expect("write config.json");
+
+        for backend in explicit_backends(&bpe_path) {
+            let sparse_id = backend.token_to_id("SPARSE");
+            assert_eq!(sparse_id, Some(1000));
+            assert_eq!(backend.id_to_token(1000).as_deref(), Some("SPARSE"));
+            assert!(!backend.is_special_id(1000));
+            assert_eq!(backend.decode(&[1000], false).unwrap(), "SPARSE");
+            assert_eq!(backend.decode(&[1000], true).unwrap(), "SPARSE");
+        }
+    }
+
+    /// `skip_special_tokens` must:
+    ///  * keep regular BPE token text unchanged,
+    ///  * drop ids whose `added_tokens_decoder` entry says `"special": true`,
+    ///  * drop reserved-slot placeholder ids (which default to special),
+    ///  * keep ids whose `added_tokens_decoder` entry says `"special": false` — this is how Kimi K2
+    ///    / K2.5 marks tool-call markers and `<think>` / `</think>`.
+    ///
+    /// Synthetic backend has `num_base_tokens = 256`. We write a
+    /// `tokenizer_config.json` that names ids 257 (special) and 258
+    /// (non-special), and a `config.json` with `vocab_size` covering both.
+    /// Id 259 stays a default reserved placeholder (special).
+    #[test]
+    fn tiktoken_skip_special_tokens_filters_special_but_keeps_non_special_added_tokens() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let bpe_path = write_synthetic_bpe_file(dir.path());
+        fs::write(
+            dir.path().join("tokenizer_config.json"),
+            r#"{
+                "added_tokens_decoder": {
+                    "257": { "content": "<|im_end|>", "special": true },
+                    "258": { "content": "<|tool_call_begin|>", "special": false }
+                }
+            }"#,
+        )
+        .expect("write tokenizer_config.json");
+        fs::write(dir.path().join("config.json"), r#"{"vocab_size": 260}"#)
+            .expect("write config.json");
+
+        for backend in explicit_backends(&bpe_path) {
+            // Resolve the BPE ids for "Hi" so we can interleave them with special-token
+            // ids.
+            let h = backend.encode("H", false).unwrap()[0];
+            let i = backend.encode("i", false).unwrap()[0];
+
+            let special_id: u32 = 257; // <|im_end|>
+            let non_special_id: u32 = 258; // <|tool_call_begin|>
+            let reserved_id: u32 = 259; // default <|reserved_token_259|> placeholder
+
+            let ids = vec![h, special_id, i, non_special_id, reserved_id];
+
+            // skip_special_tokens = false: everything is rendered as-is.
+            let kept = backend.decode(&ids, false).unwrap();
+            assert_eq!(
+                kept,
+                "H<|im_end|>i<|tool_call_begin|><|reserved_token_259|>"
+            );
+
+            // skip_special_tokens = true: special token (257) and reserved placeholder
+            // (259) are dropped; the non-special added token (258) survives.
+            let stripped = backend.decode(&ids, true).unwrap();
+            assert_eq!(stripped, "Hi<|tool_call_begin|>");
+        }
+    }
+
+    /// `vocab_size` may live under `text_config` for composite (e.g.
+    /// multimodal) configs.
+    #[test]
+    fn tiktoken_reserved_range_reads_text_config_vocab_size() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let bpe_path = write_synthetic_bpe_file(dir.path());
+        fs::write(
+            dir.path().join("config.json"),
+            r#"{"text_config": {"vocab_size": 270}}"#,
+        )
+        .expect("write config.json");
+
+        for backend in explicit_backends(&bpe_path) {
+            let in_range_id: u32 = 260;
+            let placeholder = format!("<|reserved_token_{in_range_id}|>");
+            assert_eq!(backend.decode(&[in_range_id], false).unwrap(), placeholder);
+
+            // Just outside the nested vocab_size — should not be registered.
+            assert_eq!(backend.decode(&[270], false).unwrap(), "");
+        }
+    }
+
+    #[test]
+    fn tiktoken_detects_kimi_pattern_from_model_type() {
+        let kimi = config_json!({ "model_type": "kimi_k25" });
+        let baseten_kimi = config_json!({ "model_type": "deepseek_v3" });
+        let nested_kimi = config_json!({
+            "model_type": "composite_wrapper",
+            "text_config": { "model_type": "kimi_k2" }
+        });
+        let generic = config_json!({ "model_type": "gpt2" });
+        let nested_generic = config_json!({
+            "model_type": "composite_wrapper",
+            "text_config": { "model_type": "gpt2" }
+        });
+        let missing = config_json!({ "text_config": {} });
+
+        assert_eq!(detect_bpe_pattern(&kimi), KIMI_PATTERN);
+        assert_eq!(detect_bpe_pattern(&baseten_kimi), KIMI_PATTERN);
+        assert_eq!(detect_bpe_pattern(&nested_kimi), CL100K_BASE_PATTERN);
+        assert_eq!(detect_bpe_pattern(&generic), CL100K_BASE_PATTERN);
+        assert_eq!(detect_bpe_pattern(&nested_generic), CL100K_BASE_PATTERN);
+        assert_eq!(detect_bpe_pattern(&missing), CL100K_BASE_PATTERN);
+    }
+
+    #[test]
+    fn tiktoken_reads_model_type_from_text_config_when_top_level_missing() {
+        let nested_only = config_json!({
+            "text_config": { "model_type": "kimi_k2" }
+        });
+        let direct_and_nested = config_json!({
+            "model_type": "kimi_k25",
+            "text_config": { "model_type": "kimi_k2" }
+        });
+        let missing = config_json!({
+            "text_config": {}
+        });
+
+        assert_eq!(nested_only.effective_model_type(), Some("kimi_k2"));
+        assert_eq!(direct_and_nested.effective_model_type(), Some("kimi_k25"));
+        assert_eq!(missing.effective_model_type(), None);
+    }
+
+    #[test]
+    fn tiktoken_tokenizer_config_models_added_tokens_decoder() {
+        let config: TiktokenTokenizerConfig = serde_json::from_value(serde_json::json!({
+            "added_tokens_decoder": {
+                "257": { "content": "<think>" },
+                "258": { "content": "</think>", "special": true }
+            }
+        }))
+        .unwrap();
+
+        let added_tokens = config.added_tokens_decoder;
+        assert_eq!(added_tokens.len(), 2);
+        assert_eq!(
+            added_tokens.get(&257).map(|t| t.content.as_str()),
+            Some("<think>")
+        );
+        assert_eq!(added_tokens.get(&257).map(|t| t.special), Some(false));
+        assert_eq!(
+            added_tokens.get(&258).map(|t| (t.content.as_str(), t.special)),
+            Some(("</think>", true))
+        );
+    }
+
+    /// Reserved token ids in `[num_base_tokens, num_base_tokens + 256)` must
+    /// decode to their placeholder name (matching `tokenization_kimi.py`'s
+    /// `<|reserved_token_{i}|>` format), even when the source
+    /// `tokenizer_config.json` does not list them in `added_tokens_decoder`.
+    ///
+    /// In our synthetic backend `num_base_tokens = 256` (256 single-byte BPE
+    /// tokens), so the reserved range is `[256, 512)`. Picking id 300 —
+    /// well inside that range and absent from any `added_tokens_decoder` —
+    /// should round-trip both ways.
+    #[test]
+    fn tiktoken_reserved_token_round_trip() {
+        let (backends, _dir) = tiktoken_backends();
+
+        for backend in backends {
+            let reserved_id: u32 = 300;
+            let placeholder = format!("<|reserved_token_{reserved_id}|>");
+
+            let decoded = backend.decode(&[reserved_id], false).unwrap();
+            assert_eq!(decoded, placeholder);
+
+            // The placeholder name should also encode back to the same single id, since
+            // the constructor registers it as a special token with `CoreBPE`.
+            let encoded = backend.encode(&placeholder, false).unwrap();
+            assert_eq!(encoded, vec![reserved_id]);
+
+            assert_eq!(backend.token_to_id(&placeholder), Some(reserved_id));
+            assert_eq!(
+                backend.id_to_token(reserved_id).as_deref(),
+                Some(placeholder.as_str())
+            );
+        }
+    }
+
+    /// Decoding a token id that is beyond even the reserved range must not
+    /// panic — it falls through to the warn-and-skip backstop instead of
+    /// crashing the worker thread.
+    #[test]
+    fn tiktoken_rs_decode_unknown_token_id_does_not_panic() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let path = write_synthetic_bpe_file(dir.path());
+        let backend = TiktokenTokenizer::new_tiktoken_rs(&path).expect("load tiktoken-rs backend");
+
+        // ID well above num_base_tokens (256) + reserved (256) = 512 — guaranteed
+        // unknown.
+        let unknown_id: u32 = 999_999;
+        let result = backend.decode(&[unknown_id], false);
+        assert_eq!(result.unwrap(), "");
+
+        // Mixed: known bytes for "Hi" surrounding an unknown id should yield just "Hi".
+        let h = backend.encode("H", false).unwrap()[0];
+        let i = backend.encode("i", false).unwrap()[0];
+        let result = backend.decode(&[h, unknown_id, i], false).unwrap();
+        assert_eq!(result, "Hi");
+    }
+
+    #[test]
+    fn riptoken_decode_unknown_token_id_does_not_panic() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let path = write_synthetic_bpe_file(dir.path());
+        let backend = TiktokenTokenizer::new_riptoken(&path).expect("load riptoken backend");
+
+        let unknown_id: u32 = 999_999;
+        assert_eq!(backend.decode(&[unknown_id], false).unwrap(), "");
+
+        let h = backend.encode("H", false).unwrap()[0];
+        let i = backend.encode("i", false).unwrap()[0];
+        assert_eq!(backend.decode(&[h, unknown_id, i], false).unwrap(), "Hi");
+    }
+
+    /// Streaming decode of CJK text through tiktoken should produce the
+    /// original text without errors, even though individual tokens may
+    /// represent partial UTF-8 byte sequences.
+    #[test]
+    fn tiktoken_streaming_decode_multibyte() {
+        let (backends, _dir) = tiktoken_backends();
+        for backend in backends {
+            let text = "你好世界"; // 4 CJK characters
+            let ids = backend.encode(text, false).unwrap();
+
+            let mut decoder = backend.create_decode_stream(&[], false, 0);
+            let mut output = String::new();
+            for &id in &ids {
+                decoder.push_token(id).unwrap();
+                if let Some(chunk) = decoder.next_chunk() {
+                    output.push_str(&chunk);
+                }
+            }
+            let (last_chunk, full_text) = decoder.flush(None).unwrap();
+            if let Some(chunk) = last_chunk {
+                output.push_str(&chunk);
+            }
+
+            assert_eq!(output, text);
+            assert_eq!(full_text, text);
+        }
+    }
+
+    /// Mixed ASCII and multi-byte text should stream correctly through
+    /// tiktoken.
+    #[test]
+    fn tiktoken_streaming_decode_mixed_ascii_and_multibyte() {
+        let (backends, _dir) = tiktoken_backends();
+        for backend in backends {
+            let text = "Hello 你好 World 🌍";
+            let ids = backend.encode(text, false).unwrap();
+
+            let mut decoder = backend.create_decode_stream(&[], false, 0);
+            let mut output = String::new();
+            for &id in &ids {
+                decoder.push_token(id).unwrap();
+                if let Some(chunk) = decoder.next_chunk() {
+                    output.push_str(&chunk);
+                }
+            }
+            let (last_chunk, full_text) = decoder.flush(None).unwrap();
+            if let Some(chunk) = last_chunk {
+                output.push_str(&chunk);
+            }
+
+            assert_eq!(output, text);
+            assert_eq!(full_text, text);
+        }
+    }
+
+    #[test]
+    fn tiktoken_token_to_id_resolves_added_special_tokens() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let bpe_path = write_synthetic_bpe_file(dir.path());
+        fs::write(
+            dir.path().join("tokenizer_config.json"),
+            r#"{
+                "added_tokens_decoder": {
+                    "257": { "content": "<think>", "special": false },
+                    "258": { "content": "</think>", "special": false }
+                }
+            }"#,
+        )
+        .expect("write tokenizer_config.json");
+        fs::write(dir.path().join("config.json"), r#"{"vocab_size": 259}"#)
+            .expect("write config.json");
+
+        for backend in explicit_backends(&bpe_path) {
+            assert_eq!(backend.token_to_id("<think>"), Some(257));
+            assert_eq!(backend.token_to_id("</think>"), Some(258));
+            assert_eq!(backend.id_to_token(257).as_deref(), Some("<think>"));
+            assert_eq!(backend.id_to_token(258).as_deref(), Some("</think>"));
+            assert_eq!(
+                backend.decode(&[257, 258], true).unwrap(),
+                "<think></think>"
+            );
+        }
+    }
+
+    #[test]
+    fn riptoken_token_to_id_uses_encode_single_token_path() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let bpe_path = write_synthetic_bpe_file(dir.path());
+        fs::write(
+            dir.path().join("tokenizer_config.json"),
+            r#"{
+                "added_tokens_decoder": {
+                    "257": { "content": "<think>", "special": false }
+                }
+            }"#,
+        )
+        .expect("write tokenizer_config.json");
+        fs::write(dir.path().join("config.json"), r#"{"vocab_size": 258}"#)
+            .expect("write config.json");
+        let backend = TiktokenTokenizer::new_riptoken(&bpe_path).expect("load riptoken backend");
+
+        assert_eq!(backend.token_to_id("H"), Some(b'H' as u32));
+        assert_eq!(backend.id_to_token(b'H' as u32).as_deref(), Some("H"));
+        assert_eq!(backend.token_to_id("<think>"), Some(257));
+        assert_eq!(backend.id_to_token(257).as_deref(), Some("<think>"));
+    }
+
+    #[test]
+    fn tiktoken_rs_token_to_id_handles_unknown_special_like_text_without_panicking() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let path = write_synthetic_bpe_file(dir.path());
+        let backend = TiktokenTokenizer::new_tiktoken_rs(&path).expect("load tiktoken-rs backend");
+
+        assert_eq!(backend.token_to_id("<|definitely_not_registered|>"), None);
+    }
+
+    #[test]
+    fn riptoken_token_to_id_handles_unknown_special_like_text_without_panicking() {
+        let dir = tempfile::tempdir().expect("create temp dir");
+        let path = write_synthetic_bpe_file(dir.path());
+        let backend = TiktokenTokenizer::new_riptoken(&path).expect("load riptoken backend");
+
+        assert_eq!(backend.token_to_id("<|definitely_not_registered|>"), None);
+    }
+}
diff --git a/rust/src/tool-parser/Cargo.toml b/rust/src/tool-parser/Cargo.toml
new file mode 100644
index 000000000000..e02c397cabf4
--- /dev/null
+++ b/rust/src/tool-parser/Cargo.toml
@@ -0,0 +1,75 @@
+[package]
+name = "vllm-tool-parser"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[features]
+test-util = []
+
+[dependencies]
+serde.workspace = true
+serde_json.workspace = true
+thiserror.workspace = true
+thiserror-ext.workspace = true
+winnow.workspace = true
+
+[dev-dependencies]
+criterion.workspace = true
+expect-test.workspace = true
+futures.workspace = true
+openai-protocol.workspace = true
+tool-parser.workspace = true
+
+[[bench]]
+name = "deepseek_v3"
+harness = false
+required-features = ["test-util"]
+
+[[bench]]
+name = "deepseek_v31"
+harness = false
+required-features = ["test-util"]
+
+[[bench]]
+name = "deepseek_v32"
+harness = false
+required-features = ["test-util"]
+
+[[bench]]
+name = "qwen3_coder"
+harness = false
+required-features = ["test-util"]
+
+[[bench]]
+name = "qwen3_xml"
+harness = false
+required-features = ["test-util"]
+
+[[bench]]
+name = "llama3_json"
+harness = false
+required-features = ["test-util"]
+
+[[bench]]
+name = "minimax_m2"
+harness = false
+required-features = ["test-util"]
+
+[[bench]]
+name = "glm45_moe"
+harness = false
+required-features = ["test-util"]
+
+[[bench]]
+name = "kimi_k2"
+harness = false
+required-features = ["test-util"]
+
+[[bench]]
+name = "gemma4"
+harness = false
+required-features = ["test-util"]
+
+[lints]
+workspace = true
diff --git a/rust/src/tool-parser/benches/deepseek_v3.rs b/rust/src/tool-parser/benches/deepseek_v3.rs
new file mode 100644
index 000000000000..75d2e417acee
--- /dev/null
+++ b/rust/src/tool-parser/benches/deepseek_v3.rs
@@ -0,0 +1,130 @@
+use std::time::Duration;
+
+use criterion::{BatchSize, Criterion, Throughput, black_box, criterion_group, criterion_main};
+use tool_parser::parsers::DeepSeekParser as ExternalDeepSeekParser;
+use vllm_tool_parser::test_utils::{split_by_chars, test_tools};
+use vllm_tool_parser::{DeepSeekV3ToolParser, Tool, ToolParser};
+
+mod utils;
+use utils::{feed_external_parser, feed_parser, openai_tools};
+
+const CHUNK_CHARS: usize = 7;
+const LONG_NORMAL_TEXT_REPEATS: usize = 2048;
+
+fn mixed_fixture() -> String {
+    concat!(
+        "I will check two cities before answering.\n",
+        "<｜tool▁calls▁begin｜>",
+        "<｜tool▁call▁begin｜>function<｜tool▁sep｜>get_weather\n",
+        "```json\n",
+        "{\"location\":\"Hangzhou\",\"days\":3}",
+        "\n```<｜tool▁call▁end｜>",
+        "<｜tool▁call▁begin｜>function<｜tool▁sep｜>get_weather\n",
+        "```json\n",
+        "{\"location\":\"San Francisco\",\"days\":2}",
+        "\n```<｜tool▁call▁end｜>",
+        "<｜tool▁calls▁end｜>",
+    )
+    .to_string()
+}
+
+fn long_normal_text_fixture() -> String {
+    let line = "This is ordinary assistant text with no DeepSeek V3 tool markers at all.\n";
+    line.repeat(LONG_NORMAL_TEXT_REPEATS)
+}
+
+fn native_parser(tools: &[Tool]) -> Box<dyn ToolParser> {
+    DeepSeekV3ToolParser::create(tools).expect("DeepSeek V3 parser should initialize")
+}
+
+fn run_stream_group(
+    c: &mut Criterion,
+    name: &str,
+    tools: &[Tool],
+    text: &str,
+    chunk_chars: usize,
+    expected_normal_text: &str,
+    expected_native_calls_len: usize,
+) {
+    let chunks = split_by_chars(text, chunk_chars);
+    let openai_tools = openai_tools(tools);
+
+    let mut group = c.benchmark_group(name);
+    group.sample_size(50);
+    group.warm_up_time(Duration::from_millis(300));
+    group.measurement_time(Duration::from_secs(2));
+    group.throughput(Throughput::Bytes(text.len() as u64));
+
+    group.bench_function("native_reuse_parser", |b| {
+        let mut parser = native_parser(tools);
+        b.iter(|| {
+            let result = feed_parser(&mut *parser, black_box(&chunks));
+            debug_assert_eq!(result.0, expected_normal_text);
+            debug_assert_eq!(result.1, expected_native_calls_len);
+            black_box(result);
+        })
+    });
+
+    group.bench_function("native_create_parser", |b| {
+        b.iter_batched(
+            || native_parser(tools),
+            |mut parser| {
+                let result = feed_parser(&mut *parser, black_box(&chunks));
+                debug_assert_eq!(result.0, expected_normal_text);
+                debug_assert_eq!(result.1, expected_native_calls_len);
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.bench_function("external_reuse_parser", |b| {
+        let mut parser = ExternalDeepSeekParser::new();
+        b.iter(|| {
+            let result = feed_external_parser(&mut parser, &openai_tools, black_box(&chunks));
+            black_box(result);
+        })
+    });
+
+    group.bench_function("external_create_parser", |b| {
+        b.iter_batched(
+            ExternalDeepSeekParser::new,
+            |mut parser| {
+                let result = feed_external_parser(&mut parser, &openai_tools, black_box(&chunks));
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
+fn bench_deepseek_v3(c: &mut Criterion) {
+    let tools = test_tools();
+    let mixed_text = mixed_fixture();
+    let long_normal_text = long_normal_text_fixture();
+
+    run_stream_group(
+        c,
+        "deepseek_v3/mixed_text_tool_call",
+        &tools,
+        &mixed_text,
+        CHUNK_CHARS,
+        "I will check two cities before answering.\n",
+        2,
+    );
+
+    run_stream_group(
+        c,
+        "deepseek_v3/long_normal_text",
+        &tools,
+        &long_normal_text,
+        CHUNK_CHARS,
+        &long_normal_text,
+        0,
+    );
+}
+
+criterion_group!(benches, bench_deepseek_v3);
+criterion_main!(benches);
diff --git a/rust/src/tool-parser/benches/deepseek_v31.rs b/rust/src/tool-parser/benches/deepseek_v31.rs
new file mode 100644
index 000000000000..bb6d029baff9
--- /dev/null
+++ b/rust/src/tool-parser/benches/deepseek_v31.rs
@@ -0,0 +1,128 @@
+use std::time::Duration;
+
+use criterion::{BatchSize, Criterion, Throughput, black_box, criterion_group, criterion_main};
+use tool_parser::parsers::DeepSeek31Parser as ExternalDeepSeek31Parser;
+use vllm_tool_parser::test_utils::{split_by_chars, test_tools};
+use vllm_tool_parser::{DeepSeekV31ToolParser, Tool, ToolParser};
+
+mod utils;
+use utils::{feed_external_parser, feed_parser, openai_tools};
+
+const CHUNK_CHARS: usize = 7;
+const LONG_NORMAL_TEXT_REPEATS: usize = 2048;
+
+fn mixed_fixture() -> String {
+    concat!(
+        "I will check two cities before answering.\n",
+        "<｜tool▁calls▁begin｜>",
+        "<｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>",
+        "{\"location\":\"Hangzhou\",\"days\":3}",
+        "<｜tool▁call▁end｜>",
+        "<｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>",
+        "{\"location\":\"San Francisco\",\"days\":2}",
+        "<｜tool▁call▁end｜>",
+        "<｜tool▁calls▁end｜>",
+    )
+    .to_string()
+}
+
+fn long_normal_text_fixture() -> String {
+    let line = "This is ordinary assistant text with no DeepSeek V3.1 tool markers at all.\n";
+    line.repeat(LONG_NORMAL_TEXT_REPEATS)
+}
+
+fn native_parser(tools: &[Tool]) -> Box<dyn ToolParser> {
+    DeepSeekV31ToolParser::create(tools).expect("DeepSeek V3.1 parser should initialize")
+}
+
+fn run_stream_group(
+    c: &mut Criterion,
+    name: &str,
+    tools: &[Tool],
+    text: &str,
+    chunk_chars: usize,
+    expected_normal_text: &str,
+    expected_native_calls_len: usize,
+) {
+    let chunks = split_by_chars(text, chunk_chars);
+    let openai_tools = openai_tools(tools);
+
+    let mut group = c.benchmark_group(name);
+    group.sample_size(50);
+    group.warm_up_time(Duration::from_millis(300));
+    group.measurement_time(Duration::from_secs(2));
+    group.throughput(Throughput::Bytes(text.len() as u64));
+
+    group.bench_function("native_reuse_parser", |b| {
+        let mut parser = native_parser(tools);
+        b.iter(|| {
+            let result = feed_parser(&mut *parser, black_box(&chunks));
+            debug_assert_eq!(result.0, expected_normal_text);
+            debug_assert_eq!(result.1, expected_native_calls_len);
+            black_box(result);
+        })
+    });
+
+    group.bench_function("native_create_parser", |b| {
+        b.iter_batched(
+            || native_parser(tools),
+            |mut parser| {
+                let result = feed_parser(&mut *parser, black_box(&chunks));
+                debug_assert_eq!(result.0, expected_normal_text);
+                debug_assert_eq!(result.1, expected_native_calls_len);
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.bench_function("external_reuse_parser", |b| {
+        let mut parser = ExternalDeepSeek31Parser::new();
+        b.iter(|| {
+            let result = feed_external_parser(&mut parser, &openai_tools, black_box(&chunks));
+            black_box(result);
+        })
+    });
+
+    group.bench_function("external_create_parser", |b| {
+        b.iter_batched(
+            ExternalDeepSeek31Parser::new,
+            |mut parser| {
+                let result = feed_external_parser(&mut parser, &openai_tools, black_box(&chunks));
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
+fn bench_deepseek_v31(c: &mut Criterion) {
+    let tools = test_tools();
+    let mixed_text = mixed_fixture();
+    let long_normal_text = long_normal_text_fixture();
+
+    run_stream_group(
+        c,
+        "deepseek_v31/mixed_text_tool_call",
+        &tools,
+        &mixed_text,
+        CHUNK_CHARS,
+        "I will check two cities before answering.\n",
+        2,
+    );
+
+    run_stream_group(
+        c,
+        "deepseek_v31/long_normal_text",
+        &tools,
+        &long_normal_text,
+        CHUNK_CHARS,
+        &long_normal_text,
+        0,
+    );
+}
+
+criterion_group!(benches, bench_deepseek_v31);
+criterion_main!(benches);
diff --git a/rust/src/tool-parser/benches/deepseek_v32.rs b/rust/src/tool-parser/benches/deepseek_v32.rs
new file mode 100644
index 000000000000..c7a8346120d7
--- /dev/null
+++ b/rust/src/tool-parser/benches/deepseek_v32.rs
@@ -0,0 +1,113 @@
+use std::time::Duration;
+
+use criterion::{BatchSize, Criterion, Throughput, black_box, criterion_group, criterion_main};
+use vllm_tool_parser::test_utils::{split_by_chars, test_tools};
+use vllm_tool_parser::{DeepSeekV32ToolParser, Tool, ToolParser};
+
+mod utils;
+use utils::feed_parser;
+
+const CHUNK_CHARS: usize = 7;
+const LONG_NORMAL_TEXT_REPEATS: usize = 2048;
+
+fn mixed_fixture() -> String {
+    concat!(
+        "I will check two cities before answering.\n",
+        "<｜DSML｜function_calls>\n",
+        "<｜DSML｜invoke name=\"get_weather\">\n",
+        "<｜DSML｜parameter name=\"location\" string=\"true\">Hangzhou</｜DSML｜parameter>\n",
+        "<｜DSML｜parameter name=\"date\" string=\"true\">2026-04-28</｜DSML｜parameter>\n",
+        "<｜DSML｜parameter name=\"unit\" string=\"true\">celsius</｜DSML｜parameter>\n",
+        "<｜DSML｜parameter name=\"days\" string=\"false\">3</｜DSML｜parameter>\n",
+        "</｜DSML｜invoke>\n",
+        "<｜DSML｜invoke name=\"get_weather\">\n",
+        "<｜DSML｜parameter name=\"location\" string=\"true\">San Francisco</｜DSML｜parameter>\n",
+        "<｜DSML｜parameter name=\"date\" string=\"true\">2026-04-28</｜DSML｜parameter>\n",
+        "<｜DSML｜parameter name=\"unit\" string=\"true\">fahrenheit</｜DSML｜parameter>\n",
+        "<｜DSML｜parameter name=\"days\" string=\"false\">2</｜DSML｜parameter>\n",
+        "</｜DSML｜invoke>\n",
+        "</｜DSML｜function_calls>",
+    )
+    .to_string()
+}
+
+fn long_normal_text_fixture() -> String {
+    let line = "This is ordinary assistant text with no DSML tool markers at all.\n";
+    line.repeat(LONG_NORMAL_TEXT_REPEATS)
+}
+
+fn parser(tools: &[Tool]) -> Box<dyn ToolParser> {
+    DeepSeekV32ToolParser::create(tools).expect("DeepSeek V3.2 parser should initialize")
+}
+
+fn run_stream_group(
+    c: &mut Criterion,
+    name: &str,
+    tools: &[Tool],
+    text: &str,
+    chunk_chars: usize,
+    expected_normal_text: &str,
+    expected_calls_len: usize,
+) {
+    let chunks = split_by_chars(text, chunk_chars);
+
+    let mut group = c.benchmark_group(name);
+    group.sample_size(50);
+    group.warm_up_time(Duration::from_millis(300));
+    group.measurement_time(Duration::from_secs(2));
+    group.throughput(Throughput::Bytes(text.len() as u64));
+
+    group.bench_function("reuse_parser", |b| {
+        let mut parser = parser(tools);
+        b.iter(|| {
+            let result = feed_parser(&mut *parser, black_box(&chunks));
+            debug_assert_eq!(result.0, expected_normal_text);
+            debug_assert_eq!(result.1, expected_calls_len);
+            black_box(result);
+        })
+    });
+
+    group.bench_function("create_parser", |b| {
+        b.iter_batched(
+            || parser(tools),
+            |mut parser| {
+                let result = feed_parser(&mut *parser, black_box(&chunks));
+                debug_assert_eq!(result.0, expected_normal_text);
+                debug_assert_eq!(result.1, expected_calls_len);
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
+fn bench_deepseek_v32(c: &mut Criterion) {
+    let tools = test_tools();
+    let mixed_text = mixed_fixture();
+    let long_normal_text = long_normal_text_fixture();
+
+    run_stream_group(
+        c,
+        "deepseek_v32/mixed_text_tool_call",
+        &tools,
+        &mixed_text,
+        CHUNK_CHARS,
+        "I will check two cities before answering.\n",
+        2,
+    );
+
+    run_stream_group(
+        c,
+        "deepseek_v32/long_normal_text",
+        &tools,
+        &long_normal_text,
+        CHUNK_CHARS,
+        &long_normal_text,
+        0,
+    );
+}
+
+criterion_group!(benches, bench_deepseek_v32);
+criterion_main!(benches);
diff --git a/rust/src/tool-parser/benches/gemma4.rs b/rust/src/tool-parser/benches/gemma4.rs
new file mode 100644
index 000000000000..f638c14733a3
--- /dev/null
+++ b/rust/src/tool-parser/benches/gemma4.rs
@@ -0,0 +1,125 @@
+use std::time::Duration;
+
+use criterion::{BatchSize, Criterion, Throughput, black_box, criterion_group, criterion_main};
+use vllm_tool_parser::test_utils::{split_by_chars, test_tools};
+use vllm_tool_parser::{Gemma4ToolParser, Tool, ToolParser};
+
+mod utils;
+use utils::feed_parser;
+
+const CHUNK_CHARS: usize = 7;
+const LONG_NORMAL_TEXT_REPEATS: usize = 2048;
+
+fn mixed_fixture() -> String {
+    concat!(
+        "I will inspect the data before answering.\n",
+        "<|tool_call>",
+        "call:convert{",
+        "whole:114.514,",
+        "flag:true,",
+        "empty:<|\"|><|\"|>,",
+        "payload:{",
+        "name:<|\"|>demo<|\"|>,",
+        "count:42,",
+        "enabled:false,",
+        "missing:null,",
+        "nested:{level:2,label:<|\"|>deep<|\"|>},",
+        "tags:[<|\"|>red<|\"|>,<|\"|>blue<|\"|>,3,true,null,{kind:<|\"|>leaf<|\"|>}]",
+        "},",
+        "items:[",
+        "<|\"|>alpha<|\"|>,",
+        "{key:<|\"|>value<|\"|>,score:0.75},",
+        "[1,2,3]",
+        "]",
+        "}",
+        "<tool_call|>",
+        "<|tool_call>",
+        "call:update_record{",
+        "data:{id:7,active:true,notes:[<|\"|>keep<|\"|>,<|\"|>review<|\"|>]}",
+        "}",
+        "<tool_call|>",
+        " Finished.",
+    )
+    .to_string()
+}
+
+fn long_normal_text_fixture() -> String {
+    let line = "This is ordinary assistant text with no Gemma4 tool markers at all.\n";
+    line.repeat(LONG_NORMAL_TEXT_REPEATS)
+}
+
+fn parser(tools: &[Tool]) -> Box<dyn ToolParser> {
+    Gemma4ToolParser::create(tools).expect("Gemma4 parser should initialize")
+}
+
+fn run_stream_group(
+    c: &mut Criterion,
+    name: &str,
+    tools: &[Tool],
+    text: &str,
+    chunk_chars: usize,
+    expected_normal_text: &str,
+    expected_calls_len: usize,
+) {
+    let chunks = split_by_chars(text, chunk_chars);
+
+    let mut group = c.benchmark_group(name);
+    group.sample_size(50);
+    group.warm_up_time(Duration::from_millis(300));
+    group.measurement_time(Duration::from_secs(2));
+    group.throughput(Throughput::Bytes(text.len() as u64));
+
+    group.bench_function("reuse_parser", |b| {
+        let mut parser = parser(tools);
+        b.iter(|| {
+            let result = feed_parser(&mut *parser, black_box(&chunks));
+            debug_assert_eq!(result.0, expected_normal_text);
+            debug_assert_eq!(result.1, expected_calls_len);
+            black_box(result);
+        })
+    });
+
+    group.bench_function("create_parser", |b| {
+        b.iter_batched(
+            || parser(tools),
+            |mut parser| {
+                let result = feed_parser(&mut *parser, black_box(&chunks));
+                debug_assert_eq!(result.0, expected_normal_text);
+                debug_assert_eq!(result.1, expected_calls_len);
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
+fn bench_gemma4(c: &mut Criterion) {
+    let tools = test_tools();
+    let mixed_text = mixed_fixture();
+    let long_normal_text = long_normal_text_fixture();
+
+    run_stream_group(
+        c,
+        "gemma4/mixed_complex_tool_call",
+        &tools,
+        &mixed_text,
+        CHUNK_CHARS,
+        "I will inspect the data before answering.\n Finished.",
+        2,
+    );
+
+    run_stream_group(
+        c,
+        "gemma4/long_normal_text",
+        &tools,
+        &long_normal_text,
+        CHUNK_CHARS,
+        &long_normal_text,
+        0,
+    );
+}
+
+criterion_group!(benches, bench_gemma4);
+criterion_main!(benches);
diff --git a/rust/src/tool-parser/benches/glm45_moe.rs b/rust/src/tool-parser/benches/glm45_moe.rs
new file mode 100644
index 000000000000..8486885eceb1
--- /dev/null
+++ b/rust/src/tool-parser/benches/glm45_moe.rs
@@ -0,0 +1,210 @@
+use std::time::Duration;
+
+use criterion::{BatchSize, Criterion, Throughput, black_box, criterion_group, criterion_main};
+use tool_parser::parsers::Glm4MoeParser as ExternalGlm4MoeParser;
+use vllm_tool_parser::test_utils::{split_by_chars, test_tools};
+use vllm_tool_parser::{Glm45MoeToolParser, Glm47MoeToolParser, Tool, ToolParser};
+
+mod utils;
+use utils::{feed_external_parser, feed_parser, openai_tools};
+
+const GLM45_PARSER_NAME: &str = "glm45";
+const GLM47_PARSER_NAME: &str = "glm47";
+const CHUNK_CHARS: usize = 7;
+const LONG_NORMAL_TEXT_REPEATS: usize = 2048;
+
+fn glm45_mixed_fixture() -> String {
+    concat!(
+        "I will check two cities before answering.\n",
+        "<tool_call>get_weather\n",
+        "<arg_key>city</arg_key>\n",
+        "<arg_value>Hangzhou</arg_value>\n",
+        "<arg_key>date</arg_key>\n",
+        "<arg_value>2026-05-07</arg_value>\n",
+        "<arg_key>unit</arg_key>\n",
+        "<arg_value>celsius</arg_value>\n",
+        "<arg_key>days</arg_key>\n",
+        "<arg_value>3</arg_value>\n",
+        "</tool_call>\n",
+        "<tool_call>get_weather\n",
+        "<arg_key>city</arg_key>\n",
+        "<arg_value>San Francisco</arg_value>\n",
+        "<arg_key>date</arg_key>\n",
+        "<arg_value>2026-05-07</arg_value>\n",
+        "<arg_key>unit</arg_key>\n",
+        "<arg_value>fahrenheit</arg_value>\n",
+        "<arg_key>days</arg_key>\n",
+        "<arg_value>2</arg_value>\n",
+        "</tool_call>",
+    )
+    .to_string()
+}
+
+fn glm47_mixed_fixture() -> String {
+    concat!(
+        "I will check two cities before answering.\n",
+        "<tool_call>get_weather",
+        "<arg_key>city</arg_key>",
+        "<arg_value>Hangzhou</arg_value>",
+        "<arg_key>date</arg_key>",
+        "<arg_value>2026-05-07</arg_value>",
+        "<arg_key>unit</arg_key>",
+        "<arg_value>celsius</arg_value>",
+        "<arg_key>days</arg_key>",
+        "<arg_value>3</arg_value>",
+        "</tool_call>",
+        "<tool_call>get_weather",
+        "<arg_key>city</arg_key>",
+        "<arg_value>San Francisco</arg_value>",
+        "<arg_key>date</arg_key>",
+        "<arg_value>2026-05-07</arg_value>",
+        "<arg_key>unit</arg_key>",
+        "<arg_value>fahrenheit</arg_value>",
+        "<arg_key>days</arg_key>",
+        "<arg_value>2</arg_value>",
+        "</tool_call>",
+    )
+    .to_string()
+}
+
+fn long_normal_text_fixture() -> String {
+    let line = "This is ordinary assistant text with no GLM MoE tool markers at all.\n";
+    line.repeat(LONG_NORMAL_TEXT_REPEATS)
+}
+
+fn native_parser(name: &str, tools: &[Tool]) -> Box<dyn ToolParser> {
+    match name {
+        GLM45_PARSER_NAME => Glm45MoeToolParser::create(tools),
+        GLM47_PARSER_NAME => Glm47MoeToolParser::create(tools),
+        _ => unreachable!("unexpected GLM parser name"),
+    }
+    .expect("GLM MoE parser should initialize")
+}
+
+fn external_parser(name: &str) -> ExternalGlm4MoeParser {
+    match name {
+        GLM45_PARSER_NAME => ExternalGlm4MoeParser::glm45(),
+        GLM47_PARSER_NAME => ExternalGlm4MoeParser::glm47(),
+        _ => unreachable!("unexpected GLM parser name"),
+    }
+}
+
+fn run_stream_group(
+    c: &mut Criterion,
+    name: &str,
+    parser_name: &str,
+    tools: &[Tool],
+    text: &str,
+    chunk_chars: usize,
+    expected_normal_text: &str,
+    expected_native_calls_len: usize,
+) {
+    let chunks = split_by_chars(text, chunk_chars);
+    let openai_tools = openai_tools(tools);
+
+    let mut group = c.benchmark_group(name);
+    group.sample_size(50);
+    group.warm_up_time(Duration::from_millis(300));
+    group.measurement_time(Duration::from_secs(2));
+    group.throughput(Throughput::Bytes(text.len() as u64));
+
+    group.bench_function("native_reuse_parser", |b| {
+        let mut parser = native_parser(parser_name, tools);
+        b.iter(|| {
+            let result = feed_parser(&mut *parser, black_box(&chunks));
+            debug_assert_eq!(result.0, expected_normal_text);
+            debug_assert_eq!(result.1, expected_native_calls_len);
+            black_box(result);
+        })
+    });
+
+    group.bench_function("native_create_parser", |b| {
+        b.iter_batched(
+            || native_parser(parser_name, tools),
+            |mut parser| {
+                let result = feed_parser(&mut *parser, black_box(&chunks));
+                debug_assert_eq!(result.0, expected_normal_text);
+                debug_assert_eq!(result.1, expected_native_calls_len);
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.bench_function("external_reuse_parser", |b| {
+        let mut parser = external_parser(parser_name);
+        b.iter(|| {
+            let result = feed_external_parser(&mut parser, &openai_tools, black_box(&chunks));
+            debug_assert_eq!(result.0, expected_normal_text);
+            black_box(result);
+        })
+    });
+
+    group.bench_function("external_create_parser", |b| {
+        b.iter_batched(
+            || external_parser(parser_name),
+            |mut parser| {
+                let result = feed_external_parser(&mut parser, &openai_tools, black_box(&chunks));
+                debug_assert_eq!(result.0, expected_normal_text);
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
+fn bench_glm45_moe(c: &mut Criterion) {
+    let tools = test_tools();
+    let glm45_mixed_text = glm45_mixed_fixture();
+    let glm47_mixed_text = glm47_mixed_fixture();
+    let long_normal_text = long_normal_text_fixture();
+
+    run_stream_group(
+        c,
+        "glm45/mixed_text_tool_call",
+        GLM45_PARSER_NAME,
+        &tools,
+        &glm45_mixed_text,
+        CHUNK_CHARS,
+        "I will check two cities before answering.\n",
+        2,
+    );
+
+    run_stream_group(
+        c,
+        "glm47/mixed_text_tool_call",
+        GLM47_PARSER_NAME,
+        &tools,
+        &glm47_mixed_text,
+        CHUNK_CHARS,
+        "I will check two cities before answering.\n",
+        2,
+    );
+
+    run_stream_group(
+        c,
+        "glm45/long_normal_text",
+        GLM45_PARSER_NAME,
+        &tools,
+        &long_normal_text,
+        CHUNK_CHARS,
+        &long_normal_text,
+        0,
+    );
+
+    run_stream_group(
+        c,
+        "glm47/long_normal_text",
+        GLM47_PARSER_NAME,
+        &tools,
+        &long_normal_text,
+        CHUNK_CHARS,
+        &long_normal_text,
+        0,
+    );
+}
+
+criterion_group!(benches, bench_glm45_moe);
+criterion_main!(benches);
diff --git a/rust/src/tool-parser/benches/kimi_k2.rs b/rust/src/tool-parser/benches/kimi_k2.rs
new file mode 100644
index 000000000000..5a80f6606735
--- /dev/null
+++ b/rust/src/tool-parser/benches/kimi_k2.rs
@@ -0,0 +1,149 @@
+use std::time::Duration;
+
+use criterion::{BatchSize, Criterion, Throughput, black_box, criterion_group, criterion_main};
+use tool_parser::parsers::KimiK2Parser as ExternalKimiK2Parser;
+use vllm_tool_parser::test_utils::{split_by_chars, test_tools};
+use vllm_tool_parser::{KimiK2ToolParser, Tool, ToolParser};
+
+mod utils;
+use utils::{feed_external_parser, feed_parser, openai_tools};
+
+const CHUNK_CHARS: usize = 7;
+const LONG_NORMAL_TEXT_REPEATS: usize = 2048;
+
+fn mixed_fixture() -> String {
+    concat!(
+        "I will check two cities before answering.\n",
+        "<|tool_calls_section_begin|>",
+        "<|tool_call_begin|>functions.get_weather:0",
+        "<|tool_call_argument_begin|>{\"location\":\"Hangzhou\",\"days\":3}",
+        "<|tool_call_end|>",
+        "<|tool_call_begin|>functions.get_weather:1",
+        "<|tool_call_argument_begin|>{\"location\":\"San Francisco\",\"days\":2}",
+        "<|tool_call_end|>",
+        "<|tool_calls_section_end|>",
+    )
+    .to_string()
+}
+
+fn mixed_chunks() -> Vec<&'static str> {
+    vec![
+        "I will check two cities before answering.\n",
+        "<|tool_calls_section_begin|>",
+        "<|tool_call_begin|>functions.get_weather:0",
+        "<|tool_call_argument_begin|>",
+        "{\"location\":",
+        "\"Hangzhou\",",
+        "\"days\":3}",
+        "<|tool_call_end|>",
+        "<|tool_call_begin|>functions.get_weather:1",
+        "<|tool_call_argument_begin|>",
+        "{\"location\":",
+        "\"San Francisco\",",
+        "\"days\":2}",
+        "<|tool_call_end|>",
+        "<|tool_calls_section_end|>",
+    ]
+}
+
+fn long_normal_text_fixture() -> String {
+    let line = "This is ordinary assistant text with no Kimi K2 tool markers at all.\n";
+    line.repeat(LONG_NORMAL_TEXT_REPEATS)
+}
+
+fn native_parser(tools: &[Tool]) -> Box<dyn ToolParser> {
+    KimiK2ToolParser::create(tools).expect("Kimi K2 parser should initialize")
+}
+
+fn run_stream_group(
+    c: &mut Criterion,
+    name: &str,
+    tools: &[Tool],
+    text: &str,
+    chunks: &[&str],
+    expected_normal_text: &str,
+    expected_native_calls_len: usize,
+) {
+    let openai_tools = openai_tools(tools);
+
+    let mut group = c.benchmark_group(name);
+    group.sample_size(50);
+    group.warm_up_time(Duration::from_millis(300));
+    group.measurement_time(Duration::from_secs(2));
+    group.throughput(Throughput::Bytes(text.len() as u64));
+
+    group.bench_function("native_reuse_parser", |b| {
+        let mut parser = native_parser(tools);
+        b.iter(|| {
+            let result = feed_parser(&mut *parser, black_box(chunks));
+            debug_assert_eq!(result.0, expected_normal_text);
+            debug_assert_eq!(result.1, expected_native_calls_len);
+            black_box(result);
+        })
+    });
+
+    group.bench_function("native_create_parser", |b| {
+        b.iter_batched(
+            || native_parser(tools),
+            |mut parser| {
+                let result = feed_parser(&mut *parser, black_box(chunks));
+                debug_assert_eq!(result.0, expected_normal_text);
+                debug_assert_eq!(result.1, expected_native_calls_len);
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.bench_function("external_reuse_parser", |b| {
+        let mut parser = ExternalKimiK2Parser::new();
+        b.iter(|| {
+            let result = feed_external_parser(&mut parser, &openai_tools, black_box(chunks));
+            black_box(result);
+        })
+    });
+
+    group.bench_function("external_create_parser", |b| {
+        b.iter_batched(
+            ExternalKimiK2Parser::new,
+            |mut parser| {
+                let result = feed_external_parser(&mut parser, &openai_tools, black_box(chunks));
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
+fn bench_kimi_k2(c: &mut Criterion) {
+    let tools = test_tools();
+    let mixed_text = mixed_fixture();
+    let mixed_chunks = mixed_chunks();
+    let long_normal_text = long_normal_text_fixture();
+    let long_normal_chunks = split_by_chars(&long_normal_text, CHUNK_CHARS);
+
+    run_stream_group(
+        c,
+        "kimi_k2/mixed_text_tool_call",
+        &tools,
+        &mixed_text,
+        &mixed_chunks,
+        "I will check two cities before answering.\n",
+        2,
+    );
+
+    run_stream_group(
+        c,
+        "kimi_k2/long_normal_text",
+        &tools,
+        &long_normal_text,
+        &long_normal_chunks,
+        &long_normal_text,
+        0,
+    );
+}
+
+criterion_group!(benches, bench_kimi_k2);
+criterion_main!(benches);
diff --git a/rust/src/tool-parser/benches/llama3_json.rs b/rust/src/tool-parser/benches/llama3_json.rs
new file mode 100644
index 000000000000..03b5b54ee78a
--- /dev/null
+++ b/rust/src/tool-parser/benches/llama3_json.rs
@@ -0,0 +1,128 @@
+use std::time::Duration;
+
+use criterion::{BatchSize, Criterion, Throughput, black_box, criterion_group, criterion_main};
+use tool_parser::parsers::LlamaParser as ExternalLlamaParser;
+use vllm_tool_parser::test_utils::{split_by_chars, test_tools};
+use vllm_tool_parser::{Llama3JsonToolParser, Tool, ToolParser};
+
+mod utils;
+use utils::{feed_external_parser, feed_parser, openai_tools};
+
+const CHUNK_CHARS: usize = 7;
+const LONG_NORMAL_TEXT_REPEATS: usize = 2048;
+
+fn tool_call(function_name: &str, parameters: &str) -> String {
+    format!(r#"{{"name":"{function_name}","parameters":{parameters}}}"#)
+}
+
+fn mixed_fixture() -> String {
+    format!(
+        "{}; {}",
+        tool_call("get_weather", r#"{"location":"Hangzhou","days":3}"#),
+        tool_call(
+            "convert",
+            r#"{"whole":42.5,"flag":true,"payload":{"nested":["x",null]},"items":[1,2,3],"empty":""}"#
+        ),
+    )
+}
+
+fn long_normal_text_fixture() -> String {
+    let line = "This is ordinary assistant text with no Llama JSON tool call at the root.\n";
+    line.repeat(LONG_NORMAL_TEXT_REPEATS)
+}
+
+fn native_parser(tools: &[Tool]) -> Box<dyn ToolParser> {
+    Llama3JsonToolParser::create(tools).expect("Llama JSON parser should initialize")
+}
+
+fn run_stream_group(
+    c: &mut Criterion,
+    name: &str,
+    tools: &[Tool],
+    text: &str,
+    chunk_chars: usize,
+    expected_normal_text: &str,
+    expected_native_calls_len: usize,
+) {
+    let chunks = split_by_chars(text, chunk_chars);
+    let openai_tools = openai_tools(tools);
+
+    let mut group = c.benchmark_group(name);
+    group.sample_size(50);
+    group.warm_up_time(Duration::from_millis(300));
+    group.measurement_time(Duration::from_secs(2));
+    group.throughput(Throughput::Bytes(text.len() as u64));
+
+    group.bench_function("native_reuse_parser", |b| {
+        let mut parser = native_parser(tools);
+        b.iter(|| {
+            let result = feed_parser(&mut *parser, black_box(&chunks));
+            debug_assert_eq!(result.0, expected_normal_text);
+            debug_assert_eq!(result.1, expected_native_calls_len);
+            black_box(result);
+        })
+    });
+
+    group.bench_function("native_create_parser", |b| {
+        b.iter_batched(
+            || native_parser(tools),
+            |mut parser| {
+                let result = feed_parser(&mut *parser, black_box(&chunks));
+                debug_assert_eq!(result.0, expected_normal_text);
+                debug_assert_eq!(result.1, expected_native_calls_len);
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.bench_function("external_reuse_parser", |b| {
+        let mut parser = ExternalLlamaParser::new();
+        b.iter(|| {
+            let result = feed_external_parser(&mut parser, &openai_tools, black_box(&chunks));
+            black_box(result);
+        })
+    });
+
+    group.bench_function("external_create_parser", |b| {
+        b.iter_batched(
+            ExternalLlamaParser::new,
+            |mut parser| {
+                let result = feed_external_parser(&mut parser, &openai_tools, black_box(&chunks));
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
+fn bench_llama3_json(c: &mut Criterion) {
+    let tools = test_tools();
+    let mixed_text = mixed_fixture();
+    let long_normal_text = long_normal_text_fixture();
+
+    run_stream_group(
+        c,
+        "llama3_json/mixed_text_tool_call",
+        &tools,
+        &mixed_text,
+        CHUNK_CHARS,
+        "",
+        2,
+    );
+
+    run_stream_group(
+        c,
+        "llama3_json/long_normal_text",
+        &tools,
+        &long_normal_text,
+        CHUNK_CHARS,
+        &long_normal_text,
+        0,
+    );
+}
+
+criterion_group!(benches, bench_llama3_json);
+criterion_main!(benches);
diff --git a/rust/src/tool-parser/benches/minimax_m2.rs b/rust/src/tool-parser/benches/minimax_m2.rs
new file mode 100644
index 000000000000..4ad20400934b
--- /dev/null
+++ b/rust/src/tool-parser/benches/minimax_m2.rs
@@ -0,0 +1,136 @@
+use std::time::Duration;
+
+use criterion::{BatchSize, Criterion, Throughput, black_box, criterion_group, criterion_main};
+use tool_parser::parsers::MinimaxM2Parser as ExternalMinimaxM2Parser;
+use vllm_tool_parser::test_utils::{split_by_chars, test_tools};
+use vllm_tool_parser::{MinimaxM2ToolParser, Tool, ToolParser};
+
+mod utils;
+use utils::{feed_external_parser, feed_parser, openai_tools};
+
+const CHUNK_CHARS: usize = 7;
+const LONG_NORMAL_TEXT_REPEATS: usize = 2048;
+
+fn mixed_fixture() -> String {
+    concat!(
+        "I will check two cities before answering.\n",
+        "<minimax:tool_call>",
+        "<invoke name=\"get_weather\">",
+        "<parameter name=\"city\">Hangzhou</parameter>",
+        "<parameter name=\"date\">2026-04-30</parameter>",
+        "<parameter name=\"unit\">celsius</parameter>",
+        "<parameter name=\"days\">3</parameter>",
+        "</invoke>",
+        "<invoke name=\"get_weather\">",
+        "<parameter name=\"city\">San Francisco</parameter>",
+        "<parameter name=\"date\">2026-04-30</parameter>",
+        "<parameter name=\"unit\">fahrenheit</parameter>",
+        "<parameter name=\"days\">2</parameter>",
+        "</invoke>",
+        "</minimax:tool_call>",
+    )
+    .to_string()
+}
+
+fn long_normal_text_fixture() -> String {
+    let line = "This is ordinary assistant text with no MiniMax M2 tool markers at all.\n";
+    line.repeat(LONG_NORMAL_TEXT_REPEATS)
+}
+
+fn native_parser(tools: &[Tool]) -> Box<dyn ToolParser> {
+    MinimaxM2ToolParser::create(tools).expect("MiniMax M2 parser should initialize")
+}
+
+fn run_stream_group(
+    c: &mut Criterion,
+    name: &str,
+    tools: &[Tool],
+    text: &str,
+    chunk_chars: usize,
+    expected_normal_text: &str,
+    expected_native_calls_len: usize,
+) {
+    let chunks = split_by_chars(text, chunk_chars);
+    let openai_tools = openai_tools(tools);
+
+    let mut group = c.benchmark_group(name);
+    group.sample_size(50);
+    group.warm_up_time(Duration::from_millis(300));
+    group.measurement_time(Duration::from_secs(2));
+    group.throughput(Throughput::Bytes(text.len() as u64));
+
+    group.bench_function("native_reuse_parser", |b| {
+        let mut parser = native_parser(tools);
+        b.iter(|| {
+            let result = feed_parser(&mut *parser, black_box(&chunks));
+            debug_assert_eq!(result.0, expected_normal_text);
+            debug_assert_eq!(result.1, expected_native_calls_len);
+            black_box(result);
+        })
+    });
+
+    group.bench_function("native_create_parser", |b| {
+        b.iter_batched(
+            || native_parser(tools),
+            |mut parser| {
+                let result = feed_parser(&mut *parser, black_box(&chunks));
+                debug_assert_eq!(result.0, expected_normal_text);
+                debug_assert_eq!(result.1, expected_native_calls_len);
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.bench_function("external_reuse_parser", |b| {
+        let mut parser = ExternalMinimaxM2Parser::new();
+        b.iter(|| {
+            let result = feed_external_parser(&mut parser, &openai_tools, black_box(&chunks));
+            debug_assert_eq!(result.0, expected_normal_text);
+            black_box(result);
+        })
+    });
+
+    group.bench_function("external_create_parser", |b| {
+        b.iter_batched(
+            ExternalMinimaxM2Parser::new,
+            |mut parser| {
+                let result = feed_external_parser(&mut parser, &openai_tools, black_box(&chunks));
+                debug_assert_eq!(result.0, expected_normal_text);
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
+fn bench_minimax_m2(c: &mut Criterion) {
+    let tools = test_tools();
+    let mixed_text = mixed_fixture();
+    let long_normal_text = long_normal_text_fixture();
+
+    run_stream_group(
+        c,
+        "minimax_m2/mixed_text_tool_call",
+        &tools,
+        &mixed_text,
+        CHUNK_CHARS,
+        "I will check two cities before answering.\n",
+        2,
+    );
+
+    run_stream_group(
+        c,
+        "minimax_m2/long_normal_text",
+        &tools,
+        &long_normal_text,
+        CHUNK_CHARS,
+        &long_normal_text,
+        0,
+    );
+}
+
+criterion_group!(benches, bench_minimax_m2);
+criterion_main!(benches);
diff --git a/rust/src/tool-parser/benches/qwen3_coder.rs b/rust/src/tool-parser/benches/qwen3_coder.rs
new file mode 100644
index 000000000000..850badaac527
--- /dev/null
+++ b/rust/src/tool-parser/benches/qwen3_coder.rs
@@ -0,0 +1,138 @@
+use std::time::Duration;
+
+use criterion::{BatchSize, Criterion, Throughput, black_box, criterion_group, criterion_main};
+use tool_parser::parsers::QwenCoderParser as ExternalQwenCoderParser;
+use vllm_tool_parser::test_utils::{split_by_chars, test_tools};
+use vllm_tool_parser::{Qwen3CoderToolParser, Tool, ToolParser};
+
+mod utils;
+use utils::{feed_external_parser, feed_parser, openai_tools};
+
+const CHUNK_CHARS: usize = 7;
+const LONG_NORMAL_TEXT_REPEATS: usize = 2048;
+
+fn mixed_fixture() -> String {
+    concat!(
+        "I will check two cities before answering.\n",
+        "<tool_call>\n",
+        "<function=get_weather>\n",
+        "<parameter=location>Hangzhou</parameter>\n",
+        "<parameter=date>2026-04-29</parameter>\n",
+        "<parameter=unit>celsius</parameter>\n",
+        "<parameter=days>3</parameter>\n",
+        "</function>\n",
+        "</tool_call>\n",
+        "<tool_call>\n",
+        "<function=get_weather>\n",
+        "<parameter=location>San Francisco</parameter>\n",
+        "<parameter=date>2026-04-29</parameter>\n",
+        "<parameter=unit>fahrenheit</parameter>\n",
+        "<parameter=days>2</parameter>\n",
+        "</function>\n",
+        "</tool_call>",
+    )
+    .to_string()
+}
+
+fn long_normal_text_fixture() -> String {
+    let line = "This is ordinary assistant text with no Qwen Coder tool markers at all.\n";
+    line.repeat(LONG_NORMAL_TEXT_REPEATS)
+}
+
+fn native_parser(tools: &[Tool]) -> Box<dyn ToolParser> {
+    Qwen3CoderToolParser::create(tools).expect("Qwen Coder parser should initialize")
+}
+
+fn run_stream_group(
+    c: &mut Criterion,
+    name: &str,
+    tools: &[Tool],
+    text: &str,
+    chunk_chars: usize,
+    expected_normal_text: &str,
+    expected_native_calls_len: usize,
+) {
+    let chunks = split_by_chars(text, chunk_chars);
+    let openai_tools = openai_tools(tools);
+
+    let mut group = c.benchmark_group(name);
+    group.sample_size(50);
+    group.warm_up_time(Duration::from_millis(300));
+    group.measurement_time(Duration::from_secs(2));
+    group.throughput(Throughput::Bytes(text.len() as u64));
+
+    group.bench_function("native_reuse_parser", |b| {
+        let mut parser = native_parser(tools);
+        b.iter(|| {
+            let result = feed_parser(&mut *parser, black_box(&chunks));
+            debug_assert_eq!(result.0, expected_normal_text);
+            debug_assert_eq!(result.1, expected_native_calls_len);
+            black_box(result);
+        })
+    });
+
+    group.bench_function("native_create_parser", |b| {
+        b.iter_batched(
+            || native_parser(tools),
+            |mut parser| {
+                let result = feed_parser(&mut *parser, black_box(&chunks));
+                debug_assert_eq!(result.0, expected_normal_text);
+                debug_assert_eq!(result.1, expected_native_calls_len);
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.bench_function("external_reuse_parser", |b| {
+        let mut parser = ExternalQwenCoderParser::new();
+        b.iter(|| {
+            let result = feed_external_parser(&mut parser, &openai_tools, black_box(&chunks));
+            debug_assert_eq!(result.0, expected_normal_text);
+            black_box(result);
+        })
+    });
+
+    group.bench_function("external_create_parser", |b| {
+        b.iter_batched(
+            ExternalQwenCoderParser::new,
+            |mut parser| {
+                let result = feed_external_parser(&mut parser, &openai_tools, black_box(&chunks));
+                debug_assert_eq!(result.0, expected_normal_text);
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
+fn bench_qwen3_coder(c: &mut Criterion) {
+    let tools = test_tools();
+    let mixed_text = mixed_fixture();
+    let long_normal_text = long_normal_text_fixture();
+
+    run_stream_group(
+        c,
+        "qwen3_coder/mixed_text_tool_call",
+        &tools,
+        &mixed_text,
+        CHUNK_CHARS,
+        "I will check two cities before answering.\n",
+        2,
+    );
+
+    run_stream_group(
+        c,
+        "qwen3_coder/long_normal_text",
+        &tools,
+        &long_normal_text,
+        CHUNK_CHARS,
+        &long_normal_text,
+        0,
+    );
+}
+
+criterion_group!(benches, bench_qwen3_coder);
+criterion_main!(benches);
diff --git a/rust/src/tool-parser/benches/qwen3_xml.rs b/rust/src/tool-parser/benches/qwen3_xml.rs
new file mode 100644
index 000000000000..f2e37551dda7
--- /dev/null
+++ b/rust/src/tool-parser/benches/qwen3_xml.rs
@@ -0,0 +1,125 @@
+use std::time::Duration;
+
+use criterion::{BatchSize, Criterion, Throughput, black_box, criterion_group, criterion_main};
+use tool_parser::parsers::QwenParser as ExternalQwenParser;
+use vllm_tool_parser::test_utils::{split_by_chars, test_tools};
+use vllm_tool_parser::{Qwen3XmlToolParser, Tool, ToolParser};
+
+mod utils;
+use utils::{feed_external_parser, feed_parser, openai_tools};
+
+const CHUNK_CHARS: usize = 7;
+const LONG_NORMAL_TEXT_REPEATS: usize = 2048;
+
+fn tool_call(function_name: &str, arguments: &str) -> String {
+    format!("<tool_call>\n{{\"name\":\"{function_name}\",\"arguments\":{arguments}}}\n</tool_call>")
+}
+
+fn mixed_fixture() -> String {
+    format!(
+        "I will check two cities before answering.\n{}{}",
+        tool_call("get_weather", r#"{"location":"Hangzhou","days":3}"#),
+        tool_call("get_weather", r#"{"location":"San Francisco","days":2}"#),
+    )
+}
+
+fn long_normal_text_fixture() -> String {
+    let line = "This is ordinary assistant text with no Qwen XML tool markers at all.\n";
+    line.repeat(LONG_NORMAL_TEXT_REPEATS)
+}
+
+fn native_parser(tools: &[Tool]) -> Box<dyn ToolParser> {
+    Qwen3XmlToolParser::create(tools).expect("Qwen XML parser should initialize")
+}
+
+fn run_stream_group(
+    c: &mut Criterion,
+    name: &str,
+    tools: &[Tool],
+    text: &str,
+    chunk_chars: usize,
+    expected_normal_text: &str,
+    expected_native_calls_len: usize,
+) {
+    let chunks = split_by_chars(text, chunk_chars);
+    let openai_tools = openai_tools(tools);
+
+    let mut group = c.benchmark_group(name);
+    group.sample_size(50);
+    group.warm_up_time(Duration::from_millis(300));
+    group.measurement_time(Duration::from_secs(2));
+    group.throughput(Throughput::Bytes(text.len() as u64));
+
+    group.bench_function("native_reuse_parser", |b| {
+        let mut parser = native_parser(tools);
+        b.iter(|| {
+            let result = feed_parser(&mut *parser, black_box(&chunks));
+            debug_assert_eq!(result.0, expected_normal_text);
+            debug_assert_eq!(result.1, expected_native_calls_len);
+            black_box(result);
+        })
+    });
+
+    group.bench_function("native_create_parser", |b| {
+        b.iter_batched(
+            || native_parser(tools),
+            |mut parser| {
+                let result = feed_parser(&mut *parser, black_box(&chunks));
+                debug_assert_eq!(result.0, expected_normal_text);
+                debug_assert_eq!(result.1, expected_native_calls_len);
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.bench_function("external_reuse_parser", |b| {
+        let mut parser = ExternalQwenParser::new();
+        b.iter(|| {
+            let result = feed_external_parser(&mut parser, &openai_tools, black_box(&chunks));
+            black_box(result);
+        })
+    });
+
+    group.bench_function("external_create_parser", |b| {
+        b.iter_batched(
+            ExternalQwenParser::new,
+            |mut parser| {
+                let result = feed_external_parser(&mut parser, &openai_tools, black_box(&chunks));
+                black_box(result);
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
+fn bench_qwen3_xml(c: &mut Criterion) {
+    let tools = test_tools();
+    let mixed_text = mixed_fixture();
+    let long_normal_text = long_normal_text_fixture();
+
+    run_stream_group(
+        c,
+        "qwen3_xml/mixed_text_tool_call",
+        &tools,
+        &mixed_text,
+        CHUNK_CHARS,
+        "I will check two cities before answering.\n",
+        2,
+    );
+
+    run_stream_group(
+        c,
+        "qwen3_xml/long_normal_text",
+        &tools,
+        &long_normal_text,
+        CHUNK_CHARS,
+        &long_normal_text,
+        0,
+    );
+}
+
+criterion_group!(benches, bench_qwen3_xml);
+criterion_main!(benches);
diff --git a/rust/src/tool-parser/benches/utils/mod.rs b/rust/src/tool-parser/benches/utils/mod.rs
new file mode 100644
index 000000000000..a0ad768f1154
--- /dev/null
+++ b/rust/src/tool-parser/benches/utils/mod.rs
@@ -0,0 +1,49 @@
+#![allow(dead_code)]
+
+use futures::FutureExt as _;
+use openai_protocol::common::{Function as OpenAiFunction, Tool as OpenAiTool};
+use tool_parser::traits::ToolParser as ExternalToolParser;
+use vllm_tool_parser::test_utils::collect_stream;
+use vllm_tool_parser::{Tool, ToolParser};
+
+pub(super) fn openai_tools(tools: &[Tool]) -> Vec<OpenAiTool> {
+    tools
+        .iter()
+        .map(|tool| OpenAiTool {
+            tool_type: "function".to_string(),
+            function: OpenAiFunction {
+                name: tool.name.clone(),
+                description: tool.description.clone(),
+                parameters: tool.parameters.clone(),
+                strict: tool.strict,
+            },
+        })
+        .collect()
+}
+
+pub(super) fn feed_parser(parser: &mut dyn ToolParser, chunks: &[&str]) -> (String, usize) {
+    let result = collect_stream(parser, chunks);
+    (result.normal_text, result.calls.len())
+}
+
+pub(super) fn feed_external_parser(
+    parser: &mut impl ExternalToolParser,
+    tools: &[OpenAiTool],
+    chunks: &[&str],
+) -> (String, usize) {
+    ExternalToolParser::reset(parser);
+
+    let mut normal_text = String::new();
+    let mut calls_len = 0;
+    for chunk in chunks {
+        let delta = parser
+            .parse_incremental(chunk, tools)
+            .now_or_never()
+            .expect("external parser should not suspend")
+            .expect("chunk should parse");
+        normal_text.push_str(&delta.normal_text);
+        calls_len += delta.calls.len();
+    }
+    calls_len += parser.get_unstreamed_tool_args().unwrap_or_default().len();
+    (normal_text, calls_len)
+}
diff --git a/rust/src/tool-parser/src/deepseek_dsml/deepseek_v32.rs b/rust/src/tool-parser/src/deepseek_dsml/deepseek_v32.rs
new file mode 100644
index 000000000000..ddc630ceab4a
--- /dev/null
+++ b/rust/src/tool-parser/src/deepseek_dsml/deepseek_v32.rs
@@ -0,0 +1,439 @@
+use super::{DeepSeekDsmlToolParser, DsmlTokens};
+use crate::{Result, Tool, ToolParseResult, ToolParser};
+
+/// Tool parser for DeepSeek V3.2 models.
+///
+/// Example tool call content:
+///
+/// ```text
+/// <｜DSML｜function_calls>
+/// <｜DSML｜invoke name="get_weather">
+/// <｜DSML｜parameter name="location" string="true">杭州</｜DSML｜parameter>
+/// <｜DSML｜parameter name="date" string="true">2024-01-16</｜DSML｜parameter>
+/// </｜DSML｜invoke>
+/// <｜DSML｜invoke name="get_weather">
+/// <｜DSML｜parameter name="location" string="true">北京</｜DSML｜parameter>
+/// <｜DSML｜parameter name="date" string="true">2024-01-16</｜DSML｜parameter>
+/// </｜DSML｜invoke>
+/// </｜DSML｜function_calls>
+/// ```
+///
+/// Arguments are emitted only after a full `invoke` block is parsed.
+///
+/// DeepSeek V3.2 relies on DSML markers such as `｜DSML｜`, which are
+/// represented as special tokens in the tokenizer and therefore must be
+/// preserved during decode for parsing to work.
+pub struct DeepSeekV32ToolParser(DeepSeekDsmlToolParser);
+
+impl DeepSeekV32ToolParser {
+    /// Create a DeepSeek V3.2 tool parser.
+    pub(super) fn new(tools: &[Tool]) -> Self {
+        Self(DeepSeekDsmlToolParser::new(tools, DsmlTokens::V32))
+    }
+}
+
+impl ToolParser for DeepSeekV32ToolParser {
+    /// Create a boxed DeepSeek V3.2 tool parser.
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tools)))
+    }
+
+    /// Preserve DSML special tokens while decoding.
+    fn preserve_special_tokens(&self) -> bool {
+        true
+    }
+
+    /// Push one decoded text chunk through the DSML parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.0.push(chunk)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        self.0.finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use serde_json::{Value, json};
+    use thiserror_ext::AsReport;
+
+    use super::DeepSeekV32ToolParser;
+    use crate::ToolParser;
+    use crate::test_utils::{collect_stream, split_by_chars, test_tools};
+
+    fn build_tool_call(function_name: &str, params: &[(&str, &str)]) -> String {
+        let params = params
+            .iter()
+            .map(|(name, value)| {
+                format!(
+                    r#"<｜DSML｜parameter name="{name}" string="true">{value}</｜DSML｜parameter>"#
+                )
+            })
+            .collect::<Vec<_>>()
+            .join("\n");
+        format!(
+            "<｜DSML｜function_calls>\n<｜DSML｜invoke name=\"{function_name}\">\n{params}\n</｜DSML｜invoke>\n</｜DSML｜function_calls>"
+        )
+    }
+
+    #[test]
+    fn deepseek_v32_parse_complete_without_tool_call_keeps_text() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let result = parser.parse_complete("Hello, world!").unwrap();
+
+        assert_eq!(result.normal_text, "Hello, world!");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn deepseek_v32_parse_complete_extracts_single_tool_call() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&build_tool_call(
+                "get_weather",
+                &[("location", "SF"), ("date", "2024-01-16")],
+            ))
+            .unwrap();
+
+        assert!(result.normal_text.is_empty());
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "location": "SF",
+                "date": "2024-01-16"
+            })
+        );
+    }
+
+    #[test]
+    fn deepseek_v32_parse_complete_preserves_prefix_text() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let output = format!(
+            "Thinking... {}",
+            build_tool_call("get_weather", &[("location", "NYC")])
+        );
+        let result = parser.parse_complete(&output).unwrap();
+
+        assert_eq!(result.normal_text, "Thinking... ");
+        assert_eq!(result.calls.len(), 1);
+    }
+
+    #[test]
+    fn deepseek_v32_parse_complete_converts_schema_types() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(
+                "<｜DSML｜function_calls>\n\
+                 <｜DSML｜invoke name=\"convert\">\n\
+                 <｜DSML｜parameter name=\"whole\" string=\"false\">5.0</｜DSML｜parameter>\n\
+                 <｜DSML｜parameter name=\"flag\" string=\"false\">true</｜DSML｜parameter>\n\
+                 <｜DSML｜parameter name=\"payload\" string=\"false\">{\"nested\":true}</｜DSML｜parameter>\n\
+                 <｜DSML｜parameter name=\"items\" string=\"false\">[1,2]</｜DSML｜parameter>\n\
+                 <｜DSML｜parameter name=\"empty\" string=\"false\">null</｜DSML｜parameter>\n\
+                 </｜DSML｜invoke>\n\
+                 </｜DSML｜function_calls>",
+            )
+            .unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "whole": 5.0,
+                "flag": true,
+                "payload": { "nested": true },
+                "items": [1, 2],
+                "empty": null,
+            })
+        );
+    }
+
+    #[test]
+    fn deepseek_v32_parse_complete_string_attr_overrides_schema_types() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(
+                "<｜DSML｜function_calls>\n\
+                 <｜DSML｜invoke name=\"convert\">\n\
+                 <｜DSML｜parameter name=\"whole\" string=\"true\">5.0</｜DSML｜parameter>\n\
+                 <｜DSML｜parameter name=\"flag\" string=\"true\">true</｜DSML｜parameter>\n\
+                 <｜DSML｜parameter name=\"payload\" string=\"true\">{\"nested\":true}</｜DSML｜parameter>\n\
+                 <｜DSML｜parameter name=\"items\" string=\"true\">[1,2]</｜DSML｜parameter>\n\
+                 <｜DSML｜parameter name=\"empty\" string=\"true\">null</｜DSML｜parameter>\n\
+                 </｜DSML｜invoke>\n\
+                 </｜DSML｜function_calls>",
+            )
+            .unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "whole": "5.0",
+                "flag": "true",
+                "payload": "{\"nested\":true}",
+                "items": "[1,2]",
+                "empty": "null",
+            })
+        );
+    }
+
+    #[test]
+    fn deepseek_v32_parse_complete_unescapes_literal_closing_tags_in_parameter_value() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&build_tool_call(
+                "get_weather",
+                &[
+                    (
+                        "location",
+                        "Hangzhou &lt;/｜DSML｜parameter&gt;&lt;/｜DSML｜invoke&gt;&lt;/｜DSML｜function_calls&gt;",
+                    ),
+                    ("date", "2026-05-08"),
+                ],
+            ))
+            .unwrap();
+
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "location": "Hangzhou </｜DSML｜parameter></｜DSML｜invoke></｜DSML｜function_calls>",
+                "date": "2026-05-08",
+            })
+        );
+    }
+
+    #[test]
+    fn deepseek_v32_streaming_extracts_single_tool_call() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let result = collect_stream(
+            &mut parser,
+            &[
+                "<｜DSML｜function_calls>\n",
+                "<｜DSML｜invoke name=\"get_weather\">\n",
+                "<｜DSML｜parameter name=\"location\" string=\"true\">SF</｜DSML｜parameter>\n",
+                "</｜DSML｜invoke>\n",
+                "</｜DSML｜function_calls>",
+            ],
+        );
+
+        assert!(result.normal_text.is_empty());
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({ "location": "SF" })
+        );
+    }
+
+    #[test]
+    fn deepseek_v32_streaming_preserves_prefix_text() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let result = collect_stream(
+            &mut parser,
+            &[
+                "Thinking... ",
+                "<｜DSML｜function_calls>\n",
+                "<｜DSML｜invoke name=\"get_weather\">\n",
+                "<｜DSML｜parameter name=\"location\" string=\"true\">SF</｜DSML｜parameter>\n",
+                "</｜DSML｜invoke>\n",
+                "</｜DSML｜function_calls>",
+            ],
+        );
+
+        assert_eq!(result.normal_text, "Thinking... ");
+        assert_eq!(result.calls.len(), 1);
+    }
+
+    #[test]
+    fn deepseek_v32_streaming_without_tool_call_emits_text_incrementally() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let result = collect_stream(&mut parser, &["Hello, ", "world!"]);
+
+        assert_eq!(result.normal_text, "Hello, world!");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn deepseek_v32_streaming_extracts_multiple_tool_calls_in_order() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let result = collect_stream(
+            &mut parser,
+            &[&format!(
+                "{}\n{}",
+                build_tool_call("get_weather", &[("location", "SF")])
+                    .trim_end_matches("</｜DSML｜function_calls>"),
+                "<｜DSML｜invoke name=\"get_weather\">\n<｜DSML｜parameter name=\"location\" string=\"true\">NYC</｜DSML｜parameter>\n</｜DSML｜invoke>\n</｜DSML｜function_calls>"
+            )],
+        );
+
+        assert_eq!(result.calls.len(), 2);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(result.calls[1].name.as_deref(), Some("get_weather"));
+        assert_eq!(result.calls[0].tool_index, 0);
+        assert_eq!(result.calls[1].tool_index, 1);
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({ "location": "SF" })
+        );
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[1].arguments).unwrap(),
+            json!({ "location": "NYC" })
+        );
+    }
+
+    #[test]
+    fn deepseek_v32_streaming_handles_start_token_split_across_chunks() {
+        let text = build_tool_call("get_weather", &[("location", "SF")]);
+        let chunks = split_by_chars(&text, 5);
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({ "location": "SF" })
+        );
+    }
+
+    #[test]
+    fn deepseek_v32_streaming_handles_bpe_chunked_dsml_opener() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let result = collect_stream(
+            &mut parser,
+            &[
+                "<｜DSML｜",
+                "function",
+                "_c",
+                "all",
+                "s",
+                ">\n",
+                "<｜DSML｜",
+                "invoke",
+                " name=\"",
+                "get_weather",
+                "\">\n",
+                "<｜DSML｜",
+                "parameter",
+                " name=\"location\" string=\"true\">",
+                "Beijing",
+                "</｜DSML｜",
+                "parameter>\n",
+                "</｜DSML｜",
+                "invoke>\n",
+                "</｜DSML｜",
+                "function_calls>",
+            ],
+        );
+
+        assert!(result.normal_text.is_empty());
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({ "location": "Beijing" })
+        );
+    }
+
+    #[test]
+    fn deepseek_v32_streaming_truncated_parameter_does_not_leak_eos() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        parser.push("<｜DSML｜function_calls>\n").unwrap();
+        parser.push("<｜DSML｜invoke name=\"get_weather\">\n").unwrap();
+        parser
+            .push("<｜DSML｜parameter name=\"location\" string=\"true\">Tokyo")
+            .unwrap();
+        parser.push("<｜end▁of▁sentence｜>").unwrap();
+
+        let error = parser.finish().unwrap_err();
+        assert!(error.to_report_string().contains("incomplete DeepSeek DSML tool call"));
+    }
+    #[test]
+    fn deepseek_v32_streaming_drops_eos_after_complete_tool_calls() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let result = collect_stream(
+            &mut parser,
+            &[
+                "<｜DSML｜function_calls>\n",
+                "<｜DSML｜invoke name=\"get_weather\">\n",
+                "<｜DSML｜parameter name=\"location\" string=\"true\">SF</｜DSML｜parameter>\n",
+                "</｜DSML｜invoke>\n",
+                "</｜DSML｜function_calls><｜end▁of▁sentence｜>",
+            ],
+        );
+
+        assert!(result.normal_text.is_empty());
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+    }
+
+    #[test]
+    fn deepseek_v32_streaming_ignores_text_after_complete_tool_calls() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let result = collect_stream(
+            &mut parser,
+            &[
+                "<｜DSML｜function_calls>\n",
+                "<｜DSML｜invoke name=\"get_weather\">\n",
+                "<｜DSML｜parameter name=\"location\" string=\"true\">SF</｜DSML｜parameter>\n",
+                "</｜DSML｜invoke>\n",
+                "</｜DSML｜function_calls>",
+                "trailing text",
+            ],
+        );
+
+        assert!(result.normal_text.is_empty());
+        assert_eq!(result.calls.len(), 1);
+    }
+
+    #[test]
+    fn deepseek_v32_streaming_does_not_emit_incomplete_invoke() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        parser.push("<｜DSML｜function_calls>\n").unwrap();
+        parser.push("<｜DSML｜invoke name=\"get_weather\">\n").unwrap();
+        parser
+            .push("<｜DSML｜parameter name=\"location\" string=\"true\">SF</｜DSML｜parameter>\n")
+            .unwrap();
+
+        let error = parser.finish().unwrap_err();
+        assert!(error.to_report_string().contains("incomplete DeepSeek DSML tool call"));
+    }
+    #[test]
+    fn deepseek_v32_parser_state_resets_after_finish() {
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let first = parser
+            .parse_complete(&build_tool_call("get_weather", &[("location", "SF")]))
+            .unwrap();
+        let second = parser
+            .parse_complete(&build_tool_call("get_weather", &[("location", "NYC")]))
+            .unwrap();
+
+        assert_eq!(first.calls.len(), 1);
+        assert_eq!(second.calls.len(), 1);
+        assert_eq!(
+            serde_json::from_str::<Value>(&second.calls[0].arguments).unwrap(),
+            json!({ "location": "NYC" })
+        );
+    }
+
+    #[test]
+    fn deepseek_v32_streaming_matches_parse_complete() {
+        let full_text = build_tool_call("add", &[("x", "3"), ("y", "4")]);
+        let chunks = split_by_chars(&full_text, 7);
+        let mut streaming_parser = DeepSeekV32ToolParser::new(&test_tools());
+        let streamed = collect_stream(&mut streaming_parser, &chunks);
+
+        let mut parser = DeepSeekV32ToolParser::new(&test_tools());
+        let complete = parser.parse_complete(&full_text).unwrap();
+
+        assert_eq!(streamed.normal_text, complete.normal_text);
+        assert_eq!(streamed.calls, complete.calls);
+    }
+}
diff --git a/rust/src/tool-parser/src/deepseek_dsml/deepseek_v4.rs b/rust/src/tool-parser/src/deepseek_dsml/deepseek_v4.rs
new file mode 100644
index 000000000000..20fe23b95f86
--- /dev/null
+++ b/rust/src/tool-parser/src/deepseek_dsml/deepseek_v4.rs
@@ -0,0 +1,133 @@
+use super::{DeepSeekDsmlToolParser, DsmlTokens};
+use crate::{Result, Tool, ToolParseResult, ToolParser};
+
+/// Tool parser for DeepSeek V4 models.
+///
+/// Example tool call content:
+///
+/// ```text
+/// <｜DSML｜tool_calls>
+/// <｜DSML｜invoke name="get_weather">
+/// <｜DSML｜parameter name="location" string="true">杭州</｜DSML｜parameter>
+/// <｜DSML｜parameter name="date" string="true">2024-01-16</｜DSML｜parameter>
+/// </｜DSML｜invoke>
+/// <｜DSML｜invoke name="get_weather">
+/// <｜DSML｜parameter name="location" string="true">北京</｜DSML｜parameter>
+/// <｜DSML｜parameter name="date" string="true">2024-01-16</｜DSML｜parameter>
+/// </｜DSML｜invoke>
+/// </｜DSML｜tool_calls>
+/// ```
+///
+/// Arguments are emitted only after a full `invoke` block is parsed.
+///
+/// V4 reuses the V3.2 DSML invoke/parameter grammar but wraps calls in
+/// `<｜DSML｜tool_calls>` instead of `<｜DSML｜function_calls>`.
+///
+/// DeepSeek V4 relies on DSML markers such as `｜DSML｜`, which are
+/// represented as special tokens in the tokenizer and therefore must be
+/// preserved during decode for parsing to work.
+pub struct DeepSeekV4ToolParser(DeepSeekDsmlToolParser);
+
+impl DeepSeekV4ToolParser {
+    /// Create a DeepSeek V4 tool parser.
+    fn new(tools: &[Tool]) -> Self {
+        Self(DeepSeekDsmlToolParser::new(tools, DsmlTokens::V4))
+    }
+}
+
+impl ToolParser for DeepSeekV4ToolParser {
+    /// Create a boxed DeepSeek V4 tool parser.
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tools)))
+    }
+
+    /// Preserve DSML special tokens while decoding.
+    fn preserve_special_tokens(&self) -> bool {
+        true
+    }
+
+    /// Push one decoded text chunk through the DSML parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.0.push(chunk)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        self.0.finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use serde_json::{Value, json};
+
+    use super::{DeepSeekV4ToolParser, ToolParser};
+    use crate::test_utils::{collect_stream, test_tools};
+
+    fn build_tool_call(function_name: &str, params: &[(&str, &str)]) -> String {
+        let params = params
+            .iter()
+            .map(|(name, value)| {
+                format!(
+                    r#"<｜DSML｜parameter name="{name}" string="true">{value}</｜DSML｜parameter>"#
+                )
+            })
+            .collect::<Vec<_>>()
+            .join("\n");
+        format!(
+            "<｜DSML｜tool_calls>\n<｜DSML｜invoke name=\"{function_name}\">\n{params}\n</｜DSML｜invoke>\n</｜DSML｜tool_calls>"
+        )
+    }
+
+    #[test]
+    fn deepseek_v4_parse_complete_reuses_dsml_parser_with_tool_calls_token() {
+        let mut parser = DeepSeekV4ToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&build_tool_call(
+                "get_weather",
+                &[("location", "SF"), ("date", "2024-01-16")],
+            ))
+            .unwrap();
+
+        assert!(result.normal_text.is_empty());
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "location": "SF",
+                "date": "2024-01-16"
+            })
+        );
+    }
+
+    #[test]
+    fn deepseek_v4_streaming_handles_tool_calls_token_split_across_chunks() {
+        let mut parser = DeepSeekV4ToolParser::new(&test_tools());
+        let result = collect_stream(
+            &mut parser,
+            &[
+                "Thinking... ",
+                "<｜DSML｜",
+                "tool",
+                "_calls>\n",
+                "<｜DSML｜invoke name=\"get_weather\">\n",
+                "<｜DSML｜parameter name=\"location\" string=\"true\">Beijing</｜DSML｜parameter>\n",
+                "</｜DSML｜invoke>\n",
+                "</｜DSML｜",
+                "tool_calls>",
+            ],
+        );
+
+        assert_eq!(result.normal_text, "Thinking... ");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({ "location": "Beijing" })
+        );
+    }
+}
diff --git a/rust/src/tool-parser/src/deepseek_dsml/mod.rs b/rust/src/tool-parser/src/deepseek_dsml/mod.rs
new file mode 100644
index 000000000000..dda0b2368ce3
--- /dev/null
+++ b/rust/src/tool-parser/src/deepseek_dsml/mod.rs
@@ -0,0 +1,278 @@
+use winnow::ascii::{multispace0 as ws0, multispace1 as ws1};
+use winnow::combinator::{alt, delimited, eof, repeat, seq, terminated};
+use winnow::prelude::*;
+use winnow::stream::Partial;
+use winnow::token::{literal, rest, take_until};
+
+use super::parameters::ToolSchemas;
+use super::utils::{parse_buffered_event, safe_text_len, xml_unescape};
+use super::{Result, ToolCallDelta, ToolParseResult};
+use crate::Tool;
+
+mod deepseek_v32;
+mod deepseek_v4;
+
+pub use deepseek_v4::DeepSeekV4ToolParser;
+pub use deepseek_v32::DeepSeekV32ToolParser;
+
+const INVOKE_START: &str = "<｜DSML｜invoke";
+const INVOKE_END: &str = "</｜DSML｜invoke>";
+const PARAMETER_START: &str = "<｜DSML｜parameter";
+const PARAMETER_END: &str = "</｜DSML｜parameter>";
+
+type DsmlInput<'i> = Partial<&'i str>;
+
+#[derive(Debug, Clone, Copy)]
+struct DsmlTokens {
+    tool_calls_start: &'static str,
+    tool_calls_end: &'static str,
+}
+
+impl DsmlTokens {
+    const V32: Self = Self {
+        tool_calls_start: "<｜DSML｜function_calls>",
+        tool_calls_end: "</｜DSML｜function_calls>",
+    };
+    const V4: Self = Self {
+        tool_calls_start: "<｜DSML｜tool_calls>",
+        tool_calls_end: "</｜DSML｜tool_calls>",
+    };
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum DsmlMode {
+    Text,
+    ToolBlock,
+    Done,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum DsmlEvent {
+    Text {
+        len: usize,
+    },
+    ToolCallsStart,
+    Invoke {
+        name: String,
+        raw_params: Vec<DsmlParameter>,
+    },
+    ToolCallsEnd,
+    IgnoredRest,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct DsmlParameter {
+    name: String,
+    value: String,
+    is_string: bool,
+}
+
+/// Tool parser core for DeepSeek DSML tool calls.
+struct DeepSeekDsmlToolParser {
+    buffer: String,
+    mode: DsmlMode,
+    emitted_invoke_count: usize,
+    tool_parameters: ToolSchemas,
+    tokens: DsmlTokens,
+}
+
+impl DeepSeekDsmlToolParser {
+    /// Create a parser with DSML tokens for one DeepSeek format.
+    fn new(tools: &[Tool], tokens: DsmlTokens) -> Self {
+        Self {
+            buffer: String::new(),
+            mode: DsmlMode::Text,
+            emitted_invoke_count: 0,
+            tool_parameters: ToolSchemas::from_tools(tools),
+            tokens,
+        }
+    }
+
+    /// Apply one parsed DSML event to parser state and output.
+    fn apply_event(&mut self, event: DsmlEvent, result: &mut ToolParseResult) -> Result<()> {
+        match event {
+            DsmlEvent::Text { len: consumed_len } => {
+                result.normal_text.push_str(&self.buffer[..consumed_len]);
+            }
+            DsmlEvent::ToolCallsStart => self.mode = DsmlMode::ToolBlock,
+            DsmlEvent::Invoke { name, raw_params } => {
+                let mut arguments = serde_json::Map::with_capacity(raw_params.len());
+                for param in raw_params {
+                    let value = if param.is_string {
+                        serde_json::Value::String(param.value)
+                    } else {
+                        self.tool_parameters.convert_param_with_schema(
+                            &name,
+                            &param.name,
+                            &param.value,
+                        )
+                    };
+                    arguments.insert(param.name, value);
+                }
+                let arguments = serde_json::to_string(&arguments)
+                    .map_err(|error| parsing_failed!("failed to serialize arguments: {}", error))?;
+
+                result.calls.push(ToolCallDelta {
+                    tool_index: self.emitted_invoke_count,
+                    name: Some(name),
+                    arguments,
+                });
+                self.emitted_invoke_count += 1;
+            }
+            DsmlEvent::ToolCallsEnd => self.mode = DsmlMode::Done,
+            DsmlEvent::IgnoredRest => {}
+        };
+        Ok(())
+    }
+
+    /// Reset all streaming state.
+    fn reset(&mut self) {
+        self.buffer.clear();
+        self.mode = DsmlMode::Text;
+        self.emitted_invoke_count = 0;
+    }
+
+    /// Push one decoded text chunk through the DSML parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        // Extract tool calls from streaming model output.
+        //
+        // Uses a buffer-until-complete-invoke strategy: text is buffered until
+        // a complete invoke block is available, then parsed and emitted in one
+        // shot.
+        self.buffer.push_str(chunk);
+        let mut result = ToolParseResult::default();
+
+        while let Some((event, consumed_len)) = parse_buffered_event(&self.buffer, |input| {
+            parse_next_dsml_event(input, self.mode, self.tokens)
+        })? {
+            self.apply_event(event, &mut result)?;
+            self.buffer.drain(..consumed_len);
+        }
+
+        Ok(result)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        let mut result = ToolParseResult::default();
+        match self.mode {
+            DsmlMode::Text => result.normal_text.push_str(&self.buffer),
+            DsmlMode::Done => {}
+            DsmlMode::ToolBlock => {
+                self.reset();
+                return Err(parsing_failed!("incomplete DeepSeek DSML tool call"));
+            }
+        }
+        self.reset();
+        Ok(result)
+    }
+}
+
+/// Parse a DSML event for the current parser mode.
+fn parse_next_dsml_event(
+    input: &mut DsmlInput<'_>,
+    mode: DsmlMode,
+    tokens: DsmlTokens,
+) -> ModalResult<DsmlEvent> {
+    match mode {
+        DsmlMode::Text => parse_text_event(input, tokens),
+        DsmlMode::ToolBlock => parse_tool_block_event(input, tokens),
+        DsmlMode::Done => ignored_rest_event(input),
+    }
+}
+
+/// Parse a text-mode DSML event.
+fn parse_text_event(input: &mut DsmlInput<'_>, tokens: DsmlTokens) -> ModalResult<DsmlEvent> {
+    alt((
+        |input: &mut DsmlInput<'_>| tool_calls_start_event(input, tokens),
+        |input: &mut DsmlInput<'_>| safe_text_event(input, tokens),
+    ))
+    .parse_next(input)
+}
+
+/// Parse a tool-block DSML event.
+fn parse_tool_block_event(input: &mut DsmlInput<'_>, tokens: DsmlTokens) -> ModalResult<DsmlEvent> {
+    ws0.void().parse_next(input)?;
+    alt((invoke_event, |input: &mut DsmlInput<'_>| {
+        tool_calls_end_event(input, tokens)
+    }))
+    .parse_next(input)
+}
+
+/// Parse a DSML function-calls start marker.
+fn tool_calls_start_event(input: &mut DsmlInput<'_>, tokens: DsmlTokens) -> ModalResult<DsmlEvent> {
+    literal(tokens.tool_calls_start)
+        .value(DsmlEvent::ToolCallsStart)
+        .parse_next(input)
+}
+
+/// Parse a DSML function-calls end marker.
+fn tool_calls_end_event(input: &mut DsmlInput<'_>, tokens: DsmlTokens) -> ModalResult<DsmlEvent> {
+    literal(tokens.tool_calls_end).value(DsmlEvent::ToolCallsEnd).parse_next(input)
+}
+
+/// Parse a trailing rest after DSML function calls.
+fn ignored_rest_event(input: &mut DsmlInput<'_>) -> ModalResult<DsmlEvent> {
+    rest.value(DsmlEvent::IgnoredRest).parse_next(input)
+}
+
+/// Parse a safe text run before the next DSML marker.
+fn safe_text_event(input: &mut DsmlInput<'_>, tokens: DsmlTokens) -> ModalResult<DsmlEvent> {
+    safe_text_len(input, tokens.tool_calls_start).map(|len| DsmlEvent::Text { len })
+}
+
+/// Parse a DSML invoke block.
+fn invoke_event(input: &mut DsmlInput<'_>) -> ModalResult<DsmlEvent> {
+    let (name, body) = seq!(
+        _: literal(INVOKE_START),
+        _: ws1,
+        dsml_name_attr,
+        _: ws0,
+        _: ">",
+        take_until(0.., INVOKE_END),
+        _: literal(INVOKE_END),
+    )
+    .parse_next(input)?;
+    let raw_params = parse_invoke_params(body)?;
+    Ok(DsmlEvent::Invoke {
+        name: name.to_string(),
+        raw_params,
+    })
+}
+
+/// Parse a DSML invoke body.
+fn parse_invoke_params(invoke_body: &str) -> ModalResult<Vec<DsmlParameter>> {
+    let mut input = invoke_body;
+    delimited(ws0, repeat(0.., terminated(parse_parameter, ws0)), eof).parse_next(&mut input)
+}
+
+/// Parse a DSML parameter block.
+fn parse_parameter(input: &mut &str) -> ModalResult<DsmlParameter> {
+    seq! {DsmlParameter {
+        _: literal(PARAMETER_START),
+        _: ws1,
+        name: name_attr.map(|name: &str| name.to_string()),
+        _: ws1,
+        is_string: string_attr.map(|value| value == "true"),
+        _: ws0,
+        _: ">",
+        value: take_until(0.., PARAMETER_END).map(xml_unescape).map(|value| value.into_owned()),
+        _: literal(PARAMETER_END),
+    }}
+    .parse_next(input)
+}
+
+/// Parse a name attribute.
+fn name_attr<'i>(input: &mut &'i str) -> ModalResult<&'i str> {
+    delimited("name=\"", take_until(1.., "\""), "\"").parse_next(input)
+}
+
+/// Parse a string attribute.
+fn string_attr<'i>(input: &mut &'i str) -> ModalResult<&'i str> {
+    delimited("string=\"", alt(("true", "false")), "\"").parse_next(input)
+}
+
+/// Parse a DSML name attribute.
+fn dsml_name_attr<'i>(input: &mut DsmlInput<'i>) -> ModalResult<&'i str> {
+    delimited("name=\"", take_until(1.., "\""), "\"").parse_next(input)
+}
diff --git a/rust/src/tool-parser/src/deepseek_json/deepseek_v3.rs b/rust/src/tool-parser/src/deepseek_json/deepseek_v3.rs
new file mode 100644
index 000000000000..3481951b8c26
--- /dev/null
+++ b/rust/src/tool-parser/src/deepseek_json/deepseek_v3.rs
@@ -0,0 +1,235 @@
+use super::{DeepSeekJsonFormat, DeepSeekJsonToolParser};
+use crate::{Result, Tool, ToolParseResult, ToolParser};
+
+/// Tool parser for DeepSeek V3 JSON-fenced tool calls.
+///
+/// Example tool call content:
+///
+/// ````text
+/// <｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>get_weather
+/// ```json
+/// {"location":"Tokyo"}
+/// ```<｜tool▁call▁end｜><｜tool▁calls▁end｜>
+/// ````
+///
+/// Arguments are already OpenAI-style JSON text inside the markdown fence, so
+/// they are streamed as raw argument deltas without schema conversion or JSON
+/// normalization.
+pub struct DeepSeekV3ToolParser(DeepSeekJsonToolParser);
+
+impl DeepSeekV3ToolParser {
+    /// Create a DeepSeek V3 tool parser.
+    fn new(_tools: &[Tool]) -> Self {
+        Self(DeepSeekJsonToolParser::new(DeepSeekJsonFormat::V3))
+    }
+}
+
+impl ToolParser for DeepSeekV3ToolParser {
+    /// Create a boxed DeepSeek V3 tool parser.
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tools)))
+    }
+
+    /// Push one decoded text chunk through the DeepSeek V3 parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.0.push(chunk)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        self.0.finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use expect_test::expect;
+    use thiserror_ext::AsReport;
+
+    use super::DeepSeekV3ToolParser;
+    use crate::deepseek_json::{
+        TOOL_CALL_SEPARATOR, TOOL_CALL_START, TOOL_CALLS_END, TOOL_CALLS_START, V3_ARGUMENT_END,
+        V3_JSON_START,
+    };
+    use crate::test_utils::{collect_stream, split_by_chars, test_tools};
+    use crate::{ToolParseResult, ToolParser};
+
+    fn v3_tool_call(function_name: &str, arguments: &str) -> String {
+        format!(
+            "{TOOL_CALL_START}function{TOOL_CALL_SEPARATOR}{function_name}{V3_JSON_START}{arguments}{V3_ARGUMENT_END}"
+        )
+    }
+
+    fn tool_section(tool_calls: &[String]) -> String {
+        format!("{TOOL_CALLS_START}{}{TOOL_CALLS_END}", tool_calls.join(""))
+    }
+
+    #[test]
+    fn deepseek_v3_parse_complete_without_tool_call_keeps_text() {
+        let mut parser = DeepSeekV3ToolParser::new(&test_tools());
+        let result = parser.parse_complete("Hello, world!").unwrap();
+
+        assert_eq!(result.normal_text, "Hello, world!");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn deepseek_v3_parse_complete_extracts_raw_json_arguments() {
+        let mut parser = DeepSeekV3ToolParser::new(&test_tools());
+        let arguments = r#"{ "location": "Tokyo", "days": "3" }"#;
+        let result = parser
+            .parse_complete(&format!(
+                "Let me check.\n{} trailing text",
+                tool_section(&[v3_tool_call("get_weather", arguments)])
+            ))
+            .unwrap();
+
+        assert_eq!(result.normal_text, "Let me check.\n");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].tool_index, 0);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn deepseek_v3_does_not_validate_or_normalize_arguments() {
+        let mut parser = DeepSeekV3ToolParser::new(&test_tools());
+        let arguments = r#"{"location":"Tokyo",}"#;
+        let result = parser
+            .parse_complete(&tool_section(&[v3_tool_call("get_weather", arguments)]))
+            .unwrap();
+
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn deepseek_v3_streaming_emits_argument_deltas() {
+        let mut parser = DeepSeekV3ToolParser::new(&test_tools());
+        let chunks = [
+            TOOL_CALLS_START,
+            TOOL_CALL_START,
+            "function",
+            TOOL_CALL_SEPARATOR,
+            "get_weather",
+            V3_JSON_START,
+            "{\"location\":",
+            "\"Beijing\"",
+            "}",
+            V3_ARGUMENT_END,
+            TOOL_CALLS_END,
+        ];
+
+        let mut result = ToolParseResult::default();
+        let mut observed_arguments = Vec::new();
+        for chunk in chunks {
+            let next = parser.push(chunk).unwrap();
+            observed_arguments.extend(
+                next.calls
+                    .iter()
+                    .filter(|call| call.name.is_none())
+                    .map(|call| call.arguments.clone()),
+            );
+            result.append(next);
+        }
+        result.append(parser.finish().unwrap());
+
+        assert_eq!(observed_arguments, ["{\"location\":", "\"Beijing\"", "}"]);
+        assert_eq!(
+            result.coalesce_calls().calls[0].arguments,
+            r#"{"location":"Beijing"}"#
+        );
+    }
+
+    #[test]
+    fn deepseek_v3_streaming_handles_split_markers() {
+        let input = format!(
+            "hello {}",
+            tool_section(&[v3_tool_call("get_weather", r#"{"location":"Tokyo"}"#)])
+        );
+        let chunks = split_by_chars(&input, 5);
+        let mut parser = DeepSeekV3ToolParser::new(&test_tools());
+
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(result.normal_text, "hello ");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].arguments, r#"{"location":"Tokyo"}"#);
+    }
+
+    #[test]
+    fn deepseek_v3_keeps_fenced_end_marker_literal_inside_json_string() {
+        let mut parser = DeepSeekV3ToolParser::new(&test_tools());
+        let arguments = format!("{{\"text\":\"literal {V3_ARGUMENT_END} inside\"}}");
+        let input = tool_section(&[v3_tool_call("echo", &arguments)]);
+
+        let result = parser.parse_complete(&input).unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn deepseek_v3_streaming_extracts_multiple_tool_calls() {
+        let input = tool_section(&[
+            v3_tool_call("get_weather", r#"{"location":"Shanghai"}"#),
+            v3_tool_call("add", r#"{"x":1,"y":2}"#),
+        ]);
+        let chunks = split_by_chars(&input, 7);
+        let mut parser = DeepSeekV3ToolParser::new(&test_tools());
+
+        let result = collect_stream(&mut parser, &chunks);
+
+        expect![[r#"
+            ToolParseResult {
+                normal_text: "",
+                calls: [
+                    ToolCallDelta {
+                        tool_index: 0,
+                        name: Some(
+                            "get_weather",
+                        ),
+                        arguments: "{\"location\":\"Shanghai\"}",
+                    },
+                    ToolCallDelta {
+                        tool_index: 1,
+                        name: Some(
+                            "add",
+                        ),
+                        arguments: "{\"x\":1,\"y\":2}",
+                    },
+                ],
+            }
+        "#]]
+        .assert_debug_eq(&result);
+    }
+
+    #[test]
+    fn deepseek_v3_finish_fails_incomplete_tool_call() {
+        let mut parser = DeepSeekV3ToolParser::new(&test_tools());
+        parser
+            .push(&format!(
+                "{TOOL_CALLS_START}{TOOL_CALL_START}function{TOOL_CALL_SEPARATOR}get_weather{V3_JSON_START}{{\"location\""
+            ))
+            .unwrap();
+
+        let error = parser.finish().unwrap_err();
+
+        expect!["tool parser parsing failed: incomplete DeepSeek V3 tool call"]
+            .assert_eq(&error.to_report_string());
+    }
+
+    #[test]
+    fn deepseek_v3_malformed_type_fails_fast() {
+        let mut parser = DeepSeekV3ToolParser::new(&test_tools());
+        let input = format!(
+            "{TOOL_CALLS_START}{TOOL_CALL_START}tool{TOOL_CALL_SEPARATOR}get_weather{V3_JSON_START}{{}}"
+        );
+
+        let error = parser.push(&input).unwrap_err();
+
+        expect!["tool parser parsing failed: "].assert_eq(&error.to_report_string());
+    }
+}
diff --git a/rust/src/tool-parser/src/deepseek_json/deepseek_v31.rs b/rust/src/tool-parser/src/deepseek_json/deepseek_v31.rs
new file mode 100644
index 000000000000..bde6bc8fe8a0
--- /dev/null
+++ b/rust/src/tool-parser/src/deepseek_json/deepseek_v31.rs
@@ -0,0 +1,239 @@
+use super::{DeepSeekJsonFormat, DeepSeekJsonToolParser};
+use crate::{Result, Tool, ToolParseResult, ToolParser};
+
+/// Tool parser for DeepSeek V3.1 raw JSON tool calls.
+///
+/// Example tool call content:
+///
+/// ```text
+/// <｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{"location":"Tokyo"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>
+/// ```
+///
+/// Arguments are already OpenAI-style JSON text, so they are streamed as raw
+/// argument deltas without schema conversion or JSON normalization.
+pub struct DeepSeekV31ToolParser(DeepSeekJsonToolParser);
+
+impl DeepSeekV31ToolParser {
+    /// Create a DeepSeek V3.1 tool parser.
+    fn new(_tools: &[Tool]) -> Self {
+        Self(DeepSeekJsonToolParser::new(DeepSeekJsonFormat::V31))
+    }
+}
+
+impl ToolParser for DeepSeekV31ToolParser {
+    /// Create a boxed DeepSeek V3.1 tool parser.
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tools)))
+    }
+
+    /// Push one decoded text chunk through the DeepSeek V3.1 parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.0.push(chunk)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        self.0.finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use expect_test::expect;
+    use thiserror_ext::AsReport;
+
+    use super::DeepSeekV31ToolParser;
+    use crate::deepseek_json::{
+        TOOL_CALL_END, TOOL_CALL_SEPARATOR, TOOL_CALL_START, TOOL_CALLS_END, TOOL_CALLS_START,
+    };
+    use crate::test_utils::{collect_stream, split_by_chars, test_tools};
+    use crate::{ToolParseResult, ToolParser};
+
+    fn v31_tool_call(function_name: &str, arguments: &str) -> String {
+        format!("{TOOL_CALL_START}{function_name}{TOOL_CALL_SEPARATOR}{arguments}{TOOL_CALL_END}")
+    }
+
+    fn tool_section(tool_calls: &[String]) -> String {
+        format!("{TOOL_CALLS_START}{}{TOOL_CALLS_END}", tool_calls.join(""))
+    }
+
+    #[test]
+    fn deepseek_v31_parse_complete_without_tool_call_keeps_text() {
+        let mut parser = DeepSeekV31ToolParser::new(&test_tools());
+        let result = parser.parse_complete("Hello, world!").unwrap();
+
+        assert_eq!(result.normal_text, "Hello, world!");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn deepseek_v31_parse_complete_extracts_raw_json_arguments() {
+        let mut parser = DeepSeekV31ToolParser::new(&test_tools());
+        let arguments = r#"{ "location": "Tokyo", "days": "3" }"#;
+        let result = parser
+            .parse_complete(&format!(
+                "Let me check.{} trailing text",
+                tool_section(&[v31_tool_call("get_weather", arguments)])
+            ))
+            .unwrap();
+
+        assert_eq!(result.normal_text, "Let me check.");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].tool_index, 0);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn deepseek_v31_does_not_validate_or_normalize_arguments() {
+        let mut parser = DeepSeekV31ToolParser::new(&test_tools());
+        let arguments = r#"{"location":"Tokyo",}"#;
+        let result = parser
+            .parse_complete(&tool_section(&[v31_tool_call("get_weather", arguments)]))
+            .unwrap();
+
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn deepseek_v31_streaming_emits_argument_deltas() {
+        let mut parser = DeepSeekV31ToolParser::new(&test_tools());
+        let chunks = [
+            TOOL_CALLS_START,
+            TOOL_CALL_START,
+            "get_weather",
+            TOOL_CALL_SEPARATOR,
+            "{\"location\":",
+            "\"Beijing\"",
+            "}",
+            TOOL_CALL_END,
+            TOOL_CALLS_END,
+        ];
+
+        let mut result = ToolParseResult::default();
+        let mut observed_arguments = Vec::new();
+        for chunk in chunks {
+            let next = parser.push(chunk).unwrap();
+            observed_arguments.extend(
+                next.calls
+                    .iter()
+                    .filter(|call| call.name.is_none())
+                    .map(|call| call.arguments.clone()),
+            );
+            result.append(next);
+        }
+        result.append(parser.finish().unwrap());
+
+        assert_eq!(observed_arguments, ["{\"location\":", "\"Beijing\"", "}"]);
+        assert_eq!(
+            result.coalesce_calls().calls[0].arguments,
+            r#"{"location":"Beijing"}"#
+        );
+    }
+
+    #[test]
+    fn deepseek_v31_streaming_handles_split_markers() {
+        let input = format!(
+            "hello {}",
+            tool_section(&[v31_tool_call("get_weather", r#"{"location":"Tokyo"}"#)])
+        );
+        let chunks = split_by_chars(&input, 5);
+        let mut parser = DeepSeekV31ToolParser::new(&test_tools());
+
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(result.normal_text, "hello ");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].arguments, r#"{"location":"Tokyo"}"#);
+    }
+
+    #[test]
+    fn deepseek_v31_keeps_end_marker_literal_inside_json_string() {
+        let mut parser = DeepSeekV31ToolParser::new(&test_tools());
+        let arguments = format!(r#"{{"text":"literal {TOOL_CALL_END} inside"}}"#);
+        let input = tool_section(&[v31_tool_call("echo", &arguments)]);
+
+        let result = parser.parse_complete(&input).unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn deepseek_v31_streaming_extracts_multiple_tool_calls() {
+        let input = tool_section(&[
+            v31_tool_call("get_weather", r#"{"location":"Shanghai"}"#),
+            v31_tool_call("add", r#"{"x":1,"y":2}"#),
+        ]);
+        let chunks = split_by_chars(&input, 7);
+        let mut parser = DeepSeekV31ToolParser::new(&test_tools());
+
+        let result = collect_stream(&mut parser, &chunks);
+
+        expect![[r#"
+            ToolParseResult {
+                normal_text: "",
+                calls: [
+                    ToolCallDelta {
+                        tool_index: 0,
+                        name: Some(
+                            "get_weather",
+                        ),
+                        arguments: "{\"location\":\"Shanghai\"}",
+                    },
+                    ToolCallDelta {
+                        tool_index: 1,
+                        name: Some(
+                            "add",
+                        ),
+                        arguments: "{\"x\":1,\"y\":2}",
+                    },
+                ],
+            }
+        "#]]
+        .assert_debug_eq(&result);
+    }
+
+    #[test]
+    fn deepseek_v31_streaming_drops_eos_after_complete_tool_calls() {
+        let input = format!(
+            "{}<｜end▁of▁sentence｜>",
+            tool_section(&[v31_tool_call("get_weather", r#"{"location":"Tokyo"}"#)])
+        );
+        let mut parser = DeepSeekV31ToolParser::new(&test_tools());
+
+        let result = collect_stream(&mut parser, &[&input]);
+
+        assert!(result.normal_text.is_empty());
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].arguments, r#"{"location":"Tokyo"}"#);
+    }
+
+    #[test]
+    fn deepseek_v31_finish_fails_incomplete_tool_call() {
+        let mut parser = DeepSeekV31ToolParser::new(&test_tools());
+        parser
+            .push(&format!(
+                "{TOOL_CALLS_START}{TOOL_CALL_START}get_weather{TOOL_CALL_SEPARATOR}{{\"location\""
+            ))
+            .unwrap();
+
+        let error = parser.finish().unwrap_err();
+
+        expect!["tool parser parsing failed: incomplete DeepSeek V3.1 tool call"]
+            .assert_eq(&error.to_report_string());
+    }
+
+    #[test]
+    fn deepseek_v31_malformed_empty_name_fails_fast() {
+        let mut parser = DeepSeekV31ToolParser::new(&test_tools());
+        let input = format!("{TOOL_CALLS_START}{TOOL_CALL_START}{TOOL_CALL_SEPARATOR}{{}}");
+
+        let error = parser.push(&input).unwrap_err();
+
+        expect!["tool parser parsing failed: "].assert_eq(&error.to_report_string());
+    }
+}
diff --git a/rust/src/tool-parser/src/deepseek_json/mod.rs b/rust/src/tool-parser/src/deepseek_json/mod.rs
new file mode 100644
index 000000000000..a24d47ae468a
--- /dev/null
+++ b/rust/src/tool-parser/src/deepseek_json/mod.rs
@@ -0,0 +1,308 @@
+mod deepseek_v3;
+mod deepseek_v31;
+
+pub use deepseek_v3::DeepSeekV3ToolParser;
+pub use deepseek_v31::DeepSeekV31ToolParser;
+use winnow::ascii::multispace0 as ws0;
+use winnow::combinator::{alt, seq};
+use winnow::prelude::*;
+use winnow::stream::Partial;
+use winnow::token::{literal, rest, take_until};
+
+use super::utils::{JsonObjectScanState, parse_buffered_event, safe_text_len, take_json_object};
+use super::{Result, ToolCallDelta, ToolParseResult};
+
+pub(super) const TOOL_CALLS_START: &str = "<｜tool▁calls▁begin｜>";
+pub(super) const TOOL_CALLS_END: &str = "<｜tool▁calls▁end｜>";
+pub(super) const TOOL_CALL_START: &str = "<｜tool▁call▁begin｜>";
+pub(super) const TOOL_CALL_END: &str = "<｜tool▁call▁end｜>";
+pub(super) const TOOL_CALL_SEPARATOR: &str = "<｜tool▁sep｜>";
+pub(super) const V3_JSON_START: &str = "\n```json\n";
+pub(super) const V3_ARGUMENT_END: &str = "\n```<｜tool▁call▁end｜>";
+
+type DeepSeekJsonInput<'i> = Partial<&'i str>;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum DeepSeekJsonFormat {
+    V3,
+    V31,
+}
+
+impl DeepSeekJsonFormat {
+    /// Return the parser name used in diagnostics.
+    const fn parser_name(self) -> &'static str {
+        match self {
+            Self::V3 => "DeepSeek V3",
+            Self::V31 => "DeepSeek V3.1",
+        }
+    }
+
+    /// Return the marker that closes the raw JSON arguments payload.
+    const fn argument_end_marker(self) -> &'static str {
+        match self {
+            Self::V3 => V3_ARGUMENT_END,
+            Self::V31 => TOOL_CALL_END,
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum DeepSeekJsonMode {
+    Text,
+    ToolBlock,
+    Header,
+    Arguments { json_scan: JsonObjectScanState },
+    Done,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum DeepSeekJsonEvent {
+    Text { len: usize },
+    ToolCallsStart,
+    ToolCallStart,
+    ToolCallHeader { function_name: String },
+    Arguments { len: usize },
+    ToolCallEnd,
+    ToolCallsEnd,
+    IgnoredRest,
+}
+
+/// Tool parser core for DeepSeek JSON-argument tool calls.
+struct DeepSeekJsonToolParser {
+    buffer: String,
+    mode: DeepSeekJsonMode,
+    active_tool_index: Option<usize>,
+    emitted_tool_count: usize,
+    format: DeepSeekJsonFormat,
+}
+
+impl DeepSeekJsonToolParser {
+    /// Create a parser for one DeepSeek JSON-argument format.
+    fn new(format: DeepSeekJsonFormat) -> Self {
+        Self {
+            buffer: String::new(),
+            mode: DeepSeekJsonMode::Text,
+            active_tool_index: None,
+            emitted_tool_count: 0,
+            format,
+        }
+    }
+
+    /// Apply one parsed DeepSeek JSON event to parser state and output.
+    fn apply_event(
+        &mut self,
+        event: DeepSeekJsonEvent,
+        result: &mut ToolParseResult,
+    ) -> Result<()> {
+        match event {
+            DeepSeekJsonEvent::Text { len: consumed_len } => {
+                result.normal_text.push_str(&self.buffer[..consumed_len]);
+            }
+            DeepSeekJsonEvent::ToolCallsStart => self.mode = DeepSeekJsonMode::ToolBlock,
+            DeepSeekJsonEvent::ToolCallStart => self.mode = DeepSeekJsonMode::Header,
+            DeepSeekJsonEvent::ToolCallHeader { function_name } => {
+                let tool_index = self.emitted_tool_count;
+                self.emitted_tool_count += 1;
+                self.active_tool_index = Some(tool_index);
+                self.mode = DeepSeekJsonMode::Arguments {
+                    json_scan: JsonObjectScanState::default(),
+                };
+                result.calls.push(ToolCallDelta {
+                    tool_index,
+                    name: Some(function_name),
+                    arguments: String::new(),
+                });
+            }
+            DeepSeekJsonEvent::Arguments { len: consumed_len } => {
+                let Some(tool_index) = self.active_tool_index else {
+                    return Err(parsing_failed!(
+                        "{} arguments without an active tool call",
+                        self.format.parser_name()
+                    ));
+                };
+                result.calls.push(ToolCallDelta {
+                    tool_index,
+                    name: None,
+                    arguments: self.buffer[..consumed_len].to_string(),
+                });
+            }
+            DeepSeekJsonEvent::ToolCallEnd => {
+                self.active_tool_index = None;
+                self.mode = DeepSeekJsonMode::ToolBlock;
+            }
+            DeepSeekJsonEvent::ToolCallsEnd => {
+                self.active_tool_index = None;
+                self.mode = DeepSeekJsonMode::Done;
+            }
+            DeepSeekJsonEvent::IgnoredRest => {}
+        }
+        Ok(())
+    }
+
+    /// Push one decoded text chunk through the DeepSeek JSON parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.buffer.push_str(chunk);
+        let mut result = ToolParseResult::default();
+
+        while let Some((event, consumed_len)) = parse_buffered_event(&self.buffer, |input| {
+            parse_next_deepseek_json_event(input, &mut self.mode, self.format)
+        })? {
+            self.apply_event(event, &mut result)?;
+            self.buffer.drain(..consumed_len);
+        }
+
+        Ok(result)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        let mut result = ToolParseResult::default();
+        match &self.mode {
+            DeepSeekJsonMode::Text => result.normal_text.push_str(&self.buffer),
+            DeepSeekJsonMode::ToolBlock | DeepSeekJsonMode::Done => {}
+            DeepSeekJsonMode::Header | DeepSeekJsonMode::Arguments { .. } => {
+                return Err(parsing_failed!(
+                    "incomplete {} tool call",
+                    self.format.parser_name()
+                ));
+            }
+        }
+        self.reset();
+        Ok(result)
+    }
+
+    /// Reset all streaming state.
+    fn reset(&mut self) {
+        self.buffer.clear();
+        self.mode = DeepSeekJsonMode::Text;
+        self.active_tool_index = None;
+        self.emitted_tool_count = 0;
+    }
+}
+
+/// Parse a DeepSeek JSON event for the current parser mode.
+fn parse_next_deepseek_json_event(
+    input: &mut DeepSeekJsonInput<'_>,
+    mode: &mut DeepSeekJsonMode,
+    format: DeepSeekJsonFormat,
+) -> ModalResult<DeepSeekJsonEvent> {
+    match mode {
+        DeepSeekJsonMode::Text => parse_text_event(input),
+        DeepSeekJsonMode::ToolBlock => parse_tool_block_event(input),
+        DeepSeekJsonMode::Header => tool_call_header_event(input, format),
+        DeepSeekJsonMode::Arguments { json_scan } => {
+            parse_arguments_event(input, format, json_scan)
+        }
+        DeepSeekJsonMode::Done => ignored_rest_event(input),
+    }
+}
+
+/// Parse a text-mode DeepSeek JSON event.
+fn parse_text_event(input: &mut DeepSeekJsonInput<'_>) -> ModalResult<DeepSeekJsonEvent> {
+    alt((tool_calls_start_event, safe_text_event)).parse_next(input)
+}
+
+/// Parse one event inside the DeepSeek tool-calls section.
+fn parse_tool_block_event(input: &mut DeepSeekJsonInput<'_>) -> ModalResult<DeepSeekJsonEvent> {
+    ws0.void().parse_next(input)?;
+    alt((tool_calls_end_event, tool_call_start_event)).parse_next(input)
+}
+
+/// Parse one event inside a DeepSeek tool-call arguments payload.
+fn parse_arguments_event(
+    input: &mut DeepSeekJsonInput<'_>,
+    format: DeepSeekJsonFormat,
+    json_scan: &mut JsonObjectScanState,
+) -> ModalResult<DeepSeekJsonEvent> {
+    if json_scan.complete() {
+        tool_call_end_event(input, format)
+    } else {
+        argument_delta_event(input, json_scan)
+    }
+}
+
+/// Parse a DeepSeek tool-calls start marker.
+fn tool_calls_start_event(input: &mut DeepSeekJsonInput<'_>) -> ModalResult<DeepSeekJsonEvent> {
+    literal(TOOL_CALLS_START)
+        .value(DeepSeekJsonEvent::ToolCallsStart)
+        .parse_next(input)
+}
+
+/// Parse a DeepSeek tool-calls end marker.
+fn tool_calls_end_event(input: &mut DeepSeekJsonInput<'_>) -> ModalResult<DeepSeekJsonEvent> {
+    literal(TOOL_CALLS_END).value(DeepSeekJsonEvent::ToolCallsEnd).parse_next(input)
+}
+
+/// Parse a DeepSeek tool-call start marker.
+fn tool_call_start_event(input: &mut DeepSeekJsonInput<'_>) -> ModalResult<DeepSeekJsonEvent> {
+    literal(TOOL_CALL_START)
+        .value(DeepSeekJsonEvent::ToolCallStart)
+        .parse_next(input)
+}
+
+/// Parse a DeepSeek tool-call end marker.
+fn tool_call_end_event(
+    input: &mut DeepSeekJsonInput<'_>,
+    format: DeepSeekJsonFormat,
+) -> ModalResult<DeepSeekJsonEvent> {
+    literal(format.argument_end_marker())
+        .value(DeepSeekJsonEvent::ToolCallEnd)
+        .parse_next(input)
+}
+
+/// Parse a DeepSeek tool-call header before the JSON arguments payload.
+fn tool_call_header_event(
+    input: &mut DeepSeekJsonInput<'_>,
+    format: DeepSeekJsonFormat,
+) -> ModalResult<DeepSeekJsonEvent> {
+    match format {
+        DeepSeekJsonFormat::V3 => v3_tool_call_header_event(input),
+        DeepSeekJsonFormat::V31 => v31_tool_call_header_event(input),
+    }
+}
+
+/// Parse a DeepSeek V3 tool-call header.
+fn v3_tool_call_header_event(input: &mut DeepSeekJsonInput<'_>) -> ModalResult<DeepSeekJsonEvent> {
+    let name = seq!(
+        _: literal("function"),
+        _: literal(TOOL_CALL_SEPARATOR),
+        take_until(1.., V3_JSON_START),
+        _: literal(V3_JSON_START),
+    )
+    .parse_next(input)?;
+
+    Ok(DeepSeekJsonEvent::ToolCallHeader {
+        function_name: name.0.trim().to_string(),
+    })
+}
+
+/// Parse a DeepSeek V3.1 tool-call header.
+fn v31_tool_call_header_event(input: &mut DeepSeekJsonInput<'_>) -> ModalResult<DeepSeekJsonEvent> {
+    let (name, _) = (
+        take_until(1.., TOOL_CALL_SEPARATOR),
+        literal(TOOL_CALL_SEPARATOR),
+    )
+        .parse_next(input)?;
+
+    Ok(DeepSeekJsonEvent::ToolCallHeader {
+        function_name: name.trim().to_string(),
+    })
+}
+
+/// Parse a DeepSeek raw JSON arguments delta.
+fn argument_delta_event(
+    input: &mut DeepSeekJsonInput<'_>,
+    json_scan: &mut JsonObjectScanState,
+) -> ModalResult<DeepSeekJsonEvent> {
+    take_json_object(input, json_scan).map(|len| DeepSeekJsonEvent::Arguments { len })
+}
+
+/// Parse a safe text run before the next DeepSeek tool-calls section.
+fn safe_text_event(input: &mut DeepSeekJsonInput<'_>) -> ModalResult<DeepSeekJsonEvent> {
+    safe_text_len(input, TOOL_CALLS_START).map(|len| DeepSeekJsonEvent::Text { len })
+}
+
+/// Parse ignored rest after the DeepSeek tool-calls section ends.
+fn ignored_rest_event(input: &mut DeepSeekJsonInput<'_>) -> ModalResult<DeepSeekJsonEvent> {
+    rest.value(DeepSeekJsonEvent::IgnoredRest).parse_next(input)
+}
diff --git a/rust/src/tool-parser/src/error.rs b/rust/src/tool-parser/src/error.rs
new file mode 100644
index 000000000000..0ac4a02c658c
--- /dev/null
+++ b/rust/src/tool-parser/src/error.rs
@@ -0,0 +1,13 @@
+use thiserror::Error;
+use thiserror_ext::Macro;
+
+/// Result alias for tool parser operations.
+pub type Result<T> = std::result::Result<T, ToolParserError>;
+
+/// Errors produced while creating or running tool parsers.
+#[derive(Debug, Error, Macro)]
+#[thiserror_ext(macro(path = "crate::error"))]
+pub enum ToolParserError {
+    #[error("tool parser parsing failed: {message}")]
+    ParsingFailed { message: String },
+}
diff --git a/rust/src/tool-parser/src/gemma4.rs b/rust/src/tool-parser/src/gemma4.rs
new file mode 100644
index 000000000000..2eb3608a5bbf
--- /dev/null
+++ b/rust/src/tool-parser/src/gemma4.rs
@@ -0,0 +1,653 @@
+use serde_json::{Map, Number, Value};
+use winnow::ascii::multispace0 as ws0;
+use winnow::combinator::{alt, delimited, opt, separated, seq, terminated};
+use winnow::error::{ContextError, ErrMode, ModalResult};
+use winnow::prelude::*;
+use winnow::stream::Partial;
+use winnow::token::{literal, take_till, take_until};
+
+use super::utils::{parse_buffered_event, safe_text_len};
+use super::{Result, ToolCallDelta, ToolParseResult, ToolParser};
+use crate::Tool;
+
+const TOOL_CALL_START: &str = "<|tool_call>";
+const TOOL_CALL_END: &str = "<tool_call|>";
+const STRING_DELIM: &str = "<|\"|>";
+const CALL_PREFIX: &str = "call:";
+
+type Gemma4Input<'i> = Partial<&'i str>;
+
+#[derive(Debug, Clone, PartialEq)]
+enum Gemma4Event {
+    Text {
+        len: usize,
+    },
+    ToolCall {
+        name: String,
+        args: Map<String, Value>,
+    },
+}
+
+/// Tool parser for Google Gemma4 models.
+///
+/// Original Python implementation:
+/// <https://github.com/vllm-project/vllm/blob/bf45e6d0a558da2b8d7b60efb07b4aa394f3b60b/vllm/tool_parsers/gemma4_tool_parser.py>
+///
+/// Handles the Gemma4 function call format:
+///
+/// `<|tool_call>call:func_name{key:<|"|>value<|"|>}<tool_call|>`
+///
+/// Arguments are emitted only after a full Gemma4 tool call is parsed.
+pub struct Gemma4ToolParser {
+    buffer: String,
+    emitted_tool_count: usize,
+}
+
+impl Gemma4ToolParser {
+    fn new(_tools: &[Tool]) -> Self {
+        Self {
+            buffer: String::new(),
+            emitted_tool_count: 0,
+        }
+    }
+
+    fn apply_event(&mut self, event: Gemma4Event, result: &mut ToolParseResult) -> Result<()> {
+        match event {
+            Gemma4Event::Text { len: consumed_len } => {
+                result.normal_text.push_str(&self.buffer[..consumed_len]);
+            }
+            Gemma4Event::ToolCall { name, args } => {
+                let arguments = serde_json::to_string(&args)
+                    .map_err(|error| parsing_failed!("failed to serialize arguments: {}", error))?;
+
+                result.calls.push(ToolCallDelta {
+                    tool_index: self.emitted_tool_count,
+                    name: Some(name),
+                    arguments,
+                });
+                self.emitted_tool_count += 1;
+            }
+        }
+        Ok(())
+    }
+
+    fn reset(&mut self) {
+        self.buffer.clear();
+        self.emitted_tool_count = 0;
+    }
+}
+
+impl ToolParser for Gemma4ToolParser {
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tools)))
+    }
+
+    fn preserve_special_tokens(&self) -> bool {
+        true
+    }
+
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.buffer.push_str(chunk);
+        let mut result = ToolParseResult::default();
+
+        while let Some((event, consumed_len)) =
+            parse_buffered_event(&self.buffer, parse_next_gemma4_event)?
+        {
+            self.apply_event(event, &mut result)?;
+            self.buffer.drain(..consumed_len);
+        }
+
+        Ok(result)
+    }
+
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        let mut result = ToolParseResult::default();
+
+        if !self.buffer.is_empty() {
+            if self.buffer.starts_with(TOOL_CALL_START) {
+                self.reset();
+                return Err(parsing_failed!("incomplete Gemma4 tool call"));
+            }
+            result.normal_text.push_str(&self.buffer);
+        }
+
+        self.reset();
+        Ok(result)
+    }
+}
+
+/// Parse one Gemma4 event from buffered streaming input.
+fn parse_next_gemma4_event(input: &mut Gemma4Input<'_>) -> ModalResult<Gemma4Event> {
+    alt((tool_call_event, safe_text_event)).parse_next(input)
+}
+
+/// Parse a complete Gemma4 tool call.
+// TODO: incremental parsing arguments to reduce scanning from O(n^2) to O(n).
+fn tool_call_event(input: &mut Gemma4Input<'_>) -> ModalResult<Gemma4Event> {
+    let (name, args) = seq!(
+        _: literal(TOOL_CALL_START),
+        _: literal(CALL_PREFIX),
+        gemma4_tool_name,
+        _: literal("{"),
+        gemma4_args,
+        _: literal("}"),
+        _: literal(TOOL_CALL_END),
+    )
+    .parse_next(input)?;
+
+    Ok(Gemma4Event::ToolCall { name, args })
+}
+
+/// Parse a Gemma4 tool name.
+fn gemma4_tool_name(input: &mut Gemma4Input<'_>) -> ModalResult<String> {
+    let name = take_until(1.., "{").parse_next(input)?.trim();
+    if name.is_empty() {
+        return Err(ErrMode::Cut(ContextError::new()));
+    }
+    Ok(name.to_string())
+}
+
+/// Parse a safe text run before the next Gemma4 marker.
+fn safe_text_event(input: &mut Gemma4Input<'_>) -> ModalResult<Gemma4Event> {
+    safe_text_len(input, TOOL_CALL_START).map(|len| Gemma4Event::Text { len })
+}
+
+/// Parse Gemma4's custom key-value argument object content.
+fn gemma4_args(input: &mut Gemma4Input<'_>) -> ModalResult<Map<String, Value>> {
+    let pairs: Vec<(String, Value)> = delimited(
+        ws0,
+        terminated(
+            separated(0.., gemma4_pair, comma_separator),
+            opt(comma_separator),
+        ),
+        ws0,
+    )
+    .parse_next(input)?;
+    Ok(pairs.into_iter().collect())
+}
+
+/// Parse a Gemma4 key-value pair.
+fn gemma4_pair(input: &mut Gemma4Input<'_>) -> ModalResult<(String, Value)> {
+    let (key, value) = seq!(
+        _: ws0,
+        gemma4_key,
+        _: ws0,
+        _: literal(":"),
+        _: ws0,
+        gemma4_value,
+    )
+    .parse_next(input)?;
+    Ok((key, value))
+}
+
+/// Parse a Gemma4 bare key.
+fn gemma4_key(input: &mut Gemma4Input<'_>) -> ModalResult<String> {
+    let key = take_till(1.., |char: char| char == ':').parse_next(input)?.trim();
+    if key.is_empty() {
+        return Err(ErrMode::Cut(ContextError::new()));
+    }
+    Ok(key.to_string())
+}
+
+/// Parse a Gemma4 value.
+fn gemma4_value(input: &mut Gemma4Input<'_>) -> ModalResult<Value> {
+    alt((
+        gemma4_string.map(|value: &str| Value::String(value.to_string())),
+        gemma4_object.map(Value::Object),
+        gemma4_array_value.map(Value::Array),
+        gemma4_bare_value,
+    ))
+    .parse_next(input)
+}
+
+/// Parse a Gemma4 string delimited by `<|"|>`.
+fn gemma4_string<'i>(input: &mut Gemma4Input<'i>) -> ModalResult<&'i str> {
+    delimited(
+        literal(STRING_DELIM),
+        take_until(0.., STRING_DELIM),
+        literal(STRING_DELIM),
+    )
+    .parse_next(input)
+}
+
+/// Parse a nested Gemma4 object.
+fn gemma4_object(input: &mut Gemma4Input<'_>) -> ModalResult<Map<String, Value>> {
+    delimited(literal("{"), gemma4_args, literal("}")).parse_next(input)
+}
+
+/// Parse a Gemma4 array value.
+fn gemma4_array_value(input: &mut Gemma4Input<'_>) -> ModalResult<Vec<Value>> {
+    delimited(literal("["), gemma4_array_content, literal("]")).parse_next(input)
+}
+
+/// Parse Gemma4 array content.
+fn gemma4_array_content(input: &mut Gemma4Input<'_>) -> ModalResult<Vec<Value>> {
+    delimited(
+        ws0,
+        terminated(
+            separated(0.., gemma4_value, comma_separator),
+            opt(comma_separator),
+        ),
+        ws0,
+    )
+    .parse_next(input)
+}
+
+/// Parse a Gemma4 bare scalar.
+fn gemma4_bare_value(input: &mut Gemma4Input<'_>) -> ModalResult<Value> {
+    take_till(1.., |char: char| matches!(char, ',' | '}' | ']'))
+        .map(parse_gemma4_scalar)
+        .parse_next(input)
+}
+
+/// Parse a Gemma4 comma separator.
+fn comma_separator(input: &mut Gemma4Input<'_>) -> ModalResult<()> {
+    delimited(ws0, literal(","), ws0).void().parse_next(input)
+}
+
+fn parse_gemma4_scalar(value: &str) -> Value {
+    let value = value.trim();
+    if value.is_empty() {
+        return Value::String(String::new());
+    }
+    if value == "true" {
+        return Value::Bool(true);
+    }
+    if value == "false" {
+        return Value::Bool(false);
+    }
+    if matches!(value, "null" | "none" | "nil" | "NULL" | "None" | "NIL") {
+        return Value::Null;
+    }
+    if value.contains('.') {
+        if let Ok(parsed) = value.parse::<f64>()
+            && let Some(number) = Number::from_f64(parsed)
+        {
+            return Value::Number(number);
+        }
+    } else if let Ok(parsed) = value.parse::<i64>() {
+        return Value::Number(Number::from(parsed));
+    }
+
+    Value::String(value.to_string())
+}
+
+#[cfg(test)]
+mod tests {
+    use serde_json::{Value, json};
+    use thiserror_ext::AsReport;
+    use winnow::combinator::{eof, terminated};
+    use winnow::error::ErrMode;
+    use winnow::prelude::*;
+    use winnow::stream::Partial;
+
+    use super::{
+        Gemma4ToolParser, ToolCallDelta, ToolParseResult, ToolParser, gemma4_args,
+        gemma4_array_content,
+    };
+    use crate::Tool;
+
+    fn parse_gemma4_args(args: &str) -> super::Result<serde_json::Map<String, Value>> {
+        let mut input = Partial::new(args);
+        let _ = input.complete();
+        match terminated(gemma4_args, eof).parse_next(&mut input) {
+            Ok(value) => Ok(value),
+            Err(ErrMode::Incomplete(_)) => Err(parsing_failed!("incomplete Gemma4 arguments")),
+            Err(ErrMode::Backtrack(error) | ErrMode::Cut(error)) => {
+                Err(parsing_failed!("{}", error))
+            }
+        }
+    }
+
+    fn parse_gemma4_array(array: &str) -> super::Result<Vec<Value>> {
+        let mut input = Partial::new(array);
+        let _ = input.complete();
+        match terminated(gemma4_array_content, eof).parse_next(&mut input) {
+            Ok(value) => Ok(value),
+            Err(ErrMode::Incomplete(_)) => Err(parsing_failed!("incomplete Gemma4 array")),
+            Err(ErrMode::Backtrack(error) | ErrMode::Cut(error)) => {
+                Err(parsing_failed!("{}", error))
+            }
+        }
+    }
+
+    fn test_tools() -> Vec<Tool> {
+        vec![
+            Tool {
+                name: "get_weather".to_string(),
+                description: None,
+                parameters: json!({ "type": "object" }),
+                strict: None,
+            },
+            Tool {
+                name: "get_time".to_string(),
+                description: None,
+                parameters: json!({ "type": "object" }),
+                strict: None,
+            },
+            Tool {
+                name: "write_file".to_string(),
+                description: None,
+                parameters: json!({ "type": "object" }),
+                strict: None,
+            },
+            Tool {
+                name: "Edit".to_string(),
+                description: None,
+                parameters: json!({ "type": "object" }),
+                strict: None,
+            },
+            Tool {
+                name: "search".to_string(),
+                description: None,
+                parameters: json!({ "type": "object" }),
+                strict: None,
+            },
+            Tool {
+                name: "set".to_string(),
+                description: None,
+                parameters: json!({ "type": "object" }),
+                strict: None,
+            },
+            Tool {
+                name: "get_status".to_string(),
+                description: None,
+                parameters: json!({ "type": "object" }),
+                strict: None,
+            },
+            Tool {
+                name: "todowrite".to_string(),
+                description: None,
+                parameters: json!({ "type": "object" }),
+                strict: None,
+            },
+        ]
+    }
+
+    fn collect_stream(chunks: &[&str]) -> ToolParseResult {
+        let mut parser = Gemma4ToolParser::new(&test_tools());
+        let mut result = ToolParseResult::default();
+        for chunk in chunks {
+            result.append(parser.push(chunk).unwrap());
+        }
+        result.append(parser.finish().unwrap());
+        result.coalesce_calls()
+    }
+
+    fn first_call(result: &ToolParseResult) -> &ToolCallDelta {
+        result.calls.first().expect("expected one tool call")
+    }
+
+    #[test]
+    fn gemma4_parse_args_handles_scalars_and_nested_values() {
+        let parsed = parse_gemma4_args(
+            "name:<|\"|>test<|\"|>,count:42,active:true,score:114.514,nested:{inner:<|\"|>value<|\"|>},items:[<|\"|>a<|\"|>,<|\"|>b<|\"|>]",
+        )
+        .unwrap();
+
+        assert_eq!(
+            Value::Object(parsed),
+            json!({
+                "name": "test",
+                "count": 42,
+                "active": true,
+                "score": 114.514,
+                "nested": { "inner": "value" },
+                "items": ["a", "b"],
+            })
+        );
+    }
+
+    #[test]
+    fn gemma4_parse_args_handles_empty_arguments() {
+        let parsed = parse_gemma4_args("").unwrap();
+        assert_eq!(Value::Object(parsed), json!({}));
+    }
+
+    #[test]
+    fn gemma4_parse_array_handles_bare_values() {
+        let parsed = parse_gemma4_array("42,true,114.514").unwrap();
+        assert_eq!(Value::Array(parsed), json!([42, true, 114.514]));
+    }
+
+    #[test]
+    fn gemma4_parse_complete_extracts_single_tool_call() {
+        let mut parser = Gemma4ToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete("<|tool_call>call:get_weather{location:<|\"|>London<|\"|>}<tool_call|>")
+            .unwrap();
+
+        assert!(result.normal_text.is_empty());
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(first_call(&result).name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&first_call(&result).arguments).unwrap(),
+            json!({ "location": "London" })
+        );
+    }
+
+    #[test]
+    fn gemma4_parse_complete_rejects_incomplete_tool_call() {
+        let mut parser = Gemma4ToolParser::new(&test_tools());
+        let error = parser
+            .parse_complete("<|tool_call>call:get_weather{location:<|\"|>London")
+            .unwrap_err();
+
+        assert!(error.to_report_string().contains("incomplete Gemma4 tool call"));
+    }
+
+    #[test]
+    fn gemma4_streaming_basic_single_tool_call() {
+        let result = collect_stream(&[
+            "<|tool_call>",
+            "call:get_weather{",
+            "location:<|\"|>Paris",
+            ", France",
+            "<|\"|>}",
+            "<tool_call|>",
+        ]);
+
+        assert!(result.normal_text.is_empty());
+        assert_eq!(first_call(&result).name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&first_call(&result).arguments).unwrap(),
+            json!({ "location": "Paris, France" })
+        );
+    }
+
+    #[test]
+    fn gemma4_streaming_text_before_and_after_tool_call() {
+        let result = collect_stream(&[
+            "Let me check ",
+            "the weather. ",
+            "<|tool_call>",
+            "call:get_weather{",
+            "location:<|\"|>London<|\"|>}",
+            "<tool_call|><",
+            "div>",
+        ]);
+
+        assert_eq!(result.normal_text, "Let me check the weather. <div>");
+        assert_eq!(first_call(&result).name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&first_call(&result).arguments).unwrap(),
+            json!({ "location": "London" })
+        );
+    }
+
+    #[test]
+    fn gemma4_streaming_waits_for_complete_tool_call() {
+        let mut parser = Gemma4ToolParser::new(&test_tools());
+        let mut result = ToolParseResult::default();
+
+        for chunk in [
+            "<|tool_call>",
+            "call:get_weather{",
+            "location:<|\"|>Paris<|\"|>}",
+        ] {
+            result.append(parser.push(chunk).unwrap());
+            assert!(result.calls.is_empty());
+        }
+
+        result.append(parser.push("<tool_call|>").unwrap());
+        let result = result.coalesce_calls();
+
+        assert_eq!(first_call(&result).name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&first_call(&result).arguments).unwrap(),
+            json!({ "location": "Paris" })
+        );
+    }
+
+    #[test]
+    fn gemma4_streaming_handles_boolean_split_across_chunks() {
+        let result = collect_stream(&[
+            "<|tool_call>",
+            "call:search{input:{all:tru",
+            "e}}",
+            "<tool_call|>",
+        ]);
+
+        assert_eq!(first_call(&result).name.as_deref(), Some("search"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&first_call(&result).arguments).unwrap(),
+            json!({ "input": { "all": true } })
+        );
+    }
+
+    #[test]
+    fn gemma4_streaming_handles_false_split_across_chunks() {
+        let result = collect_stream(&["<|tool_call>", "call:set{flag:fals", "e}", "<tool_call|>"]);
+
+        assert_eq!(first_call(&result).name.as_deref(), Some("set"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&first_call(&result).arguments).unwrap(),
+            json!({ "flag": false })
+        );
+    }
+
+    #[test]
+    fn gemma4_streaming_handles_number_split_across_chunks() {
+        let result = collect_stream(&["<|tool_call>", "call:set{count:4", "2}", "<tool_call|>"]);
+
+        assert_eq!(first_call(&result).name.as_deref(), Some("set"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&first_call(&result).arguments).unwrap(),
+            json!({ "count": 42 })
+        );
+    }
+
+    #[test]
+    fn gemma4_streaming_handles_split_string_delimiter() {
+        let result = collect_stream(&[
+            "<|tool_call>",
+            "call:todowrite{",
+            "content:<|\"|>Buy milk<|",
+            "\"|>}",
+            "<tool_call|>",
+        ]);
+
+        assert_eq!(first_call(&result).name.as_deref(), Some("todowrite"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&first_call(&result).arguments).unwrap(),
+            json!({ "content": "Buy milk" })
+        );
+        assert!(!first_call(&result).arguments.contains("<|"));
+    }
+
+    #[test]
+    fn gemma4_streaming_handles_end_marker_literal_inside_string() {
+        let result = collect_stream(&[
+            "<|tool_call>",
+            "call:todowrite{",
+            "content:<|\"|>literal }<tool_call|> inside",
+            "<|\"|>}",
+            "<tool_call|>",
+        ]);
+
+        assert_eq!(first_call(&result).name.as_deref(), Some("todowrite"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&first_call(&result).arguments).unwrap(),
+            json!({ "content": "literal }<tool_call|> inside" })
+        );
+    }
+
+    #[test]
+    fn gemma4_streaming_handles_html_argument_without_duplication() {
+        let result = collect_stream(&[
+            "<|tool_call>",
+            "call:write_file{",
+            "path:<|\"|>index.html<|\"|>,",
+            "content:<|\"|><!DOCTYPE html>\n<",
+            "html lang=\"zh-CN\">\n<",
+            "head>\n    <",
+            "meta charset=\"UTF-8\">\n    <",
+            "meta name=\"viewport\" content=\"width=device-width\">\n",
+            "<|\"|>}",
+            "<tool_call|>",
+        ]);
+
+        assert_eq!(first_call(&result).name.as_deref(), Some("write_file"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&first_call(&result).arguments).unwrap(),
+            json!({
+                "path": "index.html",
+                "content": "<!DOCTYPE html>\n<html lang=\"zh-CN\">\n<head>\n    <meta charset=\"UTF-8\">\n    <meta name=\"viewport\" content=\"width=device-width\">\n",
+            })
+        );
+    }
+
+    #[test]
+    fn gemma4_streaming_trailing_bare_bool_is_not_duplicated() {
+        let result = collect_stream(&[
+            "<|tool_call>",
+            "call:Edit{",
+            "file_path:<|\"|>src/env.py<|\"|>,",
+            "old_string:<|\"|>old_val<|\"|>,",
+            "new_string:<|\"|>new_val<|\"|>,",
+            "replace_all:",
+            "false}",
+            "<tool_call|>",
+        ]);
+
+        assert_eq!(first_call(&result).name.as_deref(), Some("Edit"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&first_call(&result).arguments).unwrap(),
+            json!({
+                "file_path": "src/env.py",
+                "old_string": "old_val",
+                "new_string": "new_val",
+                "replace_all": false,
+            })
+        );
+        assert_eq!(
+            first_call(&result).arguments.matches("replace_all").count(),
+            1
+        );
+    }
+
+    #[test]
+    fn gemma4_finish_flushes_partial_start_marker_as_text() {
+        let mut parser = Gemma4ToolParser::new(&test_tools());
+        let mut result = parser.push("<").unwrap();
+        result.append(parser.finish().unwrap());
+
+        assert_eq!(result.normal_text, "<");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn gemma4_finish_rejects_complete_args_without_end_marker() {
+        let mut parser = Gemma4ToolParser::new(&test_tools());
+        for chunk in ["<|tool_call>", "call:get_status{}"] {
+            parser.push(chunk).unwrap();
+        }
+
+        let error = parser.finish().unwrap_err();
+
+        assert!(error.to_report_string().contains("incomplete Gemma4 tool call"));
+    }
+}
diff --git a/rust/src/tool-parser/src/glm_xml/glm45_moe.rs b/rust/src/tool-parser/src/glm_xml/glm45_moe.rs
new file mode 100644
index 000000000000..b145671429ca
--- /dev/null
+++ b/rust/src/tool-parser/src/glm_xml/glm45_moe.rs
@@ -0,0 +1,43 @@
+use super::{GlmXmlToolParser, Separator};
+use crate::{Result, Tool, ToolParseResult, ToolParser};
+
+/// Tool parser for GLM-4.5/4.6 MoE XML-style tool calls.
+///
+/// Example tool call content:
+///
+/// ```text
+/// <tool_call>get_weather
+/// <arg_key>city</arg_key>
+/// <arg_value>Hangzhou</arg_value>
+/// </tool_call>
+/// ```
+///
+/// Arguments are emitted only after a full `tool_call` block is parsed.
+pub struct Glm45MoeToolParser(GlmXmlToolParser);
+
+impl Glm45MoeToolParser {
+    /// Create a GLM-4.5/4.6 MoE tool parser.
+    pub(super) fn new(tools: &[Tool]) -> Self {
+        Self(GlmXmlToolParser::new(tools, Separator::Newline))
+    }
+}
+
+impl ToolParser for Glm45MoeToolParser {
+    /// Create a boxed GLM-4.5/4.6 MoE tool parser.
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tools)))
+    }
+
+    /// Push one decoded text chunk through the GLM MoE parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.0.push(chunk)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        self.0.finish()
+    }
+}
diff --git a/rust/src/tool-parser/src/glm_xml/glm47_moe.rs b/rust/src/tool-parser/src/glm_xml/glm47_moe.rs
new file mode 100644
index 000000000000..7e7538d7c38a
--- /dev/null
+++ b/rust/src/tool-parser/src/glm_xml/glm47_moe.rs
@@ -0,0 +1,135 @@
+use super::{GlmXmlToolParser, Separator};
+use crate::{Result, Tool, ToolParseResult, ToolParser};
+
+/// Tool parser for GLM-4.7 MoE XML-style tool calls.
+///
+/// GLM-4.7 reuses the GLM-4.5 parser with a more flexible function-name
+/// separator, so the name may be followed by whitespace, a newline, or the
+/// first `<arg_key>` tag directly.
+pub struct Glm47MoeToolParser(GlmXmlToolParser);
+
+impl Glm47MoeToolParser {
+    fn new(tools: &[Tool]) -> Self {
+        Self(GlmXmlToolParser::new(tools, Separator::Flexible))
+    }
+}
+
+impl ToolParser for Glm47MoeToolParser {
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tools)))
+    }
+
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.0.push(chunk)
+    }
+
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        self.0.finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use serde_json::{Value, json};
+
+    use super::{Glm47MoeToolParser, ToolParser};
+    use crate::test_utils::{collect_stream, split_by_chars, test_tools};
+
+    fn glm47_tool_call(function_name: &str, params: &[(&str, &str)]) -> String {
+        let params = params
+            .iter()
+            .map(|(name, value)| format!("<arg_key>{name}</arg_key><arg_value>{value}</arg_value>"))
+            .collect::<Vec<_>>()
+            .join("");
+        format!("<tool_call>{function_name}{params}</tool_call>")
+    }
+
+    #[test]
+    fn glm47_parse_complete_extracts_single_tool_call() {
+        let mut parser = Glm47MoeToolParser::new(&test_tools());
+        let output = format!(
+            "Let me search for that.\n{}",
+            glm47_tool_call(
+                "get_weather",
+                &[("city", "Beijing"), ("date", "2024-12-25")]
+            )
+        );
+
+        let result = parser.parse_complete(&output).unwrap();
+
+        assert_eq!(result.normal_text, "Let me search for that.\n");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({"city": "Beijing", "date": "2024-12-25"})
+        );
+    }
+
+    #[test]
+    fn glm47_streaming_extracts_multiple_tool_calls() {
+        let mut parser = Glm47MoeToolParser::new(&test_tools());
+        let output = format!(
+            "{}{}",
+            glm47_tool_call("get_weather", &[("city", "Shanghai")]),
+            glm47_tool_call("add", &[("x", "1"), ("y", "2")])
+        );
+
+        let chunks = split_by_chars(&output, 7);
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(result.normal_text, "");
+        assert_eq!(result.calls.len(), 2);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(result.calls[1].name.as_deref(), Some("add"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[1].arguments).unwrap(),
+            json!({"x": 1, "y": 2})
+        );
+    }
+
+    #[test]
+    fn glm47_parse_complete_converts_schema_types() {
+        let mut parser = Glm47MoeToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&glm47_tool_call(
+                "convert",
+                &[
+                    ("whole", "42"),
+                    ("flag", "true"),
+                    ("payload", r#"{"nested":{"key":"value"}}"#),
+                    ("items", "[1, 2, 3]"),
+                    ("empty", ""),
+                ],
+            ))
+            .unwrap();
+
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "whole": 42,
+                "flag": true,
+                "payload": {"nested": {"key": "value"}},
+                "items": [1, 2, 3],
+                "empty": ""
+            })
+        );
+    }
+
+    #[test]
+    fn glm47_parse_complete_extracts_zero_argument_call() {
+        let mut parser = Glm47MoeToolParser::new(&test_tools());
+
+        let result = parser.parse_complete("<tool_call>add</tool_call>").unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("add"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({})
+        );
+    }
+}
diff --git a/rust/src/tool-parser/src/glm_xml/mod.rs b/rust/src/tool-parser/src/glm_xml/mod.rs
new file mode 100644
index 000000000000..ccb05e8d85f4
--- /dev/null
+++ b/rust/src/tool-parser/src/glm_xml/mod.rs
@@ -0,0 +1,435 @@
+use winnow::ascii::multispace0 as ws0;
+use winnow::combinator::{alt, eof, repeat, seq, terminated};
+use winnow::prelude::*;
+use winnow::stream::Partial;
+use winnow::token::{literal, rest, take_until, take_while};
+
+use super::parameters::ToolSchemas;
+use super::utils::{parse_buffered_event, safe_text_len, xml_unescape};
+use super::{Result, ToolCallDelta, ToolParseResult};
+use crate::Tool;
+
+mod glm45_moe;
+mod glm47_moe;
+
+pub use glm45_moe::Glm45MoeToolParser;
+pub use glm47_moe::Glm47MoeToolParser;
+
+const TOOL_CALL_START: &str = "<tool_call>";
+const TOOL_CALL_END: &str = "</tool_call>";
+const ARG_KEY_START: &str = "<arg_key>";
+const ARG_KEY_END: &str = "</arg_key>";
+const ARG_VALUE_START: &str = "<arg_value>";
+const ARG_VALUE_END: &str = "</arg_value>";
+
+type GlmInput<'i> = Partial<&'i str>;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum GlmMode {
+    Text,
+    ToolCall,
+    AfterToolCall,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum Separator {
+    /// GLM-4.5/4.6 format: function name must end at a newline before
+    /// arguments.
+    Newline,
+    /// GLM-4.7 format: function name may end at whitespace or directly before
+    /// `<arg_key>`.
+    Flexible,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum GlmEvent {
+    Text {
+        len: usize,
+    },
+    ToolCallStart,
+    ToolCall {
+        name: String,
+        raw_params: Vec<(String, String)>,
+    },
+    IgnoredRest,
+}
+
+/// Tool parser core for GLM XML-style tool calls.
+struct GlmXmlToolParser {
+    buffer: String,
+    mode: GlmMode,
+    emitted_tool_count: usize,
+    tool_parameters: ToolSchemas,
+    separator: Separator,
+}
+
+impl GlmXmlToolParser {
+    /// Create a GLM XML tool parser with a function-name separator.
+    fn new(tools: &[Tool], separator: Separator) -> Self {
+        Self {
+            buffer: String::new(),
+            mode: GlmMode::Text,
+            emitted_tool_count: 0,
+            tool_parameters: ToolSchemas::from_tools(tools),
+            separator,
+        }
+    }
+
+    /// Apply one parsed GLM event to parser state and output.
+    fn apply_event(&mut self, event: GlmEvent, result: &mut ToolParseResult) -> Result<()> {
+        match event {
+            GlmEvent::Text { len: consumed_len } => {
+                result.normal_text.push_str(&self.buffer[..consumed_len]);
+            }
+            GlmEvent::ToolCallStart => self.mode = GlmMode::ToolCall,
+            GlmEvent::ToolCall { name, raw_params } => {
+                self.mode = GlmMode::AfterToolCall;
+                let arguments = self.tool_parameters.convert_params_with_schema(&name, raw_params);
+                let arguments = serde_json::to_string(&arguments)
+                    .map_err(|error| parsing_failed!("failed to serialize arguments: {}", error))?;
+
+                result.calls.push(ToolCallDelta {
+                    tool_index: self.emitted_tool_count,
+                    name: Some(name),
+                    arguments,
+                });
+                self.emitted_tool_count += 1;
+            }
+            GlmEvent::IgnoredRest => {}
+        }
+        Ok(())
+    }
+
+    /// Reset all streaming state.
+    fn reset(&mut self) {
+        self.buffer.clear();
+        self.mode = GlmMode::Text;
+        self.emitted_tool_count = 0;
+    }
+
+    /// Push one decoded text chunk through the GLM MoE parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.buffer.push_str(chunk);
+        let mut result = ToolParseResult::default();
+
+        while let Some((event, consumed_len)) = parse_buffered_event(&self.buffer, |input| {
+            parse_next_glm_event(input, self.mode, self.separator)
+        })? {
+            self.apply_event(event, &mut result)?;
+            self.buffer.drain(..consumed_len);
+        }
+
+        Ok(result)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        let mut result = ToolParseResult::default();
+        if !self.buffer.is_empty() {
+            match self.mode {
+                GlmMode::Text => result.normal_text.push_str(&self.buffer),
+                GlmMode::ToolCall => return Err(parsing_failed!("incomplete GLM MoE tool call")),
+                GlmMode::AfterToolCall => {}
+            }
+        }
+        self.reset();
+        Ok(result)
+    }
+}
+
+/// Parse a GLM event for the current parser mode.
+fn parse_next_glm_event(
+    input: &mut GlmInput<'_>,
+    mode: GlmMode,
+    separator: Separator,
+) -> ModalResult<GlmEvent> {
+    match mode {
+        GlmMode::Text => parse_text_event(input),
+        GlmMode::ToolCall => tool_call_event(input, separator),
+        GlmMode::AfterToolCall => after_tool_call_event(input),
+    }
+}
+
+/// Parse a text-mode GLM event.
+fn parse_text_event(input: &mut GlmInput<'_>) -> ModalResult<GlmEvent> {
+    alt((tool_call_start_event, safe_text_event)).parse_next(input)
+}
+
+/// Parse a GLM tool-call start marker.
+fn tool_call_start_event(input: &mut GlmInput<'_>) -> ModalResult<GlmEvent> {
+    literal(TOOL_CALL_START).value(GlmEvent::ToolCallStart).parse_next(input)
+}
+
+/// Parse a safe text run before the next GLM marker.
+fn safe_text_event(input: &mut GlmInput<'_>) -> ModalResult<GlmEvent> {
+    safe_text_len(input, TOOL_CALL_START).map(|len| GlmEvent::Text { len })
+}
+
+/// Parse text after a completed GLM tool call.
+fn after_tool_call_event(input: &mut GlmInput<'_>) -> ModalResult<GlmEvent> {
+    ws0.void().parse_next(input)?;
+    alt((tool_call_start_event, ignored_rest_event)).parse_next(input)
+}
+
+/// Parse a trailing rest after GLM tool calls.
+fn ignored_rest_event(input: &mut GlmInput<'_>) -> ModalResult<GlmEvent> {
+    rest.value(GlmEvent::IgnoredRest).parse_next(input)
+}
+
+/// Parse a complete GLM tool call.
+fn tool_call_event(input: &mut GlmInput<'_>, separator: Separator) -> ModalResult<GlmEvent> {
+    let (body,) = seq!(
+        take_until(0.., TOOL_CALL_END),
+        _: literal(TOOL_CALL_END),
+    )
+    .parse_next(input)?;
+
+    parse_tool_call_body(body, separator)
+}
+
+/// Parse a GLM tool-call body.
+fn parse_tool_call_body(body: &str, separator: Separator) -> ModalResult<GlmEvent> {
+    let mut input = body;
+    let (name, raw_params) = match separator {
+        Separator::Newline => seq!(
+            _: ws0,
+            parse_newline_separated_function_name,
+            parse_parameters,
+            _: ws0,
+            _: eof,
+        )
+        .parse_next(&mut input)?,
+        Separator::Flexible => seq!(
+            _: ws0,
+            parse_flexible_function_name,
+            parse_parameters,
+            _: ws0,
+            _: eof,
+        )
+        .parse_next(&mut input)?,
+    };
+
+    Ok(GlmEvent::ToolCall {
+        name: name.to_string(),
+        raw_params,
+    })
+}
+
+/// Parse a GLM-4.5 newline-separated function name.
+fn parse_newline_separated_function_name<'i>(input: &mut &'i str) -> ModalResult<&'i str> {
+    terminated(take_until(1.., "\n"), "\n").map(str::trim).parse_next(input)
+}
+
+/// Parse a GLM-4.7 whitespace-or-tag-separated function name.
+fn parse_flexible_function_name<'i>(input: &mut &'i str) -> ModalResult<&'i str> {
+    terminated(
+        take_while(1.., |ch: char| !ch.is_whitespace() && ch != '<'),
+        ws0,
+    )
+    .parse_next(input)
+}
+
+/// Parse GLM argument key-value pairs.
+fn parse_parameters(input: &mut &str) -> ModalResult<Vec<(String, String)>> {
+    repeat(0.., terminated(parse_parameter, ws0)).parse_next(input)
+}
+
+/// Parse a GLM argument key-value pair.
+fn parse_parameter(input: &mut &str) -> ModalResult<(String, String)> {
+    let (key, value) = seq!(
+        _: literal(ARG_KEY_START),
+        take_until(1.., ARG_KEY_END),
+        _: literal(ARG_KEY_END),
+        _: ws0,
+        _: literal(ARG_VALUE_START),
+        take_until(0.., ARG_VALUE_END).map(str::trim).map(xml_unescape),
+        _: literal(ARG_VALUE_END),
+    )
+    .parse_next(input)?;
+
+    Ok((key.trim().to_string(), value.into_owned()))
+}
+
+#[cfg(test)]
+mod tests {
+    use serde_json::{Value, json};
+    use thiserror_ext::AsReport;
+
+    use super::Glm45MoeToolParser;
+    use crate::ToolParser;
+    use crate::test_utils::{collect_stream, split_by_chars, test_tools};
+
+    fn glm45_tool_call(function_name: &str, params: &[(&str, &str)]) -> String {
+        let params = params
+            .iter()
+            .map(|(name, value)| {
+                format!("<arg_key>{name}</arg_key>\n<arg_value>{value}</arg_value>")
+            })
+            .collect::<Vec<_>>()
+            .join("\n");
+        format!("<tool_call>{function_name}\n{params}\n</tool_call>")
+    }
+
+    #[test]
+    fn glm45_parse_complete_without_tool_call_keeps_text() {
+        let mut parser = Glm45MoeToolParser::new(&test_tools());
+        let result = parser.parse_complete("Hello, world!").unwrap();
+
+        assert_eq!(result.normal_text, "Hello, world!");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn glm45_parse_complete_extracts_single_tool_call() {
+        let mut parser = Glm45MoeToolParser::new(&test_tools());
+        let output = format!(
+            "Let me search for that.\n{}",
+            glm45_tool_call(
+                "get_weather",
+                &[("city", "Beijing"), ("date", "2024-12-25")]
+            )
+        );
+
+        let result = parser.parse_complete(&output).unwrap();
+
+        assert_eq!(result.normal_text, "Let me search for that.\n");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({"city": "Beijing", "date": "2024-12-25"})
+        );
+    }
+
+    #[test]
+    fn glm45_streaming_extracts_multiple_tool_calls() {
+        let mut parser = Glm45MoeToolParser::new(&test_tools());
+        let output = format!(
+            "{}\n{}",
+            glm45_tool_call("get_weather", &[("city", "Shanghai")]),
+            glm45_tool_call("add", &[("x", "1"), ("y", "2")])
+        );
+
+        let chunks = split_by_chars(&output, 11);
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(result.normal_text, "");
+        assert_eq!(result.calls.len(), 2);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(result.calls[1].name.as_deref(), Some("add"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[1].arguments).unwrap(),
+            json!({"x": 1, "y": 2})
+        );
+    }
+
+    #[test]
+    fn glm45_parse_complete_unescapes_literal_closing_tags_in_arg_value() {
+        let mut parser = Glm45MoeToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&glm45_tool_call(
+                "get_weather",
+                &[
+                    ("city", "Paris &lt;/arg_value&gt;&lt;/tool_call&gt;"),
+                    ("date", "2026-05-08"),
+                ],
+            ))
+            .unwrap();
+
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "city": "Paris </arg_value></tool_call>",
+                "date": "2026-05-08",
+            })
+        );
+    }
+
+    #[test]
+    fn glm45_streaming_without_tool_call_emits_text_incrementally() {
+        let mut parser = Glm45MoeToolParser::new(&test_tools());
+
+        let result = collect_stream(&mut parser, &["hello ", "world"]);
+
+        assert_eq!(result.normal_text, "hello world");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn glm45_streaming_preserves_prefix_text() {
+        let mut parser = Glm45MoeToolParser::new(&test_tools());
+
+        let result = collect_stream(
+            &mut parser,
+            &[
+                "Prefix ",
+                &glm45_tool_call("get_weather", &[("city", "Hangzhou")]),
+            ],
+        );
+
+        assert_eq!(result.normal_text, "Prefix ");
+        assert_eq!(result.calls.len(), 1);
+    }
+
+    #[test]
+    fn glm45_streaming_handles_start_token_split_across_chunks() {
+        let mut parser = Glm45MoeToolParser::new(&test_tools());
+        let result = collect_stream(
+            &mut parser,
+            &[
+                "hello <tool",
+                "_call>get_weather\n",
+                "<arg_key>city</arg_key><arg_value>Paris</arg_value></tool_call>",
+            ],
+        );
+
+        assert_eq!(result.normal_text, "hello ");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+    }
+
+    #[test]
+    fn glm45_streaming_does_not_emit_incomplete_tool_call() {
+        let mut parser = Glm45MoeToolParser::new(&test_tools());
+
+        let result = parser.push("<tool_call>get_weather\n<arg_key>city</arg_key>").unwrap();
+
+        assert_eq!(result.normal_text, "");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn glm45_finish_fails_incomplete_tool_call() {
+        let mut parser = Glm45MoeToolParser::new(&test_tools());
+
+        parser.push("<tool_call>get_weather\n<arg_key>city</arg_key>").unwrap();
+        let error = parser.finish().unwrap_err();
+
+        assert!(error.as_report().to_string().contains("incomplete GLM MoE tool call"));
+    }
+
+    #[test]
+    fn glm45_malformed_tool_call_fails_fast() {
+        let mut parser = Glm45MoeToolParser::new(&test_tools());
+
+        let error = parser.push("<tool_call>get_weather<arg_key>city</arg_key><arg_value>Paris</arg_value></tool_call>").unwrap_err();
+
+        assert!(error.as_report().to_string().contains("tool parser parsing failed"));
+    }
+
+    #[test]
+    fn glm45_streaming_ignores_trailing_text_after_tool_calls() {
+        let mut parser = Glm45MoeToolParser::new(&test_tools());
+
+        let result = collect_stream(
+            &mut parser,
+            &[&format!(
+                "{}<|endoftext|>",
+                glm45_tool_call("get_weather", &[("city", "Paris")])
+            )],
+        );
+
+        assert_eq!(result.normal_text, "");
+        assert_eq!(result.calls.len(), 1);
+    }
+}
diff --git a/rust/src/tool-parser/src/json/hermes.rs b/rust/src/tool-parser/src/json/hermes.rs
new file mode 100644
index 000000000000..d57c7de8ef33
--- /dev/null
+++ b/rust/src/tool-parser/src/json/hermes.rs
@@ -0,0 +1,221 @@
+use super::{JsonToolCallConfig, JsonToolCallParser, JsonToolCallWhitespace};
+use crate::{Result, Tool, ToolParseResult, ToolParser};
+
+const HERMES_CONFIG: JsonToolCallConfig = JsonToolCallConfig {
+    parser_name: "Hermes",
+    start_marker: "<tool_call>",
+    end_marker: "</tool_call>",
+    marker_whitespace: JsonToolCallWhitespace::Optional,
+    delimiter: None,
+    name_key: "name",
+    arguments_key: "arguments",
+};
+
+/// Tool parser for Hermes XML-wrapped JSON tool calls.
+///
+/// Example tool call content:
+///
+/// ```text
+/// <tool_call>{"name": "get_weather", "arguments": {"location":"Tokyo"}}</tool_call>
+/// ```
+///
+/// Arguments are already OpenAI-style JSON text, so they are streamed as raw
+/// argument deltas without schema conversion or JSON normalization.
+///
+/// Note: parallel calls are represented as repeated
+/// `<tool_call>...</tool_call>` blocks, not as multiple calls inside one tag.
+pub struct HermesToolParser {
+    inner: JsonToolCallParser,
+}
+
+impl HermesToolParser {
+    /// Create a Hermes tool parser.
+    fn new(_tools: &[Tool]) -> Self {
+        Self {
+            inner: JsonToolCallParser::new(HERMES_CONFIG),
+        }
+    }
+}
+
+impl ToolParser for HermesToolParser {
+    /// Create a boxed Hermes tool parser.
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tools)))
+    }
+
+    /// Push one decoded text chunk through the Hermes parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.inner.push(chunk)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        self.inner.finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use expect_test::expect;
+    use thiserror_ext::AsReport;
+
+    use super::HermesToolParser;
+    use crate::test_utils::{collect_stream, split_by_chars, test_tools};
+    use crate::{ToolParseResult, ToolParser};
+
+    fn build_tool_call(function_name: &str, arguments: &str) -> String {
+        format!(r#"<tool_call>{{"name":"{function_name}","arguments":{arguments}}}</tool_call>"#)
+    }
+
+    #[test]
+    fn hermes_parse_complete_without_tool_call_keeps_text() {
+        let mut parser = HermesToolParser::new(&test_tools());
+        let result = parser.parse_complete("Hello, world!").unwrap();
+
+        assert_eq!(result.normal_text, "Hello, world!");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn hermes_parse_complete_extracts_raw_json_arguments() {
+        let mut parser = HermesToolParser::new(&test_tools());
+        let arguments = r#"{ "location": "Tokyo", "days": "3" }"#;
+        let result = parser
+            .parse_complete(&format!(
+                "Let me check.\n{}",
+                build_tool_call("get_weather", arguments)
+            ))
+            .unwrap();
+
+        assert_eq!(result.normal_text, "Let me check.\n");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].tool_index, 0);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn hermes_accepts_newline_after_tool_call_start() {
+        let mut parser = HermesToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(
+                r#"<tool_call>
+{"name":"get_weather","arguments":{}}</tool_call>"#,
+            )
+            .unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+    }
+
+    #[test]
+    fn hermes_does_not_validate_or_normalize_arguments() {
+        let mut parser = HermesToolParser::new(&test_tools());
+        let arguments = r#"{"location":"Tokyo",}"#;
+        let result = parser.parse_complete(&build_tool_call("get_weather", arguments)).unwrap();
+
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn hermes_streaming_emits_argument_deltas() {
+        let mut parser = HermesToolParser::new(&test_tools());
+        let chunks = [
+            "preface <tool",
+            "_call>{\"name\":\"get_weather\",\"arguments\":",
+            "{\"location\":",
+            "\"Beijing\"",
+            "}",
+            "}</tool_call> suffix",
+        ];
+
+        let mut result = ToolParseResult::default();
+        let mut observed_arguments = Vec::new();
+        for chunk in chunks {
+            let next = parser.push(chunk).unwrap();
+            observed_arguments.extend(
+                next.calls
+                    .iter()
+                    .filter(|call| call.name.is_none())
+                    .map(|call| call.arguments.clone()),
+            );
+            result.append(next);
+        }
+        result.append(parser.finish().unwrap());
+
+        assert_eq!(observed_arguments, ["{\"location\":", "\"Beijing\"", "}"]);
+        assert_eq!(result.normal_text, "preface  suffix");
+        assert_eq!(
+            result.coalesce_calls().calls[0].arguments,
+            r#"{"location":"Beijing"}"#
+        );
+    }
+
+    #[test]
+    fn hermes_streaming_handles_split_markers() {
+        let input = format!(
+            "hello {}",
+            build_tool_call("get_weather", r#"{"location":"Tokyo"}"#)
+        );
+        let chunks = split_by_chars(&input, 5);
+        let mut parser = HermesToolParser::new(&test_tools());
+
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(result.normal_text, "hello ");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].arguments, r#"{"location":"Tokyo"}"#);
+    }
+
+    #[test]
+    fn hermes_streaming_extracts_multiple_tool_calls() {
+        let input = format!(
+            "{}{}",
+            build_tool_call("get_weather", r#"{"location":"Shanghai"}"#),
+            build_tool_call("add", r#"{"x":1,"y":2}"#),
+        );
+        let chunks = split_by_chars(&input, 7);
+        let mut parser = HermesToolParser::new(&test_tools());
+
+        let result = collect_stream(&mut parser, &chunks);
+
+        expect![[r#"
+            ToolParseResult {
+                normal_text: "",
+                calls: [
+                    ToolCallDelta {
+                        tool_index: 0,
+                        name: Some(
+                            "get_weather",
+                        ),
+                        arguments: "{\"location\":\"Shanghai\"}",
+                    },
+                    ToolCallDelta {
+                        tool_index: 1,
+                        name: Some(
+                            "add",
+                        ),
+                        arguments: "{\"x\":1,\"y\":2}",
+                    },
+                ],
+            }
+        "#]]
+        .assert_debug_eq(&result);
+    }
+
+    #[test]
+    fn hermes_finish_fails_incomplete_tool_call() {
+        let mut parser = HermesToolParser::new(&test_tools());
+        parser
+            .push(r#"<tool_call>{"name":"get_weather","arguments":{"location""#)
+            .unwrap();
+
+        let error = parser.finish().unwrap_err();
+
+        expect!["tool parser parsing failed: incomplete Hermes tool call"]
+            .assert_eq(&error.to_report_string());
+    }
+}
diff --git a/rust/src/tool-parser/src/json/llama.rs b/rust/src/tool-parser/src/json/llama.rs
new file mode 100644
index 000000000000..3e579be72fbf
--- /dev/null
+++ b/rust/src/tool-parser/src/json/llama.rs
@@ -0,0 +1,487 @@
+use winnow::ascii::multispace0 as ws0;
+use winnow::combinator::seq;
+use winnow::error::{ModalResult, StrContext};
+use winnow::prelude::*;
+use winnow::token::literal;
+
+use super::{
+    JsonToolCallConfig, JsonToolCallEvent, JsonToolCallWhitespace, JsonToolInput,
+    argument_delta_event, tool_call_header_event,
+};
+use crate::utils::{JsonObjectScanState, parse_buffered_event};
+use crate::{Result, Tool, ToolCallDelta, ToolParseResult, ToolParser};
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum LlamaJsonMode {
+    Start,
+    Header,
+    Arguments { json_scan: JsonObjectScanState },
+    AfterCall,
+    Passthrough,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum LlamaJsonEvent {
+    ToolCallHeader { function_name: String },
+    Arguments { len: usize },
+    ToolCallClose,
+    Separator,
+}
+
+/// Tool parser for strict Llama JSON-template tool calls.
+///
+/// Example tool call content:
+///
+/// ```text
+/// {"name":"get_weather","parameters":{"location":"Tokyo"}}; {"name":"add","parameters":{"x":1,"y":2}}
+/// ```
+///
+/// Arguments are already OpenAI-style JSON text, so they are streamed as raw
+/// argument deltas without schema conversion or JSON normalization.
+///
+/// Natural text at the beginning of the stream permanently disables tool
+/// parsing for that assistant output.
+pub struct Llama3JsonToolParser {
+    buffer: String,
+    mode: LlamaJsonMode,
+    active_tool_index: Option<usize>,
+    emitted_tool_count: usize,
+}
+
+impl Llama3JsonToolParser {
+    /// Create a Llama JSON tool parser.
+    fn new(_tools: &[Tool]) -> Self {
+        Self {
+            buffer: String::new(),
+            mode: LlamaJsonMode::Start,
+            active_tool_index: None,
+            emitted_tool_count: 0,
+        }
+    }
+
+    /// Commit the stream to JSON parsing or permanent passthrough.
+    fn commit_start(&mut self) -> bool {
+        if !matches!(self.mode, LlamaJsonMode::Start) {
+            return true;
+        }
+
+        if self.buffer.is_empty() {
+            return false;
+        }
+
+        if self.buffer.starts_with('{') {
+            self.mode = LlamaJsonMode::Header;
+        } else {
+            self.mode = LlamaJsonMode::Passthrough;
+        }
+        true
+    }
+
+    /// Apply one parsed Llama JSON event to parser state and output.
+    fn apply_event(&mut self, event: LlamaJsonEvent, result: &mut ToolParseResult) -> Result<()> {
+        match event {
+            LlamaJsonEvent::ToolCallHeader { function_name } => {
+                let tool_index = self.emitted_tool_count;
+                self.emitted_tool_count += 1;
+                self.active_tool_index = Some(tool_index);
+                self.mode = LlamaJsonMode::Arguments {
+                    json_scan: JsonObjectScanState::default(),
+                };
+                result.calls.push(ToolCallDelta {
+                    tool_index,
+                    name: Some(function_name),
+                    arguments: String::new(),
+                });
+            }
+            LlamaJsonEvent::Arguments { len: consumed_len } => {
+                let Some(tool_index) = self.active_tool_index else {
+                    return Err(parsing_failed!(
+                        "Llama JSON arguments without an active tool call"
+                    ));
+                };
+                result.calls.push(ToolCallDelta {
+                    tool_index,
+                    name: None,
+                    arguments: self.buffer[..consumed_len].to_string(),
+                });
+            }
+            LlamaJsonEvent::ToolCallClose => {
+                self.active_tool_index = None;
+                self.mode = LlamaJsonMode::AfterCall;
+            }
+            LlamaJsonEvent::Separator => {
+                self.active_tool_index = None;
+                self.mode = LlamaJsonMode::Header;
+            }
+        }
+        Ok(())
+    }
+
+    /// Reset all streaming state.
+    fn reset(&mut self) {
+        self.buffer.clear();
+        self.mode = LlamaJsonMode::Start;
+        self.active_tool_index = None;
+        self.emitted_tool_count = 0;
+    }
+}
+
+impl ToolParser for Llama3JsonToolParser {
+    /// Create a boxed Llama JSON tool parser.
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tools)))
+    }
+
+    /// Push one decoded text chunk through the Llama JSON parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.buffer.push_str(chunk);
+        let mut result = ToolParseResult::default();
+
+        if !self.commit_start() {
+            return Ok(result);
+        }
+
+        if matches!(self.mode, LlamaJsonMode::Passthrough) {
+            result.normal_text.push_str(&self.buffer);
+            self.buffer.clear();
+            return Ok(result);
+        }
+
+        while let Some((event, consumed_len)) = parse_buffered_event(&self.buffer, |input| {
+            parse_next_llama_json_event(input, &mut self.mode)
+        })? {
+            self.apply_event(event, &mut result)?;
+            self.buffer.drain(..consumed_len);
+        }
+
+        Ok(result)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        let mut result = ToolParseResult::default();
+        match &self.mode {
+            LlamaJsonMode::Start | LlamaJsonMode::Passthrough => {
+                result.normal_text.push_str(&self.buffer);
+            }
+            LlamaJsonMode::AfterCall if self.buffer.trim().is_empty() => {}
+            LlamaJsonMode::Header | LlamaJsonMode::Arguments { .. } => {
+                return Err(parsing_failed!("incomplete Llama JSON tool call"));
+            }
+            LlamaJsonMode::AfterCall => {
+                return Err(parsing_failed!("invalid Llama JSON"));
+            }
+        }
+        self.reset();
+        Ok(result)
+    }
+}
+
+/// Parse a Llama JSON event for the current parser mode.
+fn parse_next_llama_json_event(
+    input: &mut JsonToolInput<'_>,
+    mode: &mut LlamaJsonMode,
+) -> ModalResult<LlamaJsonEvent> {
+    match mode {
+        LlamaJsonMode::Start | LlamaJsonMode::Passthrough => {
+            unreachable!("Llama JSON parser driver must commit before parsing events")
+        }
+        LlamaJsonMode::Header => llama_tool_call_header_event(input),
+        LlamaJsonMode::Arguments { json_scan } => parse_llama_arguments_event(input, json_scan),
+        LlamaJsonMode::AfterCall => after_call_event(input),
+    }
+}
+
+/// Parse a Llama JSON tool-call header.
+fn llama_tool_call_header_event(input: &mut JsonToolInput<'_>) -> ModalResult<LlamaJsonEvent> {
+    const CONFIG: JsonToolCallConfig = JsonToolCallConfig {
+        parser_name: "Llama JSON",
+        start_marker: "",
+        end_marker: "",
+        marker_whitespace: JsonToolCallWhitespace::Optional,
+        delimiter: Some(";"),
+        name_key: "name",
+        arguments_key: "parameters",
+    };
+
+    match tool_call_header_event(input, CONFIG)? {
+        JsonToolCallEvent::ToolCallHeader { function_name } => {
+            Ok(LlamaJsonEvent::ToolCallHeader { function_name })
+        }
+        _ => unreachable!("tool_call_header_event only emits ToolCallHeader"),
+    }
+}
+
+/// Parse one event inside a Llama JSON arguments payload.
+fn parse_llama_arguments_event(
+    input: &mut JsonToolInput<'_>,
+    json_scan: &mut JsonObjectScanState,
+) -> ModalResult<LlamaJsonEvent> {
+    if json_scan.complete() {
+        tool_call_close_event(input)
+    } else {
+        match argument_delta_event(input, json_scan)? {
+            JsonToolCallEvent::Arguments { len } => Ok(LlamaJsonEvent::Arguments { len }),
+            _ => unreachable!("argument_delta_event only emits Arguments"),
+        }
+    }
+}
+
+/// Parse the outer closing brace for one Llama JSON tool call.
+fn tool_call_close_event(input: &mut JsonToolInput<'_>) -> ModalResult<LlamaJsonEvent> {
+    literal("}").value(LlamaJsonEvent::ToolCallClose).parse_next(input)
+}
+
+/// Parse a semicolon separator after one Llama JSON tool call.
+fn after_call_event(input: &mut JsonToolInput<'_>) -> ModalResult<LlamaJsonEvent> {
+    seq!(
+        _: ws0,
+        _: literal(";"),
+        _: ws0,
+    )
+    .value(LlamaJsonEvent::Separator)
+    .context(StrContext::Label("Llama JSON"))
+    .parse_next(input)
+}
+
+#[cfg(test)]
+mod tests {
+    use expect_test::expect;
+    use thiserror_ext::AsReport;
+
+    use super::Llama3JsonToolParser;
+    use crate::test_utils::{collect_stream, split_by_chars, test_tools};
+    use crate::{ToolParseResult, ToolParser};
+
+    fn build_tool_call(function_name: &str, parameters: &str) -> String {
+        format!(r#"{{"name":"{function_name}","parameters":{parameters}}}"#)
+    }
+
+    #[test]
+    fn llama_json_parse_complete_without_tool_call_keeps_text() {
+        let mut parser = Llama3JsonToolParser::new(&test_tools());
+        let result = parser.parse_complete("Hello, world!").unwrap();
+
+        assert_eq!(result.normal_text, "Hello, world!");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn llama_json_passthrough_never_reenters_tool_parsing() {
+        let mut parser = Llama3JsonToolParser::new(&test_tools());
+        let mut result = parser.push("plain text first ").unwrap();
+        result.append(
+            parser.push(&build_tool_call("get_weather", r#"{"location":"Tokyo"}"#)).unwrap(),
+        );
+        result.append(parser.finish().unwrap());
+
+        assert_eq!(
+            result.normal_text,
+            r#"plain text first {"name":"get_weather","parameters":{"location":"Tokyo"}}"#
+        );
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn llama_json_does_not_support_python_tag_prefix() {
+        let mut parser = Llama3JsonToolParser::new(&test_tools());
+        let input = format!(
+            "<|python_tag|>{}",
+            build_tool_call("get_weather", r#"{"location":"Tokyo"}"#)
+        );
+        let result = parser.parse_complete(&input).unwrap();
+
+        assert_eq!(result.normal_text, input);
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn llama_json_rejects_leading_whitespace_before_tool_call() {
+        let mut parser = Llama3JsonToolParser::new(&test_tools());
+        let input = format!(
+            "\n  {}",
+            build_tool_call("get_weather", r#"{"location":"Tokyo"}"#)
+        );
+        let result = parser.parse_complete(&input).unwrap();
+
+        assert_eq!(result.normal_text, input);
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn llama_json_extracts_raw_parameters_object() {
+        let mut parser = Llama3JsonToolParser::new(&test_tools());
+        let arguments = r#"{ "location": "Tokyo", "days": 3 }"#;
+        let result = parser.parse_complete(&build_tool_call("get_weather", arguments)).unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].tool_index, 0);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn llama_json_rejects_arguments_key() {
+        let mut parser = Llama3JsonToolParser::new(&test_tools());
+        let error = parser
+            .parse_complete(r#"{"name":"get_weather","arguments":{"location":"Tokyo"}}"#)
+            .unwrap_err();
+
+        expect![[r#"
+            tool parser parsing failed: invalid Llama JSON
+            expected `parameters`"#]]
+        .assert_eq(&error.to_report_string());
+    }
+
+    #[test]
+    fn llama_json_extracts_multiple_semicolon_separated_calls() {
+        let mut parser = Llama3JsonToolParser::new(&test_tools());
+        let input = format!(
+            "{} \n; {}",
+            build_tool_call("get_weather", r#"{"location":"Shanghai"}"#),
+            build_tool_call("add", r#"{"x":1,"y":2}"#),
+        );
+        let result = parser.parse_complete(&input).unwrap();
+
+        expect![[r#"
+            ToolParseResult {
+                normal_text: "",
+                calls: [
+                    ToolCallDelta {
+                        tool_index: 0,
+                        name: Some(
+                            "get_weather",
+                        ),
+                        arguments: "{\"location\":\"Shanghai\"}",
+                    },
+                    ToolCallDelta {
+                        tool_index: 1,
+                        name: Some(
+                            "add",
+                        ),
+                        arguments: "{\"x\":1,\"y\":2}",
+                    },
+                ],
+            }
+        "#]]
+        .assert_debug_eq(&result);
+    }
+
+    #[test]
+    fn llama_json_streaming_emits_argument_deltas() {
+        let mut parser = Llama3JsonToolParser::new(&test_tools());
+        let chunks = [
+            "{\"name\":\"get_weather\",\"parameters\":",
+            "{\"location\":",
+            "\"Beijing\"",
+            "}}",
+        ];
+
+        let mut result = ToolParseResult::default();
+        let mut observed_arguments = Vec::new();
+        for chunk in chunks {
+            let next = parser.push(chunk).unwrap();
+            observed_arguments.extend(
+                next.calls
+                    .iter()
+                    .filter(|call| call.name.is_none())
+                    .map(|call| call.arguments.clone()),
+            );
+            result.append(next);
+        }
+        result.append(parser.finish().unwrap());
+
+        assert_eq!(observed_arguments, ["{\"location\":", "\"Beijing\"", "}"]);
+        assert_eq!(
+            result.coalesce_calls().calls[0].arguments,
+            r#"{"location":"Beijing"}"#
+        );
+    }
+
+    #[test]
+    fn llama_json_streaming_handles_split_objects_and_separator() {
+        let input = format!(
+            "{};{}",
+            build_tool_call("get_weather", r#"{"location":"Dallas","state":"TX"}"#),
+            build_tool_call("add", r#"{"x":4,"y":5}"#),
+        );
+        let chunks = split_by_chars(&input, 6);
+        let mut parser = Llama3JsonToolParser::new(&test_tools());
+
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(result.normal_text, "");
+        assert_eq!(result.calls.len(), 2);
+        assert_eq!(
+            result.calls[0].arguments,
+            r#"{"location":"Dallas","state":"TX"}"#
+        );
+        assert_eq!(result.calls[1].name.as_deref(), Some("add"));
+        assert_eq!(result.calls[1].arguments, r#"{"x":4,"y":5}"#);
+    }
+
+    #[test]
+    fn llama_json_handles_nested_multiline_and_escaped_string_parameters() {
+        let mut parser = Llama3JsonToolParser::new(&test_tools());
+        let arguments = r#"{
+  "payload": {"items": [1, {"value": "literal { brace } and \"quote\""}]},
+  "flag": true
+}"#;
+        let result = parser.parse_complete(&build_tool_call("convert", arguments)).unwrap();
+
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn llama_json_keeps_trailing_whitespace_after_tool_call() {
+        let mut parser = Llama3JsonToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&format!(
+                "{}\n\t ",
+                build_tool_call("get_weather", r#"{"location":"Tokyo"}"#)
+            ))
+            .unwrap();
+
+        assert_eq!(result.normal_text, "");
+        assert_eq!(result.calls.len(), 1);
+    }
+
+    #[test]
+    fn llama_json_finish_fails_incomplete_tool_call() {
+        let mut parser = Llama3JsonToolParser::new(&test_tools());
+        parser.push(r#"{"name":"get_weather","parameters":{"location""#).unwrap();
+
+        let error = parser.finish().unwrap_err();
+
+        expect!["tool parser parsing failed: incomplete Llama JSON tool call"]
+            .assert_eq(&error.to_report_string());
+    }
+
+    #[test]
+    fn llama_json_malformed_field_order_fails_fast() {
+        let mut parser = Llama3JsonToolParser::new(&test_tools());
+        let error = parser.push(r#"{"parameters":{},"name":"get_weather"}"#).unwrap_err();
+
+        expect![[r#"
+            tool parser parsing failed: invalid Llama JSON
+            expected `name`"#]]
+        .assert_eq(&error.to_report_string());
+    }
+
+    #[test]
+    fn llama_json_trailing_non_separator_content_errors() {
+        let mut parser = Llama3JsonToolParser::new(&test_tools());
+        let error = parser
+            .push(&format!(
+                "{} trailing",
+                build_tool_call("get_weather", r#"{"location":"Tokyo"}"#)
+            ))
+            .unwrap_err();
+
+        expect!["tool parser parsing failed: invalid Llama JSON"]
+            .assert_eq(&error.to_report_string());
+    }
+}
diff --git a/rust/src/tool-parser/src/json/mistral.rs b/rust/src/tool-parser/src/json/mistral.rs
new file mode 100644
index 000000000000..ac5dea804cc6
--- /dev/null
+++ b/rust/src/tool-parser/src/json/mistral.rs
@@ -0,0 +1,240 @@
+use super::{JsonToolCallConfig, JsonToolCallParser, JsonToolCallWhitespace};
+use crate::{Result, Tool, ToolParseResult, ToolParser};
+
+const MISTRAL_CONFIG: JsonToolCallConfig = JsonToolCallConfig {
+    parser_name: "Mistral",
+    start_marker: "[TOOL_CALLS] [",
+    end_marker: "]",
+    marker_whitespace: JsonToolCallWhitespace::Optional,
+    delimiter: Some(","),
+    name_key: "name",
+    arguments_key: "arguments",
+};
+
+/// Tool parser for Mistral JSON-array tool calls.
+///
+/// Example tool call content:
+///
+/// ```text
+/// [TOOL_CALLS] [{"name": "get_weather", "arguments": {"location":"Tokyo"}}]
+/// ```
+///
+/// Arguments are already OpenAI-style JSON text, so they are streamed as raw
+/// argument deltas without schema conversion or JSON normalization.
+pub struct MistralToolParser {
+    inner: JsonToolCallParser,
+}
+
+impl MistralToolParser {
+    /// Create a Mistral tool parser.
+    fn new(_tools: &[Tool]) -> Self {
+        Self {
+            inner: JsonToolCallParser::new(MISTRAL_CONFIG),
+        }
+    }
+}
+
+impl ToolParser for MistralToolParser {
+    /// Create a boxed Mistral tool parser.
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tools)))
+    }
+
+    /// Push one decoded text chunk through the Mistral parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.inner.push(chunk)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        self.inner.finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use expect_test::expect;
+    use thiserror_ext::AsReport;
+
+    use super::MistralToolParser;
+    use crate::test_utils::{collect_stream, split_by_chars, test_tools};
+    use crate::{ToolParseResult, ToolParser};
+
+    fn build_tool_call(function_name: &str, arguments: &str) -> String {
+        format!(r#"{{"name":"{function_name}","arguments":{arguments}}}"#)
+    }
+
+    fn build_tool_calls(tool_calls: &[String]) -> String {
+        format!("[TOOL_CALLS] [{}]", tool_calls.join(","))
+    }
+
+    #[test]
+    fn mistral_parse_complete_without_tool_call_keeps_text() {
+        let mut parser = MistralToolParser::new(&test_tools());
+        let result = parser.parse_complete("Hello, world!").unwrap();
+
+        assert_eq!(result.normal_text, "Hello, world!");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn mistral_parse_complete_extracts_raw_json_arguments() {
+        let mut parser = MistralToolParser::new(&test_tools());
+        let arguments = r#"{ "location": "Tokyo", "days": "3" }"#;
+        let result = parser
+            .parse_complete(&format!(
+                "Let me check.\n{}",
+                build_tool_calls(&[build_tool_call("get_weather", arguments)])
+            ))
+            .unwrap();
+
+        assert_eq!(result.normal_text, "Let me check.\n");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].tool_index, 0);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn mistral_parse_complete_extracts_pretty_multiple_tool_calls() {
+        let mut parser = MistralToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(
+                r#"I'll help.
+[TOOL_CALLS] [
+    {"name": "get_weather", "arguments": {"city": "Tokyo", "units": "celsius"}}
+    ,
+    {"name": "add", "arguments": {"x": 1, "y": 2}}
+]"#,
+            )
+            .unwrap();
+
+        expect![[r#"
+            ToolParseResult {
+                normal_text: "I'll help.\n",
+                calls: [
+                    ToolCallDelta {
+                        tool_index: 0,
+                        name: Some(
+                            "get_weather",
+                        ),
+                        arguments: "{\"city\": \"Tokyo\", \"units\": \"celsius\"}",
+                    },
+                    ToolCallDelta {
+                        tool_index: 1,
+                        name: Some(
+                            "add",
+                        ),
+                        arguments: "{\"x\": 1, \"y\": 2}",
+                    },
+                ],
+            }
+        "#]]
+        .assert_debug_eq(&result);
+    }
+
+    #[test]
+    fn mistral_does_not_validate_or_normalize_arguments() {
+        let mut parser = MistralToolParser::new(&test_tools());
+        let arguments = r#"{"location":"Tokyo",}"#;
+        let result = parser
+            .parse_complete(&build_tool_calls(&[build_tool_call(
+                "get_weather",
+                arguments,
+            )]))
+            .unwrap();
+
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn mistral_streaming_emits_argument_deltas() {
+        let mut parser = MistralToolParser::new(&test_tools());
+        let chunks = [
+            "preface [TOOL",
+            "_CALLS] [{\"name\":\"get_weather\",\"arguments\":",
+            "{\"location\":",
+            "\"Beijing\"",
+            "}",
+            "}] suffix",
+        ];
+
+        let mut result = ToolParseResult::default();
+        let mut observed_arguments = Vec::new();
+        for chunk in chunks {
+            let next = parser.push(chunk).unwrap();
+            observed_arguments.extend(
+                next.calls
+                    .iter()
+                    .filter(|call| call.name.is_none())
+                    .map(|call| call.arguments.clone()),
+            );
+            result.append(next);
+        }
+        result.append(parser.finish().unwrap());
+
+        assert_eq!(observed_arguments, ["{\"location\":", "\"Beijing\"", "}"]);
+        assert_eq!(result.normal_text, "preface  suffix");
+        assert_eq!(
+            result.coalesce_calls().calls[0].arguments,
+            r#"{"location":"Beijing"}"#
+        );
+    }
+
+    #[test]
+    fn mistral_streaming_handles_split_markers() {
+        let input = format!(
+            "hello {}",
+            build_tool_calls(&[build_tool_call("get_weather", r#"{"location":"Tokyo"}"#)])
+        );
+        let chunks = split_by_chars(&input, 5);
+        let mut parser = MistralToolParser::new(&test_tools());
+
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(result.normal_text, "hello ");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].arguments, r#"{"location":"Tokyo"}"#);
+    }
+
+    #[test]
+    fn mistral_keeps_array_bracket_literal_inside_json_string() {
+        let mut parser = MistralToolParser::new(&test_tools());
+        let arguments = r#"{"text":"Array notation: arr[0] = value[1]"}"#;
+        let result = parser
+            .parse_complete(&build_tool_calls(&[build_tool_call("echo", arguments)]))
+            .unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn mistral_finish_fails_incomplete_tool_call() {
+        let mut parser = MistralToolParser::new(&test_tools());
+        parser
+            .push(r#"[TOOL_CALLS] [{"name":"get_weather","arguments":{"location""#)
+            .unwrap();
+
+        let error = parser.finish().unwrap_err();
+
+        expect!["tool parser parsing failed: incomplete Mistral tool call"]
+            .assert_eq(&error.to_report_string());
+    }
+
+    #[test]
+    fn mistral_malformed_field_order_fails_fast() {
+        let mut parser = MistralToolParser::new(&test_tools());
+        let error = parser
+            .push(r#"[TOOL_CALLS] [{"arguments":{},"name":"get_weather"}]"#)
+            .unwrap_err();
+
+        expect![[r#"
+            tool parser parsing failed: invalid Mistral
+            expected `name`"#]]
+        .assert_eq(&error.to_report_string());
+    }
+}
diff --git a/rust/src/tool-parser/src/json/mod.rs b/rust/src/tool-parser/src/json/mod.rs
new file mode 100644
index 000000000000..ce96fbbde700
--- /dev/null
+++ b/rust/src/tool-parser/src/json/mod.rs
@@ -0,0 +1,465 @@
+//! Shared parser core for JSON tool calls wrapped by text markers.
+
+pub use hermes::HermesToolParser;
+pub use llama::Llama3JsonToolParser;
+pub use mistral::MistralToolParser;
+pub use qwen::Qwen3XmlToolParser;
+
+mod hermes;
+mod llama;
+mod mistral;
+mod qwen;
+
+use winnow::ascii::multispace0 as ws0;
+use winnow::combinator::{alt, seq};
+use winnow::error::{ModalResult, StrContext, StrContextValue};
+use winnow::prelude::*;
+use winnow::stream::Partial;
+use winnow::token::literal;
+
+use super::utils::{
+    JsonObjectScanState, json_str, parse_buffered_event, safe_text_len, take_json_object,
+};
+use super::{Result, ToolCallDelta, ToolParseResult};
+
+type JsonToolInput<'i> = Partial<&'i str>;
+
+#[derive(Debug, Clone, Copy)]
+struct JsonToolCallConfig {
+    parser_name: &'static str,
+    start_marker: &'static str,
+    end_marker: &'static str,
+    marker_whitespace: JsonToolCallWhitespace,
+    delimiter: Option<&'static str>,
+    name_key: &'static str,
+    arguments_key: &'static str,
+}
+
+#[derive(Debug, Clone, Copy)]
+enum JsonToolCallWhitespace {
+    Optional,
+    Exact(&'static str),
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum JsonToolCallMode {
+    Text,
+    Header,
+    Arguments { json_scan: JsonObjectScanState },
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum JsonToolCallEvent {
+    Text { len: usize },
+    ToolCallStart,
+    ToolCallHeader { function_name: String },
+    Arguments { len: usize },
+    ToolCallDelimiter,
+    ToolCallEnd,
+}
+
+/// Tool parser core for marker-wrapped JSON tool calls.
+#[derive(Debug)]
+struct JsonToolCallParser {
+    config: JsonToolCallConfig,
+    buffer: String,
+    mode: JsonToolCallMode,
+    active_tool_index: Option<usize>,
+    emitted_tool_count: usize,
+}
+
+impl JsonToolCallParser {
+    /// Create a marker-wrapped JSON tool-call parser.
+    fn new(config: JsonToolCallConfig) -> Self {
+        Self {
+            config,
+            buffer: String::new(),
+            mode: JsonToolCallMode::Text,
+            active_tool_index: None,
+            emitted_tool_count: 0,
+        }
+    }
+
+    /// Push one decoded text chunk through the JSON tool-call parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.buffer.push_str(chunk);
+        let mut result = ToolParseResult::default();
+        let config = self.config;
+
+        while let Some((event, consumed_len)) = parse_buffered_event(&self.buffer, |input| {
+            parse_next_json_tool_call_event(input, &mut self.mode, config)
+        })? {
+            self.apply_event(event, &mut result)?;
+            self.buffer.drain(..consumed_len);
+        }
+
+        Ok(result)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        let mut result = ToolParseResult::default();
+        match &self.mode {
+            JsonToolCallMode::Text => result.normal_text.push_str(&self.buffer),
+            JsonToolCallMode::Header | JsonToolCallMode::Arguments { .. } => {
+                return Err(parsing_failed!(
+                    "incomplete {} tool call",
+                    self.config.parser_name
+                ));
+            }
+        }
+        self.reset();
+        Ok(result)
+    }
+
+    /// Apply one parsed JSON tool-call event to parser state and output.
+    fn apply_event(
+        &mut self,
+        event: JsonToolCallEvent,
+        result: &mut ToolParseResult,
+    ) -> Result<()> {
+        match event {
+            JsonToolCallEvent::Text { len: consumed_len } => {
+                result.normal_text.push_str(&self.buffer[..consumed_len]);
+            }
+            JsonToolCallEvent::ToolCallStart => self.mode = JsonToolCallMode::Header,
+            JsonToolCallEvent::ToolCallHeader { function_name } => {
+                let tool_index = self.emitted_tool_count;
+                self.emitted_tool_count += 1;
+                self.active_tool_index = Some(tool_index);
+                self.mode = JsonToolCallMode::Arguments {
+                    json_scan: JsonObjectScanState::default(),
+                };
+                result.calls.push(ToolCallDelta {
+                    tool_index,
+                    name: Some(function_name),
+                    arguments: String::new(),
+                });
+            }
+            JsonToolCallEvent::Arguments { len: consumed_len } => {
+                let Some(tool_index) = self.active_tool_index else {
+                    return Err(parsing_failed!(
+                        "{} arguments without an active tool call",
+                        self.config.parser_name
+                    ));
+                };
+                result.calls.push(ToolCallDelta {
+                    tool_index,
+                    name: None,
+                    arguments: self.buffer[..consumed_len].to_string(),
+                });
+            }
+            JsonToolCallEvent::ToolCallDelimiter => {
+                self.active_tool_index = None;
+                self.mode = JsonToolCallMode::Header;
+            }
+            JsonToolCallEvent::ToolCallEnd => {
+                self.active_tool_index = None;
+                self.mode = JsonToolCallMode::Text;
+            }
+        }
+        Ok(())
+    }
+
+    /// Reset all streaming state.
+    fn reset(&mut self) {
+        self.buffer.clear();
+        self.mode = JsonToolCallMode::Text;
+        self.active_tool_index = None;
+        self.emitted_tool_count = 0;
+    }
+}
+
+/// Parse a JSON tool-call event for the current parser mode.
+fn parse_next_json_tool_call_event(
+    input: &mut JsonToolInput<'_>,
+    mode: &mut JsonToolCallMode,
+    config: JsonToolCallConfig,
+) -> ModalResult<JsonToolCallEvent> {
+    match mode {
+        JsonToolCallMode::Text => parse_text_event(input, config),
+        JsonToolCallMode::Header => tool_call_header_event(input, config),
+        JsonToolCallMode::Arguments { json_scan } => {
+            parse_arguments_event(input, json_scan, config)
+        }
+    }
+}
+
+/// Parse a text-mode JSON tool-call event.
+fn parse_text_event(
+    input: &mut JsonToolInput<'_>,
+    config: JsonToolCallConfig,
+) -> ModalResult<JsonToolCallEvent> {
+    alt((
+        |input: &mut JsonToolInput<'_>| tool_call_start_event(input, config),
+        |input: &mut JsonToolInput<'_>| safe_text_event(input, config),
+    ))
+    .parse_next(input)
+}
+
+/// Parse a marker-wrapped JSON tool-call start marker.
+fn tool_call_start_event(
+    input: &mut JsonToolInput<'_>,
+    config: JsonToolCallConfig,
+) -> ModalResult<JsonToolCallEvent> {
+    seq!(
+        _: literal(config.start_marker),
+        _: |input: &mut JsonToolInput<'_>| marker_whitespace(input, config),
+    )
+    .value(JsonToolCallEvent::ToolCallStart)
+    .parse_next(input)
+}
+
+/// Parse a marker-wrapped JSON tool-call header before the raw arguments
+/// payload.
+fn tool_call_header_event(
+    input: &mut JsonToolInput<'_>,
+    config: JsonToolCallConfig,
+) -> ModalResult<JsonToolCallEvent> {
+    let (function_name,) = seq!(
+        _: ws0,
+        _: literal("{"),
+        _: ws0,
+        _: |input: &mut JsonToolInput<'_>| json_key(input, config.name_key),
+        _: ws0,
+        _: literal(":"),
+        _: ws0,
+        json_str,
+        _: ws0,
+        _: literal(","),
+        _: ws0,
+        _: |input: &mut JsonToolInput<'_>| json_key(input, config.arguments_key),
+        _: ws0,
+        _: literal(":"),
+        _: ws0,
+    )
+    .context(StrContext::Label(config.parser_name))
+    .parse_next(input)?;
+
+    Ok(JsonToolCallEvent::ToolCallHeader { function_name })
+}
+
+/// Parse a configured JSON object key.
+fn json_key(input: &mut JsonToolInput<'_>, key: &'static str) -> ModalResult<()> {
+    seq!(
+        _: literal("\""),
+        _: literal(key).context(StrContext::Expected(StrContextValue::StringLiteral(key))),
+        _: literal("\""),
+    )
+    .void()
+    .parse_next(input)
+}
+
+/// Parse one event inside a marker-wrapped JSON tool-call arguments payload.
+fn parse_arguments_event(
+    input: &mut JsonToolInput<'_>,
+    json_scan: &mut JsonObjectScanState,
+    config: JsonToolCallConfig,
+) -> ModalResult<JsonToolCallEvent> {
+    if json_scan.complete() {
+        tool_call_close_event(input, config)
+    } else {
+        argument_delta_event(input, json_scan)
+    }
+}
+
+/// Parse a raw JSON arguments delta.
+fn argument_delta_event(
+    input: &mut JsonToolInput<'_>,
+    json_scan: &mut JsonObjectScanState,
+) -> ModalResult<JsonToolCallEvent> {
+    take_json_object(input, json_scan).map(|len| JsonToolCallEvent::Arguments { len })
+}
+
+/// Parse a marker-wrapped JSON tool-call close marker.
+fn tool_call_close_event(
+    input: &mut JsonToolInput<'_>,
+    config: JsonToolCallConfig,
+) -> ModalResult<JsonToolCallEvent> {
+    let _ = literal("}").parse_next(input)?;
+
+    match config.delimiter {
+        Some(delimiter) => alt((
+            |input: &mut JsonToolInput<'_>| tool_call_end_event(input, config),
+            |input: &mut JsonToolInput<'_>| tool_call_delimiter_event(input, delimiter),
+        ))
+        .parse_next(input),
+        None => tool_call_end_event(input, config),
+    }
+}
+
+/// Parse a marker-wrapped JSON tool-call end marker.
+fn tool_call_end_event(
+    input: &mut JsonToolInput<'_>,
+    config: JsonToolCallConfig,
+) -> ModalResult<JsonToolCallEvent> {
+    seq!(
+        _: |input: &mut JsonToolInput<'_>| marker_whitespace(input, config),
+        _: literal(config.end_marker),
+    )
+    .value(JsonToolCallEvent::ToolCallEnd)
+    .parse_next(input)
+}
+
+/// Parse a delimiter between JSON tool calls inside one marker block.
+fn tool_call_delimiter_event(
+    input: &mut JsonToolInput<'_>,
+    delimiter: &'static str,
+) -> ModalResult<JsonToolCallEvent> {
+    seq!(
+        _: ws0,
+        _: literal(delimiter),
+        _: ws0,
+    )
+    .value(JsonToolCallEvent::ToolCallDelimiter)
+    .parse_next(input)
+}
+
+/// Parse configured whitespace around a marker-wrapped JSON tool call.
+fn marker_whitespace(input: &mut JsonToolInput<'_>, config: JsonToolCallConfig) -> ModalResult<()> {
+    match config.marker_whitespace {
+        JsonToolCallWhitespace::Optional => ws0.void().parse_next(input),
+        JsonToolCallWhitespace::Exact(whitespace) => literal(whitespace).void().parse_next(input),
+    }
+}
+
+/// Parse a safe text run before the next marker-wrapped JSON tool call.
+fn safe_text_event(
+    input: &mut JsonToolInput<'_>,
+    config: JsonToolCallConfig,
+) -> ModalResult<JsonToolCallEvent> {
+    safe_text_len(input, config.start_marker).map(|len| JsonToolCallEvent::Text { len })
+}
+
+#[cfg(test)]
+mod tests {
+    use expect_test::expect;
+
+    use super::{JsonToolCallConfig, JsonToolCallParser, JsonToolCallWhitespace};
+    use crate::ToolParseResult;
+
+    const DELIMITED_CONFIG: JsonToolCallConfig = JsonToolCallConfig {
+        parser_name: "Delimited JSON",
+        start_marker: "<tool_calls>",
+        end_marker: "</tool_calls>",
+        marker_whitespace: JsonToolCallWhitespace::Optional,
+        delimiter: Some("<"),
+        name_key: "function",
+        arguments_key: "parameters",
+    };
+
+    fn build_tool_call(function_name: &str, arguments: &str) -> String {
+        format!(r#"{{"function":"{function_name}","parameters":{arguments}}}"#)
+    }
+
+    fn build_tool_calls(tool_calls: &[String]) -> String {
+        format!("<tool_calls>{}</tool_calls>", tool_calls.join(" <\n"))
+    }
+
+    fn collect_chunks(parser: &mut JsonToolCallParser, chunks: &[&str]) -> ToolParseResult {
+        let mut result = ToolParseResult::default();
+        for chunk in chunks {
+            result.append(parser.push(chunk).unwrap());
+        }
+        result.append(parser.finish().unwrap());
+        result.coalesce_calls()
+    }
+
+    #[test]
+    fn json_tool_call_delimiter_extracts_multiple_calls_in_one_block() {
+        let input = build_tool_calls(&[
+            build_tool_call("get_weather", r#"{"location":"Shanghai"}"#),
+            build_tool_call("add", r#"{"x":1,"y":2}"#),
+        ]);
+        let mut parser = JsonToolCallParser::new(DELIMITED_CONFIG);
+
+        let result = collect_chunks(&mut parser, &[&input]);
+
+        expect![[r#"
+            ToolParseResult {
+                normal_text: "",
+                calls: [
+                    ToolCallDelta {
+                        tool_index: 0,
+                        name: Some(
+                            "get_weather",
+                        ),
+                        arguments: "{\"location\":\"Shanghai\"}",
+                    },
+                    ToolCallDelta {
+                        tool_index: 1,
+                        name: Some(
+                            "add",
+                        ),
+                        arguments: "{\"x\":1,\"y\":2}",
+                    },
+                ],
+            }
+        "#]]
+        .assert_debug_eq(&result);
+    }
+
+    #[test]
+    fn json_tool_call_delimiter_can_arrive_in_later_chunk() {
+        let mut parser = JsonToolCallParser::new(DELIMITED_CONFIG);
+        let chunks = [
+            r#"<tool_calls>{"function":"get_weather","parameters":{"location":"Shanghai"}}"#,
+            " <\n",
+            r#"{"function":"add","parameters":{"x":1,"y":2}}"#,
+            "</tool_calls>",
+        ];
+
+        let result = collect_chunks(&mut parser, &chunks);
+
+        expect![[r#"
+            ToolParseResult {
+                normal_text: "",
+                calls: [
+                    ToolCallDelta {
+                        tool_index: 0,
+                        name: Some(
+                            "get_weather",
+                        ),
+                        arguments: "{\"location\":\"Shanghai\"}",
+                    },
+                    ToolCallDelta {
+                        tool_index: 1,
+                        name: Some(
+                            "add",
+                        ),
+                        arguments: "{\"x\":1,\"y\":2}",
+                    },
+                ],
+            }
+        "#]]
+        .assert_debug_eq(&result);
+    }
+
+    #[test]
+    fn json_tool_call_end_marker_wins_over_delimiter_prefix() {
+        let mut parser = JsonToolCallParser::new(DELIMITED_CONFIG);
+        let chunks = [
+            r#"<tool_calls>{"function":"get_weather","parameters":{"location":"Shanghai"}}"#,
+            " ",
+            "</tool_calls> trailing text",
+        ];
+
+        let result = collect_chunks(&mut parser, &chunks);
+
+        expect![[r#"
+            ToolParseResult {
+                normal_text: " trailing text",
+                calls: [
+                    ToolCallDelta {
+                        tool_index: 0,
+                        name: Some(
+                            "get_weather",
+                        ),
+                        arguments: "{\"location\":\"Shanghai\"}",
+                    },
+                ],
+            }
+        "#]]
+        .assert_debug_eq(&result);
+    }
+}
diff --git a/rust/src/tool-parser/src/json/qwen.rs b/rust/src/tool-parser/src/json/qwen.rs
new file mode 100644
index 000000000000..34bb190ec098
--- /dev/null
+++ b/rust/src/tool-parser/src/json/qwen.rs
@@ -0,0 +1,279 @@
+use super::{JsonToolCallConfig, JsonToolCallParser, JsonToolCallWhitespace};
+use crate::{Result, Tool, ToolParseResult, ToolParser};
+
+const QWEN_XML_CONFIG: JsonToolCallConfig = JsonToolCallConfig {
+    parser_name: "Qwen XML",
+    start_marker: "<tool_call>",
+    end_marker: "</tool_call>",
+    marker_whitespace: JsonToolCallWhitespace::Exact("\n"),
+    delimiter: None,
+    name_key: "name",
+    arguments_key: "arguments",
+};
+
+/// Tool parser for Qwen XML-wrapped JSON tool calls.
+///
+/// Example tool call content:
+///
+/// ```text
+/// <tool_call>
+/// {"name": "get_weather", "arguments": {"location":"Tokyo"}}
+/// </tool_call>
+/// ```
+///
+/// Arguments are already OpenAI-style JSON text, so they are streamed as raw
+/// argument deltas without schema conversion or JSON normalization.
+///
+/// Note: parallel calls are represented as repeated
+/// `<tool_call>...</tool_call>` blocks, not as multiple calls inside one tag.
+pub struct Qwen3XmlToolParser {
+    inner: JsonToolCallParser,
+}
+
+impl Qwen3XmlToolParser {
+    /// Create a Qwen XML tool parser.
+    fn new(_tools: &[Tool]) -> Self {
+        Self {
+            inner: JsonToolCallParser::new(QWEN_XML_CONFIG),
+        }
+    }
+}
+
+impl ToolParser for Qwen3XmlToolParser {
+    /// Create a boxed Qwen XML tool parser.
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tools)))
+    }
+
+    /// Push one decoded text chunk through the Qwen XML parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.inner.push(chunk)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        self.inner.finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use expect_test::expect;
+    use thiserror_ext::AsReport;
+
+    use super::Qwen3XmlToolParser;
+    use crate::test_utils::{collect_stream, split_by_chars, test_tools};
+    use crate::{ToolParseResult, ToolParser};
+
+    fn build_tool_call(function_name: &str, arguments: &str) -> String {
+        format!(
+            "<tool_call>\n{{\"name\": \"{function_name}\", \"arguments\": {arguments}}}\n</tool_call>"
+        )
+    }
+
+    #[test]
+    fn qwen_xml_parse_complete_without_tool_call_keeps_text() {
+        let mut parser = Qwen3XmlToolParser::new(&test_tools());
+        let result = parser.parse_complete("Hello, world!").unwrap();
+
+        assert_eq!(result.normal_text, "Hello, world!");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn qwen_xml_parse_complete_extracts_raw_json_arguments() {
+        let mut parser = Qwen3XmlToolParser::new(&test_tools());
+        let arguments = r#"{ "location": "Tokyo", "days": "3" }"#;
+        let result = parser
+            .parse_complete(&format!(
+                "Let me check.\n{}",
+                build_tool_call("get_weather", arguments)
+            ))
+            .unwrap();
+
+        assert_eq!(result.normal_text, "Let me check.\n");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].tool_index, 0);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn qwen_xml_does_not_validate_or_normalize_arguments() {
+        let mut parser = Qwen3XmlToolParser::new(&test_tools());
+        let arguments = r#"{"location":"Tokyo",}"#;
+        let result = parser.parse_complete(&build_tool_call("get_weather", arguments)).unwrap();
+
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn qwen_xml_streaming_emits_argument_deltas() {
+        let mut parser = Qwen3XmlToolParser::new(&test_tools());
+        let chunks = [
+            "<tool_call>",
+            "\n{\"name\": \"get_weather\", \"arguments\": ",
+            "{\"location\":",
+            "\"Beijing\"",
+            "}",
+            "}\n</tool_call>",
+        ];
+
+        let mut result = ToolParseResult::default();
+        let mut observed_arguments = Vec::new();
+        for chunk in chunks {
+            let next = parser.push(chunk).unwrap();
+            observed_arguments.extend(
+                next.calls
+                    .iter()
+                    .filter(|call| call.name.is_none())
+                    .map(|call| call.arguments.clone()),
+            );
+            result.append(next);
+        }
+        result.append(parser.finish().unwrap());
+
+        assert_eq!(observed_arguments, ["{\"location\":", "\"Beijing\"", "}"]);
+        assert_eq!(
+            result.coalesce_calls().calls[0].arguments,
+            r#"{"location":"Beijing"}"#
+        );
+    }
+
+    #[test]
+    fn qwen_xml_streaming_handles_split_markers() {
+        let input = format!(
+            "hello {}",
+            build_tool_call("get_weather", r#"{"location":"Tokyo"}"#)
+        );
+        let chunks = split_by_chars(&input, 5);
+        let mut parser = Qwen3XmlToolParser::new(&test_tools());
+
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(result.normal_text, "hello ");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].arguments, r#"{"location":"Tokyo"}"#);
+    }
+
+    #[test]
+    fn qwen_xml_keeps_end_marker_literal_inside_json_string() {
+        let mut parser = Qwen3XmlToolParser::new(&test_tools());
+        let arguments = r#"{"text":"literal </tool_call> inside"}"#;
+        let result = parser.parse_complete(&build_tool_call("echo", arguments)).unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn qwen_xml_decodes_escaped_function_name() {
+        let mut parser = Qwen3XmlToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(
+                r#"<tool_call>
+{"name":"say_\"hi","arguments":{}}
+</tool_call>"#,
+            )
+            .unwrap();
+
+        assert_eq!(result.calls[0].name.as_deref(), Some("say_\"hi"));
+    }
+
+    #[test]
+    fn qwen_xml_requires_newline_after_tool_call_start() {
+        let mut parser = Qwen3XmlToolParser::new(&test_tools());
+        let input = r#"<tool_call>{"name":"get_weather","arguments":{}}
+</tool_call>"#;
+
+        let result = parser.parse_complete(input).unwrap();
+
+        assert_eq!(result.normal_text, input);
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn qwen_xml_requires_newline_before_tool_call_end() {
+        let mut parser = Qwen3XmlToolParser::new(&test_tools());
+        let error = parser
+            .parse_complete(
+                r#"<tool_call>
+{"name":"get_weather","arguments":{}}</tool_call>"#,
+            )
+            .unwrap_err();
+
+        assert!(error.to_report_string().starts_with("tool parser parsing failed:"));
+    }
+
+    #[test]
+    fn qwen_xml_streaming_extracts_multiple_tool_calls() {
+        let input = format!(
+            "{}{}",
+            build_tool_call("get_weather", r#"{"location":"Shanghai"}"#),
+            build_tool_call("add", r#"{"x":1,"y":2}"#),
+        );
+        let chunks = split_by_chars(&input, 7);
+        let mut parser = Qwen3XmlToolParser::new(&test_tools());
+
+        let result = collect_stream(&mut parser, &chunks);
+
+        expect![[r#"
+            ToolParseResult {
+                normal_text: "",
+                calls: [
+                    ToolCallDelta {
+                        tool_index: 0,
+                        name: Some(
+                            "get_weather",
+                        ),
+                        arguments: "{\"location\":\"Shanghai\"}",
+                    },
+                    ToolCallDelta {
+                        tool_index: 1,
+                        name: Some(
+                            "add",
+                        ),
+                        arguments: "{\"x\":1,\"y\":2}",
+                    },
+                ],
+            }
+        "#]]
+        .assert_debug_eq(&result);
+    }
+
+    #[test]
+    fn qwen_xml_finish_fails_incomplete_tool_call() {
+        let mut parser = Qwen3XmlToolParser::new(&test_tools());
+        parser
+            .push(
+                r#"<tool_call>
+{"name":"get_weather","arguments":{"location""#,
+            )
+            .unwrap();
+
+        let error = parser.finish().unwrap_err();
+
+        expect!["tool parser parsing failed: incomplete Qwen XML tool call"]
+            .assert_eq(&error.to_report_string());
+    }
+
+    #[test]
+    fn qwen_xml_malformed_field_order_fails_fast() {
+        let mut parser = Qwen3XmlToolParser::new(&test_tools());
+        let error = parser
+            .push(
+                r#"<tool_call>
+{"arguments":{},"name":"get_weather"}
+</tool_call>"#,
+            )
+            .unwrap_err();
+
+        expect![[r#"
+            tool parser parsing failed: invalid Qwen XML
+            expected `name`"#]]
+        .assert_eq(&error.to_report_string());
+    }
+}
diff --git a/rust/src/tool-parser/src/kimi_k2.rs b/rust/src/tool-parser/src/kimi_k2.rs
new file mode 100644
index 000000000000..921b6e0c0114
--- /dev/null
+++ b/rust/src/tool-parser/src/kimi_k2.rs
@@ -0,0 +1,560 @@
+use winnow::ascii::{digit1, multispace0 as ws0};
+use winnow::combinator::{alt, eof, repeat, seq};
+use winnow::prelude::*;
+use winnow::stream::Partial;
+use winnow::token::{literal, rest, take_until, take_while};
+
+use super::utils::{JsonObjectScanState, parse_buffered_event, safe_text_len, take_json_object};
+use super::{Result, ToolCallDelta, ToolParseResult, ToolParser};
+use crate::Tool;
+
+const TOOL_CALLS_START: &str = "<|tool_calls_section_begin|>";
+const TOOL_CALLS_END: &str = "<|tool_calls_section_end|>";
+const TOOL_CALL_START: &str = "<|tool_call_begin|>";
+const TOOL_CALL_END: &str = "<|tool_call_end|>";
+const TOOL_CALL_ARGUMENT_START: &str = "<|tool_call_argument_begin|>";
+
+type KimiK2Input<'i> = Partial<&'i str>;
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum KimiK2Mode {
+    Text,
+    ToolBlock,
+    Header,
+    Arguments { json_scan: JsonObjectScanState },
+    Done,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum KimiK2Event {
+    Text {
+        len: usize,
+    },
+    ToolCallsStart,
+    ToolCallStart,
+    ToolCallHeader {
+        function_name: String,
+        function_index: usize,
+    },
+    Arguments {
+        len: usize,
+    },
+    ToolCallEnd,
+    ToolCallsEnd,
+    IgnoredRest,
+}
+
+/// Tool parser for Kimi K2 token-delimited tool calls.
+///
+/// Example tool call content:
+///
+/// ```text
+/// <|tool_calls_section_begin|>
+/// <|tool_call_begin|>functions.get_weather:0<|tool_call_argument_begin|>{"location":"NYC"}<|tool_call_end|>
+/// <|tool_calls_section_end|>
+/// ```
+///
+/// Arguments are already OpenAI-style JSON text, so they are streamed as raw
+/// argument deltas without schema conversion or JSON normalization.
+pub struct KimiK2ToolParser {
+    buffer: String,
+    mode: KimiK2Mode,
+    active_tool_index: Option<usize>,
+}
+
+impl KimiK2ToolParser {
+    /// Create a Kimi K2 tool parser.
+    fn new(_tools: &[Tool]) -> Self {
+        Self {
+            buffer: String::new(),
+            mode: KimiK2Mode::Text,
+            active_tool_index: None,
+        }
+    }
+
+    /// Apply one parsed Kimi K2 event to parser state and output.
+    fn apply_event(&mut self, event: KimiK2Event, result: &mut ToolParseResult) -> Result<()> {
+        match event {
+            KimiK2Event::Text { len: consumed_len } => {
+                result.normal_text.push_str(&self.buffer[..consumed_len]);
+            }
+            KimiK2Event::ToolCallsStart => self.mode = KimiK2Mode::ToolBlock,
+            KimiK2Event::ToolCallStart => self.mode = KimiK2Mode::Header,
+            KimiK2Event::ToolCallHeader {
+                function_name,
+                function_index,
+            } => {
+                let tool_index = function_index;
+                self.active_tool_index = Some(tool_index);
+                self.mode = KimiK2Mode::Arguments {
+                    json_scan: JsonObjectScanState::default(),
+                };
+                result.calls.push(ToolCallDelta {
+                    tool_index,
+                    name: Some(function_name),
+                    arguments: String::new(),
+                });
+            }
+            KimiK2Event::Arguments { len: consumed_len } => {
+                let Some(tool_index) = self.active_tool_index else {
+                    return Err(parsing_failed!(
+                        "Kimi K2 arguments without an active tool call"
+                    ));
+                };
+                result.calls.push(ToolCallDelta {
+                    tool_index,
+                    name: None,
+                    arguments: self.buffer[..consumed_len].to_string(),
+                });
+            }
+            KimiK2Event::ToolCallEnd => {
+                self.active_tool_index = None;
+                self.mode = KimiK2Mode::ToolBlock;
+            }
+            KimiK2Event::ToolCallsEnd => {
+                self.active_tool_index = None;
+                self.mode = KimiK2Mode::Done;
+            }
+            KimiK2Event::IgnoredRest => {}
+        }
+        Ok(())
+    }
+
+    /// Reset all streaming state.
+    fn reset(&mut self) {
+        self.buffer.clear();
+        self.mode = KimiK2Mode::Text;
+        self.active_tool_index = None;
+    }
+}
+
+impl ToolParser for KimiK2ToolParser {
+    /// Create a boxed Kimi K2 tool parser.
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tools)))
+    }
+
+    /// Preserve Kimi K2 special-token markers while decoding.
+    fn preserve_special_tokens(&self) -> bool {
+        true
+    }
+
+    /// Push one decoded text chunk through the Kimi K2 parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.buffer.push_str(chunk);
+        let mut result = ToolParseResult::default();
+
+        while let Some((event, consumed_len)) = parse_buffered_event(&self.buffer, |input| {
+            parse_next_kimi_k2_event(input, &mut self.mode)
+        })? {
+            self.apply_event(event, &mut result)?;
+            self.buffer.drain(..consumed_len);
+        }
+
+        Ok(result)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        let mut result = ToolParseResult::default();
+        match &self.mode {
+            KimiK2Mode::Text => result.normal_text.push_str(&self.buffer),
+            KimiK2Mode::ToolBlock | KimiK2Mode::Done => {}
+            KimiK2Mode::Header | KimiK2Mode::Arguments { .. } => {
+                return Err(parsing_failed!("incomplete Kimi K2 tool call"));
+            }
+        }
+        self.reset();
+        Ok(result)
+    }
+}
+
+/// Parse a Kimi K2 event for the current parser mode.
+fn parse_next_kimi_k2_event(
+    input: &mut KimiK2Input<'_>,
+    mode: &mut KimiK2Mode,
+) -> ModalResult<KimiK2Event> {
+    match mode {
+        KimiK2Mode::Text => parse_text_event(input),
+        KimiK2Mode::ToolBlock => parse_tool_block_event(input),
+        KimiK2Mode::Header => tool_call_header_event(input),
+        KimiK2Mode::Arguments { json_scan } => parse_arguments_event(input, json_scan),
+        KimiK2Mode::Done => ignored_rest_event(input),
+    }
+}
+
+/// Parse a text-mode Kimi K2 event.
+fn parse_text_event(input: &mut KimiK2Input<'_>) -> ModalResult<KimiK2Event> {
+    alt((tool_calls_start_event, safe_text_event)).parse_next(input)
+}
+
+/// Parse one event inside the Kimi K2 tool-calls section.
+fn parse_tool_block_event(input: &mut KimiK2Input<'_>) -> ModalResult<KimiK2Event> {
+    alt((tool_calls_end_event, tool_call_start_event)).parse_next(input)
+}
+
+/// Parse one event inside a Kimi K2 tool-call arguments payload.
+fn parse_arguments_event(
+    input: &mut KimiK2Input<'_>,
+    json_scan: &mut JsonObjectScanState,
+) -> ModalResult<KimiK2Event> {
+    if json_scan.complete() {
+        tool_call_end_event(input)
+    } else {
+        argument_delta_event(input, json_scan)
+    }
+}
+
+/// Parse a Kimi K2 tool-calls section start marker.
+fn tool_calls_start_event(input: &mut KimiK2Input<'_>) -> ModalResult<KimiK2Event> {
+    literal(TOOL_CALLS_START).value(KimiK2Event::ToolCallsStart).parse_next(input)
+}
+
+/// Parse a Kimi K2 tool-calls section end marker.
+fn tool_calls_end_event(input: &mut KimiK2Input<'_>) -> ModalResult<KimiK2Event> {
+    (ws0, literal(TOOL_CALLS_END))
+        .value(KimiK2Event::ToolCallsEnd)
+        .parse_next(input)
+}
+
+/// Parse a Kimi K2 tool-call start marker.
+fn tool_call_start_event(input: &mut KimiK2Input<'_>) -> ModalResult<KimiK2Event> {
+    (ws0, literal(TOOL_CALL_START))
+        .value(KimiK2Event::ToolCallStart)
+        .parse_next(input)
+}
+
+/// Parse a Kimi K2 tool-call end marker.
+fn tool_call_end_event(input: &mut KimiK2Input<'_>) -> ModalResult<KimiK2Event> {
+    literal(TOOL_CALL_END).value(KimiK2Event::ToolCallEnd).parse_next(input)
+}
+
+/// Parse a Kimi K2 tool-call header before the argument marker.
+fn tool_call_header_event(input: &mut KimiK2Input<'_>) -> ModalResult<KimiK2Event> {
+    let (header, _) = (
+        take_until(1.., TOOL_CALL_ARGUMENT_START),
+        literal(TOOL_CALL_ARGUMENT_START),
+    )
+        .parse_next(input)?;
+
+    let mut header_input = header;
+    let (header, _, _) = (tool_header, ws0, eof).parse_next(&mut header_input)?;
+
+    Ok(KimiK2Event::ToolCallHeader {
+        function_name: header.function_name,
+        function_index: header.function_index,
+    })
+}
+
+/// Parse a Kimi K2 raw JSON arguments delta.
+fn argument_delta_event(
+    input: &mut KimiK2Input<'_>,
+    json_scan: &mut JsonObjectScanState,
+) -> ModalResult<KimiK2Event> {
+    take_json_object(input, json_scan).map(|len| KimiK2Event::Arguments { len })
+}
+
+/// Parse a safe text run before the next Kimi K2 tool-calls section.
+fn safe_text_event(input: &mut KimiK2Input<'_>) -> ModalResult<KimiK2Event> {
+    safe_text_len(input, TOOL_CALLS_START).map(|len| KimiK2Event::Text { len })
+}
+
+/// Parse ignored rest after the Kimi K2 tool-calls section ends.
+fn ignored_rest_event(input: &mut KimiK2Input<'_>) -> ModalResult<KimiK2Event> {
+    rest.value(KimiK2Event::IgnoredRest).parse_next(input)
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct KimiK2ToolHeader {
+    function_name: String,
+    function_index: usize,
+}
+
+/// Parse a Kimi K2 tool-call header.
+fn tool_header(input: &mut &str) -> ModalResult<KimiK2ToolHeader> {
+    let (function_name, function_index) = seq!(
+        _: ws0,
+        _: namespace_prefix,
+        tool_name_segment,
+        _: literal(":"),
+        tool_call_index,
+    )
+    .parse_next(input)?;
+
+    Ok(KimiK2ToolHeader {
+        function_name: function_name.to_string(),
+        function_index,
+    })
+}
+
+/// Parse Kimi K2 namespace segments before the final tool name.
+fn namespace_prefix(input: &mut &str) -> ModalResult<()> {
+    repeat(0.., namespace_segment).parse_next(input)
+}
+
+/// Parse a Kimi K2 namespace segment.
+fn namespace_segment<'i>(input: &mut &'i str) -> ModalResult<&'i str> {
+    let (segment, _) = (tool_name_segment, literal(".")).parse_next(input)?;
+    Ok(segment)
+}
+
+/// Parse a Kimi K2 tool name segment.
+fn tool_name_segment<'i>(input: &mut &'i str) -> ModalResult<&'i str> {
+    take_while(1.., |ch: char| {
+        !ch.is_whitespace() && ch != '<' && ch != ':' && ch != '.'
+    })
+    .parse_next(input)
+}
+
+/// Parse a Kimi K2 tool-call index.
+fn tool_call_index(input: &mut &str) -> ModalResult<usize> {
+    digit1.parse_to().parse_next(input)
+}
+
+#[cfg(test)]
+mod tests {
+    use expect_test::expect;
+    use thiserror_ext::AsReport;
+
+    use super::{
+        KimiK2ToolParser, TOOL_CALL_ARGUMENT_START, TOOL_CALL_END, TOOL_CALL_START, TOOL_CALLS_END,
+        TOOL_CALLS_START, ToolParser, tool_header,
+    };
+    use crate::ToolParseResult;
+    use crate::test_utils::{collect_stream, split_by_chars, test_tools};
+
+    fn build_tool_call(function_name: &str, index: usize, arguments: &str) -> String {
+        format!(
+            "{TOOL_CALL_START}functions.{function_name}:{index}{TOOL_CALL_ARGUMENT_START}{arguments}{TOOL_CALL_END}"
+        )
+    }
+
+    fn build_tool_section(tool_calls: &[String]) -> String {
+        format!("{TOOL_CALLS_START}{}{TOOL_CALLS_END}", tool_calls.join(""))
+    }
+
+    #[test]
+    fn kimi_k2_parse_complete_without_tool_call_keeps_text() {
+        let mut parser = KimiK2ToolParser::new(&test_tools());
+        let result = parser.parse_complete("Hello, world!").unwrap();
+
+        assert_eq!(result.normal_text, "Hello, world!");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn kimi_k2_parse_complete_extracts_raw_json_arguments() {
+        let mut parser = KimiK2ToolParser::new(&test_tools());
+        let arguments = r#"{ "location": "NYC", "days": "3" }"#;
+        let result = parser
+            .parse_complete(&format!(
+                "Checking. {} trailing text",
+                build_tool_section(&[build_tool_call("get_weather", 0, arguments)])
+            ))
+            .unwrap();
+
+        assert_eq!(result.normal_text, "Checking. ");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].tool_index, 0);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn kimi_k2_does_not_validate_or_normalize_arguments() {
+        let mut parser = KimiK2ToolParser::new(&test_tools());
+        let arguments = r#"{"location":"NYC",}"#;
+        let result = parser
+            .parse_complete(&build_tool_section(&[build_tool_call(
+                "get_weather",
+                0,
+                arguments,
+            )]))
+            .unwrap();
+
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn kimi_k2_streaming_emits_argument_deltas() {
+        let mut parser = KimiK2ToolParser::new(&test_tools());
+        let chunks = [
+            TOOL_CALLS_START,
+            TOOL_CALL_START,
+            "functions.get_weather:0",
+            TOOL_CALL_ARGUMENT_START,
+            "{\"location\":",
+            "\"Paris\"",
+            "}",
+            TOOL_CALL_END,
+            TOOL_CALLS_END,
+        ];
+
+        let mut result = ToolParseResult::default();
+        let mut observed_arguments = Vec::new();
+        for chunk in chunks {
+            let next = parser.push(chunk).unwrap();
+            observed_arguments.extend(
+                next.calls
+                    .iter()
+                    .filter(|call| call.name.is_none())
+                    .map(|call| call.arguments.clone()),
+            );
+            result.append(next);
+        }
+        result.append(parser.finish().unwrap());
+
+        assert_eq!(observed_arguments, ["{\"location\":", "\"Paris\"", "}"]);
+        let result = result.coalesce_calls();
+        assert_eq!(result.calls[0].arguments, r#"{"location":"Paris"}"#);
+    }
+
+    #[test]
+    fn kimi_k2_streaming_holds_back_split_markers() {
+        let mut parser = KimiK2ToolParser::new(&test_tools());
+        let chunks = [
+            "hello <|tool_calls",
+            "_section_begin|>",
+            TOOL_CALL_START,
+            "functions.get_weather:0",
+            TOOL_CALL_ARGUMENT_START,
+            r#"{"location":"NYC"}"#,
+            "<|tool_call",
+            "_end|>",
+            TOOL_CALLS_END,
+        ];
+
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(result.normal_text, "hello ");
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].arguments, r#"{"location":"NYC"}"#);
+    }
+
+    #[test]
+    fn kimi_k2_keeps_end_marker_literal_inside_json_string() {
+        let mut parser = KimiK2ToolParser::new(&test_tools());
+        let arguments = format!(r#"{{"text":"literal {TOOL_CALL_END} inside"}}"#);
+        let input = build_tool_section(&[build_tool_call("echo", 0, &arguments)]);
+
+        let result = parser.parse_complete(&input).unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].arguments, arguments);
+    }
+
+    #[test]
+    fn kimi_k2_streaming_keeps_split_end_marker_literal_inside_json_string() {
+        let mut parser = KimiK2ToolParser::new(&test_tools());
+        let chunks = [
+            TOOL_CALLS_START,
+            TOOL_CALL_START,
+            "functions.echo:0",
+            TOOL_CALL_ARGUMENT_START,
+            r#"{"text":"literal <|tool"#,
+            r#"_call_end|> inside"}"#,
+            TOOL_CALL_END,
+            TOOL_CALLS_END,
+        ];
+
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(
+            result.calls[0].arguments,
+            r#"{"text":"literal <|tool_call_end|> inside"}"#
+        );
+    }
+
+    #[test]
+    fn kimi_k2_streaming_extracts_multiple_tool_calls() {
+        let mut parser = KimiK2ToolParser::new(&test_tools());
+        let input = build_tool_section(&[
+            build_tool_call("get_weather", 0, r#"{"location":"Shanghai"}"#),
+            build_tool_call("add", 1, r#"{"x":1,"y":2}"#),
+        ]);
+
+        let chunks = split_by_chars(&input, 7);
+        let result = collect_stream(&mut parser, &chunks);
+
+        expect![[r#"
+            ToolParseResult {
+                normal_text: "",
+                calls: [
+                    ToolCallDelta {
+                        tool_index: 0,
+                        name: Some(
+                            "get_weather",
+                        ),
+                        arguments: "{\"location\":\"Shanghai\"}",
+                    },
+                    ToolCallDelta {
+                        tool_index: 1,
+                        name: Some(
+                            "add",
+                        ),
+                        arguments: "{\"x\":1,\"y\":2}",
+                    },
+                ],
+            }
+        "#]]
+        .assert_debug_eq(&result);
+    }
+
+    #[test]
+    fn kimi_k2_accepts_non_functions_header_prefix() {
+        let mut parser = KimiK2ToolParser::new(&test_tools());
+        let input = format!(
+            "{TOOL_CALLS_START}{TOOL_CALL_START}api.tools.search:42{TOOL_CALL_ARGUMENT_START}{{}}{TOOL_CALL_END}{TOOL_CALLS_END}"
+        );
+
+        let result = parser.parse_complete(&input).unwrap();
+
+        assert_eq!(result.calls[0].tool_index, 42);
+        assert_eq!(result.calls[0].name.as_deref(), Some("search"));
+        assert_eq!(result.calls[0].arguments, "{}");
+    }
+
+    #[test]
+    fn kimi_k2_tool_header_parses_namespace_function_and_index() {
+        let mut input = "api.tools.search:42";
+        let header = tool_header(&mut input).unwrap();
+
+        expect![[r#"
+            KimiK2ToolHeader {
+                function_name: "search",
+                function_index: 42,
+            }
+        "#]]
+        .assert_debug_eq(&header);
+    }
+
+    #[test]
+    fn kimi_k2_finish_fails_incomplete_tool_call() {
+        let mut parser = KimiK2ToolParser::new(&test_tools());
+        parser
+            .push(&format!(
+                "{TOOL_CALLS_START}{TOOL_CALL_START}functions.get_weather:0{TOOL_CALL_ARGUMENT_START}{{\"location\""
+            ))
+            .unwrap();
+
+        let error = parser.finish().unwrap_err();
+
+        expect!["tool parser parsing failed: incomplete Kimi K2 tool call"]
+            .assert_eq(&error.to_report_string());
+    }
+
+    #[test]
+    fn kimi_k2_malformed_header_fails_fast() {
+        let mut parser = KimiK2ToolParser::new(&test_tools());
+        let input =
+            format!("{TOOL_CALLS_START}{TOOL_CALL_START}get_weather{TOOL_CALL_ARGUMENT_START}{{}}");
+
+        let error = parser.push(&input).unwrap_err();
+
+        expect!["tool parser parsing failed: "].assert_eq(&error.to_report_string());
+    }
+}
diff --git a/rust/src/tool-parser/src/lib.rs b/rust/src/tool-parser/src/lib.rs
new file mode 100644
index 000000000000..f7d3411f3b7c
--- /dev/null
+++ b/rust/src/tool-parser/src/lib.rs
@@ -0,0 +1,141 @@
+//! Streaming tool parsers for chat completions.
+
+#[macro_use]
+mod error;
+mod deepseek_dsml;
+mod deepseek_json;
+mod gemma4;
+mod glm_xml;
+mod json;
+mod kimi_k2;
+mod minimax_m2;
+mod parameters;
+mod qwen_coder;
+#[cfg(any(test, feature = "test-util"))]
+pub mod test_utils;
+mod utils;
+
+use std::collections::{BTreeMap, btree_map};
+
+pub use deepseek_dsml::{DeepSeekV4ToolParser, DeepSeekV32ToolParser};
+pub use deepseek_json::{DeepSeekV3ToolParser, DeepSeekV31ToolParser};
+pub use error::{Result, ToolParserError};
+pub use gemma4::Gemma4ToolParser;
+pub use glm_xml::{Glm45MoeToolParser, Glm47MoeToolParser};
+pub use json::{HermesToolParser, Llama3JsonToolParser, MistralToolParser, Qwen3XmlToolParser};
+pub use kimi_k2::KimiK2ToolParser;
+pub use minimax_m2::MinimaxM2ToolParser;
+pub use qwen_coder::Qwen3CoderToolParser;
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+
+/// One function-style tool made available to the model.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct Tool {
+    pub name: String,
+    pub description: Option<String>,
+    pub parameters: Value,
+    pub strict: Option<bool>,
+}
+
+/// One tool-call update emitted while parsing assistant text.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ToolCallDelta {
+    /// Stable parser-local tool index for this call within one assistant turn.
+    pub tool_index: usize,
+    /// Function name, present on the first update for one tool call.
+    pub name: Option<String>,
+    /// Arguments text contributed by this update.
+    pub arguments: String,
+}
+
+/// Result of advancing tool parsing with one assistant-text input.
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+pub struct ToolParseResult {
+    /// Plain assistant text that is not part of any tool call.
+    pub normal_text: String,
+    /// Tool-call updates extracted from this input.
+    pub calls: Vec<ToolCallDelta>,
+}
+
+impl ToolParseResult {
+    /// Append another parser result onto this one.
+    ///
+    /// Note that this does not attempt to merge multiple deltas for the same
+    /// tool call into one complete item. Call `coalesce_calls()` after if
+    /// that behavior is desired.
+    pub(crate) fn append(&mut self, mut other: Self) {
+        self.normal_text.push_str(&other.normal_text);
+        self.calls.append(&mut other.calls);
+    }
+
+    /// Merge multiple deltas for the same tool call into one complete item.
+    ///
+    /// This is primarily used by the default `parse_complete()` implementation,
+    /// which delegates through the incremental parser lifecycle and then
+    /// needs to collapse streaming-style argument fragments into one final
+    /// tool call.
+    pub(crate) fn coalesce_calls(mut self) -> Self {
+        let mut merged = BTreeMap::<usize, ToolCallDelta>::new();
+        let mut order = Vec::new();
+
+        for call in self.calls {
+            match merged.entry(call.tool_index) {
+                btree_map::Entry::Vacant(entry) => {
+                    order.push(call.tool_index);
+                    entry.insert(call);
+                }
+                btree_map::Entry::Occupied(mut entry) => {
+                    let existing = entry.get_mut();
+                    if existing.name.is_none() {
+                        existing.name = call.name;
+                    }
+                    existing.arguments.push_str(&call.arguments);
+                }
+            }
+        }
+
+        self.calls =
+            order.into_iter().filter_map(|tool_index| merged.remove(&tool_index)).collect();
+        self
+    }
+}
+
+/// Incremental parser that extracts tool calls from assistant output.
+pub trait ToolParser: Send {
+    /// Construct a boxed parser instance for one request stream.
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static;
+
+    /// Return whether decoded output must preserve tokenizer special tokens.
+    ///
+    /// Some model families emit tool-call sentinels as special tokens. Those
+    /// parsers need `skip_special_tokens = false` while parsing is enabled.
+    fn preserve_special_tokens(&self) -> bool {
+        false
+    }
+
+    /// Feed one decoded text delta into the parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult>;
+
+    /// Flush any buffered partial state at end of stream.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        Ok(ToolParseResult::default())
+    }
+
+    /// Parse complete tool calls from final output.
+    ///
+    /// The default implementation reuses the incremental parser lifecycle by
+    /// feeding the full output through `push()` and then calling `finish()`.
+    /// This keeps one source of truth for robust parsers whose incremental
+    /// state machine is equivalent across arbitrary chunking.
+    fn parse_complete(&mut self, output: &str) -> Result<ToolParseResult> {
+        let mut result = self.push(output)?;
+        result.append(self.finish()?);
+        Ok(result.coalesce_calls())
+    }
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/rust/src/tool-parser/src/minimax_m2.rs b/rust/src/tool-parser/src/minimax_m2.rs
new file mode 100644
index 000000000000..34f2bc89b11e
--- /dev/null
+++ b/rust/src/tool-parser/src/minimax_m2.rs
@@ -0,0 +1,519 @@
+use winnow::ascii::{multispace0 as ws0, multispace1 as ws1};
+use winnow::combinator::{alt, delimited, repeat, seq, terminated};
+use winnow::prelude::*;
+use winnow::stream::Partial;
+use winnow::token::{literal, rest, take_until};
+
+use super::parameters::ToolSchemas;
+use super::utils::{parse_buffered_event, safe_text_len, xml_unescape};
+use super::{Result, ToolCallDelta, ToolParseResult, ToolParser};
+use crate::Tool;
+
+const TOOL_CALL_START: &str = "<minimax:tool_call>";
+const TOOL_CALL_END: &str = "</minimax:tool_call>";
+const INVOKE_START: &str = "<invoke";
+const INVOKE_END: &str = "</invoke>";
+const PARAMETER_START: &str = "<parameter";
+const PARAMETER_END: &str = "</parameter>";
+
+type MinimaxM2Input<'i> = Partial<&'i str>;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum MinimaxM2Mode {
+    Text,
+    ToolBlock,
+    Done,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum MinimaxM2Event {
+    Text {
+        len: usize,
+    },
+    ToolBlockStart,
+    Invoke {
+        name: String,
+        raw_params: Vec<(String, String)>,
+    },
+    ToolBlockEnd,
+    IgnoredRest,
+}
+
+/// Tool parser for MiniMax M2 XML-style tool calls.
+///
+/// Example tool call content:
+///
+/// ```text
+/// <minimax:tool_call><invoke name="get_weather">
+/// <parameter name="city">Seattle</parameter>
+/// </invoke></minimax:tool_call>
+/// ```
+///
+/// Arguments are emitted only after a full `<invoke>` block is parsed.
+pub struct MinimaxM2ToolParser {
+    buffer: String,
+    mode: MinimaxM2Mode,
+    emitted_tool_count: usize,
+    tool_parameters: ToolSchemas,
+}
+
+impl MinimaxM2ToolParser {
+    /// Create a MiniMax M2 tool parser.
+    fn new(tools: &[Tool]) -> Self {
+        Self {
+            buffer: String::new(),
+            mode: MinimaxM2Mode::Text,
+            emitted_tool_count: 0,
+            tool_parameters: ToolSchemas::from_tools(tools),
+        }
+    }
+
+    /// Apply one parsed MiniMax M2 event to parser state and output.
+    fn apply_event(&mut self, event: MinimaxM2Event, result: &mut ToolParseResult) -> Result<()> {
+        match event {
+            MinimaxM2Event::Text { len: consumed_len } => {
+                result.normal_text.push_str(&self.buffer[..consumed_len]);
+            }
+            MinimaxM2Event::ToolBlockStart => self.mode = MinimaxM2Mode::ToolBlock,
+            MinimaxM2Event::Invoke { name, raw_params } => {
+                let arguments = self.tool_parameters.convert_params_with_schema(&name, raw_params);
+                let arguments = serde_json::to_string(&arguments)
+                    .map_err(|error| parsing_failed!("failed to serialize arguments: {}", error))?;
+
+                result.calls.push(ToolCallDelta {
+                    tool_index: self.emitted_tool_count,
+                    name: Some(name),
+                    arguments,
+                });
+                self.emitted_tool_count += 1;
+            }
+            MinimaxM2Event::ToolBlockEnd => self.mode = MinimaxM2Mode::Done,
+            MinimaxM2Event::IgnoredRest => {}
+        }
+        Ok(())
+    }
+
+    /// Reset all streaming state.
+    fn reset(&mut self) {
+        self.buffer.clear();
+        self.mode = MinimaxM2Mode::Text;
+        self.emitted_tool_count = 0;
+    }
+}
+
+impl ToolParser for MinimaxM2ToolParser {
+    /// Create a boxed MiniMax M2 tool parser.
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tools)))
+    }
+
+    /// Push one decoded text chunk through the MiniMax M2 parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.buffer.push_str(chunk);
+        let mut result = ToolParseResult::default();
+
+        while let Some((event, consumed_len)) = parse_buffered_event(&self.buffer, |input| {
+            parse_next_minimax_m2_event(input, self.mode)
+        })? {
+            self.apply_event(event, &mut result)?;
+            self.buffer.drain(..consumed_len);
+        }
+
+        Ok(result)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        let mut result = ToolParseResult::default();
+        match self.mode {
+            MinimaxM2Mode::Text => {
+                result.normal_text.push_str(&self.buffer);
+            }
+            MinimaxM2Mode::ToolBlock => {
+                return Err(parsing_failed!("incomplete MiniMax M2 tool call"));
+            }
+            MinimaxM2Mode::Done => {}
+        }
+        self.reset();
+        Ok(result)
+    }
+}
+
+/// Parse a MiniMax M2 event for the current parser mode.
+fn parse_next_minimax_m2_event(
+    input: &mut MinimaxM2Input<'_>,
+    mode: MinimaxM2Mode,
+) -> ModalResult<MinimaxM2Event> {
+    match mode {
+        MinimaxM2Mode::Text => parse_text_event(input),
+        MinimaxM2Mode::ToolBlock => parse_tool_block_event(input),
+        MinimaxM2Mode::Done => ignored_rest_event(input),
+    }
+}
+
+/// Parse a text-mode MiniMax M2 event.
+fn parse_text_event(input: &mut MinimaxM2Input<'_>) -> ModalResult<MinimaxM2Event> {
+    alt((tool_block_start_event, safe_text_event)).parse_next(input)
+}
+
+/// Parse a MiniMax M2 tool-block start marker.
+fn tool_block_start_event(input: &mut MinimaxM2Input<'_>) -> ModalResult<MinimaxM2Event> {
+    literal(TOOL_CALL_START).value(MinimaxM2Event::ToolBlockStart).parse_next(input)
+}
+
+/// Parse a safe text run before the next MiniMax M2 marker.
+fn safe_text_event(input: &mut MinimaxM2Input<'_>) -> ModalResult<MinimaxM2Event> {
+    safe_text_len(input, TOOL_CALL_START).map(|len| MinimaxM2Event::Text { len })
+}
+
+/// Parse one event inside a MiniMax M2 tool block.
+fn parse_tool_block_event(input: &mut MinimaxM2Input<'_>) -> ModalResult<MinimaxM2Event> {
+    alt((tool_block_end_event, invoke_event)).parse_next(input)
+}
+
+/// Parse a MiniMax M2 tool-block end marker.
+fn tool_block_end_event(input: &mut MinimaxM2Input<'_>) -> ModalResult<MinimaxM2Event> {
+    (ws0, literal(TOOL_CALL_END))
+        .value(MinimaxM2Event::ToolBlockEnd)
+        .parse_next(input)
+}
+
+/// Parse a complete MiniMax M2 invoke block.
+fn invoke_event(input: &mut MinimaxM2Input<'_>) -> ModalResult<MinimaxM2Event> {
+    let (name, raw_params) = seq!(
+        _: ws0,
+        _: literal(INVOKE_START),
+        _: (ws1, literal("name=")),
+        attr_value,
+        _: literal(">"),
+        repeat(0.., terminated(parameter, ws0)),
+        _: literal(INVOKE_END),
+    )
+    .parse_next(input)?;
+
+    Ok(MinimaxM2Event::Invoke {
+        name: name.trim().to_string(),
+        raw_params,
+    })
+}
+
+/// Parse a MiniMax M2 parameter block.
+fn parameter(input: &mut MinimaxM2Input<'_>) -> ModalResult<(String, String)> {
+    let (name, value) = seq!(
+        _: literal(PARAMETER_START),
+        _: (ws1, literal("name=")),
+        attr_value,
+        _: literal(">"),
+        take_until(0.., PARAMETER_END).map(xml_unescape),
+        _: literal(PARAMETER_END),
+    )
+    .parse_next(input)?;
+
+    Ok((name.trim().to_string(), value.into_owned()))
+}
+
+/// Parse a quoted or unquoted XML attribute value.
+fn attr_value<'i>(input: &mut MinimaxM2Input<'i>) -> ModalResult<&'i str> {
+    alt((
+        delimited(literal("\""), take_until(1.., "\""), literal("\"")),
+        delimited(literal("'"), take_until(1.., "'"), literal("'")),
+        take_until(1.., ">"),
+    ))
+    .parse_next(input)
+}
+
+/// Parse ignored rest after the MiniMax M2 tool block ends.
+fn ignored_rest_event(input: &mut MinimaxM2Input<'_>) -> ModalResult<MinimaxM2Event> {
+    rest.value(MinimaxM2Event::IgnoredRest).parse_next(input)
+}
+
+#[cfg(test)]
+mod tests {
+    use expect_test::expect;
+    use serde_json::{Value, json};
+    use thiserror_ext::AsReport;
+
+    use super::{MinimaxM2ToolParser, TOOL_CALL_END, TOOL_CALL_START, ToolParser};
+    use crate::test_utils::{collect_stream, split_by_chars, test_tools};
+
+    fn build_tool_block(invokes: &[(&str, Vec<(&str, &str)>)]) -> String {
+        let invokes = invokes
+            .iter()
+            .map(|(function_name, params)| {
+                let params = params
+                    .iter()
+                    .map(|(name, value)| format!(r#"<parameter name="{name}">{value}</parameter>"#))
+                    .collect::<Vec<_>>()
+                    .join("");
+                format!(r#"<invoke name="{function_name}">{params}</invoke>"#)
+            })
+            .collect::<String>();
+        format!("{TOOL_CALL_START}{invokes}{TOOL_CALL_END}")
+    }
+
+    #[test]
+    fn minimax_m2_parse_complete_without_tool_call_keeps_text() {
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let result = parser.parse_complete("Hello, world!").unwrap();
+
+        assert_eq!(result.normal_text, "Hello, world!");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn minimax_m2_parse_complete_extracts_single_tool_call() {
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&build_tool_block(&[(
+                "get_weather",
+                vec![("city", "Seattle"), ("days", "5")],
+            )]))
+            .unwrap();
+
+        assert!(result.normal_text.is_empty());
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({ "city": "Seattle", "days": 5 })
+        );
+    }
+
+    #[test]
+    fn minimax_m2_parse_complete_preserves_prefix_and_ignores_trailing_text() {
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let output = format!(
+            "Let me check. {} This trailing text is ignored.",
+            build_tool_block(&[("get_weather", vec![("city", "Seattle")])])
+        );
+        let result = parser.parse_complete(&output).unwrap();
+
+        assert_eq!(result.normal_text, "Let me check. ");
+        assert_eq!(result.calls.len(), 1);
+    }
+
+    #[test]
+    fn minimax_m2_parse_complete_extracts_multiple_invokes() {
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&build_tool_block(&[
+                ("get_weather", vec![("city", "Seattle")]),
+                ("get_weather", vec![("city", "NYC")]),
+            ]))
+            .unwrap();
+
+        assert_eq!(result.calls.len(), 2);
+        assert_eq!(result.calls[0].tool_index, 0);
+        assert_eq!(result.calls[1].tool_index, 1);
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({ "city": "Seattle" })
+        );
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[1].arguments).unwrap(),
+            json!({ "city": "NYC" })
+        );
+    }
+
+    #[test]
+    fn minimax_m2_parse_complete_converts_schema_types() {
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&build_tool_block(&[(
+                "convert",
+                vec![
+                    ("whole", "5.0"),
+                    ("flag", "true"),
+                    ("payload", r#"{"nested":true}"#),
+                    ("items", "[1,2]"),
+                    ("empty", "42"),
+                ],
+            )]))
+            .unwrap();
+
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "whole": 5.0,
+                "flag": true,
+                "payload": { "nested": true },
+                "items": [1, 2],
+                "empty": "42",
+            })
+        );
+    }
+
+    #[test]
+    fn minimax_m2_parse_complete_unescapes_literal_closing_tags_in_parameter_value() {
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&build_tool_block(&[(
+                "get_weather",
+                vec![
+                    (
+                        "city",
+                        "Seattle &lt;/parameter&gt;&lt;/invoke&gt;&lt;/minimax:tool_call&gt;",
+                    ),
+                    ("days", "5"),
+                ],
+            )]))
+            .unwrap();
+
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "city": "Seattle </parameter></invoke></minimax:tool_call>",
+                "days": 5,
+            })
+        );
+    }
+
+    #[test]
+    fn minimax_m2_parse_complete_handles_multiline_parameters() {
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(
+                "<minimax:tool_call>\
+                 <invoke name=\"calculate_area\">\
+                 <parameter name=\"shape\">\nrectangle\n</parameter>\
+                 <parameter name=\"dimensions\">{\"width\":10,\n\"height\":20}</parameter>\
+                 <parameter name=\"precision\">2</parameter>\
+                 </invoke>\
+                 </minimax:tool_call>",
+            )
+            .unwrap();
+
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "shape": "\nrectangle\n",
+                "dimensions": { "width": 10, "height": 20 },
+                "precision": 2,
+            })
+        );
+    }
+
+    #[test]
+    fn minimax_m2_streaming_extracts_single_tool_call() {
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let result = collect_stream(
+            &mut parser,
+            &[
+                "<minimax:tool_call>",
+                r#"<invoke name="get_weather">"#,
+                r#"<parameter name="city">Seattle</parameter>"#,
+                "</invoke></minimax:tool_call>",
+            ],
+        );
+
+        assert!(result.normal_text.is_empty());
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({ "city": "Seattle" })
+        );
+    }
+
+    #[test]
+    fn minimax_m2_streaming_preserves_prefix_text() {
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let result = collect_stream(
+            &mut parser,
+            &[
+                "Let me check. ",
+                "<minimax:tool_call>",
+                r#"<invoke name="get_weather"><parameter name="city">Seattle</parameter></invoke>"#,
+                "</minimax:tool_call>",
+            ],
+        );
+
+        assert_eq!(result.normal_text, "Let me check. ");
+        assert_eq!(result.calls.len(), 1);
+    }
+
+    #[test]
+    fn minimax_m2_streaming_without_tool_call_emits_text_incrementally() {
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let result = collect_stream(&mut parser, &["Hello, ", "world!"]);
+
+        assert_eq!(result.normal_text, "Hello, world!");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn minimax_m2_streaming_handles_marker_split_across_chunks() {
+        let text = build_tool_block(&[("get_weather", vec![("city", "Seattle")])]);
+        let chunks = split_by_chars(&text, 3);
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(result.calls.len(), 1);
+        assert!(result.normal_text.is_empty());
+    }
+
+    #[test]
+    fn minimax_m2_streaming_extracts_multiple_invokes_in_order() {
+        let text = build_tool_block(&[
+            ("get_weather", vec![("city", "Seattle")]),
+            ("get_weather", vec![("city", "NYC")]),
+        ]);
+        let chunks = split_by_chars(&text, 7);
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(result.calls.len(), 2);
+        assert_eq!(result.calls[0].tool_index, 0);
+        assert_eq!(result.calls[1].tool_index, 1);
+    }
+
+    #[test]
+    fn minimax_m2_streaming_ignores_text_after_tool_block() {
+        let text = format!(
+            "{} ignored",
+            build_tool_block(&[("get_weather", vec![("city", "Seattle")])])
+        );
+        let chunks = split_by_chars(&text, 5);
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert!(result.normal_text.is_empty());
+        assert_eq!(result.calls.len(), 1);
+    }
+
+    #[test]
+    fn minimax_m2_streaming_does_not_emit_incomplete_tool_call() {
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let result = parser.push(r#"<minimax:tool_call><invoke name="get_weather">"#).unwrap();
+
+        assert!(result.normal_text.is_empty());
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn minimax_m2_finish_fails_incomplete_tool_call() {
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        parser.push(r#"<minimax:tool_call><invoke name="get_weather">"#).unwrap();
+
+        assert!(parser.finish().is_err());
+    }
+
+    #[test]
+    fn minimax_m2_finish_fails_after_bare_tool_block_start() {
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        parser.push("<minimax:tool_call>").unwrap();
+
+        assert!(parser.finish().is_err());
+    }
+
+    #[test]
+    fn minimax_m2_malformed_tool_call_fails_fast() {
+        let mut parser = MinimaxM2ToolParser::new(&test_tools());
+        let error = parser.push("<minimax:tool_call><bad></minimax:tool_call>").unwrap_err();
+
+        expect!["tool parser parsing failed: "].assert_eq(&error.to_report_string());
+    }
+}
diff --git a/rust/src/tool-parser/src/parameters.rs b/rust/src/tool-parser/src/parameters.rs
new file mode 100644
index 000000000000..cf21bad16cdf
--- /dev/null
+++ b/rust/src/tool-parser/src/parameters.rs
@@ -0,0 +1,508 @@
+use std::collections::BTreeMap;
+
+use serde_json::{Number, Value};
+
+use crate::Tool;
+
+/// Normalized parameter schemas for all tools in one request.
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+pub(super) struct ToolSchemas {
+    tools: BTreeMap<String, ToolSchema>,
+}
+
+/// Normalized parameter schema for one tool.
+///
+/// This is a minimal subset of JSON Schema with some normalization heuristics
+/// to support common schema patterns and upstream schema variations, focused on
+/// coercing raw string parameter values into more specific JSON types for
+/// downstream tool call execution.
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+pub(super) struct ToolSchema {
+    params: BTreeMap<String, JsonParamType>,
+}
+
+/// Normalized JSON parameter type used for raw string coercion.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(super) enum JsonParamType {
+    String,
+    Integer,
+    Number,
+    Boolean,
+    Object,
+    Array,
+    Null,
+    OneOf(Vec<JsonParamType>),
+}
+
+impl ToolSchemas {
+    /// Normalize OpenAI-style tool parameter JSON schemas for one request.
+    pub(super) fn from_tools(tools: &[Tool]) -> Self {
+        let tools = tools
+            .iter()
+            .map(|tool| (tool.name.clone(), ToolSchema::from_schema(&tool.parameters)))
+            .collect();
+
+        Self { tools }
+    }
+
+    /// Convert raw string parameter values for one named tool.
+    ///
+    /// Unknown tool names use an empty schema, so all parameters fall back to
+    /// strings.
+    pub(super) fn convert_params_with_schema(
+        &self,
+        function_name: &str,
+        params: Vec<(String, String)>,
+    ) -> serde_json::Map<String, Value> {
+        let tool_schema = self.tools.get(function_name).unwrap_or(ToolSchema::empty());
+        let mut converted = serde_json::Map::with_capacity(params.len());
+        for (name, value) in params {
+            let value = tool_schema.convert(&name, &value);
+            converted.insert(name, value);
+        }
+        converted
+    }
+
+    /// Convert one raw string parameter value for one named tool.
+    pub(super) fn convert_param_with_schema(
+        &self,
+        function_name: &str,
+        name: &str,
+        value: &str,
+    ) -> Value {
+        let tool_schema = self.tools.get(function_name).unwrap_or(ToolSchema::empty());
+        tool_schema.convert(name, value)
+    }
+}
+
+impl ToolSchema {
+    /// Return an empty schema with no parameter information, which causes all
+    /// parameters to be treated as strings.
+    const fn empty() -> &'static Self {
+        static EMPTY: ToolSchema = ToolSchema {
+            params: BTreeMap::new(),
+        };
+        &EMPTY
+    }
+
+    /// Normalize an OpenAI-style tool parameters JSON schema.
+    fn from_schema(parameters: &Value) -> Self {
+        let Some(properties) = parameters.get("properties").and_then(Value::as_object) else {
+            return Self::default();
+        };
+
+        let params = properties
+            .iter()
+            .filter_map(|(name, schema)| {
+                JsonParamType::from_schema(schema).map(|param_type| (name.clone(), param_type))
+            })
+            .collect();
+
+        Self { params }
+    }
+
+    /// Convert one raw parameter value using its normalized schema type.
+    ///
+    /// If the parameter name is unknown, or we don't have a schema for it, or
+    /// the value fails to convert, this falls back to returning the raw
+    /// string as a JSON string value.
+    fn convert(&self, name: &str, value: &str) -> Value {
+        if value.eq_ignore_ascii_case("null") {
+            return Value::Null;
+        }
+
+        let Some(param_type) = self.params.get(name) else {
+            return Value::String(value.to_string());
+        };
+
+        convert_value(param_type, value).unwrap_or_else(|| Value::String(value.to_string()))
+    }
+}
+
+impl JsonParamType {
+    /// Normalize one parameter property schema.
+    fn from_schema(schema: &Value) -> Option<Self> {
+        let schema = schema.as_object()?;
+
+        if let Some(type_value) = schema.get("type") {
+            return Self::from_type_value(type_value);
+        }
+
+        if let Some(composite) = schema.get("anyOf").or_else(|| schema.get("oneOf")) {
+            let param_type = composite
+                .as_array()
+                .map(|schemas| schemas.iter().filter_map(Self::from_schema).collect::<Vec<_>>())
+                .filter(|types| !types.is_empty())
+                .map(Self::one_of)
+                .unwrap_or(Self::Object);
+            return Some(param_type);
+        }
+
+        if schema.contains_key("enum") {
+            return Some(Self::String);
+        }
+        if schema.contains_key("items") {
+            return Some(Self::Array);
+        }
+        if schema.contains_key("properties") {
+            return Some(Self::Object);
+        }
+
+        None
+    }
+
+    /// Normalize a JSON schema `type` value.
+    fn from_type_value(type_value: &Value) -> Option<Self> {
+        match type_value {
+            Value::String(kind) => Self::from_type_name(kind),
+            Value::Array(kinds) => {
+                let types = kinds
+                    .iter()
+                    .filter_map(Value::as_str)
+                    .filter_map(Self::from_type_name)
+                    .collect::<Vec<_>>();
+                if types.is_empty() {
+                    None
+                } else {
+                    Some(Self::one_of(types))
+                }
+            }
+            _ => None,
+        }
+    }
+
+    /// Normalize one JSON schema type name.
+    fn from_type_name(kind: &str) -> Option<Self> {
+        let kind = kind.trim().to_ascii_lowercase();
+        match kind.as_str() {
+            "string" | "str" | "text" | "varchar" | "char" | "enum" => Some(Self::String),
+            "integer" | "int" => Some(Self::Integer),
+            "number" | "float" => Some(Self::Number),
+            "boolean" | "bool" | "binary" => Some(Self::Boolean),
+            "object" => Some(Self::Object),
+            "array" | "arr" | "sequence" => Some(Self::Array),
+            "null" => Some(Self::Null),
+            _ if kind.starts_with("int")
+                || kind.starts_with("uint")
+                || kind.starts_with("long")
+                || kind.starts_with("short")
+                || kind.starts_with("unsigned") =>
+            {
+                Some(Self::Integer)
+            }
+            _ if kind.starts_with("num") || kind.starts_with("float") => Some(Self::Number),
+            _ if kind.starts_with("dict") => Some(Self::Object),
+            _ if kind.starts_with("list") => Some(Self::Array),
+            _ => None,
+        }
+    }
+
+    /// Collapse a candidate type list into one normalized type.
+    fn one_of(mut types: Vec<Self>) -> Self {
+        if types.len() == 1 {
+            types.remove(0)
+        } else {
+            Self::OneOf(types)
+        }
+    }
+}
+
+/// Convert one raw string value to a normalized JSON type.
+fn convert_value(param_type: &JsonParamType, value: &str) -> Option<Value> {
+    match param_type {
+        JsonParamType::String => Some(Value::String(value.to_string())),
+        JsonParamType::Integer => value.parse::<i64>().ok().map(Number::from).map(Value::Number),
+        JsonParamType::Number => convert_number(value),
+        JsonParamType::Boolean => convert_boolean(value),
+        JsonParamType::Object | JsonParamType::Array => serde_json::from_str(value).ok(),
+        JsonParamType::Null => value.eq_ignore_ascii_case("null").then_some(Value::Null),
+        JsonParamType::OneOf(types) => {
+            types.iter().find_map(|param_type| convert_value(param_type, value))
+        }
+    }
+}
+
+/// Convert one raw string value to a JSON number.
+fn convert_number(value: &str) -> Option<Value> {
+    if let Ok(parsed) = value.parse::<i64>() {
+        return Some(Value::Number(Number::from(parsed)));
+    }
+    Number::from_f64(value.parse::<f64>().ok()?).map(Value::Number)
+}
+
+/// Convert one raw string value to a boolean.
+fn convert_boolean(value: &str) -> Option<Value> {
+    match value.trim().to_ascii_lowercase().as_str() {
+        "true" | "1" => Some(Value::Bool(true)),
+        "false" | "0" => Some(Value::Bool(false)),
+        _ => None,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use serde_json::json;
+
+    use super::{ToolSchema, ToolSchemas};
+    use crate::Tool;
+
+    fn test_tool(name: &str, parameters: serde_json::Value) -> Tool {
+        Tool {
+            name: name.to_string(),
+            description: None,
+            parameters,
+            strict: None,
+        }
+    }
+
+    #[test]
+    fn invalid_schema_converts_everything_as_string() {
+        let params = ToolSchema::from_schema(&json!({ "type": "object" }));
+
+        assert_eq!(params.convert("count", "42"), json!("42"));
+        assert_eq!(params.convert("count", "null"), json!(null));
+    }
+
+    #[test]
+    fn skips_unknown_property_schema_and_unknown_type() {
+        let params = ToolSchema::from_schema(&json!({
+            "type": "object",
+            "properties": {
+                "unknown_schema": true,
+                "unknown_type": { "type": "mystery" },
+                "known": { "type": "integer" }
+            }
+        }));
+
+        assert_eq!(params.convert("unknown_schema", "42"), json!("42"));
+        assert_eq!(params.convert("unknown_type", "42"), json!("42"));
+        assert_eq!(params.convert("known", "42"), json!(42));
+    }
+
+    #[test]
+    fn converts_supported_types() {
+        let params = ToolSchema::from_schema(&json!({
+            "type": "object",
+            "properties": {
+                "text": { "type": "string" },
+                "count": { "type": "integer" },
+                "size": { "type": "number" },
+                "enabled": { "type": "boolean" },
+                "payload": { "type": "object" },
+                "items": { "type": "array" },
+                "nothing": { "type": "null" }
+            }
+        }));
+
+        assert_eq!(params.convert("text", "42"), json!("42"));
+        assert_eq!(params.convert("count", "42"), json!(42));
+        assert_eq!(params.convert("size", "5.0"), json!(5.0));
+        assert_eq!(params.convert("enabled", "1"), json!(true));
+        assert_eq!(params.convert("payload", r#"{"k":1}"#), json!({ "k": 1 }));
+        assert_eq!(params.convert("items", "[1,2]"), json!([1, 2]));
+        assert_eq!(params.convert("nothing", "null"), json!(null));
+    }
+
+    #[test]
+    fn number_conversion_parses_int_then_float() {
+        let params = ToolSchema::from_schema(&json!({
+            "type": "object",
+            "properties": {
+                "value": { "type": "number" }
+            }
+        }));
+
+        assert_eq!(params.convert("value", "5"), json!(5));
+        assert_eq!(params.convert("value", "5.0"), json!(5.0));
+        assert_eq!(params.convert("value", "5."), json!(5.0));
+        assert_eq!(params.convert("value", "+1"), json!(1));
+        assert_eq!(params.convert("value", "+1.0"), json!(1.0));
+        assert_eq!(
+            params.convert("value", "9223372036854775807.5"),
+            json!(9223372036854775808.0)
+        );
+    }
+
+    #[test]
+    fn converts_upstream_aliases() {
+        let params = ToolSchema::from_schema(&json!({
+            "type": "object",
+            "properties": {
+                "s": { "type": "varchar" },
+                "i": { "type": "unsigned_int" },
+                "n": { "type": "float64" },
+                "b": { "type": "binary" },
+                "a": { "type": "sequence" },
+                "o": { "type": "dict" }
+            }
+        }));
+
+        assert_eq!(params.convert("s", "x"), json!("x"));
+        assert_eq!(params.convert("i", "7"), json!(7));
+        assert_eq!(params.convert("n", "7.5"), json!(7.5));
+        assert_eq!(params.convert("b", "true"), json!(true));
+        assert_eq!(params.convert("a", "[1]"), json!([1]));
+        assert_eq!(params.convert("o", r#"{"x":1}"#), json!({ "x": 1 }));
+    }
+
+    #[test]
+    fn preserves_union_type_order() {
+        let integer_first = ToolSchema::from_schema(&json!({
+            "type": "object",
+            "properties": {
+                "value": { "type": ["integer", "string"] }
+            }
+        }));
+        let string_first = ToolSchema::from_schema(&json!({
+            "type": "object",
+            "properties": {
+                "value": { "type": ["string", "integer"] }
+            }
+        }));
+
+        assert_eq!(integer_first.convert("value", "42"), json!(42));
+        assert_eq!(string_first.convert("value", "42"), json!("42"));
+    }
+
+    #[test]
+    fn converts_composite_schemas() {
+        let params = ToolSchema::from_schema(&json!({
+            "type": "object",
+            "properties": {
+                "choice": {
+                    "anyOf": [
+                        { "type": "integer" },
+                        { "type": "string" }
+                    ]
+                },
+                "fallback_object": {
+                    "oneOf": [
+                        { "type": "mystery" }
+                    ]
+                }
+            }
+        }));
+
+        assert_eq!(params.convert("choice", "42"), json!(42));
+        assert_eq!(
+            params.convert("fallback_object", r#"{"x":1}"#),
+            json!({ "x": 1 })
+        );
+    }
+
+    #[test]
+    fn infers_type_from_schema_shape_without_type() {
+        let params = ToolSchema::from_schema(&json!({
+            "type": "object",
+            "properties": {
+                "choice": { "enum": ["a", "b"] },
+                "items": { "items": { "type": "integer" } },
+                "payload": { "properties": { "x": { "type": "integer" } } }
+            }
+        }));
+
+        assert_eq!(params.convert("choice", "a"), json!("a"));
+        assert_eq!(params.convert("items", "[1,2]"), json!([1, 2]));
+        assert_eq!(params.convert("payload", r#"{"x":1}"#), json!({ "x": 1 }));
+    }
+
+    #[test]
+    fn converts_params_for_known_tool() {
+        let schemas = ToolSchemas::from_tools(&[test_tool(
+            "search",
+            json!({
+                "type": "object",
+                "properties": {
+                    "query": { "type": "string" },
+                    "topn": { "type": "integer" }
+                }
+            }),
+        )]);
+
+        let converted = schemas.convert_params_with_schema(
+            "search",
+            vec![
+                ("query".to_string(), "rust".to_string()),
+                ("topn".to_string(), "5".to_string()),
+            ],
+        );
+
+        assert_eq!(converted.get("query"), Some(&json!("rust")));
+        assert_eq!(converted.get("topn"), Some(&json!(5)));
+    }
+
+    #[test]
+    fn convert_params_falls_back_to_string_for_failed_coercion() {
+        let schemas = ToolSchemas::from_tools(&[test_tool(
+            "convert",
+            json!({
+                "type": "object",
+                "properties": {
+                    "whole": { "type": "number" },
+                    "flag": { "type": "boolean" },
+                    "payload": { "type": "object" },
+                    "items": { "type": "array" },
+                    "missing_type": {}
+                }
+            }),
+        )]);
+
+        let converted = schemas.convert_params_with_schema(
+            "convert",
+            vec![
+                ("whole".to_string(), "not-a-number".to_string()),
+                ("flag".to_string(), "maybe".to_string()),
+                ("payload".to_string(), "not-json".to_string()),
+                ("items".to_string(), "not-json".to_string()),
+                ("missing_type".to_string(), "42".to_string()),
+                ("unknown_param".to_string(), "42".to_string()),
+            ],
+        );
+
+        assert_eq!(converted.get("whole"), Some(&json!("not-a-number")));
+        assert_eq!(converted.get("flag"), Some(&json!("maybe")));
+        assert_eq!(converted.get("payload"), Some(&json!("not-json")));
+        assert_eq!(converted.get("items"), Some(&json!("not-json")));
+        assert_eq!(converted.get("missing_type"), Some(&json!("42")));
+        assert_eq!(converted.get("unknown_param"), Some(&json!("42")));
+    }
+
+    #[test]
+    fn convert_params_preserves_null_for_known_param() {
+        let schemas = ToolSchemas::from_tools(&[test_tool(
+            "convert",
+            json!({
+                "type": "object",
+                "properties": {
+                    "value": { "type": "string" }
+                }
+            }),
+        )]);
+
+        let converted = schemas
+            .convert_params_with_schema("convert", vec![("value".to_string(), "NULL".to_string())]);
+
+        assert_eq!(converted.get("value"), Some(&json!(null)));
+    }
+
+    #[test]
+    fn unknown_tool_converts_values_without_schema() {
+        let schemas = ToolSchemas::from_tools(&[test_tool(
+            "search",
+            json!({ "type": "object", "properties": {} }),
+        )]);
+
+        let converted = schemas.convert_params_with_schema(
+            "missing",
+            vec![
+                ("query".to_string(), "rust".to_string()),
+                ("topn".to_string(), "5".to_string()),
+                ("nullish".to_string(), "null".to_string()),
+            ],
+        );
+
+        assert_eq!(converted.get("query"), Some(&json!("rust")));
+        assert_eq!(converted.get("topn"), Some(&json!("5")));
+        assert_eq!(converted.get("nullish"), Some(&json!(null)));
+    }
+}
diff --git a/rust/src/tool-parser/src/qwen_coder.rs b/rust/src/tool-parser/src/qwen_coder.rs
new file mode 100644
index 000000000000..227315f9bd24
--- /dev/null
+++ b/rust/src/tool-parser/src/qwen_coder.rs
@@ -0,0 +1,632 @@
+use winnow::ascii::multispace0 as ws0;
+use winnow::combinator::{alt, delimited, eof, repeat, seq, terminated};
+use winnow::prelude::*;
+use winnow::stream::Partial;
+use winnow::token::{literal, take_until};
+
+use super::parameters::ToolSchemas;
+use super::utils::{parse_buffered_event, safe_text_len, xml_unescape};
+use super::{Result, ToolCallDelta, ToolParseResult, ToolParser};
+use crate::Tool;
+
+const TOOL_CALL_START: &str = "<tool_call>";
+const TOOL_CALL_END: &str = "</tool_call>";
+const FUNCTION_START: &str = "<function=";
+const FUNCTION_END: &str = "</function>";
+const PARAMETER_START: &str = "<parameter=";
+const PARAMETER_END: &str = "</parameter>";
+
+type QwenCoderInput<'i> = Partial<&'i str>;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum QwenCoderMode {
+    Text,
+    ToolCall,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum QwenCoderEvent {
+    Text {
+        len: usize,
+    },
+    ToolCallStart,
+    ToolCall {
+        name: String,
+        raw_params: Vec<(String, String)>,
+    },
+}
+
+/// Tool parser for Qwen Coder XML-style tool calls.
+///
+/// Example tool call content:
+///
+/// ```text
+/// <tool_call>
+/// <function=get_weather>
+/// <parameter=location>杭州</parameter>
+/// </function>
+/// </tool_call>
+/// ```
+///
+/// Arguments are emitted only after a full `tool_call` block is parsed.
+///
+/// Note: parallel calls are represented as repeated
+/// `<tool_call>...</tool_call>` blocks, not as multiple calls inside one tag.
+pub struct Qwen3CoderToolParser {
+    buffer: String,
+    mode: QwenCoderMode,
+    emitted_tool_count: usize,
+    tool_parameters: ToolSchemas,
+}
+
+impl Qwen3CoderToolParser {
+    /// Create a Qwen Coder tool parser.
+    fn new(tools: &[Tool]) -> Self {
+        Self {
+            buffer: String::new(),
+            mode: QwenCoderMode::Text,
+            emitted_tool_count: 0,
+            tool_parameters: ToolSchemas::from_tools(tools),
+        }
+    }
+
+    /// Apply one parsed Qwen Coder event to parser state and output.
+    fn apply_event(&mut self, event: QwenCoderEvent, result: &mut ToolParseResult) -> Result<()> {
+        match event {
+            QwenCoderEvent::Text { len: consumed_len } => {
+                result.normal_text.push_str(&self.buffer[..consumed_len]);
+            }
+            QwenCoderEvent::ToolCallStart => self.mode = QwenCoderMode::ToolCall,
+            QwenCoderEvent::ToolCall { name, raw_params } => {
+                self.mode = QwenCoderMode::Text;
+                let arguments = self.tool_parameters.convert_params_with_schema(&name, raw_params);
+                let arguments = serde_json::to_string(&arguments)
+                    .map_err(|error| parsing_failed!("failed to serialize arguments: {}", error))?;
+
+                result.calls.push(ToolCallDelta {
+                    tool_index: self.emitted_tool_count,
+                    name: Some(name),
+                    arguments,
+                });
+                self.emitted_tool_count += 1;
+            }
+        }
+        Ok(())
+    }
+
+    /// Reset all streaming state.
+    fn reset(&mut self) {
+        self.buffer.clear();
+        self.mode = QwenCoderMode::Text;
+        self.emitted_tool_count = 0;
+    }
+}
+
+impl ToolParser for Qwen3CoderToolParser {
+    /// Create a boxed Qwen Coder tool parser.
+    fn create(tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self::new(tools)))
+    }
+
+    /// Push one decoded text chunk through the Qwen Coder parser.
+    fn push(&mut self, chunk: &str) -> Result<ToolParseResult> {
+        self.buffer.push_str(chunk);
+        let mut result = ToolParseResult::default();
+
+        while let Some((event, consumed_len)) = parse_buffered_event(&self.buffer, |input| {
+            parse_next_qwen_coder_event(input, self.mode)
+        })? {
+            self.apply_event(event, &mut result)?;
+            self.buffer.drain(..consumed_len);
+        }
+
+        Ok(result)
+    }
+
+    /// Flush buffered text and reset parser state.
+    fn finish(&mut self) -> Result<ToolParseResult> {
+        let mut result = ToolParseResult::default();
+        if !self.buffer.is_empty() {
+            if self.mode == QwenCoderMode::ToolCall || self.buffer.starts_with(TOOL_CALL_START) {
+                return Err(parsing_failed!("incomplete Qwen Coder tool call"));
+            }
+            result.normal_text.push_str(&self.buffer);
+        }
+        self.reset();
+        Ok(result)
+    }
+}
+
+/// Parse a Qwen Coder event for the current parser mode.
+fn parse_next_qwen_coder_event(
+    input: &mut QwenCoderInput<'_>,
+    mode: QwenCoderMode,
+) -> ModalResult<QwenCoderEvent> {
+    match mode {
+        QwenCoderMode::Text => parse_text_event(input),
+        QwenCoderMode::ToolCall => tool_call_event(input),
+    }
+}
+
+/// Parse a text-mode Qwen Coder event.
+fn parse_text_event(input: &mut QwenCoderInput<'_>) -> ModalResult<QwenCoderEvent> {
+    alt((tool_call_start_event, safe_text_event)).parse_next(input)
+}
+
+/// Parse a Qwen Coder tool-call start marker.
+fn tool_call_start_event(input: &mut QwenCoderInput<'_>) -> ModalResult<QwenCoderEvent> {
+    literal(TOOL_CALL_START).value(QwenCoderEvent::ToolCallStart).parse_next(input)
+}
+
+/// Parse a safe text run before the next Qwen Coder marker.
+fn safe_text_event(input: &mut QwenCoderInput<'_>) -> ModalResult<QwenCoderEvent> {
+    safe_text_len(input, TOOL_CALL_START).map(|len| QwenCoderEvent::Text { len })
+}
+
+/// Parse a complete Qwen Coder tool call.
+fn tool_call_event(input: &mut QwenCoderInput<'_>) -> ModalResult<QwenCoderEvent> {
+    let (body,) = seq!(
+        _: ws0,
+        take_until(0.., TOOL_CALL_END),
+        _: literal(TOOL_CALL_END),
+    )
+    .parse_next(input)?;
+
+    parse_tool_call_body(body)
+}
+
+/// Parse a Qwen Coder function block.
+fn function_event(input: &mut &str) -> ModalResult<QwenCoderEvent> {
+    let (name, raw_params) = seq!(
+        _: literal(FUNCTION_START),
+        take_until(1.., ">"),
+        _: ">",
+        _: ws0,
+        repeat(0.., terminated(parameter, ws0)),
+        _: literal(FUNCTION_END),
+    )
+    .parse_next(input)?;
+
+    Ok(QwenCoderEvent::ToolCall {
+        name: name.to_string(),
+        raw_params,
+    })
+}
+
+/// Parse a Qwen Coder parameter block.
+fn parameter(input: &mut &str) -> ModalResult<(String, String)> {
+    let (name, value) = seq!(
+        _: literal(PARAMETER_START),
+        take_until(1.., ">"),
+        _: ">",
+        take_until(0.., PARAMETER_END).map(trim_one_wrapping_newline).map(xml_unescape),
+        _: literal(PARAMETER_END),
+    )
+    .parse_next(input)?;
+
+    Ok((name.to_string(), value.into_owned()))
+}
+
+/// Parse a Qwen Coder tool-call body.
+fn parse_tool_call_body(body: &str) -> ModalResult<QwenCoderEvent> {
+    let mut input = body;
+    delimited(ws0, function_event, (ws0, eof)).parse_next(&mut input)
+}
+
+/// Trim a single leading and trailing newline from a parameter value.
+fn trim_one_wrapping_newline(value: &str) -> &str {
+    let value = value.strip_prefix('\n').unwrap_or(value);
+    value.strip_suffix('\n').unwrap_or(value)
+}
+
+#[cfg(test)]
+mod tests {
+    use expect_test::expect;
+    use serde_json::{Value, json};
+    use thiserror_ext::AsReport;
+
+    use super::{Qwen3CoderToolParser, ToolParser};
+    use crate::test_utils::{collect_stream, split_by_chars, test_tools};
+
+    fn build_tool_call(function_name: &str, params: &[(&str, &str)]) -> String {
+        let params = params
+            .iter()
+            .map(|(name, value)| format!("<parameter={name}>{value}</parameter>"))
+            .collect::<Vec<_>>()
+            .join("\n");
+        format!("<tool_call>\n<function={function_name}>\n{params}\n</function>\n</tool_call>")
+    }
+
+    #[test]
+    fn qwen_coder_parse_complete_without_tool_call_keeps_text() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = parser.parse_complete("Hello, world!").unwrap();
+
+        assert_eq!(result.normal_text, "Hello, world!");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn qwen_coder_parse_complete_extracts_single_tool_call() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&build_tool_call(
+                "get_weather",
+                &[("location", "SF"), ("date", "2026-04-29")],
+            ))
+            .unwrap();
+
+        assert!(result.normal_text.is_empty());
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "location": "SF",
+                "date": "2026-04-29"
+            })
+        );
+    }
+
+    #[test]
+    fn qwen_coder_parse_complete_preserves_prefix_text() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let output = format!(
+            "Thinking... {}",
+            build_tool_call("get_weather", &[("location", "NYC")])
+        );
+        let result = parser.parse_complete(&output).unwrap();
+
+        assert_eq!(result.normal_text, "Thinking... ");
+        assert_eq!(result.calls.len(), 1);
+    }
+
+    #[test]
+    fn qwen_coder_parse_complete_converts_schema_types() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&build_tool_call(
+                "convert",
+                &[
+                    ("whole", "5.0"),
+                    ("flag", "true"),
+                    ("payload", r#"{"nested":true}"#),
+                    ("items", "[1,2]"),
+                    ("empty", "42"),
+                ],
+            ))
+            .unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "whole": 5.0,
+                "flag": true,
+                "payload": { "nested": true },
+                "items": [1, 2],
+                "empty": "42",
+            })
+        );
+    }
+
+    #[test]
+    fn qwen_coder_parse_complete_extracts_empty_arguments() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = parser.parse_complete(&build_tool_call("get_weather", &[])).unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({})
+        );
+    }
+
+    #[test]
+    fn qwen_coder_parse_complete_handles_upstream_multiline_typed_params() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(
+                "<tool_call>\n\
+                 <function=calculate_area>\n\
+                 <parameter=shape>\n\
+                 rectangle\n\
+                 </parameter>\n\
+                 <parameter=dimensions>\n\
+                 {\"width\": 10,\n\
+                  \"height\": 20}\n\
+                 </parameter>\n\
+                 <parameter=precision>\n\
+                 2\n\
+                 </parameter>\n\
+                 </function>\n\
+                 </tool_call>",
+            )
+            .unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("calculate_area"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "shape": "rectangle",
+                "dimensions": { "width": 10, "height": 20 },
+                "precision": 2,
+            })
+        );
+    }
+
+    #[test]
+    fn qwen_coder_parse_complete_handles_nested_json_parameter() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&build_tool_call(
+                "convert",
+                &[(
+                    "payload",
+                    r#"{"nested":{"value":[1,2,3],"child":{"enabled":true}}}"#,
+                )],
+            ))
+            .unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "payload": {
+                    "nested": {
+                        "value": [1, 2, 3],
+                        "child": { "enabled": true },
+                    },
+                },
+            })
+        );
+    }
+
+    #[test]
+    fn qwen_coder_parse_complete_preserves_xml_like_parameter_values() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&build_tool_call(
+                "process",
+                &[
+                    (
+                        "html_content",
+                        r#"<div class="test"><span>Hello</span></div>"#,
+                    ),
+                    ("xml_snippet", r#"<root><child attr="value"/></root>"#),
+                ],
+            ))
+            .unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "html_content": r#"<div class="test"><span>Hello</span></div>"#,
+                "xml_snippet": r#"<root><child attr="value"/></root>"#,
+            })
+        );
+    }
+
+    #[test]
+    fn qwen_coder_parse_complete_unescapes_literal_closing_tags_in_parameter_value() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&build_tool_call(
+                "get_weather",
+                &[
+                    (
+                        "location",
+                        "杭州 &lt;/parameter&gt;&lt;/function&gt;&lt;/tool_call&gt;",
+                    ),
+                    ("date", "2026-05-08"),
+                ],
+            ))
+            .unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "location": "杭州 </parameter></function></tool_call>",
+                "date": "2026-05-08",
+            })
+        );
+    }
+
+    #[test]
+    fn qwen_coder_parse_complete_does_not_double_encode_anyof_object() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(&build_tool_call(
+                "update_record",
+                &[("data", r#"{"key":"value","count":42}"#)],
+            ))
+            .unwrap();
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({
+                "data": { "key": "value", "count": 42 },
+            })
+        );
+    }
+
+    #[test]
+    fn qwen_coder_streaming_extracts_single_tool_call() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = collect_stream(
+            &mut parser,
+            &[
+                "<tool_call>\n",
+                "<function=get_weather>\n",
+                "<parameter=location>SF</parameter>\n",
+                "</function>\n",
+                "</tool_call>",
+            ],
+        );
+
+        assert!(result.normal_text.is_empty());
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({ "location": "SF" })
+        );
+    }
+
+    #[test]
+    fn qwen_coder_streaming_preserves_prefix_text() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = collect_stream(
+            &mut parser,
+            &[
+                "Thinking... ",
+                "<tool_call>\n",
+                "<function=get_weather>\n",
+                "<parameter=location>SF</parameter>\n",
+                "</function>\n",
+                "</tool_call>",
+            ],
+        );
+
+        assert_eq!(result.normal_text, "Thinking... ");
+        assert_eq!(result.calls.len(), 1);
+    }
+
+    #[test]
+    fn qwen_coder_streaming_without_tool_call_emits_text_incrementally() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = collect_stream(&mut parser, &["Hello, ", "world!"]);
+
+        assert_eq!(result.normal_text, "Hello, world!");
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn qwen_coder_streaming_extracts_multiple_tool_calls_in_order() {
+        let text = format!(
+            "{}\n{}",
+            build_tool_call("get_weather", &[("location", "SF")]),
+            build_tool_call("get_weather", &[("location", "NYC")])
+        );
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = collect_stream(&mut parser, &[&text]);
+
+        assert_eq!(result.calls.len(), 2);
+        assert_eq!(result.calls[0].name.as_deref(), Some("get_weather"));
+        assert_eq!(result.calls[1].name.as_deref(), Some("get_weather"));
+        assert_eq!(result.calls[0].tool_index, 0);
+        assert_eq!(result.calls[1].tool_index, 1);
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({ "location": "SF" })
+        );
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[1].arguments).unwrap(),
+            json!({ "location": "NYC" })
+        );
+    }
+
+    #[test]
+    fn qwen_coder_streaming_preserves_text_between_tool_calls() {
+        let text = format!(
+            "I'll check two cities.{}Between calls.{}Done.",
+            build_tool_call("get_weather", &[("city", "Dallas"), ("state", "TX")]),
+            build_tool_call("get_weather", &[("city", "Orlando"), ("state", "FL")])
+        );
+        let chunks = split_by_chars(&text, 5);
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(
+            result.normal_text,
+            "I'll check two cities.Between calls.Done."
+        );
+        assert_eq!(result.calls.len(), 2);
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({ "city": "Dallas", "state": "TX" })
+        );
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[1].arguments).unwrap(),
+            json!({ "city": "Orlando", "state": "FL" })
+        );
+    }
+
+    #[test]
+    fn qwen_coder_streaming_handles_start_token_split_across_chunks() {
+        let text = build_tool_call("get_weather", &[("location", "SF")]);
+        let chunks = split_by_chars(&text, 3);
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = collect_stream(&mut parser, &chunks);
+
+        assert_eq!(result.calls.len(), 1);
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({ "location": "SF" })
+        );
+    }
+
+    #[test]
+    fn qwen_coder_streaming_does_not_emit_incomplete_tool_call() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = parser
+            .push("<tool_call>\n<function=get_weather>\n<parameter=location>SF</parameter>")
+            .unwrap();
+
+        assert!(result.normal_text.is_empty());
+        assert!(result.calls.is_empty());
+    }
+
+    #[test]
+    fn qwen_coder_finish_fails_incomplete_tool_call() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        parser
+            .push("<tool_call>\n<function=get_weather>\n<parameter=location>SF</parameter>")
+            .unwrap();
+
+        assert!(parser.finish().is_err());
+    }
+
+    #[test]
+    fn qwen_coder_malformed_tool_call_fails_fast() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let error = parser.push("<tool_call>\n<bad>\n</tool_call>").unwrap_err();
+
+        expect!["tool parser parsing failed: "].assert_eq(&error.to_report_string());
+    }
+
+    #[test]
+    fn qwen_coder_missing_parameter_end_fails_fast_after_function_end() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let error = parser
+            .push(
+                "<tool_call>\n<function=get_weather>\n<parameter=location>SF</function>\n</tool_call>",
+            )
+            .unwrap_err();
+
+        expect!["tool parser parsing failed: "].assert_eq(&error.to_report_string());
+    }
+
+    #[test]
+    fn qwen_coder_parse_function_body_trims_one_wrapping_newline() {
+        let mut parser = Qwen3CoderToolParser::new(&test_tools());
+        let result = parser
+            .parse_complete(
+                "<tool_call>\n<function=get_weather>\n<parameter=location>\nHangzhou\n</parameter>\n</function>\n</tool_call>",
+            )
+            .unwrap();
+
+        assert_eq!(
+            serde_json::from_str::<Value>(&result.calls[0].arguments).unwrap(),
+            json!({ "location": "Hangzhou" })
+        );
+    }
+}
diff --git a/rust/src/tool-parser/src/test_utils.rs b/rust/src/tool-parser/src/test_utils.rs
new file mode 100644
index 000000000000..45dda6ddfdf8
--- /dev/null
+++ b/rust/src/tool-parser/src/test_utils.rs
@@ -0,0 +1,115 @@
+use serde_json::json;
+
+use super::{ToolParseResult, ToolParser};
+use crate::Tool;
+
+/// Build a reusable set of function tools for parser unit tests.
+pub fn test_tools() -> Vec<Tool> {
+    vec![
+        Tool {
+            name: "get_weather".to_string(),
+            description: None,
+            parameters: json!({
+                "type": "object",
+                "properties": {
+                    "location": { "type": "string" },
+                    "city": { "type": "string" },
+                    "state": { "type": "string" },
+                    "unit": { "type": "string" },
+                    "date": { "type": "string" },
+                    "days": { "type": "integer" }
+                }
+            }),
+            strict: None,
+        },
+        Tool {
+            name: "add".to_string(),
+            description: None,
+            parameters: json!({
+                "type": "object",
+                "properties": {
+                    "x": { "type": "integer" },
+                    "y": { "type": "integer" }
+                }
+            }),
+            strict: None,
+        },
+        Tool {
+            name: "convert".to_string(),
+            description: None,
+            parameters: json!({
+                "type": "object",
+                "properties": {
+                    "whole": { "type": "number" },
+                    "flag": { "type": "boolean" },
+                    "payload": { "type": "object" },
+                    "items": { "type": "array" },
+                    "empty": { "type": "string" }
+                }
+            }),
+            strict: None,
+        },
+        Tool {
+            name: "calculate_area".to_string(),
+            description: None,
+            parameters: json!({
+                "type": "object",
+                "properties": {
+                    "shape": { "type": "string" },
+                    "dimensions": { "type": "object" },
+                    "precision": { "type": "integer" }
+                }
+            }),
+            strict: None,
+        },
+        Tool {
+            name: "update_record".to_string(),
+            description: None,
+            parameters: json!({
+                "type": "object",
+                "properties": {
+                    "data": {
+                        "anyOf": [
+                            { "type": "object" },
+                            { "type": "null" }
+                        ]
+                    }
+                }
+            }),
+            strict: None,
+        },
+    ]
+}
+
+/// Push chunks through a streaming parser and coalesce its tool-call deltas.
+pub fn collect_stream<T: ToolParser + ?Sized>(parser: &mut T, chunks: &[&str]) -> ToolParseResult {
+    let mut result = ToolParseResult::default();
+    for chunk in chunks {
+        result.append(parser.push(chunk).unwrap());
+    }
+    result.append(parser.finish().unwrap());
+    result.coalesce_calls()
+}
+
+/// Split text into chunks containing at most `chunk_chars` Unicode scalar
+/// values.
+pub fn split_by_chars(text: &str, chunk_chars: usize) -> Vec<&str> {
+    let mut chunks = Vec::new();
+    let mut start = 0;
+    let mut count = 0;
+
+    for (index, _) in text.char_indices() {
+        if count == chunk_chars {
+            chunks.push(&text[start..index]);
+            start = index;
+            count = 0;
+        }
+        count += 1;
+    }
+
+    if start < text.len() {
+        chunks.push(&text[start..]);
+    }
+
+    chunks
+}
diff --git a/rust/src/tool-parser/src/tests.rs b/rust/src/tool-parser/src/tests.rs
new file mode 100644
index 000000000000..73e3b7bbf351
--- /dev/null
+++ b/rust/src/tool-parser/src/tests.rs
@@ -0,0 +1,97 @@
+use super::{Result, Tool, ToolCallDelta, ToolParseResult, ToolParser};
+
+struct DefaultParser;
+
+impl ToolParser for DefaultParser {
+    fn create(_tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+    where
+        Self: Sized + 'static,
+    {
+        Ok(Box::new(Self))
+    }
+
+    fn push(&mut self, _chunk: &str) -> Result<ToolParseResult> {
+        Ok(ToolParseResult::default())
+    }
+}
+
+#[test]
+fn tool_parser_does_not_preserve_special_tokens_by_default() {
+    let parser = DefaultParser;
+
+    assert!(!parser.preserve_special_tokens());
+}
+
+#[test]
+fn default_parse_complete_delegates_through_push_and_finish() {
+    struct StreamingParser;
+
+    impl ToolParser for StreamingParser {
+        fn create(_tools: &[Tool]) -> Result<Box<dyn ToolParser>>
+        where
+            Self: Sized + 'static,
+        {
+            Ok(Box::new(Self))
+        }
+
+        fn push(&mut self, _chunk: &str) -> Result<ToolParseResult> {
+            Ok(ToolParseResult {
+                normal_text: "prefix ".to_string(),
+                calls: vec![
+                    ToolCallDelta {
+                        tool_index: 0,
+                        name: Some("weather".to_string()),
+                        arguments: "{\"location\":".to_string(),
+                    },
+                    ToolCallDelta {
+                        tool_index: 0,
+                        name: None,
+                        arguments: "\"Paris\"".to_string(),
+                    },
+                    ToolCallDelta {
+                        tool_index: 1,
+                        name: Some("time".to_string()),
+                        arguments: "{\"timezone\":".to_string(),
+                    },
+                ],
+            })
+        }
+
+        fn finish(&mut self) -> Result<ToolParseResult> {
+            Ok(ToolParseResult {
+                normal_text: "suffix".to_string(),
+                calls: vec![
+                    ToolCallDelta {
+                        tool_index: 0,
+                        name: None,
+                        arguments: "}".to_string(),
+                    },
+                    ToolCallDelta {
+                        tool_index: 1,
+                        name: None,
+                        arguments: "\"UTC\"}".to_string(),
+                    },
+                ],
+            })
+        }
+    }
+
+    let mut parser = StreamingParser;
+    let result = parser.parse_complete("ignored").unwrap();
+    assert_eq!(result.normal_text, "prefix suffix");
+    assert_eq!(
+        result.calls,
+        vec![
+            ToolCallDelta {
+                tool_index: 0,
+                name: Some("weather".to_string()),
+                arguments: "{\"location\":\"Paris\"}".to_string(),
+            },
+            ToolCallDelta {
+                tool_index: 1,
+                name: Some("time".to_string()),
+                arguments: "{\"timezone\":\"UTC\"}".to_string(),
+            },
+        ]
+    );
+}
diff --git a/rust/src/tool-parser/src/utils.rs b/rust/src/tool-parser/src/utils.rs
new file mode 100644
index 000000000000..171c1af0eeca
--- /dev/null
+++ b/rust/src/tool-parser/src/utils.rs
@@ -0,0 +1,581 @@
+//! Shared helpers for tool parsers.
+
+use std::borrow::Cow;
+
+use winnow::error::{ContextError, ErrMode, ModalResult, Needed, StrContext, StrContextValue};
+use winnow::stream::{Offset, Partial, Stream};
+
+use super::Result;
+
+/// Return the byte length of the longest proper prefix of `token` that is also
+/// a suffix of `buffer`.
+///
+/// Streaming parsers use this to keep only the trailing fragment that might
+/// still grow into a full marker after the next decoded chunk arrives.
+///
+/// The returned length is always a valid UTF-8 boundary in `token`, so callers
+/// can safely slice `&token[..len]` even when markers contain non-ASCII
+/// characters such as DeepSeek's DSML delimiters.
+pub(super) fn partial_prefix_len(buffer: &str, token: &str) -> usize {
+    let Some(first_byte) = token.as_bytes().first().copied() else {
+        return 0;
+    };
+
+    let max_len = buffer.len().min(token.len().saturating_sub(1));
+    let tail_start = buffer.len() - max_len;
+    let buffer_bytes = buffer.as_bytes();
+    let token_bytes = token.as_bytes();
+
+    // Scan from the longest possible suffix to preserve overlapping prefixes.
+    for index in tail_start..buffer.len() {
+        if buffer_bytes[index] != first_byte {
+            continue;
+        }
+
+        let len = buffer.len() - index;
+        if buffer.is_char_boundary(index)
+            && token.is_char_boundary(len)
+            && token_bytes[..len] == buffer_bytes[index..]
+        {
+            return len;
+        }
+    }
+
+    0
+}
+
+/// Parse a safe text run before the next marker.
+///
+/// Returns the text length in bytes, and advances the input.
+pub(super) fn safe_text_len(input: &mut Partial<&str>, marker: &str) -> ModalResult<usize> {
+    let text = **input;
+    if text.is_empty() {
+        return incomplete();
+    }
+
+    if let Some(start_idx) = text.find(marker) {
+        input.next_slice(start_idx);
+        return Ok(start_idx);
+    }
+
+    let keep_len = partial_prefix_len(text, marker);
+    let emit_len = text.len().saturating_sub(keep_len);
+    if emit_len == 0 {
+        return incomplete();
+    }
+
+    input.next_slice(emit_len);
+    Ok(emit_len)
+}
+
+/// Decode XML/HTML entities in XML-style parameter values.
+pub(super) fn xml_unescape(value: &str) -> Cow<'_, str> {
+    if !value.as_bytes().contains(&b'&') {
+        return Cow::Borrowed(value);
+    }
+
+    let mut output: Option<String> = None;
+    let mut copied_len = 0;
+    let mut rest = value;
+
+    while let Some(ampersand) = rest.find('&') {
+        let before_ampersand = &rest[..ampersand];
+        let after_ampersand = &rest[ampersand + '&'.len_utf8()..];
+        if let Some(semicolon) = after_ampersand.find(';') {
+            let entity = &after_ampersand[..semicolon];
+            if let Some(decoded) = decode_xml_entity(entity) {
+                match &mut output {
+                    Some(output) => output.push_str(before_ampersand),
+                    None => {
+                        let mut new_output = String::with_capacity(value.len());
+                        new_output.push_str(&value[..copied_len + ampersand]);
+                        output = Some(new_output);
+                    }
+                }
+                let output = output.as_mut().expect("output is initialized above");
+                output.push(decoded);
+                let consumed_len = ampersand + '&'.len_utf8() + semicolon + ';'.len_utf8();
+                copied_len += consumed_len;
+                rest = &rest[consumed_len..];
+                continue;
+            }
+        }
+
+        if let Some(output) = &mut output {
+            output.push_str(before_ampersand);
+            output.push('&');
+        }
+        let consumed_len = ampersand + '&'.len_utf8();
+        copied_len += consumed_len;
+        rest = after_ampersand;
+    }
+
+    if let Some(mut output) = output {
+        output.push_str(rest);
+        Cow::Owned(output)
+    } else {
+        Cow::Borrowed(value)
+    }
+}
+
+fn decode_xml_entity(entity: &str) -> Option<char> {
+    match entity {
+        "amp" => Some('&'),
+        "lt" => Some('<'),
+        "gt" => Some('>'),
+        "quot" => Some('"'),
+        "apos" => Some('\''),
+        entity if entity.starts_with("#x") || entity.starts_with("#X") => {
+            u32::from_str_radix(&entity[2..], 16).ok().and_then(char::from_u32)
+        }
+        entity if entity.starts_with('#') => {
+            entity[1..].parse::<u32>().ok().and_then(char::from_u32)
+        }
+        _ => None,
+    }
+}
+
+/// Streaming lexical state for a top-level JSON object.
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+pub(super) struct JsonObjectScanState {
+    object_depth: usize,
+    array_depth: usize,
+    in_string: bool,
+    escape: bool,
+    phase: JsonObjectScanPhase,
+}
+
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
+enum JsonObjectScanPhase {
+    #[default]
+    Initial,
+    Scanning,
+    Complete,
+}
+
+impl JsonObjectScanState {
+    /// Returns whether the top-level JSON object has closed.
+    pub(super) const fn complete(&self) -> bool {
+        matches!(self.phase, JsonObjectScanPhase::Complete)
+    }
+}
+
+/// Parse a raw top-level JSON object argument prefix.
+///
+/// The returned length is safe to emit as raw argument text. This scans only
+/// lexical boundaries from `{` through the matching `}`, preserving
+/// malformed-but-balanced JSON without deserializing or normalizing it.
+pub(super) fn take_json_object(
+    input: &mut Partial<&str>,
+    state: &mut JsonObjectScanState,
+) -> ModalResult<usize> {
+    let text = **input;
+    if text.is_empty() {
+        return incomplete();
+    }
+    if state.complete() {
+        return Err(json_scan_error(
+            "JSON object argument",
+            StrContextValue::Description("active JSON object scan"),
+        ));
+    }
+
+    let bytes = text.as_bytes();
+    let just_started = matches!(state.phase, JsonObjectScanPhase::Initial);
+    if just_started {
+        if bytes[0] != b'{' {
+            return Err(json_scan_error(
+                "JSON object argument",
+                StrContextValue::CharLiteral('{'),
+            ));
+        }
+        state.phase = JsonObjectScanPhase::Scanning;
+        state.object_depth = 1;
+    }
+
+    let mut index = usize::from(just_started);
+
+    while index < bytes.len() {
+        let byte = bytes[index];
+        index += 1;
+
+        if state.in_string {
+            if state.escape {
+                state.escape = false;
+            } else if byte == b'\\' {
+                state.escape = true;
+            } else if byte == b'"' {
+                state.in_string = false;
+            }
+            continue;
+        }
+
+        match byte {
+            b'"' => state.in_string = true,
+            b'{' => state.object_depth += 1,
+            b'}' => {
+                state.object_depth = state.object_depth.checked_sub(1).ok_or_else(|| {
+                    json_scan_error(
+                        "JSON object argument",
+                        StrContextValue::Description("balanced object braces"),
+                    )
+                })?;
+                if state.object_depth == 0 && state.array_depth == 0 {
+                    state.phase = JsonObjectScanPhase::Complete;
+                    input.next_slice(index);
+                    return Ok(index);
+                }
+                if state.object_depth == 0 {
+                    return Err(json_scan_error(
+                        "JSON object argument",
+                        StrContextValue::Description(
+                            "nested arrays to close before the top-level object",
+                        ),
+                    ));
+                }
+            }
+            b'[' => state.array_depth += 1,
+            b']' => {
+                state.array_depth = state.array_depth.checked_sub(1).ok_or_else(|| {
+                    json_scan_error(
+                        "JSON object argument",
+                        StrContextValue::Description("balanced array brackets"),
+                    )
+                })?;
+            }
+            _ => {}
+        }
+    }
+
+    input.next_slice(text.len());
+    Ok(text.len())
+}
+
+/// Parse a JSON string literal.
+pub(super) fn json_str(input: &mut Partial<&str>) -> ModalResult<String> {
+    let text = **input;
+    if text.is_empty() {
+        return incomplete();
+    }
+
+    let bytes = text.as_bytes();
+    if bytes[0] != b'"' {
+        return Err(json_scan_error(
+            "JSON string",
+            StrContextValue::CharLiteral('"'),
+        ));
+    }
+
+    let mut escape = false;
+    let mut index = 1;
+    while index < bytes.len() {
+        let byte = bytes[index];
+        index += 1;
+
+        if escape {
+            escape = false;
+            continue;
+        }
+
+        match byte {
+            b'\\' => escape = true,
+            b'"' => {
+                let raw = &text[..index];
+                let value = serde_json::from_str::<String>(raw).map_err(|_| {
+                    json_scan_error(
+                        "JSON string",
+                        StrContextValue::Description("valid JSON string"),
+                    )
+                })?;
+                input.next_slice(index);
+                return Ok(value);
+            }
+            _ => {}
+        }
+    }
+
+    incomplete()
+}
+
+fn json_scan_error(label: &'static str, expected: StrContextValue) -> ErrMode<ContextError> {
+    let mut error = ContextError::new();
+    error.push(StrContext::Label(label));
+    error.push(StrContext::Expected(expected));
+    ErrMode::Cut(error)
+}
+
+/// Parse one event from a buffered streaming input.
+///
+/// Returns:
+/// - `Ok(Some((event, consumed_len)))` if an event was successfully parsed, along with the number
+///   of bytes consumed from the buffer.
+/// - `Ok(None)` if the buffer does not contain a full event yet, and more data is needed.
+/// - `Err` if a parsing error occurred.
+pub(super) fn parse_buffered_event<E>(
+    buffer: &str,
+    parse: impl FnOnce(&mut Partial<&str>) -> ModalResult<E>,
+) -> Result<Option<(E, usize)>> {
+    let mut input = Partial::new(buffer);
+    let checkpoint = input.checkpoint();
+    let event = match parse(&mut input) {
+        Ok(event) => event,
+        Err(ErrMode::Incomplete(_)) => return Ok(None),
+        Err(ErrMode::Backtrack(e) | ErrMode::Cut(e)) => {
+            // TODO: enrich context for error reporting
+            return Err(parsing_failed!("{}", e));
+        }
+    };
+    let consumed_len = input.offset_from(&checkpoint);
+    if consumed_len == 0 {
+        return Ok(None);
+    }
+
+    Ok(Some((event, consumed_len)))
+}
+
+/// Returns an error indicating that we need more data to continue parsing.
+pub(super) fn incomplete<T>() -> ModalResult<T> {
+    Err(ErrMode::Incomplete(Needed::Unknown))
+}
+
+#[cfg(test)]
+mod tests {
+    use std::borrow::Cow;
+
+    use expect_test::expect;
+    use winnow::error::ErrMode;
+    use winnow::stream::{Offset, Partial, Stream};
+
+    use super::{
+        JsonObjectScanState, json_str, partial_prefix_len, safe_text_len, take_json_object,
+        xml_unescape,
+    };
+
+    #[test]
+    fn partial_prefix_len_handles_ascii_markers() {
+        assert_eq!(
+            partial_prefix_len("hello<|tool", "<|tool_call>"),
+            "<|tool".len()
+        );
+        assert_eq!(partial_prefix_len("hello world", "<|tool_call>"), 0);
+    }
+
+    #[test]
+    fn partial_prefix_len_prefers_longest_overlapping_prefix() {
+        assert_eq!(partial_prefix_len("chunk ending in aba", "ababa"), 3);
+    }
+
+    #[test]
+    fn partial_prefix_len_handles_unicode_markers() {
+        let token = "<｜DSML｜function_calls>";
+        assert_eq!(
+            partial_prefix_len("prefix <｜DSML｜fun", token),
+            "<｜DSML｜fun".len()
+        );
+        assert_eq!(partial_prefix_len("prefix <｜DSML", token), "<｜DSML".len());
+    }
+
+    #[test]
+    fn safe_text_len_stops_before_marker() {
+        let mut input = Partial::new("hello<tool_call>");
+        let checkpoint = input.checkpoint();
+
+        let len = safe_text_len(&mut input, "<tool_call>").unwrap();
+
+        assert_eq!(len, "hello".len());
+        assert_eq!(input.offset_from(&checkpoint), "hello".len());
+    }
+
+    #[test]
+    fn safe_text_len_holds_back_partial_marker() {
+        let mut input = Partial::new("hello<tool");
+        let checkpoint = input.checkpoint();
+
+        let len = safe_text_len(&mut input, "<tool_call>").unwrap();
+
+        assert_eq!(len, "hello".len());
+        assert_eq!(input.offset_from(&checkpoint), "hello".len());
+    }
+
+    #[test]
+    fn safe_text_len_reports_incomplete_for_only_partial_marker() {
+        let mut input = Partial::new("<tool");
+
+        let error = safe_text_len(&mut input, "<tool_call>").unwrap_err();
+
+        assert!(matches!(error, ErrMode::Incomplete(_)));
+    }
+
+    #[test]
+    fn xml_unescape_decodes_common_entities() {
+        assert_eq!(
+            xml_unescape("&lt;tag attr=&quot;value&quot;&gt;Tom &amp; Jerry&apos;s&lt;/tag&gt;"),
+            r#"<tag attr="value">Tom & Jerry's</tag>"#
+        );
+    }
+
+    #[test]
+    fn xml_unescape_decodes_numeric_entities() {
+        assert_eq!(xml_unescape("&#60;tag&#x3E;&#x1F600;"), "<tag>😀");
+    }
+
+    #[test]
+    fn xml_unescape_preserves_unknown_and_incomplete_entities() {
+        let output = xml_unescape("Tom & Jerry &unknown; &amp");
+
+        assert!(matches!(output, Cow::Borrowed(_)));
+        assert_eq!(output, "Tom & Jerry &unknown; &amp");
+    }
+
+    #[test]
+    fn xml_unescape_borrows_when_no_entity_is_present() {
+        let input = "plain text";
+        let output = xml_unescape(input);
+
+        assert!(matches!(output, Cow::Borrowed(_)));
+        assert_eq!(output, input);
+    }
+
+    #[test]
+    fn take_json_object_consumes_simple_object() {
+        let mut state = JsonObjectScanState::default();
+        let buffer = r#"{"location":"Paris"}<end>"#;
+        let mut input = Partial::new(buffer);
+        let checkpoint = input.checkpoint();
+
+        let len = take_json_object(&mut input, &mut state).unwrap();
+
+        assert_eq!(len, r#"{"location":"Paris"}"#.len());
+        assert_eq!(input.offset_from(&checkpoint), len);
+        assert!(state.complete());
+    }
+
+    #[test]
+    fn take_json_object_tracks_nested_values_and_strings() {
+        let mut state = JsonObjectScanState::default();
+        let arguments = r#"{"nested":{"items":[{"text":"} <|tool_call_end|> \" \\"}]}}"#;
+        let buffer = format!("{arguments}<end>");
+        let mut input = Partial::new(buffer.as_str());
+
+        let len = take_json_object(&mut input, &mut state).unwrap();
+
+        assert_eq!(len, arguments.len());
+        assert!(state.complete());
+    }
+
+    #[test]
+    fn take_json_object_rejects_leading_whitespace() {
+        let mut state = JsonObjectScanState::default();
+        let mut input = Partial::new(" {\"x\":1}");
+
+        let error = take_json_object(&mut input, &mut state).unwrap_err();
+
+        let ErrMode::Cut(error) = error else {
+            panic!("expected cut error");
+        };
+        expect![[r#"
+            invalid JSON object argument
+            expected `{`"#]]
+        .assert_eq(&error.to_string());
+    }
+
+    #[test]
+    fn take_json_object_leaves_trailing_whitespace_to_caller() {
+        let mut state = JsonObjectScanState::default();
+        let mut input = Partial::new("{\"x\":1}\n<end>");
+        let checkpoint = input.checkpoint();
+
+        let len = take_json_object(&mut input, &mut state).unwrap();
+
+        assert_eq!(len, "{\"x\":1}".len());
+        assert_eq!(input.offset_from(&checkpoint), len);
+        assert!(state.complete());
+    }
+
+    #[test]
+    fn take_json_object_continues_across_chunks() {
+        let mut state = JsonObjectScanState::default();
+        let chunks = [
+            r#"{"text":"literal "#,
+            r#"<|tool_call_end|>"#,
+            r#" inside"}<end>"#,
+        ];
+        let mut collected = String::new();
+
+        for chunk in chunks {
+            let mut input = Partial::new(chunk);
+            let len = take_json_object(&mut input, &mut state).unwrap();
+            collected.push_str(&chunk[..len]);
+        }
+
+        assert_eq!(collected, r#"{"text":"literal <|tool_call_end|> inside"}"#);
+        assert!(state.complete());
+    }
+
+    #[test]
+    fn take_json_object_rejects_non_object_top_level() {
+        let mut state = JsonObjectScanState::default();
+        let mut input = Partial::new(r#"[{"x":1}]"#);
+
+        let error = take_json_object(&mut input, &mut state).unwrap_err();
+
+        let ErrMode::Cut(error) = error else {
+            panic!("expected cut error");
+        };
+        expect![[r#"
+            invalid JSON object argument
+            expected `{`"#]]
+        .assert_eq(&error.to_string());
+    }
+
+    #[test]
+    fn take_json_object_reports_unbalanced_array() {
+        let mut state = JsonObjectScanState::default();
+        let mut input = Partial::new(r#"{"x":]}"#);
+
+        let error = take_json_object(&mut input, &mut state).unwrap_err();
+
+        let ErrMode::Cut(error) = error else {
+            panic!("expected cut error");
+        };
+        expect![[r#"
+            invalid JSON object argument
+            expected balanced array brackets"#]]
+        .assert_eq(&error.to_string());
+    }
+
+    #[test]
+    fn take_json_object_reports_top_level_close_before_nested_array() {
+        let mut state = JsonObjectScanState::default();
+        let mut input = Partial::new(r#"{"x":[}"#);
+
+        let error = take_json_object(&mut input, &mut state).unwrap_err();
+
+        let ErrMode::Cut(error) = error else {
+            panic!("expected cut error");
+        };
+        expect![[r#"
+            invalid JSON object argument
+            expected nested arrays to close before the top-level object"#]]
+        .assert_eq(&error.to_string());
+    }
+
+    #[test]
+    fn json_str_decodes_escaped_content() {
+        let mut input = Partial::new(r#""say_\"hi\u0021" rest"#);
+
+        let value = json_str(&mut input).unwrap();
+
+        assert_eq!(value, "say_\"hi!");
+        assert_eq!(*input, " rest");
+    }
+
+    #[test]
+    fn json_str_reports_incomplete_escaped_string() {
+        let mut input = Partial::new(r#""say_\"#);
+
+        let error = json_str(&mut input).unwrap_err();
+
+        assert!(matches!(error, ErrMode::Incomplete(_)));
+    }
+}
diff --git a/setup.py b/setup.py
index 74997702950e..b0b5337a9254 100644
--- a/setup.py
+++ b/setup.py
@@ -18,6 +18,8 @@
 from packaging.version import Version, parse
 from setuptools import Extension, setup
 from setuptools.command.build_ext import build_ext
+from setuptools_rust import Binding, RustExtension
+from setuptools_rust.build import build_rust
 from setuptools_scm import get_version
 from torch.utils.cpp_extension import CUDA_HOME, ROCM_HOME
 
@@ -33,11 +35,24 @@ def load_module_from_path(module_name, path):
 ROOT_DIR = Path(__file__).parent
 logger = logging.getLogger(__name__)
 
+PRECOMPILED_RUST_FRONTEND_PATH = ROOT_DIR / "vllm" / "vllm-rs"
+
 # cannot import envs directly because it depends on vllm,
 #  which is not installed yet
 envs = load_module_from_path("envs", os.path.join(ROOT_DIR, "vllm", "envs.py"))
 
 VLLM_TARGET_DEVICE = envs.VLLM_TARGET_DEVICE
+USE_PRECOMPILED_EXTENSIONS = envs.VLLM_USE_PRECOMPILED
+# VLLM_USE_PRECOMPILED implies precompiled rust frontend too.
+USE_PRECOMPILED_RUST_FRONTEND = (
+    envs.VLLM_USE_PRECOMPILED or envs.VLLM_USE_PRECOMPILED_RUST
+)
+
+
+def should_require_rust_frontend() -> bool:
+    value = os.getenv("VLLM_REQUIRE_RUST_FRONTEND", "")
+    return value.lower() not in ("", "0", "false", "no")
+
 
 if sys.platform.startswith("darwin") and VLLM_TARGET_DEVICE != "cpu":
     logger.warning("VLLM_TARGET_DEVICE automatically set to `cpu` due to macOS")
@@ -379,6 +394,20 @@ def run(self):
                 dirs_exist_ok=True,
             )
 
+        if _is_cuda():
+            # copy vendored deep_gemm package from build_lib to source tree
+            # for editable installs
+            deep_gemm_build = os.path.join(
+                self.build_lib, "vllm", "third_party", "deep_gemm"
+            )
+            if os.path.exists(deep_gemm_build):
+                print(f"Copying {deep_gemm_build} to vllm/third_party/deep_gemm")
+                shutil.copytree(
+                    deep_gemm_build,
+                    "vllm/third_party/deep_gemm",
+                    dirs_exist_ok=True,
+                )
+
 
 class precompiled_build_ext(build_ext):
     """Disables extension building when using precompiled binaries."""
@@ -391,6 +420,24 @@ def build_extensions(self) -> None:
         return
 
 
+class precompiled_build_rust(build_rust):
+    """Skips local Rust builds when the precompiled wheel already ships vllm-rs."""
+
+    def run(self) -> None:
+        if PRECOMPILED_RUST_FRONTEND_PATH.exists():
+            logger.info(
+                "Skipping local Rust build: using precompiled %s",
+                PRECOMPILED_RUST_FRONTEND_PATH,
+            )
+            return
+
+        logger.warning(
+            "Precompiled wheel did not provide %s; falling back to local Rust build.",
+            PRECOMPILED_RUST_FRONTEND_PATH,
+        )
+        super().run()
+
+
 class precompiled_wheel_utils:
     """Extracts libraries and other files from an existing wheel."""
 
@@ -639,7 +686,11 @@ def determine_wheel_url() -> tuple[str, str | None]:
 
     @staticmethod
     def extract_precompiled_and_patch_package(
-        wheel_url_or_path: str, download_filename: str | None
+        wheel_url_or_path: str,
+        download_filename: str | None,
+        *,
+        extract_extensions: bool,
+        extract_rust_frontend: bool,
     ) -> dict:
         import tempfile
         import zipfile
@@ -662,43 +713,63 @@ def extract_precompiled_and_patch_package(
             package_data_patch = {}
 
             with zipfile.ZipFile(wheel_path) as wheel:
-                files_to_copy = [
-                    "vllm/_C.abi3.so",
-                    "vllm/_C_stable_libtorch.abi3.so",
-                    "vllm/_moe_C.abi3.so",
-                    "vllm/_flashmla_C.abi3.so",
-                    "vllm/_flashmla_extension_C.abi3.so",
-                    "vllm/_sparse_flashmla_C.abi3.so",
-                    "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
-                    "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
-                    "vllm/cumem_allocator.abi3.so",
-                    # ROCm-specific libraries
-                    "vllm/_rocm_C.abi3.so",
-                ]
+                exact_members = set()
+                if extract_extensions:
+                    exact_members.update(
+                        {
+                            "vllm/_C.abi3.so",
+                            "vllm/_C_stable_libtorch.abi3.so",
+                            "vllm/_moe_C.abi3.so",
+                            "vllm/_flashmla_C.abi3.so",
+                            "vllm/_flashmla_extension_C.abi3.so",
+                            "vllm/_sparse_flashmla_C.abi3.so",
+                            "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
+                            "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
+                            "vllm/cumem_allocator.abi3.so",
+                            "vllm/spinloop.abi3.so",
+                            # ROCm-specific libraries
+                            "vllm/_rocm_C.abi3.so",
+                        }
+                    )
+                if extract_rust_frontend:
+                    exact_members.add("vllm/vllm-rs")
 
                 flash_attn_regex = re.compile(
                     r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py"
                 )
+                # __init__.py and flash_attn_interface.py are source-controlled
+                # in vllm and should not be overwritten (matches cmake exclusions)
+                flash_attn_files_to_skip = {
+                    "vllm/vllm_flash_attn/__init__.py",
+                    "vllm/vllm_flash_attn/flash_attn_interface.py",
+                }
                 triton_kernels_regex = re.compile(
                     r"vllm/third_party/triton_kernels/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py"
                 )
                 flashmla_regex = re.compile(
                     r"vllm/third_party/flashmla/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py"
                 )
-                file_members = list(
-                    filter(lambda x: x.filename in files_to_copy, wheel.filelist)
-                )
-                file_members += list(
-                    filter(lambda x: flash_attn_regex.match(x.filename), wheel.filelist)
-                )
-                file_members += list(
-                    filter(
-                        lambda x: triton_kernels_regex.match(x.filename), wheel.filelist
-                    )
-                )
-                file_members += list(
-                    filter(lambda x: flashmla_regex.match(x.filename), wheel.filelist)
-                )
+                # DeepGEMM: extract all files (.py, .so, .cuh, .h, .hpp, etc.)
+                deep_gemm_regex = re.compile(r"vllm/third_party/deep_gemm/.*")
+                file_members = []
+                for member in wheel.filelist:
+                    if member.filename in exact_members:
+                        file_members.append(member)
+                        continue
+
+                    if not extract_extensions:
+                        continue
+
+                    if (
+                        (
+                            flash_attn_regex.match(member.filename)
+                            and member.filename not in flash_attn_files_to_skip
+                        )
+                        or triton_kernels_regex.match(member.filename)
+                        or flashmla_regex.match(member.filename)
+                        or deep_gemm_regex.match(member.filename)
+                    ):
+                        file_members.append(member)
 
                 for file in file_members:
                     print(f"[extract] {file.filename}")
@@ -709,6 +780,9 @@ def extract_precompiled_and_patch_package(
                         open(target_path, "wb") as dst,
                     ):
                         shutil.copyfileobj(src, dst)
+                    mode = file.external_attr >> 16
+                    if mode:
+                        os.chmod(target_path, mode)
 
                     pkg = os.path.dirname(file.filename).replace("/", ".")
                     package_data_patch.setdefault(pkg, []).append(
@@ -881,7 +955,7 @@ def get_vllm_version() -> str:
         if envs.VLLM_TARGET_DEVICE == "empty":
             version += f"{sep}empty"
     elif _is_cuda():
-        if envs.VLLM_USE_PRECOMPILED and not envs.VLLM_SKIP_PRECOMPILED_VERSION_SUFFIX:
+        if USE_PRECOMPILED_EXTENSIONS and not envs.VLLM_SKIP_PRECOMPILED_VERSION_SUFFIX:
             version += f"{sep}precompiled"
         else:
             cuda_version = str(get_nvcc_cuda_version())
@@ -898,7 +972,9 @@ def get_vllm_version() -> str:
     elif _is_tpu():
         version += f"{sep}tpu"
     elif _is_cpu():
-        if envs.VLLM_TARGET_DEVICE == "cpu":
+        # Check the local VLLM_TARGET_DEVICE (may be set by auto-detect above),
+        # not envs.VLLM_TARGET_DEVICE, so CPU-only hosts still get `+cpu`.
+        if VLLM_TARGET_DEVICE == "cpu":
             version += f"{sep}cpu"
     elif _is_xpu():
         version += f"{sep}xpu"
@@ -938,6 +1014,9 @@ def _read_requirements(filename: str) -> list[str]:
                 # vllm-flash-attn is built only for CUDA 12.x.
                 # Skip for other versions.
                 continue
+            if "nvidia-cutlass-dsl[cu13]" in req and cuda_major == "12":
+                # [cu13] extra is the default; strip it on CUDA 12 builds.
+                req = req.replace("nvidia-cutlass-dsl[cu13]", "nvidia-cutlass-dsl")
             modified_requirements.append(req)
         requirements = modified_requirements
     elif _is_hip():
@@ -962,12 +1041,14 @@ def _read_requirements(filename: str) -> list[str]:
     # copying the relevant .py files from the source repository.
     ext_modules.append(CMakeExtension(name="vllm.triton_kernels", optional=True))
 
+ext_modules.append(CMakeExtension(name="vllm.spinloop"))
+
 if _is_hip():
     ext_modules.append(CMakeExtension(name="vllm._rocm_C"))
 
 if _is_cuda():
     ext_modules.append(CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa2_C"))
-    if envs.VLLM_USE_PRECOMPILED or (
+    if USE_PRECOMPILED_EXTENSIONS or (
         CUDA_HOME and get_nvcc_cuda_version() >= Version("12.3")
     ):
         # FA3 requires CUDA 12.3 or later
@@ -977,7 +1058,7 @@ def _read_requirements(filename: str) -> list[str]:
     ext_modules.append(
         CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa4_cutedsl_C", optional=True)
     )
-    if envs.VLLM_USE_PRECOMPILED or (
+    if USE_PRECOMPILED_EXTENSIONS or (
         CUDA_HOME and get_nvcc_cuda_version() >= Version("12.9")
     ):
         # FlashMLA requires CUDA 12.9 or later
@@ -987,6 +1068,12 @@ def _read_requirements(filename: str) -> list[str]:
         ext_modules.append(
             CMakeExtension(name="vllm._flashmla_extension_C", optional=True)
         )
+    if envs.VLLM_USE_PRECOMPILED or (
+        CUDA_HOME and get_nvcc_cuda_version() >= Version("12.3")
+    ):
+        # DeepGEMM requires CUDA 12.3+ (SM90/SM100)
+        # Optional since it won't build on unsupported architectures
+        ext_modules.append(CMakeExtension(name="vllm._deep_gemm_C", optional=True))
 
 if _is_cpu():
     import platform
@@ -1000,9 +1087,7 @@ def _read_requirements(filename: str) -> list[str]:
 
 if _build_custom_ops():
     ext_modules.append(CMakeExtension(name="vllm._C"))
-    # also _is_hip() once https://github.com/vllm-project/vllm/issues/35163 is
-    # fixed
-    if _is_cuda():
+    if _is_cuda() or _is_hip():
         ext_modules.append(CMakeExtension(name="vllm._C_stable_libtorch"))
 
 package_data = {
@@ -1013,19 +1098,34 @@ def _read_requirements(filename: str) -> list[str]:
         "model_executor/layers/quantization/utils/configs/*.json",
         "entrypoints/serve/instrumentator/static/*.js",
         "entrypoints/serve/instrumentator/static/*.css",
+        "distributed/kv_transfer/kv_connector/v1/hf3fs/utils/*.cpp",
+        # DeepGEMM JIT include headers (vendored via cmake)
+        "third_party/deep_gemm/include/**/*.cuh",
+        "third_party/deep_gemm/include/**/*.h",
+        "third_party/deep_gemm/include/**/*.hpp",
     ]
 }
 
 
-# If using precompiled, extract and patch package_data (in advance of setup)
-if envs.VLLM_USE_PRECOMPILED:
+# If using precompiled artifacts, extract and patch package_data in advance.
+if USE_PRECOMPILED_RUST_FRONTEND:
     wheel_url, download_filename = precompiled_wheel_utils.determine_wheel_url()
     patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
-        wheel_url, download_filename
+        wheel_url,
+        download_filename,
+        extract_extensions=USE_PRECOMPILED_EXTENSIONS,
+        extract_rust_frontend=True,
     )
     for pkg, files in patch.items():
         package_data.setdefault(pkg, []).extend(files)
 
+# If the rust frontend binary is already present in the source tree (e.g.,
+# pre-built in a separate Docker build stage), ship it as-is.
+if PRECOMPILED_RUST_FRONTEND_PATH.exists():
+    vllm_files = package_data.setdefault("vllm", [])
+    if "vllm-rs" not in vllm_files:
+        vllm_files.append("vllm-rs")
+
 if _no_device():
     ext_modules = []
 
@@ -1034,18 +1134,38 @@ def _read_requirements(filename: str) -> list[str]:
 else:
     cmdclass = {
         "build_ext": precompiled_build_ext
-        if envs.VLLM_USE_PRECOMPILED
+        if USE_PRECOMPILED_EXTENSIONS
         else cmake_build_ext,
     }
+if USE_PRECOMPILED_RUST_FRONTEND or PRECOMPILED_RUST_FRONTEND_PATH.exists():
+    cmdclass["build_rust"] = precompiled_build_rust
+
+# Rust frontend binary, built via setuptools-rust and installed into the
+# package directory alongside the Python modules.
+# TODO: we may use `RustBin` to directly install it into `bin` directory, but this
+# requires extra work on using precompiled binaries.
+rust_extensions = [
+    RustExtension(
+        target="vllm.vllm-rs",
+        path="rust/src/cmd/Cargo.toml",
+        args=["--bin", "vllm-rs"],
+        features=["native-tls-vendored"],
+        binding=Binding.Exec,
+        optional=not should_require_rust_frontend(),
+    ),
+]
 
 setup(
     # static metadata should rather go in pyproject.toml
     version=get_vllm_version(),
     ext_modules=ext_modules,
+    rust_extensions=rust_extensions,
     install_requires=get_requirements(),
     extras_require={
         # AMD Zen CPU optimizations via zentorch
-        "zen": ["zentorch"],
+        "zen": [
+            "zentorch-weekly==5.2.1.dev20260408"
+        ],  # Zentorch has weekly releases. This pulls the known-good version.
         "bench": ["pandas", "matplotlib", "seaborn", "datasets", "scipy", "plotly"],
         "tensorizer": ["tensorizer==2.10.1"],
         "fastsafetensors": ["fastsafetensors >= 0.2.2"],
@@ -1053,19 +1173,19 @@ def _read_requirements(filename: str) -> list[str]:
         "runai": ["runai-model-streamer[s3,gcs,azure] >= 0.15.7"],
         "audio": [
             "av",
-            "resampy",
             "scipy",
             "soundfile",
             "mistral_common[audio]",
         ],  # Required for audio processing
         "video": [],  # Kept for backwards compatibility
         "flashinfer": [],  # Kept for backwards compatibility
-        # Optional deps for AMD FP4 quantization support
-        "petit-kernel": ["petit-kernel"],
         # Optional deps for Helion kernel development
-        "helion": ["helion==0.3.2"],
+        # NOTE: When updating helion version, also update CI files:
+        #   - .buildkite/test_areas/kernels.yaml
+        #   - .buildkite/test-amd.yaml
+        "helion": ["helion==1.0.0"],
         # Optional deps for gRPC server (vllm serve --grpc)
-        "grpc": ["smg-grpc-servicer[vllm] >= 0.5.0"],
+        "grpc": ["smg-grpc-servicer[vllm] >= 0.5.2"],
         # Optional deps for OpenTelemetry tracing
         "otel": [
             "opentelemetry-sdk>=1.26.0",
@@ -1073,6 +1193,11 @@ def _read_requirements(filename: str) -> list[str]:
             "opentelemetry-exporter-otlp>=1.26.0",
             "opentelemetry-semantic-conventions-ai>=0.4.1",
         ],
+        "triton-cpu": [
+            "triton @ "
+            "git+https://github.com/triton-lang/triton-cpu.git@270e696d ; "
+            "platform_machine == 'x86_64'",
+        ],  # Remove after stable release
     },
     cmdclass=cmdclass,
     package_data=package_data,
diff --git a/tests/basic_correctness/test_cumem.py b/tests/basic_correctness/test_cumem.py
index b1a16cfcaba4..8d8f87f0a3c6 100644
--- a/tests/basic_correctness/test_cumem.py
+++ b/tests/basic_correctness/test_cumem.py
@@ -13,6 +13,8 @@
 
 from ..utils import create_new_process_for_each_test, requires_fp8
 
+DEVICE_TYPE = current_platform.device_type
+
 
 @create_new_process_for_each_test("fork" if not current_platform.is_rocm() else "spawn")
 def test_python_error():
@@ -26,13 +28,13 @@ def test_python_error():
     tensors = []
     with allocator.use_memory_pool():
         # allocate 70% of the total memory
-        x = torch.empty(alloc_bytes, dtype=torch.uint8, device="cuda")
+        x = torch.empty(alloc_bytes, dtype=torch.uint8, device=DEVICE_TYPE)
         tensors.append(x)
     # release the memory
     allocator.sleep()
 
     # allocate more memory than the total memory
-    y = torch.empty(alloc_bytes, dtype=torch.uint8, device="cuda")
+    y = torch.empty(alloc_bytes, dtype=torch.uint8, device=DEVICE_TYPE)
     tensors.append(y)
     with pytest.raises(RuntimeError):
         # when the allocator is woken up, it should raise an error
@@ -44,17 +46,17 @@ def test_python_error():
 def test_basic_cumem():
     # some tensors from default memory pool
     shape = (1024, 1024)
-    x = torch.empty(shape, device="cuda")
+    x = torch.empty(shape, device=DEVICE_TYPE)
     x.zero_()
 
     # some tensors from custom memory pool
     allocator = CuMemAllocator.get_instance()
     with allocator.use_memory_pool():
         # custom memory pool
-        y = torch.empty(shape, device="cuda")
+        y = torch.empty(shape, device=DEVICE_TYPE)
         y.zero_()
         y += 1
-        z = torch.empty(shape, device="cuda")
+        z = torch.empty(shape, device=DEVICE_TYPE)
         z.zero_()
         z += 2
 
@@ -77,16 +79,16 @@ def test_basic_cumem():
 def test_cumem_with_cudagraph():
     allocator = CuMemAllocator.get_instance()
     with allocator.use_memory_pool():
-        weight = torch.eye(1024, device="cuda")
+        weight = torch.eye(1024, device=DEVICE_TYPE)
     with allocator.use_memory_pool(tag="discard"):
-        cache = torch.empty(1024, 1024, device="cuda")
+        cache = torch.empty(1024, 1024, device=DEVICE_TYPE)
 
     def model(x):
         out = x @ weight
         cache[: out.size(0)].copy_(out)
         return out + 1
 
-    x = torch.empty(128, 1024, device="cuda")
+    x = torch.empty(128, 1024, device=DEVICE_TYPE)
 
     # warmup
     model(x)
diff --git a/tests/benchmarks/test_custom_dataset_seed.py b/tests/benchmarks/test_custom_dataset_seed.py
new file mode 100644
index 000000000000..dac87e6e6d98
--- /dev/null
+++ b/tests/benchmarks/test_custom_dataset_seed.py
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import argparse
+import json
+from pathlib import Path
+
+import pytest
+from transformers import AutoTokenizer, PreTrainedTokenizerBase
+
+from vllm.benchmarks.datasets import get_samples
+
+
+@pytest.fixture(scope="session")
+def hf_tokenizer() -> PreTrainedTokenizerBase:
+    return AutoTokenizer.from_pretrained("gpt2")
+
+
+def _write_jsonl(path: Path, n_rows: int) -> None:
+    with path.open("w") as f:
+        for i in range(n_rows):
+            f.write(json.dumps({"prompt": f"row {i}: unique prompt content."}) + "\n")
+
+
+def _args_for_custom(dataset_path: str, seed: int) -> argparse.Namespace:
+    return argparse.Namespace(
+        dataset_name="custom",
+        dataset_path=dataset_path,
+        disable_shuffle=False,
+        num_prompts=30,
+        custom_output_len=32,
+        skip_chat_template=True,
+        no_oversample=False,
+        seed=seed,
+        request_id_prefix="",
+    )
+
+
+@pytest.mark.benchmark
+def test_custom_dataset_seed_propagates(
+    hf_tokenizer: PreTrainedTokenizerBase, tmp_path: Path
+) -> None:
+    """--seed must control the CustomDataset shuffle used by get_samples.
+
+    Without the fix, CustomDataset was instantiated without random_seed,
+    so its load-time shuffle always used DEFAULT_SEED=0 regardless of
+    args.seed, causing every run with --dataset-name custom to pick the
+    same subset of rows from a larger file.
+    """
+    jsonl = tmp_path / "data.jsonl"
+    _write_jsonl(jsonl, n_rows=60)
+
+    samples_a = get_samples(_args_for_custom(str(jsonl), seed=0), hf_tokenizer)
+    samples_b = get_samples(_args_for_custom(str(jsonl), seed=42), hf_tokenizer)
+
+    prompts_a = {s.prompt for s in samples_a}
+    prompts_b = {s.prompt for s in samples_b}
+
+    assert len(prompts_a) == 30
+    assert len(prompts_b) == 30
+    assert prompts_a != prompts_b
+
+
+@pytest.mark.benchmark
+def test_custom_dataset_same_seed_is_deterministic(
+    hf_tokenizer: PreTrainedTokenizerBase, tmp_path: Path
+) -> None:
+    """Same --seed must yield the same CustomDataset subset."""
+    jsonl = tmp_path / "data.jsonl"
+    _write_jsonl(jsonl, n_rows=60)
+
+    samples_a = get_samples(_args_for_custom(str(jsonl), seed=7), hf_tokenizer)
+    samples_b = get_samples(_args_for_custom(str(jsonl), seed=7), hf_tokenizer)
+
+    prompts_a = [s.prompt for s in samples_a]
+    prompts_b = [s.prompt for s in samples_b]
+
+    assert prompts_a == prompts_b
diff --git a/tests/benchmarks/test_sampling_params.py b/tests/benchmarks/test_sampling_params.py
new file mode 100644
index 000000000000..3bc34a84b377
--- /dev/null
+++ b/tests/benchmarks/test_sampling_params.py
@@ -0,0 +1,258 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import numpy as np
+import pytest
+
+from vllm.benchmarks.datasets.utils import get_sampling_params
+from vllm.tokenizers import TokenizerLike
+
+
+class _FakeTokenizer(TokenizerLike):
+    """Minimal tokenizer implementing the TokenizerLike protocol
+    for testing get_sampling_params."""
+
+    def __init__(self, vocab_size: int = 1000, num_special_tokens: int = 0) -> None:
+        self._vocab_size = vocab_size
+        self._num_special_tokens = num_special_tokens
+
+    # -- Properties required by TokenizerLike --
+
+    @classmethod
+    def from_pretrained(cls, path_or_repo_id, *a, **kw):  # type: ignore[override]
+        return cls()
+
+    @property
+    def vocab_size(self) -> int:
+        return self._vocab_size
+
+    @property
+    def all_special_tokens(self) -> list[str]:
+        return []
+
+    @property
+    def all_special_ids(self) -> list[int]:
+        return []
+
+    @property
+    def bos_token_id(self) -> int:
+        return 0
+
+    @property
+    def eos_token_id(self) -> int:
+        return 1
+
+    @property
+    def pad_token_id(self) -> int:
+        return 2
+
+    @property
+    def is_fast(self) -> bool:
+        return False
+
+    @property
+    def max_token_id(self) -> int:
+        return self._vocab_size - 1
+
+    @property
+    def max_chars_per_token(self) -> int:
+        return 4
+
+    @property
+    def truncation_side(self) -> str:
+        return "right"
+
+    def num_special_tokens_to_add(self) -> int:
+        return self._num_special_tokens
+
+    def __call__(self, text, text_pair=None, **kw):  # type: ignore[override]
+        raise NotImplementedError
+
+    def get_vocab(self) -> dict[str, int]:
+        return {}
+
+    def get_added_vocab(self) -> dict[str, int]:
+        return {}
+
+    def encode(self, text, **kw) -> list[int]:  # type: ignore[override]
+        raise NotImplementedError
+
+    def apply_chat_template(self, messages, **kw):  # type: ignore[override]
+        raise NotImplementedError
+
+    def convert_tokens_to_ids(self, tokens):  # type: ignore[override]
+        raise NotImplementedError
+
+    def convert_tokens_to_string(self, tokens: list[str]) -> str:
+        raise NotImplementedError
+
+    def decode(self, ids, skip_special_tokens: bool = False) -> str:  # type: ignore[override]
+        raise NotImplementedError
+
+    def convert_ids_to_tokens(  # type: ignore[override]
+        self, ids, skip_special_tokens: bool = False
+    ) -> list[str]:
+        raise NotImplementedError
+
+
+class TestGetSamplingParams:
+    """Tests for ``get_sampling_params`` in ``vllm.benchmarks.datasets.shared``."""
+
+    # -- helpers --
+
+    @staticmethod
+    def _tok(vocab_size: int = 1000, num_special: int = 0) -> _FakeTokenizer:
+        return _FakeTokenizer(vocab_size=vocab_size, num_special_tokens=num_special)
+
+    # -- return shape / dtype --
+
+    def test_returns_three_arrays(self):
+        rng = np.random.default_rng(0)
+        result = get_sampling_params(rng, 5, 0.0, 100, 50, self._tok())
+        assert len(result) == 3
+        for arr in result:
+            assert isinstance(arr, np.ndarray)
+
+    @pytest.mark.parametrize("n", [1, 10, 100])
+    def test_output_length_matches_num_requests(self, n: int):
+        rng = np.random.default_rng(42)
+        input_lens, output_lens, offsets = get_sampling_params(
+            rng, n, 0.0, 64, 32, self._tok()
+        )
+        assert input_lens.shape == (n,)
+        assert output_lens.shape == (n,)
+        assert offsets.shape == (n,)
+
+    # -- fixed lengths (range_ratio = 0) --
+
+    def test_zero_range_ratio_gives_constant_lengths(self):
+        rng = np.random.default_rng(7)
+        input_lens, output_lens, _ = get_sampling_params(
+            rng, 20, 0.0, 128, 64, self._tok()
+        )
+        assert np.all(input_lens == 128)
+        assert np.all(output_lens == 64)
+
+    def test_special_tokens_subtracted_from_input_only(self):
+        rng = np.random.default_rng(7)
+        input_lens, output_lens, _ = get_sampling_params(
+            rng, 10, 0.0, 100, 50, self._tok(num_special=4)
+        )
+        # real_input_len = 100 - 4 = 96, range_ratio 0 → all 96
+        assert np.all(input_lens == 96)
+        # special tokens are not subtracted from output length
+        assert np.all(output_lens == 50)
+
+    # -- range ratios --
+
+    def test_input_range_bounds(self):
+        rng = np.random.default_rng(0)
+        ratio = 0.5
+        base = 200
+        input_lens, _, _ = get_sampling_params(
+            rng, 500, {"input": ratio, "output": 0.0}, base, 50, self._tok()
+        )
+        lo = int(np.floor(base * (1 - ratio)))
+        hi = int(np.ceil(base * (1 + ratio)))
+        assert np.all(input_lens >= lo)
+        assert np.all(input_lens <= hi)
+
+    def test_output_range_bounds(self):
+        rng = np.random.default_rng(0)
+        ratio = 0.3
+        base = 100
+        _, output_lens, _ = get_sampling_params(
+            rng, 500, {"input": 0.0, "output": ratio}, 50, base, self._tok()
+        )
+        lo = max(1, int(np.floor(base * (1 - ratio))))
+        hi = int(np.ceil(base * (1 + ratio)))
+        assert np.all(output_lens >= lo)
+        assert np.all(output_lens <= hi)
+
+    def test_output_low_clamped_to_one(self):
+        """Even with a high ratio that would push output_low to 0,
+        the function clamps it to 1."""
+        rng = np.random.default_rng(0)
+        # output_len=1, ratio=0.99 → floor(1*0.01)=0, should clamp to 1
+        _, output_lens, _ = get_sampling_params(
+            rng, 50, {"input": 0.0, "output": 0.99}, 100, 1, self._tok()
+        )
+        assert np.all(output_lens >= 1)
+
+    # -- offsets bounded by vocab_size --
+
+    @pytest.mark.parametrize("vocab", [100, 32000, 128256])
+    def test_offsets_within_vocab(self, vocab: int):
+        rng = np.random.default_rng(0)
+        _, _, offsets = get_sampling_params(
+            rng, 200, 0.0, 64, 32, self._tok(vocab_size=vocab)
+        )
+        assert np.all(offsets >= 0)
+        assert np.all(offsets < vocab)
+
+    # -- reproducibility --
+
+    def test_same_seed_same_results(self):
+        tok = self._tok()
+        rr = {"input": 0.3, "output": 0.2}
+        a = get_sampling_params(np.random.default_rng(42), 50, rr, 256, 64, tok)
+        b = get_sampling_params(np.random.default_rng(42), 50, rr, 256, 64, tok)
+        for arr_a, arr_b in zip(a, b):
+            np.testing.assert_array_equal(arr_a, arr_b)
+
+    def test_different_seed_different_results(self):
+        tok = self._tok()
+        rr = {"input": 0.3, "output": 0.2}
+        a = get_sampling_params(np.random.default_rng(0), 50, rr, 256, 64, tok)
+        b = get_sampling_params(np.random.default_rng(1), 50, rr, 256, 64, tok)
+        # Extremely unlikely all three arrays match with different seeds
+        assert not all(np.array_equal(arr_a, arr_b) for arr_a, arr_b in zip(a, b))
+
+    # -- validation / error paths --
+
+    @pytest.mark.parametrize("bad_ratio", [-0.1, 1.0, 1.5])
+    def test_invalid_input_range_ratio(self, bad_ratio: float):
+        rng = np.random.default_rng(0)
+        with pytest.raises(ValueError, match="input_range_ratio"):
+            get_sampling_params(
+                rng, 10, {"input": bad_ratio, "output": 0.0}, 100, 50, self._tok()
+            )
+
+    @pytest.mark.parametrize("bad_ratio", [-0.1, 1.0, 1.5])
+    def test_invalid_output_range_ratio(self, bad_ratio: float):
+        rng = np.random.default_rng(0)
+        with pytest.raises(ValueError, match="output_range_ratio"):
+            get_sampling_params(
+                rng, 10, {"input": 0.0, "output": bad_ratio}, 100, 50, self._tok()
+            )
+
+    def test_invalid_dict_missing_keys(self):
+        rng = np.random.default_rng(0)
+        with pytest.raises(ValueError, match="input.*output"):
+            get_sampling_params(rng, 10, {"input": 0.1}, 100, 50, self._tok())
+
+    def test_input_len_zero_with_special_tokens(self):
+        """input_len < num_special_tokens → real_input_len = 0, which is fine
+        (range [0, 0])."""
+        rng = np.random.default_rng(0)
+        input_lens, _, _ = get_sampling_params(
+            rng, 5, 0.0, 5, 50, self._tok(num_special=10)
+        )
+        # real_input_len = max(0, 5 - 10) = 0
+        assert np.all(input_lens == 0)
+
+    # -- edge cases --
+
+    def test_single_request(self):
+        rng = np.random.default_rng(0)
+        i, o, off = get_sampling_params(rng, 1, 0.0, 100, 50, self._tok())
+        assert i.shape == (1,)
+        assert o.shape == (1,)
+        assert off.shape == (1,)
+
+    def test_large_num_requests(self):
+        rng = np.random.default_rng(0)
+        i, o, off = get_sampling_params(rng, 10_000, 0.5, 512, 128, self._tok())
+        assert i.shape == (10_000,)
+        assert o.shape == (10_000,)
+        assert off.shape == (10_000,)
diff --git a/tests/benchmarks/test_txt_slices_dataset.py b/tests/benchmarks/test_txt_slices_dataset.py
new file mode 100644
index 000000000000..7821e9a925a2
--- /dev/null
+++ b/tests/benchmarks/test_txt_slices_dataset.py
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import json
+from pathlib import Path
+
+import pytest
+from transformers import AutoTokenizer, PreTrainedTokenizerBase
+
+from vllm.benchmarks.datasets import CustomDataset
+from vllm.benchmarks.datasets.create_txt_slices_dataset import create_txt_slices_jsonl
+
+
+@pytest.fixture(scope="session")
+def hf_tokenizer() -> PreTrainedTokenizerBase:
+    # Use a small, commonly available tokenizer
+    return AutoTokenizer.from_pretrained("gpt2")
+
+
+text_content = """
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud
+exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat
+nulla pariatur. Excepteur sint occaecat cupidatat non proident,
+sunt in culpa qui officia deserunt mollit anim id est laborum.
+"""
+
+
+@pytest.mark.benchmark
+def test_create_txt_slices_jsonl(
+    hf_tokenizer: PreTrainedTokenizerBase, tmp_path: Path
+) -> None:
+    """Test that create_txt_slices_jsonl produces valid JSONL for CustomDataset."""
+    txt_path = tmp_path / "input.txt"
+    jsonl_path = tmp_path / "input.txt.jsonl"
+
+    txt_path.write_text(text_content)
+
+    create_txt_slices_jsonl(
+        input_path=str(txt_path),
+        output_path=str(jsonl_path),
+        tokenizer_name="gpt2",
+        num_prompts=10,
+        input_len=10,
+        output_len=10,
+    )
+
+    # Verify the JSONL file is valid and has the expected structure
+    records = [json.loads(line) for line in jsonl_path.read_text().splitlines()]
+
+    assert len(records) == 10
+    for record in records:
+        assert "prompt" in record
+        assert "output_tokens" in record
+        assert isinstance(record["prompt"], str)
+        assert record["output_tokens"] == 10
+
+    # Verify the JSONL file can be loaded by CustomDataset
+    dataset = CustomDataset(dataset_path=str(jsonl_path))
+    samples = dataset.sample(
+        tokenizer=hf_tokenizer,
+        num_requests=10,
+        output_len=10,
+        skip_chat_template=True,
+    )
+
+    assert len(samples) == 10
+    assert all(sample.expected_output_len == 10 for sample in samples)
diff --git a/tests/compile/backend.py b/tests/compile/backend.py
index ec4685324661..87f98946a8ad 100644
--- a/tests/compile/backend.py
+++ b/tests/compile/backend.py
@@ -8,14 +8,21 @@
 
 import depyf
 from torch import fx
-from torch._ops import OpOverload
+from torch._ops import OpOverload, OpOverloadPacket
 from torch.fx._utils import lazy_format_graph_code
 
 from vllm.compilation.passes.fx_utils import find_op_nodes
-from vllm.compilation.passes.inductor_pass import InductorPass
+from vllm.compilation.passes.inductor_pass import (
+    InductorPass,
+    pass_context,
+)
+from vllm.compilation.passes.ir.inplace_functionalization import (
+    VllmIRInplaceFunctionalizationPass,
+)
 from vllm.compilation.passes.pass_manager import with_pattern_match_debug
 from vllm.compilation.passes.vllm_inductor_pass import VllmInductorPass
 from vllm.config import VllmConfig, get_current_vllm_config
+from vllm.config.utils import Range
 from vllm.logger import init_logger
 
 logger = init_logger("vllm.tests.compile.backend")
@@ -53,11 +60,17 @@ def __init__(self, *passes: InductorPass | Callable[[fx.Graph], None]):
         self.custom_passes = list(passes)
         vllm_config = get_current_vllm_config()
         compile_config = vllm_config.compilation_config
+        self.range = Range(1, vllm_config.scheduler_config.max_num_batched_tokens)
         # Deepcopy to allow multiple TestBackend instances to use the same VllmConfig
         self.inductor_config = deepcopy(compile_config.inductor_compile_config)
         self.inductor_config["force_disable_caches"] = True
         self.inductor_config["post_grad_custom_post_pass"] = self.post_pass
 
+        # Add VllmIRInplaceFunctionalizationPass as pre-grad pass by default
+        self.inductor_config["pre_grad_custom_pass"] = (
+            VllmIRInplaceFunctionalizationPass(vllm_config)
+        )
+
         if debug_dump_path := vllm_config.compile_debug_dump_path():
             logger.debug("Dumping depyf output to %s", debug_dump_path)
             self.debug_ctx = depyf.prepare_debug(debug_dump_path.as_posix())
@@ -68,7 +81,7 @@ def __call__(self, graph: fx.GraphModule, example_inputs):
         self.graph_pre_compile = deepcopy(graph)
         from torch._inductor.compile_fx import compile_fx
 
-        with self.debug_ctx:
+        with self.debug_ctx, pass_context(self.range):
             return compile_fx(
                 graph, example_inputs, config_patches=self.inductor_config
             )
@@ -90,7 +103,9 @@ def post_pass(self, graph: fx.Graph):
         # assign by reference, will reflect the final state of the graph
         self.final_graph = graph
 
-    def check_before_ops(self, ops: Sequence[OpOverload], fully_replaced=True):
+    def check_before_ops(
+        self, ops: Sequence[OpOverload | OpOverloadPacket], fully_replaced=True
+    ):
         for op in ops:
             num_pre = len(list(find_op_nodes(op, self.graph_pre_pass)))
             num_post = len(list(find_op_nodes(op, self.graph_post_pass)))
@@ -99,13 +114,19 @@ def check_before_ops(self, ops: Sequence[OpOverload], fully_replaced=True):
             if fully_replaced:
                 assert num_post == 0, f"Unexpected op {op.name()} in post-pass graph"
 
-    def check_after_ops(self, ops: Sequence[OpOverload]):
+    def check_after_ops(self, ops: Sequence[OpOverload | OpOverloadPacket]):
         for op in ops:
             num_pre = len(list(find_op_nodes(op, self.graph_pre_pass)))
             num_post = len(list(find_op_nodes(op, self.graph_post_pass)))
             assert num_pre == 0, f"Unexpected op {op.name()} in pre-pass graph"
             assert num_post > 0, f"Op {op.name()} not found in post-pass graph"
 
-    def op_count(self, op: OpOverload, before=False) -> int:
+    def op_count(self, op: OpOverload | OpOverloadPacket, before=False) -> int:
         graph = self.graph_pre_pass if before else self.graph_post_pass
         return len(list(find_op_nodes(op, graph)))
+
+    def print_graphs(self):
+        print("=== Graph before custom passes ===")
+        print(self.graph_pre_pass.python_code(root_module="self", verbose=True).src)
+        print("=== Graph after custom passes ===")
+        print(self.graph_post_pass.python_code(root_module="self", verbose=True).src)
diff --git a/tests/compile/conftest.py b/tests/compile/conftest.py
index 6aafac7bcad3..1263cce04c6c 100644
--- a/tests/compile/conftest.py
+++ b/tests/compile/conftest.py
@@ -24,10 +24,24 @@ def test_something(mock_cuda_platform):
     def _mock_platform(is_cuda: bool = True, capability: tuple[int, int] | None = None):
         mock_platform = MagicMock()
         mock_platform.is_cuda.return_value = is_cuda
-        if capability is not None:
-            mock_platform.get_device_capability.return_value = DeviceCapability(
-                *capability
+        device_capability = (
+            DeviceCapability(*capability) if capability is not None else None
+        )
+        mock_platform.get_device_capability.return_value = device_capability
+
+        def is_device_capability_family(
+            requested_capability: int, device_id: int = 0
+        ) -> bool:
+            current_capability = mock_platform.get_device_capability(
+                device_id=device_id
             )
+            if current_capability is None:
+                return False
+            return current_capability.major == (requested_capability // 10)
+
+        mock_platform.is_device_capability_family.side_effect = (
+            is_device_capability_family
+        )
         with patch("vllm.platforms.current_platform", mock_platform):
             yield mock_platform
 
diff --git a/tests/compile/correctness_e2e/test_async_tp.py b/tests/compile/correctness_e2e/test_async_tp.py
index 3539e4d5abb4..28c7eb6fbc25 100644
--- a/tests/compile/correctness_e2e/test_async_tp.py
+++ b/tests/compile/correctness_e2e/test_async_tp.py
@@ -13,6 +13,17 @@
 from vllm.config import (
     CompilationMode,
 )
+from vllm.platforms import current_platform
+from vllm.utils.flashinfer import has_flashinfer
+
+NVFP4_MODEL_ID = "nvidia/Llama-3.1-8B-Instruct-NVFP4"
+NVFP4_HF_OVERRIDES = {
+    "num_hidden_layers": 4,
+    "hidden_size": 512,
+    "intermediate_size": 800,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 1,
+}
 
 
 @create_new_process_for_each_test()
@@ -81,4 +92,78 @@ def test_async_tp_pass_correctness(
         "mp",
     ]
 
-    compare_two_settings(model_id, async_tp_args, tp_args, method="generate")
+    compare_two_settings(
+        model_id,
+        async_tp_args,
+        tp_args,
+        method="generate",
+        force_v1_runner=True,
+    )
+
+
+@create_new_process_for_each_test()
+def test_async_tp_pass_nvfp4_correctness(num_gpus_available: int, monkeypatch):
+    if (
+        not current_platform.is_cuda()
+        or not current_platform.is_device_capability_family(100)
+    ):
+        pytest.skip("NVFP4 requires Blackwell")
+    if not has_flashinfer():
+        pytest.skip("FlashInfer is required for the NVFP4 AsyncTP path")
+
+    monkeypatch.setenv("VLLM_NVFP4_GEMM_BACKEND", "flashinfer-cutlass")
+
+    tp_size = 2
+    if num_gpus_available < tp_size:
+        pytest.skip(f"Need at least {tp_size} GPUs")
+
+    common_args = [
+        "--dtype",
+        "bfloat16",
+        "--max-model-len",
+        "2048",
+        "--max-num-seqs",
+        "8",
+        "--load-format",
+        "dummy",
+        "--hf-overrides",
+        json.dumps(NVFP4_HF_OVERRIDES),
+    ]
+
+    compilation_config = {
+        "mode": CompilationMode.VLLM_COMPILE,
+        "compile_sizes": [2, 4, 8],
+        "splitting_ops": [],
+        "pass_config": {
+            "enable_sp": True,
+            "fuse_gemm_comms": True,
+            "fuse_allreduce_rms": False,
+            "sp_min_token_num": 1,
+        },
+    }
+
+    async_tp_args = [
+        *common_args,
+        "--tensor-parallel-size",
+        str(tp_size),
+        "--distributed-executor-backend",
+        "mp",
+        "--compilation_config",
+        json.dumps(compilation_config),
+    ]
+
+    tp_args = [
+        *common_args,
+        "--tensor-parallel-size",
+        str(tp_size),
+        "--distributed-executor-backend",
+        "mp",
+    ]
+
+    compare_two_settings(
+        NVFP4_MODEL_ID,
+        async_tp_args,
+        tp_args,
+        method="generate",
+        force_v1_runner=True,
+    )
diff --git a/tests/compile/correctness_e2e/test_sequence_parallel.py b/tests/compile/correctness_e2e/test_sequence_parallel.py
index 281ffbfd2ec8..e320f5a11208 100644
--- a/tests/compile/correctness_e2e/test_sequence_parallel.py
+++ b/tests/compile/correctness_e2e/test_sequence_parallel.py
@@ -21,12 +21,14 @@
 from vllm.platforms import current_platform
 from vllm.utils.torch_utils import is_torch_equal_or_newer
 
-from ...models.registry import HF_EXAMPLE_MODELS
+from ...models.registry import HF_EXAMPLE_MODELS, _HfExamplesInfo
 from ...utils import compare_two_settings, create_new_process_for_each_test
 
 logger = init_logger("test_sequence_parallel")
 
 VLLM_MULTI_NODE = os.getenv("VLLM_MULTI_NODE", "0") == "1"
+NVFP4_MODEL_ID = "nvidia/Llama-3.1-8B-Instruct-NVFP4"
+NVFP4_MODEL_INFO = _HfExamplesInfo(NVFP4_MODEL_ID)
 
 
 class ParallelSetup(NamedTuple):
@@ -41,6 +43,7 @@ class ParallelSetup(NamedTuple):
 class SPTestOptions(NamedTuple):
     multi_node_only: bool
     load_format: str | None = None
+    model_info: _HfExamplesInfo | None = None
 
 
 @dataclass
@@ -167,9 +170,11 @@ def _compare_sp(
     num_gpus_available: int,
     use_inductor_graph_partition: bool,
     fuse_gemm_comms: bool,
+    enable_prompt_embeds: bool,
     *,
     method: Literal["generate", "encode"],
     is_multimodal: bool,
+    dtype: str = "float16",
 ):
     (
         tp_size,
@@ -180,14 +185,15 @@ def _compare_sp(
         chunked_prefill,
     ) = parallel_setup
 
-    multi_node_only, load_format = test_options
+    multi_node_only = test_options.multi_node_only
+    load_format = test_options.load_format
 
-    model_info = HF_EXAMPLE_MODELS.find_hf_info(model_id)
+    model_info = test_options.model_info or HF_EXAMPLE_MODELS.find_hf_info(model_id)
     model_info.check_transformers_version(on_fail="skip")
 
     trust_remote_code = model_info.trust_remote_code
     tokenizer_mode = model_info.tokenizer_mode
-    hf_overrides = model_info.hf_overrides
+    hf_overrides = dict(model_info.hf_overrides)
     require_embed_inputs = model_info.require_embed_inputs
 
     if load_format == "dummy":
@@ -220,7 +226,7 @@ def _compare_sp(
     common_args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
-        "float16",
+        dtype,
         "--max-model-len",
         "2048",
         "--max-num-seqs",
@@ -248,6 +254,8 @@ def _compare_sp(
                 "--enable-mm-embeds",
             ]
         )
+    elif enable_prompt_embeds:
+        common_args.append("--enable-prompt-embeds")
 
     compilation_config = {
         "mode": CompilationMode.VLLM_COMPILE,
@@ -257,10 +265,14 @@ def _compare_sp(
             "fuse_gemm_comms": fuse_gemm_comms,
             "fuse_norm_quant": fuse_norm_quant,
             "fuse_act_quant": fuse_act_quant,
+            "fuse_allreduce_rms": False,
             "eliminate_noops": True,
+            "sp_min_token_num": 0,
         },
         "use_inductor_graph_partition": use_inductor_graph_partition,
     }
+    if not use_inductor_graph_partition:
+        compilation_config["splitting_ops"] = []
 
     tp_sp_args = [
         *common_args,
@@ -282,7 +294,13 @@ def _compare_sp(
         "mp",
     ]
 
-    compare_two_settings(model_id, tp_sp_args, tp_args, method=method)
+    compare_two_settings(
+        model_id,
+        tp_sp_args,
+        tp_args,
+        method=method,
+        force_v1_runner=True,
+    )
 
 
 SP_TEXT_GENERATION_MODELS = {
@@ -347,6 +365,84 @@ def test_tp_sp_generation(
         num_gpus_available,
         use_inductor_graph_partition,
         fuse_gemm_comms=fuse_gemm_comms,
+        enable_prompt_embeds=False,
+        method="generate",
+        is_multimodal=False,
+    )
+
+
+# Focused regression test for the SP + prompt_embeds graph-rewrite path.
+# Covers pp_size=1 (SP only) and pp_size=2 (SP + PP); kept small on purpose so
+# we don't double the matrix of `test_tp_sp_generation` above.
+SP_PROMPT_EMBEDS_PARALLEL_SETUPS = [
+    ParallelSetup(
+        tp_size=2,
+        pp_size=pp_size,
+        fuse_norm_quant=False,
+        fuse_act_quant=False,
+        eager_mode=False,
+        chunked_prefill=False,
+    )
+    for pp_size in [1, 2]
+]
+
+
+@pytest.mark.parametrize("parallel_setup", SP_PROMPT_EMBEDS_PARALLEL_SETUPS)
+@pytest.mark.parametrize("use_inductor_graph_partition", [True, False])
+@create_new_process_for_each_test()
+def test_tp_sp_generation_prompt_embeds(
+    parallel_setup: ParallelSetup,
+    num_gpus_available,
+    use_inductor_graph_partition: bool,
+):
+    if use_inductor_graph_partition and not is_torch_equal_or_newer("2.9.0.dev"):
+        pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
+
+    _compare_sp(
+        "hmellor/tiny-random-LlamaForCausalLM",
+        parallel_setup,
+        distributed_backend="mp",
+        runner="auto",
+        test_options=SPTestOptions(multi_node_only=False, load_format=None),
+        num_gpus_available=num_gpus_available,
+        use_inductor_graph_partition=use_inductor_graph_partition,
+        fuse_gemm_comms=False,
+        enable_prompt_embeds=True,
+        method="generate",
+        is_multimodal=False,
+    )
+
+
+@create_new_process_for_each_test()
+def test_tp_sp_nvfp4_generation(num_gpus_available: int):
+    if (
+        not current_platform.is_cuda()
+        or not current_platform.is_device_capability_family(100)
+    ):
+        pytest.skip("NVFP4 requires Blackwell")
+
+    _compare_sp(
+        NVFP4_MODEL_ID,
+        ParallelSetup(
+            tp_size=2,
+            pp_size=1,
+            fuse_norm_quant=True,
+            fuse_act_quant=True,
+            eager_mode=True,
+            chunked_prefill=False,
+        ),
+        "mp",
+        "auto",
+        SPTestOptions(
+            multi_node_only=False,
+            load_format="dummy",
+            model_info=NVFP4_MODEL_INFO,
+        ),
+        num_gpus_available,
+        use_inductor_graph_partition=False,
+        fuse_gemm_comms=False,
+        enable_prompt_embeds=False,
         method="generate",
         is_multimodal=False,
+        dtype="bfloat16",
     )
diff --git a/tests/compile/fullgraph/test_basic_correctness.py b/tests/compile/fullgraph/test_basic_correctness.py
index 33645699556b..35989dcde1dc 100644
--- a/tests/compile/fullgraph/test_basic_correctness.py
+++ b/tests/compile/fullgraph/test_basic_correctness.py
@@ -6,7 +6,6 @@
 
 from vllm.config import CompilationMode
 from vllm.platforms import current_platform
-from vllm.utils.torch_utils import cuda_device_count_stateless
 
 from ...utils import compare_all_settings
 
@@ -109,10 +108,10 @@ def test_compile_correctness(
     tp_size = test_setting.tp_size
     attn_backend = test_setting.attn_backend
     method = test_setting.method
-    if cuda_device_count_stateless() < pp_size * tp_size:
+    if current_platform.device_count() < pp_size * tp_size:
         pytest.skip(
             f"Need at least {pp_size}*{tp_size} CUDA gpus but got "
-            f"{cuda_device_count_stateless()}"
+            f"{current_platform.device_count()}"
         )
 
     final_args = [
@@ -146,6 +145,7 @@ def test_compile_correctness(
             all_args,
             all_envs,
             method=method if method != "generate" else "generate_close",
+            force_v1_runner=True,
         )
         all_envs.clear()
         all_args.clear()
@@ -159,4 +159,4 @@ def test_compile_correctness(
         all_args.append(final_args + [f"-cc.mode={mode.name}", "-cc.backend=eager"])
         all_envs.append({})
 
-    compare_all_settings(model, all_args, all_envs, method=method)
+    compare_all_settings(model, all_args, all_envs, method=method, force_v1_runner=True)
diff --git a/tests/compile/fullgraph/test_full_cudagraph.py b/tests/compile/fullgraph/test_full_cudagraph.py
index c7c737371fc3..95306e2062f8 100644
--- a/tests/compile/fullgraph/test_full_cudagraph.py
+++ b/tests/compile/fullgraph/test_full_cudagraph.py
@@ -170,14 +170,3 @@ def test_full_cudagraph(self, batch_size, max_tokens, llm_pair: tuple[LLM, LLM])
                 piecewise_res.outputs[0].text.lower()
                 == full_res.outputs[0].text.lower()
             )
-
-
-@pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda")
-def test_full_cudagraph_with_invalid_backend():
-    # Flex_Attention is not supported with full cuda graph
-    with pytest.raises(RuntimeError):
-        LLM(
-            model="Qwen/Qwen2-1.5B-Instruct",
-            compilation_config=CompilationConfig(cudagraph_mode="FULL"),
-            attention_config={"backend": "FLEX_ATTENTION"},
-        )
diff --git a/tests/compile/fullgraph/test_simple.py b/tests/compile/fullgraph/test_simple.py
index ed9c7a351e42..f1ea0e414d76 100644
--- a/tests/compile/fullgraph/test_simple.py
+++ b/tests/compile/fullgraph/test_simple.py
@@ -161,7 +161,14 @@ def _run_simple_model(
 @pytest.mark.parametrize("intermediate_unbacked", [True, False])
 @torch.inference_mode()
 @create_new_process_for_each_test("spawn")
-def test_simple_piecewise_compile(backend, intermediate_unbacked):
+def test_simple_piecewise_compile(backend, intermediate_unbacked, monkeypatch):
+    # `intermediate_unbacked` flips a control-flow branch inside
+    # `SillyModel.forward`, but the AOT-compile cache key only hashes the
+    # forward function's qualname + line number, so both parametrize variants
+    # share the same cache slot. Disabling the cache forces each variant to
+    # compile fresh; otherwise the second-running variant loads the first's
+    # artifact and segfaults with an illegal memory access.
+    monkeypatch.setenv("VLLM_DISABLE_COMPILE_CACHE", "1")
     _run_simple_model(
         splitting_ops=["silly::attention"],
         use_inductor_graph_partition=False,
diff --git a/tests/compile/fullgraph/test_toy_llama.py b/tests/compile/fullgraph/test_toy_llama.py
index 915fbc6ce7f3..69c758702e8a 100644
--- a/tests/compile/fullgraph/test_toy_llama.py
+++ b/tests/compile/fullgraph/test_toy_llama.py
@@ -17,7 +17,6 @@
 import torch
 from torch import nn
 
-from vllm.compilation.counter import compilation_counter
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import (
     CompilationConfig,
@@ -340,6 +339,8 @@ def run_model(llama_config, compile_config: CompilationConfig) -> torch.Tensor:
 def test_toy_llama(
     backend: str, use_inductor_graph_partition: bool, monkeypatch, tmp_path
 ):
+    from vllm.compilation.counter import compilation_counter
+
     # We disable the vLLM compile cache into a new tmp dir for 1 reason:
     # 1. To make sure we can properly track the number of Inductor compilations.
     monkeypatch.setenv("VLLM_DISABLE_COMPILE_CACHE", "1")
diff --git a/tests/compile/fusions_e2e/conftest.py b/tests/compile/fusions_e2e/conftest.py
index 7cd2acdf56c2..3a060874720d 100644
--- a/tests/compile/fusions_e2e/conftest.py
+++ b/tests/compile/fusions_e2e/conftest.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import logging
+from collections import defaultdict
 
 import pytest
 import regex as re
@@ -52,6 +53,16 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
         llm.llm_engine.vllm_config.compilation_config.compile_ranges_endpoints
     )
 
+    # Fetch match table from each worker via RPC and sum across workers.
+    worker_tables = llm.llm_engine.engine_core.collective_rpc(
+        "get_compilation_match_table"
+    )
+    combined: defaultdict[str, int] = defaultdict(int)
+    for table in worker_tables:
+        for k, v in table.items():
+            combined[k] += v
+    return dict(combined)
+
 
 @pytest.fixture
 def run_e2e_fusion_test(monkeypatch, caplog_mp_spawn):
@@ -73,21 +84,24 @@ def run(
         rocm_aiter_ops.refresh_env_variables()
 
         # Filter here to reduce code duplication
+        backend_name = attn_backend.backend.name.lower()
         requires_mla = "deepseek" in model_name.lower()
-        is_mla = "mla" in attn_backend.backend.name.lower()
+        is_mla = "mla" in backend_name
+        # DeepSeek V3.2 uses sparse MLA
+        requires_sparse = "v3.2" in model_name.lower()
+        is_sparse = "sparse" in backend_name
 
-        if requires_mla != is_mla:
+        if requires_mla != is_mla or requires_sparse != is_sparse:
             pytest.skip(
                 f"Incompatible model '{model_name}' and "
                 f"attention backend '{attn_backend.backend.name}'"
             )
 
-        # TODO: remove this after finishing migration from envs to model kwargs
-        if model_name == "openai/gpt-oss-20b":
-            from .common import is_blackwell
+        if attn_backend.backend.name == "FLASHINFER":
+            from vllm.utils.flashinfer import supports_trtllm_attention
 
-            if is_blackwell():
-                monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8", "1")
+            if not supports_trtllm_attention():
+                matches = matches._replace(attn_quant_fusion=0)
 
         # Disable, compile cache to make sure custom passes run.
         # Otherwise, we can't verify fusion happened through the logs.
@@ -101,6 +115,27 @@ def run(
         model_kwargs["attention_config"] = {"backend": attn_backend.backend.name}
         model_kwargs["tensor_parallel_size"] = tp_size
 
+        # Cap warmup memory: tests use small max_model_len (1024) but the
+        # engine default max_num_batched_tokens is 16384. Warming up large
+        # models (e.g. Llama-4-Scout-FP8) at 16384 tokens may trigger OOM.
+        model_kwargs.setdefault("max_num_batched_tokens", 8192)
+
+        # Sparse MLA models (DSv3.2) hit an over-strict inductor assertion in
+        # decompose_auto_functionalized when +rotary_embedding is forced into
+        # the compile graph. Disable qk_norm+rope fusion (which auto-enables
+        # +rotary_embedding) for this combo to avoid the known torch bug.
+        # TODO: remove once upstream torch fix lands.
+        if requires_sparse:
+            if "pass_config" in compilation_config:
+                compilation_config["pass_config"].enable_qk_norm_rope_fusion = False
+                matches_check = [m for m in matches_check if m != "norm_rope_fusion"]
+            # DSv3.2 sparse indexer uses persistent_topk with k=config.index_topk
+            # (2048 for the default config). max_model_len must be >= index_topk
+            # or the topk kernel raises "k out of range" at runtime.
+            model_kwargs["max_model_len"] = max(
+                model_kwargs.get("max_model_len", 0), 2048
+            )
+
         # Always compile the full graph instead of piecewise
         if not compilation_config["use_inductor_graph_partition"]:
             compilation_config["splitting_ops"] = []
@@ -113,7 +148,7 @@ def run(
         )
 
         with caplog_mp_spawn(logging.DEBUG) as log_holder:
-            run_model(full_compilation_config, model_name, **model_kwargs)
+            match_table = run_model(full_compilation_config, model_name, **model_kwargs)
 
         num_compile_ranges = len(full_compilation_config.get_compile_ranges())
         assert num_compile_ranges in [1, 2, 3]
@@ -155,11 +190,14 @@ def run(
             else:
                 num_ranges_activated = num_compile_ranges
 
+            # TODO: Remove log counting in unit tests
+            # once all matchers implement VllmFusionPatternMatcherPass
             n_expected = tp_size * num_ranges_activated
-            assert len(log_matches) == n_expected, (
-                f"Could not find {n_expected} {match_name} "
-                f"(found {len(log_matches)}) in:\n {log_holder.text}"
-            )
+            if match_name not in ("attn_quant_fusion", "act_quant_fusion"):
+                assert len(log_matches) == n_expected, (
+                    f"Could not find {n_expected} {match_name} "
+                    f"(found {len(log_matches)}) in:\n {log_holder.text}"
+                )
 
             expected_matches = getattr(matches, match_name)
 
@@ -215,6 +253,21 @@ def run(
                     f"{tp_size * (num_ranges_activated - 1)} large-range "
                     f"entries (SP took precedence), found: {log_matches}"
                 )
+
+            elif match_name == "act_quant_fusion":
+                actual_match = match_table.get("activation_quant_fusion_pass", 0)
+                assert actual_match == expected_matches * n_expected, (
+                    f"Could not find {expected_matches * n_expected} "
+                    f"{match_name} (found {actual_match})."
+                )
+            elif match_name == "attn_quant_fusion":
+                actual_match = match_table.get(
+                    "attn_quant_fusion", 0
+                ) + match_table.get("mla_attn_quant_fusion", 0)
+                assert actual_match == expected_matches * n_expected, (
+                    f"Could not find {expected_matches * n_expected} "
+                    f"{match_name} (found {actual_match})."
+                )
             else:
                 expected_matches_list = [expected_matches] * n_expected
                 assert sorted(log_matches) == expected_matches_list, (
diff --git a/tests/compile/fusions_e2e/models.py b/tests/compile/fusions_e2e/models.py
index 1a5f18cc0d50..32f1ea350637 100644
--- a/tests/compile/fusions_e2e/models.py
+++ b/tests/compile/fusions_e2e/models.py
@@ -58,6 +58,18 @@
     id="TRITON_MLA",
 )
 
+FLASHMLA_SPARSE_ATTN = pytest.param(
+    AttentionBackendCase(
+        backend=AttentionBackendEnum.FLASHMLA_SPARSE,
+        model_kwargs=dict(kv_cache_dtype="fp8_ds_mla"),
+    ),
+    id="FLASHMLA_SPARSE",
+    marks=pytest.mark.skipif(
+        not is_blackwell(),
+        reason="FlashMLA Sparse requires Blackwell",
+    ),
+)
+
 # Models
 llama3_8b = ModelFusionInfo(
     model_name="meta-llama/Llama-3.1-8B-Instruct",
@@ -141,6 +153,18 @@
     ),
 )
 
+deepseek_coder_v2_lite_fp8 = ModelFusionInfo(
+    model_name="RedHatAI/DeepSeek-Coder-V2-Lite-Instruct-FP8",
+    matches=lambda n_layers: Matches(
+        # first_k_dense_replace=1; MoE hides most rms+quant sites
+        rms_quant_fusion=1,
+        act_quant_fusion=min(1, n_layers),  # dense layers only
+        # MLA attn + static FP8 quant
+        attn_quant_fusion=n_layers,
+        ar_rms_fusion=n_layers * 2 + 1,
+    ),
+)
+
 deepseek_v3_fp8 = ModelFusionInfo(
     model_name="deepseek-ai/DeepSeek-V3",
     matches=lambda n_layers: Matches(
@@ -150,12 +174,10 @@
         # - post_attn_layernorm + MLP
         # 2 per MoE layer (remaining) due to MoE wrapping
         rms_quant_fusion=n_layers * 2 + min(3, n_layers),  # add for 3 dense layers
-        # TODO silu+block quant
-        #  act_quant_fusion=min(3, n_layers), # dense layers only
-        act_quant_fusion=0,
-        # MLA attn + quant not supported yet:
-        # https://github.com/vllm-project/vllm/issues/35792
-        attn_quant_fusion=0,
+        # silu+block quant
+        act_quant_fusion=min(3, n_layers),  # dense layers only
+        # MLA attn + per-group FP8 quant
+        attn_quant_fusion=n_layers,
         ar_rms_fusion=n_layers * 2 + 1,
         # TODO
         # sequence_parallel= n_layers * 2 + 1,
@@ -163,6 +185,28 @@
     ),
 )
 
+deepseek_r1_fp4 = ModelFusionInfo(
+    model_name="nvidia/DeepSeek-R1-0528-NVFP4-v2",
+    matches=lambda n_layers: Matches(
+        rms_quant_fusion=0,
+        act_quant_fusion=min(3, n_layers),
+        attn_quant_fusion=n_layers,
+        ar_rms_fusion=n_layers * 2 + 1,
+    ),
+)
+
+deepseek_v32_fp4 = ModelFusionInfo(
+    model_name="nvidia/DeepSeek-V3.2-NVFP4",
+    matches=lambda n_layers: Matches(
+        rms_quant_fusion=0,
+        # silu+quant on dense layers only; MoE hides the act+quant site
+        act_quant_fusion=min(3, n_layers),
+        # MLA attn + NVFP4 output quant fuses on sparse MLA output path
+        attn_quant_fusion=n_layers,
+        ar_rms_fusion=n_layers * 2 + 1,
+    ),
+)
+
 gpt_oss_20b = ModelFusionInfo(
     model_name="openai/gpt-oss-20b",
     matches=lambda n_layers: Matches(
@@ -170,4 +214,9 @@
         sequence_parallel=n_layers * 2 + 1,
         async_tp=n_layers * 2,
     ),
+    model_kwargs=(
+        {"quantization_config": {"moe": {"activation": "mxfp8"}}}
+        if is_blackwell()
+        else {}
+    ),
 )
diff --git a/tests/compile/fusions_e2e/test_tp1_quant.py b/tests/compile/fusions_e2e/test_tp1_quant.py
index 8895dadcecc9..fbb382b4458d 100644
--- a/tests/compile/fusions_e2e/test_tp1_quant.py
+++ b/tests/compile/fusions_e2e/test_tp1_quant.py
@@ -18,11 +18,15 @@
 from .models import (
     FLASHINFER_ATTN,
     FLASHINFER_MLA_ATTN,
+    FLASHMLA_SPARSE_ATTN,
     ROCM_AITER_UNIFIED_ATTN,
     ROCM_ATTN,
     TRITON_ATTN,
     TRITON_MLA_ATTN,
+    deepseek_coder_v2_lite_fp8,
+    deepseek_r1_fp4,
     deepseek_v3_fp8,
+    deepseek_v32_fp4,
     llama3_8b_fp4,
     llama3_8b_fp8,
     llama4_scout_fp4,
@@ -37,6 +41,7 @@
         (*llama3_8b_fp8, False),
         (*qwen3_a3b_fp8, False),
         (*qwen3_a3b_fp8, True),
+        (*deepseek_coder_v2_lite_fp8, False),
         (*deepseek_v3_fp8, False),
         (*deepseek_v3_fp8, True),
         pytest.param(
@@ -99,6 +104,8 @@ def test_tp1_fp8_fusions(
     model_kwargs["hf_overrides"] = hf_overrides(n_layers)
     model_kwargs["load_format"] = "dummy"
     model_kwargs["max_model_len"] = 1024
+    model_kwargs["kernel_config"] = {"enable_flashinfer_autotune": False}
+
     compilation_config = dict(
         use_inductor_graph_partition=inductor_graph_partition,
         custom_ops=custom_ops.split(","),
@@ -142,9 +149,12 @@ def test_tp1_fp8_fusions(
 
 @pytest.mark.parametrize(
     "model_name, matches_fn, model_kwargs, hf_overrides",
-    [llama3_8b_fp4, llama4_scout_fp4],
+    [llama3_8b_fp4, llama4_scout_fp4, deepseek_r1_fp4, deepseek_v32_fp4],
+)
+@pytest.mark.parametrize(
+    "attn_backend",
+    [FLASHINFER_ATTN, FLASHINFER_MLA_ATTN, FLASHMLA_SPARSE_ATTN],
 )
-@pytest.mark.parametrize("attn_backend", [FLASHINFER_ATTN])
 @pytest.mark.parametrize("n_layers", [6])
 @pytest.mark.parametrize("custom_ops", custom_ops_combos("rms_norm"))
 @pytest.mark.parametrize("inductor_graph_partition", INDUCTOR_GRAPH_PARTITION)
@@ -166,6 +176,7 @@ def test_tp1_fp4_fusions(
     model_kwargs["hf_overrides"] = hf_overrides(n_layers)
     model_kwargs["load_format"] = "dummy"
     model_kwargs["max_model_len"] = 1024
+    model_kwargs["kernel_config"] = {"enable_flashinfer_autotune": False}
 
     compilation_config = dict(
         use_inductor_graph_partition=inductor_graph_partition,
diff --git a/tests/compile/fusions_e2e/test_tp2_ar_rms.py b/tests/compile/fusions_e2e/test_tp2_ar_rms.py
index 301409b2bf6a..b5e2b2dc07ea 100644
--- a/tests/compile/fusions_e2e/test_tp2_ar_rms.py
+++ b/tests/compile/fusions_e2e/test_tp2_ar_rms.py
@@ -18,8 +18,14 @@
 from .models import (
     FLASHINFER_ATTN,
     FLASHINFER_MLA_ATTN,
+    FLASHMLA_SPARSE_ATTN,
+    ROCM_AITER_UNIFIED_ATTN,
+    ROCM_ATTN,
     TRITON_ATTN,
+    deepseek_coder_v2_lite_fp8,
+    deepseek_r1_fp4,
     deepseek_v3_fp8,
+    deepseek_v32_fp4,
     gpt_oss_20b,
     llama3_8b,
     llama3_8b_fp4,
@@ -30,14 +36,22 @@
     qwen3_a3b_fp8,
 )
 
-pytestmark = pytest.mark.skipif(not current_platform.is_cuda(), reason="Only test CUDA")
+pytestmark = pytest.mark.skipif(
+    not current_platform.is_cuda_alike(), reason="Only test CUDA/ROCm"
+)
 
 
 @multi_gpu_test(num_gpus=2)
 @pytest.mark.parametrize(
     "model_name, matches_fn, model_kwargs, hf_overrides",
     # qwen3 & dsv3 should still fuse AR+rms even though group quant is not yet supported
-    [llama3_8b_fp8, llama4_scout_fp8, qwen3_a3b_fp8, deepseek_v3_fp8],
+    [
+        llama3_8b_fp8,
+        llama4_scout_fp8,
+        qwen3_a3b_fp8,
+        deepseek_coder_v2_lite_fp8,
+        deepseek_v3_fp8,
+    ],
 )
 @pytest.mark.parametrize(
     "attn_backend", [TRITON_ATTN, FLASHINFER_ATTN, FLASHINFER_MLA_ATTN]
@@ -45,6 +59,7 @@
 @pytest.mark.parametrize("n_layers", [4])
 @pytest.mark.parametrize("custom_ops", custom_ops_combos("quant_fp8", "rms_norm"))
 @pytest.mark.parametrize("inductor_graph_partition", INDUCTOR_GRAPH_PARTITION)
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="Only test CUDA")
 def test_tp2_ar_rms_fp8_fusions(
     model_name: str,
     matches_fn: Callable[[int], Matches],
@@ -68,6 +83,7 @@ def test_tp2_ar_rms_fp8_fusions(
     model_kwargs["hf_overrides"] = hf_overrides(n_layers)
     model_kwargs["load_format"] = "dummy"
     model_kwargs["max_model_len"] = 1024
+    model_kwargs["kernel_config"] = {"enable_flashinfer_autotune": False}
 
     compilation_config = dict(
         use_inductor_graph_partition=inductor_graph_partition,
@@ -103,13 +119,17 @@ def test_tp2_ar_rms_fp8_fusions(
 @multi_gpu_test(num_gpus=2)
 @pytest.mark.parametrize(
     "model_name, matches_fn, model_kwargs, hf_overrides",
-    [llama3_8b_fp4, llama4_scout_fp4],
+    [llama3_8b_fp4, llama4_scout_fp4, deepseek_r1_fp4, deepseek_v32_fp4],
+)
+@pytest.mark.parametrize(
+    "attn_backend",
+    [FLASHINFER_ATTN, FLASHINFER_MLA_ATTN, FLASHMLA_SPARSE_ATTN],
 )
-@pytest.mark.parametrize("attn_backend", [FLASHINFER_ATTN])
 @pytest.mark.parametrize("n_layers", [4])
 @pytest.mark.parametrize("custom_ops", custom_ops_combos("rms_norm"))
 @pytest.mark.parametrize("inductor_graph_partition", INDUCTOR_GRAPH_PARTITION)
 @pytest.mark.skipif(not is_blackwell(), reason="Blackwell required for fp4")
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="Only test CUDA")
 def test_tp2_ar_rms_fp4_fusions(
     model_name: str,
     matches_fn: Callable[[int], Matches],
@@ -128,6 +148,7 @@ def test_tp2_ar_rms_fp4_fusions(
     model_kwargs["hf_overrides"] = hf_overrides(n_layers)
     model_kwargs["load_format"] = "dummy"
     model_kwargs["max_model_len"] = 1024
+    model_kwargs["kernel_config"] = {"enable_flashinfer_autotune": False}
 
     compilation_config = dict(
         use_inductor_graph_partition=inductor_graph_partition,
@@ -161,10 +182,19 @@ def test_tp2_ar_rms_fp4_fusions(
     "model_name, matches_fn, model_kwargs, hf_overrides",
     [llama3_8b, qwen3_a3b, gpt_oss_20b],
 )
-@pytest.mark.parametrize("attn_backend", [TRITON_ATTN])
+@pytest.mark.parametrize(
+    "attn_backend",
+    [
+        TRITON_ATTN,
+        FLASHINFER_ATTN,
+        ROCM_ATTN,
+        ROCM_AITER_UNIFIED_ATTN,
+    ],
+)
 @pytest.mark.parametrize("n_layers", [4])
-@pytest.mark.parametrize("custom_ops", custom_ops_combos("rms_norm"))
+@pytest.mark.parametrize("custom_ops", tuple(custom_ops_combos("rms_norm")))
 @pytest.mark.parametrize("inductor_graph_partition", INDUCTOR_GRAPH_PARTITION)
+@pytest.mark.skipif(not current_platform.is_cuda_alike(), reason="Only test CUDA/ROCm")
 def test_tp2_ar_rms_fusions(
     model_name: str,
     matches_fn: Callable[[int], Matches],
@@ -182,6 +212,7 @@ def test_tp2_ar_rms_fusions(
     model_kwargs["hf_overrides"] = hf_overrides(n_layers)
     model_kwargs["load_format"] = "dummy"
     model_kwargs["max_model_len"] = 1024
+    model_kwargs["kernel_config"] = {"enable_flashinfer_autotune": False}
 
     compilation_config = dict(
         use_inductor_graph_partition=inductor_graph_partition,
@@ -205,4 +236,5 @@ def test_tp2_ar_rms_fusions(
         compilation_config,
         matches_check,
         tp_size=2,
+        use_aiter=current_platform.is_rocm(),
     )
diff --git a/tests/compile/fusions_e2e/test_tp2_async_tp.py b/tests/compile/fusions_e2e/test_tp2_async_tp.py
index 9657d64b88f7..a22c68f4bf92 100644
--- a/tests/compile/fusions_e2e/test_tp2_async_tp.py
+++ b/tests/compile/fusions_e2e/test_tp2_async_tp.py
@@ -19,6 +19,7 @@
     FLASHINFER_ATTN,
     TRITON_ATTN,
     llama3_8b,
+    llama3_8b_fp4,
     llama3_8b_fp8,
     llama4_scout_fp8,
     qwen3_a3b,
@@ -46,18 +47,14 @@ def test_tp2_async_tp_fp8_fusions(
     custom_ops: str,
     inductor_graph_partition: bool,
     run_e2e_fusion_test,
-    monkeypatch,
 ):
     matches = matches_fn(n_layers)
 
-    if is_blackwell():
-        # Disable FlashInfer scaled_mm FP8 as it's not supported in async tp patterns
-        monkeypatch.setenv("VLLM_DISABLED_KERNELS", "FlashInferFP8ScaledMMLinearKernel")
-
     # Reduce size of model and skip weight loading time
     model_kwargs["hf_overrides"] = hf_overrides(n_layers)
     model_kwargs["load_format"] = "dummy"
     model_kwargs["max_model_len"] = 1024
+    model_kwargs["kernel_config"] = {"enable_flashinfer_autotune": False}
 
     compilation_config = dict(
         use_inductor_graph_partition=inductor_graph_partition,
@@ -95,6 +92,69 @@ def test_tp2_async_tp_fp8_fusions(
     )
 
 
+@multi_gpu_test(num_gpus=2)
+@pytest.mark.parametrize(
+    "model_name, matches_fn, model_kwargs, hf_overrides",
+    [llama3_8b_fp4],
+)
+@pytest.mark.parametrize("attn_backend", [FLASHINFER_ATTN])
+@pytest.mark.parametrize("n_layers", [4])
+@pytest.mark.parametrize("custom_ops", custom_ops_combos("rms_norm"))
+@pytest.mark.parametrize("inductor_graph_partition", INDUCTOR_GRAPH_PARTITION)
+@pytest.mark.skipif(not is_blackwell(), reason="Blackwell required for fp4")
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="Only test CUDA")
+def test_tp2_async_tp_nvfp4_fusions(
+    model_name: str,
+    matches_fn: Callable[[int], Matches],
+    model_kwargs: dict,
+    hf_overrides: Callable[[int], dict],
+    attn_backend: AttentionBackendCase,
+    n_layers: int,
+    custom_ops: str,
+    inductor_graph_partition: bool,
+    run_e2e_fusion_test,
+):
+    # NVFP4 currently wires the all-gather + GEMM path only.
+    matches = matches_fn(n_layers)._replace(async_tp=n_layers * 2)
+
+    # Reduce size of model and skip weight loading time
+    model_kwargs["hf_overrides"] = hf_overrides(n_layers)
+    model_kwargs["load_format"] = "dummy"
+    model_kwargs["max_model_len"] = 1024
+    model_kwargs["kernel_config"] = {"enable_flashinfer_autotune": False}
+
+    compilation_config = dict(
+        use_inductor_graph_partition=inductor_graph_partition,
+        custom_ops=custom_ops.split(","),
+        pass_config=PassConfig(
+            fuse_act_quant=True,
+            fuse_attn_quant=True,
+            enable_sp=True,
+            fuse_gemm_comms=True,
+            fuse_allreduce_rms=False,
+            # Override threshold for testing (models have small hidden_size)
+            sp_min_token_num=512,
+        ),
+    )
+
+    matches_check = [
+        "act_quant_fusion",
+        "attn_quant_fusion",
+        "sequence_parallel",
+        "async_tp",
+    ]
+
+    run_e2e_fusion_test(
+        model_name,
+        matches,
+        model_kwargs,
+        attn_backend,
+        compilation_config,
+        matches_check,
+        tp_size=2,
+    )
+
+
 @multi_gpu_test(num_gpus=2)
 @pytest.mark.parametrize(
     "model_name, matches_fn, model_kwargs, hf_overrides",
@@ -121,6 +181,7 @@ def test_tp2_async_tp_fusions(
     model_kwargs["hf_overrides"] = hf_overrides(n_layers)
     model_kwargs["load_format"] = "dummy"
     model_kwargs["max_model_len"] = 1024
+    model_kwargs["kernel_config"] = {"enable_flashinfer_autotune": False}
 
     compilation_config = dict(
         use_inductor_graph_partition=inductor_graph_partition,
@@ -171,14 +232,9 @@ def test_tp2_sp_ar_rms_fp8_fusions(
     custom_ops: str,
     inductor_graph_partition: bool,
     run_e2e_fusion_test,
-    monkeypatch,
 ):
     matches = matches_fn(n_layers)
 
-    if is_blackwell():
-        # Disable FlashInfer scaled_mm FP8 as it's not supported in async tp patterns
-        monkeypatch.setenv("VLLM_DISABLED_KERNELS", "FlashInferFP8ScaledMMLinearKernel")
-
     # Reduce size of model and skip weight loading time
     model_kwargs["hf_overrides"] = hf_overrides(n_layers)
     model_kwargs["load_format"] = "dummy"
diff --git a/tests/compile/h100/test_startup.py b/tests/compile/h100/test_startup.py
index 6a94322b1b61..78554a3e93da 100644
--- a/tests/compile/h100/test_startup.py
+++ b/tests/compile/h100/test_startup.py
@@ -34,7 +34,10 @@ def _run_vllm(vllm_runner):
             mode=CompilationMode.VLLM_COMPILE,
             cudagraph_mode=CUDAGraphMode.NONE,
         ),
-        num_gpu_blocks_override=8,
+        # Phi-tiny-MoE uses SWA, whose admission cap is `cdiv(L, block_size) + 1`
+        # at default block_size=16 — i.e. 17 blocks for max_model_len=256. Use
+        # 32 for headroom.
+        num_gpu_blocks_override=32,
     ):
         pass
 
@@ -56,6 +59,7 @@ def _cold_start(vllm_runner):
 def test_moe_startup(monkeypatch, vllm_runner, fresh_vllm_cache, mega_aot_artifact):
     monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
     monkeypatch.setenv("VLLM_USE_MEGA_AOT_ARTIFACT", mega_aot_artifact)
+    monkeypatch.setenv("VLLM_DEEP_GEMM_WARMUP", "skip")
 
     # Cold start in a forked child (must fork before CUDA init).
     # This model has 32 identical transformer layers which produce
@@ -135,10 +139,9 @@ class ModelStartupSpec(NamedTuple):
             model="deepseek-ai/DeepSeek-V3.2",
             hf_overrides=_SMALL_MOE_OVERRIDES,
             cold_artifacts_saved=4,
-            # TODO: https://github.com/vllm-project/vllm/issues/38051
-            # We shouldn't be saving any artifacts on warm start.
-            warm_artifacts_saved=4,
-            warm_artifacts_loaded=0,
+            # https://github.com/vllm-project/vllm/issues/38051
+            warm_artifacts_saved=0 if is_torch_equal_or_newer("2.12.0") else 4,
+            warm_artifacts_loaded=4 if is_torch_equal_or_newer("2.12.0") else 0,
         ),
         id="deepseek_v3.2",
     ),
@@ -147,10 +150,9 @@ class ModelStartupSpec(NamedTuple):
             model="moonshotai/Kimi-K2.5",
             hf_overrides={"text_config": _SMALL_MOE_OVERRIDES},
             cold_artifacts_saved=4,
-            # TODO: https://github.com/vllm-project/vllm/issues/38051
-            # We shouldn't be saving any artifacts on warm start.
-            warm_artifacts_saved=4,
-            warm_artifacts_loaded=0,
+            # https://github.com/vllm-project/vllm/issues/38051
+            warm_artifacts_saved=0 if is_torch_equal_or_newer("2.12.0") else 4,
+            warm_artifacts_loaded=4 if is_torch_equal_or_newer("2.12.0") else 0,
         ),
         id="kimi_k2.5",
     ),
@@ -191,7 +193,7 @@ def _run_model(vllm_runner, spec: ModelStartupSpec):
             cudagraph_mode=CUDAGraphMode.NONE,
             pass_config=PassConfig(fuse_allreduce_rms=False),
         ),
-        num_gpu_blocks_override=8,
+        num_gpu_blocks_override=16,
     ):
         pass
 
@@ -237,6 +239,7 @@ def _cold_start_model(vllm_runner, spec: ModelStartupSpec):
 @fork_new_process_for_each_test
 def test_model_startup(monkeypatch, vllm_runner, fresh_vllm_cache, spec):
     monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
+    monkeypatch.setenv("VLLM_DEEP_GEMM_WARMUP", "skip")
 
     # Cold start in a forked child (must fork before CUDA init).
     ctx = mp.get_context("fork")
diff --git a/tests/compile/passes/distributed/test_async_tp.py b/tests/compile/passes/distributed/test_async_tp.py
index 7edceee9811e..33e050d776e9 100644
--- a/tests/compile/passes/distributed/test_async_tp.py
+++ b/tests/compile/passes/distributed/test_async_tp.py
@@ -19,6 +19,7 @@
     VllmConfig,
     set_current_vllm_config,
 )
+from vllm.config.utils import Range
 from vllm.distributed import (
     tensor_model_parallel_all_gather,
     tensor_model_parallel_reduce_scatter,
@@ -31,6 +32,7 @@
 from vllm.utils.system_utils import update_environment_variables
 from vllm.utils.torch_utils import set_random_seed
 
+DEVICE_TYPE = current_platform.device_type
 FP8_DTYPE = current_platform.fp8_dtype()
 
 prompts = [
@@ -287,6 +289,22 @@ def run_torch_spawn(fn, nprocs):
     run_torch_spawn(async_tp_pass_on_test_model, num_processes)
 
 
+def test_async_tp_pass_requires_full_graph_compilation():
+    vllm_config = VllmConfig()
+    vllm_config.compilation_config.use_inductor_graph_partition = False
+    vllm_config.compilation_config.splitting_ops = [
+        "vllm::unified_attention_with_output"
+    ]
+
+    async_tp_pass = object.__new__(AsyncTPPass)
+    async_tp_pass.compilation_config = vllm_config.compilation_config
+
+    with pytest.raises(
+        AssertionError, match="AsyncTPPass requires full-graph compilation"
+    ):
+        async_tp_pass.is_applicable_for_range(Range(start=8, end=8))
+
+
 def async_tp_pass_on_test_model(
     local_rank: int,
     world_size: int,
@@ -299,7 +317,7 @@ def async_tp_pass_on_test_model(
 ):
     set_random_seed(0)
 
-    device = torch.device(f"cuda:{local_rank}")
+    device = torch.device(f"{DEVICE_TYPE}:{local_rank}")
     torch.accelerator.set_device_index(device)
     torch.set_default_device(device)
     torch.set_default_dtype(dtype)
@@ -324,7 +342,7 @@ def async_tp_pass_on_test_model(
             fuse_gemm_comms=True,
         ),
     )
-    vllm_config.device_config = DeviceConfig(device=torch.device("cuda"))
+    vllm_config.device_config = DeviceConfig(device=torch.device(DEVICE_TYPE))
 
     # this is a fake model name to construct the model config
     # in the vllm_config, it's not really used.
diff --git a/tests/compile/passes/distributed/test_fusion_all_reduce.py b/tests/compile/passes/distributed/test_fusion_all_reduce.py
index 92e7402c0537..1a175b8dd335 100644
--- a/tests/compile/passes/distributed/test_fusion_all_reduce.py
+++ b/tests/compile/passes/distributed/test_fusion_all_reduce.py
@@ -8,8 +8,12 @@
 import vllm.envs as envs
 from tests.compile.backend import TestBackend
 from tests.utils import TestFP8Layer, has_module_attribute, multi_gpu_test
+from vllm._aiter_ops import IS_AITER_FOUND, rocm_aiter_ops
 from vllm._custom_ops import cutlass_scaled_fp4_mm, scaled_fp4_quant
-from vllm.compilation.passes.fusion.allreduce_rms_fusion import AllReduceFusionPass
+from vllm.compilation.passes.fusion.allreduce_rms_fusion import (
+    AllReduceFusionPass,
+    RocmAiterAllReduceFusionPass,
+)
 from vllm.compilation.passes.utility.fix_functionalization import (
     FixFunctionalizationPass,
 )
@@ -37,14 +41,24 @@
 from vllm.utils.system_utils import update_environment_variables
 from vllm.utils.torch_utils import set_random_seed
 
+DEVICE_TYPE = current_platform.device_type
+
 
 class TestAllReduceRMSNormModel(torch.nn.Module):
-    def __init__(self, hidden_size=16, token_num=16, eps=1e-6):
+    def __init__(
+        self,
+        hidden_size=16,
+        token_num=16,
+        eps=1e-6,
+        dtype: torch.dtype = torch.float16,
+        use_aiter: bool = False,
+    ):
         super().__init__()
         self.hidden_size = hidden_size
         self.eps = eps
         self.norm = [RMSNorm(hidden_size, eps) for i in range(4)]
         self.w = [torch.rand(hidden_size, hidden_size) for _ in range(3)]
+        self.use_aiter = use_aiter
 
     def forward(self, x):
         # avoid having graph input be an arg to a pattern directly
@@ -72,13 +86,17 @@ def ops_in_model_before(self):
         return [torch.ops.vllm.all_reduce.default]
 
     def ops_in_model_after(self):
+        if self.use_aiter:
+            return [rocm_aiter_ops.get_fused_allreduce_rmsnorm_op()]
         return [torch.ops.vllm.flashinfer_trtllm_fused_allreduce_norm.default]
 
 
 class TestAllReduceRMSNormStaticQuantFP8Model(torch.nn.Module):
     quant_key = kFp8StaticTensorSym
 
-    def __init__(self, hidden_size=16, token_num=16, eps=1e-6):
+    def __init__(
+        self, hidden_size=16, token_num=16, eps=1e-6, dtype: torch.dtype = torch.float16
+    ):
         super().__init__()
         self.hidden_size = hidden_size
         self.eps = eps
@@ -88,6 +106,7 @@ def __init__(self, hidden_size=16, token_num=16, eps=1e-6):
                 weight_shape=(hidden_size, hidden_size),
                 activation_quant_key=self.quant_key,
                 weight_quant_key=self.quant_key,
+                input_dtype=dtype,
             )
             for i in range(3)
         ]
@@ -127,7 +146,9 @@ def ops_in_model_before(self):
 
 
 class TestAllReduceFusedAddRMSNormStaticQuantFP4Model(torch.nn.Module):
-    def __init__(self, hidden_size=16, token_num=16, eps=1e-6):
+    def __init__(
+        self, hidden_size=16, token_num=16, eps=1e-6, dtype: torch.dtype = torch.float16
+    ):
         super().__init__()
         self.hidden_size = hidden_size
         self.eps = eps
@@ -185,12 +206,36 @@ def ops_in_model_before(self):
 
 @multi_gpu_test(num_gpus=2)
 @pytest.mark.parametrize(
-    "test_model, enable_quant_fp8_custom_op",
+    "test_model, enable_quant_fp8_custom_op, use_aiter",
     [
-        (TestAllReduceRMSNormModel, False),
-        (TestAllReduceRMSNormStaticQuantFP8Model, True),
-        (TestAllReduceRMSNormStaticQuantFP8Model, False),
-        (TestAllReduceFusedAddRMSNormStaticQuantFP4Model, False),
+        (TestAllReduceRMSNormModel, False, IS_AITER_FOUND),
+        pytest.param(
+            TestAllReduceRMSNormStaticQuantFP8Model,
+            True,
+            False,
+            marks=pytest.mark.skipif(
+                current_platform.is_rocm(),
+                reason="Not supported on ROCm platform",
+            ),
+        ),
+        pytest.param(
+            TestAllReduceRMSNormStaticQuantFP8Model,
+            False,
+            False,
+            marks=pytest.mark.skipif(
+                current_platform.is_rocm(),
+                reason="Not supported on ROCm platform",
+            ),
+        ),
+        pytest.param(
+            TestAllReduceFusedAddRMSNormStaticQuantFP4Model,
+            False,
+            False,
+            marks=pytest.mark.skipif(
+                current_platform.is_rocm(),
+                reason="Not supported on ROCm platform",
+            ),
+        ),
     ],
 )
 @pytest.mark.parametrize("batch_size", [8])
@@ -201,9 +246,18 @@ def ops_in_model_before(self):
 @pytest.mark.parametrize("flashinfer_allreduce_backend", ["trtllm", "mnnvl"])
 @pytest.mark.skipif(envs.VLLM_TARGET_DEVICE not in ["cuda"], reason="Only test on CUDA")
 @pytest.mark.skipif(
-    not find_spec("flashinfer")
-    or not has_module_attribute("flashinfer.comm", "allreduce_fusion")
-    or not has_module_attribute("flashinfer.comm", "create_allreduce_fusion_workspace"),
+    current_platform.is_rocm() and not IS_AITER_FOUND,
+    reason="aiter is not found",
+)
+@pytest.mark.skipif(
+    current_platform.is_cuda()
+    and (
+        not find_spec("flashinfer")
+        or not has_module_attribute("flashinfer.comm", "allreduce_fusion")
+        or not has_module_attribute(
+            "flashinfer.comm", "create_allreduce_fusion_workspace"
+        )
+    ),
     reason="flashinfer is not found or flashinfer "
     "is not compiled with allreduce_fusion",
 )
@@ -216,7 +270,14 @@ def test_all_reduce_fusion_pass_replace(
     enable_rms_norm_custom_op,
     enable_quant_fp8_custom_op,
     flashinfer_allreduce_backend,
+    use_aiter: bool,
+    monkeypatch: pytest.MonkeyPatch,
 ):
+    if use_aiter:
+        with monkeypatch.context() as m:
+            m.setenv("VLLM_ROCM_USE_AITER", str(use_aiter))
+            rocm_aiter_ops.refresh_env_variables()
+
     num_processes = 2
     if (
         test_model == TestAllReduceFusedAddRMSNormStaticQuantFP4Model
@@ -240,6 +301,8 @@ def run_torch_spawn(fn, nprocs):
                 enable_rms_norm_custom_op,
                 enable_quant_fp8_custom_op,
                 flashinfer_allreduce_backend,
+                use_aiter,
+                monkeypatch,
             ),
             nprocs=nprocs,
         )
@@ -258,10 +321,12 @@ def all_reduce_fusion_pass_on_test_model(
     enable_rms_norm_custom_op,
     enable_quant_fp8_custom_op,
     flashinfer_allreduce_backend,
+    use_aiter: bool,
+    monkeypatch: pytest.MonkeyPatch,
 ):
     set_random_seed(0)
 
-    device = torch.device(f"cuda:{local_rank}")
+    device = torch.device(f"{DEVICE_TYPE}:{local_rank}")
     torch.accelerator.set_device_index(device)
     torch.set_default_device(device)
     torch.set_default_dtype(dtype)
@@ -293,7 +358,7 @@ def all_reduce_fusion_pass_on_test_model(
     vllm_config.compilation_config.pass_config = PassConfig(
         fuse_allreduce_rms=True, eliminate_noops=True
     )
-    vllm_config.device_config = DeviceConfig(device=torch.device("cuda"))
+    vllm_config.device_config = DeviceConfig(device=torch.device(DEVICE_TYPE))
     vllm_config.parallel_config.rank = local_rank  # Setup rank for debug path
 
     # this is a fake model name to construct the model config
@@ -304,7 +369,11 @@ def all_reduce_fusion_pass_on_test_model(
     )
     with set_current_vllm_config(vllm_config):
         initialize_model_parallel(tensor_model_parallel_size=world_size)
-        all_reduce_fusion_pass = AllReduceFusionPass(vllm_config)
+        all_reduce_fusion_pass = (
+            RocmAiterAllReduceFusionPass(vllm_config)
+            if use_aiter
+            else AllReduceFusionPass(vllm_config)
+        )
         noop_pass = NoOpEliminationPass(vllm_config)
         func_pass = FixFunctionalizationPass(vllm_config)
         cleanup_pass = PostCleanupPass(vllm_config)
@@ -314,7 +383,12 @@ def all_reduce_fusion_pass_on_test_model(
         )
 
         token_num = batch_size * seq_len
-        model = test_model_cls(hidden_size, token_num)
+        if test_model_cls is TestAllReduceRMSNormModel:
+            model = test_model_cls(
+                hidden_size, token_num, dtype=dtype, use_aiter=use_aiter
+            )
+        else:
+            model = test_model_cls(hidden_size, token_num, dtype=dtype)
 
         hidden_states = torch.randn((token_num, hidden_size), requires_grad=False)
 
diff --git a/tests/compile/passes/distributed/test_sequence_parallelism.py b/tests/compile/passes/distributed/test_sequence_parallelism.py
index e7bf330ccabe..c40d75f6754a 100644
--- a/tests/compile/passes/distributed/test_sequence_parallelism.py
+++ b/tests/compile/passes/distributed/test_sequence_parallelism.py
@@ -9,7 +9,6 @@
 from tests.utils import TestFP8Layer, multi_gpu_test
 from vllm.compilation.passes.fusion.rms_quant_fusion import RMSNormQuantFusionPass
 from vllm.compilation.passes.fusion.sequence_parallelism import SequenceParallelismPass
-from vllm.compilation.passes.fx_utils import find_auto_fn
 from vllm.compilation.passes.utility.noop_elimination import NoOpEliminationPass
 from vllm.compilation.passes.utility.post_cleanup import PostCleanupPass
 from vllm.compilation.passes.vllm_inductor_pass import VllmInductorPass
@@ -23,6 +22,7 @@
     get_current_vllm_config,
     set_current_vllm_config,
 )
+from vllm.config.utils import Range
 from vllm.distributed import tensor_model_parallel_all_reduce
 from vllm.distributed.parallel_state import (
     init_distributed_environment,
@@ -36,6 +36,8 @@
 from vllm.utils.system_utils import update_environment_variables
 from vllm.utils.torch_utils import set_random_seed
 
+DEVICE_TYPE = current_platform.device_type
+
 pytestmark = pytest.mark.skipif(not current_platform.is_cuda(), reason="Only test CUDA")
 
 FP8_DTYPE = current_platform.fp8_dtype()
@@ -86,13 +88,10 @@ def ops_in_model_after(self):
         ]
 
     def ops_in_model(self):
-        if RMSNorm.enabled():
-            return [
-                torch.ops._C.rms_norm.default,
-                torch.ops._C.fused_add_rms_norm.default,
-            ]
-        else:
-            return []
+        return [
+            torch.ops.vllm_ir.rms_norm,
+            torch.ops.vllm_ir.fused_add_rms_norm,
+        ]
 
 
 class TestAllReduceRMSNormStaticQuantFP8Model(torch.nn.Module):
@@ -109,6 +108,7 @@ def __init__(self, hidden_size=16, eps=1e-6):
                 weight_shape=(hidden_size, hidden_size),
                 activation_quant_key=self.quant_key,
                 weight_quant_key=self.quant_key,
+                input_dtype=self.vllm_config.model_config.dtype,
             )
             for i in range(3)
         ]
@@ -148,16 +148,17 @@ def ops_in_model_before(self):
     def ops_in_model(self):
         if self.vllm_config.compilation_config.pass_config.fuse_norm_quant:
             return [torch.ops._C.fused_add_rms_norm_static_fp8_quant.default]
-        elif RMSNorm.enabled():
-            return [
-                torch.ops._C.fused_add_rms_norm.default,
-            ]
-        elif any(layer.is_quant_fp8_enabled() for layer in self.fp8_linear_layers):
+        else:
+            quant_ops = (
+                [torch.ops._C.static_scaled_fp8_quant.default]
+                if any(layer.is_quant_fp8_enabled() for layer in self.fp8_linear_layers)
+                else [torch.ops.aten.reciprocal]
+            )
             return [
-                torch.ops._C.static_scaled_fp8_quant.default,
+                torch.ops.vllm_ir.rms_norm,
+                torch.ops.vllm_ir.fused_add_rms_norm,
+                *quant_ops,
             ]
-        else:
-            return []
 
 
 @multi_gpu_test(num_gpus=2)
@@ -213,6 +214,24 @@ def run_torch_spawn(fn, nprocs):
     run_torch_spawn(sequence_parallelism_pass_on_test_model, num_processes)
 
 
+def test_sequence_parallelism_pass_requires_full_graph_compilation():
+    vllm_config = VllmConfig()
+    vllm_config.compilation_config.use_inductor_graph_partition = False
+    vllm_config.compilation_config.splitting_ops = [
+        "vllm::unified_attention_with_output"
+    ]
+
+    sequence_parallelism_pass = object.__new__(SequenceParallelismPass)
+    sequence_parallelism_pass.compilation_config = vllm_config.compilation_config
+    sequence_parallelism_pass.min_token_num = 1
+
+    with pytest.raises(
+        AssertionError,
+        match="SequenceParallelismPass requires full-graph compilation",
+    ):
+        sequence_parallelism_pass.is_applicable_for_range(Range(start=8, end=8))
+
+
 def sequence_parallelism_pass_on_test_model(
     local_rank: int,
     world_size: int,
@@ -227,7 +246,7 @@ def sequence_parallelism_pass_on_test_model(
 ):
     set_random_seed(0)
 
-    device = torch.device(f"cuda:{local_rank}")
+    device = torch.device(f"{DEVICE_TYPE}:{local_rank}")
     torch.accelerator.set_device_index(device)
     torch.set_default_device(device)
     torch.set_default_dtype(dtype)
@@ -257,7 +276,7 @@ def sequence_parallelism_pass_on_test_model(
             eliminate_noops=True,
         ),
     )  # NoOp needed for fusion
-    device_config = DeviceConfig(device=torch.device("cuda"))
+    device_config = DeviceConfig(device=torch.device(DEVICE_TYPE))
 
     # this is a fake model name to construct the model config
     # in the vllm_config, it's not really used.
@@ -321,4 +340,4 @@ def sequence_parallelism_pass_on_test_model(
             assert backend.op_count(op, before=False) == 4
 
         for op in model.ops_in_model():
-            find_auto_fn(backend.graph_post_pass.nodes, op)
+            assert backend.op_count(op, before=False) > 0
diff --git a/vllm/entrypoints/pooling/score/__init__.py b/tests/compile/passes/ir/__init__.py
similarity index 100%
rename from vllm/entrypoints/pooling/score/__init__.py
rename to tests/compile/passes/ir/__init__.py
diff --git a/tests/compile/passes/ir/test_clone_cleanup.py b/tests/compile/passes/ir/test_clone_cleanup.py
new file mode 100644
index 000000000000..9fedb5fc9177
--- /dev/null
+++ b/tests/compile/passes/ir/test_clone_cleanup.py
@@ -0,0 +1,412 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Comprehensive tests for UnsafeCloneEliminationPass.
+
+This test suite exercises all possible valid FX graph patterns involving clones:
+1. Clone with no users (dead code)
+2. Clone with read-only users
+3. Clone with mutation users
+4. Clone of graph input
+5. Clone with original used after mutation
+6. Clone chains
+"""
+
+import pytest
+import torch
+from torch import fx
+from torch.fx.experimental.proxy_tensor import make_fx
+
+from vllm.compilation.passes.fx_utils import find_op_nodes
+from vllm.compilation.passes.inductor_pass import get_pass_context, pass_context
+from vllm.compilation.passes.ir.clone_elimination import (
+    UnsafeCloneEliminationPass,
+    user_writes_to_node,
+)
+from vllm.config import VllmConfig
+from vllm.config.utils import Range
+
+
+def count_clones(graph: fx.Graph) -> int:
+    """Count clone nodes in a graph."""
+    return len(list(find_op_nodes(torch.ops.aten.clone.default, graph)))
+
+
+@pytest.fixture(scope="function")
+def clone_cleanup_pass():
+    return UnsafeCloneEliminationPass(VllmConfig())
+
+
+@pytest.fixture(autouse=True)
+def setup_pass_context():
+    """Set up pass context for each test."""
+    with pass_context(compile_range=Range(1, 8192)):
+        yield
+
+
+class TestCloneCleanup:
+    """Test UnsafeCloneEliminationPass behavior on various graph patterns."""
+
+    def test_remove_clone_readonly_users(self, clone_cleanup_pass):
+        """Clone with only read-only users should be removed."""
+
+        def f(x: torch.Tensor) -> torch.Tensor:
+            x_clone = x.clone()
+            return x_clone + 1
+
+        inp = torch.randn(2, 3)
+        graph_module = make_fx(f)(inp)
+        assert count_clones(graph_module.graph) == 1
+
+        expected = graph_module(inp)
+        clone_cleanup_pass(graph_module.graph)
+        graph_module.recompile()
+        actual = graph_module(inp)
+
+        assert count_clones(graph_module.graph) == 0
+        torch.testing.assert_close(actual, expected)
+
+    def test_keep_clone_with_mutation_and_original_used_after(self, clone_cleanup_pass):
+        """Clone must be kept if it's mutated AND original is used after mutation."""
+
+        def f(x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+            x = x.relu()  # not a graph param
+            x_clone = x.clone()
+            x_clone.add_(1)
+            return x, x_clone
+
+        inp = torch.randn(2, 3)
+        graph_module = make_fx(f)(inp)
+        assert count_clones(graph_module.graph) == 1
+
+        expected = graph_module(inp)
+        clone_cleanup_pass(graph_module.graph)
+        graph_module.recompile()
+        actual = graph_module(inp)
+
+        # Clone should be KEPT because original is used after mutation
+        assert count_clones(graph_module.graph) == 1
+        torch.testing.assert_close(actual[0], expected[0])
+        torch.testing.assert_close(actual[1], expected[1])
+
+    def test_remove_clone_with_mutation_no_original_use(self, clone_cleanup_pass):
+        """Clone can be removed if it's mutated but original is not used after."""
+
+        def f(x: torch.Tensor) -> torch.Tensor:
+            x = x.relu()  # not a graph param
+            x_clone = x.clone()
+            x_clone.add_(1)
+            return x_clone
+
+        inp = torch.randn(2, 3)
+        graph_module = make_fx(f)(inp)
+        assert count_clones(graph_module.graph) == 1
+
+        expected = graph_module(inp)
+        clone_cleanup_pass(graph_module.graph)
+        graph_module.recompile()
+        actual = graph_module(inp)
+
+        assert count_clones(graph_module.graph) == 0
+        torch.testing.assert_close(actual, expected)
+
+    def test_clone_chain(self, clone_cleanup_pass):
+        """Test handling of clone chains: x -> clone1 -> clone2."""
+
+        def f(x: torch.Tensor) -> torch.Tensor:
+            x = x.relu()  # not a graph param
+            x1 = x.clone()
+            x2 = x1.clone()
+            return x2 + 1
+
+        inp = torch.randn(2, 3)
+        graph_module = make_fx(f)(inp)
+        assert count_clones(graph_module.graph) == 2
+
+        expected = graph_module(inp)
+        clone_cleanup_pass(graph_module.graph)
+        graph_module.recompile()
+        actual = graph_module(inp)
+
+        # Both clones should be removed
+        assert count_clones(graph_module.graph) == 0
+        torch.testing.assert_close(actual, expected)
+
+    def test_multiple_clones_of_same_input(self, clone_cleanup_pass):
+        """Test multiple independent clones of the same input."""
+
+        def f(x: torch.Tensor) -> torch.Tensor:
+            x1 = x.clone()
+            x2 = x.clone()
+            return x1 + x2
+
+        inp = torch.randn(2, 3)
+        graph_module = make_fx(f)(inp)
+        assert count_clones(graph_module.graph) == 2
+
+        expected = graph_module(inp)
+        clone_cleanup_pass(graph_module.graph)
+        graph_module.recompile()
+        actual = graph_module(inp)
+
+        # Both clones should be removed (only readonly uses)
+        assert count_clones(graph_module.graph) == 0
+        torch.testing.assert_close(actual, expected)
+
+    def test_no_clones_in_graph(self, clone_cleanup_pass):
+        """Test pass behavior when graph has no clones."""
+
+        def f(x: torch.Tensor) -> torch.Tensor:
+            return x + 1
+
+        inp = torch.randn(2, 3)
+        graph_module = make_fx(f)(inp)
+        assert count_clones(graph_module.graph) == 0
+
+        expected = graph_module(inp)
+        clone_cleanup_pass(graph_module.graph)
+        graph_module.recompile()
+        actual = graph_module(inp)
+
+        assert count_clones(graph_module.graph) == 0
+        torch.testing.assert_close(actual, expected)
+
+    def test_multiple_passes(self, clone_cleanup_pass):
+        """Test running the pass multiple times (should be idempotent)."""
+
+        def f(x: torch.Tensor) -> torch.Tensor:
+            x1 = x.clone()
+            return x1 + 1
+
+        inp = torch.randn(2, 3)
+        graph_module = make_fx(f)(inp)
+        assert count_clones(graph_module.graph) == 1
+
+        expected = graph_module(inp)
+
+        clone_cleanup_pass(graph_module.graph)
+        assert count_clones(graph_module.graph) == 0
+        graph_module.recompile()
+        actual = graph_module(inp)
+        torch.testing.assert_close(actual, expected)
+
+        clone_cleanup_pass(graph_module.graph)
+        assert count_clones(graph_module.graph) == 0
+        graph_module.recompile()
+        actual = graph_module(inp)
+        torch.testing.assert_close(actual, expected)
+
+    def test_output_node_no_write(self):
+        """Output nodes never write to their inputs."""
+
+        def f(x: torch.Tensor) -> torch.Tensor:
+            return x
+
+        graph_module = make_fx(f)(torch.randn(2, 3))
+        x_node = [n for n in graph_module.graph.nodes if n.op == "placeholder"][0]
+        output_node = [n for n in graph_module.graph.nodes if n.op == "output"][0]
+
+        assert not user_writes_to_node(output_node, x_node)
+
+    def test_readonly_op_no_write(self):
+        """Readonly operations don't write to inputs."""
+
+        def f(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+            return x + y
+
+        graph_module = make_fx(f)(torch.randn(2, 3), torch.randn(2, 3))
+        placeholders = [n for n in graph_module.graph.nodes if n.op == "placeholder"]
+        add_node = [
+            n
+            for n in graph_module.graph.nodes
+            if n.op == "call_function" and n.target == torch.ops.aten.add.Tensor
+        ][0]
+
+        assert not user_writes_to_node(add_node, placeholders[0])
+        assert not user_writes_to_node(add_node, placeholders[1])
+
+    def test_inplace_op_writes(self):
+        """Inplace operations write to first argument."""
+
+        def f(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+            x.add_(y)
+            return x
+
+        graph_module = make_fx(f)(torch.randn(2, 3), torch.randn(2, 3))
+        placeholders = [n for n in graph_module.graph.nodes if n.op == "placeholder"]
+        add_node = [
+            n
+            for n in graph_module.graph.nodes
+            if n.op == "call_function" and "add_" in str(n.target)
+        ][0]
+
+        # add_ writes to first arg but not second
+        assert user_writes_to_node(add_node, placeholders[0])
+        assert not user_writes_to_node(add_node, placeholders[1])
+
+    def test_copy_writes(self):
+        """copy_ operation writes to first argument."""
+
+        def f(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+            x.copy_(y)
+            return x
+
+        graph_module = make_fx(f)(torch.randn(2, 3), torch.randn(2, 3))
+        placeholders = [n for n in graph_module.graph.nodes if n.op == "placeholder"]
+        copy_node = [
+            n
+            for n in graph_module.graph.nodes
+            if n.op == "call_function" and "copy_" in str(n.target)
+        ][0]
+
+        assert user_writes_to_node(copy_node, placeholders[0])
+        assert not user_writes_to_node(copy_node, placeholders[1])
+
+    def test_auto_functionalized_not_a_write(self):
+        """auto_functionalized ops are follow-up uses, not writes."""
+        from torch._higher_order_ops.auto_functionalize import auto_functionalized
+
+        def f(x: torch.Tensor) -> torch.Tensor:
+            return x
+
+        graph_module = make_fx(f)(torch.randn(2, 3))
+        x_node = [n for n in graph_module.graph.nodes if n.op == "placeholder"][0]
+
+        # Create an auto_functionalized node in the graph
+        with graph_module.graph.inserting_before(None):
+            af_node = graph_module.graph.call_function(
+                auto_functionalized, kwargs={"input": x_node}
+            )
+
+        # auto_functionalized should not be treated as a write
+        assert not user_writes_to_node(af_node, x_node)
+
+    def test_higher_order_op_conservatively_writes(self):
+        """Other higher-order operators are conservatively treated as writes."""
+        from torch._ops import HigherOrderOperator
+
+        def f(x: torch.Tensor) -> torch.Tensor:
+            return x
+
+        graph_module = make_fx(f)(torch.randn(2, 3))
+        x_node = [n for n in graph_module.graph.nodes if n.op == "placeholder"][0]
+
+        # Create a concrete higher-order operator subclass
+        class MockHigherOrderOp(HigherOrderOperator):
+            def __call__(self, *args, **kwargs):
+                return args[0] if args else None
+
+        mock_hoo = MockHigherOrderOp("mock_higher_order_op")
+
+        with graph_module.graph.inserting_before(None):
+            hoo_node = graph_module.graph.call_function(mock_hoo, args=(x_node,))
+
+        # Should be conservative and assume it could write
+        assert user_writes_to_node(hoo_node, x_node)
+
+
+class TestCloneCleanupWithDonatedInputs:
+    """Test UnsafeCloneEliminationPass with donated input tracking via PassContext."""
+
+    @pytest.fixture(autouse=True)
+    def setup_pass_context(self):
+        """Set up pass context for each test."""
+        with pass_context(compile_range=Range(1, 8192)):
+            yield
+
+    def test_donated_input_clone_removed(self, clone_cleanup_pass):
+        """Clone of donated input should be removed."""
+
+        def f(x: torch.Tensor) -> torch.Tensor:
+            x_clone = x.clone()
+            x_clone.add_(1)
+            return x_clone
+
+        inp = torch.randn(2, 3)
+        graph_module = make_fx(f)(inp)
+        assert count_clones(graph_module.graph) == 1
+
+        # Mark first parameter as donated
+        get_pass_context().donated_input_ids = {0}
+
+        expected = graph_module(inp.clone())
+        clone_cleanup_pass(graph_module.graph)
+        graph_module.recompile()
+
+        # Clone should be removed since input is donated
+        assert count_clones(graph_module.graph) == 0
+
+        # Input can be mutated (donated)
+        inp_copy = inp.clone()
+        actual = graph_module(inp_copy)
+        torch.testing.assert_close(actual, expected)
+
+    def test_non_donated_input_clone_kept(self, clone_cleanup_pass):
+        """Clone of non-donated input with mutation should be kept."""
+
+        def f(x: torch.Tensor, y: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+            x_clone = x.clone()
+            x_clone.add_(1)
+            return x, x_clone
+
+        inp_x = torch.randn(2, 3)
+        inp_y = torch.randn(2, 3)
+        graph_module = make_fx(f)(inp_x, inp_y)
+        assert count_clones(graph_module.graph) == 1
+
+        # No donated inputs
+        get_pass_context().donated_input_ids = set()
+
+        expected = graph_module(inp_x.clone(), inp_y.clone())
+        clone_cleanup_pass(graph_module.graph)
+        graph_module.recompile()
+
+        # Clone should be kept since input is not donated and original is used
+        assert count_clones(graph_module.graph) == 1
+
+        # Verify inputs are not mutated
+        inp_x_before = inp_x.clone()
+        inp_y_before = inp_y.clone()
+        actual = graph_module(inp_x, inp_y)
+        torch.testing.assert_close(
+            inp_x, inp_x_before, msg="Input x should not be mutated"
+        )
+        torch.testing.assert_close(
+            inp_y, inp_y_before, msg="Input y should not be mutated"
+        )
+        torch.testing.assert_close(actual[0], expected[0])
+        torch.testing.assert_close(actual[1], expected[1])
+
+    def test_mixed_donated_inputs(self, clone_cleanup_pass):
+        """Test with some inputs donated and some not."""
+
+        def f(x: torch.Tensor, y: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+            x_clone = x.clone()
+            x_clone.add_(1)
+            y_clone = y.clone()
+            y_clone.add_(2)
+            return x_clone, y_clone
+
+        inp_x = torch.randn(2, 3)
+        inp_y = torch.randn(2, 3)
+        graph_module = make_fx(f)(inp_x, inp_y)
+        assert count_clones(graph_module.graph) == 2
+
+        # Only x is donated
+        get_pass_context().donated_input_ids = {0}
+
+        expected = graph_module(inp_x.clone(), inp_y.clone())
+        clone_cleanup_pass(graph_module.graph)
+        graph_module.recompile()
+
+        # x_clone removed (x is donated), y_clone kept (y is not donated)
+        assert count_clones(graph_module.graph) == 1
+
+        # Verify y is not mutated (x can be mutated since it's donated)
+        inp_y_before = inp_y.clone()
+        actual = graph_module(inp_x.clone(), inp_y)
+        torch.testing.assert_close(
+            inp_y, inp_y_before, msg="Input y should not be mutated"
+        )
+        torch.testing.assert_close(actual[0], expected[0])
+        torch.testing.assert_close(actual[1], expected[1])
diff --git a/tests/compile/passes/ir/test_inplace_functionalization.py b/tests/compile/passes/ir/test_inplace_functionalization.py
new file mode 100644
index 000000000000..1e8d5662162f
--- /dev/null
+++ b/tests/compile/passes/ir/test_inplace_functionalization.py
@@ -0,0 +1,465 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Tests for IR inplace functionalization pass integration.
+
+This test suite verifies that the inplace functionalization pass, lowering pass,
+and clone cleanup pass work together correctly with donated buffer tracking.
+"""
+
+from collections.abc import Callable
+
+import pytest
+import torch
+import torch._dynamo.exc
+from torch import nn
+
+import vllm.kernels  # noqa: F401 to register kernels
+from vllm.compilation.passes.inductor_pass import InductorPass, get_pass_context
+from vllm.compilation.passes.ir.clone_elimination import (
+    UnsafeCloneEliminationPass,
+)
+from vllm.compilation.passes.ir.inplace_functionalization import (
+    VllmIRInplaceFunctionalizationPass,
+)
+from vllm.compilation.passes.ir.lowering_pass import VllmIRLoweringPass
+from vllm.config import VllmConfig
+from vllm.ir import ops
+from vllm.platforms import current_platform
+from vllm.triton_utils import HAS_TRITON, tl, triton
+
+from ...backend import TestBackend
+
+
+class StoreDonationInfoPass(InductorPass):
+    def __init__(self):
+        self.donated_input_ids_sets: list[set[int]] = []
+
+    def __call__(self, *args, **kwargs):
+        ctx = get_pass_context()
+        self.donated_input_ids_sets += [ctx.donated_input_ids]
+
+
+class MaybeInplaceModel(nn.Module):
+    """Model using only maybe_inplace variants."""
+
+    def __init__(self, hidden_size=16):
+        super().__init__()
+        self.weight1 = nn.Parameter(torch.ones(hidden_size, dtype=torch.bfloat16))
+        self.weight2 = nn.Parameter(torch.ones(hidden_size, dtype=torch.bfloat16))
+
+    def forward(
+        self, x: torch.Tensor, residual1: torch.Tensor, residual2: torch.Tensor
+    ):
+        # First maybe_inplace - x & residual1 are donated
+        x_normed1, residual_out1 = ops.fused_add_rms_norm.maybe_inplace(
+            x, residual1, self.weight1, 1e-5
+        )
+        # Second maybe_inplace - residual2 is donated
+        x_normed2, residual_out2 = ops.fused_add_rms_norm.maybe_inplace(
+            x_normed1, residual2, self.weight2, 1e-5
+        )
+        return x_normed2, residual_out1, residual_out2
+
+
+class FunctionalModel(nn.Module):
+    """Model using only functional (default) variants."""
+
+    def __init__(self, hidden_size=16):
+        super().__init__()
+        self.weight1 = nn.Parameter(torch.ones(hidden_size, dtype=torch.bfloat16))
+        self.weight2 = nn.Parameter(torch.ones(hidden_size, dtype=torch.bfloat16))
+
+    def forward(
+        self, x: torch.Tensor, residual1: torch.Tensor, residual2: torch.Tensor
+    ):
+        # First functional - no donation
+        x_normed1, residual_out1 = ops.fused_add_rms_norm(
+            x, residual1, self.weight1, 1e-5
+        )
+        # Second functional - no donation
+        x_normed2, residual_out2 = ops.fused_add_rms_norm(
+            x_normed1, residual2, self.weight2, 1e-5
+        )
+        return x_normed2, residual_out1, residual_out2
+
+
+class MixedModel(nn.Module):
+    """Model mixing maybe_inplace and functional variants."""
+
+    def __init__(self, hidden_size=16):
+        super().__init__()
+        self.weight1 = nn.Parameter(torch.ones(hidden_size, dtype=torch.bfloat16))
+        self.weight2 = nn.Parameter(torch.ones(hidden_size, dtype=torch.bfloat16))
+
+    def forward(
+        self, x: torch.Tensor, residual1: torch.Tensor, residual2: torch.Tensor
+    ):
+        # First maybe_inplace - x & residual1 are donated
+        x_normed1, residual_out1 = ops.fused_add_rms_norm.maybe_inplace(
+            x, residual1, self.weight1, 1e-5
+        )
+        # Second functional - no donation, x_normed1 must be preserved as it's returned
+        x_normed2, residual_out2 = ops.fused_add_rms_norm(
+            x_normed1, residual2, self.weight2, 1e-5
+        )
+        # Return both to prevent x_normed1 from being optimized away
+        return x_normed1, x_normed2, residual_out1, residual_out2
+
+
+class ModelWithTritonAfterMaybeInplace(nn.Module):
+    """
+    Model using maybe_inplace followed by a Triton kernel.
+    Test clone elimination can handle Triton in the graph
+    """
+
+    def __init__(self, hidden_size=16):
+        super().__init__()
+        self.weight = nn.Parameter(torch.ones(hidden_size, dtype=torch.bfloat16))
+
+        @triton.jit
+        def _triton_add_kernel(
+            x_ptr,
+            y_ptr,
+            n_elements,
+            BLOCK_SIZE: tl.constexpr,
+        ):
+            pid = tl.program_id(axis=0)
+            block_start = pid * BLOCK_SIZE
+            offsets = block_start + tl.arange(0, BLOCK_SIZE)
+            mask = offsets < n_elements
+            x = tl.load(x_ptr + offsets, mask=mask)
+            y = x + 0.1
+            tl.store(y_ptr + offsets, y, mask=mask)
+
+        def triton_add(x: torch.Tensor) -> torch.Tensor:
+            """Simple Triton add kernel."""
+            y = torch.empty_like(x)
+            n_elements = x.numel()
+            grid = (triton.cdiv(n_elements, 256),)
+            _triton_add_kernel[grid](x, y, n_elements, BLOCK_SIZE=256)
+            return y
+
+        self.triton_add = triton_add
+
+    def forward(self, x: torch.Tensor, residual: torch.Tensor, residual2: torch.Tensor):
+        x_normed, residual_out = ops.fused_add_rms_norm.maybe_inplace(
+            x, residual, self.weight, 1e-5
+        )
+
+        x_processed = self.triton_add(x_normed)
+
+        # x_processed does not need to be cloned, residual2 does
+        x_normed2, residual_out2 = ops.fused_add_rms_norm(
+            x_processed, residual2, self.weight, 1e-5
+        )
+        return x_normed2, residual_out2
+
+
+skipif_no_triton = pytest.mark.skipif(not HAS_TRITON, reason="Requires Triton")
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike(),
+    reason="Only test on cuda and rocm platform",
+)
+@pytest.mark.parametrize(
+    "model_class,expected_functionalized,expected_donated,expected_clones",
+    [
+        # 2 inplace calls, all activations donated, all clones eliminated
+        (MaybeInplaceModel, 2, 3, 0),
+        # No inplace calls, no donations, 3 clones (one eliminated)
+        (FunctionalModel, 0, 0, 3),
+        # One inplace call, two donated activations, 2 clones
+        (MixedModel, 1, 2, 2),
+        # One inplace call, two donated, 1 clone remaining
+        pytest.param(ModelWithTritonAfterMaybeInplace, 1, 2, 1, marks=skipif_no_triton),
+    ],
+)
+def test_inplace_functionalization(
+    default_vllm_config: VllmConfig,
+    model_class,
+    expected_functionalized: int,
+    expected_clones: int,
+    expected_donated: int,
+):
+    """Test inplace functionalization, lowering, and clone cleanup."""
+    torch.set_default_device(current_platform.device_type)
+
+    # Use vllm_c so inplace path is triggered
+    default_vllm_config.kernel_config.ir_op_priority.fused_add_rms_norm = [
+        "vllm_c",
+        "native",
+    ]
+
+    # Create passes in order they run during compilation
+    functionalization_pass = VllmIRInplaceFunctionalizationPass(default_vllm_config)
+    lowering_pass = VllmIRLoweringPass(default_vllm_config)
+    donated_info_pass = StoreDonationInfoPass()
+    cleanup_pass = UnsafeCloneEliminationPass(default_vllm_config)
+
+    # Set up backend with pre-grad pass
+    backend = TestBackend(lowering_pass, donated_info_pass, cleanup_pass)
+    backend.inductor_config["pre_grad_custom_pass"] = functionalization_pass
+
+    model = model_class()
+    x = torch.randn(8, 16, dtype=torch.bfloat16)
+    residual1 = torch.randn(8, 16, dtype=torch.bfloat16)
+    residual2 = torch.randn(8, 16, dtype=torch.bfloat16)
+
+    with default_vllm_config.kernel_config.ir_op_priority.set_priority():
+        # Reference output without optimization
+        ref_output = model(x.clone(), residual1.clone(), residual2.clone())
+
+        # Compile with inplace optimization
+        compiled_model = torch.compile(model, backend=backend, fullgraph=True)
+        output = compiled_model(x.clone(), residual1.clone(), residual2.clone())
+
+    # Verify correctness (relaxed tolerance for bfloat16)
+    for i in range(len(ref_output)):
+        torch.testing.assert_close(output[i], ref_output[i], rtol=1e-2, atol=1e-2)
+
+    # Verify expected number of ops were functionalized
+    func_ops = functionalization_pass.functionalized_ops
+    assert len(func_ops) == int(bool(expected_functionalized))
+    if expected_functionalized > 0:
+        assert "fused_add_rms_norm" in func_ops
+        assert func_ops["fused_add_rms_norm"] == expected_functionalized
+
+    # Verify lowering happened (2 ops in all cases)
+    assert "fused_add_rms_norm" in lowering_pass.selected_impls
+    assert len(lowering_pass.selected_impls["fused_add_rms_norm"]) == 2
+    assert all(
+        provider == "vllm_c"
+        for node, provider in lowering_pass.selected_impls["fused_add_rms_norm"].items()
+    ), lowering_pass.selected_impls
+
+    # Verify correct number of donated IDs
+    assert len(donated_info_pass.donated_input_ids_sets) == 1
+    assert len(donated_info_pass.donated_input_ids_sets[0]) == expected_donated
+
+    # Verify expected number of clones after cleanup
+    actual_clones = backend.op_count(torch.ops.aten.clone.default, before=False)
+    assert actual_clones == expected_clones, (
+        f"Expected {expected_clones} clones, got {actual_clones}:"
+        f"{backend.print_graphs()}"
+    )
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike(),
+    reason="Only test on cuda and rocm platform",
+)
+def test_donated_buffer_context_propagation(default_vllm_config):
+    """Test that donated_input_ids propagates correctly through pass_context."""
+    torch.set_default_device(current_platform.device_type)
+
+    # Create a custom backend that inspects pass_context in cleanup pass
+    functionalization_pass = VllmIRInplaceFunctionalizationPass(default_vllm_config)
+    lowering_pass = VllmIRLoweringPass(default_vllm_config)
+
+    donation_info_pass = StoreDonationInfoPass()
+    cleanup_pass = UnsafeCloneEliminationPass(default_vllm_config)
+
+    backend = TestBackend(lowering_pass, donation_info_pass, cleanup_pass)
+    backend.inductor_config["pre_grad_custom_pass"] = functionalization_pass
+
+    model = MaybeInplaceModel()
+    x = torch.randn(8, 16, dtype=torch.bfloat16)
+    residual1 = torch.randn(8, 16, dtype=torch.bfloat16)
+    residual2 = torch.randn(8, 16, dtype=torch.bfloat16)
+
+    compiled_model = torch.compile(model, backend=backend, fullgraph=True)
+    compiled_model(x.clone(), residual1.clone(), residual2.clone())
+
+    donated_ids_seen = donation_info_pass.donated_input_ids_sets
+    # Verify donated_input_ids was set and propagated
+    assert len(donated_ids_seen) == 1
+    # Should have donated inputs (exact indices depend on AOTAutograd)
+    assert len(donated_ids_seen[0]) == 3
+    # All donated ids should be valid non-negative integers
+    for idx in donated_ids_seen[0]:
+        assert isinstance(idx, int) and idx >= 0, f"Invalid donated index: {idx}"
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike(),
+    reason="Only test on cuda and rocm platform",
+)
+def test_maybe_inplace_reuse_error(default_vllm_config):
+    """Test that reusing a donated activation input raises ValueError."""
+    torch.set_default_device(current_platform.device_type)
+
+    class ReuseModel(nn.Module):
+        """Model that incorrectly reuses a donated activation input."""
+
+        def __init__(self, hidden_size=16):
+            super().__init__()
+            self.weight = nn.Parameter(torch.ones(hidden_size, dtype=torch.bfloat16))
+
+        def forward(self, x: torch.Tensor, residual: torch.Tensor):
+            # x is donated to maybe_inplace
+            x_normed, residual_out = ops.fused_add_rms_norm.maybe_inplace(
+                x, residual, self.weight, 1e-5
+            )
+            # ERROR: x is used again after being donated
+            return x_normed + x  # This should raise ValueError
+
+    functionalization_pass = VllmIRInplaceFunctionalizationPass(default_vllm_config)
+    lowering_pass = VllmIRLoweringPass(default_vllm_config)
+    cleanup_pass = UnsafeCloneEliminationPass(default_vllm_config)
+
+    backend = TestBackend(lowering_pass, cleanup_pass)
+    backend.inductor_config["pre_grad_custom_pass"] = functionalization_pass
+
+    model = ReuseModel()
+    x = torch.randn(8, 16, dtype=torch.bfloat16)
+    residual = torch.randn(8, 16, dtype=torch.bfloat16)
+
+    # Compilation should raise BackendCompilerFailed wrapping ValueError
+    with pytest.raises(
+        torch._dynamo.exc.BackendCompilerFailed,
+        match="is used again after the node",
+    ):
+        compiled_model = torch.compile(model, backend=backend, fullgraph=True)
+        compiled_model(x.clone(), residual.clone())
+
+
+# Piecewise compilation tests with graph splitting
+
+
+@torch.library.custom_op("vllm::test_split_marker", mutates_args=())
+def test_split_marker(x: torch.Tensor) -> torch.Tensor:
+    """Identity op that marks a split point for piecewise compilation."""
+    return x.clone()
+
+
+@test_split_marker.register_fake
+def _fake_split_marker(x: torch.Tensor) -> torch.Tensor:
+    return torch.empty_like(x)
+
+
+class TransformerBlockWithSplits(nn.Module):
+    """Transformer block with explicit split points for piecewise compilation."""
+
+    def __init__(self, hidden_size=32, intermediate_size=128):
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+
+        # Attention-like projection
+        self.attn_proj = nn.Linear(
+            hidden_size, hidden_size, bias=False, dtype=torch.bfloat16
+        )
+
+        # Post-attention norm
+        self.post_attn_norm = nn.Parameter(
+            torch.ones(hidden_size, dtype=torch.bfloat16)
+        )
+
+        # MLP
+        self.gate_proj = nn.Linear(
+            hidden_size, intermediate_size, bias=False, dtype=torch.bfloat16
+        )
+        self.up_proj = nn.Linear(
+            hidden_size, intermediate_size, bias=False, dtype=torch.bfloat16
+        )
+        self.down_proj = nn.Linear(
+            intermediate_size, hidden_size, bias=False, dtype=torch.bfloat16
+        )
+
+        # Post-MLP norm
+        self.post_mlp_norm = nn.Parameter(torch.ones(hidden_size, dtype=torch.bfloat16))
+
+    def forward(self, x: torch.Tensor):
+        # Attention block with residual
+        residual1 = x
+        attn_out = self.attn_proj(x)
+
+        # Fused add + norm (maybe_inplace: residual1 is donated)
+        normed1, residual1 = ops.fused_add_rms_norm.maybe_inplace(
+            attn_out, residual1, self.post_attn_norm, 1e-5
+        )
+
+        # Force a graph split here
+        normed1 = torch.ops.vllm.test_split_marker(normed1)
+
+        # MLP block
+        gate = self.gate_proj(normed1)
+        up = self.up_proj(normed1)
+        mlp_out = self.down_proj(gate * torch.nn.functional.silu(up))
+
+        # Fused add + norm (maybe_inplace: residual1 is donated)
+        normed2, residual2 = ops.fused_add_rms_norm.maybe_inplace(
+            mlp_out, residual1, self.post_mlp_norm, 1e-5
+        )
+
+        return normed2, residual2
+
+
+def with_dyn_arg(fn: Callable, arg_index: int, dim_index: int):
+    def inner(*args):
+        torch._dynamo.mark_dynamic(args[arg_index], dim_index)
+        return fn(*args)
+
+    return inner
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike(),
+    reason="Only test on cuda and rocm platform",
+)
+def test_piecewise_compilation_with_donated_buffers(monkeypatch, fresh_vllm_cache):
+    """
+    Test piecewise compilation with donated buffers across graph splits.
+    Utilizes a custom splitting op. Uses fresh cache to avoid compilation caching.
+    """
+    torch.set_default_device(current_platform.device_type)
+
+    # Disable compilation cache to avoid serialization issues
+    monkeypatch.setenv("VLLM_DISABLE_COMPILE_CACHE", "1")
+
+    from vllm.compilation.backends import VllmBackend
+    from vllm.config import CompilationConfig, VllmConfig
+
+    # Create config with custom splitting op
+    store_donation_info = StoreDonationInfoPass()
+    vllm_config = VllmConfig(
+        compilation_config=CompilationConfig(
+            custom_ops=["all"],
+            splitting_ops=["vllm::test_split_marker"],
+            inductor_compile_config={"post_grad_custom_post_pass": store_donation_info},
+        )
+    )
+
+    backend = VllmBackend(vllm_config)
+
+    model = TransformerBlockWithSplits()
+    x = torch.randn(8, 32, dtype=torch.bfloat16)
+
+    # Reference output
+    ref_output = with_dyn_arg(model, 0, 0)(x.clone())
+
+    # Compile with piecewise compilation (graph will split at split_marker)
+    compiled_model = torch.compile(model, backend=backend, fullgraph=False)
+    output = with_dyn_arg(compiled_model, 0, 0)(x.clone())
+
+    # Verify correctness (relaxed tolerance for bfloat16)
+    torch.testing.assert_close(output[0], ref_output[0], rtol=1e-2, atol=1e-2)
+    torch.testing.assert_close(output[1], ref_output[1], rtol=1e-2, atol=1e-2)
+
+    # Verify the model was split into multiple submodules
+    assert hasattr(backend, "split_gm"), "Backend should have split graph module"
+
+    # Should have at least 2 submodules (split by test_split_marker op)
+    submodules = list(backend.split_gm.named_children())
+    num_submodules = len(submodules)
+    assert num_submodules >= 2, (
+        f"Expected at least 2 submodules (split), got {num_submodules}"
+    )
+
+    # Check that donation info was propagated correctly
+    donated_inputs_sets = store_donation_info.donated_input_ids_sets
+    assert len(donated_inputs_sets) == 2
+    assert len(donated_inputs_sets[0]) == 1
+    assert len(donated_inputs_sets[1]) == 1
diff --git a/tests/compile/passes/ir/test_lowering.py b/tests/compile/passes/ir/test_lowering.py
new file mode 100644
index 000000000000..b7ca55e7d1e6
--- /dev/null
+++ b/tests/compile/passes/ir/test_lowering.py
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
+import torch
+from torch import nn
+
+import vllm.kernels  # noqa: F401 to register kernels
+from vllm import ir
+from vllm.compilation.passes.ir.lowering_pass import (
+    VllmIRLoweringPass,
+)
+from vllm.config import get_current_vllm_config
+from vllm.ir import ops
+from vllm.platforms import current_platform
+
+from ...backend import TestBackend
+
+
+class Model(nn.Module):
+    def __init__(self, hidden_size=16, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.hidden_size = hidden_size
+        self.weight = torch.ones(hidden_size, dtype=torch.bfloat16)
+
+    def forward(self, x):
+        x1 = x + 4.0
+        x2 = ops.rms_norm(x1, self.weight, 1e-5)
+        x3 = x2 * 5.0
+        # no weight
+        x4 = ops.rms_norm(x3, None, 1e-5)
+        x5 = x4 / 2.0
+        # dispatch to native due to variance_size parameter
+        x6 = ops.rms_norm(x5, self.weight, 1e-5, self.hidden_size // 2)
+        return x6 + 3.0
+
+
+@pytest.mark.parametrize("rms_provider", ops.rms_norm.supported_providers())
+def test_lowering_rms_norm(rms_provider, default_vllm_config):
+    torch.set_default_device(current_platform.device_type)
+
+    lowering_pass = VllmIRLoweringPass(get_current_vllm_config())
+    backend = TestBackend(lowering_pass)
+    backend_unlowered = TestBackend()
+
+    model = Model()
+    x = torch.randn(8, 16, dtype=torch.bfloat16)
+    with (
+        ops.rms_norm.set_priority([rms_provider, "native"]),
+        ir.enable_torch_wrap(True),
+    ):
+        compiled_model = torch.compile(model, backend=backend, fullgraph=True)
+        compiled_unlowered_model = torch.compile(
+            model, backend=backend_unlowered, fullgraph=True
+        )
+        output = compiled_model(x)
+        output_unlowered = compiled_unlowered_model(x)
+
+    selected = lowering_pass.selected_impls["rms_norm"]
+    assert len(selected) == 3
+    assert selected["rms_norm"] == rms_provider
+    assert selected["rms_norm_1"] == rms_provider
+    assert selected["rms_norm_2"] == "native"
+
+    # Compiled function guards on global value, avoid recompilation
+    with ir.enable_torch_wrap(True):
+        output2 = compiled_model(x)
+
+    torch.testing.assert_close(output_unlowered, output)
+    torch.testing.assert_close(output_unlowered, output2)
diff --git a/tests/compile/passes/test_double_aiter_rms_quant_fusion.py b/tests/compile/passes/test_double_aiter_rms_quant_fusion.py
new file mode 100644
index 000000000000..161c956548a7
--- /dev/null
+++ b/tests/compile/passes/test_double_aiter_rms_quant_fusion.py
@@ -0,0 +1,166 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Unit tests for the DoubleQuant fan-out variants registered by
+``RocmAiterRMSNormQuantFusionPass``.
+
+Both variants target a 1-to-2 fan-out where one ``rms_norm`` output feeds
+two distinct ``rocm_aiter_group_fp8_quant`` consumers and rewrite it into
+two independent fused ``rms_norm + group_fp8_quant`` ops:
+
+* ``DoubleAiterRMSFp8GroupQuantPattern`` matches the un-viewed shape
+  (e.g. Kimi-K2.5 / DSR1).
+* ``DoubleAiterRMSFp8GroupQuantViewPattern`` (this PR) is the view-tolerant
+  sibling that additionally matches the
+  ``rms_norm -> view -> group_fp8_quant`` shape that DSv3.2's MLA indexer
+  q_c norm exposes through ``Fp8BlockScaledMMLinearKernel.apply_weights``'s
+  2D-flatten boilerplate.
+"""
+
+import pytest
+import torch
+
+import vllm.config
+from tests.compile.backend import TestBackend
+from vllm._aiter_ops import is_aiter_found_and_supported, rocm_aiter_ops
+from vllm.compilation.passes.utility.noop_elimination import NoOpEliminationPass
+from vllm.compilation.passes.utility.post_cleanup import PostCleanupPass
+from vllm.config import (
+    CompilationConfig,
+    CompilationMode,
+    ModelConfig,
+    PassConfig,
+    VllmConfig,
+)
+
+EPS = 1e-5
+HIDDEN_SIZE = 256
+GROUP_SIZE = 128
+
+
+class _NoViewDoubleQuantModel(torch.nn.Module):
+    """``rms_norm -> 2x group_fp8_quant`` fan-out (Kimi-K2.5 / DSR1 shape)."""
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(HIDDEN_SIZE, dtype=torch.bfloat16))
+
+    def forward(
+        self, x: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        # avoid graph input being a direct arg to a matched pattern node
+        x = torch.relu(x)
+        rms = torch.ops.vllm_ir.rms_norm(x, self.weight, EPS)
+        q1, s1 = torch.ops.vllm.rocm_aiter_group_fp8_quant.default(rms, GROUP_SIZE)
+        q2, s2 = torch.ops.vllm.rocm_aiter_group_fp8_quant.default(rms, GROUP_SIZE)
+        return q1, s1, q2, s2
+
+
+class _ViewDoubleQuantModel(torch.nn.Module):
+    """``rms_norm -> view -> 2x group_fp8_quant`` fan-out (DSv3.2 shape).
+
+    Reproduces the FX-graph shape produced by ``Fp8BlockScaledMMLinearKernel``'s
+    2D-flatten before the FP8 group quant op.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(HIDDEN_SIZE, dtype=torch.bfloat16))
+
+    def forward(
+        self, x: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        x = torch.relu(x)
+        rms = torch.ops.vllm_ir.rms_norm(x, self.weight, EPS)
+        view = rms.view(-1, rms.shape[-1])
+        q1, s1 = torch.ops.vllm.rocm_aiter_group_fp8_quant.default(view, GROUP_SIZE)
+        q2, s2 = torch.ops.vllm.rocm_aiter_group_fp8_quant.default(view, GROUP_SIZE)
+        return q1, s1, q2, s2
+
+
+@pytest.mark.parametrize(
+    "model_cls",
+    [_NoViewDoubleQuantModel, _ViewDoubleQuantModel],
+    ids=["no_view", "with_view"],
+)
+@pytest.mark.skipif(
+    not is_aiter_found_and_supported(),
+    reason="Only test on ROCm with AITER installed and supported",
+)
+def test_double_aiter_rms_fp8_group_quant_fusion(
+    model_cls: type[torch.nn.Module],
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """
+    Both fan-out shapes (with and without an intermediate view) must fuse
+    into ``rocm_aiter_rmsnorm_fp8_group_quant``: the no-view shape via
+    ``DoubleAiterRMSFp8GroupQuantPattern`` and the viewed shape via the
+    new ``DoubleAiterRMSFp8GroupQuantViewPattern`` sibling.
+
+    A failure on the ``with_view`` parametrization is a regression on the
+    DSv3.2 q_c norm path that this PR's view-tolerant pattern is intended
+    to cover.
+    """
+    torch._dynamo.reset()
+
+    vllm_config = VllmConfig(
+        model_config=ModelConfig(dtype=torch.bfloat16),
+        compilation_config=CompilationConfig(
+            mode=CompilationMode.VLLM_COMPILE,
+            custom_ops=["+rms_norm", "+quant_fp8"],
+            pass_config=PassConfig(
+                fuse_norm_quant=True,
+                eliminate_noops=True,
+            ),
+        ),
+    )
+
+    with vllm.config.set_current_vllm_config(vllm_config), monkeypatch.context() as m:
+        from vllm.compilation.passes.fusion.rocm_aiter_fusion import (
+            RocmAiterRMSNormQuantFusionPass,
+        )
+
+        torch.set_default_device("cuda")
+        torch.set_default_dtype(torch.bfloat16)
+        torch.manual_seed(0)
+
+        m.setenv("VLLM_ROCM_USE_AITER", "1")
+        rocm_aiter_ops.refresh_env_variables()
+
+        fusion_pass = RocmAiterRMSNormQuantFusionPass(vllm_config)
+        passes = [
+            NoOpEliminationPass(vllm_config),
+            fusion_pass,
+            PostCleanupPass(vllm_config),
+        ]
+        backend = TestBackend(*passes)
+        model = model_cls()
+
+        x = torch.randn(8, HIDDEN_SIZE)
+        torch._dynamo.mark_dynamic(x, 0)
+
+        outputs_unfused = model(x)
+        model_fused = torch.compile(model, backend=backend)
+        outputs_fused = model_fused(x)
+
+        # Both consumers must be rewritten into the fused op (one
+        # ``register_replacement`` rewrite covers the whole 1-to-2 fan-out).
+        assert fusion_pass.matched_count == 1, (
+            f"Expected the {model_cls.__name__} fan-out to fuse via the "
+            f"DoubleQuant pattern (matched_count == 1), got "
+            f"{fusion_pass.matched_count}"
+        )
+
+        fused_op = rocm_aiter_ops.get_rmsnorm_group_fused_quant_op()
+        backend.check_after_ops([fused_op])
+
+        # Numerical parity sanity-check: the fused pair must match the
+        # unfused pair on FP8 outputs (exact byte-equality is the goal,
+        # but allow a tiny tolerance for any residual numeric noise).
+        for fused_t, unfused_t in zip(outputs_fused, outputs_unfused):
+            torch.testing.assert_close(
+                fused_t.to(torch.float32),
+                unfused_t.to(torch.float32),
+                atol=1e-2,
+                rtol=1e-2,
+            )
diff --git a/tests/compile/passes/test_functionalization.py b/tests/compile/passes/test_functionalization.py
index 8d13e622d81c..31bf225d4135 100644
--- a/tests/compile/passes/test_functionalization.py
+++ b/tests/compile/passes/test_functionalization.py
@@ -23,6 +23,7 @@
     ModelConfig,
     PassConfig,
     VllmConfig,
+    get_current_vllm_config,
     set_current_vllm_config,
 )
 from vllm.model_executor.layers.activation import SiluAndMul
@@ -49,6 +50,7 @@ def __init__(self, hidden_size: int = 128):
                 weight_shape=(hidden_size, hidden_size),
                 activation_quant_key=self.quant_key,
                 weight_quant_key=self.quant_key,
+                input_dtype=get_current_vllm_config().model_config.dtype,
             )
 
     def forward(self, x):
@@ -92,6 +94,7 @@ def __init__(self, hidden_size=16, intermediate_size=32):
                 weight_shape=(hidden_size, intermediate_size),
                 activation_quant_key=self.quant_key,
                 weight_quant_key=self.quant_key,
+                input_dtype=get_current_vllm_config().model_config.dtype,
             )
 
     def forward(self, hidden_states, residual):
@@ -114,16 +117,16 @@ def forward(self, hidden_states, residual):
         else:
             return norm_output, residual_output
 
-    def example_inputs(self, batch_size=8, hidden_size=16, seq_len=16):
-        hidden_states = torch.randn((batch_size * seq_len, hidden_size))
-        residual = torch.randn((batch_size * seq_len, hidden_size))
+    def example_inputs(self, batch_size=8, seq_len=16):
+        hidden_states = torch.randn((batch_size * seq_len, self.hidden_size))
+        residual = torch.randn((batch_size * seq_len, self.intermediate_size))
         return (hidden_states, residual)
 
     def ops_in_model(self, do_fusion):
         if TEST_FP8 and do_fusion:
             return [torch.ops._C.fused_add_rms_norm_static_fp8_quant.default]
         else:
-            return [torch.ops._C.fused_add_rms_norm.default]
+            return []
 
     def ops_not_in_model(self):
         return []
diff --git a/tests/compile/passes/test_fuse_act_padding.py b/tests/compile/passes/test_fuse_act_padding.py
index f3f3bda47277..41ae22501d90 100644
--- a/tests/compile/passes/test_fuse_act_padding.py
+++ b/tests/compile/passes/test_fuse_act_padding.py
@@ -7,7 +7,7 @@
 
 import vllm.config
 from tests.compile.backend import TestBackend
-from vllm._aiter_ops import is_aiter_found_and_supported, rocm_aiter_ops
+from vllm._aiter_ops import rocm_aiter_ops
 from vllm.compilation.passes.utility.noop_elimination import NoOpEliminationPass
 from vllm.compilation.passes.utility.post_cleanup import PostCleanupPass
 from vllm.config import (
@@ -59,7 +59,7 @@ def forward(self, x):
 
     def ops_in_model_before(self):
         return [
-            rocm_aiter_ops.get_rmsnorm_fused_add_op(),
+            torch.ops.vllm_ir.fused_add_rms_norm,
             torch.ops.aten.constant_pad_nd,
         ]
 
@@ -72,9 +72,8 @@ def ops_in_model_after(self):
 @pytest.mark.parametrize("hidden_size", [2880])
 @pytest.mark.parametrize("num_local_experts", [128])
 @pytest.mark.parametrize("x_pad_to_multiple", [256])
-@pytest.mark.skipif(
-    not is_aiter_found_and_supported(),
-    reason="Only test on ROCm with AITER installed and supported",
+@pytest.mark.skip(
+    reason="Skipping for now because of the accuracy issue. See: https://github.com/ROCm/aiter/issues/2614"
 )
 def test_fuse_act_padding(
     dtype: torch.dtype,
@@ -116,7 +115,6 @@ def test_fuse_act_padding(
 
         x = torch.rand(1, hidden_size)
         torch._dynamo.mark_dynamic(x, 0)
-
         outputs_unfused = model(x)
 
         model_fused = torch.compile(model, backend=backend)
diff --git a/tests/compile/passes/test_fuse_mla_dual_rms_norm.py b/tests/compile/passes/test_fuse_mla_dual_rms_norm.py
new file mode 100644
index 000000000000..080417c98966
--- /dev/null
+++ b/tests/compile/passes/test_fuse_mla_dual_rms_norm.py
@@ -0,0 +1,148 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Unit test for the MLADualRMSNormFusionPass.
+
+The pass fuses paired q/kv RMS norms in MLA attention into a single
+fused_mla_dual_rms_norm op backed by AITER's fused_qk_rmsnorm kernel.
+"""
+
+import pytest
+import torch
+
+import vllm.config
+from tests.compile.backend import TestBackend
+from vllm._aiter_ops import is_aiter_found_and_supported, rocm_aiter_ops
+from vllm.compilation.passes.utility.noop_elimination import NoOpEliminationPass
+from vllm.compilation.passes.utility.post_cleanup import PostCleanupPass
+from vllm.config import (
+    CompilationConfig,
+    CompilationMode,
+    ModelConfig,
+    PassConfig,
+    VllmConfig,
+)
+from vllm.model_executor.layers.layernorm import RMSNorm
+
+# MLA attention geometry for DeepSeek-V3 / Kimi-K2
+Q_DIM = 1536
+KV_C_DIM = 512
+K_PE_DIM = 64
+EPS = 1e-6
+
+
+class MLADualRMSNormTestModel(torch.nn.Module):
+    """
+    Minimal model reproducing the MLA dual RMS norm pattern:
+        linear -> split([q_dim, kv_dim])
+            +-- q_c (getitem 0) -> rms_norm(q_w, eps) -> linear
+            +-- kv_lora (getitem 1) -> split([kv_c_dim, k_pe_dim])
+                    +-- kv_c (getitem 0) -> rms_norm(kv_w, eps)
+                    +-- k_pe
+    """
+
+    def __init__(
+        self,
+        hidden_size: int,
+        q_dim: int = Q_DIM,
+        kv_c_dim: int = KV_C_DIM,
+        k_pe_dim: int = K_PE_DIM,
+        eps: float = EPS,
+    ):
+        super().__init__()
+        self.q_dim = q_dim
+        self.kv_dim = kv_c_dim + k_pe_dim
+        self.kv_c_dim = kv_c_dim
+        self.k_pe_dim = k_pe_dim
+
+        self.proj = torch.nn.Linear(hidden_size, q_dim + self.kv_dim, bias=False)
+        self.q_norm = RMSNorm(q_dim, eps=eps)
+        self.kv_norm = RMSNorm(kv_c_dim, eps=eps)
+        self.q_b_proj = torch.nn.Linear(q_dim, hidden_size, bias=False)
+
+    def forward(
+        self, x: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        # Avoid graph input being a direct arg to a matched pattern node
+        x = torch.relu(x)
+
+        projected = self.proj(x)
+
+        q_c, kv_lora = projected.split([self.q_dim, self.kv_dim], dim=-1)
+        kv_c, k_pe = kv_lora.split([self.kv_c_dim, self.k_pe_dim], dim=-1)
+
+        q_normed = self.q_norm(q_c)
+        kv_normed = self.kv_norm(kv_c)
+
+        q_out = self.q_b_proj(q_normed)
+        return q_out, kv_normed, k_pe
+
+    def ops_in_model_before(self):
+        return [torch.ops.vllm_ir.rms_norm.default]
+
+    def ops_in_model_after(self):
+        return [torch.ops.vllm.fused_mla_dual_rms_norm.default]
+
+
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
+@pytest.mark.parametrize("hidden_size", [7168])
+@pytest.mark.skipif(
+    not is_aiter_found_and_supported(),
+    reason="Only test on ROCm with AITER installed and supported",
+)
+def test_fuse_mla_dual_rms_norm(
+    dtype: torch.dtype,
+    hidden_size: int,
+    monkeypatch: pytest.MonkeyPatch,
+):
+    torch._dynamo.reset()
+
+    vllm_config = VllmConfig(
+        model_config=ModelConfig(dtype=dtype),
+        compilation_config=CompilationConfig(
+            mode=CompilationMode.VLLM_COMPILE,
+            custom_ops=["+rms_norm"],
+            pass_config=PassConfig(
+                fuse_mla_dual_rms_norm=True,
+                eliminate_noops=True,
+            ),
+        ),
+    )
+
+    with vllm.config.set_current_vllm_config(vllm_config), monkeypatch.context() as m:
+        from vllm.compilation.passes.fusion.rocm_aiter_fusion import (
+            MLADualRMSNormFusionPass,
+        )
+
+        torch.set_default_device("cuda")
+        torch.set_default_dtype(dtype)
+        torch.manual_seed(42)
+
+        m.setenv("VLLM_ROCM_USE_AITER", "1")
+        rocm_aiter_ops.refresh_env_variables()
+
+        fusion_pass = MLADualRMSNormFusionPass(vllm_config)
+        passes = [
+            NoOpEliminationPass(vllm_config),
+            fusion_pass,
+            PostCleanupPass(vllm_config),
+        ]
+        backend = TestBackend(*passes)
+        model = MLADualRMSNormTestModel(hidden_size)
+
+        x = torch.randn(1, hidden_size)
+        torch._dynamo.mark_dynamic(x, 0)
+
+        outputs_unfused = model(x)
+
+        model_fused = torch.compile(model, backend=backend)
+        outputs_fused = model_fused(x)
+
+        torch.testing.assert_close(outputs_unfused, outputs_fused, atol=1e-2, rtol=1e-2)
+
+        assert fusion_pass.matched_count == 1, (
+            f"Expected 1 fused pair, got {fusion_pass.matched_count}"
+        )
+
+        backend.check_before_ops(model.ops_in_model_before())
+        backend.check_after_ops(model.ops_in_model_after())
diff --git a/tests/compile/passes/test_fusion.py b/tests/compile/passes/test_fusion.py
index 5df9424a5023..92d1902b2c2f 100644
--- a/tests/compile/passes/test_fusion.py
+++ b/tests/compile/passes/test_fusion.py
@@ -6,9 +6,10 @@
 import torch
 
 import vllm.config
+import vllm.ir.ops
 import vllm.plugins
 from tests.compile.backend import TestBackend
-from tests.utils import TestBlockFP8Layer, TestFP8Layer
+from tests.utils import TestFP8Layer
 from vllm._aiter_ops import IS_AITER_FOUND, rocm_aiter_ops
 from vllm.compilation.passes.fusion.matcher_utils import QUANT_OPS
 from vllm.compilation.passes.fusion.rms_quant_fusion import (
@@ -16,7 +17,6 @@
     FusedRMSQuantKey,
     RMSNormQuantFusionPass,
 )
-from vllm.compilation.passes.fx_utils import find_op_nodes
 from vllm.compilation.passes.utility.noop_elimination import NoOpEliminationPass
 from vllm.compilation.passes.utility.post_cleanup import PostCleanupPass
 from vllm.config import (
@@ -27,31 +27,35 @@
     VllmConfig,
 )
 from vllm.model_executor.kernels.linear import (
+    AiterFp8BlockScaledMMKernel,
     ChannelWiseTorchFP8ScaledMMLinearKernel,
+    CutlassFp8BlockScaledMMKernel,
     CutlassFP8ScaledMMLinearKernel,
+    DeepGemmFp8BlockScaledMMKernel,
+    FlashInferFp8DeepGEMMDynamicBlockScaledKernel,
     FlashInferFP8ScaledMMLinearKernel,
-    FP8ScaledMMLinearKernel,
     PerTensorTorchFP8ScaledMMLinearKernel,
     ROCmFP8ScaledMMLinearKernel,
     RowWiseTorchFP8ScaledMMLinearKernel,
+    TritonFp8BlockScaledMMKernel,
+    _KernelT,
 )
-from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.layernorm import RMSNorm, RMSNormGated
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     GroupShape,
-    QuantKey,
-    ScaleDesc,
+    create_fp8_quant_key,
 )
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
     cutlass_block_fp8_supported,
 )
 from vllm.platforms import current_platform
 from vllm.utils.deep_gemm import (
+    is_deep_gemm_e8m0_used,
     is_deep_gemm_supported,
 )
 
 FP8_DTYPE = current_platform.fp8_dtype()
 
-RMS_OP = torch.ops._C.rms_norm.default
 RMS_ADD_OP = torch.ops._C.fused_add_rms_norm.default
 
 # Kernel and group_shape combinations: (kernel, group_shape)
@@ -66,9 +70,12 @@
     (PerTensorTorchFP8ScaledMMLinearKernel, GroupShape.PER_TENSOR),
     # ChannelWiseTorchFP8ScaledMMLinearKernel only supports per-token
     (ChannelWiseTorchFP8ScaledMMLinearKernel, GroupShape.PER_TOKEN),
-    # Blockwise group shapes (no kernel abstraction)
-    (None, GroupShape(1, 128)),
-    (None, GroupShape(1, 64)),
+    # Blockwise group shapes
+    (FlashInferFp8DeepGEMMDynamicBlockScaledKernel, GroupShape(1, 128)),
+    (CutlassFp8BlockScaledMMKernel, GroupShape(1, 128)),
+    (DeepGemmFp8BlockScaledMMKernel, GroupShape(1, 128)),
+    (TritonFp8BlockScaledMMKernel, GroupShape(1, 128)),
+    (TritonFp8BlockScaledMMKernel, GroupShape(1, 64)),
 ]
 
 # ROCm kernels
@@ -80,8 +87,8 @@
     # ChannelWiseTorchFP8ScaledMMLinearKernel only supports per-token
     (ChannelWiseTorchFP8ScaledMMLinearKernel, GroupShape.PER_TOKEN),
     # Blockwise group shapes (no kernel abstraction)
-    (None, GroupShape(1, 128)),
-    (None, GroupShape(1, 64)),
+    (TritonFp8BlockScaledMMKernel, GroupShape(1, 128)),
+    (TritonFp8BlockScaledMMKernel, GroupShape(1, 64)),
 ]
 
 KERNEL_GROUPSHAPE_COMBINATIONS = (
@@ -92,16 +99,14 @@
 
 # For Aiter tests we toggle use_aiter_quant_op
 AITER_KERNEL_GROUPSHAPE_COMBINATIONS = [
-    # Per-token with ROCmFP8ScaledMMLinearKernel
-    (ROCmFP8ScaledMMLinearKernel, GroupShape.PER_TENSOR, False),
     # Per-token with RowWiseTorchFP8ScaledMMLinearKernel
     (RowWiseTorchFP8ScaledMMLinearKernel, GroupShape.PER_TOKEN, True),
     (RowWiseTorchFP8ScaledMMLinearKernel, GroupShape.PER_TOKEN, False),
     # Per-token with ChannelWiseTorchFP8ScaledMMLinearKernel
     (ChannelWiseTorchFP8ScaledMMLinearKernel, GroupShape.PER_TOKEN, True),
     (ChannelWiseTorchFP8ScaledMMLinearKernel, GroupShape.PER_TOKEN, False),
-    # Blockwise (no kernel abstraction)
-    (None, GroupShape(1, 128), True),
+    # Blockwise
+    (AiterFp8BlockScaledMMKernel, GroupShape(1, 128), True),
 ]
 
 
@@ -110,8 +115,9 @@ def __init__(
         self,
         hidden_size: int,
         eps: float,
-        force_kernel: FP8ScaledMMLinearKernel | None,
+        force_kernel: type[_KernelT] | None,
         group_shape: GroupShape,
+        dtype: torch.dtype,
         use_aiter_fusion: bool = False,
         use_aiter_quant: bool = False,
         *args,
@@ -129,54 +135,42 @@ def __init__(
         is_blockwise = group_shape.is_per_group()
 
         if is_blockwise:
-            act_quant_scale_desc = ScaleDesc(torch.float32, False, group_shape)
-            self.activation_quant_key = QuantKey(
-                dtype=FP8_DTYPE, scale=act_quant_scale_desc, symmetric=True
+            block_size = group_shape.col
+            self.activation_quant_key = create_fp8_quant_key(
+                static=False, group_shape=group_shape
             )
-            self.fp8_linear_layers = [
-                TestBlockFP8Layer(
-                    weight_shape=(hidden_size, hidden_size),
-                    group_shape=group_shape,
-                    cutlass_block_fp8_supported=cutlass_block_fp8_supported(),
-                    use_aiter_and_is_supported=use_aiter_quant,
-                    transpose_weights=use_aiter_fusion,
-                )
-                for _ in range(3)
-            ]
-
-            self.enable_quant_fp8_custom_op = (
-                False
-                if use_aiter_quant
-                else self.fp8_linear_layers[0].linear_op.input_quant_op.enabled()
+            self.weight_quant_key = create_fp8_quant_key(
+                static=True, group_shape=GroupShape(block_size, block_size)
             )
 
         else:
             is_static = group_shape == GroupShape.PER_TENSOR
-            act_quant_scale_desc = ScaleDesc(torch.float32, is_static, group_shape)
-            w_quant_scale_desc = ScaleDesc(torch.float32, True, group_shape)
-            self.activation_quant_key = QuantKey(
-                dtype=FP8_DTYPE, scale=act_quant_scale_desc, symmetric=True
+            self.activation_quant_key = create_fp8_quant_key(
+                is_static, group_shape=group_shape
             )
-            self.weight_quant_key = QuantKey(
-                dtype=FP8_DTYPE, scale=w_quant_scale_desc, symmetric=True
+            self.weight_quant_key = create_fp8_quant_key(
+                static=True, group_shape=group_shape
             )
-            self.fp8_linear_layers = [
-                TestFP8Layer(
-                    weight_shape=(hidden_size, hidden_size),
-                    activation_quant_key=self.activation_quant_key,
-                    weight_quant_key=self.weight_quant_key,
-                    force_kernel=force_kernel,
-                )
-                for _ in range(3)
-            ]
 
-            # Enable aiter quantization if requested
-            for layer in self.fp8_linear_layers:
-                layer.kernel.quant_fp8.use_aiter = use_aiter_quant
+        self.fp8_linear_layers = [
+            TestFP8Layer(
+                weight_shape=(hidden_size, hidden_size),
+                activation_quant_key=self.activation_quant_key,
+                weight_quant_key=self.weight_quant_key,
+                force_kernel=force_kernel,
+                transpose_weights=use_aiter_fusion,
+                input_dtype=dtype,
+            )
+            for _ in range(3)
+        ]
+
+        # Enable aiter quantization if requested
+        for layer in self.fp8_linear_layers:
+            layer.kernel.quant_fp8.use_aiter = use_aiter_quant
 
-            self.enable_quant_fp8_custom_op = self.fp8_linear_layers[
-                0
-            ].is_quant_fp8_enabled()
+        self.enable_quant_fp8_custom_op = self.fp8_linear_layers[
+            0
+        ].is_quant_fp8_enabled()
 
     def forward(self, x):
         # avoid having graph input be an arg to a pattern directly
@@ -246,11 +240,10 @@ def ops_in_model_after(self):
         ]
 
     def ops_in_model_before_partial(self):
-        return (
-            [RMS_OP, RMS_ADD_OP]
-            if self.enable_rms_norm_custom_op
-            else [torch.ops.aten.rsqrt]
-        )
+        return [
+            torch.ops.vllm_ir.rms_norm,
+            torch.ops.vllm_ir.fused_add_rms_norm.default,
+        ]
 
 
 def _run_fusion_test(
@@ -323,6 +316,26 @@ def test_fusion_rmsnorm_quant(
     ):
         pytest.skip("Unsupported group shape 64 for CUTLASS/DeepGemm")
 
+    # TODO(quant-rms-fusion): DeepGEMM UE8M0 activation quant on B200 lowers
+    # to a packed int32-scale op (per_token_group_quant_fp8_packed_for_deepgemm),
+    # but the rms+quant fusion pattern only matches the fp32-scale variant, so
+    # the fused output gets a mismatched scale layout and produces NaN. Only
+    # reproduces on bf16 (DeepGEMM UE8M0 on B200 is bf16-only).
+    # To re-enable: make rms_norm_per_block_quant emit packed UE8M0 scales
+    # and extend the fusion pattern to rewrite the packed activation quant.
+    deepgemm_kernels = (
+        DeepGemmFp8BlockScaledMMKernel,
+        FlashInferFp8DeepGEMMDynamicBlockScaledKernel,
+    )
+    if (
+        dtype == torch.bfloat16
+        and force_kernel in deepgemm_kernels
+        and is_deep_gemm_e8m0_used()
+    ):
+        pytest.skip(
+            "rms+quant fusion does not yet match the packed UE8M0 DeepGEMM path"
+        )
+
     custom_ops = []
     if enable_rms_norm_custom_op:
         custom_ops.append("+rms_norm")
@@ -340,7 +353,10 @@ def test_fusion_rmsnorm_quant(
         ),
     )
 
-    with vllm.config.set_current_vllm_config(vllm_config):
+    with (
+        vllm.config.set_current_vllm_config(vllm_config),
+        vllm_config.kernel_config.ir_op_priority.set_priority(),
+    ):
         # Setup device before model creation
         torch.set_default_device("cuda")
         torch.set_default_dtype(dtype)
@@ -353,6 +369,7 @@ def test_fusion_rmsnorm_quant(
             eps=eps,
             force_kernel=force_kernel,
             group_shape=group_shape,
+            dtype=dtype,
             use_aiter_fusion=False,
             use_aiter_quant=False,
         )
@@ -364,16 +381,6 @@ def test_fusion_rmsnorm_quant(
             model.ops_in_model_before_partial(), fully_replaced=False
         )
 
-        # If RMSNorm custom op is disabled (native/torch impl used),
-        # there's a risk that the fused add doesn't get included in the
-        # replacement and only the rms part gets fused with quant.
-        # Hence, we check only 2 add nodes are left (final fused rmsnorm add).
-        if not enable_rms_norm_custom_op:
-            n_add_nodes = lambda g: sum(1 for _ in find_op_nodes(torch.ops.aten.add, g))
-            # 7 = 1 (RMS) + 3x2 (3xRMS_ADD, 2 each)
-            assert n_add_nodes(backend.graph_pre_pass) == 7
-            assert n_add_nodes(backend.graph_post_pass) == 2
-
 
 @pytest.mark.parametrize("dtype", [torch.bfloat16])
 @pytest.mark.parametrize("hidden_size", [256])
@@ -424,6 +431,7 @@ def test_aiter_fusion_rmsnorm_quant(
             eps=eps,
             force_kernel=force_kernel,
             group_shape=group_shape,
+            dtype=dtype,
             use_aiter_fusion=True,  # Always use aiter fusion ops in aiter test
             use_aiter_quant=use_aiter_quant_op,  # Toggle aiter quantization
         )
@@ -431,3 +439,242 @@ def test_aiter_fusion_rmsnorm_quant(
         _run_fusion_test(
             model, fusion_pass, vllm_config, dtype, hidden_size, num_tokens
         )
+
+
+class TestGatedModel(torch.nn.Module):
+    """Model that uses RMSNormGated + reshape + group FP8 quant + linear.
+
+    Mimics GatedDeltaNetAttention's output projection path where:
+    - RMSNormGated operates on per-head tensors (N*H, D)
+    - Output is reshaped to (N, H*D) before group quantization + linear
+    """
+
+    def __init__(
+        self,
+        num_heads: int,
+        head_dim: int,
+        eps: float,
+        force_kernel: type[_KernelT],
+        group_shape: GroupShape,
+        dtype: torch.dtype,
+        use_aiter_quant: bool = True,
+    ):
+        super().__init__()
+        self.num_heads = num_heads
+        self.head_dim = head_dim
+        hidden_dim = num_heads * head_dim
+
+        self.norm = RMSNormGated(
+            head_dim,
+            eps=eps,
+            group_size=None,
+            norm_before_gate=True,
+        )
+
+        self.activation_quant_key = create_fp8_quant_key(
+            static=False, group_shape=group_shape
+        )
+        self.weight_quant_key = create_fp8_quant_key(
+            static=True, group_shape=GroupShape(group_shape.col, group_shape.col)
+        )
+
+        self.fp8_linear = TestFP8Layer(
+            weight_shape=(hidden_dim, hidden_dim),
+            activation_quant_key=self.activation_quant_key,
+            weight_quant_key=self.weight_quant_key,
+            force_kernel=force_kernel,
+            transpose_weights=True,
+            input_dtype=dtype,
+        )
+        self.fp8_linear.kernel.quant_fp8.use_aiter = use_aiter_quant
+
+    def forward(self, x, z):
+        num_heads = self.num_heads
+        head_dim = self.head_dim
+        hidden_dim = num_heads * head_dim
+        x = torch.relu(x)
+        z = torch.relu(z)
+        x_heads = x.reshape(-1, num_heads, head_dim).reshape(-1, head_dim)
+        z_heads = z.reshape(-1, num_heads, head_dim).reshape(-1, head_dim)
+        normed = self.norm(x_heads, z_heads)
+        merged = normed.reshape(-1, hidden_dim)
+        out = self.fp8_linear(merged)
+        return out
+
+    def ops_in_model_after(self):
+        from vllm.compilation.passes.fusion.rocm_aiter_fusion import (
+            AiterRMSNormGatedFp8GroupQuantPattern,
+        )
+
+        return [AiterRMSNormGatedFp8GroupQuantPattern.FUSED_OP]
+
+
+class _MockGDNLayer:
+    """Minimal mock to populate static_forward_context for pass discovery.
+
+    Uses __class__ assignment to pass isinstance checks against
+    GatedDeltaNetAttention without requiring a full config-based init.
+    """
+
+    def __init__(self, num_v_heads: int, head_v_dim: int, tp_size: int = 1):
+        self.num_v_heads = num_v_heads
+        self.head_v_dim = head_v_dim
+        self.tp_size = tp_size
+
+        from vllm.model_executor.layers.mamba.gdn.base import (
+            GatedDeltaNetAttention,
+        )
+
+        self.__class__ = GatedDeltaNetAttention
+
+
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
+@pytest.mark.parametrize("num_heads", [2])
+@pytest.mark.parametrize("head_dim", [128])
+@pytest.mark.parametrize("num_tokens", [8])
+@pytest.mark.parametrize("eps", [1e-5, 1e-6])
+@pytest.mark.skipif(
+    (not current_platform.is_rocm() or not IS_AITER_FOUND),
+    reason="Only test on ROCm with aiter package installed",
+)
+def test_aiter_fusion_rmsnorm_gated_quant(
+    dtype: torch.dtype,
+    num_heads: int,
+    head_dim: int,
+    num_tokens: int,
+    eps: float,
+    monkeypatch: pytest.MonkeyPatch,
+):
+    group_shape = GroupShape(1, 128)
+    vllm_config = VllmConfig(
+        model_config=ModelConfig(dtype=dtype),
+        compilation_config=CompilationConfig(
+            mode=CompilationMode.VLLM_COMPILE,
+            custom_ops=["-rms_norm", "-silu_and_mul", "-quant_fp8"],
+            pass_config=PassConfig(fuse_norm_quant=True, eliminate_noops=True),
+        ),
+    )
+
+    with vllm.config.set_current_vllm_config(vllm_config), monkeypatch.context() as m:
+        from vllm.compilation.passes.fusion.rocm_aiter_fusion import (
+            RocmAiterRMSNormQuantFusionPass,
+        )
+
+        m.setenv("VLLM_ROCM_USE_AITER", "1")
+        rocm_aiter_ops.refresh_env_variables()
+
+        # Register a mock GDN layer so the pass discovers num_heads/head_dim
+        mock_gdn = _MockGDNLayer(num_v_heads=num_heads, head_v_dim=head_dim, tp_size=1)
+        vllm_config.compilation_config.static_forward_context["mock_gdn_layer"] = (
+            mock_gdn
+        )
+
+        torch.set_default_device("cuda")
+        torch.set_default_dtype(dtype)
+        torch.manual_seed(1)
+
+        fusion_pass = RocmAiterRMSNormQuantFusionPass(vllm_config)
+
+        model = TestGatedModel(
+            num_heads=num_heads,
+            head_dim=head_dim,
+            eps=eps,
+            force_kernel=AiterFp8BlockScaledMMKernel,
+            group_shape=group_shape,
+            dtype=dtype,
+            use_aiter_quant=True,
+        )
+
+        noop_pass = NoOpEliminationPass(vllm_config)
+        cleanup_pass = PostCleanupPass(vllm_config)
+
+        backend = TestBackend(noop_pass, fusion_pass, cleanup_pass)
+        backend2 = TestBackend(noop_pass, cleanup_pass)
+
+        hidden_dim = num_heads * head_dim
+        x = torch.rand(num_tokens, hidden_dim)
+        z = torch.rand(num_tokens, hidden_dim)
+        torch._dynamo.mark_dynamic(x, 0)
+        torch._dynamo.mark_dynamic(z, 0)
+
+        model_fused = torch.compile(model, backend=backend)
+        result_fused = model_fused(x, z)
+
+        model_unfused = torch.compile(model, backend=backend2)
+        result_unfused = model_unfused(x, z)
+
+        torch.testing.assert_close(result_fused, result_unfused, atol=1e-2, rtol=1e-2)
+
+        assert fusion_pass.matched_count == 1
+        backend.check_after_ops(model.ops_in_model_after())
+
+
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
+@pytest.mark.parametrize("num_heads", [2])
+@pytest.mark.parametrize("head_dim", [128])
+@pytest.mark.parametrize("num_tokens", [8])
+@pytest.mark.parametrize("eps", [1e-6])
+@pytest.mark.skipif(
+    (not current_platform.is_rocm() or not IS_AITER_FOUND),
+    reason="Only test on ROCm with aiter package installed",
+)
+def test_aiter_fusion_rmsnorm_gated_quant_no_gdn_layers(
+    dtype: torch.dtype,
+    num_heads: int,
+    head_dim: int,
+    num_tokens: int,
+    eps: float,
+    monkeypatch: pytest.MonkeyPatch,
+):
+    """Verify that without GDN layers in static_forward_context,
+    the gated pattern is not registered and no matches occur."""
+    group_shape = GroupShape(1, 128)
+    vllm_config = VllmConfig(
+        model_config=ModelConfig(dtype=dtype),
+        compilation_config=CompilationConfig(
+            mode=CompilationMode.VLLM_COMPILE,
+            custom_ops=["-rms_norm", "-silu_and_mul", "-quant_fp8"],
+            pass_config=PassConfig(fuse_norm_quant=True, eliminate_noops=True),
+        ),
+    )
+
+    with vllm.config.set_current_vllm_config(vllm_config), monkeypatch.context() as m:
+        from vllm.compilation.passes.fusion.rocm_aiter_fusion import (
+            RocmAiterRMSNormQuantFusionPass,
+        )
+
+        m.setenv("VLLM_ROCM_USE_AITER", "1")
+        rocm_aiter_ops.refresh_env_variables()
+
+        torch.set_default_device("cuda")
+        torch.set_default_dtype(dtype)
+        torch.manual_seed(1)
+
+        # No mock GDN layer registered -- pass should not register gated pattern
+        fusion_pass = RocmAiterRMSNormQuantFusionPass(vllm_config)
+
+        model = TestGatedModel(
+            num_heads=num_heads,
+            head_dim=head_dim,
+            eps=eps,
+            force_kernel=AiterFp8BlockScaledMMKernel,
+            group_shape=group_shape,
+            dtype=dtype,
+            use_aiter_quant=True,
+        )
+
+        noop_pass = NoOpEliminationPass(vllm_config)
+        cleanup_pass = PostCleanupPass(vllm_config)
+
+        backend = TestBackend(noop_pass, fusion_pass, cleanup_pass)
+
+        hidden_dim = num_heads * head_dim
+        x = torch.rand(num_tokens, hidden_dim)
+        z = torch.rand(num_tokens, hidden_dim)
+        torch._dynamo.mark_dynamic(x, 0)
+        torch._dynamo.mark_dynamic(z, 0)
+
+        model_fused = torch.compile(model, backend=backend)
+        model_fused(x, z)
+
+        assert fusion_pass.matched_count == 0
diff --git a/tests/compile/passes/test_fusion_attn.py b/tests/compile/passes/test_fusion_attn.py
index 94014ca0107f..b776f6af98a1 100644
--- a/tests/compile/passes/test_fusion_attn.py
+++ b/tests/compile/passes/test_fusion_attn.py
@@ -9,7 +9,10 @@
 from tests.utils import TestFP8Layer, flat_product
 from tests.v1.attention.utils import BatchSpec, create_common_attn_metadata
 from vllm._custom_ops import cutlass_scaled_fp4_mm, scaled_fp4_quant
-from vllm.compilation.passes.fusion.attn_quant_fusion import ATTN_OP, AttnFusionPass
+from vllm.compilation.passes.fusion.attn_quant_fusion import (
+    ATTN_OP,
+    AttnQuantFusionPass,
+)
 from vllm.compilation.passes.fusion.matcher_utils import QUANT_OPS
 from vllm.compilation.passes.fx_utils import find_op_nodes
 from vllm.compilation.passes.utility.noop_elimination import NoOpEliminationPass
@@ -36,8 +39,9 @@
 from vllm.utils.flashinfer import has_flashinfer
 from vllm.v1.attention.backend import AttentionMetadata
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
-from vllm.v1.kv_cache_interface import AttentionSpec
+from vllm.v1.kv_cache_interface import AttentionSpec, get_kv_quant_mode
 
+DEVICE_TYPE = current_platform.device_type
 FP8_DTYPE = current_platform.fp8_dtype()
 FP4_DTYPE = torch.uint8
 
@@ -50,7 +54,6 @@ def __init__(
         num_qo_heads: int,
         num_kv_heads: int,
         head_size: int,
-        kv_cache_dtype: torch.dtype,
         device: torch.device,
         vllm_config: VllmConfig,
         block_size: int,
@@ -60,9 +63,9 @@ def __init__(
         self.num_qo_heads = num_qo_heads
         self.num_kv_heads = num_kv_heads
         self.head_size = head_size
-        self.kv_cache_dtype = kv_cache_dtype
         self.device = device
         self.vllm_config = vllm_config
+        self.dtype = vllm_config.model_config.dtype
 
         self.attn = Attention(
             num_heads=self.num_qo_heads,
@@ -77,13 +80,14 @@ def __init__(
 
         self.block_size = block_size
 
-        # Initialize attn MetadataBuilder
+        # Initialize attn MetadataBuilder (match Attention.get_kv_cache_spec)
         self.builder = self.attn.attn_backend.get_builder_cls()(
             kv_cache_spec=AttentionSpec(
                 block_size=self.block_size,
                 num_kv_heads=self.num_kv_heads,
                 head_size=self.head_size,
-                dtype=self.kv_cache_dtype,
+                dtype=self.attn.kv_cache_torch_dtype,
+                kv_quant_mode=get_kv_quant_mode(self.attn.kv_cache_dtype),
             ),
             layer_names=[self.attn.layer_name],
             vllm_config=self.vllm_config,
@@ -122,7 +126,7 @@ def build_attn_metadata(self, batch_size: int) -> AttentionMetadata:
         # Create dummy KV cache
         raw_tensor = torch.zeros(
             2 * num_blocks * self.block_size * self.num_kv_heads * self.head_size,
-            dtype=self.kv_cache_dtype,
+            dtype=self.attn.kv_cache_torch_dtype,
             device=self.device,
         )
         raw_tensor = raw_tensor.view(kv_cache_shape)
@@ -152,6 +156,7 @@ def __init__(self, *args, **kwargs):
             activation_quant_key=self.quant_key,
             weight_quant_key=self.quant_key,
             device=self.device,
+            input_dtype=self.dtype,
         )
 
         w = kwargs.get("w")
@@ -296,7 +301,7 @@ def test_attention_quant_pattern(
 
     custom_ops_list = custom_ops.split(",") if custom_ops else []
 
-    device = torch.device("cuda:0")
+    device = torch.device(f"{DEVICE_TYPE}:0")
     torch.set_default_dtype(dtype)
     torch.manual_seed(42)
 
@@ -343,7 +348,6 @@ def test_attention_quant_pattern(
             num_qo_heads=num_qo_heads,
             num_kv_heads=num_kv_heads,
             head_size=head_size,
-            kv_cache_dtype=FP8_DTYPE,
             device=device,
             vllm_config=vllm_config_unfused,
             block_size=block_size,
@@ -371,7 +375,6 @@ def test_attention_quant_pattern(
             num_qo_heads=num_qo_heads,
             num_kv_heads=num_kv_heads,
             head_size=head_size,
-            kv_cache_dtype=FP8_DTYPE,
             device=device,
             vllm_config=vllm_config,
             w=model_unfused.w,
@@ -384,7 +387,7 @@ def test_attention_quant_pattern(
 
         # Create test backend with fusion passes enabled
         noop_pass = NoOpEliminationPass(vllm_config)
-        attn_pass = LazyInitPass(AttnFusionPass, vllm_config)
+        attn_pass = LazyInitPass(AttnQuantFusionPass, vllm_config)
         cleanup_pass = PostCleanupPass(vllm_config)
 
         test_backend = TestBackend(noop_pass, attn_pass, cleanup_pass)
@@ -434,7 +437,7 @@ def test_attention_quant_pattern(
     # Only output quant ops are fused into attention.
     test_backend.check_before_ops([quant_op], fully_replaced=quant_key is kNvfp4Dynamic)
 
-    # access the underlying `AttnFusionPass` on the `LazyInitPass`
+    # access the underlying `AttnQuantFusionPass` on the `LazyInitPass`
     assert attn_pass.pass_.matched_count == sum(attn_fusion_supported)
 
     # Check attention ops in the graph before and after fusion
diff --git a/tests/compile/passes/test_mla_attn_quant_fusion.py b/tests/compile/passes/test_mla_attn_quant_fusion.py
new file mode 100644
index 000000000000..0a38ffca483a
--- /dev/null
+++ b/tests/compile/passes/test_mla_attn_quant_fusion.py
@@ -0,0 +1,587 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import copy
+
+import pytest
+import torch._dynamo
+
+from tests.compile.backend import LazyInitPass, TestBackend
+from tests.utils import TestFP8Layer, flat_product
+from tests.v1.attention.utils import BatchSpec, create_common_attn_metadata
+from vllm._custom_ops import cutlass_scaled_fp4_mm, scaled_fp4_quant
+from vllm.compilation.passes.fusion.matcher_utils import QUANT_OPS
+from vllm.compilation.passes.fusion.mla_attn_quant_fusion import (
+    MLA_ATTN_OP,
+    MLAAttnQuantFusionPass,
+)
+from vllm.compilation.passes.fx_utils import find_op_nodes
+from vllm.compilation.passes.utility.noop_elimination import NoOpEliminationPass
+from vllm.compilation.passes.utility.post_cleanup import PostCleanupPass
+from vllm.config import (
+    AttentionConfig,
+    CacheConfig,
+    CompilationConfig,
+    CompilationMode,
+    ModelConfig,
+    PassConfig,
+    SchedulerConfig,
+    VllmConfig,
+    set_current_vllm_config,
+)
+from vllm.forward_context import get_forward_context, set_forward_context
+from vllm.model_executor.kernels.linear.scaled_mm.cutlass import (
+    CutlassFp8BlockScaledMMKernel,
+)
+from vllm.model_executor.layers.attention import MLAAttention
+from vllm.model_executor.layers.linear import ColumnParallelLinear
+from vllm.model_executor.layers.quantization.fp8 import Fp8Config
+from vllm.model_executor.layers.quantization.modelopt import ModelOptNvFp4Config
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    GroupShape,
+    QuantKey,
+    create_fp8_quant_key,
+    kFp8Dynamic128Sym,
+    kFp8StaticTensorSym,
+    kNvfp4Dynamic,
+)
+from vllm.platforms import current_platform
+from vllm.v1.attention.backend import AttentionMetadata
+from vllm.v1.attention.backends.registry import AttentionBackendEnum
+from vllm.v1.kv_cache_interface import MLAAttentionSpec
+
+FP8_DTYPE = current_platform.fp8_dtype()
+FP4_DTYPE = torch.uint8
+DEVICE_TYPE = current_platform.device_type
+
+
+class MLAAttentionQuantPatternModel(torch.nn.Module):
+    """Base model for MLA AttentionQuantPattern fusion."""
+
+    def __init__(
+        self,
+        num_heads: int,
+        qk_nope_head_dim: int,
+        qk_rope_head_dim: int,
+        v_head_dim: int,
+        kv_lora_rank: int,
+        kv_cache_dtype: torch.dtype,
+        device: torch.device,
+        vllm_config: VllmConfig,
+        **kwargs,
+    ):
+        super().__init__()
+        self.num_heads = num_heads
+        self.qk_nope_head_dim = qk_nope_head_dim
+        self.qk_rope_head_dim = qk_rope_head_dim
+        self.qk_head_dim = qk_nope_head_dim + qk_rope_head_dim
+        self.v_head_dim = v_head_dim
+        self.kv_lora_rank = kv_lora_rank
+        self.output_dim = num_heads * v_head_dim
+        self.head_size = kv_lora_rank + qk_rope_head_dim
+        self.kv_cache_dtype = kv_cache_dtype
+        self.device = device
+        self.vllm_config = vllm_config
+        self.dtype = vllm_config.model_config.dtype
+
+        kv_b_proj = ColumnParallelLinear(
+            input_size=kv_lora_rank,
+            output_size=num_heads * (qk_nope_head_dim + v_head_dim),
+            bias=False,
+            prefix="model.layers.0.self_attn.kv_b_proj",
+        ).to(device)
+        kv_b_proj_weight = kwargs.get("kv_b_proj_weight")
+        if kv_b_proj_weight is not None:
+            kv_b_proj.weight.data.copy_(kv_b_proj_weight)
+        else:
+            kv_b_proj.weight.data.normal_()
+
+        # Create MLAAttention
+        self.mla_attn = MLAAttention(
+            num_heads=num_heads,
+            scale=1.0 / (self.qk_head_dim**0.5),
+            qk_nope_head_dim=qk_nope_head_dim,
+            qk_rope_head_dim=qk_rope_head_dim,
+            v_head_dim=v_head_dim,
+            q_lora_rank=None,
+            kv_lora_rank=kv_lora_rank,
+            kv_b_proj=kv_b_proj,
+            cache_config=vllm_config.cache_config,
+            quant_config=self.quant_config,
+            prefix="model.layers.0.self_attn.attn",
+        )
+        self.mla_attn._k_scale = self.mla_attn._k_scale.to(device)
+        self.mla_attn._v_scale = self.mla_attn._v_scale.to(device)
+
+        # Initialize W_UK_T and W_UV from kv_b_proj weights
+        self.mla_attn.process_weights_after_loading(torch.get_default_dtype())
+        self.kv_b_proj_weight = kv_b_proj.weight.data.clone()
+
+        self.block_size = 16
+
+        # Initialize MLA MetadataBuilder
+        self.builder = self.mla_attn.attn_backend.get_builder_cls()(
+            kv_cache_spec=MLAAttentionSpec(
+                block_size=self.block_size,
+                num_kv_heads=1,
+                head_size=self.head_size,
+                dtype=self.kv_cache_dtype,
+            ),
+            layer_names=[self.mla_attn.layer_name],
+            vllm_config=self.vllm_config,
+            device=self.device,
+        )
+
+    def build_attn_metadata(self, batch_size: int) -> AttentionMetadata:
+        """Initialize MLA attention metadata.
+
+        NOTE: Uses decode-only batch (query_len=1 per request). The prefill
+        (forward_mha) path is not separately tested here because it requires
+        FlashAttention availability and different input tensor shapes. The
+        quant logic in forward_impl is identical for both paths — it quantizes
+        the full output[:num_actual_toks] buffer after both forward_mha and
+        forward_mqa have written their results.
+        """
+
+        batch_spec = BatchSpec(seq_lens=[1] * batch_size, query_lens=[1] * batch_size)
+        common_attn_metadata = create_common_attn_metadata(
+            batch_spec, self.block_size, self.device, arange_block_indices=True
+        )
+
+        max_blocks = (max(batch_spec.seq_lens) + self.block_size - 1) // self.block_size
+        num_blocks = batch_size * max_blocks
+
+        # MLA KV cache is 3D: (num_blocks, block_size, head_size)
+        attn_backend = self.mla_attn.attn_backend
+        kv_cache_shape = attn_backend.get_kv_cache_shape(
+            num_blocks, self.block_size, 1, self.head_size
+        )
+        try:
+            kv_cache_stride_order = attn_backend.get_kv_cache_stride_order()
+        except (AttributeError, NotImplementedError):
+            kv_cache_stride_order = tuple(range(len(kv_cache_shape)))
+
+        ordered_shape = tuple(kv_cache_shape[i] for i in kv_cache_stride_order)
+        inv_order = [
+            kv_cache_stride_order.index(i) for i in range(len(kv_cache_stride_order))
+        ]
+
+        raw_tensor = torch.zeros(
+            ordered_shape, dtype=self.kv_cache_dtype, device=self.device
+        )
+        kv_cache = raw_tensor.permute(*inv_order)
+
+        self.mla_attn.kv_cache = kv_cache
+
+        self.attn_metadata = self.builder.build(
+            common_prefix_len=0, common_attn_metadata=common_attn_metadata
+        )
+
+        return self.attn_metadata
+
+
+class TestMLAAttentionFp8StaticQuantPatternModel(MLAAttentionQuantPatternModel):
+    """Test model for MLA Attention + FP8 static quant fusion."""
+
+    quant_key = kFp8StaticTensorSym
+    quant_config = Fp8Config()
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.fp8_linear = TestFP8Layer(
+            weight_shape=(self.output_dim, self.output_dim),
+            activation_quant_key=self.quant_key,
+            weight_quant_key=self.quant_key,
+            device=self.device,
+            input_dtype=self.dtype,
+        )
+
+        w = kwargs.get("w")
+        if w is not None:
+            self.fp8_linear.weight = w["weight"]
+            self.fp8_linear.weight_scale = w["wscale"]
+            self.fp8_linear.input_scale = w["scale"]
+
+        self.w = {
+            "weight": self.fp8_linear.weight,
+            "wscale": self.fp8_linear.weight_scale,
+            "scale": self.fp8_linear.input_scale,
+        }
+
+    def forward(
+        self,
+        q: torch.Tensor,
+        kv_c_normed: torch.Tensor,
+        k_pe: torch.Tensor,
+    ):
+        """Forward pass that creates the MLA attention + FP8 quant pattern."""
+        attn_output = self.mla_attn(
+            q,
+            kv_c_normed,
+            k_pe,
+            output_shape=(q.shape[0], self.output_dim),
+        )
+        return self.fp8_linear(attn_output)
+
+
+class TestMLAAttentionNvfp4QuantPatternModel(MLAAttentionQuantPatternModel):
+    """Test model for MLA Attention + NVFP4 quant fusion."""
+
+    quant_key = kNvfp4Dynamic
+    quant_config = ModelOptNvFp4Config(
+        is_checkpoint_nvfp4_serialized=False,
+        kv_cache_quant_algo=None,
+        exclude_modules=[],
+    )
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.w = kwargs.get(
+            "w",
+            {
+                "weight": torch.randint(
+                    256,
+                    (self.output_dim, self.output_dim // 2),
+                    dtype=FP4_DTYPE,
+                    device=self.device,
+                ),
+                "wscale_swizzled": torch.randn(
+                    self.output_dim, self.output_dim // 16
+                ).to(dtype=FP8_DTYPE, device=self.device),
+                "wscale": torch.tensor([500], dtype=torch.float32, device=self.device),
+                "scale": torch.tensor([0.002], dtype=torch.float32, device=self.device),
+            },
+        )
+
+    def forward(
+        self,
+        q: torch.Tensor,
+        kv_c_normed: torch.Tensor,
+        k_pe: torch.Tensor,
+    ):
+        """Forward pass that creates the MLA attention + NVFP4 quant pattern."""
+        attn_output = self.mla_attn(
+            q,
+            kv_c_normed,
+            k_pe,
+            output_shape=(q.shape[0], self.output_dim),
+        )
+        quant_output, output_block_scale = scaled_fp4_quant(
+            attn_output, 1 / self.w["scale"]
+        )
+        return cutlass_scaled_fp4_mm(
+            a=quant_output,
+            b=self.w["weight"],
+            block_scale_a=output_block_scale,
+            block_scale_b=self.w["wscale_swizzled"],
+            alpha=self.w["scale"] * self.w["wscale"],
+            out_dtype=attn_output.dtype,
+        )
+
+
+class TestMLAAttentionFp8GroupQuantPatternModel(MLAAttentionQuantPatternModel):
+    """Test model for MLA Attention + per-group FP8 (block quant) fusion."""
+
+    quant_key = kFp8Dynamic128Sym
+    quant_config = Fp8Config(
+        is_checkpoint_fp8_serialized=True,
+        weight_block_size=[128, 128],
+    )
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        weight_quant_key = create_fp8_quant_key(
+            static=True, group_shape=GroupShape(128, 128)
+        )
+        device = kwargs.get("device", torch.device("cuda:0"))
+
+        # Subclass to set weight_block_size before process_weights_after_loading
+        class _BlockFP8Layer(TestFP8Layer):
+            def __init__(self, *a, **kw):
+                self.weight_block_size = [128, 128]
+                super().__init__(*a, **kw)
+
+        # Force CutlassFp8BlockScaledMMKernel to ensure the graph uses
+        # per_token_group_fp8_quant (not the deepgemm packed variant).
+        self.block_fp8_linear = _BlockFP8Layer(
+            weight_shape=(self.output_dim, self.output_dim),
+            activation_quant_key=self.quant_key,
+            weight_quant_key=weight_quant_key,
+            input_dtype=self.dtype,
+            device=device,
+            force_kernel=CutlassFp8BlockScaledMMKernel,
+        )
+
+        w = kwargs.get("w")
+        if w is not None:
+            self.block_fp8_linear.weight = w["weight"]
+            # Block-wise uses weight_scale_inv, not weight_scale
+            self.block_fp8_linear.weight_scale_inv = w["wscale"]
+
+        self.w = {
+            "weight": self.block_fp8_linear.weight,
+            "wscale": self.block_fp8_linear.weight_scale_inv,
+        }
+
+    def forward(
+        self,
+        q: torch.Tensor,
+        kv_c_normed: torch.Tensor,
+        k_pe: torch.Tensor,
+    ):
+        """Forward pass: MLA attention -> block FP8 linear (group quant)."""
+        attn_output = self.mla_attn(
+            q,
+            kv_c_normed,
+            k_pe,
+            output_shape=(q.shape[0], self.output_dim),
+        )
+        return self.block_fp8_linear(attn_output)
+
+
+def is_nvfp4_supported():
+    return current_platform.has_device_capability(100)
+
+
+# MLA test configuration
+MLA_DIMS: list[tuple[int, int, int, int, int]] = []
+PATTERN_TEST_MODELS_MLA_FP8: list[tuple[str, type]] = []
+PATTERN_TEST_MODELS_MLA_GROUP_FP8: list[tuple[str, type]] = []
+PATTERN_TEST_MODELS_MLA_FP4: list[tuple[str, type]] = []
+BACKENDS_MLA_FP8: list[AttentionBackendEnum] = []
+BACKENDS_MLA_FP4: list[AttentionBackendEnum] = []
+
+if current_platform.is_cuda():
+    # (num_heads, qk_nope_head_dim, qk_rope_head_dim, v_head_dim, kv_lora_rank)
+    MLA_DIMS = [(16, 128, 64, 128, 512)]
+    PATTERN_TEST_MODELS_MLA_FP8 = [
+        (
+            "deepseek-ai/DeepSeek-V2-Lite",
+            TestMLAAttentionFp8StaticQuantPatternModel,
+        )
+    ]
+    PATTERN_TEST_MODELS_MLA_GROUP_FP8 = [
+        (
+            "deepseek-ai/DeepSeek-V3",
+            TestMLAAttentionFp8GroupQuantPatternModel,
+        )
+    ]
+    PATTERN_TEST_MODELS_MLA_FP4 = [
+        (
+            "deepseek-ai/DeepSeek-V2-Lite",
+            TestMLAAttentionNvfp4QuantPatternModel,
+        )
+    ]
+    BACKENDS_MLA_FP8 = [AttentionBackendEnum.TRITON_MLA]
+    BACKENDS_MLA_FP4 = [AttentionBackendEnum.TRITON_MLA]
+
+
+@pytest.mark.parametrize(
+    "num_heads, qk_nope_head_dim, qk_rope_head_dim, v_head_dim, kv_lora_rank",
+    MLA_DIMS,
+)
+@pytest.mark.parametrize("batch_size", [7, 256] if current_platform.is_cuda() else [8])
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
+@pytest.mark.parametrize(
+    "backend, model_name, model_class, custom_ops",
+    list(
+        flat_product(
+            BACKENDS_MLA_FP8,
+            PATTERN_TEST_MODELS_MLA_FP8,
+            ["+quant_fp8", "-quant_fp8"],
+        )
+    )
+    + list(
+        flat_product(
+            BACKENDS_MLA_FP8,
+            PATTERN_TEST_MODELS_MLA_GROUP_FP8,
+            ["+quant_fp8"],
+        )
+    )
+    + list(flat_product(BACKENDS_MLA_FP4, PATTERN_TEST_MODELS_MLA_FP4, [""])),
+)
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike(), reason="Only test ROCm or CUDA"
+)
+@pytest.mark.skipif(not current_platform.supports_fp8(), reason="Need FP8")
+def test_mla_attention_quant_pattern(
+    num_heads: int,
+    qk_nope_head_dim: int,
+    qk_rope_head_dim: int,
+    v_head_dim: int,
+    kv_lora_rank: int,
+    batch_size: int,
+    dtype: torch.dtype,
+    custom_ops: str,
+    model_name: str,
+    model_class: type[MLAAttentionQuantPatternModel],
+    backend: AttentionBackendEnum,
+    dist_init,
+    monkeypatch,
+    use_fresh_inductor_cache,
+):
+    """Test MLA AttentionQuantPattern fusion pass"""
+    if (
+        model_class is TestMLAAttentionNvfp4QuantPatternModel
+        and not is_nvfp4_supported()
+    ):
+        pytest.skip("NVFP4 is not supported on this GPU (requires SM 100+).")
+
+    monkeypatch.setenv("VLLM_DISABLE_COMPILE_CACHE", "1")
+
+    custom_ops_list = custom_ops.split(",") if custom_ops else []
+
+    device = torch.device(f"{DEVICE_TYPE}:0")
+    torch.set_default_dtype(dtype)
+    torch.manual_seed(42)
+
+    model_config = ModelConfig(
+        model=model_name,
+        max_model_len=2048,
+        dtype=dtype,
+    )
+    vllm_config = VllmConfig(
+        model_config=model_config,
+        scheduler_config=SchedulerConfig(
+            max_num_seqs=1024,
+            max_model_len=model_config.max_model_len,
+            is_encoder_decoder=model_config.is_encoder_decoder,
+        ),
+        compilation_config=CompilationConfig(
+            mode=CompilationMode.VLLM_COMPILE,
+            custom_ops=custom_ops_list,
+        ),
+        cache_config=CacheConfig(cache_dtype="auto"),
+        attention_config=AttentionConfig(backend=backend),
+    )
+
+    # MLA inputs: q(B, N, qk_head_dim), kv_c_normed(B, L), k_pe(B, 1, R)
+    qk_head_dim = qk_nope_head_dim + qk_rope_head_dim
+    q = torch.randn(batch_size, num_heads, qk_head_dim, dtype=dtype, device=device)
+    kv_c_normed = torch.randn(batch_size, kv_lora_rank, dtype=dtype, device=device)
+    k_pe = torch.randn(batch_size, 1, qk_rope_head_dim, dtype=dtype, device=device)
+
+    # Mark first dimension as dynamic
+    torch._dynamo.mark_dynamic(q, 0)
+    torch._dynamo.mark_dynamic(kv_c_normed, 0)
+    torch._dynamo.mark_dynamic(k_pe, 0)
+
+    # Run model without fusion
+    vllm_config_unfused = copy.deepcopy(vllm_config)
+    with (
+        set_current_vllm_config(vllm_config_unfused),
+        set_forward_context(attn_metadata=None, vllm_config=vllm_config_unfused),
+    ):
+        model_unfused = model_class(
+            num_heads=num_heads,
+            qk_nope_head_dim=qk_nope_head_dim,
+            qk_rope_head_dim=qk_rope_head_dim,
+            v_head_dim=v_head_dim,
+            kv_lora_rank=kv_lora_rank,
+            kv_cache_dtype=dtype,
+            device=device,
+            vllm_config=vllm_config_unfused,
+        )
+        model_unfused = model_unfused.to(device)
+        # HACK: See #131044
+        result_unfused_0 = model_unfused(q, kv_c_normed, k_pe)  # noqa: F841
+
+        forward_ctx = get_forward_context()
+        forward_ctx.attn_metadata = model_unfused.build_attn_metadata(batch_size)
+
+        compiled_unfused = torch.compile(model_unfused, fullgraph=True)
+        result_unfused = compiled_unfused(q, kv_c_normed, k_pe)
+
+    # Run model with attn fusion enabled
+    vllm_config.compilation_config.pass_config = PassConfig(
+        fuse_attn_quant=True, eliminate_noops=True
+    )
+    with (
+        set_current_vllm_config(vllm_config),
+        set_forward_context(attn_metadata=None, vllm_config=vllm_config),
+    ):
+        model_fused = model_class(
+            num_heads=num_heads,
+            qk_nope_head_dim=qk_nope_head_dim,
+            qk_rope_head_dim=qk_rope_head_dim,
+            v_head_dim=v_head_dim,
+            kv_lora_rank=kv_lora_rank,
+            kv_cache_dtype=dtype,
+            device=device,
+            vllm_config=vllm_config,
+            w=model_unfused.w,
+            kv_b_proj_weight=model_unfused.kv_b_proj_weight,
+        )
+        model_fused = model_fused.to(device)
+
+        forward_ctx = get_forward_context()
+        forward_ctx.attn_metadata = model_fused.build_attn_metadata(batch_size)
+
+        # Create test backend with fusion passes
+        noop_pass = NoOpEliminationPass(vllm_config)
+        attn_pass = LazyInitPass(MLAAttnQuantFusionPass, vllm_config)
+        cleanup_pass = PostCleanupPass(vllm_config)
+
+        test_backend = TestBackend(noop_pass, attn_pass, cleanup_pass)
+        # HACK: See https://github.com/vllm-project/vllm/issues/31044
+        result_fused_0 = model_fused(q, kv_c_normed, k_pe)  # noqa: F841
+
+        compiled_fused = torch.compile(
+            model_fused, backend=test_backend, fullgraph=True
+        )
+
+        result_fused = compiled_fused(q, kv_c_normed, k_pe)
+
+    # Check attn fusion support
+    quant_key: QuantKey = model_class.quant_key
+    attn_fusion_supported = [
+        layer.impl.fused_output_quant_supported(quant_key)
+        for key, layer in vllm_config.compilation_config.static_forward_context.items()
+        if isinstance(layer, MLAAttention)
+    ]
+    assert sum(attn_fusion_supported) == len(attn_fusion_supported), (
+        "All MLA layers should support attention fusion"
+    )
+
+    # Check quantization ops in the graph
+    is_per_group = quant_key.scale.group_shape.is_per_group()
+    quant_op = (
+        torch.ops.aten.reciprocal
+        if "-quant_fp8" in custom_ops_list
+        else QUANT_OPS[quant_key]
+    )
+    test_backend.check_before_ops([quant_op], fully_replaced=is_per_group)
+
+    assert attn_pass.pass_.matched_count == sum(attn_fusion_supported)
+
+    # Check MLA attention ops in the graph
+    attn_nodes_pre = list(find_op_nodes(MLA_ATTN_OP, test_backend.graph_pre_pass))
+    attn_nodes_post = list(find_op_nodes(MLA_ATTN_OP, test_backend.graph_post_pass))
+
+    assert len(attn_nodes_pre) > 0, "Should have MLA attention nodes before fusion"
+    assert len(attn_nodes_pre) == len(attn_nodes_post), (
+        "Should have same number of MLA attention nodes before and after fusion"
+    )
+
+    # Before fusion: neither scale should be set
+    assert attn_nodes_pre[0].kwargs.get("output_scale") is None
+    assert attn_nodes_pre[0].kwargs.get("output_block_scale") is None
+
+    # After fusion: derive expected scale presence from quant_key properties.
+    # - output_scale: present for static quant or non-FP8 (NVFP4 carries input_scale)
+    # - output_block_scale: present when quant uses per-group/block scaling
+    has_output_scale = attn_nodes_post[0].kwargs.get("output_scale") is not None
+    has_block_scale = attn_nodes_post[0].kwargs.get("output_block_scale") is not None
+
+    expects_output_scale = quant_key.scale.static or quant_key.dtype != FP8_DTYPE
+    assert has_output_scale == expects_output_scale, (
+        f"output_scale: expected present={expects_output_scale}, got {has_output_scale}"
+    )
+    assert has_block_scale == is_per_group, (
+        f"output_block_scale: expected present={is_per_group}, got {has_block_scale}"
+    )
+
+    # Check numerical correctness
+    torch.testing.assert_close(result_unfused, result_fused, atol=1e-2, rtol=1e-2)
diff --git a/tests/compile/passes/test_mla_rope_kvcache_cat_fusion.py b/tests/compile/passes/test_mla_rope_kvcache_cat_fusion.py
new file mode 100644
index 000000000000..cc3bfb7693ba
--- /dev/null
+++ b/tests/compile/passes/test_mla_rope_kvcache_cat_fusion.py
@@ -0,0 +1,413 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+import torch
+
+import vllm.config
+from tests.compile.backend import TestBackend
+from tests.v1.attention.utils import BatchSpec, create_common_attn_metadata
+from vllm._aiter_ops import is_aiter_found_and_supported, rocm_aiter_ops
+from vllm.compilation.passes.fusion.mla_rope_kvcache_cat_fusion import (
+    MLARoPEKVCacheCatFusionPass,
+)
+from vllm.compilation.passes.utility.fix_functionalization import (
+    FixFunctionalizationPass,
+)
+from vllm.compilation.passes.utility.noop_elimination import NoOpEliminationPass
+from vllm.compilation.passes.utility.post_cleanup import PostCleanupPass
+from vllm.config import (
+    CacheConfig,
+    CompilationConfig,
+    CompilationMode,
+    ModelConfig,
+    PassConfig,
+    VllmConfig,
+)
+from vllm.forward_context import get_forward_context, set_forward_context
+from vllm.model_executor.layers.attention import MLAAttention
+from vllm.model_executor.layers.linear import ColumnParallelLinear
+from vllm.model_executor.layers.rotary_embedding import (
+    DeepseekScalingRotaryEmbedding,
+    RotaryEmbedding,
+)
+from vllm.platforms import current_platform
+from vllm.utils.torch_utils import _encode_layer_name
+from vllm.v1.attention.backend import (
+    AttentionBackend,
+    CommonAttentionMetadata,
+)
+from vllm.v1.attention.backends.fa_utils import flash_attn_supports_mla
+from vllm.v1.attention.backends.registry import AttentionBackendEnum
+
+INDEX_SELECT_OP = torch.ops.aten.index.Tensor
+VLLM_UNIFIED_MLA_KV_CACHE_UPDATE_OP = torch.ops.vllm.unified_mla_kv_cache_update
+FP8_DTYPE = current_platform.fp8_dtype()
+
+
+class MLARoPEKVCacheCatTestModel(torch.nn.Module):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        attn_backend: AttentionBackendEnum,
+        use_deepseek_scaling_rope: bool,
+        num_heads: int,
+        qk_nope_head_dim: int,
+        qk_rope_head_dim: int,
+        v_head_dim: int,
+        q_lora_rank: int,
+        kv_lora_rank: int,
+        is_neox: bool,
+        dtype: torch.dtype,
+        device: torch.device,
+        prefix: str = "model.layers.0.self_attn.attn",
+    ):
+        super().__init__()
+        self.num_heads = num_heads
+        self.qk_nope_head_dim = qk_nope_head_dim
+        self.qk_rope_head_dim = qk_rope_head_dim
+        self.qk_head_dim = qk_nope_head_dim + qk_rope_head_dim
+        self.v_head_dim = v_head_dim
+        self.q_lora_rank = q_lora_rank
+        self.kv_lora_rank = kv_lora_rank
+        self.dtype = dtype
+        self.device = device
+        self.layer_name = prefix
+
+        self.num_kv_heads = 1
+        self.head_size = kv_lora_rank + qk_rope_head_dim
+        self.block_size = vllm_config.cache_config.block_size
+        self.scale = self.qk_head_dim**-0.5
+
+        if use_deepseek_scaling_rope:
+            self.rotary_emb = DeepseekScalingRotaryEmbedding(
+                head_size=qk_rope_head_dim,
+                rotary_dim=qk_rope_head_dim,
+                max_position_embeddings=4096,
+                base=10000,
+                is_neox_style=is_neox,
+                scaling_factor=1.0,
+                dtype=dtype,
+            )
+        else:
+            self.rotary_emb = RotaryEmbedding(
+                head_size=qk_rope_head_dim,
+                rotary_dim=qk_rope_head_dim,
+                max_position_embeddings=4096,
+                base=10000,
+                is_neox_style=is_neox,
+                dtype=dtype,
+            )
+
+        # Initialize intermediate mm layers for unit test
+        self.q_b_proj = ColumnParallelLinear(
+            self.q_lora_rank,
+            self.num_heads * self.qk_head_dim,
+            bias=False,
+            prefix=f"{prefix}.q_b_proj",
+        ).to(device)
+        self.kv_b_proj = ColumnParallelLinear(
+            self.kv_lora_rank,
+            self.num_heads * (self.qk_nope_head_dim + self.v_head_dim),
+            bias=False,
+            prefix=f"{prefix}.kv_b_proj",
+        ).to(device)
+
+        # ColumnParallelLinear default init in bf16 with seed 0 produces
+        # near-zero weights (7/4.7M nonzero), making the GEMM output almost
+        # entirely zero and masking correctness bugs. Reinitialize to get
+        # dense outputs.
+        with torch.no_grad():
+            torch.nn.init.normal_(self.q_b_proj.weight, std=0.02)
+            torch.nn.init.normal_(self.kv_b_proj.weight, std=0.02)
+
+        # Register layer metadata for the fusion pass via MLAAttention
+        self.mla_attn = MLAAttention(
+            num_heads=self.num_heads,
+            scale=self.scale,
+            qk_nope_head_dim=self.qk_nope_head_dim,
+            qk_rope_head_dim=self.qk_rope_head_dim,
+            v_head_dim=self.v_head_dim,
+            q_lora_rank=self.q_lora_rank,
+            kv_lora_rank=self.kv_lora_rank,
+            kv_b_proj=self.kv_b_proj,
+            cache_config=vllm_config.cache_config,
+            quant_config=vllm_config.quant_config,
+            prefix=prefix,
+            attn_backend=attn_backend.get_class(),
+        )
+        self.attn_backend: type[AttentionBackend] = self.mla_attn.get_attn_backend()
+        self.mla_attn._k_scale = self.mla_attn._k_scale.to(device)
+        self.mla_attn._v_scale = self.mla_attn._v_scale.to(device)
+
+        # Keep both the string dtype (for ops) and torch dtype (for tensors)
+        self.kv_cache_dtype_str = vllm_config.cache_config.cache_dtype
+        self.kv_cache_dtype = (
+            FP8_DTYPE if self.kv_cache_dtype_str.startswith("fp8") else self.dtype
+        )
+
+        # Initialize attn MetadataBuilder
+        self.builder = self.attn_backend.get_builder_cls()(
+            kv_cache_spec=self.mla_attn.get_kv_cache_spec(vllm_config),
+            layer_names=[self.mla_attn.layer_name],
+            vllm_config=vllm_config,
+            device=device,
+        )
+
+    def build_attn_metadata(self, batch_size: int) -> CommonAttentionMetadata:
+        """Initialize attention metadata."""
+        # Create common attn metadata
+        batch_spec = BatchSpec(seq_lens=[1] * batch_size, query_lens=[1] * batch_size)
+        common_attn_metadata = create_common_attn_metadata(
+            batch_spec, self.block_size, self.device, arange_block_indices=True
+        )
+
+        max_blocks = (max(batch_spec.seq_lens) + self.block_size - 1) // self.block_size
+        num_blocks = batch_size * max_blocks
+
+        # Fetch the attention backend and kv cache shape and stride order
+        kv_cache_shape = self.attn_backend.get_kv_cache_shape(
+            num_blocks, self.block_size, self.num_kv_heads, self.head_size
+        )
+        try:
+            kv_cache_stride_order = self.attn_backend.get_kv_cache_stride_order()
+        except (AttributeError, NotImplementedError):
+            kv_cache_stride_order = tuple(range(len(kv_cache_shape)))
+
+        kv_cache_shape = tuple(kv_cache_shape[i] for i in kv_cache_stride_order)
+        inv_order = [
+            kv_cache_stride_order.index(i) for i in range(len(kv_cache_stride_order))
+        ]
+
+        raw_tensor = torch.zeros(
+            num_blocks * self.block_size * self.num_kv_heads * self.head_size,
+            dtype=self.kv_cache_dtype,
+            device=self.device,
+        )
+        raw_tensor = raw_tensor.view(kv_cache_shape)
+        kv_cache = raw_tensor.permute(*inv_order)
+
+        self.mla_attn.kv_cache = kv_cache
+
+        # Build attn metadata
+        attn_metadata = self.builder.build(
+            common_prefix_len=0, common_attn_metadata=common_attn_metadata
+        )
+
+        return attn_metadata
+
+    def forward(
+        self, qkv_lora: torch.Tensor, positions: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        qkv_lora = qkv_lora.clone()
+        q_c, kv_lora = qkv_lora.split(
+            [self.q_lora_rank, self.kv_lora_rank + self.qk_rope_head_dim],
+            dim=-1,
+        )
+        q = self.q_b_proj(q_c)[0]
+        kv_c, k_pe = kv_lora.split([self.kv_lora_rank, self.qk_rope_head_dim], dim=-1)
+
+        q = q.view(-1, self.num_heads, self.qk_head_dim)
+        k_pe = k_pe.unsqueeze(1)
+
+        q[..., self.qk_nope_head_dim :], k_pe = self.rotary_emb(
+            positions, q[..., self.qk_nope_head_dim :], k_pe
+        )
+
+        dummy = torch.ops.vllm.unified_mla_kv_cache_update(
+            kv_c,
+            k_pe,
+            _encode_layer_name(self.layer_name),
+            self.kv_cache_dtype_str,
+            self.mla_attn._k_scale,
+        )
+        return q, kv_c, k_pe, dummy
+
+    def ops_in_model_before(self) -> list[torch._ops.OpOverload]:
+        ops = [
+            INDEX_SELECT_OP,
+            torch.ops.vllm.unified_mla_kv_cache_update.default,
+        ]
+        return ops
+
+    def ops_in_model_after(self) -> list[torch._ops.OpOverload]:
+        return [torch.ops.vllm.fused_rope_unified_mla_kv_cache_update.default]
+
+
+MLA_BACKENDS = [AttentionBackendEnum.TRITON_MLA]
+if flash_attn_supports_mla():
+    MLA_BACKENDS += [AttentionBackendEnum.FLASH_ATTN_MLA]
+if is_aiter_found_and_supported():
+    MLA_BACKENDS += [AttentionBackendEnum.ROCM_AITER_MLA]
+
+
+@pytest.mark.parametrize("attn_backend", MLA_BACKENDS)
+@pytest.mark.parametrize("use_deepseek_scaling_rope", [True])
+@pytest.mark.parametrize("num_heads", [16])
+@pytest.mark.parametrize("qk_nope_head_dim", [128])
+@pytest.mark.parametrize("qk_rope_head_dim", [64])
+@pytest.mark.parametrize("v_head_dim", [128])
+@pytest.mark.parametrize("q_lora_rank", [1536])
+@pytest.mark.parametrize("kv_lora_rank", [512])
+@pytest.mark.parametrize("block_size", [16])
+@pytest.mark.parametrize("is_neox", [True, False])
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
+@pytest.mark.parametrize("kv_cache_dtype", ["auto", "fp8"])
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike(),
+    reason="MLA RoPE+KVCache+Cat fusion is only supported on CUDA and ROCm.",
+)
+def test_mla_rope_kvcache_cat_fusion(
+    attn_backend: AttentionBackendEnum,
+    use_deepseek_scaling_rope: bool,
+    num_heads: int,
+    qk_nope_head_dim: int,
+    qk_rope_head_dim: int,
+    v_head_dim: int,
+    q_lora_rank: int,
+    kv_lora_rank: int,
+    block_size: int,
+    is_neox: bool,
+    dtype: torch.dtype,
+    kv_cache_dtype: str,
+    monkeypatch: pytest.MonkeyPatch,
+):
+    torch.set_default_device("cuda")
+    torch.set_default_dtype(dtype)
+    torch.manual_seed(0)
+
+    vllm_config = VllmConfig(
+        model_config=ModelConfig(
+            model="deepseek-ai/DeepSeek-V2-Lite",
+            dtype=dtype,
+        ),
+        cache_config=CacheConfig(
+            block_size=block_size,
+            cache_dtype=kv_cache_dtype,
+        ),
+        compilation_config=CompilationConfig(
+            mode=CompilationMode.VLLM_COMPILE,
+            pass_config=PassConfig(
+                fuse_rope_kvcache_cat_mla=True,
+                eliminate_noops=True,
+            ),
+        ),
+    )
+
+    with vllm.config.set_current_vllm_config(vllm_config), monkeypatch.context() as m:
+        if not torch.distributed.is_initialized():
+            from vllm.distributed.parallel_state import (
+                init_distributed_environment,
+                initialize_model_parallel,
+            )
+            from vllm.utils.system_utils import update_environment_variables
+
+            update_environment_variables(
+                {
+                    "RANK": "0",
+                    "LOCAL_RANK": "0",
+                    "WORLD_SIZE": "1",
+                    "MASTER_ADDR": "localhost",
+                    "MASTER_PORT": "54321",
+                }
+            )
+            init_distributed_environment()
+            initialize_model_parallel()
+
+        if attn_backend == AttentionBackendEnum.ROCM_AITER_MLA:
+            m.setenv("VLLM_ROCM_USE_AITER", "1")
+            rocm_aiter_ops.refresh_env_variables()
+
+        model = MLARoPEKVCacheCatTestModel(
+            vllm_config=vllm_config,
+            attn_backend=attn_backend,
+            use_deepseek_scaling_rope=use_deepseek_scaling_rope,
+            num_heads=num_heads,
+            qk_nope_head_dim=qk_nope_head_dim,
+            qk_rope_head_dim=qk_rope_head_dim,
+            v_head_dim=v_head_dim,
+            q_lora_rank=q_lora_rank,
+            kv_lora_rank=kv_lora_rank,
+            is_neox=is_neox,
+            dtype=dtype,
+            device=torch.get_default_device(),
+        )
+
+        fusion_pass = MLARoPEKVCacheCatFusionPass(vllm_config)
+        # note: FixFunctionalizationPass is required to correctly lower
+        # the fused op to its inplace version with auto-functionalization v1.
+        # Without it, decompose_auto_functionalized calls clone_preserve_strides
+        # on the non-contiguous q_pe slice directly, and inductor's lowering
+        # of the resulting as_strided chain incorrectly drops the storage offset.
+        # auto-functionalization v2 avoids this: it clones the contiguous base
+        # tensor (_all_bases) and reconstructs the slice as a view, so the
+        # offset is never passed through as_strided lowering.
+        passes = [
+            NoOpEliminationPass(vllm_config),
+            fusion_pass,
+            PostCleanupPass(vllm_config),
+            FixFunctionalizationPass(vllm_config),
+        ]
+        backend = TestBackend(*passes)
+
+        T = 5
+
+        qkv_lora = torch.randn(
+            T,
+            q_lora_rank + kv_lora_rank + qk_rope_head_dim,
+            dtype=dtype,
+        )
+        pos = torch.arange(T, dtype=torch.long)
+
+        qkv_unfused = qkv_lora.clone()
+        pos_unfused = pos.clone()
+
+        # Run unfused version
+        with set_forward_context(None, vllm_config):
+            forward_context = get_forward_context()
+            attn_metadata = model.build_attn_metadata(T)
+            forward_context.slot_mapping = {
+                model.layer_name: attn_metadata.slot_mapping
+            }
+            q_unfused, kv_c_unfused, k_pe_unfused, dummy = model(
+                qkv_unfused, pos_unfused
+            )
+            attn_layer = forward_context.no_compile_layers[model.layer_name]
+            kv_cache_unfused = attn_layer.kv_cache.clone()
+        del dummy
+
+        # Run fused version (compiled)
+        torch._dynamo.mark_dynamic(qkv_lora, 0)
+        torch._dynamo.mark_dynamic(pos, 0)
+        with set_forward_context(None, vllm_config):
+            model_fused = torch.compile(model, backend=backend)
+            forward_context = get_forward_context()
+            attn_metadata = model.build_attn_metadata(T)
+            forward_context.slot_mapping = {
+                model.layer_name: attn_metadata.slot_mapping
+            }
+            q_fused, kv_c_fused, k_pe_fused, dummy = model_fused(qkv_lora, pos)
+            attn_layer = forward_context.no_compile_layers[model.layer_name]
+            kv_cache_fused = attn_layer.kv_cache
+        del dummy
+
+        assert fusion_pass.matched_count == 1
+
+        backend.check_before_ops(model.ops_in_model_before())
+        backend.check_after_ops(model.ops_in_model_after())
+
+        if dtype == torch.float16:
+            ATOL, RTOL = (2e-3, 2e-3)
+        else:
+            ATOL, RTOL = (1e-2, 1e-2)
+
+        torch.testing.assert_close(q_unfused, q_fused, atol=ATOL, rtol=RTOL)
+        torch.testing.assert_close(kv_c_unfused, kv_c_fused, atol=ATOL, rtol=RTOL)
+        torch.testing.assert_close(k_pe_unfused, k_pe_fused, atol=ATOL, rtol=RTOL)
+        # Cannot compare fp8_* directly here, cast to model dtype instead
+        torch.testing.assert_close(
+            kv_cache_unfused.view(dtype),
+            kv_cache_fused.view(dtype),
+            atol=ATOL,
+            rtol=RTOL,
+        )
diff --git a/tests/compile/passes/test_noop_elimination.py b/tests/compile/passes/test_noop_elimination.py
index 412e8056f9cc..c31acfaf7238 100644
--- a/tests/compile/passes/test_noop_elimination.py
+++ b/tests/compile/passes/test_noop_elimination.py
@@ -8,6 +8,9 @@
 from tests.compile.backend import TestBackend
 from vllm.compilation.passes.utility.noop_elimination import NoOpEliminationPass
 from vllm.config import CompilationConfig, CompilationMode, PassConfig, VllmConfig
+from vllm.platforms import current_platform
+
+DEVICE_TYPE = current_platform.device_type
 
 
 @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32])
@@ -17,7 +20,7 @@
 )
 @pytest.mark.parametrize("hidden_size", [64, 4096])
 def test_noop_elimination(dtype, num_tokens, hidden_size, buffer_size):
-    torch.set_default_device("cuda")
+    torch.set_default_device(DEVICE_TYPE)
     torch.set_default_dtype(dtype)
     torch.manual_seed(1)
 
@@ -88,7 +91,7 @@ def test_non_noop_slice_preserved():
     Regression test for a bug where end=-1 was treated like an inferred
     dimension (reshape semantics) leading to incorrect elimination.
     """
-    torch.set_default_device("cuda")
+    torch.set_default_device(DEVICE_TYPE)
     x = torch.randn(16, 16)
 
     class SliceModel(torch.nn.Module):
diff --git a/tests/compile/passes/test_qk_norm_rope_fusion.py b/tests/compile/passes/test_qk_norm_rope_fusion.py
index f9a86732c474..25b8ea56fe25 100644
--- a/tests/compile/passes/test_qk_norm_rope_fusion.py
+++ b/tests/compile/passes/test_qk_norm_rope_fusion.py
@@ -3,11 +3,11 @@
 
 import pytest
 import torch
+from torch._ops import OpOverload, OpOverloadPacket
 
 from tests.compile.backend import TestBackend
 from vllm.compilation.passes.fusion.matcher_utils import (
     FLASHINFER_ROTARY_OP,
-    RMS_OP,
     ROTARY_OP,
 )
 from vllm.compilation.passes.fusion.qk_norm_rope_fusion import (
@@ -100,13 +100,8 @@ def forward(self, qkv: torch.Tensor, positions: torch.Tensor):
         q, k = self.rotary_emb(positions, q, k)
         return q, k, v
 
-    def ops_in_model_before(self) -> list[torch._ops.OpOverload]:
-        ops = []
-        if self.enable_rms_norm_custom_op:
-            ops.append(RMS_OP)
-        else:
-            ops.append(RSQRT_OP)
-
+    def ops_in_model_before(self) -> list[OpOverload | OpOverloadPacket]:
+        ops: list[OpOverload | OpOverloadPacket] = [torch.ops.vllm_ir.rms_norm]
         if self.enable_rope_custom_op:
             if self.rotary_emb.use_flashinfer:
                 ops.append(FLASHINFER_ROTARY_OP)
@@ -116,7 +111,7 @@ def ops_in_model_before(self) -> list[torch._ops.OpOverload]:
             ops.append(INDEX_SELECT_OP)
         return ops
 
-    def ops_in_model_after(self) -> list[torch._ops.OpOverload]:
+    def ops_in_model_after(self) -> list[OpOverload | OpOverloadPacket]:
         return [FUSED_QK_ROPE_OP]
 
 
@@ -166,7 +161,10 @@ def test_qk_norm_rope_fusion(
     num_heads, num_kv_heads, head_dim = 16, 4, 128
     T = 5
 
-    with set_current_vllm_config(vllm_config):
+    with (
+        set_current_vllm_config(vllm_config),
+        vllm_config.kernel_config.ir_op_priority.set_priority(),
+    ):
         model = QKNormRoPETestModel(
             num_heads=num_heads,
             num_kv_heads=num_kv_heads,
diff --git a/tests/compile/passes/test_rope_kvcache_fusion.py b/tests/compile/passes/test_rope_kvcache_fusion.py
index eea21c9179bd..b27adfc46f51 100644
--- a/tests/compile/passes/test_rope_kvcache_fusion.py
+++ b/tests/compile/passes/test_rope_kvcache_fusion.py
@@ -28,12 +28,12 @@
 from vllm.model_executor.layers.attention import Attention
 from vllm.model_executor.layers.rotary_embedding import RotaryEmbedding
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import _encode_layer_name
 from vllm.v1.attention.backend import (
     AttentionBackend,
     CommonAttentionMetadata,
 )
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
-from vllm.v1.kv_cache_interface import AttentionSpec
 
 INDEX_SELECT_OP = torch.ops.aten.index.Tensor
 VLLM_UNIFIED_KV_CACHE_UPDATE_OP = torch.ops.vllm.unified_kv_cache_update
@@ -101,13 +101,8 @@ def __init__(
         )
 
         # Initialize attn MetadataBuilder
-        self.builder = self.attn.attn_backend.get_builder_cls()(
-            kv_cache_spec=AttentionSpec(
-                block_size=self.block_size,
-                num_kv_heads=self.num_kv_heads,
-                head_size=head_size,
-                dtype=self.kv_cache_dtype,
-            ),
+        self.builder = self.attn_backend.get_builder_cls()(
+            kv_cache_spec=self.attn.get_kv_cache_spec(vllm_config),
             layer_names=[self.attn.layer_name],
             vllm_config=vllm_config,
             device=device,
@@ -125,12 +120,11 @@ def build_attn_metadata(self, batch_size: int) -> CommonAttentionMetadata:
         num_blocks = batch_size * max_blocks
 
         # Fetch the attention backend and kv cache shape and stride order
-        attn_backend = self.attn.attn_backend
-        kv_cache_shape = attn_backend.get_kv_cache_shape(
+        kv_cache_shape = self.attn_backend.get_kv_cache_shape(
             num_blocks, self.block_size, self.num_kv_heads, self.head_size
         )
         try:
-            kv_cache_stride_order = attn_backend.get_kv_cache_stride_order()
+            kv_cache_stride_order = self.attn_backend.get_kv_cache_stride_order()
         except (AttributeError, NotImplementedError):
             kv_cache_stride_order = tuple(range(len(kv_cache_shape)))
 
@@ -170,7 +164,7 @@ def forward(
         k = k.view(-1, self.num_kv_heads, self.head_size)
         v = v.view(-1, self.num_kv_heads, self.head_size)
         kv_cache_dummy_dep = torch.ops.vllm.unified_kv_cache_update(
-            k, v, self.layer_name
+            k, v, _encode_layer_name(self.layer_name)
         )
         return q, k, v, kv_cache_dummy_dep
 
diff --git a/tests/compile/passes/test_scatter_split_replace.py b/tests/compile/passes/test_scatter_split_replace.py
index 659960896403..e85fd9f9efcd 100644
--- a/tests/compile/passes/test_scatter_split_replace.py
+++ b/tests/compile/passes/test_scatter_split_replace.py
@@ -13,6 +13,9 @@
 from vllm.compilation.passes.utility.split_coalescing import SplitCoalescingPass
 from vllm.config import CompilationConfig, CompilationMode, VllmConfig
 from vllm.model_executor.layers.rotary_embedding import RotaryEmbedding
+from vllm.platforms import current_platform
+
+DEVICE_TYPE = current_platform.device_type
 
 
 class ScatterSplitReplacementModel(nn.Module):
@@ -61,7 +64,7 @@ def ops_in_model_after(self) -> list[torch._ops.OpOverload]:
 
 @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16])
 def test_scatter_split_replace(dtype):
-    torch.set_default_device("cuda")
+    torch.set_default_device(DEVICE_TYPE)
     torch.set_default_dtype(dtype)
     torch.manual_seed(0)
 
diff --git a/tests/compile/passes/test_silu_mul_quant_fusion.py b/tests/compile/passes/test_silu_mul_quant_fusion.py
index a77b4e6de7bd..bc134ed427a1 100644
--- a/tests/compile/passes/test_silu_mul_quant_fusion.py
+++ b/tests/compile/passes/test_silu_mul_quant_fusion.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import itertools
+from functools import partial
 
 import pytest
 import torch
@@ -9,7 +10,7 @@
 from tests.compile.backend import TestBackend
 from tests.kernels.quantization.nvfp4_utils import quant_nvfp4_tensor
 from tests.utils import TestFP8Layer
-from vllm._aiter_ops import IS_AITER_FOUND
+from vllm._aiter_ops import IS_AITER_FOUND, rocm_aiter_ops
 from vllm._custom_ops import cutlass_scaled_fp4_mm, scaled_fp4_quant
 from vllm.compilation.passes.fusion.act_quant_fusion import (
     FUSED_OPS,
@@ -34,13 +35,16 @@
     ROCmFP8ScaledMMLinearKernel,
 )
 from vllm.model_executor.layers.activation import SiluAndMul
-from vllm.model_executor.layers.quantization.utils.fp8_utils import W8A8BlockFp8LinearOp
+from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     GroupShape,
+    create_fp8_quant_key,
+    kFp8Dynamic128Sym,
     kFp8StaticTensorSym,
     kNvfp4Dynamic,
 )
 from vllm.platforms import current_platform
+from vllm.utils.deep_gemm import is_deep_gemm_supported
 
 FP8_DTYPE = current_platform.fp8_dtype()
 FP4_DTYPE = torch.uint8
@@ -54,7 +58,11 @@ class TestSiluMulFp8QuantModel(torch.nn.Module):
     quant_key = kFp8StaticTensorSym
 
     def __init__(
-        self, hidden_size: int, force_kernel: FP8ScaledMMLinearKernel, **kwargs
+        self,
+        hidden_size: int,
+        force_kernel: FP8ScaledMMLinearKernel,
+        dtype: torch.dtype,
+        **kwargs,
     ):
         super().__init__()
         self.silu_and_mul = SiluAndMul()
@@ -64,6 +72,7 @@ def __init__(
             activation_quant_key=self.quant_key,
             weight_quant_key=self.quant_key,
             force_kernel=force_kernel,
+            input_dtype=dtype,
         )
 
         self.enable_silu_mul_custom_op = self.silu_and_mul.enabled()
@@ -133,38 +142,90 @@ def ops_in_model_after(self):
 
 
 class TestSiluMulGroupFp8QuantModel(torch.nn.Module):
-    def __init__(self, hidden_size: int, **kwargs):
+    act_quant_key = kFp8Dynamic128Sym
+
+    def __init__(self, hidden_size: int, dtype: torch.dtype, **kwargs):
         super().__init__()
         self.silu_and_mul = SiluAndMul()
-        self.w8a8_block_fp8_linear = W8A8BlockFp8LinearOp(
-            weight_group_shape=GroupShape(128, 128),
-            act_quant_group_shape=GroupShape(1, 128),
-            cutlass_block_fp8_supported=False,
-            use_aiter_and_is_supported=True,
+        self.weight_quant_key = create_fp8_quant_key(
+            static=True, group_shape=GroupShape(hidden_size, hidden_size)
         )
-        self.w = torch.rand(hidden_size, hidden_size).to(dtype=FP8_DTYPE).t()
 
-        scale_hidden_size = (hidden_size + 128 - 1) // 128
-        self.wscale = torch.rand(
-            (scale_hidden_size, scale_hidden_size), dtype=torch.float32
+        self.w8a8_block_fp8_linear = TestFP8Layer(
+            weight_shape=(hidden_size, hidden_size),
+            weight_quant_key=self.weight_quant_key,
+            activation_quant_key=self.act_quant_key,
+            input_dtype=dtype,
         )
 
+        if not current_platform.is_fp8_fnuz():
+            kernel = self.w8a8_block_fp8_linear.kernel
+            orig_quant = kernel.quant_fp8
+            kernel.quant_fp8 = lambda *a, use_triton=False, **kw: orig_quant(
+                *a, use_triton=True, **kw
+            )
+
         self.enable_silu_mul_custom_op = self.silu_and_mul.enabled()
 
     def forward(self, x):
         y = self.silu_and_mul(x)
-        x2 = self.w8a8_block_fp8_linear.apply(y, self.w, self.wscale)
+        x2 = self.w8a8_block_fp8_linear(y)
         return x2
 
     def ops_in_model_before(self):
         return [
             SILU_MUL_OP if self.enable_silu_mul_custom_op else torch.ops.aten.mul,
+            rocm_aiter_ops.get_group_quant_op()
+            if current_platform.is_fp8_fnuz()
+            else torch.ops.vllm.triton_per_token_group_quant_fp8.default,
         ]
 
     def ops_in_model_after(self):
         return [torch.ops.vllm.rocm_aiter_act_mul_and_fp8_group_quant]
 
 
+class TestSiluMulBlockQuantModel(torch.nn.Module):
+    quant_key = kFp8Dynamic128Sym
+
+    def __init__(self, hidden_size: int, is_scale_transposed: bool = False, **kwargs):
+        super().__init__()
+        self.silu_and_mul = SiluAndMul()
+        self.is_scale_transposed = is_scale_transposed
+        self.quant_fp8 = QuantFP8(
+            static=False,
+            group_shape=GroupShape(1, 128),
+            column_major_scales=is_scale_transposed,
+            compile_native=False,
+        )
+
+        self.enable_silu_mul_custom_op = self.silu_and_mul.enabled()
+        self.enable_quant_fp8_custom_op = self.quant_fp8.enabled()
+
+    def forward(self, x):
+        y = self.silu_and_mul(x)
+        out, scale = self.quant_fp8(y)
+        group_size = self.quant_key.scale.group_shape[1]
+        scale_expanded = scale.repeat_interleave(group_size, dim=1)
+        dequant = out.to(dtype=torch.float32) * scale_expanded
+        return (dequant,)
+
+    def ops_in_model_before(self):
+        ops = []
+        if self.enable_silu_mul_custom_op:
+            ops.append(SILU_MUL_OP)
+        # When silu custom op is disabled, aten.mul.Tensor also appears
+        # in dequant code, so we skip checking it to avoid false positives.
+        ops.append(
+            QUANT_OPS[self.quant_key]
+            if self.enable_quant_fp8_custom_op
+            else torch.ops.aten.reciprocal.default
+        )
+        return ops
+
+    def ops_in_model_after(self):
+        return [FUSED_OPS[self.quant_key]]
+
+
 ROCM_KERNELS = [ROCmFP8ScaledMMLinearKernel, PerTensorTorchFP8ScaledMMLinearKernel]
 CUDA_KERNELS = [
     FlashInferFP8ScaledMMLinearKernel,
@@ -200,6 +261,19 @@ def ops_in_model_after(self):
                 not current_platform.is_rocm(), reason="ROCm only"
             ),
         ),
+        # Block quant fusion for per-group FP8 (CUDA only).
+        *[
+            pytest.param(
+                partial(TestSiluMulBlockQuantModel, is_scale_transposed=transposed),
+                True,
+                None,
+                marks=pytest.mark.skipif(
+                    not current_platform.is_cuda(), reason="CUDA only"
+                ),
+                id=f"TestSiluMulBlockQuant-transposed={transposed}",
+            )
+            for transposed in [False, True]
+        ],
     ],
 )
 @pytest.mark.skipif(
@@ -213,6 +287,7 @@ def test_fusion_silu_and_mul_quant(
         TestSiluMulFp8QuantModel
         | TestSiluMulNvfp4QuantModel
         | TestSiluMulGroupFp8QuantModel
+        | TestSiluMulBlockQuantModel
     ],
     enable_silu_mul_custom_op: bool,
     enable_quant_fp8_custom_op: bool,
@@ -223,6 +298,12 @@ def test_fusion_silu_and_mul_quant(
         pytest.skip("NVFP4 is not supported on this GPU.")
     if model_class is TestSiluMulGroupFp8QuantModel and not IS_AITER_FOUND:
         pytest.skip("AITER is not supported on this GPU.")
+    if (
+        isinstance(model_class, partial)
+        and model_class.func is TestSiluMulBlockQuantModel
+        and is_deep_gemm_supported()
+    ):
+        pytest.skip("SiluMul+BlockQuant fusion not applicable with DeepGemm")
 
     torch.set_default_device("cuda")
     torch.set_default_dtype(dtype)
@@ -247,7 +328,6 @@ def test_fusion_silu_and_mul_quant(
     with set_current_vllm_config(config), monkeypatch.context() as m:
         fusion_passes = [ActivationQuantFusionPass(config)]
         if IS_AITER_FOUND and model_class is TestSiluMulGroupFp8QuantModel:
-            from vllm._aiter_ops import rocm_aiter_ops
             from vllm.compilation.passes.fusion.rocm_aiter_fusion import (
                 RocmAiterSiluMulFp8GroupQuantFusionPass,
             )
@@ -258,7 +338,9 @@ def test_fusion_silu_and_mul_quant(
 
         passes = [NoOpEliminationPass(config), *fusion_passes, PostCleanupPass(config)]
         backend = TestBackend(*passes)
-        model = model_class(hidden_size=hidden_size, force_kernel=force_kernel, x=x)
+        model = model_class(
+            hidden_size=hidden_size, force_kernel=force_kernel, x=x, dtype=dtype
+        )
 
         # First dimension dynamic
         torch._dynamo.mark_dynamic(x, 0)
@@ -269,12 +351,20 @@ def test_fusion_silu_and_mul_quant(
         result2 = model2(x)
 
         # Check that it gives the same answer
-        if model_class == TestSiluMulFp8QuantModel:
+        if isinstance(model, TestSiluMulFp8QuantModel):
             atol, rtol = 1e-3, 1e-3
-        elif model_class == TestSiluMulNvfp4QuantModel:
+        elif isinstance(model, TestSiluMulNvfp4QuantModel):
             atol, rtol = 1e-1, 1e-1
-        elif model_class == TestSiluMulGroupFp8QuantModel:
+        elif isinstance(model, TestSiluMulGroupFp8QuantModel):
             atol, rtol = 5e-2, 5e-2
+        elif isinstance(model, TestSiluMulBlockQuantModel):
+            if current_platform.is_rocm():
+                atol, rtol = 1e-3, 1e-3
+            else:
+                # CUDA fused kernel computes silu*mul in fp32 while the reference
+                # goes through bf16/fp16 storage, so group maxima (and thus scales)
+                # can shift by one FP8-e4m3 code (~1/8 relative step).
+                atol, rtol = 5e-2, 5e-2
 
         torch.testing.assert_close(
             result[0].to(dtype=dtype), result2[0].to(dtype=dtype), atol=atol, rtol=rtol
diff --git a/tests/compile/passes/test_split_coalescing.py b/tests/compile/passes/test_split_coalescing.py
index a217a4af9f29..ab7a0be1a215 100644
--- a/tests/compile/passes/test_split_coalescing.py
+++ b/tests/compile/passes/test_split_coalescing.py
@@ -8,6 +8,9 @@
 from tests.compile.backend import TestBackend
 from vllm.compilation.passes.utility.split_coalescing import SplitCoalescingPass
 from vllm.config import CompilationConfig, CompilationMode, PassConfig, VllmConfig
+from vllm.platforms import current_platform
+
+DEVICE_TYPE = current_platform.device_type
 
 
 class SplitCoalescingModel(torch.nn.Module):
@@ -28,7 +31,7 @@ def forward(self, qkv: torch.Tensor):
 
 @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16])
 def test_split_coalescing(dtype):
-    torch.set_default_device("cuda")
+    torch.set_default_device(DEVICE_TYPE)
     torch.set_default_dtype(dtype)
     torch.manual_seed(0)
 
diff --git a/tests/compile/passes/test_vllm_fusion_pattern_matcher_pass.py b/tests/compile/passes/test_vllm_fusion_pattern_matcher_pass.py
new file mode 100644
index 000000000000..381c215a9ae1
--- /dev/null
+++ b/tests/compile/passes/test_vllm_fusion_pattern_matcher_pass.py
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+import torch
+
+import vllm.config
+from tests.compile.backend import TestBackend
+from vllm.compilation.passes.vllm_inductor_pass import (
+    VllmFusionPatternMatcherPass,
+    VllmPatternMatcherPass,
+    VllmPatternReplacement,
+)
+from vllm.config import CompilationConfig, CompilationMode, VllmConfig
+from vllm.platforms import current_platform
+
+
+class ReluToAbsPattern(VllmPatternReplacement):
+    """Replaces relu(x) with abs(x) — a minimal test fixture."""
+
+    @property
+    def pattern(self):
+        def _pattern(x: torch.Tensor) -> torch.Tensor:
+            return torch.ops.aten.relu.default(x)
+
+        return _pattern
+
+    @property
+    def replacement(self):
+        def _replacement(x: torch.Tensor) -> torch.Tensor:
+            return torch.ops.aten.abs.default(x)
+
+        return _replacement
+
+    def get_inputs(self) -> list[torch.Tensor]:
+        return [self.empty_fp32(4)]
+
+
+class ExpToSqrtPattern(VllmPatternReplacement):
+    """A second distinct pattern type — used to test uuid differentiation."""
+
+    @property
+    def pattern(self):
+        def _pattern(x: torch.Tensor) -> torch.Tensor:
+            return torch.ops.aten.exp.default(x)
+
+        return _pattern
+
+    @property
+    def replacement(self):
+        def _replacement(x: torch.Tensor) -> torch.Tensor:
+            return torch.ops.aten.sqrt.default(x)
+
+        return _replacement
+
+    def get_inputs(self) -> list[torch.Tensor]:
+        return [self.empty_fp32(4)]
+
+
+class ReluFusionPass(VllmFusionPatternMatcherPass):
+    def __init__(self, config: VllmConfig) -> None:
+        super().__init__(config, "test_relu_fusion")
+        self.register(ReluToAbsPattern())
+
+
+class TwoPatternFusionPass(VllmFusionPatternMatcherPass):
+    def __init__(self, config: VllmConfig) -> None:
+        super().__init__(config, "test_two_pattern_fusion")
+        self.register(ReluToAbsPattern())
+        self.register(ExpToSqrtPattern())
+
+
+@pytest.fixture
+def vllm_config():
+    return VllmConfig(
+        compilation_config=CompilationConfig(mode=CompilationMode.VLLM_COMPILE),
+    )
+
+
+@pytest.mark.skipif(not current_platform.is_cuda_alike(), reason="Requires CUDA")
+def test_register_tracks_patterns(vllm_config):
+    """register() appends each VllmPatternReplacement to _pattern_replacements."""
+    with vllm.config.set_current_vllm_config(vllm_config):
+        single = ReluFusionPass(vllm_config)
+        two = TwoPatternFusionPass(vllm_config)
+
+    assert len(single._pattern_replacements) == 1
+    assert len(two._pattern_replacements) == 2
+
+
+@pytest.mark.skipif(not current_platform.is_cuda_alike(), reason="Requires CUDA")
+def test_uuid_stable(vllm_config):
+    """Two instances of the same pass class produce identical uuids."""
+    with vllm.config.set_current_vllm_config(vllm_config):
+        p1 = ReluFusionPass(vllm_config)
+        p2 = ReluFusionPass(vllm_config)
+        p3 = TwoPatternFusionPass(vllm_config)
+
+    assert p1.uuid() == p2.uuid()
+    assert p1.uuid() != p3.uuid()
+    assert p2.uuid() != p3.uuid()
+
+
+@pytest.mark.skipif(not current_platform.is_cuda_alike(), reason="Requires CUDA")
+@pytest.mark.parametrize("N", [1, 2, 4])
+def test_matched_count_and_match_table(vllm_config, N):
+    """matched_count and match_table reflect the number of matched patterns."""
+
+    class Model(torch.nn.Module):
+        def forward(self, *inputs):
+            # N independent relus
+            return sum(torch.relu(x) for x in inputs)
+
+    with vllm.config.set_current_vllm_config(vllm_config):
+        torch.set_default_device("cuda")
+        torch.set_default_dtype(torch.float32)
+
+        fusion_pass = ReluFusionPass(vllm_config)
+        backend = TestBackend(fusion_pass)
+        model = torch.compile(Model(), backend=backend)
+
+        inputs = [torch.rand(8) for _ in range(N)]
+        model(*inputs)
+
+    assert fusion_pass.matched_count == N
+    assert VllmPatternMatcherPass.match_table["test_relu_fusion"] >= N
diff --git a/tests/compile/test_aot_compile.py b/tests/compile/test_aot_compile.py
index c3a065c56142..13e988307047 100644
--- a/tests/compile/test_aot_compile.py
+++ b/tests/compile/test_aot_compile.py
@@ -1,9 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import functools
 import hashlib
-import multiprocessing
 import os
 import pickle
 import tempfile
@@ -15,7 +13,6 @@
 import torch
 
 import vllm.envs as envs
-import vllm.model_executor.layers.activation
 from vllm.compilation.backends import VllmBackend
 from vllm.compilation.caching import (
     StandaloneCompiledArtifacts,
@@ -476,64 +473,57 @@ def test_standalone_compile_correctness():
 @create_new_process_for_each_test("spawn")
 def test_gpt2_cache_hit(monkeypatch: pytest.MonkeyPatch):
     """
-    Test that compiling gpt2 twice results in a cache hit and
-    capture torch dynamic symbol creations to ensure make_symbol
-    not called on cache hit.
-    """
+    Test that compiling gpt2 twice results in a cache hit.
 
-    import torch.fx.experimental.symbolic_shapes as symbolic_shapes_module
-    from torch.utils._sympy.symbol import make_symbol
+    Counter values are read from the EngineCore subprocess via
+    ``LLM.collective_rpc`` so the test works under default V1
+    multiprocessing (no shared memory between test and engine).
+    """
 
     from vllm import LLM
 
-    create_symbol_counter = multiprocessing.Value("i", 0)
-    original_make_symbol = make_symbol
+    def _snap(self):
+        from vllm.compilation.counter import compilation_counter
 
-    @functools.wraps(original_make_symbol)
-    def counting_make_symbol(prefix, idx, **kwargs):
-        with create_symbol_counter.get_lock():
-            create_symbol_counter.value += 1
-        return original_make_symbol(prefix, idx, **kwargs)
-
-    symbolic_shapes_module.make_symbol = counting_make_symbol
-    try:
-        with monkeypatch.context() as m, tempfile.TemporaryDirectory() as tmpdirname:
-            m.setenv("VLLM_CACHE_ROOT", tmpdirname)
-            m.setenv("VLLM_USE_AOT_COMPILE", "1")
-            # First compilation - initialize model and generate
-            llm_model = LLM(
-                model="gpt2",
-                compilation_config=CompilationConfig(
-                    mode=CompilationMode.VLLM_COMPILE,
-                ),
-                max_model_len=256,
-            )
+        return (
+            compilation_counter.num_aot_compiles,
+            compilation_counter.num_aot_artifacts_saved,
+            compilation_counter.num_aot_artifacts_loaded,
+        )
 
-            llm_model.generate("Hello, my name is")
-            assert create_symbol_counter.value == 2
-            create_symbol_counter.value = 0
+    # collective_rpc(callable) requires pickle-based serialization.
+    monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
 
-            # Clean up first model
-            del llm_model
-            disable_envs_cache()
-            vllm.model_executor.layers.activation._ACTIVATION_REGISTRY._dict.clear()
+    with monkeypatch.context() as m, tempfile.TemporaryDirectory() as tmpdirname:
+        m.setenv("VLLM_CACHE_ROOT", tmpdirname)
+        m.setenv("VLLM_USE_AOT_COMPILE", "1")
+        # First compilation - initialize model and generate
+        llm_model = LLM(
+            model="gpt2",
+            compilation_config=CompilationConfig(
+                mode=CompilationMode.VLLM_COMPILE,
+            ),
+            max_model_len=256,
+        )
 
-            # Second compilation - should hit cache
-            m.setenv("VLLM_FORCE_AOT_LOAD", "1")
-            llm_model = LLM(
-                model="gpt2",
-                compilation_config=CompilationConfig(
-                    mode=CompilationMode.VLLM_COMPILE,
-                ),
-                max_model_len=256,
-            )
-            llm_model.generate("Hello, my name is")
+        llm_model.generate("Hello, my name is")
+        assert llm_model.collective_rpc(_snap)[0] == (1, 1, 0)
 
-            assert create_symbol_counter.value == 0
+        # Clean up first model
+        del llm_model
+        disable_envs_cache()
 
-    finally:
-        # Restore original method
-        symbolic_shapes_module.make_symbol = original_make_symbol
+        # Second compilation - should hit cache
+        m.setenv("VLLM_FORCE_AOT_LOAD", "1")
+        llm_model = LLM(
+            model="gpt2",
+            compilation_config=CompilationConfig(
+                mode=CompilationMode.VLLM_COMPILE,
+            ),
+            max_model_len=256,
+        )
+        llm_model.generate("Hello, my name is")
+        assert llm_model.collective_rpc(_snap)[0] == (0, 0, 1)
 
 
 @pytest.mark.skipif(not is_torch_equal_or_newer("2.10.0"), reason="requires torch 2.10")
diff --git a/tests/compile/test_codegen.py b/tests/compile/test_codegen.py
new file mode 100644
index 000000000000..21db287a2478
--- /dev/null
+++ b/tests/compile/test_codegen.py
@@ -0,0 +1,376 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for vllm.compilation.codegen — execution code generation.
+
+Each test runs a real Python function through the same pipeline vLLM uses
+in production: ``make_fx`` to obtain an aten-level fx graph, ``split_graph``
+to split it into the stitching layer + submodules, and then
+``generate_execution_code``/``compile_execution_fn`` for codegen.
+"""
+
+from collections.abc import Callable
+
+import pytest
+import regex as re
+import torch
+import torch.fx as fx
+from torch.fx.experimental.proxy_tensor import make_fx
+
+from vllm.compilation.backends import split_graph
+from vllm.compilation.codegen import (
+    _node_ref,
+    compile_execution_fn,
+    generate_execution_code,
+    generate_execution_code_with_name,
+)
+from vllm.utils.torch_utils import is_torch_equal_or_newer
+
+
+def _trace_and_split(
+    model_fn: Callable[..., torch.Tensor],
+    example_inputs: tuple[torch.Tensor, ...],
+    split_ops: list[str],
+) -> fx.GraphModule:
+    """Trace ``model_fn`` with make_fx, then split on the named aten ops."""
+    gm = make_fx(model_fn)(*example_inputs)
+    split_gm, _ = split_graph(gm, split_ops)
+    return split_gm
+
+
+def _to_copy_model(x: torch.Tensor) -> torch.Tensor:
+    """Traces to ``aten._to_copy.default`` with device + dtype kwargs."""
+    return x.to(device=torch.device("cpu"), dtype=torch.float16)
+
+
+def _empty_model(x: torch.Tensor) -> torch.Tensor:
+    """Traces to ``aten.empty.memory_format`` with device + dtype kwargs."""
+    buf = torch.empty(x.shape, device=torch.device("cpu"), dtype=torch.float16)
+    return buf.fill_(0).add(x.to(dtype=torch.float16))
+
+
+@pytest.fixture
+def x() -> torch.Tensor:
+    return torch.zeros(2, 3)
+
+
+@pytest.mark.parametrize(
+    "model_fn,split_ops",
+    [
+        (_to_copy_model, ["aten::_to_copy.default"]),
+        (_empty_model, []),
+    ],
+    ids=["aten::_to_copy.default", "aten::empty.memory_format"],
+)
+def test_non_primitive_kwargs_lifted_to_consts(
+    model_fn: Callable[[torch.Tensor], torch.Tensor],
+    split_ops: list[str],
+    x: torch.Tensor,
+) -> None:
+    """Regression: arguments whose ``repr()`` is not a valid Python
+    expression in the generated function's namespace (notably
+    ``torch.device``) used to be inlined via ``repr()``, producing source
+    like
+
+        out = torch.ops.aten._to_copy.default(x, device=device(type='cpu'))
+
+    which fails at call time — only ``torch`` and ``operator`` are imported
+    into the namespace, so ``device`` is unbound. The fix collects such
+    objects into ``__vllm_consts__`` and references them by index. The
+    unqualified ``device(type=...)`` form must never appear in the
+    generated source."""
+    split_gm = _trace_and_split(model_fn, (x,), split_ops)
+    code, submod_names, consts = generate_execution_code(split_gm)
+
+    assert "device(type=" not in code, (
+        "Generated code contains unqualified `device(type=...)` from repr(); "
+        "torch.device should be lifted into __vllm_consts__"
+    )
+    assert torch.device("cpu") in consts, "torch.device kwarg not lifted to consts"
+    assert torch.float16 in consts, "torch.dtype kwarg not lifted to consts"
+
+    fn = compile_execution_fn(code, {}, submod_names, consts)
+    out = fn(x)
+    expected = model_fn(x)
+    assert torch.equal(out, expected), "Compiled output does not match reference"
+
+
+def test_dtype_singleton_deduped(x: torch.Tensor) -> None:
+    """``torch.float16`` is a process-wide singleton, so two ops referring
+    to it in the traced graph share a single consts slot via ``id()``-based
+    dedup. Distinct expressions (``x.to(...)`` vs ``(x*2).to(...)``) ensure
+    the tracer can't CSE the two ops into a single node."""
+
+    def model_fn(x: torch.Tensor) -> torch.Tensor:
+        return x.to(dtype=torch.float16) + (x * 2).to(dtype=torch.float16)
+
+    split_gm = _trace_and_split(model_fn, (x,), [])
+    code, submod_names, consts = generate_execution_code(split_gm)
+
+    # The traced graph must have two distinct _to_copy nodes (otherwise the
+    # dedup assertion below is trivially satisfied).
+    n_to_copy = sum(
+        1
+        for n in split_gm.graph.nodes
+        if n.op == "call_module"
+        for sn in getattr(split_gm, n.target).graph.nodes
+        if sn.op == "call_function" and "to_copy" in sn.name
+    )
+    assert n_to_copy >= 2, (
+        f"Test setup failed: expected ≥2 _to_copy nodes, got {n_to_copy}"
+    )
+
+    assert consts.count(torch.float16) == 1, (
+        f"torch.float16 should occupy exactly one slot, got consts={consts}"
+    )
+    assert code.count("__vllm_consts__[0]") >= 2, (
+        "Deduped const slot should be referenced from both _to_copy nodes"
+    )
+
+    fn = compile_execution_fn(code, {}, submod_names, consts)
+    assert torch.equal(fn(x), model_fn(x))
+
+
+def test_distinct_dtypes_get_distinct_slots(x: torch.Tensor) -> None:
+    """Distinct dtype singletons in the traced graph occupy distinct slots."""
+
+    def model_fn(x: torch.Tensor) -> torch.Tensor:
+        return x.to(dtype=torch.float16) + x.to(dtype=torch.bfloat16)
+
+    split_gm = _trace_and_split(model_fn, (x,), [])
+    _, _, consts = generate_execution_code(split_gm)
+
+    assert torch.float16 in consts
+    assert torch.bfloat16 in consts
+    assert len(consts) == 2, f"Expected 2 distinct dtype slots, got {consts}"
+
+
+def test_consts_ordering_deterministic(x: torch.Tensor) -> None:
+    """Two independent traces of the same model must produce equal consts
+    lists *in the same order*. Cache artifacts identify const slots by
+    index, so a non-deterministic order would invalidate cached code."""
+
+    def model_fn(x: torch.Tensor) -> torch.Tensor:
+        # Multiple distinct non-primitives encountered in a fixed graph order.
+        a = x.to(device=torch.device("cpu"), dtype=torch.float16)
+        return a.to(dtype=torch.bfloat16)
+
+    _, _, consts1 = generate_execution_code(_trace_and_split(model_fn, (x,), []))
+    _, _, consts2 = generate_execution_code(_trace_and_split(model_fn, (x,), []))
+
+    assert len(consts1) >= 2, "Test setup: model should produce ≥2 const slots"
+    assert consts1 == consts2, (
+        f"consts ordering must be reproducible across traces; "
+        f"got {consts1} vs {consts2}"
+    )
+
+
+def test_primitive_args_inlined(x: torch.Tensor) -> None:
+    """Primitive args (int dim, etc.) stay inline as repr — no consts."""
+
+    def model_fn(x: torch.Tensor) -> torch.Tensor:
+        return torch.transpose(x, 0, 1).relu()
+
+    split_gm = _trace_and_split(model_fn, (x,), [])
+    code, submod_names, consts = generate_execution_code(split_gm)
+
+    assert consts == [], "Primitive-only graph must produce empty consts"
+
+    fn = compile_execution_fn(code, {}, submod_names, consts)
+    assert torch.equal(fn(x), model_fn(x))
+
+
+def test_consts_shared_across_split_submods(x: torch.Tensor) -> None:
+    """Dedup must apply across inlined submodules, not just within one.
+
+    The function below splits into three inlined submods, two of which
+    independently reference ``torch.float16``. The shared ``const_index``
+    threaded through recursive ``generate_execution_code_with_name`` calls
+    must collapse the dtype to a single slot used from both submods."""
+
+    def model_fn(x: torch.Tensor) -> torch.Tensor:
+        a = x.to(dtype=torch.float16)  # submod_0: _to_copy(fp16)
+        b = a.relu()  # submod_1: relu (split point)
+        c = b.to(dtype=torch.float32)  # submod_2: _to_copy(fp32)
+        return c.to(dtype=torch.float16) + 1  # submod_2: another _to_copy(fp16)
+
+    split_gm = _trace_and_split(model_fn, (x,), ["aten::relu.default"])
+
+    n_submods = sum(1 for _ in split_gm.named_children())
+    assert n_submods >= 3, (
+        f"Test setup failed: expected ≥3 submods after split, got {n_submods}"
+    )
+
+    code, submod_names, consts = generate_execution_code(split_gm)
+
+    assert consts.count(torch.float16) == 1, (
+        f"fp16 singleton must dedup across submods, got consts={consts}"
+    )
+
+    # Find the consts index for fp16 and confirm at least two distinct
+    # inlined submods reference it. This rules out the false-positive where
+    # one submod references it twice and the other not at all.
+    fp16_idx = consts.index(torch.float16)
+    submod_bodies = re.findall(
+        r"def __vllm_inlined_submods__(\d+)\([^)]*\):\n((?:    .*\n)+)", code
+    )
+    assert len(submod_bodies) >= 2
+    referencing_submods = [
+        name for name, body in submod_bodies if f"__vllm_consts__[{fp16_idx}]" in body
+    ]
+    assert len(referencing_submods) >= 2, (
+        f"fp16 slot should be referenced from ≥2 inlined submods, "
+        f"got {referencing_submods}"
+    )
+
+    fn = compile_execution_fn(code, {}, submod_names, consts)
+    assert torch.equal(fn(x), model_fn(x))
+
+
+def test_non_graphmodule_submod_uses_indexed_callable(x: torch.Tensor) -> None:
+    """When a child of split_gm is *not* a ``torch.fx.GraphModule`` — as
+    happens in production once ``PiecewiseBackend`` replaces submods —
+    codegen emits ``__vllm_submods__[idx](...)`` instead of inlining, and
+    the runtime callable is bound from ``submod_callables``."""
+
+    def model_fn(x: torch.Tensor) -> torch.Tensor:
+        return x.relu().sigmoid()
+
+    split_gm = _trace_and_split(model_fn, (x,), ["aten::relu.default"])
+
+    # Find a GraphModule child and wrap it in a non-GraphModule nn.Module
+    # that delegates to the original — this is the structural shape vLLM
+    # produces after PiecewiseBackend takes over a submod.
+    child_names = [name for name, _ in split_gm.named_children()]
+    target_name = child_names[0]
+
+    class NonGMWrapper(torch.nn.Module):
+        def __init__(self, gm: fx.GraphModule) -> None:
+            super().__init__()
+            self.gm = gm
+
+        def forward(self, *args, **kwargs):
+            return self.gm(*args, **kwargs)
+
+    original = getattr(split_gm, target_name)
+    del split_gm._modules[target_name]
+    split_gm.add_module(target_name, NonGMWrapper(original))
+
+    code, submod_names, consts = generate_execution_code(split_gm)
+
+    assert "__vllm_submods__[" in code, (
+        "Non-GraphModule submod should produce an indexed callable reference"
+    )
+    assert target_name in submod_names
+
+    submod_callables = {
+        name: getattr(split_gm, name)
+        for name in submod_names
+        if not isinstance(getattr(split_gm, name), fx.GraphModule)
+    }
+    fn = compile_execution_fn(code, submod_callables, submod_names, consts)
+    assert torch.equal(fn(x), model_fn(x))
+
+
+# split_graph only passes tuple_return=True to split_module on PyTorch >= 2.12,
+# so getitem nodes only appear in the stitching graph from that version onward.
+@pytest.mark.skipif(
+    not is_torch_equal_or_newer("2.12.0.dev"),
+    reason="split_module tuple_return requires PyTorch >= 2.12",
+)
+def test_getitem_in_stitching_graph(x: torch.Tensor) -> None:
+    """``operator.getitem`` on submod tuple returns is the ``call_function``
+    special case at codegen.py — emitted as ``name = source[index]``
+    rather than a function call."""
+
+    def model_fn(x: torch.Tensor) -> torch.Tensor:
+        return x.relu().sigmoid()
+
+    split_gm = _trace_and_split(model_fn, (x,), ["aten::relu.default"])
+    code, _, _ = generate_execution_code(split_gm)
+
+    # split_module wraps each submod return in a tuple, so the stitching
+    # graph unpacks via getitem. The codegen must emit it as indexing.
+    assert re.search(r"\b\w+ = \w+\[\d+\]\n", code), (
+        "Stitching graph should emit `name = source[N]` for getitem nodes"
+    )
+
+
+def test_del_emitted_for_intermediate_values(x: torch.Tensor) -> None:
+    """The codegen schedules ``del`` after a value's last use to free
+    memory early. Multi-submod splits naturally have intermediates whose
+    last use is not the output node."""
+
+    def model_fn(x: torch.Tensor) -> torch.Tensor:
+        return x.relu().sigmoid().tanh()
+
+    split_gm = _trace_and_split(
+        model_fn, (x,), ["aten::relu.default", "aten::sigmoid.default"]
+    )
+    code, _, _ = generate_execution_code(split_gm)
+
+    assert re.search(r"^    del \w+", code, re.MULTILINE), (
+        "Liveness analysis should emit `del` for intermediates with "
+        "last-use before the output"
+    )
+
+
+def test_with_submod_false_rejects_call_module() -> None:
+    """``generate_execution_code_with_name(with_submod=False)`` is the
+    recursive entry for inlining a GraphModule into its parent. It must
+    refuse a graph that itself contains ``call_module`` nodes — the parent
+    is responsible for handling those."""
+    g = fx.Graph()
+    x_node = g.placeholder("x")
+    root = torch.nn.Module()
+    root.add_module("inner", torch.nn.Identity())
+    call = g.call_module("inner", args=(x_node,))
+    g.output(call)
+    gm = fx.GraphModule(root, g)
+
+    with pytest.raises(RuntimeError, match="call_module is not allowed"):
+        generate_execution_code_with_name(gm, "f", with_submod=False)
+
+
+def test_node_ref_recurses_through_containers() -> None:
+    """``_node_ref`` is the recursive walker that lifts non-primitives
+    nested inside list/tuple/dict args. Real aten ops rarely produce such
+    structures, but the path is needed for DTensor placement lists and
+    other future cases — unit-test the walker directly."""
+    consts: list = []
+    const_index: dict[int, int] = {}
+    cpu = torch.device("cpu")
+
+    # Non-primitive in a list, primitive alongside.
+    assert _node_ref([cpu, 1], consts, const_index) == "[__vllm_consts__[0], 1]"
+    assert consts == [cpu]
+
+    # Same object in a tuple — id-based dedup reuses the existing slot.
+    assert _node_ref((cpu, 2), consts, const_index) == "(__vllm_consts__[0], 2)"
+    assert consts == [cpu]
+
+    # Single-element tuple uses the trailing-comma form.
+    assert _node_ref((cpu,), consts, const_index) == "(__vllm_consts__[0],)"
+
+    # Dict value lifts the same way.
+    ref = _node_ref({"k": cpu}, consts, const_index)
+    assert ref == "{'k': __vllm_consts__[0]}"
+
+
+def test_legacy_code_without_consts() -> None:
+    """``compile_execution_fn(consts=None)`` must still load code that has
+    no ``__vllm_consts__`` reference, so older serialized cache artifacts
+    keep working."""
+    # Pre-consts codegen: no __vllm_consts__ reference, only torch/operator.
+    legacy_code = (
+        "import torch\n"
+        "def execution_fn(x, *, __vllm_submods__):\n"
+        "    return __vllm_submods__[0](x) + 1\n"
+    )
+
+    class AddOne(torch.nn.Module):
+        def forward(self, x: torch.Tensor) -> torch.Tensor:
+            return x + 1
+
+    fn = compile_execution_fn(legacy_code, {"sub": AddOne()}, ["sub"], consts=None)
+    out = fn(torch.zeros(3))
+    assert torch.equal(out, torch.full((3,), 2.0))
diff --git a/tests/compile/test_config.py b/tests/compile/test_config.py
index 53434b0b4c68..d822b68c5036 100644
--- a/tests/compile/test_config.py
+++ b/tests/compile/test_config.py
@@ -31,6 +31,8 @@
 # This import automatically registers `torch.ops.silly.attention`
 from . import silly_attention  # noqa: F401
 
+DEVICE_TYPE = current_platform.device_type
+
 
 def test_version():
     # Test the version comparison logic using the private function
@@ -203,6 +205,22 @@ def test_enforce_eager(vllm_runner, monkeypatch):
         pass
 
 
+@pytest.mark.forked
+def test_torch_compile_disable(vllm_runner, monkeypatch):
+    monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
+    monkeypatch.setenv("TORCH_COMPILE_DISABLE", "1")
+    monkeypatch.setenv("VLLM_DISABLE_COMPILE_CACHE", "1")
+
+    with (
+        compilation_counter.expect(num_graphs_seen=0, stock_torch_compile_count=0),
+        vllm_runner(
+            "facebook/opt-125m",
+            gpu_memory_utilization=0.4,
+        ) as _,
+    ):
+        pass
+
+
 def test_splitting_ops_dynamic():
     # Default config
     config = VllmConfig()
@@ -216,12 +234,14 @@ def test_splitting_ops_dynamic():
         compilation_config=CompilationConfig(
             mode=CompilationMode.VLLM_COMPILE,
             use_inductor_graph_partition=True,
-            splitting_ops=["vllm::unified_attention"],
+            splitting_ops=["vllm::unified_attention_with_output"],
         )
     )
     # with inductor partition we use splitting_ops directly for
     # partition rules
-    assert config.compilation_config.splitting_ops == ["vllm::unified_attention"]
+    assert config.compilation_config.splitting_ops == [
+        "vllm::unified_attention_with_output"
+    ]
 
     # When attn_fusion pass enabled.
     config = VllmConfig(
@@ -281,7 +301,7 @@ def test_moe_splitting_ops_deepep_ht_inductor_partition():
             mode=CompilationMode.VLLM_COMPILE,
             use_inductor_graph_partition=True,
             splitting_ops=[
-                "vllm::unified_attention",
+                "vllm::unified_attention_with_output",
                 "vllm::moe_forward",
                 "vllm::moe_forward_shared",
             ],
@@ -289,7 +309,7 @@ def test_moe_splitting_ops_deepep_ht_inductor_partition():
     )
     splitting_ops = config.compilation_config.splitting_ops
     assert splitting_ops == [
-        "vllm::unified_attention",
+        "vllm::unified_attention_with_output",
         "vllm::moe_forward",
         "vllm::moe_forward_shared",
     ]
@@ -385,9 +405,12 @@ def test_should_split():
         (None, 0, 1, False, 2048, CUDAGraphMode.NONE, 0),
         # truncated to nearest multiple of 8 or 16
         (None, 257, 1, False, 2048, CUDAGraphMode.FULL_AND_PIECEWISE, 256),
+        # max_num_batched_tokens <= max_cudagraph_capture_size should always be
+        # captured even if not landing on a 16-stride step
+        (None, 2048, 1, False, 257, CUDAGraphMode.FULL_AND_PIECEWISE, 257),
         # max from list
         ([1, 2, 4, 15], None, 1, False, 2048, CUDAGraphMode.FULL_AND_PIECEWISE, 15),
-        # filtered out 15 due to SP
+        # SP forces full-graph compilation, sizes are filtered by TP
         ([1, 2, 4, 15], None, 2, True, 2048, CUDAGraphMode.FULL_AND_PIECEWISE, 4),
         # limited by the max_tokens
         ([1, 2, 4, 15], None, 1, False, 8, CUDAGraphMode.FULL_AND_PIECEWISE, 4),
@@ -412,7 +435,7 @@ def test_cudagraph_sizes_post_init(
 
     with (
         ctx,
-        patch("vllm.config.parallel.cuda_device_count_stateless", return_value=tp_size),
+        patch.object(current_platform, "device_count", return_value=tp_size),
     ):
         kwargs = {}
         if cudagraph_capture_sizes is not None:
@@ -445,6 +468,123 @@ def test_cudagraph_sizes_post_init(
         )
 
 
+@pytest.mark.skipif(
+    not current_platform.support_static_graph_mode(),
+    reason="Skip if not cudagraph mode supported",
+)
+@pytest.mark.parametrize(
+    (
+        "cudagraph_mode",
+        "use_inductor_graph_partition",
+        "expected_enable_sp",
+        "expected_cudagraph_mode",
+        "expected_piecewise_compile",
+        "expected_capture_sizes",
+        "expected_max_size",
+    ),
+    [
+        (CUDAGraphMode.PIECEWISE, False, True, CUDAGraphMode.FULL, False, [2, 4], 4),
+        (
+            CUDAGraphMode.FULL_DECODE_ONLY,
+            False,
+            True,
+            CUDAGraphMode.FULL_DECODE_ONLY,
+            False,
+            [2, 4],
+            4,
+        ),
+        (
+            CUDAGraphMode.FULL_AND_PIECEWISE,
+            False,
+            True,
+            CUDAGraphMode.FULL,
+            False,
+            [2, 4],
+            4,
+        ),
+        (
+            CUDAGraphMode.FULL_AND_PIECEWISE,
+            True,
+            True,
+            CUDAGraphMode.FULL_AND_PIECEWISE,
+            True,
+            [2, 4],
+            4,
+        ),
+    ],
+)
+def test_sequence_parallelism_requires_full_graph_compilation(
+    cudagraph_mode: CUDAGraphMode,
+    use_inductor_graph_partition: bool,
+    expected_enable_sp: bool,
+    expected_cudagraph_mode: CUDAGraphMode,
+    expected_piecewise_compile: bool,
+    expected_capture_sizes: list[int],
+    expected_max_size: int,
+):
+    with patch.object(current_platform, "device_count", return_value=2):
+        vllm_config = VllmConfig(
+            parallel_config=ParallelConfig(tensor_parallel_size=2),
+            scheduler_config=SchedulerConfig(
+                max_num_seqs=128,
+                max_num_batched_tokens=2048,
+                max_model_len=2048,
+                is_encoder_decoder=False,
+            ),
+        )
+        vllm_config.model_config = MagicMock(
+            dtype=torch.float16,
+            enforce_eager=False,
+            is_moe=False,
+            disable_cascade_attn=False,
+            get_hidden_size=MagicMock(return_value=4096),
+        )
+        vllm_config.compilation_config = CompilationConfig(
+            mode=CompilationMode.VLLM_COMPILE,
+            cudagraph_capture_sizes=[1, 2, 4, 15],
+            max_cudagraph_capture_size=None,
+            compile_sizes=["cudagraph_capture_sizes"],
+            use_inductor_graph_partition=use_inductor_graph_partition,
+            pass_config=PassConfig(
+                enable_sp=True,
+                fuse_gemm_comms=True,
+                fuse_norm_quant=True,
+                fuse_act_quant=True,
+                eliminate_noops=True,
+                sp_min_token_num=512,
+            ),
+            cudagraph_mode=cudagraph_mode,
+        )
+        vllm_config.compilation_config.set_splitting_ops_for_v1(
+            all2all_backend=vllm_config.parallel_config.all2all_backend,
+            data_parallel_size=1,
+        )
+        vllm_config._set_compile_ranges()
+        vllm_config._set_cudagraph_sizes()
+
+    assert (
+        vllm_config.compilation_config.use_inductor_graph_partition
+        == use_inductor_graph_partition
+    )
+    assert (
+        bool(vllm_config.compilation_config.splitting_ops) == expected_piecewise_compile
+    )
+    assert vllm_config.compilation_config.pass_config.enable_sp == expected_enable_sp
+    assert (
+        vllm_config.compilation_config.pass_config.fuse_gemm_comms == expected_enable_sp
+    )
+    assert vllm_config.compilation_config.cudagraph_mode == expected_cudagraph_mode
+    assert (
+        vllm_config.compilation_config.cudagraph_capture_sizes == expected_capture_sizes
+    )
+    assert (
+        vllm_config.compilation_config.max_cudagraph_capture_size == expected_max_size
+    )
+    assert (
+        511 in vllm_config.compilation_config.compile_ranges_endpoints
+    ) == expected_enable_sp
+
+
 def test_cached_compilation_config(default_vllm_config):
     import torch
     from torch._inductor.utils import run_and_get_code
@@ -454,7 +594,7 @@ def test_cached_compilation_config(default_vllm_config):
     from vllm.model_executor.layers.quantization.utils.quant_utils import GroupShape
 
     dtype = torch.bfloat16
-    device = torch.device("cuda:0")
+    device = torch.device(f"{DEVICE_TYPE}:0")
     batch_size, num_qo_heads, head_size = 8, 16, 128
 
     # access and cache default compilation config
@@ -476,7 +616,7 @@ def test_cached_compilation_config(default_vllm_config):
         query_quant = QuantFP8(static=True, group_shape=GroupShape.PER_TENSOR)
         query_quant = torch.compile(query_quant)
 
-        _q_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")
+        _q_scale = torch.tensor(1.0, dtype=torch.float32, device=DEVICE_TYPE)
         query = torch.randn(
             batch_size, num_qo_heads * head_size, dtype=dtype, device=device
         )
@@ -577,48 +717,6 @@ def test_compile_sizes_padding_validation():
     dispatcher.initialize_cudagraph_keys(CUDAGraphMode.NONE)  # Should not raise
 
 
-@pytest.mark.parametrize(
-    "capture_sizes, max_size, num_blocks, expected_sizes, expected_max",
-    [
-        # Normal capping: sizes filtered to <= num_blocks
-        (
-            [1, 2, 4, 8, 16, 32, 64, 128, 256, 512],
-            512,
-            200,
-            [1, 2, 4, 8, 16, 32, 64, 128],
-            128,
-        ),
-        # No capping needed: num_blocks >= max
-        ([1, 2, 4, 8, 16], 16, 1000, [1, 2, 4, 8, 16], 16),
-        # Exact boundary: num_blocks == max (no capping)
-        ([1, 2, 4, 8, 16, 32], 32, 32, [1, 2, 4, 8, 16, 32], 32),
-        # All sizes capped: num_blocks < smallest size
-        ([8, 16, 32], 32, 4, [], 0),
-        # num_blocks <= 0: early return, no change
-        ([1, 2, 4], 4, 0, [1, 2, 4], 4),
-    ],
-)
-def test_adjust_cudagraph_sizes_for_mamba_cache(
-    capture_sizes, max_size, num_blocks, expected_sizes, expected_max
-):
-    """Test that cudagraph capture sizes are correctly capped to fit
-    available Mamba cache blocks.
-
-    See: https://github.com/vllm-project/vllm/issues/34094
-    """
-    config = CompilationConfig(
-        cudagraph_capture_sizes=capture_sizes,
-        max_cudagraph_capture_size=max_size,
-        cudagraph_mode=CUDAGraphMode.NONE,
-    )
-    config.adjust_cudagraph_sizes_for_mamba_cache(num_blocks)
-    assert config.cudagraph_capture_sizes == expected_sizes
-    assert config.max_cudagraph_capture_size == expected_max
-    # Invariant: last element == max_cudagraph_capture_size
-    if expected_sizes:
-        assert config.cudagraph_capture_sizes[-1] == config.max_cudagraph_capture_size
-
-
 def test_inductor_asserts_default_disabled(monkeypatch):
     """Test that inductor runtime asserts are disabled by default
     (INFO logging level) on torch < 2.12."""
@@ -655,6 +753,24 @@ def test_inductor_asserts_enabled_in_debug(monkeypatch):
         assert config.inductor_compile_config.get("scalar_asserts") is True
 
 
+def test_get_inductor_factors_includes_configs():
+    """Changing inductor or functorch config must change the cache key factors."""
+    from torch._functorch import config as functorch_config
+    from torch._inductor import config as inductor_config
+
+    from vllm.compilation.compiler_interface import get_inductor_factors
+
+    baseline = get_inductor_factors()
+
+    with inductor_config.patch("max_autotune", not inductor_config.max_autotune):
+        patched = get_inductor_factors()
+    assert baseline != patched, "inductor config change was not reflected"
+
+    with functorch_config.patch("donated_buffer", not functorch_config.donated_buffer):
+        patched = get_inductor_factors()
+    assert baseline != patched, "functorch config change was not reflected"
+
+
 def test_inductor_asserts_user_override(monkeypatch):
     """Test that explicit inductor_compile_config overrides the
     debug-logging default."""
diff --git a/tests/compile/test_dynamic_shapes_compilation.py b/tests/compile/test_dynamic_shapes_compilation.py
index bbd62237c5e8..e45e5cf425fe 100644
--- a/tests/compile/test_dynamic_shapes_compilation.py
+++ b/tests/compile/test_dynamic_shapes_compilation.py
@@ -8,6 +8,7 @@
 import pytest
 import torch
 
+from tests.models.utils import check_logprobs_close
 from vllm import LLM, SamplingParams
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CompilationConfig, VllmConfig, set_current_vllm_config
@@ -17,7 +18,6 @@
     DynamicShapesType,
 )
 from vllm.forward_context import set_forward_context
-from vllm.tokenizers import get_tokenizer
 from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 
@@ -28,7 +28,7 @@ def get_test_models():
         "Qwen/Qwen2-7B-Instruct",
         "meta-llama/Llama-3.1-8B",
     ]
-    if is_torch_equal_or_newer("2.12.0"):
+    if is_torch_equal_or_newer("2.12.0.dev"):
         models.append("Qwen/Qwen3-4B-Instruct-2507")
     return models
 
@@ -55,12 +55,10 @@ def test_dynamic_shapes_compilation(
     evaluate_guards,
 ):
     """Test that all dynamic shapes types compile successfully"""
-    if use_bytecode_hook and shapes_type == DynamicShapesType.UNBACKED:
-        pytest.skip("UNBACKED dynamic shapes require VLLM_USE_BYTECODE_HOOK=0")
-
     if evaluate_guards and shapes_type == DynamicShapesType.UNBACKED:
         pytest.skip("unbacked dynamic shapes do not add guards")
 
+    # TODO is this still a requirement?
     if evaluate_guards and use_aot_compile:
         pytest.skip("evaluate_guards requires use_aot_compile=0")
 
@@ -84,30 +82,37 @@ def test_dynamic_shapes_compilation(
         max_model_len=1024,
     )
 
-    output = model.generate(prompt)
-    result = output[0].outputs[0].text
-    # Example of setting the sampling parameters
-    tokenizer = get_tokenizer(model_name)
-    yes_tokens = tokenizer.encode("yes", add_special_tokens=False)
-    no_tokens = tokenizer.encode("no", add_special_tokens=False)
-    allowed_ids = list(set(yes_tokens + no_tokens))
-    sampling_params = SamplingParams(
-        max_tokens=1, temperature=0, allowed_token_ids=allowed_ids
-    )
+    sampling_params = SamplingParams(max_tokens=5, temperature=0, logprobs=10)
+    test_prompts = [prompt, "The capital of France is"]
 
-    output = model.generate(
-        "answer with yes or no is " + result + " rubbish for prompt " + prompt + "?",
-        sampling_params=sampling_params,
-    )
-    result = output[0].outputs[0].text
-    assert result == "yes"
+    compiled_outputs = []
+    for p in test_prompts:
+        output = model.generate(p, sampling_params)[0].outputs[0]
+        assert len(output.text.strip()) > 0, "Compiled model produced empty output"
+        compiled_outputs.append((output.token_ids, output.text, output.logprobs))
 
-    # Clean up GPU memory
     del model
     gc.collect()
     torch.accelerator.empty_cache()
     torch.accelerator.synchronize()
-    print("GPU memory cleared")
+
+    eager_model = LLM(model=model_name, enforce_eager=True, max_model_len=1024)
+    eager_outputs = []
+    for p in test_prompts:
+        output = eager_model.generate(p, sampling_params)[0].outputs[0]
+        assert len(output.text.strip()) > 0, "Eager model produced empty output"
+        eager_outputs.append((output.token_ids, output.text, output.logprobs))
+    del eager_model
+    gc.collect()
+    torch.accelerator.empty_cache()
+    torch.accelerator.synchronize()
+
+    check_logprobs_close(
+        outputs_0_lst=eager_outputs,
+        outputs_1_lst=compiled_outputs,
+        name_0="eager",
+        name_1="compiled",
+    )
 
 
 @pytest.mark.parametrize("use_aot_compile", ["0", "1"])
@@ -222,3 +227,47 @@ def test(model_class, input1, input2, is_01_specialization=False):
         torch.randn(1, 10).cuda(),
         is_01_specialization=True,
     )
+
+
+@pytest.mark.skipif(not is_torch_equal_or_newer("2.10.0"), reason="requires torch 2.10")
+def test_piecewise_backend_empty_sym_shape_indices():
+    """Test that PiecewiseBackend handles empty sym_shape_indices correctly.
+
+    When all inputs have static shapes (no torch.SymInt), sym_shape_indices
+    will be empty. The fix in PiecewiseBackend.__call__ handles this case
+    by using the first compiled range_entry.
+    """
+    gc.collect()
+    torch.accelerator.empty_cache()
+    torch.accelerator.synchronize()
+
+    # Use small max_model_len and max_num_batched_tokens to encourage
+    # static shape compilation with empty sym_shape_indices
+    llm = LLM(
+        model="Qwen/Qwen3-0.6B",
+        max_model_len=512,
+        max_num_batched_tokens=1,
+        compilation_config={
+            "mode": CompilationMode.VLLM_COMPILE,
+            "dynamic_shapes_config": {
+                "type": DynamicShapesType.BACKED.value,
+            },
+        },
+    )
+
+    sampling_params = SamplingParams(temperature=0, top_p=0.95, max_tokens=10)
+
+    # Generate with static shape inputs
+    output = llm.generate("Hello, my name is", sampling_params=sampling_params)
+    result = output[0].outputs[0].text
+    assert len(result) > 0, "Should generate non-empty output"
+
+    # Generate again to verify compilation works with empty sym_shape_indices
+    output = llm.generate("The capital of France is", sampling_params=sampling_params)
+    result = output[0].outputs[0].text
+    assert len(result) > 0, "Should generate non-empty output on second run"
+
+    del llm
+    gc.collect()
+    torch.accelerator.empty_cache()
+    torch.accelerator.synchronize()
diff --git a/tests/compile/test_graph_partition.py b/tests/compile/test_graph_partition.py
index 0b490e97f3f2..4cb199b5897d 100644
--- a/tests/compile/test_graph_partition.py
+++ b/tests/compile/test_graph_partition.py
@@ -9,12 +9,19 @@
 import torch.fx as fx
 from torch.fx.experimental.proxy_tensor import make_fx
 
-from vllm.compilation.backends import _is_empty_allocation_node, split_graph
+from vllm.compilation.backends import (
+    _decompose_size_nodes,
+    _is_empty_allocation_node,
+    split_graph,
+)
 from vllm.compilation.passes.fx_utils import find_op_nodes
+from vllm.platforms import current_platform
 
 # This import automatically registers `torch.ops.silly.attention`
 from . import silly_attention  # noqa: F401
 
+DEVICE_TYPE = current_platform.device_type
+
 
 def test_getitem_moved_to_producer_subgraph():
     """
@@ -147,7 +154,7 @@ def model_fn(x: torch.Tensor) -> torch.Tensor:
         final_result = torch.sigmoid(attn_inout)
         return final_result
 
-    torch.set_default_device("cuda")
+    torch.set_default_device(DEVICE_TYPE)
 
     # Create the traced FX graph for the model
     x = torch.randn(8, 4)
@@ -325,7 +332,7 @@ def model_fn(x: torch.Tensor) -> torch.Tensor:
         "Expected two builtin empty_like nodes in merged non-splitting subgraph"
     )
 
-    x = torch.randn(2, 3, device="cuda")
+    x = torch.randn(2, 3, device=DEVICE_TYPE)
     output_original = gm(x)
     output_split = split_gm(x)
     assert torch.allclose(output_original, output_split), "Output mismatch after split"
@@ -622,3 +629,73 @@ def model_fn(x: torch.Tensor) -> torch.Tensor:
             else:
                 example_inputs.append(int(ev))
         standalone_compile(submod, example_inputs, dynamic_shapes="from_example_inputs")
+
+
+def test_decompose_size_with_getitem_user():
+    """
+    Regression test: _decompose_size_nodes must handle getitem users of size()
+    correctly.
+
+    When a graph contains x.shape[i], it can appear as:
+
+        %size = call_method[target="size"](args = (%x,))
+        %getitem = call_function[target=operator.getitem](args = (%size, 1))
+
+    The old code spliced *all* per-dim values into every user's args
+    unconditionally, turning the 2-arg getitem into a malformed 3-arg node:
+
+        %getitem(args = (%sym_size_int, 5120, 1))   # TypeError at runtime
+
+    The fix detects getitem users and replaces them with dims[idx] directly.
+    """
+    # Build a graph manually to guarantee the size() + getitem pattern.
+    #
+    # Graph:
+    #   %x = placeholder
+    #   %size = x.size()
+    #   %dim1 = getitem(%size, 1)       <-- the getitem branch we're testing
+    #   %relu = relu(%x)
+    #   %view = view(%relu, -1, %dim1)
+    #   return %view
+    graph = fx.Graph()
+    x = graph.placeholder("x")
+    size_node = graph.call_method("size", args=(x,))
+    getitem_node = graph.call_function(operator.getitem, args=(size_node, 1))
+    relu_node = graph.call_function(torch.ops.aten.relu.default, args=(x,))
+    view_node = graph.call_function(
+        torch.ops.aten.view.default, args=(relu_node, [-1, getitem_node])
+    )
+    graph.output(view_node)
+
+    # Attach example_value metadata so _decompose_size_nodes can inspect dims.
+    # dim 0 is dynamic (SymInt), dim 1 is static (8).
+    from torch._dynamo.source import LocalSource
+    from torch._subclasses.fake_tensor import FakeTensorMode
+    from torch.fx.experimental.symbolic_shapes import ShapeEnv
+
+    shape_env = ShapeEnv()
+    src = LocalSource("batch_size")
+    sym_batch = shape_env.create_symintnode(shape_env.create_symbol(4, src), hint=4)
+    fake_mode = FakeTensorMode(shape_env=shape_env)
+    with fake_mode:
+        fake_x = torch.empty_strided((sym_batch, 8), (8, 1))
+    x.meta["example_value"] = fake_x
+
+    gm = fx.GraphModule(torch.nn.Module(), graph)
+
+    # Run decomposition — this would produce a 3-arg getitem without the fix
+    _decompose_size_nodes(gm)
+
+    # Verify no size() nodes remain
+    remaining_size_nodes = list(gm.graph.find_nodes(op="call_method", target="size"))
+    assert len(remaining_size_nodes) == 0, (
+        f"size() nodes should be fully decomposed, found {len(remaining_size_nodes)}"
+    )
+
+    # Verify no malformed getitem nodes (3+ args)
+    for node in gm.graph.nodes:
+        if node.op == "call_function" and node.target is operator.getitem:
+            assert len(node.args) == 2, (
+                f"getitem node '{node.name}' has {len(node.args)} args "
+                f"(expected 2): {node.args}"
+            )
diff --git a/tests/compile/test_rotary_embedding_compile.py b/tests/compile/test_rotary_embedding_compile.py
index 76f5382534e1..69a4cc05084c 100644
--- a/tests/compile/test_rotary_embedding_compile.py
+++ b/tests/compile/test_rotary_embedding_compile.py
@@ -16,6 +16,8 @@
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.platforms import current_platform
 
+DEVICE_TYPE = current_platform.device_type
+
 
 @support_torch_compile
 class RotaryEmbeddingCompileModule(torch.nn.Module):
@@ -45,7 +47,7 @@ def test_rotary_embedding_torch_compile_with_custom_op(monkeypatch):
     monkeypatch.setenv("VLLM_USE_BYTECODE_HOOK", "1")
     monkeypatch.setenv("VLLM_USE_AOT_COMPILE", "0")
 
-    device = "cuda"
+    device = DEVICE_TYPE
     positions = torch.arange(16, device=device)
     query = torch.randn(16, 32, device=device, dtype=torch.bfloat16)
     key = torch.randn(16, 32, device=device, dtype=torch.bfloat16)
diff --git a/tests/compile/test_structured_logging.py b/tests/compile/test_structured_logging.py
index 7813b7429b1f..10b0ed139cfe 100644
--- a/tests/compile/test_structured_logging.py
+++ b/tests/compile/test_structured_logging.py
@@ -17,8 +17,10 @@
 )
 from vllm.config.scheduler import SchedulerConfig
 from vllm.forward_context import set_forward_context
+from vllm.platforms import current_platform
 
 MLP_SIZE = 64
+DEVICE_TYPE = current_platform.device_type
 
 
 @support_torch_compile
@@ -71,7 +73,7 @@ def get(self, event_type: str, name_pattern: str) -> list[dict]:
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required")
 def test_vllm_structured_logging_artifacts(use_fresh_inductor_cache):
     """Test that all expected vLLM artifacts are logged during compilation."""
-    torch.set_default_device("cuda")
+    torch.set_default_device(DEVICE_TYPE)
 
     capture = TraceStructuredCapture()
 
diff --git a/tests/config/base_model_arch_groundtruth.json b/tests/config/base_model_arch_groundtruth.json
index 81534886dcb6..14e3f4f49d46 100644
--- a/tests/config/base_model_arch_groundtruth.json
+++ b/tests/config/base_model_arch_groundtruth.json
@@ -356,6 +356,23 @@
         "is_multimodal_model": false,
         "dtype": "torch.float32"
     },
+    "stepfun-ai/Step-3.5-Flash": {
+        "architectures": [
+            "Step3p5ForCausalLM"
+        ],
+        "model_type": "step3p5",
+        "text_model_type": "step3p5",
+        "hidden_size": 4096,
+        "total_num_hidden_layers": 45,
+        "total_num_attention_heads": 64,
+        "head_size": 128,
+        "vocab_size": 128896,
+        "total_num_kv_heads": 8,
+        "num_experts": 288,
+        "is_deepseek_mla": false,
+        "is_multimodal_model": false,
+        "dtype": "torch.bfloat16"
+    },
     "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": {
         "architectures": [
             "NemotronHForCausalLM"
diff --git a/tests/config/test_model_arch_config.py b/tests/config/test_model_arch_config.py
index fbae31331be8..e172983b54f4 100644
--- a/tests/config/test_model_arch_config.py
+++ b/tests/config/test_model_arch_config.py
@@ -16,6 +16,7 @@
     "nvidia/Llama-3_3-Nemotron-Super-49B-v1",
     "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
     "XiaomiMiMo/MiMo-7B-RL",
+    "stepfun-ai/Step-3.5-Flash",
     # Excluded: Not available online right now
     # "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1",
     "meituan-longcat/LongCat-Flash-Chat",
diff --git a/tests/config/test_multimodal_config.py b/tests/config/test_multimodal_config.py
index e5c30f999a05..9720d84672fd 100644
--- a/tests/config/test_multimodal_config.py
+++ b/tests/config/test_multimodal_config.py
@@ -41,3 +41,21 @@ def test_language_model_only_affects_model_hash():
     base_hash = ModelConfig(model).compute_hash()
     lm_only_hash = ModelConfig(model, language_model_only=True).compute_hash()
     assert base_hash != lm_only_hash
+
+
+def test_mm_encoder_fp8_scale_path_requires_fp8():
+    with pytest.raises(ValueError, match="mm_encoder_attn_dtype"):
+        MultiModalConfig(mm_encoder_fp8_scale_path="/tmp/scales.json")
+
+
+def test_mm_encoder_attn_dtype_hash_updates(tmp_path):
+    scale_file = tmp_path / "scales.json"
+    scale_file.write_text("{}")
+    base_hash = MultiModalConfig().compute_hash()
+    fp8_hash = MultiModalConfig(mm_encoder_attn_dtype="fp8").compute_hash()
+    fp8_static_hash = MultiModalConfig(
+        mm_encoder_attn_dtype="fp8",
+        mm_encoder_fp8_scale_path=str(scale_file),
+    ).compute_hash()
+    assert base_hash != fp8_hash
+    assert fp8_hash != fp8_static_hash
diff --git a/tests/conftest.py b/tests/conftest.py
index f3b22d898903..3eaebc38bc63 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -224,19 +224,28 @@ def init_test_http_connection():
 def dist_init():
     from tests.utils import ensure_current_vllm_config
 
-    temp_file = tempfile.mkstemp()[1]
-
-    with ensure_current_vllm_config():
-        init_distributed_environment(
-            world_size=1,
-            rank=0,
-            distributed_init_method=f"file://{temp_file}",
-            local_rank=0,
-            backend="nccl",
-        )
-        initialize_model_parallel(1, 1)
-        yield
-    cleanup_dist_env_and_memory()
+    # Close the fd returned by mkstemp; FileStore opens the path itself.
+    # Leaving it open leaks one FD per test and eventually exhausts the
+    # ulimit, causing FileStore's destructor to throw c10::DistStoreError
+    # ("Too many open files") during gc and abort the process.
+    fd, temp_file = tempfile.mkstemp()
+    os.close(fd)
+
+    try:
+        with ensure_current_vllm_config():
+            init_distributed_environment(
+                world_size=1,
+                rank=0,
+                distributed_init_method=f"file://{temp_file}",
+                local_rank=0,
+                backend="nccl",
+            )
+            initialize_model_parallel(1, 1)
+            yield
+        cleanup_dist_env_and_memory()
+    finally:
+        with contextlib.suppress(OSError):
+            os.unlink(temp_file)
 
 
 @pytest.fixture
@@ -246,8 +255,9 @@ def default_vllm_config():
     """
     from vllm.config import VllmConfig, set_current_vllm_config
 
-    with set_current_vllm_config(VllmConfig()):
-        yield
+    config = VllmConfig()
+    with set_current_vllm_config(config):
+        yield config
 
 
 @pytest.fixture()
@@ -363,12 +373,15 @@ def __init__(
         model_name: str,
         dtype: str = "auto",
         *,
+        revision: str | None = None,
         model_kwargs: dict[str, Any] | None = None,
         trust_remote_code: bool = True,
         is_sentence_transformer: bool = False,
         is_cross_encoder: bool = False,
         skip_tokenizer_init: bool = False,
         auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM,
+        tokenizer_name: str | None = None,
+        processor: Any | None = None,
         # Set this to avoid hanging issue
         default_torch_num_threads: int | None = None,
     ) -> None:
@@ -382,12 +395,15 @@ def __init__(
             self._init(
                 model_name=model_name,
                 dtype=dtype,
+                revision=revision,
                 model_kwargs=model_kwargs,
                 trust_remote_code=trust_remote_code,
                 is_sentence_transformer=is_sentence_transformer,
                 is_cross_encoder=is_cross_encoder,
                 skip_tokenizer_init=skip_tokenizer_init,
                 auto_cls=auto_cls,
+                tokenizer_name=tokenizer_name,
+                processor=processor,
             )
 
     def _init(
@@ -395,12 +411,15 @@ def _init(
         model_name: str,
         dtype: str = "auto",
         *,
+        revision: str | None = None,
         model_kwargs: dict[str, Any] | None = None,
         trust_remote_code: bool = True,
         is_sentence_transformer: bool = False,
         is_cross_encoder: bool = False,
         skip_tokenizer_init: bool = False,
         auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM,
+        tokenizer_name: str | None = None,
+        processor: Any | None = None,
     ) -> None:
         model_name = maybe_model_redirect(model_name)
         self.model_name = model_name
@@ -409,6 +428,15 @@ def _init(
             model_name,
             trust_remote_code=trust_remote_code,
         )
+        # HF runner should use the HF config so that it's consistent with the HF model
+        if self.config.__module__.startswith("vllm.transformers_utils.configs"):
+            from transformers.models.auto.configuration_auto import CONFIG_MAPPING
+
+            del CONFIG_MAPPING._extra_content[self.config.model_type]
+            self.config = AutoConfig.from_pretrained(
+                model_name,
+                trust_remote_code=trust_remote_code,
+            )
         self.device = self.get_default_device()
         self.dtype = dtype = _get_and_verify_dtype(
             self.model_name,
@@ -427,6 +455,7 @@ def _init(
 
             self.model = SentenceTransformer(
                 model_name,
+                revision=revision,
                 device=self.device,
                 model_kwargs=model_kwargs,
                 trust_remote_code=trust_remote_code,
@@ -437,6 +466,7 @@ def _init(
 
             self.model = CrossEncoder(
                 model_name,
+                revision=revision,
                 device=self.device,
                 automodel_args=model_kwargs,
                 trust_remote_code=trust_remote_code,
@@ -446,6 +476,7 @@ def _init(
                 nn.Module,
                 auto_cls.from_pretrained(
                     model_name,
+                    revision=revision,
                     trust_remote_code=trust_remote_code,
                     **model_kwargs,
                 ),
@@ -468,20 +499,27 @@ def _init(
         if not skip_tokenizer_init:
             self.tokenizer: "PreTrainedTokenizer | PreTrainedTokenizerFast" = (
                 AutoTokenizer.from_pretrained(
-                    model_name,
+                    tokenizer_name or model_name,
                     trust_remote_code=trust_remote_code,
                 )
             )
 
-        # don't put this import at the top level
-        # it will call torch.accelerator.device_count()
-        from transformers import AutoProcessor
+        if processor is not None:
+            self.processor = processor
+        else:
+            # don't put this import at the top level
+            # it will call torch.accelerator.device_count()
+            from transformers import AutoProcessor
 
-        self.processor = AutoProcessor.from_pretrained(
-            model_name,
-            trust_remote_code=trust_remote_code,
-        )
+            self.processor = AutoProcessor.from_pretrained(
+                model_name,
+                trust_remote_code=trust_remote_code,
+            )
         if skip_tokenizer_init:
+            if self.processor is None:
+                raise ValueError(
+                    "skip_tokenizer_init=True requires processor initialization."
+                )
             self.tokenizer = self.processor.tokenizer
 
     def get_inputs(
@@ -504,6 +542,12 @@ def get_inputs(
         all_inputs: list[BatchFeature | BatchEncoding | dict[str, torch.Tensor]] = []
         for i, prompt in enumerate(prompts):
             if isinstance(prompt, str):
+                if self.processor is None:
+                    raise RuntimeError(
+                        "HfRunner.processor is not initialized. "
+                        "Pass processor=... to HfRunner or set "
+                        "hf_model.processor before generation."
+                    )
                 # Create a copy to avoid modifying the original dict
                 processor_kwargs = (
                     tokenization_kwargs.copy()
@@ -601,6 +645,10 @@ def generate(
                 use_cache=True,
                 **kwargs,
             )
+            if self.processor is None:
+                raise RuntimeError(
+                    "HfRunner.processor is not initialized; cannot decode output."
+                )
             output_str = self.processor.batch_decode(
                 output_ids,
                 skip_special_tokens=True,
@@ -746,10 +794,15 @@ def generate_greedy_logprobs_limit(
         audios: PromptAudioInput | None = None,
         videos: PromptVideoInput | None = None,
         use_cache: bool = True,
+        tokenization_kwargs: dict[str, Any] | None = None,
         **kwargs: Any,
     ) -> list[TokensTextLogprobs]:
         all_inputs = self.get_inputs(
-            prompts, images=images, videos=videos, audios=audios
+            prompts,
+            images=images,
+            videos=videos,
+            audios=audios,
+            tokenization_kwargs=tokenization_kwargs,
         )
 
         all_logprobs: list[list[dict[int, float]]] = []
@@ -957,6 +1010,8 @@ def generate(
             req_sample_output_ids: list[list[int]] = []
             req_sample_output_strs: list[str] = []
             req_logprobs = []
+            if req_output.prompt_logprobs:
+                req_logprobs.extend(req_output.prompt_logprobs)
             for sample in req_output.outputs:
                 output_str = sample.text
                 output_ids = list(sample.token_ids)
@@ -1167,7 +1222,7 @@ def token_classify(self, prompts: list[str]) -> list[list[float]]:
         return [req_output.outputs.data for req_output in req_outputs]
 
     def reward(self, prompts: list[str]) -> list[list[float]]:
-        req_outputs = self.llm.reward(prompts)
+        req_outputs = self.llm.encode(prompts, pooling_task="token_classify")
         return [req_output.outputs.data for req_output in req_outputs]
 
     def score(
@@ -1192,11 +1247,35 @@ def collective_rpc(self, *args, **kwargs):
     def __enter__(self):
         return self
 
+    def _wait_for_rocm_memory_release(self, gpu_memory_utilization: float) -> None:
+        from tests.utils import wait_for_gpu_memory_to_clear
+        from vllm.platforms import current_platform
+
+        if not current_platform.is_rocm():
+            return
+
+        num_gpus = torch.accelerator.device_count()
+        if num_gpus == 0:
+            return
+
+        # V1 startup requires free_memory >= total * gpu_memory_utilization.
+        # Wait for the complementary used-memory ratio so the next runner does
+        # not fail the startup guard immediately after this runner exits. Bound
+        # the wait so cleanup failures fail this test instead of hanging.
+        wait_for_gpu_memory_to_clear(
+            devices=list(range(num_gpus)),
+            threshold_ratio=1.0 - gpu_memory_utilization,
+            timeout_s=120,
+        )
+
     def __exit__(self, exc_type, exc_value, traceback):
         # Explicitly shutdown the engine core to release GPU resources
         # This is needed because when executing consecutive tests, the GC
         # might not be fast enough in shutting down the llm engine. This can lead to OOMs
         # because when the next test starts some GPU memory is still in use.
+        gpu_memory_utilization = (
+            self.llm.llm_engine.vllm_config.cache_config.gpu_memory_utilization
+        )
         try:
             self.llm.llm_engine.engine_core.shutdown()
         except Exception:
@@ -1204,6 +1283,7 @@ def __exit__(self, exc_type, exc_value, traceback):
             pass
         del self.llm
         cleanup_dist_env_and_memory()
+        self._wait_for_rocm_memory_release(gpu_memory_utilization)
 
 
 @pytest.fixture(scope="session")
@@ -1622,3 +1702,67 @@ def fresh_vllm_cache(monkeypatch, use_fresh_inductor_cache):
 def enable_pickle(monkeypatch):
     """`LLM.apply_model` requires pickling a function."""
     monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
+
+
+@pytest.fixture(scope="function")
+def disable_log_dedup(monkeypatch):
+    """
+    Disable log deduplication such that warning_once and info_once always print.
+    """
+
+    # Patch logger._print_warning_once to remove the lru_cache decorator
+    from vllm import logger
+
+    original_print_warning_once = logger._print_warning_once
+    original_print_info_once = logger._print_info_once
+    original_print_debug_once = logger._print_debug_once
+
+    logger._print_warning_once = original_print_warning_once.__wrapped__
+    logger._print_info_once = original_print_info_once.__wrapped__
+    logger._print_debug_once = original_print_debug_once.__wrapped__
+
+    yield
+    logger._print_warning_once = original_print_warning_once
+    logger._print_info_once = original_print_info_once
+    logger._print_debug_once = original_print_debug_once
+
+
+@pytest.fixture(scope="function")
+def fake_vllm_ir(monkeypatch):
+    """
+    Pytest fixture to allow isolated IR op registration in tests.
+
+    Replaces IrOp.registry with an empty dict and swaps ``vllm_ir_torch_lib`` for a
+    fresh ``Library`` with a unique namespace per test (see ``Library.ns``).
+
+    Torch keeps registrations for the process lifetime; reusing the fragment
+    name ``vllm_ir`` and defining the same op string again can segfault. A
+    random library name keeps each fixture run on a disjoint namespace.
+
+    The test Library is kept alive until after monkeypatch teardown so PyTorch's
+    C++ state is not freed while references may still exist.
+
+    Usage:
+        def test_my_ir_op(fake_vllm_ir):
+            @vllm.ir.register_op
+            def my_test_op(x: torch.Tensor) -> torch.Tensor:
+                return x * 2
+
+            result = my_test_op(torch.tensor([1, 2, 3]))
+            # Registry and library cleaned up automatically after the test
+    """
+    import secrets
+
+    from torch.library import Library
+    from vllm.ir.op import IrOp
+
+    monkeypatch.setattr(IrOp, "registry", {})
+
+    # Keep a local reference so the Library is not GC'd before monkeypatch
+    # teardown restores the original reference.
+    test_lib = Library(f"vllm_ir_{secrets.token_hex(8)}", "FRAGMENT")
+    monkeypatch.setattr("vllm.ir.op.vllm_ir_torch_lib", test_lib)
+
+    yield
+
+    del test_lib
diff --git a/tests/distributed/conftest.py b/tests/distributed/conftest.py
index 9c146a3323d9..da661c5e13ba 100644
--- a/tests/distributed/conftest.py
+++ b/tests/distributed/conftest.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import os
 import random
 
 import msgspec
@@ -166,3 +167,31 @@ def close(self):
         self.sub.close()
         for replay in self.replay_sockets:
             replay.close()
+
+
+@pytest.fixture
+def enable_ray_v2_backend():
+    """Set env vars for the Ray V2 executor backend and shut down Ray
+    between tests."""
+    import ray
+
+    saved = {
+        "VLLM_USE_RAY_V2_EXECUTOR_BACKEND": os.environ.get(
+            "VLLM_USE_RAY_V2_EXECUTOR_BACKEND"
+        ),
+        "VLLM_ENABLE_V1_MULTIPROCESSING": os.environ.get(
+            "VLLM_ENABLE_V1_MULTIPROCESSING"
+        ),
+    }
+    os.environ["VLLM_USE_RAY_V2_EXECUTOR_BACKEND"] = "1"
+    os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
+    if ray.is_initialized():
+        ray.shutdown()
+    try:
+        yield
+    finally:
+        if ray.is_initialized():
+            ray.shutdown()
+        os.environ.update({k: v for k, v in saved.items() if v is not None})
+        for key in (k for k, v in saved.items() if v is None):
+            os.environ.pop(key, None)
diff --git a/tests/distributed/eplb_utils.py b/tests/distributed/eplb_utils.py
index 215aff32d8e1..e6a41842e19d 100644
--- a/tests/distributed/eplb_utils.py
+++ b/tests/distributed/eplb_utils.py
@@ -1,9 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import atexit
 import os
 import random
 
+import pytest
 import torch
 import torch.multiprocessing as mp
 
@@ -16,9 +18,20 @@
 mp.set_start_method("spawn", force=True)
 
 
+def _distributed_worker_wrapper(fn, env, world_size, args, rank, skip_queue):
+    try:
+        fn(env, world_size, *args)
+    except BaseException as exc:
+        if isinstance(exc, pytest.skip.Exception):
+            skip_queue.put((rank, str(exc)))
+            return
+        raise
+
+
 def distributed_run(fn, world_size, *args):
     number_of_processes = world_size
     processes: list[mp.Process] = []
+    skip_queue: mp.SimpleQueue = mp.SimpleQueue()
     for i in range(number_of_processes):
         env: dict[str, str] = {}
         env["RANK"] = str(i)
@@ -27,13 +40,32 @@ def distributed_run(fn, world_size, *args):
         env["LOCAL_WORLD_SIZE"] = str(number_of_processes)
         env["MASTER_ADDR"] = "localhost"
         env["MASTER_PORT"] = "12345"
-        p = mp.Process(target=fn, args=(env, world_size, *args))
+        p = mp.Process(
+            target=_distributed_worker_wrapper,
+            args=(fn, env, world_size, args, i, skip_queue),
+        )
         processes.append(p)
         p.start()
 
     for p in processes:
         p.join()
 
+    skipped: list[tuple[int, str]] = []
+    while not skip_queue.empty():
+        rank, reason = skip_queue.get()
+        skipped.append((rank, reason))
+
+    if len(skipped) == number_of_processes:
+        reason = skipped[0][1]
+        pytest.skip(reason)
+    if 0 < len(skipped) < number_of_processes:
+        skipped_ranks = sorted(rank for rank, _ in skipped)
+        raise AssertionError(
+            "Distributed test had partial skips; expected either all ranks "
+            f"to skip or none. Skipped ranks: {skipped_ranks}, "
+            f"total ranks: {number_of_processes}"
+        )
+
     for p in processes:
         assert p.exitcode == 0
 
@@ -48,7 +80,12 @@ def set_env_vars_and_device(env: dict[str, str]) -> None:
     vllm_config = VllmConfig()
     with set_current_vllm_config(vllm_config):
         init_distributed_environment()
-
+    atexit.register(_destroy_process_group_if_initialized)
     # Ensure each worker process has the same random seed
     random.seed(42)
     torch.manual_seed(42)
+
+
+def _destroy_process_group_if_initialized() -> None:
+    if torch.distributed.is_available() and torch.distributed.is_initialized():
+        torch.distributed.destroy_process_group()
diff --git a/tests/distributed/test_dcp_a2a.py b/tests/distributed/test_dcp_a2a.py
index 2f92413e58d9..d80ed36be650 100644
--- a/tests/distributed/test_dcp_a2a.py
+++ b/tests/distributed/test_dcp_a2a.py
@@ -10,10 +10,95 @@
 
 import math
 
+import multiprocess as mp
 import pytest
 import torch
+import torch.distributed as dist
 
 from vllm.config.parallel import ParallelConfig
+from vllm.utils.network_utils import get_open_port
+from vllm.utils.system_utils import update_environment_variables
+
+mp.set_start_method("spawn", force=True)
+
+
+class _FakeCPGroup:
+    def __init__(self, world_size: int, device_group: dist.ProcessGroup):
+        self.world_size = world_size
+        self.device_group = device_group
+
+
+def _dtype_from_name(dtype_name: str) -> torch.dtype:
+    return {
+        "float16": torch.float16,
+        "bfloat16": torch.bfloat16,
+        "float32": torch.float32,
+    }[dtype_name]
+
+
+def _packed_a2a_reference(
+    cp_attn_out: torch.Tensor,
+    cp_attn_lse: torch.Tensor,
+    world_size: int,
+    h_per_rank: int,
+    is_lse_base_on_e: bool,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    from vllm.v1.attention.ops.dcp_alltoall import _lse_weighted_combine
+
+    B, _H, D = cp_attn_out.shape
+    outputs = (
+        cp_attn_out.view(B, world_size, h_per_rank, D)
+        .permute(1, 0, 2, 3)
+        .contiguous()
+        .float()
+    )
+    lses = cp_attn_lse.view(B, world_size, h_per_rank).permute(1, 0, 2).contiguous()
+    return _lse_weighted_combine(
+        outputs,
+        lses,
+        return_lse=True,
+        is_lse_base_on_e=is_lse_base_on_e,
+    )
+
+
+def _assert_packed_a2a_close(
+    actual: torch.Tensor,
+    expected: torch.Tensor,
+    dtype: torch.dtype,
+) -> None:
+    if dtype == torch.float32:
+        torch.testing.assert_close(actual, expected, rtol=1e-5, atol=1e-5)
+    else:
+        torch.testing.assert_close(
+            actual.float(), expected.float(), rtol=3e-2, atol=3e-2
+        )
+
+
+def _distributed_run(fn, world_size: int, extra_env: dict[str, str]) -> None:
+    port = str(get_open_port())
+    processes: list[mp.Process] = []
+    for rank in range(world_size):
+        env = {
+            "RANK": str(rank),
+            "LOCAL_RANK": str(rank),
+            "WORLD_SIZE": str(world_size),
+            "LOCAL_WORLD_SIZE": str(world_size),
+            "MASTER_ADDR": "localhost",
+            "MASTER_PORT": port,
+            **extra_env,
+        }
+        process = mp.Process(target=fn, args=(env,))
+        processes.append(process)
+        process.start()
+
+    for process in processes:
+        process.join(timeout=120)
+
+    for process in processes:
+        if process.is_alive():
+            process.kill()
+            process.join()
+        assert process.exitcode == 0
 
 
 class TestDCPCommBackendConfig:
@@ -38,14 +123,14 @@ def test_a2a_with_dcp_valid(self):
         """A2A backend is valid when DCP > 1."""
         config = ParallelConfig(
             dcp_comm_backend="a2a",
-            tensor_parallel_size=8,
+            tensor_parallel_size=4,
             decode_context_parallel_size=4,
         )
         assert config.dcp_comm_backend == "a2a"
 
     def test_invalid_backend_rejected(self):
         """Invalid backend values are rejected."""
-        with pytest.raises(ValueError, match="must be one of"):
+        with pytest.raises(ValueError, match="must be one of|Input should be"):
             ParallelConfig(
                 dcp_comm_backend="invalid",
             )
@@ -134,7 +219,7 @@ def test_dominant_rank(self):
         result = _lse_weighted_combine(outputs, lses)
 
         assert result.shape == (B, H, D)
-        torch.testing.assert_close(result, outputs[1].squeeze(0), atol=1e-5, rtol=1e-5)
+        torch.testing.assert_close(result, outputs[1], atol=1e-5, rtol=1e-5)
 
     def test_mathematically_correct(self):
         """Verify mathematical correctness of LSE combination."""
@@ -187,6 +272,224 @@ def test_return_lse(self):
         assert global_lse.shape == (B, H)
         assert abs(global_lse.item() - expected_global_lse) < 1e-5
 
+    def test_base2_return_lse(self):
+        """Base-2 LSE mode returns log2-sum-exp2 global LSE."""
+        from vllm.v1.attention.ops.dcp_alltoall import _lse_weighted_combine
+
+        outputs = torch.tensor(
+            [
+                [[[1.0, 2.0]]],
+                [[[3.0, 4.0]]],
+            ]
+        )
+        lses = torch.tensor(
+            [
+                [[1.0]],
+                [[2.0]],
+            ]
+        )
+
+        result, global_lse = _lse_weighted_combine(
+            outputs,
+            lses,
+            return_lse=True,
+            is_lse_base_on_e=False,
+        )
+
+        expected_global_lse = math.log2(2**1 + 2**2)
+        w0 = 2**1 / (2**1 + 2**2)
+        w1 = 2**2 / (2**1 + 2**2)
+        expected = torch.tensor([[[w0 * 1.0 + w1 * 3.0, w0 * 2.0 + w1 * 4.0]]])
+
+        torch.testing.assert_close(result, expected, rtol=1e-5, atol=1e-5)
+        torch.testing.assert_close(
+            global_lse,
+            torch.tensor([[expected_global_lse]]),
+            rtol=1e-5,
+            atol=1e-5,
+        )
+
+    def test_lse_pack_dim(self):
+        """Packed A2A stores one fp32 LSE in output-dtype lanes."""
+        from vllm.v1.attention.ops.dcp_alltoall import _dcp_a2a_lse_pack_dim
+
+        assert _dcp_a2a_lse_pack_dim(torch.bfloat16) == 2
+        assert _dcp_a2a_lse_pack_dim(torch.float16) == 2
+        assert _dcp_a2a_lse_pack_dim(torch.float32) == 1
+
+
+class TestPackedA2AKernels:
+    @pytest.mark.skipif(
+        torch.accelerator.device_count() < 1, reason="CUDA is required."
+    )
+    @pytest.mark.parametrize("dtype_name", ["float16", "bfloat16", "float32"])
+    @pytest.mark.parametrize("return_lse", [False, True])
+    @pytest.mark.parametrize("is_lse_base_on_e", [False, True])
+    def test_pack_unpack_combine_matches_reference(
+        self,
+        dtype_name: str,
+        return_lse: bool,
+        is_lse_base_on_e: bool,
+    ):
+        from vllm.v1.attention.ops.dcp_alltoall import (
+            _dcp_a2a_lse_pack_dim,
+            _dcp_a2a_pack_send,
+            _dcp_a2a_unpack_combine,
+        )
+
+        torch.manual_seed(0)
+        dtype = _dtype_from_name(dtype_name)
+        device = torch.device("cuda")
+        world_size, B, h_per_rank, D = 4, 7, 2, 32
+        H = world_size * h_per_rank
+        cp_attn_out = torch.randn(B, H, D, device=device, dtype=dtype)
+        cp_attn_lse = torch.randn(B, H, device=device, dtype=torch.float32)
+        lse_pack_dim = _dcp_a2a_lse_pack_dim(dtype)
+        send_buffer = torch.empty(
+            (world_size, B, h_per_rank, D + lse_pack_dim),
+            device=device,
+            dtype=dtype,
+        )
+
+        _dcp_a2a_pack_send(
+            cp_attn_out,
+            cp_attn_lse,
+            send_buffer,
+            world_size,
+            h_per_rank,
+            D,
+            lse_pack_dim,
+        )
+        actual = _dcp_a2a_unpack_combine(
+            send_buffer, D, lse_pack_dim, return_lse, is_lse_base_on_e
+        )
+        expected_out, expected_lse = _packed_a2a_reference(
+            cp_attn_out, cp_attn_lse, world_size, h_per_rank, is_lse_base_on_e
+        )
+
+        if return_lse:
+            actual_out, actual_lse = actual
+            _assert_packed_a2a_close(actual_out, expected_out, dtype)
+            torch.testing.assert_close(actual_lse, expected_lse, rtol=1e-4, atol=1e-4)
+        else:
+            _assert_packed_a2a_close(actual, expected_out, dtype)
+
+
+def _distributed_packed_a2a_worker(env: dict[str, str]) -> None:
+    update_environment_variables(env)
+    local_rank = int(env["LOCAL_RANK"])
+    torch.accelerator.set_device_index(local_rank)
+    dist.init_process_group(backend="nccl")
+    use_workspace = env.get("USE_WORKSPACE") == "1"
+    if use_workspace:
+        from vllm.v1.worker.workspace import init_workspace_manager
+
+        init_workspace_manager(torch.device(f"cuda:{local_rank}"))
+    try:
+        from vllm.v1.attention.ops.dcp_alltoall import dcp_a2a_lse_reduce
+
+        dtype = _dtype_from_name(env["TEST_DTYPE"])
+        return_lse = env["RETURN_LSE"] == "1"
+        is_lse_base_on_e = env["LSE_BASE_E"] == "1"
+        rank = dist.get_rank()
+        world_size = dist.get_world_size()
+        B, h_per_rank, D = 5, 2, 32
+        H = world_size * h_per_rank
+
+        generator = torch.Generator(device=f"cuda:{local_rank}")
+        generator.manual_seed(1234 + rank)
+        cp_attn_out = torch.randn(
+            B,
+            H,
+            D,
+            device=f"cuda:{local_rank}",
+            dtype=dtype,
+            generator=generator,
+        )
+        cp_attn_lse = torch.randn(
+            B,
+            H,
+            device=f"cuda:{local_rank}",
+            dtype=torch.float32,
+            generator=generator,
+        )
+        actual = dcp_a2a_lse_reduce(
+            cp_attn_out,
+            cp_attn_lse,
+            _FakeCPGroup(world_size, dist.group.WORLD),
+            return_lse=return_lse,
+            is_lse_base_on_e=is_lse_base_on_e,
+        )
+
+        gathered_out = [torch.empty_like(cp_attn_out) for _ in range(world_size)]
+        gathered_lse = [torch.empty_like(cp_attn_lse) for _ in range(world_size)]
+        dist.all_gather(gathered_out, cp_attn_out)
+        dist.all_gather(gathered_lse, cp_attn_lse)
+        outputs = torch.stack(
+            [
+                t[:, rank * h_per_rank : (rank + 1) * h_per_rank, :]
+                for t in gathered_out
+            ],
+            dim=0,
+        ).float()
+        lses = torch.stack(
+            [t[:, rank * h_per_rank : (rank + 1) * h_per_rank] for t in gathered_lse],
+            dim=0,
+        )
+        from vllm.v1.attention.ops.dcp_alltoall import _lse_weighted_combine
+
+        expected_out, expected_lse = _lse_weighted_combine(
+            outputs,
+            lses,
+            return_lse=True,
+            is_lse_base_on_e=is_lse_base_on_e,
+        )
+
+        if return_lse:
+            actual_out, actual_lse = actual
+            _assert_packed_a2a_close(actual_out, expected_out, dtype)
+            torch.testing.assert_close(actual_lse, expected_lse, rtol=1e-4, atol=1e-4)
+        else:
+            _assert_packed_a2a_close(actual, expected_out, dtype)
+    finally:
+        if use_workspace:
+            from vllm.v1.worker.workspace import reset_workspace_manager
+
+            reset_workspace_manager()
+        dist.destroy_process_group()
+
+
+@pytest.mark.skipif(
+    torch.accelerator.device_count() < 4, reason="Need at least 4 GPUs."
+)
+@pytest.mark.parametrize("dtype_name", ["float16", "bfloat16", "float32"])
+def test_distributed_packed_a2a_matches_reference(dtype_name: str):
+    _distributed_run(
+        _distributed_packed_a2a_worker,
+        world_size=4,
+        extra_env={
+            "TEST_DTYPE": dtype_name,
+            "RETURN_LSE": "1",
+            "LSE_BASE_E": "1",
+        },
+    )
+
+
+@pytest.mark.skipif(
+    torch.accelerator.device_count() < 4, reason="Need at least 4 GPUs."
+)
+def test_distributed_packed_a2a_with_workspace_matches_reference():
+    _distributed_run(
+        _distributed_packed_a2a_worker,
+        world_size=4,
+        extra_env={
+            "TEST_DTYPE": "bfloat16",
+            "RETURN_LSE": "1",
+            "LSE_BASE_E": "1",
+            "USE_WORKSPACE": "1",
+        },
+    )
+
 
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])
diff --git a/tests/distributed/test_eplb_events.py b/tests/distributed/test_eplb_events.py
new file mode 100644
index 000000000000..40323dc89809
--- /dev/null
+++ b/tests/distributed/test_eplb_events.py
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import threading
+import time
+
+import torch
+
+from vllm.distributed.eplb.eplb_utils import CpuGpuEvent
+
+
+def test_wait_blocks_until_record():
+    event = CpuGpuEvent()
+    record_stream = torch.cuda.Stream()
+    wait_stream = torch.cuda.Stream()
+    wait_returned = threading.Event()
+
+    def waiter():
+        event.wait(stream=wait_stream)
+        wait_returned.set()
+
+    t = threading.Thread(target=waiter)
+    t.start()
+
+    time.sleep(0.05)
+    assert not wait_returned.is_set(), "wait() returned before record() was called"
+
+    event.record(stream=record_stream)
+    t.join(timeout=5.0)
+
+    assert not event._recorded.is_set()
+
+
+def test_reuse_across_multiple_cycles():
+    wrapper = CpuGpuEvent()
+    record_stream = torch.cuda.Stream()
+    wait_stream = torch.cuda.Stream()
+    NUM_CYCLES = 8
+    completed_cycles = []
+    barriers = [threading.Barrier(2) for _ in range(NUM_CYCLES)]
+
+    def waiter():
+        for i in range(NUM_CYCLES):
+            wrapper.wait(stream=wait_stream)
+            completed_cycles.append(True)
+            barriers[i].wait()
+
+    t = threading.Thread(target=waiter)
+    t.start()
+
+    for i in range(NUM_CYCLES):
+        wrapper.record(stream=record_stream)
+        barriers[i].wait()
+
+    t.join(timeout=10.0)
+    assert len(completed_cycles) == NUM_CYCLES
+
+
+def test_producer_consumer():
+    """
+    This test uses the CpuGpuEvent to synchronize reads and writes to/from a shared GPU
+    tensor on multiple CPU threads.
+    """
+    worker_stream = torch.cuda.Stream()
+    # Create a single element counter that will be shared between two threads
+    buf = torch.zeros(1, device="cuda")
+    NUM_ROUNDS = 5
+
+    ready_cpu = [threading.Event() for _ in range(NUM_ROUNDS)]
+    events = [CpuGpuEvent() for _ in range(NUM_ROUNDS)]
+    errors: list[str] = []
+
+    # For each round, the worker thread (writer) sets the counter in buf and waits for
+    # the main thread to read it.
+    def worker():
+        for i in range(NUM_ROUNDS):
+            if i > 0:
+                events[i - 1].wait(stream=worker_stream)
+
+            with torch.cuda.stream(worker_stream):
+                buf.fill_(float(i + 1))
+
+            worker_stream.synchronize()
+            ready_cpu[i].set()
+
+    t = threading.Thread(target=worker)
+    t.start()
+
+    for i in range(NUM_ROUNDS):
+        ready_cpu[i].wait()
+        snapshot = buf.clone()
+        events[i].record()
+        val = snapshot.item()
+        if val != float(i + 1):
+            errors.append(f"round {i}: expected {i + 1:.1f}, got {val:.1f}")
+
+    t.join(timeout=10.0)
+    assert not errors, f"Buffer ordering errors: {errors}"
diff --git a/tests/distributed/test_eplb_execute.py b/tests/distributed/test_eplb_execute.py
index 50c7e6538ffb..d9e6a739b01b 100644
--- a/tests/distributed/test_eplb_execute.py
+++ b/tests/distributed/test_eplb_execute.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import asyncio
 import random
 
 import pytest
@@ -9,6 +8,10 @@
 import torch.distributed
 
 from vllm.config import VllmConfig, set_current_vllm_config
+from vllm.distributed.eplb.eplb_communicator import (
+    create_eplb_communicator,
+    has_nixl,
+)
 from vllm.distributed.eplb.rebalance_execute import (
     move_from_buffer,
     rearrange_expert_weights_inplace,
@@ -130,9 +133,10 @@ def verify_expert_weights_after_shuffle(
     hidden_sizes: list[int],
     ep_rank: int,
     num_local_experts: int,
-):
+) -> bool:
     """Verify the weights after shuffling are correct."""
     num_layers = len(expert_weights)
+    ok = True
 
     for layer in range(num_layers):
         for weight_idx, hidden_size in enumerate(hidden_sizes):
@@ -155,29 +159,38 @@ def verify_expert_weights_after_shuffle(
                     dtype=actual_weights.dtype,
                 )
 
-                torch.testing.assert_close(
-                    actual_weights,
-                    expected_weights,
-                    msg=f"Layer {layer}, weight {weight_idx},"
-                    f"local expert {local_expert}: "
-                    f"weights do not match. "
-                    f"Expected logical expert {expected_logical_expert}",
-                )
+                if not torch.equal(actual_weights, expected_weights):
+                    ok = False
+                    actual_head = actual_weights[:8].detach().cpu().tolist()
+                    expected_head = expected_weights[:8].detach().cpu().tolist()
+                    print(
+                        "verify_expert_weights_after_shuffle failed: "
+                        f"rank={ep_rank}, "
+                        f"layer={layer}, weight_idx={weight_idx}, "
+                        f"local_expert={local_expert}, "
+                        f"expected_logical_expert={expected_logical_expert}, "
+                        f"actual_head={actual_head}, expected_head={expected_head}",
+                        flush=True,
+                    )
+
+    return ok
 
 
 def verify_redundant_experts_have_same_weights(
     expert_weights: list[list[torch.Tensor]],
     indices: torch.Tensor,
     hidden_sizes: list[int],
+    ep_rank: int,
     world_size: int,
     num_local_experts: int,
-):
+) -> bool:
     """
     Verify that all replicas of the same logical expert have the same weights.
     """
     num_layers = len(expert_weights)
     total_physical_experts = world_size * num_local_experts
 
+    ok = True
     for layer in range(num_layers):
         # Collect weights for all physical experts for each weight matrix
         all_weights: list[torch.Tensor] = []
@@ -227,14 +240,54 @@ def verify_redundant_experts_have_same_weights(
                 # Verify that current physical expert's weights match the
                 # previously saved logical expert weights
                 for weight_idx in range(len(hidden_sizes)):
-                    torch.testing.assert_close(
+                    if not torch.equal(
                         all_weights[weight_idx][physical_pos],
                         logical_expert_weights[logical_expert_id][weight_idx],
-                        msg=f"Layer {layer}, weight {weight_idx},"
-                        f"logical expert {logical_expert_id}: "
-                        f"Physical expert {physical_pos} has different weights"
-                        f"than expected",
-                    )
+                    ):
+                        ok = False
+                        actual_head = (
+                            all_weights[weight_idx][physical_pos][:8]
+                            .detach()
+                            .cpu()
+                            .tolist()
+                        )
+                        reference_head = (
+                            logical_expert_weights[logical_expert_id][weight_idx][:8]
+                            .detach()
+                            .cpu()
+                            .tolist()
+                        )
+                        print(
+                            "verify_redundant_experts_have_same_weights failed: "
+                            f"rank={ep_rank}, "
+                            f"layer={layer}, weight_idx={weight_idx}, "
+                            f"logical_expert={logical_expert_id}, "
+                            f"physical_pos={physical_pos}, "
+                            f"actual_head={actual_head}, "
+                            f"reference_head={reference_head}",
+                            flush=True,
+                        )
+
+    return ok
+
+
+def assert_verification_synced(local_ok: bool, msg: str) -> None:
+    ok_tensor = torch.tensor([1 if local_ok else 0], device="cuda", dtype=torch.int32)
+    torch.distributed.all_reduce(ok_tensor, op=torch.distributed.ReduceOp.MIN)
+    assert bool(ok_tensor.item()), msg
+
+
+def create_eplb_communicator_or_raise(*, group_coordinator, backend, expert_weights):
+    try:
+        return create_eplb_communicator(
+            group_coordinator=group_coordinator,
+            backend=backend,
+            expert_weights=expert_weights,
+        )
+    except Exception as exc:
+        raise RuntimeError(
+            f"Failed to create EPLB communicator for backend={backend}: {exc}"
+        ) from exc
 
 
 def _test_async_transfer_layer_without_mtp_worker(
@@ -243,6 +296,7 @@ def _test_async_transfer_layer_without_mtp_worker(
     num_layers: int,
     num_local_experts: int,
     num_logical_experts: int,
+    eplb_communicator: str,
 ) -> None:
     set_env_vars_and_device(env)
 
@@ -254,8 +308,8 @@ def _test_async_transfer_layer_without_mtp_worker(
             tensor_model_parallel_size=world_size, pipeline_model_parallel_size=1
         )
 
-        tp_group = get_tp_group()
-        ep_group = tp_group.device_group
+        ep_group_coordinator = get_tp_group()
+        ep_group = ep_group_coordinator.device_group
         ep_rank = torch.distributed.get_rank()
         device = torch.device(f"cuda:{ep_rank}")
 
@@ -298,46 +352,64 @@ def _test_async_transfer_layer_without_mtp_worker(
         expert_buffer = [torch.empty_like(w) for w in expert_weights[0]]
         cuda_stream = torch.cuda.Stream(device=device)
 
+        communicator = create_eplb_communicator_or_raise(
+            group_coordinator=ep_group_coordinator,
+            backend=eplb_communicator,
+            expert_weights=expert_weights[0],
+        )
+        communicator.set_stream(cuda_stream)
+
         for layer_idx in range(num_layers):
-            is_unchanged, is_received_locally, recv_metadata = asyncio.run(
-                transfer_layer(
-                    old_layer_indices=old_indices_cpu[layer_idx],
-                    new_layer_indices=new_indices_cpu[layer_idx],
-                    expert_weights=expert_weights[layer_idx],
-                    expert_weights_buffer=expert_buffer,
-                    ep_group=ep_group,
-                    cuda_stream=cuda_stream,
-                )
+            transfer_metadata = transfer_layer(
+                old_layer_indices=old_indices_cpu[layer_idx],
+                new_layer_indices=new_indices_cpu[layer_idx],
+                expert_weights=expert_weights[layer_idx],
+                expert_weights_buffer=expert_buffer,
+                ep_group=ep_group,
+                communicator=communicator,
+                cuda_stream=cuda_stream,
             )
             cuda_stream.synchronize()
             move_from_buffer(
                 expert_weights=expert_weights[layer_idx],
                 expert_weights_buffers=expert_buffer,
-                is_unchanged=is_unchanged,
-                is_received_locally=is_received_locally,
-                recv_metadata=recv_metadata,
+                transfer_metadata=transfer_metadata,
                 new_indices=new_indices_cpu[layer_idx].numpy(),
                 ep_rank=ep_rank,
             )
 
-        verify_expert_weights_after_shuffle(
-            expert_weights,
-            new_indices,
-            hidden_sizes,
-            ep_rank,
-            num_local_experts,
-        )
+    local_ok = verify_expert_weights_after_shuffle(
+        expert_weights,
+        new_indices,
+        hidden_sizes,
+        ep_rank,
+        num_local_experts,
+    )
+    local_ok = (
         verify_redundant_experts_have_same_weights(
             expert_weights,
             new_indices,
             hidden_sizes,
+            ep_rank,
             world_size,
             num_local_experts,
         )
+        and local_ok
+    )
+    assert_verification_synced(
+        local_ok,
+        "Async transfer verification failed on at least one rank. "
+        "See logs for details.",
+    )
 
 
 def _test_rearrange_expert_weights_with_redundancy(
-    env, world_size, num_layers, num_local_experts, num_logical_experts
+    env,
+    world_size,
+    num_layers,
+    num_local_experts,
+    num_logical_experts,
+    eplb_communicator: str,
 ) -> None:
     # Initialize model parallel (using tensor parallel as an entrypoint
     # to expert parallel)
@@ -351,7 +423,8 @@ def _test_rearrange_expert_weights_with_redundancy(
             tensor_model_parallel_size=world_size, pipeline_model_parallel_size=1
         )
 
-        ep_group = get_tp_group().cpu_group
+        ep_group_coordinator = get_tp_group()
+        ep_group = ep_group_coordinator.cpu_group
         ep_rank = torch.distributed.get_rank()
         device = torch.device(f"cuda:{ep_rank}")
 
@@ -387,6 +460,12 @@ def _test_rearrange_expert_weights_with_redundancy(
             num_layers, num_local_experts, hidden_sizes, ep_rank, device, old_indices
         )
 
+        communicator = create_eplb_communicator_or_raise(
+            group_coordinator=ep_group_coordinator,
+            backend=eplb_communicator,
+            expert_weights=expert_weights[0],
+        )
+
         # Execute weight rearrangement
         rearrange_expert_weights_inplace(
             old_indices,
@@ -394,24 +473,33 @@ def _test_rearrange_expert_weights_with_redundancy(
             expert_weights,
             ep_group,
             is_profile=False,
+            communicator=communicator,
         )
 
-        # Verify the rearrangement result
-        verify_expert_weights_after_shuffle(
-            expert_weights,
-            new_indices,
-            hidden_sizes,
-            ep_rank,
-            num_local_experts,
-        )
+    # Verify the rearrangement result
+    local_ok = verify_expert_weights_after_shuffle(
+        expert_weights,
+        new_indices,
+        hidden_sizes,
+        ep_rank,
+        num_local_experts,
+    )
 
+    local_ok = (
         verify_redundant_experts_have_same_weights(
             expert_weights,
             new_indices,
             hidden_sizes,
+            ep_rank,
             world_size,
             num_local_experts,
         )
+        and local_ok
+    )
+    assert_verification_synced(
+        local_ok,
+        "Rearrange verification failed on at least one rank. See logs for details.",
+    )
 
 
 @pytest.mark.parametrize(
@@ -437,11 +525,20 @@ def _test_rearrange_expert_weights_with_redundancy(
         (4, 8, 8, 16),
     ],
 )
+@pytest.mark.parametrize(
+    "eplb_communicator", ["torch_nccl", "torch_gloo", "pynccl", "nixl"]
+)
 def test_rearrange_expert_weights_with_redundancy(
-    world_size, num_layers, num_local_experts, num_logical_experts
+    world_size,
+    num_layers,
+    num_local_experts,
+    num_logical_experts,
+    eplb_communicator,
 ):
     """Test the functionality of rearranging expert weights with redundancy."""
 
+    if eplb_communicator == "nixl" and not has_nixl():
+        pytest.skip("NIXL is not available")
     if torch.accelerator.device_count() < world_size:
         pytest.skip(f"Need at least {world_size} GPUs to run the test")
     distributed_run(
@@ -450,6 +547,7 @@ def test_rearrange_expert_weights_with_redundancy(
         num_layers,
         num_local_experts,
         num_logical_experts,
+        eplb_communicator,
     )
 
 
@@ -464,7 +562,8 @@ def _test_rearrange_expert_weights_no_change(env, world_size) -> None:
             tensor_model_parallel_size=world_size, pipeline_model_parallel_size=1
         )
 
-        ep_group = get_tp_group().cpu_group
+        ep_group_coordinator = get_tp_group()
+        ep_group = ep_group_coordinator.cpu_group
         ep_rank = torch.distributed.get_rank()
         device = torch.device(f"cuda:{ep_rank}")
 
@@ -494,24 +593,40 @@ def _test_rearrange_expert_weights_no_change(env, world_size) -> None:
                 layer_copy.append(weight.clone())
             original_weights.append(layer_copy)
 
+        communicator = create_eplb_communicator_or_raise(
+            group_coordinator=ep_group_coordinator,
+            backend="torch_nccl",
+            expert_weights=expert_weights[0],
+        )
+
         # Execute rearrangement (should be no change)
         rearrange_expert_weights_inplace(
             indices,
             indices,  # Same indices
             expert_weights,
             ep_group,
+            communicator,
             is_profile=False,
         )
 
-        # Verify that the weights have not changed
-        for layer in range(num_layers):
-            for weight_idx in range(len(hidden_sizes)):
-                torch.testing.assert_close(
-                    expert_weights[layer][weight_idx],
-                    original_weights[layer][weight_idx],
-                    msg=f"""Layer {layer}, weight {weight_idx}
- should remain unchanged""",
+    # Verify that the weights have not changed
+    local_ok = True
+    for layer in range(num_layers):
+        for weight_idx in range(len(hidden_sizes)):
+            if not torch.equal(
+                expert_weights[layer][weight_idx],
+                original_weights[layer][weight_idx],
+            ):
+                local_ok = False
+                print(
+                    "test_rearrange_expert_weights_no_change failed: "
+                    f"layer={layer}, weight_idx={weight_idx}",
+                    flush=True,
                 )
+    assert_verification_synced(
+        local_ok,
+        "No-change EPLB verification failed on at least one rank.",
+    )
 
 
 @pytest.mark.parametrize(
@@ -520,14 +635,20 @@ def _test_rearrange_expert_weights_no_change(env, world_size) -> None:
         (2, 2, 2, 3),
     ],
 )
+@pytest.mark.parametrize(
+    "eplb_communicator", ["torch_nccl", "torch_gloo", "pynccl", "nixl"]
+)
 def test_async_transfer_layer_without_mtp(
     world_size: int,
     num_layers: int,
     num_local_experts: int,
     num_logical_experts: int,
+    eplb_communicator: str,
 ):
     """Exercise async EPLB transfer path without MTP/spec decode."""
 
+    if eplb_communicator == "nixl" and not has_nixl():
+        pytest.skip("NIXL is not available")
     if torch.accelerator.device_count() < world_size:
         pytest.skip(f"Need at least {world_size} GPUs to run the test")
 
@@ -537,6 +658,7 @@ def test_async_transfer_layer_without_mtp(
         num_layers,
         num_local_experts,
         num_logical_experts,
+        eplb_communicator,
     )
 
 
@@ -549,7 +671,10 @@ def test_rearrange_expert_weights_no_change(world_size):
 
     if torch.accelerator.device_count() < world_size:
         pytest.skip(f"Need at least {world_size} GPUs to run the test")
-    distributed_run(_test_rearrange_expert_weights_no_change, world_size)
+    distributed_run(
+        _test_rearrange_expert_weights_no_change,
+        world_size,
+    )
 
 
 def _test_rearrange_expert_weights_profile_mode(env, world_size) -> None:
@@ -563,7 +688,8 @@ def _test_rearrange_expert_weights_profile_mode(env, world_size) -> None:
             tensor_model_parallel_size=world_size, pipeline_model_parallel_size=1
         )
 
-        ep_group = get_tp_group().cpu_group
+        ep_group_coordinator = get_tp_group()
+        ep_group = ep_group_coordinator.cpu_group
         ep_rank = torch.distributed.get_rank()
         device = torch.device(f"cuda:{ep_rank}")
 
@@ -600,23 +726,40 @@ def _test_rearrange_expert_weights_profile_mode(env, world_size) -> None:
                 layer_copy.append(weight.clone())
             original_weights.append(layer_copy)
 
+        communicator = create_eplb_communicator_or_raise(
+            group_coordinator=ep_group_coordinator,
+            backend="torch_nccl",
+            expert_weights=expert_weights[0],
+        )
+
         # Execute profile mode rearrangement
         rearrange_expert_weights_inplace(
             old_indices,
             new_indices,
             expert_weights,
             ep_group,
+            communicator,
             is_profile=True,  # Profile mode
         )
 
-        # In profile mode, the weights should remain unchanged
-        for layer in range(num_layers):
-            for weight_idx in range(len(hidden_sizes)):
-                torch.testing.assert_close(
-                    expert_weights[layer][weight_idx],
-                    original_weights[layer][weight_idx],
-                    msg="In profile mode, the weights should remain unchanged",
+    # In profile mode, the weights should remain unchanged
+    local_ok = True
+    for layer in range(num_layers):
+        for weight_idx in range(len(hidden_sizes)):
+            if not torch.equal(
+                expert_weights[layer][weight_idx],
+                original_weights[layer][weight_idx],
+            ):
+                local_ok = False
+                print(
+                    "test_rearrange_expert_weights_profile_mode failed: "
+                    f"layer={layer}, weight_idx={weight_idx}",
+                    flush=True,
                 )
+    assert_verification_synced(
+        local_ok,
+        "Profile-mode EPLB verification failed on at least one rank.",
+    )
 
 
 @pytest.mark.parametrize("world_size", [2, 4])
@@ -625,4 +768,7 @@ def test_rearrange_expert_weights_profile_mode(world_size):
 
     if torch.accelerator.device_count() < world_size:
         pytest.skip(f"Need at least {world_size} GPUs to run the test")
-    distributed_run(_test_rearrange_expert_weights_profile_mode, world_size)
+    distributed_run(
+        _test_rearrange_expert_weights_profile_mode,
+        world_size,
+    )
diff --git a/tests/distributed/test_eplb_fused_moe_layer_dep_nvfp4.py b/tests/distributed/test_eplb_fused_moe_layer_dep_nvfp4.py
index 68b2407c2e4b..9ab785af3135 100644
--- a/tests/distributed/test_eplb_fused_moe_layer_dep_nvfp4.py
+++ b/tests/distributed/test_eplb_fused_moe_layer_dep_nvfp4.py
@@ -10,6 +10,7 @@
 
 from tests.kernels.moe.utils import make_test_quant_config
 from vllm.config import VllmConfig, set_current_vllm_config
+from vllm.distributed.eplb.eplb_state import EplbLayerState
 from vllm.distributed.eplb.rebalance_execute import rearrange_expert_weights_inplace
 from vllm.distributed.parallel_state import (
     ensure_model_parallel_initialized,
@@ -201,7 +202,7 @@ def _test_eplb_fml(env, world_size: int, test_config: TestConfig):
                 dtype=torch.int32,
                 device=device,
             )
-            fml.enable_eplb = True
+            fml.eplb_state = EplbLayerState()
             fml.set_eplb_state(
                 lidx,
                 torch.zeros(
diff --git a/tests/distributed/test_eplb_utils.py b/tests/distributed/test_eplb_utils.py
index 8b287244b742..53a4ce21af2e 100644
--- a/tests/distributed/test_eplb_utils.py
+++ b/tests/distributed/test_eplb_utils.py
@@ -80,7 +80,7 @@ def test_commit_eplb_maps_for_layer_logical_padding():
         .contiguous()
     )
     layer = 0
-    _commit_eplb_maps_for_layer(model_state, new_phy2log, layer)
+    _commit_eplb_maps_for_layer(model_state, new_phy2log[layer], layer)
 
     assert torch.all(model_state.logical_to_physical_map[layer, :, 2] == -1)
 
@@ -143,7 +143,7 @@ def test_commit_eplb_maps_for_layer():
     )
     new_logcnt = torch.tensor([[2, 1, 1], [1, 2, 1]], dtype=torch.long)
 
-    _commit_eplb_maps_for_layer(model_state, new_phy2log, layer=0)
+    _commit_eplb_maps_for_layer(model_state, new_phy2log[0], layer=0)
 
     # Layer 0 updated
     assert torch.equal(model_state.physical_to_logical_map[0], new_phy2log[0])
diff --git a/tests/distributed/test_expert_placement.py b/tests/distributed/test_expert_placement.py
index 8b3a64b9c134..46f63408f467 100644
--- a/tests/distributed/test_expert_placement.py
+++ b/tests/distributed/test_expert_placement.py
@@ -3,7 +3,9 @@
 
 import pytest
 
-from vllm.model_executor.layers.fused_moe.layer import determine_expert_map
+from vllm.model_executor.layers.fused_moe.expert_map_manager import (
+    determine_expert_map,
+)
 
 
 def verify_round_robin_pattern(expert_map, ep_rank, ep_size, global_num_experts):
diff --git a/tests/distributed/test_kv_cache_events.py b/tests/distributed/test_kv_cache_events.py
new file mode 100644
index 000000000000..aa39ab17b30a
--- /dev/null
+++ b/tests/distributed/test_kv_cache_events.py
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from vllm.distributed.kv_events import BlockRemoved, BlockStored
+
+# Minimal ExternalBlockHash for testing (bytes are a valid ExternalBlockHash).
+_FAKE_HASH: bytes = b"\xab" * 32
+
+
+def _make_block_stored(
+    group_idx: int | None = None,
+    kv_cache_spec_sliding_window: int | None = None,
+) -> BlockStored:
+    return BlockStored(
+        block_hashes=[_FAKE_HASH],
+        parent_block_hash=None,
+        token_ids=[1, 2, 3, 4],
+        block_size=4,
+        lora_id=None,
+        medium="GPU",
+        lora_name=None,
+        group_idx=group_idx,
+        kv_cache_spec_sliding_window=kv_cache_spec_sliding_window,
+    )
+
+
+def _make_block_removed(
+    group_idx: int | None = None,
+) -> BlockRemoved:
+    return BlockRemoved(
+        block_hashes=[_FAKE_HASH],
+        medium="GPU",
+        group_idx=group_idx,
+    )
+
+
+def test_block_stored_default_group_idx_is_none():
+    """group_idx defaults to None when not provided."""
+    event = _make_block_stored()
+    assert event.group_idx is None
+
+
+def test_block_removed_default_group_idx_is_none():
+    """group_idx defaults to None when not provided."""
+    event = _make_block_removed()
+    assert event.group_idx is None
+
+
+@pytest.mark.parametrize("group_idx", [1, 2, 3])
+def test_block_stored_hash_differs_by_group_idx(group_idx: int):
+    """BlockStored events that differ only in group_idx must hash differently."""
+    other_group_idx = group_idx + 1
+    event_a = _make_block_stored(group_idx=group_idx)
+    event_b = _make_block_stored(group_idx=other_group_idx)
+    assert hash(event_a) != hash(event_b)
+
+
+def test_block_stored_hash_same_for_equal_group_idx():
+    """Two BlockStored events with identical fields produce the same hash."""
+    event_a = _make_block_stored(group_idx=1)
+    event_b = _make_block_stored(group_idx=1)
+    assert hash(event_a) == hash(event_b)
+
+
+@pytest.mark.parametrize("group_idx", [1, 2, 3])
+def test_block_removed_hash_differs_by_group_idx(group_idx: int):
+    """BlockRemoved events that differ only in group_idx must hash differently."""
+    other_group_idx = group_idx + 1
+    event_a = _make_block_removed(group_idx=group_idx)
+    event_b = _make_block_removed(group_idx=other_group_idx)
+    assert hash(event_a) != hash(event_b)
+
+
+def test_block_removed_hash_same_for_equal_group_idx():
+    """Two BlockRemoved events with identical fields produce the same hash."""
+    event_a = _make_block_removed(group_idx=1)
+    event_b = _make_block_removed(group_idx=1)
+    assert hash(event_a) == hash(event_b)
+
+
+def test_block_stored_hash_differs_by_sliding_window():
+    event_a = _make_block_stored(group_idx=1, kv_cache_spec_sliding_window=128)
+    event_b = _make_block_stored(group_idx=1, kv_cache_spec_sliding_window=256)
+    assert hash(event_a) != hash(event_b)
diff --git a/tests/distributed/test_mnnvl_alltoall.py b/tests/distributed/test_mnnvl_alltoall.py
new file mode 100644
index 000000000000..875b65ff084c
--- /dev/null
+++ b/tests/distributed/test_mnnvl_alltoall.py
@@ -0,0 +1,858 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Tests for MNNVL AllToAll operations.
+
+Requires: docker run ... --cap-add=SYS_PTRACE ...
+Run: pytest tests/distributed/test_mnnvl_alltoall.py -v
+"""
+
+import os
+import traceback
+
+import pytest
+import torch
+import torch.multiprocessing as mp
+
+from vllm.distributed import get_ep_group
+from vllm.utils.flashinfer import (
+    has_flashinfer_nvlink_one_sided,
+    has_flashinfer_nvlink_two_sided,
+)
+from vllm.utils.network_utils import get_open_port
+
+from ..utils import init_test_distributed_environment
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _has_sys_ptrace() -> bool:
+    """Check for SYS_PTRACE capability (bit 19 in CapEff)."""
+    try:
+        with open("/proc/self/status") as f:
+            for line in f:
+                if line.startswith("CapEff:"):
+                    return bool(int(line.split()[1], 16) & (1 << 19))
+    except Exception:
+        pass
+    return False
+
+
+def _spawn_workers(worker_fn, world_size, *, dp_size=None):
+    """Spawn one process per GPU, run worker_fn, assert all succeed.
+
+    Uses an mp.Queue to propagate worker tracebacks back to the parent
+    so pytest shows the actual failure, not just an exit code.
+    """
+    if mp.get_start_method(allow_none=True) is None:
+        mp.set_start_method("spawn")
+
+    port = str(get_open_port())
+    # Allocate a second port for DP master when dp_size is set, so the
+    # distributed init port and DP port can't collide even under xdist.
+    dp_port = str(get_open_port()) if dp_size is not None else None
+    err_queue: mp.Queue = mp.Queue()
+    procs = []
+    for rank in range(world_size):
+        p = mp.Process(
+            target=_run_worker,
+            args=(rank, world_size, port, worker_fn, dp_size, dp_port, err_queue),
+        )
+        p.start()
+        procs.append(p)
+    for p in procs:
+        p.join()
+
+    # Collect any errors from workers before asserting.
+    errors = []
+    while not err_queue.empty():
+        errors.append(err_queue.get_nowait())
+    err_queue.close()
+    err_queue.join_thread()
+    if errors:
+        pytest.fail("Worker(s) failed:\n" + "\n---\n".join(errors))
+
+
+def _run_worker(rank, world_size, port, worker_fn, dp_size, dp_port, err_queue):
+    """Per-process setup: device, distributed env, then call worker_fn.
+
+    Args:
+        dp_size: If set, initialize with tp=1 and data_parallel_size=dp_size.
+                 Otherwise use tp=world_size (default for EP-based tests).
+        dp_port: Separate port for the DP master (only used when dp_size is set).
+        err_queue: Queue for propagating tracebacks to the parent process.
+    """
+    try:
+        os.environ.pop("CUDA_VISIBLE_DEVICES", None)
+        torch.accelerator.set_device_index(rank)
+        if dp_size is not None:
+            _init_dp_environment(world_size, rank, port, dp_size, dp_port)
+        else:
+            init_test_distributed_environment(world_size, 1, rank, port)
+        worker_fn(rank, world_size)
+        torch.distributed.barrier()
+    except Exception:
+        err_queue.put(f"[Rank {rank}]\n{traceback.format_exc()}")
+        # Don't re-raise: the parent reads errors from err_queue.
+        # A non-zero exit from the re-raise would be redundant.
+        import sys
+
+        sys.exit(1)
+
+
+def _init_dp_environment(world_size, rank, port, dp_size, dp_port):
+    """Initialize distributed env with data parallelism.
+
+    Sets up tp=1, pp=1, dp=dp_size. Each process is one DP rank
+    with local rank 0 within its (trivial) tp*pp group.
+
+    Args:
+        port: Port for torch.distributed init.
+        dp_port: Separate port for the DP master group init.
+    """
+    from vllm.config import VllmConfig, set_current_vllm_config
+    from vllm.config.parallel import ParallelConfig
+    from vllm.distributed.parallel_state import (
+        ensure_model_parallel_initialized,
+        init_distributed_environment,
+    )
+
+    vllm_config = VllmConfig()
+    vllm_config.parallel_config = ParallelConfig(
+        data_parallel_size=dp_size,
+        data_parallel_rank=rank,
+        # Pre-populate port list so __post_init__ doesn't auto-generate
+        # random ports. All DP ranks must agree on the same port.
+        _data_parallel_master_port_list=[int(dp_port)],
+    )
+    with set_current_vllm_config(vllm_config):
+        # rank=0 here because each DP rank has a single (tp=1,pp=1) process,
+        # so the local rank within the tp*pp group is always 0.
+        # init_distributed_environment will offset by data_parallel_rank.
+        init_distributed_environment(
+            world_size=1,  # tp * pp = 1
+            rank=0,
+            distributed_init_method=f"tcp://localhost:{port}",
+            local_rank=rank,
+        )
+        ensure_model_parallel_initialized(1, 1)
+
+
+def _make_forward_context(rank, world_size, num_tokens_per_rank):
+    """Create a forward context with mock DP metadata for AgRs tests.
+
+    Returns a context manager suitable for ``with`` statements.
+    The real DPMetadata (with sp_local_sizes etc.) is created internally
+    by set_forward_context from num_tokens_across_dp; the attn_metadata
+    placeholder just satisfies the "attn_metadata is not None" guard.
+    """
+    from vllm.config.parallel import ParallelConfig
+    from vllm.config.vllm import VllmConfig
+    from vllm.forward_context import set_forward_context
+
+    class _AttnMeta:
+        """Minimal placeholder so set_forward_context's
+        ``attn_metadata is not None`` guard (forward_context.py:334)
+        is satisfied. The real DPMetadata is built from num_tokens_across_dp."""
+
+        dp_metadata = None
+
+    vllm_config = VllmConfig()
+    vllm_config.parallel_config = ParallelConfig(
+        data_parallel_size=world_size,
+        is_moe_model=True,
+        data_parallel_rank=rank,
+    )
+    return set_forward_context(
+        _AttnMeta(),
+        vllm_config,
+        num_tokens=num_tokens_per_rank,
+        num_tokens_across_dp=torch.tensor(
+            [num_tokens_per_rank] * world_size, dtype=torch.int
+        ),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Skip conditions
+# ---------------------------------------------------------------------------
+
+requires_multi_gpu = pytest.mark.skipif(
+    torch.accelerator.device_count() < 2, reason="Need >= 2 GPUs"
+)
+requires_two_sided = pytest.mark.skipif(
+    not has_flashinfer_nvlink_two_sided(),
+    reason="FlashInfer NVLink two-sided not available",
+)
+requires_one_sided = pytest.mark.skipif(
+    not has_flashinfer_nvlink_one_sided(),
+    reason="FlashInfer NVLink one-sided not available",
+)
+requires_ptrace = pytest.mark.skipif(
+    not _has_sys_ptrace(),
+    reason="SYS_PTRACE required (docker run --cap-add=SYS_PTRACE)",
+)
+
+# NOTE: No module-level pytestmark here. The FlashInfer lifecycle tests have
+# their own @requires_two_sided / @requires_one_sided decorators, and
+# test_args_dispatch_combine uses only standard torch.distributed ops and
+# should run even when FlashInfer NVLink backends are not installed.
+
+
+# ---------------------------------------------------------------------------
+# Test 1: Two-sided manager lifecycle (init, cleanup, reinit, ensure_init)
+# ---------------------------------------------------------------------------
+#
+# Tests FlashInferNVLinkTwoSidedManager which wraps FlashInfer's MnnvlMoe.
+# initialize() allocates MNNVL shared workspaces via MnnvlMoe.get_moe_workspaces,
+# which uses pidfd_getfd() to share memory file descriptors across processes —
+# hence the SYS_PTRACE requirement.
+#
+# Uses EP group (get_ep_group) because the two-sided manager is constructed
+# with an EP-scoped communicator in production. With tp=world_size the EP
+# group spans all ranks, giving us a multi-rank group for testing.
+# ---------------------------------------------------------------------------
+
+
+def _two_sided_lifecycle_worker(rank, world_size):
+    from vllm.distributed.device_communicators.all2all import (
+        FlashInferNVLinkTwoSidedManager,
+    )
+
+    cpu_group = get_ep_group().cpu_group
+    num_gpus = torch.accelerator.device_count()
+    manager = FlashInferNVLinkTwoSidedManager(cpu_group)
+
+    # Not initialized yet
+    assert not manager.initialized
+    assert manager.rank == rank
+    assert manager.world_size == world_size
+
+    # Initialize
+    manager.initialize(world_size=world_size, rank=rank, gpus_per_node=num_gpus)
+    assert manager.initialized
+    assert manager.workspace_tensor is not None
+    assert manager.prepare_workspace_tensor is not None
+    assert manager.mapping is not None
+
+    torch.distributed.barrier()
+
+    # Cleanup
+    manager.cleanup()
+    assert not manager.initialized
+    assert manager.workspace_tensor is None
+    assert manager.prepare_workspace_tensor is None
+
+    torch.distributed.barrier()
+
+    # Reinitialize
+    manager.initialize(world_size=world_size, rank=rank, gpus_per_node=num_gpus)
+    assert manager.initialized
+
+    torch.distributed.barrier()
+
+    # ensure_alltoall_workspace_initialized is idempotent when already init'd
+    assert manager.ensure_alltoall_workspace_initialized()
+    assert manager.initialized
+
+    manager.cleanup()
+    assert not manager.initialized
+
+
+@requires_multi_gpu
+@requires_two_sided
+@requires_ptrace
+@pytest.mark.parametrize("world_size", [2])
+def test_two_sided_manager_lifecycle(world_size):
+    """Test init, cleanup, reinit, and ensure_initialized idempotency."""
+    _spawn_workers(_two_sided_lifecycle_worker, world_size)
+
+
+# ---------------------------------------------------------------------------
+# Test 2: One-sided manager lifecycle (init, cleanup, reinit)
+# ---------------------------------------------------------------------------
+#
+# Tests FlashInferNVLinkOneSidedManager which wraps FlashInfer's MoeAlltoAll.
+# initialize() creates MoeAlltoAll with an MnnvlConfig, which allocates MNNVL
+# shared workspaces — same cross-process memory sharing as two-sided, hence
+# the SYS_PTRACE requirement.
+#
+# Uses DP group (get_dp_group) because the one-sided manager's initialize()
+# internally calls get_dp_group() to set up the MnnvlConfig communicator.
+# We therefore need a real DP group with world_size > 1, which requires
+# dp_size=world_size via _init_dp_environment.
+# ---------------------------------------------------------------------------
+
+
+def _one_sided_lifecycle_worker(rank, world_size):
+    from vllm.distributed.device_communicators.all2all import (
+        FlashInferNVLinkOneSidedManager,
+    )
+    from vllm.distributed.parallel_state import get_dp_group
+
+    cpu_group = get_dp_group().cpu_group
+    manager = FlashInferNVLinkOneSidedManager(cpu_group)
+
+    assert not manager.initialized
+    assert manager.rank == rank
+    assert manager.world_size == world_size
+
+    init_kwargs = dict(
+        max_num_tokens=1024,
+        top_k=2,
+        num_experts=world_size * 8,
+        hidden_size=4096,
+    )
+
+    # Initialize
+    manager.initialize(**init_kwargs)
+    assert manager.initialized
+    assert manager.moe_alltoall is not None
+    assert manager.mapping is not None
+
+    torch.distributed.barrier()
+
+    # Cleanup
+    manager.cleanup()
+    assert not manager.initialized
+    assert manager.moe_alltoall is None
+
+    torch.distributed.barrier()
+
+    # Reinitialize with different token count
+    manager.initialize(**{**init_kwargs, "max_num_tokens": 2048})
+    assert manager.initialized
+
+    torch.distributed.barrier()
+    manager.cleanup()
+
+
+@requires_multi_gpu
+@requires_one_sided
+@requires_ptrace
+@pytest.mark.parametrize("world_size", [2])
+def test_one_sided_manager_lifecycle(world_size):
+    """Test init, cleanup, and reinit with different params."""
+    _spawn_workers(
+        _one_sided_lifecycle_worker,
+        world_size,
+        dp_size=world_size,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Test 2b: One-sided manager grows workspace across heterogeneous MoE layers
+# ---------------------------------------------------------------------------
+#
+# Models with heterogeneous MoE quantization — most notably a quantized base
+# MoE combined with an unquantized MTP head — can call initialize() multiple
+# times with different per-token dispatch payload sizes. The shared workspace
+# must grow to the union and the MoeAlltoAll must be rebuilt; otherwise a
+# later layer's combine call overruns the workspace sized for the first
+# layer's smaller payload and trips FlashInfer's combinePayloadOffset assert.
+# ---------------------------------------------------------------------------
+
+
+def _one_sided_workspace_grow_worker(rank, world_size):
+    from vllm.distributed.device_communicators.all2all import (
+        FlashInferNVLinkOneSidedManager,
+    )
+    from vllm.distributed.parallel_state import get_dp_group
+
+    cpu_group = get_dp_group().cpu_group
+    manager = FlashInferNVLinkOneSidedManager(cpu_group)
+
+    base_kwargs = dict(
+        max_num_tokens=1024,
+        top_k=2,
+        num_experts=world_size * 8,
+        hidden_size=4096,
+    )
+    nvfp4_kwargs = dict(
+        dispatch_dtype_bytes_per_elem=0,
+        dispatch_scale_bytes_per_token=base_kwargs["hidden_size"] // 16,
+    )
+    bf16_kwargs = dict(
+        dispatch_dtype_bytes_per_elem=2,
+        dispatch_scale_bytes_per_token=0,
+    )
+
+    # First init: NVFP4-like (hidden_bytes = hidden // 2 + hidden // 16).
+    manager.initialize(**base_kwargs, **nvfp4_kwargs)
+    assert manager.initialized
+    nvfp4_workspace_size = manager.workspace_size
+    nvfp4_moe_alltoall = manager.moe_alltoall
+
+    torch.distributed.barrier()
+
+    # Second init: bf16-like (hidden_bytes = hidden * 2). Models the case of
+    # a quantized base MoE followed by an unquantized MoE layer (e.g. an MTP
+    # head). Per-token dispatch payload is ~4x larger, so the union workspace
+    # must grow and MoeAlltoAll must be rebuilt.
+    manager.initialize(**base_kwargs, **bf16_kwargs)
+    assert manager.initialized
+    assert manager.workspace_size > nvfp4_workspace_size
+    assert manager.moe_alltoall is not nvfp4_moe_alltoall
+    bf16_workspace_size = manager.workspace_size
+    bf16_moe_alltoall = manager.moe_alltoall
+
+    torch.distributed.barrier()
+
+    # Third init: back to NVFP4-like shape. Existing workspace already covers
+    # it, so initialize() must no-op — no shrink, no rebuild.
+    manager.initialize(**base_kwargs, **nvfp4_kwargs)
+    assert manager.initialized
+    assert manager.workspace_size == bf16_workspace_size
+    assert manager.moe_alltoall is bf16_moe_alltoall
+
+    torch.distributed.barrier()
+    manager.cleanup()
+
+
+@requires_multi_gpu
+@requires_one_sided
+@requires_ptrace
+@pytest.mark.parametrize("world_size", [2])
+def test_one_sided_manager_workspace_grow(world_size):
+    """A later initialize() with a larger per-token payload must grow the
+    workspace and rebuild MoeAlltoAll; a later initialize() with a smaller
+    payload must no-op."""
+    _spawn_workers(
+        _one_sided_workspace_grow_worker,
+        world_size,
+        dp_size=world_size,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Test 3: AgRs dispatch/combine with value validation
+# ---------------------------------------------------------------------------
+#
+# Tests AgRsAll2AllManager which uses only standard torch.distributed
+# all_gatherv / reduce_scatterv — no FlashInfer or MNNVL dependency.
+# This test validates the reference all-to-all implementation that other
+# backends are compared against.
+# ---------------------------------------------------------------------------
+
+
+def _args_dispatch_combine_worker(rank, world_size):
+    from vllm.distributed.device_communicators.all2all import AgRsAll2AllManager
+    from vllm.forward_context import get_forward_context
+
+    cpu_group = get_ep_group().cpu_group
+    device = torch.device(f"cuda:{rank}")
+
+    hidden_size = 64
+    tokens_per_rank = 16
+    experts_per_token = 2
+    num_experts = world_size * 4
+    total_tokens = world_size * tokens_per_rank
+
+    # Deterministic per-rank data: rank r has value (r + 1)
+    hidden = torch.full(
+        (tokens_per_rank, hidden_size),
+        float(rank + 1),
+        device=device,
+        dtype=torch.float32,
+    )
+    router = torch.full(
+        (tokens_per_rank, num_experts),
+        float(rank + 1) * 10,
+        device=device,
+        dtype=torch.float32,
+    )
+    weights = torch.full(
+        (tokens_per_rank, experts_per_token),
+        float(rank + 1) * 100,
+        device=device,
+        dtype=torch.float32,
+    )
+    ids = torch.full(
+        (tokens_per_rank, experts_per_token),
+        rank,
+        device=device,
+        dtype=torch.long,
+    )
+
+    with _make_forward_context(rank, world_size, tokens_per_rank):
+        manager = AgRsAll2AllManager(cpu_group)
+        dp_metadata = get_forward_context().dp_metadata
+
+        with dp_metadata.sp_local_sizes(sequence_parallel_size=1):
+            # -- dispatch_router_logits --
+            d_hidden, d_router = manager.dispatch_router_logits(
+                hidden.clone(),
+                router.clone(),
+                is_sequence_parallel=True,
+            )
+            assert d_hidden.shape == (total_tokens, hidden_size)
+            assert d_router.shape == (total_tokens, num_experts)
+
+            for r in range(world_size):
+                s = r * tokens_per_rank
+                e = (r + 1) * tokens_per_rank
+                torch.testing.assert_close(
+                    d_hidden[s:e],
+                    torch.full_like(d_hidden[s:e], float(r + 1)),
+                )
+                torch.testing.assert_close(
+                    d_router[s:e],
+                    torch.full_like(d_router[s:e], float(r + 1) * 10),
+                )
+
+            # -- dispatch --
+            d_hidden2, d_weights, d_ids = manager.dispatch(
+                hidden.clone(),
+                weights.clone(),
+                ids.clone(),
+                is_sequence_parallel=True,
+            )
+            assert d_hidden2.shape == (total_tokens, hidden_size)
+            assert d_weights.shape == (total_tokens, experts_per_token)
+            assert d_ids.shape == (total_tokens, experts_per_token)
+
+            for r in range(world_size):
+                s = r * tokens_per_rank
+                e = (r + 1) * tokens_per_rank
+                torch.testing.assert_close(
+                    d_weights[s:e],
+                    torch.full_like(d_weights[s:e], float(r + 1) * 100),
+                )
+                assert (d_ids[s:e] == r).all()
+
+            # -- combine (reduce-scatter) --
+            # Each token i has value i in all columns; after reduce-scatter
+            # each rank gets its slice, summed across ranks.
+            expert_out = (
+                torch.arange(total_tokens, device=device, dtype=torch.float32)
+                .unsqueeze(1)
+                .expand(total_tokens, hidden_size)
+                .contiguous()
+            )
+
+            combined = manager.combine(expert_out, is_sequence_parallel=True)
+            assert combined.shape == (tokens_per_rank, hidden_size)
+
+            for i in range(tokens_per_rank):
+                expected_val = float(rank * tokens_per_rank + i) * world_size
+                torch.testing.assert_close(
+                    combined[i],
+                    torch.full_like(combined[i], expected_val),
+                )
+
+            torch.distributed.barrier()
+
+
+@requires_multi_gpu
+@pytest.mark.parametrize("world_size", [2])
+def test_args_dispatch_combine(world_size):
+    """Validate dispatch gathers all-rank data and combine reduces correctly."""
+    _spawn_workers(_args_dispatch_combine_worker, world_size)
+
+
+# ---------------------------------------------------------------------------
+# Test 4: FlashInfer two-sided dispatch/combine data communication
+# ---------------------------------------------------------------------------
+#
+# Tests actual data flow through the FlashInfer NVLink two-sided backend
+# by calling flashinfer_alltoall_dispatch (with defer_input_quant=True to
+# skip quantization) and flashinfer_alltoall_combine, then verifying exact
+# round-trip values. Dispatch sends each token once per distinct expert
+# rank, and combine performs an unweighted sum, so:
+#   dispatch(hidden) → identity → combine = hidden * num_distinct_ranks(i)
+# ---------------------------------------------------------------------------
+
+
+def _two_sided_data_worker(rank, world_size):
+    from vllm.distributed.device_communicators.all2all import (
+        FlashInferNVLinkTwoSidedManager,
+    )
+    from vllm.distributed.parallel_state import get_dp_group
+    from vllm.forward_context import get_forward_context
+    from vllm.model_executor.layers.fused_moe.config import (
+        FusedMoEQuantConfig,
+        FusedMoEQuantDesc,
+    )
+    from vllm.model_executor.layers.fused_moe.prepare_finalize.flashinfer_nvlink_two_sided import (  # noqa: E501
+        flashinfer_alltoall_combine,
+        flashinfer_alltoall_dispatch,
+    )
+
+    # Use DP group because MnnvlMoe workspace allocation calls get_dp_group()
+    # internally and requires dp_size == ep_size.
+    cpu_group = get_dp_group().cpu_group
+    device = torch.device(f"cuda:{rank}")
+    num_gpus = torch.accelerator.device_count()
+
+    hidden_size = 128
+    tokens_per_rank = 32
+    experts_per_token = 2
+    num_experts = world_size * 4
+
+    # Initialize the FlashInfer two-sided manager
+    manager = FlashInferNVLinkTwoSidedManager(cpu_group)
+    manager.initialize(world_size=world_size, rank=rank, gpus_per_node=num_gpus)
+    assert manager.initialized
+
+    torch.distributed.barrier()
+
+    # Create deterministic per-rank test data
+    torch.manual_seed(rank + 42)
+    hidden = torch.randn(
+        tokens_per_rank,
+        hidden_size,
+        device=device,
+        dtype=torch.bfloat16,
+    )
+    # Assign each token to experts spread across ranks so tokens move between GPUs
+    topk_ids = torch.randint(
+        0,
+        num_experts,
+        (tokens_per_rank, experts_per_token),
+        device=device,
+        dtype=torch.int32,
+    )
+    topk_weights = torch.rand(
+        tokens_per_rank,
+        experts_per_token,
+        device=device,
+        dtype=torch.float32,
+    )
+
+    # Unquantized config: quant_dtype=None means moe_kernel_quantize_input is a no-op
+    no_quant = FusedMoEQuantDesc()
+    quant_config = FusedMoEQuantConfig(
+        _a1=no_quant,
+        _a2=no_quant,
+        _w1=no_quant,
+        _w2=no_quant,
+    )
+    assert quant_config.quant_dtype is None  # sanity: no quantization
+
+    with _make_forward_context(rank, world_size, tokens_per_rank):
+        dp_metadata = get_forward_context().dp_metadata
+
+        with dp_metadata.sp_local_sizes(sequence_parallel_size=1):
+            local_sizes = dp_metadata.get_chunk_sizes_across_dp_rank()
+
+            # --- FlashInfer two-sided dispatch ---
+            alltoall_info, fi_topk_ids, fi_topk_weights, fi_hidden, fi_scale = (
+                flashinfer_alltoall_dispatch(
+                    manager,
+                    local_sizes,
+                    hidden.clone(),
+                    None,  # no global scale
+                    topk_ids.clone(),
+                    topk_weights.clone(),
+                    experts_per_token,
+                    num_experts,
+                    quant_config,
+                    defer_input_quant=True,
+                )
+            )
+            assert fi_scale is None  # deferred quant: no scale produced
+            assert fi_hidden is not None
+            assert fi_hidden.shape[1] == hidden_size
+            assert fi_hidden.numel() > 0
+
+            # --- Round-trip exact verification ---
+            # The all-to-all sends each token once per *distinct* expert
+            # rank. Combine performs an unweighted sum of the per-rank
+            # contributions. With identity expert (feeding dispatched
+            # hidden straight back):
+            #   result[i] = hidden[i] * num_distinct_expert_ranks(i)
+            combined = flashinfer_alltoall_combine(
+                manager,
+                fi_hidden,
+                top_k=experts_per_token,
+                token_count=tokens_per_rank,
+                alltoall_info=alltoall_info,
+            )
+            assert combined.shape == (tokens_per_rank, hidden_size)
+
+            experts_per_rank = num_experts // world_size
+            expert_ranks = topk_ids // experts_per_rank  # (tokens, top_k)
+            num_distinct = torch.tensor(
+                [len(set(row.tolist())) for row in expert_ranks],
+                device=device,
+                dtype=torch.float32,
+            ).unsqueeze(1)  # (tokens, 1)
+            expected = (hidden.float() * num_distinct).to(hidden.dtype)
+            torch.testing.assert_close(combined, expected)
+
+            # --- Linearity check with scaled expert output ---
+            # Scaling the expert output by a constant should scale the
+            # combined result by the same constant.
+            scale = 3.0
+            combined_scaled = flashinfer_alltoall_combine(
+                manager,
+                fi_hidden * scale,
+                top_k=experts_per_token,
+                token_count=tokens_per_rank,
+                alltoall_info=alltoall_info,
+            )
+            expected_scaled = (hidden.float() * num_distinct * scale).to(hidden.dtype)
+            torch.testing.assert_close(combined_scaled, expected_scaled)
+
+            torch.distributed.barrier()
+
+    manager.cleanup()
+
+
+@requires_multi_gpu
+@requires_two_sided
+@requires_ptrace
+@pytest.mark.parametrize("world_size", [2])
+def test_two_sided_dispatch_combine(world_size):
+    """Test FlashInfer two-sided dispatch/combine with exact value verification."""
+    _spawn_workers(_two_sided_data_worker, world_size, dp_size=world_size)
+
+
+# ---------------------------------------------------------------------------
+# Test 5: FlashInfer one-sided dispatch/combine data communication
+# ---------------------------------------------------------------------------
+#
+# Tests actual data flow through the FlashInfer NVLink one-sided backend
+# by calling MoeAlltoAll.dispatch() and MoeAlltoAll.combine() directly
+# with synthetic payloads, then verifying shapes and round-trip consistency.
+# ---------------------------------------------------------------------------
+
+
+def _one_sided_data_worker(rank, world_size):
+    from vllm.distributed.device_communicators.all2all import (
+        FlashInferNVLinkOneSidedManager,
+    )
+    from vllm.distributed.parallel_state import get_dp_group
+    from vllm.forward_context import get_forward_context
+
+    cpu_group = get_dp_group().cpu_group
+    device = torch.device(f"cuda:{rank}")
+
+    hidden_size = 256
+    tokens_per_rank = 32
+    experts_per_token = 2
+    num_experts = world_size * 8
+
+    # Initialize the one-sided manager
+    manager = FlashInferNVLinkOneSidedManager(cpu_group)
+    manager.initialize(
+        max_num_tokens=tokens_per_rank,
+        top_k=experts_per_token,
+        num_experts=num_experts,
+        hidden_size=hidden_size,
+    )
+    assert manager.initialized
+    assert manager.moe_alltoall is not None
+
+    with _make_forward_context(rank, world_size, tokens_per_rank):
+        dp_metadata = get_forward_context().dp_metadata
+
+        with dp_metadata.sp_local_sizes(sequence_parallel_size=1):
+            local_sizes = dp_metadata.get_chunk_sizes_across_dp_rank()
+            runtime_max_tokens = max(local_sizes)
+
+            # Create test data with raw tensors matching the nvfp4 payload
+            # sizes the workspace was allocated for:
+            #   a1q: (tokens, hidden_size // 2) — nvfp4 hidden states
+            #   a1q_scale: (tokens, hidden_size // 16) — fp8 scaling factors
+            torch.manual_seed(rank + 42)
+            a1q = torch.randint(
+                0,
+                256,
+                (tokens_per_rank, hidden_size // 2),
+                device=device,
+                dtype=torch.uint8,
+            )
+            a1q_scale = torch.randint(
+                0,
+                256,
+                (tokens_per_rank, hidden_size // 16),
+                device=device,
+                dtype=torch.uint8,
+            )
+            topk_ids = torch.randint(
+                0,
+                num_experts,
+                (tokens_per_rank, experts_per_token),
+                device=device,
+                dtype=torch.int32,
+            )
+            topk_weights = torch.rand(
+                tokens_per_rank,
+                experts_per_token,
+                device=device,
+                dtype=torch.float32,
+            )
+
+            # --- One-sided dispatch ---
+            payloads = [a1q, a1q_scale, topk_ids, topk_weights]
+            recv_payloads = manager.moe_alltoall.dispatch(
+                token_selected_experts=topk_ids,
+                input_payloads=payloads,
+                runtime_max_tokens_per_rank=runtime_max_tokens,
+            )
+            assert len(recv_payloads) == 4
+            recv_a1q, recv_scale, recv_ids, recv_weights = recv_payloads
+            assert recv_a1q.numel() > 0
+            assert recv_ids.numel() > 0
+
+            # --- Round-trip exact verification ---
+            # The dispatch routes each token once per *distinct* expert
+            # rank. Combine performs an unweighted sum of per-rank
+            # contributions. With constant expert output (all 1s):
+            #   result[i] = 1.0 * num_distinct_expert_ranks(i)
+            expert_output = torch.ones(
+                world_size,
+                runtime_max_tokens,
+                hidden_size,
+                device=device,
+                dtype=torch.bfloat16,
+            )
+            combined = manager.moe_alltoall.combine(
+                payload=expert_output,
+                runtime_max_tokens_per_rank=runtime_max_tokens,
+            )
+            assert combined.shape == (tokens_per_rank, hidden_size)
+
+            experts_per_rank = num_experts // world_size
+            expert_ranks = topk_ids // experts_per_rank  # (tokens, top_k)
+            num_distinct = torch.tensor(
+                [len(set(row.tolist())) for row in expert_ranks],
+                device=device,
+                dtype=torch.bfloat16,
+            ).unsqueeze(1)  # (tokens, 1)
+            expected = num_distinct.expand_as(combined)
+            torch.testing.assert_close(combined, expected)
+
+            # --- Linearity check with scaled expert output ---
+            # Scaling the expert output by a constant should scale the
+            # combined result by the same constant.
+            # Re-dispatch to reset internal state (one-sided requires a
+            # fresh dispatch before each combine).
+            manager.moe_alltoall.dispatch(
+                token_selected_experts=topk_ids,
+                input_payloads=payloads,
+                runtime_max_tokens_per_rank=runtime_max_tokens,
+            )
+            scale = 3.0
+            combined_scaled = manager.moe_alltoall.combine(
+                payload=expert_output * scale,
+                runtime_max_tokens_per_rank=runtime_max_tokens,
+            )
+            expected_scaled = (expected * scale).to(torch.bfloat16)
+            torch.testing.assert_close(combined_scaled, expected_scaled)
+
+            torch.distributed.barrier()
+
+    manager.cleanup()
+
+
+@requires_multi_gpu
+@requires_one_sided
+@requires_ptrace
+@pytest.mark.parametrize("world_size", [2])
+def test_one_sided_dispatch_combine(world_size):
+    """Test FlashInfer one-sided dispatch/combine with actual data flow."""
+    _spawn_workers(_one_sided_data_worker, world_size, dp_size=world_size)
diff --git a/tests/distributed/test_packed_tensor.py b/tests/distributed/test_packed_tensor.py
index 134629e2b790..6a2ab78446ae 100644
--- a/tests/distributed/test_packed_tensor.py
+++ b/tests/distributed/test_packed_tensor.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Tests for packed tensor broadcasting functionality.
 
-Unit tests for packed_broadcast_producer and packed_broadcast_consumer.
+Unit tests for packed_nccl_broadcast_producer and packed_nccl_broadcast_consumer.
 These utilities enable efficient batched tensor transfer over NCCL.
 """
 
@@ -11,8 +11,12 @@
 
 from vllm.distributed.weight_transfer.nccl_engine import NCCLWeightTransferUpdateInfo
 from vllm.distributed.weight_transfer.packed_tensor import (
-    packed_broadcast_consumer,
-    packed_broadcast_producer,
+    pack_tensors,
+    packed_ipc_consumer,
+    packed_ipc_producer,
+    packed_nccl_broadcast_consumer,
+    packed_nccl_broadcast_producer,
+    unpack_tensor,
 )
 
 
@@ -90,91 +94,18 @@ def test_packed_can_be_set_true(self):
         assert info.packed is True
 
 
-# --- Unit Tests: packed_broadcast_producer ---
+# --- Unit Tests: packed_nccl_broadcast_producer ---
 
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
 class TestPackedBroadcastProducer:
-    """Test packed_broadcast_producer function."""
-
-    def test_producer_broadcasts_tensors(self):
-        """Test that producer broadcasts all tensors."""
-        params = create_mock_model_params()
-        params_cuda = [(name, tensor.cuda()) for name, tensor in params]
-
-        mock_group = MockCommunicationGroup()
-
-        # Use a small target size to force multiple batches
-        packed_broadcast_producer(
-            iterator=iter(params_cuda),
-            group=mock_group,
-            src=0,
-            post_iter_func=lambda x: x[1],
-            buffer_size_bytes=500,
-        )
-
-        # Should have broadcasted some tensors
-        assert mock_group.broadcast_count > 0
-        assert len(mock_group.broadcasted_tensors) > 0
-
-    def test_producer_single_large_tensor(self):
-        """Test with a single tensor larger than target size."""
-        # Create a large tensor
-        large_tensor = torch.randn(1000, 1000, dtype=torch.float32).cuda()
-        params = [("large_weight", large_tensor)]
-
-        mock_group = MockCommunicationGroup()
-
-        # Small target size to force the tensor to exceed it
-        packed_broadcast_producer(
-            iterator=iter(params),
-            group=mock_group,
-            src=0,
-            post_iter_func=lambda x: x[1],
-            buffer_size_bytes=100,
-        )
-
-        # Should still broadcast the tensor (at least 1 broadcast)
-        assert mock_group.broadcast_count >= 1
-        assert len(mock_group.broadcasted_tensors) >= 1
-
-        # Verify the total broadcasted size matches the tensor
-        expected_size = large_tensor.numel() * large_tensor.element_size()
-        actual_size = sum(t.numel() for t in mock_group.broadcasted_tensors)
-        assert actual_size == expected_size
-
-    def test_producer_multiple_batches(self):
-        """Test that tensors are properly batched when exceeding target size."""
-        # Create many small tensors
-        params = [
-            (f"weight_{i}", torch.randn(10, 10, dtype=torch.float32).cuda())
-            for i in range(20)
-        ]
-
-        mock_group = MockCommunicationGroup()
-
-        # Small target size to force multiple batches
-        packed_broadcast_producer(
-            iterator=iter(params),
-            group=mock_group,
-            src=0,
-            post_iter_func=lambda x: x[1],
-            buffer_size_bytes=2000,
-        )
-
-        # Should have multiple broadcasts
-        assert mock_group.broadcast_count > 1
-
-        # Total size should match sum of all tensors
-        expected_total = sum(t.numel() * t.element_size() for _, t in params)
-        actual_total = sum(t.numel() for t in mock_group.broadcasted_tensors)
-        assert actual_total == expected_total
+    """Test packed_nccl_broadcast_producer function."""
 
     def test_producer_empty_iterator(self):
         """Test producer handles empty iterator gracefully."""
         mock_group = MockCommunicationGroup()
 
-        packed_broadcast_producer(
+        packed_nccl_broadcast_producer(
             iterator=iter([]),
             group=mock_group,
             src=0,
@@ -186,64 +117,6 @@ def test_producer_empty_iterator(self):
         assert mock_group.broadcast_count == 0
 
 
-# --- Unit Tests: packed_broadcast_consumer ---
-
-
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
-class TestPackedBroadcastConsumer:
-    """Test packed_broadcast_consumer function."""
-
-    def test_consumer_receives_tensors(self):
-        """Test that consumer receives and unpacks tensors."""
-        params = create_mock_model_params()
-        params_cuda = [(name, tensor.cuda()) for name, tensor in params]
-
-        buffer_size = 2000
-
-        # First, run producer to get the broadcasted tensors
-        producer_group = MockCommunicationGroup()
-
-        packed_broadcast_producer(
-            iterator=iter(params_cuda),
-            group=producer_group,
-            src=0,
-            post_iter_func=lambda x: x[1],
-            buffer_size_bytes=buffer_size,
-        )
-
-        # Now run consumer with the broadcasted tensors
-        consumer_group = MockConsumerCommunicationGroup(
-            producer_group.broadcasted_tensors
-        )
-
-        state_dict_info = create_state_dict_info(params_cuda)
-
-        unpacked_tensors = {}
-
-        def post_unpack_func(tensor_list):
-            for name, tensor in tensor_list:
-                unpacked_tensors[name] = tensor.clone()
-
-        packed_broadcast_consumer(
-            iterator=iter(state_dict_info.items()),
-            group=consumer_group,
-            src=0,
-            post_unpack_func=post_unpack_func,
-            buffer_size_bytes=buffer_size,
-        )
-
-        # Verify all parameters were unpacked
-        assert len(unpacked_tensors) == len(params)
-
-        # Verify each tensor matches the original
-        for name, original_tensor in params_cuda:
-            assert name in unpacked_tensors
-            unpacked = unpacked_tensors[name]
-            assert unpacked.shape == original_tensor.shape
-            assert unpacked.dtype == original_tensor.dtype
-            assert torch.allclose(unpacked, original_tensor, rtol=1e-5, atol=1e-7)
-
-
 # --- Integration Tests: Producer-Consumer Roundtrip ---
 
 
@@ -260,7 +133,7 @@ def test_roundtrip_different_dtypes(self, dtype):
         buffer_size = 1000
         producer_group = MockCommunicationGroup()
 
-        packed_broadcast_producer(
+        packed_nccl_broadcast_producer(
             iterator=iter(params_cuda),
             group=producer_group,
             src=0,
@@ -279,7 +152,7 @@ def post_unpack_func(tensor_list):
             for name, tensor in tensor_list:
                 unpacked_tensors[name] = tensor.clone()
 
-        packed_broadcast_consumer(
+        packed_nccl_broadcast_consumer(
             iterator=iter(state_dict_info.items()),
             group=consumer_group,
             src=0,
@@ -306,7 +179,7 @@ def test_roundtrip_mixed_dtypes(self):
         buffer_size = 500
         producer_group = MockCommunicationGroup()
 
-        packed_broadcast_producer(
+        packed_nccl_broadcast_producer(
             iterator=iter(params),
             group=producer_group,
             src=0,
@@ -325,7 +198,7 @@ def post_unpack_func(tensor_list):
             for name, tensor in tensor_list:
                 unpacked_tensors[name] = tensor.clone()
 
-        packed_broadcast_consumer(
+        packed_nccl_broadcast_consumer(
             iterator=iter(state_dict_info.items()),
             group=consumer_group,
             src=0,
@@ -341,7 +214,7 @@ def post_unpack_func(tensor_list):
             assert unpacked.dtype == original_tensor.dtype
             assert torch.allclose(unpacked, original_tensor, rtol=1e-4, atol=1e-6)
 
-    @pytest.mark.parametrize("target_size", [100, 1000, 10000, 100000])
+    @pytest.mark.parametrize("target_size", [100, 100000])
     def test_roundtrip_different_batch_sizes(self, target_size):
         """Test roundtrip with different target batch sizes."""
         params = create_mock_model_params(num_layers=5)
@@ -349,7 +222,7 @@ def test_roundtrip_different_batch_sizes(self, target_size):
 
         producer_group = MockCommunicationGroup()
 
-        packed_broadcast_producer(
+        packed_nccl_broadcast_producer(
             iterator=iter(params_cuda),
             group=producer_group,
             src=0,
@@ -368,7 +241,7 @@ def post_unpack_func(tensor_list):
             for name, tensor in tensor_list:
                 unpacked_tensors[name] = tensor.clone()
 
-        packed_broadcast_consumer(
+        packed_nccl_broadcast_consumer(
             iterator=iter(state_dict_info.items()),
             group=consumer_group,
             src=0,
@@ -407,7 +280,7 @@ def test_roundtrip_non_contiguous_tensors(self):
         buffer_size = 500
         producer_group = MockCommunicationGroup()
 
-        packed_broadcast_producer(
+        packed_nccl_broadcast_producer(
             iterator=iter(params),
             group=producer_group,
             src=0,
@@ -426,7 +299,7 @@ def post_unpack_func(tensor_list):
             for name, tensor in tensor_list:
                 unpacked_tensors[name] = tensor.clone()
 
-        packed_broadcast_consumer(
+        packed_nccl_broadcast_consumer(
             iterator=iter(state_dict_info.items()),
             group=consumer_group,
             src=0,
@@ -441,3 +314,462 @@ def post_unpack_func(tensor_list):
             assert unpacked.shape == original_tensor.shape
             assert unpacked.dtype == original_tensor.dtype
             assert torch.allclose(unpacked, original_tensor, rtol=1e-4, atol=1e-6)
+
+
+# --- Unit Tests: unpack_tensor ---
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+class TestUnpackTensor:
+    """Test the shared unpack_tensor function."""
+
+    def test_unpack_produces_independent_copies(self):
+        """Verify unpacked tensors don't share memory with packed buffer."""
+        original = torch.randn(10, dtype=torch.float32).cuda()
+        packed = original.contiguous().view(torch.uint8).view(-1)
+
+        result = unpack_tensor(
+            packed,
+            names=["w"],
+            shapes=[[10]],
+            dtypes=[torch.float32],
+            tensor_sizes=[packed.numel()],
+        )
+
+        # Mutate the packed buffer
+        packed.zero_()
+
+        # Unpacked tensor should be unaffected
+        assert torch.allclose(result[0][1], original)
+
+
+# --- Unit Tests: pack_tensors ---
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+class TestPackTensors:
+    """Test the shared pack_tensors function."""
+
+    def test_pack_basic(self):
+        """Test packing a few tensors into one buffer."""
+        params = [
+            ("w1", torch.randn(10, 20, dtype=torch.float32).cuda()),
+            ("w2", torch.randn(5, dtype=torch.float16).cuda()),
+        ]
+
+        chunk = pack_tensors(
+            iterator=iter(params),
+            post_iter_func=lambda x: x[1],
+            buffer_size_bytes=10_000_000,
+        )
+
+        assert chunk is not None
+        assert len(chunk.names) == 2
+        assert chunk.names == ["w1", "w2"]
+        assert chunk.shapes == [[10, 20], [5]]
+        assert chunk.dtypes == [torch.float32, torch.float16]
+        assert chunk.packed_tensor.dtype == torch.uint8
+
+    def test_pack_respects_buffer_limit(self):
+        """Test that packing stops when buffer_size_bytes is exceeded."""
+        params = [
+            (f"w{i}", torch.randn(100, 100, dtype=torch.float32).cuda())
+            for i in range(10)
+        ]
+
+        chunk = pack_tensors(
+            iterator=iter(params),
+            post_iter_func=lambda x: x[1],
+            buffer_size_bytes=50_000,
+        )
+
+        assert chunk is not None
+        assert len(chunk.names) < 10
+
+    def test_pack_empty_iterator(self):
+        """Test that an empty iterator returns None."""
+        chunk = pack_tensors(
+            iterator=iter([]),
+            post_iter_func=lambda x: x[1],
+            buffer_size_bytes=1000,
+        )
+        assert chunk is None
+
+    def test_pack_single_tensor_larger_than_buffer_warns(self):
+        """Test that a tensor exceeding buffer_size_bytes emits a warning."""
+        big = torch.randn(1000, 1000, dtype=torch.float32).cuda()
+        params = [("big", big)]
+
+        import warnings
+
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            chunk = pack_tensors(
+                iterator=iter(params),
+                post_iter_func=lambda x: x[1],
+                buffer_size_bytes=100,
+            )
+        assert chunk is not None
+        assert len(chunk.names) == 1
+        assert any("exceeds buffer_size_bytes" in str(wi.message) for wi in w)
+
+    def test_pack_unpack_roundtrip(self):
+        """Test pack then unpack produces identical tensors."""
+        params = [
+            ("a", torch.randn(8, 16, dtype=torch.float32).cuda()),
+            ("b", torch.randn(4, dtype=torch.float16).cuda()),
+            ("c", torch.randn(3, 5, 7, dtype=torch.bfloat16).cuda()),
+        ]
+
+        chunk = pack_tensors(
+            iterator=iter(params),
+            post_iter_func=lambda x: x[1],
+            buffer_size_bytes=10_000_000,
+        )
+
+        assert chunk is not None
+        result = unpack_tensor(
+            chunk.packed_tensor,
+            chunk.names,
+            chunk.shapes,
+            chunk.dtypes,
+            chunk.tensor_sizes,
+        )
+
+        assert len(result) == len(params)
+        for (orig_name, orig_tensor), (res_name, res_tensor) in zip(params, result):
+            assert orig_name == res_name
+            assert res_tensor.shape == orig_tensor.shape
+            assert res_tensor.dtype == orig_tensor.dtype
+            assert torch.allclose(res_tensor, orig_tensor, rtol=1e-4, atol=1e-6)
+
+    def test_pack_multiple_chunks(self):
+        """Test consuming an iterator across multiple pack_tensors calls."""
+        params = [
+            (f"w{i}", torch.randn(50, 50, dtype=torch.float32).cuda()) for i in range(6)
+        ]
+        it = iter(params)
+
+        all_names = []
+        chunks = []
+        while True:
+            chunk = pack_tensors(it, lambda x: x[1], buffer_size_bytes=12_000)
+            if chunk is None:
+                break
+            chunks.append(chunk)
+            all_names.extend(chunk.names)
+
+        assert len(chunks) > 1
+        assert all_names == [f"w{i}" for i in range(6)]
+
+
+# --- Unit Tests: packed_ipc_producer ---
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+class TestPackedIpcProducer:
+    """Test the packed_ipc_producer generator."""
+
+    def test_producer_yields_chunks(self):
+        """Test that the producer yields PackedIpcChunk objects."""
+        params = [
+            (f"w{i}", torch.randn(50, 50, dtype=torch.float32).cuda()) for i in range(6)
+        ]
+
+        chunks = list(
+            packed_ipc_producer(
+                iterator=iter(params),
+                gpu_uuid="test-uuid",
+                post_iter_func=lambda x: x[1],
+                buffer_size_bytes=12_000,
+            )
+        )
+
+        assert len(chunks) > 1
+
+    def test_producer_ipc_handle_has_uuid(self):
+        """Test that each chunk's ipc_handle is keyed by the given UUID."""
+        params = [("w", torch.randn(10, dtype=torch.float32).cuda())]
+
+        chunks = list(
+            packed_ipc_producer(
+                iterator=iter(params),
+                gpu_uuid="my-gpu-uuid",
+                post_iter_func=lambda x: x[1],
+                buffer_size_bytes=10_000_000,
+            )
+        )
+
+        assert "my-gpu-uuid" in chunks[0].ipc_handle
+
+    def test_producer_dtype_names_are_strings(self):
+        """Test that dtype_names are string representations."""
+        params = [
+            ("a", torch.randn(10, dtype=torch.float32).cuda()),
+            ("b", torch.randn(10, dtype=torch.float16).cuda()),
+        ]
+
+        chunks = list(
+            packed_ipc_producer(
+                iterator=iter(params),
+                gpu_uuid="uuid",
+                post_iter_func=lambda x: x[1],
+                buffer_size_bytes=10_000_000,
+            )
+        )
+
+        assert chunks[0].dtype_names == ["float32", "float16"]
+
+    def test_producer_empty_iterator(self):
+        """Test producer with empty iterator yields nothing."""
+        chunks = list(
+            packed_ipc_producer(
+                iterator=iter([]),
+                gpu_uuid="uuid",
+                post_iter_func=lambda x: x[1],
+                buffer_size_bytes=1000,
+            )
+        )
+        assert len(chunks) == 0
+
+
+# --- Integration Tests: IPC Producer-Consumer Roundtrip ---
+
+
+def _ipc_consumer_worker(cmd_q, ack_q, result_q, done_event, device_index):
+    """Worker that consumes chunks streamed one at a time from the parent.
+
+    CUDA IPC requires the consumer to be in a separate process from the
+    producer. The producer reuses a single IPC buffer between chunks, so
+    the parent must wait for our ack (sent after we copy the chunk to
+    CPU) before advancing the producer.
+    """
+    try:
+        torch.accelerator.set_device_index(device_index)
+        all_results = []
+        while True:
+            cd = cmd_q.get()
+            if cd is None:
+                break
+            result = packed_ipc_consumer(
+                ipc_handle=cd["ipc_handle"],
+                names=cd["names"],
+                shapes=cd["shapes"],
+                dtype_names=cd["dtype_names"],
+                tensor_sizes=cd["tensor_sizes"],
+                device_index=device_index,
+            )
+            # .cpu() forces a GPU→CPU copy off the shared IPC buffer, so
+            # the producer is free to overwrite it once we ack.
+            all_results.extend([(name, tensor.cpu()) for name, tensor in result])
+            del result
+            ack_q.put("ack")
+        result_q.put(("ok", all_results))
+    except Exception as e:
+        result_q.put(("error", str(e)))
+    # Keep the process alive until the parent has finished reading from
+    # the result queue — torch serializes CPU tensors via fd sharing,
+    # which requires this process's resource-sharer server to be running.
+    done_event.wait(timeout=60)
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+class TestPackedIpcRoundtrip:
+    """Test IPC producer-consumer roundtrip using real CUDA IPC.
+
+    These tests spawn a child process for the consumer because
+    rebuild_cuda_tensor requires a separate process from the one that
+    called reduce_tensor.
+    """
+
+    def _get_gpu_uuid(self) -> str:
+        device_index = torch.cuda.current_device()
+        props = torch.cuda.get_device_properties(device_index)
+        return str(props.uuid)
+
+    def _run_roundtrip(self, chunk_iter, device_index, timeout=30):
+        """Stream chunks through a child consumer one at a time.
+
+        ``packed_ipc_producer`` reuses a single IPC buffer for every
+        chunk, so the producer must not be advanced until the consumer
+        has finished reading the current chunk. We enforce that with an
+        ack queue: the consumer puts ``"ack"`` after it has copied the
+        chunk to CPU, and only then do we pull the next chunk from the
+        generator.
+
+        Returns ``(num_chunks, results)``.
+        """
+        import multiprocessing as mp
+
+        ctx = mp.get_context("spawn")
+        cmd_q = ctx.Queue()
+        ack_q = ctx.Queue()
+        result_q = ctx.Queue()
+        done_event = ctx.Event()
+        proc = ctx.Process(
+            target=_ipc_consumer_worker,
+            args=(cmd_q, ack_q, result_q, done_event, device_index),
+        )
+        proc.start()
+
+        num_chunks = 0
+        try:
+            for chunk in chunk_iter:
+                cmd_q.put(
+                    {
+                        "ipc_handle": chunk.ipc_handle,
+                        "names": chunk.names,
+                        "shapes": chunk.shapes,
+                        "dtype_names": chunk.dtype_names,
+                        "tensor_sizes": chunk.tensor_sizes,
+                    }
+                )
+                if ack_q.get(timeout=timeout) != "ack":
+                    raise RuntimeError("Consumer did not ack chunk")
+                num_chunks += 1
+            cmd_q.put(None)
+            status, payload = result_q.get(timeout=timeout)
+        finally:
+            done_event.set()
+            proc.join(timeout=10)
+            if proc.is_alive():
+                proc.kill()
+
+        if status == "error":
+            raise RuntimeError(f"Consumer process failed: {payload}")
+        # Reclaim IPC-shared memory now that the child has released it
+        torch.cuda.ipc_collect()
+        return num_chunks, payload
+
+    def test_roundtrip_basic(self):
+        """Test basic IPC producer -> consumer roundtrip."""
+        params = [
+            ("w1", torch.randn(10, 20, dtype=torch.float32).cuda()),
+            ("w2", torch.randn(5, dtype=torch.float16).cuda()),
+        ]
+        gpu_uuid = self._get_gpu_uuid()
+        device_index = torch.cuda.current_device()
+
+        num_chunks, result = self._run_roundtrip(
+            packed_ipc_producer(
+                iterator=iter(params),
+                gpu_uuid=gpu_uuid,
+                post_iter_func=lambda x: x[1],
+                buffer_size_bytes=10_000_000,
+            ),
+            device_index,
+        )
+
+        assert num_chunks == 1
+        assert len(result) == 2
+        for (orig_name, orig_tensor), (res_name, res_tensor) in zip(params, result):
+            assert orig_name == res_name
+            assert res_tensor.shape == orig_tensor.shape
+            assert res_tensor.dtype == orig_tensor.dtype
+            assert torch.allclose(res_tensor, orig_tensor.cpu(), rtol=1e-4, atol=1e-6)
+
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.float16, torch.bfloat16])
+    def test_roundtrip_dtypes(self, dtype):
+        """Test IPC roundtrip with different dtypes."""
+        params = create_mock_model_params(num_layers=2, dtype=dtype)
+        params_cuda = [(n, t.cuda()) for n, t in params]
+        gpu_uuid = self._get_gpu_uuid()
+        device_index = torch.cuda.current_device()
+
+        _, result = self._run_roundtrip(
+            packed_ipc_producer(
+                iterator=iter(params_cuda),
+                gpu_uuid=gpu_uuid,
+                post_iter_func=lambda x: x[1],
+                buffer_size_bytes=10_000_000,
+            ),
+            device_index,
+        )
+
+        assert len(result) == len(params_cuda)
+        for (orig_name, orig_tensor), (res_name, res_tensor) in zip(
+            params_cuda, result
+        ):
+            assert orig_name == res_name
+            assert res_tensor.dtype == dtype
+            assert torch.allclose(res_tensor, orig_tensor.cpu(), rtol=1e-4, atol=1e-6)
+
+    def test_roundtrip_multiple_chunks(self):
+        """Test IPC roundtrip across multiple chunks."""
+        params = [
+            (f"layer{i}.weight", torch.randn(100, 100, dtype=torch.float32).cuda())
+            for i in range(8)
+        ]
+        gpu_uuid = self._get_gpu_uuid()
+        device_index = torch.cuda.current_device()
+
+        num_chunks, result = self._run_roundtrip(
+            packed_ipc_producer(
+                iterator=iter(params),
+                gpu_uuid=gpu_uuid,
+                post_iter_func=lambda x: x[1],
+                buffer_size_bytes=50_000,
+            ),
+            device_index,
+        )
+
+        assert num_chunks > 1
+        assert len(result) == len(params)
+        for (orig_name, orig_tensor), (res_name, res_tensor) in zip(params, result):
+            assert orig_name == res_name
+            assert torch.allclose(res_tensor, orig_tensor.cpu(), rtol=1e-5, atol=1e-7)
+
+    def test_roundtrip_non_contiguous(self):
+        """Test IPC roundtrip with non-contiguous tensors."""
+        params = [
+            ("transposed", torch.randn(20, 10, dtype=torch.float32).cuda().T),
+            ("sliced", torch.randn(40, 30, dtype=torch.float16).cuda()[::2, ::2]),
+        ]
+        gpu_uuid = self._get_gpu_uuid()
+        device_index = torch.cuda.current_device()
+
+        for _, t in params:
+            assert not t.is_contiguous()
+
+        _, result = self._run_roundtrip(
+            packed_ipc_producer(
+                iterator=iter(params),
+                gpu_uuid=gpu_uuid,
+                post_iter_func=lambda x: x[1],
+                buffer_size_bytes=10_000_000,
+            ),
+            device_index,
+        )
+
+        for (orig_name, orig_tensor), (res_name, res_tensor) in zip(params, result):
+            assert orig_name == res_name
+            assert res_tensor.shape == orig_tensor.shape
+            assert res_tensor.dtype == orig_tensor.dtype
+            assert torch.allclose(res_tensor, orig_tensor.cpu(), rtol=1e-4, atol=1e-6)
+
+    def test_consumer_wrong_uuid_raises(self):
+        """Test that consumer raises ValueError for unknown GPU UUID."""
+        params = [("w", torch.randn(10, dtype=torch.float32).cuda())]
+        gpu_uuid = self._get_gpu_uuid()
+
+        chunks = list(
+            packed_ipc_producer(
+                iterator=iter(params),
+                gpu_uuid=gpu_uuid,
+                post_iter_func=lambda x: x[1],
+                buffer_size_bytes=10_000_000,
+            )
+        )
+
+        c = chunks[0]
+        fake_handle = {"fake-uuid-12345": c.ipc_handle[gpu_uuid]}
+
+        with pytest.raises(ValueError, match="IPC handle not found"):
+            packed_ipc_consumer(
+                ipc_handle=fake_handle,
+                names=c.names,
+                shapes=c.shapes,
+                dtype_names=c.dtype_names,
+                tensor_sizes=c.tensor_sizes,
+                device_index=torch.cuda.current_device(),
+            )
diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py
index 3a05440e41cc..2742663093f2 100644
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -349,7 +349,14 @@ def _compare_tp(
         "mp",
     ]
 
-    compare_two_settings(model_id, pp_args, tp_args, pp_env, tp_env, method=method)
+    compare_two_settings(
+        model_id,
+        pp_args,
+        tp_args,
+        pp_env,
+        tp_env,
+        method=method,
+    )
 
 
 @pytest.mark.parametrize(
diff --git a/tests/distributed/test_quick_all_reduce.py b/tests/distributed/test_quick_all_reduce.py
index 9fbc4e0e9ca6..6c1ae9dda301 100644
--- a/tests/distributed/test_quick_all_reduce.py
+++ b/tests/distributed/test_quick_all_reduce.py
@@ -11,7 +11,14 @@
 
 from vllm import _custom_ops as ops
 from vllm.distributed.communication_op import tensor_model_parallel_all_reduce  # noqa
+from vllm.distributed.device_communicators.quick_all_reduce import (
+    KB,
+    MB,
+    QuickAllReduce,
+    QuickReduceRegime,
+)
 from vllm.distributed.parallel_state import get_tp_group, graph_capture
+from vllm.envs import disable_envs_cache
 from vllm.platforms import current_platform
 
 from ..utils import (
@@ -28,6 +35,177 @@
     test_sizes[i] -= v % 8
 
 
+@pytest.fixture
+def envs_cache_disabled():
+    disable_envs_cache()
+    yield
+    disable_envs_cache()
+
+
+def _make_quick_allreduce_for_test(
+    min_size_mb: int | None = None,
+    quantization_min_size: int | None = None,
+) -> QuickAllReduce:
+    quick_reduce = QuickAllReduce.__new__(QuickAllReduce)
+    quick_reduce.disabled = False
+    quick_reduce.qr_max_size = 16 * MB
+    quick_reduce.qr_min_size = min_size_mb * MB if min_size_mb is not None else None
+    quick_reduce.qr_quant_level = QuickReduceRegime.INT4
+    quick_reduce.qr_quantization_min_size = quantization_min_size
+    quick_reduce.use_fp16_kernels = False
+    quick_reduce.world_size = 2
+    return quick_reduce
+
+
+def test_should_quick_allreduce_uses_builtin_min_size_when_unset():
+    quick_reduce = _make_quick_allreduce_for_test(min_size_mb=None)
+
+    below_builtin_min = torch.empty(MB // 4, dtype=torch.float16)
+    at_builtin_min = torch.empty(MB // 2, dtype=torch.float16)
+
+    assert not quick_reduce.should_quick_allreduce(below_builtin_min)
+    assert quick_reduce.should_quick_allreduce(at_builtin_min)
+
+
+def test_should_quick_allreduce_uses_min_size_override():
+    quick_reduce = _make_quick_allreduce_for_test(min_size_mb=0)
+
+    below_builtin_min = torch.empty(8, dtype=torch.float16)
+
+    assert quick_reduce.should_quick_allreduce(below_builtin_min)
+
+
+def test_quick_allreduce_min_size_env_unset(
+    monkeypatch: pytest.MonkeyPatch,
+    envs_cache_disabled,
+):
+    monkeypatch.delenv("VLLM_ROCM_QUICK_REDUCE_MIN_SIZE_BYTES_MB", raising=False)
+
+    assert QuickAllReduce._get_qr_min_size(qr_max_size=16 * MB) is None
+
+
+def test_quick_allreduce_min_size_env_converts_mb_to_bytes(
+    monkeypatch: pytest.MonkeyPatch,
+    envs_cache_disabled,
+):
+    monkeypatch.setenv("VLLM_ROCM_QUICK_REDUCE_MIN_SIZE_BYTES_MB", "4")
+
+    assert QuickAllReduce._get_qr_min_size(qr_max_size=16 * MB) == 4 * MB
+
+
+def test_quick_allreduce_min_size_env_rejects_negative(
+    monkeypatch: pytest.MonkeyPatch,
+    envs_cache_disabled,
+):
+    monkeypatch.setenv("VLLM_ROCM_QUICK_REDUCE_MIN_SIZE_BYTES_MB", "-1")
+
+    with pytest.raises(ValueError, match="must be non-negative"):
+        QuickAllReduce._get_qr_min_size(qr_max_size=16 * MB)
+
+
+def test_quick_allreduce_min_size_env_allows_equal_to_max(
+    monkeypatch: pytest.MonkeyPatch,
+    envs_cache_disabled,
+):
+    monkeypatch.setenv("VLLM_ROCM_QUICK_REDUCE_MIN_SIZE_BYTES_MB", "16")
+
+    assert QuickAllReduce._get_qr_min_size(qr_max_size=16 * MB) == 16 * MB
+
+
+def test_quick_allreduce_min_size_env_rejects_larger_than_max(
+    monkeypatch: pytest.MonkeyPatch,
+    envs_cache_disabled,
+):
+    monkeypatch.setenv("VLLM_ROCM_QUICK_REDUCE_MIN_SIZE_BYTES_MB", "17")
+
+    with pytest.raises(ValueError, match="effective QuickReduce max size"):
+        QuickAllReduce._get_qr_min_size(qr_max_size=16 * MB)
+
+
+def test_quick_allreduce_quantization_min_size_env_unset(
+    monkeypatch: pytest.MonkeyPatch,
+    envs_cache_disabled,
+):
+    monkeypatch.delenv("VLLM_ROCM_QUICK_REDUCE_QUANTIZATION_MIN_SIZE_KB", raising=False)
+
+    assert QuickAllReduce._get_qr_quantization_min_size() is None
+
+
+def test_quick_allreduce_quantization_min_size_env_converts_kb_to_bytes(
+    monkeypatch: pytest.MonkeyPatch,
+    envs_cache_disabled,
+):
+    monkeypatch.setenv("VLLM_ROCM_QUICK_REDUCE_QUANTIZATION_MIN_SIZE_KB", "2048")
+
+    assert QuickAllReduce._get_qr_quantization_min_size() == 2048 * KB
+
+
+def test_quick_allreduce_quantization_min_size_env_rejects_negative(
+    monkeypatch: pytest.MonkeyPatch,
+    envs_cache_disabled,
+):
+    monkeypatch.setenv("VLLM_ROCM_QUICK_REDUCE_QUANTIZATION_MIN_SIZE_KB", "-1")
+
+    with pytest.raises(ValueError, match="must be non-negative"):
+        QuickAllReduce._get_qr_quantization_min_size()
+
+
+def test_quick_allreduce_quantization_min_size_unset_uses_configured_codec():
+    quick_reduce = _make_quick_allreduce_for_test(quantization_min_size=None)
+    inp = torch.empty(8, dtype=torch.float16)
+
+    assert quick_reduce._get_qr_quant_level(inp) == QuickReduceRegime.INT4.value
+
+
+def test_quick_allreduce_quantization_min_size_uses_fp_below_threshold():
+    quick_reduce = _make_quick_allreduce_for_test(quantization_min_size=2048)
+    inp = torch.empty(1024 // 2, dtype=torch.float16)
+
+    assert quick_reduce._get_qr_quant_level(inp) == QuickReduceRegime.FP.value
+
+
+def test_quick_allreduce_quantization_min_size_uses_configured_codec_at_threshold():
+    quick_reduce = _make_quick_allreduce_for_test(quantization_min_size=2048)
+    inp = torch.empty(2048 // 2, dtype=torch.float16)
+
+    assert quick_reduce._get_qr_quant_level(inp) == QuickReduceRegime.INT4.value
+
+
+def test_quick_allreduce_quantization_min_size_does_not_change_eligibility():
+    quick_reduce = _make_quick_allreduce_for_test(quantization_min_size=2 * MB)
+
+    below_builtin_min = torch.empty(MB // 4, dtype=torch.float16)
+    at_builtin_min = torch.empty(MB // 2, dtype=torch.float16)
+
+    assert not quick_reduce.should_quick_allreduce(below_builtin_min)
+    assert quick_reduce.should_quick_allreduce(at_builtin_min)
+
+
+def test_quick_allreduce_passes_dynamic_quant_level(
+    monkeypatch: pytest.MonkeyPatch,
+):
+    quick_reduce = _make_quick_allreduce_for_test(quantization_min_size=2 * KB)
+    quick_reduce._ptr = object()
+    inp = torch.empty(KB // 2, dtype=torch.float16)
+    called_quant_level = None
+
+    def fake_qr_all_reduce(
+        fa,
+        inp,
+        out,
+        quant_level,
+        cast_bf2half,
+    ):
+        nonlocal called_quant_level
+        called_quant_level = quant_level
+
+    monkeypatch.setattr(ops, "qr_all_reduce", fake_qr_all_reduce)
+
+    quick_reduce.quick_all_reduce(inp)
+
+    assert called_quant_level == QuickReduceRegime.FP.value
+
+
 @ray.remote(num_gpus=1, max_calls=1)
 def graph_quickreduce(
     monkeypatch: pytest.MonkeyPatch,
diff --git a/tests/distributed/test_ray_v2_executor.py b/tests/distributed/test_ray_v2_executor.py
new file mode 100644
index 000000000000..5daec22df6fc
--- /dev/null
+++ b/tests/distributed/test_ray_v2_executor.py
@@ -0,0 +1,345 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""
+Integration tests for RayExecutorV2 at the executor level.
+Validates executor initialization, placement group support, RPC calls,
+and distributed execution with various TP/PP configurations.
+"""
+
+import gc
+import threading
+from unittest.mock import patch
+
+import pytest
+import ray
+
+from vllm import LLM
+from vllm.config import VllmConfig
+from vllm.engine.arg_utils import EngineArgs
+from vllm.v1.executor.ray_executor_v2 import RayExecutorV2
+
+pytestmark = pytest.mark.usefixtures("enable_ray_v2_backend")
+
+MODEL = "facebook/opt-125m"
+
+
+def create_vllm_config(
+    tensor_parallel_size: int = 1,
+    pipeline_parallel_size: int = 1,
+    max_model_len: int = 256,
+    gpu_memory_utilization: float = 0.3,
+    placement_group=None,
+) -> VllmConfig:
+    engine_args = EngineArgs(
+        model=MODEL,
+        tensor_parallel_size=tensor_parallel_size,
+        pipeline_parallel_size=pipeline_parallel_size,
+        max_model_len=max_model_len,
+        gpu_memory_utilization=gpu_memory_utilization,
+        distributed_executor_backend="ray",
+        enforce_eager=True,
+    )
+    vllm_config = engine_args.create_engine_config()
+
+    if placement_group is not None:
+        vllm_config.parallel_config.placement_group = placement_group
+
+    return vllm_config
+
+
+def ensure_ray_initialized():
+    if not ray.is_initialized():
+        ray.init(ignore_reinit_error=True)
+
+
+@pytest.fixture
+def create_placement_group(request):
+    ensure_ray_initialized()
+    num_gpus = request.param
+    bundles = [{"GPU": 1, "CPU": 1} for _ in range(num_gpus)]
+    pg = ray.util.placement_group(bundles, strategy="PACK")
+    ray.get(pg.ready())
+    yield pg
+    ray.util.remove_placement_group(pg)
+
+
+@pytest.fixture
+def executor(request):
+    """Create a RayExecutorV2 and shut it down after the test."""
+    executor = RayExecutorV2(vllm_config=request.param)
+    yield executor
+    executor.shutdown()
+
+
+def assert_executor(executor, tp_size, pp_size):
+    """Common assertions for executor initialization tests."""
+    world_size = tp_size * pp_size
+    expected_output_rank = (pp_size - 1) * tp_size
+
+    assert executor.world_size == world_size
+    assert len(executor.ray_worker_handles) == world_size
+    assert len(executor.response_mqs) == world_size
+    assert executor._get_output_rank() == expected_output_rank
+
+    if pp_size > 1:
+        assert executor.max_concurrent_batches == pp_size
+
+    executor.check_health()
+    assert not executor.is_failed
+
+    ranks = sorted(h.rank for h in executor.ray_worker_handles)
+    assert ranks == list(range(world_size))
+
+    for handle in executor.ray_worker_handles:
+        assert handle.node_id is not None
+
+
+@pytest.mark.parametrize("tp_size, pp_size", [(1, 1), (2, 1), (4, 1), (2, 2)])
+def test_ray_v2_executor(tp_size, pp_size):
+    """Validate RayExecutorV2 with various TP/PP configs."""
+    vllm_config = create_vllm_config(
+        tensor_parallel_size=tp_size,
+        pipeline_parallel_size=pp_size,
+    )
+    executor = RayExecutorV2(vllm_config=vllm_config)
+    try:
+        assert_executor(executor, tp_size, pp_size)
+    finally:
+        executor.shutdown()
+
+
+@pytest.mark.parametrize(
+    "tp_size, pp_size, create_placement_group",
+    [(2, 1, 2), (4, 1, 4), (2, 2, 4)],
+    indirect=["create_placement_group"],
+)
+def test_ray_v2_executor_pg(tp_size, pp_size, create_placement_group):
+    """Validate RayExecutorV2 with various TP/PP configs using external PG."""
+    vllm_config = create_vllm_config(
+        tensor_parallel_size=tp_size,
+        pipeline_parallel_size=pp_size,
+        placement_group=create_placement_group,
+    )
+    executor = RayExecutorV2(vllm_config=vllm_config)
+    try:
+        assert_executor(executor, tp_size, pp_size)
+    finally:
+        executor.shutdown()
+
+
+@pytest.mark.parametrize(
+    "executor",
+    [create_vllm_config(tensor_parallel_size=2)],
+    indirect=True,
+)
+def test_ray_v2_executor_failure_callback(executor):
+    """Validate failure callback registration."""
+    callback_invoked = False
+
+    def test_callback():
+        nonlocal callback_invoked
+        callback_invoked = True
+
+    executor.register_failure_callback(test_callback)
+    assert not callback_invoked
+
+    executor.is_failed = True
+    executor.register_failure_callback(test_callback)
+    assert callback_invoked
+
+
+@pytest.mark.parametrize(
+    "executor",
+    [create_vllm_config(tensor_parallel_size=2)],
+    indirect=True,
+)
+def test_ray_v2_executor_collective_rpc(executor):
+    """Validate collective RPC calls through MessageQueue."""
+    executor.check_health()
+    assert not executor.is_failed
+    assert executor.rpc_broadcast_mq is not None
+
+
+@pytest.mark.parametrize(
+    "executor",
+    [create_vllm_config(tensor_parallel_size=2)],
+    indirect=True,
+)
+def test_ray_v2_executor_driver_node_rank_0(executor):
+    """Validate that driver node workers get the lowest ranks."""
+    driver_node = ray.get_runtime_context().get_node_id()
+
+    for handle in executor.ray_worker_handles:
+        assert handle.node_id == driver_node
+
+    rank0_handle = next(h for h in executor.ray_worker_handles if h.rank == 0)
+    assert rank0_handle.node_id == driver_node
+
+
+@pytest.mark.parametrize(
+    "executor",
+    [create_vllm_config(tensor_parallel_size=2)],
+    indirect=True,
+)
+def test_ray_v2_executor_worker_death(executor):
+    """Validate executor detects worker death via ray.wait()."""
+    callback_event = threading.Event()
+
+    def on_failure():
+        callback_event.set()
+
+    executor.register_failure_callback(on_failure)
+    assert not executor.is_failed
+
+    # Kill one worker actor externally
+    victim = executor.ray_worker_handles[1].actor
+    ray.kill(victim, no_restart=True)
+
+    # Monitor thread should detect the death and invoke callback
+    assert callback_event.wait(timeout=30)
+    assert executor.is_failed
+    assert executor.shutting_down
+
+
+def test_ray_v2_executor_shutdown():
+    """Validate graceful shutdown: ray.kill() terminates all worker actors."""
+    executor = RayExecutorV2(vllm_config=create_vllm_config(tensor_parallel_size=2))
+    assert executor.rpc_broadcast_mq is not None
+    assert len(executor.response_mqs) == executor.world_size
+
+    actors = [h.actor for h in executor.ray_worker_handles]
+    executor.shutdown()
+
+    for actor in actors:
+        with pytest.raises(ray.exceptions.RayActorError):
+            ray.get(actor.wait_for_init.remote(), timeout=5)
+
+    assert executor.rpc_broadcast_mq is None
+    assert len(executor.response_mqs) == 0
+
+
+@pytest.mark.parametrize(
+    "executor",
+    [create_vllm_config(tensor_parallel_size=2)],
+    indirect=True,
+)
+def test_ray_v2_run_refs_stored_for_monitoring(executor):
+    """Validate worker handles store run_ref for monitoring."""
+    for handle in executor.ray_worker_handles:
+        assert handle.run_ref is not None
+        ready, _ = ray.wait([handle.run_ref], timeout=0)
+        assert len(ready) == 0, "run_ref should be pending"
+
+
+@pytest.mark.parametrize("tp_size, pp_size", [(2, 1), (2, 2)])
+def test_ray_v2_single_node_generation(tp_size, pp_size):
+    """End-to-end LLM generation with RayExecutorV2."""
+
+    llm = LLM(
+        model=MODEL,
+        tensor_parallel_size=tp_size,
+        pipeline_parallel_size=pp_size,
+        distributed_executor_backend="ray",
+        enforce_eager=True,
+        max_model_len=256,
+        gpu_memory_utilization=0.3,
+    )
+    try:
+        prompts = [
+            "Hello, my name is",
+            "The capital of France is",
+            "The future of AI is",
+        ]
+        outputs = llm.generate(prompts)
+
+        assert len(outputs) == len(prompts)
+        for output in outputs:
+            assert len(output.outputs) > 0
+            assert len(output.outputs[0].text) > 0
+    finally:
+        llm.llm_engine.model_executor.shutdown()
+        del llm
+        gc.collect()
+
+
+@pytest.mark.parametrize(
+    "bundle_indices, expected_bundle_ids, create_placement_group",
+    [("2,3", [2, 3], 4), ("3,2", [3, 2], 4)],
+    indirect=["create_placement_group"],
+)
+def test_ray_v2_bundle_indices_env(
+    bundle_indices, expected_bundle_ids, create_placement_group, monkeypatch
+):
+    """Validate explicit VLLM_RAY_BUNDLE_INDICES bundle placement."""
+    monkeypatch.setenv("VLLM_RAY_BUNDLE_INDICES", bundle_indices)
+    vllm_config = create_vllm_config(
+        tensor_parallel_size=2,
+        placement_group=create_placement_group,
+    )
+    executor = RayExecutorV2(vllm_config=vllm_config)
+    try:
+        actual = [
+            h.bundle_id_idx
+            for h in sorted(executor.ray_worker_handles, key=lambda h: h.rank)
+        ]
+        assert actual == expected_bundle_ids
+        assert_executor(executor, tp_size=2, pp_size=1)
+    finally:
+        executor.shutdown()
+
+
+@pytest.mark.parametrize(
+    "bundle_indices, expected_error, create_placement_group",
+    [
+        ("1,1", "cannot have duplicate values,", 4),
+        ("0,1,2", "must have the same size", 4),
+    ],
+    indirect=["create_placement_group"],
+)
+def test_ray_v2_invalid_bundle_indices(
+    bundle_indices, expected_error, create_placement_group, monkeypatch
+):
+    """Validate invalid bundle indices are rejected."""
+    monkeypatch.setenv("VLLM_RAY_BUNDLE_INDICES", bundle_indices)
+    vllm_config = create_vllm_config(
+        tensor_parallel_size=2, placement_group=create_placement_group
+    )
+    with pytest.raises(AssertionError, match=expected_error):
+        RayExecutorV2(vllm_config=vllm_config)
+
+
+@pytest.mark.parametrize("tp_size, pp_size", [(2, 1), (2, 2)])
+def test_ray_v2_single_node_generation_with_pg(tp_size, pp_size):
+    """E2E LLM generation with a user-provided placement group."""
+    ensure_ray_initialized()
+    bundles = [{"GPU": 1, "CPU": 1} for _ in range(tp_size * pp_size)]
+    pg = ray.util.placement_group(bundles, strategy="PACK")
+    ray.get(pg.ready())
+
+    try:
+        with patch.object(ray.util, "get_current_placement_group", return_value=pg):
+            llm = LLM(
+                model=MODEL,
+                tensor_parallel_size=tp_size,
+                pipeline_parallel_size=pp_size,
+                distributed_executor_backend="ray",
+                enforce_eager=True,
+                max_model_len=256,
+                gpu_memory_utilization=0.3,
+            )
+        prompts = [
+            "Hello, my name is",
+            "The capital of France is",
+            "The future of AI is",
+        ]
+        outputs = llm.generate(prompts)
+
+        assert len(outputs) == len(prompts)
+        for output in outputs:
+            assert len(output.outputs) > 0
+            assert len(output.outputs[0].text) > 0
+    finally:
+        llm.llm_engine.model_executor.shutdown()
+        del llm
+        gc.collect()
diff --git a/tests/distributed/test_ray_v2_executor_e2e.py b/tests/distributed/test_ray_v2_executor_e2e.py
new file mode 100644
index 000000000000..fb5830132698
--- /dev/null
+++ b/tests/distributed/test_ray_v2_executor_e2e.py
@@ -0,0 +1,209 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Orchestration-level integration tests for RayExecutorV2.
+"""
+
+import gc
+import os
+import pathlib
+
+import pytest
+import ray
+
+pytestmark = pytest.mark.usefixtures("enable_ray_v2_backend")
+
+MODEL = "facebook/opt-125m"
+
+
+def _get_env_var(worker, name):
+    return os.environ.get(name)
+
+
+def _ray_init():
+    """Start Ray with the project root on workers' PYTHONPATH.
+
+    Without this, workers cannot unpickle actor classes defined in the
+    ``tests`` package, causing FunctionActorManager to fall back to
+    TemporaryActor which drops async method signatures."""
+    project_root = str(pathlib.Path(__file__).resolve().parents[2])
+    ray.init(
+        ignore_reinit_error=True,
+        runtime_env={"env_vars": {"PYTHONPATH": project_root}},
+    )
+
+
+@pytest.fixture
+def ray_init():
+    _ray_init()
+
+
+class _AsyncLLMActor:
+    def start(self, pg, bundle_indices=None, ray_runtime_env=None):
+        os.environ["VLLM_USE_RAY_V2_EXECUTOR_BACKEND"] = "1"
+        # Needed so collective_rpc can pickle _get_env_var over the
+        # AsyncLLM -> EngineCore ZMQ boundary.
+        os.environ["VLLM_ALLOW_INSECURE_SERIALIZATION"] = "1"
+        if bundle_indices is not None:
+            os.environ["VLLM_RAY_BUNDLE_INDICES"] = bundle_indices
+        else:
+            os.environ.pop("VLLM_RAY_BUNDLE_INDICES", None)
+
+        from vllm.engine.arg_utils import AsyncEngineArgs
+        from vllm.v1.engine.async_llm import AsyncLLM
+        from vllm.v1.executor.abstract import Executor
+
+        engine_args = AsyncEngineArgs(
+            model=MODEL,
+            tensor_parallel_size=2,
+            distributed_executor_backend="ray",
+            enforce_eager=True,
+            max_model_len=256,
+            gpu_memory_utilization=0.8,
+        )
+        vllm_config = engine_args.create_engine_config()
+        vllm_config.parallel_config.placement_group = pg
+        if ray_runtime_env is not None:
+            vllm_config.parallel_config.ray_runtime_env = ray_runtime_env
+
+        executor_class = Executor.get_class(vllm_config)
+        self.engine = AsyncLLM(
+            vllm_config=vllm_config,
+            executor_class=executor_class,
+            log_stats=False,
+            log_requests=False,
+        )
+
+    async def generate(self, prompt):
+        from vllm.sampling_params import SamplingParams
+
+        params = SamplingParams(max_tokens=16)
+        result = None
+        async for output in self.engine.generate(
+            prompt, params, request_id="test_request_id"
+        ):
+            result = output
+        assert result is not None
+        return result.outputs[0].text
+
+    async def generate_and_get_worker_envs(self, prompt, env_names):
+        from vllm.sampling_params import SamplingParams
+
+        params = SamplingParams(max_tokens=16)
+        result = None
+        async for output in self.engine.generate(
+            prompt, params, request_id="test_request_id"
+        ):
+            result = output
+        assert result is not None
+        text = result.outputs[0].text
+
+        env_results = {}
+        for name in env_names:
+            vals = await self.engine.collective_rpc(
+                _get_env_var, timeout=10, args=(name,)
+            )
+            env_results[name] = vals
+        return text, env_results
+
+    def shutdown(self):
+        if engine := getattr(self, "engine", None):
+            engine.shutdown()
+            del self.engine
+            gc.collect()
+
+
+AsyncLLMActor = ray.remote(num_cpus=0, max_concurrency=1)(_AsyncLLMActor)
+
+
+def test_multi_replicas(ray_init):
+    pg1 = ray.util.placement_group([{"GPU": 1, "CPU": 1}] * 2, strategy="PACK")
+    pg2 = ray.util.placement_group([{"GPU": 1, "CPU": 1}] * 2, strategy="PACK")
+    ray.get([pg1.ready(), pg2.ready()])
+
+    actor1 = AsyncLLMActor.remote()
+    actor2 = AsyncLLMActor.remote()
+
+    ray.get(actor1.start.remote(pg1))
+    ray.get(actor2.start.remote(pg2))
+
+    out1, out2 = ray.get(
+        [
+            actor1.generate.remote("Hello world"),
+            actor2.generate.remote("Hello world"),
+        ]
+    )
+    assert len(out1) > 0
+    assert len(out2) > 0
+
+
+def test_multi_replicas_with_bundle_indices(ray_init):
+    pg = ray.util.placement_group([{"GPU": 1, "CPU": 1}] * 4, strategy="PACK")
+    ray.get(pg.ready())
+
+    actor1 = AsyncLLMActor.remote()
+    actor2 = AsyncLLMActor.remote()
+
+    ray.get(actor1.start.remote(pg, bundle_indices="2,1"))
+    ray.get(actor2.start.remote(pg, bundle_indices="0,3"))
+
+    out1, out2 = ray.get(
+        [
+            actor1.generate.remote("Hello world"),
+            actor2.generate.remote("Hello world"),
+        ]
+    )
+    assert len(out1) > 0
+    assert len(out2) > 0
+
+
+def test_env_var_and_runtime_env_propagation():
+    """
+    Verify env vars (NCCL_, HF_) and parallel_config.ray_runtime_env
+    propagate to RayWorkerProc actors.
+    """
+    sentinel_vars = {
+        "NCCL_DEBUG": "INFO",
+        "HF_TOKEN": "test_sentinel_token",
+    }
+    for k, v in sentinel_vars.items():
+        os.environ[k] = v
+
+    try:
+        # Called directly (not via the ray_init fixture) because sentinel
+        # env vars must be in os.environ before ray.init() so that Ray
+        # worker processes inherit them.
+        _ray_init()
+
+        pg = ray.util.placement_group([{"GPU": 1, "CPU": 1}] * 2, strategy="PACK")
+        ray.get(pg.ready())
+
+        # Include the project root so that RayWorkerProc actors can
+        # unpickle _get_env_var.
+        project_root = str(pathlib.Path(__file__).resolve().parents[2])
+        ray_runtime_env = {
+            "env_vars": {
+                "RAY_RUNTIME_ENV_TEST": "ray_runtime_env",
+                "PYTHONPATH": project_root,
+            },
+        }
+
+        actor = AsyncLLMActor.remote()
+        ray.get(actor.start.remote(pg, ray_runtime_env=ray_runtime_env))
+
+        all_env_names = list(sentinel_vars) + ["RAY_RUNTIME_ENV_TEST"]
+        text, env_results = ray.get(
+            actor.generate_and_get_worker_envs.remote("Hello world", all_env_names)
+        )
+        assert len(text) > 0
+
+        for name, expected in sentinel_vars.items():
+            for val in env_results[name]:
+                assert val == expected
+
+        for val in env_results["RAY_RUNTIME_ENV_TEST"]:
+            assert val == "ray_runtime_env"
+
+    finally:
+        for k in sentinel_vars:
+            os.environ.pop(k, None)
diff --git a/tests/distributed/test_torchrun_example.py b/tests/distributed/test_torchrun_example.py
index 8c9898ca20f3..e72f00bc91e0 100644
--- a/tests/distributed/test_torchrun_example.py
+++ b/tests/distributed/test_torchrun_example.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-# unit test for `examples/offline_inference/torchrun_example.py`
+# unit test for `examples/features/torchrun/torchrun_example_offline.py`
 import os
 import random
 
@@ -10,7 +10,8 @@
 from vllm import LLM, SamplingParams
 from vllm.distributed.parallel_state import get_world_group
 
-dist.init_process_group(backend="gloo")
+# Let PyTorch choose the WORLD backend for the current device type.
+dist.init_process_group()
 
 # Create prompts
 prompts = [
@@ -29,7 +30,7 @@
     tensor_parallel_size=2,
     pipeline_parallel_size=int(os.getenv("PP_SIZE", 1)),
     distributed_executor_backend="external_launcher",
-    gpu_memory_utilization=random.uniform(0.7, 0.9),
+    gpu_memory_utilization=random.uniform(0.8, 0.92),
     seed=0,
 )
 
diff --git a/tests/distributed/test_torchrun_example_moe.py b/tests/distributed/test_torchrun_example_moe.py
index a6298d1b6739..969b5e92e3fc 100644
--- a/tests/distributed/test_torchrun_example_moe.py
+++ b/tests/distributed/test_torchrun_example_moe.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-# unit test for `examples/offline_inference/torchrun_example.py`
+# unit test for `examples/features/torchrun/torchrun_example_offline.py`
 import os
 import random
 
@@ -10,7 +10,8 @@
 from vllm import LLM, SamplingParams
 from vllm.distributed.parallel_state import get_tp_group, get_world_group
 
-dist.init_process_group(backend="gloo")
+# Let PyTorch choose the WORLD backend for the current device type.
+dist.init_process_group()
 
 # Create prompts
 prompts = [
@@ -36,8 +37,10 @@
     pipeline_parallel_size=int(os.getenv("PP_SIZE", "1")),
     enable_expert_parallel=int(os.getenv("ENABLE_EP", "0")) == 1,
     distributed_executor_backend="external_launcher",
-    gpu_memory_utilization=random.uniform(0.7, 0.9),
+    gpu_memory_utilization=random.uniform(0.8, 0.92),
     seed=0,
+    max_model_len=1024,
+    max_num_seqs=16,
 )
 
 outputs = llm.generate(prompts, sampling_params)
diff --git a/tests/distributed/test_utils.py b/tests/distributed/test_utils.py
index 784918642e09..031937d7f5e1 100644
--- a/tests/distributed/test_utils.py
+++ b/tests/distributed/test_utils.py
@@ -13,7 +13,6 @@
 from vllm.platforms import current_platform
 from vllm.utils.network_utils import get_open_port
 from vllm.utils.system_utils import update_environment_variables
-from vllm.utils.torch_utils import cuda_device_count_stateless
 
 from ..utils import multi_gpu_test
 
@@ -21,7 +20,7 @@
 @ray.remote
 class _CUDADeviceCountStatelessTestActor:
     def get_count(self):
-        return cuda_device_count_stateless()
+        return current_platform.device_count()
 
     def set_cuda_visible_devices(self, cuda_visible_devices: str):
         update_environment_variables({"CUDA_VISIBLE_DEVICES": cuda_visible_devices})
diff --git a/tests/distributed/test_weight_transfer.py b/tests/distributed/test_weight_transfer.py
index 1c9bc766ab1d..295e812a1245 100644
--- a/tests/distributed/test_weight_transfer.py
+++ b/tests/distributed/test_weight_transfer.py
@@ -41,6 +41,7 @@ def create_mock_parallel_config(
     config.rank = rank
     config.world_size = world_size
     config.data_parallel_rank = dp_rank
+    config.data_parallel_index = dp_rank
     return config
 
 
@@ -99,7 +100,9 @@ def test_parse_init_info_valid(self):
         """Test parsing valid init info dict."""
         config = WeightTransferConfig(backend="nccl")
         parallel_config = create_mock_parallel_config()
-        engine = NCCLWeightTransferEngine(config, parallel_config)
+        engine = NCCLWeightTransferEngine(
+            config, parallel_config, MagicMock(spec=torch.nn.Module)
+        )
 
         init_info = engine.parse_init_info(
             {
@@ -120,7 +123,9 @@ def test_parse_init_info_missing_field_raises(self):
         """Test parsing init info with missing required field."""
         config = WeightTransferConfig(backend="nccl")
         parallel_config = create_mock_parallel_config()
-        engine = NCCLWeightTransferEngine(config, parallel_config)
+        engine = NCCLWeightTransferEngine(
+            config, parallel_config, MagicMock(spec=torch.nn.Module)
+        )
 
         with pytest.raises(ValueError, match="Invalid init_info"):
             engine.parse_init_info(
@@ -134,7 +139,9 @@ def test_parse_update_info_valid(self):
         """Test parsing valid update info dict."""
         config = WeightTransferConfig(backend="nccl")
         parallel_config = create_mock_parallel_config()
-        engine = NCCLWeightTransferEngine(config, parallel_config)
+        engine = NCCLWeightTransferEngine(
+            config, parallel_config, MagicMock(spec=torch.nn.Module)
+        )
 
         update_info = engine.parse_update_info(
             {
@@ -160,35 +167,28 @@ def test_create_engine_nccl(self):
         """Test factory creates NCCL engine."""
         config = WeightTransferConfig(backend="nccl")
         parallel_config = create_mock_parallel_config()
-        engine = WeightTransferEngineFactory.create_engine(config, parallel_config)
+        engine = WeightTransferEngineFactory.create_engine(
+            config, parallel_config, MagicMock(spec=torch.nn.Module)
+        )
         assert isinstance(engine, NCCLWeightTransferEngine)
 
     def test_create_engine_ipc(self):
         """Test factory creates IPC engine."""
         config = WeightTransferConfig(backend="ipc")
         parallel_config = create_mock_parallel_config()
-        engine = WeightTransferEngineFactory.create_engine(config, parallel_config)
+        engine = WeightTransferEngineFactory.create_engine(
+            config, parallel_config, MagicMock(spec=torch.nn.Module)
+        )
         assert isinstance(engine, IPCWeightTransferEngine)
 
     def test_create_engine_invalid_backend(self):
         """Test factory raises for invalid backend."""
-        # Pydantic validates Literal types at construction, so we can't create
-        # a config with an invalid backend. Instead, we test by directly
-        # accessing the registry or using model_construct to bypass validation.
-        from pydantic import ValidationError
-
-        # Test that Pydantic prevents invalid backend at construction
-        with pytest.raises(ValidationError):
-            WeightTransferConfig(backend="invalid")
-
-        # Test factory error by creating a config with valid backend but
-        # then manually modifying the backend attribute (bypassing validation)
-        config = WeightTransferConfig(backend="nccl")
-        # Use object.__setattr__ to bypass Pydantic validation
-        object.__setattr__(config, "backend", "invalid")
+        config = WeightTransferConfig(backend="invalid")
         parallel_config = create_mock_parallel_config()
         with pytest.raises(ValueError, match="Invalid weight transfer backend"):
-            WeightTransferEngineFactory.create_engine(config, parallel_config)
+            WeightTransferEngineFactory.create_engine(
+                config, parallel_config, MagicMock(spec=torch.nn.Module)
+            )
 
     def test_register_duplicate_raises(self):
         """Test registering duplicate engine name raises."""
@@ -208,7 +208,9 @@ def test_nccl_receive_weights_without_init_raises():
 
     config = WeightTransferConfig(backend="nccl")
     parallel_config = create_mock_parallel_config()
-    engine = NCCLWeightTransferEngine(config, parallel_config)
+    engine = NCCLWeightTransferEngine(
+        config, parallel_config, MagicMock(spec=torch.nn.Module)
+    )
 
     update_info = NCCLWeightTransferUpdateInfo(
         names=["w"],
@@ -283,8 +285,11 @@ def inference_receive_tensor(
     parallel_config.rank = 0
     parallel_config.world_size = 1
     parallel_config.data_parallel_rank = 0
+    parallel_config.data_parallel_index = 0
 
-    engine = NCCLWeightTransferEngine(config, parallel_config)
+    engine = NCCLWeightTransferEngine(
+        config, parallel_config, MagicMock(spec=torch.nn.Module)
+    )
 
     # Initialize the engine (joins as rank 1)
     init_info = NCCLWeightTransferInitInfo(
@@ -387,7 +392,7 @@ def test_valid_update_info(self):
 
         # Create a dummy tensor and IPC handle
         dummy_tensor = torch.ones(10, 10, device="cuda:0")
-        ipc_handle = reduce_tensor(dummy_tensor)
+        _, ipc_handle = reduce_tensor(dummy_tensor)
         gpu_uuid = str(torch.cuda.get_device_properties(0).uuid)
         ipc_handles = [{gpu_uuid: ipc_handle}]
 
@@ -408,7 +413,7 @@ def test_mismatched_dtype_names_raises(self):
             pytest.skip("Need at least 1 GPU for this test")
 
         dummy_tensor = torch.ones(10, 10, device="cuda:0")
-        ipc_handle = reduce_tensor(dummy_tensor)
+        _, ipc_handle = reduce_tensor(dummy_tensor)
         gpu_uuid = str(torch.cuda.get_device_properties(0).uuid)
         ipc_handles = [{gpu_uuid: ipc_handle}, {gpu_uuid: ipc_handle}]
 
@@ -426,7 +431,7 @@ def test_mismatched_shapes_raises(self):
             pytest.skip("Need at least 1 GPU for this test")
 
         dummy_tensor = torch.ones(10, 10, device="cuda:0")
-        ipc_handle = reduce_tensor(dummy_tensor)
+        _, ipc_handle = reduce_tensor(dummy_tensor)
         gpu_uuid = str(torch.cuda.get_device_properties(0).uuid)
         ipc_handles = [{gpu_uuid: ipc_handle}, {gpu_uuid: ipc_handle}]
 
@@ -444,7 +449,7 @@ def test_mismatched_ipc_handles_raises(self):
             pytest.skip("Need at least 1 GPU for this test")
 
         dummy_tensor = torch.ones(10, 10, device="cuda:0")
-        ipc_handle = reduce_tensor(dummy_tensor)
+        _, ipc_handle = reduce_tensor(dummy_tensor)
         gpu_uuid = str(torch.cuda.get_device_properties(0).uuid)
         ipc_handles = [{gpu_uuid: ipc_handle}]  # Only one handle
 
@@ -456,65 +461,9 @@ def test_mismatched_ipc_handles_raises(self):
                 ipc_handles=ipc_handles,
             )
 
-    def test_valid_update_info_from_pickled(self, monkeypatch):
-        """Test creating IPCWeightTransferUpdateInfo from pickled handles."""
-        if torch.accelerator.device_count() < 1:
-            pytest.skip("Need at least 1 GPU for this test")
-
-        monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
-
-        dummy_tensor = torch.ones(10, 10, device="cuda:0")
-        ipc_handle = reduce_tensor(dummy_tensor)
-        gpu_uuid = str(torch.cuda.get_device_properties(0).uuid)
-        ipc_handles = [{gpu_uuid: ipc_handle}]
-
-        pickled = base64.b64encode(pickle.dumps(ipc_handles)).decode("utf-8")
-
-        info = IPCWeightTransferUpdateInfo(
-            names=["layer.weight"],
-            dtype_names=["float32"],
-            shapes=[[10, 10]],
-            ipc_handles_pickled=pickled,
-        )
-        assert info.ipc_handles == ipc_handles
-        assert info.ipc_handles_pickled is None
-
-    def test_pickled_requires_insecure_serialization_flag(self, monkeypatch):
-        """Test that pickled handles are rejected unless env flag is enabled."""
-        monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "0")
-
-        with pytest.raises(ValueError, match="VLLM_ALLOW_INSECURE_SERIALIZATION=1"):
-            IPCWeightTransferUpdateInfo(
-                names=[],
-                dtype_names=[],
-                shapes=[],
-                ipc_handles_pickled=base64.b64encode(pickle.dumps([])).decode("utf-8"),
-            )
-
-    def test_both_handles_and_pickled_raises(self):
-        """Test that providing both ipc_handles and ipc_handles_pickled raises."""
-        if torch.accelerator.device_count() < 1:
-            pytest.skip("Need at least 1 GPU for this test")
-
-        dummy_tensor = torch.ones(10, 10, device="cuda:0")
-        ipc_handle = reduce_tensor(dummy_tensor)
-        gpu_uuid = str(torch.cuda.get_device_properties(0).uuid)
-        ipc_handles = [{gpu_uuid: ipc_handle}]
-
-        pickled = base64.b64encode(pickle.dumps(ipc_handles)).decode("utf-8")
-
-        with pytest.raises(ValueError, match="Cannot specify both"):
-            IPCWeightTransferUpdateInfo(
-                names=["layer.weight"],
-                dtype_names=["float32"],
-                shapes=[[10, 10]],
-                ipc_handles=ipc_handles,
-                ipc_handles_pickled=pickled,
-            )
-
-    def test_neither_handles_nor_pickled_raises(self):
-        """Test that providing neither ipc_handles nor ipc_handles_pickled raises."""
-        with pytest.raises(ValueError, match="must be provided"):
+    def test_missing_ipc_handles_raises(self):
+        """Test that omitting ipc_handles raises TypeError."""
+        with pytest.raises(TypeError):
             IPCWeightTransferUpdateInfo(
                 names=["layer.weight"],
                 dtype_names=["float32"],
@@ -545,15 +494,17 @@ def test_parse_update_info_valid(self):
 
         config = WeightTransferConfig(backend="ipc")
         parallel_config = create_mock_parallel_config()
-        engine = IPCWeightTransferEngine(config, parallel_config)
+        engine = IPCWeightTransferEngine(
+            config, parallel_config, MagicMock(spec=torch.nn.Module)
+        )
 
         # Create dummy IPC handles
         dummy_tensor1 = torch.ones(100, 100, device="cuda:0")
         dummy_tensor2 = torch.ones(50, device="cuda:0")
-        ipc_handle1 = reduce_tensor(dummy_tensor1)
-        ipc_handle2 = reduce_tensor(dummy_tensor2)
+        _, ipc_args1 = reduce_tensor(dummy_tensor1)
+        _, ipc_args2 = reduce_tensor(dummy_tensor2)
         gpu_uuid = str(torch.cuda.get_device_properties(0).uuid)
-        ipc_handles = [{gpu_uuid: ipc_handle1}, {gpu_uuid: ipc_handle2}]
+        ipc_handles = [{gpu_uuid: ipc_args1}, {gpu_uuid: ipc_args2}]
 
         update_info = engine.parse_update_info(
             {
@@ -579,14 +530,16 @@ def test_parse_update_info_pickled(self, monkeypatch):
 
         config = WeightTransferConfig(backend="ipc")
         parallel_config = create_mock_parallel_config()
-        engine = IPCWeightTransferEngine(config, parallel_config)
+        engine = IPCWeightTransferEngine(
+            config, parallel_config, MagicMock(spec=torch.nn.Module)
+        )
 
         dummy_tensor1 = torch.ones(100, 100, device="cuda:0")
         dummy_tensor2 = torch.ones(50, device="cuda:0")
-        ipc_handle1 = reduce_tensor(dummy_tensor1)
-        ipc_handle2 = reduce_tensor(dummy_tensor2)
+        _, ipc_args1 = reduce_tensor(dummy_tensor1)
+        _, ipc_args2 = reduce_tensor(dummy_tensor2)
         gpu_uuid = str(torch.cuda.get_device_properties(0).uuid)
-        ipc_handles = [{gpu_uuid: ipc_handle1}, {gpu_uuid: ipc_handle2}]
+        ipc_handles = [{gpu_uuid: ipc_args1}, {gpu_uuid: ipc_args2}]
 
         pickled = base64.b64encode(pickle.dumps(ipc_handles)).decode("utf-8")
 
@@ -602,10 +555,38 @@ def test_parse_update_info_pickled(self, monkeypatch):
         assert isinstance(update_info, IPCWeightTransferUpdateInfo)
         assert update_info.names == ["w1", "w2"]
         assert len(update_info.ipc_handles) == 2
-        assert update_info.ipc_handles_pickled is None
         assert gpu_uuid in update_info.ipc_handles[0]
         assert gpu_uuid in update_info.ipc_handles[1]
 
+    def test_parse_update_info_both_handles_and_pickled_raises(self):
+        """Test that providing both ipc_handles and ipc_handles_pickled raises."""
+        if torch.accelerator.device_count() < 1:
+            pytest.skip("Need at least 1 GPU for this test")
+
+        config = WeightTransferConfig(backend="ipc")
+        parallel_config = create_mock_parallel_config()
+        engine = IPCWeightTransferEngine(
+            config, parallel_config, MagicMock(spec=torch.nn.Module)
+        )
+
+        dummy_tensor = torch.ones(10, 10, device="cuda:0")
+        _, ipc_handle = reduce_tensor(dummy_tensor)
+        gpu_uuid = str(torch.cuda.get_device_properties(0).uuid)
+        ipc_handles = [{gpu_uuid: ipc_handle}]
+
+        pickled = base64.b64encode(pickle.dumps(ipc_handles)).decode("utf-8")
+
+        with pytest.raises(ValueError, match="Cannot specify both"):
+            engine.parse_update_info(
+                {
+                    "names": ["layer.weight"],
+                    "dtype_names": ["float32"],
+                    "shapes": [[10, 10]],
+                    "ipc_handles": ipc_handles,
+                    "ipc_handles_pickled": pickled,
+                }
+            )
+
 
 # --- Integration Test: IPC Weight Transfer Between Ray Tasks ---
 
@@ -627,13 +608,15 @@ def __init__(self, tensor_shape: list[int], tensor_dtype: str):
         self.tensor.fill_(42.0)  # Fill with 42 to verify correct transfer
 
         # Create IPC handle (tensor must stay alive for IPC to work)
-        ipc_handle = reduce_tensor(self.tensor)
+        # reduce_tensor returns (rebuild_func, args); we only send args
+        # since the receiver imports rebuild_cuda_tensor directly.
+        _, ipc_args = reduce_tensor(self.tensor)
         gpu_uuid = get_physical_gpu_id(0)
 
         torch.accelerator.synchronize()
 
         self.ipc_handle_dict = {
-            "ipc_handle": ipc_handle,
+            "ipc_handle": ipc_args,
             "gpu_uuid": gpu_uuid,
             "shape": tensor_shape,
             "dtype": tensor_dtype,
@@ -650,6 +633,12 @@ def inference_receive_ipc_tensor(
     mode: str = "ray",
 ) -> dict:
     """Inference task that receives tensor via IPCWeightTransferEngine."""
+    import os
+
+    # Worker-side: ipc_handles_pickled is deserialized via pickle.
+    if mode == "http":
+        os.environ["VLLM_ALLOW_INSECURE_SERIALIZATION"] = "1"
+
     from unittest.mock import MagicMock
 
     import torch
@@ -666,8 +655,11 @@ def inference_receive_ipc_tensor(
     parallel_config.rank = 0
     parallel_config.world_size = 1
     parallel_config.data_parallel_rank = 0
+    parallel_config.data_parallel_index = 0
 
-    engine = IPCWeightTransferEngine(config, parallel_config)
+    engine = IPCWeightTransferEngine(
+        config, parallel_config, MagicMock(spec=torch.nn.Module)
+    )
 
     # Initialize the engine (no-op for IPC)
     init_info = IPCWeightTransferInitInfo()
@@ -681,7 +673,6 @@ def noop_load_weights(weights: list[tuple[str, torch.Tensor]]):
             # Clone tensor to keep it after engine cleans up
             received_tensors.append((name, tensor.clone()))
 
-    # Build update dict and go through parse_update_info (exercises __post_init__)
     ipc_handles = [{ipc_handle_dict["gpu_uuid"]: ipc_handle_dict["ipc_handle"]}]
 
     if mode == "ray":
@@ -692,6 +683,7 @@ def noop_load_weights(weights: list[tuple[str, torch.Tensor]]):
             "ipc_handles": ipc_handles,
         }
     elif mode == "http":
+        # Simulate HTTP transport: pickle + base64 encode handles
         pickled = base64.b64encode(pickle.dumps(ipc_handles)).decode("utf-8")
         update_dict = {
             "names": ["test.weight"],
@@ -740,7 +732,8 @@ def test_ipc_weight_transfer_between_processes(mode: str):
 
     Parametrized over transport modes:
     - 'ray':  ipc_handles passed directly.
-    - 'http': ipc_handles pickled + base64-encoded, unpickled via __post_init__.
+    - 'http': ipc_handles pickled + base64-encoded, deserialized in
+              parse_update_info before constructing the dataclass.
 
     IPC requires same-GPU access, so we use a placement group to co-locate
     the trainer actor and inference task on the same GPU.
@@ -794,11 +787,13 @@ def test_ipc_receive_weights_missing_gpu_uuid_raises():
 
     config = WeightTransferConfig(backend="ipc")
     parallel_config = create_mock_parallel_config()
-    engine = IPCWeightTransferEngine(config, parallel_config)
+    engine = IPCWeightTransferEngine(
+        config, parallel_config, MagicMock(spec=torch.nn.Module)
+    )
 
     # Create IPC handle with wrong GPU UUID
     dummy_tensor = torch.ones(10, 10, device="cuda:0")
-    ipc_handle = reduce_tensor(dummy_tensor)
+    _, ipc_handle = reduce_tensor(dummy_tensor)
     wrong_uuid = "wrong-uuid-12345"
     ipc_handles = [{wrong_uuid: ipc_handle}]
 
diff --git a/tests/engine/test_arg_utils.py b/tests/engine/test_arg_utils.py
index d1986e0a44ff..f595ca6ecbd3 100644
--- a/tests/engine/test_arg_utils.py
+++ b/tests/engine/test_arg_utils.py
@@ -9,9 +9,10 @@
 import pytest
 from pydantic import Field
 
-from vllm.config import AttentionConfig, CompilationConfig, config
+from vllm.config import AttentionConfig, CompilationConfig, ModelConfig, config
 from vllm.engine.arg_utils import (
     EngineArgs,
+    _expand_json_human_readable_numbers,
     contains_type,
     get_kwargs,
     get_type,
@@ -115,6 +116,10 @@ class DummyConfig:
     """Regular bool with default True"""
     optional_bool: bool | None = None
     """Optional bool with default None"""
+
+    optional_bool_or_str: bool | str | None = None
+    """Optional bool-or-str with default None"""
+
     optional_literal: Literal["x", "y"] | None = None
     """Optional literal with default None"""
     tuple_n: tuple[int, ...] = Field(default_factory=lambda: (1, 2, 3))
@@ -169,6 +174,11 @@ def test_get_kwargs():
     # bools should not have their type set
     assert kwargs["regular_bool"].get("type") is None
     assert kwargs["optional_bool"].get("type") is None
+    # optional bool-or-str should accept an optional string value
+    assert kwargs["optional_bool_or_str"]["type"] is str
+    assert kwargs["optional_bool_or_str"]["nargs"] == "?"
+    assert kwargs["optional_bool_or_str"]["const"] is True
+    assert "action" not in kwargs["optional_bool_or_str"]
     # optional literals should have None as a choice
     assert kwargs["optional_literal"]["choices"] == ["x", "y", "None"]
     # tuples should have the correct nargs
@@ -196,6 +206,32 @@ def test_get_kwargs():
     assert kwargs["nested_config"]["type"]('{"field": 2}') == NestedConfig(2)  # type: ignore[call-arg]
 
 
+def test_hf_token_get_kwargs():
+    kwargs = get_kwargs(ModelConfig)["hf_token"]
+
+    assert kwargs["type"] is str
+    assert kwargs["nargs"] == "?"
+    assert kwargs["const"] is True
+    assert "action" not in kwargs
+
+
+@pytest.mark.parametrize(
+    ("cli_args", "expected"),
+    [
+        ([], None),
+        (["--hf-token"], True),
+        (["--hf-token", "hf_secret"], "hf_secret"),
+        (["--hf-token", "None"], "None"),
+    ],
+)
+def test_hf_token_cli_arg(cli_args, expected):
+    parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
+
+    args = parser.parse_args(cli_args)
+
+    assert args.hf_token == expected
+
+
 @pytest.mark.parametrize(
     ("arg", "expected"),
     [
@@ -332,14 +368,8 @@ def test_attention_config():
             "true",
             "--attention-config.flash_attn_max_num_splits_for_cuda_graph",
             "16",
-            "--attention-config.use_cudnn_prefill",
-            "true",
-            "--attention-config.use_trtllm_ragged_deepseek_prefill",
-            "true",
             "--attention-config.use_trtllm_attention",
             "true",
-            "--attention-config.disable_flashinfer_prefill",
-            "true",
             "--attention-config.disable_flashinfer_q_quantization",
             "true",
         ]
@@ -351,10 +381,7 @@ def test_attention_config():
     assert engine_args.attention_config.flash_attn_version == 3
     assert engine_args.attention_config.use_prefill_decode_attention is True
     assert engine_args.attention_config.flash_attn_max_num_splits_for_cuda_graph == 16
-    assert engine_args.attention_config.use_cudnn_prefill is True
-    assert engine_args.attention_config.use_trtllm_ragged_deepseek_prefill is True
     assert engine_args.attention_config.use_trtllm_attention is True
-    assert engine_args.attention_config.disable_flashinfer_prefill is True
     assert engine_args.attention_config.disable_flashinfer_q_quantization is True
 
     # set to string form of a dict with all fields
@@ -364,10 +391,7 @@ def test_attention_config():
             '{"backend": "FLASHINFER", "flash_attn_version": 2, '
             '"use_prefill_decode_attention": false, '
             '"flash_attn_max_num_splits_for_cuda_graph": 8, '
-            '"use_cudnn_prefill": false, '
-            '"use_trtllm_ragged_deepseek_prefill": false, '
             '"use_trtllm_attention": false, '
-            '"disable_flashinfer_prefill": false, '
             '"disable_flashinfer_q_quantization": false}',
         ]
     )
@@ -378,10 +402,7 @@ def test_attention_config():
     assert engine_args.attention_config.flash_attn_version == 2
     assert engine_args.attention_config.use_prefill_decode_attention is False
     assert engine_args.attention_config.flash_attn_max_num_splits_for_cuda_graph == 8
-    assert engine_args.attention_config.use_cudnn_prefill is False
-    assert engine_args.attention_config.use_trtllm_ragged_deepseek_prefill is False
     assert engine_args.attention_config.use_trtllm_attention is False
-    assert engine_args.attention_config.disable_flashinfer_prefill is False
     assert engine_args.attention_config.disable_flashinfer_q_quantization is False
 
     # test --attention-backend flows into VllmConfig.attention_config
@@ -523,3 +544,72 @@ def test_human_readable_model_len():
     for invalid in ["1a", "pwd", "10.24", "1.23M", "1.22T"]:
         with pytest.raises(ArgumentError):
             parser.parse_args(["--max-model-len", invalid])
+
+
+def test_numa_bind_args():
+    parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
+    args = parser.parse_args(
+        [
+            "--numa-bind",
+            "--numa-bind-nodes",
+            "0",
+            "0",
+            "1",
+            "1",
+            "--numa-bind-cpus",
+            "0-3",
+            "4-7",
+            "8-11",
+            "12-15",
+        ]
+    )
+    engine_args = EngineArgs.from_cli_args(args=args)
+    assert engine_args.numa_bind is True
+    assert engine_args.numa_bind_nodes == [0, 0, 1, 1]
+    assert engine_args.numa_bind_cpus == ["0-3", "4-7", "8-11", "12-15"]
+
+
+def test_ir_op_priority():
+    from vllm.config.kernel import IrOpPriorityConfig, KernelConfig
+
+    ir_op_priority = IrOpPriorityConfig(rms_norm=["vllm_c"])
+    cfg1 = EngineArgs(ir_op_priority=ir_op_priority).create_engine_config()
+    cfg2 = EngineArgs(
+        kernel_config=KernelConfig(ir_op_priority=ir_op_priority)
+    ).create_engine_config()
+    assert cfg1.kernel_config.ir_op_priority == cfg2.kernel_config.ir_op_priority
+
+    with pytest.raises(ValueError, match="rms_norm"):
+        _ = EngineArgs(
+            ir_op_priority=ir_op_priority,
+            kernel_config=KernelConfig(ir_op_priority=ir_op_priority),
+        ).create_engine_config()
+
+
+@pytest.mark.parametrize(
+    ("input_json", "expected_json"),
+    [
+        # Decimal suffixes (lowercase)
+        ('{"x": 80g}', '{"x": 80000000000}'),
+        ('{"x": 1k}', '{"x": 1000}'),
+        ('{"x": 5m}', '{"x": 5000000}'),
+        ('{"x": 2t}', '{"x": 2000000000000}'),
+        # Binary suffixes (uppercase)
+        ('{"x": 1K}', f'{{"x": {2**10}}}'),
+        ('{"x": 1G}', f'{{"x": {2**30}}}'),
+        # Decimal values
+        ('{"x": 1.5g}', '{"x": 1500000000}'),
+        # Quoted strings must NOT be modified
+        ('{"my_key": 80g}', '{"my_key": 80000000000}'),
+        ('{"name": "80g"}', '{"name": "80g"}'),
+        ('{"model_name": "foo_bar"}', '{"model_name": "foo_bar"}'),
+        # Multiple values
+        ('{"a": 1k, "b": 2m}', '{"a": 1000, "b": 2000000}'),
+        # Plain numbers are untouched
+        ('{"x": 42}', '{"x": 42}'),
+        # Nested JSON
+        ('{"outer": {"inner": 10g}}', '{"outer": {"inner": 10000000000}}'),
+    ],
+)
+def test_expand_json_human_readable_numbers(input_json, expected_json):
+    assert _expand_json_human_readable_numbers(input_json) == expected_json
diff --git a/tests/entrypoints/llm/test_generate.py b/tests/entrypoints/llm/test_generate.py
index 34465b7d2708..82f38adfb772 100644
--- a/tests/entrypoints/llm/test_generate.py
+++ b/tests/entrypoints/llm/test_generate.py
@@ -91,6 +91,12 @@ def test_multiple_priority(llm: LLM):
         outputs = llm.generate(PROMPTS, sampling_params=None, priority=[])
 
 
+def test_single_prompt_priority(llm: LLM):
+    # Single string prompts should be normalized to one request.
+    outputs = llm.generate(PROMPTS[0], sampling_params=None, priority=[0])
+    assert len(outputs) == 1
+
+
 def test_max_model_len():
     max_model_len = 20
     llm = LLM(
diff --git a/tests/entrypoints/llm/test_mm_cache_external_injection.py b/tests/entrypoints/llm/test_mm_cache_external_injection.py
new file mode 100644
index 000000000000..3023457c5fed
--- /dev/null
+++ b/tests/entrypoints/llm/test_mm_cache_external_injection.py
@@ -0,0 +1,193 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Test that ``InputProcessor.inject_into_mm_cache()`` correctly injects
+pre-processed mm_kwargs into the processor cache and reports MM cache
+hit rate metrics accurately.
+
+This is used by frameworks like Dynamo that run the HF processor on a
+frontend and transfer pre-processed mm_kwargs to the backend, avoiding
+redundant processing.
+"""
+
+import logging
+
+import pytest
+import regex as re
+
+from tests.entrypoints.openai.chat_completion.test_vision import TEST_IMAGE_ASSETS
+from vllm import LLM, SamplingParams
+from vllm.renderers.params import ChatParams
+from vllm.v1.metrics import loggers as stat_loggers
+from vllm.v1.metrics.reader import Counter, Metric
+
+
+def _make_messages(image_url: str):
+    return [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": image_url},
+                },
+            ],
+        }
+    ]
+
+
+def _get_counter_value(metrics: list[Metric], name: str):
+    metric = next(m for m in metrics if m.name == name)
+    assert isinstance(metric, Counter)
+    return metric.value
+
+
+def _get_mm_cache_stats(metrics: list[Metric]):
+    mm_cache_queries = _get_counter_value(metrics, "vllm:mm_cache_queries")
+    mm_cache_hits = _get_counter_value(metrics, "vllm:mm_cache_hits")
+    return mm_cache_queries, mm_cache_hits
+
+
+def _get_mm_cache_log(llm: LLM, caplog_vllm: pytest.LogCaptureFixture) -> float:
+    caplog_vllm.clear()
+    with caplog_vllm.at_level(logging.INFO, logger=stat_loggers.__name__):
+        llm.llm_engine.do_log_stats()
+
+    assert len(caplog_vllm.records) == 1
+    msg = caplog_vllm.records[0].getMessage()
+
+    assert "MM cache hit rate" in msg
+    match = re.search(r"MM cache hit rate: ([0-9.]+)%", msg)
+    assert match is not None
+    return float(match.group(1))
+
+
+@pytest.mark.parametrize("image_urls", [TEST_IMAGE_ASSETS[:2]], indirect=True)
+@pytest.mark.parametrize("mm_processor_cache_type", ["lru", "shm"])
+def test_inject_into_mm_cache(
+    num_gpus_available,
+    image_urls,
+    mm_processor_cache_type,
+    caplog_vllm,
+):
+    """Test that inject_into_mm_cache() injects pre-processed mm_kwargs into
+    the processor cache and MM cache hit metrics are updated correctly.
+
+    Steps:
+    1. Two normal requests (same image) -> cache miss then hit (baseline)
+    2. Extract cached kwargs, call inject_into_mm_cache with a new hash,
+       then generate with a pre-rendered input -> verifies injection works
+    """
+    llm = LLM(
+        model="llava-hf/llava-1.5-7b-hf",
+        max_model_len=4096,
+        max_num_seqs=5,
+        enforce_eager=True,
+        disable_log_stats=False,
+        limit_mm_per_prompt={"image": 2},
+        mm_processor_cache_type=mm_processor_cache_type,
+    )
+
+    # Step 1: Normal requests to populate the cache
+    llm.chat(_make_messages(image_urls[0]))
+    assert _get_mm_cache_stats(llm.get_metrics()) == (1, 0)
+
+    llm.chat(_make_messages(image_urls[0]))
+    assert _get_mm_cache_stats(llm.get_metrics()) == (2, 1)
+    assert _get_mm_cache_log(llm, caplog_vllm) == pytest.approx(50.0)
+
+    # Step 2: Use a second image to get valid expanded tokens and
+    # placeholder positions via the renderer.
+    llm.chat(_make_messages(image_urls[1]))
+    queries_before = _get_mm_cache_stats(llm.get_metrics())[0]  # 3
+
+    renderer = llm.llm_engine.renderer
+    cache = renderer.mm_processor_cache
+    assert cache is not None, "Processor cache should be enabled"
+
+    _, eng_prompts = renderer.render_chat(
+        [_make_messages(image_urls[1])],
+        ChatParams(),
+    )
+    eng_input = eng_prompts[0]
+
+    # Inject pre-processed mm_kwargs with a NEW hash via public API
+    new_mm_hash = "deadbeef" * 8
+    mm_hashes = {"image": [new_mm_hash]}
+    mm_kwargs = eng_input["mm_kwargs"]
+
+    llm.llm_engine.input_processor.inject_into_mm_cache(mm_hashes, mm_kwargs)
+
+    # Build pre-rendered input (no externally_processed flag needed)
+    pre_rendered_input = {
+        "type": "multimodal",
+        "prompt_token_ids": eng_input["prompt_token_ids"],
+        "mm_kwargs": mm_kwargs,
+        "mm_hashes": mm_hashes,
+        "mm_placeholders": eng_input["mm_placeholders"],
+    }
+
+    llm.generate(
+        pre_rendered_input,
+        sampling_params=SamplingParams(max_tokens=1),
+    )
+
+    # Verify cache was queried and injection happened
+    queries_after = _get_mm_cache_stats(llm.get_metrics())[0]
+    assert queries_after > queries_before, (
+        "Cache should have been queried for the injected item"
+    )
+    mm_rate = _get_mm_cache_log(llm, caplog_vllm)
+    assert mm_rate >= 0.0, "MM cache hit rate should be reported"
+
+
+@pytest.mark.parametrize("image_urls", [TEST_IMAGE_ASSETS[:1]], indirect=True)
+def test_inject_into_mm_cache_without_cache(
+    num_gpus_available,
+    image_urls,
+):
+    """Test that inject_into_mm_cache works gracefully when processor cache
+    is disabled (mm_processor_cache_gb=0). Should not crash.
+    """
+    llm = LLM(
+        model="llava-hf/llava-1.5-7b-hf",
+        max_model_len=4096,
+        max_num_seqs=5,
+        enforce_eager=True,
+        disable_log_stats=False,
+        limit_mm_per_prompt={"image": 2},
+        mm_processor_cache_gb=0,
+    )
+
+    # Run a normal chat request first to warm up the model.
+    llm.chat(_make_messages(image_urls[0]))
+
+    # Use the renderer to get a proper EngineInput with expanded tokens
+    renderer = llm.llm_engine.renderer
+    _, eng_prompts = renderer.render_chat(
+        [_make_messages(image_urls[0])],
+        ChatParams(),
+    )
+    eng_input = eng_prompts[0]
+
+    mm_hashes = {"image": ["abcd1234" * 8]}
+    mm_kwargs = eng_input["mm_kwargs"]
+
+    # inject_into_mm_cache should not crash even without cache
+    llm.llm_engine.input_processor.inject_into_mm_cache(mm_hashes, mm_kwargs)
+
+    # Build and generate with pre-rendered input
+    pre_rendered_input = {
+        "type": "multimodal",
+        "prompt_token_ids": eng_input["prompt_token_ids"],
+        "mm_kwargs": mm_kwargs,
+        "mm_hashes": mm_hashes,
+        "mm_placeholders": eng_input["mm_placeholders"],
+    }
+
+    result = llm.generate(
+        pre_rendered_input,
+        sampling_params=SamplingParams(max_tokens=1),
+    )
+    assert len(result) == 1, "Should produce one output"
+    assert len(result[0].outputs) >= 1, "Should have at least one output sequence"
diff --git a/tests/entrypoints/llm/test_mm_processor_kwargs.py b/tests/entrypoints/llm/test_mm_processor_kwargs.py
new file mode 100644
index 000000000000..f970d68acab7
--- /dev/null
+++ b/tests/entrypoints/llm/test_mm_processor_kwargs.py
@@ -0,0 +1,288 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from types import SimpleNamespace
+from unittest.mock import Mock
+
+import pytest
+
+from vllm import LLM, SamplingParams
+
+
+def _make_mock_llm() -> LLM:
+    llm = object.__new__(LLM)
+    llm.model_config = SimpleNamespace(
+        runner_type="generate", enable_prompt_embeds=False
+    )
+    return llm
+
+
+def test_generate_forwards_mm_processor_kwargs() -> None:
+    llm = _make_mock_llm()
+    mm_processor_kwargs = {"num_crops": 4}
+    sampling_params = SamplingParams(max_tokens=1)
+
+    llm._run_completion = Mock(return_value=["ok"])
+
+    outputs = llm.generate(
+        "prompt",
+        sampling_params=sampling_params,
+        mm_processor_kwargs=mm_processor_kwargs,
+    )
+
+    assert outputs == ["ok"]
+    assert llm._run_completion.call_args.kwargs["mm_processor_kwargs"] == (
+        mm_processor_kwargs
+    )
+
+
+def test_enqueue_forwards_mm_processor_kwargs() -> None:
+    llm = _make_mock_llm()
+    mm_processor_kwargs = {"do_resize": False}
+    sampling_params = SamplingParams(max_tokens=1)
+
+    llm._add_completion_requests = Mock(return_value=["req-0"])
+
+    request_ids = llm.enqueue(
+        "prompt",
+        sampling_params=sampling_params,
+        use_tqdm=False,
+        mm_processor_kwargs=mm_processor_kwargs,
+    )
+
+    assert request_ids == ["req-0"]
+    assert llm._add_completion_requests.call_args.kwargs["mm_processor_kwargs"] == (
+        mm_processor_kwargs
+    )
+
+
+def test_chat_forwards_mm_processor_kwargs() -> None:
+    llm = _make_mock_llm()
+    mm_processor_kwargs = {"do_pan_and_scan": True}
+    sampling_params = SamplingParams(max_tokens=1)
+    messages = [{"role": "user", "content": "hello"}]
+
+    llm._run_chat = Mock(return_value=["ok"])
+
+    outputs = llm.chat(
+        messages,
+        sampling_params=sampling_params,
+        mm_processor_kwargs=mm_processor_kwargs,
+    )
+
+    assert outputs == ["ok"]
+    assert llm._run_chat.call_args.kwargs["mm_processor_kwargs"] == (
+        mm_processor_kwargs
+    )
+
+
+def test_enqueue_chat_forwards_mm_processor_kwargs() -> None:
+    llm = _make_mock_llm()
+    mm_processor_kwargs = {"do_pan_and_scan": True}
+    sampling_params = SamplingParams(max_tokens=1)
+    messages = [{"role": "user", "content": "hello"}]
+
+    llm._add_chat_requests = Mock(return_value=["req-0"])
+
+    request_ids = llm.enqueue_chat(
+        messages,
+        sampling_params=sampling_params,
+        use_tqdm=False,
+        mm_processor_kwargs=mm_processor_kwargs,
+    )
+
+    assert request_ids == ["req-0"]
+    assert llm._add_chat_requests.call_args.kwargs["mm_processor_kwargs"] == (
+        mm_processor_kwargs
+    )
+
+
+def test_run_chat_forwards_mm_processor_kwargs() -> None:
+    llm = _make_mock_llm()
+    mm_processor_kwargs = {"num_crops": 8}
+    sampling_params = SamplingParams(max_tokens=1)
+    messages = [{"role": "user", "content": "hello"}]
+    sentinel_output = ["done"]
+
+    llm._add_chat_requests = Mock()
+    llm._run_engine = Mock(return_value=sentinel_output)
+
+    outputs = llm._run_chat(
+        messages=messages,
+        params=sampling_params,
+        output_type=object,
+        use_tqdm=False,
+        mm_processor_kwargs=mm_processor_kwargs,
+    )
+
+    assert outputs == sentinel_output
+    assert llm._add_chat_requests.call_args.kwargs["mm_processor_kwargs"] == (
+        mm_processor_kwargs
+    )
+
+
+def test_run_completion_forwards_mm_processor_kwargs() -> None:
+    llm = _make_mock_llm()
+    mm_processor_kwargs = {"min_pixels": 4 * 28 * 28}
+    sampling_params = SamplingParams(max_tokens=1)
+    sentinel_output = ["done"]
+
+    llm._add_completion_requests = Mock()
+    llm._run_engine = Mock(return_value=sentinel_output)
+
+    outputs = llm._run_completion(
+        prompts=["prompt"],
+        params=sampling_params,
+        output_type=object,
+        use_tqdm=False,
+        mm_processor_kwargs=mm_processor_kwargs,
+    )
+
+    assert outputs == sentinel_output
+    assert llm._add_completion_requests.call_args.kwargs["mm_processor_kwargs"] == (
+        mm_processor_kwargs
+    )
+
+
+def test_add_completion_requests_forwards_mm_processor_kwargs() -> None:
+    llm = _make_mock_llm()
+    mm_processor_kwargs = {"max_dynamic_patch": 4}
+    sampling_params = SamplingParams(max_tokens=1)
+
+    llm._params_to_seq = Mock(return_value=[sampling_params])
+    llm._lora_request_to_seq = Mock(return_value=[None])
+    llm._priority_to_seq = Mock(return_value=[0])
+    llm._preprocess_cmpl_one = Mock(return_value={"prompt_token_ids": [1]})
+
+    captured_prompts = []
+
+    def fake_render_and_add_requests(*, prompts, **_kwargs):
+        captured_prompts.extend(prompts)
+        return ["req-0"]
+
+    llm._render_and_add_requests = Mock(side_effect=fake_render_and_add_requests)
+
+    request_ids = llm._add_completion_requests(
+        prompts=["prompt"],
+        params=sampling_params,
+        use_tqdm=False,
+        mm_processor_kwargs=mm_processor_kwargs,
+    )
+
+    assert request_ids == ["req-0"]
+    llm._preprocess_cmpl_one.assert_called_once_with(
+        "prompt",
+        None,
+        mm_processor_kwargs=mm_processor_kwargs,
+    )
+    assert captured_prompts == [{"prompt_token_ids": [1]}]
+
+
+def test_preprocess_cmpl_applies_mm_processor_kwargs_to_renderer(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    llm = _make_mock_llm()
+    mm_processor_kwargs = {"num_crops": 8}
+    prompt = {"prompt": "<image>", "multi_modal_data": {"image": object()}}
+
+    renderer = Mock()
+    renderer.default_cmpl_tok_params = Mock()
+    renderer.default_cmpl_tok_params.with_kwargs.return_value = "tok-params"
+    renderer.render_cmpl.return_value = ["engine-input"]
+    llm.renderer = renderer
+
+    monkeypatch.setattr(
+        "vllm.entrypoints.llm.parse_model_prompt",
+        lambda _model_config, parsed_prompt: parsed_prompt,
+    )
+
+    outputs = llm._preprocess_cmpl(
+        [prompt],
+        mm_processor_kwargs=mm_processor_kwargs,
+    )
+
+    assert outputs == ["engine-input"]
+    renderer.render_cmpl.assert_called_once_with(
+        [prompt],
+        "tok-params",
+        prompt_extras={"mm_processor_kwargs": mm_processor_kwargs},
+    )
+
+
+def test_preprocess_cmpl_keeps_prompt_mm_processor_kwargs_when_no_override(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    llm = _make_mock_llm()
+    prompt = {
+        "prompt": "<image>",
+        "multi_modal_data": {"image": object()},
+        "mm_processor_kwargs": {"num_crops": 2},
+    }
+
+    renderer = Mock()
+    renderer.default_cmpl_tok_params = Mock()
+    renderer.default_cmpl_tok_params.with_kwargs.return_value = "tok-params"
+    renderer.render_cmpl.return_value = ["engine-input"]
+    llm.renderer = renderer
+
+    monkeypatch.setattr(
+        "vllm.entrypoints.llm.parse_model_prompt",
+        lambda _model_config, parsed_prompt: parsed_prompt,
+    )
+
+    outputs = llm._preprocess_cmpl([prompt])
+
+    assert outputs == ["engine-input"]
+    renderer.render_cmpl.assert_called_once_with(
+        [prompt],
+        "tok-params",
+        prompt_extras=None,
+    )
+
+
+def test_preprocess_chat_applies_mm_processor_kwargs_to_renderer() -> None:
+    llm = _make_mock_llm()
+    mm_processor_kwargs = {"num_crops": 8}
+    messages = [[{"role": "user", "content": "Describe this image."}]]
+
+    renderer = Mock()
+    renderer.tokenizer = object()
+    renderer.default_chat_tok_params = Mock()
+    renderer.default_chat_tok_params.with_kwargs.return_value = "tok-params"
+    renderer.render_chat.return_value = (messages, ["engine-input"])
+    llm.renderer = renderer
+
+    outputs = llm._preprocess_chat(
+        messages,
+        mm_processor_kwargs=mm_processor_kwargs,
+    )
+
+    assert outputs == ["engine-input"]
+    call_args = renderer.render_chat.call_args
+    assert call_args.args[0] == messages
+    assert call_args.args[1].mm_processor_kwargs == mm_processor_kwargs
+    assert call_args.args[2] == "tok-params"
+    assert call_args.kwargs["prompt_extras"] == {
+        "mm_processor_kwargs": mm_processor_kwargs
+    }
+
+
+def test_preprocess_chat_omits_mm_processor_kwargs_when_no_override() -> None:
+    llm = _make_mock_llm()
+    messages = [[{"role": "user", "content": "Describe this image."}]]
+
+    renderer = Mock()
+    renderer.tokenizer = object()
+    renderer.default_chat_tok_params = Mock()
+    renderer.default_chat_tok_params.with_kwargs.return_value = "tok-params"
+    renderer.render_chat.return_value = (messages, ["engine-input"])
+    llm.renderer = renderer
+
+    outputs = llm._preprocess_chat(messages)
+
+    assert outputs == ["engine-input"]
+    call_args = renderer.render_chat.call_args
+    assert call_args.args[0] == messages
+    assert call_args.args[1].mm_processor_kwargs is None
+    assert call_args.args[2] == "tok-params"
+    assert call_args.kwargs["prompt_extras"] is None
diff --git a/tests/entrypoints/openai/chat_completion/test_audio_in_video.py b/tests/entrypoints/openai/chat_completion/test_audio_in_video.py
index 8c024995b938..61ee91eab4d0 100644
--- a/tests/entrypoints/openai/chat_completion/test_audio_in_video.py
+++ b/tests/entrypoints/openai/chat_completion/test_audio_in_video.py
@@ -64,11 +64,12 @@ async def test_online_audio_in_video(
     ]
 
     # multi-turn to test mm processor cache as well
-    for _ in range(2):
+    for turn in range(2):
         chat_completion = await client.chat.completions.create(
             model=MODEL_NAME,
             messages=messages,
-            max_tokens=16,
+            max_tokens=8,
+            temperature=0.0,
             extra_body={
                 "mm_processor_kwargs": {
                     "use_audio_in_video": True,
@@ -78,6 +79,12 @@ async def test_online_audio_in_video(
 
         assert len(chat_completion.choices) == 1
         choice = chat_completion.choices[0]
+        print(
+            f"[DEBUG][single-video] turn={turn} "
+            f"finish_reason={choice.finish_reason!r} "
+            f"content={choice.message.content!r} "
+            f"usage={chat_completion.usage}"
+        )
         assert choice.finish_reason == "length"
 
 
@@ -111,11 +118,12 @@ async def test_online_audio_in_video_multi_videos(
     ]
 
     # multi-turn to test mm processor cache as well
-    for _ in range(2):
+    for turn in range(2):
         chat_completion = await client.chat.completions.create(
             model=MODEL_NAME,
             messages=messages,
-            max_tokens=16,
+            max_tokens=8,
+            temperature=0.0,
             extra_body={
                 "mm_processor_kwargs": {
                     "use_audio_in_video": True,
@@ -125,6 +133,12 @@ async def test_online_audio_in_video_multi_videos(
 
         assert len(chat_completion.choices) == 1
         choice = chat_completion.choices[0]
+        print(
+            f"[DEBUG][multi-video] turn={turn} "
+            f"finish_reason={choice.finish_reason!r} "
+            f"content={choice.message.content!r} "
+            f"usage={chat_completion.usage}"
+        )
         assert choice.finish_reason == "length"
 
 
diff --git a/tests/entrypoints/openai/chat_completion/test_chat.py b/tests/entrypoints/openai/chat_completion/test_chat.py
index 212839f78d5c..6703095aec4a 100644
--- a/tests/entrypoints/openai/chat_completion/test_chat.py
+++ b/tests/entrypoints/openai/chat_completion/test_chat.py
@@ -845,9 +845,10 @@ async def test_chat_completion_n_parameter_non_streaming(
     chat_completion = await client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_completion_tokens=20,
-        temperature=0.7,
+        max_completion_tokens=50,
+        temperature=1.0,
         n=3,
+        seed=42,
         stream=False,
     )
 
@@ -859,7 +860,6 @@ async def test_chat_completion_n_parameter_non_streaming(
         assert choice.message.content is not None
         assert len(choice.message.content) > 0
 
-    # Verify all responses are different (highly likely with temperature > 0)
     contents = [choice.message.content for choice in chat_completion.choices]
     assert len(set(contents)) > 1, "Expected different responses with n=3"
 
@@ -1002,6 +1002,31 @@ def test_chat_completion_request_n_parameter_default():
     assert sampling_params.n == 1, f"Expected n=1 (default), got n={sampling_params.n}"
 
 
+def test_chat_completion_request_accepts_model_specific_reasoning_effort():
+    request = ChatCompletionRequest(
+        model="test-model",
+        messages=[{"role": "user", "content": "Hello"}],
+        reasoning_effort="max",
+    )
+
+    chat_params = request.build_chat_params(
+        default_template=None,
+        default_template_content_format="auto",
+    )
+
+    assert request.reasoning_effort == "max"
+    assert chat_params.chat_template_kwargs["reasoning_effort"] == "max"
+
+
+def test_chat_completion_request_rejects_unknown_reasoning_effort():
+    with pytest.raises(ValueError, match="Input should be"):
+        ChatCompletionRequest(
+            model="test-model",
+            messages=[{"role": "user", "content": "Hello"}],
+            reasoning_effort="extra_high",
+        )
+
+
 def test_chat_completion_request_n_parameter_various_values():
     """Test n parameter with various values."""
     for n_value in [1, 2, 5, 10]:
diff --git a/tests/entrypoints/openai/chat_completion/test_chat_completion_with_mixed_audio_embeds.py b/tests/entrypoints/openai/chat_completion/test_chat_completion_with_mixed_audio_embeds.py
new file mode 100644
index 000000000000..d005edc950cc
--- /dev/null
+++ b/tests/entrypoints/openai/chat_completion/test_chat_completion_with_mixed_audio_embeds.py
@@ -0,0 +1,190 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""E2E test for mixing `prompt_embeds` with `audio_embeds` in a single
+Chat Completions request."""
+
+import json
+
+import openai
+import pytest
+import pytest_asyncio
+import safetensors
+import torch
+import torch.nn as nn
+from huggingface_hub import hf_hub_download
+from transformers import AutoConfig, AutoTokenizer
+
+from tests.utils import RemoteOpenAIServer
+from vllm.utils.serial_utils import tensor2base64
+
+QWEN2AUDIO_MODEL = "Qwen/Qwen2-Audio-7B-Instruct"
+
+# Use the model's native dtype to avoid an implicit cast inside
+# `safe_load_prompt_embeds` (mismatched floating-point dtypes are cast to the
+# model's dtype automatically, matching here just skips the conversion).
+QWEN2AUDIO_DTYPE = torch.bfloat16
+
+
+@pytest.fixture(scope="module")
+def qwen2audio_server_args() -> list[str]:
+    return [
+        "--dtype",
+        "bfloat16",
+        "--max-model-len",
+        "2048",
+        "--max-num-seqs",
+        "4",
+        "--enforce-eager",
+        "--trust-remote-code",
+        "--gpu-memory-utilization",
+        "0.85",
+        "--limit-mm-per-prompt",
+        json.dumps({"audio": 1}),
+        "--enable-prompt-embeds",
+        "--enable-mm-embeds",
+    ]
+
+
+@pytest.fixture(scope="module")
+def qwen2audio_server(qwen2audio_server_args):
+    with RemoteOpenAIServer(
+        QWEN2AUDIO_MODEL,
+        qwen2audio_server_args,
+        max_wait_seconds=600,
+    ) as remote_server:
+        yield remote_server
+
+
+@pytest_asyncio.fixture
+async def qwen2audio_client(qwen2audio_server):
+    async with qwen2audio_server.get_async_client() as async_client:
+        yield async_client
+
+
+@pytest.fixture(scope="module")
+def qwen2audio_hidden_size() -> int:
+    config = AutoConfig.from_pretrained(QWEN2AUDIO_MODEL, trust_remote_code=True)
+    return config.text_config.hidden_size
+
+
+@pytest.fixture(scope="module")
+def qwen2audio_prompt_embeds_b64(qwen2audio_hidden_size: int) -> str:
+    tensor = torch.randn(4, qwen2audio_hidden_size, dtype=QWEN2AUDIO_DTYPE)
+    return tensor2base64(tensor)
+
+
+@pytest.fixture(scope="module")
+def qwen2audio_audio_embeds_b64(qwen2audio_hidden_size: int) -> str:
+    # Shape matches the `audio_embeds` unit-test fixture.
+    torch.manual_seed(0)
+    tensor = torch.randn(1, 128, qwen2audio_hidden_size, dtype=QWEN2AUDIO_DTYPE)
+    return tensor2base64(tensor)
+
+
+@pytest.mark.asyncio
+async def test_prompt_embeds_plus_audio_embeds(
+    qwen2audio_client: openai.AsyncOpenAI,
+    qwen2audio_prompt_embeds_b64: str,
+    qwen2audio_audio_embeds_b64: str,
+):
+    """Single user message carrying both prompt_embeds and audio_embeds parts."""
+    chat = await qwen2audio_client.chat.completions.create(
+        model=QWEN2AUDIO_MODEL,
+        max_tokens=5,
+        temperature=0.0,
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "prompt_embeds",
+                        "data": qwen2audio_prompt_embeds_b64,
+                    },
+                    {
+                        "type": "audio_embeds",
+                        "audio_embeds": qwen2audio_audio_embeds_b64,
+                    },
+                    {"type": "text", "text": "Continue."},
+                ],
+            }
+        ],
+    )
+    assert chat.choices[0].message.content is not None
+    assert len(chat.choices[0].message.content) > 0
+
+
+@pytest.fixture(scope="module")
+def qwen2audio_aligned_content_and_embeds_b64() -> tuple[str, str]:
+    """Return `(content, base64_embeds)` where the embeddings are the model's
+    embedding of `content` tokenized WITHOUT special tokens.
+
+    Loads only the `embed_tokens` shard from disk on CPU (~1.1 GB of host
+    RAM) instead of the full 7B model on GPU.
+    """
+    content = "Describe this audio."
+    tokenizer = AutoTokenizer.from_pretrained(QWEN2AUDIO_MODEL, trust_remote_code=True)
+
+    index_path = hf_hub_download(QWEN2AUDIO_MODEL, "model.safetensors.index.json")
+    with open(index_path) as f:
+        weight_map = json.load(f)["weight_map"]
+    embed_key = next(k for k in weight_map if k.endswith("embed_tokens.weight"))
+    shard_path = hf_hub_download(QWEN2AUDIO_MODEL, weight_map[embed_key])
+    with safetensors.safe_open(shard_path, framework="pt", device="cpu") as f:
+        embed_weight = f.get_tensor(embed_key)
+    embed_layer = nn.Embedding.from_pretrained(embed_weight.to(QWEN2AUDIO_DTYPE))
+
+    ids = tokenizer(content, add_special_tokens=False, return_tensors="pt").input_ids
+    embeds = embed_layer(ids).squeeze(0)
+    return content, tensor2base64(embeds)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "audio_first",
+    [True, False],
+    ids=["audio_embeds-then-text", "text-then-audio_embeds"],
+)
+async def test_text_content_and_prompt_embeds_match_with_audio_embeds(
+    qwen2audio_client: openai.AsyncOpenAI,
+    qwen2audio_audio_embeds_b64: str,
+    qwen2audio_aligned_content_and_embeds_b64: tuple[str, str],
+    audio_first: bool,
+):
+    """Same content as text vs `prompt_embeds` should yield identical Chat
+    Completions output when mixed with `audio_embeds` in the same message.
+    """
+    content, encoded_text_embeds = qwen2audio_aligned_content_and_embeds_b64
+
+    audio_part = {
+        "type": "audio_embeds",
+        "audio_embeds": qwen2audio_audio_embeds_b64,
+    }
+    text_part = {"type": "text", "text": content}
+    embeds_part = {"type": "prompt_embeds", "data": encoded_text_embeds}
+
+    if audio_first:
+        text_content = [audio_part, text_part]
+        embeds_content = [audio_part, embeds_part]
+    else:
+        text_content = [text_part, audio_part]
+        embeds_content = [embeds_part, audio_part]
+
+    text_resp = await qwen2audio_client.chat.completions.create(
+        model=QWEN2AUDIO_MODEL,
+        max_tokens=10,
+        temperature=0.0,
+        messages=[{"role": "user", "content": text_content}],
+    )
+    embeds_resp = await qwen2audio_client.chat.completions.create(
+        model=QWEN2AUDIO_MODEL,
+        max_tokens=10,
+        temperature=0.0,
+        messages=[{"role": "user", "content": embeds_content}],
+    )
+
+    text_out = text_resp.choices[0].message.content
+    embeds_out = embeds_resp.choices[0].message.content
+    assert text_out is not None and len(text_out) > 0
+    assert embeds_out is not None and len(embeds_out) > 0
+    assert text_out == embeds_out
diff --git a/tests/entrypoints/openai/chat_completion/test_chat_completion_with_mixed_image_embeds.py b/tests/entrypoints/openai/chat_completion/test_chat_completion_with_mixed_image_embeds.py
new file mode 100644
index 000000000000..dbbed3c47127
--- /dev/null
+++ b/tests/entrypoints/openai/chat_completion/test_chat_completion_with_mixed_image_embeds.py
@@ -0,0 +1,212 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""E2E tests for mixing `prompt_embeds` with image content parts in a single
+Chat Completions request.
+"""
+
+import json
+
+import openai
+import pytest
+import pytest_asyncio
+import safetensors
+import torch
+import torch.nn as nn
+from huggingface_hub import hf_hub_download
+from transformers import AutoTokenizer
+
+from tests.utils import RemoteOpenAIServer
+from vllm.assets.image import ImageAsset
+from vllm.multimodal.utils import encode_image_url
+from vllm.utils.serial_utils import tensor2base64
+
+MODEL_NAME = "Qwen/Qwen2-VL-2B-Instruct"
+
+# Use the model's native dtype to skip the implicit cast inside
+# `safe_load_prompt_embeds` (mismatched floating-point dtypes are cast to the
+# model's dtype automatically).
+MODEL_DTYPE = torch.bfloat16
+
+
+@pytest.fixture(scope="module")
+def server_args() -> list[str]:
+    return [
+        "--dtype",
+        "bfloat16",
+        "--max-model-len",
+        "2048",
+        "--max-num-seqs",
+        "4",
+        "--enforce-eager",
+        "--gpu-memory-utilization",
+        "0.4",
+        "--limit-mm-per-prompt",
+        json.dumps({"image": 1}),
+        "--enable-prompt-embeds",
+        "--enable-mm-embeds",
+    ]
+
+
+@pytest.fixture(scope="module")
+def server(server_args):
+    with RemoteOpenAIServer(
+        MODEL_NAME,
+        server_args,
+        max_wait_seconds=600,
+    ) as remote_server:
+        yield remote_server
+
+
+@pytest_asyncio.fixture
+async def client(server):
+    async with server.get_async_client() as async_client:
+        yield async_client
+
+
+@pytest.fixture(scope="module")
+def image_url() -> str:
+    """Stable real image as a data URL, kept identical across both the
+    text and prompt_embeds requests so any output difference must come from
+    how the text content is delivered."""
+    return encode_image_url(ImageAsset("stop_sign").pil_image)
+
+
+@pytest.fixture(scope="module")
+def aligned_content_and_embeds_b64() -> tuple[str, str]:
+    """`(content, base64_embeds)` where the embeddings are the model's
+    embedding of `content` tokenized WITHOUT special tokens.
+
+    Loads only the `embed_tokens` shard from disk on CPU instead of the full
+    model on GPU, so the fixture has zero VRAM footprint and won't contend
+    with the running vLLM server.
+    """
+    content = "Describe this image."
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+
+    index_path = hf_hub_download(MODEL_NAME, "model.safetensors.index.json")
+    with open(index_path) as f:
+        weight_map = json.load(f)["weight_map"]
+    embed_key = next(k for k in weight_map if k.endswith("embed_tokens.weight"))
+    shard_path = hf_hub_download(MODEL_NAME, weight_map[embed_key])
+    with safetensors.safe_open(shard_path, framework="pt", device="cpu") as f:
+        embed_weight = f.get_tensor(embed_key)
+    embed_layer = nn.Embedding.from_pretrained(embed_weight.to(MODEL_DTYPE))
+
+    ids = tokenizer(content, add_special_tokens=False, return_tensors="pt").input_ids
+    embeds = embed_layer(ids).squeeze(0)
+    return content, tensor2base64(embeds)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "image_first",
+    [True, False],
+    ids=["image_url-then-text", "text-then-image_url"],
+)
+async def test_text_content_and_prompt_embeds_match_with_image_url(
+    client: openai.AsyncOpenAI,
+    image_url: str,
+    aligned_content_and_embeds_b64: tuple[str, str],
+    image_first: bool,
+):
+    """Same content as text vs `prompt_embeds` should yield identical Chat
+    Completions output when mixed with an `image_url` part in the same
+    message under greedy decoding.
+    """
+    content, encoded_text_embeds = aligned_content_and_embeds_b64
+
+    image_part = {"type": "image_url", "image_url": {"url": image_url}}
+    text_part = {"type": "text", "text": content}
+    embeds_part = {"type": "prompt_embeds", "data": encoded_text_embeds}
+
+    if image_first:
+        text_content = [image_part, text_part]
+        embeds_content = [image_part, embeds_part]
+    else:
+        text_content = [text_part, image_part]
+        embeds_content = [embeds_part, image_part]
+
+    text_resp = await client.chat.completions.create(
+        model=MODEL_NAME,
+        max_tokens=10,
+        temperature=0.0,
+        messages=[{"role": "user", "content": text_content}],
+    )
+    embeds_resp = await client.chat.completions.create(
+        model=MODEL_NAME,
+        max_tokens=10,
+        temperature=0.0,
+        messages=[{"role": "user", "content": embeds_content}],
+    )
+
+    text_out = text_resp.choices[0].message.content
+    embeds_out = embeds_resp.choices[0].message.content
+    assert text_out is not None and len(text_out) > 0
+    assert embeds_out is not None and len(embeds_out) > 0
+    assert text_out == embeds_out
+
+
+@pytest.fixture(scope="module")
+def image_embeds_b64() -> dict[str, str]:
+    """Synthetic but stable `image_embeds` for Qwen2-VL."""
+    grid = (1, 4, 4)
+    spatial_merge_size = 2
+    num_patches = (grid[1] // spatial_merge_size) * (grid[2] // spatial_merge_size)
+    text_hidden_size = 1536  # Qwen2-VL-2B
+    torch.manual_seed(0)
+    return {
+        "image_embeds": tensor2base64(
+            torch.randn(num_patches, text_hidden_size, dtype=MODEL_DTYPE)
+        ),
+        "image_grid_thw": tensor2base64(torch.tensor(grid)),
+    }
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "image_first",
+    [True, False],
+    ids=["image_embeds-then-text", "text-then-image_embeds"],
+)
+async def test_text_content_and_prompt_embeds_match_with_image_embeds(
+    client: openai.AsyncOpenAI,
+    image_embeds_b64: dict[str, str],
+    aligned_content_and_embeds_b64: tuple[str, str],
+    image_first: bool,
+):
+    """Same content as text vs `prompt_embeds` should yield identical Chat
+    Completions output when mixed with a precomputed `image_embeds` part in
+    the same message under greedy decoding.
+    """
+    content, encoded_text_embeds = aligned_content_and_embeds_b64
+
+    image_part = {"type": "image_embeds", "image_embeds": image_embeds_b64}
+    text_part = {"type": "text", "text": content}
+    embeds_part = {"type": "prompt_embeds", "data": encoded_text_embeds}
+
+    if image_first:
+        text_content = [image_part, text_part]
+        embeds_content = [image_part, embeds_part]
+    else:
+        text_content = [text_part, image_part]
+        embeds_content = [embeds_part, image_part]
+
+    text_resp = await client.chat.completions.create(
+        model=MODEL_NAME,
+        max_tokens=10,
+        temperature=0.0,
+        messages=[{"role": "user", "content": text_content}],
+    )
+    embeds_resp = await client.chat.completions.create(
+        model=MODEL_NAME,
+        max_tokens=10,
+        temperature=0.0,
+        messages=[{"role": "user", "content": embeds_content}],
+    )
+
+    text_out = text_resp.choices[0].message.content
+    embeds_out = embeds_resp.choices[0].message.content
+    assert text_out is not None and len(text_out) > 0
+    assert embeds_out is not None and len(embeds_out) > 0
+    assert text_out == embeds_out
diff --git a/tests/entrypoints/openai/chat_completion/test_chat_completion_with_prompt_embeds.py b/tests/entrypoints/openai/chat_completion/test_chat_completion_with_prompt_embeds.py
new file mode 100644
index 000000000000..1813d74798de
--- /dev/null
+++ b/tests/entrypoints/openai/chat_completion/test_chat_completion_with_prompt_embeds.py
@@ -0,0 +1,293 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""E2E tests for `prompt_embeds` content parts in the Chat Completions API."""
+
+import asyncio
+import io
+
+import openai
+import pybase64 as base64
+import pytest
+import pytest_asyncio
+import torch
+from openai import BadRequestError
+
+from tests.utils import VLLM_PATH, RemoteOpenAIServer
+
+MODEL_NAME = "facebook/opt-125m"
+CHAT_TEMPLATE = VLLM_PATH / "examples/template_chatml.jinja"
+# Matches `--dtype` in `server_args` to avoid an implicit cast in
+# `safe_load_prompt_embeds` (mismatched floating-point dtypes are cast to the
+# model's dtype automatically, we match here just to skip the conversion).
+SERVER_DTYPE: torch.dtype = torch.bfloat16
+
+
+@pytest.fixture(scope="module")
+def server_args() -> list[str]:
+    return [
+        "--dtype",
+        "bfloat16",
+        "--max-model-len",
+        "2048",
+        "--max-num-seqs",
+        "128",
+        "--enforce-eager",
+        "--chat-template",
+        str(CHAT_TEMPLATE),
+        # Prompt Embeds server args
+        "--enable-prompt-embeds",
+    ]
+
+
+@pytest.fixture(scope="module")
+def server(server_args):
+    with RemoteOpenAIServer(MODEL_NAME, server_args) as remote_server:
+        yield remote_server
+
+
+@pytest_asyncio.fixture
+async def client(server):
+    async with server.get_async_client() as async_client:
+        yield async_client
+
+
+def _encode_embeds(embeds: torch.Tensor) -> str:
+    buf = io.BytesIO()
+    torch.save(embeds, buf)
+    return base64.b64encode(buf.getvalue()).decode("utf-8")
+
+
+@pytest.fixture(scope="module")
+def prompt_embeds_b64(hf_runner) -> list[str]:
+    """Pre-compute embeddings for two short prompts and return as base64."""
+    prompts = ["Hello, my name is", "What is an LLM?"]
+    with hf_runner(MODEL_NAME) as hf_model:
+        embeddings = hf_model.get_prompt_embeddings(prompts)
+    # Cast to the server's dtype so `safe_load_prompt_embeds` doesn't need to
+    # convert on its own, the function accepts any floating-point dtype and
+    # will cast to the model's dtype, but matching up front skips the work.
+    return [_encode_embeds(e.to(SERVER_DTYPE)) for e in embeddings]
+
+
+@pytest.mark.asyncio
+async def test_single_prompt_embeds_part(
+    client: openai.AsyncOpenAI,
+    prompt_embeds_b64: list[str],
+):
+    """A user message with one prompt_embeds part + text."""
+    b64 = prompt_embeds_b64[0]
+    chat = await client.chat.completions.create(
+        model=MODEL_NAME,
+        max_tokens=5,
+        temperature=0.0,
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "prompt_embeds", "data": b64},
+                    {"type": "text", "text": "Continue:"},
+                ],
+            }
+        ],
+    )
+    assert chat.choices[0].message.content is not None
+    assert len(chat.choices[0].message.content) > 0
+
+
+@pytest.mark.asyncio
+async def test_multiple_prompt_embeds_parts(
+    client: openai.AsyncOpenAI,
+    prompt_embeds_b64: list[str],
+):
+    """Multiple prompt_embeds parts in a single message."""
+    b64_a, b64_b = prompt_embeds_b64
+    chat = await client.chat.completions.create(
+        model=MODEL_NAME,
+        max_tokens=5,
+        temperature=0.0,
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "prompt_embeds", "data": b64_a},
+                    {"type": "text", "text": " and "},
+                    {"type": "prompt_embeds", "data": b64_b},
+                ],
+            }
+        ],
+    )
+    assert chat.choices[0].message.content is not None
+    assert len(chat.choices[0].message.content) > 0
+
+
+@pytest.mark.asyncio
+async def test_multi_message_conversation(
+    client: openai.AsyncOpenAI,
+    prompt_embeds_b64: list[str],
+):
+    """prompt_embeds in both system and user messages."""
+    b64_sys, b64_usr = prompt_embeds_b64
+    chat = await client.chat.completions.create(
+        model=MODEL_NAME,
+        max_tokens=5,
+        temperature=0.0,
+        messages=[
+            {
+                "role": "system",
+                "content": [
+                    {"type": "text", "text": "You are helpful."},
+                    {"type": "prompt_embeds", "data": b64_sys},
+                ],
+            },
+            {
+                "role": "user",
+                "content": [
+                    {"type": "prompt_embeds", "data": b64_usr},
+                    {"type": "text", "text": "Summarize."},
+                ],
+            },
+        ],
+    )
+    assert chat.choices[0].message.content is not None
+    assert len(chat.choices[0].message.content) > 0
+
+
+@pytest.mark.asyncio
+async def test_streaming(
+    client: openai.AsyncOpenAI,
+    prompt_embeds_b64: list[str],
+):
+    """Streaming chat completion with prompt_embeds."""
+    b64 = prompt_embeds_b64[0]
+
+    # Non-streaming baseline.
+    baseline = await client.chat.completions.create(
+        model=MODEL_NAME,
+        max_tokens=5,
+        temperature=0.0,
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "prompt_embeds", "data": b64},
+                    {"type": "text", "text": "Continue:"},
+                ],
+            }
+        ],
+    )
+    expected = baseline.choices[0].message.content
+
+    # Streaming.
+    stream = await client.chat.completions.create(
+        model=MODEL_NAME,
+        max_tokens=5,
+        temperature=0.0,
+        stream=True,
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "prompt_embeds", "data": b64},
+                    {"type": "text", "text": "Continue:"},
+                ],
+            }
+        ],
+    )
+    chunks: list[str] = []
+    async for chunk in stream:
+        delta = chunk.choices[0].delta.content
+        if delta:
+            chunks.append(delta)
+    assert "".join(chunks) == expected
+
+
+@pytest.fixture(scope="module")
+def aligned_content_and_embeds_b64(hf_runner) -> tuple[str, str]:
+    """Return `(content, base64_embeds)` where the embeddings are the model's
+    embedding of `content` tokenized WITHOUT special tokens.
+    """
+    content = "Hello, my name is"
+    with hf_runner(MODEL_NAME) as hf_model:
+        ids = hf_model.tokenizer(
+            content, add_special_tokens=False, return_tensors="pt"
+        ).input_ids
+        ids = hf_model.wrap_device({"input_ids": ids})["input_ids"]
+        embed_layer = hf_model.model.get_input_embeddings()
+        embeds = embed_layer(ids).squeeze(0).to(SERVER_DTYPE).cpu()
+    return content, _encode_embeds(embeds)
+
+
+@pytest.mark.asyncio
+async def test_text_content_and_prompt_embeds_match(
+    client: openai.AsyncOpenAI,
+    aligned_content_and_embeds_b64: tuple[str, str],
+):
+    """Equal content in text and `prompt_embeds` should yield identical
+    Chat Completions output under greedy decoding.
+    """
+    content, encoded_embeds = aligned_content_and_embeds_b64
+
+    text_resp, embeds_resp = await asyncio.gather(
+        client.chat.completions.create(
+            model=MODEL_NAME,
+            max_tokens=10,
+            temperature=0.0,
+            messages=[{"role": "user", "content": content}],
+        ),
+        client.chat.completions.create(
+            model=MODEL_NAME,
+            max_tokens=10,
+            temperature=0.0,
+            messages=[
+                {
+                    "role": "user",
+                    "content": [{"type": "prompt_embeds", "data": encoded_embeds}],
+                }
+            ],
+        ),
+    )
+
+    text_out = text_resp.choices[0].message.content
+    embeds_out = embeds_resp.choices[0].message.content
+    assert text_out is not None and len(text_out) > 0
+    assert embeds_out is not None and len(embeds_out) > 0
+    assert text_out == embeds_out
+
+
+@pytest.mark.asyncio
+async def test_missing_data_field(
+    client: openai.AsyncOpenAI,
+):
+    """A prompt_embeds part without `data` should return a clear error."""
+    with pytest.raises(BadRequestError):
+        await client.chat.completions.create(
+            model=MODEL_NAME,
+            max_tokens=5,
+            messages=[
+                {
+                    "role": "user",
+                    "content": [{"type": "prompt_embeds"}],
+                }
+            ],
+        )
+
+
+@pytest.mark.asyncio
+async def test_invalid_base64(
+    client: openai.AsyncOpenAI,
+):
+    """Invalid base64 in the `data` field should return a clear error."""
+    with pytest.raises(BadRequestError):
+        await client.chat.completions.create(
+            model=MODEL_NAME,
+            max_tokens=5,
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "prompt_embeds", "data": "not_valid_base64!!"},
+                    ],
+                }
+            ],
+        )
diff --git a/tests/entrypoints/openai/chat_completion/test_chat_error.py b/tests/entrypoints/openai/chat_completion/test_chat_error.py
index 46070e4810be..582e0792156c 100644
--- a/tests/entrypoints/openai/chat_completion/test_chat_error.py
+++ b/tests/entrypoints/openai/chat_completion/test_chat_error.py
@@ -87,7 +87,6 @@ def _build_serving_chat(engine: AsyncLLM) -> OpenAIServingChat:
     serving_render = OpenAIServingRender(
         model_config=engine.model_config,
         renderer=engine.renderer,
-        io_processor=engine.io_processor,
         model_registry=models.registry,
         request_logger=None,
         chat_template=None,
@@ -123,7 +122,6 @@ async def test_chat_error_non_stream():
     mock_engine.errored = False
     mock_engine.model_config = MockModelConfig()
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
     mock_engine.renderer = _build_renderer(mock_engine.model_config)
 
     serving_chat = _build_serving_chat(mock_engine)
@@ -166,6 +164,58 @@ async def mock_generate(*args, **kwargs):
         await serving_chat.create_chat_completion(request)
 
 
+@pytest.mark.asyncio
+async def test_openai_chat_keeps_mm_cache_for_engine_execution():
+    mock_engine = MagicMock(spec=AsyncLLM)
+    mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.input_processor = MagicMock()
+    mock_engine.renderer = _build_renderer(mock_engine.model_config)
+
+    serving_chat = _build_serving_chat(mock_engine)
+
+    request = ChatCompletionRequest(
+        model=MODEL_NAME,
+        messages=[{"role": "user", "content": "Test prompt"}],
+    )
+
+    result = await serving_chat.render_chat_request(request)
+
+    assert isinstance(result, tuple)
+    assert (
+        serving_chat.openai_serving_render.preprocess_chat.call_args.kwargs[
+            "skip_mm_cache"
+        ]
+        is False
+    )
+
+
+@pytest.mark.asyncio
+async def test_renderer_only_chat_request_skips_mm_cache():
+    mock_engine = MagicMock(spec=AsyncLLM)
+    mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.input_processor = MagicMock()
+    mock_engine.renderer = _build_renderer(mock_engine.model_config)
+
+    serving_chat = _build_serving_chat(mock_engine)
+
+    request = ChatCompletionRequest(
+        model=MODEL_NAME,
+        messages=[{"role": "user", "content": "Test prompt"}],
+    )
+
+    result = await serving_chat.openai_serving_render.render_chat_request(request)
+
+    assert result.token_ids == [1, 2, 3]
+    assert (
+        serving_chat.openai_serving_render.preprocess_chat.call_args.kwargs[
+            "skip_mm_cache"
+        ]
+        is True
+    )
+
+
 @pytest.mark.asyncio
 async def test_chat_error_stream():
     """test finish_reason='error' returns 500 InternalServerError (streaming)"""
@@ -173,7 +223,6 @@ async def test_chat_error_stream():
     mock_engine.errored = False
     mock_engine.model_config = MockModelConfig()
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
     mock_engine.renderer = _build_renderer(mock_engine.model_config)
 
     serving_chat = _build_serving_chat(mock_engine)
diff --git a/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py b/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py
index 965b21351302..839793fde856 100644
--- a/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py
+++ b/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py
@@ -518,7 +518,13 @@ async def test_inconsistent_tool_choice_and_tools(
 
 
 @pytest.mark.asyncio
-async def test_max_tokens_with_tool_choice_required(client: openai.AsyncOpenAI):
+@pytest.mark.parametrize(
+    "tool_choice",
+    ["required", {"type": "function", "function": {"name": "get_current_weather"}}],
+)
+async def test_max_tokens_with_tool_choice_required(
+    client: openai.AsyncOpenAI, tool_choice
+):
     """ """
     models = await client.models.list()
     model_name: str = models.data[0].id
@@ -530,7 +536,7 @@ async def test_max_tokens_with_tool_choice_required(client: openai.AsyncOpenAI):
         max_completion_tokens=1,
         model=model_name,
         tools=tools,
-        tool_choice="required",
+        tool_choice=tool_choice,
     )
     # When `tool_choice="required"` and the tokens of `tools` exceed `max_tokens`,
     # both `tool_calls` and `content` should be empty.
@@ -538,4 +544,3 @@ async def test_max_tokens_with_tool_choice_required(client: openai.AsyncOpenAI):
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert len(choice.message.tool_calls) == 0
-    assert choice.message.content == ""
diff --git a/tests/entrypoints/openai/chat_completion/test_enable_force_include_usage.py b/tests/entrypoints/openai/chat_completion/test_enable_force_include_usage.py
index 1bc545e86464..75ddeb43ab77 100644
--- a/tests/entrypoints/openai/chat_completion/test_enable_force_include_usage.py
+++ b/tests/entrypoints/openai/chat_completion/test_enable_force_include_usage.py
@@ -67,58 +67,3 @@ async def test_chat_with_enable_force_include_usage(
             chunk.usage.prompt_tokens + chunk.usage.completion_tokens
         )
         last_completion_tokens = chunk.usage.completion_tokens
-
-
-@pytest.fixture(scope="module")
-def transcription_server_with_force_include_usage():
-    args = [
-        # use half precision for speed and memory savings in CI environment
-        "--dtype",
-        "bfloat16",
-        "--max-num-seqs",
-        "4",
-        "--enforce-eager",
-        "--enable-force-include-usage",
-        "--gpu-memory-utilization",
-        "0.2",
-    ]
-
-    with RemoteOpenAIServer("openai/whisper-large-v3-turbo", args) as remote_server:
-        yield remote_server
-
-
-@pytest_asyncio.fixture
-async def transcription_client_with_force_include_usage(
-    transcription_server_with_force_include_usage,
-):
-    async with (
-        transcription_server_with_force_include_usage.get_async_client() as async_client
-    ):
-        yield async_client
-
-
-@pytest.mark.asyncio
-async def test_transcription_with_enable_force_include_usage(
-    transcription_client_with_force_include_usage, winning_call
-):
-    res = (
-        await transcription_client_with_force_include_usage.audio.transcriptions.create(
-            model="openai/whisper-large-v3-turbo",
-            file=winning_call,
-            language="en",
-            temperature=0.0,
-            stream=True,
-            timeout=30,
-        )
-    )
-
-    async for chunk in res:
-        if not len(chunk.choices):
-            # final usage sent
-            usage = chunk.usage
-            assert isinstance(usage, dict)
-            assert usage["prompt_tokens"] > 0
-            assert usage["completion_tokens"] > 0
-            assert usage["total_tokens"] > 0
-        else:
-            assert not hasattr(chunk, "usage")
diff --git a/tests/entrypoints/openai/chat_completion/test_serving_chat.py b/tests/entrypoints/openai/chat_completion/test_serving_chat.py
index 89bb8a1fe9c6..45fae821af3b 100644
--- a/tests/entrypoints/openai/chat_completion/test_serving_chat.py
+++ b/tests/entrypoints/openai/chat_completion/test_serving_chat.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import asyncio
+import json
 from contextlib import suppress
 from dataclasses import dataclass, field
 from typing import Any
@@ -537,6 +538,7 @@ class MockModelConfig:
     is_encoder_decoder: bool = False
     is_multimodal_model: bool = False
     renderer_num_workers: int = 1
+    enable_prompt_embeds: bool = False
 
     def get_diff_sampling_param(self):
         return self.diff_sampling_param or {}
@@ -566,7 +568,6 @@ def _build_serving_render(
     return OpenAIServingRender(
         model_config=engine.model_config,
         renderer=engine.renderer,
-        io_processor=engine.io_processor,
         model_registry=model_registry,
         request_logger=None,
         chat_template=CHAT_TEMPLATE,
@@ -598,8 +599,8 @@ def _build_serving_chat(engine: AsyncLLM) -> OpenAIServingChat:
 class MockEngine:
     model_config: MockModelConfig = field(default_factory=MockModelConfig)
     input_processor: MagicMock = field(default_factory=MagicMock)
-    io_processor: MagicMock = field(default_factory=MagicMock)
     renderer: MagicMock = field(default_factory=MagicMock)
+    errored: bool = False
 
 
 async def _async_serving_chat_init():
@@ -631,7 +632,6 @@ async def test_serving_chat_returns_correct_model_name():
     mock_engine.errored = False
     mock_engine.model_config = MockModelConfig()
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
     mock_engine.renderer = _build_renderer(mock_engine.model_config)
 
     serving_chat = _build_serving_chat(mock_engine)
@@ -661,7 +661,6 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine.errored = False
     mock_engine.model_config = MockModelConfig()
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
     mock_engine.renderer = _build_renderer(mock_engine.model_config)
 
     serving_chat = _build_serving_chat(mock_engine)
@@ -692,7 +691,6 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine.errored = False
     mock_engine.model_config = mock_model_config
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
     mock_engine.renderer = _build_renderer(mock_engine.model_config)
 
     # Initialize the serving chat
@@ -736,7 +734,6 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine.errored = False
     mock_engine.model_config = mock_model_config
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
     mock_engine.renderer = _build_renderer(mock_engine.model_config)
 
     serving_chat = _build_serving_chat(mock_engine)
@@ -778,7 +775,6 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine.errored = False
     mock_engine.model_config = mock_model_config
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
     mock_engine.renderer = _build_renderer(mock_engine.model_config)
 
     # Initialize the serving chat
@@ -812,6 +808,101 @@ async def test_serving_chat_should_set_correct_max_tokens():
     assert mock_engine.generate.call_args.args[1].max_tokens == 5
 
 
+@pytest.mark.asyncio
+async def test_serving_chat_truncate_prompt_tokens_max_token_accounting():
+    """When truncate_prompt_tokens is set, max_tokens must be calculated using
+    the truncated prompt length, not the original prompt length.
+
+    Regression: without the fix, get_max_tokens received the untruncated prompt
+    length, causing the output budget to be underestimated.
+    """
+    mock_engine = MagicMock(spec=AsyncLLM)
+    mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.input_processor = MagicMock()
+    mock_engine.renderer = _build_renderer(mock_engine.model_config)
+
+    serving_chat = _build_serving_chat(mock_engine)
+
+    # "what is 1+1?" tokenizes to 7 tokens with the test chat template
+    # (max_model_len=100 -> max_tokens = 93 without truncation, confirmed by
+    # test_serving_chat_should_set_correct_max_tokens above).
+    messages = [{"role": "user", "content": "what is 1+1?"}]
+
+    # Baseline: no truncation -> max_tokens = 100 - 7 = 93.
+    req = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
+    with suppress(Exception):
+        await serving_chat.create_chat_completion(req)
+    assert mock_engine.generate.call_args.args[1].max_tokens == 93
+
+    # With truncate_prompt_tokens=5 (less than 7): the effective prompt length
+    # is 5, so max_tokens should be 100 - 5 = 95, not 93.
+    req = ChatCompletionRequest(
+        model=MODEL_NAME,
+        messages=messages,
+        truncate_prompt_tokens=5,
+    )
+    with suppress(Exception):
+        await serving_chat.create_chat_completion(req)
+    assert mock_engine.generate.call_args.args[1].max_tokens == 95
+
+    # With truncate_prompt_tokens=-1 (meaning use full max_model_len as the
+    # truncation limit, i.e., no practical truncation vs the window): effective
+    # length = min(7, 100) = 7 -> max_tokens = 93 again.
+    req = ChatCompletionRequest(
+        model=MODEL_NAME,
+        messages=messages,
+        truncate_prompt_tokens=-1,
+    )
+    with suppress(Exception):
+        await serving_chat.create_chat_completion(req)
+    assert mock_engine.generate.call_args.args[1].max_tokens == 93
+
+
+@pytest.mark.asyncio
+async def test_serving_chat_truncation_side_controls_prompt_truncation():
+    model_config = MockModelConfig()
+    model_config.model = MODEL_NAME_SHORT
+    model_config.tokenizer = MODEL_NAME_SHORT
+    mock_engine = MockEngine(
+        model_config=model_config,
+        renderer=_build_renderer(model_config),
+    )
+
+    serving_chat = _build_serving_chat(mock_engine)
+    messages = [
+        {
+            "role": "user",
+            "content": "Summarize how prompt truncation works in one sentence.",
+        }
+    ]
+
+    full_token_ids = await _render_chat_prompt_token_ids(
+        serving_chat,
+        messages,
+        model_name=MODEL_NAME_SHORT,
+    )
+    assert len(full_token_ids) > 4
+
+    right_token_ids = await _render_chat_prompt_token_ids(
+        serving_chat,
+        messages,
+        model_name=MODEL_NAME_SHORT,
+        truncate_prompt_tokens=4,
+        truncation_side="right",
+    )
+    assert right_token_ids == full_token_ids[:4]
+
+    left_token_ids = await _render_chat_prompt_token_ids(
+        serving_chat,
+        messages,
+        model_name=MODEL_NAME_SHORT,
+        truncate_prompt_tokens=4,
+        truncation_side="left",
+    )
+    assert left_token_ids == full_token_ids[-4:]
+
+
 @pytest.mark.asyncio
 async def test_serving_chat_mistral_token_ids_prompt_is_validated():
     """Regression test: when the Mistral tokenizer path returns token IDs
@@ -822,7 +913,6 @@ async def test_serving_chat_mistral_token_ids_prompt_is_validated():
     mock_engine.errored = False
     mock_engine.model_config = MockModelConfig(skip_tokenizer_init=True)
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
 
     mock_tokenizer = MagicMock(spec=MistralTokenizer)
     mock_renderer = MistralRenderer(
@@ -862,7 +952,6 @@ async def test_serving_chat_mistral_token_ids_prompt_too_long_is_rejected():
     mock_engine.errored = False
     mock_engine.model_config = MockModelConfig(skip_tokenizer_init=True)
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
 
     mock_tokenizer = MagicMock(spec=MistralTokenizer)
     mock_renderer = MistralRenderer(
@@ -905,7 +994,6 @@ async def test_serving_chat_could_load_correct_generation_config():
     mock_engine.errored = False
     mock_engine.model_config = mock_model_config
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
     mock_engine.renderer = _build_renderer(mock_engine.model_config)
 
     # Initialize the serving chat
@@ -951,7 +1039,6 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type):
     mock_engine.errored = False
     mock_engine.model_config = mock_model_config
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
     mock_engine.renderer = _build_renderer(mock_engine.model_config)
 
     serving_chat = _build_serving_chat(mock_engine)
@@ -1002,7 +1089,6 @@ async def test_serving_chat_data_parallel_rank_extraction():
     mock_engine.errored = False
     mock_engine.model_config = MockModelConfig()
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
     mock_engine.renderer = _build_renderer(mock_engine.model_config)
 
     # Mock the generate method to return an async generator
@@ -1070,6 +1156,37 @@ async def mock_generate(*args, **kwargs):
     assert mock_engine.generate.call_args.kwargs["data_parallel_rank"] is None
 
 
+async def _render_chat_prompt_token_ids(
+    serving_chat: OpenAIServingChat,
+    messages: list[dict[str, str]],
+    *,
+    model_name: str = MODEL_NAME,
+    truncate_prompt_tokens: int | None = None,
+    truncation_side: str | None = None,
+) -> list[int]:
+    request = ChatCompletionRequest(
+        model=model_name,
+        messages=messages,
+        max_tokens=1,
+        temperature=0.0,
+        return_token_ids=True,
+        truncate_prompt_tokens=truncate_prompt_tokens,
+        truncation_side=truncation_side,
+    )
+
+    result = await serving_chat.render_chat_request(request)
+    assert not isinstance(result, ErrorResponse)
+
+    _, engine_inputs = result
+    assert len(engine_inputs) == 1
+
+    prompt_token_ids = serving_chat._extract_prompt_components(
+        engine_inputs[0]
+    ).token_ids
+    assert prompt_token_ids is not None
+    return prompt_token_ids
+
+
 class TestServingChatWithHarmony:
     """
     These tests ensure Chat Completion requests are being properly converted into
@@ -1094,7 +1211,6 @@ def mock_engine(self) -> AsyncLLM:
         mock_engine.errored = False
         mock_engine.model_config = MockModelConfig()
         mock_engine.input_processor = MagicMock()
-        mock_engine.io_processor = MagicMock()
         mock_engine.renderer = _build_renderer(mock_engine.model_config)
         return mock_engine
 
@@ -1731,7 +1847,6 @@ async def test_tool_choice_validation_without_parser():
     mock_engine.errored = False
     mock_engine.model_config = MockModelConfig()
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
     mock_engine.renderer = _build_renderer(mock_engine.model_config)
 
     models = OpenAIServingModels(
@@ -1792,6 +1907,169 @@ async def test_tool_choice_validation_without_parser():
     assert "--tool-call-parser" in response_named.error.message
 
 
+@pytest.mark.asyncio
+async def test_streaming_n_gt1_independent_tool_parsers():
+    """n>1 streaming must use independent parser instances
+    and token-id histories per choice.
+    """
+    mock_engine = MagicMock(spec=AsyncLLM)
+    mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.input_processor = MagicMock()
+    mock_engine.renderer = _build_renderer(mock_engine.model_config)
+
+    models = OpenAIServingModels(
+        engine_client=mock_engine,
+        base_model_paths=BASE_MODEL_PATHS,
+    )
+    openai_serving_render = _build_serving_render(mock_engine, models.registry)
+
+    serving_chat = OpenAIServingChat(
+        mock_engine,
+        models,
+        response_role="assistant",
+        openai_serving_render=openai_serving_render,
+        chat_template=CHAT_TEMPLATE,
+        chat_template_content_format="auto",
+        request_logger=None,
+        enable_auto_tools=True,
+        tool_parser="hermes",
+    )
+
+    tokenizer = get_tokenizer(MODEL_NAME)
+
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get weather",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                    "required": ["city"],
+                },
+            },
+        }
+    ]
+
+    num_choices = 2
+
+    request = ChatCompletionRequest(
+        model=MODEL_NAME,
+        messages=[{"role": "user", "content": "test"}],
+        n=num_choices,
+        stream=True,
+        tools=tools,
+        tool_choice="auto",
+    )
+
+    tool_call_text = (
+        "<tool_call>\n"
+        '{"name": "get_weather", "arguments": {"city": "Tokyo"}}\n'
+        "</tool_call>"
+    )
+    all_token_ids = tokenizer.encode(tool_call_text, add_special_tokens=False)
+
+    # Compute proper delta text for each token so that concatenated deltas
+    # reproduce the original string exactly.
+    steps: list[tuple[str, int]] = []
+    prev_decoded = ""
+    for i, tid in enumerate(all_token_ids):
+        decoded_so_far = tokenizer.decode(all_token_ids[: i + 1])
+        delta = decoded_so_far[len(prev_decoded) :]
+        steps.append((delta, tid))
+        prev_decoded = decoded_so_far
+
+    async def result_generator():
+        for delta_text, token_id in steps:
+            yield RequestOutput(
+                request_id="test-req",
+                prompt="test",
+                prompt_token_ids=[1, 2, 3],
+                prompt_logprobs=None,
+                outputs=[
+                    CompletionOutput(
+                        index=choice_idx,
+                        text=delta_text,
+                        token_ids=[token_id],
+                        cumulative_logprob=0.0,
+                        logprobs=None,
+                    )
+                    for choice_idx in range(num_choices)
+                ],
+                finished=False,
+            )
+        # Final output with finish_reason
+        yield RequestOutput(
+            request_id="test-req",
+            prompt="test",
+            prompt_token_ids=[1, 2, 3],
+            prompt_logprobs=None,
+            outputs=[
+                CompletionOutput(
+                    index=choice_idx,
+                    text="",
+                    token_ids=[],
+                    cumulative_logprob=0.0,
+                    logprobs=None,
+                    finish_reason="stop",
+                )
+                for choice_idx in range(num_choices)
+            ],
+            finished=True,
+        )
+
+    # Collect tool-call deltas per choice from the SSE stream.
+    tc_deltas_by_choice: dict[int, list[dict]] = {i: [] for i in range(num_choices)}
+    async for chunk_str in serving_chat.chat_completion_stream_generator(
+        request=request,
+        result_generator=result_generator(),
+        request_id="test-req",
+        model_name=MODEL_NAME,
+        conversation=[],
+        tokenizer=tokenizer,
+        request_metadata=RequestResponseMetadata(
+            request_id="test-req",
+            model_name=MODEL_NAME,
+        ),
+    ):
+        if not chunk_str.strip() or "data: [DONE]" in chunk_str:
+            continue
+        if chunk_str.startswith("data: "):
+            data = json.loads(chunk_str[6:].strip())
+            for choice in data.get("choices", []):
+                idx = choice["index"]
+                delta = choice.get("delta", {})
+                if delta.get("tool_calls"):
+                    for tc in delta["tool_calls"]:
+                        tc_deltas_by_choice[idx].append(tc)
+
+    # Both choices must independently produce the correct tool call.
+    for choice_idx in range(num_choices):
+        deltas = tc_deltas_by_choice[choice_idx]
+        assert len(deltas) > 0, (
+            f"Choice {choice_idx}: expected tool-call deltas but got none"
+        )
+
+        name = None
+        args_buf = ""
+        for tc in deltas:
+            fn = tc.get("function", {})
+            if fn.get("name"):
+                name = fn["name"]
+            if fn.get("arguments"):
+                args_buf += fn["arguments"]
+
+        assert name == "get_weather", (
+            f"Choice {choice_idx}: expected 'get_weather', got {name!r}"
+        )
+        parsed_args = json.loads(args_buf)
+        assert parsed_args == {"city": "Tokyo"}, (
+            f"Choice {choice_idx}: expected {{'city': 'Tokyo'}}, got {parsed_args}"
+        )
+
+
 class TestCreateRemainingArgsDelta:
     """Tests for _create_remaining_args_delta helper function.
 
diff --git a/tests/entrypoints/openai/chat_completion/test_serving_chat_stream_harmony.py b/tests/entrypoints/openai/chat_completion/test_serving_chat_stream_harmony.py
index 9f8c36f0473d..0a0802a79392 100644
--- a/tests/entrypoints/openai/chat_completion/test_serving_chat_stream_harmony.py
+++ b/tests/entrypoints/openai/chat_completion/test_serving_chat_stream_harmony.py
@@ -199,12 +199,107 @@ def test_returns_preambles_as_content(self):
         assert delta_message.content == delta_text
         assert tools_streamed is False
 
+    @pytest.mark.parametrize("channel", ["commentary", "analysis"])
+    @patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
+    def test_new_tool_call_without_functions_prefix(
+        self, mock_make_tool_call_id, channel
+    ):
+        mock_make_tool_call_id.return_value = "call_bare123"
+        parser = MockStreamableParser()
+
+        token_states = [TokenState(channel=channel, recipient="get_weather", text="")]
+
+        delta_message, tools_streamed = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient=None,
+            include_reasoning=False,
+        )
+
+        assert delta_message is not None
+        assert len(delta_message.tool_calls) == 1
+        tool_call = delta_message.tool_calls[0]
+        assert tool_call.id == "call_bare123"
+        assert tool_call.type == "function"
+        assert tool_call.function.name == "get_weather"
+        assert tool_call.function.arguments == ""
+        assert tool_call.index == 0
+        assert tools_streamed is True
+
+    @pytest.mark.parametrize("channel", ["commentary", "analysis"])
+    def test_tool_call_argument_streaming_without_functions_prefix(self, channel):
+        parser = MockStreamableParser()
+        args_text = '{"location": "Paris"}'
+
+        token_states = [
+            TokenState(channel=channel, recipient="get_weather", text=args_text)
+        ]
+
+        delta_message, tools_streamed = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient="get_weather",
+            include_reasoning=False,
+        )
+
+        assert delta_message is not None
+        tool_call = delta_message.tool_calls[0]
+        assert tool_call.id is None
+        assert tool_call.function.arguments == args_text
+        assert tool_call.index == 0
+        assert tools_streamed is True
+
+    def test_tool_call_index_from_previous_messages_without_functions_prefix(self):
+        messages = [
+            MockMessage(channel="commentary", recipient="tool1"),
+        ]
+        parser = MockStreamableParser(messages=messages)
+
+        token_states = [
+            TokenState(channel="commentary", recipient="tool2", text="args")
+        ]
+
+        delta_message, _ = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient="tool2",
+            include_reasoning=False,
+        )
+
+        assert delta_message.tool_calls[0].index == 1
+
+    @pytest.mark.parametrize("channel", ["commentary", "analysis"])
+    @patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
+    def test_new_tool_call_dotted_function_name(self, mock_make_tool_call_id, channel):
+        mock_make_tool_call_id.return_value = "call_dotted123"
+        parser = MockStreamableParser()
+
+        token_states = [TokenState(channel=channel, recipient="math.sum", text="")]
+
+        delta_message, tools_streamed = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient=None,
+            include_reasoning=False,
+        )
+
+        assert delta_message is not None
+        assert len(delta_message.tool_calls) == 1
+        tool_call = delta_message.tool_calls[0]
+        assert tool_call.id == "call_dotted123"
+        assert tool_call.type == "function"
+        assert tool_call.function.name == "math.sum"
+        assert tool_call.function.arguments == ""
+        assert tool_call.index == 0
+        assert tools_streamed is True
+
     @pytest.mark.parametrize(
         "channel,recipient",
         [
             (None, None),
             ("unknown_channel", None),
             ("commentary", "browser.search"),
+            ("commentary", "assistant"),
         ],
     )
     def test_returns_none_for_invalid_inputs(self, channel, recipient):
@@ -348,3 +443,92 @@ def test_tool_call_index_consistency_with_ongoing_call(self, mock_make_id):
         assert tool_c_args.function.arguments == '{"key_c": "val_c"}'
 
         assert delta_message.content == "Thinking... Thinking again..."
+
+
+class TestToolCallsOnNonStandardChannels:
+    """Tool calls are detected by recipient, not channel.
+
+    Models sometimes emit tool calls on unexpected channels (e.g. ``comment``
+    instead of ``commentary``).  These tests verify that the streaming delta
+    extraction is channel-agnostic for tool call detection.
+    """
+
+    @patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
+    def test_prefixed_tool_call_on_comment_channel(self, mock_make_tool_call_id):
+        mock_make_tool_call_id.return_value = "call_comment_chan"
+        parser = MockStreamableParser()
+
+        token_states = [
+            TokenState(channel="comment", recipient="functions.get_weather", text="")
+        ]
+
+        delta_message, tools_streamed = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient=None,
+            include_reasoning=False,
+        )
+
+        assert delta_message is not None
+        assert len(delta_message.tool_calls) == 1
+        assert delta_message.tool_calls[0].function.name == "get_weather"
+        assert tools_streamed is True
+
+    @patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
+    def test_bare_tool_call_on_comment_channel(self, mock_make_tool_call_id):
+        mock_make_tool_call_id.return_value = "call_bare_comment"
+        parser = MockStreamableParser()
+
+        token_states = [TokenState(channel="comment", recipient="get_weather", text="")]
+
+        delta_message, tools_streamed = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient=None,
+            include_reasoning=False,
+        )
+
+        assert delta_message is not None
+        assert len(delta_message.tool_calls) == 1
+        assert delta_message.tool_calls[0].function.name == "get_weather"
+        assert tools_streamed is True
+
+    def test_tool_call_arguments_on_comment_channel(self):
+        parser = MockStreamableParser()
+        args_text = '{"location": "Paris"}'
+
+        token_states = [
+            TokenState(
+                channel="comment", recipient="functions.get_weather", text=args_text
+            )
+        ]
+
+        delta_message, tools_streamed = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient="functions.get_weather",
+            include_reasoning=False,
+        )
+
+        assert delta_message is not None
+        assert delta_message.tool_calls[0].function.arguments == args_text
+        assert tools_streamed is True
+
+    def test_base_index_counts_tool_calls_on_comment_channel(self):
+        messages = [
+            MockMessage(channel="comment", recipient="functions.tool1"),
+        ]
+        parser = MockStreamableParser(messages=messages)
+
+        token_states = [
+            TokenState(channel="commentary", recipient="functions.tool2", text="args")
+        ]
+
+        delta_message, _ = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient="functions.tool2",
+            include_reasoning=False,
+        )
+
+        assert delta_message.tool_calls[0].index == 1
diff --git a/tests/entrypoints/openai/chat_completion/test_thinking_token_budget.py b/tests/entrypoints/openai/chat_completion/test_thinking_token_budget.py
new file mode 100644
index 000000000000..ae2b597e13ac
--- /dev/null
+++ b/tests/entrypoints/openai/chat_completion/test_thinking_token_budget.py
@@ -0,0 +1,333 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""E2E tests for ``thinking_token_budget`` with reasoning models.
+
+Covers Qwen3-0.6B and Qwen3.5 FP8 + MTP.
+"""
+
+import asyncio
+import json
+from typing import Literal
+
+import openai
+import pytest
+import pytest_asyncio
+
+from tests.utils import RemoteOpenAIServer, multi_gpu_only, requires_fp8
+from vllm.platforms import current_platform
+from vllm.tokenizers import get_tokenizer
+
+MODEL_NAME = "Qwen/Qwen3-0.6B"
+QWEN35_FP8_MTP_MODEL = "Qwen/Qwen3.5-35B-A3B-FP8"
+MESSAGES = [{"role": "user", "content": "What is 1+1? Be concise."}]
+THINK_BUDGET = 5
+
+REASONING_START_STR = "<think>"
+REASONING_END_STR = "</think>"
+
+
+def _count_reasoning_decode_token_ids_between_markers(
+    full_token_ids: list[int],
+    reasoning_start_ids: list[int],
+    reasoning_end_ids: list[int],
+) -> int | None:
+    """Count decode tokens in the thinking span (after last start, before first end)."""
+
+    if not reasoning_start_ids or not reasoning_end_ids:
+        raise ValueError("reasoning marker token id lists must be non-empty")
+
+    def _last_subseq_index(haystack: list[int], needle: list[int]) -> int:
+        n = len(needle)
+        if n > len(haystack):
+            return -1
+        for i in range(len(haystack) - n, -1, -1):
+            if haystack[i : i + n] == needle:
+                return i
+        return -1
+
+    last_start = _last_subseq_index(full_token_ids, reasoning_start_ids)
+    if last_start < 0:
+        return None
+
+    pos_after_start = last_start + len(reasoning_start_ids)
+    end_n = len(reasoning_end_ids)
+    for j in range(pos_after_start, len(full_token_ids) - end_n + 1):
+        if full_token_ids[j : j + end_n] == reasoning_end_ids:
+            return j - pos_after_start
+    return len(full_token_ids) - pos_after_start
+
+
+@pytest.fixture(scope="module")
+def server():
+    args = [
+        "--reasoning-parser",
+        "qwen3",
+        "--reasoning-config",
+        '{"reasoning_start_str": "<think>", "reasoning_end_str": "</think>"}',
+        "--max-model-len",
+        "2048",
+        "--enforce-eager",
+        "--gpu-memory-utilization",
+        "0.4",
+        "--no-async-scheduling",
+    ]
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+        yield remote_server
+
+
+@pytest.fixture(scope="module")
+def server_with_auto_reasoning_config():
+    args = [
+        "--reasoning-parser",
+        "qwen3",
+        "--max-model-len",
+        "2048",
+        "--enforce-eager",
+        "--gpu-memory-utilization",
+        "0.4",
+        "--no-async-scheduling",
+    ]
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+        yield remote_server
+
+
+@pytest.fixture(scope="module")
+def server_qwen35_fp8_mtp_tp2():
+    """Qwen3.5-35B FP8 with MTP speculative decoding and tensor parallel size 2."""
+    if current_platform.device_count() < 2:
+        pytest.skip("Need at least 2 GPUs for --tensor-parallel-size 2")
+    if not current_platform.supports_fp8():
+        pytest.skip("FP8 is not supported on this platform")
+
+    spec_cfg = {
+        "method": "mtp",
+        "num_speculative_tokens": 2,
+        "max_model_len": 32768,
+    }
+    args = [
+        "--tensor-parallel-size",
+        "2",
+        "--max-model-len",
+        "32768",
+        "--speculative-config",
+        json.dumps(spec_cfg),
+        "--reasoning-parser",
+        "qwen3",
+        "--reasoning-config",
+        json.dumps(
+            {
+                "reasoning_start_str": REASONING_START_STR,
+                "reasoning_end_str": REASONING_END_STR,
+            }
+        ),
+    ]
+    # With 4+ GPUs, run TP=2 on physical devices 2,3 so module-scoped 0.6B servers
+    # on 0,1 do not exhaust memory on the same devices as this worker.
+    env_dict = None
+    if current_platform.device_count() >= 4:
+        env_dict = {"CUDA_VISIBLE_DEVICES": "2,3"}
+
+    with RemoteOpenAIServer(
+        QWEN35_FP8_MTP_MODEL,
+        args,
+        max_wait_seconds=3000,
+        env_dict=env_dict,
+    ) as remote_server:
+        yield remote_server
+
+
+@pytest_asyncio.fixture
+async def client(request, server, server_with_auto_reasoning_config):
+    server_map = {
+        "default": server,
+        "auto_config": server_with_auto_reasoning_config,
+    }
+    target_server = server_map[request.param]
+    async with target_server.get_async_client() as async_client:
+        yield async_client
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("client", ["default", "auto_config"], indirect=True)
+async def test_thinking_token_budget_mixed_requests(client: openai.AsyncOpenAI):
+    """Test that mixed requests (some with thinking_token_budget, some without)
+    complete successfully without errors."""
+
+    response_with_budget = await client.chat.completions.create(
+        model=MODEL_NAME,
+        messages=MESSAGES,
+        max_tokens=100,
+        extra_body={"thinking_token_budget": THINK_BUDGET},
+    )
+    response_without_budget = await client.chat.completions.create(
+        model=MODEL_NAME,
+        messages=MESSAGES,
+        max_tokens=100,
+    )
+
+    msg_with = response_with_budget.choices[0].message
+    msg_without = response_without_budget.choices[0].message
+
+    assert msg_with.content or getattr(msg_with, "reasoning", None)
+    assert msg_without.content or getattr(msg_without, "reasoning", None)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("client", ["default", "auto_config"], indirect=True)
+async def test_thinking_token_budget_limits_reasoning(client: openai.AsyncOpenAI):
+    """Test that thinking_token_budget limits the number of reasoning tokens.
+
+    Counts non-empty streaming ``delta.reasoning`` chunks (coarse proxy; each
+    chunk may represent multiple decode tokens — see
+    ``_count_reasoning_decode_token_ids_between_markers`` and the Qwen3.5 MTP
+    test for id-based checks).
+    """
+
+    reasoning_token_count = 0
+    stream = await client.chat.completions.create(
+        model=MODEL_NAME,
+        messages=MESSAGES,
+        max_tokens=100,
+        stream=True,
+        extra_body={"thinking_token_budget": THINK_BUDGET},
+    )
+    async for chunk in stream:
+        delta = chunk.choices[0].delta
+        if getattr(delta, "reasoning", None):
+            reasoning_token_count += 1
+
+    assert reasoning_token_count == THINK_BUDGET, (
+        f"reasoning tokens ({reasoning_token_count}) exceeded "
+        f"thinking_token_budget ({THINK_BUDGET})"
+    )
+
+
+@pytest.mark.asyncio
+@multi_gpu_only(num_gpus=2)
+@requires_fp8
+async def test_thinking_token_budget_qwen35_fp8_mtp_concurrent_mixed_budget_and_plain(
+    server_qwen35_fp8_mtp_tp2,
+):
+    """Concurrent chat requests: some with ``thinking_token_budget``, some without.
+
+    Exercises the scheduler / input processor under a mixed batch on the same
+    Qwen3.5 FP8 + MTP (TP=2) server. Budgeted calls are checked with
+    ``_count_reasoning_decode_token_ids_between_markers`` on full token ids.
+    """
+
+    _batch_spec: list[tuple[Literal["budget"], int] | tuple[Literal["plain"], None]] = [
+        ("budget", 1),
+        ("budget", 12),
+        ("plain", None),
+        ("budget", 20),
+        ("budget", 14),
+        ("plain", None),
+        ("plain", None),
+        ("budget", 12),
+        ("plain", None),
+    ]
+
+    tokenizer = get_tokenizer(tokenizer_name=QWEN35_FP8_MTP_MODEL)
+    start_ids = list(tokenizer.encode(REASONING_START_STR, add_special_tokens=False))
+    end_ids = list(tokenizer.encode(REASONING_END_STR, add_special_tokens=False))
+
+    async with server_qwen35_fp8_mtp_tp2.get_async_client() as client:
+
+        async def budgeted_call(expected_budget: int):
+            return await client.chat.completions.create(
+                model=QWEN35_FP8_MTP_MODEL,
+                messages=MESSAGES,
+                max_tokens=256,
+                stream=False,
+                extra_body={
+                    "thinking_token_budget": expected_budget,
+                    "return_token_ids": True,
+                },
+            )
+
+        async def plain_call():
+            return await client.chat.completions.create(
+                model=QWEN35_FP8_MTP_MODEL,
+                messages=MESSAGES,
+                max_tokens=256,
+                stream=False,
+            )
+
+        coros = []
+        for row in _batch_spec:
+            if row[0] == "budget":
+                b = row[1]
+                assert isinstance(b, int)
+                coros.append(budgeted_call(b))
+            else:
+                coros.append(plain_call())
+        results = await asyncio.gather(*coros)
+
+    for i, (response, (kind, expected_budget)) in enumerate(
+        zip(results, _batch_spec, strict=True)
+    ):
+        msg = response.choices[0].message
+        assert msg.content or getattr(msg, "reasoning", None), (
+            f"index {i} ({kind}): empty message"
+        )
+
+        if kind == "budget":
+            assert expected_budget is not None
+            assert response.prompt_token_ids is not None
+            assert response.choices[0].token_ids is not None
+            full_ids = list(response.prompt_token_ids) + list(
+                response.choices[0].token_ids
+            )
+            n_reason = _count_reasoning_decode_token_ids_between_markers(
+                full_ids, start_ids, end_ids
+            )
+            assert n_reason is not None, f"index {i}: missing reasoning start in ids"
+            assert n_reason == expected_budget, (
+                f"index {i}: reasoning decode token ids ({n_reason}) != "
+                f"thinking_token_budget ({expected_budget})"
+            )
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("client", ["default", "auto_config"], indirect=True)
+async def test_streaming_with_thinking_disabled_stays_in_content(
+    client: openai.AsyncOpenAI,
+):
+    request_kwargs = {
+        "model": MODEL_NAME,
+        "messages": [
+            {
+                "role": "user",
+                "content": "Which is larger, 4 or 12?"
+                " Output exactly one token: 4 or 12.",
+            }
+        ],
+        "max_tokens": 16,
+        "temperature": 0.0,
+        "extra_body": {"chat_template_kwargs": {"enable_thinking": False}},
+    }
+
+    response = await client.chat.completions.create(**request_kwargs)
+    message = response.choices[0].message
+    assert message.content is not None and message.content.strip() != ""
+    assert getattr(message, "reasoning", None) in (None, "")
+
+    stream = await client.chat.completions.create(
+        **request_kwargs,
+        stream=True,
+    )
+
+    content_chunks = []
+    reasoning_chunks = []
+    async for chunk in stream:
+        if not chunk.choices:
+            continue
+        delta = chunk.choices[0].delta
+        if getattr(delta, "content", None):
+            content_chunks.append(delta.content)
+        if getattr(delta, "reasoning", None):
+            reasoning_chunks.append(delta.reasoning)
+
+    assert "".join(content_chunks).strip() != ""
+    assert reasoning_chunks == []
diff --git a/tests/entrypoints/openai/chat_completion/test_vision_embeds.py b/tests/entrypoints/openai/chat_completion/test_vision_embeds.py
index 574a8f1c86a9..da9787e3f89e 100644
--- a/tests/entrypoints/openai/chat_completion/test_vision_embeds.py
+++ b/tests/entrypoints/openai/chat_completion/test_vision_embeds.py
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import importlib.util
+
 import numpy as np
 import pybase64 as base64
 import pytest
@@ -10,7 +12,16 @@
 from tests.utils import RemoteOpenAIServer
 from vllm.utils.serial_utils import tensor2base64
 
+# Prithvi requires terratorch, which is temporarily unavailable while PyPI has
+# `lightning` quarantined (#41376). Skip just the Prithvi case; leave the
+# Qwen3-VL case in the same file untouched.
+_TERRATORCH_AVAILABLE = importlib.util.find_spec("terratorch") is not None
+
 
+@pytest.mark.skipif(
+    not _TERRATORCH_AVAILABLE,
+    reason="terratorch unavailable while PyPI has `lightning` quarantined; see #41376",
+)
 @pytest.mark.parametrize(
     "model_name", ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"]
 )
diff --git a/tests/entrypoints/openai/completion/test_completion.py b/tests/entrypoints/openai/completion/test_completion.py
index bbb8c104f446..8ca0d1604b14 100644
--- a/tests/entrypoints/openai/completion/test_completion.py
+++ b/tests/entrypoints/openai/completion/test_completion.py
@@ -73,6 +73,43 @@ async def test_single_completion(client: openai.AsyncOpenAI, model_name: str) ->
     assert completion.choices[0].prompt_logprobs is None
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name",
+    [MODEL_NAME],
+)
+async def test_completion_truncation_side_controls_prompt_truncation(
+    client: openai.AsyncOpenAI, model_name: str
+) -> None:
+    prompt_token_ids = list(range(8))
+
+    right_completion = await client.completions.create(
+        model=model_name,
+        prompt=prompt_token_ids,
+        max_tokens=1,
+        temperature=0.0,
+        extra_body={
+            "return_token_ids": True,
+            "truncate_prompt_tokens": 4,
+            "truncation_side": "right",
+        },
+    )
+    assert right_completion.choices[0].prompt_token_ids == prompt_token_ids[:4]
+
+    left_completion = await client.completions.create(
+        model=model_name,
+        prompt=prompt_token_ids,
+        max_tokens=1,
+        temperature=0.0,
+        extra_body={
+            "return_token_ids": True,
+            "truncate_prompt_tokens": 4,
+            "truncation_side": "left",
+        },
+    )
+    assert left_completion.choices[0].prompt_token_ids == prompt_token_ids[-4:]
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     "model_name",
diff --git a/tests/entrypoints/openai/completion/test_completion_error.py b/tests/entrypoints/openai/completion/test_completion_error.py
index 46eb02e3c599..c95e47fa1b16 100644
--- a/tests/entrypoints/openai/completion/test_completion_error.py
+++ b/tests/entrypoints/openai/completion/test_completion_error.py
@@ -3,7 +3,7 @@
 
 from dataclasses import dataclass, field
 from typing import Any
-from unittest.mock import MagicMock
+from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
@@ -79,7 +79,6 @@ def _build_serving_completion(engine: AsyncLLM) -> OpenAIServingCompletion:
     serving_render = OpenAIServingRender(
         model_config=engine.model_config,
         renderer=engine.renderer,
-        io_processor=engine.io_processor,
         model_registry=models.registry,
         request_logger=None,
         chat_template=None,
@@ -107,7 +106,6 @@ async def test_completion_error_non_stream():
     mock_engine.errored = False
     mock_engine.model_config = MockModelConfig()
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
     mock_engine.renderer = _build_renderer(mock_engine.model_config)
 
     serving_completion = _build_serving_completion(mock_engine)
@@ -150,6 +148,66 @@ async def mock_generate(*args, **kwargs):
         await serving_completion.create_completion(request)
 
 
+@pytest.mark.asyncio
+async def test_openai_completion_keeps_mm_cache_for_engine_execution():
+    mock_engine = MagicMock(spec=AsyncLLM)
+    mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.input_processor = MagicMock()
+    mock_engine.renderer = _build_renderer(mock_engine.model_config)
+
+    serving_completion = _build_serving_completion(mock_engine)
+    serving_completion.openai_serving_render.preprocess_completion = AsyncMock(
+        return_value=[{"prompt_token_ids": [1, 2, 3]}]
+    )
+
+    request = CompletionRequest(
+        model=MODEL_NAME,
+        prompt="Test prompt",
+    )
+
+    result = await serving_completion.render_completion_request(request)
+
+    assert isinstance(result, list)
+    assert (
+        serving_completion.openai_serving_render.preprocess_completion.call_args.kwargs[
+            "skip_mm_cache"
+        ]
+        is False
+    )
+
+
+@pytest.mark.asyncio
+async def test_renderer_only_completion_request_skips_mm_cache():
+    mock_engine = MagicMock(spec=AsyncLLM)
+    mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.input_processor = MagicMock()
+    mock_engine.renderer = _build_renderer(mock_engine.model_config)
+
+    serving_completion = _build_serving_completion(mock_engine)
+    serving_completion.openai_serving_render.preprocess_completion = AsyncMock(
+        return_value=[{"prompt_token_ids": [1, 2, 3]}]
+    )
+
+    request = CompletionRequest(
+        model=MODEL_NAME,
+        prompt="Test prompt",
+    )
+
+    result = await serving_completion.openai_serving_render.render_completion_request(
+        request
+    )
+
+    assert isinstance(result, list)
+    assert (
+        serving_completion.openai_serving_render.preprocess_completion.call_args.kwargs[
+            "skip_mm_cache"
+        ]
+        is True
+    )
+
+
 @pytest.mark.asyncio
 async def test_completion_error_stream():
     """test finish_reason='error' returns 500 InternalServerError (streaming)"""
@@ -157,7 +215,6 @@ async def test_completion_error_stream():
     mock_engine.errored = False
     mock_engine.model_config = MockModelConfig()
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
     mock_engine.renderer = _build_renderer(mock_engine.model_config)
 
     serving_completion = _build_serving_completion(mock_engine)
diff --git a/tests/entrypoints/openai/completion/test_lora_resolvers.py b/tests/entrypoints/openai/completion/test_lora_resolvers.py
index 8d5283de5cf4..6a0bec92516d 100644
--- a/tests/entrypoints/openai/completion/test_lora_resolvers.py
+++ b/tests/entrypoints/openai/completion/test_lora_resolvers.py
@@ -137,7 +137,6 @@ async def mock_generate(*args, **kwargs):
 
     mock_engine.model_config = MockModelConfig()
     mock_engine.input_processor = MagicMock()
-    mock_engine.io_processor = MagicMock()
     mock_engine.renderer = _build_renderer(mock_engine.model_config)
 
     models = OpenAIServingModels(
@@ -148,7 +147,6 @@ async def mock_generate(*args, **kwargs):
     serving_render = OpenAIServingRender(
         model_config=mock_engine.model_config,
         renderer=mock_engine.renderer,
-        io_processor=mock_engine.io_processor,
         model_registry=models.registry,
         request_logger=None,
         chat_template=None,
diff --git a/tests/entrypoints/openai/completion/test_prompt_validation.py b/tests/entrypoints/openai/completion/test_prompt_validation.py
index f44d13c555c5..81204b27bc0b 100644
--- a/tests/entrypoints/openai/completion/test_prompt_validation.py
+++ b/tests/entrypoints/openai/completion/test_prompt_validation.py
@@ -62,6 +62,8 @@ def test_load_prompt_embeds(
 ):
     model_config = Mock(spec=ModelConfig)
     model_config.enable_prompt_embeds = True
+    model_config.get_hidden_size.return_value = hidden_size
+    model_config.dtype = dtype
 
     # construct arbitrary tensors of various dtypes, layouts, and sizes.
     # We need to check against different layouts to make sure that if a user
diff --git a/tests/entrypoints/openai/completion/test_shutdown.py b/tests/entrypoints/openai/completion/test_shutdown.py
index 80d00bd2397a..82a18c24eaa0 100644
--- a/tests/entrypoints/openai/completion/test_shutdown.py
+++ b/tests/entrypoints/openai/completion/test_shutdown.py
@@ -26,6 +26,9 @@
 _PROCESS_EXIT_TIMEOUT = 15
 _SHUTDOWN_DETECTION_TIMEOUT = 10
 _CHILD_CLEANUP_TIMEOUT = 10
+_INFLIGHT_REQUEST_START_TIMEOUT = 5
+_INFLIGHT_REQUEST_POLL_INTERVAL = 0.1
+_ABORT_CLIENT_TIMEOUT = 3
 
 
 def _get_child_pids(parent_pid: int) -> list[int]:
@@ -71,6 +74,7 @@ class ShutdownState:
     requests_after_sigterm: int = 0
     aborted_requests: int = 0
     connection_errors: int = 0
+    inflight_requests: int = 0
     stop_requesting: bool = False
     errors: list[str] = field(default_factory=list)
 
@@ -86,6 +90,7 @@ async def _concurrent_request_loop(
     async def single_request():
         while not state.stop_requesting:
             try:
+                state.inflight_requests += 1
                 response = await client.completions.create(
                     model=MODEL_NAME,
                     prompt="Write a story: ",
@@ -110,6 +115,8 @@ async def single_request():
             except Exception as e:
                 state.errors.append(f"Unexpected error: {e}")
                 break
+            finally:
+                state.inflight_requests -= 1
             await asyncio.sleep(0.01)
 
     tasks = [asyncio.create_task(single_request()) for _ in range(concurrency)]
@@ -299,19 +306,16 @@ async def test_abort_timeout_exits_quickly(wait_for_engine_idle: float):
         start_time = time.time()
         proc.send_signal(signal.SIGTERM)
 
-        # abort timeout (0) should exit promptly
-        for _ in range(20):
-            if proc.poll() is not None:
-                break
-            time.sleep(0.1)
-
-        if proc.poll() is None:
+        # abort timeout (0) should stop the server promptly.
+        try:
+            proc.wait(timeout=4.0)
+        except subprocess.TimeoutExpired:
             proc.kill()
             proc.wait(timeout=5)
             pytest.fail("Process did not exit after SIGTERM with abort timeout")
 
         exit_time = time.time() - start_time
-        assert exit_time < 2, f"Default shutdown took too long: {exit_time:.1f}s"
+        assert exit_time < 4.1, f"Default shutdown took too long: {exit_time:.1f}s"
         assert proc.returncode in (0, -15, None), f"Unexpected: {proc.returncode}"
 
         await _assert_children_cleaned_up(child_pids)
@@ -395,7 +399,7 @@ async def test_abort_timeout_fails_inflight_requests():
     ]
 
     with RemoteOpenAIServer(MODEL_NAME, server_args) as remote_server:
-        client = remote_server.get_async_client()
+        client = remote_server.get_async_client(timeout=_ABORT_CLIENT_TIMEOUT)
         proc = remote_server.proc
         child_pids = _get_child_pids(proc.pid)
 
@@ -406,7 +410,10 @@ async def test_abort_timeout_fails_inflight_requests():
             _concurrent_request_loop(client, state, sigterm_sent, concurrency=10)
         )
 
-        await asyncio.sleep(0.5)
+        deadline = time.time() + _INFLIGHT_REQUEST_START_TIMEOUT
+        while state.inflight_requests == 0 and time.time() < deadline:
+            await asyncio.sleep(_INFLIGHT_REQUEST_POLL_INTERVAL)
+        assert state.inflight_requests > 0
 
         proc.send_signal(signal.SIGTERM)
         sigterm_sent.set()
diff --git a/vllm/entrypoints/openai/realtime/__init__.py b/tests/entrypoints/openai/generative_scoring/__init__.py
similarity index 100%
rename from vllm/entrypoints/openai/realtime/__init__.py
rename to tests/entrypoints/openai/generative_scoring/__init__.py
diff --git a/tests/entrypoints/openai/generative_scoring/test_generative_scoring.py b/tests/entrypoints/openai/generative_scoring/test_generative_scoring.py
new file mode 100644
index 000000000000..632c4bcc90ae
--- /dev/null
+++ b/tests/entrypoints/openai/generative_scoring/test_generative_scoring.py
@@ -0,0 +1,324 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for the Generative Scoring API.
+
+Tests cover:
+1. Protocol models (request/response construction)
+2. Probability computation (softmax normalization)
+3. Input validation
+4. Score formula: P(token[0]) / (P(token[0]) + P(token[1]))
+5. Prompt building and item ordering
+"""
+
+import math
+from dataclasses import dataclass, field
+from typing import Any
+from unittest.mock import MagicMock
+
+import pytest
+
+from vllm.config.multimodal import MultiModalConfig
+from vllm.entrypoints.openai.engine.protocol import ErrorResponse
+from vllm.entrypoints.openai.generative_scoring.serving import (
+    GenerativeScoringItemResult,
+    GenerativeScoringRequest,
+    GenerativeScoringResponse,
+    OpenAIServingGenerativeScoring,
+)
+from vllm.entrypoints.openai.models.protocol import BaseModelPath
+from vllm.entrypoints.openai.models.serving import OpenAIServingModels
+from vllm.logprobs import Logprob
+from vllm.outputs import CompletionOutput, RequestOutput
+from vllm.tokenizers import get_tokenizer
+from vllm.v1.engine.async_llm import AsyncLLM
+
+MODEL_NAME = "Qwen/Qwen3-0.6B"
+BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]
+
+
+@dataclass
+class MockHFConfig:
+    model_type: str = "any"
+
+
+@dataclass
+class MockModelConfig:
+    task = "generate"
+    runner_type = "generate"
+    tokenizer = MODEL_NAME
+    trust_remote_code = False
+    tokenizer_mode = "auto"
+    max_model_len = 100
+    tokenizer_revision = None
+    multimodal_config = MultiModalConfig()
+    hf_config = MockHFConfig()
+    logits_processor_pattern = None
+    logits_processors: list[str] | None = None
+    diff_sampling_param: dict | None = None
+    allowed_local_media_path: str = ""
+    allowed_media_domains: list[str] | None = None
+    encoder_config = None
+    generation_config: str = "auto"
+    media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict)
+    skip_tokenizer_init = False
+    vocab_size = 151936
+
+    def get_diff_sampling_param(self):
+        return self.diff_sampling_param or {}
+
+    def get_vocab_size(self):
+        return self.vocab_size
+
+
+def _create_mock_engine():
+    """Create a mock AsyncLLM engine."""
+    mock_engine = MagicMock(spec=AsyncLLM)
+    mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
+    mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.input_processor = MagicMock()
+
+    # renderer is accessed by OpenAIServing.__init__ and serving.py
+    mock_renderer = MagicMock()
+    mock_renderer.tokenizer = get_tokenizer(MODEL_NAME)
+    mock_engine.renderer = mock_renderer
+
+    return mock_engine
+
+
+def _create_serving(mock_engine) -> OpenAIServingGenerativeScoring:
+    """Create an OpenAIServingGenerativeScoring instance with mocks."""
+    models = OpenAIServingModels(
+        engine_client=mock_engine,
+        base_model_paths=BASE_MODEL_PATHS,
+    )
+    return OpenAIServingGenerativeScoring(mock_engine, models, request_logger=None)
+
+
+def _create_mock_request_output(logprobs_dict: dict[int, float]) -> RequestOutput:
+    """Create a mock RequestOutput with specified logprobs."""
+    logprobs_with_objs = {
+        tid: Logprob(logprob=lp, rank=i + 1)
+        for i, (tid, lp) in enumerate(logprobs_dict.items())
+    }
+    completion_output = CompletionOutput(
+        index=0,
+        text="",
+        token_ids=[100],
+        cumulative_logprob=-1.0,
+        logprobs=[logprobs_with_objs],
+        finish_reason="length",
+    )
+    return RequestOutput(
+        request_id="test-request",
+        prompt="test prompt",
+        prompt_token_ids=[1, 2, 3],
+        prompt_logprobs=None,
+        outputs=[completion_output],
+        finished=True,
+    )
+
+
+class TestProtocolModels:
+    """Tests for GenerativeScoringRequest and GenerativeScoringResponse."""
+
+    def test_request_and_response_all_fields(self):
+        """Test request construction with all field types and response structure."""
+        # Test request with string inputs
+        req_str = GenerativeScoringRequest(
+            query="Is this the capital?",
+            items=["Paris", "London"],
+            label_token_ids=[9454, 2753],
+        )
+        assert req_str.query == "Is this the capital?"
+        assert req_str.items == ["Paris", "London"]
+        assert req_str.label_token_ids == [9454, 2753]
+        assert req_str.apply_softmax is True  # default
+        assert req_str.item_first is False  # default
+        assert req_str.add_special_tokens is True  # default
+
+        # Test request with pre-tokenized inputs and custom options
+        req_tok = GenerativeScoringRequest(
+            query=[100, 200, 300],
+            items=[[400, 500], [600, 700]],
+            label_token_ids=[1234, 5678],
+            apply_softmax=False,
+            item_first=True,
+            add_special_tokens=False,
+        )
+        assert req_tok.query == [100, 200, 300]
+        assert req_tok.items == [[400, 500], [600, 700]]
+        assert req_tok.apply_softmax is False
+        assert req_tok.item_first is True
+        assert req_tok.add_special_tokens is False
+
+        # Test response structure
+        response = GenerativeScoringResponse(
+            model="test-model",
+            data=[
+                GenerativeScoringItemResult(index=0, score=0.7),
+                GenerativeScoringItemResult(index=1, score=0.4),
+            ],
+            usage={"prompt_tokens": 10, "total_tokens": 12, "completion_tokens": 2},
+        )
+        assert response.object == "list"
+        assert response.model == "test-model"
+        assert len(response.data) == 2
+        assert response.data[0].score == 0.7
+        assert response.data[0].object == "score"
+        assert response.data[1].score == 0.4
+        assert response.usage.prompt_tokens == 10
+
+
+class TestProbabilityComputation:
+    """Tests for _compute_probabilities with both softmax modes."""
+
+    @pytest.mark.parametrize(
+        "label_logprobs,apply_softmax,should_sum_to_one",
+        [
+            ({100: -1.0, 200: -2.0}, True, True),
+            ({100: -100.0, 200: -100.5}, True, True),  # numerical stability
+            ({100: -1.0, 200: -2.0}, False, False),
+        ],
+        ids=["softmax_basic", "softmax_extreme_values", "true_probs"],
+    )
+    def test_compute_probabilities(
+        self, label_logprobs, apply_softmax, should_sum_to_one
+    ):
+        """Test probability computation for softmax and true probability modes."""
+        serving = OpenAIServingGenerativeScoring.__new__(OpenAIServingGenerativeScoring)
+        probs = serving._compute_probabilities(
+            label_logprobs, apply_softmax=apply_softmax
+        )
+
+        # Verify sum behavior
+        total = sum(probs.values())
+        if should_sum_to_one:
+            assert abs(total - 1.0) < 1e-6
+        else:
+            assert total < 1.0
+
+        # Verify math
+        if apply_softmax:
+            max_lp = max(label_logprobs.values())
+            exp_vals = {k: math.exp(v - max_lp) for k, v in label_logprobs.items()}
+            sum_exp = sum(exp_vals.values())
+            for tid, lp in label_logprobs.items():
+                assert abs(probs[tid] - exp_vals[tid] / sum_exp) < 1e-9
+        else:
+            for tid, lp in label_logprobs.items():
+                assert abs(probs[tid] - math.exp(lp)) < 1e-9
+
+    def test_score_formula(self):
+        """Test the score formula: P(token[0]) / (P(token[0]) + P(token[1]))."""
+        serving = OpenAIServingGenerativeScoring.__new__(OpenAIServingGenerativeScoring)
+
+        # With logprobs -0.5 and -2.0, softmax gives higher prob to first token
+        logprobs = {9454: -0.5, 2753: -2.0}
+        probs = serving._compute_probabilities(logprobs, apply_softmax=True)
+
+        # Score = P(9454) / (P(9454) + P(2753)) = P(9454) since they sum to 1
+        score = probs[9454]
+
+        # Manual calculation
+        exp_0 = math.exp(-0.5)
+        exp_1 = math.exp(-2.0)
+        expected_score = exp_0 / (exp_0 + exp_1)
+
+        assert abs(score - expected_score) < 1e-9
+        assert score > 0.5  # First token has higher logprob, so higher probability
+
+
+class TestValidation:
+    """Tests for input validation errors."""
+
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize(
+        "request_kwargs,expected_error",
+        [
+            (
+                {"query": "q", "items": ["i"], "label_token_ids": [999999, 999998]},
+                "out of vocabulary",
+            ),
+            (
+                {"query": "q", "items": [], "label_token_ids": [100, 200]},
+                "at least one item",
+            ),
+        ],
+        ids=["invalid_token_id", "empty_items"],
+    )
+    async def test_validation_errors(self, request_kwargs, expected_error):
+        """Test that invalid inputs return appropriate errors."""
+        mock_engine = _create_mock_engine()
+        serving = _create_serving(mock_engine)
+        request = GenerativeScoringRequest(model=MODEL_NAME, **request_kwargs)
+        result = await serving.create_generative_scoring(request, None)
+
+        assert isinstance(result, ErrorResponse)
+        assert expected_error in result.error.message.lower()
+
+
+class TestPromptBuilding:
+    """Tests for prompt construction and item ordering."""
+
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize(
+        "item_first,expected",
+        [
+            (False, [[100, 101, 200, 201], [100, 101, 300, 301]]),  # query + item
+            (True, [[200, 201, 100, 101], [300, 301, 100, 101]]),  # item + query
+        ],
+        ids=["query_first", "item_first"],
+    )
+    async def test_item_ordering(self, item_first, expected):
+        """Test that item_first flag controls prompt concatenation order."""
+        mock_engine = _create_mock_engine()
+        serving = _create_serving(mock_engine)
+
+        request = GenerativeScoringRequest(
+            query=[100, 101],
+            items=[[200, 201], [300, 301]],
+            label_token_ids=[500, 501],
+            item_first=item_first,
+        )
+        engine_inputs, _ = await serving._build_prompts(
+            request, MagicMock(), max_model_len=4096
+        )
+
+        for i, exp in enumerate(expected):
+            assert engine_inputs[i]["prompt_token_ids"] == exp
+
+
+class TestGeneration:
+    """Tests for the full generation flow with mocked engine."""
+
+    @pytest.mark.asyncio
+    async def test_successful_generation(self):
+        """Test successful score generation returns valid response."""
+        mock_engine = _create_mock_engine()
+        serving = _create_serving(mock_engine)
+
+        mock_logprobs = {1234: -0.5, 5678: -2.0, 100: -3.0}
+        mock_output = _create_mock_request_output(mock_logprobs)
+
+        async def mock_generate(*args, **kwargs):
+            yield mock_output
+
+        mock_engine.generate = mock_generate
+
+        request = GenerativeScoringRequest(
+            model=MODEL_NAME,
+            query="Is Paris the capital?",
+            items=["Yes", "No"],
+            label_token_ids=[1234, 5678],
+        )
+        result = await serving.create_generative_scoring(request, None)
+
+        assert isinstance(result, GenerativeScoringResponse)
+        assert len(result.data) == 2
+        for item_result in result.data:
+            assert 0.0 <= item_result.score <= 1.0
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/entrypoints/openai/generative_scoring/test_generative_scoring_e2e.py b/tests/entrypoints/openai/generative_scoring/test_generative_scoring_e2e.py
new file mode 100644
index 000000000000..64a59b270f14
--- /dev/null
+++ b/tests/entrypoints/openai/generative_scoring/test_generative_scoring_e2e.py
@@ -0,0 +1,157 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""End-to-end tests for the Generative Scoring API.
+
+Tests verify the full HTTP request/response flow using RemoteOpenAIServer.
+"""
+
+import pytest
+import requests
+
+from ....utils import RemoteOpenAIServer
+
+MODEL_NAME = "Qwen/Qwen3-0.6B"
+
+
+@pytest.fixture(scope="module")
+def server():
+    args = [
+        "--dtype",
+        "bfloat16",
+        "--max-model-len",
+        "512",
+        "--enforce-eager",
+        "--max-num-seqs",
+        "32",
+    ]
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+        yield remote_server
+
+
+class TestGenerativeScoringAPI:
+    """End-to-end tests for the Generative Scoring API."""
+
+    @pytest.mark.asyncio
+    async def test_basic_score_and_response_structure(self, server: RemoteOpenAIServer):
+        """Test basic generative scoring request and verify response structure."""
+        response = requests.post(
+            server.url_for("generative_scoring"),
+            json={
+                "model": MODEL_NAME,
+                "query": "Is Paris the capital of France? Answer Yes or No: ",
+                "items": ["Paris is beautiful.", "London is rainy."],
+                "label_token_ids": [9454, 2753],
+            },
+        )
+        assert response.status_code == 200, f"Response: {response.text}"
+        data = response.json()
+
+        # Verify response structure
+        assert data["id"].startswith("generative-scoring-")
+        assert data["object"] == "list"
+        assert "model" in data
+        assert "usage" in data
+        assert len(data["data"]) == 2
+
+        # Verify each result
+        for i, result in enumerate(data["data"]):
+            assert result["index"] == i
+            assert result["object"] == "score"
+            assert 0.0 <= result["score"] <= 1.0
+
+        # Verify usage tracking
+        usage = data["usage"]
+        assert usage["prompt_tokens"] > 0
+        assert usage["completion_tokens"] > 0
+        assert (
+            usage["total_tokens"] == usage["prompt_tokens"] + usage["completion_tokens"]
+        )
+
+    @pytest.mark.asyncio
+    async def test_multiple_items(self, server: RemoteOpenAIServer):
+        """Test generative scoring request with multiple items."""
+        response = requests.post(
+            server.url_for("generative_scoring"),
+            json={
+                "model": MODEL_NAME,
+                "query": "Is this city a capital? ",
+                "items": ["Paris", "London", "Berlin", "New York", "Tokyo"],
+                "label_token_ids": [9454, 2753],
+            },
+        )
+        assert response.status_code == 200
+        data = response.json()
+        assert len(data["data"]) == 5
+
+    @pytest.mark.asyncio
+    async def test_validation_missing_label_token_ids(self, server: RemoteOpenAIServer):
+        """Test that missing label_token_ids returns a validation error."""
+        response = requests.post(
+            server.url_for("generative_scoring"),
+            json={
+                "model": MODEL_NAME,
+                "query": "Test query",
+                "items": ["item1", "item2"],
+            },
+        )
+        # Missing required field returns 400 (manual JSON parsing)
+        assert response.status_code == 400
+
+    @pytest.mark.asyncio
+    async def test_validation_empty_items(self, server: RemoteOpenAIServer):
+        """Test that empty items returns an error."""
+        response = requests.post(
+            server.url_for("generative_scoring"),
+            json={
+                "model": MODEL_NAME,
+                "query": "Test query",
+                "items": [],
+                "label_token_ids": [100, 200],
+            },
+        )
+        assert response.status_code == 400
+
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize(
+        "label_token_ids,expected_status",
+        [
+            ([9999999999, 9999999998], 400),  # Out of vocab range
+        ],
+        ids=["invalid_token_ids"],
+    )
+    async def test_validation_errors(
+        self, server: RemoteOpenAIServer, label_token_ids, expected_status
+    ):
+        """Test validation errors for various invalid inputs."""
+        response = requests.post(
+            server.url_for("generative_scoring"),
+            json={
+                "model": MODEL_NAME,
+                "query": "Test query",
+                "items": ["item1"],
+                "label_token_ids": label_token_ids,
+            },
+        )
+        assert response.status_code == expected_status
+
+    @pytest.mark.asyncio
+    async def test_score_consistency(self, server: RemoteOpenAIServer):
+        """Test that scores are deterministic across identical requests."""
+        request_body = {
+            "model": MODEL_NAME,
+            "query": "Is this consistent? ",
+            "items": ["Yes it is."],
+            "label_token_ids": [100, 200],
+        }
+
+        r1 = requests.post(server.url_for("generative_scoring"), json=request_body)
+        r2 = requests.post(server.url_for("generative_scoring"), json=request_body)
+
+        assert r1.status_code == 200 and r2.status_code == 200
+        r1_score = r1.json()["data"][0]["score"]
+        r2_score = r2.json()["data"][0]["score"]
+        assert abs(r1_score - r2_score) < 1e-6
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/entrypoints/openai/parser/test_harmony_render_parity.py b/tests/entrypoints/openai/parser/test_harmony_render_parity.py
new file mode 100644
index 000000000000..b5ba3344990c
--- /dev/null
+++ b/tests/entrypoints/openai/parser/test_harmony_render_parity.py
@@ -0,0 +1,492 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Cross-API render parity tests.
+
+Verifies that the chat completion input path (parse_chat_input_to_harmony_message)
+and the responses API input path (response_input_to_harmony) produce identical
+Harmony messages and identical rendered token sequences when given equivalent
+conversation representations.
+
+The chat completion API encodes reasoning and tool calls as fields on a single
+assistant message dict; the responses API encodes them as separate typed items
+in request.input. Both paths must converge on the same Harmony message list and
+therefore the same rendered prompt.
+
+Each test:
+  1. Builds Harmony messages from each path for a single message or sequence.
+  2. Asserts message-level properties (role, channel, recipient, content)
+     using verify_harmony_messages.
+  3. Asserts that render_for_completion produces identical token sequences.
+"""
+
+from openai.types.responses import ResponseFunctionToolCall
+
+from tests.entrypoints.openai.utils import verify_harmony_messages
+from vllm.entrypoints.openai.parser.harmony_utils import (
+    get_system_message,
+    parse_chat_input_to_harmony_message,
+    render_for_completion,
+)
+from vllm.entrypoints.openai.responses.harmony import response_input_to_harmony
+
+# Use a fixed date so the system message is deterministic across both paths.
+_DATE = "2025-01-01"
+
+
+def _system():
+    return get_system_message(start_date=_DATE)
+
+
+class TestResponseInputToHarmonyRenderParity:
+    """Each test drives the same conversation through both APIs and asserts
+    identical Harmony messages and rendered token sequences."""
+
+    # -----------------------------------------------------------------------
+    # Single-message cases
+    # -----------------------------------------------------------------------
+
+    def test_user_message(self):
+        chat_msgs = parse_chat_input_to_harmony_message(
+            {"role": "user", "content": "What's the weather in Paris?"}
+        )
+        resp_msgs = [
+            response_input_to_harmony(
+                {
+                    "type": "message",
+                    "role": "user",
+                    "content": "What's the weather in Paris?",
+                },
+                prev_responses=[],
+            )
+        ]
+
+        expected = [{"role": "user", "content": "What's the weather in Paris?"}]
+        verify_harmony_messages(chat_msgs, expected)
+        verify_harmony_messages(resp_msgs, expected)
+
+        assert render_for_completion([_system()] + chat_msgs) == render_for_completion(
+            [_system()] + resp_msgs
+        )
+
+    def test_assistant_final_message(self):
+        chat_msgs = parse_chat_input_to_harmony_message(
+            {"role": "assistant", "content": "It is 18°C in Paris."}
+        )
+        resp_msgs = [
+            response_input_to_harmony(
+                {
+                    "type": "message",
+                    "role": "assistant",
+                    "content": "It is 18°C in Paris.",
+                },
+                prev_responses=[],
+            )
+        ]
+
+        expected = [
+            {"role": "assistant", "channel": "final", "content": "It is 18°C in Paris."}
+        ]
+        verify_harmony_messages(chat_msgs, expected)
+        verify_harmony_messages(resp_msgs, expected)
+
+        assert render_for_completion([_system()] + chat_msgs) == render_for_completion(
+            [_system()] + resp_msgs
+        )
+
+    def test_reasoning_item(self):
+        # Chat path: assistant message with only a reasoning field and no content.
+        chat_msgs = parse_chat_input_to_harmony_message(
+            {
+                "role": "assistant",
+                "reasoning": "I should call get_weather.",
+                "content": "",
+            }
+        )
+        resp_msgs = [
+            response_input_to_harmony(
+                {
+                    "type": "reasoning",
+                    "content": [
+                        {"type": "reasoning_text", "text": "I should call get_weather."}
+                    ],
+                },
+                prev_responses=[],
+            )
+        ]
+
+        expected = [
+            {
+                "role": "assistant",
+                "channel": "analysis",
+                "content": "I should call get_weather.",
+            }
+        ]
+        verify_harmony_messages(chat_msgs, expected)
+        verify_harmony_messages(resp_msgs, expected)
+
+        assert render_for_completion([_system()] + chat_msgs) == render_for_completion(
+            [_system()] + resp_msgs
+        )
+
+    def test_function_call(self):
+        chat_msgs = parse_chat_input_to_harmony_message(
+            {
+                "role": "assistant",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "function": {
+                            "name": "get_weather",
+                            "arguments": '{"location": "Paris"}',
+                        },
+                    }
+                ],
+            }
+        )
+        resp_msgs = [
+            response_input_to_harmony(
+                {
+                    "type": "function_call",
+                    "name": "get_weather",
+                    "arguments": '{"location": "Paris"}',
+                },
+                prev_responses=[],
+            )
+        ]
+
+        expected = [
+            {
+                "role": "assistant",
+                "channel": "commentary",
+                "recipient": "functions.get_weather",
+                "content": '{"location": "Paris"}',
+                "content_type": "json",
+            }
+        ]
+        verify_harmony_messages(chat_msgs, expected)
+        verify_harmony_messages(resp_msgs, expected)
+
+        assert render_for_completion([_system()] + chat_msgs) == render_for_completion(
+            [_system()] + resp_msgs
+        )
+
+    def test_tool_output(self):
+        prev_call = ResponseFunctionToolCall(
+            id="fc_1",
+            call_id="call_1",
+            name="get_weather",
+            arguments='{"location": "Paris"}',
+            type="function_call",
+        )
+
+        chat_msgs = parse_chat_input_to_harmony_message(
+            {"role": "tool", "tool_call_id": "call_1", "content": "18°C, clear skies."},
+            tool_id_names={"call_1": "get_weather"},
+        )
+        resp_msgs = [
+            response_input_to_harmony(
+                {
+                    "type": "function_call_output",
+                    "call_id": "call_1",
+                    "output": "18°C, clear skies.",
+                },
+                prev_responses=[prev_call],
+            )
+        ]
+
+        expected = [
+            {
+                "role": "tool",
+                "author_name": "functions.get_weather",
+                "channel": "commentary",
+                "recipient": "assistant",
+                "content": "18°C, clear skies.",
+            }
+        ]
+        verify_harmony_messages(chat_msgs, expected)
+        verify_harmony_messages(resp_msgs, expected)
+
+        assert render_for_completion([_system()] + chat_msgs) == render_for_completion(
+            [_system()] + resp_msgs
+        )
+
+    # -----------------------------------------------------------------------
+    # Combined and multi-turn cases
+    # -----------------------------------------------------------------------
+
+    def test_reasoning_combined_with_function_call(self):
+        """Chat API packs reasoning + tool_calls into one dict; responses API
+        represents them as two separate items. Both must produce the same two
+        Harmony messages in the same order: analysis then commentary."""
+        chat_msgs = parse_chat_input_to_harmony_message(
+            {
+                "role": "assistant",
+                "reasoning": "I should get the weather for Paris.",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "function": {
+                            "name": "get_weather",
+                            "arguments": '{"location": "Paris"}',
+                        },
+                    }
+                ],
+            }
+        )
+        resp_msgs = [
+            response_input_to_harmony(
+                {
+                    "type": "reasoning",
+                    "content": [
+                        {
+                            "type": "reasoning_text",
+                            "text": "I should get the weather for Paris.",
+                        }
+                    ],
+                },
+                prev_responses=[],
+            ),
+            response_input_to_harmony(
+                {
+                    "type": "function_call",
+                    "name": "get_weather",
+                    "arguments": '{"location": "Paris"}',
+                },
+                prev_responses=[],
+            ),
+        ]
+
+        expected = [
+            {
+                "role": "assistant",
+                "channel": "analysis",
+                "content": "I should get the weather for Paris.",
+            },
+            {
+                "role": "assistant",
+                "channel": "commentary",
+                "recipient": "functions.get_weather",
+                "content": '{"location": "Paris"}',
+                "content_type": "json",
+            },
+        ]
+        verify_harmony_messages(chat_msgs, expected)
+        verify_harmony_messages(resp_msgs, expected)
+
+        assert render_for_completion([_system()] + chat_msgs) == render_for_completion(
+            [_system()] + resp_msgs
+        )
+
+    def test_full_multi_turn_tool_call_conversation(self):
+        """Full conversation: user -> reasoning + tool_call -> tool_output -> final.
+
+        Both APIs must render the complete conversation to identical token sequences.
+        This exercises the entire input pipeline including all message types and
+        the Rust harmony encoder.
+        """
+        prev_call = ResponseFunctionToolCall(
+            id="fc_1",
+            call_id="call_1",
+            name="get_weather",
+            arguments='{"location": "Paris"}',
+            type="function_call",
+        )
+
+        # --- Chat completion API path ---
+        tool_id_names = {"call_1": "get_weather"}
+        chat_msgs = []
+        chat_msgs += parse_chat_input_to_harmony_message(
+            {"role": "user", "content": "What's the weather in Paris?"}
+        )
+        chat_msgs += parse_chat_input_to_harmony_message(
+            {
+                "role": "assistant",
+                "reasoning": "I should call get_weather for Paris.",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "function": {
+                            "name": "get_weather",
+                            "arguments": '{"location": "Paris"}',
+                        },
+                    }
+                ],
+            }
+        )
+        chat_msgs += parse_chat_input_to_harmony_message(
+            {"role": "tool", "tool_call_id": "call_1", "content": "18°C, clear skies."},
+            tool_id_names=tool_id_names,
+        )
+        chat_msgs += parse_chat_input_to_harmony_message(
+            {
+                "role": "assistant",
+                "content": "It is currently 18°C in Paris with clear skies.",
+            }
+        )
+
+        # --- Responses API path ---
+        resp_input = [
+            {
+                "type": "message",
+                "role": "user",
+                "content": "What's the weather in Paris?",
+            },
+            {
+                "type": "reasoning",
+                "content": [
+                    {
+                        "type": "reasoning_text",
+                        "text": "I should call get_weather for Paris.",
+                    }
+                ],
+            },
+            {
+                "type": "function_call",
+                "name": "get_weather",
+                "arguments": '{"location": "Paris"}',
+            },
+            {
+                "type": "function_call_output",
+                "call_id": "call_1",
+                "output": "18°C, clear skies.",
+            },
+            {
+                "type": "message",
+                "role": "assistant",
+                "content": "It is currently 18°C in Paris with clear skies.",
+            },
+        ]
+        resp_msgs = [
+            response_input_to_harmony(item, prev_responses=[prev_call])
+            for item in resp_input
+        ]
+
+        assert render_for_completion([_system()] + chat_msgs) == render_for_completion(
+            [_system()] + resp_msgs
+        )
+
+    def test_multi_turn_two_tool_calls_with_reasoning_between(self):
+        """Validates parity for a chain of two tool calls, each with its own
+        reasoning trace. Reasoning traces in between commentary-channel tool
+        calls must survive as analysis-channel messages in both paths.
+        """
+        prev_call_1 = ResponseFunctionToolCall(
+            id="fc_1",
+            call_id="call_1",
+            name="get_weather",
+            arguments='{"location": "Paris"}',
+            type="function_call",
+        )
+        prev_call_2 = ResponseFunctionToolCall(
+            id="fc_2",
+            call_id="call_2",
+            name="get_forecast",
+            arguments='{"location": "Paris", "days": 7}',
+            type="function_call",
+        )
+
+        # --- Chat completion API path ---
+        tool_id_names = {"call_1": "get_weather", "call_2": "get_forecast"}
+        chat_msgs = []
+        chat_msgs += parse_chat_input_to_harmony_message(
+            {"role": "user", "content": "What's the weather and forecast for Paris?"}
+        )
+        # First reasoning + tool call
+        chat_msgs += parse_chat_input_to_harmony_message(
+            {
+                "role": "assistant",
+                "reasoning": "I need current weather first.",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "function": {
+                            "name": "get_weather",
+                            "arguments": '{"location": "Paris"}',
+                        },
+                    }
+                ],
+            }
+        )
+        chat_msgs += parse_chat_input_to_harmony_message(
+            {"role": "tool", "tool_call_id": "call_1", "content": "18°C, clear skies."},
+            tool_id_names=tool_id_names,
+        )
+        # Second reasoning + tool call
+        chat_msgs += parse_chat_input_to_harmony_message(
+            {
+                "role": "assistant",
+                "reasoning": "Now I need the weekly forecast.",
+                "tool_calls": [
+                    {
+                        "id": "call_2",
+                        "function": {
+                            "name": "get_forecast",
+                            "arguments": '{"location": "Paris", "days": 7}',
+                        },
+                    }
+                ],
+            }
+        )
+        chat_msgs += parse_chat_input_to_harmony_message(
+            {
+                "role": "tool",
+                "tool_call_id": "call_2",
+                "content": "Mon 17°C, Tue 19°C, Wed 16°C",
+            },
+            tool_id_names=tool_id_names,
+        )
+
+        # --- Responses API path ---
+        prev_responses = [prev_call_1, prev_call_2]
+        resp_input = [
+            {
+                "type": "message",
+                "role": "user",
+                "content": "What's the weather and forecast for Paris?",
+            },
+            # First reasoning + tool call
+            {
+                "type": "reasoning",
+                "content": [
+                    {"type": "reasoning_text", "text": "I need current weather first."}
+                ],
+            },
+            {
+                "type": "function_call",
+                "name": "get_weather",
+                "arguments": '{"location": "Paris"}',
+            },
+            {
+                "type": "function_call_output",
+                "call_id": "call_1",
+                "output": "18°C, clear skies.",
+            },
+            # Second reasoning + tool call
+            {
+                "type": "reasoning",
+                "content": [
+                    {
+                        "type": "reasoning_text",
+                        "text": "Now I need the weekly forecast.",
+                    }
+                ],
+            },
+            {
+                "type": "function_call",
+                "name": "get_forecast",
+                "arguments": '{"location": "Paris", "days": 7}',
+            },
+            {
+                "type": "function_call_output",
+                "call_id": "call_2",
+                "output": "Mon 17°C, Tue 19°C, Wed 16°C",
+            },
+        ]
+        resp_msgs = [
+            response_input_to_harmony(item, prev_responses=prev_responses)
+            for item in resp_input
+        ]
+
+        assert render_for_completion([_system()] + chat_msgs) == render_for_completion(
+            [_system()] + resp_msgs
+        )
diff --git a/tests/entrypoints/openai/parser/test_harmony_utils.py b/tests/entrypoints/openai/parser/test_harmony_utils.py
index 21b53dff1507..2ec200d58377 100644
--- a/tests/entrypoints/openai/parser/test_harmony_utils.py
+++ b/tests/entrypoints/openai/parser/test_harmony_utils.py
@@ -7,9 +7,11 @@
 from tests.entrypoints.openai.utils import verify_harmony_messages
 from vllm.entrypoints.openai.parser.harmony_utils import (
     auto_drop_analysis_messages,
+    extract_function_from_recipient,
     get_encoding,
     get_system_message,
     has_custom_tools,
+    is_function_recipient,
     parse_chat_input_to_harmony_message,
     parse_chat_output,
 )
@@ -19,6 +21,182 @@
 )
 
 
+class TestIsFunctionRecipient:
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "functions.get_weather",
+            "functions.search_web",
+            "functions.math.sum",
+        ],
+    )
+    def test_functions_prefix_accepted(self, recipient):
+        assert is_function_recipient(recipient) is True
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "get_weather",
+            "search_web",
+            "calculator",
+            "my-tool",
+        ],
+    )
+    def test_bare_function_name_accepted(self, recipient):
+        assert is_function_recipient(recipient) is True
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "assistant",
+        ],
+    )
+    def test_assistant_rejected(self, recipient):
+        assert is_function_recipient(recipient) is False
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "math.sum",
+            "code.run",
+            "namespace.tool_name",
+            "my.deeply.nested.tool",
+        ],
+    )
+    def test_dotted_function_names_accepted(self, recipient):
+        assert is_function_recipient(recipient) is True
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "python",
+            "browser",
+            "container",
+        ],
+    )
+    def test_builtin_tool_names_rejected(self, recipient):
+        assert is_function_recipient(recipient) is False
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "python.run",
+            "python.execute",
+            "browser.search",
+            "browser.open",
+            "container.exec",
+        ],
+    )
+    def test_builtin_dotted_variants_rejected(self, recipient):
+        assert is_function_recipient(recipient) is False
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "",
+            "functions.",
+        ],
+    )
+    def test_empty_recipients_rejected(self, recipient):
+        assert is_function_recipient(recipient) is False
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "<|start|>",
+            "<|end|>",
+            "<|channel|>",
+        ],
+    )
+    def test_harmony_tokens_rejected(self, recipient):
+        assert is_function_recipient(recipient) is False
+
+
+class TestIsFunctionRecipientWithAllowedNames:
+    """Tests for is_function_recipient with allowed_function_tool_names."""
+
+    def test_prefixed_always_accepted(self):
+        """functions. prefix is always accepted regardless of allowed names."""
+        fn_names = frozenset({"other_tool"})
+        assert is_function_recipient("functions.get_weather", fn_names) is True
+
+    def test_bare_name_accepted_when_in_allowed_names(self):
+        fn_names = frozenset({"get_weather", "search_web"})
+        assert is_function_recipient("get_weather", fn_names) is True
+        assert is_function_recipient("search_web", fn_names) is True
+
+    def test_bare_name_rejected_when_not_in_allowed_names(self):
+        fn_names = frozenset({"get_weather"})
+        assert is_function_recipient("unknown_tool", fn_names) is False
+
+    def test_dotted_name_accepted_when_in_allowed_names(self):
+        fn_names = frozenset({"math.sum", "namespace.tool_name"})
+        assert is_function_recipient("math.sum", fn_names) is True
+        assert is_function_recipient("namespace.tool_name", fn_names) is True
+
+    def test_dotted_name_rejected_when_not_in_allowed_names(self):
+        fn_names = frozenset({"get_weather"})
+        assert is_function_recipient("custom_server.search", fn_names) is False
+
+    def test_empty_allowed_names_rejects_bare_names(self):
+        """Empty frozenset means no function tools — bare names are not functions."""
+        fn_names: frozenset[str] = frozenset()
+        assert is_function_recipient("get_weather", fn_names) is False
+        assert is_function_recipient("math.sum", fn_names) is False
+
+    def test_builtin_tools_always_rejected(self):
+        fn_names = frozenset({"python", "browser", "container"})
+        assert is_function_recipient("python", fn_names) is False
+        assert is_function_recipient("browser", fn_names) is False
+        assert is_function_recipient("container", fn_names) is False
+
+    def test_builtin_dotted_always_rejected(self):
+        fn_names = frozenset({"python.run", "browser.search"})
+        assert is_function_recipient("python.run", fn_names) is False
+        assert is_function_recipient("browser.search", fn_names) is False
+
+    def test_none_allowed_names_uses_heuristic(self):
+        """When allowed names is None (Chat Completions), use heuristic."""
+        assert is_function_recipient("get_weather", None) is True
+        assert is_function_recipient("math.sum", None) is True
+        assert is_function_recipient("python", None) is False
+
+
+class TestExtractFunctionFromRecipient:
+    @pytest.mark.parametrize(
+        "recipient,expected",
+        [
+            ("functions.get_weather", "get_weather"),
+            ("functions.search_web", "search_web"),
+            ("functions.", ""),
+        ],
+    )
+    def test_strips_functions_prefix(self, recipient, expected):
+        assert extract_function_from_recipient(recipient) == expected
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "get_weather",
+            "calculator",
+            "my-tool",
+        ],
+    )
+    def test_bare_name_returned_as_is(self, recipient):
+        assert extract_function_from_recipient(recipient) == recipient
+
+    @pytest.mark.parametrize(
+        "recipient,expected",
+        [
+            ("functions.math.sum", "math.sum"),
+            ("math.sum", "math.sum"),
+            ("namespace.tool_name", "namespace.tool_name"),
+        ],
+    )
+    def test_dotted_function_name_extraction(self, recipient, expected):
+        assert extract_function_from_recipient(recipient) == expected
+
+
 class TestCommonParseInputToHarmonyMessage:
     """
     Tests for scenarios that are common to both Chat Completion
@@ -843,6 +1021,13 @@ def test_all_standard_channels_present(self) -> None:
                     f"{channel} missing when with_custom_tools={with_tools}"
                 )
 
+    def test_unsupported_reasoning_effort_raises_clear_error(self) -> None:
+        with pytest.raises(
+            ValueError,
+            match="reasoning_effort='max' is not supported by Harmony",
+        ):
+            get_system_message(reasoning_effort="max")
+
 
 class TestResponseInputToHarmonyReasoningItem:
     """Tests for response_input_to_harmony handling of reasoning input items.
diff --git a/tests/entrypoints/openai/responses/test_errors.py b/tests/entrypoints/openai/responses/test_errors.py
index 0ef9bb901a64..e21f6aa2a42a 100644
--- a/tests/entrypoints/openai/responses/test_errors.py
+++ b/tests/entrypoints/openai/responses/test_errors.py
@@ -6,7 +6,9 @@
 
 import pytest
 
+import vllm.envs as envs
 from vllm.entrypoints.openai.engine.serving import GenerationError, OpenAIServing
+from vllm.envs import disable_envs_cache
 
 
 @pytest.mark.asyncio
@@ -60,3 +62,35 @@ async def test_convert_generation_error_to_streaming_response():
     assert isinstance(error_json, str)
     assert "Internal server error" in error_json
     assert "InternalServerError" in error_json
+
+
+def test_is_model_supported_skip_name_validation_env(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """When VLLM_SKIP_MODEL_NAME_VALIDATION is set, accept any model id."""
+    disable_envs_cache()
+    monkeypatch.delenv("VLLM_SKIP_MODEL_NAME_VALIDATION", raising=False)
+
+    mock_engine = MagicMock()
+    mock_engine.model_config = MagicMock()
+    mock_engine.model_config.max_model_len = 100
+    mock_models = MagicMock()
+    mock_models.is_base_model.return_value = False
+
+    serving = OpenAIServing(
+        engine_client=mock_engine,
+        models=mock_models,
+        request_logger=None,
+    )
+
+    assert serving._is_model_supported("not-a-registered-model") is False
+
+    monkeypatch.setenv("VLLM_SKIP_MODEL_NAME_VALIDATION", "1")
+    disable_envs_cache()
+    assert envs.VLLM_SKIP_MODEL_NAME_VALIDATION is True
+    assert serving._is_model_supported("not-a-registered-model") is True
+
+    monkeypatch.setenv("VLLM_SKIP_MODEL_NAME_VALIDATION", "true")
+    disable_envs_cache()
+    assert envs.VLLM_SKIP_MODEL_NAME_VALIDATION is True
+    assert serving._is_model_supported("another-alias") is True
diff --git a/tests/entrypoints/openai/responses/test_function_call.py b/tests/entrypoints/openai/responses/test_function_call.py
index bacb084c7eb6..9dcbd74c890b 100644
--- a/tests/entrypoints/openai/responses/test_function_call.py
+++ b/tests/entrypoints/openai/responses/test_function_call.py
@@ -249,50 +249,176 @@ async def test_function_calling_with_streaming_expected_arguments(
                 "additionalProperties": False,
             },
             "strict": True,
-        }
+        },
+        {
+            "type": "function",
+            "name": "get_time",
+            "description": "Get current local time for provided location.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {"type": "string"},
+                },
+                "required": ["location"],
+                "additionalProperties": False,
+            },
+            "strict": True,
+        },
     ]
 
     stream_response = await client.responses.create(
         model=model_name,
-        input="Can you tell me what the current weather is in Berlin?",
+        input=(
+            "Use tools only. Call get_weather for Berlin and get_time for Tokyo. "
+            "Do not answer directly."
+        ),
         tools=tools,
         stream=True,
     )
 
-    tool_call_item = None
-    completed_event = None
+    tool_call_items = {}
+    arguments_done_events = {}
+    completed_events = {}
     async for event in stream_response:
         if (
             event.type == "response.output_item.added"
             and event.item.type == "function_call"
         ):
-            tool_call_item = event.item
-        elif event.type == "response.function_call_arguments.delta" and tool_call_item:
+            tool_call_items[event.output_index] = event.item
+        elif event.type == "response.function_call_arguments.delta":
+            tool_call_item = tool_call_items[event.output_index]
             tool_call_item.arguments += event.delta
+        elif event.type == "response.function_call_arguments.done":
+            arguments_done_events[event.output_index] = event
         elif (
             event.type == "response.output_item.done"
             and event.item.type == "function_call"
         ):
-            completed_event = event
-    assert tool_call_item is not None
-    assert tool_call_item.type == "function_call"
-    assert tool_call_item.name == "get_weather"
-    assert completed_event is not None
-    assert tool_call_item.arguments == completed_event.item.arguments
-    assert tool_call_item.name == completed_event.item.name
-    args = json.loads(tool_call_item.arguments)
-    assert "location" in args
-    assert args["location"] is not None
+            completed_events[event.output_index] = event
+    assert len(tool_call_items) >= 2
+    assert len(arguments_done_events) >= 2
+    assert len(completed_events) >= 2
+
+    tool_calls_by_name = {
+        event.item.name: (
+            tool_call_items[output_index],
+            arguments_done_events[output_index],
+            event.item,
+        )
+        for output_index, event in completed_events.items()
+    }
+    assert {"get_weather", "get_time"}.issubset(tool_calls_by_name)
+    for added_item, arguments_done_event, completed_item in tool_calls_by_name.values():
+        assert added_item.type == "function_call"
+        assert added_item.arguments == arguments_done_event.arguments
+        assert added_item.arguments == completed_item.arguments
+        assert added_item.name == arguments_done_event.name
+        assert added_item.name == completed_item.name
+        args = json.loads(added_item.arguments)
+        assert "location" in args
+        assert args["location"] is not None
 
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize(
+    "tool_choice",
+    ["auto", "required", {"type": "function", "name": "get_current_weather"}],
+)
+@pytest.mark.parametrize(
+    "enable_thinking",
+    [True, False],
+)
 async def test_function_calling_with_streaming_types(
-    client: openai.AsyncOpenAI, model_name: str
+    client: openai.AsyncOpenAI, model_name: str, tool_choice, enable_thinking: bool
 ):
     # this links the "done" type with the "start" type
     # so every "done" type should have a corresponding "start" type
     # and every open block should be closed by the end of the stream
+    #
+    # stream of events for a response with function call could look like this:
+    # option1: reasoning -> content(option) -> function_call
+    # response.created
+    # -> response.in_progress
+    # -> response.output_item.added
+    # -> response.reasoning_part.added
+    # -> response.reasoning_text.delta
+    # ....
+    # -> response.reasoning_text.delta
+    # -> response.reasoning_text.done
+    # -> response.reasoning_part.done
+    # -> response.output_item.done
+    # -> response.output_item.added
+    # -> response.content_part.added
+    # -> response.output_text.delta
+    # ...
+    # -> response.output_text.delta
+    # -> response.output_text.done
+    # -> response.content_part.done
+    # -> response.output_item.done
+    # -> response.output_item.added
+    # -> response.function_call_arguments.delta
+    # ...
+    # -> response.function_call_arguments.delta
+    # -> response.function_call_arguments.done
+    # -> response.output_item.done
+    # -> response.completed
+    #
+    #
+    # option2: reasoning -> content
+    # response.created
+    # -> response.in_progress
+    # -> response.output_item.added
+    # -> response.reasoning_part.added
+    # -> response.reasoning_text.delta
+    # ....
+    # -> response.reasoning_text.delta
+    # -> response.reasoning_text.done
+    # -> response.reasoning_part.done
+    # -> response.output_item.done
+    # -> response.output_item.added
+    # -> response.content_part.added
+    # -> response.output_text.delta
+    # ..
+    # -> response.output_text.delta
+    # -> response.output_text.done
+    # -> response.content_part.done
+    # -> response.output_item.done
+    # -> response.completed
+    #
+    # option3: content
+    #
+    # response.created
+    # -> response.in_progress
+    # -> response.output_item.added
+    # -> response.content_part.added
+    # -> response.output_text.delta
+    # ...
+    # -> response.output_text.delta
+    # -> response.output_text.done
+    # -> response.content_part.done
+    # -> response.output_item.done
+    # -> response.completed
+    #
+    # option4: content -> function_call
+    # response.created
+    # -> response.in_progress
+    # -> response.output_item.added
+    # -> response.content_part.added
+    # -> response.output_text.delta
+    # ...
+    # -> response.output_text.delta
+    # -> response.output_text.done
+    # -> response.content_part.done
+    # -> response.output_item.done
+    # -> response.output_item.added
+    # -> response.function_call_arguments.delta
+    # ...
+    # -> response.function_call_arguments.delta
+    # -> response.function_call_arguments.done
+    # -> response.output_item.done
+    # -> response.completed
+
     pairs_of_event_types = {
         "response.completed": "response.created",
         "response.output_item.done": "response.output_item.added",
@@ -313,6 +439,8 @@ async def test_function_calling_with_streaming_types(
         model=model_name,
         input=input_list,
         tools=tools,
+        tool_choice=tool_choice,
+        extra_body={"chat_template_kwargs": {"enable_thinking": enable_thinking}},
         stream=True,
     )
 
@@ -333,3 +461,69 @@ async def test_function_calling_with_streaming_types(
             assert stack_of_event_types[-1] == pairs_of_event_types[event.type]
             stack_of_event_types.pop()
     assert len(stack_of_event_types) == 0
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize(
+    "tool_choice",
+    ["required", "auto", {"type": "function", "name": "get_weather"}],
+)
+async def test_function_calling_with_streaming_forced_tool_choice(
+    client: openai.AsyncOpenAI, model_name: str, tool_choice: str
+):
+    tools = [
+        {
+            "type": "function",
+            "name": "get_weather",
+            "description": "Get current temperature for provided location in celsius.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {"type": "string"},
+                },
+                "required": ["location"],
+                "additionalProperties": False,
+            },
+            "strict": True,
+        }
+    ]
+
+    stream_response = await client.responses.create(
+        model=model_name,
+        input="Call the get_weather function for Berlin and do not answer directly.",
+        tools=tools,
+        tool_choice=tool_choice,
+        stream=True,
+    )
+
+    tool_call_item = None
+    completed_event = None
+    text_deltas = []
+    async for event in stream_response:
+        if (
+            event.type == "response.output_item.added"
+            and event.item.type == "function_call"
+        ):
+            tool_call_item = event.item
+        elif event.type == "response.output_text.delta":
+            text_deltas.append(event.delta)
+        elif event.type == "response.function_call_arguments.delta" and tool_call_item:
+            tool_call_item.arguments += event.delta
+        elif (
+            event.type == "response.output_item.done"
+            and event.item.type == "function_call"
+        ):
+            completed_event = event
+
+    assert tool_call_item is not None
+    assert tool_call_item.type == "function_call"
+    assert tool_call_item.name == "get_weather"
+    assert completed_event is not None
+    assert tool_call_item.arguments == completed_event.item.arguments
+    assert tool_call_item.name == completed_event.item.name
+    args = json.loads(tool_call_item.arguments)
+    assert "location" in args
+    assert args["location"] is not None
+    # Forced tool choice should not leak tool-call JSON via output_text delta.
+    assert "".join(text_deltas).strip() == ""
diff --git a/tests/entrypoints/openai/responses/test_harmony.py b/tests/entrypoints/openai/responses/test_harmony.py
index 74f3360df45f..88dd2d38457d 100644
--- a/tests/entrypoints/openai/responses/test_harmony.py
+++ b/tests/entrypoints/openai/responses/test_harmony.py
@@ -999,17 +999,21 @@ async def test_mcp_tool_multi_turn(client: OpenAI, model_name: str, server):
         (msg.get("recipient") or "").startswith("python")
         for msg in response1.output_messages
     )
+    parsed_output_messages = [
+        Message.from_dict(msg) for msg in response1.output_messages
+    ]
     tool_response_found = any(
-        msg.get("author", {}).get("role") == "tool"
-        and (msg.get("author", {}).get("name") or "").startswith("python")
-        for msg in response1.output_messages
+        (msg.author.role == "tool" and (msg.author.name or "").startswith("python"))
+        for msg in parsed_output_messages
     )
     assert tool_call_found, "MCP tool call not found in output_messages"
     assert tool_response_found, "MCP tool response not found in output_messages"
 
     # No developer messages expected for elevated tools
     developer_msgs = [
-        msg for msg in response1.input_messages if msg["author"]["role"] == "developer"
+        msg
+        for msg in (Message.from_dict(raw) for raw in response1.input_messages)
+        if msg.author.role == "developer"
     ]
     assert len(developer_msgs) == 0, "No developer message expected for elevated tools"
 
@@ -1119,12 +1123,10 @@ async def test_function_call_with_previous_input_messages(
     num_system = 0
     num_developer = 0
     num_tool = 0
-    for msg_dict in response_2.input_messages:
-        # input_messages use {"author": {"role": "..."}} format,
-        # not the top-level {"role": "..."} that Message.from_dict
-        # expects.
-        author = msg_dict.get("author", {})
-        role = author.get("role") if isinstance(author, dict) else None
+    for message in (
+        Message.from_dict(msg_dict) for msg_dict in response_2.input_messages
+    ):
+        role = message.author.role
         if role == "system":
             num_system += 1
         elif role == "developer":
@@ -1183,12 +1185,8 @@ async def test_system_prompt_override_no_duplication(client: OpenAI, model_name:
     assert response.output_text is not None
 
     num_system = 0
-    for msg in response.input_messages:
-        # input_messages use {"author": {"role": "system"}} format,
-        # not the top-level {"role": "system"} that Message.from_dict expects.
-        author = msg.get("author", {})
-        role = author.get("role") if isinstance(author, dict) else None
-        if role == "system":
+    for message in (Message.from_dict(msg) for msg in response.input_messages):
+        if message.author.role == "system":
             num_system += 1
     assert num_system == 1, f"Expected 1 system message, got {num_system}"
 
diff --git a/tests/entrypoints/openai/responses/test_harmony_utils.py b/tests/entrypoints/openai/responses/test_harmony_utils.py
index e51538298ff9..f1434ce2bd58 100644
--- a/tests/entrypoints/openai/responses/test_harmony_utils.py
+++ b/tests/entrypoints/openai/responses/test_harmony_utils.py
@@ -246,7 +246,8 @@ def test_commentary_with_unknown_recipient_creates_mcp_call(self):
         message = message.with_channel("commentary")
         message = message.with_recipient("custom_tool")
 
-        output_items = harmony_to_response_output(message)
+        fn_names = frozenset({"other_tool"})
+        output_items = harmony_to_response_output(message, fn_names)
 
         assert len(output_items) == 1
         assert isinstance(output_items[0], McpCall)
@@ -286,13 +287,179 @@ def test_non_assistant_message_returns_empty(self):
         assert len(output_items) == 0
 
 
+class TestHarmonyToResponseOutputWithFunctionToolNames:
+    """Tests for bare function name handling with function_tool_names."""
+
+    def test_bare_name_creates_function_call_when_in_tool_names(self):
+        """Bare function name matching a known tool creates function call."""
+        message = Message.from_role_and_content(
+            Role.ASSISTANT, '{"location": "San Francisco"}'
+        )
+        message = message.with_channel("commentary")
+        message = message.with_recipient("get_weather")
+
+        fn_names = frozenset({"get_weather"})
+        output_items = harmony_to_response_output(message, fn_names)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseFunctionToolCall)
+        assert output_items[0].type == "function_call"
+        assert output_items[0].name == "get_weather"
+        assert output_items[0].arguments == '{"location": "San Francisco"}'
+
+    def test_bare_name_creates_mcp_call_when_not_in_tool_names(self):
+        """Bare name not matching any known tool creates MCP call."""
+        message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
+        message = message.with_channel("commentary")
+        message = message.with_recipient("custom_tool")
+
+        fn_names = frozenset({"get_weather"})
+        output_items = harmony_to_response_output(message, fn_names)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], McpCall)
+        assert output_items[0].type == "mcp_call"
+
+    def test_dotted_function_name_creates_function_call(self):
+        """Dotted function name in tool names creates function call."""
+        message = Message.from_role_and_content(Role.ASSISTANT, '{"a": 1, "b": 2}')
+        message = message.with_channel("commentary")
+        message = message.with_recipient("math.sum")
+
+        fn_names = frozenset({"math.sum"})
+        output_items = harmony_to_response_output(message, fn_names)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseFunctionToolCall)
+        assert output_items[0].name == "math.sum"
+
+    def test_empty_tool_names_defaults_to_mcp(self):
+        """With empty function_tool_names, bare names become MCP calls."""
+        message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
+        message = message.with_channel("commentary")
+        message = message.with_recipient("get_weather")
+
+        output_items = harmony_to_response_output(message, frozenset())
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], McpCall)
+
+    def test_prefixed_name_always_function_call(self):
+        """functions. prefix always creates function call even with empty tool names."""
+        message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
+        message = message.with_channel("commentary")
+        message = message.with_recipient("functions.get_weather")
+
+        output_items = harmony_to_response_output(message, frozenset())
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseFunctionToolCall)
+        assert output_items[0].name == "get_weather"
+
+
+class TestParserStateWithFunctionToolNames:
+    """Tests for parser_state_to_response_output with function_tool_names."""
+
+    def test_bare_name_creates_function_call(self):
+        from unittest.mock import Mock
+
+        parser = Mock()
+        parser.current_content = '{"arg": "value"}'
+        parser.current_role = Role.ASSISTANT
+        parser.current_channel = "commentary"
+        parser.current_recipient = "get_weather"
+
+        fn_names = frozenset({"get_weather"})
+        items = parser_state_to_response_output(parser, fn_names)
+
+        assert len(items) == 1
+        assert isinstance(items[0], ResponseFunctionToolCall)
+        assert items[0].name == "get_weather"
+        assert items[0].status == "in_progress"
+
+    def test_bare_name_creates_mcp_when_not_in_tool_names(self):
+        from unittest.mock import Mock
+
+        parser = Mock()
+        parser.current_content = '{"arg": "value"}'
+        parser.current_role = Role.ASSISTANT
+        parser.current_channel = "commentary"
+        parser.current_recipient = "unknown_tool"
+
+        fn_names = frozenset({"get_weather"})
+        items = parser_state_to_response_output(parser, fn_names)
+
+        assert len(items) == 1
+        assert isinstance(items[0], McpCall)
+        assert items[0].name == "unknown_tool"
+
+
+class TestToolCallsOnNonStandardChannels:
+    """Tests verifying tool calls are detected regardless of channel."""
+
+    def test_function_call_on_comment_channel(self):
+        message = Message.from_role_and_content(Role.ASSISTANT, '{"query": "weather"}')
+        message = message.with_channel("comment")
+        message = message.with_recipient("functions.get_weather")
+
+        output_items = harmony_to_response_output(message)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseFunctionToolCall)
+        assert output_items[0].type == "function_call"
+        assert output_items[0].name == "get_weather"
+
+    def test_bare_function_on_comment_channel(self):
+        message = Message.from_role_and_content(Role.ASSISTANT, '{"query": "weather"}')
+        message = message.with_channel("comment")
+        message = message.with_recipient("get_weather")
+
+        fn_names = frozenset({"get_weather"})
+        output_items = harmony_to_response_output(message, fn_names)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseFunctionToolCall)
+        assert output_items[0].name == "get_weather"
+
+    def test_parser_state_comment_channel_function(self):
+        from unittest.mock import Mock
+
+        parser = Mock()
+        parser.current_content = '{"arg": "value"}'
+        parser.current_role = Role.ASSISTANT
+        parser.current_channel = "comment"
+        parser.current_recipient = "functions.get_weather"
+
+        items = parser_state_to_response_output(parser)
+
+        assert len(items) == 1
+        assert isinstance(items[0], ResponseFunctionToolCall)
+        assert items[0].name == "get_weather"
+
+    def test_parser_state_comment_channel_mcp(self):
+        from unittest.mock import Mock
+
+        parser = Mock()
+        parser.current_content = '{"arg": "value"}'
+        parser.current_role = Role.ASSISTANT
+        parser.current_channel = "comment"
+        parser.current_recipient = "mcp.server.tool"
+
+        fn_names: frozenset[str] = frozenset()
+        items = parser_state_to_response_output(parser, fn_names)
+
+        assert len(items) == 1
+        assert isinstance(items[0], McpCall)
+
+
 def test_parse_mcp_call_basic() -> None:
     """Test that MCP calls are parsed with correct type and server_label."""
     message = Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}')
     message = message.with_recipient("filesystem")
     message = message.with_channel("commentary")
 
-    output_items = harmony_to_response_output(message)
+    fn_names: frozenset[str] = frozenset()
+    output_items = harmony_to_response_output(message, fn_names)
 
     assert len(output_items) == 1
     assert isinstance(output_items[0], McpCall)
@@ -309,7 +476,8 @@ def test_parse_mcp_call_dotted_recipient() -> None:
     message = message.with_recipient("repo_browser.list")
     message = message.with_channel("commentary")
 
-    output_items = harmony_to_response_output(message)
+    fn_names: frozenset[str] = frozenset()
+    output_items = harmony_to_response_output(message, fn_names)
 
     assert len(output_items) == 1
     assert isinstance(output_items[0], McpCall)
@@ -371,7 +539,8 @@ def test_parser_state_to_response_output_commentary_channel() -> None:
     parser_mcp.current_channel = "commentary"
     parser_mcp.current_recipient = "filesystem"
 
-    mcp_items = parser_state_to_response_output(parser_mcp)
+    fn_names: frozenset[str] = frozenset()
+    mcp_items = parser_state_to_response_output(parser_mcp, fn_names)
 
     assert len(mcp_items) == 1
     assert isinstance(mcp_items[0], McpCall)
@@ -438,7 +607,8 @@ def test_parser_state_to_response_output_analysis_channel() -> None:
     parser_mcp.current_channel = "analysis"
     parser_mcp.current_recipient = "database"
 
-    mcp_items = parser_state_to_response_output(parser_mcp)
+    fn_names: frozenset[str] = frozenset()
+    mcp_items = parser_state_to_response_output(parser_mcp, fn_names)
 
     assert len(mcp_items) == 1
     assert isinstance(mcp_items[0], McpCall)
diff --git a/tests/entrypoints/openai/responses/test_mcp_tools.py b/tests/entrypoints/openai/responses/test_mcp_tools.py
index 763e2b208555..330d4b9e4bc6 100644
--- a/tests/entrypoints/openai/responses/test_mcp_tools.py
+++ b/tests/entrypoints/openai/responses/test_mcp_tools.py
@@ -7,7 +7,7 @@
 import pytest
 import pytest_asyncio
 from openai import OpenAI
-from openai_harmony import ToolDescription, ToolNamespaceConfig
+from openai_harmony import Message, ToolDescription, ToolNamespaceConfig
 
 from tests.utils import RemoteOpenAIServer
 from vllm.entrypoints.mcp.tool_server import MCPToolServer
@@ -173,10 +173,10 @@ async def test_mcp_tool_env_flag_enabled(self, client: OpenAI, model_name: str):
             if recipient and recipient.startswith("python"):
                 tool_call_found = True
                 assert message.get("channel") == "commentary"
-            author = message.get("author", {})
-            if author.get("role") == "tool" and (author.get("name") or "").startswith(
-                "python"
-            ):
+            parsed_message = Message.from_dict(message)
+            if parsed_message.author.role == "tool" and (
+                parsed_message.author.name or ""
+            ).startswith("python"):
                 tool_response_found = True
                 assert message.get("channel") == "commentary"
 
@@ -188,7 +188,7 @@ async def test_mcp_tool_env_flag_enabled(self, client: OpenAI, model_name: str):
         assert tool_response_found, "No Python tool response found"
 
         for message in response.input_messages:
-            assert message.get("author", {}).get("role") != "developer"
+            assert Message.from_dict(message).author.role != "developer"
 
     @pytest.mark.asyncio
     @pytest.mark.parametrize("model_name", [MODEL_NAME])
diff --git a/tests/entrypoints/openai/responses/test_response_input_to_harmony.py b/tests/entrypoints/openai/responses/test_response_input_to_harmony.py
new file mode 100644
index 000000000000..8efd01577328
--- /dev/null
+++ b/tests/entrypoints/openai/responses/test_response_input_to_harmony.py
@@ -0,0 +1,274 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for response_input_to_harmony.
+
+Covers every type branch in the function and verifies that each produced
+Harmony Message has the correct role, channel, recipient, content_type,
+author name, and text content.
+"""
+
+import pytest
+from openai.types.responses import ResponseFunctionToolCall, ResponseReasoningItem
+from openai.types.responses.response_reasoning_item import (
+    Content as ReasoningTextContent,
+)
+from openai_harmony import Role
+
+from vllm.entrypoints.openai.responses.harmony import response_input_to_harmony
+
+# ---------------------------------------------------------------------------
+# Shared fixtures
+# ---------------------------------------------------------------------------
+
+_PREV_CALL = ResponseFunctionToolCall(
+    id="fc_test",
+    call_id="call_test",
+    name="get_weather",
+    arguments='{"location": "Paris"}',
+    type="function_call",
+)
+
+_REASONING_ITEM = ResponseReasoningItem(
+    id="rs_test",
+    type="reasoning",
+    content=[ReasoningTextContent(type="reasoning_text", text="Thinking hard.")],
+    summary=[],
+    status=None,
+)
+
+
+class TestResponseInputToHarmonyMessage:
+    """Unit tests for every message type handled by response_input_to_harmony."""
+
+    # -----------------------------------------------------------------------
+    # type="message" (or no type key)
+    # -----------------------------------------------------------------------
+
+    def test_user_message_string_content(self):
+        msg = response_input_to_harmony(
+            {"type": "message", "role": "user", "content": "Hello"},
+            prev_responses=[],
+        )
+
+        assert msg.author.role == Role.USER
+        assert msg.content[0].text == "Hello"
+        assert msg.channel is None
+
+    def test_no_type_key_defaults_to_message_branch(self):
+        """Omitting 'type' should fall through to the message branch."""
+        msg = response_input_to_harmony(
+            {"role": "user", "content": "Hello"},
+            prev_responses=[],
+        )
+
+        assert msg.author.role == Role.USER
+        assert msg.content[0].text == "Hello"
+
+    def test_system_message(self):
+        msg = response_input_to_harmony(
+            {"type": "message", "role": "system", "content": "Be helpful."},
+            prev_responses=[],
+        )
+
+        assert msg.author.role == Role.SYSTEM
+        assert msg.content[0].text == "Be helpful."
+        assert msg.channel is None
+
+    def test_assistant_message_gets_final_channel(self):
+        msg = response_input_to_harmony(
+            {"type": "message", "role": "assistant", "content": "The answer is 42."},
+            prev_responses=[],
+        )
+
+        assert msg.author.role == Role.ASSISTANT
+        assert msg.channel == "final"
+        assert msg.content[0].text == "The answer is 42."
+
+    def test_developer_message_gets_instructions_prefix(self):
+        msg = response_input_to_harmony(
+            {"type": "message", "role": "developer", "content": "Be concise."},
+            prev_responses=[],
+        )
+
+        assert msg.author.role == Role.DEVELOPER
+        assert msg.content[0].text == "Instructions:\nBe concise."
+        assert msg.channel is None
+
+    def test_message_with_array_content(self):
+        msg = response_input_to_harmony(
+            {
+                "type": "message",
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Part one. "},
+                    {"type": "text", "text": "Part two."},
+                ],
+            },
+            prev_responses=[],
+        )
+
+        assert msg.author.role == Role.USER
+        assert len(msg.content) == 2
+        assert msg.content[0].text == "Part one. "
+        assert msg.content[1].text == "Part two."
+
+    def test_developer_message_array_content_gets_prefix_on_each_part(self):
+        msg = response_input_to_harmony(
+            {
+                "type": "message",
+                "role": "developer",
+                "content": [
+                    {"type": "text", "text": "Rule 1."},
+                    {"type": "text", "text": "Rule 2."},
+                ],
+            },
+            prev_responses=[],
+        )
+
+        assert msg.content[0].text == "Instructions:\nRule 1."
+        assert msg.content[1].text == "Instructions:\nRule 2."
+
+    # -----------------------------------------------------------------------
+    # type="reasoning"
+    # -----------------------------------------------------------------------
+
+    def test_reasoning_gets_analysis_channel(self):
+        msg = response_input_to_harmony(
+            {
+                "type": "reasoning",
+                "content": [
+                    {"type": "reasoning_text", "text": "I should call get_weather."}
+                ],
+            },
+            prev_responses=[],
+        )
+
+        assert msg.author.role == Role.ASSISTANT
+        assert msg.channel == "analysis"
+        assert msg.content[0].text == "I should call get_weather."
+
+    def test_reasoning_pydantic_model_input(self):
+        """A Pydantic ResponseReasoningItem should be model_dump()'d before parsing."""
+        msg = response_input_to_harmony(_REASONING_ITEM, prev_responses=[])
+
+        assert msg.author.role == Role.ASSISTANT
+        assert msg.channel == "analysis"
+        assert msg.content[0].text == "Thinking hard."
+
+    # -----------------------------------------------------------------------
+    # type="function_call"
+    # -----------------------------------------------------------------------
+
+    def test_function_call_channel_recipient_and_content_type(self):
+        msg = response_input_to_harmony(
+            {
+                "type": "function_call",
+                "name": "get_weather",
+                "arguments": '{"location": "Paris"}',
+            },
+            prev_responses=[],
+        )
+
+        assert msg.author.role == Role.ASSISTANT
+        assert msg.channel == "commentary"
+        assert msg.recipient == "functions.get_weather"
+        assert msg.content_type == "json"
+        assert msg.content[0].text == '{"location": "Paris"}'
+
+    def test_function_call_empty_arguments(self):
+        msg = response_input_to_harmony(
+            {"type": "function_call", "name": "ping", "arguments": ""},
+            prev_responses=[],
+        )
+
+        assert msg.recipient == "functions.ping"
+        assert msg.content[0].text == ""
+
+    # -----------------------------------------------------------------------
+    # type="function_call_output"
+    # -----------------------------------------------------------------------
+
+    def test_function_call_output_channel_recipient_and_author_name(self):
+        msg = response_input_to_harmony(
+            {"type": "function_call_output", "call_id": "call_test", "output": "18°C"},
+            prev_responses=[_PREV_CALL],
+        )
+
+        assert msg.author.role == Role.TOOL
+        assert msg.author.name == "functions.get_weather"
+        assert msg.channel == "commentary"
+        assert msg.recipient == "assistant"
+        assert msg.content[0].text == "18°C"
+
+    def test_function_call_output_uses_most_recent_matching_call(self):
+        """When multiple prev_responses share a call_id, the last one wins
+        because the search is reversed."""
+        earlier = ResponseFunctionToolCall(
+            id="fc_old",
+            call_id="call_test",
+            name="old_func",
+            arguments="{}",
+            type="function_call",
+        )
+        later = ResponseFunctionToolCall(
+            id="fc_new",
+            call_id="call_test",
+            name="get_weather",
+            arguments="{}",
+            type="function_call",
+        )
+
+        msg = response_input_to_harmony(
+            {
+                "type": "function_call_output",
+                "call_id": "call_test",
+                "output": "result",
+            },
+            prev_responses=[earlier, later],
+        )
+
+        assert msg.author.name == "functions.get_weather"
+
+    def test_function_call_output_skips_non_function_call_items_in_prev_responses(
+        self,
+    ):
+        """ResponseReasoningItem entries in prev_responses should be ignored."""
+        msg = response_input_to_harmony(
+            {
+                "type": "function_call_output",
+                "call_id": "call_test",
+                "output": "18°C",
+            },
+            prev_responses=[_REASONING_ITEM, _PREV_CALL],
+        )
+
+        assert msg.author.name == "functions.get_weather"
+
+    def test_function_call_output_raises_if_no_matching_call(self):
+        with pytest.raises(ValueError, match="No call message found for"):
+            response_input_to_harmony(
+                {
+                    "type": "function_call_output",
+                    "call_id": "no_such_id",
+                    "output": "x",
+                },
+                prev_responses=[_PREV_CALL],
+            )
+
+    def test_function_call_output_raises_on_empty_prev_responses(self):
+        with pytest.raises(ValueError, match="No call message found for"):
+            response_input_to_harmony(
+                {"type": "function_call_output", "call_id": "call_test", "output": "x"},
+                prev_responses=[],
+            )
+
+    # -----------------------------------------------------------------------
+    # Error cases
+    # -----------------------------------------------------------------------
+
+    def test_unknown_type_raises_value_error(self):
+        with pytest.raises(ValueError, match="Unknown input type"):
+            response_input_to_harmony(
+                {"type": "image_url", "url": "https://example.com/img.png"},
+                prev_responses=[],
+            )
diff --git a/tests/entrypoints/openai/responses/test_responses_utils.py b/tests/entrypoints/openai/responses/test_responses_utils.py
index 3a4476984d3d..c9ba52b143e0 100644
--- a/tests/entrypoints/openai/responses/test_responses_utils.py
+++ b/tests/entrypoints/openai/responses/test_responses_utils.py
@@ -4,7 +4,6 @@
 from unittest.mock import patch
 
 import pytest
-from openai.types.chat import ChatCompletionMessageParam
 from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
 from openai.types.responses.response_function_tool_call_output_item import (
     ResponseFunctionToolCallOutputItem,
@@ -17,16 +16,105 @@
     Summary,
 )
 
-from vllm.entrypoints.constants import MCP_PREFIX
 from vllm.entrypoints.openai.responses.utils import (
-    _construct_single_message_from_response_item,
-    _maybe_combine_reasoning_and_tool_call,
+    _construct_message_from_response_item,
     construct_chat_messages_with_tool_call,
+    construct_input_messages,
     convert_tool_responses_to_completions_format,
     should_continue_final_message,
 )
 
 
+def _single_chat_message(item):
+    message = _construct_message_from_response_item(item)
+    assert message is not None
+    return message
+
+
+def make_output_message(
+    text: str,
+    *,
+    id: str = "msg_1",
+    status: str = "completed",
+) -> ResponseOutputMessage:
+    return ResponseOutputMessage(
+        id=id,
+        content=[
+            ResponseOutputText(
+                annotations=[],
+                text=text,
+                type="output_text",
+                logprobs=None,
+            )
+        ],
+        role="assistant",
+        status=status,
+        type="message",
+    )
+
+
+def make_reasoning_item(
+    *,
+    content_text: str | None = None,
+    summary_text: str | None = None,
+    content: list[Content] | None = None,
+    summary: list[Summary] | None = None,
+    encrypted_content: str | None = None,
+    id: str = "reasoning_1",
+    status: str | None = None,
+) -> ResponseReasoningItem:
+    if content is None and content_text is not None:
+        content = [Content(text=content_text, type="reasoning_text")]
+    if summary is None and summary_text is not None:
+        summary = [Summary(text=summary_text, type="summary_text")]
+
+    return ResponseReasoningItem(
+        id=id,
+        summary=[] if summary is None else summary,
+        type="reasoning",
+        content=content,
+        encrypted_content=encrypted_content,
+        status=status,
+    )
+
+
+def make_function_call(
+    *,
+    call_id: str,
+    name: str = "test_function",
+    arguments: str = "{}",
+    id: str = "tool_id",
+    status: str | None = None,
+) -> ResponseFunctionToolCall:
+    kwargs = {
+        "type": "function_call",
+        "id": id,
+        "call_id": call_id,
+        "name": name,
+        "arguments": arguments,
+    }
+    if status is not None:
+        kwargs["status"] = status
+
+    return ResponseFunctionToolCall(**kwargs)
+
+
+def make_function_call_output(
+    *,
+    call_id: str,
+    output: str = "42",
+    id: str = "output_1",
+    status: str = "completed",
+) -> ResponseFunctionToolCallOutputItem:
+    return ResponseFunctionToolCallOutputItem(
+        id=id,
+        type="function_call_output",
+        call_id=call_id,
+        output=output,
+        status=status,
+    )
+
+
 class TestResponsesUtils:
     """Tests for convert_tool_responses_to_completions_format function."""
 
@@ -86,7 +174,7 @@ def test_construct_chat_messages_with_tool_call(self):
             message["tool_calls"][0]["function"]["arguments"] == '{"code": "123+456"}'
         )
 
-    def test_construct_single_message_from_response_item(self):
+    def test_construct_chat_messages_preserves_single_item_conversions(self):
         item = ResponseReasoningItem(
             id="lol",
             summary=[],
@@ -100,7 +188,7 @@ def test_construct_single_message_from_response_item(self):
             encrypted_content=None,
             status=None,
         )
-        formatted_item = _construct_single_message_from_response_item(item)
+        formatted_item = _single_chat_message(item)
         assert formatted_item["role"] == "assistant"
         assert formatted_item["reasoning"] == "Leroy Jenkins"
 
@@ -118,7 +206,7 @@ def test_construct_single_message_from_response_item(self):
             status=None,
         )
 
-        formatted_item = _construct_single_message_from_response_item(item)
+        formatted_item = _single_chat_message(item)
         assert formatted_item["role"] == "assistant"
         assert (
             formatted_item["reasoning"]
@@ -132,11 +220,22 @@ def test_construct_single_message_from_response_item(self):
             output="1234",
             status="completed",
         )
-        formatted_item = _construct_single_message_from_response_item(tool_call_output)
+        formatted_item = _single_chat_message(tool_call_output)
         assert formatted_item["role"] == "tool"
         assert formatted_item["content"] == "1234"
         assert formatted_item["tool_call_id"] == "temp"
 
+        formatted_item = _single_chat_message(
+            {
+                "type": "function_call_output",
+                "call_id": "temp_dict",
+                "output": "5678",
+            }
+        )
+        assert formatted_item["role"] == "tool"
+        assert formatted_item["content"] == "5678"
+        assert formatted_item["tool_call_id"] == "temp_dict"
+
         item = ResponseReasoningItem(
             id="lol",
             summary=[],
@@ -146,7 +245,7 @@ def test_construct_single_message_from_response_item(self):
             status=None,
         )
         with pytest.raises(ValueError):
-            _construct_single_message_from_response_item(item)
+            construct_chat_messages_with_tool_call([item])
 
         output_item = ResponseOutputMessage(
             id="msg_bf585bbbe3d500e0",
@@ -163,7 +262,7 @@ def test_construct_single_message_from_response_item(self):
             type="message",
         )
 
-        formatted_item = _construct_single_message_from_response_item(output_item)
+        formatted_item = _single_chat_message(output_item)
         assert formatted_item["role"] == "assistant"
         assert formatted_item["content"] == "dongyi"
 
@@ -191,7 +290,7 @@ def test_content_preferred_over_summary(self):
             encrypted_content=None,
             status=None,
         )
-        formatted = _construct_single_message_from_response_item(item)
+        formatted = _single_chat_message(item)
         assert formatted["reasoning"] == "This is the actual content"
 
     def test_content_only(self):
@@ -209,7 +308,7 @@ def test_content_only(self):
             encrypted_content=None,
             status=None,
         )
-        formatted = _construct_single_message_from_response_item(item)
+        formatted = _single_chat_message(item)
         assert formatted["reasoning"] == "Content without summary"
 
     @patch("vllm.entrypoints.openai.responses.utils.logger")
@@ -228,7 +327,7 @@ def test_summary_fallback_when_no_content(self, mock_logger):
             encrypted_content=None,
             status=None,
         )
-        formatted = _construct_single_message_from_response_item(item)
+        formatted = _single_chat_message(item)
         assert formatted["reasoning"] == "Fallback summary text"
         mock_logger.warning.assert_called_once()
         assert (
@@ -251,7 +350,7 @@ def test_summary_fallback_when_content_empty(self, mock_logger):
             encrypted_content=None,
             status=None,
         )
-        formatted = _construct_single_message_from_response_item(item)
+        formatted = _single_chat_message(item)
         assert formatted["reasoning"] == "Summary when content empty"
         mock_logger.warning.assert_called_once()
         assert (
@@ -268,7 +367,7 @@ def test_neither_content_nor_summary(self):
             encrypted_content=None,
             status=None,
         )
-        formatted = _construct_single_message_from_response_item(item)
+        formatted = _single_chat_message(item)
         assert formatted["reasoning"] == ""
 
     def test_encrypted_content_raises(self):
@@ -292,7 +391,7 @@ def test_encrypted_content_raises(self):
             status=None,
         )
         with pytest.raises(ValueError):
-            _construct_single_message_from_response_item(item)
+            construct_chat_messages_with_tool_call([item])
 
     @patch("vllm.entrypoints.openai.responses.utils.logger")
     def test_summary_with_multiple_entries_uses_first(self, mock_logger):
@@ -314,7 +413,7 @@ def test_summary_with_multiple_entries_uses_first(self, mock_logger):
             encrypted_content=None,
             status=None,
         )
-        formatted = _construct_single_message_from_response_item(item)
+        formatted = _single_chat_message(item)
         assert formatted["reasoning"] == "First summary"
         mock_logger.warning.assert_called_once()
         assert (
@@ -342,7 +441,7 @@ def test_no_warning_when_content_used(self, mock_logger):
             encrypted_content=None,
             status=None,
         )
-        _construct_single_message_from_response_item(item)
+        construct_chat_messages_with_tool_call([item])
         mock_logger.warning.assert_not_called()
 
 
@@ -625,116 +724,190 @@ def test_dict_with_none_status_returns_false(self):
         assert should_continue_final_message([dict_item]) is False
 
 
-class TestMaybeCombineReasoningAndToolCall:
-    """Tests for _maybe_combine_reasoning_and_tool_call function."""
-
-    def test_returns_none_when_item_id_is_none(self):
-        """
-        Test fix from PR #31999: when item.id is None, should return None
-        instead of raising TypeError on startswith().
-        """
-        item = ResponseFunctionToolCall(
-            type="function_call",
-            id=None,  # This was causing TypeError before the fix
-            call_id="call_123",
-            name="test_function",
-            arguments="{}",
-        )
-        messages: list[ChatCompletionMessageParam] = []
-
-        result = _maybe_combine_reasoning_and_tool_call(item, messages)
-
-        assert result is None
-
-    def test_returns_none_when_id_does_not_start_with_mcp_prefix(self):
-        """Test that non-MCP tool calls are not combined."""
-        item = ResponseFunctionToolCall(
-            type="function_call",
-            id="regular_id",  # Does not start with MCP_PREFIX
-            call_id="call_123",
-            name="test_function",
-            arguments="{}",
-        )
-        messages = [{"role": "assistant", "reasoning": "some reasoning"}]
-
-        result = _maybe_combine_reasoning_and_tool_call(item, messages)
-
-        assert result is None
-
-    def test_returns_none_when_last_message_is_not_assistant(self):
-        """Test that non-assistant last message returns None."""
-        item = ResponseFunctionToolCall(
-            type="function_call",
-            id=f"{MCP_PREFIX}tool_id",
-            call_id="call_123",
-            name="test_function",
-            arguments="{}",
-        )
-        messages = [{"role": "user", "content": "hello"}]
-
-        result = _maybe_combine_reasoning_and_tool_call(item, messages)
-
-        assert result is None
-
-    def test_returns_none_when_last_message_has_no_reasoning(self):
-        """Test that assistant message without reasoning returns None."""
-        item = ResponseFunctionToolCall(
-            type="function_call",
-            id=f"{MCP_PREFIX}tool_id",
-            call_id="call_123",
-            name="test_function",
-            arguments="{}",
-        )
-        messages = [{"role": "assistant", "content": "some content"}]
-
-        result = _maybe_combine_reasoning_and_tool_call(item, messages)
-
-        assert result is None
-
-    def test_combines_reasoning_and_mcp_tool_call(self):
-        """Test successful combination of reasoning message and MCP tool call."""
-        item = ResponseFunctionToolCall(
-            type="function_call",
-            id=f"{MCP_PREFIX}tool_id",
-            call_id="call_123",
-            name="test_function",
-            arguments='{"arg": "value"}',
-        )
-        messages = [{"role": "assistant", "reasoning": "I need to call this tool"}]
+class TestConstructChatMessagesCombinePolicy:
+    """Tests for contiguous assistant-side merging."""
+
+    @pytest.mark.parametrize(
+        ("items", "expected_content", "expected_reasoning", "expected_tool_call_ids"),
+        [
+            pytest.param(
+                [
+                    make_reasoning_item(content_text="Let me think"),
+                    make_output_message("Hello"),
+                ],
+                "Hello",
+                "Let me think",
+                None,
+                id="reasoning-output-messages",
+            ),
+            pytest.param(
+                [
+                    make_function_call(call_id="call_123"),
+                    make_function_call(call_id="call_456"),
+                ],
+                None,
+                None,
+                ["call_123", "call_456"],
+                id="consecutive-tool-calls",
+            ),
+            pytest.param(
+                [
+                    make_reasoning_item(content_text="Let me think"),
+                    make_function_call(call_id="call_123"),
+                ],
+                None,
+                "Let me think",
+                ["call_123"],
+                id="reasoning-tool-call",
+            ),
+            pytest.param(
+                [
+                    make_output_message("Hello"),
+                    make_function_call(call_id="call_123"),
+                ],
+                "Hello",
+                None,
+                ["call_123"],
+                id="output-tool-call",
+            ),
+            pytest.param(
+                [
+                    make_reasoning_item(content_text="Thinking"),
+                    make_output_message("Hello"),
+                    make_function_call(call_id="call_123"),
+                    make_function_call(call_id="call_456"),
+                ],
+                "Hello",
+                "Thinking",
+                ["call_123", "call_456"],
+                id="reasoning-output-tool-call",
+            ),
+        ],
+    )
+    def test_assistant_side_items_merge_until_tool_output(
+        self,
+        items,
+        expected_content,
+        expected_reasoning,
+        expected_tool_call_ids,
+    ):
+        messages = construct_chat_messages_with_tool_call(items)
 
-        result = _maybe_combine_reasoning_and_tool_call(item, messages)
-
-        assert result is not None
-        assert result["role"] == "assistant"
-        assert result["reasoning"] == "I need to call this tool"
-        assert "tool_calls" in result
-        assert len(result["tool_calls"]) == 1
-        assert result["tool_calls"][0]["id"] == "call_123"
-        assert result["tool_calls"][0]["function"]["name"] == "test_function"
-        assert result["tool_calls"][0]["function"]["arguments"] == '{"arg": "value"}'
-        assert result["tool_calls"][0]["type"] == "function"
-
-    def test_returns_none_for_non_function_tool_call_type(self):
-        """Test that non-ResponseFunctionToolCall items return None."""
-        # Pass a dict instead of ResponseFunctionToolCall
-        item = {"type": "message", "content": "hello"}
-        messages = [{"role": "assistant", "reasoning": "some reasoning"}]
-
-        result = _maybe_combine_reasoning_and_tool_call(item, messages)
-
-        assert result is None
-
-    def test_returns_none_when_id_is_empty_string(self):
-        """Test that empty string id returns None (falsy check)."""
-        item = ResponseFunctionToolCall(
-            type="function_call",
-            id="",  # Empty string is falsy
-            call_id="call_123",
-            name="test_function",
-            arguments="{}",
-        )
-        messages = [{"role": "assistant", "reasoning": "some reasoning"}]
-
-        result = _maybe_combine_reasoning_and_tool_call(item, messages)
-
-        assert result is None
+        assert len(messages) == 1
+        assert messages[0]["role"] == "assistant"
+        if expected_content is None:
+            assert "content" not in messages[0]
+        else:
+            assert messages[0]["content"] == expected_content
+        if expected_reasoning is None:
+            assert "reasoning" not in messages[0]
+        else:
+            assert messages[0]["reasoning"] == expected_reasoning
+        if expected_tool_call_ids is None:
+            assert "tool_calls" not in messages[0]
+        else:
+            assert [tool_call["id"] for tool_call in messages[0]["tool_calls"]] == (
+                expected_tool_call_ids
+            )
+
+    @pytest.mark.parametrize(
+        ("items", "num_expected_messages"),
+        [
+            pytest.param(
+                [
+                    make_output_message("Hello"),
+                    make_output_message("World"),
+                ],
+                2,
+                id="consecutive-output-messages",
+            ),
+            pytest.param(
+                [
+                    make_reasoning_item(content_text="Let me think"),
+                    make_reasoning_item(content_text="Let me think more"),
+                ],
+                2,
+                id="consecutive-reasoning-messages",
+            ),
+            pytest.param(
+                [
+                    make_function_call(call_id="call_123"),
+                    make_function_call_output(call_id="call_123", output="42"),
+                    make_function_call(call_id="call_456"),
+                ],
+                3,
+                id="interrupted-by-non-assistant-item",
+            ),
+        ],
+    )
+    def test_merge_chain_breaks(self, items, num_expected_messages):
+        messages = construct_chat_messages_with_tool_call(items)
+        assert len(messages) == num_expected_messages
+
+
+class TestConstructInputMessagesInstructionsLeak:
+    """Regression tests for #37697: instructions from a prior response
+    should NOT leak through previous_response_id."""
+
+    def test_old_instructions_stripped_from_prev_msg(self):
+        """System message in prev_msg must be dropped so the new request's
+        instructions are the only system message in the conversation."""
+        prev = [
+            {"role": "system", "content": "old instructions"},
+            {"role": "user", "content": "What is 2+2?"},
+            {"role": "assistant", "content": "4"},
+        ]
+        msgs = construct_input_messages(
+            request_instructions="new instructions",
+            request_input="What is 3+3?",
+            prev_msg=prev,
+        )
+        system_msgs = [m for m in msgs if m.get("role") == "system"]
+        assert len(system_msgs) == 1
+        assert system_msgs[0]["content"] == "new instructions"
+
+    def test_no_instructions_in_new_request(self):
+        """If the new request has no instructions, old ones should still
+        be stripped -- they must not carry over."""
+        prev = [
+            {"role": "system", "content": "old instructions"},
+            {"role": "user", "content": "Hi"},
+            {"role": "assistant", "content": "Hello"},
+        ]
+        msgs = construct_input_messages(
+            request_instructions=None,
+            request_input="What is 3+3?",
+            prev_msg=prev,
+        )
+        system_msgs = [m for m in msgs if m.get("role") == "system"]
+        assert len(system_msgs) == 0
+
+    def test_non_system_messages_preserved(self):
+        """User/assistant messages from prev_msg must remain intact."""
+        prev = [
+            {"role": "system", "content": "old instructions"},
+            {"role": "user", "content": "Hi"},
+            {"role": "assistant", "content": "Hello"},
+        ]
+        msgs = construct_input_messages(
+            request_instructions="new instructions",
+            request_input="Follow up",
+            prev_msg=prev,
+        )
+        roles = [m["role"] for m in msgs]
+        assert roles == ["system", "user", "assistant", "user"]
+        assert msgs[0]["content"] == "new instructions"
+        assert msgs[1]["content"] == "Hi"
+        assert msgs[2]["content"] == "Hello"
+        assert msgs[3]["content"] == "Follow up"
+
+    def test_no_prev_msg(self):
+        """Baseline: when there's no prev_msg, instructions work normally."""
+        msgs = construct_input_messages(
+            request_instructions="be helpful",
+            request_input="hello",
+            prev_msg=None,
+        )
+        assert len(msgs) == 2
+        assert msgs[0] == {"role": "system", "content": "be helpful"}
+        assert msgs[1] == {"role": "user", "content": "hello"}
diff --git a/tests/entrypoints/openai/responses/test_serving_responses.py b/tests/entrypoints/openai/responses/test_serving_responses.py
index 157f7f12fdd3..90c3183939ad 100644
--- a/tests/entrypoints/openai/responses/test_serving_responses.py
+++ b/tests/entrypoints/openai/responses/test_serving_responses.py
@@ -11,8 +11,12 @@
     ResponseReasoningItem,
     ResponseReasoningTextDeltaEvent,
     ResponseReasoningTextDoneEvent,
+    ResponseTextConfig,
     ResponseTextDeltaEvent,
 )
+from openai.types.responses.response_format_text_json_schema_config import (
+    ResponseFormatTextJSONSchemaConfig,
+)
 from openai.types.responses.tool import (
     CodeInterpreterContainerCodeInterpreterToolAuto,
     LocalShell,
@@ -23,12 +27,20 @@
 import vllm.envs as envs
 from vllm.entrypoints.mcp.tool_server import ToolServer
 from vllm.entrypoints.openai.engine.protocol import (
+    DeltaFunctionCall,
     DeltaMessage,
+    DeltaToolCall,
     ErrorResponse,
     RequestResponseMetadata,
 )
 from vllm.entrypoints.openai.responses.context import ConversationContext, SimpleContext
-from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+from vllm.entrypoints.openai.responses.protocol import (
+    ResponseCreatedEvent,
+    ResponseRawMessageAndToken,
+    ResponsesRequest,
+    ResponsesResponse,
+    serialize_message,
+)
 from vllm.entrypoints.openai.responses.serving import (
     OpenAIServingResponses,
     _extract_allowed_tools_from_mcp_requests,
@@ -73,6 +85,16 @@ async def cleanup_session(self) -> None:
         pass
 
 
+def test_serialize_message_pydantic_model_returns_dict() -> None:
+    msg = ResponseRawMessageAndToken(message="hello", tokens=[1, 2, 3])
+
+    serialized = serialize_message(msg)
+
+    assert isinstance(serialized, dict)
+    assert serialized["type"] == "raw_message_tokens"
+    assert serialized["message"] == "hello"
+
+
 @pytest.fixture
 def mock_serving_responses():
     """Create a mock OpenAIServingResponses instance"""
@@ -132,6 +154,56 @@ def test_extract_tool_types(monkeypatch: pytest.MonkeyPatch) -> None:
     }
 
 
+@pytest.mark.skip_global_cleanup
+def test_response_created_event_uses_public_json_schema_alias() -> None:
+    schema = {
+        "type": "object",
+        "properties": {
+            "event_name": {"type": "string"},
+            "date": {"type": "string"},
+            "participants": {"type": "array", "items": {"type": "string"}},
+        },
+        "required": ["event_name", "date", "participants"],
+        "additionalProperties": False,
+    }
+    text = ResponseTextConfig()
+    text.format = ResponseFormatTextJSONSchemaConfig(
+        type="json_schema",
+        name="calendar_event",
+        schema=schema,
+        description="A calendar event.",
+        strict=True,
+    )
+    request = ResponsesRequest(
+        model="test-model",
+        input="Alice and Bob are going to a science fair on Friday.",
+        text=text,
+    )
+    sampling_params = request.to_sampling_params(default_max_tokens=64)
+    initial_response = ResponsesResponse.from_request(
+        request=request,
+        sampling_params=sampling_params,
+        model_name="test-model",
+        created_time=0,
+        output=[],
+        status="in_progress",
+        usage=None,
+    ).model_dump(mode="json", by_alias=True)
+
+    fmt = initial_response["text"]["format"]
+    assert fmt["schema"] == schema
+    assert "schema_" not in fmt
+
+    event = ResponseCreatedEvent(
+        type="response.created",
+        sequence_number=0,
+        response=initial_response,
+    )
+    assert event.response.text is not None
+    assert event.response.text.format is not None
+    assert event.response.text.format.model_dump(by_alias=True)["schema"] == schema
+
+
 class TestInitializeToolSessions:
     """Test class for _initialize_tool_sessions method"""
 
@@ -148,7 +220,6 @@ async def serving_responses_instance(self):
         engine_client.model_config = model_config
 
         engine_client.input_processor = MagicMock()
-        engine_client.io_processor = MagicMock()
         engine_client.renderer = MagicMock()
 
         models = MagicMock()
@@ -237,7 +308,6 @@ async def serving_responses_instance(self):
         engine_client.model_config = model_config
 
         engine_client.input_processor = MagicMock()
-        engine_client.io_processor = MagicMock()
         engine_client.renderer = MagicMock()
 
         models = MagicMock()
@@ -299,7 +369,6 @@ def get_vocab(self):
     model_config.get_diff_sampling_param.return_value = {}
     engine_client.model_config = model_config
     engine_client.input_processor = MagicMock()
-    engine_client.io_processor = MagicMock()
     engine_client.renderer = MagicMock()
 
     tokenizer = FakeTokenizer()
@@ -602,7 +671,6 @@ def _make_serving_instance_with_reasoning():
     model_config.get_diff_sampling_param.return_value = {}
     engine_client.model_config = model_config
     engine_client.input_processor = MagicMock()
-    engine_client.io_processor = MagicMock()
     engine_client.renderer = MagicMock()
 
     models = MagicMock()
@@ -628,6 +696,31 @@ def _identity_increment(event):
     return event
 
 
+def _mock_parser_with_reasoning(serving, delta_sequence: list[DeltaMessage]):
+    """Set up serving.parser so that it returns a mock parser instance
+    with a reasoning parser that returns the given delta_sequence.
+
+    The mock has reasoning_parser set (truthy) but tool_parser as None,
+    so the parser's parse_delta enters the reasoning-only branch.
+    """
+    call_count = 0
+
+    def mock_parse_delta(**kwargs):
+        nonlocal call_count
+        if call_count >= len(delta_sequence):
+            return None
+        result = delta_sequence[call_count]
+        call_count += 1
+        return result
+
+    mock_parser_instance = MagicMock()
+    mock_parser_instance.reasoning_parser = MagicMock()  # truthy
+    mock_parser_instance.tool_parser = None
+    mock_parser_instance.parse_delta = mock_parse_delta
+    mock_parser_instance.is_reasoning_end = MagicMock(return_value=False)
+    serving.parser = MagicMock(return_value=mock_parser_instance)
+
+
 class TestStreamingReasoningToContentTransition:
     """Tests for _process_simple_streaming_events reasoning-to-content
     transition, specifically the fix for mixed deltas that carry both
@@ -646,27 +739,13 @@ async def test_mixed_delta_reasoning_and_content_emits_reasoning_delta(
         monkeypatch.setattr(envs, "VLLM_USE_EXPERIMENTAL_PARSER_CONTEXT", False)
         serving = _make_serving_instance_with_reasoning()
 
-        # Sequence of DeltaMessages the mock reasoning parser will return
+        # Sequence of DeltaMessages the mock orchestrator will return
         delta_sequence = [
             DeltaMessage(reasoning="thinking..."),
             DeltaMessage(reasoning=" end", content="hello"),  # mixed delta
             DeltaMessage(content=" world"),
         ]
-        call_count = 0
-
-        def mock_extract_reasoning_streaming(**kwargs):
-            nonlocal call_count
-            result = delta_sequence[call_count]
-            call_count += 1
-            return result
-
-        # Mock the reasoning parser on the serving instance
-        mock_parser = MagicMock()
-        mock_parser.extract_reasoning_streaming = mock_extract_reasoning_streaming
-        mock_parser.extract_tool_calls_streaming = mock_extract_reasoning_streaming
-        serving.parser = MagicMock()
-        serving.parser.reasoning_parser_cls = MagicMock(return_value=mock_parser)
-        serving.parser.tool_parser_cls = MagicMock(return_value=mock_parser)
+        _mock_parser_with_reasoning(serving, delta_sequence)
         # Create contexts for each streaming chunk
         contexts = [
             _make_simple_context_with_output("chunk1", [10]),
@@ -734,20 +813,7 @@ async def test_transition_without_mixed_delta_no_extra_reasoning_event(
             DeltaMessage(reasoning="thinking"),
             DeltaMessage(content="answer"),
         ]
-        call_count = 0
-
-        def mock_extract_reasoning_streaming(**kwargs):
-            nonlocal call_count
-            result = delta_sequence[call_count]
-            call_count += 1
-            return result
-
-        mock_parser = MagicMock()
-        mock_parser.extract_reasoning_streaming = mock_extract_reasoning_streaming
-        mock_parser.extract_tool_calls_streaming = mock_extract_reasoning_streaming
-        serving.parser = MagicMock()
-        serving.parser.reasoning_parser_cls = MagicMock(return_value=mock_parser)
-        serving.parser.tool_parser_cls = MagicMock(return_value=mock_parser)
+        _mock_parser_with_reasoning(serving, delta_sequence)
 
         contexts = [
             _make_simple_context_with_output("chunk1", [10]),
@@ -809,20 +875,7 @@ async def test_reasoning_only_stream_no_content(self, monkeypatch):
             DeltaMessage(reasoning="step 1"),
             DeltaMessage(reasoning=" step 2"),
         ]
-        call_count = 0
-
-        def mock_extract_reasoning_streaming(**kwargs):
-            nonlocal call_count
-            result = delta_sequence[call_count]
-            call_count += 1
-            return result
-
-        mock_parser = MagicMock()
-        mock_parser.extract_reasoning_streaming = mock_extract_reasoning_streaming
-        mock_parser.extract_tool_calls_streaming = mock_extract_reasoning_streaming
-        serving.parser = MagicMock()
-        serving.parser.reasoning_parser_cls = MagicMock(return_value=mock_parser)
-        serving.parser.tool_parser_cls = MagicMock(return_value=mock_parser)
+        _mock_parser_with_reasoning(serving, delta_sequence)
 
         contexts = [
             _make_simple_context_with_output("chunk1", [10]),
@@ -877,3 +930,197 @@ async def result_generator():
         ]
         assert len(item_done_events) == 1
         assert isinstance(item_done_events[0].item, ResponseReasoningItem)
+
+
+class TestAutoToolStreaming:
+    @staticmethod
+    async def _collect_events(delta_sequence: list[DeltaMessage]):
+        serving = _make_serving_instance_with_reasoning()
+        _mock_parser_with_reasoning(serving, delta_sequence)
+
+        contexts = [
+            _make_simple_context_with_output("chunk", [i])
+            for i in range(len(delta_sequence))
+        ]
+
+        async def result_generator():
+            for ctx in contexts:
+                yield ctx
+
+        request = ResponsesRequest(
+            input="hi",
+            tools=[
+                {
+                    "type": "function",
+                    "name": "get_weather",
+                    "description": "Get weather.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {"location": {"type": "string"}},
+                        "required": ["location"],
+                        "additionalProperties": False,
+                    },
+                }
+            ],
+            tool_choice="auto",
+            stream=True,
+        )
+        sampling_params = SamplingParams(max_tokens=64)
+        metadata = RequestResponseMetadata(request_id="req")
+        _identity_increment._counter = 0  # type: ignore
+
+        events = []
+        async for event in serving._process_simple_streaming_events(
+            request=request,
+            sampling_params=sampling_params,
+            result_generator=result_generator(),
+            context=SimpleContext(),
+            model_name="test-model",
+            tokenizer=MagicMock(),
+            request_metadata=metadata,
+            created_time=0,
+            _increment_sequence_number_and_return=_identity_increment,
+        ):
+            events.append(event)
+        return events
+
+    @pytest.mark.skip_global_cleanup
+    @pytest.mark.asyncio
+    async def test_auto_multi_tool_streaming_opens_one_item_per_tool(self, monkeypatch):
+        monkeypatch.setattr(envs, "VLLM_USE_EXPERIMENTAL_PARSER_CONTEXT", False)
+
+        delta_sequence = [
+            DeltaMessage(
+                tool_calls=[
+                    DeltaToolCall(
+                        id="call_vienna",
+                        type="function",
+                        index=0,
+                        function=DeltaFunctionCall(
+                            name="get_weather",
+                            arguments="",
+                        ),
+                    )
+                ]
+            ),
+            DeltaMessage(
+                tool_calls=[
+                    DeltaToolCall(
+                        index=0,
+                        function=DeltaFunctionCall(
+                            arguments='{"location":"Vienna"}',
+                        ),
+                    )
+                ]
+            ),
+            DeltaMessage(
+                tool_calls=[
+                    DeltaToolCall(
+                        id="call_berlin",
+                        type="function",
+                        index=1,
+                        function=DeltaFunctionCall(
+                            name="get_weather",
+                            arguments='{"location":"Berlin"}',
+                        ),
+                    )
+                ]
+            ),
+        ]
+        events = await self._collect_events(delta_sequence)
+
+        function_items = [
+            event
+            for event in events
+            if event.type == "response.output_item.added"
+            and getattr(event.item, "type", None) == "function_call"
+        ]
+        assert len(function_items) == 2
+        assert [event.item.name for event in function_items] == [
+            "get_weather",
+            "get_weather",
+        ]
+        assert [event.output_index for event in function_items] == [0, 1]
+
+        argument_deltas = [
+            event.delta
+            for event in events
+            if event.type == "response.function_call_arguments.delta"
+        ]
+        assert argument_deltas == [
+            '{"location":"Vienna"}',
+            '{"location":"Berlin"}',
+        ]
+
+        argument_done = [
+            event
+            for event in events
+            if event.type == "response.function_call_arguments.done"
+        ]
+        assert [event.arguments for event in argument_done] == [
+            '{"location":"Vienna"}',
+            '{"location":"Berlin"}',
+        ]
+        assert [event.output_index for event in argument_done] == [0, 1]
+
+        function_done = [
+            event
+            for event in events
+            if event.type == "response.output_item.done"
+            and getattr(event.item, "type", None) == "function_call"
+        ]
+        assert [event.item.arguments for event in function_done] == [
+            '{"location":"Vienna"}',
+            '{"location":"Berlin"}',
+        ]
+        assert [event.output_index for event in function_done] == [0, 1]
+
+    @pytest.mark.skip_global_cleanup
+    @pytest.mark.asyncio
+    async def test_auto_tool_choice_first_delta_tool_call_does_not_duplicate_item(
+        self, monkeypatch
+    ):
+        monkeypatch.setattr(envs, "VLLM_USE_EXPERIMENTAL_PARSER_CONTEXT", False)
+
+        delta_sequence = [
+            DeltaMessage(
+                tool_calls=[
+                    DeltaToolCall(
+                        id="call_test",
+                        type="function",
+                        index=0,
+                        function=DeltaFunctionCall(
+                            name="get_weather",
+                            arguments="",
+                        ),
+                    )
+                ]
+            ),
+            DeltaMessage(
+                tool_calls=[
+                    DeltaToolCall(
+                        index=0,
+                        function=DeltaFunctionCall(
+                            arguments='{"location":"Berlin"}',
+                        ),
+                    )
+                ]
+            ),
+        ]
+        events = await self._collect_events(delta_sequence)
+
+        function_items = [
+            event
+            for event in events
+            if event.type == "response.output_item.added"
+            and getattr(event.item, "type", None) == "function_call"
+        ]
+        assert len(function_items) == 1
+        assert function_items[0].item.name == "get_weather"
+
+        argument_deltas = [
+            event.delta
+            for event in events
+            if event.type == "response.function_call_arguments.delta"
+        ]
+        assert "".join(argument_deltas) == '{"location":"Berlin"}'
diff --git a/tests/entrypoints/openai/test_dp_supervisor.py b/tests/entrypoints/openai/test_dp_supervisor.py
new file mode 100644
index 000000000000..0b678b226254
--- /dev/null
+++ b/tests/entrypoints/openai/test_dp_supervisor.py
@@ -0,0 +1,610 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""
+Tests for DPSupervisor: unit tests and lifecycle integration tests.
+
+Lifecycle integration tests replace child vLLM servers with lightweight
+aiohttp "fake" servers controlled by the test, so the suite runs without GPUs.
+_start_children is monkeypatched to install FakeProcess objects (with
+controllable liveness/timing) alongside those fake HTTP servers.
+
+Port allocation (kept far from default vLLM ports to avoid conflicts):
+  Supervisor : 19256
+  Children   : 18000, 18001
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import contextlib
+import os
+import signal
+import time
+from types import SimpleNamespace
+
+import aiohttp
+import pytest
+import uvicorn
+from fastapi import FastAPI, Response
+
+import vllm.entrypoints.openai.dp_supervisor as dp_sup
+from vllm.entrypoints.openai.dp_supervisor import (
+    CHILD_EXIT_GRACE_S,
+    DPSupervisor,
+    _build_vllm_dp_server_args,
+    infer_multi_port_external_lb_start_rank,
+)
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
+_SUPERVISOR_PORT = 19256
+_CHILD_PORT_BASE = 18000
+_N_CHILDREN = 2
+_PROBE_INTERVAL = 1.0
+_POLL_INTERVAL = 1.0
+
+
+# ---------------------------------------------------------------------------
+# Args factories
+# ---------------------------------------------------------------------------
+
+
+def _make_unit_args(**overrides) -> argparse.Namespace:
+    """Minimal args for unit tests (no real network activity)."""
+    base = {
+        "host": None,
+        "port": 8000,
+        "data_parallel_multi_port_external_lb": True,
+        "data_parallel_supervisor_port": 9256,
+        "dp_supervisor_probe_interval_s": 5.0,
+        "dp_supervisor_probe_timeout_s": 5.0,
+        "dp_supervisor_probe_failure_threshold": 3,
+        "data_parallel_size": 8,
+        "data_parallel_size_local": 4,
+        "data_parallel_start_rank": None,
+        "data_parallel_rank": None,
+        "data_parallel_external_lb": False,
+        "data_parallel_hybrid_lb": False,
+        "api_server_count": None,
+        "headless": False,
+        "grpc": False,
+        "uds": None,
+        "ssl_keyfile": None,
+        "ssl_certfile": None,
+        "ssl_ca_certs": None,
+        "node_rank": 1,
+        "tensor_parallel_size": 1,
+        "pipeline_parallel_size": 1,
+        "uvicorn_log_level": "info",
+        "shutdown_timeout": 5.0,
+    }
+    base.update(overrides)
+    return argparse.Namespace(**base)
+
+
+def _make_args(**overrides) -> argparse.Namespace:
+    """Args for lifecycle integration tests (real loopback servers)."""
+    base: dict = dict(
+        host="127.0.0.1",
+        port=_CHILD_PORT_BASE,
+        data_parallel_multi_port_external_lb=True,
+        data_parallel_supervisor_port=_SUPERVISOR_PORT,
+        dp_supervisor_probe_interval_s=_PROBE_INTERVAL,
+        dp_supervisor_probe_timeout_s=1.0,
+        dp_supervisor_probe_failure_threshold=3,
+        data_parallel_size=_N_CHILDREN,
+        data_parallel_size_local=_N_CHILDREN,
+        data_parallel_start_rank=0,
+        data_parallel_rank=None,
+        data_parallel_external_lb=False,
+        data_parallel_hybrid_lb=False,
+        api_server_count=None,
+        headless=False,
+        grpc=False,
+        uds=None,
+        ssl_keyfile=None,
+        ssl_certfile=None,
+        ssl_ca_certs=None,
+        node_rank=0,
+        tensor_parallel_size=1,
+        pipeline_parallel_size=1,
+        uvicorn_log_level="warning",
+        shutdown_timeout=0.0,
+    )
+    base.update(overrides)
+    return argparse.Namespace(**base)
+
+
+# ---------------------------------------------------------------------------
+# Unit tests
+# ---------------------------------------------------------------------------
+
+
+def test_infer_multi_port_external_lb_start_rank_uses_node_rank():
+    args = _make_unit_args()
+    assert infer_multi_port_external_lb_start_rank(args) == 4
+
+
+def test_build_multi_port_external_lb_child_args_sets_external_rank_server():
+    args = _make_unit_args(data_parallel_start_rank=8, api_server_count=None)
+    child_args = _build_vllm_dp_server_args(args, local_rank=2)
+
+    assert child_args.port == 8002
+    assert child_args.data_parallel_rank == 10
+    assert child_args.data_parallel_size_local == 1
+    assert child_args.data_parallel_external_lb is True
+    assert child_args.data_parallel_hybrid_lb is False
+    assert child_args.data_parallel_multi_port_external_lb is False
+    assert child_args.api_server_count == 1
+
+
+def test_aggregates_health():
+    supervisor = DPSupervisor(_make_unit_args())
+    supervisor._is_ready = True
+    assert supervisor.is_ready is True
+
+
+def test_handles_shutdown_event():
+    supervisor = DPSupervisor(_make_unit_args())
+    supervisor._is_ready = True
+    supervisor._shutdown_event.set()
+    assert supervisor.is_ready is False
+
+
+@pytest.mark.asyncio
+async def test_handles_child_exit(
+    monkeypatch: pytest.MonkeyPatch,
+):
+    supervisor = DPSupervisor(_make_unit_args())
+    supervisor._processes = [
+        SimpleNamespace(
+            name="APIServer_DPRank_4", exitcode=None, is_alive=lambda: True
+        ),
+        SimpleNamespace(name="APIServer_DPRank_5", exitcode=17, is_alive=lambda: False),
+    ]
+
+    async def fake_probe(*_args, **_kwargs) -> bool:
+        return True
+
+    monkeypatch.setattr(dp_sup, "_probe_endpoint", fake_probe)
+
+    await supervisor._monitor_children()
+    assert supervisor._is_ready is False
+
+
+@pytest.mark.asyncio
+async def test_handles_probe_failure(
+    monkeypatch: pytest.MonkeyPatch,
+):
+    supervisor = DPSupervisor(_make_unit_args(dp_supervisor_probe_interval_s=0.0))
+    supervisor.child_ports = [8000]
+    probe_results = iter([True, False])
+
+    async def fake_probe(*_args, **_kwargs) -> bool:
+        return next(probe_results)
+
+    monkeypatch.setattr(dp_sup, "_probe_endpoint", fake_probe)
+
+    await supervisor._monitor_children()
+    assert supervisor._is_ready is False
+
+
+@pytest.mark.asyncio
+async def test_shutdown_if_supervisor_server_error_on_startup(
+    monkeypatch: pytest.MonkeyPatch,
+):
+    class FakeLoop:
+        def add_signal_handler(self, *_args, **_kwargs):
+            pass
+
+        def remove_signal_handler(self, *_args, **_kwargs):
+            pass
+
+    class FakeServer:
+        def __init__(self, _config):
+            self.started = False
+            self.should_exit = False
+
+        async def serve(self):
+            raise ValueError("supervisor boom")
+
+    async def fake_shutdown_children(self):
+        return None
+
+    monkeypatch.setattr(dp_sup.asyncio, "get_running_loop", lambda: FakeLoop())
+    monkeypatch.setattr(dp_sup.uvicorn, "Server", FakeServer)
+    monkeypatch.setattr(DPSupervisor, "_shutdown_children", fake_shutdown_children)
+
+    supervisor = DPSupervisor(_make_unit_args())
+
+    with pytest.raises(ValueError, match="supervisor boom"):
+        await supervisor.run()
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle integration tests – MockVLLMServer
+# ---------------------------------------------------------------------------
+
+
+class MockVLLMServer:
+    """
+    Minimal FastAPI server that mimics one vLLM replica.
+    GET /health returns 200 when healthy, 503 otherwise.
+    Health state is toggled by the test via set_healthy().
+    """
+
+    def __init__(self, port: int, drain_seconds: float = 0.0) -> None:
+        self.port = port
+        self._healthy = False
+        self._drain_seconds = drain_seconds
+        self._server: uvicorn.Server | None = None
+        self._serve_task: asyncio.Task | None = None
+
+    async def start(self) -> None:
+        app = FastAPI()
+
+        @app.get("/health")
+        async def health() -> Response:
+            print(f"MockServer {self.port}: /health: {self._healthy}")
+            return Response(status_code=200 if self._healthy else 503)
+
+        @app.get("/set_healthy")
+        async def set_healthy() -> Response:
+            print(f"MockServer {self.port}: /set_healthy")
+            self._healthy = True
+            return Response(status_code=200)
+
+        @app.get("/set_unhealthy")
+        async def set_unhealthy() -> Response:
+            print(f"MockServer {self.port}: /set_unhealthy")
+            self._healthy = False
+            return Response(status_code=200)
+
+        @app.get("/kill")
+        async def kill() -> Response:
+            print(f"MockServer {self.port}: /kill")
+            os.kill(os.getpid(), signal.SIGKILL)
+
+        config = uvicorn.Config(
+            app,
+            host="127.0.0.1",
+            port=self.port,
+            log_level="warning",
+            lifespan="off",
+        )
+        self._server = uvicorn.Server(config)
+
+        # Configure request draining if needed.
+        # Uvicorn's capture_signals() installs signal.signal(SIGTERM, self.handle_exit),
+        # which sets should_exit=True immediately. Override handle_exit on the instance
+        # so capture_signals() picks up our version that drains first.
+        if self._drain_seconds > 0:
+            self._shutdown_event = asyncio.Event()
+            loop = asyncio.get_running_loop()
+
+            async def _drain_and_stop() -> None:
+                await self._shutdown_event.wait()
+                print(f"MockServer {self.port}: draining for {self._drain_seconds}s.")
+                await asyncio.sleep(self._drain_seconds)
+                print("Setting should_exit")
+                if self._server is not None:
+                    self._server.should_exit = True
+
+            self._drain_task = asyncio.create_task(_drain_and_stop())
+
+            def _custom_handle_exit(sig: int, frame: object) -> None:
+                print("Got SIGTERM, setting shutdown.")
+                if not self._shutdown_event.is_set():
+                    loop.call_soon_threadsafe(self._shutdown_event.set)
+
+            self._server.handle_exit = _custom_handle_exit
+
+        self._serve_task = asyncio.create_task(self._server.serve())
+        while not self._server.started:
+            await asyncio.sleep(0.01)
+        print(f"Mock DP Server on port {self.port} started")
+
+        await self._serve_task
+
+
+def launch_mock_vllm(child_args: argparse.Namespace, env_updates: dict[str, str]):
+    logger.info("Launching mock vLLM on port %s", child_args.port)
+    mock_vllm = MockVLLMServer(port=child_args.port)
+    asyncio.run(mock_vllm.start())
+
+
+def launch_mock_vllm_with_drain(
+    child_args: argparse.Namespace, env_updates: dict[str, str]
+):
+    logger.info("Launching mock vLLM with 15s drain on port %s", child_args.port)
+    mock_vllm = MockVLLMServer(port=child_args.port, drain_seconds=10.0)
+    asyncio.run(mock_vllm.start())
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle test helpers
+# ---------------------------------------------------------------------------
+
+
+async def _poll_supervisor_health(expected_status: int) -> bool:
+    """
+    Poll GET /health on the supervisor until expected_status is seen.
+    A connection error is treated as 503-equivalent when expected_status != 200.
+    """
+    url = f"http://127.0.0.1:{_SUPERVISOR_PORT}/health"
+    async with aiohttp.ClientSession() as session:
+        try:
+            async with session.get(url) as resp:
+                if resp.status != expected_status:
+                    print(f"expected: {expected_status=}, got: {resp.status=}")
+                    return False
+                return True
+        except aiohttp.ClientError:
+            if expected_status != -1:
+                print(f"expected: {expected_status=}, got: aiohttp.ClientError")
+                return False
+            return True
+
+
+async def _poll_until_api_server_running(port: int, retries: int = 10) -> None:
+    url = f"http://127.0.0.1:{port}/health"
+    async with aiohttp.ClientSession() as session:
+        for _ in range(retries):
+            try:
+                async with session.get(url) as resp:
+                    if resp.status != 200:
+                        return
+                await asyncio.sleep(1.0)
+            except aiohttp.ClientError:
+                print("Test detected not started yet, sleeping for 1s")
+                await asyncio.sleep(1.0)
+
+
+async def _set_healthy(port: int) -> None:
+    url = f"http://127.0.0.1:{port}/set_healthy"
+    async with aiohttp.ClientSession() as session, session.get(url) as resp:
+        assert resp.status == 200
+
+
+async def _set_unhealthy(port: int) -> None:
+    url = f"http://127.0.0.1:{port}/set_unhealthy"
+    async with aiohttp.ClientSession() as session, session.get(url) as resp:
+        assert resp.status == 200
+
+
+async def _kill_server(port: int) -> None:
+    url = f"http://127.0.0.1:{port}/kill"
+    try:
+        async with aiohttp.ClientSession() as session, session.get(url) as resp:
+            assert resp.status != 200
+    except Exception as e:
+        assert isinstance(e, aiohttp.ClientConnectorError)
+
+
+@contextlib.asynccontextmanager
+async def _run_supervisor(
+    args: argparse.Namespace,
+    monkeypatch: pytest.MonkeyPatch,
+    launch_fn=None,
+):
+    if launch_fn is None:
+        launch_fn = launch_mock_vllm
+    monkeypatch.setattr(dp_sup, "_run_vllm_dp_server", launch_fn)
+    supervisor = DPSupervisor(args)
+    task = asyncio.create_task(supervisor.run())
+    await asyncio.sleep(1.0)
+    try:
+        yield supervisor, task
+    finally:
+        task.cancel()
+        with contextlib.suppress(asyncio.CancelledError):
+            await task
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle integration tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_basic_lifecycle(monkeypatch):
+    """
+    A) Supervisor /health returns 503 while children are unhealthy.
+    B) /health returns 200 once every child reports healthy.
+    C) SIGTERM and shutdown
+    """
+    args = _make_args()
+
+    vllm_server_ports = [_CHILD_PORT_BASE + i for i in range(_N_CHILDREN)]
+
+    async with _run_supervisor(args, monkeypatch) as (supervisor, _task):
+        assert await _poll_supervisor_health(503)
+        assert not supervisor.is_ready
+
+        for port in vllm_server_ports:
+            assert await _poll_supervisor_health(503)
+            assert not supervisor.is_ready
+            await _poll_until_api_server_running(port)
+
+        await _set_healthy(vllm_server_ports[0])
+        await asyncio.sleep(1.0)
+        assert await _poll_supervisor_health(503)
+        assert not supervisor.is_ready
+        print("/health is 503 --- expected!")
+
+        for port in vllm_server_ports:
+            await _set_healthy(port)
+        await asyncio.sleep(1.0)
+        assert await _poll_supervisor_health(200)
+        assert supervisor.is_ready
+        print("/health is 200 --- expected!")
+
+        await asyncio.sleep(1.0)
+        assert await _poll_supervisor_health(200)
+        assert supervisor.is_ready
+        print("/health is 200 --- expected!")
+
+        os.kill(os.getpid(), signal.SIGTERM)
+
+        await asyncio.wait_for(_task, timeout=5.0)
+        for p in supervisor._processes:
+            assert not p.is_alive()
+        print("everything was cleaned up!")
+
+
+@pytest.mark.asyncio
+async def test_failed_startup(monkeypatch):
+    """
+    A) One of the vLLM servers crashes during startup.
+    B) DPSupervisor detects this, and cleans up resources.
+    """
+    args = _make_args()
+
+    vllm_server_ports = [_CHILD_PORT_BASE + i for i in range(_N_CHILDREN)]
+
+    async with _run_supervisor(args, monkeypatch) as (supervisor, _task):
+        assert await _poll_supervisor_health(503)
+        assert not supervisor.is_ready
+
+        for port in vllm_server_ports:
+            await _poll_until_api_server_running(port)
+
+        await _kill_server(port)
+
+        await asyncio.wait_for(_task, timeout=5.0)
+        for p in supervisor._processes:
+            assert not p.is_alive()
+
+
+@pytest.mark.asyncio
+async def test_becomes_unhealthy(monkeypatch):
+    """
+    A) Supervisor /health returns 503 while children are unhealthy.
+    B) /health returns 200 once every child reports healthy.
+    C) Child process becomes unhealthy.
+    D) Detected and shutdown.
+    """
+    args = _make_args()
+
+    vllm_server_ports = [_CHILD_PORT_BASE + i for i in range(_N_CHILDREN)]
+
+    async with _run_supervisor(args, monkeypatch) as (supervisor, _task):
+        assert await _poll_supervisor_health(503)
+        assert not supervisor.is_ready
+
+        for port in vllm_server_ports:
+            assert await _poll_supervisor_health(503)
+            assert not supervisor.is_ready
+            await _poll_until_api_server_running(port)
+
+        await _set_healthy(vllm_server_ports[0])
+        await asyncio.sleep(1.0)
+        assert await _poll_supervisor_health(503)
+        assert not supervisor.is_ready
+        print("/health is 503 --- expected!")
+
+        for port in vllm_server_ports:
+            await _set_healthy(port)
+        await asyncio.sleep(1.0)
+        assert await _poll_supervisor_health(200)
+        assert supervisor.is_ready
+        print("/health is 200 --- expected!")
+
+        await _set_unhealthy(port)
+
+        await asyncio.wait_for(_task, timeout=5.0)
+        for p in supervisor._processes:
+            assert not p.is_alive()
+        print("everything was cleaned up!")
+
+
+@pytest.mark.asyncio
+async def test_dp_server_fails(monkeypatch):
+    """
+    A) Supervisor /health returns 503 while children are unhealthy.
+    B) /health returns 200 once every child reports healthy.
+    C) Child process fails.
+    D) Detected and shutdown.
+    """
+    args = _make_args()
+
+    vllm_server_ports = [_CHILD_PORT_BASE + i for i in range(_N_CHILDREN)]
+
+    async with _run_supervisor(args, monkeypatch) as (supervisor, _task):
+        assert await _poll_supervisor_health(503)
+        assert not supervisor.is_ready
+
+        for port in vllm_server_ports:
+            assert await _poll_supervisor_health(503)
+            assert not supervisor.is_ready
+            await _poll_until_api_server_running(port)
+
+        await _set_healthy(vllm_server_ports[0])
+        await asyncio.sleep(1.0)
+        assert await _poll_supervisor_health(503)
+        assert not supervisor.is_ready
+        print("/health is 503 --- expected!")
+
+        for port in vllm_server_ports:
+            await _set_healthy(port)
+        await asyncio.sleep(1.0)
+        assert await _poll_supervisor_health(200)
+        assert supervisor.is_ready
+        print("/health is 200 --- expected!")
+
+        dp_mock_server_process = supervisor._processes[0]
+        os.kill(dp_mock_server_process.pid, signal.SIGKILL)
+        await asyncio.sleep(1.0)
+        assert not dp_mock_server_process.is_alive()
+
+        await asyncio.wait_for(_task, timeout=5.0)
+        for p in supervisor._processes:
+            assert not p.is_alive()
+        print("everything was cleaned up!")
+
+
+@pytest.mark.asyncio
+async def test_shutdown_timeout(monkeypatch: pytest.MonkeyPatch):
+    """
+    Child mock servers delay shutdown by 10s on SIGTERM (simulating in-flight
+    request drain).  The supervisor is configured with shutdown_timeout=10,
+    so its total wait budget is 10 + CHILD_EXIT_GRACE_S seconds.  The
+    children exit naturally within that window, so no force-kill should occur
+    and the measured wall-clock time must be >= 10s.
+    """
+    _DRAIN_SECONDS = 10.0
+    _SHUTDOWN_TIMEOUT = 10.0
+
+    args = _make_args(shutdown_timeout=_SHUTDOWN_TIMEOUT)
+    vllm_server_ports = [_CHILD_PORT_BASE + i for i in range(_N_CHILDREN)]
+
+    async with _run_supervisor(
+        args, monkeypatch, launch_fn=launch_mock_vllm_with_drain
+    ) as (supervisor, _task):
+        for port in vllm_server_ports:
+            await _poll_until_api_server_running(port)
+
+        for port in vllm_server_ports:
+            await _set_healthy(port)
+        await asyncio.sleep(1.0)
+        assert await _poll_supervisor_health(200)
+        assert supervisor.is_ready
+
+        start_t = time.perf_counter()
+        os.kill(os.getpid(), signal.SIGTERM)
+
+        print(f"DRAINING FOR {_DRAIN_SECONDS}")
+        await asyncio.wait_for(_task, timeout=_DRAIN_SECONDS + CHILD_EXIT_GRACE_S + 5.0)
+        elapsed = time.perf_counter() - start_t
+
+        assert elapsed >= _DRAIN_SECONDS, (
+            f"Supervisor exited after only {elapsed:.1f}s; "
+            f"expected >= {_DRAIN_SECONDS}s for request draining"
+        )
+
+        for p in supervisor._processes:
+            assert not p.is_alive()
+        print(f"Supervisor waited {elapsed:.1f}s for children to drain — expected!")
diff --git a/tests/entrypoints/openai/test_fingerprint.py b/tests/entrypoints/openai/test_fingerprint.py
new file mode 100644
index 000000000000..b78ed38636c5
--- /dev/null
+++ b/tests/entrypoints/openai/test_fingerprint.py
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for ``system_fingerprint`` construction."""
+
+from types import SimpleNamespace
+
+import pytest
+
+from vllm.entrypoints.openai import fingerprint as fp
+
+
+def _cfg(tp=1, pp=1, dp=1, ep=False, digest="a3b21f94deadbeef"):
+    c = SimpleNamespace(
+        parallel_config=SimpleNamespace(
+            tensor_parallel_size=tp,
+            pipeline_parallel_size=pp,
+            data_parallel_size=dp,
+            enable_expert_parallel=ep,
+        )
+    )
+    c.compute_hash = lambda: digest  # type: ignore[attr-defined]
+    return c
+
+
+@pytest.fixture(autouse=True)
+def _reset():
+    fp.set_default_fingerprint_mode("full")
+    yield
+    fp.set_default_fingerprint_mode("full")
+
+
+def test_four_modes_produce_expected_shapes():
+    from vllm import __version__ as v
+
+    cfg = _cfg(tp=8, ep=True)
+
+    assert fp.build_system_fingerprint(cfg, "full") == (f"vllm-{v}-tp8-ep-a3b21f94")
+    assert fp.build_system_fingerprint(cfg, "hash") == f"vllm-{v}-a3b21f94"
+    assert fp.build_system_fingerprint(cfg, "custom", "my-fp") == "my-fp"
+    assert fp.build_system_fingerprint(cfg, "none") is None
+
+
+def test_full_mode_emits_only_non_trivial_parallelism():
+    from vllm import __version__ as v
+
+    # Single-GPU: nothing between version and hash.
+    assert fp.build_system_fingerprint(_cfg(), "full") == f"vllm-{v}-a3b21f94"
+    # All parallelism axes.
+    assert (
+        fp.build_system_fingerprint(_cfg(tp=8, pp=2, dp=4, ep=True), "full")
+        == f"vllm-{v}-tp8-pp2-dp4-ep-a3b21f94"
+    )
+
+
+def test_get_respects_set_default():
+    cfg = _cfg(tp=8)
+    full = fp.get_system_fingerprint(cfg)
+    assert full == fp.get_system_fingerprint(cfg)
+
+    fp.set_default_fingerprint_mode("hash")
+    hashed = fp.get_system_fingerprint(cfg)
+    assert hashed != full
+    assert "tp8" not in hashed
+
+    fp.set_default_fingerprint_mode("custom", "deploy-42")
+    assert fp.get_system_fingerprint(cfg) == "deploy-42"
+
+    fp.set_default_fingerprint_mode("none")
+    assert fp.get_system_fingerprint(cfg) is None
+
+
+def test_compute_hash_failure_does_not_raise():
+    cfg = _cfg()
+    cfg.compute_hash = lambda: (_ for _ in ()).throw(RuntimeError("boom"))
+    assert fp.build_system_fingerprint(cfg, "full").endswith("-nohash")
+    assert fp.build_system_fingerprint(cfg, "hash").endswith("-nohash")
diff --git a/tests/entrypoints/openai/test_mm_serde.py b/tests/entrypoints/openai/test_mm_serde.py
new file mode 100644
index 000000000000..c568d822e1c0
--- /dev/null
+++ b/tests/entrypoints/openai/test_mm_serde.py
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Roundtrip tests for multimodal serde used by the disagg generate endpoint."""
+
+import torch
+
+from vllm.entrypoints.serve.disagg.mm_serde import (
+    decode_mm_kwargs_item,
+    encode_mm_kwargs_item,
+)
+from vllm.entrypoints.serve.disagg.protocol import (
+    MultiModalFeatures,
+    PlaceholderRangeInfo,
+)
+from vllm.multimodal.inputs import (
+    MultiModalBatchedField,
+    MultiModalFieldElem,
+    MultiModalFlatField,
+    MultiModalKwargsItem,
+    MultiModalSharedField,
+)
+
+
+def test_mm_kwargs_item_roundtrip():
+    """Full roundtrip test with all three field types and multiple dtypes."""
+    e1 = MultiModalFieldElem(
+        data=torch.zeros(1000, dtype=torch.bfloat16),
+        field=MultiModalBatchedField(),
+    )
+    e2 = MultiModalFieldElem(
+        data=torch.ones(100, dtype=torch.int32),
+        field=MultiModalSharedField(batch_size=4),
+    )
+    e3 = MultiModalFieldElem(
+        data=torch.randn(20, dtype=torch.float32),
+        field=MultiModalFlatField(slices=[slice(0, 10), slice(10, 20)], dim=0),
+    )
+
+    item = MultiModalKwargsItem({"pixel_values": e1, "grid_thw": e2, "embeds": e3})
+    encoded = encode_mm_kwargs_item(item)
+
+    # Encoded result is a base64 string
+    assert isinstance(encoded, str)
+
+    decoded = decode_mm_kwargs_item(encoded)
+
+    assert set(decoded.keys()) == {"pixel_values", "grid_thw", "embeds"}
+    assert torch.equal(item["pixel_values"].data, decoded["pixel_values"].data)
+    assert torch.equal(item["grid_thw"].data, decoded["grid_thw"].data)
+    assert torch.equal(item["embeds"].data, decoded["embeds"].data)
+    assert isinstance(decoded["pixel_values"].field, MultiModalBatchedField)
+    assert isinstance(decoded["grid_thw"].field, MultiModalSharedField)
+    assert isinstance(decoded["embeds"].field, MultiModalFlatField)
+
+
+def test_mm_kwargs_item_none_data():
+    """Roundtrip with None data field."""
+    elem = MultiModalFieldElem(
+        data=None,
+        field=MultiModalSharedField(batch_size=2),
+    )
+    item = MultiModalKwargsItem({"empty": elem})
+    encoded = encode_mm_kwargs_item(item)
+    decoded = decode_mm_kwargs_item(encoded)
+
+    assert decoded["empty"].data is None
+    assert isinstance(decoded["empty"].field, MultiModalSharedField)
+
+
+def test_mm_kwargs_item_nested_tensors():
+    """Roundtrip with nested tensor data."""
+    nested = [torch.randn(3, 4), torch.randn(5, 4)]
+    elem = MultiModalFieldElem(
+        data=nested,
+        field=MultiModalBatchedField(),
+    )
+    item = MultiModalKwargsItem({"nested": elem})
+    encoded = encode_mm_kwargs_item(item)
+    decoded = decode_mm_kwargs_item(encoded)
+
+    decoded_data = decoded["nested"].data
+    assert len(decoded_data) == 2
+    assert torch.equal(nested[0], decoded_data[0])
+    assert torch.equal(nested[1], decoded_data[1])
+
+
+def test_mm_features_with_kwargs_data():
+    """Test that MultiModalFeatures can carry serialized tensor data."""
+    elem = MultiModalFieldElem(
+        data=torch.randn(5, 3, dtype=torch.float32),
+        field=MultiModalBatchedField(),
+    )
+    item = MultiModalKwargsItem({"pixel_values": elem})
+    encoded = encode_mm_kwargs_item(item)
+
+    features = MultiModalFeatures(
+        mm_hashes={"image": ["abc123"]},
+        mm_placeholders={"image": [PlaceholderRangeInfo(offset=0, length=10)]},
+        kwargs_data={"image": [encoded]},
+    )
+
+    # JSON roundtrip
+    json_str = features.model_dump_json()
+    features2 = MultiModalFeatures.model_validate_json(json_str)
+
+    assert features2.mm_hashes == {"image": ["abc123"]}
+    assert features2.kwargs_data is not None
+    assert len(features2.kwargs_data["image"]) == 1
+
+    decoded = decode_mm_kwargs_item(features2.kwargs_data["image"][0])
+    assert torch.equal(elem.data, decoded["pixel_values"].data)
diff --git a/tests/entrypoints/openai/test_openai_schema.py b/tests/entrypoints/openai/test_openai_schema.py
index 083290ed5b3a..56e4e9baf2e8 100644
--- a/tests/entrypoints/openai/test_openai_schema.py
+++ b/tests/entrypoints/openai/test_openai_schema.py
@@ -1,18 +1,13 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import json
-from http import HTTPStatus
 from typing import Final
 
 import pytest
 import schemathesis
-from httpx import URL
-from hypothesis import settings
+from hypothesis import HealthCheck, settings
 from schemathesis import GenerationConfig
-from schemathesis.checks import not_a_server_error
-from schemathesis.internal.checks import CheckContext
 from schemathesis.models import Case
-from schemathesis.transports.responses import GenericResponse
 
 from vllm.platforms import current_platform
 
@@ -65,20 +60,10 @@ def before_generate_case(context: schemathesis.hooks.HookContext, strategy):
 
     def no_invalid_types(case: schemathesis.models.Case):
         """
-        This filter skips test cases with invalid data that schemathesis
-        incorrectly generates due to permissive schema configurations.
-        
-        1. Skips `POST /tokenize` endpoint cases with `"type": "file"` in 
-           message content, which isn't implemented.
-        
-        2. Skips tool_calls with `"type": "custom"` which schemathesis 
-           incorrectly generates instead of the valid `"type": "function"`.
-
-        Example test cases that are skipped:
-        curl -X POST -H 'Content-Type: application/json' \
-            -d '{"messages": [{"content": [{"file": {}, "type": "file"}], "role": "user"}]}' \
-            http://localhost:8000/tokenize
+        Skips tool_calls with `"type": "custom"` which schemathesis incorrectly
+        generates instead of the valid `"type": "function"`.
 
+        Example test case that is skipped:
         curl -X POST -H 'Content-Type: application/json' \
             -d '{"messages": [{"role": "assistant", "tool_calls": [{"custom": {"input": "", "name": ""}, "id": "", "type": "custom"}]}]}' \
             http://localhost:8000/v1/chat/completions
@@ -93,20 +78,6 @@ def no_invalid_types(case: schemathesis.models.Case):
                     if not isinstance(message, dict):
                         continue
 
-                    # Check for invalid file type in tokenize endpoint
-                    if op.method.lower() == "post" and op.path == "/tokenize":
-                        content = message.get("content", [])
-                        if (
-                            isinstance(content, list)
-                            and len(content) > 0
-                            and any(
-                                isinstance(item, dict) and item.get("type") == "file"
-                                for item in content
-                            )
-                        ):
-                            return False
-
-                    # Check for invalid tool_calls with non-function types
                     tool_calls = message.get("tool_calls", [])
                     if isinstance(tool_calls, list):
                         for tool_call in tool_calls:
@@ -136,24 +107,21 @@ def no_invalid_types(case: schemathesis.models.Case):
     return strategy.filter(no_invalid_types)
 
 
-def customized_not_a_server_error(
-    ctx: CheckContext, response: GenericResponse, case: Case
-) -> bool | None:
-    try:
-        return not_a_server_error(ctx, response, case)
-    except Exception:
-        if (
-            URL(response.request.url).path
-            in ["/v1/chat/completions/render", "/v1/chat/completions"]
-            and response.status_code == HTTPStatus.NOT_IMPLEMENTED.value
-        ):
-            return True
-        raise
-
-
 @schema.parametrize()
 @schema.override(headers={"Content-Type": "application/json"})
-@settings(deadline=LONG_TIMEOUT_SECONDS * 1000, max_examples=50)
+@settings(
+    deadline=LONG_TIMEOUT_SECONDS * 1000,
+    max_examples=50,
+    # Under CI's derandomized hypothesis seed, the schemathesis strategy
+    # for /v1/chat/completions/batch's nested-message body, combined with
+    # the no_invalid_types filter (notably the grammar=="" rule), exceeds
+    # the default filtered-vs-good ratio. The filter is intentional, so
+    # suppress the health check rather than drop the filter — dropping it
+    # exposes pre-existing server bugs out of scope here.
+    # The same nested schema can also trip Hypothesis' entropy budget while
+    # generating large-but-valid request bodies before vLLM is called.
+    suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.data_too_large],
+)
 def test_openapi_stateless(case: Case):
     key = (
         case.operation.method.upper(),
@@ -167,7 +135,9 @@ def test_openapi_stateless(case: Case):
     # (weight_transfer_config) and are meant to be stateful.
     if case.operation.path in (
         "/init_weight_transfer_engine",
+        "/start_weight_update",
         "/update_weights",
+        "/finish_weight_update",
     ):
         return
 
@@ -177,12 +147,8 @@ def test_openapi_stateless(case: Case):
         ("POST", "/v1/chat/completions/batch"): LONG_TIMEOUT_SECONDS,
         ("POST", "/v1/completions"): LONG_TIMEOUT_SECONDS,
         ("POST", "/v1/messages"): LONG_TIMEOUT_SECONDS,
+        ("POST", "/inference/v1/generate"): LONG_TIMEOUT_SECONDS,
     }.get(key, DEFAULT_TIMEOUT_SECONDS)
 
     # No need to verify SSL certificate for localhost
-    case.call_and_validate(
-        verify=False,
-        timeout=timeout,
-        additional_checks=(customized_not_a_server_error,),
-        excluded_checks=(not_a_server_error,),
-    )
+    case.call_and_validate(verify=False, timeout=timeout)
diff --git a/tests/entrypoints/openai/test_return_routed_experts.py b/tests/entrypoints/openai/test_return_routed_experts.py
new file mode 100644
index 000000000000..b69c4c49158e
--- /dev/null
+++ b/tests/entrypoints/openai/test_return_routed_experts.py
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import io
+
+import numpy as np
+import pybase64 as base64
+import pytest
+
+from ...utils import RemoteOpenAIServer
+
+MODEL_NAME = "TitanML/tiny-mixtral"
+
+# tiny-mixtral config: 8 local experts, top-2 routing, 2 hidden layers.
+# The published config has sliding_window=4096, which produces
+# SlidingWindowSpec kv-cache groups; RoutedExpertsManager requires a
+# FullAttentionSpec group, so we override sliding_window=null below.
+NUM_LOCAL_EXPERTS = 8
+NUM_EXPERTS_PER_TOK = 2
+NUM_HIDDEN_LAYERS = 2
+
+
+@pytest.fixture(scope="module")
+def server():
+    args = [
+        "--max-model-len",
+        "256",
+        "--max-num-seqs",
+        "32",
+        "--enforce-eager",
+        "--enable-return-routed-experts",
+        "--hf-overrides",
+        '{"sliding_window": null}',
+    ]
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+        yield remote_server
+
+
+@pytest.mark.asyncio
+async def test_routed_experts(server):
+    """Test that /v1/completions returns routed_experts when enabled."""
+    async with server.get_async_client() as client:
+        result = await client.completions.create(
+            model=MODEL_NAME,
+            prompt="Hello, world",
+            max_tokens=10,
+            temperature=0,
+            extra_body={"return_token_ids": True},
+        )
+
+        choice = result.model_dump()["choices"][0]
+
+        assert choice["routed_experts"] is not None
+        assert choice["token_ids"] is not None
+
+        # routed_experts is base64-encoded .npy bytes; decode to ndarray.
+        routed_experts = np.load(io.BytesIO(base64.b64decode(choice["routed_experts"])))
+        assert routed_experts.ndim == 3
+        num_tokens, num_layers, topk = routed_experts.shape
+        assert num_tokens > 0
+        assert num_layers == NUM_HIDDEN_LAYERS
+        assert topk == NUM_EXPERTS_PER_TOK
+        assert (routed_experts >= 0).all()
+        assert (routed_experts < NUM_LOCAL_EXPERTS).all()
diff --git a/tests/entrypoints/openai/test_run_batch.py b/tests/entrypoints/openai/test_run_batch.py
index bf670105bbc4..cd1daf0bbbc2 100644
--- a/tests/entrypoints/openai/test_run_batch.py
+++ b/tests/entrypoints/openai/test_run_batch.py
@@ -4,11 +4,15 @@
 import json
 import subprocess
 import tempfile
+from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
 from vllm.assets.audio import AudioAsset
-from vllm.entrypoints.openai.run_batch import BatchRequestOutput
+from vllm.entrypoints.openai.run_batch import (
+    BatchRequestOutput,
+    download_bytes_from_url,
+)
 
 CHAT_MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"
 EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-small"
@@ -746,3 +750,131 @@ def test_tool_calling():
                 assert "arguments" in tool_call["function"]
                 # Verify the tool name matches our tool definition
                 assert tool_call["function"]["name"] == "get_current_weather"
+
+
+# ---------------------------------------------------------------------------
+# Unit tests for download_bytes_from_url SSRF protection
+# ---------------------------------------------------------------------------
+
+
+def _make_aiohttp_mocks(response_data: bytes = b"fake-data", status: int = 200):
+    """Create mock objects that simulate aiohttp.ClientSession context managers."""
+    mock_resp = MagicMock()
+    mock_resp.status = status
+    mock_resp.read = AsyncMock(return_value=response_data)
+    mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
+    mock_resp.__aexit__ = AsyncMock(return_value=False)
+
+    mock_session = MagicMock()
+    mock_session.get = MagicMock(return_value=mock_resp)
+    mock_session.__aenter__ = AsyncMock(return_value=mock_session)
+    mock_session.__aexit__ = AsyncMock(return_value=False)
+    return mock_session
+
+
+@pytest.mark.asyncio
+async def test_download_bytes_data_url_bypasses_domain_check():
+    """data: URLs must work regardless of the domain allowlist."""
+    data_url = f"data:audio/wav;base64,{MINIMAL_WAV_BASE64}"
+    result = await download_bytes_from_url(
+        data_url, allowed_media_domains=["example.com"]
+    )
+    assert isinstance(result, bytes)
+    assert len(result) > 0
+
+
+@pytest.mark.asyncio
+async def test_download_bytes_rejects_disallowed_domain():
+    """HTTP URLs whose hostname is not in the allowlist must be rejected."""
+    url = "https://evil.internal/secret"
+    with pytest.raises(ValueError, match="allowed domains"):
+        await download_bytes_from_url(url, allowed_media_domains=["example.com"])
+
+
+@pytest.mark.asyncio
+async def test_download_bytes_rejects_cloud_metadata_ip():
+    """Cloud metadata endpoints must be blocked when an allowlist is set."""
+    url = "http://169.254.169.254/latest/meta-data/"
+    with pytest.raises(ValueError, match="allowed domains"):
+        await download_bytes_from_url(url, allowed_media_domains=["example.com"])
+
+
+@pytest.mark.asyncio
+async def test_download_bytes_rejects_internal_ip():
+    """Private-range IPs must be blocked when an allowlist is set."""
+    for internal_url in [
+        "http://10.0.0.1/secret",
+        "http://192.168.1.1/admin",
+        "http://127.0.0.1:8080/internal",
+    ]:
+        with pytest.raises(ValueError, match="allowed domains"):
+            await download_bytes_from_url(
+                internal_url, allowed_media_domains=["example.com"]
+            )
+
+
+@pytest.mark.asyncio
+async def test_download_bytes_allows_permitted_domain():
+    """HTTP URLs whose hostname IS in the allowlist must be fetched."""
+    url = "https://example.com/audio.wav"
+    expected = b"audio-bytes"
+    mock_session = _make_aiohttp_mocks(expected)
+
+    with patch(
+        "vllm.entrypoints.openai.run_batch.aiohttp.ClientSession",
+        return_value=mock_session,
+    ):
+        result = await download_bytes_from_url(
+            url, allowed_media_domains=["example.com"]
+        )
+    assert result == expected
+
+
+@pytest.mark.asyncio
+async def test_download_bytes_no_allowlist_permits_any_domain():
+    """Without an allowlist all HTTP URLs must be attempted (backward compat)."""
+    url = "https://any-domain.example.org/file.wav"
+    expected = b"some-data"
+    mock_session = _make_aiohttp_mocks(expected)
+
+    with patch(
+        "vllm.entrypoints.openai.run_batch.aiohttp.ClientSession",
+        return_value=mock_session,
+    ):
+        result = await download_bytes_from_url(url, allowed_media_domains=None)
+    assert result == expected
+
+
+@pytest.mark.asyncio
+async def test_download_bytes_empty_allowlist_denies_all():
+    """An empty allowlist must deny all HTTP URLs (least privilege)."""
+    url = "https://any-domain.example.org/file.wav"
+    with pytest.raises(ValueError, match="allowed domains"):
+        await download_bytes_from_url(url, allowed_media_domains=[])
+
+
+@pytest.mark.asyncio
+async def test_download_bytes_unsupported_scheme():
+    """Unsupported URL schemes must be rejected regardless of allowlist."""
+    with pytest.raises(ValueError, match="Unsupported URL scheme"):
+        await download_bytes_from_url("ftp://example.com/file.wav")
+
+    with pytest.raises(ValueError, match="Unsupported URL scheme"):
+        await download_bytes_from_url(
+            "ftp://example.com/file.wav",
+            allowed_media_domains=["example.com"],
+        )
+
+
+@pytest.mark.asyncio
+async def test_download_bytes_backslash_bypass():
+    """Backslash-@ URL confusion must not bypass the allowlist.
+
+    urllib3.parse_url() and aiohttp/yarl disagree on backslash-before-@.
+    The fix normalizes through urllib3 before handing to aiohttp.
+    """
+    bypass_url = "http://allowed.example.com\\@evil.internal/secret"
+    with pytest.raises(ValueError, match="allowed domains"):
+        await download_bytes_from_url(
+            bypass_url, allowed_media_domains=["evil.internal"]
+        )
diff --git a/tests/entrypoints/openai/test_tool_calls_serialization.py b/tests/entrypoints/openai/test_tool_calls_serialization.py
new file mode 100644
index 000000000000..cedc2575b80e
--- /dev/null
+++ b/tests/entrypoints/openai/test_tool_calls_serialization.py
@@ -0,0 +1,150 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for tool_calls Iterable → list materialisation.
+
+Regression tests for https://github.com/vllm-project/vllm/issues/34792.
+
+Setting VLLM_LOGGING_LEVEL=debug caused tool calling to break for Mistral
+models because:
+  1. The OpenAI Python SDK types tool_calls as Iterable[...] in
+     ChatCompletionAssistantMessageParam.
+  2. Pydantic v2, when validating from Python objects (not from raw JSON),
+     wraps Iterable fields in a one-shot lazy iterator.
+  3. Debug logging called model_dump_json() which consumed that iterator.
+  4. The Mistral tokenizer then saw empty tool_calls and raised
+     "ValueError: Unexpected tool call id ...".
+"""
+
+import pytest
+
+from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+
+
+def _make_tool_call(tc_id: str, name: str, args: str) -> dict:
+    return {
+        "id": tc_id,
+        "type": "function",
+        "function": {"name": name, "arguments": args},
+    }
+
+
+def _make_request(messages: list) -> ChatCompletionRequest:
+    return ChatCompletionRequest(
+        model="test-model",
+        messages=messages,
+    )
+
+
+def test_tool_calls_list_preserved_after_model_dump():
+    """tool_calls in assistant messages must be readable after model_dump_json.
+
+    When the request is built from Python dicts (as in the Anthropic → OpenAI
+    conversion path), Pydantic v2 previously wrapped the Iterable tool_calls
+    in a one-shot iterator.  model_dump_json() consumed it, leaving subsequent
+    readers (e.g. the Mistral tokenizer) with an empty sequence.
+    """
+    tool_call = _make_tool_call("call_abc123", "get_weather", '{"city": "Paris"}')
+    messages = [
+        {"role": "user", "content": "What is the weather in Paris?"},
+        {"role": "assistant", "content": None, "tool_calls": [tool_call]},
+        {
+            "role": "tool",
+            "tool_call_id": "call_abc123",
+            "content": '{"temperature": 20}',
+        },
+    ]
+
+    req = _make_request(messages)
+
+    # Simulate debug logging: serialize the model (this was the trigger)
+    _ = req.model_dump_json()
+
+    # The assistant message must still have accessible tool_calls afterwards
+    assistant_msg = req.messages[1]
+    assert isinstance(assistant_msg, dict)
+    tool_calls = assistant_msg.get("tool_calls")
+    assert tool_calls is not None, "tool_calls must not be None after model_dump_json"
+    assert isinstance(tool_calls, list), "tool_calls must be a list"
+    assert len(tool_calls) > 0, "tool_calls must not be empty after model_dump_json"
+
+
+def test_tool_calls_from_generator_are_materialised():
+    """tool_calls passed as a generator must be converted to list on validation."""
+    tool_call = _make_tool_call("call_gen1", "search", '{"query": "vllm"}')
+
+    def tool_calls_gen():
+        yield tool_call
+
+    messages = [
+        {"role": "user", "content": "Search for vllm"},
+        {
+            "role": "assistant",
+            "content": None,
+            "tool_calls": tool_calls_gen(),  # one-shot generator
+        },
+    ]
+
+    req = _make_request(messages)
+    assistant_msg = req.messages[1]
+    assert isinstance(assistant_msg, dict)
+
+    # Iterate twice — must not raise or return empty on second pass
+    tool_calls_first = list(assistant_msg.get("tool_calls", []))
+    tool_calls_second = list(assistant_msg.get("tool_calls", []))
+
+    assert len(tool_calls_first) == 1, "First read must return the tool call"
+    assert len(tool_calls_second) == 1, "Second read must also return the tool call"
+
+
+def test_tool_calls_list_passthrough():
+    """tool_calls already provided as a list must remain a list."""
+    tool_call = _make_tool_call("call_list1", "calculate", '{"expr": "2+2"}')
+    messages = [
+        {"role": "user", "content": "Calculate 2+2"},
+        {"role": "assistant", "content": None, "tool_calls": [tool_call]},
+    ]
+
+    req = _make_request(messages)
+    assistant_msg = req.messages[1]
+    assert isinstance(assistant_msg, dict)
+    assert isinstance(assistant_msg.get("tool_calls"), list)
+
+
+def test_messages_without_tool_calls_unaffected():
+    """Messages without tool_calls must be handled correctly."""
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Hello!"},
+        {"role": "assistant", "content": "Hi there!"},
+    ]
+
+    req = _make_request(messages)
+    # None of the messages should have tool_calls injected
+    for msg in req.messages:
+        assert isinstance(msg, dict)
+        assert msg.get("tool_calls") is None or msg.get("tool_calls") == []
+
+
+@pytest.mark.parametrize("num_tool_calls", [1, 3])
+def test_multiple_tool_calls_materialised(num_tool_calls: int):
+    """Multiple tool calls in a single message are all preserved."""
+    tool_calls = [
+        _make_tool_call(f"call_{i}", f"func_{i}", f'{{"arg": {i}}}')
+        for i in range(num_tool_calls)
+    ]
+    messages = [
+        {"role": "user", "content": "Do things"},
+        {"role": "assistant", "content": None, "tool_calls": iter(tool_calls)},
+    ]
+
+    req = _make_request(messages)
+    assistant_msg = req.messages[1]
+    assert isinstance(assistant_msg, dict)
+
+    result_tool_calls = assistant_msg.get("tool_calls")
+    assert isinstance(result_tool_calls, list)
+    assert len(result_tool_calls) == num_tool_calls
+
+    # Verify after model_dump_json too
+    _ = req.model_dump_json()
+    assert len(assistant_msg.get("tool_calls", [])) == num_tool_calls
diff --git a/tests/entrypoints/openai/test_tool_choice_content_none.py b/tests/entrypoints/openai/test_tool_choice_content_none.py
new file mode 100644
index 000000000000..c1da5918697c
--- /dev/null
+++ b/tests/entrypoints/openai/test_tool_choice_content_none.py
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+from vllm.entrypoints.openai.engine.serving import OpenAIServing
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+from vllm.parser.abstract_parser import DelegatingParser
+
+pytestmark = pytest.mark.skip_global_cleanup
+
+
+class _DummyDelegatingParser(DelegatingParser):
+    def is_reasoning_end(self, input_ids: list[int]) -> bool:
+        return False
+
+    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
+        return input_ids
+
+    def extract_reasoning(self, model_output: str, request):
+        return None, model_output
+
+    def extract_reasoning_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: list[int],
+        current_token_ids: list[int],
+        delta_token_ids: list[int],
+    ):
+        return None
+
+    def extract_tool_calls(self, model_output: str, request):
+        return None
+
+
+def test_parse_tool_calls_from_content_allows_named_tool_choice_with_none_content():
+    request = ChatCompletionRequest.model_validate(
+        {
+            "model": "test-model",
+            "messages": [{"role": "user", "content": "test"}],
+            "tools": [
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "get_weather",
+                        "parameters": {"type": "object", "properties": {}},
+                    },
+                }
+            ],
+            "tool_choice": {"type": "function", "function": {"name": "get_weather"}},
+        }
+    )
+
+    tool_calls, content = OpenAIServing._parse_tool_calls_from_content(
+        request=request,
+        tokenizer=None,
+        enable_auto_tools=True,
+        tool_parser_cls=None,
+        content=None,
+    )
+
+    assert content is None
+    assert tool_calls is not None
+    assert tool_calls == []
+
+
+def test_responses_parser_allows_named_tool_choice_with_none_content():
+    request = ResponsesRequest.model_validate(
+        {
+            "model": "test-model",
+            "input": "test",
+            "tools": [
+                {
+                    "type": "function",
+                    "name": "get_weather",
+                    "parameters": {"type": "object", "properties": {}},
+                }
+            ],
+            "tool_choice": {"type": "function", "name": "get_weather"},
+        }
+    )
+    parser = _DummyDelegatingParser(tokenizer=None)
+
+    tool_calls, content = parser._parse_tool_calls(
+        request=request,
+        content=None,
+        enable_auto_tools=False,
+    )
+
+    assert content is None
+    assert tool_calls == []
diff --git a/tests/entrypoints/openai/utils.py b/tests/entrypoints/openai/utils.py
index da65b8ad50bd..a791cab2a0cf 100644
--- a/tests/entrypoints/openai/utils.py
+++ b/tests/entrypoints/openai/utils.py
@@ -10,9 +10,7 @@
     ChatCompletionStreamResponse,
     ChatMessage,
 )
-from vllm.entrypoints.openai.engine.protocol import (
-    UsageInfo,
-)
+from vllm.entrypoints.openai.engine.protocol import UsageInfo
 
 
 async def accumulate_streaming_response(
diff --git a/tests/entrypoints/pooling/classify/test_offline.py b/tests/entrypoints/pooling/classify/test_offline.py
index 76a5303e5b3a..2f6a9f1db4a9 100644
--- a/tests/entrypoints/pooling/classify/test_offline.py
+++ b/tests/entrypoints/pooling/classify/test_offline.py
@@ -1,13 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import logging
 import weakref
 
 import pytest
 import torch
 
 from tests.models.utils import softmax
-from vllm import LLM, ClassificationRequestOutput, PoolingParams, PoolingRequestOutput
+from vllm import LLM, ClassificationRequestOutput, PoolingParams
 from vllm.distributed import cleanup_dist_env_and_memory
 from vllm.tasks import PoolingTask
 
@@ -66,18 +65,6 @@ def test_list_prompts(llm: LLM):
         assert len(outputs[i].outputs.probs) == num_labels
 
 
-@pytest.mark.skip_global_cleanup
-def test_token_classify(llm: LLM, caplog_vllm):
-    with caplog_vllm.at_level(level=logging.WARNING, logger="vllm"):
-        outputs = llm.encode(prompt, pooling_task="token_classify", use_tqdm=False)
-        assert "deprecated" in caplog_vllm.text
-
-    assert len(outputs) == 1
-    assert isinstance(outputs[0], PoolingRequestOutput)
-    assert outputs[0].prompt_token_ids == prompt_token_ids
-    assert outputs[0].outputs.data.shape == (len(prompt_token_ids), num_labels)
-
-
 @pytest.mark.skip_global_cleanup
 def test_pooling_params(llm: LLM):
     def get_outputs(use_activation):
@@ -105,13 +92,18 @@ def get_outputs(use_activation):
 
 @pytest.mark.skip_global_cleanup
 def test_score_api(llm: LLM):
-    err_msg = "Score API is only enabled for num_labels == 1."
+    err_msg = "Scoring API is only enabled for num_labels == 1."
     with pytest.raises(ValueError, match=err_msg):
         llm.score("ping", "pong", use_tqdm=False)
 
 
-@pytest.mark.parametrize("task", ["embed", "token_embed"])
+@pytest.mark.parametrize("task", ["embed", "token_embed", "token_classify", "plugin"])
 def test_unsupported_tasks(llm: LLM, task: PoolingTask):
-    err_msg = "Embedding API is not supported by this model.+"
+    if task == "plugin":
+        err_msg = "No IOProcessor plugin installed."
+    elif task == "token_classify":
+        err_msg = "Try switching the model's pooling_task via.+"
+    else:
+        err_msg = "Embedding API is not supported by this model.+"
     with pytest.raises(ValueError, match=err_msg):
         llm.encode(prompt, pooling_task=task, use_tqdm=False)
diff --git a/tests/entrypoints/pooling/classify/test_online.py b/tests/entrypoints/pooling/classify/test_online.py
index e23918fb8db8..c37da642b494 100644
--- a/tests/entrypoints/pooling/classify/test_online.py
+++ b/tests/entrypoints/pooling/classify/test_online.py
@@ -390,7 +390,7 @@ async def get_outputs(use_activation):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_score(server: RemoteOpenAIServer, model_name: str):
-    # score api is only enabled for num_labels == 1.
+    # Scoring API is only enabled for num_labels == 1.
     response = requests.post(
         server.url_for("score"),
         json={
@@ -405,7 +405,7 @@ async def test_score(server: RemoteOpenAIServer, model_name: str):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_rerank(server: RemoteOpenAIServer, model_name: str):
-    # rerank api is only enabled for num_labels == 1.
+    # Scoring API is only enabled for num_labels == 1.
     response = requests.post(
         server.url_for("rerank"),
         json={
@@ -436,26 +436,7 @@ async def test_pooling_classify(server: RemoteOpenAIServer, model_name: str):
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
-async def test_pooling_token_classify(server: RemoteOpenAIServer, model_name: str):
-    task = "token_classify"
-    response = requests.post(
-        server.url_for("pooling"),
-        json={
-            "model": model_name,
-            "input": input_text,
-            "encoding_format": "float",
-            "task": task,
-        },
-    )
-    poolings = PoolingResponse.model_validate(response.json())
-    assert len(poolings.data) == 1
-    assert len(poolings.data[0].data) == 8
-    assert len(poolings.data[0].data[0]) == 2
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", [MODEL_NAME])
-@pytest.mark.parametrize("task", ["embed", "token_embed", "plugin"])
+@pytest.mark.parametrize("task", ["embed", "token_embed", "token_classify", "plugin"])
 async def test_pooling_not_supported(
     server: RemoteOpenAIServer, model_name: str, task: str
 ):
@@ -469,4 +450,11 @@ async def test_pooling_not_supported(
         },
     )
     assert response.json()["error"]["type"] == "BadRequestError"
-    assert response.json()["error"]["message"].startswith(f"Unsupported task: {task!r}")
+
+    if task == "plugin":
+        err_msg = "No IOProcessor plugin installed."
+    elif task == "token_classify":
+        err_msg = "Try switching the model's pooling_task via"
+    else:
+        err_msg = f"Unsupported task: {task!r}"
+    assert response.json()["error"]["message"].startswith(err_msg)
diff --git a/tests/entrypoints/pooling/embed/test_io_processor.py b/tests/entrypoints/pooling/embed/test_io_processor.py
index f25911b661f5..341ccbd5f0c5 100644
--- a/tests/entrypoints/pooling/embed/test_io_processor.py
+++ b/tests/entrypoints/pooling/embed/test_io_processor.py
@@ -4,6 +4,7 @@
 
 import pytest
 
+from vllm import PoolingParams
 from vllm.entrypoints.pooling.embed.io_processor import EmbedIOProcessor
 from vllm.entrypoints.pooling.embed.protocol import (
     CohereEmbedContent,
@@ -218,6 +219,7 @@ class TestPreProcessCohereOnline:
     def _make_context(**request_kwargs) -> PoolingServeContext[CohereEmbedRequest]:
         return PoolingServeContext(
             request=CohereEmbedRequest(model="test", **request_kwargs),
+            pooling_params=PoolingParams(),
             model_name="test",
             request_id="embd-test",
         )
@@ -233,13 +235,13 @@ def test_text_only_without_task_prefix_uses_completion_path(self):
         ctx = self._make_context(texts=["hello"])
         calls: list[tuple[str, object]] = []
 
-        def preprocess_completion(request, prompt_input, prompt_embeds):
+        def preprocess_cmpl_online(request, prompt_input, prompt_embeds):
             calls.append(("completion", prompt_input))
             return ["completion"]
 
         handler._get_task_instruction_prefix = lambda _input_type: None
         handler._has_chat_template = lambda: False
-        handler._preprocess_completion_online = preprocess_completion
+        handler._preprocess_cmpl_online = preprocess_cmpl_online
         handler._batch_render_chat = lambda *_args, **_kwargs: (
             pytest.fail("text-only request should not require chat rendering")
         )
@@ -254,7 +256,7 @@ def test_text_only_falls_back_to_prefixed_completion_without_template(self):
         ctx = self._make_context(texts=["hello"], input_type="query")
         calls: list[tuple[str, object]] = []
 
-        def preprocess_completion(request, prompt_input, prompt_embeds):
+        def preprocess_cmpl(request, prompt_input, prompt_embeds):
             calls.append(("completion", prompt_input))
             return ["fallback"]
 
@@ -263,7 +265,7 @@ def preprocess_completion(request, prompt_input, prompt_embeds):
         handler._batch_render_chat = lambda *_args, **_kwargs: (
             pytest.fail("chat rendering should be skipped without a template")
         )
-        handler._preprocess_completion_online = preprocess_completion
+        handler._preprocess_cmpl_online = preprocess_cmpl
 
         handler._pre_process_cohere_online(ctx)
 
@@ -297,7 +299,7 @@ def batch_render_chat(
         handler._get_task_instruction_prefix = lambda _input_type: "query: "
         handler._has_chat_template = lambda: True
         handler._batch_render_chat = batch_render_chat
-        handler._preprocess_completion_online = lambda *_args, **_kwargs: (
+        handler._preprocess_cmpl_online = lambda *_args, **_kwargs: (
             pytest.fail("completion path should be skipped when a template exists")
         )
 
diff --git a/tests/entrypoints/pooling/embed/test_offline.py b/tests/entrypoints/pooling/embed/test_offline.py
index e8d84ed45e0d..c19e13075eb8 100644
--- a/tests/entrypoints/pooling/embed/test_offline.py
+++ b/tests/entrypoints/pooling/embed/test_offline.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import logging
 import weakref
 
 import pytest
@@ -38,11 +37,11 @@ def llm():
         seed=0,
         attention_config=attention_config,
     )
+    assert embedding_size == llm.model_config.embedding_size
 
     yield weakref.proxy(llm)
 
     del llm
-
     cleanup_dist_env_and_memory()
 
 
@@ -74,16 +73,6 @@ def test_list_prompts(llm: LLM):
         assert len(outputs[i].outputs.embedding) == embedding_size
 
 
-@pytest.mark.skip_global_cleanup
-def test_token_embed(llm: LLM, caplog_vllm):
-    with caplog_vllm.at_level(level=logging.WARNING, logger="vllm"):
-        outputs = llm.encode(prompt, pooling_task="token_embed", use_tqdm=False)
-        assert "deprecated" in caplog_vllm.text
-
-    multi_vector = outputs[0].outputs.data
-    assert multi_vector.shape == (11, 384)
-
-
 @pytest.mark.skip_global_cleanup
 def test_pooling_params(llm: LLM):
     def get_outputs(normalize):
@@ -107,8 +96,15 @@ def get_outputs(normalize):
     )
 
 
-@pytest.mark.parametrize("task", ["token_classify", "classify"])
+@pytest.mark.parametrize(
+    "task", ["token_classify", "classify", "token_embed", "plugin"]
+)
 def test_unsupported_tasks(llm: LLM, task: PoolingTask):
-    err_msg = "Classification API is not supported by this model.+"
+    if task == "plugin":
+        err_msg = "No IOProcessor plugin installed."
+    elif task == "token_embed":
+        err_msg = "Try switching the model's pooling_task via.+"
+    else:
+        err_msg = "Classification API is not supported by this model.+"
     with pytest.raises(ValueError, match=err_msg):
         llm.encode(prompt, pooling_task=task, use_tqdm=False)
diff --git a/tests/entrypoints/pooling/embed/test_online.py b/tests/entrypoints/pooling/embed/test_online.py
index dc61244c9445..d5565f25d37c 100644
--- a/tests/entrypoints/pooling/embed/test_online.py
+++ b/tests/entrypoints/pooling/embed/test_online.py
@@ -732,28 +732,9 @@ async def test_pooling_embed(server: RemoteOpenAIServer, model_name: str):
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
-async def test_pooling_token_embed(server: RemoteOpenAIServer, model_name: str):
-    task = "token_embed"
-    response = requests.post(
-        server.url_for("pooling"),
-        json={
-            "model": model_name,
-            "input": input_text,
-            "encoding_format": "float",
-            "task": task,
-        },
-    )
-
-    poolings = PoolingResponse.model_validate(response.json())
-
-    assert len(poolings.data) == 1
-    assert len(poolings.data[0].data) == len(input_tokens)
-    assert len(poolings.data[0].data[0]) == 384
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", [MODEL_NAME])
-@pytest.mark.parametrize("task", ["classify", "token_classify", "plugin"])
+@pytest.mark.parametrize(
+    "task", ["classify", "token_classify", "token_embed", "plugin"]
+)
 async def test_pooling_not_supported(
     server: RemoteOpenAIServer, model_name: str, task: str
 ):
@@ -767,4 +748,10 @@ async def test_pooling_not_supported(
         },
     )
     assert response.json()["error"]["type"] == "BadRequestError"
-    assert response.json()["error"]["message"].startswith(f"Unsupported task: {task!r}")
+    if task == "plugin":
+        err_msg = "No IOProcessor plugin installed."
+    elif task == "token_embed":
+        err_msg = "Try switching the model's pooling_task via"
+    else:
+        err_msg = f"Unsupported task: {task!r}"
+    assert response.json()["error"]["message"].startswith(err_msg)
diff --git a/tests/entrypoints/pooling/reward/test_offline.py b/tests/entrypoints/pooling/reward/test_token_reward_offline.py
similarity index 100%
rename from tests/entrypoints/pooling/reward/test_offline.py
rename to tests/entrypoints/pooling/reward/test_token_reward_offline.py
diff --git a/tests/entrypoints/pooling/pooling/test_online.py b/tests/entrypoints/pooling/reward/test_token_reward_online.py
similarity index 100%
rename from tests/entrypoints/pooling/pooling/test_online.py
rename to tests/entrypoints/pooling/reward/test_token_reward_online.py
diff --git a/tests/entrypoints/pooling/scoring/test_bi_encoder_online.py b/tests/entrypoints/pooling/scoring/test_bi_encoder_online.py
index fb925836fbdb..392514056645 100644
--- a/tests/entrypoints/pooling/scoring/test_bi_encoder_online.py
+++ b/tests/entrypoints/pooling/scoring/test_bi_encoder_online.py
@@ -7,7 +7,7 @@
 from tests.entrypoints.pooling.scoring.util import EncoderScoringHfRunner
 from tests.utils import RemoteOpenAIServer
 from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
-from vllm.entrypoints.pooling.score.protocol import RerankResponse, ScoreResponse
+from vllm.entrypoints.pooling.scoring.protocol import RerankResponse, ScoreResponse
 from vllm.platforms import current_platform
 
 MODEL_NAME = "BAAI/bge-base-en-v1.5"
@@ -411,4 +411,8 @@ async def test_pooling_not_supported(server: RemoteOpenAIServer, task: str):
         },
     )
     assert response.json()["error"]["type"] == "BadRequestError"
-    assert response.json()["error"]["message"].startswith(f"Unsupported task: {task!r}")
+    if task == "plugin":
+        err_msg = "No IOProcessor plugin installed."
+    else:
+        err_msg = f"Unsupported task: {task!r}"
+    assert response.json()["error"]["message"].startswith(err_msg)
diff --git a/tests/entrypoints/pooling/scoring/test_cross_encoder_offline.py b/tests/entrypoints/pooling/scoring/test_cross_encoder_offline.py
index cb76d74608e1..56e83de3f74f 100644
--- a/tests/entrypoints/pooling/scoring/test_cross_encoder_offline.py
+++ b/tests/entrypoints/pooling/scoring/test_cross_encoder_offline.py
@@ -112,6 +112,35 @@ def test_classify(llm):
     assert len(outputs[0].outputs.data) == 1
 
 
+@pytest.mark.skip_global_cleanup
+def test_max_tokens_per_doc(llm: LLM):
+    """Test max_tokens_per_doc via PoolingParams.extra_kwargs (offline)."""
+    long_doc = "The capital of France is Paris. " * 20
+
+    # Without truncation
+    outputs_no_limit = llm.score(
+        TEXTS_1[0],
+        long_doc,
+        use_tqdm=False,
+    )
+
+    # With truncation via extra_kwargs
+    outputs_with_limit = llm.score(
+        TEXTS_1[0],
+        long_doc,
+        pooling_params=PoolingParams(extra_kwargs={"max_tokens_per_doc": 10}),
+        use_tqdm=False,
+    )
+
+    assert len(outputs_no_limit) == 1
+    assert len(outputs_with_limit) == 1
+
+    # Truncated version should have fewer prompt tokens
+    no_limit_tokens = len(outputs_no_limit[0].prompt_token_ids)
+    with_limit_tokens = len(outputs_with_limit[0].prompt_token_ids)
+    assert with_limit_tokens < no_limit_tokens
+
+
 def test_pooling_params(llm: LLM):
     def get_outputs(use_activation):
         outputs = llm.score(
diff --git a/tests/entrypoints/pooling/scoring/test_cross_encoder_online.py b/tests/entrypoints/pooling/scoring/test_cross_encoder_online.py
index c6747a464976..54b4fe075b47 100644
--- a/tests/entrypoints/pooling/scoring/test_cross_encoder_online.py
+++ b/tests/entrypoints/pooling/scoring/test_cross_encoder_online.py
@@ -8,7 +8,7 @@
 
 from tests.utils import RemoteOpenAIServer
 from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
-from vllm.entrypoints.pooling.score.protocol import RerankResponse, ScoreResponse
+from vllm.entrypoints.pooling.scoring.protocol import RerankResponse, ScoreResponse
 from vllm.platforms import current_platform
 
 MODEL_NAME = "BAAI/bge-reranker-base"
@@ -453,26 +453,79 @@ async def test_pooling_classify(server: RemoteOpenAIServer):
 
 
 @pytest.mark.asyncio
-async def test_pooling_token_classify(server: RemoteOpenAIServer):
-    response = requests.post(
-        server.url_for("pooling"),
+async def test_rerank_max_tokens_per_doc(
+    server: RemoteOpenAIServer,
+):
+    """Test that max_tokens_per_doc actually reduces the token count."""
+    query = "What is the capital of France?"
+    # Use a doc that fits within max_model_len=100 (query ~8 tokens + 4 special)
+    long_doc = "The capital of France is Paris. " * 10  # ~70 tokens
+
+    # Without max_tokens_per_doc
+    response_no_limit = requests.post(
+        server.url_for("rerank"),
         json={
             "model": MODEL_NAME,
-            "task": "token_classify",
-            "input": input_text,
-            "encoding_format": "float",
+            "query": query,
+            "documents": [long_doc],
+            "truncate_prompt_tokens": 99,
         },
     )
+    response_no_limit.raise_for_status()
+    rerank_no_limit = RerankResponse.model_validate(response_no_limit.json())
 
-    poolings = PoolingResponse.model_validate(response.json())
+    # With max_tokens_per_doc
+    response_with_limit = requests.post(
+        server.url_for("rerank"),
+        json={
+            "model": MODEL_NAME,
+            "query": query,
+            "documents": [long_doc],
+            "max_tokens_per_doc": 10,
+        },
+    )
+    response_with_limit.raise_for_status()
+    rerank_with_limit = RerankResponse.model_validate(response_with_limit.json())
 
-    assert len(poolings.data) == 1
-    assert len(poolings.data[0].data) == len(input_tokens)
-    assert len(poolings.data[0].data[0]) == 1
+    assert rerank_with_limit.usage.prompt_tokens < rerank_no_limit.usage.prompt_tokens
+
+
+@pytest.mark.asyncio
+async def test_rerank_max_tokens_per_doc_validation(
+    server: RemoteOpenAIServer,
+):
+    """Test that max_tokens_per_doc validation works correctly."""
+    query = "What is the capital of France?"
+    documents = ["The capital of France is Paris."]
+
+    # Test with max_tokens_per_doc=0 (should succeed — means no truncation)
+    response = requests.post(
+        server.url_for("rerank"),
+        json={
+            "model": MODEL_NAME,
+            "query": query,
+            "documents": documents,
+            "max_tokens_per_doc": 0,
+        },
+    )
+    response.raise_for_status()
+
+    # Test with invalid max_tokens_per_doc (negative)
+    response = requests.post(
+        server.url_for("rerank"),
+        json={
+            "model": MODEL_NAME,
+            "query": query,
+            "documents": documents,
+            "max_tokens_per_doc": -5,
+        },
+    )
+    assert response.status_code == 400
+    assert "max_tokens_per_doc must be a non-negative integer" in response.text
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("task", ["embed", "token_embed", "plugin"])
+@pytest.mark.parametrize("task", ["embed", "token_embed", "token_classify", "plugin"])
 async def test_pooling_not_supported(server: RemoteOpenAIServer, task: str):
     response = requests.post(
         server.url_for("pooling"),
@@ -484,4 +537,10 @@ async def test_pooling_not_supported(server: RemoteOpenAIServer, task: str):
         },
     )
     assert response.json()["error"]["type"] == "BadRequestError"
-    assert response.json()["error"]["message"].startswith(f"Unsupported task: {task!r}")
+    if task == "plugin":
+        err_msg = "No IOProcessor plugin installed."
+    elif task == "token_classify":
+        err_msg = "Try switching the model's pooling_task via"
+    else:
+        err_msg = f"Unsupported task: {task!r}"
+    assert response.json()["error"]["message"].startswith(err_msg)
diff --git a/tests/entrypoints/pooling/scoring/test_cross_encoder_online_vision.py b/tests/entrypoints/pooling/scoring/test_cross_encoder_online_vision.py
index cc4ba6a8ec28..e6b4d3f873e0 100644
--- a/tests/entrypoints/pooling/scoring/test_cross_encoder_online_vision.py
+++ b/tests/entrypoints/pooling/scoring/test_cross_encoder_online_vision.py
@@ -7,7 +7,7 @@
 import requests
 
 from tests.utils import VLLM_PATH, RemoteOpenAIServer
-from vllm.entrypoints.pooling.score.protocol import RerankResponse, ScoreResponse
+from vllm.entrypoints.pooling.scoring.protocol import RerankResponse, ScoreResponse
 from vllm.multimodal.utils import encode_image_url, fetch_image
 from vllm.platforms import current_platform
 
@@ -38,6 +38,15 @@
     "FLEX_ATTENTION": 0.045,  # gfx950:~3.25%, gfx942:~1.10%
 }
 
+# ROCm 7.2/gfx950 shows small absolute drift on the low text-vs-text
+# probability even though larger scores remain well inside the relative
+# tolerance. Keep the relative tolerances tight and add only a small floor.
+BACKEND_ABS_TOL: dict[str, float] = {
+    "default": 0.0,
+    "ROCM_AITER_FA": 0.005,
+    "FLEX_ATTENTION": 0.006,
+}
+
 # ROCm: disable skinny GEMM to avoid non-deterministic results from
 # atomic reductions in wvSplitKrc kernel.
 # See: https://github.com/vllm-project/vllm/pull/33493#issuecomment-3906083975
@@ -57,18 +66,23 @@ def get_tol(backend: str) -> float:
     return BACKEND_TOL.get(backend, BACKEND_TOL["default"])
 
 
+def get_abs_tol(backend: str) -> float:
+    return BACKEND_ABS_TOL.get(backend, BACKEND_ABS_TOL["default"])
+
+
 def assert_score(actual: float, expected: float, backend: str, label: str):
     tol = get_tol(backend)
+    abs_tol = get_abs_tol(backend)
     diff = abs(actual - expected)
     rel_diff = diff / abs(expected) if expected != 0 else diff
     print(
         f"[{backend}] {label}: actual={actual:.6f} expected={expected:.6f} "
-        f"diff={diff:.6f} rel_diff={rel_diff:.4f} tol={tol}"
+        f"diff={diff:.6f} rel_diff={rel_diff:.4f} tol={tol} abs_tol={abs_tol}"
     )
-    assert actual == pytest.approx(expected, rel=tol), (
+    assert actual == pytest.approx(expected, rel=tol, abs=abs_tol), (
         f"[{backend}] {label}: score mismatch — "
         f"actual={actual:.6f}, expected={expected:.6f}, "
-        f"rel_diff={rel_diff:.4f}, tol={tol}"
+        f"rel_diff={rel_diff:.4f}, tol={tol}, abs_tol={abs_tol}"
     )
 
 
@@ -234,7 +248,7 @@ async def test_score_api_queries_str_documents_image_url_plus_text_content(
     assert score.id is not None
     assert score.data is not None
     assert len(score.data) == 1
-    assert score.usage.prompt_tokens == 108
+    assert score.usage.prompt_tokens == 107
     assert_score(
         score.data[0].score, TEXT_VS_TEXT_PLUS_IMAGE, backend, "text_vs_text_plus_image"
     )
@@ -264,7 +278,7 @@ async def test_score_api_queries_str_documents_list(
     assert score.id is not None
     assert score.data is not None
     assert len(score.data) == 4
-    assert score.usage.prompt_tokens == 368
+    assert score.usage.prompt_tokens == 367
     assert_score(score.data[0].score, TEXT_VS_TEXT, backend, "list[0]_text_vs_text")
     assert_score(score.data[1].score, TEXT_VS_TEXT, backend, "list[1]_text_vs_text")
     assert_score(score.data[2].score, TEXT_VS_IMAGE, backend, "list[2]_text_vs_image")
@@ -353,7 +367,7 @@ async def test_score_api_queries_list_documents_list(
     assert score.id is not None
     assert score.data is not None
     assert len(score.data) == 4
-    assert score.usage.prompt_tokens == 368
+    assert score.usage.prompt_tokens == 367
     assert_score(score.data[0].score, TEXT_VS_TEXT, backend, "paired[0]_text_vs_text")
     assert_score(score.data[1].score, TEXT_VS_TEXT, backend, "paired[1]_text_vs_text")
     assert_score(score.data[2].score, TEXT_VS_IMAGE, backend, "paired[2]_text_vs_image")
@@ -363,3 +377,135 @@ async def test_score_api_queries_list_documents_list(
         backend,
         "paired[3]_text_vs_text_plus_image",
     )
+
+
+INSTRUCTION = (
+    "Given a multimodal retrieval query, retrieve candidates that "
+    "visually or textually match the requested scene, object, or action."
+)
+
+
+@pytest.mark.asyncio
+async def test_score_api_instruction_field(
+    server: tuple[RemoteOpenAIServer, str],
+):
+    remote_server, _ = server
+
+    default_response = requests.post(
+        remote_server.url_for("score"),
+        json={
+            "model": MODEL_NAME,
+            "queries": query,
+            "documents": document,
+        },
+    )
+    default_response.raise_for_status()
+    default_score = ScoreResponse.model_validate(default_response.json())
+
+    instruction_response = requests.post(
+        remote_server.url_for("score"),
+        json={
+            "model": MODEL_NAME,
+            "queries": query,
+            "documents": document,
+            "instruction": INSTRUCTION,
+        },
+    )
+    instruction_response.raise_for_status()
+    instruction_score = ScoreResponse.model_validate(instruction_response.json())
+
+    assert instruction_score.id is not None
+    assert instruction_score.data is not None
+    assert len(instruction_score.data) == 1
+    assert instruction_score.usage.prompt_tokens > default_score.usage.prompt_tokens
+
+
+@pytest.mark.asyncio
+async def test_rerank_api_instruction_field(
+    server: tuple[RemoteOpenAIServer, str],
+):
+    remote_server, _ = server
+
+    doc_list = [
+        document,
+        {"content": [documents[0]]},
+        {"content": [documents[1]]},
+        {"content": [documents[0], documents[1]]},
+    ]
+
+    default_response = requests.post(
+        remote_server.url_for("rerank"),
+        json={
+            "model": MODEL_NAME,
+            "query": query,
+            "documents": doc_list,
+        },
+    )
+    default_response.raise_for_status()
+    default_rerank = RerankResponse.model_validate(default_response.json())
+
+    instruction_response = requests.post(
+        remote_server.url_for("rerank"),
+        json={
+            "model": MODEL_NAME,
+            "query": query,
+            "documents": doc_list,
+            "instruction": INSTRUCTION,
+        },
+    )
+    instruction_response.raise_for_status()
+    instruction_rerank = RerankResponse.model_validate(instruction_response.json())
+
+    assert instruction_rerank.id is not None
+    assert instruction_rerank.model is not None
+    assert instruction_rerank.usage is not None
+    assert len(instruction_rerank.results) == len(default_rerank.results)
+    assert instruction_rerank.usage.prompt_tokens > default_rerank.usage.prompt_tokens
+
+
+@pytest.mark.asyncio
+async def test_rerank_api_instruction_field_matches_chat_template_kwargs(
+    server: tuple[RemoteOpenAIServer, str],
+):
+    remote_server, _ = server
+
+    doc_list = [
+        document,
+        {"content": [documents[0]]},
+        {"content": [documents[1]]},
+        {"content": [documents[0], documents[1]]},
+    ]
+
+    field_response = requests.post(
+        remote_server.url_for("rerank"),
+        json={
+            "model": MODEL_NAME,
+            "query": query,
+            "documents": doc_list,
+            "instruction": INSTRUCTION,
+        },
+    )
+    field_response.raise_for_status()
+    field_rerank = RerankResponse.model_validate(field_response.json())
+
+    kwargs_response = requests.post(
+        remote_server.url_for("rerank"),
+        json={
+            "model": MODEL_NAME,
+            "query": query,
+            "documents": doc_list,
+            "chat_template_kwargs": {"instruction": INSTRUCTION},
+        },
+    )
+    kwargs_response.raise_for_status()
+    kwargs_rerank = RerankResponse.model_validate(kwargs_response.json())
+
+    assert kwargs_rerank.usage.prompt_tokens == field_rerank.usage.prompt_tokens
+
+    field_scores = [
+        r.relevance_score for r in sorted(field_rerank.results, key=lambda x: x.index)
+    ]
+    kwargs_scores = [
+        r.relevance_score for r in sorted(kwargs_rerank.results, key=lambda x: x.index)
+    ]
+    assert field_scores == pytest.approx(kwargs_scores)
diff --git a/tests/entrypoints/pooling/scoring/test_late_interaction_offline_vision.py b/tests/entrypoints/pooling/scoring/test_late_interaction_offline_vision.py
new file mode 100644
index 000000000000..f5bbb208b4d6
--- /dev/null
+++ b/tests/entrypoints/pooling/scoring/test_late_interaction_offline_vision.py
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import weakref
+
+import pytest
+
+from vllm import LLM
+from vllm.distributed import cleanup_dist_env_and_memory
+from vllm.platforms import current_platform
+
+from .util import make_base64_image, make_image_mm_param
+
+MODEL_NAME = "vidore/colpali-v1.3-hf"
+
+
+@pytest.fixture(scope="module")
+def llm():
+    # ROCm: Use FLEX_ATTENTION backend as it's the only attention backend
+    # that supports encoder-only models on ROCm.
+    attention_config = None
+    if current_platform.is_rocm():
+        attention_config = {"backend": "FLEX_ATTENTION"}
+
+    # pytest caches the fixture so we use weakref.proxy to
+    # enable garbage collection
+    llm = LLM(
+        model=MODEL_NAME,
+        max_num_batched_tokens=32768,
+        tensor_parallel_size=1,
+        gpu_memory_utilization=0.75,
+        enforce_eager=True,
+        seed=0,
+        attention_config=attention_config,
+    )
+
+    yield weakref.proxy(llm)
+
+    del llm
+
+    cleanup_dist_env_and_memory()
+
+
+@pytest.mark.skip_global_cleanup
+def test_query_text_vs_docs_image(llm):
+    """Score a text query against image documents via the multimodal path."""
+    red_image = make_base64_image(64, 64, color=(255, 0, 0))
+    blue_image = make_base64_image(64, 64, color=(0, 0, 255))
+
+    query = "Describe the red object"
+    image_docs = [
+        make_image_mm_param(red_image),
+        make_image_mm_param(blue_image),
+    ]
+
+    scores = llm.score(query, image_docs)
+
+    assert len(scores) == 2
+    assert scores[0].outputs.score > scores[1].outputs.score
+
+
+@pytest.mark.skip_global_cleanup
+def test_query_text_vs_docs_mix(llm) -> None:
+    """Score a text query against a mix of text and image documents."""
+    red_image = make_base64_image(64, 64, color=(255, 0, 0))
+
+    query = "What is the capital of France?"
+    documents: list = [
+        "The capital of France is Paris.",
+        make_image_mm_param(red_image),
+    ]
+
+    scores = llm.score(query, documents)
+
+    assert len(scores) == 2
+    assert scores[0].outputs.score > scores[1].outputs.score
+
+
+@pytest.mark.skip_global_cleanup
+def test_query_image_vs_docs_text(llm) -> None:
+    """Score an image query against text documents."""
+    red_image = make_base64_image(64, 64, color=(255, 0, 0))
+    image_query = make_image_mm_param(red_image, text="red color")
+
+    documents = [
+        "Describe the red object.",
+        "The capital of France is Paris.",
+    ]
+
+    scores = llm.score(image_query, documents)
+
+    assert len(scores) == 2
+    assert scores[0].outputs.score > scores[1].outputs.score
diff --git a/tests/entrypoints/pooling/scoring/test_late_interaction_online.py b/tests/entrypoints/pooling/scoring/test_late_interaction_online.py
index 77d1fa16c661..7e4501fe8500 100644
--- a/tests/entrypoints/pooling/scoring/test_late_interaction_online.py
+++ b/tests/entrypoints/pooling/scoring/test_late_interaction_online.py
@@ -6,7 +6,7 @@
 import requests
 
 from tests.utils import RemoteOpenAIServer
-from vllm.entrypoints.pooling.score.protocol import RerankResponse, ScoreResponse
+from vllm.entrypoints.pooling.scoring.protocol import RerankResponse, ScoreResponse
 
 from .util import ColBERTScoringHfRunner
 
@@ -26,13 +26,18 @@
 ]
 
 
-@pytest.fixture(scope="module")
-def server():
+@pytest.fixture(scope="module", params=[True, False])
+def server(request):
     args = [
         "--max-model-len",
         str(MAX_MODEL_LEN),
     ]
 
+    # Test run pooling score MaxSim on worker side (GPU)
+    # aka flash-late-interaction
+    if not request.param:
+        args += ["--no-enable-flash-late-interaction"]
+
     with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
         yield remote_server
 
diff --git a/tests/entrypoints/pooling/scoring/test_late_interaction_online_vision.py b/tests/entrypoints/pooling/scoring/test_late_interaction_online_vision.py
new file mode 100644
index 000000000000..7d8f7342b486
--- /dev/null
+++ b/tests/entrypoints/pooling/scoring/test_late_interaction_online_vision.py
@@ -0,0 +1,193 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+import requests
+
+from tests.entrypoints.pooling.scoring.util import (
+    make_base64_image,
+    make_image_mm_param,
+)
+from tests.utils import RemoteOpenAIServer
+from vllm.entrypoints.pooling.scoring.protocol import RerankResponse, ScoreResponse
+
+MODEL_NAME = "vidore/colpali-v1.3-hf"
+
+
+@pytest.fixture(scope="module")
+def server():
+    with RemoteOpenAIServer(MODEL_NAME, []) as remote_server:
+        yield remote_server
+
+
+@pytest.mark.asyncio
+async def test_score_api_query_text_vs_docs_image(server: RemoteOpenAIServer):
+    query = "Describe the red object"
+
+    red_image = make_base64_image(64, 64, color=(255, 0, 0))
+    blue_image = make_base64_image(64, 64, color=(0, 0, 255))
+
+    documents = [
+        make_image_mm_param(red_image),
+        make_image_mm_param(blue_image),
+    ]
+
+    score_response = requests.post(
+        server.url_for("score"),
+        json={
+            "model": MODEL_NAME,
+            "queries": query,
+            "documents": documents,
+        },
+    )
+    score_response.raise_for_status()
+    scores = ScoreResponse.model_validate(score_response.json())
+
+    assert scores.id is not None
+    assert scores.data is not None
+    assert len(scores.data) == 2
+    assert scores.data[0].score > scores.data[1].score
+
+
+@pytest.mark.asyncio
+async def test_score_api_query_text_vs_docs_mix(server: RemoteOpenAIServer):
+    red_image = make_base64_image(64, 64, color=(255, 0, 0))
+    query = "What is the capital of France?"
+    documents: list = [
+        "The capital of France is Paris.",
+        make_image_mm_param(red_image),
+    ]
+
+    score_response = requests.post(
+        server.url_for("score"),
+        json={
+            "model": MODEL_NAME,
+            "queries": query,
+            "documents": documents,
+        },
+    )
+    score_response.raise_for_status()
+    scores = ScoreResponse.model_validate(score_response.json())
+
+    assert scores.id is not None
+    assert scores.data is not None
+    assert len(scores.data) == 2
+    assert scores.data[0].score > scores.data[1].score
+
+
+@pytest.mark.asyncio
+async def test_score_api_query_image_vs_docs_text(server: RemoteOpenAIServer):
+    red_image = make_base64_image(64, 64, color=(255, 0, 0))
+    image_query = make_image_mm_param(red_image, text="red color")
+
+    documents = [
+        "Describe the red object.",
+        "The capital of France is Paris.",
+    ]
+
+    score_response = requests.post(
+        server.url_for("score"),
+        json={
+            "model": MODEL_NAME,
+            "queries": image_query,
+            "documents": documents,
+        },
+    )
+    score_response.raise_for_status()
+    scores = ScoreResponse.model_validate(score_response.json())
+
+    assert scores.id is not None
+    assert scores.data is not None
+    assert len(scores.data) == 2
+    assert scores.data[0].score > scores.data[1].score
+
+
+@pytest.mark.asyncio
+async def test_rerank_api_query_text_vs_docs_image(server: RemoteOpenAIServer):
+    query = "Describe the red object"
+
+    red_image = make_base64_image(64, 64, color=(255, 0, 0))
+    blue_image = make_base64_image(64, 64, color=(0, 0, 255))
+
+    documents = [
+        make_image_mm_param(red_image),
+        make_image_mm_param(blue_image),
+    ]
+
+    rerank_response = requests.post(
+        server.url_for("rerank"),
+        json={"model": MODEL_NAME, "query": query, "documents": documents},
+    )
+
+    rerank_response.raise_for_status()
+    rerank = RerankResponse.model_validate(rerank_response.json())
+
+    assert rerank.id is not None
+    assert rerank.results is not None
+    assert len(rerank.results) == 2
+
+    red_result = next(r for r in rerank.results if r.index == 0)
+    blue_result = next(r for r in rerank.results if r.index == 1)
+
+    assert red_result.relevance_score > blue_result.relevance_score
+
+
+@pytest.mark.asyncio
+async def test_rerank_api_query_text_vs_docs_mix(server: RemoteOpenAIServer):
+    red_image = make_base64_image(64, 64, color=(255, 0, 0))
+    query = "What is the capital of France?"
+    documents: list = [
+        "The capital of France is Paris.",
+        make_image_mm_param(red_image),
+    ]
+
+    rerank_response = requests.post(
+        server.url_for("rerank"),
+        json={
+            "model": MODEL_NAME,
+            "query": query,
+            "documents": documents,
+        },
+    )
+    rerank_response.raise_for_status()
+    rerank = RerankResponse.model_validate(rerank_response.json())
+
+    assert rerank.id is not None
+    assert rerank.results is not None
+    assert len(rerank.results) == 2
+
+    result0 = next(r for r in rerank.results if r.index == 0)
+    result1 = next(r for r in rerank.results if r.index == 1)
+
+    assert result0.relevance_score > result1.relevance_score
+
+
+@pytest.mark.asyncio
+async def test_rerank_api_query_image_vs_docs_text(server: RemoteOpenAIServer):
+    red_image = make_base64_image(64, 64, color=(255, 0, 0))
+    image_query = make_image_mm_param(red_image, text="red color")
+
+    documents = [
+        "Describe the red object.",
+        "The capital of France is Paris.",
+    ]
+
+    rerank_response = requests.post(
+        server.url_for("rerank"),
+        json={
+            "model": MODEL_NAME,
+            "query": image_query,
+            "documents": documents,
+        },
+    )
+    rerank_response.raise_for_status()
+    rerank = RerankResponse.model_validate(rerank_response.json())
+
+    assert rerank.id is not None
+    assert rerank.results is not None
+    assert len(rerank.results) == 2
+
+    result0 = next(r for r in rerank.results if r.index == 0)
+    result1 = next(r for r in rerank.results if r.index == 1)
+
+    assert result0.relevance_score > result1.relevance_score
diff --git a/tests/entrypoints/pooling/scoring/test_utils.py b/tests/entrypoints/pooling/scoring/test_utils.py
deleted file mode 100644
index 20b6df4a9bef..000000000000
--- a/tests/entrypoints/pooling/scoring/test_utils.py
+++ /dev/null
@@ -1,353 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-from unittest.mock import patch
-
-import pytest
-
-from vllm.config import ModelConfig
-from vllm.entrypoints.chat_utils import ChatTemplateResolutionError
-from vllm.entrypoints.pooling.score.utils import (
-    get_score_prompt,
-)
-from vllm.inputs import TokensPrompt
-from vllm.tokenizers import get_tokenizer
-
-# A cross-encoder model for testing
-CROSS_ENCODER_MODEL_ID = "cross-encoder/ms-marco-MiniLM-L-6-v2"
-
-
-def assert_prompt_tokenization_consistent(
-    tokenizer, full_prompt, engine_prompt, add_special_tokens=True
-):
-    """Verify that engine_prompt token_ids match tokenizing full_prompt."""
-    expected_ids = tokenizer(full_prompt, add_special_tokens=add_special_tokens)[
-        "input_ids"
-    ]
-    actual_ids = engine_prompt["prompt_token_ids"]
-    assert actual_ids == expected_ids, (
-        f"Token IDs don't match.\nExpected: {expected_ids}\nActual:   {actual_ids}"
-    )
-
-
-@pytest.fixture(scope="module")
-def cross_encoder_model_config():
-    return ModelConfig(
-        CROSS_ENCODER_MODEL_ID,
-        runner="pooling",
-    )
-
-
-@pytest.fixture(scope="module")
-def cross_encoder_tokenizer(cross_encoder_model_config):
-    return get_tokenizer(
-        CROSS_ENCODER_MODEL_ID,
-        trust_remote_code=cross_encoder_model_config.trust_remote_code,
-    )
-
-
-@pytest.fixture(scope="module")
-def llm_reranker_model_config():
-    """Model config for LLM-as-reranker style (no pad token)."""
-    config = ModelConfig(
-        CROSS_ENCODER_MODEL_ID,
-        runner="pooling",
-    )
-    # use_sep_token is a property that reads from hf_config,
-    # so we set it there to override the default (True)
-    config.hf_config.use_sep_token = False
-    return config
-
-
-@pytest.fixture
-def tokenization_kwargs():
-    """Common tokenization kwargs used across tests."""
-    return {"add_special_tokens": True, "return_tensors": None}
-
-
-@pytest.fixture
-def mock_model_with_score_template():
-    """Mock model class that supports score template and tracks post_process calls."""
-
-    class MockModelWithScoreTemplate:
-        supports_score_template = True
-        post_process_called: list[TokensPrompt] = []
-
-        @staticmethod
-        def get_score_template(p1: str, p2: str) -> str:
-            return f"[QUERY]{p1}[SEP][DOC]{p2}"
-
-        @staticmethod
-        def post_process_tokens(prompt: TokensPrompt) -> None:
-            MockModelWithScoreTemplate.post_process_called.append(prompt)
-
-    return MockModelWithScoreTemplate
-
-
-@pytest.fixture
-def mock_model_no_score_template():
-    """Mock model class that does not support score template."""
-
-    class MockModelNoScoreTemplate:
-        supports_score_template = False
-
-    return MockModelNoScoreTemplate
-
-
-class TestGetScorePrompt:
-    """Tests for the get_score_prompt function."""
-
-    def test_tokenization_kwargs_passed_through(
-        self,
-        llm_reranker_model_config,
-        cross_encoder_tokenizer,
-    ):
-        """Test that tokenization kwargs are properly passed through."""
-        data_1 = "Query text"
-        data_2 = "Document text"
-
-        # Test with truncation - custom kwargs for this test
-        custom_tokenization_kwargs = {
-            "add_special_tokens": True,
-            "return_tensors": None,
-            "truncation": True,
-            "max_length": 20,
-        }
-
-        full_prompt, engine_prompt = get_score_prompt(
-            llm_reranker_model_config,
-            cross_encoder_tokenizer,
-            custom_tokenization_kwargs,
-            data_1,
-            data_2,
-        )
-
-        assert isinstance(full_prompt, str)
-        assert "prompt_token_ids" in engine_prompt
-        # With max_length=20 and truncation, should not exceed this
-        assert len(engine_prompt["prompt_token_ids"]) <= 20
-        # Since truncation was applied, token_ids should be a prefix of full encoding
-        full_ids = cross_encoder_tokenizer(full_prompt, add_special_tokens=True)[
-            "input_ids"
-        ]
-        actual_ids = engine_prompt["prompt_token_ids"]
-        assert full_ids[: len(actual_ids)] == actual_ids, (
-            f"Token IDs are not a prefix of full encoding.\n"
-            f"Full IDs:   {full_ids}\n"
-            f"Actual IDs: {actual_ids}"
-        )
-
-    def test_model_supports_score_template(
-        self,
-        cross_encoder_model_config,
-        cross_encoder_tokenizer,
-        tokenization_kwargs,
-        mock_model_with_score_template,
-    ):
-        """Test when model supports score template (no score_template arg)."""
-        with patch(
-            "vllm.model_executor.model_loader.get_model_cls",
-            return_value=mock_model_with_score_template,
-        ):
-            full_prompt, engine_prompt = get_score_prompt(
-                cross_encoder_model_config,
-                cross_encoder_tokenizer,
-                tokenization_kwargs,
-                "query text",
-                "document text",
-            )
-
-        assert full_prompt == "[QUERY]query text[SEP][DOC]document text"
-        assert "prompt_token_ids" in engine_prompt
-        assert len(engine_prompt["prompt_token_ids"]) > 0
-        assert_prompt_tokenization_consistent(
-            cross_encoder_tokenizer, full_prompt, engine_prompt
-        )
-
-    def test_model_supports_score_template_but_custom_template_provided(
-        self,
-        cross_encoder_model_config,
-        cross_encoder_tokenizer,
-        tokenization_kwargs,
-        mock_model_with_score_template,
-    ):
-        """Test when model supports score template but custom template is provided."""
-        template = (
-            'TEMPLATE_USED {{ messages[0]["content"] }} {{ messages[1]["content"] }}'
-        )
-        with (
-            patch(
-                "vllm.model_executor.model_loader.get_model_cls",
-                return_value=mock_model_with_score_template,
-            ),
-        ):
-            full_prompt, engine_prompt = get_score_prompt(
-                cross_encoder_model_config,
-                cross_encoder_tokenizer,
-                tokenization_kwargs,
-                "query",
-                "doc",
-                score_template=template,  # Providing a template
-            )
-
-        assert "prompt_token_ids" in engine_prompt
-        assert full_prompt == "TEMPLATE_USED query doc"
-
-        assert_prompt_tokenization_consistent(
-            cross_encoder_tokenizer, full_prompt, engine_prompt
-        )
-
-    def test_not_using_default_template(
-        self,
-        llm_reranker_model_config,
-        cross_encoder_tokenizer,
-        tokenization_kwargs,
-        mock_model_no_score_template,
-    ):
-        # FIXME: For now, we only apply a template when one is explicitly provided.
-        # We cannot rely on the tokenizer's chat template because many models
-        # inherit junk templates from their base LLM, which breaks both the models
-        # and the tests that use them.
-        with (
-            patch(
-                "vllm.model_executor.model_loader.get_model_cls",
-                return_value=mock_model_no_score_template,
-            ),
-            patch(
-                "vllm.entrypoints.pooling.score.utils.safe_apply_chat_template",
-                return_value="test querytest doc",
-            ),
-        ):
-            full_prompt, engine_prompt = get_score_prompt(
-                llm_reranker_model_config,
-                cross_encoder_tokenizer,
-                tokenization_kwargs,
-                "test query",
-                "test doc",
-            )
-
-        assert full_prompt == "test querytest doc"
-        assert "prompt_token_ids" in engine_prompt
-        assert_prompt_tokenization_consistent(
-            cross_encoder_tokenizer, full_prompt, engine_prompt
-        )
-
-    def test_fallback_with_sep_token(
-        self,
-        cross_encoder_model_config,
-        cross_encoder_tokenizer,
-        tokenization_kwargs,
-        mock_model_no_score_template,
-    ):
-        """Test fallback path when ChatTemplateResolutionError
-        and use_sep_token=True."""
-        with (
-            patch(
-                "vllm.model_executor.model_loader.get_model_cls",
-                return_value=mock_model_no_score_template,
-            ),
-            patch(
-                "vllm.entrypoints.pooling.score.utils.safe_apply_chat_template",
-                side_effect=ChatTemplateResolutionError("No template"),
-            ),
-        ):
-            full_prompt, engine_prompt = get_score_prompt(
-                cross_encoder_model_config,  # use_sep_token=True
-                cross_encoder_tokenizer,
-                tokenization_kwargs,
-                "query",
-                "document",
-            )
-
-        assert "prompt_token_ids" in engine_prompt
-        # Should have token_type_ids from text_pair encoding
-        assert "token_type_ids" in engine_prompt
-        assert "query" in full_prompt
-        assert "document" in full_prompt
-        assert full_prompt != "querydocument"
-        assert (
-            engine_prompt["prompt_token_ids"]
-            == cross_encoder_tokenizer(
-                "query", text_pair="document", add_special_tokens=True
-            )["input_ids"]
-        )
-
-        # FIXME(?): add_special_tokens=False is needed because in this case
-        # full_prompt is obtained by decoding the tokenized prompt, which includes
-        # special tokens and we would get duplicated special tokens otherwise.
-        # This is inconsistent with other cases.
-        assert_prompt_tokenization_consistent(
-            cross_encoder_tokenizer,
-            full_prompt,
-            engine_prompt,
-            add_special_tokens=False,
-        )
-
-    def test_fallback_without_sep_token(
-        self,
-        llm_reranker_model_config,
-        cross_encoder_tokenizer,
-        tokenization_kwargs,
-        mock_model_no_score_template,
-    ):
-        """Test fallback path when ChatTemplateResolutionError
-        and use_sep_token=False."""
-        with (
-            patch(
-                "vllm.model_executor.model_loader.get_model_cls",
-                return_value=mock_model_no_score_template,
-            ),
-            patch(
-                "vllm.entrypoints.pooling.score.utils.safe_apply_chat_template",
-                side_effect=ChatTemplateResolutionError("No template"),
-            ),
-        ):
-            full_prompt, engine_prompt = get_score_prompt(
-                llm_reranker_model_config,  # use_sep_token=False
-                cross_encoder_tokenizer,
-                tokenization_kwargs,
-                "query",
-                "document",
-            )
-
-        assert full_prompt == "querydocument"
-        assert "prompt_token_ids" in engine_prompt
-        assert_prompt_tokenization_consistent(
-            cross_encoder_tokenizer, full_prompt, engine_prompt
-        )
-
-    def test_post_process_tokens_called(
-        self,
-        cross_encoder_model_config,
-        cross_encoder_tokenizer,
-        tokenization_kwargs,
-        mock_model_with_score_template,
-    ):
-        """Test that post_process_tokens is called on the engine prompt."""
-        # Reset the call tracker
-        mock_model_with_score_template.post_process_called.clear()
-
-        with (
-            patch(
-                "vllm.model_executor.model_loader.get_model_cls",
-                return_value=mock_model_with_score_template,
-            ),
-            patch(
-                "vllm.entrypoints.pooling.score.utils.safe_apply_chat_template",
-                side_effect=ChatTemplateResolutionError("No template"),
-            ),
-        ):
-            full_prompt, engine_prompt = get_score_prompt(
-                cross_encoder_model_config,
-                cross_encoder_tokenizer,
-                tokenization_kwargs,
-                "query",
-                "doc",
-            )
-
-        # post_process_tokens should have been called once
-        assert len(mock_model_with_score_template.post_process_called) == 1
-        assert mock_model_with_score_template.post_process_called[0] is engine_prompt
-        assert_prompt_tokenization_consistent(
-            cross_encoder_tokenizer, full_prompt, engine_prompt
-        )
diff --git a/tests/entrypoints/pooling/scoring/util.py b/tests/entrypoints/pooling/scoring/util.py
index 6d9aa15243ff..8aab9cd10692 100644
--- a/tests/entrypoints/pooling/scoring/util.py
+++ b/tests/entrypoints/pooling/scoring/util.py
@@ -1,14 +1,23 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from io import BytesIO
+
+import pybase64 as base64
 import torch
 import torch.nn.functional as F
 from huggingface_hub import hf_hub_download
+from PIL import Image
 from safetensors.torch import load_file
 from transformers import AutoModel, AutoTokenizer
 
 from tests.conftest import HfRunner
-from vllm.entrypoints.pooling.score.utils import compute_maxsim_score
+from vllm.entrypoints.chat_utils import (
+    ChatCompletionContentPartImageParam,
+    ChatCompletionContentPartTextParam,
+)
+from vllm.entrypoints.pooling.scoring.typing import ScoreMultiModalParam
+from vllm.entrypoints.pooling.scoring.utils import compute_maxsim_score
 
 
 class ColBERTScoringHfRunner(torch.nn.Module):
@@ -67,3 +76,32 @@ def predict(self, prompts: list[list[str]], *args, **kwargs):
             for pair in hf_embeddings
         ]
         return torch.as_tensor(hf_outputs)
+
+
+def make_base64_image(
+    width: int = 64, height: int = 64, color: tuple[int, int, int] = (255, 0, 0)
+) -> str:
+    """Create a small solid-color PNG image and return its base64 data URI."""
+    img = Image.new("RGB", (width, height), color)
+    buf = BytesIO()
+    img.save(buf, format="PNG")
+    b64 = base64.b64encode(buf.getvalue()).decode()
+    return f"data:image/png;base64,{b64}"
+
+
+def make_image_mm_param(
+    image_uri: str,
+    text: str | None = None,
+) -> ScoreMultiModalParam:
+    """Build a ScoreMultiModalParam containing an image (and optional text)."""
+    content: list = [
+        ChatCompletionContentPartImageParam(
+            type="image_url",
+            image_url={"url": image_uri},
+        ),
+    ]
+    if text is not None:
+        content.append(
+            ChatCompletionContentPartTextParam(type="text", text=text),
+        )
+    return ScoreMultiModalParam(content=content)
diff --git a/tests/entrypoints/pooling/test_utils.py b/tests/entrypoints/pooling/test_utils.py
new file mode 100644
index 000000000000..13a89f2520ec
--- /dev/null
+++ b/tests/entrypoints/pooling/test_utils.py
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import importlib
+import importlib.util
+import json
+import warnings
+from types import SimpleNamespace
+
+import numpy as np
+import pytest
+import torch
+
+from vllm.entrypoints.pooling.utils import encode_pooling_output_float_or_ndarray
+
+
+def _pooling_output(data):
+    return SimpleNamespace(outputs=SimpleNamespace(data=data))
+
+
+def test_encode_pooling_output_float_or_ndarray_returns_numpy_array():
+    output = _pooling_output(torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32))
+
+    encoded = encode_pooling_output_float_or_ndarray(output)
+
+    assert isinstance(encoded, np.ndarray)
+    np.testing.assert_allclose(encoded, [1.0, 2.0, 3.0])
+
+
+@pytest.mark.skipif(
+    importlib.util.find_spec("orjson") is None,
+    reason="orjson is not installed",
+)
+def test_orjson_serializes_numpy_array():
+    from fastapi.responses import ORJSONResponse
+
+    output = _pooling_output(torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32))
+    encoded = encode_pooling_output_float_or_ndarray(output)
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", DeprecationWarning)
+        response = ORJSONResponse(content={"embedding": encoded})
+    assert json.loads(response.body)["embedding"] == pytest.approx([1.0, 2.0, 3.0])
+
+
+def test_encode_pooling_output_float_or_ndarray_falls_back_to_list():
+    class DataWithUnsupportedNumpy:
+        def is_contiguous(self):
+            return True
+
+        def numpy(self):
+            raise TypeError("unsupported dtype")
+
+        def tolist(self):
+            return [1.0, 2.0, 3.0]
+
+    output = _pooling_output(DataWithUnsupportedNumpy())
+
+    assert encode_pooling_output_float_or_ndarray(output) == [1.0, 2.0, 3.0]
diff --git a/tests/entrypoints/pooling/token_classify/test_offline.py b/tests/entrypoints/pooling/token_classify/test_offline.py
index 35fedd989201..3f59b375177f 100644
--- a/tests/entrypoints/pooling/token_classify/test_offline.py
+++ b/tests/entrypoints/pooling/token_classify/test_offline.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import logging
 import weakref
 
 import pytest
@@ -60,19 +59,19 @@ def test_token_ids_prompts(llm: LLM):
 
 @pytest.mark.skip_global_cleanup
 def test_score_api(llm: LLM):
-    err_msg = "Score API is only enabled for num_labels == 1."
+    err_msg = "This model does not support the Scoring API."
     with pytest.raises(ValueError, match=err_msg):
         llm.score("ping", "pong", use_tqdm=False)
 
 
-@pytest.mark.parametrize("task", ["classify", "embed", "token_embed"])
+@pytest.mark.parametrize("task", ["classify", "embed", "token_embed", "plugin"])
 def test_unsupported_tasks(llm: LLM, task: PoolingTask, caplog_vllm):
-    if task == "classify":
-        with caplog_vllm.at_level(level=logging.WARNING, logger="vllm"):
-            llm.encode(prompt, pooling_task=task, use_tqdm=False)
-        assert "deprecated" in caplog_vllm.text
+    if task == "plugin":
+        err_msg = "No IOProcessor plugin installed."
+    elif task == "classify":
+        err_msg = "Try switching the model's pooling_task via.+"
     else:
         err_msg = "Embedding API is not supported by this model.+"
 
-        with pytest.raises(ValueError, match=err_msg):
-            llm.encode(prompt, pooling_task=task, use_tqdm=False)
+    with pytest.raises(ValueError, match=err_msg):
+        llm.encode(prompt, pooling_task=task, use_tqdm=False)
diff --git a/tests/entrypoints/pooling/token_classify/test_online.py b/tests/entrypoints/pooling/token_classify/test_online.py
index e91d0bc9a396..a8e240ab1aeb 100644
--- a/tests/entrypoints/pooling/token_classify/test_online.py
+++ b/tests/entrypoints/pooling/token_classify/test_online.py
@@ -63,8 +63,12 @@ async def test_pooling_not_supported(
             "task": task,
         },
     )
+    assert response.json()["error"]["type"] == "BadRequestError"
 
-    if task != "classify":
-        assert response.json()["error"]["type"] == "BadRequestError"
+    if task == "plugin":
+        err_msg = "No IOProcessor plugin installed."
+    elif task == "classify":
+        err_msg = "Try switching the model's pooling_task via"
+    else:
         err_msg = f"Unsupported task: {task!r}"
-        assert response.json()["error"]["message"].startswith(err_msg)
+    assert response.json()["error"]["message"].startswith(err_msg)
diff --git a/tests/entrypoints/pooling/token_embed/test_offline.py b/tests/entrypoints/pooling/token_embed/test_offline.py
index 697f4f81a11b..5bf15052d16a 100644
--- a/tests/entrypoints/pooling/token_embed/test_offline.py
+++ b/tests/entrypoints/pooling/token_embed/test_offline.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import logging
 import weakref
 
 import pytest
@@ -62,14 +61,14 @@ def test_token_ids_prompts(llm: LLM):
     assert outputs[0].outputs.data.shape == (11, 384)
 
 
-@pytest.mark.parametrize("task", ["embed", "classify", "token_classify"])
+@pytest.mark.parametrize("task", ["embed", "classify", "token_classify", "plugin"])
 def test_unsupported_tasks(llm: LLM, task: PoolingTask, caplog_vllm):
-    if task == "embed":
-        with caplog_vllm.at_level(level=logging.WARNING, logger="vllm"):
-            llm.encode(prompt, pooling_task=task, use_tqdm=False)
-        assert "deprecated" in caplog_vllm.text
+    if task == "plugin":
+        err_msg = "No IOProcessor plugin installed."
+    elif task == "embed":
+        err_msg = "Try switching the model's pooling_task via.+"
     else:
         err_msg = "Classification API is not supported by this model.+"
 
-        with pytest.raises(ValueError, match=err_msg):
-            llm.encode(prompt, pooling_task=task, use_tqdm=False)
+    with pytest.raises(ValueError, match=err_msg):
+        llm.encode(prompt, pooling_task=task, use_tqdm=False)
diff --git a/tests/entrypoints/pooling/token_embed/test_online.py b/tests/entrypoints/pooling/token_embed/test_online.py
index 922c624e98ee..42713ae852b2 100644
--- a/tests/entrypoints/pooling/token_embed/test_online.py
+++ b/tests/entrypoints/pooling/token_embed/test_online.py
@@ -86,8 +86,12 @@ async def test_pooling_not_supported(
             "task": task,
         },
     )
+    assert response.json()["error"]["type"] == "BadRequestError"
 
-    if task != "embed":
-        assert response.json()["error"]["type"] == "BadRequestError"
+    if task == "plugin":
+        err_msg = "No IOProcessor plugin installed."
+    elif task == "embed":
+        err_msg = "Try switching the model's pooling_task via"
+    else:
         err_msg = f"Unsupported task: {task!r}"
-        assert response.json()["error"]["message"].startswith(err_msg)
+    assert response.json()["error"]["message"].startswith(err_msg)
diff --git a/tests/entrypoints/serve/disagg/test_generate_stream.py b/tests/entrypoints/serve/disagg/test_generate_stream.py
new file mode 100644
index 000000000000..ac5b8bcd9158
--- /dev/null
+++ b/tests/entrypoints/serve/disagg/test_generate_stream.py
@@ -0,0 +1,514 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+from dataclasses import dataclass, field
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from vllm.config.multimodal import MultiModalConfig
+from vllm.entrypoints.openai.engine.protocol import StreamOptions
+from vllm.entrypoints.openai.models.protocol import BaseModelPath
+from vllm.entrypoints.openai.models.serving import OpenAIServingModels
+from vllm.entrypoints.serve.disagg.protocol import (
+    GenerateRequest,
+    GenerateResponse,
+)
+from vllm.entrypoints.serve.disagg.serving import ServingTokens
+from vllm.entrypoints.serve.render.serving import OpenAIServingRender
+from vllm.logprobs import Logprob
+from vllm.outputs import CompletionOutput, RequestOutput
+from vllm.renderers import renderer_from_config
+from vllm.sampling_params import SamplingParams
+from vllm.v1.engine.async_llm import AsyncLLM
+
+MODEL_NAME = "openai-community/gpt2"
+BASE_MODEL_PATHS = [
+    BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME),
+]
+
+
+@dataclass
+class MockHFConfig:
+    model_type: str = "any"
+
+
+@dataclass
+class MockModelConfig:
+    task = "generate"
+    runner_type = "generate"
+    model = MODEL_NAME
+    tokenizer = MODEL_NAME
+    trust_remote_code = False
+    tokenizer_mode = "auto"
+    max_model_len = 100
+    tokenizer_revision = None
+    multimodal_config = MultiModalConfig()
+    hf_config = MockHFConfig()
+    hf_text_config = MockHFConfig()
+    logits_processors: list[str] | None = None
+    diff_sampling_param: dict | None = None
+    allowed_local_media_path: str = ""
+    allowed_media_domains: list[str] | None = None
+    encoder_config = None
+    generation_config: str = "auto"
+    media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict)
+    skip_tokenizer_init = False
+    is_encoder_decoder: bool = False
+    is_multimodal_model: bool = False
+    renderer_num_workers: int = 1
+
+    def get_diff_sampling_param(self):
+        return self.diff_sampling_param or {}
+
+
+@dataclass
+class MockParallelConfig:
+    _api_process_rank: int = 0
+
+
+@dataclass
+class MockSchedulerConfig:
+    max_num_seqs: int = 128
+
+
+@dataclass
+class MockVllmConfig:
+    model_config: MockModelConfig
+    parallel_config: MockParallelConfig
+    scheduler_config: MockSchedulerConfig = field(default_factory=MockSchedulerConfig)
+
+
+def _build_renderer(model_config: MockModelConfig):
+    return renderer_from_config(
+        MockVllmConfig(model_config, parallel_config=MockParallelConfig()),
+    )
+
+
+def _build_serving_tokens(engine: AsyncLLM, **kwargs) -> ServingTokens:
+    models = OpenAIServingModels(
+        engine_client=engine,
+        base_model_paths=BASE_MODEL_PATHS,
+    )
+    serving_render = OpenAIServingRender(
+        model_config=engine.model_config,
+        renderer=engine.renderer,
+        model_registry=models.registry,
+        request_logger=None,
+        chat_template=None,
+        chat_template_content_format="auto",
+    )
+    serving = ServingTokens(
+        engine,
+        models,
+        openai_serving_render=serving_render,
+        request_logger=None,
+        **kwargs,
+    )
+
+    async def _fake_preprocess(*args, **kwargs):
+        return [{"prompt_token_ids": [1, 2, 3]}]
+
+    serving.openai_serving_render.preprocess_completion = AsyncMock(
+        side_effect=_fake_preprocess
+    )
+    return serving
+
+
+def _make_request_output(
+    request_id: str,
+    token_ids: list[int],
+    finish_reason: str | None = None,
+    finished: bool = False,
+    prompt_token_ids: list[int] | None = None,
+    logprobs: list[dict[int, Any] | None] | None = None,
+    num_cached_tokens: int | None = None,
+    index: int = 0,
+) -> RequestOutput:
+    return RequestOutput(
+        request_id=request_id,
+        prompt=None,
+        prompt_token_ids=prompt_token_ids or [1, 2, 3],
+        prompt_logprobs=None,
+        outputs=[
+            CompletionOutput(
+                index=index,
+                text="",
+                token_ids=token_ids,
+                cumulative_logprob=None,
+                logprobs=logprobs,
+                finish_reason=finish_reason,
+            )
+        ],
+        finished=finished,
+        metrics=None,
+        lora_request=None,
+        encoder_prompt=None,
+        encoder_prompt_token_ids=None,
+        num_cached_tokens=num_cached_tokens,
+    )
+
+
+def _mock_engine() -> MagicMock:
+    engine = MagicMock(spec=AsyncLLM)
+    engine.errored = False
+    engine.model_config = MockModelConfig()
+    engine.vllm_config = MockVllmConfig(
+        engine.model_config, parallel_config=MockParallelConfig()
+    )
+    engine.input_processor = MagicMock()
+    engine.renderer = _build_renderer(engine.model_config)
+    return engine
+
+
+def _parse_sse_chunks(chunks: list[str]) -> list[Any]:
+    """Parse SSE chunks into dicts (JSON) or raw strings ([DONE])."""
+    parsed: list[Any] = []
+    for chunk in chunks:
+        assert chunk.startswith("data: ") and chunk.endswith("\n\n")
+        payload = chunk[len("data: ") : -len("\n\n")]
+        if payload == "[DONE]":
+            parsed.append("[DONE]")
+        else:
+            parsed.append(json.loads(payload))
+    return parsed
+
+
+@pytest.mark.asyncio
+async def test_serve_tokens_skips_mm_cache_for_remote_engine_execution():
+    engine = _mock_engine()
+
+    async def mock_generate(*args, **kwargs):
+        yield _make_request_output(
+            "req-1", token_ids=[10], finish_reason="stop", finished=True
+        )
+
+    engine.generate = MagicMock(side_effect=mock_generate)
+    serving = _build_serving_tokens(engine)
+
+    request = GenerateRequest(
+        token_ids=[1, 2, 3],
+        sampling_params=SamplingParams(max_tokens=1),
+        model=MODEL_NAME,
+        stream=False,
+    )
+
+    response = await serving.serve_tokens(request)
+
+    assert isinstance(response, GenerateResponse)
+    assert (
+        serving.openai_serving_render.preprocess_completion.call_args.kwargs[
+            "skip_mm_cache"
+        ]
+        is True
+    )
+
+
+@pytest.mark.asyncio
+async def test_stream_basic():
+    """Streaming returns SSE chunks with correct token_ids and ends with [DONE]."""
+    engine = _mock_engine()
+
+    async def mock_generate(*args, **kwargs):
+        yield _make_request_output("req-1", token_ids=[10])
+        yield _make_request_output("req-1", token_ids=[20, 30])
+        yield _make_request_output(
+            "req-1", token_ids=[40], finish_reason="stop", finished=True
+        )
+
+    engine.generate = MagicMock(side_effect=mock_generate)
+    serving = _build_serving_tokens(engine)
+
+    request = GenerateRequest(
+        token_ids=[1, 2, 3],
+        sampling_params=SamplingParams(max_tokens=10),
+        model=MODEL_NAME,
+        stream=True,
+    )
+
+    response = await serving.serve_tokens(request)
+    chunks = []
+    async for chunk in response:
+        chunks.append(chunk)
+
+    parsed = _parse_sse_chunks(chunks)
+
+    # 3 data chunks + [DONE]
+    assert parsed[-1] == "[DONE]"
+    data_chunks = [c for c in parsed if c != "[DONE]"]
+    assert len(data_chunks) == 3
+
+    assert data_chunks[0]["choices"][0]["token_ids"] == [10]
+    assert data_chunks[1]["choices"][0]["token_ids"] == [20, 30]
+    assert data_chunks[2]["choices"][0]["token_ids"] == [40]
+    assert data_chunks[2]["choices"][0]["finish_reason"] == "stop"
+
+
+@pytest.mark.asyncio
+async def test_stream_error_mid_generation():
+    """finish_reason='error' mid-stream yields error chunk then [DONE]."""
+    engine = _mock_engine()
+
+    async def mock_generate(*args, **kwargs):
+        yield _make_request_output("req-1", token_ids=[10])
+        yield _make_request_output(
+            "req-1", token_ids=[20], finish_reason="error", finished=True
+        )
+
+    engine.generate = MagicMock(side_effect=mock_generate)
+    serving = _build_serving_tokens(engine)
+
+    request = GenerateRequest(
+        token_ids=[1, 2, 3],
+        sampling_params=SamplingParams(max_tokens=10),
+        model=MODEL_NAME,
+        stream=True,
+    )
+
+    response = await serving.serve_tokens(request)
+    chunks = []
+    async for chunk in response:
+        chunks.append(chunk)
+
+    assert len(chunks) >= 2
+    assert any("Internal server error" in chunk for chunk in chunks), (
+        f"Expected error message in chunks: {chunks}"
+    )
+    assert chunks[-1] == "data: [DONE]\n\n"
+
+
+@pytest.mark.asyncio
+async def test_stream_error_with_empty_delta():
+    """finish_reason='error' with empty delta_token_ids still raises."""
+    engine = _mock_engine()
+
+    async def mock_generate(*args, **kwargs):
+        yield _make_request_output("req-1", token_ids=[10])
+        yield _make_request_output(
+            "req-1", token_ids=[], finish_reason="error", finished=True
+        )
+
+    engine.generate = MagicMock(side_effect=mock_generate)
+    serving = _build_serving_tokens(engine)
+
+    request = GenerateRequest(
+        token_ids=[1, 2, 3],
+        sampling_params=SamplingParams(max_tokens=10),
+        model=MODEL_NAME,
+        stream=True,
+    )
+
+    response = await serving.serve_tokens(request)
+    chunks = []
+    async for chunk in response:
+        chunks.append(chunk)
+
+    assert any("Internal server error" in chunk for chunk in chunks), (
+        f"Expected error message in chunks: {chunks}"
+    )
+    assert chunks[-1] == "data: [DONE]\n\n"
+
+
+@pytest.mark.asyncio
+async def test_stream_skips_empty_token_output():
+    """Outputs with empty token_ids are skipped (no chunk emitted)."""
+    engine = _mock_engine()
+
+    async def mock_generate(*args, **kwargs):
+        yield _make_request_output("req-1", token_ids=[10])
+        yield _make_request_output("req-1", token_ids=[])
+        yield _make_request_output(
+            "req-1", token_ids=[20], finish_reason="stop", finished=True
+        )
+
+    engine.generate = MagicMock(side_effect=mock_generate)
+    serving = _build_serving_tokens(engine)
+
+    request = GenerateRequest(
+        token_ids=[1, 2, 3],
+        sampling_params=SamplingParams(max_tokens=10),
+        model=MODEL_NAME,
+        stream=True,
+    )
+
+    response = await serving.serve_tokens(request)
+    chunks = []
+    async for chunk in response:
+        chunks.append(chunk)
+
+    parsed = _parse_sse_chunks(chunks)
+    assert parsed[-1] == "[DONE]"
+    data_chunks = [c for c in parsed if c != "[DONE]"]
+
+    # Only 2 data chunks — the empty one is skipped
+    assert len(data_chunks) == 2
+    assert data_chunks[0]["choices"][0]["token_ids"] == [10]
+    assert data_chunks[1]["choices"][0]["token_ids"] == [20]
+
+
+@pytest.mark.asyncio
+async def test_stream_include_usage():
+    """stream_options.include_usage emits a final usage-only chunk."""
+    engine = _mock_engine()
+
+    async def mock_generate(*args, **kwargs):
+        yield _make_request_output("req-1", token_ids=[10])
+        yield _make_request_output(
+            "req-1", token_ids=[20], finish_reason="stop", finished=True
+        )
+
+    engine.generate = MagicMock(side_effect=mock_generate)
+    serving = _build_serving_tokens(engine)
+
+    request = GenerateRequest(
+        token_ids=[1, 2, 3],
+        sampling_params=SamplingParams(max_tokens=10),
+        model=MODEL_NAME,
+        stream=True,
+        stream_options=StreamOptions(include_usage=True),
+    )
+
+    response = await serving.serve_tokens(request)
+    chunks = []
+    async for chunk in response:
+        chunks.append(chunk)
+
+    parsed = _parse_sse_chunks(chunks)
+    assert parsed[-1] == "[DONE]"
+
+    # The chunk before [DONE] should be the usage-only chunk
+    usage_chunk = parsed[-2]
+    assert usage_chunk["choices"] == []
+    assert usage_chunk["usage"]["prompt_tokens"] == 3
+    assert usage_chunk["usage"]["completion_tokens"] == 2
+    assert usage_chunk["usage"]["total_tokens"] == 5
+
+
+@pytest.mark.asyncio
+async def test_stream_continuous_usage():
+    """continuous_usage_stats adds usage to every data chunk."""
+    engine = _mock_engine()
+
+    async def mock_generate(*args, **kwargs):
+        yield _make_request_output("req-1", token_ids=[10])
+        yield _make_request_output(
+            "req-1", token_ids=[20], finish_reason="stop", finished=True
+        )
+
+    engine.generate = MagicMock(side_effect=mock_generate)
+    serving = _build_serving_tokens(engine)
+
+    request = GenerateRequest(
+        token_ids=[1, 2, 3],
+        sampling_params=SamplingParams(max_tokens=10),
+        model=MODEL_NAME,
+        stream=True,
+        stream_options=StreamOptions(
+            include_usage=True,
+            continuous_usage_stats=True,
+        ),
+    )
+
+    response = await serving.serve_tokens(request)
+    chunks = []
+    async for chunk in response:
+        chunks.append(chunk)
+
+    parsed = _parse_sse_chunks(chunks)
+    data_chunks = [c for c in parsed if isinstance(c, dict) and c.get("choices")]
+
+    # Every data chunk should have usage
+    for i, dc in enumerate(data_chunks):
+        assert dc["usage"] is not None, f"chunk {i} missing usage"
+        assert dc["usage"]["prompt_tokens"] == 3
+
+    # First chunk: 1 completion token
+    assert data_chunks[0]["usage"]["completion_tokens"] == 1
+    assert data_chunks[0]["usage"]["total_tokens"] == 4
+
+    # Second chunk: 2 completion tokens (cumulative)
+    assert data_chunks[1]["usage"]["completion_tokens"] == 2
+    assert data_chunks[1]["usage"]["total_tokens"] == 5
+
+
+@pytest.mark.asyncio
+async def test_stream_with_logprobs():
+    """Streaming with logprobs includes logprob data in each chunk."""
+    engine = _mock_engine()
+
+    async def mock_generate(*args, **kwargs):
+        yield _make_request_output(
+            "req-1",
+            token_ids=[10],
+            logprobs=[{10: Logprob(logprob=-0.5)}],
+        )
+        yield _make_request_output(
+            "req-1",
+            token_ids=[20],
+            logprobs=[{20: Logprob(logprob=-1.0)}],
+            finish_reason="stop",
+            finished=True,
+        )
+
+    engine.generate = MagicMock(side_effect=mock_generate)
+    serving = _build_serving_tokens(engine)
+
+    request = GenerateRequest(
+        token_ids=[1, 2, 3],
+        sampling_params=SamplingParams(max_tokens=10, logprobs=1),
+        model=MODEL_NAME,
+        stream=True,
+    )
+
+    response = await serving.serve_tokens(request)
+    chunks = []
+    async for chunk in response:
+        chunks.append(chunk)
+
+    parsed = _parse_sse_chunks(chunks)
+    data_chunks = [c for c in parsed if isinstance(c, dict) and c.get("choices")]
+
+    for dc in data_chunks:
+        lp = dc["choices"][0]["logprobs"]
+        assert lp is not None
+        assert len(lp["content"]) == 1
+        assert lp["content"][0]["token"].startswith("token_id:")
+
+
+@pytest.mark.asyncio
+async def test_stream_prompt_tokens_details():
+    """enable_prompt_tokens_details includes cached_tokens in final usage."""
+    engine = _mock_engine()
+
+    async def mock_generate(*args, **kwargs):
+        yield _make_request_output(
+            "req-1",
+            token_ids=[10],
+            finish_reason="stop",
+            finished=True,
+            num_cached_tokens=2,
+        )
+
+    engine.generate = MagicMock(side_effect=mock_generate)
+    serving = _build_serving_tokens(engine, enable_prompt_tokens_details=True)
+
+    request = GenerateRequest(
+        token_ids=[1, 2, 3],
+        sampling_params=SamplingParams(max_tokens=10),
+        model=MODEL_NAME,
+        stream=True,
+        stream_options=StreamOptions(include_usage=True),
+    )
+
+    response = await serving.serve_tokens(request)
+    chunks = []
+    async for chunk in response:
+        chunks.append(chunk)
+
+    parsed = _parse_sse_chunks(chunks)
+    # Usage-only chunk (before [DONE])
+    usage_chunk = parsed[-2]
+    assert usage_chunk["choices"] == []
+    assert usage_chunk["usage"]["prompt_tokens_details"]["cached_tokens"] == 2
diff --git a/tests/entrypoints/serve/disagg/test_protocol.py b/tests/entrypoints/serve/disagg/test_protocol.py
new file mode 100644
index 000000000000..414fc2a26125
--- /dev/null
+++ b/tests/entrypoints/serve/disagg/test_protocol.py
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for the disagg request/response protocol.
+
+These tests intentionally avoid spinning up a server — they exercise the
+pydantic validators on ``GenerateRequest`` directly so they run fast and
+fail loudly if the validator semantics ever drift.
+"""
+
+import json
+
+from vllm.entrypoints.serve.disagg.protocol import GenerateRequest
+from vllm.sampling_params import SamplingParams
+
+
+def _base_payload() -> dict:
+    return {"token_ids": [1, 2, 3], "sampling_params": {}}
+
+
+def test_omitted_max_tokens_is_not_provided():
+    """Body without ``max_tokens`` must surface as 'not provided' so the
+    server can apply its own default instead of the dataclass 16."""
+    req = GenerateRequest.model_validate(_base_payload())
+    # SamplingParams' dataclass default leaks through the parsed instance —
+    # this is exactly the bug the server-side defaulting works around.
+    assert req.sampling_params.max_tokens == 16
+    assert not req.is_sampling_param_provided("max_tokens")
+
+
+def test_explicit_max_tokens_is_provided():
+    """Even when the client picks the same value as the dataclass default,
+    it must register as explicitly set so the server won't override it."""
+    payload = _base_payload()
+    payload["sampling_params"] = {"max_tokens": 16}
+    req = GenerateRequest.model_validate(payload)
+    assert req.sampling_params.max_tokens == 16
+    assert req.is_sampling_param_provided("max_tokens")
+
+    payload["sampling_params"] = {"max_tokens": 256}
+    req = GenerateRequest.model_validate(payload)
+    assert req.sampling_params.max_tokens == 256
+    assert req.is_sampling_param_provided("max_tokens")
+
+
+def test_other_fields_tracked_independently():
+    payload = _base_payload()
+    payload["sampling_params"] = {"temperature": 0.0}
+    req = GenerateRequest.model_validate(payload)
+    assert not req.is_sampling_param_provided("max_tokens")
+    assert req.is_sampling_param_provided("temperature")
+
+
+def test_json_roundtrip_preserves_provided_keys():
+    payload = _base_payload()
+    payload["sampling_params"] = {"temperature": 0.5}
+    req = GenerateRequest.model_validate_json(json.dumps(payload))
+    assert not req.is_sampling_param_provided("max_tokens")
+    assert req.is_sampling_param_provided("temperature")
+
+
+def test_internal_instance_construction_treats_all_as_provided():
+    """When internal callers build ``GenerateRequest`` from a pre-resolved
+    ``SamplingParams`` instance, every field is considered explicitly set
+    so server-side defaulting can't clobber values resolved upstream."""
+    sp = SamplingParams(max_tokens=500, temperature=0.0)
+    req = GenerateRequest(token_ids=[1, 2, 3], sampling_params=sp)
+    assert req.is_sampling_param_provided("max_tokens")
+    assert req.is_sampling_param_provided("temperature")
+    # And keys we never touched should also count as provided in this path.
+    assert req.is_sampling_param_provided("top_p")
diff --git a/tests/entrypoints/serve/disagg/test_return_routed_experts.py b/tests/entrypoints/serve/disagg/test_return_routed_experts.py
new file mode 100644
index 000000000000..f7fd1795168a
--- /dev/null
+++ b/tests/entrypoints/serve/disagg/test_return_routed_experts.py
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import io
+
+import httpx
+import numpy as np
+import pybase64 as base64
+import pytest
+import pytest_asyncio
+
+from tests.utils import RemoteOpenAIServer
+
+MODEL_NAME = "TitanML/tiny-mixtral"
+GEN_ENDPOINT = "/inference/v1/generate"
+
+# tiny-mixtral config: 8 local experts, top-2 routing, 2 hidden layers.
+# The published config has sliding_window=4096, which produces
+# SlidingWindowSpec kv-cache groups; RoutedExpertsManager requires a
+# FullAttentionSpec group, so we override sliding_window=null below.
+NUM_LOCAL_EXPERTS = 8
+NUM_EXPERTS_PER_TOK = 2
+NUM_HIDDEN_LAYERS = 2
+
+
+@pytest.fixture(scope="module")
+def server():
+    args = [
+        "--max-model-len",
+        "256",
+        "--max-num-seqs",
+        "32",
+        "--enforce-eager",
+        "--enable-return-routed-experts",
+        "--hf-overrides",
+        '{"sliding_window": null}',
+    ]
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+        yield remote_server
+
+
+@pytest_asyncio.fixture
+async def client(server: RemoteOpenAIServer):
+    transport = httpx.AsyncHTTPTransport(uds=server.uds) if server.uds else None
+    headers = {"Authorization": f"Bearer {server.DUMMY_API_KEY}"}
+    async with httpx.AsyncClient(
+        transport=transport,
+        base_url=server.url_root,
+        timeout=600,
+        headers=headers,
+    ) as c:
+        yield c
+
+
+@pytest.mark.asyncio
+async def test_generate_routed_experts(client):
+    """Test that /inference/v1/generate returns routed_experts when enabled."""
+    payload = {
+        "model": MODEL_NAME,
+        "token_ids": [1, 2, 3],
+        "sampling_params": {"max_tokens": 10, "temperature": 0.0},
+        "stream": False,
+    }
+    resp = await client.post(GEN_ENDPOINT, json=payload)
+    resp.raise_for_status()
+    data = resp.json()
+
+    choice = data["choices"][0]
+
+    assert choice["routed_experts"] is not None
+    assert choice["token_ids"] is not None
+
+    # routed_experts is base64-encoded .npy bytes; decode to ndarray.
+    routed_experts = np.load(io.BytesIO(base64.b64decode(choice["routed_experts"])))
+    assert routed_experts.ndim == 3
+    num_tokens, num_layers, topk = routed_experts.shape
+    assert num_tokens > 0
+    assert num_layers == NUM_HIDDEN_LAYERS
+    assert topk == NUM_EXPERTS_PER_TOK
+    assert (routed_experts >= 0).all()
+    assert (routed_experts < NUM_LOCAL_EXPERTS).all()
diff --git a/tests/entrypoints/serve/disagg/test_serving_multimodal_tokens.py b/tests/entrypoints/serve/disagg/test_serving_multimodal_tokens.py
new file mode 100644
index 000000000000..e13dd425af14
--- /dev/null
+++ b/tests/entrypoints/serve/disagg/test_serving_multimodal_tokens.py
@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for multimodal features through the /inference/v1/generate endpoint.
+
+Mirrors test_serving_tokens.py but exercises the multimodal piping
+using Qwen/Qwen3-VL-2B-Instruct end-to-end via the server's /render ->
+/generate -> /detokenize path. Intentionally avoids running the HF
+processor in the pytest parent process to keep os.fork() in sibling
+tests (e.g. test_weight_transfer_llm.py) deadlock-free.
+"""
+
+import os
+
+import httpx
+import pytest
+import pytest_asyncio
+from PIL import Image
+
+from tests.utils import RemoteOpenAIServer
+from vllm.multimodal.utils import encode_image_url
+
+MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"
+GEN_ENDPOINT = "/inference/v1/generate"
+RENDER_ENDPOINT = "/v1/chat/completions/render"
+DETOKENIZE_ENDPOINT = "/detokenize"
+
+
+@pytest.fixture(scope="module")
+def test_image():
+    return Image.new("RGB", (224, 224), color=(255, 0, 0))
+
+
+@pytest.fixture(scope="module")
+def server():
+    args = [
+        "--dtype",
+        "bfloat16",
+        "--max-model-len",
+        "4096",
+        "--enforce-eager",
+        "--no-enable-prefix-caching",
+    ]
+
+    envs = os.environ.copy()
+    envs["VLLM_ROCM_USE_SKINNY_GEMM"] = "0"
+
+    with RemoteOpenAIServer(MODEL_NAME, args, env_dict=envs) as remote_server:
+        yield remote_server
+
+
+@pytest_asyncio.fixture
+async def client(server: RemoteOpenAIServer):
+    transport = httpx.AsyncHTTPTransport(uds=server.uds) if server.uds else None
+    headers = {"Authorization": f"Bearer {server.DUMMY_API_KEY}"}
+    async with httpx.AsyncClient(
+        transport=transport,
+        base_url=server.url_root,
+        timeout=600,
+        headers=headers,
+    ) as c:
+        yield c
+
+
+@pytest.mark.asyncio
+async def test_render_to_generate_roundtrip(client, test_image):
+    """End-to-end: render a multimodal chat -> feed into generate -> decode.
+
+    All preprocessing and detokenization happens in the server subprocess;
+    the pytest parent never imports transformers or touches torch tensors.
+    """
+    data_url = encode_image_url(test_image, format="PNG")
+
+    render_payload = {
+        "model": MODEL_NAME,
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": data_url}},
+                    {
+                        "type": "text",
+                        "text": "What color is this image? Answer in one word.",
+                    },
+                ],
+            }
+        ],
+    }
+
+    render_resp = await client.post(RENDER_ENDPOINT, json=render_payload)
+    render_resp.raise_for_status()
+    render_data = render_resp.json()
+
+    # Validate render output structure: keys exist and values are non-empty
+    # and well-typed.
+    assert "token_ids" in render_data
+    assert isinstance(render_data["token_ids"], list)
+    assert len(render_data["token_ids"]) > 0
+    assert all(isinstance(t, int) for t in render_data["token_ids"])
+
+    assert "features" in render_data
+    features = render_data["features"]
+    assert features is not None
+    assert isinstance(features, dict)
+
+    assert "mm_hashes" in features
+    assert "image" in features["mm_hashes"]
+    image_hashes = features["mm_hashes"]["image"]
+    assert isinstance(image_hashes, list)
+    assert len(image_hashes) > 0
+    assert all(isinstance(h, str) and h for h in image_hashes)
+
+    assert "mm_placeholders" in features
+    assert "image" in features["mm_placeholders"]
+    image_placeholders = features["mm_placeholders"]["image"]
+    assert isinstance(image_placeholders, list)
+    assert len(image_placeholders) > 0
+    for p in image_placeholders:
+        assert isinstance(p.get("offset"), int)
+        assert isinstance(p.get("length"), int)
+        assert p["length"] > 0
+
+    assert "kwargs_data" in features
+    assert "image" in features["kwargs_data"]
+    assert len(features["kwargs_data"]["image"]) > 0
+
+    # Build generate request from render output
+    generate_payload = render_data
+    generate_payload["sampling_params"] = {
+        "max_tokens": 10,
+        "temperature": 0.0,
+    }
+
+    gen_resp = await client.post(GEN_ENDPOINT, json=generate_payload)
+    gen_resp.raise_for_status()
+    gen_data = gen_resp.json()
+
+    assert "choices" in gen_data
+    assert isinstance(gen_data["choices"], list)
+    assert len(gen_data["choices"]) >= 1
+    choice = gen_data["choices"][0]
+    assert "token_ids" in choice
+    assert isinstance(choice["token_ids"], list)
+    assert len(choice["token_ids"]) > 0
+    assert all(isinstance(t, int) for t in choice["token_ids"])
+
+    detok_resp = await client.post(
+        DETOKENIZE_ENDPOINT,
+        json={"model": MODEL_NAME, "tokens": choice["token_ids"]},
+    )
+    detok_resp.raise_for_status()
+    detok_data = detok_resp.json()
+    assert "prompt" in detok_data
+    text = detok_data["prompt"]
+    assert isinstance(text, str)
+    assert len(text) > 0
+    assert "red" in text.lower(), (
+        f"Expected model to identify the red image, got: {text!r}"
+    )
diff --git a/tests/entrypoints/serve/disagg/test_serving_tokens.py b/tests/entrypoints/serve/disagg/test_serving_tokens.py
index b62cb01bb45b..3425bdef63f3 100644
--- a/tests/entrypoints/serve/disagg/test_serving_tokens.py
+++ b/tests/entrypoints/serve/disagg/test_serving_tokens.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import json
 import os
 
 import httpx
@@ -113,6 +114,84 @@ async def test_generate_endpoint(client):
     assert "choices" in data
 
 
+@pytest.mark.asyncio
+async def test_generate_defaults_max_tokens_when_omitted(client):
+    """Regression: omitting ``max_tokens`` must not silently cap at 16.
+
+    ``SamplingParams.max_tokens`` defaults to 16. Before the server-side
+    defaulting was wired up, every request that didn't set ``max_tokens``
+    truncated mid-generation. The server should now fill it in from
+    ``max_model_len - prompt_len`` (matching ``/v1/chat/completions``).
+    """
+    payload = {
+        "model": MODEL_NAME,
+        "token_ids": [1, 2, 3],
+        "sampling_params": {
+            "temperature": 0.0,
+            "ignore_eos": True,
+        },
+        "stream": False,
+    }
+    resp = await client.post(GEN_ENDPOINT, json=payload)
+    resp.raise_for_status()
+    data = resp.json()
+    completion_tokens = len(data["choices"][0]["token_ids"])
+    # max_model_len=1024 in the test fixture, prompt is 3 tokens, so we
+    # should get ~1021 tokens of output (capped at max_model_len boundary).
+    assert completion_tokens > 16, (
+        f"expected server-side default to exceed the legacy 16-token cap, "
+        f"got {completion_tokens}"
+    )
+
+
+@pytest.mark.asyncio
+async def test_generate_stream(client):
+    payload = {
+        "model": MODEL_NAME,
+        "token_ids": [1, 2, 3],
+        "sampling_params": {"max_tokens": 5},
+        "stream": True,
+    }
+    async with client.stream("POST", GEN_ENDPOINT, json=payload) as resp:
+        resp.raise_for_status()
+        chunks = []
+        async for line in resp.aiter_lines():
+            if not line.startswith("data: "):
+                continue
+            payload_str = line[len("data: ") :]
+            if payload_str == "[DONE]":
+                break
+            chunks.append(json.loads(payload_str))
+
+    assert len(chunks) > 0
+    # Every chunk has choices with token_ids
+    all_token_ids = []
+    for chunk in chunks:
+        assert "choices" in chunk
+        assert len(chunk["choices"]) == 1
+        choice = chunk["choices"][0]
+        assert "token_ids" in choice
+        assert len(choice["token_ids"]) > 0
+        all_token_ids.extend(choice["token_ids"])
+
+    # Last chunk should have a finish_reason
+    assert chunks[-1]["choices"][0]["finish_reason"] is not None
+
+    # Streaming should produce the same tokens as non-streaming
+    non_stream_resp = await client.post(
+        GEN_ENDPOINT,
+        json={
+            "model": MODEL_NAME,
+            "token_ids": [1, 2, 3],
+            "sampling_params": {"max_tokens": 5, "temperature": 0.0},
+            "stream": False,
+        },
+    )
+    non_stream_data = non_stream_resp.json()
+    # Just verify we got the right number of tokens
+    assert len(all_token_ids) == len(non_stream_data["choices"][0]["token_ids"])
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize("logprobs_value", [0, 1, 5])
 async def test_generate_logprobs(client, logprobs_value):
diff --git a/tests/entrypoints/serve/disagg/test_tokens_logprobs.py b/tests/entrypoints/serve/disagg/test_tokens_logprobs.py
new file mode 100644
index 000000000000..844dd24d5418
--- /dev/null
+++ b/tests/entrypoints/serve/disagg/test_tokens_logprobs.py
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm.entrypoints.serve.disagg.serving import ServingTokens
+from vllm.logprobs import Logprob
+
+
+def test_top_logprobs_alternatives_have_own_token_ids():
+    """Each top_logprobs alternative must carry its own token_id placeholder."""
+    result = ServingTokens._create_tokens_logprobs(
+        None,
+        token_ids=[262],
+        top_logprobs=[{262: Logprob(-0.1), 257: Logprob(-1.2), 428: Logprob(-2.3)}],
+        num_output_top_logprobs=2,
+    )
+    tokens = {e.token for e in result.content[0].top_logprobs}
+    assert tokens == {"token_id:262", "token_id:257"}, f"got {tokens}"
+
+
+def test_logprobs_zero_emits_sampled_token():
+    """logprobs=0 must still emit 1 entry (the sampled token)."""
+    result = ServingTokens._create_tokens_logprobs(
+        None,
+        token_ids=[7],
+        top_logprobs=[{7: Logprob(-0.9), 8: Logprob(-1.1)}],
+        num_output_top_logprobs=0,
+    )
+    assert len(result.content[0].top_logprobs) == 1
diff --git a/tests/entrypoints/serve/instrumentator/test_metrics.py b/tests/entrypoints/serve/instrumentator/test_metrics.py
index ba4e65977c70..9095f80e20f2 100644
--- a/tests/entrypoints/serve/instrumentator/test_metrics.py
+++ b/tests/entrypoints/serve/instrumentator/test_metrics.py
@@ -182,6 +182,7 @@ async def test_metrics_counts(
 EXPECTED_METRICS_V1 = [
     "vllm:num_requests_running",
     "vllm:num_requests_waiting",
+    "vllm:num_requests_waiting_by_reason",
     "vllm:kv_cache_usage_perc",
     "vllm:prefix_cache_queries",
     "vllm:prefix_cache_hits",
diff --git a/tests/entrypoints/serve/instrumentator/test_optional_middleware.py b/tests/entrypoints/serve/instrumentator/test_optional_middleware.py
index fef10cdc0cdf..865bd445c812 100644
--- a/tests/entrypoints/serve/instrumentator/test_optional_middleware.py
+++ b/tests/entrypoints/serve/instrumentator/test_optional_middleware.py
@@ -86,13 +86,56 @@ async def test_passed_api_token(server: RemoteOpenAIServer):
     indirect=True,
 )
 @pytest.mark.asyncio
-async def test_not_v1_api_token(server: RemoteOpenAIServer):
-    # Authorization check is skipped for any paths that
-    # don't start with /v1 (e.g. /v1/chat/completions).
+async def test_not_v1_or_v2_path_skips_auth(server: RemoteOpenAIServer):
+    # Authorization check is skipped for paths that
+    # don't start with /v1 or /v2 (e.g. /health, /metrics).
     response = requests.get(server.url_for("health"))
     assert response.status_code == HTTPStatus.OK
 
 
+# ---------------------------------------------------------------------------
+# /v2 path authentication tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "server",
+    [["--api-key", "test"]],
+    indirect=True,
+)
+@pytest.mark.asyncio
+async def test_v2_endpoint_rejects_missing_api_token(server: RemoteOpenAIServer):
+    # /v2/embed should require authentication when --api-key is set.
+    body = {
+        "model": MODEL_NAME,
+        "texts": ["hello"],
+        "embedding_types": ["float"],
+    }
+    response = requests.post(server.url_for("/v2/embed"), json=body)
+    assert response.status_code == HTTPStatus.UNAUTHORIZED
+
+
+@pytest.mark.parametrize(
+    "server",
+    [["--api-key", "test"]],
+    indirect=True,
+)
+@pytest.mark.asyncio
+async def test_v2_endpoint_accepts_valid_api_token(server: RemoteOpenAIServer):
+    # /v2/embed should accept requests with a valid API key.
+    body = {
+        "model": MODEL_NAME,
+        "texts": ["hello"],
+        "embedding_types": ["float"],
+    }
+    response = requests.post(
+        server.url_for("/v2/embed"),
+        json=body,
+        headers={"Authorization": "Bearer test"},
+    )
+    assert response.status_code == HTTPStatus.OK
+
+
 @pytest.mark.parametrize(
     "server",
     ["--enable-request-id-headers"],
diff --git a/tests/entrypoints/serve/lora/test_serving_models.py b/tests/entrypoints/serve/lora/test_serving_models.py
index f6755f489343..ce9fdcc2bfb2 100644
--- a/tests/entrypoints/serve/lora/test_serving_models.py
+++ b/tests/entrypoints/serve/lora/test_serving_models.py
@@ -34,7 +34,6 @@ async def _async_serving_models_init() -> OpenAIServingModels:
     mock_model_config.max_model_len = 2048
     mock_engine_client.model_config = mock_model_config
     mock_engine_client.input_processor = MagicMock()
-    mock_engine_client.io_processor = MagicMock()
     mock_engine_client.renderer = MagicMock()
 
     serving_models = OpenAIServingModels(
diff --git a/tests/entrypoints/serve/tokenize/test_serving_tokenization.py b/tests/entrypoints/serve/tokenize/test_serving_tokenization.py
new file mode 100644
index 000000000000..ba9d7989a865
--- /dev/null
+++ b/tests/entrypoints/serve/tokenize/test_serving_tokenization.py
@@ -0,0 +1,140 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from dataclasses import dataclass, field
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from vllm.config.multimodal import MultiModalConfig
+from vllm.entrypoints.openai.models.protocol import BaseModelPath
+from vllm.entrypoints.openai.models.serving import OpenAIServingModels
+from vllm.entrypoints.serve.render.serving import OpenAIServingRender
+from vllm.entrypoints.serve.tokenize.protocol import (
+    TokenizeChatRequest,
+    TokenizeCompletionRequest,
+)
+from vllm.entrypoints.serve.tokenize.serving import OpenAIServingTokenization
+from vllm.v1.engine.async_llm import AsyncLLM
+
+MODEL_NAME = "openai-community/gpt2"
+BASE_MODEL_PATHS = [
+    BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME),
+]
+
+
+@dataclass
+class MockHFConfig:
+    model_type: str = "any"
+
+
+@dataclass
+class MockModelConfig:
+    task = "generate"
+    runner_type = "generate"
+    model = MODEL_NAME
+    tokenizer = MODEL_NAME
+    trust_remote_code = False
+    tokenizer_mode = "auto"
+    max_model_len = 100
+    tokenizer_revision = None
+    multimodal_config = MultiModalConfig()
+    hf_config = MockHFConfig()
+    hf_text_config = MockHFConfig()
+    logits_processors: list[str] | None = None
+    diff_sampling_param: dict | None = None
+    allowed_local_media_path: str = ""
+    allowed_media_domains: list[str] | None = None
+    encoder_config = None
+    generation_config: str = "auto"
+    media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict)
+    skip_tokenizer_init = False
+    is_encoder_decoder: bool = False
+    is_multimodal_model: bool = False
+    renderer_num_workers: int = 1
+
+    def get_diff_sampling_param(self):
+        return self.diff_sampling_param or {}
+
+
+def _build_serving_tokenization(engine: AsyncLLM) -> OpenAIServingTokenization:
+    models = OpenAIServingModels(
+        engine_client=engine,
+        base_model_paths=BASE_MODEL_PATHS,
+    )
+    serving_render = OpenAIServingRender(
+        model_config=engine.model_config,
+        renderer=engine.renderer,
+        model_registry=models.registry,
+        request_logger=None,
+        chat_template=None,
+        chat_template_content_format="auto",
+    )
+    return OpenAIServingTokenization(
+        engine,
+        models,
+        openai_serving_render=serving_render,
+        request_logger=None,
+        chat_template=None,
+        chat_template_content_format="auto",
+    )
+
+
+@pytest.mark.asyncio
+async def test_tokenize_chat_skips_mm_cache_for_renderer_only_path():
+    mock_engine = MagicMock(spec=AsyncLLM)
+    mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.input_processor = MagicMock()
+    mock_engine.renderer = MagicMock()
+
+    serving = _build_serving_tokenization(mock_engine)
+    serving.openai_serving_render.preprocess_chat = AsyncMock(
+        return_value=(
+            [{"role": "user", "content": "Test"}],
+            [{"prompt_token_ids": [1, 2, 3]}],
+        )
+    )
+
+    request = TokenizeChatRequest(
+        model=MODEL_NAME,
+        messages=[{"role": "user", "content": "Test prompt"}],
+    )
+
+    response = await serving.create_tokenize(request, MagicMock(headers={}))
+
+    assert response.tokens == [1, 2, 3]
+    assert (
+        serving.openai_serving_render.preprocess_chat.call_args.kwargs["skip_mm_cache"]
+        is True
+    )
+
+
+@pytest.mark.asyncio
+async def test_tokenize_completion_skips_mm_cache_for_renderer_only_path():
+    mock_engine = MagicMock(spec=AsyncLLM)
+    mock_engine.errored = False
+    mock_engine.model_config = MockModelConfig()
+    mock_engine.input_processor = MagicMock()
+    mock_engine.renderer = MagicMock()
+
+    serving = _build_serving_tokenization(mock_engine)
+    serving.openai_serving_render.preprocess_completion = AsyncMock(
+        return_value=[{"prompt_token_ids": [1, 2, 3]}]
+    )
+
+    request = TokenizeCompletionRequest(
+        model=MODEL_NAME,
+        prompt="Test prompt",
+    )
+
+    response = await serving.create_tokenize(request, MagicMock(headers={}))
+
+    assert response.tokens == [1, 2, 3]
+    assert (
+        serving.openai_serving_render.preprocess_completion.call_args.kwargs[
+            "skip_mm_cache"
+        ]
+        is True
+    )
diff --git a/tests/entrypoints/serve/tokenize/test_tokenization.py b/tests/entrypoints/serve/tokenize/test_tokenization.py
index 5fe83db81c3a..e0a70cfd671c 100644
--- a/tests/entrypoints/serve/tokenize/test_tokenization.py
+++ b/tests/entrypoints/serve/tokenize/test_tokenization.py
@@ -9,7 +9,7 @@
 from vllm.tokenizers import get_tokenizer
 
 # any model with a chat template should work here
-MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
+MODEL_NAME = "HuggingFaceTB/SmolLM2-135M-Instruct"
 
 
 @pytest.fixture(scope="module")
diff --git a/tests/entrypoints/serve/tokenize/test_tokenize_then_chat_vlm.py b/tests/entrypoints/serve/tokenize/test_tokenize_then_chat_vlm.py
new file mode 100644
index 000000000000..50083e345538
--- /dev/null
+++ b/tests/entrypoints/serve/tokenize/test_tokenize_then_chat_vlm.py
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Regression test: calling ``/tokenize`` with multimodal data followed by
+``/v1/chat/completions`` with the same data must not cause an error.
+
+Ensures that the ``/tokenize`` endpoint does not pollute internal caches
+(e.g. multimodal feature caches) and that a subsequent
+``/v1/chat/completions`` request with the same multimodal payload
+completes successfully.
+"""
+
+import json
+
+import openai
+import pytest
+import pytest_asyncio
+import requests
+
+from tests.utils import RemoteOpenAIServer
+
+MODEL_NAME = "Qwen/Qwen2.5-VL-3B-Instruct"
+
+
+@pytest.fixture(scope="module")
+def server():
+    args = [
+        "--dtype",
+        "bfloat16",
+        "--max-model-len",
+        "4096",
+        "--max-num-seqs",
+        "5",
+        "--enforce-eager",
+        "--limit-mm-per-prompt",
+        json.dumps({"image": 1}),
+    ]
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+        yield remote_server
+
+
+@pytest_asyncio.fixture
+async def client(server):
+    async with server.get_async_client() as async_client:
+        yield async_client
+
+
+@pytest.mark.asyncio
+async def test_tokenize_then_chat_completion_with_image(
+    client: openai.AsyncOpenAI,
+    server: RemoteOpenAIServer,
+    local_asset_server,
+):
+    """Tokenize a multimodal message, then send the same message to chat
+    completions.  The chat completion must succeed (not 500)."""
+
+    image_url = local_asset_server.url_for("stop_sign.jpg")
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": image_url}},
+                {"type": "text", "text": "Describe this image briefly."},
+            ],
+        }
+    ]
+
+    tok_resp = requests.post(
+        server.url_for("tokenize"),
+        json={"model": MODEL_NAME, "messages": messages},
+    )
+    tok_resp.raise_for_status()
+    tok_data = tok_resp.json()
+    assert tok_data["count"] > 0, "Tokenization must return tokens"
+
+    chat_completion = await client.chat.completions.create(
+        model=MODEL_NAME,
+        messages=messages,
+        max_tokens=10,
+        temperature=0.0,
+    )
+
+    assert chat_completion.choices[0].message.content, (
+        "Chat completion must produce non-empty content after tokenize"
+    )
diff --git a/tests/entrypoints/speech_to_text/__init__.py b/tests/entrypoints/speech_to_text/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/entrypoints/openai/conftest.py b/tests/entrypoints/speech_to_text/conftest.py
similarity index 100%
rename from tests/entrypoints/openai/conftest.py
rename to tests/entrypoints/speech_to_text/conftest.py
diff --git a/tests/entrypoints/speech_to_text/correctness/__init__.py b/tests/entrypoints/speech_to_text/correctness/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py b/tests/entrypoints/speech_to_text/correctness/test_transcription_api_correctness.py
similarity index 93%
rename from tests/entrypoints/openai/correctness/test_transcription_api_correctness.py
rename to tests/entrypoints/speech_to_text/correctness/test_transcription_api_correctness.py
index 194c52eae35e..fedbd74795b5 100644
--- a/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py
+++ b/tests/entrypoints/speech_to_text/correctness/test_transcription_api_correctness.py
@@ -13,7 +13,6 @@
 import time
 from statistics import mean, median
 
-import librosa
 import pytest
 import soundfile
 import torch
@@ -21,11 +20,16 @@
 from evaluate import load
 from transformers.models.whisper.english_normalizer import EnglishTextNormalizer
 
+from vllm.multimodal.audio import get_audio_duration
 from vllm.tokenizers import get_tokenizer
 
 from ....models.registry import HF_EXAMPLE_MODELS
 from ....utils import RemoteOpenAIServer
 
+# Tuned to prevent OOM on 18GB GPUs in transcription correctness tests.
+MAX_SEQS_FOR_TRANSCRIPTION_TEST = 8
+GPU_UTIL_FOR_TRANSCRIPTION_TEST = 0.5
+
 
 def to_bytes(y, sr):
     buffer = io.BytesIO()
@@ -84,7 +88,7 @@ async def process_dataset(model, client, data, concurrent_request):
         trust_remote_code=model_info.trust_remote_code,
     )
 
-    # Warmup call as the first `librosa.load` server-side is quite slow.
+    # Warmup call as the first `load_audio` server-side is quite slow.
     audio, sr = data[0]["audio"]["array"], data[0]["audio"]["sampling_rate"]
     _ = await bound_transcribe(sem, client, tokenizer, (audio, sr), "")
 
@@ -118,7 +122,7 @@ def print_performance_metrics(results, total_time):
 
 def add_duration(sample):
     y, sr = sample["audio"]["array"], sample["audio"]["sampling_rate"]
-    sample["duration_ms"] = librosa.get_duration(y=y, sr=sr) * 1000
+    sample["duration_ms"] = get_audio_duration(y=y, sr=sr) * 1000
     return sample
 
 
@@ -167,9 +171,8 @@ def run_evaluation(
     "model_config",
     [
         ("openai/whisper-large-v3", 12.744980),
-        # TODO (ekagra): turn on after asr release
         # CohereASR is used to test the variable encoder length code paths
-        # ("CohereLabs/cohere-transcribe-03-2026", 11.92),
+        ("CohereLabs/cohere-transcribe-03-2026", 11.92),
     ],
 )
 # Original dataset is 20GB+ in size, hence we use a pre-filtered slice.
@@ -185,6 +188,8 @@ def test_wer_correctness(
     server_args = [
         "--enforce-eager",
         f"--tokenizer_mode={model_info.tokenizer_mode}",
+        f"--max_num_seqs={MAX_SEQS_FOR_TRANSCRIPTION_TEST}",
+        f"--gpu_memory_utilization={GPU_UTIL_FOR_TRANSCRIPTION_TEST}",
     ]
     if model_info.trust_remote_code:
         server_args.append("--trust-remote-code")
diff --git a/tests/entrypoints/speech_to_text/realtime/__init__.py b/tests/entrypoints/speech_to_text/realtime/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/entrypoints/openai/realtime/test_realtime_validation.py b/tests/entrypoints/speech_to_text/realtime/test_realtime_validation.py
similarity index 96%
rename from tests/entrypoints/openai/realtime/test_realtime_validation.py
rename to tests/entrypoints/speech_to_text/realtime/test_realtime_validation.py
index bb6b02f5c99e..675922f58840 100644
--- a/tests/entrypoints/openai/realtime/test_realtime_validation.py
+++ b/tests/entrypoints/speech_to_text/realtime/test_realtime_validation.py
@@ -5,15 +5,15 @@
 import json
 import warnings
 
-import librosa
 import numpy as np
 import pybase64 as base64
 import pytest
 import websockets
 
-from tests.entrypoints.openai.conftest import add_attention_backend
+from tests.entrypoints.speech_to_text.conftest import add_attention_backend
 from tests.utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer
 from vllm.assets.audio import AudioAsset
+from vllm.multimodal.media.audio import load_audio
 
 # Increase engine iteration timeout for ROCm where first-use JIT compilation
 # can exceed the default 60s, causing a silent deadlock in feed_tokens.
@@ -56,7 +56,7 @@ async def send_event(ws, event: dict) -> None:
 def mary_had_lamb_audio_chunks() -> list[str]:
     """Audio split into ~1 second chunks for streaming."""
     path = AudioAsset("mary_had_lamb").get_local_path()
-    audio, _ = librosa.load(str(path), sr=16000, mono=True)
+    audio, _ = load_audio(str(path), sr=16000, mono=True)
 
     # Split into ~0.1 second chunks (1600 samples at 16kHz)
     chunk_size = 1600
@@ -163,6 +163,11 @@ async def test_multi_chunk_streaming(
                 " A little piece of practical poetry. Mary had a little lamb,"
                 " it sleeps with quite a flow, and everywhere that Mary went,"
                 " the lamb was sure to go."
+            ) or full_text == (
+                " First words I spoke in the original phonograph."
+                " A little piece of practical poetry. Mary had a little lamb,"
+                " it squeaked with quite a flow, and everywhere that Mary went,"
+                " the lamb was sure to go."
             )
 
 
diff --git a/tests/entrypoints/speech_to_text/test_speech_to_text_cancellation.py b/tests/entrypoints/speech_to_text/test_speech_to_text_cancellation.py
new file mode 100644
index 000000000000..08553c641103
--- /dev/null
+++ b/tests/entrypoints/speech_to_text/test_speech_to_text_cancellation.py
@@ -0,0 +1,191 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import asyncio
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, Mock
+
+import pytest
+
+from vllm.entrypoints.speech_to_text.base.serving import OpenAISpeechToText
+from vllm.entrypoints.speech_to_text.transcription.protocol import TranscriptionResponse
+
+
+async def _never_finishes():
+    await asyncio.Event().wait()
+    yield
+
+
+async def _records_start_then_never_finishes(started_request_ids, request_id):
+    started_request_ids.append(request_id)
+    await asyncio.Event().wait()
+    yield
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    ("engine_inputs", "expected_request_ids"),
+    [
+        ([{"prompt": "chunk"}], ["transcribe-outer-request"]),
+        (
+            [{"prompt": "chunk-0"}, {"prompt": "chunk-1"}],
+            ["transcribe-outer-request-0", "transcribe-outer-request-1"],
+        ),
+    ],
+)
+async def test_non_streaming_cancel_aborts_engine_requests(
+    engine_inputs, expected_request_ids
+):
+    engine_client = SimpleNamespace(
+        errored=False,
+        generate=Mock(side_effect=lambda *_args, **_kwargs: _never_finishes()),
+        abort=AsyncMock(),
+        is_tracing_enabled=AsyncMock(return_value=False),
+    )
+
+    server = OpenAISpeechToText.__new__(OpenAISpeechToText)
+    server.engine_client = engine_client
+    server.task_type = "transcribe"
+    server.models = SimpleNamespace(model_name=lambda: "audio")
+    server.model_config = SimpleNamespace(max_model_len=1024)
+    server.model_cls = SimpleNamespace(no_space_languages=set())
+    server.default_sampling_params = {}
+    server.asr_config = SimpleNamespace(max_audio_clip_s=30)
+    server._check_model = AsyncMock(return_value=None)
+    server._maybe_get_adapters = Mock(return_value=None)
+    server._preprocess_speech_to_text = AsyncMock(return_value=(engine_inputs, 40.0))
+    server._log_inputs = Mock()
+
+    request = SimpleNamespace(
+        model="audio",
+        response_format="json",
+        stream=False,
+        use_beam_search=False,
+        max_completion_tokens=None,
+        language="en",
+        prompt="",
+        to_sampling_params=Mock(return_value=object()),
+    )
+    raw_request = SimpleNamespace(
+        headers={"X-Request-Id": "outer-request"},
+        state=SimpleNamespace(),
+    )
+
+    task = asyncio.create_task(
+        server._create_speech_to_text(
+            audio_data=b"audio",
+            request=request,
+            raw_request=raw_request,
+            response_class=TranscriptionResponse,
+            stream_generator_method=Mock(),
+        )
+    )
+    await asyncio.sleep(0)
+
+    task.cancel()
+    with pytest.raises(asyncio.CancelledError):
+        await task
+
+    generated_request_ids = [
+        call.args[2] for call in engine_client.generate.call_args_list
+    ]
+    assert generated_request_ids == expected_request_ids
+    engine_client.abort.assert_awaited_once_with(expected_request_ids)
+
+
+@pytest.mark.asyncio
+async def test_non_streaming_cancel_advances_all_chunk_generators():
+    started_request_ids: list[str] = []
+    engine_client = SimpleNamespace(
+        errored=False,
+        generate=Mock(
+            side_effect=lambda *_args, **_kwargs: (
+                _records_start_then_never_finishes(started_request_ids, _args[2])
+            )
+        ),
+        abort=AsyncMock(),
+        is_tracing_enabled=AsyncMock(return_value=False),
+    )
+
+    engine_inputs = [
+        {"prompt": "chunk-0"},
+        {"prompt": "chunk-1"},
+        {"prompt": "chunk-2"},
+    ]
+    server = OpenAISpeechToText.__new__(OpenAISpeechToText)
+    server.engine_client = engine_client
+    server.task_type = "transcribe"
+    server.models = SimpleNamespace(model_name=lambda: "audio")
+    server.model_config = SimpleNamespace(max_model_len=1024)
+    server.model_cls = SimpleNamespace(no_space_languages=set())
+    server.default_sampling_params = {}
+    server.asr_config = SimpleNamespace(max_audio_clip_s=30)
+    server._check_model = AsyncMock(return_value=None)
+    server._maybe_get_adapters = Mock(return_value=None)
+    server._preprocess_speech_to_text = AsyncMock(return_value=(engine_inputs, 90.0))
+    server._log_inputs = Mock()
+
+    request = SimpleNamespace(
+        model="audio",
+        response_format="json",
+        stream=False,
+        use_beam_search=False,
+        max_completion_tokens=None,
+        language="en",
+        prompt="",
+        to_sampling_params=Mock(return_value=object()),
+    )
+    raw_request = SimpleNamespace(
+        headers={"X-Request-Id": "outer-request"},
+        state=SimpleNamespace(),
+    )
+
+    task = asyncio.create_task(
+        server._create_speech_to_text(
+            audio_data=b"audio",
+            request=request,
+            raw_request=raw_request,
+            response_class=TranscriptionResponse,
+            stream_generator_method=Mock(),
+        )
+    )
+    await asyncio.sleep(0.01)
+
+    expected_request_ids = [
+        "transcribe-outer-request-0",
+        "transcribe-outer-request-1",
+        "transcribe-outer-request-2",
+    ]
+    assert set(started_request_ids) == set(expected_request_ids)
+
+    task.cancel()
+    with pytest.raises(asyncio.CancelledError):
+        await task
+
+
+@pytest.mark.asyncio
+async def test_language_detection_cancel_aborts_engine_request():
+    engine_client = SimpleNamespace(
+        generate=Mock(return_value=_never_finishes()),
+        abort=AsyncMock(),
+    )
+
+    server = OpenAISpeechToText.__new__(OpenAISpeechToText)
+    server.engine_client = engine_client
+    server.asr_config = SimpleNamespace()
+    server.tokenizer = Mock()
+    server.model_cls = SimpleNamespace(
+        get_language_detection_prompt=Mock(return_value={"prompt": "detect"}),
+        get_language_token_ids=Mock(return_value=[1]),
+        parse_language_detection_output=Mock(),
+    )
+
+    request_id = "transcribe-outer-request-lang_detect"
+    task = asyncio.create_task(server._detect_language(Mock(), request_id))
+    await asyncio.sleep(0)
+
+    task.cancel()
+    with pytest.raises(asyncio.CancelledError):
+        await task
+
+    engine_client.abort.assert_awaited_once_with(request_id)
diff --git a/tests/entrypoints/speech_to_text/transcription/__init__.py b/tests/entrypoints/speech_to_text/transcription/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/entrypoints/speech_to_text/transcription/test_enable_force_include_usage.py b/tests/entrypoints/speech_to_text/transcription/test_enable_force_include_usage.py
new file mode 100644
index 000000000000..0477e7082bc7
--- /dev/null
+++ b/tests/entrypoints/speech_to_text/transcription/test_enable_force_include_usage.py
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
+import pytest_asyncio
+
+from tests.utils import RemoteOpenAIServer
+
+
+@pytest.fixture(scope="module")
+def transcription_server_with_force_include_usage():
+    args = [
+        # use half precision for speed and memory savings in CI environment
+        "--dtype",
+        "bfloat16",
+        "--max-num-seqs",
+        "4",
+        "--enforce-eager",
+        "--enable-force-include-usage",
+        "--gpu-memory-utilization",
+        "0.2",
+    ]
+
+    with RemoteOpenAIServer("openai/whisper-large-v3-turbo", args) as remote_server:
+        yield remote_server
+
+
+@pytest_asyncio.fixture
+async def transcription_client_with_force_include_usage(
+    transcription_server_with_force_include_usage,
+):
+    async with (
+        transcription_server_with_force_include_usage.get_async_client() as async_client
+    ):
+        yield async_client
+
+
+@pytest.mark.asyncio
+async def test_transcription_with_enable_force_include_usage(
+    transcription_client_with_force_include_usage, winning_call
+):
+    res = (
+        await transcription_client_with_force_include_usage.audio.transcriptions.create(
+            model="openai/whisper-large-v3-turbo",
+            file=winning_call,
+            language="en",
+            temperature=0.0,
+            stream=True,
+            timeout=30,
+        )
+    )
+
+    async for chunk in res:
+        if not len(chunk.choices):
+            # final usage sent
+            usage = chunk.usage
+            assert isinstance(usage, dict)
+            assert usage["prompt_tokens"] > 0
+            assert usage["completion_tokens"] > 0
+            assert usage["total_tokens"] > 0
+        else:
+            assert not hasattr(chunk, "usage")
diff --git a/tests/entrypoints/speech_to_text/transcription/test_transcription_inter_chunk_spacing.py b/tests/entrypoints/speech_to_text/transcription/test_transcription_inter_chunk_spacing.py
new file mode 100644
index 000000000000..c4da9a80f7ab
--- /dev/null
+++ b/tests/entrypoints/speech_to_text/transcription/test_transcription_inter_chunk_spacing.py
@@ -0,0 +1,273 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""ASR inter-chunk spacing: ``asr_inter_chunk_separator`` and transcription
+serving (mocked).
+
+Unit tests cover the helper and ``SupportsTranscription.no_space_languages``.
+Integration-style tests exercise ``OpenAIServingTranscription`` streaming and
+``create_transcription`` without loading a model.
+"""
+
+from __future__ import annotations
+
+import json
+from collections.abc import AsyncGenerator
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from vllm.config import ModelConfig
+from vllm.config.speech_to_text import SpeechToTextConfig
+from vllm.entrypoints.openai.engine.protocol import (
+    ErrorResponse,
+    RequestResponseMetadata,
+)
+from vllm.entrypoints.openai.models.serving import OpenAIServingModels
+from vllm.entrypoints.speech_to_text.base.serving import (
+    OpenAISpeechToText,
+    asr_inter_chunk_separator,
+)
+from vllm.entrypoints.speech_to_text.transcription.protocol import TranscriptionRequest
+from vllm.entrypoints.speech_to_text.transcription.serving import (
+    OpenAIServingTranscription,
+)
+from vllm.model_executor.models.interfaces import SupportsTranscription
+from vllm.outputs import CompletionOutput, RequestOutput
+
+# --- Unit: helper + protocol -------------------------------------------------
+
+
+def test_default_no_space_languages_includes_zh_and_ja():
+    assert SupportsTranscription.no_space_languages == {"ja", "zh"}
+
+
+@pytest.mark.parametrize(
+    ("language", "expected_sep"),
+    [
+        ("en", " "),
+        ("EN", " "),
+        ("zh", ""),
+        ("ZH", ""),
+        ("ja", ""),
+        (None, " "),
+    ],
+)
+def test_asr_inter_chunk_separator_matches_protocol(language, expected_sep):
+    sep = asr_inter_chunk_separator(language, SupportsTranscription.no_space_languages)
+    assert sep == expected_sep
+
+
+def test_joined_chunks_english_has_space_between():
+    sep = asr_inter_chunk_separator("en", SupportsTranscription.no_space_languages)
+    assert sep.join(["hello", "world"]) == "hello world"
+
+
+def test_joined_chunks_chinese_has_no_space_between():
+    sep = asr_inter_chunk_separator("zh", SupportsTranscription.no_space_languages)
+    assert sep.join(["你好", "世界"]) == "你好世界"
+
+
+# --- Integration: serving (no model) -----------------------------------------
+
+
+class _StubTranscriptionModel:
+    """Minimal stand-in for a SupportsTranscription implementation (no torch)."""
+
+    no_space_languages: set[str] = {"ja", "zh"}
+    supports_segment_timestamp = False
+
+    @classmethod
+    def get_speech_to_text_config(
+        cls, model_config: ModelConfig, task_type: str
+    ) -> SpeechToTextConfig:
+        return SpeechToTextConfig(
+            sample_rate=16000.0,
+            max_audio_clip_s=5.0,
+        )
+
+    @classmethod
+    def post_process_output(cls, text: str) -> str:
+        return text
+
+
+def _request_output(text: str) -> RequestOutput:
+    return RequestOutput(
+        request_id="rid",
+        prompt=None,
+        prompt_token_ids=None,
+        prompt_logprobs=None,
+        outputs=[
+            CompletionOutput(
+                index=0,
+                text=text,
+                token_ids=(1, 2, 3),
+                cumulative_logprob=None,
+                logprobs=None,
+                finish_reason="stop",
+            )
+        ],
+        finished=True,
+    )
+
+
+def _sse_delta_contents(sse_body: str) -> list[str]:
+    """Extract ``choices[0].delta.content`` from each ``data:`` line (streaming API)."""
+    contents: list[str] = []
+    for line in sse_body.splitlines():
+        if not line.startswith("data: "):
+            continue
+        payload = line.removeprefix("data: ").strip()
+        if payload == "[DONE]":
+            continue
+        obj = json.loads(payload)
+        for choice in obj.get("choices") or []:
+            delta = choice.get("delta") or {}
+            if "content" in delta:
+                contents.append(delta["content"])
+    return contents
+
+
+@pytest.mark.asyncio
+async def test_transcription_stream_generator_english_inserts_space_between_chunks():
+    """Online streaming: first output per audio chunk is prefixed with *separator*."""
+
+    async def gen_hello() -> AsyncGenerator[RequestOutput, None]:
+        yield _request_output("hello")
+
+    async def gen_world() -> AsyncGenerator[RequestOutput, None]:
+        yield _request_output("world")
+
+    serving = OpenAIServingTranscription.__new__(OpenAIServingTranscription)
+    serving.enable_force_include_usage = False
+    serving.model_cls = _StubTranscriptionModel
+    serving.task_type = "transcribe"
+    request = SimpleNamespace(
+        model="stub-model",
+        stream_include_usage=False,
+        stream_continuous_usage_stats=False,
+    )
+    sep = asr_inter_chunk_separator("en", _StubTranscriptionModel.no_space_languages)
+    assert sep == " "
+
+    out_lines: list[str] = []
+    agen = OpenAIServingTranscription.transcription_stream_generator(
+        serving,
+        request=request,
+        result_generator=[gen_hello(), gen_world()],
+        request_id="test-req",
+        request_metadata=RequestResponseMetadata(request_id="test-req"),
+        audio_duration_s=1.0,
+        separator=sep,
+    )
+    async for line in agen:
+        out_lines.append(line)
+    sse = "".join(out_lines)
+    combined = "".join(_sse_delta_contents(sse))
+    assert combined.strip() == "hello world"
+
+
+@pytest.mark.asyncio
+async def test_transcription_stream_generator_chinese_no_space_between_chunks():
+    async def gen_a() -> AsyncGenerator[RequestOutput, None]:
+        yield _request_output("你好")
+
+    async def gen_b() -> AsyncGenerator[RequestOutput, None]:
+        yield _request_output("世界")
+
+    serving = OpenAIServingTranscription.__new__(OpenAIServingTranscription)
+    serving.enable_force_include_usage = False
+    serving.model_cls = _StubTranscriptionModel
+    serving.task_type = "transcribe"
+    request = SimpleNamespace(
+        model="stub-model",
+        stream_include_usage=False,
+        stream_continuous_usage_stats=False,
+    )
+    sep = asr_inter_chunk_separator("zh", _StubTranscriptionModel.no_space_languages)
+    assert sep == ""
+
+    out_lines: list[str] = []
+    agen = OpenAIServingTranscription.transcription_stream_generator(
+        serving,
+        request=request,
+        result_generator=[gen_a(), gen_b()],
+        request_id="test-req-zh",
+        request_metadata=RequestResponseMetadata(request_id="test-req-zh"),
+        audio_duration_s=1.0,
+        separator=sep,
+    )
+    async for line in agen:
+        out_lines.append(line)
+    combined = "".join(_sse_delta_contents("".join(out_lines)))
+    assert combined == "你好世界"
+
+
+@pytest.mark.asyncio
+async def test_create_transcription_non_streaming_joins_chunks_by_language():
+    """``create_transcription`` uses the same separator logic as the helper."""
+
+    async def gen_hello() -> AsyncGenerator[RequestOutput, None]:
+        yield _request_output("hello")
+
+    async def gen_world() -> AsyncGenerator[RequestOutput, None]:
+        yield _request_output("world")
+
+    engine_client = MagicMock()
+    engine_client.model_config = MagicMock()
+    engine_client.model_config.get_diff_sampling_param.return_value = {
+        "max_tokens": 256,
+        "temperature": 0.0,
+    }
+    engine_client.model_config.max_model_len = 8192
+    engine_client.errored = False
+    engine_client.generate.side_effect = [gen_hello(), gen_world()]
+
+    models = MagicMock(spec=OpenAIServingModels)
+    models.lora_requests = {}
+    models.is_base_model.return_value = True
+
+    preprocess_mock = AsyncMock(return_value=([MagicMock(), MagicMock()], 1.0))
+
+    with (
+        patch(
+            "vllm.model_executor.model_loader.get_model_cls",
+            return_value=_StubTranscriptionModel,
+        ),
+        patch.object(OpenAISpeechToText, "_preprocess_speech_to_text", preprocess_mock),
+    ):
+        serving = OpenAIServingTranscription(engine_client, models, request_logger=None)
+
+        req_en = TranscriptionRequest.model_construct(
+            file=MagicMock(),
+            model="stub-model",
+            language="en",
+            stream=False,
+            response_format="json",
+        )
+        out_en = await serving.create_transcription(
+            b"\x00\x00", req_en, raw_request=None
+        )
+        assert not isinstance(out_en, ErrorResponse)
+        assert out_en.text == "hello world"
+
+        async def gen_nihao() -> AsyncGenerator[RequestOutput, None]:
+            yield _request_output("你好")
+
+        async def gen_shijie() -> AsyncGenerator[RequestOutput, None]:
+            yield _request_output("世界")
+
+        engine_client.generate.side_effect = [gen_nihao(), gen_shijie()]
+
+        req_zh = TranscriptionRequest.model_construct(
+            file=MagicMock(),
+            model="stub-model",
+            language="zh",
+            stream=False,
+            response_format="json",
+        )
+        out_zh = await serving.create_transcription(
+            b"\x00\x00", req_zh, raw_request=None
+        )
+        assert not isinstance(out_zh, ErrorResponse)
+        assert out_zh.text == "你好世界"
diff --git a/tests/entrypoints/openai/speech_to_text/test_transcription_validation.py b/tests/entrypoints/speech_to_text/transcription/test_transcription_validation.py
similarity index 98%
rename from tests/entrypoints/openai/speech_to_text/test_transcription_validation.py
rename to tests/entrypoints/speech_to_text/transcription/test_transcription_validation.py
index 4ac48699a022..5ea218406b98 100644
--- a/tests/entrypoints/openai/speech_to_text/test_transcription_validation.py
+++ b/tests/entrypoints/speech_to_text/transcription/test_transcription_validation.py
@@ -6,7 +6,7 @@
 
 import pytest
 
-from tests.entrypoints.openai.conftest import add_attention_backend
+from tests.entrypoints.speech_to_text.conftest import add_attention_backend
 from tests.utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer
 
 MISTRAL_FORMAT_ARGS = [
diff --git a/tests/entrypoints/openai/speech_to_text/test_transcription_validation_whisper.py b/tests/entrypoints/speech_to_text/transcription/test_transcription_validation_whisper.py
similarity index 99%
rename from tests/entrypoints/openai/speech_to_text/test_transcription_validation_whisper.py
rename to tests/entrypoints/speech_to_text/transcription/test_transcription_validation_whisper.py
index 8dba1b59742b..511179f7fcb1 100644
--- a/tests/entrypoints/openai/speech_to_text/test_transcription_validation_whisper.py
+++ b/tests/entrypoints/speech_to_text/transcription/test_transcription_validation_whisper.py
@@ -6,7 +6,6 @@
 import io
 import json
 
-import librosa
 import numpy as np
 import openai
 import pytest
@@ -14,6 +13,7 @@
 import soundfile as sf
 
 from tests.utils import RemoteOpenAIServer
+from vllm.multimodal.media.audio import load_audio
 from vllm.platforms import current_platform
 
 MODEL_NAME = "openai/whisper-large-v3-turbo"
@@ -134,7 +134,7 @@ async def test_bad_requests(mary_had_lamb, whisper_client):
 @pytest.mark.asyncio
 async def test_long_audio_request(mary_had_lamb, whisper_client):
     mary_had_lamb.seek(0)
-    audio, sr = librosa.load(mary_had_lamb)
+    audio, sr = load_audio(mary_had_lamb)
     # Add small silence after each audio for repeatability in the split process
     audio = np.pad(audio, (0, 1600))
     repeated_audio = np.tile(audio, 10)
diff --git a/tests/entrypoints/speech_to_text/translation/__init__.py b/tests/entrypoints/speech_to_text/translation/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/entrypoints/openai/speech_to_text/test_translation_validation.py b/tests/entrypoints/speech_to_text/translation/test_translation_validation.py
similarity index 85%
rename from tests/entrypoints/openai/speech_to_text/test_translation_validation.py
rename to tests/entrypoints/speech_to_text/translation/test_translation_validation.py
index 6fb60d537583..ed3cff5f1c22 100644
--- a/tests/entrypoints/openai/speech_to_text/test_translation_validation.py
+++ b/tests/entrypoints/speech_to_text/translation/test_translation_validation.py
@@ -7,19 +7,50 @@
 import json
 
 import httpx
-import librosa
 import numpy as np
 import openai
 import pytest
 import pytest_asyncio
 import soundfile as sf
 
-from tests.entrypoints.openai.conftest import add_attention_backend
+from tests.entrypoints.speech_to_text.conftest import add_attention_backend
 from tests.utils import RemoteOpenAIServer
+from vllm.logger import init_logger
+from vllm.multimodal.media.audio import load_audio
+
+logger = init_logger(__name__)
 
 SERVER_ARGS = ["--enforce-eager"]
 
 
+def _get_rocm_attention_config(model_name):
+    """Return appropriate ROCm attention config for the given model.
+
+    Whisper uses cross-attention (ENCODER_DECODER) which ROCM_AITER_FA does
+    not support. For Whisper we use ROCM_AITER_UNIFIED_ATTN (or TRITON_ATTN
+    as fallback); other models can use ROCM_AITER_FA.
+    """
+    from vllm.platforms import current_platform
+
+    if not current_platform.is_rocm():
+        return None
+
+    if "whisper" in model_name.lower():
+        try:
+            from vllm.platforms.rocm import _ON_MI3XX
+
+            if _ON_MI3XX:
+                return {"backend": "ROCM_AITER_UNIFIED_ATTN"}
+        except ImportError:
+            logger.warning(
+                "Could not import _ON_MI3XX from rocm platform, "
+                "falling back to TRITON_ATTN for Whisper."
+            )
+        return {"backend": "TRITON_ATTN"}
+
+    return {"backend": "ROCM_AITER_FA"}
+
+
 def _get_server_args(attention_config):
     """Get server args with attention backend if specified."""
     args = SERVER_ARGS.copy()
@@ -30,10 +61,11 @@ def _get_server_args(attention_config):
 @pytest.fixture(
     scope="module", params=["openai/whisper-small", "google/gemma-3n-E2B-it"]
 )
-def server(request, rocm_aiter_fa_attention):
+def server(request):
     # Parametrize over model name
+    attention_config = _get_rocm_attention_config(request.param)
     with RemoteOpenAIServer(
-        request.param, _get_server_args(rocm_aiter_fa_attention)
+        request.param, _get_server_args(attention_config)
     ) as remote_server:
         yield remote_server, request.param
 
@@ -46,11 +78,12 @@ async def client_and_model(server):
 
 
 @pytest.mark.asyncio
-async def test_non_asr_model(foscolo, rocm_aiter_fa_attention):
+async def test_non_asr_model(foscolo):
     # text to text model
     model_name = "JackFram/llama-68m"
+    attention_config = _get_rocm_attention_config(model_name)
     with RemoteOpenAIServer(
-        model_name, _get_server_args(rocm_aiter_fa_attention)
+        model_name, _get_server_args(attention_config)
     ) as remote_server:
         client = remote_server.get_async_client()
 
@@ -61,7 +94,7 @@ async def test_non_asr_model(foscolo, rocm_aiter_fa_attention):
 
 
 @pytest.mark.asyncio
-async def test_basic_audio_with_lora(mary_had_lamb, rocm_aiter_fa_attention):
+async def test_basic_audio_with_lora(mary_had_lamb):
     """Ensure STT (translate) requests can pass LoRA through to generate."""
     # ROCm SPECIFIC CONFIGURATION:
     # To ensure the test passes on ROCm, we modify the max model length to 512.
@@ -85,7 +118,7 @@ async def test_basic_audio_with_lora(mary_had_lamb, rocm_aiter_fa_attention):
         "1",
     ]
 
-    add_attention_backend(server_args, rocm_aiter_fa_attention)
+    add_attention_backend(server_args, _get_rocm_attention_config(model_name))
 
     # Based on https://github.com/openai/openai-cookbook/blob/main/examples/Whisper_prompting_guide.ipynb.
     with RemoteOpenAIServer(model_name, server_args) as remote_server:
@@ -231,7 +264,7 @@ async def test_long_audio_request(foscolo, client_and_model):
     if model_name == "google/gemma-3n-E2B-it":
         pytest.skip("Gemma3n does not support long audio requests")
     foscolo.seek(0)
-    audio, sr = librosa.load(foscolo)
+    audio, sr = load_audio(foscolo)
     repeated_audio = np.tile(audio, 2)
     # Repeated audio to buffer
     buffer = io.BytesIO()
diff --git a/tests/entrypoints/test_grpc_health.py b/tests/entrypoints/test_grpc_health.py
new file mode 100644
index 000000000000..d63b8294c5f4
--- /dev/null
+++ b/tests/entrypoints/test_grpc_health.py
@@ -0,0 +1,143 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+grpc = pytest.importorskip("grpc")
+health_pb2 = pytest.importorskip("grpc_health.v1.health_pb2")
+VllmHealthServicer = pytest.importorskip(
+    "smg_grpc_servicer.vllm.health_servicer"
+).VllmHealthServicer
+
+SERVING = health_pb2.HealthCheckResponse.SERVING
+NOT_SERVING = health_pb2.HealthCheckResponse.NOT_SERVING
+SERVICE_UNKNOWN = health_pb2.HealthCheckResponse.SERVICE_UNKNOWN
+
+
+@pytest.fixture
+def async_llm():
+    mock = MagicMock()
+    mock.check_health = AsyncMock()
+    return mock
+
+
+@pytest.fixture
+def context():
+    return MagicMock(spec=grpc.aio.ServicerContext)
+
+
+@pytest.fixture
+def servicer(async_llm):
+    return VllmHealthServicer(async_llm)
+
+
+@pytest.fixture
+def request_msg():
+    msg = MagicMock()
+    msg.service = ""
+    return msg
+
+
+# -- Check() tests --
+
+
+@pytest.mark.asyncio
+async def test_check_serving_overall(servicer, request_msg, context, async_llm):
+    request_msg.service = ""
+    response = await servicer.Check(request_msg, context)
+    assert response.status == SERVING
+    async_llm.check_health.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_check_serving_vllm_service(servicer, request_msg, context, async_llm):
+    request_msg.service = "vllm.grpc.engine.VllmEngine"
+    response = await servicer.Check(request_msg, context)
+    assert response.status == SERVING
+    async_llm.check_health.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_check_not_serving_engine_errored(
+    servicer, request_msg, context, async_llm
+):
+    async_llm.check_health = AsyncMock(side_effect=Exception("engine dead"))
+    request_msg.service = ""
+    response = await servicer.Check(request_msg, context)
+    assert response.status == NOT_SERVING
+
+
+@pytest.mark.asyncio
+async def test_check_not_serving_shutting_down(
+    servicer, request_msg, context, async_llm
+):
+    servicer.set_not_serving()
+    request_msg.service = ""
+    response = await servicer.Check(request_msg, context)
+    assert response.status == NOT_SERVING
+    async_llm.check_health.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_check_unknown_service_status(servicer, request_msg, context):
+    request_msg.service = "nonexistent.Service"
+    response = await servicer.Check(request_msg, context)
+    assert response.status == SERVICE_UNKNOWN
+
+
+@pytest.mark.asyncio
+async def test_check_unknown_service_grpc_code(servicer, request_msg, context):
+    request_msg.service = "fake.Svc"
+    await servicer.Check(request_msg, context)
+    context.set_code.assert_called_once_with(grpc.StatusCode.NOT_FOUND)
+    context.set_details.assert_called_once()
+    details_arg = context.set_details.call_args[0][0]
+    assert "fake.Svc" in details_arg
+
+
+@pytest.mark.asyncio
+@patch("smg_grpc_servicer.vllm.health_servicer.logger")
+async def test_check_logs_exception_on_error(
+    mock_logger, servicer, request_msg, context, async_llm
+):
+    async_llm.check_health = AsyncMock(side_effect=Exception("engine exploded"))
+    request_msg.service = ""
+    await servicer.Check(request_msg, context)
+    mock_logger.exception.assert_called_once()
+    log_args = mock_logger.exception.call_args
+    assert "service" in str(log_args).lower()
+
+
+# -- Watch() tests --
+
+
+@pytest.mark.asyncio
+async def test_watch_yields_serving(servicer, request_msg, context, async_llm):
+    request_msg.service = ""
+    watch_iter = servicer.Watch(request_msg, context)
+    first = await anext(watch_iter.__aiter__())
+    assert first.status == SERVING
+
+
+@pytest.mark.asyncio
+async def test_watch_yields_not_serving(servicer, request_msg, context, async_llm):
+    async_llm.check_health = AsyncMock(side_effect=Exception("engine down"))
+    request_msg.service = ""
+    watch_iter = servicer.Watch(request_msg, context)
+    first = await anext(watch_iter.__aiter__())
+    assert first.status == NOT_SERVING
+
+
+@pytest.mark.asyncio
+async def test_watch_unknown_service(servicer, request_msg, context):
+    request_msg.service = "fake.Service"
+    results = []
+    async for response in servicer.Watch(request_msg, context):
+        results.append(response)
+    assert len(results) == 1
+    assert results[0].status == SERVICE_UNKNOWN
+    # Watch returns SERVICE_UNKNOWN in the response body (not as a gRPC error
+    # code) so the stream terminates normally -- unlike Check, which sets
+    # NOT_FOUND on the gRPC context for unknown services.
+    context.set_code.assert_not_called()
diff --git a/tests/entrypoints/weight_transfer/test_weight_transfer_llm.py b/tests/entrypoints/weight_transfer/test_weight_transfer_llm.py
index 7d6d330aa544..6c6269865075 100644
--- a/tests/entrypoints/weight_transfer/test_weight_transfer_llm.py
+++ b/tests/entrypoints/weight_transfer/test_weight_transfer_llm.py
@@ -63,8 +63,8 @@ class MockWeightTransferEngine(WeightTransferEngine[MockInitInfo, MockUpdateInfo
     last_init_info: MockInitInfo | None = None
     last_update_info: MockUpdateInfo | None = None
 
-    def __init__(self, config, parallel_config):
-        super().__init__(config, parallel_config)
+    def __init__(self, config, parallel_config, model):
+        super().__init__(config, parallel_config, model)
         # Reset tracking on init
         MockWeightTransferEngine.init_transfer_engine_called = False
         MockWeightTransferEngine.receive_weights_called = False
@@ -95,9 +95,9 @@ def trainer_send_weights(self, *args, **kwargs):
         pass
 
 
-def mock_create_engine(config, parallel_config):
+def mock_create_engine(config, parallel_config, model):
     """Mock factory function that returns our mock engine."""
-    return MockWeightTransferEngine(config, parallel_config)
+    return MockWeightTransferEngine(config, parallel_config, model)
 
 
 # --- Tests ---
@@ -199,6 +199,9 @@ def test_update_weights_calls_engine():
             WeightTransferInitRequest(init_info={"test_param": "init"})
         )
 
+        # Start weight update (required before update_weights)
+        llm.start_weight_update(is_checkpoint_format=True)
+
         # Call update_weights
         test_names = ["layer.weight", "layer.bias"]
         test_dtypes = ["float32", "float32"]
@@ -229,10 +232,14 @@ def check_update_called(self):
             assert dtypes == test_dtypes
             assert shapes == test_shapes
 
+        # Finish weight update
+        llm.finish_weight_update()
+
 
 @create_new_process_for_each_test()
 def test_full_weight_transfer_flow():
-    """Test the complete weight transfer flow: init -> update."""
+    """Test the complete weight transfer flow:
+    init -> start -> update -> finish."""
     if torch.accelerator.device_count() < 1:
         pytest.skip("Need at least 1 GPU for this test")
 
@@ -253,12 +260,15 @@ def test_full_weight_transfer_flow():
             weight_transfer_config=WeightTransferConfig(backend="nccl"),
         )
 
-        # Step 1: Initialize
+        # Step 1: Initialize weight transfer engine
         llm.init_weight_transfer_engine(
             WeightTransferInitRequest(init_info={"test_param": "flow_test"})
         )
 
-        # Step 2: Update weights
+        # Step 2: Start weight update
+        llm.start_weight_update(is_checkpoint_format=True)
+
+        # Step 3: Update weights
         llm.update_weights(
             WeightTransferUpdateRequest(
                 update_info={
@@ -269,6 +279,9 @@ def test_full_weight_transfer_flow():
             )
         )
 
+        # Step 4: Finish weight update
+        llm.finish_weight_update()
+
         # Verify the full flow completed
         def check_flow(self):
             engine = self.weight_transfer_engine
diff --git a/tests/evals/gpt_oss/configs/gpt-oss-20b-flashinfer-mxfp4-mxfp8-cutlass.yaml b/tests/evals/gpt_oss/configs/gpt-oss-20b-flashinfer-mxfp4-mxfp8-cutlass.yaml
index 23ec14819ef4..5a8e6534a6c2 100644
--- a/tests/evals/gpt_oss/configs/gpt-oss-20b-flashinfer-mxfp4-mxfp8-cutlass.yaml
+++ b/tests/evals/gpt_oss/configs/gpt-oss-20b-flashinfer-mxfp4-mxfp8-cutlass.yaml
@@ -3,6 +3,4 @@
 model_name: "openai/gpt-oss-20b"
 metric_threshold: 0.568
 reasoning_effort: "low"
-server_args: "--tensor-parallel-size 2"
-env:
-  VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS: "1"
+server_args: "--tensor-parallel-size 2 --moe-backend flashinfer_cutlass --quantization-config.moe.activation mxfp8"
diff --git a/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-baseline.yaml b/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-baseline.yaml
index 76b1d796230e..ec1c2b3922d5 100644
--- a/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-baseline.yaml
+++ b/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-baseline.yaml
@@ -3,4 +3,4 @@
 model_name: openai/gpt-oss-20b
 metric_threshold: 0.568
 reasoning_effort: low
-server_args: "--attention-backend ROCM_AITER_UNIFIED_ATTN"
\ No newline at end of file
+server_args: "--attention-backend ROCM_AITER_UNIFIED_ATTN --tensor-parallel-size 2"
\ No newline at end of file
diff --git a/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-quark-mxfp4-bf16-aiter.yaml b/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-quark-mxfp4-bf16-aiter.yaml
new file mode 100644
index 000000000000..4ff2648ca82d
--- /dev/null
+++ b/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-quark-mxfp4-bf16-aiter.yaml
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+model_name: amd/gpt-oss-20b-w-mxfp4-a-bf16
+metric_threshold: 0.568
+reasoning_effort: low
+server_args: "--attention-backend ROCM_AITER_UNIFIED_ATTN --moe-backend aiter --tokenizer openai/gpt-oss-20b --tensor-parallel-size 2"
+env:
+  VLLM_ROCM_USE_AITER: "1"
\ No newline at end of file
diff --git a/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-quark-mxfp4-bf16-triton.yaml b/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-quark-mxfp4-bf16-triton.yaml
new file mode 100644
index 000000000000..5ae665a044a3
--- /dev/null
+++ b/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-quark-mxfp4-bf16-triton.yaml
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+model_name: amd/gpt-oss-20b-w-mxfp4-a-bf16
+metric_threshold: 0.568
+reasoning_effort: low
+server_args: "--attention-backend ROCM_AITER_UNIFIED_ATTN --moe-backend triton --tokenizer openai/gpt-oss-20b --tensor-parallel-size 2"
\ No newline at end of file
diff --git a/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-quark-mxfp4-fp8-triton.yaml b/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-quark-mxfp4-fp8-triton.yaml
new file mode 100644
index 000000000000..81270e0105f3
--- /dev/null
+++ b/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-quark-mxfp4-fp8-triton.yaml
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+model_name: amd/gpt-oss-20b-MoE-Quant-W-MXFP4-A-FP8-KV-FP8
+metric_threshold: 0.568
+reasoning_effort: low
+server_args: "--attention-backend ROCM_AITER_UNIFIED_ATTN --tensor-parallel-size 2"
+env:
+  VLLM_ROCM_USE_AITER: "1"
\ No newline at end of file
diff --git a/tests/evals/gpt_oss/configs/gpt-oss-20b-sm100-fi-mxfp4-mxfp8-trtllm.yaml b/tests/evals/gpt_oss/configs/gpt-oss-20b-sm100-fi-mxfp4-mxfp8-trtllm.yaml
index 4cea743490f7..895f72d09926 100644
--- a/tests/evals/gpt_oss/configs/gpt-oss-20b-sm100-fi-mxfp4-mxfp8-trtllm.yaml
+++ b/tests/evals/gpt_oss/configs/gpt-oss-20b-sm100-fi-mxfp4-mxfp8-trtllm.yaml
@@ -3,6 +3,4 @@
 model_name: "openai/gpt-oss-20b"
 metric_threshold: 0.568
 reasoning_effort: "low"
-server_args: "--tensor-parallel-size 2"
-env:
-  VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8: "1"
+server_args: "--tensor-parallel-size 2 --quantization-config.moe.activation mxfp8"
diff --git a/tests/evals/gpt_oss/configs/models-gfx950.txt b/tests/evals/gpt_oss/configs/models-gfx950.txt
index 2b6ff4f4a8d3..d25f4e3a5e2f 100644
--- a/tests/evals/gpt_oss/configs/models-gfx950.txt
+++ b/tests/evals/gpt_oss/configs/models-gfx950.txt
@@ -1,3 +1,6 @@
 # GFX950 model configurations for GPQA evaluation
 # Tests different environment variable combinations
-gpt-oss-20b-rocm-baseline.yaml
\ No newline at end of file
+gpt-oss-20b-rocm-baseline.yaml
+gpt-oss-20b-rocm-quark-mxfp4-bf16-aiter.yaml
+gpt-oss-20b-rocm-quark-mxfp4-bf16-triton.yaml
+gpt-oss-20b-rocm-quark-mxfp4-fp8-triton.yaml
diff --git a/tests/evals/gsm8k/configs/Nemotron-3-Super-120B-A12B-BF16.yaml b/tests/evals/gsm8k/configs/Nemotron-3-Super-120B-A12B-BF16.yaml
index d9110efaaad0..b0f886a86ad0 100644
--- a/tests/evals/gsm8k/configs/Nemotron-3-Super-120B-A12B-BF16.yaml
+++ b/tests/evals/gsm8k/configs/Nemotron-3-Super-120B-A12B-BF16.yaml
@@ -8,4 +8,5 @@ server_args: >-
   --max-model-len 4096
   --tensor-parallel-size 8
   --enable-expert-parallel
+  --mamba-backend flashinfer
   --speculative-config '{"method":"mtp","num_speculative_tokens":5}'
diff --git a/tests/evals/gsm8k/configs/Nemotron-3-Super-120B-A12B-NVFP4.yaml b/tests/evals/gsm8k/configs/Nemotron-3-Super-120B-A12B-NVFP4.yaml
index 50f097319462..71ba7d52f144 100644
--- a/tests/evals/gsm8k/configs/Nemotron-3-Super-120B-A12B-NVFP4.yaml
+++ b/tests/evals/gsm8k/configs/Nemotron-3-Super-120B-A12B-NVFP4.yaml
@@ -8,4 +8,5 @@ server_args: >-
   --max-model-len 4096
   --tensor-parallel-size 2
   --enable-expert-parallel
+  --mamba-backend flashinfer
   --speculative-config '{"method":"mtp","num_speculative_tokens":5}'
diff --git a/tests/evals/gsm8k/configs/Qwen3-4B-TQ-k3v4nc.yaml b/tests/evals/gsm8k/configs/Qwen3-4B-TQ-k3v4nc.yaml
new file mode 100644
index 000000000000..b9f9a7944f2f
--- /dev/null
+++ b/tests/evals/gsm8k/configs/Qwen3-4B-TQ-k3v4nc.yaml
@@ -0,0 +1,5 @@
+model_name: "Qwen/Qwen3-4B"
+accuracy_threshold: 0.78
+num_questions: 1319
+num_fewshot: 5
+server_args: "--kv-cache-dtype turboquant_k3v4_nc --max-model-len 4096"
diff --git a/tests/evals/gsm8k/configs/Qwen3-4B-TQ-k8v4.yaml b/tests/evals/gsm8k/configs/Qwen3-4B-TQ-k8v4.yaml
new file mode 100644
index 000000000000..200b570e23d5
--- /dev/null
+++ b/tests/evals/gsm8k/configs/Qwen3-4B-TQ-k8v4.yaml
@@ -0,0 +1,5 @@
+model_name: "Qwen/Qwen3-4B"
+accuracy_threshold: 0.80
+num_questions: 1319
+num_fewshot: 5
+server_args: "--kv-cache-dtype turboquant_k8v4 --max-model-len 4096"
diff --git a/tests/evals/gsm8k/configs/Qwen3-4B-TQ-t3nc.yaml b/tests/evals/gsm8k/configs/Qwen3-4B-TQ-t3nc.yaml
new file mode 100644
index 000000000000..1c833fe7bf2d
--- /dev/null
+++ b/tests/evals/gsm8k/configs/Qwen3-4B-TQ-t3nc.yaml
@@ -0,0 +1,5 @@
+model_name: "Qwen/Qwen3-4B"
+accuracy_threshold: 0.75
+num_questions: 1319
+num_fewshot: 5
+server_args: "--kv-cache-dtype turboquant_3bit_nc --max-model-len 4096"
diff --git a/tests/evals/gsm8k/configs/Qwen3-4B-TQ-t4nc.yaml b/tests/evals/gsm8k/configs/Qwen3-4B-TQ-t4nc.yaml
new file mode 100644
index 000000000000..6a7f82b66099
--- /dev/null
+++ b/tests/evals/gsm8k/configs/Qwen3-4B-TQ-t4nc.yaml
@@ -0,0 +1,5 @@
+model_name: "Qwen/Qwen3-4B"
+accuracy_threshold: 0.80
+num_questions: 1319
+num_fewshot: 5
+server_args: "--kv-cache-dtype turboquant_4bit_nc --max-model-len 4096"
diff --git a/tests/evals/gsm8k/configs/Qwen3.5-35B-A3B-MXFP4-AITER-TP2.yaml b/tests/evals/gsm8k/configs/Qwen3.5-35B-A3B-MXFP4-AITER-TP2.yaml
new file mode 100644
index 000000000000..657251a66038
--- /dev/null
+++ b/tests/evals/gsm8k/configs/Qwen3.5-35B-A3B-MXFP4-AITER-TP2.yaml
@@ -0,0 +1,12 @@
+model_name: "amd/Qwen3.5-35B-A3B-MXFP4"
+accuracy_threshold: 0.89
+tolerance: 0.03
+num_questions: 1319
+num_fewshot: 5
+server_args: >-
+  --max-model-len 4096
+  --tensor-parallel-size 2
+  --gpu-memory-utilization 0.35
+  --moe-backend aiter
+env:
+  VLLM_ROCM_USE_AITER: "1"
diff --git a/tests/evals/gsm8k/configs/Qwen3.5-35B-A3B-MXFP4-EMU-TP2.yaml b/tests/evals/gsm8k/configs/Qwen3.5-35B-A3B-MXFP4-EMU-TP2.yaml
new file mode 100644
index 000000000000..ad5ca701258e
--- /dev/null
+++ b/tests/evals/gsm8k/configs/Qwen3.5-35B-A3B-MXFP4-EMU-TP2.yaml
@@ -0,0 +1,10 @@
+model_name: "amd/Qwen3.5-35B-A3B-MXFP4"
+accuracy_threshold: 0.89
+tolerance: 0.03
+num_questions: 1319
+num_fewshot: 5
+server_args: >-
+  --max-model-len 4096
+  --tensor-parallel-size 2
+  --moe-backend emulation
+  --gpu-memory-utilization 0.35
diff --git a/tests/evals/gsm8k/configs/Qwen3.5-397B-A17B-NVFP4-DEP2.yaml b/tests/evals/gsm8k/configs/Qwen3.5-397B-A17B-NVFP4-DEP2.yaml
index cd35790c3456..09e0c9ab0913 100644
--- a/tests/evals/gsm8k/configs/Qwen3.5-397B-A17B-NVFP4-DEP2.yaml
+++ b/tests/evals/gsm8k/configs/Qwen3.5-397B-A17B-NVFP4-DEP2.yaml
@@ -7,3 +7,4 @@ server_args: >-
   --max-model-len 4096
   --data-parallel-size 2
   --enable-expert-parallel
+  --max-num-seqs 512
diff --git a/tests/evals/gsm8k/configs/models-blackwell-ep.txt b/tests/evals/gsm8k/configs/models-blackwell-ep.txt
new file mode 100644
index 000000000000..c2a19051986f
--- /dev/null
+++ b/tests/evals/gsm8k/configs/models-blackwell-ep.txt
@@ -0,0 +1,3 @@
+Qwen3-Next-80B-A3B-NVFP4-EP2.yaml
+Qwen3-Next-FP8-EP2.yaml
+Nemotron-3-Super-120B-A12B-NVFP4.yaml
diff --git a/tests/evals/gsm8k/configs/models-blackwell.txt b/tests/evals/gsm8k/configs/models-blackwell.txt
index 2936fa891e2c..3c9b1084de7b 100644
--- a/tests/evals/gsm8k/configs/models-blackwell.txt
+++ b/tests/evals/gsm8k/configs/models-blackwell.txt
@@ -3,6 +3,3 @@ Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml
 Qwen1.5-MoE-W4A16-CT.yaml
 DeepSeek-V2-Lite-Instruct-FP8.yaml
 Qwen3-30B-A3B-NVFP4.yaml
-Qwen3-Next-80B-A3B-NVFP4-EP2.yaml
-Qwen3-Next-FP8-EP2.yaml
-Nemotron-3-Super-120B-A12B-NVFP4.yaml
diff --git a/tests/evals/gsm8k/configs/models-mi3xx.txt b/tests/evals/gsm8k/configs/models-mi3xx.txt
index 6cf833b64642..e8759d7d02b1 100644
--- a/tests/evals/gsm8k/configs/models-mi3xx.txt
+++ b/tests/evals/gsm8k/configs/models-mi3xx.txt
@@ -2,3 +2,6 @@ DeepSeek-R1-TP_MI325.yaml
 DeepSeek-R1-DP_MI325.yaml
 DeepSeek-V3.2-TP_MI325.yaml
 DeepSeek-V3.2-DP_MI325.yaml
+Qwen3-30B-A3B-NVFP4.yaml
+Qwen3.5-35B-A3B-MXFP4-AITER-TP2.yaml
+Qwen3.5-35B-A3B-MXFP4-EMU-TP2.yaml
\ No newline at end of file
diff --git a/tests/evals/gsm8k/configs/models-qwen35-mi355.txt b/tests/evals/gsm8k/configs/models-qwen35-mi355.txt
index 4e7af71c7f4a..49925c827e3c 100644
--- a/tests/evals/gsm8k/configs/models-qwen35-mi355.txt
+++ b/tests/evals/gsm8k/configs/models-qwen35-mi355.txt
@@ -1 +1,3 @@
 Qwen3.5-35B-A3B-DEP2.yaml
+Qwen3.5-35B-A3B-MXFP4-AITER-TP2.yaml
+Qwen3.5-35B-A3B-MXFP4-EMU-TP2.yaml
diff --git a/tests/evals/gsm8k/configs/models-turboquant.txt b/tests/evals/gsm8k/configs/models-turboquant.txt
new file mode 100644
index 000000000000..518aac780b90
--- /dev/null
+++ b/tests/evals/gsm8k/configs/models-turboquant.txt
@@ -0,0 +1,4 @@
+Qwen3-4B-TQ-k8v4.yaml
+Qwen3-4B-TQ-t4nc.yaml
+Qwen3-4B-TQ-k3v4nc.yaml
+Qwen3-4B-TQ-t3nc.yaml
diff --git a/tests/evals/gsm8k/configs/moe-refactor/DeepSeek-V4-Flash-deepgemm-mega-moe.yaml b/tests/evals/gsm8k/configs/moe-refactor/DeepSeek-V4-Flash-deepgemm-mega-moe.yaml
new file mode 100644
index 000000000000..742d9e40b8a0
--- /dev/null
+++ b/tests/evals/gsm8k/configs/moe-refactor/DeepSeek-V4-Flash-deepgemm-mega-moe.yaml
@@ -0,0 +1,5 @@
+model_name: "deepseek-ai/DeepSeek-V4-Flash"
+accuracy_threshold: 0.95
+num_questions: 1319
+num_fewshot: 5
+server_args: "--trust-remote-code --kv-cache-dtype fp8 --block-size 256 --enable-expert-parallel --tensor-parallel-size 2 --attention_config.use_fp4_indexer_cache=True --moe-backend deep_gemm_mega_moe --tokenizer-mode deepseek_v4 --tool-call-parser deepseek_v4 --enable-auto-tool-choice --reasoning-parser deepseek_v4 --speculative_config.method=mtp --speculative_config.num_speculative_tokens=2"
diff --git a/tests/evals/gsm8k/configs/moe-refactor/config-b200.txt b/tests/evals/gsm8k/configs/moe-refactor/config-b200.txt
index d8bb5aa28fc6..25106cd34f2f 100644
--- a/tests/evals/gsm8k/configs/moe-refactor/config-b200.txt
+++ b/tests/evals/gsm8k/configs/moe-refactor/config-b200.txt
@@ -16,3 +16,4 @@ Mixtral-8x7B-BF16-triton.yaml
 Nemotron-Nano-30B-Fp8-ModelOpt-fi-trtllm.yaml
 Nemotron-Nano-30B-NvFp4-ModelOpt-fi-cutlass.yaml
 Nemotron-Nano-30B-NvFp4-ModelOpt-vllm-cutlass.yaml
+DeepSeek-V4-Flash-deepgemm-mega-moe.yaml
diff --git a/tests/evals/mrcr/README.md b/tests/evals/mrcr/README.md
new file mode 100644
index 000000000000..59acc11ac48c
--- /dev/null
+++ b/tests/evals/mrcr/README.md
@@ -0,0 +1,44 @@
+# MRCR Long-Context Accuracy Evaluation
+
+Smoke test for long-context behavior using OpenAI's public [`openai/mrcr`](https://huggingface.co/datasets/openai/mrcr) dataset. The model sees a long chat with several near-duplicate "needles" and must reproduce a specific earlier assistant turn verbatim, prepended with a random anti-guessing string.
+
+**Scoring:** if the response doesn't start with `random_string_to_prepend`, score is 0; otherwise the prefix is stripped and the mean `SequenceMatcher.ratio()` against the reference answer is reported.
+
+## Usage
+
+```bash
+# Pytest (spawns the server)
+pytest -s -v tests/evals/mrcr/test_mrcr_correctness.py \
+    --config-list-file=configs/models-small.txt
+
+# Standalone (server already running; model and context auto-discovered)
+vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3 --port 8000
+python tests/evals/mrcr/mrcr_eval.py --port 8000
+```
+
+## Configuration
+
+```yaml
+model_name: "Qwen/Qwen3-0.6B"
+# Per-needle thresholds catch bucket-specific regressions (sliding window,
+# chunked prefill, prefix cache) that an aggregate can hide. A scalar
+# (e.g. `match_ratio_threshold: 0.20`) is also accepted and checked against
+# the mean match ratio.
+match_ratio_threshold:
+  2: 0.30
+  4: 0.15
+  8: 0.10
+num_samples: 30
+needles: [2, 4, 8]
+# max_prompt_tokens: 32768       # Optional; defaults to server max_model_len - max_tokens - 256
+max_tokens: 2048
+concurrency: 8
+server_args: "--max-model-len 32768 --reasoning-parser qwen3"
+```
+
+## Notes
+
+- Samples stream from three parquet shards (`{N}needle/{N}needle_0.parquet`); only the first few row groups are fetched, not the full 1.4 GB repo.
+- `max_prompt_tokens` defaults to `max_model_len - max_tokens - 256`, i.e. fills whatever context the server advertises. Set `--max-model-len` on the server to control the smoke-test context length; override `--max-prompt-tokens` on the client to cap below that.
+- Sample length is pre-filtered by `n_chars × 4 ≤ max_prompt_tokens`, then verified via the server's `/tokenize` endpoint under the actual chat template.
+- Reasoning models: start the server with `--reasoning-parser <name>` (e.g. `qwen3`, `deepseek_r1`) so `<think>` goes to `message.reasoning_content` and doesn't contaminate the scored answer.
diff --git a/vllm/entrypoints/openai/speech_to_text/__init__.py b/tests/evals/mrcr/__init__.py
similarity index 100%
rename from vllm/entrypoints/openai/speech_to_text/__init__.py
rename to tests/evals/mrcr/__init__.py
diff --git a/tests/evals/mrcr/configs/Qwen3.5-4B.yaml b/tests/evals/mrcr/configs/Qwen3.5-4B.yaml
new file mode 100644
index 000000000000..c2fd438dd377
--- /dev/null
+++ b/tests/evals/mrcr/configs/Qwen3.5-4B.yaml
@@ -0,0 +1,7 @@
+model_name: "Qwen/Qwen3.5-4B"
+needles: [2, 4, 8]
+match_ratio_threshold:
+  2: 0.99
+  4: 0.84
+  8: 0.76
+server_args: "--max-model-len 128K --reasoning-parser qwen3"
diff --git a/tests/evals/mrcr/configs/models-small.txt b/tests/evals/mrcr/configs/models-small.txt
new file mode 100644
index 000000000000..b78704fe539f
--- /dev/null
+++ b/tests/evals/mrcr/configs/models-small.txt
@@ -0,0 +1 @@
+Qwen3.5-4B.yaml
diff --git a/tests/evals/mrcr/conftest.py b/tests/evals/mrcr/conftest.py
new file mode 100644
index 000000000000..46f59a56c238
--- /dev/null
+++ b/tests/evals/mrcr/conftest.py
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from pathlib import Path
+
+
+def pytest_addoption(parser):
+    """Add custom command line options."""
+    parser.addoption(
+        "--config-list-file",
+        default="configs/models-small.txt",
+        help="File containing list of config files to test",
+    )
+
+
+def pytest_generate_tests(metafunc):
+    """Generate test parameters from config files."""
+    if "config_filename" in metafunc.fixturenames:
+        config_list_file = metafunc.config.getoption("--config-list-file")
+
+        config_list_path = Path(config_list_file)
+        if not config_list_path.is_absolute():
+            test_dir_path = Path(__file__).parent / config_list_file
+            if test_dir_path.exists():
+                config_list_path = test_dir_path
+            else:
+                config_list_path = Path.cwd() / config_list_file
+
+        print(f"Looking for config list at: {config_list_path}")
+
+        config_files = []
+        if config_list_path.exists():
+            config_dir = config_list_path.parent
+            with open(config_list_path) as f:
+                for line in f:
+                    line = line.strip()
+                    if line and not line.startswith("#"):
+                        config_path = config_dir / line
+                        if config_path.exists():
+                            config_files.append(config_path)
+                            print(f"  ✓ Found: {config_path}")
+                        else:
+                            print(f"  ✗ Missing: {config_path}")
+        else:
+            print(f"Config list file not found: {config_list_path}")
+
+        if config_files:
+            metafunc.parametrize(
+                "config_filename",
+                config_files,
+                ids=[config_file.stem for config_file in config_files],
+            )
+        else:
+            print("No config files found, test will be skipped")
diff --git a/tests/evals/mrcr/mrcr_eval.py b/tests/evals/mrcr/mrcr_eval.py
new file mode 100644
index 000000000000..3ab87a57d122
--- /dev/null
+++ b/tests/evals/mrcr/mrcr_eval.py
@@ -0,0 +1,333 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""MRCR long-context evaluation for vLLM's OpenAI-compatible server.
+
+Streams samples from `openai/mrcr` on HuggingFace, sends chat completions to
+the server, and scores each response with a prefix-gated SequenceMatcher ratio
+against the reference answer.
+"""
+
+import argparse
+import asyncio
+import json
+import time
+from difflib import SequenceMatcher
+
+import aiohttp
+import numpy as np
+import requests
+from tqdm.asyncio import tqdm
+
+DATASET_REPO = "openai/mrcr"
+NEEDLE_SHARDS = {
+    2: "2needle/2needle_0.parquet",
+    4: "4needle/4needle_0.parquet",
+    8: "8needle/8needle_0.parquet",
+}
+# Reserve headroom for chat-template tokens on top of the messages.
+PROMPT_SAFETY_BUFFER = 256
+# Pre-filter heuristic before the authoritative /tokenize check.
+CHARS_PER_TOKEN = 4
+# Skip chain-of-thought on reasoning models; ignored by non-reasoning templates.
+DEFAULT_EXTRA_BODY: dict = {"chat_template_kwargs": {"enable_thinking": False}}
+
+
+def discover_server_model(base_url: str) -> tuple[str, int | None]:
+    """Return (model_id, max_model_len) from /v1/models."""
+    resp = requests.get(f"{base_url}/v1/models", timeout=30)
+    resp.raise_for_status()
+    data = resp.json().get("data", [])
+    if not data:
+        raise RuntimeError(f"No models advertised at {base_url}/v1/models")
+    entry = data[0]
+    return entry["id"], entry.get("max_model_len")
+
+
+def count_chat_tokens(base_url: str, model: str, messages: list[dict]) -> int:
+    """Return the chat-template-rendered token count via /tokenize."""
+    resp = requests.post(
+        f"{base_url}/tokenize",
+        json={"model": model, "messages": messages, "add_generation_prompt": True},
+        timeout=120,
+    )
+    resp.raise_for_status()
+    return int(resp.json()["count"])
+
+
+def _load_mrcr_samples(
+    needles: list[int],
+    max_prompt_tokens: int,
+    num_samples: int,
+    seed: int,
+    base_url: str,
+    model_name: str,
+) -> list[dict]:
+    """Stream MRCR samples balanced across needle buckets, token-verified."""
+    try:
+        from datasets import load_dataset
+    except ImportError as e:
+        raise ImportError(
+            "MRCR eval requires `datasets`. Install with: uv pip install datasets"
+        ) from e
+
+    max_chars = max_prompt_tokens * CHARS_PER_TOKEN
+    per_bucket = num_samples // len(needles)
+    leftover = num_samples - per_bucket * len(needles)
+
+    samples: list[dict] = []
+    for idx, n in enumerate(needles):
+        if n not in NEEDLE_SHARDS:
+            raise ValueError(f"Unsupported needle count {n}")
+        target = per_bucket + (1 if idx < leftover else 0)
+        if target == 0:
+            continue
+
+        ds = load_dataset(
+            DATASET_REPO,
+            data_files=NEEDLE_SHARDS[n],
+            split="train",
+            streaming=True,
+        ).shuffle(seed=seed + n, buffer_size=16)
+
+        taken = 0
+        for row in ds:
+            if int(row.get("n_chars", 0)) > max_chars:
+                continue
+            prompt = row["prompt"]
+            messages = json.loads(prompt) if isinstance(prompt, str) else list(prompt)
+            n_tokens = count_chat_tokens(base_url, model_name, messages)
+            if n_tokens > max_prompt_tokens:
+                continue
+            samples.append(
+                {
+                    "messages": messages,
+                    "answer": row["answer"],
+                    "random_string_to_prepend": row["random_string_to_prepend"],
+                    "n_needles": int(row["n_needles"]),
+                    "n_tokens": n_tokens,
+                }
+            )
+            taken += 1
+            if taken >= target:
+                break
+
+        if taken < target:
+            print(f"Warning: only {taken}/{target} samples for n_needles={n}")
+
+    if not samples:
+        raise RuntimeError("No MRCR samples fit; loosen max_prompt_tokens.")
+    return samples
+
+
+def score_mrcr(response: str, answer: str, random_prefix: str) -> float:
+    """Prefix-gated SequenceMatcher ratio; 0 if the prefix is missing."""
+    if not response.startswith(random_prefix):
+        return 0.0
+    stripped = response[len(random_prefix) :]
+    return SequenceMatcher(a=answer, b=stripped, autojunk=False).ratio()
+
+
+async def _call_chat(
+    session: aiohttp.ClientSession,
+    url: str,
+    model: str,
+    messages: list[dict],
+    max_tokens: int,
+    temperature: float,
+    seed: int | None,
+    extra_body: dict,
+) -> tuple[str, int]:
+    data = {
+        "model": model,
+        "messages": messages,
+        "temperature": temperature,
+        "max_tokens": max_tokens,
+        **extra_body,
+    }
+    if seed is not None:
+        data["seed"] = seed
+    try:
+        async with session.post(f"{url}/v1/chat/completions", json=data) as resp:
+            resp.raise_for_status()
+            result = await resp.json()
+            text = result["choices"][0]["message"]["content"] or ""
+            return text, result.get("usage", {}).get("completion_tokens", 0)
+    except Exception as e:
+        print(f"chat request failed: {e}")
+        return "", 0
+
+
+def evaluate_mrcr(
+    model_name: str | None = None,
+    num_samples: int = 40,
+    needles: list[int] | None = None,
+    max_prompt_tokens: int | None = None,
+    max_tokens: int = 2048,
+    host: str = "http://127.0.0.1",
+    port: int = 8000,
+    temperature: float = 0.0,
+    seed: int | None = 42,
+    concurrency: int = 8,
+    extra_body: dict | None = None,
+) -> dict:
+    """Run MRCR against a vLLM server; auto-discovers model and context."""
+    needles = needles or [2, 4, 8]
+    extra_body = DEFAULT_EXTRA_BODY if extra_body is None else extra_body
+    base_url = f"{host}:{port}"
+
+    discovered_model, server_max_len = discover_server_model(base_url)
+    if model_name is None:
+        model_name = discovered_model
+    if max_prompt_tokens is None:
+        if server_max_len is None:
+            raise RuntimeError(
+                "Server did not advertise max_model_len; pass --max-prompt-tokens."
+            )
+        max_prompt_tokens = max(512, server_max_len - max_tokens - PROMPT_SAFETY_BUFFER)
+    print(
+        f"Model: {model_name} | max_prompt_tokens={max_prompt_tokens} "
+        f"(server max_model_len={server_max_len}, max_tokens={max_tokens})"
+    )
+
+    samples = _load_mrcr_samples(
+        needles=needles,
+        max_prompt_tokens=max_prompt_tokens,
+        num_samples=num_samples,
+        seed=seed or 0,
+        base_url=base_url,
+        model_name=model_name,
+    )
+    tok_counts = [s["n_tokens"] for s in samples]
+    print(
+        f"Loaded {len(samples)} samples (needles={needles}, "
+        f"tokens={min(tok_counts)}-{max(tok_counts)})"
+    )
+
+    async def run():
+        sem = asyncio.Semaphore(concurrency)
+        responses = [""] * len(samples)
+        out_tokens = [0] * len(samples)
+
+        async def one(session, i):
+            async with sem:
+                text, toks = await _call_chat(
+                    session=session,
+                    url=base_url,
+                    model=model_name,
+                    messages=samples[i]["messages"],
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                    seed=seed,
+                    extra_body=extra_body,
+                )
+                responses[i] = text
+                out_tokens[i] = toks
+
+        timeout = aiohttp.ClientTimeout(total=1800)
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            await tqdm.gather(
+                *[one(session, i) for i in range(len(samples))], desc="MRCR"
+            )
+        return responses, out_tokens
+
+    tic = time.perf_counter()
+    responses, out_tokens = asyncio.run(run())
+    latency = time.perf_counter() - tic
+
+    scores = np.array(
+        [
+            score_mrcr(r, s["answer"], s["random_string_to_prepend"])
+            for r, s in zip(responses, samples)
+        ]
+    )
+    prefix_hits = np.array(
+        [
+            r.startswith(s["random_string_to_prepend"])
+            for r, s in zip(responses, samples)
+        ]
+    )
+    per_needle = {
+        f"match_ratio_n{n}": float(
+            scores[np.array([s["n_needles"] == n for s in samples])].mean()
+        )
+        for n in needles
+        if any(s["n_needles"] == n for s in samples)
+    }
+
+    total_out = int(sum(out_tokens))
+    return {
+        "model": model_name,
+        "match_ratio": float(scores.mean()),
+        "prefix_hit_rate": float(prefix_hits.mean()),
+        "per_needle": per_needle,
+        "num_samples": len(samples),
+        "latency": latency,
+        "total_output_tokens": total_out,
+        "tokens_per_second": total_out / latency if latency > 0 else 0.0,
+        "max_tokens": max_tokens,
+        "needles": needles,
+        "max_prompt_tokens": max_prompt_tokens,
+    }
+
+
+def main() -> None:
+    p = argparse.ArgumentParser(description="MRCR evaluation for vLLM serve")
+    p.add_argument("--model", default=None, help="Default: discovered from /v1/models")
+    p.add_argument("--num-samples", type=int, default=40)
+    p.add_argument(
+        "--needles", type=int, nargs="+", default=[2, 4, 8], choices=[2, 4, 8]
+    )
+    p.add_argument(
+        "--max-prompt-tokens",
+        type=int,
+        default=None,
+        help="Default: server max_model_len - max_tokens - buffer",
+    )
+    p.add_argument("--max-tokens", type=int, default=2048)
+    p.add_argument("--host", default="http://127.0.0.1")
+    p.add_argument("--port", type=int, default=8000)
+    p.add_argument("--temperature", type=float, default=0.0)
+    p.add_argument("--seed", type=int, default=42)
+    p.add_argument("--concurrency", type=int, default=8)
+    p.add_argument(
+        "--extra-body",
+        default=None,
+        help="JSON merged into each request. "
+        "Pass '{}' to disable the default enable_thinking=false.",
+    )
+    p.add_argument("--save-results", default=None)
+    args = p.parse_args()
+
+    extra_body = json.loads(args.extra_body) if args.extra_body else None
+
+    result = evaluate_mrcr(
+        model_name=args.model,
+        num_samples=args.num_samples,
+        needles=args.needles,
+        max_prompt_tokens=args.max_prompt_tokens,
+        max_tokens=args.max_tokens,
+        host=args.host,
+        port=args.port,
+        temperature=args.temperature,
+        seed=args.seed,
+        concurrency=args.concurrency,
+        extra_body=extra_body,
+    )
+
+    print("\nResults:")
+    print(f"  match_ratio:     {result['match_ratio']:.4f}")
+    print(f"  prefix_hit_rate: {result['prefix_hit_rate']:.4f}")
+    for k, v in result["per_needle"].items():
+        print(f"  {k}: {v:.4f}")
+    print(f"  samples:         {result['num_samples']}")
+    print(f"  latency:         {result['latency']:.1f}s")
+    print(f"  output tok/s:    {result['tokens_per_second']:.1f}")
+
+    if args.save_results:
+        with open(args.save_results, "w") as f:
+            json.dump(result, f, indent=2)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/evals/mrcr/test_mrcr_correctness.py b/tests/evals/mrcr/test_mrcr_correctness.py
new file mode 100644
index 000000000000..3adfd4dc8799
--- /dev/null
+++ b/tests/evals/mrcr/test_mrcr_correctness.py
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+MRCR long-context accuracy test.
+
+Usage:
+    pytest -s -v tests/evals/mrcr/test_mrcr_correctness.py \
+        --config-list-file=configs/models-small.txt
+"""
+
+import shlex
+
+import yaml
+
+from tests.utils import RemoteOpenAIServer
+
+from .mrcr_eval import evaluate_mrcr
+
+
+def _split_host_port(url: str, default_port: int = 8000) -> tuple[str, int]:
+    if "://" in url:
+        url = url.split("://", 1)[1]
+    host_port = url.split("/", 1)[0]
+    if ":" in host_port:
+        host, p = host_port.split(":", 1)
+        return f"http://{host}", int(p)
+    return f"http://{host_port}", default_port
+
+
+def test_mrcr_correctness(config_filename):
+    cfg = yaml.safe_load(config_filename.read_text(encoding="utf-8"))
+
+    server_args = shlex.split(cfg.get("server_args", ""))
+    server_args += ["--trust-remote-code", "--disable-uvicorn-access-log"]
+
+    print(
+        f"MRCR eval for {cfg['model_name']} (threshold {cfg['match_ratio_threshold']})"
+    )
+
+    with RemoteOpenAIServer(
+        cfg["model_name"],
+        server_args,
+        env_dict=cfg.get("env"),
+        max_wait_seconds=cfg.get("startup_max_wait_seconds", 600),
+    ) as server:
+        host, port = _split_host_port(server.url_for("v1"))
+        results = evaluate_mrcr(
+            model_name=cfg.get("model_name"),
+            num_samples=cfg.get("num_samples", 40),
+            needles=cfg.get("needles", [2, 4, 8]),
+            max_prompt_tokens=cfg.get("max_prompt_tokens"),
+            max_tokens=cfg.get("max_tokens", 2048),
+            host=host,
+            port=port,
+            concurrency=cfg.get("concurrency", 8),
+            extra_body=cfg.get("extra_body"),
+        )
+
+    threshold = cfg["match_ratio_threshold"]
+    tol = cfg.get("tolerance", 0.05)
+
+    print(f"  match_ratio:     {results['match_ratio']:.4f}")
+    print(f"  prefix_hit_rate: {results['prefix_hit_rate']:.4f}")
+    for k, v in results["per_needle"].items():
+        print(f"  {k}: {v:.4f}")
+
+    failures: list[str] = []
+    if isinstance(threshold, dict):
+        for n, expected in threshold.items():
+            key = f"match_ratio_n{int(n)}"
+            measured = results["per_needle"].get(key)
+            if measured is None:
+                failures.append(f"{key}: no samples collected")
+            elif measured < expected - tol:
+                failures.append(f"{key}: {measured:.4f} < {expected:.4f} - {tol:.4f}")
+    else:
+        measured = results["match_ratio"]
+        if measured < threshold - tol:
+            failures.append(
+                f"match_ratio: {measured:.4f} < {threshold:.4f} - {tol:.4f}"
+            )
+
+    assert not failures, "MRCR thresholds failed: " + "; ".join(failures)
diff --git a/tests/ir/ir_test_utils.py b/tests/ir/ir_test_utils.py
new file mode 100644
index 000000000000..a82206b6f72e
--- /dev/null
+++ b/tests/ir/ir_test_utils.py
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Shared test utilities for vLLM IR op correctness tests.
+"""
+
+import torch
+
+from vllm.ir.op import IrOp
+
+NUM_TOKENS = [1, 8, 17, 32, 512, 2048]
+COMMON_HIDDEN_SIZES = [
+    2048,  # Llama 3.2 1B, Qwen 3 MoE 30B-A3B, Gemma 3n
+    4096,  # Llama 3 8B, Qwen 3 8B
+    5120,  # Llama 4 Scout 17B-16E
+    7168,  # DeepSeek V3
+    8192,  # Llama 3 70B
+]
+
+
+def clone_args(args: tuple) -> tuple:
+    return tuple(a.clone() if isinstance(a, torch.Tensor) else a for a in args)
+
+
+def supported_providers(op: IrOp) -> list[str]:
+    return [
+        name for name, impl in op.impls.items() if name != "native" and impl.supported
+    ]
+
+
+def assert_close(op: IrOp, actual, expected):
+    if isinstance(actual, torch.Tensor):
+        tol = op.get_tolerance(actual.dtype)
+        try:
+            torch.testing.assert_close(actual, expected, **tol)
+        except AssertionError as e:
+            raise AssertionError(
+                f"{e}\n\nTo adjust tolerance, use:\n"
+                f"  ir.ops.{op.name}.override_tolerance("
+                f"{actual.dtype}, atol=..., rtol=...)"
+            ) from None
+    elif isinstance(actual, (tuple, list)):
+        for a, ex in zip(actual, expected):
+            assert_close(op, a, ex)
+    else:
+        assert actual == expected
diff --git a/tests/ir/test_inplace_op.py b/tests/ir/test_inplace_op.py
new file mode 100644
index 000000000000..decc4f51c777
--- /dev/null
+++ b/tests/ir/test_inplace_op.py
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+from torch import Tensor
+from torch.fx.experimental.proxy_tensor import make_fx
+
+import vllm.ir.op
+from vllm.ir.op import IrOp, IrOpInplaceOverload
+
+
+@vllm.ir.register_op(allow_inplace=True)
+def _custom_mm2(x: Tensor, w: Tensor) -> Tensor:
+    return x @ w
+
+
+@_custom_mm2.register_impl("regular")
+def _custom_mm2_regular(x: Tensor, w: Tensor) -> Tensor:
+    return x @ w + 1
+
+
+@_custom_mm2.register_impl("inplace", inplace=True)
+def _custom_mm2_inplace(x: Tensor, w: Tensor) -> Tensor:
+    x.copy_(x @ w + 2)
+    return x
+
+
+class TestInplaceOp:
+    def test_registration(self):
+        # Test that the inplace op is registered correctly.
+        assert "_custom_mm2" in IrOp.registry
+        assert IrOp.registry["_custom_mm2"] is _custom_mm2
+        assert _custom_mm2.torch_op is torch.ops.vllm_ir._custom_mm2.default
+        assert isinstance(_custom_mm2.maybe_inplace, IrOpInplaceOverload)
+        assert (
+            _custom_mm2.maybe_inplace.torch_op
+            is torch.ops.vllm_ir._custom_mm2.maybe_inplace
+        )
+
+    def test_inplace_dispatching(self):
+        # check that the correct implementation is dispatched based on priority,
+        # and inplace semantics hold
+        w = torch.randn(3, 3)
+        x = torch.randn(2, 3)
+        x1 = x.clone()
+
+        with _custom_mm2.set_priority(["regular"]):
+            result_regular = _custom_mm2.maybe_inplace(x, w)
+
+        # check that the regular op does not modify x
+        torch.testing.assert_close(x, x1, atol=0, rtol=0)
+
+        with _custom_mm2.set_priority(["inplace"]):
+            result_inplace: Tensor = _custom_mm2.maybe_inplace(x, w)
+
+        # check that the inplace op returns x directly
+        assert result_inplace.data_ptr() == x.data_ptr()
+
+        torch.testing.assert_close(result_inplace, x1 @ w + 2)
+        torch.testing.assert_close(result_regular, x1 @ w + 1)
+
+    def test_default_dispatching(self):
+        # check that the correct implementation is dispatched,
+        # and ops do not modify inputs when using the default overload
+        w = torch.randn(3, 3)
+        x = torch.randn(2, 3)
+        x1 = x.clone()
+
+        with _custom_mm2.set_priority(["regular"]):
+            result_regular = _custom_mm2(x, w)
+
+        with _custom_mm2.set_priority(["inplace"]):
+            result_inplace = _custom_mm2(x, w)
+
+        # check that x was not modified by either impl
+        torch.testing.assert_close(x, x1, atol=0, rtol=0)
+
+        torch.testing.assert_close(result_inplace, x1 @ w + 2)
+        torch.testing.assert_close(result_regular, x1 @ w + 1)
+
+    def test_trace(self):
+        # Test that the inplace op can be used in a graph.
+        def func(x: Tensor, y: Tensor) -> Tensor:
+            return _custom_mm2.maybe_inplace(x, y)
+
+        x = torch.randn(2, 3)
+        y = torch.randn(3, 4)
+        graph = make_fx(func)(x, y)
+        assert any(
+            node.target == torch.ops.vllm_ir._custom_mm2.maybe_inplace
+            for node in graph.graph.nodes
+        )
diff --git a/tests/ir/test_op.py b/tests/ir/test_op.py
new file mode 100644
index 000000000000..4928c0723e01
--- /dev/null
+++ b/tests/ir/test_op.py
@@ -0,0 +1,766 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import importlib.util
+import logging
+from pathlib import Path
+from typing import Any
+
+import pytest
+import torch
+from torch import fx
+from torch.fx.experimental.proxy_tensor import make_fx
+
+import vllm.ir.op
+from vllm.ir.op import RESERVED_PROVIDERS, IrOp, IrOpImpl
+
+
+class CustomError(Exception):
+    pass
+
+
+@pytest.fixture
+def custom_add_op(fake_vllm_ir):
+    """Register ``_custom_add`` plus impl_a, impl_b, impl_even for this test."""
+
+    @vllm.ir.register_op(allow_inplace=True)
+    def _custom_add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        return x + y
+
+    @_custom_add.register_impl("impl_a")
+    def impl_a(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        return x + y + 10
+
+    @_custom_add.register_impl("impl_b", inplace=True)
+    def impl_b(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        """Computes x+y+20"""
+        x.add_(y)
+        x.add_(20)
+        return x
+
+    @_custom_add.register_impl(
+        "impl_even", supports_args=lambda x, y: x.size(1) % 2 == 0
+    )
+    def impl_even(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        return x + y + 50
+
+    return _custom_add
+
+
+def test_registration_overloads(fake_vllm_ir):
+    assert all(
+        n not in IrOp.registry for n in ["_custom_sub", "_custom_mul", "_custom_div"]
+    )
+
+    # Calling with decorator
+    @vllm.ir.register_op()
+    def _custom_sub(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        return x - y
+
+    assert _custom_sub.name == "_custom_sub"
+    assert _custom_sub is IrOp.registry["_custom_sub"]
+
+    # Custom name
+    @vllm.ir.register_op(name="_custom_mul")
+    def custom_mul(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        return x * y
+
+    assert custom_mul.name == "_custom_mul"
+    assert custom_mul is IrOp.registry["_custom_mul"]
+
+    # Direct construction does not register directly
+    def _custom_div(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        return x / y
+
+    custom_div = IrOp("_custom_div", _custom_div)
+    assert custom_div.name == "_custom_div"
+    assert "_custom_div" not in IrOp.registry
+
+    # Duplicate op registration not allowed
+    with pytest.raises(AssertionError):
+
+        @vllm.ir.register_op
+        def _custom_mul(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+            return x * y - 100
+
+
+def test_no_kw_only_args(fake_vllm_ir):
+    # kw-only args not supported
+    with pytest.raises(ValueError, match="keyword-only arguments"):
+
+        @vllm.ir.register_op
+        def _custom_kwarg_op(
+            x: torch.Tensor, y: torch.Tensor, *, kwarg: int = 0
+        ) -> torch.Tensor:
+            return x + y + kwarg
+
+    assert "_custom_kwarg_op" not in IrOp.registry
+
+
+class TestIrOpCustomAdd:
+    # Registration invariants
+    def test_decorated_object(self, custom_add_op):
+        """Make sure that referring directly to an op is correct"""
+        _custom_add = custom_add_op
+        assert isinstance(_custom_add, IrOp)
+        assert "_custom_add" in IrOp.registry
+        assert _custom_add is IrOp.registry["_custom_add"]
+
+    def test_torch_op_is_registered(self, custom_add_op):
+        _custom_add = custom_add_op
+        torch_ops = getattr(torch.ops, vllm.ir.op.vllm_ir_torch_lib.ns)
+        assert hasattr(torch_ops, "_custom_add")
+        assert callable(torch_ops._custom_add.default)
+        assert _custom_add.torch_op is torch_ops._custom_add.default
+
+    # Semantic correctness
+    def test_semantics_match_native(self, custom_add_op):
+        _custom_add = custom_add_op
+        x = torch.randn(4, 5)
+        y = torch.randn(4, 5)
+
+        # Calls native by default
+        out = _custom_add(x, y)
+        ref = x + y
+
+        torch.testing.assert_close(out, ref)
+
+    # -------------------------
+    # Implementation registration
+    # -------------------------
+
+    def test_register_impl_is_non_intrusive(self, custom_add_op):
+        _custom_add = custom_add_op
+
+        @_custom_add.register_impl("dummy_provider")
+        def dummy_impl(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+            return x + y + 123
+
+        assert "dummy_provider" in _custom_add.impls
+        assert isinstance(_custom_add.impls["dummy_provider"], IrOpImpl)
+
+        x = torch.ones(2, 2)
+        y = torch.ones(2, 2)
+
+        # Native semantics must still hold
+        torch.testing.assert_close(_custom_add(x, y), x + y)
+
+    def test_schema_contains_tensor_signature(self, custom_add_op):
+        _custom_add = custom_add_op
+        schema = _custom_add._schema_str
+
+        assert "Tensor" in schema
+        assert "-> Tensor" in schema
+
+    # -------------------------
+    # FX visibility
+    # -------------------------
+
+    @pytest.mark.parametrize("enable_torch_wrap", [True, False])
+    @pytest.mark.parametrize("symbolic_trace", [True, False])
+    @pytest.mark.parametrize("overload", ["default", "maybe_inplace"])
+    def test_trace_sees_single_custom_op(
+        self,
+        custom_add_op,
+        symbolic_trace: bool,
+        enable_torch_wrap: bool,
+        overload: str,
+    ):
+        _custom_add = custom_add_op
+        op_fn = _custom_add if overload == "default" else _custom_add.maybe_inplace
+        torch_op = (
+            _custom_add.torch_op
+            if overload == "default"
+            else _custom_add.maybe_inplace.torch_op
+        )
+
+        def fn(x, y):
+            return op_fn(x, y)
+
+        def find_fn(target: Any, gm: fx.GraphModule):
+            return gm.graph.find_nodes(op="call_function", target=target)
+
+        with pytest.raises(CustomError), vllm.ir.enable_torch_wrap(enable_torch_wrap):
+            if symbolic_trace:
+                gm = torch.fx.symbolic_trace(fn)
+            else:
+                gm = make_fx(fn)(torch.randn(2, 2), torch.randn(2, 2))
+
+            x1, y1 = torch.rand(5, 4), torch.rand(5, 4)
+            out_fx = gm(x1, y1)
+            out_eager = fn(x1, y1)
+
+            # raise error to check enable_torch_wrap context restored correctly
+            raise CustomError
+
+        # check behavior matches eager in all cases
+        torch.testing.assert_close(out_fx, out_eager)
+
+        # check that IR nodes only appear if enable_torch_wrap=True
+        ir_nodes = find_fn(torch_op, gm)
+        if enable_torch_wrap:
+            assert len(ir_nodes) == 1, gm.code
+        else:
+            assert len(ir_nodes) == 0, gm.code
+
+        # with torch wrapping enabled (default), IR nodes appear
+        if symbolic_trace:
+            gm = torch.fx.symbolic_trace(fn)
+        else:
+            gm = make_fx(fn)(torch.randn(2, 2), torch.randn(2, 2))
+
+        ir_nodes = find_fn(torch_op, gm)
+        assert len(ir_nodes) == 1, gm.code
+
+
+class TestIrOpImplDispatch:
+    def test_register_impl(self, custom_add_op):
+        _custom_add = custom_add_op
+        assert "impl_a" in _custom_add.impls
+        impl = _custom_add.impls["impl_a"]
+
+        assert impl is _custom_add.impls["impl_a"]
+        assert impl.op is _custom_add
+        assert impl.provider == "impl_a"
+        assert callable(impl.impl_fn)
+
+        # Test duplicate registration rejected
+        with pytest.raises(AssertionError):
+
+            @_custom_add.register_impl("impl_a")
+            def impl_a_dup(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+                return x + y + 30
+
+        # Check the original impl is still intact
+        assert _custom_add.impls["impl_a"] is impl
+
+        # Check support all args
+        assert _custom_add.impls["impl_a"].supports_all_args
+        assert _custom_add.impls["impl_b"].supports_all_args
+        assert not _custom_add.impls["impl_even"].supports_all_args
+
+    def test_reserved_provider_rejected(self, custom_add_op):
+        _custom_add = custom_add_op
+        for provider in RESERVED_PROVIDERS:
+            with pytest.raises(AssertionError):
+
+                @_custom_add.register_impl(provider)
+                def bad_impl(x, y):
+                    return x + y
+
+    def test_set_priority_scoped(self, custom_add_op):
+        _custom_add = custom_add_op
+        assert _custom_add.get_priority() == []
+
+        with _custom_add.set_priority(["impl_even", "impl_b"]):
+            assert _custom_add.get_priority() == ["impl_even", "impl_b"]
+
+            # Check nesting
+            with _custom_add.set_priority(["impl_b"]):
+                assert _custom_add.get_priority() == ["impl_b"]
+
+            # Restored
+            assert _custom_add.get_priority() == ["impl_even", "impl_b"]
+
+            # Check that exception restores priority
+            with pytest.raises(CustomError), _custom_add.set_priority(["impl_a"]):
+                assert _custom_add.get_priority() == ["impl_a"]
+                raise CustomError
+
+            # Restored again
+            assert _custom_add.get_priority() == ["impl_even", "impl_b"]
+
+        # Restored to empty
+        assert _custom_add.get_priority() == []
+
+    @pytest.mark.parametrize(
+        "default,override",
+        [
+            (["impl_even", "impl_b"], ["impl_a"]),
+            (["impl_a"], ["impl_even", "impl_b"]),
+        ],
+    )
+    def test_set_default_priority(
+        self, custom_add_op, default: list[str], override: list[str]
+    ):
+        _custom_add = custom_add_op
+        assert _custom_add.get_priority() == []
+
+        _custom_add.set_default(default)
+        assert _custom_add.get_priority() == default
+
+        # Priority doesn't change after exiting the set_priority context.
+        with _custom_add.set_priority(override):
+            assert _custom_add.get_priority() == override
+        assert _custom_add.get_priority() == default
+
+        # Should override the previous default.
+        _custom_add.set_default(override)
+        assert _custom_add.get_priority() == override
+
+    @pytest.mark.parametrize("overload", ["default", "maybe_inplace"])
+    def test_dispatch_priority_order(self, custom_add_op, overload: str):
+        _custom_add = custom_add_op
+        op_fn = _custom_add if overload == "default" else _custom_add.maybe_inplace
+        torch_op = (
+            _custom_add.torch_op
+            if overload == "default"
+            else _custom_add.maybe_inplace.torch_op
+        )
+
+        x = torch.tensor(1, dtype=torch.int32)
+        y = torch.tensor(2, dtype=torch.int32)
+
+        with _custom_add.set_priority(["impl_b", "impl_a"]):
+            assert _custom_add.dispatch(x, y) is _custom_add.impls["impl_b"]
+            out1 = op_fn(x.clone(), y)
+            out2 = torch_op(x.clone(), y)
+
+            with _custom_add.set_priority(["impl_a"]):
+                assert _custom_add.dispatch(x, y) is _custom_add.impls["impl_a"]
+                out3 = op_fn(x.clone(), y)
+                out4 = torch_op(x.clone(), y)
+
+        # impl_b
+        assert out1.item() == 1 + 2 + 20
+        assert out2.item() == 1 + 2 + 20
+        # impl_a
+        assert out3.item() == 1 + 2 + 10
+        assert out4.item() == 1 + 2 + 10
+
+    def test_unsupported_impl_filtered(self, custom_add_op):
+        _custom_add = custom_add_op
+
+        @_custom_add.register_impl("impl_unsupported", supported=False)
+        def impl_unsupported(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+            return x + y + 999
+
+        x = torch.tensor(1, dtype=torch.int32)
+        y = torch.tensor(2, dtype=torch.int32)
+
+        with _custom_add.set_priority(["impl_unsupported", "impl_a"]):
+            assert _custom_add.get_priority() == ["impl_a"]
+            out = _custom_add(x, y)
+
+        # impl_unsupported skipped → impl_a
+        assert out.item() == 1 + 2 + 10
+
+    def test_supports_args_runtime_dispatch_and_warning(
+        self, custom_add_op, caplog_vllm: pytest.LogCaptureFixture
+    ):
+        _custom_add = custom_add_op
+        x1 = torch.ones((2, 2), dtype=torch.int32)
+        y1 = torch.full((2, 2), 2, dtype=torch.int32)
+
+        x2 = torch.ones((2, 3), dtype=torch.int32)
+        y2 = torch.full((2, 3), 2, dtype=torch.int32)
+
+        with (
+            caplog_vllm.at_level(logging.WARNING),
+            _custom_add.set_priority(["impl_even"]),
+        ):
+            # Test the warning about native fallback is logged (before even dispatching)
+            assert len(caplog_vllm.records) == 1
+            message = caplog_vllm.records[0].message
+            assert "_custom_add" in message
+            assert "fallback to native" in message
+            assert "priority" in message
+
+            # Check dispatching
+            assert _custom_add.get_priority() == ["impl_even", "native"]
+            assert _custom_add.dispatch(x1, y1) is _custom_add.impls["impl_even"]
+            assert _custom_add.dispatch(x2, y2) is _custom_add.impls["native"]
+
+            out1 = _custom_add(x1, y1)  # size(1) == 2 → impl_even
+            out2 = _custom_add(x2, y2)  # size(1) == 3 → native fallback
+
+        # no other warnings
+        assert len(caplog_vllm.records) == 1
+        assert torch.all(out1 == 1 + 2 + 50)
+        assert torch.all(out2 == 1 + 2)
+
+    def test_default_priority(
+        self,
+        custom_add_op,
+        caplog_vllm: pytest.LogCaptureFixture,
+        disable_log_dedup,
+    ):
+        _custom_add = custom_add_op
+        # Make sure logs are not deduplicated to properly test the warning
+        x = torch.tensor([3], dtype=torch.int32)
+        y = torch.tensor([4], dtype=torch.int32)
+
+        # No priority set → falls back to native
+        assert _custom_add.get_priority() == []
+        with caplog_vllm.at_level(logging.WARNING):
+            # Native by default
+            assert _custom_add.dispatch(x, y) is _custom_add.impls["native"]
+            out = _custom_add(x, y)
+
+        # Check dispatching to native by default
+        assert out.item() == 3 + 4
+
+        # Check warning
+        assert len(caplog_vllm.records) == 2
+        message = caplog_vllm.records[0].message.lower()
+        assert "_custom_add" in message
+        assert "priority not set" in message
+
+
+@pytest.mark.parametrize("default", [True, False])
+def test_set_default_torch_wrap(default: bool):
+    """set_default_torch_wrap permanently flips the global flag."""
+    original = vllm.ir.op._ENABLE_TORCH_WRAP
+    try:
+        vllm.ir.set_default_torch_wrap(default)
+        assert vllm.ir.op._ENABLE_TORCH_WRAP is default
+
+        # Flag doesn't change after exiting the enable_torch_wrap context.
+        with vllm.ir.enable_torch_wrap(not default):
+            assert vllm.ir.op._ENABLE_TORCH_WRAP is (not default)
+        assert vllm.ir.op._ENABLE_TORCH_WRAP is default
+
+        # Should override the previous default.
+        vllm.ir.set_default_torch_wrap(not default)
+        assert vllm.ir.op._ENABLE_TORCH_WRAP is (not default)
+    finally:
+        vllm.ir.op._ENABLE_TORCH_WRAP = original
+
+
+@pytest.fixture
+def custom_mm_op(fake_vllm_ir):
+    """Fixture that registers ``_custom_mm`` (isolated by ``fake_vllm_ir``)."""
+
+    @vllm.ir.register_op
+    def _custom_mm(
+        x: torch.Tensor, y: torch.Tensor, bias: torch.Tensor | None = None
+    ) -> torch.Tensor:
+        tmp = x @ y
+        return tmp if bias is None else tmp + bias
+
+    return _custom_mm
+
+
+def test_default_args(custom_mm_op):
+    _custom_mm = custom_mm_op
+
+    # Test that default args are properly applied when dispatching and calling
+    @_custom_mm.register_impl("impl_mm", supports_args=lambda x, y, bias=None: True)
+    def impl_mm(
+        x: torch.Tensor, y: torch.Tensor, bias: torch.Tensor | None = None
+    ) -> torch.Tensor:
+        tmp = x @ y
+        return tmp + 50 if bias is None else tmp + bias + 100
+
+    x1 = torch.tensor([1, 2], dtype=torch.int32)
+    x2 = torch.tensor([3, 4], dtype=torch.int32)
+
+    # Test that supports_args receives the defaulted args
+    assert impl_mm.supports_args(x1, x2)
+    with _custom_mm.set_priority(["impl_mm", "native"]):
+        assert _custom_mm.dispatch(x1, x2) is impl_mm
+
+
+def test_bad_impl_registrations(custom_mm_op):
+    _custom_mm = custom_mm_op
+    # Check bad schema
+    with pytest.raises(ValueError, match="does not match native schema"):
+
+        @_custom_mm.register_impl("impl_mm_bad_schema")
+        def impl_mm_bad_schema(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+            return x @ y - 1
+
+    with pytest.raises(ValueError, match="does not match native schema"):
+
+        @_custom_mm.register_impl("impl_mm_bad_schema_2")
+        def impl_mm_bad_schema_2(
+            x: torch.Tensor, y: torch.Tensor, b: torch.Tensor | None = None
+        ) -> torch.Tensor:
+            return x @ y + b - 2
+
+    with pytest.raises(ValueError, match="does not match native schema"):
+
+        @_custom_mm.register_impl("impl_mm_bad_schema_3")
+        def impl_mm_bad_schema_3(
+            x: torch.Tensor, y: torch.Tensor, bias: torch.Tensor
+        ) -> torch.Tensor:
+            return x @ y + bias - 5
+
+    # check supports_args with incorrect params
+    with pytest.raises(ValueError, match="supports_args must be a callable"):
+
+        @_custom_mm.register_impl("impl_mm_bad_supports_args", supports_args=True)
+        def impl_mm_bad_supports_args(
+            x: torch.Tensor, y: torch.Tensor, bias: torch.Tensor | None = None
+        ) -> torch.Tensor:
+            return x @ y + 10
+
+    with pytest.raises(ValueError, match="number of parameters"):
+
+        @_custom_mm.register_impl(
+            "impl_mm_bad_supports_args_2", supports_args=lambda x, y: True
+        )
+        def impl_mm_bad_supports_args(
+            x: torch.Tensor, y: torch.Tensor, bias: torch.Tensor | None = None
+        ) -> torch.Tensor:
+            return x @ y + 10
+
+    with pytest.raises(ValueError, match="keyword-only parameters"):
+
+        @_custom_mm.register_impl(
+            "impl_mm_bad_supports_args_3", supports_args=lambda x, y, *, b: True
+        )
+        def impl_mm_bad_supports_args_2(
+            x: torch.Tensor, y: torch.Tensor, bias: torch.Tensor | None = None
+        ) -> torch.Tensor:
+            return x @ y + 20
+
+    with pytest.raises(ValueError, match="does not match native parameter"):
+
+        @_custom_mm.register_impl(
+            "impl_mm_bad_supports_args_4", supports_args=lambda x, y, b: True
+        )
+        def impl_mm_bad_supports_args_4(
+            x: torch.Tensor, y: torch.Tensor, bias: torch.Tensor | None = None
+        ) -> torch.Tensor:
+            return x @ y + 30
+
+    with pytest.raises(ValueError, match="does not match native default"):
+
+        @_custom_mm.register_impl(
+            "impl_mm_bad_supports_args_5", supports_args=lambda x, y, bias=1: True
+        )
+        def impl_mm_bad_supports_args_5(
+            x: torch.Tensor, y: torch.Tensor, bias: torch.Tensor | None = None
+        ) -> torch.Tensor:
+            return x @ y + 40
+
+    # With fixture, each test gets a fresh op with only "native" impl
+    assert set(_custom_mm.impls.keys()) == {"native"}
+
+
+IMPL_OOT_SRC = """
+import torch
+
+@_custom_mm.register_impl("impl_mm_oot")
+def impl_mm_oot(
+    x: torch.Tensor, y: torch.Tensor, bias: torch.Tensor | None = None
+) -> torch.Tensor:
+    return x @ y - 99
+"""
+
+
+def load_custom_mm_module(file_path: Path, custom_mm_op):
+    spec = importlib.util.spec_from_file_location("_custom_mm_oot", file_path)
+    assert spec is not None
+    module = importlib.util.module_from_spec(spec)
+
+    # Inject the variable into the module's global namespace
+    # This allows the @_custom_mm.register_impl decorator to work
+    module._custom_mm = custom_mm_op  # type: ignore[attr-defined]
+
+    # Execute the file; this triggers the decorator
+    assert spec.loader is not None
+    spec.loader.exec_module(module)
+    return module
+
+
+def test_uuid_and_oot(custom_mm_op, tmp_path: Path):
+    _custom_mm = custom_mm_op
+    file_path = tmp_path / "_custom_mm_oot.py"
+    file_path.write_text(IMPL_OOT_SRC)
+
+    assert "impl_mm_oot" not in _custom_mm.impls
+    _ = load_custom_mm_module(file_path, _custom_mm)
+    assert "impl_mm_oot" in _custom_mm.impls
+
+    uuid = _custom_mm.impls["impl_mm_oot"].uuid()
+    del _custom_mm.impls["impl_mm_oot"]
+
+    # Replace file source
+    file_path.write_text(IMPL_OOT_SRC + " # added file source")
+    assert "impl_mm_oot" not in _custom_mm.impls
+    _ = load_custom_mm_module(file_path, _custom_mm)
+    assert "impl_mm_oot" in _custom_mm.impls
+
+    uuid1 = _custom_mm.impls["impl_mm_oot"].uuid()
+    assert uuid1 != uuid
+    del _custom_mm.impls["impl_mm_oot"]
+
+    # Back to original
+    file_path.write_text(IMPL_OOT_SRC)
+    assert "impl_mm_oot" not in _custom_mm.impls
+    _ = load_custom_mm_module(file_path, _custom_mm)
+    assert "impl_mm_oot" in _custom_mm.impls
+
+    uuid2 = _custom_mm.impls["impl_mm_oot"].uuid()
+    assert uuid2 == uuid
+    assert uuid2 != uuid1
+    del _custom_mm.impls["impl_mm_oot"]
+
+
+def _test_native(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+    return x + y
+
+
+def _make_op_with_generator(name: str = "_ig_test"):
+    op = IrOp(name, _test_native)
+
+    @op.register_input_generator
+    def _gen(n: int = 4):
+        x = torch.randn(n, 3)
+        y = torch.randn(n, 3)
+        return x, y
+
+    return op
+
+
+def _test_native_single(x: torch.Tensor) -> torch.Tensor:
+    return x
+
+
+class TestInputGenerator:
+    def test_no_input_generator_by_default(self):
+        op = IrOp("_ig_test_no_gen", _test_native_single)
+        assert not op.has_input_generator
+
+    def test_register_input_generator(self):
+        op = _make_op_with_generator("_ig_test_reg")
+        assert op.has_input_generator
+
+    def test_generate_inputs_returns_tuple(self):
+        op = _make_op_with_generator("_ig_test_tuple")
+        result = op.generate_inputs(n=2)
+        assert isinstance(result, tuple)
+        assert len(result) == 2
+        assert result[0].shape == (2, 3)
+        assert result[1].shape == (2, 3)
+
+    def test_generate_inputs_default_kwargs(self):
+        op = _make_op_with_generator("_ig_test_default")
+        result = op.generate_inputs()
+        assert result[0].shape == (4, 3)
+
+    def test_generate_inputs_without_registration_raises(self):
+        op = IrOp("_ig_test_no_gen_raises", _test_native_single)
+        with pytest.raises(RuntimeError, match="No input generator"):
+            op.generate_inputs()
+
+
+class TestTolerance:
+    def test_override_and_get_tolerance(self):
+        op = IrOp("_tol_test", _test_native)
+
+        tol = op.get_tolerance(torch.float32)
+        assert tol == {"atol": 1e-5, "rtol": 1.3e-6}
+
+        op.override_tolerance(torch.float32, atol=0.1, rtol=0.2)
+        assert op.get_tolerance(torch.float32) == {"atol": 0.1, "rtol": 0.2}
+        assert op.get_tolerance(torch.float16) == {"atol": 1e-3, "rtol": 1e-3}
+
+    def test_get_tolerance_raises_for_unknown_dtype(self):
+        op = IrOp("_tol_test_unknown", _test_native)
+        with pytest.raises(ValueError, match="No tolerance defined"):
+            op.get_tolerance(torch.complex64)
+
+
+def test_naming_validation(fake_vllm_ir):
+    """Test that op and provider names are validated ([a-z_][a-z_0-9]*)."""
+
+    # Valid op and provider names
+    @vllm.ir.register_op
+    def _valid_name_123(x: torch.Tensor) -> torch.Tensor:
+        return x
+
+    @_valid_name_123.register_impl("valid_provider_123")
+    def valid_impl(x: torch.Tensor) -> torch.Tensor:
+        return x + 1
+
+    # Invalid op names should fail
+    with pytest.raises(ValueError, match="name.*invalid"):
+
+        @vllm.ir.register_op
+        def InvalidName(x: torch.Tensor) -> torch.Tensor:
+            return x
+
+    with pytest.raises(ValueError, match="name.*invalid"):
+
+        @vllm.ir.register_op(name="123invalid")
+        def some_func(x: torch.Tensor) -> torch.Tensor:
+            return x
+
+    # Invalid provider names should fail
+    with pytest.raises(ValueError, match="name.*invalid"):
+
+        @_valid_name_123.register_impl("Invalid-Provider")
+        def invalid_impl(x: torch.Tensor) -> torch.Tensor:
+            return x + 1
+
+
+def test_registration_stack_traces(fake_vllm_ir):
+    """Test that stack traces are captured for ops and impls."""
+
+    @vllm.ir.register_op
+    def _test_stack(x: torch.Tensor) -> torch.Tensor:
+        return x
+
+    @_test_stack.register_impl("test_provider")
+    def test_impl(x: torch.Tensor) -> torch.Tensor:
+        return x + 1
+
+    # Verify op stack trace
+    assert hasattr(_test_stack, "_registration_stack")
+    assert len(_test_stack._registration_stack) > 0
+    op_stack_str = "".join(_test_stack._registration_stack)
+    assert "test_op.py" in op_stack_str
+    # Last frame should be the decorator in user code, not internal decorator logic
+    assert "@vllm.ir.register_op" in _test_stack._registration_stack[-1]
+    assert "return decorator(f)" not in op_stack_str
+
+    # Verify impl stack trace
+    impl = _test_stack.impls["test_provider"]
+    assert hasattr(impl, "_registration_stack")
+    assert len(impl._registration_stack) > 0
+    impl_stack_str = "".join(impl._registration_stack)
+    assert "test_op.py" in impl_stack_str
+    # Last frame should be the decorator in user code
+    assert '@_test_stack.register_impl("test_provider")' in impl._registration_stack[-1]
+
+
+def test_op_repr_uses_docstring(fake_vllm_ir):
+    """Test that __str__ uses the function's docstring and __repr__ is simple."""
+
+    @vllm.ir.register_op
+    def _test_repr_with_doc(x: torch.Tensor) -> torch.Tensor:
+        """First line of docstring.
+
+        Additional details here.
+        """
+        return x
+
+    @vllm.ir.register_op
+    def _test_repr_no_doc(x: torch.Tensor) -> torch.Tensor:
+        return x
+
+    # __str__ with docstring: uses first line only
+    str_with = str(_test_repr_with_doc)
+    assert "IrOp('_test_repr_with_doc')" in str_with
+    assert "First line of docstring." in str_with
+    assert "Additional details" not in str_with
+
+    # __str__ without docstring: simple format
+    assert str(_test_repr_no_doc) == "IrOp('_test_repr_no_doc')"
+
+    # __repr__ should be simple for both
+    assert repr(_test_repr_with_doc) == "IrOp('_test_repr_with_doc')"
+    assert repr(_test_repr_no_doc) == "IrOp('_test_repr_no_doc')"
+
+
+def test_vllm_ir_fixture(fake_vllm_ir):
+    """Test that the fake_vllm_ir fixture provides test isolation."""
+
+    @vllm.ir.register_op
+    def _test_fixture(x: torch.Tensor) -> torch.Tensor:
+        return x
+
+    assert "_test_fixture" in IrOp.registry
+    # Fixture will automatically clean up after test
diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py
index 3ebf9cc3713a..db4dcc8a636e 100644
--- a/tests/kernels/attention/test_attention_selector.py
+++ b/tests/kernels/attention/test_attention_selector.py
@@ -427,3 +427,122 @@ def test_per_head_quant_scales_backend_selection(
                     use_per_head_quant_scales=True,
                 )
             assert backend_name in str(exc_info.value)
+
+
+@pytest.mark.parametrize(
+    "backend_name,use_non_causal,should_succeed",
+    [
+        ("FLASH_ATTN", True, True),  # FlashAttn supports non-causal
+        ("FLASH_ATTN", False, True),  # FlashAttn also works with causal
+        ("FLASHINFER", True, False),  # FlashInfer does not support non-causal
+        ("FLASHINFER", False, True),  # FlashInfer works with causal
+    ],
+)
+def test_non_causal_backend_selection(
+    backend_name: str, use_non_causal: bool, should_succeed: bool
+):
+    """Test that use_non_causal on AttentionConfig controls backend filtering.
+
+    DFlashProposer sets use_non_causal=True on the draft model's
+    AttentionConfig so only non-causal-capable backends are selected.
+    The target model keeps use_non_causal=False (default) and can use
+    any backend.
+    """
+    _cached_get_attn_backend.cache_clear()
+
+    attention_config = AttentionConfig(
+        backend=AttentionBackendEnum[backend_name],
+        use_non_causal=use_non_causal,
+    )
+    cache_config = CacheConfig(block_size=16)
+    vllm_config = VllmConfig(
+        attention_config=attention_config, cache_config=cache_config
+    )
+
+    if CudaPlatform is None:
+        pytest.skip("CudaPlatform not available")
+    with (
+        set_current_vllm_config(vllm_config),
+        patch("vllm.platforms.current_platform", CudaPlatform()),
+    ):
+        if should_succeed:
+            backend = get_attn_backend(
+                head_size=128,
+                dtype=torch.float16,
+                kv_cache_dtype=None,
+            )
+            assert backend.get_name() == backend_name
+        else:
+            with pytest.raises(ValueError) as exc_info:
+                get_attn_backend(
+                    head_size=128,
+                    dtype=torch.float16,
+                    kv_cache_dtype=None,
+                )
+            assert "non-causal" in str(exc_info.value).lower()
+
+
+def test_non_causal_autoselect_backend():
+    """Test that when backend=None with use_non_causal=True, auto-selection
+    picks a compatible backend.
+
+    This simulates the DFlash scenario where the user doesn't specify
+    --attention-backend or --speculative-config.attention_backend.
+    The drafter inherits backend=None and auto-selects a backend that
+    supports non-causal attention.
+    """
+    _cached_get_attn_backend.cache_clear()
+
+    attention_config = AttentionConfig(
+        backend=None,
+        use_non_causal=True,
+    )
+    cache_config = CacheConfig(block_size=16)
+    vllm_config = VllmConfig(
+        attention_config=attention_config, cache_config=cache_config
+    )
+
+    if CudaPlatform is None:
+        pytest.skip("CudaPlatform not available")
+    with (
+        set_current_vllm_config(vllm_config),
+        patch("vllm.platforms.current_platform", CudaPlatform()),
+    ):
+        backend = get_attn_backend(
+            head_size=128,
+            dtype=torch.float16,
+            kv_cache_dtype=None,
+        )
+        assert backend.supports_non_causal()
+
+
+@pytest.mark.parametrize(
+    "kv_cache_dtype",
+    [
+        "fp8_e5m2",
+        "fp8_ds_mla",
+        "fp8_inc",
+        "nvfp4",
+        "fp8_per_token_head",
+        "int8_per_token_head",
+    ],
+)
+def test_flash_attn_rejects_unhandled_kv_cache_dtypes(kv_cache_dtype: str):
+    """FlashAttentionBackend must not claim support for kv_cache dtypes
+    that it cannot handle."""
+    from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend
+
+    assert not FlashAttentionBackend.supports_kv_cache_dtype(kv_cache_dtype)
+
+
+@pytest.mark.parametrize("kv_cache_dtype", ["fp8", "fp8_e4m3"])
+def test_flash_attn_accepts_handled_fp8_variants(
+    kv_cache_dtype: str, monkeypatch: pytest.MonkeyPatch
+):
+    """FlashAttentionBackend must accept the two fp8 dtypes it can actually
+    handle: 'fp8' (alias for fp8_e4m3fn) and 'fp8_e4m3'."""
+    import vllm.v1.attention.backends.flash_attn as fa_mod
+    from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend
+
+    monkeypatch.setattr(fa_mod.current_platform, "is_xpu", lambda: True)
+    assert FlashAttentionBackend.supports_kv_cache_dtype(kv_cache_dtype)
diff --git a/tests/kernels/attention/test_cache.py b/tests/kernels/attention/test_cache.py
index 0249461dd2fd..9b022a042c81 100644
--- a/tests/kernels/attention/test_cache.py
+++ b/tests/kernels/attention/test_cache.py
@@ -10,7 +10,7 @@
 from vllm import _custom_ops as ops
 from vllm.model_executor.layers.quantization.utils.quant_utils import scaled_dequantize
 from vllm.platforms import current_platform
-from vllm.utils.torch_utils import set_random_seed
+from vllm.utils.torch_utils import nvfp4_kv_cache_split_views, set_random_seed
 
 COPYING_DIRECTION = [("cuda", "cpu"), ("cuda", "cuda"), ("cpu", "cuda")]
 DTYPES = [torch.bfloat16, torch.float]
@@ -172,7 +172,7 @@ def test_reshape_and_cache(
 @pytest.mark.parametrize("dtype", DTYPES)
 @pytest.mark.parametrize("seed", SEEDS)
 @pytest.mark.parametrize("device", CUDA_DEVICES)
-@pytest.mark.parametrize("kv_cache_dtype", KV_CACHE_DTYPE)
+@pytest.mark.parametrize("kv_cache_dtype", KV_CACHE_DTYPE + ["nvfp4"])
 @pytest.mark.parametrize("kv_cache_layout", CACHE_LAYOUTS)
 @pytest.mark.parametrize("kv_scale_type", KV_SCALE_TYPES)
 @pytest.mark.parametrize("implementation", RESHAPE_FLASH_IMPLEMENTATIONS)
@@ -202,6 +202,25 @@ def test_reshape_and_cache_flash(
     if kv_scale_type == "attn_head" and implementation != "cuda":
         pytest.skip("Only CUDA implementation supports attn_head scaling.")
 
+    if kv_cache_dtype == "nvfp4":
+        if not current_platform.has_device_capability(100):
+            pytest.skip("NVFP4 requires compute capability >= 10.0 (Blackwell).")
+        if implementation != "cuda":
+            pytest.skip("NVFP4 only supports CUDA implementation.")
+        if kv_scale_type != "tensor":
+            pytest.skip("NVFP4 only supports per-tensor scaling.")
+        if head_size % 16 != 0:
+            pytest.skip("NVFP4 requires head_size divisible by 16.")
+        if (head_size // 16) % 4 != 0:
+            pytest.skip(
+                "NVFP4 requires (head_size // 16) divisible by 4 "
+                "for 4x4 block scale swizzle."
+            )
+        if block_size % 4 != 0:
+            pytest.skip("NVFP4 requires block_size divisible by 4.")
+        if dtype not in (torch.float16, torch.bfloat16):
+            pytest.skip("NVFP4 quantization only supports fp16/bf16 input.")
+
     # fp8 conversion requires continugous memory buffer. Reduce the number of
     # blocks and tokens to consume less memory.
     num_tokens = num_tokens // 2
@@ -229,7 +248,23 @@ def test_reshape_and_cache_flash(
     del key_caches
     del value_caches
 
-    if kv_scale_type == "tensor":
+    # For nvfp4, the factory returns kv[:, 0] and kv[:, 1] like all dtypes.
+    # Split views are still needed for dequant verification.
+    key_scale_cache = None
+    value_scale_cache = None
+    nvfp4_key_data = None
+    nvfp4_value_data = None
+    if kv_cache_dtype == "nvfp4":
+        (nvfp4_key_data,), (key_scale_cache,) = nvfp4_kv_cache_split_views(key_cache)
+        (nvfp4_value_data,), (value_scale_cache,) = nvfp4_kv_cache_split_views(
+            value_cache
+        )
+
+    if kv_cache_dtype == "nvfp4":
+        # Global scale = amax / 448 (per-tensor)
+        k_scale = (key.abs().amax() / 448.0).to(torch.float32)
+        v_scale = (value.abs().amax() / 448.0).to(torch.float32)
+    elif kv_scale_type == "tensor":
         k_scale = (key.amax() / 64.0).to(torch.float32)
         v_scale = (value.amax() / 64.0).to(torch.float32)
     else:  # "attn_head"
@@ -240,8 +275,9 @@ def permute_and_compact(x):
         y = x if kv_cache_layout == "NHD" else x.permute(0, 2, 1, 3)
         return y.contiguous()
 
-    key_cache_compact = permute_and_compact(key_cache)
-    value_cache_compact = permute_and_compact(value_cache)
+    if kv_cache_dtype != "nvfp4":
+        key_cache_compact = permute_and_compact(key_cache)
+        value_cache_compact = permute_and_compact(value_cache)
 
     def convert_fp8_local(output, input, scale, kv_dtype):
         fp8_input = input.view(current_platform.fp8_dtype())
@@ -257,7 +293,7 @@ def convert_fp8_local(output, input, scale, kv_dtype):
                 result = fp8_input.to(output.dtype) * scale.view(1, -1, 1, 1)
         output.copy_(result)
 
-    # Clone the KV caches.
+    # Clone the KV caches (for non-nvfp4, used as reference baseline).
     if kv_cache_dtype == "fp8":
         cloned_key_cache = torch.empty_like(key_cache_compact, dtype=torch.float16)
         convert_fp8_local(cloned_key_cache, key_cache_compact, k_scale, kv_cache_dtype)
@@ -265,25 +301,27 @@ def convert_fp8_local(output, input, scale, kv_dtype):
         convert_fp8_local(
             cloned_value_cache, value_cache_compact, v_scale, kv_cache_dtype
         )
-    else:
+    elif kv_cache_dtype != "nvfp4":
         cloned_key_cache = key_cache_compact.clone()
         cloned_value_cache = value_cache_compact.clone()
+
     # Call the reshape_and_cache kernel.
     if implementation == "cuda":
-        opcheck(
-            torch.ops._C_cache_ops.reshape_and_cache_flash,
-            (
-                key,
-                value,
-                key_cache,
-                value_cache,
-                slot_mapping,
-                kv_cache_dtype,
-                k_scale,
-                v_scale,
-            ),
-            cond=(head_size == HEAD_SIZES[0]),
-        )
+        if kv_cache_dtype != "nvfp4":
+            opcheck(
+                torch.ops._C_cache_ops.reshape_and_cache_flash,
+                (
+                    key,
+                    value,
+                    key_cache,
+                    value_cache,
+                    slot_mapping,
+                    kv_cache_dtype,
+                    k_scale,
+                    v_scale,
+                ),
+                cond=(head_size == HEAD_SIZES[0]),
+            )
         ops.reshape_and_cache_flash(
             key,
             value,
@@ -309,6 +347,46 @@ def convert_fp8_local(output, input, scale, kv_dtype):
             k_scale,
             v_scale,
         )
+
+    if kv_cache_dtype == "nvfp4":
+        # Verify NVFP4 by dequantizing the entire cache and comparing
+        # the written positions against original bf16 values.
+        # Same pattern as FP8: dequant whole cache, then extract and compare.
+        from tests.kernels.quantization.nvfp4_utils import (
+            dequant_nvfp4_kv_cache,
+        )
+
+        def dequant_nvfp4_cache_nhd(data_cache, scale_cache, global_scale):
+            # data_cache:  [N, T, H, data_dim]  NHD (contiguous inner dims)
+            # scale_cache: [N, T, H, scale_dim] NHD (contiguous inner dims)
+            # Permute to HND layout for the dequant utility.
+            data_hnd = data_cache.permute(0, 2, 1, 3)
+            scale_hnd = scale_cache.permute(0, 2, 1, 3)
+            result_hnd = dequant_nvfp4_kv_cache(
+                data_hnd, scale_hnd, global_scale, head_size, block_size
+            )
+            return result_hnd.permute(0, 2, 1, 3)  # back to [N, T, H, D]
+
+        result_key_cache = dequant_nvfp4_cache_nhd(
+            nvfp4_key_data, key_scale_cache, k_scale.item()
+        )
+        result_value_cache = dequant_nvfp4_cache_nhd(
+            nvfp4_value_data, value_scale_cache, v_scale.item()
+        )
+
+        # Flatten [num_blocks, block_size] → [num_slots] and index by slot_mapping.
+        num_slots = num_blocks * block_size
+        result_key_flat = result_key_cache.reshape(num_slots, num_heads, head_size)
+        result_value_flat = result_value_cache.reshape(num_slots, num_heads, head_size)
+
+        torch.testing.assert_close(
+            result_key_flat[slot_mapping], key.float(), atol=1.5, rtol=0.5
+        )
+        torch.testing.assert_close(
+            result_value_flat[slot_mapping], value.float(), atol=1.5, rtol=0.5
+        )
+        return
+
     key_cache_compact = permute_and_compact(key_cache)
     value_cache_compact = permute_and_compact(value_cache)
 
diff --git a/tests/kernels/attention/test_cpu_attn.py b/tests/kernels/attention/test_cpu_attn.py
index 7e3d77134600..6af1bfe1e7ac 100644
--- a/tests/kernels/attention/test_cpu_attn.py
+++ b/tests/kernels/attention/test_cpu_attn.py
@@ -20,12 +20,18 @@
     cpu_attn_reshape_and_cache,
 )
 
+# Enable AMX tile data registers so isolated runs (e.g. -k fp8_amx) don't rely
+# on ref_paged_attn's einsum to trigger oneDNN's _init_amx() first.
+if torch.cpu._is_amx_tile_supported():
+    torch.cpu._init_amx()
+
+
 NUM_HEADS = [
     (4, 4),
     (8, 2),
     (9, 3),
 ]
-HEAD_SIZES = [96, 128]
+HEAD_SIZES = [96, 128, 512]
 HEAD_SIZES_VEC16 = [96, 80, 112, 128]
 QTYPES = [torch.bfloat16, torch.half, torch.float32]
 SLIDING_WINDOWS = [None, 256]
@@ -43,15 +49,14 @@ def get_attn_isa(
     block_size: int | None = None,
     dtype: torch.dtype | None = None,
 ):
-    if block_size and dtype:
-        return _get_attn_isa(dtype, block_size)
-    else:
-        if current_platform.get_cpu_architecture() == CpuArchEnum.ARM:
-            return "neon"
-        elif torch.cpu._is_amx_tile_supported():
-            return "amx"
-        else:
-            return "vec"
+    # Delegate to _get_attn_isa so the fallback path applies the same arch
+    # gating (e.g. RISC-V RVV is only chosen when the build's hardcoded
+    # VLEN=128 kernel is actually present; on VLEN=256 / scalar hosts it
+    # correctly falls through to vec/vec16).
+    return _get_attn_isa(
+        dtype if dtype is not None else torch.bfloat16,
+        block_size if block_size else 32,
+    )
 
 
 # rand number generation takes too much time, cache rand tensors
@@ -178,6 +183,10 @@ def ref_paged_attn(
     return torch.cat(outputs, dim=0)
 
 
+_FP8_ATOL = {"fp8_e4m3": 0.2, "fp8_e5m2": 0.3}
+_FP8_RTOL = 0.1
+
+
 @torch.inference_mode()
 def varlen_with_paged_kv(
     seq_lens: list[tuple[int, int]],
@@ -191,6 +200,9 @@ def varlen_with_paged_kv(
     use_alibi: bool,
     use_sink: bool,
     isa: str,
+    kv_cache_dtype: str = "auto",
+    k_scale: float = 1.0,
+    v_scale: float = 1.0,
 ) -> None:
     set_random_seed(0)
     num_seqs = len(seq_lens)
@@ -212,6 +224,10 @@ def varlen_with_paged_kv(
         15 * torch.rand((num_query_heads,), dtype=torch.bfloat16) if use_sink else None
     )
 
+    is_fp8 = kv_cache_dtype != "auto"
+    if is_fp8 and current_platform.get_cpu_architecture() != CpuArchEnum.X86:
+        pytest.skip("FP8 KV cache only supported on x86")
+
     query = tensor_cache(
         elem_num=token_num * num_query_heads * head_size,
         dtype=dtype,
@@ -233,11 +249,17 @@ def varlen_with_paged_kv(
         num_kv_heads,
         head_size,
     )
+    if is_fp8:
+        # Clamp KV to [-1, 1] so FP8 quantization error (<=12.5% for E4M3,
+        # <=25% for E5M2) stays within the test tolerances regardless of
+        # which tensor_cache values happen to be in use.
+        key_value = key_value.clamp(-1, 1)
     key_cache, value_cache = key_value.unbind(0)
 
     # KV cache for CPU attention
+    cache_dtype = torch.uint8 if is_fp8 else dtype
     packed_key_cache = torch.empty(
-        num_blocks, num_kv_heads, block_size, head_size, dtype=dtype
+        num_blocks, num_kv_heads, block_size, head_size, dtype=cache_dtype
     )
     packed_value_cache = torch.empty_like(packed_key_cache)
 
@@ -252,6 +274,11 @@ def varlen_with_paged_kv(
 
     # use reshape_and_cache to pack key_cache and value_cache
     slot_mapping = torch.arange(0, num_blocks * block_size, dtype=torch.int64)
+    fp8_kwargs: dict = (
+        dict(k_scale=k_scale, v_scale=v_scale, kv_cache_dtype=kv_cache_dtype)
+        if is_fp8
+        else {}
+    )
     cpu_attn_reshape_and_cache(
         key=key_cache.view(-1, num_kv_heads, head_size),
         value=value_cache.view(-1, num_kv_heads, head_size),
@@ -259,6 +286,7 @@ def varlen_with_paged_kv(
         value_cache=packed_value_cache,
         slot_mapping=slot_mapping,
         isa=isa,
+        **fp8_kwargs,
     )
 
     metadata = cpu_attn_get_scheduler_metadata(
@@ -291,6 +319,7 @@ def varlen_with_paged_kv(
         softcap=soft_cap if soft_cap is not None else 0,
         scheduler_metadata=metadata,
         s_aux=s_aux,
+        **fp8_kwargs,
     )
 
     metadata = cpu_attn_get_scheduler_metadata(
@@ -323,23 +352,59 @@ def varlen_with_paged_kv(
         softcap=soft_cap if soft_cap is not None else 0,
         scheduler_metadata=metadata,
         s_aux=s_aux,
+        **fp8_kwargs,
     )
 
-    ref_output = ref_paged_attn(
-        query=query,
-        key_cache=key_cache,
-        value_cache=value_cache,
-        query_lens=query_lens,
-        kv_lens=kv_lens,
-        block_tables=block_tables,
-        scale=scale,
-        sliding_window=sliding_window,
-        soft_cap=soft_cap,
-        alibi_slopes=alibi_slopes,
-        s_aux=s_aux,
-    )
+    if is_fp8:
+        # Build a float KV cache via the non-FP8 path and run float attention
+        # to use as the reference.
+        ref_key_cache = torch.empty(
+            num_blocks, num_kv_heads, block_size, head_size, dtype=dtype
+        )
+        ref_value_cache = torch.empty_like(ref_key_cache)
+        cpu_attn_reshape_and_cache(
+            key=key_cache.view(-1, num_kv_heads, head_size),
+            value=value_cache.view(-1, num_kv_heads, head_size),
+            key_cache=ref_key_cache,
+            value_cache=ref_value_cache,
+            slot_mapping=slot_mapping,
+            isa=isa,
+        )
+        ref_output = torch.empty_like(query)
+        cpu_attention_with_kv_cache(
+            query=query,
+            key_cache=ref_key_cache,
+            value_cache=ref_value_cache,
+            output=ref_output,
+            query_start_loc=cu_query_lens,
+            seq_lens=kv_lens_tensor,
+            scale=scale,
+            causal=True,
+            alibi_slopes=alibi_slopes,
+            sliding_window=window_size,
+            block_table=block_tables,
+            softcap=soft_cap if soft_cap is not None else 0,
+            scheduler_metadata=metadata,
+            s_aux=s_aux,
+        )
+        atol = _FP8_ATOL[kv_cache_dtype]
+        rtol = _FP8_RTOL
+    else:
+        ref_output = ref_paged_attn(
+            query=query,
+            key_cache=key_cache,
+            value_cache=value_cache,
+            query_lens=query_lens,
+            kv_lens=kv_lens,
+            block_tables=block_tables,
+            scale=scale,
+            sliding_window=sliding_window,
+            soft_cap=soft_cap,
+            alibi_slopes=alibi_slopes,
+            s_aux=s_aux,
+        )
+        atol, rtol = 1.5e-2, 1e-2
 
-    atol, rtol = 1.5e-2, 1e-2
     (
         torch.testing.assert_close(out_with_split, ref_output, atol=atol, rtol=rtol),
         f"{torch.max(torch.abs(out_with_split - ref_output))}",
@@ -350,6 +415,7 @@ def varlen_with_paged_kv(
     )
 
 
+@pytest.mark.parametrize("kv_cache_dtype", ["auto", "fp8_e4m3", "fp8_e5m2"])
 @pytest.mark.parametrize("seq_lens", SEQ_LENS)
 @pytest.mark.parametrize("num_heads", NUM_HEADS)
 @pytest.mark.parametrize("head_size", HEAD_SIZES)
@@ -373,6 +439,7 @@ def test_varlen_with_paged_kv_normal_vec(
     use_alibi: bool,
     use_sink: bool,
     isa: str,
+    kv_cache_dtype: str,
 ) -> None:
     varlen_with_paged_kv(
         seq_lens=seq_lens,
@@ -386,9 +453,11 @@ def test_varlen_with_paged_kv_normal_vec(
         use_alibi=use_alibi,
         use_sink=use_sink,
         isa=isa,
+        kv_cache_dtype=kv_cache_dtype,
     )
 
 
+@pytest.mark.parametrize("kv_cache_dtype", ["auto", "fp8_e4m3", "fp8_e5m2"])
 @pytest.mark.parametrize("seq_lens", SEQ_LENS)
 @pytest.mark.parametrize("num_heads", NUM_HEADS)
 @pytest.mark.parametrize("head_size", HEAD_SIZES)
@@ -413,6 +482,7 @@ def test_varlen_with_paged_kv_normal_amx(
     use_alibi: bool,
     use_sink: bool,
     isa: str,
+    kv_cache_dtype: str,
 ) -> None:
     varlen_with_paged_kv(
         seq_lens=seq_lens,
@@ -426,6 +496,7 @@ def test_varlen_with_paged_kv_normal_amx(
         use_alibi=use_alibi,
         use_sink=use_sink,
         isa=isa,
+        kv_cache_dtype=kv_cache_dtype,
     )
 
 
@@ -511,6 +582,53 @@ def test_varlen_with_paged_kv_normal_neon(
     )
 
 
+@pytest.mark.parametrize("kv_cache_dtype", ["auto", "fp8_e4m3"])
+@pytest.mark.parametrize("seq_lens", SEQ_LENS)
+@pytest.mark.parametrize("num_heads", NUM_HEADS)
+@pytest.mark.parametrize("head_size", HEAD_SIZES)
+@pytest.mark.parametrize("block_size", [96, 128])
+@pytest.mark.parametrize("sliding_window", SLIDING_WINDOWS)
+@pytest.mark.parametrize("dtype", QTYPES)
+@pytest.mark.parametrize("soft_cap", [None])
+@pytest.mark.parametrize("num_blocks", NUM_BLOCKS)
+@pytest.mark.parametrize("use_alibi", [False])
+@pytest.mark.parametrize("use_sink", [False])
+@pytest.mark.parametrize("isa", ["rvv"])
+@pytest.mark.skipif(
+    current_platform.get_cpu_architecture() != CpuArchEnum.RISCV,
+    reason="Not a RISC-V CPU.",
+)
+def test_varlen_with_paged_kv_normal_rvv(
+    seq_lens: list[tuple[int, int]],
+    num_heads: tuple[int, int],
+    head_size: int,
+    sliding_window: int | None,
+    dtype: torch.dtype,
+    block_size: int,
+    soft_cap: float | None,
+    num_blocks: int,
+    use_alibi: bool,
+    use_sink: bool,
+    isa: str,
+    kv_cache_dtype: str,
+) -> None:
+    varlen_with_paged_kv(
+        seq_lens=seq_lens,
+        num_heads=num_heads,
+        head_size=head_size,
+        sliding_window=sliding_window,
+        dtype=dtype,
+        block_size=block_size,
+        soft_cap=soft_cap,
+        num_blocks=num_blocks,
+        use_alibi=use_alibi,
+        use_sink=use_sink,
+        isa=isa,
+        kv_cache_dtype=kv_cache_dtype,
+    )
+
+
+@pytest.mark.parametrize("kv_cache_dtype", ["auto", "fp8_e4m3"])
 @pytest.mark.parametrize("seq_lens", SEQ_LENS)
 @pytest.mark.parametrize("num_heads", NUM_HEADS)
 @pytest.mark.parametrize("head_size", [96])
@@ -534,6 +652,7 @@ def test_varlen_with_paged_kv_softcap(
     use_alibi: bool,
     use_sink: bool,
     isa: str,
+    kv_cache_dtype: str,
 ) -> None:
     varlen_with_paged_kv(
         seq_lens=seq_lens,
@@ -547,9 +666,11 @@ def test_varlen_with_paged_kv_softcap(
         use_alibi=use_alibi,
         use_sink=use_sink,
         isa=isa,
+        kv_cache_dtype=kv_cache_dtype,
     )
 
 
+@pytest.mark.parametrize("kv_cache_dtype", ["auto", "fp8_e4m3"])
 @pytest.mark.parametrize("seq_lens", SEQ_LENS)
 @pytest.mark.parametrize("num_heads", NUM_HEADS)
 @pytest.mark.parametrize("head_size", [96])
@@ -573,6 +694,7 @@ def test_varlen_with_paged_kv_alibi(
     use_alibi: bool,
     use_sink: bool,
     isa: str,
+    kv_cache_dtype: str,
 ) -> None:
     varlen_with_paged_kv(
         seq_lens=seq_lens,
@@ -586,9 +708,11 @@ def test_varlen_with_paged_kv_alibi(
         use_alibi=use_alibi,
         use_sink=use_sink,
         isa=isa,
+        kv_cache_dtype=kv_cache_dtype,
     )
 
 
+@pytest.mark.parametrize("kv_cache_dtype", ["auto", "fp8_e4m3"])
 @pytest.mark.parametrize("seq_lens", SEQ_LENS)
 @pytest.mark.parametrize("num_heads", NUM_HEADS)
 @pytest.mark.parametrize("head_size", [96])
@@ -612,6 +736,7 @@ def test_varlen_with_paged_kv_sink(
     use_alibi: bool,
     use_sink: bool,
     isa: str,
+    kv_cache_dtype: str,
 ) -> None:
     varlen_with_paged_kv(
         seq_lens=seq_lens,
@@ -625,4 +750,5 @@ def test_varlen_with_paged_kv_sink(
         use_alibi=use_alibi,
         use_sink=use_sink,
         isa=isa,
+        kv_cache_dtype=kv_cache_dtype,
     )
diff --git a/tests/kernels/attention/test_deepgemm_attention.py b/tests/kernels/attention/test_deepgemm_attention.py
index 2dc522598e4e..0cea46d6284f 100644
--- a/tests/kernels/attention/test_deepgemm_attention.py
+++ b/tests/kernels/attention/test_deepgemm_attention.py
@@ -9,8 +9,8 @@
 from vllm.utils.deep_gemm import (
     _ceil_to_ue8m0,
     calc_diff,
-    fp8_mqa_logits,
-    fp8_paged_mqa_logits,
+    fp8_fp4_mqa_logits,
+    fp8_fp4_paged_mqa_logits,
     get_num_sms,
     get_paged_mqa_logits_metadata,
 )
@@ -127,8 +127,8 @@ def test_deepgemm_fp8_mqa_logits(clean_logits: bool):
 
                 q_fp8 = q.to(torch.float8_e4m3fn)
                 kv_fp8 = per_custom_dims_cast_to_fp8(kv, (0,), False)
-                logits = fp8_mqa_logits(
-                    q_fp8, kv_fp8, weights, ks, ke, clean_logits=clean_logits
+                logits = fp8_fp4_mqa_logits(
+                    (q_fp8, None), kv_fp8, weights, ks, ke, clean_logits=clean_logits
                 )
 
                 ref_logits = _ref_fp8_mqa_logits(
@@ -150,7 +150,7 @@ def test_deepgemm_fp8_mqa_logits(clean_logits: bool):
                 assert diff < 1e-3, f"{diff=}"
 
 
-def _ref_fp8_paged_mqa_logits(
+def _ref_fp8_fp4_paged_mqa_logits(
     q: torch.Tensor,
     kv_cache: torch.Tensor,
     weights: torch.Tensor,
@@ -205,8 +205,10 @@ def _ref_fp8_paged_mqa_logits(
 @pytest.mark.skipif(
     not current_platform.has_device_capability(90), reason="SM90 and SM100 only"
 )
-@pytest.mark.parametrize("clean_logits", [True, False])
-def test_deepgemm_fp8_paged_mqa_logits(clean_logits: bool):
+def test_deepgemm_fp8_fp4_paged_mqa_logits():
+    # NOTE: clean_logits=True is incompatible with the 2D context_lens
+    # required by csrc/apis/attention.hpp; only the False path is exercised.
+    clean_logits = False
     torch.manual_seed(0)
     random.seed(0)
 
@@ -258,21 +260,29 @@ def test_deepgemm_fp8_paged_mqa_logits(clean_logits: bool):
                 q_fp8 = q.to(torch.float8_e4m3fn)
                 kv_cache_fp8 = kv_cache_cast_to_fp8(kv_cache)
 
+                # deep_gemm paged MQA logits requires 2D context_lens of
+                # shape (B, next_n) (csrc/apis/attention.hpp:332-335);
+                # see indexer.py:607-608. For each batch/next_n token, the
+                # effective context length is context_lens[b] - next_n + j + 1.
+                next_n_arange = torch.arange(next_n, device="cuda", dtype=torch.int32)
+                context_lens_2d = (
+                    context_lens.unsqueeze(-1) - next_n + 1 + next_n_arange
+                ).contiguous()
                 schedule_metadata = get_paged_mqa_logits_metadata(
-                    context_lens, blocksize, get_num_sms()
+                    context_lens_2d, blocksize, get_num_sms()
                 )
-                logits = fp8_paged_mqa_logits(
-                    q_fp8,
+                logits = fp8_fp4_paged_mqa_logits(
+                    (q_fp8, None),
                     kv_cache_fp8,
                     weights,
-                    context_lens,
+                    context_lens_2d,
                     block_tables,
                     schedule_metadata,
                     max_model_len,
                     clean_logits=clean_logits,
                 )
 
-                ref_logits = _ref_fp8_paged_mqa_logits(
+                ref_logits = _ref_fp8_fp4_paged_mqa_logits(
                     q,
                     kv_cache,
                     weights,
diff --git a/tests/kernels/attention/test_flashinfer_trtllm_attention.py b/tests/kernels/attention/test_flashinfer_trtllm_attention.py
index b5f8584015be..87a12c2ff395 100644
--- a/tests/kernels/attention/test_flashinfer_trtllm_attention.py
+++ b/tests/kernels/attention/test_flashinfer_trtllm_attention.py
@@ -5,12 +5,17 @@
 import torch
 
 from tests.kernels.quantization.nvfp4_utils import (
+    dequant_nvfp4_kv_cache,
     dequantize_nvfp4_to_dtype,
     get_nvfp4_global_scale,
 )
 from vllm.platforms import current_platform
 from vllm.utils.math_utils import round_up
-from vllm.utils.torch_utils import set_random_seed
+from vllm.utils.torch_utils import (
+    nvfp4_kv_cache_full_dim,
+    nvfp4_kv_cache_split_views,
+    set_random_seed,
+)
 
 if not current_platform.is_device_capability_family(100):
     pytest.skip(
@@ -33,6 +38,117 @@ def to_float8(x, dtype=torch.float8_e4m3fn):
     return x_scl_sat.to(dtype), scale.float().reciprocal()
 
 
+def build_paged_kv_metadata(
+    seq_lens: torch.Tensor,
+    block_tables: torch.Tensor,
+    block_size: int,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """Build paged-KV indptr/indices/last_page_lens from seq_lens + block_tables."""
+    kv_indptr = [0]
+    kv_indices = []
+    kv_last_page_lens = []
+    for i in range(len(seq_lens)):
+        sl = int(seq_lens[i])
+        assert sl > 0
+        nb = (sl + block_size - 1) // block_size
+        kv_indices.extend(block_tables[i, :nb].tolist())
+        kv_indptr.append(kv_indptr[-1] + nb)
+        kv_last_page_lens.append(sl % block_size or block_size)
+    return (
+        torch.tensor(kv_indptr, dtype=torch.int32),
+        torch.tensor(kv_indices, dtype=torch.int32),
+        torch.tensor(kv_last_page_lens, dtype=torch.int32),
+    )
+
+
+def make_nvfp4_kv_cache(
+    kv_bf16_hnd: torch.Tensor, block_size: int, head_size: int
+) -> tuple:
+    """Quantize bf16 KV cache to nvfp4 via reshape_and_cache_flash.
+
+    Returns (k_data, v_data), (k_scales, v_scales), kv_scale, ref_kv_bf16.
+    """
+    num_blocks, _, num_kv_heads, _, _ = kv_bf16_hnd.shape
+    kv_scale_val = (kv_bf16_hnd.abs().amax() / 448.0).item()
+    kv_scale_tensor = torch.tensor(
+        kv_scale_val, dtype=torch.float32, device=kv_bf16_hnd.device
+    )
+
+    # Allocate in HND physical order, permute to NHD logical order.
+    # hnd_order swaps dims 2↔3; it is its own inverse.
+    full_dim = nvfp4_kv_cache_full_dim(head_size)
+    hnd_order = (0, 1, 3, 2, 4)
+    kv_cache = torch.zeros(
+        (num_blocks, 2, num_kv_heads, block_size, full_dim),
+        dtype=torch.uint8,
+        device=kv_bf16_hnd.device,
+    ).permute(*hnd_order)
+
+    # Flatten NHD [N, T, H, D] → token tensors [N*T, H, D] for the kernel.
+    num_tokens = num_blocks * block_size
+    k_tokens = (
+        kv_bf16_hnd[:, 0]
+        .permute(0, 2, 1, 3)
+        .reshape(num_tokens, num_kv_heads, head_size)
+    )
+    v_tokens = (
+        kv_bf16_hnd[:, 1]
+        .permute(0, 2, 1, 3)
+        .reshape(num_tokens, num_kv_heads, head_size)
+    )
+    slot_mapping = torch.arange(num_tokens, dtype=torch.long, device=kv_bf16_hnd.device)
+
+    # reshape_and_cache_flash: kernel receives kv_cache[:, 0] and [:, 1]
+    # (full K/V buffers containing both data and scale).
+    torch.ops._C_cache_ops.reshape_and_cache_flash(
+        k_tokens,
+        v_tokens,
+        kv_cache[:, 0],
+        kv_cache[:, 1],
+        slot_mapping,
+        "nvfp4",
+        kv_scale_tensor,
+        kv_scale_tensor,
+    )
+
+    # Split in HND order for trtllm kernel (expects HND numTokensPerPage).
+    kv_cache_hnd = kv_cache.permute(*hnd_order)
+    (k_data, v_data), (k_scales, v_scales) = nvfp4_kv_cache_split_views(kv_cache_hnd)
+
+    # Dequantize for the FA2 reference baseline.
+    ref_k = dequant_nvfp4_kv_cache(
+        k_data, k_scales, kv_scale_val, head_size, block_size
+    ).to(torch.bfloat16)
+    ref_v = dequant_nvfp4_kv_cache(
+        v_data, v_scales, kv_scale_val, head_size, block_size
+    ).to(torch.bfloat16)
+    ref_kv_bf16 = torch.stack([ref_k, ref_v], dim=1)  # [N, 2, H, T, D]
+
+    return (k_data, v_data), (k_scales, v_scales), kv_scale_val, ref_kv_bf16
+
+
+def make_quantized_kv_cache(
+    kv_cache: torch.Tensor,
+    kv_quant_dtype: torch.dtype,
+    block_size: int,
+    head_size: int,
+) -> tuple:
+    """Quantize kv_cache based on dtype. Returns (kv_cache, kv_cache_sf,
+    kv_scale, ref_kv_cache, is_nvfp4_kv)."""
+    is_nvfp4_kv = kv_quant_dtype == FP4_DTYPE
+    if is_nvfp4_kv:
+        data, scales, kv_scale, ref = make_nvfp4_kv_cache(
+            kv_cache, block_size, head_size
+        )
+        return data, scales, kv_scale, ref, True
+    elif kv_quant_dtype == FP8_DTYPE:
+        kv_fp8, kv_scale = to_float8(kv_cache)
+        ref = kv_fp8.to(kv_cache.dtype) * kv_scale
+        return kv_fp8, None, kv_scale, ref, False
+    else:
+        return kv_cache, None, 1.0, kv_cache, False
+
+
 DTYPE = [torch.bfloat16]
 QUANT_DTYPES = [
     # (q_quant_dtype, kv_quant_dtype, o_quant_dtype)
@@ -41,6 +157,7 @@ def to_float8(x, dtype=torch.float8_e4m3fn):
     (FP8_DTYPE, FP8_DTYPE, None),
     (FP8_DTYPE, FP8_DTYPE, FP8_DTYPE),
     (FP8_DTYPE, FP8_DTYPE, FP4_DTYPE),
+    (FP8_DTYPE, FP4_DTYPE, FP8_DTYPE),  # nvfp4 KV cache
 ]
 BATCH_SIZE = [4, 12]
 MAX_SEQ_LENS = [(1024, 4096)]
@@ -127,35 +244,19 @@ def test_flashinfer_trtllm_decode_with_baseline(
     max_seq_len = torch.max(seq_lens).item()
 
     kv_cache = torch.randn(kv_cache_shape, dtype=dtype)
-    if kv_quant_dtype == FP8_DTYPE:
-        kv_cache, kv_scale = to_float8(kv_cache)
-        ref_kv_cache = kv_cache.to(dtype) * kv_scale
-    else:
-        kv_scale = 1.0
-        ref_kv_cache = kv_cache
+    kv_cache, kv_cache_sf, kv_scale, ref_kv_cache, is_nvfp4_kv = (
+        make_quantized_kv_cache(kv_cache, kv_quant_dtype, block_size, head_size)
+    )
+
     k_scale = v_scale = kv_scale
 
     max_num_blocks_per_seq = (max_seq_len + block_size - 1) // block_size
     block_tables = torch.randint(
         0, NUM_BLOCKS, (batch_size, max_num_blocks_per_seq), dtype=torch.int32
     )
-    kv_indptr = [0]
-    kv_indices = []
-    kv_last_page_lens = []
-    for i in range(batch_size):
-        seq_len = seq_lens[i]
-        assert seq_len > 0
-        num_blocks = (seq_len + block_size - 1) // block_size
-        kv_indices.extend(block_tables[i, :num_blocks])
-        kv_indptr.append(kv_indptr[-1] + num_blocks)
-        kv_last_page_len = seq_len % block_size
-        if kv_last_page_len == 0:
-            kv_last_page_len = block_size
-        kv_last_page_lens.append(kv_last_page_len)
-
-    kv_indptr = torch.tensor(kv_indptr, dtype=torch.int32)
-    kv_indices = torch.tensor(kv_indices, dtype=torch.int32)
-    kv_last_page_lens = torch.tensor(kv_last_page_lens, dtype=torch.int32)
+    kv_indptr, kv_indices, kv_last_page_lens = build_paged_kv_metadata(
+        seq_lens, block_tables, block_size
+    )
     workspace_buffer = torch.zeros(128 * 1024 * 1024, dtype=torch.int8)
 
     # Baseline Decode
@@ -225,6 +326,7 @@ def test_flashinfer_trtllm_decode_with_baseline(
         sinks=sinks,
         o_sf_scale=o_sf_scale_float,
         out=output_trtllm,
+        kv_cache_sf=kv_cache_sf,
     )
     if o_quant_dtype == FP8_DTYPE:
         output_trtllm = output_trtllm.to(dtype) * o_scale
@@ -237,7 +339,9 @@ def test_flashinfer_trtllm_decode_with_baseline(
         )
         output_trtllm = output_trtllm.reshape(-1, query.shape[1], query.shape[2])
 
-    if q_quant_dtype == FP8_DTYPE and o_quant_dtype == FP4_DTYPE:
+    if is_nvfp4_kv:
+        rtol, atol = 1.0, 1.0  # nvfp4 has higher quantization error
+    elif q_quant_dtype == FP8_DTYPE and o_quant_dtype == FP4_DTYPE:
         rtol, atol = 7e-2, 9e-2
     elif q_quant_dtype == FP8_DTYPE and o_quant_dtype == FP8_DTYPE:
         rtol, atol = 3e-2, 4e-2
@@ -287,7 +391,12 @@ def test_flashinfer_trtllm_prefill_with_baseline(
     kv_quant_dtype = kv_quant_dtype or dtype
     o_quant_dtype = o_quant_dtype or dtype
 
-    if q_quant_dtype != kv_quant_dtype:
+    # FP8 Q + nvfp4 KV is the required combination for the nvfp4 KV path.
+    # All other mixed Q/KV dtype combinations are unsupported.
+    is_nvfp4_kv = kv_quant_dtype == FP4_DTYPE
+    if q_quant_dtype != kv_quant_dtype and not (
+        q_quant_dtype == FP8_DTYPE and is_nvfp4_kv
+    ):
         pytest.skip("Skipped mixed QKV dtypes for prefill")
 
     max_q_len, max_kv_len = max_seq_lens
@@ -329,35 +438,19 @@ def test_flashinfer_trtllm_prefill_with_baseline(
     max_seq_len = torch.max(seq_lens).item()
 
     kv_cache = torch.randn(kv_cache_shape, dtype=dtype)
-    if kv_quant_dtype == FP8_DTYPE:
-        kv_cache, kv_scale = to_float8(kv_cache)
-        ref_kv_cache = kv_cache.to(dtype) * kv_scale
-    else:
-        kv_scale = 1.0
-        ref_kv_cache = kv_cache
+    kv_cache, kv_cache_sf, kv_scale, ref_kv_cache, is_nvfp4_kv = (
+        make_quantized_kv_cache(kv_cache, kv_quant_dtype, block_size, head_size)
+    )
+
     k_scale = v_scale = kv_scale
 
     max_num_blocks_per_seq = (max_seq_len + block_size - 1) // block_size
     block_tables = torch.randint(
         0, NUM_BLOCKS, (batch_size, max_num_blocks_per_seq), dtype=torch.int32
     )
-    kv_indptr = [0]
-    kv_indices = []
-    kv_last_page_lens = []
-    for i in range(batch_size):
-        seq_len = seq_lens[i]
-        assert seq_len > 0
-        num_blocks = (seq_len + block_size - 1) // block_size
-        kv_indices.extend(block_tables[i, :num_blocks])
-        kv_indptr.append(kv_indptr[-1] + num_blocks)
-        kv_last_page_len = seq_len % block_size
-        if kv_last_page_len == 0:
-            kv_last_page_len = block_size
-        kv_last_page_lens.append(kv_last_page_len)
-
-    kv_indptr = torch.tensor(kv_indptr, dtype=torch.int32)
-    kv_indices = torch.tensor(kv_indices, dtype=torch.int32)
-    kv_last_page_lens = torch.tensor(kv_last_page_lens, dtype=torch.int32)
+    kv_indptr, kv_indices, kv_last_page_lens = build_paged_kv_metadata(
+        seq_lens, block_tables, block_size
+    )
     workspace_buffer = torch.zeros(128 * 1024 * 1024, dtype=torch.int8)
 
     # Baseline Prefill
@@ -431,6 +524,7 @@ def test_flashinfer_trtllm_prefill_with_baseline(
         sinks=sinks,
         o_sf_scale=o_sf_scale_float,
         out=output_trtllm,
+        kv_cache_sf=kv_cache_sf,
     )
     if o_quant_dtype == FP8_DTYPE:
         output_trtllm = output_trtllm.to(dtype) * o_scale
@@ -443,7 +537,9 @@ def test_flashinfer_trtllm_prefill_with_baseline(
         )
         output_trtllm = output_trtllm.reshape(-1, query.shape[1], query.shape[2])
 
-    if q_quant_dtype == FP8_DTYPE and o_quant_dtype == FP4_DTYPE:
+    if is_nvfp4_kv:
+        rtol, atol = 1.0, 1.5  # nvfp4 has higher quantization error
+    elif q_quant_dtype == FP8_DTYPE and o_quant_dtype == FP4_DTYPE:
         rtol, atol = 3e-1, 4e-1
     elif q_quant_dtype == FP8_DTYPE and o_quant_dtype == FP8_DTYPE:
         rtol, atol = 4e-2, 6e-2
diff --git a/tests/kernels/attention/test_lightning_attn.py b/tests/kernels/attention/test_lightning_attn.py
index 37fd85ccec04..46757cc10b6a 100644
--- a/tests/kernels/attention/test_lightning_attn.py
+++ b/tests/kernels/attention/test_lightning_attn.py
@@ -122,8 +122,6 @@ def test_linear_decode_forward_triton(
     dtype: torch.dtype,
 ):
     torch.set_default_device("cuda")
-    torch.manual_seed(42)
-    torch.cuda.manual_seed_all(42)
     set_random_seed(42)
     base = 0.01
     q = base * torch.randn(batch_size, num_heads, 1, head_size, dtype=dtype)
@@ -165,8 +163,6 @@ def test_linear_decode_forward_triton_with_padding(
     dtype: torch.dtype,
 ):
     torch.set_default_device("cuda")
-    torch.manual_seed(42)
-    torch.cuda.manual_seed_all(42)
     set_random_seed(42)
 
     batch_size = 4
@@ -229,8 +225,6 @@ def test_lightning_attention_reference(
     dtype: torch.dtype,
 ):
     torch.set_default_device("cuda")
-    torch.manual_seed(42)
-    torch.cuda.manual_seed_all(42)
     set_random_seed(42)
 
     base = 0.01
diff --git a/tests/kernels/attention/test_merge_attn_states.py b/tests/kernels/attention/test_merge_attn_states.py
index 6fccb8ccfede..40af84887a99 100644
--- a/tests/kernels/attention/test_merge_attn_states.py
+++ b/tests/kernels/attention/test_merge_attn_states.py
@@ -4,7 +4,12 @@
 import pytest
 import torch
 
-from vllm._custom_ops import merge_attn_states as merge_attn_states_cuda
+from vllm._custom_ops import (
+    merge_attn_states as merge_attn_states_cuda,
+)
+from vllm._custom_ops import (
+    scaled_fp8_quant,
+)
 from vllm.platforms import current_platform
 from vllm.v1.attention.ops.triton_merge_attn_states import (
     merge_attn_states as merge_attn_states_triton,
@@ -20,7 +25,12 @@ def merge_attn_states_torch(
     suffix_output: torch.Tensor,  # [NUM_TOKENS, NUM_HEADS, HEAD_SIZE]
     suffix_lse: torch.Tensor,  # [NUM_HEADS, NUM_TOKENS]
     output_lse: torch.Tensor | None = None,  # [NUM_HEADS, NUM_TOKENS]
+    prefill_tokens_with_context: int | None = None,
+    output_scale: torch.Tensor | None = None,  # scalar, per-tensor FP8 scale
 ):
+    # Apply prefill_tokens_with_context mask if needed
+    if prefill_tokens_with_context is None:
+        prefill_tokens_with_context = output.shape[0]
     p_lse = prefix_lse
     s_lse = suffix_lse
     # inf -> -inf
@@ -28,6 +38,9 @@ def merge_attn_states_torch(
     s_lse[s_lse == torch.inf] = -torch.inf
     # max_lse [NUM_HEADS, NUM_TOKENS]
     max_lse = torch.maximum(p_lse, s_lse)
+
+    mask = torch.ones((prefix_lse.shape[1], 1, 1), device=p_lse.device)
+    mask[prefill_tokens_with_context:].fill_(0)
     p_lse = p_lse - max_lse
     s_lse = s_lse - max_lse
     p_lse_exp = torch.exp(p_lse)
@@ -35,11 +48,20 @@ def merge_attn_states_torch(
     out_se = p_lse_exp + s_lse_exp
     if output_lse is not None:
         output_lse = torch.log(out_se) + max_lse
+        output_lse[prefill_tokens_with_context:] = suffix_lse[
+            prefill_tokens_with_context:
+        ]
     p_scale = p_lse_exp / out_se  # [NUM_HEADS, NUM_TOKENS]
     s_scale = s_lse_exp / out_se  # [NUM_HEADS, NUM_TOKENS]
     p_scale = torch.transpose(p_scale, 0, 1).unsqueeze(2)  # [NUM_TOKENS, NUM_HEADS, 1]
     s_scale = torch.transpose(s_scale, 0, 1).unsqueeze(2)  # [NUM_TOKENS, NUM_HEADS, 1]
-    output = prefix_output * p_scale + suffix_output * s_scale
+    output = prefix_output * p_scale * mask + suffix_output * (
+        s_scale * mask + (1 - mask)
+    )
+    if output_scale is not None:
+        shape = output.shape
+        output, _ = scaled_fp8_quant(output.float().view(-1, shape[-1]), output_scale)
+        output = output.view(shape)
     return output, output_lse
 
 
@@ -90,13 +112,20 @@ def shortly_device(device: str) -> str:
         )
 
 
+@pytest.mark.parametrize("use_fp8", [False, True])
+@pytest.mark.parametrize("prefill_tokens_with_context", [None, 128])
 @pytest.mark.parametrize("num_tokens", NUM_BATCH_TOKENS)
 @pytest.mark.parametrize("num_query_heads", NUM_QUERY_HEADS)
 @pytest.mark.parametrize("head_size", HEAD_SIZES)
-@pytest.mark.parametrize("output_dtype", DTYPES)
+@pytest.mark.parametrize("input_dtype", DTYPES)
 @torch.inference_mode()
 def test_merge_attn_states(
-    num_tokens: int, num_query_heads: int, head_size: int, output_dtype: torch.dtype
+    prefill_tokens_with_context: int | None,
+    num_tokens: int,
+    num_query_heads: int,
+    head_size: int,
+    input_dtype: torch.dtype,
+    use_fp8: bool,
 ):
     if not current_platform.is_cuda():
         pytest.skip(
@@ -108,9 +137,19 @@ def test_merge_attn_states(
     NUM_HEADS = num_query_heads
     HEAD_SIZE = head_size
 
+    # When use_fp8 is set, inputs stay as input_dtype (bf16/fp16/fp32)
+    # and output becomes FP8.
+    output_dtype = input_dtype
+    output_scale = None
+    if use_fp8:
+        output_dtype = current_platform.fp8_dtype()
+        output_scale = torch.tensor([0.05], dtype=torch.float32, device="cuda")
+
     print(
         f"\nNUM_TOKENS:{NUM_TOKENS}, NUM_HEADS:{NUM_HEADS}, "
-        f"HEAD_SIZE:{HEAD_SIZE}, DTYPE: {output_dtype}, "
+        f"HEAD_SIZE:{HEAD_SIZE}, input_dtype: {input_dtype}, "
+        f"output_dtype: {output_dtype}, use_fp8: {use_fp8}, "
+        f"prefill_tokens_with_context: {prefill_tokens_with_context}, "
         f"Device: {current_platform.get_device_name()}"
     )
 
@@ -138,10 +177,10 @@ def test_merge_attn_states(
         (NUM_HEADS, NUM_TOKENS), dtype=torch.float32, device="cuda"
     )
     prefix_output = torch.randn(
-        (NUM_TOKENS, NUM_HEADS, HEAD_SIZE), dtype=output_dtype, device="cuda"
+        (NUM_TOKENS, NUM_HEADS, HEAD_SIZE), dtype=input_dtype, device="cuda"
     )
     suffix_output = torch.randn(
-        (NUM_TOKENS, NUM_HEADS, HEAD_SIZE), dtype=output_dtype, device="cuda"
+        (NUM_TOKENS, NUM_HEADS, HEAD_SIZE), dtype=input_dtype, device="cuda"
     )
 
     warmup_times = 2
@@ -164,6 +203,8 @@ def test_merge_attn_states(
             suffix_output,
             suffix_lse_torch,
             output_lse_torch,
+            prefill_tokens_with_context,
+            output_scale,
         )
     torch.accelerator.synchronize()
 
@@ -176,6 +217,8 @@ def test_merge_attn_states(
             suffix_output,
             suffix_lse_torch,
             output_lse_torch,
+            prefill_tokens_with_context,
+            output_scale,
         )
         end.record()
         torch.accelerator.synchronize()
@@ -199,6 +242,8 @@ def test_merge_attn_states(
             suffix_output,
             suffix_lse,
             output_lse_ref_triton,
+            prefill_tokens_with_context,
+            output_scale,
         )
     torch.accelerator.synchronize()
 
@@ -211,6 +256,8 @@ def test_merge_attn_states(
             suffix_output,
             suffix_lse,
             output_lse_ref_triton,
+            prefill_tokens_with_context,
+            output_scale,
         )
         end.record()
         torch.accelerator.synchronize()
@@ -231,6 +278,8 @@ def test_merge_attn_states(
             suffix_output,
             suffix_lse,
             output_lse_cuda,
+            prefill_tokens_with_context,
+            output_scale,
         )
     torch.accelerator.synchronize()
 
@@ -243,6 +292,8 @@ def test_merge_attn_states(
             suffix_output,
             suffix_lse,
             output_lse_cuda,
+            prefill_tokens_with_context,
+            output_scale,
         )
         end.record()
         torch.accelerator.synchronize()
@@ -264,7 +315,19 @@ def test_merge_attn_states(
     # Liger Kernel: Efficient Triton Kernels for LLM Training
     # https://arxiv.org/pdf/2410.10989, 3.3 Correctness
     # use rtol = 1e-2 for bfloat16.
-    rtol = 1e-2 if output_dtype == torch.bfloat16 else 1e-3
+    if use_fp8:
+        # Compare in dequantized space (multiply back by scale) so that
+        # absolute differences reflect real precision, not amplified FP8
+        # quantization steps.
+        atol, rtol = 1e-1, 1e-1
+        assert output_scale is not None
+        scale = output_scale.item()
+    elif output_dtype == torch.bfloat16:
+        atol, rtol = 1e-3, 1e-2
+        scale = 1.0
+    else:
+        atol, rtol = 1e-3, 1e-3
+        scale = 1.0
 
     def diff(a: torch.Tensor, b: torch.Tensor):
         max_diff = torch.max(torch.abs(a.float() - b.float()))
@@ -276,16 +339,26 @@ def diff(a: torch.Tensor, b: torch.Tensor):
     output_ref = output_ref_triton
     output_lse_ref = output_lse_ref_triton
     torch.testing.assert_close(
-        output_cuda.float(), output_ref.float(), atol=1e-3, rtol=rtol
+        output_cuda.float() * scale,
+        output_ref.float() * scale,
+        atol=atol,
+        rtol=rtol,
+    )
+    print(
+        "Output all match, max abs diff (dequantized):"
+        if use_fp8
+        else "Output all match, max abs diff:"
     )
-    print("Output all match, max abs diff:")
-    print(f"(Triton vs Torch) : {diff(output_torch, output_ref)}")
-    print(f"  (CUDA vs Torch) : {diff(output_torch, output_cuda)}")
-    print(f"  (CUDA vs Triton): {diff(output_ref, output_cuda)}")
+    _diff = diff(output_ref.float() * scale, output_torch.float() * scale)
+    print(f"(Triton vs Torch) : {_diff}")
+    _diff = diff(output_torch.float() * scale, output_cuda.float() * scale)
+    print(f"  (CUDA vs Torch) : {_diff}")
+    _diff = diff(output_ref.float() * scale, output_cuda.float() * scale)
+    print(f"  (CUDA vs Triton): {_diff}")
     print("-" * 100)
 
     torch.testing.assert_close(
-        output_lse_cuda.float(), output_lse_ref.float(), atol=1e-3, rtol=rtol
+        output_lse_cuda.float(), output_lse_ref.float(), atol=atol, rtol=rtol
     )
     print("Output LSE all match, max abs diff:")
     print(f"(Triton vs Torch) : {diff(output_lse_torch, output_lse_ref)}")
diff --git a/tests/kernels/attention/test_rocm_triton_attn_dsv4.py b/tests/kernels/attention/test_rocm_triton_attn_dsv4.py
new file mode 100644
index 000000000000..d4fa9697cb7f
--- /dev/null
+++ b/tests/kernels/attention/test_rocm_triton_attn_dsv4.py
@@ -0,0 +1,377 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+import torch
+
+from vllm.platforms import current_platform
+
+pytestmark = pytest.mark.skipif(
+    not current_platform.is_rocm(), reason="Only used by ROCm"
+)
+
+NOPE_HEAD_DIM = 448
+ROPE_HEAD_DIM = 64
+HEAD_DIM = NOPE_HEAD_DIM + ROPE_HEAD_DIM
+
+
+def _ref_global_topk_ragged(
+    topk_indices: torch.Tensor,
+    token_to_req_indices: torch.Tensor,
+    block_table: torch.Tensor,
+    block_size: int,
+    is_valid_token: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    topk = topk_indices.reshape(topk_indices.shape[0], -1)
+    valid = (topk >= 0) & is_valid_token[:, None]
+    lens = valid.sum(dim=1, dtype=torch.int32)
+    indptr = torch.zeros(lens.shape[0] + 1, dtype=torch.int32, device=topk.device)
+    torch.cumsum(lens, dim=0, out=indptr[1:])
+
+    safe_topk = torch.clamp(topk, min=0)
+    block_indices = safe_topk // block_size
+    block_offsets = safe_topk % block_size
+    req_indices = token_to_req_indices[:, None].expand_as(topk)
+    slot_ids = block_table[req_indices, block_indices] * block_size + block_offsets
+
+    offsets = torch.arange(topk.shape[1], dtype=torch.int32, device=topk.device)
+    positions = indptr[:-1, None] + offsets[None, :]
+    return slot_ids[valid], positions[valid].to(torch.long), indptr, lens
+
+
+def _ref_sparse_prefill_ragged(
+    q: torch.Tensor,
+    kv: torch.Tensor,
+    rows: list[list[int]],
+    scale: float,
+    attn_sink: torch.Tensor | None,
+) -> torch.Tensor:
+    q_f32 = q.float()
+    kv_f32 = kv.float()
+    out = torch.empty_like(q_f32)
+
+    for query_idx in range(q.shape[0]):
+        row_indices = rows[query_idx]
+        for head_idx in range(q.shape[1]):
+            if row_indices:
+                selected_kv = kv_f32[row_indices]
+                scores = torch.mv(selected_kv, q_f32[query_idx, head_idx]) * scale
+                if attn_sink is not None:
+                    scores_with_sink = torch.cat(
+                        [scores, attn_sink[head_idx].float().reshape(1)]
+                    )
+                    probs = torch.softmax(scores_with_sink, dim=0)[:-1]
+                else:
+                    probs = torch.softmax(scores, dim=0)
+                out[query_idx, head_idx] = torch.sum(
+                    probs[:, None] * selected_kv, dim=0
+                )
+            else:
+                out[query_idx, head_idx] = 0
+    return out.to(torch.bfloat16)
+
+
+def _pack_fp8_ds_mla_cache(kv: torch.Tensor, block_size: int) -> torch.Tensor:
+    assert kv.shape[-1] == HEAD_DIM
+    num_tokens = kv.shape[0]
+    num_blocks = (num_tokens + block_size - 1) // block_size
+    cache = torch.zeros(
+        (num_blocks, block_size, 584),
+        dtype=torch.uint8,
+        device=kv.device,
+    )
+    cache_flat = cache.view(torch.uint8).flatten()
+    kv_nope_fp8 = (
+        kv[:, :NOPE_HEAD_DIM].to(current_platform.fp8_dtype()).view(torch.uint8)
+    )
+    kv_rope_u8 = kv[:, NOPE_HEAD_DIM:].contiguous().view(torch.uint8)
+
+    for slot in range(num_tokens):
+        block_idx = slot // block_size
+        pos = slot % block_size
+        block_base = block_idx * cache.stride(0)
+        token_base = block_base + pos * 576
+        scale_base = block_base + block_size * 576 + pos * 8
+        cache_flat[token_base : token_base + NOPE_HEAD_DIM].copy_(kv_nope_fp8[slot])
+        cache_flat[
+            token_base + NOPE_HEAD_DIM : token_base + NOPE_HEAD_DIM + ROPE_HEAD_DIM * 2
+        ].copy_(kv_rope_u8[slot])
+        cache_flat[scale_base : scale_base + 7].fill_(127)
+    return cache
+
+
+def _read_fp8_ds_mla_cache(
+    cache: torch.Tensor, slot: int, block_size: int
+) -> torch.Tensor:
+    cache_flat = cache.view(torch.uint8).flatten()
+    block_idx = slot // block_size
+    pos = slot % block_size
+    block_base = block_idx * cache.stride(0)
+    token_base = block_base + pos * 576
+
+    nope_u8 = cache_flat[token_base : token_base + NOPE_HEAD_DIM]
+    nope = nope_u8.view(current_platform.fp8_dtype()).to(torch.float32)
+    rope_u8 = cache_flat[
+        token_base + NOPE_HEAD_DIM : token_base + NOPE_HEAD_DIM + ROPE_HEAD_DIM * 2
+    ]
+    rope = rope_u8.view(torch.bfloat16).to(torch.float32)
+    return torch.cat([nope, rope])
+
+
+def _ref_sparse_decode_ragged(
+    q: torch.Tensor,
+    main_cache: torch.Tensor,
+    main_rows: list[list[int]],
+    scale: float,
+    attn_sink: torch.Tensor | None,
+    block_size: int,
+    extra_cache: torch.Tensor | None = None,
+    extra_rows: list[list[int]] | None = None,
+) -> torch.Tensor:
+    q_f32 = q.float()
+    out = torch.empty_like(q_f32)
+
+    for query_idx in range(q.shape[0]):
+        row_kv = [
+            _read_fp8_ds_mla_cache(main_cache, int(slot), block_size)
+            for slot in main_rows[query_idx]
+        ]
+        if extra_cache is not None and extra_rows is not None:
+            row_kv.extend(
+                _read_fp8_ds_mla_cache(extra_cache, int(slot), block_size)
+                for slot in extra_rows[query_idx]
+            )
+
+        kv = torch.stack(row_kv).to(q.device)
+        for head_idx in range(q.shape[1]):
+            scores = torch.mv(kv, q_f32[query_idx, head_idx]) * scale
+            if attn_sink is not None:
+                scores_with_sink = torch.cat(
+                    [scores, attn_sink[head_idx].float().reshape(1)]
+                )
+                probs = torch.softmax(scores_with_sink, dim=0)[:-1]
+            else:
+                probs = torch.softmax(scores, dim=0)
+            out[query_idx, head_idx] = torch.sum(probs[:, None] * kv, dim=0)
+    return out.to(torch.bfloat16)
+
+
+def _ref_combine_topk_swa_ragged(
+    device: torch.device,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    expected_ragged = torch.tensor(
+        [
+            100,
+            101,
+            7,
+            8,
+            9,
+            110,
+            111,
+            8,
+            9,
+            10,
+            120,
+            121,
+            122,
+            9,
+            10,
+            11,
+            150,
+            27,
+            28,
+            29,
+            160,
+            161,
+            28,
+            29,
+            30,
+        ],
+        dtype=torch.int32,
+        device=device,
+    )
+    expected_lens = torch.tensor([5, 5, 6, 4, 5], dtype=torch.int32, device=device)
+    expected_indptr = torch.zeros(6, dtype=torch.int32, device=device)
+    torch.cumsum(expected_lens, dim=0, out=expected_indptr[1:])
+    return expected_ragged, expected_indptr, expected_lens
+
+
+@torch.inference_mode()
+def test_compute_global_topk_ragged_indices_and_indptr() -> None:
+    from vllm.models.deepseek_v4.amd.rocm import (
+        compute_global_topk_ragged_indices_and_indptr,
+    )
+
+    device = torch.device("cuda")
+    block_size = 4
+    topk_indices = torch.tensor(
+        [
+            [0, 3, 4, -1],
+            [5, 8, -1, -1],
+            [2, 7, 9, -1],
+        ],
+        dtype=torch.int32,
+        device=device,
+    )
+    token_to_req_indices = torch.tensor([0, 1, 1], dtype=torch.int32, device=device)
+    block_table = torch.tensor(
+        [
+            [10, 11, 12],
+            [20, 21, 22],
+        ],
+        dtype=torch.int32,
+        device=device,
+    )
+    is_valid_token = torch.tensor([True, False, True], dtype=torch.bool, device=device)
+
+    actual_ragged, actual_indptr, actual_lens = (
+        compute_global_topk_ragged_indices_and_indptr(
+            topk_indices,
+            token_to_req_indices,
+            block_table,
+            block_size,
+            is_valid_token,
+        )
+    )
+    expected_values, expected_positions, expected_indptr, expected_lens = (
+        _ref_global_topk_ragged(
+            topk_indices,
+            token_to_req_indices,
+            block_table,
+            block_size,
+            is_valid_token,
+        )
+    )
+
+    torch.testing.assert_close(actual_ragged[expected_positions], expected_values)
+    torch.testing.assert_close(actual_indptr, expected_indptr)
+    torch.testing.assert_close(actual_lens, expected_lens)
+
+
+@torch.inference_mode()
+def test_sparse_attn_prefill_ragged_kernel() -> None:
+    from vllm.v1.attention.ops.rocm_aiter_mla_sparse import (
+        _rocm_sparse_attn_prefill_ragged_triton,
+    )
+
+    device = torch.device("cuda")
+    torch.manual_seed(0)
+    q = torch.randn(3, 3, HEAD_DIM, dtype=torch.bfloat16, device=device) * 0.125
+    kv = torch.randn(5, HEAD_DIM, dtype=torch.bfloat16, device=device) * 0.125
+    indices = torch.tensor([0, 2, 1, 3, 4], dtype=torch.int32, device=device)
+    indptr = torch.tensor([0, 2, 5, 5], dtype=torch.int32, device=device)
+    attn_sink = torch.tensor([-0.25, 0.0, 0.25], dtype=torch.float32, device=device)
+    scale = HEAD_DIM**-0.5
+
+    actual = _rocm_sparse_attn_prefill_ragged_triton(
+        q=q,
+        kv=kv,
+        indices=indices,
+        indptr=indptr,
+        scale=scale,
+        attn_sink=attn_sink,
+        nope_head_dim=NOPE_HEAD_DIM,
+        rope_head_dim=ROPE_HEAD_DIM,
+    )
+    expected = _ref_sparse_prefill_ragged(
+        q, kv, [[0, 2], [1, 3, 4], []], scale, attn_sink
+    )
+
+    torch.testing.assert_close(actual, expected, atol=2e-2, rtol=2e-2)
+
+
+@torch.inference_mode()
+def test_sparse_attn_decode_ragged_kernel() -> None:
+    from vllm.v1.attention.ops.rocm_aiter_mla_sparse import (
+        _rocm_sparse_attn_decode_ragged_triton,
+    )
+
+    device = torch.device("cuda")
+    torch.manual_seed(1)
+    block_size = 4
+    q = torch.randn(2, 3, HEAD_DIM, dtype=torch.bfloat16, device=device) * 0.125
+    main_kv = torch.randn(6, HEAD_DIM, dtype=torch.bfloat16, device=device) * 0.125
+    extra_kv = torch.randn(5, HEAD_DIM, dtype=torch.bfloat16, device=device) * 0.125
+    main_cache = _pack_fp8_ds_mla_cache(main_kv, block_size)
+    extra_cache = _pack_fp8_ds_mla_cache(extra_kv, block_size)
+    main_indices = torch.tensor([0, 2, 4, 1], dtype=torch.int32, device=device)
+    main_indptr = torch.tensor([0, 2, 4], dtype=torch.int32, device=device)
+    extra_indices = torch.tensor([1, 3, 0], dtype=torch.int32, device=device)
+    extra_indptr = torch.tensor([0, 1, 3], dtype=torch.int32, device=device)
+    attn_sink = torch.tensor([-0.1, 0.0, 0.1], dtype=torch.float32, device=device)
+    scale = HEAD_DIM**-0.5
+
+    actual = _rocm_sparse_attn_decode_ragged_triton(
+        q=q,
+        main_cache=main_cache,
+        main_indices=main_indices,
+        main_indptr=main_indptr,
+        scale=scale,
+        attn_sink=attn_sink,
+        nope_head_dim=NOPE_HEAD_DIM,
+        rope_head_dim=ROPE_HEAD_DIM,
+        extra_cache=extra_cache,
+        extra_indices=extra_indices,
+        extra_indptr=extra_indptr,
+    )
+    expected = _ref_sparse_decode_ragged(
+        q=q,
+        main_cache=main_cache,
+        main_rows=[[0, 2], [4, 1]],
+        scale=scale,
+        attn_sink=attn_sink,
+        block_size=block_size,
+        extra_cache=extra_cache,
+        extra_rows=[[1], [3, 0]],
+    )
+
+    torch.testing.assert_close(actual, expected, atol=2e-2, rtol=2e-2)
+
+
+@torch.inference_mode()
+def test_combine_topk_swa_indices_ragged() -> None:
+    from vllm.models.deepseek_v4.amd.rocm import (
+        combine_topk_swa_indices_ragged,
+    )
+
+    device = torch.device("cuda")
+    topk_indices = torch.tensor(
+        [
+            [100, 101, 102, 103],
+            [110, 111, 112, 113],
+            [120, 121, 122, 123],
+            [130, 131, 132, 133],
+            [140, 141, 142, 143],
+        ],
+        dtype=torch.int32,
+        device=device,
+    )
+    query_start_loc = torch.tensor([0, 3, 5], dtype=torch.int32, device=device)
+    seq_lens = torch.tensor([6, 4], dtype=torch.int32, device=device)
+    gather_lens = torch.tensor([4, 3], dtype=torch.int32, device=device)
+    window_size = 3
+    compress_ratio = 2
+    topk = 4
+    M = 20
+    N = 8
+
+    actual_ragged, actual_indptr, actual_lens = combine_topk_swa_indices_ragged(
+        topk_indices,
+        query_start_loc,
+        seq_lens,
+        gather_lens,
+        window_size,
+        compress_ratio,
+        topk,
+        M,
+        N,
+    )
+    expected_ragged, expected_indptr, expected_lens = _ref_combine_topk_swa_ragged(
+        device
+    )
+
+    torch.testing.assert_close(
+        actual_ragged[: expected_ragged.numel()], expected_ragged
+    )
+    torch.testing.assert_close(actual_indptr, expected_indptr)
+    torch.testing.assert_close(actual_lens, expected_lens)
diff --git a/tests/kernels/attention/test_triton_decode_attention.py b/tests/kernels/attention/test_triton_decode_attention.py
index a9b881629441..81e8bb17e7bc 100644
--- a/tests/kernels/attention/test_triton_decode_attention.py
+++ b/tests/kernels/attention/test_triton_decode_attention.py
@@ -4,9 +4,12 @@
 import pytest
 import torch
 
+from vllm.platforms import current_platform
 from vllm.utils.math_utils import cdiv
 from vllm.v1.attention.ops.triton_decode_attention import decode_attention_fwd
 
+DEVICE_TYPE = current_platform.device_type
+
 
 @pytest.mark.parametrize("B", [3, 5])
 @pytest.mark.parametrize("L", [1027, 1025])
@@ -25,33 +28,35 @@ def test_decode_attention(B, L, H_Q, H_KV, D_QK, D_V, CACHE_SIZE, PAGE_SIZE):
 
     num_pages_per_batch = cdiv(seq_len, PAGE_SIZE)
     req_to_page = torch.randint(
-        0, CACHE_SIZE // PAGE_SIZE, (B, num_pages_per_batch, 1), device="cuda"
+        0, CACHE_SIZE // PAGE_SIZE, (B, num_pages_per_batch, 1), device=DEVICE_TYPE
     )
     req_to_token = req_to_page * PAGE_SIZE
     req_to_token = req_to_token.expand(B, num_pages_per_batch, PAGE_SIZE)
-    req_to_token = req_to_token + torch.arange(PAGE_SIZE, device="cuda").view(1, 1, -1)
+    req_to_token = req_to_token + torch.arange(PAGE_SIZE, device=DEVICE_TYPE).view(
+        1, 1, -1
+    )
     req_to_token = req_to_token.view(B, -1)
     req_to_token = req_to_token[:, :seq_len].contiguous()
 
     # q represents the new token being generated, one per batch
-    q = torch.randn(B, H_Q, D_QK, dtype=dtype, device="cuda")
+    q = torch.randn(B, H_Q, D_QK, dtype=dtype, device=DEVICE_TYPE)
 
     # k_buffer and v_buffer represent all previous tokens
     # Page size is 1.
-    k_buffer = torch.randn(CACHE_SIZE, H_KV, D_QK, dtype=dtype, device="cuda")
-    v_buffer = torch.randn(CACHE_SIZE, H_KV, D_V, dtype=dtype, device="cuda")
+    k_buffer = torch.randn(CACHE_SIZE, H_KV, D_QK, dtype=dtype, device=DEVICE_TYPE)
+    v_buffer = torch.randn(CACHE_SIZE, H_KV, D_V, dtype=dtype, device=DEVICE_TYPE)
 
     # o will have the same shape as q
-    o = torch.zeros(B, H_Q, D_V, dtype=dtype, device="cuda")
+    o = torch.zeros(B, H_Q, D_V, dtype=dtype, device=DEVICE_TYPE)
 
-    lse = torch.zeros(B, H_Q, dtype=dtype, device="cuda")
+    lse = torch.zeros(B, H_Q, dtype=dtype, device=DEVICE_TYPE)
 
-    b_seq_len = torch.full((B,), seq_len, device="cuda")
+    b_seq_len = torch.full((B,), seq_len, device=DEVICE_TYPE)
 
     attn_logits = torch.empty(
         (B, H_Q, num_kv_splits, D_V + 1),
         dtype=torch.float32,
-        device="cuda",
+        device=DEVICE_TYPE,
     )
 
     # Call the original implementation.
@@ -127,25 +132,27 @@ def test_decode_attention_fp8(B, L, H_Q, H_KV, D_QK, D_V, CACHE_SIZE, PAGE_SIZE)
 
     num_pages_per_batch = cdiv(seq_len, PAGE_SIZE)
     req_to_page = torch.randint(
-        0, CACHE_SIZE // PAGE_SIZE, (B, num_pages_per_batch, 1), device="cuda"
+        0, CACHE_SIZE // PAGE_SIZE, (B, num_pages_per_batch, 1), device=DEVICE_TYPE
     )
     req_to_token = req_to_page * PAGE_SIZE
     req_to_token = req_to_token.expand(B, num_pages_per_batch, PAGE_SIZE)
-    req_to_token = req_to_token + torch.arange(PAGE_SIZE, device="cuda").view(1, 1, -1)
+    req_to_token = req_to_token + torch.arange(PAGE_SIZE, device=DEVICE_TYPE).view(
+        1, 1, -1
+    )
     req_to_token = req_to_token.view(B, -1)
     req_to_token = req_to_token[:, :seq_len].contiguous()
 
-    q = torch.randn(B, H_Q, D_QK, dtype=dtype, device="cuda")
+    q = torch.randn(B, H_Q, D_QK, dtype=dtype, device=DEVICE_TYPE)
 
     # Create BF16 K/V as reference
-    k_bf16 = torch.randn(CACHE_SIZE, H_KV, D_QK, dtype=dtype, device="cuda")
-    v_bf16 = torch.randn(CACHE_SIZE, H_KV, D_V, dtype=dtype, device="cuda")
+    k_bf16 = torch.randn(CACHE_SIZE, H_KV, D_QK, dtype=dtype, device=DEVICE_TYPE)
+    v_bf16 = torch.randn(CACHE_SIZE, H_KV, D_V, dtype=dtype, device=DEVICE_TYPE)
 
     # --- BF16 reference ---
-    o_ref = torch.zeros(B, H_Q, D_V, dtype=dtype, device="cuda")
-    lse_ref = torch.zeros(B, H_Q, dtype=dtype, device="cuda")
+    o_ref = torch.zeros(B, H_Q, D_V, dtype=dtype, device=DEVICE_TYPE)
+    lse_ref = torch.zeros(B, H_Q, dtype=dtype, device=DEVICE_TYPE)
     attn_logits = torch.empty(
-        (B, H_Q, num_kv_splits, D_V + 1), dtype=torch.float32, device="cuda"
+        (B, H_Q, num_kv_splits, D_V + 1), dtype=torch.float32, device=DEVICE_TYPE
     )
 
     if PAGE_SIZE == 1:
@@ -156,7 +163,7 @@ def test_decode_attention_fp8(B, L, H_Q, H_KV, D_QK, D_V, CACHE_SIZE, PAGE_SIZE)
             o_ref,
             lse_ref,
             req_to_token,
-            b_seq_len=torch.full((B,), seq_len, device="cuda"),
+            b_seq_len=torch.full((B,), seq_len, device=DEVICE_TYPE),
             attn_logits=attn_logits,
             num_kv_splits=num_kv_splits,
             sm_scale=sm_scale,
@@ -171,7 +178,7 @@ def test_decode_attention_fp8(B, L, H_Q, H_KV, D_QK, D_V, CACHE_SIZE, PAGE_SIZE)
             o_ref,
             lse_ref,
             req_to_page,
-            b_seq_len=torch.full((B,), seq_len, device="cuda"),
+            b_seq_len=torch.full((B,), seq_len, device=DEVICE_TYPE),
             attn_logits=attn_logits,
             num_kv_splits=num_kv_splits,
             sm_scale=sm_scale,
@@ -182,10 +189,10 @@ def test_decode_attention_fp8(B, L, H_Q, H_KV, D_QK, D_V, CACHE_SIZE, PAGE_SIZE)
     k_fp8, k_scale = _quantize_to_fp8(k_bf16)
     v_fp8, v_scale = _quantize_to_fp8(v_bf16)
 
-    o_fp8 = torch.zeros(B, H_Q, D_V, dtype=dtype, device="cuda")
-    lse_fp8 = torch.zeros(B, H_Q, dtype=dtype, device="cuda")
+    o_fp8 = torch.zeros(B, H_Q, D_V, dtype=dtype, device=DEVICE_TYPE)
+    lse_fp8 = torch.zeros(B, H_Q, dtype=dtype, device=DEVICE_TYPE)
     attn_logits_fp8 = torch.empty(
-        (B, H_Q, num_kv_splits, D_V + 1), dtype=torch.float32, device="cuda"
+        (B, H_Q, num_kv_splits, D_V + 1), dtype=torch.float32, device=DEVICE_TYPE
     )
 
     if PAGE_SIZE == 1:
@@ -196,7 +203,7 @@ def test_decode_attention_fp8(B, L, H_Q, H_KV, D_QK, D_V, CACHE_SIZE, PAGE_SIZE)
             o_fp8,
             lse_fp8,
             req_to_token,
-            b_seq_len=torch.full((B,), seq_len, device="cuda"),
+            b_seq_len=torch.full((B,), seq_len, device=DEVICE_TYPE),
             attn_logits=attn_logits_fp8,
             num_kv_splits=num_kv_splits,
             sm_scale=sm_scale,
@@ -213,7 +220,7 @@ def test_decode_attention_fp8(B, L, H_Q, H_KV, D_QK, D_V, CACHE_SIZE, PAGE_SIZE)
             o_fp8,
             lse_fp8,
             req_to_page,
-            b_seq_len=torch.full((B,), seq_len, device="cuda"),
+            b_seq_len=torch.full((B,), seq_len, device=DEVICE_TYPE),
             attn_logits=attn_logits_fp8,
             num_kv_splits=num_kv_splits,
             sm_scale=sm_scale,
diff --git a/tests/kernels/attention/test_triton_prefill_attention.py b/tests/kernels/attention/test_triton_prefill_attention.py
index f4505d91f5f7..6316a926ae33 100644
--- a/tests/kernels/attention/test_triton_prefill_attention.py
+++ b/tests/kernels/attention/test_triton_prefill_attention.py
@@ -5,8 +5,11 @@
 import torch
 import torch.nn.functional as F
 
+from vllm.platforms import current_platform
 from vllm.v1.attention.ops.triton_prefill_attention import context_attention_fwd
 
+DEVICE_TYPE = current_platform.device_type
+
 
 def ref_masked_attention(
     q: torch.Tensor,
@@ -92,17 +95,19 @@ def test_context_attention(
     torch.manual_seed(42)
 
     # Generate random sequence lengths for each batch
-    seq_lens = torch.randint(max_seq_len // 2, max_seq_len + 1, (B,), device="cuda")
+    seq_lens = torch.randint(
+        max_seq_len // 2, max_seq_len + 1, (B,), device=DEVICE_TYPE
+    )
     total_tokens = seq_lens.sum().item()
 
     # Create batch start locations
-    b_start_loc = torch.zeros(B, dtype=torch.int32, device="cuda")
+    b_start_loc = torch.zeros(B, dtype=torch.int32, device=DEVICE_TYPE)
     b_start_loc[1:] = torch.cumsum(seq_lens[:-1], dim=0)
 
     # Create input tensors
-    q = torch.randn(total_tokens, H_Q, D, dtype=dtype, device="cuda")
-    k = torch.randn(total_tokens, H_KV, D, dtype=dtype, device="cuda")
-    v = torch.randn(total_tokens, H_KV, D, dtype=dtype, device="cuda")
+    q = torch.randn(total_tokens, H_Q, D, dtype=dtype, device=DEVICE_TYPE)
+    k = torch.randn(total_tokens, H_KV, D, dtype=dtype, device=DEVICE_TYPE)
+    v = torch.randn(total_tokens, H_KV, D, dtype=dtype, device=DEVICE_TYPE)
     o = torch.zeros_like(q)
 
     # Call Triton kernel
@@ -169,17 +174,19 @@ def test_context_attention_sliding_window(
     torch.manual_seed(42)
 
     # Generate random sequence lengths for each batch
-    seq_lens = torch.randint(max_seq_len // 2, max_seq_len + 1, (B,), device="cuda")
+    seq_lens = torch.randint(
+        max_seq_len // 2, max_seq_len + 1, (B,), device=DEVICE_TYPE
+    )
     total_tokens = seq_lens.sum().item()
 
     # Create batch start locations
-    b_start_loc = torch.zeros(B, dtype=torch.int32, device="cuda")
+    b_start_loc = torch.zeros(B, dtype=torch.int32, device=DEVICE_TYPE)
     b_start_loc[1:] = torch.cumsum(seq_lens[:-1], dim=0)
 
     # Create input tensors
-    q = torch.randn(total_tokens, H_Q, D, dtype=dtype, device="cuda")
-    k = torch.randn(total_tokens, H_KV, D, dtype=dtype, device="cuda")
-    v = torch.randn(total_tokens, H_KV, D, dtype=dtype, device="cuda")
+    q = torch.randn(total_tokens, H_Q, D, dtype=dtype, device=DEVICE_TYPE)
+    k = torch.randn(total_tokens, H_KV, D, dtype=dtype, device=DEVICE_TYPE)
+    v = torch.randn(total_tokens, H_KV, D, dtype=dtype, device=DEVICE_TYPE)
     o = torch.zeros_like(q)
 
     # Call Triton kernel
diff --git a/tests/kernels/attention/test_triton_unified_attention.py b/tests/kernels/attention/test_triton_unified_attention.py
index 99cdc7ffa4a3..6440ba3156e3 100644
--- a/tests/kernels/attention/test_triton_unified_attention.py
+++ b/tests/kernels/attention/test_triton_unified_attention.py
@@ -9,6 +9,9 @@
 from vllm.utils.math_utils import next_power_of_2
 from vllm.utils.torch_utils import set_random_seed
 from vllm.v1.attention.ops.triton_unified_attention import unified_attention
+from vllm.v1.kv_cache_interface import KVQuantMode
+
+DEVICE_TYPE = current_platform.device_type
 
 NUM_HEADS = [(4, 4), (8, 2), (5, 1)]
 HEAD_SIZES = [128, 256]
@@ -114,7 +117,7 @@ def test_triton_unified_attn(
     q_dtype: torch.dtype | None,
     seq_threshold_3D: int,
 ) -> None:
-    torch.set_default_device("cuda")
+    torch.set_default_device(DEVICE_TYPE)
 
     set_random_seed(0)
     num_seqs = len(seq_lens)
@@ -151,16 +154,20 @@ def test_triton_unified_attn(
     q_descale = None
     k_descale = None
     v_descale = None
+    kv_quant_mode = KVQuantMode.NONE
     if q_dtype is not None:
-        # QKV are drawn from N(0, 1): no need for a fp8 scaling factor
-        maybe_quantized_query = query.to(q_dtype)
-        maybe_quantized_key_cache = key_cache.to(q_dtype)
-        maybe_quantized_value_cache = value_cache.to(q_dtype)
-
+        # Use non-1 scales so FP8 Q/K/V descale handling is tested explicitly.
+        q_scale = torch.tensor(0.75, dtype=torch.float32)
+        k_scale = torch.tensor(0.5, dtype=torch.float32)
+        v_scale = torch.tensor(0.25, dtype=torch.float32)
+        q_descale = q_scale
         scale_shape = (num_seqs, num_kv_heads)
-        q_descale = None  # Not yet supported
-        k_descale = torch.rand(scale_shape, dtype=torch.float32)
-        v_descale = torch.rand(scale_shape, dtype=torch.float32)
+        k_descale = torch.full(scale_shape, k_scale.item(), dtype=torch.float32)
+        v_descale = torch.full(scale_shape, v_scale.item(), dtype=torch.float32)
+        maybe_quantized_query = (query / q_scale).to(q_dtype)
+        maybe_quantized_key_cache = (key_cache / k_scale).to(q_dtype)
+        maybe_quantized_value_cache = (value_cache / v_scale).to(q_dtype)
+        kv_quant_mode = KVQuantMode.FP8_PER_TENSOR
 
     num_par_softmax_segments = 16
     head_size_padded = next_power_of_2(head_size)
@@ -199,6 +206,7 @@ def test_triton_unified_attn(
         softmax_segm_output=softmax_segm_output,
         softmax_segm_max=softmax_segm_max,
         softmax_segm_expsum=softmax_segm_expsum,
+        kv_quant_mode=kv_quant_mode,
     )
 
     ref_output = ref_paged_attn(
@@ -221,6 +229,123 @@ def test_triton_unified_attn(
     )
 
 
+@pytest.mark.parametrize(
+    "seq_lens", [[(1, 1328), (5, 18), (129, 463)], [(1, 523), (1, 37), (1, 2011)]]
+)
+@pytest.mark.parametrize("num_heads", NUM_HEADS)
+@pytest.mark.parametrize("head_size", HEAD_SIZES)
+@pytest.mark.parametrize("block_size", BLOCK_SIZES)
+@pytest.mark.parametrize("num_blocks", NUM_BLOCKS)
+@pytest.mark.parametrize("seq_threshold_3D", SEQ_THRESHOLD_3D_VALUES)
+@torch.inference_mode()
+def test_triton_unified_attn_bf16_query_fp8_kv(
+    seq_lens: list[tuple[int, int]],
+    num_heads: tuple[int, int],
+    head_size: int,
+    block_size: int,
+    num_blocks: int,
+    seq_threshold_3D: int,
+) -> None:
+    """Test bf16 Q with FP8 per-tensor KV cache (dequant via _cast_kv_tile)."""
+    torch.set_default_device(DEVICE_TYPE)
+    set_random_seed(0)
+
+    num_seqs = len(seq_lens)
+    query_lens = [x[0] for x in seq_lens]
+    kv_lens = [x[1] for x in seq_lens]
+    num_query_heads = num_heads[0]
+    num_kv_heads = num_heads[1]
+    assert num_query_heads % num_kv_heads == 0
+    max_query_len = max(query_lens)
+    max_kv_len = max(kv_lens)
+    window_size = (-1, -1)
+    scale = head_size**-0.5
+
+    dtype = torch.bfloat16
+    query = torch.randn(sum(query_lens), num_query_heads, head_size, dtype=dtype)
+    key_cache = torch.randn(
+        num_blocks, block_size, num_kv_heads, head_size, dtype=dtype
+    )
+    value_cache = torch.randn_like(key_cache)
+
+    k_scale = torch.tensor(0.5, dtype=torch.float32)
+    v_scale = torch.tensor(0.25, dtype=torch.float32)
+    fp8_key_cache = (key_cache / k_scale).to(FP8_DTYPE)
+    fp8_value_cache = (value_cache / v_scale).to(FP8_DTYPE)
+
+    scale_shape = (num_seqs, num_kv_heads)
+    k_descale = torch.full(scale_shape, k_scale.item(), dtype=torch.float32)
+    v_descale = torch.full(scale_shape, v_scale.item(), dtype=torch.float32)
+
+    cu_query_lens = torch.tensor([0] + query_lens, dtype=torch.int32).cumsum(
+        dim=0, dtype=torch.int32
+    )
+    kv_lens_t = torch.tensor(kv_lens, dtype=torch.int32)
+
+    max_num_blocks_per_seq = (max_kv_len + block_size - 1) // block_size
+    block_tables = torch.randint(
+        0, num_blocks, (num_seqs, max_num_blocks_per_seq), dtype=torch.int32
+    )
+
+    output = torch.empty_like(query)
+
+    num_par_softmax_segments = 16
+    head_size_padded = next_power_of_2(head_size)
+    softmax_segm_output = torch.empty(
+        (seq_threshold_3D, num_query_heads, num_par_softmax_segments, head_size_padded),
+        dtype=torch.float32,
+    )
+    softmax_segm_max = torch.empty(
+        (seq_threshold_3D, num_query_heads, num_par_softmax_segments),
+        dtype=torch.float32,
+    )
+    softmax_segm_expsum = torch.empty(
+        (seq_threshold_3D, num_query_heads, num_par_softmax_segments),
+        dtype=torch.float32,
+    )
+
+    unified_attention(
+        q=query,
+        k=fp8_key_cache,
+        v=fp8_value_cache,
+        out=output,
+        cu_seqlens_q=cu_query_lens,
+        seqused_k=kv_lens_t,
+        max_seqlen_q=max_query_len,
+        max_seqlen_k=max_kv_len,
+        softmax_scale=scale,
+        causal=True,
+        window_size=window_size,
+        block_table=block_tables,
+        softcap=0,
+        q_descale=None,
+        k_descale=k_descale,
+        v_descale=v_descale,
+        seq_threshold_3D=seq_threshold_3D,
+        num_par_softmax_segments=num_par_softmax_segments,
+        softmax_segm_output=softmax_segm_output,
+        softmax_segm_max=softmax_segm_max,
+        softmax_segm_expsum=softmax_segm_expsum,
+        kv_quant_mode=KVQuantMode.FP8_PER_TENSOR,
+    )
+
+    ref_output = ref_paged_attn(
+        query=query,
+        key_cache=key_cache,
+        value_cache=value_cache,
+        query_lens=query_lens,
+        kv_lens=kv_lens,
+        block_tables=block_tables,
+        scale=scale,
+    )
+
+    atol, rtol = 1.5e-1, 1.5e-1
+    (
+        torch.testing.assert_close(output, ref_output, atol=atol, rtol=rtol),
+        f"{torch.max(torch.abs(output - ref_output))}",
+    )
+
+
 @pytest.mark.parametrize(
     "seq_lens",
     [
@@ -249,7 +374,7 @@ def test_triton_unified_attn_fp16_input_fp8_output(
     seq_threshold_3D: int,
 ) -> None:
     """Test with fp16 input and fp8 output using output_scale."""
-    torch.set_default_device("cuda")
+    torch.set_default_device(DEVICE_TYPE)
 
     set_random_seed(0)
     num_seqs = len(seq_lens)
@@ -343,3 +468,182 @@ def test_triton_unified_attn_fp16_input_fp8_output(
         torch.testing.assert_close(output_fp16, ref_output, atol=atol, rtol=rtol),
         f"{torch.max(torch.abs(output_fp16 - ref_output))}",
     )
+
+
+# USE_TD path covers two head-size regimes:
+# - pow2 (HEAD_SIZE == HEAD_SIZE_PADDED): full TD path including Q/O.
+# - non-pow2 (96, HEAD_SIZE_PADDED=128): gates USE_TD_QO off — Q load
+#   and output store fall back to pointer path, KV tile TD load remains.
+# The non-pow2 case mirrors real models like Phi-3-mini (head_size=96).
+HEAD_SIZES_USE_TD = [128, 256, 96]
+
+
+def _run_use_td_case(
+    seq_lens: list[tuple[int, int]],
+    num_heads: tuple[int, int],
+    head_size: int,
+    block_size: int,
+    sliding_window: int | None,
+    soft_cap: float | None,
+    seq_threshold_3D: int,
+    dtype: torch.dtype = torch.bfloat16,
+    num_blocks: int = 2048,
+) -> None:
+    """Shared driver for the USE_TD test cases.
+
+    Runs ``unified_attention(..., use_td=True)`` and compares against the
+    reference paged-attention implementation that the sibling non-TD
+    tests use.
+    """
+    torch.set_default_device(DEVICE_TYPE)
+    set_random_seed(0)
+
+    num_seqs = len(seq_lens)
+    query_lens = [x[0] for x in seq_lens]
+    kv_lens = [x[1] for x in seq_lens]
+    num_query_heads, num_kv_heads = num_heads
+    assert num_query_heads % num_kv_heads == 0
+    max_query_len = max(query_lens)
+    max_kv_len = max(kv_lens)
+    window_size = (sliding_window - 1, 0) if sliding_window is not None else (-1, -1)
+    scale = head_size**-0.5
+
+    query = torch.randn(sum(query_lens), num_query_heads, head_size, dtype=dtype)
+    key_cache = torch.randn(
+        num_blocks, block_size, num_kv_heads, head_size, dtype=dtype
+    )
+    value_cache = torch.randn_like(key_cache)
+    cu_query_lens = torch.tensor([0] + query_lens, dtype=torch.int32).cumsum(
+        dim=0, dtype=torch.int32
+    )
+    kv_lens_tensor = torch.tensor(kv_lens, dtype=torch.int32)
+
+    max_num_blocks_per_seq = (max_kv_len + block_size - 1) // block_size
+    block_tables = torch.randint(
+        0, num_blocks, (num_seqs, max_num_blocks_per_seq), dtype=torch.int32
+    )
+
+    output = torch.empty_like(query)
+
+    num_par_softmax_segments = 16
+    head_size_padded = next_power_of_2(head_size)
+    softmax_segm_output = torch.empty(
+        (seq_threshold_3D, num_query_heads, num_par_softmax_segments, head_size_padded),
+        dtype=torch.float32,
+    )
+    softmax_segm_max = torch.empty(
+        (seq_threshold_3D, num_query_heads, num_par_softmax_segments),
+        dtype=torch.float32,
+    )
+    softmax_segm_expsum = torch.empty(
+        (seq_threshold_3D, num_query_heads, num_par_softmax_segments),
+        dtype=torch.float32,
+    )
+
+    unified_attention(
+        q=query,
+        k=key_cache,
+        v=value_cache,
+        out=output,
+        cu_seqlens_q=cu_query_lens,
+        seqused_k=kv_lens_tensor,
+        max_seqlen_q=max_query_len,
+        max_seqlen_k=max_kv_len,
+        softmax_scale=scale,
+        causal=True,
+        window_size=window_size,
+        block_table=block_tables,
+        softcap=soft_cap if soft_cap is not None else 0,
+        q_descale=None,
+        k_descale=None,
+        v_descale=None,
+        seq_threshold_3D=seq_threshold_3D,
+        num_par_softmax_segments=num_par_softmax_segments,
+        softmax_segm_output=softmax_segm_output,
+        softmax_segm_max=softmax_segm_max,
+        softmax_segm_expsum=softmax_segm_expsum,
+        use_td=True,
+    )
+
+    ref_output = ref_paged_attn(
+        query=query,
+        key_cache=key_cache,
+        value_cache=value_cache,
+        query_lens=query_lens,
+        kv_lens=kv_lens,
+        block_tables=block_tables,
+        scale=scale,
+        sliding_window=sliding_window,
+        soft_cap=soft_cap,
+    )
+    torch.testing.assert_close(output, ref_output, atol=1.5e-2, rtol=1e-2)
+
+
+@pytest.mark.parametrize(
+    "seq_lens", [[(1, 1328), (5, 18), (129, 463)], [(1, 523), (1, 37), (1, 2011)]]
+)
+@pytest.mark.parametrize("num_heads", NUM_HEADS)
+@pytest.mark.parametrize("head_size", HEAD_SIZES_USE_TD)
+@pytest.mark.parametrize("block_size", BLOCK_SIZES)
+@pytest.mark.parametrize("sliding_window", [None, 128])
+@pytest.mark.parametrize("soft_cap", [None, 50.0])
+@pytest.mark.parametrize("num_blocks", NUM_BLOCKS)
+@pytest.mark.parametrize("seq_threshold_3D", SEQ_THRESHOLD_3D_VALUES)
+@torch.inference_mode()
+def test_triton_unified_attn_use_td(
+    seq_lens: list[tuple[int, int]],
+    num_heads: tuple[int, int],
+    head_size: int,
+    sliding_window: int | None,
+    block_size: int,
+    soft_cap: float | None,
+    num_blocks: int,
+    seq_threshold_3D: int,
+) -> None:
+    """Exercise the USE_TD (tensor-descriptor) Q/K/V load/store path.
+
+    Covers both 2D and 3D kernels via ``seq_threshold_3D``. Two routes
+    to the USE_TD_QO=False fallback (pointer path for Q/O with TD still
+    active for KV tile loads):
+
+    - non-pow2 ``num_queries_per_kv`` via ``NUM_HEADS`` entry ``(5, 1)``,
+    - non-pow2 ``head_size`` via ``HEAD_SIZES_USE_TD`` entry ``96``.
+    """
+    _run_use_td_case(
+        seq_lens=seq_lens,
+        num_heads=num_heads,
+        head_size=head_size,
+        block_size=block_size,
+        sliding_window=sliding_window,
+        soft_cap=soft_cap,
+        seq_threshold_3D=seq_threshold_3D,
+        num_blocks=num_blocks,
+    )
+
+
+# Prefill-heavy shape: long query drives the prefill kernel path where
+# ``_get_tile_size`` returns 32, which exceeds block_size=16 and must be
+# clamped by the fix in 'clamp TILE_SIZE to block_size when USE_TD'.
+# Only the prefill launch exercises the clamp, so parameterize only over
+# the (num_heads, seq_threshold_3D=0) combinations needed to cover it.
+@pytest.mark.parametrize("num_heads", [(4, 4), (5, 1)])
+@torch.inference_mode()
+def test_triton_unified_attn_use_td_tile_clamp(
+    num_heads: tuple[int, int],
+) -> None:
+    """Regression guard: ``USE_TD`` needs ``BLOCK_SIZE % TILE_SIZE == 0``.
+
+    With ``block_size=16`` and ``head_size=128`` (non-Gemma3),
+    ``_get_tile_size`` returns 32 for prefill, which violates the
+    ``USE_TD`` constraint unless clamped to ``block_size``.  Without
+    the clamp the triton kernel ``static_assert`` fires at compile time.
+    """
+    _run_use_td_case(
+        seq_lens=[(256, 256), (128, 128)],
+        num_heads=num_heads,
+        head_size=128,
+        block_size=16,
+        sliding_window=None,
+        soft_cap=None,
+        seq_threshold_3D=0,
+    )
diff --git a/tests/kernels/core/test_activation.py b/tests/kernels/core/test_activation.py
index e7de7731286f..3f1d45ba8e9e 100644
--- a/tests/kernels/core/test_activation.py
+++ b/tests/kernels/core/test_activation.py
@@ -16,6 +16,7 @@
     NewGELU,
     QuickGELU,
     SiluAndMul,
+    SiluAndMulWithClamp,
     SwigluOAIAndMul,
     SwigluStepAndMul,
     swiglustep_and_mul_triton,
@@ -116,6 +117,85 @@ def _get_rtol(output) -> float:
         opcheck(fn, (out, x))
 
 
+SWIGLU_LIMITS = [3.0, 7.0, 15.0]
+
+
+@pytest.mark.parametrize("swiglu_limit", SWIGLU_LIMITS)
+@pytest.mark.parametrize("num_tokens", NUM_TOKENS)
+@pytest.mark.parametrize("d", D)
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("seed", SEEDS)
+@pytest.mark.parametrize("device", CUDA_DEVICES)
+@torch.inference_mode()
+def test_silu_and_mul_with_clamp(
+    default_vllm_config,
+    swiglu_limit: float,
+    num_tokens: int,
+    d: int,
+    dtype: torch.dtype,
+    seed: int,
+    device: str,
+) -> None:
+    """SiluAndMulWithClamp: cuda kernel must match native reference."""
+    set_random_seed(seed)
+    torch.set_default_device(device)
+    # Use large values to ensure clamping is exercised.
+    x = torch.randn(num_tokens, 2 * d, dtype=dtype) * swiglu_limit * 2
+
+    layer = SiluAndMulWithClamp(swiglu_limit, compile_native=False)
+    out = layer(x)
+    ref_out = layer.forward_native(x)
+
+    rtol = {
+        torch.float16: 2e-3,
+        torch.bfloat16: 2e-2,
+        torch.float: 1.3e-6,
+    }
+    torch.testing.assert_close(
+        out, ref_out, atol=get_default_atol(out), rtol=rtol[out.dtype]
+    )
+
+    # Verify clamping is actually being applied: the clamped output should
+    # differ from the unclamped SiluAndMul output when inputs are large.
+    unclamped_out = SiluAndMul.forward_native(x)
+    assert not torch.equal(ref_out.float(), unclamped_out.float()), (
+        "Input was not large enough to exercise the clamp; increase scale"
+    )
+
+    # Verify gate clamping semantics with a controlled scalar case.
+    # gate=large_val is clamped to limit first, then silu(limit) * 1.0.
+    x_gate = torch.tensor(
+        [[swiglu_limit * 20.0, 1.0]], dtype=torch.float32, device=device
+    )
+    out_gate = SiluAndMulWithClamp(swiglu_limit, compile_native=False)(x_gate)
+    expected_gate = torch.nn.functional.silu(
+        torch.tensor(swiglu_limit, dtype=torch.float32)
+    ).item()
+    torch.testing.assert_close(
+        out_gate,
+        torch.tensor([[expected_gate]], dtype=torch.float32, device=device),
+        atol=1e-3,
+        rtol=1e-3,
+    )
+
+    # Verify up clamping semantics: up >> limit gets clamped to limit.
+    x_up = torch.tensor(
+        [[1.0, swiglu_limit * 20.0]], dtype=torch.float32, device=device
+    )
+    out_up = SiluAndMulWithClamp(swiglu_limit, compile_native=False)(x_up)
+    silu_1 = torch.nn.functional.silu(torch.tensor(1.0)).item()
+    torch.testing.assert_close(
+        out_up,
+        torch.tensor([[silu_1 * swiglu_limit]], dtype=torch.float32, device=device),
+        atol=1e-3,
+        rtol=1e-3,
+    )
+
+    # opcheck
+    out_buf = torch.empty(x.shape[:-1] + (d,), dtype=dtype, device=device)
+    opcheck(torch.ops._C.silu_and_mul_with_clamp, (out_buf, x, swiglu_limit))
+
+
 @pytest.mark.parametrize(
     "activation",
     [
diff --git a/tests/kernels/core/test_cpu_activation.py b/tests/kernels/core/test_cpu_activation.py
new file mode 100644
index 000000000000..40b5f0454683
--- /dev/null
+++ b/tests/kernels/core/test_cpu_activation.py
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+import torch
+
+from tests.kernels.allclose_default import get_default_atol, get_default_rtol
+from tests.kernels.utils import opcheck
+from vllm.platforms import CpuArchEnum, current_platform
+from vllm.utils.torch_utils import set_random_seed
+
+if not current_platform.is_cpu():
+    pytest.skip("skipping CPU-only tests", allow_module_level=True)
+
+from vllm.model_executor.layers.activation import (
+    GELU,
+    FastGELU,
+    GeluAndMul,
+    NewGELU,
+    QuickGELU,
+    SiluAndMul,
+)
+
+DTYPES = [torch.bfloat16, torch.float32]
+NUM_TOKENS = [7, 83]
+D = [512, 2048]
+SEEDS = [0]
+
+
+@pytest.mark.parametrize(
+    ("activation_cls", "fn"),
+    [
+        (SiluAndMul, torch.ops._C.silu_and_mul),
+        (GeluAndMul, torch.ops._C.gelu_and_mul),
+        (GeluAndMul, torch.ops._C.gelu_tanh_and_mul),
+    ],
+)
+@pytest.mark.parametrize("num_tokens", NUM_TOKENS)
+@pytest.mark.parametrize("d", D)
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("seed", SEEDS)
+@torch.inference_mode()
+def test_cpu_act_and_mul(
+    default_vllm_config,
+    activation_cls: type[torch.nn.Module],
+    fn: object,
+    num_tokens: int,
+    d: int,
+    dtype: torch.dtype,
+    seed: int,
+) -> None:
+    set_random_seed(seed)
+    x = torch.randn(num_tokens, 2 * d, dtype=dtype)
+
+    layer = activation_cls()
+    out = layer(x)
+    ref_out = layer.forward_native(x)
+
+    torch.testing.assert_close(
+        out, ref_out, atol=get_default_atol(out), rtol=get_default_rtol(out)
+    )
+
+    output_shape = x.shape[:-1] + (x.shape[-1] // 2,)
+    raw_out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+    opcheck(fn, (raw_out, x))
+
+
+@pytest.mark.parametrize(
+    ("activation_cls", "fn", "op_args"),
+    [
+        (NewGELU, torch.ops._C.gelu_new, ()),
+        (FastGELU, torch.ops._C.gelu_fast, ()),
+        (QuickGELU, torch.ops._C.gelu_quick, ()),
+        pytest.param(
+            GELU,
+            getattr(torch.ops._C, "activation_lut_bf16", None),
+            ("gelu",),
+            marks=pytest.mark.skipif(
+                current_platform.get_cpu_architecture() != CpuArchEnum.ARM,
+                reason="activation_lut_bf16 is only built on Arm CPU",
+            ),
+        ),
+    ],
+)
+@pytest.mark.parametrize("num_tokens", NUM_TOKENS)
+@pytest.mark.parametrize("d", D)
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("seed", SEEDS)
+@torch.inference_mode()
+def test_cpu_unary_activation(
+    default_vllm_config,
+    activation_cls: type[torch.nn.Module],
+    fn: object,
+    op_args: tuple[str, ...],
+    num_tokens: int,
+    d: int,
+    dtype: torch.dtype,
+    seed: int,
+) -> None:
+    set_random_seed(seed)
+    x = torch.randn(num_tokens, d, dtype=dtype)
+    layer = activation_cls()
+    out = layer(x)
+    ref_out = layer.forward_native(x)
+    torch.testing.assert_close(
+        out, ref_out, atol=get_default_atol(out), rtol=get_default_rtol(out)
+    )
+    # gelu with activation_lut_bf16 only makes sense for BF16
+    if not (activation_cls is GELU and dtype != torch.bfloat16):
+        raw_out = torch.empty_like(x)
+        opcheck(fn, (raw_out, x, *op_args))
diff --git a/tests/kernels/core/test_fused_q_kv_rmsnorm.py b/tests/kernels/core/test_fused_q_kv_rmsnorm.py
new file mode 100644
index 000000000000..b6a70b19b03d
--- /dev/null
+++ b/tests/kernels/core/test_fused_q_kv_rmsnorm.py
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Correctness + large-token-count launch tests for fused_q_kv_rmsnorm.
+
+Before the grid-dim fix the kernel used grid ``(2, num_tokens)``, which hit
+CUDA's 65535 grid-y cap for ``num_tokens >= 65536`` and failed with
+``Triton Error [CUDA]: invalid argument`` at every large chunked-prefill
+profile run. These tests pin the new grid layout.
+"""
+
+from __future__ import annotations
+
+import pytest
+import torch
+
+from vllm.models.deepseek_v4.common.ops import fused_q_kv_rmsnorm
+from vllm.platforms import current_platform
+
+pytestmark = pytest.mark.skipif(
+    not current_platform.is_cuda_alike(),
+    reason="fused_q_kv_rmsnorm requires a CUDA/ROCm device",
+)
+
+
+def _ref_rmsnorm(x: torch.Tensor, w: torch.Tensor, eps: float) -> torch.Tensor:
+    x_f32 = x.to(torch.float32)
+    variance = x_f32.pow(2).mean(dim=-1, keepdim=True)
+    y = x_f32 * torch.rsqrt(variance + eps) * w.to(torch.float32)
+    return y.to(x.dtype)
+
+
+@pytest.mark.parametrize("num_tokens", [1, 17, 1024, 8192])
+@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16])
+def test_fused_q_kv_rmsnorm_correctness(num_tokens: int, dtype: torch.dtype):
+    torch.manual_seed(0)
+    device = "cuda"
+    q_size, kv_size = 192, 576
+    qr = torch.randn(num_tokens, q_size, dtype=dtype, device=device)
+    kv = torch.randn(num_tokens, kv_size, dtype=dtype, device=device)
+    qw = torch.randn(q_size, dtype=dtype, device=device)
+    kvw = torch.randn(kv_size, dtype=dtype, device=device)
+    eps = 1e-6
+
+    qr_out, kv_out = fused_q_kv_rmsnorm(qr, kv, qw, kvw, eps)
+
+    qr_ref = _ref_rmsnorm(qr, qw, eps)
+    kv_ref = _ref_rmsnorm(kv, kvw, eps)
+
+    tol = dict(rtol=1e-2, atol=1e-2)
+    torch.testing.assert_close(qr_out, qr_ref, **tol)
+    torch.testing.assert_close(kv_out, kv_ref, **tol)
+
+
+@pytest.mark.parametrize("num_tokens", [65535, 65536, 131072])
+def test_fused_q_kv_rmsnorm_launches_past_grid_y_cap(num_tokens: int):
+    """Regression guard: grid used to be (2, num_tokens), hitting CUDA's
+    65535 grid-y cap at num_tokens >= 65536. The new grid (num_tokens, 2)
+    lifts that bound to 2**31-1."""
+    device = "cuda"
+    dtype = torch.bfloat16
+    q_size, kv_size = 192, 576
+    qr = torch.randn(num_tokens, q_size, dtype=dtype, device=device)
+    kv = torch.randn(num_tokens, kv_size, dtype=dtype, device=device)
+    qw = torch.randn(q_size, dtype=dtype, device=device)
+    kvw = torch.randn(kv_size, dtype=dtype, device=device)
+
+    qr_out, kv_out = fused_q_kv_rmsnorm(qr, kv, qw, kvw, 1e-6)
+    # spot-check a couple of rows against the torch reference
+    for row in (0, num_tokens // 2, num_tokens - 1):
+        torch.testing.assert_close(
+            qr_out[row],
+            _ref_rmsnorm(qr[row : row + 1], qw, 1e-6)[0],
+            rtol=1e-2,
+            atol=1e-2,
+        )
+        torch.testing.assert_close(
+            kv_out[row],
+            _ref_rmsnorm(kv[row : row + 1], kvw, 1e-6)[0],
+            rtol=1e-2,
+            atol=1e-2,
+        )
diff --git a/tests/kernels/core/test_fused_quant_layernorm.py b/tests/kernels/core/test_fused_quant_layernorm.py
index f9c01f4f1e62..07d15e3b1dfe 100644
--- a/tests/kernels/core/test_fused_quant_layernorm.py
+++ b/tests/kernels/core/test_fused_quant_layernorm.py
@@ -17,6 +17,7 @@
     per_token_group_quant_int8,
 )
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed
 
 DTYPES = [torch.bfloat16, torch.float]
 QUANT_DTYPES = [torch.int8, current_platform.fp8_dtype()]
@@ -180,9 +181,7 @@ def test_rms_norm(
     device: str,
     strided_input: bool,
 ) -> None:
-    torch.random.manual_seed(seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(seed)
+    set_random_seed(seed)
     torch.set_default_device(device)
     torch.accelerator.set_device_index(device)
 
diff --git a/tests/kernels/core/test_fused_silu_mul_block_quant.py b/tests/kernels/core/test_fused_silu_mul_block_quant.py
new file mode 100644
index 000000000000..37b76056cc21
--- /dev/null
+++ b/tests/kernels/core/test_fused_silu_mul_block_quant.py
@@ -0,0 +1,189 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+import torch
+import torch.nn.functional as F
+
+import vllm._custom_ops as ops
+from tests.kernels.utils import opcheck
+from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+    per_token_group_quant_fp8,
+)
+from vllm.model_executor.layers.quantization.utils.int8_utils import (
+    per_token_group_quant_int8,
+)
+from vllm.platforms import current_platform
+
+DTYPES = [torch.float16, torch.bfloat16]
+QUANT_DTYPES = [current_platform.fp8_dtype(), torch.int8]
+VEC_HIDDEN_SIZES = [1024, 1025, 1027, 1029]
+NUM_TOKENS_HIDDEN_SIZES = [
+    *[(1, i) for i in [64, *VEC_HIDDEN_SIZES, 2048, 5120]],
+    *[(16, i) for i in [64, *VEC_HIDDEN_SIZES, 5120]],
+    *[(128, i) for i in [64, *VEC_HIDDEN_SIZES]],
+    *[(512, i) for i in [64, 5120]],
+]
+SCALE_UBS = [False]
+GROUP_SIZES = [64, 128]
+IS_SCALE_TRANSPOSED = [False, True]
+SEEDS = [0]
+CUDA_DEVICES = [i for i in range(1 if torch.accelerator.device_count() == 1 else 2)]
+
+
+def ref_silu_and_mul_per_block_quant(
+    x: torch.Tensor,
+    quant_dtype: torch.dtype,
+    group_size: int,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Reference implementation: unfused SiLU+Mul then group quantization."""
+    hidden = x.shape[-1] // 2
+    gate, up = x.split(hidden, dim=-1)
+    silu_out = F.silu(gate) * up
+
+    if quant_dtype == current_platform.fp8_dtype():
+        return per_token_group_quant_fp8(
+            silu_out, group_size=group_size, use_ue8m0=False
+        )
+    elif quant_dtype == torch.int8:
+        return per_token_group_quant_int8(silu_out, group_size=group_size)
+    else:
+        raise ValueError(f"Unsupported quant_dtype: {quant_dtype}")
+
+
+@pytest.mark.parametrize("num_tokens, hidden_size", NUM_TOKENS_HIDDEN_SIZES)
+@pytest.mark.parametrize("has_scale_ub", SCALE_UBS)
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("quant_dtype", QUANT_DTYPES)
+@pytest.mark.parametrize("group_size", GROUP_SIZES)
+@pytest.mark.parametrize("is_scale_transposed", IS_SCALE_TRANSPOSED)
+@pytest.mark.parametrize("seed", SEEDS)
+@pytest.mark.parametrize("device_idx", CUDA_DEVICES)
+@torch.inference_mode()
+def test_silu_and_mul_per_block_quant(
+    default_vllm_config,
+    num_tokens: int,
+    hidden_size: int,
+    has_scale_ub: bool,
+    dtype: torch.dtype,
+    quant_dtype: torch.dtype,
+    group_size: int,
+    is_scale_transposed: bool,
+    seed: int,
+    device_idx: str,
+) -> None:
+    """Test SiLU+Mul+Block Quantization kernel correctness."""
+    torch.accelerator.set_device_index(device_idx)
+    device = f"cuda:{device_idx}"
+    torch.random.manual_seed(seed)
+    torch.set_default_device(device)
+
+    if hidden_size % group_size != 0:
+        return
+
+    if has_scale_ub:
+        pytest.skip("Scale upper bound not yet supported")
+
+    scale = 1 / hidden_size
+    x = torch.randn(num_tokens, hidden_size * 2, dtype=dtype, device=device) * scale
+
+    # Reference implementation
+    ref_out, ref_scales = ref_silu_and_mul_per_block_quant(x, quant_dtype, group_size)
+
+    # Fused kernel implementation
+    ops_out, ops_scales = ops.silu_and_mul_per_block_quant(
+        x, group_size, quant_dtype, None, is_scale_transposed
+    )
+
+    # Check for NaN/Inf
+    assert not torch.isnan(ops_out.float()).any(), "Kernel output contains NaN"
+    assert not torch.isinf(ops_out.float()).any(), "Kernel output contains Inf"
+    assert not torch.isnan(ops_scales).any(), "Kernel scales contain NaN"
+    assert not torch.isinf(ops_scales).any(), "Kernel scales contain Inf"
+
+    # Check dtypes
+    assert ref_out.dtype == quant_dtype
+    assert ops_out.dtype == quant_dtype
+
+    # Check scales match
+    torch.testing.assert_close(ref_scales, ops_scales, rtol=1e-5, atol=1e-5)
+
+    # Check output correctness via dequantized values
+    ref_scales_expanded = ref_scales.repeat_interleave(group_size, dim=1)
+    ops_scales_expanded = ops_scales.repeat_interleave(group_size, dim=1)
+    ref_deq = ref_out.to(dtype=torch.float32) * ref_scales_expanded
+    ops_deq = ops_out.to(dtype=torch.float32) * ops_scales_expanded
+    torch.testing.assert_close(ref_deq, ops_deq, atol=5e-2, rtol=5e-2)
+
+    # opcheck
+    output = torch.empty(num_tokens, hidden_size, device=device, dtype=quant_dtype)
+    num_groups = hidden_size // group_size
+    if is_scale_transposed:
+        scales = torch.empty(num_groups, num_tokens, device=device, dtype=torch.float32)
+    else:
+        scales = torch.empty(num_tokens, num_groups, device=device, dtype=torch.float32)
+    opcheck(
+        torch.ops._C.silu_and_mul_per_block_quant,
+        (output, x, scales, group_size, None, is_scale_transposed),
+    )
+
+
+@pytest.mark.parametrize("dtype", [torch.float16])
+@pytest.mark.parametrize("hidden_size", [4096])
+@pytest.mark.parametrize("num_tokens", [128])
+@pytest.mark.parametrize("group_size", [128])
+def test_silu_block_quant_shapes(
+    default_vllm_config,
+    dtype: torch.dtype,
+    hidden_size: int,
+    num_tokens: int,
+    group_size: int,
+):
+    """Test that output shapes are correct."""
+    torch.set_default_device("cuda")
+    x = torch.randn(num_tokens, hidden_size * 2, dtype=dtype, device="cuda")
+
+    # Row-major scales
+    out, scales = ops.silu_and_mul_per_block_quant(
+        x,
+        group_size=group_size,
+        quant_dtype=current_platform.fp8_dtype(),
+        is_scale_transposed=False,
+    )
+    assert out.shape == (num_tokens, hidden_size)
+    assert scales.shape == (num_tokens, hidden_size // group_size)
+
+    # Column-major scales (logical shape same after .t() in _custom_ops)
+    out, scales = ops.silu_and_mul_per_block_quant(
+        x,
+        group_size=group_size,
+        quant_dtype=current_platform.fp8_dtype(),
+        is_scale_transposed=True,
+    )
+    assert out.shape == (num_tokens, hidden_size)
+    assert scales.shape == (num_tokens, hidden_size // group_size)
+
+
+@pytest.mark.parametrize("dtype", [torch.float16])
+@pytest.mark.parametrize("batch_size", [1, 16, 256])
+@pytest.mark.parametrize("hidden_size", [1024, 5120, 14336])
+def test_silu_block_quant_edge_cases(
+    default_vllm_config, dtype: torch.dtype, batch_size: int, hidden_size: int
+):
+    """Test edge cases: single token, large batch, large hidden size."""
+    torch.set_default_device("cuda")
+    x = torch.randn(batch_size, hidden_size * 2, dtype=dtype, device="cuda")
+
+    out, scales = ops.silu_and_mul_per_block_quant(
+        x,
+        group_size=128,
+        quant_dtype=current_platform.fp8_dtype(),
+        is_scale_transposed=False,
+    )
+
+    assert out.shape == (batch_size, hidden_size)
+    assert out.dtype == current_platform.fp8_dtype()
+    assert scales.dtype == torch.float32
+    assert not torch.isnan(out.float()).any()
+    assert not torch.isnan(scales).any()
+    assert not torch.isinf(scales).any()
diff --git a/tests/kernels/core/test_layernorm.py b/tests/kernels/core/test_layernorm.py
index f8f9660942af..c39d42c75930 100644
--- a/tests/kernels/core/test_layernorm.py
+++ b/tests/kernels/core/test_layernorm.py
@@ -6,13 +6,21 @@
 
 from tests.kernels.quant_utils import FP8_DTYPE
 from tests.kernels.utils import opcheck
-from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.layernorm import GemmaRMSNorm, RMSNorm
+from vllm.platforms import current_platform
 from vllm.utils.torch_utils import set_random_seed
 
+if current_platform.is_rocm():
+    from vllm.platforms.rocm import on_gfx90a
+
+    on_mi250 = on_gfx90a()
+else:
+    on_mi250 = False
+
 DTYPES = [torch.half, torch.bfloat16, torch.float]
 NUM_TOKENS = [7, 83, 4096]  # Arbitrary values for testing
 HIDDEN_SIZES = [8, 768, 769, 5120, 5125, 8192]  # Arbitrary values for testing
-ADD_RESIDUAL = [False, True]
+ADD_RESIDUAL = [False, True] if not on_mi250 else [True]
 SEEDS = [0]
 CUDA_DEVICES = [
     f"cuda:{i}" for i in range(1 if torch.accelerator.device_count() == 1 else 2)
@@ -154,3 +162,31 @@ def test_fused_rms_norm_quant(
         atol=1e-3,
         rtol=1e-3,
     )
+
+
+@torch.inference_mode()
+def test_gemma_rms_norm_mixed_input_weight_dtype(default_vllm_config) -> None:
+    if not torch.cuda.is_available():
+        pytest.skip("CUDA required")
+
+    device = CUDA_DEVICES[0]
+    torch.set_default_device(device)
+
+    num_tokens, hidden_size = 32, 1024
+    x = torch.randn(num_tokens, hidden_size, dtype=torch.bfloat16, device=device)
+    layer = GemmaRMSNorm(hidden_size, eps=1e-6).to(device=device)
+    layer.weight.data.normal_(mean=0.0, std=0.1)
+
+    # Gemma uses fp32 weight parameter while activations can be bf16.
+    assert layer.weight.dtype == torch.float32
+    out = layer(x)
+
+    x_fp32 = x.float()
+    weight_fp32 = layer.weight.data.float() + 1.0
+    variance = x_fp32.pow(2).mean(dim=-1, keepdim=True)
+    ref = (x_fp32 * torch.rsqrt(variance + layer.variance_epsilon) * weight_fp32).to(
+        x.dtype
+    )
+
+    assert out.dtype == x.dtype
+    torch.testing.assert_close(out, ref, atol=1e-2, rtol=1e-2)
diff --git a/tests/kernels/core/test_minimax_reduce_rms.py b/tests/kernels/core/test_minimax_reduce_rms.py
new file mode 100644
index 000000000000..d17a448bd973
--- /dev/null
+++ b/tests/kernels/core/test_minimax_reduce_rms.py
@@ -0,0 +1,152 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for MiniMax QK RMS-norm: NCCL reference vs Lamport fused kernel."""
+
+import pytest
+import torch
+import torch.nn as nn
+from torch.multiprocessing import spawn
+
+from tests.kernels.utils import opcheck
+from tests.utils import ensure_current_vllm_config, init_test_distributed_environment
+from vllm.distributed import cleanup_dist_env_and_memory
+from vllm.model_executor.layers.mamba.linear_attn import MiniMaxText01RMSNormTP
+from vllm.platforms import current_platform
+from vllm.utils.network_utils import get_open_port
+from vllm.utils.torch_utils import set_random_seed
+
+
+@ensure_current_vllm_config()
+def _worker_forward_qk(
+    local_rank,
+    world_size,
+    port,
+    num_tokens,
+    hidden_q_full,
+    hidden_k_full,
+    dtype,
+    seed,
+    eps,
+):
+    """Per-rank worker: compare NCCL allreduce path vs Lamport fused kernel."""
+
+    if not hasattr(torch.ops._C, "minimax_allreduce_rms_qk"):
+        cleanup_dist_env_and_memory()
+        return
+    device = torch.device(f"cuda:{local_rank}")
+    torch.accelerator.set_device_index(device)
+    init_test_distributed_environment(
+        world_size, 1, local_rank, port, local_rank=local_rank
+    )
+
+    hq = hidden_q_full // world_size
+    hk = hidden_k_full // world_size
+
+    q_norm = MiniMaxText01RMSNormTP(hidden_q_full, eps=eps).cuda()
+    k_norm = MiniMaxText01RMSNormTP(hidden_k_full, eps=eps).cuda()
+
+    set_random_seed(seed)
+    qw = torch.randn(hidden_q_full, dtype=dtype, device="cuda")
+    kw = torch.randn(hidden_k_full, dtype=dtype, device="cuda")
+    q_norm.weight = nn.Parameter(qw[local_rank * hq : (local_rank + 1) * hq])
+    k_norm.weight = nn.Parameter(kw[local_rank * hk : (local_rank + 1) * hk])
+
+    torch.manual_seed(seed + 1000 + local_rank)
+    qkv = torch.randn(num_tokens, hq + hk + hk, dtype=dtype, device="cuda")
+
+    q_ref, k_ref, v_ref = qkv.clone().split([hq, hk, hk], dim=-1)
+    ref_q, ref_k = MiniMaxText01RMSNormTP.forward_qk(q_norm, k_norm, q_ref, k_ref)
+
+    # Set up Lamport workspace.
+    from vllm.distributed.parallel_state import get_tp_group
+    from vllm.model_executor.layers.mamba.lamport_workspace import (
+        get_allreduce_workspace,
+    )
+
+    workspace = get_allreduce_workspace(
+        rank=local_rank,
+        world_size=world_size,
+        max_tokens=num_tokens,
+        process_group=get_tp_group().cpu_group,
+    )
+
+    opcheck(
+        torch.ops._C.minimax_allreduce_rms_qk,
+        (
+            qkv.clone(),
+            q_norm.weight,
+            k_norm.weight,
+            workspace,
+            hq,
+            hk,
+            local_rank,
+            world_size,
+            eps,
+        ),
+    )
+    fused_q, fused_k = torch.ops._C.minimax_allreduce_rms_qk(
+        qkv.clone(),
+        q_norm.weight,
+        k_norm.weight,
+        workspace,
+        hq,
+        hk,
+        local_rank,
+        world_size,
+        eps,
+    )
+    _, _, fused_v = qkv.split([hq, hk, hk], dim=-1)
+    torch.accelerator.synchronize()
+
+    torch.testing.assert_close(
+        fused_q,
+        ref_q,
+        atol=3e-2,
+        rtol=3e-2,
+    )
+    torch.testing.assert_close(fused_k, ref_k, atol=3e-2, rtol=3e-2)
+
+    cleanup_dist_env_and_memory()
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda(),
+    reason="CUDA required",
+)
+@pytest.mark.parametrize("world_size", [2, 4, 8])
+@pytest.mark.parametrize("num_tokens", [1, 128, 333])
+@pytest.mark.parametrize(
+    "hidden_dims",
+    [(6144, 1024)],
+)
+@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16])
+@pytest.mark.parametrize("eps", [1e-6])
+@pytest.mark.parametrize("seed", [42])
+def test_minimax_reduce_rms_qk(
+    world_size,
+    num_tokens,
+    hidden_dims,
+    dtype,
+    eps,
+    seed,
+):
+    num_gpus = current_platform.device_count()
+    if num_gpus < world_size:
+        pytest.skip(f"Need >= {world_size} GPUs, have {num_gpus}")
+    hidden_q_full, hidden_k_full = hidden_dims
+    port = str(get_open_port())
+    spawn(
+        _worker_forward_qk,
+        args=(
+            world_size,
+            port,
+            num_tokens,
+            hidden_q_full,
+            hidden_k_full,
+            dtype,
+            seed,
+            eps,
+        ),
+        nprocs=world_size,
+        join=True,
+    )
diff --git a/tests/kernels/core/test_rotary_embedding.py b/tests/kernels/core/test_rotary_embedding.py
index 6cdd94fdc865..1cbb5dbd1881 100644
--- a/tests/kernels/core/test_rotary_embedding.py
+++ b/tests/kernels/core/test_rotary_embedding.py
@@ -35,6 +35,7 @@ def rotary_embedding_opcheck(
 @pytest.mark.parametrize("seq_len", [11, 1024])
 @pytest.mark.parametrize("use_key", [True, False])
 @pytest.mark.parametrize("head_stride_is_contiguous", [True, False])
+@pytest.mark.parametrize("dtype", [torch.float32, torch.bfloat16])
 def test_rotary_embedding_opcheck(
     default_vllm_config,
     dist_init,
@@ -46,19 +47,20 @@ def test_rotary_embedding_opcheck(
     seq_len,
     use_key,
     head_stride_is_contiguous,
+    dtype,
 ):
     batch_size = 1
     base = 10000
     num_heads = 7
     rot = RotaryEmbedding(
-        head_size, rotary_dim, max_position, base, is_neox_style, torch.float32
+        head_size, rotary_dim, max_position, base, is_neox_style, dtype
     )
 
     positions = torch.randint(0, max_position, (batch_size, seq_len), device=device)
     head_stride = head_size + (64 if head_stride_is_contiguous else 0)
 
     query = torch.randn(
-        batch_size, seq_len, num_heads, head_stride, dtype=torch.float32, device=device
+        batch_size, seq_len, num_heads, head_stride, dtype=dtype, device=device
     )
     key = torch.randn_like(query) if use_key else None
     query = query[..., :head_size]
diff --git a/tests/kernels/core/test_vit_bilinear_pos_embed.py b/tests/kernels/core/test_vit_bilinear_pos_embed.py
new file mode 100644
index 000000000000..66571e3a2fb7
--- /dev/null
+++ b/tests/kernels/core/test_vit_bilinear_pos_embed.py
@@ -0,0 +1,120 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Accuracy tests for the fused Triton bilinear position-embedding kernel.
+
+Compares ``triton_pos_embed_interpolate`` against the pure-PyTorch
+``pos_embed_interpolate_native`` across a variety of grid shapes and dtypes.
+"""
+
+import pytest
+import torch
+
+from vllm.triton_utils import HAS_TRITON
+
+if HAS_TRITON:
+    from vllm.model_executor.models.qwen3_vl import (
+        pos_embed_interpolate_native,
+        triton_pos_embed_interpolate,
+    )
+
+
+DTYPES = [torch.float32, torch.bfloat16]
+# Qwen3-VL default
+NUM_GRID_PER_SIDE = 48
+SPATIAL_MERGE_SIZE = 2
+HIDDEN_DIM = 1152
+
+# 4 square + 4 non-square grids (h, w divisible by spatial_merge_size=2)
+SQUARE_GRIDS = [(1, 4, 4), (1, 16, 16), (1, 32, 32), (1, 48, 48)]
+NON_SQUARE_GRIDS = [(1, 8, 16), (1, 14, 20), (1, 32, 48), (1, 60, 80)]
+ALL_GRIDS = SQUARE_GRIDS + NON_SQUARE_GRIDS
+
+
+@pytest.mark.skipif(not HAS_TRITON, reason="Triton not available")
+@pytest.mark.parametrize("dtype", DTYPES, ids=lambda d: str(d).split(".")[-1])
+@pytest.mark.parametrize(
+    "grid_thw",
+    ALL_GRIDS,
+    ids=[f"{t}x{h}x{w}" for t, h, w in ALL_GRIDS],
+)
+def test_triton_matches_native(
+    grid_thw: tuple[int, int, int],
+    dtype: torch.dtype,
+) -> None:
+    """Triton kernel output must match the native PyTorch implementation."""
+    t, h, w = grid_thw
+    device = "cuda"
+
+    # Scale to match real Qwen3-VL pos_embed weight distribution (std~0.23).
+    torch.manual_seed(42)
+    embed_weight = (
+        torch.randn(
+            NUM_GRID_PER_SIDE * NUM_GRID_PER_SIDE,
+            HIDDEN_DIM,
+            device=device,
+            dtype=dtype,
+        )
+        * 0.25
+    )
+
+    native_out = pos_embed_interpolate_native(
+        embed_weight, t, h, w, NUM_GRID_PER_SIDE, SPATIAL_MERGE_SIZE, dtype
+    )
+    triton_out = triton_pos_embed_interpolate(
+        embed_weight, t, h, w, NUM_GRID_PER_SIDE, SPATIAL_MERGE_SIZE, dtype
+    )
+
+    assert native_out.shape == triton_out.shape, (
+        f"Shape mismatch: native {native_out.shape} vs triton {triton_out.shape}"
+    )
+
+    # Small numerical differences arise from the precomputed h/w_scale
+    # in the triton kernel vs torch.linspace in the native path, which can
+    # cause single-ULP output differences
+    # in a handful of elements.
+    atol = {torch.float32: 5e-5, torch.bfloat16: 1e-2}[dtype]
+    rtol = {torch.float32: 1e-5, torch.bfloat16: 1e-2}[dtype]
+    torch.testing.assert_close(triton_out, native_out, atol=atol, rtol=rtol)
+
+
+@pytest.mark.skipif(not HAS_TRITON, reason="Triton not available")
+@pytest.mark.parametrize("dtype", DTYPES, ids=lambda d: str(d).split(".")[-1])
+def test_temporal_repeat(dtype: torch.dtype) -> None:
+    """Verify temporal dimension t > 1 correctly repeats the spatial pattern."""
+    device = "cuda"
+    h, w = 16, 16
+    t_single, t_multi = 1, 3
+
+    # Scale to match real Qwen3-VL pos_embed weight distribution (std~0.23).
+    torch.manual_seed(42)
+    embed_weight = (
+        torch.randn(
+            NUM_GRID_PER_SIDE * NUM_GRID_PER_SIDE,
+            HIDDEN_DIM,
+            device=device,
+            dtype=dtype,
+        )
+        * 0.25
+    )
+
+    out_single = triton_pos_embed_interpolate(
+        embed_weight,
+        t_single,
+        h,
+        w,
+        NUM_GRID_PER_SIDE,
+        SPATIAL_MERGE_SIZE,
+        dtype,
+    )
+    out_multi = triton_pos_embed_interpolate(
+        embed_weight,
+        t_multi,
+        h,
+        w,
+        NUM_GRID_PER_SIDE,
+        SPATIAL_MERGE_SIZE,
+        dtype,
+    )
+
+    expected = out_single.repeat(t_multi, 1)
+    torch.testing.assert_close(out_multi, expected, atol=0, rtol=0)
diff --git a/tests/kernels/core/test_vit_fp8_attn.py b/tests/kernels/core/test_vit_fp8_attn.py
new file mode 100644
index 000000000000..ef1c44cada29
--- /dev/null
+++ b/tests/kernels/core/test_vit_fp8_attn.py
@@ -0,0 +1,279 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for the full FP8 ViT attention path (quantize -> cuDNN -> un-pad)."""
+
+import contextlib
+
+import pytest
+import torch
+
+from vllm.triton_utils import HAS_TRITON
+from vllm.utils.flashinfer import (
+    is_flashinfer_cudnn_fp8_prefill_attn_supported,
+)
+from vllm.v1.attention.backends.registry import AttentionBackendEnum
+
+
+def _has_flashinfer_cudnn() -> bool:
+    """Check if FlashInfer cuDNN backend is available."""
+    try:
+        from flashinfer.prefill import (
+            cudnn_batch_prefill_with_kv_cache,  # noqa: F401
+        )
+
+        return True
+    except ImportError:
+        return False
+
+
+HEAD_DIMS = [72, 80]
+SEQ_LENS = [256]
+NUM_HEADS = [16]
+
+
+@pytest.fixture
+def _fp8_attention():
+    """Create FP8-enabled MMEncoderAttention via config."""
+    from types import SimpleNamespace
+    from unittest.mock import patch
+
+    from vllm.config import VllmConfig, set_current_vllm_config
+    from vllm.config.multimodal import MultiModalConfig
+
+    if not is_flashinfer_cudnn_fp8_prefill_attn_supported():
+        pytest.skip("FlashInfer cuDNN FP8 prefill attention not supported")
+
+    mm_config = MultiModalConfig(mm_encoder_attn_dtype="fp8")
+    vllm_config = VllmConfig()
+    vllm_config.model_config = SimpleNamespace(multimodal_config=mm_config)
+
+    # MMEncoderAttention reads torch.get_default_dtype() during init
+    # to determine the output dtype. In real model loading this is bf16.
+    old_dtype = torch.get_default_dtype()
+    torch.set_default_dtype(torch.bfloat16)
+
+    with (
+        set_current_vllm_config(vllm_config),
+        patch(
+            "vllm.model_executor.layers.attention.mm_encoder_attention"
+            ".get_vit_attn_backend",
+            return_value=AttentionBackendEnum.FLASHINFER,
+        ),
+    ):
+        yield
+
+    torch.set_default_dtype(old_dtype)
+
+
+def _build_cu_seqlens_and_meta(
+    seq_len: int,
+    num_heads: int,
+    head_dim: int,
+    fp8_padded_hidden_size: int | None = None,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """Build cu_seqlens, max_seqlen, sequence_lengths for a single sequence."""
+    import numpy as np
+
+    from vllm.model_executor.layers.attention.mm_encoder_attention import (
+        MMEncoderAttention,
+    )
+
+    cu_seqlens_np = np.array([0, seq_len], dtype=np.int32)
+
+    sequence_lengths = MMEncoderAttention.maybe_compute_seq_lens(
+        AttentionBackendEnum.FLASHINFER,
+        cu_seqlens_np,
+        torch.device("cuda"),
+    )
+
+    max_seqlen = torch.tensor(
+        MMEncoderAttention.compute_max_seqlen(
+            AttentionBackendEnum.FLASHINFER, cu_seqlens_np
+        ),
+        dtype=torch.int32,
+    )
+
+    cu_seqlens = MMEncoderAttention.maybe_recompute_cu_seqlens(
+        AttentionBackendEnum.FLASHINFER,
+        cu_seqlens_np,
+        num_heads * head_dim,
+        1,  # tp_size
+        torch.device("cuda"),
+        fp8_padded_hidden_size=fp8_padded_hidden_size,
+    )
+
+    return cu_seqlens, max_seqlen, sequence_lengths
+
+
+@pytest.mark.skipif(
+    not (HAS_TRITON and _has_flashinfer_cudnn()),
+    reason="Triton and FlashInfer cuDNN required",
+)
+@pytest.mark.parametrize("head_dim", HEAD_DIMS)
+@pytest.mark.parametrize("seq_len", SEQ_LENS)
+@pytest.mark.parametrize("num_heads", NUM_HEADS)
+def test_fp8_attn_output_shape(
+    head_dim: int,
+    seq_len: int,
+    num_heads: int,
+    _fp8_attention,
+) -> None:
+    """Verify FP8 attention produces correct output shape after un-padding."""
+    from vllm.model_executor.layers.attention.mm_encoder_attention import (
+        MMEncoderAttention,
+    )
+    from vllm.utils.math_utils import round_up
+
+    attn = None
+    with contextlib.suppress(ValueError, ImportError):
+        attn = MMEncoderAttention(
+            num_heads=num_heads,
+            head_size=head_dim,
+            prefix="visual.blocks.0.attn",
+        ).to("cuda")
+
+    if attn is None or not attn.fp8_enabled:
+        pytest.skip("FP8 MMEncoderAttention not available")
+    assert attn is not None  # mypy narrowing
+
+    # FP8 always needs fp8_padded_hidden_size for correct cu_seqlens
+    fp8_padded_hidden_size = num_heads * round_up(head_dim, 16)
+
+    cu_seqlens, max_seqlen, sequence_lengths = _build_cu_seqlens_and_meta(
+        seq_len, num_heads, head_dim, fp8_padded_hidden_size=fp8_padded_hidden_size
+    )
+
+    q = torch.randn(
+        seq_len,
+        num_heads,
+        head_dim,
+        device="cuda",
+        dtype=torch.bfloat16,
+    )
+    k = torch.randn_like(q)
+    v = torch.randn_like(q)
+
+    output = attn._forward_flashinfer(q, k, v, cu_seqlens, max_seqlen, sequence_lengths)
+
+    # Output should have original head_dim (un-padded)
+    assert output.shape[-1] == head_dim
+    assert output.dtype == torch.bfloat16
+
+
+@pytest.mark.skipif(
+    not (HAS_TRITON and _has_flashinfer_cudnn()),
+    reason="Triton and FlashInfer cuDNN required",
+)
+@pytest.mark.parametrize("head_dim", HEAD_DIMS)
+@pytest.mark.parametrize("seq_len", SEQ_LENS)
+@pytest.mark.parametrize("num_heads", NUM_HEADS)
+def test_fp8_vs_bf16_close(
+    head_dim: int, seq_len: int, num_heads: int, _fp8_attention
+) -> None:
+    """FP8 attention output should be reasonably close to BF16 baseline."""
+    from vllm.model_executor.layers.attention.mm_encoder_attention import (
+        MMEncoderAttention,
+    )
+    from vllm.utils.math_utils import round_up
+
+    torch.manual_seed(42)
+    q = torch.randn(
+        1,
+        seq_len,
+        num_heads,
+        head_dim,
+        device="cuda",
+        dtype=torch.bfloat16,
+    )
+    k = torch.randn_like(q)
+    v = torch.randn_like(q)
+
+    # FP8 path
+    attn_fp8 = None
+    with contextlib.suppress(ValueError, ImportError):
+        attn_fp8 = MMEncoderAttention(
+            num_heads=num_heads,
+            head_size=head_dim,
+            prefix="visual.blocks.0.attn",
+        ).to("cuda")
+
+    if attn_fp8 is None or not attn_fp8.fp8_enabled:
+        pytest.skip("FP8 MMEncoderAttention not available")
+    assert attn_fp8 is not None  # mypy narrowing
+
+    fp8_padded_hidden_size = num_heads * round_up(head_dim, 16)
+    cu_seqlens, max_seqlen, seq_lengths = _build_cu_seqlens_and_meta(
+        seq_len,
+        num_heads,
+        head_dim,
+        fp8_padded_hidden_size=fp8_padded_hidden_size,
+    )
+
+    out_fp8 = attn_fp8._forward_flashinfer(
+        q.clone(),
+        k.clone(),
+        v.clone(),
+        cu_seqlens,
+        max_seqlen,
+        seq_lengths,
+    )
+
+    # BF16 baseline (create non-FP8 attention by using scale=attn_fp8.scale
+    # and calling the wrapper directly without FP8 quantization)
+    from vllm.model_executor.layers.attention.mm_encoder_attention import (
+        _get_flashinfer_workspace_buffer,
+    )
+    from vllm.v1.attention.ops.vit_attn_wrappers import (
+        vit_flashinfer_wrapper,
+    )
+
+    out_bf16 = vit_flashinfer_wrapper(
+        q=q.clone(),
+        k=k.clone(),
+        v=v.clone(),
+        scale=attn_fp8.scale,
+        workspace_buffer=_get_flashinfer_workspace_buffer(),
+        cu_seqlens=cu_seqlens,
+        max_seqlen=max_seqlen,
+        sequence_lengths=seq_lengths,
+    )
+
+    out_fp8_f = out_fp8.float()
+    out_bf16_f = out_bf16.float()
+
+    abs_diff = (out_fp8_f - out_bf16_f).abs()
+    abs_diff_flat = abs_diff.flatten()
+
+    # Relative diff (avoid division by zero)
+    denom = out_bf16_f.abs().clamp(min=1e-6)
+    rel_diff_flat = (abs_diff / denom).flatten()
+
+    cosine_sim = torch.nn.functional.cosine_similarity(
+        out_fp8_f.flatten().unsqueeze(0),
+        out_bf16_f.flatten().unsqueeze(0),
+    ).item()
+
+    pcts = [50, 90, 95, 99, 99.9]
+    abs_pct = {p: torch.quantile(abs_diff_flat, p / 100).item() for p in pcts}
+    rel_pct = {p: torch.quantile(rel_diff_flat, p / 100).item() for p in pcts}
+
+    print(f"\nFP8 vs BF16 (head_dim={head_dim}, seq_len={seq_len}):")
+    print(f"  cosine_sim={cosine_sim:.6f}")
+    print(
+        f"  abs_diff: max={abs_diff_flat.max().item():.6f}, "
+        f"mean={abs_diff_flat.mean().item():.6f}, "
+        + ", ".join(f"p{p}={abs_pct[p]:.6f}" for p in pcts)
+    )
+    print(
+        f"  rel_diff: max={rel_diff_flat.max().item():.6f}, "
+        f"mean={rel_diff_flat.mean().item():.6f}, "
+        + ", ".join(f"p{p}={rel_pct[p]:.6f}" for p in pcts)
+    )
+
+    assert abs_diff_flat.max().item() < 0.3, (
+        f"FP8 vs BF16 max abs diff too large: {abs_diff_flat.max().item()}"
+    )
+    assert abs_diff_flat.mean().item() < 0.03, (
+        f"FP8 vs BF16 mean abs diff too large: {abs_diff_flat.mean().item()}"
+    )
+    assert cosine_sim > 0.99, f"Cosine similarity too low: {cosine_sim:.6f}"
diff --git a/tests/kernels/core/test_vit_fp8_quant.py b/tests/kernels/core/test_vit_fp8_quant.py
new file mode 100644
index 000000000000..0c63d0069f16
--- /dev/null
+++ b/tests/kernels/core/test_vit_fp8_quant.py
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for the stride-aware FP8 quantization kernel with head_dim padding."""
+
+import pytest
+import torch
+
+from vllm.platforms import current_platform
+from vllm.triton_utils import HAS_TRITON
+
+if HAS_TRITON:
+    from vllm.kernels.triton.qkv_padded_fp8_quant import (
+        quantize_fp8_pad_head_dim_triton,
+    )
+
+HEAD_DIMS = [72, 80, 128]
+SEQ_LENS = [64, 256]
+NUM_HEADS = [16]
+SCALES = [0.01, 0.1, 1.0]
+
+
+def _naive_fp8_quantize(
+    tensor: torch.Tensor, scale: torch.Tensor, skip_scale: bool
+) -> torch.Tensor:
+    """Reference FP8 quantization in PyTorch."""
+    fp8_dtype = current_platform.fp8_dtype()
+    fp8_max = torch.finfo(fp8_dtype).max
+    fp8_min = -fp8_max
+
+    x = tensor.float()
+    if not skip_scale:
+        x = x / scale.item()
+    x = x.clamp(fp8_min, fp8_max)
+    return x.to(fp8_dtype)
+
+
+@pytest.mark.skipif(not HAS_TRITON, reason="Triton not available")
+@pytest.mark.parametrize("head_dim", HEAD_DIMS)
+@pytest.mark.parametrize("seq_len", SEQ_LENS)
+@pytest.mark.parametrize("num_heads", NUM_HEADS)
+@pytest.mark.parametrize("scale_val", SCALES)
+def test_quantize_contiguous(
+    head_dim: int, seq_len: int, num_heads: int, scale_val: float
+) -> None:
+    """Test quantization of contiguous 3D tensors."""
+    torch.manual_seed(42)
+    tensor = torch.randn(
+        seq_len, num_heads, head_dim, device="cuda", dtype=torch.bfloat16
+    )
+    scale = torch.tensor([scale_val], dtype=torch.float32, device="cuda").view(
+        1, 1, 1, 1
+    )
+
+    result = quantize_fp8_pad_head_dim_triton(tensor, scale)
+
+    padded_dim = (head_dim + 15) // 16 * 16
+    assert result.shape == (seq_len, num_heads, padded_dim)
+    assert result.is_contiguous()
+    assert result.dtype == current_platform.fp8_dtype()
+
+    # Compare unpadded portion against reference
+    ref = _naive_fp8_quantize(tensor, scale, skip_scale=False)
+    torch.testing.assert_close(result[:, :, :head_dim].float(), ref.float())
+
+    # Padded region should be zero
+    if padded_dim > head_dim:
+        assert (result[:, :, head_dim:].float() == 0).all()
+
+
+@pytest.mark.skipif(not HAS_TRITON, reason="Triton not available")
+@pytest.mark.parametrize("head_dim", [72, 80])
+def test_quantize_non_contiguous(head_dim: int) -> None:
+    """Test quantization from non-contiguous QKV views (interleaved buffer)."""
+    seq_len, num_heads = 64, 16
+    # Simulate interleaved QKV buffer: shape (seq_len, 3 * num_heads, head_dim)
+    qkv = torch.randn(
+        seq_len, 3 * num_heads, head_dim, device="cuda", dtype=torch.bfloat16
+    )
+    # Q is every 3rd head slice - non-contiguous view
+    q = qkv[:, 0::3, :]
+    assert not q.is_contiguous()
+
+    scale = torch.tensor([0.1], dtype=torch.float32, device="cuda").view(1, 1, 1, 1)
+    result = quantize_fp8_pad_head_dim_triton(q, scale)
+
+    padded_dim = (head_dim + 15) // 16 * 16
+    assert result.shape == (seq_len, num_heads, padded_dim)
+    assert result.is_contiguous()
+
+    # Compare against contiguous reference
+    ref = _naive_fp8_quantize(q.contiguous(), scale, skip_scale=False)
+    torch.testing.assert_close(result[:, :, :head_dim].float(), ref.float())
+
+
+@pytest.mark.skipif(not HAS_TRITON, reason="Triton not available")
+def test_skip_scale() -> None:
+    """Test skip_scale=True produces cast-only output (no division)."""
+    seq_len, num_heads, head_dim = 32, 8, 80
+    tensor = torch.randn(
+        seq_len, num_heads, head_dim, device="cuda", dtype=torch.bfloat16
+    )
+    scale = torch.tensor([0.5], dtype=torch.float32, device="cuda").view(1, 1, 1, 1)
+
+    result_skip = quantize_fp8_pad_head_dim_triton(tensor, scale, skip_scale=True)
+    result_noskip = quantize_fp8_pad_head_dim_triton(tensor, scale, skip_scale=False)
+
+    # skip_scale should just cast, not divide
+    ref_cast = _naive_fp8_quantize(tensor, scale, skip_scale=True)
+    torch.testing.assert_close(result_skip[:, :, :head_dim].float(), ref_cast.float())
+
+    # With scale != 1.0, skip and no-skip should differ
+    assert not torch.equal(result_skip.float(), result_noskip.float())
+
+
+@pytest.mark.skipif(not HAS_TRITON, reason="Triton not available")
+def test_4d_input() -> None:
+    """Test that 4D input (B, S, H, D) is handled correctly."""
+    B, S, H, D = 2, 32, 8, 72
+    tensor = torch.randn(B, S, H, D, device="cuda", dtype=torch.bfloat16)
+    scale = torch.tensor([0.1], dtype=torch.float32, device="cuda").view(1, 1, 1, 1)
+
+    result = quantize_fp8_pad_head_dim_triton(tensor, scale)
+    padded_dim = (D + 15) // 16 * 16
+    assert result.shape == (B, S, H, padded_dim)
diff --git a/tests/kernels/core/test_vit_fp8_scaling.py b/tests/kernels/core/test_vit_fp8_scaling.py
new file mode 100644
index 000000000000..a197439237fb
--- /dev/null
+++ b/tests/kernels/core/test_vit_fp8_scaling.py
@@ -0,0 +1,251 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for FP8 scaling (dynamic and static) in MMEncoderAttention."""
+
+import contextlib
+import json
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import pytest
+import torch
+
+from vllm.model_executor.layers.attention.mm_encoder_attention import (
+    _FP8_AMAX_HISTORY_LEN,
+    _FP8_MAX,
+)
+from vllm.utils.flashinfer import (
+    is_flashinfer_cudnn_fp8_prefill_attn_supported,
+)
+
+LAYER_0 = "visual.blocks.0.attn.attn"
+LAYER_1 = "visual.blocks.1.attn.attn"
+NUM_HEADS = 16
+HEAD_DIM = 72
+
+
+@contextlib.contextmanager
+def _build_attention(mm_config):
+    """Yield an MMEncoderAttention with the given multimodal config.
+
+    The VllmConfig context stays active while the test runs so that
+    ``get_multimodal_config()`` calls during the forward path resolve. Also
+    invokes ``process_weights_after_loading`` to simulate the model loader's
+    auto-scan. Yields ``None`` if FlashInfer cuDNN is not available.
+    """
+    from vllm.config import VllmConfig, set_current_vllm_config
+    from vllm.model_executor.layers.attention.mm_encoder_attention import (
+        MMEncoderAttention,
+    )
+    from vllm.v1.attention.backends.registry import AttentionBackendEnum
+
+    if not is_flashinfer_cudnn_fp8_prefill_attn_supported():
+        yield None
+        return
+
+    vllm_config = VllmConfig()
+    vllm_config.model_config = SimpleNamespace(multimodal_config=mm_config)
+
+    with (
+        set_current_vllm_config(vllm_config),
+        patch(
+            "vllm.model_executor.layers.attention.mm_encoder_attention"
+            ".get_vit_attn_backend",
+            return_value=AttentionBackendEnum.FLASHINFER,
+        ),
+    ):
+        attn = MMEncoderAttention(
+            num_heads=NUM_HEADS,
+            head_size=HEAD_DIM,
+            prefix=LAYER_0,
+        )
+        attn.process_weights_after_loading(torch.bfloat16)
+        yield attn
+
+
+@pytest.fixture
+def _make_attention():
+    """Create an MMEncoderAttention with dynamic FP8 scaling."""
+    from vllm.config.multimodal import MultiModalConfig
+
+    with _build_attention(MultiModalConfig(mm_encoder_attn_dtype="fp8")) as attn:
+        yield attn
+
+
+@pytest.fixture
+def _make_static_attention(tmp_path):
+    """Create an MMEncoderAttention with static FP8 scales from a file."""
+    from vllm.config.multimodal import MultiModalConfig
+
+    scale_file = tmp_path / "scales.json"
+    scale_file.write_text(
+        json.dumps(
+            {
+                LAYER_0: {"q": 224.0, "k": 198.0, "v": 210.0},
+                LAYER_1: {"q": 100.0, "k": 110.0, "v": 120.0},
+            }
+        )
+    )
+    with _build_attention(
+        MultiModalConfig(
+            mm_encoder_attn_dtype="fp8",
+            mm_encoder_fp8_scale_path=str(scale_file),
+        )
+    ) as attn:
+        yield attn
+
+
+def test_dynamic_scaling_updates_scales(_make_attention) -> None:
+    """Verify that _record_amax_and_update_scales updates scale buffers."""
+    attn = _make_attention
+    if attn is None or not attn.fp8_enabled:
+        pytest.skip("FP8 attention not available (FlashInfer backend required)")
+
+    attn = attn.to("cuda")
+
+    S, H, D = 32, NUM_HEADS, HEAD_DIM
+    q = torch.full((S, H, D), 2.0, device="cuda", dtype=torch.bfloat16)
+    k = torch.full((S, H, D), 3.0, device="cuda", dtype=torch.bfloat16)
+    v = torch.full((S, H, D), 4.0, device="cuda", dtype=torch.bfloat16)
+
+    attn._record_amax_and_update_scales(q, k, v)
+
+    expected_q_scale = 2.0 / _FP8_MAX
+    expected_k_scale = 3.0 / _FP8_MAX
+    expected_v_scale = 4.0 / _FP8_MAX
+
+    torch.testing.assert_close(attn._fp8_q_scale.item(), expected_q_scale)
+    torch.testing.assert_close(attn._fp8_k_scale.item(), expected_k_scale)
+    torch.testing.assert_close(attn._fp8_v_scale.item(), expected_v_scale)
+
+
+def test_circular_buffer_wraps(_make_attention) -> None:
+    """Verify the amax circular buffer wraps at HISTORY_LEN."""
+    attn = _make_attention
+    if attn is None or not attn.fp8_enabled:
+        pytest.skip("FP8 attention not available (FlashInfer backend required)")
+
+    attn = attn.to("cuda")
+    S, H, D = 16, NUM_HEADS, HEAD_DIM
+
+    for i in range(_FP8_AMAX_HISTORY_LEN + 2):
+        mag = float(i + 1)
+        q = torch.full((S, H, D), mag, device="cuda", dtype=torch.bfloat16)
+        k = torch.full((S, H, D), mag, device="cuda", dtype=torch.bfloat16)
+        v = torch.full((S, H, D), mag, device="cuda", dtype=torch.bfloat16)
+        attn._record_amax_and_update_scales(q, k, v)
+
+    assert attn._fp8_amax_pos == 2
+
+    expected_max = float(_FP8_AMAX_HISTORY_LEN + 2)
+    expected_scale = expected_max / _FP8_MAX
+    torch.testing.assert_close(attn._fp8_q_scale.item(), expected_scale)
+
+
+def test_static_scales_loaded(_make_static_attention) -> None:
+    """Verify static scales are loaded from the JSON file."""
+    attn = _make_static_attention
+    if attn is None or not attn.fp8_enabled:
+        pytest.skip("FP8 attention not available (FlashInfer backend required)")
+
+    assert attn.fp8_enabled
+    assert not attn._fp8_dynamic_scale
+
+    # Layer 0 scales (the layer this attention was created with).
+    assert attn._fp8_q_scale.item() == 224.0
+    assert attn._fp8_k_scale.item() == 198.0
+    assert attn._fp8_v_scale.item() == 210.0
+
+    assert not attn.skip_scale_q
+    assert not attn.skip_scale_k
+    assert not attn.skip_scale_v
+
+    # No amax history buffers for static scaling.
+    assert not hasattr(attn, "_fp8_q_amax")
+
+
+def test_static_scales_missing_layer(tmp_path) -> None:
+    """Verify error when requested layer is not in the scale file."""
+    from vllm.config import VllmConfig, set_current_vllm_config
+    from vllm.config.multimodal import MultiModalConfig
+    from vllm.v1.attention.backends.registry import AttentionBackendEnum
+
+    if not is_flashinfer_cudnn_fp8_prefill_attn_supported():
+        pytest.skip("FlashInfer cuDNN not available")
+
+    scale_file = tmp_path / "wrong_layer.json"
+    scale_file.write_text(
+        json.dumps({"visual.blocks.99.attn": {"q": 1.0, "k": 1.0, "v": 1.0}})
+    )
+    mm_config = MultiModalConfig(
+        mm_encoder_attn_dtype="fp8",
+        mm_encoder_fp8_scale_path=str(scale_file),
+    )
+    vllm_config = VllmConfig()
+    vllm_config.model_config = SimpleNamespace(multimodal_config=mm_config)
+
+    from vllm.model_executor.layers.attention.mm_encoder_attention import (
+        MMEncoderAttention,
+    )
+
+    with (
+        set_current_vllm_config(vllm_config),
+        patch(
+            "vllm.model_executor.layers.attention.mm_encoder_attention"
+            ".get_vit_attn_backend",
+            return_value=AttentionBackendEnum.FLASHINFER,
+        ),
+    ):
+        attn = MMEncoderAttention(
+            num_heads=NUM_HEADS,
+            head_size=HEAD_DIM,
+            prefix=LAYER_0,
+        )
+        with pytest.raises(ValueError, match="scales not found for layer"):
+            attn.process_weights_after_loading(torch.bfloat16)
+
+
+def test_dynamic_scales_auto_save(tmp_path) -> None:
+    """Verify scales are saved to disk after the amax buffer fills."""
+    import vllm.model_executor.layers.attention.mm_encoder_attention as _mod
+    from vllm.config.multimodal import MultiModalConfig
+
+    if not is_flashinfer_cudnn_fp8_prefill_attn_supported():
+        pytest.skip("FlashInfer cuDNN not available")
+
+    # Reset module-level state between runs (other tests may have left
+    # state behind after triggering a save).
+    _mod._fp8_scale_save_path = None
+    _mod._fp8_saved_scale_refs.clear()
+
+    save_file = tmp_path / "auto_scales.json"
+    with _build_attention(
+        MultiModalConfig(
+            mm_encoder_attn_dtype="fp8",
+            mm_encoder_fp8_scale_save_path=str(save_file),
+        )
+    ) as attn:
+        if attn is None or not attn.fp8_enabled:
+            pytest.skip("FP8 attention not available")
+
+        attn = attn.to("cuda")
+        S, H, D = 16, NUM_HEADS, HEAD_DIM
+
+        # Run exactly _FP8_AMAX_HISTORY_LEN forward passes.
+        for i in range(_FP8_AMAX_HISTORY_LEN):
+            mag = float(i + 1)
+            q = torch.full((S, H, D), mag, device="cuda", dtype=torch.bfloat16)
+            k = torch.full((S, H, D), mag * 0.5, device="cuda", dtype=torch.bfloat16)
+            v = torch.full((S, H, D), mag * 0.3, device="cuda", dtype=torch.bfloat16)
+            attn._record_amax_and_update_scales(q, k, v)
+
+    # File should have been written on the 16th call (buffer wrap).
+    assert save_file.is_file(), "Scale file was not saved"
+    scales = json.loads(save_file.read_text())
+    assert LAYER_0 in scales
+    assert set(scales[LAYER_0].keys()) == {"q", "k", "v"}
+    for val in scales[LAYER_0].values():
+        assert isinstance(val, float) and val > 0
+
+    # Path is cleared after the one-shot save fires.
+    assert _mod._fp8_scale_save_path is None
diff --git a/tests/kernels/helion/helpers.py b/tests/kernels/helion/helpers.py
index dbe553be5589..f25c0a274aaa 100644
--- a/tests/kernels/helion/helpers.py
+++ b/tests/kernels/helion/helpers.py
@@ -6,24 +6,26 @@
 from collections.abc import Callable
 from contextlib import contextmanager
 from pathlib import Path
+from typing import Any
 from unittest.mock import patch
 
 import helion
 
+from vllm.kernels.helion.case_key import CaseKey
 from vllm.kernels.helion.config_manager import ConfigManager
 from vllm.kernels.helion.register import register_kernel
 from vllm.kernels.helion.utils import get_canonical_gpu_name
 
 GPU_PLATFORM = get_canonical_gpu_name()
 
-DEFAULT_CONFIGS: dict[str, helion.Config] = {
-    "default": helion.Config(block_sizes=[32]),
+DEFAULT_CONFIGS: dict[CaseKey, helion.Config] = {
+    CaseKey.default(): helion.Config(block_sizes=[32]),
 }
 
 
 @contextmanager
 def dummy_kernel_registry(
-    configs: dict[str, helion.Config] | None = None,
+    configs: dict[CaseKey, helion.Config] | None = None,
 ):
     """Context manager providing a register function with automatic config setup.
 
@@ -34,7 +36,13 @@ def dummy_kernel_registry(
     """
     if configs is None:
         configs = DEFAULT_CONFIGS
-    config_data = {k: v.__dict__["config"] for k, v in configs.items()}
+
+    def _to_config_entries(cfgs: dict) -> list[dict[str, Any]]:
+        pairs: list[dict[str, Any]] = []
+        for k, v in cfgs.items():
+            config_data = v.__dict__["config"]
+            pairs.append({"key": dict(k), "config": config_data})
+        return pairs
 
     with tempfile.TemporaryDirectory() as tmpdir:
         config_dir = Path(tmpdir)
@@ -55,7 +63,7 @@ def decorator(fn: Callable) -> Callable:
                     kernel_dir = config_dir / name
                     kernel_dir.mkdir(parents=True, exist_ok=True)
                     (kernel_dir / f"{GPU_PLATFORM}.json").write_text(
-                        json.dumps(config_data)
+                        json.dumps(_to_config_entries(configs))
                     )
                     return register_kernel(op_name, **kwargs)(fn)
 
diff --git a/tests/kernels/helion/test_autotune.py b/tests/kernels/helion/test_autotune.py
index 87f06c43581e..8b42e145d484 100644
--- a/tests/kernels/helion/test_autotune.py
+++ b/tests/kernels/helion/test_autotune.py
@@ -63,7 +63,7 @@ def test_autotune_disabled_kernel_produces_valid_config(self):
         with dummy_kernel_registry(configs={}) as register:
             wrapper = register(
                 "autotune_test_kernel",
-                config_picker=lambda args, keys: "default",
+                config_picker=lambda args, keys: None,
                 fake_impl=lambda *a, **kw: None,
                 input_generator=lambda: {
                     "small": (
diff --git a/tests/kernels/helion/test_case_key.py b/tests/kernels/helion/test_case_key.py
new file mode 100644
index 000000000000..335902fd9ef6
--- /dev/null
+++ b/tests/kernels/helion/test_case_key.py
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from vllm.utils.import_utils import has_helion
+
+if not has_helion():
+    pytest.skip(
+        "Helion is not installed. Install with: pip install vllm[helion]",
+        allow_module_level=True,
+    )
+
+from vllm.kernels.helion.case_key import CaseKey
+
+
+class TestCaseKey:
+    """Test suite for CaseKey class."""
+
+    def test_construction_with_dict(self):
+        key = CaseKey({"intermediate": 2048, "numtokens": 256})
+        assert key["intermediate"] == 2048
+        assert key["numtokens"] == 256
+
+    def test_empty_construction_raises(self):
+        with pytest.raises(TypeError, match="at least one key-value pair"):
+            CaseKey()
+        with pytest.raises(TypeError, match="at least one key-value pair"):
+            CaseKey({})
+
+    def test_default_construction(self):
+        key = CaseKey.default()
+        assert len(key) == 0
+        assert key.is_default()
+
+    def test_non_default_is_not_default(self):
+        key = CaseKey({"intermediate": 2048})
+        assert not key.is_default()
+
+    def test_hashable_and_equality(self):
+        a = CaseKey({"intermediate": 2048, "numtokens": 256})
+        b = CaseKey({"numtokens": 256, "intermediate": 2048})
+        assert a == b
+        assert hash(a) == hash(b)
+        assert a != CaseKey({"intermediate": 4096})
+        assert CaseKey.default() == CaseKey.default()
+
+        configs = {
+            CaseKey.default(): "default_config",
+            a: "a_config",
+        }
+        assert configs[b] == "a_config"
+        assert configs[CaseKey.default()] == "default_config"
+
+    def test_str_is_sorted_json(self):
+        assert str(CaseKey({"z": 1, "a": 2})) == '{"a":2,"z":1}'
+        assert str(CaseKey.default()) == "{}"
+
+    def test_immutable(self):
+        key = CaseKey({"intermediate": 2048})
+        with pytest.raises(TypeError, match="immutable"):
+            key["intermediate"] = 4096
+        with pytest.raises(TypeError, match="immutable"):
+            del key["intermediate"]
+        with pytest.raises(TypeError, match="immutable"):
+            key.update({"numtokens": 256})
+        with pytest.raises(TypeError, match="immutable"):
+            key.clear()
diff --git a/tests/kernels/helion/test_config_manager.py b/tests/kernels/helion/test_config_manager.py
index 337696ee066b..f8e5eae6f106 100644
--- a/tests/kernels/helion/test_config_manager.py
+++ b/tests/kernels/helion/test_config_manager.py
@@ -23,6 +23,7 @@
 
 import helion
 
+from vllm.kernels.helion.case_key import CaseKey
 from vllm.kernels.helion.config_manager import (
     ConfigManager,
     ConfigSet,
@@ -49,22 +50,25 @@ def test_config_set_creation(self):
 
     def test_config_set_from_dict(self):
         """Test creating ConfigSet from dictionary data."""
-        # Use realistic config data that helion.Config can handle
         config_data = {
             "block_sizes": [32, 16],
             "num_warps": 4,
             "num_stages": 3,
             "pid_type": "persistent_interleaved",
         }
-        data = {"h100": {"batch_32_hidden_4096": config_data}}
+        data = {
+            "h100": [
+                {"key": {"batch": 32, "hidden": 4096}, "config": config_data},
+            ]
+        }
 
         config_set = ConfigSet.from_dict("test_kernel", data)
 
         assert config_set.kernel_name == "test_kernel"
         assert config_set.get_platforms() == ["h100"]
 
-        # Verify the config was created correctly
-        config = config_set.get_config("h100", "batch_32_hidden_4096")
+        internal_key = CaseKey({"batch": 32, "hidden": 4096})
+        config = config_set.get_config("h100", internal_key)
         assert isinstance(config, helion.Config)
         assert config.block_sizes == [32, 16]
         assert config.num_warps == 4
@@ -76,17 +80,19 @@ def test_config_set_get_config_keyerror(self):
         config_set = ConfigSet("test_kernel")
 
         with pytest.raises(KeyError, match="platform 'h100' not found"):
-            config_set.get_config("h100", "batch_32_hidden_4096")
+            config_set.get_config("h100", "nonexistent")
 
-        # Use realistic config data
         config_data = {"num_warps": 8, "num_stages": 4}
-        data = {"h100": {"batch_64_hidden_2048": config_data}}
+        data = {
+            "h100": [
+                {"key": {"batch": 64, "hidden": 2048}, "config": config_data},
+            ]
+        }
         config_set = ConfigSet.from_dict("test_kernel", data)
 
-        with pytest.raises(
-            KeyError, match="config_key 'batch_32_hidden_4096' not found"
-        ):
-            config_set.get_config("h100", "batch_32_hidden_4096")
+        nonexistent_key = CaseKey({"batch": 32, "hidden": 4096})
+        with pytest.raises(KeyError, match="config_key .* not found"):
+            config_set.get_config("h100", nonexistent_key)
 
     def test_config_set_get_platforms(self):
         """Test get_platforms method."""
@@ -95,8 +101,12 @@ def test_config_set_get_platforms(self):
         config2 = {"num_warps": 8, "num_stages": 5}
 
         data = {
-            "h100": {"batch_32_hidden_4096": config1},
-            "a100": {"batch_16_hidden_2048": config2},
+            "h100": [
+                {"key": {"batch": 32, "hidden": 4096}, "config": config1},
+            ],
+            "a100": [
+                {"key": {"batch": 16, "hidden": 2048}, "config": config2},
+            ],
         }
         config_set = ConfigSet.from_dict("test_kernel", data)
 
@@ -105,39 +115,49 @@ def test_config_set_get_platforms(self):
 
     def test_config_set_get_config_keys(self):
         """Test get_config_keys method."""
-        # Use realistic config data
         config1 = {"num_warps": 4, "num_stages": 3}
         config2 = {"num_warps": 8, "num_stages": 5}
 
         data = {
-            "h100": {
-                "batch_32_hidden_4096": config1,
-                "batch_64_hidden_2048": config2,
-            }
+            "h100": [
+                {"key": {"batch": 32, "hidden": 4096}, "config": config1},
+                {"key": {"batch": 64, "hidden": 2048}, "config": config2},
+            ]
         }
         config_set = ConfigSet.from_dict("test_kernel", data)
 
         config_keys = config_set.get_config_keys("h100")
-        assert config_keys == ["batch_32_hidden_4096", "batch_64_hidden_2048"]
+        expected_keys = sorted(
+            [
+                CaseKey({"batch": 32, "hidden": 4096}),
+                CaseKey({"batch": 64, "hidden": 2048}),
+            ],
+            key=lambda k: str(k) if k is not None else "",
+        )
+        assert config_keys == expected_keys
 
         assert config_set.get_config_keys("v100") == []
 
     def test_config_set_to_dict(self):
         """Test converting ConfigSet to dictionary."""
-        # Use realistic config data
         original_config = {
             "block_sizes": [64, 32],
             "num_warps": 16,
             "num_stages": 4,
             "pid_type": "persistent_blocked",
         }
-        original_data = {"h100": {"batch_32_hidden_4096": original_config}}
+        original_data = {
+            "h100": [
+                {"key": {"batch": 32, "hidden": 4096}, "config": original_config},
+            ]
+        }
 
         config_set = ConfigSet.from_dict("test_kernel", original_data)
         result_data = config_set.to_dict()
 
-        # The result should match the original (Config roundtrip should work)
-        assert result_data == original_data
+        internal_key = CaseKey({"batch": 32, "hidden": 4096})
+        assert internal_key in result_data["h100"]
+        assert result_data["h100"][internal_key] == original_config
 
 
 class TestConfigManager:
@@ -202,7 +222,10 @@ def test_load_config_set_valid_file(self):
             kernel_dir.mkdir()
             platform_file = kernel_dir / "h100.json"
             with open(platform_file, "w") as f:
-                json.dump({"batch_32_hidden_4096": kernel_config}, f)
+                json.dump(
+                    [{"key": {"batch": 32, "hidden": 4096}, "config": kernel_config}],
+                    f,
+                )
 
             manager = ConfigManager(base_dir=temp_dir)
             config_set = manager.load_config_set("test_kernel")
@@ -211,7 +234,8 @@ def test_load_config_set_valid_file(self):
             assert config_set.kernel_name == "test_kernel"
             assert config_set.get_platforms() == ["h100"]
 
-            config = config_set.get_config("h100", "batch_32_hidden_4096")
+            internal_key = CaseKey({"batch": 32, "hidden": 4096})
+            config = config_set.get_config("h100", internal_key)
             assert isinstance(config, helion.Config)
             assert config.block_sizes == [128, 64]
             assert config.num_warps == 8
@@ -241,7 +265,11 @@ def test_save_config_set(self):
                 "num_stages": 8,
                 "pid_type": "persistent_blocked",
             }
-            data = {"h100": {"batch_32_hidden_4096": kernel_config}}
+            data = {
+                "h100": [
+                    {"key": {"batch": 32, "hidden": 4096}, "config": kernel_config},
+                ]
+            }
             config_set = ConfigSet.from_dict("test_kernel", data)
 
             manager = ConfigManager(base_dir=temp_dir)
@@ -255,13 +283,21 @@ def test_save_config_set(self):
             assert platform_file.exists()
             with open(platform_file) as f:
                 loaded_data = json.load(f)
-            assert loaded_data == data["h100"]
+            assert isinstance(loaded_data, list)
+            assert len(loaded_data) == 1
+            entry = loaded_data[0]
+            assert entry["key"] == {"batch": 32, "hidden": 4096}
+            assert entry["config"] == kernel_config
 
     def test_save_config_set_creates_directory(self):
         """Test that save_config_set creates parent directories if needed."""
         with tempfile.TemporaryDirectory() as temp_dir:
             nested_dir = Path(temp_dir) / "nested" / "configs"
-            data = {"h100": {"default": {"num_warps": 4}}}
+            data = {
+                "h100": [
+                    {"key": {}, "config": {"num_warps": 4}},
+                ]
+            }
             config_set = ConfigSet.from_dict("test_kernel", data)
 
             manager = ConfigManager(base_dir=nested_dir)
@@ -288,34 +324,41 @@ def test_get_platform_configs(self):
             kernel_dir.mkdir()
             with open(kernel_dir / "h100.json", "w") as f:
                 json.dump(
-                    {
-                        "batch_32_hidden_4096": config_1,
-                        "batch_64_hidden_2048": config_2,
-                        "default": default_config,
-                    },
+                    [
+                        {"key": {"batch": 32, "hidden": 4096}, "config": config_1},
+                        {"key": {"batch": 64, "hidden": 2048}, "config": config_2},
+                        {"key": {}, "config": default_config},
+                    ],
                     f,
                 )
             with open(kernel_dir / "a100.json", "w") as f:
-                json.dump({"batch_16_hidden_1024": config_3}, f)
+                json.dump(
+                    [{"key": {"batch": 16, "hidden": 1024}, "config": config_3}],
+                    f,
+                )
 
             manager = ConfigManager(base_dir=temp_dir)
 
+            key_b32_h4096 = CaseKey({"batch": 32, "hidden": 4096})
+            key_b64_h2048 = CaseKey({"batch": 64, "hidden": 2048})
+            key_b16_h1024 = CaseKey({"batch": 16, "hidden": 1024})
+
             h100_configs = manager.get_platform_configs("test_kernel", "h100")
             assert len(h100_configs) == 3
-            assert "batch_32_hidden_4096" in h100_configs
-            assert "batch_64_hidden_2048" in h100_configs
-            assert "default" in h100_configs
+            assert key_b32_h4096 in h100_configs
+            assert key_b64_h2048 in h100_configs
+            assert CaseKey.default() in h100_configs
             for config in h100_configs.values():
                 assert isinstance(config, helion.Config)
 
-            assert h100_configs["batch_32_hidden_4096"].num_warps == 4
-            assert h100_configs["default"].num_stages == 7
+            assert h100_configs[key_b32_h4096].num_warps == 4
+            assert h100_configs[CaseKey.default()].num_stages == 7
 
             a100_configs = manager.get_platform_configs("test_kernel", "a100")
             assert len(a100_configs) == 1
-            assert "batch_16_hidden_1024" in a100_configs
-            assert isinstance(a100_configs["batch_16_hidden_1024"], helion.Config)
-            assert a100_configs["batch_16_hidden_1024"].num_warps == 2
+            assert key_b16_h1024 in a100_configs
+            assert isinstance(a100_configs[key_b16_h1024], helion.Config)
+            assert a100_configs[key_b16_h1024].num_warps == 2
 
             nonexistent_configs = manager.get_platform_configs("test_kernel", "v100")
             assert len(nonexistent_configs) == 0
diff --git a/tests/kernels/helion/test_pattern_matching.py b/tests/kernels/helion/test_pattern_matching.py
index 9be567a4afda..fc7345ca0b08 100644
--- a/tests/kernels/helion/test_pattern_matching.py
+++ b/tests/kernels/helion/test_pattern_matching.py
@@ -67,6 +67,7 @@ class TestMakeFxHop:
     def setup_method(self):
         helion_kernel_side_table.reset_table()
 
+    @pytest.mark.skip(reason="SymInt proxy tracking issue with PyTorch 2.11+")
     def test_make_fx_symbolic(self):
         def raw_add_scale(
             x: torch.Tensor, y: torch.Tensor, scale: float
@@ -128,6 +129,7 @@ def fn(x, y):
             for out_s, in_s in zip(val.shape, input_shape):
                 assert out_s == in_s
 
+    @pytest.mark.skip(reason="SymInt proxy tracking issue with PyTorch 2.11+")
     def test_pattern_matcher_replaces_with_helion_hop(self):
         def raw_silu_mul(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
             M, N = x.size()
diff --git a/tests/kernels/helion/test_register.py b/tests/kernels/helion/test_register.py
index cb1e66d9eb85..c82c3c8358ed 100644
--- a/tests/kernels/helion/test_register.py
+++ b/tests/kernels/helion/test_register.py
@@ -24,6 +24,7 @@
 import helion.language as hl
 
 from tests.kernels.helion.helpers import dummy_kernel_registry
+from vllm.kernels.helion.case_key import CaseKey
 from vllm.kernels.helion.config_manager import ConfigManager
 from vllm.kernels.helion.register import (
     _HOP_AVAILABLE,
@@ -35,6 +36,13 @@
     validate_helion_settings,
 )
 
+if _HOP_AVAILABLE:
+    from helion._compat import supports_torch_compile_fusion
+    from helion._compiler._dynamo.higher_order_ops import (
+        helion_kernel_wrapper_mutation,
+    )
+    from torch._inductor.utils import run_and_get_code
+
 
 def _add_kernel(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
     out = torch.empty_like(x)
@@ -47,22 +55,22 @@ def _add_kernel(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 def sample_configs():
     """Create real Helion config objects for testing."""
     return {
-        "hiddensize_4096_batchsize_32": helion.Config(
+        CaseKey({"batchsize": 32, "hiddensize": 4096}): helion.Config(
             block_sizes=[128],
             num_warps=4,
             num_stages=3,
         ),
-        "hiddensize_4096_batchsize_64": helion.Config(
+        CaseKey({"batchsize": 64, "hiddensize": 4096}): helion.Config(
             block_sizes=[256],
             num_warps=8,
             num_stages=4,
         ),
-        "hiddensize_4096_batchsize_128": helion.Config(
+        CaseKey({"batchsize": 128, "hiddensize": 4096}): helion.Config(
             block_sizes=[512],
             num_warps=16,
             num_stages=2,
         ),
-        "default": helion.Config(
+        CaseKey.default(): helion.Config(
             block_sizes=[64],
             num_warps=2,
             num_stages=2,
@@ -94,8 +102,7 @@ def configured_kernel(sample_kernel, sample_configs, config_manager_with_test_co
     """Create a ConfiguredHelionKernel for testing."""
 
     def test_config_picker(args, config_keys):
-        """Simple config picker that returns default."""
-        return "default"
+        return None
 
     with (
         patch(
@@ -108,7 +115,6 @@ def test_config_picker(args, config_keys):
         ),
         patch("vllm.kernels.helion.register.helion.kernel") as mock_kernel,
     ):
-        # Mock just the helion.kernel decorator to avoid actual kernel compilation
         mock_decorated = Mock()
         mock_kernel.return_value = Mock(return_value=mock_decorated)
 
@@ -192,7 +198,9 @@ class TestConfiguredHelionKernel:
 
     def test_init_raises_without_picker(self, sample_kernel, sample_configs):
         """Test that __init__ raises when no picker registered."""
-        configs = {"default": sample_configs["default"]}
+        configs: dict[CaseKey, helion.Config] = {
+            CaseKey.default(): sample_configs[CaseKey.default()]
+        }
         mock_config_manager = Mock(spec=ConfigManager)
         mock_config_manager.get_platform_configs = Mock(return_value=configs)
 
@@ -220,7 +228,7 @@ def test_config_selector_validates_picker_result(
         """Test that config selector validates picker returns valid key."""
 
         def invalid_picker(args, config_keys):
-            return "invalid_key"
+            return {"invalid": 999}
 
         kernel = create_configured_kernel_with_configs(
             op_name="test_kernel",
@@ -256,7 +264,7 @@ def none_picker(args, config_keys):
         selector = kernel._create_config_selector(key_computer)
 
         result = selector((torch.randn(32, 4096),))
-        assert result is kernel.configs["default"]
+        assert result is kernel.configs[CaseKey.default()]
 
     def test_create_decorated_kernel_passes_helion_settings(
         self, sample_kernel, sample_configs
@@ -264,7 +272,7 @@ def test_create_decorated_kernel_passes_helion_settings(
         """Test that _create_decorated_kernel passes helion_settings."""
 
         def default_picker(args, config_keys):
-            return "default"
+            return None
 
         settings = helion.Settings()
         settings.print_output_code = True
@@ -308,10 +316,10 @@ def tracking_picker(args, config_keys):
             x = args[0]
             batch_size = x.shape[0]
             if batch_size <= 32:
-                return "hiddensize_4096_batchsize_32"
+                return CaseKey({"batchsize": 32, "hiddensize": 4096})
             elif batch_size <= 64:
-                return "hiddensize_4096_batchsize_64"
-            return "hiddensize_4096_batchsize_128"
+                return CaseKey({"batchsize": 64, "hiddensize": 4096})
+            return CaseKey({"batchsize": 128, "hiddensize": 4096})
 
         mock_config_manager = Mock(spec=ConfigManager)
         mock_config_manager.get_platform_configs = Mock(return_value=sample_configs)
@@ -343,13 +351,13 @@ def tracking_picker(args, config_keys):
 
             tensor = torch.randn(50, 4096)  # batch=50, should select batchsize_64
 
-            # key receives unpacked args, autotuner receives args as tuple
             key_result = key_fn(tensor)
             autotuner = autotuner_fn(None, (tensor,))
             config = autotuner.autotune()
 
-            assert key_result == "hiddensize_4096_batchsize_64"
-            assert config is kernel.configs["hiddensize_4096_batchsize_64"]
+            expected_key = CaseKey({"batchsize": 64, "hiddensize": 4096})
+            assert key_result == str(expected_key)
+            assert config is kernel.configs[expected_key]
 
 
 class TestHelionKernelWrapper:
@@ -362,7 +370,7 @@ def fake_impl(*args, **kwargs):
             return torch.zeros_like(args[0])
 
         def default_picker(args, config_keys):
-            return "default"
+            return None
 
         mock_config_manager = Mock(spec=ConfigManager)
         mock_config_manager.get_platform_configs = Mock(
@@ -399,7 +407,7 @@ def fake_impl(*args, **kwargs):
             return torch.zeros_like(args[0])
 
         def default_picker(args, config_keys):
-            return "default"
+            return None
 
         mock_config_manager = Mock(spec=ConfigManager)
         mock_config_manager.get_platform_configs = Mock(return_value={})
@@ -434,7 +442,7 @@ def fake_impl(*args, **kwargs):
             return torch.zeros_like(args[0])
 
         def default_picker(args, config_keys):
-            return "default"
+            return None
 
         mock_config_manager = Mock(spec=ConfigManager)
         mock_config_manager.get_platform_configs = Mock(return_value={})
@@ -469,7 +477,7 @@ def fake_impl(*args, **kwargs):
             return torch.zeros_like(args[0])
 
         def default_picker(args, config_keys):
-            return "default"
+            return None
 
         expected_inputs = {"key1": (torch.randn(4),)}
         input_gen = Mock(return_value=expected_inputs)
@@ -509,7 +517,7 @@ def fake_impl(*args, **kwargs):
             return torch.zeros_like(args[0])
 
         def default_picker(args, config_keys):
-            return "default"
+            return None
 
         mock_config_manager = Mock(spec=ConfigManager)
         mock_config_manager.get_platform_configs = Mock(return_value={})
@@ -556,7 +564,7 @@ def fake_impl(*args, **kwargs):
             return torch.zeros_like(args[0])
 
         def default_picker(args, config_keys):
-            return "default"
+            return None
 
         mock_config_manager = Mock(spec=ConfigManager)
         mock_config_manager.get_platform_configs = Mock(return_value=sample_configs)
@@ -594,7 +602,9 @@ def test_init_eagerly_initializes_hop_path(self):
         on the HOP path (no custom op registration needed)."""
         from vllm.kernels.helion.utils import get_canonical_gpu_name
 
-        configs = {"default": helion.Config(block_sizes=[4, 4])}
+        configs: dict[CaseKey, helion.Config] = {
+            CaseKey.default(): helion.Config(block_sizes=[4, 4])
+        }
         with (
             dummy_kernel_registry(configs=configs) as register,
             patch(
@@ -603,7 +613,7 @@ def test_init_eagerly_initializes_hop_path(self):
             ) as mock_gpu,
         ):
             wrapper = register(
-                config_picker=lambda args, keys: "default",
+                config_picker=lambda args, keys: None,
             )(_add_kernel)
 
             mock_gpu.assert_called_once()
@@ -635,7 +645,7 @@ def test_init_eagerly_initializes(self):
             ) as mock_gpu,
         ):
             wrapper = register(
-                config_picker=lambda args, keys: "default",
+                config_picker=lambda args, keys: None,
             )(_add_kernel)
 
             # Init must have detected GPU and built the kernel
@@ -653,7 +663,7 @@ def fake_impl(*args, **kwargs):
             return torch.zeros_like(args[0])
 
         def default_picker(args, config_keys):
-            return "default"
+            return None
 
         mock_config_manager = Mock(spec=ConfigManager)
         mock_config_manager.get_platform_configs = Mock(return_value=sample_configs)
@@ -696,7 +706,7 @@ def fake_impl(*args, **kwargs):
             return torch.zeros_like(args[0])
 
         def default_picker(args, config_keys):
-            return "default"
+            return None
 
         mock_config_manager = Mock(spec=ConfigManager)
         mock_config_manager.get_platform_configs = Mock(return_value=sample_configs)
@@ -777,9 +787,9 @@ def test_get_registered_kernels_returns_copy(self):
     def test_get_kernel_by_name_returns_kernel(self):
         """Test get_kernel_by_name returns registered kernel."""
         with dummy_kernel_registry() as register:
-            wrapper = register(
-                "test_kernel", config_picker=lambda args, keys: "default"
-            )(_add_kernel)
+            wrapper = register("test_kernel", config_picker=lambda args, keys: None)(
+                _add_kernel
+            )
 
         from vllm.kernels.helion.register import _REGISTERED_KERNELS
 
@@ -802,7 +812,7 @@ def test_register_kernel_auto_generates_fake_impl(self):
             mock_fake = Mock()
             mock_infer.return_value = mock_fake
             wrapper = register(
-                config_picker=lambda args, keys: "default",
+                config_picker=lambda args, keys: None,
             )(_add_kernel)
 
         mock_infer.assert_called_once_with(_add_kernel, None)
@@ -811,7 +821,7 @@ def test_register_kernel_auto_generates_fake_impl(self):
     def test_register_kernel_creates_wrapper(self):
         """Test register_kernel creates HelionKernelWrapper."""
         with dummy_kernel_registry() as register:
-            result = register("test_name", config_picker=lambda args, keys: "default")(
+            result = register("test_name", config_picker=lambda args, keys: None)(
                 _add_kernel
             )
 
@@ -822,16 +832,16 @@ def test_register_kernel_creates_wrapper(self):
     def test_register_kernel_auto_detects_name(self):
         """Test register_kernel uses function name when no name provided."""
         with dummy_kernel_registry() as register:
-            wrapper = register(config_picker=lambda args, keys: "default")(_add_kernel)
+            wrapper = register(config_picker=lambda args, keys: None)(_add_kernel)
 
         assert wrapper.op_name == "_add_kernel"
 
     def test_register_kernel_registers_in_global_registry(self):
         """Test register_kernel adds wrapper to global registry."""
         with dummy_kernel_registry() as register:
-            wrapper = register(
-                "test_kernel", config_picker=lambda args, keys: "default"
-            )(_add_kernel)
+            wrapper = register("test_kernel", config_picker=lambda args, keys: None)(
+                _add_kernel
+            )
 
         registered_kernels = get_registered_kernels()
         assert "test_kernel" in registered_kernels
@@ -845,7 +855,7 @@ def test_register_kernel_passes_helion_settings(self):
         with dummy_kernel_registry() as register:
             result = register(
                 "test_name",
-                config_picker=lambda args, keys: "default",
+                config_picker=lambda args, keys: None,
                 helion_settings=settings,
             )(_add_kernel)
 
@@ -858,7 +868,7 @@ def test_register_kernel_supports_decorator_syntax(self):
         with dummy_kernel_registry() as register:
             result = register(
                 "custom_name",
-                config_picker=lambda args, keys: "default",
+                config_picker=lambda args, keys: None,
                 fake_impl=mock_fake,
             )(_add_kernel)
 
@@ -868,12 +878,12 @@ def test_register_kernel_supports_decorator_syntax(self):
     def test_register_kernel_raises_on_duplicate_registration(self):
         """Test register_kernel raises error on duplicate names."""
         with dummy_kernel_registry() as register:
-            register("duplicate_name", config_picker=lambda args, keys: "default")(
+            register("duplicate_name", config_picker=lambda args, keys: None)(
                 _add_kernel
             )
 
             with pytest.raises(ValueError, match="already registered"):
-                register("duplicate_name", config_picker=lambda args, keys: "default")(
+                register("duplicate_name", config_picker=lambda args, keys: None)(
                     _add_kernel
                 )
 
@@ -886,7 +896,7 @@ def test_register_kernel_rejects_autotuner_fn_in_settings(self):
 
             @register_kernel(
                 "test",
-                config_picker=lambda args, keys: "default",
+                config_picker=lambda args, keys: None,
                 helion_settings=mock_settings,
             )
             def test_kernel(x):
@@ -903,7 +913,7 @@ def test_register_kernel_no_warning_with_static_shapes_false(self):
         ):
             register(
                 "test",
-                config_picker=lambda args, keys: "default",
+                config_picker=lambda args, keys: None,
                 helion_settings=mock_settings,
             )(_add_kernel)
 
@@ -933,7 +943,7 @@ def fake_impl(*args, **kwargs):
 
             wrapper = register_kernel(
                 "disabled_kernel",
-                config_picker=lambda args, keys: "default",
+                config_picker=lambda args, keys: None,
                 fake_impl=fake_impl,
             )(_add_kernel)
 
@@ -941,3 +951,110 @@ def fake_impl(*args, **kwargs):
         registered = get_registered_kernels()
         assert "disabled_kernel" in registered
         assert registered["disabled_kernel"] is wrapper
+
+
+@pytest.mark.skipif(not _HOP_AVAILABLE, reason="Requires PyTorch >= 2.11 for HOP")
+class TestTorchCompileHOP:
+    """Test that HelionKernelWrapper emits the correct HOP under torch.compile."""
+
+    def test_compiled_graph_contains_helion_hop(self):
+        """Verify torch.compile on a HelionKernelWrapper emits a
+        helion_kernel_wrapper_mutation HOP node in the FX graph."""
+        configs: dict[CaseKey, helion.Config] = {
+            CaseKey.default(): helion.Config(block_sizes=[4, 4])
+        }
+
+        with dummy_kernel_registry(configs=configs) as register:
+            add_helion_kernel = register(
+                op_name="test_torch_compile_add_kernel",
+                config_picker=lambda args, keys: None,
+            )(_add_kernel)
+
+        captured_graph: torch.fx.GraphModule | None = None
+
+        def capturing_backend(gm, example_inputs):
+            nonlocal captured_graph
+            assert captured_graph is None, "Backend called multiple times"
+            captured_graph = gm
+            return gm.forward
+
+        def f(x, y):
+            return add_helion_kernel(x, y)
+
+        torch._dynamo.reset()
+        compiled_f = torch.compile(f, backend=capturing_backend, fullgraph=True)
+
+        x = torch.randn(4, 4, device="cuda")
+        y = torch.randn(4, 4, device="cuda")
+
+        # Run compiled version and capture graph
+        compiled_result = compiled_f(x, y)
+
+        assert captured_graph is not None
+        hop_nodes = [
+            node
+            for node in captured_graph.graph.nodes
+            if node.op == "call_function"
+            and node.target is helion_kernel_wrapper_mutation
+        ]
+        assert len(hop_nodes) > 0, (
+            "Expected helion_kernel_wrapper_mutation HOP node in compiled graph, "
+            f"but found none. Graph nodes: "
+            f"{[(n.op, n.target) for n in captured_graph.graph.nodes]}"
+        )
+
+        # Verify compiled result matches eager execution
+        eager_result = f(x, y)  # Run in eager mode
+
+        assert torch.allclose(compiled_result, eager_result, atol=1e-5, rtol=1e-5), (
+            "Compiled execution result doesn't match eager execution. "
+            f"Max difference: {torch.max(torch.abs(compiled_result - eager_result))}"
+        )
+
+    @pytest.mark.skipif(
+        not (_HOP_AVAILABLE and supports_torch_compile_fusion()),
+        reason="Requires PyTorch with Helion inductor fusion support",
+    )
+    def test_inductor_backend_compiles_helion_hop(self):
+        """Test torch.compile with inductor backend and Helion fusion enabled."""
+
+        configs: dict[CaseKey, helion.Config] = {
+            CaseKey.default(): helion.Config(block_sizes=[4, 4])
+        }
+
+        with dummy_kernel_registry(configs=configs) as register:
+            add_helion_kernel = register(
+                op_name="test_inductor_add_kernel",
+                config_picker=lambda args, keys: None,
+                helion_settings=helion.Settings(
+                    torch_compile_fusion=True, static_shapes=False
+                ),
+            )(_add_kernel)
+
+        def f(x, y):
+            x = x * 2.0
+            y = y + 1.0
+            out = add_helion_kernel(x, y)
+            return out.relu()
+
+        torch._dynamo.reset()
+        compiled_f = torch.compile(f, backend="inductor", fullgraph=True)
+
+        x = torch.randn(4, 4, device="cuda")
+        y = torch.randn(4, 4, device="cuda")
+
+        compiled_result, source_codes = run_and_get_code(compiled_f, x, y)
+        eager_result = f(x, y)
+
+        assert torch.allclose(compiled_result, eager_result, atol=1e-5, rtol=1e-5), (
+            "Inductor-compiled result doesn't match eager execution. "
+            f"Max difference: {torch.max(torch.abs(compiled_result - eager_result))}"
+        )
+
+        # With fusion enabled, prologue/epilogue ops should be fused into
+        # a single triton kernel rather than generating separate kernels.
+        kernel_count = sum(code.count("@triton.jit") for code in source_codes)
+        assert kernel_count == 1, (
+            f"Expected 1 fused triton kernel, got {kernel_count}. "
+            "Prologue/epilogue ops were not fused into the Helion kernel."
+        )
diff --git a/tests/kernels/helion/test_silu_mul_fp8.py b/tests/kernels/helion/test_silu_mul_fp8.py
index 887f20b9f563..bd3131e08da2 100644
--- a/tests/kernels/helion/test_silu_mul_fp8.py
+++ b/tests/kernels/helion/test_silu_mul_fp8.py
@@ -13,8 +13,10 @@
         allow_module_level=True,
     )
 
+from vllm.kernels.helion.case_key import CaseKey
 from vllm.kernels.helion.config_manager import ConfigManager
 from vllm.kernels.helion.ops.silu_mul_fp8 import (
+    _pick_cache,
     pick_silu_mul_fp8_config,
     silu_mul_fp8,
     silu_mul_fp8_baseline,
@@ -52,10 +54,13 @@ def reset_config_manager_singleton():
 
 
 class TestSiluMulFp8ConfigPicker:
+    def setup_method(self):
+        _pick_cache.clear()
+
     def test_config_picker_exact_match(self):
         config_keys = [
-            "intermediate_2048_numtokens_256",
-            "intermediate_4096_numtokens_256",
+            CaseKey({"intermediate": 2048, "numtokens": 256}),
+            CaseKey({"intermediate": 4096, "numtokens": 256}),
         ]
 
         input_tensor = torch.randn(32, 4096, dtype=torch.bfloat16, device="cuda")
@@ -63,33 +68,22 @@ def test_config_picker_exact_match(self):
         args = (input_tensor, scale)
 
         selected_key = pick_silu_mul_fp8_config(args, config_keys)
-        assert selected_key == "intermediate_2048_numtokens_256"
+        assert selected_key == CaseKey({"intermediate": 2048, "numtokens": 256})
 
     def test_config_picker_closest_match(self):
         config_keys = [
-            "intermediate_2048_numtokens_256",
-            "intermediate_4096_numtokens_256",
+            CaseKey({"intermediate": 2048, "numtokens": 256}),
+            CaseKey({"intermediate": 4096, "numtokens": 256}),
         ]
-        # Use 7000 (intermediate_size=3500) which is closer to 4096 than 2048
         input_tensor = torch.randn(32, 7000, dtype=torch.bfloat16, device="cuda")
         scale = torch.tensor([0.5], dtype=torch.float32, device="cuda")
         args = (input_tensor, scale)
 
         selected_key = pick_silu_mul_fp8_config(args, config_keys)
-        assert selected_key == "intermediate_4096_numtokens_256"
-
-    def test_config_picker_fallback_to_default(self):
-        config_keys = ["default"]
-
-        input_tensor = torch.randn(32, 4096, dtype=torch.bfloat16, device="cuda")
-        scale = torch.tensor([0.5], dtype=torch.float32, device="cuda")
-        args = (input_tensor, scale)
-
-        selected_key = pick_silu_mul_fp8_config(args, config_keys)
-        assert selected_key == "default"
+        assert selected_key == CaseKey({"intermediate": 4096, "numtokens": 256})
 
     def test_config_picker_no_configs(self):
-        config_keys: list[str] = []
+        config_keys: list[dict] = []
 
         input_tensor = torch.randn(32, 4096, dtype=torch.bfloat16, device="cuda")
         scale = torch.tensor([0.5], dtype=torch.float32, device="cuda")
@@ -101,9 +95,9 @@ def test_config_picker_no_configs(self):
     @pytest.mark.parametrize("intermediate_size", [2048, 4096, 5120])
     def test_config_picker_different_sizes(self, intermediate_size):
         config_keys = [
-            "intermediate_2048_numtokens_256",
-            "intermediate_4096_numtokens_256",
-            "intermediate_5120_numtokens_256",
+            CaseKey({"intermediate": 2048, "numtokens": 256}),
+            CaseKey({"intermediate": 4096, "numtokens": 256}),
+            CaseKey({"intermediate": 5120, "numtokens": 256}),
         ]
 
         input_tensor = torch.randn(
@@ -113,72 +107,47 @@ def test_config_picker_different_sizes(self, intermediate_size):
         args = (input_tensor, scale)
 
         selected_key = pick_silu_mul_fp8_config(args, config_keys)
-        expected_key = f"intermediate_{intermediate_size}_numtokens_256"
-        assert selected_key == expected_key
+        assert selected_key == {
+            "intermediate": intermediate_size,
+            "numtokens": 256,
+        }
 
     def test_config_picker_numtokens_ceiling(self):
-        """Pick the smallest numtokens >= input num_tokens."""
         config_keys = [
-            "intermediate_4096_numtokens_8",
-            "intermediate_4096_numtokens_32",
-            "intermediate_4096_numtokens_128",
-            "intermediate_4096_numtokens_256",
+            CaseKey({"intermediate": 4096, "numtokens": 8}),
+            CaseKey({"intermediate": 4096, "numtokens": 32}),
+            CaseKey({"intermediate": 4096, "numtokens": 128}),
+            CaseKey({"intermediate": 4096, "numtokens": 256}),
         ]
-        # 20 tokens -> should pick numtokens_32 (smallest >= 20)
         input_tensor = torch.randn(20, 8192, dtype=torch.bfloat16, device="cuda")
         scale = torch.tensor([0.5], dtype=torch.float32, device="cuda")
 
         selected_key = pick_silu_mul_fp8_config((input_tensor, scale), config_keys)
-        assert selected_key == "intermediate_4096_numtokens_32"
+        assert selected_key == CaseKey({"intermediate": 4096, "numtokens": 32})
 
     def test_config_picker_numtokens_exact(self):
-        """Exact num_tokens match is preferred over ceiling."""
         config_keys = [
-            "intermediate_4096_numtokens_8",
-            "intermediate_4096_numtokens_32",
-            "intermediate_4096_numtokens_128",
+            CaseKey({"intermediate": 4096, "numtokens": 8}),
+            CaseKey({"intermediate": 4096, "numtokens": 32}),
+            CaseKey({"intermediate": 4096, "numtokens": 128}),
         ]
         input_tensor = torch.randn(32, 8192, dtype=torch.bfloat16, device="cuda")
         scale = torch.tensor([0.5], dtype=torch.float32, device="cuda")
 
         selected_key = pick_silu_mul_fp8_config((input_tensor, scale), config_keys)
-        assert selected_key == "intermediate_4096_numtokens_32"
+        assert selected_key == CaseKey({"intermediate": 4096, "numtokens": 32})
 
     def test_config_picker_numtokens_fallback_to_largest(self):
-        """Fall back to the largest numtokens when input exceeds all."""
         config_keys = [
-            "intermediate_4096_numtokens_8",
-            "intermediate_4096_numtokens_32",
-            "intermediate_4096_numtokens_128",
+            CaseKey({"intermediate": 4096, "numtokens": 8}),
+            CaseKey({"intermediate": 4096, "numtokens": 32}),
+            CaseKey({"intermediate": 4096, "numtokens": 128}),
         ]
-        # 512 tokens -> exceeds all available, should pick largest (128)
         input_tensor = torch.randn(512, 8192, dtype=torch.bfloat16, device="cuda")
         scale = torch.tensor([0.5], dtype=torch.float32, device="cuda")
 
         selected_key = pick_silu_mul_fp8_config((input_tensor, scale), config_keys)
-        assert selected_key == "intermediate_4096_numtokens_128"
-
-    def test_config_picker_malformed_key_raises(self):
-        """Malformed config keys should raise ValueError."""
-        config_keys = ["intermediate_4096_badformat_256"]
-        input_tensor = torch.randn(32, 8192, dtype=torch.bfloat16, device="cuda")
-        scale = torch.tensor([0.5], dtype=torch.float32, device="cuda")
-
-        with pytest.raises(ValueError, match="Malformed config key"):
-            pick_silu_mul_fp8_config((input_tensor, scale), config_keys)
-
-    def test_config_picker_default_ignored_when_valid_keys_exist(self):
-        """'default' is skipped in favor of a real match."""
-        config_keys = [
-            "default",
-            "intermediate_4096_numtokens_32",
-            "intermediate_4096_numtokens_128",
-        ]
-        input_tensor = torch.randn(64, 8192, dtype=torch.bfloat16, device="cuda")
-        scale = torch.tensor([0.5], dtype=torch.float32, device="cuda")
-
-        selected_key = pick_silu_mul_fp8_config((input_tensor, scale), config_keys)
-        assert selected_key == "intermediate_4096_numtokens_128"
+        assert selected_key == CaseKey({"intermediate": 4096, "numtokens": 128})
 
 
 class TestSiluMulFp8Correctness:
diff --git a/tests/kernels/ir/test_ir_ops.py b/tests/kernels/ir/test_ir_ops.py
new file mode 100644
index 000000000000..1ee36b8f4c90
--- /dev/null
+++ b/tests/kernels/ir/test_ir_ops.py
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Meta-tests for vLLM IR op infrastructure.
+
+Ensures all registered ops have input generators defined.
+Per-op correctness tests live alongside their op definitions
+(e.g. tests/kernels/ir/test_layernorm.py).
+"""
+
+import vllm.kernels  # noqa: F401 — registers provider implementations
+from vllm.ir.op import IrOp
+
+
+def test_all_ops_have_input_generator():
+    missing = [name for name, op in IrOp.registry.items() if not op.has_input_generator]
+    assert not missing, (
+        f"IR ops without input generators: {missing}. "
+        f"Register one with @ir.ops.<name>.register_input_generator"
+    )
diff --git a/tests/kernels/ir/test_layernorm.py b/tests/kernels/ir/test_layernorm.py
new file mode 100644
index 000000000000..6fc9d7543d15
--- /dev/null
+++ b/tests/kernels/ir/test_layernorm.py
@@ -0,0 +1,386 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
+import torch
+
+# This registers op implementations
+import vllm.kernels  # noqa: F401
+from tests.ir.ir_test_utils import (
+    COMMON_HIDDEN_SIZES,
+    NUM_TOKENS,
+    assert_close,
+    clone_args,
+    supported_providers,
+)
+from tests.kernels.allclose_default import get_default_rtol
+from vllm import ir
+from vllm.platforms import current_platform
+
+rms_norm_native = ir.ops.rms_norm.impls["native"].impl_fn
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike() and not current_platform.is_xpu(),
+    reason="Currently only kernels on CUDA, ROCm and XPU",
+)
+def test_rms_norm_registration():
+    expected = {
+        "native": True,
+        "vllm_c": current_platform.is_cuda_alike(),
+        "aiter": current_platform.is_rocm(),
+        "oink": current_platform.has_device_capability(100)
+        and hasattr(torch.ops, "oink")
+        and hasattr(torch.ops.oink, "rmsnorm"),
+        "xpu_kernels": current_platform.is_xpu(),
+    }
+
+    actual = {
+        provider: impl.supported for provider, impl in ir.ops.rms_norm.impls.items()
+    }
+
+    assert actual == expected
+
+
+@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32])
+@pytest.mark.parametrize("n_tokens", NUM_TOKENS)
+@pytest.mark.parametrize("hidden_size", COMMON_HIDDEN_SIZES)
+@pytest.mark.parametrize("epsilon", [1e-6, 1e-5])
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike() and not current_platform.is_xpu(),
+    reason="Currently only kernels on CUDA, ROCm and XPU",
+)
+class TestRMSNorm:
+    @classmethod
+    def setup_class(cls, **kwargs):
+        torch.set_default_device(current_platform.device_type)
+
+    def test_native_semantics(self, dtype, n_tokens, hidden_size, epsilon):
+        x, weight, epsilon = ir.ops.rms_norm.generate_inputs(
+            num_tokens=4, hidden_size=8, dtype=dtype, epsilon=epsilon
+        )
+        out = rms_norm_native(x, weight, epsilon=epsilon)
+
+        # Check shape, dtype, device
+        assert out.shape == x.shape
+        assert out.dtype == x.dtype
+        assert out.device == x.device
+
+        # Check the scaling property of rms norm
+        out2 = rms_norm_native(x * 2.0, weight, epsilon=epsilon)
+        torch.testing.assert_close(out2, out, rtol=get_default_rtol(out), atol=1e-3)
+
+        # Mean square should be approximately 1 (ignoring epsilon and weight scaling)
+        combined_norm = out.float() / weight.float()
+        variance = combined_norm.pow(2).mean(dim=-1)
+        # After RMS normalization, variance should be close to 1
+        torch.testing.assert_close(
+            variance, torch.ones_like(variance), rtol=1e-2, atol=1e-2
+        )
+
+        # Check behavior with and without weight
+        weight1 = torch.ones_like(weight)
+        out3 = rms_norm_native(x, weight1, epsilon=epsilon)
+        out4 = rms_norm_native(x, None, epsilon=epsilon)
+        torch.testing.assert_close(out3, out4)
+
+    @pytest.mark.parametrize("provider", supported_providers(ir.ops.rms_norm))
+    def test_impls(self, dtype, n_tokens, hidden_size, epsilon, provider):
+        impl = ir.ops.rms_norm.impls[provider]
+        x, weight, eps = ir.ops.rms_norm.generate_inputs(
+            num_tokens=n_tokens, hidden_size=hidden_size, dtype=dtype, epsilon=epsilon
+        )
+        args = (x, weight, eps)
+
+        if not impl.supports_args(*args):
+            pytest.skip(f"{provider} does not support args")
+
+        ref_output = rms_norm_native(*clone_args(args))
+        output = impl.impl_fn(*clone_args(args))
+        assert_close(ir.ops.rms_norm, output, ref_output)
+
+        # check that dispatched call matches direct call
+        with ir.ops.rms_norm.set_priority([provider, "native"]):
+            out_dispatched = ir.ops.rms_norm(*args)
+        out_direct = impl.impl_fn(*args)
+        torch.testing.assert_close(out_dispatched, out_direct, rtol=0.0, atol=0.0)
+
+        # none of these support variance_size override
+        assert not impl.supports_args(x, weight, eps, 4)
+        assert not impl.supports_args(x, weight, eps, variance_size=4)
+
+        # test weight=None behavior
+        out_no_weight = impl.impl_fn(x, None, eps)
+        out_unit_weight = impl.impl_fn(x, torch.ones_like(weight), eps)
+        assert_close(ir.ops.rms_norm, out_no_weight, out_unit_weight)
+
+    @pytest.mark.parametrize("provider", ["vllm_c", "aiter", "xpu_kernels", "native"])
+    def test_torch_opcheck(self, dtype, n_tokens, hidden_size, epsilon, provider):
+        if not ir.ops.rms_norm.impls[provider].supported:
+            pytest.skip(f"{provider} impl not supported on this platform")
+
+        args = ir.ops.rms_norm.generate_inputs(
+            num_tokens=n_tokens, hidden_size=hidden_size, dtype=dtype, epsilon=epsilon
+        )
+
+        # When checking the torch op, we have to set priority and use dispatch
+        with ir.ops.rms_norm.set_priority([provider, "native"]):
+            torch.library.opcheck(torch.ops.vllm_ir.rms_norm, args)
+
+
+@pytest.mark.skipif(
+    not current_platform.is_rocm(),
+    reason="aiter is only supported on ROCm",
+)
+def test_aiter_rejects_unsupported_dtypes():
+    torch.set_default_device(current_platform.device_type)
+    impl = ir.ops.rms_norm.impls["aiter"]
+    for dtype in [torch.float32, torch.float64]:
+        args = ir.ops.rms_norm.generate_inputs(
+            num_tokens=8, hidden_size=4096, dtype=dtype, epsilon=1e-5
+        )
+        assert not impl.supports_args(*args), f"aiter should reject dtype={dtype}"
+
+
+@pytest.mark.skipif(
+    not current_platform.is_rocm(),
+    reason="ROCm vllm_c RMSNorm needs explicit ND input handling",
+)
+def test_vllm_c_rms_norm_accepts_nd_input():
+    torch.set_default_device(current_platform.device_type)
+    impl = ir.ops.rms_norm.impls["vllm_c"]
+    if not impl.supported:
+        pytest.skip("vllm_c impl not supported on this platform")
+
+    base = torch.randn(3, 8, 192, dtype=torch.float16)
+    x = base.split(64, dim=-1)[0].view(3, 8, 4, 16)
+    assert not x.is_contiguous()
+    weight = torch.randn(16, dtype=torch.float16)
+    epsilon = 1e-5
+
+    output = impl.impl_fn(x, weight, epsilon)
+    ref_output = rms_norm_native(x, weight, epsilon)
+
+    assert output.shape == x.shape
+    assert_close(ir.ops.rms_norm, output, ref_output)
+
+
+fused_add_rms_norm_native = ir.ops.fused_add_rms_norm.impls["native"].impl_fn
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike() and not current_platform.is_xpu(),
+    reason="Currently only kernels on CUDA, ROCm and XPU",
+)
+def test_fused_add_rms_norm_registration():
+    expected = {
+        "native": True,
+        "vllm_c": current_platform.is_cuda_alike(),
+        "aiter": current_platform.is_rocm(),
+        "oink": current_platform.has_device_capability(100)
+        and hasattr(torch.ops, "oink")
+        and hasattr(torch.ops.oink, "fused_add_rms_norm"),
+        "xpu_kernels": current_platform.is_xpu(),
+    }
+
+    actual = {
+        provider: impl.supported
+        for provider, impl in ir.ops.fused_add_rms_norm.impls.items()
+    }
+
+    assert actual == expected
+
+
+@pytest.mark.skipif(
+    not current_platform.is_rocm(),
+    reason="ROCm vllm_c fused_add_rms_norm needs explicit ND input handling",
+)
+def test_vllm_c_fused_add_rms_norm_accepts_nd_input():
+    torch.set_default_device(current_platform.device_type)
+    impl = ir.ops.fused_add_rms_norm.impls["vllm_c"]
+    if not impl.supported:
+        pytest.skip("vllm_c impl not supported on this platform")
+
+    base = torch.randn(3, 8, 192, dtype=torch.float16)
+    residual_base = torch.randn(3, 8, 192, dtype=torch.float16)
+    x = base.split(64, dim=-1)[0].view(3, 8, 4, 16)
+    x_residual = residual_base.split(64, dim=-1)[0].view(3, 8, 4, 16)
+    assert not x.is_contiguous()
+    assert not x_residual.is_contiguous()
+    weight = torch.randn(16, dtype=torch.float16)
+    epsilon = 1e-5
+
+    output, residual = impl.impl_fn(x.clone(), x_residual.clone(), weight, epsilon)
+    ref_output, ref_residual = fused_add_rms_norm_native(x, x_residual, weight, epsilon)
+
+    assert output.shape == x.shape
+    assert residual.shape == x_residual.shape
+    assert_close(ir.ops.fused_add_rms_norm, output, ref_output)
+    assert_close(ir.ops.fused_add_rms_norm, residual, ref_residual)
+
+
+@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32])
+@pytest.mark.parametrize("n_tokens", NUM_TOKENS)
+@pytest.mark.parametrize("hidden_size", COMMON_HIDDEN_SIZES)
+@pytest.mark.parametrize("epsilon", [1e-6, 1e-5])
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike() and not current_platform.is_xpu(),
+    reason="Currently only kernels on CUDA, ROCm and XPU",
+)
+class TestFusedAddRMSNorm:
+    @classmethod
+    def setup_class(cls, **kwargs):
+        torch.set_default_device(current_platform.device_type)
+
+    def test_native_semantics(self, dtype, n_tokens, hidden_size, epsilon):
+        x, x_residual, weight, eps = ir.ops.fused_add_rms_norm.generate_inputs(
+            num_tokens=4, hidden_size=8, dtype=dtype, epsilon=epsilon
+        )
+        out, residual_out = fused_add_rms_norm_native(x, x_residual, weight, eps)
+
+        # Check shape, dtype, device
+        assert out.shape == x.shape
+        assert out.dtype == x.dtype
+        assert out.device == x.device
+        assert residual_out.shape == x_residual.shape
+        assert residual_out.dtype == x_residual.dtype
+        assert residual_out.device == x_residual.device
+
+        # Check that residual_out = x + x_residual
+        expected_residual = (x.float() + x_residual.float()).to(dtype)
+        torch.testing.assert_close(
+            residual_out, expected_residual, rtol=1e-3, atol=1e-3
+        )
+
+        # Verify that the output is RMS normalized version of (x + x_residual)
+        expected_out = rms_norm_native(expected_residual, weight, epsilon)
+        assert_close(
+            ir.ops.fused_add_rms_norm,
+            (out, residual_out),
+            (expected_out, expected_residual),
+        )
+
+        # Check the scaling property of rms norm
+        out1, _ = fused_add_rms_norm_native(
+            x, torch.zeros_like(x), weight, epsilon=epsilon
+        )
+        out2, _ = fused_add_rms_norm_native(
+            x * 2.0, torch.zeros_like(x), weight, epsilon=epsilon
+        )
+        torch.testing.assert_close(out2, out1, rtol=get_default_rtol(out), atol=1e-3)
+
+        # Check behavior with and without weight
+        weight1 = torch.ones_like(weight)
+        out3, _ = fused_add_rms_norm_native(x, x_residual, weight1, eps)
+        out4, _ = fused_add_rms_norm_native(x, x_residual, None, eps)
+        torch.testing.assert_close(out3, out4)
+
+    @pytest.mark.parametrize("provider", supported_providers(ir.ops.fused_add_rms_norm))
+    def test_impls(self, dtype, n_tokens, hidden_size, epsilon, provider):
+        impl = ir.ops.fused_add_rms_norm.impls[provider]
+        x, x_residual, weight, eps = ir.ops.fused_add_rms_norm.generate_inputs(
+            num_tokens=n_tokens, hidden_size=hidden_size, dtype=dtype, epsilon=epsilon
+        )
+        args = (x, x_residual, weight, eps, None)
+
+        if not impl.supports_args(*args):
+            pytest.skip(f"{provider} does not support args")
+
+        ref_output, ref_residual = fused_add_rms_norm_native(*clone_args(args))
+        output, residual = impl.impl_fn(*clone_args(args))
+        assert_close(ir.ops.fused_add_rms_norm, output, ref_output)
+        assert_close(ir.ops.fused_add_rms_norm, residual, ref_residual)
+
+        # check that dispatched call matches direct call
+        with ir.ops.fused_add_rms_norm.set_priority([provider, "native"]):
+            out_dispatched, residual_dispatched = ir.ops.fused_add_rms_norm(*args[:4])
+        out_direct, residual_direct = impl.impl_fn(*clone_args(args))
+        torch.testing.assert_close(out_dispatched, out_direct, rtol=0.0, atol=0.0)
+        torch.testing.assert_close(
+            residual_dispatched, residual_direct, rtol=0.0, atol=0.0
+        )
+
+        # none of these support variance_size override
+        assert not impl.supports_args(x, x_residual, weight, epsilon, 4)
+        assert not impl.supports_args(x, x_residual, weight, epsilon, variance_size=4)
+
+        # test weight=None behavior
+        out_no_weight, residual_no_weight = impl.impl_fn(
+            x.clone(), x_residual.clone(), None, epsilon
+        )
+        out_unit_weight, residual_unit_weight = impl.impl_fn(
+            x.clone(), x_residual.clone(), torch.ones_like(weight), epsilon
+        )
+        assert_close(ir.ops.fused_add_rms_norm, out_no_weight, out_unit_weight)
+        assert_close(
+            ir.ops.fused_add_rms_norm, residual_no_weight, residual_unit_weight
+        )
+
+    @pytest.mark.parametrize("provider", ["vllm_c"])
+    def test_inplace_semantics(self, dtype, n_tokens, hidden_size, epsilon, provider):
+        """Test that inplace implementations reuse inputs,
+        for maybe_inplace overload but not for default overload."""
+        impl = ir.ops.fused_add_rms_norm.impls[provider]
+        if not impl.supported:
+            pytest.skip(f"{provider} impl not supported on this platform")
+
+        x, x_residual, weight, eps = ir.ops.fused_add_rms_norm.generate_inputs(
+            num_tokens=n_tokens, hidden_size=hidden_size, dtype=dtype, epsilon=epsilon
+        )
+
+        # Test default overload - should NOT modify inputs even with inplace impl
+        x_default = x.clone()
+        x_residual_default = x_residual.clone()
+        x_default_ptr = x_default.data_ptr()
+        x_residual_default_ptr = x_residual_default.data_ptr()
+
+        with ir.ops.fused_add_rms_norm.set_priority([provider, "native"]):
+            out_default, residual_default = ir.ops.fused_add_rms_norm(
+                x_default, x_residual_default, weight, eps
+            )
+
+        # Default should NOT be inplace (even with inplace implementation)
+        assert out_default.data_ptr() != x_default_ptr
+        assert residual_default.data_ptr() != x_residual_default_ptr
+        torch.testing.assert_close(x, x_default, rtol=0.0, atol=0.0)
+        torch.testing.assert_close(x_residual, x_residual_default, rtol=0.0, atol=0.0)
+
+        # Test maybe_inplace overload - should modify inputs with inplace impl
+        x_inplace = x.clone()
+        x_residual_inplace = x_residual.clone()
+        x_inplace_ptr = x_inplace.data_ptr()
+        x_residual_inplace_ptr = x_residual_inplace.data_ptr()
+
+        with ir.ops.fused_add_rms_norm.set_priority([provider, "native"]):
+            out_inplace, residual_inplace = ir.ops.fused_add_rms_norm.maybe_inplace(
+                x_inplace, x_residual_inplace, weight, eps
+            )
+
+        # maybe_inplace should be inplace
+        assert out_inplace.data_ptr() == x_inplace_ptr
+        assert residual_inplace.data_ptr() == x_residual_inplace_ptr
+
+        # Both should produce same results
+        torch.testing.assert_close(out_default, out_inplace, atol=0.0, rtol=0.0)
+        torch.testing.assert_close(
+            residual_default, residual_inplace, atol=0.0, rtol=0.0
+        )
+
+    @pytest.mark.parametrize("provider", supported_providers(ir.ops.fused_add_rms_norm))
+    def test_torch_opcheck(self, dtype, n_tokens, hidden_size, epsilon, provider):
+        args = ir.ops.fused_add_rms_norm.generate_inputs(
+            num_tokens=n_tokens, hidden_size=hidden_size, dtype=dtype, epsilon=epsilon
+        )
+        args = args + (None,)  # Add variance_size parameter
+
+        # When checking the torch op, we have to set priority and use dispatch
+        with ir.ops.fused_add_rms_norm.set_priority([provider, "native"]):
+            torch.library.opcheck(torch.ops.vllm_ir.fused_add_rms_norm.default, args)
+
+            # Only test maybe_inplace with non-inplace implementations
+            # Inplace implementations return aliases of inputs which is not allowed.
+            # We break this invariant, but we also convert maybe_inplace to the default
+            # overload during compilation, so maybe_inplace never reaches Inductor.
+            if not ir.ops.fused_add_rms_norm.impls[provider].inplace:
+                torch.library.opcheck(
+                    torch.ops.vllm_ir.fused_add_rms_norm.maybe_inplace, args
+                )
diff --git a/tests/kernels/mamba/__init__.py b/tests/kernels/mamba/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/kernels/mamba/test_causal_conv1d.py b/tests/kernels/mamba/test_causal_conv1d.py
index 1d10bd297ae3..0ebc527d54d3 100644
--- a/tests/kernels/mamba/test_causal_conv1d.py
+++ b/tests/kernels/mamba/test_causal_conv1d.py
@@ -12,7 +12,7 @@
     causal_conv1d_update,
 )
 from vllm.utils.torch_utils import set_random_seed
-from vllm.v1.attention.backends.utils import PAD_SLOT_ID
+from vllm.v1.attention.backends.utils import NULL_BLOCK_ID
 
 
 def causal_conv1d_ref(
@@ -122,7 +122,7 @@ def causal_conv1d_opcheck_fn(
     has_initial_state: torch.Tensor | None = None,
     conv_states: torch.Tensor | None = None,
     activation: str | None = "silu",
-    pad_slot_id: int = PAD_SLOT_ID,
+    null_block_id: int = NULL_BLOCK_ID,
 ):
     """
     x: (batch, dim, seqlen)
@@ -158,15 +158,16 @@ def test_causal_conv1d_update(dim, width, seqlen, has_bias, silu_activation, ity
     batch = 2
     x = torch.randn(batch, dim, seqlen, device=device, dtype=itype)
     x_ref = x.clone()
-    conv_state = torch.randn(batch, dim, width - 1, device=device, dtype=itype)
+    # +1 entry to reserve index 0 as null block
+    conv_state = torch.randn(batch + 1, dim, width - 1, device=device, dtype=itype)
 
     weight = torch.randn(dim, width, device=device, dtype=itype)
     bias = torch.randn(dim, device=device, dtype=itype) if has_bias else None
-    conv_state_ref = conv_state.detach().clone()
+    # Start indices from 1, skipping null block at index 0
+    conv_state_indices = torch.arange(1, batch + 1, dtype=torch.int32, device=device)
+    conv_state_ref = conv_state[conv_state_indices].detach().clone()
     activation = None if not silu_activation else "silu"
 
-    conv_state_indices = torch.arange(batch, dtype=torch.int32, device=device)
-
     out = causal_conv1d_update(
         x,
         conv_state,
@@ -179,7 +180,7 @@ def test_causal_conv1d_update(dim, width, seqlen, has_bias, silu_activation, ity
         x_ref, conv_state_ref, weight, bias, activation=activation
     )
 
-    assert torch.equal(conv_state, conv_state_ref)
+    assert torch.equal(conv_state[conv_state_indices], conv_state_ref)
     assert torch.allclose(out, out_ref, rtol=rtol, atol=atol)
 
 
@@ -215,7 +216,8 @@ def test_causal_conv1d_update_with_batch_gather(
 
     x_ref = x.clone()
 
-    conv_state_indices = torch.randperm(total_entries)[:batch_size].to(
+    # +1 to exclude index 0 (null block)
+    conv_state_indices = (torch.randperm(total_entries - 1)[:batch_size] + 1).to(
         dtype=torch.int32, device=device
     )
     unused_states_bool = torch.ones(total_entries, dtype=torch.bool, device=device)
@@ -223,7 +225,9 @@ def test_causal_conv1d_update_with_batch_gather(
     padded_state_indices = torch.concat(
         [
             conv_state_indices,
-            torch.as_tensor([PAD_SLOT_ID] * padding, dtype=torch.int32, device=device),
+            torch.as_tensor(
+                [NULL_BLOCK_ID] * padding, dtype=torch.int32, device=device
+            ),
         ],
         dim=0,
     )
@@ -248,7 +252,6 @@ def test_causal_conv1d_update_with_batch_gather(
         bias,
         activation=activation,
         conv_state_indices=padded_state_indices,
-        pad_slot_id=PAD_SLOT_ID,
     )
     out_ref = causal_conv1d_update_ref(
         x_ref[:batch_size], conv_state_ref, weight, bias, activation=activation
@@ -317,13 +320,19 @@ def test_causal_conv1d_varlen(
     has_initial_states = torch.randint(
         0, 2, (cumsum.shape[0] - 1,), dtype=torch.bool, device=x.device
     )
-    state_indices = torch.randperm(total_entries, dtype=torch.int32, device=x.device)[
-        :batch_size
-    ]
+    # +1 to exclude index 0 (null block)
+    state_indices = (
+        torch.randperm(total_entries - 1, dtype=torch.int32, device=x.device)[
+            :batch_size
+        ]
+        + 1
+    )
     padded_state_indices = torch.concat(
         [
             state_indices,
-            torch.as_tensor([PAD_SLOT_ID] * padding, dtype=torch.int32, device=device),
+            torch.as_tensor(
+                [NULL_BLOCK_ID] * padding, dtype=torch.int32, device=device
+            ),
         ],
         dim=-1,
     )
@@ -336,7 +345,6 @@ def test_causal_conv1d_varlen(
         cache_indices=padded_state_indices,
         has_initial_state=has_initial_states,
         activation=activation,
-        pad_slot_id=PAD_SLOT_ID,
     )
 
     out_ref = []
@@ -345,7 +353,7 @@ def test_causal_conv1d_varlen(
     splits = [torch.split(var, seqlens[0], dim=-1) for var in (x_ref)]
     for i in range(len(seqlens[0])):
         x_s = [v[i].unsqueeze(0) for v in splits][0]
-        if padded_state_indices[i] == PAD_SLOT_ID:
+        if padded_state_indices[i] == NULL_BLOCK_ID:
             continue
         out_ref_b.append(
             causal_conv1d_ref(
diff --git a/tests/kernels/mamba/test_mamba_ssm.py b/tests/kernels/mamba/test_mamba_ssm.py
index e8cbba29f363..d812242cba96 100644
--- a/tests/kernels/mamba/test_mamba_ssm.py
+++ b/tests/kernels/mamba/test_mamba_ssm.py
@@ -6,86 +6,16 @@
 import torch.nn.functional as F
 from einops import rearrange, repeat
 
+from tests.kernels.mamba.utils import selective_state_update_ref
 from tests.kernels.utils import opcheck
 from vllm import _custom_ops as ops  # noqa: F401
 from vllm.model_executor.layers.mamba.ops.mamba_ssm import (
     selective_scan_fn,
     selective_state_update,
 )
+from vllm.platforms import current_platform
 from vllm.utils.torch_utils import set_random_seed
-from vllm.v1.attention.backends.utils import PAD_SLOT_ID
-
-
-def selective_state_update_ref(
-    state, x, dt, A, B, C, D=None, z=None, dt_bias=None, dt_softplus=False
-):
-    """
-    Argument:
-        state: (batch, dim, dstate) or (batch, nheads, dim, dstate)
-        x: (batch, dim) or (batch, nheads, dim)
-        dt: (batch, dim) or (batch, nheads, dim)
-        A: (dim, dstate) or (nheads, dim, dstate)
-        B: (batch, dstate) or (batch, ngroups, dstate)
-        C: (batch, dstate) or (batch, ngroups, dstate)
-        D: (dim,) or (nheads, dim)
-        z: (batch, dim) or (batch, nheads, dim)
-        dt_bias: (dim,) or (nheads, dim)
-    Return:
-        out: (batch, dim) or (batch, nheads, dim)
-    """
-    has_heads = state.dim() > 3
-    if state.dim() == 3:
-        state = state.unsqueeze(1)
-    if x.dim() == 2:
-        x = x.unsqueeze(1)
-    if dt.dim() == 2:
-        dt = dt.unsqueeze(1)
-    if A.dim() == 2:
-        A = A.unsqueeze(0)
-    if B.dim() == 2:
-        B = B.unsqueeze(1)
-    if C.dim() == 2:
-        C = C.unsqueeze(1)
-    if D is not None and D.dim() == 1:
-        D = D.unsqueeze(0)
-    if z is not None and z.dim() == 2:
-        z = z.unsqueeze(1)
-    if dt_bias is not None and dt_bias.dim() == 1:
-        dt_bias = dt_bias.unsqueeze(0)
-    batch, nheads, dim, dstate = state.shape
-    assert x.shape == (batch, nheads, dim)
-    assert dt.shape == x.shape
-    assert A.shape == (nheads, dim, dstate)
-    ngroups = B.shape[1]
-    assert nheads % ngroups == 0, "nheads must be divisible by ngroups"
-    assert B.shape == (batch, ngroups, dstate)
-    assert C.shape == B.shape
-    if D is not None:
-        assert D.shape == (nheads, dim)
-    if z is not None:
-        assert z.shape == x.shape
-    if dt_bias is not None:
-        assert dt_bias.shape == (nheads, dim)
-        dt = dt + dt_bias
-    dt = F.softplus(dt) if dt_softplus else dt
-    dA = torch.exp(
-        rearrange(dt, "b h d -> b h d 1") * A
-    )  # (batch, nheads, dim, dstate)
-    B = repeat(B, "b g n -> b (g h) n", h=nheads // ngroups)  # (batch, nheads, dstate)
-    C = repeat(C, "b g n -> b (g h) n", h=nheads // ngroups)  # (batch, nheads, dstate)
-    dB = rearrange(dt, "b h d -> b h d 1") * rearrange(
-        B, "b h n -> b h 1 n"
-    )  # (batch, nheads, dim, dstate)
-    state.copy_(
-        state * dA + dB * rearrange(x, "b h d -> b h d 1")
-    )  # (batch, dim, dstate
-    out = torch.einsum("bhdn,bhn->bhd", state.to(C.dtype), C)
-    if D is not None:
-        out += (x * D).to(out.dtype)
-    out = (out if z is None else out * F.silu(z)).to(x.dtype)
-    if not has_heads:
-        out = out.squeeze(1)
-    return out
+from vllm.v1.attention.backends.utils import NULL_BLOCK_ID
 
 
 def selective_scan_ref(
@@ -178,7 +108,7 @@ def selective_scan_opcheck_fn(
     cache_indices=None,
     has_initial_state=None,
     ssm_states=None,
-    pad_slot_id=PAD_SLOT_ID,
+    null_block_id=NULL_BLOCK_ID,
     block_size=2048,
     block_idx_first_scheduled_token=None,
     block_idx_last_scheduled_token=None,
@@ -228,7 +158,7 @@ def selective_scan_opcheck_fn(
             cache_indices,
             has_initial_state,
             ssm_states,
-            pad_slot_id,
+            null_block_id,
             block_size,
             block_idx_first_scheduled_token,
             block_idx_last_scheduled_token,
@@ -350,7 +280,6 @@ def test_selective_scan(
             has_initial_state=torch.ones(batch_size, device=u.device, dtype=torch.bool)
             if c > 0
             else None,
-            pad_slot_id=PAD_SLOT_ID,
             block_size=2048,
             block_idx_first_scheduled_token=None,
             block_idx_last_scheduled_token=None,
@@ -429,6 +358,59 @@ def test_selective_state_update(dim, dstate, has_z, itype):
     assert torch.allclose(out, out_ref, rtol=rtol, atol=atol)
 
 
+@pytest.mark.parametrize("philox_rounds", [0, 4])
+@pytest.mark.parametrize("has_z", [False, True])
+@pytest.mark.parametrize("dstate", [16, 64])
+@pytest.mark.parametrize("dim", [2048, 4096])
+@pytest.mark.skipif(
+    not (
+        current_platform.is_cuda() and current_platform.is_device_capability_family(100)
+    ),
+    reason="Stochastic rounding in triton is only supported"
+    " on compute capability 10.0 CUDA devices.",
+)
+def test_selective_state_update_stochastic_rounding(dim, dstate, has_z, philox_rounds):
+    device = "cuda"
+    rtol, atol = 5e-3, 1e-1
+    # set seed
+    set_random_seed(0)
+    batch_size = 1
+    state = torch.randn(batch_size, dim, dstate, dtype=torch.float16, device=device)
+    x = torch.randn(batch_size, dim, device=device, dtype=torch.bfloat16)
+    out = torch.empty_like(x)
+    dt = torch.randn(batch_size, dim, device=device, dtype=torch.bfloat16)
+    dt_bias = torch.rand(dim, device=device) - 4.0
+    A = -torch.rand(dim, dstate, device=device) - 1.0
+    B = torch.randn(batch_size, dstate, device=device)
+    C = torch.randn(batch_size, dstate, device=device)
+    D = torch.randn(dim, device=device)
+    z = torch.randn_like(x) if has_z else None
+    # Reference uses fp32 state to get ground truth
+    state_ref = state.float()
+    selective_state_update(
+        state,
+        x,
+        dt,
+        A,
+        B,
+        C,
+        D=D,
+        z=z,
+        dt_bias=dt_bias,
+        dt_softplus=True,
+        out=out,
+        enable_stochastic_rounding=True,
+        cache_philox_rounds=philox_rounds,
+    )
+    out_ref = selective_state_update_ref(
+        state_ref, x, dt, A, B, C, D=D, z=z, dt_bias=dt_bias, dt_softplus=True
+    )
+
+    assert state.dtype == torch.float16
+    assert torch.allclose(state, state_ref.to(torch.float16), rtol=rtol, atol=atol)
+    assert torch.allclose(out, out_ref, rtol=rtol, atol=atol)
+
+
 @pytest.mark.parametrize("itype", [torch.float32, torch.bfloat16])
 @pytest.mark.parametrize("has_z", [False, True])
 @pytest.mark.parametrize("dstate", [16, 64])
@@ -598,15 +580,21 @@ def test_selective_scan_varlen(
         prev_state_shape, device=u.device, dtype=itype, requires_grad=False
     )
     prev_state_ref = prev_state.clone()
-    state_indices = torch.randperm(total_entries, dtype=torch.int32, device=u.device)[
-        :batch_size
-    ]
+    # +1 to exclude index 0 (null block)
+    state_indices = (
+        torch.randperm(total_entries - 1, dtype=torch.int32, device=u.device)[
+            :batch_size
+        ]
+        + 1
+    )
     unused_states_bool = torch.ones(total_entries, dtype=torch.bool, device=device)
     unused_states_bool[state_indices] = False
     padded_state_indices = torch.concat(
         [
             state_indices,
-            torch.as_tensor([PAD_SLOT_ID] * padding, dtype=torch.int32, device=device),
+            torch.as_tensor(
+                [NULL_BLOCK_ID] * padding, dtype=torch.int32, device=device
+            ),
         ],
         dim=-1,
     )
@@ -636,7 +624,7 @@ def test_selective_scan_varlen(
     ]
     for i in range(len(seqlens[0])):
         u_s, delta_s, B_s, C_s, z_s = (v[i].unsqueeze(0) for v in splits)
-        if padded_state_indices[i] == PAD_SLOT_ID:
+        if padded_state_indices[i] == NULL_BLOCK_ID:
             continue
         out_ref_s, _ = selective_scan_ref(
             u_s,
@@ -704,7 +692,8 @@ def test_selective_state_update_with_batch_indices(
     padded_batch_size = batch_size + padding
     total_entries = 10 * batch_size
     state = torch.randn(total_entries, dim, dstate, dtype=itype, device=device)
-    state_indices = torch.randperm(total_entries)[:batch_size].to(
+    # +1 to exclude index 0 (null block)
+    state_indices = (torch.randperm(total_entries - 1)[:batch_size] + 1).to(
         dtype=torch.int32, device=device
     )
     unused_states_bool = torch.ones(total_entries, dtype=torch.bool, device=device)
@@ -712,7 +701,9 @@ def test_selective_state_update_with_batch_indices(
     padded_state_indices = torch.concat(
         [
             state_indices,
-            torch.as_tensor([PAD_SLOT_ID] * padding, dtype=torch.int32, device=device),
+            torch.as_tensor(
+                [NULL_BLOCK_ID] * padding, dtype=torch.int32, device=device
+            ),
         ],
         dim=0,
     )
@@ -739,7 +730,6 @@ def test_selective_state_update_with_batch_indices(
         dt_bias=dt_bias,
         dt_softplus=True,
         state_batch_indices=padded_state_indices,
-        pad_slot_id=PAD_SLOT_ID,
         out=out,
     )
     out_ref = selective_state_update_ref(
@@ -795,7 +785,8 @@ def test_selective_state_update_with_heads_with_batch_indices(
     state = torch.randn(
         total_entries, nheads, headdim, dstate, dtype=itype, device=device
     )
-    state_indices = torch.randperm(total_entries)[:batch_size].to(
+    # +1 to exclude index 0 (null block)
+    state_indices = (torch.randperm(total_entries - 1)[:batch_size] + 1).to(
         dtype=torch.int32, device=device
     )
 
@@ -833,7 +824,6 @@ def test_selective_state_update_with_heads_with_batch_indices(
         dt_bias=dt_bias,
         dt_softplus=True,
         state_batch_indices=state_indices,
-        pad_slot_id=PAD_SLOT_ID,
         out=out,
     )
     out_ref = selective_state_update_ref(
@@ -881,17 +871,18 @@ def test_selective_state_update_with_num_accepted_tokens(
     state = torch.randn(total_state_slots, dim, dstate, dtype=itype, device=device)
 
     state_batch_indices = torch.full(
-        (batch_size, max_seq_len), PAD_SLOT_ID, dtype=torch.int32, device=device
+        (batch_size, max_seq_len), NULL_BLOCK_ID, dtype=torch.int32, device=device
     )
+    # Start from 1 to exclude null block at index 0
     initial_state_slots = torch.randint(
-        0, 15, (batch_size,), device=device, dtype=torch.int32
+        1, 15, (batch_size,), device=device, dtype=torch.int32
     )
     for seq_idx in range(batch_size):
         token_pos = max(num_accepted_tokens[seq_idx].item() - 1, 0)
         state_batch_indices[seq_idx, token_pos] = initial_state_slots[seq_idx]
 
     dst_state_batch_indices = torch.full(
-        (batch_size, max_seq_len), PAD_SLOT_ID, dtype=torch.int32, device=device
+        (batch_size, max_seq_len), NULL_BLOCK_ID, dtype=torch.int32, device=device
     )
     slot_offset = 15
     dst_slots_map = {}
@@ -959,7 +950,6 @@ def test_selective_state_update_with_num_accepted_tokens(
         state_batch_indices=state_batch_indices,
         dst_state_batch_indices=dst_state_batch_indices,
         num_accepted_tokens=num_accepted_tokens,
-        pad_slot_id=PAD_SLOT_ID,
     )
 
     assert torch.allclose(out, out_ref, rtol=rtol, atol=atol)
@@ -1007,18 +997,19 @@ def test_selective_state_update_varlen_with_num_accepted(
     state = torch.randn(total_state_slots, dim, dstate, dtype=itype, device=device)
 
     state_batch_indices = torch.full(
-        (batch_size, max_seq_len), PAD_SLOT_ID, dtype=torch.int32, device=device
+        (batch_size, max_seq_len), NULL_BLOCK_ID, dtype=torch.int32, device=device
     )
 
+    # Start from 1 to exclude null block at index 0
     initial_state_slots = torch.randint(
-        0, 15, (batch_size,), device=device, dtype=torch.int32
+        1, 15, (batch_size,), device=device, dtype=torch.int32
     )
     for seq_idx in range(batch_size):
         token_pos = max(num_accepted_tokens[seq_idx].item() - 1, 0)
         state_batch_indices[seq_idx, token_pos] = initial_state_slots[seq_idx]
 
     dst_state_batch_indices = torch.full(
-        (batch_size, max_seq_len), PAD_SLOT_ID, dtype=torch.int32, device=device
+        (batch_size, max_seq_len), NULL_BLOCK_ID, dtype=torch.int32, device=device
     )
 
     slot_offset = 15
@@ -1084,7 +1075,6 @@ def test_selective_state_update_varlen_with_num_accepted(
         state_batch_indices=state_batch_indices,
         dst_state_batch_indices=dst_state_batch_indices,
         num_accepted_tokens=num_accepted_tokens,
-        pad_slot_id=PAD_SLOT_ID,
     )
 
     for seq_idx in range(batch_size):
diff --git a/tests/kernels/mamba/test_mamba_ssm_configs.py b/tests/kernels/mamba/test_mamba_ssm_configs.py
new file mode 100644
index 000000000000..d35ce95746c7
--- /dev/null
+++ b/tests/kernels/mamba/test_mamba_ssm_configs.py
@@ -0,0 +1,212 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Unit tests for the JSON-based config loader added to selective_state_update.
+
+Tests cover:
+  - Flat MoE-style filename generation
+  - VLLM_TUNED_CONFIG_FOLDER env-var override
+  - Fallback to heuristic when no config file exists
+  - Nearest effective_batch interpolation
+  - Edge cases: non-dict JSON, empty config
+"""
+
+import json
+
+from vllm.model_executor.layers.mamba.ops.mamba_ssm import (
+    _get_default_ssm_launch_config,
+    _try_get_optimal_ssm_config_cached,
+    get_ssm_config_file_name,
+    get_ssm_configs,
+    get_ssm_device_name,
+    try_get_optimal_ssm_config,
+)
+
+# Common kwargs for try_get_optimal_ssm_config. Tests pick (batch, nheads) so
+# their product (effective_batch) matches the value being probed.
+_HEADDIM = 64
+_CACHE_DTYPE = "float32"
+
+
+def _clear_caches() -> None:
+    get_ssm_configs.cache_clear()
+    _try_get_optimal_ssm_config_cached.cache_clear()
+
+
+def _write_config(tmp_path, dstate: int, payload: dict) -> None:
+    """Write payload as the bundled config for (headdim, dstate, cache_dtype)."""
+    device_name = get_ssm_device_name()
+    config_path = tmp_path / get_ssm_config_file_name(
+        _HEADDIM, dstate, _CACHE_DTYPE, device_name
+    )
+    with open(config_path, "w") as f:
+        json.dump(payload, f)
+
+
+# ---------------------------------------------------------------------------
+# Config filename generation
+# ---------------------------------------------------------------------------
+
+
+def test_config_file_name_format():
+    name = get_ssm_config_file_name(
+        headdim=64, dstate=128, cache_dtype="float32", device_name="NVIDIA_B200"
+    )
+    assert name == (
+        "headdim=64,dstate=128,device_name=NVIDIA_B200,cache_dtype=float32.json"
+    )
+
+
+# ---------------------------------------------------------------------------
+# VLLM_TUNED_CONFIG_FOLDER override
+# ---------------------------------------------------------------------------
+
+
+def test_env_override_loads_custom_config(monkeypatch, tmp_path):
+    """VLLM_TUNED_CONFIG_FOLDER should take precedence over the bundled dir."""
+    _write_config(
+        tmp_path,
+        dstate=16,
+        payload={
+            "1": {"BLOCK_SIZE_M": 4, "num_warps": 1},
+        },
+    )
+
+    monkeypatch.setenv("VLLM_TUNED_CONFIG_FOLDER", str(tmp_path))
+    _clear_caches()
+
+    cfg = get_ssm_configs(_HEADDIM, 16, _CACHE_DTYPE)
+    assert cfg is not None
+    assert cfg[1] == {"BLOCK_SIZE_M": 4, "num_warps": 1}
+
+    _clear_caches()
+
+
+# ---------------------------------------------------------------------------
+# Fallback to heuristic when no config file exists
+# ---------------------------------------------------------------------------
+
+
+def test_fallback_when_no_config(monkeypatch, tmp_path):
+    """try_get_optimal_ssm_config must fall back to _get_default_ssm_launch_config
+    when no JSON file is found for the current
+    (device, headdim, dstate, cache_dtype) combination.
+    """
+    monkeypatch.setenv("VLLM_TUNED_CONFIG_FOLDER", str(tmp_path))
+    monkeypatch.setattr(
+        "vllm.model_executor.layers.mamba.ops.mamba_ssm._CONFIGS_DIR",
+        str(tmp_path),
+    )
+
+    for dstate in (8, 16, 32, 64, 128, 256):
+        for is_blackwell in (False, True):
+            _clear_caches()
+            block_m, warps = try_get_optimal_ssm_config(
+                headdim=_HEADDIM,
+                dstate=dstate,
+                batch=1,
+                nheads=1,
+                cache_dtype=_CACHE_DTYPE,
+                is_blackwell=is_blackwell,
+            )
+            assert (block_m, warps) == _get_default_ssm_launch_config(
+                dstate, is_blackwell=is_blackwell
+            )
+
+    _clear_caches()
+
+
+# ---------------------------------------------------------------------------
+# Nearest effective_batch interpolation
+# ---------------------------------------------------------------------------
+
+
+def test_nearest_effective_batch_interpolation(monkeypatch, tmp_path):
+    """When effective_batch = batch*nheads is not an exact key, the closest
+    key should be selected."""
+    _write_config(
+        tmp_path,
+        dstate=32,
+        payload={
+            "64": {"BLOCK_SIZE_M": 8, "num_warps": 1},
+            "4096": {"BLOCK_SIZE_M": 32, "num_warps": 4},
+        },
+    )
+
+    monkeypatch.setenv("VLLM_TUNED_CONFIG_FOLDER", str(tmp_path))
+    _clear_caches()
+
+    # effective_batch = 1*128 = 128 -> closer to 64 than to 4096
+    block_m, warps = try_get_optimal_ssm_config(
+        headdim=_HEADDIM,
+        dstate=32,
+        batch=1,
+        nheads=128,
+        cache_dtype=_CACHE_DTYPE,
+        is_blackwell=False,
+    )
+    assert block_m == 8 and warps == 1
+
+    # effective_batch = 4*1024 = 4096 -> exact match on 4096
+    block_m, warps = try_get_optimal_ssm_config(
+        headdim=_HEADDIM,
+        dstate=32,
+        batch=4,
+        nheads=1024,
+        cache_dtype=_CACHE_DTYPE,
+        is_blackwell=False,
+    )
+    assert block_m == 32 and warps == 4
+
+    _clear_caches()
+
+
+# ---------------------------------------------------------------------------
+# Edge cases: malformed / empty config files
+# ---------------------------------------------------------------------------
+
+
+def test_non_dict_json_returns_none(monkeypatch, tmp_path):
+    """A valid JSON file that is not a dict (e.g. a list) must be ignored
+    and return None rather than raising AttributeError."""
+    device_name = get_ssm_device_name()
+    config_path = tmp_path / get_ssm_config_file_name(
+        _HEADDIM, 16, _CACHE_DTYPE, device_name
+    )
+    with open(config_path, "w") as f:
+        json.dump([1, 2, 3], f)
+
+    monkeypatch.setenv("VLLM_TUNED_CONFIG_FOLDER", str(tmp_path))
+    monkeypatch.setattr(
+        "vllm.model_executor.layers.mamba.ops.mamba_ssm._CONFIGS_DIR",
+        str(tmp_path),
+    )
+    _clear_caches()
+
+    assert get_ssm_configs(_HEADDIM, 16, _CACHE_DTYPE) is None
+
+    _clear_caches()
+
+
+def test_empty_config_falls_back_to_heuristic(monkeypatch, tmp_path):
+    """An empty JSON object {} must not crash min() — should fall back
+    to the hard-coded heuristic."""
+    _write_config(tmp_path, dstate=64, payload={})
+
+    monkeypatch.setenv("VLLM_TUNED_CONFIG_FOLDER", str(tmp_path))
+    _clear_caches()
+
+    dstate = 64
+    block_m, warps = try_get_optimal_ssm_config(
+        headdim=_HEADDIM,
+        dstate=dstate,
+        batch=1,
+        nheads=64,
+        cache_dtype=_CACHE_DTYPE,
+        is_blackwell=False,
+    )
+    assert (block_m, warps) == _get_default_ssm_launch_config(
+        dstate=dstate, is_blackwell=False
+    )
+
+    _clear_caches()
diff --git a/tests/kernels/mamba/test_ssu_dispatch.py b/tests/kernels/mamba/test_ssu_dispatch.py
new file mode 100644
index 000000000000..703a5df163e9
--- /dev/null
+++ b/tests/kernels/mamba/test_ssu_dispatch.py
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+import torch
+
+from vllm.config.mamba import MambaBackendEnum, MambaConfig
+from vllm.model_executor.layers.mamba.ops.ssu_dispatch import (
+    FlashInferSSUBackend,
+    TritonSSUBackend,
+    get_mamba_ssu_backend,
+    initialize_mamba_ssu_backend,
+    selective_state_update,
+)
+from vllm.utils.torch_utils import set_random_seed
+from vllm.v1.attention.backends.registry import MambaAttentionBackendEnum
+from vllm.v1.kv_cache_interface import (
+    KVCacheConfig,
+    KVCacheGroupSpec,
+    MambaSpec,
+)
+
+try:
+    import flashinfer.mamba  # noqa: F401
+
+    HAS_FLASHINFER = True
+except ImportError:
+    HAS_FLASHINFER = False
+
+
+def _kv_cache_config_with_ssu(
+    mamba_type: MambaAttentionBackendEnum = MambaAttentionBackendEnum.MAMBA2,
+) -> KVCacheConfig:
+    spec = MambaSpec(
+        block_size=16,
+        shapes=((16, 64),),
+        dtypes=(torch.float16,),
+        mamba_type=mamba_type,
+    )
+    return KVCacheConfig(
+        num_blocks=1,
+        kv_cache_tensors=[],
+        kv_cache_groups=[KVCacheGroupSpec(layer_names=["l0"], kv_cache_spec=spec)],
+    )
+
+
+def test_default_backend_is_triton():
+    initialize_mamba_ssu_backend(MambaConfig(), _kv_cache_config_with_ssu())
+    backend = get_mamba_ssu_backend()
+    assert isinstance(backend, TritonSSUBackend)
+    assert backend.name == "triton"
+
+
+def test_explicit_triton_backend():
+    initialize_mamba_ssu_backend(
+        MambaConfig(backend=MambaBackendEnum.TRITON), _kv_cache_config_with_ssu()
+    )
+    backend = get_mamba_ssu_backend()
+    assert isinstance(backend, TritonSSUBackend)
+
+
+@pytest.mark.skipif(not HAS_FLASHINFER, reason="flashinfer not installed")
+def test_flashinfer_backend_init():
+    initialize_mamba_ssu_backend(
+        MambaConfig(backend=MambaBackendEnum.FLASHINFER), _kv_cache_config_with_ssu()
+    )
+    backend = get_mamba_ssu_backend()
+    assert isinstance(backend, FlashInferSSUBackend)
+    assert backend.name == "flashinfer"
+
+
+def test_uninitialized_backend_raises():
+    import vllm.model_executor.layers.mamba.ops.ssu_dispatch as mod
+
+    old = mod._mamba_ssu_backend
+    mod._mamba_ssu_backend = None
+    with pytest.raises(RuntimeError, match="not been initialized"):
+        get_mamba_ssu_backend()
+    mod._mamba_ssu_backend = old
+
+
+@pytest.mark.parametrize(
+    "mamba_type",
+    [
+        MambaAttentionBackendEnum.LINEAR,
+        MambaAttentionBackendEnum.GDN_ATTN,
+        MambaAttentionBackendEnum.SHORT_CONV,
+    ],
+)
+def test_init_is_noop_for_non_ssu_mamba_type(mamba_type):
+    import vllm.model_executor.layers.mamba.ops.ssu_dispatch as mod
+
+    old = mod._mamba_ssu_backend
+    mod._mamba_ssu_backend = None
+    try:
+        initialize_mamba_ssu_backend(
+            MambaConfig(), _kv_cache_config_with_ssu(mamba_type)
+        )
+        assert mod._mamba_ssu_backend is None
+        with pytest.raises(RuntimeError, match="not been initialized"):
+            get_mamba_ssu_backend()
+    finally:
+        mod._mamba_ssu_backend = old
+
+
+@pytest.mark.skipif(HAS_FLASHINFER, reason="flashinfer is installed")
+def test_flashinfer_import_error():
+    with pytest.raises(ImportError, match="FlashInfer is required"):
+        FlashInferSSUBackend(MambaConfig())
+
+
+def test_triton_basic_call():
+    set_random_seed(0)
+    initialize_mamba_ssu_backend(
+        MambaConfig(backend=MambaBackendEnum.TRITON), _kv_cache_config_with_ssu()
+    )
+    device = "cuda"
+    batch_size = 2
+    dim = 64
+    dstate = 16
+
+    state = torch.randn(batch_size, dim, dstate, device=device)
+    x = torch.randn(batch_size, dim, device=device)
+    out = torch.empty_like(x)
+    dt = torch.randn(batch_size, dim, device=device)
+    dt_bias = torch.rand(dim, device=device) - 4.0
+    A = -torch.rand(dim, dstate, device=device)
+    B = torch.randn(batch_size, dstate, device=device)
+    C = torch.randn(batch_size, dstate, device=device)
+    D = torch.randn(dim, device=device)
+
+    selective_state_update(
+        state,
+        x,
+        dt,
+        A,
+        B,
+        C,
+        D=D,
+        dt_bias=dt_bias,
+        dt_softplus=True,
+        out=out,
+    )
+    assert not torch.isnan(out).any()
diff --git a/tests/kernels/mamba/utils.py b/tests/kernels/mamba/utils.py
new file mode 100644
index 000000000000..fb8a4b0a28ec
--- /dev/null
+++ b/tests/kernels/mamba/utils.py
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+import torch.nn.functional as F
+from einops import rearrange, repeat
+
+
+def selective_state_update_ref(
+    state, x, dt, A, B, C, D=None, z=None, dt_bias=None, dt_softplus=False
+):
+    """
+    Argument:
+        state: (batch, dim, dstate) or (batch, nheads, dim, dstate)
+        x: (batch, dim) or (batch, nheads, dim)
+        dt: (batch, dim) or (batch, nheads, dim)
+        A: (dim, dstate) or (nheads, dim, dstate)
+        B: (batch, dstate) or (batch, ngroups, dstate)
+        C: (batch, dstate) or (batch, ngroups, dstate)
+        D: (dim,) or (nheads, dim)
+        z: (batch, dim) or (batch, nheads, dim)
+        dt_bias: (dim,) or (nheads, dim)
+    Return:
+        out: (batch, dim) or (batch, nheads, dim)
+    """
+    has_heads = state.dim() > 3
+    if state.dim() == 3:
+        state = state.unsqueeze(1)
+    if x.dim() == 2:
+        x = x.unsqueeze(1)
+    if dt.dim() == 2:
+        dt = dt.unsqueeze(1)
+    if A.dim() == 2:
+        A = A.unsqueeze(0)
+    if B.dim() == 2:
+        B = B.unsqueeze(1)
+    if C.dim() == 2:
+        C = C.unsqueeze(1)
+    if D is not None and D.dim() == 1:
+        D = D.unsqueeze(0)
+    if z is not None and z.dim() == 2:
+        z = z.unsqueeze(1)
+    if dt_bias is not None and dt_bias.dim() == 1:
+        dt_bias = dt_bias.unsqueeze(0)
+    batch, nheads, dim, dstate = state.shape
+    assert x.shape == (batch, nheads, dim)
+    assert dt.shape == x.shape
+    assert A.shape == (nheads, dim, dstate)
+    ngroups = B.shape[1]
+    assert nheads % ngroups == 0, "nheads must be divisible by ngroups"
+    assert B.shape == (batch, ngroups, dstate)
+    assert C.shape == B.shape
+    if D is not None:
+        assert D.shape == (nheads, dim)
+    if z is not None:
+        assert z.shape == x.shape
+    if dt_bias is not None:
+        assert dt_bias.shape == (nheads, dim)
+        dt = dt + dt_bias
+    dt = F.softplus(dt) if dt_softplus else dt
+    dA = torch.exp(
+        rearrange(dt, "b h d -> b h d 1") * A
+    )  # (batch, nheads, dim, dstate)
+    B = repeat(B, "b g n -> b (g h) n", h=nheads // ngroups)  # (batch, nheads, dstate)
+    C = repeat(C, "b g n -> b (g h) n", h=nheads // ngroups)  # (batch, nheads, dstate)
+    dB = rearrange(dt, "b h d -> b h d 1") * rearrange(
+        B, "b h n -> b h 1 n"
+    )  # (batch, nheads, dim, dstate)
+    state.copy_(
+        state * dA + dB * rearrange(x, "b h d -> b h d 1")
+    )  # (batch, dim, dstate
+    out = torch.einsum("bhdn,bhn->bhd", state.to(C.dtype), C)
+    if D is not None:
+        out += (x * D).to(out.dtype)
+    out = (out if z is None else out * F.silu(z)).to(x.dtype)
+    if not has_heads:
+        out = out.squeeze(1)
+    return out
diff --git a/tests/kernels/moe/conftest.py b/tests/kernels/moe/conftest.py
new file mode 100644
index 000000000000..a217fe684eb9
--- /dev/null
+++ b/tests/kernels/moe/conftest.py
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--subtests", action="store", type=str, default=None, help="subtest ids"
+    )
+
+
+@pytest.fixture
+def subtests(request):
+    return request.config.getoption("--subtests")
diff --git a/tests/kernels/moe/modular_kernel_tools/common.py b/tests/kernels/moe/modular_kernel_tools/common.py
index a6f3bc35a0b6..f07d4c75e752 100644
--- a/tests/kernels/moe/modular_kernel_tools/common.py
+++ b/tests/kernels/moe/modular_kernel_tools/common.py
@@ -46,6 +46,7 @@
     has_deep_gemm,
     has_mori,
 )
+from vllm.utils.math_utils import next_power_of_2
 
 from .mk_objects import (
     TestMoEQuantConfig,
@@ -604,13 +605,6 @@ def make_modular_kernel(
     vllm_config: VllmConfig,
     quant_config: FusedMoEQuantConfig,
 ) -> mk.FusedMoEKernel:
-    def next_power_of_2(x):
-        import math
-
-        if x == 0:
-            return 1
-        return 2 ** math.ceil(math.log2(x))
-
     # make moe config
     moe_parallel_config: FusedMoEParallelConfig = FusedMoEParallelConfig.make(
         tp_size_=get_tensor_model_parallel_world_size(),
diff --git a/tests/kernels/moe/modular_kernel_tools/mk_objects.py b/tests/kernels/moe/modular_kernel_tools/mk_objects.py
index a56435379943..5d3572b7caa2 100644
--- a/tests/kernels/moe/modular_kernel_tools/mk_objects.py
+++ b/tests/kernels/moe/modular_kernel_tools/mk_objects.py
@@ -7,24 +7,24 @@
 # Fused experts and PrepareFinalize imports
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm.model_executor.layers.fused_moe import TritonExperts
-from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
-    BatchedDeepGemmExperts,
-)
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEConfig,
     FusedMoEQuantConfig,
 )
-from vllm.model_executor.layers.fused_moe.deep_gemm_moe import DeepGemmExperts
-from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
+from vllm.model_executor.layers.fused_moe.experts.batched_deep_gemm_moe import (
+    BatchedDeepGemmExperts,
+)
+from vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe import DeepGemmExperts
+from vllm.model_executor.layers.fused_moe.experts.fused_batched_moe import (
     BatchedTritonExperts,
     NaiveBatchedExperts,
 )
+from vllm.model_executor.layers.fused_moe.experts.triton_deep_gemm_moe import (
+    TritonOrDeepGemmExperts,
+)
 from vllm.model_executor.layers.fused_moe.prepare_finalize import (
     MoEPrepareAndFinalizeNoDPEPModular,
 )
-from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import (
-    TritonOrDeepGemmExperts,
-)
 from vllm.model_executor.layers.quantization.utils.nvfp4_utils import (
     cutlass_fp4_supported,
 )
@@ -223,7 +223,7 @@ def expert_info(kind) -> ExpertInfo:
     )
 
 if has_mori():
-    from vllm.model_executor.layers.fused_moe.mori_prepare_finalize import (
+    from vllm.model_executor.layers.fused_moe.prepare_finalize.mori import (
         MoriPrepareAndFinalize,
     )
 
@@ -237,7 +237,7 @@ def expert_info(kind) -> ExpertInfo:
     )
 
 if has_flashinfer_cutlass_fused_moe() and current_platform.has_device_capability(100):
-    from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (
+    from vllm.model_executor.layers.fused_moe.experts.flashinfer_cutlass_moe import (
         FlashInferExperts,
     )
     from vllm.model_executor.layers.fused_moe.prepare_finalize.flashinfer_nvlink_two_sided import (  # noqa: E501
@@ -298,7 +298,7 @@ def expert_info(kind) -> ExpertInfo:
     )
 
 if has_aiter():
-    from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (
+    from vllm.model_executor.layers.fused_moe.experts.rocm_aiter_moe import (
         AiterExperts,
     )
 
@@ -367,7 +367,9 @@ def expert_info(kind) -> ExpertInfo:
     CutlassExpertsFp8 = None
 
 if cutlass_fp4_supported():
-    from vllm.model_executor.layers.fused_moe.cutlass_moe import CutlassExpertsFp4
+    from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import (
+        CutlassExpertsFp4,
+    )
 
     register_experts(
         CutlassExpertsFp4,
diff --git a/tests/kernels/moe/modular_kernel_tools/parallel_utils.py b/tests/kernels/moe/modular_kernel_tools/parallel_utils.py
index 3ff2ce3b3c01..07f244451b45 100644
--- a/tests/kernels/moe/modular_kernel_tools/parallel_utils.py
+++ b/tests/kernels/moe/modular_kernel_tools/parallel_utils.py
@@ -11,7 +11,11 @@
 from typing_extensions import ParamSpec
 
 from vllm.config import VllmConfig, set_current_vllm_config
-from vllm.distributed import init_distributed_environment, initialize_model_parallel
+from vllm.distributed import (
+    cleanup_dist_env_and_memory,
+    init_distributed_environment,
+    initialize_model_parallel,
+)
 from vllm.utils.network_utils import get_open_port
 
 ## Parallel Processes Utils
@@ -36,10 +40,17 @@ def _set_vllm_config(
 
     temp_file = tempfile.mkstemp()[1]
 
+    # When DP is enabled, processes are organized as:
+    #  rank = dp_rank * tp_pp_world_size + tp_pp_rank
+    tp_pp_world_size = vllm_config.parallel_config.world_size
+    vllm_config.parallel_config.data_parallel_rank = rank // tp_pp_world_size
+    tp_pp_rank = rank % tp_pp_world_size
+    vllm_config.parallel_config.rank = tp_pp_rank
+
     with set_current_vllm_config(vllm_config):
         init_distributed_environment(
-            world_size=world_size,
-            rank=rank,
+            world_size=tp_pp_world_size,
+            rank=tp_pp_rank,
             distributed_init_method=f"file://{temp_file}",
             local_rank=local_rank,
             backend="nccl",
@@ -59,15 +70,15 @@ def _worker_parallel_launch(
     world_local_size: int,
     node_rank: int,
     init_method: str,
-    worker: Callable[Concatenate[ProcessGroupInfo, VllmConfig | None, Any, P], None],
+    worker: Callable[..., None],
     vllm_config: VllmConfig | None,
     env_dict: dict | None,
-    *args: P.args,
-    **kwargs: P.kwargs,
+    worker_kwargs: dict[str, Any],
+    *args: Any,
 ) -> None:
     rank = node_rank * world_local_size + local_rank
-    torch.accelerator.set_device_index(local_rank)
     device = torch.device("cuda", local_rank)
+    torch.accelerator.set_device_index(device)
     torch.distributed.init_process_group(
         backend="cpu:gloo,cuda:nccl",
         init_method=init_method,
@@ -98,25 +109,27 @@ def _worker_parallel_launch(
             vllm_config,
             cpu_group,
             *args,
-            **kwargs,
+            **worker_kwargs,
         )
     except Exception as ex:
         print(ex)
         traceback.print_exc()
         raise
     finally:
-        torch.distributed.destroy_process_group()
+        if vllm_config is not None:
+            cleanup_dist_env_and_memory()
+        else:
+            torch.distributed.destroy_process_group()
 
 
 def parallel_launch_with_config(
     world_size: int,
     worker: Callable[Concatenate[ProcessGroupInfo, VllmConfig, Any, P], None],
     vllm_config: VllmConfig,
-    env_dict: dict[Any, Any],
+    env_dict: dict[Any, Any] | None,
     *args: P.args,
     **kwargs: P.kwargs,
 ) -> None:
-    assert not kwargs
     spawn(
         _worker_parallel_launch,
         args=(
@@ -127,6 +140,7 @@ def parallel_launch_with_config(
             worker,
             vllm_config,
             env_dict,
+            kwargs,
         )
         + args,
         nprocs=world_size,
diff --git a/tests/kernels/moe/test_batched_deepgemm.py b/tests/kernels/moe/test_batched_deepgemm.py
index 20763b91dfd9..ab5672f83203 100644
--- a/tests/kernels/moe/test_batched_deepgemm.py
+++ b/tests/kernels/moe/test_batched_deepgemm.py
@@ -5,15 +5,17 @@
 import torch
 
 from vllm.model_executor.layers.fused_moe.activation import MoEActivation
-from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
+from vllm.model_executor.layers.fused_moe.config import fp8_w8a8_moe_quant_config
+from vllm.model_executor.layers.fused_moe.experts.batched_deep_gemm_moe import (
     BatchedDeepGemmExperts,
 )
-from vllm.model_executor.layers.fused_moe.config import fp8_w8a8_moe_quant_config
-from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
-    BatchedPrepareAndFinalize,
+from vllm.model_executor.layers.fused_moe.experts.fused_batched_moe import (
     BatchedTritonExperts,
 )
 from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEKernel
+from vllm.model_executor.layers.fused_moe.prepare_finalize.batched import (
+    BatchedPrepareAndFinalize,
+)
 from vllm.utils.deep_gemm import calc_diff, is_deep_gemm_supported
 
 from .test_deepgemm import make_block_quant_fp8_weights
diff --git a/tests/kernels/moe/test_batched_moe.py b/tests/kernels/moe/test_batched_moe.py
index d78e1947fac0..b9fe8ceafcdd 100644
--- a/tests/kernels/moe/test_batched_moe.py
+++ b/tests/kernels/moe/test_batched_moe.py
@@ -16,7 +16,7 @@
 from tests.kernels.utils import torch_experts
 from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.model_executor.layers.fused_moe import fused_topk
-from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
+from vllm.model_executor.layers.fused_moe.experts.fused_batched_moe import (
     invoke_moe_batched_triton_kernel,
 )
 from vllm.platforms import current_platform
diff --git a/tests/kernels/moe/test_block_fp8.py b/tests/kernels/moe/test_block_fp8.py
index f27fd6f34ee7..d11573bd31d4 100644
--- a/tests/kernels/moe/test_block_fp8.py
+++ b/tests/kernels/moe/test_block_fp8.py
@@ -28,10 +28,10 @@
 from vllm.model_executor.layers.fused_moe.config import (
     fp8_w8a8_moe_quant_config,
 )
-from vllm.model_executor.layers.fused_moe.deep_gemm_moe import (
+from vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe import (
     _valid_deep_gemm_shape,
 )
-from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import (
+from vllm.model_executor.layers.fused_moe.experts.triton_deep_gemm_moe import (
     TritonOrDeepGemmExperts,
 )
 from vllm.platforms import current_platform
diff --git a/tests/kernels/moe/test_cpu_fused_moe.py b/tests/kernels/moe/test_cpu_fused_moe.py
index 467ba3c5f691..73859175cd1f 100644
--- a/tests/kernels/moe/test_cpu_fused_moe.py
+++ b/tests/kernels/moe/test_cpu_fused_moe.py
@@ -20,7 +20,7 @@
 HIDDEN_DIM = [128, 2880]
 INTERMEDIATE_DIM = [128, 2880]
 BATCH_SIZE = [1, 64, 256]
-ACT = [MoEActivation.SILU, MoEActivation.SWIGLUOAI]
+ACT = [MoEActivation.SILU, MoEActivation.SWIGLUOAI, MoEActivation.GELU]
 USE_BIAS = [True, False]
 ISA = ["amx", "vec"] if torch.cpu._is_amx_tile_supported() else ["vec"]
 DTYPE = [torch.bfloat16]
diff --git a/tests/kernels/moe/test_cpu_quant_fused_moe.py b/tests/kernels/moe/test_cpu_quant_fused_moe.py
new file mode 100644
index 000000000000..f8967b199226
--- /dev/null
+++ b/tests/kernels/moe/test_cpu_quant_fused_moe.py
@@ -0,0 +1,500 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for CPU quantized fused MoE kernels (FP8 W8A16 and MXFP4 W4A16)."""
+
+import math
+import sys
+
+import pytest
+import torch
+import torch.nn.functional as F
+
+from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed
+
+if not current_platform.is_cpu():
+    pytest.skip("skipping CPU-only tests", allow_module_level=True)
+
+import vllm._custom_ops as ops  # noqa: E402
+
+if not hasattr(torch.ops._C, "fused_experts_cpu"):
+    pytest.skip("fused_experts_cpu op not available", allow_module_level=True)
+
+
+def _silu_and_mul(x: torch.Tensor) -> torch.Tensor:
+    d = x.shape[-1] // 2
+    return F.silu(x[..., :d]) * x[..., d:]
+
+
+def _prepack_experts(w: torch.Tensor) -> torch.Tensor:
+    """VNNI-prepack expert weights via ``convert_weight_packed``."""
+    return torch.ops._C.convert_weight_packed(w)
+
+
+# FP8 W8A16 block-scaled fused MoE
+
+BLOCK_SIZE = [128, 128]  # [block_n, block_k]
+
+_FP8_INFO = torch.finfo(torch.float8_e4m3fn)
+FP8_SCALE = _FP8_INFO.max  # 448.0
+FACTOR_FOR_SCALE = 1e-3
+
+
+def _block_dequant_weight(
+    weight: torch.Tensor,
+    scales: torch.Tensor,
+    block_size: list[int],
+) -> torch.Tensor:
+    """Block-dequantize FP8 weight [E, N, K] -> float [E, N, K]."""
+    E, N, K = weight.shape
+    block_n, block_k = block_size
+    pad_N = (block_n - N % block_n) % block_n
+    pad_K = (block_k - K % block_k) % block_k
+
+    if pad_N > 0 or pad_K > 0:
+        weight = F.pad(weight, (0, pad_K, 0, pad_N))
+
+    n_tiles = math.ceil(N / block_n)
+    k_tiles = math.ceil(K / block_k)
+
+    weight_block = (
+        weight.view(E, n_tiles, block_n, k_tiles, block_k)
+        .permute(0, 1, 3, 2, 4)
+        .float()
+        .contiguous()
+    )
+    weight_scaled = (
+        (weight_block * scales.view(E, n_tiles, k_tiles, 1, 1))
+        .permute(0, 1, 3, 2, 4)
+        .contiguous()
+    )
+    if pad_N > 0 or pad_K > 0:
+        weight_scaled = weight_scaled.view(E, N + pad_N, K + pad_K)
+        weight_scaled = weight_scaled[..., :N, :K].contiguous()
+    else:
+        weight_scaled = weight_scaled.view(E, N, K)
+    return weight_scaled
+
+
+def ref_w8a16_block_fp8_moe(
+    a: torch.Tensor,
+    w1: torch.Tensor,
+    w2: torch.Tensor,
+    w1_s: torch.Tensor,
+    w2_s: torch.Tensor,
+    topk_weight: torch.Tensor,
+    topk_ids: torch.Tensor,
+    block_size: list[int],
+) -> torch.Tensor:
+    """Reference FP8 W8A16 block-scaled fused MoE in pure torch."""
+    B, D = a.shape
+    topk = topk_ids.size(1)
+
+    w1_dq = _block_dequant_weight(w1, w1_s, block_size)
+    w2_dq = _block_dequant_weight(w2, w2_s, block_size)
+
+    a_exp = a.view(B, -1, D).repeat(1, topk, 1).reshape(-1, D).float()
+    out = torch.zeros(B * topk, w2_dq.shape[1], dtype=torch.float32)
+
+    topk_weight_flat = topk_weight.view(-1)
+    topk_ids_flat = topk_ids.view(-1)
+
+    for i in range(w1_dq.shape[0]):
+        mask = topk_ids_flat == i
+        if mask.sum():
+            ic0 = torch.matmul(a_exp[mask], w1_dq[i].transpose(0, 1))
+            ic1 = _silu_and_mul(ic0)
+            out[mask] = torch.matmul(ic1, w2_dq[i].transpose(0, 1))
+
+    return (
+        (out.view(B, -1, w2_dq.shape[1]) * topk_weight_flat.view(B, -1, 1))
+        .sum(dim=1)
+        .to(a.dtype)
+    )
+
+
+def _make_fp8_moe_weights(
+    E: int,
+    N: int,
+    K: int,
+    block_size: list[int],
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """Generate random FP8 MoE weights with random block scales."""
+    block_n, block_k = block_size
+
+    w1 = (
+        (torch.randn(E, 2 * N, K) * FP8_SCALE)
+        .clamp(min=-FP8_SCALE, max=FP8_SCALE)
+        .to(torch.float8_e4m3fn)
+    )
+    w2 = (
+        (torch.randn(E, K, N) * FP8_SCALE)
+        .clamp(min=-FP8_SCALE, max=FP8_SCALE)
+        .to(torch.float8_e4m3fn)
+    )
+
+    w1_s = (
+        torch.randn(E, math.ceil(2 * N / block_n), math.ceil(K / block_k))
+        * FACTOR_FOR_SCALE
+    )
+    w2_s = (
+        torch.randn(E, math.ceil(K / block_n), math.ceil(N / block_k))
+        * FACTOR_FOR_SCALE
+    )
+    return w1, w2, w1_s, w2_s
+
+
+FP8_NUM_TOKENS = [1, 2, 64, 121]
+FP8_MOE_CONFIGS = [
+    (256, 512, 8, 2),
+    (256, 512, 8, 4),
+    (512, 256, 8, 2),
+    (512, 256, 8, 4),
+    (512, 512, 8, 2),
+    (512, 512, 8, 4),
+    (768, 2048, 8, 2),
+    (768, 2048, 8, 4),
+    (768, 2048, 128, 8),
+]
+
+
+@pytest.mark.parametrize("M", FP8_NUM_TOKENS)
+@pytest.mark.parametrize("N,K,E,topk", FP8_MOE_CONFIGS)
+@pytest.mark.parametrize("seed", [0])
+def test_w8a16_block_fp8_cpu_fused_moe(M, N, K, E, topk, seed):
+    """Test fused_experts_cpu FP8 W8A16 against dequantised torch reference."""
+    set_random_seed(seed)
+
+    a = torch.randn(M, K, dtype=torch.bfloat16) / math.sqrt(K)
+    w1, w2, w1_s, w2_s = _make_fp8_moe_weights(E, N, K, BLOCK_SIZE)
+
+    score = torch.randn(M, E, dtype=torch.bfloat16)
+    score = torch.softmax(score, dim=-1, dtype=torch.float32)
+    topk_weight, topk_ids = torch.topk(score, topk)
+    topk_ids = topk_ids.to(torch.int32)
+
+    ref_out = ref_w8a16_block_fp8_moe(
+        a, w1, w2, w1_s, w2_s, topk_weight, topk_ids, BLOCK_SIZE
+    )
+
+    pw1, pw2 = _prepack_experts(w1), _prepack_experts(w2)
+
+    # Test inplace=False against reference
+    out = ops.fused_experts_cpu(
+        a.clone(),
+        pw1,
+        pw2,
+        topk_weight,
+        topk_ids,
+        False,
+        ops.CPUQuantMethod.FP8_W8A16,
+        w1_s,
+        w2_s,
+        None,
+        None,
+        BLOCK_SIZE,
+        is_vnni=True,
+    )
+    torch.testing.assert_close(ref_out.bfloat16(), out, atol=1e-2, rtol=1e-2)
+
+    # Test inplace=True produces identical output
+    out_inplace = ops.fused_experts_cpu(
+        a.clone(),
+        pw1,
+        pw2,
+        topk_weight,
+        topk_ids,
+        True,
+        ops.CPUQuantMethod.FP8_W8A16,
+        w1_s,
+        w2_s,
+        None,
+        None,
+        BLOCK_SIZE,
+        is_vnni=True,
+    )
+    torch.testing.assert_close(out_inplace, out, atol=0, rtol=0)
+
+
+# MXFP4 W4A16 fused MoE
+
+
+class MXFP4QuantizeUtil:
+    """MXFP4 quantization utility."""
+
+    E2M1_max = 6.0
+    E2M1_values = [0, 0.5, 1, 1.5, 2, 3, 4, 6]
+    E2M1_bounds = torch.tensor([0.25, 0.75, 1.25, 1.75, 2.5, 3.5, 5])
+    block_size = 32
+
+    @classmethod
+    def quantize(cls, input: torch.Tensor) -> tuple:
+        """Quantize BF16 tensor to MXFP4 packed uint8 format."""
+
+        def cast_fp4(x):
+            sign = torch.sign(x)
+            sign_bit = (2 - sign) // 2
+            ord_ = torch.sum(
+                (x.abs().unsqueeze(-1) - cls.E2M1_bounds.to(x.device)) > 0, dim=-1
+            )
+            fp4_val = (sign_bit * 0b1000 + ord_).to(torch.uint8)
+            return fp4_val
+
+        def fuse_uint4_to_uint8(x):
+            left_side = x[..., 0::2]
+            right_side = x[..., 1::2]
+            new_data = right_side.clone() << 4
+            new_data[..., : left_side.shape[-1]] += left_side
+            return new_data
+
+        original_shape = input.shape
+        input = input.view(-1, cls.block_size)
+        input_amax = input.abs().max(dim=-1, keepdim=True).values
+        descale = input_amax / cls.E2M1_max
+        min_value = torch.tensor(-127.0, device=descale.device)
+        e8m0_scale = torch.ceil(torch.maximum(torch.log2(descale), min_value))
+
+        input = (input / torch.exp2(e8m0_scale)).view(original_shape)
+        input_q = cast_fp4(input)
+        input_q = fuse_uint4_to_uint8(input_q)
+        e8m0_scale = (e8m0_scale + 127).to(torch.uint8)
+        return input_q, e8m0_scale
+
+    @classmethod
+    def dequantize(cls, quantized_data, dtype: torch.dtype, scale):
+        """Dequantize MXFP4 packed tensor back to float."""
+
+        def unfuse_uint8_to_uint4(x):
+            left_side = x & 0x0F
+            right_side = (x >> 4) & 0x0F
+            shape = list(x.shape)
+            shape[-1] = shape[-1] * 2
+            result = torch.zeros(shape, dtype=torch.uint8, device=x.device)
+            result[..., 0::2] = left_side
+            result[..., 1::2] = right_side
+            return result
+
+        e8m0_scale = scale
+        x_unfused = unfuse_uint8_to_uint4(quantized_data)
+        sign = 1 - 2 * ((x_unfused & 0b1000) >> 3).to(torch.float32)
+        magnitude = (x_unfused & 0b0111).to(torch.long)
+        values = torch.tensor(cls.E2M1_values, device=quantized_data.device)
+        original_shape = magnitude.shape
+        x_float = values[magnitude.reshape(-1)].reshape(original_shape)
+        x_float = sign.float() * x_float
+        x_float = x_float.reshape(-1, cls.block_size)
+        scale_factor = torch.exp2(e8m0_scale.float() - 127)
+        scale_factor = scale_factor.reshape(-1, 1)
+        x_float = x_float * scale_factor
+        return x_float.reshape(original_shape).to(dtype)
+
+
+def _swiglu(x: torch.Tensor, alpha: float, limit: float) -> torch.Tensor:
+    """SwigLU activation used in GPT-OSS.
+    Input is interleaved: [gate_0, up_0, gate_1, up_1, ...] in last dim.
+    """
+    gate = x[..., 0::2]
+    up = x[..., 1::2]
+    gate_clamped = torch.clamp(gate, max=limit)
+    up_clamped = torch.clamp(up, min=-limit, max=limit)
+    return gate_clamped * torch.sigmoid(alpha * gate_clamped) * (up_clamped + 1)
+
+
+def ref_mxfp4_fused_moe(
+    a: torch.Tensor,
+    w1_dq: torch.Tensor,
+    w2_dq: torch.Tensor,
+    topk_weight: torch.Tensor,
+    topk_ids: torch.Tensor,
+    topk: int,
+) -> torch.Tensor:
+    """Reference MXFP4 fused MoE with SiLU activation."""
+    B, D = a.shape
+    a_f = a.float()
+    out = torch.zeros(B * topk, w2_dq.shape[1], dtype=torch.float32)
+    topk_ids_flat = topk_ids.view(-1)
+
+    for i in range(w1_dq.shape[0]):
+        mask = topk_ids_flat == i
+        if mask.sum() == 0:
+            continue
+        token_indices = torch.where(mask)[0]
+        source_indices = token_indices // topk
+        ic0 = torch.matmul(a_f[source_indices], w1_dq[i].float().T)
+        ic1 = _silu_and_mul(ic0)
+        out[mask] = torch.matmul(ic1, w2_dq[i].float().T)
+
+    return (out.view(B, topk, -1) * topk_weight.unsqueeze(-1)).sum(dim=1).to(a.dtype)
+
+
+def ref_mxfp4_fused_moe_gptoss(
+    a: torch.Tensor,
+    w1_dq: torch.Tensor,
+    w2_dq: torch.Tensor,
+    w1_bias: torch.Tensor,
+    w2_bias: torch.Tensor,
+    topk_weight: torch.Tensor,
+    topk_ids: torch.Tensor,
+    alpha: float,
+    limit: float,
+) -> torch.Tensor:
+    """Reference MXFP4 fused MoE with SwigLU+bias (GPT-OSS style)."""
+    B, D = a.shape
+    topk = topk_ids.shape[1]
+    a_f = a.float()
+    E = w1_dq.shape[0]
+    out = torch.zeros(B * topk, w2_dq.shape[1], dtype=torch.float32)
+    topk_ids_flat = topk_ids.view(-1)
+
+    for i in range(E):
+        mask = topk_ids_flat == i
+        if mask.sum() == 0:
+            continue
+        token_indices = torch.where(mask)[0]
+        source_indices = token_indices // topk
+        ic0 = torch.matmul(a_f[source_indices], w1_dq[i].float().T)
+        ic0 = ic0 + w1_bias[i].float()
+        ic1 = _swiglu(ic0, alpha, limit)
+        ic2 = torch.matmul(ic1, w2_dq[i].float().T)
+        ic2 = ic2 + w2_bias[i].float()
+        out[mask] = ic2
+
+    return (out.view(B, topk, -1) * topk_weight.unsqueeze(-1)).sum(dim=1).to(a.dtype)
+
+
+def _prepack_mxfp4_experts(
+    w: torch.Tensor, w_scale: torch.Tensor
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """VNNI-prepack MXFP4 weights and repack scales."""
+    packed_w = torch.ops._C.convert_weight_packed(w)
+    packed_s = torch.ops._C.convert_scale_packed(w_scale)
+    return packed_w, packed_s
+
+
+MXFP4_NUM_TOKENS = [1, 2, 32, 121]
+MXFP4_MOE_CONFIGS = [
+    (128, 128, 4, 2),
+    (256, 256, 8, 4),
+    (352, 256, 8, 4),
+    (512, 320, 8, 4),
+]
+
+
+@pytest.mark.parametrize("M", MXFP4_NUM_TOKENS)
+@pytest.mark.parametrize("N,K,E,topk", MXFP4_MOE_CONFIGS)
+@pytest.mark.parametrize("seed", [0])
+def test_mxfp4_cpu_fused_moe(M, N, K, E, topk, seed):
+    """Test fused_experts_mxfp4_cpu against dequantized torch reference."""
+    set_random_seed(seed)
+    dtype = torch.bfloat16
+
+    a = torch.randn(M, K, dtype=dtype) / 10
+
+    # Generate and quantize weights
+    w1_bf16 = torch.randn(E, 2 * N, K, dtype=dtype) / 10
+    w1q, w1s = MXFP4QuantizeUtil.quantize(w1_bf16)
+    w1s = w1s.reshape(E, 2 * N, K // 32)
+    w1dq = MXFP4QuantizeUtil.dequantize(w1q, dtype, w1s)
+
+    w2_bf16 = torch.randn(E, K, N, dtype=dtype) / 10
+    w2q, w2s = MXFP4QuantizeUtil.quantize(w2_bf16)
+    w2s = w2s.reshape(E, K, N // 32)
+    w2dq = MXFP4QuantizeUtil.dequantize(w2q, dtype, w2s)
+
+    # Routing
+    score = torch.randn(M, E, dtype=dtype)
+    score = torch.softmax(score, dim=-1, dtype=torch.float32)
+    topk_weight, topk_ids = torch.topk(score, topk)
+    topk_ids = topk_ids.to(torch.int32)
+
+    # Reference
+    ref_out = ref_mxfp4_fused_moe(a, w1dq, w2dq, topk_weight, topk_ids, topk)
+
+    # Pack weights for kernel
+    pw1, pw1s = _prepack_mxfp4_experts(w1q, w1s)
+    pw2, pw2s = _prepack_mxfp4_experts(w2q, w2s)
+
+    # Kernel
+    out = ops.fused_experts_cpu(
+        a.clone(),
+        pw1,
+        pw2,
+        topk_weight,
+        topk_ids,
+        False,  # inplace
+        ops.CPUQuantMethod.MXFP4,
+        pw1s,  # w1_scale
+        pw2s,  # w2_scale
+        None,  # w1_zero
+        None,  # w2_zero
+        None,  # block_size
+    )
+
+    torch.testing.assert_close(ref_out.bfloat16(), out, atol=1e-2, rtol=1e-2)
+
+
+@pytest.mark.parametrize("M", [1, 32])
+@pytest.mark.parametrize("N,K,E,topk", [(128, 128, 4, 2), (64, 64, 4, 2)])
+@pytest.mark.parametrize("seed", [0])
+def test_mxfp4_cpu_fused_moe_bias_swiglu(M, N, K, E, topk, seed):
+    """Test fused_experts_mxfp4_cpu with bias and SwigLU activation (GPT-OSS)."""
+    set_random_seed(seed)
+    dtype = torch.bfloat16
+    alpha = 1.702
+    limit = 7.0
+
+    a = torch.randn(M, K, dtype=dtype) / 10
+
+    # Generate and quantize weights
+    w1_bf16 = torch.randn(E, 2 * N, K, dtype=dtype) / 10
+    w1q, w1s = MXFP4QuantizeUtil.quantize(w1_bf16)
+    w1s = w1s.reshape(E, 2 * N, K // 32)
+    w1dq = MXFP4QuantizeUtil.dequantize(w1q, dtype, w1s)
+    w1_b = torch.randn(E, 2 * N, dtype=torch.float32) / 10
+
+    w2_bf16 = torch.randn(E, K, N, dtype=dtype) / 10
+    w2q, w2s = MXFP4QuantizeUtil.quantize(w2_bf16)
+    w2s = w2s.reshape(E, K, N // 32)
+    w2dq = MXFP4QuantizeUtil.dequantize(w2q, dtype, w2s)
+    w2_b = torch.randn(E, K, dtype=torch.float32) / 10
+
+    # Routing
+    score = torch.randn(M, E, dtype=dtype)
+    score = torch.softmax(score, dim=-1, dtype=torch.float32)
+    topk_weight, topk_ids = torch.topk(score, topk)
+    topk_ids = topk_ids.to(torch.int32)
+
+    # Reference
+    ref_out = ref_mxfp4_fused_moe_gptoss(
+        a, w1dq, w2dq, w1_b, w2_b, topk_weight, topk_ids, alpha, limit
+    )
+
+    # Pack weights for kernel
+    pw1, pw1s = _prepack_mxfp4_experts(w1q, w1s)
+    pw2, pw2s = _prepack_mxfp4_experts(w2q, w2s)
+
+    # Kernel
+    out = ops.fused_experts_cpu(
+        a.clone(),
+        pw1,
+        pw2,
+        topk_weight,
+        topk_ids,
+        False,  # inplace
+        ops.CPUQuantMethod.MXFP4,
+        pw1s,  # w1_scale
+        pw2s,  # w2_scale
+        None,  # w1_zero
+        None,  # w2_zero
+        None,  # block_size
+        w1_bias=w1_b,
+        w2_bias=w2_b,
+        alpha=alpha,
+        limit=limit,
+    )
+
+    torch.testing.assert_close(ref_out.bfloat16(), out, atol=1e-2, rtol=1e-2)
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__, "-v"]))
diff --git a/tests/kernels/moe/test_cutedsl_moe.py b/tests/kernels/moe/test_cutedsl_moe.py
index bca3eba0f91c..405dc4f44818 100644
--- a/tests/kernels/moe/test_cutedsl_moe.py
+++ b/tests/kernels/moe/test_cutedsl_moe.py
@@ -17,7 +17,7 @@
 from torch.nn import functional as F
 
 from vllm.model_executor.layers.activation import SiluAndMul
-from vllm.model_executor.layers.fused_moe.experts.flashinfer_cutedsl_moe import (
+from vllm.model_executor.layers.fused_moe.experts.flashinfer_cutedsl_batched_moe import (  # noqa: E501
     flashinfer_cutedsl_moe_masked,
 )
 from vllm.utils.flashinfer import (
@@ -142,7 +142,9 @@ def prepare_inputs(
     # Initialize the hidden_states_3d with ones instead of empty to avoid nan
     # issue.
     hidden_states_3d = torch.ones(
-        (num_experts, max(masked_m), hidden_states.shape[1]), dtype=hidden_states.dtype
+        (num_experts, max(masked_m), hidden_states.shape[1]),
+        dtype=hidden_states.dtype,
+        device=hidden_states.device,
     )
     for i in range(num_experts):
         hidden_states_3d[i, : masked_m[i], :] = hidden_states[topk_idx.view(-1) == i]
@@ -426,7 +428,7 @@ def test_flashinfer_cutedsl_moe_masked(
     w1_alpha = 1.0 / (input_global_scale * w1_global_scale)
     w2_alpha = 1.0 / (a2_global_scale * w2_global_scale)
 
-    out = torch.empty_like(hidden_states_3d)
+    out = torch.empty_like(hidden_states_3d, device=hidden_states.device)
     # Note: the 1st dim shouldn't be bs
     wk = torch.empty(
         num_experts,
@@ -451,11 +453,15 @@ def test_flashinfer_cutedsl_moe_masked(
     )
 
     # reference
-    a_fp4, a_scale_interleaved = fp4_quantize(hidden_states, input_global_scale)
+    # input_global_scale is per-expert ([num_experts]); fp4_quantize and
+    # dequantize_nvfp4_to_dtype are non-grouped APIs that expect [1] or
+    # [num_tokens]. Use a single element since all values are uniform here.
+    a_global = input_global_scale[:1].contiguous()
+    a_fp4, a_scale_interleaved = fp4_quantize(hidden_states, a_global)
     a_in_dtype = dequantize_nvfp4_to_dtype(
         a_fp4,
         a_scale_interleaved,
-        input_global_scale,
+        a_global,
         dtype=hidden_states.dtype,
         device=hidden_states.device,
         block_size=16,
diff --git a/tests/kernels/moe/test_cutlass_moe.py b/tests/kernels/moe/test_cutlass_moe.py
index e06672f41d0c..a613e7d2e290 100644
--- a/tests/kernels/moe/test_cutlass_moe.py
+++ b/tests/kernels/moe/test_cutlass_moe.py
@@ -21,7 +21,7 @@
     FusedMoEQuantConfig,
     fp8_w8a8_moe_quant_config,
 )
-from vllm.model_executor.layers.fused_moe.cutlass_moe import (
+from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import (
     CutlassExpertsFp8,
     run_cutlass_moe_fp8,
 )
diff --git a/tests/kernels/moe/test_deepep_deepgemm_moe.py b/tests/kernels/moe/test_deepep_deepgemm_moe.py
index 9dd8b13d6963..6caa9d8c0687 100644
--- a/tests/kernels/moe/test_deepep_deepgemm_moe.py
+++ b/tests/kernels/moe/test_deepep_deepgemm_moe.py
@@ -29,6 +29,7 @@
     is_deep_gemm_supported,
 )
 from vllm.utils.import_utils import has_deep_ep, has_deep_gemm
+from vllm.utils.math_utils import next_power_of_2
 from vllm.utils.torch_utils import set_random_seed
 from vllm.v1.worker.workspace import init_workspace_manager
 
@@ -47,10 +48,12 @@
     from .parallel_utils import DeepEPHTArgs, DeepEPLLArgs, make_deepep_a2a
 
 if has_deep_gemm():
-    from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
+    from vllm.model_executor.layers.fused_moe.experts.batched_deep_gemm_moe import (
         BatchedDeepGemmExperts,
     )
-    from vllm.model_executor.layers.fused_moe.deep_gemm_moe import DeepGemmExperts
+    from vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe import (
+        DeepGemmExperts,
+    )
 
 requires_deep_ep = pytest.mark.skipif(
     not has_deep_ep(),
@@ -82,14 +85,6 @@ def with_dp_metadata(M: int, world_size: int):
         yield
 
 
-def next_power_of_2(x):
-    import math
-
-    if x == 0:
-        return 1
-    return 2 ** math.ceil(math.log2(x))
-
-
 def make_block_quant_fp8_weights(
     e: int,
     n: int,
diff --git a/tests/kernels/moe/test_deepep_moe.py b/tests/kernels/moe/test_deepep_moe.py
index d04c3c99ce77..fd6dda384c11 100644
--- a/tests/kernels/moe/test_deepep_moe.py
+++ b/tests/kernels/moe/test_deepep_moe.py
@@ -19,7 +19,9 @@
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEQuantConfig,
 )
-from vllm.model_executor.layers.fused_moe.fused_batched_moe import BatchedTritonExperts
+from vllm.model_executor.layers.fused_moe.experts.fused_batched_moe import (
+    BatchedTritonExperts,
+)
 from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEKernel
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
     per_token_group_quant_fp8,
diff --git a/tests/kernels/moe/test_deepgemm.py b/tests/kernels/moe/test_deepgemm.py
index c2949391c798..9095bbc2c76c 100644
--- a/tests/kernels/moe/test_deepgemm.py
+++ b/tests/kernels/moe/test_deepgemm.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """
-Unit-test DeepGEMM FP8 kernels (no DeepEP).
+Unit-test DeepGEMM FP8 and FP4 kernels (no DeepEP).
 Compare DeepGEMM path against the Triton fallback inside vLLM's fused_experts.
 """
 
@@ -21,12 +21,14 @@
     maybe_make_prepare_finalize,
 )
 from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEQuantConfig,
+    FusedMoEQuantDesc,
     fp8_w8a8_moe_quant_config,
 )
-from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts
-from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import (
+from vllm.model_executor.layers.fused_moe.experts.triton_deep_gemm_moe import (
     TritonOrDeepGemmExperts,
 )
+from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
     per_token_group_quant_fp8,
 )
@@ -175,7 +177,7 @@ def test_deepgemm_vs_triton(m, n, k, topk, num_experts, monkeypatch, workspace_i
         mp.setenv("VLLM_USE_DEEP_GEMM", "1")
 
         _DeepGemmExperts = importlib.import_module(
-            "vllm.model_executor.layers.fused_moe.deep_gemm_moe"
+            "vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe"
         ).DeepGemmExperts
 
         call_counter = {"cnt": 0}
@@ -204,3 +206,195 @@ def _spy_apply(*args, **kwargs):
             f"DeepGEMM path was not executed during the test. "
             f"Call counter: {call_counter['cnt']}"
         )
+
+
+# ---------------------------------------------------------------------------
+# FP4 weight tests (DeepGEMM m_grouped_fp8_fp4_gemm_nt_contiguous)
+# ---------------------------------------------------------------------------
+
+
+def make_mxfp4_weights(
+    e: int,
+    n: int,
+    k: int,
+):
+    """
+    Generate (w1, w2) expert weights in MXFP4 packed format with float32 scales,
+    plus BF16 reference weights for validation.
+
+      w1 shape: (E, 2N, K//2) uint8    — packed FP4
+      w2 shape: (E, K, N//2)  uint8    — packed FP4
+      w1_s shape: (E, 2N, K//32) float32  — per-row block-32 scales
+      w2_s shape: (E, K, N//32)  float32  — per-row block-32 scales
+      w1_bf16: (E, 2N, K)   — original BF16 for reference
+      w2_bf16: (E, K, N)    — original BF16 for reference
+    """
+    from deep_gemm.utils.math import per_token_cast_to_fp4
+
+    dtype = torch.bfloat16
+    gran_k = 32  # MXFP4 block size
+
+    # bf16 reference weights — scale by 1/sqrt(dim) for numerical stability
+    w1_bf16 = torch.randn(e, 2 * n, k, device="cuda", dtype=dtype) * (k**-0.5)
+    w2_bf16 = torch.randn(e, k, n, device="cuda", dtype=dtype) * (n**-0.5)
+
+    # Quantize per-expert to FP4
+    w1 = torch.empty(e, 2 * n, k // 2, device="cuda", dtype=torch.uint8)
+    w2 = torch.empty(e, k, n // 2, device="cuda", dtype=torch.uint8)
+    w1_s = torch.empty(
+        e, 2 * n, math.ceil(k / gran_k), device="cuda", dtype=torch.float32
+    )
+    w2_s = torch.empty(e, k, math.ceil(n / gran_k), device="cuda", dtype=torch.float32)
+
+    for i in range(e):
+        w1[i], w1_s[i] = per_token_cast_to_fp4(
+            w1_bf16[i].float(), use_ue8m0=True, gran_k=gran_k
+        )
+        w2[i], w2_s[i] = per_token_cast_to_fp4(
+            w2_bf16[i].float(), use_ue8m0=True, gran_k=gran_k
+        )
+
+    return w1, w2, w1_s, w2_s, w1_bf16, w2_bf16
+
+
+def _bf16_moe_reference(x, w1, w2, topk_weights, topk_ids):
+    """BF16 token-loop MoE reference for correctness testing."""
+    import torch.nn.functional as F
+
+    num_tokens, hidden_size = x.shape
+    intermediate = w1.shape[1] // 2
+    top_k = topk_ids.shape[1]
+
+    output = torch.zeros(num_tokens, hidden_size, dtype=torch.float32, device=x.device)
+    for t in range(num_tokens):
+        for kk in range(top_k):
+            e = topk_ids[t, kk].item()
+            w = topk_weights[t, kk].item()
+            fc1 = x[t : t + 1].float() @ w1[e].float().T
+            linear = fc1[:, :intermediate]
+            gate = fc1[:, intermediate:]
+            act = F.silu(gate) * linear
+            fc2 = act @ w2[e].float().T
+            output[t] += w * fc2[0]
+    return output.to(torch.bfloat16)
+
+
+def run_single_fp4_case(m, n, k, topk, num_experts):
+    """
+    Run one (M,N,K) configuration with FP4 weights on DeepGEMM and assert
+    DeepGEMM FP4 == BF16 reference within tolerance.
+    """
+    tokens_bf16 = torch.randn(m, k, device="cuda", dtype=torch.bfloat16) * (k**-0.5)
+
+    # FP4 expert weight tensors + BF16 originals for reference
+    w1, w2, w1_s, w2_s, w1_bf16, w2_bf16 = make_mxfp4_weights(num_experts, n, k)
+
+    router_logits = torch.randn(m, num_experts, device="cuda", dtype=torch.float32)
+    topk_weights, topk_ids = torch.topk(router_logits, k=topk, dim=-1)
+    topk_weights = torch.nn.functional.softmax(topk_weights, dim=-1)
+
+    from vllm.model_executor.layers.quantization.utils.quant_utils import (
+        GroupShape,
+    )
+    from vllm.platforms import current_platform
+
+    _fp8_dtype = current_platform.fp8_dtype()
+    _block_shape = GroupShape(128, 128)
+    quant_config = FusedMoEQuantConfig(
+        _a1=FusedMoEQuantDesc(_fp8_dtype, _block_shape, None, None, None, None),
+        _a2=FusedMoEQuantDesc(_fp8_dtype, _block_shape, None, None, None, None),
+        _w1=FusedMoEQuantDesc("mxfp4", None, w1_s, None, None, None),
+        _w2=FusedMoEQuantDesc("mxfp4", None, w2_s, None, None, None),
+    )
+    moe_config = make_dummy_moe_config()
+
+    from vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe import (
+        DeepGemmFP4Experts,
+    )
+
+    deep_gemm_fp4_experts = mk.FusedMoEKernel(
+        prepare_finalize=maybe_make_prepare_finalize(
+            moe=moe_config,
+            quant_config=quant_config,
+            allow_new_interface=True,
+            use_monolithic=False,
+        ),
+        fused_experts=DeepGemmFP4Experts(
+            moe_config=moe_config,
+            quant_config=quant_config,
+        ),
+        inplace=False,
+    )
+
+    # DeepGEMM FP4 path
+    out_deepgemm_fp4 = deep_gemm_fp4_experts.apply(
+        hidden_states=tokens_bf16,
+        w1=w1,
+        w2=w2,
+        topk_weights=topk_weights,
+        topk_ids=topk_ids,
+        global_num_experts=num_experts,
+        activation=MoEActivation.SILU,
+        apply_router_weight_on_input=False,
+        expert_map=None,
+    )
+
+    # BF16 reference using the same original weights
+    out_ref = _bf16_moe_reference(tokens_bf16, w1_bf16, w2_bf16, topk_weights, topk_ids)
+
+    # FP4 vs BF16 reference: quantization error from FP4 weights + FP8 activations
+    diff = calc_diff(out_deepgemm_fp4, out_ref)
+    assert diff < 0.05, f"FP4 diff exceeded 5%: {diff}"
+
+
+# DeepSeek V4 dims: H=4096, I=2048, so N=2*I=4096, K=H=4096.
+# FP4 quantization with block_k=32 needs large K for good accuracy.
+FP4_MNKs = [
+    (128, 4096, 4096),  # DeepSeek V4 shape
+    (256, 2048, 2048),  # Half-size variant
+]
+
+FP4_TOPKS = [2]
+FP4_NUM_EXPERTS = [8]
+
+
+@pytest.mark.parametrize(("m", "n", "k"), FP4_MNKs)
+@pytest.mark.parametrize("topk", FP4_TOPKS)
+@pytest.mark.parametrize("num_experts", FP4_NUM_EXPERTS)
+@pytest.mark.skipif(not is_deep_gemm_supported(), reason="Requires deep_gemm kernels")
+def test_deepgemm_fp4_vs_triton(
+    m, n, k, topk, num_experts, monkeypatch, workspace_init
+):
+    pytest.importorskip("deep_gemm.utils.math")
+    with monkeypatch.context() as mp:
+        mp.setenv("VLLM_USE_DEEP_GEMM", "1")
+
+        _DeepGemmFP4Experts = importlib.import_module(
+            "vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe"
+        ).DeepGemmFP4Experts
+
+        call_counter = {"cnt": 0}
+
+        orig_fn = _DeepGemmFP4Experts.apply
+
+        def _spy_apply(*args, **kwargs):
+            call_counter["cnt"] += 1
+            return orig_fn(*args, **kwargs)
+
+        monkeypatch.setattr(_DeepGemmFP4Experts, "apply", _spy_apply)
+        if topk > num_experts:
+            pytest.skip(f"topk={topk} > num_experts={num_experts}")
+
+        run_single_fp4_case(
+            m=m,
+            n=n,
+            k=k,
+            topk=topk,
+            num_experts=num_experts,
+        )
+
+        # ensure that the DeepGEMM FP4 path was indeed taken.
+        assert call_counter["cnt"] == 1, (
+            f"DeepGEMM FP4 path was not executed during the test. "
+            f"Call counter: {call_counter['cnt']}"
+        )
diff --git a/tests/kernels/moe/test_flashinfer.py b/tests/kernels/moe/test_flashinfer.py
index db499b68843f..9e35be0db801 100644
--- a/tests/kernels/moe/test_flashinfer.py
+++ b/tests/kernels/moe/test_flashinfer.py
@@ -18,12 +18,12 @@
     RoutingMethodType,
     fp8_w8a8_moe_quant_config,
 )
+from vllm.model_executor.layers.fused_moe.experts.flashinfer_cutlass_moe import (
+    FlashInferExperts,
+)
 from vllm.model_executor.layers.fused_moe.experts.trtllm_fp8_moe import (
     TrtLlmFp8ExpertsMonolithic,
 )
-from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (
-    FlashInferExperts,
-)
 from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts
 from vllm.model_executor.layers.quantization.utils.flashinfer_utils import (
     rotate_weights_for_fi_trtllm_fp8_per_tensor_moe,
@@ -32,6 +32,7 @@
 from vllm.model_executor.layers.quantization.utils.fp8_utils import input_to_float8
 from vllm.model_executor.models.llama4 import Llama4MoE
 from vllm.platforms import current_platform
+from vllm.utils.math_utils import next_power_of_2
 from vllm.utils.torch_utils import set_random_seed
 
 try:
@@ -174,6 +175,7 @@ def make_moe_tensors_8bit(
             routing_method=layer.routing_method_type,
             activation=activation,
             device=w13_quantized.device,
+            max_num_tokens=next_power_of_2(m),
         )
 
         return TestData(
@@ -348,6 +350,7 @@ def get_fused_moe_quant_config(n: torch.nn.Module) -> FusedMoEQuantConfig:
             in_dtype=torch.bfloat16,
             is_act_and_mul=activation.is_gated,
             routing_method=RoutingMethodType.TopK,
+            max_num_tokens=next_power_of_2(m),
         )
 
         kernel = mk.FusedMoEKernel(
diff --git a/tests/kernels/moe/test_flashinfer_b12x_moe.py b/tests/kernels/moe/test_flashinfer_b12x_moe.py
new file mode 100644
index 000000000000..ec0a9594fe12
--- /dev/null
+++ b/tests/kernels/moe/test_flashinfer_b12x_moe.py
@@ -0,0 +1,229 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+import torch
+
+from vllm.platforms import current_platform
+
+if not current_platform.is_device_capability_family(120):
+    pytest.skip(
+        reason="FlashInfer CuteDSL SM12x MoE requires SM120 "
+        "(RTX Pro 6000 / DGX Spark).",
+        allow_module_level=True,
+    )
+
+from vllm.utils.flashinfer import has_flashinfer_b12x_moe
+
+if not has_flashinfer_b12x_moe():
+    pytest.skip(
+        reason=(
+            "FlashInfer cute_dsl_fused_moe_nvfp4 / convert_sf_to_mma_layout "
+            "not available in installed FlashInfer (needs PRs #3051 and #3066)."
+        ),
+        allow_module_level=True,
+    )
+
+# Import fp4_quantize after the skip guard — FlashInfer must be installed.
+from flashinfer.fp4_quantization import fp4_quantize
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from tests.kernels.moe.utils import make_dummy_moe_config
+from tests.kernels.utils import torch_moe
+from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config
+from vllm.model_executor.layers.fused_moe import fused_topk
+from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+from vllm.model_executor.layers.fused_moe.all2all_utils import (
+    maybe_make_prepare_finalize,
+)
+from vllm.model_executor.layers.fused_moe.config import nvfp4_moe_quant_config
+from vllm.model_executor.layers.fused_moe.experts.flashinfer_b12x_moe import (
+    FlashInferB12xExperts,
+)
+from vllm.utils.flashinfer import flashinfer_convert_sf_to_mma_layout
+from vllm.utils.torch_utils import set_random_seed
+
+# Dimensions chosen to satisfy FP4 alignment requirements (k multiple of 256,
+# n multiple of 128) while keeping tests fast.
+MNK_FACTORS = [
+    (2, 128, 256),
+    (2, 256, 512),
+    (16, 128, 256),
+    (64, 256, 512),
+]
+
+
+def _reorder_gate_up_to_up_gate(
+    w: torch.Tensor,
+    w_s: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Swap gate and up-projection halves along dim=1 to [up, gate] order.
+
+    The SM12x kernel expects weights in [up (w3), gate (w1)] order while the
+    BF16 reference uses [gate (w1), up (w3)].  This replicates the reordering
+    done at model-load time by ``prepare_nvfp4_moe_layer_for_fi_or_cutlass``.
+    """
+    n = w.shape[1] // 2
+    return (
+        torch.cat([w[:, n:, :], w[:, :n, :]], dim=1),
+        torch.cat([w_s[:, n:, :], w_s[:, :n, :]], dim=1),
+    )
+
+
+@pytest.mark.parametrize("m,n,k", MNK_FACTORS)
+@pytest.mark.parametrize("e", [8, 16])
+@pytest.mark.parametrize("topk", [1, 2, 4])
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
+@torch.inference_mode()
+def test_flashinfer_b12x_moe(
+    m: int,
+    n: int,
+    k: int,
+    e: int,
+    topk: int,
+    dtype: torch.dtype,
+    workspace_init,
+):
+    """Test FlashInferB12xExperts against a BF16 torch reference.
+
+    The SM12x kernel takes BF16 hidden states directly and fuses token
+    dispatch, W1 GEMM, SwiGLU, and W2 GEMM into one call.  We verify
+    correctness against ``torch_moe`` using generous tolerances to account
+    for the internal FP4 quantization of activations and weights.
+
+    Scale convention
+    ----------------
+    The SM12x kernel uses ``w1_alpha`` as *both* the activation-quantisation
+    global scale and the weight dequantisation factor.  These two roles are
+    conflated into a single parameter in ``launch_sm120_moe``, so they must
+    equal the same value.  We use ``global_scale = 1.0`` for
+    ``fp4_quantize`` so that ``w1_alpha = ones`` satisfies both roles
+    simultaneously.  The alternative — vLLM's convention of baking a large
+    ``w_gs`` into block-scale values and compensating with
+    ``g1_alphas = 1/w_gs`` — is incompatible with this kernel.
+    """
+    set_random_seed(7)
+    with set_current_vllm_config(
+        VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=1))
+    ):
+        a = torch.randn((m, k), device="cuda", dtype=dtype) / 10
+
+        # Generate BF16 reference weights in [gate, up] order.
+        # Shape: w1=(e, 2n, k), w2=(e, k, n).
+        w1_bf16 = torch.randn((e, 2 * n, k), device="cuda", dtype=dtype) / 15
+        w2_bf16 = torch.randn((e, k, n), device="cuda", dtype=dtype) / 15
+
+        # ------------------------------------------------------------------ #
+        # Quantise weights for the SM12x kernel using FlashInfer's convention:
+        #   global_scale = 1.0   →   block_scale = max_abs_block / fp4_max
+        #   w1_alpha = 1.0       (no extra global factor to compensate)
+        #
+        # The scale factors returned by fp4_quantize(..., is_sf_swizzled_layout=True)
+        # are already in the swizzled 2D layout expected by convert_sf_to_mma_layout.
+        # No additional swizzle_blockscale() call is needed.
+        # ------------------------------------------------------------------ #
+        gs = torch.ones(1, device="cuda", dtype=torch.float32)
+        sf_vec_size = 16
+
+        # W1: reorder BF16 from [gate, up] → [up, gate], then quantise.
+        w1_reordered = torch.cat(
+            [w1_bf16[:, n:, :], w1_bf16[:, :n, :]], dim=1
+        )  # shape (e, 2n, k), [up, gate]
+        w1_flat = w1_reordered.reshape(e * 2 * n, k)
+        w1_q_flat, w1_sf_flat = fp4_quantize(
+            w1_flat,
+            global_scale=gs,
+            sf_vec_size=sf_vec_size,
+            is_sf_swizzled_layout=True,
+        )
+        w1_q = w1_q_flat.view(e, 2 * n, k // 2)  # uint8, packed FP4
+        w1_blockscale = w1_sf_flat.view(e, 2 * n, w1_sf_flat.shape[1])  # float8
+
+        # W2: no row reordering needed for the down-projection.
+        w2_flat = w2_bf16.reshape(e * k, n)
+        w2_q_flat, w2_sf_flat = fp4_quantize(
+            w2_flat,
+            global_scale=gs,
+            sf_vec_size=sf_vec_size,
+            is_sf_swizzled_layout=True,
+        )
+        w2_q = w2_q_flat.view(e, k, n // 2)  # uint8, packed FP4
+        w2_blockscale = w2_sf_flat.view(e, k, w2_sf_flat.shape[1])  # float8
+
+        # All per-expert alphas are 1.0 (global_scale = 1.0, no compensation).
+        ones_e = torch.ones(e, device="cuda", dtype=torch.float32)
+
+        quant_config = nvfp4_moe_quant_config(
+            g1_alphas=ones_e,
+            g2_alphas=ones_e,
+            a1_gscale=ones_e,
+            a2_gscale=ones_e,
+            w1_scale=w1_blockscale,
+            w2_scale=w2_blockscale,
+        )
+
+        moe_config = make_dummy_moe_config(
+            num_experts=e,
+            experts_per_token=topk,
+            hidden_dim=k,
+            intermediate_size_per_partition=n,
+            in_dtype=dtype,
+        )
+
+        experts = FlashInferB12xExperts(
+            moe_config=moe_config,
+            quant_config=quant_config,
+        )
+        # In production, process_weights_after_loading computes these after
+        # normalizing block scales. In the test the scales are already in final
+        # form (global_scale=1.0), so we compute the MMA layouts directly.
+        num_experts_w1, m1, k1_sf = w1_blockscale.shape
+        experts.w1_sf_mma = flashinfer_convert_sf_to_mma_layout(
+            w1_blockscale.reshape(num_experts_w1 * m1, k1_sf),
+            m=m1,
+            k=k1_sf * 16,
+            num_groups=num_experts_w1,
+        )
+        num_experts_w2, m2, k2_sf = w2_blockscale.shape
+        experts.w2_sf_mma = flashinfer_convert_sf_to_mma_layout(
+            w2_blockscale.reshape(num_experts_w2 * m2, k2_sf),
+            m=m2,
+            k=k2_sf * 16,
+            num_groups=num_experts_w2,
+        )
+
+        kernel = mk.FusedMoEKernel(
+            maybe_make_prepare_finalize(
+                moe=moe_config,
+                quant_config=quant_config,
+                allow_new_interface=True,
+                use_monolithic=False,
+            ),
+            experts,
+            inplace=False,
+        )
+
+        score = torch.randn((m, e), device="cuda", dtype=dtype)
+        topk_weights, topk_ids, _ = fused_topk(a, score, topk, renormalize=False)
+
+        sm12x_output = kernel.apply(
+            hidden_states=a,
+            w1=w1_q,
+            w2=w2_q,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            global_num_experts=e,
+            activation=MoEActivation.SILU,
+            apply_router_weight_on_input=False,
+            expert_map=None,
+        )
+
+        # Reference: BF16 torch MoE using original [gate, up] BF16 weights.
+        # torch_moe's SiluAndMul expects [gate, up] order, matching w1_bf16.
+        torch_output = torch_moe(a, w1_bf16, w2_bf16, score, topk)
+
+        torch.testing.assert_close(sm12x_output, torch_output, atol=2e-1, rtol=2e-1)
+
+
+if __name__ == "__main__":
+    test_flashinfer_b12x_moe(16, 128, 256, 8, 2, torch.bfloat16)
diff --git a/tests/kernels/moe/test_flashinfer_moe.py b/tests/kernels/moe/test_flashinfer_moe.py
index a3fb474f1517..6ae67fa0e987 100644
--- a/tests/kernels/moe/test_flashinfer_moe.py
+++ b/tests/kernels/moe/test_flashinfer_moe.py
@@ -22,13 +22,14 @@
     FusedMoEParallelConfig,
     RoutingMethodType,
 )
-from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (
+from vllm.model_executor.layers.fused_moe.experts.flashinfer_cutlass_moe import (
     FlashInferExperts,
     is_valid_flashinfer_cutlass_fused_moe,
 )
 from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEKernel
 from vllm.platforms import current_platform
 from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
+from vllm.utils.math_utils import next_power_of_2
 from vllm.utils.torch_utils import set_random_seed
 
 if not has_flashinfer_cutlass_fused_moe() or not current_platform.has_device_capability(
@@ -105,6 +106,7 @@ def test_flashinfer_fp4_moe_no_graph(
             in_dtype=dtype,
             is_act_and_mul=is_gated_act,
             routing_method=RoutingMethodType.TopK,
+            max_num_tokens=next_power_of_2(m),
         )
 
         flashinfer_experts = FusedMoEKernel(
diff --git a/tests/kernels/moe/test_fused_topk.py b/tests/kernels/moe/test_fused_topk.py
index 5384d8964b58..825cd20263d7 100644
--- a/tests/kernels/moe/test_fused_topk.py
+++ b/tests/kernels/moe/test_fused_topk.py
@@ -135,3 +135,139 @@ def test_fused_topk_bias(
         topk_weights_ref.to(torch.float32), topk_weights, atol=1e-2, rtol=1e-2
     )
     torch.testing.assert_close(topk_ids_ref.to(torch.int32), topk_ids, atol=0, rtol=0)
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda(), reason="This test is skipped on non-CUDA platform."
+)
+@pytest.mark.parametrize("num_experts", [6, 8, 16])
+@pytest.mark.parametrize("topk", [3, 4])
+@pytest.mark.parametrize("scoring_func", ["softmax", "sigmoid"])
+@pytest.mark.parametrize("bad_value", [float("nan"), float("inf")])
+@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.half, torch.float32])
+def test_fused_topk_nan_inf_clamp(
+    num_experts: int,
+    topk: int,
+    scoring_func: str,
+    bad_value: float,
+    dtype: torch.dtype,
+):
+    """Regression test for the NaN/Inf clamp in topk_softmax_kernels.cu.
+
+    Degenerate hidden states (e.g., from CUDA graph padding) can produce
+    NaN/Inf gating logits. Without the clamp, softmax/sigmoid outputs are
+    NaN and the argmax loop picks expert 0 for every top-k slot (since
+    "NaN > NaN" is false per IEEE 754), yielding duplicate expert IDs that
+    crash downstream MoE sort kernels. The fix clamps NaN/Inf to 0 before
+    argmax so index tie-breaking selects unique experts [0, 1, ..., k-1].
+    """
+    torch.manual_seed(0)
+    num_tokens = 4
+    hidden_size = 1024
+    hidden_states = torch.randn((num_tokens, hidden_size), dtype=dtype, device="cuda")
+
+    # Row 0: all normal. Rows 1-3: fully poisoned with NaN or Inf.
+    gating_output = torch.randn((num_tokens, num_experts), dtype=dtype, device="cuda")
+    gating_output[1:, :] = bad_value
+
+    topk_weights, topk_ids, _ = fused_topk(
+        hidden_states=hidden_states,
+        gating_output=gating_output,
+        topk=topk,
+        renormalize=False,
+        scoring_func=scoring_func,
+    )
+
+    # Normal row must still match the torch reference.
+    ref_weights, ref_ids = torch_topk(
+        gating_output=gating_output[:1],
+        topk=topk,
+        renormalize=False,
+        scoring_func=scoring_func,
+    )
+    torch.testing.assert_close(
+        ref_weights.to(torch.float32), topk_weights[:1], atol=1e-2, rtol=1e-2
+    )
+    torch.testing.assert_close(ref_ids.to(torch.int32), topk_ids[:1], atol=0, rtol=0)
+
+    # Poisoned rows: IDs must be unique (no duplicates) and weights must be
+    # finite (no NaN/Inf propagation into downstream MoE kernels).
+    for row in range(1, num_tokens):
+        row_ids = topk_ids[row]
+        assert row_ids.unique().numel() == topk, (
+            f"Row {row} has duplicate expert IDs {row_ids.tolist()} "
+            f"(bad_value={bad_value}, scoring_func={scoring_func})"
+        )
+        assert torch.isfinite(topk_weights[row]).all(), (
+            f"Row {row} has non-finite weights {topk_weights[row].tolist()} "
+            f"(bad_value={bad_value}, scoring_func={scoring_func})"
+        )
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda(), reason="This test is skipped on non-CUDA platform."
+)
+@pytest.mark.parametrize("num_experts", [6, 8, 16])
+@pytest.mark.parametrize("topk", [3, 4])
+@pytest.mark.parametrize("scoring_func", ["softmax", "sigmoid"])
+@pytest.mark.parametrize("bad_value", [float("nan"), float("inf")])
+@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.half, torch.float32])
+def test_fused_topk_bias_nan_inf_clamp(
+    num_experts: int,
+    topk: int,
+    scoring_func: str,
+    bad_value: float,
+    dtype: torch.dtype,
+):
+    """Regression test: NaN/Inf in gating logits must not produce duplicate
+    expert IDs or non-finite weights when e_score_correction_bias is present.
+
+    Same scenario as test_fused_topk_nan_inf_clamp but exercising the bias
+    path (fused_topk_bias) so the fix in topk_softmax_kernels.cu is covered
+    for that entry point as well.
+    """
+    torch.manual_seed(0)
+    num_tokens = 4
+    hidden_size = 1024
+    hidden_states = torch.randn((num_tokens, hidden_size), dtype=dtype, device="cuda")
+    e_score_correction_bias = torch.randn(
+        (num_experts,), dtype=torch.float32, device="cuda"
+    )
+
+    gating_output = torch.randn((num_tokens, num_experts), dtype=dtype, device="cuda")
+    gating_output[1:, :] = bad_value
+
+    topk_weights, topk_ids = fused_topk_bias(
+        hidden_states=hidden_states,
+        gating_output=gating_output,
+        e_score_correction_bias=e_score_correction_bias,
+        topk=topk,
+        renormalize=False,
+        scoring_func=scoring_func,
+    )
+
+    # Normal row must still match the torch reference.
+    ref_weights, ref_ids = torch_topk(
+        gating_output=gating_output[:1],
+        topk=topk,
+        renormalize=False,
+        e_score_correction_bias=e_score_correction_bias,
+        scoring_func=scoring_func,
+    )
+    torch.testing.assert_close(
+        ref_weights.to(torch.float32), topk_weights[:1], atol=1e-2, rtol=1e-2
+    )
+    torch.testing.assert_close(ref_ids.to(torch.int32), topk_ids[:1], atol=0, rtol=0)
+
+    # Poisoned rows: IDs must be unique (no duplicates) and weights must be
+    # finite (no NaN/Inf propagation into downstream MoE kernels).
+    for row in range(1, num_tokens):
+        row_ids = topk_ids[row]
+        assert row_ids.unique().numel() == topk, (
+            f"Row {row} has duplicate expert IDs {row_ids.tolist()} "
+            f"(bad_value={bad_value}, scoring_func={scoring_func})"
+        )
+        assert torch.isfinite(topk_weights[row]).all(), (
+            f"Row {row} has non-finite weights {topk_weights[row].tolist()} "
+            f"(bad_value={bad_value}, scoring_func={scoring_func})"
+        )
diff --git a/tests/kernels/moe/test_gemma4router.py b/tests/kernels/moe/test_gemma4router.py
new file mode 100644
index 000000000000..ba69d6927495
--- /dev/null
+++ b/tests/kernels/moe/test_gemma4router.py
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
+import torch
+
+from vllm.model_executor.models.gemma4 import (
+    gemma4_fused_routing_kernel_triton,
+    gemma4_routing_function_torch,
+)
+
+
+def sort_by_id(w, ids):
+    order = ids.argsort(dim=-1)
+    return w.gather(1, order), ids.gather(1, order)
+
+
+# Gemma4 MoE Model has context length of 250K
+# the minus 1 is to ensure that edge cases are tested
+@pytest.mark.parametrize("num_tokens", [1, 2, 2048, 250000])
+@pytest.mark.parametrize("num_experts", [128])  # gemma4 moe experts
+@pytest.mark.parametrize("topk", [8])  # gemma4 topk
+@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.half, torch.float32])
+def test_gemma4_routing_kernel_triton(
+    num_tokens: int,
+    num_experts: int,
+    topk: int,
+    dtype: torch.dtype,
+):
+    torch.manual_seed(0)
+
+    gating = torch.randn(num_tokens, num_experts, dtype=dtype, device="cuda")
+    scales = torch.rand(num_experts, dtype=torch.float32, device="cuda")
+
+    ref_w, ref_ids = gemma4_routing_function_torch(gating, topk, scales)
+    tri_w, tri_ids = gemma4_fused_routing_kernel_triton(gating, topk, scales)
+
+    # Sort by expert id — to remove tie-breaking differences
+    ref_ws, ref_is = sort_by_id(ref_w, ref_ids)
+    tri_ws, tri_is = sort_by_id(tri_w, tri_ids)
+
+    ids_match = (ref_is == tri_is).all().item()
+    weights_match = torch.allclose(ref_ws, tri_ws, atol=1e-2, rtol=1e-2)
+    all_match = ids_match and weights_match
+    max_err = (ref_ws - tri_ws).abs().max().item()
+    print(
+        f"T={num_tokens:5d} E={num_experts:4d} K={topk} "
+        f"{str(dtype).split('.')[-1]:7s} ids={ids_match} max_Δweight={max_err:.2e}"
+    )
+    if not all_match:
+        bad = (ref_is != tri_is).any(dim=-1).nonzero(as_tuple=True)[0]
+        if len(bad):
+            r = bad[0].item()
+            print(
+                f"  first bad row {r}: ref_ids={ref_ids[r].tolist()} "
+                f"tri_ids={tri_ids[r].tolist()}"
+            )
+        assert all_match
diff --git a/tests/kernels/moe/test_gpt_oss_triton_kernels.py b/tests/kernels/moe/test_gpt_oss_triton_kernels.py
index 172938f18e4c..c61004acaa35 100644
--- a/tests/kernels/moe/test_gpt_oss_triton_kernels.py
+++ b/tests/kernels/moe/test_gpt_oss_triton_kernels.py
@@ -23,16 +23,12 @@
 from triton_kernels.tensor import FP4, convert_layout, wrap_torch_tensor
 from triton_kernels.tensor_details import layout
 from triton_kernels.testing import assert_close
-from triton_kernels.topk import topk as topk_fn
 
 from vllm.model_executor.layers.fused_moe.config import mxfp4_w4a16_moe_quant_config
-from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (
-    legacy_routing,
-    make_routing_data,
+from vllm.model_executor.layers.fused_moe.experts.gpt_oss_triton_kernels_moe import (
     triton_kernel_moe_forward,
 )
 from vllm.utils.math_utils import round_up
-from vllm.utils.torch_utils import set_random_seed
 
 from .utils import shuffle_weight
 
@@ -97,10 +93,18 @@ def init_compute_data(M, K, N, E, a_dtype: str, w_dtype: str, num_warps: int):
     if w_dtype != "mx4":
         pytest.skip("NYI")
     else:  # quantize to mx4
-        # careful on the padding here, the activation padding need to be
-        # multiple of 64, the actual engine is not implemented
-        w1_bottom_pad = round_up(w1_tri.shape[1], 64) - w1_tri.shape[1]
-        w1_right_pad = round_up(w1_tri.shape[2], 128) - w1_tri.shape[2]
+        # Padding alignment depends on the platform.  On CDNA4 the scale
+        # swizzle requires SCALE_K % 8 == 0 (K % 256) and
+        # SCALE_N % 32 == 0 (2*N % 512), matching the production
+        # alignment in mxfp4_round_up_hidden_size_and_intermediate_size.
+        # On CUDA (Hopper) the scale layout pads internally, so the
+        # original 64/128 alignment is sufficient.
+        if current_platform.is_rocm():
+            k_align, n2_align = 256, 512
+        else:
+            k_align, n2_align = 64, 128
+        w1_bottom_pad = round_up(w1_tri.shape[1], k_align) - w1_tri.shape[1]
+        w1_right_pad = round_up(w1_tri.shape[2], n2_align) - w1_tri.shape[2]
 
         w2_bottom_pad = w1_right_pad // 2
         w2_right_pad = w1_bottom_pad
@@ -367,52 +371,3 @@ def test_unit_shuffle():
     )
 
     assert_close(ref=out_ref, tri=out)
-
-
-@pytest.mark.parametrize("num_tokens", [2, 8, 64])
-@pytest.mark.parametrize("num_experts", [32, 128])
-@pytest.mark.parametrize("topk", [1, 4])
-@pytest.mark.parametrize("renormalize", [True, False])
-@pytest.mark.parametrize("dtype", [torch.bfloat16])
-def test_legacy_routing(
-    num_tokens: int, num_experts: int, topk: int, renormalize: bool, dtype: torch.dtype
-):
-    set_random_seed(0)
-    gating_output = torch.randn(num_tokens, num_experts, device="cuda", dtype=dtype)
-
-    sm_first = not renormalize
-    logits = gating_output
-    if sm_first:
-        logits = torch.softmax(logits, dim=-1)
-    topk_result = topk_fn(logits, topk, apply_softmax=not sm_first)
-    # topk_fn returns SparseMatrix on NVIDIA, plain tuple on ROCm.
-    if isinstance(topk_result, tuple):
-        topk_weights, topk_ids_raw, bitmatrix = topk_result
-        from triton_kernels.routing import routing_from_bitmatrix
-
-        routing_data_ref, gather_indx_ref, scatter_indx_ref = routing_from_bitmatrix(
-            bitmatrix, topk_weights, topk_ids_raw, num_experts, topk
-        )
-    else:
-        topk_ids = topk_result.indx.to(torch.long)
-        topk_weights = topk_result.vals
-        routing_data_ref, gather_indx_ref, scatter_indx_ref = make_routing_data(
-            topk_ids, topk_weights, num_experts
-        )
-
-    routing_data, gather_indx, scatter_indx = legacy_routing(
-        gating_output, topk, sm_first=sm_first
-    )
-
-    assert_close(
-        ref=gather_indx_ref.src_indx, tri=gather_indx.src_indx, maxtol=0, rmstol=0
-    )
-    assert_close(
-        ref=gather_indx_ref.dst_indx, tri=gather_indx.dst_indx, maxtol=0, rmstol=0
-    )
-    assert_close(
-        ref=scatter_indx_ref.src_indx, tri=scatter_indx.src_indx, maxtol=0, rmstol=0
-    )
-    assert_close(
-        ref=scatter_indx_ref.dst_indx, tri=scatter_indx.dst_indx, maxtol=0, rmstol=0
-    )
diff --git a/tests/kernels/moe/test_marlin_vs_trtllm_mxint4.py b/tests/kernels/moe/test_marlin_vs_trtllm_mxint4.py
index aaf255ca8b6a..eaeca6a8a5dc 100644
--- a/tests/kernels/moe/test_marlin_vs_trtllm_mxint4.py
+++ b/tests/kernels/moe/test_marlin_vs_trtllm_mxint4.py
@@ -5,7 +5,7 @@
 import pytest
 import torch
 
-from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
+from vllm.model_executor.layers.fused_moe.experts.marlin_moe import (
     fused_marlin_moe,
 )
 from vllm.model_executor.layers.fused_moe.router.grouped_topk_router import (
@@ -16,6 +16,7 @@
 )
 from vllm.platforms import current_platform
 from vllm.scalar_type import scalar_types
+from vllm.utils.torch_utils import set_random_seed
 
 
 def mxint4_quantize(
@@ -134,7 +135,7 @@ def test_marlin_vs_trtllm_mxint4_moe_kimik2(monkeypatch, m, n, k, e, topk, group
     pytest.importorskip("flashinfer")
     monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_INT4", "1")
 
-    torch.cuda.manual_seed(0)
+    set_random_seed(0)
 
     dtype = torch.bfloat16
 
@@ -269,3 +270,96 @@ def test_marlin_vs_trtllm_mxint4_moe_kimik2(monkeypatch, m, n, k, e, topk, group
     # Note: Different quantization schemes (UINT4b8 vs signed MXINT4) cause
     # some differences
     torch.testing.assert_close(marlin_output, trtllm_output, atol=0.3, rtol=6.0)
+
+
+@pytest.mark.skipif(not TRTLLM_GEN_AVAILABLE, reason="Skip for non SM100")
+@pytest.mark.parametrize("m", [1, 33])
+@pytest.mark.parametrize("n", [7168])
+@pytest.mark.parametrize("k", [512])
+@pytest.mark.parametrize("e", [384])
+@pytest.mark.parametrize("topk", [8])
+@torch.inference_mode()
+def test_flashinfer_trtllm_mxint4_moe_wrapper(m, n, k, e, topk):
+    """Test that the flashinfer_trtllm_mxint4_moe wrapper matches the raw
+    trtllm_mxint4_block_scale_moe kernel call."""
+    pytest.importorskip("flashinfer")
+    from flashinfer import RoutingMethodType
+    from flashinfer.fused_moe import trtllm_mxint4_block_scale_moe
+
+    from vllm.model_executor.layers.quantization.utils.flashinfer_mxint4_moe import (
+        flashinfer_trtllm_mxint4_moe,
+    )
+
+    set_random_seed(0)
+    dtype = torch.bfloat16
+
+    a = torch.randn((m, k), device="cuda", dtype=dtype) * 0.5
+    router_logits = torch.randn((m, e), device="cuda", dtype=torch.float32) * 1.5
+    routing_bias = torch.randn(e, device="cuda", dtype=torch.float32) * 0.8
+
+    std_w1 = (2.0 / (k + 2 * n)) ** 0.5
+    std_w2 = (2.0 / (n + k)) ** 0.5
+    w1_bf16 = torch.randn((e, 2 * n, k), device="cuda", dtype=dtype) * std_w1
+    w2_bf16 = torch.randn((e, k, n), device="cuda", dtype=dtype) * std_w2
+
+    w1_int4, w1_scales = mxint4_quantize_moe_weights(w1_bf16)
+    w2_int4, w2_scales = mxint4_quantize_moe_weights(w2_bf16)
+
+    prepared = prepare_static_weights_for_trtllm_mxint4_moe(
+        gemm1_weights=w1_int4,
+        gemm1_scales=w1_scales,
+        gemm2_weights=w2_int4,
+        gemm2_scales=w2_scales,
+    )
+
+    # Raw kernel call (reference)
+    raw_out = trtllm_mxint4_block_scale_moe(
+        routing_logits=router_logits.to(torch.float32),
+        routing_bias=routing_bias.to(torch.bfloat16),
+        hidden_states=a,
+        gemm1_weights=prepared["gemm1_weights"].data,
+        gemm1_weights_scale=prepared["gemm1_scales"].data,
+        gemm1_alpha=None,
+        gemm1_beta=None,
+        gemm1_clamp_limit=None,
+        gemm2_weights=prepared["gemm2_weights"].data,
+        gemm2_weights_scale=prepared["gemm2_scales"].data,
+        num_experts=e,
+        top_k=topk,
+        n_group=1,
+        topk_group=1,
+        intermediate_size=n,
+        local_expert_offset=0,
+        local_num_experts=e,
+        routed_scaling_factor=None,
+        routing_method_type=RoutingMethodType.DeepSeekV3,
+        enable_pdl=None,
+        output=None,
+        tune_max_num_tokens=8192,
+    )
+    if not isinstance(raw_out, torch.Tensor):
+        raw_out = raw_out[0]
+    raw_out = raw_out.to(dtype)
+
+    # Wrapper call
+    wrapper_out = flashinfer_trtllm_mxint4_moe(
+        x=a,
+        router_logits=router_logits,
+        w13_weight_packed=prepared["gemm1_weights"],
+        w13_weight_scale=prepared["gemm1_scales"],
+        w2_weight_packed=prepared["gemm2_weights"],
+        w2_weight_scale=prepared["gemm2_scales"],
+        global_num_experts=e,
+        top_k=topk,
+        intermediate_size_per_partition=n,
+        local_num_experts=e,
+        ep_rank=0,
+        num_expert_group=1,
+        topk_group=1,
+        e_score_correction_bias=routing_bias,
+        routing_method_type=RoutingMethodType.DeepSeekV3,
+    )
+
+    assert wrapper_out.shape == (m, k)
+    assert wrapper_out.dtype == dtype
+    torch.testing.assert_close(wrapper_out, raw_out, atol=0.0, rtol=0.0)
diff --git a/tests/kernels/moe/test_modular_kernel_combinations.py b/tests/kernels/moe/test_modular_kernel_combinations.py
index 19367e7d1d9f..c7295f3ed6ed 100644
--- a/tests/kernels/moe/test_modular_kernel_combinations.py
+++ b/tests/kernels/moe/test_modular_kernel_combinations.py
@@ -15,7 +15,7 @@
 from vllm.platforms import current_platform
 from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
 from vllm.utils.import_utils import has_deep_ep, has_deep_gemm
-from vllm.utils.torch_utils import cuda_device_count_stateless, set_random_seed
+from vllm.utils.torch_utils import set_random_seed
 from vllm.v1.worker.workspace import init_workspace_manager
 
 from .modular_kernel_tools.common import (
@@ -310,10 +310,10 @@ def test_modular_kernel_combinations_multigpu(
     world_size: int,
     pytestconfig,
 ):
-    if cuda_device_count_stateless() < world_size:
+    if current_platform.device_count() < world_size:
         pytest.skip(
             f"Not enough GPUs available to run, got "
-            f"{cuda_device_count_stateless()} expected "
+            f"{current_platform.device_count()} expected "
             f"{world_size}."
         )
 
diff --git a/tests/kernels/moe/test_modular_oai_triton_moe.py b/tests/kernels/moe/test_modular_oai_triton_moe.py
index b071e72dafbb..589d90d1eca7 100644
--- a/tests/kernels/moe/test_modular_oai_triton_moe.py
+++ b/tests/kernels/moe/test_modular_oai_triton_moe.py
@@ -29,7 +29,7 @@
     maybe_make_prepare_finalize,
 )
 from vllm.model_executor.layers.fused_moe.config import mxfp4_w4a16_moe_quant_config
-from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (
+from vllm.model_executor.layers.fused_moe.experts.gpt_oss_triton_kernels_moe import (
     OAITritonExperts,
     UnfusedOAITritonExperts,
 )
diff --git a/tests/kernels/moe/test_moe.py b/tests/kernels/moe/test_moe.py
index 28be9f23d661..23ea85b52d76 100644
--- a/tests/kernels/moe/test_moe.py
+++ b/tests/kernels/moe/test_moe.py
@@ -14,8 +14,6 @@
 import torch
 from torch.nn import Parameter
 from torch.nn import functional as F
-from transformers import MixtralConfig
-from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock
 
 import vllm.model_executor.layers.fused_moe  # noqa
 from tests.kernels.moe.utils import (
@@ -24,10 +22,7 @@
     modular_triton_fused_moe,
 )
 from tests.kernels.utils import opcheck, stack_and_dev, torch_experts, torch_moe
-from vllm._aiter_ops import rocm_aiter_ops
 from vllm.config import VllmConfig, set_current_vllm_config
-from vllm.distributed.parallel_state import init_distributed_environment
-from vllm.forward_context import get_forward_context, set_forward_context
 from vllm.model_executor.layers.fused_moe import (
     MoEActivation,
     fused_topk,
@@ -37,7 +32,7 @@
     int4_w4a16_moe_quant_config,
     int8_w8a16_moe_quant_config,
 )
-from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
+from vllm.model_executor.layers.fused_moe.experts.marlin_moe import (
     batched_fused_marlin_moe,
     fused_marlin_moe,
 )
@@ -56,11 +51,10 @@
     marlin_quantize,
 )
 from vllm.model_executor.layers.quantization.utils.quant_utils import quantize_weights
-from vllm.model_executor.models.mixtral import MixtralMoE
 from vllm.platforms import current_platform
 from vllm.scalar_type import ScalarType, scalar_types
+from vllm.utils.math_utils import next_power_of_2
 from vllm.utils.torch_utils import set_random_seed
-from vllm.v1.worker.workspace import init_workspace_manager
 
 
 def iterative_moe(
@@ -149,6 +143,14 @@ def iterative_moe(
     {
         "a_type": [scalar_types.bfloat16],
         "b_type": scalar_types.float4_e2m1f,
+        "c_type": [scalar_types.bfloat16],
+        "group_blocks": [2],
+    },
+    # MXFP8
+    {
+        "a_type": [scalar_types.bfloat16],
+        "b_type": scalar_types.float8_e4m3fn,
+        "c_type": [scalar_types.bfloat16],
         "group_blocks": [2],
     },
     # AWQ-INT4 with INT8 activation
@@ -674,154 +676,35 @@ def test_fused_moe_wn16(
     torch.testing.assert_close(triton_output, torch_output, atol=2e-2, rtol=0)
 
 
-@pytest.mark.parametrize("dtype", [torch.bfloat16])
-@pytest.mark.parametrize("padding", [True, False])
-@pytest.mark.parametrize(
-    "use_rocm_aiter", [True, False] if current_platform.is_rocm() else [False]
-)
-@torch.inference_mode()
-def test_mixtral_moe(
-    default_vllm_config,
-    dist_init,
-    dtype: torch.dtype,
-    padding: bool,
-    use_rocm_aiter: bool,
-    monkeypatch,
-):
-    """Make sure our Mixtral MoE implementation agrees with the one from
-    huggingface."""
-
-    # Explicitly set AITER env var based on test parameter to ensure
-    # consistent behavior regardless of external environment
-    monkeypatch.setenv("VLLM_ROCM_USE_AITER", "1" if use_rocm_aiter else "0")
-    rocm_aiter_ops.refresh_env_variables()
-
-    if use_rocm_aiter and dtype == torch.float32:
-        pytest.skip("AITER ROCm test skip for float32")
-
-    monkeypatch.setenv("RANK", "0")
-    monkeypatch.setenv("LOCAL_RANK", "0")
-    monkeypatch.setenv("WORLD_SIZE", "1")
-    monkeypatch.setenv("MASTER_ADDR", "localhost")
-    monkeypatch.setenv("MASTER_PORT", "12345")
-    init_distributed_environment()
-    init_workspace_manager(torch.accelerator.current_device_index())
-
-    # Instantiate our and huggingface's MoE blocks
-    vllm_config.compilation_config.static_forward_context = dict()
-    with set_current_vllm_config(vllm_config), set_forward_context(None, vllm_config):
-        config = MixtralConfig()
-        hf_moe = MixtralSparseMoeBlock(config).to(dtype).to("cuda")
-        vllm_moe = MixtralMoE(
-            num_experts=config.num_local_experts,
-            top_k=config.num_experts_per_tok,
-            hidden_size=config.hidden_size,
-            intermediate_size=config.intermediate_size,
-            params_dtype=dtype,
-            tp_size=1,
-            dp_size=1,
-        ).cuda()
-
-        # Load the weights
-        vllm_moe.gate.weight.data[:] = hf_moe.gate.weight.data
-        if isinstance(hf_moe.experts, torch.nn.ModuleList):
-            # Transformers v4
-            for i in range(config.num_local_experts):
-                weights = (
-                    hf_moe.experts[i].w1.weight.data,
-                    hf_moe.experts[i].w3.weight.data,
-                )
-                vllm_moe.experts.w13_weight[i][:] = torch.cat(weights, dim=0)
-                vllm_moe.experts.w2_weight[i][:] = hf_moe.experts[i].w2.weight.data
-        else:
-            # Transformers v5
-            vllm_moe.experts.w13_weight.data[:] = hf_moe.experts.gate_up_proj.data
-            vllm_moe.experts.w2_weight.data[:] = hf_moe.experts.down_proj.data
-            # TODO: remove this line after https://github.com/huggingface/transformers/pull/43622
-            hf_moe.experts.config._experts_implementation = "eager"
-
-        # Generate input batch of dimensions [batch_size, seq_len, hidden_dim]
-        hf_inputs = torch.randn((1, 64, config.hidden_size)).to(dtype).to("cuda")
-        # vLLM uses 1D query [num_tokens, hidden_dim]
-        vllm_inputs = hf_inputs.flatten(0, 1)
-
-        # Pad the weight if moe padding is enabled
-        if padding:
-            vllm_moe.experts.w13_weight = Parameter(
-                F.pad(vllm_moe.experts.w13_weight, (0, 128), "constant", 0)[
-                    ..., 0:-128
-                ],
-                requires_grad=False,
-            )
-            vllm_moe.experts.w2_weight = Parameter(
-                F.pad(vllm_moe.experts.w2_weight, (0, 128), "constant", 0)[..., 0:-128],
-                requires_grad=False,
-            )
-            torch.accelerator.synchronize()
-            torch.accelerator.empty_cache()
-
-        # FIXME (zyongye) fix this after we move self.kernel
-        # assignment in FusedMoE.__init__
-
-        vllm_moe.experts.quant_method.process_weights_after_loading(vllm_moe.experts)
-
-        # need to override the forward context for unittests, otherwise it assumes
-        # we're running the model forward pass (the model specified in vllm_config)
-        get_forward_context().all_moe_layers = None
-
-        # Run forward passes for both MoE blocks
-        hf_states = hf_moe.forward(hf_inputs)
-        if isinstance(hf_states, tuple):
-            # Transformers v4
-            hf_states = hf_states[0]
-        vllm_states = vllm_moe.forward(vllm_inputs)
-
-    mixtral_moe_tol = {
-        torch.float32: 1e-3,
-        torch.float16: 1e-3,
-        torch.bfloat16: 1e-2,
-    }
-
-    if use_rocm_aiter:
-        # The values of rtol and atol are set based on the tests in ROCM AITER package.
-        # https://github.com/ROCm/aiter/blob/dfed377f4be7da96ca2d75ac0761f569676f7240/op_tests/test_moe.py#L174
-        torch.testing.assert_close(
-            hf_states.flatten(0, 1), vllm_states, rtol=0.01, atol=100
-        )
-    else:
-        torch.testing.assert_close(
-            hf_states.flatten(0, 1),
-            vllm_states,
-            rtol=mixtral_moe_tol[dtype],
-            atol=mixtral_moe_tol[dtype],
-        )
+MARLIN_MOE_SCENARIOS = [
+    # (m, n, k, e, topk, ep_size, act_order, is_k_full)
+    # No act_order: is_k_full=True matches usual case (marlin_is_k_full).
+    # N>=256 required for Marlin kernel thread config for MXFP8.
+    # Single token, small matrices
+    (1, 128, 256, 5, 2, 1, False, True),
+    # Single token, large matrices
+    (1, 1024, 2048, 5, 2, 1, False, True),
+    # Unaligned m, small matrices
+    (133, 256, 256, 5, 2, 1, False, True),
+    # Unaligned m, large matrices
+    (133, 1024, 2048, 12, 3, 1, False, True),
+    # Aligned batch, small matrices
+    (128, 256, 256, 5, 2, 1, False, True),
+    # Aligned batch, large matrices
+    (128, 1024, 2048, 12, 3, 1, False, True),
+    # Expert parallelism
+    (64, 1024, 2048, 12, 3, 4, False, True),
+    # Act order with is_k_full=True (no tensor parallelism)
+    (1, 1024, 2048, 5, 2, 1, True, True),
+    # Act order with is_k_full=False (tensor parallelism)
+    (133, 256, 256, 5, 2, 1, True, False),
+]
 
 
 def marlin_moe_generate_valid_test_cases():
     import itertools
 
-    m_list = [1, 123, 666]
-    n_list = [128, 1024]
-    k_list = [256, 2048]
-    e_list = [5, 12]
-    topk_list = [2, 3]
-    ep_size_list = [1, 4]
-    act_order_list = [True, False]
-    is_k_full_list = [True, False]
-
-    all_combinations = itertools.product(
-        MOE_MARLIN_QUANT_TEST_CONFIGS,
-        m_list,
-        n_list,
-        k_list,
-        e_list,
-        topk_list,
-        ep_size_list,
-        act_order_list,
-        is_k_full_list,
-    )
-
-    def is_invalid(
+    def is_valid(
         a_type,
         b_type,
         c_type,
@@ -838,39 +721,43 @@ def is_invalid(
         group_size = group_blocks if group_blocks <= 0 else group_blocks * 16
         if group_size > 0 and k % group_size != 0:
             return False
-
         if act_order and group_size in [-1, k, n]:
             return False
         if group_size in [k, n]:
             return False
-        if not act_order and is_k_full:
+        if b_type == scalar_types.float8_e4m3fn and group_size == 32 and is_k_full:
             return False
-
         return a_type.size_bits < 16 or a_type is c_type
 
     cases = []
-    for case in all_combinations:
-        quant_test_config, m, n, k, _, _, _, act_order, *_ = case
-        if act_order and not quant_test_config.get("support_act_order", False):
-            continue
-
+    for quant_test_config in MOE_MARLIN_QUANT_TEST_CONFIGS:
         f16_types = [scalar_types.float16]
-        inner_combinations = itertools.product(
-            quant_test_config.get("a_type", f16_types),
-            [quant_test_config["b_type"]],
-            quant_test_config.get("c_type", f16_types),
-            quant_test_config["group_blocks"],
+        inner_combinations = list(
+            itertools.product(
+                quant_test_config.get("a_type", f16_types),
+                [quant_test_config["b_type"]],
+                quant_test_config.get("c_type", f16_types),
+                quant_test_config["group_blocks"],
+            )
         )
 
+        supports_act_order = quant_test_config.get("support_act_order", False)
+
         for sub_case in inner_combinations:
             if (
                 sub_case[0] == scalar_types.float8_e4m3fn
-                and current_platform.get_device_capability() not in [89, 120]
+                and not current_platform.is_device_capability(89)
+                and not current_platform.is_device_capability_family(120)
             ):
                 continue
-            args = sub_case + (m, n, k) + case[4:]
-            if is_invalid(*args):
-                cases.append(args)
+
+            for scenario in MARLIN_MOE_SCENARIOS:
+                m, n, k, e, topk, ep_size, act_order, is_k_full = scenario
+                if act_order and not supports_act_order:
+                    continue
+                args = sub_case + (m, n, k, e, topk, ep_size, act_order, is_k_full)
+                if is_valid(*args):
+                    cases.append(args)
     return cases
 
 
@@ -1011,6 +898,7 @@ def make(
     marlin_moe_generate_valid_test_cases(),
 )
 @pytest.mark.skipif(current_platform.is_rocm(), reason="Skip for rocm")
+@pytest.mark.usefixtures("default_vllm_config")
 def test_fused_marlin_moe(
     a_type: ScalarType,
     b_type: ScalarType,
@@ -1025,7 +913,7 @@ def test_fused_marlin_moe(
     act_order: bool,
     is_k_full: bool,
 ):
-    torch.cuda.manual_seed(1)
+    set_random_seed(1)
     group_size = group_blocks if group_blocks <= 0 else group_blocks * 16
 
     if c_type == scalar_types.float16:
@@ -1123,9 +1011,10 @@ def test_fused_marlin_moe(
 
 @pytest.mark.flaky(reruns=2)
 @pytest.mark.skipif(current_platform.is_rocm(), reason="Skip for rocm")
+@pytest.mark.usefixtures("default_vllm_config")
 @pytest.mark.parametrize("m", [1, 256])
 def test_fused_marlin_moe_with_bias(m):
-    torch.cuda.manual_seed(0)
+    set_random_seed(0)
 
     e, topk = 32, 4
     n, k = 2048, 2048
@@ -1195,6 +1084,7 @@ def test_fused_marlin_moe_with_bias(m):
 
 @pytest.mark.flaky(reruns=2)
 @pytest.mark.skipif(current_platform.is_rocm(), reason="Skip for rocm")
+@pytest.mark.usefixtures("default_vllm_config")
 @pytest.mark.parametrize("m", [1, 64, 256])
 @pytest.mark.parametrize("n,k", [(1024, 1024), (2048, 2048)])
 @pytest.mark.parametrize("e,topk", [(8, 2), (64, 4)])
@@ -1207,7 +1097,7 @@ def test_fused_marlin_moe_non_gated(
     Non-gated activations like relu2 don't have the gate-up projection pattern,
     so w1 has shape (e, n, k) instead of (e, 2*n, k).
     """
-    torch.cuda.manual_seed(42)
+    set_random_seed(42)
 
     group_size = 16  # NVFP4 group size
     is_k_full = True
@@ -1391,7 +1281,7 @@ def test_cpu_fused_moe_basic(
     from vllm.model_executor.layers.fused_moe.cpu_fused_moe import CPUFusedMOE
 
     device = "cpu"
-    torch.manual_seed(7)
+    set_random_seed(7)
 
     a = torch.randn((m, k), device=device, dtype=dtype) / 10
     w13 = torch.randn((e, 2 * n, k), device=device, dtype=dtype) / 10
@@ -1448,36 +1338,96 @@ def __init__(self, w13, w2, b1=None, b2=None):
     torch.testing.assert_close(out, ref, atol=atol, rtol=0)
 
 
-@pytest.mark.parametrize("m", [16, 32, 64])
-@pytest.mark.parametrize("n", [128])
-@pytest.mark.parametrize("k", [128])
-@pytest.mark.parametrize("e", [8, 12, 16, 32])
-@pytest.mark.parametrize("topk", [2, 4])
-@pytest.mark.parametrize("max_tokens_per_batch", [16, 32, 64])
+def _batched_fused_marlin_moe_cases() -> list[Any]:
+    cases = [
+        pytest.param(
+            m,
+            128,
+            128,
+            e,
+            topk,
+            max_tokens_per_batch,
+            torch.bfloat16,
+            scalar_types.float4_e2m1f,
+            None,
+            1e-3,
+            id=(
+                f"m{m}-n128-k128-e{e}-topk{topk}-max_tokens{max_tokens_per_batch}-mxfp4"
+            ),
+        )
+        for m in [16, 32, 64]
+        for e in [8, 12, 16, 32]
+        for topk in [2, 4]
+        for max_tokens_per_batch in [16, 32, 64]
+    ]
+    cases.append(
+        pytest.param(
+            32,
+            128,
+            128,
+            8,
+            2,
+            64,
+            torch.float16,
+            scalar_types.uint4,
+            scalar_types.int8,
+            4e-2,
+            id="awq-int8-activation-metadata",
+        )
+    )
+    return cases
+
+
+@pytest.mark.parametrize(
+    ("m,n,k,e,topk,max_tokens_per_batch,dtype,quant_dtype,input_type,atol"),
+    _batched_fused_marlin_moe_cases(),
+)
 @pytest.mark.skipif(current_platform.is_rocm(), reason="Skip for rocm")
 def test_batched_fused_marlin_moe(
-    m: int, n: int, k: int, e: int, topk: int, max_tokens_per_batch: int
+    m: int,
+    n: int,
+    k: int,
+    e: int,
+    topk: int,
+    max_tokens_per_batch: int,
+    dtype: torch.dtype,
+    quant_dtype: ScalarType,
+    input_type: ScalarType | None,
+    atol: float,
 ):
     print(
         f"testing m={m}, n={n}, k={k}, e={e}, "
         f"topk={topk}, "
-        f"max_tokens_per_batch={max_tokens_per_batch}"
+        f"max_tokens_per_batch={max_tokens_per_batch}, "
+        f"dtype={dtype}, quant_dtype={quant_dtype}, input_type={input_type}"
     )
-    torch.cuda.manual_seed(0)
+    set_random_seed(0)
 
-    dtype = torch.bfloat16
-    quant_dtype = scalar_types.float4_e2m1f
     group_size = 32
+    if input_type == scalar_types.int8:
+        input_dtype = torch.int8
+    elif input_type == scalar_types.float8_e4m3fn:
+        input_dtype = torch.float8_e4m3fn
+    else:
+        input_dtype = None
 
     a = torch.randn((m, k), device="cuda", dtype=dtype) / 10
     w1 = torch.randn((e, 2 * n, k), device="cuda", dtype=dtype) / 20
     w2 = torch.randn((e, k, n), device="cuda", dtype=dtype) / 20
 
     w1_data = MarlinMoEWeightData.make(
-        w=w1, quant_type=quant_dtype, group_size=group_size, act_order=None
+        w=w1,
+        quant_type=quant_dtype,
+        group_size=group_size,
+        act_order=None,
+        input_type=input_type,
     )
     w2_data = MarlinMoEWeightData.make(
-        w=w2, quant_type=quant_dtype, group_size=group_size, act_order=None
+        w=w2,
+        quant_type=quant_dtype,
+        group_size=group_size,
+        act_order=None,
+        input_type=input_type,
     )
 
     score = torch.randn((m, e), device="cuda", dtype=dtype)
@@ -1597,6 +1547,12 @@ def run(
         "quant_type_id": quant_dtype.id,
         "is_k_full": True,
     }
+    if input_dtype is not None:
+        kwargs["input_dtype"] = input_dtype
+        if w1_data.a_scales_factor is not None:
+            kwargs["input_global_scale1"] = w1_data.a_scales_factor
+        if w2_data.a_scales_factor is not None:
+            kwargs["input_global_scale2"] = w2_data.a_scales_factor
 
     # Reference
     fused_marlin_moe_kwargs = kwargs | {
@@ -1612,7 +1568,7 @@ def run(
         pytest.skip("Cannot represent data in Batched Format.")
     marlin_output = br.run(a, kwargs)
 
-    torch.testing.assert_close(marlin_output, ref_marlin_output, atol=1e-3, rtol=0)
+    torch.testing.assert_close(marlin_output, ref_marlin_output, atol=atol, rtol=0)
 
 
 @pytest.mark.parametrize("m,n,k", [(32, 1024, 1024)])
@@ -1664,13 +1620,13 @@ def test_unquantized_bf16_flashinfer_trtllm_backend(
         intermediate_size_per_partition=n,
         num_local_experts=e,
         num_logical_experts=e,
-        activation="silu",
+        activation=MoEActivation.SILU,
         device="cuda",
         moe_parallel_config=FusedMoEParallelConfig.make_no_parallel(),
         in_dtype=dtype,
         is_act_and_mul=True,
         routing_method=RoutingMethodType.Renormalize,
-        max_num_tokens=m,
+        max_num_tokens=next_power_of_2(m),
     )
 
     with set_current_vllm_config(vllm_config):
@@ -1695,13 +1651,25 @@ def test_unquantized_bf16_flashinfer_trtllm_backend(
         layer.topk_group = 1
         layer.intermediate_size_per_partition = n
         layer.ep_rank = 0
-        layer.activation = "silu"
+        layer.activation = MoEActivation.SILU
         layer.e_score_correction_bias = None
         layer.routing_method_type = RoutingMethodType.Renormalize
+        layer.expert_map = None
+        layer.apply_router_weight_on_input = False
+        layer.routed_scaling_factor = None
+        layer.shared_experts = None
+        layer._expert_routing_tables = lambda: None
 
         quant_method.process_weights_after_loading(layer)
 
-        trtllm_output = quant_method.forward_monolithic_cuda(
+        assert quant_method.moe_kernel is not None, (
+            "moe_kernel should be set after process_weights_after_loading"
+        )
+        assert quant_method.supports_internal_mk, (
+            "supports_internal_mk should be True after setup"
+        )
+
+        trtllm_output = quant_method.apply_monolithic(
             layer=layer,
             x=a,
             router_logits=router_logits,
diff --git a/tests/kernels/moe/test_moe_layer.py b/tests/kernels/moe/test_moe_layer.py
new file mode 100644
index 000000000000..7942f738a1e5
--- /dev/null
+++ b/tests/kernels/moe/test_moe_layer.py
@@ -0,0 +1,1861 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for the MOE layer.
+
+Run `pytest tests/kernels/test_moe_layer.py`.
+"""
+
+import functools
+import os
+import traceback
+import types
+from collections.abc import Callable
+from dataclasses import astuple, dataclass, fields
+from itertools import product
+from typing import get_args
+
+import pytest
+import torch
+
+import vllm.model_executor.layers.quantization.utils.w8a8_utils
+from tests.kernels.moe.modular_kernel_tools.parallel_utils import (
+    ProcessGroupInfo,
+    _set_vllm_config,
+    parallel_launch_with_config,
+)
+from tests.kernels.moe.utils import TestMLP, make_test_weights, moe_quantize_weights
+from vllm.config import (
+    CompilationConfig,
+    ParallelConfig,
+    SchedulerConfig,
+    VllmConfig,
+    set_current_vllm_config,
+)
+from vllm.distributed import (
+    get_ep_group,
+    get_eplb_group,
+    tensor_model_parallel_all_gather,
+)
+from vllm.distributed.eplb.eplb_communicator import create_eplb_communicator
+from vllm.distributed.eplb.rebalance_execute import rearrange_expert_weights_inplace
+from vllm.forward_context import set_forward_context
+from vllm.model_executor.layers.fused_moe import FusedMoE, fused_experts
+from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
+from vllm.model_executor.layers.fused_moe.router.router_factory import (
+    create_fused_moe_router,
+)
+from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
+from vllm.model_executor.layers.quantization.modelopt import (
+    ModelOptFp8Config,
+    ModelOptNvFp4Config,
+)
+from vllm.model_executor.models.utils import sequence_parallel_chunk
+from vllm.platforms import current_platform
+from vllm.utils.flashinfer import (
+    has_flashinfer_nvlink_one_sided,
+    has_flashinfer_nvlink_two_sided,
+)
+from vllm.utils.import_utils import has_deep_ep, has_mori, has_nixl_ep
+from vllm.utils.math_utils import cdiv, next_power_of_2
+from vllm.utils.torch_utils import set_random_seed
+from vllm.v1.worker.workspace import (
+    init_workspace_manager,
+    is_workspace_manager_initialized,
+)
+
+fp8_dtype = torch.float8_e4m3fn  # current_platform.fp8_dtype
+
+SHAPE_COMBOS = [
+    (1, 128, 256),
+    (32, 512, 512),
+    (222, 1024, 2048),
+]
+MAX_M = max([x[0] for x in SHAPE_COMBOS])
+
+NUM_EXPERTS = [8, 64]
+TOP_KS = [2, 6]
+
+# dp_size, tp_size, use_ep
+# Note: DP+TP is not yet supported in the FusedMoE layer.
+PARALLEL_COMBOS = [
+    [1, 2, False],
+    [1, 4, False],
+    [2, 1, True],
+    [4, 1, True],
+    # This combination indicates sequence parallel.
+    # See ParallelConfig.use_sequence_parallel.
+    [2, 2, True],
+]
+
+# TODO: should this even be set manually?  let oracles handle this
+BACKENDS = ["allgather_reducescatter"]
+
+if has_mori():
+    BACKENDS += ["mori"]
+
+if has_flashinfer_nvlink_two_sided():
+    BACKENDS += ["flashinfer_nvlink_two_sided"]
+
+if has_flashinfer_nvlink_one_sided():
+    BACKENDS += ["flashinfer_nvlink_one_sided"]
+
+if has_deep_ep():
+    BACKENDS += ["deepep_high_throughput", "deepep_low_latency"]
+
+if has_nixl_ep():
+    BACKENDS += ["nixl_ep"]
+
+QUANT_METHODS = [
+    None,
+    "fp8",
+    "fp8_blocked",
+    "modelopt_fp8",
+    "modelopt_fp4",
+]
+
+# Which quantization methods each backend supports.
+# fmt: off
+BACKEND_SUPPORTED_QUANTS: dict[str, set[str | None]] = {
+    "allgather_reducescatter":     {None,         "fp8", "modelopt_fp8", "modelopt_fp4"}, # noqa: E501
+    "mori":                        {None,         "fp8", "modelopt_fp8"},
+    "flashinfer_nvlink_two_sided": {None, "fp8_blocked",                 "modelopt_fp4"}, # noqa: E501
+    "flashinfer_nvlink_one_sided": {None,                                "modelopt_fp4"}, # noqa: E501
+    "deepep_low_latency":          {None, "fp8_blocked",                 "modelopt_fp4"}, # noqa: E501
+    "deepep_high_throughput":      {None, "fp8_blocked", "modelopt_fp8", "modelopt_fp4"}, # noqa: E501
+    "nixl_ep":                     {None, "fp8_blocked", "modelopt_fp8"},
+}
+
+# Map from backend -> (DP/EP support, DP support, TP support, SP support)
+BACKEND_EP_DP_TP_SUPPORT: dict[str, tuple[bool, bool, bool, bool]] = {
+    "allgather_reducescatter":     (True,  True,  True,  True),
+    "mori":                        (True, False, False,  True),
+    "flashinfer_nvlink_two_sided": (False, True, False, False),
+    "flashinfer_nvlink_one_sided": (False, True, False, False),
+    "deepep_low_latency":          (True, False, False,  True),
+    "deepep_high_throughput":      (True, False, False,  True),
+    "nixl_ep":                     (True, False, False,  True),
+}
+# fmt: on
+
+# Which quantization methods support EPLB.
+# ModelOptFp8MoEMethod inherits supports_eplb=False from FusedMoEMethodBase.
+# TODO: double check modelopt fp8
+# modelopt_fp4 excluded: get_expert_weights() can't handle NvFP4 packed format.
+EPLB_SUPPORTED_QUANTS: list[str | None] = [None, "fp8"]
+
+# Which backends support EPLB.
+# deepep backends fail in get_expert_weights / rearrange_expert_weights_inplace.
+# TODO(bnell): check this
+EPLB_SUPPORTED_BACKENDS: list[str] = ["allgather_reducescatter"]
+
+
+def mock_normalize_e4m3fn_to_e4m3fnuz(
+    weight: torch.Tensor,
+    weight_scale: torch.Tensor,
+    input_scale: torch.Tensor | None = None,
+):
+    return weight, weight_scale, input_scale
+
+
+# Needed since weights will already be in e4m3fnuz format on platforms that
+# use the fnuz fp8 format and the normalize_e4m3fn_to_e4m3fnuz() function
+# is not being tested here.
+# NOTE: The weights are quantized by moe_quantize_weights_2d in
+# _quantize_fp8_halves.
+# NOTE: Not able to use monkeypatch because of the spawned parallel workers.
+def override_normalize_e4m3fn_to_e4m3fnuz():
+    vllm.model_executor.layers.quantization.utils.w8a8_utils.normalize_e4m3fn_to_e4m3fnuz = mock_normalize_e4m3fn_to_e4m3fnuz  # noqa: E501
+
+
+def sp_wrapper(
+    fn: Callable | FusedMoE, is_sequence_parallel: bool | None = None
+) -> Callable:
+    """Wrapper to handle sequence parallelism chunking and gathering.
+
+    For SP with EP:
+    - The TP group is created with the original tensor_parallel_size (e.g., 2)
+    - get_tp_group() has the correct world_size for SP operations
+    - sequence_parallel_chunk() uses get_tensor_model_parallel_world_size()
+    - tensor_model_parallel_all_gather() uses get_tp_group()
+    - Both should work correctly even when EP is enabled
+    """
+    if isinstance(fn, FusedMoE):
+        assert is_sequence_parallel is None
+        is_sequence_parallel = fn.is_sequence_parallel
+    else:
+        assert is_sequence_parallel is not None
+
+    if is_sequence_parallel:
+
+        def wrapper(
+            hidden_states: torch.Tensor,
+            router_logits: torch.Tensor,
+        ) -> torch.Tensor:
+            # Split sequence across TP ranks
+            # Both hidden_states and router_logits have [num_tokens, ...] shape
+            hidden_states = sequence_parallel_chunk(hidden_states)
+            router_logits = sequence_parallel_chunk(router_logits)
+            # Run MoE on local chunk
+            result = fn(hidden_states, router_logits)
+            # Gather results from all TP ranks
+            result = tensor_model_parallel_all_gather(result, 0)
+            # Remove any padding added by SP.
+            return result[: hidden_states.shape[0]]
+
+        return wrapper
+    return fn
+
+
+def maybe_roundup_layer_hidden_size(
+    hidden_size: int,
+    act_dtype: torch.dtype,
+    backend: str | None,
+) -> int:
+    """
+    Given layer hidden size and MoE configurations, round up hidden_size
+    if necessary.
+
+    Args:
+        hidden_size: Layer hidden-size
+        act_dtype: Data type of the layer activations.
+        moe_parallel_config: Fused MoE parallelization strategy configuration.
+
+    Return:
+        Rounded up hidden_size if rounding up is required based on the configs
+        and all2all backend.
+        Original hidden size otherwise.
+    """
+    if backend == "deepep_high_throughput":
+        from vllm.model_executor.layers.fused_moe.prepare_finalize.deepep_ht import (
+            DeepEPHTPrepareAndFinalize,
+        )
+
+        hidden_size = DeepEPHTPrepareAndFinalize.maybe_roundup_layer_hidden_size(
+            hidden_size, act_dtype
+        )
+
+    if backend == "deepep_low_latency":
+        from vllm.model_executor.layers.fused_moe.prepare_finalize.deepep_ll import (
+            DeepEPLLPrepareAndFinalize,
+        )
+
+        hidden_size = DeepEPLLPrepareAndFinalize.maybe_roundup_layer_hidden_size(
+            hidden_size
+        )
+
+    return hidden_size
+
+
+def rank_chunk(num: int, r: int, w: int) -> int:
+    rem = num % w
+    return (num // w) + (1 if r < rem else 0)
+
+
+def chunk_by_rank(
+    t: torch.Tensor,
+    r: int,
+    w: int,
+    dim: int = 0,
+    device: torch.device | None = None,
+) -> torch.Tensor:
+    chunk = cdiv(t.shape[dim], w)
+    t = t.narrow(dim, r * chunk, chunk)
+    if device is not None:
+        t = t.to(device)
+    return t
+
+
+def maybe_chunk_by_rank(
+    t: torch.Tensor | None,
+    r: int,
+    w: int,
+    dim: int = 0,
+    device: torch.device | None = None,
+) -> torch.Tensor | None:
+    if t is not None:
+        return chunk_by_rank(t, r, w, dim, device)
+    else:
+        return t
+
+
+def tp_chunk_gate_up(
+    w: torch.Tensor,
+    tp_rank: int,
+    tp_size: int,
+    dim: int,
+    device: torch.device | int | None = None,
+) -> torch.Tensor:
+    """TP-chunk a combined [gate; up] weight, splitting each half separately
+    so every rank gets a portion of both gate and up."""
+    half = w.shape[dim] // 2
+    gate = chunk_by_rank(
+        w.narrow(dim, 0, half), tp_rank, tp_size, dim=dim, device=device
+    )
+    up = chunk_by_rank(
+        w.narrow(dim, half, half), tp_rank, tp_size, dim=dim, device=device
+    )
+    return torch.cat([gate, up], dim=dim)
+
+
+@dataclass
+class MoETestConfig:
+    m: int
+    n: int
+    k: int
+    num_experts: int
+    top_k: int
+    in_dtype: torch.dtype
+    quantization: str | None
+    use_shared_experts: bool
+    use_gate: bool
+    use_routed_input_transform: bool
+    enable_eplb: bool = False
+    backend: str | None = None
+    ep_size: int = 1
+    dp_size: int = 1
+    tp_size: int = 1
+
+    @property
+    def is_sequence_parallel(self) -> bool:
+        # Sequence parallelism: EP enabled + TP dimension used for sequence splitting
+        # In test config: ep_size represents total expert parallel size
+        # tp_size represents the original TP dimension (becomes sp_size in FusedMoE)
+        # dp_size represents data parallel size
+        # For SP: we need EP enabled (ep_size > 1) and sequence splitting (tp_size > 1)
+        return self.ep_size > 1 and self.tp_size > 1
+
+    # TODO: add more error messages
+    def id(self) -> str:
+        def proc(s: str) -> str:
+            return s.removeprefix("torch.")
+
+        id_str = "-".join([proc(str(f)) for f in astuple(self)])
+        return f"[{id_str}]"
+
+    # TODO: add more error messages
+    @staticmethod
+    def from_id(id: str) -> "MoETestConfig":
+        id = id[1:-1]
+        str_values = id.split("-")
+
+        def convert(v: str, ty):
+            if isinstance(ty, types.UnionType):
+                sub_ty = list(get_args(ty))
+                assert len(sub_ty) == 2 and types.NoneType in sub_ty
+                sub_ty.remove(types.NoneType)
+                return sub_ty[0](v) if v != "None" else None
+            elif ty is torch.dtype:
+                ty_val = getattr(torch, v, None)
+                assert isinstance(ty_val, torch.dtype)
+                return ty_val
+            elif ty is bool:
+                return v == "True"
+            else:
+                return ty(v)
+
+        values = tuple(
+            [convert(v, f.type) for v, f in zip(str_values, fields(MoETestConfig))]
+        )
+        return MoETestConfig(*values)
+
+
+def generate_valid_test_configs(
+    backend: str,
+    ep_size: int,
+    dp_size: int,
+    tp_size: int,
+    enable_eplb: bool,
+    verbosity: int = 0,
+) -> list[MoETestConfig]:
+    configs: list[MoETestConfig] = []
+
+    for (
+        shape,
+        num_experts,
+        top_k,
+        quantization,
+        use_shared_experts,
+        use_gate,
+        use_routed_input_transform,
+    ) in product(
+        SHAPE_COMBOS,
+        NUM_EXPERTS,
+        TOP_KS,
+        QUANT_METHODS,
+        [False, True],  # shared
+        [False, True],  # gate
+        [False, True],  # routed input exform
+    ):
+        config = MoETestConfig(
+            shape[0],  # m
+            shape[1],  # n
+            shape[2],  # k
+            num_experts,
+            top_k,
+            torch.bfloat16,
+            quantization,
+            use_shared_experts,
+            use_gate,
+            use_routed_input_transform,
+            enable_eplb,
+            backend,
+            ep_size,
+            dp_size,
+            tp_size,
+        )
+
+        valid, reason = is_valid_config(config)
+        if valid:
+            configs.append(config)
+        elif verbosity > 1:
+            print(f"Skipping invalid config {config} - {reason}")
+
+    return configs
+
+
+# TODO: break this up into sections
+def is_valid_config(config: MoETestConfig) -> tuple[bool, str | None]:
+    # routed_input_transform only makes sense with shared_experts (latent MoE)
+    # TODO: not sure this is true
+    if config.use_routed_input_transform and not config.use_shared_experts:
+        return False, "routed_input_transform requires shared_experts"
+
+    # TODO: disable for now
+    if config.use_routed_input_transform and config.enable_eplb:
+        return False, "routed_input_transform not supported with EPLB."
+
+    # TODO: disable for now
+    if config.use_routed_input_transform and config.use_gate:
+        return (
+            False,
+            "routed_input_transform not supported with gate because of "
+            "padding problems",
+        )
+
+    # TODO: disable for now
+    if config.use_routed_input_transform and config.backend in [
+        "deepep_low_latency",
+        "deepep_high_throughput",
+    ]:
+        return (
+            False,
+            "routed_input_transform not supported with DeepEP backends because "
+            "of padding problems",
+        )
+
+    # routed_input_transform + quantization + high hidden dimensions
+    # TODO: Disable >= 2048 for now due to insane errors.
+    if (
+        config.use_routed_input_transform
+        and config.quantization is not None
+        and config.k >= 2048
+    ):
+        return (
+            False,
+            "routed_input_transform + quantization + higher hidden dimensions "
+            "leads to large differences.",
+        )
+
+    # Skip modelopt_fp4 if not on B100+ (compute capability 10.0+)
+    if (
+        config.quantization == "modelopt_fp4"
+        and not current_platform.has_device_capability(100)
+    ):
+        return False, "modelopt_fp4 not supported on H100+ GPUs"
+
+    # Skip flashinfer_nvlink if not on H100+ (compute capability 10.0+)
+    if (
+        config.backend is not None
+        and config.backend.startswith("flashinfer_nvlink")
+        and not current_platform.has_device_capability(90)
+    ):
+        return False, "flashinfer_nvlink needs H100+ GPUs"
+
+    # Backend-specific checks
+    if config.backend is not None:
+        supported_quants = BACKEND_SUPPORTED_QUANTS.get(config.backend)
+        if supported_quants is not None and config.quantization not in supported_quants:
+            return (
+                False,
+                f"{config.backend} does not support quantization={config.quantization}",
+            )
+
+        if config.backend == "deepep_low_latency":
+            from vllm.model_executor.layers.fused_moe.prepare_finalize.deepep_ll import (  # noqa: E501
+                DeepEPLLPrepareAndFinalize,
+            )
+
+            if config.k not in DeepEPLLPrepareAndFinalize.SUPPORTED_HIDDEN_SIZES:
+                return (
+                    False,
+                    f"Skipping unsupported K {config.k} in {config.backend} w/o EP.",
+                )
+
+        if config.backend == "nixl_ep":
+            from vllm.model_executor.layers.fused_moe.prepare_finalize.nixl_ep import (  # noqa: E501
+                NixlEPPrepareAndFinalize,
+            )
+
+            if config.k not in NixlEPPrepareAndFinalize.SUPPORTED_HIDDEN_SIZES:
+                return (
+                    False,
+                    f"Skipping unsupported K {config.k} in {config.backend} w/o EP.",
+                )
+
+    if config.backend is not None:
+        supports_ep_dp, supports_dp, supports_tp, supports_sp = (
+            BACKEND_EP_DP_TP_SUPPORT[config.backend]
+        )
+
+        if config.tp_size > 1 and not supports_tp and not config.is_sequence_parallel:
+            return False, f"{config.backend} does not support TP."
+
+        if config.dp_size > 1 and config.ep_size == 1 and not supports_dp:
+            return False, f"{config.backend} does not support DP."
+
+        if config.dp_size > 1 and config.ep_size > 1 and not supports_ep_dp:
+            return False, f"{config.backend} does not support EP/DP."
+
+        if config.is_sequence_parallel and not supports_sp:
+            return False, f"{config.backend} does not support SP."
+    else:
+        if config.tp_size > 1 or config.ep_size > 1 or config.dp_size > 1:
+            return False, "An all2all backend is required for parallelism."
+
+    # Sequence parallelism specific validations
+    if config.is_sequence_parallel:
+        if config.ep_size == 1:
+            return False, "Sequence parallelism requires EP to be enabled (ep_size > 1)"
+
+        if config.tp_size == 1:
+            return (
+                False,
+                "Sequence parallelism requires tp_size > 1 for sequence splitting",
+            )
+
+        # SP is essentially EP + sequence splitting
+        # Verify the relationship: ep_size should equal dp_size * tp_size
+        # (when pcp_size=1).
+        expected_ep_size = config.dp_size * config.tp_size
+        if config.ep_size != expected_ep_size:
+            return False, (
+                f"For sequence parallelism: ep_size ({config.ep_size}) should equal "
+                f"dp_size * tp_size ({expected_ep_size})"
+            )
+
+    if config.enable_eplb:
+        if config.ep_size == 1:
+            return False, "EPLB requires EP."
+
+        if config.quantization not in EPLB_SUPPORTED_QUANTS:
+            return False, f"EPLB not supported with {config.quantization} quantization."
+
+        if config.backend not in EPLB_SUPPORTED_BACKENDS:
+            return False, f"EPLB not supported with {config.backend}."
+
+        if config.num_experts % config.dp_size != 0:
+            return False, "EPLB requires num_experts divisible by ep_size"
+
+    return True, None
+
+
+def chunk_scales_by_rank(
+    t: torch.Tensor | None,
+    r: int,
+    w: int,
+    device: torch.device | None = None,
+) -> torch.Tensor | None:
+    if t is not None and t.numel() > 1:
+        # Calculate start index by summing chunk sizes for all previous ranks
+        # start = sum(rank_chunk(t.shape[0], i, w) for i in range(r))
+        # chunk = rank_chunk(t.shape[0], r, w)
+        # t = t[start:(start + chunk)]
+        chunk = rank_chunk(t.shape[0], r, w)
+        t = t[(r * chunk) : max(t.shape[0], (r + 1) * chunk)]
+
+    if t is not None and device is not None:
+        t = t.to(device)
+
+    return t
+
+
+def chunk_scales(
+    t: torch.Tensor | None,
+    start: int,
+    end: int,
+    device: torch.device | None = None,
+) -> torch.Tensor | None:
+    if t is not None and t.numel() > 1:
+        t = t[start:end]
+
+    if t is not None and device is not None:
+        t = t.to(device)
+
+    return t
+
+
+@dataclass
+class QuantizedWeights:
+    w13_weight: torch.Tensor
+    w2_weight: torch.Tensor
+    w13_weight_scale: torch.Tensor | None = None
+    w2_weight_scale: torch.Tensor | None = None
+    w13_weight_scale_2: torch.Tensor | None = None
+    w2_weight_scale_2: torch.Tensor | None = None
+    w13_input_scale: torch.Tensor | None = None
+    w2_input_scale: torch.Tensor | None = None
+
+
+def _quantize_fp8_halves(
+    w1: torch.Tensor,
+    w2: torch.Tensor,
+    block_shape: list[int] | None = None,
+) -> QuantizedWeights:
+    """Quantize w13 gate/up halves separately to FP8, producing per-shard scales."""
+    half = w1.shape[1] // 2
+    w1q_a, w1s_a, _ = moe_quantize_weights(
+        w1[:, :half, :],
+        None,
+        fp8_dtype,
+        False,
+        block_shape,
+    )
+    w1q_b, w1s_b, _ = moe_quantize_weights(
+        w1[:, half:, :],
+        None,
+        fp8_dtype,
+        False,
+        block_shape,
+    )
+    assert w1s_a is not None and w1s_b is not None
+
+    w2q, w2s, _ = moe_quantize_weights(w2, None, fp8_dtype, False, block_shape)
+    assert w2s is not None
+
+    if block_shape is not None:
+        # Blocked quantization: scales have shape (E, n_tiles, k_tiles)
+        # Concatenate gate and up scales along the n_tiles dimension (dim=1)
+        # to match the concatenation of gate and up weights
+        w13_weight_scale = torch.cat([w1s_a, w1s_b], dim=1)
+        # w2 scales keep their blocked shape (E, k_tiles, n_tiles)
+        w2_weight_scale = w2s
+    else:
+        # Non-blocked quantization: scales have shape (E, 1, 1)
+        # Each w1s_x is (E, 1, 1) -> reshape to (E, 1), cat to (E, 2)
+        w13_weight_scale = torch.cat([w1s_a.view(-1, 1), w1s_b.view(-1, 1)], dim=1)
+        # w2s is (E, 1, 1) -> reshape to (E,)
+        w2_weight_scale = w2s.view(-1)
+
+    return QuantizedWeights(
+        w13_weight=torch.cat([w1q_a, w1q_b], dim=1),
+        w2_weight=w2q,
+        w13_weight_scale=w13_weight_scale,
+        w2_weight_scale=w2_weight_scale,
+    )
+
+
+def quantization_to_quant_dtype(
+    quantization: str | None,
+) -> torch.dtype | str | None:
+    if quantization is None:
+        return None
+    elif quantization in ["fp8", "fp8_blocked", "modelopt_fp8"]:
+        return fp8_dtype
+    elif quantization in ["modelopt_fp4"]:
+        return "nvfp4"
+    else:
+        raise NotImplementedError(f"Unsupported quantization: {quantization}")
+
+
+def make_quant_config(
+    quantization: str | None,
+    w1: torch.Tensor,
+    w2: torch.Tensor,
+    num_experts: int,
+) -> tuple[QuantizationConfig | None, QuantizedWeights]:
+    from vllm.model_executor.layers.quantization.fp8 import Fp8Config
+
+    if quantization is None:
+        return None, QuantizedWeights(w13_weight=w1, w2_weight=w2)
+
+    if quantization == "fp8":
+        return Fp8Config(True), _quantize_fp8_halves(w1, w2)
+
+    if quantization == "fp8_blocked":
+        block_shape = [128, 128]
+        return Fp8Config(True, weight_block_size=block_shape), _quantize_fp8_halves(
+            w1, w2, block_shape
+        )
+
+    if quantization == "modelopt_fp8":
+        qw = _quantize_fp8_halves(w1, w2)
+        # why?
+        qw.w13_input_scale = torch.ones(
+            num_experts, dtype=torch.float32, device=w1.device
+        )
+        # why?
+        qw.w2_input_scale = torch.ones(
+            num_experts, dtype=torch.float32, device=w2.device
+        )
+        quant_config = ModelOptFp8Config(
+            quant_method="FP8",
+            is_checkpoint_fp8_serialized=True,
+            kv_cache_quant_method=None,
+            exclude_modules=[],
+        )
+        return quant_config, qw
+
+    if quantization == "modelopt_fp4":
+        # Quantize full w13 at once so both gate/up halves share the same
+        # global scale per expert.  process_weights_after_loading uses
+        # w13_weight_scale_2[:, 0] for the entire tensor, so the two shard
+        # scales must match.
+        w1q, w1s, w1gs = moe_quantize_weights(w1, None, "nvfp4", False, None)
+        assert w1s is not None and w1gs is not None
+
+        w2q, w2s, w2gs = moe_quantize_weights(w2, None, "nvfp4", False, None)
+        assert w2s is not None and w2gs is not None
+
+        qw = QuantizedWeights(
+            w13_weight=w1q,
+            w2_weight=w2q,
+            w13_weight_scale=w1s,
+            w2_weight_scale=w2s,
+            # weight_scale_2 = 1/w_gs: the kernel computes
+            # g_alphas = a_scale * w_scale_2, and correct dequant needs 1/w_gs.
+            # Expand per-expert scalar to (E, 2) for the two shards.
+            w13_weight_scale_2=(1.0 / w1gs).unsqueeze(1).expand(-1, 2).contiguous(),
+            w2_weight_scale_2=1.0 / w2gs,
+            w13_input_scale=torch.ones(
+                (num_experts, 2), dtype=torch.float32, device=w1.device
+            ),
+            w2_input_scale=torch.ones(
+                num_experts, dtype=torch.float32, device=w2.device
+            ),
+        )
+        quant_config = ModelOptNvFp4Config(
+            is_checkpoint_nvfp4_serialized=True,
+            kv_cache_quant_algo=None,
+            exclude_modules=[],
+        )
+        return quant_config, qw
+
+    raise NotImplementedError(f"Unsupported quantization: {quantization}")
+
+
+@dataclass
+class SharedExpertsConfig:
+    w1: torch.Tensor
+    w2: torch.Tensor
+    w1_s: torch.Tensor | None = None
+    w2_s: torch.Tensor | None = None
+    quant_dtype: torch.dtype | str | None = None
+
+
+@dataclass
+class MoETestData:
+    """Container for MOE test data and transforms."""
+
+    w1: torch.Tensor
+    w2: torch.Tensor
+    hidden_states: torch.Tensor
+    router_logits: torch.Tensor
+    shared_experts_config: SharedExpertsConfig | None
+    gate: torch.nn.Module | None
+    routed_input_transform: torch.nn.Module | None
+    routed_output_transform: torch.nn.Module | None
+    routed_expert_hidden_size: int
+
+
+class SimpleGate(torch.nn.Module):
+    """Simple gate module for testing: computes router logits from hidden states."""
+
+    def __init__(
+        self,
+        hidden_size: int,
+        num_experts: int,
+        dtype: torch.dtype,
+        device: str = "cuda",
+    ):
+        super().__init__()
+        self.weight = torch.nn.Parameter(
+            torch.randn(num_experts, hidden_size, device=device, dtype=dtype) / 10
+        )
+
+    def forward(self, hidden_states: torch.Tensor) -> tuple[torch.Tensor, None]:
+        """Returns (router_logits, None) to match expected signature."""
+        router_logits = torch.nn.functional.linear(hidden_states, self.weight)
+        return router_logits, None
+
+
+class SimpleRoutedInputTransform(torch.nn.Module):
+    """Simple linear transform for testing routed input transform
+    (e.g., latent projection).
+    """
+
+    def __init__(
+        self,
+        in_features: int,
+        out_features: int,
+        dtype: torch.dtype,
+        device: str = "cuda",
+    ):
+        super().__init__()
+        self.weight = torch.nn.Parameter(
+            torch.randn(out_features, in_features, device=device, dtype=dtype) / 10
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.nn.functional.linear(x, self.weight)
+
+
+def create_shared_experts_from_config(
+    shared_experts_config: SharedExpertsConfig | None,
+    in_dtype: torch.dtype,
+    tp_size: int = 1,
+    tp_rank: int = 0,
+    is_sequence_parallel: bool = False,
+    device: torch.device | str | None = "cuda",
+) -> TestMLP | None:
+    """Create TestMLP for shared experts from config.
+
+    Args:
+        shared_experts_config: Configuration for shared experts
+        in_dtype: Output data type
+        tp_size: Tensor parallel size (for weight chunking)
+        tp_rank: Tensor parallel rank (for weight chunking)
+        device: Device to move weights to (optional)
+
+    Returns:
+        TestMLP instance or None if config is None
+    """
+    if shared_experts_config is None:
+        return None
+
+    s_w1 = shared_experts_config.w1
+    s_w2 = shared_experts_config.w2
+
+    # Apply TP chunking if needed
+    if tp_size > 1 and not is_sequence_parallel:
+        s_w1 = tp_chunk_gate_up(s_w1, tp_rank, tp_size, dim=1, device=device)
+        s_w2 = chunk_by_rank(s_w2, tp_rank, tp_size, dim=0, device=device)
+    else:
+        s_w1 = s_w1.to(device)
+        s_w2 = s_w2.to(device)
+
+    return TestMLP(w1=s_w1, w2=s_w2, out_dtype=in_dtype)
+
+
+# Make version that takes a MoETestConfig?
+def setup_moe_test_data(
+    m: int,
+    k: int,
+    n: int,
+    num_experts: int,
+    in_dtype: torch.dtype,
+    use_shared_experts: bool,
+    use_gate: bool,
+    use_routed_input_transform: bool,
+    backend: str | None,
+    device: str = "cuda",
+) -> MoETestData:
+    """Setup test data and transforms for MOE tests.
+
+    Args:
+        m: Number of tokens
+        k: Hidden size
+        n: Intermediate size
+        num_experts: Number of experts
+        in_dtype: Data type for tensors
+        use_shared_experts: Whether to create shared experts config
+        use_gate: Whether to create gate module
+        use_routed_input_transform: Whether to create routed input/output transforms
+        device: Device to create tensors on ("cuda" or "cpu")
+
+    Returns:
+        MoETestData containing all test data and transforms
+    """
+    # For latent MoE: latent_size = k // 2
+    latent_size = k // 2
+
+    # k = maybe_roundup_layer_hidden_size(k, in_dtype, backend)
+    # latent_size = maybe_roundup_layer_hidden_size(latent_size, in_dtype, backend)
+
+    # Determine dimensions for routed experts (may be transformed)
+    # For latent MoE, routed experts operate entirely in latent space
+    # (k//2). The routed_output_transform then projects back to k before
+    # adding with shared experts.
+    # w1: (E, 2*N, latent_size) - input latent_size
+    # w2: (E, latent_size, N) - output latent_size (fused_experts returns
+    # same shape as input)
+    routed_expert_hidden_size = latent_size if use_routed_input_transform else k
+
+    # Create expert weights
+    (w1, _, _, _), (w2, _, _, _) = make_test_weights(
+        num_experts,
+        n,
+        routed_expert_hidden_size,  # Both w1 input and w2 output use latent_size
+        in_dtype=in_dtype,
+    )
+
+    # Create shared experts config if needed
+    if use_shared_experts:
+        shared_experts_config = SharedExpertsConfig(
+            w1=torch.randn((k, n * 2), device=device, dtype=in_dtype) / 15,
+            w2=torch.randn((n, k), device=device, dtype=in_dtype) / 15,
+        )
+    else:
+        shared_experts_config = None
+
+    # Create routed input transform if needed
+    routed_input_transform = (
+        SimpleRoutedInputTransform(k, latent_size, in_dtype, device=device)
+        if use_routed_input_transform
+        else None
+    )
+
+    # Create gate if needed
+    # Note: gate is called AFTER routed_input_transform, so it should expect
+    # the transformed dimension (latent_size) when routed_input_transform is used
+    gate_input_dim = latent_size if use_routed_input_transform else k
+    gate = (
+        SimpleGate(gate_input_dim, num_experts, in_dtype, device=device)
+        if use_gate
+        else None
+    )
+
+    # Create routed output transform if needed (projects latent space back to original)
+    routed_output_transform = (
+        SimpleRoutedInputTransform(latent_size, k, in_dtype, device=device)
+        if use_routed_input_transform
+        else None
+    )
+
+    # Create test inputs
+    hidden_states = torch.randn((m, k), device=device, dtype=in_dtype) / 10
+    router_logits = torch.randn((m, num_experts), device=device, dtype=in_dtype)
+
+    return MoETestData(
+        w1=w1,
+        w2=w2,
+        hidden_states=hidden_states,
+        router_logits=router_logits,
+        shared_experts_config=shared_experts_config,
+        gate=gate,
+        routed_input_transform=routed_input_transform,
+        routed_output_transform=routed_output_transform,
+        routed_expert_hidden_size=routed_expert_hidden_size,
+    )
+
+
+def make_fused_moe_layer(
+    quantization: str | None,
+    use_ep: bool,
+    hidden_size: int,
+    intermediate_size: int,
+    in_dtype: torch.dtype,
+    tp_size: int,
+    ep_size: int,
+    dp_size: int,
+    w1: torch.Tensor,
+    w2: torch.Tensor,
+    top_k: int,
+    global_num_experts: int,
+    renormalize: bool = False,
+    shared_experts: torch.nn.Module | None = None,
+    use_grouped_topk: bool = False,
+    topk_group: int | None = None,
+    num_expert_group: int | None = None,
+    custom_routing_function: Callable | None = None,
+    scoring_func: str = "softmax",
+    routed_scaling_factor: float = 1.0,
+    e_score_correction_bias: torch.Tensor | None = None,
+    apply_router_weight_on_input: bool = False,
+    activation: str = "silu",
+    indices_type: torch.dtype | None = None,
+    expert_map: torch.Tensor | None = None,
+    enable_eplb: bool = False,
+    expert_load_view: torch.Tensor | None = None,
+    logical_to_physical_map: torch.Tensor | None = None,
+    logical_replica_count: torch.Tensor | None = None,
+    num_redundant_experts: int = 0,
+    has_bias: bool = False,
+    gate: torch.nn.Module | None = None,
+    routed_input_transform: torch.nn.Module | None = None,
+    routed_output_transform: torch.nn.Module | None = None,
+    pcp_size: int | None = 1,
+    is_sequence_parallel: bool = False,
+) -> FusedMoE:
+    quant_config, qw = make_quant_config(quantization, w1, w2, global_num_experts)
+
+    kwargs = dict()
+    kwargs["shared_experts"] = shared_experts
+
+    # Add gate and routed_input_transform if provided
+    if gate is not None:
+        kwargs["gate"] = gate
+
+    if routed_input_transform is not None:
+        kwargs["routed_input_transform"] = routed_input_transform
+        kwargs["routed_output_transform"] = routed_output_transform
+
+    layer = FusedMoE(
+        num_experts=global_num_experts,
+        top_k=top_k,
+        hidden_size=hidden_size,
+        intermediate_size=intermediate_size,
+        params_dtype=in_dtype,
+        renormalize=renormalize,
+        use_grouped_topk=use_grouped_topk,
+        num_expert_group=num_expert_group,
+        topk_group=topk_group,
+        quant_config=quant_config,
+        tp_size=tp_size,
+        ep_size=ep_size,
+        dp_size=dp_size,
+        pcp_size=pcp_size,
+        prefix="from_forward_context",
+        custom_routing_function=custom_routing_function,
+        scoring_func=scoring_func,
+        routed_scaling_factor=routed_scaling_factor,
+        e_score_correction_bias=e_score_correction_bias,
+        apply_router_weight_on_input=apply_router_weight_on_input,
+        activation=activation,
+        enable_eplb=enable_eplb,
+        num_redundant_experts=num_redundant_experts,
+        has_bias=has_bias,
+        is_sequence_parallel=is_sequence_parallel,
+        **kwargs,
+    )
+
+    weight_scale_name = getattr(layer.quant_method, "weight_scale_name", "weight_scale")
+
+    for name, value in [
+        ("w13_weight", qw.w13_weight),
+        ("w2_weight", qw.w2_weight),
+        (f"w13_{weight_scale_name}", qw.w13_weight_scale),
+        (f"w2_{weight_scale_name}", qw.w2_weight_scale),
+        ("w13_weight_scale_2", qw.w13_weight_scale_2),
+        ("w2_weight_scale_2", qw.w2_weight_scale_2),
+        ("w13_input_scale", qw.w13_input_scale),
+        ("w2_input_scale", qw.w2_input_scale),
+    ]:
+        if value is not None:
+            layer.register_parameter(
+                name, torch.nn.Parameter(value, requires_grad=False)
+            )
+
+    layer.quant_method.process_weights_after_loading(layer)
+
+    return layer
+
+
+def make_fake_moe_layer(
+    w1: torch.Tensor,
+    w2: torch.Tensor,
+    top_k: int,
+    global_num_experts: int,
+    in_dtype: torch.dtype,
+    quantization: str | None,
+    renormalize: bool = False,
+    shared_experts_config: SharedExpertsConfig | None = None,
+    use_grouped_topk: bool = False,
+    topk_group: int | None = None,
+    num_expert_group: int | None = None,
+    custom_routing_function: Callable | None = None,
+    scoring_func: str = "softmax",
+    routed_scaling_factor: float = 1.0,
+    e_score_correction_bias: torch.Tensor | None = None,
+    apply_router_weight_on_input: bool = False,
+    activation: str = "silu",
+    indices_type: torch.dtype | None = None,
+    expert_map: torch.Tensor | None = None,
+    expert_load_view: torch.Tensor | None = None,
+    logical_to_physical_map: torch.Tensor | None = None,
+    logical_replica_count: torch.Tensor | None = None,
+    gate: torch.nn.Module | None = None,
+    routed_input_transform: torch.nn.Module | None = None,
+    routed_output_transform: torch.nn.Module | None = None,
+    use_ep: bool = False,
+    tp_size: int = 1,
+    dp_size: int = 1,
+    ep_size: int = 1,
+    is_sequence_parallel: bool = False,
+) -> Callable:
+    quant_dtype = None
+    activation = MoEActivation.from_str(activation)
+
+    router = create_fused_moe_router(
+        top_k=top_k,
+        global_num_experts=global_num_experts,
+        renormalize=renormalize,
+        use_grouped_topk=use_grouped_topk,
+        num_expert_group=num_expert_group,
+        topk_group=topk_group,
+        custom_routing_function=custom_routing_function,
+        scoring_func=scoring_func,
+        routed_scaling_factor=routed_scaling_factor,
+        e_score_correction_bias=e_score_correction_bias,
+        num_fused_shared_experts=0,  # TODO
+        # TODO(bnell): once we can construct the MK at init time, we
+        # can make this a value.
+        indices_type_getter=lambda: indices_type,
+    )
+
+    if quant_dtype is not None:
+        w1, w1_s, _ = moe_quantize_weights(w1, None, quant_dtype, False, None)
+        w2, w2_s, _ = moe_quantize_weights(w2, None, quant_dtype, False, None)
+    else:
+        w1_s = None
+        w2_s = None
+
+    shared_experts = create_shared_experts_from_config(
+        shared_experts_config,
+        in_dtype,
+    )
+
+    quant_config = FusedMoEQuantConfig.make(
+        quant_dtype,
+        w1_scale=w1_s,
+        w2_scale=w2_s,
+    )
+
+    def _moe(
+        hidden_states: torch.Tensor,
+        router_logits: torch.Tensor,
+    ) -> torch.Tensor:
+        # Save original hidden_states for shared experts (before transform)
+        original_hidden_states = hidden_states
+
+        # Apply routed input transform if provided
+        if routed_input_transform is not None:
+            hidden_states = routed_input_transform(hidden_states)
+
+        # If gate provided, compute router_logits from hidden_states
+        # Note: gate operates on transformed hidden_states (after
+        # routed_input_transform)
+        if gate is not None:
+            router_logits, _ = gate(hidden_states)
+
+        topk_weights, topk_ids = router.select_experts(
+            hidden_states=hidden_states,
+            router_logits=router_logits,
+        )
+
+        # Shared experts use original (untransformed) hidden_states
+        if shared_experts is not None:
+            shared_output = shared_experts(original_hidden_states)
+        else:
+            shared_output = None
+
+        # Routed experts use transformed hidden_states
+        output = fused_experts(
+            hidden_states=hidden_states,
+            w1=w1,
+            w2=w2,
+            quant_config=quant_config,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            inplace=False,
+            activation=activation,
+            apply_router_weight_on_input=apply_router_weight_on_input,
+            global_num_experts=global_num_experts,
+            expert_map=expert_map,
+        )
+
+        # Apply routed output transform if provided
+        # (e.g., latent space -> original space)
+        if routed_output_transform is not None:
+            output = routed_output_transform(output)
+
+        if shared_experts is not None:
+            assert shared_output is not None
+            output += shared_output
+
+        # Apply TP/DP reduction if not already reduced
+        # if (tp_size > 1 or dp_size > 1):
+        #    output = tensor_model_parallel_all_reduce(output)
+
+        return output
+
+    return _moe
+
+
+def _test_body_regular(
+    moe_layer: FusedMoE,
+    hidden_states: torch.Tensor,
+    router_logits: torch.Tensor,
+    vllm_config: VllmConfig,
+    num_tokens: int,
+    num_tokens_across_dp: torch.Tensor,
+    **kwargs,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Regular MoE test body: compare layer output to baseline."""
+    baseline_output = kwargs["baseline_output"]
+
+    with set_forward_context(
+        None,
+        vllm_config,
+        num_tokens=num_tokens,
+        num_tokens_across_dp=num_tokens_across_dp,
+    ):
+        output = sp_wrapper(moe_layer)(hidden_states, router_logits)
+
+    return baseline_output, output
+
+
+def _test_body_eplb(
+    moe_layer: FusedMoE,
+    hidden_states: torch.Tensor,
+    router_logits: torch.Tensor,
+    vllm_config: VllmConfig,
+    num_tokens: int,
+    num_tokens_across_dp: torch.Tensor,
+    cpu_group,
+    in_dtype: torch.dtype,
+    quantization: str | None,
+    use_ep: bool,
+    tp_size: int,
+    ep_size: int,
+    dp_size: int,
+    w1: torch.Tensor,
+    w2: torch.Tensor,
+    num_experts: int,
+    k: int,
+    n: int,
+    top_k: int,
+    shared_experts,
+    gate: torch.nn.Module | None,
+    routed_input_transform: torch.nn.Module | None,
+    routed_output_transform: torch.nn.Module | None,
+    **kwargs,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    device = torch.accelerator.current_accelerator()
+
+    is_sequence_parallel = moe_layer.is_sequence_parallel
+
+    """EPLB test body: compare output before and after expert weight rearrangement."""
+    # Get "before" output with original weight arrangement
+    with set_forward_context(
+        None,
+        vllm_config,
+        num_tokens=num_tokens,
+        num_tokens_across_dp=num_tokens_across_dp,
+    ):
+        output_before = sp_wrapper(moe_layer)(hidden_states, router_logits)
+
+    # Create a fresh FusedMoE layer with enable_eplb=True
+    # Delete the original layer's registration so the constructor can
+    # re-use the same "from_forward_context" prefix
+    cc = vllm_config.compilation_config
+    del cc.static_forward_context["from_forward_context"]
+    cc.static_all_moe_layers.remove("from_forward_context")
+
+    # Determine hidden size for MoE layer
+    # When using routed_input_transform, experts operate in latent space
+    hidden_size_for_layer = k // 2 if routed_input_transform is not None else k
+
+    eplb_moe_layer = make_fused_moe_layer(
+        quantization=quantization,
+        use_ep=use_ep,
+        hidden_size=hidden_size_for_layer,
+        intermediate_size=n,
+        in_dtype=in_dtype,
+        tp_size=tp_size,
+        ep_size=ep_size,
+        dp_size=dp_size,
+        w1=w1,
+        w2=w2,
+        top_k=top_k,
+        global_num_experts=num_experts,
+        shared_experts=shared_experts,
+        enable_eplb=True,
+        gate=gate,
+        routed_input_transform=routed_input_transform,
+        routed_output_transform=routed_output_transform,
+        is_sequence_parallel=is_sequence_parallel,
+    )
+
+    if eplb_moe_layer._expert_map is not None:
+        eplb_moe_layer._expert_map = eplb_moe_layer._expert_map.to(device)
+
+    # All ranks must generate the same permutation
+    initial_indices = torch.arange(num_experts, dtype=torch.long)
+    shuffled_indices = initial_indices[torch.randperm(num_experts)]
+
+    expert_weights = [list(eplb_moe_layer.get_expert_weights())]
+
+    communicator = create_eplb_communicator(
+        group_coordinator=get_eplb_group(),
+        backend=vllm_config.parallel_config.eplb_config.communicator,
+        expert_weights=expert_weights[0],
+    )
+
+    # Rearrange expert weights across EP ranks
+    rearrange_expert_weights_inplace(
+        old_global_expert_indices=initial_indices.unsqueeze(0),
+        new_global_expert_indices=shuffled_indices.unsqueeze(0),
+        expert_weights=expert_weights,
+        ep_group=cpu_group,
+        communicator=communicator,
+    )
+
+    # Build logical_to_physical_map from shuffled_indices
+    # shuffled_indices[physical] = logical, we need the inverse
+    logical_to_physical = torch.empty(num_experts, dtype=torch.int32, device=device)
+    logical_to_physical[shuffled_indices.to(device)] = torch.arange(
+        num_experts, dtype=torch.int32, device=device
+    )
+
+    eplb_moe_layer.set_eplb_state(
+        moe_layer_idx=0,
+        expert_load_view=torch.zeros(
+            (1, num_experts),
+            dtype=torch.int32,
+            device=device,
+        ),
+        logical_to_physical_map=logical_to_physical.reshape(num_experts, 1).unsqueeze(
+            0
+        ),
+        logical_replica_count=torch.ones(
+            (1, num_experts),
+            dtype=torch.int32,
+            device=device,
+        ),
+    )
+
+    eplb_moe_layer.eplb_state.should_record_tensor = torch.ones(
+        (), dtype=torch.bool, device=device
+    )
+
+    # Get "after" output with rearranged weights and EPLB routing
+    with set_forward_context(
+        None,
+        vllm_config,
+        num_tokens=num_tokens,
+        num_tokens_across_dp=num_tokens_across_dp,
+    ):
+        output_after = sp_wrapper(eplb_moe_layer)(hidden_states, router_logits)
+
+    return output_before, output_after
+
+
+# TODO: make this take a MoETestConfig
+def _run_one_config(
+    vllm_config: VllmConfig,
+    ep_size: int,  # Expert parallel size (total across all ranks)
+    dp_size: int,  # Data parallel size (number of DP groups)
+    tp_size: int,  # Tensor parallel size OR sequence parallel size (when use_ep=True)
+    dp_rank: int,  # Current rank in data parallel dimension
+    tp_rank: int,  # Current rank in tensor/sequence parallel dimension
+    is_sequence_parallel: bool,  # Whether to use sequence parallelism
+    m: int,
+    n: int,
+    k: int,
+    num_experts: int,
+    top_k: int,
+    quantization: str | None,
+    backend: str | None,
+    test_body_fn: Callable,
+    use_shared_experts: bool,
+    use_gate: bool,
+    use_routed_input_transform: bool,
+    **kwargs,
+) -> None:
+    """Generic test loop that sets up environment and delegates to test_body_fn.
+
+    Parameter Interpretation:
+    - When is_sequence_parallel=False (standard TP or EP):
+      * ep_size: Number of expert parallel ranks (or 1 if no EP)
+      * tp_size: Number of tensor parallel ranks (or 1 if no TP)
+      * Weights are chunked by ep_size (experts) and tp_size (tensors)
+
+    - When is_sequence_parallel=True (EP + sequence splitting):
+      * ep_size: Number of expert parallel ranks (equals dp_size * tp_size)
+      * tp_size: Number of ranks to split sequence across (becomes sp_size in FusedMoE)
+      * Weights are chunked by ep_size (experts) but NOT by tp_size
+      * Input sequences are chunked by tp_size (via sp_wrapper)
+    """
+    set_random_seed(7)
+
+    use_ep = ep_size > 1
+
+    assert vllm_config.parallel_config.enable_expert_parallel == use_ep
+
+    in_dtype = torch.bfloat16
+    device = torch.accelerator.current_accelerator()
+
+    if not is_workspace_manager_initialized():
+        init_workspace_manager(device)
+
+    # Create test data and transforms
+    test_data = setup_moe_test_data(
+        m=m,
+        k=k,
+        n=n,
+        num_experts=num_experts,
+        in_dtype=in_dtype,
+        use_shared_experts=use_shared_experts,
+        use_gate=use_gate,
+        use_routed_input_transform=use_routed_input_transform,
+        backend=backend,
+        device=device,
+    )
+
+    # Extract data from test_data
+    hidden_states = test_data.hidden_states
+    router_logits = test_data.router_logits
+    w1 = test_data.w1
+    w2 = test_data.w2
+    shared_experts_config = test_data.shared_experts_config
+    gate = test_data.gate
+    routed_input_transform = test_data.routed_input_transform
+    routed_output_transform = test_data.routed_output_transform
+    activation = "silu"
+
+    # Create baseline layer with FULL weights (no EP chunking)
+    # Baseline represents the expected output using full model
+    baseline_layer = make_fake_moe_layer(
+        w1=w1,
+        w2=w2,
+        top_k=top_k,
+        global_num_experts=num_experts,
+        in_dtype=in_dtype,
+        quantization=quantization,
+        renormalize=False,
+        shared_experts_config=shared_experts_config,
+        gate=gate,
+        routed_input_transform=routed_input_transform,
+        routed_output_transform=routed_output_transform,
+        use_ep=use_ep,
+        tp_size=tp_size,
+        ep_size=ep_size,
+        dp_size=dp_size,
+        activation=activation,
+        is_sequence_parallel=is_sequence_parallel,
+    )
+
+    with set_current_vllm_config(vllm_config):
+        # Compute baseline output with SP wrapper if needed
+        # sp_wrapper handles sequence chunking/gathering for SP
+        baseline_output = sp_wrapper(baseline_layer, is_sequence_parallel)(
+            hidden_states, router_logits
+        )
+
+    del baseline_layer
+    torch.accelerator.empty_cache()
+
+    with set_current_vllm_config(vllm_config):
+        # Chunk weights for EP BEFORE creating FusedMoE
+        # FusedMoE uses EP-chunked weights and handles reductions internally
+        if ep_size > 1:
+            # Split experts across ranks (dimension 0 is the expert dimension)
+            # When EP is enabled, use EP group rank and ep_size for chunking
+            ep_rank = get_ep_group().rank_in_group
+            w1 = chunk_by_rank(w1, ep_rank, ep_size, dim=0, device=device)
+            w2 = chunk_by_rank(w2, ep_rank, ep_size, dim=0, device=device)
+
+        # Chunk weights for TP (only if NOT doing sequence parallelism)
+        # Sequence parallelism splits tokens/sequences, not weight tensors
+        if tp_size > 1 and not is_sequence_parallel:
+            w1 = tp_chunk_gate_up(w1, tp_rank, tp_size, dim=1, device=device)
+            w2 = chunk_by_rank(w2, tp_rank, tp_size, dim=2, device=device)
+
+        # Setup shared experts if needed
+        # In SP mode, shared experts should NOT be TP-chunked (same as routed experts)
+        # tp_size is used for sequence splitting, not weight splitting
+        shared_experts = create_shared_experts_from_config(
+            shared_experts_config,
+            in_dtype,
+            tp_size,
+            tp_rank,
+            is_sequence_parallel,
+            device,
+        )
+
+        # Determine hidden size for MoE layer
+        # When using routed_input_transform, experts operate in latent space
+        hidden_size_for_layer = k // 2 if routed_input_transform is not None else k
+
+        # Create initial MoE layer
+        moe_layer = make_fused_moe_layer(
+            quantization=quantization,
+            use_ep=use_ep,
+            hidden_size=hidden_size_for_layer,
+            intermediate_size=n,
+            in_dtype=in_dtype,
+            tp_size=tp_size,
+            ep_size=ep_size,
+            dp_size=dp_size,
+            w1=w1,
+            w2=w2,
+            top_k=top_k,
+            global_num_experts=num_experts,
+            shared_experts=shared_experts,
+            gate=gate,
+            routed_input_transform=routed_input_transform,
+            routed_output_transform=routed_output_transform,
+            activation=activation,
+            is_sequence_parallel=is_sequence_parallel,
+        )
+
+        if moe_layer._expert_map is not None:
+            moe_layer._expert_map = moe_layer._expert_map.to(device)
+
+        num_tokens = m
+        # num_tokens_across_dp should have one entry per DP group, not per total rank
+        # When EP is enabled, dp_size represents the number of DP groups
+        num_tokens_across_dp = torch.tensor(
+            [num_tokens] * dp_size,
+            device=device,
+            dtype=torch.int,
+        )
+
+        # Call the test body function with all necessary context
+        expected, actual = test_body_fn(
+            moe_layer=moe_layer,
+            hidden_states=hidden_states,
+            router_logits=router_logits,
+            vllm_config=vllm_config,
+            num_tokens=num_tokens,
+            num_tokens_across_dp=num_tokens_across_dp,
+            in_dtype=in_dtype,
+            quantization=quantization,
+            use_ep=use_ep,
+            tp_size=tp_size,
+            ep_size=ep_size,
+            dp_size=dp_size,
+            w1=w1,
+            w2=w2,
+            num_experts=num_experts,
+            k=k,
+            n=n,
+            m=m,
+            top_k=top_k,
+            shared_experts=shared_experts,
+            gate=gate,
+            routed_input_transform=routed_input_transform,
+            routed_output_transform=routed_output_transform,
+            baseline_output=baseline_output,
+            **kwargs,
+        )
+
+    # Common tolerance logic
+    # TODO: consider associating tolerances with quant methods.
+    if quantization is None:
+        if k >= 2048:
+            atol, rtol = 7.6e-2, 7.6e-2
+        else:
+            atol, rtol = 3.5e-2, 3.5e-2
+    elif quantization in ("fp8", "fp8_blocked", "modelopt_fp8"):
+        atol, rtol = 6.5e-2, 6.5e-2
+    elif quantization == "modelopt_fp4":
+        if k >= 2048:
+            atol = rtol = 1e-1 + (k * 1e-4)
+        else:
+            atol = rtol = 1e-1
+
+        if backend == "allgather_reducescatter" and tp_size > 1:
+            atol += 2e-1
+            rtol += 2e-1
+    else:
+        atol, rtol = 6e-2, 6e-2
+
+    torch.accelerator.synchronize()  # TODO: Is this needed?
+    torch.testing.assert_close(expected, actual, atol=atol, rtol=rtol)
+
+
+# Test for non-parallel cases (world_size == 1) - backend doesn't matter
+@pytest.mark.parametrize("m, n, k", SHAPE_COMBOS)
+@pytest.mark.parametrize("num_experts", NUM_EXPERTS)
+@pytest.mark.parametrize("top_k", TOP_KS)
+@pytest.mark.parametrize("quantization", QUANT_METHODS)
+@pytest.mark.parametrize("use_shared_experts", [False, True])
+@pytest.mark.parametrize("use_gate", [False, True])
+@pytest.mark.parametrize("use_routed_input_transform", [False, True])
+def test_moe_layer_no_parallel(
+    m: int,
+    n: int,
+    k: int,
+    num_experts: int,
+    top_k: int,
+    quantization: str | None,
+    use_shared_experts: bool,
+    use_gate: bool,
+    use_routed_input_transform: bool,
+    monkeypatch,
+):
+    """Test MoE layer without parallelism (dp_size=1, tp_size=1, use_ep=False)."""
+
+    if os.environ.get("VLLM_LOGGING_LEVEL") is None:
+        monkeypatch.setenv("VLLM_LOGGING_LEVEL", "ERROR")
+
+    # Needed since weights will already be in e4m3fnuz format and the
+    # normalize_e4m3fn_to_e4m3fnuz() function is not being tested here.
+    if current_platform.is_fp8_fnuz():
+        override_normalize_e4m3fn_to_e4m3fnuz()
+
+    test_config = MoETestConfig(
+        m,
+        n,
+        k,
+        num_experts,
+        top_k,
+        torch.bfloat16,
+        quantization,
+        use_shared_experts,
+        use_gate,
+        use_routed_input_transform,
+    )
+
+    valid, reason = is_valid_config(test_config)
+    if not valid:
+        pytest.skip(reason)
+
+    set_random_seed(7)
+
+    parallel_config = ParallelConfig()
+    compilation_config = CompilationConfig()
+    compilation_config.pass_config.fuse_allreduce_rms = False
+
+    vllm_config = VllmConfig(
+        parallel_config=parallel_config, compilation_config=compilation_config
+    )
+
+    # Initialize distributed environment for single GPU
+    _set_vllm_config(vllm_config, 1, rank=0, local_rank=0)
+
+    _run_one_config(
+        vllm_config,
+        test_config.ep_size,
+        test_config.dp_size,
+        test_config.tp_size,
+        0,
+        0,
+        False,
+        test_config.m,
+        test_config.n,
+        test_config.k,
+        test_config.num_experts,
+        test_config.top_k,
+        test_config.quantization,
+        test_config.backend,
+        _test_body_regular,
+        use_shared_experts=test_config.use_shared_experts,
+        use_gate=test_config.use_gate,
+        use_routed_input_transform=test_config.use_routed_input_transform,
+    )
+
+
+def _test_body_config(test_config: MoETestConfig, cpu_group, **kwargs):
+    if not test_config.enable_eplb:
+        return _test_body_regular(**kwargs)
+    else:
+        return _test_body_eplb(**kwargs, cpu_group=cpu_group)
+
+
+def _parallel_worker(
+    pgi: ProcessGroupInfo,
+    vllm_config: VllmConfig,
+    cpu_group,
+    test_configs: list[MoETestConfig],
+    verbosity: int,
+    **kwargs,
+) -> None:
+    set_random_seed(7)
+
+    total = 0
+    passed = 0
+    failed = 0
+    fail_ids = []
+
+    dp_rank = vllm_config.parallel_config.data_parallel_rank
+
+    if current_platform.is_fp8_fnuz():
+        override_normalize_e4m3fn_to_e4m3fnuz()
+
+    for test_config in test_configs:
+        cc = vllm_config.compilation_config
+        if "from_forward_context" in cc.static_forward_context:
+            del cc.static_forward_context["from_forward_context"]
+            cc.static_all_moe_layers.remove("from_forward_context")
+
+        tp_rank = pgi.rank % test_config.tp_size
+
+        if verbosity > 0:
+            print(f"subtest: {test_config.id()}", end="")
+
+        try:
+            _run_one_config(
+                vllm_config,
+                test_config.ep_size,
+                test_config.dp_size,
+                test_config.tp_size,
+                dp_rank,
+                tp_rank,
+                test_config.is_sequence_parallel,
+                test_config.m,
+                test_config.n,
+                test_config.k,
+                test_config.num_experts,
+                test_config.top_k,
+                test_config.quantization,
+                test_config.backend,
+                functools.partial(
+                    _test_body_config, test_config=test_config, cpu_group=cpu_group
+                ),
+                use_shared_experts=test_config.use_shared_experts,
+                use_gate=test_config.use_gate,
+                use_routed_input_transform=test_config.use_routed_input_transform,
+            )
+            if verbosity > 0:
+                print(" PASSED")
+            else:
+                print(".", end="")
+            passed = passed + 1
+        except Exception as ex:
+            fail_ids.append(test_config.id())
+            failed = failed + 1
+            if verbosity > 0:
+                traceback.print_exc()
+                print(f"\n{str(ex)}\nFAILED")
+            else:
+                print("F", end="")
+        finally:
+            # DeepEP managers are not reliably reusable across many subtests in
+            # a single worker process. Tear them down after each DeepEP case so
+            # later subtests do not inherit stale communication state.
+            if test_config.backend in {
+                "deepep_low_latency",
+                "deepep_high_throughput",
+            }:
+                torch.accelerator.synchronize()
+                all2all_manager = get_ep_group().device_communicator.all2all_manager
+                if all2all_manager is not None:
+                    all2all_manager.destroy()
+            total = total + 1
+            torch.distributed.barrier()
+
+    skipped = total - (passed + failed)
+
+    fails = f"{failed} failed" if failed > 0 else ""
+    sep = ", " if fails != "" else ""
+    skips = f"{sep}{skipped} skipped" if skipped > 0 else ""
+    sep = ", " if skips != "" or fails != "" else ""
+    passes = f"{sep}{passed} passed" if passed > 0 else ""
+
+    report = (
+        f"============= {fails}{skips}{passes} of {total} total tests ============="
+    )
+
+    sep = "\n" if verbosity == 0 else ""
+    print(f"{sep}{report}")
+
+    if failed > 0:
+        fail_ids_str = "\n".join(fail_ids)
+        raise RuntimeError(
+            f"\n============= Failed subtests =============\n{fail_ids_str}\n{report}"
+        )
+
+
+# TODO: add cudagraphs/torch.compile tests
+@pytest.mark.parametrize("dp_size, tp_size, use_ep", PARALLEL_COMBOS)
+@pytest.mark.parametrize("backend", BACKENDS)
+@pytest.mark.parametrize("enable_eplb", [False, True])
+def test_moe_layer(
+    dp_size: int,
+    tp_size: int,
+    use_ep: bool,
+    backend: str,
+    enable_eplb: bool,
+    monkeypatch,
+    pytestconfig,
+    subtests,
+):
+    """Test MoE layer with parallelism (multi-GPU or TP/EP enabled).
+
+    For non-parallel cases (world_size == 1), use test_moe_layer_no_parallel instead.
+    """
+    num_gpus = current_platform.device_count()
+    world_size = tp_size * dp_size
+    # When use_ep=True: FusedMoEParallelConfig flattens tp_size across dp ranks
+    # Result: ep_size = dp_size * pcp_size * tp_size
+    # Since pcp_size=1 in these tests: ep_size = dp_size * tp_size = world_size
+    # When use_ep=False: no expert parallelism, ep_size = 1
+    ep_size = 1 if not use_ep else world_size
+
+    assert world_size > 1
+
+    # Check if enough GPUs available
+    if world_size is not None and num_gpus is not None and world_size > num_gpus:
+        pytest.skip(f"Not enough GPUs got {num_gpus}, expected {world_size}.")
+
+    if enable_eplb and not use_ep:
+        pytest.skip("EPLB requires EP.")
+
+    verbosity = pytestconfig.getoption("verbose")
+
+    if os.environ.get("VLLM_LOGGING_LEVEL") is None:
+        monkeypatch.setenv("VLLM_LOGGING_LEVEL", "ERROR")
+
+    # TODO
+    # VLLM_FLASHINFER_MOE_BACKEND=latency
+    # VLLM_USE_FLASHINFER_MOE_FP16=1
+    # VLLM_USE_FLASHINFER_MOE_FP8
+    # VLLM_USE_FLASHINFER_MOE_FP4
+    # VLLM_USE_FLASHINFER_MOE_INT4
+
+    parallel_config = ParallelConfig(
+        pipeline_parallel_size=1,
+        data_parallel_size=dp_size,
+        tensor_parallel_size=tp_size,
+        enable_expert_parallel=use_ep,
+        all2all_backend=backend,
+        enable_eplb=enable_eplb,
+    )
+
+    compilation_config = CompilationConfig()
+    # compilation_config.mode = CompilationMode.NONE  # for now
+    compilation_config.pass_config.fuse_allreduce_rms = False  # for now
+
+    vllm_config = VllmConfig(
+        parallel_config=parallel_config,
+        compilation_config=compilation_config,
+        scheduler_config=SchedulerConfig.default_factory(
+            max_num_batched_tokens=next_power_of_2(MAX_M)
+        ),
+    )
+
+    test_configs = generate_valid_test_configs(
+        backend, ep_size, dp_size, tp_size, enable_eplb, verbosity
+    )
+
+    if subtests is not None:
+        new_test_configs = []
+        for subtest in subtests.split(","):
+            sub_test_config = MoETestConfig.from_id(subtest)
+            if sub_test_config in test_configs:
+                new_test_configs.append(sub_test_config)
+            else:
+                pytest.skip(
+                    f"subtest config {subtest} does not match any valid test "
+                    "configuration"
+                )
+        test_configs = new_test_configs
+
+    if len(test_configs) == 0:
+        pytest.skip("No supported configs found for this testpoint.")
+
+    try:
+        parallel_launch_with_config(
+            world_size,
+            _parallel_worker,
+            vllm_config,
+            None,
+            test_configs,
+            verbosity,
+        )
+    finally:
+        torch.accelerator.synchronize()  # TODO: Is this needed?
+        torch.accelerator.empty_cache()
diff --git a/tests/kernels/moe/test_moe_permute_unpermute.py b/tests/kernels/moe/test_moe_permute_unpermute.py
index 92126171a17b..5aafb89589fd 100644
--- a/tests/kernels/moe/test_moe_permute_unpermute.py
+++ b/tests/kernels/moe/test_moe_permute_unpermute.py
@@ -10,7 +10,9 @@
 import torch
 
 from vllm.model_executor.layers.fused_moe import fused_topk
-from vllm.model_executor.layers.fused_moe.layer import determine_expert_map
+from vllm.model_executor.layers.fused_moe.expert_map_manager import (
+    determine_expert_map,
+)
 from vllm.model_executor.layers.fused_moe.moe_permute_unpermute import (
     moe_permute,
     moe_permute_unpermute_supported,
diff --git a/tests/kernels/moe/test_moe_weight_loading_padded.py b/tests/kernels/moe/test_moe_weight_loading_padded.py
new file mode 100644
index 000000000000..abe473879f1d
--- /dev/null
+++ b/tests/kernels/moe/test_moe_weight_loading_padded.py
@@ -0,0 +1,327 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for FusedMoE weight loading with padded hidden dimensions.
+
+When using DeepEP backends or NIXL EP with models like nemotron_h,
+hidden_size may be rounded up (e.g., 2688 -> 3072) for backend requirements.
+Weight parameters are created with the padded size, but checkpoint weights
+have the original unpadded size. These tests verify that weight loading
+correctly handles this mismatch.
+"""
+
+import pytest
+import torch
+
+from vllm.model_executor.layers.fused_moe.layer import FusedMoE
+
+
+class TestGetHiddenDim:
+    """Unit tests for _get_hidden_dim."""
+
+    def test_2d_non_transposed_w2(self):
+        # w2: shard_dim=1 (intermediate), hidden=0
+        assert FusedMoE._get_hidden_dim(shard_dim=1, ndim=2) == 0
+
+    def test_2d_non_transposed_w13(self):
+        # w1/w3: shard_dim=0 (intermediate), hidden=1
+        assert FusedMoE._get_hidden_dim(shard_dim=0, ndim=2) == 1
+
+    def test_2d_transposed_w2(self):
+        # transposed w2: shard_dim=0, hidden=1
+        assert FusedMoE._get_hidden_dim(shard_dim=0, ndim=2) == 1
+
+    def test_2d_transposed_w13(self):
+        # transposed w1/w3: shard_dim=1, hidden=0
+        assert FusedMoE._get_hidden_dim(shard_dim=1, ndim=2) == 0
+
+    def test_3d_non_transposed_w2(self):
+        # 3D w2: shard_dim=2, hidden=1
+        assert FusedMoE._get_hidden_dim(shard_dim=2, ndim=3) == 1
+
+    def test_3d_non_transposed_w13(self):
+        # 3D w1/w3: shard_dim=1, hidden=2
+        assert FusedMoE._get_hidden_dim(shard_dim=1, ndim=3) == 2
+
+    def test_3d_transposed_w2(self):
+        # transposed 3D w2: shard_dim=1, hidden=2
+        assert FusedMoE._get_hidden_dim(shard_dim=1, ndim=3) == 2
+
+    def test_3d_transposed_w13(self):
+        # transposed 3D w1/w3: shard_dim=2, hidden=1
+        assert FusedMoE._get_hidden_dim(shard_dim=2, ndim=3) == 1
+
+    def test_1d_returns_zero(self):
+        # 1D per-channel scales: always returns 0
+        assert FusedMoE._get_hidden_dim(shard_dim=0, ndim=1) == 0
+        assert FusedMoE._get_hidden_dim(shard_dim=1, ndim=1) == 0
+
+    def test_invalid_shard_dim_raises(self):
+        # shard_dim outside the data dimensions should raise
+        with pytest.raises(ValueError, match="not a valid data dimension"):
+            FusedMoE._get_hidden_dim(shard_dim=0, ndim=3)
+
+
+class TestNarrowExpertDataForPadding:
+    """Unit tests for _narrow_expert_data_for_padding."""
+
+    def test_no_narrowing_when_shapes_match(self):
+        expert_data = torch.zeros(1024, 1024)
+        loaded_weight = torch.randn(1024, 1024)
+        result = FusedMoE._narrow_expert_data_for_padding(
+            expert_data, loaded_weight, hidden_dim=0
+        )
+        assert result.shape == loaded_weight.shape
+        assert result.data_ptr() == expert_data.data_ptr()
+
+    def test_narrow_w2_hidden_dim(self):
+        # w2: (hidden_size, intermediate_size) - hidden_size padded at dim 0
+        expert_data = torch.zeros(3072, 1024)
+        loaded_weight = torch.randn(2688, 1024)
+        result = FusedMoE._narrow_expert_data_for_padding(
+            expert_data, loaded_weight, hidden_dim=0
+        )
+        assert result.shape == (2688, 1024)
+
+    def test_narrow_w13_hidden_dim(self):
+        # w1/w3: (intermediate_size, hidden_size) - hidden_size padded at dim 1
+        expert_data = torch.zeros(2048, 3072)
+        loaded_weight = torch.randn(2048, 2688)
+        result = FusedMoE._narrow_expert_data_for_padding(
+            expert_data, loaded_weight, hidden_dim=1
+        )
+        assert result.shape == (2048, 2688)
+
+    def test_narrow_transposed_w2(self):
+        # transposed w2: (intermediate_size, hidden_size) - hidden at dim 1
+        expert_data = torch.zeros(1024, 3072)
+        loaded_weight = torch.randn(1024, 2688)
+        hidden_dim = FusedMoE._get_hidden_dim(shard_dim=0, ndim=2)
+        result = FusedMoE._narrow_expert_data_for_padding(
+            expert_data, loaded_weight, hidden_dim=hidden_dim
+        )
+        assert result.shape == (1024, 2688)
+
+    def test_narrow_3d_full_load(self):
+        # 3D tensor for full_load path: w2 (num_experts, hidden_size, intermediate)
+        expert_data = torch.zeros(8, 3072, 1024)
+        loaded_weight = torch.randn(8, 2688, 1024)
+        result = FusedMoE._narrow_expert_data_for_padding(
+            expert_data, loaded_weight, hidden_dim=1
+        )
+        assert result.shape == (8, 2688, 1024)
+
+    def test_narrow_1d_scale(self):
+        # 1D scale tensor: per-channel w2 scale (hidden_size,)
+        expert_data = torch.zeros(3072)
+        loaded_weight = torch.randn(2688)
+        result = FusedMoE._narrow_expert_data_for_padding(
+            expert_data, loaded_weight, hidden_dim=0
+        )
+        assert result.shape == (2688,)
+
+    def test_scalar_weight_no_op(self):
+        # 0-dim tensor should be a no-op
+        expert_data = torch.zeros(3072)
+        loaded_weight = torch.tensor(1.0)
+        result = FusedMoE._narrow_expert_data_for_padding(
+            expert_data, loaded_weight, hidden_dim=0
+        )
+        # ndim == 0, so no narrowing
+        assert result.shape == (3072,)
+
+    def test_no_narrowing_when_loaded_weight_larger(self):
+        # Guard: don't narrow if loaded_weight is larger than expert_data
+        expert_data = torch.zeros(2688, 1024)
+        loaded_weight = torch.randn(3072, 1024)
+        result = FusedMoE._narrow_expert_data_for_padding(
+            expert_data, loaded_weight, hidden_dim=0
+        )
+        assert result.shape == (2688, 1024)
+        assert result.data_ptr() == expert_data.data_ptr()
+
+    def test_negative_hidden_dim_is_noop(self):
+        # Negative hidden_dim should be a safe no-op (0 <= check)
+        expert_data = torch.zeros(3072, 1024)
+        loaded_weight = torch.randn(2688, 1024)
+        result = FusedMoE._narrow_expert_data_for_padding(
+            expert_data, loaded_weight, hidden_dim=-1
+        )
+        # -1 fails the 0 <= check, so no narrowing
+        assert result.shape == (3072, 1024)
+        assert result.data_ptr() == expert_data.data_ptr()
+
+    def test_only_narrows_hidden_dim(self):
+        # Verify that only the specified hidden_dim is narrowed,
+        # even when other dimensions also differ
+        expert_data = torch.zeros(3072, 2048)
+        loaded_weight = torch.randn(2688, 1024)
+        result = FusedMoE._narrow_expert_data_for_padding(
+            expert_data, loaded_weight, hidden_dim=0
+        )
+        # Only dim 0 (hidden) should be narrowed; dim 1 stays at 2048
+        assert result.shape == (2688, 2048)
+
+    def test_narrowed_data_shares_storage(self):
+        # Verify narrowing returns a view (writes go to original tensor)
+        expert_data = torch.zeros(3072, 1024)
+        loaded_weight = torch.randn(2688, 1024)
+        result = FusedMoE._narrow_expert_data_for_padding(
+            expert_data, loaded_weight, hidden_dim=0
+        )
+        result.copy_(loaded_weight)
+        # The first 2688 rows of expert_data should now have loaded_weight
+        assert torch.equal(expert_data[:2688, :], loaded_weight)
+        # Padded region should remain zero
+        assert torch.equal(expert_data[2688:, :], torch.zeros(3072 - 2688, 1024))
+
+
+class TestWeightLoadingWithPaddedHiddenSize:
+    """Integration-style tests that simulate padded weight loading."""
+
+    def test_load_w2_with_padding(self):
+        """Simulate loading w2 weights when hidden_size is padded."""
+        padded_hidden = 3072
+        original_hidden = 2688
+        intermediate = 1024
+
+        expert_data_full = torch.zeros(padded_hidden, intermediate)
+        loaded_weight = torch.randn(original_hidden, intermediate)
+
+        # w2 non-transposed: shard_dim=1, hidden_dim=0
+        hidden_dim = FusedMoE._get_hidden_dim(shard_dim=1, ndim=2)
+        expert_data = FusedMoE._narrow_expert_data_for_padding(
+            expert_data_full, loaded_weight, hidden_dim=hidden_dim
+        )
+        expert_data.copy_(loaded_weight)
+
+        assert torch.equal(expert_data_full[:original_hidden, :], loaded_weight)
+        assert torch.equal(
+            expert_data_full[original_hidden:, :],
+            torch.zeros(padded_hidden - original_hidden, intermediate),
+        )
+
+    def test_load_w13_with_padding(self):
+        """Simulate loading w1/w3 weights when hidden_size is padded."""
+        padded_hidden = 3072
+        original_hidden = 2688
+        intermediate = 1024
+
+        # w1/w3: (intermediate_size, hidden_size)
+        expert_data_full = torch.zeros(intermediate, padded_hidden)
+        loaded_weight = torch.randn(intermediate, original_hidden)
+
+        # w1 non-transposed: shard_dim=0, hidden_dim=1
+        hidden_dim = FusedMoE._get_hidden_dim(shard_dim=0, ndim=2)
+        expert_data = FusedMoE._narrow_expert_data_for_padding(
+            expert_data_full, loaded_weight, hidden_dim=hidden_dim
+        )
+        expert_data.copy_(loaded_weight)
+
+        assert torch.equal(expert_data_full[:, :original_hidden], loaded_weight)
+        assert torch.equal(
+            expert_data_full[:, original_hidden:],
+            torch.zeros(intermediate, padded_hidden - original_hidden),
+        )
+
+    def test_load_transposed_w2_with_padding(self):
+        """Simulate loading transposed w2 (GPTQ) with padded hidden_size."""
+        padded_hidden = 3072
+        original_hidden = 2688
+        intermediate = 1024
+
+        # transposed w2: (intermediate_size, hidden_size), shard_dim=0
+        expert_data_full = torch.zeros(intermediate, padded_hidden)
+        loaded_weight = torch.randn(intermediate, original_hidden)
+
+        hidden_dim = FusedMoE._get_hidden_dim(shard_dim=0, ndim=2)
+        expert_data = FusedMoE._narrow_expert_data_for_padding(
+            expert_data_full, loaded_weight, hidden_dim=hidden_dim
+        )
+        expert_data.copy_(loaded_weight)
+
+        assert torch.equal(expert_data_full[:, :original_hidden], loaded_weight)
+
+    def test_no_padding_is_noop(self):
+        """Verify that when sizes match, behavior is unchanged."""
+        hidden = 2048
+        intermediate = 1024
+
+        expert_data_full = torch.zeros(hidden, intermediate)
+        loaded_weight = torch.randn(hidden, intermediate)
+
+        hidden_dim = FusedMoE._get_hidden_dim(shard_dim=1, ndim=2)
+        expert_data = FusedMoE._narrow_expert_data_for_padding(
+            expert_data_full, loaded_weight, hidden_dim=hidden_dim
+        )
+        expert_data.copy_(loaded_weight)
+
+        assert torch.equal(expert_data_full, loaded_weight)
+
+    def test_narrow_shard_dim(self):
+        """Simulate loading w2 when both hidden_size and intermediate_size
+        are padded.
+        """
+        padded_hidden = 3072
+        original_hidden = 2688
+        padded_intermediate = 1024
+        original_intermediate = 896
+
+        expert_data_full = torch.zeros(padded_hidden, padded_intermediate)
+        loaded_weight = torch.randn(original_hidden, original_intermediate)
+
+        shard_dim = 1
+        hidden_dim = FusedMoE._get_hidden_dim(shard_dim=shard_dim, ndim=2)
+        expert_data = FusedMoE._narrow_expert_data_for_padding(
+            expert_data_full,
+            loaded_weight,
+            hidden_dim=hidden_dim,
+            shard_dim=shard_dim,
+        )
+        expert_data.copy_(loaded_weight)
+
+        assert torch.equal(
+            expert_data_full[:original_hidden, :original_intermediate],
+            loaded_weight,
+        )
+        assert torch.equal(
+            expert_data_full[original_hidden:, :],
+            torch.zeros(padded_hidden - original_hidden, padded_intermediate),
+        )
+        assert torch.equal(
+            expert_data_full[:original_hidden, original_intermediate:],
+            torch.zeros(original_hidden, padded_intermediate - original_intermediate),
+        )
+
+    def test_bnb_shape_mismatch_raises(self):
+        """BnB + padded hidden_size should raise via weight_loader."""
+        from unittest.mock import MagicMock
+
+        num_experts = 1
+        padded_packed = 3072  # padded packed size
+        original_packed = 2688  # original packed size
+
+        # Build a param that looks like a BnB 4-bit MoE weight.
+        param_data = torch.zeros(num_experts, padded_packed, 1, dtype=torch.uint8)
+        param = torch.nn.Parameter(param_data, requires_grad=False)
+        param.use_bitsandbytes_4bit = True
+
+        loaded_weight = torch.randint(0, 255, (original_packed, 1), dtype=torch.uint8)
+
+        # Minimal FusedMoE mock so weight_loader reaches the BnB path.
+        moe = MagicMock(spec=FusedMoE)
+        moe.quant_config = None
+        moe.quant_method = MagicMock()
+        moe.quant_method.__class__.__name__ = "BitsAndBytesMethod"
+        moe._expert_map = None
+        moe.tp_rank = 0
+
+        # Call the real weight_loader (unbound) with our mock as self.
+        with pytest.raises(ValueError, match="BitsAndBytes"):
+            FusedMoE.weight_loader(
+                moe,
+                param,
+                loaded_weight,
+                weight_name="w2",
+                shard_id="w2",
+                expert_id=0,
+            )
diff --git a/tests/kernels/moe/test_mxfp4_moe.py b/tests/kernels/moe/test_mxfp4_moe.py
new file mode 100644
index 000000000000..11fd853f54f3
--- /dev/null
+++ b/tests/kernels/moe/test_mxfp4_moe.py
@@ -0,0 +1,248 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""Tests for SM100 CUTLASS MXFP4 x MXFP4 grouped MoE kernels."""
+
+import random
+
+import pytest
+import torch
+
+from tests.kernels.utils import torch_moe_single
+from vllm import _custom_ops as ops
+from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed
+
+random.seed(42)
+set_random_seed(42)
+
+MXFP4_BLOCK_SIZE = 32
+
+
+def align(val: int, alignment: int = 128) -> int:
+    return int((val + alignment - 1) // alignment * alignment)
+
+
+def calc_diff(x, y):
+    x, y = x.double(), y.double()
+    denominator = (x * x + y * y).sum()
+    sim = 2 * (x * y).sum() / denominator
+    return 1 - sim
+
+
+def is_sm100_supported() -> bool:
+    return current_platform.is_cuda() and current_platform.is_device_capability_family(
+        100
+    )
+
+
+def compute_ref_output(
+    input_tensor: torch.Tensor,
+    weight_list: list[torch.Tensor],
+    expert_offsets: list[int],
+    expert_offset: int,
+    num_experts: int,
+) -> torch.Tensor:
+    """Reference output using torch_moe_single with top-1 routing."""
+    score = torch.full(
+        (expert_offset, num_experts),
+        -1e9,
+        device=input_tensor.device,
+        dtype=torch.float32,
+    )
+    for g in range(num_experts):
+        start = expert_offsets[g]
+        end = expert_offsets[g + 1] if g + 1 < num_experts else expert_offset
+        score[start:end, g] = 0.0
+
+    return torch_moe_single(
+        input_tensor, torch.stack(weight_list, dim=0), score, topk=1
+    )
+
+
+@pytest.mark.skipif(
+    not is_sm100_supported(),
+    reason="cutlass_mxfp4_group_mm requires CUDA SM100",
+)
+@pytest.mark.parametrize("num_experts", [8, 16, 32])
+@pytest.mark.parametrize("out_dtype", [torch.bfloat16])
+def test_cutlass_mxfp4_grouped_mm(num_experts, out_dtype):
+    """
+    Test the MXFP4 grouped GEMM kernel by:
+    1. Creating random per-expert inputs and weights
+    2. Quantizing both to MXFP4 using the CUDA kernel
+    3. Running the CUTLASS grouped GEMM
+    4. Comparing against BF16 reference
+    """
+    device = "cuda"
+    alignment = 128
+    # N and K must be multiples of 128 for clean swizzle layout
+    n_g = random.randint(1, 16) * alignment
+    k_g = random.randint(1, 16) * alignment
+
+    expert_offset = 0
+    expert_offsets_input = []
+    problem_sizes = []
+    input_list = []
+    weight_list = []
+
+    for g in range(num_experts):
+        m_g = random.randint(1, 256)
+        expert_offsets_input.append(expert_offset)
+        expert_offset += m_g
+        problem_sizes.append([m_g, n_g, k_g])
+
+        input_list.append(
+            torch.normal(0.0, std=0.5, size=(m_g, k_g), device=device, dtype=out_dtype)
+        )
+        weight_list.append(
+            torch.normal(0.0, std=0.5, size=(n_g, k_g), device=device, dtype=out_dtype)
+        )
+
+    input_tensor = torch.concat(input_list, dim=0)  # [M_total, K]
+
+    # --- Quantize INPUTS via mxfp4_experts_quant ---
+    input_bs_offsets = []
+    tot = 0
+    for g in range(num_experts):
+        input_bs_offsets.append(tot)
+        tot += align(problem_sizes[g][0], 128)
+    input_bs_offsets.append(tot)
+
+    _inp_expert_offsets = torch.tensor(
+        expert_offsets_input + [expert_offset], device=device, dtype=torch.int32
+    )
+    _inp_bs_offsets = torch.tensor(input_bs_offsets, device=device, dtype=torch.int32)
+
+    input_quant, input_sf = ops.mxfp4_experts_quant(
+        input_tensor,
+        _inp_expert_offsets,
+        _inp_bs_offsets,
+        num_experts,
+        topk=1,
+    )
+
+    # --- Quantize WEIGHTS via mxfp4_experts_quant ---
+    # Treat each expert's N weight rows as an "expert" with N tokens
+    weight_tensor = torch.concat(weight_list, dim=0)  # [E*N, K]
+    weight_expert_offsets = [g * n_g for g in range(num_experts)] + [num_experts * n_g]
+    # N is always multiple of 128, so blockscale offsets are clean
+    weight_bs_offsets = [g * n_g for g in range(num_experts)] + [num_experts * n_g]
+
+    _wt_expert_offsets = torch.tensor(
+        weight_expert_offsets, device=device, dtype=torch.int32
+    )
+    _wt_bs_offsets = torch.tensor(weight_bs_offsets, device=device, dtype=torch.int32)
+
+    weight_quant, weight_sf = ops.mxfp4_experts_quant(
+        weight_tensor,
+        _wt_expert_offsets,
+        _wt_bs_offsets,
+        num_experts,
+        topk=1,
+    )
+
+    # Reshape weight quantized data to [E, N, K//2]
+    weight_quant = weight_quant[: num_experts * n_g].view(num_experts, n_g, k_g // 2)
+
+    # Reshape weight scale factors to [E, N, K//32]
+    # The quant kernel produces uint8 SF buffer. Each row has K//32 SFs.
+    scales_per_row = k_g // MXFP4_BLOCK_SIZE
+    weight_sf_flat = weight_sf.view(-1)[: num_experts * n_g * scales_per_row]
+    weight_sf_3d = weight_sf_flat.view(num_experts, n_g, scales_per_row)
+
+    # Output
+    output = torch.empty((expert_offset, n_g), device=device, dtype=out_dtype)
+
+    _problem_sizes = torch.tensor(problem_sizes, device=device, dtype=torch.int32)
+    _expert_offsets = torch.tensor(
+        expert_offsets_input, device=device, dtype=torch.int32
+    )
+    _input_bs = torch.tensor(input_bs_offsets[:-1], device=device, dtype=torch.int32)
+
+    # Run the MXFP4 grouped GEMM
+    ops.cutlass_mxfp4_moe_mm(
+        output,
+        input_quant,
+        weight_quant,
+        input_sf,
+        weight_sf_3d,
+        _problem_sizes,
+        _expert_offsets,
+        _input_bs,
+    )
+
+    # Reference: BF16 matmul
+    ref_output = compute_ref_output(
+        input_tensor=input_tensor,
+        weight_list=weight_list,
+        expert_offsets=expert_offsets_input,
+        expert_offset=expert_offset,
+        num_experts=num_experts,
+    )
+
+    # Compare per-expert
+    for g in range(num_experts):
+        start = expert_offsets_input[g]
+        end = expert_offsets_input[g + 1] if g + 1 < num_experts else expert_offset
+        if start == end:
+            continue
+        baseline = ref_output[start:end]
+        actual = output[start:end]
+        diff = calc_diff(actual, baseline)
+        print(
+            f"m_g={end - start} n_g={n_g} k_g={k_g} "
+            f"num_experts={num_experts}, "
+            f"out_dtype={out_dtype}, diff={diff:.5f}"
+        )
+        # FP4 quantization is very lossy (~4 bits precision)
+        # Comparing quantized vs full-precision gives cosine diff of 0.05-0.15
+        assert diff < 0.15, f"Expert {g}: diff={diff:.5f} exceeds threshold"
+
+
+@pytest.mark.skipif(
+    not is_sm100_supported(),
+    reason="mxfp4_experts_quant requires CUDA SM100",
+)
+def test_mxfp4_experts_quant_basic():
+    """
+    Basic smoke test for the MXFP4 experts quantization kernel.
+    """
+    device = "cuda"
+    num_experts = 4
+    k = 256
+    tokens_per_expert = 16
+
+    total_tokens = tokens_per_expert * num_experts
+    input_tensor = torch.randn(total_tokens, k, device=device, dtype=torch.bfloat16) / 5
+
+    expert_offsets = [i * tokens_per_expert for i in range(num_experts + 1)]
+    blockscale_offsets = [
+        align(i * tokens_per_expert, 128) for i in range(num_experts + 1)
+    ]
+
+    _expert_offsets = torch.tensor(expert_offsets, device=device, dtype=torch.int32)
+    _blockscale_offsets = torch.tensor(
+        blockscale_offsets, device=device, dtype=torch.int32
+    )
+
+    output, output_sf = ops.mxfp4_experts_quant(
+        input_tensor,
+        _expert_offsets,
+        _blockscale_offsets,
+        num_experts,
+        topk=1,
+    )
+
+    assert output.shape == (total_tokens, k // 2)
+    assert output.dtype == torch.uint8
+    assert output_sf.dtype == torch.uint8
+    assert output.any(), "Quantized output is all zeros"
+    print(
+        f"MXFP4 experts quant: output shape={output.shape}, sf shape={output_sf.shape}"
+    )
+    print("PASSED")
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v", "-s"])
diff --git a/tests/kernels/moe/test_nvfp4_moe.py b/tests/kernels/moe/test_nvfp4_moe.py
index e12659729c9c..e2a6cd1a7dcc 100644
--- a/tests/kernels/moe/test_nvfp4_moe.py
+++ b/tests/kernels/moe/test_nvfp4_moe.py
@@ -19,7 +19,7 @@
     maybe_make_prepare_finalize,
 )
 from vllm.model_executor.layers.fused_moe.config import nvfp4_moe_quant_config
-from vllm.model_executor.layers.fused_moe.cutlass_moe import (
+from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import (
     CutlassExpertsFp4,
 )
 from vllm.model_executor.layers.fused_moe.prepare_finalize import (
diff --git a/tests/kernels/moe/test_ocp_mx_moe.py b/tests/kernels/moe/test_ocp_mx_moe.py
index e54e7a9cd18e..8ed7757f6553 100644
--- a/tests/kernels/moe/test_ocp_mx_moe.py
+++ b/tests/kernels/moe/test_ocp_mx_moe.py
@@ -28,6 +28,25 @@
     and has_flashinfer()
 )
 
+# ROCm platform and dependencies
+ROCM_AVAILABLE = current_platform.is_rocm()
+ROCM_TRITON_KERNELS_AVAILABLE = False
+ROCM_AITER_AVAILABLE = False
+ROCM_GFX950 = False
+
+if ROCM_AVAILABLE:
+    from vllm._aiter_ops import rocm_aiter_ops
+    from vllm.platforms.rocm import on_gfx950
+    from vllm.utils.import_utils import has_triton_kernels
+
+    ROCM_TRITON_KERNELS_AVAILABLE = has_triton_kernels()
+    ROCM_GFX950 = on_gfx950()
+    ROCM_AITER_AVAILABLE = rocm_aiter_ops.is_enabled()
+
+    if ROCM_AITER_AVAILABLE:
+        from aiter.ops.triton.moe.quant_moe import upcast_from_mxfp
+        from aiter.ops.triton.quant import dynamic_mxfp4_quant
+
 if TRTLLM_GEN_MXFP4_AVAILABLE:
     from flashinfer import (
         fp4_quantize,
@@ -111,6 +130,7 @@ def test_mxfp4_loading_and_execution_moe(vllm_runner, model_case: ModelCase):
 
 def swiglu(x, alpha: float = 1.702, beta: float = 1.0, limit: float | None = None):
     # Note we add an extra bias of 1 to the linear layer
+    # Uses chunked layout: first half is gate, second half is up
     x_glu, x_linear = torch.chunk(x, 2, dim=-1)
     if limit is not None:
         x_glu = x_glu.clamp(max=limit)
@@ -119,6 +139,16 @@ def swiglu(x, alpha: float = 1.702, beta: float = 1.0, limit: float | None = Non
     return out_glu * (x_linear + beta)
 
 
+def swigluoai(x, alpha: float = 1.702, limit: float = 7.0):
+    # OAI swiglu uses interleaved layout: gate/up alternating
+    # See SwigluOAIAndMul in vllm/model_executor/layers/activation.py
+    gate, up = x[..., ::2], x[..., 1::2]
+    gate = gate.clamp(max=limit)
+    up = up.clamp(min=-limit, max=limit)
+    glu = gate * torch.sigmoid(gate * alpha)
+    return (up + 1) * glu
+
+
 fp4_lookup_table = [0, 0.5, 1, 1.5, 2, 3, 4, 6, -0, -0.5, -1, -1.5, -2, -3, -4, -6]
 
 
@@ -168,8 +198,20 @@ def reference_moe(
     beta,
     limit,
     act_type,
-    is_gated,
+    activation: str = "swiglu",
+    use_interleaved_layout: bool = False,
 ):
+    """
+    Reference MoE implementation for accuracy testing.
+
+    Args:
+        activation: One of "swiglu", "silu", "relu2". Controls the activation
+            function used after the first MLP.
+        use_interleaved_layout: If True, uses interleaved gate/up layout
+            (gate=x[..., ::2], up=x[..., 1::2]) as used by SWIGLUOAI.
+            If False, uses chunked layout (gate, up = chunk(x, 2)) as used
+            by standard swiglu/silu.
+    """
     # renormalize routing
     experts = torch.topk(roouting_logits, k=topk, dim=-1, sorted=True)
     expert_weights = torch.nn.functional.softmax(experts.values, dim=1)
@@ -179,12 +221,21 @@ def reference_moe(
     mlp1_weight = w13[expert_indices, ...]
     mlp1_bias = bias13[expert_indices, ...]
     t = torch.einsum("beck,bk->bec", mlp1_weight, t) + mlp1_bias
-    if is_gated:
-        t = swiglu(t, alpha=alpha, beta=beta, limit=limit)
-    else:
+
+    # Apply activation
+    if activation in ("swiglu", "silu"):
+        if use_interleaved_layout:
+            # SWIGLUOAI: interleaved gate/up layout
+            t = swigluoai(t, alpha=alpha, limit=limit)
+        else:
+            # Standard swiglu/silu: chunked layout
+            t = swiglu(t, alpha=alpha, beta=beta, limit=limit)
+    elif activation == "relu2":
         # RELU2_NO_MUL: relu(x)^2
         t = torch.relu(t)
         t = t * t
+    else:
+        raise ValueError(f"Unknown activation: {activation}")
 
     if act_type == "mxfp8":
         t_quantized, t_scale = mxfp8_quantize(
@@ -548,7 +599,9 @@ def test_trtllm_gen_mxfp4_fused_moe(
         hidden_states, hidden_states_scale = mxfp8_quantize(
             hidden_states, is_sf_swizzled_layout=False
         )
-        hidden_states_scale = hidden_states_scale.view(torch.float8_e4m3fn).reshape(-1)
+        hidden_states_scale = hidden_states_scale.view(torch.float8_e4m3fn).reshape(
+            *hidden_states.shape[:-1], -1
+        )
     else:
         hidden_states_scale = None
 
@@ -583,7 +636,8 @@ def test_trtllm_gen_mxfp4_fused_moe(
             beta,
             limit,
             act_type,
-            is_gated=True,
+            activation="swiglu",
+            use_interleaved_layout=False,
         )
         ref_result[start_idx:end_idx].copy_(chunk_result)
 
@@ -595,20 +649,20 @@ def test_trtllm_gen_mxfp4_fused_moe(
     if beta is not None:
         beta = torch.full((num_experts,), beta, device=hidden_states.device)
     tg_result = tg_mxfp4_moe(
-        router_logits,
-        topk,
-        num_experts,
-        intermediate_size,
-        hidden_size,
-        hidden_states,
-        hidden_states_scale,
-        w13,
-        w13_scale,
-        bias13,
-        w2,
-        w2_scale,
-        bias2,
-        act_type,
+        router_logits=router_logits,
+        topk=topk,
+        num_experts=num_experts,
+        intermediate_size=intermediate_size,
+        hidden_size=hidden_size,
+        hidden_states=hidden_states,
+        hidden_states_scale=hidden_states_scale,
+        w13_weight=w13,
+        w13_weight_scale=w13_scale,
+        w13_bias=bias13,
+        w2_weight=w2,
+        w2_weight_scale=w2_scale,
+        w2_bias=bias2,
+        act_type=act_type,
         alpha=alpha,
         beta=beta,
         limit=limit,
@@ -720,7 +774,8 @@ def test_flashinfer_cutlass_mxfp4_fused_moe(
         beta,
         limit,
         "bf16",
-        is_gated=True,
+        activation="swiglu",
+        use_interleaved_layout=False,
     )
 
     from vllm.utils.flashinfer import flashinfer_cutlass_fused_moe
@@ -906,7 +961,8 @@ def dequant_mxfp4_batches(mat_fp4: torch.Tensor, scale_tensor: torch.Tensor):
         beta,
         limit,
         "mxfp8",
-        is_gated=True,
+        activation="swiglu",
+        use_interleaved_layout=False,
     )
 
     # Prepare inputs for FlashInfer CUTLASS fused MoE
@@ -1078,7 +1134,8 @@ def test_trtllm_gen_mxfp8_block_scale_moe(
         beta=0.0,
         limit=None,
         act_type="mxfp8",
-        is_gated=is_gated,
+        activation="swiglu" if is_gated else "relu2",
+        use_interleaved_layout=False,
     )
 
     # Shuffle weights/scales with the same indexed layout used by TRTLLM kernels.
@@ -1148,3 +1205,328 @@ def test_trtllm_gen_mxfp8_block_scale_moe(
 
     # Block-scale MXFP8 kernels are approximate; require majority close.
     check_accuracy(ref, out, atol=0.1, rtol=0.85, percent=0.8)
+
+
+# -----------------------------------------------------------------------------
+# ROCm Oracle-based kernel execution tests
+# -----------------------------------------------------------------------------
+# TODO: Further tighten the accuracy threshold.
+# - More accurate ref moe to include activation quantization
+# - Check aiter kernel accuracy. E.g., quant / dequant details.
+ROCM_BACKEND_CONFIGS = {
+    "TRITON": {
+        "activation": "SWIGLUOAI",
+        "rtol": 0.3,
+        "percent": 0.95,
+        "requires_aiter": False,
+        "requires_gfx950": False,
+    },
+    "TRITON_UNFUSED": {
+        "activation": "SWIGLUOAI",
+        "rtol": 0.3,
+        "percent": 0.95,
+        "requires_aiter": False,
+        "requires_gfx950": False,
+    },
+    "AITER_MXFP4_BF16": {
+        "activation": "SILU",
+        "rtol": 1.0,
+        "percent": 0.7,
+        "requires_aiter": True,
+        "requires_gfx950": True,
+    },
+    "AITER_MXFP4_FP8": {
+        "activation": "SWIGLUOAI",
+        "rtol": 0.5,
+        "percent": 0.9,
+        "requires_aiter": True,
+        "requires_gfx950": True,
+    },
+}
+
+
+@pytest.mark.parametrize("backend_name", list(ROCM_BACKEND_CONFIGS.keys()))
+@pytest.mark.parametrize("topk", [4])
+@pytest.mark.parametrize("num_experts", [8])
+@pytest.mark.parametrize("num_tokens,hidden_size,intermediate_size", [(16, 256, 256)])
+@pytest.mark.skipif(
+    not ROCM_AVAILABLE,
+    reason="ROCm is required for this test",
+)
+@torch.inference_mode()
+def test_rocm_mxfp4_moe_oracle(
+    backend_name: str,
+    topk: int,
+    num_experts: int,
+    num_tokens: int,
+    hidden_size: int,
+    intermediate_size: int,
+):
+    """
+    Test ROCm MXFP4 MoE using oracle functions.
+
+    This test validates that the oracle functions work end-to-end:
+    - select_mxfp4_moe_backend() selects a valid backend
+    - convert_to_mxfp4_moe_kernel_format() converts weights without error
+    - make_mxfp4_moe_quant_config() builds a valid quant config
+    - make_mxfp4_moe_kernel() creates a kernel that runs without error
+    - The kernel output is within accuracy tolerance of reference
+    """
+    config = ROCM_BACKEND_CONFIGS[backend_name]
+
+    # Check platform requirements
+    if not ROCM_TRITON_KERNELS_AVAILABLE:
+        pytest.skip("triton_kernels required for quantization")
+    if config["requires_aiter"] and not ROCM_AITER_AVAILABLE:
+        pytest.skip(f"Backend {backend_name} requires AITER")
+    if config["requires_gfx950"] and not ROCM_GFX950:
+        pytest.skip(f"Backend {backend_name} requires GFX950")
+
+    from vllm.config import VllmConfig, set_current_vllm_config
+    from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+    from vllm.model_executor.layers.fused_moe.oracle.mxfp4 import (
+        Mxfp4MoeBackend,
+        backend_to_kernel_cls,
+        convert_to_mxfp4_moe_kernel_format,
+        make_mxfp4_moe_kernel,
+        make_mxfp4_moe_quant_config,
+    )
+    from vllm.v1.worker.workspace import init_workspace_manager
+
+    # Initialize workspace manager (needed for modular kernels)
+    init_workspace_manager(torch.accelerator.current_device_index())
+
+    # Map string to enum
+    backend = Mxfp4MoeBackend[backend_name]
+
+    # Get experts class from oracle
+    experts_cls_list = backend_to_kernel_cls(backend)
+    if experts_cls_list is None or len(experts_cls_list) == 0:
+        pytest.skip(f"Backend {backend_name} not available")
+
+    # Use first experts class
+    experts_cls = experts_cls_list[0]
+
+    torch.manual_seed(42)
+    dtype = torch.bfloat16
+    device = "cuda:0"
+
+    # Create MoE config with Renormalize routing (required by monolithic kernels)
+    from vllm.model_executor.layers.fused_moe import FusedMoEConfig
+    from vllm.model_executor.layers.fused_moe.config import (
+        FusedMoEParallelConfig,
+        RoutingMethodType,
+    )
+
+    moe_config = FusedMoEConfig(
+        num_experts=num_experts,
+        experts_per_token=topk,
+        hidden_dim=hidden_size,
+        intermediate_size_per_partition=intermediate_size,
+        num_local_experts=num_experts,
+        num_logical_experts=num_experts,
+        moe_parallel_config=FusedMoEParallelConfig.make_no_parallel(),
+        activation=MoEActivation[config["activation"]],
+        in_dtype=dtype,
+        device="cuda",
+        routing_method=RoutingMethodType.Renormalize,
+    )
+
+    # Create float weights in checkpoint format:
+    # w13: [num_experts, 2*intermediate_size, hidden_size]
+    # w2: [num_experts, hidden_size, intermediate_size]
+    w13_float = torch.randn(
+        num_experts, 2 * intermediate_size, hidden_size, dtype=dtype, device=device
+    )
+    w2_float = torch.randn(
+        num_experts, hidden_size, intermediate_size, dtype=dtype, device=device
+    )
+
+    # dynamic_mxfp4_quant expects 2D input, so reshape 3D weights
+    # w13: [E, 2*I, H] -> [E*2*I, H] -> quantize -> [E, 2*I, H//2]
+    # w2: [E, H, I] -> [E*H, I] -> quantize -> [E, H, I//2]
+    w13_2d = w13_float.reshape(-1, hidden_size)
+    w13_quant_2d, w13_scale_2d = dynamic_mxfp4_quant(w13_2d)
+    w13_quant = w13_quant_2d.reshape(num_experts, 2 * intermediate_size, -1)
+    w13_scale = w13_scale_2d.reshape(num_experts, 2 * intermediate_size, -1)
+
+    w2_2d = w2_float.reshape(-1, intermediate_size)
+    w2_quant_2d, w2_scale_2d = dynamic_mxfp4_quant(w2_2d)
+    w2_quant = w2_quant_2d.reshape(num_experts, hidden_size, -1)
+    w2_scale = w2_scale_2d.reshape(num_experts, hidden_size, -1)
+
+    w13_bias = torch.randn(
+        num_experts, 2 * intermediate_size, dtype=dtype, device=device
+    )
+    w2_bias = torch.randn(num_experts, hidden_size, dtype=dtype, device=device)
+
+    # Create static input scales for W4A8 backend (AITER_MXFP4_FP8)
+    w13_input_scale: torch.Tensor | None = None
+    w2_input_scale: torch.Tensor | None = None
+    if backend_name == "AITER_MXFP4_FP8":
+        # Static FP8 scales: one scale per expert
+        w13_input_scale = torch.ones(num_experts, dtype=torch.float32, device=device)
+        w2_input_scale = torch.ones(num_experts, dtype=torch.float32, device=device)
+
+    # Create mock layer for oracle functions
+    class MockLayer:
+        w13_weight: torch.Tensor
+        w2_weight: torch.Tensor
+        w13_weight_scale: torch.Tensor
+        w2_weight_scale: torch.Tensor
+        w13_input_scale: torch.Tensor | None
+        w2_input_scale: torch.Tensor | None
+
+    layer = MockLayer()
+    layer.w13_weight = w13_quant
+    layer.w2_weight = w2_quant
+    layer.w13_weight_scale = w13_scale
+    layer.w2_weight_scale = w2_scale
+    layer.w13_input_scale = w13_input_scale
+    layer.w2_input_scale = w2_input_scale
+
+    # Convert weights using oracle
+    w13_conv, w2_conv, w13_scale_conv, w2_scale_conv, w13_bias_conv, w2_bias_conv = (
+        convert_to_mxfp4_moe_kernel_format(
+            mxfp4_backend=backend,
+            layer=layer,  # type: ignore[arg-type]
+            w13_weight=w13_quant,
+            w2_weight=w2_quant,
+            w13_weight_scale=w13_scale,
+            w2_weight_scale=w2_scale,
+            w13_bias=w13_bias,
+            w2_bias=w2_bias,
+        )
+    )
+
+    # Build quant config using oracle
+    quant_config = make_mxfp4_moe_quant_config(
+        mxfp4_backend=backend,
+        w1_scale=w13_scale_conv,
+        w2_scale=w2_scale_conv,
+        w1_bias=w13_bias_conv,
+        w2_bias=w2_bias_conv,
+        a1_scale=w13_input_scale,
+        a2_scale=w2_input_scale,
+    )
+
+    # Select activation based on backend
+    activation_name = str(config["activation"])
+    activation = MoEActivation[activation_name]
+
+    # Build kernel using oracle
+    assert quant_config is not None, "Failed to create quant config"
+    with set_current_vllm_config(VllmConfig()):
+        kernel = make_mxfp4_moe_kernel(
+            moe_quant_config=quant_config,
+            moe_config=moe_config,
+            mxfp4_backend=backend,
+            experts_cls=experts_cls,
+            routing_tables=None,
+            shared_experts=None,
+        )
+
+        # Create inputs
+        x = torch.randn(num_tokens, hidden_size, dtype=dtype, device=device)
+        router_logits = torch.randn(
+            num_tokens, num_experts, dtype=torch.float32, device=device
+        )
+        topk_weights, topk_ids = torch.topk(router_logits, k=topk, dim=-1, sorted=True)
+        topk_weights = torch.nn.functional.softmax(topk_weights, dim=-1)
+
+        # Run kernel - use appropriate method based on impl type
+        if kernel.is_monolithic:
+            # Monolithic impl uses router_logits
+            out = kernel.apply_monolithic(
+                hidden_states=x,
+                w1=w13_conv,
+                w2=w2_conv,
+                router_logits=router_logits,
+                activation=activation,
+                global_num_experts=num_experts,
+                expert_map=None,
+                apply_router_weight_on_input=False,
+            )
+        else:
+            # Modular impl uses topk_weights and topk_ids
+            out = kernel.apply(
+                hidden_states=x,
+                w1=w13_conv,
+                w2=w2_conv,
+                topk_weights=topk_weights,
+                topk_ids=topk_ids,
+                activation=activation,
+                global_num_experts=num_experts,
+                expert_map=None,
+                apply_router_weight_on_input=False,
+            )
+
+    # Verify output is valid (no NaN/Inf) and has expected shape
+    assert out.shape == (num_tokens, hidden_size), f"Unexpected shape: {out.shape}"
+    assert not torch.any(torch.isnan(out)), "Output contains NaN"
+    assert not torch.any(torch.isinf(out)), "Output contains Inf"
+
+    # Verify output has reasonable magnitude (not all zeros)
+    assert out.abs().max() > 0.01, "Output is effectively zero"
+
+    # Dequantize weights for reference computation
+    w13_dq = upcast_from_mxfp(
+        w13_quant.view(torch.uint8), w13_scale, torch.bfloat16, axis=-1
+    )
+    w2_dq = upcast_from_mxfp(
+        w2_quant.view(torch.uint8), w2_scale, torch.bfloat16, axis=-1
+    )
+
+    # Determine activation type and layout
+    # SWIGLUOAI uses interleaved layout (gate/up alternating)
+    # SILU uses chunked layout (first half gate, second half up)
+    use_interleaved = activation == MoEActivation.SWIGLUOAI
+    if activation in [MoEActivation.SWIGLUOAI, MoEActivation.SILU]:
+        act_name = "swiglu"
+    else:
+        act_name = "relu2"
+
+    ref = reference_moe(
+        router_logits,
+        topk,
+        num_experts,
+        x.to(torch.float32),
+        w13_dq.to(torch.float32),
+        w13_bias.to(torch.float32),
+        w2_dq.to(torch.float32),
+        w2_bias.to(torch.float32),
+        alpha=1.702 if activation == MoEActivation.SWIGLUOAI else 1.0,
+        beta=1.0 if activation == MoEActivation.SWIGLUOAI else 0.0,
+        limit=7.0 if activation == MoEActivation.SWIGLUOAI else None,
+        act_type="bf16",
+        activation=act_name,
+        use_interleaved_layout=use_interleaved,
+    )
+
+    # Compute and print accuracy statistics
+    diff = (ref.float() - out.float()).abs()
+    rel_diff = diff / (ref.float().abs() + 1e-6)
+
+    print(f"\n[{backend_name}] Accuracy statistics:")
+    print(
+        f"  Reference: min={ref.min():.4f}, max={ref.max():.4f}, mean={ref.mean():.4f}"
+    )
+    print(
+        f"  Output:    min={out.min():.4f}, max={out.max():.4f}, mean={out.mean():.4f}"
+    )
+    print(
+        f"  Abs diff:  min={diff.min():.4f}, max={diff.max():.4f}, "
+        f"mean={diff.mean():.4f}"
+    )
+    print(
+        f"  Rel diff:  min={rel_diff.min():.4f}, max={rel_diff.max():.4f}, "
+        f"mean={rel_diff.mean():.4f}"
+    )
+
+    # Check what percentage of values are within various tolerances
+    for rtol in [0.1, 0.5, 1.0, 2.0]:
+        within_tol = (diff <= rtol * out.float().abs()).float().mean()
+        print(f"  Within rtol={rtol}: {within_tol * 100:.1f}%")
+
+    # Check accuracy using per-backend thresholds
+    check_accuracy(ref, out, atol=0.1, rtol=config["rtol"], percent=config["percent"])
diff --git a/tests/kernels/moe/test_rocm_aiter_topk.py b/tests/kernels/moe/test_rocm_aiter_topk.py
index b0ecc9ed71f6..11f9cf28ae87 100644
--- a/tests/kernels/moe/test_rocm_aiter_topk.py
+++ b/tests/kernels/moe/test_rocm_aiter_topk.py
@@ -20,7 +20,7 @@
     pytest.skip("This test can only run on ROCm.", allow_module_level=True)
 
 # this import statement is needed to ensure the ops are registered
-import vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe  # noqa: F401
+import vllm.model_executor.layers.fused_moe.experts.rocm_aiter_moe  # noqa: F401
 
 # need to import once to ensure the ops are registered
 # Check if aiter package is installed
diff --git a/tests/kernels/moe/test_router_gemm.py b/tests/kernels/moe/test_router_gemm.py
deleted file mode 100644
index 906e47708f29..000000000000
--- a/tests/kernels/moe/test_router_gemm.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""Tests for optimized router GEMM kernel
-
-Run `pytest tests/kernels/moe/test_router_gemm.py`.
-"""
-
-import pytest
-import torch
-
-import vllm._custom_ops as ops
-from vllm.platforms import current_platform
-from vllm.utils.torch_utils import set_random_seed
-
-
-@pytest.mark.skipif(
-    not (
-        current_platform.is_cuda()
-        and (
-            current_platform.is_device_capability(90)
-            or current_platform.is_device_capability_family(100)
-        )
-    ),
-    reason="This test only runs on Hopper or Blackwell GPUs.",
-)
-@pytest.mark.parametrize("batch_size", [1, 2, 4, 8])
-@pytest.mark.parametrize("input_dim", [360, 720, 1440, 2880])
-@pytest.mark.parametrize("output_dim", [32, 64, 128])
-def test_gpt_oss_router_gemm(batch_size, input_dim, output_dim):
-    set_random_seed(0)
-    x = torch.randn(batch_size, input_dim, device="cuda", dtype=torch.bfloat16)
-    weight = torch.randn(output_dim, input_dim, device="cuda", dtype=torch.bfloat16)
-    bias = torch.randn(output_dim, device="cuda", dtype=torch.bfloat16)
-
-    output = ops.gpt_oss_router_gemm(x, weight, bias)
-    output_ref = torch.nn.functional.linear(x, weight, bias)
-    torch.testing.assert_close(output, output_ref, atol=1e-2, rtol=1e-2)
diff --git a/tests/kernels/moe/test_routing.py b/tests/kernels/moe/test_routing.py
index 47c0fb8a2c5a..41dea8121938 100644
--- a/tests/kernels/moe/test_routing.py
+++ b/tests/kernels/moe/test_routing.py
@@ -8,6 +8,9 @@
 
 from vllm._aiter_ops import rocm_aiter_ops
 from vllm.distributed.eplb.eplb_state import EplbLayerState
+from vllm.model_executor.layers.fused_moe.router.base_router import (
+    eplb_map_to_physical_and_record,
+)
 from vllm.model_executor.layers.fused_moe.router.router_factory import (
     create_fused_moe_router,
 )
@@ -33,9 +36,11 @@ def _is_aiter_capable() -> bool:
 NUM_EXPERTS = [8, 16, 64]
 
 
-def setup_eplb_state(enable_eplb: bool, global_num_experts: int) -> EplbLayerState:
+def setup_eplb_state(
+    enable_eplb: bool, global_num_experts: int
+) -> EplbLayerState | None:
     if not enable_eplb:
-        return EplbLayerState()
+        return None
 
     # Initialize EPLB state with proper tensors for testing
     # For testing purposes, we use a simple 1:1 mapping (no redundant experts)
@@ -55,11 +60,13 @@ def setup_eplb_state(enable_eplb: bool, global_num_experts: int) -> EplbLayerSta
     logical_replica_count = torch.ones(
         global_num_experts, dtype=torch.int64, device="cuda"
     )
+    should_record_tensor = torch.ones((), dtype=torch.bool, device="cuda")
 
     return EplbLayerState(
         expert_load_view=expert_load_view,
         logical_to_physical_map=logical_to_physical_map,
         logical_replica_count=logical_replica_count,
+        should_record_tensor=should_record_tensor,
     )
 
 
@@ -344,7 +351,6 @@ def test_fused_topk(
         top_k=top_k,
         global_num_experts=global_num_experts,
         renormalize=renormalize,
-        enable_eplb=enable_eplb,
         eplb_state=eplb_state,
     )
 
@@ -395,7 +401,6 @@ def test_fused_topk_bias(
         top_k=top_k,
         global_num_experts=global_num_experts,
         renormalize=renormalize,
-        enable_eplb=enable_eplb,
         eplb_state=eplb_state,
     )
 
@@ -464,7 +469,6 @@ def test_grouped_topk(
         top_k=top_k,
         global_num_experts=global_num_experts,
         renormalize=renormalize,
-        enable_eplb=enable_eplb,
         eplb_state=eplb_state,
     )
 
@@ -535,7 +539,6 @@ def test_custom(
         global_num_experts=global_num_experts,
         custom_routing_function=custom_routing_function,
         renormalize=renormalize,
-        enable_eplb=enable_eplb,
         eplb_state=eplb_state,
     )
 
@@ -575,9 +578,207 @@ def test_custom(
 #     router = create_fused_moe_router(
 #         top_k=top_k,
 #         global_num_experts=global_num_experts,
-#         enable_eplb=enable_eplb,
 #         eplb_state=eplb_state,
 #     )
 
 #     hidden_states, router_logits = make_test_data(m, k, global_num_experts)
 #     topk_weights, topk_ids = router.select_experts(hidden_states, router_logits)
+
+
+# ---------------------------------------------------------------------------
+# Tests for eplb_map_to_physical_and_record
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("record_enabled", [True, False])
+@pytest.mark.parametrize(
+    "l2p_map, replica_count, num_physical, topk_ids, expected_out, expected_load",
+    [
+        pytest.param(
+            # logical i → physical i
+            [[0], [1], [2], [3]],
+            [1, 1, 1, 1],
+            4,
+            [[0, 1], [2, 3], [0, 2]],
+            [[0, 1], [2, 3], [0, 2]],
+            [2, 1, 2, 1],
+            id="identity",
+        ),
+        pytest.param(
+            # logical 0→3, 1→0, 2→1, 3→2
+            [[3], [0], [1], [2]],
+            [1, 1, 1, 1],
+            4,
+            [[0, 1], [2, 3], [0, 2]],
+            [[3, 0], [1, 2], [3, 1]],
+            [1, 2, 1, 2],
+            id="shuffled",
+        ),
+        pytest.param(
+            # logical 0→5, 1→2, 2→7, 3→0 in a larger physical space
+            [[5], [2], [7], [0]],
+            [1, 1, 1, 1],
+            8,
+            [[0, 1], [2, 3]],
+            [[5, 2], [7, 0]],
+            [1, 0, 1, 0, 0, 1, 0, 1],
+            id="sparse",
+        ),
+    ],
+)
+def test_eplb_map_no_redundancy(
+    record_enabled,
+    l2p_map,
+    replica_count,
+    num_physical,
+    topk_ids,
+    expected_out,
+    expected_load,
+):
+    l2p = torch.tensor(l2p_map, dtype=torch.int64, device="cuda")
+    rc = torch.tensor(replica_count, dtype=torch.int64, device="cuda")
+    load = torch.zeros(num_physical, dtype=torch.int32, device="cuda")
+    rec = torch.tensor(record_enabled, dtype=torch.bool, device="cuda")
+    ids = torch.tensor(topk_ids, dtype=torch.int32, device="cuda")
+
+    out = eplb_map_to_physical_and_record(
+        topk_ids=ids,
+        expert_load_view=load,
+        logical_to_physical_map=l2p,
+        logical_replica_count=rc,
+        record_enabled=rec,
+    )
+
+    exp_out = torch.tensor(expected_out, dtype=out.dtype, device="cuda")
+    torch.testing.assert_close(out, exp_out)
+
+    if record_enabled:
+        exp_load = torch.tensor(expected_load, dtype=torch.int32, device="cuda")
+        torch.testing.assert_close(load, exp_load)
+    else:
+        assert load.sum().item() == 0
+
+
+@pytest.mark.parametrize("top_k,R", [(2, 2), (4, 2), (8, 4), (8, 8)])
+def test_eplb_map_hot_expert_replica_balance(top_k, R):
+    """Hot logical expert with R replicas must be balanced across replicas
+    even when ``top_k`` is a multiple of ``R``. In that regime every top-k
+    offset for the hot expert lands on a multiple of ``top_k`` in the flat
+    ``topk_ids`` view, so per-replica assignment must not collapse onto a
+    single replica.
+    """
+    num_tokens = 8192
+    num_logical = 16
+    num_physical = R + (num_logical - 1)
+
+    l2p = torch.full((num_logical, R), -1, dtype=torch.int64, device="cuda")
+    l2p[0] = torch.arange(R, dtype=torch.int64, device="cuda")
+    for i in range(1, num_logical):
+        l2p[i, 0] = R + i - 1
+    rc = torch.tensor([R] + [1] * (num_logical - 1), dtype=torch.int64, device="cuda")
+
+    torch.manual_seed(0)
+    topk_ids = torch.randint(
+        1,
+        num_logical,
+        (num_tokens, top_k),
+        dtype=torch.int32,
+        device="cuda",
+    )
+    topk_ids[:, 0] = 0
+
+    load = torch.zeros(num_physical, dtype=torch.int32, device="cuda")
+    rec = torch.tensor(True, dtype=torch.bool, device="cuda")
+
+    eplb_map_to_physical_and_record(
+        topk_ids=topk_ids,
+        expert_load_view=load,
+        logical_to_physical_map=l2p,
+        logical_replica_count=rc,
+        record_enabled=rec,
+    )
+
+    hot_load = load[:R].float()
+    max_mean = (hot_load.max() / hot_load.mean()).item()
+    assert max_mean < 1.15, (
+        f"Hot expert replicas uneven: {hot_load.tolist()}, max/mean={max_mean:.3f}"
+    )
+
+
+@pytest.mark.parametrize("record_enabled", [True, False])
+@pytest.mark.parametrize(
+    "l2p_map, replica_count, num_physical, topk_ids, expected_out, expected_load",
+    [
+        pytest.param(
+            # experts 0,1 have 2 replicas; 2,3 have 1
+            [[0, 4], [1, 5], [2, -1], [3, -1]],
+            [2, 2, 1, 1],
+            6,
+            [[0, 1], [2, 3], [0, 2]],
+            # replica = (token_idx * KNUTH) & 0xFFFFFFFF % R.
+            # token 0 hash=0x00000000: %2=0, %1=0.
+            # token 1 hash=0x9E3779B9: %2=1, %1=0.
+            # token 2 hash=0x3C6EF372: %2=0, %1=0.
+            [[0, 1], [2, 3], [0, 2]],
+            [2, 1, 2, 1, 0, 0],
+            id="partial",
+        ),
+        pytest.param(
+            # all 4 experts have 2 replicas
+            [[0, 4], [1, 5], [2, 6], [3, 7]],
+            [2, 2, 2, 2],
+            8,
+            [[0, 1], [2, 3], [0, 2]],
+            # token 0 hash=0x00000000: %2=0.
+            # token 1 hash=0x9E3779B9: %2=1.
+            # token 2 hash=0x3C6EF372: %2=0.
+            [[0, 1], [6, 7], [0, 2]],
+            [2, 1, 1, 0, 0, 0, 1, 1],
+            id="full",
+        ),
+        pytest.param(
+            # expert 0: 4 replicas, experts 1,2: 2 replicas
+            [[0, 3, 5, 7], [1, 4, -1, -1], [2, 6, -1, -1]],
+            [4, 2, 2],
+            8,
+            [[0, 1], [2, 0], [1, 2]],
+            # token 0 hash=0x00000000: %4=0, %2=0.
+            # token 1 hash=0x9E3779B9: %4=1, %2=1.
+            # token 2 hash=0x3C6EF372: %4=2, %2=0.
+            [[0, 1], [6, 3], [1, 2]],
+            [1, 2, 1, 1, 0, 0, 1, 0],
+            id="uneven",
+        ),
+    ],
+)
+def test_eplb_map_with_redundancy(
+    record_enabled,
+    l2p_map,
+    replica_count,
+    num_physical,
+    topk_ids,
+    expected_out,
+    expected_load,
+):
+    l2p = torch.tensor(l2p_map, dtype=torch.int64, device="cuda")
+    rc = torch.tensor(replica_count, dtype=torch.int64, device="cuda")
+    load = torch.zeros(num_physical, dtype=torch.int32, device="cuda")
+    rec = torch.tensor(record_enabled, dtype=torch.bool, device="cuda")
+    ids = torch.tensor(topk_ids, dtype=torch.int32, device="cuda")
+
+    out = eplb_map_to_physical_and_record(
+        topk_ids=ids,
+        expert_load_view=load,
+        logical_to_physical_map=l2p,
+        logical_replica_count=rc,
+        record_enabled=rec,
+    )
+
+    exp_out = torch.tensor(expected_out, dtype=out.dtype, device="cuda")
+    torch.testing.assert_close(out, exp_out)
+
+    if record_enabled:
+        exp_load = torch.tensor(expected_load, dtype=torch.int32, device="cuda")
+        torch.testing.assert_close(load, exp_load)
+    else:
+        assert load.sum().item() == 0
diff --git a/tests/kernels/moe/test_shared_fused_moe_routed_transform.py b/tests/kernels/moe/test_shared_fused_moe_routed_transform.py
index 366009dce99a..4515021a4e91 100644
--- a/tests/kernels/moe/test_shared_fused_moe_routed_transform.py
+++ b/tests/kernels/moe/test_shared_fused_moe_routed_transform.py
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """
-Tests for SharedFusedMoE with routed_input_transform.
+Tests for FusedMoE with routed_input_transform.
 
-Verifies that applying routed_input_transform inside SharedFusedMoE
+Verifies that applying routed_input_transform inside FusedMoE
 produces the same results as applying the transform manually outside.
 """
 
@@ -13,9 +13,9 @@
 
 from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.forward_context import set_forward_context
-from vllm.model_executor.layers.fused_moe.shared_fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import FusedMoE
 from vllm.platforms import current_platform
-from vllm.utils.torch_utils import is_torch_equal_or_newer
+from vllm.utils.torch_utils import is_torch_equal_or_newer, set_random_seed
 
 
 class SimpleLinear(nn.Module):
@@ -133,9 +133,9 @@ def test_routed_input_transform_inside_vs_outside(
     workspace_init,
     monkeypatch,
 ):
-    """Compare SharedFusedMoE with transform inside vs manually applying outside.
-    Method A (inside): SharedFusedMoE with routed_input_transform
-    Method B (outside): Manually transform, then SharedFusedMoE without transform
+    """Compare FusedMoE with transform inside vs manually applying outside.
+    Method A (inside): FusedMoE with routed_input_transform
+    Method B (outside): Manually transform, then FusedMoE without transform
     """
     if current_platform.is_rocm():
         monkeypatch.setenv("VLLM_ROCM_USE_AITER", "1" if use_rocm_aiter else "0")
@@ -144,8 +144,7 @@ def test_routed_input_transform_inside_vs_outside(
 
         rocm_aiter_ops.refresh_env_variables()
 
-    torch.manual_seed(42)
-    torch.cuda.manual_seed(42)
+    set_random_seed(42)
 
     num_experts = 8
     top_k = 2
@@ -158,15 +157,14 @@ def test_routed_input_transform_inside_vs_outside(
     routed_transform = SimpleLinear(hidden_size, latent_size, dtype)
 
     with set_current_vllm_config(vllm_config):
-        # Method A: SharedFusedMoE WITH routed_input_transform
-        moe_with_transform = SharedFusedMoE(
+        # Method A: FusedMoE WITH routed_input_transform
+        moe_with_transform = FusedMoE(
             shared_experts=shared_experts,
             routed_input_transform=routed_transform,
             num_experts=num_experts,
             top_k=top_k,
             hidden_size=latent_size,
             intermediate_size=intermediate_size,
-            reduce_results=False,
             renormalize=True,
             params_dtype=dtype,
             tp_size=1,
@@ -175,16 +173,15 @@ def test_routed_input_transform_inside_vs_outside(
             prefix="moe_with_transform",
         )
 
-        # Method B: SharedFusedMoE WITHOUT routed_input_transform
+        # Method B: FusedMoE WITHOUT routed_input_transform
         # Note: shared_experts=None because when transform is done outside,
-        moe_without_transform = SharedFusedMoE(
+        moe_without_transform = FusedMoE(
             shared_experts=None,
             routed_input_transform=None,
             num_experts=num_experts,
             top_k=top_k,
             hidden_size=latent_size,
             intermediate_size=intermediate_size,
-            reduce_results=False,
             renormalize=True,
             params_dtype=dtype,
             tp_size=1,
@@ -213,34 +210,20 @@ def test_routed_input_transform_inside_vs_outside(
         hidden_states = torch.randn(num_tokens, hidden_size, device="cuda", dtype=dtype)
         router_logits = torch.randn(num_tokens, num_experts, device="cuda", dtype=dtype)
 
-        # Clone inputs so any in-place modification by Method A
-        # cannot affect Method B's computation.
-        hidden_states_A = hidden_states.clone()
-        router_logits_A = router_logits.clone()
-
         with set_forward_context(None, vllm_config, num_tokens=num_tokens):
-            shared_out_A, routed_out_A = moe_with_transform(
-                hidden_states_A, router_logits_A
-            )
+            # Method A: combined output (shared + routed)
+            combined_A = moe_with_transform(hidden_states, router_logits)
 
+            # Method B: manually transform, get routed output, add shared
             transformed_hidden = routed_transform(hidden_states)
-            shared_out_B, routed_out_B = moe_without_transform(
-                transformed_hidden, router_logits
-            )
-
-        expected_shared_out = shared_experts(hidden_states)
+            routed_out_B = moe_without_transform(transformed_hidden, router_logits)
+            shared_out_B = shared_experts(hidden_states)
+            combined_B = shared_out_B + routed_out_B
 
-        _assert_close(
-            routed_out_A,
-            routed_out_B,
-            atol=1e-3,
-            rtol=1e-3,
-            label="Routed output: transform inside vs outside",
-        )
-        _assert_close(
-            shared_out_A,
-            expected_shared_out,
+        torch.testing.assert_close(
+            combined_A,
+            combined_B,
             atol=1e-3,
             rtol=1e-3,
-            label="Shared expert output",
+            msg="Combined output should match: transform inside vs outside",
         )
diff --git a/tests/kernels/moe/test_silu_mul_fp8_quant_deep_gemm.py b/tests/kernels/moe/test_silu_mul_fp8_quant_deep_gemm.py
index 4a447ba7cfd9..6a5fad8262c2 100644
--- a/tests/kernels/moe/test_silu_mul_fp8_quant_deep_gemm.py
+++ b/tests/kernels/moe/test_silu_mul_fp8_quant_deep_gemm.py
@@ -7,14 +7,18 @@
 import pytest
 import torch
 
-from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
+from vllm.model_executor.layers.fused_moe.experts.batched_deep_gemm_moe import (
     persistent_masked_m_silu_mul_quant,
 )
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     get_fp8_min_max,
 )
 from vllm.platforms import current_platform
-from vllm.utils.deep_gemm import DeepGemmQuantScaleFMT, has_deep_gemm
+from vllm.utils.deep_gemm import (
+    DeepGemmQuantScaleFMT,
+    has_deep_gemm,
+    transform_sf_into_required_layout,
+)
 from vllm.utils.math_utils import cdiv, round_up
 from vllm.utils.torch_utils import set_random_seed
 
@@ -256,8 +260,6 @@ def test_silu_mul_fp8_quant_deep_gemm(E: int, T: int, H: int, fp8_type: torch.dt
             and current_platform.has_device_capability(100)
             and scale_fmt == DeepGemmQuantScaleFMT.UE8M0
         ):
-            from deep_gemm import transform_sf_into_required_layout
-
             _q, _s = ref_with_scale_fmt(
                 E,
                 T,
diff --git a/tests/kernels/moe/test_silu_mul_per_token_group_quant_fp8_colmajor.py b/tests/kernels/moe/test_silu_mul_per_token_group_quant_fp8_colmajor.py
index cca02928b498..cb01db44f8cb 100644
--- a/tests/kernels/moe/test_silu_mul_per_token_group_quant_fp8_colmajor.py
+++ b/tests/kernels/moe/test_silu_mul_per_token_group_quant_fp8_colmajor.py
@@ -66,6 +66,24 @@ def reference(x: torch.Tensor, use_ue8m0: bool) -> tuple[torch.Tensor, torch.Ten
     return reference_quant(ref_act_out, use_ue8m0)
 
 
+def reference_with_clamp(
+    x: torch.Tensor, use_ue8m0: bool, clamp_limit: float
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Pre-clamp inputs (gate from above, up symmetric) at the input dtype to
+    match the C++ compute() template, then run the standard silu_and_mul +
+    quant reference."""
+    N_2 = x.size(1) // 2
+    dtype = x.dtype
+    gate = x[..., :N_2].to(torch.float32).clamp(max=clamp_limit).to(dtype)
+    up = (
+        x[..., N_2:]
+        .to(torch.float32)
+        .clamp(min=-clamp_limit, max=clamp_limit)
+        .to(dtype)
+    )
+    return reference(torch.cat([gate, up], dim=-1), use_ue8m0)
+
+
 @pytest.mark.parametrize("T", [128, 256, 512])
 @pytest.mark.parametrize("N", [128 * 2, 256 * 2, 768 * 2, 2048 * 2, 7168 * 2])
 @pytest.mark.skipif(
@@ -89,3 +107,32 @@ def test_silu_mul_fp8_quant_deep_gemm(T: int, N: int):
 
     torch.testing.assert_close(output.to(torch.float32), ref_output.to(torch.float32))
     torch.testing.assert_close(output_scales, ref_output_scales)
+
+
+@pytest.mark.parametrize("T", [128, 256, 512])
+@pytest.mark.parametrize("N", [128 * 2, 256 * 2, 768 * 2, 2048 * 2, 7168 * 2])
+@pytest.mark.parametrize("clamp_limit", [7.0, 10.0])
+@pytest.mark.skipif(
+    current_platform.is_rocm(),
+    reason="ROCm does not support DeepGemm.",
+)
+def test_silu_mul_fp8_quant_deep_gemm_clamp(T: int, N: int, clamp_limit: float):
+    set_random_seed(42)
+
+    # Use a wide distribution so values routinely exceed both clamp limits and
+    # the clamp branch is actually exercised (uniform [0, 1) inputs would never
+    # trigger it).
+    input = torch.randn((T, N), dtype=torch.bfloat16, device="cuda") * 8.0
+
+    use_ue8m0 = is_deep_gemm_e8m0_used()
+
+    # Test
+    output, output_scales = silu_mul_per_token_group_quant_fp8_colmajor(
+        input, use_ue8m0=use_ue8m0, clamp_limit=clamp_limit
+    )
+
+    # Reference
+    ref_output, ref_output_scales = reference_with_clamp(input, use_ue8m0, clamp_limit)
+
+    torch.testing.assert_close(output.to(torch.float32), ref_output.to(torch.float32))
+    torch.testing.assert_close(output_scales, ref_output_scales)
diff --git a/tests/kernels/moe/test_topk_softplus_sqrt.py b/tests/kernels/moe/test_topk_softplus_sqrt.py
new file mode 100644
index 000000000000..1b68213fafef
--- /dev/null
+++ b/tests/kernels/moe/test_topk_softplus_sqrt.py
@@ -0,0 +1,188 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+import pytest
+import torch
+import torch.nn.functional as F
+
+from vllm.model_executor.layers.fused_moe.config import (
+    RoutingMethodType,
+    get_routing_method_type,
+)
+from vllm.model_executor.layers.fused_moe.router.fused_topk_bias_router import (
+    fused_topk_bias,
+)
+from vllm.platforms import current_platform
+
+
+def _torch_topk_softplus_sqrt(
+    gating_output: torch.Tensor,
+    topk: int,
+    renormalize: bool,
+    routed_scaling_factor: float,
+    e_score_correction_bias: torch.Tensor | None = None,
+    input_ids: torch.Tensor | None = None,
+    hash_indices_table: torch.Tensor | None = None,
+):
+    scores = F.softplus(gating_output.float()).sqrt()
+    original_scores = scores
+    if e_score_correction_bias is not None:
+        scores_for_choice = scores + e_score_correction_bias.unsqueeze(0)
+    else:
+        scores_for_choice = scores
+
+    if hash_indices_table is not None:
+        assert input_ids is not None
+        topk_ids = hash_indices_table[input_ids.long()]
+    else:
+        topk_ids = torch.topk(scores_for_choice, k=topk, dim=-1, sorted=True)[1]
+
+    topk_weights = original_scores.gather(1, topk_ids.long())
+    if renormalize:
+        topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True)
+    if routed_scaling_factor != 1.0:
+        topk_weights = topk_weights * routed_scaling_factor
+    return topk_weights.to(torch.float32), topk_ids.to(torch.int32)
+
+
+def test_sqrtsoftplus_bias_uses_deepseek_v4_routing_method():
+    assert (
+        get_routing_method_type(
+            scoring_func="sqrtsoftplus",
+            top_k=8,
+            renormalize=True,
+            num_expert_group=None,
+            has_e_score_bias=True,
+        )
+        == RoutingMethodType.DeepseekV4
+    )
+    assert (
+        get_routing_method_type(
+            scoring_func="sqrtsoftplus",
+            top_k=8,
+            renormalize=False,
+            num_expert_group=None,
+            has_e_score_bias=True,
+        )
+        == RoutingMethodType.Unspecified
+    )
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike(),
+    reason="This test is skipped on non-CUDA platform.",
+)
+@pytest.mark.parametrize("num_tokens", [1, 33, 128])
+@pytest.mark.parametrize("hidden_size", [1024, 2048])
+@pytest.mark.parametrize("num_experts", [128, 256, 384, 512])
+@pytest.mark.parametrize("topk", [6, 8, 16])
+@pytest.mark.parametrize("renormalize", [True, False])
+@pytest.mark.parametrize("routed_scaling_factor", [1.0, 1.5])
+@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.half, torch.float32])
+def test_fused_topk_softplus_sqrt(
+    num_tokens: int,
+    hidden_size: int,
+    num_experts: int,
+    topk: int,
+    renormalize: bool,
+    routed_scaling_factor: float,
+    dtype: torch.dtype,
+):
+    torch.manual_seed(0)
+    hidden_states = torch.randn((num_tokens, hidden_size), dtype=dtype, device="cuda")
+    gating_output = torch.randn((num_tokens, num_experts), dtype=dtype, device="cuda")
+    e_score_correction_bias = torch.randn(
+        (num_experts,), dtype=torch.float32, device="cuda"
+    )
+
+    topk_weights_ref, topk_ids_ref = _torch_topk_softplus_sqrt(
+        gating_output=gating_output,
+        topk=topk,
+        renormalize=renormalize,
+        routed_scaling_factor=routed_scaling_factor,
+        e_score_correction_bias=e_score_correction_bias,
+    )
+
+    topk_weights, topk_ids = fused_topk_bias(
+        hidden_states=hidden_states,
+        gating_output=gating_output,
+        scoring_func="sqrtsoftplus",
+        e_score_correction_bias=e_score_correction_bias,
+        topk=topk,
+        renormalize=renormalize,
+        routed_scaling_factor=routed_scaling_factor,
+    )
+
+    # Different kernels may return the topk experts in different orders when
+    # scores tie; sort by expert id before comparing.
+    sorted_ref_ids, idx_ref = topk_ids_ref.sort(dim=-1)
+    sorted_ids, idx_ops = topk_ids.sort(dim=-1)
+    torch.testing.assert_close(sorted_ref_ids, sorted_ids, atol=0, rtol=0)
+
+    sorted_w_ref = topk_weights_ref.gather(1, idx_ref)
+    sorted_w = topk_weights.gather(1, idx_ops)
+    torch.testing.assert_close(sorted_w_ref, sorted_w, atol=2e-2, rtol=1e-2)
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike(),
+    reason="This test is skipped on non-CUDA platform.",
+)
+@pytest.mark.parametrize("num_tokens", [1, 33, 128])
+@pytest.mark.parametrize("hidden_size", [1024, 2048])
+@pytest.mark.parametrize("num_experts", [256, 384, 512])
+@pytest.mark.parametrize("topk", [6, 8, 16])
+@pytest.mark.parametrize("renormalize", [True, False])
+@pytest.mark.parametrize("routed_scaling_factor", [1.0, 2.5])
+@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.half, torch.float32])
+def test_fused_topk_softplus_sqrt_hash(
+    num_tokens: int,
+    hidden_size: int,
+    num_experts: int,
+    topk: int,
+    renormalize: bool,
+    routed_scaling_factor: float,
+    dtype: torch.dtype,
+):
+    torch.manual_seed(0)
+    vocab_size = 1024
+    hidden_states = torch.randn((num_tokens, hidden_size), dtype=dtype, device="cuda")
+    gating_output = torch.randn((num_tokens, num_experts), dtype=dtype, device="cuda")
+    # Per-token fixed expert selection: for each vocab id pick `topk` distinct
+    # experts.
+    hash_indices_table = torch.stack(
+        [torch.randperm(num_experts)[:topk] for _ in range(vocab_size)]
+    ).to(device="cuda", dtype=torch.int32)
+    input_ids = torch.randint(
+        0, vocab_size, (num_tokens,), dtype=torch.int32, device="cuda"
+    )
+
+    topk_weights_ref, topk_ids_ref = _torch_topk_softplus_sqrt(
+        gating_output=gating_output,
+        topk=topk,
+        renormalize=renormalize,
+        routed_scaling_factor=routed_scaling_factor,
+        input_ids=input_ids,
+        hash_indices_table=hash_indices_table,
+    )
+
+    topk_weights, topk_ids = fused_topk_bias(
+        hidden_states=hidden_states,
+        gating_output=gating_output,
+        scoring_func="sqrtsoftplus",
+        e_score_correction_bias=None,
+        topk=topk,
+        renormalize=renormalize,
+        input_tokens=input_ids,
+        hash_indices_table=hash_indices_table,
+        routed_scaling_factor=routed_scaling_factor,
+    )
+
+    sorted_ref_ids, idx_ref = topk_ids_ref.sort(dim=-1)
+    sorted_ids, idx_ops = topk_ids.sort(dim=-1)
+    torch.testing.assert_close(sorted_ref_ids, sorted_ids, atol=0, rtol=0)
+
+    sorted_w_ref = topk_weights_ref.gather(1, idx_ref)
+    sorted_w = topk_weights.gather(1, idx_ops)
+    torch.testing.assert_close(sorted_w_ref, sorted_w, atol=2e-2, rtol=1e-2)
diff --git a/tests/kernels/moe/test_triton_moe_no_act_mul.py b/tests/kernels/moe/test_triton_moe_no_act_mul.py
index 1dfac3cf0fdc..9d16ae5b63db 100644
--- a/tests/kernels/moe/test_triton_moe_no_act_mul.py
+++ b/tests/kernels/moe/test_triton_moe_no_act_mul.py
@@ -15,7 +15,7 @@
 from vllm.model_executor.layers.fused_moe.config import (
     FUSED_MOE_UNQUANTIZED_CONFIG,
 )
-from vllm.model_executor.layers.fused_moe.fused_moe import TritonExperts
+from vllm.model_executor.layers.fused_moe.experts.triton_moe import TritonExperts
 from vllm.platforms import current_platform
 
 # Test parameters
@@ -151,7 +151,7 @@ def test_triton_experts_no_mul_activation(
 @torch.inference_mode()
 def test_workspace_shapes_no_mul_vs_gated():
     """Test that workspace shapes differ correctly between gated and non-gated."""
-    from vllm.model_executor.layers.fused_moe.fused_moe import TritonExperts
+    from vllm.model_executor.layers.fused_moe.experts.triton_moe import TritonExperts
 
     M, N, K, topk = 64, 256, 128, 2
 
@@ -192,7 +192,7 @@ def test_workspace_shapes_no_mul_vs_gated():
 @torch.inference_mode()
 def test_adjust_n_for_activation():
     """Test the adjust_N_for_activation method."""
-    from vllm.model_executor.layers.fused_moe.fused_moe import TritonExperts
+    from vllm.model_executor.layers.fused_moe.experts.triton_moe import TritonExperts
 
     experts = TritonExperts(
         moe_config=make_dummy_moe_config(),
diff --git a/tests/kernels/moe/test_trtllm_nvfp4_moe.py b/tests/kernels/moe/test_trtllm_nvfp4_moe.py
new file mode 100644
index 000000000000..ca10ebaddce0
--- /dev/null
+++ b/tests/kernels/moe/test_trtllm_nvfp4_moe.py
@@ -0,0 +1,276 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Tests for the FlashInfer TRTLLM NvFP4 MoE backend
+(`TrtLlmNvFp4ExpertsModular`).
+
+Covers the activations the wrapper claims to support — SiLU, RELU^2 (non-gated),
+and GELU — including a Gemma4-shaped case (128 experts, top-k 8,
+intermediate_size 704) that exercises the non-256-aligned padding path.
+"""
+
+import pytest
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from tests.kernels.moe.utils import make_test_quant_config
+from tests.kernels.quantization.nvfp4_utils import (
+    FLOAT4_E2M1_MAX,
+    FLOAT8_E4M3_MAX,
+    dequantize_nvfp4_to_dtype,
+)
+from tests.kernels.utils import torch_moe
+from vllm import _custom_ops as ops
+from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config
+from vllm.model_executor.custom_op import CustomOp, op_registry
+from vllm.model_executor.layers.activation import SiluAndMulWithClamp
+from vllm.model_executor.layers.fused_moe import fused_topk
+from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+from vllm.model_executor.layers.fused_moe.all2all_utils import (
+    maybe_make_prepare_finalize,
+)
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEParallelConfig,
+    RoutingMethodType,
+)
+from vllm.model_executor.layers.fused_moe.experts.trtllm_nvfp4_moe import (
+    TrtLlmNvFp4ExpertsModular,
+)
+from vllm.platforms import current_platform
+from vllm.utils.flashinfer import has_flashinfer_trtllm_fused_moe
+from vllm.utils.math_utils import next_power_of_2
+from vllm.utils.torch_utils import set_random_seed
+
+if pytest and (
+    not has_flashinfer_trtllm_fused_moe()
+    or not current_platform.has_device_capability(100)
+):
+    pytest.skip(
+        "Requires flashinfer TRTLLM fused MoE and NvFP4 (SM100)",
+        allow_module_level=True,
+    )
+
+# (m, n, k) = (tokens, intermediate_size_per_partition, hidden_dim).
+# The (64, 704, 4096) row matches Gemma4's MoE shape and exercises the
+# non-256-aligned intermediate (padded inside the wrapper).
+MNK_FACTORS = [
+    (2, 1024, 1024),
+    (64, 2048, 1536),
+    (64, 704, 4096),
+]
+
+_SWIGLU_LIMIT = 0.1
+_LARGE_OUTPUT1_SCALE = 32768.0
+_CLAMP_OP_NAME = "test_silu_and_mul_with_clamp"
+
+# Test-only fixed-limit clamp. ``custom_op_name`` makes the class itself
+# valid as an ``activation=`` argument to ``torch_moe`` (which only looks
+# up ``activation.custom_op_name`` in ``op_registry``), so no
+# ``MoEActivation`` enum extension is needed.
+if _CLAMP_OP_NAME not in op_registry:
+
+    @CustomOp.register(_CLAMP_OP_NAME)
+    class _SiluAndMulWithClampTest(SiluAndMulWithClamp):
+        custom_op_name = _CLAMP_OP_NAME
+
+        def __init__(self, *, compile_native: bool = True) -> None:
+            super().__init__(_SWIGLU_LIMIT, compile_native=compile_native)
+
+
+SILU_WITH_CLAMP = op_registry[_CLAMP_OP_NAME]
+
+
+ACTIVATION_CASES = [
+    pytest.param(MoEActivation.SILU, MoEActivation.SILU, None, id="silu"),
+    pytest.param(MoEActivation.SILU, SILU_WITH_CLAMP, _SWIGLU_LIMIT, id="silu_clamp"),
+    pytest.param(
+        MoEActivation.RELU2_NO_MUL,
+        MoEActivation.RELU2_NO_MUL,
+        None,
+        id="relu2_no_mul",
+    ),
+    pytest.param(MoEActivation.GELU, MoEActivation.GELU, None, id="gelu"),
+]
+
+
+@pytest.mark.parametrize("m,n,k", MNK_FACTORS)
+@pytest.mark.parametrize("e", [128])
+@pytest.mark.parametrize("topk", [8])
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
+@pytest.mark.parametrize("activation,torch_activation,swiglu_limit", ACTIVATION_CASES)
+@torch.inference_mode()
+def test_trtllm_fp4_moe_no_graph(
+    m: int,
+    n: int,
+    k: int,
+    e: int,
+    topk: int,
+    dtype: torch.dtype,
+    activation: MoEActivation,
+    torch_activation: MoEActivation | type[SiluAndMulWithClamp],
+    swiglu_limit: float | None,
+    workspace_init,
+):
+    # FlashInfer's trtllm_batched_gemm_runner has no precompiled tile
+    # config for non-gated RELU^2 at non-256-aligned intermediate_size
+    # (e.g. Gemma4's 704). Other activations (SiLU/GELU) work at the
+    # same shape. Tracked upstream in FlashInfer; unrelated to this
+    # PR's GELU enablement (Gemma4 uses GeGLU, not non-gated RELU^2).
+    if activation == MoEActivation.RELU2_NO_MUL and (m, n, k) == (64, 704, 4096):
+        pytest.skip(
+            "FlashInfer trtllm_batched_gemm_runner: no valid tile config "
+            "for non-gated RELU^2 at intermediate_size=704 "
+            "(getValidConfigIndices throws). Tracked upstream."
+        )
+
+    set_random_seed(7)
+    with set_current_vllm_config(
+        VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=1))
+    ):
+        a = torch.randn((m, k), device="cuda", dtype=dtype) / 10
+
+        quant_blocksize = 16
+        is_gated_act = activation.is_gated
+
+        w1_q, w2_q, quant_config = make_test_quant_config(
+            e,
+            n,
+            k,
+            in_dtype=dtype,
+            quant_dtype="nvfp4",
+            block_shape=None,
+            per_act_token_quant=False,
+            make_gate=is_gated_act,
+            # The TRT-LLM FP4 MoE kernel rejects swizzled (padded) activation
+            # scales — its numel-based vec_size check requires numel == M*K/16.
+            # Match what oracle/nvfp4.py does for this backend.
+            is_scale_swizzled=False,
+        )
+        quant_config.gemm1_clamp_limit = swiglu_limit
+        if swiglu_limit is not None:
+            assert quant_config.g1_alphas is not None
+            assert quant_config.a2_gscale is not None
+            assert torch.all(quant_config.a2_gscale == 1)
+            # With a2_gscale == 1, g1_alphas is the TRTLLM
+            # output1_scale_gate_scalar. Make it large enough to catch
+            # clamp/output-scale coupling in the FlashInfer kernel wrapper.
+            quant_config.g1_alphas.fill_(_LARGE_OUTPUT1_SCALE)
+
+        score = torch.randn((m, e), device="cuda", dtype=dtype)
+        topk_weights, topk_ids, _ = fused_topk(a, score, topk, renormalize=False)
+
+        moe_config = FusedMoEConfig(
+            num_experts=e,
+            experts_per_token=topk,
+            hidden_dim=k,
+            intermediate_size_per_partition=n,
+            num_local_experts=e,
+            num_logical_experts=e,
+            activation=activation,
+            device="cuda",
+            moe_parallel_config=FusedMoEParallelConfig.make_no_parallel(),
+            in_dtype=dtype,
+            is_act_and_mul=is_gated_act,
+            routing_method=RoutingMethodType.TopK,
+            max_num_tokens=next_power_of_2(m),
+        )
+
+        trtllm_inner = TrtLlmNvFp4ExpertsModular(
+            moe_config=moe_config, quant_config=quant_config
+        )
+        # Mimic the production weight-loader path so per-expert tensors that
+        # are normally precomputed in process_weights_after_loading (g1_scale_c
+        # and the rescaled gemm1_clamp_limit) get materialized. The test's
+        # synthetic quant_config has g1_alphas/g2_alphas already at their
+        # post-fusion values, so we set w13_weight_scale_2 to alias g1_alphas
+        # (same tensor) and use input_scale=1 to make the in-place
+        # weight_scale_2 *= input_scale step a no-op.
+        fake_layer = torch.nn.Module()
+        fake_layer.w13_weight_scale_2 = quant_config.g1_alphas
+        fake_layer.w2_weight_scale_2 = quant_config.g2_alphas
+        fake_layer.w13_input_scale = torch.ones_like(quant_config.g1_alphas)
+        fake_layer.w2_input_scale = torch.ones_like(quant_config.g2_alphas)
+        trtllm_inner.process_weights_after_loading(fake_layer)
+
+        trtllm_experts = mk.FusedMoEKernel(
+            maybe_make_prepare_finalize(
+                moe=moe_config,
+                quant_config=quant_config,
+                allow_new_interface=True,
+                use_monolithic=False,
+            ),
+            trtllm_inner,
+            inplace=False,
+        )
+
+        trtllm_output = trtllm_experts.apply(
+            hidden_states=a,
+            w1=w1_q,
+            w2=w2_q,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            activation=activation,
+            global_num_experts=e,
+            expert_map=None,
+            apply_router_weight_on_input=False,
+        )
+
+        # Reference: round-trip activations and weights through FP4
+        # quant/dequant so the comparison isolates kernel/activation behavior
+        # from quantization error.
+        a_global_scale = ((FLOAT8_E4M3_MAX * FLOAT4_E2M1_MAX) / a.abs().max()).to(
+            torch.float32
+        )
+        a_fp4, a_scale_interleaved = ops.scaled_fp4_quant(a, a_global_scale)
+        a_in_dtype = dequantize_nvfp4_to_dtype(
+            a_fp4,
+            a_scale_interleaved,
+            a_global_scale,
+            dtype=a.dtype,
+            device=a.device,
+            block_size=quant_blocksize,
+        )
+
+        w1_d = torch.empty(
+            (e, (2 if is_gated_act else 1) * n, k), device="cuda", dtype=dtype
+        )
+        w2_d = torch.empty((e, k, n), device="cuda", dtype=dtype)
+        for idx in range(e):
+            w1_d[idx] = dequantize_nvfp4_to_dtype(
+                w1_q[idx],
+                quant_config.w1_scale[idx],
+                (1 / quant_config.g1_alphas[idx]),
+                dtype=dtype,
+                device=w1_q.device,
+                block_size=quant_blocksize,
+            )
+            w2_d[idx] = dequantize_nvfp4_to_dtype(
+                w2_q[idx],
+                quant_config.w2_scale[idx],
+                (1 / quant_config.g2_alphas[idx]),
+                dtype=dtype,
+                device=w2_q.device,
+                block_size=quant_blocksize,
+            )
+
+        torch_output = torch_moe(
+            a_in_dtype, w1_d, w2_d, score, topk, activation=torch_activation
+        )
+
+        torch.testing.assert_close(torch_output, trtllm_output, atol=2e-1, rtol=2e-1)
+
+
+if __name__ == "__main__":
+    test_trtllm_fp4_moe_no_graph(
+        64,
+        704,
+        4096,
+        128,
+        8,
+        torch.bfloat16,
+        MoEActivation.GELU,
+        MoEActivation.GELU,
+        None,
+        None,
+    )
diff --git a/tests/kernels/moe/test_unquantized_backend_selection.py b/tests/kernels/moe/test_unquantized_backend_selection.py
index 1d9e1d685756..bc322aed3903 100644
--- a/tests/kernels/moe/test_unquantized_backend_selection.py
+++ b/tests/kernels/moe/test_unquantized_backend_selection.py
@@ -11,6 +11,11 @@
 )
 from vllm.platforms import current_platform
 
+skipif_not_cuda_rocm = pytest.mark.skipif(
+    not (current_platform.is_cuda() or current_platform.is_rocm()),
+    reason="Only supported on CUDA/ROCm platforms.",
+)
+
 
 @pytest.mark.parametrize(
     "platform_method,expected_backend",
@@ -24,7 +29,7 @@
     ],
 )
 @patch(
-    "vllm.model_executor.layers.fused_moe.oracle.unquantized.has_flashinfer",
+    "vllm.utils.flashinfer.has_flashinfer",
     return_value=False,
 )
 @patch(
@@ -54,18 +59,33 @@ def test_select_default_backend_by_platform(
         # Set only the specified platform to True
         getattr(mock_platform, platform_method).return_value = True
 
+    with (
+        patch.object(current_platform, "is_cuda", return_value=False),
+        patch.object(current_platform, "is_rocm", return_value=False),
+        patch.object(current_platform, "is_cpu", return_value=False),
+        patch.object(current_platform, "is_xpu", return_value=False),
+        patch.object(current_platform, "is_tpu", return_value=False),
+        patch.object(current_platform, "is_out_of_tree", return_value=False),
+        patch.object(current_platform, platform_method, return_value=True),
+    ):
         moe_config = make_dummy_moe_config()
-        selected_backend = select_unquantized_moe_backend(
-            moe_config=moe_config,
-            use_ep=False,
-            use_dp=False,
+        selected_backend, expert_cls = select_unquantized_moe_backend(
+            moe_config=moe_config
         )
 
         assert selected_backend == expected_backend
+        if expected_backend in [
+            UnquantizedMoeBackend.CPU,
+            UnquantizedMoeBackend.OOT,
+            UnquantizedMoeBackend.TPU,
+        ]:
+            assert expert_cls is None
+        else:
+            assert expert_cls is not None
 
 
 @patch(
-    "vllm.model_executor.layers.fused_moe.oracle.unquantized.has_flashinfer",
+    "vllm.utils.flashinfer.has_flashinfer",
     return_value=False,
 )
 @patch(
@@ -88,91 +108,170 @@ def test_select_rocm_aiter_backend(mock_aiter_enabled, mock_has_flashinfer):
         mock_platform.is_out_of_tree.return_value = False
 
         moe_config = make_dummy_moe_config()
-        selected_backend = select_unquantized_moe_backend(
+        selected_backend, expert_cls = select_unquantized_moe_backend(
             moe_config=moe_config,
-            use_ep=False,
-            use_dp=False,
         )
 
         assert selected_backend == UnquantizedMoeBackend.AITER
+        assert expert_cls is not None
 
 
 @patch(
-    "vllm.model_executor.layers.fused_moe.oracle.unquantized.has_flashinfer",
-    return_value=True,
-)
-@patch(
-    "vllm.model_executor.layers.fused_moe.oracle.unquantized.is_supported_config_trtllm_bf16",
+    "vllm.model_executor.layers.fused_moe.experts.trtllm_bf16_moe.TrtLlmBf16Experts.is_supported_config",
     return_value=(True, None),
 )
 @pytest.mark.skipif(
     not current_platform.is_cuda(), reason="Only supported on NVIDIA platforms."
 )
-def test_select_cuda_flashinfer_trtllm_backend(
-    mock_has_flashinfer, mock_is_supported_trtllm, monkeypatch
-):
+def test_select_cuda_flashinfer_trtllm_backend(mock_is_supported_trtllm, monkeypatch):
     """Test CUDA backend selection when FlashInfer TRTLLM is available and enabled."""
-    with patch(
-        "vllm.model_executor.layers.fused_moe.oracle.unquantized.current_platform"
-    ) as mock_platform:
-        # Set as CUDA platform
-        mock_platform.is_cuda.return_value = True
-        mock_platform.is_rocm.return_value = False
-        mock_platform.is_cpu.return_value = False
-        mock_platform.is_xpu.return_value = False
-        mock_platform.is_tpu.return_value = False
-        mock_platform.is_out_of_tree.return_value = False
-
+    with (
+        patch.object(current_platform, "is_cuda", return_value=True),
+        patch.object(current_platform, "is_rocm", return_value=False),
+        patch.object(current_platform, "is_cpu", return_value=False),
+        patch.object(current_platform, "is_xpu", return_value=False),
+        patch.object(current_platform, "is_tpu", return_value=False),
+        patch.object(current_platform, "is_out_of_tree", return_value=False),
+        patch.object(current_platform, "has_device_capability", return_value=True),
+    ):
         monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP16", "1")
 
         moe_config = make_dummy_moe_config()
+        # TRTLLM requires EP and does not support DP
+        moe_config.moe_parallel_config.use_ep = True
+        moe_config.moe_parallel_config.use_dp = False
 
-        selected_backend = select_unquantized_moe_backend(
-            moe_config=moe_config,
-            use_ep=True,
-            use_dp=False,
+        selected_backend, experts_cls = select_unquantized_moe_backend(
+            moe_config=moe_config
         )
 
         assert selected_backend == UnquantizedMoeBackend.FLASHINFER_TRTLLM
+        assert experts_cls is not None
 
 
 @patch(
-    "vllm.model_executor.layers.fused_moe.oracle.unquantized.has_flashinfer",
+    "vllm.utils.flashinfer.has_flashinfer",
     return_value=True,
 )
 @patch(
-    "vllm.model_executor.layers.fused_moe.oracle.unquantized.is_supported_config_trtllm_bf16",
+    "vllm.model_executor.layers.fused_moe.experts.trtllm_bf16_moe.TrtLlmBf16Experts.is_supported_config",
     return_value=(False, None),
 )
+@patch(
+    "vllm.model_executor.layers.fused_moe.experts.flashinfer_cutlass_moe.FlashInferExperts.is_supported_config",
+    return_value=(True, None),
+)
 @pytest.mark.skipif(
     not current_platform.is_cuda(), reason="Only supported on NVIDIA platforms."
 )
 def test_select_cuda_flashinfer_cutlass_backend(
-    mock_has_flashinfer, mock_is_supported_trtllm, monkeypatch
+    mock_has_flashinfer,
+    mock_is_supported_trtllm,
+    mock_is_supported_cutlass,
+    monkeypatch,
 ):
     """Test CUDA backend selection when FlashInfer TRTLLM is not available
     and FlashInfer CUTLASS is available."""
-    with patch(
-        "vllm.model_executor.layers.fused_moe.oracle.unquantized.current_platform"
-    ) as mock_platform:
-        # Set as CUDA platform with Hopper capability
-        mock_platform.is_cuda.return_value = True
-        mock_platform.is_rocm.return_value = False
-        mock_platform.is_cpu.return_value = False
-        mock_platform.is_xpu.return_value = False
-        mock_platform.is_tpu.return_value = False
-        mock_platform.is_out_of_tree.return_value = False
-        mock_platform.has_device_capability.return_value = True  # SM90+
-
+    with (
+        patch.object(current_platform, "is_cuda", return_value=True),
+        patch.object(current_platform, "is_rocm", return_value=False),
+        patch.object(current_platform, "is_cpu", return_value=False),
+        patch.object(current_platform, "is_xpu", return_value=False),
+        patch.object(current_platform, "is_tpu", return_value=False),
+        patch.object(current_platform, "is_out_of_tree", return_value=False),
+        patch.object(current_platform, "has_device_capability", return_value=True),
+    ):
         # Enable FlashInfer via env var
         monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP16", "1")
 
         moe_config = make_dummy_moe_config()
+        # CUTLASS requires EP and does not support DP
+        moe_config.moe_parallel_config.use_ep = True
+        moe_config.moe_parallel_config.use_dp = False
 
-        selected_backend = select_unquantized_moe_backend(
-            moe_config=moe_config,
-            use_ep=True,  # CUTLASS requires EP
-            use_dp=False,  # CUTLASS doesn't support DP
+        selected_backend, experts_cls = select_unquantized_moe_backend(
+            moe_config=moe_config
         )
 
         assert selected_backend == UnquantizedMoeBackend.FLASHINFER_CUTLASS
+        assert experts_cls is not None
+
+
+@skipif_not_cuda_rocm
+def test_select_lora_backend_prefers_triton():
+    """LoRA-enabled unquantized MoE should select Triton backend."""
+    moe_config = make_dummy_moe_config()
+    moe_config.is_lora_enabled = True
+    selected_backend, experts_cls = select_unquantized_moe_backend(
+        moe_config=moe_config
+    )
+
+    assert selected_backend == UnquantizedMoeBackend.TRITON
+    assert experts_cls is not None
+
+
+@skipif_not_cuda_rocm
+def test_select_lora_explicit_non_triton_backend():
+    """LoRA should override explicit non-Triton backend to Triton."""
+    moe_config = make_dummy_moe_config()
+    moe_config.is_lora_enabled = True
+
+    # Use string from mapping in function map_unquantized_backend()
+    moe_config.moe_backend = "flashinfer_cutlass"
+
+    selected_backend, experts_cls = select_unquantized_moe_backend(
+        moe_config=moe_config
+    )
+
+    assert selected_backend == UnquantizedMoeBackend.TRITON
+    assert experts_cls is not None
+
+
+@skipif_not_cuda_rocm
+@pytest.mark.parametrize("is_lora_enabled", [False, True])
+def test_select_explicit_triton_backend(is_lora_enabled):
+    """Explicit triton backend selection should return Triton."""
+    moe_config = make_dummy_moe_config()
+    moe_config.is_lora_enabled = is_lora_enabled
+    moe_config.moe_backend = "triton"
+
+    selected_backend, experts_cls = select_unquantized_moe_backend(
+        moe_config=moe_config
+    )
+
+    assert selected_backend == UnquantizedMoeBackend.TRITON
+    assert experts_cls is not None
+
+
+@skipif_not_cuda_rocm
+def test_select_explicit_triton_ignores_flashinfer_env(monkeypatch):
+    """Explicit triton backend should override FlashInfer env selection."""
+    monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP16", "1")
+    monkeypatch.setenv("VLLM_FLASHINFER_MOE_BACKEND", "throughput")
+
+    moe_config = make_dummy_moe_config()
+    moe_config.is_lora_enabled = False
+    moe_config.moe_backend = "triton"
+
+    selected_backend, experts_cls = select_unquantized_moe_backend(
+        moe_config=moe_config
+    )
+
+    assert selected_backend == UnquantizedMoeBackend.TRITON
+    assert experts_cls is not None
+
+
+@skipif_not_cuda_rocm
+def test_select_lora_ignores_flashinfer_env(monkeypatch):
+    """LoRA path should still choose Triton even if FlashInfer env is on."""
+    monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP16", "1")
+    monkeypatch.setenv("VLLM_FLASHINFER_MOE_BACKEND", "throughput")
+
+    moe_config = make_dummy_moe_config()
+    moe_config.is_lora_enabled = True
+    selected_backend, experts_cls = select_unquantized_moe_backend(
+        moe_config=moe_config
+    )
+
+    assert selected_backend == UnquantizedMoeBackend.TRITON
+    assert experts_cls is not None
diff --git a/tests/kernels/moe/test_zero_expert_moe.py b/tests/kernels/moe/test_zero_expert_moe.py
new file mode 100644
index 000000000000..f10459aa5192
--- /dev/null
+++ b/tests/kernels/moe/test_zero_expert_moe.py
@@ -0,0 +1,283 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for FusedMoE with zero experts.
+
+Verifies that:
+- The ZeroExpertRouter is properly created and used as the layer router.
+- A forward pass through FusedMoE with zero experts produces correct output.
+- The output decomposes correctly into real expert + zero expert contributions.
+
+Note: tests generated with Claude.
+"""
+
+import pytest
+import torch
+
+from vllm.config import VllmConfig, set_current_vllm_config
+from vllm.forward_context import get_forward_context, set_forward_context
+from vllm.model_executor.layers.fused_moe.layer import FusedMoE
+from vllm.model_executor.layers.fused_moe.router.zero_expert_router import (
+    ZeroExpertRouter,
+)
+from vllm.v1.worker.workspace import init_workspace_manager
+
+
+@pytest.fixture
+def zero_expert_moe(dist_init, default_vllm_config):
+    """Create a FusedMoE layer with zero experts."""
+    num_experts = 4
+    top_k = 2
+    # hidden_size must be >= 256 for the zero expert identity kernel to
+    # produce output (its BLOCK_SIZE=256 causes grid=0 when hidden_dim<256).
+    hidden_size = 256
+    intermediate_size = 512
+    zero_expert_num = 1
+
+    e_score_correction_bias = torch.zeros(
+        num_experts + zero_expert_num,
+        dtype=torch.float32,
+        device="cuda",
+    )
+
+    vllm_config = VllmConfig()
+    vllm_config.compilation_config.static_forward_context = dict()
+
+    with set_current_vllm_config(vllm_config), set_forward_context(None, vllm_config):
+        init_workspace_manager(torch.accelerator.current_device_index())
+
+        layer = FusedMoE(
+            zero_expert_type="identity",
+            e_score_correction_bias=e_score_correction_bias,
+            num_experts=num_experts,
+            top_k=top_k,
+            hidden_size=hidden_size,
+            intermediate_size=intermediate_size,
+            params_dtype=torch.bfloat16,
+            prefix="test_zero_expert_moe",
+            renormalize=False,
+            routed_scaling_factor=1.0,
+            scoring_func="softmax",
+        ).cuda()
+
+        layer.quant_method.process_weights_after_loading(layer)
+
+        yield layer, vllm_config
+
+
+@pytest.mark.parametrize("num_tokens", [1, 32])
+def test_zero_expert_moe_router_is_zero_expert_router(zero_expert_moe, num_tokens):
+    """Verify that FusedMoE with zero_expert_type creates a ZeroExpertRouter."""
+    layer, _ = zero_expert_moe
+    assert isinstance(layer.router, ZeroExpertRouter), (
+        f"Expected ZeroExpertRouter but got {type(layer.router).__name__}."
+    )
+
+
+@pytest.mark.parametrize("num_tokens", [1, 32])
+def test_zero_expert_moe_no_custom_routing_fn(zero_expert_moe, num_tokens):
+    """Verify that custom_routing_function is not set (routing is handled
+    by ZeroExpertRouter, not a memoizing closure)."""
+    layer, _ = zero_expert_moe
+    assert layer.custom_routing_function is None
+
+
+@pytest.mark.parametrize("num_tokens", [1, 32])
+def test_zero_expert_moe_forward(zero_expert_moe, num_tokens):
+    """Run a forward pass through FusedMoE with zero experts and verify output shape."""
+    layer, vllm_config = zero_expert_moe
+
+    hidden_size = layer.hidden_size
+    num_experts = 4
+    zero_expert_num = 1
+    total_experts = num_experts + zero_expert_num
+
+    hidden_states = torch.randn(
+        num_tokens, hidden_size, dtype=torch.bfloat16, device="cuda"
+    )
+    router_logits = torch.randn(
+        num_tokens, total_experts, dtype=torch.float32, device="cuda"
+    )
+
+    # Initialize weights to small random values to avoid NaN from
+    # uninitialized memory.
+    with torch.no_grad():
+        for param in layer.parameters():
+            if param.dtype.is_floating_point:
+                param.normal_(0, 0.01)
+
+    with set_current_vllm_config(vllm_config), set_forward_context(None, vllm_config):
+        get_forward_context().all_moe_layers = None
+        output = layer.forward(hidden_states, router_logits)
+
+    assert output.shape == hidden_states.shape, (
+        f"Expected output shape {hidden_states.shape}, got {output.shape}"
+    )
+    assert output.dtype == hidden_states.dtype
+    assert not torch.isnan(output).any(), "Output contains NaN values"
+
+
+@pytest.mark.parametrize("num_tokens", [1, 32])
+def test_zero_expert_moe_output_decomposition(zero_expert_moe, num_tokens):
+    """Validate that the FusedMoE output equals a plain FusedMoE
+    output (real experts only) plus the zero expert contribution.
+
+    The key invariant is:
+        zero_layer.forward(h, r_full) == plain_layer.forward(h, r_real)
+                                         + zero_expert_output
+
+    We create a plain FusedMoE layer with the same weights and real-expert-only
+    router logits, compute the zero expert output via the ZeroExpertRouter, and
+    verify the sum matches the FusedMoE output.
+    """
+    layer, vllm_config = zero_expert_moe
+    num_experts = 4
+    zero_expert_num = 1
+    total_experts = num_experts + zero_expert_num
+
+    hidden_states = torch.randn(
+        num_tokens, layer.hidden_size, dtype=torch.bfloat16, device="cuda"
+    )
+    router_logits = torch.randn(
+        num_tokens, total_experts, dtype=torch.float32, device="cuda"
+    )
+
+    with torch.no_grad():
+        for param in layer.parameters():
+            if param.dtype.is_floating_point:
+                param.normal_(0, 0.01)
+
+    with set_current_vllm_config(vllm_config), set_forward_context(None, vllm_config):
+        get_forward_context().all_moe_layers = None
+
+        # Create a plain FusedMoE layer with the same config but no zero
+        # experts. Use a separate prefix to avoid collision.
+        plain_layer = FusedMoE(
+            num_experts=num_experts,
+            top_k=layer.top_k,
+            hidden_size=layer.hidden_size,
+            intermediate_size=layer.intermediate_size_per_partition,
+            params_dtype=torch.bfloat16,
+            prefix="test_zero_expert_moe_plain",
+            renormalize=False,
+            scoring_func="softmax",
+            e_score_correction_bias=layer.e_score_correction_bias,
+        ).cuda()
+
+        # Share weights from the zero expert layer.
+        plain_layer.w13_weight.data.copy_(layer.w13_weight.data)
+        plain_layer.w2_weight.data.copy_(layer.w2_weight.data)
+        plain_layer.quant_method.process_weights_after_loading(plain_layer)
+
+        # Compute routing via the ZeroExpertRouter. This produces masked
+        # topk_weights/topk_ids (zero expert entries have weight=0, id=0)
+        # and stores zero_expert_output as a side effect.
+        topk_weights, topk_ids = layer.router.select_experts(
+            hidden_states, router_logits
+        )
+        zero_output = layer.router.zero_expert_output
+
+        # Compute real expert output using the plain layer with the masked
+        # routing from the ZeroExpertRouter.
+        real_output = plain_layer.quant_method.apply(
+            layer=plain_layer,
+            x=hidden_states,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            shared_experts=None,
+            shared_experts_input=None,
+        )
+
+        # Get the combined output from the zero expert layer.
+        full_output = layer.forward(hidden_states, router_logits)
+
+    assert zero_output is not None, "Zero expert output should not be None"
+    assert not torch.isnan(real_output).any(), "Real expert output has NaN"
+    assert not torch.isnan(zero_output).any(), "Zero expert output has NaN"
+    assert not torch.isnan(full_output).any(), "Full output has NaN"
+
+    expected = real_output + zero_output
+    torch.testing.assert_close(
+        full_output,
+        expected,
+        atol=0,
+        rtol=0,
+        msg="FusedMoE output should equal plain FusedMoE output "
+        "plus zero expert contribution",
+    )
+
+
+@pytest.mark.parametrize("num_tokens", [1, 32])
+def test_zero_expert_moe_zero_expert_is_identity(zero_expert_moe, num_tokens):
+    """Validate zero expert identity behavior.
+
+    When routing strongly favors the zero expert, its contribution should
+    be a scaled version of hidden_states (identity operation). We verify
+    this by manually computing the expected zero expert output from the
+    routing weights and comparing against what the router produces.
+    """
+    layer, vllm_config = zero_expert_moe
+    num_experts = 4
+    zero_expert_num = 1
+    total_experts = num_experts + zero_expert_num
+
+    hidden_states = torch.randn(
+        num_tokens, layer.hidden_size, dtype=torch.bfloat16, device="cuda"
+    )
+    # Strongly bias toward the zero expert (index 4).
+    router_logits = torch.full(
+        (num_tokens, total_experts), -10.0, dtype=torch.float32, device="cuda"
+    )
+    router_logits[:, num_experts] = 10.0  # zero expert gets high logit
+
+    with torch.no_grad():
+        for param in layer.parameters():
+            if param.dtype.is_floating_point:
+                param.normal_(0, 0.01)
+
+    with set_current_vllm_config(vllm_config), set_forward_context(None, vllm_config):
+        get_forward_context().all_moe_layers = None
+
+        # Run routing to get topk_weights/topk_ids before masking.
+        from vllm.model_executor.layers.fused_moe.router.fused_topk_bias_router import (
+            fused_topk_bias,
+        )
+
+        topk_weights, topk_ids = fused_topk_bias(
+            hidden_states=hidden_states,
+            gating_output=router_logits,
+            e_score_correction_bias=layer.router.e_score_correction_bias.data,
+            topk=layer.top_k,
+            renormalize=layer.router.renormalize,
+            scoring_func=layer.router.scoring_func,
+        )
+
+        # Manually compute expected zero expert identity output:
+        # For each token, sum routing weights assigned to zero expert slots,
+        # then multiply by hidden_states.
+        zero_mask = topk_ids >= num_experts
+        zero_weight_per_token = (topk_weights * zero_mask.float()).sum(
+            dim=-1, keepdim=True
+        )
+        expected_zero_output = (hidden_states.float() * zero_weight_per_token).to(
+            hidden_states.dtype
+        )
+
+        # Run routing directly to trigger zero expert computation
+        # without going through the runner (which consumes the output).
+        layer.router.select_experts(hidden_states, router_logits)
+        actual_zero_output = layer.router.zero_expert_output
+
+    assert actual_zero_output is not None
+    assert zero_mask.any(), (
+        "With high zero expert logit, at least some slots should route "
+        "to the zero expert"
+    )
+
+    torch.testing.assert_close(
+        actual_zero_output,
+        expected_zero_output,
+        atol=1e-3,
+        rtol=1e-3,
+        msg="Zero expert identity output should equal "
+        "hidden_states * sum(zero_expert_weights)",
+    )
diff --git a/tests/kernels/moe/utils.py b/tests/kernels/moe/utils.py
index 4b693d8c8a55..acb2c21b3896 100644
--- a/tests/kernels/moe/utils.py
+++ b/tests/kernels/moe/utils.py
@@ -17,16 +17,20 @@
     FusedMoEQuantConfig,
     RoutingMethodType,
 )
-from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
-    BatchedPrepareAndFinalize,
+from vllm.model_executor.layers.fused_moe.experts.fused_batched_moe import (
     BatchedTritonExperts,
     NaiveBatchedExperts,
 )
-from vllm.model_executor.layers.fused_moe.fused_moe import (
+from vllm.model_executor.layers.fused_moe.experts.triton_moe import (
     TritonExperts,
+)
+from vllm.model_executor.layers.fused_moe.fused_moe import (
     fused_experts,
 )
 from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEKernel
+from vllm.model_executor.layers.fused_moe.prepare_finalize.batched import (
+    BatchedPrepareAndFinalize,
+)
 from vllm.model_executor.layers.fused_moe.router.fused_topk_router import fused_topk
 from vllm.model_executor.layers.fused_moe.utils import moe_kernel_quantize_input
 from vllm.utils.deep_gemm import per_block_cast_to_fp8
@@ -69,6 +73,7 @@ def make_dummy_moe_config(
         in_dtype=in_dtype,
         device="cuda",
         routing_method=RoutingMethodType.TopK,
+        max_num_tokens=512,
     )
 
 
@@ -248,7 +253,7 @@ def make_quantized_test_activations(
     return a, a_q, a_scale
 
 
-def moe_quantize_weights(
+def moe_quantize_weights_2d(
     w: torch.Tensor,
     w_s: torch.Tensor | None,
     quant_dtype: torch.dtype | str | None,
@@ -293,6 +298,40 @@ def moe_quantize_weights(
     return w, w_s, w_gs
 
 
+def moe_quantize_weights(
+    w: torch.Tensor,
+    w_s: torch.Tensor | None,
+    quant_dtype: torch.dtype | str | None,
+    per_token_quant: bool,
+    block_shape: list[int] | None,
+) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]:
+    assert w.dim() == 3
+    e, rows, cols = w.shape
+    w_l = [None] * e
+    w_s_l = [None] * e
+    w_gs_l = [None] * e
+    for idx in range(e):
+        w_l[idx], w_s_l[idx], w_gs_l[idx] = moe_quantize_weights_2d(
+            w[idx], None, quant_dtype, per_token_quant, block_shape
+        )
+
+    w = torch.stack(w_l)
+    w_s = torch.stack(w_s_l)
+    w_gs = torch.stack(w_gs_l) if e > 0 and w_gs_l[0] is not None else None
+
+    if w_s.ndim == 2:
+        assert w_s.shape[-1] == 1
+        w_s = w_s.view(-1, 1, 1)
+
+    if block_shape is not None:
+        block_n, block_k = block_shape
+        n_tiles = (rows + block_n - 1) // block_n
+        k_tiles = (cols + block_k - 1) // block_k
+        assert w_s.shape == (e, n_tiles, k_tiles)
+
+    return w, w_s, w_gs
+
+
 def make_test_weight(
     e: int,
     rows: int,
@@ -303,30 +342,11 @@ def make_test_weight(
     per_out_ch_quant: bool = False,
 ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor | None, torch.Tensor | None]:
     w_16 = torch.randn((e, rows, cols), device="cuda", dtype=in_dtype) / 15
-    w_gs = None
 
     if quant_dtype is not None:
-        w_l = [None] * e
-        w_s_l = [None] * e
-        w_gs_l = [None] * e
-        for idx in range(e):
-            w_l[idx], w_s_l[idx], w_gs_l[idx] = moe_quantize_weights(
-                w_16[idx], None, quant_dtype, per_out_ch_quant, block_shape
-            )
-
-        w = torch.stack(w_l)
-        w_s = torch.stack(w_s_l)
-        if e > 0 and w_gs_l[0] is not None:
-            w_gs = torch.stack(w_gs_l)
-        if w_s.ndim == 2:
-            assert w_s.shape[-1] == 1
-            w_s = w_s.view(-1, 1, 1)
-
-        if block_shape is not None:
-            block_n, block_k = block_shape
-            n_tiles = (rows + block_n - 1) // block_n
-            k_tiles = (cols + block_k - 1) // block_k
-            assert w_s.shape == (e, n_tiles, k_tiles)
+        w, w_s, w_gs = moe_quantize_weights(
+            w_16, None, quant_dtype, per_out_ch_quant, block_shape
+        )
     else:
         w = w_16
         w_s = None
@@ -384,6 +404,7 @@ def make_test_quant_config(
     per_act_token_quant: bool = False,
     block_shape: list[int] | None = None,
     make_gate: bool = True,
+    is_scale_swizzled: bool = True,
 ) -> tuple[torch.Tensor, torch.Tensor, FusedMoEQuantConfig]:
     (_, w1, w1_s, w1_gs), (_, w2, w2_s, w2_gs) = make_test_weights(
         e,
@@ -424,6 +445,7 @@ def make_test_quant_config(
             # TODO: make sure this is handled properly
             g1_alphas=(1 / w1_gs) if w1_gs is not None else None,
             g2_alphas=(1 / w2_gs) if w2_gs is not None else None,
+            is_scale_swizzled=is_scale_swizzled,
         ),
     )
 
@@ -454,7 +476,6 @@ def fused_moe(
     )
 
 
-# CustomOp?
 class BaselineMM(torch.nn.Module):
     def __init__(
         self,
@@ -462,13 +483,22 @@ def __init__(
         out_dtype: torch.dtype,
     ):
         super().__init__()
-        self.b = b.to(dtype=torch.float32)
+        self.b = torch.nn.Parameter(b.to(dtype=torch.float32))
         self.out_dtype = out_dtype
 
     def forward(self, a: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor | None]:
         return torch.mm(a.to(dtype=torch.float32), self.b).to(self.out_dtype), None
 
 
+class BaselineSiluAndMul(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        return torch.nn.functional.silu(x[..., :d]) * x[..., d:]
+
+
 class TestMLP(torch.nn.Module):
     def __init__(
         self,
@@ -479,7 +509,7 @@ def __init__(
         super().__init__()
         self.gate_up_proj = BaselineMM(w1, out_dtype)
         self.down_proj = BaselineMM(w2, out_dtype)
-        self.act_fn = SiluAndMul()
+        self.act_fn = BaselineSiluAndMul()
 
     def forward(self, x):
         x, _ = self.gate_up_proj(x)
@@ -564,35 +594,24 @@ def forward(self, x):
         return x
 
 
-def make_shared_experts(
+def make_shared_experts_with_weights(
     N: int,
     K: int,
-    in_dtype: torch.dtype = torch.bfloat16,
+    in_dtype: torch.dtype,
+    w1: torch.Tensor,
+    w2: torch.Tensor,
+    w1_s: torch.Tensor | None = None,
+    w2_s: torch.Tensor | None = None,
     quant_dtype: torch.dtype | str | None = None,
 ) -> torch.nn.Module:
-    from vllm.model_executor.layers.quantization.fp8 import Fp8Config
-
-    (_, w1, w1_s, _), (_, w2, w2_s, _) = make_test_weights(
-        1,
-        N,
-        K,
-        in_dtype=in_dtype,
-        quant_dtype=quant_dtype,
-    )
     old_dtype = torch.get_default_dtype()
     try:
         torch.set_default_dtype(in_dtype)
         if quant_dtype == torch.float8_e4m3fn:
-            w1 = w1[0].transpose(0, 1)
-            w2 = w2[0].transpose(0, 1)
-            w1_s = w1_s[0].transpose(0, 1) if w1_s is not None else None
-            w2_s = w2_s[0].transpose(0, 1) if w2_s is not None else None
+            from vllm.model_executor.layers.quantization.fp8 import Fp8Config
+
             quant_config = Fp8Config(True)
         else:
-            w1 = w1[0]
-            w2 = w2[0]
-            w1_s = None
-            w2_s = None
             quant_config = None
 
         return RealMLP(K, N, w1, w2, "silu", quant_config, w1_s=w1_s, w2_s=w2_s)
@@ -603,7 +622,6 @@ def make_shared_experts(
 def modular_triton_fused_moe(
     moe_config: FusedMoEConfig,
     quant_config: FusedMoEQuantConfig,
-    shared_experts: torch.nn.Module | None = None,
 ) -> FusedMoEKernel:
     return FusedMoEKernel(
         maybe_make_prepare_finalize(
@@ -613,6 +631,24 @@ def modular_triton_fused_moe(
             use_monolithic=False,
         ),
         TritonExperts(moe_config, quant_config),
-        shared_experts,
         inplace=False,
     )
+
+
+def make_shared_experts(
+    N: int,
+    K: int,
+    in_dtype: torch.dtype = torch.bfloat16,
+    quant_dtype: torch.dtype | str | None = None,
+) -> torch.nn.Module:
+    (_, w1, w1_s, _), (_, w2, w2_s, _) = make_test_weights(
+        1,
+        N,
+        K,
+        in_dtype=in_dtype,
+        quant_dtype=quant_dtype,
+    )
+
+    return make_shared_experts_with_weights(
+        N, K, in_dtype, w1, w2, w1_s=w1_s, w2_s=w2_s, quant_dtype=quant_dtype
+    )
diff --git a/tests/kernels/quantization/nvfp4_utils.py b/tests/kernels/quantization/nvfp4_utils.py
index 778895271432..df6513c131d4 100644
--- a/tests/kernels/quantization/nvfp4_utils.py
+++ b/tests/kernels/quantization/nvfp4_utils.py
@@ -88,6 +88,60 @@ def break_fp4_bytes(a, dtype):
     return values.reshape(m, n * 2).to(dtype=dtype)
 
 
+def dequant_nvfp4_kv_cache(
+    fp4_data: torch.Tensor,
+    block_scale: torch.Tensor,
+    global_scale: float,
+    head_size: int,
+    block_size: int,
+) -> torch.Tensor:
+    """Dequantize an NVFP4 KV cache with 4x4-swizzled block scales.
+
+    The input must be in HND layout so that the last two dims are
+    (block_size, last_dim).  For NHD caches, permute to HND first.
+
+    Args:
+        fp4_data: [..., num_heads, block_size, head_size//2] uint8 packed fp4.
+        block_scale: [..., num_heads, block_size, head_size//16] fp8 block
+            scales (as uint8 or float8_e4m3fn).
+        global_scale: checkpoint dequant scale (k_scale or v_scale).
+        head_size: head dimension.
+        block_size: page size.
+
+    Returns:
+        [..., num_heads, block_size, head_size] float32.
+    """
+    data_dim = head_size // 2
+    scale_dim = head_size // 16
+
+    fp4_packed = fp4_data
+    sf_swizzled = block_scale.view(torch.uint8)
+
+    # Unswizzle 4x4 block scales on (block_size, scale_dim) plane.
+    # [..., T, S] → [..., T//4, 4, sg, 4] → permute → [..., T, S]
+    batch_shape = sf_swizzled.shape[:-2]
+    T, S = block_size, scale_dim
+    sg = S // 4
+    sf_reshape = sf_swizzled.reshape(*batch_shape, T // 4, 4, sg, 4)
+    ndim = sf_reshape.ndim
+    # Swap the last four dims: (..., T//4, 4, sg, 4) → (..., T//4, 4, 4, sg)
+    perm = list(range(ndim - 4)) + [ndim - 4, ndim - 1, ndim - 3, ndim - 2]
+    sf_linear = sf_reshape.permute(*perm).reshape(*batch_shape, T, S)
+    sf_f32 = sf_linear.view(torch.float8_e4m3fn).to(torch.float32)
+
+    # Unpack fp4
+    shape = fp4_packed.shape  # [..., T, data_dim]
+    fp4_flat = fp4_packed.reshape(-1, data_dim)
+    fp4_vals = break_fp4_bytes(fp4_flat, torch.float32)
+    fp4_vals = fp4_vals.reshape(*shape[:-1], head_size)
+
+    # Dequant: fp4_val * block_scale * global_scale per 16-element group
+    return (
+        fp4_vals.reshape(*shape[:-1], scale_dim, 16)
+        * (sf_f32 * global_scale).unsqueeze(-1)
+    ).reshape(*shape[:-1], head_size)
+
+
 def get_nvfp4_global_scale(a: torch.Tensor):
     return (FLOAT8_E4M3_MAX * FLOAT4_E2M1_MAX) / torch.abs(a).max().to(torch.float32)
 
diff --git a/tests/kernels/quantization/test_block_fp8.py b/tests/kernels/quantization/test_block_fp8.py
index 936516576ce1..4cb638e47af0 100644
--- a/tests/kernels/quantization/test_block_fp8.py
+++ b/tests/kernels/quantization/test_block_fp8.py
@@ -12,8 +12,8 @@
     native_w8a8_block_matmul,
 )
 from vllm.config import VllmConfig
+from vllm.model_executor.kernels.linear.scaled_mm.cutlass import cutlass_scaled_mm
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-    cutlass_scaled_mm,
     per_token_group_quant_fp8,
     w8a8_triton_block_scaled_mm,
 )
@@ -202,7 +202,7 @@ def test_w8a8_block_fp8_deep_gemm_matmul(M, N, K, block_size, out_dtype, seed):
 
     # only aligned sizes are supported by deepgemm
     if not should_use_deepgemm_for_fp8_linear(
-        output_dtype=out_dtype, weight=B_fp32, supports_deep_gemm=True
+        output_dtype=out_dtype, weight_shape=B_fp32.shape, supports_deep_gemm=True
     ):
         pytest.skip(f"Skipping test; invalid size {M}, {N}, {K}")
 
diff --git a/tests/kernels/quantization/test_cpu_fp8_scaled_mm.py b/tests/kernels/quantization/test_cpu_fp8_scaled_mm.py
new file mode 100644
index 000000000000..3154e2cb98bb
--- /dev/null
+++ b/tests/kernels/quantization/test_cpu_fp8_scaled_mm.py
@@ -0,0 +1,162 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for CPU FP8 W8A16 block-scaled GEMM kernel (fp8_scaled_mm_cpu).
+
+Run `pytest tests/kernels/quantization/test_cpu_fp8_scaled_mm.py -v`.
+"""
+
+import pytest
+import torch
+
+from vllm import _custom_ops as ops
+from vllm.platforms import current_platform
+
+if not current_platform.is_cpu():
+    pytest.skip("skipping CPU-only tests", allow_module_level=True)
+
+if not ops._supports_cpu_fp8_w8a16:
+    pytest.skip("fp8_scaled_mm_cpu op not available", allow_module_level=True)
+
+BLOCK_SIZE = [128, 128]
+
+
+def cdiv(a: int, b: int) -> int:
+    return -(a // -b)
+
+
+def quantize_weight_block_fp8(
+    weight: torch.Tensor,
+    block_size: list[int],
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Quantize weight [N, K] to FP8 with block scales.
+
+    Returns:
+        fp8_weight: [N, K] float8_e4m3fn
+        scales: [n_tiles, k_tiles] float32
+    """
+    N, K = weight.shape
+    block_n, block_k = block_size
+    fp8_max = torch.finfo(torch.float8_e4m3fn).max
+
+    n_tiles = cdiv(N, block_n)
+    k_tiles = cdiv(K, block_k)
+
+    # Pad for even blocking
+    pad_N = (block_n - (N % block_n)) % block_n
+    pad_K = (block_k - (K % block_k)) % block_k
+    if pad_N > 0 or pad_K > 0:
+        weight = torch.nn.functional.pad(weight, (0, pad_K, 0, pad_N))
+
+    # Reshape into blocks
+    w_blocks = weight.view(n_tiles, block_n, k_tiles, block_k)
+    w_blocks = w_blocks.permute(0, 2, 1, 3).contiguous()
+
+    # Per-block scale
+    abs_max = w_blocks.abs().amax(dim=(-2, -1), keepdim=True)
+    scales = abs_max / fp8_max
+    scales = torch.where(scales == 0, torch.ones_like(scales), scales)
+
+    # Quantize
+    q_fp8 = (w_blocks / scales).clamp(-fp8_max, fp8_max).to(torch.float8_e4m3fn)
+
+    # Reshape back
+    fp8_weight = (
+        q_fp8.permute(0, 2, 1, 3)
+        .contiguous()
+        .view(N + pad_N, K + pad_K)[:N, :K]
+        .contiguous()
+    )
+
+    scales = scales.view(n_tiles, k_tiles)
+    return fp8_weight, scales
+
+
+def dequant_weight_block_fp8(
+    fp8_weight: torch.Tensor,
+    scales: torch.Tensor,
+    block_size: list[int],
+    out_dtype: torch.dtype,
+) -> torch.Tensor:
+    """Dequantize FP8 weight back to float for reference computation."""
+    N, K = fp8_weight.shape
+    block_n, block_k = block_size
+    n_tiles, k_tiles = scales.shape
+
+    pad_N = (block_n - (N % block_n)) % block_n
+    pad_K = (block_k - (K % block_k)) % block_k
+    if pad_N > 0 or pad_K > 0:
+        fp8_padded = torch.nn.functional.pad(fp8_weight.float(), (0, pad_K, 0, pad_N))
+    else:
+        fp8_padded = fp8_weight.float()
+
+    w_blocks = fp8_padded.view(n_tiles, block_n, k_tiles, block_k)
+    w_blocks = w_blocks.permute(0, 2, 1, 3).contiguous()
+    dq = w_blocks * scales.view(n_tiles, k_tiles, 1, 1)
+    dq = dq.permute(0, 2, 1, 3).contiguous().view(N + pad_N, K + pad_K)
+    return dq[:N, :K].to(out_dtype)
+
+
+def ref_fp8_block_scaled_mm(
+    x: torch.Tensor,
+    fp8_weight: torch.Tensor,
+    scales: torch.Tensor,
+    block_size: list[int],
+    bias: torch.Tensor | None,
+    out_dtype: torch.dtype,
+) -> torch.Tensor:
+    """Reference: dequant FP8→float32, matmul in float32, cast to out_dtype."""
+    w_dq = dequant_weight_block_fp8(fp8_weight, scales, block_size, torch.float32)
+    out = torch.mm(x.float(), w_dq.t())
+    if bias is not None:
+        out = out + bias.float()
+    return out.to(out_dtype)
+
+
+# ---------------------------------------------------------------------------
+# Test parameters
+# ---------------------------------------------------------------------------
+M_SIZES = [1, 4, 16, 64, 128]
+# (N, K) — weight shape is [N, K], output has N columns.
+NK_SIZES = [
+    (128, 256),
+    (256, 512),
+    (512, 1024),
+    (1024, 2048),
+    (5120, 5120),
+    (17408, 5120),
+    (5120, 17408),
+]
+
+
+@pytest.mark.parametrize("M", M_SIZES)
+@pytest.mark.parametrize("N,K", NK_SIZES)
+@pytest.mark.parametrize("use_bias", [False, True])
+def test_cpu_fp8_scaled_mm(M: int, N: int, K: int, use_bias: bool):
+    """fp8_scaled_mm_cpu correctness against float reference."""
+    torch.manual_seed(42)
+    out_dtype = torch.bfloat16
+    block_size = BLOCK_SIZE
+
+    x = torch.randn(M, K, dtype=out_dtype) / (K**0.5)
+    w_f32 = torch.randn(N, K, dtype=torch.float32) / (K**0.5)
+    fp8_weight, scales = quantize_weight_block_fp8(w_f32, block_size)
+
+    bias = torch.randn(N, dtype=torch.float32) * 0.1 if use_bias else None
+
+    ref_out = ref_fp8_block_scaled_mm(
+        x, fp8_weight, scales, block_size, bias, out_dtype
+    )
+
+    packed_weight = torch.ops._C.convert_weight_packed(fp8_weight)
+    kernel_out = ops.fp8_scaled_mm_cpu(
+        x,
+        packed_weight,
+        scales,
+        block_size,
+        bias,
+        out_dtype,
+        True,
+    )
+
+    assert kernel_out.dtype == out_dtype
+    torch.testing.assert_close(kernel_out, ref_out, rtol=0.02, atol=0.01)
diff --git a/tests/kernels/quantization/test_cutlass_scaled_mm.py b/tests/kernels/quantization/test_cutlass_scaled_mm.py
index a8adec49a955..a937c30fed74 100644
--- a/tests/kernels/quantization/test_cutlass_scaled_mm.py
+++ b/tests/kernels/quantization/test_cutlass_scaled_mm.py
@@ -40,6 +40,18 @@
     (512, 24576, 128),
 ]
 
+# Shapes with N or K not divisible by 16.  These exercise the padding path
+# inside CutlassFP8ScaledMMLinearKernel.apply_scaled_mm (e.g. Qwen2.5-VL
+# vision MLP dims).
+UNALIGNED_MNK_FACTORS = [
+    (32, 3420, 1280),
+    (32, 1280, 6840),
+    (1, 3420, 1280),
+    (64, 6840, 1280),
+    (16, 100, 200),
+    (33, 255, 513),
+]
+
 CUDA_DEVICES = [
     f"cuda:{i}" for i in range(1 if torch.accelerator.device_count() == 1 else 2)
 ]
@@ -152,6 +164,71 @@ def test_cutlass_fp8_gemm(
     cutlass_fp8_gemm_helper(m, n, k, a_scale_group_shape, b_scale_group_shape, use_bias)
 
 
+@pytest.mark.parametrize("m,n,k", UNALIGNED_MNK_FACTORS)
+@pytest.mark.parametrize(
+    "a_scale_group_shape", [PER_TOKEN_GROUP_SHAPE, TENSORWISE_GROUP_SHAPE]
+)
+@pytest.mark.parametrize(
+    "b_scale_group_shape", [PER_OUT_CH_GROUP_SHAPE, TENSORWISE_GROUP_SHAPE]
+)
+@pytest.mark.parametrize("use_bias", [True, False])
+@pytest.mark.skipif(
+    not current_platform.has_device_capability(89),
+    reason="FP8 is not supported on this GPU type.",
+)
+def test_cutlass_fp8_gemm_padded(
+    m: int, n: int, k: int, a_scale_group_shape, b_scale_group_shape, use_bias: bool
+):
+    """Test CUTLASS FP8 GEMM with padding for non-16-aligned N/K dims.
+
+    Exercises CutlassFP8ScaledMMLinearKernel.apply_scaled_mm which pads
+    inputs to satisfy CUTLASS alignment requirements — the path taken by
+    models like Qwen2.5-VL whose vision MLP has non-16-aligned dims.
+    """
+    from vllm.model_executor.kernels.linear.scaled_mm.cutlass import (
+        CutlassFP8ScaledMMLinearKernel,
+    )
+
+    a = to_fp8(torch.randn((m, k), device="cuda"))
+    b = to_fp8(torch.randn((n, k), device="cuda").t())
+
+    a_scales_shape = scale_shape(a.shape, a_scale_group_shape)
+    b_scales_shape = scale_shape(b.shape, b_scale_group_shape)
+
+    scale_a = torch.randn(a_scales_shape, device="cuda", dtype=torch.float32)
+    scale_b = torch.randn(b_scales_shape, device="cuda", dtype=torch.float32)
+
+    scale_a = scale_a.t().contiguous().t()
+    scale_b = scale_b.t().contiguous().t()
+
+    out_dtype = torch.bfloat16
+    bias = torch.rand((n,), device="cuda", dtype=out_dtype) * 10 if use_bias else None
+
+    baseline = baseline_scaled_mm(a, b, scale_a, scale_b, out_dtype, bias)
+
+    # process_weights_after_loading pad b to 16
+    pad_k = (16 - k % 16) % 16
+    pad_n = (16 - n % 16) % 16
+    if pad_k > 0 or pad_n > 0:
+        b = torch.nn.functional.pad(b.t().contiguous(), (0, pad_k, 0, pad_n)).t()
+        if pad_n > 0 and scale_b.numel() > 1:
+            scale_b = torch.nn.functional.pad(scale_b, (0, pad_n), value=1.0)
+
+    kernel = object.__new__(CutlassFP8ScaledMMLinearKernel)
+    kernel.logical_output_size = n
+    out = kernel.apply_scaled_mm(
+        A=a,
+        B=b,
+        out_dtype=out_dtype,
+        As=scale_a,
+        Bs=scale_b,
+        bias=bias,
+        output_shape=[m, n],
+    )
+
+    torch.testing.assert_close(out, baseline, rtol=5e-1, atol=1.5e-1)
+
+
 @pytest.mark.parametrize("m,n,k", MNK_FACTORS)
 @pytest.mark.parametrize(
     "a_scale_group_shape,b_scale_group_shape", [((1, 128), (128, 128))]
diff --git a/tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py b/tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py
index e414ba7d2cc3..698c679a201c 100644
--- a/tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py
+++ b/tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py
@@ -13,6 +13,7 @@
 from vllm.platforms import current_platform
 from vllm.utils.flashinfer import (
     flashinfer_scaled_fp4_mm,
+    has_flashinfer_b12x_gemm,
 )
 from vllm.utils.torch_utils import set_random_seed
 
@@ -74,7 +75,7 @@ def get_ref_results(
 @pytest.mark.parametrize("shape", SHAPES)
 @pytest.mark.parametrize("seed", SEEDS)
 @pytest.mark.parametrize("device", CUDA_DEVICES)
-@pytest.mark.parametrize("backend", ["cutlass", "cudnn", "trtllm"])
+@pytest.mark.parametrize("backend", ["cutlass", "cudnn", "trtllm", "b12x"])
 @pytest.mark.parametrize("autotune", [False, True])
 @torch.inference_mode()
 def test_flashinfer_nvfp4_gemm(
@@ -87,6 +88,10 @@ def test_flashinfer_nvfp4_gemm(
 ) -> None:
     if "trtllm" in backend and dtype == torch.float16:
         pytest.skip("Only torch.bfloat16 is supported for TRTLLM FP4 GEMM operations")
+    if backend == "b12x" and not current_platform.has_device_capability(120):
+        pytest.skip("b12x FP4 GEMM requires SM120+ (CC 12.0+)")
+    if backend == "b12x" and not has_flashinfer_b12x_gemm():
+        pytest.skip("b12x FP4 GEMM backend not available in installed FlashInfer")
 
     set_random_seed(seed)
     m, n, packed_k = shape
@@ -105,8 +110,7 @@ def test_flashinfer_nvfp4_gemm(
 
     # ops.scaled_fp4_quant returns swizzled scales, while weights
     # from checkpoints are in linear scales.
-    # So instead of needing to swizzle for cutlass as in modelopt.py,
-    # we need to unswizzle for trtllm here.
+    # cutlass and b12x use swizzled scales directly; trtllm needs them unswizzled.
     a_fp4, a_scale_interleaved = ops.scaled_fp4_quant(
         a_dtype, a_global_scale, is_sf_swizzled_layout=True, backend=backend
     )
diff --git a/tests/kernels/quantization/test_marlin_gemm.py b/tests/kernels/quantization/test_marlin_gemm.py
index f918212f763c..8b35fab81ef8 100644
--- a/tests/kernels/quantization/test_marlin_gemm.py
+++ b/tests/kernels/quantization/test_marlin_gemm.py
@@ -381,7 +381,8 @@ def is_invalid(
         for sub_case in inner_combinations:
             if (
                 sub_case[0] == scalar_types.float8_e4m3fn
-                and current_platform.get_device_capability() not in [89, 120]
+                and not current_platform.is_device_capability(89)
+                and not current_platform.is_device_capability_family(120)
             ):
                 continue
             args = sub_case + (size_m, size_n, size_k) + case[4:]
diff --git a/tests/kernels/quantization/test_mxfp4_triton_ep.py b/tests/kernels/quantization/test_mxfp4_triton_ep.py
index 6c8aebe42c07..93d30f459816 100644
--- a/tests/kernels/quantization/test_mxfp4_triton_ep.py
+++ b/tests/kernels/quantization/test_mxfp4_triton_ep.py
@@ -4,17 +4,14 @@
 Tests that triton_kernel_moe_forward correctly applies expert_map
 remapping when expert parallelism (EP) is enabled.
 
-Previously, legacy_routing was always used and it produced routing data
-with global expert IDs that didn't correspond to local weight indices,
-causing illegal memory access with EP.  The fix splits routing: when
-expert_map is provided, topk selection is performed first, expert_map is
-applied to remap global→local IDs, and make_routing_data builds routing
-structures from the local IDs.
+When expert_map is provided, global expert IDs are remapped to local IDs
+via topk + expert_map remap + make_routing_data before building routing
+structures, and the expert_map passed downstream to triton_kernel_fused_experts
+is None (already applied).
 """
 
 from unittest.mock import MagicMock, patch
 
-import pytest
 import torch
 
 
@@ -22,58 +19,43 @@ class TestTritonMoeForwardExpertMap:
     """Test that triton_kernel_moe_forward applies expert_map remapping
     when expert_map is provided (EP active)."""
 
-    @pytest.mark.parametrize("expert_map_present", [False, True])
-    def test_routing_path_selection(self, expert_map_present):
-        """Verify that the EP-aware routing path is taken when expert_map
-        is present, and the legacy_routing path is taken otherwise."""
-
+    def test_expert_map_remap(self):
         device = "cuda" if torch.cuda.is_available() else "cpu"
-        # This is a structural test: we mock the routing functions to
-        # verify the correct path is exercised.
-        mock_expert_map = (
-            torch.tensor([0, -1, 1, -1], device=device) if expert_map_present else None
-        )
+        mock_expert_map = torch.tensor([0, -1, 1, -1], device=device)
+
+        from vllm.utils.import_utils import import_triton_kernels
+
+        import_triton_kernels()
+
+        mock_routing_data = MagicMock()
+        mock_gather = MagicMock()
+        mock_scatter = MagicMock()
 
         with (
-            patch(
-                "vllm.model_executor.layers.fused_moe."
-                "gpt_oss_triton_kernels_moe.legacy_routing"
-            ) as mock_legacy,
             patch("triton_kernels.topk.topk") as mock_topk,
             patch(
-                "vllm.model_executor.layers.fused_moe."
+                "vllm.model_executor.layers.fused_moe.experts."
                 "gpt_oss_triton_kernels_moe.make_routing_data"
             ) as mock_make_routing,
             patch(
-                "vllm.model_executor.layers.fused_moe."
+                "vllm.model_executor.layers.fused_moe.experts."
                 "gpt_oss_triton_kernels_moe.triton_kernel_fused_experts"
             ) as mock_fused_experts,
         ):
-            from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (  # noqa: E501
+            from vllm.model_executor.layers.fused_moe.experts.gpt_oss_triton_kernels_moe import (  # noqa: E501
                 triton_kernel_moe_forward,
             )
 
-            # Set up return values
-            mock_routing_data = MagicMock()
-            mock_gather = MagicMock()
-            mock_scatter = MagicMock()
-
-            if expert_map_present:
-                sparse_result = MagicMock()
-                sparse_result.indx = torch.tensor([[0, 2]], dtype=torch.int32)
-                sparse_result.vals = torch.tensor([[0.6, 0.4]])
-                mock_topk.return_value = sparse_result
-                mock_make_routing.return_value = (
-                    mock_routing_data,
-                    mock_gather,
-                    mock_scatter,
-                )
-            else:
-                mock_legacy.return_value = (
-                    mock_routing_data,
-                    mock_gather,
-                    mock_scatter,
-                )
+            sparse_result = MagicMock()
+            sparse_result.indx = torch.tensor([[0, 2]], dtype=torch.int32)
+            sparse_result.vals = torch.tensor([[0.6, 0.4]])
+            mock_topk.return_value = sparse_result
+
+            mock_make_routing.return_value = (
+                mock_routing_data,
+                mock_gather,
+                mock_scatter,
+            )
 
             mock_fused_experts.return_value = torch.zeros((1, 8), device=device)
 
@@ -92,20 +74,10 @@ def test_routing_path_selection(self, expert_map_present):
                 expert_map=mock_expert_map,
             )
 
-            if expert_map_present:
-                # EP path: should use topk + make_routing_data, NOT
-                # legacy_routing
-                mock_topk.assert_called_once()
-                mock_make_routing.assert_called_once()
-                mock_legacy.assert_not_called()
-                # expert_map should be None in the fused_experts call
-                # (already applied)
-                call_kwargs = mock_fused_experts.call_args
-                assert call_kwargs[1].get("expert_map") is None or (
-                    len(call_kwargs[0]) > 0
-                )
-            else:
-                # Non-EP path: should use legacy_routing
-                mock_legacy.assert_called_once()
-                mock_topk.assert_not_called()
-                mock_make_routing.assert_not_called()
+            mock_topk.assert_called_once()
+            mock_make_routing.assert_called_once()
+
+            # expert_map should be None in the fused_experts call
+            # (already applied).
+            call_kwargs = mock_fused_experts.call_args
+            assert call_kwargs[1].get("expert_map") is None or (len(call_kwargs[0]) > 0)
diff --git a/tests/kernels/quantization/test_nvfp4_emulation.py b/tests/kernels/quantization/test_nvfp4_emulation.py
new file mode 100644
index 000000000000..71072d9e9fff
--- /dev/null
+++ b/tests/kernels/quantization/test_nvfp4_emulation.py
@@ -0,0 +1,308 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import huggingface_hub
+import pytest
+import torch
+from safetensors import safe_open
+
+from vllm.model_executor.layers.quantization.utils import (
+    nvfp4_emulation_utils,
+)
+from vllm.model_executor.layers.quantization.utils.nvfp4_emulation_utils import (
+    dequantize_to_dtype,
+    ref_nvfp4_quant_dequant,
+)
+from vllm.platforms import current_platform
+from vllm.triton_utils import triton
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike(),
+    reason="Triton NVFP4 kernel requires CUDA.",
+)
+def test_triton_dequantize_nvfp4(monkeypatch) -> None:
+    """Test the Triton dequantization kernel against the CPU reference
+    using real NVFP4 weights from a checkpoint.
+
+    Tests both 2D (attention projection) and 3D (stacked MoE experts).
+    """
+    checkpoint_path = huggingface_hub.snapshot_download(
+        "nvidia/Qwen3-30B-A3B-NVFP4",
+        allow_patterns=["model-00001-of-00004.safetensors"],
+    )
+    shard_path = f"{checkpoint_path}/model-00001-of-00004.safetensors"
+    block_size = 16
+
+    with safe_open(shard_path, framework="pt", device="cpu") as f:
+        all_keys = list(f.keys())
+
+        # 2D case: attention projection
+        tensor_fp4_2d = f.get_tensor("model.layers.9.self_attn.k_proj.weight")
+        tensor_sf_2d = f.get_tensor("model.layers.9.self_attn.k_proj.weight_scale")
+        global_scale_2d = f.get_tensor("model.layers.9.self_attn.k_proj.weight_scale_2")
+
+        # 3D case: stack ALL experts for layer 9 up_proj
+        expert_prefix = "model.layers.9.mlp.experts."
+        expert_indices = sorted(
+            int(key.split(".")[5])
+            for key in all_keys
+            if key.startswith(expert_prefix) and key.endswith(".up_proj.weight")
+        )
+        assert len(expert_indices) > 0
+
+        all_fp4 = []
+        all_sf = []
+        all_global_scale = []
+        for index in expert_indices:
+            name = f"{expert_prefix}{index}.up_proj"
+            all_fp4.append(f.get_tensor(f"{name}.weight"))
+            all_sf.append(f.get_tensor(f"{name}.weight_scale"))
+            all_global_scale.append(f.get_tensor(f"{name}.weight_scale_2"))
+
+    tensor_fp4_3d = torch.stack(all_fp4)
+    tensor_sf_3d = torch.stack(all_sf)
+    global_scale_3d = torch.stack(all_global_scale)
+
+    test_cases = [
+        ("2D base", tensor_fp4_2d, tensor_sf_2d, global_scale_2d),
+        (
+            "2D 2x rows",
+            tensor_fp4_2d.repeat(2, 1),
+            tensor_sf_2d.repeat(2, 1),
+            global_scale_2d,
+        ),
+        (
+            "2D 4x rows",
+            tensor_fp4_2d.repeat(4, 1),
+            tensor_sf_2d.repeat(4, 1),
+            global_scale_2d,
+        ),
+        (
+            "2D 2x cols",
+            tensor_fp4_2d.repeat(1, 2),
+            tensor_sf_2d.repeat(1, 2),
+            global_scale_2d,
+        ),
+        ("3D base", tensor_fp4_3d, tensor_sf_3d, global_scale_3d),
+        (
+            "3D 2x experts",
+            tensor_fp4_3d.repeat(2, 1, 1),
+            tensor_sf_3d.repeat(2, 1, 1),
+            global_scale_3d.repeat(2),
+        ),
+        (
+            "3D 2x rows",
+            tensor_fp4_3d.repeat(1, 2, 1),
+            tensor_sf_3d.repeat(1, 2, 1),
+            global_scale_3d,
+        ),
+        (
+            "3D 2x cols",
+            tensor_fp4_3d.repeat(1, 1, 2),
+            tensor_sf_3d.repeat(1, 1, 2),
+            global_scale_3d,
+        ),
+    ]
+
+    quantiles = [0.5, 0.001, 0.999]
+
+    # Move the E2M1 lookup table to CUDA ahead of time, as would normally
+    # happen during model loading (process_weights_after_loading).  Both the
+    # Triton and PyTorch reference paths run on CUDA.
+    nvfp4_emulation_utils.kE2M1ToFloat_handle.val = (
+        nvfp4_emulation_utils.kE2M1ToFloat_handle.val.cuda()
+    )
+
+    for label, tensor_fp4, tensor_sf, global_scale in test_cases:
+        fp4_cuda = tensor_fp4.cuda()
+        sf_cuda = tensor_sf.cuda()
+        gs_cuda = global_scale.cuda()
+
+        # Triton path
+        triton_result = dequantize_to_dtype(
+            fp4_cuda,
+            sf_cuda,
+            gs_cuda,
+            torch.bfloat16,
+            block_size,
+            swizzle=False,
+        )
+
+        # Reference path (PyTorch ops on CUDA, Triton dispatch disabled)
+        with monkeypatch.context() as m:
+            m.setattr(
+                nvfp4_emulation_utils.current_platform,
+                "is_cuda_alike",
+                lambda: False,
+            )
+            reference = dequantize_to_dtype(
+                fp4_cuda,
+                sf_cuda,
+                gs_cuda,
+                torch.bfloat16,
+                block_size,
+                swizzle=False,
+            )
+
+        torch.testing.assert_close(triton_result, reference, atol=0, rtol=0)
+
+        # Benchmark
+        shape = list(tensor_fp4.shape)
+
+        def _triton_bench(
+            fp4_cuda=fp4_cuda,
+            scale_cuda=sf_cuda,
+            global_scale_cuda=gs_cuda,
+            block_size=block_size,
+        ):
+            return dequantize_to_dtype(
+                fp4_cuda,
+                scale_cuda,
+                global_scale_cuda,
+                torch.bfloat16,
+                block_size,
+                swizzle=False,
+            )
+
+        triton_ms, triton_min, triton_max = triton.testing.do_bench(
+            _triton_bench, quantiles=quantiles
+        )
+
+        def _reference_bench(
+            fp4_cuda=fp4_cuda,
+            scale_cuda=sf_cuda,
+            global_scale_cuda=gs_cuda,
+            block_size=block_size,
+        ):
+            with monkeypatch.context() as m2:
+                m2.setattr(
+                    nvfp4_emulation_utils.current_platform,
+                    "is_cuda_alike",
+                    lambda: False,
+                )
+                dequantize_to_dtype(
+                    fp4_cuda,
+                    scale_cuda,
+                    global_scale_cuda,
+                    torch.bfloat16,
+                    block_size,
+                    swizzle=False,
+                )
+
+        ref_ms, ref_min, ref_max = triton.testing.do_bench(
+            _reference_bench, quantiles=quantiles
+        )
+
+        speedup = ref_ms / triton_ms if triton_ms > 0 else float("inf")
+        print(f"  dequantize {label} {shape}:")
+        print(
+            f"    triton:    median={triton_ms:.3f}ms, "
+            f"min={triton_min:.3f}ms, max={triton_max:.3f}ms"
+        )
+        print(
+            f"    reference: median={ref_ms:.3f}ms, "
+            f"min={ref_min:.3f}ms, max={ref_max:.3f}ms"
+        )
+        print(f"    speedup:   {speedup:.2f}x")
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike(),
+    reason="Triton NVFP4 kernel requires CUDA.",
+)
+@pytest.mark.parametrize(
+    "m, k",
+    [
+        (1, 16),
+        (1, 4096),
+        (2, 4096),
+        (4, 4096),
+        (8, 4096),
+        (16, 4096),
+        (24, 4096),
+        (32, 4096),
+        (1, 8192),
+        (2, 8192),
+        (4, 8192),
+        (8, 8192),
+        (16, 8192),
+        (24, 8192),
+        (32, 8192),
+        (1, 32),
+        (2, 48),
+        (7, 64),
+        (16, 128),
+        (33, 160),
+        (128, 256),
+        (256, 512),
+        (1024, 1024),
+        (5120, 2048),
+        (2048, 4096),
+        (4096, 7168),
+        (8192, 8192),
+        (128, 16384),
+    ],
+)
+@pytest.mark.parametrize("global_scale_value", [0.5, 1.0, 0.001])
+def test_triton_nvfp4_quant_dequant(
+    monkeypatch, m: int, k: int, global_scale_value: float
+) -> None:
+    """Test the Triton quant-dequant kernel against the CPU reference."""
+    block_size = 16
+    x = torch.randn(m, k, dtype=torch.bfloat16, device="cuda")
+    global_scale = torch.tensor(global_scale_value, dtype=torch.float32, device="cuda")
+
+    # Triton path
+    triton_result = ref_nvfp4_quant_dequant(x, global_scale, block_size)
+
+    # CPU reference path
+    with monkeypatch.context() as mp:
+        mp.setattr(
+            nvfp4_emulation_utils.current_platform,
+            "is_cuda_alike",
+            lambda: False,
+        )
+        reference = ref_nvfp4_quant_dequant(x.cpu(), global_scale.cpu(), block_size)
+
+    torch.testing.assert_close(triton_result.cpu(), reference, atol=0, rtol=0)
+
+    # Benchmark (both paths on CUDA tensors for fair comparison)
+    quantiles = [0.5, 0.001, 0.999]
+
+    def _triton_bench(
+        input_tensor=x, input_global_scale=global_scale, input_block_size=block_size
+    ):
+        return ref_nvfp4_quant_dequant(
+            input_tensor, input_global_scale, input_block_size
+        )
+
+    triton_ms, triton_min, triton_max = triton.testing.do_bench(
+        _triton_bench, quantiles=quantiles
+    )
+
+    def _reference_bench(
+        input_tensor=x, input_global_scale=global_scale, input_block_size=block_size
+    ):
+        with monkeypatch.context() as mp2:
+            mp2.setattr(
+                nvfp4_emulation_utils.current_platform,
+                "is_cuda_alike",
+                lambda: False,
+            )
+            ref_nvfp4_quant_dequant(input_tensor, input_global_scale, input_block_size)
+
+    ref_ms, ref_min, ref_max = triton.testing.do_bench(
+        _reference_bench, quantiles=quantiles
+    )
+
+    speedup = ref_ms / triton_ms if triton_ms > 0 else float("inf")
+    print(f"  quant_dequant [{m}x{k}] gs={global_scale_value}:")
+    print(
+        f"    triton:    median={triton_ms:.3f}ms, "
+        f"min={triton_min:.3f}ms, max={triton_max:.3f}ms"
+    )
+    print(
+        f"    reference: median={ref_ms:.3f}ms, "
+        f"min={ref_min:.3f}ms, max={ref_max:.3f}ms"
+    )
+    print(f"    speedup:   {speedup:.2f}x")
diff --git a/tests/kernels/quantization/test_nvfp4_quant.py b/tests/kernels/quantization/test_nvfp4_quant.py
index e2db5975882e..f4eaf7187e16 100644
--- a/tests/kernels/quantization/test_nvfp4_quant.py
+++ b/tests/kernels/quantization/test_nvfp4_quant.py
@@ -34,6 +34,7 @@
     (64, 7152),
     (32, 14336),
 ]
+PADDED_OUTPUT_SHAPES = [(128, 48), (128, 80), (150, 48), (150, 80), (64, 7152)]
 SEEDS = [42]
 CUDA_DEVICES = ["cuda:0"]
 
@@ -130,6 +131,10 @@ def recover_swizzled_scales(scale, m, n):
     return result[:m, :scale_n]
 
 
+def round_up(x: int, y: int) -> int:
+    return (x + y - 1) // y * y
+
+
 @pytest.mark.parametrize("dtype", DTYPES)
 @pytest.mark.parametrize("shape", SHAPES)
 @pytest.mark.parametrize("seed", SEEDS)
@@ -205,6 +210,60 @@ def test_python_util_matches_cpp_allocation(
     )
 
 
+@pytest.mark.parametrize("shape", PADDED_OUTPUT_SHAPES)
+@pytest.mark.parametrize("is_sf_swizzled_layout", [True, False])
+@torch.inference_mode()
+def test_quantize_to_fp4_with_padded_output(
+    shape: tuple[int, int],
+    is_sf_swizzled_layout: bool,
+) -> None:
+    from vllm._custom_ops import create_fp4_output_tensors
+
+    dtype = torch.float16
+    set_random_seed(42)
+    torch.set_default_device("cuda:0")
+
+    m, n = shape
+    padded_n = round_up(n, 32)
+    assert padded_n > n
+
+    x = torch.randn((m, n), dtype=dtype)
+    tensor_amax = torch.abs(x).max().to(torch.float32)
+    global_scale = FLOAT8_E4M3_MAX * FLOAT4_E2M1_MAX / tensor_amax
+    out_ref, scale_ref = ref_nvfp4_quant(x, global_scale)
+
+    out, out_scale = ops.scaled_fp4_quant(
+        x,
+        global_scale,
+        is_sf_swizzled_layout=is_sf_swizzled_layout,
+        padded_n=padded_n,
+    )
+    py_out, py_scale = create_fp4_output_tensors(
+        m,
+        n,
+        torch.device("cuda:0"),
+        is_sf_swizzled_layout,
+        padded_n=padded_n,
+    )
+
+    assert out.shape == (m, padded_n // 2)
+    assert out.shape == py_out.shape
+    assert out_scale.shape == py_scale.view(torch.float8_e4m3fn).shape
+
+    out_ans = cast_from_fp4(out[:, : n // 2], m, n)
+    torch.testing.assert_close(out_ans, out_ref)
+    assert torch.count_nonzero(out[:, n // 2 :]) == 0
+
+    if is_sf_swizzled_layout:
+        scale_ans = recover_swizzled_scales(out_scale, m, padded_n)
+        torch.testing.assert_close(scale_ans[:, : n // BLOCK_SIZE], scale_ref)
+        assert torch.count_nonzero(scale_ans[:, n // BLOCK_SIZE :]) == 0
+    else:
+        scale_ans = out_scale.to(torch.float32)
+        torch.testing.assert_close(scale_ans[:, : n // BLOCK_SIZE], scale_ref)
+        assert torch.count_nonzero(scale_ans[:, n // BLOCK_SIZE :]) == 0
+
+
 @pytest.mark.parametrize("pad_shape", PAD_SHAPES)
 @torch.inference_mode()
 def test_quantize_to_fp4_padded(pad_shape: tuple[int, int]) -> None:
diff --git a/tests/kernels/quantization/test_per_token_group_quant.py b/tests/kernels/quantization/test_per_token_group_quant.py
index e3b934722b94..4089e9bc4688 100644
--- a/tests/kernels/quantization/test_per_token_group_quant.py
+++ b/tests/kernels/quantization/test_per_token_group_quant.py
@@ -6,6 +6,7 @@
 import torch
 
 from vllm.model_executor.layers.quantization.utils import fp8_utils, int8_utils
+from vllm.platforms import current_platform
 
 
 @pytest.mark.parametrize(
@@ -48,6 +49,295 @@ def test_per_token_group_quant_fp8(
     assert torch.allclose(scale, ref_s, atol=0.01, rtol=0.01)
 
 
+@pytest.mark.parametrize(
+    "num_tokens,hidden_dim,group_size",
+    [
+        # No padding: mn=4 (mult of 4), groups_per_row=56 (mult of 4)
+        (4, 7168, 128),
+        # MN padding only: mn=1, tma_aligned_mn=4
+        (1, 7168, 128),
+        # MN padding only: mn=3, tma_aligned_mn=4
+        (3, 7168, 128),
+        # K padding only: groups_per_row=5 (5%4=1)
+        (4, 640, 128),
+        # K padding only: groups_per_row=6 (6%4=2)
+        (4, 768, 128),
+        # Single packed column, no padding: k_num_packed=1, mn%4=0
+        (4, 384, 128),
+        # Both MN and K padding
+        (1, 384, 128),
+        (3, 640, 128),
+        # Larger shapes with no padding
+        (64, 7168, 128),
+        (128, 14336, 128),
+        # Larger shapes with padding
+        (127, 7168, 128),
+        (253, 640, 128),
+    ],
+)
+@pytest.mark.parametrize("poisoned_scales", [False, True])
+@pytest.mark.skipif(
+    not current_platform.is_cuda(), reason="DeepGEMM not available on this platform"
+)
+def test_per_token_group_quant_fp8_packed(
+    num_tokens, hidden_dim, group_size, poisoned_scales
+):
+    """Test the packed DeepGEMM quantization kernel against the Triton
+    reference (row-major, UE8M0 scales)."""
+
+    device = "cuda"
+    torch.manual_seed(42)
+
+    x = torch.randn((num_tokens, hidden_dim), device=device, dtype=torch.bfloat16) * 8
+
+    mn = num_tokens
+    groups_per_row = hidden_dim // group_size
+    k_num_packed = (groups_per_row + 3) // 4
+    tma_aligned_mn = ((mn + 3) // 4) * 4
+    num_scale_elems = mn + (k_num_packed - 1) * tma_aligned_mn
+
+    if poisoned_scales:
+        # Call the kernel with poisoned scale buffer to
+        # ensure padded indices are correctly zeroed.
+        fp8_dtype = torch.float8_e4m3fn
+        finfo = torch.finfo(fp8_dtype)
+        out_q = torch.empty_like(x, dtype=fp8_dtype)
+        out_s_packed = torch.empty_strided(
+            (mn, k_num_packed),
+            (1, tma_aligned_mn),
+            device=device,
+            dtype=torch.int32,
+        )
+        torch.as_strided(out_s_packed, (num_scale_elems,), (1,)).fill_(0x7F7F7F7F)
+        torch.ops._C.per_token_group_fp8_quant_packed(
+            x,
+            out_q,
+            out_s_packed,
+            group_size,
+            1e-10,
+            finfo.min,
+            finfo.max,
+        )
+    else:
+        out_q, out_s_packed = fp8_utils.per_token_group_quant_fp8_packed_for_deepgemm(
+            x,
+            group_size=group_size,
+            use_ue8m0=True,
+        )
+
+    # Triton reference (row-major float32 scales, UE8M0)
+    with patch("vllm.platforms.current_platform.is_cuda", return_value=False):
+        ref_q, ref_s = fp8_utils.per_token_group_quant_fp8(
+            x,
+            group_size,
+            use_ue8m0=True,
+        )
+
+    # Quantized values must match.
+    assert torch.equal(out_q, ref_q), "Quantized output mismatch"
+
+    # Verify packed scales (valid exponents + padding zeros).
+    ref_s_flat = ref_s.reshape(mn, groups_per_row)
+    ref_exponents = (ref_s_flat.view(torch.int32) >> 23) & 0xFF
+
+    expected = torch.zeros(num_scale_elems, dtype=torch.int32, device="cpu")
+    for row in range(mn):
+        for g in range(groups_per_row):
+            pack_col = g // 4
+            pos = g % 4
+            idx = pack_col * tma_aligned_mn + row
+            expected[idx] |= int(ref_exponents[row, g].item()) << (pos * 8)
+
+    actual = torch.as_strided(out_s_packed, (num_scale_elems,), (1,)).cpu()
+    assert torch.equal(actual, expected), (
+        f"Packed scale storage mismatch.\n"
+        f"First diff at index "
+        f"{(actual != expected).nonzero(as_tuple=True)[0][0].item()}"
+    )
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda(), reason="DeepGEMM not available on this platform"
+)
+def test_per_token_group_quant_fp8_packed_all_zero():
+    """All-zero input must produce well-defined UE8M0 scale bytes via the eps
+    floor in the kernel's UE8M0 path. Locks down the all-zero behavior before
+    optimization.
+
+    The CUDA kernel computes:
+        y_s = eps / fp8_max
+        y_s = exp2(ceil(log2(fmax(y_s, 1e-10))))
+    For all-zero input, eps/fp8_max < 1e-10, so the inner fmax clamps back to
+    1e-10, giving exp2(ceil(log2(1e-10))) = exp2(-33) => UE8M0 byte 0x5E (94).
+    """
+
+    device = "cuda"
+    num_tokens, hidden_dim, group_size = 4, 7168, 128
+    x = torch.zeros((num_tokens, hidden_dim), device=device, dtype=torch.bfloat16)
+
+    out_q, out_s_packed = fp8_utils.per_token_group_quant_fp8_packed_for_deepgemm(
+        x,
+        group_size=group_size,
+        use_ue8m0=True,
+    )
+
+    # Quantized values must be all zero.
+    assert torch.equal(
+        out_q.view(torch.uint8),
+        torch.zeros_like(out_q, dtype=torch.uint8),
+    ), "All-zero input should produce all-zero FP8 output"
+
+    # UE8M0 byte produced by the kernel for all-zero input.
+    # The kernel's inner fmax(y_s, 1e-10) clamps eps/fp8_max back to 1e-10.
+    # 1e-10 as float32 has biased exponent 0x5D and a non-zero mantissa, so
+    # the kernel's bit-twiddle (exp_bits + (mant_bits != 0)) rounds up to
+    # 0x5E. This matches exp2(ceil(log2(1e-10))) = exp2(-33).
+    expected_exp_byte = 0x5E
+
+    mn = num_tokens
+    groups_per_row = hidden_dim // group_size
+    k_num_packed = (groups_per_row + 3) // 4
+    tma_aligned_mn = ((mn + 3) // 4) * 4
+    num_scale_elems = mn + (k_num_packed - 1) * tma_aligned_mn
+
+    # All valid scale slots must contain the expected packed value.
+    # Padding slots must be zero.
+    actual = torch.as_strided(out_s_packed, (num_scale_elems,), (1,)).cpu()
+
+    expected = torch.zeros(num_scale_elems, dtype=torch.int32, device="cpu")
+    for row in range(mn):
+        for g in range(groups_per_row):
+            pack_col = g // 4
+            pos = g % 4
+            idx = pack_col * tma_aligned_mn + row
+            expected[idx] |= expected_exp_byte << (pos * 8)
+
+    assert torch.equal(actual, expected), "All-zero scale bytes mismatch"
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda(), reason="DeepGEMM not available on this platform"
+)
+def test_per_token_group_quant_fp8_packed_mantissa_rounds_up():
+    """Inputs whose absmax/max_8bit produces a non-power-of-2 force the
+    mantissa-rounding-up branch (exp_byte += 1). Locks down this behavior
+    before optimization."""
+
+    device = "cuda"
+    num_tokens, hidden_dim, group_size = 4, 7168, 128
+
+    # Build a tensor whose per-group absmax = 1.5 * fp8_max * 2^k for various k.
+    # fp8_max = torch.finfo(torch.float8_e4m3fn).max = 448.0.
+    # Then absmax/fp8_max = 1.5 * 2^k -> non-zero mantissa, triggers ceil
+    # rounding to 2^(k+1). Use k=0 for simplicity; the bf16 representation of
+    # 1.5*448=672.0 is exact.
+    x = torch.full(
+        (num_tokens, hidden_dim),
+        672.0,
+        device=device,
+        dtype=torch.bfloat16,
+    )
+
+    out_q, out_s_packed = fp8_utils.per_token_group_quant_fp8_packed_for_deepgemm(
+        x,
+        group_size=group_size,
+        use_ue8m0=True,
+    )
+
+    with patch("vllm.platforms.current_platform.is_cuda", return_value=False):
+        ref_q, ref_s = fp8_utils.per_token_group_quant_fp8(
+            x,
+            group_size,
+            use_ue8m0=True,
+        )
+
+    assert torch.equal(out_q, ref_q), "Quantized output mismatch"
+
+    mn = num_tokens
+    groups_per_row = hidden_dim // group_size
+    k_num_packed = (groups_per_row + 3) // 4
+    tma_aligned_mn = ((mn + 3) // 4) * 4
+    num_scale_elems = mn + (k_num_packed - 1) * tma_aligned_mn
+
+    ref_s_flat = ref_s.reshape(mn, groups_per_row)
+    ref_exponents = (ref_s_flat.view(torch.int32) >> 23) & 0xFF
+    expected = torch.zeros(num_scale_elems, dtype=torch.int32, device="cpu")
+    for row in range(mn):
+        for g in range(groups_per_row):
+            pack_col = g // 4
+            pos = g % 4
+            idx = pack_col * tma_aligned_mn + row
+            expected[idx] |= int(ref_exponents[row, g].item()) << (pos * 8)
+
+    actual = torch.as_strided(out_s_packed, (num_scale_elems,), (1,)).cpu()
+    assert torch.equal(actual, expected), "Scale bytes mismatch"
+
+
+@pytest.mark.parametrize(
+    "num_tokens,hidden_dim",
+    [
+        (1, 7168),  # mn padded 1 -> 4
+        (2, 7168),  # mn padded 2 -> 4
+        (3, 7168),  # mn padded 3 -> 4
+        (5, 7168),  # mn padded 5 -> 8
+        (127, 7168),  # mn padded 127 -> 128
+        (253, 640),  # both mn and groups padded
+        (1, 384),  # extreme: 1 group, 1 mn row -> both axes padded
+    ],
+)
+@pytest.mark.skipif(
+    not current_platform.is_cuda(), reason="DeepGEMM not available on this platform"
+)
+def test_per_token_group_quant_fp8_packed_zero_fills_padded_output_q(
+    num_tokens, hidden_dim
+):
+    """When output_q is allocated with shape (tma_aligned_mn, k) instead of
+    (mn, k), the kernel must overwrite the padded mn rows with zeros so
+    callers can use ``torch.empty`` instead of ``torch.zeros``."""
+
+    device = "cuda"
+    group_size = 128
+    torch.manual_seed(42)
+    x = torch.randn((num_tokens, hidden_dim), device=device, dtype=torch.bfloat16) * 8
+
+    mn = num_tokens
+    groups_per_row = hidden_dim // group_size
+    k_num_packed = (groups_per_row + 3) // 4
+    tma_aligned_mn = ((mn + 3) // 4) * 4
+
+    fp8_dtype = torch.float8_e4m3fn
+    finfo = torch.finfo(fp8_dtype)
+    # Allocate output_q with the padded mn extent and pre-fill with 0xFF
+    # so the kernel cannot rely on a clean buffer.
+    out_q = torch.empty((tma_aligned_mn, hidden_dim), device=device, dtype=fp8_dtype)
+    out_q.view(torch.uint8).fill_(0xFF)
+
+    out_s_packed = torch.empty_strided(
+        (mn, k_num_packed),
+        (1, tma_aligned_mn),
+        device=device,
+        dtype=torch.int32,
+    )
+
+    torch.ops._C.per_token_group_fp8_quant_packed(
+        x, out_q, out_s_packed, group_size, 1e-10, finfo.min, finfo.max
+    )
+
+    # Live rows must match the Triton reference.
+    with patch("vllm.platforms.current_platform.is_cuda", return_value=False):
+        ref_q, _ = fp8_utils.per_token_group_quant_fp8(x, group_size, use_ue8m0=True)
+    assert torch.equal(out_q[:mn], ref_q), "Live region mismatch"
+
+    # Padded rows must be all-zero; without this, downstream TMA loads would
+    # see uninitialised data.
+    if tma_aligned_mn > mn:
+        padded_bytes = out_q[mn:tma_aligned_mn].view(torch.uint8)
+        assert padded_bytes.eq(0).all(), (
+            f"Padded rows [{mn}, {tma_aligned_mn}) not zeroed; "
+            f"{padded_bytes.ne(0).sum().item()} non-zero bytes"
+        )
+
+
 @pytest.mark.parametrize("shape", [(32, 128), (64, 256), (16, 512)])
 @pytest.mark.parametrize("group_size", [64, 128])
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
diff --git a/tests/kernels/quantization/test_rocm_compressed_tensors_w4a16.py b/tests/kernels/quantization/test_rocm_compressed_tensors_w4a16.py
new file mode 100644
index 000000000000..a9b35a4ea642
--- /dev/null
+++ b/tests/kernels/quantization/test_rocm_compressed_tensors_w4a16.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""End-to-end smoke test for CT W4A16 models on ROCm.
+
+This validates that a real compressed-tensors W4A16 model can run inference
+end-to-end (which will exercise the Triton W4A16 kernel when selected).
+
+Run `pytest tests/kernels/quantization/test_rocm_compressed_tensors_w4a16.py`.
+"""
+
+import pytest
+
+from vllm.platforms import current_platform
+
+
+@pytest.mark.parametrize(
+    "model_path",
+    [
+        # Listed in tests/weight_loading/models.txt
+        "nm-testing/tinyllama-oneshot-w4a16-group128-v2",
+    ],
+)
+@pytest.mark.parametrize("max_tokens", [32])
+@pytest.mark.skipif(not current_platform.is_rocm(), reason="Should only run on ROCm")
+def test_rocm_compressed_tensors_w4a16_e2e(
+    vllm_runner, example_prompts, model_path, max_tokens
+):
+    # Use fp16 activations for maximum compatibility.
+    # gpu_memory_utilization lowered to work on shared nodes.
+    with vllm_runner(
+        model_path, dtype="float16", gpu_memory_utilization=0.3
+    ) as vllm_model:
+        # If the W4A16 kernel is broken, this will typically throw.
+        vllm_model.generate_greedy(example_prompts, max_tokens=max_tokens)
diff --git a/tests/kernels/quantization/test_scaled_mm_kernel_selection.py b/tests/kernels/quantization/test_scaled_mm_kernel_selection.py
index 1ac663ff6de5..bedebdb59b85 100644
--- a/tests/kernels/quantization/test_scaled_mm_kernel_selection.py
+++ b/tests/kernels/quantization/test_scaled_mm_kernel_selection.py
@@ -7,15 +7,21 @@
 
 import inspect
 from abc import ABC
+from unittest.mock import patch
 
 import pytest
+import torch
 
 from vllm.model_executor.kernels.linear import (
     AiterInt8ScaledMMLinearKernel,
     CPUInt8ScaledMMLinearKernel,
+    Int8ScaledMMLinearKernel,
     Int8ScaledMMLinearLayerConfig,
     ScaledMMLinearKernel,
+    init_int8_linear_kernel,
+    register_linear_kernel,
 )
+from vllm.platforms import PlatformEnum
 
 pytestmark = pytest.mark.cpu_test
 
@@ -85,3 +91,39 @@ def test_cpu_kernel_accepts_all_configs():
         assert can_impl, (
             f"CPUInt8ScaledMMLinearKernel should accept config {config}: {reason}"
         )
+
+
+class OOTInt8ScaledMMLinearKernel(Int8ScaledMMLinearKernel):
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        return True, None
+
+    @classmethod
+    def can_implement(cls, c: Int8ScaledMMLinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        pass
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        pass
+
+
+@patch("vllm.model_executor.kernels.linear.current_platform")
+def test_register_oot_linear_kernel(platform_mock):
+    """Test that the linear kernel registration works correctly."""
+    platform_mock._enum = PlatformEnum.OOT
+    register_linear_kernel(OOTInt8ScaledMMLinearKernel, PlatformEnum.OOT, "int8")
+
+    kernel = init_int8_linear_kernel(True, True, True, "module")
+
+    assert isinstance(kernel, OOTInt8ScaledMMLinearKernel), (
+        "init_int8_linear_kernel should return an instance of the registered kernel"
+    )
diff --git a/tests/kernels/quantization/test_triton_w4a16.py b/tests/kernels/quantization/test_triton_w4a16.py
new file mode 100644
index 000000000000..6502f5244292
--- /dev/null
+++ b/tests/kernels/quantization/test_triton_w4a16.py
@@ -0,0 +1,304 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for the ROCm Triton W4A16 GEMM kernel.
+
+Run `pytest tests/kernels/quantization/test_triton_w4a16.py`.
+"""
+
+import importlib
+
+import pytest
+import torch
+
+from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed
+
+# This test module is ROCm/Triton specific. Avoid import-time failures on
+# non-ROCm or environments without Triton by skipping early.
+if not current_platform.is_rocm():
+    pytest.skip("ROCm only", allow_module_level=True)
+
+pytest.importorskip("triton")
+
+device = "cuda"
+
+triton_w4a16_module = importlib.import_module(
+    "vllm.model_executor.kernels.linear.mixed_precision.triton_w4a16"
+)
+triton_w4a16_gemm = triton_w4a16_module.triton_w4a16_gemm
+TritonW4A16LinearKernel = triton_w4a16_module.TritonW4A16LinearKernel
+
+
+def _pack_int4_along_n(w_int4_kn: torch.Tensor) -> torch.Tensor:
+    """Pack int4 values along N: [K, N] -> [K, N//8] int32."""
+    assert w_int4_kn.dtype == torch.int32
+    K, N = w_int4_kn.shape
+    assert N % 8 == 0
+    shifts = torch.arange(8, device=w_int4_kn.device, dtype=torch.int32) * 4
+    return torch.sum(
+        (w_int4_kn.view(K, N // 8, 8) & 0xF) << shifts,
+        dim=2,
+        dtype=torch.int32,
+    ).contiguous()
+
+
+def _unpack_int4_along_n(w_packed_kn8: torch.Tensor) -> torch.Tensor:
+    """Unpack int4 values along N: [K, N//8] -> [K, N] int32."""
+    assert w_packed_kn8.dtype == torch.int32
+    K, N8 = w_packed_kn8.shape
+    shifts = torch.arange(8, device=w_packed_kn8.device, dtype=torch.int32) * 4
+    nibbles = (w_packed_kn8.unsqueeze(-1) >> shifts) & 0xF
+    return nibbles.reshape(K, N8 * 8)
+
+
+def _pack_int4_along_k_to_ckpt(w_int4_kn: torch.Tensor) -> torch.Tensor:
+    """Pack int4 values along K into CT checkpoint layout: [K,N] -> [N, K//8]."""
+    assert w_int4_kn.dtype == torch.int32
+    K, N = w_int4_kn.shape
+    assert K % 8 == 0
+    out = torch.zeros((N, K // 8), dtype=torch.int32, device=w_int4_kn.device)
+    for i in range(8):
+        out |= (w_int4_kn[i::8, :].t() & 0xF) << (i * 4)
+    return out.contiguous()
+
+
+def _w4a16_reference(
+    a_mk: torch.Tensor,
+    b_packed_kn8: torch.Tensor,
+    scales_gn: torch.Tensor,
+    *,
+    group_size: int,
+    qzeros_gn8: torch.Tensor | None,
+    zp_bias: int,
+) -> torch.Tensor:
+    """Reference implementation for W4A16.
+
+    a_mk: [M,K] fp16/bf16
+    b_packed_kn8: [K, N//8] int32, N-packed int4 weights
+    scales_gn: [K//G, N] fp16/bf16
+    qzeros_gn8: [K//G, N//8] int32, N-packed int4 zeros, or None
+    """
+    assert a_mk.dtype in (torch.float16, torch.bfloat16)
+    assert b_packed_kn8.dtype == torch.int32
+    assert scales_gn.dtype == a_mk.dtype
+
+    M, K = a_mk.shape
+    N = b_packed_kn8.shape[1] * 8
+    assert b_packed_kn8.shape[0] == K
+
+    assert group_size > 0 and K % group_size == 0
+    G = group_size
+    num_groups = K // G
+    assert scales_gn.shape == (num_groups, N)
+
+    w_int4 = _unpack_int4_along_n(b_packed_kn8)  # [K,N]
+    if qzeros_gn8 is None:
+        z_full = torch.full((K, N), zp_bias, dtype=torch.int32, device=a_mk.device)
+    else:
+        assert qzeros_gn8.shape == (num_groups, N // 8)
+        z_gn = _unpack_int4_along_n(qzeros_gn8)  # [G,N] in groups
+        z_full = z_gn.repeat_interleave(G, dim=0)  # [K,N]
+
+    s_full = scales_gn.repeat_interleave(G, dim=0).to(torch.float32)  # [K,N]
+    w_fp = (w_int4 - z_full).to(torch.float32) * s_full  # [K,N]
+
+    out = a_mk.to(torch.float32) @ w_fp  # [M,N]
+    return out.to(a_mk.dtype)
+
+
+@pytest.mark.skipif(not current_platform.is_rocm(), reason="ROCm only")
+@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16])
+@pytest.mark.parametrize(
+    "M,K,N,G,has_zp",
+    [
+        (1, 256, 256, 32, False),
+        (17, 256, 512, 32, False),
+        (32, 512, 256, 64, False),
+        (33, 512, 512, 128, False),
+        (64, 1024, 256, 256, False),
+        (128, 256, 1024, 32, True),
+        (64, 512, 512, 64, True),
+    ],
+)
+def test_triton_w4a16_gemm_matches_reference(dtype, M, K, N, G, has_zp):
+    if not torch.cuda.is_available():
+        pytest.skip("CUDA/HIP device not available")
+    if N % 8 != 0 or K % G != 0:
+        pytest.skip("Invalid test shape")
+
+    set_random_seed(0)
+
+    a = (0.25 * torch.randn((M, K), device=device, dtype=torch.float32)).to(dtype)
+    w_int4 = torch.randint(0, 16, (K, N), device=device, dtype=torch.int32)
+    b_packed = _pack_int4_along_n(w_int4)
+
+    scales = (0.05 * torch.rand((K // G, N), device=device, dtype=torch.float32)).to(
+        dtype
+    )
+
+    qzeros = None
+    if has_zp:
+        zeros_int4 = torch.randint(0, 16, (K // G, N), device=device, dtype=torch.int32)
+        qzeros = _pack_int4_along_n(zeros_int4)
+
+    out = triton_w4a16_gemm(
+        a=a,
+        b_q=b_packed,
+        scales=scales,
+        qzeros=qzeros,
+        group_size=G,
+        zp_bias=8,
+    )
+    ref = _w4a16_reference(
+        a,
+        b_packed,
+        scales,
+        group_size=G,
+        qzeros_gn8=qzeros,
+        zp_bias=8,
+    )
+
+    torch.testing.assert_close(out, ref, rtol=1e-2, atol=1e-2)
+
+
+@pytest.mark.skipif(not current_platform.is_rocm(), reason="ROCm only")
+def test_triton_w4a16_gemm_requires_contiguous_inputs():
+    if not torch.cuda.is_available():
+        pytest.skip("CUDA/HIP device not available")
+
+    set_random_seed(0)
+    M, K, N, G = 32, 256, 256, 32
+    a = torch.randn((K, M), device=device, dtype=torch.float16).t()  # non-contiguous
+    w_int4 = torch.randint(0, 16, (K, N), device=device, dtype=torch.int32)
+    b_packed = _pack_int4_along_n(w_int4)
+    scales = torch.rand((K // G, N), device=device, dtype=torch.float16)
+
+    with pytest.raises(AssertionError):
+        triton_w4a16_gemm(
+            a=a,
+            b_q=b_packed,
+            scales=scales,
+            qzeros=None,
+            group_size=G,
+            zp_bias=8,
+        )
+
+
+@pytest.mark.skipif(not current_platform.is_rocm(), reason="ROCm only")
+def test_triton_w4a16_process_weights_after_loading_repacks_layout():
+    if not torch.cuda.is_available():
+        pytest.skip("CUDA/HIP device not available")
+
+    from vllm.config import VllmConfig, set_current_vllm_config
+    from vllm.distributed import (
+        ensure_model_parallel_initialized,
+        init_distributed_environment,
+    )
+    from vllm.model_executor.kernels.linear.mixed_precision.MPLinearKernel import (
+        MPLinearLayerConfig,
+    )
+    from vllm.model_executor.parameter import (
+        GroupQuantScaleParameter,
+        PackedColumnParameter,
+        PackedvLLMParameter,
+    )
+    from vllm.scalar_type import scalar_types
+
+    with set_current_vllm_config(VllmConfig()):
+        init_distributed_environment(
+            world_size=1,
+            rank=0,
+            distributed_init_method="tcp://127.0.0.1:0",
+            local_rank=0,
+        )
+        ensure_model_parallel_initialized(1, 1)
+
+    set_random_seed(0)
+
+    # Small-but-nontrivial shapes.
+    K, N = 256, 256
+    G = 32
+    assert K % 8 == 0 and N % 8 == 0 and K % G == 0
+
+    # Build a canonical int4 weight grid then pack into the CT checkpoint layout.
+    w_int4_kn = torch.randint(0, 16, (K, N), device=device, dtype=torch.int32)
+    w_ckpt_nk8 = _pack_int4_along_k_to_ckpt(w_int4_kn)  # [N, K//8]
+
+    # Scales in CT checkpoint layout for WNA16: [N, K//G]
+    scales_ckpt_nkg = 0.05 * torch.rand((N, K // G), device=device, dtype=torch.float16)
+
+    # Asymmetric case: zero points in CT checkpoint layout [N//8, K//G] (N-packed)
+    zeros_int4_gn = torch.randint(0, 16, (K // G, N), device=device, dtype=torch.int32)
+    zeros_packed_gn8 = _pack_int4_along_n(zeros_int4_gn)  # [K//G, N//8]
+    zeros_ckpt_n8kg = zeros_packed_gn8.t().contiguous()  # [N//8, K//G]
+
+    config = MPLinearLayerConfig(
+        full_weight_shape=(K, N),
+        partition_weight_shape=(K, N),
+        weight_type=scalar_types.uint4,  # asymmetric
+        act_type=torch.float16,
+        group_size=G,
+        zero_points=True,
+        has_g_idx=False,
+    )
+    kernel = TritonW4A16LinearKernel(
+        config,
+        w_q_param_name="weight_packed",
+        w_s_param_name="weight_scale",
+        w_zp_param_name="weight_zero_point",
+        w_gidx_param_name=None,
+    )
+
+    # Build dummy layer with vLLM parameter wrappers.
+    weight_loader = lambda *args, **kwargs: None
+
+    class DummyLayer(torch.nn.Module):
+        pass
+
+    layer = DummyLayer()
+    layer.register_parameter(
+        "weight_packed",
+        PackedvLLMParameter(
+            data=w_ckpt_nk8,
+            weight_loader=weight_loader,
+            input_dim=1,
+            output_dim=0,
+            packed_factor=8,
+            packed_dim=1,
+        ),
+    )
+    layer.register_parameter(
+        "weight_scale",
+        GroupQuantScaleParameter(
+            data=scales_ckpt_nkg,
+            weight_loader=weight_loader,
+            input_dim=1,
+            output_dim=0,
+        ),
+    )
+    layer.register_parameter(
+        "weight_zero_point",
+        PackedColumnParameter(
+            data=zeros_ckpt_n8kg,
+            weight_loader=weight_loader,
+            output_dim=0,
+            packed_factor=8,
+            packed_dim=0,
+        ),
+    )
+
+    kernel.process_weights_after_loading(layer)
+
+    # Expected transformed layouts.
+    expected_w_kn8 = _pack_int4_along_n(w_int4_kn)  # [K, N//8]
+    expected_scales_gn = scales_ckpt_nkg.t().contiguous()  # [K//G, N]
+    expected_zeros_gn8 = zeros_ckpt_n8kg.t().contiguous()  # [K//G, N//8]
+
+    assert tuple(layer.weight_packed.shape) == (K, N // 8)
+    assert tuple(layer.weight_scale.shape) == (K // G, N)
+    assert tuple(layer.weight_zero_point.shape) == (K // G, N // 8)
+
+    torch.testing.assert_close(layer.weight_packed, expected_w_kn8)
+    torch.testing.assert_close(layer.weight_scale, expected_scales_gn)
+    torch.testing.assert_close(layer.weight_zero_point, expected_zeros_gn8)
diff --git a/tests/kernels/quantization/test_w4a16_kernel_selection.py b/tests/kernels/quantization/test_w4a16_kernel_selection.py
new file mode 100644
index 000000000000..f0696191d3f6
--- /dev/null
+++ b/tests/kernels/quantization/test_w4a16_kernel_selection.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for W4A16 kernel selection logic (ROCm).
+
+Run `pytest tests/kernels/quantization/test_w4a16_kernel_selection.py`.
+"""
+
+import pytest
+import torch
+
+from vllm.model_executor.kernels.linear import (
+    MPLinearLayerConfig,
+    choose_mp_linear_kernel,
+)
+from vllm.platforms import current_platform
+from vllm.scalar_type import scalar_types
+
+
+@pytest.mark.skipif(not current_platform.is_rocm(), reason="ROCm only")
+def test_choose_mp_linear_kernel_picks_triton_w4a16_for_uint4b8():
+    # int4 weights, 16-bit activations (CT W4A16 typical config).
+    K, N = 1024, 256
+    config = MPLinearLayerConfig(
+        full_weight_shape=(K, N),
+        partition_weight_shape=(K, N),
+        weight_type=scalar_types.uint4b8,  # symmetric int4 (bias=8)
+        act_type=torch.float16,
+        group_size=128,
+        zero_points=False,
+        has_g_idx=False,
+    )
+
+    kernel_type = choose_mp_linear_kernel(config)
+    assert kernel_type.__name__ == "TritonW4A16LinearKernel"
+
+
+@pytest.mark.skipif(not current_platform.is_rocm(), reason="ROCm only")
+def test_choose_mp_linear_kernel_picks_triton_w4a16_for_uint4_asymmetric():
+    # Asymmetric int4 weights should also be supported (explicit zero points).
+    K, N = 512, 512
+    config = MPLinearLayerConfig(
+        full_weight_shape=(K, N),
+        partition_weight_shape=(K, N),
+        weight_type=scalar_types.uint4,  # asymmetric int4 (explicit zeros)
+        act_type=torch.bfloat16,
+        group_size=64,
+        zero_points=True,
+        has_g_idx=False,
+    )
+
+    kernel_type = choose_mp_linear_kernel(config)
+    assert kernel_type.__name__ == "TritonW4A16LinearKernel"
diff --git a/tests/kernels/test_awq_int4_to_int8.py b/tests/kernels/test_awq_int4_to_int8.py
new file mode 100644
index 000000000000..f94214785d22
--- /dev/null
+++ b/tests/kernels/test_awq_int4_to_int8.py
@@ -0,0 +1,288 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Unit tests for AWQ INT4 W4A8 GEMM pipeline (SGLang kernel migration).
+
+Part 1: Weight packing tests
+  - convert_weight_packed_scale_zp correctness
+
+Part 2: INT4 W4A8 GEMM tests
+  - int4_scaled_mm_cpu correctness w.r.t. float reference
+  - Bias, 3D input, various shapes
+
+Part 3: create_weights shapes
+
+cmd:
+    VLLM_CPU_INT4_W4A8=1 python -m pytest tests/kernels/test_awq_int4_to_int8.py -v -s
+"""
+
+import numpy as np
+import pytest
+import torch
+
+from vllm._custom_ops import (
+    CPUQuantAlgo,
+    convert_weight_packed_scale_zp,
+    int4_scaled_mm_cpu,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    pack_cols,
+)
+from vllm.platforms import current_platform
+
+if not current_platform.is_cpu():
+    pytest.skip("skipping CPU-only tests", allow_module_level=True)
+
+
+def make_awq_checkpoint_data(K, N, group_size, seed=42):
+    """Create synthetic AWQ checkpoint data in packed int32 format.
+
+    Returns:
+        packed_qweight: [K, N//8] int32 (AWQ interleaved + packed)
+        packed_qzeros:  [num_groups, N//8] int32 (AWQ interleaved + packed)
+        scales:         [num_groups, N] float32
+        float_ref:      [K, N] float32, reference dequantized weights
+        weight_int4_orig: [K, N] int32, original int4 values (0-15)
+        zeros_int4_orig:  [num_groups, N] int32, original zero points (0-15)
+    """
+    rng = np.random.RandomState(seed)
+    num_groups = K // group_size
+
+    weight_int4_orig = torch.from_numpy(
+        rng.randint(0, 16, size=(K, N)).astype(np.int32)
+    )
+    zeros_int4_orig = torch.from_numpy(
+        rng.randint(0, 16, size=(num_groups, N)).astype(np.int32)
+    )
+    scales = torch.from_numpy((rng.randn(num_groups, N) * 0.05).astype(np.float32))
+
+    scales_exp = scales.repeat_interleave(group_size, dim=0)
+    zeros_exp = zeros_int4_orig.repeat_interleave(group_size, dim=0)
+    float_ref = (weight_int4_orig.float() - zeros_exp.float()) * scales_exp
+
+    awq_interleave = [0, 2, 4, 6, 1, 3, 5, 7]
+    weight_interleaved = (
+        weight_int4_orig.reshape(-1, 8)[:, awq_interleave].reshape(K, N).contiguous()
+    )
+    packed_qweight = pack_cols(weight_interleaved, 4, K, N)
+
+    zeros_interleaved = (
+        zeros_int4_orig.reshape(-1, 8)[:, awq_interleave]
+        .reshape(num_groups, N)
+        .contiguous()
+    )
+    packed_qzeros = pack_cols(zeros_interleaved, 4, num_groups, N)
+
+    return (
+        packed_qweight,
+        packed_qzeros,
+        scales,
+        float_ref,
+        weight_int4_orig,
+        zeros_int4_orig,
+    )
+
+
+class TestConvertWeightPackedScaleZp:
+    """Tests for convert_weight_packed_scale_zp weightpacking."""
+
+    @pytest.mark.parametrize(
+        "K,N,group_size",
+        [
+            (128, 128, 128),
+            (256, 256, 128),
+            (512, 256, 64),
+        ],
+    )
+    def test_packing_output_shapes(self, K, N, group_size):
+        """Packed outputs should have expected shapes."""
+        (packed_qweight, packed_qzeros, scales, _, _, _) = make_awq_checkpoint_data(
+            K, N, group_size
+        )
+
+        blocked_w, blocked_zp, blocked_s = convert_weight_packed_scale_zp(
+            packed_qweight,
+            packed_qzeros,
+            scales,
+            CPUQuantAlgo.AWQ,
+        )
+
+        block_n = 32
+        Nc = N // block_n
+
+        assert blocked_w.dim() >= 2, (
+            f"blocked_w should have >= 2 dims, got {blocked_w.dim()}"
+        )
+        assert blocked_s.size(0) == Nc, (
+            f"Expected Nc={Nc} scale blocks, got {blocked_s.size(0)}"
+        )
+        assert blocked_zp.size(0) == Nc, (
+            f"Expected Nc={Nc} qzeros blocks, got {blocked_zp.size(0)}"
+        )
+
+        print(
+            f"  [PASS] packing shapes K={K}, N={N}, gs={group_size}: "
+            f"blocked_w={list(blocked_w.shape)}, "
+            f"blocked_s={list(blocked_s.shape)}, blocked_zp={list(blocked_zp.shape)}"
+        )
+
+
+class TestInt4ScaledMmCpu:
+    """Tests for int4_scaled_mm_cpu GEMM kernel."""
+
+    @pytest.mark.parametrize(
+        "M,K,N,group_size",
+        [
+            (1, 128, 128, 128),
+            (4, 256, 256, 128),
+            (16, 512, 256, 64),
+            (32, 256, 512, 128),
+            (64, 512, 512, 128),
+        ],
+    )
+    def test_gemm_vs_float_reference(self, M, K, N, group_size):
+        """INT4 W4A8 GEMM should approximate float matmul."""
+        (packed_qweight, packed_qzeros, scales, float_ref, _, _) = (
+            make_awq_checkpoint_data(K, N, group_size)
+        )
+
+        blocked_w, blocked_zp, blocked_s = convert_weight_packed_scale_zp(
+            packed_qweight,
+            packed_qzeros,
+            scales,
+            CPUQuantAlgo.AWQ,
+        )
+
+        x = torch.randn(M, K, dtype=torch.bfloat16)
+        out = int4_scaled_mm_cpu(x, blocked_w, blocked_zp, blocked_s, None)
+
+        ref_out = torch.mm(x.float(), float_ref)
+
+        abs_diff = (out.float() - ref_out).abs()
+        mean_abs = abs_diff.mean().item()
+        pct95 = torch.quantile(abs_diff, 0.95).item()
+        ref_mag = ref_out.abs().mean().item() + 1e-6
+        mean_rel = mean_abs / ref_mag
+
+        assert mean_rel < 0.05, (
+            f"Mean relative error {mean_rel:.4f} exceeds 5% threshold"
+        )
+        assert pct95 < ref_mag * 0.15, (
+            f"95th-pctile abs_diff {pct95:.4f} exceeds 15% of ref magnitude"
+        )
+        print(f"  [PASS] INT4 GEMM correct: M={M}, K={K}, N={N}")
+
+    @pytest.mark.parametrize("M", [1, 8, 32])
+    def test_gemm_with_bias(self, M):
+        """INT4 W4A8 GEMM with bias should match reference."""
+        K, N, group_size = 256, 128, 128
+        (packed_qweight, packed_qzeros, scales, float_ref, _, _) = (
+            make_awq_checkpoint_data(K, N, group_size)
+        )
+
+        blocked_w, blocked_zp, blocked_s = convert_weight_packed_scale_zp(
+            packed_qweight,
+            packed_qzeros,
+            scales,
+            CPUQuantAlgo.AWQ,
+        )
+
+        bias = torch.randn(N, dtype=torch.float32)
+        x = torch.randn(M, K, dtype=torch.bfloat16)
+
+        out = int4_scaled_mm_cpu(x, blocked_w, blocked_zp, blocked_s, bias)
+
+        ref_out = torch.mm(x.float(), float_ref) + bias
+        abs_diff = (out.float() - ref_out).abs()
+        mean_abs = abs_diff.mean().item()
+        ref_mag = ref_out.abs().mean().item() + 1e-6
+        mean_rel = mean_abs / ref_mag
+        assert mean_rel < 0.05, (
+            f"Mean relative error {mean_rel:.4f} with bias exceeds 5%"
+        )
+        print(f"  [PASS] INT4 GEMM with bias: M={M}")
+
+    def test_gemm_3d_input(self):
+        """apply() reshapes 3D input [B, S, K] -> [B*S, K] -> back to 3D."""
+        K, N, group_size = 256, 128, 128
+        (packed_qweight, packed_qzeros, scales, float_ref, _, _) = (
+            make_awq_checkpoint_data(K, N, group_size)
+        )
+
+        blocked_w, blocked_zp, blocked_s = convert_weight_packed_scale_zp(
+            packed_qweight,
+            packed_qzeros,
+            scales,
+            CPUQuantAlgo.AWQ,
+        )
+
+        B, S = 2, 8
+        x_3d = torch.randn(B, S, K, dtype=torch.bfloat16)
+        x_2d = x_3d.reshape(-1, K)
+
+        out_2d = int4_scaled_mm_cpu(x_2d, blocked_w, blocked_zp, blocked_s, None)
+        out_3d = out_2d.reshape(B, S, N)
+
+        ref_out = torch.mm(x_2d.float(), float_ref).reshape(B, S, N)
+
+        assert out_3d.shape == (B, S, N)
+        abs_diff = (out_3d.float() - ref_out).abs()
+        mean_abs = abs_diff.mean().item()
+        ref_mag = ref_out.abs().mean().item() + 1e-6
+        mean_rel = mean_abs / ref_mag
+
+        assert mean_rel < 0.05, f"Mean relative error {mean_rel:.4f} for 3D exceeds 5%"
+        print(f"  [PASS] 3D input [{B},{S},{K}] -> output [{B},{S},{N}]")
+
+    def test_gemm_fp16_input(self):
+        """INT4 GEMM should also work with fp16 input."""
+        K, N, group_size, M = 256, 256, 128, 8
+        (packed_qweight, packed_qzeros, scales, float_ref, _, _) = (
+            make_awq_checkpoint_data(K, N, group_size)
+        )
+
+        blocked_w, blocked_zp, blocked_s = convert_weight_packed_scale_zp(
+            packed_qweight,
+            packed_qzeros,
+            scales,
+            CPUQuantAlgo.AWQ,
+        )
+
+        x = torch.randn(M, K, dtype=torch.float16)
+        out = int4_scaled_mm_cpu(x, blocked_w, blocked_zp, blocked_s, None)
+
+        ref_out = torch.mm(x.float(), float_ref)
+        abs_diff = (out.float() - ref_out).abs()
+        ref_mag = ref_out.abs().mean().item() + 1e-6
+        mean_rel = abs_diff.mean().item() / ref_mag
+
+        assert mean_rel < 0.05, (
+            f"Mean relative error {mean_rel:.4f} for fp16 exceeds 5%"
+        )
+        print(f"  [PASS] fp16 input M={M}, K={K}, N={N}")
+
+
+class TestCreateWeightsUnchanged:
+    """Create_weights should still produce correct int4 placeholder shapes."""
+
+    @pytest.mark.parametrize(
+        "K,N,group_size",
+        [
+            (128, 128, 128),
+            (256, 256, 128),
+            (512, 256, 64),
+        ],
+    )
+    def test_int4_placeholder_shapes(self, K, N, group_size):
+        """Verify qweight, qzeros, scales shapes."""
+        pack_factor = 8
+        num_groups = K // group_size
+
+        qweight = torch.empty(K, N // pack_factor, dtype=torch.int32)
+        qzeros = torch.empty(num_groups, N // pack_factor, dtype=torch.int32)
+        scales = torch.empty(num_groups, N, dtype=torch.bfloat16)
+
+        assert qweight.shape == (K, N // pack_factor)
+        assert qzeros.shape == (num_groups, N // pack_factor)
+        assert scales.shape == (num_groups, N)
+        print(f"  [PASS] create_weights shapes: K={K}, N={N}, gs={group_size}")
diff --git a/tests/kernels/test_compressor_kv_cache.py b/tests/kernels/test_compressor_kv_cache.py
new file mode 100644
index 000000000000..c6daab2d86be
--- /dev/null
+++ b/tests/kernels/test_compressor_kv_cache.py
@@ -0,0 +1,669 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Round-trip tests for compressor → FP8 quant + KV cache insert → gather + dequant.
+
+These tests cover:
+  A) DeepseekV4 Attention: head_dim=512 (448 FP8 nope + 64 bf16 rope), quant_block=64
+  B) Fused dequant+gather K cache
+  C) Indexer:       head_dim=128 (all FP8), quant_block=128
+  D) DeepseekV4 Attention magnitude range: correctness across small/large values
+  E) Indexer fused Triton kernel: compress+norm+rope+quant+insert
+"""
+
+import math
+
+import pytest
+import torch
+
+from vllm import _custom_ops as ops
+from vllm.models.deepseek_v4.common.ops import (
+    dequantize_and_gather_k_cache,
+    quantize_and_insert_k_cache,
+)
+from vllm.models.deepseek_v4.common.ops.fused_compress_quant_cache import (
+    _fused_kv_compress_norm_rope_insert_indexer_attn,
+    _fused_kv_compress_norm_rope_insert_indexer_mxfp4_attn,
+)
+
+from .test_fused_indexer_q_rope_quant import quantize_to_mxfp4
+
+
+def _ue8m0_reference(x: torch.Tensor, block_size: int, fp8_max: float):
+    """PyTorch reference for UE8M0 FP8 quantization (per-block, power-of-2 scale).
+
+    Returns (x_fp8, scales) where x_fp8 is float8_e4m3fn and scales are float32.
+    """
+    assert x.dim() == 1
+    n = x.numel()
+    n_blocks = math.ceil(n / block_size)
+    x_fp8 = torch.zeros(n, dtype=torch.float8_e4m3fn, device=x.device)
+    scales = torch.zeros(n_blocks, dtype=torch.float32, device=x.device)
+
+    for i in range(n_blocks):
+        start = i * block_size
+        end = min(start + block_size, n)
+        block = x[start:end].float()
+        amax = block.abs().max().clamp(min=1e-4)
+        raw_scale = amax / fp8_max
+        exponent = math.ceil(math.log2(raw_scale.item()))
+        scale = 2.0**exponent
+        scales[i] = scale
+        quantized = (block / scale).clamp(-fp8_max, fp8_max)
+        x_fp8[start:end] = quantized.to(torch.float8_e4m3fn)
+
+    return x_fp8, scales
+
+
+# ── Test A: DeepseekV4 Attention path ──────────────────────────────────────────────
+
+
+@pytest.mark.parametrize("num_tokens", [1, 4, 8, 17])
+@pytest.mark.parametrize("block_size", [16, 64])
+def test_deepseek_v4_attention_quant_cache_roundtrip(num_tokens: int, block_size: int):
+    """compressed_kv → quantize_and_insert_k_cache → dequantize_and_gather_k_cache
+    → compare against original."""
+
+    HEAD_DIM = 512
+    NOPE_DIM = 448
+    HEAD_BYTES = 584  # 448 fp8 + 128 bf16 + 8 uint8 scale
+    FP8_MAX = 448.0
+    QUANT_BLOCK = 64
+
+    num_blocks = (num_tokens + block_size - 1) // block_size + 1
+    device = "cuda"
+
+    # Random compressed_kv (simulates compressor output)
+    compressed_kv = torch.randn(
+        num_tokens, HEAD_DIM, dtype=torch.bfloat16, device=device
+    )
+
+    # ── Quant + insert ──────────────────────────────────────────────────
+    k_cache = torch.zeros(
+        num_blocks, block_size, HEAD_BYTES, dtype=torch.uint8, device=device
+    )
+    k_cache_2d = k_cache.view(num_blocks, -1)
+
+    # Sequential slot mapping: token i → slot i
+    slot_mapping = torch.arange(num_tokens, dtype=torch.int64, device=device)
+
+    quantize_and_insert_k_cache(
+        compressed_kv, k_cache_2d, slot_mapping, block_size=block_size
+    )
+
+    # ── Gather + dequant ────────────────────────────────────────────────
+    num_reqs = 1
+    max_blocks_per_seq = num_blocks
+    out = torch.zeros(
+        num_reqs, num_tokens, HEAD_DIM, dtype=torch.bfloat16, device=device
+    )
+    seq_lens = torch.tensor([num_tokens], dtype=torch.int32, device=device)
+    # block_table: request 0 uses physical blocks 0, 1, ...
+    block_table = torch.arange(
+        max_blocks_per_seq, dtype=torch.int32, device=device
+    ).unsqueeze(0)
+
+    dequantize_and_gather_k_cache(
+        out, k_cache, seq_lens, None, block_table, block_size, offset=0
+    )
+
+    recovered = out[0, :num_tokens]
+
+    # ── NoPE portion (first 448): FP8 quantized, expect UE8M0 error ──
+    nope_orig = compressed_kv[:, :NOPE_DIM].float()
+    nope_recv = recovered[:, :NOPE_DIM].float()
+    nope_diff = (nope_recv - nope_orig).abs()
+
+    # Per-token check: FP8 e4m3 (3-bit mantissa) worst-case error is
+    # half-ULP at the largest representable value.  At y ≈ 448 (max),
+    # ULP = 2^(8-3) = 32, so error ≤ 16 * scale.
+    for t in range(num_tokens):
+        _, scales = _ue8m0_reference(
+            compressed_kv[t, :NOPE_DIM].float(), QUANT_BLOCK, FP8_MAX
+        )
+        max_allowed = 16.0 * scales.max().item()
+        token_diff = nope_diff[t].max().item()
+        assert token_diff <= max_allowed, (
+            f"Token {t} nope diff {token_diff} exceeds max_allowed "
+            f"{max_allowed} (scale={scales.max().item()})"
+        )
+
+    # ── RoPE portion (last 64): stored as bf16, should be exact ─────
+    rope_diff = (recovered[:, NOPE_DIM:] - compressed_kv[:, NOPE_DIM:]).abs()
+    assert rope_diff.max().item() == 0.0, (
+        f"RoPE portion should be exact but got max diff {rope_diff.max().item()}"
+    )
+
+
+# ── Test B: Fused dequant+gather K cache ────────────────────────────────────
+
+
+def _dequantize_and_gather_k_cache_reference(
+    out: torch.Tensor,
+    k_cache: torch.Tensor,
+    seq_lens: torch.Tensor,
+    gather_lens: torch.Tensor | None,
+    block_table: torch.Tensor,
+    block_size: int,
+    offset: int,
+) -> None:
+    fp8_dim = 448
+    bf16_dim = 64
+    scale_dim = 8
+    quant_block = 64
+    token_data_size = fp8_dim + bf16_dim * 2
+
+    for req_id in range(seq_lens.shape[0]):
+        seq_len = seq_lens[req_id].item()
+        gather_len = gather_lens[req_id].item() if gather_lens is not None else seq_len
+        start_pos = seq_len - gather_len
+
+        for i in range(gather_len):
+            pos = start_pos + i
+            pos_in_block = pos % block_size
+            block_idx = block_table[req_id, pos // block_size].item()
+            cache_block = k_cache[block_idx].view(-1)
+
+            token_data_start = pos_in_block * token_data_size
+            fp8_bytes = cache_block[token_data_start : token_data_start + fp8_dim]
+            fp8_vals = fp8_bytes.view(torch.float8_e4m3fn).float()
+
+            scale_start = block_size * token_data_size + pos_in_block * scale_dim
+            encoded_scales = cache_block[scale_start : scale_start + scale_dim]
+            scales = torch.exp2(encoded_scales[:7].float() - 127.0)
+            dequant = fp8_vals * scales.repeat_interleave(quant_block)
+
+            bf16_start = token_data_start + fp8_dim
+            bf16_bytes = cache_block[bf16_start : bf16_start + bf16_dim * 2]
+            bf16_tail = bf16_bytes.view(torch.bfloat16)
+
+            out[req_id, offset + i, :fp8_dim] = dequant
+            out[req_id, offset + i, fp8_dim:] = bf16_tail
+
+
+@pytest.mark.parametrize(
+    ("seq_lens_host", "gather_lens_host", "offset"),
+    [
+        ([9, 23, 7], None, 0),
+        ([19, 8, 257], [6, 8, 129], 5),
+    ],
+)
+def test_dequantize_and_gather_k_cache(
+    seq_lens_host: list[int],
+    gather_lens_host: list[int] | None,
+    offset: int,
+):
+    block_size = 64
+    head_dim = 512
+    nope_dim = 448
+    scale_dim = 8
+    head_bytes = nope_dim + (head_dim - nope_dim) * 2 + scale_dim
+    device = "cuda"
+    num_reqs = len(seq_lens_host)
+    num_tokens = sum(seq_lens_host)
+    max_gather_len = max(gather_lens_host or seq_lens_host)
+    max_blocks_per_seq = math.ceil(max(seq_lens_host) / block_size)
+    num_blocks = sum(math.ceil(seq_len / block_size) for seq_len in seq_lens_host)
+
+    compressed_kv = torch.randn(
+        num_tokens, head_dim, dtype=torch.bfloat16, device=device
+    )
+
+    # Randomize physical pages so the test covers block-table translation.
+    # Keep padded block-table entries invalid to catch accidental reads.
+    physical_blocks = torch.randperm(num_blocks, device=device)
+    block_table = torch.full(
+        (num_reqs, max_blocks_per_seq), int(-1e6), dtype=torch.int32, device=device
+    )
+    start = 0
+    for req_id, seq_len in enumerate(seq_lens_host):
+        num_req_blocks = math.ceil(seq_len / block_size)
+        req_blocks = physical_blocks[start : start + num_req_blocks]
+        block_table[req_id, :num_req_blocks] = req_blocks
+        start += num_req_blocks
+
+    # Build slot_mapping for quantize_and_insert_k_cache.
+    slot_mapping = torch.empty(num_tokens, dtype=torch.int64, device=device)
+    start = 0
+    for req_id, seq_len in enumerate(seq_lens_host):
+        logical_pos = torch.arange(seq_len, dtype=torch.int64, device=device)
+        block_idx = block_table[req_id, logical_pos // block_size].to(torch.int64)
+        token_slots = block_idx * block_size + logical_pos % block_size
+        slot_mapping[start : start + seq_len] = token_slots
+        start += seq_len
+
+    # Insert compressed K into the paged cache layout used by the gather op.
+    k_cache = torch.empty(
+        num_blocks, block_size, head_bytes, dtype=torch.uint8, device=device
+    )
+    k_cache_2d = k_cache.view(num_blocks, -1)
+    quantize_and_insert_k_cache(compressed_kv, k_cache_2d, slot_mapping, block_size)
+
+    out_shape = (num_reqs, offset + max_gather_len + 3, head_dim)
+    ref_out = torch.empty(out_shape, dtype=torch.bfloat16, device=device)
+    actual_out = torch.empty_like(ref_out)
+    seq_lens = torch.tensor(seq_lens_host, dtype=torch.int32, device=device)
+    gather_lens = (
+        torch.tensor(gather_lens_host, dtype=torch.int32, device=device)
+        if gather_lens_host is not None
+        else None
+    )
+
+    # Compare production gather against a PyTorch reference for valid output rows.
+    _dequantize_and_gather_k_cache_reference(
+        ref_out, k_cache, seq_lens, gather_lens, block_table, block_size, offset
+    )
+    dequantize_and_gather_k_cache(
+        actual_out, k_cache, seq_lens, gather_lens, block_table, block_size, offset
+    )
+    torch.accelerator.synchronize()
+
+    # only check non-padded content
+    for req_id, seq_len in enumerate(seq_lens_host):
+        gather_len = (
+            gather_lens_host[req_id] if gather_lens_host is not None else seq_len
+        )
+        actual = actual_out[req_id, offset : offset + gather_len]
+        expected = ref_out[req_id, offset : offset + gather_len]
+        torch.testing.assert_close(actual, expected, rtol=0, atol=0)
+
+
+# ── Test C: Indexer path ────────────────────────────────────────────────────
+
+
+@pytest.mark.parametrize("num_tokens", [1, 4, 8, 17])
+@pytest.mark.parametrize("block_size", [16, 64])
+def test_indexer_quant_cache_roundtrip(num_tokens: int, block_size: int):
+    """k → indexer_k_quant_and_cache → cp_gather_indexer_k_quant_cache
+    → manual dequant → compare against original."""
+
+    HEAD_DIM = 128
+    QUANT_BLOCK_SIZE = 128
+    # cache_stride = head_dim + (head_dim * 4 / quant_block_size) = 128 + 4 = 132
+    CACHE_STRIDE = HEAD_DIM + HEAD_DIM * 4 // QUANT_BLOCK_SIZE
+
+    num_blocks = (num_tokens + block_size - 1) // block_size + 1
+    device = "cuda"
+
+    # Random K (simulates compressor output for indexer)
+    k = torch.randn(num_tokens, HEAD_DIM, dtype=torch.bfloat16, device=device)
+
+    # ── Quant + insert ──────────────────────────────────────────────────
+    kv_cache = torch.zeros(
+        num_blocks, block_size, CACHE_STRIDE, dtype=torch.uint8, device=device
+    )
+    slot_mapping = torch.arange(num_tokens, dtype=torch.int64, device=device)
+
+    ops.indexer_k_quant_and_cache(k, kv_cache, slot_mapping, QUANT_BLOCK_SIZE, "ue8m0")
+
+    # ── Gather ──────────────────────────────────────────────────────────
+    max_blocks_per_seq = num_blocks
+    block_table = torch.arange(
+        max_blocks_per_seq, dtype=torch.int32, device=device
+    ).unsqueeze(0)
+    cu_seq_lens = torch.tensor([0, num_tokens], dtype=torch.int32, device=device)
+
+    # dst_k: [total_seq_len, head_dim] as uint8 (raw FP8 bytes)
+    dst_k = torch.zeros(num_tokens, HEAD_DIM, dtype=torch.uint8, device=device)
+    # dst_scale: [total_seq_len, head_dim/quant_block*4] as uint8 (raw float32 bytes)
+    num_scale_bytes = HEAD_DIM * 4 // QUANT_BLOCK_SIZE  # 4
+    dst_scale = torch.zeros(
+        num_tokens, num_scale_bytes, dtype=torch.uint8, device=device
+    )
+
+    ops.cp_gather_indexer_k_quant_cache(
+        kv_cache, dst_k, dst_scale, block_table, cu_seq_lens
+    )
+
+    # ── Manual dequant ──────────────────────────────────────────────────
+    k_fp8 = dst_k.view(torch.float8_e4m3fn).float()  # [num_tokens, 128]
+    scale = dst_scale.view(torch.float32)  # [num_tokens, 1]
+    k_recovered = k_fp8 * scale  # [num_tokens, 128]
+
+    # ── Compare ─────────────────────────────────────────────────────────
+    diff = (k_recovered - k.float()).abs()
+    k_abs = k.float().abs()
+
+    for t in range(num_tokens):
+        amax = k_abs[t].max().clamp(min=1e-4).item()
+        # UE8M0: scale = 2^ceil(log2(amax / 448))
+        exponent = math.ceil(math.log2(amax / 448.0))
+        ue8m0_scale = 2.0**exponent
+        # FP8 e4m3 (3-bit mantissa): worst-case error = 16 * scale
+        max_allowed = 16.0 * ue8m0_scale
+        token_diff = diff[t].max().item()
+        assert token_diff <= max_allowed, (
+            f"Token {t} diff {token_diff} exceeds max_allowed "
+            f"{max_allowed} (scale={ue8m0_scale})"
+        )
+
+
+def test_indexer_gather_accepts_upper_bound_output():
+    """Gather only exact cu_seq_lens even when dst is over-allocated."""
+
+    head_dim = 128
+    quant_block_size = 128
+    cache_stride = head_dim + head_dim * 4 // quant_block_size
+    valid_tokens = 9
+    upper_bound_tokens = 13
+    block_size = 16
+    num_blocks = 2
+    sentinel = 123
+    device = "cuda"
+
+    k = torch.randn(valid_tokens, head_dim, dtype=torch.bfloat16, device=device)
+    kv_cache = torch.zeros(
+        num_blocks, block_size, cache_stride, dtype=torch.uint8, device=device
+    )
+    slot_mapping = torch.arange(valid_tokens, dtype=torch.int64, device=device)
+    ops.indexer_k_quant_and_cache(k, kv_cache, slot_mapping, quant_block_size, "ue8m0")
+
+    block_table = torch.arange(num_blocks, dtype=torch.int32, device=device).unsqueeze(
+        0
+    )
+    cu_seq_lens = torch.tensor([0, valid_tokens], dtype=torch.int32, device=device)
+    dst_k = torch.full(
+        (upper_bound_tokens, head_dim), sentinel, dtype=torch.uint8, device=device
+    )
+    num_scale_bytes = head_dim * 4 // quant_block_size
+    dst_scale = torch.full(
+        (upper_bound_tokens, num_scale_bytes),
+        sentinel,
+        dtype=torch.uint8,
+        device=device,
+    )
+
+    ops.cp_gather_indexer_k_quant_cache(
+        kv_cache, dst_k, dst_scale, block_table, cu_seq_lens
+    )
+    torch.accelerator.synchronize()
+
+    k_recovered = dst_k[:valid_tokens].view(torch.float8_e4m3fn).float() * dst_scale[
+        :valid_tokens
+    ].view(torch.float32)
+    diff = (k_recovered - k.float()).abs()
+    max_allowed = (16.0 * dst_scale[:valid_tokens].view(torch.float32).max()).item()
+    assert diff.max().item() <= max_allowed
+    assert torch.all(dst_k[valid_tokens:] == sentinel)
+    assert torch.all(dst_scale[valid_tokens:] == sentinel)
+
+
+# ── Test D: DeepseekV4 attention with values at different magnitudes ───────────
+
+
+def test_deepseek_v4_quant_magnitude_range():
+    """Test that quantization handles a range of magnitudes correctly."""
+
+    HEAD_DIM = 512
+    NOPE_DIM = 448
+    HEAD_BYTES = 584
+    block_size = 16
+    num_tokens = 4
+    num_blocks = 2
+    device = "cuda"
+
+    # Create inputs with varying magnitudes: small, medium, large
+    compressed_kv = torch.zeros(
+        num_tokens, HEAD_DIM, dtype=torch.bfloat16, device=device
+    )
+    compressed_kv[0] = 0.001  # very small
+    compressed_kv[1] = 1.0  # unit scale
+    compressed_kv[2] = 100.0  # large
+    compressed_kv[3] = torch.randn(HEAD_DIM, dtype=torch.bfloat16, device=device)
+
+    k_cache = torch.zeros(
+        num_blocks, block_size, HEAD_BYTES, dtype=torch.uint8, device=device
+    )
+    slot_mapping = torch.arange(num_tokens, dtype=torch.int64, device=device)
+
+    quantize_and_insert_k_cache(
+        compressed_kv, k_cache.view(num_blocks, -1), slot_mapping, block_size
+    )
+
+    out = torch.zeros(1, num_tokens, HEAD_DIM, dtype=torch.bfloat16, device=device)
+    seq_lens = torch.tensor([num_tokens], dtype=torch.int32, device=device)
+    block_table = torch.arange(num_blocks, dtype=torch.int32, device=device).unsqueeze(
+        0
+    )
+
+    dequantize_and_gather_k_cache(
+        out, k_cache, seq_lens, None, block_table, block_size, offset=0
+    )
+
+    recovered = out[0, :num_tokens]
+
+    # RoPE portion must be exact
+    rope_diff = (recovered[:, NOPE_DIM:] - compressed_kv[:, NOPE_DIM:]).abs().max()
+    assert rope_diff.item() == 0.0, f"RoPE diff {rope_diff.item()}"
+
+    # NoPE: relative error should be reasonable
+    for t in range(num_tokens):
+        orig = compressed_kv[t, :NOPE_DIM].float()
+        recv = recovered[t, :NOPE_DIM].float()
+        abs_diff = (recv - orig).abs().max().item()
+        magnitude = orig.abs().max().item()
+        if magnitude > 0.01:
+            rel_err = abs_diff / magnitude
+            assert rel_err < 0.15, (
+                f"Token {t}: rel_err={rel_err:.4f}, abs_diff={abs_diff:.6f}, "
+                f"magnitude={magnitude:.4f}"
+            )
+
+
+# ── Test E: Indexer fused K-cache insert (Triton kernels) ────────────────────
+#
+# Both kernels share the same Triton signature; use_fp4 selects between them.
+# Full pipeline: state-cache gather → softmax-weighted compress → RMSNorm →
+#   GPT-J RoPE → quant (MXFP4 or FP8) → paged cache insert.
+
+
+def _reference_kv_compress_norm_rope(
+    state_cache: torch.Tensor,
+    block_table: torch.Tensor,
+    positions: torch.Tensor,
+    rms_weight: torch.Tensor,
+    cos_sin_cache: torch.Tensor,
+    compress_ratio: int = 1,
+    overlap: int = 0,
+    use_fp4: bool = False,
+    rms_eps: float = 1e-6,
+    fp8_max: float = 448.0,
+):
+    """Compress → RMSNorm → GPT-J RoPE → quantize.
+
+    Gathers (1+overlap)*compress_ratio state entries per output token, applies
+    per-element softmax over the scores, and computes the weighted kv sum.
+    Returns (quantized_values, scale) matching the kernel's output layout.
+    """
+    device = state_cache.device
+    head_dim = rms_weight.shape[0]
+    rope_dim = cos_sin_cache.shape[-1]
+    state_block_size = state_cache.shape[1]
+    state_width = state_cache.shape[-1] // 2
+    nope_dim = head_dim - rope_dim
+    total = (1 + overlap) * compress_ratio
+    results = []
+    for pos in positions.tolist():
+        src = torch.arange(pos - total + 1, pos + 1, dtype=torch.int64, device=device)
+        valid = src >= 0
+        idx = src.clamp(min=0)
+        pages = block_table[0, idx // state_block_size]
+        offsets = idx % state_block_size
+        raw = state_cache[pages, offsets].float()  # [total, state_dim]
+
+        # Group 0 (tokens 0..cr-1):   kv[:H],   score[SW:SW+H]
+        # Group 1 (tokens cr..2cr-1): kv[H:2H], score[SW+H:SW+2H]
+        if overlap:
+            sw = state_width
+            g0_kv = raw[:compress_ratio, :head_dim]
+            g1_kv = raw[compress_ratio:, head_dim : 2 * head_dim]
+            g0_scores = raw[:compress_ratio, sw : sw + head_dim]
+            g1_scores = raw[compress_ratio:, sw + head_dim : sw + 2 * head_dim]
+            kv = torch.cat([g0_kv, g1_kv])
+            scores = torch.cat([g0_scores, g1_scores])
+        else:
+            kv = raw[:, :head_dim]
+            scores = raw[:, state_width : state_width + head_dim]
+
+        scores[~valid] = float("-inf")
+        kv[~valid] = 0.0
+        weights = torch.softmax(scores, dim=0)
+        compressed = (kv * weights).sum(dim=0)  # [H]
+        var = (compressed * compressed).mean()
+        normed = compressed * torch.rsqrt(var + rms_eps) * rms_weight.float()
+        compressed_pos = (pos // compress_ratio) * compress_ratio
+        cos, sin = cos_sin_cache[compressed_pos].float().chunk(2)
+        nope, rope = normed.split([nope_dim, rope_dim])
+        rope = torch.stack(
+            [rope[0::2] * cos - rope[1::2] * sin, rope[1::2] * cos + rope[0::2] * sin],
+            dim=-1,
+        ).reshape(rope_dim)
+        results.append(torch.cat([nope, rope]).to(state_cache.dtype))
+    result = torch.stack(results)
+
+    if use_fp4:
+        return quantize_to_mxfp4(result)
+    else:
+        pairs = [
+            _ue8m0_reference(result[t], head_dim, fp8_max) for t in range(len(result))
+        ]
+        quants, scales = zip(*pairs)
+        return torch.stack(quants), torch.cat(scales)
+
+
+@pytest.mark.parametrize("num_tokens", [1, 7, 32])
+@pytest.mark.parametrize("kv_block_size", [16, 32])
+@pytest.mark.parametrize("use_fp4", [False, True])
+def test_fused_kv_insert_indexer(num_tokens: int, kv_block_size: int, use_fp4: bool):
+    """Fused K compress+norm+rope+quant+insert for the indexer KV cache."""
+    HEAD_DIM = 128
+    ROPE_DIM = 64
+    BLOCK_SIZE = 16
+    RMS_EPS = 1e-6
+    FP8_MAX = 448.0
+
+    device = "cuda"
+    torch.manual_seed(42)
+    compress_ratio = 4
+
+    if use_fp4:
+        TOKEN_STRIDE = HEAD_DIM // 2  # packed nibbles: 64 bytes
+        SCALE_DIM = HEAD_DIM // 32  # ue8m0 bytes: 4
+        QUANT_BLOCK = 32
+        kernel = _fused_kv_compress_norm_rope_insert_indexer_mxfp4_attn
+    else:
+        TOKEN_STRIDE = HEAD_DIM  # FP8 bytes: 128
+        SCALE_DIM = 4  # 1 float32: 4 bytes
+        QUANT_BLOCK = HEAD_DIM
+        kernel = _fused_kv_compress_norm_rope_insert_indexer_attn
+
+    # overlap=1 whenever compress_ratio==4, matching DeepseekCompressor logic.
+    overlap = 1 if compress_ratio == 4 else 0
+    coff = 1 + overlap  # multiplier for state_dim per entry
+
+    num_pages = (compress_ratio * num_tokens - 1) // BLOCK_SIZE + 2
+    state_cache = torch.randn(
+        num_pages,
+        BLOCK_SIZE,
+        2 * coff * HEAD_DIM,  # kv_state + score_state, each coff*HEAD_DIM wide
+        dtype=torch.bfloat16,
+        device=device,
+    )
+    block_table = torch.arange(num_pages, dtype=torch.int32, device=device).unsqueeze(0)
+    token_to_req = torch.zeros(num_tokens, dtype=torch.int32, device=device)
+    slot_mapping = torch.arange(num_tokens, dtype=torch.int64, device=device)
+    positions = torch.arange(
+        compress_ratio - 1,
+        compress_ratio * num_tokens,
+        compress_ratio,
+        dtype=torch.int64,
+        device=device,
+    )
+    rms_weight = torch.randn(HEAD_DIM, dtype=torch.bfloat16, device=device)
+    cos_sin_cache = torch.randn(compress_ratio * num_tokens, ROPE_DIM, device=device)
+
+    kv_n_blocks = (num_tokens + kv_block_size - 1) // kv_block_size + 1
+    kv_cache = torch.zeros(
+        kv_n_blocks,
+        kv_block_size * (TOKEN_STRIDE + SCALE_DIM),
+        dtype=torch.uint8,
+        device=device,
+    )
+
+    kernel[(num_tokens,)](
+        state_cache,
+        state_cache.stride(0),
+        state_cache.stride(1),
+        token_to_req,
+        positions,
+        slot_mapping,
+        block_table,
+        block_table.stride(0),
+        BLOCK_SIZE,
+        rms_weight,
+        RMS_EPS,
+        cos_sin_cache,
+        cos_sin_cache.stride(0),
+        kv_cache,
+        slot_mapping,
+        kv_block_size,
+        HEAD_SIZE=HEAD_DIM,
+        TRITON_BLOCK_SIZE=HEAD_DIM,
+        STATE_WIDTH=coff * HEAD_DIM,
+        COMPRESS_RATIO=compress_ratio,
+        OVERLAP=overlap,
+        ROPE_HEAD_DIM=ROPE_DIM,
+        FP8_MAX=FP8_MAX,
+        QUANT_BLOCK=QUANT_BLOCK,
+        TOKEN_STRIDE=TOKEN_STRIDE,
+        SCALE_DIM=SCALE_DIM,
+        KV_BLOCK_STRIDE=kv_cache.stride(0),
+        num_warps=1,
+    )
+
+    k_quant, scale = _reference_kv_compress_norm_rope(
+        state_cache,
+        block_table,
+        positions,
+        rms_weight,
+        cos_sin_cache,
+        compress_ratio,
+        overlap,
+        use_fp4,
+        rms_eps=RMS_EPS,
+        fp8_max=FP8_MAX,
+    )
+
+    if use_fp4:
+        for i in range(num_tokens):
+            blk, pos = i // kv_block_size, i % kv_block_size
+            val_off = pos * TOKEN_STRIDE
+            fp4_actual = kv_cache[blk, val_off : val_off + TOKEN_STRIDE]
+            assert torch.equal(k_quant[i], fp4_actual), (
+                f"token {i}: packed nibbles differ, "
+                f"{(k_quant[i] != fp4_actual).sum()} "
+                f"/ {TOKEN_STRIDE}"
+            )
+
+            scale_off = kv_block_size * TOKEN_STRIDE + pos * SCALE_DIM
+            scale_actual = kv_cache[blk, scale_off : scale_off + SCALE_DIM]
+            assert torch.equal(scale_actual, scale[i]), (
+                f"token {i}: ue8m0 {scale_actual.tolist()} != {scale[i].tolist()}"
+            )
+
+    else:
+        k_quant = k_quant.view(torch.uint8)
+        for i in range(num_tokens):
+            blk, pos = i // kv_block_size, i % kv_block_size
+            val_off = pos * TOKEN_STRIDE
+            assert torch.equal(
+                k_quant[i], kv_cache[blk, val_off : val_off + TOKEN_STRIDE]
+            ), f"token {i}: FP8 bytes differ"
+
+            scale_off = kv_block_size * TOKEN_STRIDE + pos * SCALE_DIM
+            actual_scale = kv_cache[blk, scale_off : scale_off + SCALE_DIM].view(
+                torch.float32
+            )
+            assert torch.equal(actual_scale, scale[i : i + 1]), (
+                f"token {i}: scale {actual_scale.item()} != {scale[i].item()}"
+            )
diff --git a/tests/kernels/test_flex_attention.py b/tests/kernels/test_flex_attention.py
index 69113b57c74e..41d298134762 100644
--- a/tests/kernels/test_flex_attention.py
+++ b/tests/kernels/test_flex_attention.py
@@ -26,6 +26,59 @@
 DIRECT_BUILD_VERSION = version.parse("2.9.dev0")
 
 
+@pytest.mark.skipif(
+    not torch.cuda.is_available() or TORCH_VERSION < MINIMUM_TORCH_VERSION,
+    reason="CUDA not available or PyTorch version < 2.7",
+)
+def test_flex_attention_full_cudagraphs(vllm_runner):
+    """Test the numerics for flex attention full cudagraphs support."""
+    model_name = "Qwen/Qwen2.5-1.5B-Instruct"
+    seed = 42
+    max_tokens = 24
+    num_logprobs = 5
+    prompts = [
+        "Hello, my name is",
+        "The president of the United States is",
+        "The capital of France is",
+    ]
+
+    # Run with flex attention eager
+    set_random_seed(seed)
+    with vllm_runner(
+        model_name,
+        runner="generate",
+        tensor_parallel_size=1,
+        num_gpu_blocks_override=128,
+        enforce_eager=True,
+        attention_config={"backend": "FLEX_ATTENTION"},
+    ) as llm_flex:
+        output_eager = llm_flex.generate_greedy_logprobs(
+            prompts, max_tokens, num_logprobs
+        )
+
+    # Run with flex attention compiled
+    set_random_seed(seed)
+    with vllm_runner(
+        model_name,
+        runner="generate",
+        tensor_parallel_size=1,
+        num_gpu_blocks_override=128,
+        enforce_eager=False,
+        gpu_memory_utilization=0.85,
+        attention_config={"backend": "FLEX_ATTENTION"},
+    ) as llm_default:
+        output_compile = llm_default.generate_greedy_logprobs(
+            prompts, max_tokens, num_logprobs
+        )
+
+    check_logprobs_close(
+        outputs_0_lst=output_eager,
+        outputs_1_lst=output_compile,
+        name_0="eager",
+        name_1="compile",
+    )
+
+
 @pytest.mark.skipif(
     not torch.cuda.is_available() or TORCH_VERSION < MINIMUM_TORCH_VERSION,
     reason="CUDA not available or PyTorch version < 2.7",
diff --git a/tests/kernels/test_fp32_router_gemm.py b/tests/kernels/test_fp32_router_gemm.py
new file mode 100644
index 000000000000..f855eb7aa171
--- /dev/null
+++ b/tests/kernels/test_fp32_router_gemm.py
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for fp32_router_gemm kernel: activation×weight→fp32, H=3072, E=256.
+
+Correctness baseline: torch.matmul in float64.
+"""
+
+import pytest
+import torch
+
+from vllm._custom_ops import fp32_router_gemm
+
+NUM_EXPERTS = 256
+HIDDEN_DIM = 3072
+# Absolute tolerance for fp32 kernel vs float64 reference
+ATOL_FP32 = 2e-4
+ATOL_BF16 = 2e-2  # bf16 activation has lower precision
+
+
+def _requires_sm90():
+    if not torch.cuda.is_available():
+        pytest.skip("CUDA not available")
+    major, minor = torch.cuda.get_device_capability()
+    if major * 10 + minor < 90:
+        pytest.skip(f"fp32_router_gemm requires SM90+, got SM{major}{minor}")
+
+
+def _ref(mat_a: torch.Tensor, mat_b: torch.Tensor) -> torch.Tensor:
+    """Reference: F.linear in float32 on GPU."""
+    return torch.nn.functional.linear(mat_a.float(), mat_b.float())
+
+
+@pytest.mark.parametrize("num_tokens", [1, 2, 4, 8, 16, 32])
+def test_fp32_activation(num_tokens: int):
+    """fp32 activation → fp32 output should match reference closely."""
+    _requires_sm90()
+    torch.manual_seed(42)
+    device = torch.device("cuda")
+    mat_a = torch.randn(num_tokens, HIDDEN_DIM, dtype=torch.float32, device=device)
+    mat_b = torch.randn(NUM_EXPERTS, HIDDEN_DIM, dtype=torch.float32, device=device)
+
+    out = fp32_router_gemm(mat_a, mat_b)
+    ref = _ref(mat_a, mat_b)
+
+    assert out.shape == (num_tokens, NUM_EXPERTS)
+    assert out.dtype == torch.float32
+    torch.testing.assert_close(out, ref, atol=ATOL_FP32, rtol=0)
+
+
+@pytest.mark.parametrize("num_tokens", [1, 2, 4, 8, 16, 32])
+def test_bf16_activation(num_tokens: int):
+    """bf16 activation → fp32 output should match reference within bf16 error."""
+    _requires_sm90()
+    torch.manual_seed(42)
+    device = torch.device("cuda")
+    mat_a_bf16 = torch.randn(
+        num_tokens, HIDDEN_DIM, dtype=torch.bfloat16, device=device
+    )
+    mat_b = torch.randn(NUM_EXPERTS, HIDDEN_DIM, dtype=torch.float32, device=device)
+
+    out = fp32_router_gemm(mat_a_bf16, mat_b)
+    ref = _ref(mat_a_bf16, mat_b).to(device)
+
+    assert out.shape == (num_tokens, NUM_EXPERTS)
+    assert out.dtype == torch.float32
+    torch.testing.assert_close(out, ref, atol=ATOL_BF16, rtol=0)
+
+
+def test_output_shape_and_dtype():
+    """Basic shape and dtype checks."""
+    _requires_sm90()
+    device = torch.device("cuda")
+    mat_a = torch.randn(4, HIDDEN_DIM, dtype=torch.float32, device=device)
+    mat_b = torch.randn(NUM_EXPERTS, HIDDEN_DIM, dtype=torch.float32, device=device)
+    out = fp32_router_gemm(mat_a, mat_b)
+    assert out.shape == (4, NUM_EXPERTS)
+    assert out.dtype == torch.float32
+    assert out.device.type == "cuda"
diff --git a/tests/kernels/test_fused_deepseek_v4_qnorm_rope_kv_insert.py b/tests/kernels/test_fused_deepseek_v4_qnorm_rope_kv_insert.py
new file mode 100644
index 000000000000..13010540d973
--- /dev/null
+++ b/tests/kernels/test_fused_deepseek_v4_qnorm_rope_kv_insert.py
@@ -0,0 +1,362 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Standalone unit test for the horizontally-fused DeepseekV4-MLA kernel:
+
+  fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert
+    - Q side:  per-head RMSNorm (no weight) + GPT-J RoPE on last 64 dims
+    - KV side: GPT-J RoPE on last 64 + UE8M0 FP8 quant + paged cache insert
+
+We compare against:
+  - PyTorch reference for RMSNorm + GPT-J RoPE on Q
+  - Existing Triton `quantize_and_insert_k_cache` + round-trip via
+    `dequantize_and_gather_k_cache` for KV
+
+The kernel is imported via
+`torch.ops._C.fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert`.
+"""
+
+import pytest
+import torch
+
+from vllm.models.deepseek_v4.common.ops import (
+    dequantize_and_gather_k_cache,
+    quantize_and_insert_k_cache,
+)
+
+# ── Constants matching the kernel ────────────────────────────────────────────
+HEAD_DIM = 512
+ROPE_DIM = 64
+NOPE_DIM = HEAD_DIM - ROPE_DIM  # 448
+QUANT_BLOCK = 64
+FP8_MAX = 448.0
+HEAD_BYTES = NOPE_DIM + ROPE_DIM * 2 + 8  # 448 + 128 + 8 = 584
+
+
+# ── PyTorch reference implementations ────────────────────────────────────────
+
+
+def make_cos_sin_cache(max_pos: int, rope_dim: int, dtype, device):
+    """Build a cos||sin cache matching DeepseekV4ScalingRotaryEmbedding layout.
+    cos_sin_cache[pos, :rope_dim/2] = cos(theta), [rope_dim/2:] = sin(theta).
+    """
+    base = 10000.0
+    inv_freq = 1.0 / (
+        base
+        ** (torch.arange(0, rope_dim, 2, dtype=torch.float32, device=device) / rope_dim)
+    )
+    t = torch.arange(max_pos, dtype=torch.float32, device=device)
+    freqs = torch.einsum("i,j -> ij", t, inv_freq)  # [max_pos, rope_dim/2]
+    cache = torch.cat((freqs.cos(), freqs.sin()), dim=-1)  # [max_pos, rope_dim]
+    return cache.to(dtype)
+
+
+def apply_rope_gptj_last_k(
+    x: torch.Tensor, positions: torch.Tensor, cos_sin_cache: torch.Tensor
+) -> torch.Tensor:
+    """GPT-J-style (interleaved-pair) RoPE on the LAST rope_dim elements.
+
+    x: [..., head_dim] float32
+    positions: [num_tokens] int64 (positions[i] corresponds to x[i, ...])
+    cos_sin_cache: [max_pos, rope_dim] float (cos|sin layout)
+
+    Returns rotated x (same shape/dtype).
+    """
+    rope_dim = cos_sin_cache.shape[-1]
+    half = rope_dim // 2
+    head_dim = x.shape[-1]
+    nope_dim = head_dim - rope_dim
+
+    cs = cos_sin_cache[positions].to(torch.float32)
+    cos = cs[..., :half]
+    sin = cs[..., half:]
+
+    rope = x[..., nope_dim:].float()
+    shape = rope.shape
+    rope = rope.reshape(*shape[:-1], half, 2)
+    even = rope[..., 0]
+    odd = rope[..., 1]
+
+    for _ in range(rope.ndim - 3):
+        cos = cos.unsqueeze(1)
+        sin = sin.unsqueeze(1)
+
+    # Use addcmul (compiles to FMA on CUDA) for the 2x2 rotation. nvcc lowers
+    # the kernel's `e*c - o*s` to fma(e, c, -o*s); matching that here keeps
+    # near-cancellation pairs on the same bf16 grid as the kernel output and
+    # avoids spurious 1-ULP boundary flips at high num_tokens.
+    new_even = torch.addcmul(-odd * sin, even, cos)
+    new_odd = torch.addcmul(odd * cos, even, sin)
+    rope_rotated = torch.stack((new_even, new_odd), dim=-1).reshape(shape)
+
+    out = x.clone().float()
+    out[..., nope_dim:] = rope_rotated
+    return out.to(x.dtype)
+
+
+def rmsnorm_no_weight(x: torch.Tensor, eps: float) -> torch.Tensor:
+    """RMSNorm with no learnable weight, matching
+    `RMSNorm(head_dim, has_weight=False)`.
+
+    Returns fp32 so callers can chain RoPE without an intermediate bf16 round
+    (the kernel keeps the whole RMSNorm→RoPE pipeline in fp32 and rounds once
+    at the final store).
+    """
+    xf = x.float()
+    variance = xf.pow(2).mean(dim=-1, keepdim=True)
+    return xf * torch.rsqrt(variance + eps)
+
+
+# ── Dispatch to the CUDA op (skip test cleanly if it isn't built in) ─────────
+
+
+def _op_available() -> bool:
+    return hasattr(torch.ops._C, "fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert")
+
+
+pytestmark = pytest.mark.skipif(
+    not torch.cuda.is_available() or not _op_available(),
+    reason="CUDA not available or fused DeepseekV4 op not built in",
+)
+
+
+def _call_fused(q, kv, k_cache, slot_mapping, positions, cos_sin_cache, eps, bs):
+    torch.ops._C.fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert(
+        q, kv, k_cache, slot_mapping, positions, cos_sin_cache, eps, bs
+    )
+
+
+# ── Test 1: Q path numerical parity ──────────────────────────────────────────
+
+
+@pytest.mark.parametrize("num_tokens", [1, 4, 17, 64, 2048])
+@pytest.mark.parametrize("n_heads", [8, 64])
+def test_q_path_matches_reference(num_tokens: int, n_heads: int):
+    torch.manual_seed(0)
+    device = "cuda"
+    dtype = torch.bfloat16
+    eps = 1e-6
+    max_pos = 4096
+
+    q = torch.randn(num_tokens, n_heads, HEAD_DIM, dtype=dtype, device=device)
+    positions = torch.arange(num_tokens, dtype=torch.int64, device=device)
+    cos_sin_cache = make_cos_sin_cache(max_pos, ROPE_DIM, torch.float32, device)
+
+    # Reference: RMSNorm (no weight) per head, then GPT-J RoPE on last 64.
+    # Keep the chain in fp32 (rmsnorm_no_weight returns fp32) and round to
+    # bf16 once at the end, matching the kernel.
+    q_ref = rmsnorm_no_weight(q, eps)
+    q_ref = apply_rope_gptj_last_k(q_ref, positions, cos_sin_cache).to(dtype)
+
+    # Fused call with dummy KV tensors (KV branch will write slot_mapping=-1 → noop).
+    num_blocks = 2
+    bs = 16
+    kv = torch.zeros(num_tokens, HEAD_DIM, dtype=dtype, device=device)
+    k_cache = torch.zeros(
+        num_blocks, bs, HEAD_BYTES, dtype=torch.uint8, device=device
+    ).view(num_blocks, -1)
+    slot_mapping = torch.full((num_tokens,), -1, dtype=torch.int64, device=device)
+    q_fused = q.clone()
+    _call_fused(q_fused, kv, k_cache, slot_mapping, positions, cos_sin_cache, eps, bs)
+
+    torch.testing.assert_close(q_fused, q_ref, rtol=1e-2, atol=1e-2)
+
+
+# ── Test 2: KV path round-trip byte/value parity ─────────────────────────────
+
+
+def _ue8m0_per_block_scales(kv_roped_nope_f32: torch.Tensor, qblock: int):
+    """Return per-token per-block max scale (used to bound FP8 error)."""
+    n_tok, nope = kv_roped_nope_f32.shape
+    n_blocks = nope // qblock
+    blocks = kv_roped_nope_f32.view(n_tok, n_blocks, qblock)
+    absmax = blocks.abs().amax(dim=-1).clamp(min=1e-4)
+    raw = absmax / FP8_MAX
+    exponent = torch.ceil(torch.log2(raw))
+    return torch.pow(2.0, exponent)  # [n_tok, n_blocks]
+
+
+@pytest.mark.parametrize("num_tokens", [1, 4, 17, 64, 2048])
+@pytest.mark.parametrize("block_size", [16, 64])
+def test_kv_path_matches_reference(num_tokens: int, block_size: int):
+    torch.manual_seed(1)
+    device = "cuda"
+    dtype = torch.bfloat16
+    eps = 1e-6
+    max_pos = 4096
+
+    kv = torch.randn(num_tokens, HEAD_DIM, dtype=dtype, device=device)
+    positions = torch.arange(num_tokens, dtype=torch.int64, device=device)
+    cos_sin_cache = make_cos_sin_cache(max_pos, ROPE_DIM, torch.float32, device)
+
+    num_blocks = (num_tokens + block_size - 1) // block_size + 1
+    slot_mapping = torch.arange(num_tokens, dtype=torch.int64, device=device)
+
+    # ── Reference path: RoPE on kv, then existing Triton quant+insert ──────
+    kv_ref = apply_rope_gptj_last_k(kv, positions, cos_sin_cache)
+    k_cache_ref = torch.zeros(
+        num_blocks, block_size * HEAD_BYTES, dtype=torch.uint8, device=device
+    )
+    quantize_and_insert_k_cache(
+        kv_ref, k_cache_ref, slot_mapping, block_size=block_size
+    )
+
+    # ── Fused path (dummy q, single head) ──────────────────────────────────
+    k_cache_fused = torch.zeros_like(k_cache_ref)
+    q_dummy = torch.zeros(num_tokens, 1, HEAD_DIM, dtype=dtype, device=device)
+    _call_fused(
+        q_dummy,
+        kv,
+        k_cache_fused,
+        slot_mapping,
+        positions,
+        cos_sin_cache,
+        eps,
+        block_size,
+    )
+
+    # ── Round-trip compare via dequant+gather ──────────────────────────────
+    def _dequant(k_cache_2d):
+        num_reqs = 1
+        max_blocks = num_blocks
+        out = torch.zeros(
+            num_reqs, num_tokens, HEAD_DIM, dtype=torch.bfloat16, device=device
+        )
+        seq_lens = torch.tensor([num_tokens], dtype=torch.int32, device=device)
+        block_table = torch.arange(
+            max_blocks, dtype=torch.int32, device=device
+        ).unsqueeze(0)
+        # gather_lens arg is None (use seq_lens)
+        k_cache_3d = k_cache_2d.view(num_blocks, block_size, HEAD_BYTES)
+        dequantize_and_gather_k_cache(
+            out, k_cache_3d, seq_lens, None, block_table, block_size, offset=0
+        )
+        return out[0, :num_tokens]
+
+    recovered_ref = _dequant(k_cache_ref)
+    recovered_fused = _dequant(k_cache_fused)
+
+    # NoPE: per-block UE8M0 FP8 error bound (half-ULP at max = 16 * scale).
+    scales = _ue8m0_per_block_scales(kv_ref[:, :NOPE_DIM].float(), QUANT_BLOCK)
+    for t in range(num_tokens):
+        max_allowed = 16.0 * scales[t].max().item()
+        diff_ref = (
+            (recovered_ref[t, :NOPE_DIM] - kv_ref[t, :NOPE_DIM]).abs().max().item()
+        )
+        diff_fused = (
+            (recovered_fused[t, :NOPE_DIM] - kv_ref[t, :NOPE_DIM]).abs().max().item()
+        )
+        assert diff_ref <= max_allowed, (
+            f"ref NoPE token {t} diff {diff_ref} > {max_allowed}"
+        )
+        assert diff_fused <= max_allowed, (
+            f"fused NoPE token {t} diff {diff_fused} > {max_allowed}"
+        )
+
+    # RoPE region: bf16 stored exactly → zero diff.
+    rope_diff = (recovered_fused[:, NOPE_DIM:] - kv_ref[:, NOPE_DIM:]).abs().max()
+    assert rope_diff.item() == 0.0, f"RoPE portion not exact: {rope_diff.item()}"
+
+    # Exact byte equality of the two cache buffers — strong parity.
+    torch.testing.assert_close(k_cache_fused, k_cache_ref, rtol=0, atol=0)
+
+
+# ── Test 2b: DP padding (slot_mapping shorter than q/kv) ─────────────────────
+
+
+@pytest.mark.parametrize("num_tokens", [4, 17, 2048])
+@pytest.mark.parametrize("pad", [1, 5])
+@pytest.mark.parametrize("block_size", [16, 64])
+def test_kv_path_with_dp_padding(num_tokens: int, pad: int, block_size: int):
+    """slot_mapping.size(0) < q.size(0): the kernel must skip padded
+    tokens in the KV branch while still running Q-norm+RoPE on all rows."""
+    torch.manual_seed(3)
+    device = "cuda"
+    dtype = torch.bfloat16
+    eps = 1e-6
+    max_pos = 4096
+    total = num_tokens + pad
+
+    kv = torch.randn(total, HEAD_DIM, dtype=dtype, device=device)
+    positions = torch.arange(total, dtype=torch.int64, device=device)
+    cos_sin_cache = make_cos_sin_cache(max_pos, ROPE_DIM, torch.float32, device)
+
+    num_blocks = (num_tokens + block_size - 1) // block_size + 1
+    slot_mapping = torch.arange(num_tokens, dtype=torch.int64, device=device)
+
+    # Reference: only the first num_tokens kv rows get inserted.
+    kv_ref = apply_rope_gptj_last_k(
+        kv[:num_tokens], positions[:num_tokens], cos_sin_cache
+    )
+    k_cache_ref = torch.zeros(
+        num_blocks, block_size * HEAD_BYTES, dtype=torch.uint8, device=device
+    )
+    quantize_and_insert_k_cache(
+        kv_ref, k_cache_ref, slot_mapping, block_size=block_size
+    )
+
+    # Fused: pass full-sized q/kv/positions, shorter slot_mapping.
+    q_dummy = torch.zeros(total, 1, HEAD_DIM, dtype=dtype, device=device)
+    k_cache_fused = torch.zeros_like(k_cache_ref)
+    _call_fused(
+        q_dummy,
+        kv,
+        k_cache_fused,
+        slot_mapping,
+        positions,
+        cos_sin_cache,
+        eps,
+        block_size,
+    )
+
+    torch.testing.assert_close(k_cache_fused, k_cache_ref, rtol=0, atol=0)
+
+
+# ── Test 3: combined single-call Q + KV parity ───────────────────────────────
+
+
+@pytest.mark.parametrize("num_tokens", [1, 4, 17, 2048])
+@pytest.mark.parametrize("n_heads", [8, 64])
+@pytest.mark.parametrize("block_size", [16, 64])
+def test_combined_q_and_kv(num_tokens: int, n_heads: int, block_size: int):
+    torch.manual_seed(2)
+    device = "cuda"
+    dtype = torch.bfloat16
+    eps = 1e-6
+    max_pos = 4096
+
+    q = torch.randn(num_tokens, n_heads, HEAD_DIM, dtype=dtype, device=device)
+    kv = torch.randn(num_tokens, HEAD_DIM, dtype=dtype, device=device)
+    positions = torch.arange(num_tokens, dtype=torch.int64, device=device)
+    cos_sin_cache = make_cos_sin_cache(max_pos, ROPE_DIM, torch.float32, device)
+
+    num_blocks = (num_tokens + block_size - 1) // block_size + 1
+    slot_mapping = torch.arange(num_tokens, dtype=torch.int64, device=device)
+
+    # Reference.
+    q_ref = rmsnorm_no_weight(q, eps)
+    q_ref = apply_rope_gptj_last_k(q_ref, positions, cos_sin_cache).to(dtype)
+    kv_ref = apply_rope_gptj_last_k(kv, positions, cos_sin_cache)
+    k_cache_ref = torch.zeros(
+        num_blocks, block_size * HEAD_BYTES, dtype=torch.uint8, device=device
+    )
+    quantize_and_insert_k_cache(
+        kv_ref, k_cache_ref, slot_mapping, block_size=block_size
+    )
+
+    # Fused single call.
+    q_fused = q.clone()
+    k_cache_fused = torch.zeros_like(k_cache_ref)
+    _call_fused(
+        q_fused,
+        kv,
+        k_cache_fused,
+        slot_mapping,
+        positions,
+        cos_sin_cache,
+        eps,
+        block_size,
+    )
+
+    torch.testing.assert_close(q_fused, q_ref, rtol=1e-2, atol=1e-2)
+    torch.testing.assert_close(k_cache_fused, k_cache_ref, rtol=0, atol=0)
diff --git a/tests/kernels/test_fused_gdn_post_conv.py b/tests/kernels/test_fused_gdn_post_conv.py
new file mode 100644
index 000000000000..ffc8ce281f90
--- /dev/null
+++ b/tests/kernels/test_fused_gdn_post_conv.py
@@ -0,0 +1,209 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for fused_gdn_prefill_post_conv kernel.
+
+Verifies that the fused kernel matches the reference:
+  split → rearrange → contiguous → l2norm → gating
+"""
+
+import pytest
+import torch
+import torch.nn.functional as F
+
+from vllm.model_executor.layers.fla.ops.fused_gdn_prefill_post_conv import (
+    fused_post_conv_prep,
+)
+
+
+def reference_post_conv(
+    conv_output: torch.Tensor,
+    a: torch.Tensor,
+    b: torch.Tensor,
+    A_log: torch.Tensor,
+    dt_bias: torch.Tensor,
+    H: int,
+    K: int,
+    V: int,
+    apply_l2norm: bool = True,
+    output_g_exp: bool = False,
+):
+    """Reference implementation using individual ops."""
+    L = conv_output.shape[0]
+    HV = A_log.shape[0]
+
+    # Split
+    q_flat, k_flat, v_flat = torch.split(conv_output, [H * K, H * K, HV * V], dim=-1)
+
+    # Rearrange + contiguous
+    q = q_flat.view(L, H, K).contiguous()
+    k = k_flat.view(L, H, K).contiguous()
+    v = v_flat.view(L, HV, V).contiguous()
+
+    # L2 norm
+    if apply_l2norm:
+        q = F.normalize(q.float(), p=2, dim=-1, eps=1e-6).to(conv_output.dtype)
+        k = F.normalize(k.float(), p=2, dim=-1, eps=1e-6).to(conv_output.dtype)
+
+    # Gating
+    x = a.float() + dt_bias.float()
+    sp = F.softplus(x, beta=1.0, threshold=20.0)
+    g = -torch.exp(A_log.float()) * sp
+
+    if output_g_exp:
+        g = torch.exp(g)
+
+    beta_out = torch.sigmoid(b.float())
+
+    return q, k, v, g, beta_out
+
+
+# Qwen3.5-35B config: H=16, HV=32, K=128, V=128
+# Qwen3.5-397B config: H=16, HV=64, K=128, V=128
+@pytest.mark.parametrize(
+    "H, HV, K, V",
+    [
+        (16, 32, 128, 128),  # 35B
+        (16, 64, 128, 128),  # 397B
+        (4, 8, 64, 64),  # small
+    ],
+)
+@pytest.mark.parametrize("L", [1, 16, 128, 512, 2048])
+@pytest.mark.parametrize("apply_l2norm", [True, False])
+@pytest.mark.parametrize("output_g_exp", [True, False])
+@pytest.mark.parametrize("dtype", [torch.bfloat16])
+def test_fused_post_conv_correctness(H, HV, K, V, L, apply_l2norm, output_g_exp, dtype):
+    """Test fused kernel matches reference for all configs."""
+    torch.manual_seed(42)
+    device = "cuda"
+    qkv_dim = 2 * H * K + HV * V
+
+    conv_output = torch.randn(L, qkv_dim, dtype=dtype, device=device)
+    a = torch.randn(L, HV, dtype=dtype, device=device)
+    b = torch.randn(L, HV, dtype=dtype, device=device)
+    A_log = torch.randn(HV, dtype=torch.float32, device=device) - 2.0
+    dt_bias = torch.randn(HV, dtype=torch.float32, device=device) * 0.1
+
+    # Reference
+    ref_q, ref_k, ref_v, ref_g, ref_beta = reference_post_conv(
+        conv_output,
+        a,
+        b,
+        A_log,
+        dt_bias,
+        H,
+        K,
+        V,
+        apply_l2norm,
+        output_g_exp,
+    )
+
+    # Fused kernel
+    fused_q, fused_k, fused_v, fused_g, fused_beta = fused_post_conv_prep(
+        conv_output,
+        a,
+        b,
+        A_log,
+        dt_bias,
+        num_k_heads=H,
+        head_k_dim=K,
+        head_v_dim=V,
+        apply_l2norm=apply_l2norm,
+        output_g_exp=output_g_exp,
+    )
+
+    # Check shapes
+    assert fused_q.shape == (L, H, K), f"q shape: {fused_q.shape}"
+    assert fused_k.shape == (L, H, K), f"k shape: {fused_k.shape}"
+    assert fused_v.shape == (L, HV, V), f"v shape: {fused_v.shape}"
+    assert fused_g.shape == (L, HV), f"g shape: {fused_g.shape}"
+    assert fused_beta.shape == (L, HV), f"beta shape: {fused_beta.shape}"
+
+    # Check dtypes
+    assert fused_q.dtype == dtype
+    assert fused_k.dtype == dtype
+    assert fused_v.dtype == dtype
+    assert fused_g.dtype == torch.float32
+    assert fused_beta.dtype == torch.float32
+
+    # Check contiguity
+    assert fused_q.is_contiguous()
+    assert fused_k.is_contiguous()
+    assert fused_v.is_contiguous()
+
+    # Check values
+    atol_qkv = 1e-2 if apply_l2norm else 1e-3
+    rtol_qkv = 1e-2 if apply_l2norm else 1e-3
+
+    torch.testing.assert_close(fused_q, ref_q, atol=atol_qkv, rtol=rtol_qkv)
+    torch.testing.assert_close(fused_k, ref_k, atol=atol_qkv, rtol=rtol_qkv)
+    torch.testing.assert_close(fused_v, ref_v, atol=1e-3, rtol=1e-3)
+    torch.testing.assert_close(fused_g, ref_g, atol=1e-4, rtol=1e-4)
+    torch.testing.assert_close(fused_beta, ref_beta, atol=1e-4, rtol=1e-4)
+
+
+@pytest.mark.parametrize("L", [1, 64, 256])
+def test_fused_post_conv_sanity(L):
+    """Sanity checks: no NaN, unit-norm q/k, beta in (0,1)."""
+    torch.manual_seed(0)
+    device = "cuda"
+    H, HV, K, V = 16, 32, 128, 128
+    qkv_dim = 2 * H * K + HV * V
+
+    conv_output = torch.randn(L, qkv_dim, dtype=torch.bfloat16, device=device)
+    a = torch.randn(L, HV, dtype=torch.bfloat16, device=device)
+    b = torch.randn(L, HV, dtype=torch.bfloat16, device=device)
+    A_log = torch.randn(HV, dtype=torch.float32, device=device) - 2.0
+    dt_bias = torch.randn(HV, dtype=torch.float32, device=device)
+
+    q, k, v, g, beta = fused_post_conv_prep(
+        conv_output,
+        a,
+        b,
+        A_log,
+        dt_bias,
+        num_k_heads=H,
+        head_k_dim=K,
+        head_v_dim=V,
+    )
+
+    # Basic sanity
+    assert not torch.isnan(q).any(), "NaN in q"
+    assert not torch.isnan(k).any(), "NaN in k"
+    assert not torch.isnan(v).any(), "NaN in v"
+    assert not torch.isnan(g).any(), "NaN in g"
+    assert not torch.isnan(beta).any(), "NaN in beta"
+
+    # L2 norm check: each head vector should have unit norm
+    q_norms = torch.norm(q.float(), dim=-1)
+    k_norms = torch.norm(k.float(), dim=-1)
+    torch.testing.assert_close(q_norms, torch.ones_like(q_norms), atol=1e-3, rtol=1e-3)
+    torch.testing.assert_close(k_norms, torch.ones_like(k_norms), atol=1e-3, rtol=1e-3)
+
+    # Beta should be in (0, 1)
+    assert (beta >= 0).all() and (beta <= 1).all(), "beta out of range"
+
+
+def test_fused_post_conv_l0():
+    """Test L=0 edge case."""
+    device = "cuda"
+    H, HV, K, V = 16, 32, 128, 128
+    qkv_dim = 2 * H * K + HV * V
+
+    conv_output = torch.empty(0, qkv_dim, dtype=torch.bfloat16, device=device)
+    a = torch.empty(0, HV, dtype=torch.bfloat16, device=device)
+    b = torch.empty(0, HV, dtype=torch.bfloat16, device=device)
+    A_log = torch.randn(HV, dtype=torch.float32, device=device)
+    dt_bias = torch.randn(HV, dtype=torch.float32, device=device)
+
+    q, k, v, g, beta = fused_post_conv_prep(
+        conv_output,
+        a,
+        b,
+        A_log,
+        dt_bias,
+        num_k_heads=H,
+        head_k_dim=K,
+        head_v_dim=V,
+    )
+    assert q.shape == (0, H, K)
+    assert g.shape == (0, HV)
diff --git a/tests/kernels/test_fused_indexer_q_rope_quant.py b/tests/kernels/test_fused_indexer_q_rope_quant.py
new file mode 100644
index 000000000000..6114b7efd6e7
--- /dev/null
+++ b/tests/kernels/test_fused_indexer_q_rope_quant.py
@@ -0,0 +1,196 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit test for fused_indexer_q_rope_quant.
+
+Compares the fused Triton kernel against the unfused reference flow used by
+the DeepseekV4 indexer in model_tracking:
+    q_rot = ops.rotary_embedding(positions, q, None, head_dim, cos_sin_cache,
+                                 is_neox_style=False,
+                                 rope_dim_offset=head_dim - rope_dim)
+    q_fp8, q_scale = per_token_group_quant_fp8(q_rot, head_dim, use_ue8m0=True)
+    weights_out = weights * q_scale * softmax_scale * head_scale
+
+Expects bit-exact equality on both q_fp8 and weights_out.
+"""
+
+import contextlib
+from unittest import mock
+
+import pytest
+import torch
+
+from vllm import _custom_ops as ops
+from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+    per_token_group_quant_fp8,
+)
+from vllm.models.deepseek_v4.common.ops import fused_indexer_q_rope_quant
+from vllm.utils.import_utils import has_cutedsl
+
+HEAD_DIM = 128
+ROPE_DIM = 64
+N_HEAD = 64
+MAX_POS = 4096
+
+
+def quantize_to_mxfp4(
+    x: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Reference MXFP4 quantization.
+
+    Args:
+        x: [..., head_dim] where head_dim is divisible by 32
+    Returns:
+        packed: [..., head_dim//2]  uint8   2 E2M1 nibbles/byte, low nibble = even index
+        scales: [..., head_dim//32] uint8   1 ue8m0 byte
+    """
+    MXFP4_BLOCK_SIZE = 32
+    orig_shape = x.shape
+    head_dim = orig_shape[-1]
+    n_blocks = head_dim // MXFP4_BLOCK_SIZE
+
+    x_f32 = x.float().reshape(-1, n_blocks, MXFP4_BLOCK_SIZE)
+
+    # Per-block ue8m0 scale: 2^ceil(log2(amax / 6.0)), stored as byte = exp + 127
+    # 6 * 2^-126 is from https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/blob/main/inference/kernel.py#L163
+    amax = x_f32.abs().amax(dim=-1, keepdim=True).clamp(min=6 * (2**-126))
+    log2_ratio = (amax * (1.0 / 6.0)).log2().ceil().clamp(-127.0, 127.0)
+    scale = log2_ratio.exp2()
+    ue8m0 = (log2_ratio + 127.0).to(torch.uint8)  # [*, n_blocks]
+
+    # E2M1 round-to-nearest-even: midpoints round to the even code.
+    # E2M1 values: [0.00, 0.50, 1.00, 1.50, 2.00, 3.00, 4.00, 6.00]
+    # boundaries:  [   0.25, 0.75, 1.25, 1.75, 2.50, 3.50, 5.00]
+    x_scaled = (x_f32 / scale).clamp(-6.0, 6.0)
+    abs_x = x_scaled.abs()
+    code = torch.zeros_like(abs_x, dtype=torch.int32)
+    code = torch.where(abs_x > 0.25, 1, code)
+    code = torch.where(abs_x >= 0.75, 2, code)
+    code = torch.where(abs_x > 1.25, 3, code)
+    code = torch.where(abs_x >= 1.75, 4, code)
+    code = torch.where(abs_x > 2.5, 5, code)
+    code = torch.where(abs_x >= 3.5, 6, code)
+    code = torch.where(abs_x > 5.0, 7, code)
+    sign = ((x_scaled.view(torch.int32) >> 31) & 1).to(torch.uint8)
+    nibble = code.to(torch.uint8) | (sign << 3)
+
+    # Pack: even-index element → low nibble, odd-index → high nibble
+    nibble_flat = nibble.reshape(-1, head_dim)
+    packed = (nibble_flat[:, 0::2] | (nibble_flat[:, 1::2] << 4)).contiguous()
+    packed = packed.reshape(*orig_shape[:-1], head_dim // 2)
+
+    scales = ue8m0.view(*orig_shape[:-1], n_blocks)
+    return packed, scales
+
+
+def _reference(
+    positions: torch.Tensor,
+    q: torch.Tensor,
+    cos_sin_cache: torch.Tensor,
+    weights: torch.Tensor,
+    softmax_scale: float,
+    head_scale: float,
+    use_fp4: bool = False,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    q_rot = q.clone()
+    ops.rotary_embedding(
+        positions,
+        q_rot,
+        None,
+        HEAD_DIM,
+        cos_sin_cache,
+        False,  # is_neox_style=False → GPT-J interleaved
+        HEAD_DIM - ROPE_DIM,  # rope_dim_offset → rotate the tail
+        False,
+    )
+
+    if use_fp4:
+        q_packed, ue8m0 = quantize_to_mxfp4(q_rot.view(-1, N_HEAD, HEAD_DIM))
+        # Pack 4 ue8m0 bytes into 1 int32
+        q_scale = ue8m0.view(torch.int32).squeeze(-1)
+        # FP4 path: q_scale stays separate (cannot be folded into a per-token scalar)
+        weights_out = weights.to(torch.float32) * softmax_scale * head_scale
+        return (q_packed, q_scale), weights_out
+
+    else:
+        q_fp8, q_scale = per_token_group_quant_fp8(
+            q_rot.view(-1, HEAD_DIM).contiguous(),
+            HEAD_DIM,
+            use_ue8m0=True,
+        )
+        q_fp8 = q_fp8.view(-1, N_HEAD, HEAD_DIM)
+        q_scale = q_scale.view(-1, N_HEAD)
+
+        weights_out = weights.to(torch.float32) * q_scale * softmax_scale * head_scale
+        return q_fp8, weights_out
+
+
+@pytest.mark.parametrize("num_tokens", [1, 7, 32, 257, 1023])
+@pytest.mark.parametrize("cache_dtype", [torch.float32, torch.bfloat16])
+@pytest.mark.parametrize("use_fp4", [False, True])
+@pytest.mark.parametrize("use_cutedsl", [False, True])
+@torch.inference_mode()
+def test_fused_indexer_q_rope_quant_matches_unfused(
+    num_tokens, cache_dtype, use_fp4, use_cutedsl
+):
+    if use_cutedsl and not has_cutedsl():
+        pytest.skip("cutedsl (cutlass) not installed")
+
+    device = "cuda"
+    torch.manual_seed(0)
+
+    q = torch.randn(num_tokens, N_HEAD, HEAD_DIM, dtype=torch.bfloat16, device=device)
+    positions = torch.randint(
+        0, MAX_POS, (num_tokens,), dtype=torch.int64, device=device
+    )
+    cos_sin_cache = torch.randn(MAX_POS, ROPE_DIM, dtype=cache_dtype, device=device)
+    weights = torch.randn(num_tokens, N_HEAD, dtype=torch.bfloat16, device=device)
+    softmax_scale = HEAD_DIM**-0.5
+    head_scale = N_HEAD**-0.5
+
+    q_quant_ref, weights_ref = _reference(
+        positions, q, cos_sin_cache, weights, softmax_scale, head_scale, use_fp4
+    )
+    # use_cutedsl=False: force the triton path even when cutedsl is installed
+    # by patching the dispatcher's has_cutedsl() binding to return False.
+    cutedsl_patch = (
+        mock.patch(
+            "vllm.models.deepseek_v4.common.ops.fused_indexer_q.has_cutedsl",
+            return_value=False,
+        )
+        if not use_cutedsl
+        else contextlib.nullcontext()
+    )
+    with cutedsl_patch:
+        q_quant_fused, weights_fused = fused_indexer_q_rope_quant(
+            positions,
+            q.clone(),
+            cos_sin_cache,
+            weights,
+            softmax_scale,
+            head_scale,
+            use_fp4,
+        )
+
+    if use_fp4:
+        q_quant_ref, q_scale_ref = q_quant_ref
+        q_quant_fused, q_scale_fused = q_quant_fused
+
+        assert torch.equal(q_scale_ref, q_scale_fused), (
+            f"q_scale mismatch: "
+            f"{(q_scale_ref != q_scale_fused).sum().item()} "
+            f"/ {q_scale_ref.numel()} bytes differ"
+        )
+
+    # fp8 tensors aren't directly comparable via torch.equal — reinterpret as int8.
+    ref_bits = q_quant_ref.view(torch.int8)
+    fused_bits = q_quant_fused.view(torch.int8)
+    assert torch.equal(ref_bits, fused_bits), (
+        f"q_quant_fused mismatch: "
+        f"{(ref_bits != fused_bits).sum().item()} / {ref_bits.numel()} bytes differ"
+    )
+
+    assert weights_fused.dtype == torch.float32
+    assert torch.equal(weights_ref, weights_fused), (
+        f"weights mismatch: max abs diff "
+        f"{(weights_ref - weights_fused).abs().max().item()}"
+    )
diff --git a/tests/kernels/test_fused_inv_rope_fp8_quant.py b/tests/kernels/test_fused_inv_rope_fp8_quant.py
new file mode 100644
index 000000000000..563661e441c1
--- /dev/null
+++ b/tests/kernels/test_fused_inv_rope_fp8_quant.py
@@ -0,0 +1,909 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Unit tests for the fused inverse RoPE + block-scaled FP8 quantization kernel.
+
+Tests compare the fused kernel against a reference implementation built from
+the existing separate operations (inverse RoPE via rotate_neox + FP8 quant
+via per_token_group_quant_fp8).
+
+The reference faithfully reproduces the exact flow in
+deepseek_v4/nvidia/ops/attention.py:295-310:
+  1. Apply inverse RoPE (NeoX style, last rope_dim=64 dims of each head)
+  2. Reshape [T, H, head_dim] -> [T, G, D]
+  3. Transpose+flatten to [G*T, D], quantize, reshape back
+  4. Return o_fp8 and o_scale with strides (D, T*D, 1) and (S, T*S, 1)
+     (non-contiguous [T, G, ...] view backed by contiguous [G, T, ...] memory)
+
+Usage:
+    pytest tests/kernels/test_fused_inv_rope_fp8_quant.py -v
+"""
+
+import pytest
+import torch
+
+from vllm.models.deepseek_v4.common.ops import fused_inv_rope_fp8_quant
+
+# -- Default dimensions matching DeepSeek V3/V4 --------------------------
+HEAD_DIM = 512
+NOPE_DIM = 448
+ROPE_DIM = 64
+QUANT_GROUP_SIZE = 128
+FP8_MAX = 448.0  # torch.finfo(torch.float8_e4m3fn).max
+FP8_DTYPE = torch.float8_e4m3fn
+EPS = 1e-10
+
+
+# =========================================================================
+# Helpers
+# =========================================================================
+
+
+def assert_dequant_close(
+    fp8_a: torch.Tensor,
+    scale_a: torch.Tensor,
+    fp8_b: torch.Tensor,
+    scale_b: torch.Tensor,
+    msg: str = "",
+):
+    """Compare two FP8-quantized tensors via their dequantized values.
+
+    Uses cosine-similarity-based diff (same as deep_gemm calc_diff).
+    Both fused and reference paths rotate in fp32 using an fp32
+    cos_sin_cache, so differences are only fp32 ordering ULPs that can
+    occasionally shift FP8 values at quantization boundaries.
+    """
+    S = scale_a.shape[-1]
+    shape = fp8_a.shape
+
+    dq_a = fp8_a.float() * scale_a.unsqueeze(-1).expand(
+        *shape[:-1], S, QUANT_GROUP_SIZE
+    ).reshape(shape)
+    dq_b = fp8_b.float() * scale_b.unsqueeze(-1).expand(
+        *shape[:-1], S, QUANT_GROUP_SIZE
+    ).reshape(shape)
+
+    # Cosine diff: 1 - cos_sim (0 = identical, higher = worse)
+    dq_a_flat = dq_a.flatten().float()
+    dq_b_flat = dq_b.flatten().float()
+    cos_sim = torch.nn.functional.cosine_similarity(
+        dq_a_flat.unsqueeze(0), dq_b_flat.unsqueeze(0)
+    ).item()
+    diff = 1.0 - cos_sim
+
+    assert diff < 1e-4, f"Dequant diff too large: {diff:.8f} (expected < 1e-4). {msg}"
+
+
+def rotate_gptj(x: torch.Tensor) -> torch.Tensor:
+    """GPT-J style rotation: interleaved pairs, negate-swap.
+
+    Matches vllm/model_executor/layers/rotary_embedding/common.py:23-27.
+    DeepseekV4 uses is_neox_style=False, so this is the correct rotation.
+    """
+    x1 = x[..., ::2]
+    x2 = x[..., 1::2]
+    x = torch.stack((-x2, x1), dim=-1)
+    return x.flatten(-2)
+
+
+def make_cos_sin_cache(
+    max_pos: int,
+    rope_dim: int = ROPE_DIM,
+    dtype: torch.dtype = torch.float32,
+    device: str = "cuda",
+) -> torch.Tensor:
+    """Create a synthetic cos_sin_cache matching the layout used by
+    DeepseekV4ScalingRotaryEmbedding._compute_cos_sin_cache.
+
+    Shape: [max_pos, rope_dim] where first half is cos, second half is sin.
+    The fused kernel requires fp32; callers can override dtype if passing
+    the cache into the bf16-only paths.
+    """
+    half = rope_dim // 2
+    # Use random but bounded frequencies so cos/sin are well-behaved
+    inv_freq = 1.0 / (
+        10000.0 ** (torch.arange(0, half, device=device, dtype=torch.float32) / half)
+    )
+    t = torch.arange(max_pos, device=device, dtype=torch.float32)
+    freqs = torch.outer(t, inv_freq)  # [max_pos, half]
+    cos = freqs.cos()
+    sin = freqs.sin()
+    cache = torch.cat((cos, sin), dim=-1)  # [max_pos, rope_dim]
+    return cache.to(dtype)
+
+
+def reference_inv_rope(
+    o: torch.Tensor,
+    positions: torch.Tensor,
+    cos_sin_cache: torch.Tensor,
+    nope_dim: int = NOPE_DIM,
+    rope_dim: int = ROPE_DIM,
+) -> torch.Tensor:
+    """Apply inverse RoPE to the last rope_dim dimensions of each head.
+
+    Matches the GPT-J inverse rotation in pos_encoding_kernels.cu, which
+    promotes the cache to fp32 and performs the rotation in fp32. The
+    result is cast back to the input dtype.
+
+    Args:
+        o: [T, H, head_dim] bf16
+        positions: [T] int64
+        cos_sin_cache: [max_pos, rope_dim] fp32
+
+    Returns:
+        o with inverse RoPE applied on the rope portion (bf16).
+    """
+    assert cos_sin_cache.dtype == torch.float32
+    cos_sin = cos_sin_cache[positions]  # [T, rope_dim] fp32
+    half = rope_dim // 2
+    cos = cos_sin[:, :half]
+    sin = cos_sin[:, half:]
+
+    # GPT-J style: repeat_interleave (not repeat) to match interleaved pairs
+    cos = cos.repeat_interleave(2, dim=-1).unsqueeze(1)
+    sin = sin.repeat_interleave(2, dim=-1).unsqueeze(1)
+    sin = -sin  # inverse
+
+    o_pass = o[..., :nope_dim]
+    o_rot_f32 = o[..., nope_dim:].float()
+    o_rot_f32 = o_rot_f32 * cos + rotate_gptj(o_rot_f32) * sin
+    o_rot = o_rot_f32.to(o.dtype)
+
+    return torch.cat([o_pass, o_rot], dim=-1)
+
+
+def _ref_ue8m0_quant_block(x_f32: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+    """Per-block UE8M0 FP8 quantization in pure float32.
+
+    Matches the Triton kernel logic exactly:
+      absmax -> 2^ceil(log2(absmax / fp8_max)) -> clamp(x / scale) -> fp8
+
+    Args:
+        x_f32: [..., quant_group_size] float32 — one or more 128-element blocks.
+
+    Returns:
+        x_fp8: same shape, float8_e4m3fn
+        scales: [...] float32, one scale per block
+    """
+    assert x_f32.shape[-1] == QUANT_GROUP_SIZE
+    absmax = x_f32.abs().amax(dim=-1, keepdim=True).clamp(min=EPS)
+    scale_raw = absmax * (1.0 / FP8_MAX)
+    scale = torch.exp2(torch.ceil(torch.log2(scale_raw)))
+    x_scaled = (x_f32 / scale).clamp(-FP8_MAX, FP8_MAX)
+    x_fp8 = x_scaled.to(FP8_DTYPE)
+    return x_fp8, scale.squeeze(-1)
+
+
+def reference_inv_rope_fp8_quant(
+    o: torch.Tensor,
+    positions: torch.Tensor,
+    cos_sin_cache: torch.Tensor,
+    n_groups: int,
+    heads_per_group: int,
+    nope_dim: int = NOPE_DIM,
+    rope_dim: int = ROPE_DIM,
+    quant_group_size: int = QUANT_GROUP_SIZE,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Full reference: inverse RoPE in fp32 + UE8M0 FP8 quant in fp32.
+
+    Mimics the Triton kernel's precision path exactly:
+      Load bf16 -> cast to fp32 -> apply inverse RoPE with fp32 cos/sin ->
+      UE8M0 quant in fp32 -> write fp8 + scale
+
+    Returns:
+        o_fp8: [T, G, D] FP8 with strides (D, T*D, 1)
+        o_scale: [T, G, S] FP32 with strides (S, T*S, 1)
+    """
+    assert cos_sin_cache.dtype == torch.float32
+    T, _H, head_dim = o.shape
+    d = heads_per_group * head_dim
+    S = d // quant_group_size
+    half_rope = rope_dim // 2
+    chunks_per_head = head_dim // quant_group_size
+
+    # Reshape [T, H, head_dim] -> [T, G, heads_per_group, head_dim]
+    o_4d = o.view(T, n_groups, heads_per_group, head_dim)
+
+    # Lookup cos/sin directly in fp32
+    cos_sin = cos_sin_cache[positions]  # [T, rope_dim] fp32
+    cos = cos_sin[:, :half_rope]  # [T, half_rope] fp32
+    sin = cos_sin[:, half_rope:]  # [T, half_rope] fp32
+
+    # Allocate outputs in [G, T, ...] contiguous layout
+    fp8_buf = torch.empty(n_groups, T, d, dtype=FP8_DTYPE, device=o.device)
+    scale_buf = torch.empty(n_groups, T, S, dtype=torch.float32, device=o.device)
+
+    # Process each quant block, matching the Triton kernel's per-program logic
+    for g in range(n_groups):
+        for qb in range(S):
+            head_in_group = qb // chunks_per_head
+            chunk_in_head = qb % chunks_per_head
+            offset = chunk_in_head * quant_group_size
+
+            # Load 128 bf16 elements and promote to fp32 for rotation+quant
+            block = o_4d[:, g, head_in_group, offset : offset + quant_group_size]
+            x = block.float()
+
+            # Apply inverse RoPE in fp32 if this is the last chunk
+            # GPT-J style: interleaved pairs (even=x, odd=y)
+            if chunk_in_head == chunks_per_head - 1:
+                rope_start = nope_dim % quant_group_size  # 64
+                rope_region = x[:, rope_start:].clone()
+                x_vals = rope_region[:, ::2]
+                y_vals = rope_region[:, 1::2]
+                x_new = x_vals * cos + y_vals * sin
+                y_new = y_vals * cos - x_vals * sin
+                x = x.clone()
+                x[:, rope_start::2] = x_new
+                x[:, rope_start + 1 :: 2] = y_new
+
+            # UE8M0 quant in fp32
+            x_fp8, scale = _ref_ue8m0_quant_block(x)
+
+            # Write to [G, T, D] contiguous memory
+            fp8_buf[g, :, qb * quant_group_size : (qb + 1) * quant_group_size] = x_fp8
+            scale_buf[g, :, qb] = scale
+
+    # Return transposed views
+    return fp8_buf.transpose(0, 1), scale_buf.transpose(0, 1)
+
+
+# =========================================================================
+# Tests
+# =========================================================================
+
+
+@pytest.mark.parametrize("num_tokens", [1, 7, 32, 128])
+@pytest.mark.parametrize(
+    "num_heads,n_groups",
+    [(64, 8), (32, 4), (128, 8)],
+    ids=["H64_G8", "H32_G4", "H128_G8"],
+)
+@pytest.mark.parametrize("seed", [0, 42])
+@torch.inference_mode()
+def test_correctness(num_tokens, num_heads, n_groups, seed):
+    """Compare fused kernel against reference for FP8 values and scales."""
+    torch.manual_seed(seed)
+
+    heads_per_group = num_heads // n_groups
+    max_pos = 4096
+    device = "cuda"
+
+    # Create inputs
+    o = torch.randn(
+        num_tokens, num_heads, HEAD_DIM, device=device, dtype=torch.bfloat16
+    )
+    positions = torch.randint(
+        0, max_pos, (num_tokens,), device=device, dtype=torch.long
+    )
+    cos_sin_cache = make_cos_sin_cache(
+        max_pos, ROPE_DIM, dtype=torch.float32, device=device
+    )
+
+    # Reference
+    ref_fp8, ref_scale = reference_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+
+    # Fused kernel
+    fused_fp8, fused_scale = fused_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+
+    # Check shapes
+    d = heads_per_group * HEAD_DIM
+    S = d // QUANT_GROUP_SIZE
+    assert ref_fp8.shape == (num_tokens, n_groups, d)
+    assert fused_fp8.shape == (num_tokens, n_groups, d)
+    assert ref_scale.shape == (num_tokens, n_groups, S)
+    assert fused_scale.shape == (num_tokens, n_groups, S)
+
+    # Scales: exact match (both use identical UE8M0 algorithm)
+    # Scales may differ by one UE8M0 step (factor of 2) if fp32 rotation
+    # ordering shifts absmax across a power-of-2 boundary. Check ratio is
+    # close to 1.
+    scale_ratio = fused_scale / ref_scale.clamp(min=1e-30)
+    assert scale_ratio.max() <= 2.0 and scale_ratio.min() >= 0.5, (
+        f"Scale ratio out of [0.5, 2]: min={scale_ratio.min():.4f} "
+        f"max={scale_ratio.max():.4f}"
+    )
+
+    # Compare via dequant (Triton vs PyTorch fp32 may differ by ULPs)
+    assert_dequant_close(ref_fp8, ref_scale, fused_fp8, fused_scale)
+
+
+@pytest.mark.parametrize("num_tokens", [1, 7, 32, 128])
+@pytest.mark.parametrize(
+    "num_heads,n_groups",
+    [(64, 8), (128, 8)],
+    ids=["H64_G8", "H128_G8"],
+)
+@torch.inference_mode()
+def test_output_strides(num_tokens, num_heads, n_groups):
+    """Verify fused output layout:
+    - FP8: logical [T, G, D] backed by contiguous [G, T, D].
+    - Scale: MN-major TMA-aligned (column-major: T-stride=1).
+    """
+
+    heads_per_group = num_heads // n_groups
+    max_pos = 4096
+    device = "cuda"
+
+    o = torch.randn(
+        num_tokens, num_heads, HEAD_DIM, device=device, dtype=torch.bfloat16
+    )
+    positions = torch.randint(
+        0, max_pos, (num_tokens,), device=device, dtype=torch.long
+    )
+    cos_sin_cache = make_cos_sin_cache(max_pos, device=device)
+
+    fused_fp8, fused_scale = fused_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+
+    # FP8: logical [T, G, D] backed by [G, T, D] row-major
+    d = heads_per_group * HEAD_DIM
+    expected_fp8_stride = (d, num_tokens * d, 1)
+    assert fused_fp8.stride() == expected_fp8_stride, (
+        f"FP8 stride mismatch: got {fused_fp8.stride()}, expected {expected_fp8_stride}"
+    )
+
+    # Scale: MN-major TMA-aligned layout. After fp8_einsum permutes
+    # [T,G,S] -> [G,T,S], T-dim should have stride 1.
+    # Our output is [T,G,S] = transpose of [G,T,S].
+    # So fused_scale.permute(1,0,2) should have T-stride=1.
+    perm = fused_scale.permute(1, 0, 2)  # [G, T, S]
+    assert perm.stride(1) == 1 or num_tokens == 1, (
+        f"Scale T-stride (after permute to [G,T,S]) should be 1, got {perm.stride(1)}"
+    )
+
+
+@pytest.mark.parametrize("num_tokens", [1, 7, 32, 128])
+@torch.inference_mode()
+def test_per_group_contiguity(num_tokens):
+    """FP8 per-group slices must be contiguous. Scale per-group slices
+    are column-major (T-stride=1) — not row-major contiguous, which is
+    correct for TMA loads."""
+    num_heads, n_groups = 64, 8
+    heads_per_group = num_heads // n_groups
+    max_pos = 4096
+    device = "cuda"
+
+    o = torch.randn(
+        num_tokens, num_heads, HEAD_DIM, device=device, dtype=torch.bfloat16
+    )
+    positions = torch.randint(
+        0, max_pos, (num_tokens,), device=device, dtype=torch.long
+    )
+    cos_sin_cache = make_cos_sin_cache(max_pos, device=device)
+
+    fused_fp8, fused_scale = fused_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+
+    for g in range(n_groups):
+        fp8_slice = fused_fp8[:, g, :]
+        assert fp8_slice.is_contiguous(), (
+            f"o_fp8[:, {g}, :] is not contiguous: "
+            f"shape={list(fp8_slice.shape)}, stride={list(fp8_slice.stride())}"
+        )
+
+
+@torch.inference_mode()
+def test_scales_are_power_of_two():
+    """Verify all scales are exact powers of 2 (UE8M0 property)."""
+    num_tokens, num_heads, n_groups = 32, 64, 8
+    heads_per_group = num_heads // n_groups
+    max_pos = 4096
+    device = "cuda"
+
+    o = torch.randn(
+        num_tokens, num_heads, HEAD_DIM, device=device, dtype=torch.bfloat16
+    )
+    positions = torch.randint(
+        0, max_pos, (num_tokens,), device=device, dtype=torch.long
+    )
+    cos_sin_cache = make_cos_sin_cache(max_pos, device=device)
+
+    _, fused_scale = fused_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+
+    # log2 of a power-of-two is an exact integer
+    log2_scales = torch.log2(fused_scale)
+    residual = (log2_scales - log2_scales.round()).abs()
+    assert residual.max() < 1e-5, (
+        f"Not all scales are powers of 2: max log2 residual = {residual.max().item()}"
+    )
+
+
+@torch.inference_mode()
+def test_nope_dims_unchanged():
+    """Nope dimensions (first 448 per head) should only be quantized,
+    not rotated. Verify by dequantizing and comparing against
+    quantize-only reference (no RoPE)."""
+    num_tokens, num_heads, n_groups = 16, 64, 8
+    heads_per_group = num_heads // n_groups
+    max_pos = 4096
+    device = "cuda"
+    torch.manual_seed(0)
+
+    o = torch.randn(
+        num_tokens, num_heads, HEAD_DIM, device=device, dtype=torch.bfloat16
+    )
+    positions = torch.randint(
+        0, max_pos, (num_tokens,), device=device, dtype=torch.long
+    )
+    cos_sin_cache = make_cos_sin_cache(max_pos, device=device)
+
+    # Fused kernel result
+    fused_fp8, fused_scale = fused_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+
+    # Reference: quantize without RoPE (identity rotation)
+    # Create a zero-sin cache so RoPE is identity
+    zero_cache = torch.zeros_like(cos_sin_cache)
+    half = ROPE_DIM // 2
+    zero_cache[:, :half] = 1.0  # cos = 1
+    # sin = 0 (already zero)
+
+    norope_fp8, norope_scale = fused_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        zero_cache,
+        n_groups,
+        heads_per_group,
+    )
+
+    # Extract nope quant blocks only (first 3 of every 4 blocks per head)
+    chunks_per_head = HEAD_DIM // QUANT_GROUP_SIZE  # 4
+
+    for h in range(heads_per_group):
+        for c in range(chunks_per_head - 1):  # skip last chunk (has rope)
+            qb = h * chunks_per_head + c
+            start = qb * QUANT_GROUP_SIZE
+            end = start + QUANT_GROUP_SIZE
+
+            fused_nope = fused_fp8[:, :, start:end].view(torch.uint8)
+            norope_nope = norope_fp8[:, :, start:end].view(torch.uint8)
+            assert torch.equal(fused_nope, norope_nope), (
+                f"Nope block (head={h}, chunk={c}) differs between "
+                f"fused and no-rope reference"
+            )
+
+            fused_s = fused_scale[:, :, qb]
+            norope_s = norope_scale[:, :, qb]
+            assert torch.equal(fused_s, norope_s), (
+                f"Nope scale (head={h}, chunk={c}) differs"
+            )
+
+
+@torch.inference_mode()
+def test_single_token():
+    """Edge case: single token."""
+    num_tokens, num_heads, n_groups = 1, 64, 8
+    heads_per_group = num_heads // n_groups
+    max_pos = 4096
+    device = "cuda"
+
+    o = torch.randn(
+        num_tokens, num_heads, HEAD_DIM, device=device, dtype=torch.bfloat16
+    )
+    positions = torch.tensor([42], device=device, dtype=torch.long)
+    cos_sin_cache = make_cos_sin_cache(max_pos, device=device)
+
+    ref_fp8, ref_scale = reference_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+    fused_fp8, fused_scale = fused_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+
+    assert_dequant_close(ref_fp8, ref_scale, fused_fp8, fused_scale)
+
+
+@torch.inference_mode()
+def test_zero_positions():
+    """Edge case: all positions are 0."""
+    num_tokens, num_heads, n_groups = 16, 64, 8
+    heads_per_group = num_heads // n_groups
+    max_pos = 4096
+    device = "cuda"
+
+    o = torch.randn(
+        num_tokens, num_heads, HEAD_DIM, device=device, dtype=torch.bfloat16
+    )
+    positions = torch.zeros(num_tokens, device=device, dtype=torch.long)
+    cos_sin_cache = make_cos_sin_cache(max_pos, device=device)
+
+    ref_fp8, ref_scale = reference_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+    fused_fp8, fused_scale = fused_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+
+    assert_dequant_close(ref_fp8, ref_scale, fused_fp8, fused_scale)
+
+
+@torch.inference_mode()
+def test_large_values():
+    """Edge case: values near FP8 saturation to test clamping."""
+    num_tokens, num_heads, n_groups = 8, 64, 8
+    heads_per_group = num_heads // n_groups
+    max_pos = 4096
+    device = "cuda"
+
+    # Create inputs with large values that will saturate FP8
+    o = torch.randn(
+        num_tokens, num_heads, HEAD_DIM, device=device, dtype=torch.bfloat16
+    )
+    o = o * 1000.0  # scale up to force saturation
+    positions = torch.randint(
+        0, max_pos, (num_tokens,), device=device, dtype=torch.long
+    )
+    cos_sin_cache = make_cos_sin_cache(max_pos, device=device)
+
+    ref_fp8, ref_scale = reference_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+    fused_fp8, fused_scale = fused_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+
+    assert_dequant_close(ref_fp8, ref_scale, fused_fp8, fused_scale)
+
+
+@torch.inference_mode()
+def test_dequant_numerical_accuracy():
+    """Verify dequantized values are close to the original (after inv RoPE)."""
+    num_tokens, num_heads, n_groups = 32, 64, 8
+    heads_per_group = num_heads // n_groups
+    max_pos = 4096
+    device = "cuda"
+    torch.manual_seed(0)
+
+    o = torch.randn(
+        num_tokens, num_heads, HEAD_DIM, device=device, dtype=torch.bfloat16
+    )
+    positions = torch.randint(
+        0, max_pos, (num_tokens,), device=device, dtype=torch.long
+    )
+    cos_sin_cache = make_cos_sin_cache(max_pos, device=device)
+
+    # Get the post-inv-RoPE values (ground truth before quantization)
+    o_after_rope = reference_inv_rope(o.clone(), positions, cos_sin_cache)
+    d = heads_per_group * HEAD_DIM
+    o_after_rope = o_after_rope.view(num_tokens, n_groups, d)
+
+    # Get fused quantized output
+    fused_fp8, fused_scale = fused_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+
+    # Dequantize: broadcast scale [T, G, S] to [T, G, D] via repeat
+    S = d // QUANT_GROUP_SIZE
+    scale_expanded = (
+        fused_scale.unsqueeze(-1)
+        .expand(num_tokens, n_groups, S, QUANT_GROUP_SIZE)
+        .reshape(num_tokens, n_groups, d)
+    )
+    dequant = fused_fp8.float() * scale_expanded
+
+    # Check relative error.
+    # FP8 e4m3 with UE8M0 (power-of-two scales that round UP) quantizes more
+    # coarsely than optimal scaling. Both paths rotate in fp32, so the bulk
+    # of the error comes from UE8M0 quantization itself (~10-12% typical).
+    o_gt = o_after_rope.transpose(0, 1).contiguous().transpose(0, 1)
+    dequant_contig = dequant.transpose(0, 1).contiguous().transpose(0, 1)
+
+    abs_err = (dequant_contig.float() - o_gt.float()).abs()
+    rel_err = abs_err / (o_gt.float().abs().clamp(min=1e-6))
+    mean_rel_err = rel_err.mean().item()
+
+    assert mean_rel_err < 0.15, (
+        f"Mean relative error too high: {mean_rel_err:.4f} (expected < 0.15)"
+    )
+
+
+def _unfused_inv_rope_fp8_quant(
+    o: torch.Tensor,
+    positions: torch.Tensor,
+    cos_sin_cache: torch.Tensor,
+    n_groups: int,
+    heads_per_group: int,
+    nope_dim: int = NOPE_DIM,
+    rope_dim: int = ROPE_DIM,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Unfused path matching deepseek_v4/nvidia/ops/attention.py:295-310.
+
+    Uses the production CUDA RoPE kernel + per_token_group_quant_fp8.
+    """
+    from vllm import _custom_ops as ops
+    from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+        per_token_group_quant_fp8,
+    )
+
+    head_dim = o.shape[-1]
+    rope_dim_offset = head_dim - rope_dim
+
+    # Step 1: In-place CUDA RoPE (same as production)
+    ops.rotary_embedding(
+        positions,
+        o,
+        None,
+        head_dim,
+        cos_sin_cache,
+        False,  # is_neox=False for DeepseekV4 (GPT-J style)
+        rope_dim_offset=rope_dim_offset,
+        inverse=True,
+    )
+
+    # Step 2: Reshape + quant + reshape (same as production)
+    T = o.shape[0]
+    d = heads_per_group * head_dim
+    o = o.view(T, n_groups, -1)
+    o_flat = o.transpose(0, 1).contiguous().reshape(-1, d)
+    o_fp8, o_scale = per_token_group_quant_fp8(
+        o_flat,
+        group_size=QUANT_GROUP_SIZE,
+        use_ue8m0=True,
+    )
+    o_fp8 = o_fp8.view(n_groups, T, d).transpose(0, 1)
+    o_scale = o_scale.view(n_groups, T, -1).transpose(0, 1)
+    return o_fp8, o_scale
+
+
+# =========================================================================
+# End-to-end test including fp8_einsum
+# =========================================================================
+
+
+@pytest.mark.parametrize("num_tokens", [1, 7, 32, 128, 1024])
+@pytest.mark.parametrize(
+    "num_heads,n_groups",
+    [(64, 8)],
+    ids=["H64_G8"],
+)
+@torch.inference_mode()
+def test_einsum_end_to_end(num_tokens, num_heads, n_groups):
+    """End-to-end: fused inv_rope+quant → fp8_einsum must match
+    unfused CUDA_rope+quant → fp8_einsum bitwise.
+
+    This catches stride/layout bugs that only manifest when the einsum
+    kernel actually consumes the quantized activations.
+    """
+    from deep_gemm.utils.math import ceil_div
+
+    from vllm.utils.deep_gemm import (
+        fp8_einsum,
+        per_block_cast_to_fp8,
+        transform_sf_into_required_layout,
+    )
+
+    heads_per_group = num_heads // n_groups
+    d = heads_per_group * HEAD_DIM
+    o_lora_rank = 1024
+    max_pos = 4096
+    device = "cuda"
+    torch.manual_seed(0)
+
+    o = torch.randn(
+        num_tokens, num_heads, HEAD_DIM, device=device, dtype=torch.bfloat16
+    )
+    positions = torch.randint(
+        0, max_pos, (num_tokens,), device=device, dtype=torch.long
+    )
+    cos_sin_cache = make_cos_sin_cache(max_pos, device=device)
+
+    # -- Weight quantization (shared between both paths) --
+    w = torch.randn(n_groups, o_lora_rank, d, device=device, dtype=torch.bfloat16)
+    w_fp8 = torch.empty_like(w, dtype=torch.float8_e4m3fn)
+    w_scale = torch.empty(
+        n_groups,
+        ceil_div(o_lora_rank, 128),
+        ceil_div(d, 128),
+        device=device,
+        dtype=torch.float32,
+    )
+    for g in range(n_groups):
+        w_fp8[g], w_scale[g] = per_block_cast_to_fp8(w[g], use_ue8m0=True)
+
+    recipe = (1, 1, 128)
+    w_scale_t = transform_sf_into_required_layout(
+        sf=w_scale,
+        mn=o_lora_rank,
+        k=d,
+        recipe=(1, 128, 128),
+        num_groups=n_groups,
+        is_sfa=False,
+    )
+
+    # -- UNFUSED path --
+    ref_fp8, ref_scale = _unfused_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+    z_ref = torch.empty(
+        num_tokens, n_groups, o_lora_rank, device=device, dtype=torch.bfloat16
+    )
+    fp8_einsum(
+        "bhr,hdr->bhd", (ref_fp8, ref_scale), (w_fp8, w_scale_t), z_ref, recipe=recipe
+    )
+
+    # -- FUSED path --
+    fused_fp8, fused_scale = fused_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+    z_fused = torch.empty(
+        num_tokens, n_groups, o_lora_rank, device=device, dtype=torch.bfloat16
+    )
+    fp8_einsum(
+        "bhr,hdr->bhd",
+        (fused_fp8, fused_scale),
+        (w_fp8, w_scale_t),
+        z_fused,
+        recipe=recipe,
+    )
+
+    # -- Checks --
+    # Einsum output: Triton and CUDA both rotate in fp32 now, so diffs
+    # come from fp32 ordering and UE8M0 boundary shifts only.
+    # Use relative diff (same metric as test_fp8_einsum.py).
+    from deep_gemm.testing import calc_diff
+
+    z_diff = calc_diff(z_fused, z_ref)
+    assert z_diff < 0.01, (
+        f"Einsum output diff too large: {z_diff:.6f} (expected < 0.01)"
+    )
+
+
+@pytest.mark.parametrize("num_tokens", [1, 32, 256])
+@torch.inference_mode()
+def test_with_real_deepseek_v4_rope(num_tokens, default_vllm_config):
+    """Test with real DeepseekV4ScalingRotaryEmbedding (GPT-J style,
+    mscale=0, YaRN scaling) matching the production config."""
+
+    num_heads = 64
+    n_groups = 8
+    heads_per_group = num_heads // n_groups
+    device = "cuda"
+    torch.manual_seed(0)
+
+    # Build YaRN-scaled cos_sin_cache matching real DeepSeek V3/V4 config
+    # (mscale=0 → mscale=1.0, so no magnitude scaling)
+    from vllm.model_executor.layers.rotary_embedding.common import (
+        yarn_find_correction_range,
+        yarn_linear_ramp_mask,
+    )
+
+    scaling_factor = 16
+    base = 10000.0
+    max_pos = 65536
+    beta_fast, beta_slow = 32, 1
+
+    pos_freqs = base ** (
+        torch.arange(0, ROPE_DIM, 2, dtype=torch.float32, device=device) / ROPE_DIM
+    )
+    inv_freq_extra = 1.0 / pos_freqs
+    inv_freq_interp = 1.0 / (scaling_factor * pos_freqs)
+    low, high = yarn_find_correction_range(
+        beta_fast, beta_slow, ROPE_DIM, base, max_pos
+    )
+    mask = 1 - yarn_linear_ramp_mask(low, high, ROPE_DIM // 2, dtype=torch.float32).to(
+        device
+    )
+    inv_freq = inv_freq_interp * (1 - mask) + inv_freq_extra * mask
+    t = torch.arange(max_pos * scaling_factor, device=device, dtype=torch.float32)
+    freqs = torch.outer(t, inv_freq)
+    # mscale=0 → yarn_get_mscale returns 1.0
+    cos_sin_cache = torch.cat([freqs.cos(), freqs.sin()], dim=-1)  # fp32
+
+    o = torch.randn(
+        num_tokens, num_heads, HEAD_DIM, device=device, dtype=torch.bfloat16
+    )
+    positions = torch.randint(0, 4096, (num_tokens,), device=device, dtype=torch.long)
+
+    # UNFUSED: CUDA RoPE with is_neox=False (GPT-J)
+    from vllm import _custom_ops as ops
+    from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+        per_token_group_quant_fp8,
+    )
+
+    o_unfused = o.clone()
+    ops.rotary_embedding(
+        positions,
+        o_unfused,
+        None,
+        HEAD_DIM,
+        cos_sin_cache,
+        False,  # is_neox=False (GPT-J style)
+        rope_dim_offset=NOPE_DIM,
+        inverse=True,
+    )
+    d = heads_per_group * HEAD_DIM
+    T = num_tokens
+    o_unfused = o_unfused.view(T, n_groups, d)
+    o_flat = o_unfused.transpose(0, 1).contiguous().reshape(-1, d)
+    ref_fp8, ref_scale = per_token_group_quant_fp8(
+        o_flat,
+        group_size=QUANT_GROUP_SIZE,
+        use_ue8m0=True,
+    )
+    ref_fp8 = ref_fp8.view(n_groups, T, d).transpose(0, 1)
+    ref_scale = ref_scale.view(n_groups, T, -1).transpose(0, 1)
+
+    # FUSED: use the real YaRN-scaled cos_sin_cache
+    fused_fp8, fused_scale = fused_inv_rope_fp8_quant(
+        o.clone(),
+        positions,
+        cos_sin_cache,
+        n_groups,
+        heads_per_group,
+    )
+
+    # Scales must match exactly (same UE8M0 algorithm)
+    # Compare via dequant (Triton bf16 rotation may differ from CUDA by 1 ULP)
+    assert_dequant_close(
+        ref_fp8, ref_scale, fused_fp8, fused_scale, msg="Real DeepSeek V4 rope"
+    )
diff --git a/tests/kernels/test_fused_quant_activation.py b/tests/kernels/test_fused_quant_activation.py
index 2670f224d7cb..0696ebb8d556 100644
--- a/tests/kernels/test_fused_quant_activation.py
+++ b/tests/kernels/test_fused_quant_activation.py
@@ -7,6 +7,7 @@
 from tests.kernels.utils import opcheck
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed
 
 DTYPES = [torch.bfloat16, torch.float16]
 QUANT_DTYPES = [current_platform.fp8_dtype()]
@@ -49,9 +50,7 @@ def test_silu_and_mul(
     seed: int,
     device: str,
 ) -> None:
-    torch.random.manual_seed(seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(seed)
+    set_random_seed(seed)
     torch.set_default_device(device)
 
     layer = SiluAndMul()
diff --git a/tests/kernels/test_kda.py b/tests/kernels/test_kda.py
new file mode 100644
index 000000000000..18531fad999a
--- /dev/null
+++ b/tests/kernels/test_kda.py
@@ -0,0 +1,157 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Precision tests for vllm's chunk_kda Triton operator.
+
+Compares chunk_kda against a naive recurrent reference (float32).
+Uses torch.rand for q/k/v to match FLA's test pattern.
+"""
+
+import pytest
+import torch
+import torch.nn.functional as F
+
+from vllm.model_executor.layers.fla.ops.kda import chunk_kda
+from vllm.model_executor.layers.fla.ops.l2norm import l2norm_fwd
+
+DEVICE = "cuda"
+
+
+def naive_recurrent_kda(
+    q: torch.Tensor,
+    k: torch.Tensor,
+    v: torch.Tensor,
+    g: torch.Tensor,
+    beta: torch.Tensor,
+    scale: float | None = None,
+    initial_state: torch.Tensor | None = None,
+    output_final_state: bool = False,
+) -> tuple[torch.Tensor, torch.Tensor | None]:
+    """Naive recurrent KDA reference, ported from FLA's naive.py."""
+    dtype = v.dtype
+    B, T, H, K = q.shape
+    V = v.shape[-1]
+    if scale is None:
+        scale = K**-0.5
+
+    q, k, v, g, beta = (x.to(torch.float) for x in [q, k, v, g, beta])
+    q = q * scale
+
+    S = k.new_zeros(B, H, K, V).to(q)
+    if initial_state is not None:
+        S += initial_state
+    o = torch.zeros_like(v)
+    for i in range(T):
+        q_i, k_i, v_i, g_i, b_i = q[:, i], k[:, i], v[:, i], g[:, i], beta[:, i]
+        S = S * g_i[..., None].exp()
+        S = S + torch.einsum(
+            "bhk,bhv->bhkv",
+            b_i[..., None] * k_i,
+            v_i - (k_i[..., None] * S).sum(-2),
+        )
+        o[:, i] = torch.einsum("bhk,bhkv->bhv", q_i, S)
+    if not output_final_state:
+        S = None
+    return o.to(dtype), S
+
+
+def assert_close(
+    name: str,
+    ref: torch.Tensor,
+    tri: torch.Tensor,
+    ratio: float,
+    err_atol: float = 1e-6,
+):
+    """RMSE-based relative error comparison."""
+    abs_err = (ref.detach() - tri.detach()).flatten().abs().max().item()
+    rmse_diff = (ref.detach() - tri.detach()).flatten().square().mean().sqrt().item()
+    rmse_base = ref.detach().flatten().square().mean().sqrt().item()
+    rel_err = rmse_diff / (rmse_base + 1e-8)
+    print(f"{name:>4} | abs={abs_err:.6f} | rmse={rel_err:.6f} | thr={ratio}")
+    if abs_err <= err_atol:
+        return
+    assert not torch.isnan(ref).any(), f"{name}: NaN detected in ref"
+    assert not torch.isnan(tri).any(), f"{name}: NaN detected in tri"
+    assert rel_err < ratio, (
+        f"{name}: max abs err {abs_err:.6f}, rmse ratio {rel_err:.6f} >= {ratio}"
+    )
+
+
+@pytest.mark.parametrize(
+    ("H", "D", "cu_seqlens", "dtype"),
+    [
+        pytest.param(
+            *test,
+            id="H{}-D{}-cu{}-{}".format(*test),
+        )
+        for test in [
+            (32, 128, [0, 64], torch.float16),
+            (32, 128, [0, 1024], torch.float16),
+            (32, 128, [0, 15], torch.float16),
+            (32, 128, [0, 256, 512, 768, 1024], torch.float16),
+            (32, 128, [0, 15, 100, 300, 1200], torch.float16),
+            (64, 128, [0, 256, 500, 1000], torch.float16),
+            (32, 128, [0, 8192], torch.float16),
+            (32, 128, [0, 256, 500, 1000], torch.bfloat16),
+        ]
+    ],
+)
+@torch.inference_mode()
+def test_chunk_kda(
+    H: int,
+    D: int,
+    cu_seqlens: list[int],
+    dtype: torch.dtype,
+):
+    T = cu_seqlens[-1]
+    torch.manual_seed(42)
+    B = 1
+    cu_seqlens_t = torch.LongTensor(cu_seqlens).to(DEVICE)
+    N = len(cu_seqlens) - 1
+
+    q = torch.rand(B, T, H, D, dtype=dtype, device=DEVICE)
+    k = torch.rand(B, T, H, D, dtype=dtype, device=DEVICE)
+    v = torch.rand(B, T, H, D, dtype=dtype, device=DEVICE)
+    g = F.logsigmoid(torch.randn(B, T, H, D, dtype=torch.float32, device=DEVICE)).to(
+        dtype
+    )
+    beta = torch.rand(B, T, H, dtype=dtype, device=DEVICE).sigmoid()
+    h0 = torch.randn(N, H, D, D, dtype=torch.float32, device=DEVICE)
+
+    # Naive reference with l2norm_fwd (same kernel as chunk_kda)
+    ref_outputs = []
+    ref_states = []
+    for i in range(N):
+        s, e = cu_seqlens[i], cu_seqlens[i + 1]
+        q_i = l2norm_fwd(q[:, s:e].contiguous())
+        k_i = l2norm_fwd(k[:, s:e].contiguous())
+        o_i, ht_i = naive_recurrent_kda(
+            q_i,
+            k_i,
+            v[:, s:e],
+            g[:, s:e],
+            beta[:, s:e],
+            initial_state=h0[i],
+            output_final_state=True,
+        )
+        ref_outputs.append(o_i)
+        ref_states.append(ht_i)
+    ref_o = torch.cat(ref_outputs, dim=1)
+    ref_ht = torch.cat(ref_states, dim=0)
+
+    # h0 transposed to (V, K) layout for the kernel; naive uses (K, V)
+    tri_o, tri_ht = chunk_kda(
+        q=q.clone(),
+        k=k.clone(),
+        v=v.clone(),
+        g=g.clone(),
+        beta=beta.clone(),
+        initial_state=h0.transpose(-1, -2).contiguous().clone(),
+        output_final_state=True,
+        cu_seqlens=cu_seqlens_t,
+        use_qk_l2norm_in_kernel=True,
+    )
+
+    assert not torch.isnan(tri_o).any(), "Triton output o contains NaN"
+    assert not torch.isnan(tri_ht).any(), "Triton output ht contains NaN"
+    assert_close("o", ref_o, tri_o, 0.005)
+    assert_close("ht", ref_ht, tri_ht.transpose(-1, -2).contiguous(), 0.005)
diff --git a/tests/kernels/test_mhc_kernels.py b/tests/kernels/test_mhc_kernels.py
new file mode 100644
index 000000000000..81fceeceac10
--- /dev/null
+++ b/tests/kernels/test_mhc_kernels.py
@@ -0,0 +1,198 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
+import torch
+
+import vllm.model_executor.kernels.mhc  # noqa: F401
+from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed
+
+DEVICE = current_platform.device_type
+
+
+def sinkhorn_normalize_ref(x: torch.Tensor, repeat: int, eps: float) -> torch.Tensor:
+    x = x.softmax(-1) + eps
+    x = x / (x.sum(-2, keepdim=True) + eps)
+    for _ in range(repeat - 1):
+        x = x / (x.sum(-1, keepdim=True) + eps)
+        x = x / (x.sum(-2, keepdim=True) + eps)
+    return x
+
+
+def mhc_pre_ref(
+    residual: torch.Tensor,
+    fn: torch.Tensor,
+    hc_scale: torch.Tensor,
+    hc_base: torch.Tensor,
+    rms_eps: float,
+    hc_pre_eps: float,
+    hc_sinkhorn_eps: float,
+    hc_post_mult_value: float,
+    sinkhorn_repeat: int,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """mHC pre reference kernel from tilelang repo: https://github.com/tile-ai/tilelang/blob/d135bd1cd2d2eee74fbb41dd0a0831a427194c86/examples/deepseek_mhc/example_mhc_pre.py#L303"""
+    hc_mult = residual.shape[-2]
+
+    residual_flat = residual.flatten(-2, -1).float()
+    sqrsum = residual_flat.square().sum(-1)
+    mixes = (
+        residual_flat @ fn.T * (sqrsum.unsqueeze(-1) / fn.shape[-1] + rms_eps).rsqrt()
+    )
+
+    hc_scale = torch.cat(
+        [
+            hc_scale[0].expand(hc_mult),
+            hc_scale[1].expand(hc_mult),
+            hc_scale[2].expand(hc_mult * hc_mult),
+        ],
+    )
+    mixes = mixes * hc_scale + hc_base
+
+    pre_mix = mixes[:, :hc_mult].sigmoid().unsqueeze(-1) + hc_pre_eps
+    post_mix = (
+        mixes[:, hc_mult : 2 * hc_mult].sigmoid() * hc_post_mult_value
+    ).unsqueeze(-1)
+    res_mix = mixes[:, 2 * hc_mult :].view(-1, hc_mult, hc_mult)
+
+    res_mix = sinkhorn_normalize_ref(
+        res_mix, repeat=sinkhorn_repeat, eps=hc_sinkhorn_eps
+    )
+
+    layer_input = (residual * pre_mix).sum(-2).bfloat16()
+
+    return post_mix, res_mix, layer_input
+
+
+def mhc_post_ref(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    post_layer_mix: torch.Tensor,
+    comb_res_mix: torch.Tensor,
+) -> torch.Tensor:
+    """mHC post reference kernel from tilelang repo: https://github.com/tile-ai/tilelang/blob/d135bd1cd2d2eee74fbb41dd0a0831a427194c86/examples/deepseek_mhc/example_mhc_post.py#L68"""
+    term2 = torch.bmm(comb_res_mix.mT, residual.float())
+    return (x.float().unsqueeze(-2) * post_layer_mix + term2).bfloat16()
+
+
+def hc_head_ref(
+    residual: torch.Tensor,
+    fn: torch.Tensor,
+    hc_scale: torch.Tensor,
+    hc_base: torch.Tensor,
+    rms_eps: float,
+    hc_eps: float,
+) -> torch.Tensor:
+    residual_flat = residual.flatten(-2).float()
+    residual_norm = residual_flat * torch.rsqrt(
+        residual_flat.square().mean(dim=-1, keepdim=True) + rms_eps
+    )
+    pre_mix = torch.nn.functional.linear(residual_norm, fn)
+    pre_mix = torch.sigmoid(pre_mix * hc_scale + hc_base) + hc_eps
+    return torch.sum(pre_mix.unsqueeze(-1) * residual.float(), dim=-2).bfloat16()
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda(),
+    reason="CUDA required",
+)
+@pytest.mark.parametrize("num_tokens", [1, 4, 8, 128])
+@pytest.mark.parametrize("hidden_size", [4096, 7168])
+@pytest.mark.parametrize("hc_mult", [4])
+def test_mhc_fused_post_pre(num_tokens, hidden_size, hc_mult):
+    torch.set_default_device(DEVICE)
+    set_random_seed(0)
+
+    x = torch.randn((num_tokens, hidden_size), dtype=torch.bfloat16)
+    residual = torch.randn((num_tokens, hc_mult, hidden_size), dtype=torch.bfloat16)
+    post_layer_mix = torch.randn((num_tokens, hc_mult, 1), dtype=torch.float32)
+    comb_res_mix = torch.randn((num_tokens, hc_mult, hc_mult), dtype=torch.float32)
+
+    hc_mult2 = hc_mult * hc_mult
+    hc_mult3 = hc_mult * 2 + hc_mult2
+    fn = (
+        torch.randn((hc_mult3, hc_mult, hidden_size), dtype=torch.float)
+        * 1e-4
+        * (1 + torch.arange(hc_mult).mul(0.01).view(1, -1, 1))
+    ).flatten(1, 2)
+    hc_scale = torch.randn((3,), dtype=torch.float) * 0.1
+    hc_base = torch.randn((hc_mult3,), dtype=torch.float) * 0.1
+
+    hc_sinkhorn_eps = hc_pre_eps = rms_eps = 1e-6
+    sinkhorn_repeat = 20
+    hc_post_alpha = 1.0
+
+    def run_ref():
+        residual_ref = mhc_post_ref(x, residual, post_layer_mix, comb_res_mix)
+        post_mix_ref, res_mix_ref, layer_input_ref = mhc_pre_ref(
+            residual_ref,
+            fn,
+            hc_scale,
+            hc_base,
+            rms_eps,
+            hc_pre_eps,
+            hc_sinkhorn_eps,
+            hc_post_alpha,
+            sinkhorn_repeat,
+        )
+        return residual_ref, post_mix_ref, res_mix_ref, layer_input_ref
+
+    residual_ref, post_mix_ref, res_mix_ref, layer_input_ref = run_ref()
+
+    residual, post_mix, res_mix, x = torch.ops.vllm.mhc_fused_post_pre_tilelang(
+        x,
+        residual,
+        post_layer_mix,
+        comb_res_mix,
+        fn,
+        hc_scale,
+        hc_base,
+        rms_eps,
+        hc_pre_eps,
+        hc_sinkhorn_eps,
+        hc_post_alpha,
+        sinkhorn_repeat,
+    )
+
+    torch.testing.assert_close(residual, residual_ref, atol=1e-2, rtol=1e-2)
+    torch.testing.assert_close(post_mix, post_mix_ref, atol=1e-2, rtol=1e-2)
+    torch.testing.assert_close(res_mix, res_mix_ref, atol=1e-2, rtol=1e-2)
+    torch.testing.assert_close(x, layer_input_ref, atol=1e-2, rtol=1e-2)
+
+
+@pytest.mark.skipif(
+    not current_platform.is_rocm(),
+    reason="ROCm required",
+)
+@pytest.mark.parametrize("num_tokens", [1, 4, 8, 128])
+@pytest.mark.parametrize("hidden_size", [4096, 7168])
+@pytest.mark.parametrize("hc_mult", [4])
+def test_hc_head_triton(num_tokens, hidden_size, hc_mult):
+    torch.set_default_device(DEVICE)
+    set_random_seed(0)
+
+    residual = torch.randn((num_tokens, hc_mult, hidden_size), dtype=torch.bfloat16)
+    fn = torch.randn((hc_mult, hc_mult * hidden_size), dtype=torch.float32) * 1e-4
+    hc_scale = torch.randn((1,), dtype=torch.float32) * 0.1
+    hc_base = torch.randn((hc_mult,), dtype=torch.float32) * 0.1
+    rms_eps = hc_eps = 1e-6
+
+    out = torch.empty((num_tokens, hidden_size), dtype=torch.bfloat16)
+    out.fill_(float("nan"))
+
+    result = torch.ops.vllm.hc_head_triton(
+        residual,
+        fn,
+        hc_scale,
+        hc_base,
+        out,
+        hidden_size,
+        rms_eps,
+        hc_eps,
+        hc_mult,
+    )
+
+    assert result is None
+    assert not torch.isnan(out).any()
+
+    out_ref = hc_head_ref(residual, fn, hc_scale, hc_base, rms_eps, hc_eps)
+    torch.testing.assert_close(out, out_ref, atol=5e-2, rtol=1e-2)
diff --git a/tests/kernels/test_top_k_per_row.py b/tests/kernels/test_top_k_per_row.py
index f4bfc1666c09..7b9c11495e8b 100644
--- a/tests/kernels/test_top_k_per_row.py
+++ b/tests/kernels/test_top_k_per_row.py
@@ -122,6 +122,39 @@ def compare_top_k_results(
     return True
 
 
+def validate_topk_against_reference(
+    logits: torch.Tensor,
+    cuda_indices: torch.Tensor,
+    row_starts: torch.Tensor,
+    row_ends: torch.Tensor,
+    top_k: int,
+    kernel_name: str,
+) -> None:
+    """
+    Validate CUDA top-k results against PyTorch reference implementation.
+
+    Args:
+        logits: Input logits tensor
+        cuda_indices: CUDA kernel output indices
+        row_starts: Row start positions
+        row_ends: Row end positions
+        top_k: Number of top elements to select
+        kernel_name: Name of the kernel being tested (for error messages)
+    """
+    num_rows = cuda_indices.shape[0]
+    torch_indices = torch.empty((num_rows, top_k), dtype=torch.int32, device="cuda")
+
+    for i in range(num_rows):
+        row_end = int(row_ends[i])
+        k_i = min(top_k, row_end)
+        idx = logits[i, :row_end].topk(k_i, dim=-1)[1]
+        torch_indices[i, :k_i] = idx
+
+    assert compare_top_k_results(
+        logits, cuda_indices, torch_indices, row_starts, row_ends, top_k
+    ), f"{kernel_name} results don't match torch.topk"
+
+
 @pytest.mark.parametrize("num_rows", NUM_ROWS)
 @pytest.mark.parametrize("top_k", TOP_K_VALUES)
 @pytest.mark.parametrize("clean_logits", [True, False])
@@ -278,111 +311,535 @@ def test_top_k_per_row_decode_large_vocab_size(clean_logits: bool) -> None:
 
 
 @pytest.mark.skipif(not current_platform.is_cuda(), reason="This test requires CUDA")
+@pytest.mark.parametrize(
+    "seq_len_range,test_id",
+    [
+        pytest.param((4000, 8000), "short_sequences", id="short"),
+        pytest.param((8000, 32000), "medium_sequences", id="medium"),
+        pytest.param((32000, 163840), "long_sequences", id="long"),
+    ],
+)
 @pytest.mark.parametrize("clean_logits", [True, False])
+@pytest.mark.parametrize("top_k", [2048])
+@pytest.mark.parametrize("next_n", [1, 4])
 @torch.inference_mode()
-def test_deepseek_hybrid_topk(clean_logits: bool) -> None:
+def test_deepseek_persistent_topk(
+    seq_len_range: tuple[int, int],
+    test_id: str,
+    clean_logits: bool,
+    top_k: int,
+    next_n: int,
+) -> None:
+    """
+    Test persistent_topk with varying sequence lengths and speculative decoding.
+    Supports speculative decoding with next_n > 1.
+    """
+    set_random_seed(42 if test_id == "short_sequences" else 43)
     torch.set_default_device("cuda:0")
 
-    top_k = 2048
-
-    # Test case 1: Short sequences (< 8192)
-    batch_size_short = 4
-    next_n = 1
-    num_rows_short = batch_size_short * next_n
+    batch_size = 4
+    num_rows = batch_size * next_n
 
-    # Create sequences with max length < 8192
-    seq_lens_short = torch.randint(
-        4000, 8000, (batch_size_short,), dtype=torch.int32, device="cuda"
+    seq_lens = torch.randint(
+        seq_len_range[0],
+        seq_len_range[1],
+        (batch_size,),
+        dtype=torch.int32,
+        device="cuda",
     )
 
-    row_starts_short = torch.zeros(num_rows_short, dtype=torch.int32, device="cuda")
-    row_indices_short = torch.arange(num_rows_short, device="cuda") // next_n
-    next_n_offset_short = torch.arange(num_rows_short, device="cuda") % next_n
-    row_ends_short = (
-        seq_lens_short[row_indices_short] - next_n + next_n_offset_short + 1
+    # Compute row boundaries for speculative decoding
+    row_starts = torch.zeros(num_rows, dtype=torch.int32, device="cuda")
+    row_indices = torch.arange(num_rows, device="cuda") // next_n
+    next_n_offset = torch.arange(num_rows, device="cuda") % next_n
+    row_ends = seq_lens[row_indices] - next_n + next_n_offset + 1
+
+    logits = create_random_logits(
+        row_starts, row_ends, torch.float32, 42, clean_logits, "random"
     )
 
-    logits_short = create_random_logits(
-        row_starts_short, row_ends_short, torch.float32, 42, clean_logits, "random"
+    indices = torch.empty((num_rows, top_k), dtype=torch.int32, device="cuda")
+
+    if next_n == 1:
+        lengths = seq_lens
+    else:
+        offsets = torch.arange(next_n, device=logits.device, dtype=torch.int32)
+        lengths = (seq_lens.unsqueeze(1) - next_n + 1 + offsets).flatten()
+
+    workspace = torch.empty(1024 * 1024, dtype=torch.uint8, device="cuda")
+    max_seq_len = int(seq_lens.max().item())
+    torch.ops._C.persistent_topk(
+        logits, lengths, indices, workspace, top_k, max_seq_len
     )
 
-    indices_vllm = torch.empty(
-        (num_rows_short, top_k), dtype=torch.int32, device="cuda"
+    validate_topk_against_reference(
+        logits, indices, row_starts, row_ends, top_k, f"persistent_topk ({test_id})"
     )
 
-    # Use vllm's kernel for short sequences
-    torch.ops._C.top_k_per_row_decode(
-        logits_short,
-        next_n,
-        seq_lens_short,
-        indices_vllm,
-        num_rows_short,
-        logits_short.stride(0),
-        logits_short.stride(1),
-        top_k,
+
+def run_large_context_topk_test(
+    batch_size: int,
+    seq_lens: list[int],
+    top_k: int,
+    data_type: str = "random",
+    seed: int = 42,
+) -> None:
+    """
+    Helper to run persistent_topk kernel test with given parameters.
+
+    Args:
+        batch_size: Number of rows/sequences
+        seq_lens: List of sequence lengths (one per row)
+        top_k: Number of top elements to select
+        data_type: Type of test data to generate
+        seed: Random seed for reproducibility
+    """
+    torch.set_default_device("cuda:0")
+    set_random_seed(seed)
+
+    # Create test data
+    num_rows = batch_size
+    max_len = max(seq_lens)
+    lengths = torch.tensor(seq_lens, dtype=torch.int32, device="cuda")
+
+    if data_type == "random":
+        logits = torch.randn(num_rows, max_len, dtype=torch.float32, device="cuda")
+    elif data_type == "sorted_asc":
+        # Each row gets its own ascending sequence based on its length
+        logits = torch.empty(num_rows, max_len, dtype=torch.float32, device="cuda")
+        for i, length in enumerate(seq_lens):
+            logits[i, :length] = torch.arange(
+                length, dtype=torch.float32, device="cuda"
+            )
+            if length < max_len:
+                logits[i, length:] = float("-inf")
+    elif data_type == "sorted_desc":
+        # Each row gets its own descending sequence based on its length
+        logits = torch.empty(num_rows, max_len, dtype=torch.float32, device="cuda")
+        for i, length in enumerate(seq_lens):
+            logits[i, :length] = torch.arange(
+                length, 0, -1, dtype=torch.float32, device="cuda"
+            )
+            if length < max_len:
+                logits[i, length:] = float("-inf")
+    elif data_type == "all_same":
+        logits = torch.ones(num_rows, max_len, dtype=torch.float32, device="cuda")
+        for i, length in enumerate(seq_lens):
+            if length < max_len:
+                logits[i, length:] = float("-inf")
+    elif data_type == "many_ties":
+        # Only 10 unique values, many duplicates
+        logits = torch.randint(0, 10, (num_rows, max_len), device="cuda").float() / 10.0
+        for i, length in enumerate(seq_lens):
+            if length < max_len:
+                logits[i, length:] = float("-inf")
+    elif data_type == "small_differences":
+        # Very small differences to test float precision
+        base = torch.randn(num_rows, max_len, dtype=torch.float32, device="cuda")
+        noise = (
+            torch.randn(num_rows, max_len, dtype=torch.float32, device="cuda") * 1e-6
+        )
+        logits = base + noise
+        for i, length in enumerate(seq_lens):
+            if length < max_len:
+                logits[i, length:] = float("-inf")
+    else:
+        raise ValueError(f"Unknown data_type: {data_type}")
+
+    # Create output tensor
+    indices = torch.empty((num_rows, top_k), dtype=torch.int32, device="cuda")
+
+    workspace = torch.empty(1024 * 1024, dtype=torch.uint8, device="cuda")
+    max_seq_len = max(seq_lens)
+    torch.ops._C.persistent_topk(
+        logits, lengths, indices, workspace, top_k, max_seq_len
     )
 
-    # Test case 2: Long sequences (>= 8192) - should use large_context_topk kernel
-    batch_size_long = 4
-    num_rows_long = batch_size_long * next_n
+    torch.accelerator.synchronize()
+
+    torch_indices = torch.empty((num_rows, top_k), dtype=torch.int32, device="cuda")
+    for i in range(num_rows):
+        length = seq_lens[i]
+        k_i = min(top_k, length)
+        if k_i > 0:
+            idx = logits[i, :length].topk(k_i, dim=-1)[1]
+            torch_indices[i, :k_i] = idx
+            if k_i < top_k:
+                torch_indices[i, k_i:] = -1
+        else:
+            torch_indices[i, :] = -1
+
+    # Compare results
+    for i in range(num_rows):
+        length = seq_lens[i]
+        k_i = min(top_k, length)
+
+        if k_i == 0:
+            continue
+
+        cuda_row = indices[i, :k_i].cpu()
+        torch_row = torch_indices[i, :k_i].cpu()
+
+        # Filter out -1 padding values from cuda_row
+        valid_mask = cuda_row >= 0
+        cuda_row = cuda_row[valid_mask]
+
+        # Compare sets (order may differ for ties)
+        cuda_set = set(cuda_row.tolist())
+        torch_set = set(torch_row.tolist())
+
+        if cuda_set == torch_set:
+            continue
+
+        # If sets differ, check if it's due to equal values (ties)
+        cuda_vals = logits[i, cuda_row].cpu()
+        torch_vals = logits[i, torch_row].cpu()
+
+        # Check that min CUDA value >= max of values NOT in top-k
+        if k_i < length:
+            non_topk_indices = torch.tensor(
+                list(set(range(length)) - cuda_set), dtype=torch.int32
+            )
+            if len(non_topk_indices) > 0:
+                non_topk_vals = logits[i, non_topk_indices].cpu()
+                min_cuda_val = cuda_vals.min()
+                max_non_topk = non_topk_vals.max()
+
+                # Allow small tolerance for floating point errors
+                assert min_cuda_val >= max_non_topk - 1e-4, (
+                    f"Row {i}: CUDA top-k contains values smaller than non-top-k. "
+                    f"Min CUDA: {min_cuda_val}, Max non-top-k: {max_non_topk}, "
+                    f"Length: {length}, k: {k_i}, CUDA indices: {sorted(cuda_set)[:10]}..., "  # noqa: E501
+                    f"Expected indices: {sorted(torch_set)[:10]}..."
+                )
+
+        # For ties, verify the values are close
+        assert torch.allclose(
+            cuda_vals.sort(descending=True)[0],
+            torch_vals.sort(descending=True)[0],
+            rtol=1e-4,
+            atol=1e-4,
+        ), f"""Row {i}: Top-k values don't match.
+            CUDA: {cuda_vals.sort(descending=True)[0][:10]},
+            Torch: {torch_vals.sort(descending=True)[0][:10]}"""
 
-    # Create sequences with max length >= 8192
-    seq_lens_long = torch.randint(
-        8192, 16384, (batch_size_long,), dtype=torch.int32, device="cuda"
+
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="This test requires CUDA")
+@pytest.mark.parametrize(
+    "test_config",
+    [
+        # ==================== CATEGORY: Sequence Length Edge Cases ====================
+        pytest.param(
+            {"seq_lens": [1, 10, 100, 2048], "top_k": 2048, "data_type": "random"},
+            id="seq_len_edge_very_small_to_medium",
+        ),
+        pytest.param(
+            {
+                "seq_lens": [2049, 2100, 2500, 3000],
+                "top_k": 2048,
+                "data_type": "random",
+            },
+            id="seq_len_edge_above_k",
+        ),
+        pytest.param(
+            {"seq_lens": [8000, 16384, 20000], "top_k": 2048, "data_type": "random"},
+            id="algo_transition_filtered_radix",
+        ),
+        # ==================== CATEGORY: Data Distributions ====================
+        pytest.param(
+            {"seq_lens": [5000, 10000], "top_k": 2048, "data_type": "sorted_asc"},
+            id="data_sorted_ascending",
+        ),
+        pytest.param(
+            {"seq_lens": [5000, 10000], "top_k": 2048, "data_type": "sorted_desc"},
+            id="data_sorted_descending",
+        ),
+        pytest.param(
+            {"seq_lens": [5000, 10000], "top_k": 2048, "data_type": "all_same"},
+            id="data_all_same",
+        ),
+        pytest.param(
+            {"seq_lens": [5000, 10000], "top_k": 2048, "data_type": "many_ties"},
+            id="data_many_ties",
+        ),
+        pytest.param(
+            {
+                "seq_lens": [5000, 10000],
+                "top_k": 2048,
+                "data_type": "small_differences",
+            },
+            id="data_float_precision",
+        ),
+        # ==================== CATEGORY: Alignment / Vectorization ====================
+        pytest.param(
+            {
+                "seq_lens": [2055, 2056, 2057, 2063],
+                "top_k": 2048,
+                "data_type": "random",
+            },
+            id="align_vec_boundaries_low",
+        ),
+        pytest.param(
+            {
+                "seq_lens": [4095, 4096, 4097, 4102],
+                "top_k": 2048,
+                "data_type": "random",
+            },
+            id="align_4k_boundary",
+        ),
+        pytest.param(
+            {
+                "seq_lens": [8191, 8192, 8193, 8198],
+                "top_k": 2048,
+                "data_type": "random",
+            },
+            id="align_8k_boundary",
+        ),
+        pytest.param(
+            {
+                "seq_lens": [16383, 16384, 16385, 16390],
+                "top_k": 2048,
+                "data_type": "random",
+            },
+            id="align_16k_boundary",
+        ),
+    ],
+)
+@torch.inference_mode()
+def test_persistent_topk_correctness(test_config: dict) -> None:
+    """
+    Comprehensive correctness tests covering:
+    - Sequence length edge cases (trivial, boundary, varied)
+    - Very small sequences (< 100 elements)
+    - Mixed sequence lengths in same batch
+    - Data distributions (sorted, ties, precision)
+    - Memory alignment / vectorization boundaries
+    """
+    run_large_context_topk_test(
+        batch_size=len(test_config["seq_lens"]),
+        seq_lens=test_config["seq_lens"],
+        top_k=test_config["top_k"],
+        data_type=test_config.get("data_type", "random"),
     )
 
-    row_starts_long = torch.zeros(num_rows_long, dtype=torch.int32, device="cuda")
-    row_indices_long = torch.arange(num_rows_long, device="cuda") // next_n
-    next_n_offset_long = torch.arange(num_rows_long, device="cuda") % next_n
-    row_ends_long = seq_lens_long[row_indices_long] - next_n + next_n_offset_long + 1
 
-    logits_long = create_random_logits(
-        row_starts_long, row_ends_long, torch.float32, 43, clean_logits, "random"
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="This test requires CUDA")
+@pytest.mark.parametrize(
+    "test_config",
+    [
+        # ==================== CATEGORY: Batch Size Scalability ====================
+        pytest.param(
+            {"batch_size": 1, "seq_len": 5000, "top_k": 2048},
+            id="batch_1",
+        ),
+        pytest.param(
+            {"batch_size": 4, "seq_len": 5000, "top_k": 2048},
+            id="batch_4",
+        ),
+        pytest.param(
+            {"batch_size": 32, "seq_len": 5000, "top_k": 2048},
+            id="batch_32",
+        ),
+        pytest.param(
+            {"batch_size": 256, "seq_len": 5000, "top_k": 2048},
+            id="batch_256",
+        ),
+        # ==================== CATEGORY: Single-CTA vs Multi-CTA ====================
+        pytest.param(
+            {"batch_size": 2, "seq_len": 4096, "top_k": 2048},
+            id="single_cta_4k",
+        ),
+        pytest.param(
+            {"batch_size": 2, "seq_len": 8192, "top_k": 2048},
+            id="single_cta_8k",
+        ),
+        pytest.param(
+            {"batch_size": 2, "seq_len": 163840, "top_k": 2048},
+            id="multi_cta_163840_dsv3_max",
+        ),
+        # ==================== CATEGORY: Extreme Cases ====================
+        pytest.param(
+            {"batch_size": 512, "seq_len": 5000, "top_k": 2048},
+            id="extreme_large_batch",
+        ),
+        pytest.param(
+            {"batch_size": 2, "seq_len": 163840, "top_k": 2048},
+            id="extreme_dsv3_max_context",
+        ),
+    ],
+)
+@torch.inference_mode()
+def test_persistent_topk_algorithm_paths(test_config: dict) -> None:
+    """
+    Test different algorithm execution paths (capped at 163840 for DeepSeek V3.2):
+    - Batch size scalability (1, 4, 32, 256)
+    - Single-CTA vs Multi-CTA execution
+    - Extreme configurations (large batch, max context length)
+    """
+    run_large_context_topk_test(
+        batch_size=test_config["batch_size"],
+        seq_lens=[test_config["seq_len"]] * test_config["batch_size"],
+        top_k=test_config["top_k"],
     )
 
-    indices = torch.empty((num_rows_long, top_k), dtype=torch.int32, device="cuda")
 
-    # Use large_context_topk kernel for long sequences
-    if next_n == 1:
-        lengths = seq_lens_long
-    else:
-        offsets = torch.arange(next_n, device=logits_long.device, dtype=torch.int32)
-        lengths = (seq_lens_long.unsqueeze(1) - next_n + 1 + offsets).flatten()
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="This test requires CUDA")
+@torch.inference_mode()
+def test_persistent_topk_stress() -> None:
+    """
+    Stress test with random configurations to catch edge cases.
+    Capped at 163840 (DeepSeek V3.2 max context) for realistic testing.
+    """
+    torch.set_default_device("cuda:0")
+    top_k = 2048
 
-    torch.ops._C.large_context_topk(
-        logits_long,
-        indices,
-        lengths,
-        None,
+    for seed in range(3):
+        set_random_seed(seed)
+
+        # Random batch size (limited for speed)
+        batch_size = torch.randint(1, 32, (1,)).item()
+
+        # Random sequence lengths capped at DeepSeek V3.2 max context
+        seq_lens = torch.randint(100, 163840, (batch_size,)).tolist()
+
+        run_large_context_topk_test(
+            batch_size=batch_size,
+            seq_lens=seq_lens,
+            top_k=top_k,
+            seed=seed,
+        )
+
+
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="This test requires CUDA")
+@pytest.mark.parametrize(
+    "test_config",
+    [
+        # Mixed batch: rows spanning all four paths (trivial, decode, medium, large)
+        pytest.param(
+            {
+                "seq_lens": [2000, 6000, 30000, 80000],
+                "data_type": "random",
+            },
+            id="mixed_all_paths",
+        ),
+        # All decode/medium rows (typical decode scenario)
+        pytest.param(
+            {
+                "seq_lens": [2048, 4096, 8192, 16000],
+                "data_type": "random",
+            },
+            id="all_decode_medium",
+        ),
+        # All large rows
+        pytest.param(
+            {
+                "seq_lens": [70000, 100000, 163840],
+                "data_type": "random",
+            },
+            id="all_large",
+        ),
+        # Boundary around LARGE_THRESHOLD (32K)
+        pytest.param(
+            {
+                "seq_lens": [32767, 32768, 32769, 32772],
+                "data_type": "random",
+            },
+            id="large_threshold_boundary",
+        ),
+        # Single row medium
+        pytest.param(
+            {
+                "seq_lens": [5000],
+                "data_type": "random",
+            },
+            id="single_row_medium",
+        ),
+        # Single row large
+        pytest.param(
+            {
+                "seq_lens": [100000],
+                "top_k": 2048,
+                "data_type": "random",
+            },
+            id="single_row_large",
+        ),
+        # Trivial rows mixed with medium and large
+        pytest.param(
+            {
+                "seq_lens": [100, 2048, 10000, 80000],
+                "data_type": "random",
+            },
+            id="trivial_medium_large_mix",
+        ),
+    ],
+)
+@pytest.mark.parametrize("top_k", [512, 2048])
+@torch.inference_mode()
+def test_persistent_topk(test_config: dict, top_k: int) -> None:
+    """
+    Tests specific to the persistent_topk kernel:
+    - Mixed medium/large rows in the same batch (dynamic per-row dispatch)
+    - Boundary around LARGE_THRESHOLD (32K)
+    - Trivial + medium + large rows in a single batch
+    """
+    run_large_context_topk_test(
+        batch_size=len(test_config["seq_lens"]),
+        seq_lens=test_config["seq_lens"],
+        top_k=top_k,
+        data_type=test_config.get("data_type", "random"),
     )
 
-    torch_indices_short = torch.empty(
-        (num_rows_short, top_k), dtype=torch.int32, device="cuda"
+
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="This test requires CUDA")
+@pytest.mark.parametrize("top_k", [512, 2048])
+@torch.inference_mode()
+def test_persistent_topk_padded_stride(top_k: int) -> None:
+    """
+    Test persistent_topk with padded logits (large stride, small seq_len)
+    to simulate the e2e CUDAGraph scenario where fp8_paged_mqa_logits
+    returns [B, max_model_len] with max_model_len=163840.
+    """
+    set_random_seed(42)
+    torch.set_default_device("cuda:0")
+
+    batch_size = 4
+    padded_stride = 163840  # DeepSeek-V3.2 max_model_len
+    actual_seq_lens = [3000, 5000, 8000, 12000]
+
+    # Create padded logits tensor (like fp8_paged_mqa_logits output)
+    logits = torch.full(
+        (batch_size, padded_stride),
+        float("-inf"),
+        dtype=torch.float32,
+        device="cuda",
     )
-    for i in range(num_rows_short):
-        row_end = int(row_ends_short[i])
-        k_i = min(top_k, row_end)
-        idx = logits_short[i, :row_end].topk(k_i, dim=-1)[1]
-        torch_indices_short[i, :k_i] = idx
+    for i, sl in enumerate(actual_seq_lens):
+        logits[i, :sl] = torch.randn(sl, dtype=torch.float32, device="cuda")
 
-    assert compare_top_k_results(
-        logits_short,
-        indices_vllm,
-        torch_indices_short,
-        row_starts_short,
-        row_ends_short,
-        top_k,
-    ), "top_k_per_row_decode kernel (short sequences) doesn't match torch.topk"
+    lengths = torch.tensor(actual_seq_lens, dtype=torch.int32, device="cuda")
+    indices = torch.empty((batch_size, top_k), dtype=torch.int32, device="cuda")
+    workspace = torch.empty(1024 * 1024, dtype=torch.uint8, device="cuda")
 
-    torch_indices_long = torch.empty(
-        (num_rows_long, top_k), dtype=torch.int32, device="cuda"
+    torch.ops._C.persistent_topk(
+        logits, lengths, indices, workspace, top_k, max(actual_seq_lens)
     )
-    for i in range(num_rows_long):
-        row_end = int(row_ends_long[i])
-        k_i = min(top_k, row_end)
-        idx = logits_long[i, :row_end].topk(k_i, dim=-1)[1]
-        torch_indices_long[i, :k_i] = idx
+    torch.accelerator.synchronize()
 
-    assert compare_top_k_results(
-        logits_long, indices, torch_indices_long, row_starts_long, row_ends_long, top_k
-    ), "large_context_topk kernel (long sequences) doesn't match torch.topk"
+    # Validate against torch.topk
+    for i in range(batch_size):
+        sl = actual_seq_lens[i]
+        k_i = min(top_k, sl)
+        expected = logits[i, :sl].topk(k_i, dim=-1)[1].cpu()
+        actual = indices[i, :k_i].cpu()
+
+        expected_set = set(expected.tolist())
+        actual_set = set(actual.tolist())
+
+        if expected_set != actual_set:
+            # Allow ties
+            expected_vals = logits[i, expected].cpu().sort(descending=True)[0]
+            actual_vals = logits[i, actual].cpu().sort(descending=True)[0]
+            assert torch.allclose(expected_vals, actual_vals, rtol=1e-4, atol=1e-4), (
+                f"Row {i}: persistent_topk with padded stride doesn't match. "
+                f"seq_len={sl}, stride={padded_stride}"
+            )
diff --git a/tests/lora/conftest.py b/tests/lora/conftest.py
index b97a9a0ea274..dea54ed21aea 100644
--- a/tests/lora/conftest.py
+++ b/tests/lora/conftest.py
@@ -3,6 +3,7 @@
 
 import tempfile
 from collections import OrderedDict
+from importlib import reload
 from unittest.mock import MagicMock
 
 import pytest
@@ -43,15 +44,25 @@ def cleanup_fixture(should_do_global_cleanup_after_test: bool):
         cleanup_dist_env_and_memory(shutdown_ray=True)
 
 
+@pytest.fixture
+def maybe_enable_lora_dual_stream(monkeypatch: pytest.MonkeyPatch):
+    if current_platform.is_cuda():
+        monkeypatch.setenv("VLLM_LORA_ENABLE_DUAL_STREAM", "1")
+        import vllm.lora.layers.base_linear
+
+        if not hasattr(vllm.lora.layers.base_linear, "lora_linear_async"):
+            # Reload the module to ensure the environment variable takes effect.
+            reload(vllm.lora.layers.base_linear)
+    yield
+
+
 @pytest.fixture
 def dist_init():
     from tests.utils import ensure_current_vllm_config
 
     temp_file = tempfile.mkstemp()[1]
 
-    backend = "nccl"
-    if current_platform.is_cpu() or current_platform.is_tpu():
-        backend = "gloo"
+    backend = "gloo" if current_platform.is_tpu() else current_platform.dist_backend
 
     with ensure_current_vllm_config():
         init_distributed_environment(
@@ -70,9 +81,7 @@ def dist_init():
 def dist_init_torch_only():
     if torch.distributed.is_initialized():
         return
-    backend = "nccl"
-    if current_platform.is_cpu():
-        backend = "gloo"
+    backend = current_platform.dist_backend
 
     temp_file = tempfile.mkstemp()[1]
     torch.distributed.init_process_group(
@@ -304,6 +313,16 @@ def qwen35_vl_lora_files():
     return snapshot_download(repo_id="jeeejeee/qwen35-4b-all-linear-pokemon-lora")
 
 
+@pytest.fixture(scope="session")
+def qwen36_moe_2d_lora_files():
+    return snapshot_download(repo_id="jeeejeee/qwen36-35ba3b-2d-weights-poken-lora")
+
+
+@pytest.fixture(scope="session")
+def qwen36_moe_3d_lora_files():
+    return snapshot_download(repo_id="jeeejeee/qwen36-35ba3b-moe-all-linear-poken-lora")
+
+
 @pytest.fixture
 def reset_default_device():
     """
diff --git a/tests/lora/test_chatglm3_tp.py b/tests/lora/test_chatglm3_tp.py
index 8f42243387d2..ace4fb5f50ef 100644
--- a/tests/lora/test_chatglm3_tp.py
+++ b/tests/lora/test_chatglm3_tp.py
@@ -1,9 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import pytest
+
 import vllm
 import vllm.config
 from vllm.lora.request import LoRARequest
+from vllm.platforms import current_platform
 
 from ..utils import create_new_process_for_each_test, multi_gpu_test
 
@@ -50,6 +53,9 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
     return generated_texts
 
 
+@pytest.mark.skipif(
+    current_platform.is_cuda_alike(), reason="Skipping to avoid redundant model tests"
+)
 @create_new_process_for_each_test()
 def test_chatglm3_lora(chatglm3_lora_files):
     llm = vllm.LLM(
@@ -70,6 +76,9 @@ def test_chatglm3_lora(chatglm3_lora_files):
         assert output2[i] == EXPECTED_LORA_OUTPUT[i]
 
 
+@pytest.mark.skipif(
+    current_platform.is_cuda_alike(), reason="Skipping to avoid redundant model tests"
+)
 @multi_gpu_test(num_gpus=4)
 def test_chatglm3_lora_tp4(chatglm3_lora_files):
     llm = vllm.LLM(
diff --git a/tests/lora/test_default_mm_loras.py b/tests/lora/test_default_mm_loras.py
index c76d3c6e798e..673e8e85555a 100644
--- a/tests/lora/test_default_mm_loras.py
+++ b/tests/lora/test_default_mm_loras.py
@@ -11,6 +11,7 @@
 from huggingface_hub import snapshot_download
 
 from vllm.lora.request import LoRARequest
+from vllm.platforms import current_platform
 
 from ..conftest import AudioTestAssets, VllmRunner
 from ..utils import create_new_process_for_each_test
@@ -76,6 +77,9 @@ def test_active_default_mm_lora(
     )
 
 
+@pytest.mark.skipif(
+    current_platform.is_cuda_alike(), reason="Skipping to avoid redundant model tests"
+)
 @create_new_process_for_each_test()
 def test_inactive_default_mm_lora(
     vllm_runner: type[VllmRunner],
@@ -92,6 +96,9 @@ def test_inactive_default_mm_lora(
     )
 
 
+@pytest.mark.skipif(
+    current_platform.is_cuda_alike(), reason="Skipping to avoid redundant model tests"
+)
 @create_new_process_for_each_test()
 def test_default_mm_lora_succeeds_with_redundant_lora_request(
     vllm_runner: type[VllmRunner],
@@ -107,6 +114,9 @@ def test_default_mm_lora_succeeds_with_redundant_lora_request(
     )
 
 
+@pytest.mark.skipif(
+    current_platform.is_cuda_alike(), reason="Skipping to avoid redundant model tests"
+)
 @create_new_process_for_each_test()
 def test_default_mm_lora_fails_with_overridden_lora_request(
     vllm_runner: type[VllmRunner],
diff --git a/tests/lora/test_fused_moe_lora_kernel.py b/tests/lora/test_fused_moe_lora_kernel.py
index 66a985a067e9..e50d7d5aacfe 100644
--- a/tests/lora/test_fused_moe_lora_kernel.py
+++ b/tests/lora/test_fused_moe_lora_kernel.py
@@ -637,7 +637,7 @@ def _get_shard_slice(shard_size):
 
     set_random_seed(seed)
 
-    device = torch.device(f"cuda:{local_rank}")
+    device = torch.device(f"{DEVICE_TYPE}:{local_rank}")
     torch.accelerator.set_device_index(device)
     torch.set_default_device(device)
     torch.set_default_dtype(dtype)
@@ -647,6 +647,7 @@ def _get_shard_slice(shard_size):
         rank=local_rank,
         local_rank=local_rank,
         distributed_init_method=init_method,
+        backend=current_platform.dist_backend,
     )
     with ensure_current_vllm_config():
         initialize_model_parallel(world_size, 1)
diff --git a/tests/lora/test_gptoss_tp.py b/tests/lora/test_gptoss_tp.py
index 68dd87233ac0..648660734655 100644
--- a/tests/lora/test_gptoss_tp.py
+++ b/tests/lora/test_gptoss_tp.py
@@ -129,6 +129,7 @@ def test_gpt_oss_lora_tp2(
             tensor_parallel_size=2,
             gpu_memory_utilization=0.8,
             fully_sharded_loras=fully_sharded_loras,
+            enable_expert_parallel=not fully_sharded_loras,
             compilation_config=vllm.config.CompilationConfig(  # Avoid OOM
                 cudagraph_specialize_lora=False,
             ),
diff --git a/tests/lora/test_layers.py b/tests/lora/test_layers.py
index 08fd037249ba..c366b2cf2976 100644
--- a/tests/lora/test_layers.py
+++ b/tests/lora/test_layers.py
@@ -44,6 +44,7 @@
     VocabParallelEmbedding,
     get_masked_input_and_mask,
 )
+from vllm.model_executor.models.deepseek_v2 import DeepSeekV2FusedQkvAProjLinear
 from vllm.platforms import current_platform
 from vllm.utils.torch_utils import set_random_seed
 
@@ -56,13 +57,21 @@
 }
 
 pytestmark = pytest.mark.skipif(
-    not (current_platform.is_cuda_alike() or current_platform.is_cpu()),
+    not (
+        current_platform.is_cuda_alike()
+        or current_platform.is_cpu()
+        or current_platform.is_xpu()
+    ),
     reason="Backend not supported",
 )
 
+DEVICE_TYPE = current_platform.device_type
 DEVICES = (
-    [f"cuda:{i}" for i in range(1 if torch.accelerator.device_count() == 1 else 2)]
-    if current_platform.is_cuda_alike()
+    [
+        f"{DEVICE_TYPE}:{i}"
+        for i in range(1 if torch.accelerator.device_count() == 1 else 2)
+    ]
+    if (current_platform.is_cuda_alike() or current_platform.is_xpu())
     else ["cpu"]
 )
 
@@ -91,7 +100,7 @@ def skip_cuda_with_stage_false(request):
     On cuda-like platforms, we use the same kernels for prefill and decode
     stage, and 'stage' is generally ignored, so we only need to test once.
     """
-    if current_platform.is_cuda_alike():
+    if current_platform.is_cuda_alike() or current_platform.is_xpu():
         try:
             if hasattr(request.node, "callspec") and hasattr(
                 request.node.callspec, "params"
@@ -196,7 +205,7 @@ def create_random_inputs(
     input_size: tuple[int, ...],
     input_range: tuple[float, float],
     input_type: torch.dtype = torch.int,
-    device: torch.device = "cuda",
+    device: torch.device = DEVICE_TYPE,
 ) -> tuple[list[torch.Tensor], list[int], list[int]]:
     """Creates random inputs.
 
@@ -244,6 +253,10 @@ def check_punica_wrapper(punica_wrapper) -> bool:
         from vllm.lora.punica_wrapper.punica_cpu import PunicaWrapperCPU
 
         return type(punica_wrapper) is PunicaWrapperCPU
+    elif current_platform.is_xpu():
+        from vllm.lora.punica_wrapper.punica_xpu import PunicaWrapperXPU
+
+        return type(punica_wrapper) is PunicaWrapperXPU
     else:
         return False
 
@@ -259,7 +272,7 @@ def test_embeddings(
     # For multi-GPU testing of Triton kernel, we must explicitly set the CUDA
     # device, see: https://github.com/triton-lang/triton/issues/2925
     # Same below.
-    if current_platform.is_cuda_alike():
+    if current_platform.is_cuda_alike() or current_platform.is_xpu():
         torch.accelerator.set_device_index(device)
 
     torch.set_default_device(device)
@@ -358,7 +371,7 @@ def create_random_embedding_layer():
 def test_lm_head_logits_processor(
     default_vllm_config, dist_init, num_loras, device, vocab_size, stage
 ) -> None:
-    if current_platform.is_cuda_alike():
+    if current_platform.is_cuda_alike() or current_platform.is_xpu():
         torch.accelerator.set_device_index(device)
 
     torch.set_default_device(device)
@@ -475,7 +488,7 @@ def test_lm_head_logits_processor_invalid_vocab_size(
     default_vllm_config, dist_init, vocab_size, device
 ) -> None:
     """Test that LogitsProcessorWithLoRA raises ValueError for invalid vocab sizes."""
-    if current_platform.is_cuda_alike():
+    if current_platform.is_cuda_alike() or current_platform.is_xpu():
         torch.accelerator.set_device_index(device)
 
     torch.set_default_device(device)
@@ -504,7 +517,7 @@ def test_linear_replicated(
     device,
     stage,
 ) -> None:
-    if current_platform.is_cuda_alike():
+    if current_platform.is_cuda_alike() or current_platform.is_xpu():
         torch.accelerator.set_device_index(device)
 
     max_loras = 8
@@ -517,8 +530,10 @@ def test_linear_replicated(
     punica_wrapper = get_punica_wrapper(8192, 256, device, lora_config=lora_config)
     assert check_punica_wrapper(punica_wrapper)
 
-    def create_random_linear_replicated_layer():
-        linear = ReplicatedLinear(4096, 4096, bias=False, params_dtype=torch.float16)
+    def create_random_linear_replicated_layer(idx: int = 0):
+        linear = ReplicatedLinear(
+            4096, 4096, bias=False, params_dtype=torch.float16, prefix=f"layer_{idx}"
+        )
         linear.weight.data = torch.rand_like(linear.weight.data)
         lora_linear = ReplicatedLinearWithLoRA(linear)
 
@@ -535,7 +550,7 @@ def create_random_linear_replicated_layer():
         set_random_seed(i)
 
         id_to_index = get_random_id_to_index(num_loras, max_loras)
-        linear, lora_linear = create_random_linear_replicated_layer()
+        linear, lora_linear = create_random_linear_replicated_layer(i)
         assert torch.equal(linear.weight, lora_linear.weight)
         lora_linear.set_mapping(punica_wrapper)
         lora_dict, _ = populate_loras(
@@ -611,7 +626,7 @@ def create_random_linear_replicated_layer():
 def test_linear_parallel(
     default_vllm_config, dist_init, num_loras, orientation, fully_shard, device, stage
 ) -> None:
-    if current_platform.is_cuda_alike():
+    if current_platform.is_cuda_alike() or current_platform.is_xpu():
         torch.accelerator.set_device_index(device)
 
     max_loras = 8
@@ -625,10 +640,14 @@ def test_linear_parallel(
     punica_wrapper = get_punica_wrapper(8192, 256, device, lora_config=lora_config)
     assert check_punica_wrapper(punica_wrapper)
 
-    def create_random_linear_parallel_layer():
+    def create_random_linear_parallel_layer(idx: int = 0):
         if orientation == "row":
             linear = RowParallelLinear(
-                4096, 4096, bias=False, params_dtype=torch.float16
+                4096,
+                4096,
+                bias=False,
+                params_dtype=torch.float16,
+                prefix=f"layer_{idx}",
             )
             linear.weight.data = torch.rand_like(linear.weight.data)
             lora_linear = (
@@ -638,7 +657,11 @@ def create_random_linear_parallel_layer():
             )
         else:
             linear = ColumnParallelLinear(
-                4096, 4096, bias=False, params_dtype=torch.float16
+                4096,
+                4096,
+                bias=False,
+                params_dtype=torch.float16,
+                prefix=f"layer_{idx}",
             )
             linear.weight.data = torch.rand_like(linear.weight.data)
             lora_linear = (
@@ -660,7 +683,7 @@ def create_random_linear_parallel_layer():
         set_random_seed(i)
 
         id_to_index = get_random_id_to_index(num_loras, max_loras)
-        linear, lora_linear = create_random_linear_parallel_layer()
+        linear, lora_linear = create_random_linear_parallel_layer(i)
         assert torch.equal(linear.weight, lora_linear.weight)
         lora_linear.set_mapping(punica_wrapper)
         lora_dict, _ = populate_loras(
@@ -736,7 +759,7 @@ def create_random_linear_parallel_layer():
 def test_column_parallel_packed(
     default_vllm_config, dist_init, num_loras, repeats, fully_shard, device, stage
 ) -> None:
-    if current_platform.is_cuda_alike():
+    if current_platform.is_cuda_alike() or current_platform.is_xpu():
         torch.accelerator.set_device_index(device)
 
     max_loras = 8
@@ -750,10 +773,14 @@ def test_column_parallel_packed(
     punica_wrapper = get_punica_wrapper(8192, 256, device, lora_config=lora_config)
     assert check_punica_wrapper(punica_wrapper)
 
-    def create_column_parallel_packed_layer():
+    def create_column_parallel_packed_layer(idx: int = 0):
         if repeats == 2:
             linear = MergedColumnParallelLinear(
-                4096, [4096] * repeats, bias=False, params_dtype=torch.float16
+                4096,
+                [4096] * repeats,
+                bias=False,
+                params_dtype=torch.float16,
+                prefix=f"layer_{idx}",
             )
             linear.weight.data = torch.rand_like(linear.weight.data)
             lora_linear = (
@@ -763,7 +790,12 @@ def create_column_parallel_packed_layer():
             )
         elif repeats == 3:
             linear = QKVParallelLinear(
-                4096, 64, 32, bias=False, params_dtype=torch.float16
+                4096,
+                64,
+                32,
+                bias=False,
+                params_dtype=torch.float16,
+                prefix=f"layer_{idx}",
             )
             linear.weight.data = torch.rand_like(linear.weight.data)
             lora_linear = (
@@ -773,7 +805,12 @@ def create_column_parallel_packed_layer():
             )
         else:
             linear = QKVParallelLinear(
-                4096, 64, 32, bias=False, params_dtype=torch.float16
+                4096,
+                64,
+                32,
+                bias=False,
+                params_dtype=torch.float16,
+                prefix=f"layer_{idx}",
             )
             linear.weight.data = torch.rand_like(linear.weight.data)
             lora_linear = (
@@ -806,7 +843,7 @@ class FakeConfig:
 
         id_to_index = get_random_id_to_index(num_loras, max_loras)
 
-        linear, lora_linear = create_column_parallel_packed_layer()
+        linear, lora_linear = create_column_parallel_packed_layer(i)
         assert torch.equal(linear.weight, lora_linear.weight)
         lora_linear.set_mapping(punica_wrapper)
         lora_dict, sublora_dict = populate_loras(
@@ -884,7 +921,7 @@ class FakeConfig:
 def test_merged_column_parallel_variable_slice(
     default_vllm_config, dist_init, num_loras, num_slices, device, stage
 ) -> None:
-    if current_platform.is_cuda_alike():
+    if current_platform.is_cuda_alike() or current_platform.is_xpu():
         torch.accelerator.set_device_index(device)
 
     max_loras = 8
@@ -898,10 +935,14 @@ def test_merged_column_parallel_variable_slice(
     output_sizes = [1024 + i * 256 for i in range(num_slices)]
     total_output = sum(output_sizes)
 
-    def create_layer():
+    def create_layer(idx: int = 0):
         # Create linear layer
         linear = MergedColumnParallelLinear(
-            4096, output_sizes, bias=False, params_dtype=torch.float16
+            4096,
+            output_sizes,
+            bias=False,
+            params_dtype=torch.float16,
+            prefix=f"layer_{idx}",
         )
         linear.weight.data = torch.rand_like(linear.weight.data)
 
@@ -913,7 +954,7 @@ def create_layer():
     for i in range(NUM_RANDOM_SEEDS):
         set_random_seed(i)
         id_to_index = get_random_id_to_index(num_loras, max_loras)
-        linear, lora_linear = create_layer()
+        linear, lora_linear = create_layer(i)
         lora_linear.set_mapping(punica_wrapper)
 
         # Populate LoRA weights
@@ -1390,7 +1431,107 @@ def test_variable_slice_lora_class_selection(default_vllm_config, dist_init):
         f"for 2 packed modules, got {type(selected_layer_merged).__name__}"
     )
 
-    # Case 5: Plain ColumnParallelLinear (not merged) - common in many models
+    fully_sharded_tp_lora_config = LoRAConfig(
+        max_loras=8,
+        max_lora_rank=16,
+        lora_dtype=torch.float16,
+        fully_sharded_loras=True,
+    )
+    fully_sharded_tp_layer = MergedColumnParallelLinear(
+        4096, [2048, 2048], bias=False, params_dtype=torch.float16
+    )
+    fully_sharded_tp_layer.tp_size = 2
+
+    assert not MergedColumnParallelLinearWithLoRA.can_replace_layer(
+        source_layer=fully_sharded_tp_layer,
+        lora_config=fully_sharded_tp_lora_config,
+        packed_modules_list=packed_modules_two,
+    ), "Generic merged wrapper should reject fully sharded TP layers"
+
+    assert MergedColumnParallelLinearWithShardedLoRA.can_replace_layer(
+        source_layer=fully_sharded_tp_layer,
+        lora_config=fully_sharded_tp_lora_config,
+        packed_modules_list=packed_modules_two,
+    ), "Sharded merged wrapper should remain eligible for fully sharded TP layers"
+
+    selected_fully_sharded_tp_layer = from_layer(
+        fully_sharded_tp_layer,
+        max_loras=8,
+        lora_config=fully_sharded_tp_lora_config,
+        packed_modules_list=packed_modules_two,
+    )
+    assert isinstance(
+        selected_fully_sharded_tp_layer,
+        MergedColumnParallelLinearWithShardedLoRA,
+    ), (
+        "from_layer should select MergedColumnParallelLinearWithShardedLoRA "
+        "for fully sharded TP merged layers, got "
+        f"{type(selected_fully_sharded_tp_layer).__name__}"
+    )
+
+    # Case 5: DeepSeek's fused_qkv_a_proj should reuse the generic merged
+    # wrapper while preserving its custom base forward path.
+    deepseek_fused_layer = DeepSeekV2FusedQkvAProjLinear(
+        4096, [2048, 2048], prefix="model.layers.0.self_attn.fused_qkv_a_proj"
+    )
+    selected_deepseek_layer = from_layer(
+        deepseek_fused_layer,
+        max_loras=8,
+        lora_config=lora_config,
+        packed_modules_list=packed_modules_two,
+    )
+    assert isinstance(selected_deepseek_layer, MergedColumnParallelLinearWithLoRA), (
+        "from_layer should select MergedColumnParallelLinearWithLoRA "
+        f"for DeepSeek fused_qkv_a_proj, got {type(selected_deepseek_layer).__name__}"
+    )
+
+    fully_sharded_lora_config = LoRAConfig(
+        max_loras=8,
+        max_lora_rank=16,
+        lora_dtype=torch.float16,
+        fully_sharded_loras=True,
+    )
+    selected_fully_sharded_deepseek_layer = from_layer(
+        deepseek_fused_layer,
+        max_loras=8,
+        lora_config=fully_sharded_lora_config,
+        packed_modules_list=packed_modules_two,
+    )
+    assert isinstance(
+        selected_fully_sharded_deepseek_layer,
+        MergedColumnParallelLinearWithLoRA,
+    ), (
+        "from_layer should keep using MergedColumnParallelLinearWithLoRA "
+        "for fused_qkv_a_proj when the base layer is effectively unsharded, got "
+        f"{type(selected_fully_sharded_deepseek_layer).__name__}"
+    )
+
+    # Case 6: Generic subclass of MergedColumnParallelLinear with 2 packed
+    # modules should still use the generic merged wrapper.
+    class CustomMergedColumnParallelLinear(MergedColumnParallelLinear):
+        pass
+
+    custom_merged_layer = CustomMergedColumnParallelLinear(
+        4096, [2048, 2048], bias=False, params_dtype=torch.float16
+    )
+    assert MergedColumnParallelLinearWithLoRA.can_replace_layer(
+        source_layer=custom_merged_layer,
+        lora_config=lora_config,
+        packed_modules_list=packed_modules_two,
+    ), "MergedColumnParallelLinearWithLoRA should handle subclasses"
+
+    selected_custom_layer = from_layer(
+        custom_merged_layer,
+        max_loras=8,
+        lora_config=lora_config,
+        packed_modules_list=packed_modules_two,
+    )
+    assert isinstance(selected_custom_layer, MergedColumnParallelLinearWithLoRA), (
+        f"from_layer should select MergedColumnParallelLinearWithLoRA "
+        f"for subclassed merged layers, got {type(selected_custom_layer).__name__}"
+    )
+
+    # Case 7: Plain ColumnParallelLinear (not merged) - common in many models
     # -> ColumnParallelLinearWithLoRA should be selected
     plain_column_parallel = ColumnParallelLinear(
         4096, 4096, bias=False, params_dtype=torch.float16
@@ -1423,7 +1564,7 @@ def test_variable_slice_lora_class_selection(default_vllm_config, dist_init):
         f"for plain ColumnParallelLinear, got {type(selected_plain).__name__}"
     )
 
-    # Case 6: MergedColumnParallelLinear with exactly 2 output sizes
+    # Case 8: MergedColumnParallelLinear with exactly 2 output sizes
     # and empty packed_modules_list
     # -> ColumnParallelLinearWithLoRA should NOT match (packed_modules_list != 1)
     # -> MergedColumnParallelLinearVariableSliceWithLoRA should NOT match (< 3 slices)
@@ -1441,3 +1582,178 @@ def test_variable_slice_lora_class_selection(default_vllm_config, dist_init):
         "MergedColumnParallelLinearVariableSliceWithLoRA "
         "should NOT handle 2 slices even with empty packed_modules_list"
     )
+
+
+@pytest.mark.parametrize(
+    "wrapper_cls",
+    [ColumnParallelLinearWithLoRA, ColumnParallelLinearWithShardedLoRA],
+)
+def test_get_and_maybe_dequant_weights_accepts_lora_wrappers(dist_init, wrapper_cls):
+    from vllm.model_executor.layers.quantization.utils.quant_utils import (
+        get_and_maybe_dequant_weights,
+    )
+
+    linear = ColumnParallelLinear(4096, 4096, bias=False, params_dtype=torch.float16)
+    lora_linear = wrapper_cls(linear)
+
+    # Should work with LoRA wrappers and return [out, in] weights.
+    dequant_weight = get_and_maybe_dequant_weights(lora_linear, out_dtype=torch.float16)
+    assert dequant_weight.shape == linear.weight.shape
+
+
+@torch.inference_mode()
+@pytest.mark.parametrize("device", DEVICES)
+@pytest.mark.parametrize("stage", STAGES)
+@pytest.mark.parametrize("fully_sharded", [False, True])
+def test_deepseek_fused_qkv_a_proj_lora_preserves_base_forward(
+    default_vllm_config, dist_init, device, stage, fully_sharded
+):
+    if current_platform.is_cuda_alike() or current_platform.is_xpu():
+        torch.accelerator.set_device_index(device)
+
+    torch.set_default_device(device)
+    dtype = (
+        torch.float16
+        if (current_platform.is_cuda_alike() or current_platform.is_xpu())
+        else torch.float32
+    )
+    max_loras = 8
+    lora_config = LoRAConfig(
+        max_loras=max_loras,
+        max_lora_rank=8,
+        lora_dtype=dtype,
+        fully_sharded_loras=fully_sharded,
+    )
+    punica_wrapper = get_punica_wrapper(8192, 256, device, lora_config=lora_config)
+    assert check_punica_wrapper(punica_wrapper)
+
+    class OffsetDeepSeekFusedQkvAProjLinear(DeepSeekV2FusedQkvAProjLinear):
+        def forward(self, input_):
+            output, output_bias = super().forward(input_)
+            return output + 1, output_bias
+
+    layer = OffsetDeepSeekFusedQkvAProjLinear(
+        32, [16, 16], prefix="model.layers.0.self_attn.fused_qkv_a_proj"
+    )
+    layer.weight.data = torch.rand_like(layer.weight.data, dtype=dtype)
+
+    lora_layer = MergedColumnParallelLinearWithLoRA(layer)
+    lora_layer.create_lora_weights(max_loras, lora_config)
+    lora_layer.set_mapping(punica_wrapper)
+
+    id_to_index = get_random_id_to_index(1, max_loras, log=False)
+    active_slot = next(i for i, lora_id in enumerate(id_to_index) if lora_id == 1)
+    lora_a = [
+        torch.rand(8, 32, dtype=dtype, device=device),
+        torch.rand(8, 32, dtype=dtype, device=device),
+    ]
+    lora_b = [
+        torch.rand(16, 8, dtype=dtype, device=device),
+        torch.rand(16, 8, dtype=dtype, device=device),
+    ]
+    lora_layer.set_lora(active_slot, lora_a=lora_a, lora_b=lora_b)
+
+    inputs, index_mapping, prompt_mapping = create_random_inputs(
+        active_lora_ids=[1],
+        num_inputs=4,
+        input_size=(1, 32),
+        input_range=(0, 1),
+        input_type=dtype,
+        device=device,
+    )
+    lora_mapping = LoRAMapping(index_mapping, prompt_mapping, is_prefill=stage)
+    punica_wrapper.update_metadata(lora_mapping, id_to_index, max_loras, 512)
+
+    lora_result = lora_layer(torch.cat(inputs))[0]
+
+    expected_results = []
+    for input_ in inputs:
+        result = layer(input_)[0]
+        result[:, :16] += input_ @ lora_a[0].T @ lora_b[0].T
+        result[:, 16:] += input_ @ lora_a[1].T @ lora_b[1].T
+        expected_results.append(result)
+
+    rtol, atol = TOLERANCES[lora_result.dtype]
+    torch.testing.assert_close(
+        lora_result, torch.cat(expected_results), rtol=rtol, atol=atol
+    )
+
+    merged_layer = OffsetDeepSeekFusedQkvAProjLinear(
+        32, [16, 16], prefix="model.layers.0.self_attn.fused_qkv_a_proj"
+    )
+    merged_layer.weight.data = layer.weight.data.clone()
+    merged_layer.weight.data[:16].add_(lora_b[0] @ lora_a[0])
+    merged_layer.weight.data[16:].add_(lora_b[1] @ lora_a[1])
+    merged_result = merged_layer(torch.cat(inputs))[0]
+
+    torch.testing.assert_close(lora_result, merged_result, rtol=rtol, atol=atol)
+
+
+@torch.inference_mode()
+@pytest.mark.parametrize("device", DEVICES)
+@pytest.mark.parametrize("stage", STAGES)
+def test_replicated_lora_preserves_base_forward_for_subclasses(
+    default_vllm_config, dist_init, device, stage
+):
+    if current_platform.is_cuda_alike() or current_platform.is_xpu():
+        torch.accelerator.set_device_index(device)
+
+    torch.set_default_device(device)
+    dtype = (
+        torch.float16
+        if current_platform.is_cuda_alike() or current_platform.is_xpu()
+        else torch.float32
+    )
+    max_loras = 8
+    lora_config = LoRAConfig(max_loras=max_loras, max_lora_rank=8, lora_dtype=dtype)
+    punica_wrapper = get_punica_wrapper(8192, 256, device, lora_config=lora_config)
+    assert check_punica_wrapper(punica_wrapper)
+
+    class OffsetReplicatedLinear(ReplicatedLinear):
+        def forward(self, input_):
+            output, output_bias = super().forward(input_)
+            return output + 1, output_bias
+
+    layer = OffsetReplicatedLinear(32, 16, bias=False, params_dtype=dtype)
+    layer.weight.data = torch.rand_like(layer.weight.data, dtype=dtype)
+
+    lora_layer = ReplicatedLinearWithLoRA(layer)
+    lora_layer.create_lora_weights(max_loras, lora_config)
+    lora_layer.set_mapping(punica_wrapper)
+
+    id_to_index = get_random_id_to_index(1, max_loras, log=False)
+    active_slot = next(i for i, lora_id in enumerate(id_to_index) if lora_id == 1)
+    lora_a = torch.rand(8, 32, dtype=dtype, device=device)
+    lora_b = torch.rand(16, 8, dtype=dtype, device=device)
+    lora_layer.set_lora(active_slot, lora_a=lora_a, lora_b=lora_b)
+
+    inputs, index_mapping, prompt_mapping = create_random_inputs(
+        active_lora_ids=[1],
+        num_inputs=4,
+        input_size=(1, 32),
+        input_range=(0, 1),
+        input_type=dtype,
+        device=device,
+    )
+    lora_mapping = LoRAMapping(index_mapping, prompt_mapping, is_prefill=stage)
+    punica_wrapper.update_metadata(lora_mapping, id_to_index, max_loras, 512)
+
+    lora_result = lora_layer(torch.cat(inputs))[0]
+
+    expected_results = []
+    for input_ in inputs:
+        result = layer(input_)[0]
+        result += input_ @ lora_a.T @ lora_b.T
+        expected_results.append(result)
+
+    rtol, atol = TOLERANCES[lora_result.dtype]
+    torch.testing.assert_close(
+        lora_result, torch.cat(expected_results), rtol=rtol, atol=atol
+    )
+
+    merged_layer = OffsetReplicatedLinear(32, 16, bias=False, params_dtype=dtype)
+    merged_layer.weight.data = layer.weight.data.clone()
+    merged_layer.weight.data.add_(lora_b @ lora_a)
+    merged_result = merged_layer(torch.cat(inputs))[0]
+
+    torch.testing.assert_close(lora_result, merged_result, rtol=rtol, atol=atol)
diff --git a/tests/lora/test_llama_tp.py b/tests/lora/test_llama_tp.py
index 483235ff5129..42f6ddc2f690 100644
--- a/tests/lora/test_llama_tp.py
+++ b/tests/lora/test_llama_tp.py
@@ -10,6 +10,7 @@
 from vllm import LLM
 from vllm.lora.request import LoRARequest
 from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
+from vllm.platforms import current_platform
 
 from ..utils import VLLM_PATH, create_new_process_for_each_test, multi_gpu_test
 
@@ -139,6 +140,9 @@ def test_llama_lora(llama32_lora_files, cudagraph_specialize_lora: bool):
     generate_and_test(llm, llama32_lora_files)
 
 
+@pytest.mark.skipif(
+    current_platform.is_cuda_alike(), reason="Skipping to avoid redundant model tests"
+)
 @multi_gpu_test(num_gpus=4)
 def test_llama_lora_tp4(llama32_lora_files):
     llm = vllm.LLM(
@@ -184,7 +188,7 @@ def test_tp2_serialize_and_deserialize_lora(
         result = subprocess.run(
             [
                 sys.executable,
-                f"{VLLM_PATH}/examples/others/tensorize_vllm_model.py",
+                f"{VLLM_PATH}/examples/features/tensorize_vllm_model.py",
                 "--model",
                 MODEL_PATH,
                 "--lora-path",
@@ -224,6 +228,10 @@ def test_tp2_serialize_and_deserialize_lora(
         max_model_len=1024,
         tensor_parallel_size=2,
         max_loras=2,
+        # Leave headroom for LoRA adapter loading and Triton JIT
+        # compilation, which can allocate GPU memory concurrently
+        # during the first inference step.
+        gpu_memory_utilization=0.85,
     )
 
     tc_as_dict = tensorizer_config.to_serializable()
diff --git a/tests/lora/test_lora_checkpoints.py b/tests/lora/test_lora_checkpoints.py
index e6816e83da00..7c263e2a2276 100644
--- a/tests/lora/test_lora_checkpoints.py
+++ b/tests/lora/test_lora_checkpoints.py
@@ -5,7 +5,9 @@
 
 from vllm.lora.lora_model import LoRAModel
 from vllm.lora.peft_helper import PEFTHelper
+from vllm.lora.utils import parse_fine_tuned_lora_name
 from vllm.model_executor.models.baichuan import BaiChuanBaseForCausalLM
+from vllm.model_executor.models.gemma4 import Gemma4ForCausalLM
 from vllm.model_executor.models.utils import WeightsMapper
 
 lora_lst = ["baichuan7B", "baichuan7B-zero", "baichuan7B-zero-regex", "chatglm3-6b"]
@@ -128,3 +130,24 @@ def test_lora_weights_mapping(baichuan_lora_files):
     for name in lora_model.loras:
         assert name.startswith(hf_to_vllm_mapper.orig_to_new_prefix["model."])
         assert ".baichuan_layers." in name
+
+
+def test_gemma4_lora_weights_mapping():
+    mapper = Gemma4ForCausalLM.hf_to_vllm_mapper
+    name = "base_model.model.model.language_model.layers.9.mlp.down_proj.lora_A.weight"
+    assert parse_fine_tuned_lora_name(name, mapper) == (
+        "model.layers.9.mlp.down_proj",
+        True,
+    )
+
+
+def test_gemma4_moe_lora_weights_mapping():
+    mapper = Gemma4ForCausalLM.hf_to_vllm_mapper
+    name = (
+        "base_model.model.model.language_model.layers.9.moe.experts."
+        "gate_up_proj.lora_B.weight"
+    )
+    assert parse_fine_tuned_lora_name(name, mapper) == (
+        "model.layers.9.moe.gate_up_proj",
+        False,
+    )
diff --git a/tests/lora/test_lora_manager.py b/tests/lora/test_lora_manager.py
index e7addab119df..49436d662431 100644
--- a/tests/lora/test_lora_manager.py
+++ b/tests/lora/test_lora_manager.py
@@ -13,6 +13,7 @@
 from vllm.lora.layers import (
     ColumnParallelLinearWithLoRA,
     MergedColumnParallelLinearWithLoRA,
+    ReplicatedLinearWithLoRA,
     RowParallelLinearWithLoRA,
 )
 from vllm.lora.lora_model import LoRAModel
@@ -26,6 +27,7 @@
 from vllm.lora.peft_helper import PEFTHelper
 from vllm.lora.request import LoRARequest
 from vllm.lora.worker_manager import LRUCacheWorkerLoRAManager, WorkerLoRAManager
+from vllm.model_executor.layers.fused_moe import GateLinear
 from vllm.platforms import current_platform
 
 from .utils import create_peft_lora
@@ -35,10 +37,10 @@
     "lm_head": "output_embeddings",
 }
 
-
+DEVICE_TYPE = current_platform.device_type
 DEVICES = (
-    [f"cuda:{i}" for i in range(1 if torch.accelerator.device_count() == 1 else 2)]
-    if current_platform.is_cuda_alike()
+    [f"{DEVICE_TYPE}:{i}" for i in range(min(torch.accelerator.device_count(), 2))]
+    if (current_platform.is_cuda_alike() or current_platform.is_xpu())
     else ["cpu"]
 )
 
@@ -132,6 +134,229 @@ def test_replace_submodules(default_vllm_config, dist_init, dummy_model):
     assert isinstance(model.get_submodule("layer1.dense2"), RowParallelLinearWithLoRA)
 
 
+def test_wrap_replicated_linear_subclasses(default_vllm_config, dist_init, dummy_model):
+    from vllm.model_executor.layers.linear import ReplicatedLinear
+
+    class CustomReplicatedLinear(ReplicatedLinear):
+        pass
+
+    model = dummy_model
+    model.add_module("custom_gate", CustomReplicatedLinear(10, 10, bias=False))
+
+    manager = LoRAModelManager(
+        model,
+        1,
+        1,
+        1,
+        LoRAConfig(
+            max_lora_rank=8, max_cpu_loras=8, max_loras=8, lora_dtype=DEFAULT_DTYPE
+        ),
+        torch.device(DEVICES[0]),
+    )
+
+    assert isinstance(
+        manager.model.get_submodule("custom_gate"), ReplicatedLinearWithLoRA
+    )
+
+
+def test_wrap_gate_linear(default_vllm_config, dist_init, dummy_model):
+    model = dummy_model
+    model.add_module("router_gate", GateLinear(10, 4, bias=False))
+
+    manager = LoRAModelManager(
+        model,
+        1,
+        1,
+        1,
+        LoRAConfig(
+            max_lora_rank=8, max_cpu_loras=8, max_loras=8, lora_dtype=DEFAULT_DTYPE
+        ),
+        torch.device(DEVICES[0]),
+    )
+
+    assert isinstance(
+        manager.model.get_submodule("router_gate"), ReplicatedLinearWithLoRA
+    )
+
+
+def test_dedup_shared_module_across_paths(default_vllm_config, dist_init, dummy_model):
+    """A module reachable from two attribute paths (e.g. a MoE gate held
+    both directly on the block and inside its inner runner) must produce a
+    single LoRA wrapper. Both paths must end up pointing to that same
+    wrapper instance, and only the canonical path should live in
+    `manager.modules` — otherwise activate_adapter would call `reset_lora`
+    on the alias and clobber weights set under the canonical name.
+    """
+    from vllm.model_executor.layers.linear import ReplicatedLinear
+
+    class AliasContainer(nn.Module):
+        def __init__(self, gate: nn.Module):
+            super().__init__()
+            self.gate = gate  # canonical path: "moe.gate"
+
+            # Inner submodule holding the SAME gate instance under another
+            # path. This mirrors how FusedMoE.runner.gate references the
+            # block's gate in qwen3_moe.
+            class _Runner(nn.Module):
+                def __init__(self, g):
+                    super().__init__()
+                    self.gate = g  # alias path: "moe.runner.gate"
+
+            self.runner = _Runner(gate)
+
+    gate = ReplicatedLinear(10, 4, bias=False)
+    model = dummy_model
+    model.add_module("moe", AliasContainer(gate))
+
+    assert model.moe.gate is model.moe.runner.gate
+
+    manager = LoRAModelManager(
+        model,
+        1,
+        1,
+        1,
+        LoRAConfig(
+            max_lora_rank=8, max_cpu_loras=8, max_loras=8, lora_dtype=DEFAULT_DTYPE
+        ),
+        torch.device(DEVICES[0]),
+    )
+
+    canonical = manager.model.get_submodule("moe.gate")
+    alias = manager.model.get_submodule("moe.runner.gate")
+
+    # Same wrapper instance on both paths so forward through either side
+    # sees the LoRA-augmented module.
+    assert isinstance(canonical, ReplicatedLinearWithLoRA)
+    assert alias is canonical
+
+    # Only the canonical path is tracked as a LoRA target. Tracking the
+    # alias would cause activate_adapter to reset_lora on it after the
+    # canonical entry already populated the weights.
+    assert "moe.gate" in manager.modules
+    assert "moe.runner.gate" not in manager.modules
+
+
+def test_lm_head_exempt_from_dedup(default_vllm_config, dist_init, dummy_model):
+    """The dedup logic must NOT collapse `lm_head` even when it is reachable
+    from another attribute path (tied-embedding models do
+    `self.lm_head = self.model.embed_tokens`, sharing the same nn.Module
+    instance). The lm_head branch additionally rewires `logits_processor`
+    into a `LogitsProcessorWithLoRA`, so skipping it would silently break
+    LoRA on lm_head.
+    """
+    from vllm.lora.layers import LogitsProcessorWithLoRA
+
+    # Add a non-lm_head alias to the same module instance as lm_head. The
+    # dedup keys on id(module); without the lm_head exemption the alias
+    # would consume the wrapped_by_id slot first and lm_head would be
+    # silently skipped, so logits_processor would never be wrapped.
+    model = dummy_model
+    model.add_module("embed_tokens", model.lm_head)
+    assert model.embed_tokens is model.lm_head
+
+    manager = LoRAModelManager(
+        model,
+        1,
+        1,
+        1,
+        LoRAConfig(
+            max_lora_rank=8, max_cpu_loras=8, max_loras=8, lora_dtype=DEFAULT_DTYPE
+        ),
+        torch.device(DEVICES[0]),
+    )
+
+    # lm_head's special handling still ran: logits_processor got wrapped
+    # and the lm_head entry is tracked under self.modules.
+    assert isinstance(
+        manager.model.get_submodule("logits_processor"), LogitsProcessorWithLoRA
+    )
+    assert "lm_head" in manager.modules
+
+
+def test_skip_unsupported_matched_modules(default_vllm_config, dist_init, dummy_model):
+    class UnsupportedContainer(nn.Module):
+        def __init__(self):
+            super().__init__()
+            # This name matches a supported target suffix ("dense1"),
+            # but nn.Linear is not currently a LoRA-wrappable layer type.
+            self.dense1 = nn.Linear(10, 10, bias=False)
+
+    model = dummy_model
+    model.add_module("unsupported", UnsupportedContainer())
+
+    manager = LoRAModelManager(
+        model,
+        1,
+        1,
+        1,
+        LoRAConfig(
+            max_lora_rank=8, max_cpu_loras=8, max_loras=8, lora_dtype=DEFAULT_DTYPE
+        ),
+        torch.device(DEVICES[0]),
+    )
+
+    # Should not crash and should keep unsupported matched modules unchanged.
+    assert isinstance(manager.model.get_submodule("unsupported.dense1"), nn.Linear)
+    assert "unsupported.dense1" not in manager.modules
+
+
+def test_target_modules_fail_closed_on_unsupported_matched_modules(
+    default_vllm_config, dist_init, dummy_model
+):
+    class UnsupportedContainer(nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.dense1 = nn.Linear(10, 10, bias=False)
+
+    model = dummy_model
+    model.add_module("unsupported", UnsupportedContainer())
+
+    with pytest.raises(ValueError, match="unsupported.dense1"):
+        LoRAModelManager(
+            model,
+            1,
+            1,
+            1,
+            LoRAConfig(
+                max_lora_rank=8,
+                max_cpu_loras=8,
+                max_loras=8,
+                lora_dtype=DEFAULT_DTYPE,
+                target_modules=["dense1"],
+            ),
+            torch.device(DEVICES[0]),
+        )
+
+
+def test_get_dummy_lora_warmup_rank_for_fully_sharded_moe():
+    manager = LoRAModelManager.__new__(LoRAModelManager)
+    manager.lora_config = LoRAConfig(
+        max_lora_rank=64,
+        max_cpu_loras=1,
+        max_loras=1,
+        lora_dtype=DEFAULT_DTYPE,
+        fully_sharded_loras=True,
+    )
+
+    class DummyModule:
+        def __init__(self, tp_size: int, fully_sharded: bool):
+            self.tp_size = tp_size
+            self.fully_sharded = fully_sharded
+
+    manager.modules = {
+        "model.layers.0.self_attn.q_proj": DummyModule(
+            tp_size=32,
+            fully_sharded=True,
+        ),
+        "model.layers.0.mlp.experts": DummyModule(
+            tp_size=32,
+            fully_sharded=True,
+        ),
+    }
+
+    assert manager.get_dummy_lora_warmup_rank(8) == 32
+
+
 @pytest.mark.parametrize("device", DEVICES)
 def test_lora_model_manager(default_vllm_config, dist_init, dummy_model, device):
     model = dummy_model
@@ -796,107 +1021,19 @@ def test_target_modules_none_uses_all(
 
 
 @pytest.mark.parametrize("device", DEVICES)
-def test_load_adapter_warns_on_unsupported_modules(
-    default_vllm_config, dist_init, dummy_model_gate_up, device, tmp_path
+def test_target_modules_match_packed_runtime_modules(
+    default_vllm_config, dist_init, dummy_model_gate_up, device
 ):
-    """Test that _load_adapter warns when a LoRA adapter contains modules
-    not in the model's supported LoRA target modules."""
-    from unittest.mock import patch
-
-    import vllm.lora.worker_manager as wm_module
-
-    lora_config = LoRAConfig(
-        max_lora_rank=8, max_cpu_loras=4, max_loras=4, lora_dtype=DEFAULT_DTYPE
-    )
-
-    dummy_lora_files = f"{tmp_path}/lora_adapter"
-    os.makedirs(dummy_lora_files, exist_ok=True)
-    create_peft_lora(
-        dummy_model_gate_up,
-        save_dir=dummy_lora_files,
-        target_modules=["layer1.dense1", "dense2"],
-        lora_dtype=DEFAULT_DTYPE,
-    )
-
-    model_config = ModelConfig(max_model_len=16)
-    vllm_config = VllmConfig(model_config=model_config, lora_config=lora_config)
-    vllm_config.scheduler_config.max_num_seqs = 4
-    vllm_config.scheduler_config.max_num_batched_tokens = 2
-
-    worker_manager = WorkerLoRAManager(vllm_config, device, EMBEDDING_MODULES)
-    worker_manager.vocab_size = dummy_model_gate_up.unpadded_vocab_size
-    worker_manager.create_lora_manager(dummy_model_gate_up)
-
-    # Patch from_local_checkpoint to inject an unsupported module
-    original_from_checkpoint = LoRAModel.from_local_checkpoint
-
-    def patched_from_checkpoint(*args, **kwargs):
-        lora = original_from_checkpoint(*args, **kwargs)
-        lora.loras["unsupported_module"] = LoRALayerWeights(
-            module_name="unsupported_module",
-            rank=8,
-            lora_alpha=16,
-            lora_a=torch.randn(8, 10),
-            lora_b=torch.randn(10, 8),
-        )
-        return lora
-
-    lora_request = LoRARequest("test", 1, dummy_lora_files)
-    with (
-        patch.object(LoRAModel, "from_local_checkpoint", patched_from_checkpoint),
-        patch.object(wm_module.logger, "warning_once") as mock_warning,
-    ):
-        worker_manager._load_adapter(lora_request)
-        warning_args = mock_warning.call_args_list
-        found = any("unsupported_module" in str(call) for call in warning_args)
-        assert found, (
-            f"Expected warning about 'unsupported_module', got: {warning_args}"
-        )
-
-
-@pytest.mark.parametrize("device", DEVICES)
-def test_load_adapter_warns_on_target_modules_restriction(
-    default_vllm_config, dist_init, dummy_model_gate_up, device, tmp_path
-):
-    """Test that _load_adapter warns when a LoRA adapter contains modules
-    excluded by the deployment-time target_modules restriction."""
-    from unittest.mock import patch
-
-    import vllm.lora.worker_manager as wm_module
-
-    # Restrict to only dense2 — adapter has dense1 which will be excluded
-    lora_config = LoRAConfig(
-        max_lora_rank=8,
-        max_cpu_loras=4,
-        max_loras=4,
-        lora_dtype=DEFAULT_DTYPE,
-        target_modules=["dense2"],
-    )
-
-    dummy_lora_files = f"{tmp_path}/lora_adapter"
-    os.makedirs(dummy_lora_files, exist_ok=True)
-    create_peft_lora(
+    """Packed runtime modules should be selected by their adapter-visible names."""
+    _test_target_modules(
         dummy_model_gate_up,
-        save_dir=dummy_lora_files,
-        target_modules=["layer1.dense1", "dense2"],
-        lora_dtype=DEFAULT_DTYPE,
+        ["gate_proj"],
+        device,
+        expected_lora=[("gate_up_proj", MergedColumnParallelLinearWithLoRA)],
+        expected_no_lora=[
+            ("dense1", ColumnParallelLinearWithLoRA),
+            ("dense2", RowParallelLinearWithLoRA),
+            ("layer1.dense1", ColumnParallelLinearWithLoRA),
+            ("layer1.dense2", RowParallelLinearWithLoRA),
+        ],
     )
-
-    model_config = ModelConfig(max_model_len=16)
-    vllm_config = VllmConfig(model_config=model_config, lora_config=lora_config)
-    vllm_config.scheduler_config.max_num_seqs = 4
-    vllm_config.scheduler_config.max_num_batched_tokens = 2
-
-    worker_manager = WorkerLoRAManager(vllm_config, device, EMBEDDING_MODULES)
-    worker_manager.vocab_size = dummy_model_gate_up.unpadded_vocab_size
-    worker_manager.create_lora_manager(dummy_model_gate_up)
-
-    lora_request = LoRARequest("test", 1, dummy_lora_files)
-    with patch.object(wm_module.logger, "warning_once") as mock_warning:
-        worker_manager._load_adapter(lora_request)
-        warning_args = mock_warning.call_args_list
-        # dense1 is supported by the model but excluded by target_modules
-        found = any("target_modules" in str(call) for call in warning_args)
-        assert found, (
-            f"Expected warning about target_modules restriction, got: {warning_args}"
-        )
diff --git a/tests/lora/test_lora_utils.py b/tests/lora/test_lora_utils.py
index da66aa60b0d8..603ec9297491 100644
--- a/tests/lora/test_lora_utils.py
+++ b/tests/lora/test_lora_utils.py
@@ -58,3 +58,24 @@ def test_exact_name_match(self):
 
     def test_exact_name_no_match(self):
         assert not is_in_target_modules("dense3", ["dense1", "dense2"])
+
+    def test_packed_parent_matches_child_target_modules(self):
+        assert is_in_target_modules(
+            "model.layers.0.mlp.gate_up_proj",
+            ["gate_proj", "up_proj"],
+            {"gate_up_proj": ["gate_proj", "up_proj"]},
+        )
+
+    def test_packed_child_matches_parent_target_modules(self):
+        assert is_in_target_modules(
+            "model.layers.0.mlp.gate_proj",
+            ["gate_up_proj"],
+            {"gate_up_proj": ["gate_proj", "up_proj"]},
+        )
+
+    def test_fused_parent_matches_child_target_modules(self):
+        assert is_in_target_modules(
+            "model.layers.0.self_attn.fused_qkv_a_proj",
+            ["q_a_proj", "kv_a_proj_with_mqa"],
+            {"fused_qkv_a_proj": ["q_a_proj", "kv_a_proj_with_mqa"]},
+        )
diff --git a/tests/lora/test_minicpmv_tp.py b/tests/lora/test_minicpmv_tp.py
index e430826461a1..0090f9c569b0 100644
--- a/tests/lora/test_minicpmv_tp.py
+++ b/tests/lora/test_minicpmv_tp.py
@@ -1,7 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from importlib.metadata import version
+
 import pytest
+from packaging.version import Version
 
 import vllm
 from vllm.assets.image import ImageAsset
@@ -10,6 +13,14 @@
 
 from ..utils import multi_gpu_test
 
+pytestmark = pytest.mark.skipif(
+    Version("5.0") <= Version(version("transformers")),
+    reason=(
+        "MiniCPMV custom processor uses tokenizer.im_start_id which is not "
+        "available on TokenizersBackend in transformers v5.0+"
+    ),
+)
+
 MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5"
 
 PROMPT_TEMPLATE = (
@@ -57,6 +68,9 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
     return generated_texts
 
 
+@pytest.mark.skipif(
+    current_platform.is_cuda_alike(), reason="Skipping to avoid redundant model tests"
+)
 def test_minicpmv_lora(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,
diff --git a/tests/lora/test_moe_lora_align_sum.py b/tests/lora/test_moe_lora_align_sum.py
index bb46b4d86807..77a44970ba3b 100644
--- a/tests/lora/test_moe_lora_align_sum.py
+++ b/tests/lora/test_moe_lora_align_sum.py
@@ -6,6 +6,9 @@
 import torch
 
 from vllm import _custom_ops as ops
+from vllm.platforms import current_platform
+
+DEVICE_TYPE = current_platform.device_type
 
 
 def round_up(x, base):
@@ -27,7 +30,7 @@ def sample_data(num_experts, max_loras, num_tokens, topk_num):
             topk_ids[i, j] = pool[j]
         token_lora_mapping[i] = random.randint(0, max_loras - 1)
 
-    return topk_ids.to("cuda"), token_lora_mapping.to("cuda")
+    return topk_ids.to(DEVICE_TYPE), token_lora_mapping.to(DEVICE_TYPE)
 
 
 @pytest.mark.parametrize("num_tokens", [100, 200, 1024, 4096])  # 81920
@@ -56,14 +59,21 @@ def test_moe_lora_align_block_size(
         (max_loras * max_num_tokens_padded,),
         topk_ids.numel(),
         dtype=torch.int32,
-        device="cuda",
+        device=DEVICE_TYPE,
     )
     expert_ids = torch.full(
-        (max_loras * max_num_m_blocks,), num_experts, dtype=torch.int32, device="cuda"
+        (max_loras * max_num_m_blocks,),
+        num_experts,
+        dtype=torch.int32,
+        device=DEVICE_TYPE,
+    )
+    num_tokens_post_pad = torch.zeros(
+        (max_loras,), dtype=torch.int32, device=DEVICE_TYPE
+    )
+    adapter_enabled = torch.ones(
+        (max_loras + 1,), dtype=torch.int32, device=DEVICE_TYPE
     )
-    num_tokens_post_pad = torch.zeros((max_loras,), dtype=torch.int32, device="cuda")
-    adapter_enabled = torch.ones((max_loras + 1,), dtype=torch.int32, device="cuda")
-    lora_ids = torch.arange(max_loras + 2, dtype=torch.int32, device="cuda")
+    lora_ids = torch.arange(max_loras + 2, dtype=torch.int32, device=DEVICE_TYPE)
 
     # call kernel
     ops.moe_lora_align_block_size(
@@ -94,5 +104,222 @@ def test_moe_lora_align_block_size(
                 assert torch.all(topk_ids.view(-1)[indices] == expert_id)
 
 
+# Sentinel values for the regression tests below. Distinctive out-of-domain
+# ints so that "kernel never wrote this slot" is directly observable: the
+# kernel only ever writes a real expert id in [0, num_experts) or -1
+# (expert_ids), a token index or the `numel` padding value (sorted_token_ids),
+# and a block-aligned cumsum count (num_tokens_post_pad).
+SENTINEL_EXPERT = -2
+SENTINEL_TOKEN = -7
+SENTINEL_NPAD = -13
+
+
+def _build_and_run_align(
+    *,
+    num_lora_tokens,
+    num_base_tokens,
+    max_loras,
+    num_experts=64,
+    topk_num=6,
+    block_size=16,
+    lora_ids_override=None,
+    disabled_slots=(),
+    seed=1,
+):
+    """Build inputs the way ``LoRAKernelMeta.prepare_tensors`` does, run
+    ``moe_lora_align_block_size``, and return a dict of result tensors plus
+    derived sizes. Output buffers are pre-filled with ``SENTINEL_*`` so
+    callers can assert which slots the kernel did / did not touch.
+
+    Tokens are assigned to LoRA slot 0 (first ``num_lora_tokens``) then -1
+    (remaining ``num_base_tokens``), matching the "mixed base + 1 LoRA"
+    shape used to repro vllm-project/vllm#32235.
+
+    ``lora_ids_override``: optional 1-D int tensor of length ``max_loras+1``
+    used verbatim. Default mirrors ``prepare_tensors`` (sorted-unique into
+    the head, -1 tail).
+    ``disabled_slots``: iterable of slot indices to clear in ``adapter_enabled``.
+    """
+    random.seed(seed)
+    num_tokens = num_lora_tokens + num_base_tokens
+    assert num_tokens > 0, "test requires at least one token"
+
+    topk_ids = torch.zeros((num_tokens, topk_num), dtype=torch.int32)
+    token_lora_mapping = torch.empty((num_tokens,), dtype=torch.int32)
+    for i in range(num_tokens):
+        pool = list(range(num_experts))
+        random.shuffle(pool)
+        for j in range(topk_num):
+            topk_ids[i, j] = pool[j]
+        token_lora_mapping[i] = 0 if i < num_lora_tokens else -1
+    topk_ids = topk_ids.to(DEVICE_TYPE)
+    token_lora_mapping = token_lora_mapping.to(DEVICE_TYPE)
+
+    max_num_tokens_padded = topk_ids.numel() + num_experts * (block_size - 1)
+    max_num_tokens_padded = round_up(max_num_tokens_padded, block_size)
+    if topk_ids.numel() < num_experts:
+        max_num_tokens_padded = topk_ids.numel() * block_size
+    max_num_m_blocks = CEILDIV(max_num_tokens_padded, block_size)
+
+    if lora_ids_override is None:
+        lora_ids = torch.full(
+            (max_loras + 1,), -1, dtype=torch.int32, device=DEVICE_TYPE
+        )
+        unique_ids = torch.unique(token_lora_mapping, sorted=True)
+        lora_ids[: unique_ids.numel()] = unique_ids.to(torch.int32)
+    else:
+        assert lora_ids_override.numel() == max_loras + 1
+        lora_ids = lora_ids_override.to(dtype=torch.int32, device=DEVICE_TYPE)
+
+    adapter_enabled = torch.ones(
+        (max_loras + 1,), dtype=torch.int32, device=DEVICE_TYPE
+    )
+    for slot in disabled_slots:
+        adapter_enabled[slot] = 0
+
+    sorted_token_ids = torch.full(
+        (max_loras * max_num_tokens_padded,),
+        SENTINEL_TOKEN,
+        dtype=torch.int32,
+        device=DEVICE_TYPE,
+    )
+    expert_ids = torch.full(
+        (max_loras * max_num_m_blocks,),
+        SENTINEL_EXPERT,
+        dtype=torch.int32,
+        device=DEVICE_TYPE,
+    )
+    num_tokens_post_pad = torch.full(
+        (max_loras,), SENTINEL_NPAD, dtype=torch.int32, device=DEVICE_TYPE
+    )
+
+    ops.moe_lora_align_block_size(
+        topk_ids,
+        token_lora_mapping,
+        num_experts,
+        block_size,
+        max_loras,
+        max_num_tokens_padded,
+        max_num_m_blocks,
+        sorted_token_ids,
+        expert_ids,
+        num_tokens_post_pad,
+        adapter_enabled,
+        lora_ids,
+    )
+
+    return {
+        "lora_ids": lora_ids,
+        "sorted_token_ids": sorted_token_ids,
+        "expert_ids": expert_ids,
+        "num_tokens_post_pad": num_tokens_post_pad,
+        "max_num_tokens_padded": max_num_tokens_padded,
+        "block_size": block_size,
+        "max_loras": max_loras,
+    }
+
+
+@pytest.mark.parametrize(
+    "max_loras",
+    [
+        1,
+        2,
+    ],
+)
+def test_moe_lora_align_block_size_mixed_base_and_lora(max_loras):
+    """Regression test for issue #32235: real LoRA slot must not be skipped
+    when ``active_lora_ids`` has -1 at position 0."""
+    out = _build_and_run_align(
+        num_lora_tokens=8, num_base_tokens=8, max_loras=max_loras
+    )
+
+    # Sanity check on the layout being tested.
+    assert out["lora_ids"][0].item() == -1, (
+        "prepare_tensors layout mismatch: -1 expected at position 0 for mixed batch"
+    )
+
+    real_slot = 0
+    post_pad = out["num_tokens_post_pad"][real_slot].item()
+    assert post_pad != SENTINEL_NPAD, (
+        f"num_tokens_post_pad[{real_slot}] was never written by the kernel; "
+        "the align kernel skipped the real LoRA slot."
+    )
+    assert (
+        0 < post_pad <= out["max_num_tokens_padded"]
+        and post_pad % out["block_size"] == 0
+    ), f"num_tokens_post_pad[{real_slot}]={post_pad} is not a valid block-aligned count"
+
+    expert_row = out["expert_ids"].view(max_loras, -1)[real_slot]
+    assert (expert_row != SENTINEL_EXPERT).all(), (
+        f"expert_ids row for slot {real_slot} has unwritten sentinel entries; "
+        "the align kernel skipped the real LoRA slot."
+    )
+
+    sorted_row = out["sorted_token_ids"].view(max_loras, -1)[real_slot]
+    assert (sorted_row != SENTINEL_TOKEN).all(), (
+        f"sorted_token_ids row for slot {real_slot} has unwritten sentinel "
+        "entries; the align kernel skipped the real LoRA slot."
+    )
+
+
+def test_moe_lora_align_block_size_disabled_adapter_untouched():
+    """Disabled-adapter slot rows must remain untouched by all three align
+    kernels. Pins the invariant protected by the ``adapter_enabled`` guard
+    in ``lora_count_and_sort_expert_tokens_kernel``: without it the sort
+    kernel reads uninitialized ``token_mask`` values for disabled slots and
+    pollutes ``sorted_token_ids`` / ``cumsum_buffer``."""
+    max_loras = 1
+    out = _build_and_run_align(
+        num_lora_tokens=16,
+        num_base_tokens=0,
+        max_loras=max_loras,
+        disabled_slots=(0,),
+    )
+    # Sanity: slot 0 IS present in active_lora_ids (otherwise we would only
+    # exercise the lora_id == -1 / >= max_loras guards).
+    assert (out["lora_ids"] == 0).any().item()
+
+    assert out["num_tokens_post_pad"][0].item() == SENTINEL_NPAD, (
+        "num_tokens_post_pad[0] was modified for a disabled adapter slot."
+    )
+    expert_row = out["expert_ids"].view(max_loras, -1)[0]
+    assert (expert_row == SENTINEL_EXPERT).all(), (
+        "expert_ids row for disabled slot 0 was partially written."
+    )
+    # Row specifically protected by the sort-kernel adapter_enabled guard.
+    sorted_row = out["sorted_token_ids"].view(max_loras, -1)[0]
+    assert (sorted_row == SENTINEL_TOKEN).all(), (
+        "sorted_token_ids row for disabled slot 0 was polluted by the sort "
+        "kernel; lora_count_and_sort_expert_tokens_kernel must skip "
+        "adapter_enabled == 0 slots."
+    )
+
+
+def test_moe_lora_align_block_size_lora_id_oob_guard():
+    """Regression test for the ``lora_id >= max_loras`` guard.
+
+    Production ``LoRAKernelMeta.prepare_tensors`` pre-fills the tail of
+    ``active_lora_ids`` with -1, so the existing ``lora_id == -1`` check
+    covers the extra slot. This test bypasses that invariant and injects
+    an out-of-range value (5 with max_loras=1) at the tail to verify the
+    explicit guard prevents OOB reads against ``adapter_enabled`` and
+    OOB writes against the max_loras-sized output buffers. Without the
+    guard, an illegal-memory-access would surface on the next CUDA sync.
+    """
+    max_loras = 1
+    lora_ids_override = torch.tensor([0, 5], dtype=torch.int32)
+    out = _build_and_run_align(
+        num_lora_tokens=16,
+        num_base_tokens=0,
+        max_loras=max_loras,
+        lora_ids_override=lora_ids_override,
+    )
+    # The .item() call below syncs and would surface any async
+    # illegal-memory-access from the OOB iteration.
+    assert out["num_tokens_post_pad"][0].item() != SENTINEL_NPAD, (
+        "real LoRA slot 0 was skipped by the align kernel"
+    )
+
+
 if __name__ == "__main__":
     pytest.main([__file__])
diff --git a/tests/lora/test_moe_lora_ep_load.py b/tests/lora/test_moe_lora_ep_load.py
new file mode 100644
index 000000000000..ead40b25522b
--- /dev/null
+++ b/tests/lora/test_moe_lora_ep_load.py
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+End-to-end correctness test for 2D MoE LoRA expert-parallel
+load-time slicing
+"""
+
+import pytest
+import torch
+
+from vllm.lora.lora_model import LoRAModel, MoEEPLoadSpec
+from vllm.lora.peft_helper import PEFTHelper
+
+NUM_LAYERS = 48
+GLOBAL_NUM_EXPERTS = 128
+LOCAL_NUM_EXPERTS = 64  # ep_size = 2
+EXPERT_PROJECTIONS = ("down_proj", "gate_proj", "up_proj")
+NON_EXPERT_MODULES = ("q_proj", "k_proj", "v_proj", "o_proj", "gate")
+
+
+def _expected_lora_modules() -> set[str]:
+    """Replicate the set ``WorkerLoRAManager._load_adapter`` would build
+    from this model's ``packed_modules_mapping``."""
+    expected: set[str] = set(NON_EXPERT_MODULES)
+    for expert in range(GLOBAL_NUM_EXPERTS):
+        for proj in EXPERT_PROJECTIONS:
+            expected.add(f"experts.{expert}.{proj}")
+    return expected
+
+
+def _load(lora_dir, peft_helper, *, moe_ep_spec, lora_id):
+    return LoRAModel.from_local_checkpoint(
+        lora_dir,
+        _expected_lora_modules(),
+        peft_helper=peft_helper,
+        lora_model_id=lora_id,
+        device="cpu",
+        moe_ep_spec=moe_ep_spec,
+    )
+
+
+@pytest.mark.parametrize("ep_rank", [0, 1])
+def test_moe_lora_ep2_real_qwen3moe(qwen3moe_lora_files, ep_rank):
+    """ep_size=2 against the real Qwen3-MoE adapter: each rank's loaded
+    LoRA has the right size, the right expert membership, and the
+    right tensor values."""
+    peft_helper = PEFTHelper.from_local_dir(
+        qwen3moe_lora_files, max_position_embeddings=4096
+    )
+
+    # Baseline: no spec → loads every expert × projection × layer plus
+    # all non-expert LoRA modules.
+    ground_truth = _load(qwen3moe_lora_files, peft_helper, moe_ep_spec=None, lora_id=1)
+    expected_baseline = (
+        GLOBAL_NUM_EXPERTS * len(EXPERT_PROJECTIONS) * NUM_LAYERS
+        + len(NON_EXPERT_MODULES) * NUM_LAYERS
+    )
+    assert len(ground_truth.loras) == expected_baseline
+
+    # Sliced load: only this rank's experts; non-expert LoRA is untouched.
+    spec = MoEEPLoadSpec(
+        ep_rank=ep_rank,
+        local_num_experts=LOCAL_NUM_EXPERTS,
+        global_num_experts=GLOBAL_NUM_EXPERTS,
+    )
+    sliced = _load(
+        qwen3moe_lora_files,
+        peft_helper,
+        moe_ep_spec=spec,
+        lora_id=100 + ep_rank,
+    )
+
+    expected_sliced = (
+        LOCAL_NUM_EXPERTS * len(EXPERT_PROJECTIONS) * NUM_LAYERS
+        + len(NON_EXPERT_MODULES) * NUM_LAYERS
+    )
+    assert len(sliced.loras) == expected_sliced
+
+    expert_start = ep_rank * LOCAL_NUM_EXPERTS
+    expert_end = expert_start + LOCAL_NUM_EXPERTS
+
+    for name, lora in sliced.loras.items():
+        gt = ground_truth.loras[name]
+        torch.testing.assert_close(lora.lora_a, gt.lora_a)
+        torch.testing.assert_close(lora.lora_b, gt.lora_b)
+        if ".experts." in name:
+            expert_idx = int(name.split(".experts.")[-1].split(".")[0])
+            assert expert_start <= expert_idx < expert_end, (
+                f"non-local expert {expert_idx} leaked: {name}"
+            )
diff --git a/tests/lora/test_olmoe_tp.py b/tests/lora/test_olmoe_tp.py
index 492716b46451..bbc25cb6b8e4 100644
--- a/tests/lora/test_olmoe_tp.py
+++ b/tests/lora/test_olmoe_tp.py
@@ -11,6 +11,7 @@
 
 import vllm
 from vllm.lora.request import LoRARequest
+from vllm.platforms import current_platform
 
 from ..utils import multi_gpu_test
 
@@ -110,7 +111,10 @@ def generate_and_test(
         )
 
 
-def test_olmoe_lora(olmoe_lora_files):
+@pytest.mark.skipif(
+    current_platform.is_cuda_alike(), reason="Skipping to avoid redundant model tests"
+)
+def test_olmoe_lora(olmoe_lora_files, maybe_enable_lora_dual_stream):
     # We enable enforce_eager=True here to reduce VRAM usage for lora-test CI,
     # Otherwise, the lora-test will fail due to CUDA OOM.
     llm = vllm.LLM(
@@ -141,7 +145,9 @@ def test_olmoe_lora_mixed(olmoe_lora_files):
     generate_and_test(llm, olmoe_lora_files, lora_id=[1, None, 3, None])
 
 
-def test_olmoe_lora_mixed_random(olmoe_lora_files, tmp_path):
+def test_olmoe_lora_mixed_random(
+    olmoe_lora_files, tmp_path, maybe_enable_lora_dual_stream
+):
     # Create a dummy LoRA with random weights based on the real one
     random_lora_path = tmp_path / "random_lora"
     shutil.copytree(olmoe_lora_files, random_lora_path)
@@ -176,6 +182,9 @@ def test_olmoe_lora_mixed_random(olmoe_lora_files, tmp_path):
     assert outputs[0].outputs[0].text.strip().startswith(EXPECTED_LORA_OUTPUT[0])
 
 
+@pytest.mark.skipif(
+    current_platform.is_cuda_alike(), reason="Skipping to avoid redundant model tests"
+)
 @pytest.mark.parametrize("fully_sharded_loras", [False, True])
 @multi_gpu_test(num_gpus=2)
 def test_olmoe_lora_tp2(olmoe_lora_files, fully_sharded_loras):
diff --git a/tests/lora/test_punica_ops.py b/tests/lora/test_punica_ops.py
index 8a2634e82ba9..7706d0e2aab7 100644
--- a/tests/lora/test_punica_ops.py
+++ b/tests/lora/test_punica_ops.py
@@ -9,10 +9,13 @@
 import vllm.lora.ops.triton_ops as triton_ops
 from vllm.lora.ops.triton_ops import LoRAKernelMeta
 from vllm.lora.ops.triton_ops.utils import _LORA_A_PTR_DICT, _LORA_B_PTR_DICT
+from vllm.platforms import current_platform
 from vllm.utils.torch_utils import set_random_seed
 
 from .utils import PunicaTensors, assert_close, generate_data_for_nslices
 
+DEVICE_TYPE = current_platform.device_type
+
 
 @pytest.fixture(autouse=True)
 def reset_device(reset_default_device):
@@ -146,7 +149,9 @@ def check_lora_shrink_kernel(
 
     # Setup metadata information for the LoRA kernel.
     lora_meta = LoRAKernelMeta.make(
-        max_loras=num_loras, max_num_tokens=token_nums, device="cuda"
+        max_loras=num_loras,
+        max_num_tokens=token_nums,
+        device=DEVICE_TYPE,
     )
     lora_meta.prepare_tensors(data.token_lora_mapping)
 
@@ -219,7 +224,9 @@ def check_lora_expand_kernel(
 
     # Setup metadata information for the LoRA kernel.
     lora_meta = LoRAKernelMeta.make(
-        max_loras=num_loras, max_num_tokens=token_nums, device="cuda"
+        max_loras=num_loras,
+        max_num_tokens=token_nums,
+        device=DEVICE_TYPE,
     )
     lora_meta.prepare_tensors(data.token_lora_mapping)
 
@@ -367,7 +374,7 @@ def check_lora_expand_kernel(
 }
 
 DTYPES = [torch.float16, torch.bfloat16]
-DEVICES = [f"cuda:{0}"]
+DEVICES = [f"{DEVICE_TYPE}:{0}"]
 SEED = [0]
 
 
diff --git a/tests/lora/test_punica_ops_fp8.py b/tests/lora/test_punica_ops_fp8.py
index 04231333642f..3e7fe7b27582 100644
--- a/tests/lora/test_punica_ops_fp8.py
+++ b/tests/lora/test_punica_ops_fp8.py
@@ -28,9 +28,11 @@
     _SHRINK_LORA_SCALE_PTR_DICT,
 )
 from vllm.lora.ops.triton_ops.utils import _LORA_A_PTR_DICT, _LORA_B_PTR_DICT
+from vllm.platforms import current_platform
 from vllm.utils.torch_utils import set_random_seed
 
-DEVICES = [f"cuda:{0}"]
+DEVICE_TYPE = current_platform.device_type
+DEVICES = [f"{DEVICE_TYPE}:{0}"]
 SEED = [0]
 
 _dict_lock = Lock()
diff --git a/tests/lora/test_qwen35_densemodel_lora.py b/tests/lora/test_qwen35_densemodel_lora.py
index 665fb99de0fb..e926bbcef27c 100644
--- a/tests/lora/test_qwen35_densemodel_lora.py
+++ b/tests/lora/test_qwen35_densemodel_lora.py
@@ -1,12 +1,14 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import pytest
 from transformers import AutoTokenizer
 
 import vllm
 import vllm.config
 from vllm.assets.image import ImageAsset
 from vllm.lora.request import LoRARequest
+from vllm.platforms import current_platform
 
 from ..utils import create_new_process_for_each_test, multi_gpu_test
 
@@ -311,8 +313,13 @@ def _assert_qwen35_text_vl_and_mixed_lora(
     )
 
 
+@pytest.mark.skipif(
+    current_platform.is_cuda_alike(), reason="Skipping to avoid redundant model tests"
+)
 @create_new_process_for_each_test()
-def test_qwen35_text_lora(qwen35_text_lora_files, qwen35_vl_lora_files):
+def test_qwen35_text_lora(
+    qwen35_text_lora_files, qwen35_vl_lora_files, maybe_enable_lora_dual_stream
+):
     llm = vllm.LLM(
         model=MODEL_PATH,
         max_model_len=4096,
@@ -335,7 +342,9 @@ def test_qwen35_text_lora(qwen35_text_lora_files, qwen35_vl_lora_files):
 
 
 @multi_gpu_test(num_gpus=4)
-def test_qwen35_text_lora_tp4(qwen35_text_lora_files, qwen35_vl_lora_files):
+def test_qwen35_text_lora_tp4(
+    qwen35_text_lora_files, qwen35_vl_lora_files, maybe_enable_lora_dual_stream
+):
     llm = vllm.LLM(
         model=MODEL_PATH,
         max_model_len=4096,
diff --git a/tests/lora/test_qwen36_moe_lora.py b/tests/lora/test_qwen36_moe_lora.py
new file mode 100644
index 000000000000..cb1b9cb87c95
--- /dev/null
+++ b/tests/lora/test_qwen36_moe_lora.py
@@ -0,0 +1,156 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+import vllm
+import vllm.config
+from vllm.assets.image import ImageAsset
+from vllm.lora.request import LoRARequest
+
+from ..utils import multi_gpu_test
+
+MODEL_PATH = "Qwen/Qwen3.6-35B-A3B"
+
+LORA_2D_ID = 1
+LORA_3D_ID = 2
+
+PROMPT_TEMPLATE = """<|im_start|>user
+<|vision_start|><|image_pad|><|vision_end|>What is in the image?<|im_end|>
+<|im_start|>assistant
+<think>
+
+</think>
+"""
+
+# Visual captioning prompts: each image will be paired with one LoRA in the
+# mixed-batch case so we can check per-prompt routing.
+VL_TEST_IMAGES = [
+    ImageAsset("stop_sign"),
+    ImageAsset("cherry_blossom"),
+]
+
+
+def _build_prompts() -> list[dict]:
+    return [
+        {
+            "prompt": PROMPT_TEMPLATE,
+            "multi_modal_data": {"image": asset.pil_image},
+        }
+        for asset in VL_TEST_IMAGES
+    ]
+
+
+def _generate(llm: vllm.LLM, lora_request) -> list[str]:
+    outputs = llm.generate(
+        _build_prompts(),
+        vllm.SamplingParams(temperature=0, max_tokens=128),
+        lora_request=lora_request,
+    )
+    return [out.outputs[0].text.strip() for out in outputs]
+
+
+def _run_mixed_2d_3d_lora_test(
+    lora_2d_files: str,
+    lora_3d_files: str,
+    tensor_parallel_size: int,
+    fully_sharded_loras: bool,
+) -> None:
+    llm = vllm.LLM(
+        model=MODEL_PATH,
+        max_model_len=4096,
+        enable_lora=True,
+        enable_mixed_moe_lora_format=True,
+        max_loras=2,
+        max_lora_rank=8,
+        max_num_seqs=4,
+        enforce_eager=True,
+        tensor_parallel_size=tensor_parallel_size,
+        enable_expert_parallel=not fully_sharded_loras,
+        fully_sharded_loras=fully_sharded_loras,
+        trust_remote_code=True,
+        enable_tower_connector_lora=True,
+        mm_processor_cache_gb=0,
+        limit_mm_per_prompt={"image": 1},
+        compilation_config=vllm.config.CompilationConfig(
+            cudagraph_specialize_lora=False,
+        ),
+    )
+
+    lora_2d = LoRARequest(
+        "lora_2d",
+        LORA_2D_ID,
+        lora_2d_files,
+        is_3d_lora_weight=False,
+    )
+    lora_3d = LoRARequest(
+        "lora_3d",
+        LORA_3D_ID,
+        lora_3d_files,
+        is_3d_lora_weight=True,
+    )
+
+    # Reference: each adapter alone over both prompts.
+    outputs_2d_alone = _generate(llm, lora_2d)
+    outputs_3d_alone = _generate(llm, lora_3d)
+
+    assert len(outputs_2d_alone) == len(VL_TEST_IMAGES)
+    assert len(outputs_3d_alone) == len(VL_TEST_IMAGES)
+    for text in outputs_2d_alone + outputs_3d_alone:
+        assert text, "Empty output from single-adapter LoRA generation"
+
+    # Mixed batch: prompt 0 uses the 2D adapter, prompt 1 uses the 3D
+    # adapter. Per-prompt outputs must match the standalone runs.
+    mixed_outputs = _generate(llm, [lora_2d, lora_3d])
+
+    assert mixed_outputs[0] == outputs_2d_alone[0], (
+        f"Mixed-batch 2D output {mixed_outputs[0]!r} does not match "
+        f"standalone 2D output {outputs_2d_alone[0]!r}"
+    )
+    assert mixed_outputs[1] == outputs_3d_alone[1], (
+        f"Mixed-batch 3D output {mixed_outputs[1]!r} does not match "
+        f"standalone 3D output {outputs_3d_alone[1]!r}"
+    )
+
+    # Reverse assignment: neither adapter should be silently aliased.
+    swapped_outputs = _generate(llm, [lora_3d, lora_2d])
+    assert swapped_outputs[0] == outputs_3d_alone[0], (
+        f"Swapped-batch 3D output {swapped_outputs[0]!r} does not match "
+        f"standalone 3D output {outputs_3d_alone[0]!r}"
+    )
+    assert swapped_outputs[1] == outputs_2d_alone[1], (
+        f"Swapped-batch 2D output {swapped_outputs[1]!r} does not match "
+        f"standalone 2D output {outputs_2d_alone[1]!r}"
+    )
+
+
+@pytest.mark.skip(reason="This model is too big, so skip this test temporarily.")
+@pytest.mark.parametrize("fully_sharded_loras", [False, True])
+@multi_gpu_test(num_gpus=2)
+def test_qwen36_moe_mixed_2d_3d_lora_tp2(
+    qwen36_moe_2d_lora_files,
+    qwen36_moe_3d_lora_files,
+    fully_sharded_loras,
+):
+    _run_mixed_2d_3d_lora_test(
+        lora_2d_files=qwen36_moe_2d_lora_files,
+        lora_3d_files=qwen36_moe_3d_lora_files,
+        tensor_parallel_size=2,
+        fully_sharded_loras=fully_sharded_loras,
+    )
+
+
+@pytest.mark.skip(reason="This model is too big, so skip this test temporarily.")
+@pytest.mark.parametrize("fully_sharded_loras", [False, True])
+@multi_gpu_test(num_gpus=4)
+def test_qwen36_moe_mixed_2d_3d_lora_tp4(
+    qwen36_moe_2d_lora_files,
+    qwen36_moe_3d_lora_files,
+    fully_sharded_loras,
+):
+    _run_mixed_2d_3d_lora_test(
+        lora_2d_files=qwen36_moe_2d_lora_files,
+        lora_3d_files=qwen36_moe_3d_lora_files,
+        tensor_parallel_size=4,
+        fully_sharded_loras=fully_sharded_loras,
+    )
diff --git a/tests/lora/test_llm_with_multi_loras.py b/tests/lora/test_qwen3_with_multi_loras.py
similarity index 100%
rename from tests/lora/test_llm_with_multi_loras.py
rename to tests/lora/test_qwen3_with_multi_loras.py
diff --git a/tests/lora/test_qwen3moe_tp.py b/tests/lora/test_qwen3moe_tp.py
index fcac4275cc40..16d8354d4d18 100644
--- a/tests/lora/test_qwen3moe_tp.py
+++ b/tests/lora/test_qwen3moe_tp.py
@@ -5,6 +5,8 @@
 # NOTE To avoid overloading the CI pipeline, this test script will not
 # be triggered on CI and is primarily intended for local testing and verification.
 
+import pytest
+
 import vllm
 from vllm.lora.request import LoRARequest
 
@@ -75,6 +77,7 @@ def test_qwen3moe_lora(qwen3moe_lora_files):
         enforce_eager=True,
         trust_remote_code=True,
         enable_chunked_prefill=True,
+        enable_mixed_moe_lora_format=True,
     )
 
     generate_and_test(llm, qwen3moe_lora_files, lora_id=1)
@@ -82,16 +85,18 @@ def test_qwen3moe_lora(qwen3moe_lora_files):
 
 
 @multi_gpu_test(num_gpus=2)
-def test_qwen3moe_lora_tp2(qwen3moe_lora_files):
+@pytest.mark.parametrize("ep", [False, True])
+@pytest.mark.parametrize("enable_mixed_moe_lora_format", [False, True])
+def test_qwen3moe_lora_tp2(ep, qwen3moe_lora_files, enable_mixed_moe_lora_format):
     llm = vllm.LLM(
         MODEL_PATH,
         max_model_len=1024,
         enable_lora=True,
         max_loras=4,
-        enforce_eager=True,
         trust_remote_code=True,
-        enable_chunked_prefill=True,
+        enable_expert_parallel=ep,
         tensor_parallel_size=2,
+        enable_mixed_moe_lora_format=enable_mixed_moe_lora_format,
     )
 
     generate_and_test(llm, qwen3moe_lora_files, lora_id=1)
@@ -99,16 +104,18 @@ def test_qwen3moe_lora_tp2(qwen3moe_lora_files):
 
 
 @multi_gpu_test(num_gpus=4)
-def test_qwen3moe_lora_tp4(qwen3moe_lora_files):
+@pytest.mark.parametrize("ep", [False, True])
+@pytest.mark.parametrize("enable_mixed_moe_lora_format", [False, True])
+def test_qwen3moe_lora_tp4(ep, qwen3moe_lora_files, enable_mixed_moe_lora_format):
     llm = vllm.LLM(
         MODEL_PATH,
         max_model_len=1024,
         enable_lora=True,
         max_loras=4,
-        enforce_eager=True,
         trust_remote_code=True,
-        enable_chunked_prefill=True,
+        enable_expert_parallel=ep,
         tensor_parallel_size=4,
+        enable_mixed_moe_lora_format=enable_mixed_moe_lora_format,
     )
 
     generate_and_test(llm, qwen3moe_lora_files, lora_id=1)
diff --git a/tests/lora/test_qwenvl.py b/tests/lora/test_qwenvl.py
index 5f8fc26c16d3..a4a32278db01 100644
--- a/tests/lora/test_qwenvl.py
+++ b/tests/lora/test_qwenvl.py
@@ -2,12 +2,14 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from dataclasses import dataclass
 
+import pytest
 from packaging.version import Version
 from transformers import __version__ as TRANSFORMERS_VERSION
 
 import vllm
 from vllm.assets.image import ImageAsset
 from vllm.lora.request import LoRARequest
+from vllm.platforms import current_platform
 from vllm.sampling_params import BeamSearchParams
 
 
@@ -206,6 +208,9 @@ def test_qwen2vl_lora_beam_search(qwen2vl_lora_files):
         )
 
 
+@pytest.mark.skipif(
+    current_platform.is_cuda_alike(), reason="Skipping to avoid redundant model tests"
+)
 def test_qwen25vl_lora(qwen25vl_lora_files):
     """Test Qwen 2.5 VL model with LoRA"""
     config = TestConfig(model_path=QWEN25VL_MODEL_PATH, lora_path=qwen25vl_lora_files)
@@ -216,6 +221,9 @@ def test_qwen25vl_lora(qwen25vl_lora_files):
         tester.run_test(TEST_IMAGES, expected_outputs=EXPECTED_OUTPUTS, lora_id=lora_id)
 
 
+@pytest.mark.skipif(
+    current_platform.is_cuda_alike(), reason="Skipping to avoid redundant model tests"
+)
 def test_qwen25vl_vision_lora(qwen25vl_vision_lora_files):
     config = TestConfig(
         model_path=QWEN25VL_MODEL_PATH,
diff --git a/tests/lora/test_whisper.py b/tests/lora/test_whisper.py
index 83b814d49f7f..ea8179a9c661 100644
--- a/tests/lora/test_whisper.py
+++ b/tests/lora/test_whisper.py
@@ -124,30 +124,3 @@ def test_whisper_multi_lora(whisper_lora_files):
         f"Expected same outputs for same adapter with different IDs. "
         f"Got: {outputs_lora1} vs {outputs_lora2}"
     )
-
-
-@create_new_process_for_each_test()
-def test_whisper_with_and_without_lora(whisper_lora_files):
-    """Test that Whisper produces different outputs with and without LoRA.
-
-    This test verifies that the LoRA adapter actually affects the model output.
-    """
-    llm = create_whisper_llm(enable_lora=True)
-
-    # Run with LoRA
-    outputs_with_lora = run_whisper_inference(
-        llm, lora_path=whisper_lora_files, lora_id=1
-    )
-
-    # Run without LoRA (base model only)
-    outputs_without_lora = run_whisper_inference(llm, lora_path=None)
-
-    # Both should produce valid outputs
-    assert len(outputs_with_lora[0]) > 0
-    assert len(outputs_without_lora[0]) > 0
-
-    print(f"Output with LoRA: {outputs_with_lora[0]}")
-    print(f"Output without LoRA: {outputs_without_lora[0]}")
-
-    # Note: Outputs may or may not differ depending on the adapter
-    # The main verification is that both configurations work
diff --git a/tests/lora/test_worker.py b/tests/lora/test_worker.py
index 4af3ccf893ff..e929fcad2896 100644
--- a/tests/lora/test_worker.py
+++ b/tests/lora/test_worker.py
@@ -19,11 +19,18 @@
 from vllm.config.lora import LoRAConfig
 from vllm.lora.model_manager import LoRAMapping
 from vllm.lora.request import LoRARequest
-from vllm.v1.worker.gpu_worker import Worker
+from vllm.platforms import current_platform
+
+if current_platform.is_xpu():
+    from vllm.v1.worker.xpu_worker import XPUWorker as Worker
+else:
+    from vllm.v1.worker.gpu_worker import Worker
 
 MODEL_PATH = "Qwen/Qwen3-0.6B"
 NUM_LORAS = 16
 
+DEVICE_TYPE = current_platform.device_type
+
 
 @patch.dict(os.environ, {"RANK": "0"})
 def test_worker_apply_lora(qwen3_lora_files):
@@ -61,7 +68,7 @@ def set_active_loras(worker: Worker, lora_requests: list[LoRARequest]):
             max_num_seqs=32,
             max_num_partial_prefills=32,
         ),
-        device_config=DeviceConfig("cuda"),
+        device_config=DeviceConfig(DEVICE_TYPE),
         cache_config=CacheConfig(
             block_size=16,
             cache_dtype="auto",
diff --git a/tests/lora/utils.py b/tests/lora/utils.py
index 6aba5299b582..e5ce7a88464c 100644
--- a/tests/lora/utils.py
+++ b/tests/lora/utils.py
@@ -9,10 +9,13 @@
 from safetensors.torch import save_file
 
 from vllm.lora.lora_weights import LoRALayerWeights, PackedLoRALayerWeights
+from vllm.platforms import current_platform
+
+DEVICE_TYPE = current_platform.device_type
 
 
 class DummyLoRAManager:
-    def __init__(self, device: torch.device = "cuda:0"):
+    def __init__(self, device: torch.device = f"{DEVICE_TYPE}:0"):
         super().__init__()
         self._loras: dict[str, LoRALayerWeights] = {}
         self._device = device
@@ -57,8 +60,8 @@ def init_lora(
             module_name,
             rank=rank,
             lora_alpha=1,
-            lora_a=torch.rand([rank, input_dim], device="cuda"),
-            lora_b=torch.rand([output_dim, input_dim], device="cuda"),
+            lora_a=torch.rand([rank, input_dim], device=DEVICE_TYPE),
+            lora_b=torch.rand([output_dim, input_dim], device=DEVICE_TYPE),
             embeddings_tensor=embeddings_tensor,
         )
         self.set_module_lora(module_name, lora)
diff --git a/tests/model_executor/layers/test_pooler_activations.py b/tests/model_executor/layers/test_pooler_activations.py
new file mode 100644
index 000000000000..697207a68bba
--- /dev/null
+++ b/tests/model_executor/layers/test_pooler_activations.py
@@ -0,0 +1,234 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for vllm.model_executor.layers.pooler.activations."""
+
+from types import SimpleNamespace
+
+import pytest
+import torch
+import torch.nn as nn
+
+from vllm.model_executor.layers.pooler.activations import (
+    LambdaPoolerActivation,
+    PoolerClassify,
+    PoolerIdentity,
+    PoolerMultiLabelClassify,
+    PoolerNormalize,
+    get_act_fn,
+    resolve_classifier_act_fn,
+)
+
+
+# ---------------------------------------------------------------------------
+# PoolerIdentity
+# ---------------------------------------------------------------------------
+class TestPoolerIdentity:
+    def test_returns_input_unchanged(self):
+        pooler = PoolerIdentity()
+        x = torch.randn(4, 128)
+        out = pooler(x)
+        assert torch.equal(out, x)
+
+    def test_forward_list(self):
+        pooler = PoolerIdentity()
+        tensors = [torch.randn(128), torch.randn(256)]
+        out = pooler(tensors)
+        assert len(out) == 2
+        for orig, result in zip(tensors, out):
+            assert torch.equal(orig, result)
+
+
+# ---------------------------------------------------------------------------
+# PoolerNormalize
+# ---------------------------------------------------------------------------
+class TestPoolerNormalize:
+    def test_output_has_unit_norm(self):
+        pooler = PoolerNormalize()
+        x = torch.randn(4, 128)
+        out = pooler(x)
+        norms = torch.linalg.norm(out, dim=-1)
+        assert torch.allclose(norms, torch.ones(4), atol=1e-5)
+
+    def test_single_vector(self):
+        pooler = PoolerNormalize()
+        x = torch.randn(1, 64)
+        out = pooler(x)
+        norm = torch.linalg.norm(out, dim=-1)
+        assert torch.allclose(norm, torch.ones(1), atol=1e-5)
+
+    def test_forward_list(self):
+        pooler = PoolerNormalize()
+        tensors = [torch.randn(1, 64), torch.randn(1, 128)]
+        out = pooler(tensors)
+        for t in out:
+            norm = torch.linalg.norm(t, dim=-1)
+            assert torch.allclose(norm, torch.ones(1), atol=1e-5)
+
+
+# ---------------------------------------------------------------------------
+# PoolerMultiLabelClassify
+# ---------------------------------------------------------------------------
+class TestPoolerMultiLabelClassify:
+    def test_output_in_zero_one(self):
+        pooler = PoolerMultiLabelClassify()
+        x = torch.randn(4, 10)
+        out = pooler(x)
+        assert (out >= 0).all() and (out <= 1).all()
+
+    def test_large_positive_maps_near_one(self):
+        pooler = PoolerMultiLabelClassify()
+        x = torch.full((1, 3), 100.0)
+        out = pooler(x)
+        assert torch.allclose(out, torch.ones(1, 3), atol=1e-4)
+
+    def test_large_negative_maps_near_zero(self):
+        pooler = PoolerMultiLabelClassify()
+        x = torch.full((1, 3), -100.0)
+        out = pooler(x)
+        assert torch.allclose(out, torch.zeros(1, 3), atol=1e-4)
+
+
+# ---------------------------------------------------------------------------
+# PoolerClassify
+# ---------------------------------------------------------------------------
+class TestPoolerClassify:
+    def test_infers_from_shape_when_num_labels_none(self):
+        pooler = PoolerClassify(num_labels=None)
+        assert pooler.num_labels is None
+        x = torch.randn(2, 5)
+        out = pooler(x)
+        sums = out.sum(dim=-1)
+        assert torch.allclose(sums, torch.ones(2), atol=1e-5)
+
+    def test_sigmoid_when_num_labels_lt_2(self):
+        pooler = PoolerClassify(num_labels=1)
+        x = torch.zeros(1, 1)
+        out = pooler(x)
+        assert torch.allclose(out, torch.tensor([[0.5]]), atol=1e-5)
+
+    def test_num_labels_zero_uses_sigmoid(self):
+        pooler = PoolerClassify(num_labels=0)
+        assert pooler.num_labels == 0
+        x = torch.zeros(1, 3)
+        out = pooler(x)
+        assert torch.allclose(out, torch.full((1, 3), 0.5), atol=1e-5)
+
+    def test_num_labels_ge_2_uses_softmax(self):
+        pooler = PoolerClassify(num_labels=4)
+        assert pooler.num_labels == 4
+        x = torch.randn(2, 4)
+        out = pooler(x)
+        sums = out.sum(dim=-1)
+        assert torch.allclose(sums, torch.ones(2), atol=1e-5)
+
+    def test_default_num_labels_is_none(self):
+        pooler = PoolerClassify()
+        assert pooler.num_labels is None
+
+
+# ---------------------------------------------------------------------------
+# LambdaPoolerActivation
+# ---------------------------------------------------------------------------
+class TestLambdaPoolerActivation:
+    def test_applies_custom_fn(self):
+        pooler = LambdaPoolerActivation(nn.ReLU())
+        x = torch.tensor([[-1.0, 2.0, -3.0]])
+        out = pooler(x)
+        expected = torch.tensor([[0.0, 2.0, 0.0]])
+        assert torch.equal(out, expected)
+
+    def test_forward_list(self):
+        pooler = LambdaPoolerActivation(nn.ReLU())
+        tensors = [torch.tensor([-1.0, 2.0]), torch.tensor([3.0, -4.0])]
+        out = pooler(tensors)
+        assert torch.equal(out[0], torch.tensor([0.0, 2.0]))
+        assert torch.equal(out[1], torch.tensor([3.0, 0.0]))
+
+
+# ---------------------------------------------------------------------------
+# get_act_fn factory
+# ---------------------------------------------------------------------------
+class TestGetActFn:
+    @staticmethod
+    def _make_config(**kwargs):
+        return SimpleNamespace(**kwargs)
+
+    def test_regression(self):
+        cfg = self._make_config(problem_type="regression")
+        result = get_act_fn(cfg)
+        assert isinstance(result, PoolerIdentity)
+
+    def test_single_label_classification(self):
+        cfg = self._make_config(
+            problem_type="single_label_classification", num_labels=3
+        )
+        result = get_act_fn(cfg)
+        assert isinstance(result, PoolerClassify)
+        assert result.num_labels == 3
+
+    def test_multi_label_classification(self):
+        cfg = self._make_config(problem_type="multi_label_classification")
+        result = get_act_fn(cfg)
+        assert isinstance(result, PoolerMultiLabelClassify)
+
+    def test_sentence_transformers_activation(self):
+        cfg = self._make_config(
+            problem_type="",
+            sentence_transformers={
+                "activation_fn": "torch.nn.modules.activation.Sigmoid"
+            },
+        )
+        result = get_act_fn(cfg)
+        assert isinstance(result, PoolerClassify)
+
+    def test_sbert_activation(self):
+        cfg = self._make_config(
+            problem_type="",
+            sbert_ce_default_activation_function=(
+                "torch.nn.modules.activation.Sigmoid"
+            ),
+        )
+        result = get_act_fn(cfg)
+        assert isinstance(result, PoolerClassify)
+
+    def test_default_fallback(self):
+        cfg = self._make_config(problem_type="")
+        result = get_act_fn(cfg)
+        assert isinstance(result, PoolerClassify)
+
+    def test_sentence_transformers_takes_priority(self):
+        cfg = self._make_config(
+            problem_type="",
+            sentence_transformers={"activation_fn": "torch.nn.modules.linear.Identity"},
+            sbert_ce_default_activation_function=(
+                "torch.nn.modules.activation.Sigmoid"
+            ),
+        )
+        result = get_act_fn(cfg)
+        assert isinstance(result, PoolerIdentity)
+
+    def test_rejects_non_torch_activation(self):
+        cfg = self._make_config(
+            problem_type="",
+            sentence_transformers={"activation_fn": "os.system"},
+        )
+        with pytest.raises(ValueError, match="restricted"):
+            get_act_fn(cfg)
+
+
+# ---------------------------------------------------------------------------
+# resolve_classifier_act_fn
+# ---------------------------------------------------------------------------
+class TestResolveClassifierActFn:
+    def test_delegates_to_get_act_fn_when_none(self):
+        model_config = SimpleNamespace(
+            hf_config=SimpleNamespace(num_labels=3, problem_type="")
+        )
+        result = resolve_classifier_act_fn(model_config, act_fn=None)
+        assert isinstance(result, PoolerClassify)
+        assert result.num_labels == 3
+
+    def test_passes_through_provided_act_fn(self):
+        custom = PoolerIdentity()
+        result = resolve_classifier_act_fn(None, act_fn=custom)
+        assert result is custom
diff --git a/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py b/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
index 3b950c843c56..a15a624c905d 100644
--- a/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
+++ b/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
@@ -460,7 +460,7 @@ async def test_serialize_and_serve_entrypoints(tmp_path):
         result = subprocess.run(
             [
                 sys.executable,
-                f"{VLLM_PATH}/examples/others/tensorize_vllm_model.py",
+                f"{VLLM_PATH}/examples/features/tensorize_vllm_model.py",
                 "--model",
                 model_ref,
                 "serialize",
diff --git a/tests/model_executor/model_loader/test_modelexpress_loader.py b/tests/model_executor/model_loader/test_modelexpress_loader.py
new file mode 100644
index 000000000000..8c43865b4eee
--- /dev/null
+++ b/tests/model_executor/model_loader/test_modelexpress_loader.py
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import sys
+from types import ModuleType, SimpleNamespace
+
+import pytest
+from torch import nn
+
+from vllm.config import VllmConfig
+from vllm.config.load import LoadConfig
+from vllm.model_executor.model_loader import get_model_loader
+from vllm.model_executor.model_loader.modelexpress_loader import (
+    ModelExpressModelLoader,
+)
+
+
+class FakeModelexpressLoader:
+    calls: list[tuple[str, tuple, dict]] = []
+    loaded_model: nn.Module
+
+    def __init__(self, load_config: LoadConfig):
+        self.load_config = load_config
+
+    def download_model(self, *args, **kwargs):
+        self.calls.append(("download_model", args, kwargs))
+
+    def load_weights(self, *args, **kwargs):
+        self.calls.append(("load_weights", args, kwargs))
+
+    def load_model(self, *args, **kwargs):
+        self.calls.append(("load_model", args, kwargs))
+        return self.loaded_model
+
+
+def _install_fake_modelexpress(monkeypatch):
+    FakeModelexpressLoader.calls = []
+    FakeModelexpressLoader.loaded_model = nn.Module()
+
+    for name in [
+        "modelexpress",
+        "modelexpress.engines",
+        "modelexpress.engines.vllm",
+    ]:
+        monkeypatch.setitem(sys.modules, name, ModuleType(name))
+
+    module = ModuleType("modelexpress.engines.vllm.loader")
+    module.__dict__["MxModelLoader"] = FakeModelexpressLoader
+    monkeypatch.setitem(sys.modules, module.__name__, module)
+
+
+def test_modelexpress_load_format_resolves_to_modelexpress_loader(monkeypatch):
+    _install_fake_modelexpress(monkeypatch)
+
+    loader = get_model_loader(LoadConfig(load_format="modelexpress"))
+
+    assert isinstance(loader, ModelExpressModelLoader)
+
+
+def test_modelexpress_loader_delegates_to_modelexpress(monkeypatch):
+    _install_fake_modelexpress(monkeypatch)
+    loader = ModelExpressModelLoader(LoadConfig(load_format="modelexpress"))
+    model = nn.Module()
+    model_config = SimpleNamespace()
+    vllm_config = SimpleNamespace()
+
+    loader.download_model(model_config)
+    loader.load_weights(model, model_config)
+    FakeModelexpressLoader.loaded_model.train()
+    result = loader.load_model(
+        vllm_config=vllm_config,
+        model_config=model_config,
+        prefix="model",
+    )
+
+    assert result is FakeModelexpressLoader.loaded_model
+    assert not result.training
+    assert FakeModelexpressLoader.calls == [
+        ("download_model", (model_config,), {}),
+        ("load_weights", (model, model_config), {}),
+        (
+            "load_model",
+            (),
+            {
+                "vllm_config": vllm_config,
+                "model_config": model_config,
+                "prefix": "model",
+            },
+        ),
+    ]
+
+
+def test_modelexpress_loader_missing_modelexpress_error(monkeypatch):
+    import importlib
+
+    def missing_modelexpress(name):
+        raise ModuleNotFoundError(name=name)
+
+    monkeypatch.setattr(importlib, "import_module", missing_modelexpress)
+
+    with pytest.raises(ImportError, match="requires the ModelExpress Python package"):
+        ModelExpressModelLoader(LoadConfig(load_format="modelexpress"))
+
+
+def test_modelexpress_loader_preserves_internal_import_errors(monkeypatch):
+    import importlib
+
+    def missing_dependency(name):
+        raise ModuleNotFoundError(name="not_modelexpress_dependency")
+
+    monkeypatch.setattr(importlib, "import_module", missing_dependency)
+
+    with pytest.raises(ModuleNotFoundError) as exc_info:
+        ModelExpressModelLoader(LoadConfig(load_format="modelexpress"))
+    assert exc_info.value.name == "not_modelexpress_dependency"
+
+
+def test_modelexpress_load_format_allows_object_storage_model_weights():
+    model_config = SimpleNamespace(
+        architecture="UnknownForTest",
+        config_updated=False,
+        convert_type=None,
+        is_hybrid=False,
+        model="test-model",
+        model_weights="s3://bucket/model",
+    )
+    vllm_config = object.__new__(VllmConfig)
+    vllm_config.model_config = model_config
+    vllm_config.load_config = LoadConfig(load_format="modelexpress")
+
+    vllm_config.try_verify_and_update_config()
+
+    assert vllm_config.load_config.load_format == "modelexpress"
diff --git a/tests/model_executor/model_loader/test_reload.py b/tests/model_executor/model_loader/test_reload.py
index 6a04e7eb306d..0f6dccd84776 100644
--- a/tests/model_executor/model_loader/test_reload.py
+++ b/tests/model_executor/model_loader/test_reload.py
@@ -6,8 +6,15 @@
 
 import pytest
 import torch
+from torch.nn.parameter import UninitializedParameter
 
+import vllm.model_executor.model_loader.reload.meta as reload_meta
 from vllm.model_executor.layers.linear import QKVParallelLinear
+from vllm.model_executor.model_loader.reload.layerwise import (
+    finalize_layerwise_reload,
+    initialize_layerwise_reload,
+    record_metadata_for_reloading,
+)
 from vllm.model_executor.model_loader.reload.meta import (
     capture_layer_to_meta,
     get_numel_loaded,
@@ -19,7 +26,38 @@
 from vllm.model_executor.model_loader.reload.types import LayerReloadingInfo
 from vllm.model_executor.model_loader.reload.utils import get_layer_tensors
 from vllm.platforms import current_platform
-from vllm.utils.torch_utils import cuda_device_count_stateless
+
+
+class _AliasedBufferLayer(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        weight = torch.arange(6, dtype=torch.float32).reshape(2, 3)
+        self.weight = torch.nn.Parameter(weight)
+        self.register_buffer(
+            "weight_view", self.weight.detach().view(-1), persistent=False
+        )
+
+
+class _ParentAliasedChildBufferLayer(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.scale = torch.nn.Parameter(torch.ones(1))
+        self.conv1d = torch.nn.Linear(3, 2, bias=False)
+        self.conv1d.weight.data.copy_(
+            torch.arange(6, dtype=torch.float32).reshape(2, 3)
+        )
+        self.register_buffer(
+            "conv_weights", self.conv1d.weight.detach().view(-1), persistent=False
+        )
+
+
+class _AliasedBufferWithUninitializedChildLayer(_AliasedBufferLayer):
+    def __init__(self):
+        super().__init__()
+        self.child = torch.nn.Module()
+        self.child.register_parameter(
+            "lazy_weight", UninitializedParameter(requires_grad=False)
+        )
 
 
 def test_move_metatensors():
@@ -60,6 +98,34 @@ def test_reload_lifecycle():
         assert tensor.__dict__ == materialized_tensor.__dict__
 
 
+def test_materialize_layer_preserves_non_meta_tensors():
+    """Ensure that materialize_layer does not overwrite non meta tensors."""
+    layer = torch.nn.Linear(2, 3, bias=True)
+
+    # Create a non meta bias tensor and meta weight, which can happen with FP8
+    bias_values = torch.ones(3)
+    layer.bias.data.copy_(bias_values)
+    layer.weight = torch.nn.Parameter(layer.weight.data.to("meta"))
+
+    assert layer.weight.is_meta
+    assert not layer.bias.is_meta
+
+    # materialize the layer weights after the bias is initialized
+    info = LayerReloadingInfo(
+        restore_metadata=({}, {}),
+        restore_device=torch.device("cpu"),
+    )
+    materialize_layer(layer, info)
+
+    # Ensure the weight materialized off meta
+    assert not layer.weight.is_meta
+    assert layer.weight.device.type == "cpu"
+
+    # Ensure that the bias is (still) not meta and values are unchanged
+    assert not layer.bias.is_meta
+    assert torch.equal(layer.bias.data, bias_values)
+
+
 def test_model_cleanup(dist_init, default_vllm_config):
     layer = QKVParallelLinear(2, 3, 4)
     assert layer.weight.weight_loader.__self__ is layer
@@ -96,6 +162,81 @@ def complex_weight_loader(param, loaded_weight):
     assert ret == "value"
 
 
+def test_layerwise_reload_skips_non_persistent_parameter_alias_buffers(monkeypatch):
+    layer = _AliasedBufferLayer()
+    model = torch.nn.Sequential(layer)
+    loaded_weight = torch.full_like(layer.weight, 7.0)
+
+    def materialize_with_sentinel(meta_tensor):
+        tensor = torch.empty_strided(
+            size=tuple(meta_tensor.size()),
+            stride=tuple(meta_tensor.stride()),
+            dtype=meta_tensor.dtype,
+            requires_grad=False,
+        )
+        tensor.fill_(-123.0)
+        tensor.__class__ = meta_tensor.__class__
+        tensor.__dict__ = meta_tensor.__dict__.copy()
+        return tensor
+
+    monkeypatch.setattr(
+        reload_meta, "materialize_meta_tensor", materialize_with_sentinel
+    )
+
+    record_metadata_for_reloading(model)
+    initialize_layerwise_reload(model)
+    layer.weight.weight_loader(layer.weight, loaded_weight)
+    finalize_layerwise_reload(model, model_config=None)
+
+    assert torch.equal(layer.weight, loaded_weight)
+    assert layer.weight_view.untyped_storage().data_ptr() == (
+        layer.weight.untyped_storage().data_ptr()
+    )
+
+
+def test_capture_layer_to_meta_skips_uninitialized_parameter_storage_ptrs():
+    layer = _AliasedBufferWithUninitializedChildLayer()
+
+    _, buffers = capture_layer_to_meta(layer)
+
+    assert "weight_view" not in buffers
+
+
+def test_layerwise_reload_skips_child_parameter_alias_buffers(monkeypatch):
+    layer = _ParentAliasedChildBufferLayer()
+    model = torch.nn.Sequential(layer)
+    loaded_conv = torch.full_like(layer.conv1d.weight, 7.0)
+    loaded_scale = torch.full_like(layer.scale, 3.0)
+
+    def materialize_with_sentinel(meta_tensor):
+        tensor = torch.empty_strided(
+            size=tuple(meta_tensor.size()),
+            stride=tuple(meta_tensor.stride()),
+            dtype=meta_tensor.dtype,
+            requires_grad=False,
+        )
+        tensor.fill_(-123.0)
+        tensor.__class__ = meta_tensor.__class__
+        tensor.__dict__ = meta_tensor.__dict__.copy()
+        return tensor
+
+    monkeypatch.setattr(
+        reload_meta, "materialize_meta_tensor", materialize_with_sentinel
+    )
+
+    record_metadata_for_reloading(model)
+    initialize_layerwise_reload(model)
+    layer.conv1d.weight.weight_loader(layer.conv1d.weight, loaded_conv)
+    layer.scale.weight_loader(layer.scale, loaded_scale)
+    finalize_layerwise_reload(model, model_config=None)
+
+    assert torch.equal(layer.conv1d.weight, loaded_conv)
+    assert torch.equal(layer.conv_weights, loaded_conv.view(-1))
+    assert layer.conv_weights.untyped_storage().data_ptr() == (
+        layer.conv1d.weight.untyped_storage().data_ptr()
+    )
+
+
 @pytest.mark.parametrize(
     "tp_size", [pytest.param(1), pytest.param(2, marks=[pytest.mark.slow_test])]
 )
@@ -140,7 +281,7 @@ def complex_weight_loader(param, loaded_weight):
     ],
 )
 def test_reload_weights(base_model, mul_model, add_model, tp_size, vllm_runner):
-    if cuda_device_count_stateless() < tp_size:
+    if current_platform.device_count() < tp_size:
         pytest.skip(reason="Not enough CUDA devices")
 
     if "FP8" in base_model and not current_platform.supports_fp8():
@@ -165,6 +306,34 @@ def test_reload_weights(base_model, mul_model, add_model, tp_size, vllm_runner):
         assert add_perp < mul_perp
 
 
+def test_kv_scale_reload(vllm_runner):
+    """Test reloading a checkpoint that contains k_scale/v_scale weights."""
+    if not current_platform.supports_fp8():
+        pytest.skip(reason="Requires FP8 support")
+
+    model = "nm-testing/Llama-3.2-1B-Instruct-FP8-KV"
+
+    # Load dummy weights, then reload real checkpoint
+    with vllm_runner(
+        model_name=model,
+        load_format="dummy",
+        enable_prefix_caching=False,
+        max_model_len=16,
+        max_num_seqs=1,
+    ) as llm:
+        llm.collective_rpc(
+            "update_config",
+            kwargs={"overrides": {"load_config": {"load_format": "auto"}}},
+        )
+        llm.collective_rpc("reload_weights", kwargs={"weights_path": model})
+        reloaded_perp = llm.generate_prompt_perplexity(
+            ["The capital of France is the city of Paris"],
+            mask=["The capital of France is"],
+        )[0]
+
+    assert reloaded_perp < 10
+
+
 @pytest.mark.parametrize(
     "tp_size", [pytest.param(1), pytest.param(2, marks=[pytest.mark.slow_test])]
 )
@@ -206,8 +375,8 @@ def test_reload_weights(base_model, mul_model, add_model, tp_size, vllm_runner):
 def test_online_quantize_reload(
     base_model, mul_model, add_model, quantization, tp_size, vllm_runner
 ):
-    if cuda_device_count_stateless() < tp_size:
-        pytest.skip(reason="Not enough CUDA devices")
+    if current_platform.device_count() < tp_size:
+        pytest.skip(reason="Not enough GPU devices")
 
     if quantization == "fp8" and not current_platform.supports_fp8():
         pytest.skip(reason="Requires FP8 support")
diff --git a/tests/model_executor/test_eagle_quantization.py b/tests/model_executor/test_eagle_quantization.py
index 519a48cae52e..481715da9cd7 100644
--- a/tests/model_executor/test_eagle_quantization.py
+++ b/tests/model_executor/test_eagle_quantization.py
@@ -10,9 +10,10 @@
 from vllm.model_executor.models.utils import get_draft_quant_config
 from vllm.platforms import current_platform
 
+DEVICE_TYPE = current_platform.device_type
 DEVICES = (
-    [f"cuda:{i}" for i in range(1 if torch.accelerator.device_count() == 1 else 2)]
-    if current_platform.is_cuda_alike()
+    [f"{DEVICE_TYPE}:{i}" for i in range(min(torch.accelerator.device_count(), 2))]
+    if not current_platform.is_cpu()
     else ["cpu"]
 )
 
diff --git a/tests/model_executor/test_enabled_custom_ops.py b/tests/model_executor/test_enabled_custom_ops.py
index 36d7f5cc47b7..490284f43954 100644
--- a/tests/model_executor/test_enabled_custom_ops.py
+++ b/tests/model_executor/test_enabled_custom_ops.py
@@ -23,12 +23,7 @@
     vllm_topk_sigmoid,
     vllm_topk_softmax,
 )
-from vllm.model_executor.layers.layernorm import (
-    RMSNorm,
-    dispatch_rocm_rmsnorm_func,
-    fused_add_rms_norm,
-    rms_norm,
-)
+from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.platforms import current_platform
 
 RMS_NORM_SUPPORTED_DTYPES = [torch.float16, torch.bfloat16]
@@ -154,30 +149,3 @@ def test_topk_sigmoid_dispatch(use_rocm_aiter: bool):
         assert topk_func == rocm_aiter_ops.topk_sigmoid
     else:
         assert topk_func == vllm_topk_sigmoid
-
-
-@pytest.mark.parametrize("add_residual", [True, False])
-@pytest.mark.parametrize("dtype", [torch.float32, torch.float16, torch.bfloat16])
-@pytest.mark.parametrize("use_rocm_aiter", [True, False])
-@pytest.mark.skipif(
-    not current_platform.is_rocm(), reason="AITER is a feature exclusive for ROCm"
-)
-def test_rms_norm_dispatch(
-    add_residual: bool, dtype: torch.dtype, use_rocm_aiter: bool
-):
-    rms_norm_func = dispatch_rocm_rmsnorm_func(add_residual, dtype, use_rocm_aiter)
-
-    should_use_rocm_aiter = (
-        current_platform.is_rocm()
-        and use_rocm_aiter
-        and dtype in RMS_NORM_SUPPORTED_DTYPES
-    )
-
-    if add_residual and should_use_rocm_aiter:
-        assert rms_norm_func == rocm_aiter_ops.rms_norm2d_with_add
-    elif should_use_rocm_aiter:
-        assert rms_norm_func == rocm_aiter_ops.rms_norm
-    elif add_residual:
-        assert rms_norm_func == fused_add_rms_norm
-    else:
-        assert rms_norm_func == rms_norm
diff --git a/tests/model_executor/test_ernie45_vl_mrope.py b/tests/model_executor/test_ernie45_vl_mrope.py
new file mode 100644
index 000000000000..8344115ba341
--- /dev/null
+++ b/tests/model_executor/test_ernie45_vl_mrope.py
@@ -0,0 +1,143 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from dataclasses import dataclass
+
+import pytest
+import torch
+
+from vllm.model_executor.models.ernie45_vl import (
+    Ernie4_5_VLMoeForConditionalGeneration,
+)
+from vllm.multimodal.inputs import (
+    MultiModalFeatureSpec,
+    MultiModalFieldElem,
+    MultiModalKwargsItem,
+    PlaceholderRange,
+)
+
+pytestmark = pytest.mark.skip_global_cleanup
+
+
+@pytest.fixture(autouse=True, scope="module")
+def _force_cpu_default_device():
+    original = torch.get_default_device()
+    torch.set_default_device("cpu")
+    yield
+    torch.set_default_device(original)
+
+
+@dataclass
+class DummyConfig:
+    spatial_conv_size: int = 2
+    temporal_conv_size: int = 2
+
+
+def make_model(config: DummyConfig) -> Ernie4_5_VLMoeForConditionalGeneration:
+    model = object.__new__(Ernie4_5_VLMoeForConditionalGeneration)
+    model.config = config
+    return model
+
+
+def make_mm_feature(
+    *,
+    modality: str,
+    offset: int,
+    length: int,
+    grid_thw: tuple[int, int, int],
+) -> MultiModalFeatureSpec:
+    field_name = "image_grid_thw" if modality == "image" else "video_grid_thw"
+    return MultiModalFeatureSpec(
+        data=MultiModalKwargsItem(
+            {
+                field_name: MultiModalFieldElem(
+                    data=torch.tensor(grid_thw),
+                    field=None,  # HACK.
+                ),
+            }
+        ),
+        modality=modality,
+        identifier="DUMMY",
+        mm_position=PlaceholderRange(offset=offset, length=length),
+    )
+
+
+def test_get_mrope_input_positions_text_only():
+    model = make_model(DummyConfig())
+
+    positions, delta = model.get_mrope_input_positions(
+        input_tokens=[11, 12, 13, 14, 15],
+        mm_features=[],
+    )
+
+    expected = torch.tensor(
+        [
+            [0, 1, 2, 3, 4],
+            [0, 1, 2, 3, 4],
+            [0, 1, 2, 3, 4],
+        ]
+    )
+
+    assert torch.equal(positions, expected)
+    assert delta == 0
+
+
+def test_get_mrope_input_positions_single_image():
+    model = make_model(DummyConfig())
+    mm_features = [
+        make_mm_feature(
+            modality="image",
+            offset=1,
+            length=4,
+            grid_thw=(1, 4, 4),
+        )
+    ]
+
+    positions, delta = model.get_mrope_input_positions(
+        input_tokens=[10, 20, 21, 22, 23, 30, 31],
+        mm_features=mm_features,
+    )
+
+    expected = torch.tensor(
+        [
+            [0, 1, 1, 1, 1, 3, 4],
+            [0, 1, 1, 2, 2, 3, 4],
+            [0, 1, 2, 1, 2, 3, 4],
+        ]
+    )
+
+    assert torch.equal(positions, expected)
+    assert delta == -2
+
+
+def test_get_mrope_input_positions_interleaved_image_and_video():
+    model = make_model(DummyConfig())
+    mm_features = [
+        make_mm_feature(
+            modality="image",
+            offset=1,
+            length=4,
+            grid_thw=(1, 4, 4),
+        ),
+        make_mm_feature(
+            modality="video",
+            offset=7,
+            length=2,
+            grid_thw=(2, 4, 2),
+        ),
+    ]
+
+    positions, delta = model.get_mrope_input_positions(
+        input_tokens=[10, 20, 21, 22, 23, 30, 31, 40, 41, 50, 51],
+        mm_features=mm_features,
+    )
+
+    expected = torch.tensor(
+        [
+            [0, 1, 1, 1, 1, 3, 4, 5, 5, 7, 8],
+            [0, 1, 1, 2, 2, 3, 4, 5, 6, 7, 8],
+            [0, 1, 2, 1, 2, 3, 4, 5, 5, 7, 8],
+        ]
+    )
+
+    assert torch.equal(positions, expected)
+    assert delta == -2
diff --git a/tests/model_executor/test_flashinfer_autotune_cache.py b/tests/model_executor/test_flashinfer_autotune_cache.py
new file mode 100644
index 000000000000..7e6a83bb4d14
--- /dev/null
+++ b/tests/model_executor/test_flashinfer_autotune_cache.py
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import sys
+from hashlib import sha256
+from pathlib import Path
+from types import SimpleNamespace
+
+from vllm.model_executor.warmup import kernel_warmup
+
+
+def test_resolve_flashinfer_autotune_file_default_layout(
+    monkeypatch, tmp_path: Path
+) -> None:
+    fake_jit = SimpleNamespace(
+        env=SimpleNamespace(
+            FLASHINFER_WORKSPACE_DIR=Path("/flashinfer-cache/0.6.11.post2/103a")
+        )
+    )
+    fake_flashinfer = SimpleNamespace(jit=fake_jit)
+    monkeypatch.setitem(sys.modules, "flashinfer", fake_flashinfer)
+    monkeypatch.setitem(sys.modules, "flashinfer.jit", fake_jit)
+    monkeypatch.setattr(
+        kernel_warmup, "aot_compile_hash_factors", lambda _: ["env-hash", "config-hash"]
+    )
+    monkeypatch.setattr(kernel_warmup.envs, "VLLM_CACHE_ROOT", str(tmp_path))
+    monkeypatch.setattr(kernel_warmup.envs, "VLLM_FLASHINFER_AUTOTUNE_CACHE_DIR", None)
+
+    runner = SimpleNamespace(vllm_config=SimpleNamespace())
+    cache_hash = sha256(str(["env-hash", "config-hash"]).encode()).hexdigest()
+
+    path = kernel_warmup._resolve_flashinfer_autotune_file(runner)
+
+    assert path == (
+        tmp_path
+        / "flashinfer_autotune_cache"
+        / "0.6.11.post2"
+        / "103a"
+        / cache_hash
+        / "autotune_configs.json"
+    )
+    assert path.parent.is_dir()
+
+
+def test_resolve_flashinfer_autotune_file_uses_override_dir(
+    monkeypatch, tmp_path: Path
+) -> None:
+    monkeypatch.setattr(
+        kernel_warmup.envs, "VLLM_FLASHINFER_AUTOTUNE_CACHE_DIR", str(tmp_path)
+    )
+    monkeypatch.setattr(
+        kernel_warmup, "aot_compile_hash_factors", lambda _: ["env-hash", "config-hash"]
+    )
+
+    runner = SimpleNamespace(vllm_config=SimpleNamespace())
+    cache_hash = sha256(str(["env-hash", "config-hash"]).encode()).hexdigest()
+
+    path = kernel_warmup._resolve_flashinfer_autotune_file(runner)
+
+    assert path == tmp_path / cache_hash / "autotune_configs.json"
diff --git a/tests/model_executor/test_gemma_hidden_act.py b/tests/model_executor/test_gemma_hidden_act.py
new file mode 100644
index 000000000000..d34851a6f107
--- /dev/null
+++ b/tests/model_executor/test_gemma_hidden_act.py
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+import torch
+
+from vllm.model_executor.layers.activation import (
+    GeluAndMul,
+    SiluAndMul,
+    get_act_and_mul_fn,
+    get_act_fn,
+)
+from vllm.model_executor.models.gemma3 import Gemma3MLP
+from vllm.model_executor.models.gemma4 import Gemma4MLP
+
+
+@pytest.mark.parametrize(
+    ("activation_name", "expected_type"),
+    [
+        ("gelu_pytorch_tanh", GeluAndMul),
+        ("silu", SiluAndMul),
+        ("swish", SiluAndMul),
+    ],
+)
+def test_get_act_and_mul_fn_supports_gemma_hidden_act_aliases(
+    activation_name: str,
+    expected_type: type[torch.nn.Module],
+    default_vllm_config,
+) -> None:
+    assert isinstance(get_act_and_mul_fn(activation_name), expected_type)
+
+
+def test_get_act_fn_supports_swish_alias() -> None:
+    assert isinstance(get_act_fn("swish"), torch.nn.SiLU)
+
+
+@pytest.mark.parametrize("mlp_cls", [Gemma3MLP, Gemma4MLP])
+@pytest.mark.parametrize(
+    ("activation_name", "expected_type"),
+    [
+        ("gelu_pytorch_tanh", GeluAndMul),
+        ("silu", SiluAndMul),
+        ("swish", SiluAndMul),
+    ],
+)
+def test_gemma_mlp_supports_hidden_act_variants(
+    mlp_cls: type[torch.nn.Module],
+    activation_name: str,
+    expected_type: type[torch.nn.Module],
+    default_vllm_config,
+    dist_init,
+) -> None:
+    mlp = mlp_cls(
+        hidden_size=16,
+        intermediate_size=32,
+        hidden_activation=activation_name,
+    )
+
+    assert isinstance(mlp.act_fn, expected_type)
+    assert mlp(torch.randn(3, 16)).shape == (3, 16)
diff --git a/tests/model_executor/test_keye_mrope.py b/tests/model_executor/test_keye_mrope.py
new file mode 100644
index 000000000000..2289a59e2bf9
--- /dev/null
+++ b/tests/model_executor/test_keye_mrope.py
@@ -0,0 +1,145 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from dataclasses import dataclass, field
+
+import pytest
+import torch
+
+from vllm.model_executor.models.keye import KeyeForConditionalGeneration
+from vllm.multimodal.inputs import (
+    MultiModalFeatureSpec,
+    MultiModalFieldElem,
+    MultiModalKwargsItem,
+    PlaceholderRange,
+)
+
+pytestmark = pytest.mark.skip_global_cleanup
+
+
+@pytest.fixture(autouse=True, scope="module")
+def _force_cpu_default_device():
+    original = torch.get_default_device()
+    torch.set_default_device("cpu")
+    yield
+    torch.set_default_device(original)
+
+
+@dataclass
+class DummyVisionConfig:
+    spatial_merge_size: int = 2
+
+
+@dataclass
+class DummyConfig:
+    vision_config: DummyVisionConfig = field(default_factory=DummyVisionConfig)
+
+
+def make_model(config: DummyConfig) -> KeyeForConditionalGeneration:
+    model = object.__new__(KeyeForConditionalGeneration)
+    model.config = config
+    return model
+
+
+def make_mm_feature(
+    *,
+    modality: str,
+    offset: int,
+    length: int,
+    grid_thw: tuple[int, int, int] | list[tuple[int, int, int]],
+) -> MultiModalFeatureSpec:
+    field_name = "image_grid_thw" if modality == "image" else "video_grid_thw"
+    return MultiModalFeatureSpec(
+        data=MultiModalKwargsItem(
+            {
+                field_name: MultiModalFieldElem(
+                    data=torch.tensor(grid_thw),
+                    field=None,  # HACK.
+                ),
+            }
+        ),
+        modality=modality,
+        identifier="DUMMY",
+        mm_position=PlaceholderRange(offset=offset, length=length),
+    )
+
+
+def test_get_mrope_input_positions_text_only():
+    model = make_model(DummyConfig())
+
+    positions, delta = model.get_mrope_input_positions(
+        input_tokens=[11, 12, 13, 14, 15],
+        mm_features=[],
+    )
+
+    expected = torch.tensor(
+        [
+            [0, 1, 2, 3, 4],
+            [0, 1, 2, 3, 4],
+            [0, 1, 2, 3, 4],
+        ]
+    )
+
+    assert torch.equal(positions, expected)
+    assert delta == 0
+
+
+def test_get_mrope_input_positions_single_image():
+    model = make_model(DummyConfig())
+    mm_features = [
+        make_mm_feature(
+            modality="image",
+            offset=1,
+            length=4,
+            grid_thw=(1, 4, 4),
+        )
+    ]
+
+    positions, delta = model.get_mrope_input_positions(
+        input_tokens=[10, 20, 21, 22, 23, 30, 31],
+        mm_features=mm_features,
+    )
+
+    expected = torch.tensor(
+        [
+            [0, 1, 1, 1, 1, 3, 4],
+            [0, 1, 1, 2, 2, 3, 4],
+            [0, 1, 2, 1, 2, 3, 4],
+        ]
+    )
+
+    assert torch.equal(positions, expected)
+    assert delta == -2
+
+
+def test_get_mrope_input_positions_interleaved_image_and_video():
+    model = make_model(DummyConfig())
+    mm_features = [
+        make_mm_feature(
+            modality="image",
+            offset=1,
+            length=4,
+            grid_thw=(1, 4, 4),
+        ),
+        make_mm_feature(
+            modality="video",
+            offset=7,
+            length=4,
+            grid_thw=[(2, 4, 2)],
+        ),
+    ]
+
+    positions, delta = model.get_mrope_input_positions(
+        input_tokens=[10, 20, 21, 22, 23, 30, 31, 40, 41, 42, 43, 50, 51],
+        mm_features=mm_features,
+    )
+
+    expected = torch.tensor(
+        [
+            [0, 1, 1, 1, 1, 3, 4, 5, 5, 7, 7, 9, 10],
+            [0, 1, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+            [0, 1, 2, 1, 2, 3, 4, 5, 5, 7, 7, 9, 10],
+        ]
+    )
+
+    assert torch.equal(positions, expected)
+    assert delta == -2
diff --git a/tests/model_executor/test_keye_vl1_5_mrope.py b/tests/model_executor/test_keye_vl1_5_mrope.py
new file mode 100644
index 000000000000..de3488bc922e
--- /dev/null
+++ b/tests/model_executor/test_keye_vl1_5_mrope.py
@@ -0,0 +1,145 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from dataclasses import dataclass, field
+
+import pytest
+import torch
+
+from vllm.model_executor.models.keye_vl1_5 import KeyeVL1_5ForConditionalGeneration
+from vllm.multimodal.inputs import (
+    MultiModalFeatureSpec,
+    MultiModalFieldElem,
+    MultiModalKwargsItem,
+    PlaceholderRange,
+)
+
+pytestmark = pytest.mark.skip_global_cleanup
+
+
+@pytest.fixture(autouse=True, scope="module")
+def _force_cpu_default_device():
+    original = torch.get_default_device()
+    torch.set_default_device("cpu")
+    yield
+    torch.set_default_device(original)
+
+
+@dataclass
+class DummyVisionConfig:
+    spatial_merge_size: int = 2
+
+
+@dataclass
+class DummyConfig:
+    vision_config: DummyVisionConfig = field(default_factory=DummyVisionConfig)
+
+
+def make_model(config: DummyConfig) -> KeyeVL1_5ForConditionalGeneration:
+    model = object.__new__(KeyeVL1_5ForConditionalGeneration)
+    model.config = config
+    return model
+
+
+def make_mm_feature(
+    *,
+    modality: str,
+    offset: int,
+    length: int,
+    grid_thw: tuple[int, int, int] | list[tuple[int, int, int]],
+    is_embed: list[bool] | None = None,
+) -> MultiModalFeatureSpec:
+    field_name = "image_grid_thw" if modality == "image" else "video_grid_thw"
+    return MultiModalFeatureSpec(
+        data=MultiModalKwargsItem(
+            {
+                field_name: MultiModalFieldElem(
+                    data=torch.tensor(grid_thw),
+                    field=None,  # HACK.
+                ),
+            }
+        ),
+        modality=modality,
+        identifier="DUMMY",
+        mm_position=PlaceholderRange(
+            offset=offset,
+            length=length,
+            is_embed=None if is_embed is None else torch.tensor(is_embed),
+        ),
+    )
+
+
+def test_get_mrope_input_positions_text_only():
+    model = make_model(DummyConfig())
+
+    positions, delta = model.get_mrope_input_positions(
+        input_tokens=[11, 12, 13, 14, 15],
+        mm_features=[],
+    )
+
+    expected = torch.tensor(
+        [
+            [0, 1, 2, 3, 4],
+            [0, 1, 2, 3, 4],
+            [0, 1, 2, 3, 4],
+        ]
+    )
+
+    assert torch.equal(positions, expected)
+    assert delta == 0
+
+
+def test_get_mrope_input_positions_single_image():
+    model = make_model(DummyConfig())
+    mm_features = [
+        make_mm_feature(
+            modality="image",
+            offset=1,
+            length=4,
+            grid_thw=(1, 4, 4),
+        )
+    ]
+
+    positions, delta = model.get_mrope_input_positions(
+        input_tokens=[10, 20, 21, 22, 23, 30, 31],
+        mm_features=mm_features,
+    )
+
+    expected = torch.tensor(
+        [
+            [0, 1, 1, 1, 1, 3, 4],
+            [0, 1, 1, 2, 2, 3, 4],
+            [0, 1, 2, 1, 2, 3, 4],
+        ]
+    )
+
+    assert torch.equal(positions, expected)
+    assert delta == -2
+
+
+def test_get_mrope_input_positions_video_uses_embed_ranges():
+    model = make_model(DummyConfig())
+    mm_features = [
+        make_mm_feature(
+            modality="video",
+            offset=1,
+            length=8,
+            grid_thw=[(2, 4, 2)],
+            is_embed=[False, False, True, True, False, False, True, True],
+        )
+    ]
+
+    positions, delta = model.get_mrope_input_positions(
+        input_tokens=[10, 101, 102, 20, 21, 103, 104, 30, 31, 40, 41],
+        mm_features=mm_features,
+    )
+
+    expected = torch.tensor(
+        [
+            [0, 1, 2, 3, 3, 5, 6, 7, 7, 9, 10],
+            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+            [0, 1, 2, 3, 3, 5, 6, 7, 7, 9, 10],
+        ]
+    )
+
+    assert torch.equal(positions, expected)
+    assert delta == 0
diff --git a/tests/model_executor/test_oink_integration.py b/tests/model_executor/test_oink_integration.py
index d7f38fdd5158..2f37472b73ef 100644
--- a/tests/model_executor/test_oink_integration.py
+++ b/tests/model_executor/test_oink_integration.py
@@ -1,60 +1,97 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
+import multiprocessing
 import types
 
 import pytest
-import torch
-
-
-def _load_oink_ops_module():
-    # Import the module normally (vllm is installed as an editable package in CI).
-    from vllm import _oink_ops
-
-    return _oink_ops
-
 
-def test_oink_availability_checks(monkeypatch: pytest.MonkeyPatch):
-    _oink_ops = _load_oink_ops_module()
-
-    # Ensure the ops namespace exists and is mutable for tests.
-    monkeypatch.setattr(
-        torch.ops,
-        "oink",
-        types.SimpleNamespace(rmsnorm=lambda x, w, eps: x),
-        raising=False,
-    )
-
-    # Case 1: CUDA not available.
-    monkeypatch.setattr(torch.cuda, "is_available", lambda: False)
-    assert _oink_ops.is_oink_available_for_device(0) is False
-
-    # Case 2: CUDA available but < SM100.
-    monkeypatch.setattr(torch.cuda, "is_available", lambda: True)
-    monkeypatch.setattr(torch.cuda, "get_device_capability", lambda idx: (9, 0))
-    assert _oink_ops.is_oink_available_for_device(0) is False
-
-    # Case 3: CUDA available and SM100, rmsnorm op registered.
-    monkeypatch.setattr(torch.cuda, "get_device_capability", lambda idx: (10, 0))
-    assert _oink_ops.is_oink_available_for_device(0) is True
-
-    # fused op presence probe
-    assert _oink_ops.has_fused_add_rms_norm() is False
-    monkeypatch.setattr(
-        torch.ops,
-        "oink",
-        types.SimpleNamespace(
-            rmsnorm=lambda x, w, eps: x,
-            fused_add_rms_norm=lambda x, residual, w, eps: None,
+from vllm.platforms import current_platform
+
+
+def _test_oink_availability_impl(
+    device_capability: tuple[int, int],
+    has_rmsnorm: bool,
+    has_fused_add_rms_norm: bool,
+    expected_available: bool,
+    expected_fused: bool,
+) -> None:
+    """Test OINK support detection with mocked state."""
+    import torch
+
+    from vllm import platforms
+
+    # Mock device capability (class method, override on class)
+    dc = platforms.interface.DeviceCapability(*device_capability)
+    platforms.current_platform.__class__.get_device_capability = lambda device_id=0: dc
+
+    # Mock oink ops
+    oink_ops = types.SimpleNamespace()
+    if has_rmsnorm:
+        oink_ops.rmsnorm = lambda x, w, eps: x
+    if has_fused_add_rms_norm:
+        oink_ops.fused_add_rms_norm = lambda x, residual, w, eps: None
+
+    torch.ops.oink = oink_ops
+
+    # Now import vllm modules with mocks in place (fresh import with mocked platform)
+    import vllm.kernels.oink_ops  # noqa: F401
+    from vllm.ir.ops import fused_add_rms_norm, rms_norm
+
+    # Verify support checks
+    assert rms_norm.impls["oink"].supported is expected_available
+    assert fused_add_rms_norm.impls["oink"].supported is expected_fused
+
+
+@pytest.mark.parametrize(
+    "device_capability,has_rmsnorm,has_fused_add_rms_norm,expected_available,expected_fused",
+    [
+        # Case 1: < SM100, ops not supported
+        ((9, 0), True, False, False, False),
+        # Case 2: CUDA available and SM100, rmsnorm op registered
+        ((10, 0), True, False, True, False),
+        # Case 3: SM100 with both rmsnorm and fused_add_rms_norm
+        ((10, 0), True, True, True, True),
+    ],
+)
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="Only test on CUDA")
+def test_oink_availability_checks(
+    device_capability: tuple[int, int],
+    has_rmsnorm: bool,
+    has_fused_add_rms_norm: bool,
+    expected_available: bool,
+    expected_fused: bool,
+):
+    """Test OINK support detection with clean import state for each parameter set."""
+
+    # Use spawn to run function in fresh process with clean imports
+    # TODO migrate to spawn utility:
+    # https://github.com/vllm-project/vllm/issues/41415
+    ctx = multiprocessing.get_context("spawn")
+    process = ctx.Process(
+        target=_test_oink_availability_impl,
+        args=(
+            device_capability,
+            has_rmsnorm,
+            has_fused_add_rms_norm,
+            expected_available,
+            expected_fused,
         ),
-        raising=False,
     )
-    assert _oink_ops.has_fused_add_rms_norm() is True
+    process.start()
+    process.join()
+
+    if process.exitcode != 0:
+        raise AssertionError(
+            f"Subprocess test failed with exit code {process.exitcode}"
+        )
 
 
 def test_can_view_as_2d_stride_guard():
-    # Import the helper from the layernorm module.
-    from vllm.model_executor.layers.layernorm import _can_view_as_2d
+    # No global import
+    import torch
+
+    # Import the helper from the kernels module.
+    from vllm.kernels.oink_ops import _can_view_as_2d
 
     x = torch.zeros((2, 3, 4))
     assert _can_view_as_2d(x) is True
diff --git a/tests/model_executor/test_paddleocr_vl_mrope.py b/tests/model_executor/test_paddleocr_vl_mrope.py
new file mode 100644
index 000000000000..9090a9254d00
--- /dev/null
+++ b/tests/model_executor/test_paddleocr_vl_mrope.py
@@ -0,0 +1,210 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from dataclasses import dataclass, field
+
+import pytest
+import torch
+
+from vllm.model_executor.models.paddleocr_vl import (
+    PaddleOCRVLForConditionalGeneration,
+)
+from vllm.multimodal.inputs import (
+    MultiModalFeatureSpec,
+    MultiModalFieldElem,
+    MultiModalKwargsItem,
+    PlaceholderRange,
+)
+
+pytestmark = pytest.mark.skip_global_cleanup
+
+
+@pytest.fixture(autouse=True, scope="module")
+def _force_cpu_default_device():
+    original = torch.get_default_device()
+    torch.set_default_device("cpu")
+    yield
+    torch.set_default_device(original)
+
+
+@dataclass
+class DummyVisionConfig:
+    spatial_merge_size: int = 2
+    patch_size: int = 14
+
+
+@dataclass
+class DummyConfig:
+    image_token_id: int = 151655
+    video_token_id: int = 151654
+    vision_start_token_id: int = 151652
+    vision_end_token_id: int = 151653
+    vision_config: DummyVisionConfig = field(default_factory=DummyVisionConfig)
+
+
+def make_model(config: DummyConfig) -> PaddleOCRVLForConditionalGeneration:
+    model = object.__new__(PaddleOCRVLForConditionalGeneration)
+    model.config = config
+    return model
+
+
+def make_mm_feature(
+    *,
+    offset: int,
+    length: int,
+    image_grid_thw: tuple[int, int, int],
+) -> MultiModalFeatureSpec:
+    return MultiModalFeatureSpec(
+        data=MultiModalKwargsItem(
+            {
+                "image_grid_thw": MultiModalFieldElem(
+                    data=torch.tensor(image_grid_thw),
+                    field=None,
+                ),
+            }
+        ),
+        modality="image",
+        identifier="DUMMY",
+        mm_position=PlaceholderRange(offset=offset, length=length),
+    )
+
+
+def test_get_mrope_input_positions_text_only():
+    model = make_model(DummyConfig())
+    input_tokens = [11, 12, 13, 14, 15]
+    positions, delta = model.get_mrope_input_positions(
+        input_tokens=input_tokens,
+        mm_features=[],
+    )
+    expected = torch.tensor(
+        [
+            [0, 1, 2, 3, 4],
+            [0, 1, 2, 3, 4],
+            [0, 1, 2, 3, 4],
+        ]
+    )
+    assert torch.equal(positions, expected)
+    assert delta == 0
+
+
+def test_get_mrope_input_positions_single_image():
+    model = make_model(DummyConfig())
+    spatial_merge_size = model.config.vision_config.spatial_merge_size
+
+    t, h, w = 1, 2, 2
+    num_image_tokens = t * h * w
+
+    input_tokens = (
+        [10]
+        + [model.config.vision_start_token_id]
+        + [model.config.image_token_id] * num_image_tokens
+        + [model.config.vision_end_token_id]
+        + [30, 31]
+    )
+
+    mm_features = [
+        make_mm_feature(
+            offset=2,  # 1 (text) + 1 (vision_start)
+            length=num_image_tokens,
+            image_grid_thw=(t, h * spatial_merge_size, w * spatial_merge_size),
+        )
+    ]
+
+    positions, delta = model.get_mrope_input_positions(
+        input_tokens=input_tokens,
+        mm_features=mm_features,
+    )
+
+    expected = torch.tensor(
+        [
+            [0, 1, 2, 2, 2, 2, 4, 5, 6],
+            [0, 1, 2, 2, 3, 3, 4, 5, 6],
+            [0, 1, 2, 3, 2, 3, 4, 5, 6],
+        ]
+    )
+
+    assert torch.equal(positions, expected)
+    expected_delta = (positions.max().item() + 1) - len(input_tokens)
+    assert delta == expected_delta
+
+
+def test_get_mrope_input_positions_multiple_images():
+    model = make_model(DummyConfig())
+    spatial_merge_size = model.config.vision_config.spatial_merge_size
+
+    t1, h1, w1 = 1, 2, 2
+    num1 = t1 * h1 * w1
+
+    t2, h2, w2 = 1, 1, 3
+    num2 = t2 * h2 * w2
+
+    input_tokens = (
+        [10]
+        + [model.config.vision_start_token_id]
+        + [model.config.image_token_id] * num1
+        + [model.config.vision_end_token_id]
+        + [20, 21]
+        + [model.config.vision_start_token_id]
+        + [model.config.image_token_id] * num2
+        + [model.config.vision_end_token_id]
+        + [30]
+    )
+
+    mm_features = [
+        make_mm_feature(
+            offset=2,
+            length=num1,
+            image_grid_thw=(t1, h1 * spatial_merge_size, w1 * spatial_merge_size),
+        ),
+        make_mm_feature(
+            offset=2 + num1 + 1 + 2 + 1,
+            length=num2,
+            image_grid_thw=(t2, h2 * spatial_merge_size, w2 * spatial_merge_size),
+        ),
+    ]
+
+    positions, delta = model.get_mrope_input_positions(
+        input_tokens=input_tokens,
+        mm_features=mm_features,
+    )
+
+    assert positions.shape == (3, 15)
+    assert not torch.equal(positions[:, 2:6], torch.arange(4).expand(3, 4) + 2)
+    assert not torch.equal(positions[:, 10:13], torch.arange(3).expand(3, 3) + 10)
+
+
+def test_get_mrope_input_positions_image_at_start():
+    model = make_model(DummyConfig())
+    spatial_merge_size = model.config.vision_config.spatial_merge_size
+
+    t, h, w = 1, 2, 2
+    num_tokens = t * h * w
+
+    input_tokens = (
+        [model.config.vision_start_token_id]
+        + [model.config.image_token_id] * num_tokens
+        + [model.config.vision_end_token_id]
+        + [10, 11]
+    )
+
+    mm_features = [
+        make_mm_feature(
+            offset=1,  # start token at index 0
+            length=num_tokens,
+            image_grid_thw=(t, h * spatial_merge_size, w * spatial_merge_size),
+        )
+    ]
+
+    positions, delta = model.get_mrope_input_positions(
+        input_tokens=input_tokens,
+        mm_features=mm_features,
+    )
+
+    expected = torch.tensor(
+        [
+            [0, 1, 1, 1, 1, 3, 4, 5],
+            [0, 1, 1, 2, 2, 3, 4, 5],
+            [0, 1, 2, 1, 2, 3, 4, 5],
+        ]
+    )
+
+    assert torch.equal(positions, expected)
diff --git a/tests/model_executor/test_routed_experts_capture.py b/tests/model_executor/test_routed_experts_capture.py
index 45bf4bcac6a8..152feac9e3ae 100644
--- a/tests/model_executor/test_routed_experts_capture.py
+++ b/tests/model_executor/test_routed_experts_capture.py
@@ -1,23 +1,54 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import types
+from types import SimpleNamespace
+from unittest.mock import patch
 
 import pytest
 import torch
 
 from vllm.distributed.eplb.eplb_state import EplbLayerState
 from vllm.model_executor.layers.fused_moe.config import RoutingMethodType
+from vllm.model_executor.layers.fused_moe.routed_experts_capturer import (
+    RoutedExpertsCapturer,
+)
 from vllm.model_executor.layers.fused_moe.router.base_router import BaseRouter
 
 pytestmark = pytest.mark.cpu_test
 
+_REC_MODULE = "vllm.model_executor.layers.fused_moe.routed_experts_capturer"
+
+
+def _capturer_with_buffer(
+    *,
+    max_tokens: int = 8,
+    num_layers: int = 4,
+    num_experts_per_tok: int = 2,
+    dp_rank: int = 0,
+    tp_size: int = 1,
+) -> RoutedExpertsCapturer:
+    # Bypass __init__ so the test can use a CPU buffer and skip the
+    # VllmConfig dependency. The CUDA device-tensor allocation in the
+    # real constructor is not what we are exercising here.
+    c = RoutedExpertsCapturer.__new__(RoutedExpertsCapturer)
+    c.dp_rank = dp_rank
+    c.tp_size = tp_size
+    c.device_buffer = torch.full(
+        (max_tokens, num_layers, num_experts_per_tok),
+        -1,
+        dtype=torch.int32,
+    )
+    return c
+
 
 class DummyRouter(BaseRouter):
     @property
     def routing_method_type(self) -> RoutingMethodType:
         return RoutingMethodType.FUSED_TOPK
 
-    def _compute_routing(self, hidden_states, router_logits, indices_type):
+    def _compute_routing(
+        self, hidden_states, router_logits, indices_type, *, input_ids=None
+    ):
         topk_ids = torch.tensor([[1, 2], [3, 4]], dtype=torch.int64)
         topk_weights = torch.ones_like(topk_ids, dtype=torch.float32)
         return topk_weights, topk_ids
@@ -27,12 +58,11 @@ def _apply_eplb_mapping(self, topk_ids: torch.Tensor) -> torch.Tensor:
         return topk_ids + 10
 
 
-def _make_router() -> DummyRouter:
+def _make_router(eplb_state: EplbLayerState | None = None) -> DummyRouter:
     return DummyRouter(
         top_k=2,
         global_num_experts=16,
-        eplb_state=EplbLayerState(),
-        enable_eplb=False,
+        eplb_state=eplb_state,
         indices_type_getter=None,
     )
 
@@ -57,11 +87,12 @@ def capture_fn(ids):
 
 
 def test_base_router_capture_with_eplb_enabled():
-    router = _make_router()
-    router.enable_eplb = True
-    router.eplb_state.expert_load_view = torch.zeros(32, dtype=torch.int64)
-    router.eplb_state.logical_to_physical_map = torch.arange(32).view(32, 1)
-    router.eplb_state.logical_replica_count = torch.ones(32, dtype=torch.int64)
+    eplb_state = EplbLayerState()
+    eplb_state.expert_load_view = torch.zeros(32, dtype=torch.int64)
+    eplb_state.logical_to_physical_map = torch.arange(32).view(32, 1)
+    eplb_state.logical_replica_count = torch.ones(32, dtype=torch.int64)
+    eplb_state.should_record_tensor = torch.ones((), dtype=torch.bool)
+    router = _make_router(eplb_state=eplb_state)
 
     captured = []
 
@@ -158,3 +189,61 @@ def capture(self, layer_id, topk_ids):
     assert callable(dummy_module.router.capture_fn)
     dummy_module.router.capture_fn(torch.tensor([[9, 10]]))
     assert len(capturer.calls) == 1
+
+
+def test_routed_experts_capturer_single_dp_no_metadata():
+    """dp_metadata is None: capture writes the full topk_ids rows."""
+    capturer = _capturer_with_buffer(dp_rank=0)
+    topk = torch.tensor([[1, 2], [3, 4], [5, 6]], dtype=torch.int32)
+    ctx = SimpleNamespace(dp_metadata=None)
+    with patch(f"{_REC_MODULE}.get_forward_context", return_value=ctx):
+        capturer.capture(layer_id=0, topk_ids=topk)
+    assert torch.equal(capturer.device_buffer[:3, 0, :], topk)
+    assert capturer.device_buffer[3, 0, 0].item() == -1
+
+
+def test_routed_experts_capturer_dp_naive_concatenated_all_ranks():
+    """n == sum(num_tokens_dp): slice this rank's segment from concatenated topk."""
+    capturer = _capturer_with_buffer(dp_rank=1)
+    num_tokens_dp = torch.tensor([2, 3], dtype=torch.int32)
+    ctx = SimpleNamespace(
+        dp_metadata=SimpleNamespace(num_tokens_across_dp_cpu=num_tokens_dp)
+    )
+    # Concatenated order: rank0 rows then rank1 rows.
+    topk = torch.tensor(
+        [[0, 1], [2, 3], [10, 11], [12, 13], [14, 15]], dtype=torch.int32
+    )
+    with patch(f"{_REC_MODULE}.get_forward_context", return_value=ctx):
+        capturer.capture(layer_id=0, topk_ids=topk)
+    want = topk[2:5]
+    assert torch.equal(capturer.device_buffer[:3, 0, :], want)
+
+
+def test_routed_experts_capturer_dp_modular_local_tokens():
+    """n == token_num_per_dp: topk is already local to this DP rank."""
+    capturer = _capturer_with_buffer(dp_rank=1)
+    num_tokens_dp = torch.tensor([2, 3], dtype=torch.int32)
+    ctx = SimpleNamespace(
+        dp_metadata=SimpleNamespace(num_tokens_across_dp_cpu=num_tokens_dp)
+    )
+    topk = torch.tensor([[10, 11], [12, 13], [14, 15]], dtype=torch.int32)
+    with patch(f"{_REC_MODULE}.get_forward_context", return_value=ctx):
+        capturer.capture(layer_id=0, topk_ids=topk)
+    assert torch.equal(capturer.device_buffer[:3, 0, :], topk)
+
+
+def test_routed_experts_capturer_dp_unexpected_batch_raises():
+    """Mismatch between topk batch dim and DP layout: fail fast."""
+    capturer = _capturer_with_buffer(dp_rank=0)
+    num_tokens_dp = torch.tensor([2, 3], dtype=torch.int32)
+    ctx = SimpleNamespace(
+        dp_metadata=SimpleNamespace(num_tokens_across_dp_cpu=num_tokens_dp)
+    )
+    # total=5, local=2: n=1 matches neither naive (5) nor modular (2).
+    topk = torch.tensor([[1, 2]], dtype=torch.int32)
+    with (
+        patch(f"{_REC_MODULE}.get_forward_context", return_value=ctx),
+        pytest.raises(AssertionError, match="unexpected topk_ids batch dim"),
+    ):
+        capturer.capture(layer_id=0, topk_ids=topk)
+    assert capturer.device_buffer[0, 0, 0].item() == -1
diff --git a/tests/model_executor/test_weight_utils.py b/tests/model_executor/test_weight_utils.py
index 93535ae0aacd..260ebdcefb3b 100644
--- a/tests/model_executor/test_weight_utils.py
+++ b/tests/model_executor/test_weight_utils.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import os
 import tempfile
 
 import huggingface_hub.constants
@@ -10,26 +9,10 @@
 
 from vllm.model_executor.model_loader.weight_utils import (
     download_weights_from_hf,
-    enable_hf_transfer,
     maybe_remap_kv_scale_name,
 )
 
 
-def test_hf_transfer_auto_activation():
-    if "HF_HUB_ENABLE_HF_TRANSFER" in os.environ:
-        # in case it is already set, we can't test the auto activation
-        pytest.skip("HF_HUB_ENABLE_HF_TRANSFER is set, can't test auto activation")
-    enable_hf_transfer()
-    try:
-        # enable hf hub transfer if available
-        import hf_transfer  # type: ignore # noqa
-
-        HF_TRANSFER_ACTIVE = True
-    except ImportError:
-        HF_TRANSFER_ACTIVE = False
-    assert huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER == HF_TRANSFER_ACTIVE
-
-
 def test_download_weights_from_hf():
     with tempfile.TemporaryDirectory() as tmpdir:
         # assert LocalEntryNotFoundError error is thrown
@@ -178,5 +161,4 @@ def test_missing_target_returns_none(self):
 
 
 if __name__ == "__main__":
-    test_hf_transfer_auto_activation()
     test_download_weights_from_hf()
diff --git a/tests/models/fixtures/ministral_3b_chat.json b/tests/models/fixtures/ministral_3b_chat.json
new file mode 100644
index 000000000000..22dd9527adca
--- /dev/null
+++ b/tests/models/fixtures/ministral_3b_chat.json
@@ -0,0 +1 @@
+[[[4380, 3937, 6122, 1261, 7244, 10575, 18970, 41132, 3923, 1408, 1261, 32656, 4691, 1454, 2246, 22131, 15179, 11521, 17277, 1046, 2], "This image shows a black dog sitting attentively on a wooden surface with its gaze directed straight ahead.", [{"4380": {"logprob": -1.0445597171783447, "rank": 1, "decoded_token": "This"}, "1784": {"logprob": -1.5445597171783447, "rank": 2, "decoded_token": "The"}, "1065": {"logprob": -1.7945597171783447, "rank": 3, "decoded_token": "A"}, "1785": {"logprob": -2.2945597171783447, "rank": 4, "decoded_token": "In"}, "4051": {"logprob": -4.357059478759766, "rank": 5, "decoded_token": "An"}}, {"3937": {"logprob": -0.012832445092499256, "rank": 1, "decoded_token": " image"}, "13083": {"logprob": -6.8253326416015625, "rank": 2, "decoded_token": " picture"}, "16649": {"logprob": -6.8878326416015625, "rank": 3, "decoded_token": " photo"}, "1395": {"logprob": -7.2003326416015625, "rank": 4, "decoded_token": " is"}, "7244": {"logprob": -7.5128326416015625, "rank": 5, "decoded_token": " black"}}, {"6122": {"logprob": -0.2629571557044983, "rank": 1, "decoded_token": " shows"}, "51948": {"logprob": -2.2629570960998535, "rank": 2, "decoded_token": " depicts"}, "6971": {"logprob": -2.5129570960998535, "rank": 3, "decoded_token": " features"}, "25981": {"logprob": -3.6379570960998535, "rank": 4, "decoded_token": " displays"}, "89995": {"logprob": -4.7004570960998535, "rank": 5, "decoded_token": " showc"}}, {"1261": {"logprob": -0.00752826826646924, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -5.132528305053711, "rank": 2, "decoded_token": " an"}, "1278": {"logprob": -6.632528305053711, "rank": 3, "decoded_token": " the"}, "7244": {"logprob": -11.195028305053711, "rank": 4, "decoded_token": " black"}, "44056": {"logprob": -11.757528305053711, "rank": 5, "decoded_token": "\ta"}}, {"7244": {"logprob": -0.19620661437511444, "rank": 1, "decoded_token": " black"}, "8500": {"logprob": -3.446206569671631, "rank": 2, "decoded_token": " dark"}, "4329": {"logprob": -3.446206569671631, "rank": 3, "decoded_token": " large"}, "6231": {"logprob": -3.446206569671631, "rank": 4, "decoded_token": " close"}, "85596": {"logprob": -4.196206569671631, "rank": 5, "decoded_token": " solemn"}}, {"10575": {"logprob": -0.08389955013990402, "rank": 1, "decoded_token": " dog"}, "119075": {"logprob": -3.14639949798584, "rank": 2, "decoded_token": " Labrador"}, "116572": {"logprob": -4.83389949798584, "rank": 3, "decoded_token": " puppy"}, "1044": {"logprob": -5.14639949798584, "rank": 4, "decoded_token": ","}, "9566": {"logprob": -5.58389949798584, "rank": 5, "decoded_token": " medium"}}, {"18970": {"logprob": -1.0850104093551636, "rank": 1, "decoded_token": " sitting"}, "1454": {"logprob": -1.6475104093551636, "rank": 2, "decoded_token": " with"}, "28528": {"logprob": -2.022510528564453, "rank": 3, "decoded_token": " lying"}, "7283": {"logprob": -2.085010528564453, "rank": 4, "decoded_token": " looking"}, "38235": {"logprob": -2.460010528564453, "rank": 5, "decoded_token": " resting"}}, {"41132": {"logprob": -1.2341952323913574, "rank": 1, "decoded_token": " attent"}, "106534": {"logprob": -1.4841952323913574, "rank": 2, "decoded_token": " calmly"}, "1505": {"logprob": -2.2966952323913574, "rank": 3, "decoded_token": " or"}, "17558": {"logprob": -2.3591952323913574, "rank": 4, "decoded_token": " closely"}, "1408": {"logprob": -2.5466952323913574, "rank": 5, "decoded_token": " on"}}, {"3923": {"logprob": -0.005889324937015772, "rank": 1, "decoded_token": "ively"}, "1556": {"logprob": -6.693389415740967, "rank": 2, "decoded_token": "ive"}, "3929": {"logprob": -6.880889415740967, "rank": 3, "decoded_token": "ently"}, "10980": {"logprob": -7.193389415740967, "rank": 4, "decoded_token": "ibly"}, "14194": {"logprob": -7.943389415740967, "rank": 5, "decoded_token": "antly"}}, {"1408": {"logprob": -0.5112090110778809, "rank": 1, "decoded_token": " on"}, "1454": {"logprob": -1.7612090110778809, "rank": 2, "decoded_token": " with"}, "1321": {"logprob": -2.386209011077881, "rank": 3, "decoded_token": " and"}, "3675": {"logprob": -2.761209011077881, "rank": 4, "decoded_token": " against"}, "1505": {"logprob": -4.198709011077881, "rank": 5, "decoded_token": " or"}}, {"1261": {"logprob": -0.189262256026268, "rank": 1, "decoded_token": " a"}, "2549": {"logprob": -2.1892621517181396, "rank": 2, "decoded_token": " what"}, "32656": {"logprob": -3.8767621517181396, "rank": 3, "decoded_token": " wooden"}, "17253": {"logprob": -4.626762390136719, "rank": 4, "decoded_token": " weather"}, "3403": {"logprob": -4.751762390136719, "rank": 5, "decoded_token": " text"}}, {"32656": {"logprob": -0.9549442529678345, "rank": 1, "decoded_token": " wooden"}, "3403": {"logprob": -1.2049442529678345, "rank": 2, "decoded_token": " text"}, "17253": {"logprob": -1.8924442529678345, "rank": 3, "decoded_token": " weather"}, "44130": {"logprob": -2.267444133758545, "rank": 4, "decoded_token": " rust"}, "16673": {"logprob": -4.142444133758545, "rank": 5, "decoded_token": " rough"}}, {"4691": {"logprob": -0.2555857002735138, "rank": 1, "decoded_token": " surface"}, "3403": {"logprob": -2.6930856704711914, "rank": 2, "decoded_token": " text"}, "1615": {"logprob": -2.8805856704711914, "rank": 3, "decoded_token": " pl"}, "1044": {"logprob": -4.193085670471191, "rank": 4, "decoded_token": ","}, "26228": {"logprob": -4.380585670471191, "rank": 5, "decoded_token": " texture"}}, {"1454": {"logprob": -0.5042062997817993, "rank": 1, "decoded_token": " with"}, "1044": {"logprob": -1.6292062997817993, "rank": 2, "decoded_token": ","}, "1046": {"logprob": -2.2542061805725098, "rank": 3, "decoded_token": "."}, "7283": {"logprob": -3.7542061805725098, "rank": 4, "decoded_token": " looking"}, "1338": {"logprob": -4.44170618057251, "rank": 5, "decoded_token": ".\n\n"}}, {"2246": {"logprob": -1.1512703895568848, "rank": 1, "decoded_token": " its"}, "1261": {"logprob": -1.2137703895568848, "rank": 2, "decoded_token": " a"}, "1420": {"logprob": -2.3387703895568848, "rank": 3, "decoded_token": " an"}, "9924": {"logprob": -2.4637703895568848, "rank": 4, "decoded_token": " wide"}, "12593": {"logprob": -3.7762703895568848, "rank": 5, "decoded_token": " slightly"}}, {"22131": {"logprob": -1.0600039958953857, "rank": 1, "decoded_token": " gaze"}, "5731": {"logprob": -1.4350039958953857, "rank": 2, "decoded_token": " eyes"}, "9924": {"logprob": -2.7475039958953857, "rank": 3, "decoded_token": " wide"}, "14781": {"logprob": -2.9350039958953857, "rank": 4, "decoded_token": " focused"}, "3518": {"logprob": -3.1225039958953857, "rank": 5, "decoded_token": " head"}}, {"15179": {"logprob": -1.0535285472869873, "rank": 1, "decoded_token": " directed"}, "9247": {"logprob": -1.5535285472869873, "rank": 2, "decoded_token": " fixed"}, "14781": {"logprob": -2.4285285472869873, "rank": 3, "decoded_token": " focused"}, "7283": {"logprob": -2.6785285472869873, "rank": 4, "decoded_token": " looking"}, "12593": {"logprob": -2.7410285472869873, "rank": 5, "decoded_token": " slightly"}}, {"11521": {"logprob": -1.4163023233413696, "rank": 1, "decoded_token": " straight"}, "40022": {"logprob": -1.6038023233413696, "rank": 2, "decoded_token": " upward"}, "74606": {"logprob": -1.6663023233413696, "rank": 3, "decoded_token": " upwards"}, "12593": {"logprob": -2.16630220413208, "rank": 4, "decoded_token": " slightly"}, "8848": {"logprob": -2.16630220413208, "rank": 5, "decoded_token": " forward"}}, {"17277": {"logprob": -0.20115239918231964, "rank": 1, "decoded_token": " ahead"}, "8848": {"logprob": -2.4511523246765137, "rank": 2, "decoded_token": " forward"}, "1513": {"logprob": -2.9511523246765137, "rank": 3, "decoded_token": " at"}, "1046": {"logprob": -4.576152324676514, "rank": 4, "decoded_token": "."}, "8994": {"logprob": -4.826152324676514, "rank": 5, "decoded_token": " towards"}}, {"1046": {"logprob": -0.1819322109222412, "rank": 1, "decoded_token": "."}, "1338": {"logprob": -2.619432210922241, "rank": 2, "decoded_token": ".\n\n"}, "1321": {"logprob": -3.119432210922241, "rank": 3, "decoded_token": " and"}, "1044": {"logprob": -4.05693244934082, "rank": 4, "decoded_token": ","}, "1626": {"logprob": -5.18193244934082, "rank": 5, "decoded_token": ".\n"}}, {"2": {"logprob": -2.0367233753204346, "rank": 1, "decoded_token": "</s>"}, "35": {"logprob": -4.6617231369018555, "rank": 2, "decoded_token": "[/THINK]"}, "9": {"logprob": -4.9742231369018555, "rank": 3, "decoded_token": "[TOOL_CALLS]"}, "108349": {"logprob": -5.4742231369018555, "rank": 4, "decoded_token": "\u305d\u306e\u4e00\u65b9\u3067"}, "32": {"logprob": -5.8492231369018555, "rank": 5, "decoded_token": "[ARGS]"}}]], [[1784, 2158, 3937, 6122, 1261, 7244, 10575, 18970, 41132, 3923, 1408, 1261, 32656, 4691, 1454, 1261, 26517, 1321, 14781, 4818, 1338, 1784, 2667, 3937, 51948, 1261, 10726, 1290, 3719, 1307, 122203, 1044, 23745, 3591, 13194, 24361, 27469, 1294, 1278, 7786, 1454, 1295, 3506, 11223, 47260, 1408, 1278, 61263, 1046, 2], "The first image shows a black dog sitting attentively on a wooden surface with a calm and focused expression.\n\nThe second image depicts a scenic view of rugged, snow-capped mountain peaks in the distance with lush green vegetation on the slopes.", [{"1784": {"logprob": -0.6781838536262512, "rank": 1, "decoded_token": "The"}, "1049": {"logprob": -1.1781837940216064, "rank": 2, "decoded_token": "1"}, "69957": {"logprob": -4.0531840324401855, "rank": 3, "decoded_token": "Sure"}, "11745": {"logprob": -4.6781840324401855, "rank": 4, "decoded_token": "Here"}, "1785": {"logprob": -5.1781840324401855, "rank": 5, "decoded_token": "In"}}, {"2158": {"logprob": -0.17460788786411285, "rank": 1, "decoded_token": " first"}, "3937": {"logprob": -2.612107992172241, "rank": 2, "decoded_token": " image"}, "8061": {"logprob": -4.299607753753662, "rank": 3, "decoded_token": " images"}, "7244": {"logprob": -5.487107753753662, "rank": 4, "decoded_token": " black"}, "5662": {"logprob": -5.549607753753662, "rank": 5, "decoded_token": " provided"}}, {"3937": {"logprob": -0.014750940725207329, "rank": 1, "decoded_token": " image"}, "13083": {"logprob": -5.889750957489014, "rank": 2, "decoded_token": " picture"}, "2016": {"logprob": -7.264750957489014, "rank": 3, "decoded_token": " set"}, "16649": {"logprob": -7.764750957489014, "rank": 4, "decoded_token": " photo"}, "1877": {"logprob": -7.764750957489014, "rank": 5, "decoded_token": ":\n"}}, {"6122": {"logprob": -0.29441142082214355, "rank": 1, "decoded_token": " shows"}, "51948": {"logprob": -2.2944114208221436, "rank": 2, "decoded_token": " depicts"}, "1877": {"logprob": -3.4194114208221436, "rank": 3, "decoded_token": ":\n"}, "1395": {"logprob": -3.5444114208221436, "rank": 4, "decoded_token": " is"}, "25981": {"logprob": -3.7319114208221436, "rank": 5, "decoded_token": " displays"}}, {"1261": {"logprob": -0.05616755038499832, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -4.0561676025390625, "rank": 2, "decoded_token": " an"}, "1877": {"logprob": -5.8061676025390625, "rank": 3, "decoded_token": ":\n"}, "1278": {"logprob": -7.1186676025390625, "rank": 4, "decoded_token": " the"}, "20806": {"logprob": -7.6186676025390625, "rank": 5, "decoded_token": "\uff1a\n"}}, {"7244": {"logprob": -0.3175433278083801, "rank": 1, "decoded_token": " black"}, "4329": {"logprob": -2.4425432682037354, "rank": 2, "decoded_token": " large"}, "8500": {"logprob": -3.3175432682037354, "rank": 3, "decoded_token": " dark"}, "6231": {"logprob": -3.6925432682037354, "rank": 4, "decoded_token": " close"}, "85596": {"logprob": -3.7550432682037354, "rank": 5, "decoded_token": " solemn"}}, {"10575": {"logprob": -0.1241316869854927, "rank": 1, "decoded_token": " dog"}, "119075": {"logprob": -2.624131679534912, "rank": 2, "decoded_token": " Labrador"}, "116572": {"logprob": -4.686631679534912, "rank": 3, "decoded_token": " puppy"}, "15812": {"logprob": -5.436631679534912, "rank": 4, "decoded_token": " Lab"}, "1044": {"logprob": -5.561631679534912, "rank": 5, "decoded_token": ","}}, {"18970": {"logprob": -0.9899296164512634, "rank": 1, "decoded_token": " sitting"}, "7283": {"logprob": -1.8649296760559082, "rank": 2, "decoded_token": " looking"}, "28528": {"logprob": -2.114929676055908, "rank": 3, "decoded_token": " lying"}, "1454": {"logprob": -2.364929676055908, "rank": 4, "decoded_token": " with"}, "38235": {"logprob": -2.489929676055908, "rank": 5, "decoded_token": " resting"}}, {"41132": {"logprob": -1.1735738515853882, "rank": 1, "decoded_token": " attent"}, "106534": {"logprob": -1.7360738515853882, "rank": 2, "decoded_token": " calmly"}, "1408": {"logprob": -1.9235738515853882, "rank": 3, "decoded_token": " on"}, "1505": {"logprob": -2.8610739707946777, "rank": 4, "decoded_token": " or"}, "38263": {"logprob": -3.4860739707946777, "rank": 5, "decoded_token": " quietly"}}, {"3923": {"logprob": -0.011853850446641445, "rank": 1, "decoded_token": "ively"}, "1556": {"logprob": -5.6368536949157715, "rank": 2, "decoded_token": "ive"}, "3929": {"logprob": -6.6993536949157715, "rank": 3, "decoded_token": "ently"}, "10980": {"logprob": -6.6993536949157715, "rank": 4, "decoded_token": "ibly"}, "14194": {"logprob": -7.2618536949157715, "rank": 5, "decoded_token": "antly"}}, {"1408": {"logprob": -0.3410560190677643, "rank": 1, "decoded_token": " on"}, "1454": {"logprob": -2.4660561084747314, "rank": 2, "decoded_token": " with"}, "1321": {"logprob": -2.9660561084747314, "rank": 3, "decoded_token": " and"}, "3675": {"logprob": -3.2160561084747314, "rank": 4, "decoded_token": " against"}, "25644": {"logprob": -3.7785561084747314, "rank": 5, "decoded_token": " beside"}}, {"1261": {"logprob": -0.16969695687294006, "rank": 1, "decoded_token": " a"}, "2549": {"logprob": -2.7946970462799072, "rank": 2, "decoded_token": " what"}, "32656": {"logprob": -2.9821970462799072, "rank": 3, "decoded_token": " wooden"}, "3403": {"logprob": -5.107196807861328, "rank": 4, "decoded_token": " text"}, "3977": {"logprob": -5.169696807861328, "rank": 5, "decoded_token": " top"}}, {"32656": {"logprob": -0.49769073724746704, "rank": 1, "decoded_token": " wooden"}, "44130": {"logprob": -1.8726906776428223, "rank": 2, "decoded_token": " rust"}, "3403": {"logprob": -2.2476906776428223, "rank": 3, "decoded_token": " text"}, "17253": {"logprob": -2.9976906776428223, "rank": 4, "decoded_token": " weather"}, "16673": {"logprob": -3.8726906776428223, "rank": 5, "decoded_token": " rough"}}, {"4691": {"logprob": -0.42273157835006714, "rank": 1, "decoded_token": " surface"}, "1615": {"logprob": -1.985231637954712, "rank": 2, "decoded_token": " pl"}, "11237": {"logprob": -3.735231637954712, "rank": 3, "decoded_token": " floor"}, "26808": {"logprob": -3.735231637954712, "rank": 4, "decoded_token": " bench"}, "3403": {"logprob": -3.860231637954712, "rank": 5, "decoded_token": " text"}}, {"1454": {"logprob": -0.8123087286949158, "rank": 1, "decoded_token": " with"}, "1338": {"logprob": -1.5623087882995605, "rank": 2, "decoded_token": ".\n\n"}, "1626": {"logprob": -2.3748087882995605, "rank": 3, "decoded_token": ".\n"}, "7283": {"logprob": -2.8123087882995605, "rank": 4, "decoded_token": " looking"}, "1044": {"logprob": -3.5623087882995605, "rank": 5, "decoded_token": ","}}, {"1261": {"logprob": -1.0109002590179443, "rank": 1, "decoded_token": " a"}, "2246": {"logprob": -1.1984002590179443, "rank": 2, "decoded_token": " its"}, "1420": {"logprob": -2.0109002590179443, "rank": 3, "decoded_token": " an"}, "9924": {"logprob": -3.9484002590179443, "rank": 4, "decoded_token": " wide"}, "14781": {"logprob": -4.698400497436523, "rank": 5, "decoded_token": " focused"}}, {"26517": {"logprob": -2.2192542552948, "rank": 1, "decoded_token": " calm"}, "14781": {"logprob": -2.4067542552948, "rank": 2, "decoded_token": " focused"}, "11304": {"logprob": -2.6567542552948, "rank": 3, "decoded_token": " serious"}, "29691": {"logprob": -2.7192542552948, "rank": 4, "decoded_token": " contempl"}, "12593": {"logprob": -3.1567542552948, "rank": 5, "decoded_token": " slightly"}}, {"1321": {"logprob": -0.8886916637420654, "rank": 1, "decoded_token": " and"}, "4818": {"logprob": -1.0136916637420654, "rank": 2, "decoded_token": " expression"}, "1044": {"logprob": -2.2636916637420654, "rank": 3, "decoded_token": ","}, "22131": {"logprob": -3.1386916637420654, "rank": 4, "decoded_token": " gaze"}, "1311": {"logprob": -3.9511916637420654, "rank": 5, "decoded_token": " de"}}, {"14781": {"logprob": -1.208945870399475, "rank": 1, "decoded_token": " focused"}, "38462": {"logprob": -1.583945870399475, "rank": 2, "decoded_token": " curious"}, "97680": {"logprob": -2.9589457511901855, "rank": 3, "decoded_token": " thoughtful"}, "11304": {"logprob": -2.9589457511901855, "rank": 4, "decoded_token": " serious"}, "29691": {"logprob": -3.3964457511901855, "rank": 5, "decoded_token": " contempl"}}, {"4818": {"logprob": -0.16998574137687683, "rank": 1, "decoded_token": " expression"}, "22131": {"logprob": -2.669985771179199, "rank": 2, "decoded_token": " gaze"}, "2985": {"logprob": -3.232485771179199, "rank": 3, "decoded_token": " look"}, "1311": {"logprob": -3.919985771179199, "rank": 4, "decoded_token": " de"}, "13988": {"logprob": -4.982485771179199, "rank": 5, "decoded_token": " appearance"}}, {"1338": {"logprob": -0.4974508285522461, "rank": 1, "decoded_token": ".\n\n"}, "1626": {"logprob": -1.372450828552246, "rank": 2, "decoded_token": ".\n"}, "1046": {"logprob": -3.497450828552246, "rank": 3, "decoded_token": "."}, "1044": {"logprob": -4.122450828552246, "rank": 4, "decoded_token": ","}, "12560": {"logprob": -4.934950828552246, "rank": 5, "decoded_token": "\u0589\n\n"}}, {"1784": {"logprob": -0.05534925311803818, "rank": 1, "decoded_token": "The"}, "1785": {"logprob": -4.305349349975586, "rank": 2, "decoded_token": "In"}, "6958": {"logprob": -5.617849349975586, "rank": 3, "decoded_token": "There"}, "84593": {"logprob": -5.617849349975586, "rank": 4, "decoded_token": "_The"}, "4393": {"logprob": -6.180349349975586, "rank": 5, "decoded_token": "For"}}, {"2667": {"logprob": -0.035550788044929504, "rank": 1, "decoded_token": " second"}, "3937": {"logprob": -6.160550594329834, "rank": 2, "decoded_token": " image"}, "13023": {"logprob": -6.348050594329834, "rank": 3, "decoded_token": "second"}, "6360": {"logprob": -7.660550594329834, "rank": 4, "decoded_token": " description"}, "2158": {"logprob": -7.785550594329834, "rank": 5, "decoded_token": " first"}}, {"3937": {"logprob": -0.045355767011642456, "rank": 1, "decoded_token": " image"}, "13083": {"logprob": -5.857855796813965, "rank": 2, "decoded_token": " picture"}, "16649": {"logprob": -6.295355796813965, "rank": 3, "decoded_token": " photo"}, "2016": {"logprob": -6.482855796813965, "rank": 4, "decoded_token": " set"}, "5662": {"logprob": -6.857855796813965, "rank": 5, "decoded_token": " provided"}}, {"51948": {"logprob": -0.5656330585479736, "rank": 1, "decoded_token": " depicts"}, "25981": {"logprob": -1.8156330585479736, "rank": 2, "decoded_token": " displays"}, "66583": {"logprob": -2.5031330585479736, "rank": 3, "decoded_token": " captures"}, "6971": {"logprob": -2.7531330585479736, "rank": 4, "decoded_token": " features"}, "1395": {"logprob": -3.3781330585479736, "rank": 5, "decoded_token": " is"}}, {"1261": {"logprob": -0.13412977755069733, "rank": 1, "decoded_token": " a"}, "122203": {"logprob": -2.946629762649536, "rank": 2, "decoded_token": " rugged"}, "1420": {"logprob": -3.821629762649536, "rank": 3, "decoded_token": " an"}, "10726": {"logprob": -4.946630001068115, "rank": 4, "decoded_token": " scen"}, "13770": {"logprob": -5.196630001068115, "rank": 5, "decoded_token": " maj"}}, {"10726": {"logprob": -0.7839541435241699, "rank": 1, "decoded_token": " scen"}, "37849": {"logprob": -2.47145414352417, "rank": 2, "decoded_token": " breat"}, "122203": {"logprob": -2.72145414352417, "rank": 3, "decoded_token": " rugged"}, "23874": {"logprob": -2.72145414352417, "rank": 4, "decoded_token": " pictures"}, "15375": {"logprob": -3.47145414352417, "rank": 5, "decoded_token": " vast"}}, {"1290": {"logprob": -0.000259365770034492, "rank": 1, "decoded_token": "ic"}, "2981": {"logprob": -9.187759399414062, "rank": 2, "decoded_token": "ically"}, "1702": {"logprob": -11.250259399414062, "rank": 3, "decoded_token": "ice"}, "16832": {"logprob": -12.375259399414062, "rank": 4, "decoded_token": "...\n"}, "1685": {"logprob": -12.500259399414062, "rank": 5, "decoded_token": "ical"}}, {"3719": {"logprob": -0.9477202892303467, "rank": 1, "decoded_token": " view"}, "24361": {"logprob": -1.3227202892303467, "rank": 2, "decoded_token": " mountain"}, "127945": {"logprob": -1.9477202892303467, "rank": 3, "decoded_token": " mountainous"}, "1044": {"logprob": -3.0102202892303467, "rank": 4, "decoded_token": ","}, "28035": {"logprob": -3.0727202892303467, "rank": 5, "decoded_token": " landscape"}}, {"1307": {"logprob": -0.030216755345463753, "rank": 1, "decoded_token": " of"}, "1562": {"logprob": -4.217716693878174, "rank": 2, "decoded_token": " from"}, "24018": {"logprob": -5.717716693878174, "rank": 3, "decoded_token": " featuring"}, "11050": {"logprob": -6.217716693878174, "rank": 4, "decoded_token": " showing"}, "2015": {"logprob": -6.280216693878174, "rank": 5, "decoded_token": " up"}}, {"122203": {"logprob": -1.1189241409301758, "rank": 1, "decoded_token": " rugged"}, "1261": {"logprob": -1.6189241409301758, "rank": 2, "decoded_token": " a"}, "127945": {"logprob": -2.681424140930176, "rank": 3, "decoded_token": " mountainous"}, "11223": {"logprob": -2.931424140930176, "rank": 4, "decoded_token": " green"}, "23745": {"logprob": -3.056424140930176, "rank": 5, "decoded_token": " snow"}}, {"1044": {"logprob": -1.033378005027771, "rank": 1, "decoded_token": ","}, "24361": {"logprob": -1.158378005027771, "rank": 2, "decoded_token": " mountain"}, "35463": {"logprob": -1.658378005027771, "rank": 3, "decoded_token": " mountains"}, "127945": {"logprob": -2.2833781242370605, "rank": 4, "decoded_token": " mountainous"}, "1321": {"logprob": -4.9708781242370605, "rank": 5, "decoded_token": " and"}}, {"23745": {"logprob": -0.8830229043960571, "rank": 1, "decoded_token": " snow"}, "127945": {"logprob": -1.8830229043960571, "rank": 2, "decoded_token": " mountainous"}, "11223": {"logprob": -1.8830229043960571, "rank": 3, "decoded_token": " green"}, "1394": {"logprob": -2.5705227851867676, "rank": 4, "decoded_token": " for"}, "95746": {"logprob": -3.5080227851867676, "rank": 5, "decoded_token": " rocky"}}, {"3591": {"logprob": -0.5006332397460938, "rank": 1, "decoded_token": "-c"}, "114525": {"logprob": -1.1256332397460938, "rank": 2, "decoded_token": "-covered"}, "18928": {"logprob": -3.5006332397460938, "rank": 3, "decoded_token": "-top"}, "1099": {"logprob": -4.500633239746094, "rank": 4, "decoded_token": "c"}, "24263": {"logprob": -4.938133239746094, "rank": 5, "decoded_token": "-cl"}}, {"13194": {"logprob": -0.007475971709936857, "rank": 1, "decoded_token": "apped"}, "36649": {"logprob": -6.132475852966309, "rank": 2, "decoded_token": "rowned"}, "13234": {"logprob": -6.694975852966309, "rank": 3, "decoded_token": "aped"}, "10261": {"logprob": -6.819975852966309, "rank": 4, "decoded_token": "rest"}, "4681": {"logprob": -7.757475852966309, "rank": 5, "decoded_token": "ored"}}, {"24361": {"logprob": -0.6446366906166077, "rank": 1, "decoded_token": " mountain"}, "35463": {"logprob": -0.8946366906166077, "rank": 2, "decoded_token": " mountains"}, "127945": {"logprob": -3.394636631011963, "rank": 3, "decoded_token": " mountainous"}, "116555": {"logprob": -4.894636631011963, "rank": 4, "decoded_token": " alpine"}, "1321": {"logprob": -4.894636631011963, "rank": 5, "decoded_token": " and"}}, {"27469": {"logprob": -0.12543931603431702, "rank": 1, "decoded_token": " peaks"}, "26236": {"logprob": -2.625439405441284, "rank": 2, "decoded_token": " ranges"}, "103398": {"logprob": -3.937939405441284, "rank": 3, "decoded_token": " ridges"}, "24765": {"logprob": -4.875439167022705, "rank": 4, "decoded_token": " terrain"}, "84497": {"logprob": -5.187939167022705, "rank": 5, "decoded_token": " landscapes"}}, {"1294": {"logprob": -1.4923679828643799, "rank": 1, "decoded_token": " in"}, "1454": {"logprob": -1.8673679828643799, "rank": 2, "decoded_token": " with"}, "29817": {"logprob": -2.55486798286438, "rank": 3, "decoded_token": " surrounded"}, "26619": {"logprob": -2.61736798286438, "rank": 4, "decoded_token": " rising"}, "1321": {"logprob": -2.61736798286438, "rank": 5, "decoded_token": " and"}}, {"1278": {"logprob": -0.3165818154811859, "rank": 1, "decoded_token": " the"}, "1261": {"logprob": -1.4415818452835083, "rank": 2, "decoded_token": " a"}, "1420": {"logprob": -4.316581726074219, "rank": 3, "decoded_token": " an"}, "5561": {"logprob": -5.754081726074219, "rank": 4, "decoded_token": " various"}, "2549": {"logprob": -6.066581726074219, "rank": 5, "decoded_token": " what"}}, {"7786": {"logprob": -0.5495827198028564, "rank": 1, "decoded_token": " distance"}, "7042": {"logprob": -1.0495827198028564, "rank": 2, "decoded_token": " background"}, "30594": {"logprob": -3.1745827198028564, "rank": 3, "decoded_token": " distant"}, "115381": {"logprob": -5.237082481384277, "rank": 4, "decoded_token": " Alps"}, "92504": {"logprob": -5.237082481384277, "rank": 5, "decoded_token": " backdrop"}}, {"1454": {"logprob": -0.6429675817489624, "rank": 1, "decoded_token": " with"}, "1044": {"logprob": -2.205467700958252, "rank": 2, "decoded_token": ","}, "3675": {"logprob": -2.330467700958252, "rank": 3, "decoded_token": " against"}, "1046": {"logprob": -3.330467700958252, "rank": 4, "decoded_token": "."}, "2136": {"logprob": -3.392967700958252, "rank": 5, "decoded_token": " over"}}, {"1295": {"logprob": -1.4274311065673828, "rank": 1, "decoded_token": " l"}, "1261": {"logprob": -1.6774311065673828, "rank": 2, "decoded_token": " a"}, "11223": {"logprob": -1.9274311065673828, "rank": 3, "decoded_token": " green"}, "47147": {"logprob": -3.239931106567383, "rank": 4, "decoded_token": " steep"}, "50373": {"logprob": -3.302431106567383, "rank": 5, "decoded_token": " patches"}}, {"3506": {"logprob": -0.0006933192489668727, "rank": 1, "decoded_token": "ush"}, "16938": {"logprob": -8.563193321228027, "rank": 2, "decoded_token": "usher"}, "1374": {"logprob": -9.188193321228027, "rank": 3, "decoded_token": "us"}, "90716": {"logprob": -9.563193321228027, "rank": 4, "decoded_token": "USH"}, "5245": {"logprob": -9.688193321228027, "rank": 5, "decoded_token": "acy"}}, {"11223": {"logprob": -0.5139672160148621, "rank": 1, "decoded_token": " green"}, "1044": {"logprob": -1.5764672756195068, "rank": 2, "decoded_token": ","}, "4174": {"logprob": -2.451467275619507, "rank": 3, "decoded_token": " gre"}, "47260": {"logprob": -3.451467275619507, "rank": 4, "decoded_token": " vegetation"}, "1394": {"logprob": -4.763967037200928, "rank": 5, "decoded_token": " for"}}, {"47260": {"logprob": -1.0371246337890625, "rank": 1, "decoded_token": " vegetation"}, "61263": {"logprob": -1.8496246337890625, "rank": 2, "decoded_token": " slopes"}, "94549": {"logprob": -1.9121246337890625, "rank": 3, "decoded_token": " valleys"}, "50373": {"logprob": -3.4121246337890625, "rank": 4, "decoded_token": " patches"}, "4953": {"logprob": -3.5371246337890625, "rank": 5, "decoded_token": " lower"}}, {"1408": {"logprob": -1.3317866325378418, "rank": 1, "decoded_token": " on"}, "1294": {"logprob": -1.7067866325378418, "rank": 2, "decoded_token": " in"}, "1321": {"logprob": -2.394286632537842, "rank": 3, "decoded_token": " and"}, "5956": {"logprob": -2.644286632537842, "rank": 4, "decoded_token": " below"}, "1513": {"logprob": -2.706786632537842, "rank": 5, "decoded_token": " at"}}, {"1278": {"logprob": -0.6638099551200867, "rank": 1, "decoded_token": " the"}, "1261": {"logprob": -2.1638100147247314, "rank": 2, "decoded_token": " a"}, "47147": {"logprob": -2.6013100147247314, "rank": 3, "decoded_token": " steep"}, "4953": {"logprob": -2.9138100147247314, "rank": 4, "decoded_token": " lower"}, "95746": {"logprob": -3.0388100147247314, "rank": 5, "decoded_token": " rocky"}}, {"61263": {"logprob": -1.3242369890213013, "rank": 1, "decoded_token": " slopes"}, "95746": {"logprob": -2.3242368698120117, "rank": 2, "decoded_token": " rocky"}, "4953": {"logprob": -2.3867368698120117, "rank": 3, "decoded_token": " lower"}, "79831": {"logprob": -2.7617368698120117, "rank": 4, "decoded_token": " foreground"}, "47147": {"logprob": -2.8867368698120117, "rank": 5, "decoded_token": " steep"}}, {"1046": {"logprob": -0.7772958278656006, "rank": 1, "decoded_token": "."}, "5956": {"logprob": -1.1522958278656006, "rank": 2, "decoded_token": " below"}, "1294": {"logprob": -2.3397958278656006, "rank": 3, "decoded_token": " in"}, "1321": {"logprob": -3.5897958278656006, "rank": 4, "decoded_token": " and"}, "1307": {"logprob": -4.27729606628418, "rank": 5, "decoded_token": " of"}}, {"2": {"logprob": -0.031993232667446136, "rank": 1, "decoded_token": "</s>"}, "3730": {"logprob": -7.281993389129639, "rank": 2, "decoded_token": " There"}, "1531": {"logprob": -7.281993389129639, "rank": 3, "decoded_token": " The"}, "2409": {"logprob": -8.65699291229248, "rank": 4, "decoded_token": " This"}, "2157": {"logprob": -8.65699291229248, "rank": 5, "decoded_token": " It"}}]], [[1049, 1046, 1531, 2158, 3937, 6122, 1261, 7244, 10575, 18970, 41132, 3923, 1408, 1261, 32656, 4691, 1626, 1256, 1462, 1531, 2667, 3937, 1319, 3715, 4326, 1294, 2143, 4098, 1041, 10249, 1317, 1402, 1261, 24361, 28035, 1454, 122203, 24765, 1321, 30594, 27469, 1338, 1050, 1046, 1531, 5888, 3937, 51948, 1261, 2169, 2509, 29397, 13327, 1454, 26905, 22140, 11981, 1278, 46422, 1321, 1261, 92731, 2965, 19710, 4837, 1278, 46422, 1338, 1051, 1046, 1531, 12432, 3937, 6971, 1261, 53301, 59396, 3549, 121040, 1536, 23170, 1321, 16429, 1294, 1261, 23874, 1872, 41730, 9436, 1338, 1052, 1046, 1531, 19723, 3937, 1319, 3715, 24512, 1435, 1651, 1278, 5719, 4546, 1041, 2168, 1402, 1278, 1925, 1454, 1278, 10575, 2790, 1044, 1809, 4136, 1494, 1681, 5314, 5055, 1044, 3226, 2190, 1261, 2801, 6468, 1693, 1729, 3369, 1278, 3629, 75275, 1877, 1256, 1462, 1531, 5719, 5662, 3937, 1395, 1261, 7244, 10575, 7283, 2015, 1454, 1261, 26517, 4818, 1408, 1261, 44130, 1290, 32656, 1615, 2395, 7042, 1338, 2892, 38695, 1044, 3226, 2190, 1278, 6298, 6360, 1394, 1278, 5662, 7244, 10575, 3937, 1877, 1065, 7244, 10575, 11589, 1264, 1935, 3929, 40022, 1454, 1261, 26517, 4818, 1408, 1261, 44130, 1290, 32656, 1615, 2395, 7042, 1046, 2], "1. The first image shows a black dog sitting attentively on a wooden surface.\n   - The second image (not shown in your question) appears to be a mountain landscape with rugged terrain and distant peaks.\n\n2. The third image depicts a serene beach scene with gentle waves meeting the shore and a lone person walking along the shore.\n\n3. The fourth image features a winding gravel path bordered by grass and trees in a picturesque outdoor setting.\n\n4. The fifth image (not applicable as per the initial request) would be the one with the dog again, but since it's already described, here\u2019s a different focus if we consider the following hypothetical:\n   - The initial provided image is a black dog looking up with a calm expression on a rustic wooden plank background.\n\nTo clarify, here\u2019s the correct description for the provided black dog image:\nA black dog gazes intently upward with a calm expression on a rustic wooden plank background.", [{"1049": {"logprob": -0.3098178803920746, "rank": 1, "decoded_token": "1"}, "1784": {"logprob": -2.1848177909851074, "rank": 2, "decoded_token": "The"}, "11745": {"logprob": -2.6223177909851074, "rank": 3, "decoded_token": "Here"}, "2757": {"logprob": -6.059817790985107, "rank": 4, "decoded_token": "It"}, "69957": {"logprob": -6.122317790985107, "rank": 5, "decoded_token": "Sure"}}, {"1046": {"logprob": -0.16265615820884705, "rank": 1, "decoded_token": "."}, "1626": {"logprob": -6.475156307220459, "rank": 2, "decoded_token": ".\n"}, "1314": {"logprob": -6.975156307220459, "rank": 3, "decoded_token": "st"}, "1319": {"logprob": -7.412656307220459, "rank": 4, "decoded_token": " ("}, "27": {"logprob": -7.725156307220459, "rank": 5, "decoded_token": "<SPECIAL_27>"}}, {"1531": {"logprob": -0.5334714651107788, "rank": 1, "decoded_token": " The"}, "11967": {"logprob": -3.9709715843200684, "rank": 2, "decoded_token": " Image"}, "1349": {"logprob": -3.9709715843200684, "rank": 3, "decoded_token": " A"}, "7610": {"logprob": -3.9709715843200684, "rank": 4, "decoded_token": " First"}, "2048": {"logprob": -4.345971584320068, "rank": 5, "decoded_token": " An"}}, {"2158": {"logprob": -0.4163813889026642, "rank": 1, "decoded_token": " first"}, "3937": {"logprob": -1.2913813591003418, "rank": 2, "decoded_token": " image"}, "7244": {"logprob": -3.853881359100342, "rank": 3, "decoded_token": " black"}, "13083": {"logprob": -4.166381359100342, "rank": 4, "decoded_token": " picture"}, "16649": {"logprob": -5.103881359100342, "rank": 5, "decoded_token": " photo"}}, {"3937": {"logprob": -0.09788376092910767, "rank": 1, "decoded_token": " image"}, "13083": {"logprob": -3.785383701324463, "rank": 2, "decoded_token": " picture"}, "2016": {"logprob": -4.410383701324463, "rank": 3, "decoded_token": " set"}, "1319": {"logprob": -4.472883701324463, "rank": 4, "decoded_token": " ("}, "5662": {"logprob": -4.847883701324463, "rank": 5, "decoded_token": " provided"}}, {"6122": {"logprob": -0.3766213655471802, "rank": 1, "decoded_token": " shows"}, "1395": {"logprob": -2.1266212463378906, "rank": 2, "decoded_token": " is"}, "51948": {"logprob": -2.6891212463378906, "rank": 3, "decoded_token": " depicts"}, "6971": {"logprob": -3.4391212463378906, "rank": 4, "decoded_token": " features"}, "1058": {"logprob": -3.5641212463378906, "rank": 5, "decoded_token": ":"}}, {"1261": {"logprob": -0.04542229697108269, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -3.170422315597534, "rank": 2, "decoded_token": " an"}, "1278": {"logprob": -6.732922077178955, "rank": 3, "decoded_token": " the"}, "1877": {"logprob": -8.795422554016113, "rank": 4, "decoded_token": ":\n"}, "7244": {"logprob": -9.232922554016113, "rank": 5, "decoded_token": " black"}}, {"7244": {"logprob": -0.45382726192474365, "rank": 1, "decoded_token": " black"}, "4329": {"logprob": -2.266327381134033, "rank": 2, "decoded_token": " large"}, "85596": {"logprob": -3.141327381134033, "rank": 3, "decoded_token": " solemn"}, "8500": {"logprob": -3.203827381134033, "rank": 4, "decoded_token": " dark"}, "16450": {"logprob": -3.766327381134033, "rank": 5, "decoded_token": " sle"}}, {"10575": {"logprob": -0.09157121181488037, "rank": 1, "decoded_token": " dog"}, "119075": {"logprob": -3.27907133102417, "rank": 2, "decoded_token": " Labrador"}, "116572": {"logprob": -4.40407133102417, "rank": 3, "decoded_token": " puppy"}, "15812": {"logprob": -5.40407133102417, "rank": 4, "decoded_token": " Lab"}, "1044": {"logprob": -5.52907133102417, "rank": 5, "decoded_token": ","}}, {"18970": {"logprob": -0.9958467483520508, "rank": 1, "decoded_token": " sitting"}, "7283": {"logprob": -2.058346748352051, "rank": 2, "decoded_token": " looking"}, "28528": {"logprob": -2.120846748352051, "rank": 3, "decoded_token": " lying"}, "38235": {"logprob": -2.183346748352051, "rank": 4, "decoded_token": " resting"}, "1454": {"logprob": -2.558346748352051, "rank": 5, "decoded_token": " with"}}, {"41132": {"logprob": -1.2059022188186646, "rank": 1, "decoded_token": " attent"}, "1408": {"logprob": -1.8309022188186646, "rank": 2, "decoded_token": " on"}, "106534": {"logprob": -1.8934022188186646, "rank": 3, "decoded_token": " calmly"}, "1321": {"logprob": -3.268402099609375, "rank": 4, "decoded_token": " and"}, "1454": {"logprob": -3.330902099609375, "rank": 5, "decoded_token": " with"}}, {"3923": {"logprob": -0.005489394534379244, "rank": 1, "decoded_token": "ively"}, "1556": {"logprob": -5.505489349365234, "rank": 2, "decoded_token": "ive"}, "3929": {"logprob": -7.505489349365234, "rank": 3, "decoded_token": "ently"}, "10980": {"logprob": -8.505489349365234, "rank": 4, "decoded_token": "ibly"}, "14194": {"logprob": -9.130489349365234, "rank": 5, "decoded_token": "antly"}}, {"1408": {"logprob": -0.3085399270057678, "rank": 1, "decoded_token": " on"}, "1454": {"logprob": -2.433539867401123, "rank": 2, "decoded_token": " with"}, "1321": {"logprob": -3.308539867401123, "rank": 3, "decoded_token": " and"}, "25644": {"logprob": -3.496039867401123, "rank": 4, "decoded_token": " beside"}, "3675": {"logprob": -3.746039867401123, "rank": 5, "decoded_token": " against"}}, {"1261": {"logprob": -0.17512622475624084, "rank": 1, "decoded_token": " a"}, "32656": {"logprob": -2.550126314163208, "rank": 2, "decoded_token": " wooden"}, "2549": {"logprob": -3.800126314163208, "rank": 3, "decoded_token": " what"}, "44130": {"logprob": -4.675126075744629, "rank": 4, "decoded_token": " rust"}, "17253": {"logprob": -4.862626075744629, "rank": 5, "decoded_token": " weather"}}, {"32656": {"logprob": -0.5213224291801453, "rank": 1, "decoded_token": " wooden"}, "44130": {"logprob": -1.70882248878479, "rank": 2, "decoded_token": " rust"}, "3403": {"logprob": -2.45882248878479, "rank": 3, "decoded_token": " text"}, "17253": {"logprob": -2.89632248878479, "rank": 4, "decoded_token": " weather"}, "16673": {"logprob": -3.89632248878479, "rank": 5, "decoded_token": " rough"}}, {"4691": {"logprob": -0.7510372996330261, "rank": 1, "decoded_token": " surface"}, "1615": {"logprob": -1.188537359237671, "rank": 2, "decoded_token": " pl"}, "9710": {"logprob": -3.751037359237671, "rank": 3, "decoded_token": " board"}, "3403": {"logprob": -3.751037359237671, "rank": 4, "decoded_token": " text"}, "11237": {"logprob": -3.938537359237671, "rank": 5, "decoded_token": " floor"}}, {"1626": {"logprob": -0.5343737006187439, "rank": 1, "decoded_token": ".\n"}, "1454": {"logprob": -1.4093737602233887, "rank": 2, "decoded_token": " with"}, "1338": {"logprob": -2.7843737602233887, "rank": 3, "decoded_token": ".\n\n"}, "7283": {"logprob": -3.5343737602233887, "rank": 4, "decoded_token": " looking"}, "1044": {"logprob": -3.8468737602233887, "rank": 5, "decoded_token": ","}}, {"1256": {"logprob": -1.0727235078811646, "rank": 1, "decoded_token": "  "}, "1050": {"logprob": -1.4477235078811646, "rank": 2, "decoded_token": "2"}, "1293": {"logprob": -2.947723388671875, "rank": 3, "decoded_token": "   "}, "6837": {"logprob": -5.260223388671875, "rank": 4, "decoded_token": "\u06f2"}, "1260": {"logprob": -5.822723388671875, "rank": 5, "decoded_token": "    "}}, {"1462": {"logprob": -2.970078468322754, "rank": 1, "decoded_token": " -"}, "1319": {"logprob": -3.470078468322754, "rank": 2, "decoded_token": " ("}, "1032": {"logprob": -4.220078468322754, "rank": 3, "decoded_token": " "}, "49958": {"logprob": -4.720078468322754, "rank": 4, "decoded_token": " ```\n"}, "9380": {"logprob": -5.282578468322754, "rank": 5, "decoded_token": " Here"}}, {"1531": {"logprob": -1.9009761810302734, "rank": 1, "decoded_token": " The"}, "10322": {"logprob": -2.9634761810302734, "rank": 2, "decoded_token": " Second"}, "1319": {"logprob": -3.1509761810302734, "rank": 3, "decoded_token": " ("}, "9380": {"logprob": -3.3384761810302734, "rank": 4, "decoded_token": " Here"}, "11967": {"logprob": -3.4634761810302734, "rank": 5, "decoded_token": " Image"}}, {"2667": {"logprob": -0.49421006441116333, "rank": 1, "decoded_token": " second"}, "3937": {"logprob": -2.9942100048065186, "rank": 2, "decoded_token": " image"}, "7244": {"logprob": -3.6192100048065186, "rank": 3, "decoded_token": " black"}, "13827": {"logprob": -3.7442100048065186, "rank": 4, "decoded_token": " subsequent"}, "15115": {"logprob": -3.8692100048065186, "rank": 5, "decoded_token": " detailed"}}, {"3937": {"logprob": -0.938382625579834, "rank": 1, "decoded_token": " image"}, "2016": {"logprob": -2.500882625579834, "rank": 2, "decoded_token": " set"}, "1319": {"logprob": -2.938382625579834, "rank": 3, "decoded_token": " ("}, "5662": {"logprob": -3.000882625579834, "rank": 4, "decoded_token": " provided"}, "7293": {"logprob": -3.438382625579834, "rank": 5, "decoded_token": " actual"}}, {"1319": {"logprob": -2.159437417984009, "rank": 3, "decoded_token": " ("}, "51948": {"logprob": -2.159437417984009, "rank": 1, "decoded_token": " depicts"}, "1395": {"logprob": -2.159437417984009, "rank": 2, "decoded_token": " is"}, "10249": {"logprob": -2.659437417984009, "rank": 4, "decoded_token": " appears"}, "1058": {"logprob": -2.784437417984009, "rank": 5, "decoded_token": ":"}}, {"3715": {"logprob": -2.364915370941162, "rank": 1, "decoded_token": "not"}, "11018": {"logprob": -2.614915370941162, "rank": 2, "decoded_token": "which"}, "2914": {"logprob": -2.927415370941162, "rank": 3, "decoded_token": "inc"}, "39575": {"logprob": -3.239915370941162, "rank": 4, "decoded_token": "mist"}, "3265": {"logprob": -3.302415370941162, "rank": 5, "decoded_token": "the"}}, {"4326": {"logprob": -1.9997965097427368, "rank": 1, "decoded_token": " shown"}, "24512": {"logprob": -2.2497963905334473, "rank": 2, "decoded_token": " applicable"}, "5662": {"logprob": -2.8747963905334473, "rank": 3, "decoded_token": " provided"}, "13874": {"logprob": -2.9372963905334473, "rank": 4, "decoded_token": " visible"}, "1278": {"logprob": -3.1247963905334473, "rank": 5, "decoded_token": " the"}}, {"1294": {"logprob": -1.0730445384979248, "rank": 1, "decoded_token": " in"}, "1041": {"logprob": -2.260544538497925, "rank": 2, "decoded_token": ")"}, "4244": {"logprob": -2.448044538497925, "rank": 3, "decoded_token": "):"}, "3226": {"logprob": -2.635544538497925, "rank": 4, "decoded_token": " here"}, "1435": {"logprob": -3.135544538497925, "rank": 5, "decoded_token": " as"}}, {"2143": {"logprob": -0.9278546571731567, "rank": 1, "decoded_token": " your"}, "1278": {"logprob": -1.2403546571731567, "rank": 2, "decoded_token": " the"}, "4098": {"logprob": -2.177854537963867, "rank": 3, "decoded_token": " question"}, "12705": {"logprob": -3.740354537963867, "rank": 4, "decoded_token": " detail"}, "1593": {"logprob": -3.927854537963867, "rank": 5, "decoded_token": " this"}}, {"4098": {"logprob": -1.8063793182373047, "rank": 1, "decoded_token": " question"}, "4546": {"logprob": -2.3063793182373047, "rank": 2, "decoded_token": " request"}, "4618": {"logprob": -2.4313793182373047, "rank": 3, "decoded_token": " original"}, "7330": {"logprob": -2.5563793182373047, "rank": 4, "decoded_token": " query"}, "5719": {"logprob": -2.8688793182373047, "rank": 5, "decoded_token": " initial"}}, {"1041": {"logprob": -1.411703109741211, "rank": 1, "decoded_token": ")"}, "4244": {"logprob": -2.099203109741211, "rank": 2, "decoded_token": "):"}, "1809": {"logprob": -2.536703109741211, "rank": 3, "decoded_token": " but"}, "1044": {"logprob": -2.661703109741211, "rank": 4, "decoded_token": ","}, "1681": {"logprob": -3.474203109741211, "rank": 5, "decoded_token": "'s"}}, {"10249": {"logprob": -1.607055425643921, "rank": 1, "decoded_token": " appears"}, "51948": {"logprob": -1.982055425643921, "rank": 2, "decoded_token": " depicts"}, "7444": {"logprob": -2.044555425643921, "rank": 3, "decoded_token": " seems"}, "1395": {"logprob": -2.169555425643921, "rank": 4, "decoded_token": " is"}, "2168": {"logprob": -2.982055425643921, "rank": 5, "decoded_token": " would"}}, {"1317": {"logprob": -0.1792934685945511, "rank": 1, "decoded_token": " to"}, "51723": {"logprob": -4.116793632507324, "rank": 2, "decoded_token": " unrelated"}, "5711": {"logprob": -4.616793632507324, "rank": 3, "decoded_token": " mis"}, "73751": {"logprob": -4.804293632507324, "rank": 4, "decoded_token": " incorrectly"}, "1605": {"logprob": -4.866793632507324, "rank": 5, "decoded_token": " not"}}, {"1402": {"logprob": -0.8325714468955994, "rank": 1, "decoded_token": " be"}, "17767": {"logprob": -1.4575715065002441, "rank": 2, "decoded_token": " depict"}, "7326": {"logprob": -3.395071506500244, "rank": 3, "decoded_token": " feature"}, "1736": {"logprob": -3.520071506500244, "rank": 4, "decoded_token": " have"}, "32688": {"logprob": -3.582571506500244, "rank": 5, "decoded_token": " illustrate"}}, {"1261": {"logprob": -1.2619296312332153, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -2.824429512023926, "rank": 2, "decoded_token": " an"}, "51723": {"logprob": -3.261929512023926, "rank": 3, "decoded_token": " unrelated"}, "25342": {"logprob": -3.511929512023926, "rank": 4, "decoded_token": " incorrect"}, "1307": {"logprob": -3.511929512023926, "rank": 5, "decoded_token": " of"}}, {"24361": {"logprob": -2.2388875484466553, "rank": 1, "decoded_token": " mountain"}, "10726": {"logprob": -2.5513875484466553, "rank": 2, "decoded_token": " scen"}, "127945": {"logprob": -3.3013875484466553, "rank": 3, "decoded_token": " mountainous"}, "15115": {"logprob": -3.6138875484466553, "rank": 4, "decoded_token": " detailed"}, "3719": {"logprob": -3.7388875484466553, "rank": 5, "decoded_token": " view"}}, {"28035": {"logprob": -0.8160803318023682, "rank": 1, "decoded_token": " landscape"}, "4521": {"logprob": -1.2535803318023682, "rank": 2, "decoded_token": " range"}, "3719": {"logprob": -2.691080331802368, "rank": 3, "decoded_token": " view"}, "13327": {"logprob": -3.253580331802368, "rank": 4, "decoded_token": " scene"}, "1454": {"logprob": -3.566080331802368, "rank": 5, "decoded_token": " with"}}, {"1454": {"logprob": -0.6144524812698364, "rank": 1, "decoded_token": " with"}, "1626": {"logprob": -2.051952362060547, "rank": 2, "decoded_token": ".\n"}, "1338": {"logprob": -2.176952362060547, "rank": 3, "decoded_token": ".\n\n"}, "3719": {"logprob": -3.926952362060547, "rank": 4, "decoded_token": " view"}, "1562": {"logprob": -3.989452362060547, "rank": 5, "decoded_token": " from"}}, {"122203": {"logprob": -1.7088322639465332, "rank": 1, "decoded_token": " rugged"}, "23745": {"logprob": -2.208832263946533, "rank": 2, "decoded_token": " snow"}, "11223": {"logprob": -2.521332263946533, "rank": 3, "decoded_token": " green"}, "27469": {"logprob": -2.833832263946533, "rank": 4, "decoded_token": " peaks"}, "47147": {"logprob": -2.896332263946533, "rank": 5, "decoded_token": " steep"}}, {"24765": {"logprob": -0.6277848482131958, "rank": 1, "decoded_token": " terrain"}, "27469": {"logprob": -1.2527848482131958, "rank": 2, "decoded_token": " peaks"}, "1044": {"logprob": -2.9402847290039062, "rank": 3, "decoded_token": ","}, "130655": {"logprob": -3.6277847290039062, "rank": 4, "decoded_token": " cliffs"}, "57912": {"logprob": -4.002784729003906, "rank": 5, "decoded_token": " terrains"}}, {"1321": {"logprob": -0.6374254822731018, "rank": 1, "decoded_token": " and"}, "1626": {"logprob": -1.762425422668457, "rank": 2, "decoded_token": ".\n"}, "1338": {"logprob": -1.762425422668457, "rank": 3, "decoded_token": ".\n\n"}, "1294": {"logprob": -3.887425422668457, "rank": 4, "decoded_token": " in"}, "2425": {"logprob": -4.012425422668457, "rank": 5, "decoded_token": " under"}}, {"30594": {"logprob": -1.8202354907989502, "rank": 1, "decoded_token": " distant"}, "23745": {"logprob": -2.13273549079895, "rank": 2, "decoded_token": " snow"}, "27469": {"logprob": -2.25773549079895, "rank": 3, "decoded_token": " peaks"}, "11223": {"logprob": -2.94523549079895, "rank": 4, "decoded_token": " green"}, "11692": {"logprob": -3.07023549079895, "rank": 5, "decoded_token": " mist"}}, {"27469": {"logprob": -0.40320226550102234, "rank": 1, "decoded_token": " peaks"}, "23745": {"logprob": -2.2157022953033447, "rank": 2, "decoded_token": " snow"}, "35463": {"logprob": -3.4032022953033447, "rank": 3, "decoded_token": " mountains"}, "24361": {"logprob": -3.4032022953033447, "rank": 4, "decoded_token": " mountain"}, "46866": {"logprob": -4.090702056884766, "rank": 5, "decoded_token": " clouds"}}, {"1338": {"logprob": -0.5224027633666992, "rank": 1, "decoded_token": ".\n\n"}, "1626": {"logprob": -1.3349027633666992, "rank": 2, "decoded_token": ".\n"}, "2425": {"logprob": -3.397402763366699, "rank": 3, "decoded_token": " under"}, "1294": {"logprob": -4.084902763366699, "rank": 4, "decoded_token": " in"}, "13875": {"logprob": -4.334902763366699, "rank": 5, "decoded_token": " covered"}}, {"1050": {"logprob": -0.6029570698738098, "rank": 1, "decoded_token": "2"}, "1256": {"logprob": -1.352957010269165, "rank": 2, "decoded_token": "  "}, "11745": {"logprob": -3.227957010269165, "rank": 3, "decoded_token": "Here"}, "1293": {"logprob": -3.915457010269165, "rank": 4, "decoded_token": "   "}, "4393": {"logprob": -4.602957248687744, "rank": 5, "decoded_token": "For"}}, {"1046": {"logprob": -0.19250261783599854, "rank": 1, "decoded_token": "."}, "1319": {"logprob": -3.567502498626709, "rank": 2, "decoded_token": " ("}, "1626": {"logprob": -4.505002498626709, "rank": 3, "decoded_token": ".\n"}, "1877": {"logprob": -5.942502498626709, "rank": 4, "decoded_token": ":\n"}, "26667": {"logprob": -6.442502498626709, "rank": 5, "decoded_token": ".("}}, {"1531": {"logprob": -1.897325873374939, "rank": 1, "decoded_token": " The"}, "9380": {"logprob": -1.897325873374939, "rank": 2, "decoded_token": " Here"}, "1319": {"logprob": -2.0848259925842285, "rank": 3, "decoded_token": " ("}, "2898": {"logprob": -2.8973259925842285, "rank": 4, "decoded_token": " For"}, "10322": {"logprob": -3.3348259925842285, "rank": 5, "decoded_token": " Second"}}, {"5888": {"logprob": -1.483720064163208, "rank": 1, "decoded_token": " third"}, "2667": {"logprob": -2.296220064163208, "rank": 2, "decoded_token": " second"}, "5662": {"logprob": -2.858720064163208, "rank": 3, "decoded_token": " provided"}, "3937": {"logprob": -3.296220064163208, "rank": 4, "decoded_token": " image"}, "6298": {"logprob": -3.421220064163208, "rank": 5, "decoded_token": " correct"}}, {"3937": {"logprob": -0.6041796207427979, "rank": 1, "decoded_token": " image"}, "1319": {"logprob": -2.604179620742798, "rank": 2, "decoded_token": " ("}, "2016": {"logprob": -2.729179620742798, "rank": 3, "decoded_token": " set"}, "1321": {"logprob": -3.916679620742798, "rank": 4, "decoded_token": " and"}, "13083": {"logprob": -3.979179620742798, "rank": 5, "decoded_token": " picture"}}, {"51948": {"logprob": -1.6496541500091553, "rank": 1, "decoded_token": " depicts"}, "1319": {"logprob": -1.8371541500091553, "rank": 2, "decoded_token": " ("}, "1058": {"logprob": -1.9621541500091553, "rank": 3, "decoded_token": ":"}, "25981": {"logprob": -2.4621541500091553, "rank": 4, "decoded_token": " displays"}, "1395": {"logprob": -2.7746541500091553, "rank": 5, "decoded_token": " is"}}, {"1261": {"logprob": -0.5954864025115967, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -2.5954864025115967, "rank": 2, "decoded_token": " an"}, "22140": {"logprob": -2.6579864025115967, "rank": 3, "decoded_token": " waves"}, "1278": {"logprob": -2.9704864025115967, "rank": 4, "decoded_token": " the"}, "26517": {"logprob": -3.9079864025115967, "rank": 5, "decoded_token": " calm"}}, {"2169": {"logprob": -2.0800817012786865, "rank": 1, "decoded_token": " ser"}, "10726": {"logprob": -2.2050817012786865, "rank": 2, "decoded_token": " scen"}, "29397": {"logprob": -2.3925817012786865, "rank": 3, "decoded_token": " beach"}, "2965": {"logprob": -2.4550817012786865, "rank": 4, "decoded_token": " person"}, "1958": {"logprob": -2.8925817012786865, "rank": 5, "decoded_token": " sur"}}, {"2509": {"logprob": -0.0010151476599276066, "rank": 1, "decoded_token": "ene"}, "1391": {"logprob": -8.188514709472656, "rank": 2, "decoded_token": "if"}, "25863": {"logprob": -8.938514709472656, "rank": 3, "decoded_token": "rated"}, "3414": {"logprob": -9.438514709472656, "rank": 4, "decoded_token": "ena"}, "10049": {"logprob": -9.688514709472656, "rank": 5, "decoded_token": "\u00e8ne"}}, {"29397": {"logprob": -0.5060035586357117, "rank": 1, "decoded_token": " beach"}, "62557": {"logprob": -2.5685036182403564, "rank": 2, "decoded_token": " seas"}, "27208": {"logprob": -2.9435036182403564, "rank": 3, "decoded_token": " ocean"}, "38167": {"logprob": -3.0685036182403564, "rank": 4, "decoded_token": " coastal"}, "13327": {"logprob": -3.2560036182403564, "rank": 5, "decoded_token": " scene"}}, {"13327": {"logprob": -0.8083749413490295, "rank": 1, "decoded_token": " scene"}, "1454": {"logprob": -1.1208748817443848, "rank": 2, "decoded_token": " with"}, "1513": {"logprob": -2.9333748817443848, "rank": 3, "decoded_token": " at"}, "3184": {"logprob": -3.4958748817443848, "rank": 4, "decoded_token": " during"}, "2478": {"logprob": -4.058374881744385, "rank": 5, "decoded_token": " where"}}, {"1454": {"logprob": -0.322070449590683, "rank": 1, "decoded_token": " with"}, "3184": {"logprob": -2.447070360183716, "rank": 2, "decoded_token": " during"}, "1513": {"logprob": -2.759570360183716, "rank": 3, "decoded_token": " at"}, "2425": {"logprob": -3.509570360183716, "rank": 4, "decoded_token": " under"}, "2478": {"logprob": -3.697070360183716, "rank": 5, "decoded_token": " where"}}, {"26905": {"logprob": -1.1276788711547852, "rank": 1, "decoded_token": " gentle"}, "22140": {"logprob": -1.4401788711547852, "rank": 2, "decoded_token": " waves"}, "1261": {"logprob": -2.377678871154785, "rank": 3, "decoded_token": " a"}, "3306": {"logprob": -2.815178871154785, "rank": 4, "decoded_token": " people"}, "3709": {"logprob": -3.252678871154785, "rank": 5, "decoded_token": " small"}}, {"22140": {"logprob": -0.048392221331596375, "rank": 1, "decoded_token": " waves"}, "27208": {"logprob": -3.6733922958374023, "rank": 2, "decoded_token": " ocean"}, "29661": {"logprob": -5.735892295837402, "rank": 3, "decoded_token": " surf"}, "32911": {"logprob": -5.735892295837402, "rank": 4, "decoded_token": " rolling"}, "11196": {"logprob": -6.173392295837402, "rank": 5, "decoded_token": " sea"}}, {"11981": {"logprob": -1.4009065628051758, "rank": 1, "decoded_token": " meeting"}, "1321": {"logprob": -1.5884065628051758, "rank": 2, "decoded_token": " and"}, "86928": {"logprob": -2.088406562805176, "rank": 3, "decoded_token": " crashing"}, "1427": {"logprob": -2.650906562805176, "rank": 4, "decoded_token": " la"}, "44278": {"logprob": -2.838406562805176, "rank": 5, "decoded_token": " approaching"}}, {"1278": {"logprob": -0.8703328967094421, "rank": 1, "decoded_token": " the"}, "100991": {"logprob": -1.370332956314087, "rank": 2, "decoded_token": " sandy"}, "1261": {"logprob": -1.932832956314087, "rank": 3, "decoded_token": " a"}, "14693": {"logprob": -2.932832956314087, "rank": 4, "decoded_token": " sand"}, "46422": {"logprob": -3.557832956314087, "rank": 5, "decoded_token": " shore"}}, {"46422": {"logprob": -0.19113700091838837, "rank": 1, "decoded_token": " shore"}, "100991": {"logprob": -2.7536370754241943, "rank": 2, "decoded_token": " sandy"}, "1627": {"logprob": -3.0036370754241943, "rank": 3, "decoded_token": " sh"}, "14693": {"logprob": -3.3786370754241943, "rank": 4, "decoded_token": " sand"}, "124562": {"logprob": -5.378636837005615, "rank": 5, "decoded_token": " coastline"}}, {"1321": {"logprob": -1.191644549369812, "rank": 1, "decoded_token": " and"}, "3184": {"logprob": -2.0666446685791016, "rank": 2, "decoded_token": " during"}, "1338": {"logprob": -2.3166446685791016, "rank": 3, "decoded_token": ".\n\n"}, "1044": {"logprob": -2.3791446685791016, "rank": 4, "decoded_token": ","}, "1513": {"logprob": -2.5666446685791016, "rank": 5, "decoded_token": " at"}}, {"1261": {"logprob": -0.937598705291748, "rank": 1, "decoded_token": " a"}, "3306": {"logprob": -2.000098705291748, "rank": 2, "decoded_token": " people"}, "29661": {"logprob": -2.625098705291748, "rank": 3, "decoded_token": " surf"}, "2269": {"logprob": -2.937598705291748, "rank": 4, "decoded_token": " some"}, "30594": {"logprob": -3.062598705291748, "rank": 5, "decoded_token": " distant"}}, {"92731": {"logprob": -1.4627695083618164, "rank": 1, "decoded_token": " lone"}, "2965": {"logprob": -1.6502695083618164, "rank": 2, "decoded_token": " person"}, "4517": {"logprob": -2.0877695083618164, "rank": 3, "decoded_token": " few"}, "81249": {"logprob": -2.3377695083618164, "rank": 4, "decoded_token": " silhouette"}, "79013": {"logprob": -2.4627695083618164, "rank": 5, "decoded_token": " solitary"}}, {"2965": {"logprob": -0.9964981079101562, "rank": 1, "decoded_token": " person"}, "8240": {"logprob": -1.1839981079101562, "rank": 2, "decoded_token": " figure"}, "1958": {"logprob": -1.9964981079101562, "rank": 3, "decoded_token": " sur"}, "81249": {"logprob": -3.2464981079101562, "rank": 4, "decoded_token": " silhouette"}, "4597": {"logprob": -3.3089981079101562, "rank": 5, "decoded_token": " individual"}}, {"19710": {"logprob": -1.8438996076583862, "rank": 1, "decoded_token": " walking"}, "15866": {"logprob": -2.093899726867676, "rank": 2, "decoded_token": " standing"}, "6117": {"logprob": -2.093899726867676, "rank": 3, "decoded_token": " near"}, "1285": {"logprob": -2.468899726867676, "rank": 4, "decoded_token": " w"}, "1294": {"logprob": -2.843899726867676, "rank": 5, "decoded_token": " in"}}, {"4837": {"logprob": -1.405532717704773, "rank": 1, "decoded_token": " along"}, "6117": {"logprob": -1.718032717704773, "rank": 2, "decoded_token": " near"}, "1408": {"logprob": -2.5930328369140625, "rank": 3, "decoded_token": " on"}, "8994": {"logprob": -2.7180328369140625, "rank": 4, "decoded_token": " towards"}, "1338": {"logprob": -2.8430328369140625, "rank": 5, "decoded_token": ".\n\n"}}, {"1278": {"logprob": -0.1722739040851593, "rank": 1, "decoded_token": " the"}, "1494": {"logprob": -2.734773874282837, "rank": 2, "decoded_token": " it"}, "1338": {"logprob": -3.484773874282837, "rank": 3, "decoded_token": ".\n\n"}, "1261": {"logprob": -3.984773874282837, "rank": 4, "decoded_token": " a"}, "1626": {"logprob": -4.984774112701416, "rank": 5, "decoded_token": ".\n"}}, {"46422": {"logprob": -1.466295599937439, "rank": 1, "decoded_token": " shore"}, "1627": {"logprob": -1.528795599937439, "rank": 2, "decoded_token": " sh"}, "4180": {"logprob": -1.841295599937439, "rank": 3, "decoded_token": " water"}, "10314": {"logprob": -2.0912957191467285, "rank": 4, "decoded_token": " edge"}, "14693": {"logprob": -3.0912957191467285, "rank": 5, "decoded_token": " sand"}}, {"1338": {"logprob": -0.9661335945129395, "rank": 1, "decoded_token": ".\n\n"}, "2839": {"logprob": -2.0911335945129395, "rank": 2, "decoded_token": "line"}, "1626": {"logprob": -2.1536335945129395, "rank": 3, "decoded_token": ".\n"}, "1513": {"logprob": -2.1536335945129395, "rank": 4, "decoded_token": " at"}, "3184": {"logprob": -2.6536335945129395, "rank": 5, "decoded_token": " during"}}, {"1051": {"logprob": -0.8673074841499329, "rank": 1, "decoded_token": "3"}, "1256": {"logprob": -2.054807424545288, "rank": 2, "decoded_token": "  "}, "11745": {"logprob": -2.242307424545288, "rank": 3, "decoded_token": "Here"}, "1052": {"logprob": -2.242307424545288, "rank": 4, "decoded_token": "4"}, "1050": {"logprob": -3.367307424545288, "rank": 5, "decoded_token": "2"}}, {"1046": {"logprob": -0.09165985882282257, "rank": 1, "decoded_token": "."}, "1626": {"logprob": -4.341660022735596, "rank": 2, "decoded_token": ".\n"}, "1319": {"logprob": -4.841660022735596, "rank": 3, "decoded_token": " ("}, "1045": {"logprob": -6.029160022735596, "rank": 4, "decoded_token": "-"}, "1321": {"logprob": -6.529160022735596, "rank": 5, "decoded_token": " and"}}, {"1531": {"logprob": -0.5217734575271606, "rank": 1, "decoded_token": " The"}, "9380": {"logprob": -2.396773338317871, "rank": 2, "decoded_token": " Here"}, "2898": {"logprob": -3.521773338317871, "rank": 3, "decoded_token": " For"}, "45663": {"logprob": -3.959273338317871, "rank": 4, "decoded_token": " Fourth"}, "1319": {"logprob": -4.084273338317871, "rank": 5, "decoded_token": " ("}}, {"12432": {"logprob": -0.41262897849082947, "rank": 1, "decoded_token": " fourth"}, "5662": {"logprob": -3.8501288890838623, "rank": 2, "decoded_token": " provided"}, "5888": {"logprob": -4.037629127502441, "rank": 3, "decoded_token": " third"}, "3937": {"logprob": -4.037629127502441, "rank": 4, "decoded_token": " image"}, "2667": {"logprob": -4.162629127502441, "rank": 5, "decoded_token": " second"}}, {"3937": {"logprob": -0.5903608798980713, "rank": 1, "decoded_token": " image"}, "1319": {"logprob": -2.3403608798980713, "rank": 2, "decoded_token": " ("}, "2016": {"logprob": -2.7153608798980713, "rank": 3, "decoded_token": " set"}, "1321": {"logprob": -3.4653608798980713, "rank": 4, "decoded_token": " and"}, "1925": {"logprob": -4.090360641479492, "rank": 5, "decoded_token": " one"}}, {"6971": {"logprob": -1.8004755973815918, "rank": 1, "decoded_token": " features"}, "1319": {"logprob": -2.175475597381592, "rank": 2, "decoded_token": " ("}, "6122": {"logprob": -2.300475597381592, "rank": 3, "decoded_token": " shows"}, "1395": {"logprob": -2.487975597381592, "rank": 4, "decoded_token": " is"}, "1058": {"logprob": -2.487975597381592, "rank": 5, "decoded_token": ":"}}, {"1261": {"logprob": -0.16380631923675537, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -2.976306438446045, "rank": 2, "decoded_token": " an"}, "2295": {"logprob": -3.351306438446045, "rank": 3, "decoded_token": " two"}, "1278": {"logprob": -4.413806438446045, "rank": 4, "decoded_token": " the"}, "16429": {"logprob": -4.851306438446045, "rank": 5, "decoded_token": " trees"}}, {"53301": {"logprob": -1.6735851764678955, "rank": 1, "decoded_token": " winding"}, "59396": {"logprob": -1.9235851764678955, "rank": 2, "decoded_token": " gravel"}, "101727": {"logprob": -2.1110851764678955, "rank": 3, "decoded_token": " paved"}, "47945": {"logprob": -3.1110851764678955, "rank": 4, "decoded_token": " dirt"}, "23874": {"logprob": -3.1110851764678955, "rank": 5, "decoded_token": " pictures"}}, {"59396": {"logprob": -0.6121145486831665, "rank": 1, "decoded_token": " gravel"}, "47945": {"logprob": -1.7996145486831665, "rank": 2, "decoded_token": " dirt"}, "101727": {"logprob": -2.612114429473877, "rank": 3, "decoded_token": " paved"}, "3549": {"logprob": -2.799614429473877, "rank": 4, "decoded_token": " path"}, "1044": {"logprob": -2.862114429473877, "rank": 5, "decoded_token": ","}}, {"3549": {"logprob": -0.4002755880355835, "rank": 1, "decoded_token": " path"}, "14801": {"logprob": -1.7752755880355835, "rank": 2, "decoded_token": " pathway"}, "1505": {"logprob": -2.275275707244873, "rank": 3, "decoded_token": " or"}, "33659": {"logprob": -3.962775707244873, "rank": 4, "decoded_token": " trail"}, "9480": {"logprob": -4.337775707244873, "rank": 5, "decoded_token": " road"}}, {"121040": {"logprob": -1.1325652599334717, "rank": 1, "decoded_token": " bordered"}, "121313": {"logprob": -1.6950652599334717, "rank": 2, "decoded_token": " flanked"}, "29817": {"logprob": -2.1950652599334717, "rank": 3, "decoded_token": " surrounded"}, "8924": {"logprob": -2.6950652599334717, "rank": 4, "decoded_token": " leading"}, "1294": {"logprob": -2.8200652599334717, "rank": 5, "decoded_token": " in"}}, {"1536": {"logprob": -0.02329995296895504, "rank": 1, "decoded_token": " by"}, "1454": {"logprob": -4.5233001708984375, "rank": 2, "decoded_token": " with"}, "1408": {"logprob": -4.5858001708984375, "rank": 3, "decoded_token": " on"}, "98245": {"logprob": -8.460800170898438, "rank": 4, "decoded_token": " beautifully"}, "100910": {"logprob": -8.648300170898438, "rank": 5, "decoded_token": " neatly"}}, {"23170": {"logprob": -1.2441778182983398, "rank": 1, "decoded_token": " grass"}, "1295": {"logprob": -1.4941778182983398, "rank": 2, "decoded_token": " l"}, "11223": {"logprob": -1.5566778182983398, "rank": 3, "decoded_token": " green"}, "1261": {"logprob": -3.05667781829834, "rank": 4, "decoded_token": " a"}, "4174": {"logprob": -3.61917781829834, "rank": 5, "decoded_token": " gre"}}, {"1321": {"logprob": -0.8162240982055664, "rank": 1, "decoded_token": " and"}, "1121": {"logprob": -1.4412240982055664, "rank": 2, "decoded_token": "y"}, "1044": {"logprob": -1.8787240982055664, "rank": 3, "decoded_token": ","}, "8924": {"logprob": -3.3162240982055664, "rank": 4, "decoded_token": " leading"}, "1454": {"logprob": -3.5037240982055664, "rank": 5, "decoded_token": " with"}}, {"16429": {"logprob": -1.8316444158554077, "rank": 1, "decoded_token": " trees"}, "87833": {"logprob": -2.4566445350646973, "rank": 2, "decoded_token": " flowering"}, "17744": {"logprob": -2.5816445350646973, "rank": 3, "decoded_token": " blo"}, "1261": {"logprob": -2.6441445350646973, "rank": 4, "decoded_token": " a"}, "29817": {"logprob": -3.0191445350646973, "rank": 5, "decoded_token": " surrounded"}}, {"1294": {"logprob": -1.625852108001709, "rank": 1, "decoded_token": " in"}, "1044": {"logprob": -1.750852108001709, "rank": 2, "decoded_token": ","}, "2425": {"logprob": -2.125852108001709, "rank": 3, "decoded_token": " under"}, "8924": {"logprob": -2.250852108001709, "rank": 4, "decoded_token": " leading"}, "1408": {"logprob": -2.625852108001709, "rank": 5, "decoded_token": " on"}}, {"1261": {"logprob": -0.3194517195224762, "rank": 1, "decoded_token": " a"}, "1278": {"logprob": -2.3194518089294434, "rank": 2, "decoded_token": " the"}, "1420": {"logprob": -2.5694518089294434, "rank": 3, "decoded_token": " an"}, "2549": {"logprob": -2.6319518089294434, "rank": 4, "decoded_token": " what"}, "5346": {"logprob": -5.569451808929443, "rank": 5, "decoded_token": " front"}}, {"23874": {"logprob": -1.2531365156173706, "rank": 1, "decoded_token": " pictures"}, "10726": {"logprob": -1.6906365156173706, "rank": 2, "decoded_token": " scen"}, "54742": {"logprob": -2.12813663482666, "rank": 3, "decoded_token": " peaceful"}, "12097": {"logprob": -2.69063663482666, "rank": 4, "decoded_token": " park"}, "2169": {"logprob": -3.00313663482666, "rank": 5, "decoded_token": " ser"}}, {"1872": {"logprob": -0.0001784403866622597, "rank": 1, "decoded_token": "que"}, "1348": {"logprob": -8.875178337097168, "rank": 2, "decoded_token": "qu"}, "21451": {"logprob": -11.937678337097168, "rank": 3, "decoded_token": "QUE"}, "18954": {"logprob": -12.687678337097168, "rank": 4, "decoded_token": "qu\u00e9"}, "14016": {"logprob": -12.812678337097168, "rank": 5, "decoded_token": "quare"}}, {"41730": {"logprob": -1.2438380718231201, "rank": 1, "decoded_token": " outdoor"}, "12097": {"logprob": -1.4938380718231201, "rank": 2, "decoded_token": " park"}, "1044": {"logprob": -1.8688380718231201, "rank": 3, "decoded_token": ","}, "28035": {"logprob": -2.55633807182312, "rank": 4, "decoded_token": " landscape"}, "6967": {"logprob": -2.68133807182312, "rank": 5, "decoded_token": " natural"}}, {"9436": {"logprob": -0.28005897998809814, "rank": 1, "decoded_token": " setting"}, "6093": {"logprob": -2.9050588607788086, "rank": 2, "decoded_token": " environment"}, "12097": {"logprob": -2.9675588607788086, "rank": 3, "decoded_token": " park"}, "28035": {"logprob": -3.5300588607788086, "rank": 4, "decoded_token": " landscape"}, "4457": {"logprob": -3.7175588607788086, "rank": 5, "decoded_token": " area"}}, {"1338": {"logprob": -0.40908101201057434, "rank": 1, "decoded_token": ".\n\n"}, "1626": {"logprob": -2.284080982208252, "rank": 2, "decoded_token": ".\n"}, "2425": {"logprob": -2.721580982208252, "rank": 3, "decoded_token": " under"}, "1454": {"logprob": -3.096580982208252, "rank": 4, "decoded_token": " with"}, "1044": {"logprob": -3.284080982208252, "rank": 5, "decoded_token": ","}}, {"1052": {"logprob": -1.0536651611328125, "rank": 1, "decoded_token": "4"}, "11745": {"logprob": -2.1161651611328125, "rank": 2, "decoded_token": "Here"}, "16840": {"logprob": -3.2411651611328125, "rank": 3, "decoded_token": "Since"}, "2892": {"logprob": -3.3661651611328125, "rank": 4, "decoded_token": "To"}, "1040": {"logprob": -3.3661651611328125, "rank": 5, "decoded_token": "("}}, {"1046": {"logprob": -0.15314939618110657, "rank": 1, "decoded_token": "."}, "1626": {"logprob": -4.215649604797363, "rank": 2, "decoded_token": ".\n"}, "1319": {"logprob": -4.528149604797363, "rank": 3, "decoded_token": " ("}, "26667": {"logprob": -5.840649604797363, "rank": 4, "decoded_token": ".("}, "9380": {"logprob": -6.340649604797363, "rank": 5, "decoded_token": " Here"}}, {"1531": {"logprob": -1.7034302949905396, "rank": 1, "decoded_token": " The"}, "9380": {"logprob": -2.20343017578125, "rank": 2, "decoded_token": " Here"}, "1319": {"logprob": -2.26593017578125, "rank": 3, "decoded_token": " ("}, "2898": {"logprob": -2.95343017578125, "rank": 4, "decoded_token": " For"}, "9748": {"logprob": -3.01593017578125, "rank": 5, "decoded_token": " Since"}}, {"19723": {"logprob": -1.0940117835998535, "rank": 1, "decoded_token": " fifth"}, "5662": {"logprob": -3.2190117835998535, "rank": 2, "decoded_token": " provided"}, "3804": {"logprob": -3.4690117835998535, "rank": 3, "decoded_token": " last"}, "12432": {"logprob": -3.5940117835998535, "rank": 4, "decoded_token": " fourth"}, "5719": {"logprob": -3.6565117835998535, "rank": 5, "decoded_token": " initial"}}, {"3937": {"logprob": -1.0828688144683838, "rank": 1, "decoded_token": " image"}, "1319": {"logprob": -2.020368814468384, "rank": 2, "decoded_token": " ("}, "1925": {"logprob": -2.332868814468384, "rank": 3, "decoded_token": " one"}, "2016": {"logprob": -3.457868814468384, "rank": 4, "decoded_token": " set"}, "5662": {"logprob": -3.895368814468384, "rank": 5, "decoded_token": " provided"}}, {"1319": {"logprob": -0.9146400094032288, "rank": 1, "decoded_token": " ("}, "10045": {"logprob": -2.789639949798584, "rank": 2, "decoded_token": " isn"}, "1395": {"logprob": -2.914639949798584, "rank": 3, "decoded_token": " is"}, "5662": {"logprob": -3.477139949798584, "rank": 4, "decoded_token": " provided"}, "1294": {"logprob": -3.602139949798584, "rank": 5, "decoded_token": " in"}}, {"3715": {"logprob": -2.2420246601104736, "rank": 1, "decoded_token": "not"}, "3265": {"logprob": -3.3045246601104736, "rank": 2, "decoded_token": "the"}, "11018": {"logprob": -3.3670246601104736, "rank": 3, "decoded_token": "which"}, "1391": {"logprob": -3.4920246601104736, "rank": 4, "decoded_token": "if"}, "5011": {"logprob": -3.5545246601104736, "rank": 5, "decoded_token": "from"}}, {"24512": {"logprob": -1.8148819208145142, "rank": 1, "decoded_token": " applicable"}, "4326": {"logprob": -2.6273818016052246, "rank": 2, "decoded_token": " shown"}, "5662": {"logprob": -2.8773818016052246, "rank": 3, "decoded_token": " provided"}, "5656": {"logprob": -3.1898818016052246, "rank": 4, "decoded_token": " included"}, "1805": {"logprob": -3.1898818016052246, "rank": 5, "decoded_token": " part"}}, {"1435": {"logprob": -1.9905282258987427, "rank": 1, "decoded_token": " as"}, "1317": {"logprob": -2.115528106689453, "rank": 2, "decoded_token": " to"}, "1294": {"logprob": -2.303028106689453, "rank": 3, "decoded_token": " in"}, "3226": {"logprob": -2.553028106689453, "rank": 4, "decoded_token": " here"}, "1394": {"logprob": -2.553028106689453, "rank": 5, "decoded_token": " for"}}, {"1651": {"logprob": -1.3723630905151367, "rank": 1, "decoded_token": " per"}, "1494": {"logprob": -1.6848630905151367, "rank": 2, "decoded_token": " it"}, "2156": {"logprob": -2.4348630905151367, "rank": 3, "decoded_token": " there"}, "1278": {"logprob": -2.9973630905151367, "rank": 4, "decoded_token": " the"}, "1636": {"logprob": -3.0598630905151367, "rank": 5, "decoded_token": " you"}}, {"1278": {"logprob": -1.4684513807296753, "rank": 1, "decoded_token": " the"}, "2143": {"logprob": -1.5309513807296753, "rank": 2, "decoded_token": " your"}, "5719": {"logprob": -2.218451499938965, "rank": 3, "decoded_token": " initial"}, "5662": {"logprob": -3.280951499938965, "rank": 4, "decoded_token": " provided"}, "4618": {"logprob": -3.405951499938965, "rank": 5, "decoded_token": " original"}}, {"5719": {"logprob": -1.7317447662353516, "rank": 1, "decoded_token": " initial"}, "5662": {"logprob": -2.4817447662353516, "rank": 2, "decoded_token": " provided"}, "4618": {"logprob": -2.6692447662353516, "rank": 3, "decoded_token": " original"}, "2158": {"logprob": -3.1067447662353516, "rank": 4, "decoded_token": " first"}, "4265": {"logprob": -3.2317447662353516, "rank": 5, "decoded_token": " given"}}, {"4546": {"logprob": -2.323227643966675, "rank": 1, "decoded_token": " request"}, "4098": {"logprob": -2.635727643966675, "rank": 2, "decoded_token": " question"}, "5662": {"logprob": -2.760727643966675, "rank": 3, "decoded_token": " provided"}, "6360": {"logprob": -2.885727643966675, "rank": 4, "decoded_token": " description"}, "4249": {"logprob": -3.260727643966675, "rank": 5, "decoded_token": " single"}}, {"1041": {"logprob": -2.016270160675049, "rank": 1, "decoded_token": ")"}, "1394": {"logprob": -2.203770160675049, "rank": 2, "decoded_token": " for"}, "1044": {"logprob": -2.891270160675049, "rank": 3, "decoded_token": ","}, "3640": {"logprob": -3.016270160675049, "rank": 4, "decoded_token": "):\n"}, "1321": {"logprob": -3.078770160675049, "rank": 5, "decoded_token": " and"}}, {"2168": {"logprob": -2.4032278060913086, "rank": 1, "decoded_token": " would"}, "1395": {"logprob": -2.4657278060913086, "rank": 2, "decoded_token": " is"}, "7444": {"logprob": -2.5907278060913086, "rank": 3, "decoded_token": " seems"}, "2715": {"logprob": -2.6532278060913086, "rank": 4, "decoded_token": " should"}, "10045": {"logprob": -3.2157278060913086, "rank": 5, "decoded_token": " isn"}}, {"1402": {"logprob": -1.638601303100586, "rank": 1, "decoded_token": " be"}, "1605": {"logprob": -2.138601303100586, "rank": 2, "decoded_token": " not"}, "96594": {"logprob": -2.513601303100586, "rank": 3, "decoded_token": " logically"}, "2534": {"logprob": -2.888601303100586, "rank": 4, "decoded_token": " need"}, "79961": {"logprob": -3.201101303100586, "rank": 5, "decoded_token": " ideally"}}, {"1278": {"logprob": -2.479240894317627, "rank": 1, "decoded_token": " the"}, "1261": {"logprob": -2.854240894317627, "rank": 2, "decoded_token": " a"}, "3866": {"logprob": -3.041740894317627, "rank": 3, "decoded_token": " another"}, "57773": {"logprob": -3.479240894317627, "rank": 4, "decoded_token": " irrelevant"}, "1877": {"logprob": -3.604240894317627, "rank": 5, "decoded_token": ":\n"}}, {"1925": {"logprob": -2.5215530395507812, "rank": 1, "decoded_token": " one"}, "2879": {"logprob": -3.1465530395507812, "rank": 2, "decoded_token": " same"}, "25342": {"logprob": -3.4590530395507812, "rank": 3, "decoded_token": " incorrect"}, "7244": {"logprob": -3.5840530395507812, "rank": 4, "decoded_token": " black"}, "4275": {"logprob": -3.5840530395507812, "rank": 5, "decoded_token": " next"}}, {"1454": {"logprob": -2.0894155502319336, "rank": 1, "decoded_token": " with"}, "1636": {"logprob": -3.0269155502319336, "rank": 2, "decoded_token": " you"}, "1307": {"logprob": -3.0269155502319336, "rank": 3, "decoded_token": " of"}, "1455": {"logprob": -3.2769155502319336, "rank": 4, "decoded_token": " that"}, "1562": {"logprob": -3.4019155502319336, "rank": 5, "decoded_token": " from"}}, {"1278": {"logprob": -1.3837254047393799, "rank": 1, "decoded_token": " the"}, "1261": {"logprob": -1.6962254047393799, "rank": 2, "decoded_token": " a"}, "3866": {"logprob": -3.13372540473938, "rank": 3, "decoded_token": " another"}, "1420": {"logprob": -3.19622540473938, "rank": 4, "decoded_token": " an"}, "2586": {"logprob": -3.69622540473938, "rank": 5, "decoded_token": " just"}}, {"10575": {"logprob": -1.6755551099777222, "rank": 1, "decoded_token": " dog"}, "7244": {"logprob": -2.3005552291870117, "rank": 2, "decoded_token": " black"}, "25342": {"logprob": -3.7380552291870117, "rank": 3, "decoded_token": " incorrect"}, "3549": {"logprob": -3.8630552291870117, "rank": 4, "decoded_token": " path"}, "4811": {"logprob": -4.238055229187012, "rank": 5, "decoded_token": " specific"}}, {"2790": {"logprob": -1.985573649406433, "rank": 1, "decoded_token": " again"}, "1044": {"logprob": -2.4230737686157227, "rank": 2, "decoded_token": ","}, "1562": {"logprob": -2.7355737686157227, "rank": 3, "decoded_token": " from"}, "1338": {"logprob": -2.7980737686157227, "rank": 4, "decoded_token": ".\n\n"}, "1408": {"logprob": -3.0480737686157227, "rank": 5, "decoded_token": " on"}}, {"1044": {"logprob": -1.7941354513168335, "rank": 1, "decoded_token": ","}, "1693": {"logprob": -2.231635570526123, "rank": 2, "decoded_token": " if"}, "1809": {"logprob": -2.731635570526123, "rank": 3, "decoded_token": " but"}, "1505": {"logprob": -3.356635570526123, "rank": 4, "decoded_token": " or"}, "1562": {"logprob": -3.419135570526123, "rank": 5, "decoded_token": " from"}}, {"1809": {"logprob": -1.9572563171386719, "rank": 1, "decoded_token": " but"}, "1878": {"logprob": -2.269756317138672, "rank": 2, "decoded_token": " so"}, "1799": {"logprob": -2.769756317138672, "rank": 3, "decoded_token": " which"}, "1321": {"logprob": -3.769756317138672, "rank": 4, "decoded_token": " and"}, "1693": {"logprob": -3.957256317138672, "rank": 5, "decoded_token": " if"}}, {"4136": {"logprob": -2.412797212600708, "rank": 1, "decoded_token": " since"}, "3226": {"logprob": -2.475297212600708, "rank": 2, "decoded_token": " here"}, "1278": {"logprob": -2.850297212600708, "rank": 3, "decoded_token": " the"}, "2878": {"logprob": -2.975297212600708, "rank": 4, "decoded_token": " let"}, "1494": {"logprob": -3.287797212600708, "rank": 5, "decoded_token": " it"}}, {"1494": {"logprob": -1.4206243753433228, "rank": 1, "decoded_token": " it"}, "1278": {"logprob": -2.045624256134033, "rank": 2, "decoded_token": " the"}, "1729": {"logprob": -2.295624256134033, "rank": 3, "decoded_token": " we"}, "2156": {"logprob": -2.483124256134033, "rank": 4, "decoded_token": " there"}, "1636": {"logprob": -2.483124256134033, "rank": 5, "decoded_token": " you"}}, {"1681": {"logprob": -1.4698597192764282, "rank": 1, "decoded_token": "'s"}, "2190": {"logprob": -1.6573597192764282, "rank": 2, "decoded_token": "\u2019s"}, "1395": {"logprob": -2.1573596000671387, "rank": 3, "decoded_token": " is"}, "1486": {"logprob": -2.5323596000671387, "rank": 4, "decoded_token": " was"}, "7444": {"logprob": -3.2823596000671387, "rank": 5, "decoded_token": " seems"}}, {"5314": {"logprob": -1.6933988332748413, "rank": 1, "decoded_token": " already"}, "1605": {"logprob": -2.130898952484131, "rank": 2, "decoded_token": " not"}, "1278": {"logprob": -2.193398952484131, "rank": 3, "decoded_token": " the"}, "2342": {"logprob": -3.318398952484131, "rank": 4, "decoded_token": " only"}, "13578": {"logprob": -3.568398952484131, "rank": 5, "decoded_token": " repeated"}}, {"5055": {"logprob": -1.3677805662155151, "rank": 1, "decoded_token": " described"}, "13875": {"logprob": -1.4927805662155151, "rank": 2, "decoded_token": " covered"}, "24511": {"logprob": -2.6802806854248047, "rank": 3, "decoded_token": " addressed"}, "10910": {"logprob": -2.7427806854248047, "rank": 4, "decoded_token": " mentioned"}, "5656": {"logprob": -3.4302806854248047, "rank": 5, "decoded_token": " included"}}, {"1044": {"logprob": -1.6195299625396729, "rank": 1, "decoded_token": ","}, "2100": {"logprob": -2.057029962539673, "rank": 2, "decoded_token": ":\n\n"}, "1294": {"logprob": -2.057029962539673, "rank": 3, "decoded_token": " in"}, "1877": {"logprob": -2.369529962539673, "rank": 4, "decoded_token": ":\n"}, "15423": {"logprob": -2.869529962539673, "rank": 5, "decoded_token": " initially"}}, {"3226": {"logprob": -1.7415276765823364, "rank": 1, "decoded_token": " here"}, "2878": {"logprob": -1.8665276765823364, "rank": 2, "decoded_token": " let"}, "1362": {"logprob": -2.304027557373047, "rank": 3, "decoded_token": " I"}, "1494": {"logprob": -2.616527557373047, "rank": 4, "decoded_token": " it"}, "1278": {"logprob": -3.179027557373047, "rank": 5, "decoded_token": " the"}}, {"2190": {"logprob": -1.3488249778747559, "rank": 1, "decoded_token": "\u2019s"}, "1681": {"logprob": -1.4113249778747559, "rank": 2, "decoded_token": "'s"}, "1395": {"logprob": -1.5363249778747559, "rank": 3, "decoded_token": " is"}, "1584": {"logprob": -2.473824977874756, "rank": 4, "decoded_token": " are"}, "1494": {"logprob": -4.348824977874756, "rank": 5, "decoded_token": " it"}}, {"1261": {"logprob": -1.4024583101272583, "rank": 1, "decoded_token": " a"}, "1278": {"logprob": -1.9024583101272583, "rank": 2, "decoded_token": " the"}, "3866": {"logprob": -2.1524581909179688, "rank": 3, "decoded_token": " another"}, "1420": {"logprob": -2.5274581909179688, "rank": 4, "decoded_token": " an"}, "1605": {"logprob": -4.152458190917969, "rank": 5, "decoded_token": " not"}}, {"2801": {"logprob": -2.7750463485717773, "rank": 1, "decoded_token": " different"}, "6468": {"logprob": -3.1500463485717773, "rank": 2, "decoded_token": " focus"}, "21788": {"logprob": -3.2750463485717773, "rank": 3, "decoded_token": " repeat"}, "17793": {"logprob": -3.4625463485717773, "rank": 4, "decoded_token": " summary"}, "13426": {"logprob": -3.6500463485717773, "rank": 5, "decoded_token": " brief"}}, {"6468": {"logprob": -2.8590242862701416, "rank": 1, "decoded_token": " focus"}, "1925": {"logprob": -3.4215242862701416, "rank": 2, "decoded_token": " one"}, "19190": {"logprob": -3.9215242862701416, "rank": 3, "decoded_token": " interpretation"}, "6360": {"logprob": -3.9840242862701416, "rank": 4, "decoded_token": " description"}, "3336": {"logprob": -3.9840242862701416, "rank": 5, "decoded_token": " example"}}, {"1693": {"logprob": -2.391437530517578, "rank": 1, "decoded_token": " if"}, "1877": {"logprob": -2.516437530517578, "rank": 2, "decoded_token": ":\n"}, "1408": {"logprob": -2.516437530517578, "rank": 3, "decoded_token": " on"}, "1058": {"logprob": -2.578937530517578, "rank": 4, "decoded_token": ":"}, "2100": {"logprob": -2.703937530517578, "rank": 5, "decoded_token": ":\n\n"}}, {"1729": {"logprob": -2.6488425731658936, "rank": 1, "decoded_token": " we"}, "6618": {"logprob": -2.6488425731658936, "rank": 2, "decoded_token": " needed"}, "1494": {"logprob": -2.9613425731658936, "rank": 3, "decoded_token": " it"}, "2258": {"logprob": -3.2113425731658936, "rank": 4, "decoded_token": " any"}, "1636": {"logprob": -3.2738425731658936, "rank": 5, "decoded_token": " you"}}, {"3369": {"logprob": -2.0612776279449463, "rank": 1, "decoded_token": " consider"}, "1722": {"logprob": -2.6862776279449463, "rank": 2, "decoded_token": " were"}, "1880": {"logprob": -3.1237776279449463, "rank": 3, "decoded_token": " had"}, "14649": {"logprob": -3.1862776279449463, "rank": 4, "decoded_token": " assume"}, "55328": {"logprob": -3.2487776279449463, "rank": 5, "decoded_token": " mistaken"}}, {"1278": {"logprob": -1.7770118713378906, "rank": 1, "decoded_token": " the"}, "3866": {"logprob": -2.4645118713378906, "rank": 2, "decoded_token": " another"}, "1261": {"logprob": -2.5895118713378906, "rank": 3, "decoded_token": " a"}, "1494": {"logprob": -3.5270118713378906, "rank": 4, "decoded_token": " it"}, "1420": {"logprob": -3.5895118713378906, "rank": 5, "decoded_token": " an"}}, {"3629": {"logprob": -3.3516223430633545, "rank": 1, "decoded_token": " following"}, "4275": {"logprob": -3.5391223430633545, "rank": 2, "decoded_token": " next"}, "12432": {"logprob": -3.7266223430633545, "rank": 3, "decoded_token": " fourth"}, "5719": {"logprob": -3.7891223430633545, "rank": 4, "decoded_token": " initial"}, "2158": {"logprob": -3.9141223430633545, "rank": 5, "decoded_token": " first"}}, {"75275": {"logprob": -3.0541036128997803, "rank": 1, "decoded_token": " hypothetical"}, "2100": {"logprob": -3.1166036128997803, "rank": 2, "decoded_token": ":\n\n"}, "1877": {"logprob": -3.1166036128997803, "rank": 3, "decoded_token": ":\n"}, "1435": {"logprob": -4.179103851318359, "rank": 4, "decoded_token": " as"}, "1319": {"logprob": -4.179103851318359, "rank": 5, "decoded_token": " ("}}, {"1877": {"logprob": -2.70977520942688, "rank": 1, "decoded_token": ":\n"}, "2100": {"logprob": -2.83477520942688, "rank": 2, "decoded_token": ":\n\n"}, "1115": {"logprob": -3.20977520942688, "rank": 3, "decoded_token": "s"}, "12432": {"logprob": -3.64727520942688, "rank": 4, "decoded_token": " fourth"}, "1058": {"logprob": -3.89727520942688, "rank": 5, "decoded_token": ":"}}, {"1256": {"logprob": -0.4959573745727539, "rank": 1, "decoded_token": "  "}, "1293": {"logprob": -1.870957374572754, "rank": 2, "decoded_token": "   "}, "11745": {"logprob": -4.183457374572754, "rank": 3, "decoded_token": "Here"}, "1784": {"logprob": -4.495957374572754, "rank": 4, "decoded_token": "The"}, "1053": {"logprob": -4.495957374572754, "rank": 5, "decoded_token": "5"}}, {"1462": {"logprob": -1.5276975631713867, "rank": 1, "decoded_token": " -"}, "1319": {"logprob": -2.4026975631713867, "rank": 2, "decoded_token": " ("}, "9246": {"logprob": -3.2776975631713867, "rank": 3, "decoded_token": " Let"}, "9380": {"logprob": -3.3401975631713867, "rank": 4, "decoded_token": " Here"}, "1531": {"logprob": -3.3401975631713867, "rank": 5, "decoded_token": " The"}}, {"1531": {"logprob": -2.32918381690979, "rank": 1, "decoded_token": " The"}, "1319": {"logprob": -2.82918381690979, "rank": 2, "decoded_token": " ("}, "1349": {"logprob": -3.01668381690979, "rank": 3, "decoded_token": " A"}, "2898": {"logprob": -3.20418381690979, "rank": 4, "decoded_token": " For"}, "9380": {"logprob": -3.57918381690979, "rank": 5, "decoded_token": " Here"}}, {"5719": {"logprob": -2.6479814052581787, "rank": 1, "decoded_token": " initial"}, "19723": {"logprob": -3.2729814052581787, "rank": 2, "decoded_token": " fifth"}, "3804": {"logprob": -3.3979814052581787, "rank": 3, "decoded_token": " last"}, "25342": {"logprob": -3.8979814052581787, "rank": 4, "decoded_token": " incorrect"}, "5662": {"logprob": -3.9604814052581787, "rank": 5, "decoded_token": " provided"}}, {"5662": {"logprob": -2.9346542358398438, "rank": 1, "decoded_token": " provided"}, "6468": {"logprob": -3.1846542358398438, "rank": 2, "decoded_token": " focus"}, "7244": {"logprob": -3.6221542358398438, "rank": 3, "decoded_token": " black"}, "10575": {"logprob": -3.6221542358398438, "rank": 4, "decoded_token": " dog"}, "6360": {"logprob": -3.8096542358398438, "rank": 5, "decoded_token": " description"}}, {"3937": {"logprob": -3.0049667358398438, "rank": 1, "decoded_token": " image"}, "10575": {"logprob": -3.0674667358398438, "rank": 2, "decoded_token": " dog"}, "8061": {"logprob": -3.5049667358398438, "rank": 3, "decoded_token": " images"}, "2016": {"logprob": -3.5049667358398438, "rank": 4, "decoded_token": " set"}, "2667": {"logprob": -3.5674667358398438, "rank": 5, "decoded_token": " second"}}, {"1395": {"logprob": -2.7246885299682617, "rank": 1, "decoded_token": " is"}, "1319": {"logprob": -2.7871885299682617, "rank": 2, "decoded_token": " ("}, "1307": {"logprob": -3.6621885299682617, "rank": 3, "decoded_token": " of"}, "2016": {"logprob": -3.7871885299682617, "rank": 4, "decoded_token": " set"}, "1058": {"logprob": -3.7871885299682617, "rank": 5, "decoded_token": ":"}}, {"1261": {"logprob": -2.914872646331787, "rank": 1, "decoded_token": " a"}, "1605": {"logprob": -3.164872646331787, "rank": 2, "decoded_token": " not"}, "1278": {"logprob": -3.164872646331787, "rank": 3, "decoded_token": " the"}, "2586": {"logprob": -3.227372646331787, "rank": 4, "decoded_token": " just"}, "13578": {"logprob": -3.352372646331787, "rank": 5, "decoded_token": " repeated"}}, {"7244": {"logprob": -2.4122366905212402, "rank": 1, "decoded_token": " black"}, "15115": {"logprob": -3.1622366905212402, "rank": 2, "decoded_token": " detailed"}, "10575": {"logprob": -3.1622366905212402, "rank": 3, "decoded_token": " dog"}, "6468": {"logprob": -3.2247366905212402, "rank": 4, "decoded_token": " focus"}, "6165": {"logprob": -3.3497366905212402, "rank": 5, "decoded_token": " simple"}}, {"10575": {"logprob": -0.2919383645057678, "rank": 1, "decoded_token": " dog"}, "119075": {"logprob": -3.104438304901123, "rank": 2, "decoded_token": " Labrador"}, "116572": {"logprob": -3.479438304901123, "rank": 3, "decoded_token": " puppy"}, "15812": {"logprob": -4.104438304901123, "rank": 4, "decoded_token": " Lab"}, "2001": {"logprob": -4.479438304901123, "rank": 5, "decoded_token": " po"}}, {"7283": {"logprob": -1.594571590423584, "rank": 1, "decoded_token": " looking"}, "1408": {"logprob": -2.219571590423584, "rank": 2, "decoded_token": " on"}, "18970": {"logprob": -2.407071590423584, "rank": 3, "decoded_token": " sitting"}, "1454": {"logprob": -2.657071590423584, "rank": 4, "decoded_token": " with"}, "11589": {"logprob": -3.282071590423584, "rank": 5, "decoded_token": " gaz"}}, {"2015": {"logprob": -2.1110990047454834, "rank": 1, "decoded_token": " up"}, "7655": {"logprob": -2.1735990047454834, "rank": 2, "decoded_token": " directly"}, "1935": {"logprob": -2.4860990047454834, "rank": 3, "decoded_token": " int"}, "4524": {"logprob": -3.0485990047454834, "rank": 4, "decoded_token": " cur"}, "40022": {"logprob": -3.2985990047454834, "rank": 5, "decoded_token": " upward"}}, {"1454": {"logprob": -1.676759123802185, "rank": 1, "decoded_token": " with"}, "1513": {"logprob": -2.1142592430114746, "rank": 2, "decoded_token": " at"}, "4914": {"logprob": -2.5517592430114746, "rank": 3, "decoded_token": " thought"}, "1935": {"logprob": -2.8642592430114746, "rank": 4, "decoded_token": " int"}, "1408": {"logprob": -2.8642592430114746, "rank": 5, "decoded_token": " on"}}, {"1261": {"logprob": -1.243793249130249, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -1.931293249130249, "rank": 2, "decoded_token": " an"}, "2246": {"logprob": -2.618793249130249, "rank": 3, "decoded_token": " its"}, "9924": {"logprob": -2.618793249130249, "rank": 4, "decoded_token": " wide"}, "14781": {"logprob": -3.743793249130249, "rank": 5, "decoded_token": " focused"}}, {"26517": {"logprob": -1.6467483043670654, "rank": 1, "decoded_token": " calm"}, "14781": {"logprob": -2.5842483043670654, "rank": 2, "decoded_token": " focused"}, "29691": {"logprob": -2.7092483043670654, "rank": 3, "decoded_token": " contempl"}, "85596": {"logprob": -3.1467483043670654, "rank": 4, "decoded_token": " solemn"}, "16318": {"logprob": -3.2092483043670654, "rank": 5, "decoded_token": " neutral"}}, {"4818": {"logprob": -0.6443419456481934, "rank": 1, "decoded_token": " expression"}, "1311": {"logprob": -2.2068419456481934, "rank": 2, "decoded_token": " de"}, "1321": {"logprob": -2.5193419456481934, "rank": 3, "decoded_token": " and"}, "1044": {"logprob": -2.9568419456481934, "rank": 4, "decoded_token": ","}, "22131": {"logprob": -3.4568419456481934, "rank": 5, "decoded_token": " gaze"}}, {"1408": {"logprob": -0.7509063482284546, "rank": 1, "decoded_token": " on"}, "1338": {"logprob": -2.188406467437744, "rank": 2, "decoded_token": ".\n\n"}, "2136": {"logprob": -2.875906467437744, "rank": 3, "decoded_token": " over"}, "3675": {"logprob": -3.188406467437744, "rank": 4, "decoded_token": " against"}, "1046": {"logprob": -3.313406467437744, "rank": 5, "decoded_token": "."}}, {"1261": {"logprob": -0.41352635622024536, "rank": 1, "decoded_token": " a"}, "44130": {"logprob": -2.2885262966156006, "rank": 2, "decoded_token": " rust"}, "32656": {"logprob": -2.8510262966156006, "rank": 3, "decoded_token": " wooden"}, "2549": {"logprob": -3.6010262966156006, "rank": 4, "decoded_token": " what"}, "3403": {"logprob": -4.03852653503418, "rank": 5, "decoded_token": " text"}}, {"44130": {"logprob": -1.1644353866577148, "rank": 1, "decoded_token": " rust"}, "3403": {"logprob": -1.7269353866577148, "rank": 2, "decoded_token": " text"}, "32656": {"logprob": -1.7894353866577148, "rank": 3, "decoded_token": " wooden"}, "6165": {"logprob": -3.351935386657715, "rank": 4, "decoded_token": " simple"}, "1615": {"logprob": -3.601935386657715, "rank": 5, "decoded_token": " pl"}}, {"1290": {"logprob": -0.007089222315698862, "rank": 1, "decoded_token": "ic"}, "1121": {"logprob": -6.319589138031006, "rank": 2, "decoded_token": "y"}, "2981": {"logprob": -6.507089138031006, "rank": 3, "decoded_token": "ically"}, "12500": {"logprob": -6.694589138031006, "rank": 4, "decoded_token": "icated"}, "86794": {"logprob": -7.632089138031006, "rank": 5, "decoded_token": "-colored"}}, {"32656": {"logprob": -0.5749364495277405, "rank": 1, "decoded_token": " wooden"}, "1615": {"logprob": -2.4499363899230957, "rank": 2, "decoded_token": " pl"}, "1044": {"logprob": -2.6999363899230957, "rank": 3, "decoded_token": ","}, "12603": {"logprob": -2.8249363899230957, "rank": 4, "decoded_token": " wood"}, "4691": {"logprob": -3.3249363899230957, "rank": 5, "decoded_token": " surface"}}, {"1615": {"logprob": -0.6836710572242737, "rank": 1, "decoded_token": " pl"}, "7042": {"logprob": -2.433670997619629, "rank": 2, "decoded_token": " background"}, "4691": {"logprob": -2.433670997619629, "rank": 3, "decoded_token": " surface"}, "92504": {"logprob": -2.558670997619629, "rank": 4, "decoded_token": " backdrop"}, "9710": {"logprob": -3.058670997619629, "rank": 5, "decoded_token": " board"}}, {"2395": {"logprob": -0.063260018825531, "rank": 1, "decoded_token": "ank"}, "5933": {"logprob": -3.063260078430176, "rank": 2, "decoded_token": "anks"}, "122370": {"logprob": -4.313260078430176, "rank": 3, "decoded_token": "anking"}, "4713": {"logprob": -7.688260078430176, "rank": 4, "decoded_token": "atter"}, "58739": {"logprob": -8.563260078430176, "rank": 5, "decoded_token": "ANK"}}, {"7042": {"logprob": -1.4006984233856201, "rank": 1, "decoded_token": " background"}, "92504": {"logprob": -1.6506984233856201, "rank": 2, "decoded_token": " backdrop"}, "4691": {"logprob": -2.08819842338562, "rank": 3, "decoded_token": " surface"}, "1338": {"logprob": -2.83819842338562, "rank": 4, "decoded_token": ".\n\n"}, "9436": {"logprob": -2.96319842338562, "rank": 5, "decoded_token": " setting"}}, {"1338": {"logprob": -0.7081566452980042, "rank": 1, "decoded_token": ".\n\n"}, "1046": {"logprob": -0.8956566452980042, "rank": 2, "decoded_token": "."}, "1319": {"logprob": -4.145656585693359, "rank": 3, "decoded_token": " ("}, "2100": {"logprob": -4.395656585693359, "rank": 4, "decoded_token": ":\n\n"}, "1626": {"logprob": -4.708156585693359, "rank": 5, "decoded_token": ".\n"}}, {"2892": {"logprob": -2.5741822719573975, "rank": 1, "decoded_token": "To"}, "4393": {"logprob": -2.8866822719573975, "rank": 2, "decoded_token": "For"}, "11745": {"logprob": -3.1366822719573975, "rank": 3, "decoded_token": "Here"}, "12598": {"logprob": -3.1991822719573975, "rank": 4, "decoded_token": "Let"}, "1040": {"logprob": -3.2616822719573975, "rank": 5, "decoded_token": "("}}, {"38695": {"logprob": -1.3182454109191895, "rank": 1, "decoded_token": " clarify"}, "10035": {"logprob": -2.3807454109191895, "rank": 2, "decoded_token": " avoid"}, "11811": {"logprob": -3.0057454109191895, "rank": 3, "decoded_token": " ensure"}, "66370": {"logprob": -3.0682454109191895, "rank": 4, "decoded_token": " summarize"}, "36993": {"logprob": -3.4432454109191895, "rank": 5, "decoded_token": " strictly"}}, {"1044": {"logprob": -1.8413705825805664, "rank": 1, "decoded_token": ","}, "1278": {"logprob": -2.5913705825805664, "rank": 2, "decoded_token": " the"}, "1321": {"logprob": -2.9038705825805664, "rank": 3, "decoded_token": " and"}, "1877": {"logprob": -3.2163705825805664, "rank": 4, "decoded_token": ":\n"}, "4057": {"logprob": -3.5288705825805664, "rank": 5, "decoded_token": " based"}}, {"3226": {"logprob": -1.5737794637680054, "rank": 1, "decoded_token": " here"}, "1362": {"logprob": -2.698779582977295, "rank": 2, "decoded_token": " I"}, "1278": {"logprob": -2.761279582977295, "rank": 3, "decoded_token": " the"}, "2342": {"logprob": -2.886279582977295, "rank": 4, "decoded_token": " only"}, "2878": {"logprob": -2.948779582977295, "rank": 5, "decoded_token": " let"}}, {"2190": {"logprob": -1.2323426008224487, "rank": 1, "decoded_token": "\u2019s"}, "1681": {"logprob": -1.5448426008224487, "rank": 2, "decoded_token": "'s"}, "1584": {"logprob": -1.5448426008224487, "rank": 3, "decoded_token": " are"}, "1395": {"logprob": -2.0448427200317383, "rank": 4, "decoded_token": " is"}, "6510": {"logprob": -4.607342720031738, "rank": 5, "decoded_token": "with"}}, {"1278": {"logprob": -1.6423555612564087, "rank": 1, "decoded_token": " the"}, "1261": {"logprob": -1.7048555612564087, "rank": 2, "decoded_token": " a"}, "2606": {"logprob": -3.267355442047119, "rank": 3, "decoded_token": " how"}, "1420": {"logprob": -3.329855442047119, "rank": 4, "decoded_token": " an"}, "2342": {"logprob": -3.517355442047119, "rank": 5, "decoded_token": " only"}}, {"6298": {"logprob": -2.6795427799224854, "rank": 1, "decoded_token": " correct"}, "4811": {"logprob": -3.3670427799224854, "rank": 2, "decoded_token": " specific"}, "17793": {"logprob": -3.6795427799224854, "rank": 3, "decoded_token": " summary"}, "15115": {"logprob": -3.8045427799224854, "rank": 4, "decoded_token": " detailed"}, "6468": {"logprob": -3.8670427799224854, "rank": 5, "decoded_token": " focus"}}, {"6360": {"logprob": -2.9291810989379883, "rank": 1, "decoded_token": " description"}, "6468": {"logprob": -3.4291810989379883, "rank": 2, "decoded_token": " focus"}, "7980": {"logprob": -3.5541810989379883, "rank": 3, "decoded_token": " sequence"}, "1321": {"logprob": -3.5541810989379883, "rank": 4, "decoded_token": " and"}, "44433": {"logprob": -3.8041810989379883, "rank": 5, "decoded_token": " breakdown"}}, {"1394": {"logprob": -1.4118860960006714, "rank": 1, "decoded_token": " for"}, "1307": {"logprob": -2.161886215209961, "rank": 2, "decoded_token": " of"}, "4057": {"logprob": -2.599386215209961, "rank": 3, "decoded_token": " based"}, "30557": {"logprob": -3.724386215209961, "rank": 4, "decoded_token": " focusing"}, "2342": {"logprob": -3.911886215209961, "rank": 5, "decoded_token": " only"}}, {"1278": {"logprob": -1.170920729637146, "rank": 1, "decoded_token": " the"}, "2744": {"logprob": -2.4834208488464355, "rank": 2, "decoded_token": " each"}, "2143": {"logprob": -3.1709208488464355, "rank": 3, "decoded_token": " your"}, "1747": {"logprob": -3.2959208488464355, "rank": 4, "decoded_token": " all"}, "2342": {"logprob": -3.3584208488464355, "rank": 5, "decoded_token": " only"}}, {"5662": {"logprob": -2.8693411350250244, "rank": 1, "decoded_token": " provided"}, "5719": {"logprob": -3.1193411350250244, "rank": 2, "decoded_token": " initial"}, "8061": {"logprob": -3.3693411350250244, "rank": 3, "decoded_token": " images"}, "12432": {"logprob": -3.4318411350250244, "rank": 4, "decoded_token": " fourth"}, "2667": {"logprob": -3.4943411350250244, "rank": 5, "decoded_token": " second"}}, {"7244": {"logprob": -2.477940559387207, "rank": 1, "decoded_token": " black"}, "10575": {"logprob": -2.727940559387207, "rank": 2, "decoded_token": " dog"}, "3937": {"logprob": -2.727940559387207, "rank": 3, "decoded_token": " image"}, "8061": {"logprob": -2.790440559387207, "rank": 4, "decoded_token": " images"}, "2016": {"logprob": -2.852940559387207, "rank": 5, "decoded_token": " set"}}, {"10575": {"logprob": -0.09506329894065857, "rank": 1, "decoded_token": " dog"}, "3937": {"logprob": -3.8450632095336914, "rank": 2, "decoded_token": " image"}, "63524": {"logprob": -4.470063209533691, "rank": 3, "decoded_token": "dog"}, "3028": {"logprob": -4.970063209533691, "rank": 4, "decoded_token": "-d"}, "1321": {"logprob": -5.220063209533691, "rank": 5, "decoded_token": " and"}}, {"3937": {"logprob": -0.6087309122085571, "rank": 1, "decoded_token": " image"}, "2100": {"logprob": -2.7337307929992676, "rank": 2, "decoded_token": ":\n\n"}, "1294": {"logprob": -3.2337307929992676, "rank": 3, "decoded_token": " in"}, "13083": {"logprob": -3.3587307929992676, "rank": 4, "decoded_token": " picture"}, "1877": {"logprob": -3.3587307929992676, "rank": 5, "decoded_token": ":\n"}}, {"1877": {"logprob": -1.969537615776062, "rank": 1, "decoded_token": ":\n"}, "2100": {"logprob": -2.0945377349853516, "rank": 2, "decoded_token": ":\n\n"}, "2342": {"logprob": -2.4695377349853516, "rank": 3, "decoded_token": " only"}, "13703": {"logprob": -3.0945377349853516, "rank": 4, "decoded_token": " specifically"}, "9412": {"logprob": -3.1570377349853516, "rank": 5, "decoded_token": " alone"}}, {"1065": {"logprob": -1.341880202293396, "rank": 1, "decoded_token": "A"}, "1784": {"logprob": -1.841880202293396, "rank": 2, "decoded_token": "The"}, "1256": {"logprob": -1.966880202293396, "rank": 3, "decoded_token": "  "}, "1438": {"logprob": -2.4668803215026855, "rank": 4, "decoded_token": "**"}, "1045": {"logprob": -2.5293803215026855, "rank": 5, "decoded_token": "-"}}, {"7244": {"logprob": -0.6517431139945984, "rank": 1, "decoded_token": " black"}, "6231": {"logprob": -2.714243173599243, "rank": 2, "decoded_token": " close"}, "85596": {"logprob": -3.026743173599243, "rank": 3, "decoded_token": " solemn"}, "14781": {"logprob": -3.401743173599243, "rank": 4, "decoded_token": " focused"}, "26517": {"logprob": -3.526743173599243, "rank": 5, "decoded_token": " calm"}}, {"10575": {"logprob": -0.10892026871442795, "rank": 1, "decoded_token": " dog"}, "119075": {"logprob": -3.7964203357696533, "rank": 2, "decoded_token": " Labrador"}, "116572": {"logprob": -3.9839203357696533, "rank": 3, "decoded_token": " puppy"}, "1044": {"logprob": -4.983920097351074, "rank": 4, "decoded_token": ","}, "94057": {"logprob": -5.171420097351074, "rank": 5, "decoded_token": " canine"}}, {"11589": {"logprob": -1.534816861152649, "rank": 1, "decoded_token": " gaz"}, "1395": {"logprob": -1.722316861152649, "rank": 2, "decoded_token": " is"}, "1454": {"logprob": -2.0348167419433594, "rank": 3, "decoded_token": " with"}, "53048": {"logprob": -2.2848167419433594, "rank": 4, "decoded_token": " sits"}, "10637": {"logprob": -2.6598167419433594, "rank": 5, "decoded_token": " looks"}}, {"1264": {"logprob": -0.4754399061203003, "rank": 1, "decoded_token": "es"}, "1302": {"logprob": -0.9754399061203003, "rank": 2, "decoded_token": "ing"}, "1944": {"logprob": -8.16294002532959, "rank": 3, "decoded_token": "ely"}, "47885": {"logprob": -8.16294002532959, "rank": 4, "decoded_token": "edly"}, "15006": {"logprob": -8.41294002532959, "rank": 5, "decoded_token": "ingly"}}, {"1935": {"logprob": -1.7025537490844727, "rank": 1, "decoded_token": " int"}, "7655": {"logprob": -2.0775537490844727, "rank": 2, "decoded_token": " directly"}, "40022": {"logprob": -2.2650537490844727, "rank": 3, "decoded_token": " upward"}, "2015": {"logprob": -2.4525537490844727, "rank": 4, "decoded_token": " up"}, "74606": {"logprob": -2.7025537490844727, "rank": 5, "decoded_token": " upwards"}}, {"3929": {"logprob": -0.0234219990670681, "rank": 1, "decoded_token": "ently"}, "2749": {"logprob": -5.335921764373779, "rank": 2, "decoded_token": "ros"}, "1533": {"logprob": -6.398421764373779, "rank": 3, "decoded_token": "ang"}, "3923": {"logprob": -6.523421764373779, "rank": 4, "decoded_token": "ively"}, "20626": {"logprob": -6.773421764373779, "rank": 5, "decoded_token": "rep"}}, {"40022": {"logprob": -0.8856239318847656, "rank": 1, "decoded_token": " upward"}, "74606": {"logprob": -1.5106239318847656, "rank": 2, "decoded_token": " upwards"}, "1454": {"logprob": -2.2606239318847656, "rank": 3, "decoded_token": " with"}, "8848": {"logprob": -2.7606239318847656, "rank": 4, "decoded_token": " forward"}, "1513": {"logprob": -3.5106239318847656, "rank": 5, "decoded_token": " at"}}, {"1454": {"logprob": -1.170392632484436, "rank": 1, "decoded_token": " with"}, "1408": {"logprob": -1.295392632484436, "rank": 2, "decoded_token": " on"}, "1562": {"logprob": -2.1703925132751465, "rank": 3, "decoded_token": " from"}, "3016": {"logprob": -2.4828925132751465, "rank": 4, "decoded_token": " while"}, "3675": {"logprob": -3.1703925132751465, "rank": 5, "decoded_token": " against"}}, {"1261": {"logprob": -0.8887192010879517, "rank": 1, "decoded_token": " a"}, "1420": {"logprob": -1.7637192010879517, "rank": 2, "decoded_token": " an"}, "2246": {"logprob": -2.138719081878662, "rank": 3, "decoded_token": " its"}, "9924": {"logprob": -2.888719081878662, "rank": 4, "decoded_token": " wide"}, "26517": {"logprob": -3.888719081878662, "rank": 5, "decoded_token": " calm"}}, {"26517": {"logprob": -1.2183846235275269, "rank": 1, "decoded_token": " calm"}, "2169": {"logprob": -2.8433847427368164, "rank": 2, "decoded_token": " ser"}, "16318": {"logprob": -3.0933847427368164, "rank": 3, "decoded_token": " neutral"}, "14781": {"logprob": -3.2183847427368164, "rank": 4, "decoded_token": " focused"}, "6444": {"logprob": -3.2183847427368164, "rank": 5, "decoded_token": " soft"}}, {"4818": {"logprob": -0.4964141249656677, "rank": 1, "decoded_token": " expression"}, "1321": {"logprob": -1.9339141845703125, "rank": 2, "decoded_token": " and"}, "1311": {"logprob": -2.4339141845703125, "rank": 3, "decoded_token": " de"}, "1044": {"logprob": -3.3089141845703125, "rank": 4, "decoded_token": ","}, "2985": {"logprob": -3.4964141845703125, "rank": 5, "decoded_token": " look"}}, {"1408": {"logprob": -0.8785426616668701, "rank": 1, "decoded_token": " on"}, "1562": {"logprob": -2.06604266166687, "rank": 2, "decoded_token": " from"}, "18970": {"logprob": -2.31604266166687, "rank": 3, "decoded_token": " sitting"}, "3675": {"logprob": -2.62854266166687, "rank": 4, "decoded_token": " against"}, "38235": {"logprob": -3.06604266166687, "rank": 5, "decoded_token": " resting"}}, {"1261": {"logprob": -0.3605937659740448, "rank": 1, "decoded_token": " a"}, "32656": {"logprob": -2.923093795776367, "rank": 2, "decoded_token": " wooden"}, "17253": {"logprob": -3.110593795776367, "rank": 3, "decoded_token": " weather"}, "3403": {"logprob": -3.173093795776367, "rank": 4, "decoded_token": " text"}, "44130": {"logprob": -3.298093795776367, "rank": 5, "decoded_token": " rust"}}, {"44130": {"logprob": -1.4187138080596924, "rank": 1, "decoded_token": " rust"}, "32656": {"logprob": -1.4812138080596924, "rank": 2, "decoded_token": " wooden"}, "3403": {"logprob": -1.6687138080596924, "rank": 3, "decoded_token": " text"}, "17253": {"logprob": -2.4812138080596924, "rank": 4, "decoded_token": " weather"}, "8500": {"logprob": -4.168713569641113, "rank": 5, "decoded_token": " dark"}}, {"1290": {"logprob": -0.28350138664245605, "rank": 1, "decoded_token": "ic"}, "1970": {"logprob": -2.283501386642456, "rank": 2, "decoded_token": "led"}, "1121": {"logprob": -2.971001386642456, "rank": 3, "decoded_token": "y"}, "2981": {"logprob": -4.283501625061035, "rank": 4, "decoded_token": "ically"}, "11395": {"logprob": -4.471001625061035, "rank": 5, "decoded_token": "iced"}}, {"32656": {"logprob": -0.7727869749069214, "rank": 1, "decoded_token": " wooden"}, "1044": {"logprob": -1.4602869749069214, "rank": 2, "decoded_token": ","}, "3403": {"logprob": -2.585287094116211, "rank": 3, "decoded_token": " text"}, "1615": {"logprob": -2.835287094116211, "rank": 4, "decoded_token": " pl"}, "12603": {"logprob": -3.335287094116211, "rank": 5, "decoded_token": " wood"}}, {"1615": {"logprob": -0.7352191805839539, "rank": 1, "decoded_token": " pl"}, "4691": {"logprob": -2.1102192401885986, "rank": 2, "decoded_token": " surface"}, "9710": {"logprob": -2.6102192401885986, "rank": 3, "decoded_token": " board"}, "7042": {"logprob": -2.9852192401885986, "rank": 4, "decoded_token": " background"}, "92504": {"logprob": -3.1102192401885986, "rank": 5, "decoded_token": " backdrop"}}, {"2395": {"logprob": -0.13373544812202454, "rank": 1, "decoded_token": "ank"}, "5933": {"logprob": -2.133735418319702, "rank": 2, "decoded_token": "anks"}, "122370": {"logprob": -6.883735656738281, "rank": 3, "decoded_token": "anking"}, "11847": {"logprob": -8.071235656738281, "rank": 4, "decoded_token": "anned"}, "2077": {"logprob": -8.071235656738281, "rank": 5, "decoded_token": "ink"}}, {"7042": {"logprob": -1.197163701057434, "rank": 1, "decoded_token": " background"}, "4691": {"logprob": -1.572163701057434, "rank": 2, "decoded_token": " surface"}, "92504": {"logprob": -1.884663701057434, "rank": 3, "decoded_token": " backdrop"}, "26228": {"logprob": -3.3221635818481445, "rank": 4, "decoded_token": " texture"}, "9436": {"logprob": -3.3846635818481445, "rank": 5, "decoded_token": " setting"}}, {"1046": {"logprob": -0.13945358991622925, "rank": 1, "decoded_token": "."}, "1338": {"logprob": -2.576953649520874, "rank": 2, "decoded_token": ".\n\n"}, "1294": {"logprob": -4.764453411102295, "rank": 3, "decoded_token": " in"}, "1044": {"logprob": -5.701953411102295, "rank": 4, "decoded_token": ","}, "1319": {"logprob": -5.889453411102295, "rank": 5, "decoded_token": " ("}}, {"2": {"logprob": -0.5447223782539368, "rank": 1, "decoded_token": "</s>"}, "1319": {"logprob": -2.607222318649292, "rank": 2, "decoded_token": " ("}, "9380": {"logprob": -3.669722318649292, "rank": 3, "decoded_token": " Here"}, "1531": {"logprob": -3.919722318649292, "rank": 4, "decoded_token": " The"}, "2898": {"logprob": -4.107222557067871, "rank": 5, "decoded_token": " For"}}]]]
\ No newline at end of file
diff --git a/tests/models/language/generation/test_common.py b/tests/models/language/generation/test_common.py
index c524480839bc..2a693603f023 100644
--- a/tests/models/language/generation/test_common.py
+++ b/tests/models/language/generation/test_common.py
@@ -46,7 +46,7 @@
         ),
         pytest.param(
             "openai-community/gpt2",  # gpt2
-            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+            marks=[pytest.mark.core_model],
         ),
         pytest.param("Milos/slovak-gpt-j-405M"),  # gptj
         pytest.param("bigcode/tiny_starcoder_py"),  # gpt_bigcode
@@ -100,7 +100,7 @@
         pytest.param("bigcode/starcoder2-3b"),  # starcoder2
         pytest.param(
             "TitanML/tiny-mixtral",  # mixtral
-            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+            marks=[pytest.mark.core_model],
         ),
         pytest.param("swiss-ai/Apertus-8B-Instruct-2509"),  # apertus
         pytest.param(
@@ -143,6 +143,15 @@ def test_models(
         # in parts of the operators
         pytest.skip(f"Skipping '{model}' model test with AITER kernel.")
 
+    if model == "bigcode/starcoder2-3b":
+        # Replace example.txt's Test1 (an NL prompt) with a code prompt:
+        # starcoder2-3b is a code model, so NL prompts give near-uniform
+        # digit logits where HF<->vLLM bf16 drift can reorder top-K.
+        example_prompts = list(example_prompts)
+        example_prompts[1] = (
+            "def add(a, b):\n    return a + b\n\ndef sub(a, b):\n    return a - "
+        )
+
     with hf_runner(model) as hf_model:
         hf_outputs = hf_model.generate_greedy_logprobs_limit(
             example_prompts, max_tokens, num_logprobs
diff --git a/tests/models/language/generation/test_granite.py b/tests/models/language/generation/test_granite.py
index e569e75ff3a8..c0498b2f7de1 100644
--- a/tests/models/language/generation/test_granite.py
+++ b/tests/models/language/generation/test_granite.py
@@ -15,6 +15,7 @@
 @pytest.mark.parametrize("dtype", ["bfloat16"])
 @pytest.mark.parametrize("max_tokens", [64])
 @pytest.mark.parametrize("num_logprobs", [5])
+@pytest.mark.cpu_model
 def test_models(
     hf_runner,
     vllm_runner,
diff --git a/tests/models/language/generation/test_hybrid.py b/tests/models/language/generation/test_hybrid.py
index 225418356648..e410daf2fcdd 100644
--- a/tests/models/language/generation/test_hybrid.py
+++ b/tests/models/language/generation/test_hybrid.py
@@ -57,6 +57,16 @@
 # Avoid OOM
 MAX_NUM_SEQS = 4
 
+ATTN_BACKEND = "TRITON_ATTN" if current_platform.is_rocm() else "auto"
+
+
+def _set_conv_state_layout(monkeypatch, layout: str) -> None:
+    """Set conv state layout env var and clear cache to pick up new value."""
+    from vllm.model_executor.layers.mamba import mamba_utils
+
+    monkeypatch.setenv("VLLM_SSM_CONV_STATE_LAYOUT", layout)
+    mamba_utils.get_conv_state_layout.cache_clear()
+
 
 @pytest.mark.parametrize("model", SSM_MODELS + HYBRID_MODELS)
 @pytest.mark.parametrize("max_tokens", [64])
@@ -82,7 +92,9 @@ def test_models(
             example_prompts, max_tokens, num_logprobs
         )
 
-    with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
+    with vllm_runner(
+        model, max_num_seqs=MAX_NUM_SEQS, attention_backend=ATTN_BACKEND
+    ) as vllm_model:
         vllm_outputs = vllm_model.generate_greedy_logprobs(
             example_prompts, max_tokens, num_logprobs
         )
@@ -98,12 +110,15 @@ def test_models(
 @pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]])
 @pytest.mark.parametrize("max_tokens", [64])
 @pytest.mark.parametrize("num_logprobs", [5])
+@pytest.mark.parametrize("conv_state_layout", ["SD", "DS"])
 def test_batching(
     vllm_runner,
     example_prompts,
+    monkeypatch,
     model: str,
     max_tokens: int,
     num_logprobs: int,
+    conv_state_layout: str,
 ) -> None:
     try:
         model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
@@ -112,6 +127,8 @@ def test_batching(
     except ValueError:
         pass
 
+    _set_conv_state_layout(monkeypatch, conv_state_layout)
+
     for_loop_outputs = []
     with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
         for prompt in example_prompts:
@@ -134,11 +151,14 @@ def test_batching(
 
 @pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]])
 @pytest.mark.parametrize("max_tokens", [10])
+@pytest.mark.parametrize("conv_state_layout", ["SD", "DS"])
 def test_chunked_prefill_with_parallel_sampling(
     vllm_runner,
     example_prompts,
+    monkeypatch,
     model: str,
     max_tokens: int,
+    conv_state_layout: str,
 ) -> None:
     """
     Tests chunked prefill in conjunction with n > 1.
@@ -150,6 +170,8 @@ def test_chunked_prefill_with_parallel_sampling(
     decoding steps inside a chunked prefill forward pass
     (where we have both prefill and decode together)
     """
+    _set_conv_state_layout(monkeypatch, conv_state_layout)
+
     sampling_params = SamplingParams(n=3, temperature=1, seed=0, max_tokens=max_tokens)
     with vllm_runner(
         model,
@@ -157,23 +179,29 @@ def test_chunked_prefill_with_parallel_sampling(
         # forces prefill chunks with decoding
         max_num_batched_tokens=MAX_NUM_SEQS * 3,
         max_num_seqs=MAX_NUM_SEQS,
+        attention_backend=ATTN_BACKEND,
     ) as vllm_model:
         vllm_model.generate(example_prompts, sampling_params)
 
 
 @pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]])
 @pytest.mark.parametrize("max_tokens", [20])
+@pytest.mark.parametrize("conv_state_layout", ["SD", "DS"])
 def test_mamba_cache_cg_padding(
     vllm_runner,
     example_prompts,
+    monkeypatch,
     model: str,
     max_tokens: int,
+    conv_state_layout: str,
 ) -> None:
     """
     This test is for verifying that mamba cache is padded to CG captured
     batch size. If it's not, a torch RuntimeError will be raised because
     tensor dimensions aren't compatible.
     """
+    _set_conv_state_layout(monkeypatch, conv_state_layout)
+
     vllm_config = EngineArgs(model=model, trust_remote_code=True).create_engine_config()
     cudagraph_dispatcher = CudagraphDispatcher(vllm_config)
     cudagraph_dispatcher.initialize_cudagraph_keys(
@@ -301,7 +329,9 @@ def test_full_cuda_graph(
             example_prompts, max_tokens, num_logprobs
         )
 
-    with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
+    with vllm_runner(
+        model, max_num_seqs=MAX_NUM_SEQS, attention_backend=ATTN_BACKEND
+    ) as vllm_model:
         vllm_outputs = vllm_model.generate_greedy_logprobs(
             example_prompts, max_tokens, num_logprobs
         )
@@ -370,6 +400,7 @@ def _get_vllm_runner_params(
         "max_model_len": max_model_len,
         "tensor_parallel_size": tensor_parallel_size,
         "gpu_memory_utilization": 0.4,
+        "attention_backend": ATTN_BACKEND,
     }
 
 
@@ -844,12 +875,13 @@ def test_apc_common_prefix_same_batch(
         mamba_block_size=16,
         enable_prefix_caching=True,
         seed=42,
+        attention_backend=ATTN_BACKEND,
     )
     prompts = [
         "hello what is one plus one what is one plus one what is one plus one the answer is",  # noqa: E501
         "hello what is one plus one what is one plus one what is one plus one the answer is",  # noqa: E501
     ]
-    sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=20)
+    sampling_params = SamplingParams(temperature=0.0, max_tokens=20)
     outputs = llm.generate(prompts, sampling_params)
     for output in outputs:
         assert "two" in output.outputs[0].text
diff --git a/tests/models/language/pooling/test_all_pooling_plus_chunked_prefill.py b/tests/models/language/pooling/test_all_pooling_plus_chunked_prefill.py
index c259c532220b..4119e1d5e00d 100644
--- a/tests/models/language/pooling/test_all_pooling_plus_chunked_prefill.py
+++ b/tests/models/language/pooling/test_all_pooling_plus_chunked_prefill.py
@@ -6,6 +6,7 @@
 
 from tests.models.utils import check_embeddings_close
 from vllm import TokensPrompt
+from vllm.config import PoolerConfig
 
 
 @pytest.mark.parametrize(
@@ -21,6 +22,7 @@ def test_embed_models(hf_runner, vllm_runner, model: str):
     with vllm_runner(
         model,
         runner="pooling",
+        pooler_config=PoolerConfig(task="token_embed"),
         max_model_len=128,
         max_num_batched_tokens=chunk_size,
         enforce_eager=True,
diff --git a/tests/models/language/pooling/test_bge_m3.py b/tests/models/language/pooling/test_bge_m3.py
index c0ef263c7781..772f60197fb5 100644
--- a/tests/models/language/pooling/test_bge_m3.py
+++ b/tests/models/language/pooling/test_bge_m3.py
@@ -3,7 +3,6 @@
 import httpx
 import openai
 import pytest
-import pytest_asyncio
 import torch
 
 from ....utils import RemoteOpenAIServer
@@ -25,29 +24,42 @@
 similarity_reference = [[0.6259, 0.3474], [0.3309, 0.6734]]
 lexical_score_reference = [0.19554901123046875, 0.0]
 colbert_score_reference = [0.7797, 0.4620]
+SUPPORTED_TASKS = ["embed", "token_embed", "token_classify"]
+
+
+@pytest.fixture(scope="module", params=SUPPORTED_TASKS)
+def pooling_task(request):
+    yield request.param
 
 
 @pytest.fixture(scope="module")
-def server():
+def server(pooling_task):
     args = [
         "--max-model-len",
         str(MAX_MODEL_LEN),
         "--hf-overrides",
         '{"architectures": ["BgeM3EmbeddingModel"]}',
+        "--pooler-config.task",
+        pooling_task,
     ]
 
     with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
         yield remote_server
 
 
-@pytest_asyncio.fixture
-async def client(server):
-    async with server.get_async_client() as async_client:
-        yield async_client
-
-
 @pytest.mark.asyncio
-async def test_bge_m3_api_server_embedding(client: openai.AsyncOpenAI):
+async def test_bge_m3_api_server_embedding(server, pooling_task):
+    client = server.get_async_client()
+
+    if pooling_task != "embed":
+        with pytest.raises(openai.InternalServerError):
+            await run_client_embeddings(
+                client,
+                MODEL_NAME,
+                sentences_1,
+            )
+        return
+
     embeddings_list_1 = await run_client_embeddings(
         client,
         MODEL_NAME,
@@ -117,7 +129,14 @@ def compute_lexical_matching_score(
 
 
 @pytest.mark.asyncio
-async def test_bge_m3_api_server_sparse_embedding(client: openai.AsyncOpenAI):
+async def test_bge_m3_api_server_sparse_embedding(server, pooling_task):
+    client = server.get_async_client()
+
+    if pooling_task != "token_classify":
+        with pytest.raises(openai.BadRequestError):
+            await sparse_embeddings(client, sentences_1)
+        return
+
     embeddings_1 = await sparse_embeddings(client, sentences_1)
     embeddings_2 = await sparse_embeddings(client, sentences_2)
 
@@ -137,9 +156,11 @@ async def test_bge_m3_api_server_sparse_embedding(client: openai.AsyncOpenAI):
 
 
 @pytest.mark.asyncio
-async def test_bge_m3_api_server_sparse_embedding_corner_case(
-    client: openai.AsyncOpenAI,
-):
+async def test_bge_m3_api_server_sparse_embedding_corner_case(server, pooling_task):
+    if pooling_task != "token_classify":
+        return
+
+    client = server.get_async_client()
     embeddings = await sparse_embeddings(client, ["Hi"])
     assert len(embeddings) == 1
     assert 2673 in embeddings[0]
@@ -155,7 +176,18 @@ def colbert_score(q_reps: torch.Tensor, p_reps: torch.Tensor) -> torch.Tensor:
 
 
 @pytest.mark.asyncio
-async def test_bge_m3_api_server_multi_vector(client: openai.AsyncOpenAI):
+async def test_bge_m3_api_server_multi_vector(server, pooling_task):
+    client = server.get_async_client()
+
+    if pooling_task != "token_embed":
+        with pytest.raises(openai.BadRequestError):
+            await client.post(
+                "../pooling",
+                body={"model": MODEL_NAME, "input": sentences_1, "task": "token_embed"},
+                cast_to=httpx.Response,
+            )
+        return
+
     result_1 = await client.post(
         "../pooling",
         body={"model": MODEL_NAME, "input": sentences_1, "task": "token_embed"},
diff --git a/tests/models/language/pooling/test_colbert.py b/tests/models/language/pooling/test_colbert.py
index a245f879ba2b..10c229fe063b 100644
--- a/tests/models/language/pooling/test_colbert.py
+++ b/tests/models/language/pooling/test_colbert.py
@@ -9,7 +9,7 @@
 import pytest
 import torch
 
-from vllm.entrypoints.pooling.score.utils import compute_maxsim_score
+from vllm.entrypoints.pooling.scoring.utils import compute_maxsim_score
 
 # -----------------------------------------------------------------------
 # Model definitions: (model_name, colbert_dim, extra vllm_runner kwargs)
@@ -109,6 +109,14 @@ def _load_hf_model(model_name: str, hf_spec: dict, device: torch.device):
         **extra,
     ).to(device)
     model.eval()
+
+    # Transformers 5.0 weight materialization can clear non-persistent
+    # buffers (e.g. rotary inv_freq) that were registered with
+    # persistent=False.  Re-compute them so the model produces valid output.
+    for mod in model.modules():
+        if hasattr(mod, "_compute_inv_freq") and hasattr(mod, "inv_freq"):
+            mod.inv_freq = mod._compute_inv_freq(device=device)
+
     return model
 
 
diff --git a/tests/models/language/pooling/test_extract_hidden_states.py b/tests/models/language/pooling/test_extract_hidden_states.py
index 488b27e2da0f..40bbeb1c39f6 100644
--- a/tests/models/language/pooling/test_extract_hidden_states.py
+++ b/tests/models/language/pooling/test_extract_hidden_states.py
@@ -4,6 +4,7 @@
 import torch
 
 from vllm import TokensPrompt
+from vllm.config import PoolerConfig
 
 
 @pytest.mark.parametrize(
@@ -20,6 +21,7 @@ def test_extract_hidden_states(hf_runner, vllm_runner, model: str):
         max_model_len=128,
         enforce_eager=True,
         runner="pooling",
+        pooler_config=PoolerConfig(task="token_embed"),
         enable_prefix_caching=True,
     ) as vllm_model:
         pooling_outputs = vllm_model.llm.encode(
@@ -44,14 +46,3 @@ def test_extract_hidden_states(hf_runner, vllm_runner, model: str):
             assert len(output.prompt_token_ids) == n
             assert len(output.outputs.data) == n
             assert output.num_cached_tokens == 0
-
-        # skip_reading_prefix_cache can still write to cache
-        # to accelerate following requests
-        pooling_outputs = vllm_model.llm.encode(
-            [TokensPrompt(prompt_token_ids=t) for t in token_prompts],
-            pooling_task="embed",
-        )
-
-        for n, output in zip(n_prompt_tokens, pooling_outputs):
-            assert len(output.prompt_token_ids) == n
-            assert output.num_cached_tokens > 0
diff --git a/tests/models/language/pooling/test_gritlm.py b/tests/models/language/pooling/test_gritlm.py
index 5ff5073e869f..b1296a64171e 100644
--- a/tests/models/language/pooling/test_gritlm.py
+++ b/tests/models/language/pooling/test_gritlm.py
@@ -7,7 +7,7 @@
 from vllm import LLM, SamplingParams
 from vllm.config import ModelConfig
 
-from ....utils import RemoteOpenAIServer
+from ....utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer
 from .embed_utils import run_client_embeddings
 
 MODEL_NAME = "parasail-ai/GritLM-7B-vllm"
@@ -126,9 +126,15 @@ def test_gritlm_offline_embedding(vllm_runner):
 async def test_gritlm_api_server_embedding():
     queries, q_instruction, documents, d_instruction = get_test_data()
 
-    args = ["--runner", "pooling", "--max_model_len", str(MAX_MODEL_LEN)]
+    args = [
+        "--runner",
+        "pooling",
+        "--max_model_len",
+        str(MAX_MODEL_LEN),
+        *ROCM_EXTRA_ARGS,
+    ]
 
-    with RemoteOpenAIServer(MODEL_NAME, args) as server:
+    with RemoteOpenAIServer(MODEL_NAME, args, env_dict=ROCM_ENV_OVERRIDES) as server:
         client_embedding = server.get_async_client()
 
         d_rep = await run_client_embeddings(
diff --git a/tests/models/language/pooling/test_jina_reranker_v3.py b/tests/models/language/pooling/test_jina_reranker_v3.py
new file mode 100644
index 000000000000..dcce6d5bd4aa
--- /dev/null
+++ b/tests/models/language/pooling/test_jina_reranker_v3.py
@@ -0,0 +1,275 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# ruff: noqa: E501
+import pytest
+import requests
+import torch
+import torch.nn.functional as F
+
+from tests.utils import RemoteOpenAIServer
+from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
+from vllm.entrypoints.pooling.scoring.protocol import ScoreResponse
+
+model_name = "jinaai/jina-reranker-v3"
+query = "What are the health benefits of green tea?"
+documents = [
+    "Green tea contains antioxidants called catechins that may help reduce inflammation and protect cells from damage.",
+    "El precio del café ha aumentado un 20% este año debido a problemas en la cadena de suministro.",
+    "Studies show that drinking green tea regularly can improve brain function and boost metabolism.",
+    "Basketball is one of the most popular sports in the United States.",
+    "绿茶富含儿茶素等抗氧化剂，可以降低心脏病风险，还有助于控制体重。",
+    "Le thé vert est riche en antioxydants et peut améliorer la fonction cérébrale.",
+]
+
+EMBEDDING_SIZE = 512
+REFERENCE_1_VS_1 = [
+    0.345703125,
+    -0.10498046,
+    0.314453125,
+    -0.1376953125,
+    0.3398437500,
+    0.2539062,
+]
+REFERENCE_1_VS_N = [
+    0.294921875,
+    -0.16015625,
+    0.189453125,
+    -0.1708984375,
+    0.2255859375,
+    0.1640625,
+]
+TOL = 0.01
+
+
+def test_offline(vllm_runner):
+    with vllm_runner(model_name, runner="pooling") as llm_runner:
+        llm = llm_runner.get_llm()
+        _test_offline_1_v_1(llm)
+        _test_offline_1_v_n(llm)
+        _test_offline_n_v_n(llm)
+        _test_offline_token_embed_illegal_inputs(llm)
+        assert llm.model_config.embedding_size == EMBEDDING_SIZE
+
+
+def test_online():
+    with RemoteOpenAIServer(model_name, ["--runner", "pooling"]) as server:
+        _test_online_1_v_1(server)
+        _test_online_1_v_n(server)
+        _test_online_n_v_n(server)
+        _test_online_token_embed_illegal_inputs(server)
+
+
+def _test_offline_1_v_1(llm):
+    # test llm.score
+    outputs = llm.score(query, documents[0])
+    assert len(outputs) == 1
+    assert outputs[0].outputs.score == pytest.approx(REFERENCE_1_VS_1[0], abs=TOL)
+
+    # test llm.encode
+    outputs = llm.encode(documents[:1] + [query], pooling_task="token_embed")
+    embeds = outputs[0].outputs.data.float()
+    assert embeds.shape[0] == 2
+    assert embeds.shape[-1] == EMBEDDING_SIZE
+
+    doc_embeds = embeds[:-1]
+    query_embeds = embeds[-1]
+
+    scores = F.cosine_similarity(query_embeds, doc_embeds)
+    assert scores[0] == pytest.approx(REFERENCE_1_VS_1[0], abs=TOL)
+
+
+def _test_offline_1_v_n(llm):
+    # test llm.score
+    outputs = llm.score(query, documents)
+    assert len(outputs) == len(documents)
+
+    for expected, output in zip(REFERENCE_1_VS_N, outputs):
+        actual = output.outputs.score
+        assert actual == pytest.approx(expected, abs=TOL)
+
+    # test llm.encode
+    outputs = llm.encode(documents + [query], pooling_task="token_embed")
+    embeds = outputs[0].outputs.data.float()
+    assert embeds.shape[0] == len(documents) + 1
+
+    doc_embeds = embeds[:-1]
+    query_embeds = embeds[-1]
+
+    scores = F.cosine_similarity(query_embeds, doc_embeds)
+
+    assert len(scores) == len(documents)
+    for expected, actual in zip(REFERENCE_1_VS_N, scores):
+        assert actual == pytest.approx(expected, abs=TOL)
+
+
+def _test_offline_n_v_n(llm):
+    # test llm.score
+    outputs = llm.score([query] * len(documents), documents)
+    assert len(outputs) == len(documents)
+
+    for expected, output in zip(REFERENCE_1_VS_1, outputs):
+        actual = output.outputs.score
+        assert actual == pytest.approx(expected, abs=TOL)
+
+    # test llm.encode
+    for doc, expected in zip(documents, REFERENCE_1_VS_1):
+        outputs = llm.encode([doc, query], pooling_task="token_embed")
+        embeds = outputs[0].outputs.data.float()
+        assert embeds.shape[0] == 2
+
+        doc_embeds = embeds[:-1]
+        query_embeds = embeds[-1]
+
+        scores = F.cosine_similarity(query_embeds, doc_embeds)
+        assert scores[0] == pytest.approx(expected, abs=TOL)
+
+
+def _test_offline_token_embed_illegal_inputs(llm):
+    with pytest.raises(
+        ValueError, match="The JinaForRanking model requires at least 2 inputs."
+    ):
+        llm.encode([query], pooling_task="token_embed")
+
+    with pytest.raises(
+        ValueError, match="The JinaForRanking model only supports text as input."
+    ):
+        llm.encode([1, 2, 3], pooling_task="token_embed")
+
+
+def _get_scores(server, query, document):
+    score_response = requests.post(
+        server.url_for("score"),
+        json={
+            "model": model_name,
+            "queries": query,
+            "documents": document,
+        },
+    )
+
+    score_response.raise_for_status()
+    score = ScoreResponse.model_validate(score_response.json())
+
+    return [d.score for d in score.data]
+
+
+def _get_embeds(server, prompts: list[str]):
+    response = requests.post(
+        server.url_for("pooling"),
+        json={
+            "model": model_name,
+            "task": "token_embed",
+            "input": prompts,
+            "encoding_format": "float",
+        },
+    )
+    response.raise_for_status()
+    poolings = PoolingResponse.model_validate(response.json())
+
+    return torch.as_tensor([d.data for d in poolings.data][0]).float()
+
+
+def _test_online_1_v_1(server):
+    # test scoring api
+    scores = _get_scores(server, query, documents[0])
+    assert len(scores) == 1
+    assert scores[0] == pytest.approx(REFERENCE_1_VS_1[0], abs=TOL)
+
+    # test pooling api
+    embeds = _get_embeds(server, [documents[0], query])
+    assert embeds.shape[0] == 2
+    assert embeds.shape[-1] == EMBEDDING_SIZE
+
+    doc_embeds = embeds[:-1]
+    query_embeds = embeds[-1]
+
+    scores = F.cosine_similarity(query_embeds, doc_embeds)
+    assert scores[0] == pytest.approx(REFERENCE_1_VS_1[0], abs=TOL)
+
+
+def _test_online_1_v_n(server):
+    # test scoring api
+    scores = _get_scores(server, query, documents)
+    assert len(scores) == len(documents)
+
+    for expected, actual in zip(REFERENCE_1_VS_N, scores):
+        assert actual == pytest.approx(expected, abs=TOL)
+
+    # test pooling api
+    embeds = _get_embeds(server, documents + [query])
+    assert embeds.shape[0] == len(documents) + 1
+
+    doc_embeds = embeds[:-1]
+    query_embeds = embeds[-1]
+
+    scores = F.cosine_similarity(query_embeds, doc_embeds)
+
+    assert len(scores) == len(documents)
+    for expected, actual in zip(REFERENCE_1_VS_N, scores):
+        assert actual == pytest.approx(expected, abs=TOL)
+
+
+def _test_online_n_v_n(server):
+    # test scoring api
+    scores = _get_scores(server, [query] * len(documents), documents)
+    assert len(scores) == len(documents)
+
+    for expected, actual in zip(REFERENCE_1_VS_1, scores):
+        assert actual == pytest.approx(expected, abs=TOL)
+
+    # test pooling api
+    for doc, expected in zip(documents, REFERENCE_1_VS_1):
+        embeds = _get_embeds(server, [doc, query])
+        assert embeds.shape[0] == 2
+
+        doc_embeds = embeds[:-1]
+        query_embeds = embeds[-1]
+
+        scores = F.cosine_similarity(query_embeds, doc_embeds)
+        assert len(scores) == 1
+        assert scores[0] == pytest.approx(expected, abs=TOL)
+
+
+def _test_online_token_embed_illegal_inputs(server):
+    response = requests.post(
+        server.url_for("pooling"),
+        json={
+            "model": model_name,
+            "task": "token_embed",
+            "input": [query],
+            "encoding_format": "float",
+        },
+    )
+    assert response.json()["error"]["message"].startswith(
+        "The JinaForRanking model requires at least 2 inputs."
+    )
+
+    response = requests.post(
+        server.url_for("pooling"),
+        json={
+            "model": model_name,
+            "task": "token_embed",
+            "input": [1, 2, 3],
+            "encoding_format": "float",
+        },
+    )
+    assert response.json()["error"]["message"].startswith(
+        "The JinaForRanking model only supports text as input."
+    )
+
+    response = requests.post(
+        server.url_for("pooling"),
+        json={
+            "model": model_name,
+            "task": "token_embed",
+            "messages": [
+                {
+                    "role": "user",
+                    "content": "The cat sat on the mat.",
+                }
+            ],
+            "encoding_format": "float",
+        },
+    )
+    assert response.json()["error"]["message"].startswith(
+        "The JinaForRanking does not support chat Request."
+    )
diff --git a/tests/models/language/pooling/test_max_tokens_per_doc.py b/tests/models/language/pooling/test_max_tokens_per_doc.py
new file mode 100644
index 000000000000..2e5ab70d2741
--- /dev/null
+++ b/tests/models/language/pooling/test_max_tokens_per_doc.py
@@ -0,0 +1,228 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Tests for max_tokens_per_doc and max_tokens_per_query.
+"""
+
+import json
+import os
+from dataclasses import dataclass
+
+import pytest
+import requests
+
+from tests.utils import VLLM_PATH, RemoteOpenAIServer
+from vllm.entrypoints.pooling.scoring.protocol import RerankResponse
+
+os.environ["VLLM_LOGGING_LEVEL"] = "WARNING"
+
+TEMPLATE_DIR = str(VLLM_PATH / "examples/pooling/score/template")
+ExpectedPromptTokens = int | tuple[int, ...]
+
+long_query = "What is the capital of France?" * 20
+long_doc = "The capital of France is Paris. " * 20
+
+
+@dataclass
+class TestConfig:
+    model: str
+    args: list[str]
+    without_truncated_prompt_tokens: ExpectedPromptTokens
+    with_max_tokens_per_query_prompt_tokens: ExpectedPromptTokens
+    with_max_tokens_per_doc_prompt_tokens: ExpectedPromptTokens
+    with_max_tokens_per_query_and_doc_prompt_tokens: ExpectedPromptTokens
+
+
+RERANK_CONFIGS = [
+    # 1. cross-encoder
+    TestConfig(
+        model="jinaai/jina-reranker-v2-base-multilingual",
+        args=[
+            "--enforce-eager",
+            "--max-model-len",
+            "1024",
+            "--trust-remote-code",
+        ],
+        without_truncated_prompt_tokens=284,
+        with_max_tokens_per_query_prompt_tokens=154,
+        with_max_tokens_per_doc_prompt_tokens=154,
+        with_max_tokens_per_query_and_doc_prompt_tokens=24,
+    ),
+    # 2. cross-encoder + score template
+    TestConfig(
+        model="Qwen/Qwen3-Reranker-0.6B",
+        args=[
+            "--enforce-eager",
+            "--max-model-len",
+            "1024",
+            "--hf-overrides",
+            json.dumps(
+                {
+                    "architectures": ["Qwen3ForSequenceClassification"],
+                    "classifier_from_token": ["no", "yes"],
+                    "is_original_qwen3_reranker": True,
+                }
+            ),
+            "--chat-template",
+            os.path.join(TEMPLATE_DIR, "qwen3_reranker.jinja"),
+        ],
+        without_truncated_prompt_tokens=352,
+        with_max_tokens_per_query_prompt_tokens=223,
+        with_max_tokens_per_doc_prompt_tokens=221,
+        with_max_tokens_per_query_and_doc_prompt_tokens=92,
+    ),
+    # 3. bi-encoder
+    TestConfig(
+        model="intfloat/multilingual-e5-small",
+        args=[
+            "--enforce-eager",
+            "--max-model-len",
+            "512",
+            "--trust-remote-code",
+        ],
+        # This model has produced both prompt-token totals in CI/local cache;
+        # keep truncation checks exact while tolerating the boundary delta.
+        without_truncated_prompt_tokens=(285, 286),
+        with_max_tokens_per_query_prompt_tokens=(155, 156),
+        with_max_tokens_per_doc_prompt_tokens=155,
+        with_max_tokens_per_query_and_doc_prompt_tokens=25,
+    ),
+    # 4. late-interaction
+    TestConfig(
+        model="answerdotai/answerai-colbert-small-v1",
+        args=[
+            "--enforce-eager",
+            "--max-model-len",
+            "512",
+            "--trust-remote-code",
+        ],
+        without_truncated_prompt_tokens=285,
+        with_max_tokens_per_query_prompt_tokens=155,
+        with_max_tokens_per_doc_prompt_tokens=155,
+        with_max_tokens_per_query_and_doc_prompt_tokens=25,
+    ),
+    # 5. jinaai/jina-reranker-v3
+    TestConfig(
+        model="jinaai/jina-reranker-v3",
+        args=[
+            "--enforce-eager",
+            "--max-model-len",
+            "1024",
+            "--trust-remote-code",
+        ],
+        without_truncated_prompt_tokens=567,
+        with_max_tokens_per_query_prompt_tokens=308,
+        with_max_tokens_per_doc_prompt_tokens=436,
+        with_max_tokens_per_query_and_doc_prompt_tokens=177,
+    ),
+]
+
+
+def assert_prompt_tokens(actual: int, expected: ExpectedPromptTokens) -> None:
+    if isinstance(expected, int):
+        assert actual == expected
+    else:
+        assert actual in expected
+
+
+@pytest.fixture(scope="module", params=RERANK_CONFIGS, ids=lambda c: c.model)
+def server(request):
+    config: TestConfig = request.param
+    with RemoteOpenAIServer(config.model, config.args) as remote_server:
+        yield config, remote_server
+
+
+def test_without_truncated(server):
+    """Test that max_tokens_per_doc truncates documents correctly."""
+    config, remote_server = server
+
+    response = requests.post(
+        remote_server.url_for("rerank"),
+        json={"model": config.model, "query": long_query, "documents": [long_doc]},
+    )
+    response.raise_for_status()
+    rerank = RerankResponse.model_validate(response.json())
+
+    assert rerank.id is not None
+    assert rerank.results is not None
+    assert len(rerank.results) == 1
+    assert_prompt_tokens(
+        rerank.usage.prompt_tokens,
+        config.without_truncated_prompt_tokens,
+    )
+
+
+def test_max_tokens_per_query(server):
+    """Test that max_tokens_per_doc truncates documents correctly."""
+    config, remote_server = server
+
+    response = requests.post(
+        remote_server.url_for("rerank"),
+        json={
+            "model": config.model,
+            "query": long_query,
+            "documents": [long_doc],
+            "max_tokens_per_query": 10,
+        },
+    )
+    response.raise_for_status()
+    rerank = RerankResponse.model_validate(response.json())
+
+    assert rerank.id is not None
+    assert rerank.results is not None
+    assert len(rerank.results) == 1
+    assert_prompt_tokens(
+        rerank.usage.prompt_tokens,
+        config.with_max_tokens_per_query_prompt_tokens,
+    )
+
+
+def test_max_tokens_per_doc(server):
+    """Test that max_tokens_per_doc truncates documents correctly."""
+    config, remote_server = server
+
+    response = requests.post(
+        remote_server.url_for("rerank"),
+        json={
+            "model": config.model,
+            "query": long_query,
+            "documents": [long_doc],
+            "max_tokens_per_doc": 10,
+        },
+    )
+    response.raise_for_status()
+    rerank = RerankResponse.model_validate(response.json())
+
+    assert rerank.id is not None
+    assert rerank.results is not None
+    assert len(rerank.results) == 1
+    assert_prompt_tokens(
+        rerank.usage.prompt_tokens,
+        config.with_max_tokens_per_doc_prompt_tokens,
+    )
+
+
+def test_max_tokens_per_query_and_doc(server):
+    """Test that max_tokens_per_doc truncates documents correctly."""
+    config, remote_server = server
+
+    response = requests.post(
+        remote_server.url_for("rerank"),
+        json={
+            "model": config.model,
+            "query": long_query,
+            "documents": [long_doc],
+            "max_tokens_per_query": 10,
+            "max_tokens_per_doc": 10,
+        },
+    )
+    response.raise_for_status()
+    rerank = RerankResponse.model_validate(response.json())
+
+    assert rerank.id is not None
+    assert rerank.results is not None
+    assert len(rerank.results) == 1
+    assert_prompt_tokens(
+        rerank.usage.prompt_tokens,
+        config.with_max_tokens_per_query_and_doc_prompt_tokens,
+    )
diff --git a/tests/models/language/pooling/test_multi_vector_retrieval.py b/tests/models/language/pooling/test_multi_vector_retrieval.py
index 302f2df13557..3161271be091 100644
--- a/tests/models/language/pooling/test_multi_vector_retrieval.py
+++ b/tests/models/language/pooling/test_multi_vector_retrieval.py
@@ -5,6 +5,7 @@
 from transformers import AutoModel
 
 from tests.models.utils import check_embeddings_close
+from vllm.config import PoolerConfig
 
 
 @pytest.mark.parametrize(
@@ -17,6 +18,7 @@ def test_embed_models(hf_runner, vllm_runner, example_prompts, model: str, dtype
     with vllm_runner(
         model,
         runner="pooling",
+        pooler_config=PoolerConfig(task="token_embed"),
         max_model_len=None,
     ) as vllm_model:
         vllm_outputs = vllm_model.token_embed(example_prompts)
diff --git a/tests/models/language/pooling/test_nomic_max_model_len.py b/tests/models/language/pooling/test_nomic_max_model_len.py
index d6216a87a229..6ea29f6d0677 100644
--- a/tests/models/language/pooling/test_nomic_max_model_len.py
+++ b/tests/models/language/pooling/test_nomic_max_model_len.py
@@ -1,14 +1,19 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 # ruff: noqa: SIM117
-from typing import Any
 
 import pytest
 
 from ...utils import EmbedModelInfo
 
 MODELS = [
-    EmbedModelInfo("nomic-ai/nomic-embed-text-v1"),
+    EmbedModelInfo(
+        "nomic-ai/nomic-embed-text-v1",
+        # Fixme:
+        #  Update nomic-embed code to support the latest
+        #  HF version and remove revision set.
+        revision="720244025c1a7e15661a174c63cce63c8218e52b",
+    ),
     # EmbedModelInfo("nomic-ai/nomic-embed-text-v1.5"),
     # EmbedModelInfo("nomic-ai/CodeRankEmbed"),
     EmbedModelInfo("nomic-ai/nomic-embed-text-v2-moe"),
@@ -24,58 +29,64 @@
 @pytest.mark.parametrize("model_info", MODELS)
 def test_default(model_info, vllm_runner):
     with vllm_runner(
-        model_info.name, runner="pooling", max_model_len=None
+        model_info.name,
+        revision=model_info.revision,
+        runner="pooling",
+        max_model_len=None,
     ) as vllm_model:
         model_config = vllm_model.llm.llm_engine.model_config
         if model_info.name == "nomic-ai/nomic-embed-text-v2-moe":
             # For nomic-embed-text-v2-moe the length is set to 512
             # by sentence_bert_config.json.
             assert model_config.max_model_len == 512
-        else:
-            assert model_config.max_model_len == original_max_position_embeddings
+        if model_info.name == "nomic-ai/nomic-embed-text-v1":
+            assert model_config.max_model_len == 8192
 
 
 @pytest.mark.parametrize("model_info", MODELS)
 def test_set_max_model_len_legal(model_info, vllm_runner):
     # set max_model_len <= 512
     with vllm_runner(
-        model_info.name, runner="pooling", max_model_len=256
+        model_info.name,
+        revision=model_info.revision,
+        runner="pooling",
+        max_model_len=256,
     ) as vllm_model:
         model_config = vllm_model.llm.llm_engine.model_config
         assert model_config.max_model_len == 256
 
-    # set 512 < max_model_len <= 2048
+    # For nomic-embed-text-v2-moe the length is set to 512
+    # by sentence_bert_config.json.
     if model_info.name == "nomic-ai/nomic-embed-text-v2-moe":
-        # For nomic-embed-text-v2-moe the length is set to 512
-        # by sentence_bert_config.json.
         with pytest.raises(ValueError):
-            with vllm_runner(model_info.name, runner="pooling", max_model_len=1024):
+            with vllm_runner(
+                model_info.name,
+                revision=model_info.revision,
+                runner="pooling",
+                max_model_len=1024,
+            ):
                 pass
-    else:
-        with vllm_runner(
-            model_info.name, runner="pooling", max_model_len=1024
-        ) as vllm_model:
-            model_config = vllm_model.llm.llm_engine.model_config
-            assert model_config.max_model_len == 1024
+        return
 
+    # set 512 < max_model_len <= 2048
+    with vllm_runner(
+        model_info.name,
+        revision=model_info.revision,
+        runner="pooling",
+        max_model_len=1024,
+    ) as vllm_model:
+        model_config = vllm_model.llm.llm_engine.model_config
+        assert model_config.max_model_len == 1024
 
-@pytest.mark.parametrize("model_info", MODELS)
-def test_set_max_model_len_illegal(model_info, vllm_runner):
     # set max_model_len > 2048
-    with pytest.raises(ValueError):
-        with vllm_runner(model_info.name, runner="pooling", max_model_len=4096):
-            pass
-
-    # set max_model_len > 2048 by hf_overrides
-    hf_overrides = {"max_model_len": 4096}
-    with pytest.raises(ValueError):
-        with vllm_runner(
-            model_info.name,
-            runner="pooling",
-            max_model_len=None,
-            hf_overrides=hf_overrides,
-        ):
-            pass
+    with vllm_runner(
+        model_info.name,
+        revision=model_info.revision,
+        runner="pooling",
+        max_model_len=4096,
+    ) as vllm_model:
+        model_config = vllm_model.llm.llm_engine.model_config
+        assert model_config.max_model_len == 4096
 
 
 @pytest.mark.parametrize("model_info", MODELS)
@@ -91,46 +102,10 @@ def test_use_rope_scaling_legal(model_info, vllm_runner):
     }
 
     with vllm_runner(
-        model_info.name, runner="pooling", max_model_len=None, hf_overrides=hf_overrides
+        model_info.name,
+        revision=model_info.revision,
+        runner="pooling",
+        max_model_len=None,
+        hf_overrides=hf_overrides,
     ):
         pass
-
-
-@pytest.mark.parametrize("model_info", MODELS)
-def test_use_rope_scaling_illegal(model_info, vllm_runner):
-    hf_overrides: dict[str, Any] = {
-        "rope_parameters": {
-            "rope_theta": rope_theta,
-            "rope_type": "yarn",
-            "factor": factor,
-            "original_max_position_embeddings": original_max_position_embeddings,
-        },
-    }
-    # illegal max_model_len
-    with pytest.raises(ValueError):
-        with vllm_runner(
-            model_info.name,
-            runner="pooling",
-            max_model_len=max_model_len + 1,
-            hf_overrides=hf_overrides,
-        ):
-            pass
-
-    hf_overrides = {
-        "rope_parameters": {
-            "rope_theta": rope_theta,
-            "rope_type": "yarn",
-            "factor": factor,
-            "original_max_position_embeddings": original_max_position_embeddings,
-        },
-        "max_model_len": max_model_len + 1,
-    }
-    # illegal max_model_len by hf_overrides
-    with pytest.raises(ValueError):
-        with vllm_runner(
-            model_info.name,
-            runner="pooling",
-            max_model_len=None,
-            hf_overrides=hf_overrides,
-        ):
-            pass
diff --git a/tests/models/language/pooling/test_pooler_config_init_behaviour.py b/tests/models/language/pooling/test_pooler_config_init_behaviour.py
index a5a0c07e0c5d..2f6fb9c873f8 100644
--- a/tests/models/language/pooling/test_pooler_config_init_behaviour.py
+++ b/tests/models/language/pooling/test_pooler_config_init_behaviour.py
@@ -146,7 +146,7 @@ def test_multi_vector_retrieval_models_using_normalize(
         model,
         max_model_len=512,
         dtype=dtype,
-        pooler_config=PoolerConfig(use_activation=False),
+        pooler_config=PoolerConfig(use_activation=False, task="token_embed"),
     ) as vllm_model:
         wo_normalize = vllm_model.token_embed(example_prompts)
 
@@ -154,7 +154,7 @@ def test_multi_vector_retrieval_models_using_normalize(
         model,
         max_model_len=512,
         dtype=dtype,
-        pooler_config=PoolerConfig(use_activation=True),
+        pooler_config=PoolerConfig(use_activation=True, task="token_embed"),
     ) as vllm_model:
         w_normalize = vllm_model.token_embed(example_prompts)
 
diff --git a/tests/models/language/pooling/test_splade_sparse_pooler.py b/tests/models/language/pooling/test_splade_sparse_pooler.py
index af4fd764ef53..38a90d07abeb 100644
--- a/tests/models/language/pooling/test_splade_sparse_pooler.py
+++ b/tests/models/language/pooling/test_splade_sparse_pooler.py
@@ -1,8 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import types
-
 import pytest
 import torch
 import torch.nn as nn
@@ -11,6 +9,8 @@
     BertMLMHead,
     SPLADESparsePooler,
 )
+from vllm.pooling_params import PoolingParams
+from vllm.v1.pool.metadata import PoolingMetadata, PoolingStates
 
 # ---------------------------------------------------------------------
 # Functional test: SPLADE formula correctness (no HF download needed)
@@ -38,8 +38,12 @@ def test_splade_pooler_matches_reference_formula(B, T, H, V):
         ],
         dtype=torch.long,
     )
-    meta = types.SimpleNamespace(
-        prompt_lens=prompt_lens_tenser, prompt_token_ids=token_ids
+    meta = PoolingMetadata(
+        prompt_lens=prompt_lens_tenser,
+        prompt_token_ids=token_ids,
+        prompt_token_ids_cpu=token_ids,
+        pooling_params=[PoolingParams(task="embed")] * B,
+        pooling_states=[PoolingStates() for _ in range(B)],
     )
 
     # MLM head (prefer BertMLMHead, fallback to Linear if unavailable)
diff --git a/tests/models/language/pooling/test_token_classification.py b/tests/models/language/pooling/test_token_classification.py
index 42511f22f58a..be71f7918ec4 100644
--- a/tests/models/language/pooling/test_token_classification.py
+++ b/tests/models/language/pooling/test_token_classification.py
@@ -1,25 +1,20 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import random
 
-import numpy as np
 import pytest
 import torch
 from transformers import AutoModelForTokenClassification
 
 from tests.models.utils import softmax
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed
 
 
 @pytest.fixture(autouse=True)
 def seed_everything():
     """Seed all random number generators for reproducibility."""
     seed = 0
-    random.seed(seed)
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed_all(seed)
+    set_random_seed(seed)
     torch.backends.cudnn.deterministic = True
     torch.backends.cudnn.benchmark = False
     yield
diff --git a/tests/models/language/pooling_mteb_test/mteb_embed_utils.py b/tests/models/language/pooling_mteb_test/mteb_embed_utils.py
index da0b16449a6e..fc575c399d04 100644
--- a/tests/models/language/pooling_mteb_test/mteb_embed_utils.py
+++ b/tests/models/language/pooling_mteb_test/mteb_embed_utils.py
@@ -74,10 +74,25 @@ def similarity_pairwise(
         return sim
 
 
+class HfMtebEncoder(MtebEmbedMixin):
+    def __init__(self, model):
+        self.model = model
+
+    def encode(
+        self,
+        inputs: DataLoader[mteb.types.BatchedInput],
+        *args,
+        **kwargs,
+    ) -> np.ndarray:
+        sentences = [text for batch in inputs for text in batch["text"]]
+        return self.model.encode(sentences)
+
+
 class VllmMtebEncoder(MtebEmbedMixin):
-    def __init__(self, vllm_model):
+    def __init__(self, vllm_model, prompt_prefix: str | None = None):
         self.llm = vllm_model
         self.rng = np.random.default_rng(seed=42)
+        self.prompt_prefix = prompt_prefix
 
     def encode(
         self,
@@ -87,7 +102,11 @@ def encode(
     ) -> np.ndarray:
         # Hoping to discover potential scheduling
         # issues by randomizing the order.
-        sentences = [text for batch in inputs for text in batch["text"]]
+        sentences = [
+            self.prompt_prefix + text if self.prompt_prefix else text
+            for batch in inputs
+            for text in batch["text"]
+        ]
         r = self.rng.permutation(len(sentences))
         sentences = [sentences[i] for i in r]
         outputs = self.llm.embed(sentences, use_tqdm=False)
@@ -143,6 +162,7 @@ def mteb_test_embed_models(
     vllm_extra_kwargs=None,
     hf_model_callback=None,
     atol=MTEB_EMBED_TOL,
+    prompt_prefix: str | None = None,
 ):
     vllm_extra_kwargs = get_vllm_extra_kwargs(model_info, vllm_extra_kwargs)
 
@@ -151,6 +171,7 @@ def mteb_test_embed_models(
 
     with vllm_runner(
         model_info.name,
+        revision=model_info.revision,
         runner="pooling",
         max_model_len=model_info.max_model_len,
         **vllm_extra_kwargs,
@@ -181,7 +202,7 @@ def mteb_test_embed_models(
             )
 
         vllm_main_score = run_mteb_embed_task(
-            VllmMtebEncoder(vllm_model), MTEB_EMBED_TASKS
+            VllmMtebEncoder(vllm_model, prompt_prefix=prompt_prefix), MTEB_EMBED_TASKS
         )
         vllm_dtype = vllm_model.llm.llm_engine.model_config.dtype
         head_dtype = model_config.head_dtype
@@ -201,6 +222,7 @@ def mteb_test_embed_models(
     if model_info.mteb_score is None:
         with hf_runner(
             model_info.name,
+            revision=model_info.revision,
             is_sentence_transformer=True,
             dtype=ci_envs.VLLM_CI_HF_DTYPE or model_info.hf_dtype,
         ) as hf_model:
@@ -208,7 +230,9 @@ def mteb_test_embed_models(
             if hf_model_callback is not None:
                 hf_model_callback(hf_model)
 
-            st_main_score = run_mteb_embed_task(hf_model, MTEB_EMBED_TASKS)
+            st_main_score = run_mteb_embed_task(
+                HfMtebEncoder(hf_model), MTEB_EMBED_TASKS
+            )
             st_dtype = next(hf_model.model.parameters()).dtype
 
             # Check embeddings close to hf outputs
diff --git a/tests/models/language/pooling_mteb_test/mteb_score_utils.py b/tests/models/language/pooling_mteb_test/mteb_score_utils.py
index 621aff0e998f..16081cbe238b 100644
--- a/tests/models/language/pooling_mteb_test/mteb_score_utils.py
+++ b/tests/models/language/pooling_mteb_test/mteb_score_utils.py
@@ -241,6 +241,7 @@ def mteb_test_rerank_models(
 
     with vllm_runner(
         model_info.name,
+        revision=model_info.revision,
         runner="pooling",
         max_model_len=None,
         max_num_seqs=8,
@@ -286,7 +287,9 @@ def mteb_test_rerank_models(
     # Accelerate mteb test by setting
     # SentenceTransformers mteb score to a constant
     if model_info.mteb_score is None:
-        with hf_runner(model_info.name, dtype=model_info.hf_dtype) as hf_model:
+        with hf_runner(
+            model_info.name, revision=model_info.revision, dtype=model_info.hf_dtype
+        ) as hf_model:
             hf_model.chat_template = chat_template
             st_main_score = run_mteb_rerank(
                 hf_model,
diff --git a/tests/models/language/pooling_mteb_test/test_baai.py b/tests/models/language/pooling_mteb_test/test_baai.py
index 1199393d4b74..ec11960fda07 100644
--- a/tests/models/language/pooling_mteb_test/test_baai.py
+++ b/tests/models/language/pooling_mteb_test/test_baai.py
@@ -69,7 +69,10 @@
         attn_type="decoder",
         is_prefix_caching_supported=True,
         is_chunked_prefill_supported=True,
-        enable_test=True,
+        # Skip: model's custom tokenizer on HF hub is incompatible with
+        # transformers v5 (sets attrs before super().__init__, triggering
+        # AttributeError on 'verbose' in __getattr__).
+        enable_test=False,
     ),
 ]
 
diff --git a/tests/models/language/pooling_mteb_test/test_gte.py b/tests/models/language/pooling_mteb_test/test_gte.py
index 0c35d66c3667..0a54262e124f 100644
--- a/tests/models/language/pooling_mteb_test/test_gte.py
+++ b/tests/models/language/pooling_mteb_test/test_gte.py
@@ -72,7 +72,8 @@
         attn_type="encoder_only",
         is_prefix_caching_supported=False,
         is_chunked_prefill_supported=False,
-        enable_test=True,
+        # Skip: numerical regression with transformers v5.
+        enable_test=False,
     ),
     ########## ModernBertModel
     EmbedModelInfo(
diff --git a/tests/models/language/pooling_mteb_test/test_jina.py b/tests/models/language/pooling_mteb_test/test_jina.py
index 627cc0431943..24aa3188f8be 100644
--- a/tests/models/language/pooling_mteb_test/test_jina.py
+++ b/tests/models/language/pooling_mteb_test/test_jina.py
@@ -28,7 +28,16 @@
         attn_type="encoder_only",
         is_prefix_caching_supported=False,
         is_chunked_prefill_supported=False,
-    )
+    ),
+    EmbedModelInfo(
+        "jinaai/jina-embeddings-v5-text-small",
+        mteb_score=0.794535707854956,
+        architecture="JinaEmbeddingsV5Model",
+        seq_pooling_type="LAST",
+        attn_type="decoder",
+        is_prefix_caching_supported=True,
+        is_chunked_prefill_supported=True,
+    ),
 ]
 
 RERANK_MODELS = [
@@ -46,11 +55,18 @@
 
 @pytest.mark.parametrize("model_info", EMBEDDING_MODELS)
 def test_embed_models_mteb(hf_runner, vllm_runner, model_info: EmbedModelInfo) -> None:
+    task = "retrieval" if "v5" in model_info.name else "text-matching"
+    prompt_prefix: str | None = "Document: " if "v5" in model_info.name else None
+
     def hf_model_callback(model):
-        model.encode = partial(model.encode, task="text-matching")
+        model.encode = partial(model.encode, task=task)
 
     mteb_test_embed_models(
-        hf_runner, vllm_runner, model_info, hf_model_callback=hf_model_callback
+        hf_runner,
+        vllm_runner,
+        model_info,
+        hf_model_callback=hf_model_callback,
+        prompt_prefix=prompt_prefix,
     )
 
 
@@ -58,8 +74,10 @@ def hf_model_callback(model):
 def test_embed_models_correctness(
     hf_runner, vllm_runner, model_info: EmbedModelInfo, example_prompts
 ) -> None:
+    task = "retrieval" if "v5" in model_info.name else "text-matching"
+
     def hf_model_callback(model):
-        model.encode = partial(model.encode, task="text-matching")
+        model.encode = partial(model.encode, task=task)
 
     correctness_test_embed_models(
         hf_runner,
@@ -75,6 +93,10 @@ def test_rerank_models_mteb(vllm_runner, model_info: RerankModelInfo) -> None:
     mteb_test_rerank_models(vllm_runner, model_info)
 
 
+@pytest.mark.skip(
+    reason="jinaai/jina-embeddings-v3 custom XLMRobertaLoRA model on HF hub "
+    "is incompatible with transformers v5 (missing all_tied_weights_keys)"
+)
 @pytest.mark.parametrize("model_info", EMBEDDING_MODELS)
 @pytest.mark.parametrize("dtype", ["half"])
 @pytest.mark.parametrize("dimensions", [16, 32])
@@ -93,12 +115,14 @@ def test_matryoshka(
     # ST will strip the input texts, see test_embedding.py
     example_prompts = [str(s).strip() for s in example_prompts]
 
+    task = "retrieval" if "v5" in model_info.name else "text-matching"
+
     with hf_runner(
         model_info.name,
         dtype=dtype,
         is_sentence_transformer=True,
     ) as hf_model:
-        hf_outputs = hf_model.encode(example_prompts, task="text-matching")
+        hf_outputs = hf_model.encode(example_prompts, task=task)
         hf_outputs = matryoshka_fy(hf_outputs, dimensions)
 
     with vllm_runner(
diff --git a/tests/models/language/pooling_mteb_test/test_nomic.py b/tests/models/language/pooling_mteb_test/test_nomic.py
index fa987fab7cdd..d7947f73f95e 100644
--- a/tests/models/language/pooling_mteb_test/test_nomic.py
+++ b/tests/models/language/pooling_mteb_test/test_nomic.py
@@ -12,6 +12,10 @@
     EmbedModelInfo(
         "nomic-ai/nomic-embed-text-v1",
         architecture="NomicBertModel",
+        # Fixme:
+        #  Update nomic-embed code to support the latest
+        #  HF version and remove revision set.
+        revision="720244025c1a7e15661a174c63cce63c8218e52b",
         mteb_score=0.737568559,
         enable_test=True,
         seq_pooling_type="MEAN",
diff --git a/tests/models/multimodal/conftest.py b/tests/models/multimodal/conftest.py
index d00c3df786dc..9283556d3024 100644
--- a/tests/models/multimodal/conftest.py
+++ b/tests/models/multimodal/conftest.py
@@ -5,11 +5,21 @@
 import os
 import warnings
 
+import pytest
 import torch
 
+from tests.utils import prewarm_hf_cache
 from vllm.platforms import current_platform
 
 
+@pytest.fixture(scope="session", autouse=True)
+def _prewarm_hf_cache():
+    # tokenization_qwen.py downloads SimSun.ttf from
+    # qianwen-res.oss-cn-beijing.aliyuncs.com; both Qwen/Qwen-VL and
+    # Qwen/Qwen-VL-Chat look it up from the Chat repo.
+    prewarm_hf_cache([("Qwen/Qwen-VL-Chat", "SimSun.ttf")])
+
+
 def pytest_configure(config):
     """Early ROCm configuration that must happen before test collection."""
     if not current_platform.is_rocm():
diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py
index 1404d9628faa..88267fc6d66a 100644
--- a/tests/models/multimodal/generation/test_common.py
+++ b/tests/models/multimodal/generation/test_common.py
@@ -86,6 +86,29 @@
 # which cases would be selected and deselected by pytest. In general,
 # this is a good idea for checking your command first, since tests are slow.
 
+
+def _granite4_vision_vllm_to_hf_output(vllm_output, model):
+    """Post-processor for granite4_vision vLLM output.
+
+    Self-contained to avoid calling AutoConfig/AutoTokenizer without
+    trust_remote_code (needed while the model is not in upstream HF).
+    """
+    output_ids, output_str, out_logprobs = vllm_output
+    mm_token_id = 100352
+    hf_output_ids = [
+        token_id
+        for idx, token_id in enumerate(output_ids)
+        if token_id != mm_token_id or idx == 0 or output_ids[idx - 1] != mm_token_id
+    ]
+    hf_output_str = (
+        output_str[1:] if output_str and output_str[0] == " " else output_str
+    )
+    eos_token_id = 100257
+    if hf_output_ids and hf_output_ids[-1] == eos_token_id:
+        hf_output_str = hf_output_str + "<|end_of_text|>"
+    return hf_output_ids, hf_output_str, out_logprobs
+
+
 VLM_TEST_SETTINGS = {
     #### Core tests to always run in the CI
     "llava": VLMTestInfo(
@@ -186,7 +209,14 @@
         max_num_seqs=2,
         auto_cls=AutoModel,
         hf_output_post_proc=model_utils.ultravox_trunc_hf_output,
-        marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+        marks=[
+            pytest.mark.core_model,
+            pytest.mark.cpu_model,
+            # TODO: Remove skip once model has been upstreamed to Transformers
+            pytest.mark.skip(
+                reason="Custom model code is not compatible with Transformers v5"
+            ),
+        ],
     ),
     #### Transformers fallback to test
     ## To reduce test burden, we only test batching arbitrary image size
@@ -286,7 +316,15 @@
         stop_str=["<|im_end|>"],
         image_size_factors=[(0.10, 0.15)],
         max_tokens=64,
-        marks=[large_gpu_mark(min_gb=64)],
+        marks=[
+            pytest.mark.skip(
+                reason="Aria needs to update for latest transformers, "
+                "must have a vision_processor.py."
+                "An issue has been filed:"
+                "https://huggingface.co/rhymes-ai/Aria/discussions/23"
+            ),
+            large_gpu_mark(min_gb=64),
+        ],
     ),
     "aya_vision": VLMTestInfo(
         models=["CohereLabs/aya-vision-8b"],
@@ -394,6 +432,22 @@
         vllm_runner_kwargs={"mm_processor_kwargs": {"do_pan_and_scan": True}},
         patch_hf_runner=model_utils.gemma3_patch_hf_runner,
     ),
+    "gemma4": VLMTestInfo(
+        models=["google/gemma-4-E2B-it"],
+        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
+        prompt_formatter=lambda img_prompt: f"<bos><|turn>user\n{img_prompt}<turn|>\n<|turn>model\n",  # noqa: E501
+        single_image_prompts=IMAGE_ASSETS.prompts(
+            {
+                "stop_sign": "<|image|>What's the content in the center of the image?",  # noqa: E501
+                "cherry_blossom": "<|image|>What is the season?",
+            }
+        ),
+        multi_image_prompt="<|image|><|image|>Describe the two images in detail.",  # noqa: E501
+        max_model_len=4096,
+        max_num_seqs=2,
+        auto_cls=AutoModelForImageTextToText,
+        vllm_runner_kwargs={"limit_mm_per_prompt": {"image": 4}},
+    ),
     "granite_vision": VLMTestInfo(
         models=["ibm-granite/granite-vision-3.3-2b"],
         test_type=(VLMTestType.IMAGE),
@@ -422,7 +476,14 @@
         max_tokens=8,
         num_logprobs=10,
         auto_cls=AutoModelForCausalLM,
-        marks=[large_gpu_mark(min_gb=32)],
+        marks=[
+            pytest.mark.skip(
+                reason="The code for this model has a bug."
+                "Please see the issue here:"
+                "https://huggingface.co/zai-org/glm-4v-9b/discussions/46."
+            ),
+            large_gpu_mark(min_gb=32),
+        ],
     ),
     "glm4_1v": VLMTestInfo(
         models=["zai-org/GLM-4.1V-9B-Thinking"],
@@ -467,7 +528,28 @@
         num_logprobs=10,
         image_size_factors=[(0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
         auto_cls=AutoModelForImageTextToText,
-        marks=[large_gpu_mark(min_gb=32)],
+        marks=[
+            pytest.mark.skip(
+                reason="This test fails on both AMD and NV"
+                "hardware. please see the issue:"
+                "https://github.com/vllm-project/vllm/issues/42016"
+            ),
+            large_gpu_mark(min_gb=32),
+        ],
+    ),
+    "granite4_vision": VLMTestInfo(
+        models=["ibm-granite/granite-vision-4.1-4b"],
+        test_type=(VLMTestType.IMAGE),
+        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}\n<|assistant|>\n",
+        max_model_len=8192,
+        auto_cls=AutoModelForImageTextToText,
+        vllm_output_post_proc=_granite4_vision_vllm_to_hf_output,
+        image_size_factors=[(1.0,)],
+        vllm_runner_kwargs={
+            "enable_lora": True,
+            "max_lora_rank": 256,
+            "default_mm_loras": {"image": "ibm-granite/granite-vision-4.1-4b"},
+        },
     ),
     "h2ovl": VLMTestInfo(
         models=[
@@ -517,6 +599,12 @@
         max_model_len=4096,
         use_tokenizer_eos=True,
         patch_hf_runner=model_utils.internvl_patch_hf_runner,
+        # TODO: Remove skip once model has been upstreamed to Transformers
+        marks=[
+            pytest.mark.skip(
+                reason="Custom model code tries to access data from meta-tensor"
+            )
+        ],
     ),
     "intern_vl-video": VLMTestInfo(
         models=[
@@ -529,6 +617,12 @@
         use_tokenizer_eos=True,
         patch_hf_runner=model_utils.internvl_patch_hf_runner,
         num_logprobs=10 if current_platform.is_rocm() else 5,
+        # TODO: Remove skip once model has been upstreamed to Transformers
+        marks=[
+            pytest.mark.skip(
+                reason="Custom model code tries to access data from meta-tensor"
+            )
+        ],
     ),
     "intern_vl-hf": VLMTestInfo(
         models=["OpenGVLab/InternVL3-1B-hf"],
@@ -575,6 +669,8 @@
         hf_model_kwargs={"device_map": "auto"},
         patch_hf_runner=model_utils.isaac_patch_hf_runner,
         image_size_factors=[(0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
+        # TODO: Remove skip once model has been upstreamed to Transformers
+        marks=[pytest.mark.skip(reason="Custom model imports deleted object")],  # noqa: E501
     ),
     "kimi_vl": VLMTestInfo(
         models=["moonshotai/Kimi-VL-A3B-Instruct"],
@@ -731,6 +827,32 @@
         max_num_seqs=2,
         patch_hf_runner=model_utils.molmo_patch_hf_runner,
     ),
+    "moondream3": VLMTestInfo(
+        models=["moondream/moondream3-preview"],
+        test_type=VLMTestType.IMAGE,
+        prompt_formatter=identity,
+        img_idx_to_prompt=lambda idx: "<|endoftext|><image>",
+        # Common-image coverage here targets query/caption. The native
+        # detect/point skills are not exposed by vLLM.
+        single_image_prompts=IMAGE_ASSETS.prompts(
+            {
+                "stop_sign": "<vlm_image><|md_reserved_0|>query<|md_reserved_1|>What is this sign?<|md_reserved_2|>",  # noqa: E501
+                "cherry_blossom": (
+                    "<vlm_image><|md_reserved_0|>query<|md_reserved_1|>What season is shown?<|md_reserved_2|>"  # noqa: E501
+                ),
+            }
+        ),
+        max_model_len=4096,
+        max_num_seqs=2,
+        dtype="bfloat16",
+        hf_processor=model_utils.moondream3_processor,
+        patch_hf_runner=model_utils.moondream3_patch_hf_runner,
+        # Single size factor to avoid GPU OOM when running multiple test
+        # cases sequentially (9B MoE model uses ~18 GiB per instance).
+        image_size_factors=[(1.0,)],
+        # Moondream3 is 9B params with MoE, needs significant GPU memory
+        marks=[large_gpu_mark(min_gb=48)],
+    ),
     "ovis1_6-gemma2": VLMTestInfo(
         models=["AIDC-AI/Ovis1.6-Gemma2-9B"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
@@ -790,7 +912,12 @@
             pytest.mark.skipif(
                 Version(TRANSFORMERS_VERSION) == Version("4.57.3"),
                 reason="This model is broken in Transformers v4.57.3",
-            )
+            ),
+            pytest.mark.skipif(
+                Version(TRANSFORMERS_VERSION) >= Version("5.0.0"),
+                reason="Model's custom code uses ROPE_INIT_FUNCTIONS"
+                "['default'] which was removed in transformers v5",
+            ),
         ],
     ),
     "phi3v": VLMTestInfo(
@@ -823,6 +950,16 @@
             ),
         ],
     ),
+    "qianfan_ocr": VLMTestInfo(
+        models=["baidu/Qianfan-OCR"],
+        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
+        prompt_formatter=lambda img_prompt: f"<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
+        img_idx_to_prompt=lambda idx: "<image>",
+        max_model_len=4096,
+        use_tokenizer_eos=True,
+        auto_cls=AutoModelForImageTextToText,
+        hf_model_kwargs=model_utils.qianfan_ocr_hf_model_kwargs("baidu/Qianfan-OCR"),
+    ),
     "qwen_vl": VLMTestInfo(
         models=["Qwen/Qwen-VL"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
@@ -842,6 +979,7 @@
         multi_image_prompt="Picture 1: <vlm_image>\nPicture 2: <vlm_image>\nDescribe these two images with one paragraph respectively.",  # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
+        num_logprobs=10,
         auto_cls=AutoModelForImageTextToText,
         vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
         image_size_factors=[(0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
@@ -944,6 +1082,12 @@
             )
             for inp in custom_inputs.different_patch_input_cases_internvl()
         ],
+        # TODO: Remove skip once model has been upstreamed to Transformers
+        marks=[
+            pytest.mark.skip(
+                reason="Custom model code tries to access data from meta-tensor"
+            )
+        ],
     ),
     "llava_onevision-multiple-images": VLMTestInfo(
         models=["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"],
diff --git a/tests/models/multimodal/generation/test_memory_leak.py b/tests/models/multimodal/generation/test_memory_leak.py
new file mode 100644
index 000000000000..743a71f928fa
--- /dev/null
+++ b/tests/models/multimodal/generation/test_memory_leak.py
@@ -0,0 +1,182 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import gc
+import random
+import string
+import sys
+import weakref
+
+import pytest
+import torch
+
+from tests.models.registry import HF_EXAMPLE_MODELS
+from vllm import LLM, SamplingParams
+from vllm.distributed import cleanup_dist_env_and_memory
+from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
+from vllm.platforms import current_platform
+from vllm.utils.mem_utils import KiB_bytes, MiB_bytes, format_mib
+
+MODEL_NAME = "Qwen/Qwen3-VL-4B-Instruct"
+RANDOM_PREFIX_LEN = 100
+TEST_IMAGE_NAMES = [
+    "2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
+    "Grayscale_8bits_palette_sample_image.png",
+]
+MAX_MODEL_LEN = 8192
+REQUESTS_PER_ROUND = 4
+WARMUP_ROUNDS = 1
+MEASURED_ROUNDS = 16
+GPU_GROWTH_THRESHOLD_MIB = 0
+CPU_PEAK_GROWTH_THRESHOLD_MIB = 0
+
+SAMPLING_PARAMS = SamplingParams(
+    temperature=0.0,
+    max_tokens=16,
+)
+
+
+def _make_messages(image_url: str) -> list[ChatCompletionMessageParam]:
+    # Avoid obscuring memory leaks because of prefix caching
+    random_text = "".join(random.choices(string.ascii_uppercase, k=RANDOM_PREFIX_LEN))
+
+    return [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": f"Ignore this random string: {random_text}",
+                },
+                {"type": "image_url", "image_url": {"url": image_url}},
+                {
+                    "type": "text",
+                    "text": "Describe this image in one short sentence.",
+                },
+            ],
+        }
+    ]
+
+
+def _build_request_batch(
+    image_urls: list[str],
+) -> list[list[ChatCompletionMessageParam]]:
+    return [
+        _make_messages(image_urls[i % len(image_urls)])
+        for i in range(REQUESTS_PER_ROUND)
+    ]
+
+
+def _ru_maxrss_bytes() -> int | None:
+    try:
+        import resource
+    except ImportError:
+        return None
+
+    rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+    if rss <= 0:
+        return 0
+
+    # Linux reports kilobytes, macOS reports bytes.
+    return rss if sys.platform == "darwin" else rss * KiB_bytes
+
+
+def _gpu_used_bytes() -> int:
+    torch.accelerator.synchronize()
+    free_bytes, total_bytes = current_platform.mem_get_info()
+    return int(total_bytes - free_bytes)
+
+
+def _format_mib(num_bytes: int | None) -> str:
+    if num_bytes is None:
+        return "n/a"
+
+    return f"{format_mib(num_bytes)} MiB"
+
+
+@pytest.fixture(scope="function")
+def llm(monkeypatch):
+    monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
+
+    # pytest caches the fixture so we use weakref.proxy to
+    # enable garbage collection
+    llm_kwargs = dict(
+        model=MODEL_NAME,
+        enforce_eager=True,
+        max_model_len=MAX_MODEL_LEN,
+        max_num_seqs=REQUESTS_PER_ROUND,
+        limit_mm_per_prompt={"image": 1},
+        seed=0,
+        disable_log_stats=True,
+        gpu_memory_utilization=0.8,
+    )
+    if current_platform.is_rocm():
+        llm_kwargs["attention_backend"] = "TRITON_ATTN"
+
+    llm = LLM(**llm_kwargs)
+
+    yield weakref.proxy(llm)
+
+    del llm
+
+    cleanup_dist_env_and_memory()
+
+
+@pytest.mark.core_model
+@pytest.mark.parametrize("image_urls", [TEST_IMAGE_NAMES], indirect=True)
+def test_no_memory_leak(llm, image_urls: list[str]) -> None:
+    model_info = HF_EXAMPLE_MODELS.find_hf_info(MODEL_NAME)
+    model_info.check_available_online(on_fail="skip")
+    model_info.check_transformers_version(on_fail="skip")
+
+    request_batch = _build_request_batch(image_urls)
+
+    # Establish a warmup baseline after model load and the first multimodal
+    # requests complete. Later rounds should remain near this steady state.
+    for _ in range(WARMUP_ROUNDS):
+        outputs = llm.chat(request_batch, sampling_params=SAMPLING_PARAMS)
+        assert len(outputs) == len(request_batch)
+        assert llm.llm_engine.get_num_unfinished_requests() == 0
+        del outputs
+
+    gc.collect()
+    warmup_gpu = _gpu_used_bytes()
+    warmup_cpu_peak = _ru_maxrss_bytes()
+
+    post_warmup_gpu_samples: list[int] = []
+    post_warmup_cpu_peak_samples: list[int] = []
+
+    for _ in range(MEASURED_ROUNDS):
+        outputs = llm.chat(request_batch, sampling_params=SAMPLING_PARAMS)
+        assert len(outputs) == len(request_batch)
+        assert llm.llm_engine.get_num_unfinished_requests() == 0
+        del outputs
+
+        gc.collect()
+        post_warmup_gpu_samples.append(_gpu_used_bytes())
+        cpu_peak = _ru_maxrss_bytes()
+        if cpu_peak is not None:
+            post_warmup_cpu_peak_samples.append(cpu_peak)
+
+    gpu_growth = max(post_warmup_gpu_samples) - warmup_gpu
+    gpu_threshold = GPU_GROWTH_THRESHOLD_MIB * MiB_bytes
+
+    assert gpu_growth <= gpu_threshold, (
+        "Qwen3-VL GPU memory kept growing after warmup. "
+        f"warmup_baseline={_format_mib(warmup_gpu)}, "
+        f"post_warmup_samples={[_format_mib(x) for x in post_warmup_gpu_samples]}, "
+        f"gpu_growth={_format_mib(gpu_growth)}, "
+        f"gpu_threshold={GPU_GROWTH_THRESHOLD_MIB} MiB"
+    )
+
+    if warmup_cpu_peak is not None and post_warmup_cpu_peak_samples:
+        cpu_peak_growth = max(post_warmup_cpu_peak_samples) - warmup_cpu_peak
+        cpu_threshold = CPU_PEAK_GROWTH_THRESHOLD_MIB * MiB_bytes
+
+        assert cpu_peak_growth <= cpu_threshold, (
+            "Qwen3-VL CPU peak RSS kept growing after warmup. "
+            f"warmup_ru_maxrss={_format_mib(warmup_cpu_peak)}, "
+            f"post_warmup_ru_maxrss={[_format_mib(x) for x in post_warmup_cpu_peak_samples]}, "  # noqa: E501
+            f"cpu_peak_growth={_format_mib(cpu_peak_growth)}, "
+            f"cpu_peak_threshold={CPU_PEAK_GROWTH_THRESHOLD_MIB} MiB"
+        )
diff --git a/tests/models/multimodal/generation/test_moondream3.py b/tests/models/multimodal/generation/test_moondream3.py
new file mode 100644
index 000000000000..e4aaa5a55b55
--- /dev/null
+++ b/tests/models/multimodal/generation/test_moondream3.py
@@ -0,0 +1,176 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Generation tests for Moondream3 query and caption support."""
+
+import pytest
+
+from tests.models.registry import HF_EXAMPLE_MODELS
+from vllm.platforms import current_platform
+
+from ....conftest import IMAGE_ASSETS, ImageTestAssets
+from ....utils import large_gpu_mark, multi_gpu_test
+
+MOONDREAM3_MODEL_ID = "moondream/moondream3-preview"
+MOONDREAM3_TOKENIZER = "moondream/starmie-v1"
+
+HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts(
+    {
+        "stop_sign": "<|endoftext|><image><|md_reserved_0|>query<|md_reserved_1|>What color is the stop sign?<|md_reserved_2|>",  # noqa: E501
+        "cherry_blossom": "<|endoftext|><image><|md_reserved_0|>query<|md_reserved_1|>What color are the flowers?<|md_reserved_2|>",  # noqa: E501
+    }
+)
+
+
+def make_query_prompt(question: str) -> str:
+    """Create a direct-answer query prompt for Moondream3."""
+    return (
+        "<|endoftext|><image><|md_reserved_0|>query<|md_reserved_1|>"
+        f"{question}<|md_reserved_2|>"
+    )
+
+
+def make_caption_prompt(length: str = "normal") -> str:
+    """Create a caption prompt for Moondream3."""
+    return (
+        "<|endoftext|><image><|md_reserved_0|>"
+        f"describe<|md_reserved_1|>{length}<|md_reserved_2|>"
+    )
+
+
+@multi_gpu_test(num_gpus=2)
+@large_gpu_mark(min_gb=80)
+def test_tensor_parallel(image_assets: ImageTestAssets):
+    import gc
+
+    from vllm import LLM, SamplingParams
+    from vllm.distributed.parallel_state import destroy_model_parallel
+
+    destroy_model_parallel()
+    gc.collect()
+    current_platform.empty_cache()
+
+    llm = LLM(
+        model=MOONDREAM3_MODEL_ID,
+        tokenizer=MOONDREAM3_TOKENIZER,
+        trust_remote_code=True,
+        dtype="bfloat16",
+        tensor_parallel_size=2,
+        max_model_len=1024,
+        enforce_eager=True,
+        limit_mm_per_prompt={"image": 1},
+        gpu_memory_utilization=0.45,
+    )
+
+    image = image_assets[0].pil_image
+    prompt = make_query_prompt("What color is the stop sign?")
+
+    try:
+        outputs = llm.generate(
+            {"prompt": prompt, "multi_modal_data": {"image": image}},
+            SamplingParams(max_tokens=20, temperature=0),
+        )
+
+        assert len(outputs) > 0
+        assert outputs[0].outputs[0].text is not None
+    finally:
+        del llm
+        gc.collect()
+        current_platform.empty_cache()
+
+
+@pytest.fixture(scope="module")
+def llm():
+    model_info = HF_EXAMPLE_MODELS.get_hf_info("Moondream3ForCausalLM")
+    model_info.check_transformers_version(on_fail="skip")
+
+    from vllm import LLM
+
+    try:
+        return LLM(
+            model=MOONDREAM3_MODEL_ID,
+            tokenizer=MOONDREAM3_TOKENIZER,
+            trust_remote_code=True,
+            dtype="bfloat16",
+            max_model_len=2048,
+            enforce_eager=True,
+            limit_mm_per_prompt={"image": 1},
+            gpu_memory_utilization=0.45,
+        )
+    except Exception as exc:
+        pytest.skip(f"Failed to load {MOONDREAM3_MODEL_ID}: {exc}")
+
+
+@large_gpu_mark(min_gb=48)
+def test_model_loading(llm):
+    assert llm is not None
+
+
+@large_gpu_mark(min_gb=48)
+def test_query_skill(llm, image_assets: ImageTestAssets):
+    from vllm import SamplingParams
+
+    image = image_assets[0].pil_image
+    prompt = make_query_prompt("What color is the stop sign?")
+
+    outputs = llm.generate(
+        {"prompt": prompt, "multi_modal_data": {"image": image}},
+        SamplingParams(max_tokens=50, temperature=0),
+    )
+
+    output_text = outputs[0].outputs[0].text
+    assert output_text is not None
+    assert len(output_text) > 0
+
+
+@large_gpu_mark(min_gb=48)
+def test_caption_skill(llm, image_assets: ImageTestAssets):
+    from vllm import SamplingParams
+
+    image = image_assets[1].pil_image
+    prompt = make_caption_prompt()
+
+    outputs = llm.generate(
+        {"prompt": prompt, "multi_modal_data": {"image": image}},
+        SamplingParams(max_tokens=100, temperature=0),
+    )
+
+    output_text = outputs[0].outputs[0].text
+    assert output_text is not None
+    assert len(output_text) > 0
+
+
+@large_gpu_mark(min_gb=48)
+def test_batched_inference(llm, image_assets: ImageTestAssets):
+    from vllm import SamplingParams
+
+    images = [asset.pil_image for asset in image_assets]
+    prompts = [
+        {"prompt": prompt, "multi_modal_data": {"image": img}}
+        for img, prompt in zip(images, HF_IMAGE_PROMPTS)
+    ]
+
+    outputs = llm.generate(prompts, SamplingParams(max_tokens=50, temperature=0))
+
+    assert len(outputs) == len(images)
+    for output in outputs:
+        assert output.outputs[0].text is not None
+        assert len(output.outputs[0].text) > 0
+
+
+@pytest.mark.parametrize("asset_name", ["stop_sign", "cherry_blossom"])
+@large_gpu_mark(min_gb=48)
+def test_image_assets(llm, image_assets: ImageTestAssets, asset_name: str):
+    from vllm import SamplingParams
+
+    asset_idx = 0 if asset_name == "stop_sign" else 1
+    image = image_assets[asset_idx].pil_image
+    prompt = HF_IMAGE_PROMPTS[asset_idx]
+
+    outputs = llm.generate(
+        {"prompt": prompt, "multi_modal_data": {"image": image}},
+        SamplingParams(max_tokens=50, temperature=0),
+    )
+
+    output_text = outputs[0].outputs[0].text
+    assert output_text is not None
+    assert len(output_text) > 0
diff --git a/tests/models/multimodal/generation/test_nemotron_parse.py b/tests/models/multimodal/generation/test_nemotron_parse.py
index e224f31e6df9..db0d3d06840a 100644
--- a/tests/models/multimodal/generation/test_nemotron_parse.py
+++ b/tests/models/multimodal/generation/test_nemotron_parse.py
@@ -14,7 +14,9 @@
 from ....conftest import HfRunner, PromptImageInput, VllmRunner
 
 IMAGE = ImageAsset("paper-11").pil_image_ext(ext="png").convert("RGB")
-PROMPT = "</s><s><predict_bbox><predict_classes><output_markdown>"
+PROMPT = (
+    "</s><s><predict_bbox><predict_classes><output_markdown><predict_no_text_in_pic>"
+)
 
 
 class DummyLogprobs(dict[int, Logprob]):
@@ -85,7 +87,7 @@ def run_test(
                 max_tokens,
                 num_logprobs=num_logprobs,
                 images=images,
-                use_cache=False,  # HF Nemotron Parse crashes here without this
+                tokenization_kwargs={"add_special_tokens": False},
             )
             for prompts, images in inputs
         ]
@@ -103,7 +105,7 @@ def run_test(
         )
 
 
-@pytest.mark.parametrize("model", ["nvidia/NVIDIA-Nemotron-Parse-v1.1"])
+@pytest.mark.parametrize("model", ["nvidia/NVIDIA-Nemotron-Parse-v1.2"])
 @pytest.mark.parametrize("dtype", ["bfloat16"])
 @pytest.mark.parametrize("num_logprobs", [5])
 def test_models(
diff --git a/tests/models/multimodal/generation/test_phi4mm.py b/tests/models/multimodal/generation/test_phi4mm.py
index 7f1a12f04474..1a4fb35a28aa 100644
--- a/tests/models/multimodal/generation/test_phi4mm.py
+++ b/tests/models/multimodal/generation/test_phi4mm.py
@@ -4,7 +4,6 @@
 import os
 from collections.abc import Sequence
 
-import librosa
 import pytest
 import regex as re
 from huggingface_hub import snapshot_download
@@ -14,6 +13,7 @@
 from vllm.logprobs import SampleLogprobs
 from vllm.lora.request import LoRARequest
 from vllm.multimodal.image import convert_image_mode, rescale_image_size
+from vllm.multimodal.media.audio import load_audio
 
 from ....conftest import (
     IMAGE_ASSETS,
@@ -290,7 +290,7 @@ def test_vision_speech_models(
     num_logprobs: int,
 ) -> None:
     # use the example speech question so that the model outputs are reasonable
-    audio = librosa.load(speech_question, sr=None)
+    audio = load_audio(speech_question, sr=None)
     image = convert_image_mode(ImageAsset("cherry_blossom").pil_image, "RGB")
 
     inputs_vision_speech = [
diff --git a/tests/models/multimodal/generation/test_phi4siglip.py b/tests/models/multimodal/generation/test_phi4siglip.py
new file mode 100644
index 000000000000..f80b16c341b6
--- /dev/null
+++ b/tests/models/multimodal/generation/test_phi4siglip.py
@@ -0,0 +1,198 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Sequence
+from importlib.metadata import version
+
+import pytest
+import regex as re
+from packaging.version import Version
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+from vllm.logprobs import SampleLogprobs
+from vllm.multimodal.image import rescale_image_size
+
+from ....conftest import (
+    IMAGE_ASSETS,
+    HfRunner,
+    PromptImageInput,
+    VllmRunner,
+)
+from ....utils import multi_gpu_test
+from ...utils import check_logprobs_close
+
+pytestmark = pytest.mark.skipif(
+    Version("5.0") <= Version(version("transformers")),
+    reason=(
+        "vllm upgraded transformers above v5.4 where HF model custom code uses siglip2 "
+        "internals (filter_out_non_signature_kwargs) removed by "
+        "huggingface/transformers#43514"
+    ),
+)
+
+MODEL_ID = "microsoft/Phi-4-reasoning-vision-15B"
+
+HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts(
+    {
+        "stop_sign": "<|user|>\n<image>\nWhat's the content of the image?<|end|>\n<|assistant|>\n",  # noqa: E501
+        "cherry_blossom": "<|user|>\n<image>\nPlease infer the season with reason in details.<|end|>\n<|assistant|>\n",  # noqa: E501
+    }
+)
+HF_MULTIIMAGE_IMAGE_PROMPT = (
+    "<|user|>\n<image>\n<image>\nDescribe these images.<|end|>\n<|assistant|>\n"  # noqa: E501
+)
+
+DTYPE = "half"
+MAX_TOKENS = 128
+NUM_LOGPROBS = 10
+
+
+def vllm_to_hf_output(
+    vllm_output: tuple[list[int], str, SampleLogprobs | None], model: str
+):
+    """Sanitize vllm output to be comparable with hf output."""
+    _, output_str, out_logprobs = vllm_output
+
+    output_str_without_image = re.sub(r"(<image>)+", "", output_str)
+    if output_str_without_image and output_str_without_image[0] == " ":
+        output_str_without_image = output_str_without_image[1:]
+
+    hf_output_str = output_str_without_image + "<|end|><|endoftext|>"
+
+    tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=True)
+    hf_output_ids = tokenizer.encode(output_str_without_image)
+    if hf_output_ids and hf_output_ids[0] == tokenizer.bos_token_id:
+        hf_output_ids = hf_output_ids[1:]
+
+    return hf_output_ids, hf_output_str, out_logprobs
+
+
+def _build_single_image_inputs(
+    image_assets,
+) -> list[tuple[list[str], PromptImageInput]]:
+    """Build single-image inputs for all size_factors at once."""
+    images = [asset.pil_image for asset in image_assets]
+    all_inputs: list[tuple[list[str], PromptImageInput]] = []
+    for size_factors in [[1.0], [0.25, 0.5, 1.0]]:
+        for image, prompt in zip(images, HF_IMAGE_PROMPTS):
+            all_inputs.append(
+                (
+                    [prompt for _ in size_factors],
+                    [rescale_image_size(image, f) for f in size_factors],
+                )
+            )
+    return all_inputs
+
+
+def _build_multi_image_inputs(
+    image_assets,
+) -> list[tuple[list[str], PromptImageInput]]:
+    """Build multi-image inputs for all size_factors at once."""
+    images = [asset.pil_image for asset in image_assets]
+    all_inputs: list[tuple[list[str], PromptImageInput]] = []
+    for size_factors in [[0.5], [0.15, 0.30]]:
+        all_inputs.append(
+            (
+                [HF_MULTIIMAGE_IMAGE_PROMPT for _ in size_factors],
+                [
+                    [rescale_image_size(image, factor) for image in images]
+                    for factor in size_factors
+                ],
+            )
+        )
+    return all_inputs
+
+
+def _run_and_compare(
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
+    all_inputs: Sequence[tuple[list[str], PromptImageInput]],
+    model: str,
+    max_model_len: int,
+    max_num_seqs: int,
+    mm_limit: int,
+    gpu_memory_utilization: float,
+):
+    """Load each runner once, run all inputs, then compare."""
+    # NOTE: run vLLM first, then HF.  vLLM needs a fresh process without
+    # cuda initialization; running HF first would break the multiprocessing
+    # backend with fork method.
+    with vllm_runner(
+        model,
+        runner="generate",
+        max_model_len=max_model_len,
+        max_num_seqs=max_num_seqs,
+        gpu_memory_utilization=gpu_memory_utilization,
+        dtype=DTYPE,
+        limit_mm_per_prompt={"image": mm_limit},
+        tensor_parallel_size=2,
+        trust_remote_code=True,
+        enforce_eager=True,
+    ) as vllm_model:
+        vllm_outputs_per_case = [
+            vllm_model.generate_greedy_logprobs(
+                prompts,
+                MAX_TOKENS,
+                num_logprobs=NUM_LOGPROBS,
+                images=images,
+            )
+            for prompts, images in all_inputs
+        ]
+
+    hf_model_kwargs = {"_attn_implementation": "sdpa", "device_map": "auto"}
+    with hf_runner(
+        model,
+        dtype=DTYPE,
+        model_kwargs=hf_model_kwargs,
+        auto_cls=AutoModelForCausalLM,
+        trust_remote_code=True,
+    ) as hf_model:
+        hf_outputs_per_case = [
+            hf_model.generate_greedy_logprobs_limit(
+                prompts,
+                MAX_TOKENS,
+                num_logprobs=NUM_LOGPROBS,
+                images=images,
+            )
+            for prompts, images in all_inputs
+        ]
+
+    for hf_outputs, vllm_outputs in zip(hf_outputs_per_case, vllm_outputs_per_case):
+        check_logprobs_close(
+            outputs_0_lst=hf_outputs,
+            outputs_1_lst=vllm_outputs,
+            name_0="hf",
+            name_1="vllm",
+        )
+
+
+@multi_gpu_test(num_gpus=2)
+@pytest.mark.parametrize("model", [MODEL_ID])
+def test_models(hf_runner, vllm_runner, image_assets, model) -> None:
+    all_inputs = _build_single_image_inputs(image_assets)
+    _run_and_compare(
+        hf_runner,
+        vllm_runner,
+        all_inputs,
+        model,
+        max_model_len=8192,
+        max_num_seqs=2,
+        mm_limit=1,
+        gpu_memory_utilization=0.80,
+    )
+
+
+@multi_gpu_test(num_gpus=2)
+@pytest.mark.parametrize("model", [MODEL_ID])
+def test_multi_images_models(hf_runner, vllm_runner, image_assets, model) -> None:
+    all_inputs = _build_multi_image_inputs(image_assets)
+    _run_and_compare(
+        hf_runner,
+        vllm_runner,
+        all_inputs,
+        model,
+        max_model_len=8192,
+        max_num_seqs=2,
+        mm_limit=2,
+        gpu_memory_utilization=0.80,
+    )
diff --git a/tests/models/multimodal/generation/test_pixtral.py b/tests/models/multimodal/generation/test_pixtral.py
index 48329d9aea3d..2ce732342bdb 100644
--- a/tests/models/multimodal/generation/test_pixtral.py
+++ b/tests/models/multimodal/generation/test_pixtral.py
@@ -25,6 +25,7 @@
 
 PIXTRAL_ID = "mistralai/Pixtral-12B-2409"
 MISTRAL_SMALL_3_1_ID = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
+MINISTRAL_3B_ID = "mistralai/Ministral-3-3B-Instruct-2512"
 
 MODELS = [PIXTRAL_ID, MISTRAL_SMALL_3_1_ID]
 
@@ -116,6 +117,7 @@ def _create_engine_inputs_hf(urls: list[str]) -> TextPrompt:
 FIXTURE_LOGPROBS_CHAT = {
     PIXTRAL_ID: FIXTURES_PATH / "pixtral_chat.json",
     MISTRAL_SMALL_3_1_ID: FIXTURES_PATH / "mistral_small_3_chat.json",
+    MINISTRAL_3B_ID: FIXTURES_PATH / "ministral_3b_chat.json",
 }
 
 OutputsLogprobs = list[tuple[list[int], str, SampleLogprobs | None]]
@@ -209,3 +211,41 @@ def test_chat(
         name_0="h100_ref",
         name_1="output",
     )
+
+
+@large_gpu_test(min_gb=16)
+@pytest.mark.parametrize("dtype", ["bfloat16"])
+def test_chat_consolidated(vllm_runner, dtype: str, local_asset_server) -> None:
+    EXPECTED_CHAT_LOGPROBS = load_outputs_w_logprobs(
+        FIXTURE_LOGPROBS_CHAT[MINISTRAL_3B_ID]
+    )
+    with vllm_runner(
+        MINISTRAL_3B_ID,
+        dtype=dtype,
+        tokenizer_mode="mistral",
+        load_format="mistral",
+        config_format="mistral",
+        max_model_len=8192,
+        limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
+    ) as vllm_model:
+        outputs = []
+        urls_all = [local_asset_server.url_for(u) for u in IMG_URLS]
+        msgs = [
+            _create_msg_format(urls_all[:1]),
+            _create_msg_format(urls_all[:2]),
+            _create_msg_format(urls_all),
+        ]
+        for msg in msgs:
+            output = vllm_model.llm.chat(msg, sampling_params=SAMPLING_PARAMS)
+            outputs.extend(output)
+
+    logprobs = vllm_runner._final_steps_generate_w_logprobs(outputs)
+    for i in range(len(logprobs)):
+        assert logprobs[i][-1] is None
+        logprobs[i] = logprobs[i][:-1]
+    check_logprobs_close(
+        outputs_0_lst=EXPECTED_CHAT_LOGPROBS,
+        outputs_1_lst=logprobs,
+        name_0="h100_ref",
+        name_1="output",
+    )
diff --git a/tests/models/multimodal/generation/test_qwen2_5_vl.py b/tests/models/multimodal/generation/test_qwen2_5_vl.py
index 3ba665710af4..15a14da24d1d 100644
--- a/tests/models/multimodal/generation/test_qwen2_5_vl.py
+++ b/tests/models/multimodal/generation/test_qwen2_5_vl.py
@@ -3,6 +3,7 @@
 
 import pytest
 
+from vllm.assets.image import ImageAsset
 from vllm.multimodal.video import sample_frames_from_video
 
 from ....conftest import VIDEO_ASSETS
@@ -11,6 +12,7 @@
 target_dtype = "bfloat16"
 
 VIDEO_PLACEHOLDER = "<|vision_start|><|video_pad|><|vision_end|>"
+IMAGE_PLACEHOLDER = "<|vision_start|><|image_pad|><|vision_end|>"
 
 
 def qwen2_5_vl_chat_template(*query):
@@ -28,6 +30,26 @@ def qwen2_5_vl_chat_template(*query):
 )
 
 
+WINDOW_ATTN_IMAGE_PROMPT = qwen2_5_vl_chat_template(
+    IMAGE_PLACEHOLDER,
+    "Describe the image.",
+)
+IMAGE_ONLY_LIMIT_MM_PER_PROMPT = {"image": 1, "video": 0}
+
+
+def _window_attention_regression_image():
+    # image from regression issue: https://github.com/vllm-project/vllm/issues/15122
+    image = ImageAsset("hato").pil_image
+    return image.resize((image.width // 2, image.height // 2))
+
+
+def _encoder_cudagraph_config(*, max_vision_items: int) -> dict:
+    return {
+        "cudagraph_mm_encoder": True,
+        "encoder_cudagraph_max_vision_items_per_batch": max_vision_items,
+    }
+
+
 @pytest.mark.core_model
 @pytest.mark.parametrize("model", models)
 @pytest.mark.parametrize("video_pruning_rate", [0.0, 0.75])
@@ -146,3 +168,77 @@ def test_qwen2_5_vl_evs_batched_videos(
 
             # Ensure the output is a string
             assert isinstance(output_text, str)
+
+
+@pytest.mark.core_model
+@pytest.mark.parametrize("model", models)
+@pytest.mark.parametrize("dtype", [target_dtype])
+@pytest.mark.parametrize("max_tokens", [128])
+@pytest.mark.parametrize("use_bytecode_hook", [True, False])
+def test_qwen2_5_vl_window_attention_image(
+    vllm_runner,
+    model,
+    dtype: str,
+    max_tokens: int,
+    use_bytecode_hook: bool,
+    monkeypatch,
+) -> None:
+    """Regression test for Qwen2.5 window-attention image path."""
+    monkeypatch.setenv("VLLM_USE_BYTECODE_HOOK", "1" if use_bytecode_hook else "0")
+
+    prompt = [WINDOW_ATTN_IMAGE_PROMPT]
+    images = [[_window_attention_regression_image()]]
+
+    with vllm_runner(
+        model,
+        runner="generate",
+        max_model_len=4096,
+        dtype=dtype,
+        limit_mm_per_prompt=IMAGE_ONLY_LIMIT_MM_PER_PROMPT,
+        compilation_config=_encoder_cudagraph_config(max_vision_items=1),
+    ) as vllm_model:
+        outputs = vllm_model.generate_greedy(prompt, max_tokens, images=images)
+
+        assert len(outputs) == 1
+        output_ids, output_text = outputs[0]
+        assert len(output_ids) > 0
+        assert len(output_text) > 0
+        assert isinstance(output_text, str)
+
+
+@pytest.mark.core_model
+@pytest.mark.parametrize("model", models)
+@pytest.mark.parametrize("dtype", [target_dtype])
+@pytest.mark.parametrize("max_tokens", [128])
+@pytest.mark.parametrize("use_bytecode_hook", [True, False])
+def test_qwen2_5_vl_window_attention_image_batch(
+    vllm_runner,
+    model,
+    dtype: str,
+    max_tokens: int,
+    use_bytecode_hook: bool,
+    monkeypatch,
+) -> None:
+    """Regression test window-attention with a small image batch."""
+    monkeypatch.setenv("VLLM_USE_BYTECODE_HOOK", "1" if use_bytecode_hook else "0")
+
+    image = _window_attention_regression_image()
+    prompts = [WINDOW_ATTN_IMAGE_PROMPT, WINDOW_ATTN_IMAGE_PROMPT]
+    images = [[image], [image]]
+
+    with vllm_runner(
+        model,
+        runner="generate",
+        max_model_len=4096,
+        max_num_seqs=2,
+        dtype=dtype,
+        limit_mm_per_prompt=IMAGE_ONLY_LIMIT_MM_PER_PROMPT,
+        compilation_config=_encoder_cudagraph_config(max_vision_items=2),
+    ) as vllm_model:
+        outputs = vllm_model.generate_greedy(prompts, max_tokens, images=images)
+
+        assert len(outputs) == 2
+        for output_ids, output_text in outputs:
+            assert len(output_ids) > 0
+            assert len(output_text) > 0
+            assert isinstance(output_text, str)
diff --git a/tests/models/multimodal/generation/test_vit_cudagraph.py b/tests/models/multimodal/generation/test_vit_cudagraph.py
new file mode 100644
index 000000000000..18630e3559a3
--- /dev/null
+++ b/tests/models/multimodal/generation/test_vit_cudagraph.py
@@ -0,0 +1,232 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from dataclasses import dataclass, field
+from functools import partial
+
+import pytest
+
+from vllm.multimodal.video import sample_frames_from_video
+from vllm.platforms import current_platform
+
+from ....conftest import IMAGE_ASSETS, VIDEO_ASSETS
+from ...utils import dummy_hf_overrides
+from .vlm_utils.builders import sample_frames_with_video_metadata
+
+
+@dataclass
+class VitCudagraphTestConfig:
+    model: str
+    modalities: list[str] = field(default_factory=lambda: ["image", "video"])
+    image_prompt: str | None = None
+    video_prompt: str | None = None
+    dtype: str = "bfloat16"
+    max_model_len: int = 4096
+    max_tokens: int = 64
+    max_num_seqs: int = 2
+    num_video_frames: int = 16
+    needs_video_metadata: bool = False
+    vllm_runner_kwargs: dict = field(default_factory=dict)
+    compilation_config_overrides: dict = field(default_factory=dict)
+    marks: list = field(default_factory=list)
+
+
+def params_with_marks(
+    configs: dict[str, VitCudagraphTestConfig],
+) -> list[pytest.param]:
+    return [
+        pytest.param(model_id, marks=cfg.marks) for model_id, cfg in configs.items()
+    ]
+
+
+def qwen_vl_chat_template(content: str) -> str:
+    return f"<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n"
+
+
+def step3_vl_chat_template(content: str) -> str:
+    return (
+        "<｜begin▁of▁sentence｜> You are a helpful assistant.<|BOT|>user\n "
+        f"<im_patch>{content} <|EOT|><|BOT|>assistant\n"
+    )
+
+
+MODEL_CONFIGS: dict[str, VitCudagraphTestConfig] = {
+    "qwen2_5_vl": VitCudagraphTestConfig(
+        model="Qwen/Qwen2.5-VL-3B-Instruct",
+        image_prompt=qwen_vl_chat_template(
+            "<|vision_start|><|image_pad|><|vision_end|>What is in this image?"
+        ),
+        video_prompt=qwen_vl_chat_template(
+            "<|vision_start|><|video_pad|><|vision_end|>"
+            "Describe this video in one sentence."
+        ),
+        needs_video_metadata=False,
+        marks=[pytest.mark.core_model],
+    ),
+    "qwen3_vl": VitCudagraphTestConfig(
+        model="Qwen/Qwen3-VL-2B-Instruct",
+        image_prompt=qwen_vl_chat_template(
+            "<|vision_start|><|image_pad|><|vision_end|>What is in this image?"
+        ),
+        video_prompt=qwen_vl_chat_template(
+            "<|vision_start|><|video_pad|><|vision_end|>"
+            "Describe this video in one sentence."
+        ),
+        needs_video_metadata=True,
+        marks=[pytest.mark.core_model],
+    ),
+    "qwen3_5": VitCudagraphTestConfig(
+        model="Qwen/Qwen3.5-0.8B",
+        image_prompt=qwen_vl_chat_template(
+            "<|vision_start|><|image_pad|><|vision_end|>What is in this image?"
+        ),
+        video_prompt=qwen_vl_chat_template(
+            "<|vision_start|><|video_pad|><|vision_end|>"
+            "Describe this video in one sentence."
+        ),
+        needs_video_metadata=True,
+        marks=[pytest.mark.core_model],
+    ),
+    "qwen2_vl": VitCudagraphTestConfig(
+        model="Qwen/Qwen2-VL-2B-Instruct",
+        image_prompt=qwen_vl_chat_template(
+            "<|vision_start|><|image_pad|><|vision_end|>What is in this image?"
+        ),
+        video_prompt=qwen_vl_chat_template(
+            "<|vision_start|><|video_pad|><|vision_end|>"
+            "Describe this video in one sentence."
+        ),
+        needs_video_metadata=False,
+        marks=[pytest.mark.core_model],
+    ),
+    "step3_vl": VitCudagraphTestConfig(
+        model="stepfun-ai/Step3-VL-10B",
+        modalities=["image"],
+        image_prompt=step3_vl_chat_template("What is in this image?"),
+        # Single bucket sized to cover the largest test image's output
+        # tokens (1152 > 1141 for cherry_blossom). The default auto-
+        # inferred range fans out into multiple power-of-2 buckets, each
+        # holding a full ViT capture pool.
+        compilation_config_overrides={
+            "encoder_cudagraph_token_budgets": [1152],
+        },
+        # Shrink to 1 text + 1 vision layer with random weights so the
+        # test runs on any CI GPU (incl. L4) and skips the 20 GiB weight
+        # download. The test only validates that encoder CG capture/
+        # replay functions correctly, not output quality.
+        vllm_runner_kwargs={
+            "load_format": "dummy",
+            "hf_overrides": partial(
+                dummy_hf_overrides,
+                model_arch="StepVLForConditionalGeneration",
+            ),
+        },
+    ),
+}
+
+
+def get_compilation_config(config: VitCudagraphTestConfig):
+    return {
+        "cudagraph_mm_encoder": True,
+        "encoder_cudagraph_max_vision_items_per_batch": 1,
+        "encoder_cudagraph_max_frames_per_batch": 16,
+        **config.compilation_config_overrides,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("model_id", params_with_marks(MODEL_CONFIGS))
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="Requires CUDA")
+def test_vit_cudagraph_image(model_id, vllm_runner, image_assets):
+    config = MODEL_CONFIGS[model_id]
+
+    if "image" not in config.modalities:
+        pytest.skip(f"{model_id} does not support the image modality.")
+
+    image_prompts = IMAGE_ASSETS.prompts(
+        {
+            "stop_sign": config.image_prompt,  # type: ignore[typeddict-item]
+            "cherry_blossom": config.image_prompt,  # type: ignore[typeddict-item]
+        }
+    )
+    images = [[asset.pil_image] for asset in image_assets]
+
+    with vllm_runner(
+        config.model,
+        dtype=config.dtype,
+        max_model_len=config.max_model_len,
+        max_num_seqs=config.max_num_seqs,
+        limit_mm_per_prompt={"image": 1},
+        compilation_config=get_compilation_config(config),
+        **config.vllm_runner_kwargs,
+    ) as vllm_model:
+        outputs = vllm_model.generate_greedy(
+            image_prompts, config.max_tokens, images=images
+        )
+
+        # Basic validation that we got a response
+        assert len(outputs) == 2
+        output_ids, output_text = outputs[0]
+
+        # Ensure we got some output
+        assert len(output_ids) > 0
+        assert len(output_text) > 0
+
+        # Ensure the output is a string
+        assert isinstance(output_text, str)
+
+
+@pytest.mark.parametrize("model_id", params_with_marks(MODEL_CONFIGS))
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="Requires CUDA")
+def test_vit_cudagraph_video(model_id, vllm_runner, video_assets):
+    config = MODEL_CONFIGS[model_id]
+
+    if "video" not in config.modalities:
+        pytest.skip(f"{model_id} does not support the video modality")
+
+    video_prompts = VIDEO_ASSETS.prompts(
+        {
+            "baby_reading": config.video_prompt,  # type: ignore[typeddict-item]
+        }
+    )
+    if config.needs_video_metadata:
+        sampled_vids = [
+            sample_frames_with_video_metadata(
+                (asset.np_ndarrays, asset.metadata), config.num_video_frames
+            )
+            for asset in video_assets
+        ]
+    else:
+        sampled_vids = [
+            sample_frames_from_video(asset.np_ndarrays, config.num_video_frames)
+            for asset in video_assets
+        ]
+    videos = [sampled_vids[0]]
+
+    with vllm_runner(
+        config.model,
+        dtype=config.dtype,
+        max_model_len=config.max_model_len,
+        max_num_seqs=config.max_num_seqs,
+        limit_mm_per_prompt={"video": 1},
+        compilation_config=get_compilation_config(config),
+        **config.vllm_runner_kwargs,
+    ) as vllm_model:
+        outputs = vllm_model.generate_greedy(
+            video_prompts, config.max_tokens, videos=videos
+        )
+
+        # Basic validation that we got a response
+        assert len(outputs) == 1
+        output_ids, output_text = outputs[0]
+
+        # Ensure we got some output
+        assert len(output_ids) > 0
+        assert len(output_text) > 0
+
+        # Ensure the output is a string
+        assert isinstance(output_text, str)
diff --git a/tests/models/multimodal/generation/test_voxtral.py b/tests/models/multimodal/generation/test_voxtral.py
index 590b549dcf59..82db1dc6812c 100644
--- a/tests/models/multimodal/generation/test_voxtral.py
+++ b/tests/models/multimodal/generation/test_voxtral.py
@@ -149,6 +149,10 @@ def _asset_to_openai_chunk(asset):
     )
 
 
+@pytest.mark.skip(
+    reason="VoxtralProcessor.apply_chat_template() in transformers v5 "
+    "doesn't resolve chat_template=None to the default template"
+)
 def test_hf_reference(hf_runner, vllm_runner, audio_assets: AudioTestAssets):
     """Compare vLLM Mistral-format output against HF Transformers reference.
 
diff --git a/tests/models/multimodal/generation/test_whisper.py b/tests/models/multimodal/generation/test_whisper.py
index babf7e7a4978..93634760a576 100644
--- a/tests/models/multimodal/generation/test_whisper.py
+++ b/tests/models/multimodal/generation/test_whisper.py
@@ -4,11 +4,11 @@
 from collections.abc import Sequence
 from typing import Any
 
-import librosa
 import pytest
 from transformers import AutoModelForSpeechSeq2Seq
 
 from vllm.assets.audio import AudioAsset
+from vllm.multimodal.audio import AudioResampler
 from vllm.platforms import current_platform
 
 from ....conftest import HfRunner, PromptAudioInput, VllmRunner
@@ -41,6 +41,7 @@ def run_test(
     tensor_parallel_size: int,
     distributed_executor_backend: str | None = None,
     enforce_eager: bool = True,
+    gpu_memory_utilization: float = 0.9,
 ) -> None:
     """Inference result should be the same between hf and vllm.
 
@@ -57,6 +58,7 @@ def run_test(
         distributed_executor_backend=distributed_executor_backend,
         limit_mm_per_prompt={"audio": 2},
         enforce_eager=enforce_eager,
+        gpu_memory_utilization=gpu_memory_utilization,
         disable_custom_all_reduce=True,
     ) as vllm_model:
         vllm_outputs_per_case = [
@@ -93,13 +95,12 @@ def run_test(
 def resampled_assets() -> list[tuple[Any, int]]:
     audio_assets = [AudioAsset("mary_had_lamb"), AudioAsset("winning_call")]
     sampled_assets = []
+    resampler = AudioResampler(target_sr=WHISPER_SAMPLE_RATE)
     for asset in audio_assets:
         audio, orig_sr = asset.audio_and_sample_rate
         # Resample to Whisper's expected sample rate (16kHz)
         if orig_sr != WHISPER_SAMPLE_RATE:
-            audio = librosa.resample(
-                audio, orig_sr=orig_sr, target_sr=WHISPER_SAMPLE_RATE
-            )
+            audio = resampler.resample(audio, orig_sr=orig_sr)
         sampled_assets.append(
             (audio, WHISPER_SAMPLE_RATE),
         )
@@ -320,6 +321,7 @@ def test_models_distributed(
         tensor_parallel_size=2,
         distributed_executor_backend=distributed_executor_backend,
         enforce_eager=False,
+        gpu_memory_utilization=0.65,
     )
 
 
diff --git a/tests/models/multimodal/generation/vlm_utils/core.py b/tests/models/multimodal/generation/vlm_utils/core.py
index 3de4ca209a6f..207d3a3202a1 100644
--- a/tests/models/multimodal/generation/vlm_utils/core.py
+++ b/tests/models/multimodal/generation/vlm_utils/core.py
@@ -38,6 +38,7 @@ def run_test(
     limit_mm_per_prompt: dict[str, int],
     vllm_runner_kwargs: dict[str, Any] | None,
     hf_model_kwargs: dict[str, Any] | None,
+    hf_processor: Callable[[str], Any] | None,
     patch_hf_runner: Callable[[HfRunner], HfRunner] | None,
     runner: RunnerOption = "auto",
     distributed_executor_backend: str | None = None,
@@ -80,6 +81,11 @@ def run_test(
     if vllm_runner_kwargs:
         vllm_runner_kwargs_.update(vllm_runner_kwargs)
 
+    # Avoid passing limit_mm_per_prompt twice when vllm_runner_kwargs
+    # already contains it (e.g. gemma4 sets it via vllm_runner_kwargs).
+    if "limit_mm_per_prompt" in vllm_runner_kwargs_:
+        limit_mm_per_prompt = vllm_runner_kwargs_.pop("limit_mm_per_prompt")
+
     with vllm_runner(
         model,
         max_model_len=max_model_len,
@@ -111,8 +117,18 @@ def run_test(
             )
             vllm_outputs_per_mm.append(vllm_output)
 
+    hf_runner_kwargs: dict[str, Any] = {}
+    if model_info.tokenizer:
+        hf_runner_kwargs["tokenizer_name"] = model_info.tokenizer
+    if hf_processor is not None:
+        hf_runner_kwargs["processor"] = hf_processor(model)
+
     hf_model = hf_runner(
-        model, dtype=dtype, auto_cls=auto_cls, model_kwargs=hf_model_kwargs
+        model,
+        dtype=dtype,
+        auto_cls=auto_cls,
+        model_kwargs=hf_model_kwargs,
+        **hf_runner_kwargs,
     )
 
     # Some models need to patch things like the model processor, e.g., internvl
diff --git a/tests/models/multimodal/generation/vlm_utils/model_utils.py b/tests/models/multimodal/generation/vlm_utils/model_utils.py
index 0a692387cffc..62ea36061c9c 100644
--- a/tests/models/multimodal/generation/vlm_utils/model_utils.py
+++ b/tests/models/multimodal/generation/vlm_utils/model_utils.py
@@ -1336,3 +1336,312 @@ def patched_generate(*args, **kwargs):
     hf_model.get_inputs = patched_get_inputs  # type: ignore[method-assign, assignment]
     hf_model.model.generate = patched_generate  # type: ignore[method-assign]
     return hf_model
+
+
+def moondream3_processor(model: str):
+    from vllm.transformers_utils.processors.moondream3 import Moondream3Processor
+
+    return Moondream3Processor.from_pretrained(model, trust_remote_code=True)
+
+
+def moondream3_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
+    """Patch HfRunner for Moondream3."""
+    moondream_processor = hf_model.processor
+
+    def processor(*args, text="", images=None, **kwargs):
+        if images is None:
+            return moondream_processor(text=text, **kwargs)
+
+        images_list = [images] if isinstance(images, Image) else images
+        return moondream_processor(images=images_list, text=text, **kwargs)
+
+    hf_model.processor = processor
+
+    # Expose the LM head for logprob extraction.
+    hf_model.model.get_output_embeddings = lambda: hf_model.model.model.text.lm_head
+
+    native_model = hf_model.model.model  # MoondreamModel instance
+
+    from torch.nn import functional as F
+
+    from vllm.model_executor.models.moondream3 import reconstruct_from_crops
+
+    # Resolve the placeholder tokens from the tokenizer instead of hard-coding.
+    image_placeholder_ids = moondream_processor.tokenizer.encode(
+        "<image>", add_special_tokens=False
+    )
+
+    def _normalize_tiling(tilings):
+        """Extract (h, w) tuple from various tiling container formats."""
+        tiling = tilings
+        if isinstance(tiling, torch.Tensor):
+            tiling = tuple(tiling.squeeze().tolist())
+        elif isinstance(tiling, (list, tuple)):
+            t0 = tiling[0]
+            if isinstance(t0, torch.Tensor):
+                tiling = tuple(t0.tolist())
+            elif isinstance(t0, (list, tuple)):
+                tiling = tuple(t0)
+        return tiling
+
+    def _encode_vision(pixel_values, tilings):
+        """Run preprocessed crops through vision encoder + projection."""
+        device = native_model.device
+        dtype = native_model.vision.pos_emb.dtype
+        config = native_model.config
+
+        pv = pixel_values
+        while pv.dim() > 4:
+            pv = pv.squeeze(0)
+        pv = pv.to(device=device, dtype=dtype)
+
+        features = native_model._vis_enc(pv)
+        grid_size = config.vision.crop_size // config.vision.enc_patch_size
+        global_feat = features[0]
+
+        if features.shape[0] > 1 and tilings is not None:
+            tiling = _normalize_tiling(tilings)
+            local = features[1:].view(-1, grid_size, grid_size, config.vision.enc_dim)
+            reconstructed = reconstruct_from_crops(
+                local,
+                tiling,
+                config.vision.overlap_margin,
+                patch_size=1,
+            )
+        else:
+            reconstructed = global_feat.view(
+                grid_size, grid_size, config.vision.enc_dim
+            )
+
+        return native_model._vis_proj(global_feat, reconstructed)
+
+    def _find_subsequence(seq, subseq):
+        """Find start index of subseq in seq, or None."""
+        n = len(subseq)
+        for i in range(len(seq) - n + 1):
+            if seq[i : i + n] == subseq:
+                return i
+        return None
+
+    def _generate(
+        self,
+        input_ids=None,
+        pixel_values=None,
+        tilings=None,
+        attention_mask=None,
+        **kwargs,
+    ):
+        max_new_tokens = kwargs.get("max_new_tokens", 128)
+        return_dict = kwargs.get("return_dict_in_generate", False)
+        output_hs = kwargs.get("output_hidden_states", False)
+
+        if pixel_values is None:
+            sequences = input_ids
+            if return_dict:
+                return types.SimpleNamespace(
+                    sequences=sequences,
+                    hidden_states=() if output_hs else None,
+                )
+            return sequences
+
+        # Processor may return lists; extract the single element.
+        if isinstance(pixel_values, (list, tuple)):
+            pixel_values = pixel_values[0]
+        if (
+            isinstance(tilings, (list, tuple))
+            and tilings
+            and not isinstance(tilings[0], int)
+        ):
+            tilings = tilings[0]
+
+        hf_model.model._setup_caches()
+        native_model.use_flex_decoding = False
+
+        device = native_model.device
+        config = native_model.config
+
+        with torch.inference_mode():
+            for block in native_model.text.blocks:
+                block.kv_cache.k_cache.zero_()
+                block.kv_cache.v_cache.zero_()
+
+            img_emb = _encode_vision(pixel_values, tilings)
+
+            bos_emb = F.embedding(
+                torch.tensor([[config.tokenizer.bos_id]], device=device),
+                native_model.text.wte,
+            )
+            img_input = torch.cat([bos_emb, img_emb.unsqueeze(0)], dim=1)
+            prefix_len = img_input.size(1)
+
+            mask = native_model.attn_mask[:, :, :prefix_len, :]
+            pos_ids = torch.arange(prefix_len, dtype=torch.long, device=device)
+            native_model._prefill(img_input, mask, pos_ids, None)
+
+            ids = input_ids.squeeze(0).tolist()
+            img_start = _find_subsequence(ids, image_placeholder_ids)
+
+            if img_start is None:
+                sequences = input_ids
+                if return_dict:
+                    return types.SimpleNamespace(
+                        sequences=sequences,
+                        hidden_states=() if output_hs else None,
+                    )
+                return sequences
+
+            prompt_tokens = ids[img_start + len(image_placeholder_ids) :]
+
+            if not prompt_tokens:
+                sequences = input_ids
+                if return_dict:
+                    return types.SimpleNamespace(
+                        sequences=sequences,
+                        hidden_states=() if output_hs else None,
+                    )
+                return sequences
+
+            prompt_tensor = torch.tensor([prompt_tokens], device=device)
+            prompt_emb = F.embedding(prompt_tensor, native_model.text.wte)
+            prompt_len = prompt_emb.size(1)
+
+            mask = native_model.attn_mask[:, :, prefix_len : prefix_len + prompt_len, :]
+            pos_ids = torch.arange(
+                prefix_len,
+                prefix_len + prompt_len,
+                dtype=torch.long,
+                device=device,
+            )
+            hidden = native_model._prefill(prompt_emb, mask, pos_ids, None)
+            pos = prefix_len + prompt_len
+
+            hidden_last = native_model.text.post_ln(hidden[:, -1:, :])
+            logits = native_model.text.lm_head(hidden_last.squeeze(1))
+
+            generated = []
+            all_hidden_states = []
+            # Record the hidden state that predicted each generated token.
+            prev_hs = hidden_last
+            for _ in range(max_new_tokens):
+                next_token = logits.argmax(dim=-1).item()
+                if next_token == 0:
+                    break
+                generated.append(next_token)
+                if output_hs:
+                    all_hidden_states.append((prev_hs,))
+
+                next_emb = F.embedding(
+                    torch.tensor([[next_token]], device=device),
+                    native_model.text.wte,
+                )
+                mask = native_model.attn_mask[:, :, pos : pos + 1, :]
+                pos_ids_step = torch.tensor([pos], dtype=torch.long, device=device)
+                hidden = native_model._prefill(next_emb, mask, pos_ids_step, None)
+                hidden_last = native_model.text.post_ln(hidden[:, -1:, :])
+                prev_hs = hidden_last
+                logits = native_model.text.lm_head(hidden_last.squeeze(1))
+                pos += 1
+
+            result_ids = ids + generated
+            sequences = torch.tensor([result_ids], device=device)
+
+            if return_dict:
+                return types.SimpleNamespace(
+                    sequences=sequences,
+                    hidden_states=tuple(all_hidden_states) if output_hs else None,
+                )
+            return sequences
+
+    hf_model.model.generate = types.MethodType(_generate, hf_model.model)
+    return hf_model
+
+
+def qianfan_ocr_hf_model_kwargs(model_name: str) -> dict:
+    """Return hf_model_kwargs with a patched config for QianfanOCR."""
+    from vllm.transformers_utils.configs.qianfan_ocr import QianfanOCRConfig
+
+    config = QianfanOCRConfig.from_pretrained(model_name)
+    vc = config.vision_config
+    if isinstance(vc.image_size, int):
+        vc.image_size = (vc.image_size, vc.image_size)
+    if isinstance(vc.patch_size, int):
+        vc.patch_size = (vc.patch_size, vc.patch_size)
+    return {"config": config}
+
+
+def qianfan_ocr_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
+    """Patches an HfRunner instance to run QianfanOCR model inference.
+
+    QianfanOCR shares the same architecture as InternVLChatModel, so the
+    patching logic mirrors ``internvl_patch_hf_runner``.  The only difference
+    is that we load the config via vllm's registered ``QianfanOCRConfig``
+    instead of relying on ``trust_remote_code``.
+    """
+
+    class QianfanOCRProcessor:
+        def __init__(self, hf_runner: HfRunner):
+            self.tokenizer = hf_runner.tokenizer
+
+            from vllm.transformers_utils.configs.qianfan_ocr import QianfanOCRConfig
+
+            self.config = QianfanOCRConfig.from_pretrained(hf_runner.model_name)
+            self.vision_config = self.config.vision_config
+            self.use_thumbnail = self.config.use_thumbnail
+            self.min_num = self.config.min_dynamic_patch
+            self.max_num = self.config.max_dynamic_patch
+            self.image_size = self.vision_config.image_size
+
+            # Compute num_image_token from config instead of model attribute,
+            # since the transformers-native model doesn't expose it.
+            image_size = self.config.force_image_size or self.vision_config.image_size
+            patch_size = self.vision_config.patch_size
+            downsample_ratio = self.config.downsample_ratio
+            self.num_image_token = int(
+                (image_size // patch_size) ** 2 * (downsample_ratio**2)
+            )
+
+        def __call__(
+            self,
+            text: str,
+            images: PIL.Image.Image | list[PIL.Image.Image] = None,
+            **kwargs,
+        ):
+            from vllm.transformers_utils.processors.internvl import (
+                image_to_pixel_values_internvl,
+            )
+
+            IMG_START = "<img>"
+            IMG_END = "</img>"
+            IMG_CONTEXT = "<IMG_CONTEXT>"
+
+            images = [images] if isinstance(images, PIL.Image.Image) else images
+            pixel_values_list = [
+                image_to_pixel_values_internvl(
+                    image,
+                    input_size=self.image_size,
+                    min_num=self.min_num,
+                    max_num=self.max_num,
+                    use_thumbnail=self.use_thumbnail,
+                )
+                for image in images
+            ]
+            num_patches_list = [pv.shape[0] for pv in pixel_values_list]
+            pixel_values = torch.cat(pixel_values_list, dim=0)
+
+            for num_patches in num_patches_list:
+                context_tokens = IMG_CONTEXT * self.num_image_token * num_patches
+                image_tokens = IMG_START + context_tokens + IMG_END
+                text = text.replace("<image>", image_tokens, 1)
+
+            prompt = self.tokenizer(text, return_tensors="pt")
+            prompt.update({"pixel_values": pixel_values})
+            return prompt
+
+    img_context_token_id = hf_model.tokenizer.convert_tokens_to_ids("<IMG_CONTEXT>")
+    hf_model.model.img_context_token_id = img_context_token_id
+    hf_model.processor = QianfanOCRProcessor(hf_model)
+    hf_model.model.get_output_embeddings = (
+        lambda: hf_model.model.language_model.get_output_embeddings()
+    )
+    hf_model.model.generate = types.MethodType(_internvl_generate, hf_model.model)
+    return hf_model
diff --git a/tests/models/multimodal/generation/vlm_utils/types.py b/tests/models/multimodal/generation/vlm_utils/types.py
index ae2f75481359..af48a1479bad 100644
--- a/tests/models/multimodal/generation/vlm_utils/types.py
+++ b/tests/models/multimodal/generation/vlm_utils/types.py
@@ -133,6 +133,7 @@ class VLMTestInfo(NamedTuple):
 
     # Exposed options for HF runner
     hf_model_kwargs: dict[str, Any] | None = None
+    hf_processor: Callable[[str], Any] | None = None
     # Indicates we should explicitly pass the EOS from the tokenizer
     use_tokenizer_eos: bool = False
     auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM
@@ -196,6 +197,7 @@ def get_non_parametrized_runner_kwargs(self):
             "comparator": self.comparator,
             "get_stop_token_ids": self.get_stop_token_ids,
             "hf_model_kwargs": self.hf_model_kwargs,
+            "hf_processor": self.hf_processor,
             "stop_str": self.stop_str,
             "patch_hf_runner": self.patch_hf_runner,
         }
diff --git a/tests/models/multimodal/pooling/test_colmodernvbert.py b/tests/models/multimodal/pooling/test_colmodernvbert.py
index 01f3843c34e8..efeb3195b15b 100644
--- a/tests/models/multimodal/pooling/test_colmodernvbert.py
+++ b/tests/models/multimodal/pooling/test_colmodernvbert.py
@@ -10,7 +10,7 @@
 import pytest
 import torch
 
-from vllm.entrypoints.pooling.score.utils import compute_maxsim_score
+from vllm.entrypoints.pooling.scoring.utils import compute_maxsim_score
 
 MODEL_NAME = "ModernVBERT/colmodernvbert-merged"
 COLBERT_DIM = 128
diff --git a/tests/models/multimodal/pooling/test_colpali.py b/tests/models/multimodal/pooling/test_colpali.py
index 321e9fb60756..7c91731065bb 100644
--- a/tests/models/multimodal/pooling/test_colpali.py
+++ b/tests/models/multimodal/pooling/test_colpali.py
@@ -18,7 +18,7 @@
     ChatCompletionContentPartImageParam,
     ChatCompletionContentPartTextParam,
 )
-from vllm.entrypoints.pooling.score.utils import ScoreMultiModalParam
+from vllm.entrypoints.pooling.scoring.typing import ScoreMultiModalParam
 
 from ....conftest import VllmRunner
 
@@ -114,7 +114,7 @@ def _run_late_interaction_test(
     dtype: str,
 ) -> None:
     """Verify MaxSim scoring matches manual computation."""
-    from vllm.entrypoints.pooling.score.utils import compute_maxsim_score
+    from vllm.entrypoints.pooling.scoring.utils import compute_maxsim_score
 
     with vllm_runner(
         model,
diff --git a/tests/models/multimodal/pooling/test_colqwen3.py b/tests/models/multimodal/pooling/test_colqwen3.py
index 50f0108c3701..9eefedc153c2 100644
--- a/tests/models/multimodal/pooling/test_colqwen3.py
+++ b/tests/models/multimodal/pooling/test_colqwen3.py
@@ -18,10 +18,15 @@
     ChatCompletionContentPartImageParam,
     ChatCompletionContentPartTextParam,
 )
-from vllm.entrypoints.pooling.score.utils import ScoreMultiModalParam
+from vllm.entrypoints.pooling.scoring.typing import ScoreMultiModalParam
 
 from ....conftest import VllmRunner
 
+pytestmark = pytest.mark.skip(
+    reason="ColQwen3 model's weight tying is incompatible with "
+    "transformers v5 (missing all_tied_weights_keys)"
+)
+
 MODELS = [
     "TomoroAI/tomoro-colqwen3-embed-4b",
     "OpenSearch-AI/Ops-Colqwen3-4B",
@@ -125,7 +130,7 @@ def _run_late_interaction_test(
     dtype: str,
 ) -> None:
     """Verify MaxSim scoring matches manual computation."""
-    from vllm.entrypoints.pooling.score.utils import compute_maxsim_score
+    from vllm.entrypoints.pooling.scoring.utils import compute_maxsim_score
 
     with vllm_runner(
         model,
diff --git a/tests/models/multimodal/pooling/test_colqwen3_5.py b/tests/models/multimodal/pooling/test_colqwen3_5.py
index d5899b7a427c..2b6a5a263c58 100644
--- a/tests/models/multimodal/pooling/test_colqwen3_5.py
+++ b/tests/models/multimodal/pooling/test_colqwen3_5.py
@@ -73,7 +73,7 @@ def _run_late_interaction_test(
     dtype: str,
 ) -> None:
     """Verify MaxSim scoring matches manual computation."""
-    from vllm.entrypoints.pooling.score.utils import compute_maxsim_score
+    from vllm.entrypoints.pooling.scoring.utils import compute_maxsim_score
 
     with vllm_runner(
         model,
diff --git a/tests/models/multimodal/pooling/test_intern_vit.py b/tests/models/multimodal/pooling/test_intern_vit.py
index cd457c62c0af..d7b67b8bdb6a 100644
--- a/tests/models/multimodal/pooling/test_intern_vit.py
+++ b/tests/models/multimodal/pooling/test_intern_vit.py
@@ -7,14 +7,22 @@
 from transformers import AutoConfig, AutoModel, CLIPImageProcessor
 
 from vllm.distributed import cleanup_dist_env_and_memory
+from vllm.platforms import current_platform
 from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
 
 from ....conftest import ImageTestAssets
 
+pytestmark = pytest.mark.skip(
+    reason="InternVisionModel's custom code is incompatible with "
+    "transformers v5 (missing all_tied_weights_keys)"
+)
+
 # we use snapshot_download to prevent conflicts between
 # dynamic_module and trust_remote_code for hf_runner
 DOWNLOAD_PATTERN = ["*.json", "*.py", "*.safetensors", "*.txt", "*.model"]
 
+DEVICE_TYPE = current_platform.device_type
+
 
 @torch.inference_mode()
 def run_intern_vit_test(
@@ -39,9 +47,9 @@ def run_intern_vit_test(
 
     hf_model = AutoModel.from_pretrained(
         model, dtype=torch_dtype, trust_remote_code=True
-    ).to("cuda")
+    ).to(DEVICE_TYPE)
     hf_outputs_per_image = [
-        hf_model(pixel_value.to("cuda")).last_hidden_state
+        hf_model(pixel_value.to(DEVICE_TYPE)).last_hidden_state
         for pixel_value in pixel_values
     ]
 
@@ -53,9 +61,10 @@ def run_intern_vit_test(
     del hf_model
     cleanup_dist_env_and_memory()
 
-    vllm_model = vllm_model.to("cuda", torch_dtype)
+    vllm_model = vllm_model.to(DEVICE_TYPE, torch_dtype)
     vllm_outputs_per_image = [
-        vllm_model(pixel_values=pixel_value.to("cuda")) for pixel_value in pixel_values
+        vllm_model(pixel_values=pixel_value.to(DEVICE_TYPE))
+        for pixel_value in pixel_values
     ]
     del vllm_model
     cleanup_dist_env_and_memory()
diff --git a/tests/models/multimodal/pooling/test_jinavl_reranker.py b/tests/models/multimodal/pooling/test_jinavl_reranker.py
index fef5b420de6b..18a02625ea44 100644
--- a/tests/models/multimodal/pooling/test_jinavl_reranker.py
+++ b/tests/models/multimodal/pooling/test_jinavl_reranker.py
@@ -11,10 +11,15 @@
     ChatCompletionContentPartImageParam,
     ChatCompletionContentPartTextParam,
 )
-from vllm.entrypoints.pooling.score.utils import ScoreMultiModalParam
+from vllm.entrypoints.pooling.scoring.typing import ScoreMultiModalParam
 
 from ....conftest import HfRunner, VllmRunner
 
+pytestmark = pytest.mark.skip(
+    reason="jinaai/jina-reranker-m0 custom code is incompatible with "
+    "transformers v5 (missing all_tied_weights_keys)"
+)
+
 MODELS = ["jinaai/jina-reranker-m0"]
 
 MM_PROCESSOR_KWARGS = {
diff --git a/tests/models/multimodal/pooling/test_llama_nemotron_vl.py b/tests/models/multimodal/pooling/test_llama_nemotron_vl.py
index 6bea808152f6..a2f1d3424c34 100644
--- a/tests/models/multimodal/pooling/test_llama_nemotron_vl.py
+++ b/tests/models/multimodal/pooling/test_llama_nemotron_vl.py
@@ -21,7 +21,7 @@
     ChatCompletionContentPartImageParam,
     ChatCompletionContentPartTextParam,
 )
-from vllm.entrypoints.pooling.score.utils import ScoreMultiModalParam
+from vllm.entrypoints.pooling.scoring.typing import ScoreMultiModalParam
 from vllm.platforms import current_platform
 
 from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner
diff --git a/tests/models/multimodal/pooling/test_prithvi_mae.py b/tests/models/multimodal/pooling/test_prithvi_mae.py
index 19154c27da9a..1a466931a0e5 100644
--- a/tests/models/multimodal/pooling/test_prithvi_mae.py
+++ b/tests/models/multimodal/pooling/test_prithvi_mae.py
@@ -1,11 +1,18 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import importlib.util
+
 import pytest
 import torch
 
 from ....conftest import VllmRunner
 
+pytestmark = pytest.mark.skipif(
+    importlib.util.find_spec("terratorch") is None,
+    reason="terratorch unavailable while PyPI has `lightning` quarantined; see #41376",
+)
+
 
 def _run_test(
     vllm_runner: type[VllmRunner],
diff --git a/tests/models/multimodal/pooling/test_radio.py b/tests/models/multimodal/pooling/test_radio.py
index 86b5b1b5d1f9..fcab077fbba8 100644
--- a/tests/models/multimodal/pooling/test_radio.py
+++ b/tests/models/multimodal/pooling/test_radio.py
@@ -8,6 +8,7 @@
 
 from vllm.distributed import cleanup_dist_env_and_memory
 from vllm.model_executor.models.radio import RadioModel
+from vllm.platforms import current_platform
 from vllm.transformers_utils.configs.radio import RadioConfig
 from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
 
@@ -17,6 +18,8 @@
 # dynamic_module and trust_remote_code for hf_runner
 DOWNLOAD_PATTERN = ["*.json", "*.py", "*.safetensors", "*.txt", "*.model"]
 
+DEVICE_TYPE = current_platform.device_type
+
 
 @torch.inference_mode()
 def run_radio_test(
@@ -51,7 +54,7 @@ def run_radio_test(
         config=hf_config,
         dtype=torch_dtype,
         trust_remote_code=True,
-    ).to("cuda")
+    ).to(DEVICE_TYPE)
     hf_model.eval()
 
     # A HF model has image normalization as a part of model's forward
@@ -62,7 +65,7 @@ def run_radio_test(
     hf_model.make_preprocessor_external()
 
     hf_outputs_per_image = [
-        hf_model(pixel_value.to("cuda")) for pixel_value in pixel_values
+        hf_model(pixel_value.to(DEVICE_TYPE)) for pixel_value in pixel_values
     ]
 
     vllm_config = RadioConfig(
@@ -71,10 +74,11 @@ def run_radio_test(
     )
     vllm_model = RadioModel(vllm_config)
     vllm_model.load_weights(hf_model.state_dict())
-    vllm_model = vllm_model.to("cuda", torch_dtype)
+    vllm_model = vllm_model.to(DEVICE_TYPE, torch_dtype)
 
     vllm_outputs_per_image = [
-        vllm_model(pixel_values=pixel_value.to("cuda")) for pixel_value in pixel_values
+        vllm_model(pixel_values=pixel_value.to(DEVICE_TYPE))
+        for pixel_value in pixel_values
     ]
     del vllm_model, hf_model
     cleanup_dist_env_and_memory()
diff --git a/tests/models/multimodal/processing/test_audioflamingo3.py b/tests/models/multimodal/processing/test_audioflamingo3.py
index 24311e5212b2..3a6da75ed5b6 100644
--- a/tests/models/multimodal/processing/test_audioflamingo3.py
+++ b/tests/models/multimodal/processing/test_audioflamingo3.py
@@ -140,88 +140,3 @@ def test_audio_token_count_matches_hf_processor_math():
         _count_audio_tokens_from_mask(feature_attention_mask, chunk_counts, 0) == 1499
     )
     assert _count_audio_tokens_from_mask(feature_attention_mask, chunk_counts, 1) == 375
-
-
-def test_audio_feature_pipeline_matches_hf_small_config():
-    from transformers.models.audioflamingo3 import (
-        modeling_audioflamingo3 as hf_audioflamingo3_modeling,
-    )
-    from transformers.models.audioflamingo3.configuration_audioflamingo3 import (
-        AudioFlamingo3Config,
-    )
-
-    from vllm.model_executor.models.audioflamingo3 import (
-        AudioFlamingo3Encoder,
-        AudioFlamingo3MultiModalProjector,
-        _build_audio_encoder_attention_mask,
-        _flatten_valid_audio_embeddings,
-    )
-
-    text_config = {
-        "model_type": "qwen2",
-        "intermediate_size": 64,
-        "initializer_range": 0.02,
-        "hidden_size": 32,
-        "max_position_embeddings": 1024,
-        "num_hidden_layers": 2,
-        "num_attention_heads": 4,
-        "num_key_value_heads": 2,
-        "vocab_size": 128,
-        "pad_token_id": 1,
-        "use_mrope": False,
-    }
-    audio_config = {
-        "hidden_size": 16,
-        "num_attention_heads": 4,
-        "intermediate_size": 32,
-        "num_hidden_layers": 2,
-        "num_mel_bins": 80,
-        "max_source_positions": 1500,
-        "dropout": 0.0,
-        "attention_dropout": 0.0,
-        "activation_dropout": 0.0,
-        "encoder_layerdrop": 0.0,
-    }
-
-    torch.manual_seed(0)
-    config = AudioFlamingo3Config(
-        text_config=text_config,
-        audio_config=audio_config,
-        audio_token_id=0,
-    )
-    hf_model = hf_audioflamingo3_modeling.AudioFlamingo3ForConditionalGeneration(
-        config
-    ).eval()
-
-    vllm_encoder = AudioFlamingo3Encoder(config.audio_config).eval()
-    vllm_encoder.load_state_dict(hf_model.audio_tower.state_dict())
-
-    vllm_projector = AudioFlamingo3MultiModalProjector(config).eval()
-    vllm_projector.load_state_dict(hf_model.multi_modal_projector.state_dict())
-
-    input_features = torch.randn(3, 80, 3000)
-    feature_attention_mask = torch.zeros(3, 3000, dtype=torch.bool)
-    feature_attention_mask[0, :3000] = True
-    feature_attention_mask[1, :2500] = True
-    feature_attention_mask[2, :1500] = True
-
-    hf_output = hf_model.get_audio_features(
-        input_features,
-        feature_attention_mask,
-        return_dict=True,
-    ).pooler_output
-    vllm_attention_mask = _build_audio_encoder_attention_mask(
-        feature_attention_mask,
-        dtype=vllm_encoder.conv1.weight.dtype,
-        device=vllm_encoder.conv1.weight.device,
-    )
-    vllm_hidden_states = vllm_encoder(
-        input_features,
-        attention_mask=vllm_attention_mask,
-    )
-    vllm_output, _ = _flatten_valid_audio_embeddings(
-        vllm_projector(vllm_hidden_states),
-        feature_attention_mask,
-    )
-
-    torch.testing.assert_close(vllm_output, hf_output)
diff --git a/tests/models/multimodal/processing/test_common.py b/tests/models/multimodal/processing/test_common.py
index cce69e15b00b..26ab67e5d949 100644
--- a/tests/models/multimodal/processing/test_common.py
+++ b/tests/models/multimodal/processing/test_common.py
@@ -311,6 +311,9 @@ def _to_dummy_options(modality: str, count: int) -> BaseDummyOptions:
             baseline_processor,
             cached_processor,
             batch_idx,
+            hit_rate,
+            num_batches,
+            simplify_rate,
         )
 
 
@@ -320,6 +323,9 @@ def _test_processing_correctness_one(
     baseline_processor: BaseMultiModalProcessor,
     cached_processor: BaseMultiModalProcessor,
     batch_idx: int,
+    hit_rate: float,
+    num_batches: int,
+    simplify_rate: float,
 ):
     model_type = model_config.hf_config.model_type
 
@@ -343,7 +349,11 @@ def _test_processing_correctness_one(
         baseline_tokenized_result,
         cached_tokenized_result,
         ignore_mm_keys=ignore_mm_keys,
-        msg=f"Failed ({batch_idx=}, {token_prompt=}, {mm_data=})",
+        msg=(
+            f"Failed ({batch_idx=}, {hit_rate=}, "
+            f"{num_batches=}, {simplify_rate=}, "
+            f"{text_prompt=}, {token_prompt=}, {mm_data=})"
+        ),
     )
 
     if text_prompt is not None:
@@ -362,21 +372,33 @@ def _test_processing_correctness_one(
             baseline_text_result,
             cached_text_result,
             ignore_mm_keys=ignore_mm_keys,
-            msg=f"Failed ({batch_idx=}, {text_prompt=}, {mm_data=})",
+            msg=(
+                f"Failed ({batch_idx=}, {hit_rate=}, "
+                f"{num_batches=}, {simplify_rate=}, "
+                f"{text_prompt=}, {token_prompt=}, {mm_data=})"
+            ),
         )
 
         _assert_inputs_equal(
             baseline_text_result,
             baseline_tokenized_result,
             ignore_mm_keys=ignore_mm_keys,
-            msg=f"Failed ({batch_idx=}, {text_prompt=}, {token_prompt=}, {mm_data=})",
+            msg=(
+                f"Failed ({batch_idx=}, {hit_rate=}, "
+                f"{num_batches=}, {simplify_rate=}, "
+                f"{text_prompt=}, {token_prompt=}, {mm_data=})"
+            ),
         )
 
         _assert_inputs_equal(
             cached_text_result,
             cached_tokenized_result,
             ignore_mm_keys=ignore_mm_keys,
-            msg=f"Failed ({batch_idx=}, {text_prompt=}, {token_prompt=}, {mm_data=})",
+            msg=(
+                f"Failed ({batch_idx=}, {hit_rate=}, "
+                f"{num_batches=}, {simplify_rate=}, "
+                f"{text_prompt=}, {token_prompt=}, {mm_data=})"
+            ),
         )
 
 
@@ -394,13 +416,13 @@ def test_processing_correctness(
         pytest.skip("Fix later")
     if model_id == "OpenGVLab/InternVL2-2B":
         pytest.skip("Fix later")
-    if model_id == "jinaai/jina-reranker-m0":
-        pytest.skip("Fix later")
-    if model_id in {"Qwen/Qwen-VL", "Qwen/Qwen-VL-Chat"}:
+    if model_id == "openvla/openvla-7b":
         pytest.skip(
-            "Qwen-VL tokenizer requires downloading a font file from "
-            "servers that often refuse connections in CI"
+            "OpenVLA uses a custom vLLM processor because its HF remote "
+            "processor is incompatible with current Transformers."
         )
+    if model_id == "jinaai/jina-reranker-m0":
+        pytest.skip("Fix later")
     if model_id == "mistralai/Voxtral-Mini-4B-Realtime-2602":
         pytest.skip(
             "Voxtral Realtime doesn't make use of any place-holder "
@@ -408,6 +430,8 @@ def test_processing_correctness(
             "correctness test as is. Let's revisit adapting this "
             "test once more realtime models exist."
         )
+    if model_id == "CohereLabs/cohere-transcribe-03-2026":
+        pytest.skip("Fix later")
 
     _test_processing_correctness(
         model_id,
diff --git a/tests/models/multimodal/processing/test_gemma4.py b/tests/models/multimodal/processing/test_gemma4.py
new file mode 100644
index 000000000000..a355501fdd80
--- /dev/null
+++ b/tests/models/multimodal/processing/test_gemma4.py
@@ -0,0 +1,287 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Mapping
+
+import pytest
+import torch
+from PIL import Image as PILImage
+
+from vllm.model_executor.models.gemma4_mm import (
+    Gemma4ForConditionalGeneration,
+    Gemma4ImagePixelInputs,
+)
+from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.multimodal.inputs import MultiModalFieldConfig
+from vllm.utils.mem_constants import GiB_bytes
+
+from ....conftest import ImageTestAssets
+from ...utils import build_model_context
+
+# TODO: to be updated to "google/gemma-4-e2b-it" once the models are available
+GEMMA4_MODEL_ID = "google/gemma-4-E2B-it"
+
+
+def test_gemma4_image_schema_accepts_variable_patch_counts():
+    Gemma4ImagePixelInputs(
+        pixel_values=[
+            torch.randn(10080, 768),
+            torch.randn(2520, 768),
+        ],
+        pixel_position_ids=[
+            torch.zeros(10080, 2, dtype=torch.long),
+            torch.zeros(2520, 2, dtype=torch.long),
+        ],
+    )
+
+
+def test_gemma4_image_batching_keeps_variable_patch_counts_unstacked():
+    field = MultiModalFieldConfig.batched("image").field
+    elems = field.build_elems(
+        "image",
+        "pixel_values",
+        [torch.randn(10080, 768), torch.randn(2520, 768)],
+    )
+
+    reduced = field.reduce_data(list(elems))
+
+    assert isinstance(reduced, list)
+    assert [tensor.shape for tensor in reduced] == [
+        torch.Size([10080, 768]),
+        torch.Size([2520, 768]),
+    ]
+
+
+@pytest.mark.parametrize(
+    "image_width,image_height,max_soft_tokens",
+    [
+        # Production repro: a 3x900 image (extreme aspect ratio) made the
+        # prompt-side estimator return 289 while the HF Gemma 4 image
+        # processor's vision tower output capped at 280, producing the
+        # "Attempted to assign 280 multimodal tokens to 289 placeholders"
+        # mismatch that crashed EngineCore.
+        (900, 3, 280),
+        (3, 900, 280),
+        # Same pathology should hold for the video-frame budget (70 tokens).
+        (900, 3, 70),
+        # And for any other supported budget.
+        (4000, 2, 1120),
+    ],
+)
+@pytest.mark.parametrize("model_id", [GEMMA4_MODEL_ID])
+def test_compute_num_soft_tokens_does_not_exceed_max_soft_tokens(
+    model_id: str,
+    image_width: int,
+    image_height: int,
+    max_soft_tokens: int,
+):
+    """Regression for the Gemma 3/4 multimodal crash.
+
+    `_compute_num_soft_tokens` must never return a value larger than
+    `max_soft_tokens`. The HF Gemma 4 image processor clamps its vision
+    tower output to that value; if the prompt-side estimator returns more,
+    the prompt has more `image` placeholder tokens than the encoder will
+    fill, and `_merge_multimodal_embeddings` raises `ValueError` deep in
+    the model forward.
+    """
+    ctx = build_model_context(
+        model_id,
+        mm_processor_kwargs={"do_pan_and_scan": True},
+        limit_mm_per_prompt={"image": 1},
+    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+
+    num_soft_tokens = processor.info._compute_num_soft_tokens(
+        image_width=image_width,
+        image_height=image_height,
+        max_soft_tokens=max_soft_tokens,
+    )
+
+    assert num_soft_tokens <= max_soft_tokens, (
+        f"_compute_num_soft_tokens returned {num_soft_tokens} for "
+        f"image_width={image_width}, image_height={image_height}, "
+        f"max_soft_tokens={max_soft_tokens} — exceeds the cap that the HF "
+        f"image processor enforces on its vision tower output. This is "
+        f"the placeholder/encoder count mismatch that crashes EngineCore."
+    )
+
+
+@pytest.mark.parametrize(
+    ("mm_processor_kwargs", "expected_image_tokens"),
+    [
+        ({}, 280),
+        ({"max_soft_tokens": 70}, 70),
+        ({"max_soft_tokens": 280}, 280),
+        ({"max_soft_tokens": 1120}, 1120),
+        ({"images_kwargs": {"max_soft_tokens": 560}}, 560),
+        ({"images_kwargs": None}, 280),
+        ({"images_kwargs": "not-a-dict"}, 280),
+    ],
+)
+@pytest.mark.parametrize("model_id", [GEMMA4_MODEL_ID])
+def test_get_mm_max_tokens_per_item_respects_configured_max_soft_tokens(
+    model_id: str,
+    mm_processor_kwargs: dict[str, object],
+    expected_image_tokens: int,
+):
+    ctx = build_model_context(
+        model_id,
+        mm_processor_kwargs=mm_processor_kwargs,
+        limit_mm_per_prompt={"image": 1, "video": 1},
+    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+
+    tokens = processor.info.get_mm_max_tokens_per_item(
+        seq_len=ctx.model_config.max_model_len,
+        mm_counts={"image": 1, "video": 1},
+    )
+
+    assert tokens is not None
+    assert tokens["image"] == expected_image_tokens
+    assert tokens["video"] == 32 * (70 + 2 + 6)
+
+
+@pytest.mark.parametrize(
+    ("limit_mm_per_prompt", "expected_video_tokens"),
+    [
+        ({"video": 1}, 32 * (70 + 2 + 6)),
+        ({"video": {"count": 1}}, 32 * (70 + 2 + 6)),
+        ({"video": {"count": 1, "num_frames": 1}}, 1 * (70 + 2 + 6)),
+        ({"video": {"count": 1, "num_frames": 8}}, 8 * (70 + 2 + 6)),
+        ({"video": {"count": 1, "num_frames": 32}}, 32 * (70 + 2 + 6)),
+        ({"video": {"count": 1, "num_frames": 40}}, 32 * (70 + 2 + 6)),
+    ],
+)
+@pytest.mark.parametrize("model_id", [GEMMA4_MODEL_ID])
+def test_get_mm_max_tokens_per_item_respects_configured_video_num_frames(
+    model_id: str,
+    limit_mm_per_prompt: Mapping[str, int | Mapping[str, int]],
+    expected_video_tokens: int,
+):
+    ctx = build_model_context(
+        model_id,
+        limit_mm_per_prompt=limit_mm_per_prompt,
+    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+
+    tokens = processor.info.get_mm_max_tokens_per_item(
+        seq_len=ctx.model_config.max_model_len,
+        mm_counts={"video": 1},
+    )
+
+    assert tokens is not None
+    assert tokens["image"] == 280
+    assert tokens["video"] == expected_video_tokens
+
+
+@pytest.mark.parametrize("model_id", [GEMMA4_MODEL_ID])
+def test_get_prompt_updates_respects_nested_max_soft_tokens(model_id: str):
+    ctx = build_model_context(
+        model_id,
+        mm_processor_kwargs={"images_kwargs": {"max_soft_tokens": 560}},
+        limit_mm_per_prompt={"image": 1},
+    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+    image = PILImage.new("RGB", (1000, 1000), color="white")
+    image_size = image.size
+    mm_items = processor.info.parse_mm_data({"image": image})
+
+    prompt_update = processor._get_prompt_updates(mm_items, {}, {})[0]
+    replacement = prompt_update.resolve(0).content.full
+    expected = processor.info.get_image_repl(
+        image_width=image_size[0],
+        image_height=image_size[1],
+        processor=processor.info.get_hf_processor(),
+        max_soft_tokens=560,
+    ).full
+
+    assert replacement == expected
+
+
+@pytest.mark.parametrize("model_id", [GEMMA4_MODEL_ID])
+def test_limit_mm_per_prompt(
+    image_assets: ImageTestAssets,
+    model_id: str,
+):
+    """Test that limit_mm_per_prompt accurately restricts multiple images."""
+    # We only allow 1 image
+    ctx = build_model_context(
+        model_id,
+        mm_processor_kwargs={},
+        limit_mm_per_prompt={"image": 1},
+    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+
+    # Provide 2 images in the prompt
+    prompt = "<image><image>"
+    # image_assets usually has multiple images
+    images = [asset.pil_image for asset in image_assets][:2]
+    if len(images) < 2:
+        images = [images[0], images[0]]
+
+    mm_data = {"image": images}
+
+    # Expect ValueError when exceeding limit
+    with pytest.raises(ValueError, match="At most 1 image"):
+        processor(
+            prompt,
+            mm_items=processor.info.parse_mm_data(mm_data),
+            hf_processor_mm_kwargs={},
+        )
+
+
+# Regression guard for PR #43169 follow-up: the batched Gemma4 vision encoder
+# admitted ``chunk ~= 53`` on a 22 GiB L4 with a 26B AWQ model loaded,
+# allocating 2.43 GiB int64 inside
+# ``F.one_hot(num_classes=position_embedding_size)`` and OOMing because only
+# 2.41 GiB was actually free.  The fix sizes ``chunk`` from currently-free GPU
+# memory and counts the ``F.one_hot`` transient as the dominant cost.
+
+_encoder_chunk = Gemma4ForConditionalGeneration._encoder_chunk
+
+# Gemma4 vision config default (HF: configuration_gemma4.py).
+_POSITION_EMBEDDING_SIZE = 10240
+# Video frame: max_soft_tokens=70, pooling_kernel_size=2 -> 70 * 4 patches.
+_VIDEO_PATCHES_PER_FRAME = 280
+
+
+def test_encoder_chunk_tight_budget_fits_in_free():
+    free = 3 * GiB_bytes  # L4 22 GiB after 26B AWQ load.
+    total = 22 * GiB_bytes
+    chunk = _encoder_chunk(
+        _VIDEO_PATCHES_PER_FRAME, free, total, _POSITION_EMBEDDING_SIZE
+    )
+    one_hot_bytes = chunk * _VIDEO_PATCHES_PER_FRAME * 2 * _POSITION_EMBEDDING_SIZE * 8
+    assert one_hot_bytes <= free // 2
+
+
+def test_encoder_chunk_roomy_gpu_keeps_batching():
+    chunk = _encoder_chunk(
+        _VIDEO_PATCHES_PER_FRAME,
+        60 * GiB_bytes,
+        80 * GiB_bytes,
+        _POSITION_EMBEDDING_SIZE,
+    )
+    assert chunk > 8
+
+
+def test_encoder_chunk_zero_patches_is_safe():
+    assert (
+        _encoder_chunk(0, 60 * GiB_bytes, 80 * GiB_bytes, _POSITION_EMBEDDING_SIZE) == 1
+    )
+
+
+def test_encoder_chunk_zero_position_embedding_size_is_safe():
+    # Degenerate config: must not raise ZeroDivisionError.
+    assert (
+        _encoder_chunk(_VIDEO_PATCHES_PER_FRAME, 60 * GiB_bytes, 80 * GiB_bytes, 0) == 1
+    )
+
+
+def test_encoder_chunk_no_free_memory_falls_back_to_one():
+    assert (
+        _encoder_chunk(
+            _VIDEO_PATCHES_PER_FRAME, 0, 22 * GiB_bytes, _POSITION_EMBEDDING_SIZE
+        )
+        == 1
+    )
diff --git a/tests/models/multimodal/processing/test_glm4_1v.py b/tests/models/multimodal/processing/test_glm4_1v.py
index f70d00524275..5798c5663472 100644
--- a/tests/models/multimodal/processing/test_glm4_1v.py
+++ b/tests/models/multimodal/processing/test_glm4_1v.py
@@ -6,7 +6,7 @@
 from vllm.assets.video import VideoAsset
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import batched_tensors_equal
-from vllm.multimodal.video import OpenCVDynamicVideoBackend, OpenCVVideoBackend
+from vllm.multimodal.video import DynamicVideoBackend, VideoBackend
 
 from ...utils import build_model_context
 
@@ -70,9 +70,11 @@ def test_processor_override(
 
 @pytest.mark.parametrize("model_id", ["zai-org/GLM-4.1V-9B-Thinking"])
 @pytest.mark.parametrize("fps", [2])
+@pytest.mark.parametrize("backend", ["opencv", "pyav"])
 def test_video_loader_consistency(
     model_id: str,
     fps: int,
+    backend: str,
 ):
     """
     Ensure dynamic video loader (pre-sampled by loader) and normal video
@@ -93,9 +95,11 @@ def test_video_loader_consistency(
     with open(video_path, "rb") as f:
         video_bytes = f.read()
 
-    static_video, static_metadata = OpenCVVideoBackend.load_bytes(video_bytes)
-    dynamic_video, dynamic_metadata = OpenCVDynamicVideoBackend.load_bytes(
-        video_bytes, fps=fps
+    static_video, static_metadata = VideoBackend.load_bytes(
+        video_bytes, backend=backend
+    )
+    dynamic_video, dynamic_metadata = DynamicVideoBackend.load_bytes(
+        video_bytes, fps=fps, backend=backend
     )
 
     # pre-sampled loader shouldn't read all frames
diff --git a/tests/models/multimodal/processing/test_molmo2.py b/tests/models/multimodal/processing/test_molmo2.py
new file mode 100644
index 000000000000..c12f70c2765a
--- /dev/null
+++ b/tests/models/multimodal/processing/test_molmo2.py
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from types import SimpleNamespace
+
+import torch
+
+from vllm.model_executor.models.molmo2 import build_flat_image_bool_length
+
+
+def test_build_flat_image_bool_length_matches_molmoweb_processor_tokens():
+    hf_config = SimpleNamespace(
+        image_patch_id=151938,
+        low_res_image_start_token_id=151940,
+        image_start_token_id=151936,
+        image_col_id=151939,
+        image_end_token_id=151937,
+    )
+    image_grids = torch.tensor([[14, 14, 14, 23]], dtype=torch.long)
+
+    image_tokens, num_image_tokens = build_flat_image_bool_length(
+        image_grids,
+        hf_config,
+        image_use_col_tokens=True,
+        use_single_crop_col_tokens=None,
+        use_single_crop_start_token=False,
+    )
+
+    assert num_image_tokens.tolist() == [550]
+    assert len(image_tokens) == 550
+    assert image_tokens[0].item() == hf_config.image_start_token_id
+    assert (image_tokens == hf_config.image_col_id).sum().item() == 28
+
+
+def test_build_flat_image_bool_length_respects_disabled_col_tokens():
+    hf_config = SimpleNamespace(
+        image_patch_id=151938,
+        low_res_image_start_token_id=151940,
+        image_start_token_id=151936,
+        image_col_id=151939,
+        image_end_token_id=151937,
+    )
+    image_grids = torch.tensor([[2, 3, 5, 7]], dtype=torch.long)
+
+    image_tokens, num_image_tokens = build_flat_image_bool_length(
+        image_grids,
+        hf_config,
+        image_use_col_tokens=False,
+        use_single_crop_col_tokens=False,
+        use_single_crop_start_token=True,
+    )
+
+    assert num_image_tokens.tolist() == [45]
+    assert len(image_tokens) == 45
+    assert image_tokens[0].item() == hf_config.low_res_image_start_token_id
+    assert (image_tokens == hf_config.image_col_id).sum().item() == 0
diff --git a/tests/models/multimodal/processing/test_moondream3.py b/tests/models/multimodal/processing/test_moondream3.py
new file mode 100644
index 000000000000..a6284ae45fd1
--- /dev/null
+++ b/tests/models/multimodal/processing/test_moondream3.py
@@ -0,0 +1,553 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for Moondream3 multimodal processing.
+
+Includes:
+- Processor creation and application tests
+- Image tokenization and placeholder expansion tests
+- Tiling and cropping logic tests (CPU-based)
+- Pixel normalization tests
+"""
+
+import numpy as np
+import pytest
+import torch
+
+from vllm.multimodal import MULTIMODAL_REGISTRY
+
+from ....conftest import ImageTestAssets
+from ...utils import build_model_context
+
+MOONDREAM3_MODEL_ID = "moondream/moondream3-preview"
+# Expected multimodal prefix: BOS + 729 image tokens.
+EXPECTED_IMAGE_TOKENS = 730
+# Vision encoder constants
+CROP_SIZE = 378
+PATCH_SIZE = 14
+MAX_CROPS = 12
+
+
+@pytest.mark.parametrize("model_id", [MOONDREAM3_MODEL_ID])
+def test_processor_creation(model_id: str):
+    """Test that Moondream3 processor can be created."""
+    ctx = build_model_context(
+        model_id,
+        limit_mm_per_prompt={"image": 1},
+    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+    assert processor is not None
+
+
+@pytest.mark.parametrize("model_id", [MOONDREAM3_MODEL_ID])
+def test_processor_apply(
+    image_assets: ImageTestAssets,
+    model_id: str,
+):
+    """Test that Moondream3 processor can process inputs.
+
+    NOTE: The prompt includes the leading BOS token because Moondream3
+    pre-fills BOS and image embeddings together.
+    """
+    ctx = build_model_context(
+        model_id,
+        limit_mm_per_prompt={"image": 1},
+    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+
+    prompt = "<|endoftext|><image><|md_reserved_0|>query<|md_reserved_1|>What is this?<|md_reserved_2|>"  # noqa: E501
+    mm_data = {"image": [image_assets[0].pil_image]}
+
+    processed_inputs = processor(
+        prompt,
+        mm_items=processor.info.parse_mm_data(mm_data),
+        hf_processor_mm_kwargs={},
+    )
+
+    assert "prompt_token_ids" in processed_inputs
+    image_placeholders = processed_inputs["mm_placeholders"]["image"]
+    assert len(image_placeholders) == 1
+    assert image_placeholders[0].length == EXPECTED_IMAGE_TOKENS
+
+
+@pytest.mark.parametrize("model_id", [MOONDREAM3_MODEL_ID])
+def test_processor_pixel_values(
+    image_assets: ImageTestAssets,
+    model_id: str,
+):
+    """Test that pixel values are correctly produced."""
+    ctx = build_model_context(
+        model_id,
+        limit_mm_per_prompt={"image": 1},
+    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+
+    prompt = "<|endoftext|><image><|md_reserved_0|>query<|md_reserved_1|>What is this?<|md_reserved_2|>"  # noqa: E501
+    mm_data = {"image": [image_assets[0].pil_image]}
+
+    processed_inputs = processor(
+        prompt,
+        mm_items=processor.info.parse_mm_data(mm_data),
+        hf_processor_mm_kwargs={},
+    )
+
+    # Check mm_kwargs contains pixel_values
+    mm_kwargs = processed_inputs.get("mm_kwargs")
+    assert mm_kwargs is not None
+    mm_data_result = mm_kwargs.get_data()
+    assert "pixel_values" in mm_data_result
+
+    # Verify pixel_values shape
+    pixel_values = mm_data_result["pixel_values"]
+    assert pixel_values.dim() == 5  # [batch, num_crops, C, H, W]
+    assert pixel_values.shape[2] == 3  # RGB channels
+    assert pixel_values.shape[3] == 378  # crop height
+    assert pixel_values.shape[4] == 378  # crop width
+
+
+@pytest.mark.parametrize("model_id", [MOONDREAM3_MODEL_ID])
+def test_processor_image_token_expansion(
+    image_assets: ImageTestAssets,
+    model_id: str,
+):
+    """Test that <image> placeholder is expanded to correct number of tokens."""
+    ctx = build_model_context(
+        model_id,
+        limit_mm_per_prompt={"image": 1},
+    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+
+    prompt = "<|endoftext|><image><|md_reserved_0|>query<|md_reserved_1|>Describe.<|md_reserved_2|>"  # noqa: E501
+    mm_data = {"image": [image_assets[0].pil_image]}
+
+    processed_inputs = processor(
+        prompt,
+        mm_items=processor.info.parse_mm_data(mm_data),
+        hf_processor_mm_kwargs={},
+    )
+    image_placeholders = processed_inputs["mm_placeholders"]["image"]
+    assert len(image_placeholders) == 1
+    assert image_placeholders[0].length == EXPECTED_IMAGE_TOKENS
+
+
+@pytest.mark.parametrize("model_id", [MOONDREAM3_MODEL_ID])
+def test_multi_crop_tiling(
+    model_id: str,
+):
+    """Test that large images produce correct multi-crop tiling."""
+    from PIL import Image
+
+    from vllm.transformers_utils.processors.moondream3 import Moondream3Processor
+
+    processor = Moondream3Processor.from_pretrained(model_id, trust_remote_code=True)
+
+    # Create a large image that requires multiple crops
+    large_image = Image.new("RGB", (1000, 1000), color="blue")
+    pixel_values, tiling = processor.preprocess_image(large_image)
+
+    # Large images should produce more than 1x1 tiling
+    assert tiling[0] >= 1 and tiling[1] >= 1
+    # Check that we have global crop + local crops
+    expected_crops = tiling[0] * tiling[1] + 1
+    assert pixel_values.shape[0] == expected_crops
+
+
+@pytest.mark.parametrize(
+    "image_size",
+    [
+        (500, 500),
+        (800, 600),
+        (1920, 1080),
+    ],
+)
+@pytest.mark.parametrize("model_id", [MOONDREAM3_MODEL_ID])
+def test_tiling_various_sizes(
+    image_size: tuple[int, int],
+    model_id: str,
+):
+    """Test tiling with various image sizes."""
+    from PIL import Image
+
+    from vllm.transformers_utils.processors.moondream3 import Moondream3Processor
+
+    processor = Moondream3Processor.from_pretrained(model_id, trust_remote_code=True)
+
+    width, height = image_size
+    image = Image.new("RGB", (width, height), color="red")
+    pixel_values, tiling = processor.preprocess_image(image)
+
+    # Basic shape checks
+    assert pixel_values.dim() == 4  # [num_crops, C, H, W]
+    assert pixel_values.shape[1] == 3  # RGB
+    assert pixel_values.shape[2] == 378  # crop height
+    assert pixel_values.shape[3] == 378  # crop width
+
+    # Tiling should respect max_crops (12)
+    assert tiling[0] * tiling[1] <= 12
+
+
+@pytest.mark.parametrize("model_id", [MOONDREAM3_MODEL_ID])
+def test_pixel_normalization(
+    model_id: str,
+):
+    """Test that pixel values are normalized to [-1, 1] range."""
+    from PIL import Image
+
+    from vllm.transformers_utils.processors.moondream3 import Moondream3Processor
+
+    processor = Moondream3Processor.from_pretrained(model_id, trust_remote_code=True)
+
+    # Create test image
+    image = Image.new("RGB", (378, 378), color="green")
+    pixel_values, _ = processor.preprocess_image(image)
+
+    # Normalization: (x - 0.5) / 0.5 = 2*x - 1
+    # For input [0, 1], output should be [-1, 1]
+    assert pixel_values.min() >= -1.0
+    assert pixel_values.max() <= 1.0
+
+
+@pytest.mark.parametrize("model_id", [MOONDREAM3_MODEL_ID])
+def test_chat_template_with_image(
+    image_assets: ImageTestAssets,
+    model_id: str,
+):
+    """Test that chat template correctly formats BOS + image + prompt."""
+    ctx = build_model_context(
+        model_id,
+        limit_mm_per_prompt={"image": 1},
+    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+    tokenizer = ctx.tokenizer
+
+    # Use the chat template format
+    prompt = "<|endoftext|><image><|md_reserved_0|>query<|md_reserved_1|>What is this?<|md_reserved_2|>"  # noqa: E501
+    mm_data = {"image": [image_assets[0].pil_image]}
+
+    processed_inputs = processor(
+        prompt,
+        mm_items=processor.info.parse_mm_data(mm_data),
+        hf_processor_mm_kwargs={},
+    )
+    token_ids = processed_inputs["prompt_token_ids"]
+
+    # BOS token (<|endoftext|>) should be token ID 0
+    bos_token_id = tokenizer.encode("<|endoftext|>", add_special_tokens=False)[0]
+    assert bos_token_id == 0
+
+    # First token should be BOS
+    assert token_ids[0] == bos_token_id
+
+
+@pytest.mark.parametrize(
+    "content",
+    [
+        pytest.param(
+            [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": "https://example.invalid/image.png"},
+                },
+                {"type": "text", "text": "What is in this image?"},
+            ],
+            id="image-first",
+        ),
+        pytest.param(
+            [
+                {"type": "text", "text": "What is in this image?"},
+                {
+                    "type": "image_url",
+                    "image_url": {"url": "https://example.invalid/image.png"},
+                },
+            ],
+            id="text-first",
+        ),
+    ],
+)
+@pytest.mark.parametrize("model_id", [MOONDREAM3_MODEL_ID])
+def test_chat_template_content_list_uses_moondream_image_prefix(
+    image_assets: ImageTestAssets,
+    content: list[dict[str, object]],
+    model_id: str,
+):
+    ctx = build_model_context(
+        model_id,
+        limit_mm_per_prompt={"image": 1},
+    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+    hf_processor = processor.info.get_hf_processor()
+
+    prompt = hf_processor.tokenizer.apply_chat_template(
+        [{"role": "user", "content": content}],
+        chat_template=hf_processor.chat_template,
+        tokenize=False,
+    )
+
+    expected_prompt = (
+        "<|endoftext|><image><|md_reserved_0|>query<|md_reserved_1|>"
+        "What is in this image?<|md_reserved_2|>"
+    )
+    assert prompt == expected_prompt
+
+    processed_inputs = processor(
+        prompt,
+        mm_items=processor.info.parse_mm_data({"image": [image_assets[0].pil_image]}),
+        hf_processor_mm_kwargs={},
+    )
+    image_placeholders = processed_inputs["mm_placeholders"]["image"]
+    assert len(image_placeholders) == 1
+    assert image_placeholders[0].length == EXPECTED_IMAGE_TOKENS
+
+
+@pytest.mark.parametrize("model_id", [MOONDREAM3_MODEL_ID])
+def test_bos_token_always_first(
+    image_assets: ImageTestAssets,
+    model_id: str,
+):
+    """Test that BOS token (ID 0) is always at position 0."""
+    ctx = build_model_context(
+        model_id,
+        limit_mm_per_prompt={"image": 1},
+    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+
+    # Start with BOS token explicitly
+    prompt = "<|endoftext|><image><|md_reserved_0|>query<|md_reserved_1|>Describe this image.<|md_reserved_2|>"  # noqa: E501
+    mm_data = {"image": [image_assets[0].pil_image]}
+
+    processed_inputs = processor(
+        prompt,
+        mm_items=processor.info.parse_mm_data(mm_data),
+        hf_processor_mm_kwargs={},
+    )
+    token_ids = processed_inputs["prompt_token_ids"]
+
+    # Token ID 0 (<|endoftext|>) should be the first token
+    assert token_ids[0] == 0, (
+        f"Expected BOS token (0) at position 0, got {token_ids[0]}"
+    )
+
+
+@pytest.mark.parametrize("model_id", [MOONDREAM3_MODEL_ID])
+def test_processor_with_small_image(
+    model_id: str,
+):
+    """Test processor with image smaller than crop size."""
+    from PIL import Image
+
+    from vllm.transformers_utils.processors.moondream3 import Moondream3Processor
+
+    processor = Moondream3Processor.from_pretrained(model_id, trust_remote_code=True)
+
+    # Small image (smaller than crop size)
+    small_image = Image.new("RGB", (100, 100), color="yellow")
+    pixel_values, tiling = processor.preprocess_image(small_image)
+
+    # Small images should use 1x1 tiling
+    assert tiling == (1, 1)
+    # Should have 2 crops (global + 1 local)
+    assert pixel_values.shape[0] == 2
+
+
+@pytest.mark.parametrize(
+    "image_kind",
+    [
+        pytest.param("numpy_hwc", id="numpy-hwc"),
+        pytest.param("numpy_chw", id="numpy-chw"),
+        pytest.param("torch_chw", id="torch-chw"),
+    ],
+)
+@pytest.mark.parametrize("model_id", [MOONDREAM3_MODEL_ID])
+def test_preprocess_image_accepts_non_pil_inputs(
+    image_assets: ImageTestAssets,
+    image_kind: str,
+    model_id: str,
+):
+    from vllm.transformers_utils.processors.moondream3 import Moondream3Processor
+
+    processor = Moondream3Processor.from_pretrained(model_id, trust_remote_code=True)
+    pil_image = image_assets[0].pil_image.convert("RGB")
+    hwc_array = np.asarray(pil_image)
+    expected_pixel_values, expected_tiling = processor.preprocess_image(pil_image)
+
+    if image_kind == "numpy_hwc":
+        image = hwc_array
+    elif image_kind == "numpy_chw":
+        image = np.transpose(hwc_array, (2, 0, 1))
+    else:
+        image = torch.from_numpy(np.transpose(hwc_array, (2, 0, 1)).copy())
+
+    pixel_values, tiling = processor.preprocess_image(image)
+
+    assert tiling == expected_tiling
+    assert pixel_values.shape == expected_pixel_values.shape
+    assert pixel_values.dtype == torch.bfloat16
+    assert torch.equal(pixel_values, expected_pixel_values)
+
+
+@pytest.mark.parametrize("image_kind", ["numpy_chw", "torch_chw"])
+@pytest.mark.parametrize("model_id", [MOONDREAM3_MODEL_ID])
+def test_processor_apply_accepts_non_pil_image_inputs(
+    image_assets: ImageTestAssets,
+    image_kind: str,
+    model_id: str,
+):
+    ctx = build_model_context(
+        model_id,
+        limit_mm_per_prompt={"image": 1},
+    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+
+    prompt = "<|endoftext|><image><|md_reserved_0|>query<|md_reserved_1|>What is this?<|md_reserved_2|>"  # noqa: E501
+    hwc_array = np.asarray(image_assets[0].pil_image.convert("RGB"))
+    chw_array = np.transpose(hwc_array, (2, 0, 1)).copy()
+    image = chw_array if image_kind == "numpy_chw" else torch.from_numpy(chw_array)
+
+    processed_inputs = processor(
+        prompt,
+        mm_items=processor.info.parse_mm_data({"image": [image]}),
+        hf_processor_mm_kwargs={},
+    )
+
+    image_placeholders = processed_inputs["mm_placeholders"]["image"]
+    assert len(image_placeholders) == 1
+    assert image_placeholders[0].length == EXPECTED_IMAGE_TOKENS
+
+    mm_kwargs = processed_inputs["mm_kwargs"].get_data()
+    assert mm_kwargs["pixel_values"].shape[2:] == (3, 378, 378)
+
+
+class TestMoondream3TilingLogic:
+    """CPU-based tests for Moondream3 tiling selection logic.
+
+    These tests validate the select_tiling() function which determines
+    how images are divided into crops for the vision encoder.
+    """
+
+    def test_small_image_no_tiling(self):
+        """Small images should use 1x1 tiling."""
+        from vllm.transformers_utils.processors.moondream3 import select_tiling
+
+        tiling = select_tiling(
+            height=300, width=300, crop_size=CROP_SIZE, max_crops=MAX_CROPS
+        )
+        assert tiling == (1, 1)
+
+    def test_exact_crop_size(self):
+        """Image exactly at crop size should use 1x1."""
+        from vllm.transformers_utils.processors.moondream3 import select_tiling
+
+        tiling = select_tiling(
+            height=CROP_SIZE, width=CROP_SIZE, crop_size=CROP_SIZE, max_crops=MAX_CROPS
+        )
+        assert tiling == (1, 1)
+
+    def test_large_square_image(self):
+        """Large square image should use multiple tiles."""
+        from vllm.transformers_utils.processors.moondream3 import select_tiling
+
+        tiling = select_tiling(
+            height=800, width=800, crop_size=CROP_SIZE, max_crops=MAX_CROPS
+        )
+        h_tiles, w_tiles = tiling
+        assert h_tiles >= 2
+        assert w_tiles >= 2
+        assert h_tiles * w_tiles <= MAX_CROPS
+
+    def test_wide_image(self):
+        """Wide image should have more width tiles."""
+        from vllm.transformers_utils.processors.moondream3 import select_tiling
+
+        tiling = select_tiling(
+            height=400, width=1200, crop_size=CROP_SIZE, max_crops=MAX_CROPS
+        )
+        h_tiles, w_tiles = tiling
+        assert w_tiles >= h_tiles
+
+    def test_tall_image(self):
+        """Tall image should have more height tiles."""
+        from vllm.transformers_utils.processors.moondream3 import select_tiling
+
+        tiling = select_tiling(
+            height=1200, width=400, crop_size=CROP_SIZE, max_crops=MAX_CROPS
+        )
+        h_tiles, w_tiles = tiling
+        assert h_tiles >= w_tiles
+
+    def test_respects_max_crops(self):
+        """Tiling should not exceed max_crops."""
+        from vllm.transformers_utils.processors.moondream3 import select_tiling
+
+        tiling = select_tiling(
+            height=2000, width=2000, crop_size=CROP_SIZE, max_crops=4
+        )
+        h_tiles, w_tiles = tiling
+        assert h_tiles * w_tiles <= 4
+
+
+class TestMoondream3VisionShapes:
+    """CPU-based tests for vision encoder expected shapes.
+
+    These tests verify the mathematical relationships between
+    crop size, patch size, and token counts.
+    """
+
+    def test_expected_patch_count(self):
+        """Test 378/14 = 27 patches per side, 729 total."""
+        patches_per_side = CROP_SIZE // PATCH_SIZE
+        total_patches = patches_per_side**2
+
+        assert patches_per_side == 27
+        assert total_patches == EXPECTED_IMAGE_TOKENS - 1
+
+    def test_patch_embedding_input_dim(self):
+        """Test patch embedding input dimension."""
+        channels = 3
+        input_dim = PATCH_SIZE * PATCH_SIZE * channels
+
+        assert input_dim == 14 * 14 * 3
+        assert input_dim == 588
+
+
+class TestMoondream3TauAttention:
+    """CPU-based tests for tau attention scaling components.
+
+    These tests validate the tau attention formula used in Moondream3:
+    - Token-based: tok_q = tanh(gelu(qkv) @ tau_wq.T)
+    - Position-based: tau_pos = 1 + (sigmoid(alpha * log(pos+1)) - 0.5)
+    """
+
+    def test_tau_position_range(self):
+        """Test tau position scaling produces values in valid range."""
+        num_heads = 32
+        seq_len = 100
+
+        tau_alpha = torch.randn(num_heads)
+        positions = torch.arange(seq_len)
+
+        pos_float = (positions.float() + 1.0).clamp(min=1e-6)
+        pos_log = pos_float.log()
+        tau_pos = 1.0 + (torch.sigmoid(tau_alpha[:, None] * pos_log[None, :]) - 0.5)
+
+        assert tau_pos.shape == (num_heads, seq_len)
+        # tau_pos should be between 0.5 and 1.5
+        assert tau_pos.min() >= 0.5
+        assert tau_pos.max() <= 1.5
+
+    def test_tau_token_output_range(self):
+        """Test tau token scaling output is bounded by tanh."""
+        import torch.nn.functional as F
+
+        seq_len = 100
+        qkv_dim = 6144  # 2048 * 3
+        num_heads = 32
+
+        qkv = torch.randn(seq_len, qkv_dim)
+        tau_wq = torch.randn(num_heads, qkv_dim)
+
+        tok_feat = F.gelu(qkv)
+        tok_q = torch.tanh(tok_feat @ tau_wq.t())
+
+        assert tok_q.shape == (seq_len, num_heads)
+        # tanh output is bounded by [-1, 1]
+        assert tok_q.min() >= -1.0
+        assert tok_q.max() <= 1.0
diff --git a/tests/models/multimodal/processing/test_musicflamingo.py b/tests/models/multimodal/processing/test_musicflamingo.py
index 625e1ad8d29b..ba14b7760299 100644
--- a/tests/models/multimodal/processing/test_musicflamingo.py
+++ b/tests/models/multimodal/processing/test_musicflamingo.py
@@ -17,11 +17,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from importlib.metadata import version
 from unittest.mock import MagicMock
 
 import numpy as np
 import pytest
 import torch
+from packaging.version import Version
 from transformers import PretrainedConfig
 
 from tests.models.registry import HF_EXAMPLE_MODELS
@@ -122,6 +124,11 @@ def test_musicflamingo_dummy_text_uses_plain_audio_tokens(mock_ctx):
     assert builder.get_dummy_text({"audio": 2}) == "<sound><sound>"
 
 
+@pytest.mark.skipif(
+    Version(version("transformers")) >= Version("5.5"),
+    reason="transformers v5.5 added native MusicFlamingoForConditionalGeneration "
+    "with a different get_audio_features signature (requires input_ids)",
+)
 def test_musicflamingo_audio_feature_pipeline_matches_hf_small_config():
     from transformers.models.musicflamingo import (
         modeling_musicflamingo as hf_musicflamingo_modeling,
diff --git a/tests/models/multimodal/processing/test_openvla.py b/tests/models/multimodal/processing/test_openvla.py
new file mode 100644
index 000000000000..b9ed02000d9a
--- /dev/null
+++ b/tests/models/multimodal/processing/test_openvla.py
@@ -0,0 +1,210 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for OpenVLA multimodal preprocessing."""
+
+import numpy as np
+import pytest
+import torch
+from PIL import Image
+from transformers import LlamaConfig
+
+from vllm.model_executor.models.openvla import (
+    OpenVLAForActionPrediction,
+    OpenVLAMultiModalProcessor,
+    OpenVLAProcessingInfo,
+)
+from vllm.multimodal.parse import ImageProcessorItems, MultiModalDataItems
+from vllm.transformers_utils.configs.openvla import OpenVLAConfig
+from vllm.transformers_utils.processors.openvla import (
+    IMAGENET_MEAN,
+    IMAGENET_STD,
+    SIGLIP_MEAN,
+    SIGLIP_STD,
+    OpenVLAImageProcessor,
+    OpenVLAProcessor,
+    preprocess_openvla_image,
+    to_rgb_image,
+)
+
+pytestmark = pytest.mark.cpu_test
+
+
+class _FakeTokenizer:
+    bos_token_id = 1
+    init_kwargs: dict[str, object] = {}
+
+    def encode(self, prompt: str, **kwargs: object) -> list[int]:
+        assert prompt == "In: test\nOut:"
+        if kwargs == {"add_special_tokens": True}:
+            return [self.bos_token_id, 10, 11]
+        assert kwargs == {"add_special_tokens": False}
+        return [10, 11]
+
+    def __call__(self, text: str, **kwargs: object) -> dict[str, list[list[int]]]:
+        return {"input_ids": [self.encode(text, **kwargs)]}
+
+
+class _FakeProcessingInfo:
+    def __init__(self) -> None:
+        self.config = OpenVLAConfig()
+
+    def get_hf_config(self) -> OpenVLAConfig:
+        return self.config
+
+    def get_tokenizer(self) -> _FakeTokenizer:
+        return _FakeTokenizer()
+
+    def get_num_image_tokens(self, *, image_width: int, image_height: int) -> int:
+        assert image_width > 0
+        assert image_height > 0
+        return 256
+
+
+class _FakeOpenVLAProcessingInfo(OpenVLAProcessingInfo):
+    def get_hf_config(self) -> OpenVLAConfig:
+        return OpenVLAConfig()
+
+
+def _make_processor() -> OpenVLAMultiModalProcessor:
+    processor = OpenVLAMultiModalProcessor.__new__(OpenVLAMultiModalProcessor)
+    processor.info = _FakeProcessingInfo()
+    return processor
+
+
+def test_openvla_config_converts_text_config_dict() -> None:
+    config = OpenVLAConfig(
+        text_config={
+            "vocab_size": 123,
+            "hidden_size": 64,
+            "intermediate_size": 128,
+            "num_hidden_layers": 2,
+            "num_attention_heads": 4,
+        },
+    )
+
+    assert isinstance(config.text_config, LlamaConfig)
+    assert config.text_config.vocab_size == 123
+    assert config.text_config.hidden_size == 64
+    assert config.text_config.architectures == ["LlamaForCausalLM"]
+
+
+@pytest.mark.parametrize(
+    ("image", "expected_size", "expected_pixel"),
+    [
+        (
+            torch.tensor(
+                [
+                    [[1.0, 1.0], [1.0, 1.0]],
+                    [[0.0, 0.0], [0.0, 0.0]],
+                    [[0.0, 0.0], [0.0, 0.0]],
+                ]
+            ),
+            (2, 2),
+            (255, 0, 0),
+        ),
+        (
+            np.full((4, 5, 1), 128, dtype=np.uint8),
+            (5, 4),
+            (128, 128, 128),
+        ),
+    ],
+)
+def test_openvla_to_rgb_image(
+    image: torch.Tensor | np.ndarray,
+    expected_size: tuple[int, int],
+    expected_pixel: tuple[int, int, int],
+) -> None:
+    rgb_image = to_rgb_image(image)
+
+    assert rgb_image.mode == "RGB"
+    assert rgb_image.size == expected_size
+    assert rgb_image.getpixel((0, 0)) == expected_pixel
+
+
+def test_openvla_preprocess_image_matches_expected_normalization() -> None:
+    image = Image.fromarray(
+        np.arange(12 * 10 * 3, dtype=np.uint8).reshape(10, 12, 3),
+        mode="RGB",
+    )
+
+    pixel_values = preprocess_openvla_image(image, image_size=224)
+
+    resized = image.resize((224, 224), Image.Resampling.BICUBIC)
+    raw = np.asarray(resized, dtype=np.float32) / 255.0
+    expected_dinov2 = ((raw - IMAGENET_MEAN) / IMAGENET_STD).transpose(2, 0, 1)
+    expected_siglip = ((raw - SIGLIP_MEAN) / SIGLIP_STD).transpose(2, 0, 1)
+    expected = np.concatenate([expected_dinov2, expected_siglip], axis=0)
+
+    assert pixel_values.shape == (6, 224, 224)
+    assert pixel_values.dtype == torch.float32
+    torch.testing.assert_close(pixel_values, torch.from_numpy(expected))
+
+
+def test_openvla_processor_outputs_pixel_values() -> None:
+    processor = OpenVLAProcessor(
+        image_processor=OpenVLAImageProcessor(image_size=224),
+        tokenizer=_FakeTokenizer(),
+    )
+    image = Image.new("RGB", (8, 8), color=(255, 0, 0))
+
+    batch = processor(
+        text="In: test\nOut:",
+        images=image,
+        text_kwargs={"add_special_tokens": True},
+    )
+
+    assert batch["input_ids"] == [[1, 10, 11]]
+    assert batch["pixel_values"].shape == (1, 6, 224, 224)
+    assert batch["pixel_values"].dtype == torch.float32
+
+
+def test_openvla_image_processor_outputs_pixel_values() -> None:
+    processor = OpenVLAImageProcessor(image_size=224)
+    image = Image.new("RGB", (8, 8), color=(255, 0, 0))
+
+    output = processor(images=image)
+
+    assert output["pixel_values"].shape == (1, 6, 224, 224)
+    assert output["pixel_values"].dtype == torch.float32
+
+
+def test_openvla_processing_info_token_counts() -> None:
+    info = _FakeOpenVLAProcessingInfo.__new__(_FakeOpenVLAProcessingInfo)
+
+    assert info.get_supported_mm_limits() == {"image": 1}
+    assert info.get_num_image_tokens(image_width=640, image_height=480) == 256
+    assert info.get_image_size_with_most_features().width == 224
+    assert info.get_image_size_with_most_features().height == 224
+    assert info.get_mm_max_tokens_per_item(seq_len=2048, mm_counts={"image": 1}) == {
+        "image": 256
+    }
+
+
+def test_openvla_prompt_update_inserts_image_tokens_after_bos() -> None:
+    processor = _make_processor()
+    image = Image.new("RGB", (640, 480), color=(255, 255, 255))
+    mm_items = MultiModalDataItems({"image": ImageProcessorItems([image])})
+
+    assert (
+        processor._hf_processor_applies_updates("In: test\nOut:", mm_items, {}, {})
+        is False
+    )
+
+    prompt_update = processor._get_prompt_updates(mm_items, {}, {})[0]
+    resolved = prompt_update.resolve(0)
+    content = resolved.content
+
+    assert resolved.modality == "image"
+    assert [
+        (match.start_idx, match.end_idx)
+        for match in resolved.iter_matches([1, 10, 11], _FakeTokenizer())
+    ] == [(1, 1)]
+    assert content.full == [32000] * 256
+
+    is_embed = content.is_embed(None, content.full)
+    assert is_embed.dtype == torch.bool
+    assert is_embed.tolist() == [True] * 256
+
+
+def test_openvla_placeholder_string() -> None:
+    assert OpenVLAForActionPrediction.get_placeholder_str("image", 0) is None
diff --git a/tests/models/multimodal/processing/test_step3_vl_image_embeds.py b/tests/models/multimodal/processing/test_step3_vl_image_embeds.py
new file mode 100644
index 000000000000..a43c93c0741e
--- /dev/null
+++ b/tests/models/multimodal/processing/test_step3_vl_image_embeds.py
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for Step3-VL precomputed image embedding inputs."""
+
+import pytest
+import torch
+
+from vllm.model_executor.models.step3_vl import (
+    Step3VLForConditionalGeneration,
+    Step3VLImageEmbeddingInputs,
+)
+
+
+class _FakeStep3VL:
+    @staticmethod
+    def _process_image_features(image_features: torch.Tensor) -> torch.Tensor:
+        return image_features
+
+
+def test_image_embedding_inputs_construction():
+    """Step3VLImageEmbeddingInputs should store embeddings in the data field."""
+    image_embeds = torch.randn(2, 16, 64)
+
+    inputs = Step3VLImageEmbeddingInputs(
+        type="image_embeds",
+        data=image_embeds,
+    )
+
+    assert inputs["type"] == "image_embeds"
+    assert torch.equal(inputs["data"], image_embeds)
+    assert torch.equal(inputs.data, image_embeds)
+
+
+def test_image_embedding_inputs_validation_rejects_wrong_rank():
+    """Validation should reject tensors with wrong rank."""
+    with pytest.raises(ValueError, match="rank"):
+        Step3VLImageEmbeddingInputs(
+            type="image_embeds",
+            data=torch.randn(16, 64),
+        )
+
+
+def test_process_image_embeds_does_not_require_pixel_input_fields():
+    """The image_embeds branch should not reference patch pixel metadata."""
+    image_embeds = torch.randn(2, 4, 8)
+    image_input = Step3VLImageEmbeddingInputs(
+        type="image_embeds",
+        data=image_embeds,
+    )
+
+    outputs = Step3VLForConditionalGeneration._process_image_input(
+        _FakeStep3VL(),
+        image_input,
+    )
+
+    assert len(outputs) == 2
+    assert torch.equal(outputs[0], image_embeds[0])
+    assert torch.equal(outputs[1], image_embeds[1])
diff --git a/tests/models/multimodal/test_nano_nemotron_vl.py b/tests/models/multimodal/test_nano_nemotron_vl.py
new file mode 100644
index 000000000000..aa93ee31168d
--- /dev/null
+++ b/tests/models/multimodal/test_nano_nemotron_vl.py
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from vllm.model_executor.models.nano_nemotron_vl import NemotronH_Nano_VL_V2
+
+
+class _TextOnlyMultiModalConfig:
+    def get_limit_per_prompt(self, modality: str) -> int:
+        return 0
+
+
+class _ImageOnlyMultiModalConfig:
+    def get_limit_per_prompt(self, modality: str) -> int:
+        return 1 if modality == "image" else 0
+
+
+class _ModelConfig:
+    multimodal_config = _TextOnlyMultiModalConfig()
+
+
+class _ImageOnlyModelConfig:
+    multimodal_config = _ImageOnlyMultiModalConfig()
+
+
+class _LanguageModel:
+    def __init__(self) -> None:
+        self.loaded_weights: list[tuple[str, object]] = []
+
+    def load_weights(self, weights):
+        self.loaded_weights = list(weights)
+
+
+class _MissingMultiModalModule:
+    def named_parameters(self):
+        raise AssertionError("multimodal weights should not be inspected")
+
+    def load_weights(self, weights):
+        raise AssertionError("multimodal weights should not be loaded")
+
+
+class _AdapterModule:
+    def named_parameters(self):
+        return []
+
+
+class _VisionModel:
+    def __init__(self) -> None:
+        self.loaded_weights: list[tuple[str, object]] = []
+
+    def load_weights(self, weights):
+        self.loaded_weights = list(weights)
+
+
+class _FakeTensor:
+    """Sentinel stand-in for torch.Tensor in load_weights tests. Supports the
+    .detach().clone() chain used by load_weights for buffered mm weights;
+    both methods return self so identity (and the existing equality
+    assertions) are preserved through cloning."""
+
+    def detach(self):
+        return self
+
+    def clone(self):
+        return self
+
+
+def test_nano_nemotron_vl_skips_multimodal_weights_in_text_only_mode():
+    model = object.__new__(NemotronH_Nano_VL_V2)
+    language_model = _LanguageModel()
+    object.__setattr__(model, "model_config", _ModelConfig())
+    object.__setattr__(model, "language_model", language_model)
+    object.__setattr__(model, "mlp1", _AdapterModule())
+    object.__setattr__(model, "vision_model", _MissingMultiModalModule())
+    object.__setattr__(model, "sound_encoder", None)
+
+    language_weight = object()
+    model.load_weights(
+        [
+            ("language_model.layers.0.weight", language_weight),
+            ("mlp1.0.weight", object()),
+            ("vision_model.radio_model.encoder.weight", object()),
+            ("sound_encoder.encoder.weight", object()),
+        ]
+    )
+
+    assert language_model.loaded_weights == [("layers.0.weight", language_weight)]
+
+
+def test_nano_nemotron_vl_loads_vision_weights_without_sound_encoder():
+    model = object.__new__(NemotronH_Nano_VL_V2)
+    language_model = _LanguageModel()
+    vision_model = _VisionModel()
+    object.__setattr__(model, "model_config", _ImageOnlyModelConfig())
+    object.__setattr__(model, "language_model", language_model)
+    object.__setattr__(model, "mlp1", _AdapterModule())
+    object.__setattr__(model, "vision_model", vision_model)
+    object.__setattr__(model, "sound_encoder", None)
+
+    language_weight = object()
+    vision_weight = _FakeTensor()
+    model.load_weights(
+        [
+            ("language_model.layers.0.weight", language_weight),
+            ("vision_model.radio_model.encoder.weight", vision_weight),
+        ]
+    )
+
+    assert language_model.loaded_weights == [("layers.0.weight", language_weight)]
+    assert vision_model.loaded_weights == [
+        ("radio_model.encoder.weight", vision_weight)
+    ]
+
+
+def test_nano_nemotron_vl_requires_sound_encoder_for_sound_weights():
+    model = object.__new__(NemotronH_Nano_VL_V2)
+    language_model = _LanguageModel()
+    vision_model = _VisionModel()
+    object.__setattr__(model, "model_config", _ImageOnlyModelConfig())
+    object.__setattr__(model, "language_model", language_model)
+    object.__setattr__(model, "mlp1", _AdapterModule())
+    object.__setattr__(model, "vision_model", vision_model)
+    object.__setattr__(model, "sound_encoder", None)
+
+    with pytest.raises(AssertionError):
+        model.load_weights([("sound_encoder.encoder.weight", object())])
diff --git a/tests/models/quantization/test_awq.py b/tests/models/quantization/test_awq.py
index 6b34262d3e9e..25a63f6bd907 100644
--- a/tests/models/quantization/test_awq.py
+++ b/tests/models/quantization/test_awq.py
@@ -93,6 +93,37 @@ def run_awq_test(
         )
 
 
+@pytest.mark.parametrize(
+    ("model", "quantization", "dtype"),
+    [
+        ("mattbucci/gemma-4-26B-AWQ", "awq", "float16"),
+        ("cyankiwi/gemma-4-26B-A4B-it-AWQ-4bit", "compressed-tensors", "bfloat16"),
+    ],
+    ids=[
+        "gemma4-moe-standard-awq-dot-suffix",
+        "gemma4-moe-compressed-tensors-underscore-suffix",
+    ],
+)
+@torch.inference_mode()
+def test_awq_load(
+    vllm_runner: type[VllmRunner],
+    example_prompts: list[str],
+    model: str,
+    quantization: str,
+    dtype: str,
+) -> None:
+    """Regression test: AWQ weight loading must not KeyError."""
+    with vllm_runner(
+        model,
+        quantization=quantization,
+        dtype=dtype,
+        max_model_len=128,
+        enforce_eager=True,
+    ) as vllm_model:
+        outputs = vllm_model.generate_greedy(example_prompts[:2], max_tokens=32)
+    assert len(outputs) == 2
+
+
 @pytest.mark.parametrize(
     ("source_model", "quant_model"),
     [("OpenGVLab/InternVL2-2B", "OpenGVLab/InternVL2-2B-AWQ")],
diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py
index de4f19aff5c8..d6f2b86c7af3 100644
--- a/tests/models/quantization/test_bitsandbytes.py
+++ b/tests/models/quantization/test_bitsandbytes.py
@@ -137,7 +137,11 @@ def test_load_pp_4bit_bnb_model(model_name, description) -> None:
         "--pipeline-parallel-size",
         "2",
     ]
-    compare_two_settings(model_name, common_args, pp_args)
+    compare_two_settings(
+        model_name,
+        common_args,
+        pp_args,
+    )
 
 
 @pytest.mark.skipif(
diff --git a/tests/models/quantization/test_fp8.py b/tests/models/quantization/test_fp8.py
index 9be5fd33022f..5f3c65476127 100644
--- a/tests/models/quantization/test_fp8.py
+++ b/tests/models/quantization/test_fp8.py
@@ -9,8 +9,8 @@
 import pytest
 
 from tests.quantization.utils import is_quant_method_supported
-from vllm.v1.attention.backends.fa_utils import flash_attn_supports_fp8
 from vllm.platforms import current_platform
+from vllm.v1.attention.backends.fa_utils import get_flash_attn_version
 from ..utils import check_logprobs_close
 
 
@@ -68,7 +68,13 @@ def test_models(
     if kv_cache_dtype == "fp8_e5m2" and current_platform.is_rocm():
         pytest.skip(f"{kv_cache_dtype} is currently not supported on ROCm/HIP.")
 
-    if not flash_attn_supports_fp8():
+    if not (
+        current_platform.is_xpu()
+        or (
+            get_flash_attn_version() == 3
+            and current_platform.is_device_capability_family(90)
+        )
+    ):
         pytest.skip(
             f"{kv_cache_dtype} is not supported on this GPU type with {backend} attention."
         )
diff --git a/tests/models/quantization/test_gpt_oss.py b/tests/models/quantization/test_gpt_oss.py
index 21cc9555bfde..fe9ddd2f6bae 100644
--- a/tests/models/quantization/test_gpt_oss.py
+++ b/tests/models/quantization/test_gpt_oss.py
@@ -21,8 +21,8 @@
 import pytest
 from packaging import version
 
+from vllm.platforms import current_platform
 from vllm.platforms.rocm import on_gfx950
-from vllm.utils.torch_utils import cuda_device_count_stateless
 
 MODEL_ACCURACIES = {
     # Full quantization: attention linears and MoE linears
@@ -89,7 +89,7 @@ def test_gpt_oss_attention_quantization(
     expected_accuracy: float,
     monkeypatch: pytest.MonkeyPatch,
 ):
-    if tp_size > cuda_device_count_stateless():
+    if tp_size > current_platform.device_count():
         pytest.skip("Not enough GPUs to run this test case")
 
     if "amd/gpt-oss-20b-MoE-Quant-W-MXFP4-A-FP8-KV-FP8" in model_name and on_gfx950():
diff --git a/tests/models/quantization/test_gptq_marlin.py b/tests/models/quantization/test_gptq_marlin.py
index cf52ae39214d..de97ed95624c 100644
--- a/tests/models/quantization/test_gptq_marlin.py
+++ b/tests/models/quantization/test_gptq_marlin.py
@@ -1,10 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""Compares the outputs of gptq vs gptq_marlin.
+"""Tests AutoGPTQ (GPTQ with Marlin kernels) output correctness.
 
-Note: GPTQ and Marlin do not have bitwise correctness.
-As a result, in this test, we just confirm that the top selected tokens of the
-Marlin/GPTQ models are in the top 5 selections of each other.
 Note: Marlin internally uses locks to synchronize the threads. This can
 result in very slight nondeterminism for Marlin. As a result, we re-run the test
 up to 3 times to see if we pass.
@@ -36,10 +33,10 @@
 
 @pytest.mark.flaky(reruns=3)
 @pytest.mark.skipif(
-    not is_quant_method_supported("gptq_marlin")
+    not is_quant_method_supported("auto_gptq")
     or current_platform.is_rocm()
     or not current_platform.is_cuda(),
-    reason="gptq_marlin is not supported on this GPU type.",
+    reason="auto_gptq is not supported on this GPU type.",
 )
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["half", "bfloat16"])
diff --git a/tests/models/quantization/test_mxfp8.py b/tests/models/quantization/test_mxfp8.py
index 2cb0f2008878..7c250d11576e 100644
--- a/tests/models/quantization/test_mxfp8.py
+++ b/tests/models/quantization/test_mxfp8.py
@@ -23,7 +23,7 @@
 from ..utils import check_logprobs_close
 
 # A small MoE model that fits on a single GPU and has both linear + MoE layers.
-MOE_MODEL = "Qwen/Qwen3-30B-A3B"
+MOE_MODEL = "allenai/OLMoE-1B-7B-0125-Instruct"
 # A small dense model (no MoE) to validate the linear-only path.
 DENSE_MODEL = "Qwen/Qwen3-0.6B"
 
diff --git a/tests/models/quantization/test_nvfp4.py b/tests/models/quantization/test_nvfp4.py
index b73462bfd198..5ca307a4b191 100644
--- a/tests/models/quantization/test_nvfp4.py
+++ b/tests/models/quantization/test_nvfp4.py
@@ -89,23 +89,56 @@ def test_models(example_prompts, model_name) -> None:
 
 EAGER = [True, False]
 
+SM_100_NVFP4_BACKENDS = [
+    "flashinfer_cudnn",
+    "flashinfer_trtllm",
+    "flashinfer_cutlass",
+]
+
 
-@pytest.mark.skipif(
-    not current_platform.has_device_capability(100),
-    reason="modelopt_fp4 is not supported on this GPU type.",
-)
 @pytest.mark.parametrize("model", ["nvidia/Llama-3.1-8B-Instruct-NVFP4"])
 @pytest.mark.parametrize("eager", EAGER)
 @pytest.mark.parametrize(
     "backend",
     [
-        "flashinfer-cudnn",
-        "flashinfer-trtllm",  # the small seq_len ensures trtllm_8x4_layout backend is used
-        "flashinfer-cutlass",
+        "emulation",
+        "flashinfer_cudnn",
+        "flashinfer_trtllm",  # the small seq_len ensures trtllm_8x4_layout backend is used
+        "flashinfer_cutlass",
     ],
 )
-def test_nvfp4(vllm_runner, model, eager, backend, monkeypatch):
-    monkeypatch.setenv("VLLM_NVFP4_GEMM_BACKEND", backend)
-    with vllm_runner(model, enforce_eager=eager) as llm:
+def test_nvfp4(vllm_runner, model, eager, backend):
+    if (
+        not current_platform.has_device_capability(100)
+        and backend in SM_100_NVFP4_BACKENDS
+    ):
+        pytest.skip(
+            f"The backend {backend} is not supported with current_platform.has_device_capability(100) == False"
+        )
+
+    with vllm_runner(model, enforce_eager=eager, linear_backend=backend) as llm:
         output = llm.generate_greedy(["1 2 3 4 5"], max_tokens=2)
     assert output[0][1] == "1 2 3 4 5 6"
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "nvidia/Qwen3-30B-A3B-NVFP4",
+        "RedHatAI/Qwen3-30B-A3B-NVFP4",
+    ],
+)
+@pytest.mark.parametrize("backend", ["emulation"])
+@pytest.mark.skipif(
+    not current_platform.is_rocm(),
+    reason="NVFP4 MOE emulation is only useful on AMD Instinct MI3xx",
+)
+def test_nvfp4_moe(vllm_runner, model, backend, monkeypatch):
+    monkeypatch.setenv("VLLM_NVFP4_GEMM_BACKEND", backend)
+    with vllm_runner(
+        model,
+        moe_backend=backend,
+        load_format="dummy",
+        hf_overrides={"num_hidden_layers": 2},
+    ) as llm:
+        _ = llm.generate_greedy(["1 2 3 4 5"], max_tokens=2)
diff --git a/tests/models/quantization/test_per_token_kv_cache.py b/tests/models/quantization/test_per_token_kv_cache.py
new file mode 100644
index 000000000000..c581f01eb925
--- /dev/null
+++ b/tests/models/quantization/test_per_token_kv_cache.py
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""End-to-end accuracy tests for per-token-head KV cache quantization.
+
+Compares logprobs between a baseline bf16 model and the same model with
+per-token-head quantized KV cache (int8 or fp8) using the Triton attention
+backend.
+
+Run: pytest tests/models/quantization/test_per_token_kv_cache.py -v -s
+"""
+
+import pytest
+
+from vllm.platforms import current_platform
+
+from ..utils import check_logprobs_close
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike(),
+    reason="Per-token-head KV cache requires CUDA or ROCm GPU.",
+)
+@pytest.mark.parametrize(
+    "base_model,test_model",
+    [
+        (
+            "meta-llama/Llama-3.2-1B-Instruct",
+            "meta-llama/Llama-3.2-1B-Instruct",
+        ),
+    ],
+)
+@pytest.mark.parametrize(
+    "kv_cache_dtype", ["int8_per_token_head", "fp8_per_token_head"]
+)
+@pytest.mark.parametrize("max_tokens", [4])
+@pytest.mark.parametrize("enforce_eager", [True])
+@pytest.mark.parametrize("backend", ["TRITON_ATTN"])
+@pytest.mark.parametrize("tensor_parallel_size", [1])
+def test_per_token_head_kv_cache_accuracy(
+    vllm_runner,
+    example_prompts,
+    base_model: str,
+    test_model: str,
+    kv_cache_dtype: str,
+    max_tokens: int,
+    enforce_eager: bool,
+    backend: str,
+    tensor_parallel_size: int,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Compare logprobs between bf16 baseline and per-token-head quantized KV
+    cache.
+
+    Uses calculate_kv_scales (dynamic scale computation) since there are
+    no per-token-head calibrated checkpoints available yet.
+    """
+    with monkeypatch.context() as m:
+        m.setenv("TOKENIZERS_PARALLELISM", "true")
+
+        MAX_MODEL_LEN = 1024
+        NUM_LOG_PROBS = 8
+
+        with vllm_runner(
+            base_model,
+            max_model_len=MAX_MODEL_LEN,
+            tensor_parallel_size=tensor_parallel_size,
+            enforce_eager=enforce_eager,
+            kv_cache_dtype="auto",
+            attention_config={"backend": backend},
+        ) as vllm_model:
+            baseline_outputs = vllm_model.generate_greedy_logprobs(
+                example_prompts, max_tokens, NUM_LOG_PROBS
+            )
+
+        with vllm_runner(
+            test_model,
+            max_model_len=MAX_MODEL_LEN,
+            tensor_parallel_size=tensor_parallel_size,
+            enforce_eager=enforce_eager,
+            kv_cache_dtype=kv_cache_dtype,
+            calculate_kv_scales=True,
+            attention_config={"backend": backend},
+        ) as vllm_model:
+            test_outputs = vllm_model.generate_greedy_logprobs(
+                example_prompts, max_tokens, NUM_LOG_PROBS
+            )
+
+        check_logprobs_close(
+            outputs_0_lst=baseline_outputs,
+            outputs_1_lst=test_outputs,
+            name_0="bf16_kv_cache",
+            name_1=f"{kv_cache_dtype}_kv_cache",
+        )
diff --git a/tests/models/registry.py b/tests/models/registry.py
index 4e13b49e4ea0..a6bd6a310e67 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -7,6 +7,7 @@
 
 import pytest
 from packaging.version import Version
+from transformers import PretrainedConfig
 from transformers import __version__ as TRANSFORMERS_VERSION
 
 from vllm.config.model import ModelDType, TokenizerMode
@@ -237,6 +238,11 @@ def check_available_online(
         "CohereLabs/c4ai-command-r7b-12-2024",
         trust_remote_code=True,
     ),
+    "Cohere2MoeForCausalLM": _HfExamplesInfo(
+        "CohereLabs/command-a-plus-05-2026",
+        trust_remote_code=True,
+        is_available_online=False,
+    ),
     "CwmForCausalLM": _HfExamplesInfo("facebook/cwm", min_transformers_version="4.58"),
     # FIXME: databricks/dbrx-instruct has been deleted
     "DbrxForCausalLM": _HfExamplesInfo(
@@ -259,6 +265,9 @@ def check_available_online(
         trust_remote_code=True,
     ),
     "DeepseekV32ForCausalLM": _HfExamplesInfo("deepseek-ai/DeepSeek-V3.2-Exp"),
+    "DeepseekV4ForCausalLM": _HfExamplesInfo(
+        "deepseek-ai/DeepSeek-V4-Flash", is_available_online=False
+    ),
     "Ernie4_5ForCausalLM": _HfExamplesInfo("baidu/ERNIE-4.5-0.3B-PT"),
     "Ernie4_5_MoeForCausalLM": _HfExamplesInfo("baidu/ERNIE-4.5-21B-A3B-PT"),
     "ExaoneForCausalLM": _HfExamplesInfo(
@@ -277,6 +286,10 @@ def check_available_online(
         "google/gemma-2-9b", extras={"tiny": "google/gemma-2-2b-it"}
     ),
     "Gemma3ForCausalLM": _HfExamplesInfo("google/gemma-3-1b-it"),
+    "Gemma4ForCausalLM": _HfExamplesInfo(
+        "google/gemma-4-E2B-it",
+        min_transformers_version="5.0.0",
+    ),
     "Gemma3nForCausalLM": _HfExamplesInfo("google/gemma-3n-E2B-it"),
     "GlmForCausalLM": _HfExamplesInfo("zai-org/glm-4-9b-chat-hf"),
     "Glm4ForCausalLM": _HfExamplesInfo("zai-org/GLM-4-9B-0414"),
@@ -319,6 +332,7 @@ def check_available_online(
     "HunYuanMoEV1ForCausalLM": _HfExamplesInfo(
         "tencent/Hunyuan-A13B-Instruct", trust_remote_code=True
     ),
+    "HYV3ForCausalLM": _HfExamplesInfo("tencent/Hy3-preview", trust_remote_code=True),
     "HyperCLOVAXForCausalLM": _HfExamplesInfo(
         "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B",
         trust_remote_code=True,
@@ -330,7 +344,15 @@ def check_available_online(
         "internlm/internlm2-chat-7b", trust_remote_code=True
     ),
     "InternLM2VEForCausalLM": _HfExamplesInfo(
-        "OpenGVLab/Mono-InternVL-2B", trust_remote_code=True
+        "OpenGVLab/Mono-InternVL-2B",
+        trust_remote_code=True,
+        max_transformers_version="4.57",
+        transformers_version_reason={
+            "vllm": (
+                "Custom config cannot be loaded with Transformers "
+                "v5 because `vision_config` is not always set"
+            )
+        },
     ),
     "InternLM3ForCausalLM": _HfExamplesInfo(
         "internlm/internlm3-8b-instruct", trust_remote_code=True
@@ -355,6 +377,7 @@ def check_available_online(
     "KimiLinearForCausalLM": _HfExamplesInfo(
         "moonshotai/Kimi-Linear-48B-A3B-Instruct", trust_remote_code=True
     ),
+    "LagunaForCausalLM": _HfExamplesInfo("poolside/Laguna-XS.2"),
     "Lfm2ForCausalLM": _HfExamplesInfo("LiquidAI/LFM2-1.2B"),
     "Lfm2MoeForCausalLM": _HfExamplesInfo(
         "LiquidAI/LFM2-8B-A1B",
@@ -411,6 +434,7 @@ def check_available_online(
         "MiniMaxAI/MiniMax-M2",
         trust_remote_code=True,
     ),
+    "Ministral3ForCausalLM": _HfExamplesInfo("mistralai/Ministral-3-3B-Instruct-2512"),
     "MistralForCausalLM": _HfExamplesInfo("mistralai/Mistral-7B-Instruct-v0.1"),
     "MistralLarge3ForCausalLM": _HfExamplesInfo(
         "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4"
@@ -456,6 +480,10 @@ def check_available_online(
         trust_remote_code=True,
         is_available_online=False,
     ),
+    "Param2MoEForCausalLM": _HfExamplesInfo(
+        "bharatgenai/Param2-17B-A2.4B-Thinking",
+        trust_remote_code=True,
+    ),
     "PersimmonForCausalLM": _HfExamplesInfo("adept/persimmon-8b-chat"),
     "PhiForCausalLM": _HfExamplesInfo("microsoft/phi-2"),
     "Phi3ForCausalLM": _HfExamplesInfo("microsoft/Phi-3-mini-4k-instruct"),
@@ -465,6 +493,13 @@ def check_available_online(
     "Plamo2ForCausalLM": _HfExamplesInfo(
         "pfnet/plamo-2-1b",
         trust_remote_code=True,
+        max_transformers_version="4.57",
+        transformers_version_reason={
+            "hf": (
+                "Custom model code uses `_tied_weight_keys: list[str]` but "
+                "Transformers v5 now expects `_tied_weight_keys: dict[str, str]`"
+            )
+        },
     ),
     "Plamo3ForCausalLM": _HfExamplesInfo(
         "pfnet/plamo-3-nict-2b-base",
@@ -493,6 +528,10 @@ def check_available_online(
         extras={"tiny-random": "tiny-random/qwen3-next-moe"},
         min_transformers_version="4.56.3",
     ),
+    "Rnj1ForCausalLM": _HfExamplesInfo(
+        "EssentialAI/rnj-1-instruct",
+        is_available_online=False,
+    ),
     "RWForCausalLM": _HfExamplesInfo("tiiuae/falcon-40b"),
     "SarvamMoEForCausalLM": _HfExamplesInfo(
         "sarvamai/sarvam-30b",
@@ -505,6 +544,13 @@ def check_available_online(
         trust_remote_code=True,
         max_model_len=4096,
         is_available_online=True,
+        max_transformers_version="5.3",
+        transformers_version_reason={
+            "vllm": (
+                "vllm upgraded transformers above v5.4 where "
+                "validate_rope() no longer accepts ignore_keys param"
+            )
+        },
     ),
     "SeedOssForCausalLM": _HfExamplesInfo(
         "ByteDance-Seed/Seed-OSS-36B-Instruct",
@@ -533,6 +579,9 @@ def check_available_online(
     "TeleChat2ForCausalLM": _HfExamplesInfo(
         "Tele-AI/TeleChat2-3B", trust_remote_code=True
     ),
+    "TeleChat3ForCausalLM": _HfExamplesInfo(
+        "Tele-AI/TeleChat3-36B-Thinking", trust_remote_code=True
+    ),
     "TeleFLMForCausalLM": _HfExamplesInfo(
         "CofeAI/FLM-2-52B-Instruct-2407", trust_remote_code=True
     ),
@@ -540,12 +589,20 @@ def check_available_online(
         "xverse/XVERSE-7B-Chat",
         tokenizer="meta-llama/Llama-2-7b",
         trust_remote_code=True,
+        max_transformers_version="4.57",
+        transformers_version_reason={
+            "vllm": "XVERSE tokenizer is incompatible with transformers v5 "
+            "(add_prefix_space / prepend_scheme mismatch).",
+        },
     ),
     "Zamba2ForCausalLM": _HfExamplesInfo("Zyphra/Zamba2-7B-instruct"),
     "MiMoForCausalLM": _HfExamplesInfo("XiaomiMiMo/MiMo-7B-RL", trust_remote_code=True),
     "MiMoV2FlashForCausalLM": _HfExamplesInfo(
         "XiaomiMiMo/MiMo-V2-Flash", trust_remote_code=True
     ),
+    "MiMoV2ForCausalLM": _HfExamplesInfo(
+        "XiaomiMiMo/MiMo-V2.5-Pro", trust_remote_code=True
+    ),
     "Dots1ForCausalLM": _HfExamplesInfo("rednote-hilab/dots.llm1.inst"),
 }
 
@@ -569,6 +626,10 @@ def check_available_online(
         trust_remote_code=True,
         hf_overrides={"architectures": ["GteNewModel"]},
     ),
+    "JinaEmbeddingsV5Model": _HfExamplesInfo(
+        "jinaai/jina-embeddings-v5-text-small",
+        trust_remote_code=True,
+    ),
     "LlamaModel": _HfExamplesInfo("llama", is_available_online=False),
     "LlamaBidirectionalModel": _HfExamplesInfo(
         "nvidia/llama-nemotron-embed-1b-v2", trust_remote_code=True
@@ -633,6 +694,7 @@ def check_available_online(
         trust_remote_code=True,
         hf_overrides={"architectures": ["ColBERTLfm2Model"]},
     ),
+    "JinaForRanking": _HfExamplesInfo("jinaai/jina-reranker-v3"),
     # [Multimodal]
     "ColModernVBertForRetrieval": _HfExamplesInfo(
         "ModernVBERT/colmodernvbert-merged",
@@ -749,10 +811,18 @@ def check_available_online(
     # [Decoder-only]
     "AriaForConditionalGeneration": _HfExamplesInfo("rhymes-ai/Aria"),
     "AudioFlamingo3ForConditionalGeneration": _HfExamplesInfo(
-        "nvidia/audio-flamingo-3-hf", min_transformers_version="5.0.0"
+        "nvidia/audio-flamingo-3-hf",
+        min_transformers_version="5.3.0",
+        transformers_version_reason={
+            "vllm": "Needs https://github.com/huggingface/transformers/pull/43538"
+        },
     ),
     "MusicFlamingoForConditionalGeneration": _HfExamplesInfo(
-        "nvidia/music-flamingo-2601-hf", min_transformers_version="5.3.0"
+        "nvidia/music-flamingo-2601-hf",
+        min_transformers_version="5.3.0",
+        transformers_version_reason={
+            "vllm": "Needs https://github.com/huggingface/transformers/pull/43538"
+        },
     ),
     "AyaVisionForConditionalGeneration": _HfExamplesInfo("CohereLabs/aya-vision-8b"),
     "BagelForConditionalGeneration": _HfExamplesInfo("ByteDance-Seed/BAGEL-7B-MoT"),
@@ -765,6 +835,14 @@ def check_available_online(
         extras={"6b": "Salesforce/blip2-opt-6.7b"},
     ),
     "ChameleonForConditionalGeneration": _HfExamplesInfo("facebook/chameleon-7b"),
+    "Cheers": _HfExamplesInfo(
+        "ai9stars/Cheers",
+        trust_remote_code=True,
+    ),
+    "CheersForConditionalGeneration": _HfExamplesInfo(
+        "ai9stars/Cheers",
+        trust_remote_code=True,
+    ),
     "Cohere2VisionForConditionalGeneration": _HfExamplesInfo(
         "CohereLabs/command-a-vision-07-2025"
     ),
@@ -793,22 +871,55 @@ def check_available_online(
         trust_remote_code=True,
         revision="refs/pr/17",
     ),
+    "Exaone4_5_ForConditionalGeneration": _HfExamplesInfo(
+        "LGAI-EXAONE/EXAONE-4.5-33B",
+        min_transformers_version="5.6.0",
+    ),
     "FireRedASR2ForConditionalGeneration": _HfExamplesInfo(
         "allendou/FireRedASR2-LLM-vllm",
+        trust_remote_code=True,
+        max_transformers_version="5.1",
+        transformers_version_reason={
+            "vllm": "Incompatible with transformers v5.2+ "
+            "(dict object has no attribute '__name__').",
+        },
+    ),
+    "FireRedLIDForConditionalGeneration": _HfExamplesInfo(
+        "PatchyTisa/FireRedLID-vllm",
+        trust_remote_code=True,
+        max_transformers_version="5.1",
+        transformers_version_reason={
+            "vllm": "Incompatible with transformers v5.2+ "
+            "(dict object has no attribute '__name__').",
+        },
     ),
     "FunASRForConditionalGeneration": _HfExamplesInfo(
         "allendou/Fun-ASR-Nano-2512-vllm",
+        trust_remote_code=True,
+        max_transformers_version="5.1",
+        transformers_version_reason={
+            "vllm": "Incompatible with transformers v5.2+ "
+            "(dict object has no attribute '__name__').",
+        },
     ),
     "FunAudioChatForConditionalGeneration": _HfExamplesInfo(
         "funaudiochat", is_available_online=False
     ),
     "FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"),
     "Gemma3ForConditionalGeneration": _HfExamplesInfo("google/gemma-3-4b-it"),
+    "Gemma4ForConditionalGeneration": _HfExamplesInfo(
+        "google/gemma-4-E2B-it",
+        min_transformers_version="5.5.0",
+    ),
     "Gemma3nForConditionalGeneration": _HfExamplesInfo("google/gemma-3n-E2B-it"),
     "GlmAsrForConditionalGeneration": _HfExamplesInfo(
         "zai-org/GLM-ASR-Nano-2512",
         min_transformers_version="5.0.0",
     ),
+    "Granite4VisionForConditionalGeneration": _HfExamplesInfo(
+        "ibm-granite/granite-vision-4.1-4b",
+        is_available_online=False,
+    ),
     "GraniteVision": _HfExamplesInfo("ibm-granite/granite-vision-3.3-2b"),
     "GraniteSpeechForConditionalGeneration": _HfExamplesInfo(
         "ibm-granite/granite-speech-3.3-2b",
@@ -852,14 +963,36 @@ def check_available_online(
         "PerceptronAI/Isaac-0.1",
         trust_remote_code=True,
         extras={"0.2-2B-Preview": "PerceptronAI/Isaac-0.2-2B-Preview"},
+        max_transformers_version="4.57",
+        transformers_version_reason={
+            "vllm": (
+                "Custom Isaac code is not compatible with Transformers v5. "
+                "The model should be upstreamed to Transformers for "
+                "long-term support."
+            ),
+            "hf": (
+                "Isaac's remote model and processor code import or configure "
+                "APIs that changed in Transformers v5."
+            ),
+        },
     ),
     "InternS1ForConditionalGeneration": _HfExamplesInfo(
-        "internlm/Intern-S1", trust_remote_code=True
+        "internlm/Intern-S1",
+        trust_remote_code=True,
+        max_transformers_version="4.57",
+        transformers_version_reason={
+            "vllm": "Custom tokenizer code is not compatible with Transformers v5."
+        },
     ),
     "InternS1ProForConditionalGeneration": _HfExamplesInfo(
         "internlm/Intern-S1-Pro",
         trust_remote_code=True,
     ),
+    "InternS2PreviewForConditionalGeneration": _HfExamplesInfo(
+        "internlm/Intern-S2-Preview",
+        trust_remote_code=True,
+        is_available_online=False,
+    ),
     "InternVLChatModel": _HfExamplesInfo(
         "OpenGVLab/InternVL2-1B",
         extras={
@@ -943,7 +1076,17 @@ def check_available_online(
     "MiDashengLMModel": _HfExamplesInfo(
         "mispeech/midashenglm-7b", trust_remote_code=True
     ),
-    "MiniCPMO": _HfExamplesInfo("openbmb/MiniCPM-o-2_6", trust_remote_code=True),
+    "MiMoV2OmniForCausalLM": _HfExamplesInfo(
+        "XiaomiMiMo/MiMo-V2.5-Omni", trust_remote_code=True, is_available_online=False
+    ),
+    "MiniCPMO": _HfExamplesInfo(
+        "openbmb/MiniCPM-o-2_6",
+        trust_remote_code=True,
+        max_transformers_version="4.57",
+        transformers_version_reason={
+            "hf": "Custom processor code is not compatible with Transformers v5."
+        },
+    ),
     "MiniCPMV": _HfExamplesInfo(
         "openbmb/MiniCPM-Llama3-V-2_5",
         extras={
@@ -951,8 +1094,19 @@ def check_available_online(
             "4.0": "openbmb/MiniCPM-V-4",
             "4.5": "openbmb/MiniCPM-V-4_5",
         },
+        max_transformers_version="4.57",
+        transformers_version_reason={
+            "vllm": (
+                "MiniCPMVBatchFeature is incompatible with its base class in "
+                "Transformers v5. See https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5/discussions/78"
+            )
+        },
         trust_remote_code=True,
     ),
+    "MiniCPMV4_6ForConditionalGeneration": _HfExamplesInfo(
+        "openbmb/MiniCPM-V-4_6",
+        min_transformers_version="5.7.0",
+    ),
     "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo(
         "MiniMaxAI/MiniMax-VL-01",
         trust_remote_code=True,
@@ -970,6 +1124,16 @@ def check_available_online(
         extras={"olmo": "allenai/Molmo-7B-O-0924"},
         trust_remote_code=True,
     ),
+    "Moondream3ForCausalLM": _HfExamplesInfo(
+        "moondream/moondream3-preview",
+        tokenizer="moondream/starmie-v1",
+        trust_remote_code=True,
+    ),
+    "HfMoondream": _HfExamplesInfo(
+        "moondream/moondream3-preview",
+        tokenizer="moondream/starmie-v1",
+        trust_remote_code=True,
+    ),
     "Molmo2ForConditionalGeneration": _HfExamplesInfo(
         "allenai/Molmo2-8B",
         extras={"olmo": "allenai/Molmo2-O-7B"},
@@ -984,16 +1148,64 @@ def check_available_online(
         trust_remote_code=True,
     ),
     "NemotronH_Nano_VL_V2": _HfExamplesInfo(
-        "nano_vl_dummy", is_available_online=False, trust_remote_code=True
+        "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16",
+        max_model_len=4096,
+        # NemotronH layers are constructed via `hybrid_override_pattern`
+        use_original_num_layers=True,
+        hf_overrides={
+            "text_config": {"num_hidden_layers": 2, "hybrid_override_pattern": "M*"},
+        },
+        trust_remote_code=True,
+    ),
+    "NemotronH_Nano_Omni_Reasoning_V3": _HfExamplesInfo(
+        "nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-BF16",
+        max_model_len=4096,
+        # NemotronH layers are constructed via `hybrid_override_pattern`
+        use_original_num_layers=True,
+        hf_overrides={
+            "vision_config": PretrainedConfig(
+                args={
+                    "min_num_patches": 1,
+                    "max_num_patches": 12,
+                    "model": "vit_huge_patch16_224",
+                },
+                video_temporal_patch_size=2,
+                # TODO(nhaber): This is `true` in the official `config.json`,
+                # but this causes a processor exception in the tests due to a known bug
+                # with mixed-resolution video when `true`. To be resolved.
+                video_maintain_aspect_ratio=False,
+            ),
+            "text_config": {"num_hidden_layers": 2, "hybrid_override_pattern": "M*"},
+        },
+        trust_remote_code=True,
+    ),
+    "NemotronH_Super_Omni_Reasoning_V3": _HfExamplesInfo(
+        "nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-BF16", is_available_online=False
     ),
     "OpenCUAForConditionalGeneration": _HfExamplesInfo(
-        "xlangai/OpenCUA-7B", trust_remote_code=True
+        "xlangai/OpenCUA-7B",
+        trust_remote_code=True,
+        max_transformers_version="4.57",
+        transformers_version_reason={
+            "vllm": "Tokenizer cannot be initialised in Transformers v5."
+        },
     ),
     "OpenPanguVLForConditionalGeneration": _HfExamplesInfo(
         "FreedomIntelligence/openPangu-VL-7B",
         trust_remote_code=True,
         max_model_len=4096,
         enforce_eager=True,
+        max_transformers_version="4.57",
+        transformers_version_reason={
+            "vllm": (
+                "OpenPanguVLVideoProcessorInitKwargs does not specify total=False, "
+                "making all kwargs required. See https://huggingface.co/FreedomIntelligence/openPangu-VL-7B/discussions/2"
+            )
+        },
+    ),
+    "OpenVLAForActionPrediction": _HfExamplesInfo(
+        "openvla/openvla-7b",
+        trust_remote_code=True,
     ),
     "Ovis": _HfExamplesInfo(
         "AIDC-AI/Ovis2-1B",
@@ -1005,12 +1217,24 @@ def check_available_online(
             "1.6-gemma": "AIDC-AI/Ovis1.6-Gemma2-9B",
         },
     ),
-    "Ovis2_5": _HfExamplesInfo("AIDC-AI/Ovis2.5-2B", trust_remote_code=True),
+    "Ovis2_5": _HfExamplesInfo(
+        "AIDC-AI/Ovis2.5-2B",
+        trust_remote_code=True,
+        max_transformers_version="4.57",
+        transformers_version_reason={
+            "vllm": "Custom processor code is not compatible with Transformers v5."
+        },
+    ),
     "Ovis2_6ForCausalLM": _HfExamplesInfo(
         "AIDC-AI/Ovis2.6-2B", is_available_online=False, trust_remote_code=True
     ),
     "Ovis2_6_MoeForCausalLM": _HfExamplesInfo(
-        "AIDC-AI/Ovis2.6-30B-A3B", trust_remote_code=True
+        "AIDC-AI/Ovis2.6-30B-A3B",
+        trust_remote_code=True,
+        max_transformers_version="4.57",
+        transformers_version_reason={
+            "vllm": "Custom processor code is not compatible with Transformers v5."
+        },
     ),
     "PaddleOCRVLForConditionalGeneration": _HfExamplesInfo(
         "PaddlePaddle/PaddleOCR-VL",
@@ -1029,6 +1253,19 @@ def check_available_online(
         },  # noqa: E501
         extras={"phi3.5": "microsoft/Phi-3.5-vision-instruct"},
     ),
+    "Phi4ForCausalLMV": _HfExamplesInfo(
+        "microsoft/Phi-4-reasoning-vision-15B",
+        trust_remote_code=True,
+        max_transformers_version="5.3",
+        transformers_version_reason={
+            "vllm": (
+                "vllm upgraded transformers above v5.4 where HF model "
+                "custom code uses siglip2 internals "
+                "(filter_out_non_signature_kwargs) removed "
+                "by huggingface/transformers#43514"
+            )
+        },
+    ),
     "Phi4MMForCausalLM": _HfExamplesInfo(
         "microsoft/Phi-4-multimodal-instruct", trust_remote_code=True
     ),
@@ -1040,6 +1277,10 @@ def check_available_online(
         },
         tokenizer_mode="mistral",
     ),
+    "QianfanOCRForConditionalGeneration": _HfExamplesInfo(
+        "baidu/Qianfan-OCR",
+        min_transformers_version="5.6.0",
+    ),
     "QwenVLForConditionalGeneration": _HfExamplesInfo(
         "Qwen/Qwen-VL",
         extras={"chat": "Qwen/Qwen-VL-Chat"},
@@ -1102,7 +1343,16 @@ def check_available_online(
     ),
     "RForConditionalGeneration": _HfExamplesInfo("YannQi/R-4B", trust_remote_code=True),
     "SkyworkR1VChatModel": _HfExamplesInfo(
-        "Skywork/Skywork-R1V-38B", trust_remote_code=True
+        "Skywork/Skywork-R1V-38B",
+        trust_remote_code=True,
+        max_transformers_version="4.57",
+        transformers_version_reason={
+            "hf": (
+                "SkyworkChatModel.__init__ does not call self.post_init(), so "
+                "`all_tied_weights_keys` is never set; Transformers v5 requires "
+                "this attribute in _move_missing_keys_from_meta_to_device."
+            )
+        },
     ),
     "SmolVLMForConditionalGeneration": _HfExamplesInfo(
         "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
@@ -1124,6 +1374,14 @@ def check_available_online(
             "architectures": ["Tarsier2ForConditionalGeneration"],
             "model_type": "tarsier2",
         },
+        max_transformers_version="5.3",
+        transformers_version_reason={
+            "vllm": (
+                "Qwen2VLConfig was split into Qwen2VLConfig + "
+                "Qwen2VLTextConfig in transformers v5, breaking "
+                "attribute access (num_attention_heads, hidden_size, etc.)"
+            )
+        },
     ),
     "VoxtralForConditionalGeneration": _HfExamplesInfo(
         "mistralai/Voxtral-Mini-3B-2507",
@@ -1136,12 +1394,10 @@ def check_available_online(
     ),
     # [Encoder-decoder]
     "CohereAsrForConditionalGeneration": _HfExamplesInfo(
-        "CohereLabs/cohere-transcribe-03-2026",
-        trust_remote_code=True,
-        is_available_online=False,  # TODO (ekagra): revert after asr release
+        "CohereLabs/cohere-transcribe-03-2026", trust_remote_code=True
     ),
     "NemotronParseForConditionalGeneration": _HfExamplesInfo(
-        "nvidia/NVIDIA-Nemotron-Parse-v1.1", trust_remote_code=True
+        "nvidia/NVIDIA-Nemotron-Parse-v1.2", trust_remote_code=True
     ),
     "WhisperForConditionalGeneration": _HfExamplesInfo(
         "openai/whisper-large-v3-turbo",
@@ -1163,7 +1419,22 @@ def check_available_online(
     #     "JackFram/llama-160m",
     #     speculative_model="ibm-ai-platform/llama-160m-accelerator"
     # ),
+    # [DFlash]
+    "DFlashDraftModel": _HfExamplesInfo(
+        "Qwen/Qwen3.5-4B",
+        speculative_model="z-lab/Qwen3.5-4B-DFlash",
+        use_original_num_layers=True,  # Need all layers since DFlash has >1 layer,
+        max_model_len=8192,  # Reduce max len to ensure test runs in low-VRAM CI env
+        max_num_seqs=32,
+    ),
     # [Eagle]
+    "EagleCohereForCausalLM": _HfExamplesInfo(
+        "/host/engines/cohere-moe",
+        speculative_model="/host/engines/cohere-moe/eagle",
+        tokenizer="/host/engines/cohere-moe",
+        trust_remote_code=True,
+        is_available_online=False,
+    ),
     "EagleDeepSeekMTPModel": _HfExamplesInfo(
         "eagle618/deepseek-v3-random",
         speculative_model="eagle618/eagle-deepseek-v3-random",
@@ -1195,6 +1466,17 @@ def check_available_online(
         use_original_num_layers=True,
         max_model_len=10240,
     ),
+    "Eagle3MiniMaxM2ForCausalLM": _HfExamplesInfo(
+        "MiniMaxAI/MiniMax-M2",
+        trust_remote_code=True,
+        speculative_model="yuhuili/EAGLE3-LLaMA3.1-Instruct-8B",
+        tokenizer="MiniMaxAI/MiniMax-M2",
+    ),
+    "EagleMistralForCausalLM": _HfExamplesInfo(
+        "mistralai/Mistral-Medium-3.5-128B",
+        speculative_model="mistralai/Mistral-Medium-3.5-128B-EAGLE",
+        is_available_online=False,
+    ),
     "EagleMistralLarge3ForCausalLM": _HfExamplesInfo(
         "mistralai/Mistral-Large-3-675B-Instruct-2512",
         speculative_model="mistralai/Mistral-Large-3-675B-Instruct-2512-Eagle",
@@ -1229,12 +1511,39 @@ def check_available_online(
         "Qwen/Qwen3-VL-8B-Instruct",
         speculative_model="taobao-mnn/Qwen3-VL-8B-Instruct-Eagle3",
     ),
+    # [PEagle]
+    "PEagleDraftModel": _HfExamplesInfo(
+        "Qwen/Qwen3-8B",
+        trust_remote_code=True,
+        speculative_model="nm-testing/qwen3-8b-peagle-speculators",
+        tokenizer="Qwen/Qwen3-8B",
+        use_original_num_layers=True,
+    ),
+    "PeagleLlamaForCausalLM": _HfExamplesInfo(
+        "Qwen/Qwen3-8B",
+        trust_remote_code=True,
+        speculative_model="nm-testing/qwen3-8b-peagle-speculators",
+        tokenizer="Qwen/Qwen3-8B",
+        use_original_num_layers=True,
+    ),
     # [MTP]
     "DeepSeekMTPModel": _HfExamplesInfo(
         "luccafong/deepseek_mtp_main_random",
         speculative_model="luccafong/deepseek_mtp_draft_random",
         trust_remote_code=True,
     ),
+    "DeepSeekV4MTPModel": _HfExamplesInfo(
+        "deepseek-ai/DeepSeek-V4-Flash",
+        speculative_model="deepseek-ai/DeepSeek-V4-Flash",
+        trust_remote_code=True,
+        is_available_online=False,
+    ),
+    "Gemma4MTPModel": _HfExamplesInfo(
+        "google/gemma-4-E4B-it",
+        speculative_model="google/gemma-4-E4B-it-assistant",
+        trust_remote_code=True,
+        min_transformers_version="5.8.0",
+    ),
     "ErnieMTPModel": _HfExamplesInfo(
         "baidu/ERNIE-4.5-21B-A3B-PT",
         trust_remote_code=True,
@@ -1246,6 +1555,11 @@ def check_available_online(
         min_transformers_version="5.1.0",
         enable_prefix_caching=False,
     ),
+    "Exaone4_5_MTP": _HfExamplesInfo(
+        "LGAI-EXAONE/EXAONE-4.5-33B",
+        speculative_model="LGAI-EXAONE/EXAONE-4.5-33B",
+        min_transformers_version="5.6.0",
+    ),
     "ExtractHiddenStatesModel": _HfExamplesInfo(
         "Qwen/Qwen3-8B",
         speculative_method="extract_hidden_states",
@@ -1265,6 +1579,10 @@ def check_available_online(
         is_available_online=False,
         min_transformers_version="5.1.0",
     ),
+    "HYV3MTPModel": _HfExamplesInfo(
+        "tencent/Hy3-preview",
+        speculative_model="tencent/Hy3-preview",
+    ),
     "LongCatFlashMTPModel": _HfExamplesInfo(
         "meituan-longcat/LongCat-Flash-Chat",
         trust_remote_code=True,
@@ -1275,6 +1593,18 @@ def check_available_online(
         trust_remote_code=True,
         speculative_model="XiaomiMiMo/MiMo-7B-RL",
     ),
+    "MiMoV2MTPModel": _HfExamplesInfo(
+        "XiaomiMiMo/MiMo-V2.5-Pro",
+        trust_remote_code=True,
+        speculative_model="XiaomiMiMo/MiMo-V2.5-Pro",
+        is_available_online=False,
+    ),
+    "MiMoV2OmniMTPModel": _HfExamplesInfo(
+        "XiaomiMiMo/MiMo-V2.5-Omni",
+        trust_remote_code=True,
+        speculative_model="XiaomiMiMo/MiMo-V2.5-Omni",
+        is_available_online=False,
+    ),
     "NemotronHMTPModel": _HfExamplesInfo(
         "nvidia/Nemotron-Super-Placeholder",
         speculative_model="nvidia/Nemotron-Super-Placeholder",
diff --git a/tests/models/test_adapters.py b/tests/models/test_adapters.py
new file mode 100644
index 000000000000..7b1815998f23
--- /dev/null
+++ b/tests/models/test_adapters.py
@@ -0,0 +1,148 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for model adapter weight loading (adapters.py)."""
+
+import pytest
+import torch
+
+from vllm.model_executor.models.adapters import _create_pooling_model_cls
+from vllm.model_executor.models.utils import AutoWeightsLoader, StageMissingLayer
+
+pytestmark = pytest.mark.cpu_test
+
+
+class SimpleInnerModel(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.embed = torch.nn.Linear(4, 8, bias=False)
+        self.layer0 = torch.nn.Linear(8, 8, bias=False)
+        self.layer1 = torch.nn.Linear(8, 8, bias=False)
+        self.norm = torch.nn.Linear(8, 4, bias=False)
+
+    def load_weights(self, weights):
+        params = dict(self.named_parameters())
+        loaded = set()
+        for name, tensor in weights:
+            if name in params:
+                params[name].data.copy_(tensor)
+                loaded.add(name)
+        return loaded
+
+
+class SimpleModel(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.model = SimpleInnerModel()
+        self.lm_head = torch.nn.Linear(8, 16, bias=False)
+
+    def load_weights(self, weights):
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(weights)
+
+
+class PackedWeightInnerModel(torch.nn.Module):
+    """Remaps q_proj/k_proj into a fused qkv_proj (Qwen2/Llama pattern)."""
+
+    def __init__(self):
+        super().__init__()
+        self.qkv_proj = torch.nn.Linear(4, 16, bias=False)
+        self.out = torch.nn.Linear(8, 4, bias=False)
+
+    def load_weights(self, weights):
+        params = dict(self.named_parameters())
+        loaded = set()
+        for name, tensor in weights:
+            if name == "q_proj.weight":
+                params["qkv_proj.weight"].data[:8].copy_(tensor)
+                loaded.add("qkv_proj.weight")
+            elif name == "k_proj.weight":
+                params["qkv_proj.weight"].data[8:].copy_(tensor)
+                loaded.add("qkv_proj.weight")
+            elif name in params:
+                params[name].data.copy_(tensor)
+                loaded.add(name)
+        return loaded
+
+
+class PackedWeightModel(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.model = PackedWeightInnerModel()
+        self.lm_head = torch.nn.Linear(4, 8, bias=False)
+
+    def load_weights(self, weights):
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(weights)
+
+
+def _buffer_reusing_iterator(weight_dict):
+    """Yield weights through a shared buffer overwritten each step.
+
+    Mimics ``runai_model_streamer`` with ``RUNAI_STREAMER_MEMORY_LIMIT=0``.
+    """
+    buf = None
+    for name, tensor in weight_dict.items():
+        if buf is None or buf.numel() < tensor.numel():
+            buf = torch.empty(tensor.numel(), dtype=tensor.dtype)
+        view = buf[: tensor.numel()].view(tensor.shape)
+        view.copy_(tensor)
+        yield name, view
+
+
+def _make_pooling_model(base_cls=SimpleModel):
+    PoolingModel = _create_pooling_model_cls(base_cls)
+    model = base_cls()
+    model.__class__ = PoolingModel
+    model.lm_head = StageMissingLayer("output", model.lm_head)
+    return model
+
+
+def _make_reference_weights():
+    torch.manual_seed(42)
+    return {
+        "model.embed.weight": torch.randn(8, 4),
+        "model.layer0.weight": torch.randn(8, 8),
+        "model.layer1.weight": torch.randn(8, 8),
+        "model.norm.weight": torch.randn(4, 8),
+        "lm_head.weight": torch.randn(16, 8),
+    }
+
+
+def _make_packed_reference_weights():
+    torch.manual_seed(42)
+    return {
+        "model.q_proj.weight": torch.randn(8, 4),
+        "model.k_proj.weight": torch.randn(8, 4),
+        "model.out.weight": torch.randn(4, 8),
+        "lm_head.weight": torch.randn(8, 4),
+    }
+
+
+def _load_and_compare(model, ref, expected):
+    for p in model.parameters():
+        p.data.zero_()
+    model.load_weights(_buffer_reusing_iterator(ref))
+    for name, param in model.named_parameters():
+        assert torch.equal(param.data, expected[name]), name
+
+
+def test_pooling_load_weights_with_buffer_reuse():
+    """Ensure ModelForPooling.load_weights works with buffer-reusing iterators."""
+    ref = _make_reference_weights()
+
+    ground_truth = SimpleModel()
+    ground_truth.load_weights(ref.items())
+    expected = {n: p.data.clone() for n, p in ground_truth.named_parameters()}
+
+    _load_and_compare(_make_pooling_model(), ref, expected)
+
+
+def test_pooling_load_weights_clones_probed_weights():
+    """Ensure probed weights survive buffer reuse during packed remapping."""
+    ref = _make_packed_reference_weights()
+
+    ground_truth = PackedWeightModel()
+    ground_truth.load_weights(ref.items())
+    expected = {n: p.data.clone() for n, p in ground_truth.named_parameters()}
+
+    _load_and_compare(_make_pooling_model(PackedWeightModel), ref, expected)
diff --git a/tests/models/test_deepseek_v4_mega_moe.py b/tests/models/test_deepseek_v4_mega_moe.py
new file mode 100644
index 000000000000..3d4657bba130
--- /dev/null
+++ b/tests/models/test_deepseek_v4_mega_moe.py
@@ -0,0 +1,184 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from types import SimpleNamespace
+
+import pytest
+import torch
+
+from vllm.models.deepseek_v4.nvidia.model import (
+    DeepseekV4MegaMoEExperts,
+    _stage_deepseek_v4_mega_moe_inputs,
+    make_deepseek_v4_expert_params_mapping,
+)
+from vllm.platforms import current_platform
+
+pytestmark = pytest.mark.skipif(
+    not current_platform.is_cuda(),
+    reason="DeepSeek V4 MegaMoE requires CUDA",
+)
+
+
+def test_deepseek_v4_mega_moe_expert_mapping():
+    mapping = make_deepseek_v4_expert_params_mapping(2)
+
+    assert mapping == [
+        ("experts.w13_", "experts.0.w1.", 0, "w1"),
+        ("experts.w2_", "experts.0.w2.", 0, "w2"),
+        ("experts.w13_", "experts.0.w3.", 0, "w3"),
+        ("experts.w13_", "experts.1.w1.", 1, "w1"),
+        ("experts.w2_", "experts.1.w2.", 1, "w2"),
+        ("experts.w13_", "experts.1.w3.", 1, "w3"),
+    ]
+
+
+def test_deepseek_v4_mega_moe_ue8m0_uint8_to_float():
+    raw = torch.tensor([0, 126, 127, 128], dtype=torch.uint8)
+
+    decoded = DeepseekV4MegaMoEExperts._ue8m0_uint8_to_float(raw)
+
+    assert torch.equal(decoded.view(torch.int32), raw.to(torch.int32) << 23)
+    assert decoded[0].item() == 0.0
+    assert decoded[1].item() == 0.5
+    assert decoded[2].item() == 1.0
+    assert decoded[3].item() == 2.0
+
+
+def test_deepseek_v4_mega_moe_weight_loader_uses_ep_expert_ownership():
+    vllm_config = SimpleNamespace(
+        scheduler_config=SimpleNamespace(max_num_batched_tokens=4)
+    )
+    experts = DeepseekV4MegaMoEExperts(
+        vllm_config,
+        num_experts=4,
+        num_local_experts=2,
+        experts_start_idx=2,
+        top_k=2,
+        hidden_size=128,
+        intermediate_size=128,
+    )
+
+    nonlocal_weight = torch.ones(128, 64, dtype=torch.uint8)
+    assert (
+        experts.weight_loader(
+            experts.w13_weight,
+            nonlocal_weight,
+            "experts.w13_weight",
+            shard_id="w1",
+            expert_id=1,
+            return_success=True,
+        )
+        is False
+    )
+
+    w1 = torch.full((128, 64), 3, dtype=torch.uint8)
+    w3 = torch.full((128, 64), 7, dtype=torch.uint8)
+    w2 = torch.full((128, 64), 11, dtype=torch.uint8)
+
+    assert experts.weight_loader(
+        experts.w13_weight,
+        w1,
+        "experts.w13_weight",
+        shard_id="w1",
+        expert_id=2,
+        return_success=True,
+    )
+    assert experts.weight_loader(
+        experts.w13_weight,
+        w3,
+        "experts.w13_weight",
+        shard_id="w3",
+        expert_id=2,
+        return_success=True,
+    )
+    assert experts.weight_loader(
+        experts.w2_weight,
+        w2,
+        "experts.w2_weight",
+        shard_id="w2",
+        expert_id=2,
+        return_success=True,
+    )
+
+    assert torch.equal(experts.w13_weight[0, :128], w1)
+    assert torch.equal(experts.w13_weight[0, 128:], w3)
+    assert torch.equal(experts.w2_weight[0], w2)
+    assert torch.count_nonzero(experts.w13_weight[1]) == 0
+
+
+@pytest.mark.skipif(
+    not torch.cuda.is_available(),
+    reason="DeepSeek V4 MegaMoE fused input staging requires CUDA.",
+)
+def test_deepseek_v4_mega_moe_fused_input_staging_is_bitwise_exact():
+    from vllm.third_party.deep_gemm.utils import per_token_cast_to_fp8
+
+    device = torch.device("cuda")
+    num_tokens = 7
+    hidden_size = 256
+    top_k = 8
+
+    generator = torch.Generator(device=device)
+    generator.manual_seed(0)
+    hidden_states = (
+        torch.randn(
+            num_tokens,
+            hidden_size,
+            device=device,
+            dtype=torch.float32,
+            generator=generator,
+        )
+        * 17.0
+    ).to(torch.bfloat16)
+    hidden_states[0, :32] = 0
+    hidden_states[1, 32:64] = 1.0e-6
+    hidden_states[2, 64:96] = -1.0e-6
+
+    topk_ids = torch.randint(
+        0,
+        256,
+        (num_tokens, top_k),
+        device=device,
+        dtype=torch.int32,
+        generator=generator,
+    )
+    topk_weights = torch.randn(
+        num_tokens,
+        top_k,
+        device=device,
+        dtype=torch.float32,
+        generator=generator,
+    )
+
+    ref_x, ref_x_sf = per_token_cast_to_fp8(
+        hidden_states,
+        use_ue8m0=True,
+        gran_k=32,
+        use_packed_ue8m0=True,
+    )
+    ref_topk_idx = topk_ids.to(torch.int64)
+    ref_topk_weights = topk_weights.clone()
+
+    fused_x = torch.empty_like(ref_x)
+    fused_x_sf = torch.empty_like(ref_x_sf)
+    fused_topk_idx = torch.empty_like(ref_topk_idx)
+    fused_topk_weights = torch.empty_like(ref_topk_weights)
+
+    _stage_deepseek_v4_mega_moe_inputs(
+        hidden_states,
+        topk_weights,
+        topk_ids,
+        fused_x,
+        fused_x_sf,
+        fused_topk_idx,
+        fused_topk_weights,
+    )
+    torch.accelerator.synchronize()
+
+    assert torch.equal(fused_x.view(torch.uint8), ref_x.view(torch.uint8))
+    assert torch.equal(fused_x_sf, ref_x_sf)
+    assert torch.equal(fused_topk_idx, ref_topk_idx)
+    assert torch.equal(
+        fused_topk_weights.view(torch.uint8),
+        ref_topk_weights.view(torch.uint8),
+    )
diff --git a/tests/models/test_gguf_download.py b/tests/models/test_gguf_download.py
index e9ca35afd66a..7cf8a7660caa 100644
--- a/tests/models/test_gguf_download.py
+++ b/tests/models/test_gguf_download.py
@@ -122,14 +122,17 @@ def test_prepare_weights_repo_filename(self, mock_isfile, mock_hf_download):
 
         mock_hf_download.return_value = "/downloaded/model.gguf"
 
-        # Create a simple mock ModelConfig with only the model attribute
         model_config = MagicMock()
         model_config.model = "unsloth/Qwen3-0.6B-GGUF/model.gguf"
+        model_config.revision = "abc123"
 
         result = loader._prepare_weights(model_config)
         assert result == "/downloaded/model.gguf"
         mock_hf_download.assert_called_once_with(
-            repo_id="unsloth/Qwen3-0.6B-GGUF", filename="model.gguf"
+            repo_id="unsloth/Qwen3-0.6B-GGUF",
+            filename="model.gguf",
+            revision="abc123",
+            cache_dir=None,
         )
 
     @patch("vllm.config.model.get_hf_image_processor_config", return_value=None)
diff --git a/tests/models/test_initialization.py b/tests/models/test_initialization.py
index 979c8d31775c..476ad1c7c17f 100644
--- a/tests/models/test_initialization.py
+++ b/tests/models/test_initialization.py
@@ -109,6 +109,16 @@ def _initialize_kv_caches_v1(self, vllm_config):
             "which is not configured in test environment"
         )
 
+    if model_arch in ("PrithviGeoSpatialMAE", "Terratorch"):
+        import importlib.util
+
+        if importlib.util.find_spec("terratorch") is None:
+            pytest.skip(
+                "terratorch is not installed; "
+                "temporarily skipped while PyPI has `lightning` quarantined "
+                "(see #41376)"
+            )
+
     if model_arch in ["DeepseekV32ForCausalLM", "GlmMoeDsaForCausalLM"]:
         from vllm.platforms import current_platform
 
diff --git a/tests/models/test_registry.py b/tests/models/test_registry.py
index 81fae02efda1..0715409abda6 100644
--- a/tests/models/test_registry.py
+++ b/tests/models/test_registry.py
@@ -36,6 +36,17 @@ def test_registry_imports(model_arch):
         check_max_version=False,
         check_version_reason="vllm",
     )
+
+    if model_arch in ("PrithviGeoSpatialMAE", "Terratorch"):
+        import importlib.util
+
+        if importlib.util.find_spec("terratorch") is None:
+            pytest.skip(
+                "terratorch is not installed; "
+                "temporarily skipped while PyPI has `lightning` quarantined "
+                "(see #41376)"
+            )
+
     # Ensure all model classes can be imported successfully
     model_cls = ModelRegistry._try_load_model_cls(model_arch)
     assert model_cls is not None
diff --git a/tests/models/test_terratorch.py b/tests/models/test_terratorch.py
index 71125dbe94f8..6d4d1921a88d 100644
--- a/tests/models/test_terratorch.py
+++ b/tests/models/test_terratorch.py
@@ -1,12 +1,19 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import importlib.util
+
 import pytest
 import torch
 
 from tests.conftest import VllmRunner
 from tests.utils import create_new_process_for_each_test
 
+pytestmark = pytest.mark.skipif(
+    importlib.util.find_spec("terratorch") is None,
+    reason="terratorch unavailable while PyPI has `lightning` quarantined; see #41376",
+)
+
 
 @create_new_process_for_each_test()  # Hangs otherwise
 @pytest.mark.parametrize(
diff --git a/tests/models/test_utils.py b/tests/models/test_utils.py
index 7cc4ee3c1856..8d47b4436575 100644
--- a/tests/models/test_utils.py
+++ b/tests/models/test_utils.py
@@ -4,9 +4,13 @@
 import pytest
 import torch
 
-from vllm.model_executor.models.utils import AutoWeightsLoader
+from vllm.model_executor.models.utils import (
+    AutoWeightsLoader,
+    _merge_multimodal_embeddings,
+)
+from vllm.platforms import current_platform
 
-pytestmark = pytest.mark.cpu_test
+DEVICE_TYPE = current_platform.device_type
 
 
 class ModuleWithBatchNorm(torch.nn.Module):
@@ -27,6 +31,7 @@ def forward(self, x):
         return self.nested_mod(x)
 
 
+@pytest.mark.cpu_test
 def test_module_with_batchnorm_can_load():
     """Ensure the auto weight loader can load batchnorm stats."""
     mod = ModuleWithBatchNorm()
@@ -52,6 +57,7 @@ def weight_generator():
     assert new_mod.bn.num_batches_tracked.item() == 1
 
 
+@pytest.mark.cpu_test
 def test_module_with_child_containing_batchnorm_can_autoload():
     """Ensure the auto weight loader can load nested modules batchnorm stats."""
     mod = ModuleWithNestedBatchNorm()
@@ -83,6 +89,7 @@ def weight_generator():
     assert new_mod.nested_mod.bn.num_batches_tracked.item() == 1
 
 
+@pytest.mark.cpu_test
 def test_module_skip_prefix():
     """Ensure the auto weight loader can skip prefix."""
     mod = ModuleWithNestedBatchNorm()
@@ -119,6 +126,7 @@ def weight_generator():
     assert new_mod.nested_mod.bn.num_batches_tracked.item() == 1
 
 
+@pytest.mark.cpu_test
 def test_module_skip_substr():
     """Ensure the auto weight loader can skip prefix."""
     mod = ModuleWithNestedBatchNorm()
@@ -155,3 +163,27 @@ def weight_generator():
     )
     assert torch.all(new_mod.nested_mod.bn.running_var == mod.nested_mod.bn.running_var)
     assert new_mod.nested_mod.bn.num_batches_tracked.item() == 1
+
+
+class raise_if_cuda_sync:
+    def __enter__(self):
+        self.previous_debug_mode = torch.cuda.get_sync_debug_mode()
+        torch.cuda.set_sync_debug_mode("error")
+
+    def __exit__(self, exception_type, exception_value, traceback):
+        torch.cuda.set_sync_debug_mode(self.previous_debug_mode)
+
+
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda")
+def test_merge_multimodal_embeddings_no_sync():
+    inputs_embeds = torch.zeros(
+        [5, 10], dtype=torch.bfloat16, device=f"{DEVICE_TYPE}:0"
+    )
+    multimodal_embeddings = [
+        torch.ones([3, 10], dtype=torch.bfloat16, device=f"{DEVICE_TYPE}:0")
+    ]
+    is_multimodal = torch.tensor([True, False, True, True, False], device="cpu")
+    with raise_if_cuda_sync():
+        _merge_multimodal_embeddings(
+            inputs_embeds, multimodal_embeddings, is_multimodal
+        )
diff --git a/tests/models/utils.py b/tests/models/utils.py
index 4830f18dccf5..a5d1844a3071 100644
--- a/tests/models/utils.py
+++ b/tests/models/utils.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import warnings
-from collections.abc import Sequence
+from collections.abc import Mapping, Sequence
 from dataclasses import dataclass
 from typing import Any
 
@@ -277,7 +277,7 @@ def build_model_context(
     dtype: ModelDType = "auto",
     model_config_kwargs: dict[str, Any] | None = None,
     mm_processor_kwargs: dict[str, Any] | None = None,
-    limit_mm_per_prompt: dict[str, int] | None = None,
+    limit_mm_per_prompt: Mapping[str, int | Mapping[str, int]] | None = None,
     mm_processor_cache_gb: int = 0,
 ):
     """Creates an InputProcessingContext for a given model.
@@ -300,7 +300,10 @@ def build_model_context(
     )
 
     model_config_kwargs = model_config_kwargs or {}
-    limit_mm_per_prompt = limit_mm_per_prompt or {}
+    limit_mm_per_prompt = {
+        modality: dict(limit) if isinstance(limit, Mapping) else limit
+        for modality, limit in (limit_mm_per_prompt or {}).items()
+    }
     model_config = ModelConfig(
         model_id,
         runner=runner,
@@ -375,6 +378,7 @@ def softmax(data):
 @dataclass
 class ModelInfo:
     name: str
+    revision: str | None = None
     architecture: str = ""
     dtype: str = "auto"
     max_model_len: int | None = None
@@ -447,9 +451,16 @@ def dummy_hf_overrides(
     Dummy HF overrides function used to create dummy model
     with only minimum nums of layer.
     """
-    hf_config.update(exist_overrides or {})
+    # Copy because this helper is called more than once
+    # while loading config, and we `.pop()`
+    exist_overrides = (exist_overrides or {}).copy()
+    text_config_override = exist_overrides.pop("text_config", None)
+    hf_config.update(exist_overrides)
 
     text_config = hf_config.get_text_config()
+    if text_config_override is not None:
+        # multimodal test models may override *some* text-model fields
+        text_config.update(text_config_override)
 
     # Ensure at least 2 expert per group
     # Since `grouped_topk` assumes top-2
@@ -468,7 +479,16 @@ def dummy_hf_overrides(
     else:
         # Use minimal layers for testing
         num_layers = 1
-        num_hidden_layers = 3 if model_arch == "Gemma3nForConditionalGeneration" else 1
+        num_hidden_layers = (
+            3
+            if model_arch
+            in (
+                "Gemma3nForConditionalGeneration",
+                "Gemma4ForCausalLM",
+                "Gemma4ForConditionalGeneration",
+            )
+            else 1
+        )
 
     update_dict = {
         "num_layers": num_layers,
@@ -506,6 +526,17 @@ class DummyConfig:
 
     text_config.update(update_dict)
 
+    # Update n_layers and moe configs for Moondream3 model
+    if model_arch in ("Moondream3ForCausalLM", "HfMoondream"):
+        text_config.update(
+            {
+                "n_layers": num_hidden_layers,
+                "moe_num_experts": num_experts,
+                "moe_experts_per_token": 2,
+                "moe_start_layer": num_hidden_layers,
+            }
+        )
+
     if hasattr(hf_config, "vision_config"):
         hf_config.vision_config.update(
             {
@@ -514,6 +545,9 @@ class DummyConfig:
             }
         )
 
+        if model_arch in ("Moondream3ForCausalLM", "HfMoondream"):
+            hf_config.vision_config.update({"enc_n_layers": 1})
+
     # e.g.: ibm-granite/granite-speech-3.3-2b
     if hasattr(hf_config, "encoder_config"):
         hf_config.encoder_config.update(
diff --git a/tests/multimodal/media/test_audio.py b/tests/multimodal/media/test_audio.py
index 4361066ab885..3729e71f24e7 100644
--- a/tests/multimodal/media/test_audio.py
+++ b/tests/multimodal/media/test_audio.py
@@ -3,12 +3,12 @@
 from pathlib import Path
 from unittest.mock import patch
 
-import librosa
 import numpy as np
 import pybase64 as base64
 import pytest
 
 from vllm.multimodal.media import AudioMediaIO
+from vllm.multimodal.media.audio import load_audio
 
 from ...conftest import AudioTestAssets
 
@@ -73,6 +73,6 @@ def test_audio_media_io_from_video(video_assets):
     video_path = video_assets[0].video_path
     with open(video_path, "rb") as f:
         audio, sr = audio_io.load_bytes(f.read())
-    audio_ref, sr_ref = librosa.load(video_path, sr=None)
+    audio_ref, sr_ref = load_audio(video_path, sr=None)
     assert sr == sr_ref
     np.testing.assert_allclose(audio_ref, audio, atol=1e-4)
diff --git a/tests/multimodal/media/test_connector.py b/tests/multimodal/media/test_connector.py
index c771cc9a3fdf..b78d24d189f8 100644
--- a/tests/multimodal/media/test_connector.py
+++ b/tests/multimodal/media/test_connector.py
@@ -4,6 +4,8 @@
 import asyncio
 import mimetypes
 import os
+import shutil
+import time
 from tempfile import NamedTemporaryFile, TemporaryDirectory
 
 import aiohttp
@@ -375,3 +377,113 @@ async def test_ssrf_bypass_backslash_disallowed_domain():
 
     with pytest.raises(ValueError, match="allowed domains"):
         await connector.fetch_image_async(bypass_url)
+
+
+def _make_cached_connector(cache_dir, *, max_mb=10, ttl_hours=24):
+    """Create a MediaConnector with caching enabled via monkeypatched internals.
+
+    We bypass __init__'s env-var path and wire up the cache fields directly
+    so tests don't depend on environment variables. URLs in these tests are
+    only used as cache keys (hashed to derive filenames); no HTTP requests
+    are made.
+    """
+    connector = MediaConnector()
+    connector._media_cache_dir = cache_dir
+    connector._media_cache_max_bytes = max_mb * 1024 * 1024
+    connector._media_cache_ttl_secs = ttl_hours * 3600
+    return connector
+
+
+def test_cache_put_and_get():
+    """Basic round-trip: put bytes, get them back."""
+    with TemporaryDirectory() as cache_dir:
+        connector = _make_cached_connector(cache_dir)
+        url = "https://example.com/image.png"
+        data = b"fake-image-bytes"
+
+        connector._put_cached_bytes(url, data)
+        cached = connector._get_cached_bytes(url)
+        assert cached == data
+
+
+def test_cache_ttl_expiry():
+    """Entries older than TTL are evicted on read."""
+    with TemporaryDirectory() as cache_dir:
+        connector = _make_cached_connector(cache_dir, ttl_hours=24)
+        url = "https://example.com/old.png"
+        data = b"old-data"
+
+        connector._put_cached_bytes(url, data)
+
+        # Backdate the file's mtime so it appears expired
+        cache_path = connector._media_cache_path(url)
+        expired_time = time.time() - (25 * 3600)  # 25 hours ago
+        os.utime(cache_path, (expired_time, expired_time))
+
+        assert connector._get_cached_bytes(url) is None
+        assert not cache_path.exists()
+
+
+def test_cache_lru_eviction():
+    """Oldest entries are evicted when cache exceeds size budget."""
+    with TemporaryDirectory() as cache_dir:
+        # Set a very small max size: 100 bytes
+        connector = _make_cached_connector(cache_dir, max_mb=0)
+        connector._media_cache_max_bytes = 100
+
+        # Write three 50-byte entries (total 150 > 100 budget)
+        urls = [f"https://example.com/{i}.png" for i in range(3)]
+        for i, url in enumerate(urls):
+            connector._put_cached_bytes(url, b"x" * 50)
+            # Stagger mtime so eviction order is deterministic
+            path = connector._media_cache_path(url)
+            os.utime(path, (time.time() + i, time.time() + i))
+
+        # The oldest entry (urls[0]) should have been evicted
+        assert connector._get_cached_bytes(urls[0]) is None
+        # The newest entries should still be present
+        assert connector._get_cached_bytes(urls[2]) == b"x" * 50
+
+
+def test_cache_ttl_eviction_during_write():
+    """_maybe_evict removes expired files even if under size budget."""
+    with TemporaryDirectory() as cache_dir:
+        connector = _make_cached_connector(cache_dir, ttl_hours=1)
+        url_old = "https://example.com/stale.png"
+        url_new = "https://example.com/fresh.png"
+
+        connector._put_cached_bytes(url_old, b"stale")
+        # Backdate old entry past TTL
+        old_path = connector._media_cache_path(url_old)
+        expired_time = time.time() - (2 * 3600)
+        os.utime(old_path, (expired_time, expired_time))
+
+        # Writing a new entry triggers _maybe_evict
+        connector._put_cached_bytes(url_new, b"fresh")
+
+        assert not old_path.exists()
+        assert connector._get_cached_bytes(url_new) == b"fresh"
+
+
+def test_put_cached_bytes_missing_dir():
+    """_put_cached_bytes does not crash when the cache dir disappears."""
+    with TemporaryDirectory() as cache_dir:
+        connector = _make_cached_connector(cache_dir)
+        # Remove the directory to simulate it disappearing at runtime
+        shutil.rmtree(cache_dir)
+
+        # Should not raise (graceful degradation)
+        connector._put_cached_bytes("https://example.com/x.png", b"data")
+
+
+def test_get_cached_bytes_file_deleted_before_read():
+    """_get_cached_bytes returns None if the file vanishes mid-read."""
+    with TemporaryDirectory() as cache_dir:
+        connector = _make_cached_connector(cache_dir)
+        url = "https://example.com/vanish.png"
+
+        connector._put_cached_bytes(url, b"data")
+        # Delete the file to simulate concurrent eviction
+        connector._media_cache_path(url).unlink()
+
+        assert connector._get_cached_bytes(url) is None
diff --git a/tests/multimodal/media/test_image.py b/tests/multimodal/media/test_image.py
index 065a40d68e35..65196d7805cc 100644
--- a/tests/multimodal/media/test_image.py
+++ b/tests/multimodal/media/test_image.py
@@ -131,3 +131,77 @@ def test_image_media_io_rgba_background_color_validation():
     ImageMediaIO(rgba_background_color=(0, 0, 0))  # Should not raise
     ImageMediaIO(rgba_background_color=[255, 255, 255])  # Should not raise
     ImageMediaIO(rgba_background_color=(128, 128, 128))  # Should not raise
+
+
+def test_image_media_io_load_bytes(tmp_path):
+    """Test load_bytes with valid and invalid image data."""
+    # Save a valid RGB image to use as source bytes
+    valid_image = Image.new("RGB", (8, 8), (100, 150, 200))
+    valid_path = tmp_path / "valid.png"
+    valid_image.save(valid_path)
+
+    valid_data = valid_path.read_bytes()
+
+    # Test 1: Valid image bytes load successfully and are fully decoded
+    image_io = ImageMediaIO()
+    result = image_io.load_bytes(valid_data)
+
+    # Check the returned media is a properly loaded image
+    assert isinstance(result.media, Image.Image)
+    assert result.media.size == (8, 8)
+    assert result.media.getpixel((0, 0)) == (100, 150, 200)
+
+    # Test 2: Garbage bytes raise ValueError
+    with pytest.raises(ValueError, match="Failed to load image"):
+        image_io.load_bytes(b"not an image")
+
+    # Test 3: Truncated PNG header raises ValueError
+    with pytest.raises(ValueError, match="Failed to load image"):
+        image_io.load_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 10)
+
+    # Test 4: Real PNG truncated mid-stream raises ValueError
+    with pytest.raises(ValueError, match="Failed to load image"):
+        image_io.load_bytes(valid_data[: len(valid_data) // 2])
+
+    # Test 5: Empty bytes raise ValueError
+    with pytest.raises(ValueError, match="Failed to load image"):
+        image_io.load_bytes(b"")
+
+
+def test_image_media_io_load_file(tmp_path):
+    """Test load_file with valid and invalid image files."""
+    # Save a valid RGB image to disk
+    valid_image = Image.new("RGB", (4, 4), (10, 20, 30))
+    valid_path = tmp_path / "valid.png"
+    valid_image.save(valid_path)
+
+    # Test 1: Valid image file loads successfully and is fully decoded
+    image_io = ImageMediaIO()
+    result = image_io.load_file(valid_path)
+
+    # Check the returned media is a properly loaded image
+    assert isinstance(result.media, Image.Image)
+    assert result.media.size == (4, 4)
+    assert result.media.getpixel((0, 0)) == (10, 20, 30)
+
+    # Test 2: File with garbage content raises ValueError
+    bad_file = tmp_path / "bad.png"
+    bad_file.write_bytes(b"this is not an image")
+
+    with pytest.raises(ValueError, match="Failed to load image"):
+        image_io.load_file(bad_file)
+
+    # Test 3: File with truncated PNG header raises ValueError
+    truncated_file = tmp_path / "truncated.png"
+    truncated_file.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 10)
+
+    with pytest.raises(ValueError, match="Failed to load image"):
+        image_io.load_file(truncated_file)
+
+    # Test 4: Real PNG file truncated mid-stream raises ValueError
+    valid_data = valid_path.read_bytes()
+    truncated_real_file = tmp_path / "truncated_real.png"
+    truncated_real_file.write_bytes(valid_data[: len(valid_data) // 2])
+
+    with pytest.raises(ValueError, match="Failed to load image"):
+        image_io.load_file(truncated_real_file)
diff --git a/tests/multimodal/media/test_video.py b/tests/multimodal/media/test_video.py
index a1223ebc07e2..73283ba8c331 100644
--- a/tests/multimodal/media/test_video.py
+++ b/tests/multimodal/media/test_video.py
@@ -239,6 +239,17 @@ def test_video_media_io_backend_env_var_fallback(monkeypatch: pytest.MonkeyPatch
         assert metadata_missing["video_backend"] == "test_video_backend_override_2"
 
 
+def _make_jpeg_b64_frames(n: int, width: int = 8, height: int = 8) -> list[str]:
+    """Return *n* tiny base64-encoded JPEG frames."""
+    frames: list[str] = []
+    for i in range(n):
+        img = Image.new("RGB", (width, height), color=(i % 256, 0, 0))
+        buf = io.BytesIO()
+        img.save(buf, format="JPEG")
+        frames.append(pybase64.b64encode(buf.getvalue()).decode("ascii"))
+    return frames
+
+
 def test_load_base64_jpeg_returns_metadata():
     """Regression test: load_base64 with video/jpeg must return metadata.
 
@@ -248,16 +259,8 @@ def test_load_base64_jpeg_returns_metadata():
     """
 
     num_test_frames = 3
-    frame_width, frame_height = 8, 8
-
-    # Build a few tiny JPEG frames and base64-encode them
-    b64_frames = []
-    for i in range(num_test_frames):
-        img = Image.new("RGB", (frame_width, frame_height), color=(i * 80, 0, 0))
-        buf = io.BytesIO()
-        img.save(buf, format="JPEG")
-        b64_frames.append(pybase64.b64encode(buf.getvalue()).decode("ascii"))
 
+    b64_frames = _make_jpeg_b64_frames(num_test_frames)
     data = ",".join(b64_frames)
 
     imageio = ImageMediaIO()
@@ -287,3 +290,52 @@ def test_load_base64_jpeg_returns_metadata():
     # Default fps=1 → duration == num_frames
     assert metadata["fps"] == 1.0
     assert metadata["duration"] == float(num_test_frames)
+
+
+def test_load_base64_jpeg_enforces_num_frames_limit():
+    """Frames beyond num_frames must be truncated in the video/jpeg path.
+
+    Without the limit an attacker can send thousands of base64 JPEG frames
+    in a single request and exhaust server memory (OOM).
+    """
+    num_frames_limit = 4
+    sent_frames = 20
+
+    b64_frames = _make_jpeg_b64_frames(sent_frames)
+    data = ",".join(b64_frames)
+
+    imageio = ImageMediaIO()
+    videoio = VideoMediaIO(imageio, num_frames=num_frames_limit)
+    frames, metadata = videoio.load_base64("video/jpeg", data)
+
+    assert frames.shape[0] == num_frames_limit
+    assert metadata["total_num_frames"] == num_frames_limit
+    assert metadata["frames_indices"] == list(range(num_frames_limit))
+
+
+def test_load_base64_jpeg_no_limit_when_num_frames_negative():
+    """When num_frames is -1, all frames should be loaded without truncation."""
+    sent_frames = 10
+
+    b64_frames = _make_jpeg_b64_frames(sent_frames)
+    data = ",".join(b64_frames)
+
+    imageio = ImageMediaIO()
+    videoio = VideoMediaIO(imageio, num_frames=-1)
+    frames, metadata = videoio.load_base64("video/jpeg", data)
+
+    assert frames.shape[0] == sent_frames
+    assert metadata["total_num_frames"] == sent_frames
+    assert metadata["frames_indices"] == list(range(sent_frames))
+
+
+def test_load_base64_jpeg_raises_on_zero_num_frames():
+    """num_frames=0 is invalid and should raise ValueError."""
+    b64_frames = _make_jpeg_b64_frames(3)
+    data = ",".join(b64_frames)
+
+    imageio = ImageMediaIO()
+    videoio = VideoMediaIO(imageio, num_frames=0)
+
+    with pytest.raises(ValueError, match="num_frames must be greater than 0 or -1"):
+        videoio.load_base64("video/jpeg", data)
diff --git a/tests/multimodal/test_audio.py b/tests/multimodal/test_audio.py
index 0bc8988452f0..7e6a66946a32 100644
--- a/tests/multimodal/test_audio.py
+++ b/tests/multimodal/test_audio.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 # test_audio.py
+import math
 from unittest.mock import patch
 
 import numpy as np
@@ -45,7 +46,6 @@ def test_resample_audio_scipy(dummy_audio):
     assert np.all(out_same == dummy_audio)
 
 
-@pytest.mark.xfail(reason="resample_audio_scipy is buggy for non-integer ratios")
 def test_resample_audio_scipy_non_integer_ratio(dummy_audio):
     out = resample_audio_scipy(dummy_audio, orig_sr=5, target_sr=3)
 
@@ -56,6 +56,26 @@ def test_resample_audio_scipy_non_integer_ratio(dummy_audio):
     assert np.isfinite(out).all()
 
 
+def test_resample_audio_scipy_non_divisible_sample_rates():
+    audio = np.arange(441, dtype=float)
+    out = resample_audio_scipy(audio, orig_sr=44100, target_sr=16000)
+
+    expected_len = math.ceil(len(audio) * 16000 / 44100)
+    assert len(out) == expected_len
+
+    assert isinstance(out, np.ndarray)
+    assert np.isfinite(out).all()
+
+
+def test_resample_audio_scipy_resamples_last_axis_for_multichannel():
+    audio = np.arange(2 * 441, dtype=float).reshape(2, 441)
+    out = resample_audio_scipy(audio, orig_sr=44100, target_sr=16000)
+
+    expected_len = math.ceil(audio.shape[-1] * 16000 / 44100)
+    assert out.shape == (2, expected_len)
+    assert np.isfinite(out).all()
+
+
 def test_audio_resampler_pyav_calls_resample(dummy_audio):
     resampler = AudioResampler(target_sr=22050, method="pyav")
     with patch("vllm.multimodal.audio.resample_audio_pyav") as mock_resample:
diff --git a/tests/multimodal/test_cache.py b/tests/multimodal/test_cache.py
index d01b94ac9af2..30df1d831a0a 100644
--- a/tests/multimodal/test_cache.py
+++ b/tests/multimodal/test_cache.py
@@ -549,3 +549,42 @@ def test_processor_cache_shared_across_loras():
 
     receiver_cache.get_and_update_features([feature_lora_b])
     assert feature_lora_b.data == item_data
+
+
+_SLEEP_VISION_PROMPT = (
+    "<|im_start|>system\nYou are a helpful assistant.<|im_end|>"
+    "\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>"
+    "What is in the image?<|im_end|>\n"
+    "<|im_start|>assistant\n"
+)
+
+
+@pytest.mark.skipif(
+    not torch.cuda.is_available(),
+    reason="sleep mode regression requires a CUDA GPU",
+)
+def test_sleep_wake_preserves_mm_cache_consistency():
+    """Regression for vllm-project/vllm#42995."""
+    from vllm import LLM, SamplingParams
+    from vllm.assets.image import ImageAsset
+
+    image = ImageAsset("stop_sign").pil_image
+    prompt = {
+        "prompt": _SLEEP_VISION_PROMPT,
+        "multi_modal_data": {"image": image},
+    }
+    sampling_params = SamplingParams(temperature=0, max_tokens=8)
+
+    llm = LLM(
+        model="Qwen/Qwen2-VL-2B-Instruct",
+        enable_sleep_mode=True,
+        enforce_eager=True,
+        gpu_memory_utilization=0.5,
+        max_model_len=2048,
+    )
+
+    llm.generate([prompt], sampling_params)
+    llm.sleep(level=1)
+    llm.wake_up()
+    output2 = llm.generate([prompt], sampling_params)
+    assert output2[0].outputs[0].text
diff --git a/tests/multimodal/test_inputs.py b/tests/multimodal/test_inputs.py
index d6bdf76a6f79..7752a543f429 100644
--- a/tests/multimodal/test_inputs.py
+++ b/tests/multimodal/test_inputs.py
@@ -26,11 +26,8 @@ def test_placeholder_range_get_num_embeds(is_embed, expected):
     "is_embed,expected",
     [
         (None, None),
-        (
-            torch.tensor([False, True, False, True, True]),
-            torch.tensor([0, 1, 1, 2, 3]),
-        ),
-        (torch.tensor([True, True, True]), torch.tensor([1, 2, 3])),
+        (torch.tensor([False, True, False, True, True]), [0, 1, 1, 2, 3]),
+        (torch.tensor([True, True, True]), [1, 2, 3]),
     ],
 )
 def test_placeholder_range_embeds_cumsum(is_embed, expected):
@@ -41,6 +38,6 @@ def test_placeholder_range_embeds_cumsum(is_embed, expected):
         assert pr.embeds_cumsum is None
         return
 
-    assert torch.equal(pr.embeds_cumsum, expected)
+    assert pr.embeds_cumsum == expected
     # cached_property should return the same object on repeated access
     assert pr.embeds_cumsum is pr.embeds_cumsum
diff --git a/tests/multimodal/test_registry.py b/tests/multimodal/test_registry.py
index 3b01bda7f54c..7ee83cc4f99b 100644
--- a/tests/multimodal/test_registry.py
+++ b/tests/multimodal/test_registry.py
@@ -5,6 +5,8 @@
 Qwen2.5-VL visual component loading behavior.
 """
 
+from types import SimpleNamespace
+
 import pytest
 
 from vllm.multimodal import MULTIMODAL_REGISTRY
@@ -32,3 +34,17 @@ def test_supports_multimodal_inputs(model_id, limit_mm_per_prompt, expected):
         limit_mm_per_prompt=limit_mm_per_prompt,
     )
     assert MULTIMODAL_REGISTRY.supports_multimodal_inputs(ctx.model_config) is expected
+
+
+def test_create_processor_error_uses_served_model_name():
+    model_config = SimpleNamespace(
+        is_multimodal_model=False,
+        model="/path/to/model/weights",
+        served_model_name="friendly-model-name",
+    )
+
+    with pytest.raises(
+        ValueError,
+        match="friendly-model-name is not a multimodal model",
+    ):
+        MULTIMODAL_REGISTRY.create_processor(model_config)
diff --git a/tests/multimodal/test_video.py b/tests/multimodal/test_video.py
index 3ece384348bc..7c024052a439 100644
--- a/tests/multimodal/test_video.py
+++ b/tests/multimodal/test_video.py
@@ -13,7 +13,7 @@
     VideoLoader,
 )
 
-from .utils import create_video_from_image
+from .utils import create_long_gop_video, create_video_from_image
 
 pytestmark = pytest.mark.cpu_test
 
@@ -71,7 +71,9 @@ def test_video_backend_handles_broken_frames(monkeypatch: pytest.MonkeyPatch):
             video_data = f.read()
 
         loader = VIDEO_LOADER_REGISTRY.load("opencv")
-        frames, metadata = loader.load_bytes(video_data, num_frames=-1)
+        frames, metadata = loader.load_bytes(
+            video_data, num_frames=-1, backend="opencv"
+        )
 
         # Verify metadata consistency:
         # frames_indices must match actual loaded frames
@@ -158,12 +160,12 @@ def release(self):
 
         # Test WITHOUT recovery - should have fewer frames due to failures
         frames_no_recovery, meta_no = loader.load_bytes(
-            video_data, num_frames=8, frame_recovery=False
+            video_data, num_frames=8, frame_recovery=False, backend="opencv"
         )
 
         # Test WITH recovery - should recover using next valid frames
         frames_with_recovery, meta_yes = loader.load_bytes(
-            video_data, num_frames=8, frame_recovery=True
+            video_data, num_frames=8, frame_recovery=True, backend="opencv"
         )
 
         # With recovery should have MORE frames than without
@@ -214,12 +216,12 @@ def test_video_recovery_with_corrupted_file(monkeypatch: pytest.MonkeyPatch):
 
         # Test without recovery - frame 17 will be skipped
         frames_no_recovery, meta_no_recovery = loader.load_bytes(
-            video_data, num_frames=8, frame_recovery=False
+            video_data, num_frames=8, frame_recovery=False, backend="opencv"
         )
 
         # Test with recovery - frame 18 should fill in for frame 17
         frames_with_recovery, meta_with_recovery = loader.load_bytes(
-            video_data, num_frames=8, frame_recovery=True
+            video_data, num_frames=8, frame_recovery=True, backend="opencv"
         )
 
         # Verify metadata consistency for both modes
@@ -271,12 +273,16 @@ def test_video_recovery_dynamic_backend(monkeypatch: pytest.MonkeyPatch):
 
         # Test without recovery
         frames_no_recovery, meta_no = loader.load_bytes(
-            video_data, fps=2, max_duration=10, frame_recovery=False
+            video_data,
+            fps=2,
+            max_duration=10,
+            frame_recovery=False,
+            backend="opencv",
         )
 
         # Test with frame_recovery enabled
         frames_with_recovery, meta_with = loader.load_bytes(
-            video_data, fps=2, max_duration=10, frame_recovery=True
+            video_data, fps=2, max_duration=10, frame_recovery=True, backend="opencv"
         )
 
         # Verify basic properties
@@ -310,27 +316,124 @@ def dummy_video_path(tmp_path):
     return video_path
 
 
+# ============================================================================
+# PyAV Backend Tests
+# ============================================================================
+
+
+def test_pyav_backend_loads_frames(dummy_video_path, monkeypatch: pytest.MonkeyPatch):
+    """Test that the pyav codec backend can load frames from a valid video."""
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_VIDEO_LOADER_BACKEND", "opencv")
+
+        with open(dummy_video_path, "rb") as f:
+            video_data = f.read()
+
+        loader = VIDEO_LOADER_REGISTRY.load("opencv")
+        frames, metadata = loader.load_bytes(video_data, num_frames=8, backend="pyav")
+
+        assert frames.ndim == 4
+        assert frames.shape[3] == 3  # RGB
+        assert frames.shape[0] == 8
+        assert frames.shape[0] == len(metadata["frames_indices"])
+        assert metadata["video_backend"] == "pyav"
+        assert "total_num_frames" in metadata
+        assert "fps" in metadata
+        assert "duration" in metadata
+
+
+def test_pyav_dynamic_backend_loads_frames(
+    dummy_video_path, monkeypatch: pytest.MonkeyPatch
+):
+    """Test that the pyav codec with dynamic sampling can load frames."""
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_VIDEO_LOADER_BACKEND", "opencv_dynamic")
+
+        with open(dummy_video_path, "rb") as f:
+            video_data = f.read()
+
+        loader = VIDEO_LOADER_REGISTRY.load("opencv_dynamic")
+        frames, metadata = loader.load_bytes(
+            video_data, fps=2, max_duration=10, backend="pyav"
+        )
+
+        assert frames.ndim == 4
+        assert frames.shape[3] == 3  # RGB
+        assert frames.shape[0] > 0
+        assert frames.shape[0] == len(metadata["frames_indices"])
+        assert metadata["video_backend"] == "pyav_dynamic"
+
+
+def test_pyav_backend_returns_target_frames_not_keyframes():
+    """Regression test: PyAV must decode forward past the seek keyframe.
+
+    container.seek() snaps backward to the nearest keyframe. With a long GOP
+    (here: one keyframe at frame 0), a decoder that does not advance forward
+    to the target PTS collapses every sampled slot onto the keyframe. This
+    test encodes a per-frame marker on the green channel and verifies the
+    returned frames are distinct, ordered, and match the requested indices.
+    """
+    num_frames = 50
+    num_sampled = 4
+    height, width = 64, 64
+
+    video_bytes = create_long_gop_video(
+        num_frames=num_frames, width=width, height=height
+    )
+
+    loader = VIDEO_LOADER_REGISTRY.load("opencv")
+    frames, metadata = loader.load_bytes(
+        video_bytes, num_frames=num_sampled, backend="pyav"
+    )
+    assert frames.shape == (num_sampled, height, width, 3)
+
+    requested = list(metadata["frames_indices"])
+    assert len(requested) == num_sampled
+
+    actual = [int(f[height // 2, width // 2, 1]) for f in frames]
+
+    assert len(set(actual)) == num_sampled, (
+        f"PyAV returned only {len(set(actual))} distinct frames for "
+        f"{num_sampled} requested indices: markers={actual}, "
+        f"requested={requested}. Keyframe-snap regression."
+    )
+
+    assert actual == sorted(actual), f"Returned frames out of order: markers={actual}"
+
+    for marker, want_idx in zip(actual, requested):
+        assert abs(marker - want_idx) <= 10, (
+            f"Frame mismatch: requested index {want_idx}, "
+            f"got marker {marker} (tolerance ±10)"
+        )
+
+
 @pytest.mark.parametrize(
-    "backend, kwargs, expected_num_frames",
+    "loader_key, kwargs, expected_num_frames",
     [
-        # opencv: num_frames directly controls count
-        pytest.param("opencv", {"num_frames": 32}, 32, id="opencv-num_frames"),
-        pytest.param("opencv", {"fps": 2}, 120, id="opencv-fps"),
+        # uniform sampling + opencv codec
         pytest.param(
             "opencv",
-            {"num_frames": 500, "fps": 2},
+            {"num_frames": 32, "backend": "opencv"},
+            32,
+            id="opencv-num_frames",
+        ),
+        pytest.param("opencv", {"fps": 2, "backend": "opencv"}, 120, id="opencv-fps"),
+        pytest.param(
+            "opencv",
+            {"num_frames": 500, "fps": 2, "backend": "opencv"},
             120,
             id="opencv-num_frames_wins_fps",
         ),
+        # dynamic sampling + opencv codec
         pytest.param(
             "opencv_dynamic",
-            {"fps": 1, "max_duration": 60},
+            {"fps": 1, "max_duration": 60, "backend": "opencv"},
             60,
             id="opencv_dynamic-within_max_duration",
         ),
         pytest.param(
             "opencv_dynamic",
-            {"fps": 2, "max_duration": 30},
+            {"fps": 2, "max_duration": 30, "backend": "opencv"},
             60,
             id="opencv_dynamic-exceeds_max_duration",
         ),
@@ -349,18 +452,45 @@ def dummy_video_path(tmp_path):
             119,
             id="molmo2-fps",
         ),
+        # uniform sampling + pyav codec (same frame counts as opencv)
+        pytest.param(
+            "opencv",
+            {"num_frames": 32, "backend": "pyav"},
+            32,
+            id="pyav-num_frames",
+        ),
+        pytest.param("opencv", {"fps": 2, "backend": "pyav"}, 120, id="pyav-fps"),
+        pytest.param(
+            "opencv",
+            {"num_frames": 500, "fps": 2, "backend": "pyav"},
+            120,
+            id="pyav-num_frames_wins_fps",
+        ),
+        # dynamic sampling + pyav codec
+        pytest.param(
+            "opencv_dynamic",
+            {"fps": 1, "max_duration": 60, "backend": "pyav"},
+            60,
+            id="pyav_dynamic-within_max_duration",
+        ),
+        pytest.param(
+            "opencv_dynamic",
+            {"fps": 2, "max_duration": 30, "backend": "pyav"},
+            60,
+            id="pyav_dynamic-exceeds_max_duration",
+        ),
     ],
 )
 def test_video_loader_frames_sampling(
     dummy_video_path,
     monkeypatch: pytest.MonkeyPatch,
-    backend: str,
+    loader_key: str,
     kwargs: dict,
     expected_num_frames: int,
 ):
     """Test video loader frames sampling functionality."""
-    monkeypatch.setenv("VLLM_VIDEO_LOADER_BACKEND", backend)
-    loader = VIDEO_LOADER_REGISTRY.load(backend)
+    monkeypatch.setenv("VLLM_VIDEO_LOADER_BACKEND", loader_key)
+    loader = VIDEO_LOADER_REGISTRY.load(loader_key)
 
     with open(dummy_video_path, "rb") as f:
         long_video_bytes = f.read()
diff --git a/tests/multimodal/utils.py b/tests/multimodal/utils.py
index 485bde939f69..32f3ec0e4233 100644
--- a/tests/multimodal/utils.py
+++ b/tests/multimodal/utils.py
@@ -66,6 +66,43 @@ def create_video_from_image(
     return video_path
 
 
+def create_long_gop_video(
+    num_frames: int = 50,
+    fps: int = 30,
+    width: int = 64,
+    height: int = 64,
+) -> bytes:
+    """Encode an H.264 clip with one keyframe and green-channel = frame index.
+
+    The marker lets a test recover which frame the decoder actually returned,
+    independent of any metadata label.
+    """
+    import io
+
+    import av
+
+    buf = io.BytesIO()
+    with av.open(buf, mode="w", format="mp4") as container:
+        stream = container.add_stream("h264", rate=fps)
+        stream.width = width
+        stream.height = height
+        stream.pix_fmt = "yuv420p"
+        stream.codec_context.gop_size = num_frames
+        stream.codec_context.max_b_frames = 0
+        stream.codec_context.options = {
+            "x264-params": (f"scenecut=0:keyint={num_frames}:min-keyint={num_frames}")
+        }
+        for i in range(num_frames):
+            img = np.zeros((height, width, 3), dtype=np.uint8)
+            img[:, :, 1] = i
+            frame = av.VideoFrame.from_ndarray(img, format="rgb24")
+            for packet in stream.encode(frame):
+                container.mux(packet)
+        for packet in stream.encode():
+            container.mux(packet)
+    return buf.getvalue()
+
+
 def cosine_similarity(A: npt.NDArray, B: npt.NDArray, axis: int = -1) -> npt.NDArray:
     """Compute cosine similarity between two vectors."""
     return np.sum(A * B, axis=axis) / (
diff --git a/tests/quantization/test_mi3xx_moe.py b/tests/parser/__init__.py
similarity index 57%
rename from tests/quantization/test_mi3xx_moe.py
rename to tests/parser/__init__.py
index 2f8dfde68477..208f01a7cb5e 100644
--- a/tests/quantization/test_mi3xx_moe.py
+++ b/tests/parser/__init__.py
@@ -1,6 +1,2 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-
-def test_mi3xx_moe():
-    print("TODO: add tests for Mi3xx MoE quantization")
diff --git a/tests/parser/test_streaming.py b/tests/parser/test_streaming.py
new file mode 100644
index 000000000000..c4409117ad91
--- /dev/null
+++ b/tests/parser/test_streaming.py
@@ -0,0 +1,237 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+
+import pytest
+
+from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+from vllm.entrypoints.openai.engine.protocol import DeltaMessage
+from vllm.parser.abstract_parser import _WrappedParser
+from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
+from vllm.tool_parsers.hermes_tool_parser import Hermes2ProToolParser
+
+
+class ThinkReasoningParser(BaseThinkingReasoningParser):
+    @property
+    def start_token(self) -> str:
+        return "<think>"
+
+    @property
+    def end_token(self) -> str:
+        return "</think>"
+
+
+MODEL_OUTPUT = (
+    "<think>let me think about this</think>"
+    '<tool_call>\n{"name": "get_weather", '
+    '"arguments": {"city": "Dallas"}}\n</tool_call>'
+)
+
+
+@pytest.fixture(scope="module")
+def tokenizer():
+    from vllm.tokenizers import get_tokenizer
+
+    return get_tokenizer("Qwen/Qwen3-32B")
+
+
+@pytest.fixture
+def request_obj():
+    return ChatCompletionRequest(
+        model="test-model",
+        messages=[{"role": "user", "content": "hi"}],
+    )
+
+
+def make_parser(tokenizer, reasoning=False, tool=False):
+    _WrappedParser.reasoning_parser_cls = ThinkReasoningParser if reasoning else None
+    _WrappedParser.tool_parser_cls = Hermes2ProToolParser if tool else None
+    return _WrappedParser(tokenizer)
+
+
+def stream_text(parser, tokenizer, text, request, prompt_token_ids=None):
+    token_ids = tokenizer.encode(text, add_special_tokens=False)
+    results: list[DeltaMessage | None] = []
+    for tid in token_ids:
+        delta_text = tokenizer.decode([tid])
+        result = parser.parse_delta(
+            delta_text, [tid], request, prompt_token_ids=prompt_token_ids
+        )
+        prompt_token_ids = None
+        results.append(result)
+    return results
+
+
+def collect_fields(results):
+    all_reasoning = "".join(r.reasoning for r in results if r and r.reasoning)
+    all_content = "".join(r.content for r in results if r and r.content)
+    all_tool_calls = [tc for r in results if r and r.tool_calls for tc in r.tool_calls]
+    return all_reasoning, all_content, all_tool_calls
+
+
+def test_parse_delta_neither_parser(tokenizer, request_obj):
+    parser = make_parser(tokenizer, reasoning=False, tool=False)
+    results = stream_text(
+        parser, tokenizer, MODEL_OUTPUT, request_obj, prompt_token_ids=[]
+    )
+    reasoning, content, tool_calls = collect_fields(results)
+
+    assert reasoning == ""
+    assert len(tool_calls) == 0
+    assert "<think>" in content
+    assert "let me think about this" in content
+    assert "<tool_call>" in content
+    assert "get_weather" in content
+
+
+def test_parse_delta_tool_parser_only(tokenizer, request_obj):
+    parser = make_parser(tokenizer, reasoning=False, tool=True)
+    results = stream_text(
+        parser, tokenizer, MODEL_OUTPUT, request_obj, prompt_token_ids=[]
+    )
+    reasoning, content, tool_calls = collect_fields(results)
+
+    assert reasoning == ""
+    assert "<think>" in content
+    assert "let me think about this" in content
+    assert "</think>" in content
+
+    assert len(tool_calls) > 0
+    assert tool_calls[0].function.name == "get_weather"
+    tool_args = "".join(
+        tc.function.arguments for tc in tool_calls if tc.function.arguments
+    )
+    assert json.loads(tool_args) == {"city": "Dallas"}
+
+
+def test_parse_delta_reasoning_parser_only(tokenizer, request_obj):
+    parser = make_parser(tokenizer, reasoning=True, tool=False)
+    results = stream_text(
+        parser, tokenizer, MODEL_OUTPUT, request_obj, prompt_token_ids=[]
+    )
+    reasoning, content, tool_calls = collect_fields(results)
+
+    assert "let me think about this" in reasoning
+    assert len(tool_calls) == 0
+    assert "<tool_call>" in content
+    assert "get_weather" in content
+    assert "</tool_call>" in content
+
+
+def test_parse_delta_both_parsers(tokenizer, request_obj):
+    parser = make_parser(tokenizer, reasoning=True, tool=True)
+    results = stream_text(
+        parser, tokenizer, MODEL_OUTPUT, request_obj, prompt_token_ids=[]
+    )
+    reasoning, content, tool_calls = collect_fields(results)
+
+    assert "let me think about this" in reasoning
+    assert content == ""
+
+    assert len(tool_calls) > 0
+    assert tool_calls[0].function.name == "get_weather"
+    tool_args = "".join(
+        tc.function.arguments for tc in tool_calls if tc.function.arguments
+    )
+    assert json.loads(tool_args) == {"city": "Dallas"}
+
+
+def stream_chunks(parser, tokenizer, chunks, request_obj):
+    """Stream pre-split token-ID chunks through the parser."""
+    results: list[DeltaMessage | None] = []
+    prompt_token_ids: list[int] | None = []
+    for chunk in chunks:
+        delta_text = tokenizer.decode(chunk)
+        result = parser.parse_delta(
+            delta_text, chunk, request_obj, prompt_token_ids=prompt_token_ids
+        )
+        prompt_token_ids = None
+        results.append(result)
+    return results
+
+
+def _boundary_chunks(tokenizer, parser):
+    """Split MODEL_OUTPUT into 3 chunks that straddle the </think> boundary."""
+    token_ids = tokenizer.encode(MODEL_OUTPUT, add_special_tokens=False)
+    end_token_id = parser._reasoning_parser.end_token_id
+    end_idx = token_ids.index(end_token_id)
+    return [
+        token_ids[: end_idx - 1],
+        token_ids[end_idx - 1 : end_idx + 2],
+        token_ids[end_idx + 2 :],
+    ]
+
+
+def test_parse_delta_reasoning_not_dropped_on_boundary(tokenizer, request_obj):
+    """Regression: reasoning must not be lost when a multi-token delta
+    spans the reasoning/tool-call boundary."""
+    parser = make_parser(tokenizer, reasoning=True, tool=True)
+    chunks = _boundary_chunks(tokenizer, parser)
+    results = stream_chunks(parser, tokenizer, chunks, request_obj)
+    reasoning, content, tool_calls = collect_fields(results)
+
+    assert "think about this" in reasoning
+    assert content == ""
+    assert len(tool_calls) > 0
+    assert tool_calls[0].function.name == "get_weather"
+    tool_args = "".join(
+        tc.function.arguments for tc in tool_calls if tc.function.arguments
+    )
+    assert json.loads(tool_args) == {"city": "Dallas"}
+
+
+def test_parse_delta_reasoning_boundary_no_tool_parser(tokenizer, request_obj):
+    """When no tool parser is active, boundary-spanning chunks must still
+    preserve reasoning and pass post-</think> text as content."""
+    parser = make_parser(tokenizer, reasoning=True, tool=False)
+    chunks = _boundary_chunks(tokenizer, parser)
+    results = stream_chunks(parser, tokenizer, chunks, request_obj)
+    reasoning, content, tool_calls = collect_fields(results)
+
+    assert "think about this" in reasoning
+    assert len(tool_calls) == 0
+    assert "<tool_call>" in content
+    assert "get_weather" in content
+
+
+def test_parse_delta_reasoning_only_no_think_leak(tokenizer, request_obj):
+    """Regression: </think> must not leak into content when streaming
+    token-by-token with reasoning=True, tool=False."""
+    parser = make_parser(tokenizer, reasoning=True, tool=False)
+    results = stream_text(
+        parser, tokenizer, MODEL_OUTPUT, request_obj, prompt_token_ids=[]
+    )
+    reasoning, content, tool_calls = collect_fields(results)
+
+    assert "let me think about this" in reasoning
+    assert "</think>" not in content
+    assert "<think>" not in content
+
+
+def test_parse_delta_reasoning_only_thinking_disabled(tokenizer, request_obj):
+    """Regression test for vllm-project/vllm#40466.
+
+    When enable_thinking=False, the chat template places <think>\\n\\n</think>
+    in the prompt. The model then generates pure content (no think tokens).
+    All streaming output must go to delta.content, not delta.reasoning.
+    """
+    parser = make_parser(tokenizer, reasoning=True, tool=False)
+
+    end_token_id = parser._reasoning_parser.end_token_id
+    prompt_token_ids = [1, 2, end_token_id, 3]
+
+    content_text = "Hello! How can I assist you today?"
+    results = stream_text(
+        parser,
+        tokenizer,
+        content_text,
+        request_obj,
+        prompt_token_ids=prompt_token_ids,
+    )
+    reasoning, content, tool_calls = collect_fields(results)
+
+    assert reasoning == "", f"Expected no reasoning, got: {reasoning!r}"
+    assert "Hello" in content
+    assert "assist" in content
+    assert len(tool_calls) == 0
diff --git a/tests/plugins_tests/test_terratorch_io_processor_plugins.py b/tests/plugins_tests/test_terratorch_io_processor_plugins.py
index 34799b3c42c0..b4c84b30d2ca 100644
--- a/tests/plugins_tests/test_terratorch_io_processor_plugins.py
+++ b/tests/plugins_tests/test_terratorch_io_processor_plugins.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import importlib.util
 import io
 
 import imagehash
@@ -11,6 +12,11 @@
 from tests.utils import RemoteOpenAIServer
 from vllm.entrypoints.pooling.pooling.protocol import IOProcessorResponse
 
+pytestmark = pytest.mark.skipif(
+    importlib.util.find_spec("terratorch") is None,
+    reason="terratorch unavailable while PyPI has `lightning` quarantined; see #41376",
+)
+
 models_config = {
     "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11": {
         "image_url": "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/valencia_example_2024-10-26.tiff",  # noqa: E501
diff --git a/tests/quantization/test_auto_gptq.py b/tests/quantization/test_auto_gptq.py
new file mode 100644
index 000000000000..b733ee486c18
--- /dev/null
+++ b/tests/quantization/test_auto_gptq.py
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests that the auto_gptq quantization method works correctly.
+
+Run `pytest tests/quantization/test_auto_gptq.py -v -s`.
+"""
+
+import pytest
+import torch
+
+from tests.quantization.utils import is_quant_method_supported
+from vllm.model_executor.layers.quantization.auto_gptq import (
+    AutoGPTQConfig,
+    AutoGPTQLinearMethod,
+)
+
+PROMPT = "On the surface of Mars, we found"
+
+MODELS = [
+    "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ",
+]
+
+
+@pytest.mark.skipif(
+    not is_quant_method_supported("auto_gptq"),
+    reason="auto_gptq is not supported on this GPU type.",
+)
+@pytest.mark.parametrize("model_id", MODELS)
+def test_auto_gptq_quantization_method(vllm_runner, model_id: str, monkeypatch):
+    """Test that quantization='auto_gptq' loads and runs correctly."""
+    monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
+
+    with vllm_runner(
+        model_id,
+        dtype=torch.float16,
+        quantization="auto_gptq",
+        max_model_len=2048,
+        enforce_eager=True,
+    ) as llm:
+
+        def check_model(model):
+            for name, submodule in model.named_modules():
+                if name == "model.layers.0.self_attn.qkv_proj":
+                    assert isinstance(submodule.quant_method, AutoGPTQLinearMethod)
+                    break
+
+        llm.apply_model(check_model)
+
+        outputs = llm.generate_greedy([PROMPT], max_tokens=8)
+        assert outputs
+        assert len(outputs[0][1]) > 0
+
+
+def test_auto_gptq_config_get_name():
+    """Test that AutoGPTQConfig.get_name() returns 'auto_gptq'."""
+    assert AutoGPTQConfig.get_name() == "auto_gptq"
diff --git a/tests/quantization/test_blackwell_moe.py b/tests/quantization/test_blackwell_moe.py
index 3af08e0269ab..8c525149ca70 100644
--- a/tests/quantization/test_blackwell_moe.py
+++ b/tests/quantization/test_blackwell_moe.py
@@ -190,13 +190,27 @@ def test_gptoss_mxfp4bf16_moe_flashinfer(monkeypatch: pytest.MonkeyPatch):
 
 
 def test_gptoss_mxfp4mxfp8_moe_flashinfer_cutlass(monkeypatch: pytest.MonkeyPatch):
-    monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS", "1")
-    can_initialize("openai/gpt-oss-20b", hf_overrides=HF_OVERRIDE_TEXT)
+    can_initialize(
+        "openai/gpt-oss-20b",
+        hf_overrides=HF_OVERRIDE_TEXT,
+        extra_args=[
+            "--moe-backend",
+            "flashinfer_cutlass",
+            "--quantization-config.moe.activation",
+            "mxfp8",
+        ],
+    )
 
 
 def test_gptoss_mxfp4mxfp8_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
-    monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8", "1")
-    can_initialize("openai/gpt-oss-20b", hf_overrides=HF_OVERRIDE_TEXT)
+    can_initialize(
+        "openai/gpt-oss-20b",
+        hf_overrides=HF_OVERRIDE_TEXT,
+        extra_args=[
+            "--quantization-config.moe.activation",
+            "mxfp8",
+        ],
+    )
 
 
 def test_gptoss_eager(monkeypatch: pytest.MonkeyPatch):
@@ -210,6 +224,13 @@ def test_gptoss_eager(monkeypatch: pytest.MonkeyPatch):
 ## Qwen3 Next ##
 
 
+@pytest.mark.skip(
+    reason=(
+        "FLASHINFER TRTLLM MoE has a bug with all negative router logits "
+        "for models with RENORMALIZE. This will be re-enabled once the "
+        "issue is fixed in flashinfer."
+    )
+)
 def test_qwen3_next_bf16_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
     can_initialize(
         "Qwen/Qwen3-Next-80B-A3B-Instruct",
diff --git a/tests/quantization/test_compressed_tensors.py b/tests/quantization/test_compressed_tensors.py
index 913b755b9074..2165361da678 100644
--- a/tests/quantization/test_compressed_tensors.py
+++ b/tests/quantization/test_compressed_tensors.py
@@ -5,27 +5,42 @@
 Run `pytest tests/quantization/test_compressed_tensors.py`.
 """
 
+from unittest.mock import Mock
+
 import pytest
 import torch
-from compressed_tensors.quantization import QuantizationType
+from compressed_tensors.quantization import (
+    QuantizationArgs,
+    QuantizationStrategy,
+    QuantizationType,
+)
 
 from tests.models.utils import check_logprobs_close
+from vllm.model_executor.kernels.linear import (
+    Fp8BlockScaledMMLinearKernel,
+)
 from vllm.model_executor.layers.fused_moe import UnquantizedFusedMoEMethod
 from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors import (  # noqa: E501
+    CompressedTensorsConfig,
     CompressedTensorsLinearMethod,
     CompressedTensorsW4A4Fp4,
+    CompressedTensorsW4A4Mxfp4,
     CompressedTensorsW4A8Fp8,
     CompressedTensorsW4A16Fp4,
     CompressedTensorsW8A8Fp8,
     CompressedTensorsW8A8Int8,
+    CompressedTensorsW8A8Mxfp8,
     CompressedTensorsW8A16Fp8,
     CompressedTensorsWNA16,
 )
+from vllm.model_executor.layers.quantization.compressed_tensors.utils import (
+    find_matched_target,
+)
 from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
-from vllm.model_executor.layers.quantization.utils.fp8_utils import W8A8BlockFp8LinearOp
 from vllm.model_executor.layers.quantization.utils.nvfp4_utils import (
     cutlass_fp4_supported,
 )
+from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
 from vllm.platforms import current_platform
 from vllm.v1.attention.backends.fa_utils import get_flash_attn_version
 
@@ -358,9 +373,6 @@ def test_compressed_tensors_kv_cache_fp8_per_attn_head(vllm_runner):
         assert output
 
 
-@pytest.mark.skipif(
-    not current_platform.is_cuda(), reason="This test is skipped on non-CUDA platform."
-)
 @pytest.mark.parametrize(
     "args",
     [
@@ -390,7 +402,7 @@ def check_model(model):
             assert qkv_proj.scheme.group_size == 16
 
         llm.apply_model(check_model)
-        output = llm.generate_greedy("Hello my name is", max_tokens=4)
+        output = llm.generate_greedy(["Hello my name is"], max_tokens=4)
         print(output)
         assert output
 
@@ -468,16 +480,14 @@ def check_model(model):
             qkv_proj = layer.self_attn.qkv_proj
             assert isinstance(qkv_proj.quant_method, CompressedTensorsLinearMethod)
             assert isinstance(qkv_proj.scheme, CompressedTensorsW8A8Fp8)
-            assert isinstance(
-                qkv_proj.scheme.w8a8_block_fp8_linear, W8A8BlockFp8LinearOp
-            )
+            assert isinstance(qkv_proj.scheme.fp8_linear, Fp8BlockScaledMMLinearKernel)
 
             assert qkv_proj.weight.dtype is fp8_dtype
             assert qkv_proj.weight_scale.dtype is torch.float32
             assert len(qkv_proj.weight.shape) == 2
             assert len(qkv_proj.weight_scale.shape) == 2
 
-            input_quant_op = qkv_proj.scheme.w8a8_block_fp8_linear.input_quant_op
+            input_quant_op = qkv_proj.scheme.fp8_linear.quant_fp8
             assert isinstance(input_quant_op, QuantFP8)
             assert input_quant_op._forward_method in (
                 input_quant_op.forward_cuda,
@@ -558,3 +568,151 @@ def test_w4a16_moe_torch_compile(vllm_runner):
     ) as llm:
         output = llm.generate_greedy("Hi", max_tokens=1)
         assert output
+
+
+def _make_ct_config(*, target: str = "Linear") -> CompressedTensorsConfig:
+    """Build a minimal CompressedTensorsConfig with INT8 channel quant."""
+    weight_quant = QuantizationArgs(
+        num_bits=8,
+        type=QuantizationType.INT,
+        strategy=QuantizationStrategy.CHANNEL,
+        symmetric=True,
+        dynamic=False,
+    )
+    return CompressedTensorsConfig(
+        target_scheme_map={
+            target: {
+                "weights": weight_quant,
+                "input_activations": None,
+                "format": "pack-quantized",
+            }
+        },
+        ignore=[],
+        quant_format="pack-quantized",
+    )
+
+
+def test_get_quant_method_returns_linear_method_for_parallel_lm_head():
+    """ParallelLMHead whose name matches a target must get a quantised method."""
+    config = _make_ct_config(target="re:.*lm_head")
+    mock_lm_head = Mock(spec=ParallelLMHead)
+    mock_lm_head.__class__ = ParallelLMHead
+
+    method = config.get_quant_method(mock_lm_head, prefix="model.lm_head")
+
+    assert isinstance(method, CompressedTensorsLinearMethod), (
+        f"Expected CompressedTensorsLinearMethod, got {type(method).__name__}"
+    )
+
+
+def test_get_quant_method_returns_none_for_ignored_parallel_lm_head():
+    """ParallelLMHead on the ignore list should be left unquantized (None)."""
+    config = _make_ct_config(target="re:.*lm_head")
+    config.ignore = ["re:.*lm_head"]
+    mock_lm_head = Mock(spec=ParallelLMHead)
+    mock_lm_head.__class__ = ParallelLMHead
+
+    method = config.get_quant_method(mock_lm_head, prefix="model.lm_head")
+
+    assert method is None, (
+        f"Expected None for ignored ParallelLMHead, got {type(method).__name__}"
+    )
+
+
+def test_get_quant_method_returns_none_for_unmatched_parallel_lm_head():
+    """ParallelLMHead with target='Linear' (typical real model) must not crash.
+
+    Most compressed-tensors models only target 'Linear'. ParallelLMHead does
+    not match that target, so get_quant_method should return None (unquantized)
+    instead of raising ValueError.
+    """
+    config = _make_ct_config(target="Linear")
+    mock_lm_head = Mock(spec=ParallelLMHead)
+    mock_lm_head.__class__ = ParallelLMHead
+
+    method = config.get_quant_method(mock_lm_head, prefix="model.lm_head")
+
+    assert method is None, (
+        f"Expected None for unmatched ParallelLMHead, got {type(method).__name__}"
+    )
+
+
+def test_find_matched_target_returns_none_on_no_match():
+    result = find_matched_target(
+        layer_name="model.layers.0.self_attn.qkv_proj",
+        module=Mock(spec=torch.nn.Linear),
+        targets=["no_match_target"],
+    )
+    assert result is None
+
+
+def test_get_scheme_dict_returns_none_on_no_match():
+    config = _make_ct_config(target="matched_layer")
+    result = config.get_scheme_dict(
+        layer=Mock(spec=torch.nn.Linear),
+        layer_name="model.layers.0.unmatched_layer",
+    )
+    assert result is None
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda() or not current_platform.has_device_capability(75),
+    reason="MXFP8 requires Turing (sm_75+) or newer.",
+)
+def test_compressed_tensors_mxfp8_moe_setup(vllm_runner):
+    """Verify MXFP8 scheme, dtypes, and generation for a MoE model."""
+    model_path = "AliEdalati97/Qwen3-30B-A3B-MXFP8"
+    with vllm_runner(
+        model_path,
+        enforce_eager=True,
+        load_format="dummy",
+        hf_overrides={"num_hidden_layers": 4},
+    ) as llm:
+
+        def check_model(model):
+            from vllm.model_executor.layers.fused_moe import FusedMoE
+            from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.compressed_tensors_moe_w8a8_mxfp8 import (  # noqa: E501
+                CompressedTensorsW8A8Mxfp8MoEMethod,
+            )
+
+            layer = model.model.layers[0]
+
+            qkv = layer.self_attn.qkv_proj
+            assert isinstance(qkv.quant_method, CompressedTensorsLinearMethod)
+            assert isinstance(qkv.scheme, CompressedTensorsW8A8Mxfp8)
+
+            experts = layer.mlp.experts
+            assert isinstance(experts, FusedMoE)
+            assert isinstance(experts.quant_method, CompressedTensorsW8A8Mxfp8MoEMethod)
+
+        llm.apply_model(check_model)
+        output = llm.generate_greedy("Hello my name is", max_tokens=4)
+        assert output
+
+
+@pytest.mark.skipif(
+    not current_platform.is_cuda() or not current_platform.has_device_capability(80),
+    reason="MXFP4 requires ampere or newer",
+)
+def test_compressed_tensors_mxfp4(vllm_runner):
+    model_path = "nm-testing/TinyLlama-1.1B-Chat-v1.0-MXFP4"
+    with vllm_runner(model_path, enforce_eager=True) as llm:
+
+        def check_model(model):
+            layer = model.model.layers[0]
+
+            qkv_proj = layer.self_attn.qkv_proj
+            o_proj = layer.self_attn.o_proj
+            gate_up_proj = layer.mlp.gate_up_proj
+            down_proj = layer.mlp.down_proj
+
+            for proj in (qkv_proj, o_proj, gate_up_proj, down_proj):
+                assert isinstance(proj.quant_method, CompressedTensorsLinearMethod)
+                assert isinstance(proj.scheme, CompressedTensorsW4A4Mxfp4)
+
+                # Verify group size
+                assert proj.scheme.group_size == 32
+
+        llm.apply_model(check_model)
+        output = llm.generate_greedy("Hello my name is", max_tokens=4)
+        assert output
diff --git a/tests/quantization/test_configs.py b/tests/quantization/test_configs.py
index 40ca587bc6ef..fe5f8735d6c5 100644
--- a/tests/quantization/test_configs.py
+++ b/tests/quantization/test_configs.py
@@ -24,43 +24,35 @@ class ModelPair:
     # AUTOGPTQ
     # compat: autogptq <=0.7.1 is_marlin_format: bool
     # Model Serialized in Exllama Format.
-    (
-        "TheBloke/Llama-2-7B-Chat-GPTQ",
-        None,
-        "gptq_marlin" if current_platform.is_cuda() else "gptq",
-    ),
+    ("TheBloke/Llama-2-7B-Chat-GPTQ", None, "auto_gptq"),
     (
         "TheBloke/Llama-2-7B-Chat-GPTQ",
         "marlin",
-        "gptq_marlin" if current_platform.is_cuda() else "ERROR",
+        "auto_gptq" if current_platform.is_cuda() else "ERROR",
     ),
-    ("TheBloke/Llama-2-7B-Chat-GPTQ", "gptq", "gptq"),
+    ("TheBloke/Llama-2-7B-Chat-GPTQ", "gptq", "auto_gptq"),
     ("TheBloke/Llama-2-7B-Chat-GPTQ", "awq", "ERROR"),
     # compat: autogptq >=0.8.0 use checkpoint_format: str
     # Model Serialized in Exllama Format.
-    (
-        "LnL-AI/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit",
-        None,
-        "gptq_marlin" if current_platform.is_cuda() else "gptq",
-    ),
+    ("LnL-AI/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit", None, "auto_gptq"),
     (
         "LnL-AI/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit",
         "marlin",
-        "gptq_marlin" if current_platform.is_cuda() else "ERROR",
+        "auto_gptq" if current_platform.is_cuda() else "ERROR",
     ),
-    ("LnL-AI/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit", "gptq", "gptq"),
+    ("LnL-AI/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit", "gptq", "auto_gptq"),
     ("LnL-AI/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit", "awq", "ERROR"),
     # AUTOAWQ
     (
         "TheBloke/OpenHermes-2.5-Mistral-7B-AWQ",
         None,
-        "awq_marlin" if current_platform.is_cuda() else "awq",
+        "awq_marlin" if current_platform.is_cuda_alike() else "awq",
     ),
     ("TheBloke/OpenHermes-2.5-Mistral-7B-AWQ", "awq", "awq"),
     (
         "TheBloke/OpenHermes-2.5-Mistral-7B-AWQ",
         "marlin",
-        "awq_marlin" if current_platform.is_cuda() else "ERROR",
+        "awq_marlin" if current_platform.is_cuda_alike() else "ERROR",
     ),
     ("TheBloke/OpenHermes-2.5-Mistral-7B-AWQ", "gptq", "ERROR"),
 ]
diff --git a/tests/quantization/test_cpu_offload.py b/tests/quantization/test_cpu_offload.py
index 3b58614e58d4..151b5d97ddf3 100644
--- a/tests/quantization/test_cpu_offload.py
+++ b/tests/quantization/test_cpu_offload.py
@@ -70,4 +70,5 @@ def test_cpu_offload_compressed_tensors(monkeypatch):
         ["--enforce_eager"],
         ["--enforce_eager", "--cpu-offload-gb", "1"],
         max_wait_seconds=480,
+        include_seeded_sampling=False,
     )
diff --git a/tests/quantization/test_cpu_wna16.py b/tests/quantization/test_cpu_wna16.py
index 650bf714a071..5414d7571a53 100644
--- a/tests/quantization/test_cpu_wna16.py
+++ b/tests/quantization/test_cpu_wna16.py
@@ -12,6 +12,10 @@
     "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ",  # with g_idx
     "Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4",  # without g_idx
     "RedHatAI/Qwen3-1.7B-quantized.w4a16",  # with zp
+    "OPEA/Qwen2.5-0.5B-Instruct-int4-sym-inc",
+    "Qwen/Qwen3-0.6B-FP8",  # FP8 W8A16 block-quantized linear
+    "Qwen/Qwen3-30B-A3B-FP8",  # FP8 W8A16 block-quantized MoE
+    "openai/gpt-oss-20b",  # MXFP4 W4A16
 ]
 DTYPE = ["bfloat16"]
 
diff --git a/tests/quantization/test_cutlass_w4a16.py b/tests/quantization/test_cutlass_w4a16.py
new file mode 100644
index 000000000000..b0ccb6c68a97
--- /dev/null
+++ b/tests/quantization/test_cutlass_w4a16.py
@@ -0,0 +1,185 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for Cutlass W4A16 (Machete) kernel on Hopper.
+
+Verifies that W4A16 quantized models loaded through vllm select the
+MacheteLinearKernel on sm_90 GPUs, that weights are correctly repacked,
+and that inference produces valid output.
+
+Run `pytest tests/quantization/test_cutlass_w4a16.py`.
+"""
+
+import pytest
+import torch
+
+from vllm.platforms import current_platform
+
+if not current_platform.has_device_capability(90) or current_platform.is_rocm():
+    pytest.skip(
+        "Machete W4A16 requires Hopper (sm_90).",
+        allow_module_level=True,
+    )
+
+from vllm.model_executor.kernels.linear import (
+    MPLinearLayerConfig,
+    choose_mp_linear_kernel,
+)
+from vllm.model_executor.kernels.linear.mixed_precision import (
+    MacheteLinearKernel,
+)
+from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors import (  # noqa: E501
+    CompressedTensorsLinearMethod,
+    CompressedTensorsWNA16,
+)
+from vllm.scalar_type import scalar_types
+
+
+@pytest.fixture(scope="function", autouse=True)
+def enable_pickle(monkeypatch):
+    """`LLM.apply_model` requires pickling a function."""
+    monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
+
+
+@pytest.mark.parametrize(
+    "act_type,weight_type,group_size,zero_points",
+    [
+        (torch.float16, scalar_types.uint4b8, 128, False),
+        (torch.bfloat16, scalar_types.uint4b8, 128, False),
+        (torch.float16, scalar_types.uint4, 128, True),
+        (torch.float16, scalar_types.uint4b8, -1, False),
+    ],
+    ids=[
+        "fp16-gptq-g128",
+        "bf16-gptq-g128",
+        "fp16-awq-g128",
+        "fp16-channelwise",
+    ],
+)
+def test_machete_kernel_selected(act_type, weight_type, group_size, zero_points):
+    """Verify choose_mp_linear_kernel picks MacheteLinearKernel."""
+    config = MPLinearLayerConfig(
+        full_weight_shape=(4096, 4096),
+        partition_weight_shape=(4096, 4096),
+        act_type=act_type,
+        weight_type=weight_type,
+        group_size=group_size,
+        zero_points=zero_points,
+        has_g_idx=False,
+    )
+    kernel = choose_mp_linear_kernel(config)
+    assert kernel is MacheteLinearKernel, (
+        f"Expected MacheteLinearKernel, got {kernel.__name__}"
+    )
+
+
+@pytest.mark.parametrize(
+    "full_shape,part_shape,weight_type,group_size,has_g_idx,expected_reason",
+    [
+        ((4096, 4096), (2048, 4096), scalar_types.uint4b8, 128, True, "Act reordering"),
+        (
+            (4096, 4096),
+            (4096, 4096),
+            scalar_types.float6_e3m2f,
+            128,
+            False,
+            "Quant type",
+        ),
+        ((4096, 4096), (4096, 4096), scalar_types.uint4b8, 32, False, "Group size"),
+    ],
+    ids=["partitioned-g_idx", "unsupported-quant-type", "unsupported-group-size"],
+)
+def test_machete_rejects_invalid_config(
+    full_shape, part_shape, weight_type, group_size, has_g_idx, expected_reason
+):
+    """Verify Machete rejects unsupported configurations."""
+    config = MPLinearLayerConfig(
+        full_weight_shape=full_shape,
+        partition_weight_shape=part_shape,
+        act_type=torch.float16,
+        weight_type=weight_type,
+        group_size=group_size,
+        zero_points=False,
+        has_g_idx=has_g_idx,
+    )
+    can_impl, reason = MacheteLinearKernel.can_implement(config)
+    assert not can_impl
+    assert expected_reason in reason
+
+
+def test_kernel_selection_with_disabled_machete(monkeypatch):
+    """Verify kernel selection falls back when Machete is disabled."""
+    monkeypatch.setattr("vllm.envs.VLLM_DISABLED_KERNELS", ["MacheteLinearKernel"])
+
+    config = MPLinearLayerConfig(
+        full_weight_shape=(4096, 4096),
+        partition_weight_shape=(4096, 4096),
+        act_type=torch.float16,
+        weight_type=scalar_types.uint4b8,
+        group_size=128,
+        zero_points=False,
+        has_g_idx=False,
+    )
+    kernel = choose_mp_linear_kernel(config)
+    assert kernel is not MacheteLinearKernel, "MacheteLinearKernel should be disabled"
+
+
+@pytest.mark.parametrize(
+    "model_name",
+    [
+        "nm-testing/tinyllama-oneshot-w4a16-channel-v2",
+        "nm-testing/TinyLlama-1.1B-Chat-v1.0-W4A16-G128-Asym-Updated-ActOrder",
+    ],
+)
+def test_w4a16_machete_e2e(vllm_runner, model_name):
+    """Load a W4A16 model, verify Machete kernel is used, and generate."""
+    with vllm_runner(model_name, enforce_eager=True, gpu_memory_utilization=0.5) as llm:
+
+        def check_model(model):
+            layer = model.model.layers[0]
+            qkv_proj = layer.self_attn.qkv_proj
+
+            assert isinstance(qkv_proj.quant_method, CompressedTensorsLinearMethod)
+            assert isinstance(qkv_proj.scheme, CompressedTensorsWNA16)
+            assert isinstance(qkv_proj.scheme.kernel, MacheteLinearKernel), (
+                f"Expected MacheteLinearKernel on Hopper, "
+                f"got {type(qkv_proj.scheme.kernel).__name__}"
+            )
+
+            assert hasattr(qkv_proj, "weight_packed")
+            assert hasattr(qkv_proj, "weight_scale")
+            assert qkv_proj.weight_packed.dtype == torch.int32
+
+        llm.apply_model(check_model)
+
+        output = llm.generate_greedy("Hello my name is", max_tokens=10)
+        assert output
+        assert len(output[0][1]) > 0
+
+
+def test_w4a16_machete_bfloat16_deterministic(vllm_runner):
+    """Verify Machete works with bf16 activations and is deterministic."""
+    model_name = "nm-testing/tinyllama-oneshot-w4a16-channel-v2"
+    prompt = "The capital of France is"
+
+    with vllm_runner(
+        model_name,
+        enforce_eager=True,
+        dtype="bfloat16",
+        gpu_memory_utilization=0.5,
+    ) as llm:
+
+        def check_kernel_type(model):
+            layer = model.model.layers[0]
+            scheme = layer.self_attn.qkv_proj.scheme
+            assert isinstance(scheme.kernel, MacheteLinearKernel), (
+                f"Expected MacheteLinearKernel with bf16, "
+                f"got {type(scheme.kernel).__name__}"
+            )
+
+        llm.apply_model(check_kernel_type)
+
+        out1 = llm.generate_greedy(prompt, max_tokens=10)
+        out2 = llm.generate_greedy(prompt, max_tokens=10)
+        assert out1[0][1] == out2[0][1], (
+            f"Non-deterministic: '{out1[0][1]}' vs '{out2[0][1]}'"
+        )
diff --git a/tests/quantization/test_experts_int8.py b/tests/quantization/test_experts_int8.py
index 22edb9c58daf..7cdb135fa077 100644
--- a/tests/quantization/test_experts_int8.py
+++ b/tests/quantization/test_experts_int8.py
@@ -38,6 +38,5 @@ def test_model_experts_int8_startup(
         dtype=dtype,
         enforce_eager=True,
         quantization="experts_int8",
-        allow_deprecated_quantization=True,
     ) as vllm_model:
         vllm_model.generate_greedy(example_prompts, max_tokens)
diff --git a/tests/quantization/test_fp8.py b/tests/quantization/test_fp8.py
index 6f8e0f87b890..b93d34afbb9b 100644
--- a/tests/quantization/test_fp8.py
+++ b/tests/quantization/test_fp8.py
@@ -13,6 +13,7 @@
 
 from tests.quantization.utils import is_quant_method_supported
 from vllm import _custom_ops as ops
+from vllm.config.model import ModelConfig
 from vllm.model_executor.layers.fused_moe import FusedMoE
 from vllm.model_executor.layers.quantization.fp8 import (
     Fp8Config,
@@ -23,6 +24,8 @@
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.platforms import current_platform
 
+DEVICE_TYPE = current_platform.device_type
+
 MODELS = [
     "neuralmagic/Meta-Llama-3-8B-Instruct-FP8-KV",
     # The checkpoint below was removed from the HF.
@@ -313,7 +316,7 @@ def per_tensor_dequantize(tensor, inv_scale, dtype):
 
     # Note that we use a shape % 4 != 0 to cover edge cases,
     # because scaled_fp8_quant is vectorized by 4.
-    x = (torch.randn(size=(11, 11), device="cuda") * 13).to(dtype)
+    x = (torch.randn(size=(11, 11), device=DEVICE_TYPE) * 13).to(dtype)
 
     # Dynamic quantization
     ref_y, inv_scale = ops.scaled_fp8_quant(x, None)
@@ -337,7 +340,9 @@ def per_tensor_dequantize(tensor, inv_scale, dtype):
 
     # non-contiguous input with padding
     m, n, padded_stride = 975, 512, 576
-    padded_tensor = (torch.randn(size=(m, padded_stride), device="cuda") * 13).to(dtype)
+    padded_tensor = (torch.randn(size=(m, padded_stride), device=DEVICE_TYPE) * 13).to(
+        dtype
+    )
     x_nc = padded_tensor[:, :n]  # shape (m, n) with stride (padded_stride, 1)
 
     assert not x_nc.is_contiguous()
@@ -406,7 +411,9 @@ def test_fp8_reloading(
             "If this is your use case, consider using a restore function like #26327"
         )
 
-    with torch.device("cuda:0"):
+    # Set model config as model_config.dtype is required in Fp8LinearMethod.
+    default_vllm_config.model_config = ModelConfig()
+    with torch.device(f"{DEVICE_TYPE}:0"):
         config = Fp8Config(
             is_checkpoint_fp8_serialized=is_checkpoint_fp8_serialized,
             weight_block_size=weight_block_size,
@@ -458,11 +465,9 @@ def test_fp8_reloading(
     method.process_weights_after_loading(layer)
 
     # test reloading works after loading
-    # assuming that no reshaping occurred
-    for name, shape, original_weight_loader in original_metadata:
+    for name, shape, _ in original_metadata:
         param = getattr(layer, name)
         weight_loader = getattr(param, "weight_loader", default_weight_loader)
-        assert weight_loader is original_weight_loader
         weight_loader(param, torch.zeros(shape))  # cannot use empty
 
     method.process_weights_after_loading(layer)
diff --git a/tests/quantization/test_gfx950_moe.py b/tests/quantization/test_gfx950_moe.py
new file mode 100644
index 000000000000..4b65961d8dbd
--- /dev/null
+++ b/tests/quantization/test_gfx950_moe.py
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Tests for MXFP4 MoE oracle backend selection on mi355x (GFX950).
+
+These tests run on real hardware — no mocks. Skipped on non-GFX950 platforms.
+"""
+
+import pytest
+import torch
+
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEParallelConfig,
+    RoutingMethodType,
+)
+from vllm.model_executor.layers.fused_moe.oracle.mxfp4 import (
+    Mxfp4MoeBackend,
+    select_mxfp4_moe_backend,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    kMxfp4Dynamic,
+)
+from vllm.platforms import current_platform
+
+ROCM_AVAILABLE = current_platform.is_rocm()
+ROCM_GFX950 = False
+ROCM_AITER_AVAILABLE = False
+
+if ROCM_AVAILABLE:
+    from vllm._aiter_ops import rocm_aiter_ops
+    from vllm.platforms.rocm import on_gfx950
+
+    ROCM_GFX950 = on_gfx950()
+    ROCM_AITER_AVAILABLE = rocm_aiter_ops.is_fused_moe_enabled()
+
+
+def _make_w4a4_moe_config(moe_backend: str = "auto") -> FusedMoEConfig:
+    from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+
+    return FusedMoEConfig(
+        num_experts=8,
+        experts_per_token=2,
+        hidden_dim=256,
+        intermediate_size_per_partition=256,
+        num_local_experts=8,
+        num_logical_experts=8,
+        moe_parallel_config=FusedMoEParallelConfig.make_no_parallel(),
+        activation=MoEActivation.SILU,
+        in_dtype=torch.bfloat16,
+        device="cuda",
+        routing_method=RoutingMethodType.Renormalize,
+        moe_backend=moe_backend,
+    )
+
+
+@pytest.mark.skipif(not ROCM_GFX950, reason="Requires GFX950 (mi355x)")
+@pytest.mark.skipif(not ROCM_AITER_AVAILABLE, reason="Requires AITER enabled")
+def test_w4a4_dispatches_to_aiter():
+    """With AITER enabled + GFX950, W4A4 selects AITER_MXFP4_MXFP4."""
+    config = _make_w4a4_moe_config()
+    backend, experts_cls = select_mxfp4_moe_backend(
+        config, activation_key=kMxfp4Dynamic
+    )
+    assert backend == Mxfp4MoeBackend.AITER_MXFP4_MXFP4
+    assert experts_cls is not None
+
+
+@pytest.mark.skipif(not ROCM_GFX950, reason="Requires GFX950 (mi355x)")
+@pytest.mark.skipif(
+    ROCM_AITER_AVAILABLE,
+    reason="Test requires AITER disabled (unset VLLM_ROCM_USE_AITER)",
+)
+def test_w4a4_raises_without_aiter_and_no_moe_backend():
+    """Without AITER and no --moe-backend, raises NotImplementedError
+    with hint to use --moe-backend emulation."""
+    config = _make_w4a4_moe_config()
+    with pytest.raises(NotImplementedError, match="--moe-backend emulation"):
+        select_mxfp4_moe_backend(config, activation_key=kMxfp4Dynamic)
+
+
+@pytest.mark.skipif(not ROCM_GFX950, reason="Requires GFX950 (mi355x)")
+def test_w4a4_dispatches_to_emulation_with_moe_backend():
+    """With --moe-backend emulation, W4A4 selects EMULATION."""
+    config = _make_w4a4_moe_config(moe_backend="emulation")
+    backend, experts_cls = select_mxfp4_moe_backend(
+        config, activation_key=kMxfp4Dynamic
+    )
+    assert backend == Mxfp4MoeBackend.EMULATION
+    assert experts_cls is not None
diff --git a/tests/quantization/test_gptq_dynamic.py b/tests/quantization/test_gptq_dynamic.py
index f35e49094fc4..809f31aec25c 100644
--- a/tests/quantization/test_gptq_dynamic.py
+++ b/tests/quantization/test_gptq_dynamic.py
@@ -3,46 +3,36 @@
 """Tests whether gptq models with dynamic quantized can be loaded.
 
 Run `pytest tests/quantization/test_gptq_dynamic.py --forked`.
+
+Note: Only symmetric GPTQ models are supported after consolidation to Marlin.
 """
 
 import pytest
 import torch
 
 from vllm.model_executor.layers.linear import UnquantizedLinearMethod
-from vllm.model_executor.layers.quantization.gptq import GPTQLinearMethod
-from vllm.model_executor.layers.quantization.gptq_marlin import GPTQMarlinLinearMethod
+from vllm.model_executor.layers.quantization.auto_gptq import AutoGPTQLinearMethod
 from vllm.model_executor.layers.quantization.utils.gptq_utils import (
     get_dynamic_override,
 )
-from vllm.platforms import current_platform
 
 PROMPT = "On the surface of Mars, we found"
 
 # The first layer is quantized using bits=4, group_size=128
 # The second layer is quantized using bits=8, group_size=32
 # All other layers (layer index >= 2) are not quantized
-MODEL_QUANT = [
-    (
-        "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symTrue",
-        current_platform.is_cuda(),
-    ),
-    (
-        "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symFalse",
-        False,
-    ),
+# Note: Only symmetric models are supported with Marlin kernels
+MODELS = [
+    "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symTrue",
 ]
 
 
-@pytest.mark.parametrize("model_id, use_marlin_kernel", MODEL_QUANT)
-def test_gptq_with_dynamic(
-    vllm_runner, model_id: str, use_marlin_kernel: bool, monkeypatch
-):
+@pytest.mark.parametrize("model_id", MODELS)
+def test_gptq_with_dynamic(vllm_runner, model_id: str, monkeypatch):
     # `LLM.apply_model` requires pickling a function.
     monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
 
-    linear_method_cls = (
-        GPTQMarlinLinearMethod if use_marlin_kernel else (GPTQLinearMethod)
-    )
+    linear_method_cls = AutoGPTQLinearMethod
 
     with vllm_runner(
         model_id, dtype=torch.float16, max_model_len=2048, enforce_eager=True
diff --git a/tests/quantization/test_gptq_v2.py b/tests/quantization/test_gptq_v2.py
index dbafa2e8e7d1..a10088f40390 100644
--- a/tests/quantization/test_gptq_v2.py
+++ b/tests/quantization/test_gptq_v2.py
@@ -3,6 +3,9 @@
 """Tests whether vllm correctly load and run gptq_v2 format checkpoints.
 
 Run `pytest tests/quantization/test_gptq_v2.py --forked`.
+
+Note: 2/3-bit GPTQ models are no longer supported after the consolidation
+to Marlin kernels. Only 4/8-bit symmetric GPTQ models are supported.
 """
 
 import pytest
@@ -10,9 +13,10 @@
 from transformers import AutoTokenizer
 
 from vllm import SamplingParams
-from vllm.model_executor.layers.quantization.gptq import GPTQLinearMethod
+from vllm.model_executor.layers.quantization.auto_gptq import AutoGPTQLinearMethod
 
 # A dummy small model quantized by GPTQModel, stored in GPTQ v2 format
+# Note: This is a 2-bit model which is no longer supported with Marlin kernels
 MODELS = ["XXXXyu/Qwen3-1.7B-w2g64-gptq_v2"]
 
 # Generate multiple sequences for testing, because an 1.7B 2-bit model
@@ -20,27 +24,19 @@
 N_SEQ = 5
 
 
+@pytest.mark.skip(reason="2-bit GPTQ is no longer supported after Marlin consolidation")
 @pytest.mark.parametrize("model_id", MODELS)
 def test_model_load(vllm_runner, model_id, monkeypatch):
     # `LLM.apply_model` requires pickling a function.
     monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
 
-    # Only check the default GPTQ linear method (used for 2/3-bit models).
-    # 4/8-bit linear methods like Marlin already support gptq_v2.
-    linear_method_cls = GPTQLinearMethod
-
     with vllm_runner(model_id, dtype=torch.float16, max_model_len=512) as llm:
 
         def check_model(model_id):
             for name, submodule in model_id.named_modules():
                 # Could check more modules if necessary
                 if name == "model_id.layers.0.self_attn.qkv_proj":
-                    assert isinstance(submodule.quant_method, linear_method_cls)
-
-                    config = submodule.quant_method.quant_config
-                    assert config.checkpoint_format == "gptq_v2"
-                    assert submodule.quant_method.use_v2_format
-
+                    assert isinstance(submodule.quant_method, AutoGPTQLinearMethod)
                     # Just break since currently we only check 1 module
                     break
 
@@ -48,6 +44,7 @@ def check_model(model_id):
         llm.apply_model(check_model)
 
 
+@pytest.mark.skip(reason="2-bit GPTQ is no longer supported after Marlin consolidation")
 @pytest.mark.parametrize("model_id", MODELS)
 def test_model_inference(vllm_runner, model_id):
     # Prepare prompt to test the model's generation result.
diff --git a/tests/quantization/test_lm_head.py b/tests/quantization/test_lm_head.py
index d92dfaa2cc7b..196949b80856 100644
--- a/tests/quantization/test_lm_head.py
+++ b/tests/quantization/test_lm_head.py
@@ -8,8 +8,7 @@
 import pytest
 import torch
 
-from vllm.model_executor.layers.quantization.gptq import GPTQLinearMethod
-from vllm.model_executor.layers.quantization.gptq_marlin import GPTQMarlinLinearMethod
+from vllm.model_executor.layers.quantization.auto_gptq import AutoGPTQLinearMethod
 from vllm.model_executor.layers.vocab_parallel_embedding import (
     UnquantizedEmbeddingMethod,
 )
@@ -40,7 +39,7 @@ def check_model(model):
             if lm_head_quantized:
                 assert isinstance(
                     lm_head_layer.quant_method,
-                    (GPTQLinearMethod, GPTQMarlinLinearMethod),
+                    AutoGPTQLinearMethod,
                 )
             else:
                 assert isinstance(
diff --git a/tests/quantization/test_mixed_precision.py b/tests/quantization/test_mixed_precision.py
index 5087f9049cc5..d04692046340 100755
--- a/tests/quantization/test_mixed_precision.py
+++ b/tests/quantization/test_mixed_precision.py
@@ -43,7 +43,7 @@ def get_model_args(self) -> str:
     "amd/Qwen3-8B-WMXFP4FP8-AMXFP4FP8-AMP-KVFP8": {"arc_challenge": 0.52, "mmlu": 0.72},
     # Non-mixed-precision (PTQ) model
     # - Reference for pipeline compatibility verification -> No conflicts or breakings
-    "amd/Llama-2-70b-chat-hf-FP8-MLPerf-fp8_attn_quark_format": {
+    "amd/Llama-2-70b-chat-hf_FP8_MLPerf_V2": {
         "arc_challenge": 0.53,
         "mmlu": 0.61,
     },
diff --git a/tests/quantization/test_modelopt.py b/tests/quantization/test_modelopt.py
index 154b29d7017a..a11d5d9e66c3 100644
--- a/tests/quantization/test_modelopt.py
+++ b/tests/quantization/test_modelopt.py
@@ -6,12 +6,14 @@
 """
 
 import os
-from typing import NoReturn
+from typing import Any, NoReturn
+from unittest.mock import MagicMock, patch
 
 import pytest
 import torch
 
 from tests.quantization.utils import is_quant_method_supported
+from vllm.config.model import ModelConfig
 
 
 @pytest.fixture(scope="function", autouse=True)
@@ -46,7 +48,7 @@ def _snapshot_download_or_skip(model_id: str) -> str:
     not is_quant_method_supported("modelopt"),
     reason="ModelOpt FP8 is not supported on this GPU type.",
 )
-def test_modelopt_fp8_checkpoint_setup(vllm_runner):
+def test_modelopt_fp8_checkpoint_setup(default_vllm_config, vllm_runner):
     """Test ModelOpt FP8 checkpoint loading and structure validation."""
     # TODO: provide a small publicly available test checkpoint
     model_path = (
@@ -61,6 +63,8 @@ def test_modelopt_fp8_checkpoint_setup(vllm_runner):
             "This test requires a local ModelOpt FP8 checkpoint."
         )
 
+    # Set model config as model_config.dtype is required in ModelOptFp8LinearMethod.
+    default_vllm_config.model_config = ModelConfig()
     with vllm_runner(model_path, quantization="modelopt", enforce_eager=True) as llm:
 
         def check_model(model):
@@ -120,11 +124,13 @@ def check_model(model):
     not is_quant_method_supported("modelopt"),
     reason="ModelOpt FP8 is not supported on this GPU type.",
 )
-def test_modelopt_fp8_pc_pt_checkpoint_setup(vllm_runner):
+def test_modelopt_fp8_pc_pt_checkpoint_setup(default_vllm_config, vllm_runner):
     """Test ModelOpt FP8_PER_CHANNEL_PER_TOKEN checkpoint setup."""
     model_id = "CedricHwang/qwen2.5-0.5b-modelopt-fp8-pc-pt"
     model_path = _snapshot_download_or_skip(model_id)
 
+    # Set model config as model_config.dtype is required in ModelOptFp8LinearMethod.
+    default_vllm_config.model_config = ModelConfig()
     with vllm_runner(model_path, quantization="modelopt", enforce_eager=True) as llm:
 
         def check_model(model):
@@ -181,11 +187,13 @@ def check_model(model):
     not is_quant_method_supported("modelopt"),
     reason="ModelOpt FP8 is not supported on this GPU type.",
 )
-def test_modelopt_fp8_pb_wo_checkpoint_setup(vllm_runner):
+def test_modelopt_fp8_pb_wo_checkpoint_setup(default_vllm_config, vllm_runner):
     """Test ModelOpt FP8_PB_WO checkpoint setup."""
     model_id = "CedricHwang/qwen2.5-0.5b-modelopt-fp8-pb-wo"
     model_path = _snapshot_download_or_skip(model_id)
 
+    # Set model config as model_config.dtype is required in ModelOptFp8LinearMethod.
+    default_vllm_config.model_config = ModelConfig()
     with vllm_runner(model_path, quantization="modelopt", enforce_eager=True) as llm:
 
         def check_model(model):
@@ -232,3 +240,187 @@ def check_model(model):
         output = llm.generate_greedy(["Hello my name is"], max_tokens=4)
         assert output
         print(f"ModelOpt FP8_PB_WO output: {output}")
+
+
+def test_modelopt_nvfp4_config_dispatches_w4a4_method():
+    """``quant_method="NVFP4"`` (W4A4 default) routes to the existing
+    ``ModelOptNvFp4LinearMethod``."""
+    from vllm.model_executor.layers.quantization.modelopt import (
+        ModelOptNvFp4Config,
+        ModelOptNvFp4LinearMethod,
+    )
+
+    config = ModelOptNvFp4Config(
+        quant_method="NVFP4",
+        is_checkpoint_nvfp4_serialized=True,
+        kv_cache_quant_algo=None,
+        exclude_modules=[],
+    )
+    assert config.LinearMethodCls is ModelOptNvFp4LinearMethod
+    assert config.quant_method == "NVFP4"
+
+
+def test_modelopt_nvfp4_config_dispatches_w4a16_method():
+    """``quant_method="W4A16_NVFP4"`` routes to the new
+    ``ModelOptNvFp4W4A16LinearMethod`` instead of the W4A4 sibling.
+
+    Mirrors the FP8 dispatch precedent (``ModelOptFp8Config`` selects
+    one of three FP8 LinearMethods on ``quant_method``); a regression
+    here would mean a W4A16 NVFP4 checkpoint silently loaded under the
+    W4A4 method, which would try to register an ``input_scale`` runtime
+    parameter and (more importantly) call the cutlass W4A4 NVFP4 GEMM
+    instead of FP4 Marlin.
+    """
+    from vllm.model_executor.layers.quantization.modelopt import (
+        ModelOptNvFp4Config,
+        ModelOptNvFp4LinearMethod,
+        ModelOptNvFp4W4A16LinearMethod,
+    )
+
+    config = ModelOptNvFp4Config(
+        quant_method="W4A16_NVFP4",
+        is_checkpoint_nvfp4_serialized=True,
+        kv_cache_quant_algo=None,
+        exclude_modules=[],
+    )
+    assert config.LinearMethodCls is ModelOptNvFp4W4A16LinearMethod
+    assert config.LinearMethodCls is not ModelOptNvFp4LinearMethod
+    assert config.quant_method == "W4A16_NVFP4"
+
+
+@pytest.mark.parametrize(
+    "quant_method, expected_use_a16, act_key_is_none",
+    [
+        ("NVFP4", False, False),  # W4A4 default
+        ("W4A16_NVFP4", True, True),  # native W4A16 ckpt
+    ],
+)
+def test_modelopt_nvfp4_moe_dispatches_to_marlin_when_w4a16(
+    quant_method, expected_use_a16, act_key_is_none
+):
+    """``ModelOptNvFp4FusedMoE``: when the ckpt's ``quant_method`` is
+    ``W4A16_NVFP4``, the MoE class must pass ``activation_key=None`` to
+    ``select_nvfp4_moe_backend``. That filters out every W4A4 backend
+    (their ``_supports_quant_scheme`` requires
+    ``(kNvfp4Static, kNvfp4Dynamic)`` exactly); Marlin survives because
+    it only checks ``weight_key``. A regression here would mean a W4A16
+    ckpt silently went to the cutlass W4A4 path.
+    """
+    from vllm.model_executor.layers.quantization.modelopt import (
+        ModelOptNvFp4Config,
+        ModelOptNvFp4FusedMoE,
+    )
+    from vllm.model_executor.layers.quantization.utils.quant_utils import (
+        kNvfp4Dynamic,
+        kNvfp4Static,
+    )
+
+    config = ModelOptNvFp4Config(
+        quant_method=quant_method,
+        is_checkpoint_nvfp4_serialized=True,
+        kv_cache_quant_algo=None,
+        exclude_modules=[],
+        group_size=16,
+    )
+
+    mock_select = MagicMock(return_value=(MagicMock(), MagicMock()))
+    with (
+        patch(
+            "vllm.model_executor.layers.quantization.modelopt.select_nvfp4_moe_backend",
+            mock_select,
+        ),
+        patch(
+            "vllm.model_executor.layers.quantization.modelopt."
+            "is_global_sf_supported_for_nvfp4_backend",
+            return_value=False,
+        ),
+    ):
+        moe = ModelOptNvFp4FusedMoE(config, MagicMock())
+
+    assert moe.use_a16 is expected_use_a16
+    _, kwargs = mock_select.call_args
+    assert kwargs["weight_key"] is kNvfp4Static
+    if act_key_is_none:
+        assert kwargs["activation_key"] is None
+    else:
+        assert kwargs["activation_key"] is kNvfp4Dynamic
+
+
+@pytest.mark.parametrize(
+    "per_layer_algo, expected_linear_cls_name",
+    [
+        ("NVFP4", "ModelOptNvFp4LinearMethod"),
+        ("W4A16_NVFP4", "ModelOptNvFp4W4A16LinearMethod"),
+    ],
+)
+def test_modelopt_mixed_precision_dispatches_w4a16_layer(
+    per_layer_algo, expected_linear_cls_name
+):
+    """``ModelOptMixedPrecisionConfig.get_quant_method`` must route a Linear
+    layer to the right LinearMethod based on its per-layer ``quant_algo``
+    entry in ``quantized_layers``. Verifies the new ``W4A16_NVFP4`` branch
+    coexists with the existing ``NVFP4`` branch without regression. A
+    regression here would mean a W4A16 layer in a mixed-precision ckpt
+    silently fell through to ``UnquantizedLinearMethod``.
+
+    NOTE: FP8 dispatch (the third branch of get_quant_method) is not
+    covered here because ``ModelOptFp8LinearMethod.__init__`` reads
+    ``get_current_vllm_config().model_config.dtype``, which requires a
+    fully constructed ``ModelConfig`` (real model path). FP8 routing in
+    mixed-precision is exercised by the existing integration tests
+    above that use the ``vllm_runner`` fixture (e.g.
+    ``test_modelopt_fp8_checkpoint_setup``). Our PR doesn't change the
+    FP8 branch, so this isn't a coverage gap.
+    """
+    from vllm.model_executor.layers.linear import LinearBase
+    from vllm.model_executor.layers.quantization import modelopt as m
+
+    hf_quant_config: dict[str, Any] = {
+        "quantization": {
+            "quant_algo": "MIXED_PRECISION",
+            "kv_cache_quant_algo": None,
+            "exclude_modules": [],
+            "group_size": 16,
+            "quantized_layers": {
+                "model.layers.0.fake_proj": {"quant_algo": per_layer_algo},
+            },
+        }
+    }
+    config = m.ModelOptMixedPrecisionConfig.from_config(hf_quant_config)
+
+    fake_layer = MagicMock(spec=LinearBase)
+    method = config.get_quant_method(fake_layer, "model.layers.0.fake_proj")
+
+    expected_cls = getattr(m, expected_linear_cls_name)
+    assert isinstance(method, expected_cls), (
+        f"Expected {expected_linear_cls_name}, got {type(method).__name__}"
+    )
+
+
+def test_modelopt_mixed_precision_builds_w4a16_sibling_config():
+    """Sanity: ``ModelOptMixedPrecisionConfig._from_config`` builds **two**
+    NVFP4 sub-configs — one for W4A4 (default) and one tagged
+    ``quant_method='W4A16_NVFP4'`` — so per-layer dispatch can hand
+    Marlin-bound layers the right config without re-instantiating it on
+    every call.
+    """
+    from vllm.model_executor.layers.quantization import modelopt as m
+
+    hf_quant_config: dict[str, Any] = {
+        "quantization": {
+            "quant_algo": "MIXED_PRECISION",
+            "kv_cache_quant_algo": None,
+            "exclude_modules": [],
+            "group_size": 16,
+            "quantized_layers": {
+                "model.layers.0.a": {"quant_algo": "NVFP4"},
+                "model.layers.0.b": {"quant_algo": "W4A16_NVFP4"},
+            },
+        }
+    }
+    config = m.ModelOptMixedPrecisionConfig.from_config(hf_quant_config)
+
+    assert config.nvfp4_config.quant_method == "NVFP4"
+    assert config.nvfp4_config.LinearMethodCls is m.ModelOptNvFp4LinearMethod
+    assert config.w4a16_nvfp4_config.quant_method == "W4A16_NVFP4"
+    assert config.w4a16_nvfp4_config.LinearMethodCls is m.ModelOptNvFp4W4A16LinearMethod
diff --git a/tests/quantization/test_online.py b/tests/quantization/test_online.py
new file mode 100644
index 000000000000..0254da79e101
--- /dev/null
+++ b/tests/quantization/test_online.py
@@ -0,0 +1,178 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests online quantization."""
+
+import pytest
+import torch
+
+from tests.quantization.utils import (
+    _test_online_quant_peak_mem_impl,
+    is_quant_method_supported,
+)
+from vllm.model_executor.layers.linear import UnquantizedLinearMethod
+from vllm.model_executor.layers.quantization.online.fp8 import (
+    Fp8PerBlockOnlineLinearMethod,
+    Fp8PerBlockOnlineMoEMethod,
+    Fp8PerTensorOnlineLinearMethod,
+    Fp8PerTensorOnlineMoEMethod,
+)
+from vllm.platforms import current_platform
+
+
+@pytest.mark.skipif(
+    not is_quant_method_supported("fp8"),
+    reason="FP8 is not supported on this GPU type.",
+)
+@pytest.mark.parametrize(
+    "quant_scheme,online_quant_args,expected_linear_cls,expected_moe_cls",
+    [
+        # simple case - quantization='fp8_per_tensor'
+        (
+            "fp8_per_tensor",
+            None,
+            Fp8PerTensorOnlineLinearMethod,
+            Fp8PerTensorOnlineMoEMethod,
+        ),
+        # simple case - quantization='fp8_per_block'
+        (
+            "fp8_per_block",
+            None,
+            Fp8PerBlockOnlineLinearMethod,
+            Fp8PerBlockOnlineMoEMethod,
+        ),
+        # quantization='online' with per-layer-kind overrides
+        (
+            "online",
+            {
+                "linear": "fp8_per_block",
+                "moe": "fp8_per_tensor",
+            },
+            Fp8PerBlockOnlineLinearMethod,
+            Fp8PerTensorOnlineMoEMethod,
+        ),
+        # ignore with direct layer name
+        (
+            "fp8_per_tensor",
+            # qkv_proj is fused from q_proj/k_proj/v_proj, so currently the
+            # ignore regex must match the unfused shard names
+            # TODO(future PR): also make 're:.*qkv_proj.*' work
+            {"ignore": ["model.layers.1.self_attn.o_proj", "re:.*[qkv]_proj"]},
+            Fp8PerTensorOnlineLinearMethod,
+            Fp8PerTensorOnlineMoEMethod,
+        ),
+    ],
+)
+@pytest.mark.parametrize(
+    "use_rocm_aiter", [True, False] if current_platform.is_rocm() else [False]
+)
+def test_online_quantization(
+    vllm_runner,
+    quant_scheme: str,
+    online_quant_args: dict | None,
+    expected_linear_cls,
+    expected_moe_cls,
+    use_rocm_aiter: bool,
+    monkeypatch,
+) -> None:
+    """
+    Tests that online quantization frontend configuration works -
+    selecting quant schemes, overriding quant schemes by type, ignoring
+    layers.
+
+    Does not test performance, peak memory usage, etc.
+    """
+
+    if use_rocm_aiter:
+        monkeypatch.setenv("VLLM_ROCM_USE_AITER", "1")
+
+    # `LLM.apply_model` requires pickling a function.
+    monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
+
+    # a tiny model with both dense and MoE layers
+    model_name = "ibm-granite/granite-3.0-1b-a400m-base"
+
+    runner_kwargs = dict(
+        quantization=quant_scheme,
+        enforce_eager=True,
+    )
+    if online_quant_args is not None:
+        runner_kwargs["quantization_config"] = online_quant_args
+
+    with vllm_runner(
+        model_name,
+        **runner_kwargs,
+    ) as llm:
+
+        def check_model(model):
+            # checks further down in the test case are hardcoded for this
+            # model
+            assert model_name == "ibm-granite/granite-3.0-1b-a400m-base"
+
+            o_proj = model.model.layers[0].self_attn.o_proj
+            moe = model.model.layers[0].block_sparse_moe.experts
+
+            # o_proj and moe in layer 0 are always quantized (never ignored)
+            # because of how we craft the test case inputs
+            assert isinstance(o_proj.quant_method, expected_linear_cls)
+            if moe is not None:
+                assert isinstance(moe.quant_method, expected_moe_cls)
+
+            if current_platform.is_cuda():
+                assert o_proj.weight.dtype == torch.float8_e4m3fn
+            elif current_platform.is_rocm():
+                assert o_proj.weight.dtype == current_platform.fp8_dtype()
+            else:
+                pytest.skip("Only runs on CUDA and ROCm.")
+
+            # Verify ignored layers are unquantized.
+            if isinstance(online_quant_args, dict) and "ignore" in online_quant_args:
+                # only .*1.self_attn_o_proj is skipped
+                for layer_idx in range(len(model.model.layers)):
+                    o_proj = model.model.layers[layer_idx].self_attn.o_proj
+                    if layer_idx == 1:
+                        assert isinstance(o_proj.quant_method, UnquantizedLinearMethod)
+                    else:
+                        assert isinstance(o_proj.quant_method, expected_linear_cls)
+
+                # every .*self_attn.qkv_proj is skipped
+                for layer_idx in range(len(model.model.layers)):
+                    qkv_proj = model.model.layers[layer_idx].self_attn.qkv_proj
+                    assert isinstance(qkv_proj.quant_method, UnquantizedLinearMethod)
+
+        llm.apply_model(check_model)
+
+        outputs = llm.generate_greedy(["Hello my name is"], max_tokens=4)
+        print(outputs[0][1])
+
+
+@pytest.mark.skipif(
+    not is_quant_method_supported("fp8"),
+    reason="FP8 is not supported on this GPU type.",
+)
+def test_online_quant_peak_mem(
+    vllm_runner,
+    caplog_mp_spawn,
+    monkeypatch,
+) -> None:
+    _test_online_quant_peak_mem_impl(
+        "fp8_per_tensor", vllm_runner, caplog_mp_spawn, monkeypatch
+    )
+
+
+@pytest.mark.skipif(
+    not is_quant_method_supported("fp8"),
+    reason="FP8 is not supported on this GPU type.",
+)
+def test_online_quant_load_format_dummy(
+    vllm_runner,
+    monkeypatch,
+    caplog,
+) -> None:
+    with vllm_runner(
+        "ibm-granite/granite-3.0-1b-a400m-base",
+        quantization="fp8_per_tensor",
+        enforce_eager=True,
+        load_format="dummy",
+    ) as llm:
+        outputs = llm.generate_greedy(["The future of AI is"], max_tokens=4)
+        print(outputs[0][1])
diff --git a/tests/quantization/test_per_token_kv_cache.py b/tests/quantization/test_per_token_kv_cache.py
new file mode 100644
index 000000000000..254e284efb51
--- /dev/null
+++ b/tests/quantization/test_per_token_kv_cache.py
@@ -0,0 +1,562 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for per-token-head KV cache quantization (INT8 and FP8).
+
+Covers:
+- Per-token-head Triton reshape-and-cache kernel
+- Round-trip quantize/dequantize accuracy
+- process_weights_after_loading early-return path
+- End-to-end integration with Triton unified attention kernel
+
+Run: pytest tests/quantization/test_per_token_kv_cache.py -v -s
+"""
+
+import random
+from dataclasses import dataclass
+from unittest.mock import MagicMock
+
+import pytest
+import torch
+
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    get_fp8_min_max,
+)
+from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed
+from vllm.v1.kv_cache_interface import KVQuantMode, is_quantized_kv_cache
+
+DEVICE_TYPE = current_platform.device_type
+
+# Skip entire module if no CUDA/ROCm GPU available
+pytestmark = [
+    pytest.mark.skipif(
+        current_platform.is_cpu(),
+        reason="Per-token-head KV cache tests require GPU.",
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Test parameters
+# ---------------------------------------------------------------------------
+NUM_TOKENS = [1, 7, 42]
+NUM_KV_HEADS = [1, 4, 8]
+HEAD_SIZES = [64, 128]
+BLOCK_SIZES = [16]
+SEEDS = [0]
+
+# Platform-dependent FP8 dtype and range
+FP8_DTYPE = current_platform.fp8_dtype()
+FP8_MIN, FP8_MAX = get_fp8_min_max()
+
+
+# ---------------------------------------------------------------------------
+# Per-dtype quantization config
+# ---------------------------------------------------------------------------
+@dataclass(frozen=True)
+class QuantConfig:
+    """Quantization parameters for a given cache dtype."""
+
+    cache_dtype: torch.dtype  # torch.int8 or FP8_DTYPE
+    kv_cache_dtype_str: str  # "int8_per_token_head" or "fp8_per_token_head"
+    quant_max: float
+    quant_min: float
+    kv_quant_mode: KVQuantMode
+    # INT8 Triton stores truncate; FP8 hardware casts round.
+    uses_trunc: bool
+
+
+INT8_CONFIG = QuantConfig(
+    cache_dtype=torch.int8,
+    kv_cache_dtype_str="int8_per_token_head",
+    quant_max=127.0,
+    quant_min=-128.0,
+    kv_quant_mode=KVQuantMode.INT8_PER_TOKEN_HEAD,
+    uses_trunc=True,
+)
+FP8_CONFIG = QuantConfig(
+    cache_dtype=FP8_DTYPE,
+    kv_cache_dtype_str="fp8_per_token_head",
+    quant_max=FP8_MAX,
+    quant_min=FP8_MIN,
+    kv_quant_mode=KVQuantMode.FP8_PER_TOKEN_HEAD,
+    uses_trunc=False,
+)
+
+QUANT_CONFIGS = [INT8_CONFIG, FP8_CONFIG]
+
+
+@pytest.fixture(params=QUANT_CONFIGS, ids=["int8", "fp8"])
+def qcfg(request) -> QuantConfig:
+    return request.param
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+def _quantize_per_token_head_ref(
+    data: torch.Tensor,  # [num_tokens, num_heads, head_size]
+    cfg: QuantConfig,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Reference per-token-head quantization (one scale per token per head).
+
+    Returns (quantized, scales) where scales is [num_tokens, num_heads].
+    """
+    absmax = data.float().abs().amax(dim=2)  # [num_tokens, num_heads]
+    scales = (absmax / cfg.quant_max).clamp(min=1e-6)
+    scaled = data.float() * (1.0 / scales[:, :, None])
+    if cfg.uses_trunc:
+        q = scaled.round().clamp(cfg.quant_min, cfg.quant_max).to(cfg.cache_dtype)
+    else:
+        q = scaled.clamp(cfg.quant_min, cfg.quant_max).to(cfg.cache_dtype)
+    return q, scales
+
+
+# ===========================================================================
+# 1. is_quantized_kv_cache / get_kv_quant_mode
+# ===========================================================================
+class TestIsQuantizedKvCache:
+    def test_fp8_variants(self):
+        assert is_quantized_kv_cache("fp8")
+        assert is_quantized_kv_cache("fp8_e4m3")
+        assert is_quantized_kv_cache("fp8_e5m2")
+
+    def test_int8_per_token_head(self):
+        assert is_quantized_kv_cache("int8_per_token_head")
+
+    def test_fp8_per_token_head(self):
+        assert is_quantized_kv_cache("fp8_per_token_head")
+
+    def test_auto(self):
+        assert not is_quantized_kv_cache("auto")
+
+    def test_bfloat16(self):
+        assert not is_quantized_kv_cache("bfloat16")
+
+    def test_kv_quant_mode_int8(self):
+        from vllm.v1.kv_cache_interface import get_kv_quant_mode
+
+        assert (
+            get_kv_quant_mode("int8_per_token_head") == KVQuantMode.INT8_PER_TOKEN_HEAD
+        )
+
+    def test_kv_quant_mode_fp8(self):
+        from vllm.v1.kv_cache_interface import get_kv_quant_mode
+
+        assert get_kv_quant_mode("fp8_per_token_head") == KVQuantMode.FP8_PER_TOKEN_HEAD
+
+
+# ===========================================================================
+# 2. Triton per-token-head kernel (reshape-and-cache)
+# ===========================================================================
+@pytest.mark.parametrize("num_tokens", NUM_TOKENS)
+@pytest.mark.parametrize("num_heads", NUM_KV_HEADS)
+@pytest.mark.parametrize("head_size", HEAD_SIZES)
+@pytest.mark.parametrize("block_size", BLOCK_SIZES)
+@pytest.mark.parametrize("seed", SEEDS)
+@torch.inference_mode()
+def test_reshape_and_cache_per_token_head(
+    qcfg: QuantConfig,
+    num_tokens: int,
+    num_heads: int,
+    head_size: int,
+    block_size: int,
+    seed: int,
+):
+    """Test triton_reshape_and_cache_flash_per_token_head_quant kernel."""
+    from vllm.v1.attention.ops.triton_reshape_and_cache_flash import (
+        triton_reshape_and_cache_flash_per_token_head_quant,
+    )
+
+    set_random_seed(seed)
+    torch.set_default_device(DEVICE_TYPE)
+
+    num_blocks = (num_tokens + block_size - 1) // block_size + 4
+
+    key = torch.randn(num_tokens, num_heads, head_size, dtype=torch.bfloat16)
+    value = torch.randn(num_tokens, num_heads, head_size, dtype=torch.bfloat16)
+
+    key_cache = torch.zeros(
+        num_blocks, block_size, num_heads, head_size, dtype=qcfg.cache_dtype
+    )
+    value_cache = torch.zeros(
+        num_blocks, block_size, num_heads, head_size, dtype=qcfg.cache_dtype
+    )
+    k_scale_cache = torch.ones(num_blocks, block_size, num_heads, dtype=torch.float32)
+    v_scale_cache = torch.ones(num_blocks, block_size, num_heads, dtype=torch.float32)
+
+    num_slots = block_size * num_blocks
+    slot_mapping = torch.tensor(
+        random.sample(range(num_slots), num_tokens), dtype=torch.long
+    )
+
+    triton_reshape_and_cache_flash_per_token_head_quant(
+        key,
+        value,
+        key_cache,
+        value_cache,
+        k_scale_cache,
+        v_scale_cache,
+        slot_mapping,
+    )
+
+    # Reference
+    ref_k_quant, ref_k_scales = _quantize_per_token_head_ref(key, qcfg)
+    ref_v_quant, ref_v_scales = _quantize_per_token_head_ref(value, qcfg)
+
+    # Compare dequantized values rather than raw quantized values.
+    # Triton and PyTorch reductions can differ at FP8 rounding boundaries
+    # (up to 32 in quantized domain for fp8_e4m3), but the dequantized
+    # error is bounded by the scale.
+    for i, slot in enumerate(slot_mapping.tolist()):
+        blk = slot // block_size
+        off = slot % block_size
+
+        actual_k_scale = k_scale_cache[blk, off]  # [num_heads]
+        k_deq = key_cache[blk, off].float() * actual_k_scale[:, None]
+        k_ref_deq = key[i].float()
+        torch.testing.assert_close(
+            k_deq,
+            k_ref_deq,
+            atol=0.1,
+            rtol=0.1,
+        )
+        actual_v_scale = v_scale_cache[blk, off]  # [num_heads]
+        v_deq = value_cache[blk, off].float() * actual_v_scale[:, None]
+        v_ref_deq = value[i].float()
+        torch.testing.assert_close(
+            v_deq,
+            v_ref_deq,
+            atol=0.1,
+            rtol=0.1,
+        )
+        # Per-head scales: [num_heads]
+        torch.testing.assert_close(
+            k_scale_cache[blk, off], ref_k_scales[i], atol=1e-4, rtol=1e-3
+        )
+        torch.testing.assert_close(
+            v_scale_cache[blk, off], ref_v_scales[i], atol=1e-4, rtol=1e-3
+        )
+
+
+# ===========================================================================
+# 3. Per-token-head round-trip accuracy (quantize -> dequantize)
+# ===========================================================================
+@pytest.mark.parametrize("num_tokens", [1, 16])
+@pytest.mark.parametrize("num_heads", [4])
+@pytest.mark.parametrize("head_size", [128])
+@pytest.mark.parametrize("block_size", [16])
+@torch.inference_mode()
+def test_per_token_head_round_trip_accuracy(
+    qcfg: QuantConfig,
+    num_tokens: int,
+    num_heads: int,
+    head_size: int,
+    block_size: int,
+):
+    """Verify per-token-head round-trip: kernel dequant matches reference.
+
+    INT8: Triton truncates on float->int8 store.
+    FP8: hardware cast (clamp then cast).
+    """
+    from vllm.v1.attention.ops.triton_reshape_and_cache_flash import (
+        triton_reshape_and_cache_flash_per_token_head_quant,
+    )
+
+    torch.set_default_device(DEVICE_TYPE)
+    set_random_seed(42)
+
+    num_blocks = (num_tokens + block_size - 1) // block_size + 2
+
+    key = torch.randn(num_tokens, num_heads, head_size, dtype=torch.bfloat16) * 0.5
+    value = torch.randn(num_tokens, num_heads, head_size, dtype=torch.bfloat16) * 0.5
+
+    key_cache = torch.zeros(
+        num_blocks, block_size, num_heads, head_size, dtype=qcfg.cache_dtype
+    )
+    value_cache = torch.zeros(
+        num_blocks, block_size, num_heads, head_size, dtype=qcfg.cache_dtype
+    )
+    k_scale_cache = torch.ones(num_blocks, block_size, num_heads, dtype=torch.float32)
+    v_scale_cache = torch.ones(num_blocks, block_size, num_heads, dtype=torch.float32)
+
+    slot_mapping = torch.arange(num_tokens, dtype=torch.long)
+
+    triton_reshape_and_cache_flash_per_token_head_quant(
+        key,
+        value,
+        key_cache,
+        value_cache,
+        k_scale_cache,
+        v_scale_cache,
+        slot_mapping,
+    )
+
+    for i in range(num_tokens):
+        blk = i // block_size
+        off = i % block_size
+
+        for label, data, cache, sc in [
+            ("key", key, key_cache, k_scale_cache),
+            ("val", value, value_cache, v_scale_cache),
+        ]:
+            for h in range(num_heads):
+                orig = data[i, h].float()  # [head_size]
+
+                actual_q = cache[blk, off, h]
+                actual_sc = sc[blk, off, h]
+                actual_deq = actual_q.float() * actual_sc
+
+                # Round-trip: dequantized should be close to original
+                torch.testing.assert_close(
+                    actual_deq,
+                    orig,
+                    atol=0.1,
+                    rtol=0.1,
+                )
+
+
+# ===========================================================================
+# 4. Negative slot mapping (padding tokens should be skipped)
+# ===========================================================================
+@torch.inference_mode()
+def test_per_token_head_negative_slot_skipped(qcfg: QuantConfig):
+    """Tokens with slot_mapping=-1 should leave the cache unchanged."""
+    from vllm.v1.attention.ops.triton_reshape_and_cache_flash import (
+        triton_reshape_and_cache_flash_per_token_head_quant,
+    )
+
+    torch.set_default_device(DEVICE_TYPE)
+    num_tokens = 4
+    num_heads = 2
+    head_size = 64
+    block_size = 16
+    num_blocks = 2
+
+    key = torch.randn(num_tokens, num_heads, head_size, dtype=torch.bfloat16)
+    value = torch.randn(num_tokens, num_heads, head_size, dtype=torch.bfloat16)
+
+    key_cache = torch.zeros(
+        num_blocks, block_size, num_heads, head_size, dtype=qcfg.cache_dtype
+    )
+    value_cache = torch.zeros(
+        num_blocks, block_size, num_heads, head_size, dtype=qcfg.cache_dtype
+    )
+    k_scale_cache = torch.ones(num_blocks, block_size, num_heads, dtype=torch.float32)
+    v_scale_cache = torch.ones(num_blocks, block_size, num_heads, dtype=torch.float32)
+
+    slot_mapping = torch.tensor([0, -1, 1, -1], dtype=torch.long)
+
+    key_cache_before = key_cache.clone()
+    val_cache_before = value_cache.clone()
+
+    triton_reshape_and_cache_flash_per_token_head_quant(
+        key,
+        value,
+        key_cache,
+        value_cache,
+        k_scale_cache,
+        v_scale_cache,
+        slot_mapping,
+    )
+
+    # Slots 0 and 1 should have been written (tokens 0 and 2)
+    assert not torch.equal(key_cache[0, 0], key_cache_before[0, 0])
+    assert not torch.equal(key_cache[0, 1], key_cache_before[0, 1])
+    assert not torch.equal(value_cache[0, 0], val_cache_before[0, 0])
+
+    # All other slots should be unchanged
+    assert torch.equal(key_cache[0, 2:], key_cache_before[0, 2:])
+    assert torch.equal(key_cache[1], key_cache_before[1])
+    assert torch.equal(value_cache[0, 2:], val_cache_before[0, 2:])
+
+
+# ===========================================================================
+# 5. process_weights_after_loading -- per-token-head early return
+# ===========================================================================
+@pytest.mark.parametrize(
+    "kv_cache_dtype", ["int8_per_token_head", "fp8_per_token_head"]
+)
+def test_process_weights_sets_placeholder_scales(kv_cache_dtype: str):
+    """Per-token-head should set _k_scale=1.0, _v_scale=1.0
+    and delete checkpoint attrs."""
+    from vllm.model_executor.layers.quantization.kv_cache import (
+        BaseKVCacheMethod,
+    )
+
+    layer = MagicMock()
+    layer.kv_cache_dtype = kv_cache_dtype
+    layer.calculate_kv_scales = False
+    layer.k_scale = torch.nn.Parameter(torch.tensor(-1.0), requires_grad=False)
+    layer.v_scale = torch.nn.Parameter(torch.tensor(-1.0), requires_grad=False)
+    layer.q_scale = torch.nn.Parameter(torch.tensor(-1.0), requires_grad=False)
+    layer.prob_scale = torch.nn.Parameter(torch.tensor(-1.0), requires_grad=False)
+    layer._k_scale = torch.tensor(0.0)
+    layer._v_scale = torch.tensor(0.0)
+    layer._k_scale_float = 0.0
+    layer._v_scale_float = 0.0
+
+    method = BaseKVCacheMethod.__new__(BaseKVCacheMethod)
+    method.quant_config = MagicMock()
+    method.process_weights_after_loading(layer)
+
+    assert layer._k_scale_float == 1.0
+    assert layer._v_scale_float == 1.0
+    assert not hasattr(layer, "k_scale")
+    assert not hasattr(layer, "v_scale")
+    assert not hasattr(layer, "q_scale")
+    assert not hasattr(layer, "prob_scale")
+
+
+# ===========================================================================
+# 6. Triton unified_attention -- per-token-head scale cache (INT8 and FP8)
+# ===========================================================================
+@pytest.mark.parametrize(
+    "seq_lens",
+    [
+        [(1, 128)],
+        [(1, 64), (1, 32)],
+    ],
+)
+@pytest.mark.parametrize("num_heads", [(4, 4)])
+@pytest.mark.parametrize("head_size", [128])
+@pytest.mark.parametrize("block_size", [16])
+@torch.inference_mode()
+def test_triton_unified_attention_per_token_head_scale(
+    qcfg: QuantConfig,
+    seq_lens: list[tuple[int, int]],
+    num_heads: tuple[int, int],
+    head_size: int,
+    block_size: int,
+):
+    """End-to-end: quantized KV with per-token-head scale caches."""
+    from vllm.utils.math_utils import next_power_of_2
+    from vllm.v1.attention.ops.triton_unified_attention import unified_attention
+
+    torch.set_default_device(DEVICE_TYPE)
+    set_random_seed(0)
+
+    num_seqs = len(seq_lens)
+    query_lens = [s[0] for s in seq_lens]
+    kv_lens = [s[1] for s in seq_lens]
+    num_query_heads, num_kv_heads = num_heads
+    max_query_len = max(query_lens)
+    max_kv_len = max(kv_lens)
+    scale = head_size**-0.5
+    num_blocks = 2048
+
+    query = torch.randn(
+        sum(query_lens), num_query_heads, head_size, dtype=torch.bfloat16
+    )
+
+    key_cache_bf16 = torch.randn(
+        num_blocks, block_size, num_kv_heads, head_size, dtype=torch.bfloat16
+    )
+    value_cache_bf16 = torch.randn_like(key_cache_bf16)
+
+    # Per-token-head quantization: one scale per (block, slot, head)
+    k_absmax = key_cache_bf16.float().abs().amax(dim=-1)  # [..., num_kv_heads]
+    v_absmax = value_cache_bf16.float().abs().amax(dim=-1)
+    k_scale_cache = (k_absmax / qcfg.quant_max).clamp(min=1e-6).to(torch.float32)
+    v_scale_cache = (v_absmax / qcfg.quant_max).clamp(min=1e-6).to(torch.float32)
+
+    scaled_k = key_cache_bf16.float() / k_scale_cache[:, :, :, None]
+    scaled_v = value_cache_bf16.float() / v_scale_cache[:, :, :, None]
+    if qcfg.uses_trunc:
+        key_cache_q = (
+            scaled_k.round().clamp(qcfg.quant_min, qcfg.quant_max).to(qcfg.cache_dtype)
+        )
+        value_cache_q = (
+            scaled_v.round().clamp(qcfg.quant_min, qcfg.quant_max).to(qcfg.cache_dtype)
+        )
+    else:
+        key_cache_q = scaled_k.clamp(qcfg.quant_min, qcfg.quant_max).to(
+            qcfg.cache_dtype
+        )
+        value_cache_q = scaled_v.clamp(qcfg.quant_min, qcfg.quant_max).to(
+            qcfg.cache_dtype
+        )
+
+    # Dequantized reference
+    key_cache_deq = key_cache_q.float() * k_scale_cache[:, :, :, None]
+    value_cache_deq = value_cache_q.float() * v_scale_cache[:, :, :, None]
+
+    cu_query_lens = torch.tensor([0] + query_lens, dtype=torch.int32).cumsum(
+        dim=0, dtype=torch.int32
+    )
+    kv_lens_t = torch.tensor(kv_lens, dtype=torch.int32)
+
+    max_num_blocks_per_seq = (max_kv_len + block_size - 1) // block_size
+    block_tables = torch.randint(
+        0, num_blocks, (num_seqs, max_num_blocks_per_seq), dtype=torch.int32
+    )
+
+    head_size_padded = next_power_of_2(head_size)
+    seq_threshold_3D = 0
+    num_par_softmax_segments = 16
+    softmax_segm_output = torch.empty(
+        (seq_threshold_3D, num_query_heads, num_par_softmax_segments, head_size_padded),
+        dtype=torch.float32,
+    )
+    softmax_segm_max = torch.empty(
+        (seq_threshold_3D, num_query_heads, num_par_softmax_segments),
+        dtype=torch.float32,
+    )
+    softmax_segm_expsum = torch.empty(
+        (seq_threshold_3D, num_query_heads, num_par_softmax_segments),
+        dtype=torch.float32,
+    )
+
+    output_q = torch.empty_like(query)
+    unified_attention(
+        q=query,
+        k=key_cache_q,
+        v=value_cache_q,
+        out=output_q,
+        cu_seqlens_q=cu_query_lens,
+        seqused_k=kv_lens_t,
+        max_seqlen_q=max_query_len,
+        max_seqlen_k=max_kv_len,
+        softmax_scale=scale,
+        causal=True,
+        window_size=(-1, -1),
+        block_table=block_tables,
+        softcap=0,
+        q_descale=None,
+        k_descale=None,
+        v_descale=None,
+        seq_threshold_3D=seq_threshold_3D,
+        num_par_softmax_segments=num_par_softmax_segments,
+        softmax_segm_output=softmax_segm_output,
+        softmax_segm_max=softmax_segm_max,
+        softmax_segm_expsum=softmax_segm_expsum,
+        kv_quant_mode=qcfg.kv_quant_mode,
+        k_scale_cache=k_scale_cache,
+        v_scale_cache=v_scale_cache,
+    )
+
+    output_ref = torch.empty_like(query)
+    unified_attention(
+        q=query,
+        k=key_cache_deq.to(torch.bfloat16),
+        v=value_cache_deq.to(torch.bfloat16),
+        out=output_ref,
+        cu_seqlens_q=cu_query_lens,
+        seqused_k=kv_lens_t,
+        max_seqlen_q=max_query_len,
+        max_seqlen_k=max_kv_len,
+        softmax_scale=scale,
+        causal=True,
+        window_size=(-1, -1),
+        block_table=block_tables,
+        softcap=0,
+        q_descale=None,
+        k_descale=None,
+        v_descale=None,
+        seq_threshold_3D=seq_threshold_3D,
+        num_par_softmax_segments=num_par_softmax_segments,
+        softmax_segm_output=softmax_segm_output,
+        softmax_segm_max=softmax_segm_max,
+        softmax_segm_expsum=softmax_segm_expsum,
+    )
+
+    torch.testing.assert_close(output_q, output_ref, atol=5e-2, rtol=5e-2)
diff --git a/tests/quantization/test_quantization_config_args.py b/tests/quantization/test_quantization_config_args.py
new file mode 100644
index 000000000000..7e881d6b0b8e
--- /dev/null
+++ b/tests/quantization/test_quantization_config_args.py
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for QuantizationConfigArgs parsing."""
+
+import pytest
+
+from vllm.config.quantization import (
+    QUANT_KEY_NAMES,
+    QuantizationConfigArgs,
+    QuantSpec,
+    resolve_quantization_config,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    kFp8Dynamic128Sym,
+    kFp8DynamicTokenSym,
+    kFp8Static128BlockSym,
+    kFp8StaticTensorSym,
+    kInt8StaticChannelSym,
+    kMxfp8Dynamic,
+)
+
+# ---- QuantSpec ------------------------------------------------------------
+
+
+def test_quant_spec_resolves_string_to_quant_key():
+    spec = QuantSpec(weight="mxfp8", activation="fp8_per_token")
+    assert spec.weight == kMxfp8Dynamic
+    assert spec.activation == kFp8DynamicTokenSym
+
+
+def test_quant_spec_accepts_quant_key_directly():
+    spec = QuantSpec(weight=kFp8StaticTensorSym)
+    assert spec.weight is kFp8StaticTensorSym
+    assert spec.activation is None
+
+
+def test_quant_spec_rejects_unknown_name():
+    with pytest.raises(ValueError, match="unknown quantization name"):
+        QuantSpec(weight="not_a_real_format")
+
+
+# ---- QuantizationConfigArgs string shorthand on linear/moe ----------------
+
+
+def test_args_linear_string_resolves_via_quant_key_names():
+    # A bare QUANT_KEY_NAMES entry desugars to QuantSpec(weight=<key>).
+    args = QuantizationConfigArgs(linear="fp8_per_block_static")
+    assert args.linear == QuantSpec(weight=kFp8Static128BlockSym)
+    assert args.moe is None
+
+
+def test_args_moe_string_resolves_via_online_shorthand():
+    # An online-shorthand name pulls the matching slot from _ONLINE_SHORTHANDS
+    # (so `linear: "fp8_per_block"` and `moe: "fp8_per_block"` produce the
+    # same per-layer-kind spec the `--quantization fp8_per_block` shorthand
+    # would).
+    args = QuantizationConfigArgs(moe="fp8_per_block")
+    assert args.moe == QuantSpec(weight=kFp8Static128BlockSym)
+
+
+def test_args_string_shorthand_missing_slot_raises():
+    # int8_per_channel_weight_only sets only `moe`; using it on `linear`
+    # has no defined spec and should raise rather than silently no-op.
+    with pytest.raises(ValueError, match="does not define a linear spec"):
+        QuantizationConfigArgs(linear="int8_per_channel_weight_only")
+
+
+def test_args_accepts_dict_form():
+    args = QuantizationConfigArgs(moe={"activation": "mxfp8"})
+    assert args.moe == QuantSpec(weight=None, activation=kMxfp8Dynamic)
+
+
+# ---- resolve_quantization_config -----------------------------------------
+
+
+def test_resolve_shorthand_only_populates_both_slots():
+    args = resolve_quantization_config("fp8_per_block", None)
+    assert args.linear == QuantSpec(weight=kFp8Static128BlockSym)
+    assert args.moe == QuantSpec(weight=kFp8Static128BlockSym)
+
+
+def test_resolve_int8_shorthand_leaves_linear_unset():
+    # int8_per_channel_weight_only is MoE-only; linear stays None so that
+    # OnlineQuantizationConfig leaves Linear layers in full precision.
+    args = resolve_quantization_config("int8_per_channel_weight_only", None)
+    assert args.linear is None
+    assert args.moe == QuantSpec(weight=kInt8StaticChannelSym)
+
+
+def test_resolve_quantization_config_only():
+    # When only `quantization_config` is given (e.g. for an already-quantized
+    # checkpoint that needs an activation override), it's returned as-is.
+    args = resolve_quantization_config(None, {"moe": {"activation": "mxfp8"}})
+    assert args.linear is None
+    assert args.moe == QuantSpec(weight=None, activation=kMxfp8Dynamic)
+
+
+def test_resolve_merges_explicit_over_shorthand():
+    # Explicit linear in quantization_config wins; moe falls back to the
+    # shorthand's slot.
+    args = resolve_quantization_config(
+        "fp8_per_tensor",
+        {"linear": "fp8_per_block"},
+    )
+    assert args.linear == QuantSpec(weight=kFp8Static128BlockSym)
+    assert args.moe == QuantSpec(weight=kFp8StaticTensorSym)
+
+
+def test_resolve_rejects_quantization_config_with_non_shorthand_quant():
+    # If --quantization names something other than an online shorthand,
+    # quantization_config is not allowed via this path (checkpoint quant
+    # paths read it directly off ModelConfig instead).
+    with pytest.raises(ValueError, match="quantization_config is only supported"):
+        resolve_quantization_config("gptq", {"linear": "fp8_per_block"})
+
+
+# ---- QUANT_KEY_NAMES coverage --------------------------------------------
+
+
+def test_quant_key_names_round_trip():
+    # Every advertised name should round-trip through QuantSpec without error
+    # and produce the same QuantKey it maps to.
+    for name, expected in QUANT_KEY_NAMES.items():
+        assert QuantSpec(weight=name).weight == expected, name
+        assert QuantSpec(activation=name).activation == expected, name
+
+
+def test_static_block_weight_paired_with_dynamic_block_activation():
+    # The block-FP8 shorthand pair: 128x128 static weights + 1x128 dynamic
+    # activations. Pinning this so renames in QUANT_KEY_NAMES don't quietly
+    # rewire the kernel dispatch.
+    spec = QuantSpec(weight="fp8_per_block_static", activation="fp8_per_block_dynamic")
+    assert spec.weight == kFp8Static128BlockSym
+    assert spec.activation == kFp8Dynamic128Sym
diff --git a/tests/quantization/test_quark.py b/tests/quantization/test_quark.py
index afb0437f5b36..fe474d7e0cc8 100644
--- a/tests/quantization/test_quark.py
+++ b/tests/quantization/test_quark.py
@@ -22,6 +22,9 @@
     QuarkW8A8Fp8,
     QuarkW8A8Int8,
 )
+from vllm.model_executor.layers.quantization.quark.quark_moe import (  # noqa: E501
+    QuarkW8A8Int8MoEMethod,
+)
 from vllm.platforms import current_platform
 
 from .reference_mxfp4 import dq_mxfp4_torch, qdq_mxfp4_torch
@@ -33,6 +36,8 @@
     importlib.metadata.version("amd-quark")
 ) >= version.parse(QUARK_MXFP4_MIN_VERSION)
 
+DEVICE_TYPE = current_platform.device_type
+
 if QUARK_MXFP4_AVAILABLE:
     from quark.torch.export.nn.modules.realquantizer import StaticScaledRealQuantizer
     from quark.torch.kernel import mx as mx_kernel
@@ -126,6 +131,34 @@ def check_model(model):
         assert output
 
 
+@pytest.mark.parametrize("tp", [1])
+def test_quark_int8_w8a8_moe(vllm_runner, tp):
+    """Test W8A8 INT8 MoE quantization with a tiny Qwen3 MoE model."""
+    model_path = "nameistoken/tiny-qwen3-moe-w8a8-int8-quark"
+    with vllm_runner(
+        model_path,
+        enforce_eager=True,
+        tensor_parallel_size=tp,
+        gpu_memory_utilization=0.1,
+    ) as llm:
+
+        def check_model(model):
+            layer = model.model.layers[0]
+            # MoE experts should use QuarkW8A8Int8MoEMethod
+            moe = layer.mlp.experts
+            assert isinstance(moe.quant_method, QuarkW8A8Int8MoEMethod), (
+                f"Expected QuarkW8A8Int8MoEMethod, got {type(moe.quant_method)}"
+            )
+            # Non-MoE linear layers should use QuarkW8A8Int8
+            qkv_proj = layer.self_attn.qkv_proj
+            assert isinstance(qkv_proj.scheme, QuarkW8A8Int8)
+
+        llm.apply_model(check_model)
+
+        output = llm.generate_greedy("Hello", max_tokens=4)
+        assert output
+
+
 def test_quark_fp8_parity(vllm_runner):
     quark_model_id = "amd-quark/llama-tiny-fp8-quark-quant-method"
     fp8_model_id = "amd-quark/llama-tiny-fp8-quant-method"
@@ -207,8 +240,13 @@ def get_model_args(
     not QUARK_MXFP4_AVAILABLE,
     reason=f"amd-quark>={QUARK_MXFP4_MIN_VERSION} is not available",
 )
-@pytest.mark.parametrize("config", WIKITEXT_ACCURACY_CONFIGS)
-@pytest.mark.parametrize("tp_size", [1, 2])
+@pytest.mark.parametrize(
+    "config",
+    [pytest.param(val, id=f"config:{val}") for val in WIKITEXT_ACCURACY_CONFIGS],
+)
+@pytest.mark.parametrize(
+    "tp_size", [pytest.param(val, id=f"tp_size:{val}") for val in [1, 2]]
+)
 def test_ocp_mx_wikitext_correctness(config: AccuracyTestConfig, tp_size: int):
     device_count = torch.accelerator.device_count()
     if device_count < tp_size:
@@ -235,6 +273,53 @@ def test_ocp_mx_wikitext_correctness(config: AccuracyTestConfig, tp_size: int):
     ), f"Expected: {EXPECTED_VALUE} |  Measured: {measured_value}"
 
 
+@pytest.mark.skipif(
+    not QUARK_MXFP4_AVAILABLE,
+    reason=f"amd-quark>={QUARK_MXFP4_MIN_VERSION} is not available",
+)
+@pytest.mark.parametrize("tp_size", [1, 2])
+def test_nvfp4_wikitext_correctness(tp_size: int):
+    device_count = torch.accelerator.device_count()
+    if device_count < tp_size:
+        pytest.skip(f"This test requires >={tp_size} gpus, got only {device_count}")
+
+    # NOTE: expected_value from nvidia/Qwen3-30B-A3B-NVFP4
+    expected_value = 11.2391
+
+    model_name = "amd-quark/Qwen3-30B-A3B-nvfp4-quark"
+    task = "wikitext"
+
+    rtol = 0.25
+
+    config = AccuracyTestConfig(
+        model_name=model_name,
+        excepted_value=expected_value,
+    )
+
+    model_args = config.get_model_args(
+        tp_size=tp_size,
+        kwargs={
+            "cudagraph_capture_sizes": [16],
+        },
+    )
+    model_args.pop("add_bos_token")
+
+    # Smaller cudagraph_capture_sizes to speed up the test.
+    results = lm_eval.simple_evaluate(
+        model="vllm",
+        model_args=model_args,
+        tasks=task,
+        batch_size=64,
+    )
+
+    EXPECTED_VALUE = config.excepted_value
+    measured_value = results["results"][task]["word_perplexity,none"]
+    assert (
+        measured_value < EXPECTED_VALUE + rtol
+        and measured_value > EXPECTED_VALUE - rtol
+    ), f"Expected: {EXPECTED_VALUE} |  Measured: {measured_value}"
+
+
 @pytest.mark.parametrize("config", GSM8K_ACCURACY_CONFIGS)
 @pytest.mark.skipif(
     not QUARK_MXFP4_AVAILABLE,
@@ -278,7 +363,7 @@ def test_mxfp4_fused_qdq_match_quark(float_dtype: torch.dtype, scalings: list[in
     torch.manual_seed(0)
 
     hidden_size = 64 * 32
-    inp = (torch.rand(1, hidden_size, dtype=float_dtype, device="cuda") - 0.5) * 2
+    inp = (torch.rand(1, hidden_size, dtype=float_dtype, device=DEVICE_TYPE) - 0.5) * 2
     for i in range(hidden_size // 32):
         inp[:, i * 32 : (i + 1) * 32] = (
             inp[:, i * 32 : (i + 1) * 32] * scalings[i % len(scalings)]
@@ -322,15 +407,15 @@ def test_mxfp4_dequant_kernel_match_quark(
         reorder=False,
         real_quantized=True,
         float_dtype=float_dtype,
-        device="cuda",
+        device=DEVICE_TYPE,
     )
 
-    observer = qspec.observer_cls(qspec, device="cuda")
+    observer = qspec.observer_cls(qspec, device=DEVICE_TYPE)
 
     hidden_size = 512
     shape = (11008, hidden_size)
 
-    w = (torch.rand(shape, device="cuda", dtype=float_dtype) - 0.5) * 2
+    w = (torch.rand(shape, device=DEVICE_TYPE, dtype=float_dtype) - 0.5) * 2
 
     # Make it so that different groups have different scales.
     for i in range(hidden_size // 32):
@@ -342,7 +427,7 @@ def test_mxfp4_dequant_kernel_match_quark(
     scale, _ = observer._calculate_qparams()
     weight_quantizer.scale = scale
 
-    w_mxfp4 = weight_quantizer.to_real_quantize_params(w).to("cuda")
+    w_mxfp4 = weight_quantizer.to_real_quantize_params(w).to(DEVICE_TYPE)
     weight_quantizer.maybe_convert_and_transpose_scale()
 
     scale = weight_quantizer.scale
diff --git a/tests/quantization/test_torchao.py b/tests/quantization/test_torchao.py
index fb794baa53f0..8efc6742a2d9 100644
--- a/tests/quantization/test_torchao.py
+++ b/tests/quantization/test_torchao.py
@@ -8,6 +8,7 @@
 from vllm.model_executor.model_loader import get_model_loader
 from vllm.platforms import current_platform
 
+DEVICE_TYPE = current_platform.device_type
 DTYPE = ["bfloat16"]
 
 TORCHAO_AVAILABLE = importlib.util.find_spec("torchao") is not None
@@ -33,7 +34,7 @@ def test_pre_quantized_model(vllm_runner):
 @pytest.mark.parametrize(
     "pt_load_map_location",
     [
-        "cuda:0",
+        f"{DEVICE_TYPE}:0",
         # {"": "cuda"},
     ],
 )
@@ -60,7 +61,7 @@ def test_qwenvl_int8wo_model_loading_with_params(vllm_runner):
         model_name=model_name,
         quantization="torchao",
         dtype="bfloat16",
-        pt_load_map_location="cuda:0",
+        pt_load_map_location=f"{DEVICE_TYPE}:0",
         enforce_eager=True,
     ) as llm:
         output = llm.generate_greedy(["The capital of France is"], max_tokens=4)
@@ -81,7 +82,7 @@ def test_opt_125m_awq_int4wo_model_loading_with_params(vllm_runner):
         model_name=model_name,
         quantization="torchao",
         dtype="bfloat16",
-        pt_load_map_location="cuda:0",
+        pt_load_map_location=f"{DEVICE_TYPE}:0",
     ) as llm:
         output = llm.generate_greedy(["The capital of France is"], max_tokens=4)
 
@@ -112,7 +113,7 @@ def test_online_quant_config_dict_json(vllm_runner, enable_pickle):
     with vllm_runner(
         model_name=model_name,
         dtype="bfloat16",
-        pt_load_map_location="cuda:0",
+        pt_load_map_location=f"{DEVICE_TYPE}:0",
         quantization="torchao",
         hf_overrides=hf_overrides,
         enforce_eager=True,
@@ -158,7 +159,7 @@ def test_online_quant_config_file(vllm_runner):
         with vllm_runner(
             model_name=model_name,
             dtype="bfloat16",
-            pt_load_map_location="cuda:0",
+            pt_load_map_location=f"{DEVICE_TYPE}:0",
             quantization="torchao",
             hf_overrides=hf_overrides,
             enforce_eager=True,
@@ -248,7 +249,7 @@ def test_opt_125m_module_fqn_to_config_regex_model(vllm_runner):
     torch._dynamo.reset()
     model_name = "torchao-testing/opt-125m-ModuleFqnToConfig-v1-regex-0.14.0.dev"
     with vllm_runner(
-        model_name=model_name, dtype="bfloat16", pt_load_map_location="cuda:0"
+        model_name=model_name, dtype="bfloat16", pt_load_map_location=f"{DEVICE_TYPE}:0"
     ) as llm:
         output = llm.generate_greedy(["The capital of France is"], max_tokens=4)
 
@@ -278,7 +279,7 @@ def test_opt_125m_int4wo_model_running_preshuffled_kernel(vllm_runner, monkeypat
         model_name=model_name,
         quantization="torchao",
         dtype="bfloat16",
-        pt_load_map_location="cuda:0",
+        pt_load_map_location=f"{DEVICE_TYPE}:0",
         enforce_eager=True,
     ) as llm:
 
@@ -357,7 +358,7 @@ def test_opt_125m_int4wo_model_running_preshuffled_kernel_online_quant(
         model_name=model_name,
         quantization="torchao",
         dtype="bfloat16",
-        pt_load_map_location="cuda:0",
+        pt_load_map_location=f"{DEVICE_TYPE}:0",
         hf_overrides=hf_overrides,
         enforce_eager=True,
     ) as llm:
diff --git a/tests/quantization/test_trtllm_nvfp4_hidden_dim_padding.py b/tests/quantization/test_trtllm_nvfp4_hidden_dim_padding.py
new file mode 100644
index 000000000000..88c9e5f867cd
--- /dev/null
+++ b/tests/quantization/test_trtllm_nvfp4_hidden_dim_padding.py
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+from vllm.model_executor.layers.quantization.utils.flashinfer_utils import (
+    align_trtllm_fp4_moe_hidden_dim_for_fi,
+)
+
+
+def test_align_trtllm_fp4_moe_hidden_dim_noop():
+    w13 = torch.arange(2 * 8 * 256, dtype=torch.uint8).reshape(2, 8, 256)
+    w13_scale = torch.arange(2 * 8 * 32, dtype=torch.uint8).reshape(2, 8, 32)
+    w2 = torch.arange(2 * 512 * 4, dtype=torch.uint8).reshape(2, 512, 4)
+    w2_scale = torch.arange(2 * 512 * 1, dtype=torch.uint8).reshape(2, 512, 1)
+
+    out_w13, out_w13_scale, out_w2, out_w2_scale, padded_hidden = (
+        align_trtllm_fp4_moe_hidden_dim_for_fi(w13, w13_scale, w2, w2_scale)
+    )
+
+    assert padded_hidden == 512
+    assert out_w13 is w13
+    assert out_w13_scale is w13_scale
+    assert out_w2 is w2
+    assert out_w2_scale is w2_scale
+
+
+def test_align_trtllm_fp4_moe_hidden_dim_pads_to_256_multiple():
+    hidden_dim = 2688
+    padded_hidden_dim = 2816
+
+    w13 = torch.arange(2 * 12 * (hidden_dim // 2), dtype=torch.uint8).reshape(
+        2, 12, hidden_dim // 2
+    )
+    w13_scale = torch.arange(2 * 12 * (hidden_dim // 16), dtype=torch.uint8).reshape(
+        2, 12, hidden_dim // 16
+    )
+
+    w2 = torch.arange(2 * hidden_dim * 6, dtype=torch.uint8).reshape(2, hidden_dim, 6)
+    w2_scale = torch.arange(2 * hidden_dim * 2, dtype=torch.uint8).reshape(
+        2, hidden_dim, 2
+    )
+
+    out_w13, out_w13_scale, out_w2, out_w2_scale, out_hidden_dim = (
+        align_trtllm_fp4_moe_hidden_dim_for_fi(w13, w13_scale, w2, w2_scale)
+    )
+
+    assert out_hidden_dim == padded_hidden_dim
+    assert out_w13.shape == (2, 12, padded_hidden_dim // 2)
+    assert out_w13_scale.shape == (2, 12, padded_hidden_dim // 16)
+    assert out_w2.shape == (2, padded_hidden_dim, 6)
+    assert out_w2_scale.shape == (2, padded_hidden_dim, 2)
+
+    torch.testing.assert_close(out_w13[:, :, : hidden_dim // 2], w13)
+    torch.testing.assert_close(out_w13_scale[:, :, : hidden_dim // 16], w13_scale)
+    torch.testing.assert_close(out_w2[:, :hidden_dim, :], w2)
+    torch.testing.assert_close(out_w2_scale[:, :hidden_dim, :], w2_scale)
+
+    assert torch.count_nonzero(out_w13[:, :, hidden_dim // 2 :]) == 0
+    assert torch.count_nonzero(out_w13_scale[:, :, hidden_dim // 16 :]) == 0
+    assert torch.count_nonzero(out_w2[:, hidden_dim:, :]) == 0
+    assert torch.count_nonzero(out_w2_scale[:, hidden_dim:, :]) == 0
diff --git a/tests/quantization/test_turboquant.py b/tests/quantization/test_turboquant.py
new file mode 100644
index 000000000000..b9567195b3a8
--- /dev/null
+++ b/tests/quantization/test_turboquant.py
@@ -0,0 +1,625 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for TurboQuant KV-cache quantization.
+
+Run: .venv/bin/python -m pytest tests/quantization/test_turboquant.py -v
+"""
+
+import math
+
+import pytest
+import torch
+
+from vllm.model_executor.layers.quantization.turboquant.centroids import (
+    get_centroids,
+    solve_lloyd_max,
+)
+from vllm.model_executor.layers.quantization.turboquant.config import (
+    TQ_PRESETS,
+    TurboQuantConfig,
+)
+from vllm.platforms import current_platform
+from vllm.utils.math_utils import next_power_of_2
+
+# ============================================================================
+# Helpers
+# ============================================================================
+
+ALL_PRESETS = list(TQ_PRESETS.keys())
+
+
+def _assert_strictly_sorted(seq, name="sequence"):
+    for i in range(len(seq) - 1):
+        assert seq[i] < seq[i + 1], f"{name} not sorted at index {i}"
+
+
+def _is_power_of_2(n: int) -> bool:
+    return n > 0 and next_power_of_2(n) == n
+
+
+# Expected concrete values for each preset at head_dim=128.
+# fmt: off
+PRESET_EXPECTED = {
+    "turboquant_k8v4": dict(
+        key_fp8=True,  key_quant_bits=8,
+        key_mse_bits=0, value_quant_bits=4,
+        mse_bits=4, n_centroids=16, centroid_bits=4,
+        norm_correction=False,
+        key_packed_size=128, value_packed_size=68,
+        slot_size=196, slot_size_aligned=196,
+    ),
+    "turboquant_4bit_nc": dict(
+        key_fp8=False, key_quant_bits=4,
+        key_mse_bits=4, value_quant_bits=4,
+        mse_bits=4, n_centroids=16, centroid_bits=4,
+        norm_correction=True,
+        key_packed_size=66, value_packed_size=68,
+        slot_size=134, slot_size_aligned=134,
+    ),
+    "turboquant_k3v4_nc": dict(
+        key_fp8=False, key_quant_bits=3,
+        key_mse_bits=3, value_quant_bits=4,
+        mse_bits=3, n_centroids=8, centroid_bits=3,
+        norm_correction=True,
+        key_packed_size=50, value_packed_size=68,
+        slot_size=118, slot_size_aligned=118,
+    ),
+    "turboquant_3bit_nc": dict(
+        key_fp8=False, key_quant_bits=3,
+        key_mse_bits=3, value_quant_bits=3,
+        mse_bits=3, n_centroids=8, centroid_bits=3,
+        norm_correction=True,
+        key_packed_size=50, value_packed_size=52,
+        slot_size=102, slot_size_aligned=102,
+    ),
+}
+# fmt: on
+
+
+# ============================================================================
+# Config tests (CPU-only, no dependencies beyond config.py)
+# ============================================================================
+
+
+class TestTurboQuantConfig:
+    @pytest.mark.parametrize("preset", ALL_PRESETS)
+    def test_preset_parses(self, preset):
+        cfg = TurboQuantConfig.from_cache_dtype(preset, head_dim=128)
+        assert isinstance(cfg, TurboQuantConfig)
+
+    def test_invalid_preset_raises(self):
+        with pytest.raises(ValueError, match="Unknown TurboQuant"):
+            TurboQuantConfig.from_cache_dtype("turboquant_invalid", head_dim=128)
+
+    # ---- Per-preset concrete value checks (table-driven) ----
+
+    @pytest.mark.parametrize("preset", ALL_PRESETS)
+    def test_key_mode(self, preset):
+        cfg = TurboQuantConfig.from_cache_dtype(preset, head_dim=128)
+        exp = PRESET_EXPECTED[preset]
+        assert cfg.key_fp8 is exp["key_fp8"]
+        assert cfg.key_quant_bits == exp["key_quant_bits"]
+        assert cfg.key_mse_bits == exp["key_mse_bits"]
+
+    @pytest.mark.parametrize("preset", ALL_PRESETS)
+    def test_value_mode(self, preset):
+        cfg = TurboQuantConfig.from_cache_dtype(preset, head_dim=128)
+        exp = PRESET_EXPECTED[preset]
+        assert cfg.value_quant_bits == exp["value_quant_bits"]
+
+    @pytest.mark.parametrize("preset", ALL_PRESETS)
+    def test_bits_and_centroids(self, preset):
+        cfg = TurboQuantConfig.from_cache_dtype(preset, head_dim=128)
+        exp = PRESET_EXPECTED[preset]
+        assert cfg.mse_bits == exp["mse_bits"]
+        assert cfg.n_centroids == exp["n_centroids"]
+        assert cfg.centroid_bits == exp["centroid_bits"]
+
+    @pytest.mark.parametrize("preset", ALL_PRESETS)
+    def test_norm_correction(self, preset):
+        cfg = TurboQuantConfig.from_cache_dtype(preset, head_dim=128)
+        assert cfg.norm_correction is PRESET_EXPECTED[preset]["norm_correction"]
+
+    @pytest.mark.parametrize("preset", ALL_PRESETS)
+    def test_packed_sizes(self, preset):
+        cfg = TurboQuantConfig.from_cache_dtype(preset, head_dim=128)
+        exp = PRESET_EXPECTED[preset]
+        assert cfg.key_packed_size == exp["key_packed_size"]
+        assert cfg.value_packed_size == exp["value_packed_size"]
+        assert cfg.slot_size == exp["slot_size"]
+        assert cfg.slot_size_aligned == exp["slot_size_aligned"]
+
+    # ---- Cross-preset structural invariants ----
+
+    @pytest.mark.parametrize("preset", ALL_PRESETS)
+    def test_slot_equals_key_plus_value(self, preset):
+        cfg = TurboQuantConfig.from_cache_dtype(preset, head_dim=128)
+        assert cfg.slot_size == cfg.key_packed_size + cfg.value_packed_size
+
+    @pytest.mark.parametrize("preset", ALL_PRESETS)
+    def test_padded_slot_is_even(self, preset):
+        cfg = TurboQuantConfig.from_cache_dtype(preset, head_dim=128)
+        assert cfg.slot_size_aligned >= cfg.slot_size
+        assert cfg.slot_size_aligned % 2 == 0, (
+            f"slot_size_aligned={cfg.slot_size_aligned} is not even"
+        )
+
+    @pytest.mark.parametrize("preset", ALL_PRESETS)
+    def test_key_value_packed_sizes_positive(self, preset):
+        cfg = TurboQuantConfig.from_cache_dtype(preset, head_dim=128)
+        assert cfg.key_packed_size > 0
+        assert cfg.value_packed_size > 0
+
+    @pytest.mark.parametrize("preset", ALL_PRESETS)
+    def test_n_centroids_is_2_to_mse_bits(self, preset):
+        cfg = TurboQuantConfig.from_cache_dtype(preset, head_dim=128)
+        assert cfg.n_centroids == 2**cfg.mse_bits
+
+    @pytest.mark.parametrize("preset", ALL_PRESETS)
+    def test_centroid_bits_always_positive(self, preset):
+        cfg = TurboQuantConfig.from_cache_dtype(preset, head_dim=128)
+        assert cfg.centroid_bits > 0
+
+    @pytest.mark.parametrize("preset", ALL_PRESETS)
+    def test_mse_key_or_fp8_exclusive(self, preset):
+        """Each preset is either FP8 keys or MSE keys, never both."""
+        cfg = TurboQuantConfig.from_cache_dtype(preset, head_dim=128)
+        if cfg.key_fp8:
+            assert cfg.key_mse_bits == 0
+            assert cfg.key_quant_bits == 8
+        else:
+            assert cfg.key_mse_bits > 0
+            assert cfg.key_quant_bits in (3, 4)
+
+    @pytest.mark.parametrize("preset", ALL_PRESETS)
+    @pytest.mark.parametrize("head_dim", [64, 96, 128, 256])
+    def test_all_presets_all_head_dims(self, preset, head_dim):
+        cfg = TurboQuantConfig.from_cache_dtype(preset, head_dim=head_dim)
+        assert cfg.head_dim == head_dim
+        assert cfg.slot_size == cfg.key_packed_size + cfg.value_packed_size
+        assert cfg.slot_size_aligned >= cfg.slot_size
+        assert cfg.slot_size_aligned % 2 == 0
+
+    # ---- Boundary skip layers ----
+
+    @staticmethod
+    def _dense_model_config(num_layers):
+        from types import SimpleNamespace
+
+        return SimpleNamespace(
+            is_hybrid=False,
+            hf_text_config=SimpleNamespace(num_hidden_layers=num_layers),
+        )
+
+    def test_boundary_skip_layers_basic(self):
+        mc = self._dense_model_config(32)
+        layers = TurboQuantConfig.get_boundary_skip_layers(mc)
+        assert layers == ["0", "1", "30", "31"]
+
+    def test_boundary_skip_layers_zero(self):
+        mc = self._dense_model_config(32)
+        assert TurboQuantConfig.get_boundary_skip_layers(mc, 0) == []
+
+    def test_boundary_skip_layers_small_model(self):
+        mc = self._dense_model_config(4)
+        layers = TurboQuantConfig.get_boundary_skip_layers(mc)
+        assert layers == ["0", "1", "2", "3"]
+
+    def test_boundary_skip_layers_cap_at_half(self):
+        mc = self._dense_model_config(8)
+        layers = TurboQuantConfig.get_boundary_skip_layers(mc, 10)
+        assert len(layers) == 8
+
+
+class TestHybridAttentionIndices:
+    """Regression tests for boundary protection on hybrid models.
+
+    Hybrid models (attention + Mamba / linear-attention) identify KV-carrying
+    layers via layer_types / layers_block_type / attn_type_list. The helper
+    must return the *global* layer indices of the full-attention layers so
+    that kv_cache_dtype_skip_layers matches what extract_layer_index(prefix)
+    reports on the Attention layers at runtime.
+    """
+
+    @staticmethod
+    def _fake_model_config(text_cfg=None, hf_cfg=None):
+        from types import SimpleNamespace
+
+        return SimpleNamespace(
+            hf_text_config=text_cfg if text_cfg is not None else SimpleNamespace(),
+            hf_config=hf_cfg if hf_cfg is not None else SimpleNamespace(),
+        )
+
+    def test_layer_types_full_attention(self):
+        from vllm.model_executor.layers.quantization.turboquant.config import (
+            _get_full_attention_layer_indices,
+        )
+
+        cfg = type("C", (), {})()
+        cfg.layer_types = [
+            "linear_attention",
+            "linear_attention",
+            "full_attention",
+            "linear_attention",
+            "full_attention",
+            "full_attention",
+        ]
+        mc = self._fake_model_config(text_cfg=cfg)
+        assert _get_full_attention_layer_indices(mc) == [2, 4, 5]
+
+    def test_layers_block_type_jamba(self):
+        from vllm.model_executor.layers.quantization.turboquant.config import (
+            _get_full_attention_layer_indices,
+        )
+
+        cfg = type("C", (), {})()
+        cfg.layers_block_type = ["mamba", "attention", "mamba", "attention"]
+        mc = self._fake_model_config(text_cfg=cfg)
+        assert _get_full_attention_layer_indices(mc) == [1, 3]
+
+    def test_attn_type_list_minimax(self):
+        from vllm.model_executor.layers.quantization.turboquant.config import (
+            _get_full_attention_layer_indices,
+        )
+
+        hf = type("C", (), {})()
+        hf.attn_type_list = [0, 1, 0, 1, 1]
+        mc = self._fake_model_config(hf_cfg=hf)
+        assert _get_full_attention_layer_indices(mc) == [1, 3, 4]
+
+    def test_no_hybrid_hints_returns_empty(self):
+        from vllm.model_executor.layers.quantization.turboquant.config import (
+            _get_full_attention_layer_indices,
+        )
+
+        mc = self._fake_model_config()
+        assert _get_full_attention_layer_indices(mc) == []
+
+
+# ============================================================================
+# Centroids tests (CPU-only)
+# ============================================================================
+
+
+class TestCentroids:
+    @pytest.mark.parametrize("bits,expected_n", [(2, 4), (3, 8), (4, 16)])
+    def test_centroids_shape(self, bits, expected_n):
+        c = get_centroids(128, bits)
+        assert c.shape == (expected_n,)
+
+    @pytest.mark.parametrize("bits", [2, 3, 4])
+    def test_centroids_sorted(self, bits):
+        _assert_strictly_sorted(get_centroids(128, bits), "centroids")
+
+    def test_centroids_cached(self):
+        c1 = get_centroids(128, 3)
+        c2 = get_centroids(128, 3)
+        assert c1 is c2, "get_centroids should return cached object"
+
+    def test_centroids_different_dims_not_identical(self):
+        c64 = get_centroids(64, 3)
+        c128 = get_centroids(128, 3)
+        assert not torch.equal(c64, c128)
+
+    @pytest.mark.parametrize("bits", [2, 3, 4])
+    def test_centroids_symmetric_around_zero(self, bits):
+        """N(0, 1/d) is symmetric, so centroids should be ~symmetric."""
+        c = get_centroids(128, bits)
+        assert abs(c.mean().item()) < 0.01, "Centroids not centered near 0"
+        assert abs(c[0].item() + c[-1].item()) < 0.01
+
+    @pytest.mark.parametrize("bits", [2, 3, 4])
+    def test_centroids_within_4sigma(self, bits):
+        """All centroids should be within ~4 sigma of N(0, 1/d)."""
+        sigma = math.sqrt(1.0 / 128)
+        c = get_centroids(128, bits)
+        for i, val in enumerate(c):
+            assert abs(val.item()) < 4 * sigma, (
+                f"Centroid {i}={val:.6f} outside 4*sigma={4 * sigma:.6f}"
+            )
+
+
+class TestLloydMax:
+    @pytest.mark.parametrize("bits,expected_n", [(2, 4), (3, 8), (4, 16)])
+    def test_solve_shapes(self, bits, expected_n):
+        centroids, boundaries = solve_lloyd_max(128, bits)
+        assert centroids.shape == (expected_n,)
+        assert boundaries.shape == (expected_n - 1,)
+
+    @pytest.mark.parametrize("bits", [2, 3, 4])
+    def test_centroids_sorted(self, bits):
+        centroids, _ = solve_lloyd_max(128, bits)
+        _assert_strictly_sorted(centroids, "centroids")
+
+    @pytest.mark.parametrize("bits", [2, 3, 4])
+    def test_boundaries_sorted(self, bits):
+        _, boundaries = solve_lloyd_max(128, bits)
+        _assert_strictly_sorted(boundaries, "boundaries")
+
+    @pytest.mark.parametrize("bits", [2, 3, 4])
+    def test_boundaries_between_centroids(self, bits):
+        """Each boundary must lie between its adjacent centroids."""
+        centroids, boundaries = solve_lloyd_max(128, bits)
+        for i in range(len(boundaries)):
+            assert centroids[i] < boundaries[i] < centroids[i + 1], (
+                f"Boundary {i}={boundaries[i]:.6f} not between "
+                f"c[{i}]={centroids[i]:.6f} and c[{i + 1}]={centroids[i + 1]:.6f}"
+            )
+
+    @pytest.mark.parametrize("bits", [2, 3, 4])
+    def test_boundaries_are_midpoints(self, bits):
+        """Lloyd-Max boundaries are midpoints of adjacent centroids."""
+        centroids, boundaries = solve_lloyd_max(128, bits)
+        for i in range(len(boundaries)):
+            expected = (centroids[i] + centroids[i + 1]) / 2.0
+            assert abs(boundaries[i].item() - expected.item()) < 1e-6
+
+    def test_solve_deterministic(self):
+        c1, b1 = solve_lloyd_max(128, 3)
+        c2, b2 = solve_lloyd_max(128, 3)
+        assert torch.equal(c1, c2)
+        assert torch.equal(b1, b2)
+
+    def test_solve_dtype_float32(self):
+        centroids, boundaries = solve_lloyd_max(128, 3)
+        assert centroids.dtype == torch.float32
+        assert boundaries.dtype == torch.float32
+
+    @pytest.mark.parametrize("bits", [3, 4])
+    def test_centroids_match_scipy_reference(self, bits):
+        """Verify _trapz(n=200) centroids match scipy.integrate.quad reference.
+
+        This ensures our scipy-free trapezoid integration doesn't silently
+        drift from the published Lloyd-Max quality.
+        """
+        pytest.importorskip("scipy")
+        from scipy.integrate import quad
+
+        d = 128
+        sigma2 = 1.0 / d
+        sigma = math.sqrt(sigma2)
+
+        def pdf(x):
+            return (1.0 / math.sqrt(2 * math.pi * sigma2)) * math.exp(
+                -x * x / (2 * sigma2)
+            )
+
+        n_levels = 2**bits
+        lo, hi = -3.5 * sigma, 3.5 * sigma
+        ref_centroids = [lo + (hi - lo) * (i + 0.5) / n_levels for i in range(n_levels)]
+        for _ in range(200):
+            boundaries = [
+                (ref_centroids[i] + ref_centroids[i + 1]) / 2.0
+                for i in range(n_levels - 1)
+            ]
+            edges = [lo * 3] + boundaries + [hi * 3]
+            new_centroids = []
+            for i in range(n_levels):
+                a, b = edges[i], edges[i + 1]
+                num, _ = quad(lambda x: x * pdf(x), a, b)
+                den, _ = quad(pdf, a, b)
+                new_centroids.append(num / den if den > 1e-15 else ref_centroids[i])
+            if (
+                max(abs(new_centroids[i] - ref_centroids[i]) for i in range(n_levels))
+                < 1e-10
+            ):
+                break
+            ref_centroids = new_centroids
+
+        # Compare our _trapz centroids against scipy reference
+        our_centroids, _ = solve_lloyd_max(d, bits)
+        ref_t = torch.tensor(ref_centroids, dtype=torch.float32)
+        max_err = (our_centroids - ref_t).abs().max().item()
+        # _trapz(n=200) has ~O(h^2) error vs adaptive quad; 1e-3 is tight
+        # enough to catch regression while allowing trapezoid approximation.
+        assert max_err < 1e-3, (
+            f"d={d}, bits={bits}: max centroid error vs scipy = {max_err:.2e}"
+        )
+
+
+# ============================================================================
+# Rotation matrix tests (GPU required)
+# ============================================================================
+
+GPGPU_AVAILABLE = torch.cuda.is_available() or torch.xpu.is_available()
+DEVICE_TYPE = current_platform.device_type
+
+
+def generate_rotation_matrix(d: int, seed: int, device: str = "cpu") -> torch.Tensor:
+    """Haar-distributed random orthogonal matrix via QR (test/benchmark only)."""
+    gen = torch.Generator(device="cpu")
+    gen.manual_seed(seed)
+    G = torch.randn(d, d, generator=gen, device="cpu", dtype=torch.float32)
+    # torch.linalg.qr on CPU requires LAPACK, which some torch wheels
+    # (ROCm) ship without. Run QR on accelerator instead
+    qr_device = "cuda" if torch.cuda.is_available() else "cpu"
+    Q, R = torch.linalg.qr(G.to(qr_device))
+    diag_sign = torch.sign(torch.diag(R))
+    diag_sign[diag_sign == 0] = 1.0
+    Q = Q * diag_sign.unsqueeze(0)
+    return Q.to(device)
+
+
+@pytest.mark.skipif(not GPGPU_AVAILABLE, reason="GPGPU not available")
+class TestRotationMatrix:
+    """Tests for the QR-based rotation (standalone benchmarks only)."""
+
+    @pytest.mark.parametrize("dim", [64, 96, 128, 256])
+    def test_rotation_matrix_shape_and_orthogonal(self, dim):
+        Pi = generate_rotation_matrix(dim, seed=42, device=DEVICE_TYPE)
+        assert Pi.shape == (dim, dim)
+        eye = Pi @ Pi.T
+        assert torch.allclose(eye, torch.eye(dim, device=DEVICE_TYPE), atol=1e-5), (
+            f"Pi not orthogonal for dim={dim}"
+        )
+
+    def test_rotation_matrix_deterministic(self):
+        Pi1 = generate_rotation_matrix(128, seed=42)
+        Pi2 = generate_rotation_matrix(128, seed=42)
+        assert torch.equal(Pi1, Pi2)
+
+    def test_rotation_matrix_different_seeds(self):
+        Pi1 = generate_rotation_matrix(128, seed=42)
+        Pi2 = generate_rotation_matrix(128, seed=99)
+        assert not torch.equal(Pi1, Pi2)
+
+    def test_rotation_matrix_det_is_pm1(self):
+        """Orthogonal matrix determinant must be +1 or -1."""
+        Pi = generate_rotation_matrix(128, seed=42, device=DEVICE_TYPE)
+        det = torch.linalg.det(Pi)
+        assert abs(abs(det.item()) - 1.0) < 1e-4
+
+
+# ============================================================================
+# Hadamard rotation tests (serving path: _build_hadamard)
+# ============================================================================
+
+
+def _build_hadamard(d: int, device: str = "cpu") -> torch.Tensor:
+    """Reproduce the serving-path Hadamard construction."""
+    H = torch.tensor([[1.0]])
+    while H.shape[0] < d:
+        H = torch.cat([torch.cat([H, H], 1), torch.cat([H, -H], 1)], 0)
+    return (H / math.sqrt(d)).to(torch.device(device))
+
+
+@pytest.mark.skipif(not GPGPU_AVAILABLE, reason="GPGPU not available")
+class TestHadamardRotation:
+    """Tests for the Hadamard rotation used in serving."""
+
+    @pytest.mark.parametrize("dim", [64, 128, 256])
+    def test_hadamard_orthonormal(self, dim):
+        """H must be orthonormal: H @ H^T = I."""
+        H = _build_hadamard(dim, DEVICE_TYPE)
+        eye = H @ H.T
+        assert torch.allclose(eye, torch.eye(dim, device=DEVICE_TYPE), atol=1e-5), (
+            f"Hadamard not orthonormal for dim={dim}"
+        )
+
+    @pytest.mark.parametrize("dim", [64, 128, 256])
+    def test_hadamard_symmetric(self, dim):
+        """Sylvester Hadamard must be symmetric: H = H^T."""
+        H = _build_hadamard(dim, DEVICE_TYPE)
+        assert torch.allclose(H, H.T, atol=1e-6), (
+            f"Hadamard not symmetric for dim={dim}"
+        )
+
+
+# ============================================================================
+# Store → Decode round-trip test (GPU + Triton required)
+# ============================================================================
+
+
+@pytest.mark.skipif(not GPGPU_AVAILABLE, reason="GPGPU not available")
+class TestStoreDecodeRoundTrip:
+    """End-to-end: store KV into TQ cache, decode, compare vs fp16 ref."""
+
+    @pytest.mark.parametrize(
+        "preset",
+        ["turboquant_k8v4", "turboquant_4bit_nc"],
+    )
+    def test_single_token_roundtrip(self, preset):
+        """Store 1 token, decode with query=key, check attention output.
+
+        For a single token with query=key, attention output should equal
+        the value (softmax over single key = 1.0). Quantization error
+        means we check cosine similarity rather than exact equality.
+        """
+        from vllm.model_executor.layers.quantization.turboquant.centroids import (
+            solve_lloyd_max,
+        )
+        from vllm.v1.attention.ops.triton_turboquant_decode import (
+            triton_turboquant_decode_attention,
+        )
+        from vllm.v1.attention.ops.triton_turboquant_store import (
+            triton_turboquant_store,
+        )
+
+        cfg = TurboQuantConfig.from_cache_dtype(preset, head_dim=128)
+        D = 128
+        Hk = 4  # num_kv_heads
+        Hq = 4  # num_q_heads (no GQA for simplicity)
+        B = 1  # single token
+        block_size = 16
+        num_blocks = 1
+
+        device = torch.device(DEVICE_TYPE)
+
+        # Pure Hadamard rotation (symmetric: H = H^T, so Pi = PiT = H)
+        H = _build_hadamard(D, DEVICE_TYPE)
+        PiT = H
+        Pi = H
+
+        # Generate centroids
+        centroids, _ = solve_lloyd_max(D, cfg.centroid_bits)
+        centroids = centroids.float().to(device)
+        c_sorted, _ = centroids.sort()
+        midpoints = ((c_sorted[:-1] + c_sorted[1:]) / 2).to(device)
+
+        # Random K, V
+        torch.manual_seed(123)
+        key = torch.randn(B, Hk, D, device=device, dtype=torch.float16)
+        value = torch.randn(B, Hk, D, device=device, dtype=torch.float16)
+
+        # Allocate KV cache
+        padded_slot = cfg.slot_size_aligned
+        kv_cache = torch.zeros(
+            num_blocks,
+            block_size,
+            Hk,
+            padded_slot,
+            device=device,
+            dtype=torch.uint8,
+        )
+        slot_mapping = torch.tensor([0], device=device, dtype=torch.int32)
+
+        # Store
+        triton_turboquant_store(
+            key,
+            value,
+            kv_cache,
+            slot_mapping,
+            PiT,
+            midpoints,
+            mse_bits=cfg.key_mse_bits,
+            key_packed_size=cfg.key_packed_size,
+            value_quant_bits=cfg.effective_value_quant_bits,
+            key_fp8=cfg.key_fp8,
+        )
+
+        # Decode: use key as query so attention = softmax([1]) * V = V
+        query = key.expand(B, Hq, D).contiguous().to(torch.float16)
+        block_table = torch.tensor([[0]], device=device, dtype=torch.int32)
+        seq_lens = torch.tensor([1], device=device, dtype=torch.int32)
+
+        output = triton_turboquant_decode_attention(
+            query=query,
+            kv_cache=kv_cache,
+            block_table=block_table,
+            seq_lens=seq_lens,
+            Pi=Pi,
+            centroids=centroids,
+            scale=1.0 / math.sqrt(D),
+            mse_bits=cfg.key_mse_bits,
+            key_packed_size=cfg.key_packed_size,
+            value_quant_bits=cfg.effective_value_quant_bits,
+            key_fp8=cfg.key_fp8,
+            norm_correction=cfg.norm_correction,
+            PiT=PiT,
+            max_num_kv_splits=4,
+        )
+
+        # With single KV, output should approximate the stored value.
+        # Check per-head cosine similarity > threshold.
+        out_fp32 = output.float()
+        val_fp32 = value.expand(B, Hq, D).float()
+        for h in range(Hq):
+            cos_sim = torch.nn.functional.cosine_similarity(
+                out_fp32[0, h].unsqueeze(0),
+                val_fp32[0, h].unsqueeze(0),
+            ).item()
+            # FP8 keys should be very accurate; MSE keys have more error
+            threshold = 0.95 if cfg.key_fp8 else 0.85
+            assert cos_sim > threshold, (
+                f"Preset {preset} head {h}: cosine_sim={cos_sim:.4f} < {threshold}"
+            )
diff --git a/tests/quantization/utils.py b/tests/quantization/utils.py
index cf3da37b073e..8ab9c310dcad 100644
--- a/tests/quantization/utils.py
+++ b/tests/quantization/utils.py
@@ -1,6 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import logging
+
+import regex as re
+
 from vllm.model_executor.layers.quantization import get_quantization_config
 from vllm.platforms import current_platform
 
@@ -21,3 +25,73 @@ def is_quant_method_supported(quant_method: str) -> bool:
     min_capability = get_quantization_config(quant_method).get_min_capability()
 
     return capability.to_int() >= min_capability
+
+
+def _test_online_quant_peak_mem_impl(
+    quantization_arg_value,
+    vllm_runner,
+    caplog_mp_spawn,
+    monkeypatch,
+) -> None:
+    # Note: `allenai/OLMoE-1B-7B-0125-Instruct` was selected because:
+    # 1. it covers both Linear and MoE paths
+    # 2. it is already used by other tests in CI, so adding it here
+    #    does not increase disk space for CI runners
+    # I really wanted to use `ibm-granite/granite-3.0-1b-a400m-base`
+    # which I think is the smallest MoE model in vLLM (2.5 GiB bf16,
+    # 1.3 GiB fp8), but could not as adding one more model makes CI
+    # run out of disk space.
+    model_name = "allenai/OLMoE-1B-7B-0125-Instruct"
+
+    # Force spawn to ensure caplog_mp_spawn works consistently
+    # (it relies on VLLM_LOGGING_CONFIG_PATH which spawn reads but fork ignores)
+    monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
+
+    with (
+        caplog_mp_spawn(logging.DEBUG) as log_holder,
+        vllm_runner(
+            model_name,
+            quantization=quantization_arg_value,
+            enforce_eager=True,
+        ) as llm,
+    ):
+        outputs = llm.generate_greedy(["The future of AI is"], max_tokens=4)
+        print(outputs[0][1])
+
+    log_text = log_holder.text
+
+    # Parse memory usage from captured logs
+    model_memory_gib = None
+    peak_memory_gib = None
+    for line in log_text.splitlines():
+        if model_memory_gib is None:
+            match = re.search(r"Model loading took ([\d.]+) GiB memory", line)
+            if match:
+                model_memory_gib = float(match.group(1))
+        if peak_memory_gib is None:
+            match = re.search(
+                r"Peak GPU memory after loading weights: ([\d.]+) GiB", line
+            )
+            if match:
+                peak_memory_gib = float(match.group(1))
+
+    assert model_memory_gib is not None, "Could not find model loading memory log"
+    assert peak_memory_gib is not None, "Could not find peak memory log"
+    print(f"GPU memory used after loading weights: {model_memory_gib} GiB")
+    print(f"Peak GPU memory usage while loading weights: {peak_memory_gib} GiB")
+
+    expected_model_memory_gib = 6.7
+
+    # for allenai/OLMoE-1B-7B-0125-Instruct the number we see today is 9.06
+    # GiB on CUDA, which is 1.36x above model_memory_gib. A slightly higher
+    # number is expected as when we load and quantize weights in a streaming
+    # fashion we need to have individual weights in bf16 + fp8 alive at the
+    # same time.
+    expected_peak_memory_gib = expected_model_memory_gib * 1.4
+
+    assert model_memory_gib < expected_model_memory_gib, (
+        f"{model_memory_gib=} higher than {expected_model_memory_gib}"
+    )
+    assert peak_memory_gib < expected_peak_memory_gib, (
+        f"{peak_memory_gib=} higher than {expected_peak_memory_gib}"
+    )
diff --git a/tests/reasoning/test_deepseekv3_reasoning_parser.py b/tests/reasoning/test_deepseekv3_reasoning_parser.py
index 4b0938d15520..f5b37194f927 100644
--- a/tests/reasoning/test_deepseekv3_reasoning_parser.py
+++ b/tests/reasoning/test_deepseekv3_reasoning_parser.py
@@ -6,6 +6,7 @@
 
 from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
 from vllm.entrypoints.openai.engine.protocol import DeltaMessage
+from vllm.reasoning import ReasoningParserManager
 from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
 from vllm.reasoning.deepseek_v3_reasoning_parser import DeepSeekV3ReasoningParser
 from vllm.reasoning.identity_reasoning_parser import IdentityReasoningParser
@@ -33,6 +34,12 @@ def test_parser_selection(tokenizer, thinking, expected_parser_type):
     assert isinstance(parser._parser, expected_parser_type)
 
 
+def test_deepseek_v4_reasoning_parser_alias():
+    parser_cls = ReasoningParserManager.get_reasoning_parser("deepseek_v4")
+
+    assert parser_cls is DeepSeekV3ReasoningParser
+
+
 def test_identity_reasoning_parser_basic(tokenizer):
     parser = IdentityReasoningParser(tokenizer)
 
diff --git a/tests/reasoning/test_gemma4_reasoning_parser.py b/tests/reasoning/test_gemma4_reasoning_parser.py
new file mode 100644
index 000000000000..699fc509d828
--- /dev/null
+++ b/tests/reasoning/test_gemma4_reasoning_parser.py
@@ -0,0 +1,275 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from tests.reasoning.utils import run_reasoning_extraction
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+)
+from vllm.reasoning import ReasoningParser, ReasoningParserManager
+
+# Using mistral tokenizer as a generic mock since the actual model is not on HF
+from vllm.tokenizers.registry import get_tokenizer
+
+parser_name = "gemma4"
+
+
+@pytest.fixture(scope="module")
+def generic_tokenizer():
+    return get_tokenizer("google/gemma-4-E2B-it")
+
+
+INVALID_SIMPLE_NONSTREAMING = {
+    "output": "This is a reasoning section<channel|>This is the rest",
+    "reasoning": "This is a reasoning section",
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+}
+INVALID_SIMPLE_STREAMING = {
+    "output": "This is a reasoning section<channel|>This is the rest",
+    "reasoning": None,
+    "content": "This is a reasoning sectionThis is the rest",
+    "is_reasoning_end": True,
+}
+INVALID_COMPLETE_NONSTREAMING = {
+    "output": "This is a reasoning section<channel|>",
+    "reasoning": "This is a reasoning section",
+    "content": None,
+    "is_reasoning_end": True,
+}
+INVALID_COMPLETE_STREAMING = {
+    "output": "This is a reasoning section<channel|>",
+    "reasoning": None,
+    "content": "This is a reasoning section",
+    "is_reasoning_end": True,
+}
+NO_CONTENT = {
+    "output": "<|channel>This is reasoning",
+    "reasoning": "This is reasoning",
+    "content": None,
+    "is_reasoning_end": False,
+}
+NO_REASONING = {
+    "output": "This is content",
+    "reasoning": None,
+    "content": "This is content",
+    "is_reasoning_end": False,
+}
+REASONING_WITH_CHANNEL = {
+    "output": "<|channel>This is a reasoning section<channel|>This is the rest",
+    "reasoning": "This is a reasoning section",
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+}
+COMPLETE_REASONING_WITH_CHANNEL = {
+    "output": "<|channel>This is a reasoning section<channel|>",
+    "reasoning": "This is a reasoning section",
+    "content": None,
+    "is_reasoning_end": True,
+}
+MULTIPLE_LINES_WITH_CHANNEL = {
+    "output": "<|channel>This\nThat<channel|>This is the rest\nThat",
+    "reasoning": "This\nThat",
+    "content": "This is the rest\nThat",
+    "is_reasoning_end": True,
+}
+CHANNEL_NO_END = {
+    "output": "<|channel>This is a reasoning section",
+    "reasoning": "This is a reasoning section",
+    "content": None,
+    "is_reasoning_end": False,
+}
+EMPTY = {
+    "output": "",
+    "reasoning": None,
+    "content": "",
+    "is_reasoning_end": False,
+}
+NEW_LINE_NONSTREAMING = {
+    "output": (
+        "Before\n<|channel>This is a reasoning section<channel|>\nThis is the rest"
+    ),
+    "reasoning": "This is a reasoning section",
+    "content": "\nThis is the rest",
+    "is_reasoning_end": True,
+}
+NEW_LINE_STREAMING = {
+    "output": (
+        "Before\n<|channel>This is a reasoning section<channel|>\nThis is the rest"
+    ),
+    "reasoning": "This is a reasoning section",
+    "content": "Before\n\nThis is the rest",
+    "is_reasoning_end": True,
+}
+
+THOUGHT_PREFIX = {
+    "output": "<|channel>thought\nActual reasoning here<channel|>Final answer",
+    "reasoning": "Actual reasoning here",
+    "content": "Final answer",
+    "is_reasoning_end": True,
+}
+THOUGHT_PREFIX_ONLY = {
+    "output": "<|channel>thought\n<channel|>",
+    "reasoning": "",
+    "content": None,
+    "is_reasoning_end": True,
+}
+THOUGHT_PREFIX_MULTILINE = {
+    "output": "<|channel>thought\nLine1\nLine2<channel|>Answer",
+    "reasoning": "Line1\nLine2",
+    "content": "Answer",
+    "is_reasoning_end": True,
+}
+# "thousand" starts like "thought" but diverges — exercises Case 2→3 in streaming.
+THOUGHT_PREFIX_DIVERGE = {
+    "output": "<|channel>thousand reasons<channel|>Done",
+    "reasoning": "thousand reasons",
+    "content": "Done",
+    "is_reasoning_end": True,
+}
+# The model isn't reasoning if we're generating tool calls.
+TOOL_CALL_STARTED = {
+    "output": "<|tool_call>",
+    "reasoning": None,
+    "content": "<|tool_call>",
+    "is_reasoning_end": True,
+}
+
+TEST_CASES = [
+    pytest.param(False, INVALID_SIMPLE_NONSTREAMING, id="invalid_simple"),
+    pytest.param(True, INVALID_SIMPLE_STREAMING, id="invalid_simple_streaming"),
+    pytest.param(False, INVALID_COMPLETE_NONSTREAMING, id="invalid_complete"),
+    pytest.param(True, INVALID_COMPLETE_STREAMING, id="invalid_complete_streaming"),
+    pytest.param(False, NO_CONTENT, id="no_content"),
+    pytest.param(False, NO_REASONING, id="no_reasoning"),
+    pytest.param(False, REASONING_WITH_CHANNEL, id="reasoning"),
+    pytest.param(True, REASONING_WITH_CHANNEL, id="reasoning_streaming"),
+    pytest.param(False, COMPLETE_REASONING_WITH_CHANNEL, id="complete_reasoning"),
+    pytest.param(
+        True, COMPLETE_REASONING_WITH_CHANNEL, id="complete_reasoning_streaming"
+    ),
+    pytest.param(False, MULTIPLE_LINES_WITH_CHANNEL, id="multiple_lines"),
+    pytest.param(True, MULTIPLE_LINES_WITH_CHANNEL, id="multiple_lines_streaming"),
+    pytest.param(False, CHANNEL_NO_END, id="no_end"),
+    pytest.param(True, CHANNEL_NO_END, id="no_end_streaming"),
+    pytest.param(False, EMPTY, id="empty"),
+    pytest.param(False, NEW_LINE_NONSTREAMING, id="new_line"),
+    pytest.param(True, NEW_LINE_STREAMING, id="new_line_streaming"),
+    pytest.param(False, THOUGHT_PREFIX, id="thought_prefix"),
+    pytest.param(True, THOUGHT_PREFIX, id="thought_prefix_streaming"),
+    pytest.param(False, THOUGHT_PREFIX_ONLY, id="thought_prefix_only"),
+    pytest.param(True, THOUGHT_PREFIX_ONLY, id="thought_prefix_only_streaming"),
+    pytest.param(False, THOUGHT_PREFIX_MULTILINE, id="thought_prefix_multiline"),
+    pytest.param(
+        True, THOUGHT_PREFIX_MULTILINE, id="thought_prefix_multiline_streaming"
+    ),
+    pytest.param(False, THOUGHT_PREFIX_DIVERGE, id="thought_prefix_diverge"),
+    pytest.param(True, THOUGHT_PREFIX_DIVERGE, id="thought_prefix_diverge_streaming"),
+    pytest.param(False, TOOL_CALL_STARTED, id="tool_call_started"),
+    pytest.param(True, TOOL_CALL_STARTED, id="tool_call_started_streaming"),
+]
+
+
+def gemma4_encode_output(generic_tokenizer, output: str) -> list[int]:
+    # Resolve token IDs dynamically from the real tokenizer
+    vocab = generic_tokenizer.get_vocab()
+    start_token_id = vocab["<|channel>"]
+    end_token_id = vocab["<channel|>"]
+
+    index_start = output.find("<|channel>")
+    len_start = len("<|channel>")
+    index_end = output.find("<channel|>")
+    len_end = len("<channel|>")
+
+    output_tokens = []
+
+    def _encode(text: str) -> list[int]:
+        if not text:
+            return []
+        # Handle both raw transformers and vLLM wrappers
+        enc = getattr(generic_tokenizer, "tokenizer", generic_tokenizer)
+        try:
+            return enc.encode(text, add_special_tokens=False)
+        except TypeError:
+            return enc.encode(text)
+
+    if index_start != -1:
+        output_before = output[:index_start]
+        output_tokens += _encode(output_before)
+        output_tokens += [start_token_id]
+
+        if index_end != -1:
+            output_middle = output[index_start + len_start : index_end]
+            output_after = output[index_end + len_end :]
+            output_tokens += _encode(output_middle)
+            output_tokens += [end_token_id]
+            output_tokens += _encode(output_after)
+        else:
+            output_middle = output[index_start + len_start :]
+            output_tokens += _encode(output_middle)
+    elif index_end != -1:
+        output_before = output[:index_end]
+        output_after = output[index_end + len_end :]
+        output_tokens += _encode(output_before)
+        output_tokens += [end_token_id]
+        output_tokens += _encode(output_after)
+    else:
+        output_tokens += _encode(output)
+
+    return output_tokens
+
+
+@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
+def test_gemma4_reasoning(
+    streaming: bool,
+    param_dict: dict,
+    generic_tokenizer,
+):
+    output = param_dict["output"]
+    output_tokens = gemma4_encode_output(generic_tokenizer, output)
+
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        generic_tokenizer
+    )
+
+    # We use the generic run_reasoning_extraction from utils
+    # Use decode per token to get standard spaces instead of
+    # SentencePiece space characters
+    output_token_strings = [generic_tokenizer.decode([t]) for t in output_tokens]
+    reasoning, content = run_reasoning_extraction(
+        parser, output_token_strings, streaming=streaming
+    )
+
+    assert reasoning == param_dict["reasoning"]
+    assert content == param_dict["content"]
+
+    # Test is_reasoning_end
+    is_reasoning_end = parser.is_reasoning_end(output_tokens)
+    assert is_reasoning_end == param_dict["is_reasoning_end"]
+
+
+def test_gemma4_adjust_request(generic_tokenizer):
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        generic_tokenizer
+    )
+
+    request = ChatCompletionRequest(messages=[], model="test-model")
+    assert request.skip_special_tokens is True
+
+    result = parser.adjust_request(request)
+    assert result.skip_special_tokens is False
+    assert result is request
+
+
+def test_gemma4_previous_turn_reasoning_is_reasoning_end(generic_tokenizer):
+    output = (
+        "<|channel>thought\n1st thought<channel|>1st content<turn|>\n"
+        "<|turn>user\nThanks<|turn>model\n"
+    )
+    output_tokens = gemma4_encode_output(generic_tokenizer, output)
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        generic_tokenizer
+    )
+    is_reasoning_end = parser.is_reasoning_end(output_tokens)
+    assert not is_reasoning_end
diff --git a/tests/reasoning/test_gptoss_reasoning_parser.py b/tests/reasoning/test_gptoss_reasoning_parser.py
index 3b1327acb688..a6f815b6ae5c 100644
--- a/tests/reasoning/test_gptoss_reasoning_parser.py
+++ b/tests/reasoning/test_gptoss_reasoning_parser.py
@@ -280,3 +280,72 @@ def test_tag_format_consistency(self, reasoning_parser):
             assert tag["content"]["type"] == "any_text"
             assert tag["end"] == "<|end|>"
             assert tag["begin"].startswith("<|channel|>")
+
+
+@pytest.mark.parametrize(
+    "output, is_reasoning_end",
+    [(t["output"], t["is_reasoning_end"]) for t in TEST_CASES],
+)
+def test_gptoss_is_reasoning_end_streaming(
+    output,
+    is_reasoning_end,
+    gpt_oss_tokenizer,
+):
+    """Streaming override must agree with is_reasoning_end for all cases."""
+    tokens = gpt_oss_tokenizer.tokenize(output)
+    parser: ReasoningParser = GptOssReasoningParser(gpt_oss_tokenizer)
+    output_ids = gpt_oss_tokenizer.convert_tokens_to_ids(tokens)
+    delta_ids = output_ids[-1:] if output_ids else []
+    actual = parser.is_reasoning_end_streaming(output_ids, delta_ids)
+    assert is_reasoning_end == actual
+
+
+@pytest.mark.parametrize(
+    "output, is_reasoning_end",
+    [(t["output"], t["is_reasoning_end"]) for t in TEST_CASES],
+)
+def test_gptoss_is_reasoning_end_streaming_long_prefix(
+    output,
+    is_reasoning_end,
+    gpt_oss_tokenizer,
+):
+    """Windowing must produce correct results even with a long prefix."""
+    tokens = gpt_oss_tokenizer.tokenize(output)
+    parser: ReasoningParser = GptOssReasoningParser(gpt_oss_tokenizer)
+    output_ids = gpt_oss_tokenizer.convert_tokens_to_ids(tokens)
+    # Prepend 10k dummy reasoning tokens to simulate a long generation
+    long_prefix = [1] * 10_000
+    padded_ids = long_prefix + list(output_ids)
+    delta_ids = output_ids[-1:] if output_ids else []
+    actual = parser.is_reasoning_end_streaming(padded_ids, delta_ids)
+    assert is_reasoning_end == actual
+
+
+@pytest.mark.parametrize(
+    "output, is_reasoning_end",
+    [(t["output"], t["is_reasoning_end"]) for t in TEST_CASES],
+)
+def test_gptoss_is_reasoning_end_streaming_large_delta(
+    output,
+    is_reasoning_end,
+    gpt_oss_tokenizer,
+):
+    """Simulate speculative decoding where the entire test sequence arrives
+    as a single large delta appended after a long prefix.  The window must
+    expand to cover delta_ids so the end pattern is never missed."""
+    tokens = gpt_oss_tokenizer.tokenize(output)
+    parser: ReasoningParser = GptOssReasoningParser(gpt_oss_tokenizer)
+    output_ids = gpt_oss_tokenizer.convert_tokens_to_ids(tokens)
+    long_prefix = [1] * 10_000
+    padded_ids = long_prefix + list(output_ids)
+    # delta_ids = the entire test sequence (as if accepted in one spec step)
+    delta_ids = list(output_ids)
+    actual = parser.is_reasoning_end_streaming(padded_ids, delta_ids)
+    assert is_reasoning_end == actual
+
+
+def test_gptoss_is_reasoning_end_streaming_signature(gpt_oss_tokenizer):
+    """Verify the method is callable with the expected signature."""
+    parser = GptOssReasoningParser(gpt_oss_tokenizer)
+    result = parser.is_reasoning_end_streaming([], [])
+    assert result is False
diff --git a/tests/reasoning/test_hy_v3_reasoning_parser.py b/tests/reasoning/test_hy_v3_reasoning_parser.py
new file mode 100644
index 000000000000..e527c979f6e2
--- /dev/null
+++ b/tests/reasoning/test_hy_v3_reasoning_parser.py
@@ -0,0 +1,274 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
+
+from tests.reasoning.utils import run_reasoning_extraction
+from vllm.reasoning import ReasoningParser, ReasoningParserManager
+from vllm.reasoning.hy_v3_reasoning_parser import HYV3ReasoningParser
+from vllm.tokenizers import get_tokenizer
+
+parser_name = "hy_v3"
+MODEL = "tencent/Hy3-preview"
+
+
+@pytest.fixture(scope="module")
+def hy_v3_tokenizer():
+    return get_tokenizer(tokenizer_name=MODEL)
+
+
+WITH_THINK = {
+    "output": "This is a reasoning section</think>This is the rest",
+    "reasoning": "This is a reasoning section",
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+    "reasoning_effort": "high",
+}
+
+WITH_THINK_STREAM = {
+    "output": "This is a reasoning section</think>This is the rest",
+    "reasoning": "This is a reasoning section",
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+    "reasoning_effort": "high",
+}
+
+WITHOUT_THINK = {
+    "output": "This is the rest",
+    "reasoning": None,
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+    "reasoning_effort": "no_think",
+}
+
+WITHOUT_THINK_STREAM = {
+    "output": "This is the rest",
+    "reasoning": None,
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+    "reasoning_effort": "no_think",
+}
+
+WITH_REASONING_EFFORT_NONE = {
+    "output": "This is the rest",
+    "reasoning": None,
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+}
+
+WITH_REASONING_EFFORT_NONE_STREAM = {
+    "output": "This is the rest",
+    "reasoning": None,
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+}
+
+COMPLETE_REASONING = {
+    "output": "This is a reasoning section</think>",
+    "reasoning": "This is a reasoning section",
+    "content": None,
+    "is_reasoning_end": True,
+    "reasoning_effort": "high",
+}
+MULTILINE_REASONING = {
+    "output": "This is a reasoning\nsection</think>This is the rest\nThat",
+    "reasoning": "This is a reasoning\nsection",
+    "content": "This is the rest\nThat",
+    "is_reasoning_end": True,
+    "reasoning_effort": "high",
+}
+ONLY_OPEN_TAG = {
+    "output": "This is a reasoning section",
+    "reasoning": "This is a reasoning section",
+    "content": None,
+    "is_reasoning_end": False,
+    "reasoning_effort": "high",
+}
+
+ONLY_OPEN_TAG_STREAM = {
+    "output": "This is a reasoning section",
+    "reasoning": "This is a reasoning section",
+    "content": None,
+    "is_reasoning_end": False,
+    "reasoning_effort": "high",
+}
+
+TEST_CASES = [
+    pytest.param(
+        False,
+        WITH_THINK,
+        id="with_think",
+    ),
+    pytest.param(
+        True,
+        WITH_THINK_STREAM,
+        id="with_think_stream",
+    ),
+    pytest.param(
+        False,
+        WITHOUT_THINK,
+        id="without_think",
+    ),
+    pytest.param(
+        True,
+        WITHOUT_THINK_STREAM,
+        id="without_think_stream",
+    ),
+    pytest.param(
+        False,
+        WITH_REASONING_EFFORT_NONE,
+        id="with_reasoning_effort_none",
+    ),
+    pytest.param(
+        True,
+        WITH_REASONING_EFFORT_NONE_STREAM,
+        id="with_reasoning_effort_none_stream",
+    ),
+    pytest.param(
+        False,
+        COMPLETE_REASONING,
+        id="complete_reasoning",
+    ),
+    pytest.param(
+        True,
+        COMPLETE_REASONING,
+        id="complete_reasoning_stream",
+    ),
+    pytest.param(
+        False,
+        MULTILINE_REASONING,
+        id="multiline_reasoning",
+    ),
+    pytest.param(
+        True,
+        MULTILINE_REASONING,
+        id="multiline_reasoning_stream",
+    ),
+    pytest.param(
+        False,
+        ONLY_OPEN_TAG,
+        id="only_open_tag",
+    ),
+    pytest.param(
+        True,
+        ONLY_OPEN_TAG_STREAM,
+        id="only_open_tag_stream",
+    ),
+]
+
+STILL_REASONING_PROMPT = """<｜hy_begin▁of▁sentence｜>
+You are a helpful assistant.
+<｜reasoning_mode｜>reasoning_effort:high<｜hy_User｜>
+What is the capital of France?<｜hy_Assistant｜>
+<think>The user is asking for the capital of"""
+
+DONE_REASONING_PROMPT = """<｜hy_begin▁of▁sentence｜>
+You are a helpful assistant.
+<｜reasoning_mode｜>reasoning_effort:high<｜hy_User｜>
+What is the capital of France?<｜hy_Assistant｜>
+<think>The user is asking for the capital of France.</think>
+The capital of France is Paris."""
+
+MULTI_TURN_STILL_REASONING_PROMPT = """<｜hy_begin▁of▁sentence｜>
+You are a helpful assistant.
+<｜reasoning_mode｜>reasoning_effort:high<｜hy_User｜>
+What is the capital of France?<｜hy_Assistant｜
+><think></think>The capital of France is Paris.<eos:6124c78e>
+<｜hy_User｜>What about Chile?<｜hy_Assistant｜>
+<think>The user is asking for the capital of"""
+
+MULTI_TURN_DONE_REASONING_PROMPT = """<｜hy_begin▁of▁sentence｜>
+You are a helpful assistant.
+<｜reasoning_mode｜>reasoning_effort:high<｜hy_User｜>
+What is the capital of France?<｜hy_Assistant｜
+><think></think>The capital of France is Paris.<eos:6124c78e>
+<｜hy_User｜>What about Chile?<｜hy_Assistant｜>
+<think>The user is asking for the capital of Chile.</think>
+The capital of Chile is Santiago."""
+
+REASONING_END_TEST_CASES = [
+    pytest.param(STILL_REASONING_PROMPT, False, id="still_reasoning"),
+    pytest.param(DONE_REASONING_PROMPT, True, id="done_reasoning"),
+    pytest.param(
+        MULTI_TURN_STILL_REASONING_PROMPT, False, id="multi_turn_still_reasoning"
+    ),
+    pytest.param(
+        MULTI_TURN_DONE_REASONING_PROMPT, True, id="multi_turn_done_reasoning"
+    ),
+]
+
+
+@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
+def test_reasoning(
+    streaming: bool,
+    param_dict: dict,
+    hy_v3_tokenizer,
+):
+    output = hy_v3_tokenizer.tokenize(param_dict["output"])
+    output_tokens: list[str] = [
+        hy_v3_tokenizer.convert_tokens_to_string([token]) for token in output
+    ]
+
+    parser_kwargs = {}
+    if "reasoning_effort" in param_dict:
+        parser_kwargs["chat_template_kwargs"] = {
+            "reasoning_effort": param_dict["reasoning_effort"]
+        }
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        hy_v3_tokenizer,
+        **parser_kwargs,
+    )
+
+    reasoning, content = run_reasoning_extraction(
+        parser, output_tokens, streaming=streaming
+    )
+
+    assert reasoning == param_dict["reasoning"]
+    assert content == param_dict["content"]
+
+    output_ids = hy_v3_tokenizer.convert_tokens_to_ids(output)
+    is_reasoning_end = parser.is_reasoning_end(output_ids)
+    assert is_reasoning_end == param_dict["is_reasoning_end"]
+
+
+@pytest.mark.parametrize("prompt, is_reasoning_end", REASONING_END_TEST_CASES)
+def test_is_reasoning_end_full_prompt(
+    prompt: str, is_reasoning_end: bool, hy_v3_tokenizer
+):
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        hy_v3_tokenizer,
+        chat_template_kwargs={"reasoning_effort": "high"},
+    )
+    tokens = hy_v3_tokenizer.tokenize(prompt)
+    token_ids = hy_v3_tokenizer.convert_tokens_to_ids(tokens)
+    check_is_reasoning_end = parser.is_reasoning_end(token_ids)
+    assert check_is_reasoning_end == is_reasoning_end
+
+
+def test_constructor_does_not_mutate_shared_chat_template_kwargs(hy_v3_tokenizer):
+    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
+    chat_template_kwargs = {"reasoning_effort": "low"}
+
+    first_parser: ReasoningParser = parser_cls(
+        hy_v3_tokenizer,
+        chat_template_kwargs=chat_template_kwargs,
+    )
+    second_parser: ReasoningParser = parser_cls(
+        hy_v3_tokenizer,
+        chat_template_kwargs=chat_template_kwargs,
+    )
+
+    assert chat_template_kwargs == {"reasoning_effort": "low"}
+    assert isinstance(first_parser, HYV3ReasoningParser)
+    assert isinstance(second_parser, HYV3ReasoningParser)
+    assert first_parser._identity_parser is None
+    assert second_parser._identity_parser is None
+
+
+def test_constructor_falls_back_to_outer_reasoning_effort(hy_v3_tokenizer):
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        hy_v3_tokenizer,
+        reasoning_effort="low",
+    )
+
+    assert isinstance(parser, HYV3ReasoningParser)
+    assert parser._identity_parser is None
diff --git a/tests/reasoning/test_kimi_k2_reasoning_parser.py b/tests/reasoning/test_kimi_k2_reasoning_parser.py
index 0f80bb8854a8..dfce2075c6a9 100644
--- a/tests/reasoning/test_kimi_k2_reasoning_parser.py
+++ b/tests/reasoning/test_kimi_k2_reasoning_parser.py
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from unittest.mock import MagicMock
+
 import pytest
 
 from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
@@ -12,6 +14,20 @@
 REASONING_MODEL_NAME = "moonshotai/Kimi-K2.5"
 
 
+@pytest.fixture
+def mock_kimi_k2_tokenizer():
+    tokenizer = MagicMock()
+    tokenizer.get_vocab.return_value = {
+        "<think>": 100,
+        "</think>": 101,
+        "<|tool_calls_section_begin|>": 200,
+        "<|tool_calls_section_end|>": 201,
+        "<|tool_call_begin|>": 202,
+        "<|tool_call_end|>": 203,
+    }
+    return tokenizer
+
+
 @pytest.fixture(scope="module")
 def kimi_k2_tokenizer():
     return get_tokenizer(tokenizer_name=REASONING_MODEL_NAME, trust_remote_code=True)
@@ -153,3 +169,50 @@ def test_streaming_tool_section_ends_reasoning(kimi_k2_tokenizer):
     )
     assert isinstance(result, DeltaMessage)
     assert result.content == "<|tool_calls_section_begin|>"
+
+
+def test_streaming_end_token_id_buffered(mock_kimi_k2_tokenizer):
+    """When stop sequences buffer text, </think> ID arrives before its text.
+
+    The token ID is present in delta_token_ids but the actual string is not
+    yet in delta_text (still buffered). The parser must return None to wait
+    for the next delta, instead of calling find() which returns -1 and
+    silently corrupting the text split.
+    """
+    parser = KimiK2ReasoningParser(mock_kimi_k2_tokenizer)
+    think_id = parser._start_token_id
+    end_think_id = parser._end_token_id
+
+    # Simulate: </think> ID arrived but text not yet flushed.
+    # Two token IDs in delta to bypass the single-special-token guard.
+    result = parser.extract_reasoning_streaming(
+        previous_text="some reasoning",
+        current_text="some reasoning extra",
+        delta_text="extra",  # </think> text not yet flushed
+        previous_token_ids=[think_id],
+        current_token_ids=[think_id, end_think_id, 999],
+        delta_token_ids=[end_think_id, 999],
+    )
+    assert result is None
+
+
+def test_streaming_tool_section_id_buffered(mock_kimi_k2_tokenizer):
+    """When stop sequences buffer text, tool section start ID arrives before its text.
+
+    Same buffering scenario as above but for <|tool_calls_section_begin|>.
+    Without the guard, find() returns -1 and delta_text[:tool_index] silently
+    drops the last character of reasoning.
+    """
+    parser = KimiK2ReasoningParser(mock_kimi_k2_tokenizer)
+    think_id = parser._start_token_id
+    tool_begin_id = parser._tool_section_start_token_id
+
+    result = parser.extract_reasoning_streaming(
+        previous_text="some reasoning",
+        current_text="some reasoning extra",
+        delta_text="extra",  # tool section text not yet flushed
+        previous_token_ids=[think_id],
+        current_token_ids=[think_id, tool_begin_id, 999],
+        delta_token_ids=[tool_begin_id, 999],
+    )
+    assert result is None
diff --git a/tests/reasoning/test_olmo3_reasoning_parser.py b/tests/reasoning/test_olmo3_reasoning_parser.py
index bc0e72e2a456..a74ca50d11a4 100644
--- a/tests/reasoning/test_olmo3_reasoning_parser.py
+++ b/tests/reasoning/test_olmo3_reasoning_parser.py
@@ -41,6 +41,12 @@
     "content": "\n\n\nThis is the rest",
 }
 
+SIMPLE_REASONING_WITH_TRAILING_SPACE = {
+    "output": f"{START_REASONING}\nLook!\nI'm thinking... {END_REASONING}\nThis is the rest",  # noqa: E501
+    "reasoning": "\nLook!\nI'm thinking... ",
+    "content": "\nThis is the rest",
+}
+
 NO_REASONING_ONLY_END_THINK = {
     "output": f"{END_REASONING}\n\nNo thoughts, head empty!",
     "reasoning": None,
@@ -114,6 +120,11 @@
         SIMPLE_REASONING_WITH_MULTIPLE_NEWLINES,
         id="simple_reasoning_with_multiple_newlines_streaming",
     ),
+    pytest.param(
+        True,  # enable streaming
+        SIMPLE_REASONING_WITH_TRAILING_SPACE,
+        id="simple_reasoning_with_trailing_space_streaming",
+    ),
     pytest.param(
         True,  # enable streaming
         NO_REASONING_ONLY_END_THINK,
@@ -127,7 +138,7 @@
 ]
 
 # Global tokenizer initialization to avoid repeated loading
-tokenizer = AutoTokenizer.from_pretrained("allenai/dolma2-tokenizer")
+tokenizer = AutoTokenizer.from_pretrained("allenai/Olmo-3-7B-Think")
 
 
 @pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
diff --git a/tests/reasoning/test_qwen3_reasoning_parser.py b/tests/reasoning/test_qwen3_reasoning_parser.py
index 411c7ba485a8..f42458560f9f 100644
--- a/tests/reasoning/test_qwen3_reasoning_parser.py
+++ b/tests/reasoning/test_qwen3_reasoning_parser.py
@@ -78,6 +78,25 @@ def qwen3_tokenizer(request):
     "content": None,
 }
 
+# --- <tool_call> without </think> (implicit reasoning end) ---
+
+TOOL_CALL_BODY = (
+    "<tool_call>\n<function=bash>\n<parameter=command>"
+    "\ncat /etc/hosts\n</parameter>\n</function>\n</tool_call>"
+)
+
+TOOL_CALL_NO_THINK_END = {
+    "output": "I need to read the file.\n\n" + TOOL_CALL_BODY,
+    "reasoning": "I need to read the file.\n\n",
+    "content": TOOL_CALL_BODY,
+}
+
+TOOL_CALL_WITH_THINK_NO_END = {
+    "output": "<think>I need to read the file.\n\n" + TOOL_CALL_BODY,
+    "reasoning": "I need to read the file.\n\n",
+    "content": TOOL_CALL_BODY,
+}
+
 # --- Edge cases ---
 
 COMPLETE_REASONING = {
@@ -199,6 +218,26 @@ def qwen3_tokenizer(request):
         TRUNCATED_NO_START_TOKEN_STREAM,
         id="truncated_no_start_token_stream",
     ),
+    pytest.param(
+        False,
+        TOOL_CALL_NO_THINK_END,
+        id="tool_call_no_think_end",
+    ),
+    pytest.param(
+        True,
+        TOOL_CALL_NO_THINK_END,
+        id="tool_call_no_think_end_stream",
+    ),
+    pytest.param(
+        False,
+        TOOL_CALL_WITH_THINK_NO_END,
+        id="tool_call_with_think_no_end",
+    ),
+    pytest.param(
+        True,
+        TOOL_CALL_WITH_THINK_NO_END,
+        id="tool_call_with_think_no_end_stream",
+    ),
 ]
 
 
@@ -255,6 +294,13 @@ def test_reasoning(
         "content",
         id="no_start_end_grouped_with_content",
     ),
+    pytest.param(
+        # <tool_call> arrives in a separate delta after reasoning text
+        ["I need to read the file.\n\n", "<tool_call>\n<function=bash>"],
+        "I need to read the file.\n\n",
+        "<tool_call>\n<function=bash>",
+        id="tool_call_implicit_reasoning_end",
+    ),
 ]
 
 
@@ -296,6 +342,12 @@ def test_reasoning_streaming_multi_token_deltas(
         "Some output without think tokens",
         id="thinking_disabled_no_think_tokens",
     ),
+    pytest.param(
+        "I need to read the file.\n\n" + TOOL_CALL_BODY,
+        None,
+        "I need to read the file.\n\n" + TOOL_CALL_BODY,
+        id="thinking_disabled_with_tool_call",
+    ),
 ]
 
 
diff --git a/tests/reasoning/test_step3p5_reasoning_parser.py b/tests/reasoning/test_step3p5_reasoning_parser.py
index 2196d247cb45..8f62e7a2cb4d 100644
--- a/tests/reasoning/test_step3p5_reasoning_parser.py
+++ b/tests/reasoning/test_step3p5_reasoning_parser.py
@@ -2,10 +2,10 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import pytest
-from transformers import AutoTokenizer
 
 from tests.reasoning.utils import run_reasoning_extraction
 from vllm.reasoning import ReasoningParser, ReasoningParserManager
+from vllm.tokenizers import get_tokenizer
 
 parser_name = "step3p5"
 start_token = "<think>"
@@ -16,7 +16,7 @@
 
 @pytest.fixture(scope="module")
 def step3p5_tokenizer():
-    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
+    return get_tokenizer(tokenizer_name=REASONING_MODEL_NAME)
 
 
 SIMPLE_REASONING = {
diff --git a/tests/renderers/conftest.py b/tests/renderers/conftest.py
new file mode 100644
index 000000000000..c33ab351608d
--- /dev/null
+++ b/tests/renderers/conftest.py
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from tests.utils import prewarm_hf_cache
+
+
+@pytest.fixture(scope="session", autouse=True)
+def _prewarm_hf_cache():
+    # tokenization_qwen.py downloads SimSun.ttf from
+    # qianwen-res.oss-cn-beijing.aliyuncs.com; both Qwen/Qwen-VL and
+    # Qwen/Qwen-VL-Chat look it up from the Chat repo.
+    prewarm_hf_cache([("Qwen/Qwen-VL-Chat", "SimSun.ttf")])
diff --git a/tests/renderers/inputs/test_preprocess.py b/tests/renderers/inputs/test_preprocess.py
index 98219bb14382..9b120c03a301 100644
--- a/tests/renderers/inputs/test_preprocess.py
+++ b/tests/renderers/inputs/test_preprocess.py
@@ -1,6 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from vllm.renderers.inputs.preprocess import prompt_to_seq
+import pytest
+
+from vllm.renderers.inputs.preprocess import (
+    parse_dec_only_prompt,
+    parse_enc_dec_prompt,
+    prompt_to_seq,
+)
 
 
 def test_empty_input():
@@ -39,3 +45,23 @@ def test_dict_input():
         {"prompt": "foo"},
         {"prompt_token_ids": [1, 2]},
     ]
+
+
+def test_parse_dec_only_prompt_rejects_non_string_prompt_field():
+    with pytest.raises(TypeError, match="Prompt text should be a string"):
+        parse_dec_only_prompt({"prompt": [1, 2, 3], "cache_salt": "abc"})
+
+
+def test_parse_dec_only_prompt_rejects_non_string_prompt_list():
+    with pytest.raises(TypeError, match="Prompt text should be a string"):
+        parse_dec_only_prompt({"prompt": [1, "x"]})
+
+
+def test_parse_enc_dec_prompt_rejects_nested_non_string_prompt_field():
+    with pytest.raises(TypeError, match="Prompt text should be a string"):
+        parse_enc_dec_prompt(
+            {
+                "encoder_prompt": {"prompt": [1, 2, 3]},
+                "decoder_prompt": {"prompt": [4, 5]},
+            }
+        )
diff --git a/tests/renderers/test_chat_utils_prompt_embeds.py b/tests/renderers/test_chat_utils_prompt_embeds.py
new file mode 100644
index 000000000000..e33cc304710d
--- /dev/null
+++ b/tests/renderers/test_chat_utils_prompt_embeds.py
@@ -0,0 +1,576 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""Offline unit tests for `prompt_embeds` chat-completion content parts."""
+
+from __future__ import annotations
+
+import inspect
+import io
+from typing import Final
+from unittest import mock
+
+import pybase64 as base64
+import pytest
+import regex as re
+import torch
+from transformers import AutoTokenizer
+
+from vllm.entrypoints.chat_utils import (
+    _ENABLE_PROMPT_EMBEDS_ERROR,
+    _PROMPT_EMBEDS_MISSING_DATA_ERROR,
+    _RESERVED_PLACEHOLDER_IN_TEXT_ERROR,
+    MM_PARSER_MAP,
+    MODALITY_PLACEHOLDERS_MAP,
+    PROMPT_EMBEDS_PLACEHOLDER_TOKEN,
+    parse_chat_messages,
+    parse_chat_messages_async,
+)
+from vllm.renderers.hf import (
+    _PROMPT_EMBEDS_PLACEHOLDER_SPAN_MISMATCH_ERROR,
+    _build_mixed_prompt_embeds,
+    _build_prompt_embeds_positions,
+    _build_prompt_embeds_updates,
+    _ensure_prompt_embeds_placeholder_token,
+    _expand_prompt_embeds_placeholders,
+)
+
+# Cover distinct tokenizer families:
+#   GPT2TokenizerFast  (BPE, OpenAI-style)
+#   Qwen2TokenizerFast (SentencePiece BPE variant)
+#   BertTokenizerFast  (WordPiece)
+TOKENIZER_IDS: Final[list[str]] = [
+    "gpt2",
+    "Qwen/Qwen2.5-1.5B-Instruct",
+    "bert-base-uncased",
+]
+
+
+@pytest.fixture(params=TOKENIZER_IDS, ids=TOKENIZER_IDS)
+def tokenizer(request):
+    """A fresh tokenizer instance per tokenizer family."""
+    return AutoTokenizer.from_pretrained(request.param)
+
+
+# Minimal chat template that works with any tokenizer.  Iterates
+# `message.content` as either a string or a list of dicts (openai format).
+_SIMPLE_CHAT_TEMPLATE: Final[str] = (
+    "{% for m in messages %}"
+    "{% if m['content'] is string %}{{m['content']}}"
+    "{% else %}{% for p in m['content'] %}{{p['text']}}{% endfor %}"
+    "{% endif %}\n{% endfor %}"
+)
+
+
+async def _maybe_await(fn, *args, **kwargs):
+    """Call *fn* and `await` the result if it's a coroutine."""
+    result = fn(*args, **kwargs)
+    if inspect.iscoroutine(result):
+        result = await result
+    return result
+
+
+# Parametrize over sync / async parse paths so every end-to-end test
+# exercises both.
+_PARSE_FUNCTIONS = [parse_chat_messages, parse_chat_messages_async]
+
+
+@pytest.fixture(params=_PARSE_FUNCTIONS, ids=["sync", "async"])
+def parse_fn(request):
+    """Either the sync or async `parse_chat_messages` callable."""
+    return request.param
+
+
+def _encode_tensor(t: torch.Tensor) -> str:
+    buf = io.BytesIO()
+    torch.save(t, buf)
+    return base64.b64encode(buf.getvalue()).decode("utf-8")
+
+
+_MOCK_HIDDEN_SIZE: Final[int] = 8
+_MOCK_DTYPE: Final[torch.dtype] = torch.float32
+
+
+def _make_mock_model_config(*, enable_prompt_embeds: bool = True) -> mock.MagicMock:
+    mc = mock.MagicMock()
+    mc.enable_prompt_embeds = enable_prompt_embeds
+    mc.multimodal_config = None
+    mc.allowed_local_media_path = None
+    mc.allowed_media_domains = None
+    # Test text-only code path in `MultiModalItemTracker.resolve_items`.
+    mc.is_multimodal_model = False
+    # `safe_load_prompt_embeds` pins each tensor to the model's hidden_size
+    # and dtype, so the mock must return concrete values.
+    mc.get_hidden_size.return_value = _MOCK_HIDDEN_SIZE
+    mc.dtype = _MOCK_DTYPE
+    return mc
+
+
+def test_prompt_embeds_keys_registered():
+    assert "prompt_embeds" in MODALITY_PLACEHOLDERS_MAP
+    assert MODALITY_PLACEHOLDERS_MAP["prompt_embeds"] == "<##PROMPT_EMBEDS##>"
+    assert "prompt_embeds" in MM_PARSER_MAP
+
+
+def test_ensure_placeholder_token_is_single_token_and_idempotent(tokenizer):
+    """Ensure the placeholder token is a single token and that multiple calls to
+    "ensure" are idempotent, across all tokenizer families."""
+    tid1 = _ensure_prompt_embeds_placeholder_token(tokenizer)
+    tid2 = _ensure_prompt_embeds_placeholder_token(tokenizer)
+    assert tid1 == tid2
+
+    ids = tokenizer.encode(PROMPT_EMBEDS_PLACEHOLDER_TOKEN, add_special_tokens=False)
+    assert ids == [tid1]
+
+    # Repeating it in a string N times must produce exactly that many tokens.
+    N = 5
+    ids_rep = tokenizer.encode(
+        PROMPT_EMBEDS_PLACEHOLDER_TOKEN * N, add_special_tokens=False
+    )
+    assert ids_rep == [tid1] * N
+
+
+def test_parse_chat_messages_openai_format():
+    NUM_TOKENS = 3
+    t = torch.randn(NUM_TOKENS, _MOCK_HIDDEN_SIZE, dtype=_MOCK_DTYPE)
+    b64 = _encode_tensor(t)
+    mc = _make_mock_model_config()
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Hello "},
+                {"type": "prompt_embeds", "data": b64},
+                {"type": "text", "text": " world"},
+            ],
+        }
+    ]
+    conv, mm_data, _ = parse_chat_messages(
+        messages,
+        mc,
+        content_format="openai",
+    )
+    # The middle content part is rewritten to a single placeholder-token
+    # sentinel.
+    texts = [p["text"] for p in conv[0]["content"]]
+    assert texts == [
+        "Hello ",
+        PROMPT_EMBEDS_PLACEHOLDER_TOKEN,
+        " world",
+    ]
+    assert mm_data is not None and "prompt_embeds" in mm_data
+    assert torch.equal(mm_data["prompt_embeds"][0], t)
+
+
+# Each layout entry is one content part:
+#   ("text", "A")  -> {"type": "text", "text": "A"}
+#   ("embed", N)   -> {"type": "prompt_embeds", "data": <base64 of (N, H) tensor>}
+@pytest.mark.parametrize(
+    "layout",
+    [
+        # Case: Single embed only.
+        [("embed", 2)],
+        # Case: Embed at the start of the message.
+        [("embed", 3), ("text", "B")],
+        # Case: Embed at the end of the message.
+        [("text", "A"), ("embed", 1)],
+        # Case: Embed sandwiched between text spans.
+        [("text", "A"), ("embed", 2), ("text", "B")],
+        # Case: Multiple embeds with text in between.
+        [("text", "A"), ("embed", 2), ("text", "B"), ("embed", 3)],
+        # Case: Adjacent embeds with no separating text.
+        [("embed", 1), ("embed", 2)],
+        # Case: Multiple text spans before a trailing embed.
+        [("text", "A"), ("text", "B"), ("embed", 1)],
+        # Case: Long-ish run mixing both kinds.
+        [
+            ("text", "head"),
+            ("embed", 4),
+            ("text", "mid"),
+            ("embed", 1),
+            ("embed", 2),
+            ("text", "tail"),
+        ],
+    ],
+    ids=[
+        "single-embed",
+        "embed-then-text",
+        "text-then-embed",
+        "text-embed-text",
+        "text-embed-text-embed",
+        "adjacent-embeds",
+        "text-text-embed",
+        "long-mixed-run",
+    ],
+)
+@pytest.mark.parametrize(
+    "interleave_mm_strings",
+    # `None`: text-only path where `multimodal_config` is absent.
+    # `False`: non-interleave multimodal path (the common default).
+    # `True`: sentinel-substitution interleave path.
+    # All three must preserve the request ordering of prompt_embeds
+    # relative to surrounding text because prompt_embeds are spliced at the
+    # token offset during rendering.
+    [None, False, True],
+    ids=["text-only", "interleave-off", "interleave-on"],
+)
+def test_parse_chat_messages_string_format_preserves_position(
+    layout, interleave_mm_strings
+):
+    mc = _make_mock_model_config()
+    if interleave_mm_strings is not None:
+        mm_cfg = mock.MagicMock()
+        mm_cfg.interleave_mm_strings = interleave_mm_strings
+        mc.multimodal_config = mm_cfg
+
+    content: list[dict] = []
+    expected_parts: list[str] = []
+    expected_embeds: list[torch.Tensor] = []
+    for kind, value in layout:
+        if kind == "text":
+            content.append({"type": "text", "text": value})
+            expected_parts.append(value)
+        else:  # prompt embeds
+            num_tokens = value
+            t = torch.randn(num_tokens, _MOCK_HIDDEN_SIZE, dtype=_MOCK_DTYPE)
+            expected_embeds.append(t)
+            content.append({"type": "prompt_embeds", "data": _encode_tensor(t)})
+            # Parser emits ONE sentinel per part.
+            expected_parts.append(PROMPT_EMBEDS_PLACEHOLDER_TOKEN)
+
+    messages = [{"role": "user", "content": content}]
+    conv, mm_data, _ = parse_chat_messages(
+        messages,
+        mc,
+        content_format="string",
+    )
+
+    assert conv[0]["content"] == "\n".join(expected_parts)
+    assert mm_data is not None and "prompt_embeds" in mm_data
+    assert len(mm_data["prompt_embeds"]) == len(expected_embeds)
+    for got, want in zip(mm_data["prompt_embeds"], expected_embeds, strict=True):
+        assert torch.equal(got, want)
+
+
+def test_parse_chat_messages_requires_flag():
+    t = torch.randn(2, 4)
+    b64 = _encode_tensor(t)
+    mc = _make_mock_model_config(enable_prompt_embeds=False)
+
+    messages = [
+        {
+            "role": "user",
+            "content": [{"type": "prompt_embeds", "data": b64}],
+        }
+    ]
+    with pytest.raises(ValueError, match=_ENABLE_PROMPT_EMBEDS_ERROR):
+        parse_chat_messages(
+            messages,
+            mc,
+            content_format="openai",
+        )
+
+
+def test_parse_chat_messages_rejects_missing_data():
+    # `data` is marked `Required` on `ChatCompletionContentPartPromptEmbedsParam`;
+    # malformed requests without `data` must surface a clear validation error
+    # rather than being silently dropped.
+    mc = _make_mock_model_config()
+    messages = [
+        {
+            "role": "user",
+            "content": [{"type": "prompt_embeds"}],  # no `data`
+        }
+    ]
+    with pytest.raises(ValueError, match=_PROMPT_EMBEDS_MISSING_DATA_ERROR):
+        parse_chat_messages(
+            messages,
+            mc,
+            content_format="openai",
+        )
+
+
+# Reserved placeholder guard: when `enable_prompt_embeds=True` the tokenizer is
+# mutated to make `<prompt_embeds>` a single unsplittable token. Any user text
+# containing that literal sequence would tokenize to the same sentinel ID and
+# be mistaken for a splice point, so we reject it at parse time.
+_PLACEHOLDER_ERROR_PATTERN: Final[str] = re.sub(
+    r"\\{[^}]*\\}", ".*", re.escape(_RESERVED_PLACEHOLDER_IN_TEXT_ERROR)
+)
+
+
+@pytest.mark.parametrize(
+    "content",
+    [
+        # Case: Top-level string content (wrapped as a single text part).
+        f"hello {PROMPT_EMBEDS_PLACEHOLDER_TOKEN} world",
+        # Case: List with a typed text part containing the placeholder.
+        [{"type": "text", "text": f"leading {PROMPT_EMBEDS_PLACEHOLDER_TOKEN}"}],
+        # Case: List with a plain-string part (no wrapping dict).
+        [f"raw string {PROMPT_EMBEDS_PLACEHOLDER_TOKEN}"],
+    ],
+    ids=["top-level-string", "typed-text-part", "plain-string-part"],
+)
+def test_parse_chat_messages_rejects_placeholder_in_user_text(content):
+    mc = _make_mock_model_config()  # enable_prompt_embeds=True by default
+    messages = [{"role": "user", "content": content}]
+    with pytest.raises(ValueError, match=_PLACEHOLDER_ERROR_PATTERN):
+        parse_chat_messages(messages, mc, content_format="openai")
+
+
+def test_parse_chat_messages_allows_placeholder_in_text_when_feature_disabled():
+    # When `enable_prompt_embeds=False` the tokenizer is never mutated, so the
+    # literal `<prompt_embeds>` is just ordinary text and must pass through.
+    mc = _make_mock_model_config(enable_prompt_embeds=False)
+    messages = [
+        {
+            "role": "user",
+            "content": f"benign mention of {PROMPT_EMBEDS_PLACEHOLDER_TOKEN} here",
+        }
+    ]
+    conv, mm_data, _ = parse_chat_messages(messages, mc, content_format="openai")
+    assert mm_data is None or "prompt_embeds" not in mm_data
+    # Text reaches the rendered conversation unchanged.
+    texts = [p["text"] for p in conv[0]["content"]]
+    assert PROMPT_EMBEDS_PLACEHOLDER_TOKEN in "".join(texts)
+
+
+# Token-stream spec: ints are regular token IDs, tuples `(N,)` expand to
+# a placeholder span of length N (creates corresponding `(N, H)` tensor).
+# `expected` lists the `(start_idx, length)` pairs that
+# `_build_prompt_embeds_positions` should return.
+@pytest.mark.parametrize(
+    "stream, expected",
+    [
+        # Case: Single run in the middle.
+        ([10, 20, (3,), 30], [(2, 3)]),
+        # Case: Single run at the start.
+        ([(2,), 10, 20], [(0, 2)]),
+        # Case: Single run at the end.
+        ([10, 20, (4,)], [(2, 4)]),
+        # Case: Two runs with tokens between.
+        ([1, (2,), 2, 3, (3,), 4], [(1, 2), (5, 3)]),
+        # Case: Adjacent runs (no separating tokens).
+        ([(1,), (2,)], [(0, 1), (1, 2)]),
+        # Case: Three runs.
+        ([5, (2,), 6, (1,), 7, (3,), 8], [(1, 2), (4, 1), (6, 3)]),
+    ],
+    ids=[
+        "single-middle",
+        "single-start",
+        "single-end",
+        "two-runs-separated",
+        "two-runs-adjacent",
+        "three-runs",
+    ],
+)
+def test_build_positions(tokenizer, stream, expected):
+    H = 4
+    tid = _ensure_prompt_embeds_placeholder_token(tokenizer)
+    tensors: list[torch.Tensor] = []
+    token_ids: list[int] = []
+    for item in stream:
+        if isinstance(item, tuple):
+            length = item[0]
+            tensors.append(torch.randn(length, H))
+            token_ids.extend([tid] * length)
+        else:
+            token_ids.append(item)
+    mm_updates = _build_prompt_embeds_updates(tensors, tid)
+    positions = _build_prompt_embeds_positions(token_ids, len(tensors), mm_updates)
+    assert positions == expected
+
+
+def test_build_positions_length_mismatch(tokenizer):
+    N1, H1 = 2, 4
+    N2, H2 = 3, 4
+    tid = _ensure_prompt_embeds_placeholder_token(tokenizer)
+    # 2 tensors expected but only a single placeholder run in the token
+    # stream (simulating dropping the second one).
+    tensors = [torch.randn(N1, H1), torch.randn(N2, H2)]
+    token_ids = [1, tid, tid, 2, 3]
+    mm_updates = _build_prompt_embeds_updates(tensors, tid)
+    # The error constant is a `str.format` template, escape it and turn
+    # the `{field}` placeholders into `.*` so it matches any substitution.
+    pattern = re.sub(
+        r"\\{[^}]*\\}", ".*", re.escape(_PROMPT_EMBEDS_PLACEHOLDER_SPAN_MISMATCH_ERROR)
+    )
+    with pytest.raises(ValueError, match=pattern):
+        _build_prompt_embeds_positions(token_ids, len(tensors), mm_updates)
+
+
+# ints  = regular token IDs (any value)
+# (N,)  = embed span of length N
+@pytest.mark.parametrize(
+    "stream",
+    [
+        [10, 20, (3,), 30],
+        [(2,), 10, 20],
+        [10, 20, (4,)],
+        [1, (2,), 2, 3, (3,), 4],
+        [(1,), (2,)],
+        [5, (2,), 6, (1,), 7, (3,), 8],
+    ],
+    ids=[
+        "single-middle",
+        "single-start",
+        "single-end",
+        "two-spans-separated",
+        "two-spans-adjacent",
+        "three-spans",
+    ],
+)
+def test_build_mixed_prompt_embeds(stream):
+    H = 8
+    _PLACEHOLDER = 0  # sentinel for embed positions in token_ids
+
+    tensors: list[torch.Tensor] = []
+    token_ids: list[int] = []
+    positions: list[tuple[int, int]] = []
+    cursor = 0
+    for item in stream:
+        if isinstance(item, tuple):
+            length = item[0]
+            tensors.append(torch.randn(length, H))
+            positions.append((cursor, length))
+            token_ids.extend([_PLACEHOLDER] * length)
+            cursor += length
+        else:
+            token_ids.append(item)
+            cursor += 1
+
+    embeds, mask = _build_mixed_prompt_embeds(token_ids, tensors, positions)
+
+    assert embeds.shape == (len(token_ids), H)
+    assert len(mask) == len(token_ids)
+
+    # Mask: False exactly at embed positions, True everywhere else.
+    expected_mask = torch.ones(len(token_ids), dtype=torch.bool)
+    for start, length in positions:
+        expected_mask[start : start + length] = False
+    assert mask == expected_mask.tolist()
+
+    # Embed rows match input tensors at the right positions.
+    for tensor, (start, length) in zip(tensors, positions):
+        assert torch.equal(embeds[start : start + length], tensor)
+
+    # Non-embed positions remain zero-filled.
+    assert torch.all(embeds[expected_mask] == 0)
+
+
+# End-to-end tests: each runs both sync and async parse paths via the
+# `parse_fn` fixture.
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("role", ["user", "system"])
+async def test_end_to_end_expand_and_build(tokenizer, parse_fn, role):
+    """Full renderer pipeline: parse -> chat template -> expand -> locate
+    -> build mixed prompt, across tokenizers, roles, and sync/async."""
+    tokenizer.chat_template = _SIMPLE_CHAT_TEMPLATE
+    tid = _ensure_prompt_embeds_placeholder_token(tokenizer)
+
+    LEN_A, LEN_B = 3, 2
+    t_a = torch.randn(LEN_A, _MOCK_HIDDEN_SIZE, dtype=_MOCK_DTYPE)
+    t_b = torch.randn(LEN_B, _MOCK_HIDDEN_SIZE, dtype=_MOCK_DTYPE)
+    NUM_TENSORS = 2
+
+    mc = _make_mock_model_config()
+
+    messages = [
+        {
+            "role": role,
+            "content": [
+                {"type": "text", "text": "Hello "},
+                {"type": "prompt_embeds", "data": _encode_tensor(t_a)},
+                {"type": "text", "text": " world "},
+                {"type": "prompt_embeds", "data": _encode_tensor(t_b)},
+                {"type": "text", "text": "!"},
+            ],
+        }
+    ]
+
+    conv, mm_data, _ = await _maybe_await(
+        parse_fn, messages, mc, content_format="openai"
+    )
+    tensors = list(mm_data["prompt_embeds"])
+    assert len(tensors) == NUM_TENSORS
+
+    # Tokenize: each prompt_embeds part becomes 1 placeholder token.
+    # `return_dict=False` to get a flat `list[int]` on transformers v5
+    # (where the default flipped to True and yields a `BatchEncoding` dict).
+    token_ids = tokenizer.apply_chat_template(conv, tokenize=True, return_dict=False)
+    assert sum(t == tid for t in token_ids) == NUM_TENSORS
+
+    # Expand, locate, and build.
+    mm_updates = _build_prompt_embeds_updates(tensors, tid)
+    expanded = _expand_prompt_embeds_placeholders(token_ids, mm_updates)
+    assert len(expanded) == len(token_ids) + LEN_A + LEN_B - NUM_TENSORS
+
+    positions = _build_prompt_embeds_positions(expanded, len(tensors), mm_updates)
+    assert positions[0][1] == LEN_A
+    assert positions[1][1] == LEN_B
+
+    embeds, mask = _build_mixed_prompt_embeds(expanded, tensors, positions)
+    assert embeds.shape == (len(expanded), _MOCK_HIDDEN_SIZE)
+    assert mask.count(False) == LEN_A + LEN_B
+    assert torch.equal(embeds[positions[0][0] : positions[0][0] + LEN_A], t_a)
+    assert torch.equal(embeds[positions[1][0] : positions[1][0] + LEN_B], t_b)
+
+
+@pytest.mark.asyncio
+async def test_end_to_end_multi_message_conversation(tokenizer, parse_fn):
+    """Full pipeline with prompt_embeds spread across system + user messages,
+    verifying ordering and positioning in the final token stream."""
+    tokenizer.chat_template = _SIMPLE_CHAT_TEMPLATE
+    tid = _ensure_prompt_embeds_placeholder_token(tokenizer)
+
+    LEN_SYS, LEN_USR = 4, 3
+    t_sys = torch.randn(LEN_SYS, _MOCK_HIDDEN_SIZE, dtype=_MOCK_DTYPE)
+    t_usr = torch.randn(LEN_USR, _MOCK_HIDDEN_SIZE, dtype=_MOCK_DTYPE)
+    NUM_TENSORS = 2  # t_sys and t_usr.
+
+    mc = _make_mock_model_config()
+
+    messages = [
+        {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": "You are helpful."},
+                {"type": "prompt_embeds", "data": _encode_tensor(t_sys)},
+            ],
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "prompt_embeds", "data": _encode_tensor(t_usr)},
+                {"type": "text", "text": "Summarize."},
+            ],
+        },
+    ]
+
+    conv, mm_data, _ = await _maybe_await(
+        parse_fn, messages, mc, content_format="openai"
+    )
+    tensors = list(mm_data["prompt_embeds"])
+    assert len(tensors) == NUM_TENSORS
+
+    # Tokenize, expand, locate, and build.
+    # `return_dict=False` to get a flat `list[int]` on transformers v5
+    # (where the default flipped to True and yields a `BatchEncoding` dict).
+    token_ids = tokenizer.apply_chat_template(conv, tokenize=True, return_dict=False)
+    mm_updates = _build_prompt_embeds_updates(tensors, tid)
+    expanded = _expand_prompt_embeds_placeholders(token_ids, mm_updates)
+    positions = _build_prompt_embeds_positions(expanded, len(tensors), mm_updates)
+
+    assert positions[0][1] == LEN_SYS
+    assert positions[1][1] == LEN_USR
+    # System embed must appear before user embed in the token stream.
+    assert positions[0][0] < positions[1][0]
+
+    embeds, mask = _build_mixed_prompt_embeds(expanded, tensors, positions)
+    assert embeds.shape == (len(expanded), _MOCK_HIDDEN_SIZE)
+    assert mask.count(False) == LEN_SYS + LEN_USR
+    assert torch.equal(embeds[positions[0][0] : positions[0][0] + LEN_SYS], t_sys)
+    assert torch.equal(embeds[positions[1][0] : positions[1][0] + LEN_USR], t_usr)
diff --git a/tests/renderers/test_completions.py b/tests/renderers/test_completions.py
index ccc806ba137d..00d604afdcf9 100644
--- a/tests/renderers/test_completions.py
+++ b/tests/renderers/test_completions.py
@@ -39,6 +39,11 @@ class MockModelConfig:
     is_encoder_decoder: bool = False
     is_multimodal_model: bool = False
     renderer_num_workers: int = 1
+    hidden_size: int = 768
+    dtype: torch.dtype = torch.float32
+
+    def get_hidden_size(self) -> int:
+        return self.hidden_size
 
 
 @dataclass
@@ -384,12 +389,13 @@ def test_single_prompt_embed(self):
         assert torch.equal(results[0]["prompt_embeds"], tensor_input)
 
     def test_multiple_prompt_embeds(self):
-        renderer = _build_renderer(MockModelConfig())
+        hidden_size = 512
+        renderer = _build_renderer(MockModelConfig(hidden_size=hidden_size))
 
         # Create multiple test tensors
         tensor_inputs = [
-            torch.randn(8, 512, dtype=torch.float32),
-            torch.randn(12, 512, dtype=torch.float32),
+            torch.randn(8, hidden_size, dtype=torch.float32),
+            torch.randn(12, hidden_size, dtype=torch.float32),
         ]
 
         prompts = renderer.render_prompts(
@@ -432,13 +438,15 @@ def test_prompt_embed_truncation(self):
         assert torch.equal(results[0]["prompt_embeds"], expected)
 
     def test_prompt_embed_different_dtypes(self):
-        renderer = _build_renderer(MockModelConfig())
-
+        hidden_size = 256
         # Test different supported dtypes
         dtypes = [torch.float32, torch.float16, torch.bfloat16]
 
         for dtype in dtypes:
-            tensor_input = torch.randn(5, 256, dtype=dtype)
+            renderer = _build_renderer(
+                MockModelConfig(hidden_size=hidden_size, dtype=dtype)
+            )
+            tensor_input = torch.randn(5, hidden_size, dtype=dtype)
 
             prompts = renderer.render_prompts(
                 _preprocess_prompt(
@@ -474,10 +482,11 @@ def test_prompt_embed_squeeze_batch_dim(self):
         assert results[0]["prompt_embeds"].shape == (10, 768)
 
     def test_both_prompts_and_embeds(self):
-        renderer = _build_renderer(MockModelConfig())
+        hidden_size = 256
+        renderer = _build_renderer(MockModelConfig(hidden_size=hidden_size))
 
         text_input = "Hello world"
-        tensor_input = torch.randn(5, 256, dtype=torch.float32)
+        tensor_input = torch.randn(5, hidden_size, dtype=torch.float32)
 
         prompts = renderer.render_prompts(
             _preprocess_prompt(
diff --git a/tests/renderers/test_gemma4_chat_template.py b/tests/renderers/test_gemma4_chat_template.py
new file mode 100644
index 000000000000..a4a0b41d053a
--- /dev/null
+++ b/tests/renderers/test_gemma4_chat_template.py
@@ -0,0 +1,345 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""Tests for Gemma4 chat template rendering."""
+
+from pathlib import Path
+
+import jinja2.sandbox
+import pytest
+
+TEMPLATE_PATH = (
+    Path(__file__).resolve().parent.parent.parent
+    / "examples"
+    / "tool_chat_template_gemma4.jinja"
+)
+
+
+@pytest.fixture(scope="module")
+def gemma4_template():
+    """Load and compile the Gemma4 chat template."""
+    template_str = TEMPLATE_PATH.read_text()
+    env = jinja2.sandbox.ImmutableSandboxedEnvironment()
+    return env.from_string(template_str)
+
+
+def _render(template, messages, **kwargs):
+    """Render the template with sensible defaults."""
+    kwargs.setdefault("bos_token", "<bos>")
+    kwargs.setdefault("add_generation_prompt", False)
+    return template.render(messages=messages, **kwargs)
+
+
+class TestGemma4ChatTemplate:
+    def test_basic_multiturn_thinking_disabled(self, gemma4_template):
+        """With enable_thinking=False (default), generation prompt ends with
+        an empty thought channel to suppress thinking."""
+        messages = [
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": "Hi there!"},
+            {"role": "user", "content": "How are you?"},
+        ]
+        result = _render(gemma4_template, messages, add_generation_prompt=True)
+        assert "<|turn>user\n" in result
+        assert "<|turn>model\n" in result
+        assert "Hello" in result
+        assert "Hi there!" in result
+        assert "How are you?" in result
+        assert result.rstrip("\n").endswith("<|channel>thought\n<channel|>")
+
+    def test_basic_multiturn_thinking_enabled(self, gemma4_template):
+        """With enable_thinking=True, generation prompt ends with model
+        turn opener (no thought suppression)."""
+        messages = [
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": "Hi there!"},
+            {"role": "user", "content": "How are you?"},
+        ]
+        result = _render(
+            gemma4_template,
+            messages,
+            add_generation_prompt=True,
+            enable_thinking=True,
+        )
+        assert "<|turn>user\n" in result
+        assert "<|turn>model\n" in result
+        assert "Hello" in result
+        assert "Hi there!" in result
+        assert "How are you?" in result
+        assert result.rstrip("\n").endswith("<|turn>model")
+
+    def test_system_message(self, gemma4_template):
+        messages = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": "Hi"},
+        ]
+        result = _render(gemma4_template, messages)
+        assert "<|turn>system\n" in result
+        assert "You are helpful." in result
+
+    def test_thinking_enabled(self, gemma4_template):
+        messages = [{"role": "user", "content": "Think about this"}]
+        result = _render(
+            gemma4_template,
+            messages,
+            add_generation_prompt=True,
+            enable_thinking=True,
+        )
+        assert "<|think|>" in result
+        assert "<|turn>system\n" in result
+
+    def test_tool_declarations(self, gemma4_template):
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "description": "Get weather for a city",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "city": {
+                                "type": "string",
+                                "description": "City name",
+                            }
+                        },
+                        "required": ["city"],
+                    },
+                },
+            }
+        ]
+        messages = [{"role": "user", "content": "What is the weather?"}]
+        result = _render(
+            gemma4_template,
+            messages,
+            tools=tools,
+            add_generation_prompt=True,
+        )
+        assert "<|tool>" in result
+        assert "declaration:get_weather" in result
+        assert "<tool|>" in result
+        assert '<|"|>City name<|"|>' in result
+
+    def test_tool_calls_in_assistant(self, gemma4_template):
+        messages = [
+            {"role": "user", "content": "Weather in London?"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "function": {
+                            "name": "get_weather",
+                            "arguments": {"city": "London"},
+                        },
+                    }
+                ],
+            },
+        ]
+        result = _render(gemma4_template, messages)
+        assert "<|tool_call>call:get_weather{" in result
+        assert "}<tool_call|>" in result
+        assert '<|"|>London<|"|>' in result
+
+    def test_tool_responses_openai_style(self, gemma4_template):
+        """role='tool' messages are formatted as <|tool_response> blocks
+        with content dumped as-is."""
+        messages = [
+            {"role": "user", "content": "Weather?"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "function": {
+                            "name": "get_weather",
+                            "arguments": {"city": "London"},
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "call_1",
+                "content": '{"temperature": 15, "condition": "sunny"}',
+            },
+        ]
+        result = _render(gemma4_template, messages, add_generation_prompt=True)
+        assert "<|tool_response>" in result
+        assert "response:get_weather{" in result
+        assert "<tool_response|>" in result
+        assert '"temperature": 15' in result
+
+    def test_tool_responses_legacy_style(self, gemma4_template):
+        """tool_responses embedded on the assistant message."""
+        messages = [
+            {"role": "user", "content": "Weather?"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "function": {
+                            "name": "get_weather",
+                            "arguments": {"city": "London"},
+                        },
+                    }
+                ],
+                "tool_responses": [
+                    {
+                        "name": "get_weather",
+                        "response": {"temperature": 20},
+                    }
+                ],
+            },
+        ]
+        result = _render(gemma4_template, messages)
+        assert "<|tool_response>" in result
+        assert "response:get_weather{" in result
+        assert "temperature:" in result
+
+    def test_generation_prompt_not_after_tool_response(self, gemma4_template):
+        """add_generation_prompt=True should NOT add <|turn>model when the
+        last message type was tool_response (the model turn continues)."""
+        messages = [
+            {"role": "user", "content": "Weather?"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "function": {
+                            "name": "get_weather",
+                            "arguments": {"city": "London"},
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "call_1",
+                "content": "sunny",
+            },
+        ]
+        result = _render(gemma4_template, messages, add_generation_prompt=True)
+        assert not result.strip().endswith("<|turn>model\n")
+
+    def test_reasoning_in_tool_chains(self, gemma4_template):
+        """reasoning field on assistant with tool_calls after last user
+        message emits <|channel>thought\\n...<channel|>."""
+        messages = [
+            {"role": "user", "content": "Calculate something"},
+            {
+                "role": "assistant",
+                "content": "",
+                "reasoning": "Let me think about this...",
+                "tool_calls": [
+                    {
+                        "function": {
+                            "name": "calculator",
+                            "arguments": {"expr": "2+2"},
+                        },
+                    }
+                ],
+            },
+        ]
+        result = _render(gemma4_template, messages)
+        assert "<|channel>thought\n" in result
+        assert "Let me think about this..." in result
+        assert "<channel|>" in result
+
+    def test_reasoning_not_before_last_user(self, gemma4_template):
+        """reasoning on assistant BEFORE the last user message is dropped."""
+        messages = [
+            {"role": "user", "content": "First"},
+            {
+                "role": "assistant",
+                "content": "Response",
+                "reasoning": "Old reasoning that should be dropped",
+                "tool_calls": [
+                    {
+                        "function": {
+                            "name": "fn",
+                            "arguments": {},
+                        },
+                    }
+                ],
+            },
+            {"role": "user", "content": "Second"},
+        ]
+        result = _render(gemma4_template, messages, add_generation_prompt=True)
+        assert "Old reasoning" not in result
+
+    def test_strip_thinking_in_model_content(self, gemma4_template):
+        """<|channel>...<channel|> in model content is stripped by the
+        strip_thinking macro."""
+        messages = [
+            {"role": "user", "content": "Hi"},
+            {
+                "role": "assistant",
+                "content": ("<|channel>internal thought<channel|>Visible answer"),
+            },
+        ]
+        result = _render(gemma4_template, messages)
+        assert "internal thought" not in result
+        assert "Visible answer" in result
+
+    def test_multi_turn_tool_chain(self, gemma4_template):
+        """assistant->tool->assistant->tool produces exactly one
+        <|turn>model (later assistants continue the same turn)."""
+        messages = [
+            {"role": "user", "content": "Do two things"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "c1",
+                        "function": {"name": "step1", "arguments": {}},
+                    },
+                ],
+            },
+            {"role": "tool", "tool_call_id": "c1", "content": "result1"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "c2",
+                        "function": {"name": "step2", "arguments": {}},
+                    },
+                ],
+            },
+            {"role": "tool", "tool_call_id": "c2", "content": "result2"},
+        ]
+        result = _render(gemma4_template, messages, add_generation_prompt=True)
+        assert result.count("<|turn>model\n") == 1
+
+    def test_format_argument_types(self, gemma4_template):
+        """Strings wrapped in <|"|>, booleans as true/false, numbers bare."""
+        messages = [
+            {"role": "user", "content": "Test"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "function": {
+                            "name": "test_fn",
+                            "arguments": {
+                                "name": "Alice",
+                                "active": True,
+                                "count": 42,
+                            },
+                        },
+                    }
+                ],
+            },
+        ]
+        result = _render(gemma4_template, messages)
+        assert '<|"|>Alice<|"|>' in result
+        assert "active:true" in result
+        assert "count:42" in result
diff --git a/tests/renderers/test_hf.py b/tests/renderers/test_hf.py
index edeff54f4705..c2eb6556394a 100644
--- a/tests/renderers/test_hf.py
+++ b/tests/renderers/test_hf.py
@@ -363,6 +363,7 @@ def test_resolve_chat_template_kwargs_with_template_name():
         ("microsoft/Phi-3.5-vision-instruct", "string"),
         ("Qwen/Qwen2-VL-2B-Instruct", "openai"),
         ("Qwen/Qwen2.5-VL-3B-Instruct", "openai"),
+        ("Qwen/Qwen3.5-4B", "openai"),
         ("fixie-ai/ultravox-v0_5-llama-3_2-1b", "string"),
         ("Qwen/Qwen2-Audio-7B-Instruct", "openai"),
         ("meta-llama/Llama-Guard-3-1B", "openai"),
diff --git a/tests/renderers/test_sparse_tensor_validation.py b/tests/renderers/test_sparse_tensor_validation.py
index 5c51cd30a336..642867086fc9 100644
--- a/tests/renderers/test_sparse_tensor_validation.py
+++ b/tests/renderers/test_sparse_tensor_validation.py
@@ -12,6 +12,7 @@
 import pytest
 import torch
 
+from vllm.exceptions import VLLMValidationError
 from vllm.multimodal.media import AudioEmbeddingMediaIO, ImageEmbeddingMediaIO
 from vllm.renderers.embed_utils import safe_load_prompt_embeds
 
@@ -53,8 +54,14 @@ def _create_malicious_sparse_tensor() -> torch.Tensor:
     values = torch.tensor([1.0])
     shape = (3, 3)
 
-    # Create sparse tensor (this will be invalid)
-    sparse_tensor = torch.sparse_coo_tensor(indices, values, shape, dtype=torch.float32)
+    # Create sparse tensor (this will be invalid). Pass `check_invariants=False`
+    # explicitly so this fixture is robust to process-wide invariant-check state
+    # left enabled by other tests (the global flag isn't thread-local, and
+    # concurrent users of the `check_sparse_tensor_invariants` context manager
+    # can leak the "enabled" state across tests).
+    sparse_tensor = torch.sparse_coo_tensor(
+        indices, values, shape, dtype=torch.float32, check_invariants=False
+    )
     return sparse_tensor
 
 
@@ -117,7 +124,7 @@ def test_extremely_large_indices_rejected(self, model_config):
         shape = (10, 10)
 
         malicious_tensor = torch.sparse_coo_tensor(
-            indices, values, shape, dtype=torch.float32
+            indices, values, shape, dtype=torch.float32, check_invariants=False
         )
         encoded = _encode_tensor(malicious_tensor)
 
@@ -132,13 +139,69 @@ def test_negative_indices_rejected(self, model_config):
         shape = (10, 10)
 
         malicious_tensor = torch.sparse_coo_tensor(
-            indices, values, shape, dtype=torch.float32
+            indices, values, shape, dtype=torch.float32, check_invariants=False
         )
         encoded = _encode_tensor(malicious_tensor)
 
         with pytest.raises((RuntimeError, ValueError)):
             safe_load_prompt_embeds(model_config, encoded)
 
+    def test_hidden_size_mismatch_rejected(self, model_config):
+        """Tensors whose trailing dim doesn't match the model's hidden_size
+        must be rejected at parse time."""
+        # opt-125m has hidden_size=768, passing 512 triggers the check.
+        wrong_hidden = torch.randn(10, 512, dtype=torch.float32)
+        encoded = _encode_tensor(wrong_hidden)
+
+        with pytest.raises(VLLMValidationError, match="hidden_size"):
+            safe_load_prompt_embeds(model_config, encoded)
+
+    def test_float_dtype_mismatch_cast_to_model_dtype(self, model_config):
+        """Tensors whose dtype doesn't match the model's dtype but are still
+        floating-point are cast, since API clients generally can't know the
+        server's `--dtype` setting ahead of time."""
+        # Fixture pins model dtype to float32, upload a bfloat16 tensor.
+        mismatched_float = torch.randn(10, 768, dtype=torch.bfloat16)
+        encoded = _encode_tensor(mismatched_float)
+
+        result = safe_load_prompt_embeds(model_config, encoded)
+
+        assert result.dtype == torch.float32
+        assert result.shape == mismatched_float.shape
+
+    def test_non_float_dtype_rejected(self, model_config):
+        """Non-floating-point dtypes cannot be safely cast for embeddings
+        (e.g. integer tensors almost certainly indicate caller confusion),
+        so they are rejected at parse time."""
+        non_float = torch.randint(0, 100, (10, 768), dtype=torch.int32)
+        encoded = _encode_tensor(non_float)
+
+        with pytest.raises(VLLMValidationError, match="floating-point"):
+            safe_load_prompt_embeds(model_config, encoded)
+
+    def test_non_2d_tensor_rejected(self, model_config):
+        """Tensors that aren't 2D (even after squeezing a leading dim)
+        must be rejected with a clear error."""
+        # A 1D tensor cannot be interpreted as (num_tokens, hidden_size).
+        bad = torch.randn(768, dtype=torch.float32)
+        encoded = _encode_tensor(bad)
+
+        with pytest.raises(VLLMValidationError, match="2D tensor"):
+            safe_load_prompt_embeds(model_config, encoded)
+
+    def test_non_tensor_payload_rejected(self, model_config):
+        """Deserializing to a non-Tensor object must raise a clear error
+        instead of propagating an AssertionError."""
+        # `torch.save` will serialize a plain dict; `weights_only=True` allows
+        # loading built-in containers, so this exercises the isinstance check.
+        buffer = io.BytesIO()
+        torch.save({"not": "a tensor"}, buffer)
+        buffer.seek(0)
+        encoded = base64.b64encode(buffer.read())
+
+        with pytest.raises(VLLMValidationError, match="torch.Tensor"):
+            safe_load_prompt_embeds(model_config, encoded)
+
 
 class TestImageEmbedsValidation:
     """Test sparse tensor validation in image embeddings (Chat API)."""
diff --git a/tests/renderers/test_warmup.py b/tests/renderers/test_warmup.py
new file mode 100644
index 000000000000..ac951aae4618
--- /dev/null
+++ b/tests/renderers/test_warmup.py
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for BaseRenderer.warmup MM-warmup behavior.
+
+These tests exercise:
+  - Zero-limit modalities are filtered from mm_counts passed to
+    get_dummy_processor_inputs (e.g. --limit-mm-per-prompt image=0 ...)
+  - MM warmup is skipped entirely when mm_processor is None
+
+No model weights are required: warmup() is called directly on a MagicMock
+that acts as the renderer instance.
+"""
+
+from unittest.mock import MagicMock, patch
+
+from vllm.renderers.base import BaseRenderer
+from vllm.renderers.params import ChatParams
+
+
+def _make_renderer_mock(mm_limits: dict[str, int]) -> MagicMock:
+    """Return a MagicMock that quacks like a BaseRenderer instance.
+
+    render_chat is mocked to raise ChatTemplateResolutionError so the chat
+    warmup block is skipped cleanly, keeping the test focused on MM warmup.
+    """
+    from vllm.entrypoints.chat_utils import ChatTemplateResolutionError
+
+    renderer = MagicMock()
+
+    # chat warmup: make render_chat raise so we skip past it cleanly
+    renderer.render_chat.side_effect = ChatTemplateResolutionError("no template")
+
+    # MM processor with configurable limits
+    mm_processor = MagicMock()
+    mm_processor.info.allowed_mm_limits = mm_limits
+    renderer.mm_processor = mm_processor
+    renderer._readonly_mm_processor = None
+    renderer._warmup_mm_processor = BaseRenderer._warmup_mm_processor.__get__(
+        renderer, BaseRenderer
+    )
+    renderer._clear_processor_cache = BaseRenderer._clear_processor_cache
+    renderer.clear_mm_cache = MagicMock()
+    renderer.model_config.max_model_len = 128
+    renderer.model_config.get_multimodal_config.return_value.limit_per_prompt = {}
+
+    return renderer
+
+
+class TestMmWarmupZeroLimitFiltering:
+    """Zero-limit modalities must be excluded from mm_counts."""
+
+    def test_zero_limit_modality_excluded_from_mm_counts(self):
+        """A modality with limit=0 must not appear in mm_counts."""
+        renderer = _make_renderer_mock({"image": 1, "video": 0})
+
+        with patch("vllm.multimodal.processing.TimingContext", autospec=True):
+            BaseRenderer.warmup(renderer, ChatParams())
+
+        get_inputs = renderer.mm_processor.dummy_inputs.get_dummy_processor_inputs
+        get_inputs.assert_called_once()
+        _, kwargs = get_inputs.call_args
+        assert "video" not in kwargs["mm_counts"]
+        assert kwargs["mm_counts"]["image"] == 1
+
+    def test_all_zero_limits_passes_empty_mm_counts(self):
+        """When all limits are 0, mm_counts must be empty."""
+        renderer = _make_renderer_mock({"image": 0, "video": 0})
+
+        with patch("vllm.multimodal.processing.TimingContext", autospec=True):
+            BaseRenderer.warmup(renderer, ChatParams())
+
+        get_inputs = renderer.mm_processor.dummy_inputs.get_dummy_processor_inputs
+        get_inputs.assert_called_once()
+        _, kwargs = get_inputs.call_args
+        assert kwargs["mm_counts"] == {}
+
+    def test_positive_limits_all_included_in_mm_counts(self):
+        """All modalities with limit > 0 must be present in mm_counts."""
+        renderer = _make_renderer_mock({"image": 2, "video": 1})
+
+        with patch("vllm.multimodal.processing.TimingContext", autospec=True):
+            BaseRenderer.warmup(renderer, ChatParams())
+
+        get_inputs = renderer.mm_processor.dummy_inputs.get_dummy_processor_inputs
+        get_inputs.assert_called_once()
+        _, kwargs = get_inputs.call_args
+        assert kwargs["mm_counts"] == {"image": 1, "video": 1}
+
+
+class TestMmWarmupRunsNormally:
+    """MM warmup must run when mm_processor is set and limits > 0."""
+
+    def test_processor_apply_called(self):
+        renderer = _make_renderer_mock({"image": 1})
+
+        with patch("vllm.multimodal.processing.TimingContext", autospec=True):
+            BaseRenderer.warmup(renderer, ChatParams())
+
+        renderer.mm_processor.apply.assert_called_once()
+
+    def test_mm_cache_cleared_after_warmup(self):
+        renderer = _make_renderer_mock({"image": 1})
+
+        with patch("vllm.multimodal.processing.TimingContext", autospec=True):
+            BaseRenderer.warmup(renderer, ChatParams())
+
+        renderer.clear_mm_cache.assert_called_once()
+
+
+class TestMmWarmupSkippedWhenNoProcessor:
+    """MM warmup must be skipped when mm_processor is None (text-only model)."""
+
+    def test_no_warmup_without_processor(self):
+        renderer = _make_renderer_mock({})
+        renderer.mm_processor = None  # override to None
+
+        BaseRenderer.warmup(renderer, ChatParams())
+
+        renderer.model_config.get_multimodal_config.assert_not_called()
+
+
+class TestReadonlyMmWarmup:
+    """Readonly MM processor warmup must mirror the render path behavior."""
+
+    def test_readonly_processor_apply_called_and_cache_cleared(self):
+        renderer = _make_renderer_mock({"image": 1})
+        readonly_mm_processor = MagicMock()
+        readonly_mm_processor.info.allowed_mm_limits = {"image": 1}
+        renderer._readonly_mm_processor = readonly_mm_processor
+
+        with patch("vllm.multimodal.processing.TimingContext", autospec=True):
+            BaseRenderer.warmup(renderer, ChatParams())
+
+        readonly_mm_processor.apply.assert_called_once()
+        readonly_mm_processor.cache.clear_cache.assert_called_once()
diff --git a/tests/spec_decode/test_custom_proposer.py b/tests/spec_decode/test_custom_proposer.py
new file mode 100755
index 000000000000..acd6089fb9ce
--- /dev/null
+++ b/tests/spec_decode/test_custom_proposer.py
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""Integration test for custom proposer class in speculative decoding.
+
+Usage:
+    .venv/bin/python test_custom_proposer.py
+"""
+
+import os
+
+import torch
+
+from vllm import LLM, SamplingParams
+from vllm.config import VllmConfig
+
+MODEL_ID = "facebook/opt-125m"
+NUM_SPEC_TOKENS = 5
+
+
+class DummyDraftProposer:
+    """Custom proposer class that repeats the last token of each sequence.
+
+    This demonstrates the class-based custom proposer interface.
+    """
+
+    def __init__(self, vllm_config: VllmConfig):
+        """Initialize the custom proposer.
+
+        Args:
+            vllm_config: vLLM configuration containing model and speculative settings.
+        """
+        self.num_speculative_tokens = (
+            vllm_config.speculative_config.num_speculative_tokens
+        )
+        self.max_model_len = vllm_config.model_config.max_model_len
+        print(
+            f"[DummyDraftProposer.__init__] num_speculative_tokens="
+            f"{self.num_speculative_tokens}, max_model_len={self.max_model_len}"
+        )
+
+    def propose(
+        self,
+        sampled_token_ids: list[list[int]],
+        num_tokens_no_spec: int,
+        token_ids_cpu: torch.Tensor,
+        slot_mappings: torch.Tensor | None = None,
+    ) -> list[list[int]]:
+        """Generate draft tokens by repeating the last token of each sequence.
+
+        Args:
+            sampled_token_ids: Recently sampled token IDs per request.
+            num_tokens_no_spec: Number of non-speculative tokens per request.
+            token_ids_cpu: Full token IDs tensor on CPU.
+            slot_mappings: Slot mapping for KV cache (optional).
+
+        Returns:
+            List of draft token sequences for each request.
+        """
+        # Cross-process flag to prove this method was executed
+        with open("proposer_called.flag", "w") as f:
+            f.write("called")
+
+        batch_size = len(sampled_token_ids)
+        last_tokens = [seq[-1] for seq in sampled_token_ids]
+        drafts = [[t] * self.num_speculative_tokens for t in last_tokens]
+        print(
+            f"[DummyDraftProposer.propose] batch_size={batch_size}, "
+            f"num_speculative_tokens={self.num_speculative_tokens}, "
+            f"drafts_shape={len(drafts)}x{len(drafts[0])}"
+        )
+        return drafts
+
+
+if __name__ == "__main__":
+    print("=" * 60)
+    print("Custom Proposer Backend Integration Test")
+    print("=" * 60)
+
+    # Cleanup any leftover flag from previous failed runs
+    if os.path.exists("proposer_called.flag"):
+        os.remove("proposer_called.flag")
+
+    llm = LLM(
+        model=MODEL_ID,
+        speculative_config={
+            "model": f"{__name__}.DummyDraftProposer",
+            "num_speculative_tokens": NUM_SPEC_TOKENS,
+        },
+        gpu_memory_utilization=0.4,
+        enforce_eager=True,
+    )
+
+    prompts = [
+        "Hello, my name is",
+        "The future of AI is",
+    ]
+
+    sampling_params = SamplingParams(
+        max_tokens=32,
+        temperature=0.0,
+    )
+
+    print(f"\nRunning generate with {len(prompts)} prompt(s)...\n")
+    outputs = llm.generate(prompts, sampling_params)
+
+    for output in outputs:
+        prompt = output.prompt
+        generated = output.outputs[0].text
+        print(f"Prompt:          {prompt!r}")
+        print(f"Generated text:  {generated!r}")
+        print("-" * 60)
+
+    # Verify the custom proposer's propose() was actually called across processes
+    assert os.path.exists("proposer_called.flag"), (
+        "The custom proposer's propose() method was never called!"
+    )
+    os.remove("proposer_called.flag")
+
+    print("✓ Custom proposer was actively used during generation!")
+    print("Test completed successfully.")
diff --git a/tests/standalone_tests/python_only_compile.sh b/tests/standalone_tests/python_only_compile.sh
index adfab113960f..c189549d7dae 100644
--- a/tests/standalone_tests/python_only_compile.sh
+++ b/tests/standalone_tests/python_only_compile.sh
@@ -68,7 +68,6 @@ apt autoremove -y
 echo 'import os; os.system("touch /tmp/changed.file")' >> vllm/__init__.py
 
 VLLM_PRECOMPILED_WHEEL_COMMIT=$merge_base_commit VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e .
-
 # Run the script
 python3 -c 'import vllm'
 
diff --git a/tests/standalone_tests/pytorch_nightly_dependency.sh b/tests/standalone_tests/pytorch_nightly_dependency.sh
index 92820b269f9d..3a23b1c824f2 100644
--- a/tests/standalone_tests/pytorch_nightly_dependency.sh
+++ b/tests/standalone_tests/pytorch_nightly_dependency.sh
@@ -28,8 +28,8 @@ uv pip freeze | grep -E '^torch|^torchvision|^torchaudio' | sort > before.txt
 echo "Before:"
 cat before.txt
 
-echo ">>> Installing requirements/nightly_torch_test.txt"
-uv pip install --quiet -r requirements/nightly_torch_test.txt
+echo ">>> Installing requirements/test/nightly-torch.txt"
+uv pip install --quiet -r requirements/test/nightly-torch.txt
 
 echo ">>> Capturing torch-related versions after requirements install"
 uv pip freeze | grep -E '^torch|^torchvision|^torchaudio' | sort > after.txt
@@ -40,7 +40,7 @@ echo ">>> Comparing versions"
 if diff before.txt after.txt; then
   echo "torch version not overridden."
 else
-  echo "torch version overridden by nightly_torch_test.txt, \
+  echo "torch version overridden by test/nightly-torch.txt, \
   if the dependency is not triggered by the pytorch nightly test,\
   please add the dependency to the list 'white_list' in tools/pre_commit/generate_nightly_torch_test.py"
   exit 1
diff --git a/tests/test_config.py b/tests/test_config.py
index f07a649cab5e..5c01d652a17a 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -4,14 +4,19 @@
 import logging
 import os
 from dataclasses import MISSING, Field, asdict, dataclass, field
+from types import SimpleNamespace
 from unittest.mock import patch
 
+import pydantic
 import pytest
 from pydantic import ValidationError
 
+import vllm.config.vllm as vllm_config_module
+import vllm.envs as envs
 from vllm.compilation.backends import VllmBackend
 from vllm.config import (
     CompilationConfig,
+    KernelConfig,
     ModelConfig,
     ParallelConfig,
     PoolerConfig,
@@ -21,6 +26,7 @@
     update_config,
 )
 from vllm.config.compilation import CompilationMode, CUDAGraphMode
+from vllm.config.kernel import IrOpPriorityConfig
 from vllm.config.load import LoadConfig
 from vllm.config.utils import get_field
 from vllm.config.vllm import (
@@ -29,6 +35,8 @@
 )
 from vllm.platforms import current_platform
 
+DEVICE_TYPE = current_platform.device_type
+
 
 def test_compile_config_repr_succeeds():
     # setup: VllmBackend mutates the config object
@@ -42,6 +50,179 @@ def test_compile_config_repr_succeeds():
     assert "inductor_passes" in val
 
 
+@pytest.mark.parametrize(
+    ("env_value", "expected"),
+    [
+        (None, None),
+        ("0", False),
+        ("1", True),
+    ],
+)
+def test_v2_model_runner_env_tri_state(monkeypatch, env_value, expected):
+    if env_value is None:
+        monkeypatch.delenv("VLLM_USE_V2_MODEL_RUNNER", raising=False)
+    else:
+        monkeypatch.setenv("VLLM_USE_V2_MODEL_RUNNER", env_value)
+
+    assert envs.VLLM_USE_V2_MODEL_RUNNER is expected
+
+
+@pytest.mark.parametrize(
+    ("model_config", "expected"),
+    [
+        (
+            SimpleNamespace(
+                model="Qwen/Qwen3-1.7B-Base",
+                architectures=["Qwen3ForCausalLM"],
+                runner_type="generate",
+                is_moe=False,
+                is_quantized=False,
+            ),
+            True,
+        ),
+        (
+            SimpleNamespace(
+                model="Qwen/Qwen3-32B",
+                architectures=["Qwen3ForCausalLM"],
+                runner_type="generate",
+                is_moe=False,
+                is_quantized=False,
+            ),
+            True,
+        ),
+        (
+            SimpleNamespace(
+                model="facebook/opt-125m",
+                architectures=["OPTForCausalLM"],
+                runner_type="generate",
+                is_moe=False,
+                is_quantized=False,
+            ),
+            False,
+        ),
+        (
+            SimpleNamespace(
+                model="Qwen/Qwen3-30B-A3B",
+                architectures=["Qwen3MoeForCausalLM"],
+                runner_type="generate",
+                is_moe=True,
+                is_quantized=False,
+            ),
+            False,
+        ),
+        (
+            SimpleNamespace(
+                model="Qwen/Qwen3-1.7B-FP8",
+                architectures=["Qwen3ForCausalLM"],
+                runner_type="generate",
+                is_moe=False,
+                is_quantized=True,
+            ),
+            False,
+        ),
+        (
+            SimpleNamespace(
+                model="Qwen/Qwen3.5-4B",
+                architectures=["Qwen3_5ForConditionalGeneration"],
+                runner_type="generate",
+                is_moe=False,
+                is_quantized=False,
+            ),
+            False,
+        ),
+        (
+            SimpleNamespace(
+                model="Qwen/Qwen3-Embedding-0.6B",
+                architectures=["Qwen3ForCausalLM"],
+                runner_type="pooling",
+                is_moe=False,
+                is_quantized=False,
+            ),
+            False,
+        ),
+    ],
+)
+def test_is_default_v2_model_runner_model(model_config, expected):
+    config = SimpleNamespace(model_config=model_config)
+
+    assert VllmConfig._is_default_v2_model_runner_model(config) is expected
+
+
+@pytest.mark.skip_global_cleanup
+def test_with_hf_config_populates_missing_architectures_from_causal_lm_mapping(
+    monkeypatch,
+):
+    monkeypatch.setattr(
+        vllm_config_module,
+        "replace",
+        lambda self, **kwargs: SimpleNamespace(**kwargs),
+    )
+    cfg = SimpleNamespace(
+        model_config=SimpleNamespace(
+            is_multimodal_model=False,
+            hf_config=SimpleNamespace(),
+            get_model_arch_config=lambda: "arch-config",
+        )
+    )
+    hf_config = SimpleNamespace(model_type="mistral", architectures=None)
+
+    updated = VllmConfig.with_hf_config(cfg, hf_config)
+
+    assert updated.model_config.hf_config.architectures == ["MistralForCausalLM"]
+    assert hf_config.architectures is None
+
+
+@pytest.mark.skip_global_cleanup
+def test_with_hf_config_preserves_explicit_architectures_override(monkeypatch):
+    monkeypatch.setattr(
+        vllm_config_module,
+        "replace",
+        lambda self, **kwargs: SimpleNamespace(**kwargs),
+    )
+    cfg = SimpleNamespace(
+        model_config=SimpleNamespace(
+            is_multimodal_model=False,
+            hf_config=SimpleNamespace(),
+            get_model_arch_config=lambda: "arch-config",
+        )
+    )
+    hf_config = SimpleNamespace(model_type="mistral", architectures=None)
+
+    updated = VllmConfig.with_hf_config(
+        cfg,
+        hf_config,
+        architectures=["Ministral3ForCausalLM"],
+    )
+
+    assert updated.model_config.hf_config.architectures == ["Ministral3ForCausalLM"]
+
+
+@pytest.mark.skip_global_cleanup
+def test_with_hf_config_leaves_unknown_model_type_without_architectures(
+    monkeypatch,
+):
+    monkeypatch.setattr(
+        vllm_config_module,
+        "replace",
+        lambda self, **kwargs: SimpleNamespace(**kwargs),
+    )
+    cfg = SimpleNamespace(
+        model_config=SimpleNamespace(
+            is_multimodal_model=False,
+            hf_config=SimpleNamespace(),
+            get_model_arch_config=lambda: "arch-config",
+        )
+    )
+    hf_config = SimpleNamespace(
+        model_type="not_a_real_model",
+        architectures=None,
+    )
+
+    updated = VllmConfig.with_hf_config(cfg, hf_config)
+
+    assert updated.model_config.hf_config.architectures is None
+
+
 def test_async_scheduling_with_pipeline_parallelism_is_allowed():
     cfg = VllmConfig(
         scheduler_config=SchedulerConfig(
@@ -424,8 +605,8 @@ def test_generation_config_loading():
 @pytest.mark.parametrize(
     "pt_load_map_location",
     [
-        "cuda",
-        {"": "cuda"},
+        DEVICE_TYPE,
+        {"": DEVICE_TYPE},
     ],
 )
 def test_load_config_pt_load_map_location(pt_load_map_location):
@@ -1077,6 +1258,39 @@ def test_vllm_config_explicit_overrides():
     assert config.compilation_config.cudagraph_mode == CUDAGraphMode.FULL_AND_PIECEWISE
 
 
+def test_fusion_pass_op_priority():
+    """This test checks that custom op enablement & IR op priority
+    correctly control default fusions"""
+
+    # Default config, O2, rms_norm+quant fusion disabled
+    cfg1 = VllmConfig()
+    assert not cfg1.compilation_config.pass_config.fuse_norm_quant
+
+    # rms_norm manually enabled, O1, rms_norm+quant fusion enabled
+    cfg2 = VllmConfig(
+        optimization_level=OptimizationLevel.O1,
+        compilation_config=CompilationConfig(
+            custom_ops=["+rms_norm"],
+        ),
+    )
+    assert cfg2.compilation_config.pass_config.fuse_norm_quant
+
+    # using custom kernel for RMSNorm via IR:
+    # Note that vLLM IR only supports the non-residual rms_norm for now;
+    # soon this will be resolved.
+    cfg3 = VllmConfig(
+        kernel_config=KernelConfig(
+            ir_op_priority=IrOpPriorityConfig(rms_norm=["vllm_c"])
+        )
+    )
+    assert cfg3.compilation_config.pass_config.fuse_norm_quant
+
+    # block-fp8 model should enable quant_fp8 automatically
+    cfg4 = VllmConfig(model_config=ModelConfig("Qwen/Qwen3-4B-FP8"))
+    assert "+quant_fp8" in cfg4.compilation_config.custom_ops
+    assert cfg4.compilation_config.pass_config.fuse_norm_quant
+
+
 def test_scheduler_config_init():
     with pytest.raises(ValidationError):
         # Positional InitVars missing
@@ -1100,8 +1314,6 @@ def test_scheduler_config_init():
         ("facebook/opt-125m", 1, False, False),
         # Non-MoE model with DP>1 internal LB should need coordinator
         ("facebook/opt-125m", 2, False, True),
-        # Non-MoE model with DP>1 external LB should not need coordinator
-        ("facebook/opt-125m", 2, True, False),
         # MoE model with DP=1 should not need coordinator
         ("mistralai/Mixtral-8x7B-Instruct-v0.1", 1, False, False),
         # MoE model with DP>1 internal LB should need both coordinator
@@ -1171,3 +1383,87 @@ def test_eagle_draft_model_config():
     assert draft_model_config.hf_text_config.model_type == "eagle"
     assert draft_model_config.architectures == ["EagleLlamaForCausalLM"]
     assert draft_model_config.architecture == "EagleLlamaForCausalLM"
+
+
+def test_draft_sample_method_probabilistic_is_accepted():
+    speculative_config = SpeculativeConfig(
+        method="ngram",
+        num_speculative_tokens=1,
+        draft_sample_method="probabilistic",
+    )
+    assert speculative_config.draft_sample_method == "probabilistic"
+
+
+def test_draft_sample_method_gumbel_is_rejected():
+    with pytest.raises(ValidationError):
+        SpeculativeConfig(
+            method="ngram",
+            num_speculative_tokens=1,
+            draft_sample_method="gumbel",
+        )
+
+
+def test_ir_op_priority_default():
+    """Test that IR op priority defaults are set correctly."""
+    from vllm.config.kernel import IrOpPriorityConfig
+
+    # Assert default is applied to ops
+    priority_config = IrOpPriorityConfig.with_default(["vllm_c", "native"])
+    assert priority_config.rms_norm == ["vllm_c", "native"]
+    assert priority_config.fused_add_rms_norm == ["vllm_c", "native"]
+
+    # Assert single ops override the default
+    priority_config = IrOpPriorityConfig.with_default(
+        ["native"], rms_norm=["oink", "native"]
+    )
+    assert priority_config.rms_norm == ["oink", "native"]
+    assert priority_config.fused_add_rms_norm == ["native"]
+
+
+def test_ir_op_priority_str():
+    """Test that passing a comma-delimited string works"""
+    from vllm.config.kernel import IrOpPriorityConfig
+
+    priority_config = IrOpPriorityConfig(rms_norm="vllm_c")
+    assert priority_config.rms_norm == ["vllm_c"]
+
+    priority_config = IrOpPriorityConfig(rms_norm="vllm_c,native")
+    assert priority_config.rms_norm == ["vllm_c", "native"]
+
+    priority_config = IrOpPriorityConfig(rms_norm=" native, vllm_c ")
+    assert priority_config.rms_norm == ["native", "vllm_c"]
+
+    with pytest.raises(pydantic.ValidationError):
+        # must be list of only strings
+        priority_config = IrOpPriorityConfig(rms_norm=["vllm_c", 4, "native"])
+
+
+def test_ir_op_priority_ctx():
+    """Test that the priority-setting context sets priority correctly."""
+    from vllm import ir
+    from vllm.config.kernel import IrOpPriorityConfig
+
+    priority = IrOpPriorityConfig.with_default(["native"], rms_norm=["vllm_c"])
+    priority2 = IrOpPriorityConfig.with_default(
+        ["native"], fused_add_rms_norm=["vllm_c"]
+    )
+    with priority.set_priority():
+        assert ir.ops.rms_norm.get_priority() == ["vllm_c", "native"]
+        assert ir.ops.fused_add_rms_norm.get_priority() == ["native"]
+        with priority2.set_priority():
+            assert ir.ops.rms_norm.get_priority() == ["native"]
+            assert ir.ops.fused_add_rms_norm.get_priority() == ["vllm_c", "native"]
+
+        # context restored
+        assert ir.ops.rms_norm.get_priority() == ["vllm_c", "native"]
+        assert ir.ops.fused_add_rms_norm.get_priority() == ["native"]
+
+        with pytest.raises(ValueError), priority2.set_priority():
+            assert ir.ops.rms_norm.get_priority() == ["native"]
+            assert ir.ops.fused_add_rms_norm.get_priority() == ["vllm_c", "native"]
+
+            raise ValueError
+
+        # context restored even after exception
+        assert ir.ops.rms_norm.get_priority() == ["vllm_c", "native"]
+        assert ir.ops.fused_add_rms_norm.get_priority() == ["native"]
diff --git a/tests/test_envs.py b/tests/test_envs.py
index 3f3add2ab764..e0211b56308f 100644
--- a/tests/test_envs.py
+++ b/tests/test_envs.py
@@ -28,6 +28,14 @@ def test_getattr_without_cache(monkeypatch: pytest.MonkeyPatch):
     assert not hasattr(envs.__getattr__, "cache_info")
 
 
+def test_nixl_side_channel_host_is_not_compile_factor(
+    monkeypatch: pytest.MonkeyPatch,
+):
+    monkeypatch.setenv("VLLM_NIXL_SIDE_CHANNEL_HOST", "10.0.0.15")
+
+    assert "VLLM_NIXL_SIDE_CHANNEL_HOST" not in envs.compile_factors()
+
+
 def test_getattr_with_cache(monkeypatch: pytest.MonkeyPatch):
     monkeypatch.setenv("VLLM_HOST_IP", "1.1.1.1")
     monkeypatch.setenv("VLLM_PORT", "1234")
@@ -95,6 +103,19 @@ def test_is_envs_cache_enabled() -> None:
     assert not envs._is_envs_cache_enabled()
 
 
+def test_precompiled_install_flags_are_orthogonal() -> None:
+    with patch.dict(
+        os.environ,
+        {
+            "VLLM_PRECOMPILED_WHEEL_LOCATION": "/tmp/vllm.whl",
+            "VLLM_USE_PRECOMPILED_RUST": "1",
+        },
+        clear=False,
+    ):
+        assert environment_variables["VLLM_USE_PRECOMPILED"]() is False
+        assert environment_variables["VLLM_USE_PRECOMPILED_RUST"]() is True
+
+
 class TestEnvWithChoices:
     """Test cases for env_with_choices function."""
 
diff --git a/tests/test_fxgraphcache_pickle_patch.py b/tests/test_fxgraphcache_pickle_patch.py
new file mode 100644
index 000000000000..8a3f395267c4
--- /dev/null
+++ b/tests/test_fxgraphcache_pickle_patch.py
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for the FxGraphCachePickler.dumps ValueError patch in env_override.py.
+
+Validates that _apply_fxgraphcache_pickle_patch correctly wraps a pickler's
+dumps method to convert ValueError into a bypass exception, without affecting
+other exception types or normal return values.
+"""
+
+import pytest
+
+from vllm.env_override import _apply_fxgraphcache_pickle_patch
+
+
+class _BypassStub(Exception):
+    """Stand-in for BypassFxGraphCache in unit tests."""
+
+
+class TestApplyFxgraphcachePicklePatch:
+    def test_valueerror_converted_to_bypass(self):
+        class Pickler:
+            def dumps(self, obj):
+                raise ValueError("can't serialize blocked layout")
+
+        _apply_fxgraphcache_pickle_patch(Pickler, _BypassStub)
+
+        with pytest.raises(_BypassStub, match="Failed to pickle cache key"):
+            Pickler().dumps(object())
+
+    def test_original_valueerror_chained(self):
+        class Pickler:
+            def dumps(self, obj):
+                raise ValueError("bad tensor layout")
+
+        _apply_fxgraphcache_pickle_patch(Pickler, _BypassStub)
+
+        with pytest.raises(_BypassStub) as exc_info:
+            Pickler().dumps(object())
+
+        cause = exc_info.value.__cause__
+        assert isinstance(cause, ValueError)
+        assert str(cause) == "bad tensor layout"
+
+    def test_non_valueerror_propagates(self):
+        class Pickler:
+            def dumps(self, obj):
+                raise TypeError("unexpected type")
+
+        _apply_fxgraphcache_pickle_patch(Pickler, _BypassStub)
+
+        with pytest.raises(TypeError, match="unexpected type"):
+            Pickler().dumps(object())
+
+    def test_normal_return_preserved(self):
+        sentinel = b"serialized-graph-key"
+
+        class Pickler:
+            def dumps(self, obj):
+                return sentinel
+
+        _apply_fxgraphcache_pickle_patch(Pickler, _BypassStub)
+
+        assert Pickler().dumps(object()) is sentinel
+
+    def test_idempotent(self):
+        class Pickler:
+            def dumps(self, obj):
+                return b"ok"
+
+        _apply_fxgraphcache_pickle_patch(Pickler, _BypassStub)
+        first_dumps = Pickler.dumps
+        _apply_fxgraphcache_pickle_patch(Pickler, _BypassStub)
+
+        assert Pickler.dumps is first_dumps
+
+    def test_sentinel_attribute_set(self):
+        class Pickler:
+            def dumps(self, obj):
+                return b"ok"
+
+        assert not hasattr(Pickler.dumps, "_vllm_patched")
+        assert not getattr(Pickler, "_vllm_fxgraph_dumps_patched", False)
+
+        _apply_fxgraphcache_pickle_patch(Pickler, _BypassStub)
+
+        assert Pickler.dumps._vllm_patched is True  # type: ignore[attr-defined]
+        assert Pickler._vllm_fxgraph_dumps_patched is True  # type: ignore[attr-defined]
+
+
+def test_patch_applied_in_current_environment():
+    """Integration: verify patch state matches current torch version."""
+    from torch._inductor.codecache import FxGraphCachePickler
+
+    from vllm.utils.torch_utils import is_torch_equal_or_newer
+
+    should_be_patched = is_torch_equal_or_newer(
+        "2.10.0"
+    ) and not is_torch_equal_or_newer("2.11.0")
+
+    assert getattr(FxGraphCachePickler, "_vllm_fxgraph_dumps_patched", False) == (
+        should_be_patched
+    )
+    assert hasattr(FxGraphCachePickler.dumps, "_vllm_patched") == should_be_patched
diff --git a/tests/test_jit_monitor.py b/tests/test_jit_monitor.py
new file mode 100644
index 000000000000..a463f4b5faa1
--- /dev/null
+++ b/tests/test_jit_monitor.py
@@ -0,0 +1,240 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import os
+import sys
+from types import SimpleNamespace
+from unittest import mock
+
+import pytest
+
+from vllm.triton_utils import jit_monitor
+
+
+@pytest.fixture(autouse=True)
+def _reset_monitor():
+    """Reset global monitor state between tests."""
+    jit_monitor._active = False
+    yield
+    jit_monitor._active = False
+
+
+# ------------------------------------------------------------------
+# Helpers — lightweight stand-ins for triton.knobs
+# ------------------------------------------------------------------
+
+
+def _make_fake_knobs(*, autotuning_print=False, jit_hook=None):
+    """Build a minimal fake ``triton.knobs`` namespace."""
+    autotuning = SimpleNamespace(print=autotuning_print)
+    runtime = SimpleNamespace(jit_post_compile_hook=jit_hook)
+    return SimpleNamespace(autotuning=autotuning, runtime=runtime)
+
+
+def _patch_triton_knobs(fake_knobs):
+    """Context manager that makes ``from triton import knobs`` return *fake_knobs*."""
+    fake_triton = SimpleNamespace(knobs=fake_knobs)
+    return mock.patch.dict(sys.modules, {"triton": fake_triton})
+
+
+# ------------------------------------------------------------------
+# Unit tests (no GPU required, triton is mocked)
+# ------------------------------------------------------------------
+
+
+class TestActivateBasic:
+    def test_sets_active(self):
+        assert not jit_monitor.is_active()
+        with _patch_triton_knobs(_make_fake_knobs()):
+            jit_monitor.activate()
+        assert jit_monitor.is_active()
+
+    def test_idempotent(self):
+        fake = _make_fake_knobs()
+        with _patch_triton_knobs(fake):
+            jit_monitor.activate()
+            first_hook = fake.runtime.jit_post_compile_hook
+            jit_monitor.activate()
+            assert fake.runtime.jit_post_compile_hook is first_hook
+
+    def test_logs_info_on_activation(self):
+        with (
+            mock.patch.object(jit_monitor.logger, "info") as m,
+            _patch_triton_knobs(_make_fake_knobs()),
+        ):
+            jit_monitor.activate()
+        m.assert_called_once()
+        assert "Kernel JIT monitor activated" in m.call_args[0][0]
+
+
+class TestAutotuningPrint:
+    def test_enables_autotuning_print(self):
+        fake = _make_fake_knobs(autotuning_print=False)
+        with _patch_triton_knobs(fake):
+            jit_monitor.activate()
+        assert fake.autotuning.print is True
+
+    def test_respects_user_opt_out(self):
+        fake = _make_fake_knobs(autotuning_print=False)
+        with (
+            mock.patch.dict(os.environ, {"TRITON_PRINT_AUTOTUNING": "0"}),
+            _patch_triton_knobs(fake),
+        ):
+            jit_monitor.activate()
+        assert fake.autotuning.print is False
+
+    def test_noop_when_user_already_enabled(self):
+        fake = _make_fake_knobs(autotuning_print=True)
+        with (
+            mock.patch.dict(os.environ, {"TRITON_PRINT_AUTOTUNING": "1"}),
+            _patch_triton_knobs(fake),
+        ):
+            jit_monitor.activate()
+        assert fake.autotuning.print is True
+
+
+class TestJitHook:
+    def test_hook_registered(self):
+        fake = _make_fake_knobs()
+        assert fake.runtime.jit_post_compile_hook is None
+        with _patch_triton_knobs(fake):
+            jit_monitor.activate()
+        assert fake.runtime.jit_post_compile_hook is not None
+
+    def test_hook_logs_warning(self):
+        fake = _make_fake_knobs()
+        with _patch_triton_knobs(fake):
+            jit_monitor.activate()
+
+        hook = fake.runtime.jit_post_compile_hook
+        mock_fn = SimpleNamespace(name="test_kernel")
+
+        with mock.patch.object(jit_monitor.logger, "warning") as m:
+            hook(
+                key="some_key",
+                repr="some_repr",
+                fn=mock_fn,
+                compile=lambda: None,
+                is_manual_warmup=False,
+                already_compiled=False,
+            )
+
+        m.assert_called_once()
+        msg = m.call_args[0][0] % m.call_args[0][1:]
+        assert "Triton kernel JIT compilation during inference" in msg
+        assert "test_kernel" in msg
+
+    def test_hook_chains_existing_hook(self):
+        existing = mock.MagicMock(return_value="existing_result")
+        fake = _make_fake_knobs(jit_hook=existing)
+        with _patch_triton_knobs(fake):
+            jit_monitor.activate()
+
+        hook = fake.runtime.jit_post_compile_hook
+        mock_fn = SimpleNamespace(name="chained_kernel")
+        kwargs = dict(
+            key="k",
+            repr="r",
+            fn=mock_fn,
+            compile=lambda: None,
+            is_manual_warmup=False,
+            already_compiled=False,
+        )
+        result = hook(**kwargs)
+
+        existing.assert_called_once()
+        assert result == "existing_result"
+
+    def test_hook_works_without_existing_hook(self):
+        fake = _make_fake_knobs(jit_hook=None)
+        with _patch_triton_knobs(fake):
+            jit_monitor.activate()
+
+        hook = fake.runtime.jit_post_compile_hook
+        mock_fn = SimpleNamespace(name="solo_kernel")
+        result = hook(
+            key="k",
+            repr="r",
+            fn=mock_fn,
+            compile=lambda: None,
+            is_manual_warmup=False,
+            already_compiled=False,
+        )
+        assert result is None
+
+
+class TestNoTritonFallback:
+    def test_activate_without_triton(self):
+        with mock.patch.object(jit_monitor, "HAS_TRITON", False):
+            jit_monitor.activate()
+        assert jit_monitor.is_active()
+
+
+# ------------------------------------------------------------------
+# Integration tests (real Triton + GPU)
+# ------------------------------------------------------------------
+
+try:
+    import torch
+
+    _HAS_CUDA = torch.cuda.is_available()
+except ImportError:
+    _HAS_CUDA = False
+
+try:
+    import triton
+    import triton.language as tl
+
+    _HAS_TRITON = True
+except ImportError:
+    _HAS_TRITON = False
+
+_skip_no_gpu = pytest.mark.skipif(
+    not (_HAS_CUDA and _HAS_TRITON),
+    reason="Requires CUDA GPU and Triton",
+)
+
+
+if _HAS_TRITON:
+
+    @triton.jit
+    def _add_kernel(x_ptr, y_ptr, out_ptr, n, BLOCK: tl.constexpr):
+        pid = tl.program_id(0)
+        offs = pid * BLOCK + tl.arange(0, BLOCK)
+        mask = offs < n
+        x = tl.load(x_ptr + offs, mask=mask)
+        y = tl.load(y_ptr + offs, mask=mask)
+        tl.store(out_ptr + offs, x + y, mask=mask)
+
+
+def _run_add_kernel(n: int, block: int = 256) -> None:
+    """Launch ``_add_kernel`` with vectors of length *n*."""
+    x = torch.randn(n, device="cuda")
+    y = torch.randn(n, device="cuda")
+    out = torch.empty(n, device="cuda")
+    grid = ((n + block - 1) // block,)
+    _add_kernel[grid](x, y, out, n, BLOCK=block)
+    torch.accelerator.synchronize()
+
+
+@_skip_no_gpu
+class TestTritonJitHookIntegration:
+    """End-to-end: real Triton kernel, real GPU, real hook."""
+
+    def test_no_warning_on_cached_shape(self):
+        _run_add_kernel(1024)
+
+        jit_monitor.activate()
+        with mock.patch.object(jit_monitor.logger, "warning") as w:
+            _run_add_kernel(1024)
+        w.assert_not_called()
+
+    def test_warning_on_new_constexpr(self):
+        _run_add_kernel(1024, block=256)
+
+        jit_monitor.activate()
+        with mock.patch.object(jit_monitor.logger, "warning") as w:
+            # Different BLOCK (a tl.constexpr) forces recompilation.
+            _run_add_kernel(1024, block=512)
+        w.assert_called()
+        msg = w.call_args[0][0] % w.call_args[0][1:]
+        assert "_add_kernel" in msg
diff --git a/tests/test_ray_env.py b/tests/test_ray_env.py
index c08f088acd22..945b2d80b691 100644
--- a/tests/test_ray_env.py
+++ b/tests/test_ray_env.py
@@ -6,6 +6,7 @@
 from unittest.mock import patch
 
 from vllm.ray.ray_env import get_env_vars_to_copy
+from vllm.v1.executor.ray_utils import WORKER_SPECIFIC_ENV_VARS
 
 # ---------------------------------------------------------------------------
 # Default prefix matching
@@ -106,6 +107,19 @@ def test_exclude_vars(self):
         result = get_env_vars_to_copy(exclude_vars={"CUDA_VISIBLE_DEVICES"})
         assert "CUDA_VISIBLE_DEVICES" not in result
 
+    @patch.dict(
+        os.environ,
+        {
+            "VLLM_HOST_IP": "10.0.0.1",
+            "VLLM_NIXL_SIDE_CHANNEL_HOST": "10.0.0.1",
+        },
+        clear=False,
+    )
+    def test_worker_specific_host_vars_are_excluded(self):
+        result = get_env_vars_to_copy(exclude_vars=WORKER_SPECIFIC_ENV_VARS)
+        assert "VLLM_HOST_IP" not in result
+        assert "VLLM_NIXL_SIDE_CHANNEL_HOST" not in result
+
     @patch.dict(os.environ, {"LMCACHE_LOCAL_CPU": "True"}, clear=False)
     @patch(
         "vllm.ray.ray_env.RAY_NON_CARRY_OVER_ENV_VARS",
diff --git a/tests/test_ray_env_utils.py b/tests/test_ray_env_utils.py
new file mode 100644
index 000000000000..d311de41ba96
--- /dev/null
+++ b/tests/test_ray_env_utils.py
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for vllm.v1.executor.ray_env_utils."""
+
+import os
+from unittest.mock import patch
+
+from vllm.v1.executor.ray_env_utils import get_driver_env_vars
+
+WORKER_VARS: set[str] = {
+    "CUDA_VISIBLE_DEVICES",
+    "LOCAL_RANK",
+}
+
+
+class TestDefaultPropagation:
+    """All env vars are propagated unless explicitly excluded."""
+
+    @patch.dict(os.environ, {"NCCL_DEBUG": "INFO"}, clear=False)
+    def test_nccl_prefix(self):
+        assert get_driver_env_vars(WORKER_VARS)["NCCL_DEBUG"] == "INFO"
+
+    @patch.dict(os.environ, {"HF_TOKEN": "secret"}, clear=False)
+    def test_hf_token(self):
+        assert "HF_TOKEN" in get_driver_env_vars(WORKER_VARS)
+
+    @patch.dict(os.environ, {"LMCACHE_LOCAL_CPU": "True"}, clear=False)
+    def test_lmcache_prefix(self):
+        assert "LMCACHE_LOCAL_CPU" in get_driver_env_vars(WORKER_VARS)
+
+    @patch.dict(os.environ, {"PYTHONHASHSEED": "42"}, clear=False)
+    def test_pythonhashseed(self):
+        assert get_driver_env_vars(WORKER_VARS)["PYTHONHASHSEED"] == "42"
+
+    @patch.dict(os.environ, {"MYLIB_FOO": "bar"}, clear=False)
+    def test_arbitrary_var_propagated(self):
+        assert get_driver_env_vars(WORKER_VARS)["MYLIB_FOO"] == "bar"
+
+
+class TestExclusion:
+    @patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"}, clear=False)
+    def test_worker_specific_excluded(self):
+        assert "CUDA_VISIBLE_DEVICES" not in get_driver_env_vars(WORKER_VARS)
+
+    @patch.dict(os.environ, {"LMCACHE_LOCAL_CPU": "True"}, clear=False)
+    @patch(
+        "vllm.v1.executor.ray_env_utils.RAY_NON_CARRY_OVER_ENV_VARS",
+        {"LMCACHE_LOCAL_CPU"},
+    )
+    def test_non_carry_over_blacklist(self):
+        assert "LMCACHE_LOCAL_CPU" not in get_driver_env_vars(WORKER_VARS)
diff --git a/tests/test_regression.py b/tests/test_regression.py
index a38b4428dea5..c38d974119aa 100644
--- a/tests/test_regression.py
+++ b/tests/test_regression.py
@@ -17,22 +17,6 @@
 from vllm.platforms import current_platform
 
 
-@pytest.mark.skip(reason="In V1, we reject tokens > max_seq_len")
-def test_duplicated_ignored_sequence_group():
-    """https://github.com/vllm-project/vllm/issues/1655"""
-
-    sampling_params = SamplingParams(temperature=0.01, top_p=0.1, max_tokens=256)
-    llm = LLM(
-        model="distilbert/distilgpt2",
-        max_num_batched_tokens=4096,
-        tensor_parallel_size=1,
-    )
-    prompts = ["This is a short prompt", "This is a very long prompt " * 1000]
-    outputs = llm.generate(prompts, sampling_params=sampling_params)
-
-    assert len(prompts) == len(outputs)
-
-
 @pytest.mark.parametrize(
     "model",
     [
@@ -72,9 +56,10 @@ def test_gc():
 
 
 def test_model_from_modelscope(monkeypatch: pytest.MonkeyPatch):
-    # model: https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary
+    # model: https://www.modelscope.ai/models/qwen/Qwen1.5-0.5B-Chat
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_MODELSCOPE", "True")
+        m.setenv("MODELSCOPE_DOMAIN", "www.modelscope.ai")
         # Don't use HF_TOKEN for ModelScope repos, otherwise it will fail
         # with 400 Client Error: Bad Request.
         m.setenv("HF_TOKEN", "")
diff --git a/tests/tokenizers_/conftest.py b/tests/tokenizers_/conftest.py
new file mode 100644
index 000000000000..c33ab351608d
--- /dev/null
+++ b/tests/tokenizers_/conftest.py
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from tests.utils import prewarm_hf_cache
+
+
+@pytest.fixture(scope="session", autouse=True)
+def _prewarm_hf_cache():
+    # tokenization_qwen.py downloads SimSun.ttf from
+    # qianwen-res.oss-cn-beijing.aliyuncs.com; both Qwen/Qwen-VL and
+    # Qwen/Qwen-VL-Chat look it up from the Chat repo.
+    prewarm_hf_cache([("Qwen/Qwen-VL-Chat", "SimSun.ttf")])
diff --git a/tests/tokenizers_/fixtures/deepseek_v4/test_input_1.json b/tests/tokenizers_/fixtures/deepseek_v4/test_input_1.json
new file mode 100644
index 000000000000..35e49588dfa3
--- /dev/null
+++ b/tests/tokenizers_/fixtures/deepseek_v4/test_input_1.json
@@ -0,0 +1,81 @@
+{
+    "tools": [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the weather for a specific location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city name"
+                        },
+                        "unit": {
+                            "type": "string",
+                            "enum": ["celsius", "fahrenheit"],
+                            "description": "Temperature unit"
+                        }
+                    },
+                    "required": ["location"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "search",
+                "description": "Search the web for information",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Search query"
+                        },
+                        "num_results": {
+                            "type": "integer",
+                            "description": "Number of results to return"
+                        }
+                    },
+                    "required": ["query"]
+                }
+            }
+        }
+    ],
+    "messages": [
+        {
+            "role": "system",
+            "content": "You are a helpful assistant."
+        },
+        {
+            "role": "user",
+            "content": "What's the weather in Beijing?"
+        },
+        {
+            "role": "assistant",
+            "reasoning": "The user wants to know the weather in Beijing. I should use the get_weather tool.",
+            "tool_calls": [
+                {
+                    "id": "call_001",
+                    "type": "function",
+                    "function": {
+                        "name": "get_weather",
+                        "arguments": "{\"location\": \"Beijing\", \"unit\": \"celsius\"}"
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "tool_call_id": "call_001",
+            "content": "{\"temperature\": 22, \"condition\": \"sunny\", \"humidity\": 45}"
+        },
+        {
+            "role": "assistant",
+            "reasoning": "Got the weather data. Let me format a nice response.",
+            "content": "The weather in Beijing is currently sunny with a temperature of 22°C and 45% humidity."
+        }
+    ]
+}
diff --git a/tests/tokenizers_/fixtures/deepseek_v4/test_input_2.json b/tests/tokenizers_/fixtures/deepseek_v4/test_input_2.json
new file mode 100644
index 000000000000..a301609ac2b7
--- /dev/null
+++ b/tests/tokenizers_/fixtures/deepseek_v4/test_input_2.json
@@ -0,0 +1,24 @@
+[
+  {
+    "role": "system",
+    "content": "You are a helpful assistant."
+  },
+  {
+    "role": "user",
+    "content": "Hello"
+  },
+  {
+    "role": "assistant",
+    "reasoning": "The user said hello, I should greet back.",
+    "content": "Hi there! How can I help you?"
+  },
+  {
+    "role": "user",
+    "content": "What is the capital of France?"
+  },
+  {
+    "role": "assistant",
+    "reasoning": "The user asks about the capital of France. It is Paris.",
+    "content": "The capital of France is Paris."
+  }
+]
\ No newline at end of file
diff --git a/tests/tokenizers_/fixtures/deepseek_v4/test_input_3.json b/tests/tokenizers_/fixtures/deepseek_v4/test_input_3.json
new file mode 100644
index 000000000000..d2dc42e3de20
--- /dev/null
+++ b/tests/tokenizers_/fixtures/deepseek_v4/test_input_3.json
@@ -0,0 +1,159 @@
+[
+  {
+    "role": "system",
+    "content": "该助手为DeepSeek，由深度求索公司创造。"
+  },
+  {
+    "role": "latest_reminder",
+    "content": "2026-02-21,星期六,广州,App,中文"
+  },
+  {
+    "role": "developer",
+    "content": "小柴胡冲剂和布洛芬能一起吃吗？\n\nCITATION FORMAT: 【{cursor_id}†L{start_line_id}(-L{end_line_id})?】",
+    "tools": [
+      {
+        "type": "function",
+        "function": {
+          "name": "search",
+          "description": "Web search. Split multiple queries with '||'.",
+          "parameters": {
+            "type": "object",
+            "properties": {
+              "queries": {
+                "type": "string",
+                "description": "query1||query2"
+              }
+            },
+            "required": [
+              "queries"
+            ],
+            "additionalProperties": false,
+            "$schema": "http://json-schema.org/draft-07/schema#"
+          }
+        }
+      },
+      {
+        "type": "function",
+        "function": {
+          "name": "open",
+          "description": "Batch open IDs (format 【{id}†...】) or URLs.",
+          "parameters": {
+            "type": "object",
+            "properties": {
+              "open_list": {
+                "type": "array",
+                "items": {
+                  "type": "object",
+                  "properties": {
+                    "id": {
+                      "description": "ID or URL",
+                      "anyOf": [
+                        {
+                          "type": "integer"
+                        },
+                        {
+                          "type": "string"
+                        }
+                      ],
+                      "default": -1
+                    },
+                    "cursor": {
+                      "type": "integer",
+                      "description": "",
+                      "default": -1
+                    },
+                    "loc": {
+                      "type": "integer",
+                      "description": "Start line",
+                      "default": -1
+                    },
+                    "num_lines": {
+                      "type": "integer",
+                      "description": "",
+                      "default": -1
+                    },
+                    "view_source": {
+                      "type": "boolean",
+                      "description": "",
+                      "default": false
+                    }
+                  },
+                  "additionalProperties": false
+                },
+                "description": ""
+              }
+            },
+            "required": [
+              "open_list"
+            ],
+            "additionalProperties": false,
+            "$schema": "http://json-schema.org/draft-07/schema#"
+          }
+        }
+      },
+      {
+        "type": "function",
+        "function": {
+          "name": "find",
+          "description": "Find exact text pattern in pages.",
+          "parameters": {
+            "type": "object",
+            "properties": {
+              "find_list": {
+                "type": "array",
+                "items": {
+                  "type": "object",
+                  "properties": {
+                    "pattern": {
+                      "type": "string",
+                      "description": ""
+                    },
+                    "cursor": {
+                      "type": "integer",
+                      "description": "",
+                      "default": -1
+                    }
+                  },
+                  "required": [
+                    "pattern"
+                  ],
+                  "additionalProperties": false
+                },
+                "description": ""
+              }
+            },
+            "required": [
+              "find_list"
+            ],
+            "additionalProperties": false,
+            "$schema": "http://json-schema.org/draft-07/schema#"
+          }
+        }
+      }
+    ]
+  },
+  {
+    "role": "assistant",
+    "content": "",
+    "reasoning": "用户想知道小柴胡冲剂和布洛芬能否一起服用。",
+    "tool_calls": [
+      {
+        "type": "function",
+        "function": {
+          "name": "search",
+          "arguments": "{\"queries\": \"小柴胡冲剂 布洛芬 相互作用 一起吃\"}"
+        }
+      }
+    ]
+  },
+  {
+    "role": "tool",
+    "content": "[0]"
+  },
+  {
+    "role": "assistant",
+    "content": "请及时就医。",
+    "reasoning": "现在开始组织回答。",
+    "tool_calls": []
+  }
+]
\ No newline at end of file
diff --git a/tests/tokenizers_/fixtures/deepseek_v4/test_input_4.json b/tests/tokenizers_/fixtures/deepseek_v4/test_input_4.json
new file mode 100644
index 000000000000..d5e0791dd699
--- /dev/null
+++ b/tests/tokenizers_/fixtures/deepseek_v4/test_input_4.json
@@ -0,0 +1,28 @@
+[
+  {
+    "role": "system",
+    "content": "该助手为DeepSeek-V3，由深度求索公司创造。\n今天是2025年10月17日，星期五。"
+  },
+  {
+    "role": "latest_reminder",
+    "content": "2024-11-15,上海市,App,中文"
+  },
+  {
+    "role": "user",
+    "content": "热海大滚锅是世界著名温泉吗"
+  },
+  {
+    "role": "assistant",
+    "content": "关于热海大滚锅是否是世界著名温泉，可以这样总结：它在**中国乃至全球的地热奇观中占有重要地位**，但“世界著名”的称号更侧重于它作为**独特的地质现象和旅游景点**，而非普遍的温泉疗养体验。\n\n为了让你快速了解，我整理了一个简要的表格：\n\n| 维度 | 热海大滚锅的地位与特点 |\n| :--- | :--- |\n| **地质奇观** | **世界罕见**的高温地热系统，泉眼多、水温高、形态多样。 |\n| **国内声誉** | **中国三大地热区之一**，**国家5A级旅游景区**，自古闻名（徐霞客曾游历并记载）。 |\n| **国际知名度** | 在一些旅游平台被提及为“世界六大温泉”之一，但此说法流传不广，其国际声誉更多建立在地质独特性上。 |\n| **核心体验** | **观赏地热奇观**（如97℃沸腾的“大滚锅”）、**体验温泉煮鸡蛋**。 |\n\n### 💡 游玩攻略与温馨提示\n\n如果你计划前往热海大滚锅，这里有一些实用信息供你参考：\n\n- **门票与开放时间**：\n    - **门票**：景区门票约为**50元/人**。如果选择包含温泉沐浴的套餐，价格会更高，例如约**288元**。\n    - **开放时间**：景区一般**08:00-18:00**开放，但具体时间可能变动，建议提前核实。\n\n- **特色体验**：\n    - **温泉煮鸡蛋**：这几乎是必试项目。可以在景区门口购买用草绳串起的生鸡蛋（约5-8元/串），然后到“大滚锅”旁的指定区域蒸煮，几分钟便可熟食，趣味十足。\n    - **金汤足浴**：可以直接用从“大滚锅”流出的温泉水泡脚，缓解旅途疲劳。\n\n- **注意事项**：\n    - **安全第一**：“大滚锅”水温极高，务必遵守游览规则，在指定区域内观赏，切勿随意触碰泉水。\n    - **规划行程**：建议为热海景区预留**3-4小时**的游览时间。景区内步道不走回头路，出入口有观光车接送。\n\n希望这些信息能帮助你更好地了解热海大滚锅。如果你对腾冲的其他景点或者行程规划有更多疑问，我很乐意提供进一步的信息。",
+    "mask": 1
+  },
+  {
+    "role": "user",
+    "content": "世界著名温泉有哪些",
+    "task": "action"
+  },
+  {
+    "role": "assistant",
+    "content": "Search"
+  }
+]
\ No newline at end of file
diff --git a/tests/tokenizers_/fixtures/deepseek_v4/test_output_1.txt b/tests/tokenizers_/fixtures/deepseek_v4/test_output_1.txt
new file mode 100644
index 000000000000..dbd823476c1c
--- /dev/null
+++ b/tests/tokenizers_/fixtures/deepseek_v4/test_output_1.txt
@@ -0,0 +1,36 @@
+<｜begin▁of▁sentence｜>
+
+## Tools
+
+You have access to a set of tools to help answer the user's question. You can invoke tools by writing a "<｜DSML｜tool_calls>" block like the following:
+
+<｜DSML｜tool_calls>
+<｜DSML｜invoke name="$TOOL_NAME">
+<｜DSML｜parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</｜DSML｜parameter>
+...
+</｜DSML｜invoke>
+<｜DSML｜invoke name="$TOOL_NAME2">
+...
+</｜DSML｜invoke>
+</｜DSML｜tool_calls>
+
+String parameters should be specified as is and set `string="true"`. For all other types (numbers, booleans, arrays, objects), pass the value in JSON format and set `string="false"`.
+
+If thinking_mode is enabled (triggered by <think>), you MUST output your complete reasoning inside <think>...</think> BEFORE any tool calls or final response.
+
+Otherwise, output directly after </think> with tool calls or final response.
+
+### Available Tool Schemas
+
+{"name": "get_weather", "description": "Get the weather for a specific location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city name"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "Temperature unit"}}, "required": ["location"]}}
+{"name": "search", "description": "Search the web for information", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}, "num_results": {"type": "integer", "description": "Number of results to return"}}, "required": ["query"]}}
+
+You MUST strictly follow the above defined tool name and parameter schemas to invoke tool calls.
+You are a helpful assistant.<｜User｜>What's the weather in Beijing?<｜Assistant｜><think>The user wants to know the weather in Beijing. I should use the get_weather tool.</think>
+
+<｜DSML｜tool_calls>
+<｜DSML｜invoke name="get_weather">
+<｜DSML｜parameter name="location" string="true">Beijing</｜DSML｜parameter>
+<｜DSML｜parameter name="unit" string="true">celsius</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜tool_calls><｜end▁of▁sentence｜><｜User｜><tool_result>{"temperature": 22, "condition": "sunny", "humidity": 45}</tool_result><｜Assistant｜><think>Got the weather data. Let me format a nice response.</think>The weather in Beijing is currently sunny with a temperature of 22°C and 45% humidity.<｜end▁of▁sentence｜>
\ No newline at end of file
diff --git a/tests/tokenizers_/fixtures/deepseek_v4/test_output_2.txt b/tests/tokenizers_/fixtures/deepseek_v4/test_output_2.txt
new file mode 100644
index 000000000000..fc397ef54972
--- /dev/null
+++ b/tests/tokenizers_/fixtures/deepseek_v4/test_output_2.txt
@@ -0,0 +1 @@
+<｜begin▁of▁sentence｜>You are a helpful assistant.<｜User｜>Hello<｜Assistant｜></think>Hi there! How can I help you?<｜end▁of▁sentence｜><｜User｜>What is the capital of France?<｜Assistant｜><think>The user asks about the capital of France. It is Paris.</think>The capital of France is Paris.<｜end▁of▁sentence｜>
\ No newline at end of file
diff --git a/tests/tokenizers_/fixtures/deepseek_v4/test_output_3.txt b/tests/tokenizers_/fixtures/deepseek_v4/test_output_3.txt
new file mode 100644
index 000000000000..edee563300d4
--- /dev/null
+++ b/tests/tokenizers_/fixtures/deepseek_v4/test_output_3.txt
@@ -0,0 +1,38 @@
+<｜begin▁of▁sentence｜>该助手为DeepSeek，由深度求索公司创造。<｜latest_reminder｜>2026-02-21,星期六,广州,App,中文<｜User｜>小柴胡冲剂和布洛芬能一起吃吗？
+
+CITATION FORMAT: 【{cursor_id}†L{start_line_id}(-L{end_line_id})?】
+
+## Tools
+
+You have access to a set of tools to help answer the user's question. You can invoke tools by writing a "<｜DSML｜tool_calls>" block like the following:
+
+<｜DSML｜tool_calls>
+<｜DSML｜invoke name="$TOOL_NAME">
+<｜DSML｜parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</｜DSML｜parameter>
+...
+</｜DSML｜invoke>
+<｜DSML｜invoke name="$TOOL_NAME2">
+...
+</｜DSML｜invoke>
+</｜DSML｜tool_calls>
+
+String parameters should be specified as is and set `string="true"`. For all other types (numbers, booleans, arrays, objects), pass the value in JSON format and set `string="false"`.
+
+If thinking_mode is enabled (triggered by <think>), you MUST output your complete reasoning inside <think>...</think> BEFORE any tool calls or final response.
+
+Otherwise, output directly after </think> with tool calls or final response.
+
+### Available Tool Schemas
+
+{"name": "search", "description": "Web search. Split multiple queries with '||'.", "parameters": {"type": "object", "properties": {"queries": {"type": "string", "description": "query1||query2"}}, "required": ["queries"], "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
+{"name": "open", "description": "Batch open IDs (format 【{id}†...】) or URLs.", "parameters": {"type": "object", "properties": {"open_list": {"type": "array", "items": {"type": "object", "properties": {"id": {"description": "ID or URL", "anyOf": [{"type": "integer"}, {"type": "string"}], "default": -1}, "cursor": {"type": "integer", "description": "", "default": -1}, "loc": {"type": "integer", "description": "Start line", "default": -1}, "num_lines": {"type": "integer", "description": "", "default": -1}, "view_source": {"type": "boolean", "description": "", "default": false}}, "additionalProperties": false}, "description": ""}}, "required": ["open_list"], "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
+{"name": "find", "description": "Find exact text pattern in pages.", "parameters": {"type": "object", "properties": {"find_list": {"type": "array", "items": {"type": "object", "properties": {"pattern": {"type": "string", "description": ""}, "cursor": {"type": "integer", "description": "", "default": -1}}, "required": ["pattern"], "additionalProperties": false}, "description": ""}}, "required": ["find_list"], "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
+
+You MUST strictly follow the above defined tool name and parameter schemas to invoke tool calls.
+<｜Assistant｜><think>用户想知道小柴胡冲剂和布洛芬能否一起服用。</think>
+
+<｜DSML｜tool_calls>
+<｜DSML｜invoke name="search">
+<｜DSML｜parameter name="queries" string="true">小柴胡冲剂 布洛芬 相互作用 一起吃</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜tool_calls><｜end▁of▁sentence｜><｜User｜><tool_result>[0]</tool_result><｜Assistant｜><think>现在开始组织回答。</think>请及时就医。<｜end▁of▁sentence｜>
\ No newline at end of file
diff --git a/tests/tokenizers_/fixtures/deepseek_v4/test_output_4.txt b/tests/tokenizers_/fixtures/deepseek_v4/test_output_4.txt
new file mode 100644
index 000000000000..d30bd5d06cf3
--- /dev/null
+++ b/tests/tokenizers_/fixtures/deepseek_v4/test_output_4.txt
@@ -0,0 +1,29 @@
+<｜begin▁of▁sentence｜>该助手为DeepSeek-V3，由深度求索公司创造。
+今天是2025年10月17日，星期五。<｜latest_reminder｜>2024-11-15,上海市,App,中文<｜User｜>热海大滚锅是世界著名温泉吗<｜Assistant｜></think>关于热海大滚锅是否是世界著名温泉，可以这样总结：它在**中国乃至全球的地热奇观中占有重要地位**，但“世界著名”的称号更侧重于它作为**独特的地质现象和旅游景点**，而非普遍的温泉疗养体验。
+
+为了让你快速了解，我整理了一个简要的表格：
+
+| 维度 | 热海大滚锅的地位与特点 |
+| :--- | :--- |
+| **地质奇观** | **世界罕见**的高温地热系统，泉眼多、水温高、形态多样。 |
+| **国内声誉** | **中国三大地热区之一**，**国家5A级旅游景区**，自古闻名（徐霞客曾游历并记载）。 |
+| **国际知名度** | 在一些旅游平台被提及为“世界六大温泉”之一，但此说法流传不广，其国际声誉更多建立在地质独特性上。 |
+| **核心体验** | **观赏地热奇观**（如97℃沸腾的“大滚锅”）、**体验温泉煮鸡蛋**。 |
+
+### 💡 游玩攻略与温馨提示
+
+如果你计划前往热海大滚锅，这里有一些实用信息供你参考：
+
+- **门票与开放时间**：
+    - **门票**：景区门票约为**50元/人**。如果选择包含温泉沐浴的套餐，价格会更高，例如约**288元**。
+    - **开放时间**：景区一般**08:00-18:00**开放，但具体时间可能变动，建议提前核实。
+
+- **特色体验**：
+    - **温泉煮鸡蛋**：这几乎是必试项目。可以在景区门口购买用草绳串起的生鸡蛋（约5-8元/串），然后到“大滚锅”旁的指定区域蒸煮，几分钟便可熟食，趣味十足。
+    - **金汤足浴**：可以直接用从“大滚锅”流出的温泉水泡脚，缓解旅途疲劳。
+
+- **注意事项**：
+    - **安全第一**：“大滚锅”水温极高，务必遵守游览规则，在指定区域内观赏，切勿随意触碰泉水。
+    - **规划行程**：建议为热海景区预留**3-4小时**的游览时间。景区内步道不走回头路，出入口有观光车接送。
+
+希望这些信息能帮助你更好地了解热海大滚锅。如果你对腾冲的其他景点或者行程规划有更多疑问，我很乐意提供进一步的信息。<｜end▁of▁sentence｜><｜User｜>世界著名温泉有哪些<｜Assistant｜></think><｜action｜>Search<｜end▁of▁sentence｜>
\ No newline at end of file
diff --git a/tests/tokenizers_/test_deepseek_v4.py b/tests/tokenizers_/test_deepseek_v4.py
new file mode 100644
index 000000000000..358732eabf40
--- /dev/null
+++ b/tests/tokenizers_/test_deepseek_v4.py
@@ -0,0 +1,290 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+from vllm.entrypoints.chat_utils import parse_chat_messages
+from vllm.renderers.registry import RENDERER_REGISTRY
+from vllm.tokenizers.deepseek_v4 import get_deepseek_v4_tokenizer
+from vllm.tokenizers.registry import TokenizerRegistry
+
+FIXTURES_DIR = Path(__file__).parent / "fixtures" / "deepseek_v4"
+
+
+class FakeHfTokenizer:
+    vocab_size = 100
+
+    def get_added_vocab(self) -> dict[str, int]:
+        return {"</think>": 100}
+
+    def encode(
+        self,
+        text: str,
+        add_special_tokens: bool = False,
+        **kwargs,
+    ) -> list[int]:
+        self.last_encode = (text, add_special_tokens, kwargs)
+        return [len(text)]
+
+
+def _tokenizer():
+    return get_deepseek_v4_tokenizer(FakeHfTokenizer())
+
+
+def _model_config():
+    return SimpleNamespace(
+        multimodal_config=None,
+        allowed_local_media_path="",
+        allowed_media_domains=None,
+        enable_prompt_embeds=False,
+    )
+
+
+def _load_reference_case(case_id: int):
+    data = json.loads((FIXTURES_DIR / f"test_input_{case_id}.json").read_text())
+    if isinstance(data, dict):
+        return data["messages"], data.get("tools")
+    return data, None
+
+
+def _render_reference_case(case_id: int, **kwargs):
+    messages, tools = _load_reference_case(case_id)
+    conversation, _, _ = parse_chat_messages(
+        messages,
+        _model_config(),
+        content_format="string",
+    )
+    return _tokenizer().apply_chat_template(
+        conversation=conversation,
+        messages=messages,
+        tools=tools,
+        tokenize=False,
+        **kwargs,
+    )
+
+
+def test_deepseek_v4_tokenizer_registered():
+    assert TokenizerRegistry.load_tokenizer_cls("deepseek_v4").__name__ == (
+        "DeepseekV4Tokenizer"
+    )
+    assert RENDERER_REGISTRY.load_renderer_cls("deepseek_v4").__name__ == (
+        "DeepseekV4Renderer"
+    )
+
+
+def test_deepseek_v4_defaults_to_chat_mode():
+    prompt = _tokenizer().apply_chat_template(
+        [{"role": "user", "content": "Hello"}],
+        tokenize=False,
+    )
+
+    assert prompt == ("<｜begin▁of▁sentence｜><｜User｜>Hello<｜Assistant｜></think>")
+
+
+@pytest.mark.parametrize("kwargs", [{"thinking": True}, {"enable_thinking": True}])
+def test_deepseek_v4_enables_thinking_with_compatible_kwargs(kwargs):
+    prompt = _tokenizer().apply_chat_template(
+        [{"role": "user", "content": "Hello"}],
+        tokenize=False,
+        **kwargs,
+    )
+
+    assert prompt == ("<｜begin▁of▁sentence｜><｜User｜>Hello<｜Assistant｜><think>")
+
+
+def test_deepseek_v4_uses_v4_tool_prompt_from_request_tools():
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get weather for a city",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                    "required": ["city"],
+                },
+            },
+        }
+    ]
+
+    prompt = _tokenizer().apply_chat_template(
+        [{"role": "user", "content": "Weather?"}],
+        tools=tools,
+        tokenize=False,
+    )
+
+    assert "## Tools" in prompt
+    assert "<｜DSML｜tool_calls>" in prompt
+    assert "</｜DSML｜tool_calls>" in prompt
+    assert "function_calls" not in prompt
+    assert '"name": "get_weather"' in prompt
+    assert prompt.endswith("<｜User｜>Weather?<｜Assistant｜></think>")
+
+
+def test_deepseek_v4_renders_parsed_history_tool_arguments():
+    messages = [
+        {"role": "user", "content": "List the repo"},
+        {
+            "role": "assistant",
+            "tool_calls": [
+                {
+                    "id": "call_1",
+                    "type": "function",
+                    "function": {
+                        "name": "str_replace_editor",
+                        "arguments": '{"command": "view", "path": "/testbed"}',
+                    },
+                }
+            ],
+        },
+        {
+            "role": "tool",
+            "tool_call_id": "call_1",
+            "content": "file list",
+        },
+    ]
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "str_replace_editor",
+                "description": "Edit files",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "command": {"type": "string"},
+                        "path": {"type": "string"},
+                    },
+                    "required": ["command", "path"],
+                },
+            },
+        }
+    ]
+    conversation, _, _ = parse_chat_messages(
+        messages,
+        _model_config(),
+        content_format="string",
+    )
+
+    prompt = _tokenizer().apply_chat_template(
+        conversation=conversation,
+        messages=messages,
+        tools=tools,
+        tokenize=False,
+    )
+
+    assert '<｜DSML｜parameter name="command" string="true">view' in prompt
+    assert '<｜DSML｜parameter name="path" string="true">/testbed' in prompt
+    assert 'parameter name="arguments"' not in prompt
+
+
+@pytest.mark.parametrize("reasoning_effort", ["minimal", "low", "medium", "high"])
+def test_deepseek_v4_accepts_openai_reasoning_effort_values(reasoning_effort):
+    prompt = _tokenizer().apply_chat_template(
+        [{"role": "user", "content": "Hello"}],
+        tokenize=False,
+        enable_thinking=True,
+        reasoning_effort=reasoning_effort,
+    )
+
+    assert prompt.endswith("<｜Assistant｜><think>")
+    assert "Reasoning Effort: Absolute maximum" not in prompt
+
+
+def test_deepseek_v4_none_reasoning_effort_disables_thinking():
+    prompt = _tokenizer().apply_chat_template(
+        [{"role": "user", "content": "Hello"}],
+        tokenize=False,
+        enable_thinking=True,
+        reasoning_effort="none",
+    )
+
+    assert prompt == ("<｜begin▁of▁sentence｜><｜User｜>Hello<｜Assistant｜></think>")
+
+
+@pytest.mark.parametrize(
+    ("reasoning_effort", "expected_mode", "expected_effort"),
+    [
+        ("none", "chat", None),
+        ("minimal", "thinking", "high"),
+        ("low", "thinking", "high"),
+        ("medium", "thinking", "high"),
+        ("high", "thinking", "high"),
+        ("xhigh", "thinking", "max"),
+        ("max", "thinking", "max"),
+        ("unexpected", "thinking", "high"),
+    ],
+)
+def test_deepseek_v4_maps_compatible_thinking_reasoning_effort_values(
+    monkeypatch: pytest.MonkeyPatch,
+    reasoning_effort,
+    expected_mode,
+    expected_effort,
+):
+    captured_kwargs = []
+
+    def fake_encode_messages(messages, **kwargs):
+        captured_kwargs.append(kwargs)
+        return "prompt"
+
+    monkeypatch.setattr(
+        "vllm.tokenizers.deepseek_v4.encode_messages",
+        fake_encode_messages,
+    )
+
+    _tokenizer().apply_chat_template(
+        [{"role": "user", "content": "Hello"}],
+        tokenize=False,
+        enable_thinking=True,
+        reasoning_effort=reasoning_effort,
+    )
+
+    assert captured_kwargs[-1]["thinking_mode"] == expected_mode
+    assert captured_kwargs[-1]["reasoning_effort"] == expected_effort
+
+
+def test_deepseek_v4_preserves_reference_max_reasoning_effort():
+    prompt = _tokenizer().apply_chat_template(
+        [{"role": "user", "content": "Hello"}],
+        tokenize=False,
+        enable_thinking=True,
+        reasoning_effort="max",
+    )
+
+    assert prompt.startswith(
+        "<｜begin▁of▁sentence｜>Reasoning Effort: Absolute maximum"
+    )
+
+
+def test_deepseek_v4_maps_xhigh_to_reference_max_reasoning_effort():
+    prompt = _tokenizer().apply_chat_template(
+        [{"role": "user", "content": "Hello"}],
+        tokenize=False,
+        enable_thinking=True,
+        reasoning_effort="xhigh",
+    )
+
+    assert prompt.startswith(
+        "<｜begin▁of▁sentence｜>Reasoning Effort: Absolute maximum"
+    )
+
+
+@pytest.mark.parametrize(
+    ("case_id", "kwargs"),
+    [
+        (1, {"thinking": True}),
+        (2, {"thinking": True}),
+        (3, {"thinking": True}),
+        (4, {}),
+    ],
+)
+def test_deepseek_v4_matches_reference_golden_fixtures(case_id, kwargs):
+    prompt = _render_reference_case(case_id, **kwargs)
+
+    expected = (FIXTURES_DIR / f"test_output_{case_id}.txt").read_text()
+    assert prompt == expected
diff --git a/tests/tokenizers_/test_mistral.py b/tests/tokenizers_/test_mistral.py
index faff61150265..2023337e8577 100644
--- a/tests/tokenizers_/test_mistral.py
+++ b/tests/tokenizers_/test_mistral.py
@@ -1,361 +1,45 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import copy
 from typing import Any
 
+import llguidance
 import pytest
 from mistral_common.exceptions import InvalidMessageStructureException
+from mistral_common.guidance.grammar_factory import GrammarFactory
 from mistral_common.tokens.tokenizers.base import SpecialTokenPolicy
 
 from vllm.tokenizers.mistral import (
     MistralTokenizer,
-    _prepare_apply_chat_template_tools_and_messages,
+    _validate_apply_chat_template_args,
 )
 
 
-@pytest.mark.parametrize(
-    "openai_request,expected_mistral_output",
-    [
-        (
-            {
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                "tools": [
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                        },
-                    }
-                ],
-            },
-            (
-                [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                [
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    }
-                ],
-            ),
-        ),
-        (
-            {
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                "tools": [
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    }
-                ],
-            },
-            (
-                [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                [
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    }
-                ],
-            ),
-        ),
-        (
-            {
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                "tools": [
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "unsupported_field": False,
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    },
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "unsupported_field2": False,
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    },
-                ],
-            },
-            (
-                [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                [
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    },
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    },
-                ],
-            ),
-        ),
-        (
-            {
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                "tools": [
-                    {
-                        "type": "function",
-                        "unsupported_field": False,
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    },
-                    {
-                        "type": "function",
-                        "unsupported_field2": False,
-                        "function": {
-                            "description": "Fetch the current local date and time 2.",
-                            "name": "get_current_time2",
-                            "parameters": {"a": "1"},
-                        },
-                    },
-                ],
-            },
-            (
-                [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                [
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    },
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time 2.",
-                            "name": "get_current_time2",
-                            "parameters": {"a": "1"},
-                        },
-                    },
-                ],
-            ),
-        ),
-    ],
-)
-def test_prepare_apply_chat_template_tools_and_messages(
-    openai_request, expected_mistral_output
-):
-    actual_request = _prepare_apply_chat_template_tools_and_messages(
-        openai_request["messages"], openai_request["tools"]
-    )
-    assert actual_request == expected_mistral_output
-
-
-# Tool use with list content and reasoning
-@pytest.mark.parametrize(
-    "openai_request,expected_mistral_output",
-    [
-        (
-            {
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": "What's the weather in Paris?",
-                    },
-                    {
-                        "role": "assistant",
-                        "reasoning": None,
-                        "content": None,
-                        "tool_calls": [
-                            {
-                                "id": "call123",
-                                "type": "function",
-                                "function": {
-                                    "name": "get_weather",
-                                    "arguments": '{"city": "Paris"}',
-                                },
-                            }
-                        ],
-                    },
-                    {
-                        "role": "tool",
-                        "content": [{"type": "text", "text": "Rainy"}],
-                        "name": "get_weather",
-                        "tool_call_id": "call123",
-                    },
-                ],
-                "tools": [
-                    {
-                        "type": "function",
-                        "function": {
-                            "name": "get_weather",
-                            "description": "Gets the current weather in a city.",
-                            "parameters": {
-                                "type": "object",
-                                "properties": {
-                                    "city": {
-                                        "type": "string",
-                                        "description": "The city name",
-                                    }
-                                },
-                                "required": ["city"],
-                            },
-                        },
-                    }
-                ],
-            },
-            (
-                [
-                    {
-                        "role": "user",
-                        "content": "What's the weather in Paris?",
-                    },
-                    {
-                        "role": "assistant",
-                        "content": None,
-                        "tool_calls": [
-                            {
-                                "id": "call123",
-                                "type": "function",
-                                "function": {
-                                    "name": "get_weather",
-                                    "arguments": '{"city": "Paris"}',
-                                },
-                            }
-                        ],
-                    },
-                    {
-                        "role": "tool",
-                        "content": [{"type": "text", "text": "Rainy"}],
-                        "name": "get_weather",
-                        "tool_call_id": "call123",
-                    },
-                ],
-                [
-                    {
-                        "type": "function",
-                        "function": {
-                            "name": "get_weather",
-                            "description": "Gets the current weather in a city.",
-                            "parameters": {
-                                "type": "object",
-                                "properties": {
-                                    "city": {
-                                        "type": "string",
-                                        "description": "The city name",
-                                    }
-                                },
-                                "required": ["city"],
-                            },
-                        },
-                    }
-                ],
-            ),
-        )
-    ],
-)
-def test_prepare_apply_chat_template_tools_and_messages_list_content(
-    openai_request, expected_mistral_output
-):
-    actual_request = _prepare_apply_chat_template_tools_and_messages(
-        openai_request["messages"], openai_request["tools"]
-    )
-    assert actual_request == expected_mistral_output
-
-
-def test_prepare_apply_chat_template_generation_prompt_and_continue():
+def test_validate_apply_chat_template_args():
+    # add_generation_prompt with assistant last message → error
     messages = [{"role": "assistant", "content": "Hello"}]
-    tools: list[dict[str, Any]] = []
     with pytest.raises(ValueError):
-        _prepare_apply_chat_template_tools_and_messages(
-            messages, tools, add_generation_prompt=True
-        )
+        _validate_apply_chat_template_args(messages, add_generation_prompt=True)
 
+    # add_generation_prompt with user last message → ok
     messages = [{"role": "user", "content": "Hello"}]
-    out_messages, _ = _prepare_apply_chat_template_tools_and_messages(
-        messages, tools, add_generation_prompt=True
-    )
-    assert out_messages == [{"role": "user", "content": "Hello"}]
+    _validate_apply_chat_template_args(messages, add_generation_prompt=True)
 
+    # both add_generation_prompt and continue_final_message → error
     with pytest.raises(ValueError):
-        _prepare_apply_chat_template_tools_and_messages(
-            messages, tools, add_generation_prompt=True, continue_final_message=True
+        _validate_apply_chat_template_args(
+            messages, add_generation_prompt=True, continue_final_message=True
         )
 
+    # continue_final_message with assistant last message → ok
     messages = [{"role": "assistant", "content": "Hello"}]
-    out_messages, _ = _prepare_apply_chat_template_tools_and_messages(
-        messages, tools, add_generation_prompt=False, continue_final_message=True
-    )
-    assert out_messages == [{"role": "assistant", "content": "Hello"}]
+    _validate_apply_chat_template_args(messages, continue_final_message=True)
 
+    # continue_final_message with user last message → error
     messages = [{"role": "user", "content": "Hello"}]
     with pytest.raises(ValueError):
-        _prepare_apply_chat_template_tools_and_messages(
-            messages, tools, add_generation_prompt=False, continue_final_message=True
-        )
+        _validate_apply_chat_template_args(messages, continue_final_message=True)
 
 
 @pytest.fixture(scope="module")
@@ -2407,3 +2091,146 @@ def test_convert_ids_to_tokens(
         assert actual_tokens == expected_tokens
 
         assert mistral_tokenizer.convert_ids_to_tokens([]) == []
+
+    def test_grammar_factory(self, mistral_tokenizer: MistralTokenizer) -> None:
+        # works in this case cause Mistral 7B is < v11 and SPM
+        if not mistral_tokenizer.is_tekken:
+            with pytest.raises(AttributeError):
+                mistral_tokenizer.grammar_factory  # noqa: B018
+            return
+        factory = mistral_tokenizer.grammar_factory
+        assert isinstance(factory, GrammarFactory)
+
+        # Test caching
+        factory_2 = mistral_tokenizer.grammar_factory
+        assert factory is factory_2
+
+    def test_llg_tokenizer(self, mistral_tokenizer: MistralTokenizer) -> None:
+        if not mistral_tokenizer.is_tekken:
+            with pytest.raises(ValueError):
+                mistral_tokenizer.llg_tokenizer  # noqa: B018
+            return
+
+        llg_tokenizer = mistral_tokenizer.llg_tokenizer
+        assert isinstance(llg_tokenizer, llguidance.LLTokenizer)
+
+        # Test caching
+        llg_tokenizer_2 = mistral_tokenizer.llg_tokenizer
+        assert llg_tokenizer is llg_tokenizer_2
+
+    @pytest.mark.parametrize(
+        "messages,tools,tekken_expected_substrings,spm_expected_substrings",
+        [
+            pytest.param(
+                [{"role": "user", "content": "Hello"}],
+                [{"type": "function", "function": {"name": "do_nothing"}}],
+                ["do_nothing", '"description": ""', '"parameters": {}'],
+                ["do_nothing", '"description":▁""', '"parameters":▁{}'],
+                id="tool_without_description_and_parameters",
+            ),
+            pytest.param(
+                [
+                    {"role": "user", "content": "Do nothing"},
+                    {
+                        "role": "assistant",
+                        "content": "",
+                        "tool_calls": [
+                            {
+                                "id": "123456789",
+                                "type": "function",
+                                "function": {
+                                    "name": "do_nothing",
+                                    "arguments": None,
+                                },
+                            }
+                        ],
+                    },
+                    {
+                        "role": "tool",
+                        "tool_call_id": "123456789",
+                        "content": "done",
+                    },
+                ],
+                [{"type": "function", "function": {"name": "do_nothing"}}],
+                ["do_nothing"],
+                ["do_nothing"],
+                id="tool_call_with_none_arguments",
+            ),
+        ],
+    )
+    def test_apply_chat_template_tool_optional_fields(
+        self,
+        mistral_tokenizer: MistralTokenizer,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        tekken_expected_substrings: list[str],
+        spm_expected_substrings: list[str],
+    ) -> None:
+        output = mistral_tokenizer.apply_chat_template(
+            messages, tools=tools, add_generation_prompt=True
+        )
+        decoded = mistral_tokenizer.tokenizer.decode(output, SpecialTokenPolicy.KEEP)
+
+        expected = (
+            tekken_expected_substrings
+            if mistral_tokenizer.is_tekken
+            else spm_expected_substrings
+        )
+        for substring in expected:
+            assert substring in decoded
+
+    def test_apply_chat_template_tools_not_mutated(
+        self, mistral_tokenizer: MistralTokenizer
+    ) -> None:
+        messages: list[dict[str, Any]] = [
+            {"role": "user", "content": "Hello"},
+        ]
+        tools: list[dict[str, Any]] = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "description": "Gets weather.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "city": {"type": "string"},
+                        },
+                    },
+                },
+            },
+        ]
+        original_tools = copy.deepcopy(tools)
+
+        mistral_tokenizer.apply_chat_template(
+            messages, tools=tools, add_generation_prompt=True
+        )
+
+        assert tools == original_tools
+
+    @pytest.mark.parametrize(
+        "reasoning_key",
+        ["reasoning", "reasoning_content"],
+    )
+    def test_apply_chat_template_reasoning_assistant(
+        self, mistral_tokenizer: MistralTokenizer, reasoning_key: str
+    ) -> None:
+        if not mistral_tokenizer.is_tekken:
+            pytest.skip("Reasoning tokens only supported on tekken tokenizers")
+
+        messages: list[dict[str, Any]] = [
+            {"role": "user", "content": "What is 2+2?"},
+            {
+                "role": "assistant",
+                "content": "4",
+                reasoning_key: "2+2 equals 4",
+            },
+            {"role": "user", "content": "Are you sure?"},
+        ]
+
+        output = mistral_tokenizer.apply_chat_template(
+            messages, add_generation_prompt=True
+        )
+        decoded = mistral_tokenizer.tokenizer.decode(output, SpecialTokenPolicy.KEEP)
+
+        assert "[THINK]2+2 equals 4[/THINK]" in decoded
diff --git a/tests/tool_parsers/test_apertus_tool_parser.py b/tests/tool_parsers/test_apertus_tool_parser.py
new file mode 100644
index 000000000000..5ab0a2bcc732
--- /dev/null
+++ b/tests/tool_parsers/test_apertus_tool_parser.py
@@ -0,0 +1,433 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+from typing import Any
+from unittest.mock import MagicMock
+
+import pytest
+
+from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+from vllm.tool_parsers.apertus_tool_parser import (
+    TOOL_CALLS_PREFIX,
+    TOOL_CALLS_SUFFIX,
+    ApertusToolParser,
+)
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def mock_tokenizer():
+    tokenizer = MagicMock()
+    tokenizer.encode.return_value = [1, 2, 3]
+    # Include the tool call tokens in the vocab for the parser
+    tokenizer.get_vocab.return_value = {TOOL_CALLS_PREFIX: 100, TOOL_CALLS_SUFFIX: 101}
+    return tokenizer
+
+
+@pytest.fixture
+def parser(mock_tokenizer):
+    return ApertusToolParser(mock_tokenizer)
+
+
+@pytest.fixture
+def mock_request():
+    request = MagicMock(spec=ChatCompletionRequest)
+    request.tools = []
+    request.tool_choice = "auto"
+    return request
+
+
+# ---------------------------------------------------------------------------
+# Non-streaming extraction tests
+# ---------------------------------------------------------------------------
+
+
+class TestExtractToolCalls:
+    def test_no_tool_calls(self, parser, mock_request):
+        model_output = "Hello, how can I help you today?"
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is False
+        assert result.tool_calls == []
+        assert result.content == model_output
+
+    def test_single_tool_call(self, parser, mock_request):
+        model_output = (
+            '<|tools_prefix|>[{"get_weather": {"location": "London"}}]<|tools_suffix|>'
+        )
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].function.name == "get_weather"
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"location": "London"}
+
+    def test_multiple_arguments(self, parser, mock_request):
+        model_output = (
+            '<|tools_prefix|>[{"get_weather": '
+            '{"location": "San Francisco", '
+            '"unit": "celsius"}}]<|tools_suffix|>'
+        )
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].function.name == "get_weather"
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"location": "San Francisco", "unit": "celsius"}
+
+    def test_text_before_tool_call(self, parser, mock_request):
+        model_output = (
+            "Let me check the weather for you. "
+            '<|tools_prefix|>[{"get_weather": {"location": "Paris"}}]<|tools_suffix|>'
+        )
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert result.content == "Let me check the weather for you."
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].function.name == "get_weather"
+
+    def test_multiple_tool_calls(self, parser, mock_request):
+        model_output = (
+            '<|tools_prefix|>[{"get_weather": '
+            '{"location": "London"}}, '
+            '{"get_time": {"location": "London"}}]<|tools_suffix|>'
+        )
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert len(result.tool_calls) == 2
+        assert result.tool_calls[0].function.name == "get_weather"
+        assert result.tool_calls[1].function.name == "get_time"
+
+    def test_nested_arguments(self, parser, mock_request):
+        model_output = (
+            '<|tools_prefix|>[{"complex_function": '
+            '{"nested": {"inner": "value"}, '
+            '"list": ["a", "b"]}}]<|tools_suffix|>'
+        )
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].function.name == "complex_function"
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"nested": {"inner": "value"}, "list": ["a", "b"]}
+
+    def test_incomplete_tool_call(self, parser, mock_request):
+        model_output = '<|tools_prefix|>[{"get_weather": {"location": "London"}'
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].function.name == "get_weather"
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"location": "London"}
+
+    def test_missing_tool_suffix(self, parser, mock_request):
+        model_output = (
+            '<|tools_prefix|>[{"get_weather": '
+            '{"location": "San Francisco", "unit": "celsius"}}]'
+        )
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].function.name == "get_weather"
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"location": "San Francisco", "unit": "celsius"}
+
+
+# ---------------------------------------------------------------------------
+# Streaming extraction tests
+# ---------------------------------------------------------------------------
+
+
+class TestStreamingExtraction:
+    def _simulate_streaming(
+        self, parser: ApertusToolParser, mock_request: Any, chunks: list[str]
+    ) -> list[tuple[Any, str]]:
+        results: list[tuple[Any, str]] = []
+        previous_text: str = ""
+        previous_token_ids: list[int] = []
+
+        for chunk in chunks:
+            current_text = previous_text + chunk
+            # Simulate a token ID sequence matching the chunk progression
+            delta_token_ids: list[int] = [0] * max(1, len(chunk) // 4)
+            current_token_ids = previous_token_ids + delta_token_ids
+
+            delta = parser.extract_tool_calls_streaming(
+                previous_text=previous_text,
+                current_text=current_text,
+                delta_text=chunk,
+                previous_token_ids=tuple(previous_token_ids),
+                current_token_ids=tuple(current_token_ids),
+                delta_token_ids=tuple(delta_token_ids),
+                request=mock_request,
+            )
+            results.append((delta, current_text))
+            previous_text = current_text
+            previous_token_ids = list(current_token_ids)
+
+        return results
+
+    def _collect_tool_calls(self, results) -> dict[int, dict[str, Any]]:
+        """Properly tracks and concatenates streamed tool arguments by their Index."""
+        tool_calls = {}
+        for delta, _ in results:
+            if not delta or not getattr(delta, "tool_calls", None):
+                continue
+
+            for tc in delta.tool_calls:
+                idx = (
+                    tc.get("index", 0)
+                    if isinstance(tc, dict)
+                    else getattr(tc, "index", 0)
+                )
+                func = (
+                    tc.get("function", {})
+                    if isinstance(tc, dict)
+                    else getattr(tc, "function", None)
+                )
+                if not func:
+                    continue
+
+                name = (
+                    func.get("name")
+                    if isinstance(func, dict)
+                    else getattr(func, "name", None)
+                )
+                args = (
+                    func.get("arguments")
+                    if isinstance(func, dict)
+                    else getattr(func, "arguments", None)
+                )
+
+                if idx not in tool_calls:
+                    tool_calls[idx] = {"name": "", "arguments": ""}
+
+                if name:
+                    tool_calls[idx]["name"] += name
+                if args:
+                    tool_calls[idx]["arguments"] += args
+
+        return tool_calls
+
+    def _collect_content(self, results) -> str:
+        """Collects generated normal text outside of the tool calls."""
+        return "".join(
+            delta.content
+            for delta, _ in results
+            if delta and getattr(delta, "content", None)
+        )
+
+    def test_basic_streaming_single_tool(self, parser, mock_request):
+        chunks = [
+            "<|tools_prefix|>",
+            '[{"get_weather": ',
+            '{"location": "Paris, ',
+            'France"}}]',
+            "<|tools_suffix|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        tcs = self._collect_tool_calls(results)
+
+        assert len(tcs) == 1
+        assert tcs[0]["name"] == "get_weather"
+        assert json.loads(tcs[0]["arguments"]) == {"location": "Paris, France"}
+
+    def test_streaming_missing_tool_suffix(self, parser, mock_request):
+        chunks = [
+            "<|tools_prefix|>",
+            '[{"get_weather": ',
+            '{"location": "Paris, ',
+            'France"}}]',
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        tcs = self._collect_tool_calls(results)
+
+        assert len(tcs) == 1
+        assert tcs[0]["name"] == "get_weather"
+        assert json.loads(tcs[0]["arguments"]) == {"location": "Paris, France"}
+
+    def test_streaming_partial_tag_buffering_missing_tool_suffix(
+        self, parser, mock_request
+    ):
+        chunks = ["Content", "<|tools_", "prefix|>", '[{"f": ', '{"a": 1}}]']
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        content = self._collect_content(results)
+
+        assert "Content" in content
+        assert "<|tools_prefix|>" not in content
+        assert "<|tools_suffix|>" not in content
+
+        tcs = self._collect_tool_calls(results)
+
+        assert len(tcs) == 1
+        assert tcs[0]["name"] == "f"
+        assert json.loads(tcs[0]["arguments"]) == {"a": 1}
+
+    def test_streaming_multi_tool(self, parser, mock_request):
+        chunks = [
+            "<|tools_prefix|>",
+            '[{"get_weather": {"location": "Tokyo"}}',
+            ', {"get_time": {"location": "Tokyo"}}]',
+            "<|tools_suffix|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        tcs = self._collect_tool_calls(results)
+
+        assert len(tcs) == 2
+        assert tcs[0]["name"] == "get_weather"
+        assert json.loads(tcs[0]["arguments"]) == {"location": "Tokyo"}
+        assert tcs[1]["name"] == "get_time"
+        assert json.loads(tcs[1]["arguments"]) == {"location": "Tokyo"}
+
+    def test_streaming_text_before_tool_call(self, parser, mock_request):
+        chunks = [
+            "Let me check ",
+            "the weather. ",
+            "<|tools_prefix|>",
+            '[{"get_weather": {"location": "London"}}]',
+            "<|tools_suffix|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        content = self._collect_content(results)
+
+        assert content.strip() == "Let me check the weather."
+        tcs = self._collect_tool_calls(results)
+
+        assert len(tcs) == 1
+        assert tcs[0]["name"] == "get_weather"
+        assert json.loads(tcs[0]["arguments"]) == {"location": "London"}
+
+    def test_streaming_partial_tag_buffering(self, parser, mock_request):
+        chunks = [
+            "Content",
+            "<|tools_",
+            "prefix|>",
+            '[{"f": {"a": 1}}]',
+            "<|tools_suf",
+            "fix|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        content = self._collect_content(results)
+
+        assert "Content" in content
+        assert "<|tools_prefix|>" not in content
+        assert "<|tools_suffix|>" not in content
+
+        tc = self._collect_tool_calls(results)
+        assert len(tc) == 1
+        assert tc[0]["name"] == "f"
+        assert json.loads(tc[0]["arguments"]) == {"a": 1}
+
+    # ---------------------------------------------------------------------------
+    # Edge Cases: Multi-Token Prediction (MTP) & vLLM Chunking Anomalies
+    # ---------------------------------------------------------------------------
+
+    def test_mtp_streaming_massive_chunk(self, parser, mock_request):
+        """Simulates MTP predicting text, tool calls,
+        and trailing text all in a single chunk."""
+        chunks = [
+            "Sure! "
+            '<|tools_prefix|>[{"get_weather": {"location": "London"}}]<|tools_suffix|>'
+        ]
+        results = self._simulate_streaming(parser, mock_request, chunks)
+
+        content = self._collect_content(results)
+        assert "Sure! " in content
+
+        tc = self._collect_tool_calls(results)
+        assert len(tc) == 1
+        assert tc[0]["name"] == "get_weather"
+        assert json.loads(tc[0]["arguments"]) == {"location": "London"}
+
+    def test_mtp_streaming_multiple_tools_burst(self, parser, mock_request):
+        """Simulates MTP predicting an array of multiple tools in one single chunk."""
+        chunks = [
+            '<|tools_prefix|>[{"get_weather": '
+            '{"location": "London"}}, '
+            '{"get_time": {"location": "Paris"}}]<|tools_suffix|>'
+        ]
+        results = self._simulate_streaming(parser, mock_request, chunks)
+
+        tc = self._collect_tool_calls(results)
+        assert len(tc) == 2
+        assert tc[0]["name"] == "get_weather"
+        assert json.loads(tc[0]["arguments"]) == {"location": "London"}
+        assert tc[1]["name"] == "get_time"
+        assert json.loads(tc[1]["arguments"]) == {"location": "Paris"}
+
+    def test_mtp_streaming_skip_and_catch_up(self, parser, mock_request):
+        """Simulates MTP chunks that jump over entire tools
+        (e.g., from middle of tool 1 to middle of tool 3)."""
+        chunks = [
+            '<|tools_prefix|>[{"t1": {"a": 1}',
+            '}, {"t2": {"b": 2}}, {"t3": {"c": 3',
+            "}}]<|tools_suffix|>",
+        ]
+        results = self._simulate_streaming(parser, mock_request, chunks)
+
+        tc = self._collect_tool_calls(results)
+        assert len(tc) == 3
+        assert tc[0]["name"] == "t1"
+        assert json.loads(tc[0]["arguments"]) == {"a": 1}
+        assert tc[1]["name"] == "t2"
+        assert json.loads(tc[1]["arguments"]) == {"b": 2}
+        assert tc[2]["name"] == "t3"
+        assert json.loads(tc[2]["arguments"]) == {"c": 3}
+
+    def test_vllm_streaming_character_by_character(self, parser, mock_request):
+        """Simulates worst-case vLLM fragmentation where
+        chunks arrive character-by-character."""
+        text = (
+            'Hi <|tools_prefix|>[{"get_weather": '
+            '{"location": "London"}}]<|tools_suffix|> '
+        )
+        chunks = list(text)
+        results = self._simulate_streaming(parser, mock_request, chunks)
+
+        content = self._collect_content(results)
+        assert "Hi" in content
+
+        tc = self._collect_tool_calls(results)
+        assert len(tc) == 1
+        assert tc[0]["name"] == "get_weather"
+        assert json.loads(tc[0]["arguments"]) == {"location": "London"}
+
+    def test_vllm_streaming_empty_deltas(self, parser, mock_request):
+        """Simulates vLLM stream producing empty string chunks
+        (e.g., hidden tokens or artifacts)."""
+        chunks = [
+            "Wait",
+            "",
+            "<|tools_prefix|>",
+            "",
+            '[{"get_weather": ',
+            "",
+            '{"location": "London"}}]',
+            "<|tools_suffix|>",
+        ]
+        results = self._simulate_streaming(parser, mock_request, chunks)
+
+        content = self._collect_content(results)
+        assert content == "Wait"
+
+        tc = self._collect_tool_calls(results)
+        assert len(tc) == 1
+        assert tc[0]["name"] == "get_weather"
+        assert json.loads(tc[0]["arguments"]) == {"location": "London"}
diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py
index 14462da5b9cb..5929be09fa3f 100644
--- a/tests/tool_parsers/test_deepseekv32_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py
@@ -11,6 +11,11 @@
 
 import pytest
 
+from tests.tool_parsers.utils import run_tool_extraction_streaming
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionToolsParam,
+    FunctionDefinition,
+)
 from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser
 
 # ---------------------------------------------------------------------------
@@ -21,10 +26,11 @@
 # tokenizer object to be truthy (the parser checks `if not self.model_tokenizer`).
 MOCK_TOKENIZER = MagicMock()
 MOCK_TOKENIZER.get_vocab.return_value = {}
+MOCK_TOKENIZER.tokenize.return_value = []
 
 
-def make_parser() -> DeepSeekV32ToolParser:
-    return DeepSeekV32ToolParser(MOCK_TOKENIZER)
+def make_parser(tools=None) -> DeepSeekV32ToolParser:
+    return DeepSeekV32ToolParser(MOCK_TOKENIZER, tools=tools)
 
 
 def make_tool_param(name: str, params: dict) -> MagicMock:
@@ -58,58 +64,6 @@ def build_tool_call(func_name: str, params: dict[str, str]) -> str:
     return f'{FC_START}\n{INV_START}{func_name}">\n{param_strs}\n{INV_END}\n{FC_END}'
 
 
-# ---------------------------------------------------------------------------
-# Tests: DeepSeekV32ToolParser._convert_param_value
-# ---------------------------------------------------------------------------
-
-
-class TestConvertParamValue:
-    @pytest.fixture
-    def parser(self):
-        return make_parser()
-
-    def test_null(self, parser):
-        assert parser._convert_param_value("null", "string") is None
-        assert parser._convert_param_value("NULL", "integer") is None
-
-    def test_string(self, parser):
-        assert parser._convert_param_value("hello", "string") == "hello"
-
-    def test_integer_valid(self, parser):
-        assert parser._convert_param_value("42", "integer") == 42
-
-    def test_integer_invalid_falls_back_to_str(self, parser):
-        assert parser._convert_param_value("abc", "int") == "abc"
-
-    def test_number_float(self, parser):
-        assert parser._convert_param_value("3.14", "number") == pytest.approx(3.14)
-
-    def test_number_whole_returns_int(self, parser):
-        assert parser._convert_param_value("5.0", "number") == 5
-        assert isinstance(parser._convert_param_value("5.0", "number"), int)
-
-    def test_boolean_true(self, parser):
-        assert parser._convert_param_value("true", "boolean") is True
-        assert parser._convert_param_value("1", "bool") is True
-
-    def test_boolean_false(self, parser):
-        assert parser._convert_param_value("false", "boolean") is False
-        assert parser._convert_param_value("False", "bool") is False
-
-    def test_object_valid_json(self, parser):
-        assert parser._convert_param_value('{"k": 1}', "object") == {"k": 1}
-
-    def test_object_invalid_json_falls_back(self, parser):
-        assert parser._convert_param_value("not-json", "object") == "not-json"
-
-    def test_array_valid_json(self, parser):
-        assert parser._convert_param_value("[1, 2]", "array") == [1, 2]
-
-    def test_unknown_type_tries_json_then_string(self, parser):
-        assert parser._convert_param_value("123", "unknown") == 123
-        assert parser._convert_param_value("hello", "unknown") == "hello"
-
-
 # ---------------------------------------------------------------------------
 # Tests: extract_tool_calls (non-streaming)
 # ---------------------------------------------------------------------------
@@ -181,6 +135,399 @@ def test_multiple_tools(self, parser):
             "location": "NYC"
         }
 
+    def test_type_conversion_in_non_streaming(self):
+        """Non-streaming extraction must convert params using the tool schema."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="toggle",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "enabled": {"type": "boolean"},
+                        "count": {"type": "integer"},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        model_output = (
+            f"{FC_START}\n"
+            f'{INV_START}toggle">\n'
+            f'{PARAM_START}enabled" string="false">true{PARAM_END}\n'
+            f'{PARAM_START}count" string="false">42{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        result = parser.extract_tool_calls(model_output, None)
+        assert result.tools_called
+        assert len(result.tool_calls) == 1
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"enabled": True, "count": 42}
+        assert isinstance(args["enabled"], bool)
+        assert isinstance(args["count"], int)
+
+    def test_string_attr_true_preserves_literal_despite_schema(self):
+        """string="true" must keep the value as a string even
+        if the schema says integer."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="score",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "value": {"type": "integer"},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        model_output = (
+            f"{FC_START}\n"
+            f'{INV_START}score">\n'
+            f'{PARAM_START}value" string="true">42{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        result = parser.extract_tool_calls(model_output, None)
+        assert result.tools_called
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"value": "42"}
+        assert isinstance(args["value"], str)
+
+    def test_string_attr_false_allows_schema_conversion(self):
+        """string="false" allows the parser to convert via the tool schema."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="score",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "value": {"type": "integer"},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        model_output = (
+            f"{FC_START}\n"
+            f'{INV_START}score">\n'
+            f'{PARAM_START}value" string="false">42{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        result = parser.extract_tool_calls(model_output, None)
+        assert result.tools_called
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"value": 42}
+        assert isinstance(args["value"], int)
+
+    @pytest.mark.skip_global_cleanup
+    def test_composed_schema_converts_object_and_array_params(self):
+        """Composed JSON Schema types must still drive DSML type coercion."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="set_timer",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "wait": {
+                            "anyOf": [
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "type": {"const": "until"},
+                                        "date": {"type": "string"},
+                                    },
+                                },
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "type": {"const": "for"},
+                                        "minutes": {"type": "number"},
+                                    },
+                                },
+                            ],
+                        },
+                        "patches": {
+                            "oneOf": [
+                                {"type": "array", "items": {"type": "object"}},
+                                {"type": "null"},
+                            ],
+                        },
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        model_output = (
+            f"{FC_START}\n"
+            f'{INV_START}set_timer">\n'
+            f'{PARAM_START}wait" string="false">'
+            f'{{"type":"for","minutes":2880}}'
+            f"{PARAM_END}\n"
+            f'{PARAM_START}patches" string="false">'
+            f'[{{"op":"replace","path":"/schedule","value":"quiet"}}]'
+            f"{PARAM_END}\n"
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        result = parser.extract_tool_calls(model_output, None)
+        assert result.tools_called
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {
+            "wait": {"type": "for", "minutes": 2880},
+            "patches": [{"op": "replace", "path": "/schedule", "value": "quiet"}],
+        }
+        assert isinstance(args["wait"], dict)
+        assert isinstance(args["patches"], list)
+
+    @pytest.mark.skip_global_cleanup
+    def test_string_attr_true_preserves_literal_for_composed_schema(self):
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="set_timer",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "wait": {
+                            "anyOf": [
+                                {"type": "object"},
+                                {"type": "null"},
+                            ],
+                        },
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        model_output = (
+            f"{FC_START}\n"
+            f'{INV_START}set_timer">\n'
+            f'{PARAM_START}wait" string="true">'
+            f'{{"type":"for","minutes":2880}}'
+            f"{PARAM_END}\n"
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        result = parser.extract_tool_calls(model_output, None)
+        assert result.tools_called
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"wait": '{"type":"for","minutes":2880}'}
+
+    def test_arguments_wrapper_repaired(self):
+        """A single 'arguments' wrapper parameter must be unwrapped when it
+        is not part of the tool schema and the inner object matches schema fields."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="get_weather",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "location": {"type": "string"},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        model_output = (
+            f"{FC_START}\n"
+            f'{INV_START}get_weather">\n'
+            f'{PARAM_START}arguments" string="false">'
+            f'{{"location":"Beijing"}}'
+            f"{PARAM_END}\n"
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        result = parser.extract_tool_calls(model_output, None)
+        assert result.tools_called
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"location": "Beijing"}
+
+    def test_input_wrapper_repaired(self):
+        """A single 'input' wrapper parameter must be unwrapped similarly."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="get_weather",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "location": {"type": "string"},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        model_output = (
+            f"{FC_START}\n"
+            f'{INV_START}get_weather">\n'
+            f'{PARAM_START}input" string="true">'
+            f'{{"location":"Beijing"}}'
+            f"{PARAM_END}\n"
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        result = parser.extract_tool_calls(model_output, None)
+        assert result.tools_called
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"location": "Beijing"}
+
+    def test_object_and_array_params(self):
+        """Object and array schema types must be JSON-parsed, not left as strings."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="update",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "tags": {"type": "array"},
+                        "meta": {"type": "object"},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        model_output = (
+            f"{FC_START}\n"
+            f'{INV_START}update">\n'
+            f'{PARAM_START}tags" string="false">["a", "b"]{PARAM_END}\n'
+            f'{PARAM_START}meta" string="false">{{"k": 1}}{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        result = parser.extract_tool_calls(model_output, None)
+        assert result.tools_called
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args["tags"] == ["a", "b"]
+        assert isinstance(args["tags"], list)
+        assert args["meta"] == {"k": 1}
+        assert isinstance(args["meta"], dict)
+
+    def test_number_param(self):
+        """Number (float) schema type must be converted correctly."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="measure",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "ratio": {"type": "number"},
+                        "whole": {"type": "number"},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        model_output = (
+            f"{FC_START}\n"
+            f'{INV_START}measure">\n'
+            f'{PARAM_START}ratio" string="false">3.14{PARAM_END}\n'
+            f'{PARAM_START}whole" string="false">5.0{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        result = parser.extract_tool_calls(model_output, None)
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args["ratio"] == pytest.approx(3.14)
+        assert args["whole"] == 5
+        assert isinstance(args["whole"], int)
+
+    def test_multi_typed_schema(self):
+        """Schema with type: ["integer", "null"] must handle both cases."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="set_val",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "count": {"type": ["integer", "null"]},
+                        "label": {"type": ["string", "null"]},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        model_output = (
+            f"{FC_START}\n"
+            f'{INV_START}set_val">\n'
+            f'{PARAM_START}count" string="false">42{PARAM_END}\n'
+            f'{PARAM_START}label" string="false">hello{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        result = parser.extract_tool_calls(model_output, None)
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args["count"] == 42
+        assert isinstance(args["count"], int)
+        assert args["label"] == "hello"
+
+    def test_multi_typed_null_value(self):
+        """Literal 'null' must become None when the schema includes 'null'."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="clear",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "value": {"type": ["integer", "null"]},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        model_output = (
+            f"{FC_START}\n"
+            f'{INV_START}clear">\n'
+            f'{PARAM_START}value" string="false">null{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        result = parser.extract_tool_calls(model_output, None)
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args["value"] is None
+
+    def test_null_not_coerced_without_null_in_schema(self):
+        """Literal 'null' must stay as a string when the schema is just 'string'."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="echo",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "text": {"type": "string"},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        model_output = (
+            f"{FC_START}\n"
+            f'{INV_START}echo">\n'
+            f'{PARAM_START}text" string="false">null{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        result = parser.extract_tool_calls(model_output, None)
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args["text"] == "null"
+        assert isinstance(args["text"], str)
+
+    def test_no_schema_keeps_strings(self):
+        """Without a tool schema, all string='false' params default to string."""
+        parser = make_parser(tools=None)
+        model_output = (
+            f"{FC_START}\n"
+            f'{INV_START}unknown_fn">\n'
+            f'{PARAM_START}count" string="false">42{PARAM_END}\n'
+            f'{PARAM_START}flag" string="false">true{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        result = parser.extract_tool_calls(model_output, None)
+        assert result.tools_called
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args["count"] == "42"
+        assert args["flag"] == "true"
+
 
 # ---------------------------------------------------------------------------
 # Tests: extract_tool_calls_streaming
@@ -274,23 +621,103 @@ def test_content_before_tool_call_streaming(self, parser):
         content = "".join(d.content for d in deltas if d.content is not None)
         assert "Thinking" in content
 
-    def test_type_conversion_in_streaming(self, parser):
-        tool = make_tool_param(
-            "add",
-            {
-                "type": "object",
-                "properties": {
-                    "x": {"type": "integer"},
-                    "y": {"type": "integer"},
+    def test_type_conversion_in_streaming(self):
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="add",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "x": {"type": "integer"},
+                        "y": {"type": "integer"},
+                    },
                 },
-            },
+            ),
         )
-        request = make_request(tools=[tool])
-        full_text = build_tool_call("add", {"x": "3", "y": "4"})
-        deltas = self._stream(parser, full_text, request=request)
+        parser = make_parser(tools=[tool])
+        full_text = (
+            f"{FC_START}\n"
+            f'{INV_START}add">\n'
+            f'{PARAM_START}x" string="false">3{PARAM_END}\n'
+            f'{PARAM_START}y" string="false">4{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        deltas = self._stream(parser, full_text)
         args_str = self._reconstruct_args(deltas)
         assert json.loads(args_str) == {"x": 3, "y": 4}
 
+    def test_string_attr_true_preserves_literal_in_streaming(self):
+        """Streaming: string='true' must keep the value literal despite schema."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="score",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "value": {"type": "integer"},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        full_text = (
+            f"{FC_START}\n"
+            f'{INV_START}score">\n'
+            f'{PARAM_START}value" string="true">42{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        deltas = self._stream(parser, full_text)
+        args_str = self._reconstruct_args(deltas)
+        args = json.loads(args_str)
+        assert args == {"value": "42"}
+        assert isinstance(args["value"], str)
+
+    @pytest.mark.skip_global_cleanup
+    def test_composed_schema_conversion_in_streaming(self):
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="set_timer",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "wait": {
+                            "anyOf": [
+                                {"type": "object"},
+                                {"type": "null"},
+                            ],
+                        },
+                        "patches": {
+                            "oneOf": [
+                                {"type": "array", "items": {"type": "object"}},
+                                {"type": "null"},
+                            ],
+                        },
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        full_text = (
+            f"{FC_START}\n"
+            f'{INV_START}set_timer">\n'
+            f'{PARAM_START}wait" string="false">'
+            f'{{"type":"for","minutes":2880}}'
+            f"{PARAM_END}\n"
+            f'{PARAM_START}patches" string="false">'
+            f'[{{"op":"replace","path":"/schedule","value":"quiet"}}]'
+            f"{PARAM_END}\n"
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        deltas = self._stream(parser, full_text)
+        args = json.loads(self._reconstruct_args(deltas))
+        assert args == {
+            "wait": {"type": "for", "minutes": 2880},
+            "patches": [{"op": "replace", "path": "/schedule", "value": "quiet"}],
+        }
+
     def test_multiple_tools_streaming(self, parser):
         full_text = (
             f"{FC_START}\n"
@@ -474,3 +901,241 @@ def test_no_emission_while_incomplete(self, parser):
         deltas = self._stream(parser, partial_text)
         # Should have no tool call deltas yet
         assert all(not d.tool_calls for d in deltas)
+
+    def test_no_marker_leak_chunked(self, parser):
+        """Chunked streaming must NOT leak DSML start-marker fragments
+        as content (GitHub #40801)."""
+        full_text = build_tool_call("fn", {"k": "v"})
+        deltas = self._stream_chunked(parser, full_text, chunk_size=5)
+        content = "".join(d.content for d in deltas if d.content is not None)
+        assert content == ""
+        args_str = self._reconstruct_args(deltas)
+        assert json.loads(args_str) == {"k": "v"}
+
+    def test_no_marker_leak_with_prefix_chunked(self, parser):
+        """Content before a tool call must not include start-marker
+        fragments when chunked (GitHub #40801)."""
+        full_text = "Hello!" + build_tool_call("fn", {"a": "b"})
+        deltas = self._stream_chunked(parser, full_text, chunk_size=5)
+        content = "".join(d.content for d in deltas if d.content is not None)
+        assert content == "Hello!"
+        assert "DSML" not in content
+        assert "<｜" not in content
+        args_str = self._reconstruct_args(deltas)
+        assert json.loads(args_str) == {"a": "b"}
+
+    def test_no_marker_leak_char_by_char(self, parser):
+        """Character-by-character streaming must not leak marker
+        fragments (GitHub #40801)."""
+        full_text = build_tool_call("fn", {"k": "v"})
+        deltas = self._stream_chunked(parser, full_text, chunk_size=1)
+        content = "".join(d.content for d in deltas if d.content is not None)
+        assert content == ""
+        args_str = self._reconstruct_args(deltas)
+        assert json.loads(args_str) == {"k": "v"}
+
+    def test_no_marker_leak_all_split_points(self, parser):
+        """Start token split at every possible boundary must not
+        leak (GitHub #40801)."""
+        for chunk_size in range(1, len(FC_START) + 2):
+            p = make_parser()
+            full_text = build_tool_call("fn", {"k": "v"})
+            deltas = self._stream_chunked(p, full_text, chunk_size=chunk_size)
+            content = "".join(d.content for d in deltas if d.content is not None)
+            assert content == "", (
+                f"Leaked content {content!r} at chunk_size={chunk_size}"
+            )
+
+    def test_false_partial_marker_emitted(self, parser):
+        """Text ending with a prefix of the start token that turns out
+        NOT to be a marker must still be emitted as content."""
+        full_text = "<｜DSM some regular text"
+        deltas = self._stream_chunked(parser, full_text, chunk_size=3)
+        content = "".join(d.content for d in deltas if d.content is not None)
+        assert content == full_text
+
+    def test_object_and_array_params_streaming(self):
+        """Streaming: object/array params must be JSON-parsed."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="update",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "tags": {"type": "array"},
+                        "meta": {"type": "object"},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        full_text = (
+            f"{FC_START}\n"
+            f'{INV_START}update">\n'
+            f'{PARAM_START}tags" string="false">["a", "b"]{PARAM_END}\n'
+            f'{PARAM_START}meta" string="false">{{"k": 1}}{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        deltas = self._stream(parser, full_text)
+        args = json.loads(self._reconstruct_args(deltas))
+        assert args["tags"] == ["a", "b"]
+        assert args["meta"] == {"k": 1}
+
+    def test_multi_typed_schema_streaming(self):
+        """Streaming: type: ["integer", "null"] must coerce correctly."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="set_val",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "count": {"type": ["integer", "null"]},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        full_text = (
+            f"{FC_START}\n"
+            f'{INV_START}set_val">\n'
+            f'{PARAM_START}count" string="false">42{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        deltas = self._stream(parser, full_text)
+        args = json.loads(self._reconstruct_args(deltas))
+        assert args["count"] == 42
+        assert isinstance(args["count"], int)
+
+    def test_multi_typed_null_streaming(self):
+        """Streaming: 'null' with ["integer", "null"] schema must become None."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="clear",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "value": {"type": ["integer", "null"]},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        full_text = (
+            f"{FC_START}\n"
+            f'{INV_START}clear">\n'
+            f'{PARAM_START}value" string="false">null{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        deltas = self._stream(parser, full_text)
+        args = json.loads(self._reconstruct_args(deltas))
+        assert args["value"] is None
+
+    def test_number_param_streaming(self):
+        """Streaming: number type must be converted."""
+        tool = ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="measure",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "ratio": {"type": "number"},
+                    },
+                },
+            ),
+        )
+        parser = make_parser(tools=[tool])
+        full_text = (
+            f"{FC_START}\n"
+            f'{INV_START}measure">\n'
+            f'{PARAM_START}ratio" string="false">3.14{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+        deltas = self._stream(parser, full_text)
+        args = json.loads(self._reconstruct_args(deltas))
+        assert args["ratio"] == pytest.approx(3.14)
+
+
+class TestDelimiterPreservation:
+    """Regression: fast detokenization skipping DSML delimiters (PR #33964)."""
+
+    @pytest.fixture
+    def parser(self):
+        return make_parser()
+
+    def test_delimiter_preserved_fast_detokenization(self, parser):
+        """DSML delimiters as literal text must still be detected."""
+        # Delimiters appear as regular text (fast detokenization scenario).
+        model_output = (
+            f"{FC_START}\n"
+            f'{INV_START}get_weather">\n'
+            f'{PARAM_START}location" string="true">Tokyo{PARAM_END}\n'
+            f"{INV_END}\n"
+            f"{FC_END}"
+        )
+
+        # Non-streaming: parser must detect the tool call
+        result = parser.extract_tool_calls(model_output, None)
+        assert result.tools_called
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].function.name == "get_weather"
+        assert json.loads(result.tool_calls[0].function.arguments) == {
+            "location": "Tokyo"
+        }
+
+        assert result.content is None
+
+        # With content prefix
+        prefixed_output = "Here is the weather: " + model_output
+        result2 = parser.extract_tool_calls(prefixed_output, None)
+        assert result2.tools_called
+        assert result2.content == "Here is the weather: "
+
+    def test_tool_detection_skip_special_tokens_false(self, parser):
+        """Regression: skip_special_tokens must be False when tools are enabled."""
+        # adjust_request must set skip_special_tokens=False
+        tool = make_tool_param(
+            "search",
+            {
+                "type": "object",
+                "properties": {
+                    "query": {"type": "string"},
+                },
+            },
+        )
+        request = make_request(tools=[tool])
+        request.tool_choice = "auto"
+        adjusted = parser.adjust_request(request)
+        assert adjusted.skip_special_tokens is False
+
+        full_text = build_tool_call("search", {"query": "vllm documentation"})
+
+        # Non-streaming extraction
+        non_stream_result = parser.extract_tool_calls(full_text, request)
+        assert non_stream_result.tools_called
+        assert len(non_stream_result.tool_calls) == 1
+        assert non_stream_result.tool_calls[0].function.name == "search"
+        ns_args = json.loads(non_stream_result.tool_calls[0].function.arguments)
+        assert ns_args == {"query": "vllm documentation"}
+
+        # Streaming extraction: drive the parser line-by-line
+        chunks: list[str] = []
+        remaining = full_text
+        while remaining:
+            nl = remaining.find("\n")
+            if nl == -1:
+                chunks.append(remaining)
+                break
+            chunks.append(remaining[: nl + 1])
+            remaining = remaining[nl + 1 :]
+
+        reconstructor = run_tool_extraction_streaming(
+            parser, chunks, request, assert_one_tool_per_delta=False
+        )
+        assert len(reconstructor.tool_calls) == 1
+        assert reconstructor.tool_calls[0].function.name == "search"
+        streamed_args = json.loads(reconstructor.tool_calls[0].function.arguments)
+        assert streamed_args == ns_args
diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py
new file mode 100644
index 000000000000..2394575bc581
--- /dev/null
+++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py
@@ -0,0 +1,287 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""Unit tests for DeepSeekV4ToolParser."""
+
+import json
+from unittest.mock import MagicMock
+
+import pytest
+from xgrammar import StructuralTag
+
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedFunction,
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionRequest,
+    ChatCompletionToolsParam,
+)
+from vllm.tool_parsers import ToolParserManager
+from vllm.tool_parsers.deepseekv4_tool_parser import DeepSeekV4ToolParser
+
+MOCK_TOKENIZER = MagicMock()
+MOCK_TOKENIZER.get_vocab.return_value = {}
+
+TC_START = "<｜DSML｜tool_calls>"
+TC_END = "</｜DSML｜tool_calls>"
+INV_START = '<｜DSML｜invoke name="'
+INV_END = "</｜DSML｜invoke>"
+PARAM_START = '<｜DSML｜parameter name="'
+PARAM_END = "</｜DSML｜parameter>"
+
+
+@pytest.fixture
+def sample_tools() -> list[ChatCompletionToolsParam]:
+    return [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "get_current_weather",
+                "description": "Get the current weather",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {"type": "string", "description": "The city name"},
+                        "state": {"type": "string", "description": "The state code"},
+                        "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]},
+                    },
+                    "required": ["city", "state"],
+                },
+            },
+        ),
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "calculate_area",
+                "description": "Calculate area of a shape",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "shape": {"type": "string"},
+                        "dimensions": {"type": "object"},
+                        "precision": {"type": "integer"},
+                    },
+                },
+            },
+        ),
+    ]
+
+
+def make_parser(tools=None) -> DeepSeekV4ToolParser:
+    return DeepSeekV4ToolParser(MOCK_TOKENIZER, tools=tools)
+
+
+def make_request(tools=None) -> MagicMock:
+    req = MagicMock()
+    req.tools = tools
+    return req
+
+
+def build_tool_call(func_name: str, params: dict[str, str]) -> str:
+    param_strs = "".join(
+        f'{PARAM_START}{k}" string="true">{v}{PARAM_END}\n' for k, v in params.items()
+    )
+    return f'{TC_START}\n{INV_START}{func_name}">\n{param_strs}{INV_END}\n{TC_END}'
+
+
+def stream(parser: DeepSeekV4ToolParser, full_text: str, chunk_size: int = 7):
+    deltas = []
+    previous_text = ""
+    for start in range(0, len(full_text), chunk_size):
+        delta_text = full_text[start : start + chunk_size]
+        current_text = previous_text + delta_text
+        delta = parser.extract_tool_calls_streaming(
+            previous_text=previous_text,
+            current_text=current_text,
+            delta_text=delta_text,
+            previous_token_ids=[],
+            current_token_ids=[],
+            delta_token_ids=[1],
+            request=make_request(),
+        )
+        previous_text = current_text
+        if delta is not None:
+            deltas.append(delta)
+    return deltas
+
+
+def reconstruct_args(deltas, tool_index: int = 0) -> str:
+    fragments = []
+    for delta in deltas:
+        if delta.tool_calls:
+            for tool_call in delta.tool_calls:
+                if (
+                    tool_call.index == tool_index
+                    and tool_call.function
+                    and tool_call.function.arguments
+                ):
+                    fragments.append(tool_call.function.arguments)
+    return "".join(fragments)
+
+
+def test_registered():
+    assert ToolParserManager.get_tool_parser("deepseek_v4") is DeepSeekV4ToolParser
+
+
+def test_extract_tool_calls():
+    parser = make_parser()
+    model_output = "Let me check. " + build_tool_call(
+        "get_weather", {"location": "Beijing", "unit": "celsius"}
+    )
+
+    result = parser.extract_tool_calls(model_output, make_request())
+
+    assert result.tools_called
+    assert result.content == "Let me check. "
+    assert len(result.tool_calls) == 1
+    tool_call = result.tool_calls[0]
+    assert tool_call.function.name == "get_weather"
+    assert json.loads(tool_call.function.arguments) == {
+        "location": "Beijing",
+        "unit": "celsius",
+    }
+
+
+def test_function_calls_block_is_not_accepted():
+    parser = make_parser()
+    model_output = build_tool_call("search", {"query": "vllm"}).replace(
+        "tool_calls", "function_calls"
+    )
+
+    result = parser.extract_tool_calls(model_output, make_request())
+
+    assert not result.tools_called
+    assert result.content == model_output
+
+
+def test_streaming_extracts_complete_invokes():
+    parser = make_parser()
+    full_text = build_tool_call("search", {"query": "deepseek v4"})
+
+    deltas = stream(parser, full_text, chunk_size=5)
+
+    names = [
+        tool_call.function.name
+        for delta in deltas
+        if delta.tool_calls
+        for tool_call in delta.tool_calls
+    ]
+    assert names == ["search"]
+    assert json.loads(reconstruct_args(deltas)) == {"query": "deepseek v4"}
+
+
+def test_get_vllm_registry_structural_tag_returns_structural_tag(
+    sample_tools: list[ChatCompletionToolsParam],
+) -> None:
+    parser = make_parser()
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="auto",
+    )
+    tag = parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
+
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="required",
+    )
+    tag = parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
+
+    if sample_tools:
+        tool = sample_tools[0]
+        req = ChatCompletionRequest(
+            messages=[],
+            model="m",
+            tools=sample_tools,
+        )
+        req.tool_choice = ChatCompletionNamedToolChoiceParam(
+            function=ChatCompletionNamedFunction(name=tool.function.name)
+        )
+        tag = parser.get_structural_tag(req)
+        assert isinstance(tag, StructuralTag)
+
+
+def test_extract_tool_calls_arguments_wrapper():
+    mock_tokenizer = MagicMock()
+    mock_tokenizer.get_vocab.return_value = {}
+
+    tool = ChatCompletionToolsParam(
+        type="function",
+        function={
+            "name": "get_weather",
+            "parameters": {
+                "type": "object",
+                "properties": {"location": {"type": "string"}},
+            },
+        },
+    )
+
+    parser = DeepSeekV4ToolParser(mock_tokenizer, tools=[tool])
+    request = MagicMock()
+    request.tools = [tool]
+
+    model_output = (
+        f"{TC_START}"
+        f'{INV_START}get_weather">'
+        f'{PARAM_START}arguments" string="false">{{"location":"Beijing"}}{PARAM_END}'
+        f"{INV_END}"
+        f"{TC_END}"
+    )
+
+    result = parser.extract_tool_calls(model_output, request)
+    assert result.tools_called
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert args == {"location": "Beijing"}
+
+
+@pytest.mark.skip_global_cleanup
+def test_composed_schema_converts_object_and_array_params():
+    tool = ChatCompletionToolsParam(
+        type="function",
+        function={
+            "name": "set_timer",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "wait": {
+                        "anyOf": [
+                            {"type": "object"},
+                            {"type": "null"},
+                        ],
+                    },
+                    "patches": {
+                        "allOf": [
+                            {"type": "array", "items": {"type": "object"}},
+                        ],
+                    },
+                },
+            },
+        },
+    )
+    parser = make_parser(tools=[tool])
+    request = make_request(tools=[tool])
+    model_output = (
+        f"{TC_START}\n"
+        f'{INV_START}set_timer">\n'
+        f'{PARAM_START}wait" string="false">'
+        f'{{"type":"for","minutes":2880}}'
+        f"{PARAM_END}\n"
+        f'{PARAM_START}patches" string="false">'
+        f'[{{"op":"replace","path":"/schedule","value":"quiet"}}]'
+        f"{PARAM_END}\n"
+        f"{INV_END}\n"
+        f"{TC_END}"
+    )
+
+    result = parser.extract_tool_calls(model_output, request)
+
+    assert result.tools_called
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert args == {
+        "wait": {"type": "for", "minutes": 2880},
+        "patches": [{"op": "replace", "path": "/schedule", "value": "quiet"}],
+    }
diff --git a/tests/tool_parsers/test_ernie45_moe_tool_parser.py b/tests/tool_parsers/test_ernie45_moe_tool_parser.py
index a00e43894767..ffee62441c97 100644
--- a/tests/tool_parsers/test_ernie45_moe_tool_parser.py
+++ b/tests/tool_parsers/test_ernie45_moe_tool_parser.py
@@ -328,7 +328,7 @@ def test_extract_tool_calls_streaming_incremental(
     expected_content,
 ):
     """Verify the Ernie45 Parser streaming behavior by verifying each chunk is as expected."""  # noqa: E501
-    request = ChatCompletionRequest(model=MODEL, messages=[], tools=[])
+    request = ChatCompletionRequest(model=MODEL, messages=[])
 
     tool_calls_dict = {}
     for delta_message in stream_delta_message_generator(
diff --git a/tests/tool_parsers/test_gemma4_tool_parser.py b/tests/tool_parsers/test_gemma4_tool_parser.py
new file mode 100644
index 000000000000..6f3709e19a45
--- /dev/null
+++ b/tests/tool_parsers/test_gemma4_tool_parser.py
@@ -0,0 +1,730 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+from typing import Any
+from unittest.mock import MagicMock
+
+import pytest
+
+from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+from vllm.tool_parsers.gemma4_tool_parser import (
+    TOOL_CALL_END,
+    TOOL_CALL_START,
+    Gemma4ToolParser,
+    _parse_gemma4_args,
+    _parse_gemma4_array,
+)
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def mock_tokenizer():
+    tokenizer = MagicMock()
+    tokenizer.encode.return_value = [1, 2, 3]
+    # Include the tool call start token in the vocab for the parser
+    tokenizer.get_vocab.return_value = {TOOL_CALL_START: 48, TOOL_CALL_END: 49}
+    return tokenizer
+
+
+@pytest.fixture
+def parser(mock_tokenizer):
+    return Gemma4ToolParser(mock_tokenizer)
+
+
+@pytest.fixture
+def mock_request():
+    request = MagicMock(spec=ChatCompletionRequest)
+    request.tools = []
+    request.tool_choice = "auto"
+    return request
+
+
+# ---------------------------------------------------------------------------
+# Unit tests for _parse_gemma4_args (shared parser logic)
+# ---------------------------------------------------------------------------
+
+
+class TestParseGemma4Args:
+    def test_empty_string(self):
+        assert _parse_gemma4_args("") == {}
+
+    def test_whitespace_only(self):
+        assert _parse_gemma4_args("   ") == {}
+
+    def test_single_string_value(self):
+        result = _parse_gemma4_args('location:<|"|>Paris<|"|>')
+        assert result == {"location": "Paris"}
+
+    def test_string_value_with_comma(self):
+        result = _parse_gemma4_args('location:<|"|>Paris, France<|"|>')
+        assert result == {"location": "Paris, France"}
+
+    def test_multiple_string_values(self):
+        result = _parse_gemma4_args(
+            'location:<|"|>San Francisco<|"|>,unit:<|"|>celsius<|"|>'
+        )
+        assert result == {"location": "San Francisco", "unit": "celsius"}
+
+    def test_integer_value(self):
+        result = _parse_gemma4_args("count:42")
+        assert result == {"count": 42}
+
+    def test_float_value(self):
+        result = _parse_gemma4_args("score:3.14")
+        assert result == {"score": 3.14}
+
+    def test_boolean_true(self):
+        result = _parse_gemma4_args("flag:true")
+        assert result == {"flag": True}
+
+    def test_boolean_false(self):
+        result = _parse_gemma4_args("flag:false")
+        assert result == {"flag": False}
+
+    def test_null_value(self):
+        # Bare `null` must parse as None (Python), not the string "null".
+        # Without this, tool_choice=auto would emit `{"param": "null"}`
+        # instead of `{"param": null}` for nullable tool parameters.
+        result = _parse_gemma4_args("param:null")
+        assert result == {"param": None}
+        assert json.dumps(result) == '{"param": null}'
+
+    def test_mixed_types(self):
+        result = _parse_gemma4_args(
+            'name:<|"|>test<|"|>,count:42,active:true,score:3.14'
+        )
+        assert result == {
+            "name": "test",
+            "count": 42,
+            "active": True,
+            "score": 3.14,
+        }
+
+    def test_nested_object(self):
+        result = _parse_gemma4_args('nested:{inner:<|"|>value<|"|>}')
+        assert result == {"nested": {"inner": "value"}}
+
+    def test_array_of_strings(self):
+        result = _parse_gemma4_args('items:[<|"|>a<|"|>,<|"|>b<|"|>]')
+        assert result == {"items": ["a", "b"]}
+
+    def test_unterminated_string(self):
+        """Unterminated strings should take everything after the delimiter."""
+        result = _parse_gemma4_args('key:<|"|>unterminated')
+        assert result == {"key": "unterminated"}
+
+    def test_empty_value(self):
+        """Key with no value after colon."""
+        result = _parse_gemma4_args("key:")
+        assert result == {"key": ""}
+
+    def test_empty_value_partial_withheld(self):
+        """Key with no value is withheld in partial mode to avoid premature emission."""
+        result = _parse_gemma4_args("key:", partial=True)
+        assert result == {}
+        # also with a space after the colon
+        result = _parse_gemma4_args("key: ", partial=True)
+        assert result == {}
+
+    def test_empty_value_after_other_keys_partial_withheld(self):
+        """Trailing key with no value is withheld; earlier keys are kept."""
+        result = _parse_gemma4_args('name:<|"|>test<|"|>,flag:', partial=True)
+        assert result == {"name": "test"}
+
+    def test_trailing_dot_float_partial_withheld(self):
+        """Bare float ending with '.' is withheld in partial mode.
+
+        Regression test for #42047: float("108.") → 108.0 causes
+        streaming diff corruption (108.0 → 108.2 becomes 108.02).
+        """
+        # Single key with trailing dot — withheld entirely
+        result = _parse_gemma4_args("left:108.,right:22.8", partial=True)
+        assert result == {}
+
+        # Stable key before trailing-dot key — stable key is kept
+        result = _parse_gemma4_args(
+            'name:<|"|>test<|"|>,score:3.,count:1', partial=True
+        )
+        assert result == {"name": "test"}
+
+        # Non-partial mode parses trailing dot normally
+        result = _parse_gemma4_args("left:108.,right:22.8", partial=False)
+        assert result == {"left": 108.0, "right": 22.8}
+
+    @pytest.mark.timeout(5)
+    def test_malformed_partial_array(self):
+        result = _parse_gemma4_args(":[t:[]")
+        assert isinstance(result, dict)
+
+
+class TestParseGemma4Array:
+    def test_string_array(self):
+        result = _parse_gemma4_array('<|"|>a<|"|>,<|"|>b<|"|>')
+        assert result == ["a", "b"]
+
+    def test_empty_array(self):
+        result = _parse_gemma4_array("")
+        assert result == []
+
+    def test_bare_values(self):
+        result = _parse_gemma4_array("42,true,3.14")
+        assert result == [42, True, 3.14]
+
+    @pytest.mark.timeout(5)
+    def test_string_element_with_closing_bracket(self):
+        result = _parse_gemma4_array('[<|"|>a]b<|"|>,<|"|>c<|"|>],<|"|>tail<|"|>')
+        assert result == [["a]b", "c"], "tail"]
+
+    @pytest.mark.timeout(5)
+    def test_stray_closing_bracket(self):
+        result = _parse_gemma4_array("42,]trailing")
+        assert result == [42]
+
+    def test_trailing_dot_float_partial_withheld(self):
+        """Array elements with trailing dot withheld in partial mode."""
+        result = _parse_gemma4_array("108.,22.8", partial=True)
+        assert result == []
+
+        # Stable elements before trailing-dot element are kept
+        result = _parse_gemma4_array("42,108.,3", partial=True)
+        assert result == [42]
+
+
+# ---------------------------------------------------------------------------
+# Non-streaming extraction tests
+# ---------------------------------------------------------------------------
+
+
+class TestExtractToolCalls:
+    def test_no_tool_calls(self, parser, mock_request):
+        model_output = "Hello, how can I help you today?"
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is False
+        assert result.tool_calls == []
+        assert result.content == model_output
+
+    def test_single_tool_call(self, parser, mock_request):
+        model_output = (
+            '<|tool_call>call:get_weather{location:<|"|>London<|"|>}<tool_call|>'
+        )
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].function.name == "get_weather"
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"location": "London"}
+
+    def test_multiple_arguments(self, parser, mock_request):
+        model_output = (
+            "<|tool_call>call:get_weather{"
+            'location:<|"|>San Francisco<|"|>,'
+            'unit:<|"|>celsius<|"|>}'
+            "<tool_call|>"
+        )
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].function.name == "get_weather"
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"location": "San Francisco", "unit": "celsius"}
+
+    def test_text_before_tool_call(self, parser, mock_request):
+        model_output = (
+            "Let me check the weather for you. "
+            '<|tool_call>call:get_weather{location:<|"|>Paris<|"|>}'
+            "<tool_call|>"
+        )
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert result.content == "Let me check the weather for you."
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].function.name == "get_weather"
+
+    def test_multiple_tool_calls(self, parser, mock_request):
+        model_output = (
+            '<|tool_call>call:get_weather{location:<|"|>London<|"|>}'
+            "<tool_call|>"
+            '<|tool_call>call:get_time{location:<|"|>London<|"|>}'
+            "<tool_call|>"
+        )
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert len(result.tool_calls) == 2
+        assert result.tool_calls[0].function.name == "get_weather"
+        assert result.tool_calls[1].function.name == "get_time"
+
+    def test_nested_arguments(self, parser, mock_request):
+        model_output = (
+            "<|tool_call>call:complex_function{"
+            'nested:{inner:<|"|>value<|"|>},'
+            'list:[<|"|>a<|"|>,<|"|>b<|"|>]}'
+            "<tool_call|>"
+        )
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].function.name == "complex_function"
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"nested": {"inner": "value"}, "list": ["a", "b"]}
+
+    def test_tool_call_with_number_and_boolean(self, parser, mock_request):
+        model_output = (
+            "<|tool_call>call:set_status{"
+            "is_active:true,"
+            "count:42,"
+            "score:3.14}"
+            "<tool_call|>"
+        )
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].function.name == "set_status"
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {"is_active": True, "count": 42, "score": 3.14}
+
+    def test_incomplete_tool_call(self, parser, mock_request):
+        model_output = '<|tool_call>call:get_weather{location:<|"|>London'
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        # Incomplete — no <tool_call|> end marker, regex won't match
+        assert result.tools_called is False
+        assert result.content == model_output
+
+    def test_hyphenated_function_name(self, parser, mock_request):
+        """Ensure function names with hyphens are parsed correctly."""
+        model_output = (
+            '<|tool_call>call:get-weather{location:<|"|>London<|"|>}<tool_call|>'
+        )
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert result.tool_calls[0].function.name == "get-weather"
+
+    def test_dotted_function_name(self, parser, mock_request):
+        """Ensure function names with dots are parsed correctly."""
+        model_output = (
+            '<|tool_call>call:weather.get{location:<|"|>London<|"|>}<tool_call|>'
+        )
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert result.tool_calls[0].function.name == "weather.get"
+
+    def test_no_arguments(self, parser, mock_request):
+        """Tool calls with empty arguments."""
+        model_output = "<|tool_call>call:get_status{}<tool_call|>"
+        result = parser.extract_tool_calls(model_output, mock_request)
+
+        assert result.tools_called is True
+        assert result.tool_calls[0].function.name == "get_status"
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args == {}
+
+
+# ---------------------------------------------------------------------------
+# Streaming extraction tests
+# ---------------------------------------------------------------------------
+
+
+class TestStreamingExtraction:
+    """Tests for the streaming tool call extraction.
+
+    These simulate the token-by-token streaming that vLLM performs,
+    feeding incremental text to extract_tool_calls_streaming() and
+    verifying that the accumulated argument deltas form valid JSON.
+    """
+
+    def _simulate_streaming(
+        self, parser: Gemma4ToolParser, mock_request: Any, chunks: list[str]
+    ) -> list[tuple[Any, str]]:
+        """Feed chunks through the streaming parser and collect results.
+
+        Returns a list of (delta_message, accumulated_text) tuples.
+        """
+        results: list[tuple[Any, str]] = []
+        previous_text: str = ""
+        previous_token_ids: list[int] = []
+
+        for chunk in chunks:
+            current_text = previous_text + chunk
+            # Use token ID 48 for tool_call start, 49 for end, 0 otherwise
+            delta_token_ids: list[int] = []
+            if TOOL_CALL_START in chunk:
+                delta_token_ids.append(48)
+            elif TOOL_CALL_END in chunk:
+                delta_token_ids.append(49)
+            else:
+                delta_token_ids.append(0)
+
+            current_token_ids = previous_token_ids + delta_token_ids
+
+            delta = parser.extract_tool_calls_streaming(
+                previous_text=previous_text,
+                current_text=current_text,
+                delta_text=chunk,
+                previous_token_ids=tuple(previous_token_ids),
+                current_token_ids=tuple(current_token_ids),
+                delta_token_ids=tuple(delta_token_ids),
+                request=mock_request,
+            )
+            results.append((delta, current_text))
+            previous_text = current_text
+            previous_token_ids = list(current_token_ids)
+
+        return results
+
+    def _collect_arguments(self, results):
+        """Collect all argument deltas from streaming results into one string."""
+        args_text = ""
+        for delta, _ in results:
+            if delta and delta.tool_calls:
+                for tc in delta.tool_calls:
+                    func = tc.function if isinstance(tc.function, dict) else tc.function
+                    if isinstance(func, dict):
+                        arg = func.get("arguments", "")
+                    else:
+                        arg = getattr(func, "arguments", "") or ""
+                    if arg:
+                        args_text += arg
+        return args_text
+
+    def _collect_function_name(self, results):
+        """Extract the function name from streaming results."""
+        for delta, _ in results:
+            if delta and delta.tool_calls:
+                for tc in delta.tool_calls:
+                    func = tc.function if isinstance(tc.function, dict) else tc.function
+                    if isinstance(func, dict):
+                        name = func.get("name")
+                    else:
+                        name = getattr(func, "name", None)
+                    if name:
+                        return name
+        return None
+
+    def test_basic_streaming_single_tool(self, parser, mock_request):
+        """Simulate the exact streaming scenario from the bug report.
+
+        Model generates:
+        <|tool_call>call:get_weather{location:<|"|>Paris, France<|"|>}<tool_call|>
+
+        Expected: arguments should be valid JSON {"location": "Paris, France"}
+        """
+        chunks = [
+            "<|tool_call>",
+            "call:get_weather{",
+            'location:<|"|>Paris',
+            ", France",
+            '<|"|>}',
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+
+        # Verify function name
+        name = self._collect_function_name(results)
+        assert name == "get_weather", f"Expected 'get_weather', got '{name}'"
+
+        # Verify arguments form valid JSON
+        args_text = self._collect_arguments(results)
+        assert args_text, "No arguments were streamed"
+        parsed_args = json.loads(args_text)
+        assert parsed_args == {"location": "Paris, France"}
+
+    def test_streaming_multi_arg(self, parser, mock_request):
+        """Streaming with multiple arguments."""
+        chunks = [
+            "<|tool_call>",
+            "call:get_weather{",
+            'location:<|"|>Tokyo<|"|>,',
+            'unit:<|"|>celsius<|"|>}',
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+
+        name = self._collect_function_name(results)
+        assert name == "get_weather"
+
+        args_text = self._collect_arguments(results)
+        assert args_text
+        parsed_args = json.loads(args_text)
+        assert parsed_args == {"location": "Tokyo", "unit": "celsius"}
+
+    def test_streaming_no_extra_brace(self, parser, mock_request):
+        """Verify the closing } is NOT leaked into arguments (Bug #2)."""
+        chunks = [
+            "<|tool_call>",
+            "call:get_weather{",
+            'location:<|"|>London<|"|>}',
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        args_text = self._collect_arguments(results)
+        assert args_text
+
+        # The args text must be valid JSON (no extra })
+        parsed = json.loads(args_text)
+        assert parsed == {"location": "London"}
+
+        # Specifically assert no double-brace
+        assert args_text.count("}") <= 1, (
+            f"Arguments contain extra closing brace: {args_text!r}"
+        )
+
+    def test_streaming_no_unquoted_keys(self, parser, mock_request):
+        """Verify keys are properly quoted in JSON (Bug #1)."""
+        chunks = [
+            "<|tool_call>",
+            "call:get_weather{",
+            'location:<|"|>Paris<|"|>}',
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        args_text = self._collect_arguments(results)
+
+        # Must start with { and contain quoted key
+        assert args_text.lstrip().startswith("{"), (
+            f"Arguments don't start with '{{': {args_text!r}"
+        )
+        assert '"location"' in args_text, (
+            f"Key 'location' not properly quoted: {args_text!r}"
+        )
+
+    def test_streaming_name_no_call_prefix(self, parser, mock_request):
+        """Verify function name has no 'call:' prefix."""
+        chunks = [
+            "<|tool_call>",
+            "call:get_weather{",
+            'location:<|"|>Paris<|"|>}',
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        name = self._collect_function_name(results)
+        assert name == "get_weather"
+        assert not name.startswith("call:"), f"Name has 'call:' prefix: {name!r}"
+
+    def test_streaming_text_before_tool_call(self, parser, mock_request):
+        """Text before tool call should be emitted as content."""
+        chunks = [
+            "Let me check ",
+            "the weather. ",
+            "<|tool_call>",
+            "call:get_weather{",
+            'location:<|"|>London<|"|>}',
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+
+        # First chunks should be content
+        content_parts = []
+        for delta, _ in results:
+            if delta and delta.content:
+                content_parts.append(delta.content)
+
+        assert "".join(content_parts).strip().startswith("Let me check")
+
+    def test_streaming_numeric_args(self, parser, mock_request):
+        """Streaming with numeric and boolean argument values."""
+        chunks = [
+            "<|tool_call>",
+            "call:set_config{",
+            "count:42,",
+            "active:true}",
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        args_text = self._collect_arguments(results)
+        if args_text:
+            parsed_args = json.loads(args_text)
+            assert parsed_args["count"] == 42
+            assert parsed_args["active"] is True
+
+    def test_streaming_boolean_split_across_chunks(self, parser, mock_request):
+        """Boolean value split across token boundaries must not corrupt JSON."""
+        chunks = [
+            "<|tool_call>",
+            "call:search{input:{all:" + "true"[:3],
+            "e}}",
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        args_text = self._collect_arguments(results)
+        assert args_text, "No arguments were streamed"
+        parsed_args = json.loads(args_text)
+        assert parsed_args["input"]["all"] is True
+
+    def test_streaming_false_split_across_chunks(self, parser, mock_request):
+        """Boolean false split across chunks."""
+        chunks = [
+            "<|tool_call>",
+            "call:set{flag:" + "false"[:4],
+            "e}",
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        args_text = self._collect_arguments(results)
+        assert args_text, "No arguments were streamed"
+        parsed_args = json.loads(args_text)
+        assert parsed_args["flag"] is False
+
+    def test_streaming_number_split_across_chunks(self, parser, mock_request):
+        """Number split across chunks must not change type."""
+        chunks = [
+            "<|tool_call>",
+            "call:set{count:4",
+            "2}",
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        args_text = self._collect_arguments(results)
+        assert args_text, "No arguments were streamed"
+        parsed_args = json.loads(args_text)
+        assert parsed_args["count"] == 42
+
+    def test_streaming_empty_args(self, parser, mock_request):
+        """Tool call with no arguments."""
+        chunks = [
+            "<|tool_call>",
+            "call:get_status{}",
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        name = self._collect_function_name(results)
+        assert name == "get_status"
+
+    def test_streaming_split_delimiter_no_invalid_json(self, parser, mock_request):
+        """Partial <|"|> delimiter chars must not leak into streamed JSON.
+
+        Reproduces the bug from https://github.com/vllm-project/vllm/issues/38946
+        where a token boundary splits the string delimiter, leaving fragments
+        like '<|' at the end of a parsed value which then corrupt the JSON.
+        """
+        chunks = [
+            "<|tool_call>",
+            "call:todowrite{",
+            'content:<|"|>Buy milk<|',
+            '"|>}',
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+
+        args_text = self._collect_arguments(results)
+        assert args_text, "No arguments were streamed"
+
+        # Must be valid JSON — the original bug caused a JSON parse error
+        parsed_args = json.loads(args_text)
+        assert parsed_args["content"] == "Buy milk"
+
+        # Ensure no raw delimiter fragments leaked into the JSON
+        assert "<|" not in args_text, (
+            f"Partial delimiter leaked into JSON: {args_text!r}"
+        )
+
+    def test_streaming_does_not_duplicate_plain_text_after_tool_call(
+        self, parser, mock_request, monkeypatch
+    ):
+        """Buffered plain text after a tool call must not corrupt current_text."""
+        captured_current_texts: list[str] = []
+        original_extract_streaming = parser._extract_streaming
+
+        def wrapped_extract_streaming(previous_text, current_text, delta_text):
+            captured_current_texts.append(current_text)
+            return original_extract_streaming(previous_text, current_text, delta_text)
+
+        monkeypatch.setattr(parser, "_extract_streaming", wrapped_extract_streaming)
+
+        chunks = [
+            "<|tool_call>",
+            "call:get_weather{",
+            'location:<|"|>Paris<|"|>}',
+            "<tool_call|><",
+            "div>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        content_parts = [
+            delta.content for delta, _ in results if delta is not None and delta.content
+        ]
+        assert "".join(content_parts) == "<div>"
+        assert captured_current_texts[-1].endswith("<tool_call|><div>")
+        assert not captured_current_texts[-1].endswith("<tool_call|><<div>")
+
+    def test_streaming_html_argument_does_not_duplicate_tag_prefixes(
+        self, parser, mock_request
+    ):
+        """HTML content inside tool arguments must not be duplicated."""
+        chunks = [
+            "<|tool_call>",
+            "call:write_file{",
+            'path:<|"|>index.html<|"|>,',
+            'content:<|"|><!DOCTYPE html>\n<',
+            'html lang="zh-CN">\n<',
+            "head>\n    <",
+            'meta charset="UTF-8">\n    <',
+            'meta name="viewport" content="width=device-width">\n',
+            '<|"|>}',
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        args_text = self._collect_arguments(results)
+        assert args_text
+
+        parsed_args = json.loads(args_text)
+        assert parsed_args["path"] == "index.html"
+        assert (
+            parsed_args["content"] == "<!DOCTYPE html>\n"
+            '<html lang="zh-CN">\n'
+            "<head>\n"
+            '    <meta charset="UTF-8">\n'
+            '    <meta name="viewport" content="width=device-width">\n'
+        )
+
+    def test_streaming_trailing_bare_bool_not_duplicated(self, parser, mock_request):
+        """Trailing bare boolean must not be streamed twice."""
+        chunks = [
+            "<|tool_call>",
+            "call:Edit{",
+            'file_path:<|"|>src/env.py<|"|>,',
+            'old_string:<|"|>old_val<|"|>,',
+            'new_string:<|"|>new_val<|"|>,',
+            "replace_all:",
+            "false}",
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        args_text = self._collect_arguments(results)
+        assert args_text, "No arguments were streamed"
+
+        parsed_args = json.loads(args_text)
+        assert parsed_args == {
+            "file_path": "src/env.py",
+            "old_string": "old_val",
+            "new_string": "new_val",
+            "replace_all": False,
+        }
+
+        assert args_text.count("replace_all") == 1
diff --git a/tests/tool_parsers/test_glm47_moe_tool_parser.py b/tests/tool_parsers/test_glm47_moe_tool_parser.py
index c7170e67500f..51696c954788 100644
--- a/tests/tool_parsers/test_glm47_moe_tool_parser.py
+++ b/tests/tool_parsers/test_glm47_moe_tool_parser.py
@@ -25,14 +25,8 @@ def glm47_tokenizer():
 
 
 @pytest.fixture
-def glm47_tool_parser(glm47_tokenizer):
-    return Glm47MoeModelToolParser(glm47_tokenizer)
-
-
-@pytest.fixture
-def mock_request() -> ChatCompletionRequest:
-    request = Mock(spec=ChatCompletionRequest)
-    request.tools = [
+def sample_tools():
+    return [
         ChatCompletionToolsParam(
             function=FunctionDefinition(name="get_current_date", parameters={}),
         ),
@@ -49,6 +43,17 @@ def mock_request() -> ChatCompletionRequest:
             ),
         ),
     ]
+
+
+@pytest.fixture
+def glm47_tool_parser(glm47_tokenizer, sample_tools):
+    return Glm47MoeModelToolParser(glm47_tokenizer, tools=sample_tools)
+
+
+@pytest.fixture
+def mock_request(sample_tools) -> ChatCompletionRequest:
+    request = Mock(spec=ChatCompletionRequest)
+    request.tools = sample_tools
     request.tool_choice = "auto"
     return request
 
@@ -86,6 +91,12 @@ def test_args_with_newlines(self, glm47_tool_parser, mock_request):
         assert r.tools_called
         assert json.loads(r.tool_calls[0].function.arguments) == {"city": "Beijing"}
 
+    def test_whitespace_preserved_in_arg_values(self, glm47_tool_parser, mock_request):
+        out = "<tool_call>get_weather<arg_key>city</arg_key><arg_value>  Beijing  </arg_value></tool_call>"
+        r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert r.tools_called
+        assert json.loads(r.tool_calls[0].function.arguments) == {"city": "  Beijing  "}
+
     def test_content_before(self, glm47_tool_parser, mock_request):
         out = "Checking.<tool_call>get_current_date</tool_call>"
         r = glm47_tool_parser.extract_tool_calls(out, request=mock_request)
@@ -112,28 +123,24 @@ def test_whitespace_content_none(self, glm47_tool_parser, mock_request):
 
 
 def _reset(parser):
-    parser._buffer = ""
-    parser._in_tool_call = False
     parser.current_tool_name_sent = False
-    parser._current_tool_name = None
-    parser._pending_key = None
-    parser._streaming_string_value = False
     parser.prev_tool_call_arr = []
     parser.current_tool_id = -1
     parser.streamed_args_for_tool = []
     parser._tool_call_ids = []
-    parser._args_started = []
-    parser._args_closed = []
-    parser._seen_keys = []
+    parser._sent_content_idx = 0
 
 
 class TestGlm47Streaming:
     def test_no_args(self, glm47_tool_parser, mock_request):
         _reset(glm47_tool_parser)
-        for chunk in ["<tool_call>", "get_current_date", "</tool_call>"]:
+        chunks = ["<tool_call>", "get_current_date", "</tool_call>"]
+        current_text = ""
+        for chunk in chunks:
+            current_text += chunk
             glm47_tool_parser.extract_tool_calls_streaming(
                 previous_text="",
-                current_text="",
+                current_text=current_text,
                 delta_text=chunk,
                 previous_token_ids=[],
                 current_token_ids=[],
@@ -144,10 +151,7 @@ def test_no_args(self, glm47_tool_parser, mock_request):
 
     def test_with_args(self, glm47_tool_parser, mock_request):
         _reset(glm47_tool_parser)
-        # Split chunks so that the incremental string streaming path
-        # processes the value, its closing tag, and the tool-call closing
-        # tag in separate calls.
-        for chunk in [
+        chunks = [
             "<tool_call>",
             "get_weather\n",
             "<arg_key>city</arg_key>",
@@ -155,14 +159,18 @@ def test_with_args(self, glm47_tool_parser, mock_request):
             "Beijing",
             "</arg_value>",
             "</tool_call>",
-        ]:
+        ]
+        current_text = ""
+        for chunk in chunks:
+            current_text += chunk
             glm47_tool_parser.extract_tool_calls_streaming(
                 previous_text="",
-                current_text="",
+                current_text=current_text,
                 delta_text=chunk,
                 previous_token_ids=[],
                 current_token_ids=[],
                 delta_token_ids=[],
                 request=mock_request,
             )
-        assert glm47_tool_parser.prev_tool_call_arr[0]["arguments"]["city"] == "Beijing"
+        args = json.loads(glm47_tool_parser.prev_tool_call_arr[0]["arguments"])
+        assert args["city"] == "Beijing"
diff --git a/tests/tool_parsers/test_glm4_moe_tool_parser.py b/tests/tool_parsers/test_glm4_moe_tool_parser.py
index 213cc75db7ea..b0300297ddc4 100644
--- a/tests/tool_parsers/test_glm4_moe_tool_parser.py
+++ b/tests/tool_parsers/test_glm4_moe_tool_parser.py
@@ -5,6 +5,7 @@
 from unittest.mock import Mock
 
 import pytest
+from openai.types.responses import FunctionTool
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
@@ -27,14 +28,8 @@ def glm4_moe_tokenizer():
 
 
 @pytest.fixture
-def glm4_moe_tool_parser(glm4_moe_tokenizer):
-    return Glm4MoeModelToolParser(glm4_moe_tokenizer)
-
-
-@pytest.fixture
-def mock_request() -> ChatCompletionRequest:
-    request = Mock(spec=ChatCompletionRequest)
-    request.tools = [  # GLM45 parser needs this attribute to enable tool parsing.
+def sample_tools():
+    return [
         ChatCompletionToolsParam(
             function=FunctionDefinition(
                 name="get_weather",
@@ -42,6 +37,17 @@ def mock_request() -> ChatCompletionRequest:
             ),
         ),
     ]
+
+
+@pytest.fixture
+def glm4_moe_tool_parser(glm4_moe_tokenizer, sample_tools):
+    return Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=sample_tools)
+
+
+@pytest.fixture
+def mock_request(sample_tools) -> ChatCompletionRequest:
+    request = Mock(spec=ChatCompletionRequest)
+    request.tools = sample_tools
     return request
 
 
@@ -352,81 +358,69 @@ def test_extract_tool_calls_mixed_content(glm4_moe_tool_parser, mock_request):
 
 def test_streaming_basic_functionality(glm4_moe_tool_parser, mock_request):
     """Test basic streaming functionality."""
-    # Reset streaming state
-    glm4_moe_tool_parser.current_tool_name_sent = False
-    glm4_moe_tool_parser.prev_tool_call_arr = []
-    glm4_moe_tool_parser.current_tool_id = -1
-    glm4_moe_tool_parser.streamed_args_for_tool = []
+    _reset_streaming_state(glm4_moe_tool_parser)
 
-    # Test with a simple tool call
     current_text = """<tool_call>get_weather
 <arg_key>city</arg_key>
 <arg_value>Beijing</arg_value>
 </tool_call>"""
 
-    # Mock token IDs for testing
-    tool_call_start_id = glm4_moe_tool_parser.tool_call_start_token_id or 12345
-    tool_call_end_id = glm4_moe_tool_parser.tool_call_end_token_id or 12346
-
     result = glm4_moe_tool_parser.extract_tool_calls_streaming(
         previous_text="",
         current_text=current_text,
-        delta_text="</tool_call>",
+        delta_text=current_text,
         previous_token_ids=[],
-        current_token_ids=[tool_call_start_id, tool_call_end_id],
-        delta_token_ids=[tool_call_end_id],
+        current_token_ids=[],
+        delta_token_ids=[],
         request=mock_request,
     )
 
-    # The result behavior depends on the streaming state
-    # This test mainly ensures no exceptions are thrown
-    assert result is None or hasattr(result, "tool_calls") or hasattr(result, "content")
+    # Should return tool call with name and arguments in one shot
+    assert result is not None
+    assert result.tool_calls is not None
+    assert len(result.tool_calls) >= 1
 
 
 def test_streaming_no_tool_calls(glm4_moe_tool_parser, mock_request):
     """Test streaming when there are no tool calls."""
+    _reset_streaming_state(glm4_moe_tool_parser)
+
     current_text = "This is just regular text without any tool calls."
 
     result = glm4_moe_tool_parser.extract_tool_calls_streaming(
-        previous_text="This is just regular text",
+        previous_text="",
         current_text=current_text,
-        delta_text=" without any tool calls.",
+        delta_text=current_text,
         previous_token_ids=[],
         current_token_ids=[],
         delta_token_ids=[],
         request=mock_request,
     )
 
-    # Should return the delta text as content
+    # Should return content
     assert result is not None
-    assert hasattr(result, "content")
-    assert result.content == " without any tool calls."
+    assert result.content == current_text
 
 
 def test_streaming_with_content_before_tool_calls(glm4_moe_tool_parser, mock_request):
     """Test streaming when there's content before tool calls."""
-    # Reset streaming state
-    glm4_moe_tool_parser.current_tool_name_sent = False
-    glm4_moe_tool_parser.prev_tool_call_arr = []
-    glm4_moe_tool_parser.current_tool_id = -1
-    glm4_moe_tool_parser.streamed_args_for_tool = []
+    _reset_streaming_state(glm4_moe_tool_parser)
 
-    current_text = "I will help you get the weather<tool_call>"
+    current_text = "I will help you get the weather.<tool_call>"
 
     result = glm4_moe_tool_parser.extract_tool_calls_streaming(
-        previous_text="I will help you",
+        previous_text="",
         current_text=current_text,
-        delta_text="get the weather.<tool_call>",
+        delta_text=current_text,
         previous_token_ids=[],
         current_token_ids=[],
         delta_token_ids=[],
         request=mock_request,
     )
 
-    # Should return content when no tool call tokens are detected
+    # Should return content before the <tool_call> tag
     assert result is not None
-    assert hasattr(result, "content")
-    assert result.content == "get the weather."
+    assert result.content == "I will help you get the weather."
 
 
 def test_extract_tool_calls_special_characters(glm4_moe_tool_parser, mock_request):
@@ -474,26 +468,19 @@ def test_extract_tool_calls_incomplete_tool_call(glm4_moe_tool_parser, mock_requ
 
 def _reset_streaming_state(parser):
     """Helper to reset parser streaming state."""
-    parser._buffer = ""
-    parser._in_tool_call = False
     parser.current_tool_name_sent = False
-    parser._current_tool_name = None
-    parser._pending_key = None
-    parser._streaming_string_value = False
     parser.prev_tool_call_arr = []
     parser.current_tool_id = -1
     parser.streamed_args_for_tool = []
     parser._tool_call_ids = []
-    parser._args_started = []
-    parser._args_closed = []
-    parser._seen_keys = []
+    parser._sent_content_idx = 0
 
 
 def test_streaming_incremental_string_value(glm4_moe_tool_parser, mock_request):
     """Test incremental streaming of string argument values."""
     _reset_streaming_state(glm4_moe_tool_parser)
 
-    # Simulate streaming a tool call character by character
+    # Simulate streaming a tool call chunk by chunk
     chunks = [
         "<tool_call>",
         "get_weather\n",
@@ -506,30 +493,31 @@ def test_streaming_incremental_string_value(glm4_moe_tool_parser, mock_request):
     ]
 
     collected_fragments = []
+    current_text = ""
     for chunk in chunks:
+        current_text += chunk
         result = glm4_moe_tool_parser.extract_tool_calls_streaming(
             previous_text="",
-            current_text="",
+            current_text=current_text,
             delta_text=chunk,
             previous_token_ids=[],
             current_token_ids=[],
             delta_token_ids=[],
             request=mock_request,
         )
-        if result is not None and hasattr(result, "tool_calls") and result.tool_calls:
+        if result is not None and result.tool_calls:
             for tc in result.tool_calls:
-                if hasattr(tc, "function") and tc.function:
-                    func = tc.function
-                    if isinstance(func, dict):
-                        if func.get("arguments"):
-                            collected_fragments.append(func["arguments"])
-                        if func.get("name"):
-                            collected_fragments.append(f"name:{func['name']}")
-                    else:
-                        if func.arguments:
-                            collected_fragments.append(func.arguments)
-                        if func.name:
-                            collected_fragments.append(f"name:{func.name}")
+                func = tc.function
+                if isinstance(func, dict):
+                    if func.get("arguments"):
+                        collected_fragments.append(func["arguments"])
+                    if func.get("name"):
+                        collected_fragments.append(f"name:{func['name']}")
+                else:
+                    if func.arguments:
+                        collected_fragments.append(func.arguments)
+                    if func.name:
+                        collected_fragments.append(f"name:{func.name}")
 
     # Verify we got incremental streaming of the argument value
     assert len(collected_fragments) > 0
@@ -542,11 +530,11 @@ def test_streaming_empty_tool_call(glm4_moe_tool_parser, mock_request):
     """Test that empty tool calls don't cause infinite loops."""
     _reset_streaming_state(glm4_moe_tool_parser)
 
-    # Empty tool call should be handled gracefully
+    current_text = "<tool_call></tool_call>"
     result = glm4_moe_tool_parser.extract_tool_calls_streaming(
         previous_text="",
-        current_text="",
-        delta_text="<tool_call></tool_call>",
+        current_text=current_text,
+        delta_text=current_text,
         previous_token_ids=[],
         current_token_ids=[],
         delta_token_ids=[],
@@ -556,60 +544,52 @@ def test_streaming_empty_tool_call(glm4_moe_tool_parser, mock_request):
     # Should not hang and should return something (None or content)
     # The key is that this completes without hanging
     assert result is None or hasattr(result, "content") or hasattr(result, "tool_calls")
-    # State should be properly reset
-    assert glm4_moe_tool_parser.current_tool_id == -1
 
 
 def test_streaming_prev_tool_call_arr_updates(glm4_moe_tool_parser, mock_request):
-    """Test that prev_tool_call_arr contains parsed dict after tool call."""
+    """Test that prev_tool_call_arr is populated incrementally."""
     _reset_streaming_state(glm4_moe_tool_parser)
 
-    # Stream a complete tool call
-    name_only = {"name": "get_weather", "arguments": {}}
-    name_and_args = {"name": "get_weather", "arguments": {"city": "Beijing"}}
     chunks = [
-        # Delta, expected streamed_args_for_tool, expected prev_tool_call_arr
-        ("<tool_call>get_weather\n", "", name_only),
-        ("<arg_key>city</arg_key>", "", name_only),
-        ("<arg_value>Beijing</arg_value>", '{"city": "Beijing"', name_only),
-        # Note: arguments are only updated when the tool call is complete.
-        ("</tool_call>", '{"city": "Beijing"}', name_and_args),
+        "<tool_call>get_weather\n",
+        "<arg_key>city</arg_key>",
+        "<arg_value>Beijing</arg_value>",
+        "</tool_call>",
     ]
 
-    for chunk, exp_streamed, exp_prev_tc in chunks:
+    current_text = ""
+    for chunk in chunks:
+        current_text += chunk
         glm4_moe_tool_parser.extract_tool_calls_streaming(
             previous_text="",
-            current_text="",
+            current_text=current_text,
             delta_text=chunk,
             previous_token_ids=[],
             current_token_ids=[],
             delta_token_ids=[],
             request=mock_request,
         )
-        assert glm4_moe_tool_parser.streamed_args_for_tool[0] == exp_streamed
-        assert glm4_moe_tool_parser.prev_tool_call_arr[0] == exp_prev_tc
 
-    # After the tool call completes, prev_tool_call_arr should have parsed dict
+    # After the tool call completes, prev_tool_call_arr should be populated
     assert len(glm4_moe_tool_parser.prev_tool_call_arr) == 1
     tool_entry = glm4_moe_tool_parser.prev_tool_call_arr[0]
     assert tool_entry.get("name") == "get_weather"
-    # arguments should be a dict, not a string
-    args = tool_entry.get("arguments")
-    assert isinstance(args, dict), f"Expected dict, got {type(args)}"
-    assert args.get("city") == "Beijing"
 
-    # Test equivalence of prev_tool_call_arr and streamed_args_for_tool
-    # Simulates logic in chat_completion/serving.py:chat_completion_stream_generator
-    tool_call_json = json.dumps(tool_entry.get("arguments", {}))
-    streamed_content = glm4_moe_tool_parser.streamed_args_for_tool[0]
-    assert tool_call_json.startswith(streamed_content)
+    # arguments is a JSON string in the re-parse approach
+    args_str = tool_entry.get("arguments")
+    assert isinstance(args_str, str), f"Expected str, got {type(args_str)}"
+    parsed = json.loads(args_str)
+    assert parsed["city"] == "Beijing"
+
+    # streamed_args_for_tool should match prev_tool_call_arr arguments
+    streamed = glm4_moe_tool_parser.streamed_args_for_tool[0]
+    assert streamed == args_str
 
 
 def test_streaming_multiple_tool_calls_sequential(glm4_moe_tool_parser, mock_request):
     """Test streaming multiple sequential tool calls."""
     _reset_streaming_state(glm4_moe_tool_parser)
 
-    # Stream two tool calls
     chunks = [
         "<tool_call>get_weather\n",
         "<arg_key>city</arg_key>",
@@ -621,10 +601,12 @@ def test_streaming_multiple_tool_calls_sequential(glm4_moe_tool_parser, mock_req
         "</tool_call>",
     ]
 
+    current_text = ""
     for chunk in chunks:
+        current_text += chunk
         glm4_moe_tool_parser.extract_tool_calls_streaming(
             previous_text="",
-            current_text="",
+            current_text=current_text,
             delta_text=chunk,
             previous_token_ids=[],
             current_token_ids=[],
@@ -634,15 +616,16 @@ def test_streaming_multiple_tool_calls_sequential(glm4_moe_tool_parser, mock_req
 
     # Should have two tool calls in prev_tool_call_arr
     assert len(glm4_moe_tool_parser.prev_tool_call_arr) == 2
-    assert glm4_moe_tool_parser.prev_tool_call_arr[0]["arguments"]["city"] == "Beijing"
-    assert glm4_moe_tool_parser.prev_tool_call_arr[1]["arguments"]["city"] == "Shanghai"
+    args0 = json.loads(glm4_moe_tool_parser.prev_tool_call_arr[0]["arguments"])
+    args1 = json.loads(glm4_moe_tool_parser.prev_tool_call_arr[1]["arguments"])
+    assert args0["city"] == "Beijing"
+    assert args1["city"] == "Shanghai"
 
 
 def test_streaming_json_escape_in_string(glm4_moe_tool_parser, mock_request):
     """Test that special characters in string values are properly escaped."""
     _reset_streaming_state(glm4_moe_tool_parser)
 
-    # String with characters that need JSON escaping
     chunks = [
         "<tool_call>send_message\n",
         "<arg_key>message</arg_key>",
@@ -650,10 +633,12 @@ def test_streaming_json_escape_in_string(glm4_moe_tool_parser, mock_request):
         "</tool_call>",
     ]
 
+    current_text = ""
     for chunk in chunks:
+        current_text += chunk
         glm4_moe_tool_parser.extract_tool_calls_streaming(
             previous_text="",
-            current_text="",
+            current_text=current_text,
             delta_text=chunk,
             previous_token_ids=[],
             current_token_ids=[],
@@ -664,21 +649,18 @@ def test_streaming_json_escape_in_string(glm4_moe_tool_parser, mock_request):
     # The streamed_args_for_tool should contain valid JSON
     assert len(glm4_moe_tool_parser.streamed_args_for_tool) == 1
     args_json = glm4_moe_tool_parser.streamed_args_for_tool[0]
-    # Should be parseable as JSON
     parsed = json.loads(args_json)
     assert "message" in parsed
-    # The value should preserve the special characters
     assert '"' in parsed["message"] or "world" in parsed["message"]
 
 
-def test_streaming_long_content_incremental(glm4_moe_tool_parser):
+def test_streaming_long_content_incremental(glm4_moe_tokenizer):
     """Test incremental streaming of long content (Issue #32829).
 
     This is the core fix: for long string values like code (4000+ chars),
     the parser should stream incrementally rather than buffering until
     complete. This test verifies we get many fragments, not just 1-3.
     """
-    _reset_streaming_state(glm4_moe_tool_parser)
 
     # Bubble sort example from Issue #32829 - realistic long content
     bubble_sort_code = '''#!/usr/bin/env python3
@@ -705,27 +687,28 @@ def bubble_sort(arr):
     sorted_arr = bubble_sort(test_arr.copy())
     print(f"Sorted: {sorted_arr}")'''
 
-    # Create a request with tool schema to enable string type detection
+    # Create tools with schema to enable string type detection
     # This is required for incremental streaming of string values
+    tools = [
+        ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="write_to_file",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "file_path": {"type": "string"},
+                        "content": {"type": "string"},
+                    },
+                },
+            ),
+        ),
+    ]
+    glm4_moe_tool_parser = Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=tools)
     request = ChatCompletionRequest(
         model=MODEL,
         messages=[],
-        tools=[
-            {
-                "type": "function",
-                "function": {
-                    "name": "write_to_file",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "file_path": {"type": "string"},
-                            "content": {"type": "string"},
-                        },
-                    },
-                },
-            }
-        ],
-    )  # type: ignore
+        tools=tools,
+    )
 
     # Simulate token-based streaming (special tags as single tokens)
     chunks = [
@@ -744,27 +727,27 @@ def bubble_sort(arr):
 
     # Count argument fragments
     fragment_count = 0
+    current_text = ""
     for chunk in chunks:
+        current_text += chunk
         result = glm4_moe_tool_parser.extract_tool_calls_streaming(
             previous_text="",
-            current_text="",
+            current_text=current_text,
             delta_text=chunk,
             previous_token_ids=[],
             current_token_ids=[],
             delta_token_ids=[],
             request=request,
         )
-        if result is not None and hasattr(result, "tool_calls") and result.tool_calls:
+        if result is not None and result.tool_calls:
             for tc in result.tool_calls:
-                if hasattr(tc, "function") and tc.function:
-                    func = tc.function
-                    args = (
-                        func.get("arguments")
-                        if isinstance(func, dict)
-                        else getattr(func, "arguments", None)
-                    )
-                    if args:
-                        fragment_count += 1
+                func = tc.function
+                if isinstance(func, dict):
+                    args = func.get("arguments")
+                else:
+                    args = getattr(func, "arguments", None)
+                if args:
+                    fragment_count += 1
 
     # For true incremental streaming, we expect many fragments (10+)
     # Old buffered implementation would give only 1-3 fragments
@@ -817,3 +800,686 @@ def test_extract_tool_calls_numeric_deserialization(glm4_moe_tool_parser, mock_r
     # Boolean should be deserialized as bool
     assert args["enabled"] is True
     assert isinstance(args["enabled"], bool)
+
+
+def test_whitespace_preserved_in_arg_values(glm4_moe_tokenizer):
+    """Test that string arguments preserve leading and trailing whitespace."""
+    tools = [
+        ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="apply_diff",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "s": {"type": "string"},
+                    },
+                    "required": ["s"],
+                },
+            ),
+        ),
+    ]
+    parser = Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+
+    model_output = """<tool_call>apply_diff
+<arg_key>s</arg_key>
+<arg_value>    indented code    </arg_value>
+</tool_call>"""
+
+    extracted_tool_calls = parser.extract_tool_calls(model_output, request=request)
+    args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
+
+    assert args["s"] == "    indented code    "
+
+
+def test_zero_argument_tool_call(glm4_moe_tool_parser, mock_request):
+    """Regression: zero-argument tool call crash (PR #32321)."""
+    model_output = """<tool_call>get_time
+</tool_call>"""
+
+    extracted = glm4_moe_tool_parser.extract_tool_calls(
+        model_output, request=mock_request
+    )  # type: ignore[arg-type]
+
+    assert extracted.tools_called
+    assert len(extracted.tool_calls) == 1
+    assert extracted.tool_calls[0].function.name == "get_time"
+    args = json.loads(extracted.tool_calls[0].function.arguments)
+    assert args == {}
+
+
+def test_malformed_tool_call_no_regex_match(glm4_moe_tool_parser, mock_request):
+    """Regression: malformed tool_call with no regex match (PR #32321)."""
+    model_output = "<tool_call>   </tool_call>"
+
+    extracted = glm4_moe_tool_parser.extract_tool_calls(
+        model_output, request=mock_request
+    )  # type: ignore[arg-type]
+
+    assert extracted.tools_called is False
+    assert extracted.tool_calls == []
+
+
+def test_delimiter_preserved_transformers_5x(glm4_moe_tool_parser):
+    """Regression: adjust_request sets skip_special_tokens=False (PR #31622)."""
+    # Tools enabled
+    request_with_tools = ChatCompletionRequest(
+        model=MODEL,
+        messages=[],
+        tools=[
+            {
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {"city": {"type": "string"}},
+                    },
+                },
+            }
+        ],
+    )  # type: ignore
+    adjusted = glm4_moe_tool_parser.adjust_request(request_with_tools)
+    assert adjusted.skip_special_tokens is False
+
+    # tool_choice="none"
+    request_no_choice = ChatCompletionRequest(
+        model=MODEL,
+        messages=[],
+        tools=[
+            {
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {"city": {"type": "string"}},
+                    },
+                },
+            }
+        ],
+        tool_choice="none",
+    )  # type: ignore
+    adjusted_none = glm4_moe_tool_parser.adjust_request(request_no_choice)
+    assert adjusted_none.skip_special_tokens is True
+
+    # No tools at all
+    request_no_tools = ChatCompletionRequest(
+        model=MODEL,
+        messages=[],
+    )  # type: ignore
+    adjusted_empty = glm4_moe_tool_parser.adjust_request(request_no_tools)
+    assert adjusted_empty.skip_special_tokens is True
+
+
+def test_unicode_characters_preserved(glm4_moe_tool_parser, mock_request):
+    """Regression: Unicode chars must not be escaped to \\uXXXX (PR #30920)."""
+    model_output = """<tool_call>send_message
+<arg_key>greeting</arg_key>
+<arg_value>你好世界</arg_value>
+<arg_key>emoji</arg_key>
+<arg_value>🎉</arg_value>
+</tool_call>"""
+
+    extracted = glm4_moe_tool_parser.extract_tool_calls(
+        model_output, request=mock_request
+    )  # type: ignore[arg-type]
+
+    assert extracted.tools_called
+    assert len(extracted.tool_calls) == 1
+
+    raw_args = extracted.tool_calls[0].function.arguments
+    assert "你好世界" in raw_args
+    assert "🎉" in raw_args
+    assert "\\u4f60" not in raw_args
+    parsed_args = json.loads(raw_args)
+    assert parsed_args["greeting"] == "你好世界"
+    assert parsed_args["emoji"] == "🎉"
+
+
+def test_streaming_multi_token_chunks(glm4_moe_tool_parser, mock_request):
+    """Test that multi-token chunks (stream_interval > 1) are handled correctly.
+
+    With stream_interval > 1 or MTP, multiple XML tags arrive in one delta.
+    The old buffer-based parser could only return one delta per call, losing
+    data on the final output. The re-parse approach handles this correctly.
+    """
+    _reset_streaming_state(glm4_moe_tool_parser)
+
+    # Simulate stream_interval=3: chunks contain multiple XML tags
+    chunks = [
+        "<tool_call>get_weather\n<arg_key>city</arg_key><arg_value>Bei",
+        "jing</arg_value>",
+        "</tool_call>",
+    ]
+
+    current_text = ""
+    for chunk in chunks:
+        current_text += chunk
+        glm4_moe_tool_parser.extract_tool_calls_streaming(
+            previous_text="",
+            current_text=current_text,
+            delta_text=chunk,
+            previous_token_ids=[],
+            current_token_ids=[],
+            delta_token_ids=[],
+            request=mock_request,
+        )
+
+    # All data should be captured despite multi-token chunks
+    assert len(glm4_moe_tool_parser.prev_tool_call_arr) == 1
+    args = json.loads(glm4_moe_tool_parser.streamed_args_for_tool[0])
+    assert args["city"] == "Beijing"
+
+
+def test_streaming_entire_tool_call_at_once(glm4_moe_tool_parser, mock_request):
+    """Test that a complete tool call arriving in one delta works.
+
+    This simulates the extreme MTP case where all tokens arrive at once.
+    """
+    _reset_streaming_state(glm4_moe_tool_parser)
+
+    full_text = (
+        "<tool_call>get_weather\n"
+        "<arg_key>city</arg_key>"
+        "<arg_value>Beijing</arg_value>"
+        "</tool_call>"
+    )
+
+    result = glm4_moe_tool_parser.extract_tool_calls_streaming(
+        previous_text="",
+        current_text=full_text,
+        delta_text=full_text,
+        previous_token_ids=[],
+        current_token_ids=[],
+        delta_token_ids=[],
+        request=mock_request,
+    )
+
+    # Should emit tool call with complete arguments in one shot
+    assert result is not None
+    assert result.tool_calls is not None
+
+    # Verify final state
+    assert len(glm4_moe_tool_parser.prev_tool_call_arr) == 1
+    args = json.loads(glm4_moe_tool_parser.streamed_args_for_tool[0])
+    assert args["city"] == "Beijing"
+
+
+def test_streaming_content_between_tool_calls_multi_token(
+    glm4_moe_tool_parser, mock_request
+):
+    """Test content between tool calls with multi-token chunks."""
+    _reset_streaming_state(glm4_moe_tool_parser)
+
+    # Deliver everything at once — worst case for the old buffer parser
+    full_text = (
+        "I will check.\n"
+        "<tool_call>get_weather\n"
+        "<arg_key>city</arg_key>"
+        "<arg_value>Beijing</arg_value>"
+        "</tool_call>"
+        "\nAlso Shanghai.\n"
+        "<tool_call>get_weather\n"
+        "<arg_key>city</arg_key>"
+        "<arg_value>Shanghai</arg_value>"
+        "</tool_call>"
+    )
+
+    # First call with partial text (content only)
+    partial = "I will check.\n"
+    result1 = glm4_moe_tool_parser.extract_tool_calls_streaming(
+        previous_text="",
+        current_text=partial,
+        delta_text=partial,
+        previous_token_ids=[],
+        current_token_ids=[],
+        delta_token_ids=[],
+        request=mock_request,
+    )
+    assert result1 is not None
+    assert result1.content == "I will check.\n"
+
+    # Second call with everything
+    glm4_moe_tool_parser.extract_tool_calls_streaming(
+        previous_text="",
+        current_text=full_text,
+        delta_text=full_text[len(partial) :],
+        previous_token_ids=[],
+        current_token_ids=[],
+        delta_token_ids=[],
+        request=mock_request,
+    )
+
+    # Should have both tool calls
+    assert len(glm4_moe_tool_parser.prev_tool_call_arr) == 2
+    args0 = json.loads(glm4_moe_tool_parser.prev_tool_call_arr[0]["arguments"])
+    args1 = json.loads(glm4_moe_tool_parser.prev_tool_call_arr[1]["arguments"])
+    assert args0["city"] == "Beijing"
+    assert args1["city"] == "Shanghai"
+
+
+def test_streaming_multi_token_with_multiple_args(glm4_moe_tokenizer):
+    """Test multi-token streaming with multiple arguments of mixed types."""
+    tools = [
+        ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="calculate",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "operation": {"type": "string"},
+                        "a": {"type": "number"},
+                        "b": {"type": "number"},
+                    },
+                },
+            ),
+        ),
+    ]
+    parser = Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=tools)
+    request = ChatCompletionRequest(
+        model=MODEL,
+        messages=[],
+        tools=tools,
+    )
+
+    # All arguments arrive in two big chunks (simulates stream_interval=5)
+    chunks = [
+        "<tool_call>calculate\n<arg_key>operation</arg_key><arg_value>add</arg_value><arg_key>a</arg_key>",
+        "<arg_value>42</arg_value><arg_key>b</arg_key><arg_value>3.14</arg_value></tool_call>",
+    ]
+
+    current_text = ""
+    for chunk in chunks:
+        current_text += chunk
+        parser.extract_tool_calls_streaming(
+            previous_text="",
+            current_text=current_text,
+            delta_text=chunk,
+            previous_token_ids=[],
+            current_token_ids=[],
+            delta_token_ids=[],
+            request=request,
+        )
+
+    args = json.loads(parser.streamed_args_for_tool[0])
+    assert args["operation"] == "add"
+    assert args["a"] == 42
+    assert args["b"] == 3.14
+
+
+def _simulate_streaming(tokenizer, parser, request, text, stream_interval=1):
+    """Simulate streaming with a given stream_interval.
+
+    Tokens are batched into chunks of ``stream_interval`` tokens,
+    mimicking how the output processor delivers them.
+    Returns a list of non-None DeltaMessages.
+    """
+    tokens = tokenizer.encode(text)
+    previous_text = ""
+    deltas = []
+    for i in range(0, len(tokens), stream_interval):
+        chunk_ids = tokens[i : i + stream_interval]
+        delta_text = tokenizer.decode(chunk_ids)
+        current_text = previous_text + delta_text
+        delta = parser.extract_tool_calls_streaming(
+            previous_text=previous_text,
+            current_text=current_text,
+            delta_text=delta_text,
+            previous_token_ids=[],
+            current_token_ids=[],
+            delta_token_ids=chunk_ids,
+            request=request,
+        )
+        previous_text = current_text
+        if delta is not None:
+            deltas.append(delta)
+    return deltas
+
+
+def _collect_from_deltas(deltas):
+    """Reconstruct tool call names/args and content from a delta stream."""
+    tools: dict[int, dict] = {}
+    content_parts: list[str] = []
+    for d in deltas:
+        if d.content:
+            content_parts.append(d.content)
+        if d.tool_calls:
+            for tc in d.tool_calls:
+                func = tc.function
+                if isinstance(func, dict):
+                    name = func.get("name")
+                    args = func.get("arguments")
+                else:
+                    name = getattr(func, "name", None)
+                    args = getattr(func, "arguments", None)
+                idx = tc.index
+                if idx not in tools:
+                    tools[idx] = {"name": None, "args_fragments": []}
+                if name:
+                    tools[idx]["name"] = name
+                if args:
+                    tools[idx]["args_fragments"].append(args)
+    return content_parts, tools
+
+
+@pytest.mark.parametrize("stream_interval", [1, 2, 3, 5, 8])
+def test_stream_interval_single_tool_call(glm4_moe_tokenizer, stream_interval):
+    """Tool call streaming produces correct name + args at any interval."""
+    tools = [
+        ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="get_weather",
+                parameters={
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                },
+            ),
+        ),
+    ]
+    parser = Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+
+    text = (
+        "<tool_call>get_weather\n"
+        "<arg_key>city</arg_key>"
+        "<arg_value>Beijing</arg_value>"
+        "</tool_call>"
+    )
+
+    deltas = _simulate_streaming(
+        glm4_moe_tokenizer, parser, request, text, stream_interval
+    )
+    _, tools_found = _collect_from_deltas(deltas)
+
+    assert 0 in tools_found
+    assert tools_found[0]["name"] == "get_weather"
+    args_json = "".join(tools_found[0]["args_fragments"])
+    parsed = json.loads(args_json)
+    assert parsed == {"city": "Beijing"}
+
+
+@pytest.mark.parametrize("stream_interval", [1, 2, 3, 5, 8])
+def test_stream_interval_multiple_tool_calls(glm4_moe_tokenizer, stream_interval):
+    """Multiple sequential tool calls with correct indices at any interval."""
+    tools = [
+        ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="get_weather",
+                parameters={
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                },
+            ),
+        ),
+    ]
+    parser = Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+
+    text = (
+        "<tool_call>get_weather\n"
+        "<arg_key>city</arg_key>"
+        "<arg_value>Beijing</arg_value>"
+        "</tool_call>"
+        "<tool_call>get_weather\n"
+        "<arg_key>city</arg_key>"
+        "<arg_value>Shanghai</arg_value>"
+        "</tool_call>"
+    )
+
+    deltas = _simulate_streaming(
+        glm4_moe_tokenizer, parser, request, text, stream_interval
+    )
+    _, tools_found = _collect_from_deltas(deltas)
+
+    assert 0 in tools_found and 1 in tools_found
+    args0 = json.loads("".join(tools_found[0]["args_fragments"]))
+    args1 = json.loads("".join(tools_found[1]["args_fragments"]))
+    assert args0 == {"city": "Beijing"}
+    assert args1 == {"city": "Shanghai"}
+
+
+@pytest.mark.parametrize("stream_interval", [1, 2, 3, 5, 8])
+def test_stream_interval_content_then_tool_call(glm4_moe_tokenizer, stream_interval):
+    """Content before a tool call is fully emitted before tool deltas."""
+    tools = [
+        ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="get_weather",
+                parameters={
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                },
+            ),
+        ),
+    ]
+    parser = Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+
+    text = (
+        "I will check the weather for you.\n"
+        "<tool_call>get_weather\n"
+        "<arg_key>city</arg_key>"
+        "<arg_value>Beijing</arg_value>"
+        "</tool_call>"
+    )
+
+    deltas = _simulate_streaming(
+        glm4_moe_tokenizer, parser, request, text, stream_interval
+    )
+    content_parts, tools_found = _collect_from_deltas(deltas)
+
+    # Content must be present and precede tool calls
+    full_content = "".join(content_parts)
+    assert "I will check the weather" in full_content
+
+    # Tool call must be correct
+    assert 0 in tools_found
+    assert tools_found[0]["name"] == "get_weather"
+    args = json.loads("".join(tools_found[0]["args_fragments"]))
+    assert args == {"city": "Beijing"}
+
+
+def test_stream_interval_extreme_single_chunk(glm4_moe_tokenizer):
+    """Extreme MTP: entire output arrives in one chunk (interval=9999)."""
+    tools = [
+        ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="get_weather",
+                parameters={
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                },
+            ),
+        ),
+    ]
+    parser = Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+
+    text = (
+        "Here is the weather.\n"
+        "<tool_call>get_weather\n"
+        "<arg_key>city</arg_key>"
+        "<arg_value>Beijing</arg_value>"
+        "</tool_call>"
+    )
+
+    deltas = _simulate_streaming(
+        glm4_moe_tokenizer, parser, request, text, stream_interval=9999
+    )
+    content_parts, tools_found = _collect_from_deltas(deltas)
+
+    assert "Here is the weather" in "".join(content_parts)
+    assert 0 in tools_found
+    assert tools_found[0]["name"] == "get_weather"
+    args = json.loads("".join(tools_found[0]["args_fragments"]))
+    assert args == {"city": "Beijing"}
+
+
+@pytest.mark.parametrize("stream_interval", [1, 2, 5])
+def test_stream_interval_content_between_tool_calls(
+    glm4_moe_tokenizer, stream_interval
+):
+    """Content between tool calls must be emitted, not silently dropped."""
+    tools = [
+        ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="get_weather",
+                parameters={
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                },
+            ),
+        ),
+    ]
+    parser = Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+
+    text = (
+        "Checking Beijing.\n"
+        "<tool_call>get_weather\n"
+        "<arg_key>city</arg_key>"
+        "<arg_value>Beijing</arg_value>"
+        "</tool_call>"
+        "\nAlso Shanghai.\n"
+        "<tool_call>get_weather\n"
+        "<arg_key>city</arg_key>"
+        "<arg_value>Shanghai</arg_value>"
+        "</tool_call>"
+    )
+
+    deltas = _simulate_streaming(
+        glm4_moe_tokenizer, parser, request, text, stream_interval
+    )
+    content_parts, tools_found = _collect_from_deltas(deltas)
+
+    full_content = "".join(content_parts)
+    # Both prefix and inter-tool-call content must appear
+    assert "Checking Beijing" in full_content
+    assert "Also Shanghai" in full_content
+
+    # Both tool calls must be correct
+    assert 0 in tools_found and 1 in tools_found
+    args0 = json.loads("".join(tools_found[0]["args_fragments"]))
+    args1 = json.loads("".join(tools_found[1]["args_fragments"]))
+    assert args0 == {"city": "Beijing"}
+    assert args1 == {"city": "Shanghai"}
+
+
+# ── FunctionTool (Responses API) tests ──────────────────────────────
+
+
+@pytest.fixture
+def function_tools():
+    return [
+        FunctionTool(
+            type="function",
+            name="get_weather",
+            parameters={
+                "type": "object",
+                "properties": {
+                    "city": {"type": "string"},
+                    "unit": {"type": "string"},
+                },
+            },
+        ),
+        FunctionTool(
+            type="function",
+            name="calculate",
+            parameters={
+                "type": "object",
+                "properties": {
+                    "operation": {"type": "string"},
+                    "a": {"type": "number"},
+                    "b": {"type": "number"},
+                },
+            },
+        ),
+    ]
+
+
+@pytest.fixture
+def glm4_moe_parser_function_tools(glm4_moe_tokenizer, function_tools):
+    return Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=function_tools)
+
+
+@pytest.fixture
+def mock_request_function_tools(function_tools) -> ChatCompletionRequest:
+    request = Mock(spec=ChatCompletionRequest)
+    request.tools = function_tools
+    return request
+
+
+def test_extract_tool_calls_with_function_tool(
+    glm4_moe_parser_function_tools, mock_request_function_tools
+):
+    model_output = """<tool_call>get_weather
+<arg_key>city</arg_key>
+<arg_value>Dallas</arg_value>
+<arg_key>unit</arg_key>
+<arg_value>fahrenheit</arg_value>
+</tool_call>"""
+
+    extracted = glm4_moe_parser_function_tools.extract_tool_calls(
+        model_output, request=mock_request_function_tools
+    )
+    assert extracted.tools_called
+    assert len(extracted.tool_calls) == 1
+    assert extracted.tool_calls[0].function.name == "get_weather"
+    args = json.loads(extracted.tool_calls[0].function.arguments)
+    assert args["city"] == "Dallas"
+    assert args["unit"] == "fahrenheit"
+
+
+def test_extract_tool_calls_with_function_tool_mixed_types(
+    glm4_moe_parser_function_tools, mock_request_function_tools
+):
+    model_output = """<tool_call>calculate
+<arg_key>operation</arg_key>
+<arg_value>add</arg_value>
+<arg_key>a</arg_key>
+<arg_value>42</arg_value>
+<arg_key>b</arg_key>
+<arg_value>3.14</arg_value>
+</tool_call>"""
+
+    extracted = glm4_moe_parser_function_tools.extract_tool_calls(
+        model_output, request=mock_request_function_tools
+    )
+    assert extracted.tools_called
+    args = json.loads(extracted.tool_calls[0].function.arguments)
+    assert args["operation"] == "add"
+    assert isinstance(args["a"], (int, float))
+    assert isinstance(args["b"], float)
+
+
+def test_streaming_with_function_tool(
+    glm4_moe_parser_function_tools, mock_request_function_tools
+):
+    _reset_streaming_state(glm4_moe_parser_function_tools)
+
+    chunks = [
+        "<tool_call>get_weather\n",
+        "<arg_key>city</arg_key>",
+        "<arg_value>Bei",
+        "jing",
+        "</arg_value>",
+        "</tool_call>",
+    ]
+
+    current_text = ""
+    for chunk in chunks:
+        current_text += chunk
+        glm4_moe_parser_function_tools.extract_tool_calls_streaming(
+            previous_text="",
+            current_text=current_text,
+            delta_text=chunk,
+            previous_token_ids=[],
+            current_token_ids=[],
+            delta_token_ids=[],
+            request=mock_request_function_tools,
+        )
+
+    assert len(glm4_moe_parser_function_tools.prev_tool_call_arr) == 1
+    args = json.loads(glm4_moe_parser_function_tools.prev_tool_call_arr[0]["arguments"])
+    assert args["city"] == "Beijing"
diff --git a/tests/tool_parsers/test_hy_v3_tool_parser.py b/tests/tool_parsers/test_hy_v3_tool_parser.py
new file mode 100644
index 000000000000..b5aaaf52988d
--- /dev/null
+++ b/tests/tool_parsers/test_hy_v3_tool_parser.py
@@ -0,0 +1,274 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# ruff: noqa: E501
+"""Tests for the HYV3 tool call parser."""
+
+import json
+from unittest.mock import Mock
+
+import pytest
+
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+    ChatCompletionToolsParam,
+    FunctionDefinition,
+)
+from vllm.entrypoints.openai.engine.protocol import DeltaMessage
+from vllm.tokenizers import get_tokenizer
+from vllm.tool_parsers.hy_v3_tool_parser import HYV3ToolParser
+
+parser_name = "hy_v3"
+MODEL = "tencent/Hy3-preview"
+
+
+@pytest.fixture(scope="module")
+def hy_v3_tokenizer():
+    return get_tokenizer(tokenizer_name=MODEL)
+
+
+@pytest.fixture
+def hy_v3_tool_parser(hy_v3_tokenizer):
+    return HYV3ToolParser(hy_v3_tokenizer)
+
+
+@pytest.fixture
+def mock_request() -> ChatCompletionRequest:
+    request = Mock(spec=ChatCompletionRequest)
+    request.tools = [
+        ChatCompletionToolsParam(
+            function=FunctionDefinition(name="get_current_date", parameters={}),
+        ),
+        ChatCompletionToolsParam(
+            function=FunctionDefinition(
+                name="get_weather",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "city": {"type": "string"},
+                        "date": {"type": "string"},
+                    },
+                },
+            ),
+        ),
+    ]
+    request.tool_choice = "auto"
+    return request
+
+
+class TestHYV3ExtractToolCalls:
+    def test_no_tool_call(self, hy_v3_tool_parser, mock_request):
+        out = "This is a plain response."
+        r = hy_v3_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert not r.tools_called
+        assert r.content == out
+
+    def test_zero_arg_inline(self, hy_v3_tool_parser, mock_request):
+        out = (
+            "<tool_calls><tool_call>get_current_date<tool_sep></tool_call></tool_calls>"
+        )
+        r = hy_v3_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert r.tools_called
+        assert r.tool_calls[0].function.name == "get_current_date"
+        assert json.loads(r.tool_calls[0].function.arguments) == {}
+        assert r.content is None
+
+    def test_zero_arg_newline(self, hy_v3_tool_parser, mock_request):
+        out = "<tool_calls>\n<tool_call>get_current_date<tool_sep>\n</tool_call>\n</tool_calls>"
+        r = hy_v3_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert r.tools_called
+        assert r.tool_calls[0].function.name == "get_current_date"
+
+    def test_args_same_line(self, hy_v3_tool_parser, mock_request):
+        out = (
+            "<tool_calls><tool_call>get_weather<tool_sep><arg_key>city</arg_key><arg_value>Beijing"
+            "</arg_value><arg_key>date</arg_key><arg_value>2026-03-30</arg_value></tool_call></tool_calls>"
+        )
+        r = hy_v3_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert r.tools_called
+        assert json.loads(r.tool_calls[0].function.arguments) == {
+            "city": "Beijing",
+            "date": "2026-03-30",
+        }
+
+    def test_args_with_newlines(self, hy_v3_tool_parser, mock_request):
+        out = (
+            "<tool_calls>\n<tool_call>get_weather<tool_sep>\n<arg_key>city</arg_key>\n<arg_value>Beijing"
+            "</arg_value>\n<arg_key>date</arg_key>\n<arg_value>2026-03-30</arg_value>\n</tool_call>\n</tool_calls>"
+        )
+        r = hy_v3_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert r.tools_called
+        assert json.loads(r.tool_calls[0].function.arguments) == {
+            "city": "Beijing",
+            "date": "2026-03-30",
+        }
+
+    def test_content_before(self, hy_v3_tool_parser, mock_request):
+        out = "Checking.<tool_calls>\n<tool_call>get_current_date<tool_sep>\n</tool_call>\n</tool_calls>"
+        r = hy_v3_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert r.tools_called
+        assert r.content == "Checking."
+
+    def test_multiple(self, hy_v3_tool_parser, mock_request):
+        out = (
+            "<tool_calls>\n<tool_call>get_weather<tool_sep>\n<arg_key>city</arg_key>\n<arg_value>Beijing"
+            "</arg_value>\n<arg_key>date</arg_key>\n<arg_value>2026-03-30</arg_value>\n</tool_call>\n"
+            "<tool_call>get_weather<tool_sep>\n<arg_key>city</arg_key>\n<arg_value>Hangzhou</arg_value>\n"
+            "<arg_key>date</arg_key>\n<arg_value>2026-03-30</arg_value>\n</tool_call>\n</tool_calls>"
+        )
+        r = hy_v3_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert len(r.tool_calls) == 2
+
+    def test_empty_content_none(self, hy_v3_tool_parser, mock_request):
+        out = "<tool_calls>\n<tool_call>get_current_date<tool_sep>\n</tool_call>\n</tool_calls>"
+        r = hy_v3_tool_parser.extract_tool_calls(out, request=mock_request)
+        assert r.content is None
+
+
+def _simulate_streaming(
+    parser: HYV3ToolParser,
+    deltas: list[str],
+    request: ChatCompletionRequest,
+) -> list[DeltaMessage | None]:
+    results: list[DeltaMessage | None] = []
+    previous_text = ""
+    previous_token_ids: list[int] = []
+    vocab = parser.vocab
+    for delta_text in deltas:
+        current_text = previous_text + delta_text
+        delta_token_ids = [tid for tok, tid in vocab.items() if tok in delta_text]
+        current_token_ids = previous_token_ids + delta_token_ids
+        result = parser.extract_tool_calls_streaming(
+            previous_text=previous_text,
+            current_text=current_text,
+            delta_text=delta_text,
+            previous_token_ids=previous_token_ids,
+            current_token_ids=current_token_ids,
+            delta_token_ids=delta_token_ids,
+            request=request,
+        )
+        results.append(result)
+        previous_text = current_text
+        previous_token_ids = current_token_ids
+    return results
+
+
+def _collect_streaming_tool_calls(results: list[DeltaMessage | None]) -> list[dict]:
+    tool_calls: dict[int, dict] = {}
+    for result in results:
+        if result is None or not result.tool_calls:
+            continue
+        for tc in result.tool_calls:
+            idx = tc.index
+            if idx not in tool_calls:
+                tool_calls[idx] = {
+                    "name": tc.function.name or "",
+                    "arguments": tc.function.arguments or "",
+                }
+            else:
+                if tc.function.name:
+                    tool_calls[idx]["name"] += tc.function.name
+                if tc.function.arguments:
+                    tool_calls[idx]["arguments"] += tc.function.arguments
+    return [tool_calls[i] for i in sorted(tool_calls.keys())]
+
+
+def _collect_streaming_content(results: list[DeltaMessage | None]) -> str:
+    parts = []
+    for result in results:
+        if result is not None and result.content:
+            parts.append(result.content)
+    return "".join(parts)
+
+
+class TestHYV3ExtractToolCallsStreaming:
+    def test_no_tool_call_streaming(self, hy_v3_tool_parser, mock_request):
+        deltas = ["This is ", "a plain ", "response."]
+        results = _simulate_streaming(hy_v3_tool_parser, deltas, mock_request)
+        content = _collect_streaming_content(results)
+        assert content == "This is a plain response."
+        assert len(_collect_streaming_tool_calls(results)) == 0
+
+    def test_zero_arg_streaming(self, hy_v3_tool_parser, mock_request):
+        deltas = [
+            "<tool_calls>",
+            "\n<tool_call>",
+            "get_current_date",
+            "<tool_sep>",
+            "\n</tool_call>",
+            "\n</tool_calls>",
+        ]
+        results = _simulate_streaming(hy_v3_tool_parser, deltas, mock_request)
+        tc = _collect_streaming_tool_calls(results)
+        assert len(tc) == 1
+        assert tc[0]["name"] == "get_current_date"
+        assert json.loads(tc[0]["arguments"]) == {}
+
+    def test_args_streaming(self, hy_v3_tool_parser, mock_request):
+        deltas = [
+            "<tool_calls>",
+            "\n<tool_call>",
+            "get_weather",
+            "<tool_sep>",
+            "\n<arg_key>city</arg_key>",
+            "\n<arg_value>Beijing</arg_value>",
+            "\n<arg_key>date</arg_key>",
+            "\n<arg_value>2026-03-30</arg_value>",
+            "\n</tool_call>",
+            "\n</tool_calls>",
+        ]
+        results = _simulate_streaming(hy_v3_tool_parser, deltas, mock_request)
+        tc = _collect_streaming_tool_calls(results)
+        assert len(tc) == 1 and tc[0]["name"] == "get_weather"
+        assert json.loads(tc[0]["arguments"]) == {
+            "city": "Beijing",
+            "date": "2026-03-30",
+        }
+
+    def test_content_before_streaming(self, hy_v3_tool_parser, mock_request):
+        deltas = [
+            "Checking.",
+            "<tool_calls>",
+            "\n<tool_call>",
+            "get_current_date",
+            "<tool_sep>",
+            "\n</tool_call>",
+            "\n</tool_calls>",
+        ]
+        results = _simulate_streaming(hy_v3_tool_parser, deltas, mock_request)
+        assert "Checking." in _collect_streaming_content(results)
+        tc = _collect_streaming_tool_calls(results)
+        assert len(tc) == 1 and tc[0]["name"] == "get_current_date"
+
+    def test_multiple_streaming(self, hy_v3_tool_parser, mock_request):
+        deltas = [
+            "<tool_calls>",
+            "\n<tool_call>",
+            "get_weather",
+            "<tool_sep>",
+            "\n<arg_key>city</arg_key>",
+            "\n<arg_value>Beijing</arg_value>",
+            "\n<arg_key>date</arg_key>",
+            "\n<arg_value>2026-03-30</arg_value>",
+            "\n</tool_call>",
+            "\n<tool_call>",
+            "get_weather",
+            "<tool_sep>",
+            "\n<arg_key>city</arg_key>",
+            "\n<arg_value>Hangzhou</arg_value>",
+            "\n<arg_key>date</arg_key>",
+            "\n<arg_value>2026-03-30</arg_value>",
+            "\n</tool_call>",
+            "\n</tool_calls>",
+        ]
+        results = _simulate_streaming(hy_v3_tool_parser, deltas, mock_request)
+        tc = _collect_streaming_tool_calls(results)
+        assert len(tc) == 2
+        assert json.loads(tc[0]["arguments"])["city"] == "Beijing"
+        assert json.loads(tc[1]["arguments"])["city"] == "Hangzhou"
+
+    def test_all_in_one_delta_streaming(self, hy_v3_tool_parser, mock_request):
+        out = "<tool_calls>\n<tool_call>get_current_date<tool_sep>\n</tool_call>\n</tool_calls>"
+        results = _simulate_streaming(hy_v3_tool_parser, [out], mock_request)
+        tc = _collect_streaming_tool_calls(results)
+        assert len(tc) == 1 and tc[0]["name"] == "get_current_date"
+        assert json.loads(tc[0]["arguments"]) == {}
diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
index 21b3d5adfde1..b56032b91c17 100644
--- a/tests/tool_parsers/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -3,14 +3,20 @@
 # ruff: noqa: E501
 
 import json
+from unittest.mock import MagicMock
 
 import pytest
 
-from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall
+from tests.tool_parsers.utils import (
+    run_tool_extraction,
+    run_tool_extraction_streaming,
+)
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+)
 from vllm.tokenizers import get_tokenizer
 from vllm.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser
 
-# Use a common model that is likely to be available
 MODEL = "moonshotai/Kimi-K2-Instruct"
 
 
@@ -20,906 +26,557 @@ def kimi_k2_tokenizer():
 
 
 @pytest.fixture
-def kimi_k2_tool_parser(kimi_k2_tokenizer):
+def parser(kimi_k2_tokenizer):
     return KimiK2ToolParser(kimi_k2_tokenizer)
 
 
-def assert_tool_calls(
-    actual_tool_calls: list[ToolCall], expected_tool_calls: list[ToolCall]
-):
-    assert len(actual_tool_calls) == len(expected_tool_calls)
+SECTION_BEGIN = "<|tool_calls_section_begin|>"
+SECTION_END = "<|tool_calls_section_end|>"
+TOOL_BEGIN = "<|tool_call_begin|>"
+TOOL_END = "<|tool_call_end|>"
+ARG_BEGIN = "<|tool_call_argument_begin|>"
+
+
+def _tool(tool_id: str, args: str) -> str:
+    return f"{TOOL_BEGIN}{tool_id} {ARG_BEGIN}{args}{TOOL_END}"
 
-    for actual_tool_call, expected_tool_call in zip(
-        actual_tool_calls, expected_tool_calls
-    ):
-        assert actual_tool_call.type == "function"
-        assert actual_tool_call.function == expected_tool_call.function
-
-        # assert tool call id format: should contain function name and numeric index
-        # Format can be either "functions.func_name:0" or "func_name:0"
-        assert actual_tool_call.id.split(":")[-1].isdigit()
-        assert (
-            actual_tool_call.id.split(":")[0].split(".")[-1]
-            == expected_tool_call.function.name
-        )
 
+def _wrap(*tool_strs: str) -> str:
+    return SECTION_BEGIN + "".join(tool_strs) + SECTION_END
 
-def run_streaming_sequence(parser, deltas):
-    """Helper to simulate a streaming sequence and return results."""
-    previous_text = ""
-    previous_token_ids: list[int] = []
-    results = []
-
-    for delta_text, delta_token_ids in deltas:
-        current_text = previous_text + delta_text
-        current_token_ids = previous_token_ids + delta_token_ids
-
-        result = parser.extract_tool_calls_streaming(
-            previous_text=previous_text,
-            current_text=current_text,
-            delta_text=delta_text,
-            previous_token_ids=previous_token_ids,
-            current_token_ids=current_token_ids,
-            delta_token_ids=delta_token_ids,
-            request=None,
+
+class TestExtractToolCalls:
+    def test_no_tools(self, parser):
+        content, tool_calls = run_tool_extraction(
+            parser, "This is a test", streaming=False
         )
-        results.append(result)
-
-        previous_text = current_text
-        previous_token_ids = current_token_ids
-
-    return results
-
-
-def test_extract_tool_calls_no_tools(kimi_k2_tool_parser):
-    model_output = "This is a test"
-    extracted_tool_calls = kimi_k2_tool_parser.extract_tool_calls(
-        model_output, request=None
-    )  # type: ignore[arg-type]
-    assert not extracted_tool_calls.tools_called
-    assert extracted_tool_calls.tool_calls == []
-    assert extracted_tool_calls.content == model_output
-
-
-@pytest.mark.parametrize(
-    ids=[
-        "tool_call_with_content_before",
-        "multi_tool_call_with_content_before",
-        "concatenated_tool_calls_bug_fix",
-        "three_concatenated_tool_calls",
-        "mixed_spacing_tool_calls",
-        "angle_brackets_in_json",
-        "newlines_in_json",
-    ],
-    argnames=["model_output", "expected_tool_calls", "expected_content"],
-    argvalues=[
-        (
-            """I'll help you check the weather. <|tool_calls_section_begin|> <|tool_call_begin|>
-functions.get_weather:0 <|tool_call_argument_begin|> {"city": "Beijing"} <|tool_call_end|> <|tool_calls_section_end|>""",
-            [
-                ToolCall(
-                    id="functions.get_weather:0",
-                    function=FunctionCall(
-                        name="get_weather",
-                        arguments=json.dumps(
-                            {
-                                "city": "Beijing",
-                            },
-                        ),
-                    ),
-                    type="function",
-                )
-            ],
-            "I'll help you check the weather. ",
-        ),
-        (
-            """I'll help you check the weather. <|tool_calls_section_begin|> <|tool_call_begin|>
-functions.get_weather:0 <|tool_call_argument_begin|> {"city": "Beijing"} <|tool_call_end|> <|tool_call_begin|>
-functions.get_weather:1 <|tool_call_argument_begin|> {"city": "Shanghai"} <|tool_call_end|> <|tool_calls_section_end|>""",
-            [
-                ToolCall(
-                    id="functions.get_weather:0",
-                    function=FunctionCall(
-                        name="get_weather",
-                        arguments=json.dumps(
-                            {
-                                "city": "Beijing",
-                            },
-                        ),
-                    ),
-                    type="function",
-                ),
-                ToolCall(
-                    id="functions.get_weather:1",
-                    function=FunctionCall(
-                        name="get_weather",
-                        arguments=json.dumps(
-                            {
-                                "city": "Shanghai",
-                            },
-                        ),
-                    ),
-                    type="function",
-                ),
-            ],
-            "I'll help you check the weather. ",
-        ),
-        (
-            """I'll get the weather and news for LA today. First, let me get the weather using Los Angeles coordinates, and then get the latest news. <|tool_calls_section_begin|><|tool_call_begin|>functions.get_weather:0<|tool_call_argument_begin|>{"latitude": 34.0522, "longitude": -118.2437}<|tool_call_end|><|tool_call_begin|>functions.get_news:1<|tool_call_argument_begin|>{"content": "Los Angeles today"}<|tool_call_end|><|tool_calls_section_end|>""",
-            [
-                ToolCall(
-                    id="functions.get_weather:0",
-                    function=FunctionCall(
-                        name="get_weather",
-                        arguments=json.dumps(
-                            {"latitude": 34.0522, "longitude": -118.2437}
-                        ),
-                    ),
-                    type="function",
-                ),
-                ToolCall(
-                    id="functions.get_news:1",
-                    function=FunctionCall(
-                        name="get_news",
-                        arguments=json.dumps({"content": "Los Angeles today"}),
-                    ),
-                    type="function",
-                ),
-            ],
-            "I'll get the weather and news for LA today. First, let me get the weather using Los Angeles coordinates, and then get the latest news. ",
-        ),
-        (
-            """I'll help you with multiple tasks. <|tool_calls_section_begin|><|tool_call_begin|>functions.get_weather:0<|tool_call_argument_begin|>{"city": "New York"}<|tool_call_end|><|tool_call_begin|>functions.get_news:1<|tool_call_argument_begin|>{"topic": "technology"}<|tool_call_end|><|tool_call_begin|>functions.send_email:2<|tool_call_argument_begin|>{"to": "user@example.com", "subject": "Daily Update"}<|tool_call_end|><|tool_calls_section_end|>""",
-            [
-                ToolCall(
-                    id="functions.get_weather:0",
-                    function=FunctionCall(
-                        name="get_weather",
-                        arguments=json.dumps({"city": "New York"}),
-                    ),
-                    type="function",
+        assert content == "This is a test"
+        assert tool_calls == []
+
+    @pytest.mark.parametrize(
+        "model_output, expected_names, expected_args_list, expected_content",
+        [
+            pytest.param(
+                "I'll check. "
+                + _wrap(_tool("functions.get_weather:0", '{"city": "Beijing"}')),
+                ["get_weather"],
+                [{"city": "Beijing"}],
+                "I'll check. ",
+                id="single_tool_call",
+            ),
+            pytest.param(
+                "Compare weather. "
+                + _wrap(
+                    _tool("functions.get_weather:0", '{"city": "Beijing"}'),
+                    _tool("functions.get_weather:1", '{"city": "Shanghai"}'),
                 ),
-                ToolCall(
-                    id="functions.get_news:1",
-                    function=FunctionCall(
-                        name="get_news",
-                        arguments=json.dumps({"topic": "technology"}),
+                ["get_weather", "get_weather"],
+                [{"city": "Beijing"}, {"city": "Shanghai"}],
+                "Compare weather. ",
+                id="parallel_tool_calls",
+            ),
+            pytest.param(
+                "Multiple tasks. "
+                + _wrap(
+                    _tool("functions.get_weather:0", '{"city": "New York"}'),
+                    _tool("functions.get_news:1", '{"topic": "technology"}'),
+                    _tool(
+                        "functions.send_email:2",
+                        '{"to": "user@example.com", "subject": "Daily Update"}',
                     ),
-                    type="function",
                 ),
-                ToolCall(
-                    id="functions.send_email:2",
-                    function=FunctionCall(
-                        name="send_email",
-                        arguments=json.dumps(
-                            {"to": "user@example.com", "subject": "Daily Update"}
-                        ),
-                    ),
-                    type="function",
+                ["get_weather", "get_news", "send_email"],
+                [
+                    {"city": "New York"},
+                    {"topic": "technology"},
+                    {"to": "user@example.com", "subject": "Daily Update"},
+                ],
+                "Multiple tasks. ",
+                id="three_tool_calls",
+            ),
+            pytest.param(
+                "Process HTML. "
+                + _wrap(
+                    _tool("functions.process_html:0", '{"html": "<div>content</div>"}')
                 ),
-            ],
-            "I'll help you with multiple tasks. ",
-        ),
-        (
-            """Mixed spacing test. <|tool_calls_section_begin|> <|tool_call_begin|> functions.test:0 <|tool_call_argument_begin|> {} <|tool_call_end|><|tool_call_begin|>functions.test2:1<|tool_call_argument_begin|>{}<|tool_call_end|> <|tool_calls_section_end|>""",
-            [
-                ToolCall(
-                    id="functions.test:0",
-                    function=FunctionCall(
-                        name="test",
-                        arguments=json.dumps({}),
-                    ),
-                    type="function",
-                ),
-                ToolCall(
-                    id="functions.test2:1",
-                    function=FunctionCall(
-                        name="test2",
-                        arguments=json.dumps({}),
-                    ),
-                    type="function",
+                ["process_html"],
+                [{"html": "<div>content</div>"}],
+                "Process HTML. ",
+                id="angle_brackets_in_json",
+            ),
+            pytest.param(
+                "Formatted. "
+                + _wrap(
+                    _tool(
+                        "functions.process_data:0",
+                        '{\n  "name": "test",\n  "value": 123\n}',
+                    )
                 ),
-            ],
-            "Mixed spacing test. ",
-        ),
-        (
-            """I need to process HTML content. <|tool_calls_section_begin|><|tool_call_begin|>functions.process_html:0<|tool_call_argument_begin|>{"html": "<div>content</div>", "text": "normal text"}<|tool_call_end|><|tool_calls_section_end|>""",
-            [
-                ToolCall(
-                    id="functions.process_html:0",
-                    function=FunctionCall(
-                        name="process_html",
-                        arguments=json.dumps(
-                            {"html": "<div>content</div>", "text": "normal text"}
-                        ),
-                    ),
-                    type="function",
-                )
-            ],
-            "I need to process HTML content. ",
-        ),
-        (
-            """I need to process formatted JSON. <|tool_calls_section_begin|><|tool_call_begin|>functions.process_data:0<|tool_call_argument_begin|>{
-  "name": "test",
-  "value": 123,
-  "nested": {
-    "key": "value"
-  }
-}<|tool_call_end|><|tool_calls_section_end|>""",
-            [
-                ToolCall(
-                    id="functions.process_data:0",
-                    function=FunctionCall(
-                        name="process_data",
-                        arguments=json.dumps(
-                            {"name": "test", "value": 123, "nested": {"key": "value"}},
-                            indent=2,
-                        ),
-                    ),
-                    type="function",
-                )
-            ],
-            "I need to process formatted JSON. ",
-        ),
-    ],
-)
-def test_extract_tool_calls(
-    kimi_k2_tool_parser, model_output, expected_tool_calls, expected_content
-):
-    extracted_tool_calls = kimi_k2_tool_parser.extract_tool_calls(
-        model_output, request=None
-    )  # type: ignore[arg-type]
-    assert extracted_tool_calls.tools_called
-
-    assert_tool_calls(extracted_tool_calls.tool_calls, expected_tool_calls)
-
-    assert extracted_tool_calls.content == expected_content
-
-
-def test_extract_tool_calls_invalid_json(kimi_k2_tool_parser):
-    """we'll return every funcall result"""
-    model_output = """I'll help you check the weather. <|tool_calls_section_begin|> <|tool_call_begin|>
-functions.invalid_get_weather:0 <|tool_call_argument_begin|> {"city": "Beijing" <|tool_call_end|> <|tool_call_begin|>
-functions.valid_get_weather:1 <|tool_call_argument_begin|> {"city": "Shanghai"} <|tool_call_end|> <|tool_calls_section_end|>"""
-
-    extracted_tool_calls = kimi_k2_tool_parser.extract_tool_calls(
-        model_output, request=None
-    )  # type: ignore[arg-type]
-
-    assert extracted_tool_calls.tools_called
-    # Should extract only the valid JSON tool calls
-    assert len(extracted_tool_calls.tool_calls) == 2
-    assert extracted_tool_calls.tool_calls[0].function.name == "invalid_get_weather"
-    assert extracted_tool_calls.tool_calls[1].function.name == "valid_get_weather"
-
-
-def test_extract_tool_calls_invalid_funcall(kimi_k2_tool_parser):
-    """we'll return every funcall result"""
-    model_output = """I'll help you check the weather. <|tool_calls_section_begin|> <|tool_call_begin|>
-functions.invalid_get_weather.0 <|tool_call_argument_begin|> {"city": "Beijing"} <|tool_call_end|> <|tool_call_begin|>
-functions.valid_get_weather:1 <|tool_call_argument_begin|> {"city": "Shanghai"} <|tool_call_end|> <|tool_calls_section_end|>"""
-
-    extracted_tool_calls = kimi_k2_tool_parser.extract_tool_calls(
-        model_output, request=None
-    )  # type: ignore[arg-type]
-
-    assert extracted_tool_calls.tools_called
-    # Should extract only the valid JSON tool calls
-    assert len(extracted_tool_calls.tool_calls) == 1
-    assert extracted_tool_calls.tool_calls[0].function.name == "valid_get_weather"
-
-
-def test_streaming_basic_functionality(kimi_k2_tool_parser):
-    """Test basic streaming functionality."""
-    # Reset streaming state
-    kimi_k2_tool_parser.current_tool_name_sent = False
-    kimi_k2_tool_parser.prev_tool_call_arr = []
-    kimi_k2_tool_parser.current_tool_id = -1
-    kimi_k2_tool_parser.streamed_args_for_tool = []
-
-    # Test with a simple tool call
-    current_text = """ check the weather. <|tool_calls_section_begin|> <|tool_call_begin|>
-functions.get_weather:0 <|tool_call_argument_begin|> {"city": "Beijing"} <|tool_call_end|> <|tool_calls_section_end|>"""
-
-    # First call should handle the initial setup
-    result = kimi_k2_tool_parser.extract_tool_calls_streaming(
-        previous_text="I'll help you",
-        current_text=current_text,
-        delta_text="<|tool_calls_section_end|>",
-        previous_token_ids=[],
-        current_token_ids=[],
-        delta_token_ids=[],
-        request=None,
-    )
-
-    # The result might be None or contain tool call information
-    # This depends on the internal state management
-    if result is not None and hasattr(result, "tool_calls") and result.tool_calls:
-        assert len(result.tool_calls) >= 0
-
-
-def test_streaming_no_tool_calls(kimi_k2_tool_parser):
-    """Test streaming when there are no tool calls."""
-    current_text = "This is just regular text without any tool calls."
-
-    result = kimi_k2_tool_parser.extract_tool_calls_streaming(
-        previous_text="This is just regular text",
-        current_text=current_text,
-        delta_text=" without any tool calls.",
-        previous_token_ids=[],
-        current_token_ids=[],
-        delta_token_ids=[],
-        request=None,
-    )
-
-    # Should return the delta text as content
-    assert result is not None
-    assert hasattr(result, "content")
-    assert result.content == " without any tool calls."
-
-
-def test_token_leak_between_section_and_tool_begin(kimi_k2_tool_parser):
-    """
-    Test that text between <|tool_calls_section_begin|> and <|tool_call_begin|>
-    is suppressed and does not leak into reasoning_delta.
-    This is the main vulnerability being fixed.
-    """
-    kimi_k2_tool_parser.reset_streaming_state()
-
-    # Get token IDs for the markers
-    section_begin_token_id = kimi_k2_tool_parser.vocab.get(
-        "<|tool_calls_section_begin|>"
-    )
-    tool_call_begin_token_id = kimi_k2_tool_parser.vocab.get("<|tool_call_begin|>")
-
-    # Simulate streaming sequence:
-    deltas = [
-        ("I'll help you with that. ", [1, 2, 3]),
-        ("<|tool_calls_section_begin|>", [section_begin_token_id]),
-        (" spurious text ", [4, 5]),
-        ("<|tool_call_begin|>", [tool_call_begin_token_id]),
-    ]
-
-    results = run_streaming_sequence(kimi_k2_tool_parser, deltas)
-
-    # Delta 1: "I'll help you with that. "
-    assert results[0] is not None
-    assert results[0].content == "I'll help you with that. "
-
-    # Delta 2: "<|tool_calls_section_begin|>"
-    # Section marker should be stripped and suppressed
-    assert results[1] is None or (
-        results[1].content is None or results[1].content == ""
-    )
-
-    # Delta 3: " spurious text or tokens " (THE LEAK SCENARIO)
-    # CRITICAL: This text should be suppressed, NOT returned as reasoning_delta
-    assert results[2] is None or (
-        results[2].content is None or results[2].content == ""
+                ["process_data"],
+                [{"name": "test", "value": 123}],
+                "Formatted. ",
+                id="multiline_json",
+            ),
+            pytest.param(
+                "No prefix. " + _wrap(_tool("get_weather:0", '{"city": "Tokyo"}')),
+                ["get_weather"],
+                [{"city": "Tokyo"}],
+                "No prefix. ",
+                id="no_functions_prefix",
+            ),
+            pytest.param(
+                "Empty args. " + _wrap(_tool("functions.test:0", "{}")),
+                ["test"],
+                [{}],
+                "Empty args. ",
+                id="empty_arguments",
+            ),
+        ],
     )
-
-    # Delta 4: "<|tool_call_begin|>..."
-    # Now we're in tool call mode, result depends on internal state
-    # The key is that the spurious text from Delta 3 was not leaked
-
-
-def test_split_markers_across_deltas(kimi_k2_tool_parser):
-    """
-    Test that markers split across delta chunks are correctly detected
-    via the rolling buffer mechanism.
-    """
-    kimi_k2_tool_parser.reset_streaming_state()
-
-    section_begin_token_id = kimi_k2_tool_parser.vocab.get(
-        "<|tool_calls_section_begin|>"
-    )
-
-    # Delta 1: partial token, Delta 2: complete marker
-    deltas = [
-        ("<|tool_calls_sec", [3]),
-        ("tion_begin|> ", [section_begin_token_id, 4]),
-    ]
-
-    _results = run_streaming_sequence(kimi_k2_tool_parser, deltas)
-
-    # Now the complete marker should be detected via buffer
-    assert kimi_k2_tool_parser.in_tool_section is True
-
-
-def test_marker_variants(kimi_k2_tool_parser):
-    """Test that both singular and plural marker variants are recognized."""
-    kimi_k2_tool_parser.reset_streaming_state()
-
-    # Test singular variant: <|tool_call_section_begin|> (note: singular "call")
-    singular_token_id = kimi_k2_tool_parser.vocab.get("<|tool_call_section_begin|>")
-
-    if singular_token_id is not None:  # Only test if tokenizer supports it
-        _result = kimi_k2_tool_parser.extract_tool_calls_streaming(
-            previous_text="Reasoning ",
-            current_text="Reasoning <|tool_call_section_begin|>",
-            delta_text="<|tool_call_section_begin|>",
-            previous_token_ids=[1, 2],
-            current_token_ids=[1, 2, singular_token_id],
-            delta_token_ids=[singular_token_id],
-            request=None,
+    def test_extract_tool_calls(
+        self, parser, model_output, expected_names, expected_args_list, expected_content
+    ):
+        content, tool_calls = run_tool_extraction(parser, model_output, streaming=False)
+        assert content == expected_content
+        assert len(tool_calls) == len(expected_names)
+        for tc, name, expected_args in zip(
+            tool_calls, expected_names, expected_args_list
+        ):
+            assert tc.type == "function"
+            assert tc.function.name == name
+            assert json.loads(tc.function.arguments) == expected_args
+            # id format: "something:digit"
+            assert tc.id.split(":")[-1].isdigit()
+
+    def test_invalid_json_still_extracted(self, parser):
+        """Tool calls with invalid JSON are still returned (arguments as-is)."""
+        model_output = (
+            "Help. "
+            + SECTION_BEGIN
+            + _tool("functions.bad:0", '{"city": "Beijing"')
+            + _tool("functions.good:1", '{"city": "Shanghai"}')
+            + SECTION_END
         )
-        # Should enter tool section mode with singular variant too
-        assert kimi_k2_tool_parser.in_tool_section is True
-
-
-def test_reentry_to_reasoning_after_tool_section(kimi_k2_tool_parser):
-    """
-    Test that after exiting a tool section with <|tool_calls_section_end|>,
-    subsequent text is correctly returned as reasoning content.
-    """
-    kimi_k2_tool_parser.reset_streaming_state()
-
-    section_begin_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_begin|>")
-    section_end_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_end|>")
-
-    deltas = [
-        ("<|tool_calls_section_begin|>", [section_begin_id]),
-        ("<|tool_calls_section_end|>", [section_end_id]),
-        (" More reasoning", [10, 11]),
-    ]
-
-    results = run_streaming_sequence(kimi_k2_tool_parser, deltas)
-
-    assert kimi_k2_tool_parser.in_tool_section is False
-    assert results[2] is not None
-    assert results[2].content == " More reasoning"
-
-
-def test_empty_tool_section(kimi_k2_tool_parser):
-    """Test an empty tool section (begin immediately followed by end)."""
-    kimi_k2_tool_parser.reset_streaming_state()
-
-    section_begin_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_begin|>")
-    section_end_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_end|>")
-
-    # Section begin
-    _result1 = kimi_k2_tool_parser.extract_tool_calls_streaming(
-        previous_text="Reasoning ",
-        current_text="Reasoning <|tool_calls_section_begin|>",
-        delta_text="<|tool_calls_section_begin|>",
-        previous_token_ids=[1],
-        current_token_ids=[1, section_begin_id],
-        delta_token_ids=[section_begin_id],
-        request=None,
-    )
-
-    # Immediate section end
-    _result2 = kimi_k2_tool_parser.extract_tool_calls_streaming(
-        previous_text="Reasoning <|tool_calls_section_begin|>",
-        current_text="Reasoning <|tool_calls_section_begin|><|tool_calls_section_end|>",
-        delta_text="<|tool_calls_section_end|>",
-        previous_token_ids=[1, section_begin_id],
-        current_token_ids=[1, section_begin_id, section_end_id],
-        delta_token_ids=[section_end_id],
-        request=None,
-    )
-    # Should exit cleanly without errors
-    assert kimi_k2_tool_parser.in_tool_section is False
-
-
-def test_malformed_tool_section_recovery(kimi_k2_tool_parser):
-    """
-    Test that the parser recovers from a malformed tool section
-    that never closes properly.
-    """
-    kimi_k2_tool_parser.reset_streaming_state()
-
-    section_begin_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_begin|>")
-
-    # Enter tool section
-    _result1 = kimi_k2_tool_parser.extract_tool_calls_streaming(
-        previous_text="",
-        current_text="<|tool_calls_section_begin|>",
-        delta_text="<|tool_calls_section_begin|>",
-        previous_token_ids=[],
-        current_token_ids=[section_begin_id],
-        delta_token_ids=[section_begin_id],
-        request=None,
-    )
-    assert kimi_k2_tool_parser.in_tool_section is True
-
-    # Simulate a lot of text without proper tool calls or section end
-    # This should trigger the error recovery mechanism
-    large_text = "x" * 10000  # Exceeds max_section_chars
-
-    result2 = kimi_k2_tool_parser.extract_tool_calls_streaming(
-        previous_text="<|tool_calls_section_begin|>",
-        current_text="<|tool_calls_section_begin|>" + large_text,
-        delta_text=large_text,
-        previous_token_ids=[section_begin_id],
-        current_token_ids=[section_begin_id] + list(range(100, 100 + len(large_text))),
-        delta_token_ids=list(range(100, 100 + len(large_text))),
-        request=None,
-    )
-
-    # Parser should have force-exited the tool section
-    assert kimi_k2_tool_parser.in_tool_section is False
-    # And returned the content as reasoning
-    assert result2 is not None
-    assert result2.content == large_text
-
-
-def test_state_reset(kimi_k2_tool_parser):
-    """Test that reset_streaming_state() properly clears all state."""
-    # Put parser in a complex state
-    kimi_k2_tool_parser.in_tool_section = True
-    kimi_k2_tool_parser.token_buffer = "some buffer"
-    kimi_k2_tool_parser.current_tool_id = 5
-    kimi_k2_tool_parser.prev_tool_call_arr = [{"id": "test"}]
-    kimi_k2_tool_parser.section_char_count = 1000
-
-    # Reset
-    kimi_k2_tool_parser.reset_streaming_state()
-
-    # Verify all state is cleared
-    assert kimi_k2_tool_parser.in_tool_section is False
-    assert kimi_k2_tool_parser.token_buffer == ""
-    assert kimi_k2_tool_parser.current_tool_id == -1
-    assert kimi_k2_tool_parser.prev_tool_call_arr == []
-    assert kimi_k2_tool_parser.section_char_count == 0
-    assert kimi_k2_tool_parser.current_tool_name_sent is False
-    assert kimi_k2_tool_parser.streamed_args_for_tool == []
-
-
-def test_section_begin_noise_tool_begin_same_chunk(kimi_k2_tool_parser):
-    """
-    Test that begin→noise→tool_begin within the SAME chunk suppresses
-    the noise text correctly (not just across chunks).
-    """
-    kimi_k2_tool_parser.reset_streaming_state()
-
-    section_begin_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_begin|>")
-    tool_call_begin_id = kimi_k2_tool_parser.vocab.get("<|tool_call_begin|>")
-
-    # Single delta containing: section_begin + spurious text + tool_call_begin
-    combined_text = "<|tool_calls_section_begin|> noise text <|tool_call_begin|>"
-
-    result = kimi_k2_tool_parser.extract_tool_calls_streaming(
-        previous_text="Reasoning ",
-        current_text="Reasoning " + combined_text,
-        delta_text=combined_text,
-        previous_token_ids=[1, 2],
-        current_token_ids=[1, 2, section_begin_id, 3, 4, tool_call_begin_id],
-        delta_token_ids=[section_begin_id, 3, 4, tool_call_begin_id],
-        request=None,
-    )
-
-    # The noise text should NOT leak into content
-    # Result should either be None/empty or start tool call parsing
-    if result is not None and result.content is not None:
-        # If content is returned, it should not contain the noise
-        assert "noise text" not in result.content
-        assert result.content == "" or result.content.strip() == ""
-
-
-def test_stream_ends_without_section_end_marker(kimi_k2_tool_parser):
-    """
-    Test that if the stream ends (EOF) without a proper section end marker,
-    the parser doesn't leak text, doesn't crash, and resets state cleanly.
-    """
-    kimi_k2_tool_parser.reset_streaming_state()
-
-    section_begin_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_begin|>")
-
-    # Enter tool section
-    _result1 = kimi_k2_tool_parser.extract_tool_calls_streaming(
-        previous_text="",
-        current_text="<|tool_calls_section_begin|>",
-        delta_text="<|tool_calls_section_begin|>",
-        previous_token_ids=[],
-        current_token_ids=[section_begin_id],
-        delta_token_ids=[section_begin_id],
-        request=None,
-    )
-    assert kimi_k2_tool_parser.in_tool_section is True
-
-    # Some content in tool section
-    result2 = kimi_k2_tool_parser.extract_tool_calls_streaming(
-        previous_text="<|tool_calls_section_begin|>",
-        current_text="<|tool_calls_section_begin|> partial content",
-        delta_text=" partial content",
-        previous_token_ids=[section_begin_id],
-        current_token_ids=[section_begin_id, 10, 11],
-        delta_token_ids=[10, 11],
-        request=None,
-    )
-    # Content should be suppressed
-    assert result2.content == "" or result2.content is None
-
-    # Stream ends (EOF) - no more deltas, no section_end marker
-    # Simulate this by manually checking state and resetting
-    # (In real usage, the request handler would call reset_streaming_state)
-    assert kimi_k2_tool_parser.in_tool_section is True  # Still in section
-
-    # Reset state (as would happen between requests)
-    kimi_k2_tool_parser.reset_streaming_state()
-
-    # Verify clean slate
-    assert kimi_k2_tool_parser.in_tool_section is False
-    assert kimi_k2_tool_parser.token_buffer == ""
-
-    # Next request should work normally
-    result3 = kimi_k2_tool_parser.extract_tool_calls_streaming(
-        previous_text="",
-        current_text="New reasoning",
-        delta_text="New reasoning",
-        previous_token_ids=[],
-        current_token_ids=[20, 21],
-        delta_token_ids=[20, 21],
-        request=None,
-    )
-    assert result3 is not None
-    assert result3.content == "New reasoning"
-
-
-def test_same_chunk_begin_and_end_markers(kimi_k2_tool_parser):
-    """
-    CRITICAL TEST: Verify that when both section_begin and section_end
-    markers appear in the SAME chunk, the parser correctly:
-    1. Enters the tool section
-    2. Immediately exits the tool section
-    3. Does NOT get stuck in in_tool_section=True state
-
-    This tests the bug fix where elif was changed to if to handle
-    both state transitions in a single delta.
-    """
-    kimi_k2_tool_parser.reset_streaming_state()
-
-    section_begin_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_begin|>")
-    section_end_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_end|>")
-
-    # Single chunk with both markers (e.g., empty tool section)
-    combined_delta = "<|tool_calls_section_begin|><|tool_calls_section_end|>"
-
-    result = kimi_k2_tool_parser.extract_tool_calls_streaming(
-        previous_text="Some reasoning ",
-        current_text="Some reasoning " + combined_delta,
-        delta_text=combined_delta,
-        previous_token_ids=[1, 2],
-        current_token_ids=[1, 2, section_begin_id, section_end_id],
-        delta_token_ids=[section_begin_id, section_end_id],
-        request=None,
-    )
-
-    # CRITICAL: Parser should NOT be stuck in tool section
-    assert kimi_k2_tool_parser.in_tool_section is False, (
-        "Parser stuck in tool section after processing both begin/end in same chunk. "
-        "This indicates the elif bug was not fixed."
-    )
-
-    # Result should be empty or contain only stripped content
-    assert result is not None
-    assert result.content == "" or result.content is None
-
-    # Verify subsequent content streams correctly (not suppressed)
-    result2 = kimi_k2_tool_parser.extract_tool_calls_streaming(
-        previous_text="Some reasoning " + combined_delta,
-        current_text="Some reasoning " + combined_delta + " More reasoning",
-        delta_text=" More reasoning",
-        previous_token_ids=[1, 2, section_begin_id, section_end_id],
-        current_token_ids=[1, 2, section_begin_id, section_end_id, 10, 11],
-        delta_token_ids=[10, 11],
-        request=None,
-    )
-
-    # This content should NOT be suppressed (we're out of tool section)
-    assert result2 is not None
-    assert result2.content == " More reasoning"
-
-
-def test_same_chunk_begin_content_end_markers(kimi_k2_tool_parser):
-    """
-    Test the same-chunk scenario with actual content between markers.
-    Example: <|tool_calls_section_begin|> text <|tool_calls_section_end|>
-    all arriving in one delta. The key is that the state machine correctly
-    transitions in and out within the same chunk.
-    """
-    kimi_k2_tool_parser.reset_streaming_state()
-
-    section_begin_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_begin|>")
-    section_end_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_end|>")
-
-    # Chunk with begin, some whitespace/noise, and end all together
-    # This simulates a tool section that opens and closes in the same chunk
-    combined_delta = "<|tool_calls_section_begin|>   <|tool_calls_section_end|>"
-
-    _result = kimi_k2_tool_parser.extract_tool_calls_streaming(
-        previous_text="Reasoning ",
-        current_text="Reasoning " + combined_delta,
-        delta_text=combined_delta,
-        previous_token_ids=[1],
-        current_token_ids=[1, section_begin_id, 100, section_end_id],
-        delta_token_ids=[section_begin_id, 100, section_end_id],
-        request=None,
-    )
+        content, tool_calls = run_tool_extraction(parser, model_output, streaming=False)
+        assert len(tool_calls) == 2
+        assert tool_calls[0].function.name == "bad"
+        assert tool_calls[1].function.name == "good"
+
+    def test_invalid_funcall_id_skipped(self, parser):
+        """Tool calls with malformed id (no colon+digit) are skipped."""
+        model_output = (
+            "Help. "
+            + SECTION_BEGIN
+            + _tool("functions.invalid.0", '{"city": "Beijing"}')
+            + _tool("functions.valid:1", '{"city": "Shanghai"}')
+            + SECTION_END
+        )
+        content, tool_calls = run_tool_extraction(parser, model_output, streaming=False)
+        assert len(tool_calls) == 1
+        assert tool_calls[0].function.name == "valid"
+
+    def test_native_id_extracted(self, parser):
+        """Regression: parser extracts native ID onto ToolCall (PR #32768)."""
+        model_output = "Checking weather. " + _wrap(
+            _tool("functions.get_weather:0", '{"city": "Tokyo"}')
+        )
+        content, tool_calls = run_tool_extraction(parser, model_output, streaming=False)
+        assert len(tool_calls) == 1
+        assert tool_calls[0].id == "functions.get_weather:0"
+        assert tool_calls[0].function.name == "get_weather"
+        assert json.loads(tool_calls[0].function.arguments) == {"city": "Tokyo"}
+
+    def test_multi_turn_native_id_continuity(self, kimi_k2_tokenizer):
+        """Regression: native IDs from turn 1 preserved across turns (PR #32768)."""
+        turn1_parser = KimiK2ToolParser(kimi_k2_tokenizer)
+        turn1_output = "Let me check. " + _wrap(
+            _tool("functions.get_weather:0", '{"city": "Beijing"}')
+        )
+        _, turn1_tools = run_tool_extraction(
+            turn1_parser, turn1_output, streaming=False
+        )
+        assert len(turn1_tools) == 1
+        assert turn1_tools[0].id == "functions.get_weather:0"
 
-    # Parser should exit cleanly (not stuck in tool section)
-    assert kimi_k2_tool_parser.in_tool_section is False
-
-    # Verify the fix: next content should stream normally, not be suppressed
-    result2 = kimi_k2_tool_parser.extract_tool_calls_streaming(
-        previous_text="Reasoning " + combined_delta,
-        current_text="Reasoning " + combined_delta + " Done",
-        delta_text=" Done",
-        previous_token_ids=[1, section_begin_id, 100, section_end_id],
-        current_token_ids=[1, section_begin_id, 100, section_end_id, 200],
-        delta_token_ids=[200],
-        request=None,
-    )
+        # Fresh parser for turn 2
+        turn2_parser = KimiK2ToolParser(kimi_k2_tokenizer)
+        turn2_output = "Now let me get news. " + _wrap(
+            _tool("functions.get_news:0", '{"topic": "weather in Beijing"}')
+        )
+        _, turn2_tools = run_tool_extraction(
+            turn2_parser, turn2_output, streaming=False
+        )
+        assert len(turn2_tools) == 1
+        assert turn2_tools[0].id == "functions.get_news:0"
 
-    # Content after section should be returned (not suppressed)
-    assert result2 is not None
-    assert result2.content == " Done"
 
+def _split_tool_output_to_deltas(
+    content: str, tool_strs: list[tuple[str, str]]
+) -> list[str]:
+    """Build a list of string deltas with special tokens as separate chunks.
 
-def test_tool_call_end_and_section_end_same_chunk(kimi_k2_tool_parser):
+    Args:
+        content: text before tool section
+        tool_strs: list of (tool_id, args_json)
     """
-    CRITICAL TEST (P1): Verify that when both <|tool_call_end|> and
-    <|tool_calls_section_end|> appear in the SAME chunk, the parser:
-    1. Processes the tool_call_end first (emits final arguments)
-    2. THEN exits the section
-    3. Does NOT drop the final tool call update
-    4. Does NOT leak special tokens into reasoning
-
-    This tests the deferred section exit fix.
-    """
-    kimi_k2_tool_parser.reset_streaming_state()
-
-    section_begin_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_begin|>")
-    section_end_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_end|>")
-    tool_begin_id = kimi_k2_tool_parser.vocab.get("<|tool_call_begin|>")
-    tool_end_id = kimi_k2_tool_parser.vocab.get("<|tool_call_end|>")
-
-    # Simulate a streaming sequence for a SHORT tool call (all in one chunk):
-    combined = (
-        '<|tool_call_begin|>get_weather:0 <|tool_call_argument_begin|> {"city": "Paris"} '
-        "<|tool_call_end|><|tool_calls_section_end|>"
-    )
-
-    deltas = [
-        ("Let me help. ", [1, 2]),
-        ("<|tool_calls_section_begin|>", [section_begin_id]),
-        (combined, [tool_begin_id, 10, 11, 12, tool_end_id, section_end_id]),
-        (" Done", [20]),
-    ]
-
-    results = run_streaming_sequence(kimi_k2_tool_parser, deltas)
-
-    # CRITICAL: Parser should have exited section AFTER processing tool
-    assert kimi_k2_tool_parser.in_tool_section is False
-
-    # Tool call should have been emitted (not dropped)
-    if results[2] is not None and results[2].content is not None:
-        # Verify no special tokens leaked into content
-        assert "<|tool_call_end|>" not in results[2].content
-        assert "<|tool_calls_section_end|>" not in results[2].content
-
-    # Content after tool section should stream normally
-    assert results[3] is not None
-    assert results[3].content == " Done"
-
+    deltas = [content, SECTION_BEGIN]
+    for tool_id, args_json in tool_strs:
+        deltas.extend(
+            [
+                TOOL_BEGIN,
+                f"{tool_id} ",
+                ARG_BEGIN,
+                f"{args_json} ",
+                TOOL_END,
+            ]
+        )
+    deltas.append(SECTION_END)
+    return deltas
 
-def test_streaming_tool_call_markers_not_leaked(kimi_k2_tool_parser):
-    """
-    CRITICAL TEST: Verify that tool call markers (<|tool_call_begin|>,
-    <|tool_call_end|>, <|tool_call_argument_begin|>) are NOT leaked
-    into the content field during streaming.
 
-    This reproduces the AWS Bedrock bug where tool call markers appeared
-    in the 'text' field of responses.
-    """
-    kimi_k2_tool_parser.reset_streaming_state()
-
-    section_begin_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_begin|>")
-    section_end_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_end|>")
-    tool_begin_id = kimi_k2_tool_parser.vocab.get("<|tool_call_begin|>")
-    tool_end_id = kimi_k2_tool_parser.vocab.get("<|tool_call_end|>")
-
-    # List of markers that should NEVER appear in content
-    forbidden_markers = [
-        "<|tool_call_begin|>",
-        "<|tool_call_end|>",
-        "<|tool_call_argument_begin|>",
-        "<|tool_calls_section_begin|>",
-        "<|tool_calls_section_end|>",
-    ]
-
-    all_content = []
-
-    # Steps: reasoning, section begin, tool call, section end, more reasoning
-    tool_chunk = (
-        "<|tool_call_begin|> functions.get_weather:0 "
-        '<|tool_call_argument_begin|> {"city": "Tokyo"} <|tool_call_end|>'
+class TestStreamingHappyPath:
+    def test_single_tool_call(self, parser):
+        """Verify DeltaToolCall output: name, id, arguments for one tool."""
+        deltas = _split_tool_output_to_deltas(
+            "I'll help. ",
+            [("functions.get_weather:0", '{"city": "Beijing"}')],
+        )
+        rec = run_tool_extraction_streaming(parser, deltas)
+
+        assert len(rec.tool_calls) == 1
+        tc = rec.tool_calls[0]
+        assert tc.function.name == "get_weather"
+        assert tc.id == "functions.get_weather:0"
+        assert json.loads(tc.function.arguments) == {"city": "Beijing"}
+
+    def test_multiple_tool_calls(self, parser):
+        """Two tool calls emitted with correct indices, names, arguments."""
+        deltas = _split_tool_output_to_deltas(
+            "Compare weather. ",
+            [
+                ("functions.get_weather:0", '{"city": "Tokyo"}'),
+                ("functions.get_weather:1", '{"city": "NYC"}'),
+            ],
+        )
+        rec = run_tool_extraction_streaming(parser, deltas)
+
+        assert len(rec.tool_calls) == 2
+        assert rec.tool_calls[0].function.name == "get_weather"
+        assert rec.tool_calls[0].id == "functions.get_weather:0"
+        assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Tokyo"}
+
+        assert rec.tool_calls[1].function.name == "get_weather"
+        assert rec.tool_calls[1].id == "functions.get_weather:1"
+        assert json.loads(rec.tool_calls[1].function.arguments) == {"city": "NYC"}
+
+    def test_content_before_tools(self, parser):
+        """Content before section is streamed; markers/args don't leak."""
+        deltas = _split_tool_output_to_deltas(
+            "I'll check the weather. ",
+            [("functions.get_weather:0", '{"city": "Tokyo"}')],
+        )
+        rec = run_tool_extraction_streaming(parser, deltas)
+
+        assert "check the weather" in rec.other_content
+        # No markers or tool content leaked
+        for marker in [SECTION_BEGIN, SECTION_END, TOOL_BEGIN, TOOL_END, ARG_BEGIN]:
+            assert marker not in rec.other_content
+        assert "get_weather" not in rec.other_content
+        assert "Tokyo" not in rec.other_content
+
+    def test_no_tool_calls(self, parser):
+        """Plain text streaming returns content only."""
+        deltas = ["This is just ", "regular text ", "without tools."]
+        rec = run_tool_extraction_streaming(parser, deltas)
+
+        assert rec.other_content == "This is just regular text without tools."
+        assert rec.tool_calls == []
+
+    def test_incremental_arguments(self, parser):
+        """Arguments split across small chunks accumulate correctly."""
+        deltas = [
+            "Help. ",
+            SECTION_BEGIN,
+            TOOL_BEGIN,
+            "functions.get_weather:0 ",
+            ARG_BEGIN,
+            '{"ci',
+            'ty": "Be',
+            'ijing"}',
+            " ",
+            TOOL_END,
+            SECTION_END,
+        ]
+        rec = run_tool_extraction_streaming(parser, deltas)
+
+        assert len(rec.tool_calls) == 1
+        assert rec.tool_calls[0].function.name == "get_weather"
+        assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"}
+
+    @pytest.mark.parametrize(
+        "model_output",
+        [
+            pytest.param(
+                "Single. "
+                + _wrap(_tool("functions.get_weather:0", '{"city": "Beijing"}')),
+                id="single_tool",
+            ),
+            pytest.param(
+                "Multi. "
+                + _wrap(
+                    _tool("functions.get_weather:0", '{"city": "Tokyo"}'),
+                    _tool("functions.get_news:1", '{"topic": "tech"}'),
+                ),
+                id="parallel_tools",
+            ),
+            pytest.param(
+                "No prefix id. " + _wrap(_tool("get_weather:0", '{"city": "NYC"}')),
+                id="no_functions_prefix",
+            ),
+        ],
     )
-    deltas = [
-        ("I'll check the weather. ", [1, 2, 3]),
-        ("<|tool_calls_section_begin|>", [section_begin_id]),
-        (tool_chunk, [tool_begin_id, 10, 11, tool_end_id]),
-        ("<|tool_calls_section_end|>", [section_end_id]),
-        (" Here's the result.", [20, 21]),
-    ]
-
-    results = run_streaming_sequence(kimi_k2_tool_parser, deltas)
-
-    for res in results:
-        if res and res.content:
-            all_content.append(res.content)
-
-    # CRITICAL ASSERTIONS: No forbidden markers in any content
-    full_content = "".join(all_content)
-    for marker in forbidden_markers:
-        assert marker not in full_content, (
-            f"MARKER LEAK DETECTED: '{marker}' found in content. "
-            f"Full content: {repr(full_content)}"
+    def test_streaming_matches_nonstreaming(self, parser, model_output):
+        """Streaming reconstruction matches non-streaming extraction."""
+        content_non, tools_non = run_tool_extraction(
+            parser, model_output, streaming=False
+        )
+        content_stream, tools_stream = run_tool_extraction(
+            parser, model_output, streaming=True
         )
 
-    # Also check that tool call content (function name, arguments) is not leaked
-    assert "get_weather" not in full_content, (
-        f"TOOL CALL CONTENT LEAKED: 'get_weather' found in content. "
-        f"Full content: {repr(full_content)}"
-    )
-    assert "Tokyo" not in full_content, (
-        f"TOOL CALL CONTENT LEAKED: 'Tokyo' found in content. "
-        f"Full content: {repr(full_content)}"
-    )
+        assert len(tools_non) == len(tools_stream)
+        for tc_non, tc_stream in zip(tools_non, tools_stream):
+            assert tc_non.function.name == tc_stream.function.name
+            assert json.loads(tc_non.function.arguments) == json.loads(
+                tc_stream.function.arguments
+            )
 
-    # Verify that legitimate content was preserved
-    assert "I'll check the weather." in full_content or len(all_content) > 0
 
+class TestStreamingEdgeCases:
+    def test_marker_suppression(self, parser):
+        """No special-token markers appear in reconstructed content."""
+        deltas = _split_tool_output_to_deltas(
+            "I'll check. ",
+            [("functions.get_weather:0", '{"city": "Tokyo"}')],
+        )
+        rec = run_tool_extraction_streaming(parser, deltas)
+
+        forbidden = [SECTION_BEGIN, SECTION_END, TOOL_BEGIN, TOOL_END, ARG_BEGIN]
+        for marker in forbidden:
+            assert marker not in rec.other_content, (
+                f"Marker leaked: {marker!r} in {rec.other_content!r}"
+            )
+
+    def test_noise_between_markers_suppressed(self, parser):
+        """Text between section_begin and tool_call_begin doesn't leak."""
+        deltas = [
+            "Reasoning. ",
+            SECTION_BEGIN,
+            " spurious noise ",
+            TOOL_BEGIN,
+            "functions.test:0 ",
+            ARG_BEGIN,
+            '{"k": "v"} ',
+            TOOL_END,
+            SECTION_END,
+        ]
+        rec = run_tool_extraction_streaming(parser, deltas)
+
+        assert "spurious" not in rec.other_content
+        assert "noise" not in rec.other_content
+
+    def test_empty_tool_section(self, parser):
+        """Empty section (begin immediately followed by end) doesn't crash."""
+        deltas = ["Reasoning. ", SECTION_BEGIN, SECTION_END]
+        rec = run_tool_extraction_streaming(parser, deltas)
+
+        assert rec.tool_calls == []
+
+    def test_three_different_tools(self, parser):
+        """Three tool calls with different functions stream correctly."""
+        deltas = _split_tool_output_to_deltas(
+            "Multiple tasks. ",
+            [
+                ("functions.get_weather:0", '{"city": "NYC"}'),
+                ("functions.get_news:1", '{"topic": "tech"}'),
+                ("functions.send_email:2", '{"to": "a@b.com"}'),
+            ],
+        )
+        rec = run_tool_extraction_streaming(parser, deltas)
+
+        assert len(rec.tool_calls) == 3
+        names = [tc.function.name for tc in rec.tool_calls]
+        assert names == ["get_weather", "get_news", "send_email"]
+        ids = [tc.id for tc in rec.tool_calls]
+        assert len(set(ids)) == 3  # unique ids
+
+    def test_truncated_tool_call_no_end_marker(self, parser):
+        """Stream ending mid-tool-call (max_tokens) doesn't crash."""
+        deltas = [
+            "I'll check. ",
+            SECTION_BEGIN,
+            TOOL_BEGIN,
+            "functions.get_weather:0 ",
+            ARG_BEGIN,
+            '{"city": "Bei',
+            # Stream ends here — no TOOL_END, no SECTION_END
+        ]
+        rec = run_tool_extraction_streaming(parser, deltas)
+
+        # Should not crash; tool name and partial args extracted
+        assert len(rec.tool_calls) == 1
+        assert rec.tool_calls[0].function.name == "get_weather"
+        assert rec.tool_calls[0].id == "functions.get_weather:0"
+        assert rec.tool_calls[0].function.arguments == '{"city": "Bei'
+        # No markers leaked into content
+        for marker in [SECTION_BEGIN, SECTION_END, TOOL_BEGIN, TOOL_END, ARG_BEGIN]:
+            assert marker not in rec.other_content
+
+    def test_content_after_tool_section(self, parser):
+        """Trailing text after section_end doesn't crash or leak markers."""
+        deltas = [
+            "Before. ",
+            SECTION_BEGIN,
+            TOOL_BEGIN,
+            "functions.get_weather:0 ",
+            ARG_BEGIN,
+            '{"city": "Tokyo"} ',
+            TOOL_END,
+            SECTION_END,
+            " After tools.",
+        ]
+        rec = run_tool_extraction_streaming(parser, deltas)
+
+        # Tool call extracted correctly
+        assert len(rec.tool_calls) == 1
+        assert rec.tool_calls[0].function.name == "get_weather"
+        assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Tokyo"}
+        # Trailing content after tool section is dropped
+        assert "After tools." not in rec.other_content
+        # No markers leaked into content
+        for marker in [SECTION_BEGIN, SECTION_END, TOOL_BEGIN, TOOL_END, ARG_BEGIN]:
+            assert marker not in rec.other_content
+
+
+class TestAdjustRequest:
+    def test_sets_skip_special_tokens_false(self, parser):
+        request = MagicMock(spec=ChatCompletionRequest)
+        request.tools = [{"type": "function", "function": {"name": "test"}}]
+        request.tool_choice = "auto"
+        request.skip_special_tokens = True
+
+        result = parser.adjust_request(request)
+        assert result.skip_special_tokens is False
+
+    def test_no_change_when_tool_choice_none(self, parser):
+        request = MagicMock(spec=ChatCompletionRequest)
+        request.tools = [{"type": "function", "function": {"name": "test"}}]
+        request.tool_choice = "none"
+        request.skip_special_tokens = True
+
+        result = parser.adjust_request(request)
+        assert result.skip_special_tokens is True
+
+    def test_no_change_when_no_tools(self, parser):
+        request = MagicMock(spec=ChatCompletionRequest)
+        request.tools = None
+        request.tool_choice = "auto"
+        request.skip_special_tokens = True
+
+        result = parser.adjust_request(request)
+        assert result.skip_special_tokens is True
+
+
+def _chunk_tokenized_deltas(tokenizer, text: str, stream_interval: int) -> list[str]:
+    """Encode text, group tokens into chunks of stream_interval, decode each."""
+    token_ids = tokenizer.encode(text, add_special_tokens=False)
+    deltas = []
+    prev = ""
+    for i in range(0, len(token_ids), stream_interval):
+        decoded = tokenizer.decode(
+            token_ids[: i + stream_interval], skip_special_tokens=False
+        )
+        deltas.append(decoded[len(prev) :])
+        prev = decoded
+    return deltas
 
-def test_streaming_multiple_tool_calls_not_leaked(kimi_k2_tool_parser):
-    """
-    Test that MULTIPLE tool calls in streaming mode do not leak into content.
-    This reproduces the AWS Bedrock scenario: "Compare weather in Tokyo and NYC".
-    """
-    kimi_k2_tool_parser.reset_streaming_state()
 
-    section_begin_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_begin|>")
-    section_end_id = kimi_k2_tool_parser.vocab.get("<|tool_calls_section_end|>")
-    tool_begin_id = kimi_k2_tool_parser.vocab.get("<|tool_call_begin|>")
-    tool_end_id = kimi_k2_tool_parser.vocab.get("<|tool_call_end|>")
+class TestStreamingIntervals:
+    """Test streaming at various token-chunk sizes to catch boundary bugs."""
 
-    all_content = []
+    @pytest.mark.parametrize("stream_interval", [1, 2, 3, 5, 8])
+    def test_single_tool_call_at_interval(self, kimi_k2_tokenizer, stream_interval):
+        text = "Help. " + _wrap(_tool("functions.get_weather:0", '{"city": "Beijing"}'))
+        deltas = _chunk_tokenized_deltas(kimi_k2_tokenizer, text, stream_interval)
+        parser = KimiK2ToolParser(kimi_k2_tokenizer)
+        rec = run_tool_extraction_streaming(
+            parser, deltas, assert_one_tool_per_delta=False
+        )
 
-    tool1 = '<|tool_call_begin|> get_weather:0 <|tool_call_argument_begin|> {"city": "Tokyo"} <|tool_call_end|>'
-    tool2 = ' <|tool_call_begin|> get_weather:1 <|tool_call_argument_begin|> {"city": "New York"} <|tool_call_end|>'
+        assert len(rec.tool_calls) == 1
+        assert rec.tool_calls[0].function.name == "get_weather"
+        assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"}
 
-    deltas = [
-        ("I'll compare the weather. ", [1, 2, 3]),
-        ("<|tool_calls_section_begin|>", [section_begin_id]),
-        (tool1, [tool_begin_id, 10, tool_end_id]),
-        (tool2, [tool_begin_id, 20, tool_end_id]),
-        ("<|tool_calls_section_end|>", [section_end_id]),
-        (" Here's the comparison.", [30]),
-    ]
+    @pytest.mark.parametrize("stream_interval", [1, 2, 3, 5, 8])
+    def test_content_then_tool_call_at_interval(
+        self, kimi_k2_tokenizer, stream_interval
+    ):
+        text = "Sure, let me check. " + _wrap(
+            _tool("functions.get_weather:0", '{"city": "Tokyo"}')
+        )
+        deltas = _chunk_tokenized_deltas(kimi_k2_tokenizer, text, stream_interval)
+        parser = KimiK2ToolParser(kimi_k2_tokenizer)
+        rec = run_tool_extraction_streaming(
+            parser, deltas, assert_one_tool_per_delta=False
+        )
 
-    results = run_streaming_sequence(kimi_k2_tool_parser, deltas)
+        assert "let me check" in rec.other_content
+        assert "get_weather" not in rec.other_content
+        assert len(rec.tool_calls) == 1
+        assert rec.tool_calls[0].function.name == "get_weather"
+        assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Tokyo"}
+
+    @pytest.mark.parametrize("stream_interval", [1, 2, 3, 5, 8])
+    def test_multiple_tool_calls_at_interval(self, kimi_k2_tokenizer, stream_interval):
+        text = "Compare. " + _wrap(
+            _tool("functions.search:0", '{"q": "cats"}'),
+            _tool("functions.search:1", '{"q": "dogs"}'),
+        )
+        deltas = _chunk_tokenized_deltas(kimi_k2_tokenizer, text, stream_interval)
+        parser = KimiK2ToolParser(kimi_k2_tokenizer)
+        rec = run_tool_extraction_streaming(
+            parser, deltas, assert_one_tool_per_delta=False
+        )
 
-    for res in results:
-        if res and res.content:
-            all_content.append(res.content)
+        assert len(rec.tool_calls) == 2
+        assert rec.tool_calls[0].function.name == "search"
+        assert json.loads(rec.tool_calls[0].function.arguments) == {"q": "cats"}
+        assert rec.tool_calls[1].function.name == "search"
+        assert json.loads(rec.tool_calls[1].function.arguments) == {"q": "dogs"}
+
+    @pytest.mark.parametrize("stream_interval", [1, 2, 3, 5, 8])
+    def test_plain_text_at_interval(self, kimi_k2_tokenizer, stream_interval):
+        text = "This is plain text with no tool calling involved."
+        deltas = _chunk_tokenized_deltas(kimi_k2_tokenizer, text, stream_interval)
+        parser = KimiK2ToolParser(kimi_k2_tokenizer)
+        rec = run_tool_extraction_streaming(
+            parser, deltas, assert_one_tool_per_delta=False
+        )
 
-    # Assertions
-    full_content = "".join(all_content)
+        assert rec.other_content == text
+        assert rec.tool_calls == []
 
-    # Check no markers leaked
-    forbidden = ["<|tool_call", "<|tool_calls_section"]
-    for marker in forbidden:
-        assert marker not in full_content, (
-            f"MARKER LEAKED: {marker} in {repr(full_content)}"
+    def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer):
+        """Content + complete tool call in one chunk must both be emitted."""
+        text = "Hi! " + _wrap(_tool("functions.get_weather:0", '{"city": "Beijing"}'))
+        deltas = _chunk_tokenized_deltas(kimi_k2_tokenizer, text, stream_interval=9999)
+        parser = KimiK2ToolParser(kimi_k2_tokenizer)
+        rec = run_tool_extraction_streaming(
+            parser, deltas, assert_one_tool_per_delta=False
         )
 
-    # Check no tool call content leaked (both tools)
-    assert "get_weather" not in full_content, f"TOOL NAME LEAKED: {repr(full_content)}"
-    assert "Tokyo" not in full_content, f"TOOL ARG LEAKED (Tokyo): {repr(full_content)}"
-    assert "New York" not in full_content, (
-        f"TOOL ARG LEAKED (NYC): {repr(full_content)}"
-    )
-
-    # Legitimate content preserved
-    assert "compare" in full_content.lower() or len(all_content) > 0
+        assert "Hi!" in rec.other_content
+        assert "get_weather" not in rec.other_content
+        assert len(rec.tool_calls) == 1
+        assert rec.tool_calls[0].function.name == "get_weather"
+        assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"}
diff --git a/tests/tool_parsers/test_lfm2_tool_parser.py b/tests/tool_parsers/test_lfm2_tool_parser.py
new file mode 100644
index 000000000000..9cb5b195f1a7
--- /dev/null
+++ b/tests/tool_parsers/test_lfm2_tool_parser.py
@@ -0,0 +1,468 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+from transformers import AutoTokenizer
+
+from tests.tool_parsers.utils import (
+    run_tool_extraction,
+    run_tool_extraction_streaming,
+)
+from vllm.entrypoints.openai.engine.protocol import FunctionCall
+from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers import ToolParser, ToolParserManager
+
+TOOL_CALL_START = "<|tool_call_start|>"
+TOOL_CALL_END = "<|tool_call_end|>"
+
+SIMPLE_FUNCTION_OUTPUT = "get_candidate_status(candidate_id='12345')"
+SIMPLE_FUNCTION_CALL = FunctionCall(
+    name="get_candidate_status",
+    arguments='{"candidate_id": "12345"}',
+)
+MORE_TYPES_FUNCTION_OUTPUT = (
+    "register_user(name='John Doe', "
+    "age=37, "
+    "address={'city': 'San Francisco', 'state': 'CA'}, "
+    "role=None, "
+    "passed_test=True, "
+    "aliases=['John', 'Johnny'])"
+)
+MORE_TYPES_FUNCTION_CALL = FunctionCall(
+    name="register_user",
+    arguments='{"name": "John Doe", '
+    '"age": 37, '
+    '"address": {"city": "San Francisco", "state": "CA"}, '
+    '"role": null, '
+    '"passed_test": true, '
+    '"aliases": ["John", "Johnny"]}',
+)
+PARAMETERLESS_FUNCTION_OUTPUT = "get_weather()"
+PARAMETERLESS_FUNCTION_CALL = FunctionCall(
+    name="get_weather",
+    arguments="{}",
+)
+EMPTY_DICT_FUNCTION_OUTPUT = "do_something_cool(additional_data={})"
+EMPTY_DICT_FUNCTION_CALL = FunctionCall(
+    name="do_something_cool",
+    arguments='{"additional_data": {}}',
+)
+EMPTY_LIST_FUNCTION_OUTPUT = "do_something_cool(steps=[])"
+EMPTY_LIST_FUNCTION_CALL = FunctionCall(
+    name="do_something_cool",
+    arguments='{"steps": []}',
+)
+ESCAPED_STRING_FUNCTION_OUTPUT = (
+    r"get_weather(city='Martha\'s Vineyard', metric='\"cool units\"')"
+)
+ESCAPED_STRING_FUNCTION_CALL = FunctionCall(
+    name="get_weather",
+    arguments='{"city": "Martha\'s Vineyard", "metric": "\\"cool units\\""}',
+)
+DOTTED_NAME_FUNCTION_OUTPUT = (
+    "grocery.orderIngredients("
+    "ingredientList=[{'name': 'Lasagna noodles', 'amount': 250, 'unit': 'g'}], "
+    "deliveryAddress='845 Willow Lane, Springfield, IL 62704')"
+)
+DOTTED_NAME_FUNCTION_CALL = FunctionCall(
+    name="grocery.orderIngredients",
+    arguments=(
+        '{"ingredientList": ['
+        '{"name": "Lasagna noodles", "amount": 250, "unit": "g"}], '
+        '"deliveryAddress": "845 Willow Lane, Springfield, IL 62704"}'
+    ),
+)
+
+
+@pytest.fixture(scope="module")
+def lfm2_tokenizer() -> TokenizerLike:
+    return AutoTokenizer.from_pretrained("LiquidAI/LFM2.5-1.2B-Instruct")
+
+
+def _wrap(tool_text: str, content_after: str = "") -> str:
+    """Wrap pythonic tool call in LFM2.5 sentinel tokens."""
+    result = f"{TOOL_CALL_START}[{tool_text}]{TOOL_CALL_END}"
+    if content_after:
+        result += f"\n{content_after}"
+    return result
+
+
+@pytest.mark.parametrize("streaming", [True, False])
+def test_no_tool_call(streaming: bool, lfm2_tokenizer: TokenizerLike):
+    tool_parser: ToolParser = ToolParserManager.get_tool_parser("lfm2")(lfm2_tokenizer)
+    model_output = "How can I help you today?"
+
+    content, tool_calls = run_tool_extraction(
+        tool_parser, model_output, streaming=streaming
+    )
+
+    assert content == model_output
+    assert len(tool_calls) == 0
+
+
+TEST_CASES = [
+    pytest.param(
+        True,
+        _wrap(SIMPLE_FUNCTION_OUTPUT),
+        [SIMPLE_FUNCTION_CALL],
+        None,
+        id="simple_streaming",
+    ),
+    pytest.param(
+        False,
+        _wrap(SIMPLE_FUNCTION_OUTPUT),
+        [SIMPLE_FUNCTION_CALL],
+        None,
+        id="simple_nonstreaming",
+    ),
+    pytest.param(
+        True,
+        _wrap(MORE_TYPES_FUNCTION_OUTPUT),
+        [MORE_TYPES_FUNCTION_CALL],
+        None,
+        id="more_types_streaming",
+    ),
+    pytest.param(
+        False,
+        _wrap(MORE_TYPES_FUNCTION_OUTPUT),
+        [MORE_TYPES_FUNCTION_CALL],
+        None,
+        id="more_types_nonstreaming",
+    ),
+    pytest.param(
+        True,
+        _wrap(PARAMETERLESS_FUNCTION_OUTPUT),
+        [PARAMETERLESS_FUNCTION_CALL],
+        None,
+        id="parameterless_streaming",
+    ),
+    pytest.param(
+        False,
+        _wrap(PARAMETERLESS_FUNCTION_OUTPUT),
+        [PARAMETERLESS_FUNCTION_CALL],
+        None,
+        id="parameterless_nonstreaming",
+    ),
+    pytest.param(
+        True,
+        _wrap(EMPTY_DICT_FUNCTION_OUTPUT),
+        [EMPTY_DICT_FUNCTION_CALL],
+        None,
+        id="empty_dict_streaming",
+    ),
+    pytest.param(
+        False,
+        _wrap(EMPTY_DICT_FUNCTION_OUTPUT),
+        [EMPTY_DICT_FUNCTION_CALL],
+        None,
+        id="empty_dict_nonstreaming",
+    ),
+    pytest.param(
+        True,
+        _wrap(EMPTY_LIST_FUNCTION_OUTPUT),
+        [EMPTY_LIST_FUNCTION_CALL],
+        None,
+        id="empty_list_streaming",
+    ),
+    pytest.param(
+        False,
+        _wrap(EMPTY_LIST_FUNCTION_OUTPUT),
+        [EMPTY_LIST_FUNCTION_CALL],
+        None,
+        id="empty_list_nonstreaming",
+    ),
+    pytest.param(
+        True,
+        _wrap(ESCAPED_STRING_FUNCTION_OUTPUT),
+        [ESCAPED_STRING_FUNCTION_CALL],
+        None,
+        id="escaped_string_streaming",
+    ),
+    pytest.param(
+        False,
+        _wrap(ESCAPED_STRING_FUNCTION_OUTPUT),
+        [ESCAPED_STRING_FUNCTION_CALL],
+        None,
+        id="escaped_string_nonstreaming",
+    ),
+    pytest.param(
+        True,
+        _wrap(f"{SIMPLE_FUNCTION_OUTPUT}, {MORE_TYPES_FUNCTION_OUTPUT}"),
+        [SIMPLE_FUNCTION_CALL, MORE_TYPES_FUNCTION_CALL],
+        None,
+        id="parallel_calls_streaming",
+    ),
+    pytest.param(
+        False,
+        _wrap(f"{SIMPLE_FUNCTION_OUTPUT}, {MORE_TYPES_FUNCTION_OUTPUT}"),
+        [SIMPLE_FUNCTION_CALL, MORE_TYPES_FUNCTION_CALL],
+        None,
+        id="parallel_calls_nonstreaming",
+    ),
+    # LFM2.5 specific: content AFTER tool call
+    pytest.param(
+        False,
+        _wrap(
+            SIMPLE_FUNCTION_OUTPUT,
+            content_after="Checking the current status of candidate ID 12345.",
+        ),
+        [SIMPLE_FUNCTION_CALL],
+        "Checking the current status of candidate ID 12345.",
+        id="content_after_tool_call_nonstreaming",
+    ),
+    # Dotted / class-method function names: grocery.orderIngredients(...)
+    pytest.param(
+        True,
+        _wrap(DOTTED_NAME_FUNCTION_OUTPUT),
+        [DOTTED_NAME_FUNCTION_CALL],
+        None,
+        id="dotted_name_streaming",
+    ),
+    pytest.param(
+        False,
+        _wrap(DOTTED_NAME_FUNCTION_OUTPUT),
+        [DOTTED_NAME_FUNCTION_CALL],
+        None,
+        id="dotted_name_nonstreaming",
+    ),
+]
+
+
+@pytest.mark.parametrize(
+    "streaming, model_output, expected_tool_calls, expected_content",
+    TEST_CASES,
+)
+def test_tool_call(
+    streaming: bool,
+    model_output: str,
+    expected_tool_calls: list[FunctionCall],
+    expected_content: str | None,
+    lfm2_tokenizer: TokenizerLike,
+):
+    tool_parser: ToolParser = ToolParserManager.get_tool_parser("lfm2")(lfm2_tokenizer)
+
+    content, tool_calls = run_tool_extraction(
+        tool_parser, model_output, streaming=streaming
+    )
+
+    if expected_content and not streaming:
+        assert content == expected_content
+    assert len(tool_calls) == len(expected_tool_calls)
+    for actual, expected in zip(tool_calls, expected_tool_calls):
+        assert actual.type == "function"
+        assert actual.function == expected
+
+
+def test_streaming_tool_call_with_large_steps(lfm2_tokenizer: TokenizerLike):
+    tool_parser: ToolParser = ToolParserManager.get_tool_parser("lfm2")(lfm2_tokenizer)
+    model_output_deltas = [
+        f"{TOOL_CALL_START}[get_candidate_status(candidate_id='12345'), "
+        f"{PARAMETERLESS_FUNCTION_OUTPUT}, "
+        f"{EMPTY_LIST_FUNCTION_OUTPUT}]{TOOL_CALL_END}",
+    ]
+
+    reconstructor = run_tool_extraction_streaming(
+        tool_parser, model_output_deltas, assert_one_tool_per_delta=False
+    )
+
+    assert len(reconstructor.tool_calls) == 3
+    assert reconstructor.tool_calls[0].function == SIMPLE_FUNCTION_CALL
+    assert reconstructor.tool_calls[1].function == PARAMETERLESS_FUNCTION_CALL
+    assert reconstructor.tool_calls[2].function == EMPTY_LIST_FUNCTION_CALL
+
+
+def test_streaming_full_block_and_trailing_in_single_delta(
+    lfm2_tokenizer: TokenizerLike,
+):
+    """The entire <|tool_call_start|>[...]<|tool_call_end|> block plus
+    trailing assistant text arrive in one delta. Trailing content must
+    still be emitted — not silently dropped."""
+    tool_parser: ToolParser = ToolParserManager.get_tool_parser("lfm2")(lfm2_tokenizer)
+    full_text = f"{TOOL_CALL_START}[{SIMPLE_FUNCTION_OUTPUT}]{TOOL_CALL_END}\nDone."
+
+    reconstructor = run_tool_extraction_streaming(tool_parser, [full_text])
+
+    assert len(reconstructor.tool_calls) == 1
+    assert reconstructor.tool_calls[0].function == SIMPLE_FUNCTION_CALL
+    assert "Done." in reconstructor.other_content
+
+
+def test_streaming_leading_content_and_full_block_in_single_delta(
+    lfm2_tokenizer: TokenizerLike,
+):
+    """Leading assistant text plus the entire tool block arrive in one
+    delta. Leading content must be emitted — not silently dropped."""
+    tool_parser: ToolParser = ToolParserManager.get_tool_parser("lfm2")(lfm2_tokenizer)
+    full_text = (
+        f"Let me check. {TOOL_CALL_START}[{SIMPLE_FUNCTION_OUTPUT}]{TOOL_CALL_END}"
+    )
+
+    reconstructor = run_tool_extraction_streaming(tool_parser, [full_text])
+
+    assert len(reconstructor.tool_calls) == 1
+    assert reconstructor.tool_calls[0].function == SIMPLE_FUNCTION_CALL
+    assert "Let me check." in reconstructor.other_content
+
+
+def test_streaming_leading_block_and_trailing_in_single_delta(
+    lfm2_tokenizer: TokenizerLike,
+):
+    """Leading text + complete tool block + trailing text in one delta.
+    Both leading and trailing content must be preserved."""
+    tool_parser: ToolParser = ToolParserManager.get_tool_parser("lfm2")(lfm2_tokenizer)
+    full_text = (
+        "Let me check. "
+        f"{TOOL_CALL_START}[{SIMPLE_FUNCTION_OUTPUT}]{TOOL_CALL_END}\nDone."
+    )
+
+    reconstructor = run_tool_extraction_streaming(tool_parser, [full_text])
+
+    assert len(reconstructor.tool_calls) == 1
+    assert reconstructor.tool_calls[0].function == SIMPLE_FUNCTION_CALL
+    assert "Let me check." in reconstructor.other_content
+    assert "Done." in reconstructor.other_content
+
+
+def test_echoed_tool_call_body_not_leaked_to_content(
+    lfm2_tokenizer: TokenizerLike,
+):
+    """LFM2 sometimes emits the tool call body again after the first
+    <|tool_call_end|>, capped with a second <|tool_call_end|>. The
+    echoed body must not surface as assistant content — neither in
+    streaming nor non-streaming paths."""
+    tool_parser: ToolParser = ToolParserManager.get_tool_parser("lfm2")(lfm2_tokenizer)
+    body = (
+        "[grocery.orderIngredients("
+        "ingredientList=[{'name': 'apple', 'quantity': '2'}], "
+        "deliveryAddress='123 Main St')]"
+    )
+    model_output = f"{TOOL_CALL_START}{body}{TOOL_CALL_END}{body}{TOOL_CALL_END}"
+
+    # Non-streaming
+    content_ns, tool_calls_ns = run_tool_extraction(
+        tool_parser, model_output, streaming=False
+    )
+    assert len(tool_calls_ns) == 1
+    assert tool_calls_ns[0].function.name == "grocery.orderIngredients"
+    assert content_ns in (None, "")
+
+    # Streaming: re-fetch a fresh parser since state was mutated above.
+    tool_parser2: ToolParser = ToolParserManager.get_tool_parser("lfm2")(lfm2_tokenizer)
+    content_s, tool_calls_s = run_tool_extraction(
+        tool_parser2, model_output, streaming=True
+    )
+    assert len(tool_calls_s) == 1
+    assert tool_calls_s[0].function.name == "grocery.orderIngredients"
+    # Echoed body must not leak as content.
+    assert content_s in (None, "")
+    assert "grocery.orderIngredients" not in (content_s or "")
+    assert TOOL_CALL_END not in (content_s or "")
+
+
+def test_streaming_char_by_char_multi_dict_list(lfm2_tokenizer: TokenizerLike):
+    """Stream a tool call containing a list of multiple dicts one
+    character at a time. Every prefix lands in some partial-parse state
+    (mid-key, mid-value, open quote inside dict, empty dict, etc.). The
+    parser must not raise — incomplete prefixes should silently wait for
+    more text instead of logging exceptions."""
+    tool_parser: ToolParser = ToolParserManager.get_tool_parser("lfm2")(lfm2_tokenizer)
+    full_text = (
+        f"{TOOL_CALL_START}[grocery.orderIngredients("
+        "ingredientList=["
+        '{"name": "apple", "quantity": "2"}, '
+        '{"name": "bread", "quantity": "1"}'
+        f"])]{TOOL_CALL_END}"
+    )
+    deltas = [c for c in full_text]
+
+    reconstructor = run_tool_extraction_streaming(
+        tool_parser, deltas, assert_one_tool_per_delta=False
+    )
+
+    assert len(reconstructor.tool_calls) == 1
+    assert reconstructor.tool_calls[0].function.name == "grocery.orderIngredients"
+    import json
+
+    args = json.loads(reconstructor.tool_calls[0].function.arguments)
+    assert args == {
+        "ingredientList": [
+            {"name": "apple", "quantity": "2"},
+            {"name": "bread", "quantity": "1"},
+        ]
+    }
+
+
+def test_streaming_dotted_name_in_single_delta(lfm2_tokenizer: TokenizerLike):
+    """A pythonic call with a dotted/attribute function name (e.g.
+    ``domain.method(arg=...)``) must be parsed correctly in streaming mode
+    just as in non-streaming mode."""
+    tool_parser: ToolParser = ToolParserManager.get_tool_parser("lfm2")(lfm2_tokenizer)
+    full_text = f"{TOOL_CALL_START}[{DOTTED_NAME_FUNCTION_OUTPUT}]{TOOL_CALL_END}"
+
+    reconstructor = run_tool_extraction_streaming(tool_parser, [full_text])
+
+    assert len(reconstructor.tool_calls) == 1
+    assert reconstructor.tool_calls[0].function == DOTTED_NAME_FUNCTION_CALL
+
+
+def test_adjust_request_disables_skip_special_tokens(
+    lfm2_tokenizer: TokenizerLike,
+):
+    """When tools are present, the parser must force
+    ``skip_special_tokens=False`` so the engine does not strip the
+    <|tool_call_start|>/<|tool_call_end|> sentinels before they reach the
+    parser."""
+    from vllm.entrypoints.openai.chat_completion.protocol import (
+        ChatCompletionRequest,
+    )
+
+    tool_parser: ToolParser = ToolParserManager.get_tool_parser("lfm2")(lfm2_tokenizer)
+
+    request_with_tools = ChatCompletionRequest(
+        messages=[],
+        model="test-model",
+        tools=[
+            {
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {"city": {"type": "string"}},
+                    },
+                },
+            }
+        ],
+    )
+    assert request_with_tools.skip_special_tokens is True
+    adjusted = tool_parser.adjust_request(request_with_tools)
+    assert adjusted.skip_special_tokens is False
+
+    # No tools → no override; default behaviour preserved.
+    request_no_tools = ChatCompletionRequest(messages=[], model="test-model")
+    assert request_no_tools.skip_special_tokens is True
+    adjusted_no_tools = tool_parser.adjust_request(request_no_tools)
+    assert adjusted_no_tools.skip_special_tokens is True
+
+
+@pytest.mark.parametrize("streaming", [False])
+def test_regex_timeout_handling(streaming: bool, lfm2_tokenizer: TokenizerLike):
+    """Test regex timeout is handled gracefully."""
+    tool_parser: ToolParser = ToolParserManager.get_tool_parser("lfm2")(lfm2_tokenizer)
+
+    fake_input = f"{TOOL_CALL_START}[A(A=" + "\t)A(A=,\t" * 2
+    fake_input += f"]{TOOL_CALL_END}"
+
+    mock_regex = MagicMock()
+    mock_regex.match.side_effect = TimeoutError("Regex timeout")
+
+    with patch.object(tool_parser, "TOOL_CALL_REGEX", mock_regex):
+        content, tool_calls = run_tool_extraction(
+            tool_parser, fake_input, streaming=streaming
+        )
+
+        assert content == fake_input
+        assert len(tool_calls) == 0
+        mock_regex.match.assert_called_once()
diff --git a/tests/tool_parsers/test_llama3_json_tool_parser.py b/tests/tool_parsers/test_llama3_json_tool_parser.py
index 53948d577c15..7040fe87d07a 100644
--- a/tests/tool_parsers/test_llama3_json_tool_parser.py
+++ b/tests/tool_parsers/test_llama3_json_tool_parser.py
@@ -4,15 +4,22 @@
 from unittest.mock import MagicMock, patch
 
 import pytest
+from transformers import AutoTokenizer
 
 from vllm.entrypoints.openai.engine.protocol import ExtractedToolCallInformation
-from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.llama_tool_parser import Llama3JsonToolParser
 
+LLAMA_MODEL = "meta-llama/Llama-3.2-1B-Instruct"
+
+
+@pytest.fixture(scope="module")
+def llama_tokenizer():
+    return AutoTokenizer.from_pretrained(LLAMA_MODEL)
+
 
 @pytest.fixture
-def parser(default_tokenizer: TokenizerLike):
-    return Llama3JsonToolParser(default_tokenizer)
+def parser(llama_tokenizer):
+    return Llama3JsonToolParser(llama_tokenizer)
 
 
 def test_extract_tool_calls_simple(parser):
diff --git a/tests/tool_parsers/test_minimax_m2_tool_parser.py b/tests/tool_parsers/test_minimax_m2_tool_parser.py
index d61b6b6201cd..963c3462ff36 100644
--- a/tests/tool_parsers/test_minimax_m2_tool_parser.py
+++ b/tests/tool_parsers/test_minimax_m2_tool_parser.py
@@ -5,6 +5,10 @@
 
 import pytest
 
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionToolsParam,
+    FunctionDefinition,
+)
 from vllm.tool_parsers.minimax_m2_tool_parser import (
     MinimaxM2ToolParser,
 )
@@ -442,3 +446,235 @@ def test_header_and_params_in_separate_chunks(self, parser):
             "city": "Seattle",
             "days": "5",
         }
+
+
+class TestAnyOfNullableParam:
+    """Regression: anyOf nullable parameter parsing (PR #32342)."""
+
+    def test_anyof_nullable_param_non_null_value(self):
+        """A valid non-null string should be preserved, not collapsed to None."""
+        tools = [
+            ChatCompletionToolsParam(
+                function=FunctionDefinition(
+                    name="update_profile",
+                    parameters={
+                        "type": "object",
+                        "properties": {
+                            "nickname": {
+                                "anyOf": [{"type": "string"}, {"type": "null"}],
+                            },
+                        },
+                    },
+                ),
+            )
+        ]
+        parser = MinimaxM2ToolParser(FakeTokenizer(), tools=tools)
+
+        results = _feed(
+            parser,
+            [
+                '<minimax:tool_call><invoke name="update_profile">'
+                '<parameter name="nickname">Alice</parameter>'
+                "</invoke></minimax:tool_call>",
+            ],
+        )
+        tc = _collect_tool_calls(results)
+        assert len(tc) == 1
+        parsed = json.loads(tc[0]["arguments"])
+        assert parsed["nickname"] == "Alice"
+
+    def test_anyof_nullable_param_null_value(self):
+        """An actual null-like value should be returned as None/null."""
+        tools = [
+            ChatCompletionToolsParam(
+                function=FunctionDefinition(
+                    name="update_profile",
+                    parameters={
+                        "type": "object",
+                        "properties": {
+                            "nickname": {
+                                "anyOf": [{"type": "string"}, {"type": "null"}],
+                            },
+                        },
+                    },
+                ),
+            )
+        ]
+        parser = MinimaxM2ToolParser(FakeTokenizer(), tools=tools)
+
+        results = _feed(
+            parser,
+            [
+                '<minimax:tool_call><invoke name="update_profile">'
+                '<parameter name="nickname">null</parameter>'
+                "</invoke></minimax:tool_call>",
+            ],
+        )
+        tc = _collect_tool_calls(results)
+        assert len(tc) == 1
+        parsed = json.loads(tc[0]["arguments"])
+        assert parsed["nickname"] is None
+
+    def test_anyof_nullable_param_object_value(self):
+        """A valid object value in anyOf with null should parse as dict."""
+        tools = [
+            ChatCompletionToolsParam(
+                function=FunctionDefinition(
+                    name="update_settings",
+                    parameters={
+                        "type": "object",
+                        "properties": {
+                            "config": {
+                                "anyOf": [{"type": "object"}, {"type": "null"}],
+                            },
+                        },
+                    },
+                ),
+            )
+        ]
+        parser = MinimaxM2ToolParser(FakeTokenizer(), tools=tools)
+
+        results = _feed(
+            parser,
+            [
+                '<minimax:tool_call><invoke name="update_settings">'
+                '<parameter name="config">{"theme": "dark", "fontSize": 14}'
+                "</parameter>"
+                "</invoke></minimax:tool_call>",
+            ],
+        )
+        tc = _collect_tool_calls(results)
+        assert len(tc) == 1
+        parsed = json.loads(tc[0]["arguments"])
+        assert parsed["config"] == {"theme": "dark", "fontSize": 14}
+        assert isinstance(parsed["config"], dict)
+
+
+class TestNoneStringPreservation:
+    """Regression tests for #39567: 'none' as a string must not become None."""
+
+    def test_none_string_preserved_in_enum(self):
+        """'none' in an enum must stay as the string 'none', not Python None."""
+        tools = [
+            ChatCompletionToolsParam(
+                function=FunctionDefinition(
+                    name="set_theme",
+                    parameters={
+                        "type": "object",
+                        "properties": {
+                            "theme": {
+                                "type": "string",
+                                "enum": ["dark", "light", "none"],
+                            },
+                        },
+                    },
+                ),
+            )
+        ]
+        parser = MinimaxM2ToolParser(FakeTokenizer(), tools=tools)
+
+        results = _feed(
+            parser,
+            [
+                '<minimax:tool_call><invoke name="set_theme">'
+                '<parameter name="theme">none</parameter>'
+                "</invoke></minimax:tool_call>",
+            ],
+        )
+        tc = _collect_tool_calls(results)
+        assert len(tc) == 1
+        parsed = json.loads(tc[0]["arguments"])
+        assert parsed["theme"] == "none"
+        assert parsed["theme"] is not None
+
+    def test_none_string_preserved_plain_string(self):
+        """'none' as a plain string param must stay as 'none'."""
+        tools = [
+            ChatCompletionToolsParam(
+                function=FunctionDefinition(
+                    name="echo",
+                    parameters={
+                        "type": "object",
+                        "properties": {
+                            "message": {"type": "string"},
+                        },
+                    },
+                ),
+            )
+        ]
+        parser = MinimaxM2ToolParser(FakeTokenizer(), tools=tools)
+
+        results = _feed(
+            parser,
+            [
+                '<minimax:tool_call><invoke name="echo">'
+                '<parameter name="message">none</parameter>'
+                "</invoke></minimax:tool_call>",
+            ],
+        )
+        tc = _collect_tool_calls(results)
+        assert len(tc) == 1
+        parsed = json.loads(tc[0]["arguments"])
+        assert parsed["message"] == "none"
+
+    def test_null_still_converts_to_none(self):
+        """'null' in a nullable param must still become Python None."""
+        tools = [
+            ChatCompletionToolsParam(
+                function=FunctionDefinition(
+                    name="update_profile",
+                    parameters={
+                        "type": "object",
+                        "properties": {
+                            "nickname": {
+                                "anyOf": [{"type": "string"}, {"type": "null"}],
+                            },
+                        },
+                    },
+                ),
+            )
+        ]
+        parser = MinimaxM2ToolParser(FakeTokenizer(), tools=tools)
+
+        results = _feed(
+            parser,
+            [
+                '<minimax:tool_call><invoke name="update_profile">'
+                '<parameter name="nickname">null</parameter>'
+                "</invoke></minimax:tool_call>",
+            ],
+        )
+        tc = _collect_tool_calls(results)
+        assert len(tc) == 1
+        parsed = json.loads(tc[0]["arguments"])
+        assert parsed["nickname"] is None
+
+    def test_nil_string_preserved(self):
+        """'nil' must stay as the string 'nil', not become None."""
+        tools = [
+            ChatCompletionToolsParam(
+                function=FunctionDefinition(
+                    name="echo",
+                    parameters={
+                        "type": "object",
+                        "properties": {
+                            "value": {"type": "string"},
+                        },
+                    },
+                ),
+            )
+        ]
+        parser = MinimaxM2ToolParser(FakeTokenizer(), tools=tools)
+
+        results = _feed(
+            parser,
+            [
+                '<minimax:tool_call><invoke name="echo">'
+                '<parameter name="value">nil</parameter>'
+                "</invoke></minimax:tool_call>",
+            ],
+        )
+        tc = _collect_tool_calls(results)
+        assert len(tc) == 1
+        parsed = json.loads(tc[0]["arguments"])
+        assert parsed["value"] == "nil"
diff --git a/tests/tool_parsers/test_mistral_tool_parser.py b/tests/tool_parsers/test_mistral_tool_parser.py
index bf2fba8a8655..f6a5c6bfb265 100644
--- a/tests/tool_parsers/test_mistral_tool_parser.py
+++ b/tests/tool_parsers/test_mistral_tool_parser.py
@@ -3,19 +3,52 @@
 
 import json
 from collections.abc import Generator
+from typing import Any
+from unittest.mock import MagicMock, patch
 
 import partial_json_parser
 import pytest
 from mistral_common.protocol.instruct.messages import AssistantMessage
 from mistral_common.protocol.instruct.request import InstructRequest
-from mistral_common.protocol.instruct.tool_calls import FunctionCall, ToolCall
+from mistral_common.protocol.instruct.tool_calls import (
+    FunctionCall,
+    ToolCall,
+)
+from mistral_common.protocol.instruct.tool_calls import (
+    NamedToolChoice as MistralNamedToolChoice,
+)
+from mistral_common.protocol.instruct.tool_calls import (
+    ToolChoice as MistralToolChoice,
+)
+from mistral_common.protocol.instruct.tool_calls import (
+    ToolChoiceEnum as MistralToolChoiceEnum,
+)
 from partial_json_parser.core.options import Allow
 
-from vllm.entrypoints.openai.engine.protocol import DeltaMessage, DeltaToolCall
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+)
+from vllm.entrypoints.openai.engine.protocol import (
+    DeltaFunctionCall,
+    DeltaMessage,
+    DeltaToolCall,
+    ExtractedToolCallInformation,
+    StructuralTagResponseFormat,
+)
+from vllm.entrypoints.openai.engine.protocol import FunctionCall as VllmFunctionCall
+from vllm.reasoning.mistral_reasoning_parser import MistralReasoningParser
+from vllm.sampling_params import StructuredOutputsParams
 from vllm.tokenizers import TokenizerLike, get_tokenizer
 from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
 from vllm.tokenizers.mistral import MistralTokenizer
-from vllm.tool_parsers.mistral_tool_parser import MistralToolParser
+from vllm.tool_parsers.mistral_tool_parser import (
+    _DEFAULT_JSON_SCHEMA,
+    MistralStreamingResult,
+    MistralToolCall,
+    MistralToolParser,
+)
+
+_DUMMY_REQUEST = ChatCompletionRequest(messages=[], model="test")
 
 
 @pytest.fixture(scope="module")
@@ -40,6 +73,13 @@ def mistral_tool_parser(mistral_tokenizer):
     return MistralToolParser(mistral_tokenizer)
 
 
+@pytest.fixture
+def non_mistral_parser() -> MistralToolParser:
+    mock_tokenizer = MagicMock()
+    mock_tokenizer.get_vocab.return_value = {"[TOOL_CALLS]": 1}
+    return MistralToolParser(mock_tokenizer)
+
+
 def assert_tool_calls(
     actual_tool_calls: list[ToolCall] | list[DeltaToolCall],
     expected_tool_calls: list[ToolCall],
@@ -174,7 +214,7 @@ def stream_delta_message_generator(
             previous_token_ids,
             current_token_ids,
             delta_token_ids,
-            request=None,  # type: ignore[arg-type]
+            request=_DUMMY_REQUEST,
         )
         if delta_message:
             yield delta_message
@@ -187,14 +227,18 @@ def stream_delta_message_generator(
         read_offset = new_read_offset
 
 
-def test_extract_tool_calls_no_tools(mistral_pre_v11_tool_parser):
+@pytest.mark.parametrize(
+    "parser_fixture",
+    ["mistral_pre_v11_tool_parser", "mistral_tool_parser"],
+    ids=["pre_v11", "v11"],
+)
+def test_extract_tool_calls_no_tools(parser_fixture, request):
+    parser = request.getfixturevalue(parser_fixture)
     model_output = "This is a test"
-    extracted_tool_calls = mistral_pre_v11_tool_parser.extract_tool_calls(
-        model_output, request=None
-    )  # type: ignore[arg-type]
-    assert not extracted_tool_calls.tools_called
-    assert extracted_tool_calls.tool_calls == []
-    assert extracted_tool_calls.content == model_output
+    result = parser.extract_tool_calls(model_output, request=_DUMMY_REQUEST)
+    assert result == ExtractedToolCallInformation(
+        tools_called=False, tool_calls=[], content=model_output
+    )
 
 
 @pytest.mark.parametrize(
@@ -203,6 +247,9 @@ def test_extract_tool_calls_no_tools(mistral_pre_v11_tool_parser):
         "single_tool_weather",
         "argument_before_name",
         "argument_before_name_and_name_in_argument",
+        "multiple_tools",
+        "content_before_tool",
+        "trailing_data_after_json",
     ],
     argnames=["model_output", "expected_tool_calls", "expected_content"],
     argvalues=[
@@ -261,14 +308,62 @@ def test_extract_tool_calls_no_tools(mistral_pre_v11_tool_parser):
             ],
             None,
         ),
+        (
+            """[TOOL_CALLS] [{"name": "add", "arguments": {"a": 3.5, "b": 4}}, {"name": "get_current_weather", "arguments":{"city": "San Francisco", "state": "CA", "unit": "celsius"}}]""",  # noqa: E501
+            [
+                ToolCall(
+                    function=FunctionCall(
+                        name="add", arguments=json.dumps({"a": 3.5, "b": 4})
+                    )
+                ),
+                ToolCall(
+                    function=FunctionCall(
+                        name="get_current_weather",
+                        arguments=json.dumps(
+                            {"city": "San Francisco", "state": "CA", "unit": "celsius"}
+                        ),
+                    )
+                ),
+            ],
+            None,
+        ),
+        (
+            """Hello[TOOL_CALLS] [{"name": "add", "arguments":{"a": 1, "b": 2}}]""",  # noqa: E501
+            [
+                ToolCall(
+                    function=FunctionCall(
+                        name="add", arguments=json.dumps({"a": 1, "b": 2})
+                    )
+                )
+            ],
+            "Hello",
+        ),
+        (
+            """[TOOL_CALLS] [{"name": "get_current_weather", "arguments":{"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]\nextra trailing data""",  # noqa: E501
+            [
+                ToolCall(
+                    function=FunctionCall(
+                        name="get_current_weather",
+                        arguments=json.dumps(
+                            {
+                                "city": "Dallas",
+                                "state": "TX",
+                                "unit": "fahrenheit",
+                            }
+                        ),
+                    )
+                )
+            ],
+            None,
+        ),
     ],
 )
 def test_extract_tool_calls_pre_v11_tokenizer(
     mistral_pre_v11_tool_parser, model_output, expected_tool_calls, expected_content
 ):
     extracted_tool_calls = mistral_pre_v11_tool_parser.extract_tool_calls(
-        model_output, request=None
-    )  # type: ignore[arg-type]
+        model_output, request=_DUMMY_REQUEST
+    )
     assert extracted_tool_calls.tools_called
 
     assert_tool_calls(extracted_tool_calls.tool_calls, expected_tool_calls)
@@ -276,6 +371,49 @@ def test_extract_tool_calls_pre_v11_tokenizer(
     assert extracted_tool_calls.content == expected_content
 
 
+def test_extract_tool_calls_pre_v11_multiple_bot_tokens_raises(
+    mistral_pre_v11_tool_parser,
+):
+    model_output = (
+        '[TOOL_CALLS] [{"name": "add", "arguments":{"a": 1}}]'
+        '[TOOL_CALLS] [{"name": "sub", "arguments":{"b": 2}}]'
+    )
+    with pytest.raises(ValueError, match="Only one BOT token"):
+        mistral_pre_v11_tool_parser.extract_tool_calls(
+            model_output, request=_DUMMY_REQUEST
+        )
+
+
+def test_extract_tool_calls_pre_v11_regex_fallback(
+    mistral_pre_v11_tool_parser,
+):
+    """The regex fallback path finds valid JSON via regex when the primary
+    raw_decode fails on leading junk. It should re-serialize arguments
+    and return a valid tool call."""
+    model_output = (
+        '[TOOL_CALLS]  junk [{"name": "add", "arguments":{"a": 1, "b": 2}}] trail'
+    )
+    result = mistral_pre_v11_tool_parser.extract_tool_calls(
+        model_output, request=_DUMMY_REQUEST
+    )
+    assert result.tools_called
+    assert len(result.tool_calls) == 1
+    assert result.tool_calls[0].function.name == "add"
+    assert result.tool_calls[0].function.arguments == json.dumps({"a": 1, "b": 2})
+
+
+def test_extract_tool_calls_pre_v11_regex_fallback_fails(
+    mistral_pre_v11_tool_parser,
+):
+    model_output = "[TOOL_CALLS] not json at all"
+    result = mistral_pre_v11_tool_parser.extract_tool_calls(
+        model_output, request=_DUMMY_REQUEST
+    )
+    assert result == ExtractedToolCallInformation(
+        tools_called=False, tool_calls=[], content="not json at all"
+    )
+
+
 @pytest.mark.parametrize(
     ids=[
         "single_tool_add",
@@ -364,8 +502,8 @@ def test_extract_tool_calls(
     mistral_tool_parser, model_output, expected_tool_calls, expected_content
 ):
     extracted_tool_calls = mistral_tool_parser.extract_tool_calls(
-        model_output, request=None
-    )  # type: ignore[arg-type]
+        model_output, request=_DUMMY_REQUEST
+    )
     assert extracted_tool_calls.tools_called
 
     assert_tool_calls(extracted_tool_calls.tool_calls, expected_tool_calls)
@@ -373,6 +511,16 @@ def test_extract_tool_calls(
     assert extracted_tool_calls.content == expected_content
 
 
+def test_extract_tool_calls_v11_without_args_skipped(mistral_tool_parser):
+    model_output = "[TOOL_CALLS]toolname_no_args"
+    result = mistral_tool_parser.extract_tool_calls(
+        model_output, request=_DUMMY_REQUEST
+    )
+    assert result == ExtractedToolCallInformation(
+        tools_called=True, tool_calls=[], content=None
+    )
+
+
 def _test_extract_tool_calls_streaming(
     tool_parser, tokenizer, model_output, tools, expected_tool_calls, expected_content
 ):
@@ -442,6 +590,33 @@ def _test_extract_tool_calls_streaming(
     ]
     assert_tool_calls(actual_tool_calls, expected_tool_calls)
 
+    if expected_tool_calls:
+        assert len(tool_parser.streamed_args_for_tool) == len(expected_tool_calls)
+        assert len(tool_parser.prev_tool_call_arr) == len(expected_tool_calls)
+        for i in range(len(expected_tool_calls)):
+            assert (
+                tool_parser.prev_tool_call_arr[i]["arguments"]
+                == tool_parser.streamed_args_for_tool[i]
+            )
+            assert tool_parser.streamed_args_for_tool[i] == function_args_strs[i]
+            assert (
+                tool_parser.prev_tool_call_arr[i]["name"]
+                == expected_tool_calls[i].function.name
+            )
+
+        # Simulate the serving layer's unstreamed-args check
+        index = len(tool_parser.prev_tool_call_arr) - 1
+        args = tool_parser.prev_tool_call_arr[index].get("arguments", {})
+        expected_call = (
+            args if isinstance(args, str) else json.dumps(args, ensure_ascii=False)
+        )
+        actual_call = tool_parser.streamed_args_for_tool[index]
+        remaining_call = expected_call.replace(actual_call, "", 1)
+        assert remaining_call == ""
+    else:
+        assert len(tool_parser.streamed_args_for_tool) == 0
+        assert len(tool_parser.prev_tool_call_arr) == 0
+
 
 @pytest.mark.parametrize(
     ids=[
@@ -452,6 +627,7 @@ def _test_extract_tool_calls_streaming(
         "argument_before_name",
         "argument_before_name_and_name_in_argument",
         "multiple_tools",
+        "trailing_data_after_json",
     ],
     argnames=["model_output", "expected_tool_calls", "expected_content"],
     argvalues=[
@@ -541,6 +717,24 @@ def _test_extract_tool_calls_streaming(
             ],
             "",
         ),
+        (
+            """[TOOL_CALLS] [{"name": "get_current_weather", "arguments":{"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]\nextra trailing data""",  # noqa: E501
+            [
+                ToolCall(
+                    function=FunctionCall(
+                        name="get_current_weather",
+                        arguments=json.dumps(
+                            {
+                                "city": "Dallas",
+                                "state": "TX",
+                                "unit": "fahrenheit",
+                            }
+                        ),
+                    )
+                )
+            ],
+            "\nextra trailing data",
+        ),
     ],
 )
 def test_extract_tool_calls_streaming_pre_v11_tokenizer(
@@ -638,17 +832,67 @@ def test_extract_tool_calls_streaming(
     )
 
 
+def test_extract_tool_calls_streaming_v11_no_tools(
+    mistral_tool_parser, mistral_tokenizer
+):
+    model_output = "This is a test"
+    if isinstance(mistral_tokenizer, MistralTokenizer):
+        all_token_ids = mistral_tokenizer.encode(model_output)
+    else:
+        all_token_ids = mistral_tokenizer.encode(model_output, add_special_tokens=False)
+    skip_special = isinstance(mistral_tokenizer, MistralTokenizer)
+    collected_content = ""
+    previous_text = ""
+    previous_tokens = None
+    prefix_offset = 0
+    read_offset = 0
+    for i in range(len(all_token_ids)):
+        current_token_ids = all_token_ids[: i + 1]
+        previous_token_ids = all_token_ids[:i]
+        delta_token_ids = [all_token_ids[i]]
+
+        new_tokens, delta_text, prefix_offset, read_offset = detokenize_incrementally(
+            tokenizer=mistral_tokenizer,
+            all_input_ids=current_token_ids,
+            prev_tokens=previous_tokens,
+            prefix_offset=prefix_offset,
+            read_offset=read_offset,
+            skip_special_tokens=skip_special,
+            spaces_between_special_tokens=True,
+        )
+        current_text = previous_text + delta_text
+        previous_tokens = (
+            previous_tokens + new_tokens if previous_tokens else new_tokens
+        )
+
+        delta_message = mistral_tool_parser.extract_tool_calls_streaming(
+            previous_text=previous_text,
+            current_text=current_text,
+            delta_text=delta_text,
+            previous_token_ids=previous_token_ids,
+            current_token_ids=current_token_ids,
+            delta_token_ids=delta_token_ids,
+            request=_DUMMY_REQUEST,
+        )
+        if delta_message and delta_message.content:
+            collected_content += delta_message.content
+        if delta_message:
+            assert not delta_message.tool_calls
+
+        previous_text = current_text
+
+    assert collected_content == model_output
+    assert len(mistral_tool_parser.streamed_args_for_tool) == 0
+    assert len(mistral_tool_parser.prev_tool_call_arr) == 0
+
+
 @pytest.mark.parametrize(
-    ids=[
-        "single_tool_add",
-        "single_tool_weather",
-        "multiple_tool_calls",
-        "content_before_tool",
-        "complex",
-    ],
-    argnames=["model_output", "expected_tool_calls", "expected_content"],
-    argvalues=[
-        (
+    "parser_fixture, tokenizer_fixture, model_output,"
+    " expected_tool_calls, expected_content",
+    [
+        pytest.param(
+            "mistral_tool_parser",
+            "mistral_tokenizer",
             """[TOOL_CALLS]add_this_and_that{"a": 3.5, "b": 4}""",  # noqa: E501
             [
                 ToolCall(
@@ -659,8 +903,11 @@ def test_extract_tool_calls_streaming(
                 )
             ],
             "",
+            id="v11-single_tool_add",
         ),
-        (
+        pytest.param(
+            "mistral_tool_parser",
+            "mistral_tokenizer",
             """[TOOL_CALLS]get_current_weather{"city": "San Francisco", "state": "CA", "unit": "celsius"}""",  # noqa: E501
             [
                 ToolCall(
@@ -673,8 +920,11 @@ def test_extract_tool_calls_streaming(
                 )
             ],
             "",
+            id="v11-single_tool_weather",
         ),
-        (
+        pytest.param(
+            "mistral_tool_parser",
+            "mistral_tokenizer",
             """[TOOL_CALLS]add{"a": 3.5, "b": 4}[TOOL_CALLS]multiply{"a": 3, "b": 6}""",  # noqa: E501
             [
                 ToolCall(
@@ -689,9 +939,11 @@ def test_extract_tool_calls_streaming(
                 ),
             ],
             "",
+            id="v11-multiple_tool_calls",
         ),
-        (
-            # Additional content should not be after the tool calls
+        pytest.param(
+            "mistral_tool_parser",
+            "mistral_tokenizer",
             """bla[TOOL_CALLS]add_this_and_that{"a": 3.5, "b": 4}""",  # noqa: E501
             [
                 ToolCall(
@@ -702,9 +954,11 @@ def test_extract_tool_calls_streaming(
                 )
             ],
             "bla",
+            id="v11-content_before_tool",
         ),
-        (
-            # Complex
+        pytest.param(
+            "mistral_tool_parser",
+            "mistral_tokenizer",
             """hi{hi[TOOL_CALLS]bash{"command": "print(\\"hello world!\\")\\nre.compile(r\'{}\')"}""",  # noqa: E501
             [
                 ToolCall(
@@ -717,58 +971,19 @@ def test_extract_tool_calls_streaming(
                 )
             ],
             "hi{hi",
+            id="v11-complex",
         ),
-    ],
-)
-def test_extract_tool_calls_streaming_one_chunk(
-    mistral_tool_parser,
-    mistral_tokenizer,
-    model_output,
-    expected_tool_calls,
-    expected_content,
-):
-    if isinstance(mistral_tokenizer, MistralTokenizer):
-        all_token_ids = mistral_tokenizer.encode(model_output)
-    else:
-        all_token_ids = mistral_tokenizer.encode(model_output, add_special_tokens=False)
-    all_token_ids = fix_tool_call_tokenization(
-        all_token_ids, mistral_tool_parser, mistral_tokenizer
-    )
-
-    delta_message = mistral_tool_parser.extract_tool_calls_streaming(
-        previous_text="",
-        current_text=model_output,
-        delta_text=model_output,
-        previous_token_ids=[],
-        current_token_ids=all_token_ids,
-        delta_token_ids=all_token_ids,
-        request=None,
-    )  # type: ignore[arg-type]
-    assert isinstance(delta_message, DeltaMessage)
-    assert len(delta_message.tool_calls) == len(expected_tool_calls)
-
-    assert_tool_calls(delta_message.tool_calls, expected_tool_calls)
-
-    if delta_message.content is None:
-        assert expected_content == ""
-    else:
-        assert delta_message.content == expected_content
-
-
-@pytest.mark.parametrize(
-    ids=[
-        "no_tools",
-        "single_tool_add",
-        "single_tool_add_strings",
-        "single_tool_weather",
-        "argument_before_name",
-        "argument_before_name_and_name_in_argument",
-        "multiple_tools",
-    ],
-    argnames=["model_output", "expected_tool_calls", "expected_content"],
-    argvalues=[
-        ("""This is a test""", [], """This is a test"""),
-        (
+        pytest.param(
+            "mistral_pre_v11_tool_parser",
+            "mistral_pre_v11_tokenizer",
+            """This is a test""",
+            [],
+            """This is a test""",
+            id="pre_v11-no_tools",
+        ),
+        pytest.param(
+            "mistral_pre_v11_tool_parser",
+            "mistral_pre_v11_tokenizer",
             """[TOOL_CALLS]  [ {"name":"add" , "arguments" : {"a": 3, "b": 4} } ]""",  # noqa: E501
             [
                 ToolCall(
@@ -778,8 +993,11 @@ def test_extract_tool_calls_streaming_one_chunk(
                 )
             ],
             "",
+            id="pre_v11-single_tool_add",
         ),
-        (
+        pytest.param(
+            "mistral_pre_v11_tool_parser",
+            "mistral_pre_v11_tokenizer",
             """[TOOL_CALLS] [{"name": "add", "arguments":{"a": "3", "b": "4"}}]""",  # noqa: E501
             [
                 ToolCall(
@@ -789,8 +1007,11 @@ def test_extract_tool_calls_streaming_one_chunk(
                 )
             ],
             "",
+            id="pre_v11-single_tool_add_strings",
         ),
-        (
+        pytest.param(
+            "mistral_pre_v11_tool_parser",
+            "mistral_pre_v11_tokenizer",
             """[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"city": "San Francisco", "state": "CA", "unit": "celsius"}}]""",  # noqa: E501
             [
                 ToolCall(
@@ -803,8 +1024,11 @@ def test_extract_tool_calls_streaming_one_chunk(
                 )
             ],
             "",
+            id="pre_v11-single_tool_weather",
         ),
-        (
+        pytest.param(
+            "mistral_pre_v11_tool_parser",
+            "mistral_pre_v11_tokenizer",
             """[TOOL_CALLS] [{"arguments": {"city": "San Francisco", "state": "CA", "unit": "celsius"}, "name": "get_current_weather"}]""",  # noqa: E501
             [
                 ToolCall(
@@ -817,8 +1041,11 @@ def test_extract_tool_calls_streaming_one_chunk(
                 )
             ],
             "",
+            id="pre_v11-argument_before_name",
         ),
-        (
+        pytest.param(
+            "mistral_pre_v11_tool_parser",
+            "mistral_pre_v11_tokenizer",
             """[TOOL_CALLS] [{"arguments": {"name": "John Doe"}, "name": "get_age"}]""",  # noqa: E501
             [
                 ToolCall(
@@ -833,8 +1060,11 @@ def test_extract_tool_calls_streaming_one_chunk(
                 )
             ],
             "",
+            id="pre_v11-argument_before_name_and_name_in_argument",
         ),
-        (
+        pytest.param(
+            "mistral_pre_v11_tool_parser",
+            "mistral_pre_v11_tokenizer",
             """[TOOL_CALLS] [{"arguments": {"a": 3.5, "b": 4}, "name": "add"}, {"arguments":{"city": "San Francisco", "state": "CA", "unit": "celsius"}, "name": "get_current_weather"}]""",  # noqa: E501
             [
                 ToolCall(
@@ -852,35 +1082,50 @@ def test_extract_tool_calls_streaming_one_chunk(
                 ),
             ],
             "",
+            id="pre_v11-multiple_tools",
+        ),
+        pytest.param(
+            "mistral_pre_v11_tool_parser",
+            "mistral_pre_v11_tokenizer",
+            """Some text[TOOL_CALLS] [{"name": "add", "arguments":{"a": 1, "b": 2}}]""",  # noqa: E501
+            [
+                ToolCall(
+                    function=FunctionCall(
+                        name="add", arguments=json.dumps({"a": 1, "b": 2})
+                    )
+                )
+            ],
+            "Some text",
+            id="pre_v11-content_before_tool",
         ),
     ],
 )
-def test_extract_tool_calls_streaming_pre_v11_tokenizer_one_chunk(
-    mistral_pre_v11_tool_parser,
-    mistral_pre_v11_tokenizer,
+def test_extract_tool_calls_streaming_one_chunk(
+    parser_fixture,
+    tokenizer_fixture,
     model_output,
     expected_tool_calls,
     expected_content,
+    request,
 ):
-    if isinstance(mistral_pre_v11_tokenizer, MistralTokenizer):
-        all_token_ids = mistral_pre_v11_tokenizer.encode(model_output)
+    tool_parser = request.getfixturevalue(parser_fixture)
+    tokenizer = request.getfixturevalue(tokenizer_fixture)
+
+    if isinstance(tokenizer, MistralTokenizer):
+        all_token_ids = tokenizer.encode(model_output)
     else:
-        all_token_ids = mistral_pre_v11_tokenizer.encode(
-            model_output, add_special_tokens=False
-        )
-    all_token_ids = fix_tool_call_tokenization(
-        all_token_ids, mistral_pre_v11_tool_parser, mistral_pre_v11_tokenizer
-    )
+        all_token_ids = tokenizer.encode(model_output, add_special_tokens=False)
+    all_token_ids = fix_tool_call_tokenization(all_token_ids, tool_parser, tokenizer)
 
-    delta_message = mistral_pre_v11_tool_parser.extract_tool_calls_streaming(
+    delta_message = tool_parser.extract_tool_calls_streaming(
         previous_text="",
         current_text=model_output,
         delta_text=model_output,
         previous_token_ids=[],
         current_token_ids=all_token_ids,
         delta_token_ids=all_token_ids,
-        request=None,
-    )  # type: ignore[arg-type]
+        request=_DUMMY_REQUEST,
+    )
     assert isinstance(delta_message, DeltaMessage)
     assert len(delta_message.tool_calls) == len(expected_tool_calls)
 
@@ -890,3 +1135,801 @@ def test_extract_tool_calls_streaming_pre_v11_tokenizer_one_chunk(
         assert expected_content == ""
     else:
         assert delta_message.content == expected_content
+
+
+@pytest.mark.parametrize(
+    "parser_fixture, model_output, fake_count, two_phase",
+    [
+        pytest.param(
+            "mistral_tool_parser",
+            '[TOOL_CALLS]add{"a": 1, "b": 2}',
+            20,
+            True,
+            id="v11",
+        ),
+        pytest.param(
+            "mistral_pre_v11_tool_parser",
+            '[TOOL_CALLS] [{"name": "add", "arguments":{"a": 1, "b": 2}}]',
+            30,
+            False,
+            id="pre_v11",
+        ),
+    ],
+)
+def test_fast_detokenization_text_detection(
+    parser_fixture, model_output, fake_count, two_phase, request
+):
+    """Regression: bot_token in text but not token_ids (PR #37209)."""
+    parser = request.getfixturevalue(parser_fixture)
+    # Token IDs that do NOT contain bot_token_id.
+    fake_token_ids = list(range(99, 99 + fake_count))
+
+    if two_phase:
+        # First delta: pure content, no bot token yet
+        delta_message_before = parser.extract_tool_calls_streaming(
+            previous_text="",
+            current_text="Hello",
+            delta_text="Hello",
+            previous_token_ids=[],
+            current_token_ids=[99],
+            delta_token_ids=[99],
+            request=_DUMMY_REQUEST,
+        )
+        assert delta_message_before is not None
+        assert delta_message_before.content == "Hello"
+        assert not delta_message_before.tool_calls
+
+        previous_text = "Hello"
+        current_text = "Hello" + model_output
+        previous_token_ids = [99]
+        delta_token_ids = fake_token_ids[1:]
+    else:
+        previous_text = ""
+        current_text = model_output
+        previous_token_ids = []
+        delta_token_ids = fake_token_ids
+
+    delta_message = parser.extract_tool_calls_streaming(
+        previous_text=previous_text,
+        current_text=current_text,
+        delta_text=model_output,
+        previous_token_ids=previous_token_ids,
+        current_token_ids=fake_token_ids,
+        delta_token_ids=delta_token_ids,
+        request=_DUMMY_REQUEST,
+    )
+    assert delta_message is not None
+    assert delta_message.tool_calls is not None
+    assert len(delta_message.tool_calls) == 1
+    assert delta_message.tool_calls[0].function is not None
+    assert delta_message.tool_calls[0].function.name == "add"
+
+
+@pytest.mark.parametrize(
+    "parser_fixture, patched_method, current_text",
+    [
+        (
+            "mistral_tool_parser",
+            "_extract_tool_calls_streaming",
+            "[TOOL_CALLS]add{}",
+        ),
+        (
+            "mistral_pre_v11_tool_parser",
+            "_extract_tool_calls_streaming_pre_v11_tokenizer",
+            '[TOOL_CALLS] [{"name":"a","arguments":{}}]',
+        ),
+    ],
+    ids=["v11", "pre_v11"],
+)
+def test_extract_tool_calls_streaming_exception_returns_none(
+    parser_fixture, patched_method, current_text, request
+):
+    parser = request.getfixturevalue(parser_fixture)
+    with patch.object(parser, patched_method, side_effect=RuntimeError("boom")):
+        result = parser.extract_tool_calls_streaming(
+            previous_text="",
+            current_text=current_text,
+            delta_text=current_text,
+            previous_token_ids=[],
+            current_token_ids=[parser.bot_token_id],
+            delta_token_ids=[parser.bot_token_id],
+            request=_DUMMY_REQUEST,
+        )
+    assert result is None
+
+
+SAMPLE_TOOLS_DICTS = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "Get the weather",
+            "parameters": {
+                "type": "object",
+                "properties": {"city": {"type": "string"}},
+                "required": ["city"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "add",
+            "description": "Add two numbers",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "a": {"type": "number"},
+                    "b": {"type": "number"},
+                },
+                "required": ["a", "b"],
+            },
+        },
+    },
+]
+
+
+def _make_request(**kwargs) -> ChatCompletionRequest:
+    defaults: dict = {
+        "messages": [],
+        "model": "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
+        "tools": SAMPLE_TOOLS_DICTS,
+        "tool_choice": "auto",
+    }
+    defaults.update(kwargs)
+    return ChatCompletionRequest(**defaults)
+
+
+@pytest.mark.parametrize(
+    "request_kwargs,expected_mode,expected_parallel",
+    [
+        ({"tool_choice": "auto"}, MistralToolChoiceEnum.auto, True),
+        ({"tool_choice": "none"}, MistralToolChoiceEnum.none, True),
+        ({"tool_choice": "required"}, MistralToolChoiceEnum.required, True),
+        ({"tool_choice": None, "tools": None}, MistralToolChoiceEnum.auto, True),
+        (
+            {
+                "tool_choice": {
+                    "type": "function",
+                    "function": {"name": "get_weather"},
+                }
+            },
+            MistralNamedToolChoice.model_validate(
+                {"type": "function", "function": {"name": "get_weather"}}
+            ),
+            True,
+        ),
+        (
+            {"tool_choice": "auto", "parallel_tool_calls": False},
+            MistralToolChoiceEnum.auto,
+            False,
+        ),
+        (
+            {"tool_choice": "auto", "response_format": {"type": "text"}},
+            MistralToolChoiceEnum.auto,
+            True,
+        ),
+    ],
+    ids=[
+        "auto",
+        "none",
+        "required",
+        "null_tool_choice",
+        "named_tool_choice",
+        "parallel_false",
+        "response_format_text",
+    ],
+)
+def test_adjust_request_grammar_factory(
+    mistral_tool_parser: MistralToolParser,
+    request_kwargs: dict,
+    expected_mode: MistralToolChoice,
+    expected_parallel: bool,
+) -> None:
+    request = _make_request(**request_kwargs)
+    factory = mistral_tool_parser.model_tokenizer.grammar_factory
+
+    with patch.object(
+        factory,
+        "get_lark_from_jinja",
+        wraps=factory.get_lark_from_jinja,
+    ) as mock_get_lark:
+        result = mistral_tool_parser.adjust_request(request)
+
+        mock_get_lark.assert_called_once()
+        call_kwargs = mock_get_lark.call_args
+
+        assert call_kwargs.kwargs["mode"] == expected_mode
+        assert call_kwargs.kwargs["json_schema"] is None
+        assert call_kwargs.kwargs["parallel_tool_calls"] == expected_parallel
+
+    assert result.structured_outputs is not None
+    assert isinstance(result.structured_outputs.grammar, str)
+    assert len(result.structured_outputs.grammar) > 0
+
+
+def test_adjust_request_unsupported_grammar_for_tokenizer(mistral_tokenizer) -> None:
+    with patch.object(
+        type(mistral_tokenizer),
+        "supports_grammar",
+        new_callable=lambda: property(lambda self: False),
+    ):
+        parser = MistralToolParser(mistral_tokenizer)
+        request = _make_request()
+        result = parser.adjust_request(request)
+
+        assert result.structured_outputs is None
+
+
+@pytest.mark.parametrize(
+    "tool_choice,expected_skip",
+    [("auto", False), ("none", True)],
+    ids=["auto_skip_false", "none_skip_true"],
+)
+def test_adjust_request_non_mistral_tokenizer(
+    non_mistral_parser: MistralToolParser,
+    tool_choice: str,
+    expected_skip: bool,
+) -> None:
+    request = _make_request(tool_choice=tool_choice)
+    result = non_mistral_parser.adjust_request(request)
+
+    assert result.skip_special_tokens is expected_skip
+
+
+@pytest.mark.parametrize(
+    "so_kwargs",
+    [
+        {"regex": r"\d+"},
+        {"choice": ["a", "b"]},
+        {"structural_tag": '{"key": "value"}'},
+        {"grammar": "start: 'hello'"},
+    ],
+    ids=["regex", "choice", "structural_tag", "grammar"],
+)
+def test_adjust_request_unsupported_structured_outputs(
+    mistral_tool_parser: MistralToolParser,
+    so_kwargs: dict,
+) -> None:
+    request = _make_request(
+        structured_outputs=StructuredOutputsParams(**so_kwargs),
+    )
+    result = mistral_tool_parser.adjust_request(request)
+
+    assert result.structured_outputs == request.structured_outputs
+
+
+def test_adjust_request_unsupported_response_format(
+    mistral_tool_parser: MistralToolParser,
+) -> None:
+    request = _make_request(
+        response_format=StructuralTagResponseFormat(
+            type="structural_tag", format={"some": "config"}
+        ),
+    )
+    result = mistral_tool_parser.adjust_request(request)
+    assert result.structured_outputs is None
+    assert result.response_format == request.response_format
+
+
+@pytest.mark.parametrize(
+    "so_kwargs,expected_json_schema",
+    [
+        ({"json_object": True}, _DEFAULT_JSON_SCHEMA),
+        ({"json": '{"type": "object"}'}, {"type": "object"}),
+        (
+            {"json": {"type": "object", "properties": {"x": {"type": "integer"}}}},
+            {"type": "object", "properties": {"x": {"type": "integer"}}},
+        ),
+    ],
+    ids=["json_object", "json_str", "json_dict"],
+)
+def test_adjust_request_structured_outputs_generates_grammar(
+    mistral_tool_parser: MistralToolParser,
+    so_kwargs: dict,
+    expected_json_schema: str,
+) -> None:
+    request = _make_request(
+        structured_outputs=StructuredOutputsParams(**so_kwargs),
+    )
+    factory = mistral_tool_parser.model_tokenizer.grammar_factory
+
+    with patch.object(
+        factory,
+        "get_lark_from_jinja",
+        wraps=factory.get_lark_from_jinja,
+    ) as mock_get_lark:
+        result = mistral_tool_parser.adjust_request(request)
+
+        mock_get_lark.assert_called_once()
+        assert mock_get_lark.call_args.kwargs["json_schema"] == expected_json_schema
+
+    assert result.structured_outputs is not None
+    assert isinstance(result.structured_outputs.grammar, str)
+    assert len(result.structured_outputs.grammar) > 0
+
+
+@pytest.mark.parametrize(
+    "response_format_kwargs,expected_json_schema",
+    [
+        ({"type": "json_object"}, _DEFAULT_JSON_SCHEMA),
+        (
+            {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "my_schema",
+                    "schema": {
+                        "type": "object",
+                        "properties": {"x": {"type": "integer"}},
+                    },
+                },
+            },
+            {"type": "object", "properties": {"x": {"type": "integer"}}},
+        ),
+    ],
+    ids=["json_object", "json_schema_with_schema"],
+)
+def test_adjust_request_response_format_generates_grammar(
+    mistral_tool_parser: MistralToolParser,
+    response_format_kwargs: dict,
+    expected_json_schema: str,
+) -> None:
+    request = _make_request(response_format=response_format_kwargs)
+    factory = mistral_tool_parser.model_tokenizer.grammar_factory
+
+    with patch.object(
+        factory,
+        "get_lark_from_jinja",
+        wraps=factory.get_lark_from_jinja,
+    ) as mock_get_lark:
+        result = mistral_tool_parser.adjust_request(request)
+
+        mock_get_lark.assert_called_once()
+        assert mock_get_lark.call_args.kwargs["json_schema"] == expected_json_schema
+
+    assert result.structured_outputs is not None
+    assert isinstance(result.structured_outputs.grammar, str)
+    assert len(result.structured_outputs.grammar) > 0
+
+
+@pytest.mark.parametrize(
+    "tool_choice, expected_method, not_called_method",
+    [
+        ("none", "get_lark_for_json_schema", None),
+        ("auto", "get_lark_from_jinja", "get_lark_for_json_schema"),
+    ],
+    ids=["none_uses_json_schema_factory", "auto_uses_jinja_factory"],
+)
+def test_adjust_request_tool_choice_with_json_schema_factory_routing(
+    mistral_tool_parser: MistralToolParser,
+    tool_choice: str,
+    expected_method: str,
+    not_called_method: str | None,
+) -> None:
+    request = _make_request(
+        tool_choice=tool_choice,
+        structured_outputs=StructuredOutputsParams(json='{"type": "object"}'),
+    )
+    factory = mistral_tool_parser.model_tokenizer.grammar_factory
+
+    patches = {
+        expected_method: patch.object(
+            factory,
+            expected_method,
+            wraps=getattr(factory, expected_method),
+        ),
+    }
+    if not_called_method:
+        patches[not_called_method] = patch.object(
+            factory,
+            not_called_method,
+            wraps=getattr(factory, not_called_method),
+        )
+
+    with patches[expected_method] as mock_expected:
+        ctx = patches[not_called_method] if not_called_method else None
+        if ctx:
+            with ctx as mock_not_called:
+                result = mistral_tool_parser.adjust_request(request)
+                mock_not_called.assert_not_called()
+        else:
+            result = mistral_tool_parser.adjust_request(request)
+
+        mock_expected.assert_called_once()
+        assert mock_expected.call_args.kwargs["json_schema"] == {"type": "object"}
+
+    assert result.structured_outputs is not None
+    assert isinstance(result.structured_outputs.grammar, str)
+    assert len(result.structured_outputs.grammar) > 0
+
+
+def test_grammar_from_tool_parser_default_false() -> None:
+    request = _make_request()
+    assert request._grammar_from_tool_parser is False
+
+
+def test_grammar_from_tool_parser_set_by_adjust_request(
+    mistral_tool_parser: MistralToolParser,
+) -> None:
+    request = _make_request()
+    result = mistral_tool_parser.adjust_request(request)
+    assert result._grammar_from_tool_parser is True
+
+
+@pytest.mark.parametrize(
+    "tool_calls, expected_len",
+    [
+        (None, 0),
+        ([], 0),
+        ([VllmFunctionCall(id="abc123xyz", name="f", arguments="{}")], 1),
+        ([VllmFunctionCall(name="f", arguments="{}")], 1),
+        (
+            [
+                VllmFunctionCall(id="fixed1234", name="a", arguments='{"x": 1}'),
+                VllmFunctionCall(name="b", arguments='{"y": 2}'),
+            ],
+            2,
+        ),
+    ],
+    ids=["none", "empty", "with_id", "without_id", "mixed"],
+)
+def test_build_non_streaming_tool_calls(
+    tool_calls: list[VllmFunctionCall] | None,
+    expected_len: int,
+) -> None:
+    result = MistralToolParser.build_non_streaming_tool_calls(tool_calls)
+    assert len(result) == expected_len
+
+    if tool_calls is None:
+        return
+
+    for i, tc in enumerate(result):
+        assert isinstance(tc, MistralToolCall)
+        assert tc.type == "function"
+
+        input_tc = tool_calls[i]
+        if input_tc.id:
+            assert tc.id == input_tc.id
+        else:
+            assert len(tc.id) == 9
+            assert tc.id.isalnum()
+
+        assert tc.function.name == input_tc.name
+        assert tc.function.arguments == input_tc.arguments
+
+
+class TestExtractMaybeReasoningAndToolStreaming:
+    r"""Tests for `MistralToolParser.extract_maybe_reasoning_and_tool_streaming`."""
+
+    @pytest.fixture
+    def parser(self) -> MistralToolParser:
+        mock_tokenizer = MagicMock()
+        mock_tokenizer.get_vocab.return_value = {"[TOOL_CALLS]": 1}
+        return MistralToolParser(mock_tokenizer)
+
+    @pytest.fixture
+    def request_obj(self) -> ChatCompletionRequest:
+        return _make_request()
+
+    @staticmethod
+    def _call(
+        parser: MistralToolParser,
+        request: ChatCompletionRequest,
+        *,
+        reasoning_parser: Any = None,
+        previous_text: str = "",
+        current_text: str = "hello",
+        delta_text: str = "hello",
+        previous_token_ids: list[int] | None = None,
+        current_token_ids: list[int] | None = None,
+        output_token_ids: list[int] | None = None,
+        reasoning_ended: bool = False,
+        prompt_is_reasoning_end: bool | None = None,
+    ) -> MistralStreamingResult:
+        return parser.extract_maybe_reasoning_and_tool_streaming(
+            reasoning_parser=reasoning_parser,
+            previous_text=previous_text,
+            current_text=current_text,
+            delta_text=delta_text,
+            previous_token_ids=previous_token_ids or [],
+            current_token_ids=current_token_ids or [1, 2, 3],
+            output_token_ids=output_token_ids or [1, 2, 3],
+            reasoning_ended=reasoning_ended,
+            prompt_is_reasoning_end=prompt_is_reasoning_end,
+            request=request,
+        )
+
+    def test_no_reasoning_tools_called(
+        self, parser: MistralToolParser, request_obj: ChatCompletionRequest
+    ) -> None:
+        tool_delta = DeltaMessage(
+            tool_calls=[
+                DeltaToolCall(
+                    index=0,
+                    function=DeltaFunctionCall(name="f", arguments="{}"),
+                )
+            ]
+        )
+        with patch.object(
+            parser, "extract_tool_calls_streaming", return_value=tool_delta
+        ):
+            result = self._call(parser, request_obj, reasoning_parser=None)
+
+        assert result == MistralStreamingResult(
+            delta_message=tool_delta,
+            reasoning_ended=False,
+            tools_called=True,
+            current_text="hello",
+            current_token_ids=[1, 2, 3],
+        )
+
+    def test_no_reasoning_no_tools(
+        self, parser: MistralToolParser, request_obj: ChatCompletionRequest
+    ) -> None:
+        content_delta = DeltaMessage(content="hello")
+        with patch.object(
+            parser, "extract_tool_calls_streaming", return_value=content_delta
+        ):
+            result = self._call(parser, request_obj, reasoning_parser=None)
+
+        assert result == MistralStreamingResult(
+            delta_message=content_delta,
+            reasoning_ended=False,
+            tools_called=False,
+            current_text="hello",
+            current_token_ids=[1, 2, 3],
+        )
+
+    def test_mistral_reasoning_parser_no_think_token(
+        self, parser: MistralToolParser, request_obj: ChatCompletionRequest
+    ) -> None:
+        mock_rp = MagicMock(spec=MistralReasoningParser)
+        mock_rp.start_token_id = 999
+        content_delta = DeltaMessage(content="direct")
+        with patch.object(
+            parser, "extract_tool_calls_streaming", return_value=content_delta
+        ):
+            result = self._call(
+                parser,
+                request_obj,
+                reasoning_parser=mock_rp,
+                reasoning_ended=False,
+                current_token_ids=[1, 2, 3],
+            )
+
+        mock_rp.extract_reasoning_streaming.assert_not_called()
+        assert result == MistralStreamingResult(
+            delta_message=content_delta,
+            reasoning_ended=False,
+            tools_called=False,
+            current_text="hello",
+            current_token_ids=[1, 2, 3],
+        )
+
+    def test_mistral_reasoning_parser_with_think_token(
+        self, parser: MistralToolParser, request_obj: ChatCompletionRequest
+    ) -> None:
+        mock_rp = MagicMock(spec=MistralReasoningParser)
+        mock_rp.start_token_id = 999
+        mock_rp.extract_reasoning_streaming.return_value = DeltaMessage(
+            reasoning="thinking..."
+        )
+        mock_rp.is_reasoning_end_streaming.return_value = False
+
+        result = self._call(
+            parser,
+            request_obj,
+            reasoning_parser=mock_rp,
+            reasoning_ended=False,
+            current_token_ids=[1, 999, 3],
+        )
+
+        mock_rp.extract_reasoning_streaming.assert_called_once()
+        assert result == MistralStreamingResult(
+            delta_message=DeltaMessage(reasoning="thinking..."),
+            reasoning_ended=False,
+            tools_called=False,
+            current_text="hello",
+            current_token_ids=[1, 999, 3],
+        )
+
+    def test_non_mistral_reasoning_parser_always_expects_thinking(
+        self, parser: MistralToolParser, request_obj: ChatCompletionRequest
+    ) -> None:
+        mock_rp = MagicMock()
+        mock_rp.start_token_id = 999
+        mock_rp.extract_reasoning_streaming.return_value = DeltaMessage(
+            reasoning="thinking..."
+        )
+        mock_rp.is_reasoning_end_streaming.return_value = False
+
+        result = self._call(
+            parser,
+            request_obj,
+            reasoning_parser=mock_rp,
+            reasoning_ended=False,
+            current_token_ids=[1, 2, 3],
+        )
+
+        mock_rp.extract_reasoning_streaming.assert_called_once()
+        assert result == MistralStreamingResult(
+            delta_message=DeltaMessage(reasoning="thinking..."),
+            reasoning_ended=False,
+            tools_called=False,
+            current_text="hello",
+            current_token_ids=[1, 2, 3],
+        )
+
+    def test_reasoning_already_ended_no_reset(
+        self, parser: MistralToolParser, request_obj: ChatCompletionRequest
+    ) -> None:
+        content_delta = DeltaMessage(content="content")
+        with patch.object(
+            parser, "extract_tool_calls_streaming", return_value=content_delta
+        ) as mock_extract:
+            result = self._call(
+                parser,
+                request_obj,
+                reasoning_parser=MagicMock(),
+                reasoning_ended=True,
+                previous_text="prior_tool_text",
+                previous_token_ids=[10, 20],
+                current_text="prior_tool_texthello",
+                current_token_ids=[10, 20, 1, 2, 3],
+            )
+
+            _, call_kwargs = mock_extract.call_args
+            assert call_kwargs["previous_text"] == "prior_tool_text"
+            assert call_kwargs["previous_token_ids"] == [10, 20]
+
+        assert result == MistralStreamingResult(
+            delta_message=content_delta,
+            reasoning_ended=True,
+            tools_called=False,
+            current_text="prior_tool_texthello",
+            current_token_ids=[10, 20, 1, 2, 3],
+        )
+
+    def test_pre_v15_ignores_prompt_reasoning_end(
+        self, parser: MistralToolParser, request_obj: ChatCompletionRequest
+    ) -> None:
+        mock_tokenizer = MagicMock(spec=MistralTokenizer)
+        mock_tokenizer.version = 13
+        parser.model_tokenizer = mock_tokenizer
+
+        mock_rp = MagicMock(spec=MistralReasoningParser)
+        mock_rp.start_token_id = 999
+        mock_rp.extract_reasoning_streaming.return_value = DeltaMessage(
+            reasoning="thinking..."
+        )
+        mock_rp.is_reasoning_end_streaming.return_value = False
+
+        result = self._call(
+            parser,
+            request_obj,
+            reasoning_parser=mock_rp,
+            reasoning_ended=False,
+            prompt_is_reasoning_end=True,
+            current_token_ids=[999, 1, 2],
+        )
+
+        mock_rp.extract_reasoning_streaming.assert_called_once()
+        assert result == MistralStreamingResult(
+            delta_message=DeltaMessage(reasoning="thinking..."),
+            reasoning_ended=False,
+            tools_called=False,
+            current_text="hello",
+            current_token_ids=[999, 1, 2],
+        )
+
+    def test_non_pre_v15_prompt_reasoning_end(
+        self, parser: MistralToolParser, request_obj: ChatCompletionRequest
+    ) -> None:
+        mock_tokenizer = MagicMock(spec=MistralTokenizer)
+        mock_tokenizer.version = 15
+        parser.model_tokenizer = mock_tokenizer
+
+        mock_rp = MagicMock(spec=MistralReasoningParser)
+        mock_rp.start_token_id = 999
+
+        content_delta = DeltaMessage(content="after reasoning")
+        with patch.object(
+            parser, "extract_tool_calls_streaming", return_value=content_delta
+        ):
+            result = self._call(
+                parser,
+                request_obj,
+                reasoning_parser=mock_rp,
+                reasoning_ended=False,
+                prompt_is_reasoning_end=True,
+                current_token_ids=[999, 1, 2],
+                output_token_ids=[10, 20, 30],
+            )
+
+        mock_rp.extract_reasoning_streaming.assert_not_called()
+        assert result == MistralStreamingResult(
+            delta_message=content_delta,
+            reasoning_ended=True,
+            tools_called=False,
+            current_text="hello",
+            current_token_ids=[10, 20, 30],
+        )
+
+    def test_reasoning_end_transition_with_content(
+        self, parser: MistralToolParser, request_obj: ChatCompletionRequest
+    ) -> None:
+        """When reasoning ends and the delta has content, that content is
+        cleared from delta_message and used as current_text for tool parsing."""
+        mock_rp = MagicMock()
+        mock_rp.start_token_id = 999
+        mock_rp.extract_reasoning_streaming.return_value = DeltaMessage(
+            reasoning="think", content="leftover"
+        )
+        mock_rp.is_reasoning_end_streaming.return_value = True
+        mock_rp.extract_content_ids.return_value = [50, 51]
+
+        content_delta = DeltaMessage(content="leftover")
+        with patch.object(
+            parser, "extract_tool_calls_streaming", return_value=content_delta
+        ) as mock_extract:
+            result = self._call(
+                parser,
+                request_obj,
+                reasoning_parser=mock_rp,
+                reasoning_ended=False,
+                current_token_ids=[999, 1, 2],
+                output_token_ids=[10, 20, 30],
+            )
+
+            mock_rp.extract_content_ids.assert_called_once_with([10, 20, 30])
+            _, call_kwargs = mock_extract.call_args
+            assert call_kwargs["previous_text"] == ""
+            assert call_kwargs["previous_token_ids"] == []
+            assert call_kwargs["delta_text"] == "leftover"
+            assert call_kwargs["current_token_ids"] == [50, 51]
+
+        assert result == MistralStreamingResult(
+            delta_message=content_delta,
+            reasoning_ended=True,
+            tools_called=False,
+            current_text="leftover",
+            current_token_ids=[50, 51],
+        )
+
+    def test_reasoning_end_transition_without_content(
+        self, parser: MistralToolParser, request_obj: ChatCompletionRequest
+    ) -> None:
+        """When reasoning ends but the delta has no content, current_text
+        is set to empty string."""
+        mock_rp = MagicMock()
+        mock_rp.start_token_id = 999
+        mock_rp.extract_reasoning_streaming.return_value = DeltaMessage(
+            reasoning="think"
+        )
+        mock_rp.is_reasoning_end_streaming.return_value = True
+        mock_rp.extract_content_ids.return_value = [50, 51]
+
+        empty_delta = DeltaMessage(content="")
+        with patch.object(
+            parser, "extract_tool_calls_streaming", return_value=empty_delta
+        ) as mock_extract:
+            result = self._call(
+                parser,
+                request_obj,
+                reasoning_parser=mock_rp,
+                reasoning_ended=False,
+                current_token_ids=[999, 1, 2],
+                output_token_ids=[10, 20, 30],
+            )
+
+            _, call_kwargs = mock_extract.call_args
+            assert call_kwargs["delta_text"] == ""
+            assert call_kwargs["current_token_ids"] == [50, 51]
+
+        assert result == MistralStreamingResult(
+            delta_message=empty_delta,
+            reasoning_ended=True,
+            tools_called=False,
+            current_text="",
+            current_token_ids=[50, 51],
+        )
diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py
index e9e39ef4c029..843fbca621f7 100644
--- a/tests/tool_parsers/test_openai_tool_parser.py
+++ b/tests/tool_parsers/test_openai_tool_parser.py
@@ -217,6 +217,158 @@ def test_extract_tool_calls_multiple_tools(
     assert extracted_info.content is None
 
 
+def test_extract_tool_calls_bare_function_name(
+    openai_tool_parser,
+    harmony_encoding,
+):
+    convo = Conversation.from_messages(
+        [
+            Message.from_role_and_content(Role.USER, "What is the weather in Tokyo?"),
+            Message.from_role_and_content(
+                Role.ASSISTANT,
+                "We need to use get_current_weather tool.",
+            ).with_channel("analysis"),
+            Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
+            .with_channel("commentary")
+            .with_recipient("get_current_weather")
+            .with_content_type("json"),
+        ]
+    )
+    token_ids = harmony_encoding.render_conversation_for_completion(
+        convo, Role.ASSISTANT
+    )
+
+    extracted_info = openai_tool_parser.extract_tool_calls(
+        "",
+        request=None,
+        token_ids=token_ids,
+    )
+    assert extracted_info.tools_called
+    expected_tool_calls = [
+        ToolCall(
+            function=FunctionCall(
+                name="get_current_weather",
+                arguments=json.dumps({"location": "Tokyo"}),
+            )
+        )
+    ]
+    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
+    assert extracted_info.content is None
+
+
+def test_extract_tool_calls_bare_function_name_multiple(
+    openai_tool_parser,
+    harmony_encoding,
+):
+    convo = Conversation.from_messages(
+        [
+            Message.from_role_and_content(
+                Role.USER, "What is the weather in Tokyo based on where I'm at?"
+            ),
+            Message.from_role_and_content(
+                Role.ASSISTANT,
+                "We need to use both tools.",
+            ).with_channel("analysis"),
+            Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
+            .with_channel("commentary")
+            .with_recipient("get_current_weather")
+            .with_content_type("json"),
+            Message.from_role_and_content(Role.ASSISTANT, "{}")
+            .with_channel("commentary")
+            .with_recipient("get_user_location")
+            .with_content_type("json"),
+        ]
+    )
+    token_ids = harmony_encoding.render_conversation_for_completion(
+        convo, Role.ASSISTANT
+    )
+
+    extracted_info = openai_tool_parser.extract_tool_calls(
+        "",
+        request=None,
+        token_ids=token_ids,
+    )
+    assert extracted_info.tools_called
+    expected_tool_calls = [
+        ToolCall(
+            function=FunctionCall(
+                name="get_current_weather",
+                arguments=json.dumps({"location": "Tokyo"}),
+            )
+        ),
+        ToolCall(
+            function=FunctionCall(
+                name="get_user_location",
+                arguments=json.dumps({}),
+            )
+        ),
+    ]
+    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
+
+
+def test_extract_tool_calls_assistant_recipient_ignored(
+    openai_tool_parser,
+    harmony_encoding,
+):
+    convo = Conversation.from_messages(
+        [
+            Message.from_role_and_content(Role.USER, "Hello"),
+            Message.from_role_and_content(Role.ASSISTANT, "Some tool response")
+            .with_channel("commentary")
+            .with_recipient("assistant"),
+            Message.from_role_and_content(
+                Role.ASSISTANT, "Here is the answer"
+            ).with_channel("final"),
+        ]
+    )
+    token_ids = harmony_encoding.render_conversation_for_completion(
+        convo, Role.ASSISTANT
+    )
+
+    extracted_info = openai_tool_parser.extract_tool_calls(
+        "",
+        request=None,
+        token_ids=token_ids,
+    )
+    assert not extracted_info.tools_called
+    assert extracted_info.tool_calls == []
+    assert extracted_info.content == "Here is the answer"
+
+
+def test_extract_tool_calls_dotted_function_name(
+    openai_tool_parser,
+    harmony_encoding,
+):
+    convo = Conversation.from_messages(
+        [
+            Message.from_role_and_content(Role.USER, "Compute 2+3"),
+            Message.from_role_and_content(Role.ASSISTANT, '{"a": 2, "b": 3}')
+            .with_channel("commentary")
+            .with_recipient("math.sum")
+            .with_content_type("json"),
+        ]
+    )
+    token_ids = harmony_encoding.render_conversation_for_completion(
+        convo, Role.ASSISTANT
+    )
+
+    extracted_info = openai_tool_parser.extract_tool_calls(
+        "",
+        request=None,
+        token_ids=token_ids,
+    )
+    assert extracted_info.tools_called
+    expected_tool_calls = [
+        ToolCall(
+            function=FunctionCall(
+                name="math.sum",
+                arguments=json.dumps({"a": 2, "b": 3}),
+            )
+        )
+    ]
+    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
+
+
 def test_extract_tool_calls_with_content(
     openai_tool_parser,
     harmony_encoding,
diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index 3d46f73de612..defc6d23eff4 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -5,8 +5,12 @@
 from collections.abc import Generator
 
 import pytest
+from openai.types.responses.function_tool import FunctionTool
+from xgrammar import StructuralTag
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedFunction,
+    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
     ChatCompletionToolsParam,
 )
@@ -31,13 +35,13 @@ def qwen3_tokenizer():
 
 
 @pytest.fixture
-def qwen3_tool_parser(qwen3_tokenizer):
-    return Qwen3CoderToolParser(qwen3_tokenizer)
+def qwen3_tool_parser(qwen3_tokenizer, sample_tools):
+    return Qwen3CoderToolParser(qwen3_tokenizer, tools=sample_tools)
 
 
 @pytest.fixture
-def qwen3_xml_tool_parser(qwen3_tokenizer):
-    return Qwen3XMLToolParser(qwen3_tokenizer)
+def qwen3_xml_tool_parser(qwen3_tokenizer, sample_tools):
+    return Qwen3XMLToolParser(qwen3_tokenizer, tools=sample_tools)
 
 
 @pytest.fixture(params=["xml"])
@@ -49,41 +53,83 @@ def qwen3_tool_parser_parametrized(qwen3_tool_parser, qwen3_xml_tool_parser, req
         return qwen3_xml_tool_parser
 
 
-@pytest.fixture
-def sample_tools():
-    return [
-        ChatCompletionToolsParam(
-            type="function",
-            function={
-                "name": "get_current_weather",
-                "description": "Get the current weather",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "city": {"type": "string", "description": "The city name"},
-                        "state": {"type": "string", "description": "The state code"},
-                        "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]},
-                    },
-                    "required": ["city", "state"],
+WEATHER_PARAMS = {
+    "type": "object",
+    "properties": {
+        "city": {"type": "string", "description": "The city name"},
+        "state": {"type": "string", "description": "The state code"},
+        "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]},
+    },
+    "required": ["city", "state"],
+}
+
+AREA_PARAMS = {
+    "type": "object",
+    "properties": {
+        "shape": {"type": "string"},
+        "dimensions": {"type": "object"},
+        "precision": {"type": "integer"},
+    },
+}
+
+
+@pytest.fixture(params=["chat_completion", "responses_api"])
+def sample_tools(request):
+    if request.param == "chat_completion":
+        return [
+            ChatCompletionToolsParam(
+                type="function",
+                function={
+                    "name": "get_current_weather",
+                    "description": "Get the current weather",
+                    "parameters": WEATHER_PARAMS,
                 },
-            },
-        ),
-        ChatCompletionToolsParam(
-            type="function",
-            function={
-                "name": "calculate_area",
-                "description": "Calculate area of a shape",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "shape": {"type": "string"},
-                        "dimensions": {"type": "object"},
-                        "precision": {"type": "integer"},
-                    },
+            ),
+            ChatCompletionToolsParam(
+                type="function",
+                function={
+                    "name": "calculate_area",
+                    "description": "Calculate area of a shape",
+                    "parameters": AREA_PARAMS,
                 },
-            },
-        ),
-    ]
+            ),
+        ]
+    else:
+        return [
+            FunctionTool(
+                type="function",
+                name="get_current_weather",
+                description="Get the current weather",
+                parameters=WEATHER_PARAMS,
+            ),
+            FunctionTool(
+                type="function",
+                name="calculate_area",
+                description="Calculate area of a shape",
+                parameters=AREA_PARAMS,
+            ),
+        ]
+
+
+def _as_chat_completion_tools(
+    tools: list[ChatCompletionToolsParam | FunctionTool],
+) -> list[ChatCompletionToolsParam]:
+    normalized: list[ChatCompletionToolsParam] = []
+    for tool in tools:
+        if isinstance(tool, ChatCompletionToolsParam):
+            normalized.append(tool)
+        else:
+            normalized.append(
+                ChatCompletionToolsParam(
+                    type="function",
+                    function={
+                        "name": tool.name,
+                        "description": tool.description,
+                        "parameters": tool.parameters,
+                    },
+                )
+            )
+    return normalized
 
 
 def assert_tool_calls(
@@ -337,12 +383,11 @@ def test_extract_tool_calls_no_tools(qwen3_tool_parser_parametrized):
 )
 def test_extract_tool_calls(
     qwen3_tool_parser_parametrized,
-    sample_tools,
     model_output,
     expected_tool_calls,
     expected_content,
 ):
-    request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[])
     extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
         model_output, request=request
     )
@@ -354,7 +399,7 @@ def test_extract_tool_calls(
 
 
 def test_extract_tool_calls_fallback_no_tags(
-    qwen3_tool_parser_parametrized, sample_tools
+    qwen3_tool_parser_parametrized,
 ):
     """Test fallback parsing when XML tags are missing"""
     model_output = """<function=get_current_weather>
@@ -366,7 +411,7 @@ def test_extract_tool_calls_fallback_no_tags(
 </parameter>
 </function>"""
 
-    request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[])
     extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
         model_output, request=request
     )
@@ -376,7 +421,7 @@ def test_extract_tool_calls_fallback_no_tags(
     assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather"
 
 
-def test_extract_tool_calls_type_conversion(qwen3_tool_parser_parametrized):
+def test_extract_tool_calls_type_conversion(qwen3_tokenizer):
     """Test parameter type conversion based on tool schema"""
     tools = [
         ChatCompletionToolsParam(
@@ -417,10 +462,9 @@ def test_extract_tool_calls_type_conversion(qwen3_tool_parser_parametrized):
 </function>
 </tool_call>"""
 
+    parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools)
     request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
-    extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
-        model_output, request=request
-    )
+    extracted_tool_calls = parser.extract_tool_calls(model_output, request=request)
 
     args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
     assert args["int_param"] == 42
@@ -430,6 +474,190 @@ def test_extract_tool_calls_type_conversion(qwen3_tool_parser_parametrized):
     assert args["obj_param"] == {"key": "value"}
 
 
+def test_extract_tool_calls_anyof_type_conversion(qwen3_tokenizer):
+    """Test type conversion for anyOf/oneOf nullable schemas (Pydantic v2).
+
+    Pydantic v2 emits anyOf for Optional[T] fields, e.g.:
+        Optional[int] -> {"anyOf": [{"type": "integer"}, {"type": "null"}]}
+    The parser must extract the non-null type and apply the correct
+    conversion (int(), float(), etc.) instead of returning a raw string.
+    """
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "test_anyof",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "anyof_int": {
+                            "anyOf": [
+                                {"type": "integer"},
+                                {"type": "null"},
+                            ],
+                            "default": 5,
+                        },
+                        "anyof_str": {
+                            "anyOf": [
+                                {"type": "string"},
+                                {"type": "null"},
+                            ],
+                        },
+                        "anyof_array": {
+                            "anyOf": [
+                                {"type": "array", "items": {"type": "string"}},
+                                {"type": "null"},
+                            ],
+                        },
+                        "anyof_obj": {
+                            "anyOf": [
+                                {"type": "object"},
+                                {"type": "null"},
+                            ],
+                        },
+                        "type_as_array": {
+                            "type": ["integer", "null"],
+                        },
+                        "multi_non_null": {
+                            "anyOf": [
+                                {"type": "string"},
+                                {"type": "integer"},
+                                {"type": "null"},
+                            ],
+                        },
+                    },
+                },
+            },
+        )
+    ]
+
+    model_output = """<tool_call>
+<function=test_anyof>
+<parameter=anyof_int>
+5
+</parameter>
+<parameter=anyof_str>
+hello
+</parameter>
+<parameter=anyof_array>
+["a", "b", "c"]
+</parameter>
+<parameter=anyof_obj>
+{"key": "value"}
+</parameter>
+<parameter=type_as_array>
+42
+</parameter>
+<parameter=multi_non_null>
+some text
+</parameter>
+</function>
+</tool_call>"""
+
+    parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+    extracted = parser.extract_tool_calls(model_output, request=request)
+
+    args = json.loads(extracted.tool_calls[0].function.arguments)
+    assert args["anyof_int"] == 5
+    assert isinstance(args["anyof_int"], int)
+    assert args["anyof_str"] == "hello"
+    assert isinstance(args["anyof_str"], str)
+    assert args["anyof_array"] == ["a", "b", "c"]
+    assert isinstance(args["anyof_array"], list)
+    assert args["anyof_obj"] == {"key": "value"}
+    assert isinstance(args["anyof_obj"], dict)
+    assert args["type_as_array"] == 42
+    assert isinstance(args["type_as_array"], int)
+    # Multi non-null: anyOf[string, integer, null] → first non-null is string
+    assert args["multi_non_null"] == "some text"
+    assert isinstance(args["multi_non_null"], str)
+
+
+def test_extract_tool_calls_anyof_type_conversion_streaming(qwen3_tokenizer):
+    """Test streaming e2e for anyOf/oneOf nullable schemas (Pydantic v2).
+
+    Verifies that the full streaming pipeline — tokenize, incrementally
+    decode, extract_tool_calls_streaming — correctly resolves types from
+    anyOf schemas and produces valid JSON with properly typed values.
+    """
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "search_web",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "anyOf": [
+                                {"type": "string"},
+                                {"type": "null"},
+                            ],
+                        },
+                        "count": {
+                            "anyOf": [
+                                {"type": "integer"},
+                                {"type": "null"},
+                            ],
+                            "default": 5,
+                        },
+                        "verbose": {
+                            "anyOf": [
+                                {"type": "boolean"},
+                                {"type": "null"},
+                            ],
+                        },
+                    },
+                },
+            },
+        )
+    ]
+
+    model_output = """<tool_call>
+<function=search_web>
+<parameter=query>
+vllm tool parser
+</parameter>
+<parameter=count>
+10
+</parameter>
+<parameter=verbose>
+true
+</parameter>
+</function>
+</tool_call>"""
+
+    parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+
+    tool_states = {}
+    for delta_message in stream_delta_message_generator(
+        parser, qwen3_tokenizer, model_output, request
+    ):
+        if delta_message.tool_calls:
+            for tool_call in delta_message.tool_calls:
+                idx = tool_call.index
+                if idx not in tool_states:
+                    tool_states[idx] = {"name": None, "arguments": ""}
+                if tool_call.function:
+                    if tool_call.function.name:
+                        tool_states[idx]["name"] = tool_call.function.name
+                    if tool_call.function.arguments is not None:
+                        tool_states[idx]["arguments"] += tool_call.function.arguments
+
+    assert len(tool_states) == 1
+    assert tool_states[0]["name"] == "search_web"
+    assert tool_states[0]["arguments"] is not None
+    args = json.loads(tool_states[0]["arguments"])
+    assert args["query"] == "vllm tool parser"
+    assert isinstance(args["query"], str)
+    assert args["count"] == 10
+    assert isinstance(args["count"], int)
+    assert args["verbose"] is True
+    assert isinstance(args["verbose"], bool)
+
+
 @pytest.mark.parametrize(
     ids=[
         "no_tools",
@@ -608,13 +836,12 @@ def test_extract_tool_calls_type_conversion(qwen3_tool_parser_parametrized):
 def test_extract_tool_calls_streaming(
     qwen3_tool_parser_parametrized,
     qwen3_tokenizer,
-    sample_tools,
     model_output,
     expected_tool_calls,
     expected_content,
 ):
     """Test incremental streaming behavior including typed parameters"""
-    request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[])
 
     other_content = ""
     tool_states = {}  # Track state per tool index
@@ -684,7 +911,7 @@ def test_extract_tool_calls_streaming(
 
 
 def test_extract_tool_calls_missing_closing_parameter_tag(
-    qwen3_tool_parser_parametrized, sample_tools
+    qwen3_tool_parser_parametrized,
 ):
     """Test handling of missing closing </parameter> tag"""
     # Using get_current_weather from sample_tools but with malformed XML
@@ -702,7 +929,7 @@ def test_extract_tool_calls_missing_closing_parameter_tag(
 </function>
 </tool_call>"""
 
-    request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[])
     extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
         model_output, request=request
     )
@@ -726,7 +953,7 @@ def test_extract_tool_calls_missing_closing_parameter_tag(
 
 
 def test_extract_tool_calls_streaming_missing_closing_tag(
-    qwen3_tool_parser_parametrized, qwen3_tokenizer, sample_tools
+    qwen3_tool_parser_parametrized, qwen3_tokenizer
 ):
     """Test streaming with missing closing </parameter> tag"""
     # Using get_current_weather from sample_tools but with malformed XML
@@ -744,7 +971,7 @@ def test_extract_tool_calls_streaming_missing_closing_tag(
 </function>
 </tool_call>"""
 
-    request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[])
 
     other_content = ""
     tool_states = {}
@@ -801,7 +1028,7 @@ def test_extract_tool_calls_streaming_missing_closing_tag(
 
 
 def test_extract_tool_calls_streaming_incremental(
-    qwen3_tool_parser_parametrized, qwen3_tokenizer, sample_tools
+    qwen3_tool_parser_parametrized, qwen3_tokenizer
 ):
     """Test that streaming is truly incremental"""
     model_output = """I'll check the weather.<tool_call>
@@ -815,7 +1042,7 @@ def test_extract_tool_calls_streaming_incremental(
 </function>
 </tool_call>"""
 
-    request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[])
 
     chunks = []
     for delta_message in stream_delta_message_generator(
@@ -859,7 +1086,7 @@ def test_extract_tool_calls_streaming_incremental(
 
 
 def test_extract_tool_calls_complex_type_with_single_quote(
-    qwen3_tool_parser_parametrized,
+    qwen3_tokenizer,
 ):
     """Test parameter type conversion based on tool schema"""
     tools = [
@@ -889,17 +1116,16 @@ def test_extract_tool_calls_complex_type_with_single_quote(
 </function>
 </tool_call>"""
 
+    parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools)
     request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
-    extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
-        model_output, request=request
-    )
+    extracted_tool_calls = parser.extract_tool_calls(model_output, request=request)
 
     args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
     assert args["obj_param"] == {"key": "value"}
 
 
 def test_extract_tool_calls_streaming_missing_opening_tag(
-    qwen3_tool_parser_parametrized, qwen3_tokenizer, sample_tools
+    qwen3_tool_parser_parametrized, qwen3_tokenizer
 ):
     """Test streaming with missing opening <tool_call> tag
 
@@ -921,7 +1147,7 @@ def test_extract_tool_calls_streaming_missing_opening_tag(
 </function>
 </tool_call>"""
 
-    request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[])
 
     other_content = ""
     tool_states = {}
@@ -976,3 +1202,240 @@ def test_extract_tool_calls_streaming_missing_opening_tag(
     assert args["city"] == "Dallas"
     assert args["state"] == "TX"
     assert args["unit"] == "fahrenheit"
+
+
+def test_malformed_xml_no_gt_delimiter(qwen3_tool_parser):
+    """Regression: malformed XML without '>' must not crash (PR #36774)."""
+    model_output = (
+        "<tool_call>\n"
+        "<function=get_current_weather\n"
+        "<parameter=city>Dallas</parameter>\n"
+        "</function>\n"
+        "</tool_call>"
+    )
+
+    request = ChatCompletionRequest(model=MODEL, messages=[])
+    result = qwen3_tool_parser.extract_tool_calls(model_output, request=request)
+    assert result is not None
+    assert isinstance(result.tool_calls, list)
+    assert all(tc is not None for tc in result.tool_calls)
+
+
+def test_none_tool_calls_filtered(qwen3_tool_parser):
+    """Regression: None tool calls filtered from output (PR #36774)."""
+    model_output = (
+        "<tool_call>\n"
+        "<function=bad_func_no_gt\n"
+        "</function>\n"
+        "</tool_call>\n"
+        "<tool_call>\n"
+        "<function=get_current_weather>\n"
+        "<parameter=city>Dallas</parameter>\n"
+        "<parameter=state>TX</parameter>\n"
+        "</function>\n"
+        "</tool_call>"
+    )
+
+    request = ChatCompletionRequest(model=MODEL, messages=[])
+    result = qwen3_tool_parser.extract_tool_calls(model_output, request=request)
+    assert all(tc is not None for tc in result.tool_calls)
+    assert result.tools_called
+    assert len(result.tool_calls) == 1
+    assert result.tool_calls[0].function.name == "get_current_weather"
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert args["city"] == "Dallas"
+    assert args["state"] == "TX"
+
+
+def test_anyof_parameter_not_double_encoded(qwen3_tokenizer):
+    """Regression: anyOf parameters must not be double-encoded (PR #36032)."""
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "update_record",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "data": {
+                            "anyOf": [{"type": "object"}, {"type": "null"}],
+                        },
+                    },
+                },
+            },
+        )
+    ]
+
+    parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools)
+
+    model_output = (
+        "<tool_call>\n"
+        "<function=update_record>\n"
+        '<parameter=data>{"key": "value", "count": 42}</parameter>\n'
+        "</function>\n"
+        "</tool_call>"
+    )
+
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+    result = parser.extract_tool_calls(model_output, request=request)
+
+    assert result.tools_called
+    assert len(result.tool_calls) == 1
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert isinstance(args["data"], dict)
+    assert args["data"] == {"key": "value", "count": 42}
+
+
+def test_streaming_multi_param_single_chunk(qwen3_tool_parser, qwen3_tokenizer):
+    """Regression: speculative decode delivering multiple params at once (PR #35615)."""
+    request = ChatCompletionRequest(model=MODEL, messages=[])
+
+    deltas = [
+        "<tool_call>",
+        "\n<function=get_current_weather>",
+        "\n",  # triggers json_started -> sends "{"
+        # This single delta delivers all three parameters at once
+        "<parameter=city>\nDallas\n</parameter>"
+        "\n<parameter=state>\nTX\n</parameter>"
+        "\n<parameter=unit>\nfahrenheit\n</parameter>",
+        "\n</function>",
+        "\n</tool_call>",
+    ]
+
+    from tests.tool_parsers.utils import (
+        run_tool_extraction_streaming,
+    )
+
+    reconstructor = run_tool_extraction_streaming(
+        qwen3_tool_parser,
+        deltas,
+        request,
+        assert_one_tool_per_delta=False,
+    )
+
+    assert len(reconstructor.tool_calls) == 1
+    args = json.loads(reconstructor.tool_calls[0].function.arguments)
+    assert args["city"] == "Dallas"
+    assert args["state"] == "TX"
+    assert args["unit"] == "fahrenheit"
+
+
+def test_no_double_serialization_string_args(qwen3_tool_parser):
+    """Regression: string arguments must not be double-serialized (PR #35615)."""
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "greet",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "message": {"type": "string"},
+                    },
+                },
+            },
+        )
+    ]
+
+    model_output = (
+        "<tool_call>\n"
+        "<function=greet>\n"
+        "<parameter=message>hello world</parameter>\n"
+        "</function>\n"
+        "</tool_call>"
+    )
+
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+    result = qwen3_tool_parser.extract_tool_calls(model_output, request=request)
+
+    assert result.tools_called
+    assert len(result.tool_calls) == 1
+    raw_arguments = result.tool_calls[0].function.arguments
+    args = json.loads(raw_arguments)
+    assert args["message"] == "hello world"
+    assert '\\"hello world\\"' not in raw_arguments
+
+
+def test_get_vllm_registry_structural_tag_returns_structural_tag(
+    qwen3_tool_parser: Qwen3CoderToolParser,
+    sample_tools: list[ChatCompletionToolsParam],
+) -> None:
+    request_tools = _as_chat_completion_tools(sample_tools)
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=request_tools,
+        tool_choice="auto",
+    )
+    tag = qwen3_tool_parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
+
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=request_tools,
+        tool_choice="required",
+    )
+    tag = qwen3_tool_parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
+
+    if request_tools:
+        tool = request_tools[0]
+        req = ChatCompletionRequest(
+            messages=[],
+            model="m",
+            tools=request_tools,
+        )
+        req.tool_choice = ChatCompletionNamedToolChoiceParam(
+            function=ChatCompletionNamedFunction(name=tool.function.name)
+        )
+        tag = qwen3_tool_parser.get_structural_tag(req)
+        assert isinstance(tag, StructuralTag)
+
+
+@pytest.mark.parametrize("include_reasoning", [True, False])
+def test_adjust_request_auto_uses_vllm_registry_structural_tag(
+    monkeypatch: pytest.MonkeyPatch,
+    qwen3_tool_parser: Qwen3CoderToolParser,
+    sample_tools: list[ChatCompletionToolsParam],
+    include_reasoning: bool,
+) -> None:
+    monkeypatch.setattr(
+        "vllm.tool_parsers.abstract_tool_parser.VLLM_ENFORCE_STRICT_TOOL_CALLING",
+        True,
+    )
+    request_tools = _as_chat_completion_tools(sample_tools)
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=request_tools,
+        tool_choice="auto",
+        include_reasoning=include_reasoning,
+    )
+    out = qwen3_tool_parser.adjust_request(req)
+    assert out.structured_outputs is not None
+    assert out.structured_outputs.structural_tag is not None
+    assert isinstance(out.structured_outputs.structural_tag, str)
+    loaded = json.loads(out.structured_outputs.structural_tag)
+    assert isinstance(loaded, dict)
+
+
+def test_adjust_request_required_prefers_structural_tag(
+    monkeypatch: pytest.MonkeyPatch,
+    qwen3_tool_parser: Qwen3CoderToolParser,
+    sample_tools: list[ChatCompletionToolsParam],
+) -> None:
+    monkeypatch.setattr(
+        "vllm.tool_parsers.abstract_tool_parser.VLLM_ENFORCE_STRICT_TOOL_CALLING",
+        True,
+    )
+    request_tools = _as_chat_completion_tools(sample_tools)
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=request_tools,
+        tool_choice="required",
+    )
+    out = qwen3_tool_parser.adjust_request(req)
+    assert out.structured_outputs is not None
+    assert out.structured_outputs.structural_tag is not None
diff --git a/tests/tool_parsers/test_qwen3xml_tool_parser.py b/tests/tool_parsers/test_qwen3xml_tool_parser.py
index 3771b8afd24c..1ea9a1d65c04 100644
--- a/tests/tool_parsers/test_qwen3xml_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3xml_tool_parser.py
@@ -64,9 +64,6 @@ def test_config(self) -> ToolParserTestConfig:
                 "test_empty_arguments": "Qwen3XML streaming has systematic issues",
                 "test_surrounding_text": "Qwen3XML streaming has systematic issues",
                 "test_escaped_strings": "Qwen3XML streaming has systematic issues",
-                "test_malformed_input": (
-                    "Qwen3XML parser is lenient with malformed input"
-                ),
                 "test_streaming_reconstruction": (
                     "Qwen3XML streaming reconstruction has known issues"
                 ),
diff --git a/tests/tool_parsers/test_seed_oss_tool_parser.py b/tests/tool_parsers/test_seed_oss_tool_parser.py
index 87e71a12faa2..9dd13afe01e3 100644
--- a/tests/tool_parsers/test_seed_oss_tool_parser.py
+++ b/tests/tool_parsers/test_seed_oss_tool_parser.py
@@ -30,8 +30,8 @@ def seed_oss_tokenizer():
 
 
 @pytest.fixture
-def seed_oss_tool_parser(seed_oss_tokenizer):
-    return SeedOssToolParser(seed_oss_tokenizer)
+def seed_oss_tool_parser(seed_oss_tokenizer, sample_tools):
+    return SeedOssToolParser(seed_oss_tokenizer, tools=sample_tools)
 
 
 @pytest.fixture
diff --git a/tests/tool_parsers/test_step3p5_tool_parser.py b/tests/tool_parsers/test_step3p5_tool_parser.py
index b3cb4e20fb9c..8391a5b75d83 100644
--- a/tests/tool_parsers/test_step3p5_tool_parser.py
+++ b/tests/tool_parsers/test_step3p5_tool_parser.py
@@ -28,8 +28,8 @@ def step3p5_tokenizer():
 
 
 @pytest.fixture
-def step3p5_tool_parser(step3p5_tokenizer):
-    return Step3p5ToolParser(step3p5_tokenizer)
+def step3p5_tool_parser(step3p5_tokenizer, sample_tools):
+    return Step3p5ToolParser(step3p5_tokenizer, tools=sample_tools)
 
 
 @pytest.fixture
@@ -386,7 +386,7 @@ def test_extract_tool_calls_fallback_no_tags(step3p5_tool_parser, sample_tools):
     assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather"
 
 
-def test_extract_tool_calls_type_conversion(step3p5_tool_parser):
+def test_extract_tool_calls_type_conversion(step3p5_tokenizer):
     """Test parameter type conversion based on tool schema"""
     tools = [
         ChatCompletionToolsParam(
@@ -427,10 +427,9 @@ def test_extract_tool_calls_type_conversion(step3p5_tool_parser):
 </function>
 </tool_call>"""
 
+    parser = Step3p5ToolParser(step3p5_tokenizer, tools=tools)
     request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
-    extracted_tool_calls = step3p5_tool_parser.extract_tool_calls(
-        model_output, request=request
-    )
+    extracted_tool_calls = parser.extract_tool_calls(model_output, request=request)
 
     args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
     assert args["int_param"] == 42
@@ -864,7 +863,7 @@ def test_extract_tool_calls_streaming_incremental(
     assert parsed_args["state"] == "TX"
 
 
-def test_extract_tool_calls_complex_type_with_single_quote(step3p5_tool_parser):
+def test_extract_tool_calls_complex_type_with_single_quote(step3p5_tokenizer):
     """Test parameter type conversion based on tool schema"""
     tools = [
         ChatCompletionToolsParam(
@@ -893,10 +892,9 @@ def test_extract_tool_calls_complex_type_with_single_quote(step3p5_tool_parser):
 </function>
 </tool_call>"""
 
+    parser = Step3p5ToolParser(step3p5_tokenizer, tools=tools)
     request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
-    extracted_tool_calls = step3p5_tool_parser.extract_tool_calls(
-        model_output, request=request
-    )
+    extracted_tool_calls = parser.extract_tool_calls(model_output, request=request)
 
     args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
     assert args["obj_param"] == {"key": "value"}
@@ -1433,3 +1431,140 @@ def test_extract_tool_calls_non_streaming_multiple_tool_calls_no_content_between
     assert "<function=calculate_area>" not in extracted_tool_calls.content, (
         "Second tool call should not be in content"
     )
+
+
+def _accumulate_tool_states(delta_messages):
+    """Accumulate tool call state from a stream of DeltaMessage objects."""
+    content = ""
+    tool_states = {}
+    for delta_message in delta_messages:
+        if delta_message.content:
+            content += delta_message.content
+        if delta_message.tool_calls:
+            for tool_call in delta_message.tool_calls:
+                idx = tool_call.index
+                if idx not in tool_states:
+                    tool_states[idx] = {
+                        "id": None,
+                        "name": None,
+                        "arguments": "",
+                        "type": None,
+                    }
+                if tool_call.id:
+                    tool_states[idx]["id"] = tool_call.id
+                if tool_call.type:
+                    tool_states[idx]["type"] = tool_call.type
+                if tool_call.function:
+                    if tool_call.function.name:
+                        tool_states[idx]["name"] = tool_call.function.name
+                    if tool_call.function.arguments is not None:
+                        tool_states[idx]["arguments"] += tool_call.function.arguments
+    return content, tool_states
+
+
+def test_streaming_mtp_variable_chunks(
+    step3p5_tool_parser, step3p5_tokenizer, sample_tools
+):
+    """Regression: MTP variable-size chunks spanning param boundaries (PR #33690)."""
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
+
+    delta_text_chunks = [
+        "<tool_call>\n<function=get_current_weather>\n<parameter=city>\n",
+        "Dallas\n</parameter>\n<parameter=state>\nTX",
+        "\n</parameter>\n<parameter=unit>\nfahrenheit\n</parameter>",
+        "\n</function>\n</tool_call>",
+    ]
+
+    _, tool_states = _accumulate_tool_states(
+        stream_delta_message_generator_from_chunks(
+            step3p5_tool_parser, step3p5_tokenizer, delta_text_chunks, request
+        )
+    )
+
+    assert len(tool_states) == 1
+
+    state = tool_states[0]
+    assert state["id"] is not None
+    assert state["type"] == "function"
+    assert state["name"] == "get_current_weather"
+
+    args = json.loads(state["arguments"])
+    assert args["city"] == "Dallas"
+    assert args["state"] == "TX"
+    assert args["unit"] == "fahrenheit"
+
+
+def test_streaming_multi_token_per_step(
+    step3p5_tool_parser, step3p5_tokenizer, sample_tools
+):
+    """Regression: MTP large chunks spanning multiple tool calls (PR #33690)."""
+    model_output = """<tool_call>
+<function=get_current_weather>
+<parameter=city>
+Dallas
+</parameter>
+<parameter=state>
+TX
+</parameter>
+<parameter=unit>
+fahrenheit
+</parameter>
+</function>
+</tool_call>
+<tool_call>
+<function=get_current_weather>
+<parameter=city>
+Orlando
+</parameter>
+<parameter=state>
+FL
+</parameter>
+<parameter=unit>
+celsius
+</parameter>
+</function>
+</tool_call>"""
+
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
+
+    # MTP-style large chunks
+    mtp_chunks = [
+        (
+            "<tool_call>\n<function=get_current_weather>\n"
+            "<parameter=city>\nDallas\n</parameter>\n"
+            "<parameter=state>\nTX"
+        ),
+        (
+            "\n</parameter>\n<parameter=unit>\nfahrenheit\n</parameter>\n"
+            "</function>\n</tool_call>\n"
+            "<tool_call>\n<function=get_current_weather>\n"
+            "<parameter=city>\nOrlando\n</parameter>\n"
+            "<parameter=state>\nFL\n</parameter>\n"
+            "<parameter=unit>\ncelsius\n</parameter>\n"
+            "</function>\n</tool_call>"
+        ),
+    ]
+
+    _, mtp_tool_states = _accumulate_tool_states(
+        stream_delta_message_generator_from_chunks(
+            step3p5_tool_parser, step3p5_tokenizer, mtp_chunks, request
+        )
+    )
+
+    # Token-by-token streaming (reference)
+    step3p5_tool_parser_ref = Step3p5ToolParser(step3p5_tokenizer)
+    _, ref_tool_states = _accumulate_tool_states(
+        stream_delta_message_generator(
+            step3p5_tool_parser_ref, step3p5_tokenizer, model_output, request
+        )
+    )
+
+    assert len(mtp_tool_states) == 2
+    assert len(ref_tool_states) == 2
+
+    # MTP results must match reference
+    for idx in range(2):
+        assert mtp_tool_states[idx]["name"] == ref_tool_states[idx]["name"]
+        mtp_args = json.loads(mtp_tool_states[idx]["arguments"])
+        ref_args = json.loads(ref_tool_states[idx]["arguments"])
+        assert mtp_args == ref_args
diff --git a/tests/tool_parsers/test_utils.py b/tests/tool_parsers/test_utils.py
new file mode 100644
index 000000000000..592ef580a2bc
--- /dev/null
+++ b/tests/tool_parsers/test_utils.py
@@ -0,0 +1,214 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from vllm.tool_parsers.utils import (
+    coerce_to_schema_type,
+    extract_types_from_schema,
+)
+
+
+class TestCoerceToSchemaType:
+    class TestNullHandling:
+        def test_null_converted_when_type_is_null(self):
+            assert coerce_to_schema_type("null", "null") is None
+
+        def test_null_converted_when_null_in_type_list(self):
+            assert coerce_to_schema_type("null", ["string", "null"]) is None
+
+        def test_null_preserved_as_string_when_type_is_string(self):
+            assert coerce_to_schema_type("null", "string") == "null"
+
+        def test_null_case_insensitive(self):
+            assert coerce_to_schema_type("NULL", "null") is None
+            assert coerce_to_schema_type("Null", "null") is None
+
+        def test_none_string_never_converted(self):
+            assert coerce_to_schema_type("none", "null") == "none"
+            assert coerce_to_schema_type("none", "string") == "none"
+            assert coerce_to_schema_type("none", ["string", "null"]) == "none"
+
+        def test_nil_string_never_converted(self):
+            assert coerce_to_schema_type("nil", "string") == "nil"
+            assert coerce_to_schema_type("nil", ["string", "null"]) == "nil"
+
+        def test_non_null_value_with_null_type(self):
+            assert coerce_to_schema_type("hello", ["null", "string"]) == "hello"
+
+    class TestStringType:
+        def test_string_type(self):
+            assert coerce_to_schema_type("hello", "string") == "hello"
+
+        def test_str_alias(self):
+            assert coerce_to_schema_type("hello", "str") == "hello"
+
+        def test_text_alias(self):
+            assert coerce_to_schema_type("hello", "text") == "hello"
+
+        def test_varchar_alias(self):
+            assert coerce_to_schema_type("hello", "varchar") == "hello"
+
+        def test_char_alias(self):
+            assert coerce_to_schema_type("x", "char") == "x"
+
+        def test_enum_alias(self):
+            assert coerce_to_schema_type("option_a", "enum") == "option_a"
+
+    class TestIntegerType:
+        def test_integer_type(self):
+            assert coerce_to_schema_type("42", "integer") == 42
+
+        def test_int_alias(self):
+            assert coerce_to_schema_type("42", "int") == 42
+
+        def test_negative_integer(self):
+            assert coerce_to_schema_type("-7", "integer") == -7
+
+        def test_invalid_integer_fallback(self):
+            assert coerce_to_schema_type("not_a_number", "integer") == "not_a_number"
+
+        def test_uint32_alias(self):
+            assert coerce_to_schema_type("5", "uint32") == 5
+
+        def test_long_alias(self):
+            assert coerce_to_schema_type("100", "long") == 100
+
+    class TestNumberType:
+        def test_number_type(self):
+            assert coerce_to_schema_type("3.14", "number") == 3.14
+
+        def test_float_alias(self):
+            assert coerce_to_schema_type("2.5", "float") == 2.5
+
+        def test_double_alias(self):
+            assert coerce_to_schema_type("2.5", "double") == 2.5
+
+        def test_whole_float_returns_int(self):
+            assert coerce_to_schema_type("5.0", "number") == 5
+            assert isinstance(coerce_to_schema_type("5.0", "number"), int)
+
+        def test_invalid_number_fallback(self):
+            assert coerce_to_schema_type("abc", "number") == "abc"
+
+    class TestBooleanType:
+        def test_true(self):
+            assert coerce_to_schema_type("true", "boolean") is True
+
+        def test_false(self):
+            assert coerce_to_schema_type("false", "boolean") is False
+
+        def test_bool_alias(self):
+            assert coerce_to_schema_type("true", "bool") is True
+
+        def test_one_is_true(self):
+            assert coerce_to_schema_type("1", "boolean") is True
+
+        def test_zero_is_false(self):
+            assert coerce_to_schema_type("0", "boolean") is False
+
+        def test_invalid_boolean_fallback(self):
+            assert coerce_to_schema_type("maybe", "boolean") == "maybe"
+
+    class TestObjectArrayType:
+        def test_object_type(self):
+            assert coerce_to_schema_type('{"a": 1}', "object") == {"a": 1}
+
+        def test_array_type(self):
+            assert coerce_to_schema_type("[1, 2, 3]", "array") == [1, 2, 3]
+
+        def test_invalid_json_fallback(self):
+            assert coerce_to_schema_type("not json", "object") == "not json"
+
+        def test_dict_alias(self):
+            assert coerce_to_schema_type('{"k": "v"}', "dict") == {"k": "v"}
+
+        def test_list_alias(self):
+            assert coerce_to_schema_type("[1]", "list") == [1]
+
+    class TestMultiType:
+        def test_null_takes_priority_over_string(self):
+            assert coerce_to_schema_type("null", ["string", "null"]) is None
+
+        def test_integer_tried_before_string(self):
+            assert coerce_to_schema_type("42", ["integer", "string"]) == 42
+
+        def test_falls_through_to_string(self):
+            assert coerce_to_schema_type("hello", ["integer", "string"]) == "hello"
+
+    class TestFallback:
+        def test_unknown_type_returns_string(self):
+            assert coerce_to_schema_type("hello", "unknown_type") == "hello"
+
+        def test_json_fallback_for_unknown_type(self):
+            assert coerce_to_schema_type('{"a": 1}', "unknown_type") == {"a": 1}
+
+        @pytest.mark.parametrize("schema_type", ["string", "str", "text"])
+        def test_string_types_preserve_value(self, schema_type):
+            assert coerce_to_schema_type("anything", schema_type) == "anything"
+
+        def test_unrecognized_type_falls_back_to_json(self):
+            assert coerce_to_schema_type("42", "interval") == 42
+
+
+class TestExtractTypesFromSchema:
+    def test_direct_type_string(self):
+        assert extract_types_from_schema({"type": "string"}) == ["string"]
+
+    def test_direct_type_integer(self):
+        assert extract_types_from_schema({"type": "integer"}) == ["integer"]
+
+    def test_type_array(self):
+        result = set(extract_types_from_schema({"type": ["string", "null"]}))
+        assert result == {"string", "null"}
+
+    def test_anyof(self):
+        schema = {"anyOf": [{"type": "object"}, {"type": "null"}]}
+        result = set(extract_types_from_schema(schema))
+        assert result == {"object", "null"}
+
+    def test_oneof(self):
+        schema = {"oneOf": [{"type": "integer"}, {"type": "string"}]}
+        result = set(extract_types_from_schema(schema))
+        assert result == {"integer", "string"}
+
+    def test_allof(self):
+        schema = {"allOf": [{"type": "object"}]}
+        assert extract_types_from_schema(schema) == ["object"]
+
+    def test_enum_infers_types(self):
+        schema = {"enum": [1, "a", None]}
+        result = set(extract_types_from_schema(schema))
+        assert result == {"integer", "string", "null"}
+
+    def test_enum_with_bool(self):
+        schema = {"enum": [True, False]}
+        assert extract_types_from_schema(schema) == ["boolean"]
+
+    def test_enum_with_float(self):
+        schema = {"enum": [1.5, 2.5]}
+        assert extract_types_from_schema(schema) == ["number"]
+
+    def test_enum_with_list_and_dict(self):
+        schema = {"enum": [[1, 2], {"a": 1}]}
+        result = set(extract_types_from_schema(schema))
+        assert result == {"array", "object"}
+
+    def test_none_schema_defaults_to_string(self):
+        assert extract_types_from_schema(None) == ["string"]
+
+    def test_non_dict_schema_defaults_to_string(self):
+        assert extract_types_from_schema("string") == ["string"]
+
+    def test_empty_dict_defaults_to_string(self):
+        assert extract_types_from_schema({}) == ["string"]
+
+    def test_nested_anyof(self):
+        schema = {
+            "anyOf": [
+                {"anyOf": [{"type": "integer"}, {"type": "null"}]},
+                {"type": "string"},
+            ]
+        }
+        result = set(extract_types_from_schema(schema))
+        assert result == {"integer", "null", "string"}
diff --git a/tests/tool_parsers/test_xlam_tool_parser.py b/tests/tool_parsers/test_xlam_tool_parser.py
index a5cab218f72b..3853d2039a72 100644
--- a/tests/tool_parsers/test_xlam_tool_parser.py
+++ b/tests/tool_parsers/test_xlam_tool_parser.py
@@ -484,7 +484,7 @@ def test_extract_tool_calls_streaming_incremental(
     expected_content,
 ):
     """Verify the XLAM Parser streaming behavior by verifying each chunk is as expected."""  # noqa: E501
-    request = ChatCompletionRequest(model=MODEL, messages=[], tools=[])
+    request = ChatCompletionRequest(model=MODEL, messages=[])
 
     chunks = []
     for delta_message in stream_delta_message_generator(
diff --git a/tests/tool_use/mistral/test_mistral_tool_calls.py b/tests/tool_use/mistral/test_mistral_tool_calls.py
index 3c4a543abe41..6dcfd43a9497 100644
--- a/tests/tool_use/mistral/test_mistral_tool_calls.py
+++ b/tests/tool_use/mistral/test_mistral_tool_calls.py
@@ -1,25 +1,198 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import json
+from dataclasses import dataclass, field
+
 import openai
 import pytest
 
-from tests.tool_use.utils import MESSAGES_ASKING_FOR_TOOLS, WEATHER_TOOL
+from tests.tool_use.utils import (
+    MESSAGES_ASKING_FOR_PARALLEL_TOOLS,
+    MESSAGES_ASKING_FOR_TOOLS,
+    MESSAGES_WITH_TOOL_RESPONSE,
+    MESSAGES_WITHOUT_TOOLS,
+    SEARCH_TOOL,
+    SEED,
+    WEATHER_TOOL,
+    ensure_system_prompt,
+)
+
+from .utils import ServerConfig
+
+
+def _requires_tool_parser(server_config: ServerConfig) -> None:
+    r"""Skip test if server was not started with --tool-call-parser."""
+    if "--tool-call-parser" not in server_config.get("arguments", []):
+        pytest.skip(
+            f"Skipping: {server_config['model']} not configured with --tool-call-parser"
+        )
+
+
+def _is_pre_v11(server_config: ServerConfig) -> bool:
+    r"""Pre-v11 Mistral models lack grammar-based tool call enforcement."""
+    return "7B" in server_config.get("model", "")
+
+
+@dataclass
+class StreamedToolCallResult:
+    r"""Accumulated result from streaming a single tool call."""
+
+    function_name: str | None = None
+    function_args_str: str = ""
+    tool_call_id: str | None = None
+    role_name: str | None = None
+    finish_reason_count: int = 0
+    finish_reason: str | None = None
+
+
+async def _collect_streamed_tool_call(
+    stream: openai.AsyncStream,
+    *,
+    expected_finish_reason: str = "tool_calls",
+) -> StreamedToolCallResult:
+    result = StreamedToolCallResult()
+
+    async for chunk in stream:
+        if chunk.choices[0].finish_reason:
+            result.finish_reason_count += 1
+            result.finish_reason = chunk.choices[0].finish_reason
+            assert chunk.choices[0].finish_reason == expected_finish_reason
+
+        if chunk.choices[0].delta.role:
+            assert not result.role_name or result.role_name == "assistant"
+            result.role_name = "assistant"
+
+        streamed_tool_calls = chunk.choices[0].delta.tool_calls
+        if streamed_tool_calls and len(streamed_tool_calls) > 0:
+            assert len(streamed_tool_calls) == 1
+            tool_call = streamed_tool_calls[0]
+
+            if tool_call.id:
+                assert not result.tool_call_id
+                result.tool_call_id = tool_call.id
+
+            if tool_call.function:
+                if tool_call.function.name:
+                    assert result.function_name is None
+                    result.function_name = tool_call.function.name
+                if tool_call.function.arguments:
+                    result.function_args_str += tool_call.function.arguments
+
+    return result
+
+
+@dataclass
+class StreamedContentResult:
+    r"""Accumulated result from streaming a content-only response."""
+
+    chunks: list[str] = field(default_factory=list)
+    finish_reason_count: int = 0
+    finish_reason: str | None = None
+    role_sent: bool = False
+
+
+async def _collect_streamed_content(
+    stream: openai.AsyncStream,
+    *,
+    expected_finish_reason: str | None = None,
+    no_tool_calls: bool = True,
+) -> StreamedContentResult:
+    r"""Consume a streaming response and collect text content."""
+    result = StreamedContentResult()
+
+    async for chunk in stream:
+        delta = chunk.choices[0].delta
+
+        if delta.role:
+            assert not result.role_sent
+            assert delta.role == "assistant"
+            result.role_sent = True
+
+        if delta.content:
+            result.chunks.append(delta.content)
+
+        if chunk.choices[0].finish_reason is not None:
+            result.finish_reason_count += 1
+            result.finish_reason = chunk.choices[0].finish_reason
+            if expected_finish_reason is not None:
+                assert result.finish_reason == expected_finish_reason
+
+        if no_tool_calls:
+            assert not delta.tool_calls or len(delta.tool_calls) == 0
+
+    return result
+
+
+@dataclass
+class StreamedParallelToolCallResult:
+    r"""Accumulated result from streaming parallel tool calls."""
+
+    function_names: list[str] = field(default_factory=list)
+    function_args_strs: list[str] = field(default_factory=list)
+    tool_call_ids: list[str] = field(default_factory=list)
+    role_name: str | None = None
+    finish_reason_count: int = 0
+
+
+async def _collect_streamed_parallel_tool_calls(
+    stream: openai.AsyncStream,
+) -> StreamedParallelToolCallResult:
+    r"""Consume a streaming response and collect parallel tool calls."""
+    result = StreamedParallelToolCallResult()
+    tool_call_idx: int = -1
+
+    async for chunk in stream:
+        if chunk.choices[0].finish_reason:
+            result.finish_reason_count += 1
+            assert chunk.choices[0].finish_reason == "tool_calls"
+
+        if chunk.choices[0].delta.role:
+            assert not result.role_name or result.role_name == "assistant"
+            result.role_name = "assistant"
+
+        streamed_tool_calls = chunk.choices[0].delta.tool_calls
+        if streamed_tool_calls and len(streamed_tool_calls) > 0:
+            assert len(streamed_tool_calls) == 1
+            tool_call = streamed_tool_calls[0]
+
+            if tool_call.index != tool_call_idx:
+                tool_call_idx = tool_call.index
+                result.function_args_strs.append("")
+                result.tool_call_ids.append("")
+
+            if tool_call.id:
+                result.tool_call_ids[tool_call.index] = tool_call.id
+
+            if tool_call.function:
+                if tool_call.function.name:
+                    result.function_names.append(tool_call.function.name)
+                if tool_call.function.arguments:
+                    result.function_args_strs[tool_call.index] += (
+                        tool_call.function.arguments
+                    )
+
+    return result
 
 
 # test: a tool_choice with mistral-tokenizer results in an ID of length 9
 @pytest.mark.asyncio
-async def test_tool_call_with_tool_choice(client: openai.AsyncOpenAI):
+async def test_tool_call_with_tool_choice(
+    client: openai.AsyncOpenAI, server_config: ServerConfig
+) -> None:
+    _requires_tool_parser(server_config)
+
     models = await client.models.list()
     model_name: str = models.data[0].id
     chat_completion = await client.chat.completions.create(
-        messages=MESSAGES_ASKING_FOR_TOOLS,
+        messages=ensure_system_prompt(MESSAGES_ASKING_FOR_TOOLS, server_config),
         temperature=0,
         max_completion_tokens=100,
         model=model_name,
         tools=[WEATHER_TOOL],
         tool_choice=WEATHER_TOOL,
         logprobs=False,
+        seed=SEED,
     )
 
     choice = chat_completion.choices[0]
@@ -28,3 +201,307 @@ async def test_tool_call_with_tool_choice(client: openai.AsyncOpenAI):
     assert choice.message.role == "assistant"
     assert choice.message.tool_calls is None or len(choice.message.tool_calls) == 1
     assert len(choice.message.tool_calls[0].id) == 9  # length of 9 for mistral
+
+
+_NOT_SET = object()
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "tools, tool_choice, streaming_id_len_pre_v11",
+    [
+        pytest.param(
+            [WEATHER_TOOL, SEARCH_TOOL],
+            _NOT_SET,
+            9,
+            id="auto",
+        ),
+        pytest.param(
+            [WEATHER_TOOL],
+            "required",
+            30,
+            id="required",
+        ),
+    ],
+)
+async def test_tool_call_auto_or_required(
+    client: openai.AsyncOpenAI,
+    server_config: ServerConfig,
+    tools: list,
+    tool_choice: object,
+    streaming_id_len_pre_v11: int,
+) -> None:
+    _requires_tool_parser(server_config)
+
+    models = await client.models.list()
+    model_name: str = models.data[0].id
+
+    create_kwargs: dict = {
+        "messages": ensure_system_prompt(MESSAGES_ASKING_FOR_TOOLS, server_config),
+        "temperature": 0,
+        "max_completion_tokens": 100,
+        "model": model_name,
+        "tools": tools,
+        "logprobs": False,
+        "seed": SEED,
+    }
+    if tool_choice is not _NOT_SET:
+        create_kwargs["tool_choice"] = tool_choice
+
+    # --- non-streaming ---
+    chat_completion = await client.chat.completions.create(**create_kwargs)
+
+    choice = chat_completion.choices[0]
+    tool_calls = choice.message.tool_calls
+
+    assert choice.finish_reason == "tool_calls"
+    assert tool_calls is not None and len(tool_calls) >= 1
+    assert tool_calls[0].function.name == "get_current_weather"
+    parsed_arguments = json.loads(tool_calls[0].function.arguments)
+    assert "city" in parsed_arguments
+    assert len(tool_calls[0].id) == 9
+
+    # --- streaming ---
+    stream = await client.chat.completions.create(**create_kwargs, stream=True)
+
+    result = await _collect_streamed_tool_call(stream)
+
+    assert result.finish_reason_count == 1
+    assert result.role_name == "assistant"
+    assert result.function_name == "get_current_weather"
+    streamed_args = json.loads(result.function_args_str)
+    assert isinstance(result.tool_call_id, str)
+    if _is_pre_v11(server_config):
+        assert len(result.tool_call_id) == streaming_id_len_pre_v11
+    else:
+        assert len(result.tool_call_id) == 9
+    assert parsed_arguments == streamed_args
+
+
+@pytest.mark.asyncio
+async def test_tool_call_none_with_tools(
+    client: openai.AsyncOpenAI, server_config: ServerConfig
+) -> None:
+    _requires_tool_parser(server_config)
+
+    models = await client.models.list()
+    model_name: str = models.data[0].id
+
+    # --- non-streaming ---
+    chat_completion = await client.chat.completions.create(
+        messages=ensure_system_prompt(MESSAGES_ASKING_FOR_TOOLS, server_config),
+        temperature=0,
+        max_completion_tokens=100,
+        model=model_name,
+        tools=[WEATHER_TOOL],
+        tool_choice="none",
+        logprobs=False,
+        seed=SEED,
+    )
+
+    choice = chat_completion.choices[0]
+
+    assert choice.finish_reason != "tool_calls"
+    assert choice.message.tool_calls is None or len(choice.message.tool_calls) == 0
+    assert choice.message.content is not None
+    # Without grammar enforcement, pre-v11 models may still emit [TOOL_CALLS]
+    if not _is_pre_v11(server_config):
+        assert "[TOOL_CALLS]" not in choice.message.content
+
+    non_streaming_content = choice.message.content
+
+    # --- streaming ---
+    stream = await client.chat.completions.create(
+        messages=ensure_system_prompt(MESSAGES_ASKING_FOR_TOOLS, server_config),
+        temperature=0,
+        max_completion_tokens=100,
+        model=model_name,
+        tools=[WEATHER_TOOL],
+        tool_choice="none",
+        logprobs=False,
+        seed=SEED,
+        stream=True,
+    )
+
+    # Pre-v11 models lack grammar enforcement, so the model may still
+    # emit tool calls even with tool_choice="none".
+    pre_v11 = _is_pre_v11(server_config)
+    result = await _collect_streamed_content(stream, no_tool_calls=not pre_v11)
+
+    assert result.finish_reason_count == 1
+    if not pre_v11:
+        assert result.finish_reason != "tool_calls"
+    streamed_content = "".join(result.chunks)
+    if not pre_v11:
+        assert "[TOOL_CALLS]" not in streamed_content
+        assert streamed_content == non_streaming_content
+
+
+@pytest.mark.asyncio
+async def test_chat_without_tools(
+    client: openai.AsyncOpenAI, server_config: ServerConfig
+) -> None:
+    models = await client.models.list()
+    model_name: str = models.data[0].id
+
+    # --- non-streaming ---
+    chat_completion = await client.chat.completions.create(
+        messages=ensure_system_prompt(MESSAGES_WITHOUT_TOOLS, server_config),
+        temperature=0,
+        max_completion_tokens=150,
+        model=model_name,
+        logprobs=False,
+        seed=SEED,
+    )
+
+    choice = chat_completion.choices[0]
+    output_text = choice.message.content
+
+    assert output_text is not None and len(output_text) > 0
+    assert choice.finish_reason != "tool_calls"
+    assert choice.message.tool_calls is None or len(choice.message.tool_calls) == 0
+
+    # --- streaming ---
+    stream = await client.chat.completions.create(
+        messages=ensure_system_prompt(MESSAGES_WITHOUT_TOOLS, server_config),
+        temperature=0,
+        max_completion_tokens=150,
+        model=model_name,
+        logprobs=False,
+        seed=SEED,
+        stream=True,
+    )
+
+    result = await _collect_streamed_content(
+        stream, expected_finish_reason=choice.finish_reason
+    )
+
+    assert result.role_sent
+    assert result.finish_reason_count == 1
+    assert len(result.chunks)
+    assert "".join(result.chunks) == output_text
+
+
+@pytest.mark.asyncio
+async def test_tool_call_with_results(
+    client: openai.AsyncOpenAI, server_config: ServerConfig
+) -> None:
+    _requires_tool_parser(server_config)
+
+    models = await client.models.list()
+    model_name: str = models.data[0].id
+
+    # --- non-streaming ---
+    chat_completion = await client.chat.completions.create(
+        messages=ensure_system_prompt(MESSAGES_WITH_TOOL_RESPONSE, server_config),
+        temperature=0,
+        max_completion_tokens=100,
+        model=model_name,
+        tools=[WEATHER_TOOL, SEARCH_TOOL],
+        logprobs=False,
+        seed=SEED,
+    )
+
+    choice = chat_completion.choices[0]
+
+    assert choice.finish_reason != "tool_calls"
+    assert choice.message.tool_calls is None or len(choice.message.tool_calls) == 0
+    assert choice.message.content is not None
+    assert "98" in choice.message.content
+
+    # --- streaming ---
+    stream = await client.chat.completions.create(
+        messages=ensure_system_prompt(MESSAGES_WITH_TOOL_RESPONSE, server_config),
+        temperature=0,
+        max_completion_tokens=100,
+        model=model_name,
+        tools=[WEATHER_TOOL, SEARCH_TOOL],
+        logprobs=False,
+        seed=SEED,
+        stream=True,
+    )
+
+    result = await _collect_streamed_content(
+        stream, expected_finish_reason=choice.finish_reason
+    )
+
+    assert result.role_sent
+    assert result.finish_reason_count == 1
+    assert len(result.chunks)
+    assert "".join(result.chunks) == choice.message.content
+
+
+def _requires_parallel(server_config: ServerConfig) -> None:
+    r"""Skip test if the model does not support parallel tool calls."""
+    if not server_config.get("supports_parallel"):
+        pytest.skip(
+            f"Skipping: {server_config['model']} does not support parallel tool calls"
+        )
+
+
+@pytest.mark.asyncio
+async def test_tool_call_parallel(
+    client: openai.AsyncOpenAI, server_config: ServerConfig
+) -> None:
+    _requires_tool_parser(server_config)
+    _requires_parallel(server_config)
+
+    models = await client.models.list()
+    model_name: str = models.data[0].id
+
+    # --- non-streaming ---
+    chat_completion = await client.chat.completions.create(
+        messages=ensure_system_prompt(
+            MESSAGES_ASKING_FOR_PARALLEL_TOOLS, server_config
+        ),
+        temperature=0,
+        max_completion_tokens=200,
+        model=model_name,
+        tools=[WEATHER_TOOL],
+        logprobs=False,
+        seed=SEED,
+    )
+
+    choice = chat_completion.choices[0]
+    tool_calls = choice.message.tool_calls
+
+    assert choice.finish_reason == "tool_calls"
+    assert tool_calls is not None and len(tool_calls) >= 2
+    for tc in tool_calls:
+        assert tc.type == "function"
+        assert tc.function.name == "get_current_weather"
+        assert isinstance(tc.function.arguments, str)
+        parsed = json.loads(tc.function.arguments)
+        assert "city" in parsed
+        assert len(tc.id) == 9
+
+    non_streaming_tool_calls = tool_calls
+
+    # --- streaming ---
+    stream = await client.chat.completions.create(
+        messages=ensure_system_prompt(
+            MESSAGES_ASKING_FOR_PARALLEL_TOOLS, server_config
+        ),
+        temperature=0,
+        max_completion_tokens=200,
+        model=model_name,
+        tools=[WEATHER_TOOL],
+        logprobs=False,
+        seed=SEED,
+        stream=True,
+    )
+
+    result = await _collect_streamed_parallel_tool_calls(stream)
+
+    assert result.finish_reason_count == 1
+    assert result.role_name == "assistant"
+    assert len(result.function_names) >= 2
+    assert all(name == "get_current_weather" for name in result.function_names)
+    assert len(result.tool_call_ids) >= 2
+    assert all(isinstance(tid, str) and len(tid) == 9 for tid in result.tool_call_ids)
+
+    for args_str in result.function_args_strs:
+        streamed_args = json.loads(args_str)
+        assert "city" in streamed_args
+
+    assert len(result.function_names) == len(non_streaming_tool_calls)
diff --git a/tests/tool_use/mistral/utils.py b/tests/tool_use/mistral/utils.py
index 4d772ba63793..01a2aaee6d2e 100644
--- a/tests/tool_use/mistral/utils.py
+++ b/tests/tool_use/mistral/utils.py
@@ -2,16 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 
-from typing_extensions import TypedDict
-
-
-class ServerConfig(TypedDict, total=False):
-    model: str
-    arguments: list[str]
-    system_prompt: str | None
-    supports_parallel: bool | None
-    supports_rocm: bool | None
-
+from tests.tool_use.utils import ServerConfig
 
 ARGS: list[str] = ["--max-model-len", "1024"]
 
@@ -21,6 +12,11 @@ class ServerConfig(TypedDict, total=False):
         "arguments": [
             "--tokenizer-mode",
             "mistral",
+            "--tool-call-parser",
+            "mistral",
+            "--enable-auto-tool-choice",
+            "--enforce-eager",
+            "--no-enable-prefix-caching",
             '--ignore-patterns="consolidated.safetensors"',
         ],
         "system_prompt": "You are a helpful assistant with access to tools. If a tool"
@@ -29,4 +25,22 @@ class ServerConfig(TypedDict, total=False):
         "without calling a tool. DO NOT CALL A TOOL THAT IS IRRELEVANT "
         "to the user's question - just respond to it normally.",
     },
+    "ministral-3b": {
+        "model": "mistralai/Ministral-3-3B-Instruct-2512",
+        "arguments": [
+            "--tokenizer-mode",
+            "mistral",
+            "--tool-call-parser",
+            "mistral",
+            "--enable-auto-tool-choice",
+            "--enforce-eager",
+            "--no-enable-prefix-caching",
+        ],
+        "system_prompt": "You are a helpful assistant with access to tools. If a tool"
+        " that you have would be helpful to answer a user query, "
+        "call the tool. Otherwise, answer the user's query directly "
+        "without calling a tool. DO NOT CALL A TOOL THAT IS IRRELEVANT "
+        "to the user's question - just respond to it normally.",
+        "supports_parallel": True,
+    },
 }
diff --git a/tests/tool_use/test_chat_completion_request_validations.py b/tests/tool_use/test_chat_completion_request_validations.py
index 69846f9adb12..d832feda7f5f 100644
--- a/tests/tool_use/test_chat_completion_request_validations.py
+++ b/tests/tool_use/test_chat_completion_request_validations.py
@@ -26,15 +26,15 @@ def test_chat_completion_request_with_no_tools():
     )
     assert request.tool_choice == "none"
 
-    # tools key present but empty
-    request = ChatCompletionRequest.model_validate(
-        {
-            "messages": [{"role": "user", "content": "Hello"}],
-            "model": "facebook/opt-125m",
-            "tools": [],
-        }
-    )
-    assert request.tool_choice == "none"
+    # tools key present but empty -- should be rejected
+    with pytest.raises(ValueError, match="must not be an empty array"):
+        ChatCompletionRequest.model_validate(
+            {
+                "messages": [{"role": "user", "content": "Hello"}],
+                "model": "facebook/opt-125m",
+                "tools": [],
+            }
+        )
 
 
 @pytest.mark.parametrize("tool_choice", ["auto", "required"])
@@ -61,3 +61,58 @@ def test_chat_completion_request_with_tool_choice_but_no_tools(tool_choice):
                 "tools": None,
             }
         )
+
+
+def test_reasoning_content_normalized_to_reasoning():
+    request = ChatCompletionRequest.model_validate(
+        {
+            "messages": [
+                {"role": "user", "content": "What is 2+2?"},
+                {
+                    "role": "assistant",
+                    "content": "4",
+                    "reasoning_content": "2+2 equals 4",
+                },
+                {"role": "user", "content": "Are you sure?"},
+            ],
+            "model": "facebook/opt-125m",
+        }
+    )
+    assistant_msg = request.messages[1]
+    assert assistant_msg.get("reasoning") == "2+2 equals 4"
+    assert "reasoning_content" not in assistant_msg
+
+
+def test_reasoning_takes_precedence_over_reasoning_content():
+    request = ChatCompletionRequest.model_validate(
+        {
+            "messages": [
+                {"role": "user", "content": "What is 2+2?"},
+                {
+                    "role": "assistant",
+                    "content": "4",
+                    "reasoning": "from reasoning field",
+                    "reasoning_content": "from reasoning_content field",
+                },
+            ],
+            "model": "facebook/opt-125m",
+        }
+    )
+    assistant_msg = request.messages[1]
+    assert assistant_msg.get("reasoning") == "from reasoning field"
+    assert "reasoning_content" not in assistant_msg
+
+
+def test_no_reasoning_fields_unchanged():
+    request = ChatCompletionRequest.model_validate(
+        {
+            "messages": [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi"},
+            ],
+            "model": "facebook/opt-125m",
+        }
+    )
+    assistant_msg = request.messages[1]
+    assert assistant_msg.get("reasoning") is None
+    assert "reasoning_content" not in assistant_msg
diff --git a/tests/tool_use/test_gemma4_responses_adjust_request.py b/tests/tool_use/test_gemma4_responses_adjust_request.py
new file mode 100644
index 000000000000..e08896ee3237
--- /dev/null
+++ b/tests/tool_use/test_gemma4_responses_adjust_request.py
@@ -0,0 +1,116 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Regression tests for Responses API tool-calling request adjustment.
+
+Covers two bugs on the ``/v1/responses`` path that broke streaming tool
+calling for parsers relying on special-token delimiters (Gemma4):
+
+1. :class:`Gemma4ToolParser.adjust_request` used an
+   ``isinstance(request, ChatCompletionRequest)`` guard, so a
+   :class:`ResponsesRequest` with tools never had
+   ``skip_special_tokens`` flipped to ``False``. The default (``True``)
+   stripped ``<|tool_call>`` / ``<tool_call|>`` delimiters, causing
+   :meth:`Gemma4ToolParser.extract_tool_calls_streaming` to fall through
+   to the content branch and leak the raw ``call:fn{...}`` body via
+   ``response.output_text.delta``.
+
+2. :meth:`ToolParser.adjust_request` built
+   :class:`ResponseTextConfig` in two steps (bare constructor then
+   ``.format = ...``). Under Pydantic v2 the later assignment is not
+   tracked in ``__fields_set__``, which can drop the nested config from
+   ``model_dump``. It also passed a ``description`` kwarg carrying the
+   wrong-purpose string ``"Response format for tool calling"``.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from openai.types.responses.tool_param import FunctionToolParam
+
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+from vllm.tool_parsers.abstract_tool_parser import ToolParser
+from vllm.tool_parsers.gemma4_tool_parser import Gemma4ToolParser
+
+
+def _get_weather_tool() -> FunctionToolParam:
+    return FunctionToolParam(
+        type="function",
+        name="get_weather",
+        description="Get current weather for a city",
+        parameters={
+            "type": "object",
+            "properties": {"city": {"type": "string"}},
+            "required": ["city"],
+        },
+        strict=True,
+    )
+
+
+def _build_responses_request(*, tool_choice: str) -> ResponsesRequest:
+    return ResponsesRequest(
+        model="gemma4-test",
+        input=[{"role": "user", "content": "What is the weather in Hanoi?"}],
+        tools=[_get_weather_tool()],
+        tool_choice=tool_choice,
+        stream=True,
+        max_output_tokens=200,
+    )
+
+
+class _StubTokenizer:
+    """Minimal tokenizer stub to satisfy ``Gemma4ToolParser.__init__``."""
+
+    def get_vocab(self) -> dict[str, int]:
+        return {"<|tool_call>": 256_000, "<tool_call|>": 256_001, '<|"|>': 52}
+
+
+def test_gemma4_adjust_request_sets_skip_special_tokens_on_responses() -> None:
+    """``Gemma4ToolParser.adjust_request`` must flip
+    ``skip_special_tokens=False`` for both ``ChatCompletionRequest`` and
+    ``ResponsesRequest`` so that ``<|tool_call>`` delimiters reach the
+    streaming extractor. The previous
+    ``isinstance(ChatCompletionRequest)`` guard omitted the Responses
+    path, causing raw ``call:fn{...}`` text to leak via
+    ``response.output_text.delta``.
+    """
+    parser = Gemma4ToolParser.__new__(Gemma4ToolParser)
+    parser.model_tokenizer = _StubTokenizer()
+
+    request = _build_responses_request(tool_choice="auto")
+    assert request.skip_special_tokens is True, (
+        "Precondition: ResponsesRequest.skip_special_tokens default is True"
+    )
+
+    Gemma4ToolParser.adjust_request(parser, request)
+
+    assert request.skip_special_tokens is False
+
+
+def test_tool_parser_adjust_request_builds_valid_response_text_config() -> None:
+    """``ToolParser.adjust_request`` must produce a ``ResponseTextConfig``
+    whose dumped form contains the JSON schema under the ``schema`` alias
+    and does not leak the unrelated ``"Response format for tool calling"``
+    description string that the previous two-step construction injected.
+    """
+    parser = ToolParser.__new__(ToolParser)
+    parser.model_tokenizer = None
+
+    request = _build_responses_request(tool_choice="required")
+    ToolParser.adjust_request(parser, request)
+
+    assert request.text is not None
+    assert request.text.format is not None
+    assert request.text.format.type == "json_schema"
+
+    dump: dict[str, Any] = request.text.model_dump(mode="json", by_alias=True)
+    fmt = dump.get("format") or {}
+    assert fmt.get("type") == "json_schema"
+    assert fmt.get("name") == "tool_calling_response"
+    assert fmt.get("strict") is True
+    # Nested config must be present under the alias. Two-step Pydantic v2
+    # construction could drop it from __fields_set__.
+    assert "schema" in fmt and isinstance(fmt["schema"], dict)
+    # The old code passed a wrong-purpose string; valid field should now
+    # either be absent or None (the openai-python default).
+    assert fmt.get("description") in (None, "")
diff --git a/tests/tool_use/test_parallel_tool_calls.py b/tests/tool_use/test_parallel_tool_calls.py
index ed8c80d36678..0f7f68931620 100644
--- a/tests/tool_use/test_parallel_tool_calls.py
+++ b/tests/tool_use/test_parallel_tool_calls.py
@@ -13,9 +13,19 @@
     SEED,
     WEATHER_TOOL,
     ServerConfig,
+    ensure_system_prompt,
 )
 
 
+def apply_parallel_tool_system_prompt(
+    messages,
+    server_config: ServerConfig,
+):
+    if server_config["model"] == "ibm-granite/granite-3.0-8b-instruct":
+        return ensure_system_prompt(messages, server_config)
+    return messages
+
+
 # test: getting the model to generate parallel tool calls (streaming/not)
 # when requested. NOTE that not all models may support this, so some exclusions
 # may be added in the future. e.g. llama 3.1 models are not designed to support
@@ -33,8 +43,11 @@ async def test_parallel_tool_calls(
 
     models = await client.models.list()
     model_name: str = models.data[0].id
+    messages = apply_parallel_tool_system_prompt(
+        MESSAGES_ASKING_FOR_PARALLEL_TOOLS, server_config
+    )
     chat_completion = await client.chat.completions.create(
-        messages=MESSAGES_ASKING_FOR_PARALLEL_TOOLS,
+        messages=messages,
         temperature=0,
         max_completion_tokens=200,
         model=model_name,
@@ -73,7 +86,7 @@ async def test_parallel_tool_calls(
     # make the same request, streaming
     stream = await client.chat.completions.create(
         model=model_name,
-        messages=MESSAGES_ASKING_FOR_PARALLEL_TOOLS,
+        messages=messages,
         temperature=0,
         max_completion_tokens=200,
         tools=[WEATHER_TOOL, SEARCH_TOOL],
@@ -162,8 +175,11 @@ async def test_parallel_tool_calls_with_results(
 
     models = await client.models.list()
     model_name: str = models.data[0].id
+    messages = apply_parallel_tool_system_prompt(
+        MESSAGES_WITH_PARALLEL_TOOL_RESPONSE, server_config
+    )
     chat_completion = await client.chat.completions.create(
-        messages=MESSAGES_WITH_PARALLEL_TOOL_RESPONSE,
+        messages=messages,
         temperature=0,
         max_completion_tokens=200,
         model=model_name,
@@ -182,7 +198,7 @@ async def test_parallel_tool_calls_with_results(
     assert "78" in choice.message.content  # Orlando temp in tool response
 
     stream = await client.chat.completions.create(
-        messages=MESSAGES_WITH_PARALLEL_TOOL_RESPONSE,
+        messages=messages,
         temperature=0,
         max_completion_tokens=200,
         model=model_name,
@@ -220,15 +236,20 @@ async def test_parallel_tool_calls_with_results(
 
 
 @pytest.mark.asyncio
-async def test_parallel_tool_calls_false(client: openai.AsyncOpenAI):
+async def test_parallel_tool_calls_false(
+    client: openai.AsyncOpenAI, server_config: ServerConfig
+):
     """
     Ensure only one tool call is returned when parallel_tool_calls is False.
     """
 
     models = await client.models.list()
     model_name: str = models.data[0].id
+    messages = apply_parallel_tool_system_prompt(
+        MESSAGES_ASKING_FOR_PARALLEL_TOOLS, server_config
+    )
     chat_completion = await client.chat.completions.create(
-        messages=MESSAGES_ASKING_FOR_PARALLEL_TOOLS,
+        messages=messages,
         temperature=0,
         max_completion_tokens=200,
         model=model_name,
@@ -248,7 +269,7 @@ async def test_parallel_tool_calls_false(client: openai.AsyncOpenAI):
     # make the same request, streaming
     stream = await client.chat.completions.create(
         model=model_name,
-        messages=MESSAGES_ASKING_FOR_PARALLEL_TOOLS,
+        messages=messages,
         temperature=0,
         max_completion_tokens=200,
         tools=[WEATHER_TOOL, SEARCH_TOOL],
diff --git a/tests/tool_use/test_responses_request_validations.py b/tests/tool_use/test_responses_request_validations.py
new file mode 100644
index 000000000000..63a1828c5009
--- /dev/null
+++ b/tests/tool_use/test_responses_request_validations.py
@@ -0,0 +1,184 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+from pydantic import ValidationError
+
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+
+SAMPLE_TOOL = {
+    "type": "function",
+    "name": "get_weather",
+    "description": "Get current weather",
+    "parameters": {
+        "type": "object",
+        "properties": {"location": {"type": "string", "description": "City name"}},
+        "required": ["location"],
+    },
+}
+
+NAMED_TOOL_CHOICE = {
+    "type": "function",
+    "name": "get_weather",
+}
+
+
+def test_responses_request_with_no_tools():
+    # tools key is not present — defaults tool_choice to "none"
+    request = ResponsesRequest.model_validate({"input": "Hello", "model": "test-model"})
+    assert request.tool_choice == "none"
+
+    # tools key present but empty
+    request = ResponsesRequest.model_validate(
+        {"input": "Hello", "model": "test-model", "tools": []}
+    )
+    assert request.tool_choice == "none"
+
+
+def test_responses_request_no_tools_tool_choice_none():
+    request = ResponsesRequest.model_validate(
+        {"input": "Hello", "model": "test-model", "tool_choice": "none"}
+    )
+    assert request.tool_choice == "none"
+
+
+def test_responses_request_no_tools_tool_choice_auto():
+    request = ResponsesRequest.model_validate(
+        {"input": "Hello", "model": "test-model", "tool_choice": "auto"}
+    )
+    assert request.tool_choice == "none"
+
+
+@pytest.mark.parametrize("tools", [None, []])
+def test_responses_request_required_without_tools(tools):
+    kwargs = {"input": "Hello", "model": "test-model", "tool_choice": "required"}
+    if tools is not None:
+        kwargs["tools"] = tools
+    with pytest.raises(
+        ValidationError, match="Tool choice 'required' must be specified"
+    ):
+        ResponsesRequest.model_validate(kwargs)
+
+
+def test_responses_request_named_tool_choice_without_tools():
+    with pytest.raises(ValidationError, match="not found in 'tools' parameter"):
+        ResponsesRequest.model_validate(
+            {
+                "input": "Hello",
+                "model": "test-model",
+                "tool_choice": NAMED_TOOL_CHOICE,
+            }
+        )
+
+
+def test_responses_request_with_tools_default_tool_choice():
+    request = ResponsesRequest.model_validate(
+        {"input": "Hello", "model": "test-model", "tools": [SAMPLE_TOOL]}
+    )
+    assert request.tool_choice == "auto"
+
+
+def test_responses_request_with_tools_tool_choice_none():
+    request = ResponsesRequest.model_validate(
+        {
+            "input": "Hello",
+            "model": "test-model",
+            "tools": [SAMPLE_TOOL],
+            "tool_choice": "none",
+        }
+    )
+    assert request.tool_choice == "none"
+
+
+def test_responses_request_named_tool_choice_matching():
+    request = ResponsesRequest.model_validate(
+        {
+            "input": "Hello",
+            "model": "test-model",
+            "tools": [SAMPLE_TOOL],
+            "tool_choice": NAMED_TOOL_CHOICE,
+        }
+    )
+    assert request.tool_choice.type == "function"
+    assert request.tool_choice.name == "get_weather"
+
+
+def test_responses_request_named_tool_choice_not_matching():
+    with pytest.raises(ValidationError, match="not found in 'tools' parameter"):
+        ResponsesRequest.model_validate(
+            {
+                "input": "Hello",
+                "model": "test-model",
+                "tools": [SAMPLE_TOOL],
+                "tool_choice": {"type": "function", "name": "nonexistent"},
+            }
+        )
+
+
+def test_responses_request_with_tools_tool_choice_auto():
+    request = ResponsesRequest.model_validate(
+        {
+            "input": "Hello",
+            "model": "test-model",
+            "tools": [SAMPLE_TOOL],
+            "tool_choice": "auto",
+        }
+    )
+    assert request.tool_choice == "auto"
+
+
+def test_responses_request_with_tools_tool_choice_required():
+    request = ResponsesRequest.model_validate(
+        {
+            "input": "Hello",
+            "model": "test-model",
+            "tools": [SAMPLE_TOOL],
+            "tool_choice": "required",
+        }
+    )
+    assert request.tool_choice == "required"
+
+
+def test_responses_request_empty_tools_tool_choice_none():
+    request = ResponsesRequest.model_validate(
+        {"input": "Hello", "model": "test-model", "tools": [], "tool_choice": "none"}
+    )
+    assert request.tool_choice == "none"
+
+
+def test_responses_request_empty_tools_tool_choice_auto():
+    request = ResponsesRequest.model_validate(
+        {"input": "Hello", "model": "test-model", "tools": [], "tool_choice": "auto"}
+    )
+    assert request.tool_choice == "none"
+
+
+@pytest.mark.parametrize(
+    "tool_choice",
+    [
+        {"type": "function"},
+        {"type": "function", "name": ""},
+    ],
+)
+def test_responses_request_named_tool_choice_missing_name(tool_choice):
+    with pytest.raises(ValidationError, match="not found in 'tools' parameter"):
+        ResponsesRequest.model_validate(
+            {
+                "input": "Hello",
+                "model": "test-model",
+                "tools": [SAMPLE_TOOL],
+                "tool_choice": tool_choice,
+            }
+        )
+
+
+def test_responses_request_empty_tools_named_tool_choice():
+    with pytest.raises(ValidationError, match="not found in 'tools' parameter"):
+        ResponsesRequest.model_validate(
+            {
+                "input": "Hello",
+                "model": "test-model",
+                "tools": [],
+                "tool_choice": NAMED_TOOL_CHOICE,
+            }
+        )
diff --git a/tests/tool_use/test_tool_calls.py b/tests/tool_use/test_tool_calls.py
index f719a886c89d..8d21bcd79cc3 100644
--- a/tests/tool_use/test_tool_calls.py
+++ b/tests/tool_use/test_tool_calls.py
@@ -12,17 +12,22 @@
     SEARCH_TOOL,
     SEED,
     WEATHER_TOOL,
+    ServerConfig,
+    ensure_system_prompt,
 )
 
 
 # test: request a chat completion that should return tool calls, so we know they
 # are parsable
 @pytest.mark.asyncio
-async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
+async def test_tool_call_and_choice(
+    client: openai.AsyncOpenAI, server_config: ServerConfig
+):
     models = await client.models.list()
     model_name: str = models.data[0].id
+    messages = ensure_system_prompt(MESSAGES_ASKING_FOR_TOOLS, server_config)
     chat_completion = await client.chat.completions.create(
-        messages=MESSAGES_ASKING_FOR_TOOLS,
+        messages=messages,
         temperature=0,
         max_completion_tokens=100,
         model=model_name,
@@ -68,7 +73,7 @@ async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
     # make the same request, streaming
     stream = await client.chat.completions.create(
         model=model_name,
-        messages=MESSAGES_ASKING_FOR_TOOLS,
+        messages=messages,
         temperature=0,
         max_completion_tokens=100,
         tools=[WEATHER_TOOL, SEARCH_TOOL],
diff --git a/tests/tool_use/test_tool_choice_required.py b/tests/tool_use/test_tool_choice_required.py
index 01c1360818eb..e99165f3569a 100644
--- a/tests/tool_use/test_tool_choice_required.py
+++ b/tests/tool_use/test_tool_choice_required.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import json
 from copy import deepcopy
-from unittest.mock import MagicMock
 
 import pytest
 import regex as re
@@ -11,7 +10,7 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionToolsParam,
 )
-from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
+from vllm.tool_parsers.streaming import extract_required_tool_call_streaming
 from vllm.tool_parsers.utils import get_json_schema_from_tools
 
 pytestmark = pytest.mark.cpu_test
@@ -281,8 +280,6 @@ def test_structured_outputs_json_without_parameters(
 @pytest.mark.parametrize("empty_params", [False, True])
 @pytest.mark.parametrize("delta_len", [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
 def test_streaming_output_valid(output, empty_params, delta_len):
-    self = MagicMock()
-
     output = deepcopy(output)
     if empty_params:
         output = [{"name": o["name"], "parameters": {}} for o in output]
@@ -295,14 +292,13 @@ def test_streaming_output_valid(output, empty_params, delta_len):
         delta_text = output_json[i : i + delta_len]
         current_text = previous_text + delta_text
 
-        delta_message, function_name_returned = (
-            OpenAIServingChat.extract_tool_call_required_streaming(
-                self,
-                previous_text=previous_text,
-                current_text=current_text,
-                delta_text=delta_text,
-                function_name_returned=function_name_returned,
-            )
+        delta_message, function_name_returned = extract_required_tool_call_streaming(
+            previous_text=previous_text,
+            current_text=current_text,
+            delta_text=delta_text,
+            function_name_returned=function_name_returned,
+            tool_call_idx=None,
+            tool_call_id_type="random",
         )
 
         if delta_message:
@@ -332,8 +328,6 @@ def test_streaming_output_valid(output, empty_params, delta_len):
 
 
 def test_streaming_output_valid_with_trailing_extra_data():
-    self = MagicMock()
-
     output = [{"name": "get_current_weather", "parameters": {"city": "Vienna"}}]
     output_json = json.dumps(output) + "\nDONE"
 
@@ -345,14 +339,13 @@ def test_streaming_output_valid_with_trailing_extra_data():
         delta_text = output_json[i : i + delta_len]
         current_text = previous_text + delta_text
 
-        delta_message, function_name_returned = (
-            OpenAIServingChat.extract_tool_call_required_streaming(
-                self,
-                previous_text=previous_text,
-                current_text=current_text,
-                delta_text=delta_text,
-                function_name_returned=function_name_returned,
-            )
+        delta_message, function_name_returned = extract_required_tool_call_streaming(
+            previous_text=previous_text,
+            current_text=current_text,
+            delta_text=delta_text,
+            function_name_returned=function_name_returned,
+            tool_call_idx=None,
+            tool_call_id_type="random",
         )
 
         if delta_message:
diff --git a/tests/tool_use/utils.py b/tests/tool_use/utils.py
index 5a03f53ec644..963bc5531c7b 100644
--- a/tests/tool_use/utils.py
+++ b/tests/tool_use/utils.py
@@ -201,6 +201,10 @@ def ensure_system_prompt(
             "--chat-template",
             str(VLLM_PATH / "examples/tool_chat_template_granite.jinja"),
         ],
+        "system_prompt": "You are a helpful AI assistant with access to tools. "
+        "Use two-letter US state abbreviations in weather tool arguments. "
+        "When a tool is required to answer the user query, respond with "
+        "<|tool_call|> followed by a JSON list of tools used.",
     },
     "granite-3.1-8b": {
         "model": "ibm-granite/granite-3.1-8b-instruct",
diff --git a/tests/tools/test_docker_build_metadata_args.py b/tests/tools/test_docker_build_metadata_args.py
new file mode 100644
index 000000000000..fa2eac558f53
--- /dev/null
+++ b/tests/tools/test_docker_build_metadata_args.py
@@ -0,0 +1,152 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import os
+import shlex
+import subprocess
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parents[2]
+HELPER = REPO_ROOT / ".buildkite" / "scripts" / "docker-build-metadata-args.sh"
+
+
+def run_helper(
+    *args: str,
+    env: dict[str, str] | None = None,
+    path: str | None = None,
+) -> list[str]:
+    helper_env = {"PATH": path or os.environ["PATH"]}
+    if env:
+        helper_env.update(env)
+    result = subprocess.run(
+        ["bash", str(HELPER), *args],
+        check=True,
+        env=helper_env,
+        stdout=subprocess.PIPE,
+        text=True,
+    )
+    return shlex.split(result.stdout)
+
+
+def option_values(args: list[str], option: str) -> list[str]:
+    return [args[i + 1] for i, arg in enumerate(args[:-1]) if arg == option]
+
+
+def build_args(args: list[str]) -> dict[str, str]:
+    values = {}
+    for value in option_values(args, "--build-arg"):
+        key, arg_value = value.split("=", 1)
+        values[key] = arg_value
+    return values
+
+
+def test_release_metadata_args_prefer_pipeline_id() -> None:
+    args = run_helper(
+        "cu130-ubuntu2404",
+        env={
+            "BUILDKITE": "1",
+            "BUILDKITE_COMMIT": "abc123",
+            "BUILDKITE_PIPELINE_ID": "pipe-uuid",
+            "BUILDKITE_PIPELINE_SLUG": "release",
+            "BUILDKITE_BUILD_URL": "https://buildkite.example/vllm/builds/1",
+            "RELEASE_VERSION": "v0.20.0",
+        },
+    )
+
+    assert build_args(args) == {
+        "VLLM_BUILD_COMMIT": "abc123",
+        "VLLM_BUILD_PIPELINE": "pipe-uuid",
+        "VLLM_BUILD_URL": "https://buildkite.example/vllm/builds/1",
+        "VLLM_IMAGE_TAG": "vllm/vllm-openai:v0.20.0-cu130-ubuntu2404",
+    }
+    expected_tag = (
+        "public.ecr.aws/q9t5s3a7/vllm-release-repo:"
+        f"abc123-{os.uname().machine}-cu130-ubuntu2404"
+    )
+    assert option_values(args, "--tag") == [expected_tag]
+
+
+def test_nightly_metadata_args_fall_back_to_pipeline_slug() -> None:
+    args = run_helper(
+        "ubuntu2404",
+        env={
+            "BUILDKITE": "1",
+            "BUILDKITE_COMMIT": "def456",
+            "BUILDKITE_PIPELINE_SLUG": "release",
+            "BUILDKITE_BUILD_URL": "https://buildkite.example/vllm/builds/2",
+            "NIGHTLY": "1",
+        },
+    )
+
+    assert build_args(args) == {
+        "VLLM_BUILD_COMMIT": "def456",
+        "VLLM_BUILD_PIPELINE": "release",
+        "VLLM_BUILD_URL": "https://buildkite.example/vllm/builds/2",
+        "VLLM_IMAGE_TAG": "vllm/vllm-openai:nightly-def456-ubuntu2404",
+    }
+    expected_tag = (
+        "public.ecr.aws/q9t5s3a7/vllm-release-repo:"
+        f"def456-{os.uname().machine}-ubuntu2404"
+    )
+    assert option_values(args, "--tag") == [expected_tag]
+
+
+def test_local_metadata_args_use_local_overrides() -> None:
+    args = run_helper(
+        env={
+            "VLLM_IMAGE_TAG": "local/test:dev",
+            "VLLM_BUILD_COMMIT": "localsha",
+            "VLLM_BUILD_PIPELINE": "local-pipeline",
+            "VLLM_BUILD_URL": "https://buildkite.example/local",
+        },
+    )
+
+    assert build_args(args) == {
+        "VLLM_BUILD_COMMIT": "localsha",
+        "VLLM_BUILD_PIPELINE": "local-pipeline",
+        "VLLM_BUILD_URL": "https://buildkite.example/local",
+        "VLLM_IMAGE_TAG": "local/test:dev",
+    }
+    assert option_values(args, "--tag") == ["local/test:dev"]
+
+
+def test_release_version_lookup_failure_falls_back_to_commit(
+    tmp_path: Path,
+) -> None:
+    fake_bin = tmp_path / "bin"
+    fake_bin.mkdir()
+    buildkite_agent = fake_bin / "buildkite-agent"
+    buildkite_agent.write_text("#!/bin/sh\nexit 1\n")
+    buildkite_agent.chmod(0o755)
+
+    args = run_helper(
+        "cu129",
+        env={
+            "BUILDKITE": "1",
+            "BUILDKITE_COMMIT": "fallback123",
+            "BUILDKITE_PIPELINE_SLUG": "release",
+        },
+        path=f"{fake_bin}:{os.environ['PATH']}",
+    )
+
+    assert build_args(args)["VLLM_IMAGE_TAG"] == ("vllm/vllm-openai:vfallback123-cu129")
+
+
+def test_vllm_openai_image_embeds_metadata_contract() -> None:
+    dockerfile = (REPO_ROOT / "docker" / "Dockerfile").read_text()
+
+    for expected in (
+        "ARG VLLM_BUILD_COMMIT",
+        "ARG VLLM_BUILD_PIPELINE",
+        "ARG VLLM_BUILD_URL",
+        "ARG VLLM_IMAGE_TAG",
+        "VLLM_BUILD_COMMIT=${VLLM_BUILD_COMMIT:-unknown}",
+        "VLLM_BUILD_PIPELINE=${VLLM_BUILD_PIPELINE:-local}",
+        "VLLM_BUILD_URL=${VLLM_BUILD_URL:-}",
+        "VLLM_IMAGE_TAG=${VLLM_IMAGE_TAG:-local/vllm-openai:dev}",
+        'ai.vllm.build.commit="${VLLM_BUILD_COMMIT}"',
+        'ai.vllm.build.pipeline="${VLLM_BUILD_PIPELINE}"',
+        'ai.vllm.build.url="${VLLM_BUILD_URL}"',
+        'ai.vllm.image.tag="${VLLM_IMAGE_TAG}"',
+    ):
+        assert expected in dockerfile
diff --git a/tests/transformers_utils/test_hf_overrides_model_type.py b/tests/transformers_utils/test_hf_overrides_model_type.py
new file mode 100644
index 000000000000..cce10d3b084e
--- /dev/null
+++ b/tests/transformers_utils/test_hf_overrides_model_type.py
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Test that hf_overrides model_type returns the correct config class."""
+
+import json
+import tempfile
+
+from transformers import PretrainedConfig
+
+from vllm.transformers_utils.config import _CONFIG_REGISTRY, get_config
+
+
+class _TestCustomConfig(PretrainedConfig):
+    model_type = "test_custom_model"
+
+    def __init__(self, custom_attr=42, **kw):
+        super().__init__(**kw)
+        self.custom_attr = custom_attr
+
+
+def test_hf_overrides_model_type_returns_correct_config_class():
+    """When hf_overrides sets model_type to a registered custom type whose
+    checkpoint has a *different* model_type on disk, get_config() must return
+    an instance of the registered config class — not the class that matches
+    the on-disk model_type."""
+
+    # Register the custom config
+    _CONFIG_REGISTRY["test_custom_model"] = _TestCustomConfig
+
+    try:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Checkpoint says model_type="mixtral" on disk
+            cfg = {
+                "model_type": "mixtral",
+                "hidden_size": 128,
+                "num_hidden_layers": 2,
+                "num_attention_heads": 4,
+                "num_key_value_heads": 4,
+                "intermediate_size": 256,
+                "num_local_experts": 4,
+                "num_experts_per_tok": 2,
+            }
+            with open(f"{tmpdir}/config.json", "w") as f:
+                json.dump(cfg, f)
+
+            config = get_config(
+                tmpdir,
+                trust_remote_code=False,
+                hf_overrides_kw={
+                    "model_type": "test_custom_model",
+                },
+            )
+
+            from transformers import AutoConfig
+            from transformers.models.auto.configuration_auto import CONFIG_MAPPING
+
+            # get_config() returns the registered custom class
+            assert isinstance(config, _TestCustomConfig), (
+                f"Expected _TestCustomConfig, got {type(config).__name__}"
+            )
+
+            # AutoConfig has _TestCustomConfig registered under both
+            # the overridden model_type and the on-disk model_type
+            assert CONFIG_MAPPING["test_custom_model"] is _TestCustomConfig
+            assert CONFIG_MAPPING["mixtral"] is _TestCustomConfig
+
+            # AutoConfig.from_pretrained now returns _TestCustomConfig
+            # for this checkpoint (even though its on-disk model_type
+            # is "mixtral")
+            auto_config = AutoConfig.from_pretrained(tmpdir)
+            assert isinstance(auto_config, _TestCustomConfig), (
+                f"Expected _TestCustomConfig from AutoConfig, got "
+                f"{type(auto_config).__name__}"
+            )
+    finally:
+        _CONFIG_REGISTRY.pop("test_custom_model", None)
+        # Restore the original mixtral AutoConfig mapping to avoid
+        # side effects on other tests in the same process
+        from transformers import AutoConfig, MixtralConfig
+
+        AutoConfig.register("mixtral", MixtralConfig, exist_ok=True)
diff --git a/tests/transformers_utils/test_utils.py b/tests/transformers_utils/test_utils.py
index 485c2efff77f..94dd014c929f 100644
--- a/tests/transformers_utils/test_utils.py
+++ b/tests/transformers_utils/test_utils.py
@@ -81,6 +81,25 @@ def test_is_remote_gguf_extended_quant_types(self):
         assert not is_remote_gguf("repo/model:INVALID_M")
         assert not is_remote_gguf("repo/model:Q9_K_M")
 
+    def test_is_remote_gguf_nonstandard_quant_type(self):
+        """Test is_remote_gguf with non-standard quant types containing
+        a known GGML type."""
+        # Non-standard quant types with known GGML type after prefix
+        assert is_remote_gguf("unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL")
+        assert is_remote_gguf("user/Model:UD-Q4_K_M")
+        assert is_remote_gguf("user/SomeModel:Custom-Q8_0")
+
+        # Exact GGML type after prefix (no suffix stripping needed)
+        assert is_remote_gguf("user/Model-GGUF:UD-IQ4_NL")
+        assert is_remote_gguf("user/Model-GGUF:UD-Q8_0")
+
+        # Completely unknown quant types should still fail
+        assert not is_remote_gguf("repo/model:TOTALLY-RANDOM")
+        assert not is_remote_gguf("user/Model:UD-INVALID")
+
+        # No dash separator → not recognized as prefixed
+        assert not is_remote_gguf("repo/model:UDIQ4NL")
+
     def test_is_remote_gguf_without_colon(self):
         """Test is_remote_gguf without colon."""
         assert not is_remote_gguf("repo/model")
@@ -143,6 +162,14 @@ def test_split_remote_gguf_extended_quant_types(self):
         assert repo_id == "repo/model"
         assert quant_type == "Q3_K_S"
 
+    def test_split_remote_gguf_nonstandard_quant_type(self):
+        """Test split_remote_gguf with non-standard quant types in GGUF repos."""
+        repo_id, quant_type = split_remote_gguf(
+            "unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL"
+        )
+        assert repo_id == "unsloth/Qwen3.5-35B-A3B-GGUF"
+        assert quant_type == "UD-Q4_K_XL"
+
     def test_split_remote_gguf_with_path_object(self):
         """Test split_remote_gguf with Path object."""
         repo_id, quant_type = split_remote_gguf(Path("unsloth/Qwen3-0.6B-GGUF:IQ1_S"))
diff --git a/tests/utils.py b/tests/utils.py
index 1264fe81c8f5..7fa7fd8ad85f 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import asyncio
+import atexit
 import contextlib
 import copy
 import functools
@@ -14,13 +15,14 @@
 import subprocess
 import sys
 import tempfile
+import threading
 import time
 import warnings
-from collections.abc import Callable, Iterable
-from contextlib import ExitStack, contextmanager, suppress
-from multiprocessing import Process
+from collections.abc import Callable, Iterable, Sequence
+from contextlib import ExitStack, contextmanager
+from multiprocessing import Process, get_context
 from pathlib import Path
-from typing import Any, Literal
+from typing import Any, Literal, cast
 from unittest.mock import patch
 
 import anthropic
@@ -31,6 +33,8 @@
 import requests
 import torch
 import torch.nn.functional as F
+from huggingface_hub import hf_hub_download
+from huggingface_hub.constants import HF_HUB_OFFLINE
 from openai.types.completion import Completion
 from typing_extensions import ParamSpec
 
@@ -42,13 +46,12 @@
 )
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.entrypoints.cli.serve import ServeSubcommand
+from vllm.logger import init_logger
 from vllm.model_executor.kernels.linear import (
-    FP8ScaledMMLinearKernel,
+    _KernelT,
     init_fp8_linear_kernel,
 )
-from vllm.model_executor.layers.quantization.utils.fp8_utils import W8A8BlockFp8LinearOp
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
-    GroupShape,
     QuantKey,
 )
 from vllm.model_executor.model_loader import get_model_loader
@@ -58,15 +61,33 @@
 from vllm.utils.mem_constants import GB_bytes
 from vllm.utils.network_utils import get_open_port
 from vllm.utils.torch_utils import (
-    cuda_device_count_stateless,
     set_random_seed,  # noqa: F401 - re-exported for use in test files
 )
 
+logger = init_logger(__name__)
+
 FP8_DTYPE = current_platform.fp8_dtype()
 
-if current_platform.is_rocm():
-    import threading
 
+def prewarm_hf_cache(assets: list[tuple[str, str]]) -> None:
+    """Pre-populate the HF cache for (repo_id, filename) pairs that upstream
+    trust_remote_code modules would otherwise fetch from third-party CDNs
+    (often unreachable from US-based CI)."""
+    if HF_HUB_OFFLINE:
+        return
+    for repo_id, filename in assets:
+        try:
+            hf_hub_download(repo_id=repo_id, filename=filename)
+        except Exception as e:
+            logger.warning(
+                "Failed to prefetch %s/%s: %r. Tests depending on this asset may fail.",
+                repo_id,
+                filename,
+                e,
+            )
+
+
+if current_platform.is_rocm():
     from amdsmi import (
         amdsmi_get_gpu_vram_usage,
         amdsmi_get_processor_handles,
@@ -130,6 +151,21 @@ def _nvml():
 )
 
 
+def requires_spawn_multiprocessing() -> bool:
+    """Whether this platform requires spawn instead of fork for test processes."""
+    return current_platform.is_rocm() or current_platform.is_xpu()
+
+
+def _run_in_new_process_group(
+    child_process_fxn: Callable[[dict[str, str] | None, str, list[str]], None],
+    env_dict: dict[str, str] | None,
+    model: str,
+    vllm_serve_args: list[str],
+) -> None:
+    os.setsid()
+    child_process_fxn(env_dict, model, vllm_serve_args)
+
+
 class RemoteVLLMServer:
     """Base class for launching vLLM server subprocesses for testing.
 
@@ -138,6 +174,11 @@ class RemoteVLLMServer:
     """
 
     DUMMY_API_KEY = "token-abc123"  # vLLM's OpenAI server does not need API key
+    _active_servers: set["RemoteVLLMServer"] = set()
+    _active_servers_lock = threading.RLock()
+    _cleanup_hooks_registered = False
+    _signal_hooks_registered = False
+    _previous_signal_handlers: dict[int, Any] = {}
     proc: subprocess.Popen
 
     def _create_cli_subcommand(self):
@@ -213,6 +254,7 @@ def __init__(
         )
 
         self._pre_download_model(model, args)
+        self._shutdown_complete = False
 
         # Record GPU memory before server start so we know what
         # "released" looks like.
@@ -225,6 +267,7 @@ def __init__(
             )
 
         self._start_server(model, vllm_serve_args, env_dict)
+        self._register_active_server()
         max_wait_seconds = max_wait_seconds or 480
         try:
             self._wait_for_server(url=self.url_for("health"), timeout=max_wait_seconds)
@@ -250,6 +293,78 @@ def _shutdown(self) -> None:
         (when the server fails to start). Must be safe to call even if
         the process is already dead.
         """
+        if self._shutdown_complete:
+            return
+
+        self._shutdown_complete = True
+        try:
+            self._terminate_process_tree()
+            self._wait_for_gpu_memory_release()
+        finally:
+            self._unregister_active_server()
+
+    @classmethod
+    def _ensure_cleanup_hooks_registered(cls) -> None:
+        """Register process-exit cleanup for detached server subprocesses."""
+        root_cls = RemoteVLLMServer
+        with root_cls._active_servers_lock:
+            if not root_cls._cleanup_hooks_registered:
+                atexit.register(root_cls._shutdown_active_servers)
+                root_cls._cleanup_hooks_registered = True
+
+            if (
+                threading.current_thread() is threading.main_thread()
+                and not root_cls._signal_hooks_registered
+            ):
+                for signum in (signal.SIGTERM, signal.SIGINT):
+                    root_cls._previous_signal_handlers[signum] = signal.getsignal(
+                        signum
+                    )
+                    signal.signal(signum, root_cls._handle_parent_signal)
+                root_cls._signal_hooks_registered = True
+
+    def _register_active_server(self) -> None:
+        """Track this server so parent-process exits still clean it up."""
+        RemoteVLLMServer._ensure_cleanup_hooks_registered()
+        with RemoteVLLMServer._active_servers_lock:
+            RemoteVLLMServer._active_servers.add(self)
+
+    def _unregister_active_server(self) -> None:
+        with RemoteVLLMServer._active_servers_lock:
+            RemoteVLLMServer._active_servers.discard(self)
+
+    @classmethod
+    def _shutdown_active_servers(cls) -> None:
+        """Best-effort shutdown for all live RemoteVLLMServer instances."""
+        with cls._active_servers_lock:
+            servers = list(cls._active_servers)
+
+        for server in servers:
+            with contextlib.suppress(Exception):
+                server._shutdown()
+
+    @classmethod
+    def _handle_parent_signal(cls, signum, frame) -> None:
+        """Clean up detached servers before letting the signal terminate pytest."""
+        cls._shutdown_active_servers()
+
+        previous_handler = cls._previous_signal_handlers.get(signum, signal.SIG_DFL)
+        if callable(previous_handler):
+            previous_handler(signum, frame)
+        elif previous_handler == signal.SIG_IGN:
+            return
+        elif signum == signal.SIGINT:
+            raise KeyboardInterrupt
+        else:
+            raise SystemExit(128 + signum)
+
+    def _terminate_process_tree(self) -> None:
+        """Kill the server process tree without waiting for GPU memory release.
+
+        Split out from ``_shutdown`` so that ``shutdown_many`` can run this
+        phase in parallel for sibling servers and then wait for GPU memory
+        release once at the end.
+        """
         pid = self.proc.pid
 
         # Get the process group ID. Because we used
@@ -291,9 +406,56 @@ def _shutdown(self) -> None:
         # prevent VRAM from being reclaimed by the driver.
         self._kill_process_group_survivors(pgid)
 
-        # Wait for GPU memory to actually be freed, not just
-        # "stabilized at whatever level it's at".
-        self._wait_for_gpu_memory_release()
+    @classmethod
+    def shutdown_many(cls, servers: Sequence["RemoteVLLMServer"]) -> None:
+        """Shut down multiple sibling servers and wait for GPU memory once.
+
+        Test fixtures that hold several ``RemoteVLLMServer`` instances at
+        once must NOT shut them down by calling each server's ``__exit__``
+        sequentially: every server measures total GPU memory across all
+        visible devices in ``_wait_for_gpu_memory_release``, so the first
+        server's wait blocks the full timeout because later sibling
+        servers are still holding GPU memory.
+
+        Instead, this method terminates every server's process tree in
+        parallel, then runs the GPU-memory-release wait once against the
+        earliest recorded baseline (memory before any server started).
+        """
+        if not servers:
+            return
+
+        for server in servers:
+            server._shutdown_complete = True
+
+        threads = [
+            threading.Thread(
+                target=s._terminate_process_tree,
+                name=f"shutdown-{s.proc.pid}",
+                daemon=True,
+            )
+            for s in servers
+        ]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+
+        # Use the smallest pre-server baseline so the wait targets memory
+        # usage before *any* of these sibling servers started, not after
+        # earlier siblings had already allocated.
+        earliest = min(
+            servers,
+            key=lambda s: (
+                float("inf")
+                if s._pre_server_gpu_memory is None
+                else s._pre_server_gpu_memory
+            ),
+        )
+        try:
+            earliest._wait_for_gpu_memory_release()
+        finally:
+            for server in servers:
+                server._unregister_active_server()
 
     def _kill_process_group_survivors(
         self, pgid: int | None, timeout: float = 15.0
@@ -384,7 +546,7 @@ def _get_gpu_memory_used(self) -> float | None:
             elif current_platform.is_cuda():
                 with _nvml():
                     total_used = 0
-                    device_count = cuda_device_count_stateless()
+                    device_count = current_platform.device_count()
                     for i in range(device_count):
                         handle = nvmlDeviceGetHandleByIndex(i)
                         mem_info = nvmlDeviceGetMemoryInfo(handle)
@@ -498,7 +660,8 @@ def url_root(self) -> str:
         )
 
     def url_for(self, *parts: str) -> str:
-        return self.url_root + "/" + "/".join(parts)
+        path = "/".join(part.strip("/") for part in parts if part)
+        return f"{self.url_root}/{path}"
 
     def get_client(self, **kwargs):
         if "timeout" not in kwargs:
@@ -615,8 +778,11 @@ class RemoteOpenAIServerCustom(RemoteOpenAIServer):
     def _start_server(
         self, model: str, vllm_serve_args: list[str], env_dict: dict[str, str] | None
     ) -> None:
-        self.proc: Process = Process(
-            target=self.child_process_fxn, args=(env_dict, model, vllm_serve_args)
+        method = "spawn" if requires_spawn_multiprocessing() else "fork"
+        ctx = get_context(method)
+        self.proc: Process = cast(Any, ctx).Process(
+            target=_run_in_new_process_group,
+            args=(self.child_process_fxn, env_dict, model, vllm_serve_args),
         )  # type: ignore[assignment]
         self.proc.start()
 
@@ -646,12 +812,40 @@ def __init__(
     def _poll(self) -> int | None:
         return self.proc.exitcode
 
-    def __exit__(self, exc_type, exc_value, traceback):
-        self.proc.terminate()
-        self.proc.join(8)
+    def _terminate_process_tree(self) -> None:
+        pid = self.proc.pid
+        if pid is None:
+            return
+
+        pgid: int | None
+        try:
+            pgid = os.getpgid(pid)
+            # _run_in_new_process_group should make the child the group
+            # leader. Avoid signaling pytest's process group if startup failed
+            # before os.setsid() ran.
+            if pgid != pid:
+                pgid = None
+        except (ProcessLookupError, OSError):
+            pgid = None
+
+        with contextlib.suppress(ProcessLookupError, OSError):
+            self.proc.terminate()
+            print(f"[RemoteOpenAIServerCustom] Sent SIGTERM to process {pid}")
+
+        self.proc.join(15)
         if self.proc.is_alive():
-            # force kill if needed
-            self.proc.kill()
+            print(
+                f"[RemoteOpenAIServerCustom] Server {pid} did not respond "
+                "to SIGTERM, sending SIGKILL to process group"
+            )
+            if pgid is not None:
+                with contextlib.suppress(ProcessLookupError, OSError):
+                    os.killpg(pgid, signal.SIGKILL)
+            else:
+                self.proc.kill()
+            self.proc.join(10)
+
+        self._kill_process_group_survivors(pgid)
 
 
 def _test_completion(
@@ -659,6 +853,7 @@ def _test_completion(
     model: str,
     prompt: str,
     token_ids: list[int],
+    include_seeded_sampling: bool = True,
 ):
     results = []
 
@@ -693,33 +888,40 @@ def _test_completion(
         }
     )
 
-    # test seeded random sampling
-    completion = client.completions.create(
-        model=model, prompt=prompt, max_tokens=5, seed=33, temperature=1.0
-    )
+    if include_seeded_sampling:
+        # test seeded random sampling
+        completion = client.completions.create(
+            model=model, prompt=prompt, max_tokens=5, seed=33, temperature=1.0
+        )
 
-    results.append(
-        {
-            "test": "seeded_sampling",
-            "text": completion.choices[0].text,
-            "finish_reason": completion.choices[0].finish_reason,
-            "usage": completion.usage,
-        }
-    )
+        results.append(
+            {
+                "test": "seeded_sampling",
+                "text": completion.choices[0].text,
+                "finish_reason": completion.choices[0].finish_reason,
+                "usage": completion.usage,
+            }
+        )
 
-    # test seeded random sampling with multiple prompts
-    completion = client.completions.create(
-        model=model, prompt=[prompt, prompt], max_tokens=5, seed=33, temperature=1.0
-    )
+        # test seeded random sampling with multiple prompts
+        completion = client.completions.create(
+            model=model,
+            prompt=[prompt, prompt],
+            max_tokens=5,
+            seed=33,
+            temperature=1.0,
+        )
 
-    results.append(
-        {
-            "test": "seeded_sampling",
-            "text": [choice.text for choice in completion.choices],
-            "finish_reason": [choice.finish_reason for choice in completion.choices],
-            "usage": completion.usage,
-        }
-    )
+        results.append(
+            {
+                "test": "seeded_sampling",
+                "text": [choice.text for choice in completion.choices],
+                "finish_reason": [
+                    choice.finish_reason for choice in completion.choices
+                ],
+                "usage": completion.usage,
+            }
+        )
 
     # test simple list
     batch = client.completions.create(
@@ -914,6 +1116,8 @@ def compare_two_settings(
     *,
     method: str = "generate",
     max_wait_seconds: float | None = None,
+    include_seeded_sampling: bool = True,
+    force_v1_runner: bool = False,
 ) -> None:
     """
     Launch API server with two different sets of arguments/environments
@@ -925,6 +1129,11 @@ def compare_two_settings(
         arg2: The second set of arguments to pass to the API server.
         env1: The first set of environment variables to pass to the API server.
         env2: The second set of environment variables to pass to the API server.
+        include_seeded_sampling: Whether to include temperature=1.0 seeded
+            sampling checks in the default generate comparison.
+        force_v1_runner: Whether to pin all compared settings to the v1 model
+            runner to avoid mixing model runner differences into correctness
+            tests.
     """
 
     compare_all_settings(
@@ -933,6 +1142,8 @@ def compare_two_settings(
         [env1, env2],
         method=method,
         max_wait_seconds=max_wait_seconds,
+        include_seeded_sampling=include_seeded_sampling,
+        force_v1_runner=force_v1_runner,
     )
 
 
@@ -943,6 +1154,8 @@ def compare_all_settings(
     *,
     method: str = "generate",
     max_wait_seconds: float | None = None,
+    include_seeded_sampling: bool = True,
+    force_v1_runner: bool = False,
 ) -> None:
     """
     Launch API server with several different sets of arguments/environments
@@ -951,8 +1164,18 @@ def compare_all_settings(
         model: The model to test.
         all_args: A list of argument lists to pass to the API server.
         all_envs: A list of environment dictionaries to pass to the API server.
+        include_seeded_sampling: Whether to include temperature=1.0 seeded
+            sampling checks in the default generate comparison.
+        force_v1_runner: Whether to pin all compared settings to the v1 model
+            runner to avoid mixing model runner differences into correctness
+            tests.
     """
 
+    if force_v1_runner:
+        all_envs = [
+            {"VLLM_USE_V2_MODEL_RUNNER": "0", **(env or {})} for env in all_envs
+        ]
+
     trust_remote_code = False
     for args in all_args:
         if "--trust-remote-code" in args:
@@ -1011,7 +1234,13 @@ def compare_all_settings(
             )
 
             if method == "generate":
-                results += _test_completion(client, model, prompt, token_ids)
+                results += _test_completion(
+                    client,
+                    model,
+                    prompt,
+                    token_ids,
+                    include_seeded_sampling=include_seeded_sampling,
+                )
             elif method == "generate_close":
                 results += _test_completion_close(client, model, prompt)
             elif method == "generate_chat":
@@ -1264,9 +1493,6 @@ def fork_new_process_for_each_test(func: Callable[_P, None]) -> Callable[_P, Non
 
     @functools.wraps(func)
     def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> None:
-        # Make the process the leader of its own process group
-        # to avoid sending SIGTERM to the parent process
-        os.setpgrp()
         from _pytest.outcomes import Skipped
 
         # Create a unique temporary file to store exception info from child
@@ -1286,6 +1512,9 @@ def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> None:
             pid = os.fork()
             print(f"Fork a new process to run a test {pid}")
             if pid == 0:
+                # Make the child process the leader of its own process group
+                # to avoid sending SIGTERM to the parent process
+                os.setpgrp()
                 # Parent process responsible for deleting, don't delete
                 # in child.
                 delete_after.pop_all()
@@ -1325,14 +1554,12 @@ def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> None:
                 else:
                     os._exit(0)
             else:
-                pgid = os.getpgid(pid)
+                # After setpgrp(), the child's pgid equals its pid
+                pgid = pid
                 _pid, _exitcode = os.waitpid(pid, 0)
-                # ignore SIGTERM signal itself
-                old_signal_handler = signal.signal(signal.SIGTERM, signal.SIG_IGN)
-                # kill all child processes
-                os.killpg(pgid, signal.SIGTERM)
-                # restore the signal handler
-                signal.signal(signal.SIGTERM, old_signal_handler)
+                # kill all child processes - but they may already have exited cleanly
+                with contextlib.suppress(ProcessLookupError):
+                    os.killpg(pgid, signal.SIGTERM)
                 if _exitcode != 0:
                     # Try to read the exception from the child process
                     exc_info = {}
@@ -1369,53 +1596,110 @@ def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> None:
     return wrapper
 
 
+def _format_subprocess_exit(returncode: int) -> str:
+    """Render a subprocess exit code, naming the signal for negative codes."""
+    if returncode >= 0:
+        return f"exit code {returncode}"
+    try:
+        return f"killed by {signal.Signals(-returncode).name} ({returncode})"
+    except ValueError:
+        return f"exit code {returncode}"
+
+
+# Set on the spawn-child interpreter so the wrapper short-circuits when the
+# child resolves `module.qualname` back to its own decorated form, instead of
+# launching another subprocess.
+_SPAWN_CHILD_ENV = "VLLM_TEST_SPAWN_CHILD"
+
+
 def spawn_new_process_for_each_test(f: Callable[_P, None]) -> Callable[_P, None]:
-    """Decorator to spawn a new process for each test function."""
+    """Decorator to spawn a new process for each test function.
+
+    Uses subprocess to run each test in a fresh interpreter and propagates
+    exceptions back to the parent, so test failures are never silently
+    swallowed (fixes https://github.com/vllm-project/vllm/issues/41415).
+
+    The child resolves the test function by importing its module and looking
+    it up by qualified name, rather than reconstructing it from a cloudpickle
+    blob. Pickling the function by value would also pickle its ``__globals__``
+    by value — turning module-level singletons (e.g.
+    ``vllm.compilation.counter.compilation_counter``) into stale clones in
+    the child, so increments performed by the production code in the child
+    would never be observable to the test.
+
+    The child inherits the parent's stdout/stderr so its output (engine
+    cores, NCCL, CUDA, ...) reaches the test runner live; the Python-level
+    traceback is serialized to ``tb_file`` for structured re-raising. A
+    native crash leaves ``tb_file`` empty — the diagnostic is then only in
+    the inherited subprocess output.
+    """
 
     @functools.wraps(f)
     def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> None:
-        # Check if we're already in a subprocess
-        if os.environ.get("RUNNING_IN_SUBPROCESS") == "1":
-            # If we are, just run the function directly
+        if os.environ.get(_SPAWN_CHILD_ENV) == "1":
             return f(*args, **kwargs)
 
-        import torch.multiprocessing as mp
-
-        with suppress(RuntimeError):
-            mp.set_start_method("spawn")
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".tb", mode="wb") as tmp:
+            tb_file = tmp.name
 
-        # Get the module
-        module_name = f.__module__
-
-        # Create a process with environment variable set
-        env = os.environ.copy()
-        env["RUNNING_IN_SUBPROCESS"] = "1"
-
-        with tempfile.TemporaryDirectory() as tempdir:
-            output_filepath = os.path.join(tempdir, "new_process.tmp")
+        try:
+            payload = cloudpickle.dumps(
+                {
+                    "module": f.__module__,
+                    "qualname": f.__qualname__,
+                    "args": args,
+                    "kwargs": kwargs,
+                    "tb_file": tb_file,
+                }
+            )
 
-            # `cloudpickle` allows pickling complex functions directly
-            input_bytes = cloudpickle.dumps((f, output_filepath))
+            child_script = (
+                "import sys, importlib, cloudpickle, traceback\n"
+                "try:\n"
+                "    from _pytest.outcomes import Skipped\n"
+                "except ImportError:\n"
+                "    class Skipped(BaseException): pass\n"
+                "data = cloudpickle.loads(sys.stdin.buffer.read())\n"
+                "mod = importlib.import_module(data['module'])\n"
+                "target = mod\n"
+                "for name in data['qualname'].split('.'):\n"
+                "    target = getattr(target, name)\n"
+                "try:\n"
+                "    target(*data['args'], **data['kwargs'])\n"
+                "except Skipped:\n"
+                "    sys.exit(0)\n"
+                "except BaseException:\n"
+                "    with open(data['tb_file'], 'w') as fp:\n"
+                "        fp.write(traceback.format_exc())\n"
+                "    sys.exit(1)\n"
+            )
 
             repo_root = str(VLLM_PATH.resolve())
-
-            env = dict(env or os.environ)
+            env = os.environ.copy()
             env["PYTHONPATH"] = repo_root + os.pathsep + env.get("PYTHONPATH", "")
+            env[_SPAWN_CHILD_ENV] = "1"
 
-            cmd = [sys.executable, "-m", f"{module_name}"]
-
-            returned = subprocess.run(
-                cmd, input=input_bytes, capture_output=True, env=env
+            result = subprocess.run(
+                [sys.executable, "-c", child_script],
+                input=payload,
+                env=env,
             )
 
-            # check if the subprocess is successful
-            try:
-                returned.check_returncode()
-            except Exception as e:
-                # wrap raised exception to provide more information
+            if result.returncode != 0:
+                try:
+                    with open(tb_file) as fp:
+                        tb = fp.read()
+                except OSError:
+                    tb = ""
+                if not tb:
+                    tb = "<no Python traceback; see subprocess output above>"
                 raise RuntimeError(
-                    f"Error raised in subprocess:\n{returned.stderr.decode()}"
-                ) from e
+                    f"Test subprocess '{f.__name__}' failed "
+                    f"({_format_subprocess_exit(result.returncode)}):\n{tb}"
+                )
+        finally:
+            with contextlib.suppress(OSError):
+                os.remove(tb_file)
 
     return wrapper
 
@@ -1434,8 +1718,7 @@ def create_new_process_for_each_test(
         A decorator to run test functions in separate processes.
     """
     if method is None:
-        use_spawn = current_platform.is_rocm() or current_platform.is_xpu()
-        method = "spawn" if use_spawn else "fork"
+        method = "spawn" if requires_spawn_multiprocessing() else "fork"
 
     assert method in ["spawn", "fork"], "Method must be either 'spawn' or 'fork'"
 
@@ -1497,7 +1780,7 @@ def multi_gpu_marks(*, num_gpus: int):
     """Get a collection of pytest marks to apply for `@multi_gpu_test`."""
     test_selector = pytest.mark.distributed(num_gpus=num_gpus)
     test_skipif = pytest.mark.skipif(
-        cuda_device_count_stateless() < num_gpus,
+        current_platform.device_count() < num_gpus,
         reason=f"Need at least {num_gpus} GPUs to run the test.",
     )
 
@@ -1529,7 +1812,7 @@ def gpu_tier_mark(*, min_gpus: int = 1, max_gpus: int | None = None):
         @gpu_tier_mark(max_gpus=1)          # only on single-GPU
         @gpu_tier_mark(min_gpus=2, max_gpus=4)  # 2-4 GPUs only
     """
-    gpu_count = cuda_device_count_stateless()
+    gpu_count = current_platform.device_count()
     marks = []
 
     if min_gpus > 1:
@@ -1666,7 +1949,7 @@ def has_module_attribute(module_name, attribute_name):
 
 def get_attn_backend_list_based_on_platform() -> list[str]:
     if current_platform.is_cuda():
-        return ["FLASH_ATTN", "TRITON_ATTN", "TREE_ATTN"]
+        return ["FLASH_ATTN", "TRITON_ATTN"]
     elif current_platform.is_rocm():
         attn_backend_list = ["TRITON_ATTN"]
         try:
@@ -1812,34 +2095,57 @@ def __init__(
         weight_shape: tuple[int, int],
         activation_quant_key: QuantKey,
         weight_quant_key: QuantKey,
+        input_dtype: torch.dtype,
         out_dtype: torch.dtype | None = None,
+        transpose_weights: bool = False,
         device: torch.device | None = None,
-        force_kernel: FP8ScaledMMLinearKernel | None = None,
+        force_kernel: type[_KernelT] | None = None,
     ):
         super().__init__()
-        per_tensor_weights = weight_quant_key.scale.group_shape.is_per_tensor()
-        is_static_activation_scale = activation_quant_key.scale.static
-        weight_scale_shape = (1,) if per_tensor_weights else (weight_shape[0], 1)
-
-        self.weight_scale = torch.rand(
-            weight_scale_shape, dtype=torch.float32, device=device
-        )
-        self.input_scale = (
-            torch.rand(1, dtype=torch.float32, device=device)
-            if is_static_activation_scale
-            else None
-        )
-        self.weight = torch.rand(weight_shape, device=device).to(dtype=FP8_DTYPE).t()
-        self.input_scale_ub = None
+        act_scale_desc = activation_quant_key.scale
+        weight_scale_desc = weight_quant_key.scale
+        is_block_wise = act_scale_desc.group_shape.is_per_group()
+        if is_block_wise:
+            block_size = weight_scale_desc.group_shape.col
+            weight_scale_shape = weight_shape[0] // block_size
+            self.weight_scale_inv = torch.rand(
+                (weight_scale_shape, weight_scale_shape), dtype=torch.float32
+            )
+            self.weight = torch.rand(weight_shape).to(dtype=FP8_DTYPE)
+            self.input_scale = None
+            self.weight_scale = None
+            self.weight_block_size = [block_size, block_size]
+            if transpose_weights:
+                self.weight = self.weight.t()
+        else:
+            per_tensor_weights = weight_scale_desc.group_shape.is_per_tensor()
+            is_static_activation_scale = act_scale_desc.static
+            weight_scale_shape = (1,) if per_tensor_weights else (weight_shape[0], 1)
+            self.weight_scale_inv = None
+            self.weight_scale = torch.rand(
+                weight_scale_shape, dtype=torch.float32, device=device
+            )
+            self.input_scale = (
+                torch.rand(1, dtype=torch.float32, device=device)
+                if is_static_activation_scale
+                else None
+            )
+            self.weight = (
+                torch.rand(weight_shape, device=device).to(dtype=FP8_DTYPE).t()
+            )
+            self.input_scale_ub = None
 
         out_dtype = torch.get_default_dtype() if out_dtype is None else out_dtype
 
         self.kernel = init_fp8_linear_kernel(
             activation_quant_key=activation_quant_key,
             weight_quant_key=weight_quant_key,
+            weight_shape=weight_shape,
+            input_dtype=input_dtype,
             out_dtype=out_dtype,
             force_kernel=force_kernel,
         )
+        self.kernel.process_weights_after_loading(self)
 
     def is_quant_fp8_enabled(self) -> bool:
         return self.kernel.quant_fp8.enabled()
@@ -1848,61 +2154,3 @@ def forward(
         self, y: torch.Tensor, bias: torch.Tensor | None = None
     ) -> torch.Tensor:
         return self.kernel.apply_weights(self, y, bias)
-
-
-# TODO: Drop TestBlockFP8Layer in favour of a unified TestFP8Layer
-# after refactoring W8A8BlockFp8LinearOp.
-# https://github.com/vllm-project/vllm/issues/31818
-class TestBlockFP8Layer:
-    """
-    Test helper for blockwise FP8 linear operations. Creates random weights
-    and scales for W8A8BlockFp8LinearOp.
-
-    This is a workaround until W8A8BlockFp8LinearOp implements the kernel
-    abstraction (ScaledMMLinearKernel) for blockwise quantization.
-
-    Args:
-        weight_shape: Shape of the weight tensor (out_features, in_features).
-        group_shape: Blockwise quantization group shape.
-        cutlass_block_fp8_supported: Whether CUTLASS blockwise FP8 is available.
-        use_aiter_and_is_supported: Whether to use aiter quantization ops.
-        transpose_weights: Whether to transpose weights after creation.
-    """
-
-    def __init__(
-        self,
-        weight_shape: tuple[int, int],
-        group_shape: GroupShape,
-        cutlass_block_fp8_supported: bool = False,
-        use_aiter_and_is_supported: bool = False,
-        transpose_weights: bool = False,
-    ):
-        weight_scale_shape = weight_shape[0] // group_shape[1]
-        self.weight_scale = torch.rand(
-            (weight_scale_shape, weight_scale_shape), dtype=torch.float32
-        )
-        self.weight = torch.rand(weight_shape).to(dtype=FP8_DTYPE)
-        self.input_scale = None
-        if transpose_weights:
-            self.weight = self.weight.t()
-
-        self.linear_op = W8A8BlockFp8LinearOp(
-            weight_group_shape=GroupShape(group_shape[1], group_shape[1]),
-            act_quant_group_shape=group_shape,
-            cutlass_block_fp8_supported=cutlass_block_fp8_supported,
-            use_aiter_and_is_supported=use_aiter_and_is_supported,
-        )
-
-    def __call__(
-        self, y: torch.Tensor, bias: torch.Tensor | None = None
-    ) -> torch.Tensor:
-        return self.linear_op.apply(
-            input=y,
-            weight=self.weight,
-            weight_scale=self.weight_scale,
-            input_scale=self.input_scale,
-            bias=bias,
-        )
-
-    def is_quant_fp8_enabled(self) -> bool:
-        return self.linear_op.input_quant_op.enabled()
diff --git a/tests/utils_/test_mem_utils.py b/tests/utils_/test_mem_utils.py
index 4067b0257811..861e73c7dedd 100644
--- a/tests/utils_/test_mem_utils.py
+++ b/tests/utils_/test_mem_utils.py
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from unittest.mock import MagicMock, patch
+
 import torch
 from vllm_test_utils.monitor import monitor
 
@@ -18,6 +20,13 @@ def test_memory_profiling():
     # 512 MiB allocation outside of this instance
     handle1 = lib.cudaMalloc(512 * 1024 * 1024)
 
+    # Warm up PyTorch's CUDA/ROCm context so that its internal initialization
+    # overhead (streams, cuBLAS handles, etc.) is included in the baseline and
+    # does not inflate non-torch increase which is larger on ROCm than on CUDA
+    _warmup = torch.zeros(1, device="cuda")
+    del _warmup
+    torch.accelerator.empty_cache()
+
     baseline_snapshot = MemorySnapshot()
 
     # load weights
@@ -61,3 +70,62 @@ def measure_current_non_torch():
     del weights
     lib.cudaFree(handle1)
     lib.cudaFree(handle2)
+
+
+def test_memory_snapshot_uses_psutil_on_integrated_gpu():
+    """On integrated (UMA) GPUs, free_memory should come from psutil."""
+    mock_cuda_free = 40 * 1024**3
+    mock_cuda_total = 120 * 1024**3
+    mock_psutil_available = 100 * 1024**3
+
+    with (
+        patch("vllm.utils.mem_utils.current_platform") as mock_platform,
+        patch("vllm.utils.mem_utils.psutil") as mock_psutil,
+    ):
+        mock_platform.mem_get_info.return_value = (
+            mock_cuda_free,
+            mock_cuda_total,
+        )
+        mock_platform.is_integrated_gpu.return_value = True
+        mock_platform.memory_stats.return_value = {
+            "allocated_bytes.all.peak": 0,
+        }
+        mock_platform.memory_reserved.return_value = 0
+        mock_platform.current_device = lambda: "cuda:0"
+
+        mock_vmem = MagicMock()
+        mock_vmem.available = mock_psutil_available
+        mock_psutil.virtual_memory.return_value = mock_vmem
+
+        snapshot = MemorySnapshot(device="cuda:0")
+
+        assert snapshot.free_memory == mock_psutil_available
+        assert snapshot.total_memory == mock_cuda_total
+        mock_psutil.virtual_memory.assert_called_once()
+
+
+def test_memory_snapshot_uses_cuda_on_discrete_gpu():
+    """On discrete GPUs, free_memory should come from CUDA mem_get_info."""
+    mock_cuda_free = 70 * 1024**3
+    mock_cuda_total = 80 * 1024**3
+
+    with (
+        patch("vllm.utils.mem_utils.current_platform") as mock_platform,
+        patch("vllm.utils.mem_utils.psutil") as mock_psutil,
+    ):
+        mock_platform.mem_get_info.return_value = (
+            mock_cuda_free,
+            mock_cuda_total,
+        )
+        mock_platform.is_integrated_gpu.return_value = False
+        mock_platform.memory_stats.return_value = {
+            "allocated_bytes.all.peak": 0,
+        }
+        mock_platform.memory_reserved.return_value = 0
+        mock_platform.current_device = lambda: "cuda:0"
+
+        snapshot = MemorySnapshot(device="cuda:0")
+
+        assert snapshot.free_memory == mock_cuda_free
+        assert snapshot.total_memory == mock_cuda_total
+        mock_psutil.virtual_memory.assert_not_called()
diff --git a/tests/utils_/test_numa_utils.py b/tests/utils_/test_numa_utils.py
new file mode 100644
index 000000000000..9f6fa85da586
--- /dev/null
+++ b/tests/utils_/test_numa_utils.py
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import os
+from types import SimpleNamespace
+
+import pytest
+
+from vllm.config import ParallelConfig
+from vllm.utils import numa_utils
+
+
+def _make_config(**parallel_kwargs):
+    parallel_defaults = dict(
+        numa_bind=False,
+        numa_bind_nodes=None,
+        numa_bind_cpus=None,
+        distributed_executor_backend="mp",
+        data_parallel_backend="mp",
+        nnodes_within_dp=1,
+        data_parallel_rank_local=0,
+        data_parallel_index=0,
+        pipeline_parallel_size=1,
+        tensor_parallel_size=1,
+    )
+    parallel_defaults.update(parallel_kwargs)
+    parallel_config = SimpleNamespace(**parallel_defaults)
+    return SimpleNamespace(parallel_config=parallel_config)
+
+
+def test_get_numactl_args_with_node_binding():
+    vllm_config = _make_config(numa_bind=True, numa_bind_nodes=[0, 1])
+    assert (
+        numa_utils._get_numactl_args(vllm_config, local_rank=1)
+        == "--cpunodebind=1 --membind=1"
+    )
+
+
+def test_get_numactl_args_with_cpu_binding():
+    vllm_config = _make_config(
+        numa_bind=True,
+        numa_bind_nodes=[0, 1],
+        numa_bind_cpus=["0-3", "4-7"],
+    )
+    assert (
+        numa_utils._get_numactl_args(vllm_config, local_rank=1)
+        == "--physcpubind=4-7 --membind=1"
+    )
+
+
+def test_get_numactl_args_uses_dp_offset():
+    vllm_config = _make_config(
+        numa_bind=True,
+        numa_bind_nodes=[0, 0, 1, 1],
+        data_parallel_rank_local=1,
+        pipeline_parallel_size=1,
+        tensor_parallel_size=2,
+    )
+    assert (
+        numa_utils._get_numactl_args(vllm_config, local_rank=1)
+        == "--cpunodebind=1 --membind=1"
+    )
+
+
+def test_get_numactl_args_requires_detectable_nodes(monkeypatch):
+    vllm_config = _make_config(numa_bind=True)
+    monkeypatch.setattr(numa_utils, "get_auto_numa_nodes", lambda: None)
+    with pytest.raises(RuntimeError):
+        numa_utils._get_numactl_args(vllm_config, local_rank=0)
+
+
+def test_log_numactl_show(monkeypatch):
+    log_lines = []
+
+    def fake_debug(msg, *args):
+        log_lines.append(msg % args)
+
+    monkeypatch.setattr(numa_utils.logger, "debug", fake_debug)
+    monkeypatch.setattr(
+        numa_utils.subprocess,
+        "run",
+        lambda *args, **kwargs: SimpleNamespace(
+            stdout="policy: bind\nphyscpubind: 0 1 2 3\n", returncode=0
+        ),
+    )
+
+    assert numa_utils._log_numactl_show("Worker_0") is True
+    assert log_lines == [
+        "Worker_0 affinity: policy: bind, physcpubind: 0 1 2 3",
+    ]
+
+
+def test_get_numactl_executable_points_to_fixed_wrapper(monkeypatch):
+    monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/numactl")
+    executable, debug_str = numa_utils._get_numactl_executable()
+    assert executable.endswith("/vllm/utils/numa_wrapper.sh")
+    assert "_VLLM_INTERNAL_NUMACTL_ARGS" in debug_str
+
+
+def test_set_numa_wrapper_env_restores_previous_values():
+    os.environ[numa_utils._NUMACTL_ARGS_ENV] = "old-args"
+    os.environ[numa_utils._NUMACTL_PYTHON_EXECUTABLE_ENV] = "old-python"
+
+    with numa_utils._set_numa_wrapper_env("new-args", "new-python"):
+        assert os.environ[numa_utils._NUMACTL_ARGS_ENV] == "new-args"
+        assert os.environ[numa_utils._NUMACTL_PYTHON_EXECUTABLE_ENV] == "new-python"
+
+    assert os.environ[numa_utils._NUMACTL_ARGS_ENV] == "old-args"
+    assert os.environ[numa_utils._NUMACTL_PYTHON_EXECUTABLE_ENV] == "old-python"
+
+
+def test_set_numa_wrapper_env_clears_values_when_unset():
+    os.environ.pop(numa_utils._NUMACTL_ARGS_ENV, None)
+    os.environ.pop(numa_utils._NUMACTL_PYTHON_EXECUTABLE_ENV, None)
+
+    with numa_utils._set_numa_wrapper_env("new-args", "new-python"):
+        assert os.environ[numa_utils._NUMACTL_ARGS_ENV] == "new-args"
+        assert os.environ[numa_utils._NUMACTL_PYTHON_EXECUTABLE_ENV] == "new-python"
+
+    assert numa_utils._NUMACTL_ARGS_ENV not in os.environ
+    assert numa_utils._NUMACTL_PYTHON_EXECUTABLE_ENV not in os.environ
+
+
+def test_parallel_config_validates_numa_bind_nodes():
+    with pytest.raises(ValueError, match="non-negative"):
+        ParallelConfig(numa_bind_nodes=[0, -1])
+
+
+@pytest.mark.parametrize("cpuset", ["", "abc", "1-", "4-1", "1,,2", "1:2"])
+def test_parallel_config_rejects_invalid_numa_bind_cpus(cpuset):
+    with pytest.raises(ValueError, match="numa_bind_cpus"):
+        ParallelConfig(numa_bind_cpus=[cpuset])
diff --git a/tests/utils_/test_ray_utils.py b/tests/utils_/test_ray_utils.py
new file mode 100644
index 000000000000..0872ae9413f7
--- /dev/null
+++ b/tests/utils_/test_ray_utils.py
@@ -0,0 +1,100 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from vllm.v1.executor.ray_utils import get_bundles_sorted_by_node
+
+NODE_A = "node_a"
+NODE_B = "node_b"
+NODE_C = "node_c"
+
+IP_A = "10.0.0.1"
+IP_B = "10.0.0.2"
+IP_C = "10.0.0.3"
+
+NODE_ID_TO_IP = {NODE_A: IP_A, NODE_B: IP_B, NODE_C: IP_C}
+
+MOCK_RAY_NODES = [
+    {"NodeID": NODE_A, "NodeManagerAddress": IP_A, "Alive": True},
+    {"NodeID": NODE_B, "NodeManagerAddress": IP_B, "Alive": True},
+    {"NodeID": NODE_C, "NodeManagerAddress": IP_C, "Alive": True},
+]
+
+
+@pytest.mark.parametrize(
+    "bundles_to_node_id,bundle_specs,expected",
+    [
+        pytest.param(
+            {0: NODE_C, 1: NODE_A, 2: NODE_B, 3: NODE_C, 4: NODE_A, 5: NODE_B},
+            [{"GPU": 1}] * 6,
+            [
+                (1, NODE_A, IP_A),
+                (4, NODE_A, IP_A),
+                (2, NODE_B, IP_B),
+                (5, NODE_B, IP_B),
+                (0, NODE_C, IP_C),
+                (3, NODE_C, IP_C),
+            ],
+        ),
+        pytest.param(
+            {0: NODE_B, 1: NODE_B, 2: NODE_A, 3: NODE_A},
+            [{"GPU": 1}] * 4,
+            [
+                (2, NODE_A, IP_A),
+                (3, NODE_A, IP_A),
+                (0, NODE_B, IP_B),
+                (1, NODE_B, IP_B),
+            ],
+        ),
+        pytest.param(
+            {0: NODE_C, 1: NODE_B, 2: NODE_C, 3: NODE_B},
+            [{"GPU": 1}] * 4,
+            [
+                (1, NODE_B, IP_B),
+                (3, NODE_B, IP_B),
+                (0, NODE_C, IP_C),
+                (2, NODE_C, IP_C),
+            ],
+        ),
+        pytest.param(
+            {0: NODE_A, 1: NODE_A, 2: NODE_A},
+            [{"GPU": 1}] * 3,
+            [(0, NODE_A, IP_A), (1, NODE_A, IP_A), (2, NODE_A, IP_A)],
+        ),
+        pytest.param(
+            {},
+            [],
+            [],
+        ),
+        pytest.param(
+            {0: NODE_A, 1: NODE_B, 2: NODE_A},
+            [{"CPU": 1}, {"GPU": 1}, {"GPU": 1}],
+            [(2, NODE_A, IP_A), (1, NODE_B, IP_B)],
+        ),
+    ],
+)
+def test_get_bundles_sorted_by_node(bundles_to_node_id, bundle_specs, expected):
+    mock_pg = MagicMock()
+    mock_pg.bundle_specs = bundle_specs
+
+    mock_ctx = MagicMock()
+    mock_ctx.get_node_id.return_value = NODE_A
+
+    with (
+        patch(
+            "vllm.v1.executor.ray_utils.placement_group_table",
+            return_value={"bundles_to_node_id": bundles_to_node_id},
+        ),
+        patch("vllm.v1.executor.ray_utils.ray") as mock_ray,
+        patch("vllm.v1.executor.ray_utils.current_platform") as mock_platform,
+    ):
+        mock_ray.get_runtime_context.return_value = mock_ctx
+        mock_ray.nodes.return_value = MOCK_RAY_NODES
+        mock_platform.ray_device_key = "GPU"
+
+        result = get_bundles_sorted_by_node(mock_pg)
+
+    assert result == expected
diff --git a/tests/utils_/test_serial_utils.py b/tests/utils_/test_serial_utils.py
index 42e466709cbf..85661657d50f 100644
--- a/tests/utils_/test_serial_utils.py
+++ b/tests/utils_/test_serial_utils.py
@@ -7,17 +7,39 @@
 from vllm.utils.serial_utils import (
     EMBED_DTYPES,
     ENDIANNESS,
+    MM_METADATA_DTYPES,
     EmbedDType,
     Endianness,
+    MmMetadataDType,
     binary2tensor,
     tensor2binary,
 )
 
+FLOAT_EMBED_DTYPES = tuple(EMBED_DTYPES.keys())
+INTEGER_EMBED_DTYPES = tuple(MM_METADATA_DTYPES.keys())
+
+
+def _build_integer_tensor(
+    embed_dtype: MmMetadataDType, shape: tuple[int, ...]
+) -> torch.Tensor:
+    torch_dtype = MM_METADATA_DTYPES[embed_dtype].torch_dtype
+
+    if torch_dtype is torch.bool:
+        return torch.randint(0, 2, shape, dtype=torch.int32).to(torch.bool)
+    if torch_dtype is torch.uint8:
+        return torch.randint(0, 256, shape, dtype=torch.uint8)
+    if torch_dtype is torch.int32:
+        return torch.randint(-(2**20), 2**20, shape, dtype=torch.int32)
+    if torch_dtype is torch.int64:
+        return torch.randint(-(2**62), 2**62, shape, dtype=torch.int64)
+
+    raise AssertionError(f"Unsupported non-floating embed dtype: {embed_dtype}")
+
 
 @pytest.mark.parametrize("endianness", ENDIANNESS)
-@pytest.mark.parametrize("embed_dtype", EMBED_DTYPES.keys())
+@pytest.mark.parametrize("embed_dtype", FLOAT_EMBED_DTYPES)
 @torch.inference_mode()
-def test_encode_and_decode(embed_dtype: EmbedDType, endianness: Endianness):
+def test_encode_and_decode_floats(embed_dtype: EmbedDType, endianness: Endianness):
     for i in range(10):
         tensor = torch.rand(2, 3, 5, 7, 11, 13, device="cpu", dtype=torch.float32)
         shape = tensor.shape
@@ -40,3 +62,20 @@ def test_encode_and_decode(embed_dtype: EmbedDType, endianness: Endianness):
             name_1="new",
             tol=1e-2,
         )
+
+
+@pytest.mark.parametrize("endianness", ENDIANNESS)
+@pytest.mark.parametrize("embed_dtype", INTEGER_EMBED_DTYPES)
+@torch.inference_mode()
+def test_encode_and_decode_integers(
+    embed_dtype: MmMetadataDType, endianness: Endianness
+):
+    shape = (2, 3, 5, 7, 11, 13)
+
+    for i in range(10):
+        tensor = _build_integer_tensor(embed_dtype, shape)
+        binary = tensor2binary(tensor, embed_dtype, endianness)
+        new_tensor = binary2tensor(binary, shape, embed_dtype, endianness)
+
+        assert new_tensor.dtype == MM_METADATA_DTYPES[embed_dtype].torch_dtype
+        torch.testing.assert_close(tensor, new_tensor, atol=0, rtol=0)
diff --git a/tests/utils_/test_spawn_decorator.py b/tests/utils_/test_spawn_decorator.py
new file mode 100644
index 000000000000..1740ea30de94
--- /dev/null
+++ b/tests/utils_/test_spawn_decorator.py
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for spawn_new_process_for_each_test decorator."""
+
+import pytest
+
+from tests.utils import spawn_new_process_for_each_test
+
+
+@spawn_new_process_for_each_test
+def test_spawn_decorator_passing():
+    """Passing function should complete normally."""
+    assert 1 + 1 == 2
+
+
+@pytest.mark.xfail(raises=RuntimeError, strict=True)
+@spawn_new_process_for_each_test
+def test_spawn_decorator_failure_is_caught():
+    """Failing function should raise RuntimeError, never silently pass."""
+    raise ValueError("intentional failure")
+
+
+@spawn_new_process_for_each_test
+def test_spawn_decorator_skip():
+    """pytest.skip inside subprocess should propagate correctly."""
+    pytest.skip("intentional skip")
+
+
+@spawn_new_process_for_each_test
+@pytest.mark.parametrize("x,y,expected", [(1, 2, 3), (0, 0, 0)])
+def test_spawn_decorator_parametrized(x, y, expected):
+    """Args and kwargs must be forwarded correctly to subprocess."""
+    assert x + y == expected
diff --git a/tests/utils_/test_system_utils.py b/tests/utils_/test_system_utils.py
index 3d1b1fc4ce37..5ef55877a7c7 100644
--- a/tests/utils_/test_system_utils.py
+++ b/tests/utils_/test_system_utils.py
@@ -1,10 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import os
 import tempfile
 from pathlib import Path
 
-from vllm.utils.system_utils import unique_filepath
+from vllm.utils.system_utils import _maybe_force_spawn, unique_filepath
 
 
 def test_unique_filepath():
@@ -17,3 +18,10 @@ def test_unique_filepath():
         paths.add(path)
     assert len(paths) == 10
     assert len(list(Path(temp_dir).glob("*.txt"))) == 10
+
+
+def test_numa_bind_forces_spawn(monkeypatch):
+    monkeypatch.delenv("VLLM_WORKER_MULTIPROC_METHOD", raising=False)
+    monkeypatch.setattr("sys.argv", ["vllm", "serve", "--numa-bind"])
+    _maybe_force_spawn()
+    assert os.environ["VLLM_WORKER_MULTIPROC_METHOD"] == "spawn"
diff --git a/tests/v1/attention/test_attention_backends.py b/tests/v1/attention/test_attention_backends.py
index 8c3a62b6ea5a..46d25c017f27 100644
--- a/tests/v1/attention/test_attention_backends.py
+++ b/tests/v1/attention/test_attention_backends.py
@@ -36,10 +36,11 @@
     AttentionBackendEnum.FLASHINFER,
     AttentionBackendEnum.FLEX_ATTENTION,
     AttentionBackendEnum.TRITON_ATTN,
-    AttentionBackendEnum.TREE_ATTN,
     "FLEX_ATTENTION_SLOW",
 ]
 
+DEVICE_TYPE = current_platform.device_type
+
 # Remove flashinfer from the list if it's not available
 try:
     import flashinfer  # noqa: F401
@@ -313,6 +314,7 @@ def _test_backend_correctness(
     backend_to_test: list[AttentionBackendEnum | str],
     mask_mod,
     *,
+    causal: bool = True,
     attn_type: AttentionType = AttentionType.DECODER,
     block_size: int = 16,
     atol: float = 1e-2,
@@ -366,9 +368,9 @@ def _test_backend_correctness(
         num_gpu_blocks=8192,
         hf_config_override=hf_config_override,
     )
-    device = torch.device("cuda:0")
+    device = torch.device(f"{DEVICE_TYPE}:0")
 
-    kv_cache_spec = create_standard_kv_cache_spec(vllm_config)
+    kv_cache_spec = create_standard_kv_cache_spec(vllm_config, attn_type)
 
     # 1. Setup
     batch_size = batch_spec.batch_size
@@ -451,9 +453,7 @@ def _test_backend_correctness(
     common_attn_metadata = create_common_attn_metadata(
         batch_spec, vllm_config.cache_config.block_size, device
     )
-    if attn_type == AttentionType.ENCODER_ONLY:
-        # For encoder-only, all tokens are prefill tokens
-        common_attn_metadata.causal = False
+    common_attn_metadata.causal = causal
 
     # 3. Simulate Paged KV Cache and a realistic slot_mapping
     kv_cache = create_and_prepopulate_kv_cache(
@@ -734,6 +734,76 @@ def bidi_sliding_window_mask_mod(
         model,
         SLIDING_WINDOW_BACKENDS_TO_TEST,
         sliding_window_mask_mod_fn,
+        causal=False,
         attn_type=AttentionType.ENCODER_ONLY,
         tensor_parallel_size=tensor_parallel_size,
     )
+
+
+NON_CAUSAL_BACKENDS_TO_TEST = [
+    AttentionBackendEnum.FLASH_ATTN,
+    AttentionBackendEnum.FLEX_ATTENTION,
+    "FLEX_ATTENTION_SLOW",
+]
+
+if current_platform.is_rocm():
+    NON_CAUSAL_BACKENDS_TO_TEST = [
+        x
+        for x in NON_CAUSAL_BACKENDS_TO_TEST
+        if x is not AttentionBackendEnum.FLASH_ATTN
+    ]
+
+
+@pytest.mark.parametrize(
+    "batch_spec_name",
+    [
+        "small_decode",
+        "small_prefill",
+        "mixed_small",
+    ],
+)
+@pytest.mark.parametrize("model", ["meta-llama/Meta-Llama-3-8B"])
+def test_non_causal_backend_correctness(
+    default_vllm_config, batch_spec_name: str, model: str
+):
+    """Test backend's correctness with non-causal (bidirectional) decoder
+    attention, as used by DFlash speculative decoding."""
+
+    def bidirectional_mask_mod(
+        b: torch.Tensor,
+        h: torch.Tensor,
+        q_idx: torch.Tensor,
+        kv_idx: torch.Tensor,
+        *,
+        context_len: int,
+    ):
+        return q_idx >= 0  # Always True
+
+    batch_spec = BATCH_SPECS[batch_spec_name]
+    LARGE_BLOCK_BACKENDS = (
+        [AttentionBackendEnum.FLEX_ATTENTION]
+        if is_torch_equal_or_newer("2.9.0.dev0")
+        else []
+    )
+
+    SMALL_BLOCK_BACKENDS = [
+        x for x in NON_CAUSAL_BACKENDS_TO_TEST if x not in LARGE_BLOCK_BACKENDS
+    ]
+
+    _test_backend_correctness(
+        batch_spec,
+        model,
+        SMALL_BLOCK_BACKENDS,
+        bidirectional_mask_mod,
+        causal=False,
+    )
+
+    if LARGE_BLOCK_BACKENDS:
+        _test_backend_correctness(
+            batch_spec,
+            model,
+            LARGE_BLOCK_BACKENDS,
+            bidirectional_mask_mod,
+            causal=False,
+            block_size=128,
+        )
diff --git a/tests/v1/attention/test_attention_backends_selection.py b/tests/v1/attention/test_attention_backends_selection.py
index 9d8d5d3ebb19..4242cc5ff2e2 100644
--- a/tests/v1/attention/test_attention_backends_selection.py
+++ b/tests/v1/attention/test_attention_backends_selection.py
@@ -13,6 +13,7 @@
 from vllm.v1.attention.backends.linear_attn import LinearAttentionBackend
 from vllm.v1.attention.backends.mamba1_attn import Mamba1AttentionBackend
 from vllm.v1.attention.backends.mamba2_attn import Mamba2AttentionBackend
+from vllm.v1.attention.backends.registry import MambaAttentionBackendEnum
 from vllm.v1.attention.backends.short_conv_attn import ShortConvAttentionBackend
 
 
@@ -32,7 +33,7 @@
                 use_rms_norm=True,
             ),
             Mamba1AttentionBackend,
-            "mamba1",
+            MambaAttentionBackendEnum.MAMBA1,
         ),
         (
             MambaMixer2,
@@ -48,7 +49,7 @@
                 head_dim=32,
             ),
             Mamba2AttentionBackend,
-            "mamba2",
+            MambaAttentionBackendEnum.MAMBA2,
         ),
         (
             MiniMaxText01LinearAttention,
@@ -64,7 +65,7 @@
                 linear_layer_idx=0,
             ),
             LinearAttentionBackend,
-            "linear_attention",
+            MambaAttentionBackendEnum.LINEAR,
         ),
         (
             ShortConv,
@@ -74,7 +75,7 @@
                 layer_idx=0,
             ),
             ShortConvAttentionBackend,
-            "short_conv",
+            MambaAttentionBackendEnum.SHORT_CONV,
         ),
     ],
 )
@@ -97,10 +98,14 @@ def test_mamba_layers_get_attn_backend(
 @pytest.mark.parametrize(
     "layer_class,expected_backend,expected_mamba_type",
     [
-        (MambaMixer, Mamba1AttentionBackend, "mamba1"),
-        (MambaMixer2, Mamba2AttentionBackend, "mamba2"),
-        (MiniMaxText01LinearAttention, LinearAttentionBackend, "linear_attention"),
-        (ShortConv, ShortConvAttentionBackend, "short_conv"),
+        (MambaMixer, Mamba1AttentionBackend, MambaAttentionBackendEnum.MAMBA1),
+        (MambaMixer2, Mamba2AttentionBackend, MambaAttentionBackendEnum.MAMBA2),
+        (
+            MiniMaxText01LinearAttention,
+            LinearAttentionBackend,
+            MambaAttentionBackendEnum.LINEAR,
+        ),
+        (ShortConv, ShortConvAttentionBackend, MambaAttentionBackendEnum.SHORT_CONV),
     ],
 )
 def test_mamba_layers_have_unified_interface(
diff --git a/tests/v1/attention/test_attention_splitting.py b/tests/v1/attention/test_attention_splitting.py
index 66edaf0a7578..d6fc59576b2a 100644
--- a/tests/v1/attention/test_attention_splitting.py
+++ b/tests/v1/attention/test_attention_splitting.py
@@ -379,3 +379,68 @@ def test_prefill_split_across_ubatches(
         # Map to original request index
         orig_idx = split_req_idx + j
         assert int(second_meta.seq_lens[j]) == seq_lens[orig_idx]
+
+
+def test_build_attention_metadata_zeros_stale_is_prefilling():
+    """_build_attention_metadata zeroes is_prefilling for padded rows."""
+    from unittest.mock import MagicMock, patch
+
+    from vllm.v1.attention.backend import CommonAttentionMetadata
+    from vllm.v1.worker.gpu_model_runner import GPUModelRunner
+
+    num_reqs = 3
+    num_reqs_padded = 5
+
+    # Real rows [0-2] have known computed/prompt values; padded rows [3-4]
+    # carry stale data from a prior prefill (num_computed < num_prompt → True).
+    num_computed = torch.tensor([50, 100, 200, 10, 20], dtype=torch.int32)
+    num_prompt = torch.tensor([50, 200, 200, 100, 200], dtype=torch.int32)
+
+    runner = MagicMock()
+    runner.kv_cache_config.kv_cache_groups = [
+        MagicMock()
+    ]  # non-empty: skip early return
+    runner.attn_groups = [[]]  # empty inner list: inner loop never runs
+    runner.input_batch.num_computed_tokens_cpu_tensor = num_computed
+    runner.input_batch.num_prompt_tokens_cpu_tensor = num_prompt
+    runner.optimistic_seq_lens_cpu = torch.tensor([100, 200, 300, 0, 0])
+    runner.query_start_loc.gpu = torch.zeros(num_reqs_padded + 1, dtype=torch.int32)
+    runner.query_start_loc.cpu = torch.zeros(num_reqs_padded + 1, dtype=torch.int32)
+    runner.seq_lens = torch.zeros(num_reqs_padded, dtype=torch.int32)
+    runner.positions = torch.zeros(num_reqs_padded, dtype=torch.int64)
+    runner.routed_experts_initialized = False
+    runner.use_async_spec_decode = False
+    runner.dcp_world_size = 1
+    runner.speculative_config = None
+    runner.is_mm_prefix_lm = False
+    runner._get_encoder_seq_lens.return_value = (None, None)
+
+    # Intercept CommonAttentionMetadata construction to capture is_prefilling.
+    # With speculative_config=None the constructor is called exactly once (for
+    # cm_base), so captured reflects what the fix produced before storage.
+    captured_is_prefilling = None
+    original_init = CommonAttentionMetadata.__init__
+
+    def capturing_init(self, *args, **kwargs):
+        nonlocal captured_is_prefilling
+        if "is_prefilling" in kwargs:
+            captured_is_prefilling = kwargs["is_prefilling"]
+        original_init(self, *args, **kwargs)
+
+    with patch.object(CommonAttentionMetadata, "__init__", capturing_init):
+        GPUModelRunner._build_attention_metadata(
+            runner,
+            num_tokens=num_reqs,
+            num_reqs=num_reqs,
+            max_query_len=1,
+            num_tokens_padded=num_reqs_padded,
+            num_reqs_padded=num_reqs_padded,
+            slot_mappings={0: torch.zeros(num_reqs_padded, dtype=torch.int64)},
+        )
+
+    assert captured_is_prefilling is not None
+    assert not captured_is_prefilling[0]  # decode  (50 >= 50)
+    assert captured_is_prefilling[1]  # prefill (100 < 200)
+    assert not captured_is_prefilling[2]  # decode  (200 >= 200)
+    assert not captured_is_prefilling[3]  # stale data (10 < 100) zeroed
+    assert not captured_is_prefilling[4]  # stale data (20 < 200) zeroed
diff --git a/tests/v1/attention/test_chunked_local_attention.py b/tests/v1/attention/test_chunked_local_attention.py
index 4529c2cfc29b..c2798c8f2fa4 100644
--- a/tests/v1/attention/test_chunked_local_attention.py
+++ b/tests/v1/attention/test_chunked_local_attention.py
@@ -7,6 +7,7 @@
 import torch
 
 from tests.v1.attention.utils import BatchSpec, create_common_attn_metadata
+from vllm.platforms import current_platform
 from vllm.v1.attention.backends.utils import make_local_attention_virtual_batches
 
 
@@ -22,6 +23,8 @@ class LocalAttentionTestData:
     expected_local_block_table: list[list[int]]
 
 
+DEVICE_TYPE = current_platform.device_type
+
 test_data_list = [
     # Same as example in docstring of make_local_attention_virtual_batches
     # except block table has 9 columns instead of 10
@@ -151,7 +154,7 @@ class LocalAttentionTestData:
 
 @pytest.mark.parametrize("test_data", test_data_list)
 def test_local_attention_virtual_batches(test_data: LocalAttentionTestData):
-    device = torch.device("cuda:0")
+    device = torch.device(f"{DEVICE_TYPE}:0")
     batch_spec = test_data.batch_spec
     attn_chunk_size = test_data.attn_chunk_size
     block_size = test_data.block_size
diff --git a/tests/v1/attention/test_indexer_deepseek_v4_slot_mapping.py b/tests/v1/attention/test_indexer_deepseek_v4_slot_mapping.py
new file mode 100644
index 000000000000..159bb8af3fb9
--- /dev/null
+++ b/tests/v1/attention/test_indexer_deepseek_v4_slot_mapping.py
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+import torch
+
+from tests.v1.attention.utils import create_vllm_config
+from vllm.v1.attention.backend import CommonAttentionMetadata
+from vllm.v1.attention.backends.mla.indexer import DeepseekV32IndexerMetadataBuilder
+from vllm.v1.kv_cache_interface import MLAAttentionSpec
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires CUDA")
+def test_indexer_builder_deepseek_v4_compressed_slot_mapping_uses_storage_block_size():
+    """Regression test: DeepseekV4 compression path must compute slot_mapping from
+    compressed positions, not reuse the uncompressed common metadata mapping.
+    """
+    device = torch.device("cuda")
+
+    # storage_block_size = block_size // compress_ratio = 256 // 4 = 64
+    kv_cache_spec = MLAAttentionSpec(
+        block_size=256,
+        num_kv_heads=1,
+        head_size=128,
+        dtype=torch.bfloat16,
+        compress_ratio=4,
+    )
+    vllm_config = create_vllm_config(max_model_len=1024)
+    builder = DeepseekV32IndexerMetadataBuilder(
+        kv_cache_spec=kv_cache_spec,
+        layer_names=["dummy"],
+        vllm_config=vllm_config,
+        device=device,
+    )
+
+    # Construct a single request where:
+    # - num_computed = 240 (=> compressed_pos_start = 60)
+    # - query_len = 40 (=> num_groups = 10)
+    # => compressed positions are 60..69 which cross the storage block boundary at 64.
+    query_start_loc = torch.tensor([0, 40], dtype=torch.int32, device=device)
+    query_start_loc_cpu = query_start_loc.cpu()
+    seq_lens = torch.tensor([280], dtype=torch.int32, device=device)  # 240 + 40
+
+    # Two blocks: compressed positions 0..63 map to block 5, 64..127 map to block 7.
+    block_table_tensor = torch.tensor([[5, 7]], dtype=torch.int32, device=device)
+
+    # Dummy uncompressed slot mapping (length == uncompressed num_actual_tokens).
+    slot_mapping = torch.full((40,), -123, dtype=torch.int64, device=device)
+
+    common = CommonAttentionMetadata(
+        query_start_loc=query_start_loc,
+        query_start_loc_cpu=query_start_loc_cpu,
+        seq_lens=seq_lens,
+        seq_lens_cpu_upper_bound=seq_lens.cpu(),
+        num_reqs=1,
+        num_actual_tokens=40,
+        max_query_len=40,
+        max_seq_len=280,
+        block_table_tensor=block_table_tensor,
+        slot_mapping=slot_mapping,
+        causal=True,
+    )
+
+    md = builder.build(common_prefix_len=0, common_attn_metadata=common)
+
+    # The compressed slot_mapping retains the original uncompressed size (40).
+    # Only every compress_ratio-th position gets a valid slot; the rest are -1.
+    assert md.slot_mapping.numel() == 40
+    valid_slots = md.slot_mapping[md.slot_mapping >= 0]
+    assert valid_slots.numel() == 10  # 40 tokens / compress_ratio 4
+
+    storage_bs = kv_cache_spec.storage_block_size  # 64
+    # Compressed positions 60..63 land in block 5, positions 64..69 in block 7.
+    expected = torch.tensor(
+        [
+            5 * storage_bs + 60,
+            5 * storage_bs + 61,
+            5 * storage_bs + 62,
+            5 * storage_bs + 63,
+        ]
+        + [
+            7 * storage_bs + 0,
+            7 * storage_bs + 1,
+            7 * storage_bs + 2,
+            7 * storage_bs + 3,
+            7 * storage_bs + 4,
+            7 * storage_bs + 5,
+        ],
+        dtype=torch.int64,
+        device=device,
+    )
+    torch.testing.assert_close(valid_slots, expected)
diff --git a/tests/v1/attention/test_kv_head_stride_canonicalization.py b/tests/v1/attention/test_kv_head_stride_canonicalization.py
new file mode 100644
index 000000000000..635f46390cfc
--- /dev/null
+++ b/tests/v1/attention/test_kv_head_stride_canonicalization.py
@@ -0,0 +1,162 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for canonicalize_singleton_dim_strides.
+
+Background
+----------
+When num_kv_heads_per_rank == 1 (e.g. Qwen3.5-397B with TP=8 → 1 KV head
+per rank), PyTorch's is_contiguous() returns True for *any* stride on the
+size-1 dimension.  The KV cache allocator can therefore produce a tensor
+where that singleton dim has stride = 1 element (2 bytes for bf16) instead
+of the canonical product-of-remaining-dims value.
+
+CUDA TMA (used by FlashInfer XQA SM90 and Flash-Attention 3/4 on H100+)
+requires all non-outermost strides to be multiples of 16 bytes.  A 2-byte
+stride triggers cudaErrorIllegalInstruction.
+
+canonicalize_singleton_dim_strides() patches degenerate strides on all
+size-1 dimensions via torch.as_strided — zero-copy.
+
+The degenerate stride manifests at different positions in different backends:
+- FlashInfer: stride(-3) after kv_cache.permute() → shape [..., 1, B, D]
+- FlashAttention: stride(-2) after kv_cache.unbind(0) → shape [N, B, 1, D]
+"""
+
+import torch
+
+from vllm.utils.torch_utils import canonicalize_singleton_dim_strides
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _inject_degenerate_stride(t: torch.Tensor, dim: int) -> torch.Tensor:
+    """Return a view of t with a degenerate (stride=1) on a size-1 dim."""
+    assert t.shape[dim] == 1, f"dim {dim} must have size 1"
+    strides = list(t.stride())
+    strides[dim] = 1  # inject the bug
+    return t.as_strided(t.shape, strides)
+
+
+# ---------------------------------------------------------------------------
+# Tests: canonicalize_singleton_dim_strides
+# ---------------------------------------------------------------------------
+
+
+class TestCanonicalizeSingletonDimStrides:
+    def test_flashinfer_layout_dim_neg3(self):
+        """FlashInfer path: degenerate stride at dim -3 (num_kv_heads)."""
+        # Shape after permute: [num_blocks, 2, num_kv_heads, block_size, head_size]
+        num_blocks, block_size, head_size = 64, 16, 128
+        t = torch.zeros(num_blocks, 2, 1, block_size, head_size, dtype=torch.bfloat16)
+        t_deg = _inject_degenerate_stride(t, dim=-3)
+
+        assert t_deg.stride(-3) == 1  # confirm degenerate
+        assert t_deg.is_contiguous()  # PyTorch doesn't notice
+
+        fixed = canonicalize_singleton_dim_strides(t_deg)
+
+        assert fixed.stride(-3) == block_size * head_size  # canonical = 2048
+        assert fixed.stride(-2) == head_size  # inner dims unchanged
+        assert fixed.stride(-1) == 1
+
+    def test_flash_attn_layout_dim_neg2(self):
+        """FlashAttention path: degenerate stride at dim -2 (num_kv_heads)."""
+        # Shape after unbind(0): [num_blocks, block_size, num_kv_heads, head_size]
+        num_blocks, block_size, head_size = 64, 16, 128
+        t = torch.zeros(num_blocks, block_size, 1, head_size, dtype=torch.bfloat16)
+        t_deg = _inject_degenerate_stride(t, dim=-2)
+
+        assert t_deg.stride(-2) == 1
+        assert t_deg.is_contiguous()
+
+        fixed = canonicalize_singleton_dim_strides(t_deg)
+
+        assert fixed.stride(-2) == head_size  # canonical = 128
+        assert fixed.stride(-1) == 1
+
+    def test_canonical_strides_returned_as_is(self):
+        """No degenerate strides → same object returned (no copy, no new view)."""
+        t = torch.zeros(64, 2, 1, 16, 128, dtype=torch.bfloat16)
+        result = canonicalize_singleton_dim_strides(t)
+        assert result is t
+
+    def test_multi_kv_heads_unchanged(self):
+        """num_kv_heads > 1 → strides are already canonical → unchanged."""
+        t = torch.zeros(16, 2, 4, 16, 128, dtype=torch.bfloat16)
+        original_strides = t.stride()
+        result = canonicalize_singleton_dim_strides(t)
+        assert result.stride() == original_strides
+
+    def test_data_pointer_preserved(self):
+        """Fix is zero-copy: same underlying storage."""
+        t = torch.zeros(8, 2, 1, 16, 128, dtype=torch.bfloat16)
+        t_deg = _inject_degenerate_stride(t, dim=-3)
+        fixed = canonicalize_singleton_dim_strides(t_deg)
+        assert fixed.data_ptr() == t_deg.data_ptr()
+        assert fixed.storage_offset() == t_deg.storage_offset()
+
+    def test_multiple_singleton_dims(self):
+        """All size-1 dims with degenerate strides are fixed."""
+        # Shape: [1, 1, 8, 32] — two size-1 dims
+        t = torch.zeros(1, 1, 8, 32, dtype=torch.float16)
+        # Both size-1 dims get degenerate strides
+        t_deg = t.as_strided(t.shape, (1, 1, 32, 1))  # both leading dims = 1
+
+        fixed = canonicalize_singleton_dim_strides(t_deg)
+
+        assert fixed.stride(0) == 1 * 8 * 32  # canonical: 256
+        assert fixed.stride(1) == 1 * 8 * 32  # canonical: 256 (same since size-1)
+        assert fixed.stride(2) == 32
+        assert fixed.stride(3) == 1
+
+    def test_various_shapes_flashinfer(self):
+        """Correctness across different block_size / head_size for FlashInfer layout."""
+        for block_size, head_size in [(16, 64), (16, 128), (32, 128), (16, 256)]:
+            t = torch.zeros(8, 2, 1, block_size, head_size, dtype=torch.bfloat16)
+            t_deg = _inject_degenerate_stride(t, dim=-3)
+            fixed = canonicalize_singleton_dim_strides(t_deg)
+            assert fixed.stride(-3) == block_size * head_size, (
+                f"Failed for block_size={block_size}, head_size={head_size}: "
+                f"got stride(-3)={fixed.stride(-3)}"
+            )
+
+    def test_various_shapes_flash_attn(self):
+        """Correctness across different shapes for FlashAttention layout."""
+        for block_size, head_size in [(16, 64), (16, 128), (32, 128)]:
+            t = torch.zeros(8, block_size, 1, head_size, dtype=torch.bfloat16)
+            t_deg = _inject_degenerate_stride(t, dim=-2)
+            fixed = canonicalize_singleton_dim_strides(t_deg)
+            assert fixed.stride(-2) == head_size, (
+                f"Failed for block_size={block_size}, head_size={head_size}: "
+                f"got stride(-2)={fixed.stride(-2)}"
+            )
+
+    def test_tma_alignment_satisfied_after_fix_bf16(self):
+        """After fix, all strides meet 16-byte TMA alignment for bf16."""
+        t = torch.zeros(64, 2, 1, 16, 128, dtype=torch.bfloat16)
+        t_deg = _inject_degenerate_stride(t, dim=-3)
+        fixed = canonicalize_singleton_dim_strides(t_deg)
+
+        element_size = fixed.element_size()  # 2 bytes for bf16
+        for i, s in enumerate(fixed.stride()):
+            assert (s * element_size) % 16 == 0 or i == len(fixed.stride()) - 1, (
+                f"dim {i} stride {s} * {element_size} bytes not 16-byte aligned"
+            )
+
+    def test_non_contiguous_outer_dims_preserved(self):
+        """Outer (non-size-1) non-contiguous strides are left unchanged."""
+        # Simulate cross-layer unified allocation: num_blocks stride is non-canonical
+        # but the inner dims should be fixed.
+        base = torch.zeros(200, 2, 1, 16, 128, dtype=torch.bfloat16)
+        # Slice every 2nd block → non-canonical outer stride
+        t_sliced = base[::2]  # shape [100, 2, 1, 16, 128], stride[0] = 2*canonical
+        t_deg = _inject_degenerate_stride(t_sliced, dim=-3)
+
+        fixed = canonicalize_singleton_dim_strides(t_deg)
+
+        # Outer stride should be unchanged (not a size-1 dim)
+        assert fixed.stride(0) == t_sliced.stride(0)
+        # Inner degenerate stride should be fixed
+        assert fixed.stride(-3) == 16 * 128
diff --git a/tests/v1/attention/test_mamba_update_block_table.py b/tests/v1/attention/test_mamba_update_block_table.py
index 923939053ece..99dcb09ab154 100644
--- a/tests/v1/attention/test_mamba_update_block_table.py
+++ b/tests/v1/attention/test_mamba_update_block_table.py
@@ -16,39 +16,60 @@
 
 import torch
 
+from tests.v1.attention.utils import MockMambaBuilder
 from vllm.config.compilation import CUDAGraphMode
-from vllm.v1.attention.backends.mamba_attn import (
-    BaseMambaAttentionMetadata,
-    BaseMambaAttentionMetadataBuilder,
-)
+from vllm.v1.attention.backends.mamba_attn import BaseMambaAttentionMetadata
 from vllm.v1.kv_cache_interface import MambaSpec
 
 
-class _ConcreteMambaBuilder(
-    BaseMambaAttentionMetadataBuilder[BaseMambaAttentionMetadata]
+def _make_vllm_config(
+    max_model_len: int,
+    max_num_seqs: int,
+    num_speculative_tokens: int = 0,
+    block_size: int | None = None,
 ):
-    """Minimal concrete subclass for testing (base class is ABC)."""
-
-    metadata_cls = BaseMambaAttentionMetadata
-
-
-def _make_vllm_config(block_size, max_model_len, max_num_seqs):
     """Create a minimal mock VllmConfig with only the fields the builder
     accesses, avoiding any model download / HF config inspection."""
+    speculative_config = (
+        SimpleNamespace(
+            num_speculative_tokens=num_speculative_tokens,
+            parallel_drafting=False,
+        )
+        if num_speculative_tokens > 0
+        else None
+    )
     return SimpleNamespace(
-        cache_config=SimpleNamespace(mamba_cache_mode="all"),
+        cache_config=SimpleNamespace(
+            block_size=block_size,
+            mamba_cache_mode="all",
+        ),
         compilation_config=SimpleNamespace(
             cudagraph_mode=CUDAGraphMode.FULL,
             max_cudagraph_capture_size=None,
         ),
-        speculative_config=None,
-        num_speculative_tokens=0,
+        speculative_config=speculative_config,
+        num_speculative_tokens=num_speculative_tokens,
         parallel_config=SimpleNamespace(decode_context_parallel_size=1),
         scheduler_config=SimpleNamespace(max_num_seqs=max_num_seqs),
         model_config=SimpleNamespace(max_model_len=max_model_len),
     )
 
 
+def test_mamba_single_token_prompt_runs_as_prefill():
+    seq_lens = [8, 9, 1]
+    config = _make_vllm_config(256, len(seq_lens), block_size=16)
+    metadata = MockMambaBuilder.build_mamba_metadata(
+        config,
+        seq_lens=seq_lens,
+        query_lens=[1] * len(seq_lens),
+        is_prefilling=[False, False, True],
+    )
+
+    assert metadata.num_decodes == 2
+    assert metadata.num_prefills == 1
+    assert metadata.has_initial_states_p.tolist() == [False]
+
+
 def test_update_block_table_copies_block_idx_to_persistent_buffers():
     """update_block_table() must write block_idx tensors to the current
     builder's persistent buffers, not leave them pointing to a different
@@ -59,7 +80,7 @@ def test_update_block_table_copies_block_idx_to_persistent_buffers():
     num_reqs = 4
     device = torch.device("cpu")
 
-    vllm_config = _make_vllm_config(block_size, max_model_len, num_reqs)
+    vllm_config = _make_vllm_config(max_model_len, num_reqs)
 
     spec = MambaSpec(
         block_size=block_size,
@@ -69,8 +90,8 @@ def test_update_block_table_copies_block_idx_to_persistent_buffers():
     )
 
     # Two builders simulating two KV cache groups with the same MambaSpec.
-    builder_a = _ConcreteMambaBuilder(spec, ["layer0"], vllm_config, device)
-    builder_b = _ConcreteMambaBuilder(spec, ["layer1"], vllm_config, device)
+    builder_a = MockMambaBuilder(spec, ["layer0"], vllm_config, device)
+    builder_b = MockMambaBuilder(spec, ["layer1"], vllm_config, device)
 
     # Sanity: each builder has its own persistent buffer.
     assert (
@@ -106,6 +127,7 @@ def test_update_block_table_copies_block_idx_to_persistent_buffers():
         block_idx_last_computed_token=(
             builder_a.block_idx_last_computed_token[:num_reqs]
         ),
+        block_idx_last_scheduled_token_prev_step=None,
         seq_lens=seq_lens,
     )
 
@@ -149,3 +171,261 @@ def shares_storage(tensor, buffer):
         metadata_b.block_idx_last_computed_token,
         block_idx_vals,
     )
+
+
+def test_state_indices_tensor_d_includes_num_speculative_blocks():
+    """Regression test for https://github.com/vllm-project/vllm/issues/39809
+    bug 1: with mamba_cache_mode='all' and speculative decoding enabled,
+    the cudagraph buffer for state_indices_tensor_d must allocate the same
+    per-request column count as the runtime block table, which includes
+    num_speculative_blocks trailing scratch columns."""
+
+    block_size = 16
+    max_model_len = 256
+    max_num_seqs = 4
+    num_speculative_tokens = 1
+    num_speculative_blocks = 2
+    device = torch.device("cpu")
+
+    vllm_config = _make_vllm_config(
+        max_model_len,
+        max_num_seqs,
+        num_speculative_tokens=num_speculative_tokens,
+    )
+
+    spec = MambaSpec(
+        block_size=block_size,
+        shapes=((1,), (1,)),
+        dtypes=(torch.float32,),
+        mamba_cache_mode="all",
+        num_speculative_blocks=num_speculative_blocks,
+    )
+
+    builder = MockMambaBuilder(spec, ["layer0"], vllm_config, device)
+
+    expected_cols = (max_model_len // block_size) + num_speculative_blocks
+    assert builder.state_indices_tensor_d.shape == (max_num_seqs, expected_cols)
+
+
+def test_block_idx_cudagraph_capture_padded_by_num_reqs():
+    """Regression test for https://github.com/vllm-project/vllm/issues/39809
+    bug 2: with mamba_cache_mode='all' and spec decode, _update_metadata_for
+    _cudagraph_capture must slice block_idx_last_{scheduled,computed}_token
+    by the request count (padded_bs == num_reqs), not by num_decode_tokens.
+    Past num_decodes, the slice must be zero-filled."""
+
+    block_size = 16
+    max_model_len = 256
+    max_num_seqs = 8
+    num_speculative_tokens = 1
+    device = torch.device("cpu")
+
+    vllm_config = _make_vllm_config(
+        max_model_len,
+        max_num_seqs,
+        num_speculative_tokens=num_speculative_tokens,
+    )
+
+    spec = MambaSpec(
+        block_size=block_size,
+        shapes=((1,), (1,)),
+        dtypes=(torch.float32,),
+        mamba_cache_mode="all",
+        num_speculative_blocks=2,
+    )
+
+    builder = MockMambaBuilder(spec, ["layer0"], vllm_config, device)
+
+    builder.block_idx_last_scheduled_token.fill_(-1)
+    builder.block_idx_last_computed_token.fill_(-1)
+
+    num_decodes = 2
+    num_reqs = 3
+    num_decode_tokens = num_decodes * (1 + num_speculative_tokens)
+    seq_lens = torch.full((num_reqs,), 64, dtype=torch.int32, device=device)
+    block_idx_vals = torch.tensor([3, 5], dtype=torch.int32, device=device)
+    state_indices_d = torch.zeros(
+        (num_decodes, builder.state_indices_tensor_d.shape[1]),
+        dtype=torch.int32,
+        device=device,
+    )
+    query_start_loc_d = torch.arange(
+        num_decodes + 1, dtype=torch.int32, device=device
+    ) * (1 + num_speculative_tokens)
+    num_accepted_tokens = torch.ones(num_decodes, dtype=torch.int32, device=device)
+
+    metadata = BaseMambaAttentionMetadata(
+        num_prefills=0,
+        num_prefill_tokens=0,
+        num_decodes=num_decodes,
+        num_decode_tokens=num_decode_tokens,
+        num_reqs=num_reqs,
+        has_initial_states_p=None,
+        query_start_loc_p=None,
+        num_computed_tokens_p=None,
+        state_indices_tensor_p=None,
+        state_indices_tensor_d=state_indices_d,
+        query_start_loc_d=query_start_loc_d,
+        num_accepted_tokens=num_accepted_tokens,
+        block_idx_last_scheduled_token=block_idx_vals,
+        block_idx_first_scheduled_token_p=None,
+        block_idx_last_computed_token=block_idx_vals,
+        block_idx_last_scheduled_token_prev_step=None,
+        seq_lens=seq_lens,
+    )
+
+    out = builder._update_metadata_for_cudagraph_capture(metadata)
+
+    assert out.block_idx_last_scheduled_token.shape == (num_reqs,)
+    assert out.block_idx_last_computed_token.shape == (num_reqs,)
+    torch.testing.assert_close(
+        out.block_idx_last_scheduled_token[:num_decodes], block_idx_vals
+    )
+    torch.testing.assert_close(
+        out.block_idx_last_computed_token[:num_decodes], block_idx_vals
+    )
+    assert torch.all(out.block_idx_last_scheduled_token[num_decodes:] == 0)
+    assert torch.all(out.block_idx_last_computed_token[num_decodes:] == 0)
+
+
+def test_block_idx_prev_step_persistent_buffer_allocated():
+    """With mamba_cache_mode='all' + spec decode, the builder must allocate
+    block_idx_last_scheduled_token_prev_step as a persistent buffer with the
+    same shape as the existing block_idx_last_{scheduled,computed}_token
+    buffers, so cudagraph capture records a stable pointer for the prev-step
+    input anchor consumed by mamba_mixer2's input gather."""
+    block_size = 16
+    max_model_len = 256
+    max_num_seqs = 8
+    num_speculative_tokens = 1
+    device = torch.device("cpu")
+
+    vllm_config = _make_vllm_config(
+        max_model_len,
+        max_num_seqs,
+        num_speculative_tokens=num_speculative_tokens,
+    )
+    spec = MambaSpec(
+        block_size=block_size,
+        shapes=((1,), (1,)),
+        dtypes=(torch.float32,),
+        mamba_cache_mode="all",
+        num_speculative_blocks=2,
+    )
+    builder = MockMambaBuilder(spec, ["layer0"], vllm_config, device)
+
+    assert hasattr(builder, "block_idx_last_scheduled_token_prev_step")
+    assert builder.block_idx_last_scheduled_token_prev_step.shape == (max_num_seqs,)
+    assert builder.block_idx_last_scheduled_token_prev_step.dtype == torch.int32
+
+
+def test_block_idx_prev_step_persistent_buffer_skipped_without_spec_decode():
+    """Without spec decode, the prev-step buffer is unused and must not be
+    allocated — the input anchor reduces to last_computed_token."""
+    block_size = 16
+    max_model_len = 256
+    max_num_seqs = 8
+    device = torch.device("cpu")
+
+    vllm_config = _make_vllm_config(
+        max_model_len, max_num_seqs, num_speculative_tokens=0
+    )
+    spec = MambaSpec(
+        block_size=block_size,
+        shapes=((1,), (1,)),
+        dtypes=(torch.float32,),
+        mamba_cache_mode="all",
+    )
+    builder = MockMambaBuilder(spec, ["layer0"], vllm_config, device)
+
+    assert not hasattr(builder, "block_idx_last_scheduled_token_prev_step")
+
+
+def test_block_idx_prev_step_cudagraph_capture_uses_persistent_buffer():
+    """_update_metadata_for_cudagraph_capture must copy the prev-step anchor
+    into the builder's persistent buffer (so cudagraph replay reads from the
+    same underlying memory), pad past num_decodes with zero, and return a
+    slice of the persistent buffer in the metadata."""
+    block_size = 16
+    max_model_len = 256
+    max_num_seqs = 8
+    num_speculative_tokens = 1
+    device = torch.device("cpu")
+
+    vllm_config = _make_vllm_config(
+        max_model_len,
+        max_num_seqs,
+        num_speculative_tokens=num_speculative_tokens,
+    )
+    spec = MambaSpec(
+        block_size=block_size,
+        shapes=((1,), (1,)),
+        dtypes=(torch.float32,),
+        mamba_cache_mode="all",
+        num_speculative_blocks=2,
+    )
+    builder = MockMambaBuilder(spec, ["layer0"], vllm_config, device)
+    builder.block_idx_last_scheduled_token.fill_(-1)
+    builder.block_idx_last_computed_token.fill_(-1)
+    builder.block_idx_last_scheduled_token_prev_step.fill_(-1)
+
+    num_decodes = 2
+    num_reqs = 3
+    num_decode_tokens = num_decodes * (1 + num_speculative_tokens)
+    seq_lens = torch.full((num_reqs,), 64, dtype=torch.int32, device=device)
+    block_idx_vals = torch.tensor([3, 5], dtype=torch.int32, device=device)
+    prev_step_vals = torch.tensor([2, 4], dtype=torch.int32, device=device)
+    state_indices_d = torch.zeros(
+        (num_decodes, builder.state_indices_tensor_d.shape[1]),
+        dtype=torch.int32,
+        device=device,
+    )
+    query_start_loc_d = torch.arange(
+        num_decodes + 1, dtype=torch.int32, device=device
+    ) * (1 + num_speculative_tokens)
+    num_accepted_tokens = torch.ones(num_decodes, dtype=torch.int32, device=device)
+
+    metadata = BaseMambaAttentionMetadata(
+        num_prefills=0,
+        num_prefill_tokens=0,
+        num_decodes=num_decodes,
+        num_decode_tokens=num_decode_tokens,
+        num_reqs=num_reqs,
+        has_initial_states_p=None,
+        query_start_loc_p=None,
+        num_computed_tokens_p=None,
+        state_indices_tensor_p=None,
+        state_indices_tensor_d=state_indices_d,
+        query_start_loc_d=query_start_loc_d,
+        num_accepted_tokens=num_accepted_tokens,
+        block_idx_last_scheduled_token=block_idx_vals,
+        block_idx_first_scheduled_token_p=None,
+        block_idx_last_computed_token=block_idx_vals,
+        block_idx_last_scheduled_token_prev_step=prev_step_vals,
+        seq_lens=seq_lens,
+    )
+
+    out = builder._update_metadata_for_cudagraph_capture(metadata)
+
+    # Output field exists and is identity-shared with the persistent buffer.
+    assert out.block_idx_last_scheduled_token_prev_step is not None
+    assert (
+        out.block_idx_last_scheduled_token_prev_step.untyped_storage().data_ptr()
+        == builder.block_idx_last_scheduled_token_prev_step.untyped_storage().data_ptr()
+    ), (
+        "prev-step buffer must live in the builder's persistent buffer, not "
+        "in the caller-provided tensor"
+    )
+
+    # Padded by num_reqs (not num_decode_tokens) — same fix as bug 2 for the
+    # other block_idx_* fields.
+    assert out.block_idx_last_scheduled_token_prev_step.shape == (num_reqs,)
+
+    # First num_decodes values: input values copied through.
+    torch.testing.assert_close(
+        out.block_idx_last_scheduled_token_prev_step[:num_decodes],
+        prev_step_vals,
+    )
+
+    # Tail values past num_decodes: zero-filled padding for cudagraph capture.
+    assert torch.all(out.block_idx_last_scheduled_token_prev_step[num_decodes:] == 0)
diff --git a/tests/v1/attention/test_mla_backends.py b/tests/v1/attention/test_mla_backends.py
index 796912a6806f..109e56cb3838 100644
--- a/tests/v1/attention/test_mla_backends.py
+++ b/tests/v1/attention/test_mla_backends.py
@@ -20,16 +20,20 @@
 from vllm import _custom_ops as ops
 from vllm.config.vllm import set_current_vllm_config
 from vllm.model_executor.layers.attention.mla_attention import (
+    MLAAttention,
     QueryLenSupport,
     _DecodeConcatQuantFP8,
 )
-from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
 from vllm.model_executor.layers.quantization.utils.quant_utils import GroupShape
 from vllm.platforms import current_platform
 from vllm.utils.math_utils import cdiv
 from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
 from vllm.v1.attention.backend import CommonAttentionMetadata
 from vllm.v1.attention.backends.fa_utils import flash_attn_supports_mla
+from vllm.v1.attention.backends.mla.prefill import (
+    MLAPrefillBackendEnum,
+    get_mla_prefill_backend,
+)
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
 from vllm.v1.attention.ops.flashmla import is_flashmla_dense_supported
 from vllm.v1.kv_cache_interface import MLAAttentionSpec
@@ -40,12 +44,16 @@
     AttentionBackendEnum.FLASH_ATTN_MLA,
     AttentionBackendEnum.FLASHINFER_MLA,
     AttentionBackendEnum.TRITON_MLA,
+    AttentionBackendEnum.TOKENSPEED_MLA,
 ]
 
+DEVICE_TYPE = current_platform.device_type
+
 # Remove sm100 backends from the list if not using sm100
 if not torch.cuda.is_available() or torch.cuda.get_device_properties(0).major < 10:
     BACKENDS_TO_TEST.remove(AttentionBackendEnum.CUTLASS_MLA)
     BACKENDS_TO_TEST.remove(AttentionBackendEnum.FLASHINFER_MLA)
+    BACKENDS_TO_TEST.remove(AttentionBackendEnum.TOKENSPEED_MLA)
 
 # Remove FLASH_ATTN_MLA from the list if not supported
 if not flash_attn_supports_mla():
@@ -55,6 +63,22 @@
 if not is_flashmla_dense_supported()[0]:
     BACKENDS_TO_TEST.remove(AttentionBackendEnum.FLASHMLA)
 
+# Remove TOKENSPEED_MLA if the optional package is not installed
+if AttentionBackendEnum.TOKENSPEED_MLA in BACKENDS_TO_TEST:
+    try:
+        import tokenspeed_mla  # noqa: F401
+    except ImportError:
+        BACKENDS_TO_TEST.remove(AttentionBackendEnum.TOKENSPEED_MLA)
+
+
+# Filtered per-test via validate_configuration (capability/deps/dims).
+PREFILL_BACKENDS_TO_TEST = [
+    MLAPrefillBackendEnum.FLASH_ATTN,
+    MLAPrefillBackendEnum.FLASHINFER,
+    MLAPrefillBackendEnum.TRTLLM_RAGGED,
+    MLAPrefillBackendEnum.TOKENSPEED_MLA,
+]
+
 
 SPEC_DECODE_BACKENDS = []
 for backend in BACKENDS_TO_TEST:
@@ -386,14 +410,18 @@ def forward_impl(
         return output
 
 
-class MockMLAAttentionLayer(AttentionLayerBase):
+class MockMLAAttentionLayer(MLAAttention):
     """A mock MLA attention layer for testing.
 
     This replicates the forward_impl logic from MLAAttention to allow
     testing MLA backends without the full layer infrastructure.
 
-    The W_UK_T and W_UV weight matrices are created on the layer (like in
-    MLAAttention.process_weights_after_loading), not on the impl.
+    Subclasses MLAAttention so that backends that filter
+    `static_forward_context` by `isinstance(layer, MLAAttention)` (e.g.
+    FlashInfer prefill, which reads sm_scale through that filter) see the
+    mock as a real MLA layer. MLAAttention.__init__ is intentionally
+    skipped — it would create its own impl/prefill_backend and self-register
+    in static_forward_context, which fights what the test sets up below.
     """
 
     def __init__(
@@ -409,6 +437,7 @@ def __init__(
         q_scale: float,
         k_scale: float,
     ):
+        torch.nn.Module.__init__(self)
         self.impl = impl
         self.num_heads = num_heads
         self.qk_nope_head_dim = qk_nope_head_dim
@@ -559,11 +588,15 @@ def run_attention_backend(
     q_scale: float,
     k_scale: float,
     kv_cache_dtype: str = "auto",
+    prefill_backend: MLAPrefillBackendEnum | None = None,
 ) -> torch.Tensor:
     """Run attention computation using the specified backend's AttentionImpl."""
 
     builder_cls, impl_cls = try_get_attention_backend(backend)
 
+    # Force the prefill backend selection (None means auto-select).
+    vllm_config.attention_config.mla_prefill_backend = prefill_backend
+
     # Set the current vllm config so that get_current_vllm_config() works
     # in the backend implementations
     with set_current_vllm_config(vllm_config):
@@ -575,7 +608,11 @@ def run_attention_backend(
             vllm_config.parallel_config
         )
         head_size = vllm_config.model_config.get_head_size()
-        scale = 1.0 / (head_size**0.5)
+        # Production MLA passes 1/sqrt(qk_head_dim) (the prefill scale) to the
+        # impl and forwards the same value to the prefill backend. FLASHINFER
+        # prefill reads sm_scale back from impl.scale via global_hyperparameters
+        # at plan() time, so impl.scale must agree with prefill_backend.scale.
+        scale = (qk_nope_head_dim + qk_rope_head_dim) ** -0.5
         impl = impl_cls(
             num_heads=num_heads,
             head_size=head_size,
@@ -619,6 +656,19 @@ def run_attention_backend(
             k_scale=k_scale,
         )
 
+        # Attach prefill backend (normally created by MLAAttention.__init__)
+        prefill_scale = (qk_nope_head_dim + qk_rope_head_dim) ** -0.5
+        prefill_backend_cls = get_mla_prefill_backend(vllm_config)
+        mock_layer.prefill_backend = prefill_backend_cls(
+            num_heads=num_heads,
+            scale=prefill_scale,
+            kv_lora_rank=kv_lora_rank,
+            qk_nope_head_dim=qk_nope_head_dim,
+            qk_rope_head_dim=qk_rope_head_dim,
+            v_head_dim=v_head_dim,
+            vllm_config=vllm_config,
+        )
+
         # Populate static_forward_context with mock attention layers
         for layer_name in layer_names:
             vllm_config.compilation_config.static_forward_context[layer_name] = (
@@ -667,15 +717,18 @@ def run_attention_backend(
 @pytest.mark.parametrize("tensor_parallel_size", [1, 4, 8, 16])
 @pytest.mark.parametrize("kv_cache_dtype", ["auto", "fp8", "fp8_e4m3"])
 @pytest.mark.parametrize(("q_scale", "k_scale"), [(1.0, 1.0), (2.0, 3.0)])
+@pytest.mark.parametrize("prefill_backend", PREFILL_BACKENDS_TO_TEST)
 def test_backend_correctness(
     default_vllm_config,
     dist_init,
+    workspace_init,
     batch_spec_name: str,
     model: str,
     tensor_parallel_size: int,
     kv_cache_dtype: str,
     q_scale: float,
     k_scale: float,
+    prefill_backend: MLAPrefillBackendEnum,
 ):
     """
     Test that all backends produce similar outputs to a reference implementation
@@ -712,6 +765,24 @@ def test_backend_correctness(
     if not backends_to_test:
         pytest.skip(f"No backends support kv_cache_dtype={kv_cache_dtype}")
 
+    # Skip prefill backends that can't satisfy capability/deps/R1 constraints.
+    from vllm.v1.attention.backends.mla.prefill.selector import (
+        MLAPrefillSelectorConfig,
+    )
+
+    try:
+        prefill_invalid_reasons = prefill_backend.get_class().validate_configuration(
+            current_platform.get_device_capability(),
+            MLAPrefillSelectorConfig(dtype=torch.bfloat16, is_r1_compatible=True),
+        )
+    except ImportError:
+        prefill_invalid_reasons = ["ImportError"]
+    if prefill_invalid_reasons:
+        pytest.skip(
+            f"Prefill backend {prefill_backend.name} unavailable: "
+            f"{prefill_invalid_reasons}"
+        )
+
     batch_spec = BATCH_SPECS[batch_spec_name]
     is_spec_decode_test = batch_spec_name.startswith("spec_decode")
     unique_block_sizes = sorted(set(BACKEND_BLOCK_SIZES[b] for b in backends_to_test))
@@ -763,7 +834,7 @@ def test_backend_correctness(
             method="ngram", num_speculative_tokens=query_len - 1
         )
 
-    device = torch.device("cuda:0")
+    device = torch.device(f"{DEVICE_TYPE}:0")
 
     # 1. Setup
     batch_size = batch_spec.batch_size
@@ -782,7 +853,13 @@ def test_backend_correctness(
     assert kv_lora_rank + qk_rope_head_dim == head_size, (
         f"MLA dimensions don't match: {total_head_size} != {head_size}"
     )
-    scale = 1.0 / (total_head_size**0.5)
+    qk_head_dim = qk_nope_head_dim + qk_rope_head_dim
+    prefill_scale = qk_head_dim**-0.5
+    # MLA reuses prefill_scale for the decode path: production sets
+    # impl.scale = 1/sqrt(qk_head_dim) and the decode kernels apply it even
+    # though the latent attention runs at head_size dimensions. Keeping the
+    # reference here in sync with run_attention_backend's impl.scale.
+    decode_scale = prefill_scale
 
     # 2. Generate data and compute SDPA reference output for MLA
     all_q_vllm, all_kv_c_vllm, all_k_pe_vllm = [], [], []
@@ -899,7 +976,7 @@ def test_backend_correctness(
         v_sdpa_in = v_mqa.unsqueeze(0).transpose(1, 2)
 
         sdpa_out_i_decode = torch.nn.functional.scaled_dot_product_attention(
-            q_sdpa_in, k_sdpa_in, v_sdpa_in, attn_mask=attn_mask, scale=scale
+            q_sdpa_in, k_sdpa_in, v_sdpa_in, attn_mask=attn_mask, scale=decode_scale
         )
         sdpa_out_i_decode = sdpa_out_i_decode.transpose(1, 2).squeeze(
             0
@@ -935,7 +1012,7 @@ def test_backend_correctness(
 
         # Single attention call with custom mask
         sdpa_out_i_prefill = torch.nn.functional.scaled_dot_product_attention(
-            q_sdpa_in, k_sdpa_in, v_sdpa_in, attn_mask=attn_mask, scale=scale
+            q_sdpa_in, k_sdpa_in, v_sdpa_in, attn_mask=attn_mask, scale=prefill_scale
         )
         sdpa_out_i_prefill = sdpa_out_i_prefill.transpose(1, 2).squeeze(0)
         sdpa_out_i_prefill = sdpa_out_i_prefill.flatten(start_dim=-2)
@@ -1073,6 +1150,7 @@ def test_backend_correctness(
             qk_rope_head_dim,
             v_head_dim,
             mock_kv_b_proj,
+            prefill_backend=prefill_backend,
             q_scale=q_scale,
             k_scale=k_scale,
             kv_cache_dtype=kv_cache_dtype,
diff --git a/tests/v1/attention/test_mla_prefill_selector.py b/tests/v1/attention/test_mla_prefill_selector.py
new file mode 100644
index 000000000000..d5c80c80c03e
--- /dev/null
+++ b/tests/v1/attention/test_mla_prefill_selector.py
@@ -0,0 +1,289 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for MLA prefill backend selector."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+import torch
+
+from vllm.config import AttentionConfig, ModelConfig, VllmConfig
+from vllm.platforms.interface import DeviceCapability
+from vllm.v1.attention.backends.mla.prefill.registry import MLAPrefillBackendEnum
+from vllm.v1.attention.backends.mla.prefill.selector import (
+    MLAPrefillSelectorConfig,
+    _auto_select_mla_prefill_backend,
+    get_mla_prefill_backend,
+    is_deepseek_r1_mla_compatible,
+)
+
+
+@pytest.fixture(autouse=True)
+def clear_cache():
+    """Clear lru cache to ensure each test case runs without caching."""
+    _auto_select_mla_prefill_backend.cache_clear()
+
+
+def _make_mock_model_config(
+    qk_nope_head_dim: int = 128,
+    qk_rope_head_dim: int = 64,
+    v_head_dim: int = 128,
+    dtype: torch.dtype = torch.bfloat16,
+) -> ModelConfig:
+    mock_config = MagicMock(spec=ModelConfig)
+    mock_config.dtype = dtype
+    mock_config.hf_text_config = MagicMock()
+    mock_config.hf_text_config.qk_nope_head_dim = qk_nope_head_dim
+    mock_config.hf_text_config.qk_rope_head_dim = qk_rope_head_dim
+    mock_config.hf_text_config.v_head_dim = v_head_dim
+    return mock_config
+
+
+def _make_vllm_config(
+    model_config: ModelConfig | None = None,
+    mla_prefill_backend: MLAPrefillBackendEnum | None = None,
+) -> VllmConfig:
+    if model_config is None:
+        model_config = _make_mock_model_config()
+
+    attention_config = AttentionConfig(mla_prefill_backend=mla_prefill_backend)
+    mock_vllm_config = MagicMock(spec=VllmConfig)
+    mock_vllm_config.model_config = model_config
+    mock_vllm_config.attention_config = attention_config
+    return mock_vllm_config
+
+
+class TestGetMLAPrefillBackend:
+    """Tests for get_mla_prefill_backend (public API)."""
+
+    def test_no_device_capability_returns_flash_attn(self):
+        vllm_config = _make_vllm_config()
+
+        with patch("vllm.platforms.current_platform") as mock_platform:
+            mock_platform.get_device_capability.return_value = None
+
+            backend = get_mla_prefill_backend(vllm_config)
+            assert backend.get_name() == "FLASH_ATTN"
+
+    def test_explicit_flash_attn_selection(self):
+        try:
+            flash_attn_cls = MLAPrefillBackendEnum.FLASH_ATTN.get_class()
+        except ImportError:
+            pytest.skip("FLASH_ATTN backend not available")
+            return
+
+        vllm_config = _make_vllm_config(
+            mla_prefill_backend=MLAPrefillBackendEnum.FLASH_ATTN,
+        )
+
+        with patch("vllm.platforms.current_platform") as mock_platform:
+            mock_platform.get_device_capability.return_value = DeviceCapability(
+                major=9, minor=0
+            )
+
+            with patch.object(
+                flash_attn_cls,
+                "validate_configuration",
+                return_value=[],
+            ):
+                backend = get_mla_prefill_backend(vllm_config)
+                assert backend.get_name() == "FLASH_ATTN"
+
+    def test_explicit_backend_invalid_raises_error(self):
+        vllm_config = _make_vllm_config(
+            mla_prefill_backend=MLAPrefillBackendEnum.FLASHINFER,
+        )
+
+        with patch("vllm.platforms.current_platform") as mock_platform:
+            mock_platform.get_device_capability.return_value = DeviceCapability(
+                major=9, minor=0
+            )
+
+            with pytest.raises(ValueError, match="is not valid"):
+                get_mla_prefill_backend(vllm_config)
+
+    def test_explicit_backend_import_error_raises(self):
+        vllm_config = _make_vllm_config(
+            mla_prefill_backend=MLAPrefillBackendEnum.TRTLLM_RAGGED,
+        )
+
+        with patch("vllm.platforms.current_platform") as mock_platform:
+            mock_platform.get_device_capability.return_value = DeviceCapability(
+                major=10, minor=0
+            )
+
+            with (
+                patch.object(
+                    MLAPrefillBackendEnum.TRTLLM_RAGGED,
+                    "get_class",
+                    side_effect=ImportError("trtllm not installed"),
+                ),
+                pytest.raises(ValueError, match="is not valid"),
+            ):
+                get_mla_prefill_backend(vllm_config)
+
+    def test_auto_selection_on_hopper(self):
+        try:
+            flash_attn_cls = MLAPrefillBackendEnum.FLASH_ATTN.get_class()
+        except ImportError:
+            pytest.skip("FLASH_ATTN backend not available")
+            return
+
+        vllm_config = _make_vllm_config()
+
+        with patch("vllm.platforms.current_platform") as mock_platform:
+            mock_platform.get_device_capability.return_value = DeviceCapability(
+                major=9, minor=0
+            )
+
+            with patch.object(
+                flash_attn_cls,
+                "validate_configuration",
+                return_value=[],
+            ):
+                backend = get_mla_prefill_backend(vllm_config)
+                assert backend.get_name() == "FLASH_ATTN"
+
+
+class TestAutoSelectMLAPrefillBackend:
+    """Tests for fallback and error paths in auto-selection."""
+
+    def test_blackwell_falls_back_to_trtllm(self):
+        vllm_config = _make_vllm_config()
+        capability = DeviceCapability(major=10, minor=0)
+        selector_config = MLAPrefillSelectorConfig(
+            dtype=torch.bfloat16,
+            is_r1_compatible=is_deepseek_r1_mla_compatible(vllm_config),
+        )
+
+        try:
+            trtllm_cls = MLAPrefillBackendEnum.TRTLLM_RAGGED.get_class()
+        except ImportError:
+            pytest.skip("TRTLLM_RAGGED backend not available")
+            return
+
+        with (
+            patch.object(
+                MLAPrefillBackendEnum.FLASH_ATTN,
+                "get_class",
+                side_effect=ImportError("FLASH_ATTN not available"),
+            ),
+            patch.object(trtllm_cls, "validate_configuration", return_value=[]),
+        ):
+            backend = _auto_select_mla_prefill_backend(
+                capability,
+                selector_config,
+            )
+            assert backend.get_name() == "TRTLLM_RAGGED"
+
+    def test_all_fail_raises_error(self):
+        vllm_config = _make_vllm_config()
+        capability = DeviceCapability(major=10, minor=0)
+        selector_config = MLAPrefillSelectorConfig(
+            dtype=torch.bfloat16,
+            is_r1_compatible=is_deepseek_r1_mla_compatible(vllm_config),
+        )
+
+        def mock_get_class(backend_enum):  # noqa: ARG001
+            cls = MagicMock()
+            cls.validate_configuration.return_value = ["not available"]
+            return cls
+
+        with patch.object(MLAPrefillBackendEnum, "get_class", mock_get_class):
+            _auto_select_mla_prefill_backend.cache_clear()
+            with pytest.raises(ValueError, match="No valid MLA"):
+                _auto_select_mla_prefill_backend(
+                    capability,
+                    selector_config,
+                )
+
+
+class TestBackendValidation:
+    """Tests for backend validation logic."""
+
+    def test_r1_dimension_requirement(self):
+        try:
+            from vllm.v1.attention.backends.mla.prefill.flashinfer import (
+                FlashInferPrefillBackend,
+            )
+        except ImportError:
+            pytest.skip("FlashInfer prefill backend not available")
+            return
+
+        assert FlashInferPrefillBackend.requires_r1_mla_dimensions is True
+
+        vllm_config = _make_vllm_config(
+            model_config=_make_mock_model_config(
+                qk_nope_head_dim=128,
+                qk_rope_head_dim=64,
+                v_head_dim=128,
+            )
+        )
+        capability = DeviceCapability(major=10, minor=0)
+        selector_config = MLAPrefillSelectorConfig(
+            dtype=torch.bfloat16,
+            is_r1_compatible=is_deepseek_r1_mla_compatible(vllm_config),
+        )
+
+        with patch.object(FlashInferPrefillBackend, "is_available", return_value=True):
+            invalid_reasons = FlashInferPrefillBackend.validate_configuration(
+                capability,
+                selector_config,
+            )
+            assert len(invalid_reasons) == 0
+
+        vllm_config_invalid = _make_vllm_config(
+            model_config=_make_mock_model_config(
+                qk_nope_head_dim=64,
+                qk_rope_head_dim=64,
+                v_head_dim=128,
+            )
+        )
+        selector_config_invalid = MLAPrefillSelectorConfig(
+            dtype=torch.bfloat16,
+            is_r1_compatible=is_deepseek_r1_mla_compatible(vllm_config_invalid),
+        )
+
+        with patch.object(FlashInferPrefillBackend, "is_available", return_value=True):
+            invalid_reasons = FlashInferPrefillBackend.validate_configuration(
+                capability,
+                selector_config_invalid,
+            )
+            assert len(invalid_reasons) == 1
+            assert "DeepSeek R1 MLA dimensions" in invalid_reasons[0]
+
+
+class TestMLAPrefillBackendParsing:
+    """Tests for string-based mla_prefill_backend parsing from CLI args."""
+
+    def test_valid_string_parses_to_enum(self):
+        config = AttentionConfig(
+            mla_prefill_backend="FLASH_ATTN",  # type: ignore[arg-type]
+        )
+        assert config.mla_prefill_backend == MLAPrefillBackendEnum.FLASH_ATTN
+
+    def test_invalid_string_raises_error(self):
+        with pytest.raises(ValueError, match="Unknown MLA prefill backend"):
+            AttentionConfig(
+                mla_prefill_backend="NONEXISTENT",  # type: ignore[arg-type]
+            )
+
+
+class TestMLAPrefillBackendConfig:
+    """Tests for mla_prefill_backend configuration in AttentionConfig."""
+
+    def test_default_backend_is_none(self):
+        config = AttentionConfig()
+        assert config.mla_prefill_backend is None
+
+    def test_explicit_flash_attn_backend(self):
+        config = AttentionConfig(
+            mla_prefill_backend=MLAPrefillBackendEnum.FLASH_ATTN,
+        )
+        assert config.mla_prefill_backend == MLAPrefillBackendEnum.FLASH_ATTN
+
+    def test_explicit_trtllm_ragged_backend(self):
+        config = AttentionConfig(
+            mla_prefill_backend=MLAPrefillBackendEnum.TRTLLM_RAGGED,
+        )
+        assert config.mla_prefill_backend == MLAPrefillBackendEnum.TRTLLM_RAGGED
diff --git a/tests/v1/attention/test_sparse_mla_backends.py b/tests/v1/attention/test_sparse_mla_backends.py
index 3f6faf51de6d..22acc748d24b 100644
--- a/tests/v1/attention/test_sparse_mla_backends.py
+++ b/tests/v1/attention/test_sparse_mla_backends.py
@@ -42,6 +42,7 @@
     FlashMLASparseBackend,
     triton_convert_req_index_to_global_index,
 )
+from vllm.v1.attention.backends.mla.indexer import split_indexer_prefill_chunks
 from vllm.v1.attention.backends.utils import split_prefill_chunks
 from vllm.v1.attention.ops import flashmla
 
@@ -63,6 +64,8 @@
     seq_lens=[256] * 2, query_lens=[256] * 2
 )
 
+DEVICE_TYPE = current_platform.device_type
+
 
 def _float_to_e8m0_truncate(f: float) -> float:
     """Simulate SM100's float -> e8m0 -> bf16 scale conversion.
@@ -221,7 +224,7 @@ def test_sparse_backend_decode_correctness(
     batch_spec = SPARSE_BACKEND_BATCH_SPECS[batch_name]
     use_fp8_ds_mla_quantization = kv_cache_dtype == "fp8_ds_mla"
 
-    device = torch.device("cuda")
+    device = torch.device(DEVICE_TYPE)
     dtype = torch.bfloat16
 
     # Model hyper-parameters (kept intentionally small for the unit test)
@@ -585,7 +588,7 @@ def _triton_convert_reference_impl(
 def test_triton_convert_req_index_to_global_index_decode_only(
     block_size, num_topk_tokens
 ):
-    device = torch.device("cuda")
+    device = torch.device(DEVICE_TYPE)
     num_tokens = 8
     num_requests = 4
     max_blocks_per_req = 10
@@ -638,7 +641,7 @@ def test_triton_convert_req_index_to_global_index_decode_only(
     reason="FlashMLASparseBackend requires CUDA 9.0 or higher",
 )
 def test_triton_convert_req_index_to_global_index_with_prefill_workspace(block_size):
-    device = torch.device("cuda")
+    device = torch.device(DEVICE_TYPE)
     num_requests = 4
     max_blocks_per_req = 8
     num_topk_tokens = 128
@@ -716,9 +719,84 @@ def test_split_prefill_chunks(seq_lens, max_buf, expected):
     assert out == expected
 
 
+@pytest.mark.parametrize(
+    "seq_lens,query_lens,workspace_size,max_logits_bytes,expected",
+    [
+        # Logits constraint triggers split (M*N exceeds budget)
+        # req0: M=10, N=100 -> 1000 elems (4000 bytes) - fits in 5000
+        # req1: adding M=10, N=100 -> new_M=20, new_N=200 -> 4000 elems > 1250
+        (
+            torch.tensor([100, 100, 100]),
+            torch.tensor([10, 10, 10]),
+            1000,  # workspace allows all
+            5000,  # 1250 float32 elems -> forces split
+            [
+                (slice(0, 1), slice(0, 10)),
+                (slice(1, 2), slice(0, 10)),
+                (slice(2, 3), slice(0, 10)),
+            ],
+        ),
+        # Both constraints satisfied - all fit in one chunk
+        (
+            torch.tensor([10, 10, 10]),
+            torch.tensor([5, 5, 5]),
+            100,
+            10000,  # 2500 elems, M*N = 15*30 = 450 < 2500
+            [(slice(0, 3), slice(0, 15))],
+        ),
+        # Workspace constraint triggers first
+        (
+            torch.tensor([50, 50, 50]),
+            torch.tensor([1, 1, 1]),
+            50,  # workspace only fits one at a time
+            1000000,  # logits budget is huge
+            [
+                (slice(0, 1), slice(0, 1)),
+                (slice(1, 2), slice(0, 1)),
+                (slice(2, 3), slice(0, 1)),
+            ],
+        ),
+        # Greedy filling: first two fit, third doesn't
+        # req0: M=5, N=10 -> 50 elems
+        # req0+1: M=10, N=20 -> 200 elems <= 250
+        # req0+1+2: M=15, N=30 -> 450 elems > 250
+        (
+            torch.tensor([10, 10, 10]),
+            torch.tensor([5, 5, 5]),
+            100,
+            1000,  # 250 elems
+            [(slice(0, 2), slice(0, 10)), (slice(2, 3), slice(0, 5))],
+        ),
+    ],
+)
+def test_split_indexer_prefill_chunks(
+    seq_lens, query_lens, workspace_size, max_logits_bytes, expected
+):
+    out = split_indexer_prefill_chunks(
+        seq_lens,
+        query_lens,
+        workspace_size,
+        max_logits_bytes,
+    )
+    assert out == expected
+
+
+def test_split_indexer_prefill_chunks_single_request_overflow():
+    """Test that single request exceeding budget is sub-chunked on query dim."""
+    seq_lens = torch.tensor([1000, 50])
+    query_lens = torch.tensor([100, 5])
+
+    out = split_indexer_prefill_chunks(seq_lens, query_lens, 2000, 1000)
+    # max_logits_elems = 250, N=1000 -> max_q = 1 -> 100 query sub-chunks
+    expected = [(slice(0, 1), slice(i, i + 1)) for i in range(100)]
+    # req1: M=5, N=50 -> 250 elems fits budget
+    expected.append((slice(1, 2), slice(0, 5)))
+    assert out == expected
+
+
 def test_triton_convert_returns_valid_counts():
     """Test that return_valid_counts correctly counts non-negative indices."""
-    device = torch.device("cuda")
+    device = torch.device(DEVICE_TYPE)
     num_tokens = 8
     num_requests = 2
     max_blocks_per_req = 10
diff --git a/tests/v1/attention/test_trtllm_attention_integration.py b/tests/v1/attention/test_trtllm_attention_integration.py
index 113442bf6e4b..06c5844508f4 100644
--- a/tests/v1/attention/test_trtllm_attention_integration.py
+++ b/tests/v1/attention/test_trtllm_attention_integration.py
@@ -17,13 +17,13 @@
 from vllm.config import set_current_vllm_config
 from vllm.platforms import current_platform
 from vllm.utils.math_utils import cdiv
-from vllm.utils.torch_utils import set_random_seed
+from vllm.utils.torch_utils import nvfp4_kv_cache_full_dim, set_random_seed
 from vllm.v1.attention.backends.utils import (
     PerLayerParameters,
     get_kv_cache_layout,
     set_kv_cache_layout,
 )
-from vllm.v1.kv_cache_interface import FullAttentionSpec
+from vllm.v1.kv_cache_interface import FullAttentionSpec, KVQuantMode
 
 if not current_platform.is_device_capability_family(100):
     pytest.skip(
@@ -53,8 +53,10 @@ def __init__(self, device: torch.device):
 
 
 MODEL = "Qwen/Qwen2.5-0.5B"
+MODEL_NVFP4 = "Qwen/Qwen3-4B"  # nvfp4 needs head_dim >= 128 (or 80)
 BLOCK_SIZE = 16
 NUM_GPU_BLOCKS = 8192
+DEVICE_TYPE = current_platform.device_type
 
 BATCH_SPECS = {
     "decode_only": BatchSpec(
@@ -168,19 +170,129 @@ def _create_hnd_kv_cache(
     return kv_cache
 
 
-def _run_trtllm_integration(batch_spec):
+def _create_nvfp4_hnd_kv_cache(
+    k_contexts,
+    v_contexts,
+    block_size,
+    num_kv_heads,
+    head_size,
+    dtype,
+    device,
+    num_blocks,
+    common_attn_metadata,
+    kv_scale_val,
+):
+    """Create an nvfp4 KV cache by quantizing bf16 context via
+    reshape_and_cache_flash, using the same block-table layout as
+    _create_hnd_kv_cache.
+
+    The returned tensor is dtype ``uint8`` with shape
+    ``(num_blocks, 2, block_size, num_kv_heads, full_dim)`` in logical
+    (NHD) order, but physically permuted to HND layout via stride order
+    ``(0, 1, 3, 2, 4)`` (i.e. ``num_kv_heads`` before ``block_size``).
+
+    The last dimension ``full_dim = head_size // 2 + head_size // 16``
+    packs two regions contiguously:
+      - **FP4 data** (``head_size // 2`` bytes): pairs of E2M1 values,
+        two per byte.
+      - **FP8 block scales** (``head_size // 16`` bytes): one E4M3
+        scale per 16-element block.
+
+    Dimension 1 indexes K (``[:, 0]``) and V (``[:, 1]``).
+
+    Args:
+        k_contexts: List of key context tensors, one per sequence.
+        v_contexts: List of value context tensors, one per sequence.
+        block_size: Number of tokens per cache block.
+        num_kv_heads: Number of key/value heads.
+        head_size: Head dimension (must be divisible by 16).
+        dtype: Source data type for the bf16 intermediate cache.
+        device: Target device.
+        num_blocks: Total number of blocks to allocate.
+        common_attn_metadata: Metadata containing block tables and
+            sequence lengths.
+        kv_scale_val: Scalar float used as both k_scale and v_scale
+            during quantization.
+
+    Returns:
+        ``torch.Tensor``: The nvfp4 kv_cache tensor (uint8, HND-strided).
+    """
+    # First create a bf16 HND cache so block tables are populated.
+    bf16_cache = _create_hnd_kv_cache(
+        k_contexts,
+        v_contexts,
+        block_size,
+        num_kv_heads,
+        head_size,
+        dtype,
+        device,
+        num_blocks,
+        common_attn_metadata,
+    )
+
+    # Allocate nvfp4 cache: same shape but with full_dim (data + scale).
+    full_dim = nvfp4_kv_cache_full_dim(head_size)
+    hnd_order = (0, 1, 3, 2, 4)
+    nvfp4_cache = torch.zeros(
+        (num_blocks, 2, num_kv_heads, block_size, full_dim),
+        dtype=torch.uint8,
+        device=device,
+    ).permute(*hnd_order)
+
+    # Flatten bf16 context into tokens and quantize via reshape_and_cache_flash.
+    # bf16_cache is (num_blocks, 2, block_size, num_kv_heads, head_size) logical
+    # with HND physical strides.
+    block_table = common_attn_metadata.block_table_tensor
+    seq_lens = common_attn_metadata.seq_lens.cpu()
+    query_lens = (
+        common_attn_metadata.query_start_loc_cpu[1:]
+        - common_attn_metadata.query_start_loc_cpu[:-1]
+    )
+    kv_scale_t = torch.tensor(kv_scale_val, dtype=torch.float32, device=device)
+
+    for i in range(len(k_contexts)):
+        ctx_len = int(seq_lens[i]) - int(query_lens[i])
+        if ctx_len == 0:
+            continue
+        # Gather context tokens from the bf16 cache using block table.
+        n_ctx_blocks = (ctx_len + block_size - 1) // block_size
+        blocks = block_table[i, :n_ctx_blocks]
+        # bf16_cache[:, kv_idx] is (num_blocks, block_size, num_kv_heads, head_size)
+        k_ctx = bf16_cache[blocks, 0].reshape(-1, num_kv_heads, head_size)[:ctx_len]
+        v_ctx = bf16_cache[blocks, 1].reshape(-1, num_kv_heads, head_size)[:ctx_len]
+        # Build slot mapping for these context tokens.
+        token_offsets = torch.arange(ctx_len, device=device)
+        block_indices = token_offsets // block_size
+        intra_offsets = token_offsets % block_size
+        slots = block_table[i, block_indices] * block_size + intra_offsets
+        torch.ops._C_cache_ops.reshape_and_cache_flash(
+            k_ctx,
+            v_ctx,
+            nvfp4_cache[:, 0],
+            nvfp4_cache[:, 1],
+            slots,
+            "nvfp4",
+            kv_scale_t,
+            kv_scale_t,
+        )
+
+    return nvfp4_cache
+
+
+def _run_trtllm_integration(batch_spec, kv_cache_dtype="auto", model_name=MODEL):
     """Run TRTLLM attention through the full FlashInfer pipeline
     and compare against an SDPA reference."""
     set_random_seed(42)
-    device = torch.device("cuda:0")
+    device = torch.device(f"{DEVICE_TYPE}:0")
 
     vllm_config = create_vllm_config(
-        model_name=MODEL,
+        model_name=model_name,
         max_model_len=max(batch_spec.seq_lens),
         block_size=BLOCK_SIZE,
         num_gpu_blocks=NUM_GPU_BLOCKS,
     )
     vllm_config.attention_config.use_trtllm_attention = True
+    vllm_config.cache_config.cache_dtype = kv_cache_dtype
 
     num_q_heads = vllm_config.model_config.get_num_attention_heads(
         vllm_config.parallel_config
@@ -247,28 +359,51 @@ def causal_mask_mod(b, h, q_idx, kv_idx, *, context_len):
     common_attn_metadata = create_common_attn_metadata(batch_spec, BLOCK_SIZE, device)
 
     # 2. Create HND KV cache
-    kv_cache = _create_hnd_kv_cache(
-        k_contexts,
-        v_contexts,
-        BLOCK_SIZE,
-        num_kv_heads,
-        head_size,
-        dtype,
-        device,
-        NUM_GPU_BLOCKS,
-        common_attn_metadata,
-    )
+    is_nvfp4 = kv_cache_dtype == "nvfp4"
+    if is_nvfp4:
+        # Compute a global scale from the context data.
+        all_ctx = torch.cat(k_contexts + v_contexts, dim=0)
+        kv_scale_val = (all_ctx.abs().amax() / 448.0).item()
+        kv_cache = _create_nvfp4_hnd_kv_cache(
+            k_contexts,
+            v_contexts,
+            BLOCK_SIZE,
+            num_kv_heads,
+            head_size,
+            dtype,
+            device,
+            NUM_GPU_BLOCKS,
+            common_attn_metadata,
+            kv_scale_val,
+        )
+    else:
+        kv_scale_val = 1.0
+        kv_cache = _create_hnd_kv_cache(
+            k_contexts,
+            v_contexts,
+            BLOCK_SIZE,
+            num_kv_heads,
+            head_size,
+            dtype,
+            device,
+            NUM_GPU_BLOCKS,
+            common_attn_metadata,
+        )
 
     # 3. Run through FlashInfer with TRTLLM enabled
     set_kv_cache_layout("HND")
     get_kv_cache_layout.cache_clear()
 
     try:
+        is_nvfp4 = kv_cache_dtype == "nvfp4"
+        kv_quant_mode = KVQuantMode.NVFP4 if is_nvfp4 else KVQuantMode.NONE
+        spec_dtype = torch.uint8 if is_nvfp4 else dtype
         kv_cache_spec = FullAttentionSpec(
             block_size=BLOCK_SIZE,
             num_kv_heads=num_kv_heads,
             head_size=head_size,
-            dtype=dtype,
+            dtype=spec_dtype,
+            kv_quant_mode=kv_quant_mode,
         )
         layer_names = ["test_layer_0"]
 
@@ -311,10 +446,20 @@ def causal_mask_mod(b, h, q_idx, kv_idx, *, context_len):
                 num_kv_heads=num_kv_heads,
                 alibi_slopes=None,
                 sliding_window=None,
-                kv_cache_dtype="auto",
+                kv_cache_dtype=kv_cache_dtype,
             )
 
             mock_layer = MockAttentionLayer(device)
+            if is_nvfp4:
+                # For nvfp4, k_scale/v_scale are the global quantization
+                # scales (amax/448) used by reshape_and_cache_flash.
+                kv_scale_t = torch.tensor(
+                    kv_scale_val, dtype=torch.float32, device=device
+                )
+                mock_layer._k_scale = kv_scale_t
+                mock_layer._v_scale = kv_scale_t
+                mock_layer._k_scale_float = kv_scale_val
+                mock_layer._v_scale_float = kv_scale_val
             output = torch.empty_like(query_vllm)
 
             impl.do_kv_cache_update(
@@ -325,6 +470,23 @@ def causal_mask_mod(b, h, q_idx, kv_idx, *, context_len):
                 attn_metadata.slot_mapping,
             )
 
+            # nvfp4 trtllm kernel requires FP8 queries. In the real
+            # pipeline the attention layer handles this; here we
+            # quantize manually.
+            if is_nvfp4:
+                finfo = torch.finfo(torch.float8_e4m3fn)
+                q_amax = query_vllm.abs().amax().clamp(min=1e-12)
+                q_s = (finfo.max / q_amax * 0.1).item()
+                query_vllm = (
+                    (query_vllm * q_s)
+                    .clamp(finfo.min, finfo.max)
+                    .to(torch.float8_e4m3fn)
+                )
+                mock_layer._q_scale = torch.tensor(
+                    1.0 / q_s, dtype=torch.float32, device=device
+                )
+                mock_layer._q_scale_float = 1.0 / q_s
+
             output = impl.forward(
                 mock_layer,
                 query_vllm,
@@ -336,12 +498,11 @@ def causal_mask_mod(b, h, q_idx, kv_idx, *, context_len):
             )
 
         # 4. Compare against SDPA reference
-        torch.testing.assert_close(
-            output,
-            sdpa_output,
-            atol=1e-2,
-            rtol=1e-2,
-        )
+        if is_nvfp4:
+            atol, rtol = 1.0, 1.0  # nvfp4 has higher quantization error
+        else:
+            atol, rtol = 1e-2, 1e-2
+        torch.testing.assert_close(output, sdpa_output, atol=atol, rtol=rtol)
 
     finally:
         set_kv_cache_layout(None)
@@ -358,3 +519,18 @@ def test_trtllm_gen_full_attention_integration(batch_spec_name: str):
     MetadataBuilder.build() -> FlashInferImpl.forward() pipeline,
     with real TRTLLM kernels on Blackwell."""
     _run_trtllm_integration(BATCH_SPECS[batch_spec_name])
+
+
+@pytest.mark.parametrize(
+    "batch_spec_name",
+    list(BATCH_SPECS.keys()),
+)
+@torch.inference_mode()
+def test_trtllm_gen_nvfp4_kv_integration(batch_spec_name: str):
+    """Test TRTLLM attention with nvfp4 KV cache through the full
+    FlashInfer MetadataBuilder.build() -> FlashInferImpl.forward() pipeline."""
+    _run_trtllm_integration(
+        BATCH_SPECS[batch_spec_name],
+        kv_cache_dtype="nvfp4",
+        model_name=MODEL_NVFP4,
+    )
diff --git a/tests/v1/attention/utils.py b/tests/v1/attention/utils.py
index 91decf6658a5..3daafcdc6ff8 100644
--- a/tests/v1/attention/utils.py
+++ b/tests/v1/attention/utils.py
@@ -21,10 +21,19 @@
 from vllm.v1.attention.backend import (
     AttentionImpl,
     AttentionMetadataBuilder,
+    AttentionType,
     CommonAttentionMetadata,
 )
+from vllm.v1.attention.backends.mamba_attn import (
+    BaseMambaAttentionMetadata,
+    BaseMambaAttentionMetadataBuilder,
+)
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
-from vllm.v1.kv_cache_interface import FullAttentionSpec
+from vllm.v1.kv_cache_interface import (
+    EncoderOnlyAttentionSpec,
+    FullAttentionSpec,
+    MambaSpec,
+)
 
 
 @dataclass
@@ -106,6 +115,7 @@ def create_common_attn_metadata(
         query_start_loc=query_start_loc,
         query_start_loc_cpu=query_start_loc_cpu,
         seq_lens=seq_lens,
+        seq_lens_cpu_upper_bound=seq_lens_cpu,
         _seq_lens_cpu=seq_lens_cpu,
         _num_computed_tokens_cpu=num_computed_tokens_cpu,
         num_reqs=batch_spec.batch_size,
@@ -142,8 +152,24 @@ def try_backend_includes_kv_cache_update(
         raise AssertionError("unreachable") from None
 
 
-def create_standard_kv_cache_spec(vllm_config: VllmConfig) -> FullAttentionSpec:
-    """Create a FullAttentionSpec from ModelParams only."""
+def create_standard_kv_cache_spec(
+    vllm_config: VllmConfig,
+    attn_type: AttentionType = AttentionType.DECODER,
+) -> FullAttentionSpec | EncoderOnlyAttentionSpec:
+    """Create an AttentionSpec from VllmConfig.
+
+    Returns an EncoderOnlyAttentionSpec for encoder-only attention (no KV
+    cache), and a FullAttentionSpec otherwise.
+    """
+    if attn_type == AttentionType.ENCODER_ONLY:
+        return EncoderOnlyAttentionSpec(
+            block_size=vllm_config.cache_config.block_size,
+            num_kv_heads=vllm_config.model_config.get_num_kv_heads(
+                vllm_config.parallel_config
+            ),
+            head_size=vllm_config.model_config.get_head_size(),
+            dtype=vllm_config.model_config.dtype,
+        )
     return FullAttentionSpec(
         block_size=vllm_config.cache_config.block_size,
         num_kv_heads=vllm_config.model_config.get_num_kv_heads(
@@ -358,3 +384,34 @@ class BackendConfig:
         },
     ),
 }
+
+
+class MockMambaBuilder(BaseMambaAttentionMetadataBuilder[BaseMambaAttentionMetadata]):
+    """Minimal concrete subclass for testing (base class is ABC)."""
+
+    metadata_cls = BaseMambaAttentionMetadata
+
+    @classmethod
+    def build_mamba_metadata(
+        cls,
+        vllm_config: VllmConfig,
+        seq_lens: list[int],
+        query_lens: list[int],
+        is_prefilling: list[bool],
+        *,
+        device: torch.device | None = None,
+    ) -> BaseMambaAttentionMetadata:
+        block_size = vllm_config.cache_config.block_size
+        device = device or torch.device("cpu")
+        mamba_spec = MambaSpec(
+            block_size=block_size, shapes=((1,), (1,)), dtypes=(torch.float32,)
+        )
+        builder = cls(mamba_spec, ["layer0"], vllm_config, device)
+        batch_spec = BatchSpec(seq_lens=seq_lens, query_lens=query_lens)
+        common_metadata = create_common_attn_metadata(
+            batch_spec, block_size=block_size, device=device, arange_block_indices=True
+        )
+        common_metadata = common_metadata.replace(
+            is_prefilling=torch.tensor(is_prefilling, dtype=torch.bool)
+        )
+        return builder.build(0, common_metadata)
diff --git a/tests/v1/core/test_async_scheduler.py b/tests/v1/core/test_async_scheduler.py
index a77ae81bae56..a77a50173f3f 100644
--- a/tests/v1/core/test_async_scheduler.py
+++ b/tests/v1/core/test_async_scheduler.py
@@ -1,10 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from collections import deque
+from unittest.mock import Mock
 
 import pytest
 
-from vllm.v1.core.sched.output import SchedulerOutput
+from vllm.v1.core.sched.async_scheduler import AsyncScheduler
+from vllm.v1.core.sched.output import CachedRequestData, SchedulerOutput
 from vllm.v1.outputs import ModelRunnerOutput
 from vllm.v1.request import RequestStatus
 from vllm.v1.utils import ConstantList
@@ -151,7 +153,6 @@ def test_prefix_caching_for_prefill_dedup():
         same_prompt=True,
         block_size=BLOCK_SIZE,
     )
-    requests_copy = requests.copy()
 
     # Two requests with the same prompt.
     req0 = requests.pop(0)
@@ -165,26 +166,31 @@ def test_prefix_caching_for_prefill_dedup():
     # Make sure prefix caching de-duplicates the prompts in the same step,
     # so all the blocks except the last are shared between the two requests.
     assert len(sched_output.num_scheduled_tokens) == 2
-    num_blocks = num_prompt_tokens // BLOCK_SIZE
-    assert req0.num_cached_tokens == 0
-    assert req1.num_cached_tokens >= num_blocks * BLOCK_SIZE
+    assert sched_output.num_scheduled_tokens[req0.request_id] == num_prompt_tokens
+    assert (
+        sched_output.num_scheduled_tokens[req1.request_id]
+        == num_prompt_tokens % BLOCK_SIZE
+    )
 
     sched_outputs.append(scheduler.schedule())
     while sched_outputs:
+        added_req = None
         if requests:
-            scheduler.add_request(requests.pop(0))
+            added_req = requests.pop(0)
+            scheduler.add_request(added_req)
         sched_output = sched_outputs.popleft()
         model_runner_output = _make_model_runner_output(sched_output)
         scheduler.update_from_output(sched_output, model_runner_output)
         sched_output = scheduler.schedule()
         if sched_output.num_scheduled_tokens:
             sched_outputs.append(sched_output)
+            if added_req:
+                assert (
+                    sched_output.num_scheduled_tokens[added_req.request_id]
+                    == num_prompt_tokens % BLOCK_SIZE
+                )
 
-    # Other requests scheduled after the two requests should also get
-    # prefix cache hit.
     assert scheduler.get_num_unfinished_requests() == 0
-    for req in requests_copy[1:]:
-        assert req.num_cached_tokens >= num_blocks * BLOCK_SIZE
 
 
 def test_prefix_caching_for_multi_turn():
@@ -241,9 +247,76 @@ def test_prefix_caching_for_multi_turn():
     # Schedule the next-turn requests.
     for req in next_turn_requests:
         scheduler.add_request(req)
-    sched_outputs.append(scheduler.schedule())
+    sched_output = scheduler.schedule()
+    sched_outputs.append(sched_output)
 
     # Make sure the next-turn requests get prefix cache hit by the previous
     # requests.
     for req in next_turn_requests:
-        assert req.num_cached_tokens == req.num_prompt_tokens // BLOCK_SIZE * BLOCK_SIZE
+        assert sched_output.num_scheduled_tokens[req.request_id] == (
+            req.num_prompt_tokens % BLOCK_SIZE
+        )
+
+
+def test_abort_request_when_structured_output_fsm_cannot_advance():
+    scheduler = object.__new__(AsyncScheduler)
+    request = create_requests(num_requests=1, num_tokens=1)[0]
+    request.structured_output_request = Mock()
+    request.structured_output_request.grammar = Mock()
+    request.structured_output_request.grammar.accept_tokens.return_value = False
+    request.status = RequestStatus.RUNNING
+    request.num_computed_tokens = request.num_tokens
+    request.num_output_placeholders = 1
+
+    scheduler.perf_metrics = None
+    scheduler.connector = None
+    scheduler.structured_output_manager = Mock()
+    scheduler.structured_output_manager.should_advance.return_value = True
+    scheduler.requests = {request.request_id: request}
+    scheduler.running = [request]
+    scheduler.waiting = Mock()
+    scheduler.kv_cache_manager = Mock()
+    scheduler.kv_cache_manager.take_events.return_value = None
+    scheduler.kv_event_publisher = Mock()
+    scheduler.finished_req_ids = set()
+    scheduler.finished_req_ids_dict = None
+    scheduler.vllm_config = Mock()
+    scheduler.vllm_config.model_config.enable_return_routed_experts = False
+    scheduler.enable_return_routed_experts = False
+    scheduler.recompute_kv_load_failures = False
+    scheduler.make_stats = Mock(return_value=None)
+    scheduler.max_model_len = 128
+
+    def free_request(req, delay_free_blocks=False):
+        scheduler.finished_req_ids.add(req.request_id)
+        scheduler.requests.pop(req.request_id, None)
+        return None
+
+    scheduler._free_request = Mock(side_effect=free_request)
+
+    output = SchedulerOutput(
+        scheduled_new_reqs=[],
+        scheduled_cached_reqs=CachedRequestData.make_empty(),
+        num_scheduled_tokens={request.request_id: 1},
+        total_num_scheduled_tokens=1,
+        scheduled_encoder_inputs={},
+        scheduled_spec_decode_tokens={},
+        num_common_prefix_blocks=[],
+        finished_req_ids=set(),
+        free_encoder_mm_hashes=[],
+    )
+    model_runner_output = ModelRunnerOutput(
+        req_ids=[request.request_id],
+        req_id_to_index={request.request_id: 0},
+        sampled_token_ids=[[123]],
+        logprobs=None,
+        prompt_logprobs_dict={},
+        pooler_output=[],
+    )
+
+    scheduler.update_from_output(output, model_runner_output)
+
+    assert request.resumable is False
+    assert request.status == RequestStatus.FINISHED_ERROR
+    assert request.request_id not in scheduler.requests
+    assert not scheduler.running
diff --git a/tests/v1/core/test_kv_cache_utils.py b/tests/v1/core/test_kv_cache_utils.py
index d8ecf28cbed1..94e9f6f4c100 100644
--- a/tests/v1/core/test_kv_cache_utils.py
+++ b/tests/v1/core/test_kv_cache_utils.py
@@ -10,6 +10,7 @@
 
 import vllm.v1.core.kv_cache_utils as kv_cache_utils
 from vllm.config import ModelConfig, SchedulerConfig, VllmConfig
+from vllm.config.kv_events import KVEventsConfig
 from vllm.lora.request import LoRARequest
 from vllm.multimodal.inputs import (
     MultiModalFeatureSpec,
@@ -42,11 +43,16 @@
     KVCacheConfig,
     KVCacheGroupSpec,
     KVCacheSpec,
+    KVCacheSpecKind,
     KVCacheTensor,
     MambaSpec,
     MLAAttentionSpec,
+    SinkFullAttentionSpec,
+    SlidingWindowMLASpec,
     SlidingWindowSpec,
     UniformTypeKVCacheSpecs,
+    get_kv_cache_spec_kind,
+    get_kv_cache_spec_sliding_window,
 )
 from vllm.v1.metrics.stats import CachingMetrics, PrefixCacheStats
 from vllm.v1.request import Request
@@ -1854,15 +1860,159 @@ def test_generate_scheduler_kv_cache_config():
 
 
 def new_mla_spec(cache_dtype_str=None):
+    # head_size = kv_lora_rank(512) + qk_rope_head_dim(64) = 576
     return MLAAttentionSpec(
         block_size=16,
-        num_kv_heads=16,
-        head_size=64,
+        num_kv_heads=1,
+        head_size=576,
         dtype=torch.float32,
         cache_dtype_str=cache_dtype_str,
     )
 
 
+def test_get_kv_cache_spec_kind_prefers_specific_attention_subclasses():
+    assert get_kv_cache_spec_kind(new_mla_spec()) == KVCacheSpecKind.MLA_ATTENTION
+
+    sliding_window_mla_spec = SlidingWindowMLASpec(
+        block_size=16,
+        num_kv_heads=1,
+        head_size=576,
+        dtype=torch.float32,
+        sliding_window=128,
+    )
+    assert (
+        get_kv_cache_spec_kind(sliding_window_mla_spec)
+        == KVCacheSpecKind.SLIDING_WINDOW_MLA
+    )
+
+    sink_full_attention_spec = SinkFullAttentionSpec(
+        block_size=16,
+        num_kv_heads=1,
+        head_size=64,
+        dtype=torch.float32,
+        sink_len=4,
+    )
+    assert (
+        get_kv_cache_spec_kind(sink_full_attention_spec)
+        == KVCacheSpecKind.SINK_FULL_ATTENTION
+    )
+
+
+def test_get_kv_cache_spec_kind_unwraps_uniform_type_specs():
+    uniform_mla_spec = UniformTypeKVCacheSpecs(
+        block_size=16,
+        kv_cache_specs={
+            "layer_1": new_mla_spec(),
+            "layer_2": new_mla_spec(cache_dtype_str="fp8"),
+        },
+    )
+    assert get_kv_cache_spec_kind(uniform_mla_spec) == KVCacheSpecKind.MLA_ATTENTION
+
+    uniform_swa_mla_spec = UniformTypeKVCacheSpecs(
+        block_size=16,
+        kv_cache_specs={
+            "layer_1": SlidingWindowMLASpec(
+                block_size=16,
+                num_kv_heads=1,
+                head_size=576,
+                dtype=torch.float32,
+                sliding_window=128,
+            ),
+            "layer_2": SlidingWindowMLASpec(
+                block_size=16,
+                num_kv_heads=1,
+                head_size=1024,
+                dtype=torch.float32,
+                sliding_window=128,
+            ),
+        },
+    )
+    assert (
+        get_kv_cache_spec_kind(uniform_swa_mla_spec)
+        == KVCacheSpecKind.SLIDING_WINDOW_MLA
+    )
+
+
+def test_get_kv_cache_spec_kind_unknown_for_mixed_uniform_type_specs():
+    uniform_mixed_spec = UniformTypeKVCacheSpecs(
+        block_size=16,
+        kv_cache_specs={
+            "layer_1": new_mla_spec(),
+            "layer_2": SlidingWindowMLASpec(
+                block_size=16,
+                num_kv_heads=1,
+                head_size=576,
+                dtype=torch.float32,
+                sliding_window=128,
+            ),
+        },
+    )
+    assert get_kv_cache_spec_kind(uniform_mixed_spec) == KVCacheSpecKind.UNKNOWN
+
+
+def test_get_kv_cache_spec_sliding_window_reads_windowed_specs():
+    full_attention_spec = FullAttentionSpec(
+        block_size=16,
+        num_kv_heads=1,
+        head_size=64,
+        dtype=torch.float32,
+    )
+    sliding_window_spec = SlidingWindowSpec(
+        block_size=16,
+        num_kv_heads=1,
+        head_size=64,
+        dtype=torch.float32,
+        sliding_window=128,
+    )
+
+    assert get_kv_cache_spec_sliding_window(full_attention_spec) is None
+    assert get_kv_cache_spec_sliding_window(sliding_window_spec) == 128
+
+
+def test_get_kv_cache_spec_sliding_window_unwraps_uniform_type_specs():
+    uniform_window_spec = UniformTypeKVCacheSpecs(
+        block_size=16,
+        kv_cache_specs={
+            "layer_1": SlidingWindowSpec(
+                block_size=16,
+                num_kv_heads=1,
+                head_size=64,
+                dtype=torch.float32,
+                sliding_window=128,
+            ),
+            "layer_2": SlidingWindowSpec(
+                block_size=16,
+                num_kv_heads=2,
+                head_size=64,
+                dtype=torch.float32,
+                sliding_window=128,
+            ),
+        },
+    )
+    mixed_window_spec = UniformTypeKVCacheSpecs(
+        block_size=16,
+        kv_cache_specs={
+            "layer_1": SlidingWindowSpec(
+                block_size=16,
+                num_kv_heads=1,
+                head_size=64,
+                dtype=torch.float32,
+                sliding_window=128,
+            ),
+            "layer_2": SlidingWindowSpec(
+                block_size=16,
+                num_kv_heads=1,
+                head_size=64,
+                dtype=torch.float32,
+                sliding_window=256,
+            ),
+        },
+    )
+
+    assert get_kv_cache_spec_sliding_window(uniform_window_spec) == 128
+    assert get_kv_cache_spec_sliding_window(mixed_window_spec) is None
+
+
 def test_merge_mla_spec():
     kv_cache_specs = [
         new_mla_spec(),
@@ -2072,6 +2222,54 @@ def test_auto_fit_max_model_len_not_triggered():
     assert vllm_config.model_config.max_model_len == 16
 
 
+def test_auto_fit_max_model_len_respects_num_gpu_blocks_override():
+    """Auto-fit must size max_model_len against the override-clamped pool, not
+    the raw `available_memory`. Without this, auto-fit could pick a
+    max_model_len that no longer fits once `num_gpu_blocks_override` is applied.
+    """
+    model_config = ModelConfig(max_model_len=16384)
+    model_config.original_max_model_len = -1  # request auto-fit
+    vllm_config = VllmConfig(model_config=model_config)
+    # Cap the cache to 32 blocks regardless of available memory.
+    vllm_config.cache_config.num_gpu_blocks_override = 32
+
+    mem_per_block_per_layer = 16 * 2 * 64 * 4 * 2
+    kv_cache_specs = {
+        "layer_1": new_kv_cache_spec(),  # block_size=16
+        "layer_2": new_kv_cache_spec(),
+    }
+    # Plenty of raw memory (1024 blocks per layer would fit max_model_len=16384).
+    large_available_memory = mem_per_block_per_layer * 2 * 1024
+
+    get_kv_cache_configs(vllm_config, [kv_cache_specs], [large_available_memory])
+
+    # 32 blocks * block_size 16 = 512 token slots, so max_model_len must
+    # auto-fit at or below that.
+    assert 0 < vllm_config.model_config.max_model_len <= 32 * 16
+
+
+def test_check_enough_kv_cache_memory_respects_num_gpu_blocks_override():
+    """Admission check must use the override-clamped pool size, not raw
+    `available_memory`. Without this, startup could accept a max_model_len
+    that does not actually fit in `num_gpu_blocks_override` blocks.
+    """
+    model_config = ModelConfig(max_model_len=16384)
+    vllm_config = VllmConfig(model_config=model_config)
+    # 32 blocks is far too small for max_model_len=16384 (would need 1024).
+    vllm_config.cache_config.num_gpu_blocks_override = 32
+
+    mem_per_block_per_layer = 16 * 2 * 64 * 4 * 2
+    kv_cache_specs = {
+        "layer_1": new_kv_cache_spec(),
+        "layer_2": new_kv_cache_spec(),
+    }
+    # Plenty of raw memory: a bytes-only check against this would pass.
+    large_available_memory = mem_per_block_per_layer * 2 * 1024
+
+    with pytest.raises(ValueError, match="max seq len"):
+        get_kv_cache_configs(vllm_config, [kv_cache_specs], [large_available_memory])
+
+
 def test_unify_hybrid_kv_cache_specs():
     # 1. has_full_attention and has_sliding_window
     before_spec_1 = new_kv_cache_spec()
@@ -2137,3 +2335,30 @@ def test_unify_hybrid_kv_cache_specs():
 
     with pytest.raises(ValueError):
         kv_cache_utils.unify_hybrid_kv_cache_specs(kv_cache_spec)
+
+
+def test_hma_not_disabled_when_kv_events_enabled():
+    """
+    Test enabling KV events must not force disable_hybrid_kv_cache_manager to True.
+
+    This test guards against that regression by verifying that a VllmConfig
+    with kv_events_config set still resolves disable_hybrid_kv_cache_manager
+    to False (i.e. HMA remains enabled) when no other condition requires it
+    to be disabled.
+    """
+    model_config = ModelConfig(max_model_len=16)
+    kv_events_config = KVEventsConfig(
+        enable_kv_cache_events=True,
+        publisher="null",
+    )
+
+    # Leave disable_hybrid_kv_cache_manager as None (the default) so that
+    # VllmConfig.__post_init__ resolves it automatically.
+    vllm_config = VllmConfig(
+        model_config=model_config,
+        kv_events_config=kv_events_config,
+    )
+
+    assert vllm_config.scheduler_config.disable_hybrid_kv_cache_manager is False, (
+        "kv_events_config must not force-disable the hybrid KV cache manager."
+    )
diff --git a/tests/v1/core/test_prefix_caching.py b/tests/v1/core/test_prefix_caching.py
index b8b387fffd99..546412b1d2f8 100644
--- a/tests/v1/core/test_prefix_caching.py
+++ b/tests/v1/core/test_prefix_caching.py
@@ -8,6 +8,7 @@
 import pytest
 import torch
 
+import vllm.v1.core.kv_cache_manager as kv_cache_manager
 import vllm.v1.core.kv_cache_utils as kv_cache_utils
 from vllm.distributed.kv_events import AllBlocksCleared, BlockRemoved, BlockStored
 from vllm.lora.request import LoRARequest
@@ -35,6 +36,7 @@
     FullAttentionSpec,
     KVCacheConfig,
     KVCacheGroupSpec,
+    KVCacheSpecKind,
     MambaSpec,
     SlidingWindowSpec,
 )
@@ -557,19 +559,19 @@ def test_prefill_hybrid_model_eagle():
     computed_blocks, num_computed_tokens = manager.get_computed_blocks(req1)
     assert len(req1.block_hashes) == num_full_blocks
     assert computed_blocks.get_block_ids() == (
-        [1, 2, 3, 4],
-        [0, 9, 10, 11],
-        [0, 16, 17, 18],
+        [1, 2, 3, 4, 5],
+        [0, 0, 10, 11, 12],
+        [0, 0, 17, 18, 19],
     )
-    assert num_computed_tokens == 4 * block_size
+    assert num_computed_tokens == 5 * block_size
     num_new_tokens = len(all_token_ids) - num_computed_tokens
     blocks = manager.allocate_slots(
         req1, num_new_tokens, num_computed_tokens, computed_blocks
     )
     assert blocks is not None and blocks.get_block_ids() == (
-        [22, 23, 24],
-        [25, 26, 27],
-        [28, 29, 30],
+        [22, 23],
+        [24, 25],
+        [26, 27],
     )
     for block_per_group in computed_blocks.blocks:
         for block in block_per_group:
@@ -591,7 +593,7 @@ def test_prefill_hybrid_model_eagle():
             make_block_hash_with_group_id(block_hashes[0], 1),
             make_block_hash_with_group_id(block_hashes[0], 2),
         ],
-        4,
+        5,
     )
 
     # Evict the first block of full attention, makes total cache miss.
@@ -605,7 +607,7 @@ def test_prefill_hybrid_model_eagle():
         0,
     )
 
-    # Evict the last block of all layers, reduces the hit length to 3.
+    # Evict the last block of all layers, reduces the hit length to 4.
     _test_partial_request_hit(
         manager,
         block_size,
@@ -617,10 +619,10 @@ def test_prefill_hybrid_model_eagle():
             make_block_hash_with_group_id(block_hashes[-1], 1),
             make_block_hash_with_group_id(block_hashes[-1], 2),
         ],
-        3,
+        4,
     )
 
-    # Evict the last block of full attention, reduces the hit length to 3.
+    # Evict the last block of full attention, reduces the hit length to 4.
     _test_partial_request_hit(
         manager,
         block_size,
@@ -628,7 +630,7 @@ def test_prefill_hybrid_model_eagle():
         "5",
         all_token_ids,
         [make_block_hash_with_group_id(block_hashes[-1], 0)],
-        3,
+        4,
     )
 
     # Since the last block of full attention is dropped for eagle, evict
@@ -655,12 +657,11 @@ def test_prefill_hybrid_model_eagle():
         3,
     )
 
-    # Evict different set of blocks for full attention and sliding window makes
-    # total cache miss.
-    # The cache hit length of full attention is 4 * block_size.
-    # The cache hit length of sliding window is 3 * block_size.
-    # Then it is cache miss as the two type of layers
-    # have different hit length.
+    # Evict different set of blocks for full attention and sliding window.
+    # Full loses its last block so it drops to 4 full blocks after the eagle
+    # pop; SWA lost block 0 (outside the sliding window of the final hit),
+    # which is not required for the K+1 anchor at position 4. Coordinated
+    # single-drop aligns both groups at hit=4.
     _test_partial_request_hit(
         manager,
         block_size,
@@ -672,7 +673,7 @@ def test_prefill_hybrid_model_eagle():
             make_block_hash_with_group_id(block_hashes[0], 1),
             make_block_hash_with_group_id(block_hashes[0], 2),
         ],
-        0,
+        4,
     )
 
 
@@ -893,7 +894,7 @@ def test_prefill_hybrid_model_combinations(spec_types: list[str]):
 # - 2 groups: 1 full + 1 other
 _EAGLE_HYBRID_MODEL_TEST_CASES = [
     # 2 groups: 1 full + 1 other
-    pytest.param(["full", "sliding_window"], 2, id="2g-full+sw"),
+    pytest.param(["full", "sliding_window"], 3, id="2g-full+sw"),
 ]
 
 
@@ -1934,6 +1935,7 @@ def test_kv_cache_events(blocks_to_cache: int):
         == len(manager.block_pool.cached_block_hash_to_block)
     )
     assert len(block.token_ids) == block.block_size * len(block.block_hashes)
+    assert block.kv_cache_spec_kind == KVCacheSpecKind.FULL_ATTENTION.value
     assert len(manager.block_pool.kv_event_queue) == 0
 
     stored_block_hash = block.block_hashes
@@ -1947,6 +1949,7 @@ def test_kv_cache_events(blocks_to_cache: int):
     events = manager.take_events()
 
     for blocks in events[:-1]:
+        assert isinstance(blocks, BlockRemoved)
         assert blocks.block_hashes[0] in stored_block_hash
     assert len(events) == blocks_to_cache + 1
     assert isinstance(events[-2], BlockRemoved)
@@ -1970,6 +1973,7 @@ def test_null_parent_block_hash():
     block_size = 1
     num_cached_blocks = 2
     num_full_blocks = 4
+    kv_cache_group_id = 0
 
     pool = BlockPool(
         num_gpu_blocks=8,
@@ -2002,7 +2006,7 @@ def test_null_parent_block_hash():
         num_cached_blocks=num_cached_blocks,
         num_full_blocks=num_full_blocks,
         block_size=block_size,
-        kv_cache_group_id=0,
+        kv_cache_group_id=kv_cache_group_id,
     )
 
     events = pool.take_events()
@@ -2021,6 +2025,9 @@ def test_null_parent_block_hash():
         for h in req.block_hashes[num_cached_blocks:num_full_blocks]
     ]
     assert event.block_hashes == expected_new_hashes
+    assert event.group_idx == kv_cache_group_id
+    assert event.kv_cache_spec_kind is None
+    assert event.kv_cache_spec_sliding_window is None
 
     # Ensure we didn't accidentally assign a hash to the null block.
     assert pool.null_block.block_hash is None
@@ -2087,6 +2094,236 @@ def test_kv_cache_events_with_lora(blocks_to_cache: int):
     assert block_stored_event.block_size == block_size
 
 
+@pytest.mark.parametrize("group_id", [0, 1, 2])
+def test_block_stored_event_group_idx(group_id: int):
+    """Test BlockStored events emitted by cache_full_blocks carry the correct
+    group_idx."""
+    block_size = 4
+    num_tokens = block_size * 2
+
+    manager = KVCacheManager(
+        make_kv_cache_config_three_types(block_size, num_blocks=5),
+        max_model_len=8192,
+        enable_caching=True,
+        enable_kv_cache_events=True,
+        hash_block_size=block_size,
+    )
+    pool = manager.block_pool
+
+    req = make_request(
+        "req_grp_idx",
+        prompt_token_ids=list(range(num_tokens)),
+        block_size=block_size,
+        hash_fn=sha256,
+    )
+
+    blocks = pool.get_new_blocks(2)
+    pool.cache_full_blocks(
+        request=req,
+        blocks=blocks,
+        num_cached_blocks=0,
+        num_full_blocks=2,
+        block_size=block_size,
+        kv_cache_group_id=group_id,
+    )
+
+    events = manager.take_events()
+    assert len(events) == 1
+    assert isinstance(events[0], BlockStored)
+    assert events[0].group_idx == group_id
+    assert (
+        events[0].kv_cache_spec_kind
+        == [
+            KVCacheSpecKind.FULL_ATTENTION.value,
+            KVCacheSpecKind.SLIDING_WINDOW.value,
+            KVCacheSpecKind.MAMBA.value,
+        ][group_id]
+    )
+    assert (
+        events[0].kv_cache_spec_sliding_window
+        == [
+            None,
+            2 * block_size,
+            None,
+        ][group_id]
+    )
+
+
+def test_block_stored_event_group_idx_multiple_groups():
+    """
+    Test BlockStored events for separate HMA groups that each carry the
+    correct group_idx.
+
+    Simulates the HMA scenario where full-attention blocks (group 0) and
+    sliding-window blocks (group 1) are cached independently and must be
+    distinguishable by consumers doing HMA-aware prefix-cache routing.
+    """
+    block_size = 4
+    num_tokens = block_size * 2
+
+    manager = KVCacheManager(
+        KVCacheConfig(
+            num_blocks=5,
+            kv_cache_tensors=[],
+            kv_cache_groups=[
+                KVCacheGroupSpec(
+                    ["layer1"],
+                    FullAttentionSpec(
+                        block_size=block_size,
+                        num_kv_heads=1,
+                        head_size=1,
+                        dtype=torch.float32,
+                    ),
+                ),
+                KVCacheGroupSpec(
+                    ["layer2"],
+                    SlidingWindowSpec(
+                        block_size=block_size,
+                        num_kv_heads=1,
+                        head_size=1,
+                        dtype=torch.float32,
+                        sliding_window=128,
+                    ),
+                ),
+            ],
+        ),
+        max_model_len=8192,
+        enable_caching=True,
+        enable_kv_cache_events=True,
+        hash_block_size=block_size,
+    )
+    pool = manager.block_pool
+
+    req = make_request(
+        "req_multi_grp",
+        prompt_token_ids=list(range(num_tokens)),
+        block_size=block_size,
+        hash_fn=sha256,
+    )
+
+    # Cache blocks for group 0 (full-attention)
+    blocks_grp0 = pool.get_new_blocks(2)
+    pool.cache_full_blocks(
+        request=req,
+        blocks=blocks_grp0,
+        num_cached_blocks=0,
+        num_full_blocks=2,
+        block_size=block_size,
+        kv_cache_group_id=0,
+    )
+
+    # Cache blocks for group 1 (sliding-window)
+    blocks_grp1 = pool.get_new_blocks(2)
+    pool.cache_full_blocks(
+        request=req,
+        blocks=blocks_grp1,
+        num_cached_blocks=0,
+        num_full_blocks=2,
+        block_size=block_size,
+        kv_cache_group_id=1,
+    )
+
+    events = manager.take_events()
+    assert len(events) == 2
+    assert isinstance(events[0], BlockStored)
+    assert events[0].group_idx == 0
+    assert events[0].kv_cache_spec_kind == KVCacheSpecKind.FULL_ATTENTION.value
+    assert events[0].kv_cache_spec_sliding_window is None
+    assert isinstance(events[1], BlockStored)
+    assert events[1].group_idx == 1
+    assert events[1].kv_cache_spec_kind == KVCacheSpecKind.SLIDING_WINDOW.value
+    assert events[1].kv_cache_spec_sliding_window == 128
+
+
+def test_block_stored_event_group_idx_out_of_bounds(monkeypatch):
+    """Out-of-range group_idx events are returned without metadata annotation."""
+    block_size = 4
+    manager = KVCacheManager(
+        make_kv_cache_config(block_size, num_blocks=5),
+        max_model_len=8192,
+        enable_caching=True,
+        enable_kv_cache_events=True,
+        hash_block_size=block_size,
+    )
+    event = BlockStored(
+        block_hashes=[1],
+        parent_block_hash=None,
+        token_ids=list(range(block_size)),
+        block_size=block_size,
+        lora_id=None,
+        medium=None,
+        lora_name=None,
+        group_idx=1,
+    )
+    manager.block_pool.kv_event_queue.append(event)
+    warnings = []
+
+    def collect_warning(message, *args, **kwargs):
+        del kwargs
+        warnings.append(message % args if args else message)
+
+    monkeypatch.setattr(kv_cache_manager.logger, "warning", collect_warning)
+    events = manager.take_events()
+
+    assert events == [event]
+    assert event.kv_cache_spec_kind is None
+    assert event.kv_cache_spec_sliding_window is None
+    assert warnings == ["Group index `1` not in KV cache metadata"]
+
+
+@pytest.mark.parametrize("group_id", [0, 1, 2])
+def test_block_removed_event_group_idx(group_id: int):
+    """
+    Test BlockRemoved events emitted on eviction carry the group_idx extracted
+    from the evicted block's BlockHashWithGroupId via get_group_id().
+    """
+    block_size = 4
+    num_tokens = block_size * 2
+
+    # null block + 4 usable; allocate all 4, cache 2, free all, re-allocate
+    # all 4 so the 2 cached blocks are forced through _maybe_evict_cached_block.
+    pool = BlockPool(
+        num_gpu_blocks=5,
+        enable_caching=True,
+        hash_block_size=block_size,
+        enable_kv_cache_events=True,
+    )
+
+    req = make_request(
+        "req_evict_grp",
+        prompt_token_ids=list(range(num_tokens)),
+        block_size=block_size,
+        hash_fn=sha256,
+    )
+
+    # Allocate all usable blocks and cache the first two for the target group.
+    all_blocks = pool.get_new_blocks(4)
+    pool.cache_full_blocks(
+        request=req,
+        blocks=all_blocks,
+        num_cached_blocks=0,
+        num_full_blocks=2,
+        block_size=block_size,
+        kv_cache_group_id=group_id,
+    )
+
+    # Drain the BlockStored events so only eviction events remain later.
+    pool.take_events()
+
+    # Return all blocks to the free queue so they become eviction candidates.
+    pool.free_blocks(all_blocks)
+
+    # Re-allocate all blocks; the two with hashes trigger BlockRemoved events.
+    pool.get_new_blocks(4)
+
+    events = pool.take_events()
+    removed_events = [e for e in events if isinstance(e, BlockRemoved)]
+
+    assert len(removed_events) == 2
+    for event in removed_events:
+        assert event.group_idx == group_id
+
+
 def test_eagle_enabled_removes_last_block():
     """Verify Eagle does NOT remove blocks when request
     length is divisible by block size."""
@@ -2291,6 +2528,145 @@ def test_different_block_size():
     assert num_computed_tokens == 4 * 16
 
 
+def test_hybrid_cache_blocks_swa_tail_window_only():
+    """Within each lcm-aligned segment, SWA's ``find_longest_cache_hit`` only
+    returns the trailing ``ceil((sliding_window - 1) / block_size)`` blocks
+    (its right-to-left scan stops once a contiguous match is found). Blocks
+    earlier in the segment can never serve a hit, so
+    ``HybridKVCacheCoordinator.cache_blocks`` should skip them rather than
+    polluting the prefix-cache hash map."""
+    block_size = 8
+    # Full attn block_size=32, SWA block_size=8, sw=8 -> lcm=32.
+    # tail = ceil(7/8) = 1; per_segment = 32/8 = 4.
+    # Per-segment template = [F, F, F, T]; only the last SWA block in each
+    # 32-token segment ends up in the prefix-cache hash map.
+    kv_cache_config = KVCacheConfig(
+        num_blocks=100,
+        kv_cache_tensors=[],
+        kv_cache_groups=[
+            KVCacheGroupSpec(
+                ["layer1"],
+                FullAttentionSpec(
+                    block_size=4 * block_size,
+                    num_kv_heads=1,
+                    head_size=1,
+                    dtype=torch.float16,
+                ),
+            ),
+            KVCacheGroupSpec(
+                ["layer2"],
+                SlidingWindowSpec(
+                    block_size=block_size,
+                    num_kv_heads=1,
+                    head_size=1,
+                    dtype=torch.float32,
+                    sliding_window=block_size,
+                ),
+            ),
+        ],
+    )
+    manager = KVCacheManager(
+        kv_cache_config=kv_cache_config,
+        max_model_len=8192,
+        enable_caching=True,
+        hash_block_size=block_size,
+    )
+
+    # 8 hash-blocks of 8 tokens (64 tokens, two lcm-aligned segments).
+    token_ids = [i for i in range(8) for _ in range(block_size)]
+    req = make_request("0", token_ids, block_size, sha256)
+    computed_blocks, _ = manager.get_computed_blocks(req)
+    blocks = manager.allocate_slots(
+        req,
+        8 * block_size,
+        len(computed_blocks.blocks[0]) * block_size,
+        computed_blocks,
+    )
+    assert blocks is not None
+    assert len(req.block_hashes) == 8
+
+    pool = manager.block_pool
+    # SWA group_id=1: only hash 3 and hash 7 (the last block of each
+    # 32-token segment) should be cached. Hashes 0,1,2,4,5,6 cannot serve
+    # a hit at any lcm-aligned length, so they must NOT be cached.
+    expected_cached = {3, 7}
+    for i in range(8):
+        cached = pool.get_cached_block(req.block_hashes[i], kv_cache_group_ids=[1])
+        if i in expected_cached:
+            assert cached is not None, f"SWA hash {i} should be cached"
+        else:
+            assert cached is None, (
+                f"SWA hash {i} cannot serve any lcm-aligned hit; should not be cached"
+            )
+
+
+def test_hybrid_cache_blocks_clamped_to_lcm():
+    """HybridKVCacheCoordinator.cache_blocks() clamps to lcm_block_size.
+    Chunks past the last lcm-aligned boundary can never participate in a
+    cache hit (find_longest_cache_hit always returns lcm-aligned hits), so
+    caching them only pollutes the prefix-cache hash map and keeps blocks
+    on the LRU list that could otherwise return to the free pool."""
+    block_size = 16
+    # Full attn block_size=32, SWA block_size=16 -> lcm=32.
+    kv_cache_config = KVCacheConfig(
+        num_blocks=100,
+        kv_cache_tensors=[],
+        kv_cache_groups=[
+            KVCacheGroupSpec(
+                ["layer1"],
+                FullAttentionSpec(
+                    block_size=block_size * 2,
+                    num_kv_heads=1,
+                    head_size=1,
+                    dtype=torch.float16,
+                ),
+            ),
+            KVCacheGroupSpec(
+                ["layer2"],
+                SlidingWindowSpec(
+                    block_size=block_size,
+                    num_kv_heads=1,
+                    head_size=1,
+                    dtype=torch.float32,
+                    sliding_window=2 * block_size,
+                ),
+            ),
+        ],
+    )
+    manager = KVCacheManager(
+        kv_cache_config=kv_cache_config,
+        max_model_len=8192,
+        enable_caching=True,
+        hash_block_size=block_size,
+    )
+
+    # 7 hash-blocks of 16 tokens (112 tokens). With lcm=32 the clamp truncates
+    # to 96 tokens — SWA caches 6 hashes, full-attn caches 3.
+    token_ids = [i for i in range(7) for _ in range(block_size)]
+    req = make_request("0", token_ids, block_size, sha256)
+    computed_blocks, _ = manager.get_computed_blocks(req)
+    blocks = manager.allocate_slots(
+        req,
+        7 * block_size,
+        len(computed_blocks.blocks[0]) * block_size,
+        computed_blocks,
+    )
+    assert blocks is not None
+    assert len(req.block_hashes) == 7
+
+    pool = manager.block_pool
+    # SWA group_id=1: hashes 0..5 cached (6 blocks * 16 tokens = 96), hash 6
+    # spans tokens [96, 112) past the lcm boundary and must NOT be cached.
+    for i in range(6):
+        assert (
+            pool.get_cached_block(req.block_hashes[i], kv_cache_group_ids=[1])
+            is not None
+        ), f"SWA hash {i} should be cached"
+    assert pool.get_cached_block(req.block_hashes[6], kv_cache_group_ids=[1]) is None, (
+        "SWA hash 6 spans tokens past the lcm boundary; should not be cached"
+    )
+
+
 def test_block_lookup_cache_single_block_per_key():
     cache = BlockHashToBlockMap()
     key0 = BlockHashWithGroupId(b"hash0")
@@ -2364,3 +2740,113 @@ def test_block_lookup_cache_multi_blocks_per_key():
     assert cache.pop(key1, 11) is block11
     assert cache.get_one_block(key1) is None
     assert cache.pop(key1, 12) is None
+
+
+def test_can_fit_full_sequence_swa_cap_admits_long_prompt():
+    """Hybrid full+SWA model with a pool sized at the startup minimum should
+    admit a prompt longer than the SWA cap, because SlidingWindowManager
+    recycles blocks during chunked prefill (issue #39734)."""
+    block_size = 16
+    sliding_window = 4 * block_size  # 64 tokens
+    max_num_batched_tokens = 8 * block_size  # 128 tokens
+    max_model_len = 64 * block_size  # 1024 tokens — much larger than the SWA cap
+    # Startup pool sizing: full demands cdiv(max_model_len, bs) = 64 blocks,
+    # SWA demands cdiv(SW-1+max_batched, bs) + 1 = cdiv(191, 16) + 1 = 13.
+    # Pool minimum = 64 + 13 = 77; +1 for the null block.
+    num_blocks = 64 + 13 + 1
+
+    config = KVCacheConfig(
+        num_blocks=num_blocks,
+        kv_cache_tensors=[],
+        kv_cache_groups=[
+            KVCacheGroupSpec(
+                ["layer_full"],
+                FullAttentionSpec(
+                    block_size=block_size,
+                    num_kv_heads=1,
+                    head_size=1,
+                    dtype=torch.float32,
+                ),
+            ),
+            KVCacheGroupSpec(
+                ["layer_swa"],
+                SlidingWindowSpec(
+                    block_size=block_size,
+                    num_kv_heads=1,
+                    head_size=1,
+                    dtype=torch.float32,
+                    sliding_window=sliding_window,
+                ),
+            ),
+        ],
+    )
+
+    manager = KVCacheManager(
+        config,
+        max_model_len=max_model_len,
+        max_num_batched_tokens=max_num_batched_tokens,
+        enable_caching=True,
+        hash_block_size=block_size,
+    )
+
+    # A prompt that is shorter than max_model_len but longer than SW + chunk:
+    # cdiv(prompt_len, bs) = 32 blocks. Without the cap, admission would
+    # demand 32 (full) + 32 (SWA) = 64 blocks. With the cap, SWA contributes
+    # only 13, so total = 32 + 13 = 45 ≤ pool size.
+    prompt_len = 32 * block_size
+    req = make_request("long", list(range(prompt_len)), block_size, sha256)
+
+    assert (
+        manager.allocate_slots(req, block_size, full_sequence_must_fit=True) is not None
+    )
+
+
+def test_can_fit_full_sequence_full_attention_still_gates_oversized():
+    """The cap only loosens the SWA group; a prompt that exceeds the
+    full-attention pool capacity must still be rejected."""
+    block_size = 16
+    sliding_window = 4 * block_size
+    max_num_batched_tokens = 8 * block_size
+    max_model_len = 64 * block_size
+    # Provide a tiny pool — even a small prompt should be rejected.
+    num_blocks = 5
+
+    config = KVCacheConfig(
+        num_blocks=num_blocks,
+        kv_cache_tensors=[],
+        kv_cache_groups=[
+            KVCacheGroupSpec(
+                ["layer_full"],
+                FullAttentionSpec(
+                    block_size=block_size,
+                    num_kv_heads=1,
+                    head_size=1,
+                    dtype=torch.float32,
+                ),
+            ),
+            KVCacheGroupSpec(
+                ["layer_swa"],
+                SlidingWindowSpec(
+                    block_size=block_size,
+                    num_kv_heads=1,
+                    head_size=1,
+                    dtype=torch.float32,
+                    sliding_window=sliding_window,
+                ),
+            ),
+        ],
+    )
+
+    manager = KVCacheManager(
+        config,
+        max_model_len=max_model_len,
+        max_num_batched_tokens=max_num_batched_tokens,
+        enable_caching=True,
+        hash_block_size=block_size,
+    )
+
+    # 16 blocks of full attention demand alone exceeds the 5-block pool.
+    prompt_len = 16 * block_size
+    req = make_request("oversized", list(range(prompt_len)), block_size, sha256)
+
+    assert manager.allocate_slots(req, block_size, full_sequence_must_fit=True) is None
diff --git a/tests/v1/core/test_scheduler.py b/tests/v1/core/test_scheduler.py
index e4b77f24b6f7..bcbac67a63f1 100644
--- a/tests/v1/core/test_scheduler.py
+++ b/tests/v1/core/test_scheduler.py
@@ -26,6 +26,7 @@
 from vllm.v1.core.kv_cache_utils import get_request_block_hasher, init_none_hash
 from vllm.v1.core.sched.output import CachedRequestData, SchedulerOutput
 from vllm.v1.core.sched.scheduler import Scheduler
+from vllm.v1.engine import FinishReason
 from vllm.v1.kv_cache_interface import (
     FullAttentionSpec,
     KVCacheConfig,
@@ -1038,6 +1039,54 @@ def test_no_spec_tokens_scheduled_for_prefill_chunks():
     assert len(output.scheduled_spec_decode_tokens[req.request_id]) == num_spec_tokens
 
 
+def test_scheduler_stats_waiting_queues():
+    """Test that scheduler stats correctly report waiting and skipped_waiting queues."""
+    # Create scheduler with limited capacity so we can have waiting requests
+    scheduler = create_scheduler(max_num_batched_tokens=100)
+
+    # Create requests: some will be scheduled, some will wait on capacity,
+    # and some will be blocked by constraints
+    all_requests = create_requests(num_requests=5, num_tokens=50)
+
+    # Add 3 requests - only 2 can be scheduled (2 * 50 = 100 tokens)
+    # The 3rd will remain in waiting queue (capacity constraint)
+    for request in all_requests[:3]:
+        scheduler.add_request(request)
+
+    # Manually add 2 more to skipped_waiting to simulate constraint-blocked
+    for request in all_requests[3:]:
+        request.status = RequestStatus.WAITING_FOR_REMOTE_KVS
+        scheduler.skipped_waiting.add_request(request)
+
+    # Schedule - this will schedule 2 requests, leaving 1 in waiting
+    output = scheduler.schedule()
+
+    # Verify: 2 scheduled, 1 still waiting on capacity, 2 blocked by constraints
+    assert len(output.scheduled_new_reqs) == 2
+    assert len(scheduler.waiting) == 1
+    assert len(scheduler.skipped_waiting) == 2
+
+    # Call update_from_output() to get frontend-facing stat
+    scheduled_req_ids = list(output.num_scheduled_tokens.keys())
+    model_runner_output = ModelRunnerOutput(
+        req_ids=scheduled_req_ids,
+        req_id_to_index={req_id: i for i, req_id in enumerate(scheduled_req_ids)},
+        sampled_token_ids=[[1]] * len(scheduled_req_ids),
+        logprobs=None,
+        prompt_logprobs_dict={},
+        pooler_output=[],
+    )
+    engine_core_outputs = scheduler.update_from_output(output, model_runner_output)
+    assert engine_core_outputs and len(engine_core_outputs) > 0
+    stats = engine_core_outputs[0].scheduler_stats
+    assert stats is not None
+
+    # Verify stats match queue lengths after scheduling
+    assert stats.num_running_reqs == 2  # 2 were scheduled
+    assert stats.num_waiting_reqs == 1  # 1 waiting on capacity
+    assert stats.num_skipped_waiting_reqs == 2  # 2 blocked by constraints
+
+
 def _assert_right_scheduler_output(
     output: SchedulerOutput,
     num_requests: int,
@@ -1843,6 +1892,7 @@ def create_scheduler_with_priority(
         log_stats=True,
         structured_output_manager=StructuredOutputManager(vllm_config),
         block_size=block_size,
+        hash_block_size=block_size,
     )
 
 
@@ -2040,91 +2090,103 @@ def test_priority_scheduling_mixed_priority_and_arrival():
     assert scheduled_req_ids == ["3", "2", "1", "0"]
 
 
-# This test had previously been passing due to its use of duplicate
-# request ids which resulted in incorrect behavior.
-# Now that the duplicate req ids had been fixed it fails and
-# investigation is needed into whether the priority scheduling
-# preemption logic is working as designed or not.
-@pytest.mark.skip("needs investigation")
 def test_priority_scheduling_preemption():
-    """Test that priority scheduling preempts
-    lower priority requests when memory is constrained."""
-    # Create scheduler with very limited memory to force preemption
+    """Test that under KV block pressure the scheduler preempts the
+    lowest-priority *running* request, not the highest-priority one.
+
+    A low-priority request starts running first. Then a high-priority
+    request arrives and is admitted to running.  When block pressure
+    builds, the scheduler preempts the low-priority running request
+    while keeping the high-priority one.
+
+    Block math
+    ----------
+    block_size = 16, num_blocks = 6 (1 null → 5 usable).
+
+    Phase 1: lo1 (priority 5, 32 tokens) → 2 blocks.  3 free.
+             Decode → lo1 has 33 tokens (needs 3rd block on next schedule).
+    Phase 2: hi1 (priority 0, 32 tokens) arrives.
+             schedule() allocates lo1's 3rd block (3 used) and admits
+             hi1 (2 blocks) → 5 used, 0 free. Both running.
+             Decode → lo1 34 tokens, hi1 33 tokens.
+    Phase 3: schedule() → hi1 needs 3rd block, 0 free → preemption.
+             lo1 (priority 5) is preempted, hi1 (priority 0) survives.
+    """
+    block_size = 16
+    num_blocks = 6  # 1 null block → 5 usable
+    num_tokens = block_size * 2  # 32 tokens = exactly 2 blocks
+
     scheduler = create_scheduler_with_priority(
-        max_num_seqs=3,  # Allow multiple requests
+        max_num_seqs=3,
         max_num_batched_tokens=200,
-        num_blocks=6,  # Very limited blocks to force memory pressure
-        block_size=16,  # Standard block size
-    )
-
-    # Create initial low-priority requests that will consume most memory
-    low_priority_requests = create_requests_with_priority(
-        num_requests=2,
-        priorities=[5, 5],  # Low priority
-        arrival_times=[1.0, 2.0],
-        num_tokens=30,  # Large enough to consume significant memory,
-        req_ids=["lo1", "lo2"],
+        num_blocks=num_blocks,
+        block_size=block_size,
     )
 
-    # Add and schedule low priority requests
-    for request in low_priority_requests:
-        scheduler.add_request(request)
+    # --- Phase 1: low-priority request starts running ---
+    lo1 = create_requests_with_priority(
+        num_requests=1,
+        priorities=[5],
+        arrival_times=[1.0],
+        num_tokens=num_tokens,
+        req_ids=["lo1"],
+    )[0]
+    scheduler.add_request(lo1)
 
     output = scheduler.schedule()
-    assert len(output.scheduled_new_reqs) == 2
+    assert len(output.scheduled_new_reqs) == 1
 
-    # Simulate model execution to move requests to running state
+    # Decode: lo1 now has 33 tokens (crosses 32-token boundary).
     model_output = ModelRunnerOutput(
-        req_ids=[req.request_id for req in low_priority_requests],
-        req_id_to_index={
-            req.request_id: i for i, req in enumerate(low_priority_requests)
-        },
-        sampled_token_ids=[[100] for _ in low_priority_requests],
+        req_ids=["lo1"],
+        req_id_to_index={"lo1": 0},
+        sampled_token_ids=[[100]],
         logprobs=None,
         prompt_logprobs_dict={},
         pooler_output=[],
     )
     scheduler.update_from_output(output, model_output)
 
-    # Verify both requests are running
-    assert len(scheduler.running) == 2
-
-    # Now add a high-priority request that requires memory allocation
-    # This should trigger preemption due to memory constraints
-    high_priority_request = create_requests_with_priority(
+    # --- Phase 2: high-priority request arrives AFTER lo1 is running ---
+    hi1 = create_requests_with_priority(
         num_requests=1,
-        priorities=[0],  # High priority
-        arrival_times=[3.0],
-        num_tokens=30,  # Large enough to require significant memory
+        priorities=[0],
+        arrival_times=[2.0],
+        num_tokens=num_tokens,
         req_ids=["hi1"],
     )[0]
+    scheduler.add_request(hi1)
 
-    scheduler.add_request(high_priority_request)
+    # schedule(): lo1 gets its 3rd block (3 used), hi1 admitted (5 used,
+    # 0 free).  Both are now running.
+    output = scheduler.schedule()
+    assert any(r.req_id == "hi1" for r in output.scheduled_new_reqs)
+    assert len(scheduler.running) == 2
+
+    # Decode: lo1 → 34 tokens, hi1 → 33 tokens.
+    model_output = ModelRunnerOutput(
+        req_ids=["lo1", "hi1"],
+        req_id_to_index={"lo1": 0, "hi1": 1},
+        sampled_token_ids=[[101], [100]],
+        logprobs=None,
+        prompt_logprobs_dict={},
+        pooler_output=[],
+    )
+    scheduler.update_from_output(output, model_output)
 
-    # Schedule again - this should trigger
-    # preemption when trying to allocate memory
+    # --- Phase 3: preemption with mixed-priority running requests ---
+    # hi1 needs a 3rd block but 0 are free.  The scheduler picks the
+    # lowest-priority running request to preempt:
+    #   max(running, key=(priority, arrival_time)) → lo1 (5 > 0).
     output = scheduler.schedule()
 
-    # Due to the scheduler's design, if preemption happens
-    # during running request scheduling,
-    # waiting requests won't be scheduled in the same step
-    # Let's check if preemption occurred by looking at the waiting queue
-
-    # If preemption happened, we should see requests in the
-    # waiting queue
-    if len(scheduler.waiting) > 1:  # high priority + preempted request
-        # Preemption occurred - verify the high priority request
-        # gets scheduled next
-        output2 = scheduler.schedule()
-        assert len(output2.scheduled_new_reqs) == 1
-        # High priority request
-        assert output2.scheduled_new_reqs[0].req_id == "hi1"
-    else:
-        # No preemption needed - all requests fit
-        # This is also valid behavior if memory allows
-        assert len(output.scheduled_new_reqs) == 1
-        # High priority request
-        assert output.scheduled_new_reqs[0].req_id == "hi1"
+    lo1_req = scheduler.requests["lo1"]
+    assert lo1_req.status == RequestStatus.PREEMPTED, (
+        "Expected low-priority 'lo1' to be preempted"
+    )
+    assert any(req.request_id == "hi1" for req in scheduler.running), (
+        "High-priority 'hi1' should still be running"
+    )
 
 
 def test_priority_scheduling_no_preemption_when_space_available():
@@ -2451,6 +2513,87 @@ def test_schedule_skip_tokenizer_init_structured_output_request():
     assert len(scheduler.skipped_waiting) == 1
 
 
+def test_abort_request_when_structured_output_fsm_cannot_advance():
+    scheduler = object.__new__(Scheduler)
+    sampling_params = SamplingParams(ignore_eos=True, max_tokens=4)
+    sampling_params.update_from_generation_config({}, EOS_TOKEN_ID)
+
+    request = Request(
+        request_id="0",
+        prompt_token_ids=[0, 1],
+        mm_features=None,
+        sampling_params=sampling_params,
+        pooling_params=None,
+    )
+    request.structured_output_request = Mock()
+    request.structured_output_request.grammar = Mock()
+    request.structured_output_request.grammar.accept_tokens.return_value = False
+    request.status = RequestStatus.RUNNING
+    request.num_computed_tokens = request.num_tokens
+
+    scheduler.perf_metrics = None
+    scheduler.connector = None
+    scheduler.structured_output_manager = Mock()
+    scheduler.structured_output_manager.should_advance.return_value = True
+    scheduler.requests = {request.request_id: request}
+    scheduler.running = [request]
+    scheduler.waiting = Mock()
+    scheduler.kv_cache_manager = Mock()
+    scheduler.kv_cache_manager.take_events.return_value = None
+    scheduler.kv_event_publisher = Mock()
+    scheduler.finished_req_ids = set()
+    scheduler.finished_req_ids_dict = None
+    scheduler.vllm_config = Mock()
+    scheduler.vllm_config.model_config.enable_return_routed_experts = False
+    scheduler.enable_return_routed_experts = False
+    scheduler.recompute_kv_load_failures = False
+    scheduler.make_stats = Mock(return_value=None)
+    scheduler.max_model_len = 128
+
+    def free_request(req: Request, delay_free_blocks: bool = False):
+        scheduler.finished_req_ids.add(req.request_id)
+        scheduler.requests.pop(req.request_id, None)
+        return None
+
+    scheduler._free_request = Mock(side_effect=free_request)
+
+    output = SchedulerOutput(
+        scheduled_new_reqs=[],
+        scheduled_cached_reqs=CachedRequestData.make_empty(),
+        num_scheduled_tokens={request.request_id: 1},
+        total_num_scheduled_tokens=1,
+        scheduled_encoder_inputs={},
+        scheduled_spec_decode_tokens={},
+        num_common_prefix_blocks=[],
+        finished_req_ids=set(),
+        free_encoder_mm_hashes=[],
+    )
+
+    model_runner_output = ModelRunnerOutput(
+        req_ids=[request.request_id],
+        req_id_to_index={request.request_id: 0},
+        sampled_token_ids=[[123]],
+        logprobs=None,
+        prompt_logprobs_dict={},
+        pooler_output=[],
+    )
+    engine_core_outputs = scheduler.update_from_output(output, model_runner_output)
+
+    request.structured_output_request.grammar.accept_tokens.assert_called_once_with(
+        request.request_id, [123]
+    )
+    assert request.resumable is False
+    assert request.status == RequestStatus.FINISHED_ERROR
+    assert request.request_id not in scheduler.requests
+    assert not scheduler.running
+    scheduler._free_request.assert_called_once_with(request)
+    assert len(engine_core_outputs[0].outputs) == 1
+    engine_core_output = engine_core_outputs[0].outputs[0]
+    assert engine_core_output.request_id == request.request_id
+    assert engine_core_output.new_token_ids == [123]
+    assert engine_core_output.finish_reason == FinishReason.ERROR
+
+
 @pytest.mark.parametrize(
     "use_ec_connector, ec_role", [(False, None), (True, "ec_consumer")]
 )
@@ -3785,6 +3928,45 @@ def test_abort_request_finished_recving():
     assert not scheduler.finished_recving_kv_req_ids
 
 
+def test_delayed_kv_connector_free_keeps_scheduler_active():
+    scheduler = create_scheduler(use_kv_connector=True)
+    queued_request, request = create_requests(
+        num_requests=2, req_ids=["queued", "finished"]
+    )
+    scheduler.add_request(queued_request)
+
+    assert not scheduler.has_finished_requests()
+
+    request.status = RequestStatus.FINISHED_STOPPED
+    scheduler.requests[request.request_id] = request
+    scheduler.finished_req_ids = set()
+
+    assert scheduler.has_finished_requests()
+    assert scheduler.has_requests()
+
+    scheduler_output = SchedulerOutput(
+        scheduled_new_reqs=[],
+        scheduled_cached_reqs=CachedRequestData.make_empty(),
+        num_scheduled_tokens={},
+        total_num_scheduled_tokens=0,
+        scheduled_encoder_inputs={},
+        scheduled_spec_decode_tokens={},
+        num_common_prefix_blocks=[],
+        finished_req_ids=set(),
+        free_encoder_mm_hashes=[],
+    )
+    model_runner_output = ModelRunnerOutput(
+        req_ids=[],
+        req_id_to_index={},
+        kv_connector_output=KVConnectorOutput(finished_sending={request.request_id}),
+    )
+
+    scheduler.update_from_output(scheduler_output, model_runner_output)
+
+    assert request.request_id not in scheduler.requests
+    assert not scheduler.has_finished_requests()
+
+
 # ==============================================================================
 # Variable-length encoder cross-attention block allocation tests
 # ==============================================================================
@@ -3867,6 +4049,7 @@ def _create_encoder_decoder_scheduler(
         vllm_config=vllm_config,
         kv_cache_config=kv_cache_config,
         block_size=block_size,
+        hash_block_size=block_size,
         structured_output_manager=StructuredOutputManager(vllm_config),
     )
 
diff --git a/tests/v1/core/test_single_type_kv_cache_manager.py b/tests/v1/core/test_single_type_kv_cache_manager.py
index b05040ebe2a6..f59830dcd741 100644
--- a/tests/v1/core/test_single_type_kv_cache_manager.py
+++ b/tests/v1/core/test_single_type_kv_cache_manager.py
@@ -22,11 +22,13 @@
 
 
 def get_sliding_window_manager(sliding_window_spec, block_pool, enable_caching=True):
+    # Tests don't exercise admission gating; pass a large cap that is a no-op.
     return SlidingWindowManager(
         sliding_window_spec,
         block_pool=block_pool,
         enable_caching=enable_caching,
         kv_cache_group_id=0,
+        max_admission_blocks_per_request=10**9,
     )
 
 
@@ -38,6 +40,7 @@ def get_chunked_local_attention_manager(
         block_pool=block_pool,
         enable_caching=enable_caching,
         kv_cache_group_id=0,
+        max_admission_blocks_per_request=10**9,
     )
 
 
@@ -429,3 +432,52 @@ def test_chunked_local_attention_get_num_blocks_to_allocate():
         )
         == 15
     )
+
+
+def test_predictor_matches_allocator_blocks_calculation_with_admission_cap():
+    """In forward steps, `get_num_blocks_to_allocate` must return exactly what
+    `allocate_new_blocks` will pull; otherwise `block_pool.get_new_blocks`
+    raises `ValueError: Cannot get N free blocks from the pool`.
+    """
+    block_size = 2
+    sliding_window = 8  # 4-block live window
+    cap = sliding_window // block_size
+
+    spec = SlidingWindowSpec(
+        block_size=block_size,
+        num_kv_heads=1,
+        head_size=1,
+        dtype=torch.float32,
+        sliding_window=sliding_window,
+    )
+    block_pool = BlockPool(
+        num_gpu_blocks=100, enable_caching=True, hash_block_size=block_size
+    )
+    manager = SlidingWindowManager(
+        spec,
+        block_pool=block_pool,
+        enable_caching=False,
+        kv_cache_group_id=0,
+        max_admission_blocks_per_request=cap,
+    )
+
+    request_id = "req"
+    total_computed = 0
+    # Walk through request forward steps. Check num_blocks returned by
+    # `get_num_blocks_to_allocate` matches what `allocate_new_blocks` pulls
+    for num_tokens in (4, 8, 12, 16):
+        predicted = manager.get_num_blocks_to_allocate(
+            request_id=request_id,
+            num_tokens=num_tokens,
+            new_computed_blocks=[],
+            total_computed_tokens=total_computed,
+            num_tokens_main_model=num_tokens,
+        )
+        new_blocks = manager.allocate_new_blocks(
+            request_id, num_tokens=num_tokens, num_tokens_main_model=num_tokens
+        )
+        assert predicted == len(new_blocks), (
+            f"num_tokens={num_tokens}: predictor returned {predicted} "
+            f"but allocator pulled {len(new_blocks)}"
+        )
+        total_computed = num_tokens
diff --git a/tests/v1/cudagraph/test_breakable_cudagraph.py b/tests/v1/cudagraph/test_breakable_cudagraph.py
new file mode 100644
index 000000000000..f856d91b6395
--- /dev/null
+++ b/tests/v1/cudagraph/test_breakable_cudagraph.py
@@ -0,0 +1,367 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Unit tests for the breakable cudagraph primitives.
+"""
+
+from __future__ import annotations
+
+import os
+import threading
+
+import pytest
+import torch
+
+os.environ["VLLM_USE_BREAKABLE_CUDAGRAPH"] = "1"
+
+
+@pytest.fixture(autouse=True)
+def _reset_breakable_tls():
+    """Defensively clear thread-local capture state between tests so a
+    failure in one test can't leak "nested capture" errors into the next."""
+    from vllm.compilation.breakable_cudagraph import BreakableCUDAGraphCapture
+
+    BreakableCUDAGraphCapture._tls.active = None
+    yield
+    BreakableCUDAGraphCapture._tls.active = None
+
+
+@pytest.fixture
+def cuda_capture_stream():
+    """A non-default CUDA stream suitable for cudagraph capture.
+
+    ``CUDAGraph.capture_begin`` refuses to capture from the default
+    stream, so all capture-using tests need to run under
+    ``torch.cuda.stream(...)`` for a separate stream.
+    """
+    if not torch.cuda.is_available():
+        pytest.skip("CUDA required")
+    stream = torch.cuda.Stream()
+    with torch.cuda.stream(stream):
+        yield stream
+    torch.cuda.current_stream().wait_stream(stream)
+
+
+# ---------------------------------------------------------------------------
+# eager_break_during_capture: outside capture
+# ---------------------------------------------------------------------------
+
+
+def test_decorator_passthrough_outside_capture():
+    from vllm.compilation.breakable_cudagraph import eager_break_during_capture
+
+    calls = []
+
+    @eager_break_during_capture
+    def f(x):
+        calls.append(x)
+        return x * 2
+
+    assert f(3) == 6
+    assert calls == [3]
+
+
+# ---------------------------------------------------------------------------
+# BreakableCUDAGraphCapture: thread-local + nested rejection
+# ---------------------------------------------------------------------------
+
+
+def test_current_is_none_when_inactive():
+    from vllm.compilation.breakable_cudagraph import BreakableCUDAGraphCapture
+
+    assert BreakableCUDAGraphCapture.current() is None
+    assert BreakableCUDAGraphCapture.is_active() is False
+
+
+def test_thread_local_active_during_context(cuda_capture_stream):
+    from vllm.compilation.breakable_cudagraph import BreakableCUDAGraphCapture
+
+    cap = BreakableCUDAGraphCapture()
+    with cap:
+        assert BreakableCUDAGraphCapture.current() is cap
+        assert BreakableCUDAGraphCapture.is_active() is True
+    assert BreakableCUDAGraphCapture.current() is None
+
+
+def test_nested_capture_raises(cuda_capture_stream):
+    from vllm.compilation.breakable_cudagraph import BreakableCUDAGraphCapture
+
+    outer = BreakableCUDAGraphCapture()
+    inner = BreakableCUDAGraphCapture()
+    with outer, pytest.raises(RuntimeError, match="Nested.*not supported"), inner:
+        pass
+
+
+def test_active_state_isolated_across_threads(cuda_capture_stream):
+    """Verify the thread-local 'active capture' slot is per-thread.
+
+    We don't run concurrent captures here -- CUDA only supports one
+    in-flight capture per stream and we keep tests cheap. We just check
+    that the worker thread sees its own slot as None while the main
+    thread has a capture active.
+    """
+    from vllm.compilation.breakable_cudagraph import BreakableCUDAGraphCapture
+
+    worker_view: dict[str, BreakableCUDAGraphCapture | None] = {}
+
+    def worker():
+        worker_view["state"] = BreakableCUDAGraphCapture.current()
+
+    main_cap = BreakableCUDAGraphCapture()
+    with main_cap:
+        # Main thread has a live capture.
+        assert BreakableCUDAGraphCapture.current() is main_cap
+        t = threading.Thread(target=worker)
+        t.start()
+        t.join()
+
+    # Worker thread saw None -- thread-local separation works.
+    assert worker_view["state"] is None
+    # Main thread's slot is cleared on exit.
+    assert BreakableCUDAGraphCapture.current() is None
+
+
+# ---------------------------------------------------------------------------
+# Segment list construction
+# ---------------------------------------------------------------------------
+
+
+def test_capture_with_no_eager_break_records_one_graph(cuda_capture_stream):
+    from vllm.compilation.breakable_cudagraph import BreakableCUDAGraphCapture
+
+    x = torch.zeros(4, device="cuda")
+    cap = BreakableCUDAGraphCapture()
+    with cap:
+        x.add_(1.0)
+    assert len(cap.segments) == 1
+    assert cap.num_graphs == 1
+    assert cap.num_eager_breaks == 0
+
+
+def test_add_eager_creates_alternating_graph_eager_graph(cuda_capture_stream):
+    from vllm.compilation.breakable_cudagraph import BreakableCUDAGraphCapture
+
+    x = torch.zeros(4, device="cuda")
+    counter = {"eager_calls": 0}
+
+    def eager_step():
+        counter["eager_calls"] += 1
+        x.add_(10.0)
+
+    cap = BreakableCUDAGraphCapture()
+    with cap:
+        x.add_(1.0)
+        cap.add_eager(eager_step)
+        x.add_(1.0)
+        cap.add_eager(eager_step)
+        x.add_(1.0)
+    # 3 graph segments + 2 eager segments, interleaved as G E G E G.
+    assert len(cap.segments) == 5
+    assert cap.num_graphs == 3
+    assert cap.num_eager_breaks == 2
+    # Eager fn is stored as-is in the segment list, so we can confirm
+    # the alternation pattern by identity check.
+    assert cap.segments[1] is eager_step
+    assert cap.segments[3] is eager_step
+    assert counter["eager_calls"] == 2  # only the in-capture invocation
+
+
+# ---------------------------------------------------------------------------
+# Capture vs eager numerical equivalence
+# ---------------------------------------------------------------------------
+
+
+def test_capture_replay_matches_eager_simple(cuda_capture_stream):
+    """Verify that replay reproduces the same end-state as a single eager
+    forward, with an eager break in the middle.
+
+    Note: during capture, the *captured* kernels are recorded but NOT
+    executed (that's CUDA-graph semantics). Only the eager segments
+    actually mutate state at capture time. So we check correctness after
+    ``replay()``, not after ``with cap:`` exits.
+    """
+    from vllm.compilation.breakable_cudagraph import BreakableCUDAGraphCapture
+
+    x = torch.zeros(8, device="cuda")
+    log: list[str] = []
+
+    def eager_break_op():
+        x.mul_(2.0)
+        log.append("eager")
+
+    cap = BreakableCUDAGraphCapture()
+    with cap:
+        x.add_(1.0)  # recorded into graph[0]
+        cap.add_eager(eager_break_op)  # runs eagerly: x *= 2
+        x.add_(5.0)  # recorded into graph[1]
+
+    # Capture-time: graph kernels were recorded only; eager segment ran
+    # once on x == 0, leaving x == 0.
+    torch.accelerator.synchronize()
+    assert torch.equal(x, torch.zeros(8, device="cuda"))
+    assert log == ["eager"]
+
+    # Replay with a fresh input: 10 -> 11 -> 22 -> 27.
+    x.fill_(10.0)
+    cap.replay()
+    torch.accelerator.synchronize()
+    assert torch.equal(x, torch.full((8,), 27.0, device="cuda"))
+    assert log == ["eager", "eager"]
+
+    # Replay again with another input: 100 -> 101 -> 202 -> 207.
+    x.fill_(100.0)
+    cap.replay()
+    torch.accelerator.synchronize()
+    assert torch.equal(x, torch.full((8,), 207.0, device="cuda"))
+    assert log == ["eager", "eager", "eager"]
+
+
+def test_decorator_breaks_when_invoked_inside_capture(cuda_capture_stream):
+    """Verify @eager_break_during_capture correctly routes through
+    add_eager when inside a capture context, and runs straight through
+    when there's no active capture."""
+    from vllm.compilation.breakable_cudagraph import (
+        BreakableCUDAGraphCapture,
+        eager_break_during_capture,
+    )
+
+    @eager_break_during_capture
+    def attention_like(t: torch.Tensor) -> None:
+        # In-place double; stands in for "real" attention work.
+        t.mul_(2.0)
+
+    x = torch.zeros(4, device="cuda")
+
+    # Outside capture: decorator should just call through.
+    x.fill_(3.0)
+    attention_like(x)
+    torch.accelerator.synchronize()
+    assert torch.equal(x, torch.full((4,), 6.0, device="cuda"))
+
+    # Inside capture: decorator should split the graph. Only the eager
+    # segment actually mutates state during capture.
+    x.fill_(0.0)
+    cap = BreakableCUDAGraphCapture()
+    with cap:
+        x.add_(5.0)  # recorded
+        attention_like(x)  # eager: x *= 2 (on x == 0, no-op)
+        x.add_(1.0)  # recorded
+    torch.accelerator.synchronize()
+    assert torch.equal(x, torch.zeros(4, device="cuda"))
+    # 2 graph segments + 1 eager segment, ordered G E G; the arithmetic
+    # equivalence check below verifies the ordering.
+    assert len(cap.segments) == 3
+    assert cap.num_graphs == 2
+    assert cap.num_eager_breaks == 1
+
+    # Replay: 2 -> 7 -> 14 -> 15.
+    x.fill_(2.0)
+    cap.replay()
+    torch.accelerator.synchronize()
+    assert torch.equal(x, torch.full((4,), 15.0, device="cuda"))
+
+
+# ---------------------------------------------------------------------------
+# Replay ordering
+# ---------------------------------------------------------------------------
+
+
+def test_replay_invokes_eager_segments_in_order(cuda_capture_stream):
+    from vllm.compilation.breakable_cudagraph import BreakableCUDAGraphCapture
+
+    log: list[str] = []
+    x = torch.zeros(1, device="cuda")
+
+    def make_eager(name):
+        def step():
+            log.append(name)
+            x.add_(1.0)
+
+        return step
+
+    cap = BreakableCUDAGraphCapture()
+    with cap:
+        x.add_(1.0)
+        cap.add_eager(make_eager("A"))
+        x.add_(1.0)
+        cap.add_eager(make_eager("B"))
+        x.add_(1.0)
+        cap.add_eager(make_eager("C"))
+        x.add_(1.0)
+
+    # Capture-time invocation order
+    assert log == ["A", "B", "C"]
+
+    log.clear()
+    cap.replay()
+    torch.accelerator.synchronize()
+    assert log == ["A", "B", "C"]
+
+
+# ---------------------------------------------------------------------------
+# Capture cleanup releases thread-local even if body raises
+# ---------------------------------------------------------------------------
+
+
+def test_exception_in_body_clears_active(cuda_capture_stream):
+    from vllm.compilation.breakable_cudagraph import BreakableCUDAGraphCapture
+
+    cap = BreakableCUDAGraphCapture()
+    with pytest.raises(RuntimeError, match="boom"), cap:
+        raise RuntimeError("boom")
+
+    # active must be reset even after an exception inside the body
+    assert BreakableCUDAGraphCapture.current() is None
+
+
+# ---------------------------------------------------------------------------
+# Nested decorated ops: inner op must not trigger a recursive eager break
+# ---------------------------------------------------------------------------
+
+
+def test_nested_decorated_op_runs_inline(cuda_capture_stream):
+    """A decorated op invoked from inside another decorated op's eager
+    body must execute inline -- starting a second eager break mid-flight
+    corrupts the segment state and explodes ``_begin_segment``'s assert.
+
+    This mirrors the deepseek_v4_attention case where the outer attention
+    op's impl internally dispatches sparse_attn_indexer (also decorated).
+    """
+    from vllm.compilation.breakable_cudagraph import (
+        BreakableCUDAGraphCapture,
+        eager_break_during_capture,
+    )
+
+    x = torch.zeros(4, device="cuda")
+    inner_calls = 0
+
+    @eager_break_during_capture
+    def inner_op(t: torch.Tensor) -> None:
+        nonlocal inner_calls
+        inner_calls += 1
+        t.add_(1.0)
+
+    @eager_break_during_capture
+    def outer_op(t: torch.Tensor) -> None:
+        # outer body calls another decorated op -- this is the case that
+        # used to assert in _begin_segment.
+        inner_op(t)
+        t.add_(10.0)
+
+    cap = BreakableCUDAGraphCapture()
+    with cap:
+        x.add_(2.0)  # recorded in graph[0]
+        outer_op(x)  # one eager break, inner runs inline
+        x.add_(100.0)  # recorded in graph[1]
+
+    # Exactly one eager break (the outer); inner must NOT add a second.
+    assert cap.num_graphs == 2
+    assert cap.num_eager_breaks == 1
+    assert inner_calls == 1  # only the capture-time invocation
+
+    x.fill_(0.0)
+    cap.replay()
+    torch.accelerator.synchronize()
+    # 0 -> +2 -> +1 (inner) -> +10 (outer) -> +100 = 113
+    assert torch.equal(x, torch.full((4,), 113.0, device="cuda"))
+    assert inner_calls == 2  # replay invokes the outer's lambda again
diff --git a/tests/v1/cudagraph/test_cudagraph_dispatch.py b/tests/v1/cudagraph/test_cudagraph_dispatch.py
index 52e927cee8ec..97b5fd46a2eb 100644
--- a/tests/v1/cudagraph/test_cudagraph_dispatch.py
+++ b/tests/v1/cudagraph/test_cudagraph_dispatch.py
@@ -23,6 +23,8 @@
 from vllm.platforms import current_platform
 from vllm.v1.cudagraph_dispatcher import CudagraphDispatcher
 
+DEVICE_TYPE = current_platform.device_type
+
 
 # Helper MLP for testing
 class SimpleMLP(nn.Module):
@@ -269,9 +271,9 @@ def test_get_capture_descs_empty_when_not_initialized(self):
 class TestCUDAGraphWrapper:
     def setup_method(self):
         self.vllm_config = _create_vllm_config(CompilationConfig())
-        self.model = SimpleMLP().to("cuda")
-        self.persistent_input_buffer = torch.zeros(1, 10, device="cuda")
-        self.input_tensor = torch.randn(1, 10, device="cuda")
+        self.model = SimpleMLP().to(DEVICE_TYPE)
+        self.persistent_input_buffer = torch.zeros(1, 10, device=DEVICE_TYPE)
+        self.input_tensor = torch.randn(1, 10, device=DEVICE_TYPE)
 
     def test_capture_and_replay(self):
         wrapper = CUDAGraphWrapper(
@@ -369,193 +371,200 @@ def test_bypass_on_mode_none(self):
         assert not wrapper.concrete_cudagraph_entries
 
 
-@pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda")
-class TestCudagraphIntegration:
-    def setup_method(self):
-        # only FULL mode for non-uniform batches
-        self.comp_config = CompilationConfig(
-            mode=CompilationMode.VLLM_COMPILE,
-            cudagraph_mode="FULL",
-            cudagraph_capture_sizes=[10, 20],
-        )
-        self.vllm_config = _create_vllm_config(self.comp_config)
-        self.dispatcher = CudagraphDispatcher(self.vllm_config)
-        self.dispatcher.initialize_cudagraph_keys(
-            self.comp_config.cudagraph_mode, uniform_decode_query_len=1
-        )
+def _run_and_monitor_call(
+    wrapper, input_tensor, runtime_mode, batch_descriptor, vllm_config
+):
+    """Helper to run a single call and monitor the action."""
 
-    def _run_and_monitor_call(
-        self, wrapper, input_tensor, runtime_mode, batch_descriptor
+    with (
+        patch("torch.cuda.graph", wraps=torch.cuda.graph) as mock_graph_context,
+        patch.object(wrapper, "runnable", wraps=wrapper.runnable) as mock_runnable,
     ):
-        """Helper to run a single call and monitor the action."""
-
-        with (
-            patch("torch.cuda.graph", wraps=torch.cuda.graph) as mock_graph_context,
-            patch.object(wrapper, "runnable", wraps=wrapper.runnable) as mock_runnable,
-        ):
-            entry = wrapper.concrete_cudagraph_entries.get(batch_descriptor, None)
-
-            context = set_forward_context(
-                attn_metadata=None,
-                vllm_config=self.vllm_config,
-                cudagraph_runtime_mode=runtime_mode,
-                batch_descriptor=batch_descriptor,
-            )
-            mock_replay = MagicMock()
-            if entry and entry.cudagraph:
-                with (
-                    context,
-                    patch.object(
-                        entry.cudagraph, "replay", new_callable=MagicMock
-                    ) as mock_replay,
-                ):
-                    wrapper(input_tensor)
-            else:
-                with context:
-                    wrapper(input_tensor)
-
-            if mock_graph_context.called:
-                # note that this is globally mocked, so it will be detected
-                # even whether called by the inner or outer wrapper
-                return "capture_global"
-            if mock_replay.called:
-                # only for outer wrapper
-                return "replay"
-            if mock_runnable.call_count > 0:
-                # only for outer wrapper
-                return "bypass"
-            return "unknown"
-
-    @create_new_process_for_each_test("spawn")
-    def test_capture_replay_bypass_logic(self):
-        model = SimpleMLP().to("cuda")
-        full_wrapper = CUDAGraphWrapper(model, self.vllm_config, CUDAGraphMode.FULL)
-        max_bs = 16
-        persistent_input_buffer = torch.zeros(max_bs, 10, device="cuda")
-        input_1 = persistent_input_buffer[:1]
-        input_2 = persistent_input_buffer[:2]
-        input_3 = persistent_input_buffer[:3]
-
-        desc_1 = BatchDescriptor(num_tokens=1)
-        desc_2 = BatchDescriptor(num_tokens=2)
-        desc_3_unseen = BatchDescriptor(num_tokens=3)
+        entry = wrapper.concrete_cudagraph_entries.get(batch_descriptor, None)
 
-        # 0. global warmup
-        with set_forward_context(
+        context = set_forward_context(
             attn_metadata=None,
-            vllm_config=self.vllm_config,
-            cudagraph_runtime_mode=CUDAGraphMode.NONE,
-            batch_descriptor=None,
-        ):
-            full_wrapper(input_1)
+            vllm_config=vllm_config,
+            cudagraph_runtime_mode=runtime_mode,
+            batch_descriptor=batch_descriptor,
+        )
+        mock_replay = MagicMock()
+        if entry and entry.cudagraph:
+            with (
+                context,
+                patch.object(
+                    entry.cudagraph, "replay", new_callable=MagicMock
+                ) as mock_replay,
+            ):
+                wrapper(input_tensor)
+        else:
+            with context:
+                wrapper(input_tensor)
+
+        if mock_graph_context.called:
+            # note that this is globally mocked, so it will be detected
+            # even whether called by the inner or outer wrapper
+            return "capture_global"
+        if mock_replay.called:
+            # only for outer wrapper
+            return "replay"
+        if mock_runnable.call_count > 0:
+            # only for outer wrapper
+            return "bypass"
+        return "unknown"
+
+
+@create_new_process_for_each_test("spawn")
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda")
+def test_capture_replay_bypass_logic():
+    comp_config = CompilationConfig(
+        mode=CompilationMode.VLLM_COMPILE,
+        cudagraph_mode="FULL",
+        cudagraph_capture_sizes=[1, 2],
+    )
+    vllm_config = _create_vllm_config(comp_config)
+    dispatcher = CudagraphDispatcher(vllm_config)
+    dispatcher.initialize_cudagraph_keys(
+        comp_config.cudagraph_mode, uniform_decode_query_len=1
+    )
+    model = SimpleMLP().to(DEVICE_TYPE)
+    full_wrapper = CUDAGraphWrapper(model, vllm_config, CUDAGraphMode.FULL)
+    max_bs = 16
+    persistent_input_buffer = torch.zeros(max_bs, 10, device=DEVICE_TYPE)
+    input_1 = persistent_input_buffer[:1]
+    input_2 = persistent_input_buffer[:2]
+    input_3 = persistent_input_buffer[:3]
+
+    desc_1 = BatchDescriptor(num_tokens=1)
+    desc_2 = BatchDescriptor(num_tokens=2)
+    desc_3_unseen = BatchDescriptor(num_tokens=3)
+
+    # 0. global warmup
+    with set_forward_context(
+        attn_metadata=None,
+        vllm_config=vllm_config,
+        cudagraph_runtime_mode=CUDAGraphMode.NONE,
+        batch_descriptor=None,
+    ):
+        full_wrapper(input_1)
 
-        rt_mode, key = self.dispatcher.dispatch(num_tokens=desc_1.num_tokens)
-        # 1. Capture first shape
-        action = self._run_and_monitor_call(full_wrapper, input_1, rt_mode, key)
-        assert action == "capture_global"
+    rt_mode, key = dispatcher.dispatch(num_tokens=desc_1.num_tokens)
+    # 1. Capture first shape
+    action = _run_and_monitor_call(full_wrapper, input_1, rt_mode, key, vllm_config)
+    assert action == "capture_global"
 
-        # 2. Replay first shape
-        action = self._run_and_monitor_call(full_wrapper, input_1, rt_mode, key)
-        assert action == "replay"
+    # 2. Replay first shape
+    action = _run_and_monitor_call(full_wrapper, input_1, rt_mode, key, vllm_config)
+    assert action == "replay"
 
-        rt_mode, key = self.dispatcher.dispatch(num_tokens=desc_2.num_tokens)
-        # 3. Capture second shape
-        action = self._run_and_monitor_call(full_wrapper, input_2, rt_mode, key)
-        assert action == "capture_global"
+    rt_mode, key = dispatcher.dispatch(num_tokens=desc_2.num_tokens)
+    # 3. Capture second shape
+    action = _run_and_monitor_call(full_wrapper, input_2, rt_mode, key, vllm_config)
+    assert action == "capture_global"
 
-        # 4. Replay second shape
-        action = self._run_and_monitor_call(
-            full_wrapper, input_2, CUDAGraphMode.FULL, desc_2
+    # 4. Replay second shape
+    action = _run_and_monitor_call(
+        full_wrapper, input_2, CUDAGraphMode.FULL, key, vllm_config
+    )
+    assert action == "replay"
+
+    # 5. Bypass if no key match
+    rt_mode, key = dispatcher.dispatch(num_tokens=desc_3_unseen.num_tokens)
+    assert rt_mode == CUDAGraphMode.NONE
+    action = _run_and_monitor_call(full_wrapper, input_3, rt_mode, key, vllm_config)
+    assert action == "bypass"
+
+    # capture unseen shape is not allowed after disable
+    set_cudagraph_capturing_enabled(False)
+    with pytest.raises(RuntimeError):
+        _run_and_monitor_call(
+            full_wrapper, input_3, CUDAGraphMode.FULL, desc_3_unseen, vllm_config
         )
-        assert action == "replay"
+    set_cudagraph_capturing_enabled(True)
 
-        # 5. Bypass if no key match
-        rt_mode, key = self.dispatcher.dispatch(num_tokens=desc_3_unseen.num_tokens)
-        assert rt_mode == CUDAGraphMode.NONE
-        action = self._run_and_monitor_call(full_wrapper, input_3, rt_mode, key)
-        assert action == "bypass"
-
-        # capture unseen shape is not allowed after disable
-        set_cudagraph_capturing_enabled(False)
-        with pytest.raises(RuntimeError):
-            self._run_and_monitor_call(
-                full_wrapper, input_3, CUDAGraphMode.FULL, desc_3_unseen
-            )
-        set_cudagraph_capturing_enabled(True)
-
-    @create_new_process_for_each_test("spawn")
-    def test_nested_wrappers(self):
-        """Tests a scenario with a PIECEWISE wrapper inside a FULL one."""
-        model = SimpleMLP().to("cuda")
-        full_wrapper = CUDAGraphWrapper(model, self.vllm_config, CUDAGraphMode.FULL)
-        input_1 = torch.randn(1, 10, device="cuda")
-
-        # Setup: Inner model is wrapped with PIECEWISE, outer with FULL
-        inner_model = SimpleMLP().to("cuda")
-        piecewise_wrapper = CUDAGraphWrapper(
-            inner_model, self.vllm_config, CUDAGraphMode.PIECEWISE
-        )
-        inner_model.forward = MagicMock(wraps=inner_model.forward)
-        outer_model = SimpleMLP().to("cuda")
-        # When outer model is called, it calls the piecewise_wrapper
-        outer_model.forward = MagicMock(
-            wraps=outer_model.forward, side_effect=piecewise_wrapper
-        )
-        full_wrapper = CUDAGraphWrapper(
-            outer_model, self.vllm_config, CUDAGraphMode.FULL
-        )
 
-        desc_1 = BatchDescriptor(num_tokens=1)
+@create_new_process_for_each_test("spawn")
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda")
+def test_nested_wrappers():
+    """Tests a scenario with a PIECEWISE wrapper inside a FULL one."""
+    comp_config = CompilationConfig(
+        mode=CompilationMode.VLLM_COMPILE,
+        cudagraph_mode="FULL",
+        cudagraph_capture_sizes=[1],
+    )
+    vllm_config = _create_vllm_config(comp_config)
+    dispatcher = CudagraphDispatcher(vllm_config)
+    dispatcher.initialize_cudagraph_keys(
+        comp_config.cudagraph_mode, uniform_decode_query_len=1
+    )
+    model = SimpleMLP().to(DEVICE_TYPE)
+    full_wrapper = CUDAGraphWrapper(model, vllm_config, CUDAGraphMode.FULL)
+    input_1 = torch.randn(1, 10, device=DEVICE_TYPE)
+
+    # Setup: Inner model is wrapped with PIECEWISE, outer with FULL
+    inner_model = SimpleMLP().to(DEVICE_TYPE)
+    piecewise_wrapper = CUDAGraphWrapper(
+        inner_model, vllm_config, CUDAGraphMode.PIECEWISE
+    )
+    inner_model.forward = MagicMock(wraps=inner_model.forward)
+    outer_model = SimpleMLP().to(DEVICE_TYPE)
+    # When outer model is called, it calls the piecewise_wrapper
+    outer_model.forward = MagicMock(
+        wraps=outer_model.forward, side_effect=piecewise_wrapper
+    )
+    full_wrapper = CUDAGraphWrapper(outer_model, vllm_config, CUDAGraphMode.FULL)
 
-        # 0. global warmup
-        with set_forward_context(
-            attn_metadata=None,
-            vllm_config=self.vllm_config,
-            cudagraph_runtime_mode=CUDAGraphMode.NONE,
-            batch_descriptor=None,
-        ):
-            full_wrapper(input_1)
-
-        # --- Test runtime mode FULL---
-        # Run with FULL mode context. Expect outer wrapper to capture.
-        # The inner mock should be called once inside the graph capture.
-        outer_model.forward.reset_mock()
-        inner_model.forward.reset_mock()
-        action = self._run_and_monitor_call(
-            full_wrapper, input_1, CUDAGraphMode.FULL, desc_1
-        )
-        assert action == "capture_global"
-        assert outer_model.forward.call_count == 1
-        assert inner_model.forward.call_count == 1
-
-        # Run again. Expect outer wrapper to replay.
-        # The outer model should NOT be called because the whole graph
-        # is replayed.
-        action = self._run_and_monitor_call(
-            full_wrapper, input_1, CUDAGraphMode.FULL, desc_1
-        )
-        assert action == "replay"
-        assert outer_model.forward.call_count == 1  # No new call
-        assert inner_model.forward.call_count == 1
-
-        # --- Test runtime mode PIECEWISE ---
-        outer_model.forward.reset_mock()
-        inner_model.forward.reset_mock()
-        # Run with PIECEWISE mode context.
-        # Expect outer wrapper to bypass and call inner wrapper.
-        # Inner wrapper should capture.
-        action = self._run_and_monitor_call(
-            full_wrapper, input_1, CUDAGraphMode.PIECEWISE, desc_1
-        )
-        assert action == "capture_global"
-        assert outer_model.forward.call_count == 1
-        assert inner_model.forward.call_count == 1
-
-        # Run again with PIECEWISE.
-        # Outer bypasses, inner replays.
-        action = self._run_and_monitor_call(
-            full_wrapper, input_1, CUDAGraphMode.PIECEWISE, desc_1
-        )
-        assert action == "bypass"
-        assert outer_model.forward.call_count == 2
-        assert inner_model.forward.call_count == 1
+    desc_1 = BatchDescriptor(num_tokens=1)
+
+    # 0. global warmup
+    with set_forward_context(
+        attn_metadata=None,
+        vllm_config=vllm_config,
+        cudagraph_runtime_mode=CUDAGraphMode.NONE,
+        batch_descriptor=None,
+    ):
+        full_wrapper(input_1)
+
+    # --- Test runtime mode FULL---
+    # Run with FULL mode context. Expect outer wrapper to capture.
+    # The inner mock should be called once inside the graph capture.
+    outer_model.forward.reset_mock()
+    inner_model.forward.reset_mock()
+    action = _run_and_monitor_call(
+        full_wrapper, input_1, CUDAGraphMode.FULL, desc_1, vllm_config
+    )
+    assert action == "capture_global"
+    assert outer_model.forward.call_count == 1
+    assert inner_model.forward.call_count == 1
+
+    # Run again. Expect outer wrapper to replay.
+    # The outer model should NOT be called because the whole graph
+    # is replayed.
+    action = _run_and_monitor_call(
+        full_wrapper, input_1, CUDAGraphMode.FULL, desc_1, vllm_config
+    )
+    assert action == "replay"
+    assert outer_model.forward.call_count == 1  # No new call
+    assert inner_model.forward.call_count == 1
+
+    # --- Test runtime mode PIECEWISE ---
+    outer_model.forward.reset_mock()
+    inner_model.forward.reset_mock()
+    # Run with PIECEWISE mode context.
+    # Expect outer wrapper to bypass and call inner wrapper.
+    # Inner wrapper should capture.
+    action = _run_and_monitor_call(
+        full_wrapper, input_1, CUDAGraphMode.PIECEWISE, desc_1, vllm_config
+    )
+    assert action == "capture_global"
+    assert outer_model.forward.call_count == 1
+    assert inner_model.forward.call_count == 1
+
+    # Run again with PIECEWISE.
+    # Outer bypasses, inner replays.
+    action = _run_and_monitor_call(
+        full_wrapper, input_1, CUDAGraphMode.PIECEWISE, desc_1, vllm_config
+    )
+    assert action == "bypass"
+    assert outer_model.forward.call_count == 2
+    assert inner_model.forward.call_count == 1
diff --git a/tests/v1/cudagraph/test_encoder_cudagraph.py b/tests/v1/cudagraph/test_encoder_cudagraph.py
index 322fcb3caa14..2ba140707ea1 100644
--- a/tests/v1/cudagraph/test_encoder_cudagraph.py
+++ b/tests/v1/cudagraph/test_encoder_cudagraph.py
@@ -6,30 +6,96 @@
   No GPU required:
     - TestFindBudgetGraph      — greedy budget selection logic
     - TestGetCumulativeStats   — hit/miss rate statistics
+    - TestGetInputModality     — modality routing from mm_kwargs keys
   GPU required:
     - TestEncoderCudaGraphCaptureReplay — capture, replay, fallback, counters, chunking
+    - TestEncoderCudaGraphVideoReplay   — video modality capture, replay
 """
 
 from typing import Any
 
 import pytest
 import torch
-from vllm.v1.worker.gpu.mm.encoder_cudagraph import (
+
+from vllm.model_executor.models.interfaces import SupportsEncoderCudaGraph
+from vllm.platforms import current_platform
+from vllm.v1.worker.encoder_cudagraph import (
     EncoderCudaGraphManager,
 )
-from vllm.v1.worker.gpu.mm.encoder_cudagraph_defs import (
+from vllm.v1.worker.encoder_cudagraph_defs import (
     EncoderCudaGraphCaptureInputs,
     EncoderCudaGraphConfig,
     EncoderCudaGraphReplayBuffers,
+    EncoderItemSpec,
 )
 
-from vllm.platforms import current_platform
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
 
 
+class _MockCompilationConfig:
+    """Minimal mock for VllmConfig.compilation_config."""
+
+    def __init__(
+        self,
+        token_budgets: list[int] | None = None,
+        max_mm_items: int = 0,
+    ):
+        self.encoder_cudagraph_token_budgets = token_budgets or []
+        self.encoder_cudagraph_max_vision_items_per_batch = max_mm_items
+        self.encoder_cudagraph_max_frames_per_batch = None
+
+
+class _MockMultimodalConfig:
+    mm_encoder_tp_mode = "replicate"
+
+    def get_limit_per_prompt(self, modality: str) -> int:
+        # Image-only mocks — return 0 for "video" to short-circuit the
+        # max_frames_per_batch branch, so tests don't need a video-frame mock.
+        return 0
+
+
+class _MockModelConfig:
+    multimodal_config = _MockMultimodalConfig()
+
+
+class _MockParallelConfig:
+    tensor_parallel_size = 1
+
+
+class _MockVllmConfig:
+    """Minimal mock for VllmConfig used in __init__ tests."""
+
+    def __init__(
+        self,
+        token_budgets: list[int] | None = None,
+        max_mm_items: int = 0,
+    ):
+        self.compilation_config = _MockCompilationConfig(token_budgets, max_mm_items)
+        self.model_config = _MockModelConfig()
+        self.parallel_config = _MockParallelConfig()
+
+
+class _MockModel(SupportsEncoderCudaGraph):
+    """Minimal mock implementing SupportsEncoderCudaGraph for __init__."""
+
+    def __init__(self, min_budget: int = 4, max_budget: int = 128):
+        self._min_budget = min_budget
+        self._max_budget = max_budget
+
+    def get_encoder_cudagraph_config(self) -> EncoderCudaGraphConfig:
+        return EncoderCudaGraphConfig(
+            modalities=["image"],
+            input_key_by_modality={"image": "pixel_values"},
+            buffer_keys=["dummy_buf"],
+            out_hidden_size=32,
+        )
+
+    def get_encoder_cudagraph_budget_range(self, vllm_config):
+        return (self._min_budget, self._max_budget)
+
+
 def _make_manager_with_budgets(budgets: list[int]) -> EncoderCudaGraphManager:
     """Create a minimal EncoderCudaGraphManager with only token_budgets set.
 
@@ -186,7 +252,7 @@ def _count_output_tokens(
     return sum(t * (h // m) * (w // m) for t, h, w in grid_thw_list)
 
 
-class SimpleMockViTModel(torch.nn.Module):
+class SimpleMockViTModel(torch.nn.Module, SupportsEncoderCudaGraph):
     """Minimal ViT model for CUDA graph tests.
 
     Implements the SupportsEncoderCudaGraph protocol by providing
@@ -194,8 +260,6 @@ class SimpleMockViTModel(torch.nn.Module):
     simulates spatial merge by averaging groups of m^2 patches.
     """
 
-    supports_encoder_cudagraph = True
-
     def __init__(self):
         super().__init__()
         self.proj = torch.nn.Linear(_FLAT, _HIDDEN)
@@ -205,11 +269,19 @@ def __init__(self):
     def get_encoder_cudagraph_config(self) -> EncoderCudaGraphConfig:
         return EncoderCudaGraphConfig(
             modalities=["image"],
-            input_key="pixel_values",
+            input_key_by_modality={
+                "image": "pixel_values",
+            },
             buffer_keys=["dummy_buf"],
             out_hidden_size=_HIDDEN,
         )
 
+    def get_input_modality(
+        self,
+        mm_kwargs: dict[str, Any],
+    ) -> str:
+        return "image"
+
     def get_encoder_cudagraph_budget_range(
         self,
         vllm_config,
@@ -217,24 +289,18 @@ def get_encoder_cudagraph_budget_range(
         # For tests: min=4, max=128 (small values for fast capture)
         return (4, 128)
 
-    def get_encoder_cudagraph_num_items(
-        self,
-        mm_kwargs: dict[str, Any],
-    ) -> int:
-        return len(mm_kwargs["image_grid_thw"])
-
-    def get_encoder_cudagraph_per_item_output_tokens(
+    def get_encoder_cudagraph_item_specs(
         self,
         mm_kwargs: dict[str, Any],
-    ) -> list[int]:
+    ) -> list[EncoderItemSpec]:
         m = _SPATIAL_MERGE
-        return [t * (h // m) * (w // m) for t, h, w in mm_kwargs["image_grid_thw"]]
-
-    def get_encoder_cudagraph_per_item_input_sizes(
-        self,
-        mm_kwargs: dict[str, Any],
-    ) -> list[int]:
-        return [t * h * w for t, h, w in mm_kwargs["image_grid_thw"]]
+        return [
+            EncoderItemSpec(
+                input_size=t * h * w,
+                output_tokens=t * (h // m) * (w // m),
+            )
+            for t, h, w in mm_kwargs["image_grid_thw"]
+        ]
 
     def select_encoder_cudagraph_items(
         self,
@@ -268,6 +334,7 @@ def prepare_encoder_cudagraph_capture_inputs(
         self,
         token_budget: int,
         max_batch_size: int,
+        max_frames_per_batch: int,
         device: torch.device,
         dtype: torch.dtype,
     ) -> EncoderCudaGraphCaptureInputs:
@@ -294,6 +361,7 @@ def prepare_encoder_cudagraph_replay_buffers(
         self,
         mm_kwargs: dict[str, Any],
         max_batch_size: int,
+        max_frames_per_batch: int,
     ) -> EncoderCudaGraphReplayBuffers:
         grid_thw = mm_kwargs["image_grid_thw"]
         n_out = _count_output_tokens(grid_thw, _SPATIAL_MERGE)
@@ -327,11 +395,16 @@ def _make_manager_for_gpu(
     max_batch_size: int,
     device: torch.device,
     dtype: torch.dtype,
+    *,
+    max_frames_per_batch: int | None = None,
 ) -> EncoderCudaGraphManager:
     """Create EncoderCudaGraphManager bypassing VllmConfig for GPU tests."""
     mgr = object.__new__(EncoderCudaGraphManager)
     mgr.token_budgets = sorted(token_budgets)
     mgr.max_batch_size = max_batch_size
+    mgr.max_frames_per_batch = (
+        max_frames_per_batch if max_frames_per_batch is not None else max_batch_size * 2
+    )
     mgr.use_dp = False
     mgr.budget_graphs = {}
     mgr.graph_hits = 0
@@ -366,6 +439,18 @@ def _make_mm_kwargs(
     }
 
 
+def _make_video_mm_kwargs(
+    grid_thw_list: list[list[int]],
+    device: torch.device,
+    dtype: torch.dtype,
+) -> dict[str, Any]:
+    """Create video mm_kwargs (pixel_values_videos / video_grid_thw) for testing."""
+    return {
+        "pixel_values_videos": _make_pixel_values(grid_thw_list, device, dtype),
+        "video_grid_thw": grid_thw_list,
+    }
+
+
 # ---------------------------------------------------------------------------
 # GPU tests — capture, replay, fallback, counters, chunking
 # ---------------------------------------------------------------------------
@@ -449,3 +534,399 @@ def test_chunking_when_images_exceed_max_batch(self):
         assert len(result) == n_images
         for out in result:
             assert out.shape == (4, _HIDDEN)
+
+
+# ---------------------------------------------------------------------------
+# SimpleMockViTVideoModel — extends SimpleMockViTModel with video support
+# ---------------------------------------------------------------------------
+
+
+class SimpleMockViTVideoModel(SimpleMockViTModel):
+    """ViT mock that supports both image and video modalities.
+
+    Reuses SimpleMockViTModel's NN weights and _forward() logic.
+    Only the protocol methods that are key-dependent are overridden.
+    """
+
+    def get_encoder_cudagraph_config(self) -> EncoderCudaGraphConfig:
+        return EncoderCudaGraphConfig(
+            modalities=["image", "video"],
+            input_key_by_modality={
+                "image": "pixel_values",
+                "video": "pixel_values_videos",
+            },
+            buffer_keys=["dummy_buf"],
+            out_hidden_size=_HIDDEN,
+        )
+
+    def get_input_modality(self, mm_kwargs: dict[str, Any]) -> str:
+        return "video" if "video_grid_thw" in mm_kwargs else "image"
+
+    # ------------------------------------------------------------------
+    # Private helpers — route to the correct mm_kwargs keys
+    # ------------------------------------------------------------------
+
+    def _get_grid_thw(self, mm_kwargs: dict[str, Any]) -> list[list[int]]:
+        key = (
+            "video_grid_thw"
+            if self.get_input_modality(mm_kwargs) == "video"
+            else "image_grid_thw"
+        )
+        return mm_kwargs[key]
+
+    def _get_pixel_values(self, mm_kwargs: dict[str, Any]) -> torch.Tensor:
+        key = (
+            "pixel_values_videos"
+            if self.get_input_modality(mm_kwargs) == "video"
+            else "pixel_values"
+        )
+        return mm_kwargs[key]
+
+    # ------------------------------------------------------------------
+    # Protocol overrides that depend on modality keys
+    # ------------------------------------------------------------------
+
+    def get_encoder_cudagraph_item_specs(
+        self,
+        mm_kwargs: dict[str, Any],
+    ) -> list[EncoderItemSpec]:
+        m = _SPATIAL_MERGE
+        return [
+            EncoderItemSpec(
+                input_size=t * h * w,
+                output_tokens=t * (h // m) * (w // m),
+            )
+            for t, h, w in self._get_grid_thw(mm_kwargs)
+        ]
+
+    def select_encoder_cudagraph_items(
+        self, mm_kwargs: dict[str, Any], indices: list[int]
+    ) -> dict[str, Any]:
+        modality = self.get_input_modality(mm_kwargs)
+        pv_key = "pixel_values_videos" if modality == "video" else "pixel_values"
+        grid_key = "video_grid_thw" if modality == "video" else "image_grid_thw"
+
+        grid_thw = self._get_grid_thw(mm_kwargs)
+        pixel_values = self._get_pixel_values(mm_kwargs)
+
+        if len(indices) == 0:
+            return {pv_key: pixel_values[:0], grid_key: []}
+
+        patches_per_item = [t * h * w for t, h, w in grid_thw]
+        cum_patches = [0]
+        for p in patches_per_item:
+            cum_patches.append(cum_patches[-1] + p)
+
+        selected_pv = torch.cat(
+            [pixel_values[cum_patches[i] : cum_patches[i + 1]] for i in indices]
+        )
+        return {pv_key: selected_pv, grid_key: [grid_thw[i] for i in indices]}
+
+    def prepare_encoder_cudagraph_capture_inputs(
+        self,
+        token_budget: int,
+        max_batch_size: int,
+        max_frames_per_batch: int,
+        device: torch.device,
+        dtype: torch.dtype,
+    ) -> EncoderCudaGraphCaptureInputs:
+        per_item_output = token_budget // max_batch_size
+        frames_per_item = max_frames_per_batch // max_batch_size
+        if frames_per_item > 1:
+            # Video-format capture: size cu_seqlens for T frames per item.
+            tokens_per_frame = (
+                per_item_output + frames_per_item - 1
+            ) // frames_per_item
+            grid_config = [
+                [frames_per_item, _SPATIAL_MERGE, tokens_per_frame * _SPATIAL_MERGE]
+                for _ in range(max_batch_size)
+            ]
+        else:
+            grid_config = [
+                [1, _SPATIAL_MERGE, per_item_output * _SPATIAL_MERGE]
+                for _ in range(max_batch_size)
+            ]
+        total_patches = _count_input_patches(grid_config)
+        # Use pixel_values (image key) for capture — same patch shape as video.
+        dummy_pixel_values = torch.randn(
+            total_patches, _FLAT, device=device, dtype=dtype
+        )
+        n_out = _count_output_tokens(grid_config, _SPATIAL_MERGE)
+        dummy_buf = torch.zeros(n_out, _HIDDEN, device=device, dtype=dtype)
+        return EncoderCudaGraphCaptureInputs(
+            mm_kwargs={
+                "pixel_values": dummy_pixel_values,
+                "image_grid_thw": grid_config,
+            },
+            buffers={"dummy_buf": dummy_buf},
+        )
+
+    def prepare_encoder_cudagraph_replay_buffers(
+        self,
+        mm_kwargs: dict[str, Any],
+        max_batch_size: int,
+        max_frames_per_batch: int,
+    ) -> EncoderCudaGraphReplayBuffers:
+        n_out = _count_output_tokens(self._get_grid_thw(mm_kwargs), _SPATIAL_MERGE)
+        p = next(self.parameters())
+        dummy_buf = torch.zeros(n_out, _HIDDEN, device=p.device, dtype=p.dtype)
+        return EncoderCudaGraphReplayBuffers(buffers={"dummy_buf": dummy_buf})
+
+    def encoder_cudagraph_forward(
+        self,
+        mm_kwargs: dict[str, Any],
+        buffers: dict[str, torch.Tensor],
+    ) -> torch.Tensor:
+        return self._forward(self._get_pixel_values(mm_kwargs))
+
+    def encoder_eager_forward(
+        self,
+        mm_kwargs: dict[str, Any],
+    ) -> torch.Tensor:
+        return self._forward(self._get_pixel_values(mm_kwargs))
+
+
+# ---------------------------------------------------------------------------
+# No-GPU tests — get_input_modality routing
+# ---------------------------------------------------------------------------
+
+
+class TestGetInputModality:
+    """get_input_modality returns correct modality based on mm_kwargs keys."""
+
+    def test_image_only_model_always_returns_image(self):
+        model = SimpleMockViTModel()
+        mm_kwargs = {
+            "pixel_values": torch.zeros(1, _FLAT),
+            "image_grid_thw": [[1, 4, 4]],
+        }
+        assert model.get_input_modality(mm_kwargs) == "image"
+
+    def test_video_model_returns_image_for_image_kwargs(self):
+        model = SimpleMockViTVideoModel()
+        mm_kwargs = {
+            "pixel_values": torch.zeros(1, _FLAT),
+            "image_grid_thw": [[1, 4, 4]],
+        }
+        assert model.get_input_modality(mm_kwargs) == "image"
+
+    def test_video_model_returns_video_for_video_kwargs(self):
+        model = SimpleMockViTVideoModel()
+        mm_kwargs = {
+            "pixel_values_videos": torch.zeros(8, _FLAT),
+            "video_grid_thw": [[2, 4, 4]],
+        }
+        assert model.get_input_modality(mm_kwargs) == "video"
+
+    def test_video_model_config_has_both_modalities(self):
+        model = SimpleMockViTVideoModel()
+        cfg = model.get_encoder_cudagraph_config()
+        assert "image" in cfg.modalities
+        assert "video" in cfg.modalities
+        assert cfg.input_key_by_modality["image"] == "pixel_values"
+        assert cfg.input_key_by_modality["video"] == "pixel_values_videos"
+
+
+# ---------------------------------------------------------------------------
+# GPU tests — video capture, replay, fallback, and mixed image+video
+# ---------------------------------------------------------------------------
+
+_VIDEO_MAX_BATCH = 4
+_VIDEO_MAX_FRAMES = 8  # 2 frames per item at max_batch_size=4
+
+
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda")
+class TestEncoderCudaGraphVideoReplay:
+    def setup_method(self):
+        self.device = torch.device("cuda:0")
+        self.dtype = torch.float16
+        self.model = SimpleMockViTVideoModel().to(self.device).half()
+        self.mgr = _make_manager_for_gpu(
+            self.model,
+            _BUDGETS,
+            _VIDEO_MAX_BATCH,
+            self.device,
+            self.dtype,
+            max_frames_per_batch=_VIDEO_MAX_FRAMES,
+        )
+        self.mgr.capture()
+
+    # --- capture ---
+
+    def test_capture_creates_one_graph_per_budget(self):
+        assert len(self.mgr.budget_graphs) == len(_BUDGETS)
+        assert set(self.mgr.budget_graphs.keys()) == set(_BUDGETS)
+
+    # --- output shape ---
+
+    def test_video_execute_returns_one_tensor_per_video(self):
+        # T=2, 4x4 → 2*(4//2)*(4//2) = 8 tokens per video
+        grid_thw = [[2, 4, 4], [2, 4, 4]]
+        mm_kwargs = _make_video_mm_kwargs(grid_thw, self.device, self.dtype)
+        result = self.mgr.execute(mm_kwargs)
+        assert result is not None
+        assert len(result) == 2
+
+    def test_video_output_tokens_per_item(self):
+        # T=2,4x4 → 8 tokens; T=1,4x4 → 4 tokens
+        grid_thw = [[2, 4, 4], [1, 4, 4]]
+        mm_kwargs = _make_video_mm_kwargs(grid_thw, self.device, self.dtype)
+        result = self.mgr.execute(mm_kwargs)
+        assert result is not None
+        assert result[0].shape == (8, _HIDDEN)
+        assert result[1].shape == (4, _HIDDEN)
+
+    # --- budget fallback ---
+
+    def test_video_eager_fallback_when_tokens_exceed_all_budgets(self):
+        # T=2, 18x18 → 2*(18//2)*(18//2) = 162 tokens > max budget 64
+        grid_thw = [[2, 18, 18]]
+        mm_kwargs = _make_video_mm_kwargs(grid_thw, self.device, self.dtype)
+        result = self.mgr.execute(mm_kwargs)
+        assert result is not None
+        assert len(result) == 1
+        assert result[0].shape == (162, _HIDDEN)
+        assert self.mgr.graph_misses == 1
+
+    # --- counters ---
+
+    def test_video_hit_counter_increments_by_num_videos(self):
+        grid_thw = [[2, 4, 4], [1, 4, 4]]
+        mm_kwargs = _make_video_mm_kwargs(grid_thw, self.device, self.dtype)
+        self.mgr.execute(mm_kwargs)
+        assert self.mgr.graph_hits == 2
+
+    def test_video_miss_counter_increments_for_oversized_video(self):
+        grid_thw = [[2, 18, 18]]  # 162 tokens > 64
+        mm_kwargs = _make_video_mm_kwargs(grid_thw, self.device, self.dtype)
+        self.mgr.execute(mm_kwargs)
+        assert self.mgr.graph_misses == 1
+
+    # --- image and video sharing the same manager ---
+
+    def test_image_and_video_share_manager(self):
+        """Image and video inputs can both be executed through the same manager."""
+        img_grid = [[1, 4, 4], [1, 4, 4]]
+        img_result = self.mgr.execute(
+            _make_mm_kwargs(img_grid, self.device, self.dtype)
+        )
+
+        vid_grid = [[2, 4, 4]]
+        vid_result = self.mgr.execute(
+            _make_video_mm_kwargs(vid_grid, self.device, self.dtype)
+        )
+
+        assert len(img_result) == 2
+        assert len(vid_result) == 1
+        assert img_result[0].shape == (4, _HIDDEN)
+        assert vid_result[0].shape == (8, _HIDDEN)
+
+
+# ---------------------------------------------------------------------------
+# __init__ invariant validation tests (no GPU required)
+# ---------------------------------------------------------------------------
+
+
+class TestInitInvariantValidation:
+    """Ensure max_batch_size <= min(token_budgets) for all config paths."""
+
+    def _make_mgr(
+        self,
+        token_budgets=None,
+        max_mm_items=0,
+        min_budget=4,
+        max_budget=128,
+    ):
+        vllm_config = _MockVllmConfig(token_budgets, max_mm_items)
+        model = _MockModel(min_budget, max_budget)
+        return EncoderCudaGraphManager(
+            vllm_config=vllm_config,
+            device=torch.device("cpu"),
+            dtype=torch.float32,
+            model=model,
+        )
+
+    # --- Finding 1: fully auto-inferred ---
+
+    def test_auto_inferred_invariant_holds(self):
+        mgr = self._make_mgr(min_budget=64, max_budget=16384)
+        assert mgr.max_batch_size <= min(mgr.token_budgets)
+
+    def test_auto_inferred_small_range(self):
+        mgr = self._make_mgr(min_budget=4, max_budget=128)
+        assert mgr.max_batch_size <= min(mgr.token_budgets)
+
+    # --- Finding 2: fully user-specified, bad combo ---
+
+    def test_user_specified_bad_combo_raises(self):
+        with pytest.raises(ValueError, match="must be <= smallest token budget"):
+            self._make_mgr(token_budgets=[64], max_mm_items=256)
+
+    def test_user_specified_valid_combo(self):
+        mgr = self._make_mgr(token_budgets=[64, 128], max_mm_items=32)
+        assert mgr.max_batch_size == 32
+        assert mgr.token_budgets == [64, 128]
+
+    def test_user_specified_exact_boundary(self):
+        # max_mm_items == min(budgets) is OK (per_image_output = 1)
+        mgr = self._make_mgr(token_budgets=[64, 128], max_mm_items=64)
+        assert mgr.max_batch_size == 64
+
+    # --- Finding 3: user provides only max_mm_items ---
+
+    def test_user_max_mm_items_only_adjusts_budgets(self):
+        # model min_budget=64, user max_mm_items=128 → budgets start at 128
+        mgr = self._make_mgr(max_mm_items=128, min_budget=64, max_budget=16384)
+        assert mgr.max_batch_size == 128
+        assert min(mgr.token_budgets) >= 128
+
+    def test_user_max_mm_items_smaller_than_min_budget(self):
+        # max_mm_items=2, model min=4 → budgets start at 4 (>= 2), OK
+        mgr = self._make_mgr(max_mm_items=2, min_budget=4, max_budget=128)
+        assert mgr.max_batch_size == 2
+        assert min(mgr.token_budgets) >= 2
+
+    # --- Finding 4: user provides only budgets ---
+
+    def test_user_budgets_only_caps_max_batch_size(self):
+        # user budgets start at 32, model min_budget=64
+        # without fix: max_batch_size = min(128//64, 64) = 2 → OK
+        # but if user budgets=[16, 64]:
+        # without fix: max_batch_size = min(128//4, 4) = 4 > 16? No.
+        # Let's use a case that triggers it:
+        # model min=64, max=16384 → max_budget//min_budget = 256
+        # user budgets=[32, 64] → min = 32
+        # without fix: max_batch_size = min(256, 64) = 64 > 32 → BUG
+        # with fix: max_batch_size = min(256, 32) = 32 → OK
+        mgr = self._make_mgr(token_budgets=[32, 64], min_budget=64, max_budget=16384)
+        assert mgr.max_batch_size <= min(mgr.token_budgets)
+        assert mgr.max_batch_size == 32
+
+    # --- Finding 5/6: bad model budget range ---
+
+    def test_zero_min_budget_raises(self):
+        with pytest.raises(ValueError, match="Both must be positive"):
+            self._make_mgr(min_budget=0, max_budget=128)
+
+    def test_negative_max_budget_raises(self):
+        with pytest.raises(ValueError, match="Both must be positive"):
+            self._make_mgr(min_budget=4, max_budget=-1)
+
+    def test_min_greater_than_max_raises(self):
+        with pytest.raises(ValueError, match="min_budget=200 > max_budget=100"):
+            self._make_mgr(min_budget=200, max_budget=100)
+
+    # --- Finding 7: user-provided budgets with non-positive values ---
+
+    def test_user_budgets_zero_raises(self):
+        """Non-positive budgets should be caught at config validation."""
+        from vllm.config.compilation import CompilationConfig
+
+        with pytest.raises(ValueError, match="must be positive"):
+            CompilationConfig(encoder_cudagraph_token_budgets=[0, 128])
+
+    def test_user_budgets_negative_raises(self):
+        from vllm.config.compilation import CompilationConfig
+
+        with pytest.raises(ValueError, match="must be positive"):
+            CompilationConfig(encoder_cudagraph_token_budgets=[-1, 64])
diff --git a/tests/v1/determinism/test_batch_invariance.py b/tests/v1/determinism/test_batch_invariance.py
index 6465985f0e9f..415c7d5f3f26 100644
--- a/tests/v1/determinism/test_batch_invariance.py
+++ b/tests/v1/determinism/test_batch_invariance.py
@@ -11,15 +11,12 @@
     TEST_MODEL,
     _extract_step_logprobs,
     _random_prompt,
-    is_device_capability_below_90,
     skip_unsupported,
 )
 
 import vllm.envs as envs
 from vllm import LLM, SamplingParams
 
-IS_DEVICE_CAPABILITY_BELOW_90 = is_device_capability_below_90()
-
 
 @skip_unsupported
 @pytest.mark.timeout(1000)
@@ -36,10 +33,10 @@ def test_v1_generation_is_deterministic_across_batch_sizes_with_needle(
     using the high-level v1 LLM() API only (no manual batching).
 
     Strategy:
-    - Create two LLM engines with identical config except max_num_seqs: 1 vs N.
-    - Compute a baseline output for the needle prompt with the bs=1 engine.
-    - For many trials, generate a batch (size N) where the needle appears at a
-      random position among random filler prompts using the bs=N engine.
+    - Create a single LLM engine configured for the larger batch limit (N).
+    - Compute a baseline output for the needle prompt when it is run alone.
+    - For many trials, generate a mixed batch (size N) where the needle appears
+      at a random position among random filler prompts using the same engine.
     - Track how many trials match vs mismatch, and report totals at the end.
       The test fails if any mismatches occur, but we still dump pass/fail
       counts.
@@ -65,7 +62,7 @@ def test_v1_generation_is_deterministic_across_batch_sizes_with_needle(
     assert max_batch_size >= 2, "Batch size should be >= 2 to mix needle."
 
     # Keep GPU memory usage low to avoid startup allocation failures.
-    gpu_mem_util = float(os.getenv("VLLM_GPU_MEMORY_UTILIZATION", "0.4"))
+    gpu_mem_util = float(os.getenv("VLLM_GPU_MEMORY_UTILIZATION", "0.5"))
     max_model_len = int(os.getenv("VLLM_MAX_MODEL_LEN", "5120"))
 
     # Sampling parameters: longer outputs with a more random-sounding
@@ -83,11 +80,9 @@ def test_v1_generation_is_deterministic_across_batch_sizes_with_needle(
 
     needle_prompt = "There once was a "
 
-    llm_bs1 = None
-    llm_bsN = None
+    llm = None
     try:
-        # Engine with bs=1 behavior
-        llm_bs1 = LLM_with_max_seqs(
+        llm = LLM_with_max_seqs(
             model=model,
             max_num_seqs=max_batch_size,
             gpu_memory_utilization=gpu_mem_util,
@@ -96,20 +91,11 @@ def test_v1_generation_is_deterministic_across_batch_sizes_with_needle(
         )
 
         # Baseline generation for the needle prompt alone.
-        baseline_out = llm_bs1.generate([needle_prompt], sampling)
+        baseline_out = llm.generate([needle_prompt], sampling)
         assert len(baseline_out) == 1
         assert len(baseline_out[0].outputs) >= 1
         baseline_text = baseline_out[0].outputs[0].text
 
-        # Engine with larger batch limit (e.g., 64)
-        llm_bsN = LLM_with_max_seqs(
-            model=model,
-            max_num_seqs=max_batch_size,
-            gpu_memory_utilization=gpu_mem_util,
-            max_model_len=max_model_len,
-            attention_config=attention_config,
-        )
-
         mismatches = 0
 
         for trial in range(num_trials):
@@ -124,8 +110,8 @@ def test_v1_generation_is_deterministic_across_batch_sizes_with_needle(
                 else:
                     prompts.append(_random_prompt(min_random_prompt, max_random_prompt))
 
-            # Generate with the larger-batch engine
-            outputs = llm_bsN.generate(prompts, sampling)
+            # Generate with the same engine but in a larger batch.
+            outputs = llm.generate(prompts, sampling)
             # Find the needle output by position
             needle_output = outputs[needle_pos]
             assert needle_output.prompt == needle_prompt
@@ -151,12 +137,9 @@ def test_v1_generation_is_deterministic_across_batch_sizes_with_needle(
 
     finally:
         # Ensure engines are shutdown to free GPU/VRAM across test sessions
-        if llm_bs1 is not None:
-            with contextlib.suppress(Exception):
-                llm_bs1.shutdown()
-        if llm_bsN is not None:
+        if llm is not None:
             with contextlib.suppress(Exception):
-                llm_bsN.shutdown()
+                llm.shutdown()
 
 
 @skip_unsupported
@@ -164,8 +147,14 @@ def test_v1_generation_is_deterministic_across_batch_sizes_with_needle(
     "backend",
     BACKENDS,
 )
+@pytest.mark.parametrize(
+    "block_m,block_n",
+    [(16, 16), (8, 16)],
+)
 def test_logprobs_bitwise_batch_invariance_bs1_vs_bsN(
     backend,
+    block_m,
+    block_n,
 ):
     seed = int(os.getenv("VLLM_TEST_SEED", "12345"))
     random.seed(seed)
@@ -187,10 +176,13 @@ def test_logprobs_bitwise_batch_invariance_bs1_vs_bsN(
         tensor_parallel_size=tp_size,
         max_num_seqs=128,
         max_model_len=8192,
-        dtype="bfloat16",  # not everything is supported
+        dtype="auto",  # not everything is supported
         gpu_memory_utilization=0.9,
-        enforce_eager=IS_DEVICE_CAPABILITY_BELOW_90,
-        attention_config={"backend": backend},
+        attention_config={
+            "backend": backend,
+            "flex_attn_block_m": block_m,
+            "flex_attn_block_n": block_n,
+        },
     )
 
     # Use more realistic prompts for better token generation
@@ -400,9 +392,8 @@ def test_simple_generation(backend):
         tensor_parallel_size=int(os.getenv("VLLM_TP_SIZE", "1")),
         gpu_memory_utilization=0.9,
         max_model_len=2048,
-        dtype="bfloat16",
+        dtype="auto",
         enable_prefix_caching=False,
-        enforce_eager=IS_DEVICE_CAPABILITY_BELOW_90,
         attention_config={"backend": backend},
     )
 
@@ -466,8 +457,7 @@ def test_logprobs_without_batch_invariance_should_fail(
         tensor_parallel_size=tp_size,
         max_num_seqs=32,
         max_model_len=8192,
-        dtype="bfloat16",
-        enforce_eager=IS_DEVICE_CAPABILITY_BELOW_90,
+        dtype="auto",
         attention_config={"backend": backend},
     )
 
@@ -686,8 +676,7 @@ def test_decode_logprobs_match_prefill_logprobs(
         tensor_parallel_size=tp_size,
         max_num_seqs=32,
         max_model_len=8192,
-        dtype="bfloat16",
-        enforce_eager=IS_DEVICE_CAPABILITY_BELOW_90,
+        dtype="auto",
         attention_config={"backend": backend},
     )
 
@@ -931,10 +920,9 @@ def LLM_with_max_seqs(
         max_num_seqs=max_num_seqs,
         gpu_memory_utilization=gpu_memory_utilization,
         max_model_len=max_model_len,
-        dtype="bfloat16",
+        dtype="auto",
         tensor_parallel_size=int(os.getenv("VLLM_TP_SIZE", "1")),
         enable_prefix_caching=False,
-        enforce_eager=IS_DEVICE_CAPABILITY_BELOW_90,
         attention_config=attention_config,
         # Enable for MOE models
         # enable_expert_parallel=True,
diff --git a/tests/v1/determinism/test_cutlass_batch_invariance.py b/tests/v1/determinism/test_cutlass_batch_invariance.py
new file mode 100644
index 000000000000..6df3f0a5f089
--- /dev/null
+++ b/tests/v1/determinism/test_cutlass_batch_invariance.py
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+import torch
+
+import vllm.envs as envs
+from tests.utils import TestFP8Layer, requires_fp8
+from vllm.model_executor.kernels.linear.scaled_mm.cutlass import (
+    CutlassFP8ScaledMMLinearKernel,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    kFp8DynamicTokenSym,
+    kFp8StaticTensorSym,
+)
+from vllm.platforms import current_platform
+
+pytest.importorskip("torch.cuda")
+
+
+@pytest.fixture(autouse=True)
+def setup_cuda():
+    if not current_platform.is_cuda():
+        pytest.skip("CUTLASS FP8 kernels require CUDA.")
+    torch.set_default_device("cuda")
+
+
+@requires_fp8
+@pytest.mark.parametrize("weight_shape", [(1024, 2048), (4608, 4096)])
+@pytest.mark.parametrize("batch_size", [1, 16, 17, 32, 64, 65, 256, 257])
+@torch.inference_mode()
+def test_cutlass_fp8_batch_invariant_fixed_config(
+    weight_shape: tuple[int, int],
+    batch_size: int,
+    default_vllm_config,
+    monkeypatch: pytest.MonkeyPatch,
+):
+    monkeypatch.setenv("VLLM_BATCH_INVARIANT", "1")
+    monkeypatch.setattr(envs, "VLLM_BATCH_INVARIANT", True)
+
+    torch.manual_seed(0)
+    layer = TestFP8Layer(
+        weight_shape=weight_shape,
+        activation_quant_key=kFp8DynamicTokenSym,
+        weight_quant_key=kFp8StaticTensorSym,
+        input_dtype=torch.bfloat16,
+        out_dtype=torch.bfloat16,
+        device=torch.device("cuda"),
+        force_kernel=CutlassFP8ScaledMMLinearKernel,
+    )
+    assert isinstance(layer.kernel, CutlassFP8ScaledMMLinearKernel)
+
+    in_features = weight_shape[1]
+    needle = torch.randn((1, in_features), device="cuda", dtype=torch.bfloat16)
+    baseline = layer(needle)[0]
+
+    filler = torch.randn(
+        (max(batch_size - 1, 0), in_features), device="cuda", dtype=torch.bfloat16
+    )
+
+    front_batch = torch.cat([needle, filler], dim=0)
+    back_batch = torch.cat([filler, needle], dim=0)
+
+    front_output = layer(front_batch)[0]
+    back_output = layer(back_batch)[-1]
+
+    torch.testing.assert_close(front_output, baseline, rtol=0, atol=0)
+    torch.testing.assert_close(back_output, baseline, rtol=0, atol=0)
diff --git a/tests/v1/determinism/test_matmul_batch_invariant.py b/tests/v1/determinism/test_matmul_batch_invariant.py
new file mode 100644
index 000000000000..8838b6be2705
--- /dev/null
+++ b/tests/v1/determinism/test_matmul_batch_invariant.py
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Test batch-invariant matmul against torch.matmul for various shape combinations.
+
+Tests correctness (matches torch.matmul) and batch invariance (result for one
+item doesn't change based on other items in the batch).
+"""
+
+import pytest
+import torch
+from utils import skip_unsupported
+
+from vllm.model_executor.layers.batch_invariant import matmul_batch_invariant
+from vllm.platforms import current_platform
+
+DEVICE_TYPE = current_platform.device_type
+
+
+@skip_unsupported
+@pytest.mark.parametrize(
+    "a_shape,b_shape",
+    [
+        # 2D x 2D
+        ((32, 64), (64, 16)),
+        # 2D x 3D
+        ((64, 16), (4, 16, 32)),
+        # 3D x 2D
+        ((4, 32, 64), (64, 16)),
+        # 4D x 2D
+        ((1, 4, 32, 64), (64, 16)),
+        # 3D x 3D
+        ((4, 32, 64), (4, 64, 16)),
+        # 3D x 4D
+        ((2, 32, 64), (1, 2, 64, 16)),
+        # 4D x 3D (Gemma4 pattern)
+        ((1, 2, 32, 64), (2, 64, 16)),
+        # 4D x 4D
+        ((1, 2, 32, 64), (4, 2, 64, 16)),
+        # 2D x 4D
+        ((32, 64), (1, 2, 64, 16)),
+        # 2D x 5D
+        ((32, 64), (1, 2, 2, 64, 16)),
+        # 5D x 2D
+        ((1, 2, 2, 32, 64), (64, 16)),
+        # 5D x 5D
+        ((1, 2, 4, 32, 64), (1, 2, 4, 64, 16)),
+    ],
+)
+@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16])
+def test_matmul_correctness(a_shape, b_shape, dtype):
+    """
+    Compare matmul_batch_invariant against torch.matmul for various shapes.
+    """
+    device = torch.device(DEVICE_TYPE)
+
+    torch.manual_seed(42)
+    a = torch.rand(a_shape, dtype=dtype, device=device)
+    b = torch.rand(b_shape, dtype=dtype, device=device)
+
+    # Standard implementation (CUDA ops)
+    standard_output = torch.matmul(a, b)
+
+    # Batch-invariant implementation (Triton)
+    triton_output = matmul_batch_invariant(a, b)
+
+    # Compare outputs
+    # Use looser tolerance for bfloat16 due to its lower precision
+    if dtype == torch.bfloat16:
+        rtol, atol = 1e-1, 1e-1  # 10% relative tolerance for bfloat16
+    else:
+        rtol, atol = 1e-2, 1e-2  # 1% for float16/float32
+
+    torch.testing.assert_close(
+        triton_output,
+        standard_output,
+        rtol=rtol,
+        atol=atol,
+        msg=f"matmul mismatch for a ndim={a.ndim}, b ndim={b.ndim},",
+    )
+
+
+@skip_unsupported
+@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16])
+def test_matmul_batch_invariance(dtype):
+    """
+    Verify that the result for one item is bitwise identical regardless
+    of what other items are in the batch.
+    """
+
+    device = torch.device(DEVICE_TYPE)
+
+    torch.manual_seed(42)
+    a_single = torch.rand((1, 64, 32), dtype=dtype, device=device)
+    b = torch.rand((32, 128), dtype=dtype, device=device)
+
+    standard_output = matmul_batch_invariant(a_single, b)
+
+    a_batch = torch.rand((8, 64, 32), dtype=dtype, device=device)
+    a_batch[3] = a_single[0]
+
+    batch_output = matmul_batch_invariant(a_batch, b)
+    batch_output_a = batch_output[3]
+
+    assert torch.equal(standard_output[0], batch_output_a)
diff --git a/tests/v1/determinism/test_nvfp4_batch_invariant.py b/tests/v1/determinism/test_nvfp4_batch_invariant.py
new file mode 100644
index 000000000000..d7a1c9e84042
--- /dev/null
+++ b/tests/v1/determinism/test_nvfp4_batch_invariant.py
@@ -0,0 +1,100 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import contextlib
+import os
+import random
+
+import pytest
+import torch
+from utils import (
+    _extract_step_logprobs,
+    _random_prompt,
+    skip_unsupported,
+)
+
+from vllm import LLM, SamplingParams
+
+pytestmark = pytest.mark.skipif(
+    not hasattr(torch, "float8_e4m3fn"),
+    reason="NVFP4 tests require torch.float8_e4m3fn support.",
+)
+
+NVFP4_TEST_MODEL = os.getenv(
+    "VLLM_TEST_NVFP4_MODEL", "nm-testing/TinyLlama-1.1B-Chat-v1.0-NVFP4"
+)
+
+
+def _make_llm(max_num_seqs: int, backend: str) -> LLM:
+    return LLM(
+        model=NVFP4_TEST_MODEL,
+        max_num_seqs=max_num_seqs,
+        gpu_memory_utilization=float(
+            os.getenv("VLLM_NVFP4_TEST_GPU_MEMORY_UTILIZATION", "0.05")
+        ),
+        max_model_len=int(os.getenv("VLLM_NVFP4_TEST_MAX_MODEL_LEN", "2048")),
+        dtype="auto",
+        tensor_parallel_size=int(os.getenv("VLLM_NVFP4_TEST_TP_SIZE", "1")),
+        enable_prefix_caching=False,
+        enforce_eager=True,
+        attention_config={"backend": backend},
+    )
+
+
+@skip_unsupported
+@pytest.mark.parametrize("backend", ["FLASH_ATTN"])
+def test_dense_nvfp4_generation_is_deterministic_across_batch_sizes_e2e(backend):
+    seed = int(os.getenv("VLLM_TEST_SEED", "12345"))
+    random.seed(seed)
+
+    num_trials = int(os.getenv("VLLM_NVFP4_NEEDLE_TRIALS", "2"))
+    max_batch_size = int(os.getenv("VLLM_NVFP4_NEEDLE_BATCH_SIZE", "8"))
+    min_random_prompt = int(os.getenv("VLLM_NVFP4_MIN_PROMPT", "32"))
+    max_random_prompt = int(os.getenv("VLLM_NVFP4_MAX_PROMPT", "96"))
+    assert max_batch_size >= 2, "Batch size should be >= 2 to test invariance."
+
+    sampling = SamplingParams(
+        temperature=float(os.getenv("VLLM_NVFP4_NEEDLE_TEMPERATURE", "0.6")),
+        top_p=float(os.getenv("VLLM_NVFP4_NEEDLE_TOP_P", "0.95")),
+        max_tokens=int(os.getenv("VLLM_NVFP4_NEEDLE_MAX_TOKENS", "16")),
+        seed=20240919,
+        logprobs=5,
+    )
+    needle_prompt = "Write one factual sentence about the moon."
+
+    llm = None
+    baseline_completion = None
+    baseline_logprobs = None
+    try:
+        llm = _make_llm(max_num_seqs=max_batch_size, backend=backend)
+        baseline_output = llm.generate([needle_prompt], sampling, use_tqdm=False)[0]
+        baseline_completion = baseline_output.outputs[0]
+        baseline_logprobs, baseline_token_ids = _extract_step_logprobs(baseline_output)
+        assert baseline_logprobs is not None
+        assert baseline_token_ids is not None
+        for _ in range(num_trials):
+            batch_size = random.randint(max_batch_size // 2, max_batch_size)
+            needle_pos = random.randint(0, batch_size - 1)
+            prompts: list[str] = []
+            for idx in range(batch_size):
+                if idx == needle_pos:
+                    prompts.append(needle_prompt)
+                else:
+                    prompts.append(_random_prompt(min_random_prompt, max_random_prompt))
+
+            outputs = llm.generate(prompts, sampling, use_tqdm=False)
+            needle_output = outputs[needle_pos]
+            needle_completion = needle_output.outputs[0]
+            needle_logprobs, needle_token_ids = _extract_step_logprobs(needle_output)
+            assert needle_logprobs is not None
+            assert needle_token_ids is not None
+
+            assert needle_output.prompt == needle_prompt
+            assert baseline_completion is not None
+            assert baseline_logprobs is not None
+            assert needle_completion.token_ids == baseline_completion.token_ids
+            assert needle_completion.text == baseline_completion.text
+            torch.testing.assert_close(needle_logprobs, baseline_logprobs)
+    finally:
+        if llm is not None:
+            with contextlib.suppress(Exception):
+                llm.shutdown()
diff --git a/tests/v1/determinism/test_nvfp4_batch_invariant_scaled_mm.py b/tests/v1/determinism/test_nvfp4_batch_invariant_scaled_mm.py
new file mode 100644
index 000000000000..16bb26093a13
--- /dev/null
+++ b/tests/v1/determinism/test_nvfp4_batch_invariant_scaled_mm.py
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""NVFP4 CUTLASS GEMM tests that require ``VLLM_BATCH_INVARIANT=1``.
+
+Must run in a **fresh** pytest process:
+
+    pytest tests/v1/determinism/test_nvfp4_batch_invariant_scaled_mm.py -v
+
+Do not share a session with ``tests/kernels/quantization/test_nvfp4_scaled_mm.py``:
+the native code caches whether batch invariance is enabled on the first GEMM, and
+if ``VLLM_BATCH_INVARIANT`` was not set at that moment, it stays disabled for the
+rest of the process.
+"""
+
+import os
+
+import pytest
+import torch
+
+from tests.kernels.quantization.nvfp4_utils import get_nvfp4_global_scale
+from vllm import _custom_ops as ops
+from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed
+
+if not current_platform.has_device_capability(100):
+    pytest.skip(
+        reason="Nvfp4 Requires compute capability of 10 or above.",
+        allow_module_level=True,
+    )
+
+DTYPES = [torch.float16, torch.bfloat16]
+SHAPES = [(128, 128, 64), (128, 128, 128), (256, 128, 64), (128, 256, 128)]
+PAD_SHAPES = [(150, 128, 64), (128, 128, 96)]
+SHAPES.extend(PAD_SHAPES)
+
+
+CONSISTENCY_SHAPES = [
+    (256, 128, 4096),
+    (512, 256, 4096),
+    (256, 256, 2048),
+    (241, 160, 2048),
+    (401, 352, 1984),
+    (333, 320, 1008),
+    (287, 96, 4096),
+]
+
+
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("shape", CONSISTENCY_SHAPES)
+@torch.inference_mode()
+def test_nvfp4_gemm_batch_invariance(
+    dtype: torch.dtype,
+    shape: tuple[int, int, int],
+) -> None:
+    """Batch invariance: each row of a full-``M`` GEMM matches its ``M=1`` counterpart.
+
+    For row ``i``, compares ``cutlass_scaled_fp4_mm`` run once over all ``M``
+    rows against a separate call with ``A`` sliced to ``a_dtype[i : i+1]``.
+    Catches kernels whose reduction or scheduling depends on ``M`` or adjacent
+    rows.
+    """
+    seed = int(os.getenv("VLLM_TEST_SEED", "12345"))
+    set_random_seed(seed)
+    m, n, packed_k = shape
+    k = packed_k * 2  # real K (FP4 elements)
+
+    a_dtype = torch.randn((m, k), dtype=dtype, device="cuda")
+    b_dtype = torch.randn((n, k), dtype=dtype, device="cuda")
+
+    a_global_scale = get_nvfp4_global_scale(a_dtype)
+    b_global_scale = get_nvfp4_global_scale(b_dtype)
+    alpha = 1.0 / (a_global_scale * b_global_scale)
+
+    b_fp4, b_scale_interleaved = ops.scaled_fp4_quant(b_dtype, b_global_scale)
+
+    a_fp4_full, a_sf_full = ops.scaled_fp4_quant(a_dtype, a_global_scale)
+    out_full = ops.cutlass_scaled_fp4_mm(
+        a_fp4_full,
+        b_fp4,
+        a_sf_full,
+        b_scale_interleaved,
+        alpha,
+        dtype,
+    )
+
+    for i in range(m):
+        a_row = a_dtype[i : i + 1]
+        a_fp4_row, a_sf_row = ops.scaled_fp4_quant(a_row, a_global_scale)
+        out_row = ops.cutlass_scaled_fp4_mm(
+            a_fp4_row,
+            b_fp4,
+            a_sf_row,
+            b_scale_interleaved,
+            alpha,
+            dtype,
+        )
+
+        assert torch.equal(out_full[i], out_row[0]), (
+            f"VLLM_BATCH_INVARIANT: row {i} differs between M={m} and M=1: "
+            f"max_abs_diff={(out_full[i] - out_row[0]).abs().max().item()}"
+        )
diff --git a/tests/v1/determinism/test_rms_norm_batch_invariant.py b/tests/v1/determinism/test_rms_norm_batch_invariant.py
index 5e5b40d09c23..2e9f77881273 100644
--- a/tests/v1/determinism/test_rms_norm_batch_invariant.py
+++ b/tests/v1/determinism/test_rms_norm_batch_invariant.py
@@ -13,6 +13,9 @@
 
 from vllm.model_executor.layers.batch_invariant import rms_norm as triton_rms_norm
 from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.platforms import current_platform
+
+DEVICE_TYPE = current_platform.device_type
 
 
 @skip_unsupported
@@ -34,7 +37,7 @@ def test_rms_norm_batch_invariant_vs_standard(
     equivalent results to the standard CUDA implementation across various
     configurations.
     """
-    device = torch.device("cuda")
+    device = torch.device(DEVICE_TYPE)
 
     # Create test input and weight
     torch.manual_seed(42)
@@ -68,6 +71,99 @@ def test_rms_norm_batch_invariant_vs_standard(
     )
 
 
+@skip_unsupported
+@pytest.mark.parametrize("hidden_size", [512, 4096])
+@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16])
+@pytest.mark.parametrize("eps", [1e-6])
+def test_fused_add_rms_norm_batch_invariant_residual_path(
+    hidden_size: int,
+    dtype: torch.dtype,
+    eps: float,
+):
+    """
+    Test the batch-invariant fused residual-add + RMSNorm helper directly.
+    """
+    device = torch.device(DEVICE_TYPE)
+
+    torch.manual_seed(42)
+    x_single = torch.randn(1, hidden_size, dtype=dtype, device=device)
+    residual_single = torch.randn(1, hidden_size, dtype=dtype, device=device)
+    weight = torch.randn(hidden_size, dtype=dtype, device=device)
+
+    x_batch = torch.cat(
+        [
+            x_single,
+            torch.randn(3, hidden_size, dtype=dtype, device=device),
+        ],
+        dim=0,
+    )
+    residual_batch = torch.cat(
+        [
+            residual_single,
+            torch.randn(3, hidden_size, dtype=dtype, device=device),
+        ],
+        dim=0,
+    )
+
+    def fused_add_rms_norm(x, residual, w, e) -> tuple[torch.Tensor, torch.Tensor]:
+        import vllm._custom_ops as ops
+
+        ops.fused_add_rms_norm(x, residual, w, e)
+        return x, residual
+
+    out_single, residual_out_single = fused_add_rms_norm(
+        x_single.clone(),
+        residual_single.clone(),
+        weight,
+        eps,
+    )
+    out_batch, residual_out_batch = fused_add_rms_norm(
+        x_batch.clone(),
+        residual_batch.clone(),
+        weight,
+        eps,
+    )
+
+    merged_single = x_single + residual_single
+    ref_out = triton_rms_norm(merged_single, weight, eps=eps)
+
+    torch.testing.assert_close(
+        residual_out_single,
+        merged_single,
+        rtol=0.0,
+        atol=0.0,
+        msg="Residual output should equal x + residual exactly",
+    )
+    torch.testing.assert_close(
+        residual_out_batch[:1],
+        merged_single,
+        rtol=0.0,
+        atol=0.0,
+        msg="Residual output should be batch invariant",
+    )
+    torch.testing.assert_close(
+        out_single,
+        out_batch[:1],
+        rtol=0.0,
+        atol=0.0,
+        msg="Fused add RMSNorm output should be batch invariant",
+    )
+
+    if dtype == torch.bfloat16:
+        rtol, atol = 1e-1, 1e-1
+    else:
+        rtol, atol = 1e-2, 1e-2
+
+    torch.testing.assert_close(
+        out_single,
+        ref_out,
+        rtol=rtol,
+        atol=atol,
+        msg="Fused add RMSNorm output should stay numerically close to the "
+        "batch-invariant RMSNorm reference",
+    )
+
+
 @skip_unsupported
 @pytest.mark.parametrize("batch_size", [1, 16, 128])
 @pytest.mark.parametrize("seq_len", [1, 32, 512])
@@ -81,7 +177,7 @@ def test_rms_norm_3d_input(
     Ensures that the batch-invariant RMS norm correctly handles multi-dimensional
     inputs that are common in transformer models.
     """
-    device = torch.device("cuda")
+    device = torch.device(DEVICE_TYPE)
     dtype = torch.bfloat16
     eps = 1e-6
 
@@ -120,7 +216,7 @@ def test_rms_norm_numerical_stability(default_vllm_config):
     Ensures that both implementations handle edge cases like very small or large
     values without producing NaN or Inf.
     """
-    device = torch.device("cuda")
+    device = torch.device(DEVICE_TYPE)
     dtype = torch.float16
     eps = 1e-6
     hidden_size = 2048
@@ -179,7 +275,7 @@ def test_rms_norm_formula(default_vllm_config):
 
     Verifies: output = input / sqrt(mean(input^2) + eps) * weight
     """
-    device = torch.device("cuda")
+    device = torch.device(DEVICE_TYPE)
     dtype = torch.float32  # Use float32 for higher precision in formula check
     eps = 1e-6
     hidden_size = 1024
@@ -214,7 +310,7 @@ def test_rms_norm_different_hidden_sizes(default_vllm_config, hidden_size: int):
     The Triton kernel uses a fixed BLOCK_SIZE=1024, so this tests that it
     correctly handles hidden sizes both smaller and larger than the block size.
     """
-    device = torch.device("cuda")
+    device = torch.device(DEVICE_TYPE)
     dtype = torch.bfloat16
     eps = 1e-6
     batch_size = 16
@@ -251,7 +347,7 @@ def test_rms_norm_determinism(default_vllm_config):
     Runs the same input through the kernel multiple times and verifies
     identical outputs.
     """
-    device = torch.device("cuda")
+    device = torch.device(DEVICE_TYPE)
     dtype = torch.bfloat16
     eps = 1e-6
     hidden_size = 4096
@@ -283,7 +379,7 @@ def test_rms_norm_determinism(default_vllm_config):
     # Run a quick smoke test
     print("Running quick smoke test of RMS norm implementations...")
 
-    device = torch.device("cuda")
+    device = torch.device(DEVICE_TYPE)
     batch_size = 8
     hidden_size = 4096
     dtype = torch.bfloat16
diff --git a/tests/v1/determinism/utils.py b/tests/v1/determinism/utils.py
index f9bebec98619..bbef61477232 100644
--- a/tests/v1/determinism/utils.py
+++ b/tests/v1/determinism/utils.py
@@ -26,6 +26,7 @@
 BACKENDS: list[str] = [
     "FLASH_ATTN",
     "TRITON_ATTN",
+    "FLEX_ATTENTION",
 ]
 
 # FlashInfer temporarily disabled due to invariant CTA sizes.
diff --git a/tests/v1/distributed/test_async_llm_dp.py b/tests/v1/distributed/test_async_llm_dp.py
index 1b7739d2f071..70a5136a57ce 100644
--- a/tests/v1/distributed/test_async_llm_dp.py
+++ b/tests/v1/distributed/test_async_llm_dp.py
@@ -365,10 +365,13 @@ async def test_dp_pause_keep_race_staggered_engines():
         async def staggered_pause_keep(method: str, *args) -> Any:
             if method != "pause_scheduler" or not args or args[0] != "keep":
                 return await original_call_utility(method, *args)
-            # Send pause(keep) to engine 0 first
-            await client._call_utility_async(
-                method, *args, engine=client.core_engines[0]
+            # Fire pause(keep) to engine 0 (don't await — with DP
+            # two-phase pause, consensus requires all ranks).
+            pause_0 = asyncio.create_task(
+                client._call_utility_async(method, *args, engine=client.core_engines[0])
             )
+            # Let the event loop send the message to engine 0.
+            await asyncio.sleep(0.5)
             # In the middle: send two requests (race window)
             sp = SamplingParams(max_tokens=5, ignore_eos=True)
 
@@ -384,11 +387,13 @@ async def consume_gen(req_id: str) -> None:
             t2 = asyncio.create_task(consume_gen("race-2"))
             mid_pause_tasks.extend([t1, t2])
             await asyncio.sleep(3)
-            # Then send pause(keep) to engine 1
-            result = await client._call_utility_async(
-                method, *args, engine=client.core_engines[1]
+            # Fire pause(keep) to engine 1, then await both so
+            # consensus can be reached.
+            pause_1 = asyncio.create_task(
+                client._call_utility_async(method, *args, engine=client.core_engines[1])
             )
-            return result
+            results = await asyncio.gather(pause_0, pause_1)
+            return results[0]
 
         client.call_utility_async = staggered_pause_keep
 
@@ -398,3 +403,113 @@ async def consume_gen(req_id: str) -> None:
         assert not await engine.is_paused()
         # Let the two requests we sent mid-pause complete
         await asyncio.gather(*mid_pause_tasks)
+
+
+@pytest.mark.asyncio
+async def test_dp_pause_barrier_request_deadlock():
+    """
+    Test that start_dp_wave is ignored while paused.
+
+    Sequence:
+      1. Pause all engines (PAUSED_ALL).
+      2. Send barrier to engine 0 only — blocks in dist.barrier(dp_group).
+      3. Send a request routed to engine 1.
+      4. Wait for any (buggy) START_DP_WAVE propagation.
+      5. Send barrier to engine 1 — completes in fixed code, deadlocks
+         in buggy code because engine 1 is stuck in EP all-to-all.
+    """
+    if DP_SIZE != 2:
+        pytest.skip("requires DP_SIZE=2")
+
+    with ExitStack() as after:
+        engine_args = _get_dp_pause_engine_args(expert_parallel=True)
+        engine = AsyncLLM.from_engine_args(engine_args)
+        after.callback(engine.shutdown)
+
+        client = engine.engine_core
+
+        # Cache get_supported_tasks so that generate() won't need to
+        # send a utility call to all engines (which would hang once
+        # engine 0 is blocked in the barrier).
+        await engine.get_supported_tasks()
+
+        # Pause all engines normally — no staggering.
+        await engine.pause_generation(mode="keep")
+        assert await engine.is_paused()
+
+        original_call_utility = client.call_utility_async
+        mid_barrier_tasks: list[asyncio.Task] = []
+
+        async def staggered_barrier(method: str, *args) -> Any:
+            if method != "barrier":
+                return await original_call_utility(method, *args)
+
+            # Send barrier to engine 0 only — it blocks in
+            # dist.barrier(dp_group) waiting for engine 1.
+            barrier_0 = asyncio.create_task(
+                client._call_utility_async(method, *args, engine=client.core_engines[0])
+            )
+            await asyncio.sleep(1)
+
+            # While engine 0 is blocked, send a request routed
+            # specifically to engine 1.
+            sp = SamplingParams(max_tokens=5, ignore_eos=True)
+
+            engine_1 = client.core_engines[1]
+            original_get_engine = client.get_core_engine_for_request
+
+            def route_to_engine_1(req):
+                client.reqs_in_flight[req.request_id] = engine_1
+                return engine_1
+
+            client.get_core_engine_for_request = route_to_engine_1
+
+            async def consume_gen(req_id: str) -> None:
+                async for _ in engine.generate(
+                    request_id=req_id,
+                    prompt=DP_PAUSE_PROMPT,
+                    sampling_params=sp,
+                ):
+                    pass
+
+            t1 = asyncio.create_task(consume_gen("race-1"))
+            mid_barrier_tasks.append(t1)
+
+            # Yield so generate() preprocessing completes and
+            # add_request_async is called (which, in buggy code,
+            # would send FIRST_REQ and wake engine 1).
+            for _ in range(200):
+                await asyncio.sleep(0)
+
+            client.get_core_engine_for_request = original_get_engine
+
+            # Wait for any START_DP_WAVE to propagate and for
+            # engine 1 to potentially enter execute_dummy_batch.
+            await asyncio.sleep(5)
+
+            # Now send barrier to engine 1.  In buggy code engine 1
+            # is stuck in execute_dummy_batch (EP all-to-all) while
+            # engine 0 is stuck in dist.barrier(dp_group) — deadlock.
+            result = await client._call_utility_async(
+                method, *args, engine=client.core_engines[1]
+            )
+            await barrier_0
+            return result
+
+        client.call_utility_async = staggered_barrier
+
+        # Drive the staggered barrier.  Old code deadlocks here.
+        try:
+            await asyncio.wait_for(client.call_utility_async("barrier"), timeout=30)
+        except asyncio.TimeoutError:
+            for t in mid_barrier_tasks:
+                t.cancel()
+            pytest.fail(
+                "Staggered barrier deadlocked — FIRST_REQ sent while "
+                "paused caused collective-op mismatch between engines"
+            )
+
+        await engine.resume_generation()
+        assert not await engine.is_paused()
+        # Let the two requests we sent mid-barrier complete.
+        await asyncio.gather(*mid_barrier_tasks)
diff --git a/tests/v1/distributed/test_eagle_dp.py b/tests/v1/distributed/test_eagle_dp.py
index e20893b63632..019f9c6f213d 100644
--- a/tests/v1/distributed/test_eagle_dp.py
+++ b/tests/v1/distributed/test_eagle_dp.py
@@ -20,17 +20,22 @@
 else:
     ATTN_BACKENDS = ["FLASH_ATTN"]
 
+# On SM<90 (e.g., L4), batch invariance does not support CUDA graphs.
+# See https://github.com/vllm-project/vllm/pull/30018 and
+# tests/v1/determinism/utils.py for the documented limitation.
+IS_DEVICE_CAPABILITY_BELOW_90 = not current_platform.has_device_capability(90)
+
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("attn_backend", ATTN_BACKENDS)
 @pytest.mark.xfail(
     current_platform.is_rocm(),
-    reason="Test may fail on ROCm until batch invariance is enabled."
+    reason="Test may fail on ROCm until batch invariance is enabled. "
     "See: https://github.com/vllm-project/vllm/issues/27433",
     strict=False,
 )
 async def test_run_eagle_dp(monkeypatch: pytest.MonkeyPatch, attn_backend: str):
-    if not current_platform.is_rocm():
+    if not current_platform.is_rocm() and not current_platform.is_xpu():
         # This test checks that running a model with and without eagle
         # leads to identical tokens.
         #
@@ -50,7 +55,7 @@ async def test_run_eagle_dp(monkeypatch: pytest.MonkeyPatch, attn_backend: str):
     engine_args = AsyncEngineArgs(
         model=target_model,
         tokenizer_mode="auto",
-        enforce_eager=False,
+        enforce_eager=IS_DEVICE_CAPABILITY_BELOW_90,
         tensor_parallel_size=int(os.getenv("TP_SIZE", 1)),
         data_parallel_size=DP_SIZE,
         data_parallel_backend="mp",  # ray takes more time
@@ -69,9 +74,7 @@ async def test_run_eagle_dp(monkeypatch: pytest.MonkeyPatch, attn_backend: str):
     )
 
     prompt = "This is a test of data parallel with eagle"
-    # This test might be flaky, see
-    # https://github.com/vllm-project/vllm/issues/31913
-    num_expected_tokens = 20
+    num_expected_tokens = 100
     sampling_params = SamplingParams(
         max_tokens=num_expected_tokens,
         ignore_eos=True,
diff --git a/tests/v1/distributed/test_external_lb_dp.py b/tests/v1/distributed/test_external_lb_dp.py
index 912f8cffe7f6..06e8e574a05d 100644
--- a/tests/v1/distributed/test_external_lb_dp.py
+++ b/tests/v1/distributed/test_external_lb_dp.py
@@ -14,7 +14,7 @@
 from tests.utils import RemoteOpenAIServer
 from vllm.platforms import current_platform
 
-MODEL_NAME = "ibm-research/PowerMoE-3b"
+MODEL_NAME = os.getenv("MODEL_NAME", "ibm-research/PowerMoE-3b")
 
 # Number of data parallel ranks for external LB testing
 DP_SIZE = int(os.getenv("DP_SIZE", "2"))
@@ -111,11 +111,12 @@ def start_server(r: int, sargs: list[str]):
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         """Stop all server instances."""
-        while self.servers:
-            try:
-                self.servers.pop()[0].__exit__(exc_type, exc_val, exc_tb)
-            except Exception as e:
-                print(f"Error stopping server: {e}")
+        servers = [s for s, _ in self.servers]
+        self.servers.clear()
+        try:
+            RemoteOpenAIServer.shutdown_many(servers)
+        except Exception as e:
+            print(f"Error stopping servers: {e}")
 
 
 @pytest.fixture(scope="module")
diff --git a/tests/v1/distributed/test_hybrid_lb_dp.py b/tests/v1/distributed/test_hybrid_lb_dp.py
index aa25130752a4..fcd3c69af54a 100644
--- a/tests/v1/distributed/test_hybrid_lb_dp.py
+++ b/tests/v1/distributed/test_hybrid_lb_dp.py
@@ -134,11 +134,12 @@ def start_server(node: int, sargs: list[str]):
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         """Stop all server instances."""
-        while self.servers:
-            try:
-                self.servers.pop()[0].__exit__(exc_type, exc_val, exc_tb)
-            except Exception as e:
-                print(f"Error stopping server: {e}")
+        servers = [s for s, _ in self.servers]
+        self.servers.clear()
+        try:
+            RemoteOpenAIServer.shutdown_many(servers)
+        except Exception as e:
+            print(f"Error stopping servers: {e}")
 
 
 @pytest.fixture(scope="module")
diff --git a/tests/v1/distributed/test_internal_lb_dp.py b/tests/v1/distributed/test_internal_lb_dp.py
index efd9fc607dbb..31859ffba4c9 100644
--- a/tests/v1/distributed/test_internal_lb_dp.py
+++ b/tests/v1/distributed/test_internal_lb_dp.py
@@ -228,13 +228,13 @@ def start_server(sidx: int, r: int, sargs: list[str]):
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         """Stop all server instances."""
-        while self.servers:
-            if server := self.servers.pop():
-                try:
-                    server[0].__exit__(exc_type, exc_val, exc_tb)
-                except Exception as e:
-                    print(f"Error stopping server: {e}")
-                    traceback.print_exc()
+        servers = [entry[0] for entry in self.servers if entry is not None]
+        self.servers.clear()
+        try:
+            RemoteOpenAIServer.shutdown_many(servers)
+        except Exception as e:
+            print(f"Error stopping servers: {e}")
+            traceback.print_exc()
 
 
 class APIOnlyServerManager:
@@ -370,13 +370,13 @@ def start_engines_server():
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         """Stop both server instances."""
-        while self.servers:
-            if server := self.servers.pop():
-                try:
-                    server[0].__exit__(exc_type, exc_val, exc_tb)
-                except Exception as e:
-                    print(f"Error stopping server: {e}")
-                    traceback.print_exc()
+        servers = [entry[0] for entry in self.servers if entry is not None]
+        self.servers.clear()
+        try:
+            RemoteOpenAIServer.shutdown_many(servers)
+        except Exception as e:
+            print(f"Error stopping servers: {e}")
+            traceback.print_exc()
 
 
 @pytest.fixture(scope="module")
diff --git a/tests/v1/e2e/general/test_async_scheduling.py b/tests/v1/e2e/general/test_async_scheduling.py
index 8e1eddb0f64e..22a6c799c79f 100644
--- a/tests/v1/e2e/general/test_async_scheduling.py
+++ b/tests/v1/e2e/general/test_async_scheduling.py
@@ -57,6 +57,8 @@ def test_without_spec_decoding(
         dict(bad_words=["the", " the"]),
         dict(logprobs=2),
         dict(logprobs=2, frequency_penalty=-1.0),
+        dict(prompt_logprobs=2),
+        dict(prompt_logprobs=2, logprobs=2),
         dict(structured_outputs=struct_outputs),
         dict(
             structured_outputs=struct_outputs,
@@ -126,6 +128,8 @@ def test_with_eagle3_spec_decoding(sample_json_schema, monkeypatch: pytest.Monke
         dict(bad_words=["the", " the"]),
         dict(logprobs=2),
         dict(logprobs=2, frequency_penalty=-1.0),
+        dict(prompt_logprobs=2),
+        dict(prompt_logprobs=2, logprobs=2),
         dict(structured_outputs=struct_outputs),
         dict(
             structured_outputs=struct_outputs,
@@ -324,10 +328,13 @@ def run_test(
 ):
     spec_decoding = spec_config is not None
     cache_arg: dict[str, Any] = (
-        # Force preemptions
-        dict(num_gpu_blocks_override=32)
+        # Force preemptions: with 32 blocks the cache holds at most a single
+        # max-length request, so the ~34 concurrent prompts contend and trigger
+        # preemption. (Prompts here are << max_model_len, so dropping
+        # max_model_len from 4096 to 512 doesn't change generation behavior.)
+        dict(num_gpu_blocks_override=32, max_model_len=512)
         if test_preemption
-        else dict(gpu_memory_utilization=0.9)
+        else dict(gpu_memory_utilization=0.9, max_model_len=4096)
     )
     spec_mml = (spec_config or {}).get("max_model_len")
     spec_method = (spec_config or {}).get("method", "none")
@@ -343,7 +350,6 @@ def run_test(
 
     with VllmRunner(
         model,
-        max_model_len=4096,
         enable_chunked_prefill=test_prefill_chunking,
         # Force prefill chunking
         max_num_batched_tokens=48 if test_prefill_chunking else None,
@@ -411,14 +417,19 @@ def _all_logprobs_match(req_a, req_b) -> bool:
     )
 
 
-def _logprobs_match(lps_a: dict[int, Logprob], lps_b: dict[int, Logprob]) -> bool:
+def _logprobs_match(
+    lps_a: dict[int, Logprob] | None,
+    lps_b: dict[int, Logprob] | None,
+) -> bool:
+    if lps_a is None or lps_b is None:
+        return lps_a is lps_b
     rel_tol, abs_tol = 1e-3, 1e-6
     return (
         len(lps_a) == len(lps_b)
         and lps_a.keys() == lps_b.keys()
         and all(
             a.decoded_token == b.decoded_token
-            and a.rank == b.rank
+            and a.rank == pytest.approx(b.rank, rel=0.005)
             and a.logprob == pytest.approx(b.logprob, rel=rel_tol, abs=abs_tol)
             for a, b in ((lps_a[x], lps_b[x]) for x in lps_a)
         )
diff --git a/tests/v1/e2e/general/test_context_length.py b/tests/v1/e2e/general/test_context_length.py
index 0ac40bec35fe..c9dc8354fa1a 100644
--- a/tests/v1/e2e/general/test_context_length.py
+++ b/tests/v1/e2e/general/test_context_length.py
@@ -15,6 +15,7 @@
 
 from tests.conftest import VllmRunner
 from tests.utils import create_new_process_for_each_test
+from vllm.exceptions import VLLMValidationError
 
 
 @create_new_process_for_each_test()
@@ -61,3 +62,42 @@ def test_decoder_max_context_length_validation(
             with pytest.raises(ValueError) as excinfo:
                 vllm_model.generate_greedy(prompt_ids, max_tokens)
             assert expected_msg in str(excinfo.value)
+
+
+@create_new_process_for_each_test()
+@pytest.mark.parametrize("model", ["JackFram/llama-160m"])
+def test_auto_fit_max_model_len_rejects_oversized_input(
+    model: str,
+    vllm_runner: type[VllmRunner],
+) -> None:
+    """When max_model_len='auto' and KV cache memory is very limited,
+    the engine auto-fits max_model_len to a small value. The frontend
+    must see this reduced value and reject prompts that exceed it,
+    rather than accepting them and hanging."""
+
+    # Use a tiny KV cache budget to force auto-fit to a very small
+    # max_model_len (e.g. ~16 tokens).
+    kv_cache_bytes = 1_000_000  # 1 MB
+
+    with vllm_runner(
+        model_name=model,
+        max_model_len=-1,
+        max_num_seqs=1,
+        enforce_eager=True,
+        kv_cache_memory_bytes=kv_cache_bytes,
+        load_format="dummy",
+    ) as vllm_model:
+        auto_fitted_len = (
+            vllm_model.llm.llm_engine.vllm_config.model_config.max_model_len
+        )
+        # Sanity check: auto-fit should have reduced it well below the
+        # model's native context length.
+        assert auto_fitted_len < 2048, (
+            f"Expected auto-fit to reduce max_model_len significantly, "
+            f"but got {auto_fitted_len}"
+        )
+
+        # A prompt longer than the auto-fitted length must be rejected.
+        oversized_prompt = [[43] * (auto_fitted_len + 10)]
+        with pytest.raises(VLLMValidationError, match="Please reduce the length"):
+            vllm_model.generate_greedy(oversized_prompt, max_tokens=4)
diff --git a/tests/v1/e2e/general/test_mamba_prefix_cache.py b/tests/v1/e2e/general/test_mamba_prefix_cache.py
index 747c5defebd6..8cd2e89f5e98 100644
--- a/tests/v1/e2e/general/test_mamba_prefix_cache.py
+++ b/tests/v1/e2e/general/test_mamba_prefix_cache.py
@@ -16,6 +16,7 @@
 from vllm.config import CacheConfig
 from vllm.distributed import cleanup_dist_env_and_memory
 from vllm.model_executor.layers.mamba.mamba_utils import MambaStateCopyFunc
+from vllm.platforms import current_platform
 from vllm.sequence import IntermediateTensors
 from vllm.v1.attention.backends.utils import CommonAttentionMetadata
 from vllm.v1.core.kv_cache_manager import KVCacheBlocks, KVCacheManager
@@ -48,6 +49,7 @@ class StepAction:
 prompt_token_ids: list[int] = []
 MODEL = "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8"
 BLOCK_SIZE = 560
+DEVICE_TYPE = current_platform.device_type
 NUM_HIDDEN_LAYERS = 1
 cur_step_action_idx = 0
 cur_step_action: StepAction | None = None
@@ -71,7 +73,7 @@ def fake_sample_fn(
             return SamplerOutput(
                 sampled_token_ids=torch.tensor(
                     [[prompt_token_ids[first_token_id_index]]],
-                    device="cuda",
+                    device=DEVICE_TYPE,
                     dtype=torch.int32,
                 ),
                 logprobs_tensors=None,
@@ -83,7 +85,9 @@ def fake_sample_fn(
         sampled_token_ids = accepted_tokens
         return SamplerOutput(
             sampled_token_ids=torch.tensor(
-                [sampled_token_ids], device="cuda", dtype=torch.int32
+                [sampled_token_ids],
+                device=DEVICE_TYPE,
+                dtype=torch.int32,
             ),
             logprobs_tensors=None,
         )
@@ -128,17 +132,23 @@ def fake_propose_draft_token_ids_fn(
                 - 1
                 + num_accepted_tokens
             ],
-            device="cuda",
+            device=DEVICE_TYPE,
             dtype=torch.int32,
         )
 
         valid_sampled_tokens_count = torch.tensor(
-            [num_accepted_tokens], device="cuda", dtype=torch.int32
+            [num_accepted_tokens],
+            device=DEVICE_TYPE,
+            dtype=torch.int32,
         )
 
         self._copy_valid_sampled_token_count(next_token_ids, valid_sampled_tokens_count)
 
-        return torch.tensor(proposed_draft_token_ids, device="cuda", dtype=torch.int32)
+        return torch.tensor(
+            proposed_draft_token_ids,
+            device=DEVICE_TYPE,
+            dtype=torch.int32,
+        )
 
     return fake_propose_draft_token_ids_fn
 
@@ -169,6 +179,7 @@ def fake_allocate_slots_fn(
         num_external_computed_tokens: int = 0,
         delay_cache_blocks: bool = False,
         num_encoder_tokens: int = 0,
+        full_sequence_must_fit: bool = False,
     ):
         ret = original_allocate_slots_fn(
             self,
@@ -180,6 +191,7 @@ def fake_allocate_slots_fn(
             num_external_computed_tokens,
             delay_cache_blocks,
             num_encoder_tokens,
+            full_sequence_must_fit,
         )
         if cur_step_action is not None:
             cur_block_ids = self.coordinator.single_type_managers[0].req_to_blocks[
@@ -281,7 +293,6 @@ def fake_execute_model_fn(
 
 def get_fake_process_mamba_fn(
     original_preprocess_mamba_fn: Callable,
-    original_post_process_mamba_fn: Callable,
     original_copy_fn: Callable,
 ):
     copy_info: tuple[list[int], list[int], list[int]] | None = None
@@ -349,37 +360,6 @@ def fake_preprocess_mamba_fn(
             )
         return ret
 
-    def fake_post_process_mamba_fn(
-        scheduler_output: SchedulerOutput,
-        kv_cache_config: KVCacheConfig,
-        input_batch: GPUInputBatch,
-        requests: dict[str, CachedRequestState],
-        mamba_state_idx: dict[str, int],
-        forward_context: dict[str, Any],
-        mamba_state_copy_funcs: tuple[MambaStateCopyFunc, ...],
-        copy_bufs: mamba_utils.MambaCopyBuffers,
-    ):
-        nonlocal copy_info
-        copy_info = None
-        ret = original_post_process_mamba_fn(
-            scheduler_output,
-            kv_cache_config,
-            input_batch,
-            requests,
-            mamba_state_idx,
-            forward_context,
-            mamba_state_copy_funcs,
-            copy_bufs,
-        )
-        if cur_step_action is not None:
-            check_copy_info(
-                cur_step_action.postprocess_copy_idx,
-                kv_cache_config,
-                forward_context,
-                input_batch,
-            )
-        return ret
-
     def fake_copy_fn(copy_bufs: mamba_utils.MambaCopyBuffers):
         nonlocal copy_info
         assert copy_info is None
@@ -390,7 +370,7 @@ def fake_copy_fn(copy_bufs: mamba_utils.MambaCopyBuffers):
         copy_info = (src_state_list, dest_state_list, num_elements_list)
         return original_copy_fn(copy_bufs)
 
-    return fake_preprocess_mamba_fn, fake_post_process_mamba_fn, fake_copy_fn
+    return fake_preprocess_mamba_fn, fake_copy_fn
 
 
 def run_ref_mamba_state_in_subprocess() -> None:
@@ -502,15 +482,11 @@ def apply_patch(monkeypatch: pytest.MonkeyPatch):
     fake_allocate_slots_fn = get_fake_allocate_slots_fn(KVCacheManager.allocate_slots)
     monkeypatch.setattr(KVCacheManager, "allocate_slots", fake_allocate_slots_fn)
 
-    fake_preprocess_mamba_fn, fake_post_process_mamba_fn, fake_copy_fn = (
-        get_fake_process_mamba_fn(
-            mamba_utils.preprocess_mamba,
-            mamba_utils.postprocess_mamba,
-            mamba_utils.do_mamba_copy_block,
-        )
+    fake_preprocess_mamba_fn, fake_copy_fn = get_fake_process_mamba_fn(
+        mamba_utils.preprocess_mamba,
+        mamba_utils.do_mamba_copy_block,
     )
     monkeypatch.setattr(mamba_utils, "preprocess_mamba", fake_preprocess_mamba_fn)
-    monkeypatch.setattr(mamba_utils, "postprocess_mamba", fake_post_process_mamba_fn)
     monkeypatch.setattr(mamba_utils, "do_mamba_copy_block", fake_copy_fn)
 
 
diff --git a/tests/v1/e2e/spec_decode/test_async_spec_decode.py b/tests/v1/e2e/spec_decode/test_async_spec_decode.py
index 726e9d89d67f..b19f90e2cdc6 100644
--- a/tests/v1/e2e/spec_decode/test_async_spec_decode.py
+++ b/tests/v1/e2e/spec_decode/test_async_spec_decode.py
@@ -82,6 +82,13 @@ def assert_no_sync(self, msg: str = ""):
         2,
         id="eagle-mla-deepseek",
     ),
+    pytest.param(
+        "Qwen/Qwen3.5-0.8B-Base",
+        "Qwen/Qwen3.5-0.8B-Base",
+        "mtp",
+        1,
+        id="mtp-qwen3_5-hybrid",
+    ),
 ]
 
 
@@ -104,6 +111,14 @@ def test_no_sync_with_spec_decode(
     from vllm import LLM, SamplingParams
     from vllm.distributed import cleanup_dist_env_and_memory
 
+    # Qwen3.5 is a VLM; without this, profile_run runs the ViT warmup
+    # and peaks well above the 18GB MIG slice used by one of the CI lanes.
+    # This test only exercises text generation, so the vision tower is
+    # never needed.
+    extra_kwargs: dict = {}
+    if "Qwen3.5" in model:
+        extra_kwargs["limit_mm_per_prompt"] = {"image": 0, "video": 0}
+
     llm = LLM(
         model=model,
         max_model_len=256,
@@ -114,6 +129,12 @@ def test_no_sync_with_spec_decode(
         },
         enforce_eager=True,
         async_scheduling=True,
+        **extra_kwargs,
+    )
+
+    # Assert async scheduling is actually active before running inference.
+    assert llm.llm_engine.vllm_config.scheduler_config.async_scheduling, (
+        f"Expected async_scheduling=True for spec decode, got False. method={method}"
     )
 
     outputs = llm.generate(
diff --git a/tests/v1/e2e/spec_decode/test_lora_with_spec_decode.py b/tests/v1/e2e/spec_decode/test_lora_with_spec_decode.py
index 5cbdc4123237..5decfbc00862 100644
--- a/tests/v1/e2e/spec_decode/test_lora_with_spec_decode.py
+++ b/tests/v1/e2e/spec_decode/test_lora_with_spec_decode.py
@@ -5,9 +5,6 @@
 1. test lora with speculative decoding for batch inference
 """
 
-import random
-
-import numpy as np
 import pytest
 import torch
 
@@ -15,6 +12,7 @@
 from vllm.distributed import cleanup_dist_env_and_memory
 from vllm.lora.request import LoRARequest
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed
 
 LORA_TEST_PROMPT_MAP: dict[str, str] = {}
 
@@ -63,15 +61,18 @@ def test_batch_inference_correctness(
     with monkeypatch.context() as m:
         # Disable randomness
         m.setenv("CUBLAS_WORKSPACE_CONFIG", ":4096:8")
-        torch.manual_seed(SEED)
-        np.random.seed(SEED)
-        random.seed(SEED)
-        torch.cuda.manual_seed_all(SEED)
+        set_random_seed(SEED)
         torch.backends.cudnn.benchmark = False
         torch.backends.cudnn.deterministic = True
 
         method, model_name, spec_model_name, lora_path, tp_size = model_setup
 
+        prompts = [LORA_TEST_PROMPT_MAP[lora_path]] * 100
+        lora_request = LoRARequest("adapter", 1, lora_path)
+        sampling_params = SamplingParams(
+            temperature=0.0, top_p=1.0, top_k=-1, seed=SEED, max_tokens=128
+        )
+
         # without speculative decoding
         ref_llm = LLM(
             model=model_name,
@@ -84,19 +85,14 @@ def test_batch_inference_correctness(
             max_cpu_loras=1,
             max_lora_rank=16,
         )
-
-        prompts = [LORA_TEST_PROMPT_MAP[lora_path]] * 100
-        lora_request = LoRARequest("adapter", 1, lora_path)
-        sampling_params = SamplingParams(
-            temperature=0.0, top_p=1.0, top_k=-1, seed=SEED, max_tokens=128
-        )
-
-        ref_outputs = ref_llm.generate(
-            prompts, sampling_params, lora_request=lora_request
-        )
-        del ref_llm
-        torch.accelerator.empty_cache()
-        cleanup_dist_env_and_memory()
+        try:
+            ref_outputs = ref_llm.generate(
+                prompts, sampling_params, lora_request=lora_request
+            )
+        finally:
+            del ref_llm
+            torch.accelerator.empty_cache()
+            cleanup_dist_env_and_memory()
 
         lora_spec_llm = LLM(
             model=model_name,
@@ -115,25 +111,29 @@ def test_batch_inference_correctness(
             max_cpu_loras=1,
             max_lora_rank=16,
         )
-
-        lora_spec_outputs = lora_spec_llm.generate(
-            prompts, sampling_params, lora_request=lora_request
-        )
-
-        matches = 0
-        misses = 0
-        for ref_output, spec_output in zip(ref_outputs, lora_spec_outputs):
-            if ref_output.outputs[0].text == spec_output.outputs[0].text:
-                matches += 1
-            else:
-                misses += 1
-                print(f"ref_output: {ref_output.outputs[0].text}")
-                print(f"spec_output: {spec_output.outputs[0].text}")
-
-        # Heuristic: expect at least 90% of the prompts to match exactly
-        # Upon failure, inspect the outputs to check for inaccuracy.
-        print(f"match ratio: {matches}/{len(ref_outputs)}")
-        assert matches > int(0.90 * len(ref_outputs))
-        del lora_spec_llm
-        torch.accelerator.empty_cache()
-        cleanup_dist_env_and_memory()
+        try:
+            lora_spec_outputs = lora_spec_llm.generate(
+                prompts, sampling_params, lora_request=lora_request
+            )
+
+            matches = 0
+            for ref_output, spec_output in zip(ref_outputs, lora_spec_outputs):
+                if ref_output.outputs[0].text == spec_output.outputs[0].text:
+                    matches += 1
+                else:
+                    print(f"ref_output: {ref_output.outputs[0].text}")
+                    print(f"spec_output: {spec_output.outputs[0].text}")
+
+            # Heuristic threshold: under greedy verification, the spec-decode
+            # output should equal the non-spec output (modulo FP noise from the
+            # target's verify-path matmul running at seqlen
+            # num_speculative_tokens+1 vs 1). 90% leaves slack for that noise.
+            threshold = int(0.90 * len(ref_outputs))
+            print(f"match ratio: {matches}/{len(ref_outputs)}")
+            assert matches > threshold, (
+                f"match ratio {matches}/{len(ref_outputs)} <= {threshold}"
+            )
+        finally:
+            del lora_spec_llm
+            torch.accelerator.empty_cache()
+            cleanup_dist_env_and_memory()
diff --git a/tests/v1/e2e/spec_decode/test_spec_decode.py b/tests/v1/e2e/spec_decode/test_spec_decode.py
index 9ea41c774c46..a9092bb76634 100644
--- a/tests/v1/e2e/spec_decode/test_spec_decode.py
+++ b/tests/v1/e2e/spec_decode/test_spec_decode.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import os
 import random
 from collections.abc import Iterable
 from dataclasses import dataclass
@@ -7,6 +8,7 @@
 
 import pytest
 import torch
+from tqdm import tqdm
 
 from tests.evals.gsm8k.gsm8k_eval import _build_gsm8k_prompts, evaluate_gsm8k_offline
 from tests.utils import (
@@ -29,6 +31,13 @@
 MTP_SIMILARITY_RATE = 0.8
 
 
+class AsyncSchedulingNotEnabledError(AssertionError):
+    """Raised when async_scheduling is expected to be True for draft_model
+    spec decode but is False. Tracked in:
+    https://github.com/vllm-project/vllm/issues/38929
+    """
+
+
 def _skip_if_insufficient_gpus_for_tp(tp_size: int):
     """Skip test if available GPUs < tp_size on ROCm."""
     available_gpus = torch.accelerator.device_count()
@@ -205,6 +214,11 @@ def test_ngram_gpu_default_with_async_scheduling(
         max_model_len=4096,
         async_scheduling=async_scheduling,
     )
+    # Assert the resolved async_scheduling config matches what was requested.
+    assert (
+        spec_llm.llm_engine.vllm_config.scheduler_config.async_scheduling
+        == async_scheduling
+    )
     evaluate_llm_for_gsm8k(spec_llm, expected_accuracy_threshold=0.8)
     del spec_llm
     cleanup_dist_env_and_memory()
@@ -308,7 +322,7 @@ def test_speculators_model_integration(
     test_prompts = get_test_prompts(mm_enabled=False)
 
     # First run: Direct speculator model (simplified integration)
-    spec_llm = LLM(model=model_path, max_model_len=4096)
+    spec_llm = LLM(model=model_path, max_model_len=4096, gpu_memory_utilization=0.92)
     evaluate_llm_for_gsm8k(
         spec_llm, expected_accuracy_threshold=expected_accuracy_threshold
     )
@@ -338,7 +352,7 @@ def test_speculators_model_integration(
     cleanup_dist_env_and_memory()
 
     # Second run: Reference without speculative decoding
-    ref_llm = LLM(model=verifier_model, max_model_len=4096)
+    ref_llm = LLM(model=verifier_model, max_model_len=4096, gpu_memory_utilization=0.92)
     ref_outputs = ref_llm.chat(test_prompts, sampling_config)
     del ref_llm
     torch.accelerator.empty_cache()
@@ -372,11 +386,6 @@ def _run_eagle_correctness(
     Compare the outputs of an original LLM and a speculative LLM
     which should be the same when using eagle speculative decoding.
     """
-    if attn_backend == "TREE_ATTN":
-        pytest.skip(
-            "TREE_ATTN is flaky in the test disable for now until it can be "
-            "resolved (see https://github.com/vllm-project/vllm/issues/22922)"
-        )
     if model_impl == "transformers":
         import transformers
         from packaging.version import Version
@@ -456,6 +465,8 @@ def _run_eagle_correctness(
             model_impl=model_impl,
             attention_config=attention_config,
         )
+        # EAGLE/EAGLE3 supports async scheduling; assert it is active by default.
+        assert spec_llm.llm_engine.vllm_config.scheduler_config.async_scheduling
         evaluate_llm_for_gsm8k(
             spec_llm, expected_accuracy_threshold=expected_accuracy_threshold
         )
@@ -477,6 +488,10 @@ def _run_eagle_correctness(
 
 
 @single_gpu_only
+@pytest.mark.skipif(
+    current_platform.is_device_capability_family(100),
+    reason="DeepSeek head_dim=192 not supported on SM100/SM110 (Blackwell)",
+)
 @pytest.mark.parametrize(
     [
         "model_setup",
@@ -542,12 +557,16 @@ def test_eagle_correctness_light(
             "auto",
             0.8,
         ),
-        (
+        pytest.param(
             ("eagle3", "Qwen/Qwen3-8B", "AngelSlim/Qwen3-8B_eagle3", 1),
             False,
             False,
             "transformers",
             0.8,
+            # TODO(hmellor): figure out why memory usage is so high
+            marks=pytest.mark.skip(
+                reason="Feature is experimental and uses too much memory in CI",
+            ),
         ),
         pytest.param(
             (
@@ -703,16 +722,34 @@ def test_eagle_correctness_heavy(
     ["model_setup", "mm_enabled", "expected_accuracy_threshold"],
     [
         (("mtp", "XiaomiMiMo/MiMo-7B-Base", 1), False, 0.5),  # ref: 65%-70%
-        (("mtp", "ZixiQi/DeepSeek-V3-4layers-MTP-FP8", 1), False, 0.0),  # dummy model
+        pytest.param(
+            ("mtp", "ZixiQi/DeepSeek-V3-4layers-MTP-FP8", 1),
+            False,
+            0.0,
+            marks=pytest.mark.skipif(
+                current_platform.is_device_capability_family(100),
+                reason="DeepSeek MTP: TRTLLM MoE top_k check fails on Blackwell",
+            ),
+        ),  # dummy model
+        (
+            ("mtp", "Qwen/Qwen3.5-0.8B-Base", 1),
+            False,
+            0.20,
+        ),  # hybrid + MTP, ref: ~34%-35%
+        (
+            ("mtp", "google/gemma-4-E4B-it", 1, "google/gemma-4-E4B-it-assistant"),
+            False,
+            0.50,
+        ),  # gemma4 MTP with assistant model, ref: ~62%
     ],
-    ids=["mimo", "deepseek"],
+    ids=["mimo", "deepseek", "qwen3_5-hybrid", "gemma4-e4b"],
 )
 @single_gpu_only
 @large_gpu_mark(min_gb=20)
 def test_mtp_correctness(
     monkeypatch: pytest.MonkeyPatch,
     sampling_config: SamplingParams,
-    model_setup: tuple[str, str, int],
+    model_setup: tuple[str, str, int] | tuple[str, str, int, str],
     mm_enabled: bool,
     expected_accuracy_threshold: float,
 ):
@@ -728,16 +765,45 @@ def test_mtp_correctness(
     with monkeypatch.context() as m:
         m.setenv("VLLM_MLA_DISABLE", "1")
 
-        method, model_name, tp_size = model_setup
+        if len(model_setup) == 4:
+            method, model_name, tp_size, draft_model = model_setup
+        else:
+            method, model_name, tp_size = model_setup
+            draft_model = None
         _skip_if_insufficient_gpus_for_tp(tp_size)
 
+        if "Qwen3.5" in model_name and os.environ.get("VLLM_USE_V2_MODEL_RUNNER"):
+            pytest.skip(
+                "Model Runner V2 does not yet support hybrid models "
+                "(Qwen3.5 mixes Mamba-style GDN with attention layers)."
+            )
+
         attn_backend = "TRITON_ATTN" if current_platform.is_rocm() else "auto"
+
+        # Skip multimodal profiling for models that don't need it in this test.
+        extra_kwargs: dict[str, Any] = {}
+        if "Qwen3.5" in model_name:
+            extra_kwargs["limit_mm_per_prompt"] = {"image": 0, "video": 0}
+        elif "gemma-4" in model_name:
+            extra_kwargs["limit_mm_per_prompt"] = {"image": 0, "audio": 0}
+
+        if draft_model is not None and "gemma-4" in draft_model:
+            import transformers
+            from packaging.version import Version
+
+            if Version(transformers.__version__) < Version("5.8.0"):
+                pytest.skip(
+                    "Gemma4 MTP assistant requires transformers>=5.8.0, "
+                    f"got {transformers.__version__}"
+                )
+
         ref_llm = LLM(
             model=model_name,
             max_model_len=2048,
             tensor_parallel_size=tp_size,
             trust_remote_code=True,
             attention_backend=attn_backend,
+            **extra_kwargs,
         )
         ref_outputs = ref_llm.chat(test_prompts, sampling_config)
         evaluate_llm_for_gsm8k(
@@ -747,18 +813,26 @@ def test_mtp_correctness(
         torch.accelerator.empty_cache()
         cleanup_dist_env_and_memory()
 
+        speculative_config: dict[str, Any] = {
+            "method": method,
+            "num_speculative_tokens": 1,
+            "max_model_len": 2048,
+        }
+        if draft_model is not None:
+            speculative_config["model"] = draft_model
+            speculative_config["num_speculative_tokens"] = 2
+
         spec_llm = LLM(
             model=model_name,
             trust_remote_code=True,
             tensor_parallel_size=tp_size,
-            speculative_config={
-                "method": method,
-                "num_speculative_tokens": 1,
-                "max_model_len": 2048,
-            },
+            speculative_config=speculative_config,
             max_model_len=2048,
             attention_backend=attn_backend,
+            **extra_kwargs,
         )
+        # MTP supports async scheduling; assert it is active by default.
+        assert spec_llm.llm_engine.vllm_config.scheduler_config.async_scheduling
         evaluate_llm_for_gsm8k(
             spec_llm, expected_accuracy_threshold=expected_accuracy_threshold
         )
@@ -828,12 +902,22 @@ class ArgsTest:
 @pytest.mark.parametrize("args", cases)
 @pytest.mark.parametrize("enforce_eager", [True, False])
 @single_gpu_only
+# TODO: Fix async_scheduling & engine initialization issues - see https://github.com/vllm-project/vllm/issues/38929
+@pytest.mark.xfail(
+    raises=AsyncSchedulingNotEnabledError,
+    reason="draft_model does not yet enable async_scheduling: issue #38929",
+)
 def test_draft_model_correctness(args: ArgsTest, enforce_eager: bool):
     args.enforce_eager = enforce_eager
     assert_draft_model_correctness(args)
 
 
 @single_gpu_only
+# TODO: Fix async_scheduling and engine initialization issues - see https://github.com/vllm-project/vllm/issues/38929
+@pytest.mark.xfail(
+    raises=AsyncSchedulingNotEnabledError,
+    reason="draft_model does not yet enable async_scheduling: issue #38929",
+)
 def test_draft_model_realistic_example():
     args = ArgsTest(
         target_model="Qwen/Qwen3-1.7B",
@@ -849,6 +933,11 @@ def test_draft_model_realistic_example():
 
 
 @single_gpu_only
+# TODO: Fix async_scheduling and engine initialization issues - see https://github.com/vllm-project/vllm/issues/38929
+@pytest.mark.xfail(
+    raises=AsyncSchedulingNotEnabledError,
+    reason="draft_model does not yet enable async_scheduling: issue #38929",
+)
 def test_draft_model_parallel_drafting():
     args = ArgsTest(
         target_model="Qwen/Qwen3-1.7B",
@@ -875,6 +964,11 @@ def test_draft_model_parallel_drafting():
 )
 @pytest.mark.parametrize("enforce_eager", [True, False])
 @single_gpu_only
+# TODO: Fix async_scheduling and engine initialization issues - see https://github.com/vllm-project/vllm/issues/38929
+@pytest.mark.xfail(
+    raises=AsyncSchedulingNotEnabledError,
+    reason="draft_model does not yet enable async_scheduling: issue #38929",
+)
 def test_draft_model_quantization(models: tuple[str, str], enforce_eager: bool):
     tgt_model, draft_model = models
     sd_case = ArgsTest(
@@ -887,6 +981,11 @@ def test_draft_model_quantization(models: tuple[str, str], enforce_eager: bool):
 
 
 @multi_gpu_only(num_gpus=2)
+# TODO: Fix async_scheduling and engine initialization issues - see https://github.com/vllm-project/vllm/issues/38929
+@pytest.mark.xfail(
+    raises=AsyncSchedulingNotEnabledError,
+    reason="draft_model does not yet enable async_scheduling: issue #38929",
+)
 def test_draft_model_tensor_parallelism():
     """Ensure spec decode works when running with TP > 1."""
     _skip_if_insufficient_gpus_for_tp(2)
@@ -1061,6 +1160,7 @@ def assert_draft_model_correctness(args: ArgsTest):
         enforce_eager=args.enforce_eager,
         disable_log_stats=False,  # enables get_metrics()
     )
+
     # we don't check the outputs, only check the metrics
     spec_llm.chat(test_prompts, args.sampling_config)
     metrics = spec_llm.get_metrics()
@@ -1072,10 +1172,6 @@ def assert_draft_model_correctness(args: ArgsTest):
         spec_llm, expected_accuracy_threshold=args.expected_gsm8k_accuracy
     )
 
-    del spec_llm  # CLEANUP
-    torch.accelerator.empty_cache()
-    cleanup_dist_env_and_memory()
-
     print(
         f"spec-decode: target={args.target_model}, draft={args.draft_model}, "
         f"temperature={args.sampling_config.temperature:.2f}, "
@@ -1085,6 +1181,20 @@ def assert_draft_model_correctness(args: ArgsTest):
 
     assert acceptance_rate >= args.expected_acceptance_rate
     assert acceptance_len >= args.expected_acceptance_len
+    # draft_model supports async scheduling; assert it is active by default.
+    # Raise AsyncSchedulingNotEnabledError (a subclass of AssertionError) so that
+    # @pytest.mark.xfail(raises=AsyncSchedulingNotEnabledError) catches only this
+    # specific failure — leaving all other assertion failures (e.g. correctness or
+    # acceptance-rate checks above) visible as real test failures.
+    has_async = spec_llm.llm_engine.vllm_config.scheduler_config.async_scheduling
+    del spec_llm  # CLEANUP
+    torch.accelerator.empty_cache()
+    cleanup_dist_env_and_memory()
+    if not has_async:
+        raise AsyncSchedulingNotEnabledError(
+            "Expected async_scheduling=True for draft_model spec decode, got False."
+            " See https://github.com/vllm-project/vllm/issues/38929"
+        )
 
 
 def get_messages(dataset: str, n: int) -> list[Messages]:
@@ -1105,19 +1215,229 @@ def some_high_acceptance_metrics() -> dict:
     }
 
 
-def compute_acceptance_rate(metrics: list[Metric]) -> float:
+def compute_acceptance_rate(
+    metrics: list[Metric], prev_metrics: list[Metric] | None = None
+) -> float:
     name2metric = {metric.name: metric for metric in metrics}
-    n_draft_toks = name2metric["vllm:spec_decode_num_draft_tokens"].value  # type: ignore
+    n_draft_toks = name2metric["vllm:spec_decode_num_draft_tokens"].value
     if n_draft_toks == 0:
         return float("nan")
-    n_accepted_toks = name2metric["vllm:spec_decode_num_accepted_tokens"].value  # type: ignore
+    n_accepted_toks = name2metric["vllm:spec_decode_num_accepted_tokens"].value
+    if prev_metrics is not None:
+        prev_name2metric = {metric.name: metric for metric in prev_metrics}
+        n_draft_toks -= prev_name2metric["vllm:spec_decode_num_draft_tokens"].value
+        n_accepted_toks -= prev_name2metric[
+            "vllm:spec_decode_num_accepted_tokens"
+        ].value
+        if n_draft_toks <= 0:
+            return float("nan")
     return n_accepted_toks / n_draft_toks
 
 
-def compute_acceptance_len(metrics: list[Metric]) -> float:
+def compute_acceptance_len(
+    metrics: list[Metric], prev_metrics: list[Metric] | None = None
+) -> float:
     name2metric = {metric.name: metric for metric in metrics}
-    n_drafts = name2metric["vllm:spec_decode_num_drafts"].value  # type: ignore
-    n_accepted_toks = name2metric["vllm:spec_decode_num_accepted_tokens"].value  # type: ignore
+    n_drafts = name2metric["vllm:spec_decode_num_drafts"].value
+    n_accepted_toks = name2metric["vllm:spec_decode_num_accepted_tokens"].value
     if n_drafts == 0:
         return 1
+    if prev_metrics is not None:
+        prev_name2metric = {metric.name: metric for metric in prev_metrics}
+        n_drafts -= prev_name2metric["vllm:spec_decode_num_drafts"].value
+        n_accepted_toks -= prev_name2metric[
+            "vllm:spec_decode_num_accepted_tokens"
+        ].value
+        if n_drafts <= 0:
+            return 1
     return 1 + (n_accepted_toks / n_drafts)
+
+
+# Datasets in the format used in DFlash validations
+def load_and_process_dataset(data_name: str):
+    from datasets import load_dataset
+
+    if data_name == "gsm8k":
+        dataset = load_dataset("openai/gsm8k", "main", split="test")
+        prompt_fmt = (
+            "{question}\nPlease reason step by step,"
+            " and put your final answer within \\boxed{{}}."
+        )
+        dataset = dataset.map(lambda x: {"turns": [prompt_fmt.format(**x)]})
+    elif data_name == "mt-bench":
+        dataset = load_dataset("HuggingFaceH4/mt_bench_prompts", split="train")
+        dataset = dataset.map(lambda x: {"turns": x["prompt"]})
+    elif data_name == "humaneval":
+        dataset = load_dataset("openai/openai_humaneval", split="test")
+        prompt_fmt = (
+            "Write a solution to the following problem and make sure"
+            " that it passes the tests:\n```python\n{prompt}\n```"
+        )
+        dataset = dataset.map(lambda x: {"turns": [prompt_fmt.format(**x)]})
+
+    return dataset
+
+
+@pytest.fixture
+def dflash_config():
+    target_model = "Qwen/Qwen3-8B"
+    draft_model = "z-lab/Qwen3-8B-DFlash-b16"
+
+    return dict(
+        model=target_model,
+        trust_remote_code=True,
+        speculative_config={
+            "method": "dflash",
+            "model": draft_model,
+            "num_speculative_tokens": 16,
+            "max_model_len": 32768,
+        },
+        max_model_len=32768,
+        max_num_seqs=128,
+        gpu_memory_utilization=0.85,
+        enforce_eager=False,
+        disable_log_stats=False,
+    )
+
+
+def test_dflash_acceptance_rates(dflash_config):
+    """
+    E2E test for DFlash (block diffusion) speculative decoding.
+    Runs acceptance rate validation on GSM8k, MT-Bench, and HumanEval
+    comparing against baseline results from the paper (Table 1).
+    See https://github.com/z-lab/dflash/blob/main/benchmark_sglang.py for methodology.
+    """
+    spec_llm = LLM(**dflash_config)
+
+    max_prompts_per_dataset = 200  # mt-bench has 80, humaneval has 164, truncates gsm8k
+
+    # All scores from Table 1 in https://arxiv.org/pdf/2602.06036
+    expected_acceptance_lengths = {
+        "mt-bench": 4.24,
+        "humaneval": 6.50,
+        "gsm8k": 6.54 * 0.975,  # runs with a subset of prompts so extra wide tol here
+    }
+
+    tokenizer = spec_llm.get_tokenizer()
+    for dataset_name, expected_len in expected_acceptance_lengths.items():
+        dataset = load_and_process_dataset(dataset_name)
+        prev_metrics = None
+        acceptance_lengths = []
+        for i in tqdm(
+            range(min(max_prompts_per_dataset, len(dataset))),
+            desc=f"Processing {dataset_name}",
+        ):
+            user_content = dataset[i]["turns"][0]
+            prompt_text = tokenizer.apply_chat_template(
+                [{"role": "user", "content": user_content}],
+                tokenize=False,
+                add_generation_prompt=True,
+                enable_thinking=False,
+            )
+
+            # Temp=0, MaxTokens=2048 from the paper
+            spec_llm.generate(
+                [prompt_text],
+                SamplingParams(temperature=0, max_tokens=2048),
+                use_tqdm=False,
+            )
+            current_metrics = spec_llm.get_metrics()
+            acceptance_len = compute_acceptance_len(current_metrics, prev_metrics)
+            prev_metrics = current_metrics
+            acceptance_lengths.append(acceptance_len)
+
+        mean_acceptance_length = sum(acceptance_lengths) / len(acceptance_lengths)
+        # Fairly tight tolerance of 95% against the paper's figures,
+        # watching for regressions. Can be relaxed if test is flaky but be sure to
+        # check for genuine issues such as #40727.
+        expected_len = expected_len * 0.95
+        print(
+            f"DFlash acceptance_len for {dataset_name}: {mean_acceptance_length:.2f}"
+            f" (expected at least {expected_len:.2f})"
+        )
+
+        assert mean_acceptance_length >= expected_len, (
+            f"DFlash acceptance_len for {dataset_name} is below expected threshold:"
+            f"{mean_acceptance_length:.2f} < {expected_len:.2f}"
+        )
+
+    del spec_llm
+    torch.accelerator.empty_cache()
+    cleanup_dist_env_and_memory()
+
+
+@single_gpu_only
+def test_synthetic_acceptance_rate():
+    """Verify that synthetic rejection sampling produces an acceptance
+    length close to the requested mean acceptance length."""
+    num_spec_tokens = 3
+    expected_acceptance_len = 1.875
+    tolerance = 0.15
+
+    spec_llm = LLM(
+        model="meta-llama/Llama-3.2-1B-Instruct",
+        trust_remote_code=True,
+        speculative_config={
+            "method": "eagle3",
+            "model": "nm-testing/Llama3_2_1B_speculator.eagle3",
+            "num_speculative_tokens": num_spec_tokens,
+            "max_model_len": 2048,
+            "rejection_sample_method": "synthetic",
+            "synthetic_acceptance_length": expected_acceptance_len,
+        },
+        max_model_len=2048,
+        enforce_eager=True,
+        disable_log_stats=False,
+    )
+
+    test_prompts = get_test_prompts(mm_enabled=False, num_prompts=50)
+    spec_llm.chat(
+        test_prompts,
+        SamplingParams(temperature=0, max_tokens=64, ignore_eos=True),
+    )
+
+    metrics = spec_llm.get_metrics()
+    acceptance_len = compute_acceptance_len(metrics)
+
+    print(
+        f"Synthetic acceptance length: {acceptance_len:.3f}"
+        f" (expected={expected_acceptance_len:.3f},"
+        f" tolerance=±{tolerance})"
+    )
+    assert abs(acceptance_len - expected_acceptance_len) <= tolerance, (
+        f"Synthetic acceptance length {acceptance_len:.3f} is not within"
+        f" ±{tolerance} of expected {expected_acceptance_len:.3f}"
+    )
+
+    del spec_llm
+    torch.accelerator.empty_cache()
+    cleanup_dist_env_and_memory()
+
+
+def test_dflash_correctness(dflash_config):
+    """
+    E2E test for DFlash (block diffusion) speculative decoding.
+    Ensures output correctness on GSM8k, with cudagraphs and batching on.
+    """
+    spec_llm = LLM(**dflash_config)
+
+    # Evaluate GSM8k accuracy (Qwen3-8B ref: ~87-92% on GSM8k)
+    evaluate_llm_for_gsm8k(spec_llm, expected_accuracy_threshold=0.8)
+
+    current_metrics = spec_llm.get_metrics()
+    acceptance_len = compute_acceptance_len(current_metrics)
+
+    # AR is thoroughly validated in test_dflash_acceptance_rates, in a manner consistent
+    # with the DFlash paper. However, that test measures AL per-request and thus runs
+    # with a batch size of 1. To ensure that AL does not collapse with large batch sizes
+    # we enforce a baseline on the AL over the full lm-eval-style GSM8k test.
+    expected_len = 3.5  # Measured is 3.9 to 4.0
+    print(f"DFlash GSM8k correctness test got AL {acceptance_len}")
+    assert acceptance_len >= expected_len, (
+        "DFlash correctness check failed with"
+        f" {acceptance_len=}, expected at least {expected_len}"
+    )
+
+    del spec_llm
+    torch.accelerator.empty_cache()
+    cleanup_dist_env_and_memory()
diff --git a/tests/v1/e2e/test_hybrid_chunked_prefill.py b/tests/v1/e2e/test_hybrid_chunked_prefill.py
index 1790343ca836..dd8a5f5cb6d4 100644
--- a/tests/v1/e2e/test_hybrid_chunked_prefill.py
+++ b/tests/v1/e2e/test_hybrid_chunked_prefill.py
@@ -36,14 +36,20 @@
 ]
 
 
-@pytest.mark.skipif(not current_platform.is_cuda(), reason="CUDA not available")
 @pytest.mark.parametrize(
     "model_name",
     [
         pytest.param("Qwen/Qwen3.5-4B", marks=[large_gpu_mark(min_gb=40)]),
         pytest.param(
             "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8",
-            marks=[large_gpu_mark(min_gb=80)] + multi_gpu_marks(num_gpus=4),
+            marks=[large_gpu_mark(min_gb=80)]
+            + multi_gpu_marks(num_gpus=4)
+            + [
+                pytest.mark.skipif(
+                    not current_platform.is_cuda(),
+                    reason="modelopt quantization is supported only on CUDA",
+                )
+            ],
         ),
     ],
 )
diff --git a/tests/v1/ec_connector/integration/README.md b/tests/v1/ec_connector/integration/README.md
index 2dbcb307fda3..a7dab5d5d9d1 100644
--- a/tests/v1/ec_connector/integration/README.md
+++ b/tests/v1/ec_connector/integration/README.md
@@ -13,7 +13,7 @@ The test ensures that disaggregated encoding produces **identical** outputs to t
 
 Note that currently PD disaggregation set up may give slightly different results from a single instance. Therefore, we need the result from 1P+1D as the baseline for 1E+1P+1D
 
-Please refer to [Disaggregated Encoder Feature](../../../docs/features/disagg_encoder.md) for the detailed explanation for the EPD features.
+Please refer to [Disaggregated Encoder Feature](../../../../docs/features/disagg_encoder.md) for the detailed explanation for the EPD features.
 
 ## Files
 
@@ -122,7 +122,7 @@ Quick sanity check:
 - Encoder cache should enable exact output reproduction
 - Test cleans up all instances and cache files after completion
 - Safe to run multiple times (idempotent)
-- We setup the PD disagg part with NixlConnector. Please read details about EPD in `examples/online_serving/disaggregated_encoder/README.md`
+- We setup the PD disagg part with NixlConnector. Please read details about EPD in `examples/disaggregated/disaggregated_encoder/README.md`
 
 ## Requirements
 
diff --git a/tests/v1/ec_connector/integration/run_epd_correctness_test.sh b/tests/v1/ec_connector/integration/run_epd_correctness_test.sh
index ffe9cac38030..65716444a57c 100644
--- a/tests/v1/ec_connector/integration/run_epd_correctness_test.sh
+++ b/tests/v1/ec_connector/integration/run_epd_correctness_test.sh
@@ -15,8 +15,9 @@
 
 # set -xe
 
-# Find the git repository root directory
-GIT_ROOT=$(git rev-parse --show-toplevel)
+# Resolve the repository root from the script location instead of `.git`.
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+GIT_ROOT="${GIT_ROOT:-$(cd -- "${SCRIPT_DIR}/../../../.." && pwd -P)}"
 
 # Model to test
 MODEL="${MODEL:-Qwen/Qwen2.5-VL-3B-Instruct}"
@@ -184,7 +185,7 @@ run_epd_1e_1pd() {
 
     # Start proxy
     echo "Starting EPD proxy on port $PROXY_PORT"
-    python "${GIT_ROOT}/examples/online_serving/disaggregated_encoder/disagg_epd_proxy.py" \
+    python "${GIT_ROOT}/examples/disaggregated/disaggregated_encoder/disagg_epd_proxy.py" \
         --host "0.0.0.0" \
         --port "$PROXY_PORT" \
         --encode-servers-urls "http://localhost:$ENCODE_PORT" \
@@ -410,7 +411,7 @@ run_epd_1e_1p_1d() {
     
     # Start proxy
     echo "Starting EPD proxy on port $PROXY_PORT"
-    python "${GIT_ROOT}/examples/online_serving/disaggregated_encoder/disagg_epd_proxy.py" \
+    python "${GIT_ROOT}/examples/disaggregated/disaggregated_encoder/disagg_epd_proxy.py" \
         --host "0.0.0.0" \
         --port "$PROXY_PORT" \
         --encode-servers-urls "http://localhost:$ENCODE_PORT" \
diff --git a/tests/v1/engine/test_abort_final_step.py b/tests/v1/engine/test_abort_final_step.py
index 81a120d151d6..8f1e8029955f 100644
--- a/tests/v1/engine/test_abort_final_step.py
+++ b/tests/v1/engine/test_abort_final_step.py
@@ -66,7 +66,7 @@ def __init__(
         self,
         vllm_config: VllmConfig,
         role: KVConnectorRole,
-        kv_cache_config: KVCacheConfig | None = None,
+        kv_cache_config: KVCacheConfig,
     ):
         super().__init__(vllm_config, role, kv_cache_config)
         # Get the status file path from extra config
diff --git a/tests/v1/engine/test_async_llm.py b/tests/v1/engine/test_async_llm.py
index 69a1c38a453d..92de5a7e9819 100644
--- a/tests/v1/engine/test_async_llm.py
+++ b/tests/v1/engine/test_async_llm.py
@@ -256,8 +256,10 @@ async def test_multi_abort(output_kind: RequestOutputKind):
                 )
             )
 
-        # Let requests start
-        await asyncio.sleep(0.5)
+        # Let requests start generating, use a longer sleep to ensure all
+        # requests have exited prefill and produced at least one
+        # decode token before we abort.
+        await asyncio.sleep(1.0)
 
         # Use multi-abort to abort multiple requests at once
         abort_request_ids = [request_ids[i] for i in REQUEST_IDS_TO_ABORT]
@@ -369,9 +371,10 @@ async def test_mid_stream_cancellation(
         # Wait for all tasks to complete
         results = await asyncio.gather(*tasks)
 
-        # Verify all tasks were cancelled at the expected point
+        # Verify all tasks were cancelled at the expected point.
+        # Uses >= because the cancel check is `count >= cancel_after`.
         for num_generated_tokens, request_id in results:
-            assert num_generated_tokens == NUM_EXPECTED_TOKENS, (
+            assert num_generated_tokens >= NUM_EXPECTED_TOKENS, (
                 f"{request_id} generated {num_generated_tokens} tokens but "
                 f"expected to cancel after {NUM_EXPECTED_TOKENS}"
             )
@@ -514,7 +517,6 @@ async def test_header_dp_rank_argument():
         serving_render = OpenAIServingRender(
             model_config=engine.model_config,
             renderer=engine.renderer,
-            io_processor=engine.io_processor,
             model_registry=models.registry,
             request_logger=None,
             chat_template=None,
diff --git a/tests/v1/engine/test_core_engine_actor_manager.py b/tests/v1/engine/test_core_engine_actor_manager.py
new file mode 100644
index 000000000000..195ddda05a6a
--- /dev/null
+++ b/tests/v1/engine/test_core_engine_actor_manager.py
@@ -0,0 +1,136 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import os
+import uuid
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+import pytest
+import ray
+
+from vllm.v1.engine.core import EngineCoreActorMixin
+from vllm.v1.engine.utils import CoreEngineActorManager, EngineZmqAddresses
+
+
+class _StubEngineCoreActor(EngineCoreActorMixin):
+    def __init__(
+        self,
+        vllm_config: Any,
+        local_client: bool,
+        addresses: EngineZmqAddresses,
+        executor_class: type[Any],
+        log_stats: bool,
+        dp_rank: int = 0,
+        local_dp_rank: int = 0,
+    ):
+        # Exercise the production Ray actor mixin without loading a model.
+        EngineCoreActorMixin.__init__(
+            self, vllm_config, addresses, dp_rank, local_dp_rank
+        )
+
+    def _set_visible_devices(self, vllm_config: Any, local_dp_rank: int) -> None:
+        pass
+
+    def wait_for_init(self) -> None:
+        pass
+
+    def run(self) -> None:
+        pass
+
+    def get_nixl_side_channel_host(self) -> str | None:
+        return os.environ.get("VLLM_NIXL_SIDE_CHANNEL_HOST")
+
+
+class _DummyExecutor:
+    pass
+
+
+def _make_vllm_config() -> SimpleNamespace:
+    return SimpleNamespace(
+        parallel_config=SimpleNamespace(
+            data_parallel_size=1,
+            data_parallel_size_local=1,
+            enable_elastic_ep=False,
+            world_size=1,
+        ),
+        model_config=SimpleNamespace(is_moe=False),
+        kv_transfer_config=None,
+    )
+
+
+def _make_addresses() -> EngineZmqAddresses:
+    return EngineZmqAddresses(
+        inputs=["tcp://127.0.0.1:12345"],
+        outputs=["tcp://127.0.0.1:12346"],
+    )
+
+
+def _make_cpu_placement_group():
+    pg = ray.util.placement_group(
+        [{"CPU": 0.001}, {"CPU": 1.0}],
+        strategy="PACK",
+    )
+    ray.get(pg.ready())
+    return pg
+
+
+@pytest.fixture
+def ray_context():
+    started_ray = False
+    if not ray.is_initialized():
+        project_root = str(Path(__file__).resolve().parents[3])
+        ray.init(
+            num_cpus=2,
+            runtime_env={"env_vars": {"PYTHONPATH": project_root}},
+            log_to_driver=False,
+        )
+        started_ray = True
+
+    yield
+
+    if started_ray:
+        ray.shutdown()
+
+
+@pytest.mark.usefixtures("ray_context")
+def test_driver_nixl_side_channel_host_does_not_leak_to_engine_core_actor(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    driver_marker = f"driver-only-nixl-host-{uuid.uuid4()}"
+    created_placement_groups: list[Any] = []
+    manager: CoreEngineActorManager | None = None
+
+    def create_dp_placement_groups(vllm_config: Any):
+        pg = _make_cpu_placement_group()
+        created_placement_groups.append(pg)
+        return [pg], [0]
+
+    monkeypatch.setenv("VLLM_NIXL_SIDE_CHANNEL_HOST", driver_marker)
+    monkeypatch.setattr("vllm.v1.engine.core.EngineCoreActor", _StubEngineCoreActor)
+    monkeypatch.setattr(
+        CoreEngineActorManager,
+        "create_dp_placement_groups",
+        staticmethod(create_dp_placement_groups),
+    )
+
+    try:
+        manager = CoreEngineActorManager(
+            vllm_config=_make_vllm_config(),
+            addresses=_make_addresses(),
+            executor_class=_DummyExecutor,
+            log_stats=False,
+        )
+        actor = manager.local_engine_actors[0]
+        actor_host = ray.get(actor.get_nixl_side_channel_host.remote())
+        node_host = ray.util.get_node_ip_address()
+
+        assert actor_host != driver_marker
+        assert actor_host == node_host
+    finally:
+        if manager is not None:
+            manager.shutdown()
+        else:
+            for pg in created_placement_groups:
+                ray.util.remove_placement_group(pg)
diff --git a/tests/v1/engine/test_engine_args.py b/tests/v1/engine/test_engine_args.py
index 527a56ff49ee..c336f93c5fbb 100644
--- a/tests/v1/engine/test_engine_args.py
+++ b/tests/v1/engine/test_engine_args.py
@@ -90,3 +90,41 @@ def test_defaults_with_usage_context():
     vllm_config = engine_args.create_engine_config(UsageContext.OPENAI_API_SERVER)
     assert vllm_config.scheduler_config.max_num_seqs == default_max_num_seqs
     assert vllm_config.scheduler_config.max_num_batched_tokens == default_server_tokens  # noqa: E501
+
+
+def test_mm_prefix_lm_raises_batched_tokens_floor():
+    """Verify that prefix-LM multimodal models auto-raise
+    max_num_batched_tokens to fit at least one multimodal item.
+
+    Regression test for https://github.com/vllm-project/vllm/issues/42687
+    """
+    from unittest.mock import patch
+
+    # Simulate a prefix-LM multimodal model whose largest modality
+    # (video) requires 2496 tokens — more than the 2048 default.
+    fake_mm_min = (2496, "video")
+
+    engine_args = EngineArgs(
+        model="facebook/opt-125m",
+        max_model_len=2048,
+        enforce_eager=True,
+    )
+
+    with (
+        patch.object(
+            type(engine_args),
+            "_get_min_mm_batched_tokens",
+            staticmethod(lambda _mc: fake_mm_min),
+        ),
+        patch(
+            "vllm.config.ModelConfig.is_multimodal_model",
+            new_callable=lambda: property(lambda self: True),
+        ),
+        patch(
+            "vllm.config.ModelConfig.is_mm_prefix_lm",
+            new_callable=lambda: property(lambda self: True),
+        ),
+    ):
+        vllm_config = engine_args.create_engine_config(UsageContext.OPENAI_API_SERVER)
+
+    assert vllm_config.scheduler_config.max_num_batched_tokens >= 2496
diff --git a/tests/v1/engine/test_engine_core_client.py b/tests/v1/engine/test_engine_core_client.py
index 5e08ae35f76e..ab5946ad3ba6 100644
--- a/tests/v1/engine/test_engine_core_client.py
+++ b/tests/v1/engine/test_engine_core_client.py
@@ -114,7 +114,7 @@ def poll(self, timeout: int) -> int:
             return 1
 
         def recv_multipart(self):
-            return (b"\x00\x00", b"ready")
+            return (b"\x00\x00", b"")
 
     class DummySocket:
         def send_multipart(self, _msg, *, copy: bool = False, track: bool = False):
@@ -936,6 +936,13 @@ async def test_engine_core_client_future_utility_async(
             client.shutdown()
 
 
+@pytest.mark.parametrize(
+    "model_name,num_groups",
+    [
+        ("meta-llama/Llama-3.2-1B-Instruct", 1),
+        ("google/gemma-3-1b-it", 7),
+    ],
+)
 @pytest.mark.parametrize(
     "multiprocessing_mode,publisher_config",
     [(True, "tcp"), (False, "inproc")],
@@ -944,12 +951,14 @@ async def test_engine_core_client_future_utility_async(
 def test_kv_cache_events(
     multiprocessing_mode: bool,
     publisher_config,
+    model_name: str,
+    num_groups: int,
 ):
     block_size = 16
     num_blocks = 2
 
     engine_args = EngineArgs(
-        model=MODEL_NAME,
+        model=model_name,
         enforce_eager=True,
         enable_prefix_caching=True,
         block_size=block_size,
@@ -985,26 +994,29 @@ def test_kv_cache_events(
         assert result is not None, "No message received"
 
         seq, received = result
-
         assert seq == 0, "Sequence number mismatch"
-        assert len(received.events) == 1, "We should have exactly one BlockStored event"
-        event = received.events[0]
-        assert isinstance(event, BlockStored), "We should have a BlockStored event"
-        assert len(event.block_hashes) == num_blocks, (
-            "We should have a BlockStored event with 2 block_hashes"
-        )
-        assert event.block_size == block_size, (
-            "Block size should be the same as the block size"
-        )
-        assert event.parent_block_hash is None, "Parent block hash should be None"
-        assert event.lora_id is None, "Lora id should be None"
-        assert event.lora_name is None, "Lora name should be None"
-        assert len(event.token_ids) == num_blocks * block_size, (
-            "Token ids should be the same as the custom tokens"
-        )
-        assert event.token_ids == custom_tokens, (
-            "Token ids should be the same as the custom tokens"
+        assert len(received.events) == num_groups, (
+            f"Expected {num_groups} BlockStored event(s), got {len(received.events)}"
         )
+
+        for index, event in enumerate(received.events):
+            assert isinstance(event, BlockStored), "We should have a BlockStored event"
+            assert len(event.block_hashes) == num_blocks, (
+                "We should have a BlockStored event with 2 block_hashes"
+            )
+            assert event.block_size == block_size, (
+                "Block size should be the same as the block size"
+            )
+            assert event.parent_block_hash is None, "Parent block hash should be None"
+            assert event.lora_id is None, "Lora id should be None"
+            assert event.lora_name is None, "Lora name should be None"
+            assert len(event.token_ids) == num_blocks * block_size, (
+                "Token ids should be the same as the custom tokens"
+            )
+            assert event.token_ids == custom_tokens, (
+                "Token ids should be the same as the custom tokens"
+            )
+            assert event.group_idx == index
     finally:
         client.shutdown()
         subscriber.close()
diff --git a/tests/v1/engine/test_llm_engine.py b/tests/v1/engine/test_llm_engine.py
index 7e5196efc873..65dbb0e48199 100644
--- a/tests/v1/engine/test_llm_engine.py
+++ b/tests/v1/engine/test_llm_engine.py
@@ -79,10 +79,11 @@ def _get_test_sampling_params(
     structured_outputs: bool = False,
 ) -> tuple[list[SamplingParams], list[int]]:
     """Generate random sampling params for a batch."""
+    rng = random.Random(seed)
 
     def get_mostly_n_gt1() -> int:
         r"""Mostly n \in [2,20], ~1/3 n=1"""
-        x = random.randint(0, 28)
+        x = rng.randint(0, 28)
         if x < 10:
             return 1
         else:
diff --git a/tests/v1/engine/test_logprobs_processor.py b/tests/v1/engine/test_logprobs_processor.py
new file mode 100644
index 000000000000..edb8cef518ca
--- /dev/null
+++ b/tests/v1/engine/test_logprobs_processor.py
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for LogprobsProcessor.
+
+These tests exercise the truncation invariant that the MRV2 sampler relies
+on: when the sampler returns a row wider than a request's own
+`num_logprobs + 1` (because another request in the batch needed a wider
+row), the trailing positions are populated with sentinel values
+(`token_id=0`, `logprob=-inf`). LogprobsProcessor must read only the first
+`num_logprobs + 1` entries so those sentinels never reach the user.
+"""
+
+import numpy as np
+
+from vllm.logprobs import create_sample_logprobs
+from vllm.v1.engine.logprobs import LogprobsProcessor
+from vllm.v1.outputs import LogprobsLists
+
+
+def _make_processor(num_logprobs: int) -> LogprobsProcessor:
+    return LogprobsProcessor(
+        tokenizer=None,
+        logprobs=create_sample_logprobs(flat_logprobs=False),
+        prompt_logprobs=None,
+        cumulative_logprob=0.0,
+        num_logprobs=num_logprobs,
+        num_prompt_logprobs=None,
+    )
+
+
+def test_drops_trailing_sentinel_columns():
+    """A request that asked for 3 custom token logprobs but ended up in a
+    batch padded to width 5 must not surface the trailing -inf entries."""
+    processor = _make_processor(num_logprobs=3)
+
+    sampled = 42
+    # Layout: [sampled, custom_1, custom_2, custom_3, SENTINEL, SENTINEL]
+    # Use float32-exact values so cumulative_logprob compares cleanly.
+    token_ids = np.array([[sampled, 100, 200, 300, 0, 0]], dtype=np.int32)
+    logprobs = np.array([[-0.5, -1.0, -2.0, -3.0, -np.inf, -np.inf]], dtype=np.float32)
+    ranks = np.array([1], dtype=np.int32)
+
+    processor._update_sample_logprobs(LogprobsLists(token_ids, logprobs, ranks))
+
+    assert len(processor.logprobs) == 1
+    pos = processor.logprobs[0]
+    # Exactly sampled + 3 requested tokens; trailing sentinels dropped.
+    assert set(pos.keys()) == {sampled, 100, 200, 300}
+    assert 0 not in pos
+    assert all(np.isfinite(lp.logprob) for lp in pos.values())
+    # cumulative_logprob comes from the sampled token's logprob only.
+    assert processor.cumulative_logprob == -0.5
+
+
+def test_accepts_exactly_sized_row():
+    """When the row is exactly num_logprobs+1, no truncation needed."""
+    processor = _make_processor(num_logprobs=2)
+
+    token_ids = np.array([[7, 11, 13]], dtype=np.int32)
+    logprobs = np.array([[-0.5, -1.5, -2.5]], dtype=np.float32)
+    ranks = np.array([1], dtype=np.int32)
+
+    processor._update_sample_logprobs(LogprobsLists(token_ids, logprobs, ranks))
+
+    pos = processor.logprobs[0]
+    assert set(pos.keys()) == {7, 11, 13}
diff --git a/tests/v1/engine/test_output_processor.py b/tests/v1/engine/test_output_processor.py
index ece48e009d27..1919349790fa 100644
--- a/tests/v1/engine/test_output_processor.py
+++ b/tests/v1/engine/test_output_processor.py
@@ -84,6 +84,7 @@ def test_incremental_detokenization(
 
     engine_core = MockEngineCore(
         tokens_list=dummy_test_vectors.generation_tokens,
+        prompts_list=dummy_test_vectors.prompt_tokens,
         request_ids=[req.request_id for req in requests],
     )
 
@@ -506,6 +507,7 @@ def test_logprobs_processor(
 
     engine_core = MockEngineCore(
         tokens_list=dummy_test_vectors.generation_tokens,
+        prompts_list=dummy_test_vectors.prompt_tokens,
         generated_logprobs_raw=None
         if num_sample_logprobs is None
         else dummy_test_vectors.generation_logprobs,
@@ -691,6 +693,7 @@ def test_stop_token(
 
     engine_core = MockEngineCore(
         tokens_list=[generation_tokens],
+        prompts_list=dummy_test_vectors.prompt_tokens,
         generated_logprobs_raw=[generation_logprobs] if do_logprobs else None,
         prompt_logprobs_raw=None,
         eos_token_id=sampling_params.eos_token_id,
@@ -794,6 +797,7 @@ def test_stop_string(
 
     engine_core = MockEngineCore(
         tokens_list=dummy_test_vectors.generation_tokens,
+        prompts_list=dummy_test_vectors.prompt_tokens,
         generated_logprobs_raw=dummy_test_vectors.generation_logprobs
         if num_sample_logprobs
         else None,
@@ -917,6 +921,7 @@ def test_iteration_stats(dummy_test_vectors):
 
     engine_core = MockEngineCore(
         dummy_test_vectors.generation_tokens,
+        dummy_test_vectors.prompt_tokens,
         request_ids=[req.request_id for req in requests],
     )
 
@@ -927,7 +932,7 @@ def test_iteration_stats(dummy_test_vectors):
     inactive_request = requests[num_active]
 
     # First iteration has 2 prefills.
-    outputs = engine_core.get_outputs()[:num_active]
+    outputs = engine_core.get_outputs(num_active)
     iteration_stats = IterationStats()
     output_processor.process_outputs(outputs, engine_core_timestamp, iteration_stats)
     total_prompt_tokens = sum(
@@ -941,7 +946,7 @@ def test_iteration_stats(dummy_test_vectors):
     assert iteration_stats.num_generation_tokens == num_active
 
     # Just decodes in this step.
-    outputs = engine_core.get_outputs()[:num_active]
+    outputs = engine_core.get_outputs(num_active)
     iteration_stats = IterationStats()
     output_processor.process_outputs(outputs, engine_core_timestamp, iteration_stats)
 
@@ -951,7 +956,7 @@ def test_iteration_stats(dummy_test_vectors):
     # Add a new request - prefill and 2 decodes in this step.
     output_processor.add_request(inactive_request, None)
     num_active += 1
-    outputs = engine_core.get_outputs()[:num_active]
+    outputs = engine_core.get_outputs(num_active)
     iteration_stats = IterationStats()
     output_processor.process_outputs(outputs, engine_core_timestamp, iteration_stats)
     total_prompt_tokens = len(dummy_test_vectors.prompt_tokens[num_active - 1])
@@ -960,7 +965,7 @@ def test_iteration_stats(dummy_test_vectors):
     assert iteration_stats.num_generation_tokens == num_active
 
     # Just decodes in this step.
-    outputs = engine_core.get_outputs()[:num_active]
+    outputs = engine_core.get_outputs(num_active)
     iteration_stats = IterationStats()
     output_processor.process_outputs(outputs, engine_core_timestamp, iteration_stats)
 
@@ -1003,6 +1008,7 @@ def test_lora_request_tracking(log_stats: bool, dummy_test_vectors):
 
     engine_core = MockEngineCore(
         dummy_test_vectors.generation_tokens,
+        dummy_test_vectors.prompt_tokens,
         request_ids=[req.request_id for req in requests],
     )
 
diff --git a/tests/v1/engine/utils.py b/tests/v1/engine/utils.py
index de953a58843e..013e73bd8e48 100644
--- a/tests/v1/engine/utils.py
+++ b/tests/v1/engine/utils.py
@@ -11,6 +11,7 @@
 
 from vllm.engine.arg_utils import EngineArgs
 from vllm.v1.engine import EngineCoreOutput, FinishReason
+from vllm.v1.metrics.stats import PrefillStats
 from vllm.v1.outputs import LogprobsLists, LogprobsTensors
 
 GeneralTokenizerType: TypeAlias = PreTrainedTokenizer | PreTrainedTokenizerFast
@@ -330,6 +331,7 @@ class MockEngineCore:
     def __init__(
         self,
         tokens_list: list[list[int]],
+        prompts_list: list[list[int]],
         # For each request, for each sampled token offset,
         # a tuple of
         # (list of topk token ids, list of sample logprob vals, rank)
@@ -346,12 +348,13 @@ def __init__(
     ) -> None:
         self.num_requests = len(tokens_list)
         self.tokens_list = tokens_list
-        self.current_idx = 0
+        self.prompts_list = prompts_list
         self.generated_logprobs_raw = generated_logprobs_raw
         self.do_logprobs = generated_logprobs_raw is not None
         self.prompt_logprobs_raw = prompt_logprobs_raw
         self.do_prompt_logprobs = prompt_logprobs_raw is not None
         self.request_finished = [False for _ in range(self.num_requests)]
+        self.request_token_idx = [0 for _ in range(self.num_requests)]
         self.eos_token_id = eos_token_id
         self.stop_token_ids = stop_token_ids
         self.request_ids = (
@@ -360,14 +363,18 @@ def __init__(
             else [f"request-{i}" for i in range(self.num_requests)]
         )
 
-    def get_outputs(self) -> list[EngineCoreOutput]:
+    def get_outputs(self, num_active: int = -1) -> list[EngineCoreOutput]:
         do_logprobs = self.do_logprobs
         do_prompt_logprobs = self.do_prompt_logprobs
-        token_idx = self.current_idx
 
         outputs = []
-        for req_idx, token_ids in enumerate(self.tokens_list):
+        for req_idx, (token_ids, prompt_token_ids) in enumerate(
+            zip(self.tokens_list, self.prompts_list)
+        ):
+            if num_active != -1 and req_idx >= num_active:
+                break
             if not self.request_finished[req_idx]:
+                token_idx = self.request_token_idx[req_idx]
                 if do_logprobs:
                     assert self.generated_logprobs_raw is not None
                     (logprobs_token_ids_, logprobs_, sampled_token_ranks_) = (
@@ -381,19 +388,32 @@ def get_outputs(self) -> list[EngineCoreOutput]:
                 else:
                     logprobs = None
                 if do_prompt_logprobs:
-                    if self.current_idx == 0:
+                    if token_idx == 0:
                         assert self.prompt_logprobs_raw is not None
                         prompt_logprobs = self.prompt_logprobs_raw[req_idx]
                     else:
                         prompt_logprobs = None
                 else:
                     prompt_logprobs = None
+
+                # Add prefill_stats on first output (prefill) for this request
+                if token_idx == 0:
+                    prefill_stats = PrefillStats()
+                    prefill_stats.set(
+                        num_prompt_tokens=len(prompt_token_ids),
+                        num_local_cached_tokens=0,
+                        num_external_cached_tokens=0,
+                    )
+                else:
+                    prefill_stats = None
+
                 new_token_id = token_ids[token_idx]
                 output = EngineCoreOutput(
                     request_id=self.request_ids[req_idx],
                     new_token_ids=[new_token_id],
                     new_logprobs=logprobs,
                     new_prompt_logprobs_tensors=prompt_logprobs,
+                    prefill_stats=prefill_stats,
                 )
                 if token_idx == len(token_ids) - 1:
                     output.finish_reason = FinishReason.LENGTH
@@ -407,5 +427,6 @@ def get_outputs(self) -> list[EngineCoreOutput]:
                     self.request_finished[req_idx] = True
                 outputs.append(output)
 
-        self.current_idx += 1
+                self.request_token_idx[req_idx] += 1
+
         return outputs
diff --git a/tests/v1/entrypoints/openai/test_thinking_token_budget.py b/tests/v1/entrypoints/openai/test_thinking_token_budget.py
deleted file mode 100644
index f574b07b6b81..000000000000
--- a/tests/v1/entrypoints/openai/test_thinking_token_budget.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-"""E2E tests for thinking_token_budget with reasoning models."""
-
-import openai
-import pytest
-import pytest_asyncio
-
-from tests.utils import RemoteOpenAIServer
-
-MODEL_NAME = "Qwen/Qwen3-0.6B"
-MESSAGES = [{"role": "user", "content": "What is 1+1? Be concise."}]
-THINK_BUDGET = 5
-
-
-@pytest.fixture(scope="module")
-def server():
-    args = [
-        "--reasoning-parser",
-        "qwen3",
-        "--reasoning-config",
-        '{"think_start_str": "<think>", "think_end_str": "</think>"}',
-        "--max-model-len",
-        "2048",
-        "--enforce-eager",
-        "--no-async-scheduling",
-    ]
-    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
-        yield remote_server
-
-
-@pytest_asyncio.fixture
-async def client(server):
-    async with server.get_async_client() as async_client:
-        yield async_client
-
-
-@pytest.mark.asyncio
-async def test_thinking_token_budget_mixed_requests(client: openai.AsyncOpenAI):
-    """Test that mixed requests (some with thinking_token_budget, some without)
-    complete successfully without errors."""
-
-    response_with_budget = await client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=MESSAGES,
-        max_tokens=100,
-        extra_body={"thinking_token_budget": THINK_BUDGET},
-    )
-    response_without_budget = await client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=MESSAGES,
-        max_tokens=100,
-    )
-
-    msg_with = response_with_budget.choices[0].message
-    msg_without = response_without_budget.choices[0].message
-
-    assert msg_with.content or getattr(msg_with, "reasoning", None)
-    assert msg_without.content or getattr(msg_without, "reasoning", None)
-
-
-@pytest.mark.asyncio
-async def test_thinking_token_budget_limits_reasoning(client: openai.AsyncOpenAI):
-    """Test that thinking_token_budget limits the number of reasoning tokens.
-
-    In streaming mode each reasoning delta corresponds to one token, so
-    counting non-empty reasoning_content chunks gives the exact token count.
-    """
-
-    reasoning_token_count = 0
-    stream = await client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=MESSAGES,
-        max_tokens=100,
-        stream=True,
-        extra_body={"thinking_token_budget": THINK_BUDGET},
-    )
-    async for chunk in stream:
-        delta = chunk.choices[0].delta
-        if getattr(delta, "reasoning", None):
-            reasoning_token_count += 1
-
-    assert reasoning_token_count == THINK_BUDGET, (
-        f"reasoning tokens ({reasoning_token_count}) != "
-        f"thinking_token_budget ({THINK_BUDGET})"
-    )
diff --git a/tests/v1/executor/test_ray_utils.py b/tests/v1/executor/test_ray_utils.py
new file mode 100644
index 000000000000..8da9d5459e73
--- /dev/null
+++ b/tests/v1/executor/test_ray_utils.py
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import numpy as np
+
+from vllm.v1.executor.ray_utils import detach_zero_copy_from_model_runner_output
+from vllm.v1.outputs import LogprobsLists, LogprobsTensors, ModelRunnerOutput
+
+
+def _make_readonly(arr: np.ndarray) -> np.ndarray:
+    arr.setflags(write=False)
+    return arr
+
+
+def test_detach_zero_copy_from_model_runner_output_copies_only_numpy_views():
+    cu_num_generated_tokens = [0, 2]
+    prompt_logprobs = LogprobsTensors.empty_cpu(1, 2)
+    output = ModelRunnerOutput(
+        req_ids=["req-0"],
+        req_id_to_index={"req-0": 0},
+        logprobs=LogprobsLists(
+            logprob_token_ids=_make_readonly(
+                np.array([[1, 2], [3, 4]], dtype=np.int32)
+            ),
+            logprobs=_make_readonly(
+                np.array([[0.1, 0.2], [0.3, 0.4]], dtype=np.float32)
+            ),
+            sampled_token_ranks=_make_readonly(np.array([1, 2], dtype=np.int32)),
+            cu_num_generated_tokens=cu_num_generated_tokens,
+        ),
+        prompt_logprobs_dict={"req-0": prompt_logprobs},
+    )
+
+    original_logprobs = output.logprobs
+    assert original_logprobs is not None
+
+    detach_zero_copy_from_model_runner_output(output)
+
+    detached_logprobs = output.logprobs
+    assert detached_logprobs is not None
+    assert detached_logprobs is not original_logprobs
+    assert (
+        detached_logprobs.logprob_token_ids is not original_logprobs.logprob_token_ids
+    )
+    assert detached_logprobs.logprobs is not original_logprobs.logprobs
+    assert (
+        detached_logprobs.sampled_token_ranks
+        is not original_logprobs.sampled_token_ranks
+    )
+    assert detached_logprobs.logprob_token_ids.flags.writeable
+    assert detached_logprobs.logprobs.flags.writeable
+    assert detached_logprobs.sampled_token_ranks.flags.writeable
+    assert detached_logprobs.cu_num_generated_tokens is cu_num_generated_tokens
+    assert output.prompt_logprobs_dict["req-0"] is prompt_logprobs
diff --git a/tests/v1/kv_connector/extract_hidden_states_integration/test_extraction.py b/tests/v1/kv_connector/extract_hidden_states_integration/test_extraction.py
index 6a8c64152fec..5cc19247f515 100644
--- a/tests/v1/kv_connector/extract_hidden_states_integration/test_extraction.py
+++ b/tests/v1/kv_connector/extract_hidden_states_integration/test_extraction.py
@@ -83,7 +83,7 @@ def register_predictable_model():
 
 
 def test_extract_hidden_states_with_predictable_dummy_model(
-    predictable_llama_config_path, tmp_path
+    predictable_llama_config_path, tmp_path, monkeypatch
 ):
     """Comprehensive test using a predictable dummy model with synthetic weights.
 
@@ -94,6 +94,12 @@ def test_extract_hidden_states_with_predictable_dummy_model(
     3. Layer ordering is preserved correctly (non-sequential layer IDs)
     4. Multiple prompts of different lengths produce consistent layer values
     """
+    # Force fork so the engine worker inherits the autouse fixture's
+    # ModelRegistry.register_model("PredictableLlamaForCausalLM", ...).
+    # Spawn (the CI default) starts a fresh Python process that wouldn't
+    # see the registration.
+    monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "fork")
+
     # Test with non-sequential layer ordering to verify correct association
     layer_ids = [5, 2, 10]
     num_layers = len(layer_ids)
@@ -114,6 +120,7 @@ def test_extract_hidden_states_with_predictable_dummy_model(
         },
         max_model_len=128,
         enforce_eager=True,
+        enable_chunked_prefill=False,
         trust_remote_code=True,
         load_format="dummy",  # Don't try to load real weights
     )
@@ -153,3 +160,56 @@ def test_extract_hidden_states_with_predictable_dummy_model(
                 f"but got mean={layer_hidden.mean():.3f}, "
                 f"min={layer_hidden.min():.3f}, max={layer_hidden.max():.3f}"
             )
+
+
+def test_extract_hidden_states_qwen35_hybrid_smoke(tmp_path):
+    """Smoke test for Qwen3.5 hybrid (mamba + full-attention) models.
+    Uses load_format="dummy" to just check shape/plumbing.
+    """
+    layer_ids = [5, 11, 17]
+    hidden_size = 1024  # Qwen/Qwen3.5-0.8B hidden_size
+
+    llm = LLM(
+        model="Qwen/Qwen3.5-0.8B",
+        speculative_config={
+            "method": "extract_hidden_states",
+            "num_speculative_tokens": 1,
+            "draft_model_config": {
+                "hf_config": {"eagle_aux_hidden_state_layer_ids": layer_ids}
+            },
+        },
+        kv_transfer_config={
+            "kv_connector": "ExampleHiddenStatesConnector",
+            "kv_role": "kv_producer",
+            "kv_connector_extra_config": {"shared_storage_path": str(tmp_path)},
+        },
+        max_model_len=256,
+        enforce_eager=True,
+        enable_chunked_prefill=False,
+        gpu_memory_utilization=0.4,
+        load_format="dummy",
+    )
+
+    prompts = ["Hello world", "Test prompt with several tokens"]
+    sampling_params = SamplingParams(max_tokens=1, temperature=0.0)
+    outputs = llm.generate(prompts, sampling_params)
+    del llm
+    gc.collect()
+
+    assert len(outputs) == len(prompts)
+    for output in outputs:
+        assert output.kv_transfer_params is not None
+        hidden_states_path = output.kv_transfer_params.get("hidden_states_path")
+        assert hidden_states_path is not None
+        assert os.path.exists(hidden_states_path)
+
+        with safe_open(hidden_states_path, "pt") as f:
+            token_ids = f.get_tensor("token_ids")
+            hidden_states = f.get_tensor("hidden_states")
+
+        assert torch.equal(token_ids, torch.tensor(output.prompt_token_ids))
+        assert hidden_states.shape == (
+            len(output.prompt_token_ids),
+            len(layer_ids),
+            hidden_size,
+        )
diff --git a/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
index 92ab254ddbed..040632249d34 100755
--- a/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
@@ -3,6 +3,10 @@ set -euo pipefail
 
 # Utility to run integration tests sequentially with varying TP configurations.
 SCRIPT="v1/kv_connector/nixl_integration/run_accuracy_test.sh"
+IMPORT_CANARY="v1/kv_connector/nixl_integration/test_nixl_imports.py"
+
+echo "=== Running NIXL import canary ==="
+python3 -m pytest -s -x "${IMPORT_CANARY}"
 
 # Define test configurations
 tp_configs=(
@@ -12,16 +16,24 @@ tp_configs=(
   "GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA case
   "GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny"
   "GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=1 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny"
-  "GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=google/gemma-3-4b-it VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192" # SW model
 )
 dp_ep_configs=(
 "DP_EP=1 GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA+P-TP1, D-DPEP=2 (TP=1)
 "DP_EP=1 GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA+P-TP2, D-DPEP=2 (TP=1)
 )
 hybrid_ssm_configs=(
-  "ENABLE_HMA_FLAG=1 GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=ibm-granite/granite-4.0-h-tiny VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192,--trust-remote-code"
+  "VLLM_SSM_CONV_STATE_LAYOUT=DS ENABLE_HMA_FLAG=1 GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=ibm-granite/granite-4.0-h-tiny VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192,--trust-remote-code"
   # TODO: (NickLucche) Address async scheduling issue with TP>1 separately as this may impact other models.
-  "ENABLE_HMA_FLAG=1 PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=2 GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=ibm-granite/granite-4.0-h-tiny VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192,--trust-remote-code,--no-async-scheduling"
+  "VLLM_SSM_CONV_STATE_LAYOUT=DS ENABLE_HMA_FLAG=1 PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=2 GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=ibm-granite/granite-4.0-h-tiny VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192,--trust-remote-code,--no-async-scheduling"
+  # GDN (Qwen3.5)
+  "VLLM_SSM_CONV_STATE_LAYOUT=DS ENABLE_HMA_FLAG=1 GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=Qwen/Qwen3.5-0.8B"
+  "VLLM_SSM_CONV_STATE_LAYOUT=DS ENABLE_HMA_FLAG=1 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=Qwen/Qwen3.5-0.8B VLLM_SERVE_EXTRA_ARGS=--no-async-scheduling"
+)
+sw_attn_configs=(
+  # NOTE: gemma3 does not work with FlashInfer
+  "GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=google/gemma-3-4b-it VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192" # SW model
+  "ENABLE_HMA_FLAG=1 GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=google/gemma-3-4b-it PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192"
+  "ENABLE_HMA_FLAG=1 GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=google/gemma-3-4b-it PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=1 VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192"
 )
 
 # Select config array based on DP_EP env var
@@ -31,6 +43,9 @@ if [[ -n "${DP_EP:-}" ]]; then
 elif [[ -n "${HYBRID_SSM:-}" ]]; then
   configs=("${hybrid_ssm_configs[@]}")
   echo "HYBRID_SSM is set, using hybrid_ssm_configs."
+elif [[ -n "${SW_ATTN:-}" ]]; then
+  configs=("${sw_attn_configs[@]}")
+  echo "SW_ATTN is set, using sw_attn_configs."
 else
   configs=("${tp_configs[@]}")
 fi
diff --git a/tests/v1/kv_connector/nixl_integration/config_sweep_spec_decode_test.sh b/tests/v1/kv_connector/nixl_integration/config_sweep_spec_decode_test.sh
new file mode 100755
index 000000000000..313efc3968dd
--- /dev/null
+++ b/tests/v1/kv_connector/nixl_integration/config_sweep_spec_decode_test.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Sweep wrapper for spec decode acceptance tests, following the same pattern
+# as config_sweep_accuracy_test.sh. Runs spec_decode_acceptance_test.sh once
+# per configuration.
+
+SCRIPT="v1/kv_connector/nixl_integration/spec_decode_acceptance_test.sh"
+
+# EAGLE3: Llama-3.1-8B-Instruct with EAGLE3 speculator.
+eagle3_config="SD_METHOD=eagle3 MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct SD_MODEL=RedHatAI/Llama-3.1-8B-Instruct-speculator.eagle3 NUM_SPEC_TOKENS=3"
+
+# MTP: Qwen3.5-0.8B-Base with hybrid SSM flags.
+mtp_config="SD_METHOD=mtp MODEL_NAME=Qwen/Qwen3.5-0.8B-Base SD_MODEL=Qwen/Qwen3.5-0.8B-Base NUM_SPEC_TOKENS=1 BLOCK_SIZE=32 MAX_MODEL_LEN=4096 VLLM_SSM_CONV_STATE_LAYOUT=DS ENABLE_HMA_FLAG=1 KV_BUFFER_DEVICES=cuda"
+
+configs=(
+  "$eagle3_config"
+  "$mtp_config"
+)
+
+for cfg in "${configs[@]}"; do
+  local_cfg_parts=()
+  read -r -a local_cfg_parts <<< "$cfg"
+  echo "-> Running with: ${cfg}"
+  if ! env "${local_cfg_parts[@]}" bash "${SCRIPT}"; then
+    echo "❌ Test failed for config: ${cfg}"
+    exit 1
+  fi
+done
+
+echo "✅ All spec decode acceptance tests passed!"
diff --git a/tests/v1/kv_connector/nixl_integration/nixl_side_channel_probe.py b/tests/v1/kv_connector/nixl_integration/nixl_side_channel_probe.py
new file mode 100644
index 000000000000..24ecbd795e41
--- /dev/null
+++ b/tests/v1/kv_connector/nixl_integration/nixl_side_channel_probe.py
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Probe a NIXL side-channel socket for handshake metadata readiness."""
+
+import argparse
+import ipaddress
+
+import msgspec
+import zmq
+
+GET_META_MSG = b"get_meta_msg"
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host", required=True)
+    parser.add_argument("--port", required=True, type=int)
+    parser.add_argument("--rank", default=0, type=int)
+    parser.add_argument("--timeout-ms", default=1000, type=int)
+    return parser.parse_args()
+
+
+def make_zmq_path(host: str, port: int) -> str:
+    try:
+        if isinstance(ipaddress.ip_address(host), ipaddress.IPv6Address):
+            return f"tcp://[{host}]:{port}"
+    except ValueError:
+        pass
+    return f"tcp://{host}:{port}"
+
+
+def main() -> None:
+    args = parse_args()
+    ctx = zmq.Context()
+    sock = ctx.socket(zmq.REQ)
+    sock.setsockopt(zmq.LINGER, 0)
+    sock.setsockopt(zmq.RCVTIMEO, args.timeout_ms)
+    try:
+        sock.connect(make_zmq_path(args.host, args.port))
+        sock.send(msgspec.msgpack.encode((GET_META_MSG, args.rank)))
+        sock.recv()
+    finally:
+        sock.close()
+        ctx.term()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
index fe95249602a8..fc446a0e7658 100755
--- a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
@@ -85,8 +85,11 @@ DECODE_BLOCK_SIZE=${DECODE_BLOCK_SIZE:-128}
 # Comma-separated extra args for vllm serve (e.g. --max-model-len,2048)
 VLLM_SERVE_EXTRA_ARGS=${VLLM_SERVE_EXTRA_ARGS:-}
 
-# Find the git repository root directory
-GIT_ROOT=$(git rev-parse --show-toplevel)
+# Resolve the repository root from the script location instead of `.git`.
+# The ROCm CI image copies `/vllm-workspace` without the Git metadata, so
+# `git rev-parse --show-toplevel` is not reliable at runtime.
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+GIT_ROOT="${GIT_ROOT:-$(cd -- "${SCRIPT_DIR}/../../../.." && pwd -P)}"
 
 SMI_BIN=$(which nvidia-smi || which rocm-smi || echo "")
 
diff --git a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
index 703a27fd3f78..9d8e4df8c539 100755
--- a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
@@ -33,8 +33,9 @@ MODELS=(
     "Qwen/Qwen3-0.6B"
 )
 
-# Find the git repository root directory
-GIT_ROOT=$(git rev-parse --show-toplevel)
+# Resolve the repository root from the script location instead of `.git`.
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+GIT_ROOT="${GIT_ROOT:-$(cd -- "${SCRIPT_DIR}/../../../.." && pwd -P)}"
 
 # Trap the SIGINT signal (triggered by Ctrl+C)
 trap 'kill $(jobs -pr)' SIGINT SIGTERM EXIT
diff --git a/tests/v1/kv_connector/nixl_integration/run_multi_connector_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_multi_connector_accuracy_test.sh
new file mode 100755
index 000000000000..2e71858983e9
--- /dev/null
+++ b/tests/v1/kv_connector/nixl_integration/run_multi_connector_accuracy_test.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#
+# Integration accuracy test for MultiConnector (NixlConnector + OffloadingConnector).
+#
+# Launches a P/D setup where both prefill and decode instances use MultiConnector
+# wrapping NixlConnector and OffloadingConnector, then runs gsm8k accuracy via
+# test_accuracy.py.
+#
+# By default runs two configurations:
+#   1. Normal KV layout (NixlConnector without cross-layer blocks)
+#   2. Cross-layer KV layout (NixlConnector with enable_cross_layers_blocks)
+#
+# Usage:
+#   bash tests/v1/kv_connector/nixl_integration/run_multi_connector_accuracy_test.sh
+#
+# Environment variables:
+#   MODEL_NAMES              - model to test (default: Qwen/Qwen3-0.6B)
+#   GPU_MEMORY_UTILIZATION   - GPU memory fraction (default: 0.6)
+#   VLLM_SERVE_EXTRA_ARGS    - comma-separated extra args for vllm serve
+#   SKIP_CROSS_LAYERS        - set to 1 to skip the cross-layer layout test
+#   SKIP_NORMAL_LAYOUT       - set to 1 to skip the normal layout test
+set -xe
+
+# ── Configuration ────────────────────────────────────────────────────────
+
+MODEL_NAMES=${MODEL_NAMES:-}
+if [[ -n "$MODEL_NAMES" ]]; then
+  MODELS=("$MODEL_NAMES")
+else
+  MODELS=("Qwen/Qwen3-0.6B")
+fi
+
+GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.6}
+BLOCK_SIZE=${BLOCK_SIZE:-128}
+VLLM_SERVE_EXTRA_ARGS=${VLLM_SERVE_EXTRA_ARGS:-}
+
+GIT_ROOT=$(git rev-parse --show-toplevel)
+SMI_BIN=$(which nvidia-smi || which rocm-smi || echo "")
+
+# ── KV transfer configs ─────────────────────────────────────────────────
+
+# Normal layout: OffloadingConnector prefers cross-layer but NixlConnector
+# does not, so MultiConnector.prefer_cross_layer_blocks = False.
+KV_CONFIG_NORMAL='{
+  "kv_connector":"MultiConnector",
+  "kv_role":"kv_both",
+  "kv_connector_extra_config":{
+    "connectors":[
+      {"kv_connector":"NixlConnector","kv_role":"kv_both"},
+      {"kv_connector":"OffloadingConnector","kv_role":"kv_both",
+       "kv_connector_extra_config":{"cpu_bytes_to_use":1000000000}}
+    ]
+  }
+}'
+# Remove whitespace for CLI safety
+KV_CONFIG_NORMAL=$(echo "$KV_CONFIG_NORMAL" | tr -d '[:space:]')
+
+# Cross-layer layout: both connectors prefer cross-layer blocks.
+KV_CONFIG_CROSS_LAYERS='{
+  "kv_connector":"MultiConnector",
+  "kv_role":"kv_both",
+  "kv_connector_extra_config":{
+    "connectors":[
+      {"kv_connector":"NixlConnector","kv_role":"kv_both",
+       "kv_connector_extra_config":{"enable_cross_layers_blocks":"True"}},
+      {"kv_connector":"OffloadingConnector","kv_role":"kv_both",
+       "kv_connector_extra_config":{"cpu_bytes_to_use":1000000000}}
+    ]
+  }
+}'
+KV_CONFIG_CROSS_LAYERS=$(echo "$KV_CONFIG_CROSS_LAYERS" | tr -d '[:space:]')
+
+# ── Helpers ──────────────────────────────────────────────────────────────
+
+trap 'kill $(jobs -pr) 2>/dev/null' SIGINT SIGTERM EXIT
+
+wait_for_server() {
+  local port=$1
+  timeout 1200 bash -c "
+    until curl -s localhost:${port}/v1/completions > /dev/null; do
+      sleep 1
+    done" && return 0 || return 1
+}
+
+cleanup_instances() {
+  echo "Cleaning up any running vLLM instances..."
+  pkill -f "vllm serve" || true
+  sleep 2
+}
+
+get_num_gpus() {
+  if [[ "$SMI_BIN" == *"nvidia"* ]]; then
+    $SMI_BIN --query-gpu=name --format=csv,noheader | wc -l
+  elif [[ "$SMI_BIN" == *"rocm"* ]]; then
+    $SMI_BIN -l | grep -c GPU
+  else
+    echo "1"
+  fi
+}
+
+# ── Run tests for one model with a given KV config ───────────────────────
+
+run_tests_for_model() {
+  local model_name=$1
+  local kv_config=$2
+  local label=$3
+
+  echo "================================================================"
+  echo "Testing model: $model_name ($label)"
+  echo "KV config: $kv_config"
+  echo "================================================================"
+
+  local PREFILL_PORT=8100
+  local DECODE_PORT=8200
+  local PREFILL_GPU=0
+  local DECODE_GPU=1
+  local PREFILL_SIDE_CHANNEL_PORT=5559
+  local DECODE_SIDE_CHANNEL_PORT=5659
+
+  # ── Start prefill instance ──
+  echo "Starting prefill instance on GPU $PREFILL_GPU, port $PREFILL_PORT"
+  BASE_CMD="CUDA_VISIBLE_DEVICES=$PREFILL_GPU \
+    VLLM_KV_CACHE_LAYOUT='HND' \
+    UCX_NET_DEVICES=all \
+    VLLM_NIXL_SIDE_CHANNEL_PORT=$PREFILL_SIDE_CHANNEL_PORT \
+    vllm serve $model_name \
+    --port $PREFILL_PORT \
+    --enforce-eager \
+    --block-size ${BLOCK_SIZE} \
+    --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
+    --tensor-parallel-size 1 \
+    --kv-transfer-config '$kv_config'"
+
+  if [[ -n "$VLLM_SERVE_EXTRA_ARGS" ]]; then
+    IFS=',' read -r -a extra_args <<< "$VLLM_SERVE_EXTRA_ARGS"
+    for arg in "${extra_args[@]}"; do
+      BASE_CMD="${BASE_CMD} $arg"
+    done
+  fi
+  eval "$BASE_CMD &"
+
+  # ── Start decode instance ──
+  echo "Starting decode instance on GPU $DECODE_GPU, port $DECODE_PORT"
+  BASE_CMD="CUDA_VISIBLE_DEVICES=$DECODE_GPU \
+    VLLM_KV_CACHE_LAYOUT='HND' \
+    UCX_NET_DEVICES=all \
+    VLLM_NIXL_SIDE_CHANNEL_PORT=$DECODE_SIDE_CHANNEL_PORT \
+    vllm serve $model_name \
+    --port $DECODE_PORT \
+    --enforce-eager \
+    --block-size ${BLOCK_SIZE} \
+    --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
+    --tensor-parallel-size 1 \
+    --kv-transfer-config '$kv_config'"
+
+  if [[ -n "$VLLM_SERVE_EXTRA_ARGS" ]]; then
+    IFS=',' read -r -a extra_args <<< "$VLLM_SERVE_EXTRA_ARGS"
+    for arg in "${extra_args[@]}"; do
+      BASE_CMD="${BASE_CMD} $arg"
+    done
+  fi
+  eval "$BASE_CMD &"
+
+  # ── Wait for servers ──
+  echo "Waiting for prefill instance on port $PREFILL_PORT to start..."
+  wait_for_server "$PREFILL_PORT"
+  echo "Waiting for decode instance on port $DECODE_PORT to start..."
+  wait_for_server "$DECODE_PORT"
+
+  # ── Start proxy ──
+  PROXY_CMD="python3 ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py --port 8192"
+  PROXY_CMD+=" --prefiller-hosts localhost"
+  PROXY_CMD+=" --prefiller-ports $PREFILL_PORT"
+  PROXY_CMD+=" --decoder-hosts localhost"
+  PROXY_CMD+=" --decoder-ports $DECODE_PORT"
+
+  echo "Starting proxy server with command: $PROXY_CMD"
+  $PROXY_CMD &
+  sleep 5
+
+  # ── Run accuracy test ──
+  echo "Running accuracy tests for $model_name ($label)"
+  TEST_MODEL=$model_name python3 -m pytest -s -x \
+    "${GIT_ROOT}"/tests/v1/kv_connector/nixl_integration/test_accuracy.py
+
+  # ── Cleanup ──
+  cleanup_instances
+  sleep 3
+}
+
+# ── Main ─────────────────────────────────────────────────────────────────
+
+for model in "${MODELS[@]}"; do
+  if [[ -z "${SKIP_NORMAL_LAYOUT:-}" ]]; then
+    run_tests_for_model "$model" "$KV_CONFIG_NORMAL" "MultiConnector normal layout"
+  fi
+
+  if [[ -z "${SKIP_CROSS_LAYERS:-}" ]]; then
+    run_tests_for_model "$model" "$KV_CONFIG_CROSS_LAYERS" "MultiConnector cross-layer layout"
+  fi
+done
+
+echo "All MultiConnector accuracy tests passed!"
diff --git a/tests/v1/kv_connector/nixl_integration/run_multi_connector_edge_case_test.sh b/tests/v1/kv_connector/nixl_integration/run_multi_connector_edge_case_test.sh
new file mode 100755
index 000000000000..a80950b34136
--- /dev/null
+++ b/tests/v1/kv_connector/nixl_integration/run_multi_connector_edge_case_test.sh
@@ -0,0 +1,174 @@
+#!/bin/bash
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#
+# Integration edge-case tests for MultiConnector (NixlConnector + OffloadingConnector).
+#
+# Launches a P/D setup where both prefill and decode instances use MultiConnector
+# wrapping NixlConnector and OffloadingConnector, then runs scenario-based edge
+# case tests including Prometheus metrics validation.
+#
+# Tests cover: block-size boundaries, decode-side cache-hit scenarios
+# (cold / full / partial), direct decode (control), and prefill-side CPU
+# offload recovery after GPU eviction.
+#
+# Usage:
+#   bash tests/v1/kv_connector/nixl_integration/run_multi_connector_edge_case_test.sh
+#
+# Environment variables:
+#   MODEL_NAMES              - model to test (default: Qwen/Qwen3-0.6B)
+#   KV_CACHE_MEMORY_BYTES    - GPU KV cache size in bytes (default: 268435456 = 256 MiB)
+#   BLOCK_SIZE               - KV cache block size (default: 128)
+#   VLLM_SERVE_EXTRA_ARGS    - comma-separated extra args for vllm serve
+set -xe
+
+# ── Configuration ────────────────────────────────────────────────────────
+
+MODEL_NAMES=${MODEL_NAMES:-}
+if [[ -n "$MODEL_NAMES" ]]; then
+  MODELS=("$MODEL_NAMES")
+else
+  MODELS=("Qwen/Qwen3-0.6B")
+fi
+
+KV_CACHE_MEMORY_BYTES=${KV_CACHE_MEMORY_BYTES:-268435456}  # 256 MiB
+MAX_MODEL_LEN=${MAX_MODEL_LEN:-2048}
+BLOCK_SIZE=${BLOCK_SIZE:-128}
+VLLM_SERVE_EXTRA_ARGS=${VLLM_SERVE_EXTRA_ARGS:-}
+
+GIT_ROOT=$(git rev-parse --show-toplevel)
+
+# ── KV transfer config ──────────────────────────────────────────────────
+
+KV_CONFIG='{
+  "kv_connector":"MultiConnector",
+  "kv_role":"kv_both",
+  "kv_connector_extra_config":{
+    "connectors":[
+      {"kv_connector":"NixlConnector","kv_role":"kv_both"},
+      {"kv_connector":"OffloadingConnector","kv_role":"kv_both",
+       "kv_connector_extra_config":{"cpu_bytes_to_use":2147483648}}
+    ]
+  }
+}'
+KV_CONFIG=$(echo "$KV_CONFIG" | tr -d '[:space:]')
+
+# ── Helpers ──────────────────────────────────────────────────────────────
+
+trap 'kill $(jobs -pr) 2>/dev/null || true' SIGINT SIGTERM EXIT
+
+wait_for_server() {
+  local port=$1
+  timeout 1200 bash -c "
+    until curl -s localhost:${port}/v1/completions > /dev/null; do
+      sleep 1
+    done" && return 0 || return 1
+}
+
+cleanup_instances() {
+  echo "Cleaning up any running vLLM instances and proxy..."
+  pkill -f "vllm serve" || true
+  pkill -f "toy_proxy_server.py" || true
+  sleep 2
+}
+
+# ── Run tests for one model ──────────────────────────────────────────────
+
+run_tests_for_model() {
+  local model_name=$1
+
+  echo "================================================================"
+  echo "Testing model: $model_name (MultiConnector edge cases)"
+  echo "================================================================"
+
+  local PREFILL_PORT=8100
+  local DECODE_PORT=8200
+  local PROXY_PORT=8192
+  local PREFILL_GPU=0
+  local DECODE_GPU=1
+  local PREFILL_SIDE_CHANNEL_PORT=5559
+  local DECODE_SIDE_CHANNEL_PORT=5659
+
+  # ── Start prefill instance ──
+  echo "Starting prefill instance on GPU $PREFILL_GPU, port $PREFILL_PORT"
+  BASE_CMD="CUDA_VISIBLE_DEVICES=$PREFILL_GPU \
+    VLLM_KV_CACHE_LAYOUT='HND' \
+    UCX_NET_DEVICES=all \
+    VLLM_NIXL_SIDE_CHANNEL_PORT=$PREFILL_SIDE_CHANNEL_PORT \
+    vllm serve \"$model_name\" \
+    --port $PREFILL_PORT \
+    --enforce-eager \
+    --block-size ${BLOCK_SIZE} \
+    --max-model-len $MAX_MODEL_LEN \
+    --kv-cache-memory-bytes $KV_CACHE_MEMORY_BYTES \
+    --tensor-parallel-size 1 \
+    --kv-transfer-config '$KV_CONFIG'"
+
+  if [[ -n "$VLLM_SERVE_EXTRA_ARGS" ]]; then
+    IFS=',' read -r -a extra_args <<< "$VLLM_SERVE_EXTRA_ARGS"
+    for arg in "${extra_args[@]}"; do
+      BASE_CMD="${BASE_CMD} $arg"
+    done
+  fi
+  eval "$BASE_CMD &"
+
+  # ── Start decode instance ──
+  echo "Starting decode instance on GPU $DECODE_GPU, port $DECODE_PORT"
+  BASE_CMD="CUDA_VISIBLE_DEVICES=$DECODE_GPU \
+    VLLM_KV_CACHE_LAYOUT='HND' \
+    UCX_NET_DEVICES=all \
+    VLLM_NIXL_SIDE_CHANNEL_PORT=$DECODE_SIDE_CHANNEL_PORT \
+    vllm serve \"$model_name\" \
+    --port $DECODE_PORT \
+    --enforce-eager \
+    --block-size ${BLOCK_SIZE} \
+    --max-model-len $MAX_MODEL_LEN \
+    --kv-cache-memory-bytes $KV_CACHE_MEMORY_BYTES \
+    --tensor-parallel-size 1 \
+    --kv-transfer-config '$KV_CONFIG'"
+
+  if [[ -n "$VLLM_SERVE_EXTRA_ARGS" ]]; then
+    IFS=',' read -r -a extra_args <<< "$VLLM_SERVE_EXTRA_ARGS"
+    for arg in "${extra_args[@]}"; do
+      BASE_CMD="${BASE_CMD} $arg"
+    done
+  fi
+  eval "$BASE_CMD &"
+
+  # ── Wait for servers ──
+  echo "Waiting for prefill instance on port $PREFILL_PORT to start..."
+  wait_for_server "$PREFILL_PORT"
+  echo "Waiting for decode instance on port $DECODE_PORT to start..."
+  wait_for_server "$DECODE_PORT"
+
+  # ── Start proxy ──
+  echo "Starting proxy server on port $PROXY_PORT"
+  python3 "${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py" \
+    --port "$PROXY_PORT" \
+    --prefiller-hosts localhost \
+    --prefiller-ports "$PREFILL_PORT" \
+    --decoder-hosts localhost \
+    --decoder-ports "$DECODE_PORT" &
+  sleep 5
+
+  # ── Run edge case tests ──
+  echo "Running MultiConnector edge case tests for $model_name"
+  PREFILL_PORT=$PREFILL_PORT \
+  DECODE_PORT=$DECODE_PORT \
+  PROXY_PORT=$PROXY_PORT \
+  BLOCK_SIZE=$BLOCK_SIZE \
+    python3 -m pytest -s -x \
+    "${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/test_multi_connector_edge_cases.py"
+
+  # ── Cleanup ──
+  cleanup_instances
+  sleep 3
+}
+
+# ── Main ─────────────────────────────────────────────────────────────────
+
+for model in "${MODELS[@]}"; do
+  run_tests_for_model "$model"
+done
+
+echo "All MultiConnector edge case tests passed!"
diff --git a/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh
index 407542eb82b2..9274e3c573cb 100644
--- a/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh
@@ -20,7 +20,8 @@ BLOCK_SIZE=${BLOCK_SIZE:-32}
 
 
 # execution env
-GIT_ROOT=$(git rev-parse --show-toplevel)
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+GIT_ROOT="${GIT_ROOT:-$(cd -- "${SCRIPT_DIR}/../../../.." && pwd -P)}"
 EXP_ROOT="${GIT_ROOT}/tests/v1/kv_connector/nixl_integration"
 CONDA_PATH=${CONDA_PATH:-"/home/${USER}/anaconda3"}
 CONDA_ENV_NAME=${CONDA_ENV_NAME:-"nixl"}
@@ -153,4 +154,4 @@ echo "-----P/D success----"
 rm "${OUTPUT_FILE}"
 cleanup
 
-exit 0
\ No newline at end of file
+exit 0
diff --git a/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh b/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh
index f32ef5e764c4..5969455025e0 100644
--- a/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh
@@ -20,7 +20,8 @@ BLOCK_SIZE=${BLOCK_SIZE:-32}
 
 
 # execution env
-GIT_ROOT=$(git rev-parse --show-toplevel)
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+GIT_ROOT="${GIT_ROOT:-$(cd -- "${SCRIPT_DIR}/../../../.." && pwd -P)}"
 EXP_ROOT="${GIT_ROOT}/tests/v1/kv_connector/nixl_integration"
 CONDA_PATH=${CONDA_PATH:-"/home/${USER}/anaconda3"}
 CONDA_ENV_NAME=${CONDA_ENV_NAME:-"nixl"}
diff --git a/tests/v1/kv_connector/nixl_integration/run_xpu_disagg_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_xpu_disagg_accuracy_test.sh
index 79863123b729..8340720f927c 100644
--- a/tests/v1/kv_connector/nixl_integration/run_xpu_disagg_accuracy_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_xpu_disagg_accuracy_test.sh
@@ -44,7 +44,8 @@ DECODER_ZE_AFFINITY_MASK=${DECODER_ZE_AFFINITY_MASK:-$(generate_affinity_mask "$
 
 
 # execution env
-GIT_ROOT=$(git rev-parse --show-toplevel)
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+GIT_ROOT="${GIT_ROOT:-$(cd -- "${SCRIPT_DIR}/../../../.." && pwd -P)}"
 EXP_ROOT="${GIT_ROOT}/tests/v1/kv_connector/nixl_integration"
 
 OUTPUT_FILE=${OUTPUT_FILE:-"${EXP_ROOT}/.xpu_accuracy_test_outputs.txt"}
diff --git a/tests/v1/kv_connector/nixl_integration/spec_decode_acceptance_test.sh b/tests/v1/kv_connector/nixl_integration/spec_decode_acceptance_test.sh
index c2c938ebffea..2c5622a2f0e1 100755
--- a/tests/v1/kv_connector/nixl_integration/spec_decode_acceptance_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/spec_decode_acceptance_test.sh
@@ -26,7 +26,10 @@
 #                         ROCm options: TRITON_ATTN, ROCM_ATTN, ROCM_AITER_FA,
 #                                       ROCM_AITER_UNIFIED_ATTN
 #                         NVIDIA options: FLASH_ATTN, FLASHINFER
-set -x
+#   VLLM_SSM_CONV_STATE_LAYOUT - SSM conv state layout (e.g. "DS" required for Mamba models)
+#   ENABLE_HMA_FLAG     - set to 1 to enable hybrid KV cache manager
+#   VLLM_SERVE_EXTRA_ARGS - comma-separated extra args for vllm serve
+set -ex
 
 # ── Model & spec decode config ──────────────────────────────────────────
 
@@ -51,8 +54,12 @@ PREFILLER_TP_SIZE=${PREFILLER_TP_SIZE:-1}
 DECODER_TP_SIZE=${DECODER_TP_SIZE:-1}
 GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.7}
 BLOCK_SIZE=${BLOCK_SIZE:-16}
+SERVER_HOST="${SERVER_HOST:-127.0.0.1}"
+NIXL_SIDE_CHANNEL_HOST="${NIXL_SIDE_CHANNEL_HOST:-$SERVER_HOST}"
 
-GIT_ROOT=$(git rev-parse --show-toplevel)
+# Resolve the repository root from the script location instead of `.git`.
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+GIT_ROOT="${GIT_ROOT:-$(cd -- "${SCRIPT_DIR}/../../../.." && pwd -P)}"
 
 SMI_BIN=$(which nvidia-smi || which rocm-smi || echo "")
 
@@ -78,6 +85,20 @@ if [[ -z "${ATTENTION_BACKEND:-}" ]]; then
 fi
 echo "Using attention backend: ${ATTENTION_BACKEND}"
 
+# ── HMA & extra serve args ────────────────────────────────────────────
+
+ENABLE_HMA_VAR=""
+if [[ -n "${ENABLE_HMA_FLAG:-}" ]]; then
+  ENABLE_HMA_VAR="--no-disable-hybrid-kv-cache-manager"
+  echo "HMA (Hybrid KV Cache Manager) enabled"
+fi
+
+EXTRA_SERVE_ARGS=()
+if [[ -n "${VLLM_SERVE_EXTRA_ARGS:-}" ]]; then
+  IFS=',' read -r -a EXTRA_SERVE_ARGS <<< "$VLLM_SERVE_EXTRA_ARGS"
+  echo "Extra serve args: ${EXTRA_SERVE_ARGS[*]}"
+fi
+
 cleanup_instances() {
   echo ""
   echo "Cleaning up..."
@@ -94,18 +115,57 @@ trap 'echo " Interrupted."; exit 130' INT TERM
 
 wait_for_server() {
   local port=$1
-  local deadline=600
+  local server_pid=$2
+  local server_name=$3
+  local endpoint=${4:-/v1/completions}
+  local deadline=${5:-600}
+  local elapsed=0
+  echo "Waiting for ${server_name} on port ${port}..."
+  while [ $elapsed -lt $deadline ]; do
+    if ! ps -p "$server_pid" > /dev/null 2>&1; then
+      local status=0
+      wait "$server_pid" || status=$?
+      echo "FAIL: ${server_name} process ${server_pid} exited with status ${status} before port ${port} became ready"
+      exit 1
+    fi
+    if curl -s "http://${SERVER_HOST}:${port}${endpoint}" > /dev/null 2>&1; then
+      echo "${server_name} on port ${port} ready"
+      return 0
+    fi
+    sleep 2
+    elapsed=$((elapsed + 2))
+  done
+  echo "FAIL: ${server_name} on port ${port} did not start within ${deadline}s"
+  exit 1
+}
+
+wait_for_nixl_side_channel() {
+  local host=$1
+  local port=$2
+  local server_pid=$3
+  local server_name=$4
+  local deadline=120
   local elapsed=0
-  echo "Waiting for server on port ${port}..."
+  echo "Waiting for ${server_name} NIXL side channel on ${host}:${port}..."
   while [ $elapsed -lt $deadline ]; do
-    if curl -s "localhost:${port}/v1/completions" > /dev/null 2>&1; then
-      echo "Server on port ${port} ready"
+    if ! ps -p "$server_pid" > /dev/null 2>&1; then
+      local status=0
+      wait "$server_pid" || status=$?
+      echo "FAIL: ${server_name} server process ${server_pid} exited with status ${status} before NIXL side channel ${host}:${port} became ready"
+      exit 1
+    fi
+    if python3 "${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/nixl_side_channel_probe.py" \
+      --host "$host" \
+      --port "$port" \
+      --timeout-ms 1000 > /dev/null 2>&1
+    then
+      echo "${server_name} NIXL side channel on ${host}:${port} ready"
       return 0
     fi
     sleep 2
     elapsed=$((elapsed + 2))
   done
-  echo "FAIL: Server on port ${port} did not start within ${deadline}s"
+  echo "FAIL: ${server_name} NIXL side channel ${host}:${port} did not start within ${deadline}s"
   exit 1
 }
 
@@ -156,6 +216,8 @@ run_test_for_device() {
   echo "KV buffer device:   ${kv_device}"
   echo "Attention backend:  ${ATTENTION_BACKEND}"
   echo "GPU platform:       ${GPU_PLATFORM}"
+  echo "Server host:        ${SERVER_HOST}"
+  echo "NIXL side channel:  ${NIXL_SIDE_CHANNEL_HOST}"
   echo "GPUs available:     ${ALL_GPUS[*]}"
   echo "================================================================"
 
@@ -165,7 +227,8 @@ run_test_for_device() {
   local DECODE_PORTS=()
   local GPU_IDX=0
 
-  # Start prefill instances
+  # Start prefill instances and wait for each one before allocating the next
+  # server. This keeps failures from leaving extra model servers spinning.
   for i in $(seq 0 $((NUM_PREFILL_INSTANCES-1))); do
     local GPU_ID="${ALL_GPUS[$GPU_IDX]}"
     GPU_IDX=$((GPU_IDX + 1))
@@ -182,6 +245,8 @@ run_test_for_device() {
     ${GPU_DEVICE_VAR}=$GPU_ID \
     VLLM_KV_CACHE_LAYOUT='HND' \
     UCX_NET_DEVICES=all \
+    ${VLLM_SSM_CONV_STATE_LAYOUT:+VLLM_SSM_CONV_STATE_LAYOUT=$VLLM_SSM_CONV_STATE_LAYOUT} \
+    VLLM_NIXL_SIDE_CHANNEL_HOST=$NIXL_SIDE_CHANNEL_HOST \
     VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT \
     vllm serve $MODEL_NAME \
       --port $PORT \
@@ -192,13 +257,18 @@ run_test_for_device() {
       --tensor-parallel-size $PREFILLER_TP_SIZE \
       --kv-transfer-config "$kv_config" \
       --speculative-config "$PREFILL_SPEC_CONFIG" \
-      --attention-backend $ATTENTION_BACKEND &
+      --attention-backend $ATTENTION_BACKEND \
+      ${ENABLE_HMA_VAR} \
+      ${EXTRA_SERVE_ARGS[@]+"${EXTRA_SERVE_ARGS[@]}"} &
+    local SERVER_PID=$!
 
-    PREFILL_HOSTS+=("localhost")
+    PREFILL_HOSTS+=("$SERVER_HOST")
     PREFILL_PORTS+=("$PORT")
+    wait_for_server "$PORT" "$SERVER_PID" "prefill"
+    wait_for_nixl_side_channel "$NIXL_SIDE_CHANNEL_HOST" "$SIDE_CHANNEL_PORT" "$SERVER_PID" "prefill"
   done
 
-  # Start decode instances
+  # Start decode instances after prefill is ready.
   for i in $(seq 0 $((NUM_DECODE_INSTANCES-1))); do
     local GPU_ID="${ALL_GPUS[$GPU_IDX]}"
     GPU_IDX=$((GPU_IDX + 1))
@@ -215,6 +285,8 @@ run_test_for_device() {
     ${GPU_DEVICE_VAR}=$GPU_ID \
     VLLM_KV_CACHE_LAYOUT='HND' \
     UCX_NET_DEVICES=all \
+    ${VLLM_SSM_CONV_STATE_LAYOUT:+VLLM_SSM_CONV_STATE_LAYOUT=$VLLM_SSM_CONV_STATE_LAYOUT} \
+    VLLM_NIXL_SIDE_CHANNEL_HOST=$NIXL_SIDE_CHANNEL_HOST \
     VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT \
     vllm serve $MODEL_NAME \
       --port $PORT \
@@ -225,18 +297,15 @@ run_test_for_device() {
       --tensor-parallel-size $DECODER_TP_SIZE \
       --kv-transfer-config "$kv_config" \
       --speculative-config "$DECODE_SPEC_CONFIG" \
-      --attention-backend $ATTENTION_BACKEND &
+      --attention-backend $ATTENTION_BACKEND \
+      ${ENABLE_HMA_VAR} \
+      ${EXTRA_SERVE_ARGS[@]+"${EXTRA_SERVE_ARGS[@]}"} &
+    local SERVER_PID=$!
 
-    DECODE_HOSTS+=("localhost")
+    DECODE_HOSTS+=("$SERVER_HOST")
     DECODE_PORTS+=("$PORT")
-  done
-
-  # Wait for servers
-  for PORT in "${PREFILL_PORTS[@]}"; do
-    wait_for_server "$PORT"
-  done
-  for PORT in "${DECODE_PORTS[@]}"; do
-    wait_for_server "$PORT"
+    wait_for_server "$PORT" "$SERVER_PID" "decode"
+    wait_for_nixl_side_channel "$NIXL_SIDE_CHANNEL_HOST" "$SIDE_CHANNEL_PORT" "$SERVER_PID" "decode"
   done
 
   # Start proxy
@@ -248,13 +317,16 @@ run_test_for_device() {
     --prefiller-ports ${PREFILL_PORTS[*]} \
     --decoder-hosts ${DECODE_HOSTS[*]} \
     --decoder-ports ${DECODE_PORTS[*]} &
+  local PROXY_PID=$!
 
-  sleep 5
+  wait_for_server "$PROXY_PORT" "$PROXY_PID" "proxy" "/healthcheck" 60
 
   # Run test
   echo "Running spec decode acceptance test (kv_buffer_device=${kv_device}, backend=${ATTENTION_BACKEND})..."
   DECODE_PORT=${DECODE_PORTS[0]} \
+  SERVER_HOST=$SERVER_HOST \
   TEST_MODEL=$MODEL_NAME \
+  SD_METHOD=$SD_METHOD \
   python3 -m pytest -s -x "${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/test_spec_decode_acceptance.py"
 
   # Tear down before next iteration
diff --git a/tests/v1/kv_connector/nixl_integration/test_accuracy.py b/tests/v1/kv_connector/nixl_integration/test_accuracy.py
index 23ea0d261102..31046e161280 100644
--- a/tests/v1/kv_connector/nixl_integration/test_accuracy.py
+++ b/tests/v1/kv_connector/nixl_integration/test_accuracy.py
@@ -9,7 +9,10 @@
 NUM_CONCURRENT = 100
 TASK = "gsm8k"
 FILTER = "exact_match,strict-match"
-RTOL = 0.03
+# TODO(#43186): Widened from 0.03 to absorb chunk_scan/SSU numeric jitter
+# on granite-4.0-h-tiny under NIXL PD; tighten when the kernel divergence
+# is fixed.
+RTOL = 0.05
 
 # Model-specific expected values
 EXPECTED_VALUES = {
@@ -19,7 +22,8 @@
     "deepseek-ai/DeepSeek-V2-Lite-Chat": 0.65,
     "google/gemma-3-4b-it": 0.74,
     "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8": 0.84,
-    "ibm-granite/granite-4.0-h-tiny": 0.80,
+    "ibm-granite/granite-4.0-h-tiny": 0.77,
+    "Qwen/Qwen3.5-0.8B": 0.33,
 }
 
 SIMPLE_PROMPT = (
diff --git a/tests/v1/kv_connector/nixl_integration/test_multi_connector_edge_cases.py b/tests/v1/kv_connector/nixl_integration/test_multi_connector_edge_cases.py
new file mode 100644
index 000000000000..f109190a4a0a
--- /dev/null
+++ b/tests/v1/kv_connector/nixl_integration/test_multi_connector_edge_cases.py
@@ -0,0 +1,477 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Integration edge-case tests for MultiConnector (NixlConnector + OffloadingConnector).
+
+Tests cover:
+  - Output correctness across block-size boundaries (proxy vs direct prefill).
+  - Decode-side Prometheus metrics validation (local_cache_hit,
+    external_kv_transfer, local_compute) for cold/warm/partial cache scenarios.
+  - Prefill-side CPU offload recovery after GPU cache eviction.
+
+Requires running servers started by run_multi_connector_edge_case_test.sh.
+"""
+
+import os
+import time
+import urllib.request
+
+import openai
+import regex as re
+
+# ── Server configuration from environment ─────────────────────────────────
+
+PREFILL_HOST = os.getenv("PREFILL_HOST", "localhost")
+PREFILL_PORT = os.environ["PREFILL_PORT"]
+DECODE_HOST = os.getenv("DECODE_HOST", "localhost")
+DECODE_PORT = os.environ["DECODE_PORT"]
+PROXY_HOST = os.getenv("PROXY_HOST", "localhost")
+PROXY_PORT = os.environ["PROXY_PORT"]
+BLOCK_SIZE = int(os.getenv("BLOCK_SIZE", "128"))
+
+# ── OpenAI clients ────────────────────────────────────────────────────────
+
+decode_client = openai.OpenAI(
+    api_key="EMPTY",
+    base_url=f"http://{DECODE_HOST}:{DECODE_PORT}/v1",
+)
+prefill_client = openai.OpenAI(
+    api_key="EMPTY",
+    base_url=f"http://{PREFILL_HOST}:{PREFILL_PORT}/v1",
+)
+proxy_client = openai.OpenAI(
+    api_key="EMPTY",
+    base_url=f"http://{PROXY_HOST}:{PROXY_PORT}/v1",
+)
+
+_MODEL = None
+
+
+def _get_model() -> str:
+    global _MODEL
+    if _MODEL is None:
+        models = decode_client.models.list()
+        _MODEL = models.data[0].id
+    return _MODEL
+
+
+def _complete(client: openai.OpenAI, prompt: str, max_tokens: int = 20):
+    """Send a completion request and return (text, prompt_tokens)."""
+    resp = client.completions.create(
+        model=_get_model(),
+        prompt=prompt,
+        max_tokens=max_tokens,
+        temperature=0,
+    )
+    return resp.choices[0].text, resp.usage.prompt_tokens
+
+
+# ── Prometheus metrics helpers ────────────────────────────────────────────
+
+_METRIC_RE = re.compile(
+    r'vllm:prompt_tokens_by_source_total\{.*?source="([^"]+)".*?\}\s+'
+    r"([\d.eE+\-]+)"
+)
+
+
+def _fetch_metrics(host: str, port: str) -> dict[str, float]:
+    """Scrape prompt_tokens_by_source counters from a vLLM server."""
+    body = urllib.request.urlopen(f"http://{host}:{port}/metrics").read().decode()
+    result = {
+        "local_compute": 0.0,
+        "local_cache_hit": 0.0,
+        "external_kv_transfer": 0.0,
+    }
+    for m in _METRIC_RE.finditer(body):
+        source, val = m.group(1), float(m.group(2))
+        if source in result:
+            result[source] += val
+    return result
+
+
+def _fetch_decode_metrics() -> dict[str, float]:
+    return _fetch_metrics(DECODE_HOST, DECODE_PORT)
+
+
+def _fetch_prefill_metrics() -> dict[str, float]:
+    return _fetch_metrics(PREFILL_HOST, PREFILL_PORT)
+
+
+_NIXL_BYTES_RE = re.compile(r"vllm:nixl_bytes_transferred_sum\b.*?\s+([\d.eE+\-]+)")
+
+
+def _fetch_nixl_bytes(host: str, port: str) -> float:
+    """Scrape total NIXL bytes transferred from a vLLM server."""
+    body = urllib.request.urlopen(f"http://{host}:{port}/metrics").read().decode()
+    total = 0.0
+    for m in _NIXL_BYTES_RE.finditer(body):
+        total += float(m.group(1))
+    return total
+
+
+_OFFLOAD_BYTES_RE = re.compile(
+    r'vllm:kv_offload_total_bytes_total\{.*?transfer_type="([^"]+)".*?\}\s+'
+    r"([\d.eE+\-]+)"
+)
+
+
+def _fetch_offload_bytes(host: str, port: str) -> dict[str, float]:
+    """Scrape kv_offload_total_bytes counters (CPU_to_GPU / GPU_to_CPU)."""
+    body = urllib.request.urlopen(f"http://{host}:{port}/metrics").read().decode()
+    result = {"CPU_to_GPU": 0.0, "GPU_to_CPU": 0.0}
+    for m in _OFFLOAD_BYTES_RE.finditer(body):
+        transfer_type, val = m.group(1), float(m.group(2))
+        if transfer_type in result:
+            result[transfer_type] += val
+    return result
+
+
+def _metrics_delta(before: dict, after: dict) -> dict[str, float]:
+    return {k: after.get(k, 0) - before.get(k, 0) for k in before}
+
+
+# ── Prompts (unique per test to avoid cross-test cache interference) ──────
+
+SHORT_PROMPT = "Red Hat is "
+
+MEDIUM_PROMPT = (
+    "Red Hat is the best company in the world to work for because it works "
+    "on open source software, which means that all the contributions are "
+    "delivered to the community. As a result,"
+)
+
+
+def _make_prompt(n_tokens: int) -> str:
+    """Build a prompt of ~n_tokens tokens (1 word ~ 1 token)."""
+    return "word " * n_tokens
+
+
+BLOCK_BOUNDARY_PROMPT = _make_prompt(BLOCK_SIZE)
+ABOVE_BOUNDARY_PROMPT = _make_prompt(BLOCK_SIZE + 2)
+MULTI_BLOCK_PROMPT = _make_prompt(BLOCK_SIZE * 4)
+
+FULL_CACHE_HIT_PROMPT = (  # noqa: E501
+    "The history of computing begins with Charles Babbage who designed the "
+    "Analytical Engine in the 1830s which is considered the first general "
+    "purpose computer design in history. Ada Lovelace is widely regarded as "
+    "the first computer programmer for her work on the Analytical Engine. "
+    "The modern era of computing began with Alan Turing who formalized the "
+    "concept of computation with his Turing machine in 1936. During World "
+    "War Two Turing worked at Bletchley Park to break the Enigma cipher. "
+    "After the war the first electronic computers were built including ENIAC "
+    "at the University of Pennsylvania and Colossus at Bletchley Park. "
+    "These early machines filled entire rooms and used vacuum tubes for logic. "
+    "The invention of the transistor at Bell Labs in 1947 revolutionized "
+    "computing by making smaller and more reliable machines possible. "
+    "The integrated circuit followed in the late 1950s combining multiple "
+    "transistors on a single chip. This led to the microprocessor in the 1970s "
+    "and eventually to the personal computer revolution of the 1980s."
+)
+
+PARTIAL_CACHE_PREFIX = (  # noqa: E501
+    "Machine learning has transformed the field of artificial intelligence "
+    "by enabling computers to learn patterns from data without being "
+    "explicitly programmed for every task. The field has evolved dramatically "
+    "since its inception in the 1950s when Arthur Samuel coined the term while "
+    "working at IBM. Early approaches focused on symbolic reasoning and expert "
+    "systems that encoded human knowledge as rules. The statistical revolution "
+    "of the 1990s shifted the paradigm toward data driven methods. Support "
+    "vector machines and random forests became popular for classification tasks. "
+    "The breakthrough of deep learning in 2012 with AlexNet winning ImageNet "
+    "changed everything. Neural networks with many layers could automatically "
+    "learn hierarchical feature representations from raw data."
+)
+PARTIAL_CACHE_EXTENDED = PARTIAL_CACHE_PREFIX + (
+    " Transformers have become the dominant architecture for natural language "
+    "processing tasks including translation, summarization, and generation. "
+    "The attention mechanism allows models to weigh the importance of different "
+    "parts of the input sequence. Large language models like GPT and BERT "
+    "demonstrated that pre-training on massive text corpora followed by fine "
+    "tuning on specific tasks could achieve state of the art results across "
+    "a wide range of benchmarks. Scaling laws suggest that larger models "
+    "trained on more data continue to improve in capability."
+)
+
+# ═══════════════════════════════════════════════════════════════════════════
+# Output correctness across block-size boundaries (decode-side metrics)
+#
+# Each test sends via proxy, verifies output matches prefill_direct at
+# temperature=0, and checks decode-side metrics for NIXL transfer.
+# ═══════════════════════════════════════════════════════════════════════════
+
+
+def test_short_prompt_correctness():
+    """Short prompt (< block_size): output matches prefill, NIXL used."""
+    n0 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    m0 = _fetch_decode_metrics()
+    proxy_text, _ = _complete(proxy_client, SHORT_PROMPT)
+    time.sleep(1)
+    m1 = _fetch_decode_metrics()
+    n1 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    d = _metrics_delta(m0, m1)
+
+    prefill_text, _ = _complete(prefill_client, SHORT_PROMPT)
+    print(f"SHORT PROMPT: {proxy_text=}, nixl_bytes_delta={n1 - n0}")
+    assert proxy_text == prefill_text
+    assert d["external_kv_transfer"] > 0, (
+        "NIXL transfer did not occur — decode may have silently fallen back "
+        "to local compute"
+    )
+    assert n1 - n0 > 0, (
+        f"expected nixl_bytes_transferred to increase, got delta={n1 - n0}"
+    )
+
+
+def test_block_boundary_correctness():
+    """Exactly block_size tokens: output matches prefill, NIXL used."""
+    n0 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    m0 = _fetch_decode_metrics()
+    proxy_text, pt = _complete(proxy_client, BLOCK_BOUNDARY_PROMPT)
+    time.sleep(1)
+    m1 = _fetch_decode_metrics()
+    n1 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    d = _metrics_delta(m0, m1)
+
+    prefill_text, _ = _complete(prefill_client, BLOCK_BOUNDARY_PROMPT)
+    print(f"BLOCK BOUNDARY: {pt} prompt tokens, nixl_bytes_delta={n1 - n0}")
+    assert proxy_text == prefill_text
+    assert d["external_kv_transfer"] > 0, (
+        "NIXL transfer did not occur — decode may have silently fallen back "
+        "to local compute"
+    )
+    assert n1 - n0 > 0, (
+        f"expected nixl_bytes_transferred to increase, got delta={n1 - n0}"
+    )
+
+
+def test_above_block_boundary_correctness():
+    """Just above block_size (partial second block): output matches prefill."""
+    n0 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    m0 = _fetch_decode_metrics()
+    proxy_text, pt = _complete(proxy_client, ABOVE_BOUNDARY_PROMPT)
+    time.sleep(1)
+    m1 = _fetch_decode_metrics()
+    n1 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    d = _metrics_delta(m0, m1)
+
+    prefill_text, _ = _complete(prefill_client, ABOVE_BOUNDARY_PROMPT)
+    print(f"ABOVE BOUNDARY: {pt} prompt tokens, nixl_bytes_delta={n1 - n0}")
+    assert proxy_text == prefill_text
+    assert d["external_kv_transfer"] > 0, (
+        "NIXL transfer did not occur — decode may have silently fallen back "
+        "to local compute"
+    )
+    assert n1 - n0 > 0, (
+        f"expected nixl_bytes_transferred to increase, got delta={n1 - n0}"
+    )
+
+
+def test_multi_block_correctness():
+    """Multi-block prompt (~4x block_size): output matches prefill."""
+    n0 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    m0 = _fetch_decode_metrics()
+    proxy_text, pt = _complete(proxy_client, MULTI_BLOCK_PROMPT)
+    time.sleep(1)
+    m1 = _fetch_decode_metrics()
+    n1 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    d = _metrics_delta(m0, m1)
+
+    prefill_text, _ = _complete(prefill_client, MULTI_BLOCK_PROMPT)
+    print(f"MULTI BLOCK: {pt} prompt tokens, nixl_bytes_delta={n1 - n0}")
+    assert proxy_text == prefill_text
+    assert d["external_kv_transfer"] > 0, (
+        "NIXL transfer did not occur — decode may have silently fallen back "
+        "to local compute"
+    )
+    assert n1 - n0 > 0, (
+        f"expected nixl_bytes_transferred to increase, got delta={n1 - n0}"
+    )
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# Decode-side KV source validation via Prometheus metrics
+#
+# Scrape vllm:prompt_tokens_by_source_total from the DECODE server to
+# verify which code path (GPU prefix, NIXL, local compute) was exercised.
+# ═══════════════════════════════════════════════════════════════════════════
+
+
+def test_cold_decode_no_cache_hit_metrics():
+    """Cold decode: external_kv_transfer==P, local_cache_hit==0, local_compute==0."""
+    n0 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    m0 = _fetch_decode_metrics()
+    proxy_text, P = _complete(proxy_client, MEDIUM_PROMPT)
+    time.sleep(1)
+    m1 = _fetch_decode_metrics()
+    n1 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    d = _metrics_delta(m0, m1)
+
+    print(f"COLD DECODE: {P} prompt tokens, metrics delta: {d}")
+    print(f"  nixl_bytes_delta={n1 - n0}")
+    assert len(proxy_text) > 0, "proxy returned empty response"
+    assert d["external_kv_transfer"] == P, (
+        f"expected external_kv_transfer={P}, got {d['external_kv_transfer']}"
+    )
+    assert d["local_compute"] == 0, (
+        f"expected local_compute=0, got {d['local_compute']}"
+    )
+    assert d["local_cache_hit"] == 0, (
+        f"expected local_cache_hit=0, got {d['local_cache_hit']}"
+    )
+    assert n1 - n0 > 0, (
+        f"expected nixl_bytes_transferred to increase, got delta={n1 - n0}"
+    )
+
+
+def test_full_decode_gpu_cache_hit_metrics():
+    """Prime decode, resend via proxy: local_cache_hit==cached blocks."""
+    decode_text, _ = _complete(decode_client, FULL_CACHE_HIT_PROMPT)
+
+    n0 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    m0 = _fetch_decode_metrics()
+    proxy_text, P = _complete(proxy_client, FULL_CACHE_HIT_PROMPT)
+    time.sleep(1)
+    m1 = _fetch_decode_metrics()
+    n1 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    d = _metrics_delta(m0, m1)
+
+    cached = (P // BLOCK_SIZE) * BLOCK_SIZE
+    expected_nixl = P - cached
+
+    print(f"FULL CACHE HIT: {P} tokens, cached={cached}, nixl={expected_nixl}")
+    print(f"  metrics delta: {d}, nixl_bytes_delta={n1 - n0}")
+    assert len(proxy_text) > 0, "proxy returned empty response"
+    assert d["local_cache_hit"] == cached, (
+        f"expected local_cache_hit={cached}, got {d['local_cache_hit']}"
+    )
+    assert d["external_kv_transfer"] == expected_nixl, (
+        f"expected external_kv_transfer={expected_nixl}, "
+        f"got {d['external_kv_transfer']}"
+    )
+    assert d["local_compute"] == 0, (
+        f"expected local_compute=0, got {d['local_compute']}"
+    )
+    assert n1 - n0 > 0, (
+        f"expected nixl_bytes_transferred to increase (partial NIXL for "
+        f"uncached tail), got delta={n1 - n0}"
+    )
+
+
+def test_partial_decode_gpu_cache_hit_metrics():
+    """Prime with prefix, extend via proxy: partial local_cache_hit."""
+    _, prefix_tokens = _complete(decode_client, PARTIAL_CACHE_PREFIX)
+    cached = (prefix_tokens // BLOCK_SIZE) * BLOCK_SIZE
+    assert cached >= BLOCK_SIZE, (
+        f"PARTIAL_CACHE_PREFIX too short ({prefix_tokens} tokens) for partial "
+        f"cache hit test with block_size={BLOCK_SIZE}"
+    )
+
+    n0 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    m0 = _fetch_decode_metrics()
+    proxy_text, P = _complete(proxy_client, PARTIAL_CACHE_EXTENDED)
+    time.sleep(1)
+    m1 = _fetch_decode_metrics()
+    n1 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    d = _metrics_delta(m0, m1)
+
+    expected_nixl = P - cached
+
+    print(f"PARTIAL CACHE HIT: {P} tokens, cached={cached}, nixl={expected_nixl}")
+    print(f"  metrics delta: {d}, nixl_bytes_delta={n1 - n0}")
+    assert len(proxy_text) > 0, "proxy returned empty response"
+    assert d["external_kv_transfer"] == expected_nixl, (
+        f"expected external_kv_transfer={expected_nixl}, "
+        f"got {d['external_kv_transfer']}"
+    )
+    assert d["local_cache_hit"] == cached, (
+        f"expected local_cache_hit={cached}, got {d['local_cache_hit']}"
+    )
+    assert d["local_compute"] == 0, (
+        f"expected local_compute=0, got {d['local_compute']}"
+    )
+    assert n1 - n0 > 0, (
+        f"expected nixl_bytes_transferred to increase (NIXL for uncached "
+        f"tail), got delta={n1 - n0}"
+    )
+
+
+def test_decode_direct_all_local_compute():
+    """Direct decode (no proxy): local_compute==P, no transfers."""
+    prompt = "The speed of light is approximately"
+    n0 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    m0 = _fetch_decode_metrics()
+    text, P = _complete(decode_client, prompt)
+    time.sleep(1)
+    m1 = _fetch_decode_metrics()
+    n1 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
+    d = _metrics_delta(m0, m1)
+
+    print(f"DIRECT DECODE: {text!r} ({P} tokens), metrics delta: {d}")
+    print(f"  nixl_bytes_delta={n1 - n0}")
+    assert len(text.strip()) > 0, "empty output from direct decode"
+    assert d["local_compute"] == P, (
+        f"expected local_compute={P}, got {d['local_compute']}"
+    )
+    assert d["external_kv_transfer"] == 0, (
+        f"expected external_kv_transfer=0, got {d['external_kv_transfer']}"
+    )
+    assert n1 - n0 == 0, (
+        f"expected no nixl_bytes_transferred for direct decode, got delta={n1 - n0}"
+    )
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# Prefill-side CPU offload validation via Prometheus metrics
+#
+# Scrape vllm:prompt_tokens_by_source_total from the PREFILL server.
+# Exercises the OffloadingConnector read path: after GPU cache eviction,
+# the OffloadingConnector restores KV from CPU (NixlConnector cannot help
+# for direct requests without kv_transfer_params).
+# ═══════════════════════════════════════════════════════════════════════════
+
+EVICTION_PROMPT = (  # noqa: E501
+    "Quantum computing leverages quantum mechanical phenomena like "
+    "superposition and entanglement to perform computations that would be "
+    "intractable for classical computers. This has implications for "
+    "cryptography, drug discovery, and optimization problems. Richard Feynman "
+    "first proposed the idea of quantum computing in 1982 when he observed "
+    "that simulating quantum systems on classical computers was exponentially "
+    "hard. Peter Shor developed a quantum algorithm for factoring large "
+    "numbers in polynomial time which threatens RSA encryption. Grover search "
+    "algorithm provides a quadratic speedup for unstructured search problems. "
+    "Companies like IBM Google and Rigetti are building quantum processors "
+    "with increasing numbers of qubits. Error correction remains a major "
+    "challenge as quantum states are extremely fragile and prone to decoherence."
+)
+
+
+def test_prefill_cpu_offload_after_gpu_eviction():
+    """Prefill-side: evict GPU, re-request directly, CPU offload restores KV."""
+    text1, P = _complete(prefill_client, EVICTION_PROMPT, max_tokens=30)
+
+    for i in range(100):
+        _complete(prefill_client, f"Eviction prompt number {i}: " + _make_prompt(200))
+
+    ob0 = _fetch_offload_bytes(PREFILL_HOST, PREFILL_PORT)
+    m0 = _fetch_prefill_metrics()
+    text2, _ = _complete(prefill_client, EVICTION_PROMPT, max_tokens=30)
+
+    cpu_to_gpu_delta = 0.0
+    for _ in range(10):
+        time.sleep(1)
+        ob1 = _fetch_offload_bytes(PREFILL_HOST, PREFILL_PORT)
+        cpu_to_gpu_delta = ob1["CPU_to_GPU"] - ob0["CPU_to_GPU"]
+        if cpu_to_gpu_delta > 0:
+            break
+
+    m1 = _fetch_prefill_metrics()
+    d = _metrics_delta(m0, m1)
+
+    print(f"PREFILL CPU OFFLOAD: run1={text1[:60]!r}, run2={text2[:60]!r}")
+    print(f"  prefill metrics delta: {d}")
+    print(f"  cpu_to_gpu bytes delta: {cpu_to_gpu_delta}")
+    assert text1 == text2, f"inconsistent after eviction: {text1=!r}, {text2=!r}"
+    assert cpu_to_gpu_delta > 0, (
+        f"expected cpu_to_gpu bytes > 0 (OffloadingConnector should restore "
+        f"KV from CPU to GPU), got {cpu_to_gpu_delta}"
+    )
diff --git a/tests/v1/kv_connector/nixl_integration/test_nixl_imports.py b/tests/v1/kv_connector/nixl_integration/test_nixl_imports.py
new file mode 100644
index 000000000000..d88dfc318161
--- /dev/null
+++ b/tests/v1/kv_connector/nixl_integration/test_nixl_imports.py
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""NIXL import canaries for CUDA wheel selection."""
+
+import importlib
+import importlib.metadata as metadata
+import pathlib
+import subprocess
+import sys
+
+import pytest
+import torch
+
+
+def _print_distribution_version(package_name: str) -> None:
+    try:
+        version = metadata.version(package_name)
+    except metadata.PackageNotFoundError:
+        version = "not installed"
+    print(f"{package_name}: {version}")
+
+
+@pytest.mark.skipif(torch.version.cuda is None, reason="CUDA NIXL EP canary")
+def test_nixl_and_nixl_ep_imports() -> None:
+    """Verify both core NIXL and the NIXL EP extension import successfully."""
+    print(f"torch cuda: {torch.version.cuda}")
+    for package_name in ("nixl", "nixl-cu12", "nixl-cu13"):
+        _print_distribution_version(package_name)
+
+    nixl = importlib.import_module("nixl")
+    print(f"nixl: {nixl.__file__}")
+
+    # Exercise the core NIXL bindings used by NixlConnector.
+    importlib.import_module("nixl._api")
+    importlib.import_module("nixl._bindings")
+
+    # Exercise the NIXL EP extension used by fused MoE expert parallelism.
+    nixl_ep = importlib.import_module("nixl_ep")
+    print(f"nixl_ep: {nixl_ep.__file__}")
+
+    assert nixl_ep.__file__ is not None
+    extension_dir = pathlib.Path(nixl_ep.__file__).parent
+    extension_files = sorted(extension_dir.glob("nixl_ep_cpp*.so"))
+    assert extension_files, f"No nixl_ep_cpp extension found in {extension_dir}"
+
+    extension_file = extension_files[0]
+    completed = subprocess.run(
+        ["ldd", str(extension_file)],
+        capture_output=True,
+        check=False,
+        text=True,
+    )
+    print(completed.stdout)
+    if completed.stderr:
+        print(completed.stderr, file=sys.stderr)
+
+    assert completed.returncode == 0
+    if torch.version.cuda is not None:
+        cuda_major = torch.version.cuda.split(".", maxsplit=1)[0]
+        expected_cudart = f"libcudart.so.{cuda_major}"
+        assert expected_cudart in completed.stdout
+        assert f"{expected_cudart} => not found" not in completed.stdout
diff --git a/tests/v1/kv_connector/nixl_integration/test_spec_decode_acceptance.py b/tests/v1/kv_connector/nixl_integration/test_spec_decode_acceptance.py
index b747f953a220..c86a407ff8e3 100644
--- a/tests/v1/kv_connector/nixl_integration/test_spec_decode_acceptance.py
+++ b/tests/v1/kv_connector/nixl_integration/test_spec_decode_acceptance.py
@@ -1,19 +1,18 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""NixlConnector PD + EAGLE3 speculative decoding acceptance length test.
+"""NixlConnector PD + speculative decoding acceptance length test.
 
   - Loads MT-Bench prompts (80 prompts, 256 output tokens)
   - Sends through the PD proxy (completions API)
   - Scrapes Prometheus metrics from the decode server
-  - Asserts acceptance length matches standalone EAGLE3 baselines
+  - Asserts acceptance metrics match standalone baselines
 
-Baselines from tests/v1/spec_decode/test_acceptance_length.py
-(standalone EAGLE3 with same model/drafter on MT-Bench, temp=0).
-PD disaggregation via NixlConnector should match within tolerance.
+Supports EAGLE3 (default) and MTP, selected via SD_METHOD env var.
 
 Environment variables (set by spec_decode_acceptance_test.sh):
     TEST_MODEL   - target model name
     DECODE_PORT  - port of the decode vLLM server (for /metrics)
+    SD_METHOD    - "eagle3" (default) or "mtp"
 """
 
 import os
@@ -27,31 +26,42 @@
 
 from vllm.benchmarks.datasets import get_samples
 
-PROXY_BASE_URL = "http://localhost:8192/v1"
+SERVER_HOST = os.environ.get("SERVER_HOST", "127.0.0.1")
+PROXY_BASE_URL = f"http://{SERVER_HOST}:8192/v1"
 DECODE_PORT = os.environ.get("DECODE_PORT", "8200")
 MODEL_NAME = os.environ.get("TEST_MODEL", "meta-llama/Llama-3.1-8B-Instruct")
+SD_METHOD = os.environ.get("SD_METHOD", "eagle3").lower()
 
 
 @dataclass
-class Eagle3ModelConfig:
-    verifier: str
-    drafter: str
+class ModelConfig:
+    model: str
+    method: str
     expected_acceptance_length: float
+    drafter: str = ""
     expected_acceptance_lengths_per_pos: list[float] = field(default_factory=list)
+    expected_acceptance_rate: float | None = None
     id: str = ""
     rtol: float | None = None
 
 
-# Standalone EAGLE3 baselines (MT-Bench, 80 prompts, 256 tokens, temp=0).
-# Source: tests/v1/spec_decode/test_acceptance_length.py
-EAGLE3_MODEL_CONFIGS = [
-    Eagle3ModelConfig(
-        verifier="meta-llama/Llama-3.1-8B-Instruct",
+# Standalone baselines (MT-Bench, 80 prompts, 256 tokens, temp=0).
+# EAGLE3 source: tests/v1/spec_decode/test_acceptance_length.py
+MODEL_CONFIGS = [
+    ModelConfig(
+        model="meta-llama/Llama-3.1-8B-Instruct",
+        method="eagle3",
         drafter="RedHatAI/Llama-3.1-8B-Instruct-speculator.eagle3",
         expected_acceptance_length=2.60,
         expected_acceptance_lengths_per_pos=[0.7296, 0.5208, 0.3545],
         id="llama3-8b-eagle3",
     ),
+    ModelConfig(
+        model="Qwen/Qwen3.5-0.8B-Base",
+        method="mtp",
+        expected_acceptance_length=1.798,
+        id="qwen35-0.8b-mtp",
+    ),
 ]
 
 DEFAULT_NUM_PROMPTS = 80
@@ -59,14 +69,14 @@ class Eagle3ModelConfig:
 DEFAULT_RTOL = 0.05
 
 
-def _get_model_config() -> Eagle3ModelConfig:
-    """Get the model config matching MODEL_NAME."""
-    for config in EAGLE3_MODEL_CONFIGS:
-        if config.verifier == MODEL_NAME:
+def _get_model_config() -> ModelConfig:
+    """Get the model config matching MODEL_NAME and SD_METHOD."""
+    for config in MODEL_CONFIGS:
+        if config.model == MODEL_NAME and config.method == SD_METHOD:
             return config
     raise ValueError(
-        f"No Eagle3ModelConfig found for model {MODEL_NAME}. "
-        f"Available: {[c.verifier for c in EAGLE3_MODEL_CONFIGS]}"
+        f"No config for model={MODEL_NAME}, method={SD_METHOD}. "
+        f"Available: {[(c.model, c.method) for c in MODEL_CONFIGS]}"
     )
 
 
@@ -101,7 +111,7 @@ def _get_mt_bench_prompts() -> list[str]:
 
 def _fetch_metric(metric_name: str) -> float:
     """Fetch a single counter metric from the decode server's /metrics."""
-    url = f"http://localhost:{DECODE_PORT}/metrics"
+    url = f"http://{SERVER_HOST}:{DECODE_PORT}/metrics"
     body = urlopen(url).read().decode()
     for line in body.split("\n"):
         if line.startswith(metric_name + "{") or line.startswith(metric_name + " "):
@@ -111,7 +121,7 @@ def _fetch_metric(metric_name: str) -> float:
 
 def _fetch_per_position_acceptance() -> dict[int, float]:
     """Fetch per-position acceptance counts from decode /metrics."""
-    url = f"http://localhost:{DECODE_PORT}/metrics"
+    url = f"http://{SERVER_HOST}:{DECODE_PORT}/metrics"
     body = urlopen(url).read().decode()
     counts: dict[int, float] = {}
     for line in body.split("\n"):
@@ -160,47 +170,60 @@ def test_spec_decode_acceptance_length():
     assert n_drafts > 0, "No spec-decode drafts were generated"
 
     acceptance_length = 1 + (n_accepted / n_drafts)
-
-    per_pos_counts = _fetch_per_position_acceptance()
-    per_pos_rates = [
-        per_pos_counts.get(i, 0) / n_drafts
-        for i in range(len(config.expected_acceptance_lengths_per_pos))
-    ]
-
-    # ── Report ────────────────────────────────────────────────────────
     expected = config.expected_acceptance_length
-    expected_per_pos = config.expected_acceptance_lengths_per_pos
 
     print(
         f"\n{config.id}: acceptance_length={acceptance_length:.3f} "
         f"(expected={expected:.3f})"
     )
     print(f"  Drafts: {n_drafts:.0f}, Accepted: {n_accepted:.0f}")
-    for i, (actual, exp) in enumerate(zip(per_pos_rates, expected_per_pos)):
-        print(f"  Position {i}: {actual:.4f} (expected: {exp:.4f})")
 
-    # ── Assert overall acceptance length ──────────────────────────────
+    # ── Assert acceptance length (all methods) ────────────────────────
     rel_error = abs(acceptance_length - expected) / expected
-
     assert rel_error <= rtol, (
         f"Acceptance length regression for {config.id}! "
         f"Expected: {expected:.3f}, "
         f"Got: {acceptance_length:.3f}, "
-        f"Relative error: {rel_error:.2%} (tolerance: {rtol:.0%}). "
-        f"This may indicate drafter KV was not correctly transferred."
+        f"Relative error: {rel_error:.2%} (tolerance: {rtol:.0%})"
     )
 
-    # ── Assert per-position acceptance ────────────────────────────────
-    for i, (actual, exp) in enumerate(zip(per_pos_rates, expected_per_pos)):
-        if exp > 0:
-            pos_err = abs(actual - exp) / exp
-            assert pos_err <= rtol, (
-                f"Per-position acceptance regression at position {i} "
-                f"for {config.id}! "
-                f"Expected: {exp:.4f}, Got: {actual:.4f}, "
-                f"Relative error: {pos_err:.2%} "
-                f"(tolerance: {rtol:.0%})"
-            )
+    # ── Assert per-position acceptance (EAGLE3) ───────────────────────
+    if config.expected_acceptance_lengths_per_pos:
+        per_pos_counts = _fetch_per_position_acceptance()
+        per_pos_rates = [
+            per_pos_counts.get(i, 0) / n_drafts
+            for i in range(len(config.expected_acceptance_lengths_per_pos))
+        ]
+        for i, (actual, exp) in enumerate(
+            zip(per_pos_rates, config.expected_acceptance_lengths_per_pos)
+        ):
+            print(f"  Position {i}: {actual:.4f} (expected: {exp:.4f})")
+            if exp > 0:
+                pos_err = abs(actual - exp) / exp
+                assert pos_err <= rtol, (
+                    f"Per-position regression at pos {i} for {config.id}! "
+                    f"Expected: {exp:.4f}, Got: {actual:.4f}, "
+                    f"Relative error: {pos_err:.2%} (tolerance: {rtol:.0%})"
+                )
+
+    # ── Assert acceptance rate (MTP) ──────────────────────────────────
+    if config.expected_acceptance_rate is not None:
+        n_draft_tokens = _fetch_metric("vllm:spec_decode_num_draft_tokens_total")
+        acceptance_rate = n_accepted / n_draft_tokens if n_draft_tokens > 0 else 0.0
+        print(
+            f"  Acceptance rate: {acceptance_rate:.3f} "
+            f"(expected: {config.expected_acceptance_rate:.3f})"
+        )
+        rate_err = (
+            abs(acceptance_rate - config.expected_acceptance_rate)
+            / config.expected_acceptance_rate
+        )
+        assert rate_err <= rtol, (
+            f"Acceptance rate regression for {config.id}! "
+            f"Expected: {config.expected_acceptance_rate:.3f}, "
+            f"Got: {acceptance_rate:.3f}, "
+            f"Relative error: {rate_err:.2%} (tolerance: {rtol:.0%})"
+        )
 
     print(
         f"\n=== PASS: {config.id} acceptance length {acceptance_length:.3f} "
diff --git a/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py b/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
index b92d3fcd6fb8..1a910c8d24dc 100644
--- a/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
+++ b/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
@@ -173,6 +173,9 @@ async def send_request_to_service(
         req_data["max_completion_tokens"] = 1
     if "stream_options" in req_data:
         del req_data["stream_options"]
+    # These args are not supported for P
+    min_tokens = req_data.pop("min_tokens", None)
+    min_completion_tokens = req_data.pop("min_completion_tokens", None)
     headers = {
         "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
         "X-Request-Id": request_id,
@@ -187,6 +190,10 @@ async def send_request_to_service(
     # otherwise, it would http.ReadError
     await response.aread()
 
+    # Add back the min_tokens and min_completion_tokens so D can use them
+    req_data["min_tokens"] = min_tokens
+    req_data["min_completion_tokens"] = min_completion_tokens
+
     return response
 
 
diff --git a/tests/v1/kv_connector/unit/offloading_connector/test_scheduler.py b/tests/v1/kv_connector/unit/offloading_connector/test_scheduler.py
index e96ce29e95e6..78caba5f7537 100644
--- a/tests/v1/kv_connector/unit/offloading_connector/test_scheduler.py
+++ b/tests/v1/kv_connector/unit/offloading_connector/test_scheduler.py
@@ -1,68 +1,84 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from collections.abc import Iterable
+from unittest.mock import MagicMock
 
 import pytest
+import torch
 
 from tests.v1.kv_connector.unit.offloading_connector.utils import (
     generate_store_output,
+    to_keys,
 )
 from tests.v1.kv_connector.unit.utils import EOS_TOKEN_ID
 from vllm.distributed.kv_events import BlockRemoved, BlockStored
+from vllm.distributed.kv_transfer.kv_connector.v1.offloading.scheduler import (
+    OffloadingConnectorScheduler,
+)
 from vllm.v1.core.kv_cache_utils import BlockHash
-from vllm.v1.kv_offload.abstract import OffloadingEvent
+from vllm.v1.kv_cache_interface import (
+    FullAttentionSpec,
+    KVCacheGroupSpec,
+    SlidingWindowSpec,
+)
+from vllm.v1.kv_offload.base import (
+    OffloadingEvent,
+    OffloadingManager,
+    ReqContext,
+    get_offload_block_hash,
+)
 from vllm.v1.request import RequestStatus
 
 
 @pytest.mark.parametrize("async_scheduling", [True, False])
 def test_offloading_connector(request_runner, async_scheduling: bool):
-    offloaded_block_size = 12
-    gpu_block_size = 4
+    block_size = 4
+    block_size_factor = 3
+    offloaded_block_size = block_size * block_size_factor
     num_gpu_blocks = 100
-    block_size_factor = offloaded_block_size // gpu_block_size
 
     runner = request_runner(
-        offloaded_block_size=offloaded_block_size,
-        gpu_block_size=gpu_block_size,
+        block_size=block_size,
         num_gpu_blocks=num_gpu_blocks,
         async_scheduling=async_scheduling,
+        block_size_factor=block_size_factor,
     )
 
     # 3 blocks, store just the middle block (skip first and last)
     # blocks = [0, 1, 2], [3, 4, 5], [6, 7, 8]
     runner.new_request(token_ids=[0] * offloaded_block_size * 3)
-    runner.manager.prepare_store.side_effect = (
-        lambda block_hashes: generate_store_output(list(block_hashes)[1:2])
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(list(keys)[1:2])
     )
     runner.run(decoded_tokens=[0])
 
     # add block missing 1 token -> no offload
     runner.run(
         decoded_tokens=[0] * (offloaded_block_size - 1),
-        expected_stored_gpu_block_indexes=(3, 4, 5),
+        expected_stored=(3, 4, 5),
     )
-    runner.manager.prepare_store.assert_not_called()
+    runner.manager.touch.assert_not_called()
 
     # +1 token -> single block, fail prepare_store
-    runner.manager.prepare_store.side_effect = lambda block_hashes: None
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: None
     runner.run(decoded_tokens=[0])
     runner.manager.prepare_store.assert_called()
 
     # 1 more block (+ token for async scheduling)
     # now set block_hashes_to_store = []
-    runner.manager.prepare_store.side_effect = (
-        lambda block_hashes: generate_store_output([])
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output([])
     )
     runner.run(decoded_tokens=[0] * (offloaded_block_size + 1))
 
     # 1 more block (+ token for kicking off offloading)
     # now check touch was called with all 6 blocks
-    runner.manager.prepare_store.side_effect = (
-        lambda block_hashes: generate_store_output(block_hashes)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
     )
     runner.run(
         decoded_tokens=[0] * (offloaded_block_size + 1),
-        expected_stored_gpu_block_indexes=(15, 16, 17),
+        expected_stored=(15, 16, 17),
     )
     runner.manager.touch.assert_called()
     block_hashes1 = list(runner.manager.touch.call_args.args[0])
@@ -85,62 +101,53 @@ def test_offloading_connector(request_runner, async_scheduling: bool):
     # terminate request
     runner.run(
         decoded_tokens=[EOS_TOKEN_ID],
-        expected_stored_gpu_block_indexes=tuple(range(6 * block_size_factor)),
+        expected_stored=tuple(range(6 * block_size_factor)),
     )
 
     # full_block_tokens - num_computed_tokens < offloaded_block_size
     runner.new_request(
-        token_ids=[0] * gpu_block_size + [1] * (offloaded_block_size - gpu_block_size)
+        token_ids=[0] * block_size + [1] * (offloaded_block_size - block_size)
     )
-    runner.manager.prepare_store.side_effect = (
-        lambda block_hashes: generate_store_output([])
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output([])
     )
     runner.run(decoded_tokens=[EOS_TOKEN_ID])
     runner.manager.lookup.assert_not_called()
 
     # single block lookup with no hits
     runner.new_request(token_ids=[1] * offloaded_block_size)
-    runner.manager.prepare_store.side_effect = (
-        lambda block_hashes: generate_store_output([])
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output([])
     )
     runner.run(decoded_tokens=[EOS_TOKEN_ID])
-    runner.manager.lookup.assert_called()
-    assert len(list(runner.manager.lookup.call_args.args[0])) == 1
+    runner.manager.lookup.assert_called_once()
 
     # single block lookup with a hit
     runner.scheduler.reset_prefix_cache()
     runner.new_request(token_ids=[0] * offloaded_block_size)
-    runner.manager.prepare_store.side_effect = (
-        lambda block_hashes: generate_store_output([])
-    )
-    runner.manager.lookup.return_value = 1
-    runner.run(
-        decoded_tokens=[EOS_TOKEN_ID], expected_loaded_gpu_block_indexes=(0, 1, 2)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output([])
     )
+    runner.connector_scheduler._maximal_prefix_lookup = lambda key, req_context: 1
+    runner.run(decoded_tokens=[EOS_TOKEN_ID], expected_loaded=(0, 1, 2))
 
     # single block lookup with a hit in a middle block
     runner.new_request(
         token_ids=[0] * offloaded_block_size * 2 + [1] * offloaded_block_size
     )
-    runner.manager.prepare_store.side_effect = (
-        lambda block_hashes: generate_store_output([])
-    )
-    runner.manager.lookup.return_value = 1
-    runner.run(
-        decoded_tokens=[EOS_TOKEN_ID], expected_loaded_gpu_block_indexes=(3, 4, 5)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output([])
     )
+    runner.connector_scheduler._maximal_prefix_lookup = lambda key, req_context: 1
+    runner.run(decoded_tokens=[EOS_TOKEN_ID], expected_loaded=(3, 4, 5))
 
     # test take_events
     def to_hashes(int_hashes: list[int]) -> list[BlockHash]:
         return [BlockHash(str(i).encode()) for i in int_hashes]
 
     def take_events() -> Iterable[OffloadingEvent]:
-        yield OffloadingEvent(
-            block_hashes=to_hashes([1, 2, 3]), block_size=16, medium="A", removed=False
-        )
-        yield OffloadingEvent(
-            block_hashes=to_hashes([4, 5, 6]), block_size=32, medium="B", removed=True
-        )
+        yield OffloadingEvent(keys=to_keys([1, 2, 3]), medium="A", removed=False)
+        yield OffloadingEvent(keys=to_keys([4, 5, 6]), medium="B", removed=True)
 
     runner.manager.take_events.side_effect = take_events
     events = list(runner.scheduler_connector.take_events())
@@ -148,7 +155,7 @@ def take_events() -> Iterable[OffloadingEvent]:
     event = events[0]
     assert isinstance(event, BlockStored)
     assert event.block_hashes == to_hashes([1, 2, 3])
-    assert event.block_size == 16
+    assert event.block_size == 0
     assert event.medium == "A"
     assert event.token_ids == []
     assert event.parent_block_hash is None
@@ -162,15 +169,16 @@ def take_events() -> Iterable[OffloadingEvent]:
 
 @pytest.mark.parametrize("async_scheduling", [True, False])
 def test_request_preemption(request_runner, async_scheduling: bool):
-    offloaded_block_size = 12
-    gpu_block_size = 4
+    block_size = 4
+    block_size_factor = 3
+    offloaded_block_size = block_size * block_size_factor
     num_gpu_blocks = 100
 
     runner = request_runner(
-        offloaded_block_size=offloaded_block_size,
-        gpu_block_size=gpu_block_size,
+        block_size=block_size,
         num_gpu_blocks=num_gpu_blocks,
         async_scheduling=async_scheduling,
+        block_size_factor=block_size_factor,
     )
 
     free_block_queue = runner.scheduler.kv_cache_manager.block_pool.free_block_queue
@@ -179,8 +187,8 @@ def test_request_preemption(request_runner, async_scheduling: bool):
     # 2 blocks, store all, without flushing
     # blocks = [0, 1, 2], [3, 4, 5]
     runner.new_request(token_ids=[0] * offloaded_block_size * 2)
-    runner.manager.prepare_store.side_effect = (
-        lambda block_hashes: generate_store_output(block_hashes)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
     )
     runner.run(
         decoded_tokens=[0],
@@ -188,11 +196,11 @@ def test_request_preemption(request_runner, async_scheduling: bool):
     )
 
     # decode 2 more blocks - 1 gpu block, storing [6, 7, 8] (no flush)
-    runner.manager.prepare_store.side_effect = (
-        lambda block_hashes: generate_store_output(block_hashes)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
     )
     runner.run(
-        decoded_tokens=[0] * (2 * offloaded_block_size - gpu_block_size),
+        decoded_tokens=[0] * (2 * offloaded_block_size - block_size),
         complete_transfers=False,
     )
 
@@ -203,8 +211,8 @@ def test_request_preemption(request_runner, async_scheduling: bool):
     runner.run(
         decoded_tokens=[],
         complete_transfers=False,
-        expected_flushed_gpu_block_indexes=(0, 1, 2, 3, 4, 5, 6, 7, 8),
-        expected_stored_gpu_block_indexes=(0, 1, 2, 3, 4, 5, 6, 7, 8),
+        expected_flushed=(0, 1, 2, 3, 4, 5, 6, 7, 8),
+        expected_stored=(0, 1, 2, 3, 4, 5, 6, 7, 8),
     )
 
     # restore KV cache space and reset GPU prefix cache
@@ -213,48 +221,55 @@ def test_request_preemption(request_runner, async_scheduling: bool):
 
     # request should now return from preemption
     # re-load [0, ..., 8] from the CPU and store [9, 10, 11]
-    runner.manager.lookup.return_value = 3
-    runner.manager.prepare_store.side_effect = (
-        lambda block_hashes: generate_store_output(block_hashes)
+    runner.connector_scheduler._maximal_prefix_lookup = lambda key, req_context: 3
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
     )
     runner.run(
-        decoded_tokens=[0] * gpu_block_size,
-        expected_loaded_gpu_block_indexes=(0, 1, 2, 3, 4, 5, 6, 7, 8),
+        decoded_tokens=[0] * block_size,
+        expected_loaded=(0, 1, 2, 3, 4, 5, 6, 7, 8),
     )
 
     runner.run(
         decoded_tokens=[EOS_TOKEN_ID],
-        expected_stored_gpu_block_indexes=(9, 10, 11),
+        expected_stored=(9, 10, 11),
     )
 
+    # All stores completed before request_finished -> fence index empty.
+    assert runner.connector_scheduler._block_id_to_pending_jobs == {}
+
 
 @pytest.mark.parametrize("async_scheduling", [True, False])
 def test_concurrent_lookups_of_the_same_prefix(request_runner, async_scheduling: bool):
-    offloaded_block_size = 12
-    gpu_block_size = 4
+    block_size = 4
+    block_size_factor = 3
+    offloaded_block_size = block_size * block_size_factor
     num_gpu_blocks = 100
 
     runner = request_runner(
-        offloaded_block_size=offloaded_block_size,
-        gpu_block_size=gpu_block_size,
+        block_size=block_size,
         num_gpu_blocks=num_gpu_blocks,
         async_scheduling=async_scheduling,
+        block_size_factor=block_size_factor,
     )
 
     # store 1 blocks
     runner.new_request(token_ids=[0] * offloaded_block_size)
-    runner.manager.prepare_store.side_effect = (
-        lambda block_hashes: generate_store_output(block_hashes)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
     )
+    # With sync scheduling, all-finished flush fires within this run.
+    # With async scheduling, the finish is delayed so flush fires later.
     runner.run(
         decoded_tokens=[EOS_TOKEN_ID],
-        expected_stored_gpu_block_indexes=(0, 1, 2),
+        expected_stored=(0, 1, 2),
+        expected_flushed=(0, 1, 2) if not async_scheduling else (),
     )
 
     # start a request to load the first block, but don't complete
     runner.scheduler.reset_prefix_cache()
     runner.new_request(token_ids=[0] * offloaded_block_size)
-    runner.manager.lookup.return_value = 1
+    runner.connector_scheduler._maximal_prefix_lookup = lambda key, req_context: 1
     runner.run(
         decoded_tokens=[],
         complete_transfers=False,
@@ -266,7 +281,7 @@ def test_concurrent_lookups_of_the_same_prefix(request_runner, async_scheduling:
 
     # start a new request to load the same first block
     runner.new_request(token_ids=[0] * offloaded_block_size)
-    runner.manager.lookup.return_value = 1
+    runner.connector_scheduler._maximal_prefix_lookup = lambda key, req_context: 1
     runner.run(
         decoded_tokens=[],
         complete_transfers=False,
@@ -276,45 +291,50 @@ def test_concurrent_lookups_of_the_same_prefix(request_runner, async_scheduling:
     assert transfer_jobs == list(runner.offloading_spec.handler.transfer_specs)
 
     # complete transfers
-    runner.manager.prepare_store.side_effect = (
-        lambda block_hashes: generate_store_output([])
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output([])
     )
     runner.run(
         decoded_tokens=[EOS_TOKEN_ID],
-        expected_loaded_gpu_block_indexes=(0, 1, 2),
+        expected_loaded=(0, 1, 2),
     )
 
     # second request will use the GPU prefix cache
     assert transfer_jobs == list(runner.offloading_spec.handler.transfer_specs)
 
+    # Fence index drained: stores completed before request_finished ran.
+    assert runner.connector_scheduler._block_id_to_pending_jobs == {}
+
 
 @pytest.mark.parametrize("async_scheduling", [True, False])
 def test_abort_loading_requests(request_runner, async_scheduling: bool):
-    offloaded_block_size = 12
-    gpu_block_size = 4
+    block_size = 4
+    block_size_factor = 3
+    offloaded_block_size = block_size * block_size_factor
     num_gpu_blocks = 100
 
     runner = request_runner(
-        offloaded_block_size=offloaded_block_size,
-        gpu_block_size=gpu_block_size,
+        block_size=block_size,
         num_gpu_blocks=num_gpu_blocks,
         async_scheduling=async_scheduling,
+        block_size_factor=block_size_factor,
     )
 
     # store 1 blocks
     runner.new_request(token_ids=[0] * offloaded_block_size)
-    runner.manager.prepare_store.side_effect = (
-        lambda block_hashes: generate_store_output(block_hashes)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
     )
     runner.run(
         decoded_tokens=[EOS_TOKEN_ID],
-        expected_stored_gpu_block_indexes=(0, 1, 2),
+        expected_stored=(0, 1, 2),
+        expected_flushed=(0, 1, 2) if not async_scheduling else (),
     )
 
     # start a request to load the first block, but don't complete
     runner.scheduler.reset_prefix_cache()
     runner.new_request(token_ids=[0] * offloaded_block_size)
-    runner.manager.lookup.return_value = 1
+    runner.connector_scheduler._maximal_prefix_lookup = lambda key, req_context: 1
     runner.run(
         decoded_tokens=[],
         complete_transfers=False,
@@ -334,8 +354,811 @@ def test_abort_loading_requests(request_runner, async_scheduling: bool):
     # complete loading request
     runner.run(
         decoded_tokens=[],
-        expected_loaded_gpu_block_indexes=(0, 1, 2),
+        expected_loaded=(0, 1, 2),
+        expected_flushed=(0, 1, 2),
     )
 
     # assert request is deleted
     assert req_id not in runner.scheduler.requests
+
+
+@pytest.mark.parametrize("async_scheduling", [True, False])
+def test_two_groups_full_and_sliding_window(request_runner, async_scheduling: bool):
+    block_size = 4
+    num_gpu_blocks = 100
+    # sliding_window=8 -> 2 offloaded blocks (block_size_factor=1)
+    sliding_window = 8
+
+    kv_cache_groups = [
+        KVCacheGroupSpec(
+            ["layer0"],
+            FullAttentionSpec(
+                block_size=block_size,
+                num_kv_heads=1,
+                head_size=1,
+                dtype=torch.float32,
+            ),
+        ),
+        KVCacheGroupSpec(
+            ["layer1"],
+            SlidingWindowSpec(
+                block_size=block_size,
+                num_kv_heads=1,
+                head_size=1,
+                dtype=torch.float32,
+                sliding_window=sliding_window,
+            ),
+        ),
+    ]
+
+    runner = request_runner(
+        block_size=block_size,
+        num_gpu_blocks=num_gpu_blocks,
+        async_scheduling=async_scheduling,
+        kv_cache_groups=kv_cache_groups,
+    )
+
+    # Verify group configs: group 0 = full attention, group 1 = sliding window
+    kv_group_configs = runner.connector_scheduler.config.kv_group_configs
+    assert len(kv_group_configs) == 2
+    assert kv_group_configs[0].sliding_window_size_in_blocks is None
+    assert kv_group_configs[1].sliding_window_size_in_blocks == 2
+
+    # Blocks [0, 1, 2] miss
+    runner.new_request(token_ids=[0] * block_size * 3)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
+    )
+    runner.run(decoded_tokens=[0])
+    # _touch called from get_num_new_matched_tokens (2 groups) and
+    # _get_reqs_to_store (2 groups) → 4 touch calls total.
+    touch_calls = runner.manager.touch.call_args_list
+    assert len(touch_calls) == 4
+    assert len(touch_calls[0].args[0]) == 3
+    assert len(touch_calls[1].args[0]) == 3
+    assert len(touch_calls[2].args[0]) == 3
+    assert len(touch_calls[3].args[0]) == 3
+
+    # store 3 more block
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
+    )
+    runner.run(
+        decoded_tokens=[0] * (block_size * 3 + 2),
+        expected_stored=(0, 1, 2, 3, 4, 5),
+    )
+
+    # touch called from _get_reqs_to_store * 3 blocks, once for each group
+    touch_calls = runner.manager.touch.call_args_list
+    assert len(touch_calls) == 6
+
+    runner.run(decoded_tokens=[EOS_TOKEN_ID])
+
+    runner.scheduler.reset_prefix_cache()
+
+    # full 3 blocks hit [0, 1, 2]
+    runner.new_request(token_ids=[0] * (block_size * 3 + 1))
+    runner.manager.lookup.return_value = True
+    runner.run(
+        decoded_tokens=[EOS_TOKEN_ID],
+        # Group 0 (full attn): prefix lookup hits 3 → loads blocks 0,1,2
+        # Group 1 (sliding window, window=2): only the last 2 blocks
+        #   are within the window → loads blocks 1,2
+        expected_loaded=((0, 0), (0, 1), (0, 2), (1, 1), (1, 2)),
+    )
+
+    # one touch in get_num_new_matched_tokens x 2 groups
+    touch_calls = runner.manager.touch.call_args_list
+    assert len(touch_calls) == 2
+    # full attention group touched all 3 blocks
+    assert len(touch_calls[0].args[0]) == 3
+    # sliding window group touched just the last 2 blocks
+    assert len(touch_calls[1].args[0]) == 2
+
+    # 3 blocks are hit on GPU [0, 1, 2]
+    # 1 block loaded [3,]
+    runner.new_request(token_ids=[0] * (block_size * 4 + 1))
+    runner.manager.lookup.return_value = True
+    runner.run(
+        decoded_tokens=[EOS_TOKEN_ID],
+        # Group 0 (full attn): prefix lookup hits 3 → loads blocks 0,1,2
+        # Group 1 (sliding window, window=2): only the last 2 blocks
+        #   are within the window → loads blocks 1,2
+        expected_loaded=((0, 3), (1, 3)),
+    )
+
+
+@pytest.mark.parametrize("async_scheduling", [True, False])
+def test_two_groups_different_block_sizes(request_runner, async_scheduling: bool):
+    hash_block_size = 4
+    num_gpu_blocks = 100
+
+    # Group 0: block_size=12 (offloaded_block_size=12)
+    # Group 1: block_size=16 (offloaded_block_size=16)
+    kv_cache_groups = [
+        KVCacheGroupSpec(
+            ["layer0"],
+            FullAttentionSpec(
+                block_size=hash_block_size * 3,
+                num_kv_heads=1,
+                head_size=1,
+                dtype=torch.float32,
+            ),
+        ),
+        KVCacheGroupSpec(
+            ["layer1"],
+            FullAttentionSpec(
+                block_size=hash_block_size * 4,
+                num_kv_heads=1,
+                head_size=1,
+                dtype=torch.float32,
+            ),
+        ),
+    ]
+
+    runner = request_runner(
+        block_size=hash_block_size,
+        num_gpu_blocks=num_gpu_blocks,
+        async_scheduling=async_scheduling,
+        kv_cache_groups=kv_cache_groups,
+    )
+
+    # Verify group configs
+    kv_group_configs = runner.connector_scheduler.config.kv_group_configs
+    assert len(kv_group_configs) == 2
+    assert kv_group_configs[0].gpu_block_size == 12
+    assert kv_group_configs[0].offloaded_block_size == 12
+    assert kv_group_configs[1].gpu_block_size == 16
+    assert kv_group_configs[1].offloaded_block_size == 16
+
+    # Prompt: 25 tokens, unaligned to both block sizes.
+    # Group 0 blocks: [0, 1], ending_token_offset = 24
+    # Group 1 blocks: [0,], ending_token_offset = 16
+    runner.new_request(token_ids=[0] * 25)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
+    )
+    runner.run(decoded_tokens=[0])
+    # _touch called from get_num_new_matched_tokens (2 groups) and
+    # _get_reqs_to_store (2 groups) → 4 touch calls total.
+    # Group 0 has 2 offload keys, group 1 has 1.
+    touch_calls = runner.manager.touch.call_args_list
+    assert len(touch_calls) == 4
+    assert len(touch_calls[0].args[0]) == 2
+    assert len(touch_calls[1].args[0]) == 1
+    assert len(touch_calls[2].args[0]) == 2
+    assert len(touch_calls[3].args[0]) == 1
+
+    # Get to 31 tokens
+    # No further blocks offloaded
+    runner.run(decoded_tokens=[0] * 6, expected_stored=((0, 0), (0, 1), (1, 0)))
+
+    # Get to 32 tokens
+    # Group 0 blocks: [0, 1], ending_token_offset = 24
+    # Group 1 blocks: [0, 1], ending_token_offset = 32
+    runner.run(decoded_tokens=[0])
+    # _get_reqs_to_store touch: only group 1 has a new block to store
+    touch_calls = runner.manager.touch.call_args_list
+    assert len(touch_calls) == 2
+    assert len(touch_calls[0].args[0]) == 2
+    assert len(touch_calls[1].args[0]) == 2
+
+    # Get to 35 tokens
+    # No further blocks offloaded
+    runner.run(decoded_tokens=[0] * 3, expected_stored=((1, 1),))
+
+    # Get to 36 tokens
+    # Group 0 blocks: [0, 1, 2], ending_token_offset = 36
+    # Group 1 blocks: [0, 1], ending_token_offset = 32
+    runner.run(decoded_tokens=[0])
+    # _get_reqs_to_store touch: only group 0 has a new block to store
+    touch_calls = runner.manager.touch.call_args_list
+    assert len(touch_calls) == 2
+    assert len(touch_calls[0].args[0]) == 3
+    assert len(touch_calls[1].args[0]) == 2
+
+    # Get to 47 tokens
+    # No further blocks offloaded
+    runner.run(decoded_tokens=[0] * 11, expected_stored=((0, 2),))
+
+    # Get to 48 tokens
+    # Group 0 blocks: [0, 1, 2, 3], ending_token_offset = 4
+    # Group 1 blocks: [0, 1, 2], ending_token_offset = 48
+    runner.run(decoded_tokens=[0])
+    # _get_reqs_to_store touch: both groups have a new block, each with 1 key
+    touch_calls = runner.manager.touch.call_args_list
+    assert len(touch_calls) == 2
+    assert len(touch_calls[0].args[0]) == 4
+    assert len(touch_calls[1].args[0]) == 3
+
+    runner.run(decoded_tokens=[0], expected_stored=((0, 3), (1, 2)))
+
+    # Get to 96 tokens
+    runner.run(
+        decoded_tokens=[0] * 47 + [EOS_TOKEN_ID],
+        expected_stored=((0, 4), (0, 5), (0, 6), (0, 7), (1, 3), (1, 4), (1, 5)),
+    )
+
+    runner.scheduler.reset_prefix_cache()
+
+    # Request with 48 matching tokens
+    # will match 48 tokens (4 block) from the first group
+    # 48 tokens (3 block) from the second group
+    # Total 48 tokens can be loaded
+    runner.new_request(token_ids=[0] * 48)
+    runner.manager.lookup.return_value = True
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output([])
+    )
+    runner.run(
+        decoded_tokens=[0],
+        expected_loaded=((0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (1, 1), (1, 2)),
+    )
+    runner.run(decoded_tokens=[EOS_TOKEN_ID])
+
+    # Request with 48+37 matching tokens
+    # 48 tokens will be hit on GPU
+    # extra 32 tokens will be loaded
+    # extra tokens [0, 36] (blocks [4, 5, 6]) from the first group
+    # extra tokens [0, 32] (block [3, 4]) from the second group
+    runner.new_request(token_ids=[0] * (48 + 37))
+    runner.manager.lookup.return_value = True
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output([])
+    )
+    runner.run(
+        decoded_tokens=[0],
+        expected_loaded=((0, 4), (0, 5), (0, 6), (1, 3), (1, 4)),
+    )
+    runner.run(decoded_tokens=[EOS_TOKEN_ID])
+
+
+# ---------------------------------------------------------------------------
+# Unit tests for _maximal_prefix_lookup / _sliding_window_lookup
+# ---------------------------------------------------------------------------
+
+
+def _make_scheduler_with_lookup(
+    lookup_results: dict[int, bool | None],
+) -> OffloadingConnectorScheduler:
+    """Create an OffloadingConnectorScheduler with a mocked manager.lookup."""
+    manager = MagicMock(spec=OffloadingManager)
+    manager.lookup.side_effect = lambda key, req_context: lookup_results.get(
+        int(get_offload_block_hash(key).decode()), False
+    )
+
+    scheduler = object.__new__(OffloadingConnectorScheduler)
+    scheduler.manager = manager
+    return scheduler
+
+
+_EMPTY_REQ_CTX = ReqContext(req_id="")
+
+
+class TestMaximalPrefixLookup:
+    def test_all_hit(self):
+        sched = _make_scheduler_with_lookup({1: True, 2: True})
+        assert sched._maximal_prefix_lookup(to_keys([1, 2]), _EMPTY_REQ_CTX) == 2
+
+    def test_all_miss(self):
+        sched = _make_scheduler_with_lookup({})
+        assert sched._maximal_prefix_lookup(to_keys([1, 2]), _EMPTY_REQ_CTX) == 0
+
+    def test_partial_prefix(self):
+        sched = _make_scheduler_with_lookup({1: True, 2: True})
+        assert sched._maximal_prefix_lookup(to_keys([1, 2, 3]), _EMPTY_REQ_CTX) == 2
+
+    def test_miss_then_hit(self):
+        sched = _make_scheduler_with_lookup({2: True})
+        assert sched._maximal_prefix_lookup(to_keys([1, 2]), _EMPTY_REQ_CTX) == 0
+
+    def test_single_hit(self):
+        sched = _make_scheduler_with_lookup({1: True})
+        assert sched._maximal_prefix_lookup(to_keys([1]), _EMPTY_REQ_CTX) == 1
+
+    def test_empty(self):
+        sched = _make_scheduler_with_lookup({})
+        assert sched._maximal_prefix_lookup([], _EMPTY_REQ_CTX) == 0
+
+    def test_none_defers(self):
+        sched = _make_scheduler_with_lookup({1: None, 2: True})
+        assert sched._maximal_prefix_lookup(to_keys([1, 2]), _EMPTY_REQ_CTX) is None
+
+    def test_none_after_hit_defers(self):
+        sched = _make_scheduler_with_lookup({1: True, 2: None})
+        assert sched._maximal_prefix_lookup(to_keys([1, 2]), _EMPTY_REQ_CTX) is None
+
+    def test_none_stops_at_miss(self):
+        """None is treated as hit for iteration, but miss stops the scan."""
+        sched = _make_scheduler_with_lookup({1: None, 2: False, 3: True})
+        assert sched._maximal_prefix_lookup(to_keys([1, 2, 3]), _EMPTY_REQ_CTX) is None
+        # lookup should have been called for blocks 1 and 2 (stops at miss)
+        assert sched.manager.lookup.call_count == 2
+
+
+class TestSlidingWindowLookup:
+    def test_all_hit_exact_window(self):
+        sched = _make_scheduler_with_lookup({1: True, 2: True})
+        assert sched._sliding_window_lookup(to_keys([1, 2]), 2, _EMPTY_REQ_CTX) == 2
+
+    def test_all_miss(self):
+        sched = _make_scheduler_with_lookup({})
+        assert sched._sliding_window_lookup(to_keys([1, 2, 3]), 1, _EMPTY_REQ_CTX) == 0
+
+    def test_window_at_end(self):
+        sched = _make_scheduler_with_lookup({2: True, 3: True})
+        assert sched._sliding_window_lookup(to_keys([1, 2, 3]), 2, _EMPTY_REQ_CTX) == 3
+
+    def test_window_in_middle(self):
+        sched = _make_scheduler_with_lookup({2: True, 3: True})
+        assert (
+            sched._sliding_window_lookup(to_keys([1, 2, 3, 4]), 2, _EMPTY_REQ_CTX) == 3
+        )
+
+    def test_no_full_window_falls_back_to_prefix(self):
+        sched = _make_scheduler_with_lookup({1: True, 2: True})
+        assert sched._sliding_window_lookup(to_keys([1, 2, 3]), 3, _EMPTY_REQ_CTX) == 2
+
+    def test_single_block_window(self):
+        sched = _make_scheduler_with_lookup({2: True, 3: True})
+        assert sched._sliding_window_lookup(to_keys([1, 2, 3]), 1, _EMPTY_REQ_CTX) == 3
+
+    def test_gap_resets_consecutive(self):
+        sched = _make_scheduler_with_lookup({2: True, 3: True, 4: True})
+        # [1, 2, 3, 0, 4] — gap at 0 resets, window of 2 found at [2,3]
+        assert (
+            sched._sliding_window_lookup(to_keys([1, 2, 3, 0, 4]), 2, _EMPTY_REQ_CTX)
+            == 3
+        )
+
+    def test_window_prefers_rightmost(self):
+        sched = _make_scheduler_with_lookup({1: True, 2: True, 4: True, 5: True})
+        # two valid windows: [1,2] at positions 0-1 and [4,5] at positions 3-4
+        # scans right-to-left, finds [4,5] first
+        assert (
+            sched._sliding_window_lookup(to_keys([1, 2, 3, 4, 5]), 2, _EMPTY_REQ_CTX)
+            == 5
+        )
+
+    def test_prefix_fallback_with_gap(self):
+        sched = _make_scheduler_with_lookup({2: True, 3: True, 4: True, 5: True})
+        # window of 4 not found contiguously (gap at 1)
+        assert (
+            sched._sliding_window_lookup(to_keys([2, 1, 3, 4, 5]), 4, _EMPTY_REQ_CTX)
+            == 1
+        )
+
+    def test_empty(self):
+        sched = _make_scheduler_with_lookup({})
+        assert sched._sliding_window_lookup([], 1, _EMPTY_REQ_CTX) == 0
+
+    def test_none_defers(self):
+        sched = _make_scheduler_with_lookup({1: True, 2: None})
+        assert sched._sliding_window_lookup(to_keys([1, 2]), 2, _EMPTY_REQ_CTX) is None
+
+    def test_none_with_full_window_still_defers(self):
+        """Even if a real window is found after a None, result is deferred."""
+        # Scan right-to-left: 4(True), 3(None) resets, 2(True), 1(True) = window
+        # but block 3 was None so defer_lookup is set
+        sched = _make_scheduler_with_lookup({1: True, 2: True, 3: None, 4: True})
+        assert (
+            sched._sliding_window_lookup(to_keys([1, 2, 3, 4]), 2, _EMPTY_REQ_CTX)
+            is None
+        )
+
+
+@pytest.mark.parametrize("async_scheduling", [True, False])
+def test_do_remote_decode_stores_all_blocks(request_runner, async_scheduling: bool):
+    """With do_remote_decode=True, after loading prefix blocks from CPU,
+    all blocks must be re-stored — not just the newly computed ones.
+
+    This supports P/D disaggregation where the prefill instance offloads the
+    complete KV cache so a remote decode node can consume it."""
+    gpu_block_size = 4
+    block_size_factor = 3
+    offloaded_block_size = gpu_block_size * block_size_factor
+    num_gpu_blocks = 100
+
+    runner = request_runner(
+        block_size_factor=block_size_factor,
+        block_size=gpu_block_size,
+        num_gpu_blocks=num_gpu_blocks,
+        async_scheduling=async_scheduling,
+    )
+
+    # Store 1 offloaded block (3 GPU blocks) via a normal request.
+    runner.new_request(token_ids=[0] * offloaded_block_size)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
+    )
+    runner.run(
+        decoded_tokens=[EOS_TOKEN_ID],
+        expected_stored=(0, 1, 2),
+        expected_flushed=(0, 1, 2) if not async_scheduling else (),
+    )
+
+    # Reset GPU prefix cache so the next request must load from CPU.
+    runner.scheduler.reset_prefix_cache()
+
+    # New request with do_remote_decode=True and 2 offloaded blocks.
+    # The first offloaded block matches what we stored in CPU.
+    runner.new_request(
+        token_ids=[0] * offloaded_block_size * 2,
+        kv_transfer_params={"do_remote_decode": True},
+    )
+    runner.connector_scheduler._maximal_prefix_lookup = lambda key, req_context: 1
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
+    )
+
+    # Load the first offloaded block from CPU.
+    runner.run(decoded_tokens=[0], expected_loaded=(0, 1, 2))
+
+    # Store must include ALL 6 GPU blocks (both the loaded prefix and
+    # the newly computed block), not just the 3 new ones.
+    runner.run(decoded_tokens=[EOS_TOKEN_ID], expected_stored=(0, 1, 2, 3, 4, 5))
+
+    # All stores completed before request_finished -> fence index empty.
+    assert runner.connector_scheduler._block_id_to_pending_jobs == {}
+
+
+# ---------------------------------------------------------------------------
+# Tests for the per-job-store-completion design and fence invariants.
+# ---------------------------------------------------------------------------
+
+
+def test_loads_do_not_populate_fence_index(request_runner):
+    """Loads don't populate _block_id_to_pending_jobs (protected by
+    delay_free_blocks while in flight)."""
+    runner = request_runner(
+        block_size_factor=3,
+        block_size=4,
+        num_gpu_blocks=100,
+        async_scheduling=False,
+    )
+    runner.new_request(token_ids=[0] * 12)
+    runner.connector_scheduler._maximal_prefix_lookup = lambda key, req_context: 1
+    runner.run(decoded_tokens=[], complete_transfers=False)
+    assert runner.connector_scheduler._block_id_to_pending_jobs == {}
+
+
+def test_fence_at_update_state_after_alloc(request_runner):
+    """A load reusing a finished request's pending-store block triggers
+    a flush via update_state_after_alloc's fence.
+
+    num_gpu_blocks=2 forces the BlockPool to give req2 the same block
+    req1 just freed.
+    """
+    runner = request_runner(
+        block_size_factor=1,
+        block_size=4,
+        num_gpu_blocks=2,
+        async_scheduling=False,
+    )
+
+    runner.new_request(token_ids=[0] * 4)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
+    )
+    runner.run(
+        decoded_tokens=[EOS_TOKEN_ID],
+        complete_transfers=False,
+        expected_stored=(0,),
+        expected_flushed=(0,),
+    )
+    assert runner.connector_scheduler._block_id_to_pending_jobs == {}
+
+    runner.scheduler.reset_prefix_cache()
+    runner.new_request(token_ids=[0] * 4)
+    runner.connector_scheduler._maximal_prefix_lookup = lambda key, req_context: 1
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output([])
+    )
+    runner.run(
+        decoded_tokens=[],
+        complete_transfers=False,
+    )
+    assert runner.connector_scheduler._block_id_to_pending_jobs == {}
+
+
+def test_fence_at_build_store_jobs(request_runner):
+    """A new prefill (no load -> update_state_after_alloc returns early)
+    reusing a finished request's pending-store block is flushed by
+    _build_store_jobs's fence."""
+    runner = request_runner(
+        block_size_factor=1,
+        block_size=4,
+        num_gpu_blocks=2,
+        async_scheduling=False,
+    )
+
+    runner.new_request(token_ids=[0] * 4)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
+    )
+    runner.run(
+        decoded_tokens=[EOS_TOKEN_ID],
+        complete_transfers=False,
+        expected_stored=(0,),
+        expected_flushed=(0,),
+    )
+    assert runner.connector_scheduler._block_id_to_pending_jobs == {}
+
+    runner.scheduler.reset_prefix_cache()
+    runner.new_request(token_ids=[1] * 4)
+    runner.connector_scheduler._maximal_prefix_lookup = lambda key, req_context: 0
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output([])
+    )
+    runner.run(
+        decoded_tokens=[EOS_TOKEN_ID],
+    )
+    assert runner.connector_scheduler._block_id_to_pending_jobs == {}
+
+
+@pytest.mark.parametrize("async_scheduling", [True, False])
+def test_complete_store_called_per_job(request_runner, async_scheduling: bool):
+    """complete_store fires per-job, not deferred to request finish.
+    Each call carries only that store's keys."""
+    gpu_block_size = 4
+    block_size_factor = 3
+    offloaded_block_size = gpu_block_size * block_size_factor
+    runner = request_runner(
+        block_size_factor=block_size_factor,
+        block_size=gpu_block_size,
+        num_gpu_blocks=100,
+        async_scheduling=async_scheduling,
+    )
+    runner.new_request(token_ids=[0] * offloaded_block_size)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
+    )
+
+    # First store: fires when block 0 is fully populated.
+    runner.run(decoded_tokens=[0, 0], expected_stored=(0, 1, 2))
+    assert runner.manager.complete_store.call_count == 1
+    first_call_keys = set(runner.manager.complete_store.call_args.args[0])
+    assert len(first_call_keys) == 1
+    runner.manager.complete_store.reset_mock()
+
+    # Second store: fires when block 1 is fully populated, with different keys.
+    runner.run(
+        decoded_tokens=[0] * (offloaded_block_size + 1),
+        expected_stored=(3, 4, 5),
+    )
+    assert runner.manager.complete_store.call_count == 1
+    second_call_keys = set(runner.manager.complete_store.call_args.args[0])
+    assert first_call_keys != second_call_keys
+    runner.manager.complete_store.reset_mock()
+
+    # Finish: no store pending -> no further call.
+    runner.run(decoded_tokens=[EOS_TOKEN_ID])
+    assert runner.manager.complete_store.call_count == 0
+
+
+def test_flush_all_jobs_when_no_requests_remain(request_runner):
+    """When all tracked requests are finished, build_connector_meta flushes
+    all pending jobs since there will be no future step to complete them."""
+    block_size = 4
+    block_size_factor = 1
+    offloaded_block_size = block_size * block_size_factor
+
+    runner = request_runner(
+        block_size=block_size,
+        num_gpu_blocks=100,
+        async_scheduling=False,
+        block_size_factor=block_size_factor,
+    )
+
+    runner.new_request(token_ids=[0] * offloaded_block_size)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
+    )
+    runner.run(
+        decoded_tokens=[EOS_TOKEN_ID],
+        complete_transfers=False,
+        expected_stored=(0,),
+        expected_flushed=(0,),
+    )
+
+
+@pytest.mark.parametrize("async_scheduling", [True, False])
+def test_reset_cache(request_runner, async_scheduling: bool):
+    """reset_cache flushes in-flight loads, calls manager.reset_cache(), resets
+    next_stored_block_idx for active requests and clears job tracking."""
+    block_size = 4
+    block_size_factor = 3
+    offloaded_block_size = block_size * block_size_factor
+    num_gpu_blocks = 100
+
+    runner = request_runner(
+        block_size=block_size,
+        num_gpu_blocks=num_gpu_blocks,
+        async_scheduling=async_scheduling,
+        block_size_factor=block_size_factor,
+    )
+
+    # Store 1 offloaded block (3 GPU blocks) to CPU.
+    runner.new_request(token_ids=[0] * offloaded_block_size)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
+    )
+    runner.run(
+        decoded_tokens=[EOS_TOKEN_ID],
+        expected_stored=(0, 1, 2),
+        expected_flushed=(0, 1, 2) if not async_scheduling else (),
+    )
+
+    # Reset GPU prefix cache then start a request that loads from CPU.
+    # Leave the load in-flight so that reset_cache must flush it.
+    runner.scheduler.reset_prefix_cache()
+    runner.new_request(token_ids=[0] * offloaded_block_size)
+    runner.connector_scheduler._maximal_prefix_lookup = lambda key, req_context: 1
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output([])
+    )
+    runner.run(decoded_tokens=[], complete_transfers=False)
+
+    # Capture in-flight load job IDs before reset.
+    load_job_ids = {
+        jid
+        for jid, status in runner.connector_scheduler._jobs.items()
+        if not status.is_store
+    }
+    assert load_job_ids, "expected in-flight load jobs before reset"
+
+    # Record job counter to verify the reset counter is set correctly.
+    job_counter_before_reset = runner.connector_scheduler._job_counter
+
+    # After update_state_after_alloc, next_stored_block_idx is advanced to
+    # skip the loaded prefix; reset_cache must bring it back to 0.
+    for req_status in runner.connector_scheduler._req_status.values():
+        for group_state in req_status.group_states:
+            assert group_state.next_stored_block_idx > 0
+
+    # Reset the cache
+    runner.connector_scheduler.reset_cache()
+
+    # manager.reset_cache() must be called exactly once.
+    runner.manager.reset_cache.assert_called_once()
+
+    # In-flight load jobs must be queued for flushing to prevent CUDA stream
+    # races between old loads and new post-reset stores.
+    assert load_job_ids <= runner.connector_scheduler._current_batch_jobs_to_flush
+
+    # All internal job tracking must be cleared.
+    assert not runner.connector_scheduler._jobs
+    assert not runner.connector_scheduler._block_id_to_pending_jobs
+    if runner.connector_scheduler._blocks_being_loaded is not None:
+        assert not runner.connector_scheduler._blocks_being_loaded
+
+    # Job reset counter must equal the job counter so that completions for
+    # pre-reset jobs arriving from workers are silently discarded.
+    assert runner.connector_scheduler._stale_job_threshold == job_counter_before_reset
+
+    # next_stored_block_idx must be reset to 0 for every active request so
+    # that post-reset stores restart from block 0.
+    for req_status in runner.connector_scheduler._req_status.values():
+        for group_state in req_status.group_states:
+            assert group_state.next_stored_block_idx == 0
+
+
+@pytest.mark.parametrize("async_scheduling", [True, False])
+def test_swa_alignment_skip(request_runner, async_scheduling: bool):
+    """SWA blocks unreachable by the load path are skipped during store.
+
+    Simulates a DeepSeek V4-like hybrid architecture where SWA groups have
+    much smaller block sizes than the full-attention (MLA) group, causing
+    most SWA blocks to be unreachable by the alignment-based load path.
+
+    Setup:
+      - Group 0: full attention (MLA-like), block_size=16
+      - Group 1: SWA, block_size=4, sliding_window=8
+
+    alignment_block_count = 16 / 4 = 4 SWA blocks per alignment segment.
+    sliding_window_size_in_blocks = ceil(8 / 4) = 2.
+    Within each segment of 4 SWA blocks, only the trailing 2 are stored.
+
+    With 32 tokens (2 full-attn blocks, 8 SWA blocks):
+      - Group 0 stores: blocks 0, 1  (all full-attn blocks)
+      - Group 1 stores: blocks 2, 3, 6, 7  (skip 0,1,4,5)
+
+    For real DeepSeek V4 (100K tokens), this reduces SWA stores by ~78%.
+    """
+    full_attn_block_size = 16
+    swa_block_size = 4
+    sliding_window = 8
+    num_gpu_blocks = 200
+
+    kv_cache_groups = [
+        KVCacheGroupSpec(
+            ["layer0"],
+            FullAttentionSpec(
+                block_size=full_attn_block_size,
+                num_kv_heads=1,
+                head_size=1,
+                dtype=torch.float32,
+            ),
+        ),
+        KVCacheGroupSpec(
+            ["layer1"],
+            SlidingWindowSpec(
+                block_size=swa_block_size,
+                num_kv_heads=1,
+                head_size=1,
+                dtype=torch.float32,
+                sliding_window=sliding_window,
+            ),
+        ),
+    ]
+
+    runner = request_runner(
+        block_size=swa_block_size,
+        num_gpu_blocks=num_gpu_blocks,
+        async_scheduling=async_scheduling,
+        kv_cache_groups=kv_cache_groups,
+    )
+
+    # Verify config: alignment_block_count computed correctly
+    kv_group_configs = runner.connector_scheduler.config.kv_group_configs
+    assert len(kv_group_configs) == 2
+    # Group 0: full attention -> no alignment skip
+    assert kv_group_configs[0].alignment_block_count is None
+    assert kv_group_configs[0].sliding_window_size_in_blocks is None
+    assert kv_group_configs[0].offloaded_block_size == full_attn_block_size
+    # Group 1: SWA -> alignment_block_count = 16/4 = 4, tail = 2
+    assert kv_group_configs[1].alignment_block_count == 4
+    assert kv_group_configs[1].sliding_window_size_in_blocks == 2
+    assert kv_group_configs[1].offloaded_block_size == swa_block_size
+
+    # Send 32 tokens = 2 full-attn blocks (block_size=16) = 8 SWA blocks
+    # (block_size=4). Decode 1 token to kick off processing (stores are
+    # deferred to next step).
+    num_tokens = 32
+    runner.new_request(token_ids=[0] * num_tokens)
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
+    )
+    runner.run(decoded_tokens=[0])
+
+    # Decode 1 more token to complete the deferred stores from above.
+    runner.manager.prepare_store.side_effect = lambda keys, req_context: (
+        generate_store_output(keys)
+    )
+    runner.run(
+        decoded_tokens=[EOS_TOKEN_ID],
+        # Group 0 (full attn, block_size=16): 2 offloaded blocks
+        #   -> GPU blocks (0, 0) and (0, 1)
+        # Group 1 (SWA, block_size=4): 8 offloaded blocks, skip first 2
+        #   per segment of 4:
+        #   Segment 0 (blocks 0-3): skip 0,1 -> store (1, 2), (1, 3)
+        #   Segment 1 (blocks 4-7): skip 4,5 -> store (1, 6), (1, 7)
+        expected_stored=(
+            (0, 0),
+            (0, 1),
+            (1, 2),
+            (1, 3),
+            (1, 6),
+            (1, 7),
+        ),
+    )
+
+    # Verify that loads still work correctly for the stored SWA blocks.
+    runner.scheduler.reset_prefix_cache()
+    runner.new_request(token_ids=[0] * num_tokens + [1])
+    runner.manager.lookup.return_value = True
+    runner.connector_scheduler._maximal_prefix_lookup = lambda key, req_context: 2
+    runner.run(
+        decoded_tokens=[EOS_TOKEN_ID],
+        # Group 0: full prefix lookup hits 2 offloaded blocks
+        #   -> loads GPU blocks (0, 0), (0, 1)
+        # Group 1: sliding window lookup finds trailing 2 from last segment
+        #   (blocks 6, 7 which were stored)
+        #   -> loads GPU blocks (1, 6), (1, 7)
+        expected_loaded=(
+            (0, 0),
+            (0, 1),
+            (1, 6),
+            (1, 7),
+        ),
+    )
diff --git a/tests/v1/kv_connector/unit/offloading_connector/test_worker.py b/tests/v1/kv_connector/unit/offloading_connector/test_worker.py
index 3cfb32b3068f..b2fb0846d912 100644
--- a/tests/v1/kv_connector/unit/offloading_connector/test_worker.py
+++ b/tests/v1/kv_connector/unit/offloading_connector/test_worker.py
@@ -20,7 +20,7 @@
     MLAAttentionSpec,
     UniformTypeKVCacheSpecs,
 )
-from vllm.v1.kv_offload.spec import (
+from vllm.v1.kv_offload.base import (
     CanonicalKVCacheRef,
     CanonicalKVCaches,
     OffloadingSpec,
diff --git a/tests/v1/kv_connector/unit/offloading_connector/test_worker_metadata.py b/tests/v1/kv_connector/unit/offloading_connector/test_worker_metadata.py
new file mode 100644
index 000000000000..ab9d676cb4ae
--- /dev/null
+++ b/tests/v1/kv_connector/unit/offloading_connector/test_worker_metadata.py
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from vllm.distributed.kv_transfer.kv_connector.v1.offloading.common import (
+    OffloadingWorkerMetadata,
+)
+
+pytestmark = pytest.mark.cpu_test
+
+
+def test_aggregate_sums_counts():
+    meta1 = OffloadingWorkerMetadata(completed_jobs={42: 1, 7: 1})
+    meta2 = OffloadingWorkerMetadata(completed_jobs={42: 1, 7: 1})
+    result = meta1.aggregate(meta2)
+    assert result.completed_jobs == {42: 2, 7: 2}
+
+
+def test_aggregate_disjoint_jobs():
+    meta1 = OffloadingWorkerMetadata(completed_jobs={42: 1, 7: 1})
+    meta2 = OffloadingWorkerMetadata(completed_jobs={43: 1, 8: 1})
+    result = meta1.aggregate(meta2)
+    assert result.completed_jobs == {42: 1, 7: 1, 43: 1, 8: 1}
+
+
+def test_aggregate_multiple_workers():
+    meta1 = OffloadingWorkerMetadata(completed_jobs={42: 1, 43: 1, 7: 1})
+    meta2 = OffloadingWorkerMetadata(completed_jobs={42: 1, 7: 1, 8: 1})
+    meta3 = OffloadingWorkerMetadata(completed_jobs={42: 1, 43: 1, 8: 1})
+    result = meta1.aggregate(meta2).aggregate(meta3)
+    assert result.completed_jobs == {42: 3, 43: 2, 7: 2, 8: 2}
diff --git a/tests/v1/kv_connector/unit/offloading_connector/utils.py b/tests/v1/kv_connector/unit/offloading_connector/utils.py
index e051f239773c..bac8dbdf9464 100644
--- a/tests/v1/kv_connector/unit/offloading_connector/utils.py
+++ b/tests/v1/kv_connector/unit/offloading_connector/utils.py
@@ -1,10 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import copy
 from collections.abc import Iterable, Iterator
 from dataclasses import dataclass
 from typing import Any
-from unittest.mock import MagicMock
+from unittest.mock import MagicMock, patch
 
 import pytest
 import torch
@@ -19,6 +18,7 @@
 from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorRole
 from vllm.distributed.kv_transfer.kv_connector.v1.offloading.common import (
     OffloadingConnectorMetadata,
+    OffloadingWorkerMetadata,
 )
 from vllm.distributed.kv_transfer.kv_connector.v1.offloading_connector import (
     OffloadingConnector,
@@ -27,7 +27,6 @@
 from vllm.utils.hashing import sha256
 from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend
 from vllm.v1.core.kv_cache_utils import (
-    BlockHash,
     get_request_block_hasher,
     init_none_hash,
 )
@@ -38,33 +37,42 @@
     KVCacheConfig,
     KVCacheGroupSpec,
 )
-from vllm.v1.kv_offload.abstract import (
+from vllm.v1.kv_offload.base import (
+    GPULoadStoreSpec,
     LoadStoreSpec,
     OffloadingManager,
+    OffloadingSpec,
+    OffloadKey,
     PrepareStoreOutput,
+    make_offload_key,
 )
-from vllm.v1.kv_offload.mediums import GPULoadStoreSpec
-from vllm.v1.kv_offload.spec import OffloadingSpec
 from vllm.v1.kv_offload.worker.worker import (
     OffloadingHandler,
     TransferResult,
     TransferSpec,
 )
-from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput
 from vllm.v1.request import Request
 from vllm.v1.structured_output import StructuredOutputManager
 
 
+def to_key(int_hash: int) -> OffloadKey:
+    return make_offload_key(str(int_hash).encode(), 0)
+
+
+def to_keys(int_hashes: list[int]) -> list[OffloadKey]:
+    return [to_key(i) for i in int_hashes]
+
+
 class MockLoadStoreSpec(LoadStoreSpec):
-    def __init__(self, block_hashes: Iterable[BlockHash]):
-        self.block_hashes: list[BlockHash] = list(block_hashes)
+    def __init__(self, offload_keys: Iterable[OffloadKey]):
+        self.offload_keys: list[OffloadKey] = list(offload_keys)
 
     @staticmethod
     def medium() -> str:
         return "Mock"
 
     def __repr__(self) -> str:
-        return repr(self.block_hashes)
+        return repr(self.offload_keys)
 
 
 class MockOffloadingHandler(OffloadingHandler):
@@ -110,9 +118,8 @@ def __init__(self, vllm_config: VllmConfig, kv_cache_config: KVCacheConfig):
 
         self.manager = MagicMock(spec=OffloadingManager)
         self.manager.lookup.return_value = 0
-        self.manager.prepare_load = lambda block_hashes: (
-            MockLoadStoreSpec(block_hashes)
-        )
+        self.manager.prepare_load = lambda keys, req_context: MockLoadStoreSpec(keys)
+        self.manager.lookup.return_value = False
         self.handler = MockOffloadingHandler()
 
     def get_manager(self) -> OffloadingManager:
@@ -143,46 +150,61 @@ def get_flushed_transfers(self):
         return specs
 
 
+@dataclass(frozen=True)
+class GPUBlock:
+    group_idx: int
+    request_block_offset: int
+
+
 @dataclass
 class TransferSummary:
-    gpu_block_indices: list[int]
+    gpu_blocks: list[GPUBlock]
     offload_addresses: list[Any]
 
 
 class RequestRunner:
     def __init__(
         self,
-        offloaded_block_size: int,
-        gpu_block_size: int,
+        block_size: int,
         num_gpu_blocks: int,
+        block_size_factor: int = 1,
         async_scheduling: bool = True,
+        kv_cache_groups: list[KVCacheGroupSpec] | None = None,
     ):
-        self.offloaded_block_size: int = offloaded_block_size
-        self.gpu_block_size: int = gpu_block_size
+        assert block_size_factor == 1 or kv_cache_groups is None, (
+            "block_size_factor > 1 requires all groups to have the same "
+            "block size, so kv_cache_groups must be None (use default group)"
+        )
+
+        self.block_size_factor: int = block_size_factor
+        self.block_size: int = block_size
         self.num_gpu_blocks: int = num_gpu_blocks
         self.async_scheduling: bool = async_scheduling
 
         self.req_id: int = -1
 
         vllm_config = create_vllm_config(
-            block_size=gpu_block_size, max_num_batched_tokens=1000
+            block_size=block_size,
+            max_num_batched_tokens=1000,
+            disable_hybrid_kv_cache_manager=False,
         )
         vllm_config.scheduler_config.async_scheduling = async_scheduling
+
+        extra_config: dict[str, Any] = {
+            "spec_name": "MockOffloadingSpec",
+            "spec_module_path": "tests.v1.kv_connector.unit.offloading_connector.utils",  # noqa: E501
+        }
+        if block_size_factor > 1:
+            extra_config["block_size"] = block_size * block_size_factor
+
         vllm_config.kv_transfer_config = KVTransferConfig(
             kv_connector="OffloadingConnector",
             kv_role="kv_both",
-            kv_connector_extra_config={
-                "spec_name": "MockOffloadingSpec",
-                "spec_module_path": "tests.v1.kv_connector.unit.offloading_connector.utils",  # noqa: E501
-                "block_size": offloaded_block_size,
-            },
+            kv_connector_extra_config=extra_config,
         )
 
-        block_size = vllm_config.cache_config.block_size
-        kv_cache_config = KVCacheConfig(
-            num_blocks=num_gpu_blocks,
-            kv_cache_tensors=[],
-            kv_cache_groups=[
+        if kv_cache_groups is None:
+            kv_cache_groups = [
                 KVCacheGroupSpec(
                     ["layer"],
                     FullAttentionSpec(
@@ -192,7 +214,12 @@ def __init__(
                         dtype=torch.float32,
                     ),
                 )
-            ],
+            ]
+
+        kv_cache_config = KVCacheConfig(
+            num_blocks=num_gpu_blocks,
+            kv_cache_tensors=[],
+            kv_cache_groups=kv_cache_groups,
         )
         vllm_config.cache_config.num_gpu_blocks = num_gpu_blocks
         self.num_kv_groups = len(kv_cache_config.kv_cache_groups)
@@ -212,11 +239,38 @@ def __init__(
 
         # register worker kv_caches to enable OffloadingWorker creations
         # set_current_vllm_config is needed for get_kv_cache_layout() to work
-        with set_current_vllm_config(vllm_config):
-            self.worker_connector.register_cross_layers_kv_cache(
-                kv_cache=torch.empty(0),
-                attn_backend=FlashAttentionBackend,
-            )
+        # Mock get_layers_from_vllm_config so that mock layer names
+        # resolve to layers whose get_attn_backend() returns
+        # FlashAttentionBackend.
+        def _mock_get_layers(_vllm_config, _layer_type, layer_names):
+            mock_layer = MagicMock()
+            mock_layer.get_attn_backend.return_value = FlashAttentionBackend
+            return {name: mock_layer for name in layer_names}
+
+        kv_caches: dict[str, torch.Tensor] = {}
+        for group in kv_cache_groups:
+            spec = group.kv_cache_spec
+            for layer_name in group.layer_names:
+                # Shape follows FlashAttention layout:
+                # (2, num_blocks, block_size, num_kv_heads, head_size)
+                kv_caches[layer_name] = torch.empty(
+                    2,
+                    num_gpu_blocks,
+                    spec.block_size,
+                    spec.num_kv_heads,
+                    spec.head_size,
+                    dtype=spec.dtype,
+                )
+
+        with (
+            set_current_vllm_config(vllm_config),
+            patch(
+                "vllm.distributed.kv_transfer.kv_connector.v1"
+                ".offloading.worker.get_layers_from_vllm_config",
+                side_effect=_mock_get_layers,
+            ),
+        ):
+            self.worker_connector.register_kv_caches(kv_caches)
 
         # extract connector of scheduler
         scheduler_connector = self.scheduler.connector
@@ -225,14 +279,23 @@ def __init__(
         self.scheduler_connector: OffloadingConnector = scheduler_connector
 
         # extract mocked OffloadingManager of scheduler connector
-        connector_scheduler = scheduler_connector.connector_scheduler
-        assert connector_scheduler is not None
-        manager = connector_scheduler.manager
+        self.connector_scheduler = scheduler_connector.connector_scheduler
+        assert self.connector_scheduler is not None
+        manager = self.connector_scheduler.manager
         assert isinstance(manager, MagicMock)
         self.manager: MagicMock = manager
 
-        assert connector_scheduler.gpu_block_size == gpu_block_size
-        assert connector_scheduler.offloaded_block_size == offloaded_block_size
+        num_kv_groups = len(kv_cache_config.kv_cache_groups)
+        assert len(self.connector_scheduler.config.kv_group_configs) == num_kv_groups
+        for group_config, kv_cache_group in zip(
+            self.connector_scheduler.config.kv_group_configs,
+            kv_cache_config.kv_cache_groups,
+        ):
+            gpu_block_size = kv_cache_group.kv_cache_spec.block_size
+            assert group_config.gpu_block_size == gpu_block_size
+            assert (
+                group_config.offloaded_block_size == gpu_block_size * block_size_factor
+            )
 
         # extract OffloadingSpec of worker_connector
         connector_worker = self.worker_connector.connector_worker
@@ -241,18 +304,18 @@ def __init__(
         assert isinstance(offloading_spec, MockOffloadingSpec)
         self.offloading_spec: MockOffloadingSpec = offloading_spec
 
-        # mapping (offloading address) -> gpu_block_index
-        self.offloaded: dict[Any, int] = {}
+        # mapping (offloading address) -> GPUBlock
+        self.offloaded: dict[Any, GPUBlock] = {}
 
         self.completed_loads: list[TransferSummary] = []
         self.completed_stores: list[TransferSummary] = []
-        self.flushed_gpu_block_indexes: set[int] = set()
+        self.flushed_gpu_blocks: set[GPUBlock] = set()
 
-        # maps {block_id: block_offset}
-        self.gpu_block_index: dict[int, int] = {}
+        # block_id -> GPUBlock
+        self.gpu_blocks: dict[int, GPUBlock] = {}
 
         init_none_hash(sha256)
-        self._block_hasher = get_request_block_hasher(gpu_block_size, sha256)
+        self._block_hasher = get_request_block_hasher(block_size, sha256)
 
         self._dummy_ctx: ForwardContext = ForwardContext(
             no_compile_layers={},
@@ -260,7 +323,11 @@ def __init__(
             slot_mapping={},
         )
 
-    def new_request(self, token_ids: list[int]):
+    def new_request(
+        self,
+        token_ids: list[int],
+        kv_transfer_params: dict | None = None,
+    ):
         self.req_id += 1
 
         sampling_params = SamplingParams(max_tokens=1000)
@@ -273,20 +340,24 @@ def new_request(self, token_ids: list[int]):
             pooling_params=None,
             block_hasher=self._block_hasher,
         )
+        if kv_transfer_params is not None:
+            req.kv_transfer_params = kv_transfer_params
 
         self.scheduler.add_request(req)
 
     def _parse_transfers(self):
         for transfer_spec in self.offloading_spec.get_flushed_transfers():
             src_spec, dst_spec = transfer_spec
-            assert isinstance(src_spec, GPULoadStoreSpec)
-
-            for block_id in src_spec.block_ids:
-                self.flushed_gpu_block_indexes.add(
-                    self.gpu_block_index[block_id.item()]
-                )
+            if isinstance(src_spec, GPULoadStoreSpec):
+                # store flush
+                for block_id in src_spec.block_ids:
+                    self.flushed_gpu_blocks.add(self.gpu_blocks[block_id.item()])
+            else:
+                # load flush
+                for block_id in dst_spec.block_ids:
+                    self.flushed_gpu_blocks.add(self.gpu_blocks[block_id.item()])
 
-        block_size_factor = self.offloaded_block_size // self.gpu_block_size
+        block_size_factor = self.block_size_factor
 
         for transfer_spec in self.offloading_spec.get_completed_transfers():
             src_spec, dst_spec = transfer_spec
@@ -302,41 +373,61 @@ def _parse_transfers(self):
 
             assert isinstance(offload_spec, MockLoadStoreSpec)
             assert isinstance(gpu_spec, GPULoadStoreSpec)
+            assert len(gpu_spec.group_sizes) == self.num_kv_groups
 
-            gpu_block_indices: list[int] = []
+            gpu_blocks: list[GPUBlock] = []
             for block_id in gpu_spec.block_ids:
-                gpu_block_indices.append(self.gpu_block_index[block_id.item()])
+                gpu_blocks.append(self.gpu_blocks[block_id.item()])
 
-            # list of (block_hash, sub_block_offset)
+            # list of (offload_key, sub_block_offset)
             offload_addresses: list[Any] = []
-            for block_hash in offload_spec.block_hashes:
+            for offload_key in offload_spec.offload_keys:
                 for sub_block_idx in range(block_size_factor):
-                    offload_addresses.append((block_hash, sub_block_idx))
+                    offload_addresses.append((offload_key, sub_block_idx))
 
-            if store:
-                assert len(gpu_block_indices) == len(offload_addresses)
+            assert gpu_spec.block_indices is not None
+            assert len(gpu_spec.block_indices) == self.num_kv_groups
 
-                self.completed_stores.append(
-                    TransferSummary(gpu_block_indices, offload_addresses)
-                )
-            else:
-                remainder_sub_block_count = len(offload_addresses) - len(
-                    gpu_block_indices
+            gpu_block_offset = 0
+            offload_address_offset = 0
+            for group_size, logical_offset in zip(
+                gpu_spec.group_sizes, gpu_spec.block_indices
+            ):
+                gpu_block_end_offset = gpu_block_offset + group_size
+                assert gpu_block_end_offset <= len(gpu_blocks)
+
+                offload_addresses_to_skip = logical_offset % block_size_factor
+                offload_addresses_end_offset = (
+                    offload_address_offset + offload_addresses_to_skip + group_size
                 )
-                assert remainder_sub_block_count >= 0
-                assert remainder_sub_block_count < block_size_factor
-                offload_addresses = offload_addresses[remainder_sub_block_count:]
+                assert offload_addresses_end_offset <= len(offload_addresses)
 
-                self.completed_loads.append(
-                    TransferSummary(gpu_block_indices, offload_addresses)
+                offload_addresses = (
+                    offload_addresses[:offload_address_offset]
+                    + offload_addresses[
+                        offload_address_offset + offload_addresses_to_skip :
+                    ]
                 )
 
-    def _update_gpu_block_idx(self):
-        for blocks in self.scheduler.kv_cache_manager.coordinator.single_type_managers[
-            0
-        ].req_to_blocks.values():
-            for block_idx, block in enumerate(blocks):
-                self.gpu_block_index[block.block_id] = block_idx
+                gpu_block_offset += group_size
+                offload_address_offset += group_size
+
+            assert gpu_block_offset == len(gpu_blocks)
+            assert offload_address_offset == len(offload_addresses)
+
+            transfer_summary = TransferSummary(gpu_blocks, offload_addresses)
+            if store:
+                self.completed_stores.append(transfer_summary)
+            else:
+                self.completed_loads.append(transfer_summary)
+
+    def _update_gpu_blocks(self):
+        for group_idx, manager in enumerate(
+            self.scheduler.kv_cache_manager.coordinator.single_type_managers
+        ):
+            for blocks in manager.req_to_blocks.values():
+                for block_idx, block in enumerate(blocks):
+                    self.gpu_blocks[block.block_id] = GPUBlock(group_idx, block_idx)
 
     def _run(self, decoded_tokens: list[int], complete_transfers: bool):
         """
@@ -353,10 +444,15 @@ def _run(self, decoded_tokens: list[int], complete_transfers: bool):
         prev_scheduler_output = None
         prev_model_runner_output = None
         while True:
-            assert self.scheduler.requests
+            # Strict-always-False frees the request immediately on EOS, but
+            # the worker may still have a deferred store queued. In production
+            # the next request's step drains it; in single-request tests we
+            # must keep stepping until the scheduler sees no in-flight jobs.
+            if not self.scheduler.requests and not self.connector_scheduler._jobs:
+                break
 
             scheduler_output = self.scheduler.schedule()
-            self._update_gpu_block_idx()
+            self._update_gpu_blocks()
 
             kv_connector_metadata = scheduler_output.kv_connector_metadata
             assert kv_connector_metadata is not None
@@ -367,15 +463,16 @@ def _run(self, decoded_tokens: list[int], complete_transfers: bool):
             self.worker_connector.bind_connector_metadata(kv_connector_metadata)
             self.worker_connector.start_load_kv(self._dummy_ctx)
 
-            if scheduler_output.total_num_scheduled_tokens > 0:
-                self.worker_connector.wait_for_save()
-
             if complete_transfers:
                 self.offloading_spec.complete_transfers()
 
             finished_sending, finished_recving = self.worker_connector.get_finished(
                 scheduler_output.finished_req_ids
             )
+            worker_meta = (
+                self.worker_connector.build_connector_worker_meta()
+                or OffloadingWorkerMetadata()
+            )
 
             self.worker_connector.clear_connector_metadata()
 
@@ -384,6 +481,7 @@ def _run(self, decoded_tokens: list[int], complete_transfers: bool):
                 finished_sending=finished_sending,
                 finished_recving=finished_recving,
                 token_id=token_id or 0,
+                kv_connector_worker_meta=worker_meta,
             )
 
             prev_token_id = token_id
@@ -404,7 +502,7 @@ def _run(self, decoded_tokens: list[int], complete_transfers: bool):
             if (
                 prev_token_id == EOS_TOKEN_ID
                 and prev_token_id != token_id
-                and self.scheduler.requests
+                and (self.scheduler.requests or self.connector_scheduler._jobs)
             ):
                 # continue for one more step to allow offloading to kick off
                 continue
@@ -419,33 +517,33 @@ def _run(self, decoded_tokens: list[int], complete_transfers: bool):
 
         self._parse_transfers()
 
-        # run one more step to update finished stored
         if EOS_TOKEN_ID in decoded_tokens:
             assert not self.scheduler.running
 
-            while self.scheduler.requests:
-                scheduler_output = self.scheduler.schedule()
-
-                finished_sending, finished_recving = self.worker_connector.get_finished(
-                    scheduler_output.finished_req_ids
-                )
-
-                assert not finished_recving
-
-                model_runner_output = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
-                model_runner_output.kv_connector_output = KVConnectorOutput(
-                    finished_sending=finished_sending
+    def _to_gpu_blocks(
+        self, blocks: tuple[int | tuple[int, int], ...]
+    ) -> list[GPUBlock]:
+        gpu_blocks: list[GPUBlock] = []
+        for block in blocks:
+            if isinstance(block, int):
+                for group_idx in range(self.num_kv_groups):
+                    gpu_blocks.append(
+                        GPUBlock(group_idx=group_idx, request_block_offset=block)
+                    )
+            else:
+                group_idx, offset = block
+                gpu_blocks.append(
+                    GPUBlock(group_idx=group_idx, request_block_offset=offset)
                 )
-
-                self.scheduler.update_from_output(scheduler_output, model_runner_output)
+        return gpu_blocks
 
     def run(
         self,
         decoded_tokens: list[int],
         complete_transfers: bool = True,
-        expected_stored_gpu_block_indexes: tuple[int, ...] = (),
-        expected_loaded_gpu_block_indexes: tuple[int, ...] = (),
-        expected_flushed_gpu_block_indexes: tuple[int, ...] = (),
+        expected_stored: tuple[int | tuple[int, int], ...] = (),
+        expected_loaded: tuple[int | tuple[int, int], ...] = (),
+        expected_flushed: tuple[int | tuple[int, int], ...] = (),
     ):
         """
         Runs multiple engine (scheduler + worker) steps.
@@ -454,41 +552,49 @@ def run(
         Args:
             decoded_tokens: the tokens to yield at each step.
             complete_transfers: complete transfers immediately
-            expected_stored_gpu_block_indexes: GPU block indexes
+            expected_stored: GPU blocks
                 that are expected to be written during the run.
-            expected_loaded_gpu_block_indexes: GPU block indexes
+            expected_loaded: GPU blocks
                 that are expected to be loaded during the run.
-            expected_flushed_gpu_block_indexes: GPU block indexes
+            expected_flushed: GPU blocks
                 that are expected to be flushed during the run.
+
+            A GPU block is either a (group_idx: int, request_block_offset: int)
+            or just request_block_offset: int.
+            The latter case is a convenience for representing all groups.
         """
 
+        expected_stored_gpu_blocks = self._to_gpu_blocks(expected_stored)
+        expected_loaded_gpu_blocks = self._to_gpu_blocks(expected_loaded)
+        expected_flushed_gpu_blocks = self._to_gpu_blocks(expected_flushed)
+
         self.manager.reset_mock()
         self._run(decoded_tokens, complete_transfers)
 
-        loaded_gpu_block_indexes: set[int] = set()
+        loaded_gpu_blocks: set[GPUBlock] = set()
         for transfer in self.completed_loads:
-            for gpu_block_idx, offloaded_address in zip(
-                transfer.gpu_block_indices, transfer.offload_addresses
+            for gpu_block, offloaded_address in zip(
+                transfer.gpu_blocks, transfer.offload_addresses
             ):
-                loaded_gpu_block_indexes.add(gpu_block_idx)
-                assert gpu_block_idx == self.offloaded[offloaded_address]
+                loaded_gpu_blocks.add(gpu_block)
+                assert gpu_block == self.offloaded[offloaded_address]
 
-        assert set(expected_loaded_gpu_block_indexes) == loaded_gpu_block_indexes
+        assert set(expected_loaded_gpu_blocks) == loaded_gpu_blocks
         self.completed_loads.clear()
 
-        stored_gpu_block_indexes: set[int] = set()
+        stored_gpu_blocks: set[GPUBlock] = set()
         for transfer in self.completed_stores:
-            for gpu_block_idx, offloaded_address in zip(
-                transfer.gpu_block_indices, transfer.offload_addresses
+            for gpu_block, offloaded_address in zip(
+                transfer.gpu_blocks, transfer.offload_addresses
             ):
-                stored_gpu_block_indexes.add(gpu_block_idx)
-                self.offloaded[offloaded_address] = gpu_block_idx
+                stored_gpu_blocks.add(gpu_block)
+                self.offloaded[offloaded_address] = gpu_block
 
-        assert set(expected_stored_gpu_block_indexes) == stored_gpu_block_indexes
+        assert set(expected_stored_gpu_blocks) == stored_gpu_blocks
         self.completed_stores.clear()
 
-        assert set(expected_flushed_gpu_block_indexes) == self.flushed_gpu_block_indexes
-        self.flushed_gpu_block_indexes.clear()
+        assert set(expected_flushed_gpu_blocks) == self.flushed_gpu_blocks
+        self.flushed_gpu_blocks.clear()
 
 
 @pytest.fixture
@@ -496,13 +602,18 @@ def request_runner():
     runners = []
 
     def runner_factory(
-        offloaded_block_size, gpu_block_size, num_gpu_blocks, async_scheduling
+        block_size,
+        num_gpu_blocks,
+        async_scheduling,
+        block_size_factor=1,
+        kv_cache_groups=None,
     ):
         runner = RequestRunner(
-            offloaded_block_size=offloaded_block_size,
-            gpu_block_size=gpu_block_size,
+            block_size=block_size,
             num_gpu_blocks=num_gpu_blocks,
+            block_size_factor=block_size_factor,
             async_scheduling=async_scheduling,
+            kv_cache_groups=kv_cache_groups,
         )
         runners.append(runner)
         return runner
@@ -510,10 +621,10 @@ def runner_factory(
     yield runner_factory  # pass factory to the test
 
 
-def generate_store_output(block_hashes: Iterable[BlockHash]):
-    block_hashes = list(block_hashes)
+def generate_store_output(keys: Iterable[OffloadKey]):
+    keys = list(keys)
     return PrepareStoreOutput(
-        block_hashes_to_store=list(block_hashes),
-        store_spec=MockLoadStoreSpec(block_hashes),
-        block_hashes_evicted=[],
+        keys_to_store=list(keys),
+        store_spec=MockLoadStoreSpec(keys),
+        evicted_keys=[],
     )
diff --git a/tests/v1/kv_connector/unit/test_backwards_compatibility.py b/tests/v1/kv_connector/unit/test_backwards_compatibility.py
deleted file mode 100644
index da6a5aadbc6d..000000000000
--- a/tests/v1/kv_connector/unit/test_backwards_compatibility.py
+++ /dev/null
@@ -1,275 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""
-Unit tests for backwards compatibility with external KV connector implementations.
-
-This test ensures that external connectors (loaded via kv_connector_module_path)
-implemented with the old signature continue to work:
-- Old signature: __init__(self, vllm_config, role)
-- New signature: __init__(self, vllm_config, role, kv_cache_config)
-"""
-
-from typing import TYPE_CHECKING
-from unittest.mock import patch
-
-import pytest
-
-from vllm.distributed.kv_transfer.kv_connector.factory import KVConnectorFactory
-from vllm.distributed.kv_transfer.kv_connector.v1 import (
-    KVConnectorBase_V1,
-    KVConnectorRole,
-)
-from vllm.v1.attention.backend import AttentionMetadata
-from vllm.v1.core.sched.output import SchedulerOutput
-
-from .utils import create_scheduler, create_vllm_config
-
-if TYPE_CHECKING:
-    from vllm.config import VllmConfig
-    from vllm.forward_context import ForwardContext
-    from vllm.v1.core.kv_cache_manager import KVCacheBlocks
-    from vllm.v1.kv_cache_interface import KVCacheConfig
-    from vllm.v1.request import Request
-
-
-class OldStyleTestConnector(KVConnectorBase_V1):
-    """
-    Test connector using the old signature with 2 required arguments.
-    This simulates external connectors that haven't been updated yet.
-    """
-
-    def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
-        # Old-style call to super().__init__ with only 2 arguments
-        super().__init__(vllm_config=vllm_config, role=role)
-
-    def get_num_new_matched_tokens(
-        self, request: "Request", num_computed_tokens: int
-    ) -> tuple[int | None, bool]:
-        return 0, False
-
-    def update_state_after_alloc(
-        self,
-        request: "Request",
-        blocks: "KVCacheBlocks",
-        num_external_tokens: int,
-    ):
-        pass
-
-    def build_connector_meta(self, scheduler_output: SchedulerOutput):
-        return None
-
-    def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
-        pass
-
-    def wait_for_layer_load(self, layer_name: str) -> None:
-        pass
-
-    def save_kv_layer(
-        self,
-        layer_name: str,
-        kv_layer,
-        attn_metadata: AttentionMetadata,
-        **kwargs,
-    ) -> None:
-        pass
-
-    def wait_for_save(self):
-        pass
-
-
-class NewStyleTestConnector(KVConnectorBase_V1):
-    """
-    Test connector using the new signature with 3 required arguments.
-    """
-
-    def __init__(
-        self,
-        vllm_config: "VllmConfig",
-        role: KVConnectorRole,
-        kv_cache_config: "KVCacheConfig",
-    ):
-        # New-style call to super().__init__ with all 3 arguments
-        super().__init__(
-            vllm_config=vllm_config, role=role, kv_cache_config=kv_cache_config
-        )
-
-    def get_num_new_matched_tokens(
-        self, request: "Request", num_computed_tokens: int
-    ) -> tuple[int | None, bool]:
-        return 0, False
-
-    def update_state_after_alloc(
-        self,
-        request: "Request",
-        blocks: "KVCacheBlocks",
-        num_external_tokens: int,
-    ):
-        pass
-
-    def build_connector_meta(self, scheduler_output: SchedulerOutput):
-        return None
-
-    def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
-        pass
-
-    def wait_for_layer_load(self, layer_name: str) -> None:
-        pass
-
-    def save_kv_layer(
-        self,
-        layer_name: str,
-        kv_layer,
-        attn_metadata: AttentionMetadata,
-        **kwargs,
-    ) -> None:
-        pass
-
-    def wait_for_save(self):
-        pass
-
-
-@pytest.mark.parametrize("role", [KVConnectorRole.SCHEDULER, KVConnectorRole.WORKER])
-def test_external_old_signature_factory_instantiation(role):
-    """
-    Test that external connectors with old signature (2 required args) loaded
-    via kv_connector_module_path are correctly instantiated with backwards
-    compatibility support.
-    """
-    vllm_config = create_vllm_config()
-    vllm_config.kv_transfer_config.kv_connector = "OldStyleTestConnector"
-    vllm_config.kv_transfer_config.kv_connector_module_path = (
-        "tests.v1.kv_connector.unit.test_backwards_compatibility"
-    )
-
-    scheduler = create_scheduler(vllm_config)
-    kv_cache_config = scheduler.kv_cache_config
-
-    connector = KVConnectorFactory.create_connector(vllm_config, role, kv_cache_config)
-
-    assert connector is not None
-    assert isinstance(connector, OldStyleTestConnector)
-    assert connector.role == role
-    assert connector._kv_cache_config is None
-
-
-@pytest.mark.parametrize("role", [KVConnectorRole.SCHEDULER, KVConnectorRole.WORKER])
-def test_external_new_signature_factory_instantiation(role):
-    """
-    Test that external connectors with new signature (3 required args) loaded
-    via kv_connector_module_path are correctly instantiated.
-    """
-    vllm_config = create_vllm_config()
-    vllm_config.kv_transfer_config.kv_connector = "NewStyleTestConnector"
-    vllm_config.kv_transfer_config.kv_connector_module_path = (
-        "tests.v1.kv_connector.unit.test_backwards_compatibility"
-    )
-
-    scheduler = create_scheduler(vllm_config)
-    kv_cache_config = scheduler.kv_cache_config
-
-    connector = KVConnectorFactory.create_connector(vllm_config, role, kv_cache_config)
-
-    assert connector is not None
-    assert isinstance(connector, NewStyleTestConnector)
-    assert connector.role == role
-    assert connector._kv_cache_config is not None
-    assert connector._kv_cache_config == kv_cache_config
-
-
-@pytest.mark.parametrize("role", [KVConnectorRole.SCHEDULER, KVConnectorRole.WORKER])
-def test_old_signature_super_init(role):
-    """
-    Test that old-style connectors can call super().__init__() without
-    kv_cache_config parameter.
-    """
-    vllm_config = create_vllm_config()
-
-    connector = OldStyleTestConnector(vllm_config, role)
-
-    assert connector is not None
-    assert connector.role == role
-    assert connector._kv_cache_config is None
-
-
-def test_old_signature_super_init_with_kwargs():
-    """
-    Test that old-style connectors can call super().__init__() with keyword
-    arguments in different orders.
-    """
-    vllm_config = create_vllm_config()
-
-    # Test with vllm_config= and role= kwargs
-    connector1 = OldStyleTestConnector(
-        vllm_config=vllm_config, role=KVConnectorRole.SCHEDULER
-    )
-    assert connector1 is not None
-    assert connector1._kv_cache_config is None
-
-    # Test with role= and vllm_config= in reversed order
-    connector2 = OldStyleTestConnector(
-        role=KVConnectorRole.WORKER, vllm_config=vllm_config
-    )
-    assert connector2 is not None
-    assert connector2._kv_cache_config is None
-
-
-def test_internal_connector_uses_new_signature():
-    """
-    Test that internal connectors (registered in factory) always use the new
-    signature and get kv_cache_config.
-    """
-    from vllm.distributed.kv_transfer.kv_connector.v1.example_connector import (
-        ExampleConnector,
-    )
-
-    vllm_config = create_vllm_config()
-    vllm_config.kv_transfer_config.kv_connector = "ExampleConnector"
-
-    scheduler = create_scheduler(vllm_config)
-    kv_cache_config = scheduler.kv_cache_config
-
-    connector = KVConnectorFactory.create_connector(
-        vllm_config, KVConnectorRole.SCHEDULER, kv_cache_config
-    )
-
-    assert connector is not None
-    assert isinstance(connector, ExampleConnector)
-    assert connector._kv_cache_config is not None
-    assert connector._kv_cache_config == kv_cache_config
-
-
-def test_signature_detection_with_mocking():
-    """
-    Test that the factory correctly applies compat_sig flag returned from
-    _get_connector_class_with_compat.
-    """
-    vllm_config = create_vllm_config()
-    scheduler = create_scheduler(vllm_config)
-    kv_cache_config = scheduler.kv_cache_config
-
-    # Mock _get_connector_class_with_compat to return old-style connector
-    with patch.object(
-        KVConnectorFactory,
-        "_get_connector_class_with_compat",
-        return_value=(OldStyleTestConnector, True),
-    ):
-        old_connector = KVConnectorFactory.create_connector(
-            vllm_config, KVConnectorRole.SCHEDULER, kv_cache_config
-        )
-        assert old_connector is not None
-        assert isinstance(old_connector, OldStyleTestConnector)
-        assert old_connector._kv_cache_config is None
-
-    # Mock _get_connector_class_with_compat to return new-style connector
-    with patch.object(
-        KVConnectorFactory,
-        "_get_connector_class_with_compat",
-        return_value=(NewStyleTestConnector, False),
-    ):
-        new_connector = KVConnectorFactory.create_connector(
-            vllm_config, KVConnectorRole.SCHEDULER, kv_cache_config
-        )
-        assert new_connector is not None
-        assert isinstance(new_connector, NewStyleTestConnector)
-        assert new_connector._kv_cache_config is not None
-        assert new_connector._kv_cache_config == kv_cache_config
diff --git a/tests/v1/kv_connector/unit/test_bidirectional_kv_transfer.py b/tests/v1/kv_connector/unit/test_bidirectional_kv_transfer.py
new file mode 100644
index 000000000000..dc76d61178d8
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_bidirectional_kv_transfer.py
@@ -0,0 +1,915 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for bi-directional KV cache transfer between P and D nodes.
+
+Tests cover the new behaviors added by the bi-directional KV transfer PR:
+1. P-node scheduler lifecycle: P pulls KV from D using remote_block_ids,
+   eliminating redundant prefill computation in multi-turn conversations.
+2. P-node metadata: NixlConnectorMetadata correctly populates recv metadata
+   when P pulls KV from D (do_remote_decode=True + remote_block_ids).
+3. P-node worker: start_load_kv processes reqs_to_recv for KV pull from D.
+4. D-node request_finished: returns kv_transfer_params with remote_block_ids
+   and remote_num_tokens so P can pull KV in future turns.
+5. Edge cases:
+   - No double read after reschedule (_remote_blocks_processed flag)
+   - remote_num_tokens bounded by block capacity (num_computed_tokens)
+   - kv_recompute_threshold skips small transfers
+   - P-node holds blocks for D after finishing
+   - Cache MISS first turn falls back to local prefill
+   - Partial remote coverage: P pulls partial, computes the rest
+   - _remote_blocks_processed flag persists across reschedules
+
+P-node flags: do_remote_prefill=False (prefill locally),
+do_remote_decode=True (don't decode locally, send KV to D).
+P pulls KV from D when remote_block_ids is not None and
+external tokens > 0.
+"""
+
+import copy
+import time
+from unittest.mock import patch
+
+import pytest
+
+from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorRole
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.connector import (
+    NixlConnector,
+    NixlConnectorMetadata,
+)
+from vllm.forward_context import ForwardContext
+from vllm.v1.outputs import (
+    EMPTY_MODEL_RUNNER_OUTPUT,
+    KVConnectorOutput,
+)
+from vllm.v1.request import RequestStatus
+
+from .test_nixl_connector import FakeNixlConnectorWorker, FakeNixlWrapper
+from .utils import (
+    assert_scheduler_empty,
+    create_model_runner_output,
+    create_request,
+    create_scheduler,
+    create_vllm_config,
+    make_kv_cache_config,
+)
+
+pytestmark = pytest.mark.cpu_test
+
+# Common extra config for all bi-directional KV transfer tests.
+BIDIR_KV_EXTRA_CONFIG = {"bidirectional_kv_xfer": True, "kv_recompute_threshold": 0}
+
+
+# Helpers
+
+
+def _make_p_node_turn2_request(
+    request_id, block_size, num_tokens, num_remote_blocks=3, remote_num_tokens=None
+):
+    """Create a P-node Turn 2 request with remote_block_ids from D."""
+    request = create_request(
+        request_id=request_id,
+        block_size=block_size,
+        num_tokens=num_tokens,
+        do_remote_decode=True,
+    )
+    if remote_num_tokens is None:
+        remote_num_tokens = num_remote_blocks * block_size
+    request.kv_transfer_params["remote_block_ids"] = [list(range(num_remote_blocks))]
+    request.kv_transfer_params["remote_num_tokens"] = remote_num_tokens
+    request.kv_transfer_params["remote_engine_id"] = "decode-engine"
+    request.kv_transfer_params["remote_request_id"] = f"decode-{request_id}"
+    request.kv_transfer_params["remote_host"] = "decode-host"
+    request.kv_transfer_params["remote_port"] = 5678
+    return request
+
+
+def _make_connector_with_fake_worker(
+    hand_shake_latency=0, cycles_before_done=0, do_handshake=True
+):
+    """Create a NixlConnector with FakeNixlConnectorWorker."""
+    vllm_config = create_vllm_config()
+    kv_cache_config = make_kv_cache_config(block_size=16, num_blocks=2)
+    connector = NixlConnector(vllm_config, KVConnectorRole.WORKER, kv_cache_config)
+    connector.connector_worker = FakeNixlConnectorWorker(
+        vllm_config,
+        connector.engine_id,
+        hand_shake_latency=hand_shake_latency,
+        kv_cache_config=kv_cache_config,
+    )
+    worker = connector.connector_worker
+    assert isinstance(worker.nixl_wrapper, FakeNixlWrapper)
+    worker.nixl_wrapper.set_cycles_before_xfer_done(cycles_before_done)
+    worker.kv_cache_layout = "HND"
+    if do_handshake:
+        remote_agents = worker._nixl_handshake(
+            host="localhost",
+            port=1234,
+            remote_tp_size=1,
+            expected_engine_id=FakeNixlConnectorWorker.REMOTE_ENGINE_ID,
+        )
+        worker._remote_agents[FakeNixlConnectorWorker.REMOTE_ENGINE_ID] = remote_agents
+    return connector, worker
+
+
+def _make_p_node_recv_metadata(request_id, local_blocks, remote_blocks):
+    """Build NixlConnectorMetadata for P-node pulling KV from D."""
+    meta = NixlConnectorMetadata()
+    meta.add_new_req_to_recv(
+        request_id=request_id,
+        local_block_ids=(local_blocks,),
+        kv_transfer_params={
+            "do_remote_prefill": False,
+            "do_remote_decode": True,
+            "remote_block_ids": (remote_blocks,),
+            "remote_engine_id": FakeNixlConnectorWorker.REMOTE_ENGINE_ID,
+            "remote_request_id": f"decode-{request_id}",
+            "remote_host": "localhost",
+            "remote_port": 1234,
+            "remote_tp_size": 1,
+        },
+    )
+    return meta
+
+
+def _do_load_kv(connector, metadata):
+    """Bind metadata and call start_load_kv."""
+    connector.bind_connector_metadata(metadata)
+    ctx = ForwardContext(no_compile_layers={}, attn_metadata={}, slot_mapping={})
+    connector.start_load_kv(ctx)
+
+
+# 1. P-node scheduler lifecycle tests
+
+
+def test_multiturn_lifecycle():
+    """Full two-turn lifecycle on the P node:
+    Turn 1: P prefills locally (do_remote_prefill=False), sends KV to D
+    (do_remote_decode=True). Finishes LENGTH_CAPPED with remote_block_ids.
+    Turn 2: P receives remote_block_ids from D. P pulls KV from D because
+    remote_block_ids is not None and external tokens > 0. Computes only
+    new tokens, finishes LENGTH_CAPPED."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+
+    t1 = create_request(
+        request_id=100, block_size=BS, num_tokens=int(BS * 2.5), do_remote_decode=True
+    )
+    scheduler.add_request(t1)
+    t1_id = t1.request_id
+    so = scheduler.schedule()
+    mro = create_model_runner_output(reqs=[t1])
+    eco = scheduler.update_from_output(so, mro)
+    assert t1.status == RequestStatus.FINISHED_LENGTH_CAPPED
+    kv = eco[0].outputs[0].kv_transfer_params
+    assert kv and sum(len(g) for g in kv["remote_block_ids"]) > 0
+    so = scheduler.schedule()
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+
+    t2 = _make_p_node_turn2_request(200, BS, int(BS * 2.5))
+    scheduler.add_request(t2)
+    t2_id = t2.request_id
+    so = scheduler.schedule()
+    assert t2.status == RequestStatus.WAITING_FOR_REMOTE_KVS
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_recving={t2_id})
+    scheduler.update_from_output(so, mro)
+    so = scheduler.schedule()
+    mro = create_model_runner_output(reqs=[t2])
+    scheduler.update_from_output(so, mro)
+    assert t2.status == RequestStatus.FINISHED_LENGTH_CAPPED
+    so = scheduler.schedule()
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_sending={t1_id, t2_id})
+    scheduler.update_from_output(so, mro)
+    assert_scheduler_empty(scheduler)
+
+
+def test_first_turn_no_remote_blocks():
+    """First turn: P has no remote_block_ids from D yet.
+    Standard local prefill, returns kv_transfer_params for future turns."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    req = create_request(
+        request_id=3, block_size=BS, num_tokens=int(BS * 2.5), do_remote_decode=True
+    )
+    scheduler.add_request(req)
+    req_id = req.request_id
+    so = scheduler.schedule()
+    assert req.status != RequestStatus.WAITING_FOR_REMOTE_KVS
+    mro = create_model_runner_output(reqs=[req])
+    eco = scheduler.update_from_output(so, mro)
+    assert req.status == RequestStatus.FINISHED_LENGTH_CAPPED
+    assert eco[0].outputs[0].kv_transfer_params is not None
+    so = scheduler.schedule()
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_sending={req_id})
+    scheduler.update_from_output(so, mro)
+    assert_scheduler_empty(scheduler)
+
+
+def test_abort_p_side_during_send():
+    """P-side do_remote_decode=True: blocks held until finished_sending."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    req = create_request(
+        request_id=42, block_size=BS, num_tokens=int(BS * 2.5), do_remote_decode=True
+    )
+    scheduler.add_request(req)
+    req_id = req.request_id
+    so = scheduler.schedule()
+    mro = create_model_runner_output(reqs=[req])
+    scheduler.update_from_output(so, mro)
+    assert req_id in scheduler.requests
+    so = scheduler.schedule()
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    assert req_id in scheduler.requests
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_sending={req_id})
+    scheduler.update_from_output(so, mro)
+    assert_scheduler_empty(scheduler)
+
+
+def test_abort_p_side_non_length_capped():
+    """P-side abort with non-LENGTH_CAPPED → immediate block free."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    req = create_request(
+        request_id=44, block_size=BS, num_tokens=int(BS * 2.5), do_remote_decode=True
+    )
+    req.sampling_params.max_tokens = 100
+    req.max_tokens = 100
+    scheduler.add_request(req)
+    req_id = req.request_id
+    so = scheduler.schedule()
+    mro = create_model_runner_output(reqs=[req])
+    scheduler.update_from_output(so, mro)
+    scheduler.finish_requests([req_id], RequestStatus.FINISHED_ABORTED)
+    conn = scheduler.connector.connector_scheduler
+    assert req_id in conn._reqs_not_processed
+    assert req_id not in scheduler.requests
+    so = scheduler.schedule()
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    assert_scheduler_empty(scheduler)
+
+
+def test_remote_blocks_exceed_prompt_tokens():
+    """D provides more remote tokens than P's prompt needs.
+    P caps external tokens to prompt length."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    NUM_TOKENS = int(BS * 2.5)
+    req = _make_p_node_turn2_request(
+        300, BS, NUM_TOKENS, num_remote_blocks=5, remote_num_tokens=5 * BS
+    )
+    scheduler.add_request(req)
+    req_id = req.request_id
+    so = scheduler.schedule()
+    assert req.status == RequestStatus.WAITING_FOR_REMOTE_KVS
+    assert req.num_computed_tokens == NUM_TOKENS
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_recving={req_id})
+    scheduler.update_from_output(so, mro)
+    so = scheduler.schedule()
+    mro = create_model_runner_output(reqs=[req])
+    scheduler.update_from_output(so, mro)
+    assert req.status == RequestStatus.FINISHED_LENGTH_CAPPED
+    so = scheduler.schedule()
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_sending={req_id})
+    scheduler.update_from_output(so, mro)
+    assert_scheduler_empty(scheduler)
+
+
+def test_p_node_pulls_partial_last_block_from_d():
+    """D sends remote_block_ids with partially filled last block.
+    remote_num_tokens < len(remote_block_ids) * block_size.
+    P pulls only remote_num_tokens worth of external tokens."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    num_remote_blocks = 3
+    remote_num_tokens = int(BS * 2.5)
+    assert remote_num_tokens < num_remote_blocks * BS
+    NUM_TOKENS = int(BS * 3.5)
+    req = _make_p_node_turn2_request(
+        400,
+        BS,
+        NUM_TOKENS,
+        num_remote_blocks=num_remote_blocks,
+        remote_num_tokens=remote_num_tokens,
+    )
+    scheduler.add_request(req)
+    req_id = req.request_id
+    so = scheduler.schedule()
+    assert req.status == RequestStatus.WAITING_FOR_REMOTE_KVS
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_recving={req_id})
+    scheduler.update_from_output(so, mro)
+    so = scheduler.schedule()
+    assert len(scheduler.running) == 1
+    mro = create_model_runner_output(reqs=[req])
+    scheduler.update_from_output(so, mro)
+    assert req.status == RequestStatus.FINISHED_LENGTH_CAPPED
+    so = scheduler.schedule()
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_sending={req_id})
+    scheduler.update_from_output(so, mro)
+    assert_scheduler_empty(scheduler)
+
+
+# 2. P-node metadata tests
+
+
+def test_add_new_req_to_recv_populates_remote_meta():
+    """add_new_req_to_recv correctly populates RemoteMeta for P-node
+    bi-directional KV pull from D."""
+    meta = NixlConnectorMetadata()
+    kv_params = {
+        "remote_block_ids": [[0, 1, 2]],
+        "remote_engine_id": "decode-engine",
+        "remote_request_id": "decode-req-123",
+        "remote_host": "decode-host",
+        "remote_port": 5678,
+    }
+    local_block_ids = ([10, 11, 12],)
+    meta.add_new_req_to_recv(
+        request_id="test-req",
+        local_block_ids=local_block_ids,
+        kv_transfer_params=kv_params,
+    )
+    assert "test-req" in meta.reqs_to_recv
+    rm = meta.reqs_to_recv["test-req"]
+    assert rm.remote is not None
+    assert rm.remote.block_ids == kv_params["remote_block_ids"]
+    assert rm.remote.engine_id == "decode-engine"
+    assert rm.remote.request_id == "decode-req-123"
+    assert rm.remote.host == "decode-host"
+    assert rm.remote.port == 5678
+    assert rm.local_block_ids == local_block_ids
+
+
+def test_build_connector_meta_recv_entries():
+    """P-node scheduler: do_remote_decode=True + remote_block_ids →
+    _reqs_need_recv populated, build_connector_meta produces reqs_to_recv."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    req = _make_p_node_turn2_request(1, BS, int(BS * 2.5))
+    scheduler.add_request(req)
+    req_id = req.request_id
+    so = scheduler.schedule()
+    assert req.status == RequestStatus.WAITING_FOR_REMOTE_KVS
+    meta = so.kv_connector_metadata
+    assert isinstance(meta, NixlConnectorMetadata)
+    assert req_id in meta.reqs_to_recv
+    rm = meta.reqs_to_recv[req_id]
+    assert rm.remote is not None
+    assert rm.remote.engine_id == "decode-engine"
+
+
+def test_build_connector_meta_clears_reqs_need_recv():
+    """After build_connector_meta, _reqs_need_recv is cleared."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    req = _make_p_node_turn2_request(2, BS, int(BS * 2.5))
+    scheduler.add_request(req)
+    conn = scheduler.connector.connector_scheduler
+    scheduler.schedule()
+    assert len(conn._reqs_need_recv) == 0
+
+
+def test_build_connector_meta_multiple_requests():
+    """Multiple P-node requests all included in reqs_to_recv."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    reqs = [_make_p_node_turn2_request(10 + i, BS, int(BS * 2.5)) for i in range(3)]
+    for r in reqs:
+        scheduler.add_request(r)
+    so = scheduler.schedule()
+    meta = so.kv_connector_metadata
+    assert isinstance(meta, NixlConnectorMetadata)
+    assert len(meta.reqs_to_recv) == 3
+    for r in reqs:
+        assert r.request_id in meta.reqs_to_recv
+
+
+# 3. P-node worker tests (FakeNixlWrapper)
+
+
+@patch(
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
+    FakeNixlWrapper,
+)
+def test_p_node_pull_kv_from_d(dist_init):
+    """P node pulls KV from D via start_load_kv with reqs_to_recv."""
+    connector, worker = _make_connector_with_fake_worker()
+    meta = _make_p_node_recv_metadata("req-p1", [10, 11, 12], [20, 21, 22])
+    _do_load_kv(connector, meta)
+    assert "req-p1" in worker._recving_metadata
+    _, done_recving = connector.get_finished(finished_req_ids=set())
+    assert "req-p1" in done_recving
+
+
+@patch(
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
+    FakeNixlWrapper,
+)
+def test_p_node_pull_then_send_kv(dist_init):
+    """Full P-node bi-directional: pull KV from D → prefill →
+    send KV back to D via notification."""
+    connector, worker = _make_connector_with_fake_worker()
+    meta = _make_p_node_recv_metadata("req-p2", [10, 11], [20, 21])
+    _do_load_kv(connector, meta)
+    _, done_recving = connector.get_finished(finished_req_ids=set())
+    assert "req-p2" in done_recving
+    worker._reqs_to_send["req-p2"] = time.perf_counter() + 60
+    worker._reqs_to_process.add("req-p2")
+    notif = f"req-p2:{worker.world_size}".encode()
+    orig = worker.nixl_wrapper.get_new_notifs
+    worker.nixl_wrapper.get_new_notifs = lambda: {"agent": [notif]}
+    done_sending, _ = connector.get_finished(finished_req_ids=set())
+    assert "req-p2" in done_sending
+    worker.nixl_wrapper.get_new_notifs = orig
+
+
+@patch(
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
+    FakeNixlWrapper,
+)
+def test_p_node_deferred_pull_on_no_handshake(dist_init):
+    """P defers KV pull when no prior handshake exists."""
+    connector, worker = _make_connector_with_fake_worker(
+        hand_shake_latency=0, do_handshake=False
+    )
+    meta = _make_p_node_recv_metadata("req-p3", [10, 11], [20, 21])
+    _do_load_kv(connector, meta)
+    assert "req-p3" in worker._recving_metadata
+    timeout = 3.0
+    start = time.perf_counter()
+    while time.perf_counter() - start < timeout:
+        connector.bind_connector_metadata(NixlConnectorMetadata())
+        ctx = ForwardContext(no_compile_layers={}, attn_metadata={}, slot_mapping={})
+        connector.start_load_kv(ctx)
+        _, done = connector.get_finished(finished_req_ids=set())
+        if "req-p3" in done:
+            return
+        time.sleep(0.2)
+    raise AssertionError("Transfer did not complete after async handshake")
+
+
+# 4. D-node request_finished returns kv_transfer_params (new behavior)
+
+
+def test_d_node_request_finished_returns_kv_params():
+    """D-node request_finished returns kv_transfer_params with
+    do_remote_decode=True, remote_block_ids, remote_num_tokens
+    for P to pull. These params go directly to P node."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    req = create_request(
+        request_id=1, block_size=BS, num_tokens=int(BS * 2.5), do_remote_prefill=True
+    )
+    scheduler.add_request(req)
+    req_id = req.request_id
+    so = scheduler.schedule()
+    scheduler.update_from_output(
+        so, create_model_runner_output(reqs=[], finished_recving={req_id})
+    )
+    so = scheduler.schedule()
+    eco = scheduler.update_from_output(
+        so, create_model_runner_output(reqs=[req], use_eos=True)
+    )
+    assert req.status == RequestStatus.FINISHED_STOPPED
+    kv = eco[0].outputs[0].kv_transfer_params
+    assert kv is not None
+    assert kv["do_remote_decode"] is True
+    assert kv["do_remote_prefill"] is False
+    assert "remote_block_ids" in kv
+    assert "remote_num_tokens" in kv
+    assert kv["remote_num_tokens"] > 0
+
+
+def test_d_node_request_finished_delays_block_free():
+    """D-node holds blocks (delay_free=True) until P reads them."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    req = create_request(
+        request_id=2, block_size=BS, num_tokens=int(BS * 2.5), do_remote_prefill=True
+    )
+    scheduler.add_request(req)
+    req_id = req.request_id
+    so = scheduler.schedule()
+    scheduler.update_from_output(
+        so, create_model_runner_output(reqs=[], finished_recving={req_id})
+    )
+    so = scheduler.schedule()
+    scheduler.update_from_output(
+        so, create_model_runner_output(reqs=[req], use_eos=True)
+    )
+    assert req_id in scheduler.requests
+    conn = scheduler.connector.connector_scheduler
+    assert req_id in conn._reqs_need_send
+
+
+def test_d_node_request_finished_remote_num_tokens():
+    """D-node kv_transfer_params includes correct remote_num_tokens."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    req = create_request(
+        request_id=3, block_size=BS, num_tokens=int(BS * 2.5), do_remote_prefill=True
+    )
+    scheduler.add_request(req)
+    req_id = req.request_id
+    so = scheduler.schedule()
+    scheduler.update_from_output(
+        so, create_model_runner_output(reqs=[], finished_recving={req_id})
+    )
+    so = scheduler.schedule()
+    eco = scheduler.update_from_output(
+        so, create_model_runner_output(reqs=[req], use_eos=True)
+    )
+    kv = eco[0].outputs[0].kv_transfer_params
+    assert kv["remote_num_tokens"] > 0
+    assert sum(len(g) for g in kv["remote_block_ids"]) > 0
+
+
+def test_d_node_partial_last_block_remote_num_tokens():
+    """D-node: remote_num_tokens < len(remote_block_ids) * block_size
+    when last block is partially filled."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    req = create_request(
+        request_id=5, block_size=BS, num_tokens=int(BS * 2.5), do_remote_prefill=True
+    )
+    scheduler.add_request(req)
+    req_id = req.request_id
+    so = scheduler.schedule()
+    scheduler.update_from_output(
+        so, create_model_runner_output(reqs=[], finished_recving={req_id})
+    )
+    so = scheduler.schedule()
+    eco = scheduler.update_from_output(
+        so, create_model_runner_output(reqs=[req], use_eos=True)
+    )
+    kv = eco[0].outputs[0].kv_transfer_params
+    total_blocks = sum(len(g) for g in kv["remote_block_ids"])
+    assert total_blocks == 3
+    assert kv["remote_num_tokens"] < total_blocks * BS
+    assert kv["remote_num_tokens"] > 0
+
+
+# 5. Edge case tests
+
+
+def test_no_double_read_blocks_after_reschedule():
+    """Edge case 1: update_state_after_alloc called twice for the same
+    bidirectional request (once on initial schedule, once after
+    WAITING_FOR_REMOTE_KVS → reschedule). The _remote_blocks_processed
+    flag must prevent the request from being added to _reqs_need_recv
+    twice, which would cause P to read D's blocks twice."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    req = _make_p_node_turn2_request(500, BS, int(BS * 2.5))
+    scheduler.add_request(req)
+    req_id = req.request_id
+    conn = scheduler.connector.connector_scheduler
+
+    # First schedule: request enters WAITING_FOR_REMOTE_KVS,
+    # _reqs_need_recv populated then cleared by build_connector_meta.
+    so = scheduler.schedule()
+    assert req.status == RequestStatus.WAITING_FOR_REMOTE_KVS
+    meta = so.kv_connector_metadata
+    assert isinstance(meta, NixlConnectorMetadata)
+    assert req_id in meta.reqs_to_recv
+    # _reqs_need_recv should be cleared after build_connector_meta
+    assert len(conn._reqs_need_recv) == 0
+
+    # Simulate recv completion
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_recving={req_id})
+    scheduler.update_from_output(so, mro)
+
+    # Second schedule after recv: update_state_after_alloc called again.
+    # The _remote_blocks_processed flag should prevent re-entry.
+    so = scheduler.schedule()
+    meta2 = so.kv_connector_metadata
+    assert isinstance(meta2, NixlConnectorMetadata)
+    # Must NOT be in reqs_to_recv again
+    assert req_id not in meta2.reqs_to_recv
+
+    # Clean up
+    mro = create_model_runner_output(reqs=[req])
+    scheduler.update_from_output(so, mro)
+    assert req.status == RequestStatus.FINISHED_LENGTH_CAPPED
+    so = scheduler.schedule()
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_sending={req_id})
+    scheduler.update_from_output(so, mro)
+    assert_scheduler_empty(scheduler)
+
+
+def test_remote_num_tokens_bounded_by_blocks():
+    """Edge case 2: D-node request_finished must return
+    remote_num_tokens <= len(remote_block_ids) * block_size.
+    request.num_tokens includes the last sampled token which has no KV
+    in the cache, so remote_num_tokens must use num_computed_tokens."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    req = create_request(
+        request_id=501,
+        block_size=BS,
+        num_tokens=int(BS * 2.5),
+        do_remote_prefill=True,
+    )
+    scheduler.add_request(req)
+    req_id = req.request_id
+    so = scheduler.schedule()
+    scheduler.update_from_output(
+        so, create_model_runner_output(reqs=[], finished_recving={req_id})
+    )
+    so = scheduler.schedule()
+    eco = scheduler.update_from_output(
+        so, create_model_runner_output(reqs=[req], use_eos=True)
+    )
+    kv = eco[0].outputs[0].kv_transfer_params
+    assert kv is not None
+    total_blocks = sum(len(g) for g in kv["remote_block_ids"])
+    max_tokens_in_blocks = total_blocks * BS
+    assert kv["remote_num_tokens"] <= max_tokens_in_blocks, (
+        f"remote_num_tokens ({kv['remote_num_tokens']}) exceeds "
+        f"block capacity ({max_tokens_in_blocks})"
+    )
+    assert kv["remote_num_tokens"] > 0
+
+
+def test_kv_recompute_threshold_skips_small_transfer():
+    """Edge case 3: When remote tokens are below kv_recompute_threshold,
+    P should skip the remote pull and compute locally instead of
+    entering WAITING_FOR_REMOTE_KVS."""
+    threshold = 256
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config={
+            "bidirectional_kv_xfer": True,
+            "kv_recompute_threshold": threshold,
+        },
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+
+    # Create request where remote tokens (48) < threshold (256)
+    req = _make_p_node_turn2_request(
+        502,
+        BS,
+        int(BS * 2.5),
+        num_remote_blocks=3,
+        remote_num_tokens=3 * BS,
+    )
+    scheduler.add_request(req)
+    so = scheduler.schedule()
+    # Should NOT enter WAITING_FOR_REMOTE_KVS — threshold not met
+    assert req.status != RequestStatus.WAITING_FOR_REMOTE_KVS
+    assert req.status == RequestStatus.RUNNING
+
+    # Clean up
+    mro = create_model_runner_output(reqs=[req])
+    scheduler.update_from_output(so, mro)
+    assert req.status == RequestStatus.FINISHED_LENGTH_CAPPED
+    so = scheduler.schedule()
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_sending={req.request_id})
+    scheduler.update_from_output(so, mro)
+    assert_scheduler_empty(scheduler)
+
+
+def test_p_node_finished_holds_blocks_for_d():
+    """Edge case 4: P-node finishes with FINISHED_LENGTH_CAPPED and
+    do_remote_decode=True. P must hold blocks (delay_free=True) and
+    return kv_transfer_params with do_remote_prefill=True so D can
+    read P's blocks."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    req = create_request(
+        request_id=503,
+        block_size=BS,
+        num_tokens=int(BS * 2.5),
+        do_remote_decode=True,
+    )
+    scheduler.add_request(req)
+    req_id = req.request_id
+    so = scheduler.schedule()
+    mro = create_model_runner_output(reqs=[req])
+    eco = scheduler.update_from_output(so, mro)
+    assert req.status == RequestStatus.FINISHED_LENGTH_CAPPED
+    kv = eco[0].outputs[0].kv_transfer_params
+    assert kv is not None
+    # P-node finished: should tell D to pull (do_remote_prefill=True)
+    assert kv["do_remote_prefill"] is True
+    assert kv["do_remote_decode"] is False
+    assert "remote_block_ids" in kv
+    assert sum(len(g) for g in kv["remote_block_ids"]) > 0
+    # Blocks should be held (request still tracked)
+    assert req_id in scheduler.requests
+
+    # Clean up: simulate D reading and notifying
+    so = scheduler.schedule()
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_sending={req_id})
+    scheduler.update_from_output(so, mro)
+    assert_scheduler_empty(scheduler)
+
+
+def test_cache_miss_first_turn_no_remote_pull():
+    """Edge case 5: First turn with do_remote_decode=True but no
+    remote_block_ids (cache MISS). P should prefill locally with
+    num_external_tokens=0 and not enter WAITING_FOR_REMOTE_KVS."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    req = create_request(
+        request_id=504,
+        block_size=BS,
+        num_tokens=int(BS * 2.5),
+        do_remote_decode=True,
+    )
+    # No remote_block_ids set — this is a cache MISS
+    assert req.kv_transfer_params.get("remote_block_ids") is None
+    scheduler.add_request(req)
+    so = scheduler.schedule()
+    # Should NOT wait for remote KVs
+    assert req.status != RequestStatus.WAITING_FOR_REMOTE_KVS
+    assert req.status == RequestStatus.RUNNING
+
+    # Clean up
+    mro = create_model_runner_output(reqs=[req])
+    scheduler.update_from_output(so, mro)
+    so = scheduler.schedule()
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_sending={req.request_id})
+    scheduler.update_from_output(so, mro)
+    assert_scheduler_empty(scheduler)
+
+
+def test_partial_remote_tokens_less_than_prompt():
+    """Edge case 6: D's remote_num_tokens covers only part of P's
+    prompt. P should pull remote_num_tokens worth of external tokens
+    and compute the rest locally."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    NUM_TOKENS = int(BS * 4.5)  # 72 tokens
+    # D provides only 2 blocks (32 tokens) out of 72
+    req = _make_p_node_turn2_request(
+        505,
+        BS,
+        NUM_TOKENS,
+        num_remote_blocks=2,
+        remote_num_tokens=2 * BS,
+    )
+    scheduler.add_request(req)
+    req_id = req.request_id
+    so = scheduler.schedule()
+    assert req.status == RequestStatus.WAITING_FOR_REMOTE_KVS
+    # num_computed_tokens should reflect the external tokens pulled
+    # (capped to remote_num_tokens, not full prompt)
+    assert req.num_computed_tokens < NUM_TOKENS
+
+    # Complete the transfer and finish
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_recving={req_id})
+    scheduler.update_from_output(so, mro)
+    so = scheduler.schedule()
+    mro = create_model_runner_output(reqs=[req])
+    scheduler.update_from_output(so, mro)
+    assert req.status == RequestStatus.FINISHED_LENGTH_CAPPED
+    so = scheduler.schedule()
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_sending={req_id})
+    scheduler.update_from_output(so, mro)
+    assert_scheduler_empty(scheduler)
+
+
+def test_remote_blocks_processed_flag_persists():
+    """Edge case 7: After recv completes and request is rescheduled,
+    the _remote_blocks_processed flag in kv_transfer_params prevents
+    the bidirectional path from re-entering _reqs_need_recv."""
+    vllm_config = create_vllm_config(
+        kv_connector_extra_config=BIDIR_KV_EXTRA_CONFIG,
+    )
+    scheduler = create_scheduler(vllm_config)
+    BS = vllm_config.cache_config.block_size
+    req = _make_p_node_turn2_request(506, BS, int(BS * 2.5))
+    scheduler.add_request(req)
+    req_id = req.request_id
+    conn = scheduler.connector.connector_scheduler
+
+    # First schedule → WAITING_FOR_REMOTE_KVS
+    so = scheduler.schedule()
+    assert req.status == RequestStatus.WAITING_FOR_REMOTE_KVS
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+
+    # Recv completes
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_recving={req_id})
+    scheduler.update_from_output(so, mro)
+
+    # Verify the flag is set
+    assert req.kv_transfer_params.get("_remote_blocks_processed") is True
+
+    # Next schedule: update_state_after_alloc is called again.
+    # _reqs_need_recv must NOT contain this request.
+    so = scheduler.schedule()
+    assert req_id not in conn._reqs_need_recv
+    meta = so.kv_connector_metadata
+    assert isinstance(meta, NixlConnectorMetadata)
+    assert req_id not in meta.reqs_to_recv
+
+    # Clean up
+    mro = create_model_runner_output(reqs=[req])
+    scheduler.update_from_output(so, mro)
+    so = scheduler.schedule()
+    scheduler.update_from_output(so, EMPTY_MODEL_RUNNER_OUTPUT)
+    so = scheduler.schedule()
+    mro = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
+    mro.kv_connector_output = KVConnectorOutput(finished_sending={req_id})
+    scheduler.update_from_output(so, mro)
+    assert_scheduler_empty(scheduler)
diff --git a/tests/v1/kv_connector/unit/test_config.py b/tests/v1/kv_connector/unit/test_config.py
index 8a547c3f03f9..33c9abd09e61 100644
--- a/tests/v1/kv_connector/unit/test_config.py
+++ b/tests/v1/kv_connector/unit/test_config.py
@@ -65,6 +65,75 @@ def test_kv_connector(
         assert "existing_key" not in kv_connector_extra_config
 
 
+def _build_config(
+    *,
+    kv_connector: str | None,
+    enable_sleep_mode: bool = False,
+    enable_cumem_allocator: bool = False,
+) -> VllmConfig:
+    """Build a VllmConfig that exercises _verify_kv_transfer_compat without
+    requiring a real model (avoids HF downloads in CI)."""
+    from types import SimpleNamespace
+
+    kv_transfer_config = (
+        KVTransferConfig(kv_connector=kv_connector, kv_role="kv_both")
+        if kv_connector is not None
+        else None
+    )
+    cfg = VllmConfig.__new__(VllmConfig)
+    cfg.kv_transfer_config = kv_transfer_config
+    cfg.model_config = SimpleNamespace(
+        enable_sleep_mode=enable_sleep_mode,
+        enable_cumem_allocator=(enable_cumem_allocator or enable_sleep_mode),
+    )
+    cfg._verify_kv_transfer_compat()
+    return cfg
+
+
+@pytest.mark.parametrize(
+    "kv_connector", ["NixlConnector", "MooncakeConnectorV1", "SomeOOTConnector"]
+)
+def test_kv_connector_rejects_expandable_segments(monkeypatch, kv_connector):
+    """KV connectors that pin KV cache memory (e.g. via ibv_reg_mr) are
+    invalidated when expandable_segments lets the CUDA VMM allocator remap
+    the underlying physical pages. We can't enumerate every connector that
+    does this (especially OOT ones), so reject the combination whenever any
+    connector is configured."""
+    monkeypatch.setenv("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
+    with pytest.raises(ValueError, match="expandable_segments"):
+        _build_config(kv_connector=kv_connector)
+
+
+def test_kv_connector_allows_expandable_segments_with_sleep_mode(monkeypatch):
+    """Sleep mode routes KV allocations through CuMemAllocator's pool, which
+    auto-disables expandable_segments (see #40812)."""
+    monkeypatch.setenv("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
+    _build_config(kv_connector="NixlConnector", enable_sleep_mode=True)
+
+
+def test_kv_connector_allows_expandable_segments_with_cumem_allocator(
+    monkeypatch,
+):
+    """Manual CuMem allocation must also bypass expandable_segments."""
+    monkeypatch.setenv("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
+    _build_config(kv_connector="NixlConnector", enable_cumem_allocator=True)
+
+
+def test_kv_connector_allows_other_alloc_conf(monkeypatch):
+    """Other PYTORCH_CUDA_ALLOC_CONF values must not be rejected."""
+    monkeypatch.setenv(
+        "PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:512,expandable_segments:False"
+    )
+    _build_config(kv_connector="NixlConnector")
+
+
+def test_no_kv_connector_ignores_expandable_segments(monkeypatch):
+    """The expandable_segments check only applies when a KV connector is
+    configured."""
+    monkeypatch.setenv("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
+    _build_config(kv_connector=None)
+
+
 def test_kv_offloading_size_only_uses_native_default():
     """Test that setting only kv_offloading_size enables native offloading."""
     vllm_config = VllmConfig(
diff --git a/tests/v1/kv_connector/unit/test_decode_bench_connector.py b/tests/v1/kv_connector/unit/test_decode_bench_connector.py
index 30652b3d5c51..5f7c5eeefad0 100644
--- a/tests/v1/kv_connector/unit/test_decode_bench_connector.py
+++ b/tests/v1/kv_connector/unit/test_decode_bench_connector.py
@@ -11,7 +11,6 @@
 import torch
 
 from vllm import SamplingParams
-from vllm.config import KVTransferConfig
 from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorRole
 
 # ruff: noqa: E501
@@ -44,11 +43,9 @@ def __init__(self, block_size: int, num_gpu_blocks: int):
 
         # Create vllm config with DecodeBenchConnector
         vllm_config = create_vllm_config(
-            block_size=block_size, max_num_batched_tokens=1000
-        )
-        vllm_config.kv_transfer_config = KVTransferConfig(
+            block_size=block_size,
+            max_num_batched_tokens=1000,
             kv_connector="DecodeBenchConnector",
-            kv_role="kv_both",
         )
 
         self.vllm_config = vllm_config
@@ -58,7 +55,9 @@ def __init__(self, block_size: int, num_gpu_blocks: int):
 
         # Create worker-side connector
         self.worker_connector = DecodeBenchConnector(
-            vllm_config, KVConnectorRole.WORKER
+            vllm_config,
+            KVConnectorRole.WORKER,
+            self.scheduler.kv_cache_config,
         )
 
         # Create dummy KV caches for testing
diff --git a/tests/v1/kv_connector/unit/test_hf3fs_client.py b/tests/v1/kv_connector/unit/test_hf3fs_client.py
new file mode 100644
index 000000000000..d9c34a8907de
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_hf3fs_client.py
@@ -0,0 +1,284 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Tests for resource management in hf3fs_client.py: constructor failure cleanup
+and idempotent close().  Tests use mock to replace real I/O operations
+(hf3fs_fuse.io, SharedMemory, os, CUDA).
+Requires hf3fs_fuse.io to be installed; skipped otherwise.
+"""
+
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+HF3FS_AVAILABLE = True
+try:
+    from hf3fs_fuse.io import (  # noqa: F401
+        deregister_fd,
+        extract_mount_point,
+        make_ioring,
+        make_iovec,
+        register_fd,
+    )
+
+    from vllm.distributed.kv_transfer.kv_connector.v1.hf3fs.hf3fs_client import (
+        Hf3fsClient,
+    )
+except Exception:
+    HF3FS_AVAILABLE = False
+
+requires_hf3fs = pytest.mark.skipif(
+    not HF3FS_AVAILABLE,
+    reason="hf3fs_fuse.io is not available on this machine",
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+class _FakeShm:
+    """Shared-memory stub matching the multiprocessing.shared_memory.SharedMemory
+    interface used by Hf3fsClient:
+
+    Attributes accessed by the constructor:
+      .buf      – memoryview / buffer-protocol object consumed by torch.frombuffer
+    Methods called during normal lifetime:
+      .unlink() – called right after the iovec is set up
+      .close()  – called in _release_resources()
+    """
+
+    def __init__(self, size: int = 1024):
+        self._data = bytearray(size)
+        self.buf = memoryview(self._data)
+        self.closed = False
+        self.close_call_count = 0
+        self.unlink_call_count = 0
+
+    def close(self):
+        self.closed = True
+        self.close_call_count += 1
+
+    def unlink(self):
+        self.unlink_call_count += 1
+
+
+# ===========================================================================
+# TestHf3fsClientResourceManagement
+# ===========================================================================
+
+
+@requires_hf3fs
+class TestHf3fsClientResourceManagement:
+    """Tests for constructor failure cleanup and idempotent close()."""
+
+    _MOD = "vllm.distributed.kv_transfer.kv_connector.v1.hf3fs.hf3fs_client"
+
+    # ------------------------------------------------------------------
+    # Helper: build a minimal Hf3fsClient bypassing all real I/O so that
+    # we can fully control its internal state.
+    # ------------------------------------------------------------------
+
+    def _make_client(self, tmp_path):
+        """Return a fully-mocked Hf3fsClient with controllable internals."""
+        fake_shm_r = _FakeShm()
+        fake_shm_w = _FakeShm()
+
+        patcher_list: list[Any] = [
+            patch(f"{self._MOD}.HF3FS_AVAILABLE", True),
+            patch(f"{self._MOD}.register_fd"),
+            patch(f"{self._MOD}.deregister_fd"),
+            patch(f"{self._MOD}.extract_mount_point", return_value="/mnt/hf3fs"),
+            patch(f"{self._MOD}.make_ioring", return_value=MagicMock()),
+            patch(f"{self._MOD}.make_iovec", return_value=MagicMock()),
+            patch(
+                "multiprocessing.shared_memory.SharedMemory",
+                side_effect=[fake_shm_r, fake_shm_w],
+            ),
+            patch("os.open", return_value=99),
+            patch("os.ftruncate"),
+            patch("os.close"),
+            patch("os.fsync"),
+            patch("torch.cuda.Stream", return_value=MagicMock()),
+            patch("torch.frombuffer", return_value=MagicMock()),
+            patch("torch.empty", return_value=MagicMock()),
+        ]
+        for p in patcher_list:
+            p.start()
+
+        try:
+            client = Hf3fsClient(
+                path=str(tmp_path / "test.bin"),
+                size=1024,
+                bytes_per_page=256,
+                entries=4,
+            )
+        finally:
+            for p in patcher_list:
+                p.stop()
+
+        # Manually point internal handles to our controllable fakes so that
+        # assertions after close() can inspect them directly.
+        client.shm_r = fake_shm_r
+        client.shm_w = fake_shm_w
+        client.file = 99
+        return client, fake_shm_r, fake_shm_w
+
+    # ------------------------------------------------------------------
+    # close() idempotency
+    # ------------------------------------------------------------------
+
+    def test_close_idempotent_and_handles_cleared(self, tmp_path):
+        """Multiple close() calls must not raise; deregister_fd called exactly
+        once, all handles set to None, shm.close() invoked."""
+        client, shm_r, shm_w = self._make_client(tmp_path)
+
+        with (
+            patch(f"{self._MOD}.deregister_fd") as mock_dereg,
+            patch("os.close"),
+        ):
+            client.close()  # first close
+            client.close()  # second close — must be no-op
+            client.close()  # third close — must be no-op
+
+        assert client._closed is True
+        assert mock_dereg.call_count == 1, (
+            f"deregister_fd called {mock_dereg.call_count} times; expected 1"
+        )
+        for attr in ("iov_r", "iov_w", "ior_r", "ior_w", "shm_r", "shm_w", "file"):
+            assert getattr(client, attr) is None, f"{attr} should be None after close()"
+        assert shm_r.closed is True
+        assert shm_w.closed is True
+
+    def test_flush_after_close_is_noop(self, tmp_path):
+        """flush() after close() must silently do nothing (no fsync call)."""
+        client, _, _ = self._make_client(tmp_path)
+
+        with (
+            patch(f"{self._MOD}.deregister_fd"),
+            patch("os.close"),
+            patch("os.fsync") as mock_fsync,
+        ):
+            client.close()
+            client.flush()
+
+        mock_fsync.assert_not_called()
+
+    # ------------------------------------------------------------------
+    # Constructor failure leaves no leaked resources
+    # ------------------------------------------------------------------
+
+    def test_constructor_failure_after_file_open_cleans_file(self, tmp_path):
+        """If the constructor raises after os.open(), the fd must be closed."""
+        with (
+            patch(f"{self._MOD}.HF3FS_AVAILABLE", True),
+            patch(f"{self._MOD}.register_fd"),
+            patch(f"{self._MOD}.deregister_fd"),
+            patch(
+                f"{self._MOD}.extract_mount_point",
+                side_effect=RuntimeError("mount point not found"),
+            ),
+            patch("os.open", return_value=55),
+            patch("os.ftruncate"),
+            patch("os.close") as mock_os_close,
+            patch("torch.cuda.Stream", return_value=MagicMock()),
+            pytest.raises(RuntimeError, match="mount point not found"),
+        ):
+            Hf3fsClient(
+                path=str(tmp_path / "fail.bin"),
+                size=1024,
+                bytes_per_page=256,
+                entries=4,
+            )
+
+        mock_os_close.assert_called_once_with(55)
+
+    def test_constructor_failure_after_shm_alloc_closes_shm(self, tmp_path):
+        """Constructor raises after SharedMemory creation → both shm objects closed."""
+        fake_shm_r = _FakeShm()
+        fake_shm_w = _FakeShm()
+
+        with (
+            patch(f"{self._MOD}.HF3FS_AVAILABLE", True),
+            patch(f"{self._MOD}.register_fd"),
+            patch(f"{self._MOD}.deregister_fd"),
+            patch(f"{self._MOD}.extract_mount_point", return_value="/mnt/hf3fs"),
+            patch(
+                "multiprocessing.shared_memory.SharedMemory",
+                side_effect=[fake_shm_r, fake_shm_w],
+            ),
+            patch("os.open", return_value=66),
+            patch("os.ftruncate"),
+            patch("os.close"),
+            patch("torch.frombuffer", return_value=MagicMock()),
+            patch("torch.empty", return_value=MagicMock()),
+            patch(
+                f"{self._MOD}.make_ioring",
+                side_effect=RuntimeError("ioring init failed"),
+            ),
+            patch(f"{self._MOD}.make_iovec", return_value=MagicMock()),
+            patch("torch.cuda.Stream", return_value=MagicMock()),
+            pytest.raises(RuntimeError, match="ioring init failed"),
+        ):
+            Hf3fsClient(
+                path=str(tmp_path / "fail2.bin"),
+                size=1024,
+                bytes_per_page=256,
+                entries=4,
+            )
+
+        assert fake_shm_r.closed is True, (
+            "shm_r was not closed after constructor failure"
+        )
+        assert fake_shm_w.closed is True, (
+            "shm_w was not closed after constructor failure"
+        )
+
+    def test_constructor_failure_does_not_close_unallocated_shm(self, tmp_path):
+        """Failure before SharedMemory is created must not raise AttributeError
+        or TypeError from cleanup."""
+        with (
+            patch(f"{self._MOD}.HF3FS_AVAILABLE", True),
+            patch(f"{self._MOD}.register_fd"),
+            patch(f"{self._MOD}.deregister_fd"),
+            patch(
+                f"{self._MOD}.extract_mount_point",
+                side_effect=RuntimeError("early failure"),
+            ),
+            patch("os.open", return_value=77),
+            patch("os.ftruncate"),
+            patch("os.close"),
+            patch("torch.cuda.Stream", return_value=MagicMock()),
+            pytest.raises(RuntimeError, match="early failure"),
+        ):
+            Hf3fsClient(
+                path=str(tmp_path / "early_fail.bin"),
+                size=1024,
+                bytes_per_page=256,
+                entries=4,
+            )
+
+    # ------------------------------------------------------------------
+    # _release_resources on already-cleared state must be a no-op
+    # ------------------------------------------------------------------
+
+    def test_release_resources_on_empty_state_is_safe(self, tmp_path):
+        """_release_resources() on a fully-cleared client must not raise."""
+        client, _, _ = self._make_client(tmp_path)
+
+        with (
+            patch(f"{self._MOD}.deregister_fd"),
+            patch("os.close"),
+        ):
+            client.close()  # clears all handles
+
+        with (
+            patch(f"{self._MOD}.deregister_fd") as mock_dereg2,
+            patch("os.close") as mock_os_close2,
+        ):
+            client._release_resources()  # must not raise
+
+        mock_dereg2.assert_not_called()
+        mock_os_close2.assert_not_called()
diff --git a/tests/v1/kv_connector/unit/test_hf3fs_connector.py b/tests/v1/kv_connector/unit/test_hf3fs_connector.py
new file mode 100644
index 000000000000..94bb94c6fbd7
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_hf3fs_connector.py
@@ -0,0 +1,230 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Tests for HF3FS KV Connector high-level components:
+  - TestHf3fsMockClient      : file-backed mock client I/O correctness
+  - TestHF3FSKVConnectorStats: metric collection, aggregation, serialisation
+"""
+
+import os
+from unittest.mock import MagicMock
+
+import pytest
+import torch
+
+from vllm.distributed.kv_transfer.kv_connector.v1.hf3fs.hf3fs_connector import (
+    HF3FSKVConnectorStats,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.hf3fs.utils.hf3fs_mock_client import (
+    Hf3fsClient as MockHf3fsClient,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def hf3fs_stats():
+    """Fresh HF3FSKVConnectorStats instance."""
+    return HF3FSKVConnectorStats()
+
+
+def _make_cuda_event():
+    """Return a real CUDA event when available, otherwise a MagicMock."""
+    if torch.cuda.is_available():
+        return torch.cuda.Event()
+    return MagicMock()
+
+
+# ===========================================================================
+# TestHf3fsMockClient
+# ===========================================================================
+
+
+class TestHf3fsMockClient:
+    """Tests for hf3fs_mock_client.Hf3fsClient (file-backend mock)."""
+
+    def test_init_creates_file(self, tmp_path):
+        """Initializing the client should create the backing file."""
+        path = str(tmp_path / "test_file")
+        client = MockHf3fsClient(path=path, size=4096, bytes_per_page=512, entries=4)
+        assert os.path.exists(path), "Backing file should be created on init"
+        assert os.path.getsize(path) == 4096
+        client.close()
+
+    @pytest.mark.parametrize(
+        "dtype, bytes_per_page",
+        [
+            (torch.float32, 512),
+            (torch.float16, 256),
+            (torch.bfloat16, 256),
+        ],
+        ids=["float32", "float16", "bfloat16"],
+    )
+    def test_batch_write_and_read_dtype(self, tmp_path, dtype, bytes_per_page):
+        """Write a tensor of the given dtype and verify round-trip correctness."""
+        path = str(tmp_path / f"rw_{dtype}")
+        client = MockHf3fsClient(
+            path=path, size=bytes_per_page * 8, bytes_per_page=bytes_per_page, entries=4
+        )
+        elem_size = torch.tensor([], dtype=dtype).element_size()
+        numel = bytes_per_page // elem_size
+        tensor_write = torch.arange(numel, dtype=dtype)
+        event = _make_cuda_event()
+
+        results = client.batch_write([0], [tensor_write], event)
+        assert results == [bytes_per_page], f"Write should succeed, got {results}"
+
+        tensor_read = torch.zeros(numel, dtype=dtype)
+        results = client.batch_read([0], [tensor_read])
+        assert results == [bytes_per_page], f"Read should succeed, got {results}"
+        assert torch.equal(tensor_write, tensor_read), (
+            "Read tensor should match written tensor"
+        )
+        client.close()
+
+    def test_batch_read_empty_file_returns_error(self, tmp_path):
+        """Reading out-of-bounds offset should return -1."""
+        bytes_per_page = 128
+        size = bytes_per_page * 4
+        path = str(tmp_path / "empty_read")
+        client = MockHf3fsClient(
+            path=path, size=size, bytes_per_page=bytes_per_page, entries=4
+        )
+        numel = bytes_per_page // 4
+        tensor_read = torch.zeros(numel, dtype=torch.float32)
+        results = client.batch_read([size], [tensor_read])  # offset == size => OOB
+        assert results[0] == -1, "Out-of-bounds read should return -1"
+        client.close()
+
+    def test_batch_write_out_of_bounds_returns_error(self, tmp_path):
+        """Writing at an offset beyond file size should return -1."""
+        bytes_per_page = 128
+        size = bytes_per_page * 4
+        path = str(tmp_path / "oob_write")
+        client = MockHf3fsClient(
+            path=path, size=size, bytes_per_page=bytes_per_page, entries=4
+        )
+        numel = bytes_per_page // 4
+        tensor = torch.ones(numel, dtype=torch.float32)
+        event = _make_cuda_event()
+        results = client.batch_write([size], [tensor], event)  # OOB offset
+        assert results[0] == -1, "Out-of-bounds write should return -1"
+        client.close()
+
+    def test_multiple_tensors_rw(self, tmp_path):
+        """Write multiple tensors at different offsets, then read all back."""
+        bytes_per_page = 128
+        n = 4
+        path = str(tmp_path / "multi_rw")
+        client = MockHf3fsClient(
+            path=path,
+            size=bytes_per_page * n * 2,
+            bytes_per_page=bytes_per_page,
+            entries=8,
+        )
+        tensors_write = [
+            torch.full((bytes_per_page // 4,), float(i), dtype=torch.float32)
+            for i in range(n)
+        ]
+        offsets = [i * bytes_per_page for i in range(n)]
+        event = _make_cuda_event()
+
+        results = client.batch_write(offsets, tensors_write, event)
+        assert all(r == bytes_per_page for r in results)
+
+        tensors_read = [
+            torch.zeros(bytes_per_page // 4, dtype=torch.float32) for _ in range(n)
+        ]
+        results = client.batch_read(offsets, tensors_read)
+        assert all(r == bytes_per_page for r in results)
+
+        for i, (tw, tr) in enumerate(zip(tensors_write, tensors_read)):
+            assert torch.allclose(tw, tr), f"Tensor {i} mismatch after round-trip"
+        client.close()
+
+    def test_flush_and_close_no_error(self, tmp_path):
+        """flush() and close() should not raise exceptions."""
+        path = str(tmp_path / "flush_close")
+        client = MockHf3fsClient(path=path, size=1024, bytes_per_page=128, entries=4)
+        client.flush()
+        client.close()
+
+
+# ===========================================================================
+# TestHF3FSKVConnectorStats
+# ===========================================================================
+
+
+class TestHF3FSKVConnectorStats:
+    """Tests for HF3FSKVConnectorStats metric collection and aggregation."""
+
+    def test_initial_is_empty(self, hf3fs_stats):
+        """Fresh stats object should report is_empty() == True."""
+        assert hf3fs_stats.is_empty() is True
+
+    @pytest.mark.parametrize(
+        "task_type, duration_key",
+        [
+            ("Saved", "save_duration"),
+            ("Loaded", "load_duration"),
+        ],
+        ids=["save", "load"],
+    )
+    def test_record_success_duration(self, hf3fs_stats, task_type, duration_key):
+        """Recording a successful task should update duration list and total count."""
+        hf3fs_stats.record_success_task_duration(task_type, 0.5)
+        assert not hf3fs_stats.is_empty()
+        assert len(hf3fs_stats.data[duration_key]) == 1
+        assert hf3fs_stats.data[duration_key][0] == pytest.approx(0.5)
+        assert hf3fs_stats.data["num_transfer_task"] == 1
+
+    @pytest.mark.parametrize(
+        "task_type, failed_key",
+        [
+            ("Saved", "num_failed_save"),
+            ("Loaded", "num_failed_load"),
+        ],
+        ids=["save", "load"],
+    )
+    def test_record_failed_task(self, hf3fs_stats, task_type, failed_key):
+        """Recording a failed task should increment the corresponding counter."""
+        hf3fs_stats.record_failed_task_count(task_type)
+        assert hf3fs_stats.data[failed_key] == 1
+        assert hf3fs_stats.data["num_transfer_task"] == 1
+
+    def test_aggregate_two_stats(self):
+        """aggregate() should merge save/load duration lists and sum counters."""
+        stats1 = HF3FSKVConnectorStats()
+        stats1.record_success_task_duration("Saved", 0.1)
+        stats1.record_success_task_duration("Loaded", 0.2)
+
+        stats2 = HF3FSKVConnectorStats()
+        stats2.record_success_task_duration("Saved", 0.3)
+        stats2.record_failed_task_count("Loaded")
+
+        stats1.aggregate(stats2)
+        assert stats1.data["save_duration"] == pytest.approx([0.1, 0.3])
+        assert stats1.data["load_duration"] == pytest.approx([0.2])
+        assert stats1.data["num_failed_load"] == 1
+        assert stats1.data["num_transfer_task"] == 4
+
+    def test_reduce_with_data(self):
+        """reduce() computes correct averages when data is present."""
+        stats = HF3FSKVConnectorStats()
+        stats.record_success_task_duration("Saved", 1.0)
+        stats.record_success_task_duration("Saved", 3.0)
+        result = stats.reduce()
+        assert result["Num save task success"] == pytest.approx(2.0, rel=0.01)
+        assert result["Num save task failed"] == pytest.approx(0.0, rel=0.01)
+        assert result["Avg save duration (ms)"] == pytest.approx(2000.0, rel=0.01)
+
+    def test_clone_and_reset(self, hf3fs_stats):
+        """clone_and_reset() returns a copy with data and resets the original."""
+        hf3fs_stats.record_success_task_duration("Saved", 0.7)
+        hf3fs_stats.record_success_task_duration("Loaded", 0.4)
+
+        clone = hf3fs_stats.clone_and_reset()
+        assert clone.data["num_transfer_task"] == 2
+        assert hf3fs_stats.is_empty()
diff --git a/tests/v1/kv_connector/unit/test_hf3fs_metadata_server.py b/tests/v1/kv_connector/unit/test_hf3fs_metadata_server.py
new file mode 100644
index 000000000000..f922c7c85586
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_hf3fs_metadata_server.py
@@ -0,0 +1,193 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Tests for HF3FS metadata server data structures and allocation logic:
+  - RankFileMetadata : page allocation / release primitives
+  - KeyMetadata      : per-key rank-page tracking and completion detection
+  - GlobalMetadataState : coordinated allocation with cache-hit semantics
+"""
+
+import pytest
+
+from vllm.distributed.kv_transfer.kv_connector.v1.hf3fs.hf3fs_metadata_server import (
+    GlobalMetadataState,
+    KeyMetadata,
+    RankFileMetadata,
+)
+
+# ===========================================================================
+# TestRankFileMetadata
+# ===========================================================================
+
+
+class TestRankFileMetadata:
+    """Unit tests for RankFileMetadata page allocation primitives."""
+
+    @pytest.mark.parametrize(
+        "alloc_count, expected_pages",
+        [(3, 3), (5, 0)],
+        ids=["alloc_partial", "alloc_exceeds"],
+    )
+    def test_allocate_pages(self, alloc_count, expected_pages):
+        """allocate_pages returns correct pages or empty list when insufficient."""
+        rank_meta = RankFileMetadata(rank_id=0, num_pages=3, free_pages=list(range(3)))
+        pages = rank_meta.allocate_pages(alloc_count)
+        assert len(pages) == expected_pages
+        if expected_pages > 0:
+            rank_meta.release_pages(pages)
+            assert rank_meta.get_free_page_count() == 3
+
+    def test_release_pages_restores_count(self):
+        """Releasing allocated pages returns them to the free pool."""
+        rank_meta = RankFileMetadata(rank_id=0, num_pages=4, free_pages=list(range(4)))
+        pages = rank_meta.allocate_pages(2)
+        assert rank_meta.get_free_page_count() == 2
+        rank_meta.release_pages(pages)
+        assert rank_meta.get_free_page_count() == 4
+
+    def test_release_pages_no_duplicates(self):
+        """Releasing the same page twice must not create duplicates."""
+        rank_meta = RankFileMetadata(rank_id=0, num_pages=3, free_pages=list(range(3)))
+        rank_meta.allocate_pages(1)  # takes page 0
+        rank_meta.release_pages([0])
+        rank_meta.release_pages([0])  # second release of the same page
+        assert rank_meta.get_free_page_count() == 3
+
+
+# ===========================================================================
+# TestKeyMetadata
+# ===========================================================================
+
+
+class TestKeyMetadata:
+    """Unit tests for KeyMetadata completion tracking."""
+
+    def test_is_complete_false_until_all_ranks(self):
+        """is_complete() returns True only when all ranks confirmed."""
+        key_meta = KeyMetadata(key="k", rank_to_page={}, tp_world_size=2)
+        assert key_meta.is_complete() is False
+        key_meta.add_rank_page(0, 5)
+        assert key_meta.is_complete() is False
+        key_meta.add_rank_page(1, 10)
+        assert key_meta.is_complete() is True
+
+    def test_get_rank_page_returns_none_for_missing_rank(self):
+        """get_rank_page() returns None when the rank has no entry."""
+        key_meta = KeyMetadata(key="k", rank_to_page={0: 3}, tp_world_size=2)
+        assert key_meta.get_rank_page(0) == 3
+        assert key_meta.get_rank_page(1) is None
+
+    def test_get_all_pages(self):
+        """get_all_pages() returns all (rank, page) pairs."""
+        key_meta = KeyMetadata(key="k", rank_to_page={0: 1, 1: 2}, tp_world_size=2)
+        pairs = key_meta.get_all_pages()
+        assert set(pairs) == {(0, 1), (1, 2)}
+
+
+# ===========================================================================
+# TestGlobalMetadataStateAllocation
+# ===========================================================================
+
+
+class TestGlobalMetadataStateAllocation:
+    """Tests for GlobalMetadataState allocation and cache-hit semantics."""
+
+    def test_uninitialized_rank_raises_on_allocate(self):
+        """allocate_pages_for_keys raises ValueError for unknown rank."""
+        state = GlobalMetadataState()
+        with pytest.raises((ValueError, Exception)):
+            state.allocate_pages_for_keys(99, [("key", "")])
+
+    def test_uninitialized_rank_raises_on_get_locations(self):
+        """get_key_locations raises ValueError for unknown rank."""
+        state = GlobalMetadataState()
+        with pytest.raises((ValueError, Exception)):
+            state.get_key_locations(99, ["any_key"])
+
+    def test_basic_allocation_and_confirm(self):
+        """Allocating a page and confirming it marks the key as complete."""
+        state = GlobalMetadataState()
+        state.initialize_rank(0, 4)
+
+        results = state.allocate_pages_for_keys(0, [("K", "")])
+        assert results["K"] >= 0
+
+        state.confirm_write_for_keys(0, [("K", results["K"])])
+        assert state.batch_key_exists(["K"]) == [True]
+        locations = state.get_key_locations(0, ["K"])
+        assert locations == [results["K"]]
+
+    def test_allocate_pages_cache_hit_does_not_leak_pages(self):
+        """Cache-hit key must not consume a page from the free pool;
+        the pre-allocated slot must be returned before reusing the existing page.
+        """
+        state = GlobalMetadataState()
+        state.initialize_rank(0, 5)  # 5 free pages: [0,1,2,3,4]
+
+        # Simulate a key that has already been fully written and confirmed.
+        state.key_metadata["K_cached"] = KeyMetadata(
+            key="K_cached", rank_to_page={0: 2}, tp_world_size=1
+        )
+
+        free_before = state.rank_metadata[0].get_free_page_count()  # 5
+
+        results = state.allocate_pages_for_keys(0, [("K_cached", ""), ("K_new", "")])
+
+        free_after = state.rank_metadata[0].get_free_page_count()
+
+        # Cache-hit key must reuse its existing page.
+        assert results["K_cached"] == 2, (
+            f"Cache-hit key should reuse page 2, got {results['K_cached']}"
+        )
+        # New key must receive a valid page.
+        assert results["K_new"] >= 0, (
+            f"New key should get a valid page, got {results['K_new']}"
+        )
+        # Exactly one page consumed from the free pool.
+        assert free_before - free_after == 1, (
+            f"Expected 1 page consumed, got delta={free_before - free_after}"
+        )
+
+    def test_allocate_pages_all_cache_hits_frees_all_slots(self):
+        """When every key in the batch is a cache hit, no pages are consumed."""
+        state = GlobalMetadataState()
+        state.initialize_rank(0, 5)
+
+        for key, page in (("K1", 0), ("K2", 1)):
+            state.key_metadata[key] = KeyMetadata(
+                key=key, rank_to_page={0: page}, tp_world_size=1
+            )
+
+        free_before = state.rank_metadata[0].get_free_page_count()
+        results = state.allocate_pages_for_keys(0, [("K1", ""), ("K2", "")])
+        free_after = state.rank_metadata[0].get_free_page_count()
+
+        assert results["K1"] == 0
+        assert results["K2"] == 1
+        assert free_after == free_before, (
+            f"All-cache-hit batch must not consume free pages; "
+            f"before={free_before}, after={free_after}"
+        )
+
+    def test_allocate_returns_minus_one_when_pool_exhausted(self):
+        """If the free pool is exhausted, all new keys receive -1."""
+        state = GlobalMetadataState()
+        state.initialize_rank(0, 1)  # only 1 free page
+
+        results = state.allocate_pages_for_keys(0, [("K1", ""), ("K2", "")])
+        # allocate_pages uses all-or-nothing: 2 needed but only 1 available → []
+        assert all(v == -1 for v in results.values()), f"Expected all -1, got {results}"
+
+    def test_confirm_write_releases_pages(self):
+        """confirm_write_for_keys with pages_to_release returns them to pool."""
+        state = GlobalMetadataState()
+        state.initialize_rank(0, 3)
+
+        results = state.allocate_pages_for_keys(0, [("K", "")])
+        page = results["K"]
+        free_after_alloc = state.rank_metadata[0].get_free_page_count()
+
+        state.confirm_write_for_keys(0, [("K", page)], pages_to_release=[page])
+        free_after_release = state.rank_metadata[0].get_free_page_count()
+
+        assert free_after_release == free_after_alloc + 1
diff --git a/tests/v1/kv_connector/unit/test_kv_connector_lifecycle.py b/tests/v1/kv_connector/unit/test_kv_connector_lifecycle.py
index 4ba6b2201d0e..b6b18b092b9b 100644
--- a/tests/v1/kv_connector/unit/test_kv_connector_lifecycle.py
+++ b/tests/v1/kv_connector/unit/test_kv_connector_lifecycle.py
@@ -1,14 +1,18 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from unittest.mock import MagicMock, patch
+
 from vllm.distributed.kv_transfer.kv_connector.v1.example_connector import (  # noqa: E501
     ExampleConnectorMetadata,
 )
 from vllm.distributed.kv_transfer.kv_transfer_state import (
     ensure_kv_transfer_initialized,
+    ensure_kv_transfer_shutdown,
     get_kv_transfer_group,
 )
 from vllm.v1.core.sched.output import CachedRequestData, SchedulerOutput
+from vllm.v1.kv_cache_interface import KVCacheConfig
 from vllm.v1.worker.kv_connector_model_runner_mixin import KVConnectorModelRunnerMixin
 
 # Importing utils registers TestExampleConnector with the factory
@@ -31,13 +35,26 @@ def _make_empty_scheduler_output():
 
 
 def test_kv_connector_mixin_clears_metadata():
-    vllm_config = create_vllm_config()
-    vllm_config.kv_transfer_config.kv_connector = "TestExampleConnector"
-    vllm_config.kv_transfer_config.kv_role = "kv_both"
-    vllm_config.kv_transfer_config.kv_connector_extra_config["name"] = "unit"
+    vllm_config = create_vllm_config(
+        kv_connector="TestExampleConnector",
+        kv_role="kv_both",
+        kv_connector_extra_config={"name": "unit"},
+    )
+
+    kv_cache_config = KVCacheConfig(
+        num_blocks=0, kv_cache_tensors=[], kv_cache_groups=[]
+    )
+    # Initialize the global connector instance.
+    # kv_transfer init now syncs engine_id across TP, so unit tests need
+    # a minimal mocked TP group.
+    mock_tp_group = MagicMock()
+    mock_tp_group.broadcast_object.side_effect = lambda value, src=0: value
 
-    # Initialize the global connector instance
-    ensure_kv_transfer_initialized(vllm_config)
+    with patch(
+        "vllm.distributed.parallel_state.get_tp_group",
+        return_value=mock_tp_group,
+    ):
+        ensure_kv_transfer_initialized(vllm_config, kv_cache_config)
 
     try:
         # Minimal scheduler output with empty metadata; mixin should still
@@ -57,4 +74,4 @@ def test_kv_connector_mixin_clears_metadata():
         assert connector.call_record.get("clear_connector_metadata", 0) == 1
     finally:
         # Ensure we clean up the global connector between tests
-        KVConnectorModelRunnerMixin.ensure_kv_transfer_shutdown()
+        ensure_kv_transfer_shutdown()
diff --git a/tests/v1/kv_connector/unit/test_mooncake_connector.py b/tests/v1/kv_connector/unit/test_mooncake_connector.py
index f21f8ecdc5c2..44292d94e147 100644
--- a/tests/v1/kv_connector/unit/test_mooncake_connector.py
+++ b/tests/v1/kv_connector/unit/test_mooncake_connector.py
@@ -26,11 +26,16 @@
 )
 from vllm.utils.network_utils import get_open_port
 from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend
+from vllm.v1.kv_cache_interface import KVCacheConfig
 from vllm.v1.request import RequestStatus
 
 from .utils import create_request, create_scheduler, create_vllm_config
 
 
+def _make_test_kv_cache_config() -> KVCacheConfig:
+    return KVCacheConfig(num_blocks=0, kv_cache_tensors=[], kv_cache_groups=[])
+
+
 class FakeMooncakeWrapper:
     """Mock Mooncake TransferEngine for unit testing environments."""
 
@@ -91,8 +96,10 @@ def test_basic_interface():
     assert request_id in kv_connector_metadata.reqs_to_recv["my-engine-id"]
     req_meta = kv_connector_metadata.reqs_to_recv["my-engine-id"][request_id]
 
+    # local_block_ids is list[list[int]] (per-group); flatten for comparison.
+    all_block_ids = [bid for group in req_meta.local_block_ids for bid in group]
     for block_id, block in zip(
-        req_meta.local_block_ids,
+        all_block_ids,
         scheduler.kv_cache_manager.coordinator.single_type_managers[0].req_to_blocks[
             request_id
         ],
@@ -228,15 +235,15 @@ def test_scheduler_request_finished():
 
     # Case: Capped length (Successful prefill, need to send to decoder)
     request.status = RequestStatus.FINISHED_LENGTH_CAPPED
-    delay_free, _ = scheduler_connector.request_finished(request, block_ids=[10, 11])
+    delay_free, _ = scheduler_connector.request_finished(request, block_ids=([10, 11],))
     assert delay_free is True
     assert "id-1" in scheduler_connector._reqs_need_send
-    assert scheduler_connector._reqs_need_send["id-1"][1] == [10, 11]
+    assert scheduler_connector._reqs_need_send["id-1"][1] == [[10, 11]]
 
     # Case: Aborted (No need to transfer, free blocks immediately)
     scheduler_connector._reqs_need_send.clear()
     request.status = RequestStatus.FINISHED_ABORTED
-    delay_free, _ = scheduler_connector.request_finished(request, block_ids=[12])
+    delay_free, _ = scheduler_connector.request_finished(request, block_ids=([12],))
     assert delay_free is False
     assert len(scheduler_connector._reqs_need_send) == 0
     assert "id-1" in scheduler_connector._reqs_not_processed
@@ -319,7 +326,11 @@ async def test_kv_producer(monkeypatch):
     )
 
     with set_current_vllm_config(vllm_config), patch_worker_dependencies():
-        prefill_connector = MooncakeConnector(vllm_config, KVConnectorRole.WORKER)
+        prefill_connector = MooncakeConnector(
+            vllm_config,
+            KVConnectorRole.WORKER,
+            _make_test_kv_cache_config(),
+        )
         prefill_worker = prefill_connector.connector_worker
         prefill_worker.kv_caches_base_addr = [0x1000]
         block_len = 4096
@@ -334,7 +345,7 @@ async def test_kv_producer(monkeypatch):
         send_meta = SendBlockMeta(
             p_req_id="p-req-1",
             transfer_id=transfer_id,
-            local_block_ids=[10, 11],
+            local_block_ids=[[10, 11]],
             ready=asyncio.Event(),
         )
         prefill_worker.reqs_need_send[transfer_id] = send_meta
@@ -346,7 +357,7 @@ async def test_kv_producer(monkeypatch):
             remote_port=54321,
             remote_tp_size=1,
             remote_tp_rank=0,
-            req_blocks={"d-req-1": (transfer_id, [20, 21])},
+            req_blocks={"d-req-1": (transfer_id, [[20, 21]])},
             kv_caches_base_addr=[0x2000],
             block_lens=[block_len],
         )
@@ -389,7 +400,7 @@ async def test_kv_producer(monkeypatch):
             prefill_worker.reqs_need_send[transfer_id] = send_meta
             send_meta.sent = 0
             send_meta.ready.set()
-            xfer_meta.req_blocks["d-req-1"] = (transfer_id, [20])
+            xfer_meta.req_blocks["d-req-1"] = (transfer_id, [[20]])
             # Worker processes the consumer's request
             await prefill_worker.send_kv_to_decode(identity, mock_socket, xfer_meta)
             # Verify transfer parameters are correct: 11 to 20
@@ -407,7 +418,7 @@ async def test_kv_producer(monkeypatch):
             prefill_worker.reqs_need_send[transfer_id] = send_meta
             send_meta.sent = 0
             send_meta.ready.set()
-            xfer_meta.req_blocks["d-req-1"] = (transfer_id, [20, 21, 22])
+            xfer_meta.req_blocks["d-req-1"] = (transfer_id, [[20, 21, 22]])
             # Worker processes the consumer's request
             await prefill_worker.send_kv_to_decode(identity, mock_socket, xfer_meta)
             # This should not be called because error.
@@ -424,7 +435,7 @@ async def test_kv_producer(monkeypatch):
             prefill_worker.reqs_need_send[transfer_id] = send_meta
             send_meta.sent = 0
             send_meta.ready.clear()
-            xfer_meta.req_blocks["d-req-1"] = (transfer_id, [20, 21])
+            xfer_meta.req_blocks["d-req-1"] = (transfer_id, [[20, 21]])
             # Worker processes the consumer's request
             await prefill_worker.send_kv_to_decode(identity, mock_socket, xfer_meta)
             # This should not be called because timeout.
@@ -443,7 +454,7 @@ async def test_kv_producer(monkeypatch):
             prefill_worker.reqs_need_send[transfer_id] = send_meta
             send_meta.sent = 0
             send_meta.ready.set()
-            xfer_meta.req_blocks["d-req-1"] = (transfer_id, [20, 21])
+            xfer_meta.req_blocks["d-req-1"] = (transfer_id, [[20, 21]])
             # Worker processes the consumer's request
             await prefill_worker.send_kv_to_decode(identity, mock_socket, xfer_meta)
             mock_send_blocks.assert_called_once()
@@ -471,7 +482,11 @@ async def test_kv_consumuer(monkeypatch):
     )
 
     with set_current_vllm_config(vllm_config), patch_worker_dependencies() as mocks:
-        decode_connector = MooncakeConnector(vllm_config, KVConnectorRole.WORKER)
+        decode_connector = MooncakeConnector(
+            vllm_config,
+            KVConnectorRole.WORKER,
+            _make_test_kv_cache_config(),
+        )
         decode_worker = decode_connector.connector_worker
         decode_worker.kv_caches_base_addr = [0x1000]
         decode_worker.rpc_port = 54321
@@ -481,7 +496,7 @@ async def test_kv_consumuer(monkeypatch):
             "d-req-1": PullReqMeta(
                 d_req_id="d-req-1",
                 transfer_id="xfer-req-1",
-                local_block_ids=[100, 101],
+                local_block_ids=[[100, 101]],
                 remote_engine_id="p-engine",
                 remote_bootstrap_addr="http://bootstrap:33333",
                 pull_tasks_count=1,
@@ -514,7 +529,7 @@ async def test_kv_consumuer(monkeypatch):
 
         assert sent_meta.remote_hostname == "127.0.0.1"
         assert sent_meta.remote_port == 54321
-        assert sent_meta.req_blocks["d-req-1"] == ("xfer-req-1", [100, 101])
+        assert sent_meta.req_blocks["d-req-1"] == ("xfer-req-1", [[100, 101]])
 
         # Verify internal state is updated correctly.
         assert "d-req-1" in decode_worker.finished_recving_reqs
@@ -531,14 +546,18 @@ async def test_worker_get_finished_timeout(monkeypatch):
         kv_connector="MooncakeConnector", kv_role="kv_producer"
     )
     with set_current_vllm_config(vllm_config), patch_worker_dependencies():
-        prefill_connector = MooncakeConnector(vllm_config, KVConnectorRole.WORKER)
+        prefill_connector = MooncakeConnector(
+            vllm_config,
+            KVConnectorRole.WORKER,
+            _make_test_kv_cache_config(),
+        )
         prefill_worker = prefill_connector.connector_worker
 
         # Add an expired request (expire_time is in the past).
         prefill_worker.reqs_need_send["tx-expired"] = SendBlockMeta(
             p_req_id="p-req-expired",
             transfer_id="tx-expired",
-            local_block_ids=[1, 2],
+            local_block_ids=[[1, 2]],
             ready=MagicMock(),
             expire_time=time.perf_counter() - 100,
         )
@@ -547,7 +566,7 @@ async def test_worker_get_finished_timeout(monkeypatch):
         prefill_worker.reqs_need_send["tx-active"] = SendBlockMeta(
             p_req_id="p-req-active",
             transfer_id="tx-active",
-            local_block_ids=[3, 4],
+            local_block_ids=[[3, 4]],
             ready=MagicMock(),
             expire_time=time.perf_counter() + 100,
         )
@@ -577,7 +596,11 @@ def test_register_kv_caches():
             "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.mooncake_connector.threading.Thread"
         ) as mock_thread,
     ):
-        connector = MooncakeConnector(vllm_config, KVConnectorRole.WORKER)
+        connector = MooncakeConnector(
+            vllm_config,
+            KVConnectorRole.WORKER,
+            _make_test_kv_cache_config(),
+        )
         worker = connector.connector_worker
         mock_thread.return_value.is_alive.return_value = False
 
@@ -609,6 +632,55 @@ def test_register_kv_caches():
                 assert bl == tensor1[0].nbytes // tensor1.shape[1]
 
 
+def test_register_kv_caches_supports_mixed_mla_and_eagle_shapes():
+    """Mixed MLA+Eagle caches should register by byte length, not shape."""
+
+    vllm_config = create_vllm_config(
+        kv_connector="MooncakeConnector", kv_role="kv_consumer"
+    )
+
+    with (
+        set_current_vllm_config(vllm_config),
+        patch_worker_dependencies(),
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.mooncake_connector.threading.Event"
+        ),
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.mooncake_connector.threading.Thread"
+        ) as mock_thread,
+    ):
+        connector = MooncakeConnector(
+            vllm_config,
+            KVConnectorRole.WORKER,
+            _make_test_kv_cache_config(),
+        )
+        worker = connector.connector_worker
+        mock_thread.return_value.is_alive.return_value = False
+
+        worker.use_mla = True
+        worker.transfer_topo.is_mla = True
+
+        # MLA cache tensor: shape[-2] is the block size.
+        mla_cache = torch.zeros((2, 16, 96), dtype=torch.float16)
+        # Eagle3/GQA-like cache tensor: shape[-2] is num_kv_heads, not block size.
+        eagle_cache = torch.zeros((2, 16, 8, 64), dtype=torch.float16)
+        kv_caches = {"mla_layer": mla_cache, "eagle_layer": eagle_cache}
+
+        with patch.object(
+            worker.engine, "batch_register_memory", return_value=0
+        ) as mock_batch_register:
+            connector.register_kv_caches(kv_caches)
+
+        mock_batch_register.assert_called_once()
+        registered_ptrs, registered_lens = mock_batch_register.call_args[0]
+        assert registered_ptrs == [mla_cache.data_ptr(), eagle_cache.data_ptr()]
+        assert registered_lens == [mla_cache.nbytes, eagle_cache.nbytes]
+        assert worker.block_len_per_layer == [
+            mla_cache.nbytes // mla_cache.shape[0],
+            eagle_cache.nbytes // eagle_cache.shape[0],
+        ]
+
+
 @pytest.mark.asyncio
 @patch(
     "vllm.distributed.kv_transfer.kv_connector.v1.mooncake."
@@ -641,15 +713,19 @@ async def test_kv_producer_heterogeneous_tp(monkeypatch, d_tp_size):
     )
 
     with set_current_vllm_config(vllm_config), patch_worker_dependencies():
-        prefill_connector = MooncakeConnector(vllm_config, KVConnectorRole.WORKER)
+        prefill_connector = MooncakeConnector(
+            vllm_config,
+            KVConnectorRole.WORKER,
+            _make_test_kv_cache_config(),
+        )
         prefill_worker = prefill_connector.connector_worker
 
         # Override TP rank/size to simulate P TP=2
         prefill_worker.tp_rank = P_TP_RANK
         prefill_worker.tp_size = P_TP_SIZE
-        # Update shared dict so kv_topo sees correct TP size
         prefill_worker._tp_size[prefill_worker.engine_id] = P_TP_SIZE
-        prefill_worker.kv_topo.tp_rank = P_TP_RANK
+        prefill_worker.transfer_topo.tp_rank = P_TP_RANK
+        prefill_worker.transfer_topo.tp_size = P_TP_SIZE
 
         prefill_worker.kv_caches_base_addr = [0x1000]
         prefill_worker.block_len_per_layer = [local_block_len]
@@ -658,7 +734,7 @@ async def test_kv_producer_heterogeneous_tp(monkeypatch, d_tp_size):
         prefill_worker.sender_loop = asyncio.get_event_loop()
 
         transfer_id = "xfer-hetero-1"
-        local_block_ids = [10, 11]
+        local_block_ids = [[10, 11]]
         send_meta = SendBlockMeta(
             p_req_id="p-req-h1",
             transfer_id=transfer_id,
@@ -669,15 +745,15 @@ async def test_kv_producer_heterogeneous_tp(monkeypatch, d_tp_size):
         send_meta.ready.set()
 
         # Compute target D ranks using the production code path
-        target_d_ranks = prefill_worker.kv_topo.get_target_remote_ranks(d_tp_size)
+        target_d_ranks = prefill_worker.transfer_topo.handshake_target_ranks(d_tp_size)
 
         mock_socket = AsyncMock(spec=zmq.asyncio.Socket)
         mock_socket.send_multipart = AsyncMock()
         identity = b"consumer-hetero"
 
-        # Assign different remote block IDs per D rank
+        # Assign different remote block IDs per D rank (nested per-group)
         d_rank_remote_blocks = {
-            rank: [20 + i * 10, 21 + i * 10] for i, rank in enumerate(target_d_ranks)
+            rank: [[20 + i * 10, 21 + i * 10]] for i, rank in enumerate(target_d_ranks)
         }
 
         with patch.object(
@@ -712,11 +788,15 @@ async def test_kv_producer_heterogeneous_tp(monkeypatch, d_tp_size):
                 dst_ptrs = call_args[2]
                 lengths = call_args[3]
 
+                # Flatten nested per-group block IDs for assertions
+                flat_local = [b for g in local_block_ids for b in g]
+                flat_remote = [b for g in remote_block_ids for b in g]
+
                 # Heterogeneous TP: blocks cannot be coalesced because
                 # local and remote block_lens differ
-                assert len(src_ptrs) == len(local_block_ids)
-                assert len(dst_ptrs) == len(local_block_ids)
-                assert len(lengths) == len(local_block_ids)
+                assert len(src_ptrs) == len(flat_local)
+                assert len(dst_ptrs) == len(flat_local)
+                assert len(lengths) == len(flat_local)
 
                 # Compute expected offsets based on TP ratio
                 if d_tp_size <= P_TP_SIZE:
@@ -730,9 +810,7 @@ async def test_kv_producer_heterogeneous_tp(monkeypatch, d_tp_size):
                     expected_dst_off = 0
                     expected_xfer_len = remote_block_len
 
-                for idx, (lblk, rblk) in enumerate(
-                    zip(local_block_ids, remote_block_ids)
-                ):
+                for idx, (lblk, rblk) in enumerate(zip(flat_local, flat_remote)):
                     assert src_ptrs[idx] == (
                         0x1000 + lblk * local_block_len + expected_src_off
                     )
diff --git a/tests/v1/kv_connector/unit/test_mooncake_connector_hma.py b/tests/v1/kv_connector/unit/test_mooncake_connector_hma.py
new file mode 100644
index 000000000000..8e25df7ca837
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_mooncake_connector_hma.py
@@ -0,0 +1,420 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for MooncakeConnector HMA (Hybrid Memory Architecture) support.
+
+Covers sliding-window clipping, multi-group metadata shape, multi-group
+send trimming, and group-count invariant checking in _build_transfer_params.
+"""
+
+import asyncio
+from unittest.mock import patch
+
+import pytest
+
+from vllm.config import set_current_vllm_config
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.mooncake_connector import (
+    KVConnectorRole,
+    MooncakeConnector,
+    MooncakeConnectorMetadata,
+    MooncakeConnectorScheduler,
+    MooncakeXferMetadata,
+    SendBlockMeta,
+    TransferRegion,
+)
+
+from .test_mooncake_connector import FakeMooncakeWrapper, patch_worker_dependencies
+from .utils import create_request, create_vllm_config, make_kv_cache_config
+
+
+# ---------------------------------------------------------------------------
+#  test_sw_sizes: blocks_per_sw computed from KVCacheConfig
+# ---------------------------------------------------------------------------
+@pytest.mark.cpu_test
+@pytest.mark.parametrize(
+    "swa_enabled,expected_blocks_per_sw",
+    [
+        # SWA enabled: FullAttentionSpec (0) + SlidingWindowSpec (2048/16=128+1)
+        (True, [0, 128 + 1]),
+        # SWA disabled: only FullAttentionSpec (0)
+        (False, [0]),
+    ],
+)
+def test_sw_sizes(swa_enabled, expected_blocks_per_sw):
+    """blocks_per_sw is correctly computed based on SWA enabled/disabled."""
+    block_size = 16
+    vllm_config = create_vllm_config(
+        kv_connector="MooncakeConnector",
+        kv_role="kv_both",
+        block_size=block_size,
+    )
+    # Override so HMA detection works
+    vllm_config.scheduler_config.disable_hybrid_kv_cache_manager = False
+    kv_cache_config = make_kv_cache_config(
+        block_size=block_size, swa_enabled=swa_enabled, sw_size=2048
+    )
+
+    scheduler = MooncakeConnectorScheduler(
+        vllm_config=vllm_config,
+        engine_id="test-engine",
+        kv_cache_config=kv_cache_config,
+    )
+    assert scheduler.blocks_per_sw == expected_blocks_per_sw
+
+
+# ---------------------------------------------------------------------------
+#  test_is_hma_required: derived from kv_cache_config groups
+# ---------------------------------------------------------------------------
+@pytest.mark.cpu_test
+@pytest.mark.parametrize(
+    "swa_enabled,disable_hma,expected_is_hma",
+    [
+        (True, False, True),  # SWA group present, HMA enabled
+        (True, True, False),  # SWA group present, but HMA disabled
+        (False, False, False),  # FA only, HMA not needed
+    ],
+)
+def test_is_hma_required(swa_enabled, disable_hma, expected_is_hma):
+    """_is_hma_required is correctly derived from kv_cache_config."""
+    block_size = 16
+    vllm_config = create_vllm_config(
+        kv_connector="MooncakeConnector",
+        kv_role="kv_both",
+        block_size=block_size,
+    )
+    vllm_config.scheduler_config.disable_hybrid_kv_cache_manager = disable_hma
+    kv_cache_config = make_kv_cache_config(
+        block_size=block_size, swa_enabled=swa_enabled
+    )
+
+    scheduler = MooncakeConnectorScheduler(
+        vllm_config=vllm_config,
+        engine_id="test-engine",
+        kv_cache_config=kv_cache_config,
+    )
+    assert scheduler._is_hma_required is expected_is_hma
+
+
+# ---------------------------------------------------------------------------
+#  test_get_sw_clipped_blocks: sliding-window clipping logic
+# ---------------------------------------------------------------------------
+@pytest.mark.cpu_test
+def test_get_sw_clipped_blocks():
+    """get_sw_clipped_blocks clips SWA group but keeps FA group intact."""
+    block_size = 16
+    vllm_config = create_vllm_config(
+        kv_connector="MooncakeConnector",
+        kv_role="kv_both",
+        block_size=block_size,
+    )
+    vllm_config.scheduler_config.disable_hybrid_kv_cache_manager = False
+    # SW=128 tokens → 128/16 = 8 blocks + 1 = 9 blocks_per_sw
+    kv_cache_config = make_kv_cache_config(
+        block_size=block_size, swa_enabled=True, sw_size=128
+    )
+
+    scheduler = MooncakeConnectorScheduler(
+        vllm_config=vllm_config,
+        engine_id="test-engine",
+        kv_cache_config=kv_cache_config,
+    )
+    assert scheduler.blocks_per_sw == [0, 9]
+
+    # FA group: 20 blocks, SW group: 20 blocks (exceeds window)
+    fa_blocks = list(range(20))
+    sw_blocks = list(range(100, 120))
+    block_ids = (fa_blocks, sw_blocks)
+
+    clipped = scheduler.get_sw_clipped_blocks(block_ids)
+
+    # FA: untouched (blocks_per_sw[0] = 0)
+    assert clipped[0] == fa_blocks
+    # SW: clipped to last 9 blocks
+    assert clipped[1] == sw_blocks[-9:]
+    assert len(clipped[1]) == 9
+
+
+@pytest.mark.cpu_test
+def test_get_sw_clipped_blocks_noop_no_hma():
+    """get_sw_clipped_blocks is a no-op when HMA is not required."""
+    block_size = 16
+    vllm_config = create_vllm_config(
+        kv_connector="MooncakeConnector",
+        kv_role="kv_both",
+        block_size=block_size,
+    )
+    # FA only → _is_hma_required = False
+    kv_cache_config = make_kv_cache_config(block_size=block_size, swa_enabled=False)
+
+    scheduler = MooncakeConnectorScheduler(
+        vllm_config=vllm_config,
+        engine_id="test-engine",
+        kv_cache_config=kv_cache_config,
+    )
+    assert scheduler._is_hma_required is False
+
+    block_ids = ([1, 2, 3],)
+    clipped = scheduler.get_sw_clipped_blocks(block_ids)
+    assert clipped == [[1, 2, 3]]
+
+
+# ---------------------------------------------------------------------------
+#  test_metadata_hma_block_ids: MooncakeConnectorMetadata stores per-group IDs
+# ---------------------------------------------------------------------------
+@pytest.mark.cpu_test
+def test_metadata_hma_block_ids():
+    """MooncakeConnectorMetadata.add_new_req stores per-group block IDs."""
+    metadata = MooncakeConnectorMetadata()
+
+    # FA group: 6 blocks, SW group: 3 blocks (clipped)
+    fa_blocks = [0, 1, 2, 3, 4, 5]
+    sw_blocks = [10, 11, 12]
+
+    # Test recv path
+    metadata.add_new_req(
+        request_id="recv-req",
+        local_block_ids=[fa_blocks, sw_blocks],
+        kv_transfer_params={
+            "transfer_id": "recv-req",
+            "remote_engine_id": "remote-engine",
+            "remote_bootstrap_addr": "http://bootstrap:33333",
+        },
+        load_remote_cache=True,
+    )
+
+    assert "recv-req" in metadata.reqs_to_recv["remote-engine"]
+    req_meta = metadata.reqs_to_recv["remote-engine"]["recv-req"]
+    assert len(req_meta.local_block_ids) == 2
+    assert req_meta.local_block_ids[0] == fa_blocks
+    assert req_meta.local_block_ids[1] == sw_blocks
+
+    # Test send path
+    metadata.add_new_req(
+        request_id="send-req",
+        local_block_ids=[fa_blocks, sw_blocks],
+        kv_transfer_params={
+            "transfer_id": "send-req",
+        },
+        load_remote_cache=False,
+    )
+
+    assert "send-req" in metadata.reqs_to_send
+    transfer_id, stored_blocks = metadata.reqs_to_send["send-req"]
+    assert transfer_id == "send-req"
+    assert len(stored_blocks) == 2
+    assert stored_blocks[0] == fa_blocks
+    assert stored_blocks[1] == sw_blocks
+
+
+# ---------------------------------------------------------------------------
+#  test_build_transfer_params_multi_group_trimming
+# ---------------------------------------------------------------------------
+@pytest.mark.asyncio
+@patch(
+    "vllm.distributed.kv_transfer.kv_connector.v1.mooncake"
+    ".mooncake_connector.TransferEngine",
+    FakeMooncakeWrapper,
+)
+async def test_build_transfer_params_multi_group_trimming(monkeypatch):
+    """_build_transfer_params trims per-group blocks when local > remote."""
+
+    monkeypatch.setenv("VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT", "5")
+    vllm_config = create_vllm_config(
+        kv_connector="MooncakeConnector", kv_role="kv_producer"
+    )
+    kv_cache_config = make_kv_cache_config(
+        block_size=vllm_config.cache_config.block_size, swa_enabled=True
+    )
+
+    with set_current_vllm_config(vllm_config), patch_worker_dependencies():
+        connector = MooncakeConnector(
+            vllm_config, KVConnectorRole.WORKER, kv_cache_config
+        )
+        worker = connector.connector_worker
+
+        block_len = 4096
+        # Call _build_transfer_params directly (avoids send_kv_to_decode
+        # async event loop complexity).
+        transfer_id = "xfer-hma-trim"
+        send_meta = SendBlockMeta(
+            p_req_id="p-trim",
+            transfer_id=transfer_id,
+            # FA: 4 blocks, SW: 3 blocks (producer has more)
+            local_block_ids=[[10, 11, 12, 13], [20, 21, 22]],
+            ready=asyncio.Event(),
+        )
+
+        xfer_meta = MooncakeXferMetadata(
+            remote_hostname="consumer-host",
+            remote_port=54321,
+            remote_tp_size=1,
+            remote_tp_rank=0,
+            req_blocks={
+                "d-trim": (
+                    transfer_id,
+                    # FA: 2 blocks, SW: 2 blocks (consumer needs fewer)
+                    [[30, 31], [40, 41]],
+                )
+            },
+            kv_caches_base_addr=[0x2000],
+            block_lens=[block_len],
+        )
+
+        local_regions = [
+            TransferRegion(
+                base_addr=0x1000, block_len=block_len, kv_block_len=block_len
+            ),
+        ]
+        remote_regions = [
+            TransferRegion(
+                base_addr=0x2000, block_len=block_len, kv_block_len=block_len
+            ),
+        ]
+
+        ready_reqs = [("d-trim", send_meta)]
+        (
+            src_ptrs,
+            dst_ptrs,
+            lengths,
+            err_reqs,
+            err_msg,
+        ) = await worker._build_transfer_params(
+            ready_reqs, xfer_meta, local_regions, remote_regions
+        )
+
+        # No errors
+        assert err_reqs == []
+        assert err_msg is None
+        # After trimming: FA [10..13] → last 2 → [12,13]; SW [20..22] → last 2 → [21,22]
+        # Flattened: [12,13,21,22] = 4 blocks → coalesced into some transfers
+        assert len(src_ptrs) > 0
+        assert len(dst_ptrs) == len(src_ptrs)
+        assert len(lengths) == len(src_ptrs)
+
+        worker.shutdown()
+
+
+# ---------------------------------------------------------------------------
+#  test_build_transfer_params_group_count_mismatch
+# ---------------------------------------------------------------------------
+@pytest.mark.asyncio
+@patch(
+    "vllm.distributed.kv_transfer.kv_connector.v1.mooncake"
+    ".mooncake_connector.TransferEngine",
+    FakeMooncakeWrapper,
+)
+async def test_build_transfer_params_group_count_mismatch(monkeypatch):
+    """_build_transfer_params reports an error when group counts differ."""
+
+    monkeypatch.setenv("VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT", "5")
+    vllm_config = create_vllm_config(
+        kv_connector="MooncakeConnector", kv_role="kv_producer"
+    )
+    kv_cache_config = make_kv_cache_config(
+        block_size=vllm_config.cache_config.block_size, swa_enabled=True
+    )
+
+    with set_current_vllm_config(vllm_config), patch_worker_dependencies():
+        connector = MooncakeConnector(
+            vllm_config, KVConnectorRole.WORKER, kv_cache_config
+        )
+        worker = connector.connector_worker
+
+        block_len = 4096
+        transfer_id = "xfer-mismatch"
+        send_meta = SendBlockMeta(
+            p_req_id="p-mismatch",
+            transfer_id=transfer_id,
+            # Producer has 2 groups
+            local_block_ids=[[10, 11], [20, 21]],
+            ready=asyncio.Event(),
+        )
+
+        # Consumer has only 1 group — group count mismatch
+        xfer_meta = MooncakeXferMetadata(
+            remote_hostname="consumer-host",
+            remote_port=54321,
+            remote_tp_size=1,
+            remote_tp_rank=0,
+            req_blocks={
+                "d-mismatch": (transfer_id, [[30, 31]]),
+            },
+            kv_caches_base_addr=[0x2000],
+            block_lens=[block_len],
+        )
+
+        local_regions = [
+            TransferRegion(
+                base_addr=0x1000, block_len=block_len, kv_block_len=block_len
+            ),
+        ]
+        remote_regions = [
+            TransferRegion(
+                base_addr=0x2000, block_len=block_len, kv_block_len=block_len
+            ),
+        ]
+
+        ready_reqs = [("d-mismatch", send_meta)]
+        (
+            src_ptrs,
+            dst_ptrs,
+            lengths,
+            err_reqs,
+            err_msg,
+        ) = await worker._build_transfer_params(
+            ready_reqs, xfer_meta, local_regions, remote_regions
+        )
+
+        # Mismatched req is reported via err_reqs/err_msg with no transfers built.
+        assert err_reqs == ["d-mismatch"]
+        assert err_msg == "KV group count mismatch"
+        assert src_ptrs == []
+        assert dst_ptrs == []
+        assert lengths == []
+
+        worker.shutdown()
+
+
+# ---------------------------------------------------------------------------
+#  test_request_finished_with_hma_groups
+# ---------------------------------------------------------------------------
+@pytest.mark.cpu_test
+def test_request_finished_with_hma_groups():
+    """request_finished correctly handles per-group block_ids."""
+    block_size = 16
+    vllm_config = create_vllm_config(
+        kv_connector="MooncakeConnector",
+        kv_role="kv_producer",
+        block_size=block_size,
+    )
+    vllm_config.scheduler_config.disable_hybrid_kv_cache_manager = False
+    kv_cache_config = make_kv_cache_config(
+        block_size=block_size, swa_enabled=True, sw_size=128
+    )
+
+    scheduler = MooncakeConnectorScheduler(
+        vllm_config=vllm_config,
+        engine_id="test-engine",
+        kv_cache_config=kv_cache_config,
+    )
+
+    request = create_request(request_id=1, do_remote_decode=True)
+    request.kv_transfer_params["transfer_id"] = request.request_id
+
+    from vllm.v1.request import RequestStatus
+
+    request.status = RequestStatus.FINISHED_LENGTH_CAPPED
+
+    # 2 groups: FA with 10 blocks, SW with 20 blocks (will be clipped)
+    fa_blocks = list(range(10))
+    sw_blocks = list(range(100, 120))
+    block_ids = (fa_blocks, sw_blocks)
+
+    delay_free, _ = scheduler.request_finished(request, block_ids)
+    assert delay_free is True
+    assert request.request_id in scheduler._reqs_need_send
+
+    _, stored_blocks = scheduler._reqs_need_send[request.request_id]
+    # FA: untouched
+    assert stored_blocks[0] == fa_blocks
+    # SW: clipped to last 9 blocks (sw_size=128, block_size=16 → 8+1=9)
+    assert stored_blocks[1] == sw_blocks[-9:]
diff --git a/tests/v1/kv_connector/unit/test_mooncake_stats.py b/tests/v1/kv_connector/unit/test_mooncake_stats.py
new file mode 100644
index 000000000000..a20fcb505330
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_mooncake_stats.py
@@ -0,0 +1,281 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import threading
+from unittest.mock import MagicMock
+
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.mooncake_connector import (
+    MooncakeConnector,
+    MooncakeConnectorWorker,
+    SendBlockMeta,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.stats import (
+    MooncakeKVConnectorStats,
+)
+
+
+def test_is_empty_on_fresh_stats():
+    stats = MooncakeKVConnectorStats()
+    assert stats.is_empty()
+    assert stats.num_successful_transfers == 0
+
+
+def test_record_transfer_and_reduce():
+    stats = MooncakeKVConnectorStats()
+    # 1 MB transfer in 1 ms -> 1000 MB/s throughput
+    stats.record_transfer(duration_s=0.001, total_bytes=1 * 2**20, num_descs=4)
+    # 2 MB transfer in 2 ms
+    stats.record_transfer(duration_s=0.002, total_bytes=2 * 2**20, num_descs=6)
+    assert not stats.is_empty()
+    assert stats.num_successful_transfers == 2
+
+    reduced = stats.reduce()
+    assert reduced["Num successful transfers"] == 2
+    # avg = (1 + 2) / 2 = 1.5 ms
+    assert reduced["Avg xfer time (ms)"] == 1.5
+    assert reduced["Avg MB per transfer"] == 1.5
+    # 3 MB total / 3 ms total = 1000 MB/s
+    assert reduced["Throughput (MB/s)"] == 1000.0
+    assert reduced["Avg number of descriptors"] == 5.0
+    assert reduced["Num failed transfers"] == 0
+    assert reduced["Num failed recvs"] == 0
+    assert reduced["Num KV expired reqs"] == 0
+
+
+def test_record_failures_keeps_stats_non_empty():
+    stats = MooncakeKVConnectorStats()
+    stats.record_failed_transfer()
+    stats.record_failed_recv()
+    stats.record_kv_expired_req()
+    assert not stats.is_empty()
+
+    reduced = stats.reduce()
+    # No successful transfers -> latency/throughput all zero, but failure
+    # counters still surface.
+    assert reduced["Num successful transfers"] == 0
+    assert reduced["Num failed transfers"] == 1
+    assert reduced["Num failed recvs"] == 1
+    assert reduced["Num KV expired reqs"] == 1
+
+
+def test_aggregate_sums_observations():
+    a = MooncakeKVConnectorStats()
+    b = MooncakeKVConnectorStats()
+    a.record_transfer(duration_s=0.001, total_bytes=1 * 2**20, num_descs=1)
+    b.record_transfer(duration_s=0.002, total_bytes=2 * 2**20, num_descs=2)
+    b.record_failed_transfer()
+
+    a.aggregate(b)
+
+    assert a.num_successful_transfers == 2
+    reduced = a.reduce()
+    assert reduced["Num successful transfers"] == 2
+    assert reduced["Num failed transfers"] == 1
+
+
+def test_aggregate_with_empty_other_is_noop():
+    a = MooncakeKVConnectorStats()
+    a.record_transfer(duration_s=0.001, total_bytes=1, num_descs=1)
+    b = MooncakeKVConnectorStats()
+
+    a.aggregate(b)
+
+    assert a.num_successful_transfers == 1
+
+
+def test_getstate_drops_lock_and_setstate_recreates_it():
+    # KVConnectorStats subclasses must be picklable (worker→scheduler IPC),
+    # but threading.Lock isn't — so __getstate__ strips it and __setstate__
+    # rebuilds a fresh per-process lock.
+    original = MooncakeKVConnectorStats()
+    original.record_transfer(duration_s=0.01, total_bytes=2048, num_descs=3)
+
+    state = original.__getstate__()
+    assert "_lock" not in state
+
+    rebuilt = MooncakeKVConnectorStats.__new__(MooncakeKVConnectorStats)
+    rebuilt.__setstate__(state)
+    assert rebuilt.data == original.data
+    # Lock works on the receiver side.
+    rebuilt.record_transfer(duration_s=0.02, total_bytes=4096, num_descs=5)
+    assert rebuilt.num_successful_transfers == 2
+
+
+def test_concurrent_writers_keep_row_lengths_aligned():
+    # Multiple writers + a snapshot reader must never produce a snapshot
+    # with mismatched column lengths — reduce()'s
+    # len(descs) == num_successful_transfers assertion would fire.
+    stats = MooncakeKVConnectorStats()
+    stop = threading.Event()
+    writer_count = 4
+    snapshots: list[MooncakeKVConnectorStats] = []
+
+    def writer():
+        i = 0
+        while not stop.is_set():
+            stats.record_transfer(
+                duration_s=0.001 + i * 1e-9,
+                total_bytes=1024 + i,
+                num_descs=1 + (i % 8),
+            )
+            i += 1
+
+    def snapper():
+        while not stop.is_set():
+            snap = stats.clone_and_reset()
+            if not snap.is_empty():
+                # Force the same path the logger walks; reduce() will
+                # blow up on torn rows via its internal assert.
+                snap.reduce()
+                snapshots.append(snap)
+
+    threads = [threading.Thread(target=writer) for _ in range(writer_count)]
+    snapshotter = threading.Thread(target=snapper)
+    for t in threads:
+        t.start()
+    snapshotter.start()
+    # Short fixed window — long enough to interleave thousands of ops.
+    threading.Event().wait(0.2)
+    stop.set()
+    for t in threads:
+        t.join()
+    snapshotter.join()
+
+    # Final drain so we don't lose the in-flight tail.
+    final = stats.clone_and_reset()
+    if not final.is_empty():
+        final.reduce()
+        snapshots.append(final)
+
+    # Every snapshot's columns must have identical lengths (the invariant
+    # the lock protects), and the union must contain at least one row.
+    total_rows = 0
+    for snap in snapshots:
+        n = len(snap.data["transfer_duration"])
+        assert len(snap.data["bytes_transferred"]) == n
+        assert len(snap.data["num_descriptors"]) == n
+        total_rows += n
+    assert total_rows > 0
+
+
+def test_clone_and_reset_hands_off_old_data():
+    stats = MooncakeKVConnectorStats()
+    stats.record_transfer(duration_s=0.001, total_bytes=1, num_descs=1)
+    stats.record_failed_recv()
+
+    snapshot = stats.clone_and_reset()
+
+    assert snapshot.num_successful_transfers == 1
+    assert not snapshot.is_empty()
+    # Original is now empty.
+    assert stats.is_empty()
+    assert stats.num_successful_transfers == 0
+    # Recording on the original does not mutate the snapshot.
+    stats.record_transfer(duration_s=0.005, total_bytes=2, num_descs=2)
+    assert snapshot.num_successful_transfers == 1
+
+
+def test_build_kv_connector_stats_none_returns_empty_instance():
+    out = MooncakeConnector.build_kv_connector_stats()
+    assert isinstance(out, MooncakeKVConnectorStats)
+    assert out.is_empty()
+
+
+def test_build_kv_connector_stats_with_data_round_trips():
+    original = MooncakeKVConnectorStats()
+    original.record_transfer(duration_s=0.01, total_bytes=1024, num_descs=3)
+    original.record_failed_transfer()
+
+    # Serialized form is the .data dict; build should reconstruct an instance
+    # that behaves the same.
+    rebuilt = MooncakeConnector.build_kv_connector_stats(data=original.data)
+
+    assert isinstance(rebuilt, MooncakeKVConnectorStats)
+    assert rebuilt.num_successful_transfers == 1
+    assert rebuilt.reduce()["Num failed transfers"] == 1
+
+
+def _bare_worker() -> MooncakeConnectorWorker:
+    """Construct a MooncakeConnectorWorker skipping __init__ (full init requires
+    a live TransferEngine). Only the attributes touched by the methods under
+    test are populated; role flags and async_zmq_ctx keep __del__'s shutdown
+    path a no-op."""
+    worker = MooncakeConnectorWorker.__new__(MooncakeConnectorWorker)
+    worker.xfer_stats = MooncakeKVConnectorStats()
+    worker.engine = MagicMock()
+    worker.async_zmq_ctx = MagicMock()
+    worker.is_kv_consumer = True
+    worker.is_kv_producer = True
+    return worker
+
+
+def test_send_blocks_records_success():
+    worker = _bare_worker()
+    worker.engine.batch_transfer_sync_write.return_value = 0
+
+    ret = worker._send_blocks(
+        "host:1234",
+        src_ptrs=[0x1000, 0x2000],
+        dst_ptrs=[0x3000, 0x4000],
+        lengths=[1024, 2048],
+    )
+
+    assert ret == 0
+    assert worker.xfer_stats.num_successful_transfers == 1
+    data = worker.xfer_stats.data
+    assert data["bytes_transferred"] == [1024 + 2048]
+    assert data["num_descriptors"] == [2]
+    assert data["num_failed_transfers"] == []
+
+
+def test_send_blocks_records_failure():
+    worker = _bare_worker()
+    worker.engine.batch_transfer_sync_write.return_value = 1  # non-zero = fail
+
+    ret = worker._send_blocks("host:1234", [0x1000], [0x2000], [4096])
+
+    assert ret == 1
+    assert worker.xfer_stats.num_successful_transfers == 0
+    assert worker.xfer_stats.data["num_failed_transfers"] == [1]
+
+
+def test_get_kv_connector_stats_returns_none_when_empty():
+    worker = _bare_worker()
+
+    assert worker.get_kv_connector_stats() is None
+
+
+def test_get_kv_connector_stats_returns_and_resets():
+    worker = _bare_worker()
+    worker.engine.batch_transfer_sync_write.return_value = 0
+    worker._send_blocks("host:1234", [0x1000], [0x2000], [4096])
+
+    snapshot = worker.get_kv_connector_stats()
+    assert isinstance(snapshot, MooncakeKVConnectorStats)
+    assert snapshot.num_successful_transfers == 1
+
+    # Second call returns None because the worker's stats were reset.
+    assert worker.get_kv_connector_stats() is None
+
+
+def test_expired_request_bumps_counter():
+    import asyncio
+
+    worker = _bare_worker()
+    worker.reqs_need_send = {
+        "tid1": SendBlockMeta(
+            p_req_id="req1",
+            transfer_id="tid1",
+            local_block_ids=[0, 1],
+            ready=asyncio.Event(),
+            expire_time=-1.0,  # Already expired.
+            sending=0,
+        ),
+    }
+    worker.finished_sending_reqs = set()
+
+    asyncio.run(worker.fetch_finished_sending_reqs())
+
+    assert worker.xfer_stats.data["num_kv_expired_reqs"] == [1]
+    # Expired transfer also cleaned out of reqs_need_send.
+    assert "tid1" not in worker.reqs_need_send
diff --git a/tests/v1/kv_connector/unit/test_mooncake_store_connector.py b/tests/v1/kv_connector/unit/test_mooncake_store_connector.py
new file mode 100644
index 000000000000..2f717c3e2b79
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_mooncake_store_connector.py
@@ -0,0 +1,282 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from unittest.mock import MagicMock, patch
+
+from vllm.config import set_current_vllm_config
+from vllm.distributed.kv_events import BlockStored
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    KVConnectorRole,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store import (
+    connector as mooncake_store_connector,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.data import (
+    MooncakeStoreConnectorMetadata,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.metrics import (
+    MooncakeStoreConnectorStats,
+)
+from vllm.v1.kv_cache_interface import (
+    FullAttentionSpec,
+    KVCacheConfig,
+    KVCacheGroupSpec,
+    KVCacheTensor,
+)
+from vllm.v1.outputs import KVConnectorOutput
+
+from .utils import create_vllm_config
+
+
+def _make_vllm_config():
+    return create_vllm_config(
+        kv_connector="MooncakeStoreConnector",
+        kv_role="kv_both",
+    )
+
+
+def _make_kv_cache_config() -> KVCacheConfig:
+    """Single-group full-attention KVCacheConfig — enough for the connector
+    constructor's validate() pass."""
+    spec = FullAttentionSpec(block_size=16, num_kv_heads=8, head_size=64, dtype=None)
+    return KVCacheConfig(
+        num_blocks=4,
+        kv_cache_tensors=[KVCacheTensor(size=8192, shared_by=["layer0"])],
+        kv_cache_groups=[KVCacheGroupSpec(["layer0"], spec)],
+    )
+
+
+def _make_block_stored() -> BlockStored:
+    return BlockStored(
+        block_hashes=[b"hash"],
+        parent_block_hash=None,
+        token_ids=[1, 2, 3],
+        block_size=16,
+        lora_id=None,
+        medium="cpu",
+        lora_name=None,
+    )
+
+
+def test_scheduler_role_initializes_store_scheduler_only():
+    vllm_config = _make_vllm_config()
+    kv_cache_config = _make_kv_cache_config()
+
+    with (
+        set_current_vllm_config(vllm_config),
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store."
+            "connector.MooncakeStoreScheduler"
+        ) as mock_scheduler,
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store."
+            "connector.MooncakeStoreWorker"
+        ) as mock_worker,
+    ):
+        connector = mooncake_store_connector.MooncakeStoreConnector(
+            vllm_config, KVConnectorRole.SCHEDULER, kv_cache_config
+        )
+
+    mock_scheduler.assert_called_once_with(vllm_config, kv_cache_config)
+    mock_worker.assert_not_called()
+    assert connector.connector_scheduler is mock_scheduler.return_value
+    assert connector.connector_worker is None
+
+
+def test_worker_methods_delegate_to_store_worker():
+    vllm_config = _make_vllm_config()
+    kv_cache_config = _make_kv_cache_config()
+    kv_caches = {"layer0": MagicMock()}
+    metadata = MooncakeStoreConnectorMetadata(set(), set())
+    finished_req_ids = {"req-1"}
+
+    with (
+        set_current_vllm_config(vllm_config),
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store."
+            "connector.MooncakeStoreWorker"
+        ) as mock_worker_cls,
+    ):
+        connector = mooncake_store_connector.MooncakeStoreConnector(
+            vllm_config, KVConnectorRole.WORKER, kv_cache_config
+        )
+
+    worker = mock_worker_cls.return_value
+    worker.get_finished.return_value = ({"req-1"}, {"req-2"})
+    connector.bind_connector_metadata(metadata)
+
+    connector.register_kv_caches(kv_caches)
+    result = connector.get_finished(finished_req_ids)
+
+    worker.register_kv_caches.assert_called_once_with(kv_caches)
+    worker.get_finished.assert_called_once_with(finished_req_ids, metadata)
+    assert result == ({"req-1"}, {"req-2"})
+
+
+def test_get_kv_connector_kv_cache_events_returns_none_when_empty():
+    vllm_config = _make_vllm_config()
+    kv_cache_config = _make_kv_cache_config()
+
+    with (
+        set_current_vllm_config(vllm_config),
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store."
+            "connector.MooncakeStoreWorker"
+        ) as mock_worker_cls,
+    ):
+        connector = mooncake_store_connector.MooncakeStoreConnector(
+            vllm_config, KVConnectorRole.WORKER, kv_cache_config
+        )
+
+    mock_worker_cls.return_value.get_kv_events.return_value = []
+    assert connector.get_kv_connector_kv_cache_events() is None
+
+
+def test_get_kv_connector_stats_delegates_to_worker():
+    vllm_config = _make_vllm_config()
+    kv_cache_config = _make_kv_cache_config()
+    expected_stats = MooncakeStoreConnectorStats()
+    expected_stats.record_operation("save_put", 0.01, 2, num_bytes=1024)
+
+    with (
+        set_current_vllm_config(vllm_config),
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store."
+            "connector.MooncakeStoreWorker"
+        ) as mock_worker_cls,
+    ):
+        connector = mooncake_store_connector.MooncakeStoreConnector(
+            vllm_config, KVConnectorRole.WORKER, kv_cache_config
+        )
+
+    mock_worker_cls.return_value.get_kv_connector_stats.return_value = expected_stats
+    stats = connector.get_kv_connector_stats()
+
+    assert stats is expected_stats
+    mock_worker_cls.return_value.get_kv_connector_stats.assert_called_once_with()
+
+
+def test_build_kv_connector_stats_reconstructs_mooncake_stats():
+    stats = mooncake_store_connector.MooncakeStoreConnector.build_kv_connector_stats(
+        {
+            "save_put": [
+                {
+                    "duration_seconds": 0.02,
+                    "num_keys": 4,
+                    "num_bytes": 2048,
+                    "status": "ok",
+                    "num_failed_keys": 0,
+                }
+            ]
+        }
+    )
+
+    assert isinstance(stats, MooncakeStoreConnectorStats)
+    assert stats.data["save_put"][0]["num_bytes"] == 2048
+
+
+def test_get_kv_connector_kv_cache_events_wraps_worker_events():
+    vllm_config = _make_vllm_config()
+    kv_cache_config = _make_kv_cache_config()
+    event = _make_block_stored()
+
+    with (
+        set_current_vllm_config(vllm_config),
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store."
+            "connector.MooncakeStoreWorker"
+        ) as mock_worker_cls,
+    ):
+        connector = mooncake_store_connector.MooncakeStoreConnector(
+            vllm_config, KVConnectorRole.WORKER, kv_cache_config
+        )
+
+    mock_worker_cls.return_value.get_kv_events.return_value = [event]
+    kv_events = connector.get_kv_connector_kv_cache_events()
+
+    assert isinstance(kv_events, mooncake_store_connector.MooncakeStoreKVEvents)
+    assert kv_events.get_number_of_workers() == 1
+    assert kv_events.get_all_events() == [event]
+
+
+def test_prefer_cross_layer_blocks_from_config():
+    # Default: disabled
+    vllm_config = _make_vllm_config()
+    kv_cache_config = _make_kv_cache_config()
+    with (
+        set_current_vllm_config(vllm_config),
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store."
+            "connector.MooncakeStoreScheduler"
+        ),
+    ):
+        connector = mooncake_store_connector.MooncakeStoreConnector(
+            vllm_config, KVConnectorRole.SCHEDULER, kv_cache_config
+        )
+    assert connector.prefer_cross_layer_blocks is False
+
+    # Enabled via config
+    vllm_config_enabled = create_vllm_config(
+        kv_connector="MooncakeStoreConnector",
+        kv_role="kv_both",
+        kv_connector_extra_config={"enable_cross_layers_blocks": "true"},
+    )
+    with (
+        set_current_vllm_config(vllm_config_enabled),
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store."
+            "connector.MooncakeStoreScheduler"
+        ),
+    ):
+        connector_enabled = mooncake_store_connector.MooncakeStoreConnector(
+            vllm_config_enabled, KVConnectorRole.SCHEDULER, kv_cache_config
+        )
+    assert connector_enabled.prefer_cross_layer_blocks is True
+
+
+def test_register_cross_layers_kv_cache_delegates_to_worker():
+    vllm_config = _make_vllm_config()
+    kv_cache_config = _make_kv_cache_config()
+
+    with (
+        set_current_vllm_config(vllm_config),
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store."
+            "connector.MooncakeStoreWorker"
+        ) as mock_worker_cls,
+    ):
+        connector = mooncake_store_connector.MooncakeStoreConnector(
+            vllm_config, KVConnectorRole.WORKER, kv_cache_config
+        )
+
+    fake_tensor = MagicMock()
+    fake_backend = MagicMock()
+    connector.register_cross_layers_kv_cache(fake_tensor, fake_backend)
+
+    worker = mock_worker_cls.return_value
+    worker.register_cross_layers_kv_caches.assert_called_once_with(fake_tensor)
+
+
+def test_update_connector_output_and_take_events():
+    vllm_config = _make_vllm_config()
+    kv_cache_config = _make_kv_cache_config()
+    event = _make_block_stored()
+
+    with (
+        set_current_vllm_config(vllm_config),
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store."
+            "connector.MooncakeStoreScheduler"
+        ),
+    ):
+        connector = mooncake_store_connector.MooncakeStoreConnector(
+            vllm_config, KVConnectorRole.SCHEDULER, kv_cache_config
+        )
+
+    kv_events = mooncake_store_connector.MooncakeStoreKVEvents(num_workers=1)
+    kv_events.add_events([event])
+    connector.update_connector_output(KVConnectorOutput(kv_cache_events=kv_events))
+
+    assert connector._kv_cache_events is kv_events
+    assert list(connector.take_events()) == [event]
+    assert connector._kv_cache_events is None
diff --git a/tests/v1/kv_connector/unit/test_mooncake_store_coordinator.py b/tests/v1/kv_connector/unit/test_mooncake_store_coordinator.py
new file mode 100644
index 000000000000..1fd601af59ce
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_mooncake_store_coordinator.py
@@ -0,0 +1,302 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from math import lcm
+
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.coordinator import (  # noqa: E501
+    ExternalCachedBlockPool,
+    MooncakeStoreCoordinator,
+)
+from vllm.v1.core.kv_cache_utils import BlockHash, BlockHashListWithBlockSize
+from vllm.v1.kv_cache_interface import (
+    FullAttentionSpec,
+    KVCacheGroupSpec,
+    SlidingWindowSpec,
+)
+
+
+def _make_coord(groups, hash_block_size, use_eagle=False):
+    """Construct a coordinator using the natural LCM of group block sizes as
+    the scheduler block size — mirrors ``resolve_kv_cache_block_sizes`` for
+    the test fixtures."""
+    block_sizes = [g.kv_cache_spec.block_size for g in groups]
+    scheduler_block_size = lcm(*block_sizes)
+    return MooncakeStoreCoordinator(
+        groups,
+        scheduler_block_size=scheduler_block_size,
+        hash_block_size=hash_block_size,
+        use_eagle=use_eagle,
+    )
+
+
+# ----- ExternalCachedBlockPool -----
+
+
+def test_external_cached_block_pool_tautological_returns_present_for_any_hash():
+    cmap = ExternalCachedBlockPool()
+    h = BlockHash(b"\xaa" * 4)
+    res = cmap.get_cached_block(h, [0, 1])
+    assert res is not None
+    assert len(res) == 2
+    assert res[0] is not cmap.null_block
+    assert res[1] is not cmap.null_block
+
+
+def test_external_cached_block_pool_hit_all_groups():
+    h = BlockHash(b"\x11\x22\x33\x44")
+    cmap = ExternalCachedBlockPool({(0, bytes(h)), (1, bytes(h))})
+    res = cmap.get_cached_block(h, [0, 1])
+    assert res is not None
+    assert len(res) == 2
+    assert res[0] is not cmap.null_block
+    assert res[1] is not cmap.null_block
+
+
+def test_external_cached_block_pool_miss_one_group():
+    h = BlockHash(b"\x11\x22\x33\x44")
+    cmap = ExternalCachedBlockPool({(0, bytes(h))})
+    assert cmap.get_cached_block(h, [0, 1]) is None
+
+
+def test_external_cached_block_pool_unknown_hash():
+    h_known = BlockHash(b"\x01" * 4)
+    h_unknown = BlockHash(b"\x02" * 4)
+    cmap = ExternalCachedBlockPool({(0, bytes(h_known))})
+    assert cmap.get_cached_block(h_unknown, [0]) is None
+
+
+# ----- Helpers -----
+
+
+def _full(block_size=16, sliding_window=None):
+    return FullAttentionSpec(
+        block_size=block_size,
+        num_kv_heads=8,
+        head_size=64,
+        dtype=None,
+        sliding_window=sliding_window,
+    )
+
+
+def _swa(block_size=16, sliding_window=32):
+    return SlidingWindowSpec(
+        block_size=block_size,
+        num_kv_heads=8,
+        head_size=64,
+        dtype=None,
+        sliding_window=sliding_window,
+    )
+
+
+def _hashes(n: int) -> list[BlockHash]:
+    return [BlockHash(bytes([i + 1]) * 4) for i in range(n)]
+
+
+# ----- Single-group coordinator -----
+
+
+def test_coordinator_single_full_attention_all_hits():
+    groups = [KVCacheGroupSpec(["L0"], _full(16))]
+    coord = _make_coord(groups, hash_block_size=16)
+    hs = _hashes(4)
+    cmap = ExternalCachedBlockPool({(0, bytes(h)) for h in hs})
+    masks, hit = coord.find_longest_cache_hit(hs, max_length=64, cached_block_pool=cmap)
+    assert hit == 64
+    assert masks[0] == [True, True, True, True]
+
+
+def test_coordinator_single_full_attention_partial_prefix():
+    groups = [KVCacheGroupSpec(["L0"], _full(16))]
+    coord = _make_coord(groups, hash_block_size=16)
+    hs = _hashes(4)
+    cmap = ExternalCachedBlockPool({(0, bytes(hs[0])), (0, bytes(hs[1]))})
+    masks, hit = coord.find_longest_cache_hit(hs, max_length=64, cached_block_pool=cmap)
+    assert hit == 32
+    assert masks[0] == [True, True]
+
+
+def test_coordinator_single_full_attention_no_hits():
+    groups = [KVCacheGroupSpec(["L0"], _full(16))]
+    coord = _make_coord(groups, hash_block_size=16)
+    hs = _hashes(4)
+    cmap = ExternalCachedBlockPool(set())
+    masks, hit = coord.find_longest_cache_hit(hs, max_length=64, cached_block_pool=cmap)
+    assert hit == 0
+    assert masks[0] == []
+
+
+def test_coordinator_single_swa_tautological_pool_masks_pre_window():
+    """SWA tautological-pool: hit_length spans full prefix, mask is
+    tail-window only."""
+    groups = [KVCacheGroupSpec(["L0"], _swa(block_size=16, sliding_window=32))]
+    coord = _make_coord(groups, hash_block_size=16)
+    hs = _hashes(4)  # 4 chunks * 16 tokens
+    cmap = ExternalCachedBlockPool()
+    masks, hit = coord.find_longest_cache_hit(hs, max_length=64, cached_block_pool=cmap)
+    assert hit == 64
+    # ceil((sw-1)/block_size) = ceil(31/16) = 2 tail blocks.
+    assert masks[0][-2:] == [True, True]
+    assert all(not m for m in masks[0][:-2])
+
+
+# ----- Hybrid coordinator (single-group worker, multi-group coordinator) -----
+
+
+def test_coordinator_hybrid_full_plus_swa_all_hit():
+    groups = [
+        KVCacheGroupSpec(["L0"], _full(16)),
+        KVCacheGroupSpec(["L1"], _swa(16, 32)),
+    ]
+    coord = _make_coord(groups, hash_block_size=16)
+    hs = _hashes(4)
+    cmap = ExternalCachedBlockPool({(g, bytes(h)) for g in (0, 1) for h in hs})
+    _masks, hit = coord.find_longest_cache_hit(
+        hs, max_length=64, cached_block_pool=cmap
+    )
+    assert hit == 64
+
+
+def test_coordinator_hybrid_hole_in_full_clips_both():
+    groups = [
+        KVCacheGroupSpec(["L0"], _full(16)),
+        KVCacheGroupSpec(["L1"], _swa(16, 32)),
+    ]
+    coord = _make_coord(groups, hash_block_size=16)
+    hs = _hashes(4)
+    exists = {(0, bytes(hs[0])), (0, bytes(hs[2])), (0, bytes(hs[3]))}
+    exists |= {(1, bytes(h)) for h in hs}
+    cmap = ExternalCachedBlockPool(exists)
+    _masks, hit = coord.find_longest_cache_hit(
+        hs, max_length=64, cached_block_pool=cmap
+    )
+    assert hit == 16
+
+
+def test_coordinator_group_block_size_double_hash():
+    """Group block_size=32 over hash_block_size=16 hashes: adjacent
+    hashes merge before pool lookup."""
+    groups = [
+        KVCacheGroupSpec(["L0"], _full(16)),
+        KVCacheGroupSpec(["L1"], _full(32)),
+    ]
+    coord = _make_coord(groups, hash_block_size=16)
+    hs = _hashes(4)
+    big_hashes = list(BlockHashListWithBlockSize(hs, 16, 32))
+    exists = {(0, bytes(h)) for h in hs}
+    exists |= {(1, bytes(bh)) for bh in big_hashes}
+    cmap = ExternalCachedBlockPool(exists)
+    _masks, hit = coord.find_longest_cache_hit(
+        hs, max_length=64, cached_block_pool=cmap
+    )
+    assert hit == 64
+    assert hit % 32 == 0
+
+
+# ----- store_mask -----
+
+
+def test_store_mask_full_attention_all_true():
+    groups = [KVCacheGroupSpec(["L0"], _full(16))]
+    coord = _make_coord(groups, hash_block_size=16)
+    masks = coord.store_mask(64)
+    assert masks == ([True, True, True, True],)
+
+
+def test_store_mask_zero_aligned_returns_empty_per_group():
+    groups = [
+        KVCacheGroupSpec(["L0"], _full(16)),
+        KVCacheGroupSpec(["L1"], _swa(16, 32)),
+    ]
+    coord = _make_coord(groups, hash_block_size=16)
+    masks = coord.store_mask(0)
+    assert masks == ([], [])
+
+
+def test_store_mask_swa_only_window_around_each_lcm_boundary():
+    """Hybrid full-attn(block=32) + SWA(block=8, sw=8). lcm=32. With
+    aligned=64 the SWA group should mark exactly the blocks ending at 32
+    and 64 (i.e. blocks 3 and 7 at block_size=8); the rest can never
+    participate in any future hit."""
+    full = _full(32)
+    swa = _swa(block_size=8, sliding_window=8)
+    groups = [KVCacheGroupSpec(["L0"], full), KVCacheGroupSpec(["L1"], swa)]
+    coord = _make_coord(groups, hash_block_size=8)
+    masks = coord.store_mask(64)
+    # Full-attn: 2 chunks * 32 tokens.
+    assert masks[0] == [True, True]
+    # SWA: 8 chunks * 8 tokens. Only chunks ending at 32 and 64 are stored.
+    assert masks[1] == [False, False, False, True, False, False, False, True]
+
+
+def test_store_mask_swa_wider_window_covers_more_blocks_per_lcm():
+    """Same hybrid layout but sliding_window=16 (= 2 SWA blocks). Each lcm
+    boundary should now span two SWA tail blocks."""
+    full = _full(32)
+    swa = _swa(block_size=8, sliding_window=16)
+    groups = [KVCacheGroupSpec(["L0"], full), KVCacheGroupSpec(["L1"], swa)]
+    coord = _make_coord(groups, hash_block_size=8)
+    masks = coord.store_mask(64)
+    assert masks[0] == [True, True]
+    # Boundary at 32: blocks ending in [16, 32) — chunks 2 and 3.
+    # Boundary at 64: chunks 6 and 7. Others stay False.
+    assert masks[1] == [False, False, True, True, False, False, True, True]
+
+
+def test_store_mask_dsv4_5_groups_full_mla_plus_4_swa():
+    """DSV4-shaped: full-MLA(B=256) + 4 SWA groups with B in {64, 64, 4, 8}
+    and varied sliding windows. lcm=256, hash_block_size=4. Two lcm segments
+    (aligned_len=512). Validates that the tile-once strategy produces the
+    expected per-segment tail-window pattern, repeated."""
+    full_mla = _full(block_size=256)
+    swa_64_sw128 = _swa(block_size=64, sliding_window=128)
+    swa_64_sw512 = _swa(block_size=64, sliding_window=512)
+    swa_4_sw16 = _swa(block_size=4, sliding_window=16)
+    swa_8_sw64 = _swa(block_size=8, sliding_window=64)
+    groups = [
+        KVCacheGroupSpec(["L0"], full_mla),
+        KVCacheGroupSpec(["L1"], swa_64_sw128),
+        KVCacheGroupSpec(["L2"], swa_64_sw512),
+        KVCacheGroupSpec(["L3"], swa_4_sw16),
+        KVCacheGroupSpec(["L4"], swa_8_sw64),
+    ]
+    coord = _make_coord(groups, hash_block_size=4)
+    assert coord.lcm_block_size == 256
+    masks = coord.store_mask(512)
+
+    # Full-MLA: 2 chunks of 256, both stored.
+    assert masks[0] == [True, True]
+    # SWA(64, sw=128): tail = ceil(127/64) = 2; C = 256/64 = 4.
+    # Per-segment template = [F,F,T,T]; tiled twice.
+    assert masks[1] == [False, False, True, True] * 2
+    # SWA(64, sw=512): tail = 8 >= C = 4 → entire segment True.
+    assert masks[2] == [True] * 8
+    # SWA(4, sw=16): tail = ceil(15/4) = 4; C = 256/4 = 64.
+    # Last 4 of each 64-chunk segment True.
+    assert masks[3] == ([False] * 60 + [True] * 4) * 2
+    # SWA(8, sw=64): tail = ceil(63/8) = 8; C = 256/8 = 32.
+    # Last 8 of each 32-chunk segment True.
+    assert masks[4] == ([False] * 24 + [True] * 8) * 2
+
+
+def test_store_mask_fast_path_all_block_sizes_equal_lcm():
+    """When every non-full-attn group already aligns to lcm_block_size, the
+    fast path returns all-True without invoking find_longest_cache_hit."""
+    full = _full(block_size=64)
+    swa = _swa(block_size=64, sliding_window=128)
+    groups = [KVCacheGroupSpec(["L0"], full), KVCacheGroupSpec(["L1"], swa)]
+    coord = _make_coord(groups, hash_block_size=64)
+    assert coord.lcm_block_size == 64
+    masks = coord.store_mask(256)
+    # Every block in every group is True — no sub-lcm filtering possible.
+    assert masks == ([True] * 4, [True] * 4)
+
+
+def test_store_mask_fast_path_single_attention_group():
+    """Two groups sharing the same SWA spec collapse to one attention group;
+    no lcm filter applies, every chunk is True."""
+    swa = _swa(block_size=16, sliding_window=32)
+    groups = [KVCacheGroupSpec(["L0"], swa), KVCacheGroupSpec(["L1"], swa)]
+    coord = _make_coord(groups, hash_block_size=16)
+    assert len(coord.attention_groups) == 1
+    masks = coord.store_mask(64)
+    assert masks == ([True] * 4, [True] * 4)
diff --git a/tests/v1/kv_connector/unit/test_mooncake_store_hma_e2e.py b/tests/v1/kv_connector/unit/test_mooncake_store_hma_e2e.py
new file mode 100644
index 000000000000..01d4f4821ea0
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_mooncake_store_hma_e2e.py
@@ -0,0 +1,342 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""End-to-end save->lookup test for MooncakeStoreConnector on a hybrid
+(SWA + Full) attention config, using a dict-backed mock store."""
+
+import sys
+import threading
+import types
+from unittest.mock import MagicMock, patch
+
+import torch
+
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store import (
+    worker as mooncake_store_worker,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.coordinator import (  # noqa: E501
+    MooncakeStoreCoordinator,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.data import (
+    ChunkedTokenDatabase,
+    KeyMetadata,
+    LoadSpec,
+    ReqMeta,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.worker import (  # noqa: E501
+    KVCacheStoreRecvingThread,
+    KVCacheStoreSendingThread,
+)
+from vllm.v1.core.kv_cache_utils import BlockHash
+from vllm.v1.kv_cache_interface import (
+    FullAttentionSpec,
+    KVCacheConfig,
+    KVCacheGroupSpec,
+    KVCacheTensor,
+    SlidingWindowSpec,
+)
+
+
+class _DictStore:
+    """In-memory MooncakeDistributedStore stand-in."""
+
+    def __init__(self):
+        self._data: dict[str, bytes] = {}
+
+    def setup(self, *_args, **_kwargs):
+        return 0
+
+    def register_buffer(self, addr, length):
+        return 0
+
+    def batch_is_exist(self, keys):
+        return [1 if k in self._data else 0 for k in keys]
+
+    def batch_put_from_multi_buffers(self, keys, addrs, sizes, *_args, **_kwargs):
+        for k in keys:
+            self._data[k] = b"x"
+        return [0] * len(keys)
+
+    def batch_get_into_multi_buffers(self, keys, addrs, sizes, *_args, **_kwargs):
+        return [0 if k in self._data else -1 for k in keys]
+
+
+def _minimal_vllm_config(cache_block_size=16):
+    cfg = MagicMock()
+    cfg.cache_config.block_size = cache_block_size
+    cfg.cache_config.num_gpu_blocks = 4
+    cfg.cache_config.hash_block_size = None
+    cfg.cache_config.enable_prefix_caching = True
+    cfg.parallel_config.prefill_context_parallel_size = 1
+    cfg.parallel_config.decode_context_parallel_size = 1
+    cfg.parallel_config.pipeline_parallel_size = 1
+    cfg.parallel_config.world_size = 1
+    cfg.parallel_config.rank = 0
+    cfg.parallel_config.data_parallel_rank_local = 0
+    cfg.parallel_config.data_parallel_size_local = 1
+    cfg.kv_transfer_config.kv_role = "kv_both"
+    cfg.kv_transfer_config.kv_connector_extra_config = {}
+    cfg.kv_events_config = None
+    cfg.model_config.model = "/tmp/m"
+    cfg.model_config.use_mla = False
+    cfg.model_config.get_num_layers.return_value = 2
+    cfg.model_config.get_total_num_kv_heads.return_value = 8
+    cfg.model_config.max_model_len = 4096
+    cfg.scheduler_config.max_num_batched_tokens = 8192
+    # Without this, MagicMock's truthy use_eagle() triggers the coordinator's
+    # "use_eagle && nothing annotated → flag all groups" fallback.
+    cfg.speculative_config = None
+    return cfg
+
+
+def _build_worker_with_dict_store(vllm_config, kv_cache_config, store):
+    """Build a MooncakeStoreWorker patching all distributed dependencies."""
+    fake_mooncake_store = types.ModuleType("mooncake.store")
+    fake_mooncake_store.MooncakeDistributedStore = lambda: store  # type: ignore[attr-defined]
+    fake_mooncake_store.ReplicateConfig = MagicMock  # type: ignore[attr-defined]
+    with (
+        patch.dict(sys.modules, {"mooncake.store": fake_mooncake_store}),
+        patch.object(mooncake_store_worker, "MooncakeStoreConfig") as MCfg,
+    ):
+        sc = MCfg.load_from_env.return_value
+        sc.metadata_server = ""
+        sc.global_segment_size = 1 << 20
+        sc.local_buffer_size = 1 << 20
+        sc.protocol = "tcp"
+        sc.device_name = ""
+        sc.master_server_address = ""
+        sc.enable_offload = False
+        with (
+            patch(
+                "vllm.distributed.kv_transfer.kv_connector.v1.mooncake"
+                ".store.worker.get_tensor_model_parallel_rank",
+                return_value=0,
+            ),
+            patch(
+                "vllm.distributed.kv_transfer.kv_connector.v1.mooncake"
+                ".store.worker.get_tensor_model_parallel_world_size",
+                return_value=1,
+            ),
+            patch(
+                "vllm.distributed.kv_transfer.kv_connector.v1.mooncake"
+                ".store.worker.get_pcp_group"
+            ) as mock_pcp,
+            patch(
+                "vllm.distributed.kv_transfer.kv_connector.v1.mooncake"
+                ".store.worker.get_dcp_group"
+            ) as mock_dcp,
+            patch(
+                "vllm.distributed.kv_transfer.kv_connector.v1.mooncake"
+                ".store.worker.get_ip",
+                return_value="127.0.0.1",
+            ),
+        ):
+            mock_pcp.return_value.world_size = 1
+            mock_dcp.return_value.world_size = 1
+            worker = mooncake_store_worker.MooncakeStoreWorker(
+                vllm_config, kv_cache_config=kv_cache_config
+            )
+    return worker
+
+
+def test_e2e_swa_plus_full_save_then_lookup_hits():
+    """
+    E2E: build a SWA+Full hybrid worker, save all blocks via the sending
+    thread (synchronously), then verify lookup returns the full hit length.
+    Also verify that evicting SWA's early blocks (outside its window) still
+    allows a full hit because the window covers the tail.
+    """
+    full = FullAttentionSpec(block_size=16, num_kv_heads=8, head_size=64, dtype=None)
+    swa = SlidingWindowSpec(
+        block_size=16,
+        num_kv_heads=8,
+        head_size=64,
+        dtype=None,
+        sliding_window=32,
+    )
+    cfg = KVCacheConfig(
+        num_blocks=4,
+        kv_cache_tensors=[
+            KVCacheTensor(size=8192, shared_by=["L0"]),
+            KVCacheTensor(size=8192, shared_by=["L1"]),
+        ],
+        kv_cache_groups=[
+            KVCacheGroupSpec(["L0"], full),
+            KVCacheGroupSpec(["L1"], swa),
+        ],
+    )
+    vllm_config = _minimal_vllm_config(cache_block_size=16)
+    store = _DictStore()
+
+    worker = _build_worker_with_dict_store(vllm_config, cfg, store)
+    worker.tp_size = 1
+    worker.pp_size = 1
+    worker.put_step = 1
+    worker.num_kv_head = 8
+
+    # Register kv_caches using mocked thread classes so register_kv_caches
+    # doesn't try to start real background threads (which set ready_event).
+    kv_caches = {
+        "L0": torch.zeros(2, 4, 8, 8, 64),
+        "L1": torch.zeros(2, 4, 8, 8, 64),
+    }
+
+    def _fake_thread_init(*args, **kwargs):
+        """Mock thread that sets all threading.Event args so waits don't block."""
+        for v in list(args) + list(kwargs.values()):
+            if isinstance(v, threading.Event):
+                v.set()
+        m = MagicMock()
+        m.start = lambda: None
+        return m
+
+    with (
+        patch.object(
+            mooncake_store_worker,
+            "KVCacheStoreSendingThread",
+            side_effect=_fake_thread_init,
+        ),
+        patch.object(
+            mooncake_store_worker,
+            "KVCacheStoreRecvingThread",
+            side_effect=_fake_thread_init,
+        ),
+    ):
+        worker.register_kv_caches(kv_caches)
+
+    # Now build a real sending thread (no .start()) over the worker's token_dbs
+    # and the dict-backed store, so _handle_request runs synchronously.
+    ready = threading.Event()
+    send_thread = KVCacheStoreSendingThread(
+        store=store,
+        token_databases=worker.token_dbs,
+        block_size=worker.block_size,
+        coord=worker.coord,
+        tp_rank=worker.tp_rank,
+        put_step=worker.put_step,
+        kv_role=worker.kv_role,
+        ready_event=ready,
+        enable_kv_event=False,
+    )
+
+    hs = [BlockHash(bytes([i + 1]) * 4) for i in range(4)]
+    save_req = ReqMeta(
+        req_id="r0",
+        token_len_chunk=64,
+        block_ids=([0, 1, 2, 3], [0, 1, 2, 3]),
+        block_hashes=hs,
+        can_save=True,
+    )
+    send_thread.add_stored_request("r0")
+    # Put the request in the queue so task_done() doesn't underflow.
+    send_thread.request_queue.put(save_req)
+    req = send_thread.request_queue.get()
+    send_thread._handle_request(req)
+
+    # Point worker.store at the dict store (the worker constructor captured
+    # the MagicMock; replace with the real dict store for lookup).
+    worker.store = store
+
+    # Both groups stored all 4 blocks -> full hit.
+    assert worker.lookup(token_len=64, block_hashes=hs) == 64
+
+    # Evict SWA's first two blocks (outside its window of 32 tokens = 2 blocks).
+    swa_keys_outside_window = [
+        k
+        for k in list(store._data.keys())
+        if "@group:1" in k and (("@" + hs[0].hex()) in k or ("@" + hs[1].hex()) in k)
+    ]
+    for k in swa_keys_outside_window:
+        del store._data[k]
+
+    # SWA window=32 -> only last 2 blocks must be present in SWA group.
+    # Full has all 4. Coordinator should still return 64.
+    assert worker.lookup(token_len=64, block_hashes=hs) == 64
+
+
+def test_recv_skips_swa_blocks_before_window():
+    """Producer stored every block for both groups; consumer must only fetch
+    SWA blocks within the sliding window, not the head."""
+    full = FullAttentionSpec(block_size=16, num_kv_heads=8, head_size=64, dtype=None)
+    # sliding_window=32, block_size=16 → 2 contiguous blocks within window.
+    swa = SlidingWindowSpec(
+        block_size=16,
+        num_kv_heads=8,
+        head_size=64,
+        dtype=None,
+        sliding_window=32,
+    )
+    groups = [
+        KVCacheGroupSpec(["L0"], full),
+        KVCacheGroupSpec(["L1"], swa),
+    ]
+    md0 = KeyMetadata("m", 0, 0, 0, 0, group_id=0)
+    md1 = KeyMetadata("m", 0, 0, 0, 0, group_id=1)
+    db_full = ChunkedTokenDatabase(md0, block_size=16, hash_block_size=16)
+    db_swa = ChunkedTokenDatabase(md1, block_size=16, hash_block_size=16)
+    db_full.set_kv_caches_base_addr([0])
+    db_full.set_block_len([1024])
+    db_swa.set_kv_caches_base_addr([1 << 20])
+    db_swa.set_block_len([1024])
+
+    requested_keys: list[str] = []
+
+    class _CapturingStore:
+        def batch_get_into_multi_buffers(self, keys, addrs, sizes):
+            requested_keys.extend(keys)
+            return [0] * len(keys)
+
+    ready = threading.Event()
+    coord = MooncakeStoreCoordinator(
+        groups, scheduler_block_size=16, hash_block_size=16
+    )
+    recv = KVCacheStoreRecvingThread(
+        store=_CapturingStore(),
+        token_databases=[db_full, db_swa],
+        block_size=16,
+        tp_rank=0,
+        ready_event=ready,
+        coord=coord,
+    )
+
+    hs = [BlockHash(bytes([i + 1]) * 4) for i in range(4)]
+    req = ReqMeta(
+        req_id="r0",
+        token_len_chunk=64,
+        block_ids=([0, 1, 2, 3], [0, 1, 2, 3]),
+        block_hashes=hs,
+        load_spec=LoadSpec(
+            vllm_cached_tokens=0, kvpool_cached_tokens=64, can_load=True, token_len=64
+        ),
+    )
+    recv.request_queue.put(req)
+    recv._handle_request(recv.request_queue.get())
+
+    full_keys = [k for k in requested_keys if "@group:0" in k]
+    swa_keys = [k for k in requested_keys if "@group:1" in k]
+    # Full attention: load every block (4).
+    assert len(full_keys) == 4
+    # SWA: only the 2 tail-window blocks (hashes hs[2], hs[3]).
+    assert len(swa_keys) == 2
+    swa_hashes = {k.rsplit("@", 1)[-1] for k in swa_keys}
+    assert swa_hashes == {hs[2].hex(), hs[3].hex()}
+
+
+def test_chunked_token_database_hash_block_size_smaller_than_block_size():
+    """DSv4-style: hash_block_size=4, group block_size=16 — process_tokens
+    must merge every 4 fine hashes into one chunk hash via
+    BlockHashListWithBlockSize."""
+    md = KeyMetadata("m", 0, 0, 0, 0, group_id=3)
+    db = ChunkedTokenDatabase(md, block_size=16, hash_block_size=4)
+    db.set_kv_caches_base_addr([0])
+    db.set_block_len([512])
+    # 8 fine-grained hashes (32 tokens at hash_block_size=4) → 2 group chunks.
+    fine_hashes = [BlockHash(bytes([i + 1]) * 4) for i in range(8)]
+    out = list(db.process_tokens(token_len=32, block_hashes=fine_hashes))
+    assert len(out) == 2
+    assert out[0][0] == 0 and out[0][1] == 16
+    assert out[1][0] == 16 and out[1][1] == 32
+    # Each chunk's hash is the concatenation of 4 fine hashes.
+    expected0 = b"".join(fine_hashes[0:4]).hex()
+    expected1 = b"".join(fine_hashes[4:8]).hex()
+    assert out[0][2].chunk_hash == expected0
+    assert out[1][2].chunk_hash == expected1
diff --git a/tests/v1/kv_connector/unit/test_mooncake_store_scheduler.py b/tests/v1/kv_connector/unit/test_mooncake_store_scheduler.py
new file mode 100644
index 000000000000..a6d7f3658073
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_mooncake_store_scheduler.py
@@ -0,0 +1,408 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from types import SimpleNamespace
+
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.data import (
+    LoadSpec,
+    ReqMeta,
+    RequestTracker,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.scheduler import (
+    MooncakeStoreScheduler,
+)
+
+
+def _make_bare_scheduler() -> MooncakeStoreScheduler:
+    scheduler = object.__new__(MooncakeStoreScheduler)
+    scheduler.kv_role = "kv_both"
+    scheduler.original_block_size = 16
+    scheduler._block_size = 16
+    scheduler.load_specs = {}
+    scheduler._preempted_req_ids = set()
+    scheduler._unfinished_request_ids = {"req-0"}
+    scheduler._unfinished_requests = {}
+    scheduler._request_trackers = {}
+    return scheduler
+
+
+def _make_scheduler_output(*, scheduled_spec_tokens: list[int] | None):
+    return SimpleNamespace(
+        finished_req_ids=set(),
+        preempted_req_ids=set(),
+        scheduled_new_reqs=[],
+        scheduled_cached_reqs=SimpleNamespace(
+            req_ids=["req-0"],
+            new_block_ids=[([2],)],
+            num_computed_tokens=[44],
+        ),
+        num_scheduled_tokens={"req-0": 4},
+        scheduled_spec_decode_tokens=(
+            {"req-0": scheduled_spec_tokens} if scheduled_spec_tokens else {}
+        ),
+    )
+
+
+def _add_unfinished_request(
+    scheduler: MooncakeStoreScheduler,
+    *,
+    token_ids: list[int],
+    block_hashes: list[bytes],
+    prefill_end_tokens: int,
+) -> None:
+    request = SimpleNamespace(
+        all_token_ids=token_ids,
+        block_hashes=block_hashes,
+        num_output_placeholders=0,
+    )
+    scheduler._unfinished_requests["req-0"] = (request, ([0, 1],))
+    scheduler._request_trackers["req-0"] = RequestTracker(
+        req_id="req-0",
+        token_len=44,
+        allocated_block_ids=([0, 1],),
+        num_saved_tokens=32,
+        token_ids=token_ids[:44],
+        prefill_end_tokens=prefill_end_tokens,
+    )
+
+
+def test_cached_request_with_spec_decode_does_not_save_scheduled_drafts():
+    # Drafts in scheduled_spec_decode_tokens are not appended to all_token_ids
+    # yet, so the tracker's token_len does not advance and num_tokens_to_save
+    # stays below chunk_boundary — the save is naturally skipped.
+    scheduler = _make_bare_scheduler()
+    _add_unfinished_request(
+        scheduler,
+        token_ids=list(range(44)),
+        block_hashes=[b"h0", b"h1"],
+        prefill_end_tokens=48,
+    )
+
+    meta = scheduler.build_connector_meta(
+        _make_scheduler_output(scheduled_spec_tokens=[101, 102, 103])
+    )
+
+    assert meta.requests == []
+    tracker = scheduler._request_trackers["req-0"]
+    assert tracker.token_len == 44
+    assert tracker.num_saved_tokens == 32
+    assert tracker.allocated_block_ids == ([0, 1, 2],)
+
+
+def test_cached_request_without_spec_decode_keeps_current_step_save_overlap():
+    scheduler = _make_bare_scheduler()
+    _add_unfinished_request(
+        scheduler,
+        token_ids=list(range(48)),
+        block_hashes=[b"h0", b"h1", b"h2"],
+        prefill_end_tokens=48,
+    )
+
+    meta = scheduler.build_connector_meta(
+        _make_scheduler_output(scheduled_spec_tokens=None)
+    )
+
+    assert len(meta.requests) == 1
+    req_meta = meta.requests[0]
+    assert req_meta.req_id == "req-0"
+    assert req_meta.can_save is True
+    assert req_meta.token_len_chunk == 48
+    tracker = scheduler._request_trackers["req-0"]
+    assert tracker.token_len == 48
+    assert tracker.num_saved_tokens == 48
+
+
+def _make_pending_load_unfinished_request(
+    scheduler: MooncakeStoreScheduler,
+    *,
+    num_tokens: int,
+    block_hashes: list[bytes],
+    block_ids: tuple[list[int], ...] = ([0, 1, 2],),
+) -> None:
+    request = SimpleNamespace(
+        num_tokens=num_tokens,
+        block_hashes=block_hashes,
+        num_output_placeholders=0,
+    )
+    scheduler._unfinished_requests["req-0"] = (request, block_ids)
+
+
+def _make_pending_load_scheduler_output() -> SimpleNamespace:
+    """scheduler_output for a step where req-0 is parked on a pending load
+    (not in scheduled_new_reqs or scheduled_cached_reqs)."""
+    return SimpleNamespace(
+        finished_req_ids=set(),
+        preempted_req_ids=set(),
+        scheduled_new_reqs=[],
+        scheduled_cached_reqs=SimpleNamespace(
+            req_ids=[],
+            new_block_ids=[],
+            num_computed_tokens=[],
+        ),
+        num_scheduled_tokens={},
+        scheduled_spec_decode_tokens={},
+    )
+
+
+def test_pending_load_does_not_co_queue_save():
+    # Regression: a cache-hit request waiting on an async load must not also
+    # enqueue a save in the same scheduling step. Co-queuing both produces a
+    # recv+send pair for the same req_id, and the scheduler's
+    # _update_from_kv_xfer_finished then trips `assert req_id in self.requests`
+    # when both completions land for the delay-freed request.
+    scheduler = _make_bare_scheduler()
+    _make_pending_load_unfinished_request(
+        scheduler,
+        num_tokens=48,
+        block_hashes=[b"h0", b"h1", b"h2"],
+    )
+    scheduler.load_specs["req-0"] = LoadSpec(
+        vllm_cached_tokens=0,
+        kvpool_cached_tokens=48,
+        can_load=True,
+    )
+
+    meta = scheduler.build_connector_meta(_make_pending_load_scheduler_output())
+
+    assert len(meta.requests) == 1
+    req_meta = meta.requests[0]
+    assert req_meta.req_id == "req-0"
+    # Save must be off so the worker does not call add_stored_request.
+    assert req_meta.can_save is False
+    # Load is still issued as planned.
+    assert req_meta.load_spec is not None
+    assert req_meta.load_spec.can_load is True
+    # And the tracker's saved-tokens watermark stays at 0 so request_finished
+    # later sees `num_saved_tokens <= 0` and frees immediately rather than
+    # waiting for a finished_sending that will never come.
+    tracker = scheduler._request_trackers["req-0"]
+    assert tracker.num_saved_tokens == 0
+
+
+def _make_resumed_unfinished_request(
+    scheduler: MooncakeStoreScheduler,
+    *,
+    token_ids: list[int],
+    block_hashes: list[bytes],
+    num_computed_tokens: int,
+) -> None:
+    request = SimpleNamespace(
+        all_token_ids=token_ids,
+        block_hashes=block_hashes,
+        num_computed_tokens=num_computed_tokens,
+        num_output_placeholders=0,
+    )
+    scheduler._unfinished_requests["req-0"] = (request, ([0, 1],))
+
+
+def _make_resumed_scheduler_output(*, num_scheduled_tokens: int) -> SimpleNamespace:
+    return SimpleNamespace(
+        finished_req_ids=set(),
+        preempted_req_ids=set(),
+        scheduled_new_reqs=[],
+        scheduled_cached_reqs=SimpleNamespace(
+            req_ids=["req-0"],
+            new_block_ids=[([2],)],
+            num_computed_tokens=[0],
+        ),
+        num_scheduled_tokens={"req-0": num_scheduled_tokens},
+        scheduled_spec_decode_tokens={},
+    )
+
+
+def test_resumed_from_preemption_with_load_skips_save():
+    # On resume-from-preemption with a cache hit, the same co-queueing race
+    # applies: the resumed-from-preemption branch in build_connector_meta also
+    # passes load_spec.can_load=True. Skip save in this step; subsequent
+    # cached_reqs steps will save new tokens normally.
+    scheduler = _make_bare_scheduler()
+    scheduler._preempted_req_ids = {"req-0"}
+    _make_resumed_unfinished_request(
+        scheduler,
+        token_ids=list(range(48)),
+        block_hashes=[b"h0", b"h1", b"h2"],
+        num_computed_tokens=0,
+    )
+    scheduler.load_specs["req-0"] = LoadSpec(
+        vllm_cached_tokens=0,
+        kvpool_cached_tokens=48,
+        can_load=True,
+    )
+
+    meta = scheduler.build_connector_meta(
+        _make_resumed_scheduler_output(num_scheduled_tokens=48)
+    )
+
+    assert len(meta.requests) == 1
+    req_meta = meta.requests[0]
+    assert req_meta.req_id == "req-0"
+    assert req_meta.can_save is False
+    assert req_meta.load_spec is not None
+    assert req_meta.load_spec.can_load is True
+    tracker = scheduler._request_trackers["req-0"]
+    assert tracker.num_saved_tokens == 0
+
+
+def test_resumed_from_preemption_without_load_still_saves():
+    # No load_spec → behavior is unchanged: save proceeds.
+    scheduler = _make_bare_scheduler()
+    scheduler._preempted_req_ids = {"req-0"}
+    _make_resumed_unfinished_request(
+        scheduler,
+        token_ids=list(range(48)),
+        block_hashes=[b"h0", b"h1", b"h2"],
+        num_computed_tokens=0,
+    )
+
+    meta = scheduler.build_connector_meta(
+        _make_resumed_scheduler_output(num_scheduled_tokens=48)
+    )
+
+    assert len(meta.requests) == 1
+    req_meta = meta.requests[0]
+    assert req_meta.req_id == "req-0"
+    assert req_meta.can_save is True
+    assert req_meta.load_spec is None
+    tracker = scheduler._request_trackers["req-0"]
+    assert tracker.num_saved_tokens == 48
+
+
+# Focused tests for ReqMeta.from_request_tracker — the centralized guard that
+# enforces "a ReqMeta never carries both a save and a load".
+
+
+def test_from_request_tracker_load_overrides_caller_skip_save():
+    # Caller asks for skip_save=False, but load_spec.can_load=True. The
+    # function must force skip_save=True to avoid producing a ReqMeta the
+    # worker would enqueue on both kv_send_thread and kv_recv_thread.
+    tracker = RequestTracker(
+        req_id="req-0",
+        token_len=48,
+        allocated_block_ids=([0, 1, 2],),
+        num_saved_tokens=0,
+    )
+    load_spec = LoadSpec(vllm_cached_tokens=0, kvpool_cached_tokens=48, can_load=True)
+
+    req_meta = ReqMeta.from_request_tracker(
+        tracker,
+        block_size=16,
+        load_spec=load_spec,
+        skip_save=False,
+        block_hashes=[b"h0", b"h1", b"h2"],
+    )
+
+    assert req_meta is not None
+    assert req_meta.can_save is False
+    assert req_meta.load_spec is load_spec
+    assert tracker.num_saved_tokens == 0
+
+
+def test_from_request_tracker_load_with_can_load_false_still_saves():
+    # A LoadSpec with can_load=False (e.g., no external tokens to load after
+    # update_state_after_alloc) must not suppress the save.
+    tracker = RequestTracker(
+        req_id="req-0",
+        token_len=48,
+        allocated_block_ids=([0, 1, 2],),
+        num_saved_tokens=0,
+    )
+    load_spec = LoadSpec(vllm_cached_tokens=0, kvpool_cached_tokens=48, can_load=False)
+
+    req_meta = ReqMeta.from_request_tracker(
+        tracker,
+        block_size=16,
+        load_spec=load_spec,
+        skip_save=False,
+        block_hashes=[b"h0", b"h1", b"h2"],
+    )
+
+    assert req_meta is not None
+    assert req_meta.can_save is True
+    # from_request_tracker clears load_spec when can_load is False.
+    assert req_meta.load_spec is None
+    assert tracker.num_saved_tokens == 48
+
+
+def test_from_request_tracker_no_load_saves_normally():
+    tracker = RequestTracker(
+        req_id="req-0",
+        token_len=48,
+        allocated_block_ids=([0, 1, 2],),
+        num_saved_tokens=0,
+    )
+
+    req_meta = ReqMeta.from_request_tracker(
+        tracker,
+        block_size=16,
+        load_spec=None,
+        skip_save=False,
+        block_hashes=[b"h0", b"h1", b"h2"],
+    )
+
+    assert req_meta is not None
+    assert req_meta.can_save is True
+    assert req_meta.load_spec is None
+    assert tracker.num_saved_tokens == 48
+
+
+class _StubLookupClient:
+    def __init__(self, hit_tokens: int) -> None:
+        self._hit_tokens = hit_tokens
+
+    def lookup(self, token_len: int, block_hashes: list[bytes]) -> int:
+        return self._hit_tokens
+
+
+def test_full_external_hit_keeps_kvpool_cached_tokens_block_aligned():
+    # When the external store hits the entire prompt, scheduler must leave at
+    # least one token uncomputed for sampling but stay on a block boundary.
+    # Otherwise the recv-side load mask floors token_len to
+    # (num_tokens-1)//block_size, the tail partial chunk is dropped, and -- if
+    # the local cache covers the aligned prefix -- key_list ends up empty
+    # (ZeroDivisionError in the recv thread's `tp_rank % len(key_list)`).
+    scheduler = _make_bare_scheduler()
+    scheduler.load_async = True
+    scheduler.client = _StubLookupClient(hit_tokens=48)  # full hit on 48-token prompt
+
+    request = SimpleNamespace(
+        request_id="req-0",
+        num_tokens=48,
+        block_hashes=[b"h0", b"h1", b"h2"],
+    )
+
+    need_to_allocate, load_async = scheduler.get_num_new_matched_tokens(
+        request, num_computed_tokens=16
+    )
+
+    # 47 // 16 * 16 == 32 tokens left in external store after reserving the
+    # sub-block tail for sampling. 32 - 16 (local) == 16 to load.
+    assert need_to_allocate == 16
+    assert load_async is True
+    load_spec = scheduler.load_specs["req-0"]
+    assert load_spec.vllm_cached_tokens == 16
+    assert load_spec.kvpool_cached_tokens == 32
+    assert load_spec.kvpool_cached_tokens % 16 == 0
+
+
+def test_full_external_hit_with_full_local_hit_skips_load():
+    # When local prefix cache already covers the block-aligned external hit,
+    # there is nothing for the connector to load. The pre-fix behavior would
+    # have scheduled a 15-token load that the recv thread couldn't translate
+    # into any block-aligned key.
+    scheduler = _make_bare_scheduler()
+    scheduler.load_async = True
+    scheduler.client = _StubLookupClient(hit_tokens=48)
+
+    request = SimpleNamespace(
+        request_id="req-0",
+        num_tokens=48,
+        block_hashes=[b"h0", b"h1", b"h2"],
+    )
+
+    need_to_allocate, load_async = scheduler.get_num_new_matched_tokens(
+        request, num_computed_tokens=32
+    )
+
+    assert need_to_allocate == 0
+    assert load_async is False
+    assert "req-0" not in scheduler.load_specs
diff --git a/tests/v1/kv_connector/unit/test_mooncake_store_worker.py b/tests/v1/kv_connector/unit/test_mooncake_store_worker.py
new file mode 100644
index 000000000000..a617b6074e03
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_mooncake_store_worker.py
@@ -0,0 +1,1477 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+import logging
+import math
+import sys
+import threading
+import types
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+import pytest
+import torch
+
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake import (
+    rdma_utils,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store import (
+    worker,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store import (
+    worker as mooncake_store_worker,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.data import (
+    ChunkedTokenDatabase,
+    KeyMetadata,
+    LoadSpec,
+    ReqMeta,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.metrics import (
+    MooncakeStoreConnectorStats,
+)
+
+
+def _default_send_coord() -> mooncake_store_worker.MooncakeStoreCoordinator:
+    from vllm.v1.kv_cache_interface import FullAttentionSpec, KVCacheGroupSpec
+
+    spec = FullAttentionSpec(block_size=16, num_kv_heads=8, head_size=64, dtype=None)
+    return mooncake_store_worker.MooncakeStoreCoordinator(
+        [KVCacheGroupSpec(["layer0"], spec)],
+        scheduler_block_size=16,
+        hash_block_size=16,
+    )
+
+
+def _make_store_sending_thread(
+    store: MagicMock,
+    *,
+    coord: mooncake_store_worker.MooncakeStoreCoordinator | None = None,
+    token_databases: list[ChunkedTokenDatabase] | None = None,
+    block_size: int = 16,
+    replicate_config: object | None = None,
+) -> mooncake_store_worker.KVCacheStoreSendingThread:
+    if coord is None:
+        coord = _default_send_coord()
+    if token_databases is None:
+        db = ChunkedTokenDatabase(KeyMetadata("test-model", 0, 0, 0, 0), block_size=16)
+        db.set_kv_caches_base_addr([0x1000])
+        db.set_block_len([256])
+        token_databases = [db]
+    thread = mooncake_store_worker.KVCacheStoreSendingThread(
+        store=store,
+        token_databases=token_databases,
+        block_size=block_size,
+        coord=coord,
+        tp_rank=0,
+        put_step=1,
+        kv_role="kv_producer",
+        ready_event=threading.Event(),
+        replicate_config=replicate_config,
+    )
+    thread.request_queue.task_done = MagicMock()
+    return thread
+
+
+def _make_store_recving_thread(
+    store: MagicMock,
+    *,
+    disk_offload_buffer_budget_bytes: int | None = None,
+) -> mooncake_store_worker.KVCacheStoreRecvingThread:
+    from vllm.v1.kv_cache_interface import FullAttentionSpec, KVCacheGroupSpec
+
+    token_database = ChunkedTokenDatabase(
+        KeyMetadata("test-model", 0, 0, 0, 0), block_size=16
+    )
+    token_database.set_kv_caches_base_addr([0x1000])
+    token_database.set_block_len([256])
+    spec = FullAttentionSpec(block_size=16, num_kv_heads=8, head_size=64, dtype=None)
+    coord = mooncake_store_worker.MooncakeStoreCoordinator(
+        [KVCacheGroupSpec(["layer0"], spec)],
+        scheduler_block_size=16,
+        hash_block_size=16,
+    )
+    thread = mooncake_store_worker.KVCacheStoreRecvingThread(
+        store=store,
+        token_databases=[token_database],
+        block_size=16,
+        tp_rank=0,
+        ready_event=threading.Event(),
+        coord=coord,
+        disk_offload_buffer_budget_bytes=disk_offload_buffer_budget_bytes,
+    )
+    thread.request_queue.task_done = MagicMock()
+    return thread
+
+
+def _make_load_req(
+    req_id: str,
+    block_hashes: list[bytes],
+    *,
+    token_len: int,
+    vllm_cached_tokens: int = 0,
+) -> ReqMeta:
+    return ReqMeta(
+        req_id=req_id,
+        token_len_chunk=token_len,
+        block_ids=(list(range(len(block_hashes))),),
+        block_hashes=block_hashes,
+        load_spec=LoadSpec(
+            vllm_cached_tokens=vllm_cached_tokens,
+            kvpool_cached_tokens=token_len,
+            can_load=True,
+            token_len=token_len,
+        ),
+    )
+
+
+def _make_store_req(req_id: str, block_hashes: list[bytes]) -> ReqMeta:
+    return ReqMeta(
+        req_id=req_id,
+        token_len_chunk=32,
+        block_ids=([0, 1],),
+        block_hashes=block_hashes,
+        can_save=True,
+        original_block_size=16,
+    )
+
+
+_DISK_OFFLOAD_SINGLE_KEY_BYTES = worker._estimate_disk_offload_staging_bytes([256])
+_DISK_OFFLOAD_USABLE_BUDGET_RATIO = 0.9
+_DISK_OFFLOAD_BUDGET_FOR_THREE_KEYS = 4 * _DISK_OFFLOAD_SINGLE_KEY_BYTES
+_DISK_OFFLOAD_BUDGET_FOR_SPLIT = math.ceil(
+    2 * _DISK_OFFLOAD_SINGLE_KEY_BYTES / _DISK_OFFLOAD_USABLE_BUDGET_RATIO
+)  # Allows two 256-byte chunks but not the third.
+_DISK_OFFLOAD_BUDGET_TOO_SMALL = (
+    _DISK_OFFLOAD_SINGLE_KEY_BYTES - 1
+)  # Smaller than a single 256-byte chunk.
+
+
+class _FakeKVTransferConfig:
+    def __init__(
+        self,
+        *,
+        kv_role: str = "kv_both",
+        extra_config: dict[str, object] | None = None,
+    ) -> None:
+        self.kv_role = kv_role
+        self.kv_connector_extra_config = extra_config or {}
+
+    def get_from_extra_config(self, key: str, default: object) -> object:
+        return self.kv_connector_extra_config.get(key, default)
+
+
+class _FakeModelConfig:
+    model = "test-model"
+    use_mla = False
+
+    def get_num_layers(self, parallel_config) -> int:
+        return 1
+
+    def get_total_num_kv_heads(self) -> int:
+        return 1
+
+
+def _make_vllm_config(
+    *, extra_config: dict[str, object] | None = None
+) -> SimpleNamespace:
+    return SimpleNamespace(
+        model_config=_FakeModelConfig(),
+        parallel_config=SimpleNamespace(
+            pipeline_parallel_size=1,
+            rank=0,
+            decode_context_parallel_size=1,
+            prefill_context_parallel_size=1,
+        ),
+        kv_transfer_config=_FakeKVTransferConfig(extra_config=extra_config),
+        cache_config=SimpleNamespace(block_size=16, num_gpu_blocks=10),
+        kv_events_config=SimpleNamespace(enable_kv_cache_events=False),
+        speculative_config=None,
+    )
+
+
+def _make_kv_cache_config(*, block_size: int = 16) -> object:
+    """Minimal single-group KVCacheConfig for topology tests."""
+    from vllm.v1.kv_cache_interface import (
+        FullAttentionSpec,
+        KVCacheConfig,
+        KVCacheGroupSpec,
+    )
+
+    spec = FullAttentionSpec(
+        block_size=block_size, num_kv_heads=8, head_size=64, dtype=None
+    )
+    return KVCacheConfig(
+        num_blocks=10,
+        kv_cache_tensors=[],
+        kv_cache_groups=[KVCacheGroupSpec(["layer0"], spec)],
+    )
+
+
+def _write_mooncake_config(tmp_path, config: dict[str, object]) -> str:
+    config_path = tmp_path / "mooncake_config.json"
+    config_path.write_text(json.dumps(config), encoding="utf-8")
+    return str(config_path)
+
+
+def _install_fake_mooncake(monkeypatch, store_instance: MagicMock):
+    class FakeReplicateConfig:
+        def __init__(self) -> None:
+            self.preferred_segment = ""
+
+    fake_store_module = types.ModuleType("mooncake.store")
+    fake_store_module.MooncakeDistributedStore = lambda: store_instance  # type: ignore[attr-defined]
+    fake_store_module.ReplicateConfig = FakeReplicateConfig  # type: ignore[attr-defined]
+    fake_mooncake_module = types.ModuleType("mooncake")
+    fake_mooncake_module.store = fake_store_module  # type: ignore[attr-defined]
+    monkeypatch.setitem(sys.modules, "mooncake", fake_mooncake_module)
+    monkeypatch.setitem(sys.modules, "mooncake.store", fake_store_module)
+    return FakeReplicateConfig
+
+
+def _patch_worker_runtime(monkeypatch, *, local_ip: str = "10.0.0.7") -> None:
+    single_rank_group = SimpleNamespace(world_size=1, rank_in_group=0)
+    monkeypatch.setattr(worker, "get_mooncake_dp_engine_index", lambda _: 0)
+    monkeypatch.setattr(worker, "get_tensor_model_parallel_rank", lambda: 0)
+    monkeypatch.setattr(worker, "get_tensor_model_parallel_world_size", lambda: 1)
+    monkeypatch.setattr(worker, "get_pcp_group", lambda: single_rank_group)
+    monkeypatch.setattr(worker, "get_dcp_group", lambda: single_rank_group)
+    monkeypatch.setattr(worker, "get_ip", lambda: local_ip)
+
+
+def test_default_local_buffer_size_matches_pr40900():
+    """PR-40900 shipped a 4 GiB default for local_buffer_size; the dual-mode
+    patch preserves it (and the JSON key) so unchanged PR-40900 configs work."""
+    assert worker.DEFAULT_LOCAL_BUFFER_SIZE == 4 * 1024**3
+
+
+def test_get_requester_local_hostname_prefers_override(monkeypatch):
+    monkeypatch.setenv("MOONCAKE_REQUESTER_LOCAL_HOSTNAME", "worker-a:50053")
+
+    assert rdma_utils.get_requester_local_hostname("10.0.0.7") == "worker-a:50053"
+
+
+def test_get_configured_preferred_segment_returns_explicit_override():
+    assert (
+        rdma_utils.get_configured_preferred_segment(
+            {"preferred_segment": "10.0.0.7:50053"}
+        )
+        == "10.0.0.7:50053"
+    )
+
+
+def test_get_configured_preferred_segment_prefers_explicit_over_env(monkeypatch):
+    monkeypatch.setenv("MOONCAKE_PREFERRED_SEGMENT", "10.0.0.8:50053")
+
+    assert (
+        rdma_utils.get_configured_preferred_segment(
+            {"preferred_segment": "10.0.0.7:50053"}
+        )
+        == "10.0.0.7:50053"
+    )
+
+
+def test_get_configured_preferred_segment_returns_env_override(monkeypatch):
+    monkeypatch.setenv("MOONCAKE_PREFERRED_SEGMENT", "10.0.0.8:50053")
+
+    assert rdma_utils.get_configured_preferred_segment({}) == "10.0.0.8:50053"
+
+
+def test_get_configured_preferred_segment_rejects_empty_override():
+    with pytest.raises(ValueError, match="preferred_segment"):
+        rdma_utils.get_configured_preferred_segment({"preferred_segment": "  "})
+
+
+def test_get_configured_worker_rnic_prefers_explicit_device_name(monkeypatch):
+    store_config = worker.MooncakeStoreConfig(
+        metadata_server="",
+        local_buffer_size=1,
+        protocol="rdma",
+        device_name="rocep139s0",
+        master_server_address="",
+    )
+
+    assert (
+        rdma_utils.get_configured_worker_rnic(
+            protocol=store_config.protocol,
+            configured_device=store_config.device_name,
+        )
+        == "rocep139s0"
+    )
+
+
+def test_get_configured_worker_rnic_selects_device_from_explicit_csv(monkeypatch):
+    monkeypatch.setattr(
+        rdma_utils,
+        "get_current_physical_gpu_index",
+        lambda: 1,
+    )
+    store_config = worker.MooncakeStoreConfig(
+        metadata_server="",
+        local_buffer_size=1,
+        protocol="rdma",
+        device_name="rocep139s0,rocep140s0",
+        master_server_address="",
+    )
+
+    assert (
+        rdma_utils.get_configured_worker_rnic(
+            protocol=store_config.protocol,
+            configured_device=store_config.device_name,
+        )
+        == "rocep140s0"
+    )
+
+
+def test_get_configured_worker_rnic_warns_and_returns_empty_for_rdma_with_no_device(
+    caplog, monkeypatch
+):
+    """No device configured + protocol=rdma → emit a clear warning and return ""
+    so the C++ side handles auto-selection. There is no Python-side fallback."""
+    monkeypatch.setattr(logging.getLogger("vllm"), "propagate", True)
+    with caplog.at_level(logging.WARNING):
+        result = rdma_utils.get_configured_worker_rnic(
+            protocol="rdma",
+            configured_device="",
+        )
+    assert result == ""
+    warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
+    assert any("No RDMA devices specified" in r.message for r in warnings), (
+        f"expected fallback warning, got {[r.message for r in warnings]}"
+    )
+
+
+def test_get_configured_worker_rnic_silent_for_tcp_with_no_device(caplog, monkeypatch):
+    """protocol=tcp + no device → return "" silently (no RDMA, no warning)."""
+    monkeypatch.setattr(logging.getLogger("vllm"), "propagate", True)
+    with caplog.at_level(logging.WARNING):
+        result = rdma_utils.get_configured_worker_rnic(
+            protocol="tcp",
+            configured_device="",
+        )
+    assert result == ""
+    warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
+    assert not any("RDMA" in r.message for r in warnings), (
+        "did not expect RDMA warning for tcp protocol, got "
+        f"{[r.message for r in warnings]}"
+    )
+
+
+def test_get_configured_worker_rnic_rejects_short_explicit_csv(monkeypatch):
+    monkeypatch.setattr(
+        rdma_utils,
+        "get_current_physical_gpu_index",
+        lambda: 2,
+    )
+    with pytest.raises(ValueError, match="does not cover local GPU 2"):
+        rdma_utils.get_configured_worker_rnic(
+            protocol="rdma",
+            configured_device="rocep139s0,rocep140s0",
+        )
+
+
+class _ReplicaDesc:
+    def __init__(self, tier: str):
+        self.tier = tier
+
+    def is_memory_replica(self) -> bool:
+        return self.tier == "memory"
+
+    def is_disk_replica(self) -> bool:
+        return self.tier == "disk"
+
+    def is_local_disk_replica(self) -> bool:
+        return self.tier == "disk"
+
+
+def test_store_sending_thread_skips_request_during_cpu_pressure():
+    store = MagicMock()
+    store.batch_is_exist.side_effect = lambda keys: [0] * len(keys)
+    store.batch_put_from_multi_buffers.side_effect = [
+        [-200, -200],
+        [256, 256],
+        [256, 256],
+    ]
+    thread = _make_store_sending_thread(store)
+
+    thread.add_stored_request("req-a")
+    thread._handle_request(_make_store_req("req-a", [b"a0", b"a1"]))
+
+    assert thread._store_pressure_active is True
+    assert "req-a" in thread._skip_store_requests
+    assert store.batch_put_from_multi_buffers.call_count == 1
+
+    thread.add_stored_request("req-a")
+    thread._handle_request(_make_store_req("req-a", [b"a2", b"a3"]))
+
+    assert store.batch_put_from_multi_buffers.call_count == 1
+
+    thread.add_stored_request("req-b")
+    thread._handle_request(_make_store_req("req-b", [b"b0", b"b1"]))
+
+    assert thread._store_pressure_active is False
+    assert "req-a" not in thread._skip_store_requests
+    assert store.batch_put_from_multi_buffers.call_count == 2
+
+    thread.add_stored_request("req-a")
+    thread._handle_request(_make_store_req("req-a", [b"a4", b"a5"]))
+
+    assert store.batch_put_from_multi_buffers.call_count == 3
+
+
+def test_store_sending_thread_records_mooncake_metrics():
+    store = MagicMock()
+    store.batch_is_exist.return_value = [0, 0]
+    store.batch_put_from_multi_buffers.return_value = [256, 256]
+    thread = _make_store_sending_thread(store)
+    stats = MooncakeStoreConnectorStats()
+    thread._record_operation_cb = stats.record_operation
+
+    thread.add_stored_request("req-a")
+    thread._handle_request(_make_store_req("req-a", [b"a0", b"a1"]))
+
+    assert len(stats.data["save_exists"]) == 1
+    assert stats.data["save_exists"][0]["num_keys"] == 2
+    assert len(stats.data["save_put"]) == 1
+    assert stats.data["save_put"][0]["num_bytes"] == 512
+    assert stats.data["save_put"][0]["status"] == "ok"
+
+
+def test_store_sending_thread_only_skips_on_no_available_handle():
+    store = MagicMock()
+    store.batch_is_exist.side_effect = lambda keys: [0] * len(keys)
+    store.batch_put_from_multi_buffers.side_effect = [
+        [-500, -500],
+        [256, 256],
+    ]
+    thread = _make_store_sending_thread(store)
+
+    thread.add_stored_request("req-a")
+    thread._handle_request(_make_store_req("req-a", [b"a0", b"a1"]))
+
+    assert thread._store_pressure_active is False
+    assert "req-a" not in thread._skip_store_requests
+    assert store.batch_put_from_multi_buffers.call_count == 1
+
+    thread.add_stored_request("req-a")
+    thread._handle_request(_make_store_req("req-a", [b"a2", b"a3"]))
+
+    assert store.batch_put_from_multi_buffers.call_count == 2
+
+
+def test_store_sending_thread_passes_replicate_config_when_preferred_segment_set():
+    store = MagicMock()
+    store.batch_is_exist.side_effect = lambda keys: [0] * len(keys)
+    store.batch_put_from_multi_buffers.return_value = [256, 256]
+    replicate_config = SimpleNamespace(preferred_segment="10.0.0.7:50053")
+    thread = _make_store_sending_thread(store, replicate_config=replicate_config)
+
+    thread.add_stored_request("req-a")
+    thread._handle_request(_make_store_req("req-a", [b"a0", b"a1"]))
+
+    assert store.batch_put_from_multi_buffers.call_count == 1
+    call_args = store.batch_put_from_multi_buffers.call_args.args
+    assert len(call_args) == 4
+    assert call_args[3] is replicate_config
+
+
+def test_store_sending_thread_passes_default_replicate_config_when_no_preferred_segment():  # noqa: E501
+    """Without a preferred_segment the SendingThread still forwards a
+    (default-constructed) ReplicateConfig so the C++ side always sees a
+    well-defined config object."""
+    store = MagicMock()
+    store.batch_is_exist.side_effect = lambda keys: [0] * len(keys)
+    store.batch_put_from_multi_buffers.return_value = [256, 256]
+    replicate_config = SimpleNamespace()
+    thread = _make_store_sending_thread(store, replicate_config=replicate_config)
+
+    thread.add_stored_request("req-a")
+    thread._handle_request(_make_store_req("req-a", [b"a0", b"a1"]))
+
+    assert store.batch_put_from_multi_buffers.call_count == 1
+    call_args = store.batch_put_from_multi_buffers.call_args.args
+    assert len(call_args) == 4
+    assert call_args[3] is replicate_config
+
+
+def test_estimate_disk_offload_staging_bytes_sums_multi_segment_sizes():
+    assert worker._estimate_disk_offload_staging_bytes([256, 512]) == 12288
+
+
+def test_recv_thread_uses_single_batch_when_no_disk_offload_budget(monkeypatch):
+    monkeypatch.delenv("VLLM_MOONCAKE_STORE_TIER_LOG", raising=False)
+    store = MagicMock()
+    store.batch_get_into_multi_buffers.return_value = [256, 256, 256]
+    thread = _make_store_recving_thread(store, disk_offload_buffer_budget_bytes=None)
+
+    req = _make_load_req(
+        "req-a",
+        [b"a0", b"a1", b"a2"],
+        token_len=48,
+    )
+
+    thread._handle_request(req)
+
+    assert store.batch_get_into_multi_buffers.call_count == 1
+    keys, addrs, sizes = store.batch_get_into_multi_buffers.call_args.args
+    assert keys == [
+        "test-model@tp_rank:0@pcp0@dcp0@pp_rank:0@group:0@6130",
+        "test-model@tp_rank:0@pcp0@dcp0@pp_rank:0@group:0@6131",
+        "test-model@tp_rank:0@pcp0@dcp0@pp_rank:0@group:0@6132",
+    ]
+    assert sizes == [[256], [256], [256]]
+    store.batch_get_replica_desc.assert_not_called()
+
+
+def test_recv_thread_logs_tier_summary_when_enabled(monkeypatch, caplog_vllm):
+    monkeypatch.setenv("VLLM_MOONCAKE_STORE_TIER_LOG", "1")
+    caplog_vllm.set_level(logging.INFO, logger=worker.logger.name)
+
+    store = MagicMock()
+    store.batch_get_into_multi_buffers.return_value = [256, 256, -10]
+    thread = _make_store_recving_thread(store, disk_offload_buffer_budget_bytes=None)
+
+    req = _make_load_req(
+        "req-a",
+        [b"a0", b"a1", b"a2"],
+        token_len=48,
+    )
+    expected_keys = [
+        "test-model@tp_rank:0@pcp0@dcp0@pp_rank:0@group:0@6130",
+        "test-model@tp_rank:0@pcp0@dcp0@pp_rank:0@group:0@6131",
+        "test-model@tp_rank:0@pcp0@dcp0@pp_rank:0@group:0@6132",
+    ]
+    store.batch_get_replica_desc.return_value = {
+        expected_keys[0]: [_ReplicaDesc("memory")],
+        expected_keys[1]: [_ReplicaDesc("disk")],
+        expected_keys[2]: [],
+    }
+
+    thread._handle_request(req)
+
+    assert store.batch_get_replica_desc.call_args.args == (expected_keys,)
+    assert store.method_calls[0][0] == "batch_get_replica_desc"
+    assert store.method_calls[1][0] == "batch_get_into_multi_buffers"
+
+    messages = [record.getMessage() for record in caplog_vllm.records]
+    assert any(
+        "Mooncake load tier summary" in message
+        and "req_id=req-a" in message
+        and "batch_keys=3" in message
+        and "memory_keys=1" in message
+        and "disk_keys=1" in message
+        and "unknown_keys=1" in message
+        and "success_keys=2" in message
+        and "failed_keys=1" in message
+        and "bytes_by_tier={'memory': 256, 'disk': 256, 'unknown': 0}" in message
+        for message in messages
+    )
+
+
+def test_recv_thread_records_partial_failure_metrics(monkeypatch):
+    monkeypatch.delenv("VLLM_MOONCAKE_STORE_TIER_LOG", raising=False)
+    store = MagicMock()
+    store.batch_get_into_multi_buffers.return_value = [256, -10]
+    thread = _make_store_recving_thread(store, disk_offload_buffer_budget_bytes=None)
+    stats = MooncakeStoreConnectorStats()
+    thread._record_operation_cb = stats.record_operation
+
+    req = _make_load_req(
+        "req-a",
+        [b"a0", b"a1"],
+        token_len=32,
+    )
+
+    thread._handle_request(req)
+
+    assert len(stats.data["load_get"]) == 1
+    assert stats.data["load_get"][0]["num_keys"] == 2
+    assert stats.data["load_get"][0]["num_bytes"] == 512
+    assert stats.data["load_get"][0]["status"] == "partial_failure"
+    assert stats.data["load_get"][0]["num_failed_keys"] == 1
+
+
+def test_recv_thread_uses_ratio_scaled_budget_for_first_pass_split():
+    store = MagicMock()
+    store.batch_get_into_multi_buffers.side_effect = [
+        [256],
+        [256],
+    ]
+    thread = _make_store_recving_thread(
+        store,
+        disk_offload_buffer_budget_bytes=2 * _DISK_OFFLOAD_SINGLE_KEY_BYTES,
+    )
+
+    req = _make_load_req(
+        "req-a",
+        [b"a0", b"a1"],
+        token_len=32,
+    )
+
+    thread._handle_request(req)
+
+    assert store.batch_get_into_multi_buffers.call_count == 2
+    first_keys = store.batch_get_into_multi_buffers.call_args_list[0].args[0]
+    second_keys = store.batch_get_into_multi_buffers.call_args_list[1].args[0]
+    assert first_keys == [
+        "test-model@tp_rank:0@pcp0@dcp0@pp_rank:0@group:0@6130",
+    ]
+    assert second_keys == [
+        "test-model@tp_rank:0@pcp0@dcp0@pp_rank:0@group:0@6131",
+    ]
+
+
+def test_recv_thread_splits_disk_offload_loads_by_budget():
+    store = MagicMock()
+    store.batch_get_into_multi_buffers.side_effect = [
+        [256, 256],
+        [256],
+    ]
+    thread = _make_store_recving_thread(
+        store,
+        disk_offload_buffer_budget_bytes=_DISK_OFFLOAD_BUDGET_FOR_SPLIT,
+    )
+
+    req = _make_load_req(
+        "req-a",
+        [b"a0", b"a1", b"a2"],
+        token_len=48,
+    )
+
+    thread._handle_request(req)
+
+    assert store.batch_get_into_multi_buffers.call_count == 2
+
+    first_keys = store.batch_get_into_multi_buffers.call_args_list[0].args[0]
+    second_keys = store.batch_get_into_multi_buffers.call_args_list[1].args[0]
+    first_addrs = store.batch_get_into_multi_buffers.call_args_list[0].args[1]
+    second_addrs = store.batch_get_into_multi_buffers.call_args_list[1].args[1]
+    first_sizes = store.batch_get_into_multi_buffers.call_args_list[0].args[2]
+    second_sizes = store.batch_get_into_multi_buffers.call_args_list[1].args[2]
+    assert first_keys == [
+        "test-model@tp_rank:0@pcp0@dcp0@pp_rank:0@group:0@6130",
+        "test-model@tp_rank:0@pcp0@dcp0@pp_rank:0@group:0@6131",
+    ]
+    assert second_keys == [
+        "test-model@tp_rank:0@pcp0@dcp0@pp_rank:0@group:0@6132",
+    ]
+    base_addr = thread.token_databases[0].kv_caches_base_addr[0]
+    block_len = thread.token_databases[0].block_len[0]
+    assert first_addrs == [[base_addr], [base_addr + block_len]]
+    assert second_addrs == [[base_addr + 2 * block_len]]
+    expected_size = block_len
+    assert first_sizes == [[expected_size], [expected_size]]
+    assert second_sizes == [[expected_size]]
+
+
+def test_recv_thread_stops_after_first_failing_disk_offload_sub_batch():
+    store = MagicMock()
+    store.batch_get_into_multi_buffers.return_value = [-10, -10]
+    thread = _make_store_recving_thread(
+        store,
+        disk_offload_buffer_budget_bytes=_DISK_OFFLOAD_BUDGET_FOR_SPLIT,
+    )
+
+    req = _make_load_req(
+        "req-a",
+        [b"a0", b"a1", b"a2"],
+        token_len=48,
+    )
+
+    thread._handle_request(req)
+
+    assert store.batch_get_into_multi_buffers.call_count == 1
+
+
+def test_recv_thread_skips_split_when_budget_holds_all_keys():
+    """PR-36 removed the count-based split trigger; with budget for 3 keys,
+    all three should be requested in a single call."""
+    store = MagicMock()
+    store.batch_get_into_multi_buffers.return_value = [256, 256, 256]
+    thread = _make_store_recving_thread(
+        store,
+        disk_offload_buffer_budget_bytes=_DISK_OFFLOAD_BUDGET_FOR_THREE_KEYS,
+    )
+
+    req = _make_load_req(
+        "req-a",
+        [b"a0", b"a1", b"a2"],
+        token_len=48,
+    )
+
+    thread._handle_request(req)
+
+    assert store.batch_get_into_multi_buffers.call_count == 1
+    assert store.batch_get_into_multi_buffers.call_args_list[0].args[0] == [
+        "test-model@tp_rank:0@pcp0@dcp0@pp_rank:0@group:0@6130",
+        "test-model@tp_rank:0@pcp0@dcp0@pp_rank:0@group:0@6131",
+        "test-model@tp_rank:0@pcp0@dcp0@pp_rank:0@group:0@6132",
+    ]
+
+
+def test_recv_thread_reports_unsplittable_key_larger_than_budget():
+    store = MagicMock()
+    thread = _make_store_recving_thread(
+        store,
+        disk_offload_buffer_budget_bytes=_DISK_OFFLOAD_BUDGET_TOO_SMALL,
+    )
+
+    req = _make_load_req(
+        "req-a",
+        [b"a0"],
+        token_len=16,
+    )
+
+    thread._handle_request(req)
+
+    assert store.batch_get_into_multi_buffers.call_count == 0
+
+
+def test_requester_worker_init_uses_positional_setup(tmp_path, monkeypatch):
+    store = MagicMock()
+    store.setup.return_value = 0
+    _install_fake_mooncake(monkeypatch, store)
+    _patch_worker_runtime(monkeypatch)
+    monkeypatch.setenv(
+        "MOONCAKE_CONFIG_PATH",
+        _write_mooncake_config(
+            tmp_path,
+            {
+                "metadata_server": "http://metadata/endpoint",
+                "global_segment_size": "4gb",
+                "local_buffer_size": "64mb",
+                "protocol": "rdma",
+                "device_name": "mlx5_0",
+                "master_server_address": "10.0.0.7:50051",
+                "enable_offload": True,
+            },
+        ),
+    )
+    w = worker.MooncakeStoreWorker(_make_vllm_config(), _make_kv_cache_config())
+
+    assert not hasattr(w, "_isolate_offload_resources")
+    assert store.setup.call_args.args == (
+        "10.0.0.7",
+        "http://metadata/endpoint",
+        4 * 1024 * 1024 * 1024,  # global_segment_size: "4gb" honored
+        64 * 1024 * 1024,
+        "rdma",
+        "mlx5_0",
+        "10.0.0.7:50051",
+    )
+
+
+def test_requester_worker_init_prefers_local_hostname_override(
+    tmp_path,
+    monkeypatch,
+):
+    store = MagicMock()
+    store.setup.return_value = 0
+    _install_fake_mooncake(monkeypatch, store)
+    _patch_worker_runtime(monkeypatch)
+    monkeypatch.setenv("MOONCAKE_REQUESTER_LOCAL_HOSTNAME", "worker-a:50053")
+    monkeypatch.setenv(
+        "MOONCAKE_CONFIG_PATH",
+        _write_mooncake_config(
+            tmp_path,
+            {
+                "metadata_server": "http://metadata/endpoint",
+                "local_buffer_size": "64mb",
+                "protocol": "tcp",
+                "device_name": "",
+                "master_server_address": "10.0.0.7:50051",
+            },
+        ),
+    )
+    worker.MooncakeStoreWorker(_make_vllm_config(), _make_kv_cache_config())
+
+    assert store.setup.call_args.args[0] == "worker-a:50053"
+
+
+def test_requester_worker_init_skips_disk_budget_when_offload_disabled(
+    tmp_path,
+    monkeypatch,
+):
+    """enable_offload=False zeroes out the disk budget so we don't generate
+    redundant owner GET-RPCs."""
+    store = MagicMock()
+    store.setup.return_value = 0
+    _install_fake_mooncake(monkeypatch, store)
+    _patch_worker_runtime(monkeypatch)
+    monkeypatch.setenv(
+        "MOONCAKE_CONFIG_PATH",
+        _write_mooncake_config(
+            tmp_path,
+            {
+                "metadata_server": "http://metadata/endpoint",
+                "protocol": "tcp",
+                "device_name": "",
+                "master_server_address": "10.0.0.7:50051",
+                "enable_offload": False,
+            },
+        ),
+    )
+    w = worker.MooncakeStoreWorker(_make_vllm_config(), _make_kv_cache_config())
+
+    assert w.disk_offload_buffer_budget_bytes is None
+
+
+def test_requester_worker_init_builds_replicate_config_for_preferred_segment(
+    tmp_path,
+    monkeypatch,
+):
+    store = MagicMock()
+    store.setup.return_value = 0
+    fake_replicate_config_cls = _install_fake_mooncake(monkeypatch, store)
+    _patch_worker_runtime(monkeypatch)
+    monkeypatch.setenv(
+        "MOONCAKE_CONFIG_PATH",
+        _write_mooncake_config(
+            tmp_path,
+            {
+                "metadata_server": "http://metadata/endpoint",
+                "protocol": "tcp",
+                "device_name": "",
+                "master_server_address": "10.0.0.7:50051",
+            },
+        ),
+    )
+    w = worker.MooncakeStoreWorker(
+        _make_vllm_config(
+            extra_config={
+                "preferred_segment": "10.0.0.7:50053",
+            }
+        ),
+        _make_kv_cache_config(),
+    )
+
+    assert isinstance(w.store_replicate_config, fake_replicate_config_cls)
+    assert w.store_replicate_config.preferred_segment == "10.0.0.7:50053"
+
+
+# ---------------------------------------------------------------------------
+# Helpers for register_kv_caches tests
+# ---------------------------------------------------------------------------
+
+
+def test_store_sending_thread_clamps_token_len_to_lcm():
+    """Partial chunks past the last lcm boundary aren't stored — cache hits
+    are always lcm-aligned (mirrors HybridKVCacheCoordinator)."""
+    store = MagicMock()
+    store.batch_is_exist.return_value = [0, 0]
+    store.batch_put_from_multi_buffers.return_value = [256, 256]
+    # Default coord: single full-attn block_size=16, lcm=16.
+    # token_len_chunk=33 clamps to 32 → 2 chunks (not 3 with a partial 1-token chunk).
+    thread = _make_store_sending_thread(store)
+
+    thread.add_stored_request("r0")
+    thread._handle_request(
+        ReqMeta(
+            req_id="r0",
+            token_len_chunk=33,
+            block_ids=([0, 1, 2],),
+            block_hashes=[b"a0", b"a1", b"a2"],
+            can_save=True,
+            original_block_size=16,
+        )
+    )
+
+    keys = store.batch_put_from_multi_buffers.call_args.args[0]
+    assert len(keys) == 2
+
+
+def test_store_sending_thread_skips_when_token_len_below_lcm():
+    """Requests shorter than lcm_block_size cannot produce any aligned chunk,
+    so neither the existence check nor the put should be issued."""
+    from vllm.v1.kv_cache_interface import FullAttentionSpec, KVCacheGroupSpec
+
+    store = MagicMock()
+    # lcm=64 via single full-attn block_size=64.
+    spec = FullAttentionSpec(block_size=64, num_kv_heads=8, head_size=64, dtype=None)
+    coord = mooncake_store_worker.MooncakeStoreCoordinator(
+        [KVCacheGroupSpec(["L"], spec)],
+        scheduler_block_size=64,
+        hash_block_size=64,
+    )
+    db = ChunkedTokenDatabase(
+        KeyMetadata("test-model", 0, 0, 0, 0, group_id=0),
+        block_size=64,
+        hash_block_size=64,
+    )
+    db.set_kv_caches_base_addr([0x1000])
+    db.set_block_len([1024])
+    thread = _make_store_sending_thread(
+        store, coord=coord, token_databases=[db], block_size=64
+    )
+
+    thread.add_stored_request("r0")
+    thread._handle_request(
+        ReqMeta(
+            req_id="r0",
+            token_len_chunk=32,
+            block_ids=([0, 1],),
+            block_hashes=[b"a0", b"a1"],
+            can_save=True,
+            original_block_size=64,
+        )
+    )
+
+    store.batch_is_exist.assert_not_called()
+    store.batch_put_from_multi_buffers.assert_not_called()
+    assert thread.stored_requests["r0"] == 0
+
+
+def test_store_sending_thread_only_stores_swa_blocks_in_window():
+    """For SWA groups, only blocks within ``sliding_window`` of an
+    lcm-aligned boundary are stored — a block outside every such window
+    can never participate in any future hit (mirrors recv-side masking).
+
+    Setup: full-attn (block_size=32) + SWA (block_size=8, sliding_window=8)
+    → lcm=32. With token_len=64 there are two LCM boundaries (32, 64);
+    the SWA group should store exactly one block per boundary: the block
+    ending at 32 and the block ending at 64.
+    """
+    from vllm.v1.kv_cache_interface import (
+        FullAttentionSpec,
+        KVCacheGroupSpec,
+        SlidingWindowSpec,
+    )
+
+    store = MagicMock()
+    store.batch_is_exist.side_effect = lambda keys: [0] * len(keys)
+    store.batch_put_from_multi_buffers.side_effect = lambda keys, addrs, sizes: (
+        [256] * len(keys)
+    )
+
+    full_spec = FullAttentionSpec(
+        block_size=32, num_kv_heads=8, head_size=64, dtype=None
+    )
+    swa_spec = SlidingWindowSpec(
+        block_size=8,
+        num_kv_heads=8,
+        head_size=64,
+        dtype=None,
+        sliding_window=8,
+    )
+    coord = mooncake_store_worker.MooncakeStoreCoordinator(
+        [KVCacheGroupSpec(["L0"], full_spec), KVCacheGroupSpec(["L1"], swa_spec)],
+        scheduler_block_size=32,
+        hash_block_size=8,
+    )
+
+    db_full = ChunkedTokenDatabase(
+        KeyMetadata("test-model", 0, 0, 0, 0, group_id=0),
+        block_size=32,
+        hash_block_size=8,
+    )
+    db_full.set_kv_caches_base_addr([0x1000])
+    db_full.set_block_len([512])
+    db_swa = ChunkedTokenDatabase(
+        KeyMetadata("test-model", 0, 0, 0, 0, group_id=1),
+        block_size=8,
+        hash_block_size=8,
+    )
+    db_swa.set_kv_caches_base_addr([0x2000])
+    db_swa.set_block_len([128])
+
+    thread = _make_store_sending_thread(
+        store,
+        coord=coord,
+        token_databases=[db_full, db_swa],
+        block_size=32,
+    )
+
+    hs = [bytes([i + 1]) * 4 for i in range(8)]
+    thread.add_stored_request("r0")
+    thread._handle_request(
+        ReqMeta(
+            req_id="r0",
+            token_len_chunk=64,
+            block_ids=([0, 1], list(range(8))),
+            block_hashes=hs,
+            can_save=True,
+            original_block_size=32,
+        )
+    )
+
+    keys = store.batch_put_from_multi_buffers.call_args.args[0]
+    full_keys = [k for k in keys if "@group:0" in k]
+    swa_keys = [k for k in keys if "@group:1" in k]
+    # Full-attn: 2 blocks (chunks ending at 32 and 64).
+    assert len(full_keys) == 2
+    # SWA: only the two blocks ending at lcm boundaries (32 and 64), i.e.
+    # blocks covering tokens [24,32) and [56,64) — hashes hs[3] and hs[7].
+    assert len(swa_keys) == 2
+    swa_hashes = {k.rsplit("@", 1)[-1] for k in swa_keys}
+    assert swa_hashes == {hs[3].hex(), hs[7].hex()}
+
+
+def _auto_set_ready_event(*args, **kwargs):
+    """Side effect for mocked thread constructors that auto-sets ready_event."""
+    for arg in args:
+        if isinstance(arg, threading.Event):
+            arg.set()
+    for val in kwargs.values():
+        if isinstance(val, threading.Event):
+            val.set()
+    return MagicMock()
+
+
+def _register_with_mocked_threads(
+    worker: mooncake_store_worker.MooncakeStoreWorker,
+    kv_caches: dict[str, torch.Tensor],
+) -> None:
+    """Call register_kv_caches with the I/O transfer threads mocked out."""
+    prefix = "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.worker."
+    with (
+        patch(prefix + "KVCacheStoreSendingThread", side_effect=_auto_set_ready_event),
+        patch(prefix + "KVCacheStoreRecvingThread", side_effect=_auto_set_ready_event),
+    ):
+        worker.register_kv_caches(kv_caches)
+
+
+def _make_bare_worker(
+    *,
+    num_gpu_blocks: int = 10,
+    block_size: int = 16,
+    kv_role: str = "kv_both",
+) -> mooncake_store_worker.MooncakeStoreWorker:
+    """Construct a MooncakeStoreWorker via __new__, bypassing __init__.
+
+    Sets only the attributes that register_kv_caches() reads so we can
+    test the stride-based layout detection without a real
+    MooncakeDistributedStore.
+    """
+    worker = object.__new__(mooncake_store_worker.MooncakeStoreWorker)
+    worker.cache_config = MagicMock()
+    worker.cache_config.num_gpu_blocks = num_gpu_blocks
+    worker.store = MagicMock()
+    worker.store.register_buffer.return_value = 0
+    worker.use_mla = False
+    worker.kv_role = kv_role
+    worker.block_size = block_size
+    worker.tp_rank = 0
+    worker.put_step = 1
+    worker.enable_kv_events = False
+    worker.kv_send_thread = None
+    worker.kv_recv_thread = None
+    worker.tp_size = 1
+    worker.num_kv_head = 1
+    worker.pp_size = 1
+    # Minimal single-full-attention-group config so the coordinator-based
+    # lookup path works (the connector no longer carries a legacy single-group
+    # path; everything flows through the coordinator).
+    from vllm.v1.kv_cache_interface import (
+        FullAttentionSpec,
+        KVCacheGroupSpec,
+    )
+
+    worker.disk_offload_buffer_budget_bytes = None
+    worker.store_replicate_config = SimpleNamespace()
+    worker._kv_connector_stats_lock = threading.Lock()
+    worker.kv_connector_stats = MooncakeStoreConnectorStats()
+
+    spec = FullAttentionSpec(
+        block_size=block_size, num_kv_heads=8, head_size=64, dtype=None
+    )
+    group = KVCacheGroupSpec(["layer0", "__cross_layer__"], spec)
+    worker._kv_cache_groups = [group]
+    worker.pcp_size = 1
+    worker.dcp_size = 1
+    worker.hash_block_size = block_size
+    worker.metadata = KeyMetadata("test-model", 0, 0, 0, 0)
+    # Pre-build a single-group token_dbs so lookup-only tests don't have to
+    # call register_kv_caches.
+    worker.token_dbs = [
+        ChunkedTokenDatabase(
+            KeyMetadata("test-model", 0, 0, 0, 0, group_id=0),
+            block_size=block_size,
+            hash_block_size=block_size,
+        )
+    ]
+    worker.coord = mooncake_store_worker.MooncakeStoreCoordinator(
+        worker._kv_cache_groups,
+        scheduler_block_size=block_size,
+        hash_block_size=block_size,
+    )
+    return worker
+
+
+def test_lookup_partial_prefix_returns_first_hit_length():
+    worker = _make_bare_worker()
+    worker.store.batch_is_exist.return_value = [1, 1, 0]
+    assert worker.lookup(48, [b"a0", b"a1", b"a2"]) == 32
+
+
+def test_lookup_swa_single_group_returns_full_when_tail_window_present():
+    """Single-SWA, sliding_window=32 (= 2 blocks): producer stored only the
+    tail. Coordinator-driven lookup returns full prefix even though the
+    pre-window blocks are absent."""
+    from vllm.v1.kv_cache_interface import KVCacheGroupSpec, SlidingWindowSpec
+
+    worker = _make_bare_worker(block_size=16)
+    swa = SlidingWindowSpec(
+        block_size=16, num_kv_heads=8, head_size=64, dtype=None, sliding_window=32
+    )
+    worker._kv_cache_groups = [KVCacheGroupSpec(["layer0"], swa)]
+    worker.coord = mooncake_store_worker.MooncakeStoreCoordinator(
+        worker._kv_cache_groups,
+        scheduler_block_size=worker.hash_block_size,
+        hash_block_size=worker.hash_block_size,
+    )
+    worker.store.batch_is_exist.return_value = [0, 0, 1, 1]
+    assert worker.lookup(64, [b"h0", b"h1", b"h2", b"h3"]) == 64
+
+
+# ---------------------------------------------------------------------------
+# register_kv_caches tests
+# ---------------------------------------------------------------------------
+
+
+def test_register_kv_caches_blocks_first_single_segment():
+    """Blocks-first layout (FlashInfer/MLA): one segment per layer."""
+    num_blocks = 10
+    page_size_elements = 64
+    worker = _make_bare_worker(num_gpu_blocks=num_blocks)
+
+    # Shape: (num_blocks, page_size_elements) — blocks outermost, no outer_dims
+    tensor = torch.zeros(num_blocks, page_size_elements, dtype=torch.float16)
+    _register_with_mocked_threads(worker, {"layer0": tensor})
+
+    db = worker.token_dbs[0]
+    assert db.kv_caches_base_addr == [tensor.untyped_storage().data_ptr()]
+    assert db.block_len == [tensor.untyped_storage().nbytes() // num_blocks]
+    worker.store.register_buffer.assert_called_once_with(
+        tensor.untyped_storage().data_ptr(),
+        tensor.untyped_storage().nbytes(),
+    )
+
+
+def test_register_kv_caches_kv_first_two_segments():
+    """K/V-first layout (FlashAttn): two segments (K, V) per layer."""
+    num_blocks = 10
+    block_size_tokens = 16
+    num_kv_heads = 4
+    head_size = 8
+    worker = _make_bare_worker(num_gpu_blocks=num_blocks)
+
+    # Shape: (2, num_blocks, block_size, num_kv_heads, head_size) — K/V outermost
+    tensor = torch.zeros(
+        2,
+        num_blocks,
+        block_size_tokens,
+        num_kv_heads,
+        head_size,
+        dtype=torch.float16,
+    )
+    _register_with_mocked_threads(worker, {"layer0": tensor})
+
+    db = worker.token_dbs[0]
+    seg_stride = tensor.stride(0) * tensor.element_size()
+    base = tensor.untyped_storage().data_ptr()
+    assert db.kv_caches_base_addr == [base, base + seg_stride]
+    assert db.block_len == [seg_stride // num_blocks] * 2
+
+
+def test_register_kv_caches_cross_layer_single_segment():
+    """Cross-layer tensor: single segment with block_len = page_size * num_layers."""
+    num_blocks = 10
+    num_layers = 4
+    per_layer_page_elements = 64  # elements per layer per block
+
+    worker = _make_bare_worker(num_gpu_blocks=num_blocks)
+
+    # Cross-layer blocks-first tensor: all layers packed into a single
+    # contiguous block.  Shape (num_blocks, num_layers * per_layer_page)
+    # mimics the physical layout after stride reordering.
+    total_page_elements = num_layers * per_layer_page_elements
+    tensor = torch.zeros(num_blocks, total_page_elements, dtype=torch.float16)
+
+    with (
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store."
+            "worker.KVCacheStoreSendingThread",
+            side_effect=_auto_set_ready_event,
+        ),
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store."
+            "worker.KVCacheStoreRecvingThread",
+            side_effect=_auto_set_ready_event,
+        ),
+    ):
+        # Use the cross-layer wrapper key, same as register_cross_layers_kv_caches
+        worker.register_kv_caches({"__cross_layer__": tensor})
+
+    db = worker.token_dbs[0]
+    assert len(db.kv_caches_base_addr) == 1
+    assert db.kv_caches_base_addr[0] == tensor.untyped_storage().data_ptr()
+
+    expected_block_len = tensor.untyped_storage().nbytes() // num_blocks
+    # block_len should be per_layer_page_size * num_layers
+    assert (
+        expected_block_len
+        == num_layers * per_layer_page_elements * tensor.element_size()
+    )
+    assert len(db.block_len) == 1
+    assert db.block_len[0] == expected_block_len
+
+    # Also verify via register_cross_layers_kv_caches wrapper
+    worker2 = _make_bare_worker(num_gpu_blocks=num_blocks)
+    with (
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store."
+            "worker.KVCacheStoreSendingThread",
+            side_effect=_auto_set_ready_event,
+        ),
+        patch(
+            "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store."
+            "worker.KVCacheStoreRecvingThread",
+            side_effect=_auto_set_ready_event,
+        ),
+    ):
+        worker2.register_cross_layers_kv_caches(tensor)
+
+    db2 = worker2.token_dbs[0]
+    assert db2.kv_caches_base_addr == db.kv_caches_base_addr
+    assert db2.block_len == db.block_len
+
+
+# ---------------------------------------------------------------------------
+# Dual-mode (embedded / standalone-store) config validation tests
+# ---------------------------------------------------------------------------
+
+
+def _make_config(**overrides):
+    """Build a MooncakeStoreConfig with sensible defaults for validation tests.
+
+    Required dataclass fields are populated; callers override only the field
+    under test.
+    """
+    base = dict(
+        metadata_server="http://metadata/endpoint",
+        master_server_address="10.0.0.7:50051",
+        protocol="rdma",
+        device_name="mlx5_0",
+    )
+    base.update(overrides)
+    return worker.MooncakeStoreConfig(**base)
+
+
+def test_config_defaults_to_embedded():
+    """A JSON without explicit mode parses as embedded with 4 GiB segment."""
+    cfg = _make_config()
+    assert cfg.mode == "embedded"
+    assert cfg.global_segment_size == worker.DEFAULT_GLOBAL_SEGMENT_SIZE
+    assert cfg.local_buffer_size == worker.DEFAULT_LOCAL_BUFFER_SIZE
+    assert cfg.enable_offload is False
+
+
+def test_config_pr40900_unchanged(tmp_path):
+    """A literal PR-40900 config (no mode, no enable_offload, no preferred_segment)
+    parses without raising and resolves to embedded mode."""
+    config_path = _write_mooncake_config(
+        tmp_path,
+        {
+            "metadata_server": "http://metadata/endpoint",
+            "global_segment_size": "4GB",
+            "local_buffer_size": "4GB",
+            "protocol": "rdma",
+            "device_name": "mlx5_0",
+            "master_server_address": "10.0.0.7:50051",
+        },
+    )
+    cfg = worker.MooncakeStoreConfig.from_file(config_path)
+    assert cfg.mode == "embedded"
+    assert cfg.global_segment_size == 4 * 1024**3
+    assert cfg.local_buffer_size == 4 * 1024**3
+    assert cfg.enable_offload is False
+
+
+def test_config_embedded_rejects_zero_segment():
+    with pytest.raises(
+        ValueError, match=r"embedded mode requires global_segment_size > 0"
+    ):
+        _make_config(mode="embedded", global_segment_size=0)
+
+
+def test_config_standalone_store_rejects_nonzero_segment():
+    with pytest.raises(
+        ValueError,
+        match=r"standalone-store mode requires global_segment_size == 0",
+    ):
+        _make_config(mode="standalone-store", global_segment_size=4 * 1024**3)
+
+
+def test_config_standalone_store_accepts_zero_segment():
+    cfg = _make_config(mode="standalone-store", global_segment_size=0)
+    assert cfg.mode == "standalone-store"
+    assert cfg.global_segment_size == 0
+
+
+def test_config_unknown_mode():
+    with pytest.raises(ValueError, match=r"unknown Mooncake mode"):
+        _make_config(mode="something-else")
+
+
+def test_config_zero_local_buffer():
+    with pytest.raises(ValueError, match=r"local_buffer_size must be > 0"):
+        _make_config(local_buffer_size=0)
+
+
+# ---------------------------------------------------------------------------
+# End-to-end topology tests
+# Covers the two supported recipes:
+#   (A) standalone-store mode + disk offload (mode="standalone-store",
+#       segment=0, enable_offload=true, preferred_segment set)
+#   (B) embedded mode + CPU only      (mode default, segment>0,
+#       enable_offload=false, no preferred_segment)
+# ---------------------------------------------------------------------------
+
+
+def test_topology_standalone_store_with_disk_offload(tmp_path, monkeypatch):
+    """standalone-store + disk: global_segment_size=0, enable_offload=True,
+    preferred_segment set. Assert setup() positional args, ReplicateConfig
+    wiring, and that the disk-offload buffer budget is allocated."""
+    store = MagicMock()
+    store.setup.return_value = 0
+    fake_replicate_config_cls = _install_fake_mooncake(monkeypatch, store)
+    _patch_worker_runtime(monkeypatch)
+    monkeypatch.setenv(
+        "MOONCAKE_CONFIG_PATH",
+        _write_mooncake_config(
+            tmp_path,
+            {
+                "mode": "standalone-store",
+                "metadata_server": "http://metadata/endpoint",
+                "global_segment_size": 0,
+                "local_buffer_size": "1GB",
+                "protocol": "rdma",
+                "device_name": "mlx5_0",
+                "master_server_address": "10.0.0.7:50051",
+                "enable_offload": True,
+            },
+        ),
+    )
+
+    w = worker.MooncakeStoreWorker(
+        _make_vllm_config(extra_config={"preferred_segment": "10.0.0.7:50053"}),
+        _make_kv_cache_config(),
+    )
+
+    # setup() receives global_segment_size=0 and the configured local buffer.
+    assert store.setup.call_args.args == (
+        "10.0.0.7",
+        "http://metadata/endpoint",
+        0,
+        1024 * 1024 * 1024,
+        "rdma",
+        "mlx5_0",
+        "10.0.0.7:50051",
+    )
+    # ReplicateConfig is built and carries the preferred_segment.
+    assert isinstance(w.store_replicate_config, fake_replicate_config_cls)
+    assert w.store_replicate_config.preferred_segment == "10.0.0.7:50053"
+    # Disk-offload staging budget is allocated (enable_offload=True).
+    assert w.disk_offload_buffer_budget_bytes is not None
+    assert w.disk_offload_buffer_budget_bytes > 0
+
+
+def test_topology_embedded_cpu_only(tmp_path, monkeypatch):
+    """embedded + CPU-only: no mode key (defaults to embedded),
+    global_segment_size>0, enable_offload absent, no preferred_segment.
+    This is the PR-40900 baseline recipe."""
+    store = MagicMock()
+    store.setup.return_value = 0
+    fake_replicate_config_cls = _install_fake_mooncake(monkeypatch, store)
+    _patch_worker_runtime(monkeypatch)
+    monkeypatch.setenv(
+        "MOONCAKE_CONFIG_PATH",
+        _write_mooncake_config(
+            tmp_path,
+            {
+                "metadata_server": "http://metadata/endpoint",
+                "global_segment_size": "4GB",
+                "local_buffer_size": "4GB",
+                "protocol": "rdma",
+                "device_name": "mlx5_0",
+                "master_server_address": "10.0.0.7:50051",
+            },
+        ),
+    )
+
+    w = worker.MooncakeStoreWorker(_make_vllm_config(), _make_kv_cache_config())
+
+    # setup() receives global_segment_size=4 GiB (rank contributes a segment).
+    assert store.setup.call_args.args == (
+        "10.0.0.7",
+        "http://metadata/endpoint",
+        4 * 1024 * 1024 * 1024,
+        4 * 1024 * 1024 * 1024,
+        "rdma",
+        "mlx5_0",
+        "10.0.0.7:50051",
+    )
+    # No preferred_segment — ReplicateConfig is default-constructed (so the
+    # preferred_segment field keeps its default value).
+    assert w.preferred_segment is None
+    assert isinstance(w.store_replicate_config, fake_replicate_config_cls)
+    assert w.store_replicate_config.preferred_segment == ""
+    # No disk budget — enable_offload was absent (defaults to False).
+    assert w.disk_offload_buffer_budget_bytes is None
+
+
+# ---------------------------------------------------------------------------
+# Stats/metrics tests (PR-35 port)
+# ---------------------------------------------------------------------------
+
+
+def test_mooncake_store_stats_aggregate_reduce():
+    stats = MooncakeStoreConnectorStats()
+    stats.record_operation("save_put", 0.01, 2, num_bytes=128)
+    other = MooncakeStoreConnectorStats()
+    other.record_operation(
+        "save_put",
+        0.03,
+        1,
+        num_bytes=64,
+        status="error",
+        num_failed_keys=1,
+    )
+
+    reduced = stats.aggregate(other).reduce()
+
+    assert reduced["save_put_count"] == 2
+    assert reduced["save_put_total_keys"] == 3
+    assert reduced["save_put_total_bytes"] == 192
+    assert reduced["save_put_failed_keys"] == 1
+    assert reduced["save_put_error_count"] == 1
+
+
+def test_worker_get_kv_connector_stats_resets_after_read():
+    worker = _make_bare_worker()
+    worker._record_kv_connector_operation(
+        "save_put",
+        0.01,
+        2,
+        num_bytes=128,
+    )
+
+    stats = worker.get_kv_connector_stats()
+
+    assert isinstance(stats, MooncakeStoreConnectorStats)
+    assert stats.data["save_put"][0]["num_bytes"] == 128
+    assert worker.get_kv_connector_stats() is None
+
+
+def test_lookup_records_mooncake_metrics():
+    worker = _make_bare_worker()
+    worker.store.batch_is_exist.return_value = [1, 1]
+
+    result = worker.lookup(32, [b"a0", b"a1"])
+    stats = worker.get_kv_connector_stats()
+
+    assert result == 32
+    assert isinstance(stats, MooncakeStoreConnectorStats)
+    assert len(stats.data["lookup_exists"]) == 1
+    assert stats.data["lookup_exists"][0]["num_keys"] == 2
diff --git a/tests/v1/kv_connector/unit/test_moriio_connector.py b/tests/v1/kv_connector/unit/test_moriio_connector.py
index 902957e18309..da78b62b9a03 100644
--- a/tests/v1/kv_connector/unit/test_moriio_connector.py
+++ b/tests/v1/kv_connector/unit/test_moriio_connector.py
@@ -3,6 +3,7 @@
 import importlib.util
 import os
 import subprocess
+import uuid
 from unittest.mock import MagicMock, patch
 
 import msgspec
@@ -36,9 +37,15 @@
     get_ip,
     make_zmq_path,
 )
+from vllm.v1.kv_cache_interface import KVCacheConfig
 
 from .utils import create_request, create_scheduler
 
+
+def _make_test_kv_cache_config() -> KVCacheConfig:
+    return KVCacheConfig(num_blocks=0, kv_cache_tensors=[], kv_cache_groups=[])
+
+
 aiter_available = importlib.util.find_spec("aiter") is not None
 mori_available = importlib.util.find_spec("mori") is not None
 
@@ -99,6 +106,11 @@ def _setup_kv_transfer_request(
             "remote_engine_id": "test_engine",
         }
     )
+    zmq_addr = f"host:{remote_host},handshake:{fake_port},notify:{fake_port}"
+    fake_uuid = uuid.uuid4().hex
+    request.request_id = (
+        f"___prefill_addr_{zmq_addr}___decode_addr_{zmq_addr}_{fake_uuid}"
+    )
     return request
 
 
@@ -254,13 +266,14 @@ def test_write_mode_saves_local_block_ids():
         do_remote_decode=True,
         do_remote_prefill=False,
     )
+
+    # Setup KV transfer params and embed ZMQ addrs in request_id before
+    # adding to scheduler so the ID is consistent everywhere.
+    request = _setup_kv_transfer_request(request)
     request_id = request.request_id
 
     scheduler.add_request(request)
 
-    # Fake Config
-    request = _setup_kv_transfer_request(request)
-
     # Remote Prefill, triggers MoRIIOConnectorMetadata.
     scheduler_output = scheduler.schedule()
     kv_connector_metadata = scheduler_output.kv_connector_metadata
@@ -312,13 +325,14 @@ def test_write_mode_with_chunked_prefill_saves_local_block_ids():
         do_remote_decode=True,
         do_remote_prefill=False,
     )
+
+    # Setup KV transfer params and embed ZMQ addrs in request_id before
+    # adding to scheduler so the ID is consistent everywhere.
+    request = _setup_kv_transfer_request(request)
     request_id = request.request_id
 
     scheduler.add_request(request)
 
-    # Fake Config
-    request = _setup_kv_transfer_request(request)
-
     # Remote Prefill with chunked prefill, triggers multiple schedules.
     expected_counts = [(0, 0, 0), (0, 0, 0), (1, 0, 0)]
     kv_connector_metadata = None
@@ -363,6 +377,10 @@ def test_read_mode_loads_remote_block_ids(moriio_read_mode):
         do_remote_decode=False,
         do_remote_prefill=True,
     )
+
+    # Setup KV transfer params and embed ZMQ addrs in request_id before
+    # adding to scheduler so the ID is consistent everywhere.
+    request = _setup_kv_transfer_request(request)
     request_id = request.request_id
 
     scheduler.add_request(request)
@@ -370,8 +388,6 @@ def test_read_mode_loads_remote_block_ids(moriio_read_mode):
         0
     ].req_to_blocks[request_id]
 
-    request = _setup_kv_transfer_request(request)
-
     # Set remote block ids to be fetched.
     request.kv_transfer_params["remote_block_ids"] = block_list
 
@@ -452,7 +468,11 @@ def test_register_kv_caches(mock_parallel_groups):
         )
 
         with set_current_vllm_config(vllm_config):
-            connector = MoRIIOConnector(vllm_config, KVConnectorRole.WORKER)
+            connector = MoRIIOConnector(
+                vllm_config,
+                KVConnectorRole.WORKER,
+                _make_test_kv_cache_config(),
+            )
             connector.connector_worker = FakeMoRIIOConnectorWorker(
                 vllm_config, connector.engine_id, hand_shake_latency=0
             )
@@ -544,7 +564,11 @@ def test_moriio_handshake_returns_metadata(mock_parallel_groups):
             }
         )
         with set_current_vllm_config(vllm_config):
-            connector = MoRIIOConnector(vllm_config, KVConnectorRole.WORKER)
+            connector = MoRIIOConnector(
+                vllm_config,
+                KVConnectorRole.WORKER,
+                _make_test_kv_cache_config(),
+            )
 
         # Execute register_kv_caches
         connector.register_kv_caches(kv_caches)
diff --git a/tests/v1/kv_connector/unit/test_multi_connector.py b/tests/v1/kv_connector/unit/test_multi_connector.py
index 671a80137b63..3c1f7cfa9a39 100644
--- a/tests/v1/kv_connector/unit/test_multi_connector.py
+++ b/tests/v1/kv_connector/unit/test_multi_connector.py
@@ -8,20 +8,25 @@
 from unittest.mock import MagicMock
 
 import pytest
+import torch
 
 from tests.v1.kv_connector.unit.utils import create_vllm_config
 from vllm import LLM, SamplingParams
 from vllm.config import KVTransferConfig
 from vllm.distributed.kv_transfer.kv_connector.factory import KVConnectorFactory
 from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorRole
-from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorBase_V1
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    KVConnectorBase_V1,
+    SupportsHMA,
+    supports_hma,
+)
 from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
 from vllm.distributed.kv_transfer.kv_connector.v1.multi_connector import (
     MultiConnector,
     MultiKVConnectorStats,
     MultiKVConnectorWorkerMetadata,
 )
-from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import (
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl import (
     NixlKVConnectorStats,
 )
 from vllm.v1.kv_cache_interface import KVCacheConfig
@@ -83,24 +88,56 @@ def update_state_after_alloc(self, request, blocks, num_tokens) -> None:
         pass
 
 
-# Register the mock connector
+class MockHMAConnector(KVConnectorBase_V1, SupportsHMA):
+    """Mock connector that supports HMA for testing."""
+
+    def __new__(cls, *args, **kwargs):
+        mock = MagicMock(spec_set=cls)
+        return mock
+
+    def start_load_kv(self, forward_context, **kwargs):
+        pass
+
+    def wait_for_layer_load(self, layer_name):
+        pass
+
+    def save_kv_layer(self, layer_name, kv_layer, attn_metadata, **kwargs):
+        pass
+
+    def wait_for_save(self):
+        pass
+
+    def build_connector_meta(self, scheduler_output):
+        return None
+
+    def get_num_new_matched_tokens(self, request, num_computed_tokens):
+        return (0, False)
+
+    def update_state_after_alloc(self, request, blocks, num_tokens) -> None:
+        pass
+
+    def request_finished_all_groups(self, request, block_ids):
+        return (False, None)
+
+
+# Register mock connectors
 KVConnectorFactory.register_connector("MockConnector", __name__, MockConnector.__name__)
+KVConnectorFactory.register_connector(
+    "MockHMAConnector", __name__, MockHMAConnector.__name__
+)
 
 
 @pytest.fixture
 def mc() -> MultiConnector:
     """MultiConnector using two mocked connectors"""
-    vllm_config = create_vllm_config()
-
     mock_connector_config = {
         "kv_connector": "MockConnector",
         "kv_role": "kv_both",
         "kv_connector_module_path": "tests.v1.kv_connector.unit.test_multi_connector",
     }
 
-    vllm_config.kv_transfer_config = KVTransferConfig(
+    vllm_config = create_vllm_config(
         kv_connector="MultiConnector",
-        kv_role="kv_both",
         kv_connector_extra_config={
             "connectors": [mock_connector_config, mock_connector_config],
         },
@@ -221,10 +258,15 @@ def test_multi_example_connector_consistency():
         )
 
     events = get_connector_events()
-    # First event is set_xfer_handshake_metadata from initialization, then
-    # get_num_new_matched_tokens and update_state_after_alloc from generate().
-    assert events["storage1-SCHEDULER"][:4] == [
+    storage1_scheduler_events = _ignore_event_collection(events["storage1-SCHEDULER"])
+    storage2_scheduler_events = _ignore_event_collection(events["storage2-SCHEDULER"])
+    # First event is bind_gpu_block_pool from initialization, then
+    # set_xfer_handshake_metadata, then on_new_request when the request is enqueued,
+    # then get_num_new_matched_tokens and update_state_after_alloc from generate().
+    assert storage1_scheduler_events[:6] == [
+        "bind_gpu_block_pool",
         "set_xfer_handshake_metadata",
+        "on_new_request",
         "get_num_new_matched_tokens 0",
         "update_state_after_alloc num_blocks=[0] 0",
         "build_connector_meta",
@@ -241,8 +283,10 @@ def test_multi_example_connector_consistency():
         "wait_for_layer_load",
         "save_kv_layer",
     ]
-    assert events["storage2-SCHEDULER"][:4] == [
+    assert storage2_scheduler_events[:6] == [
+        "bind_gpu_block_pool",
         "set_xfer_handshake_metadata",
+        "on_new_request",
         "get_num_new_matched_tokens 0",
         "update_state_after_alloc num_blocks=[0] 0",
         "build_connector_meta",
@@ -270,12 +314,16 @@ def test_multi_example_connector_consistency():
     # connector so update_state_after_alloc will be with allocated blocks
     # on that one but with zero blocks for others (first nonzero match is
     # chosen).
-    assert events["storage1-SCHEDULER"][:3] == [
+    storage1_scheduler_events = _ignore_event_collection(events["storage1-SCHEDULER"])
+    storage2_scheduler_events = _ignore_event_collection(events["storage2-SCHEDULER"])
+    assert storage1_scheduler_events[:4] == [
+        "on_new_request",
         "get_num_new_matched_tokens 0",
         "update_state_after_alloc num_blocks=[7] 96",
         "build_connector_meta",
     ]
-    assert events["storage2-SCHEDULER"][:3] == [
+    assert storage2_scheduler_events[:4] == [
+        "on_new_request",
         "get_num_new_matched_tokens 0",
         "update_state_after_alloc num_blocks=[0] 0",
         "build_connector_meta",
@@ -296,12 +344,16 @@ def test_multi_example_connector_consistency():
     # return 0 from the first connector, but the second connector should have
     # a hit, so update_state_after_alloc will only be called with allocated
     # blocks for the second connector.
-    assert events["storage1-SCHEDULER"][:3] == [
+    storage1_scheduler_events = _ignore_event_collection(events["storage1-SCHEDULER"])
+    storage2_scheduler_events = _ignore_event_collection(events["storage2-SCHEDULER"])
+    assert storage1_scheduler_events[:4] == [
+        "on_new_request",
         "get_num_new_matched_tokens 0",
         "update_state_after_alloc num_blocks=[0] 0",
         "build_connector_meta",
     ]
-    assert events["storage2-SCHEDULER"][:3] == [
+    assert storage2_scheduler_events[:4] == [
+        "on_new_request",
         "get_num_new_matched_tokens 0",
         "update_state_after_alloc num_blocks=[7] 96",
         "build_connector_meta",
@@ -312,6 +364,10 @@ def test_multi_example_connector_consistency():
     shutil.rmtree(storage_2_path)
 
 
+def _ignore_event_collection(events: list[str]) -> list[str]:
+    return [event for event in events if event != "take_events"]
+
+
 def get_connector_events() -> dict[str, list[str]]:
     # Read in connector events and reset the files.
     import glob
@@ -356,39 +412,35 @@ def test_multi_connector_handle_preemptions_integration():
 
     try:
         # Configure MultiConnector with two TestExampleConnectors
-        kv_transfer_config = KVTransferConfig(
-            kv_connector="MultiConnector",
-            kv_role="kv_both",
-            kv_connector_extra_config={
-                "connectors": [
-                    {
-                        "kv_connector": "TestExampleConnector",
-                        "kv_role": "kv_both",
-                        "kv_connector_extra_config": {
-                            "shared_storage_path": str(storage_path / "s1"),
-                            "name": "preempt1",
-                        },
-                        "kv_connector_module_path": "tests.v1.kv_connector.unit.utils",
+        connectors_extra_config = {
+            "connectors": [
+                {
+                    "kv_connector": "TestExampleConnector",
+                    "kv_role": "kv_both",
+                    "kv_connector_extra_config": {
+                        "shared_storage_path": str(storage_path / "s1"),
+                        "name": "preempt1",
                     },
-                    {
-                        "kv_connector": "TestExampleConnector",
-                        "kv_role": "kv_both",
-                        "kv_connector_extra_config": {
-                            "shared_storage_path": str(storage_path / "s2"),
-                            "name": "preempt2",
-                        },
-                        "kv_connector_module_path": "tests.v1.kv_connector.unit.utils",
+                    "kv_connector_module_path": "tests.v1.kv_connector.unit.utils",
+                },
+                {
+                    "kv_connector": "TestExampleConnector",
+                    "kv_role": "kv_both",
+                    "kv_connector_extra_config": {
+                        "shared_storage_path": str(storage_path / "s2"),
+                        "name": "preempt2",
                     },
-                ]
-            },
-        )
+                    "kv_connector_module_path": "tests.v1.kv_connector.unit.utils",
+                },
+            ]
+        }
 
         vllm_config = create_vllm_config(
             block_size=16,
             max_num_batched_tokens=100,
-            kv_connector_extra_config=kv_transfer_config.kv_connector_extra_config,
+            kv_connector="MultiConnector",
+            kv_connector_extra_config=connectors_extra_config,
         )
-        vllm_config.kv_transfer_config = kv_transfer_config
 
         # Create scheduler - this initializes the MultiConnector with SCHEDULER role
         scheduler = create_scheduler(vllm_config, num_blocks=10)
@@ -920,3 +972,131 @@ def assert_update_connector_output_called(mc: MultiConnector):
     mc.update_connector_output(kv_connector_output)
     assert_update_connector_output_called(mc)
     assert kv_connector_output.kv_connector_worker_meta == mc_worker_meta_01a_01b
+
+
+def _make_multi_connector(connector_names: list[str]) -> MultiConnector:
+    """Build a MultiConnector wrapping the given registered connectors."""
+    connectors = [
+        {
+            "kv_connector": name,
+            "kv_role": "kv_both",
+            "kv_connector_module_path": "tests.v1.kv_connector.unit.test_multi_connector",  # noqa: E501
+        }
+        for name in connector_names
+    ]
+    vllm_config = create_vllm_config(
+        kv_connector="MultiConnector",
+        kv_connector_extra_config={"connectors": connectors},
+    )
+    kv_cache_config = KVCacheConfig(
+        num_blocks=0,
+        kv_cache_tensors=[],
+        kv_cache_groups=[],
+    )
+    return MultiConnector(
+        vllm_config=vllm_config,
+        role=KVConnectorRole.WORKER,
+        kv_cache_config=kv_cache_config,
+    )
+
+
+def test_multi_connector_hma_opt_in():
+    """
+    MultiConnector currently assumes HMA is opt-in: it needs
+    --no-disable-hybrid-kv-cache-manager to be enabled.
+
+    At runtime, _all_support_hma is True only when every sub-connector
+    implements SupportsHMA. Test all combinations of HMA / non-HMA
+    sub-connectors.
+    """
+
+    assert supports_hma(MultiConnector)
+
+    # -- All non-HMA connectors => _all_support_hma is False --
+    mc_none = _make_multi_connector(["MockConnector", "MockConnector"])
+    assert not supports_hma(mc_none._connectors[0])
+    assert not supports_hma(mc_none._connectors[1])
+    assert mc_none._all_support_hma is False
+
+    # -- All HMA connectors => _all_support_hma is True --
+    mc_all = _make_multi_connector(["MockHMAConnector", "MockHMAConnector"])
+    assert supports_hma(mc_all._connectors[0])
+    assert supports_hma(mc_all._connectors[1])
+    assert mc_all._all_support_hma is True
+
+    # -- Mixed: first HMA, second non-HMA => _all_support_hma is False --
+    mc_mixed1 = _make_multi_connector(["MockHMAConnector", "MockConnector"])
+    assert supports_hma(mc_mixed1._connectors[0])
+    assert not supports_hma(mc_mixed1._connectors[1])
+    assert mc_mixed1._all_support_hma is False
+
+    # -- Mixed: first non-HMA, second HMA => _all_support_hma is False --
+    mc_mixed2 = _make_multi_connector(["MockConnector", "MockHMAConnector"])
+    assert not supports_hma(mc_mixed2._connectors[0])
+    assert supports_hma(mc_mixed2._connectors[1])
+    assert mc_mixed2._all_support_hma is False
+
+
+@pytest.mark.skipif(
+    not torch.cuda.is_available(), reason="Requires GPU to instantiate LLM"
+)
+def test_multi_connector_mixed_hma_disables_hybrid_kv_cache(monkeypatch):
+    """
+    When MultiConnector wraps a mix of HMA (NixlConnector) and non-HMA
+    (MockConnector) sub-connectors, verify that:
+    1. The scheduler's MultiConnector has _all_support_hma == False.
+    2. vLLM auto-disables the hybrid KV cache manager (no preference expressed by user)
+    """
+    from unittest.mock import patch
+
+    from tests.v1.kv_connector.unit.test_nixl_connector import FakeNixlWrapper
+
+    monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
+
+    kv_transfer_config = KVTransferConfig(
+        kv_connector="MultiConnector",
+        kv_role="kv_both",
+        kv_connector_extra_config={
+            "connectors": [
+                {
+                    "kv_connector": "NixlConnector",
+                    "kv_role": "kv_both",
+                },
+                {
+                    "kv_connector": "MockConnector",
+                    "kv_role": "kv_both",
+                    "kv_connector_module_path": (
+                        "tests.v1.kv_connector.unit.test_multi_connector"
+                    ),
+                },
+            ],
+        },
+    )
+
+    with patch(
+        "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
+        FakeNixlWrapper,
+    ):
+        llm = LLM(
+            model="Qwen/Qwen3-0.6B",
+            enforce_eager=True,
+            gpu_memory_utilization=0.3,
+            max_model_len=128,
+            max_num_seqs=1,
+            max_num_batched_tokens=128,
+            kv_transfer_config=kv_transfer_config,
+        )
+        try:
+            # HMA should be auto-disabled when user has not expressed a preference.
+            assert (
+                llm.llm_engine.vllm_config.scheduler_config.disable_hybrid_kv_cache_manager
+                is True
+            )
+            # The scheduler-side MultiConnector should detect the mixed
+            # HMA support among its sub-connectors.
+            scheduler = llm.llm_engine.engine_core.engine_core.scheduler
+            mc = scheduler.connector
+            assert isinstance(mc, MultiConnector)
+            assert mc._all_support_hma is False
+        finally:
+            llm.llm_engine.engine_core.shutdown()
diff --git a/tests/v1/kv_connector/unit/test_nixl_connector.py b/tests/v1/kv_connector/unit/test_nixl_connector.py
index b4ee97cd1d74..3f92b183dca7 100644
--- a/tests/v1/kv_connector/unit/test_nixl_connector.py
+++ b/tests/v1/kv_connector/unit/test_nixl_connector.py
@@ -21,16 +21,16 @@
 from vllm.config import KVTransferConfig, set_current_vllm_config
 from vllm.distributed.kv_transfer.kv_connector.utils import (
     KVOutputAggregator,
-    TpKVTopology,
+    TransferTopology,
     get_current_attn_backend,
 )
-from vllm.distributed.kv_transfer.kv_connector.v1 import nixl_connector
+from vllm.distributed.kv_transfer.kv_connector.v1 import nixl
+from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorRole
 from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
 from vllm.distributed.kv_transfer.kv_connector.v1.multi_connector import (
     MultiKVConnectorStats,
 )
-from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import (
-    KVConnectorRole,
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl import (
     NixlAgentMetadata,
     NixlConnector,
     NixlConnectorMetadata,
@@ -38,6 +38,8 @@
     NixlConnectorWorker,
     NixlHandshakePayload,
     NixlKVConnectorStats,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.metadata import (
     compute_nixl_compatibility_hash,
 )
 from vllm.distributed.kv_transfer.kv_transfer_state import (
@@ -319,8 +321,36 @@ def test_prompt_less_than_block_size():
     assert len(scheduler_output.scheduled_new_reqs) == 0
 
 
+def test_abort_immediately_remote_prefill_enqueues_empty_recv():
+    """A remote-prefill request added with abort_immediately=True should
+    be added to the scheduler's waiting queue then immediately aborted, so the
+    NIXL connector's request_finished hook enqueues an empty recv to notify
+    the prefill instance to free its blocks."""
+    from vllm.v1.request import RequestStatus
+
+    scheduler = create_scheduler(create_vllm_config())
+
+    request = create_request(request_id=42, num_tokens=10, do_remote_prefill=True)
+    assert request.kv_transfer_params is not None
+    assert request.kv_transfer_params["do_remote_prefill"] is True
+
+    # Mimic the EngineCore.add_request path for an abort-immediately req.
+    scheduler.add_request(request)
+    scheduler.finish_requests([request.request_id], RequestStatus.FINISHED_ABORTED)
+
+    scheduler_output = scheduler.schedule()
+    meta = scheduler_output.kv_connector_metadata
+    assert isinstance(meta, NixlConnectorMetadata)
+    assert set(meta.reqs_to_recv) == {request.request_id}
+    req_meta = meta.reqs_to_recv[request.request_id]
+    assert req_meta.local_block_ids == []
+    assert req_meta.remote.request_id == f"prefill-{42}"
+    # do_remote_prefill is consumed by request_finished to prevent re-issuing.
+    assert request.kv_transfer_params["do_remote_prefill"] is False
+
+
 @patch(
-    "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
     FakeNixlWrapper,
 )
 def test_kv_transfer_handshake(dist_init):
@@ -461,19 +491,20 @@ def __init__(
         test_shape = self.attn_backends[0].get_kv_cache_shape(
             num_blocks=1, block_size=16, num_kv_heads=1, head_size=1
         )
-        self.kv_topo = TpKVTopology(
+        self.transfer_topo = TransferTopology(
             tp_rank=self.tp_rank,
+            tp_size=self.world_size,
+            block_size=self.block_size,
             engine_id=self.engine_id,
-            remote_tp_size=self._tp_size,  # shared state
-            remote_block_size=self._block_size,  # shared state
             is_mla=self.use_mla,
+            is_mamba=False,
             total_num_kv_heads=self.model_config.get_total_num_kv_heads(),
             attn_backends=self.attn_backends,
             tensor_shape=test_shape,
         )
 
         self.compat_hash = compute_nixl_compatibility_hash(
-            self.vllm_config, self.backend_name, self.kv_topo.cross_layers_blocks
+            self.vllm_config, self.backend_name, self.transfer_topo.cross_layers_blocks
         )
 
     def _nixl_handshake(
@@ -494,7 +525,7 @@ def _nixl_handshake(
         # Adjust remote block length metadata to satisfy heterogeneous TP
         # invariants enforced during handshake validation.
         remote_block_lens = list(self.block_len_per_layer)
-        tp_ratio = self.kv_topo.tp_ratio(remote_tp_size)
+        tp_ratio = self.transfer_topo.tp_ratio(remote_tp_size)
         if remote_tp_size > self.world_size:
             # P TP > D TP case, block_len of remote is smaller
             remote_block_lens = [
@@ -523,6 +554,8 @@ def _nixl_handshake(
                     kv_cache_layout="HND",
                     block_size=self.block_size,
                     ssm_sizes=(0, 0),
+                    attn_backend_name=self.backend_name,
+                    physical_blocks_per_logical_kv_block=1,
                 ),
                 remote_tp_rank=remote_tp_rank,
                 remote_tp_size=remote_tp_size,
@@ -533,7 +566,7 @@ def _nixl_handshake(
 
 class TestNixlHandshake:
     @patch(
-        "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+        "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
         FakeNixlWrapper,
     )
     def test_multi_xfer_one_engine(
@@ -620,7 +653,7 @@ def test_multi_xfer_one_engine(
             connector.clear_connector_metadata()
 
     @patch(
-        "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+        "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
         FakeNixlWrapper,
     )
     @pytest.mark.parametrize(
@@ -690,7 +723,7 @@ def test_async_load_kv(
         raise TimeoutError("Took too long to complete async handshake.")
 
     @patch(
-        "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+        "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
         FakeNixlWrapper,
     )
     @pytest.mark.parametrize("local_tp_size", [1, 2])
@@ -702,7 +735,7 @@ def test_prefill_tp_size_greater_than_decode_tp_size(
         remote configurations.
         """
         monkeypatch.setattr(
-            "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.get_tensor_model_parallel_world_size",
+            "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.get_tensor_model_parallel_world_size",
             lambda: local_tp_size,
         )
 
@@ -722,14 +755,16 @@ def test_prefill_tp_size_greater_than_decode_tp_size(
         worker.num_blocks = 1
         worker.dst_num_blocks[worker.engine_id] = worker.num_blocks
         worker.src_blocks_data = [(0, worker.block_len_per_layer[0], worker.tp_rank)]
+        worker.num_descs = len(worker.src_blocks_data)
 
         def check_handshake(remote_tp_size: int):
             tp_ratio = remote_tp_size // local_tp_size
             assert set(remote_agents.keys()) == set(range(tp_ratio))
 
             remote_engine_id = worker.REMOTE_ENGINE_ID
-            assert worker._tp_size[remote_engine_id] == remote_tp_size
-            assert -tp_ratio == worker.kv_topo.tp_ratio_from_engine_id(remote_engine_id)
+            remote_info = worker.transfer_topo.get_engine_info(remote_engine_id)
+            assert remote_info.remote_tp_size == remote_tp_size
+            assert -tp_ratio == worker.transfer_topo.tp_ratio(remote_tp_size)
             # ensure src_xfer_handles_by_tp_ratio is populated with tpratio chunks
             assert -tp_ratio in worker.src_xfer_handles_by_tp_ratio
             assert len(worker.src_xfer_handles_by_tp_ratio[-tp_ratio]) == tp_ratio
@@ -759,7 +794,7 @@ def check_handshake(remote_tp_size: int):
         check_handshake(6)
 
     @patch(
-        "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+        "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
         FakeNixlWrapper,
     )
     def test_prefill_tp_size_greater_than_decode_tp_size_mla(
@@ -793,7 +828,7 @@ def test_prefill_tp_size_greater_than_decode_tp_size_mla(
             (conn_p0.connector_worker, conn_p1.connector_worker)
         ):
             worker.world_size = p_tp_size
-            worker.kv_topo.remote_tp_size = {worker.engine_id: p_tp_size}
+            worker.transfer_topo.tp_size = p_tp_size
             worker.tp_rank = rank
             worker.use_mla = True
 
@@ -862,7 +897,7 @@ def test_prefill_tp_size_greater_than_decode_tp_size_mla(
         assert req_id not in conn_p1.connector_worker._reqs_to_process
 
     @patch(
-        "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+        "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
         FakeNixlWrapper,
     )
     def test_concurrent_load_kv(
@@ -927,7 +962,7 @@ def test_concurrent_load_kv(
         raise TimeoutError("Took too long to complete async handshake.")
 
     @patch(
-        "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+        "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
         FakeNixlWrapper,
     )
     def test_handshake_fails_on_kv_cache_layout_mismatch(
@@ -942,7 +977,7 @@ def test_handshake_fails_on_kv_cache_layout_mismatch(
         # Mock TP world size to 2 to force heterogeneous TP when
         # remote_tp_size=1
         with patch(
-            "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.get_tensor_model_parallel_world_size",  # noqa: E501
+            "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.get_tensor_model_parallel_world_size",  # noqa: E501
             return_value=2,
         ):
             # Initialize connector and worker (with fake NIXL wrapper)
@@ -972,6 +1007,8 @@ def test_handshake_fails_on_kv_cache_layout_mismatch(
                 kv_cache_layout=mismatched_layout,
                 block_size=worker.block_size,
                 ssm_sizes=(0, 0),
+                attn_backend_name=worker.backend_name,
+                physical_blocks_per_logical_kv_block=1,
             )
 
             with pytest.raises(RuntimeError):
@@ -980,7 +1017,7 @@ def test_handshake_fails_on_kv_cache_layout_mismatch(
                 worker.add_remote_agent(meta, remote_tp_size=1)
 
     @patch(
-        "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+        "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
         FakeNixlWrapper,
     )
     def test_handshake_succeed_on_kv_cache_layout_mismatch_with_experimental(
@@ -995,7 +1032,7 @@ def test_handshake_succeed_on_kv_cache_layout_mismatch_with_experimental(
         # Mock TP world size to 2 to force heterogeneous TP when
         # remote_tp_size=1
         with patch(
-            "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.get_tensor_model_parallel_world_size",  # noqa: E501
+            "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.get_tensor_model_parallel_world_size",  # noqa: E501
             return_value=2,
         ):
             # Initialize connector and worker (with fake NIXL wrapper)
@@ -1028,6 +1065,8 @@ def test_handshake_succeed_on_kv_cache_layout_mismatch_with_experimental(
                 kv_cache_layout="HND",
                 block_size=worker.block_size,
                 ssm_sizes=(0, 0),
+                attn_backend_name=worker.backend_name,
+                physical_blocks_per_logical_kv_block=1,
             )
 
             # We don't check layout for homogeneous TP and MLA for now, as the
@@ -1039,7 +1078,7 @@ def test_handshake_succeed_on_kv_cache_layout_mismatch_with_experimental(
 # we put here is important. First run ray, it will clean up the resources, then
 # the rest of the tests.
 @patch(
-    "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
     FakeNixlWrapper,
 )
 def test_kv_connector_stats(default_vllm_config, dist_init):
@@ -1224,8 +1263,8 @@ def make_multi_stats(nixl_count: int, foo_count: int) -> MultiKVConnectorStats:
     worker_patterns = [(2, 1), (3, 0), (0, 5)]  # (Nixl, Foo)
 
     worker_outputs: list[ModelRunnerOutput] = []
-    for i, (nixl, foo) in enumerate(worker_patterns):
-        stats = make_multi_stats(nixl, foo)
+    for i, (nixl_count, foo) in enumerate(worker_patterns):
+        stats = make_multi_stats(nixl_count, foo)
         output = ModelRunnerOutput(
             req_ids=[f"req_{i}"],
             req_id_to_index={f"req_{i}": 0},
@@ -1253,7 +1292,7 @@ def make_multi_stats(nixl_count: int, foo_count: int) -> MultiKVConnectorStats:
 
 
 @patch(
-    "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
     FakeNixlWrapper,
 )
 def test_scheduler_kv_connector_stats_aggregation():
@@ -1321,7 +1360,7 @@ def test_scheduler_kv_connector_stats_aggregation():
 
 @pytest.mark.parametrize("distributed_executor_backend", ["ray", None])
 @patch(
-    "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
     FakeNixlWrapper,
 )
 def test_abort_timeout_on_prefiller(monkeypatch, distributed_executor_backend):
@@ -1335,9 +1374,11 @@ def test_abort_timeout_on_prefiller(monkeypatch, distributed_executor_backend):
             |  {eventually free blocks}
     """
     model_name = "Qwen/Qwen3-0.6B"
+    timeout = 6
     kv_transfer_config = KVTransferConfig(
         kv_connector="NixlConnector",
         kv_role="kv_both",
+        kv_connector_extra_config={"kv_lease_duration": timeout},
     )
     llm_kwargs = {
         "model": model_name,
@@ -1347,9 +1388,7 @@ def test_abort_timeout_on_prefiller(monkeypatch, distributed_executor_backend):
         "distributed_executor_backend": distributed_executor_backend,
     }
 
-    timeout = 6
     monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
-    monkeypatch.setenv("VLLM_NIXL_ABORT_REQUEST_TIMEOUT", str(timeout))
 
     def run_test_and_cleanup():
         llm = LLM(**llm_kwargs)
@@ -1364,8 +1403,6 @@ def run_test_and_cleanup():
             runtime_env = {
                 "working_dir": working_dir,  # ship fake nixl package
                 "env_vars": {
-                    "VLLM_NIXL_ABORT_REQUEST_TIMEOUT": str(timeout),
-                    # TODO: for ray to carry over, remove once we set
                     "NIXL_TELEMETRY_ENABLE": "1",
                 },
             }
@@ -1510,13 +1547,14 @@ def test_register_kv_caches(
 
         backend_cls = TritonAttentionBackend
 
-    nixl_module = "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector"
+    nixl_worker = "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker"
+    nixl_connector = "vllm.distributed.kv_transfer.kv_connector.v1.nixl.connector"
     with (
-        patch(f"{nixl_module}.NixlWrapper") as mock_nixl_wrapper,
-        patch(f"{nixl_module}.threading.Event"),
-        patch(f"{nixl_module}.threading.Thread") as mock_thread,
-        patch(f"{nixl_module}.get_current_attn_backend") as mock_get_attn_backend,
-        patch(f"{nixl_module}.get_current_attn_backends") as mock_get_attn_backends,
+        patch(f"{nixl_worker}.NixlWrapper") as mock_nixl_wrapper,
+        patch(f"{nixl_worker}.threading.Event"),
+        patch(f"{nixl_worker}.threading.Thread") as mock_thread,
+        patch(f"{nixl_connector}.get_current_attn_backend") as mock_get_attn_backend,
+        patch(f"{nixl_worker}.get_current_attn_backends") as mock_get_attn_backends,
     ):
         # Ensure get_attn_backend returns the correct value due to
         # _cached_get_attn_backend returning the backend from previous
@@ -1586,10 +1624,18 @@ def test_register_kv_caches(
         expected_base_addrs: list[int]
         expected_num_entries: int
         kv_caches: dict[str, torch.Tensor]
-        assert str(enable_cross_layers).lower() != "true" or (
-            (attn_backend not in ("FLASH_ATTN", "FLASHINFER"))
-            or connector.prefer_cross_layer_blocks
+        if str(enable_cross_layers).lower() == "true":
+            assert connector.prefer_cross_layer_blocks == (
+                attn_backend in ("FLASH_ATTN", "FLASHINFER", "TRITON_ATTN")
+            )
+        else:
+            assert not connector.prefer_cross_layer_blocks
+
+        test_shape = backend_cls.get_kv_cache_shape(
+            num_blocks=1, block_size=16, num_kv_heads=1, head_size=1
         )
+        is_blocks_first = len(test_shape) == 5 and test_shape[0] == 1
+
         if connector.prefer_cross_layer_blocks:
             with set_current_vllm_config(vllm_config):
                 _, cross_layers_kv_cache, _ = (
@@ -1605,7 +1651,7 @@ def test_register_kv_caches(
                                 )
                             ]
                         ],
-                        cache_dtype=torch.bfloat16,
+                        cache_dtype="bfloat16",
                         device=torch.accelerator.current_device_index(),
                         kernel_block_sizes=[block_size],
                     )
@@ -1619,7 +1665,7 @@ def test_register_kv_caches(
             ]
             expected_num_entries = 1
 
-            expected_blocks_count = 8
+            expected_blocks_count = num_blocks * (2 if is_blocks_first else 1)
 
             kv_caches = {"all-layers": cross_layers_kv_cache}
         else:
@@ -1639,12 +1685,6 @@ def test_register_kv_caches(
             }
 
             # Store tensor info for validation
-
-            test_shape = backend_cls.get_kv_cache_shape(
-                num_blocks=1, block_size=16, num_kv_heads=1, head_size=1
-            )
-            is_blocks_first = len(test_shape) == 5 and test_shape[0] == 1
-
             if is_blocks_first:
                 expected_tensor_size = (
                     shared_tensor.element_size() * shared_tensor.numel()
@@ -1696,13 +1736,13 @@ def test_register_kv_caches(
 
         if connector.prefer_cross_layer_blocks:
             num_blocks = 8
-            expected_block_len = expected_tensor_size // num_blocks
         else:
             num_blocks = kv_cache_config.num_blocks
-            if is_blocks_first:
-                expected_block_len = expected_tensor_size // num_blocks // 2
-            else:
-                expected_block_len = expected_tensor_size // num_blocks
+
+        if is_blocks_first:
+            expected_block_len = expected_tensor_size // num_blocks // 2
+        else:
+            expected_block_len = expected_tensor_size // num_blocks
 
         for i, block_entry in enumerate(blocks_data):
             block_start_addr, block_len, tp_rank = block_entry
@@ -1749,28 +1789,26 @@ def test_kv_buffer_to_nixl_memory_types(
     vllm_config = create_vllm_config()
     # Override the default memory types in the config
     vllm_config.kv_transfer_config.kv_buffer_device = kv_buffer_device
-    from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import (
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.utils import (
         _NIXL_SUPPORTED_DEVICE,
     )
 
     _NIXL_SUPPORTED_DEVICE.update(FakePlatform.get_nixl_supported_devices())
 
     with (
+        patch("vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper"),
         patch(
-            "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper"
-        ),
-        patch(
-            "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.threading.Event"
+            "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.threading.Event"
         ),
         patch(
-            "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.threading.Thread"
+            "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.threading.Thread"
         ),
         patch(
-            "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.current_platform",
+            "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.current_platform",
             FakePlatform,
         ),
         patch(
-            "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector._NIXL_SUPPORTED_DEVICE",
+            "vllm.distributed.kv_transfer.kv_connector.v1.nixl.utils._NIXL_SUPPORTED_DEVICE",
             _NIXL_SUPPORTED_DEVICE,
         ),
     ):  # noqa: E501
@@ -1785,7 +1823,7 @@ def test_kv_buffer_to_nixl_memory_types(
 
 
 @patch(
-    "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
     FakeNixlWrapper,
 )
 def test_shutdown_cleans_up_resources(default_vllm_config, dist_init):
@@ -1850,7 +1888,7 @@ def test_shutdown_cleans_up_resources(default_vllm_config, dist_init):
 
 
 @patch(
-    "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
     FakeNixlWrapper,
 )
 def test_aborted_request_removed_from_worker_in_batch(default_vllm_config, dist_init):
@@ -1970,7 +2008,7 @@ def check_xfer_state(self, handle: int) -> str:
 
 
 @patch(
-    "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
     FailingNixlWrapper,
 )
 @pytest.mark.parametrize(
@@ -2060,10 +2098,10 @@ def test_transfer_failure_logging(
         slot_mapping={},
     )
 
-    # Capture logs from the nixl_connector logger specifically
+    # Capture logs from the nixl.worker logger specifically
     # vLLM loggers have propagate=False, so we need to capture directly
     nixl_logger = logging.getLogger(
-        "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector"
+        "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker"
     )
     captured_logs: list[logging.LogRecord] = []
 
@@ -2125,7 +2163,7 @@ def emit(self, record):
 
 
 @patch(
-    "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
     FailingNixlWrapper,
 )
 def test_handshake_failure_returns_finished(default_vllm_config, dist_init):
@@ -2176,7 +2214,7 @@ def test_handshake_failure_returns_finished(default_vllm_config, dist_init):
 
 
 @patch(
-    "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
     FailingNixlWrapper,
 )
 def test_transfer_setup_failure_returns_finished(default_vllm_config, dist_init):
@@ -2229,6 +2267,127 @@ def test_transfer_setup_failure_returns_finished(default_vllm_config, dist_init)
     assert request_id in done_recving
 
 
+@patch(
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
+    FailingNixlWrapper,
+)
+@pytest.mark.parametrize(
+    "failure_mode",
+    [
+        "handshake",
+        "transfer_setup",
+        "transfer_failed",
+        "transfer_exception",
+    ],
+)
+def test_failed_request_skips_kv_postprocessing(
+    default_vllm_config, dist_init, failure_mode
+):
+    """Test that failed requests skip KV sync and post-processing in
+    get_finished().
+
+    This is the core safety behavior: when a KV transfer fails at any stage,
+    the request must still appear in done_recving (so the scheduler can apply
+    kv_load_failure_policy), but sync_recved_kv_to_device and post-processing
+    must NOT be called since no valid KV data was received.
+
+    Covers all failure paths that involve an actual (attempted) KV transfer:
+    - handshake: add_remote_agent raises during async handshake
+    - transfer_setup: make_prepped_xfer raises before handle is in transfers
+    - transfer_failed: check_xfer_state returns bad state ("ERR") in
+      _pop_done_transfers — this is the path that previously had the bug
+      where post-processing was NOT skipped
+    - transfer_exception: check_xfer_state raises in _pop_done_transfers
+
+    Note: notification_failed (send_notif raises on the full-cache-hit path)
+    is intentionally excluded. That path is a best-effort D→P courtesy
+    notification; the blocks are already in D's cache, so no KV transfer
+    was attempted and done_recving is correctly empty.
+    """
+    # Map each failure mode to the FailingNixlWrapper attribute to set.
+    _WRAPPER_CONFIG: dict[str, str] = {
+        "handshake": "fail_handshake",
+        "transfer_setup": "fail_transfer_setup",
+        "transfer_failed": "fail_transfer_state",
+        "transfer_exception": "fail_transfer_exception",
+    }
+
+    # Use enable_permute_local_kv=True so that
+    # post_process_device_kv_on_receive would be called on the success path,
+    # making the assertion meaningful (not trivially true).
+    vllm_config = create_vllm_config(enable_permute_local_kv=True)
+
+    connector = NixlConnector(
+        vllm_config, KVConnectorRole.WORKER, make_kv_cache_config(block_size=16)
+    )
+    connector.connector_worker = FakeNixlConnectorWorker(
+        vllm_config,
+        connector.engine_id,
+        hand_shake_latency=0.1 if failure_mode == "handshake" else 0,
+    )
+    worker = connector.connector_worker
+    setattr(worker.nixl_wrapper, _WRAPPER_CONFIG[failure_mode], True)
+
+    request_id = f"test_{failure_mode}_skip_postprocess"
+    metadata = NixlConnectorMetadata()
+    metadata.add_new_req_to_recv(
+        request_id=request_id,
+        local_block_ids=([1, 2, 3],),
+        kv_transfer_params={
+            "remote_block_ids": ([4, 5, 6],),
+            "remote_engine_id": FakeNixlConnectorWorker.REMOTE_ENGINE_ID,
+            "remote_request_id": f"prefill-{request_id}",
+            "remote_host": "localhost",
+            "remote_port": 1234,
+            "remote_tp_size": 1,
+        },
+    )
+    connector.bind_connector_metadata(metadata)
+
+    dummy_ctx = ForwardContext(
+        no_compile_layers={},
+        attn_metadata={},
+        slot_mapping={},
+    )
+    connector.start_load_kv(dummy_ctx)
+
+    if failure_mode == "handshake":
+        # Wait for async handshake to fail.
+        time.sleep(0.3)
+    else:
+        # All other modes: let the handshake complete, then process the
+        # ready_requests queue. For transfer_failed / transfer_exception the
+        # handle ends up in _recving_transfers; the failure surfaces in
+        # get_finished() via _pop_done_transfers below.
+        connector.bind_connector_metadata(NixlConnectorMetadata())
+        time.sleep(0.1)
+        connector.start_load_kv(dummy_ctx)
+
+    # Spy on sync_recved_kv_to_device and post_process_device_kv_on_receive
+    # to verify they are NOT called for the failed request.
+    with (
+        patch.object(worker, "sync_recved_kv_to_device") as mock_sync,
+        patch.object(worker, "post_process_device_kv_on_receive") as mock_postprocess,
+    ):
+        _, done_recving = connector.get_finished(finished_req_ids=set())
+
+    # The failed request must appear in done_recving so the scheduler
+    # can handle it (e.g., trigger recompute via kv_load_failure_policy).
+    assert request_id in done_recving
+
+    # Critical: KV sync and post-processing must NOT have been called
+    # since no valid KV data was received for the failed request.
+    mock_sync.assert_not_called()
+    mock_postprocess.assert_not_called()
+
+    # Metadata for the request should have been cleaned up.
+    assert request_id not in worker._recving_metadata
+
+    # Blocks should have been marked as invalid.
+    invalid_blocks = connector.get_block_ids_with_load_errors()
+    assert invalid_blocks == {1, 2, 3}
+
+
 @pytest.mark.parametrize(
     "mismatch_type,config_overrides,version_override,should_fail,enforce_handshake_compat",
     [
@@ -2252,7 +2411,7 @@ def test_transfer_setup_failure_returns_finished(default_vllm_config, dist_init)
     ],
 )
 @patch(
-    "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
     FakeNixlWrapper,
 )
 def test_compatibility_hash_validation(
@@ -2323,7 +2482,7 @@ def test_compatibility_hash_validation(
         elif "connector_version" in version_override:
             stack.enter_context(
                 patch.object(
-                    nixl_connector,
+                    nixl.metadata,
                     "NIXL_CONNECTOR_VERSION",
                     version_override["connector_version"],
                 )
@@ -2331,7 +2490,7 @@ def test_compatibility_hash_validation(
         remote_hash = compute_nixl_compatibility_hash(
             remote_vllm_config,
             decode_worker.backend_name,
-            decode_worker.kv_topo.cross_layers_blocks,
+            decode_worker.transfer_topo.cross_layers_blocks,
         )
 
     prefill_block_size = config_overrides.get("block_size", 16)
@@ -2345,6 +2504,8 @@ def test_compatibility_hash_validation(
         kv_cache_layout="HND",
         block_size=prefill_block_size,
         ssm_sizes=(0, 0),
+        attn_backend_name=decode_worker.backend_name,
+        physical_blocks_per_logical_kv_block=1,
     )
     handshake_payload = NixlHandshakePayload(
         compatibility_hash=remote_hash,
@@ -2359,7 +2520,7 @@ def test_compatibility_hash_validation(
     # Patch zmq_ctx to return our mock socket
     with (
         patch.object(decode_worker, "add_remote_agent", return_value="fake_agent"),
-        patch.object(nixl_connector, "zmq_ctx") as mock_zmq_ctx,
+        patch.object(nixl.worker, "zmq_ctx") as mock_zmq_ctx,
     ):
         mock_zmq_ctx.return_value.__enter__.return_value = mock_socket
 
@@ -2393,7 +2554,7 @@ def test_compatibility_hash_validation(
     ],
 )
 @patch(
-    "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper",
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
     FakeNixlWrapper,
 )
 def test_handshake_decode_errors(default_vllm_config, dist_init, error_scenario):
@@ -2417,12 +2578,13 @@ def test_handshake_decode_errors(default_vllm_config, dist_init, error_scenario)
     test_shape = backend.get_kv_cache_shape(
         num_blocks=1, block_size=16, num_kv_heads=1, head_size=1
     )
-    decode_worker.kv_topo = TpKVTopology(
+    decode_worker.transfer_topo = TransferTopology(
         tp_rank=decode_worker.tp_rank,
+        tp_size=decode_worker.world_size,
+        block_size=decode_worker.block_size,
         engine_id=decode_worker.engine_id,
-        remote_tp_size=decode_worker._tp_size,  # shared state
-        remote_block_size=decode_worker._block_size,  # shared state
         is_mla=decode_worker.use_mla,
+        is_mamba=False,
         total_num_kv_heads=decode_worker.model_config.get_total_num_kv_heads(),
         attn_backends=[backend],
         tensor_shape=test_shape,
@@ -2431,7 +2593,7 @@ def test_handshake_decode_errors(default_vllm_config, dist_init, error_scenario)
     decode_worker.compat_hash = compute_nixl_compatibility_hash(
         decode_worker.vllm_config,
         decode_worker.backend_name,
-        decode_worker.kv_topo.cross_layers_blocks,
+        decode_worker.transfer_topo.cross_layers_blocks,
     )
 
     if error_scenario == "handshake_decode_error":
@@ -2458,7 +2620,7 @@ def test_handshake_decode_errors(default_vllm_config, dist_init, error_scenario)
     mock_socket.recv.return_value = msg_bytes
     with (
         patch.object(decode_worker, "add_remote_agent", return_value="fake_agent"),
-        patch.object(nixl_connector, "zmq_ctx") as mock_zmq_ctx,
+        patch.object(nixl.worker, "zmq_ctx") as mock_zmq_ctx,
     ):
         mock_zmq_ctx.return_value.__enter__.return_value = mock_socket
 
@@ -2469,3 +2631,122 @@ def test_handshake_decode_errors(default_vllm_config, dist_init, error_scenario)
                 remote_tp_size=1,
                 expected_engine_id=FakeNixlConnectorWorker.REMOTE_ENGINE_ID,
             )
+
+    @patch(
+        "vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker.NixlWrapper",
+        FakeNixlWrapper,
+    )
+    def test_mla_broadcast_notif_uses_remote_request_id(
+        self, default_vllm_config, dist_init
+    ):
+        """MLA + remote TP > local TP: the broadcast notification sent to
+        non-read prefill ranks must be keyed by the prefill-side request
+        id (``meta.remote.request_id``), not the local decode request id.
+
+        Prefill ranks key ``_reqs_to_send`` by their own request id, so a
+        broadcast keyed by the decode id is rejected in
+        ``_get_new_notifs`` with "Potentially invalid KV blocks for
+        unrecognized request" and the blocks only release via the abort
+        timeout. See ``_read_blocks_for_req`` in
+        ``vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py``.
+        """
+        decode_tp_size = 1
+        prefill_tp_size = 4
+
+        vllm_config = create_vllm_config()
+        vllm_config.parallel_config.tensor_parallel_size = decode_tp_size
+
+        connector = NixlConnector(
+            vllm_config, KVConnectorRole.WORKER, make_kv_cache_config(block_size=16)
+        )
+        connector.connector_worker = FakeNixlConnectorWorker(
+            vllm_config, connector.engine_id, hand_shake_latency=0
+        )
+        worker = connector.connector_worker
+
+        # Force the MLA path; only `self.use_mla` gates the branches we
+        # exercise inside `_read_blocks_for_req`.
+        worker.use_mla = True
+
+        # Manually register the remote (P) engine and pre-populate the
+        # per-rank state the handshake would normally fill in. The real
+        # `_nixl_handshake` is unnecessary here — we only need
+        # `transfer_topo` to know `remote_tp_size`, and `_remote_agents`
+        # / `dst_xfer_side_handles` to be keyed by remote rank.
+        remote_engine_id = "remote_engine"
+        worker.transfer_topo.register_remote_engine(
+            remote_engine_id=remote_engine_id,
+            remote_tp_size=prefill_tp_size,
+            remote_block_size=worker.block_size,
+            remote_block_len=worker.block_size * 4096,
+            remote_physical_blocks_per_logical=1,
+            local_block_len=worker.block_size * 4096,
+        )
+        worker._remote_agents[remote_engine_id] = {
+            rank: f"agent_p{rank}" for rank in range(prefill_tp_size)
+        }
+        worker.dst_xfer_side_handles = {
+            remote_engine_id: {rank: 100 + rank for rank in range(prefill_tp_size)}
+        }
+        # Sanity: D TP=1, P TP=4 => tp_ratio = -4 (P > D).
+        assert worker.transfer_topo.tp_ratio(prefill_tp_size) == -prefill_tp_size
+
+        # Distinct ids on each side — that's the whole point of the bug.
+        decode_req_id = "decode-req-AAAA"
+        prefill_req_id = "prefill-req-BBBB"
+        assert decode_req_id != prefill_req_id
+
+        metadata = NixlConnectorMetadata()
+        metadata.add_new_req_to_recv(
+            request_id=decode_req_id,
+            local_block_ids=([0, 1, 2],),
+            kv_transfer_params={
+                "remote_block_ids": ([10, 11, 12],),
+                "remote_engine_id": remote_engine_id,
+                "remote_request_id": prefill_req_id,
+                "remote_host": "localhost",
+                "remote_port": 1234,
+                "remote_tp_size": prefill_tp_size,
+            },
+        )
+        meta = metadata.reqs_to_recv[decode_req_id]
+
+        # Capture broadcast send_notif calls; stub `_read_blocks` so we
+        # don't need a working xfer path. Real `_read_blocks` emits its
+        # auto-notif via `make_prepped_xfer`, not via `send_notif`, so
+        # any captured `send_notif` here is a broadcast.
+        send_notif_calls: list[tuple[str, bytes]] = []
+        worker.nixl_wrapper.send_notif = (  # type: ignore[method-assign]
+            lambda agent_name, notif_msg: send_notif_calls.append(
+                (agent_name, notif_msg)
+            )
+        )
+        worker._read_blocks = MagicMock()  # type: ignore[method-assign]
+
+        worker._read_blocks_for_req(decode_req_id, meta)
+
+        # MLA: read once from rank 0 and broadcast to the other ranks.
+        worker._read_blocks.assert_called_once()
+        assert worker._read_blocks.call_args.kwargs["remote_rank"] == 0
+        assert (
+            worker._read_blocks.call_args.kwargs["remote_request_id"] == prefill_req_id
+        )
+
+        # Broadcast goes to ranks {1, 2, 3} only, never to the read target.
+        expected_recipients = {
+            worker._remote_agents[remote_engine_id][r]
+            for r in range(1, prefill_tp_size)
+        }
+        assert {agent for agent, _ in send_notif_calls} == expected_recipients
+
+        # Every broadcast notif must be keyed by the prefill request id.
+        # Pre-fix this used the *decode* request id, which prefill ranks
+        # didn't recognize.
+        expected_notif = f"{prefill_req_id}:{decode_tp_size}".encode()
+        bad_notif = f"{decode_req_id}:{decode_tp_size}".encode()
+        for agent, notif in send_notif_calls:
+            assert notif == expected_notif, (
+                f"Broadcast notif to {agent!r} must use prefill_req_id; "
+                f"got {notif!r} (expected {expected_notif!r}, "
+                f"buggy form would be {bad_notif!r})"
+            )
diff --git a/tests/v1/kv_connector/unit/test_nixl_connector_hma.py b/tests/v1/kv_connector/unit/test_nixl_connector_hma.py
index 898f8e4b35ba..9c163fdf327f 100644
--- a/tests/v1/kv_connector/unit/test_nixl_connector_hma.py
+++ b/tests/v1/kv_connector/unit/test_nixl_connector_hma.py
@@ -2,10 +2,13 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Unit tests for NixlConnectorScheduler with HMA and Mamba N-1 prefill."""
 
+import gc
 from unittest.mock import patch
 
 import pytest
+import torch
 
+from tests.v1.attention.utils import MockMambaBuilder
 from vllm import LLM, SamplingParams
 from vllm.config import KVTransferConfig
 from vllm.v1.core.single_type_kv_cache_manager import (
@@ -31,10 +34,10 @@
         (False, [0]),
     ],
 )
-@patch("vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.current_platform")
+@patch("vllm.distributed.kv_transfer.kv_connector.v1.nixl.scheduler.current_platform")
 def test_sw_sizes(mock_platform, swa_enabled, expected_sw_sizes):
     """Test sw_sizes is correctly computed based on SWA enabled/disabled."""
-    from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import (
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.scheduler import (
         NixlConnectorScheduler,
     )
 
@@ -65,7 +68,7 @@ def test_logical_to_kernel_block_ids_with_hma():
     When HMA is enabled, the logical block size may differ from the kernel
     block size. Each logical block maps to multiple kernel blocks.
     """
-    from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import (
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker import (
         NixlConnectorWorker,
     )
 
@@ -89,6 +92,283 @@ def test_logical_to_kernel_block_ids_with_hma():
     )
 
 
+@pytest.mark.cpu_test
+@pytest.mark.parametrize(
+    "group_spec_types,remote_physical_per_logical,"
+    "local_physical_per_logical,tp_ratio,remote_block_ids,"
+    "expected_remote_block_ids",
+    [
+        pytest.param(
+            ("FullAttentionSpec", "SlidingWindowSpec"),
+            2,
+            2,
+            1,
+            ([0, 1, 2], [3, 4]),
+            [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9]],
+            id="dense_fa_swa",
+        ),
+        # Nemotron-3-Nano-30B-A3B 4p1d (P_TP=4, D_TP=1):
+        # remote_physical_per_logical=34, local_physical_per_logical=66.
+        # FA logical block 5 → kernel [170..203], block 6 → [204..237].
+        # Mamba block unchanged.
+        pytest.param(
+            ("FullAttentionSpec", "MambaSpec"),
+            34,
+            66,
+            -4,
+            ([5, 6], [2]),
+            [list(range(170, 238)), [2]],
+            id="mamba_fa_ssm",
+        ),
+    ],
+)
+def test_read_blocks_for_req_expands_remote_ids(
+    group_spec_types,
+    remote_physical_per_logical,
+    local_physical_per_logical,
+    tp_ratio,
+    remote_block_ids,
+    expected_remote_block_ids,
+):
+    """_read_blocks_for_req must expand remote logical block IDs to kernel
+    block IDs when kernel block size != logical block size.
+
+    The hot path always calls _logical_to_remote_kernel_block_ids with
+    remote_info.remote_physical_blocks_per_logical (model-agnostic).
+    """
+    from unittest.mock import MagicMock
+
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.metadata import (
+        NixlConnectorMetadata,
+    )
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.tp_mapping import (
+        TPMapping,
+    )
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker import (
+        NixlConnectorWorker,
+    )
+    from vllm.v1.kv_cache_interface import (
+        FullAttentionSpec,
+        MambaSpec,
+        SlidingWindowSpec,
+    )
+
+    spec_name_to_type = {
+        "FullAttentionSpec": FullAttentionSpec,
+        "SlidingWindowSpec": SlidingWindowSpec,
+        "MambaSpec": MambaSpec,
+    }
+    resolved_types = tuple(spec_name_to_type[n] for n in group_spec_types)
+
+    worker = object.__new__(NixlConnectorWorker)
+    worker._physical_blocks_per_logical_kv_block = local_physical_per_logical
+
+    has_mamba = any(t is MambaSpec for t in resolved_types)
+    has_swa = any(t is SlidingWindowSpec for t in resolved_types)
+    worker.kv_cache_config = make_kv_cache_config(
+        block_size=16, swa_enabled=has_swa, mamba_enabled=has_mamba
+    )
+
+    remote_engine_id = "remote-engine"
+
+    worker.transfer_topo = MagicMock()
+    # tp_ratio not exercised (all_source_ranks is empty so no reads run),
+    # but set for realism.
+    worker.transfer_topo.tp_ratio.return_value = tp_ratio
+    remote_info = MagicMock()
+    remote_info.remote_physical_blocks_per_logical = remote_physical_per_logical
+    worker.transfer_topo.get_engine_info.return_value = remote_info
+    worker.use_mla = False
+
+    mock_plan = MagicMock(spec=TPMapping)
+    mock_plan.all_source_ranks = ()
+    mock_plan.source_ranks_per_group = ()
+    worker.tp_mappings = {remote_engine_id: mock_plan}
+
+    metadata = NixlConnectorMetadata()
+    metadata.add_new_req_to_recv(
+        request_id="test-req",
+        local_block_ids=([0, 1], [2, 3]),
+        kv_transfer_params={
+            "remote_block_ids": remote_block_ids,
+            "remote_engine_id": remote_engine_id,
+            "remote_request_id": "prefill-test-req",
+            "remote_host": "localhost",
+            "remote_port": 1234,
+            "tp_size": 1,
+        },
+    )
+
+    meta = metadata.reqs_to_recv["test-req"]
+    worker._read_blocks_for_req("test-req", meta)
+
+    assert meta.remote.block_ids == expected_remote_block_ids, (
+        f"Expected {expected_remote_block_ids}, got {meta.remote.block_ids}"
+    )
+
+
+@pytest.mark.cpu_test
+@pytest.mark.parametrize(
+    "local_physical_per_logical,remote_physical_per_logical,"
+    "local_block_ids,remote_block_ids,"
+    "expected_local,expected_remote",
+    [
+        # 10 kernel blocks of data, local has more logical blocks.
+        # remote physical_per_logical=10 → 1 logical → 10 kernel blocks
+        # local  physical_per_logical=6  → 2 logical → 12 kernel blocks
+        # Trim local from 12 to 10.
+        pytest.param(
+            6,
+            10,
+            [list(range(12)), [42]],
+            [list(range(10)), [42]],
+            [list(range(10)), [42]],
+            [list(range(10)), [42]],
+            id="align_local6_remote10",
+        ),
+        # 10 kernel blocks of data, remote has more logical blocks.
+        # remote physical_per_logical=6  → 2 logical → 12 kernel blocks
+        # local  physical_per_logical=10 → 1 logical → 10 kernel blocks
+        # Trim remote from 12 to 10.
+        pytest.param(
+            10,
+            6,
+            [list(range(10)), [42]],
+            [list(range(12)), [42]],
+            [list(range(10)), [42]],
+            [list(range(10)), [42]],
+            id="align_local10_remote6",
+        ),
+    ],
+)
+def test_apply_prefix_caching_mamba_hybrid(
+    local_physical_per_logical,
+    remote_physical_per_logical,
+    local_block_ids,
+    remote_block_ids,
+    expected_local,
+    expected_remote,
+):
+    """_apply_prefix_caching front-trims FA groups to
+    min(local, remote) for Mamba hybrid models with heterogeneous TP.
+    """
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker import (
+        NixlConnectorWorker,
+    )
+    from vllm.v1.kv_cache_interface import FullAttentionSpec, MambaSpec
+
+    worker = object.__new__(NixlConnectorWorker)
+    worker._has_mamba = True
+    worker._physical_blocks_per_logical_kv_block = local_physical_per_logical
+    worker._group_spec_types = (FullAttentionSpec, MambaSpec)
+    worker.kv_cache_config = make_kv_cache_config(block_size=16, mamba_enabled=True)
+
+    aligned_local, aligned_remote = worker._apply_prefix_caching(
+        local_block_ids, remote_block_ids, remote_physical_per_logical
+    )
+
+    assert aligned_local == expected_local, (
+        f"Expected local {expected_local}, got {aligned_local}"
+    )
+    assert aligned_remote == expected_remote, (
+        f"Expected remote {expected_remote}, got {aligned_remote}"
+    )
+
+
+@pytest.mark.cpu_test
+@pytest.mark.parametrize(
+    "local_physical_per_logical,remote_physical_per_logical,"
+    "remote_fa_blocks,local_fa_blocks,ssm_blocks,"
+    "correct_remote_fa,correct_local_fa",
+    [
+        # 10 kernel blocks of data (640 tokens).
+        # remote physical_per_logical=10 → 1 logical → 10 kernel [0..9]
+        # local  physical_per_logical=6  → 2 logical → 12 kernel [0..11]
+        # 1st local logical block cached → suffix [6..11]
+        # Correct: transfer only uncached suffix tokens (384-639)
+        #   = remote [6,7,8,9] → local [6,7,8,9].
+        # Actual (front-trim): remote[:6]=[0..5] → local [6..11]. Wrong.
+        pytest.param(
+            6,
+            10,
+            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+            [6, 7, 8, 9, 10, 11],
+            [42],
+            [6, 7, 8, 9],
+            [6, 7, 8, 9],
+            id="local6_remote10_fail",
+        ),
+        # 15 kernel blocks of data (960 tokens).
+        # remote physical_per_logical=6  → 3 logical → 18 kernel [0..17]
+        # local  physical_per_logical=10 → 2 logical → 20 kernel [0..19]
+        # 1st local logical block cached → suffix [10..19]
+        # Correct: transfer only uncached suffix tokens (640-959)
+        #   = remote [10,11,12,13,14] → local [10,11,12,13,14].
+        # Actual (front-trim): remote[:10]=[0..9] → local [10..19]. Wrong.
+        pytest.param(
+            10,
+            6,
+            list(range(18)),
+            list(range(10, 20)),
+            [42],
+            [10, 11, 12, 13, 14],
+            [10, 11, 12, 13, 14],
+            id="local10_remote6_fail",
+        ),
+    ],
+)
+def test_mismatched_physical_per_logical_fails_with_prefix_caching(
+    local_physical_per_logical,
+    remote_physical_per_logical,
+    remote_fa_blocks,
+    local_fa_blocks,
+    ssm_blocks,
+    correct_remote_fa,
+    correct_local_fa,
+):
+    """Demonstrate that _apply_prefix_caching front-trims ([:N])
+    in the Mamba hybrid path, which fails when prefix caching produces
+    suffix-only local blocks.
+
+    Prefix caching operates at logical block granularity. When a logical
+    block is cached locally, the decode side only allocates kernel blocks
+    for the uncached suffix. The front-trim pairs remote prefix blocks
+    with local suffix slots — a silent data corruption.
+    """
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker import (
+        NixlConnectorWorker,
+    )
+
+    worker = object.__new__(NixlConnectorWorker)
+    worker._physical_blocks_per_logical_kv_block = local_physical_per_logical
+    worker.kv_cache_config = make_kv_cache_config(
+        block_size=16,
+        mamba_enabled=True,
+    )
+    worker._has_mamba = True
+    worker._group_spec_types = tuple(
+        type(g.kv_cache_spec) for g in worker.kv_cache_config.kv_cache_groups
+    )
+
+    local_block_ids = (local_fa_blocks, ssm_blocks)
+    remote_block_ids = (remote_fa_blocks, ssm_blocks)
+
+    aligned_local, aligned_remote = worker._apply_prefix_caching(
+        local_block_ids,
+        remote_block_ids,
+        remote_physical_per_logical,
+    )
+
+    assert (
+        aligned_remote[0] != correct_remote_fa or aligned_local[0] != correct_local_fa
+    ), (
+        f"Prefix caching with mismatched physical_per_logical should not "
+        f"produce correct transfer ids: "
+        f"remote={aligned_remote[0]}, local={aligned_local[0]}, "
+        f"correct_remote={correct_remote_fa}, correct_local={correct_local_fa}"
+    )
+
+
 @pytest.mark.parametrize("model_name, sw_size", [("google/gemma-3-1b-it", 512)])
 def test_fewer_blocks_with_hma(monkeypatch, model_name, sw_size):
     """Test that a prefill instance returns fewer "remote blocks" for the SWA groups
@@ -102,12 +382,13 @@ def test_fewer_blocks_with_hma(monkeypatch, model_name, sw_size):
     llm_kwargs = {
         "model": model_name,
         "enforce_eager": True,
-        "gpu_memory_utilization": 0.5,
+        "gpu_memory_utilization": 0.3,
         "kv_transfer_config": kv_transfer_config,
         "max_model_len": 2048,
+        "max_num_seqs": 1,
         # NOTE: Make sure HMA is enabled
         "disable_hybrid_kv_cache_manager": False,
-        "max_num_batched_tokens": 1024,
+        "max_num_batched_tokens": 2048,
         "enable_prefix_caching": False,
         "block_size": block_size,
     }
@@ -154,6 +435,8 @@ def run_hma_test(llm: LLM):
             assert len(group_block_ids) == expected_num_remote_blocks
 
     def run_test_and_cleanup():
+        gc.collect()
+        torch.accelerator.empty_cache()
         llm = LLM(**llm_kwargs)
         try:
             run_hma_test(llm)
@@ -169,7 +452,7 @@ def test_nixl_metadata_hma_block_ids_structure():
     Test that NixlConnectorMetadata correctly stores block IDs for multiple
     KV cache groups when HMA is enabled.
     """
-    from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import (
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.metadata import (
         NixlConnectorMetadata,
     )
 
@@ -207,69 +490,83 @@ def test_nixl_metadata_hma_block_ids_structure():
     assert list(req_meta.remote.block_ids[1]) == [18, 19, 20, 21]
 
 
-@pytest.mark.cpu_test
-def test_get_block_descs_ids_hybrid_ssm():
-    """Test _get_block_descs_ids uses per-group strides for hybrid FA+SSM
-    when ratio=1 (no kernel block size mismatch)."""
-    from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import (
+def _make_mock_worker_for_desc_ids(
+    num_regions: int,
+    has_mamba: bool,
+    group_spec_types: tuple,
+    block_len_per_layer: list[int] | None = None,
+):
+    """Build a mock NixlConnectorWorker with attrs needed by _compute_desc_ids."""
+    from unittest.mock import MagicMock
+
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker import (
         NixlConnectorWorker,
     )
 
-    worker = object.__new__(NixlConnectorWorker)
+    worker = MagicMock(spec=NixlConnectorWorker)
+    worker.num_regions = num_regions
+    worker._has_mamba = has_mamba
+    worker._group_spec_types = group_spec_types
+    worker.block_len_per_layer = block_len_per_layer or [100]
+    worker._compute_desc_ids = NixlConnectorWorker._compute_desc_ids.__get__(
+        worker, NixlConnectorWorker
+    )
+    return worker
 
-    num_blocks = 100
-    engine_id = "test-engine"
-    worker.num_regions = 2
-    worker.dst_num_blocks = {engine_id: num_blocks}
-    worker._has_mamba = True
-    worker._is_mamba_group = [False, True]
-    worker._physical_blocks_per_logical_kv_block = 1
-    # num_descs = num_regions * num_blocks (no blocks_first doubling)
-    worker.num_descs = 2 * num_blocks
+
+@pytest.mark.cpu_test
+def test_get_block_descs_ids_hybrid_ssm():
+    """Test _compute_desc_ids uses per-group strides for hybrid
+    FA+SSM when ratio=1 (no kernel block size mismatch)."""
+    from vllm.v1.kv_cache_interface import FullAttentionSpec, MambaSpec
+
+    worker = _make_mock_worker_for_desc_ids(
+        num_regions=2,
+        has_mamba=True,
+        group_spec_types=(FullAttentionSpec, MambaSpec),
+        block_len_per_layer=[100],
+    )
 
     fa_blocks = [3, 5]
     ssm_blocks = [1, 2]
-    result = worker._get_block_descs_ids(engine_id, (fa_blocks, ssm_blocks))
-
-    # FA group: stride=num_blocks=100, offset=0
-    #   region0: [3, 5],  region1: [103, 105]
-    # SSM group: stride=logical_blocks=100 (=num_blocks/ratio=100/1),
-    #   offset=num_descs=200
-    #   region0: [201, 202],  region1: [301, 302]
-    expected = [3, 5, 103, 105, 201, 202, 301, 302]
+    result = worker._compute_desc_ids(
+        block_ids=(fa_blocks, ssm_blocks),
+        dst_num_blocks=100,
+        block_size_ratio=None,
+        physical_blocks_per_logical=1,
+    )
+
+    expected = [3, 5, 103, 105, 201, 202, 301, 302, 401, 402, 501, 502]
     assert list(result) == expected, f"Expected {expected}, got {list(result)}"
 
 
 @pytest.mark.cpu_test
 def test_get_block_descs_ids_kernel_block_mismatch():
-    """Test _get_block_descs_ids uses different strides for FA (kernel blocks)
-    vs SSM (logical blocks) when ratio > 1."""
-    from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import (
-        NixlConnectorWorker,
-    )
-
-    worker = object.__new__(NixlConnectorWorker)
+    """Test _compute_desc_ids uses different strides for FA
+    (kernel blocks) vs SSM (logical blocks) when ratio > 1."""
+    from vllm.v1.kv_cache_interface import FullAttentionSpec, MambaSpec
 
     ratio = 4
     logical_blocks = 100
     num_blocks = logical_blocks * ratio  # 400 kernel blocks
-    engine_id = "test-engine"
-    worker.num_regions = 2
-    worker.dst_num_blocks = {engine_id: num_blocks}
-    worker._has_mamba = True
-    worker._is_mamba_group = [False, True]
-    worker._physical_blocks_per_logical_kv_block = ratio
-    worker.num_descs = 2 * num_blocks  # 800
-
-    fa_blocks = [3, 7]  # kernel-level block IDs
-    ssm_blocks = [1, 2]  # logical block IDs
-    result = worker._get_block_descs_ids(engine_id, (fa_blocks, ssm_blocks))
-
-    # FA group: stride=num_blocks=400, offset=0
-    #   region0: [3, 7],  region1: [403, 407]
-    # SSM group: stride=logical_blocks=400//4=100, offset=num_descs=800
-    #   region0: [801, 802],  region1: [901, 902]
-    expected = [3, 7, 403, 407, 801, 802, 901, 902]
+
+    worker = _make_mock_worker_for_desc_ids(
+        num_regions=2,
+        has_mamba=True,
+        group_spec_types=(FullAttentionSpec, MambaSpec),
+        block_len_per_layer=[100],
+    )
+
+    fa_blocks = [3, 7]
+    ssm_blocks = [1, 2]
+    result = worker._compute_desc_ids(
+        block_ids=(fa_blocks, ssm_blocks),
+        dst_num_blocks=num_blocks,
+        block_size_ratio=None,
+        physical_blocks_per_logical=ratio,
+    )
+
+    expected = [3, 7, 403, 407, 801, 802, 901, 902, 1001, 1002, 1101, 1102]
     assert list(result) == expected, f"Expected {expected}, got {list(result)}"
 
 
@@ -277,7 +574,7 @@ def test_get_block_descs_ids_kernel_block_mismatch():
 def test_nixl_metadata_hybrid_ssm_block_ids():
     """Test NixlConnectorMetadata correctly stores block IDs for FA + SSM
     groups with different block counts (kernel mismatch active)."""
-    from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import (
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.metadata import (
         NixlConnectorMetadata,
     )
 
@@ -340,6 +637,30 @@ def test_mamba_n1_d_side(has_mamba, is_hma_required, expected_count):
     assert is_async is True
 
 
+@pytest.mark.cpu_test
+def test_mamba_n1_d_side_builds_decode_metadata():
+    req = create_request(num_tokens=10, do_remote_prefill=True)
+    sched = make_nixl_scheduler(has_mamba=True, is_hma_required=True)
+
+    num_computed_tokens, is_async = sched.get_num_new_matched_tokens(
+        req, num_computed_tokens=0
+    )
+
+    assert num_computed_tokens == req.num_prompt_tokens - 1
+    assert is_async is True
+
+    vllm_config = create_vllm_config()
+    metadata = MockMambaBuilder.build_mamba_metadata(
+        vllm_config,
+        seq_lens=[req.num_prompt_tokens],
+        query_lens=[1],
+        is_prefilling=[True],
+    )
+
+    assert metadata.num_decodes == 1
+    assert metadata.num_prefills == 0
+
+
 @pytest.mark.cpu_test
 def test_mamba_n1_p_side_truncation():
     """P-side: Mamba truncates prompt to N-1, sets max_tokens=1.
@@ -385,7 +706,7 @@ def test_mamba_n1_p_side_truncation():
     ],
     ids=["fa_swa_mamba", "fa_swa_only", "fa_only"],
 )
-@patch("vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.current_platform")
+@patch("vllm.distributed.kv_transfer.kv_connector.v1.nixl.scheduler.current_platform")
 def test_has_mamba_init(
     mock_platform,
     swa_enabled,
@@ -394,7 +715,7 @@ def test_has_mamba_init(
     expected_is_hma,
 ):
     """Test _has_mamba / _is_hma_required derived from kv_cache_groups."""
-    from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import (
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.scheduler import (
         NixlConnectorScheduler,
     )
 
@@ -418,3 +739,271 @@ def test_has_mamba_init(
     )
     assert scheduler._has_mamba is expected_has_mamba
     assert scheduler._is_hma_required is expected_is_hma
+
+
+@pytest.mark.cpu_test
+@pytest.mark.parametrize(
+    "ssm_sizes,block_len,expected_ratio",
+    [
+        # Nemotron 30B TP=1: ceil((36864 + 2097152) / 8192) = 261
+        ((36864, 2097152), 8192, 261),
+        # Nemotron 30B TP=2: ceil((18432 + 1048576) / 4096) = 261
+        ((18432, 1048576), 4096, 261),
+        # Nemotron 30B TP=4: ceil((9216 + 524288) / 4096) = 131
+        ((9216, 524288), 4096, 131),
+    ],
+)
+def test_compute_physical_blocks_per_logical(ssm_sizes, block_len, expected_ratio):
+    """Verify that compute_physical_blocks_per_logical is TP-dependent.
+
+    With dimension-sharded Mamba state, the ratio differs across TP sizes
+    (e.g. TP=1 → 261, TP=4 → 131 for Nemotron 30B). This is why
+    _physical_blocks_per_logical must be stored per-engine.
+    """
+    from vllm.distributed.kv_transfer.kv_connector.v1.ssm_conv_transfer_utils import (
+        compute_physical_blocks_per_logical,
+    )
+
+    assert compute_physical_blocks_per_logical(ssm_sizes, block_len) == expected_ratio
+
+
+@pytest.mark.cpu_test
+@pytest.mark.parametrize(
+    "mamba_type,local_tp,conv_dim_local,conv_rows,temporal_shape,expected_proj_dims",
+    [
+        # nvidia/Nemotron-H-8B-Base-8K (Mamba2)
+        # mamba_num_heads=128, head_dim=64, n_groups=8, ssm_state_size=128
+        pytest.param(
+            "mamba2",
+            1,
+            10240,
+            3,
+            (128, 64, 128),
+            (8192, 1024, 1024),
+            id="nemotron_h_8b_tp1",
+        ),
+        pytest.param(
+            "mamba2",
+            4,
+            2560,
+            3,
+            (32, 64, 128),
+            (2048, 256, 256),
+            id="nemotron_h_8b_tp4",
+        ),
+        # nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B (Mamba2)
+        # mamba_num_heads=64, head_dim=64, n_groups=8, ssm_state_size=128
+        pytest.param(
+            "mamba2",
+            1,
+            6144,
+            3,
+            (64, 64, 128),
+            (4096, 1024, 1024),
+            id="nemotron_nano_30b_tp1",
+        ),
+        # Qwen/Qwen3.5-0.8B (GDN, symmetric: num_v=num_k=16)
+        # key_dim=2048, value_dim=2048, conv_dim=6144
+        pytest.param(
+            "gdn_attention",
+            1,
+            6144,
+            3,
+            (16, 128, 128),
+            (2048, 2048, 2048),
+            id="qwen35_08b_tp1",
+        ),
+        pytest.param(
+            "gdn_attention",
+            4,
+            1536,
+            3,
+            (4, 128, 128),
+            (512, 512, 512),
+            id="qwen35_08b_tp4",
+        ),
+        # Qwen/Qwen3.5-4B (GDN, asymmetric: num_v=32, num_k=16, K:V=1:2)
+        # key_dim=2048, value_dim=4096, conv_dim=8192
+        pytest.param(
+            "gdn_attention",
+            1,
+            8192,
+            3,
+            (32, 128, 128),
+            (2048, 2048, 4096),
+            id="qwen35_4b_tp1",
+        ),
+        # Qwen/Qwen3.5-27B (GDN, asymmetric: num_v=48, num_k=16, K:V=1:3)
+        # key_dim=2048, value_dim=6144, conv_dim=10240
+        pytest.param(
+            "gdn_attention",
+            1,
+            10240,
+            3,
+            (48, 128, 128),
+            (2048, 2048, 6144),
+            id="qwen35_27b_tp1",
+        ),
+        pytest.param(
+            "gdn_attention",
+            8,
+            1280,
+            3,
+            (6, 128, 128),
+            (256, 256, 768),
+            id="qwen35_27b_tp8",
+        ),
+    ],
+)
+def test_derive_mamba_conv_split(
+    monkeypatch,
+    mamba_type,
+    local_tp,
+    conv_dim_local,
+    conv_rows,
+    temporal_shape,
+    expected_proj_dims,
+):
+    """Parametrized test for derive_mamba_conv_split with real model configs.
+
+    Values generated by verify_conv_split.py which loads HuggingFace configs
+    and calls vLLM's derive_mamba_conv_split directly.
+    """
+    from vllm.distributed.kv_transfer.kv_connector.v1.ssm_conv_transfer_utils import (
+        derive_mamba_conv_split,
+    )
+    from vllm.v1.attention.backends.registry import MambaAttentionBackendEnum
+    from vllm.v1.kv_cache_interface import MambaSpec
+
+    _TYPE_MAP = {
+        "mamba2": MambaAttentionBackendEnum.MAMBA2,
+        "gdn_attention": MambaAttentionBackendEnum.GDN_ATTN,
+    }
+    mamba_type_enum = _TYPE_MAP[mamba_type]
+
+    monkeypatch.setenv("VLLM_SSM_CONV_STATE_LAYOUT", "DS")
+    spec = MambaSpec(
+        block_size=64,
+        shapes=((conv_dim_local, conv_rows), temporal_shape),
+        dtypes=(torch.bfloat16, torch.bfloat16),
+        mamba_type=mamba_type_enum,
+    )
+    out = derive_mamba_conv_split(spec, local_tp=local_tp)
+    assert out.local_proj_dims == expected_proj_dims
+    assert out.conv_rows == conv_rows
+
+
+@pytest.mark.cpu_test
+@pytest.mark.parametrize(
+    "mamba_enabled,swa_enabled,"
+    "local_physical_per_logical,remote_physical_per_logical,"
+    "logical_block_ids,expected_kernel_block_ids",
+    [
+        # Qwen3.5-0.8B 4P2D (kernel_block_size=64):
+        #   prefill TP=4: logical_block_size=384 → physical_per_logical=6
+        #   decode  TP=2: logical_block_size=640 → physical_per_logical=10
+        # FA logical [0] → remote kernel [0..9] (1 * 10)
+        # SSM logical [10] → unchanged [10]
+        pytest.param(
+            True,
+            False,
+            6,
+            10,
+            ([0], [10]),
+            [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [10]],
+            id="qwen35_4p2d",
+        ),
+        # Qwen3.5-0.8B 2P4D (kernel_block_size=64):
+        #   prefill TP=2: logical_block_size=640 → physical_per_logical=10
+        #   decode  TP=4: logical_block_size=384 → physical_per_logical=6
+        # FA logical [0, 1] → remote kernel [0..5, 6..11] (2 * 6)
+        # SSM logical [10] → unchanged [10]
+        pytest.param(
+            True,
+            False,
+            10,
+            6,
+            ([0, 1], [10]),
+            [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [10]],
+            id="qwen35_2p4d",
+        ),
+        # Homogeneous TP (kernel_block_size=64):
+        #   both sides: logical_block_size=640 → physical_per_logical=10
+        # FA logical [0] → kernel [0..9], SSM unchanged
+        pytest.param(
+            True,
+            False,
+            10,
+            10,
+            ([0], [10]),
+            [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [10]],
+            id="homo_tp",
+        ),
+        # remote physical_per_logical=1: early return, no expansion
+        pytest.param(
+            True,
+            False,
+            10,
+            1,
+            ([0, 1, 2], [5]),
+            [[0, 1, 2], [5]],
+            id="mamba_remote_physical_per_logical_1",
+        ),
+        # Pure FA (no mamba): single group expanded with remote stride
+        pytest.param(
+            False,
+            False,
+            2,
+            4,
+            ([0, 1],),
+            [[0, 1, 2, 3, 4, 5, 6, 7]],
+            id="pure_fa",
+        ),
+        # FA + SWA (no mamba): both groups expanded
+        pytest.param(
+            False,
+            True,
+            2,
+            3,
+            ([0, 1], [2, 3]),
+            [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11]],
+            id="fa_swa",
+        ),
+    ],
+)
+def test_logical_to_remote_kernel_block_ids(
+    mamba_enabled,
+    swa_enabled,
+    local_physical_per_logical,
+    remote_physical_per_logical,
+    logical_block_ids,
+    expected_kernel_block_ids,
+):
+    """Verify _logical_to_remote_kernel_block_ids uses the remote
+    physical_per_logical for FA expansion, not the local one.
+
+    This was the root cause of silent accuracy corruption in Qwen3.5
+    heterogeneous TP (e.g. 4P2D): the old code used local physical_per_logical
+    for the expansion arange, producing wrong kernel block indices.
+
+    Qwen3.5-0.8B values verified by verify_conv_split.py (issue #13).
+    """
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker import (
+        NixlConnectorWorker,
+    )
+
+    worker = object.__new__(NixlConnectorWorker)
+    worker._physical_blocks_per_logical_kv_block = local_physical_per_logical
+    worker.kv_cache_config = make_kv_cache_config(
+        block_size=16,
+        mamba_enabled=mamba_enabled,
+        swa_enabled=swa_enabled,
+    )
+
+    result = worker._logical_to_remote_kernel_block_ids(
+        logical_block_ids,
+        remote_physical_per_logical,
+    )
+    assert list(result) == expected_kernel_block_ids, (
+        f"Expected {expected_kernel_block_ids}, got {result}"
+    )
diff --git a/tests/v1/kv_connector/unit/test_nixl_heartbeat.py b/tests/v1/kv_connector/unit/test_nixl_heartbeat.py
new file mode 100644
index 000000000000..345e10621f0b
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_nixl_heartbeat.py
@@ -0,0 +1,165 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for the scheduler-driven heartbeat / lease-renewal system."""
+
+import time
+from unittest.mock import MagicMock
+
+import pytest
+
+from vllm.v1.outputs import KVConnectorOutput
+
+from .utils import create_request, make_nixl_scheduler
+
+_ENGINE_A = "my-engine-id"
+
+
+def _sched(kv_lease_duration: int = 30):
+    return make_nixl_scheduler(heartbeat=True, kv_lease_duration=kv_lease_duration)
+
+
+def _req(request_id: int = 1):
+    return create_request(request_id=request_id, do_remote_prefill=True)
+
+
+def _worker_stub():
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker import (
+        NixlConnectorWorker,
+    )
+
+    w = object.__new__(NixlConnectorWorker)
+    w._reqs_to_send = {}
+    w._lease_extension = 20
+    return w
+
+
+# ===================================================================
+# Scheduler: on_new_request
+# ===================================================================
+
+
+def test_on_new_request_tracks_and_groups():
+    """Add two reqs to same engine, one to another; verify grouping."""
+    s = _sched()
+    s.on_new_request(_req(1))
+    s.on_new_request(_req(2))
+
+    assert s._heartbeat_by_engine[_ENGINE_A].req_ids == {"prefill-1", "prefill-2"}
+    info = s._heartbeat_by_engine[_ENGINE_A]
+    assert (info.host, info.port, info.tp_size) == ("my-host", 1234, 1)
+    assert s._heartbeat_req_engine["id-1"] == (_ENGINE_A, "prefill-1")
+
+    # Different engine.
+    r3 = _req(3)
+    r3.kv_transfer_params["remote_engine_id"] = "engine-b"
+    s.on_new_request(r3)
+    assert len(s._heartbeat_by_engine) == 2
+
+
+@pytest.mark.parametrize(
+    "make_req",
+    [
+        lambda: create_request(request_id=2, do_remote_decode=True),
+        lambda: create_request(request_id=3),  # no kv_transfer_params
+    ],
+    ids=["decode", "plain"],
+)
+def test_on_new_request_ignores_non_prefill(make_req):
+    s = _sched()
+    s.on_new_request(make_req())
+    assert len(s._heartbeat_by_engine) == 0
+
+
+# ===================================================================
+# Scheduler: _stop_heartbeat
+# ===================================================================
+
+
+def test_stop_heartbeat_partial_and_full():
+    """Stop one of two reqs on same engine, then stop the other."""
+    s = _sched()
+    s.on_new_request(_req(1))
+    s.on_new_request(_req(2))
+
+    s._stop_heartbeat("id-1")
+    assert s._heartbeat_by_engine[_ENGINE_A].req_ids == {"prefill-2"}
+    assert "id-1" not in s._heartbeat_req_engine
+
+    s._stop_heartbeat("id-2")
+    assert len(s._heartbeat_by_engine) == 0
+    assert len(s._heartbeat_req_engine) == 0
+
+
+# ===================================================================
+# Scheduler: build_connector_meta throttling
+# ===================================================================
+
+
+def test_build_connector_meta_heartbeat_throttling():
+    # kv_lease_duration=30 => _heartbeat_interval = 30 // 6 = 5
+    s = _sched(kv_lease_duration=30)
+    s.on_new_request(_req(1))
+
+    # Ensure the first call triggers by placing last_heartbeat far in the past.
+    s._last_heartbeat_time = time.perf_counter() - 10
+    meta1 = s.build_connector_meta(MagicMock())
+    assert _ENGINE_A in meta1.heartbeat_by_engine
+
+    # Immediate second call is throttled (< 5s since last).
+    meta2 = s.build_connector_meta(MagicMock())
+    assert len(meta2.heartbeat_by_engine) == 0
+
+
+# ===================================================================
+# Scheduler: cleanup paths (update_connector_output / request_finished)
+# ===================================================================
+
+
+def test_update_connector_output_stops_heartbeat():
+    s = _sched()
+    s.on_new_request(_req(1))
+
+    s.update_connector_output(
+        KVConnectorOutput(
+            finished_sending=None,
+            finished_recving={"id-1"},
+            invalid_block_ids=set(),
+        )
+    )
+
+    assert len(s._heartbeat_by_engine) == 0
+    assert len(s._heartbeat_req_engine) == 0
+
+
+def test_request_finished_stops_heartbeat():
+    s = _sched()
+    r = _req(1)
+    s.on_new_request(r)
+
+    # Simulate update_state_after_alloc having consumed do_remote_prefill.
+    r.kv_transfer_params["do_remote_prefill"] = False
+    s.request_finished(r, block_ids=())
+
+    assert len(s._heartbeat_by_engine) == 0
+    assert len(s._heartbeat_req_engine) == 0
+
+
+# ===================================================================
+# Worker: _handle_heartbeat
+# ===================================================================
+
+
+def test_handle_heartbeat():
+    w = _worker_stub()
+    far_future = time.perf_counter() + 99999
+    w._reqs_to_send = {"req-a": 100.0, "req-b": far_future}
+
+    before = time.perf_counter()
+    w._handle_heartbeat("req-a,req-b,req-unknown")
+
+    # req-a: pushed forward to ~now+20.
+    assert w._reqs_to_send["req-a"] >= before + 20
+    # req-b: already far out, max() keeps it.
+    assert w._reqs_to_send["req-b"] >= far_future
+    # req-unknown: not added.
+    assert "req-unknown" not in w._reqs_to_send
diff --git a/tests/v1/kv_connector/unit/test_offloading_connector.py b/tests/v1/kv_connector/unit/test_offloading_connector.py
new file mode 100644
index 000000000000..af948ce3aef3
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_offloading_connector.py
@@ -0,0 +1,391 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import socket
+import time
+
+import msgspec
+import msgspec.msgpack
+import pytest
+import zmq
+from tqdm import tqdm
+
+from vllm import LLM, SamplingParams, TokensPrompt
+from vllm.config import KVEventsConfig, KVTransferConfig
+from vllm.distributed.kv_events import BlockStored, KVEventBatch
+from vllm.platforms import current_platform
+
+_ATTN_BACKENDS: list[str] = []
+if current_platform.is_cuda():
+    _ATTN_BACKENDS = ["FLASH_ATTN", "FLASHINFER", "TRITON_ATTN"]
+elif current_platform.is_rocm():
+    _ATTN_BACKENDS = ["TRITON_ATTN"]
+
+# (model, attn_backend | None, block_size | None, uses_hma)
+#
+# - Llama: tested with each attention backend and a custom block_size.
+# - Gemma-3: HMA (sliding window + full attention), default backend.
+# - Mamba-130m: HMA (attention-free, acts like sliding_window=1),
+#   default backend.  Prefix caching must be force-enabled.
+# - Falcon-H1-0.5B-Instruct: HMA (parallel SSM/attention in every layer).
+#   After page-size unification the mamba and attention groups have
+#   different block sizes.
+MODEL_PARAMS: list[tuple[str, str | None, int | None, bool]] = [
+    ("meta-llama/Llama-3.2-1B-Instruct", backend, 48, False)
+    for backend in _ATTN_BACKENDS
+]
+# HMA / Mamba models are only tested on CUDA (not ROCm).
+if current_platform.is_cuda():
+    MODEL_PARAMS += [
+        ("google/gemma-3-1b-it", None, 48, True),
+        ("state-spaces/mamba-130m-hf", None, 48, True),
+        # Falcon-H1: parallel hybrid (every layer has both attention and SSM).
+        # The mamba and attention groups end up with different GPU block sizes
+        # after page-size unification, so we leave cpu_block_size=None
+        # (block_size_factor stays 1).
+        ("tiiuae/Falcon-H1-0.5B-Instruct", None, None, True),
+    ]
+
+# Maximum time (seconds) to wait for the async CPU offload transfer
+# to complete before giving up.
+_RESET_CACHE_TIMEOUT = 30 if current_platform.is_rocm() else 10
+
+# ZMQ poll timeout (ms) for the first event.
+_FIRST_EVENT_POLL_MS = 10_000 if current_platform.is_rocm() else 1000
+
+# Hard ceiling (seconds) on how long get_new_cpu_stored_events may loop,
+# to prevent hangs if non-CPU events keep arriving indefinitely.
+_EVENT_DRAIN_TIMEOUT = 60
+
+
+class MockSubscriber:
+    """Helper class to receive and verify published events"""
+
+    def __init__(
+        self,
+        endpoint: str,
+        topic: str,
+    ):
+        self.ctx = zmq.Context.instance()
+        self.topic_bytes = topic.encode("utf-8")
+
+        # Set up subscriber socket
+        self.sub = self.ctx.socket(zmq.SUB)
+        self.sub.setsockopt(zmq.SUBSCRIBE, self.topic_bytes)
+        self.sub.connect(endpoint)
+
+        self.decoder = msgspec.msgpack.Decoder(type=KVEventBatch)
+
+    def get_new_cpu_stored_events(self) -> list[BlockStored]:
+        cpu_stored_events: list[BlockStored] = []
+
+        poller = zmq.Poller()
+        poller.register(self.sub, zmq.POLLIN)
+
+        poll_ms = _FIRST_EVENT_POLL_MS
+        deadline = time.monotonic() + _EVENT_DRAIN_TIMEOUT
+        while time.monotonic() < deadline:
+            events = dict(poller.poll(poll_ms))
+
+            if events.get(self.sub) != zmq.POLLIN:
+                return cpu_stored_events
+
+            topic_bytes, _, payload = self.sub.recv_multipart()
+
+            assert topic_bytes == self.topic_bytes
+
+            event_batch = self.decoder.decode(payload)
+            assert isinstance(event_batch, KVEventBatch)
+            for event in event_batch.events:
+                if isinstance(event, BlockStored) and event.medium == "CPU":
+                    cpu_stored_events.append(event)
+                    poll_ms = 100
+
+        return cpu_stored_events
+
+    def close(self):
+        """Clean up resources"""
+        self.sub.close()
+
+
+def _wait_for_prefix_cache_reset(llm: LLM, reset_connector: bool = False) -> None:
+    """Wait for async offload transfers to finish so prefix cache can reset.
+
+    The GPU-to-CPU offload runs on a CUDA stream asynchronously. While blocks
+    are still held by the offload worker, ``reset_prefix_cache`` returns
+    ``False``. Between retries we send a dummy single-token prefill to force
+    the engine to step, which polls the worker for completed transfers and
+    frees GPU blocks.
+
+    Args:
+        llm: The LLM instance to reset.
+        reset_connector: If True, also reset the KV connector state.
+    """
+    _dummy_params = SamplingParams(max_tokens=1)
+    deadline = time.monotonic() + _RESET_CACHE_TIMEOUT
+    while not llm.reset_prefix_cache(reset_connector=reset_connector):
+        if time.monotonic() > deadline:
+            raise TimeoutError(
+                "reset_prefix_cache did not succeed within "
+                f"{_RESET_CACHE_TIMEOUT}s - async offload may be stuck"
+            )
+        # Force an engine step so the scheduler polls get_finished()
+        # and releases GPU blocks held by in-flight async stores.
+        llm.generate(
+            [TokensPrompt(prompt_token_ids=[0])],
+            _dummy_params,
+            use_tqdm=False,
+        )
+
+
+def _latency_test(
+    llm: LLM, subscriber: MockSubscriber | None, reset_connector: bool = False
+):
+    sampling_params = SamplingParams(max_tokens=1)
+
+    num_times_cpu_better_than_cold = 0
+    num_tests = 10
+    total_cold_time = 0.0
+    total_gpu_hit_time = 0.0
+    total_cpu_hit_time = 0.0
+    max_model_len = llm.llm_engine.vllm_config.model_config.max_model_len
+    # Use a long prompt that fits within the model's context window.
+    prompt_len = min(10001, max_model_len - 1)
+    prompt_token_ids = [0] * prompt_len
+    for i in tqdm(range(num_tests), desc="Running tests"):
+        prompt_token_ids[0] = i
+        prompts = [TokensPrompt(prompt_token_ids=prompt_token_ids)]
+
+        # run generation - this should trigger saving KV cache
+        start_time = time.time()
+        llm.generate(prompts, sampling_params, use_tqdm=False)
+        cold_time = time.time() - start_time
+        total_cold_time += cold_time
+
+        # run generation again - should hit the GPU prefix cache
+        start_time = time.time()
+        llm.generate(prompts, sampling_params, use_tqdm=False)
+        gpu_hit_time = time.time() - start_time
+        total_gpu_hit_time += gpu_hit_time
+
+        # Wait for the async CPU offload to finish, then reset prefix cache
+        # so the next generate() must reload from CPU rather than GPU.
+        _wait_for_prefix_cache_reset(llm, reset_connector=reset_connector)
+
+        # Verify CPU stored events arrived (offload is done before we
+        # attempt to load from CPU).
+        if subscriber is not None:
+            assert subscriber.get_new_cpu_stored_events(), (
+                f"No CPU stored events received on iteration {i}; "
+                "async offload may not have completed in time"
+            )
+
+        # run generation again - this should trigger loading from CPU
+        start_time = time.time()
+        llm.generate(prompts, sampling_params, use_tqdm=False)
+        cpu_hit_time = time.time() - start_time
+        total_cpu_hit_time += cpu_hit_time
+
+        if cpu_hit_time < cold_time:
+            num_times_cpu_better_than_cold += 1
+
+    print("Average times:")
+    print(f"    Cold: {total_cold_time * 1000 / num_tests:.2f}ms")
+    print(f"    GPU hit: {total_gpu_hit_time * 1000 / num_tests:.2f}ms")
+    print(f"    CPU hit: {total_cpu_hit_time * 1000 / num_tests:.2f}ms")
+
+    assert num_times_cpu_better_than_cold >= 0.8 * num_tests
+
+
+def _accuracy_test(llm: LLM, subscriber: MockSubscriber | None):
+    sampling_params = SamplingParams(max_tokens=1)
+    extra_config = (
+        llm.llm_engine.vllm_config.kv_transfer_config.kv_connector_extra_config
+    )
+    cpu_block_size = extra_config.get("block_size")
+    if cpu_block_size is None:
+        # No custom offloaded block_size: offloaded blocks match GPU blocks.
+        # Use the hash block_size (cache_config.block_size) for alignment.
+        cpu_block_size = llm.llm_engine.vllm_config.cache_config.block_size
+
+    if subscriber is not None:
+        subscriber.get_new_cpu_stored_events()
+
+    # Pad prompt so its token count is a multiple of cpu_block_size.
+    # Use the tokenizer directly to avoid expensive llm.generate() calls.
+    tokenizer = llm.get_tokenizer()
+    prompt = "Let's count to 10. One, two, three, four,"
+    while len(tokenizer.encode(prompt)) % cpu_block_size != 0:
+        prompt = ". " + prompt
+
+    # Seed the CPU cache with the prompt.
+    llm.generate(prompt, sampling_params, use_tqdm=False)
+
+    if subscriber is not None:
+        assert subscriber.get_new_cpu_stored_events()
+
+    test_count = 20
+    results = llm.generate([prompt] * test_count, sampling_params, use_tqdm=False)
+    success_count = sum(1 for r in results if r.outputs[0].text == " five")
+    assert success_count >= 0.5 * test_count
+
+
+@pytest.mark.parametrize("model, attn_backend, cpu_block_size, uses_hma", MODEL_PARAMS)
+def test_cpu_offloading(
+    model: str,
+    attn_backend: str | None,
+    cpu_block_size: int | None,
+    uses_hma: bool,
+) -> None:
+    """
+    Tests OffloadingConnector with CPUOffloadingSpec.
+    """
+    # configure OffloadingConnector (spec_name=CPUOffloadingSpec by default)
+    extra_config: dict = {"cpu_bytes_to_use": 500 << 20}
+    if cpu_block_size is not None:
+        extra_config["block_size"] = cpu_block_size
+    kv_transfer_config = KVTransferConfig(
+        kv_connector="OffloadingConnector",
+        kv_role="kv_both",
+        kv_connector_extra_config=extra_config,
+    )
+
+    # KV events are incompatible with HMA (setting kv_events_config
+    # would force HMA off), so only enable them for non-HMA models.
+    subscriber: MockSubscriber | None = None
+    kv_events_config: KVEventsConfig | None = None
+    if not uses_hma:
+        port: int
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+            s.bind(("0.0.0.0", 0))
+            port = s.getsockname()[1]
+
+        events_endpoint = f"tcp://*:{port}"
+        kv_events_config = KVEventsConfig(
+            enable_kv_cache_events=True,
+            publisher="zmq",
+            endpoint=events_endpoint,
+            topic="test",
+        )
+
+    # Attention-free / hybrid models disable prefix caching by default
+    # (ModelConfig.is_prefix_caching_supported returns False).  Without it,
+    # mamba_block_size falls back to max_model_len, making GPU blocks too
+    # large for any reasonable offloaded block_size.  Force-enable it.
+    force_prefix_caching = uses_hma
+
+    llm = LLM(
+        model=model,
+        max_model_len=4096,
+        gpu_memory_utilization=0.5,
+        kv_events_config=kv_events_config,
+        kv_transfer_config=kv_transfer_config,
+        **({"attention_config": {"backend": attn_backend}} if attn_backend else {}),
+        # HMA models need explicit opt-in when kv_transfer_config is set
+        **({"disable_hybrid_kv_cache_manager": False} if uses_hma else {}),
+        **({"enable_prefix_caching": True} if force_prefix_caching else {}),
+        # ROCm: batch size 1 to reduce variability
+        **({"max_num_seqs": 1} if current_platform.is_rocm() else {}),
+    )
+
+    if kv_events_config is not None:
+        events_endpoint = events_endpoint.replace("*", "127.0.0.1")
+        subscriber = MockSubscriber(events_endpoint, topic=kv_events_config.topic)
+
+    try:
+        _latency_test(llm, subscriber)
+        _accuracy_test(llm, subscriber)
+    finally:
+        if subscriber is not None:
+            subscriber.close()
+        del llm
+
+
+def test_tiering_offloading() -> None:
+    """Tests OffloadingConnector with TieringOffloadingSpec."""
+    extra_config: dict = {
+        "cpu_bytes_to_use": 500 << 20,
+        "block_size": 48,
+        "spec_name": "TieringOffloadingSpec",
+        "secondary_tiers": [{"type": "example"}],
+    }
+    kv_transfer_config = KVTransferConfig(
+        kv_connector="OffloadingConnector",
+        kv_role="kv_both",
+        kv_connector_extra_config=extra_config,
+    )
+
+    port: int
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.bind(("0.0.0.0", 0))
+        port = s.getsockname()[1]
+    events_endpoint = f"tcp://*:{port}"
+    kv_events_config = KVEventsConfig(
+        enable_kv_cache_events=True,
+        publisher="zmq",
+        endpoint=events_endpoint,
+        topic="test",
+    )
+
+    llm = LLM(
+        model="meta-llama/Llama-3.2-1B-Instruct",
+        max_model_len=4096,
+        gpu_memory_utilization=0.5,
+        kv_events_config=kv_events_config,
+        kv_transfer_config=kv_transfer_config,
+    )
+    subscriber = MockSubscriber(
+        events_endpoint.replace("*", "127.0.0.1"),
+        topic=kv_events_config.topic,
+    )
+    try:
+        _latency_test(llm, subscriber)
+        _accuracy_test(llm, subscriber)
+    finally:
+        subscriber.close()
+        del llm
+
+
+def test_fs_tiering_offloading(tmp_path) -> None:
+    """Tests OffloadingConnector with TieringOffloadingSpec
+    + fs_python secondary tier."""
+    extra_config: dict = {
+        "cpu_bytes_to_use": 1 << 30,
+        "block_size": 48,
+        "spec_name": "TieringOffloadingSpec",
+        "secondary_tiers": [{"type": "fs_python", "root_dir": str(tmp_path)}],
+    }
+    kv_transfer_config = KVTransferConfig(
+        kv_connector="OffloadingConnector",
+        kv_role="kv_both",
+        kv_connector_extra_config=extra_config,
+    )
+
+    port: int
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.bind(("0.0.0.0", 0))
+        port = s.getsockname()[1]
+    events_endpoint = f"tcp://*:{port}"
+    kv_events_config = KVEventsConfig(
+        enable_kv_cache_events=True,
+        publisher="zmq",
+        endpoint=events_endpoint,
+        topic="test",
+    )
+
+    llm = LLM(
+        model="meta-llama/Llama-3.2-1B-Instruct",
+        max_model_len=512,
+        gpu_memory_utilization=0.5,
+        kv_events_config=kv_events_config,
+        kv_transfer_config=kv_transfer_config,
+    )
+    subscriber = MockSubscriber(
+        events_endpoint.replace("*", "127.0.0.1"),
+        topic=kv_events_config.topic,
+    )
+    try:
+        _latency_test(llm, subscriber, reset_connector=True)
+        _accuracy_test(llm, subscriber)
+    finally:
+        subscriber.close()
+        del llm
diff --git a/tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py b/tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py
index 283b4f25e6e4..44fc6d06d778 100644
--- a/tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py
+++ b/tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py
@@ -587,7 +587,7 @@ def test_cannot_recv():
     assert_scheduler_empty(scheduler)
 
 
-@patch("vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.current_platform")
+@patch("vllm.distributed.kv_transfer.kv_connector.v1.nixl.scheduler.current_platform")
 def test_p_side_chunked_prefill_mamba(mock_platform):
     """P-side integration: Mamba N-1 truncation + chunked prefill completes.
 
diff --git a/tests/v1/kv_connector/unit/test_rixl_gpu_mem_diag.py b/tests/v1/kv_connector/unit/test_rixl_gpu_mem_diag.py
new file mode 100644
index 000000000000..3a3ef2a88a68
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_rixl_gpu_mem_diag.py
@@ -0,0 +1,222 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Verify that GPU memory is fully released after RixlConnector shutdown on ROCm.
+
+Regression test for ROCm/ucx#33: UCX rocm_ipc transport permanently pinned
+GPU memory via hsa_amd_ipc_memory_create during ucp_mem_map, causing
+GPU memory to be unrecoverable after engine shutdown.
+"""
+
+import gc
+
+import pytest
+import torch
+
+from vllm.platforms import current_platform
+
+pytestmark = pytest.mark.skipif(
+    not current_platform.is_rocm(),
+    reason="ROCm platform required",
+)
+
+
+def _mb(b: int) -> float:
+    return b / (1024 * 1024)
+
+
+def _gpu_snapshot(tag: str, prev_alloc: float = 0.0) -> dict:
+    """Print and return current GPU memory stats."""
+    torch.accelerator.synchronize()
+    alloc = torch.accelerator.memory_allocated()
+    reserved = torch.accelerator.memory_reserved()
+    # mem_get_info is not available on torch.accelerator
+    try:
+        drv_free, drv_total = torch.cuda.mem_get_info()
+        drv_used = drv_total - drv_free
+        drv_pct = drv_used / drv_total * 100
+    except Exception:
+        drv_used = drv_total = drv_pct = 0
+    alloc_mb = _mb(alloc)
+    drv_used_mb = _mb(drv_used)
+    delta = alloc_mb - prev_alloc
+    print(
+        f"  {tag:<40s} | {alloc_mb:>9.1f} alloc | "
+        f"{_mb(reserved):>9.1f} rsrvd | "
+        f"{drv_used_mb:>9.1f} driver ({drv_pct:.1f}%) | "
+        f"delta {delta:>+9.1f}"
+    )
+    return {
+        "tag": tag,
+        "alloc_mb": alloc_mb,
+        "drv_used_mb": drv_used_mb,
+        "drv_pct": drv_pct,
+    }
+
+
+def _full_gpu_cleanup():
+    """gc.collect + torch empty_cache, multiple rounds."""
+    gc.unfreeze()
+    for _ in range(3):
+        if gc.collect() == 0:
+            break
+    torch.accelerator.empty_cache()
+
+
+@pytest.mark.parametrize("model_name, sw_size", [("google/gemma-3-1b-it", 512)])
+def test_gpu_memory_rixl_hma(model_name, sw_size):
+    """Track GPU memory through NixlConnector create/infer/shutdown cycle."""
+    from vllm import LLM, SamplingParams
+    from vllm.config import KVTransferConfig
+    from vllm.distributed.parallel_state import cleanup_dist_env_and_memory
+
+    llm_kwargs = {
+        "model": model_name,
+        "enforce_eager": True,
+        "gpu_memory_utilization": 0.5,
+        "kv_transfer_config": KVTransferConfig(
+            kv_connector="NixlConnector",
+            kv_role="kv_both",
+        ),
+        "max_model_len": 2048,
+        "disable_hybrid_kv_cache_manager": False,
+        "max_num_batched_tokens": 1024,
+        "enable_prefix_caching": False,
+        "block_size": 16,
+    }
+
+    print("\n" + "=" * 90)
+    print("GPU MEMORY -- RIXL NixlConnector HMA (ROCm)")
+    print("=" * 90)
+    gc.collect()
+    torch.accelerator.empty_cache()
+    torch.accelerator.reset_peak_memory_stats()
+    snap0 = _gpu_snapshot("0. baseline", 0.0)
+
+    # create + infer
+    llm = LLM(**llm_kwargs)
+    snap1 = _gpu_snapshot("1. after LLM()", snap0["alloc_mb"])
+
+    llm.generate(
+        ["hi" * 1401],
+        SamplingParams(
+            temperature=0.0,
+            max_tokens=1,
+            extra_args={
+                "kv_transfer_params": {
+                    "do_remote_decode": True,
+                    "do_remote_prefill": False,
+                    "remote_engine_id": None,
+                    "remote_block_ids": None,
+                    "remote_host": None,
+                    "remote_port": None,
+                }
+            },
+        ),
+    )
+    snap2 = _gpu_snapshot("2. after generate()", snap1["alloc_mb"])
+
+    # shutdown + cleanup
+    print("\n--- shutdown ---")
+    llm.llm_engine.engine_core.shutdown()
+    _gpu_snapshot("3. after shutdown()", snap2["alloc_mb"])
+
+    del llm
+    _full_gpu_cleanup()
+    cleanup_dist_env_and_memory()
+    _full_gpu_cleanup()
+    torch._dynamo.reset()
+    gc.collect()
+    torch.accelerator.empty_cache()
+    snap_final = _gpu_snapshot("4. final", snap2["alloc_mb"])
+
+    # summary
+    print("\n" + "=" * 90)
+    baseline = snap0["alloc_mb"]
+    final = snap_final["alloc_mb"]
+    peak = snap2["alloc_mb"]
+    total_alloc = peak - baseline
+
+    print(
+        f"  PyTorch:  baseline={baseline:.0f}  peak={peak:.0f}  "
+        f"final={final:.0f}  "
+        f"leaked={final - baseline:.0f} MB"
+        + (
+            f" ({(final - baseline) / total_alloc * 100:.1f}%)"
+            if total_alloc > 0
+            else ""
+        )
+    )
+
+    drv_base = snap0["drv_used_mb"]
+    drv_final = snap_final["drv_used_mb"]
+    drv_leaked = drv_final - drv_base
+    print(
+        f"  Driver:   baseline={drv_base:.0f} ({snap0['drv_pct']:.1f}%)  "
+        f"peak={snap2['drv_used_mb']:.0f} ({snap2['drv_pct']:.1f}%)  "
+        f"final={drv_final:.0f} ({snap_final['drv_pct']:.1f}%)  "
+        f"leaked={drv_leaked:.0f} MB"
+    )
+    print("=" * 90)
+
+    # Peak driver memory used above baseline
+    drv_peak = snap2["drv_used_mb"] - drv_base
+    leak_pct = (drv_leaked / drv_peak * 100) if drv_peak > 0 else 0
+    max_leak_pct = 10
+    assert leak_pct <= max_leak_pct, (
+        f"{drv_leaked:.0f} MB ({leak_pct:.1f}%) of driver-level GPU memory "
+        f"not freed after NixlConnector shutdown "
+        f"(peak allocation: {drv_peak:.0f} MB, threshold: {max_leak_pct}%)"
+    )
+
+
+@pytest.mark.parametrize("model_name", ["google/gemma-3-1b-it"])
+def test_gpu_memory_no_rixl_baseline(model_name):
+    """Same workload without NixlConnector.  Comparing driver-level memory
+    between this and test_gpu_memory_rixl_hma isolates UCX/RIXL impact."""
+    from vllm import LLM, SamplingParams
+    from vllm.distributed.parallel_state import cleanup_dist_env_and_memory
+
+    print("\n" + "=" * 90)
+    print("CONTROL -- same model, no RIXL connector")
+    print("=" * 90)
+    gc.collect()
+    torch.accelerator.empty_cache()
+    snap0 = _gpu_snapshot("baseline", 0.0)
+
+    llm = LLM(
+        model=model_name,
+        enforce_eager=True,
+        gpu_memory_utilization=0.5,
+        max_model_len=2048,
+        max_num_batched_tokens=1024,
+        enable_prefix_caching=False,
+        block_size=16,
+    )
+    _gpu_snapshot("after LLM()", snap0["alloc_mb"])
+
+    llm.generate(["hi " * 500], SamplingParams(max_tokens=1))
+    snap_peak = _gpu_snapshot("after generate()", snap0["alloc_mb"])
+
+    llm.llm_engine.engine_core.shutdown()
+    del llm
+    _full_gpu_cleanup()
+    cleanup_dist_env_and_memory()
+    _full_gpu_cleanup()
+    torch._dynamo.reset()
+    gc.collect()
+    torch.accelerator.empty_cache()
+    snap_final = _gpu_snapshot("final", snap0["alloc_mb"])
+
+    drv_base = snap0["drv_used_mb"]
+    drv_leaked = snap_final["drv_used_mb"] - drv_base
+    drv_peak = snap_peak["drv_used_mb"] - drv_base
+    print(f"\n  Driver leaked (no rixl): {drv_leaked:.0f} MB")
+    print("=" * 90)
+
+    leak_pct = (drv_leaked / drv_peak * 100) if drv_peak > 0 else 0
+    max_leak_pct = 10
+    assert leak_pct <= max_leak_pct, (
+        f"{drv_leaked:.0f} MB ({leak_pct:.1f}%) of driver-level GPU memory "
+        f"not freed after baseline shutdown "
+        f"(peak allocation: {drv_peak:.0f} MB, threshold: {max_leak_pct}%)"
+    )
diff --git a/tests/v1/kv_connector/unit/test_scheduler_kv_connector_override.py b/tests/v1/kv_connector/unit/test_scheduler_kv_connector_override.py
index 2834647fe1ff..164d9025dfef 100644
--- a/tests/v1/kv_connector/unit/test_scheduler_kv_connector_override.py
+++ b/tests/v1/kv_connector/unit/test_scheduler_kv_connector_override.py
@@ -17,6 +17,7 @@
 from vllm.v1.core.kv_cache_utils import BlockHash
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.core.sched.scheduler import Scheduler
+from vllm.v1.kv_cache_interface import KVCacheConfig
 from vllm.v1.request import Request
 
 
@@ -26,7 +27,7 @@ def __init__(self, block_hashes_by_req: dict[str, list[BlockHash]]):
 
 
 class DummyKVConnector(KVConnectorBase_V1):
-    def __init__(self, vllm_config, role, kv_cache_config=None):
+    def __init__(self, vllm_config, role, kv_cache_config: KVCacheConfig):
         super().__init__(vllm_config, role, kv_cache_config)
 
     def get_num_new_matched_tokens(
diff --git a/tests/v1/kv_connector/unit/test_tp_mapping.py b/tests/v1/kv_connector/unit/test_tp_mapping.py
new file mode 100644
index 000000000000..95d49faf042f
--- /dev/null
+++ b/tests/v1/kv_connector/unit/test_tp_mapping.py
@@ -0,0 +1,146 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Tests for TP mapping and transfer plan utilities.
+
+These tests verify that TP mapping produces correct outputs
+(source ranks, split handles, desc IDs).
+No GPU or NIXL required.
+"""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.tp_mapping import (
+    TPMapping,
+    compute_tp_mapping,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker import (
+    NixlConnectorWorker,
+)
+from vllm.v1.kv_cache_interface import FullAttentionSpec, MambaSpec
+
+# ======================================================================
+# Test fixtures / helpers
+# ======================================================================
+
+
+def _compute_mapping(
+    tp_rank: int = 0,
+    tp_size: int = 1,
+    remote_tp_size: int = 1,
+    is_mla: bool = False,
+    num_kv_heads: int = 8,
+    group_spec_types: tuple[type, ...] = (FullAttentionSpec,),
+) -> TPMapping:
+    transfer_topology = SimpleNamespace(
+        tp_rank=tp_rank,
+        tp_size=tp_size,
+        is_mla=is_mla,
+        total_num_kv_heads=num_kv_heads,
+    )
+    return compute_tp_mapping(
+        transfer_topology=transfer_topology,
+        remote_tp_size=remote_tp_size,
+        group_spec_types=group_spec_types,
+    )
+
+
+# ======================================================================
+# TP mapping structure tests
+# ======================================================================
+
+
+class TestTPMappingStructure:
+    def test_source_ranks_homogeneous(self):
+        m = _compute_mapping(tp_size=2, tp_rank=1, remote_tp_size=2)
+        assert m.all_source_ranks == (1,)
+
+    def test_source_ranks_d_gt_p(self):
+        m = _compute_mapping(tp_size=4, tp_rank=2, remote_tp_size=2)
+        assert m.all_source_ranks == (1,)
+
+    def test_source_ranks_p_gt_d(self):
+        m = _compute_mapping(tp_size=1, tp_rank=0, remote_tp_size=2)
+        assert m.all_source_ranks == (0, 1)
+
+
+# ======================================================================
+# Split handle tests
+# ======================================================================
+
+
+def _make_mock_worker_for_splits(group_spec_types):
+    """Build a mock NixlConnectorWorker with _group_spec_types for split tests."""
+    worker = object.__new__(NixlConnectorWorker)
+    worker._group_spec_types = group_spec_types
+    return worker
+
+
+class TestBuildSrcSplitHandles:
+    @pytest.mark.parametrize("remote_tp_size", [2, 4])
+    def test_build_src_split_handles(self, remote_tp_size):
+        tp_rank = 0
+        tp_size = 1
+
+        plan = _compute_mapping(
+            tp_rank=tp_rank,
+            tp_size=tp_size,
+            remote_tp_size=remote_tp_size,
+        )
+
+        worker = _make_mock_worker_for_splits((FullAttentionSpec,))
+        src_blocks_data = [(0x2000 + i * 1024, 1024, 0) for i in range(8)]
+        num_descs = len(src_blocks_data)
+        splits = list(
+            worker._build_local_splits_from_plan(
+                plan,
+                src_blocks_data,
+                num_descs,
+            )
+        )
+
+        assert len(splits) == remote_tp_size
+        for handle in splits:
+            assert len(handle) == len(src_blocks_data)
+            for _, length, _ in handle:
+                assert length == 1024 // remote_tp_size
+
+
+class TestMambaPlanSplitHandles:
+    """Verify split handles for Mamba with FA/SSM distinction."""
+
+    def test_fa_and_ssm_different_split_factors(self):
+        """Section 0 split by num_attn_reads, section 1 by abs_tp."""
+        fa_readers = (0,)
+        ssm_readers = (0, 1)
+        plan = TPMapping(
+            source_ranks_per_group=(fa_readers, ssm_readers),
+            all_source_ranks=(0, 1),
+            rank_to_attention_slot={0: 0, 1: 0},
+            rank_offset_factor=0,
+        )
+
+        worker = _make_mock_worker_for_splits((FullAttentionSpec, MambaSpec))
+        # 2 FA descs + 1 SSM desc
+        src_blocks_data = [
+            (1000, 200, 0),  # FA desc 0
+            (2000, 200, 0),  # FA desc 1
+            (3000, 400, 0),  # SSM desc 0
+        ]
+
+        splits = list(worker._build_local_splits_from_plan(plan, src_blocks_data, 2))
+
+        assert len(splits) == 2  # 2 source ranks
+
+        # Rank 0 (FA source, p_idx=0):
+        # FA: chunk=200//1=200, slot=0 → (1000, 200, 0), (2000, 200, 0)
+        # SSM: chunk=400//2=200, idx=0 → (3000, 200, 0)
+        assert splits[0] == [(1000, 200, 0), (2000, 200, 0), (3000, 200, 0)]
+
+        # Rank 1 (not FA source, p_idx=1):
+        # FA: chunk=200//1=200, slot=0 (skip_fa) → (1000, 200, 0), (2000, 200, 0)
+        # SSM: chunk=400//2=200, idx=1 → (3200, 200, 0)
+        assert splits[1] == [(1000, 200, 0), (2000, 200, 0), (3200, 200, 0)]
diff --git a/tests/v1/kv_connector/unit/utils.py b/tests/v1/kv_connector/unit/utils.py
index 75dc479470ea..1b892849d909 100644
--- a/tests/v1/kv_connector/unit/utils.py
+++ b/tests/v1/kv_connector/unit/utils.py
@@ -24,6 +24,7 @@
     KVConnectorBase_V1,
     KVConnectorMetadata,
     KVConnectorRole,
+    KVConnectorWorkerMetadata,
 )
 from vllm.distributed.kv_transfer.kv_connector.v1.example_connector import (  # noqa
     ExampleConnector,
@@ -101,7 +102,9 @@ def create_vllm_config(
     attention_backend: str | None = None,
     kv_load_failure_policy: Literal["recompute", "fail"] = "fail",
     kv_connector: str = "NixlConnector",
+    kv_connector_module_path: str | None = None,
     kv_role: str = "kv_both",
+    disable_hybrid_kv_cache_manager: bool | None = None,
 ) -> VllmConfig:
     """Initialize VllmConfig For Testing."""
     model_config = ModelConfig(
@@ -117,6 +120,7 @@ def create_vllm_config(
         max_model_len=max_model_len,
         enable_chunked_prefill=enable_chunked_prefill,
         is_encoder_decoder=model_config.is_encoder_decoder,
+        disable_hybrid_kv_cache_manager=disable_hybrid_kv_cache_manager,
     )
     # Cache config, optionally force APC
     cache_config = CacheConfig(
@@ -127,6 +131,7 @@ def create_vllm_config(
     )
     kv_transfer_config = KVTransferConfig(
         kv_connector=kv_connector,
+        kv_connector_module_path=kv_connector_module_path,
         kv_role=kv_role,
         enable_permute_local_kv=enable_permute_local_kv,
         kv_connector_extra_config=kv_connector_extra_config or {},
@@ -220,6 +225,7 @@ def create_request(
             remote_block_ids=list(range(num_remote_blocks)),
             remote_host="my-host",
             remote_port=1234,
+            tp_size=1,
         )
 
     max_tokens = 1 if do_remote_decode else max_tokens
@@ -249,6 +255,7 @@ def create_model_runner_output(
     invalid_block_ids: set[int] | None = None,
     use_eos: bool = False,
     token_id: int = 0,
+    kv_connector_worker_meta: KVConnectorWorkerMetadata | None = None,
 ) -> ModelRunnerOutput:
     """Make dummy model runner output for testing."""
 
@@ -266,11 +273,13 @@ def create_model_runner_output(
             finished_sending is None
             and finished_recving is None
             and invalid_block_ids is None
+            and kv_connector_worker_meta is None
         )
         else KVConnectorOutput(
             finished_sending=finished_sending,
             finished_recving=finished_recving,
             invalid_block_ids=invalid_block_ids or set(),
+            kv_connector_worker_meta=kv_connector_worker_meta,
         )
     )
 
@@ -287,9 +296,14 @@ def create_model_runner_output(
 
 
 class TestExampleConnector(ExampleConnector):
-    def __init__(self, config: VllmConfig, role, kv_cache_config):
+    def __init__(
+        self,
+        config: VllmConfig,
+        role: KVConnectorRole,
+        kv_cache_config: KVCacheConfig,
+    ):
         self.name = config.kv_transfer_config.kv_connector_extra_config["name"]
-        self._connector = ExampleConnector(config, role)
+        self._connector = ExampleConnector(config, role, kv_cache_config)
         self.call_record: dict[str, int] = defaultdict(int)
         # Use a unique temp file per connector
         self._event_file = (
@@ -362,7 +376,7 @@ def __init__(
         self,
         vllm_config: VllmConfig,
         role: KVConnectorRole,
-        kv_cache_config: KVCacheConfig | None = None,
+        kv_cache_config: KVCacheConfig,
     ):
         super().__init__(vllm_config, role, kv_cache_config)
         extra_config = self._kv_transfer_config.kv_connector_extra_config
@@ -471,16 +485,41 @@ def make_kv_cache_config(
     )
 
 
-def make_nixl_scheduler(has_mamba: bool = False, is_hma_required: bool = False):
+def make_nixl_scheduler(
+    has_mamba: bool = False,
+    is_hma_required: bool = False,
+    heartbeat: bool = False,
+    kv_lease_duration: int = 30,
+):
     """Create a NixlConnectorScheduler via __new__ (skipping __init__).
 
-    Only sets the two flags needed by the N-1 prefill logic.
+    Only sets the flags needed by the tests.  When *heartbeat=True* the
+    scheduler-side heartbeat bookkeeping fields are also initialised.
     """
-    from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import (
+    from vllm.distributed.kv_transfer.kv_connector.v1.nixl.scheduler import (
         NixlConnectorScheduler,
     )
 
     sched = object.__new__(NixlConnectorScheduler)
     sched._has_mamba = has_mamba
     sched._is_hma_required = is_hma_required
+
+    if heartbeat:
+        sched._heartbeat_by_engine = {}
+        sched._heartbeat_req_engine = {}
+        sched._last_heartbeat_time = 0.0
+        sched._kv_lease_duration = kv_lease_duration
+        sched._heartbeat_interval = kv_lease_duration // 6
+        # Fields touched by build_connector_meta / request_finished:
+        sched._reqs_need_recv = {}
+        sched._reqs_need_send = {}
+        sched._reqs_in_batch = set()
+        sched._reqs_not_processed = set()
+        sched._reqs_need_save = {}
+        sched.use_host_buffer = False
+        sched.engine_id = "test-engine"
+        sched.side_channel_host = "localhost"
+        sched.side_channel_port = 5555
+        sched.blocks_per_sw = []
+        sched.is_bidirectional_kv_xfer_enabled = False
     return sched
diff --git a/tests/v1/kv_offload/cpu/test_gpu_worker.py b/tests/v1/kv_offload/cpu/test_gpu_worker.py
new file mode 100644
index 000000000000..e4ed635b9b70
--- /dev/null
+++ b/tests/v1/kv_offload/cpu/test_gpu_worker.py
@@ -0,0 +1,419 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import random
+import time
+import uuid
+
+import pytest
+import torch
+
+from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed
+from vllm.v1.kv_offload.base import (
+    CanonicalKVCacheRef,
+    CanonicalKVCaches,
+    CanonicalKVCacheTensor,
+    GPULoadStoreSpec,
+)
+from vllm.v1.kv_offload.cpu.common import CPULoadStoreSpec
+from vllm.v1.kv_offload.cpu.gpu_worker import CpuGpuOffloadingHandlers
+from vllm.v1.kv_offload.cpu.shared_offload_region import SharedOffloadRegion
+
+NUM_GPU_BLOCKS = [64]
+NUM_CPU_BLOCKS = [256]
+GPU_PAGE_SIZES = [512, 1024]
+BLOCK_SIZE_FACTORS = [1, 3]
+NUM_TENSORS = [4]
+SEEDS = [0]
+DEVICE_TYPE = current_platform.device_type
+DEVICES = [f"{DEVICE_TYPE}:0"]
+NUM_MAPPINGS = [3]
+NUM_MAPPINGS_PER_GROUP = [2]
+
+
+@pytest.mark.parametrize("gpu_to_cpu", [True, False])
+@pytest.mark.parametrize("num_mappings", NUM_MAPPINGS)
+@pytest.mark.parametrize("gpu_page_size_bytes", GPU_PAGE_SIZES)
+@pytest.mark.parametrize("block_size_factor", BLOCK_SIZE_FACTORS)
+@pytest.mark.parametrize("num_gpu_blocks", NUM_GPU_BLOCKS)
+@pytest.mark.parametrize("num_cpu_blocks", NUM_CPU_BLOCKS)
+@pytest.mark.parametrize("num_tensors", NUM_TENSORS)
+@pytest.mark.parametrize("seed", SEEDS)
+@pytest.mark.parametrize("device", DEVICES)
+@pytest.mark.parametrize("use_shared_memory", [False, True])
+@torch.inference_mode()
+def test_transfer(
+    default_vllm_config,
+    gpu_to_cpu: bool,
+    num_mappings: int,
+    gpu_page_size_bytes: int,
+    block_size_factor: int,
+    num_gpu_blocks: int,
+    num_cpu_blocks: int,
+    num_tensors: int,
+    seed: int,
+    device: str,
+    use_shared_memory: bool,
+) -> None:
+    set_random_seed(seed)
+
+    # build CanonicalKVCacheTensor list: one per tensor
+    kv_cache_tensors: list[CanonicalKVCacheTensor] = []
+    for i in range(num_tensors):
+        gpu_tensor = torch.zeros(
+            (num_gpu_blocks, gpu_page_size_bytes),
+            dtype=torch.int8,
+            device=device,
+        )
+        kv_cache_tensors.append(
+            CanonicalKVCacheTensor(
+                tensor=gpu_tensor,
+                page_size_bytes=gpu_page_size_bytes,
+            )
+        )
+
+    # one group containing all tensors, one data ref per tensor
+    kv_cache_groups_data_refs: list[list[CanonicalKVCacheRef]] = [
+        [
+            CanonicalKVCacheRef(
+                tensor_idx=i,
+                page_size_bytes=gpu_page_size_bytes,
+            )
+            for i in range(num_tensors)
+        ]
+    ]
+
+    kv_caches = CanonicalKVCaches(
+        tensors=kv_cache_tensors,
+        group_data_refs=kv_cache_groups_data_refs,
+    )
+
+    mmap_region: SharedOffloadRegion | None = None
+    if use_shared_memory:
+        cpu_page_size = gpu_page_size_bytes * num_tensors * block_size_factor
+        mmap_region = SharedOffloadRegion(
+            instance_id=str(uuid.uuid4()),
+            total_size_bytes=num_cpu_blocks * cpu_page_size,
+            num_blocks=num_cpu_blocks,
+            rank=0,
+            num_workers=1,
+            cpu_page_size=cpu_page_size,
+        )
+
+    handlers = CpuGpuOffloadingHandlers(
+        kv_caches=kv_caches,
+        block_size_factor=block_size_factor,
+        num_cpu_blocks=num_cpu_blocks,
+        mmap_region=mmap_region,
+    )
+
+    # select block mappings
+    gpu_blocks = random.sample(range(num_gpu_blocks), num_mappings * block_size_factor)
+    cpu_blocks = random.sample(range(num_cpu_blocks), num_mappings)
+
+    # expand cpu blocks to gpu-page granularity for uniform comparison:
+    # each cpu block maps to block_size_factor consecutive sub-blocks
+    cpu_blocks_expanded = [
+        cpu_block * block_size_factor + j
+        for cpu_block in cpu_blocks
+        for j in range(block_size_factor)
+    ]
+
+    # maybe skip some GPU blocks to test reading/writing from the middle of a CPU block
+    blocks_to_skip = block_size_factor - 1
+    if blocks_to_skip > 0:
+        gpu_blocks = gpu_blocks[blocks_to_skip:]
+        cpu_blocks_expanded = cpu_blocks_expanded[blocks_to_skip:]
+
+    # set transfer direction
+    if gpu_to_cpu:
+        handler = handlers.gpu_to_cpu_handler
+        src_spec = GPULoadStoreSpec(
+            gpu_blocks, group_sizes=(len(gpu_blocks),), block_indices=(blocks_to_skip,)
+        )
+        dst_spec = CPULoadStoreSpec(cpu_blocks)
+        dst_to_src = dict(zip(cpu_blocks_expanded, gpu_blocks))
+        num_dst_sub_blocks = num_gpu_blocks
+    else:
+        handler = handlers.cpu_to_gpu_handler
+        src_spec = CPULoadStoreSpec(cpu_blocks)
+        dst_spec = GPULoadStoreSpec(
+            gpu_blocks, group_sizes=(len(gpu_blocks),), block_indices=(blocks_to_skip,)
+        )
+        dst_to_src = dict(zip(gpu_blocks, cpu_blocks_expanded))
+        num_dst_sub_blocks = num_gpu_blocks
+
+    # randomize src and dst tensors before transfer
+    for tensor in handler.src_tensors:
+        tensor.random_()
+    for tensor in handler.dst_tensors:
+        tensor.random_()
+
+    # clone src and dst tensors before transfer
+    orig_src_tensors = [x.clone() for x in handler.src_tensors]
+    orig_dst_tensors = [x.clone() for x in handler.dst_tensors]
+
+    # call transfer function
+    start_time = time.time()
+    assert handler.transfer_async(1, (src_spec, dst_spec))
+    assert {x.job_id for x in handler._transfers} == {1}
+
+    # wait for transfer to complete
+    end_time = time.time() + 10
+    while time.time() < end_time:
+        finished = handler.get_finished()
+        if finished:
+            assert finished[0].job_id == 1
+            assert finished[0].success
+            assert (
+                finished[0].transfer_type == ("GPU", "CPU")
+                if gpu_to_cpu
+                else ("CPU", "GPU")
+            )
+            assert finished[0].transfer_size == (
+                len(gpu_blocks)
+                * sum([x.page_size_bytes for x in handler.kv_cache_groups_data_refs[0]])
+            )
+            assert finished[0].transfer_time > 0
+            assert finished[0].transfer_time < (time.time() - start_time)
+            break
+        time.sleep(0.1)
+
+    # verify src tensors did not change
+    for orig_tensor, tensor in zip(orig_src_tensors, handler.src_tensors):
+        assert torch.equal(orig_tensor, tensor)
+
+    # verify dst tensors at gpu-page granularity.
+    for src_tensor, dst_tensor, orig_dst_tensor in zip(
+        handler.src_tensors,
+        handler.dst_tensors,
+        orig_dst_tensors,
+    ):
+        # view both GPU and CPU tensors as (n, gpu_page_size_bytes) for comparison.
+        src_view = src_tensor.reshape(-1, gpu_page_size_bytes)
+        dst_view = dst_tensor.reshape(-1, gpu_page_size_bytes)
+        orig_dst_view = orig_dst_tensor.reshape(-1, gpu_page_size_bytes)
+        for dst_sub_block in range(num_dst_sub_blocks):
+            src_sub_block = dst_to_src.get(dst_sub_block)
+            if src_sub_block is not None:
+                expected = src_view[src_sub_block]
+            else:
+                expected = orig_dst_view[dst_sub_block]
+            torch.testing.assert_close(dst_view[dst_sub_block].cpu(), expected.cpu())
+
+    # Drop loop-variable refs so mmap_obj has no exported buffers at cleanup.
+    del orig_tensor, tensor, src_tensor, dst_tensor, orig_dst_tensor
+    del src_view, dst_view, orig_dst_view, expected
+
+    handlers.cpu_to_gpu_handler.shutdown()
+    handlers.gpu_to_cpu_handler.shutdown()
+    if mmap_region:
+        mmap_region.cleanup()
+
+
+@pytest.mark.parametrize("gpu_to_cpu", [True, False])
+@pytest.mark.parametrize("num_mappings_per_group", NUM_MAPPINGS_PER_GROUP)
+@pytest.mark.parametrize("gpu_page_size_bytes", GPU_PAGE_SIZES)
+@pytest.mark.parametrize("block_size_factor", BLOCK_SIZE_FACTORS)
+@pytest.mark.parametrize("num_gpu_blocks", NUM_GPU_BLOCKS)
+@pytest.mark.parametrize("num_cpu_blocks", NUM_CPU_BLOCKS)
+@pytest.mark.parametrize("seed", SEEDS)
+@pytest.mark.parametrize("device", DEVICES)
+@torch.inference_mode()
+def test_transfer_multi_group(
+    default_vllm_config,
+    gpu_to_cpu: bool,
+    num_mappings_per_group: int,
+    gpu_page_size_bytes: int,
+    block_size_factor: int,
+    num_gpu_blocks: int,
+    num_cpu_blocks: int,
+    seed: int,
+    device: str,
+) -> None:
+    """Test transfers with three KV cache groups:
+    - Group 0: aligned transfer with num_mappings_per_group blocks
+    - Group 1: zero blocks (empty group)
+    - Group 2: unaligned CPU->GPU transfer (logical_offset=block_size_factor-1,
+      causing the implementation to skip source sub-blocks) with
+      num_mappings_per_group blocks
+    """
+    set_random_seed(seed)
+
+    # 3 groups, each with 2 tensors
+    num_groups = 3
+    tensors_per_group = 2
+    num_tensors = num_groups * tensors_per_group
+    kv_cache_tensors: list[CanonicalKVCacheTensor] = []
+    for _ in range(num_tensors):
+        gpu_tensor = torch.zeros(
+            (num_gpu_blocks, gpu_page_size_bytes),
+            dtype=torch.int8,
+            device=device,
+        )
+        kv_cache_tensors.append(
+            CanonicalKVCacheTensor(
+                tensor=gpu_tensor,
+                page_size_bytes=gpu_page_size_bytes,
+            )
+        )
+
+    kv_cache_groups_data_refs: list[list[CanonicalKVCacheRef]] = [
+        [
+            CanonicalKVCacheRef(
+                tensor_idx=g * tensors_per_group + i,
+                page_size_bytes=gpu_page_size_bytes,
+            )
+            for i in range(tensors_per_group)
+        ]
+        for g in range(num_groups)
+    ]
+
+    canonical_kv_caches = CanonicalKVCaches(
+        tensors=kv_cache_tensors, group_data_refs=kv_cache_groups_data_refs
+    )
+
+    handlers = CpuGpuOffloadingHandlers(
+        kv_caches=canonical_kv_caches,
+        block_size_factor=block_size_factor,
+        num_cpu_blocks=num_cpu_blocks,
+    )
+
+    # group 0: aligned, group 1: empty, group 2: unaligned on CPU->GPU
+    group_sizes_in_cpu_blocks = [num_mappings_per_group, 0, num_mappings_per_group]
+
+    total_cpu_blocks = sum(group_sizes_in_cpu_blocks)
+    total_gpu_blocks_needed = total_cpu_blocks * block_size_factor
+    gpu_blocks_all = random.sample(range(num_gpu_blocks), total_gpu_blocks_needed)
+    cpu_blocks_all = random.sample(range(num_cpu_blocks), total_cpu_blocks)
+
+    # split gpu/cpu blocks per group
+    gpu_blocks_per_group: list[list[int]] = []
+    cpu_blocks_per_group: list[list[int]] = []
+    gpu_offset = 0
+    cpu_offset = 0
+    for size in group_sizes_in_cpu_blocks:
+        gpu_count = size * block_size_factor
+        gpu_blocks_per_group.append(gpu_blocks_all[gpu_offset : gpu_offset + gpu_count])
+        cpu_blocks_per_group.append(cpu_blocks_all[cpu_offset : cpu_offset + size])
+        gpu_offset += gpu_count
+        cpu_offset += size
+
+    # expand cpu blocks to gpu-page granularity
+    cpu_blocks_expanded_per_group = [
+        [
+            cpu_block * block_size_factor + j
+            for cpu_block in cpu_blocks
+            for j in range(block_size_factor)
+        ]
+        for cpu_blocks in cpu_blocks_per_group
+    ]
+
+    # skip sub-blocks from group 2 to test unaligned transfers.
+    sub_blocks_to_skip = block_size_factor - 1  # e.g. 2 when block_size_factor=3
+    if sub_blocks_to_skip > 0:
+        gpu_blocks_per_group[2] = gpu_blocks_per_group[2][
+            sub_blocks_to_skip:-sub_blocks_to_skip
+        ]
+        cpu_blocks_expanded_per_group[2] = cpu_blocks_expanded_per_group[2][
+            sub_blocks_to_skip:-sub_blocks_to_skip
+        ]
+
+    # build flat gpu_blocks list and group_sizes in GPU blocks
+    gpu_blocks: list[int] = []
+    group_sizes: list[int] = []
+    for gpu_blks in gpu_blocks_per_group:
+        gpu_blocks.extend(gpu_blks)
+        group_sizes.append(len(gpu_blks))
+
+    # build flat cpu_blocks list
+    cpu_blocks = []
+    for cpu_blks in cpu_blocks_per_group:
+        cpu_blocks.extend(cpu_blks)
+
+    # block_indices: only relevant for unaligned transfers
+    block_indices: list[int] = [0, 0, sub_blocks_to_skip]
+
+    if gpu_to_cpu:
+        handler = handlers.gpu_to_cpu_handler
+        src_spec = GPULoadStoreSpec(
+            gpu_blocks, group_sizes=group_sizes, block_indices=block_indices
+        )
+        dst_spec = CPULoadStoreSpec(cpu_blocks)
+        # per-group mapping: cpu sub-block -> gpu sub-block
+        dst_to_src_per_group = [
+            dict(zip(expanded, gpu_blks))
+            for expanded, gpu_blks in zip(
+                cpu_blocks_expanded_per_group, gpu_blocks_per_group
+            )
+        ]
+        num_dst_sub_blocks = num_cpu_blocks * block_size_factor
+    else:
+        handler = handlers.cpu_to_gpu_handler
+        src_spec = CPULoadStoreSpec(cpu_blocks)
+        dst_spec = GPULoadStoreSpec(
+            gpu_blocks, group_sizes=group_sizes, block_indices=block_indices
+        )
+        # per-group mapping: gpu sub-block -> cpu sub-block
+        dst_to_src_per_group = [
+            dict(zip(gpu_blks, expanded))
+            for gpu_blks, expanded in zip(
+                gpu_blocks_per_group, cpu_blocks_expanded_per_group
+            )
+        ]
+        num_dst_sub_blocks = num_gpu_blocks
+
+    # randomize src and dst tensors before transfer
+    for tensor in handler.src_tensors:
+        tensor.random_()
+    for tensor in handler.dst_tensors:
+        tensor.random_()
+
+    orig_src_tensors = [x.clone() for x in handler.src_tensors]
+    orig_dst_tensors = [x.clone() for x in handler.dst_tensors]
+
+    assert handler.transfer_async(1, (src_spec, dst_spec))
+    assert {x.job_id for x in handler._transfers} == {1}
+
+    end_time = time.time() + 10
+    while time.time() < end_time:
+        finished = handler.get_finished()
+        if finished:
+            assert finished[0].job_id == 1
+            assert finished[0].success
+            expected_bytes = sum(
+                group_size * sum([x.page_size_bytes for x in data_refs])
+                for group_size, data_refs in zip(
+                    group_sizes, handler.kv_cache_groups_data_refs
+                )
+            )
+            assert finished[0].transfer_size == expected_bytes
+            break
+        time.sleep(0.1)
+
+    # verify src tensors did not change
+    for orig_tensor, tensor in zip(orig_src_tensors, handler.src_tensors):
+        assert torch.equal(orig_tensor, tensor)
+
+    # verify dst tensors at gpu-page granularity
+    for group_idx, dst_to_src in enumerate(dst_to_src_per_group):
+        group_tensor_offset = group_idx * tensors_per_group
+        for tensor_idx in range(tensors_per_group):
+            src_tensor = handler.src_tensors[group_tensor_offset + tensor_idx]
+            dst_tensor = handler.dst_tensors[group_tensor_offset + tensor_idx]
+            orig_dst_tensor = orig_dst_tensors[group_tensor_offset + tensor_idx]
+            src_view = src_tensor.view(-1, gpu_page_size_bytes)
+            dst_view = dst_tensor.view(-1, gpu_page_size_bytes)
+            orig_dst_view = orig_dst_tensor.view(-1, gpu_page_size_bytes)
+            for dst_sub_block in range(num_dst_sub_blocks):
+                src_sub_block = dst_to_src.get(dst_sub_block)
+                if src_sub_block is not None:
+                    expected = src_view[src_sub_block]
+                else:
+                    expected = orig_dst_view[dst_sub_block]
+                torch.testing.assert_close(
+                    dst_view[dst_sub_block].cpu(), expected.cpu()
+                )
+
+    handlers.cpu_to_gpu_handler.shutdown()
+    handlers.gpu_to_cpu_handler.shutdown()
diff --git a/tests/v1/kv_offload/cpu/test_manager.py b/tests/v1/kv_offload/cpu/test_manager.py
new file mode 100644
index 000000000000..3957294f8b0f
--- /dev/null
+++ b/tests/v1/kv_offload/cpu/test_manager.py
@@ -0,0 +1,650 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections.abc import Iterable
+from dataclasses import dataclass
+
+import numpy as np
+import pytest
+
+from vllm.v1.kv_offload.base import (
+    LoadStoreSpec,
+    OffloadingEvent,
+    OffloadKey,
+    PrepareStoreOutput,
+    ReqContext,
+    make_offload_key,
+)
+from vllm.v1.kv_offload.cpu.common import CPULoadStoreSpec
+from vllm.v1.kv_offload.cpu.manager import CPUOffloadingManager
+from vllm.v1.kv_offload.cpu.policies.arc import ARCCachePolicy
+
+
+def make_req_context(
+    req_id: str = "", kv_transfer_params: dict | None = None
+) -> ReqContext:
+    """Create a ReqContext as production code would, from a request's params."""
+    return ReqContext(req_id=req_id, kv_transfer_params=kv_transfer_params)
+
+
+_EMPTY_REQ_CTX = make_req_context()
+
+
+@dataclass
+class ExpectedPrepareStoreOutput:
+    keys_to_store: list[int]
+    store_block_ids: list[int]
+    evicted_keys: list[int]
+
+
+def to_key(int_hash: int) -> OffloadKey:
+    return make_offload_key(str(int_hash).encode(), 0)
+
+
+def to_keys(int_hashes: list[int]) -> list[OffloadKey]:
+    return [to_key(i) for i in int_hashes]
+
+
+def verify_store_output(
+    prepare_store_output: PrepareStoreOutput | None,
+    expected_prepare_store_output: ExpectedPrepareStoreOutput,
+):
+    assert prepare_store_output is not None
+    assert prepare_store_output.keys_to_store == to_keys(
+        expected_prepare_store_output.keys_to_store
+    )
+    assert prepare_store_output.evicted_keys == to_keys(
+        expected_prepare_store_output.evicted_keys
+    )
+    store_spec = prepare_store_output.store_spec
+    assert isinstance(store_spec, CPULoadStoreSpec)
+    expected_array = np.array(
+        expected_prepare_store_output.store_block_ids, dtype=np.int64
+    )
+    assert np.array_equal(expected_array, store_spec.block_ids)
+
+
+def verify_load_output(
+    prepare_load_output: LoadStoreSpec, expected_prepare_load_output: list[int]
+):
+    assert isinstance(prepare_load_output, CPULoadStoreSpec)
+    expected_array = np.array(expected_prepare_load_output, dtype=np.int64)
+    assert np.array_equal(expected_array, prepare_load_output.block_ids)
+
+
+def verify_events(
+    events: Iterable[OffloadingEvent],
+    expected_stores: tuple[set[int], ...] = (),
+    expected_evictions: tuple[set[int], ...] = (),
+):
+    stores: list[set[OffloadKey]] = []
+    evictions: list[set[OffloadKey]] = []
+    for event in events:
+        assert event.medium == CPULoadStoreSpec.medium()
+        if event.removed:
+            evictions.append(set(event.keys))
+        else:
+            stores.append(set(event.keys))
+
+    def to_key_sets(
+        int_sets: tuple[set[int], ...],
+    ) -> tuple[set[OffloadKey], ...]:
+        return tuple([set(to_keys(list(int_set))) for int_set in int_sets])
+
+    assert tuple(evictions) == to_key_sets(expected_evictions)
+    assert tuple(stores) == to_key_sets(expected_stores)
+
+
+@pytest.mark.parametrize("eviction_policy", ["lru", "arc"])
+def test_already_stored_block_not_evicted_during_prepare_store(eviction_policy):
+    """
+    Regression test: a block that is already stored must not be evicted
+    by prepare_store() when it needs to make room for new blocks.
+    Applies to both lru and arc policies.
+
+    Scenario:
+        - Store blocks [1, 2] and complete.
+        - touch([1]) makes block 2 the LRU candidate.
+        - prepare_store([2, 3, 4, 5]):
+            * block 2 is filtered out as "already stored"
+            * but without the fix, block 2 would be evicted as the LRU
+              candidate to make room for [3, 4, 5]
+        - After complete_store([2, 3, 4, 5]), block 2 must still be present.
+    """
+    manager = CPUOffloadingManager(
+        num_blocks=4,
+        cache_policy=eviction_policy,
+        enable_events=True,
+    )
+
+    # store [1, 2] and complete
+    manager.prepare_store(to_keys([1, 2]), _EMPTY_REQ_CTX)
+    manager.complete_store(to_keys([1, 2]), _EMPTY_REQ_CTX)
+
+    # touch [1] to make block 2 the LRU candidate
+    manager.touch(to_keys([1]), _EMPTY_REQ_CTX)
+
+    # prepare_store([2, 3, 4, 5]):
+    #   - block 2 is already stored -> filtered out of keys_to_store
+    #   - block 2 must NOT be evicted even though it is the LRU candidate
+    #   - block 1 (ID 0) is evicted instead; new blocks [3,4,5] get IDs 2,3,0
+    prepare_store_output = manager.prepare_store(to_keys([2, 3, 4, 5]), _EMPTY_REQ_CTX)
+    verify_store_output(
+        prepare_store_output,
+        ExpectedPrepareStoreOutput(
+            keys_to_store=[3, 4, 5],
+            store_block_ids=[2, 3, 0],
+            evicted_keys=[1],  # block 1 evicted, not block 2
+        ),
+    )
+
+    # complete_store must not silently drop block 2
+    manager.complete_store(to_keys([2, 3, 4, 5]), _EMPTY_REQ_CTX)
+
+    # block 2 must still be present in the cache
+    assert manager.lookup(to_key(2), _EMPTY_REQ_CTX) is True
+
+
+def test_cpu_manager():
+    """
+    Tests CPUOffloadingManager with lru policy.
+    """
+    # initialize a CPU manager with a capacity of 4 blocks
+    cpu_manager = CPUOffloadingManager(
+        num_blocks=4, cache_policy="lru", enable_events=True
+    )
+
+    # prepare store [1, 2]
+    prepare_store_output = cpu_manager.prepare_store(to_keys([1, 2]), _EMPTY_REQ_CTX)
+    verify_store_output(
+        prepare_store_output,
+        ExpectedPrepareStoreOutput(
+            keys_to_store=[1, 2],
+            store_block_ids=[0, 1],
+            evicted_keys=[],
+        ),
+    )
+
+    # lookup [1, 2] -> write in-flight, not yet ready
+    assert cpu_manager.lookup(to_key(1), _EMPTY_REQ_CTX) is None
+    assert cpu_manager.lookup(to_key(2), _EMPTY_REQ_CTX) is None
+
+    # no events so far
+    assert list(cpu_manager.take_events()) == []
+
+    # complete store [1, 2]
+    cpu_manager.complete_store(to_keys([1, 2]), _EMPTY_REQ_CTX)
+    verify_events(cpu_manager.take_events(), expected_stores=({1, 2},))
+
+    # lookup [1, 2]
+    assert cpu_manager.lookup(to_key(1), _EMPTY_REQ_CTX) is True
+    assert cpu_manager.lookup(to_key(2), _EMPTY_REQ_CTX) is True
+    assert cpu_manager.lookup(to_key(3), _EMPTY_REQ_CTX) is False
+
+    # prepare store [2, 3, 4, 5] -> evicts [1]
+    prepare_store_output = cpu_manager.prepare_store(
+        to_keys([2, 3, 4, 5]), _EMPTY_REQ_CTX
+    )
+    verify_store_output(
+        prepare_store_output,
+        ExpectedPrepareStoreOutput(
+            keys_to_store=[3, 4, 5],
+            store_block_ids=[2, 3, 0],
+            evicted_keys=[1],
+        ),
+    )
+
+    # verify eviction event
+    verify_events(cpu_manager.take_events(), expected_evictions=({1},))
+
+    # prepare store with no space
+    assert cpu_manager.prepare_store(to_keys([1, 6]), _EMPTY_REQ_CTX) is None
+
+    # complete store [2, 3, 4, 5]
+    cpu_manager.complete_store(to_keys([2, 3, 4, 5]), _EMPTY_REQ_CTX)
+
+    # lookup (now that we have [2, 3, 4, 5])
+    assert cpu_manager.lookup(to_key(1), _EMPTY_REQ_CTX) is False
+    assert cpu_manager.lookup(to_key(2), _EMPTY_REQ_CTX) is True
+    assert cpu_manager.lookup(to_key(3), _EMPTY_REQ_CTX) is True
+    assert cpu_manager.lookup(to_key(4), _EMPTY_REQ_CTX) is True
+    assert cpu_manager.lookup(to_key(5), _EMPTY_REQ_CTX) is True
+    assert cpu_manager.lookup(to_key(0), _EMPTY_REQ_CTX) is False
+
+    # prepare load [2, 3]
+    prepare_load_output = cpu_manager.prepare_load(to_keys([2, 3]), _EMPTY_REQ_CTX)
+    verify_load_output(prepare_load_output, [1, 2])
+
+    # prepare store with no space ([2, 3] is being loaded)
+    assert cpu_manager.prepare_store(to_keys([6, 7, 8]), _EMPTY_REQ_CTX) is None
+
+    # complete load [2, 3]
+    cpu_manager.complete_load(to_keys([2, 3]), _EMPTY_REQ_CTX)
+
+    # prepare store [6, 7, 8] -> evicts [2, 3, 4] (oldest)
+    prepare_store_output = cpu_manager.prepare_store(to_keys([6, 7, 8]), _EMPTY_REQ_CTX)
+    verify_store_output(
+        prepare_store_output,
+        ExpectedPrepareStoreOutput(
+            keys_to_store=[6, 7, 8],
+            store_block_ids=[3, 2, 1],
+            evicted_keys=[2, 3, 4],
+        ),
+    )
+
+    # complete store [6, 7, 8]
+    cpu_manager.complete_store(to_keys([6, 7, 8]), _EMPTY_REQ_CTX)
+
+    # touch [5, 6, 7] (move to end of LRU order)
+    cpu_manager.touch(to_keys([5, 6, 7]), _EMPTY_REQ_CTX)
+
+    # prepare store [7, 9] -> evicts [8] (oldest following previous touch)
+    prepare_store_output = cpu_manager.prepare_store(to_keys([9]), _EMPTY_REQ_CTX)
+    verify_store_output(
+        prepare_store_output,
+        ExpectedPrepareStoreOutput(
+            keys_to_store=[9],
+            store_block_ids=[1],
+            evicted_keys=[8],
+        ),
+    )
+
+    # complete store [7, 9] with failure
+    cpu_manager.complete_store(to_keys([7, 9]), _EMPTY_REQ_CTX, success=False)
+
+    # assert [7] is still stored, but [9] is not
+    assert cpu_manager.lookup(to_key(7), _EMPTY_REQ_CTX) is True
+    assert cpu_manager.lookup(to_key(9), _EMPTY_REQ_CTX) is False
+
+    verify_events(
+        cpu_manager.take_events(),
+        expected_stores=({3, 4, 5}, {6, 7, 8}),
+        expected_evictions=({2, 3, 4}, {8}),
+    )
+
+
+def test_prepare_load_preserves_key_order():
+    """block_ids[i] must correspond to keys[i] (co-indexed invariant)."""
+    manager = CPUOffloadingManager(num_blocks=4, cache_policy="lru")
+
+    key_a, key_b, key_c = to_key(0), to_key(1), to_key(2)
+
+    # Store all three keys and learn their block ID assignments
+    store_output = manager.prepare_store([key_a, key_b, key_c], _EMPTY_REQ_CTX)
+    assert store_output is not None
+    assert isinstance(store_output.store_spec, CPULoadStoreSpec)
+    key_to_block_id = {
+        k: int(bid)
+        for k, bid in zip(store_output.keys_to_store, store_output.store_spec.block_ids)
+    }
+    manager.complete_store([key_a, key_b, key_c], _EMPTY_REQ_CTX)
+
+    # Forward order: [a, b, c]
+    spec_fwd = manager.prepare_load([key_a, key_b, key_c], _EMPTY_REQ_CTX)
+    assert isinstance(spec_fwd, CPULoadStoreSpec)
+    assert [int(x) for x in spec_fwd.block_ids] == [
+        key_to_block_id[key_a],
+        key_to_block_id[key_b],
+        key_to_block_id[key_c],
+    ]
+    manager.complete_load([key_a, key_b, key_c], _EMPTY_REQ_CTX)  # order irrelevant
+
+    # Arbitrary permutation: [b, c, a]
+    spec_perm = manager.prepare_load([key_b, key_c, key_a], _EMPTY_REQ_CTX)
+    assert isinstance(spec_perm, CPULoadStoreSpec)
+    assert [int(x) for x in spec_perm.block_ids] == [
+        key_to_block_id[key_b],
+        key_to_block_id[key_c],
+        key_to_block_id[key_a],
+    ]
+    manager.complete_load([key_a, key_b, key_c], _EMPTY_REQ_CTX)  # order irrelevant
+
+
+class TestARCPolicy:
+    """Unit tests for CPUOffloadingManager with ARC eviction policy."""
+
+    def _make_manager(
+        self, num_blocks: int = 4, enable_events: bool = True
+    ) -> tuple[CPUOffloadingManager, ARCCachePolicy]:
+        manager = CPUOffloadingManager(
+            num_blocks=num_blocks,
+            cache_policy="arc",
+            enable_events=enable_events,
+        )
+        policy = manager._policy
+        assert isinstance(policy, ARCCachePolicy)
+        return manager, policy
+
+    def test_basic(self):
+        """
+        Tests CPUOffloadingManager with arc policy.
+        Verifies that ARC handles store, load, and lookup operations correctly.
+        """
+        cpu_manager, arc_policy = self._make_manager()
+
+        # prepare store [1, 2]
+        prepare_store_output = cpu_manager.prepare_store(
+            to_keys([1, 2]), _EMPTY_REQ_CTX
+        )
+        verify_store_output(
+            prepare_store_output,
+            ExpectedPrepareStoreOutput(
+                keys_to_store=[1, 2],
+                store_block_ids=[0, 1],
+                evicted_keys=[],
+            ),
+        )
+
+        # lookup [1, 2] -> write in-flight, not yet ready
+        assert cpu_manager.lookup(to_key(1), _EMPTY_REQ_CTX) is None
+        assert cpu_manager.lookup(to_key(2), _EMPTY_REQ_CTX) is None
+
+        # no events so far
+        assert list(cpu_manager.take_events()) == []
+
+        # complete store [1, 2]
+        cpu_manager.complete_store(to_keys([1, 2]), _EMPTY_REQ_CTX)
+        verify_events(cpu_manager.take_events(), expected_stores=({1, 2},))
+
+        # lookup [1, 2]
+        assert cpu_manager.lookup(to_key(1), _EMPTY_REQ_CTX) is True
+        assert cpu_manager.lookup(to_key(2), _EMPTY_REQ_CTX) is True
+        assert cpu_manager.lookup(to_key(3), _EMPTY_REQ_CTX) is False
+
+        # blocks should be in T1 (recent)
+        assert len(arc_policy.t1) == 2
+        assert len(arc_policy.t2) == 0
+
+    def test_t1_to_t2_promotion(self):
+        """
+        Tests that accessing a block in T1 promotes it to T2 (frequent).
+        This is a key feature of ARC's adaptive behavior.
+        """
+        cpu_manager, arc_policy = self._make_manager(enable_events=False)
+
+        # store and complete block 1
+        cpu_manager.prepare_store(to_keys([1]), _EMPTY_REQ_CTX)
+        cpu_manager.complete_store(to_keys([1]), _EMPTY_REQ_CTX)
+
+        # block 1 starts in T1 (recent)
+        assert to_keys([1])[0] in arc_policy.t1
+        assert to_keys([1])[0] not in arc_policy.t2
+
+        # touch block 1 (simulate second access)
+        cpu_manager.touch(to_keys([1]), _EMPTY_REQ_CTX)
+
+        # block 1 should now be in T2 (frequent)
+        assert to_keys([1])[0] not in arc_policy.t1
+        assert to_keys([1])[0] in arc_policy.t2
+
+    def test_eviction_with_load(self):
+        """
+        Tests ARC eviction behavior similar to LRU test.
+        Verifies that blocks being loaded (ref_cnt > 0) cannot be evicted.
+        """
+        cpu_manager, _ = self._make_manager()
+
+        # prepare and complete store [1, 2, 3, 4]
+        prepare_store_output = cpu_manager.prepare_store(
+            to_keys([1, 2, 3, 4]), _EMPTY_REQ_CTX
+        )
+        verify_store_output(
+            prepare_store_output,
+            ExpectedPrepareStoreOutput(
+                keys_to_store=[1, 2, 3, 4],
+                store_block_ids=[0, 1, 2, 3],
+                evicted_keys=[],
+            ),
+        )
+        cpu_manager.complete_store(to_keys([1, 2, 3, 4]), _EMPTY_REQ_CTX)
+
+        # prepare load [2, 3] (increases ref_cnt)
+        prepare_load_output = cpu_manager.prepare_load(to_keys([2, 3]), _EMPTY_REQ_CTX)
+        verify_load_output(prepare_load_output, [1, 2])
+
+        # prepare store [5, 6, 7] with [2, 3] being loaded
+        # should fail because [2, 3] have ref_cnt > 0
+        assert cpu_manager.prepare_store(to_keys([5, 6, 7]), _EMPTY_REQ_CTX) is None
+
+        # complete load [2, 3]
+        cpu_manager.complete_load(to_keys([2, 3]), _EMPTY_REQ_CTX)
+
+        # now prepare store [5, 6, 7] should succeed
+        # ARC will evict blocks one at a time from T1 as needed
+        prepare_store_output = cpu_manager.prepare_store(
+            to_keys([5, 6, 7]), _EMPTY_REQ_CTX
+        )
+        assert prepare_store_output is not None
+        # Should successfully evict enough blocks to make room (at least 1)
+        assert len(prepare_store_output.evicted_keys) >= 1
+
+    def test_adaptive_target(self):
+        """
+        Tests ARC's adaptive target adjustment via ghost lists.
+        When a block in B1 (ghost list) is accessed, target_t1_size increases.
+        When a block in B2 is accessed, target_t1_size decreases.
+        """
+        cpu_manager, arc_policy = self._make_manager(num_blocks=2, enable_events=False)
+
+        # store blocks 1, 2 (fills cache)
+        cpu_manager.prepare_store(to_keys([1, 2]), _EMPTY_REQ_CTX)
+        cpu_manager.complete_store(to_keys([1, 2]), _EMPTY_REQ_CTX)
+
+        initial_target = arc_policy.target_t1_size
+
+        # store block 3, evicting block 1 (moves to B1 ghost list)
+        cpu_manager.prepare_store(to_keys([3]), _EMPTY_REQ_CTX)
+        cpu_manager.complete_store(to_keys([3]), _EMPTY_REQ_CTX)
+
+        # block 1 should be in B1 (ghost list)
+        assert to_keys([1])[0] in arc_policy.b1
+
+        # touch block 1 (cache miss, but in B1)
+        # this should increase target_t1_size (favor recency)
+        cpu_manager.touch(to_keys([1]), _EMPTY_REQ_CTX)
+
+        # target should have increased
+        assert arc_policy.target_t1_size > initial_target
+
+    def test_t1_t2_eviction_policy(self):
+        """
+        Tests that ARC evicts from T1 or T2 based on target_t1_size.
+        If |T1| >= target_t1_size, evict from T1, otherwise from T2.
+        """
+        cpu_manager, arc_policy = self._make_manager(enable_events=False)
+
+        # store blocks 1, 2, 3, 4
+        cpu_manager.prepare_store(to_keys([1, 2, 3, 4]), _EMPTY_REQ_CTX)
+        cpu_manager.complete_store(to_keys([1, 2, 3, 4]), _EMPTY_REQ_CTX)
+
+        # promote blocks 3, 4 to T2 by touching them
+        cpu_manager.touch(to_keys([3, 4]), _EMPTY_REQ_CTX)
+
+        # now: T1 = {1, 2}, T2 = {3, 4}
+        assert len(arc_policy.t1) == 2
+        assert len(arc_policy.t2) == 2
+
+        # set target_t1_size to prefer evicting from T1
+        # (when |T1| >= target, evict from T1)
+        arc_policy.target_t1_size = 1
+
+        # store block 5, should evict from T1 (block 1, LRU in T1)
+        output = cpu_manager.prepare_store(to_keys([5]), _EMPTY_REQ_CTX)
+        assert output is not None
+        assert to_keys([1]) == output.evicted_keys
+
+        cpu_manager.complete_store(to_keys([5]), _EMPTY_REQ_CTX)
+
+        # block 1 should be in B1 (ghost list)
+        assert to_keys([1])[0] in arc_policy.b1
+        # block 5 should be in T1
+        assert to_keys([5])[0] in arc_policy.t1
+
+    def test_ghost_list_bounds(self):
+        """
+        Tests that ghost lists (B1, B2) don't grow unbounded.
+        They should be capped at cache_capacity.
+        """
+        cpu_manager, arc_policy = self._make_manager(num_blocks=2, enable_events=False)
+
+        # fill cache with blocks 1, 2
+        cpu_manager.prepare_store(to_keys([1, 2]), _EMPTY_REQ_CTX)
+        cpu_manager.complete_store(to_keys([1, 2]), _EMPTY_REQ_CTX)
+
+        # store many blocks to fill ghost lists
+        for i in range(3, 20):
+            cpu_manager.prepare_store(to_keys([i]), _EMPTY_REQ_CTX)
+            cpu_manager.complete_store(to_keys([i]), _EMPTY_REQ_CTX)
+
+        # ghost lists should not exceed cache_capacity
+        assert len(arc_policy.b1) <= arc_policy.cache_capacity
+        assert len(arc_policy.b2) <= arc_policy.cache_capacity
+
+    def test_touch_ordering(self):
+        """
+        Tests that touch() correctly updates access patterns.
+        Similar to LRU test but verifies T1/T2 ordering.
+        """
+        cpu_manager, arc_policy = self._make_manager()
+
+        # store blocks 1, 2, 3, 4
+        cpu_manager.prepare_store(to_keys([1, 2, 3, 4]), _EMPTY_REQ_CTX)
+        cpu_manager.complete_store(to_keys([1, 2, 3, 4]), _EMPTY_REQ_CTX)
+
+        # promote 3, 4 to T2
+        cpu_manager.touch(to_keys([3, 4]), _EMPTY_REQ_CTX)
+
+        # T1 = {1, 2}, T2 = {3, 4}
+        # touch [1, 3, 4] - should promote 1 to T2, and move 3,4 to end of T2
+        cpu_manager.touch(to_keys([1, 3, 4]), _EMPTY_REQ_CTX)
+
+        # T1 = {2}, T2 = {1, 3, 4} (in that order, with 4 most recent)
+        assert len(arc_policy.t1) == 1
+        assert len(arc_policy.t2) == 3
+
+        # store block 5, should evict from T1 (block 2, only one in T1)
+        prepare_store_output = cpu_manager.prepare_store(to_keys([5]), _EMPTY_REQ_CTX)
+        verify_store_output(
+            prepare_store_output,
+            ExpectedPrepareStoreOutput(
+                keys_to_store=[5],
+                store_block_ids=[1],  # reuses block 2's storage
+                evicted_keys=[2],
+            ),
+        )
+
+    def test_failed_store(self):
+        """
+        Tests that failed store operations clean up correctly.
+        Similar to LRU test but for ARC.
+        """
+        cpu_manager, arc_policy = self._make_manager()
+
+        # store blocks 1, 2, 3, 4
+        cpu_manager.prepare_store(to_keys([1, 2, 3, 4]), _EMPTY_REQ_CTX)
+        cpu_manager.complete_store(to_keys([1, 2, 3, 4]), _EMPTY_REQ_CTX)
+
+        # prepare store block 5 (will evict block 1)
+        prepare_store_output = cpu_manager.prepare_store(to_keys([5]), _EMPTY_REQ_CTX)
+        assert prepare_store_output is not None
+        assert len(prepare_store_output.evicted_keys) == 1
+
+        # complete store with failure
+        cpu_manager.complete_store(to_keys([5]), _EMPTY_REQ_CTX, success=False)
+
+        # block 5 should not be in cache
+        assert cpu_manager.lookup(to_key(5), _EMPTY_REQ_CTX) is False
+        # block 5 should not be in T1 or T2
+        assert to_keys([5])[0] not in arc_policy.t1
+        assert to_keys([5])[0] not in arc_policy.t2
+
+        # evicted block should still be gone (in B1 ghost list)
+        evicted_hash = prepare_store_output.evicted_keys[0]
+        assert evicted_hash in arc_policy.b1
+
+    def test_full_scenario(self):
+        """
+        Comprehensive test covering multiple ARC operations in sequence.
+        Similar to the full LRU test but adapted for ARC behavior.
+        """
+        cpu_manager, arc_policy = self._make_manager()
+
+        # store [1, 2]
+        cpu_manager.prepare_store(to_keys([1, 2]), _EMPTY_REQ_CTX)
+        cpu_manager.complete_store(to_keys([1, 2]), _EMPTY_REQ_CTX)
+
+        # store [3, 4, 5] -> evicts [1]
+        prepare_store_output = cpu_manager.prepare_store(
+            to_keys([3, 4, 5]), _EMPTY_REQ_CTX
+        )
+        assert prepare_store_output is not None
+        assert len(prepare_store_output.evicted_keys) == 1
+        cpu_manager.complete_store(to_keys([3, 4, 5]), _EMPTY_REQ_CTX)
+
+        # promote some blocks to T2
+        cpu_manager.touch(to_keys([2, 3]), _EMPTY_REQ_CTX)
+
+        # T1 has {4, 5}, T2 has {2, 3}
+        assert len(arc_policy.t1) == 2
+        assert len(arc_policy.t2) == 2
+
+        # store [6] -> should evict from T1 (4 is oldest in T1)
+        prepare_store_output = cpu_manager.prepare_store(to_keys([6]), _EMPTY_REQ_CTX)
+        assert prepare_store_output is not None
+        cpu_manager.complete_store(to_keys([6]), _EMPTY_REQ_CTX)
+
+        # verify blocks 2, 3 (in T2) are still present
+        assert cpu_manager.lookup(to_key(2), _EMPTY_REQ_CTX) is True
+        assert cpu_manager.lookup(to_key(3), _EMPTY_REQ_CTX) is True
+
+        # verify events
+        events = list(cpu_manager.take_events())
+        assert len(events) > 0  # should have store and eviction events
+
+
+def test_filter_reused_manager():
+    """
+    Tests CPUOffloadingManager reuse filtering (store_threshold=2).
+    """
+    manager = CPUOffloadingManager(
+        num_blocks=4,
+        cache_policy="lru",
+        enable_events=True,
+        store_threshold=2,
+        max_tracker_size=3,
+    )
+
+    # Lookup [1, 2] -> 1st time, added to tracker but not eligible for store yet
+    assert manager.lookup(to_key(1), _EMPTY_REQ_CTX) is False
+    assert manager.lookup(to_key(2), _EMPTY_REQ_CTX) is False
+
+    # prepare store [1, 2] -> should be filtered
+    prepare_store_output = manager.prepare_store(to_keys([1, 2]), _EMPTY_REQ_CTX)
+    assert prepare_store_output is not None
+    assert prepare_store_output.keys_to_store == []
+
+    # Lookup [1] -> 2nd time, eligible now
+    assert manager.lookup(to_key(1), _EMPTY_REQ_CTX) is False
+
+    # prepare store [1, 2] -> [1] should be eligible, [2] should be filtered
+    prepare_store_output = manager.prepare_store(to_keys([1, 2]), _EMPTY_REQ_CTX)
+    assert prepare_store_output is not None
+    assert prepare_store_output.keys_to_store == to_keys([1])
+
+    # Lookup [3, 4] -> 1st time
+    # (evicts [2] from tracker since max_size is 3 and tracker has [1])
+    assert manager.lookup(to_key(3), _EMPTY_REQ_CTX) is False
+    assert manager.lookup(to_key(4), _EMPTY_REQ_CTX) is False
+    # Verify [2] was evicted from the tracker (tracker now has: [1], [3], [4])
+    assert to_keys([2])[0] not in manager.counts
+
+    # Lookup [2] again -> (this adds [2] back to the tracker as 1st time)
+    assert manager.lookup(to_key(2), _EMPTY_REQ_CTX) is False
+    # Verify [2] was re-added with count=1 (not eligible yet)
+    assert manager.counts.get(to_keys([2])[0]) == 1
+
+    # prepare store [2] -> should still be filtered out since count was reset
+    prepare_store_output = manager.prepare_store(to_keys([2]), _EMPTY_REQ_CTX)
+    assert prepare_store_output is not None
+    assert prepare_store_output.keys_to_store == []
+
+    manager.complete_store(to_keys([1]), _EMPTY_REQ_CTX)
diff --git a/tests/v1/kv_offload/cpu/test_shared_offload_region.py b/tests/v1/kv_offload/cpu/test_shared_offload_region.py
new file mode 100644
index 000000000000..b33a27ca6453
--- /dev/null
+++ b/tests/v1/kv_offload/cpu/test_shared_offload_region.py
@@ -0,0 +1,625 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for SharedOffloadRegion."""
+
+import contextlib
+import mmap
+import os
+import threading
+import time
+import uuid
+
+import pytest
+
+from vllm.utils.system_utils import get_mp_context
+from vllm.v1.kv_offload.cpu.shared_offload_region import (
+    SharedOffloadRegion,
+    _wait_for_file_size,
+)
+
+PAGE_SIZE = mmap.PAGESIZE
+
+
+# ---------------------------------------------------------------------------
+# Helpers / fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _set_spawn_method(monkeypatch):
+    # On WSL, NVML is not compatible with fork so vLLM auto-overrides the
+    # multiprocessing start method to 'spawn' with a warning. Set it explicitly
+    # here so the override is a no-op and the warning is suppressed.
+    monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
+
+
+def _make_region(
+    instance_id: str,
+    num_blocks: int = 4,
+    cpu_page_size: int = PAGE_SIZE,
+    num_workers: int = 1,
+    rank: int = 0,
+) -> SharedOffloadRegion:
+    total_size_bytes = num_blocks * num_workers * cpu_page_size
+    assert total_size_bytes % PAGE_SIZE == 0
+    return SharedOffloadRegion(
+        instance_id=instance_id,
+        total_size_bytes=total_size_bytes,
+        num_blocks=num_blocks,
+        rank=rank,
+        num_workers=num_workers,
+        cpu_page_size=cpu_page_size,
+    )
+
+
+def _cleanup_file(path: str) -> None:
+    """Best-effort file removal for test teardown."""
+    with contextlib.suppress(FileNotFoundError):
+        os.unlink(path)
+
+
+@contextlib.contextmanager
+def _region(instance_id: str, **kwargs):
+    """Context manager: create one region, clean up on exit."""
+    r = _make_region(instance_id, **kwargs)
+    try:
+        yield r
+    finally:
+        r.cleanup()
+        _cleanup_file(r.mmap_path)
+
+
+@contextlib.contextmanager
+def _multi_region(
+    instance_id: str,
+    num_workers: int,
+    num_blocks: int = 4,
+    cpu_page_size: int = PAGE_SIZE,
+):
+    """Context manager: create one SharedOffloadRegion per rank, clean up on exit."""
+    total = num_blocks * num_workers * cpu_page_size
+    regions = [
+        SharedOffloadRegion(
+            instance_id=instance_id,
+            total_size_bytes=total,
+            num_blocks=num_blocks,
+            rank=rank,
+            num_workers=num_workers,
+            cpu_page_size=cpu_page_size,
+        )
+        for rank in range(num_workers)
+    ]
+    try:
+        yield regions
+    finally:
+        for r in regions:
+            r.cleanup()
+        _cleanup_file(regions[0].mmap_path)
+
+
+def _race_construct(
+    instance_id: str,
+    num_workers: int,
+    num_blocks: int = 4,
+    cpu_page_size: int = PAGE_SIZE,
+) -> tuple[list[SharedOffloadRegion], list[Exception]]:
+    """Spawn num_workers threads that all race to construct SharedOffloadRegion."""
+    total = num_blocks * num_workers * cpu_page_size
+    regions: list[SharedOffloadRegion | None] = [None] * num_workers
+    errors: list[Exception] = []
+    barrier = threading.Barrier(num_workers)
+
+    def worker(rank: int) -> None:
+        barrier.wait()  # all threads start at the same instant
+        try:
+            regions[rank] = SharedOffloadRegion(
+                instance_id=instance_id,
+                total_size_bytes=total,
+                num_blocks=num_blocks,
+                rank=rank,
+                num_workers=num_workers,
+                cpu_page_size=cpu_page_size,
+            )
+        except Exception as e:
+            errors.append(e)
+
+    threads = [threading.Thread(target=worker, args=(i,)) for i in range(num_workers)]
+    for t in threads:
+        t.start()
+    for t in threads:
+        t.join()
+
+    return [r for r in regions if r is not None], errors
+
+
+def _mp_race_construct_and_write(
+    instance_id: str,
+    total_bytes: int,
+    num_blocks: int,
+    rank: int,
+    num_workers: int,
+    cpu_page_size: int,
+    fill_value: int,
+    done_queue,
+    cleanup_queue,
+) -> None:
+    """Race to construct a SharedOffloadRegion, write fill_value, then wait
+    for the parent's cleanup signal before tearing down.  The wait gives the
+    parent a window to read the raw mmap before the creator removes the file."""
+    try:
+        region = SharedOffloadRegion(
+            instance_id=instance_id,
+            total_size_bytes=total_bytes,
+            num_blocks=num_blocks,
+            rank=rank,
+            num_workers=num_workers,
+            cpu_page_size=cpu_page_size,
+        )
+        t = region.create_next_view(cpu_page_size)
+        t[:, :] = fill_value
+        done_queue.put({"rank": rank, "error": None})
+        cleanup_queue.get()  # wait for parent's verification to finish
+        del t  # release view before cleanup to avoid BufferError
+        region.cleanup()
+    except Exception as e:
+        done_queue.put({"rank": rank, "error": repr(e)})
+
+
+@pytest.fixture
+def iid():
+    """Fresh instance ID for each test."""
+    return str(uuid.uuid4())
+
+
+# ---------------------------------------------------------------------------
+# create_next_view — shape, stride and storage offset
+# ---------------------------------------------------------------------------
+
+
+def test_create_next_view_shape_and_stride(iid):
+    """Returned tensor must have shape (num_blocks, tensor_page_size) and
+    stride (row_stride, 1) where row_stride = cpu_page_size * num_workers."""
+    with _region(iid, num_blocks=4, cpu_page_size=2 * PAGE_SIZE) as r:
+        t = r.create_next_view(PAGE_SIZE)
+        assert t.shape == (4, PAGE_SIZE)
+        # num_workers=1 → row_stride = cpu_page_size
+        assert t.stride() == (2 * PAGE_SIZE, 1)
+        del t
+
+
+def test_create_next_view_storage_offset_rank0(iid):
+    """rank=0 worker's first tensor must start at byte 0 of the mmap."""
+    with _region(iid, cpu_page_size=PAGE_SIZE, num_workers=2, rank=0) as r:
+        t = r.create_next_view(PAGE_SIZE)
+        assert t.data_ptr() == r._base.data_ptr()  # storage_offset == 0
+        del t
+
+
+def test_create_next_view_storage_offset_rank1(iid):
+    """rank=1 worker's first tensor must start cpu_page_size bytes into the mmap."""
+    with _multi_region(iid, num_workers=2, num_blocks=4) as (r0, r1):
+        t1 = r1.create_next_view(PAGE_SIZE)
+        assert t1.data_ptr() == r1._base.data_ptr() + PAGE_SIZE
+        del t1
+
+
+def test_create_next_view_row_stride_with_multiple_workers(iid):
+    """With num_workers=4, row_stride must be 4 * cpu_page_size."""
+    with _region(iid, num_blocks=2, num_workers=4) as r:
+        t = r.create_next_view(PAGE_SIZE)
+        assert t.stride(0) == 4 * PAGE_SIZE
+        del t
+
+
+# ---------------------------------------------------------------------------
+# create_next_view — cursor advancement
+# ---------------------------------------------------------------------------
+
+
+def test_create_next_view_cursor_advances(iid):
+    """Each call to create_next_view must advance _worker_offset by tensor_page_size."""
+    with _region(iid, cpu_page_size=3 * PAGE_SIZE) as r:
+        assert r._worker_offset == 0
+        r.create_next_view(PAGE_SIZE)
+        assert r._worker_offset == PAGE_SIZE
+        r.create_next_view(PAGE_SIZE)
+        assert r._worker_offset == 2 * PAGE_SIZE
+        r.create_next_view(PAGE_SIZE)
+        assert r._worker_offset == 3 * PAGE_SIZE  # exactly at area end
+
+
+def test_create_next_view_exact_fill_succeeds(iid):
+    """Allocations whose total exactly equals cpu_page_size must all succeed."""
+    with _region(iid, cpu_page_size=2 * PAGE_SIZE) as r:
+        r.create_next_view(PAGE_SIZE)  # first half
+        r.create_next_view(PAGE_SIZE)  # fills to area end — must not raise
+
+
+# ---------------------------------------------------------------------------
+# create_next_view — overflow guard
+# ---------------------------------------------------------------------------
+
+
+def test_create_next_view_single_overflow_raises(iid):
+    """A single allocation larger than cpu_page_size must raise AssertionError."""
+    with (
+        _region(iid) as r,
+        pytest.raises(AssertionError, match="exceeds worker area end"),
+    ):
+        r.create_next_view(PAGE_SIZE + 1)
+
+
+def test_create_next_view_cumulative_overflow_raises(iid):
+    """Successive allocations that cumulatively exceed cpu_page_size must raise."""
+    with _region(iid, cpu_page_size=2 * PAGE_SIZE) as r:
+        r.create_next_view(PAGE_SIZE)  # ok — half used
+        r.create_next_view(PAGE_SIZE)  # ok — full
+        with pytest.raises(AssertionError, match="exceeds worker area end"):
+            r.create_next_view(1)  # one byte too many
+
+
+def test_create_next_view_overflow_does_not_mutate_cursor(iid):
+    """A failed create_next_view must leave _worker_offset unchanged."""
+    with _region(iid) as r:
+        offset_before = r._worker_offset
+        with pytest.raises(AssertionError):
+            r.create_next_view(PAGE_SIZE + 1)
+        assert r._worker_offset == offset_before
+
+
+# ---------------------------------------------------------------------------
+# create_next_view — data correctness and layout
+# ---------------------------------------------------------------------------
+
+
+def test_create_next_view_write_visible_in_raw_mmap(iid):
+    """Writes into a create_next_view view must appear at the correct raw mmap offset"""
+    with _region(iid, num_blocks=4) as r:
+        t = r.create_next_view(PAGE_SIZE)
+        t[2, :] = 42  # write to block row 2
+
+        raw = memoryview(r.mmap_obj)
+        # num_workers=1 → row_stride = PAGE_SIZE; block 2 starts at byte 2*PAGE_SIZE
+        chunk = bytes(raw[2 * PAGE_SIZE : 3 * PAGE_SIZE])
+        assert all(b == 42 for b in chunk)
+        del raw, t
+
+
+def test_create_next_view_multi_tensor_layout(iid):
+    """Two tensors from the same worker land at consecutive byte offsets per row."""
+    with _region(iid, num_blocks=2, cpu_page_size=2 * PAGE_SIZE) as r:
+        ta = r.create_next_view(PAGE_SIZE)
+        tb = r.create_next_view(PAGE_SIZE)
+
+        ta[:, :] = 1
+        tb[:, :] = 2
+
+        raw = memoryview(r.mmap_obj)
+        for blk in range(2):
+            row_offset = blk * 2 * PAGE_SIZE  # num_workers=1
+            assert all(b == 1 for b in raw[row_offset : row_offset + PAGE_SIZE])
+            assert all(
+                b == 2 for b in raw[row_offset + PAGE_SIZE : row_offset + 2 * PAGE_SIZE]
+            )
+        del raw, ta, tb
+
+
+def test_create_next_view_multiprocess_slots(iid):
+    """Each worker process calls create_next_view and writes distinct data;
+    the parent verifies each slot lands at the correct interleaved offset."""
+    num_workers = 2
+    num_blocks = 4
+    total_bytes = num_blocks * num_workers * PAGE_SIZE
+
+    ctx = get_mp_context()
+    done_queue = ctx.Queue()
+    cleanup_queue = ctx.Queue()
+
+    # Parent is rank 0 (creator); child is rank 1 (joiner).
+    region = SharedOffloadRegion(
+        instance_id=iid,
+        total_size_bytes=total_bytes,
+        num_blocks=num_blocks,
+        rank=0,
+        num_workers=num_workers,
+        cpu_page_size=PAGE_SIZE,
+    )
+    try:
+        child = ctx.Process(
+            target=_mp_race_construct_and_write,
+            args=(
+                iid,
+                total_bytes,
+                num_blocks,
+                1,
+                num_workers,
+                PAGE_SIZE,
+                22,
+                done_queue,
+                cleanup_queue,
+            ),
+        )
+        child.start()
+
+        t0 = region.create_next_view(PAGE_SIZE)
+        t0[:, :] = 11
+
+        result = done_queue.get(timeout=30)
+        assert result["error"] is None, result["error"]
+
+        raw = memoryview(region.mmap_obj)
+        for blk in range(num_blocks):
+            row_start = blk * num_workers * PAGE_SIZE
+            w0 = bytes(raw[row_start : row_start + PAGE_SIZE])
+            w1 = bytes(raw[row_start + PAGE_SIZE : row_start + 2 * PAGE_SIZE])
+            assert all(b == 11 for b in w0), f"block {blk}: rank0 slot wrong"
+            assert all(b == 22 for b in w1), f"block {blk}: rank1 slot wrong"
+
+        del raw, t0  # release before finally triggers cleanup
+        cleanup_queue.put(True)
+        child.join(timeout=10)
+        assert child.exitcode == 0
+    finally:
+        region.cleanup()
+        _cleanup_file(region.mmap_path)
+
+
+def test_create_next_view_worker_isolation(iid):
+    """Writes by worker 0 must not affect worker 1's slot and vice versa."""
+    num_workers = 2
+    num_blocks = 4
+    with _multi_region(iid, num_workers=num_workers, num_blocks=num_blocks) as regions:
+        t0 = regions[0].create_next_view(PAGE_SIZE)
+        t1 = regions[1].create_next_view(PAGE_SIZE)
+
+        t0[:, :] = 11
+        t1[:, :] = 22
+
+        raw = memoryview(regions[0].mmap_obj)
+        for blk in range(num_blocks):
+            row_start = blk * num_workers * PAGE_SIZE
+            w0 = bytes(raw[row_start : row_start + PAGE_SIZE])
+            w1 = bytes(raw[row_start + PAGE_SIZE : row_start + 2 * PAGE_SIZE])
+            assert all(b == 11 for b in w0), f"block {blk}: worker0 slot corrupted"
+            assert all(b == 22 for b in w1), f"block {blk}: worker1 slot corrupted"
+        del raw, t0, t1  # release before finally triggers cleanup
+
+
+# ---------------------------------------------------------------------------
+# Constructor — creator vs joiner semantics
+# ---------------------------------------------------------------------------
+
+
+def test_creator_flag_set_on_first_open(iid):
+    """The first worker to open the file must have _creator == True."""
+    with _region(iid) as r:
+        assert r._creator is True
+
+
+def test_joiner_flag_not_set(iid):
+    """A second worker opening the same file must have _creator == False."""
+    with _multi_region(iid, num_workers=2) as (r0, r1):
+        assert r0._creator is True
+        assert r1._creator is False
+
+
+def test_file_exists_after_construction(iid):
+    """The mmap file must be present on disk after __init__ completes."""
+    with _region(iid) as r:
+        assert os.path.exists(r.mmap_path)
+
+
+def test_file_has_correct_size(iid):
+    """The mmap file size on disk must equal total_size_bytes."""
+    with _region(iid, num_blocks=4) as r:
+        assert os.path.getsize(r.mmap_path) == 4 * PAGE_SIZE
+
+
+# ---------------------------------------------------------------------------
+# Multi-worker race — concurrent construction
+# ---------------------------------------------------------------------------
+
+
+def test_multi_worker_race_exactly_one_creator(iid):
+    """When N threads race to create the same region, exactly one becomes creator."""
+    num_workers = 8
+    regions, errors = _race_construct(iid, num_workers=num_workers)
+    try:
+        assert not errors, f"Workers raised: {errors}"
+        assert len(regions) == num_workers, "Some workers failed to construct"
+
+        creators = [r for r in regions if r._creator]
+        assert len(creators) == 1, f"Expected 1 creator, got {len(creators)}"
+        assert sum(1 for r in regions if not r._creator) == num_workers - 1, (
+            f"Expected {num_workers - 1} non-creators, got "
+            f"{sum(1 for r in regions if not r._creator)}"
+        )
+
+        for r in regions:
+            assert not r.mmap_obj.closed
+            assert r.total_size_bytes == 4 * num_workers * PAGE_SIZE
+    finally:
+        for r in regions:
+            r.cleanup()
+        _cleanup_file(regions[0].mmap_path)
+
+
+def test_multi_worker_race_shared_memory_visible(iid):
+    """After a concurrent construction race, MAP_SHARED is intact across all workers."""
+    num_workers = 4
+    regions, errors = _race_construct(iid, num_workers=num_workers)
+    assert not errors
+    try:
+        regions[0].mmap_obj[0:1] = b"\xab"
+        for r in regions[1:]:
+            assert memoryview(r.mmap_obj)[0:1] == b"\xab"
+    finally:
+        for r in regions:
+            r.cleanup()
+        _cleanup_file(regions[0].mmap_path)
+
+
+def test_multiprocess_race_construct_and_write(iid):
+    """N processes race to construct the same SharedOffloadRegion, each writes
+    fill_value = rank+1 into their slot; parent verifies interleaved layout."""
+    num_workers = 4
+    num_blocks = 3
+    total_bytes = num_blocks * num_workers * PAGE_SIZE
+
+    ctx = get_mp_context()
+    done_queue = ctx.Queue()
+    cleanup_queue = ctx.Queue()
+
+    procs = [
+        ctx.Process(
+            target=_mp_race_construct_and_write,
+            args=(
+                iid,
+                total_bytes,
+                num_blocks,
+                rank,
+                num_workers,
+                PAGE_SIZE,
+                rank + 1,
+                done_queue,
+                cleanup_queue,
+            ),
+        )
+        for rank in range(num_workers)
+    ]
+    for p in procs:
+        p.start()
+
+    results = {}
+    for _ in range(num_workers):
+        r = done_queue.get(timeout=30)
+        results[r["rank"]] = r
+
+    for rank, r in results.items():
+        assert r["error"] is None, f"rank {rank}: {r['error']}"
+
+    # Read the raw file while all workers still hold it open.
+    mmap_path = f"/dev/shm/vllm_offload_{iid}.mmap"
+    with open(mmap_path, "rb") as f:
+        raw = f.read()
+
+    for blk in range(num_blocks):
+        for w in range(num_workers):
+            slot_start = (blk * num_workers + w) * PAGE_SIZE
+            slot = raw[slot_start : slot_start + PAGE_SIZE]
+            expected = w + 1  # fill_value = rank + 1
+            assert all(b == expected for b in slot), (
+                f"block {blk}, worker {w}: expected {expected} but got wrong bytes"
+            )
+
+    # Unblock all workers to clean up.
+    for _ in range(num_workers):
+        cleanup_queue.put(True)
+    for p in procs:
+        p.join(timeout=10)
+        assert p.exitcode == 0
+
+
+# ---------------------------------------------------------------------------
+# Cleanup
+# ---------------------------------------------------------------------------
+
+
+def test_cleanup_creator_all_effects(iid):
+    """cleanup() on the creator closes mmap, closes fd, and removes the file."""
+    r = _make_region(iid)
+    path = r.mmap_path
+    fd = r.fd
+    mmap_obj = r.mmap_obj
+
+    r.cleanup()
+
+    assert mmap_obj.closed, "mmap should be closed after cleanup"
+    assert not os.path.exists(path), "creator should remove the file"
+    with pytest.raises(OSError):
+        os.fstat(fd)  # fd should be closed
+
+
+def test_cleanup_non_creator_all_effects(iid):
+    """cleanup() on a non-creator closes mmap and fd, but leaves the file on disk."""
+    r0 = _make_region(iid)  # creator
+    r1 = _make_region(iid)  # joiner
+    path = r0.mmap_path
+    fd1 = r1.fd
+    mmap_obj1 = r1.mmap_obj
+    try:
+        r1.cleanup()
+
+        assert mmap_obj1.closed, "mmap should be closed after cleanup"
+        assert os.path.exists(path), "non-creator must not remove the file"
+        with pytest.raises(OSError):
+            os.fstat(fd1)  # fd should be closed
+    finally:
+        r0.cleanup()
+        _cleanup_file(path)
+
+
+def test_cleanup_idempotent(iid):
+    """Calling cleanup() twice must not raise any exception."""
+    r = _make_region(iid)
+    r.cleanup()
+    r.cleanup()  # must be a no-op
+
+
+def test_cleanup_after_create_next_view_releases_mmap(iid):
+    """cleanup() must close the mmap even after create_next_view was called.
+    create_next_view returns a view that shares storage with _base; both must be
+    released before mmap.close() can succeed."""
+    r = _make_region(iid)
+    mmap_obj = r.mmap_obj
+
+    t = r.create_next_view(PAGE_SIZE)
+    del t
+
+    r.cleanup()
+
+    assert mmap_obj.closed, "mmap should be closed after releasing the tensor"
+
+
+# ---------------------------------------------------------------------------
+# _wait_for_file_size
+# ---------------------------------------------------------------------------
+
+
+def test_wait_for_file_size_already_large_enough(tmp_path):
+    """_wait_for_file_size must return immediately when file is already big enough."""
+    fd = os.open(str(tmp_path / "ready.mmap"), os.O_CREAT | os.O_RDWR, 0o600)
+    try:
+        os.ftruncate(fd, PAGE_SIZE)
+        start = time.monotonic()
+        _wait_for_file_size(fd, PAGE_SIZE, timeout=5.0)
+        assert time.monotonic() - start < 0.5
+    finally:
+        os.close(fd)
+
+
+def test_wait_for_file_size_waits_for_grow(tmp_path):
+    """_wait_for_file_size must return once a background thread grows the file."""
+    fd = os.open(str(tmp_path / "grow.mmap"), os.O_CREAT | os.O_RDWR, 0o600)
+    try:
+
+        def grow():
+            time.sleep(0.05)
+            os.ftruncate(fd, PAGE_SIZE)
+
+        t = threading.Thread(target=grow)
+        t.start()
+        _wait_for_file_size(fd, PAGE_SIZE, timeout=5.0)  # must not raise
+        t.join()
+    finally:
+        os.close(fd)
+
+
+def test_wait_for_file_size_timeout(tmp_path):
+    """_wait_for_file_size must raise TimeoutError when the file never grows."""
+    fd = os.open(str(tmp_path / "stuck.mmap"), os.O_CREAT | os.O_RDWR, 0o600)
+    try:
+        with pytest.raises(TimeoutError):
+            _wait_for_file_size(fd, PAGE_SIZE, timeout=0.1)
+    finally:
+        os.close(fd)
diff --git a/tests/v1/kv_offload/test_cpu_gpu.py b/tests/v1/kv_offload/test_cpu_gpu.py
deleted file mode 100644
index 1983cca22d8f..000000000000
--- a/tests/v1/kv_offload/test_cpu_gpu.py
+++ /dev/null
@@ -1,171 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import random
-import time
-
-import pytest
-import torch
-
-from vllm.utils.torch_utils import set_random_seed
-from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec
-from vllm.v1.kv_offload.spec import (
-    CanonicalKVCacheRef,
-    CanonicalKVCaches,
-    CanonicalKVCacheTensor,
-)
-from vllm.v1.kv_offload.worker.cpu_gpu import CpuGpuOffloadingHandlers
-
-NUM_GPU_BLOCKS = [64]
-NUM_CPU_BLOCKS = [256]
-GPU_PAGE_SIZES = [512, 1024]
-BLOCK_SIZE_FACTORS = [1, 3]
-NUM_TENSORS = [4]
-SEEDS = [0]
-CUDA_DEVICES = ["cuda:0"]
-NUM_MAPPINGS = [3]
-
-
-@pytest.mark.parametrize("gpu_to_cpu", [True, False])
-@pytest.mark.parametrize("num_mappings", NUM_MAPPINGS)
-@pytest.mark.parametrize("gpu_page_size_bytes", GPU_PAGE_SIZES)
-@pytest.mark.parametrize("block_size_factor", BLOCK_SIZE_FACTORS)
-@pytest.mark.parametrize("num_gpu_blocks", NUM_GPU_BLOCKS)
-@pytest.mark.parametrize("num_cpu_blocks", NUM_CPU_BLOCKS)
-@pytest.mark.parametrize("num_tensors", NUM_TENSORS)
-@pytest.mark.parametrize("seed", SEEDS)
-@pytest.mark.parametrize("device", CUDA_DEVICES)
-@torch.inference_mode()
-def test_transfer(
-    default_vllm_config,
-    gpu_to_cpu: bool,
-    num_mappings: int,
-    gpu_page_size_bytes: int,
-    block_size_factor: int,
-    num_gpu_blocks: int,
-    num_cpu_blocks: int,
-    num_tensors: int,
-    seed: int,
-    device: str,
-) -> None:
-    set_random_seed(seed)
-
-    # build CanonicalKVCacheTensor list: one per tensor
-    kv_cache_tensors: list[CanonicalKVCacheTensor] = []
-    for i in range(num_tensors):
-        gpu_tensor = torch.randint(
-            -128,
-            127,
-            (num_gpu_blocks, gpu_page_size_bytes),
-            dtype=torch.int8,
-            device=device,
-        )
-        kv_cache_tensors.append(
-            CanonicalKVCacheTensor(
-                tensor=gpu_tensor,
-                page_size_bytes=gpu_page_size_bytes,
-            )
-        )
-
-    # one group containing all tensors, one data ref per tensor
-    kv_cache_groups_data_refs: list[list[CanonicalKVCacheRef]] = [
-        [
-            CanonicalKVCacheRef(
-                tensor_idx=i,
-                page_size_bytes=gpu_page_size_bytes,
-            )
-            for i in range(num_tensors)
-        ]
-    ]
-
-    kv_caches = CanonicalKVCaches(
-        tensors=kv_cache_tensors,
-        group_data_refs=kv_cache_groups_data_refs,
-    )
-    handlers = CpuGpuOffloadingHandlers(
-        kv_caches=kv_caches,
-        block_size_factor=block_size_factor,
-        num_cpu_blocks=num_cpu_blocks,
-    )
-
-    # select block mappings
-    gpu_blocks = random.sample(range(num_gpu_blocks), num_mappings * block_size_factor)
-    cpu_blocks = random.sample(range(num_cpu_blocks), num_mappings)
-
-    # expand cpu blocks to gpu-page granularity for uniform comparison:
-    # each cpu block maps to block_size_factor consecutive sub-blocks
-    cpu_blocks_expanded = [
-        cpu_block * block_size_factor + j
-        for cpu_block in cpu_blocks
-        for j in range(block_size_factor)
-    ]
-
-    # maybe skip some GPU blocks to test reading from the middle of a CPU block
-    if not gpu_to_cpu:
-        blocks_to_skip = block_size_factor - 1
-        gpu_blocks = gpu_blocks[blocks_to_skip:]
-        cpu_blocks_expanded = cpu_blocks_expanded[blocks_to_skip:]
-
-    # set transfer direction
-    if gpu_to_cpu:
-        handler = handlers.gpu_to_cpu_handler
-        src_spec = GPULoadStoreSpec(gpu_blocks, group_sizes=(len(gpu_blocks),))
-        dst_spec = CPULoadStoreSpec(cpu_blocks)
-        dst_to_src = dict(zip(cpu_blocks_expanded, gpu_blocks))
-        num_dst_sub_blocks = num_cpu_blocks * block_size_factor
-    else:
-        handler = handlers.cpu_to_gpu_handler
-        src_spec = CPULoadStoreSpec(cpu_blocks)
-        dst_spec = GPULoadStoreSpec(gpu_blocks, group_sizes=(len(gpu_blocks),))
-        dst_to_src = dict(zip(gpu_blocks, cpu_blocks_expanded))
-        num_dst_sub_blocks = num_gpu_blocks
-
-    # clone src and dst tensors before transfer
-    orig_src_tensors = [x.clone() for x in handler.src_tensors]
-    orig_dst_tensors = [x.clone() for x in handler.dst_tensors]
-
-    # call transfer function
-    start_time = time.time()
-    assert handler.transfer_async(1, (src_spec, dst_spec))
-    assert set({x.job_id for x in handler._transfers}) == {1}
-
-    # wait for transfer to complete
-    end_time = time.time() + 10
-    while time.time() < end_time:
-        finished = handler.get_finished()
-        if finished:
-            assert finished[0].job_id == 1
-            assert finished[0].success
-            assert (
-                finished[0].transfer_type == ("GPU", "CPU")
-                if gpu_to_cpu
-                else ("CPU", "GPU")
-            )
-            assert finished[0].transfer_size == (
-                len(gpu_blocks) * handler.group_block_size_in_bytes[0]
-            )
-            assert finished[0].transfer_time > 0
-            assert finished[0].transfer_time < (time.time() - start_time)
-            break
-        time.sleep(0.1)
-
-    # verify src tensors did not change
-    for orig_tensor, tensor in zip(orig_src_tensors, handler.src_tensors):
-        assert torch.equal(orig_tensor, tensor)
-
-    # verify dst tensors at gpu-page granularity.
-    for src_tensor, dst_tensor, orig_dst_tensor in zip(
-        handler.src_tensors,
-        handler.dst_tensors,
-        orig_dst_tensors,
-    ):
-        # view both GPU and CPU tensors as (n, gpu_page_size_bytes) for comparison.
-        src_view = src_tensor.view(-1, gpu_page_size_bytes)
-        dst_view = dst_tensor.view(-1, gpu_page_size_bytes)
-        orig_dst_view = orig_dst_tensor.view(-1, gpu_page_size_bytes)
-        for dst_sub_block in range(num_dst_sub_blocks):
-            src_sub_block = dst_to_src.get(dst_sub_block)
-            if src_sub_block is not None:
-                expected = src_view[src_sub_block]
-            else:
-                expected = orig_dst_view[dst_sub_block]
-            torch.testing.assert_close(dst_view[dst_sub_block].cpu(), expected.cpu())
diff --git a/tests/v1/kv_offload/test_cpu_manager.py b/tests/v1/kv_offload/test_cpu_manager.py
deleted file mode 100644
index eea0367bf503..000000000000
--- a/tests/v1/kv_offload/test_cpu_manager.py
+++ /dev/null
@@ -1,624 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from collections.abc import Iterable
-from dataclasses import dataclass
-
-import numpy as np
-import pytest
-
-from vllm.v1.core.kv_cache_utils import BlockHash
-from vllm.v1.kv_offload.abstract import (
-    LoadStoreSpec,
-    OffloadingEvent,
-    PrepareStoreOutput,
-)
-from vllm.v1.kv_offload.cpu.manager import CPUOffloadingManager
-from vllm.v1.kv_offload.cpu.policies.arc import ARCCachePolicy
-from vllm.v1.kv_offload.mediums import CPULoadStoreSpec
-
-
-@dataclass
-class ExpectedPrepareStoreOutput:
-    block_hashes_to_store: list[int]
-    store_block_ids: list[int]
-    block_hashes_evicted: list[int]
-
-
-def to_hashes(int_hashes: list[int]) -> list[BlockHash]:
-    return [BlockHash(str(i).encode()) for i in int_hashes]
-
-
-def verify_store_output(
-    prepare_store_output: PrepareStoreOutput | None,
-    expected_prepare_store_output: ExpectedPrepareStoreOutput,
-):
-    assert prepare_store_output is not None
-    assert prepare_store_output.block_hashes_to_store == to_hashes(
-        expected_prepare_store_output.block_hashes_to_store
-    )
-    assert prepare_store_output.block_hashes_evicted == to_hashes(
-        expected_prepare_store_output.block_hashes_evicted
-    )
-    store_spec = prepare_store_output.store_spec
-    assert isinstance(store_spec, CPULoadStoreSpec)
-    expected_array = np.array(
-        expected_prepare_store_output.store_block_ids, dtype=np.int64
-    )
-    assert np.array_equal(expected_array, store_spec.block_ids)
-
-
-def verify_load_output(
-    prepare_load_output: LoadStoreSpec, expected_prepare_load_output: list[int]
-):
-    assert isinstance(prepare_load_output, CPULoadStoreSpec)
-    expected_array = np.array(expected_prepare_load_output, dtype=np.int64)
-    assert np.array_equal(expected_array, prepare_load_output.block_ids)
-
-
-def verify_events(
-    events: Iterable[OffloadingEvent],
-    block_size: int,
-    expected_stores: tuple[set[int], ...] = (),
-    expected_evictions: tuple[set[int], ...] = (),
-):
-    stores: list[set[BlockHash]] = []
-    evictions: list[set[BlockHash]] = []
-    for event in events:
-        assert event.medium == CPULoadStoreSpec.medium()
-        assert event.block_size == block_size
-        if event.removed:
-            evictions.append(set(event.block_hashes))
-        else:
-            stores.append(set(event.block_hashes))
-
-    def to_hash_sets(int_sets: tuple[set[int], ...]) -> tuple[set[BlockHash], ...]:
-        return tuple([set(to_hashes(list(int_set))) for int_set in int_sets])
-
-    assert tuple(evictions) == to_hash_sets(expected_evictions)
-    assert tuple(stores) == to_hash_sets(expected_stores)
-
-
-@pytest.mark.parametrize("eviction_policy", ["lru", "arc"])
-def test_already_stored_block_not_evicted_during_prepare_store(eviction_policy):
-    """
-    Regression test: a block that is already stored must not be evicted
-    by prepare_store() when it needs to make room for new blocks.
-    Applies to both lru and arc policies.
-
-    Scenario:
-        - Store blocks [1, 2] and complete.
-        - touch([1]) makes block 2 the LRU candidate.
-        - prepare_store([2, 3, 4, 5]):
-            * block 2 is filtered out as "already stored"
-            * but without the fix, block 2 would be evicted as the LRU
-              candidate to make room for [3, 4, 5]
-        - After complete_store([2, 3, 4, 5]), block 2 must still be present.
-    """
-    block_size = 256
-    manager = CPUOffloadingManager(
-        block_size=block_size,
-        num_blocks=4,
-        cache_policy=eviction_policy,
-        enable_events=True,
-    )
-
-    # store [1, 2] and complete
-    manager.prepare_store(to_hashes([1, 2]))
-    manager.complete_store(to_hashes([1, 2]))
-
-    # touch [1] to make block 2 the LRU candidate
-    manager.touch(to_hashes([1]))
-
-    # prepare_store([2, 3, 4, 5]):
-    #   - block 2 is already stored → filtered out of block_hashes_to_store
-    #   - block 2 must NOT be evicted even though it is the LRU candidate
-    #   - block 1 (ID 0) is evicted instead; new blocks [3,4,5] get IDs 2,3,0
-    prepare_store_output = manager.prepare_store(to_hashes([2, 3, 4, 5]))
-    verify_store_output(
-        prepare_store_output,
-        ExpectedPrepareStoreOutput(
-            block_hashes_to_store=[3, 4, 5],
-            store_block_ids=[2, 3, 0],
-            block_hashes_evicted=[1],  # block 1 evicted, not block 2
-        ),
-    )
-
-    # complete_store must not silently drop block 2
-    manager.complete_store(to_hashes([2, 3, 4, 5]))
-
-    # block 2 must still be present in the cache
-    assert manager.lookup(to_hashes([2])) == 1
-
-
-def test_cpu_manager():
-    """
-    Tests CPUOffloadingManager with lru policy.
-    """
-    # initialize a CPU backend with a capacity of 4 blocks
-    block_size = 256
-    cpu_manager = CPUOffloadingManager(
-        block_size=block_size, num_blocks=4, cache_policy="lru", enable_events=True
-    )
-
-    # prepare store [1, 2]
-    prepare_store_output = cpu_manager.prepare_store(to_hashes([1, 2]))
-    verify_store_output(
-        prepare_store_output,
-        ExpectedPrepareStoreOutput(
-            block_hashes_to_store=[1, 2],
-            store_block_ids=[0, 1],
-            block_hashes_evicted=[],
-        ),
-    )
-
-    # lookup [1, 2] -> not ready
-    assert cpu_manager.lookup(to_hashes([1, 2])) == 0
-
-    # no events so far
-    assert list(cpu_manager.take_events()) == []
-
-    # complete store [1, 2]
-    cpu_manager.complete_store(to_hashes([1, 2]))
-    verify_events(
-        cpu_manager.take_events(), block_size=block_size, expected_stores=({1, 2},)
-    )
-
-    # lookup [1, 2]
-    assert cpu_manager.lookup(to_hashes([1])) == 1
-    assert cpu_manager.lookup(to_hashes([1, 2])) == 2
-    assert cpu_manager.lookup(to_hashes([1, 2, 3])) == 2
-
-    # prepare store [2, 3, 4, 5] -> evicts [1]
-    prepare_store_output = cpu_manager.prepare_store(to_hashes([2, 3, 4, 5]))
-    verify_store_output(
-        prepare_store_output,
-        ExpectedPrepareStoreOutput(
-            block_hashes_to_store=[3, 4, 5],
-            store_block_ids=[2, 3, 0],
-            block_hashes_evicted=[1],
-        ),
-    )
-
-    # verify eviction event
-    verify_events(
-        cpu_manager.take_events(), block_size=block_size, expected_evictions=({1},)
-    )
-
-    # prepare store with no space
-    assert cpu_manager.prepare_store(to_hashes([1, 6])) is None
-
-    # complete store [2, 3, 4, 5]
-    cpu_manager.complete_store(to_hashes([2, 3, 4, 5]))
-
-    # prepare load [2, 3]
-    prepare_load_output = cpu_manager.prepare_load(to_hashes([2, 3]))
-    verify_load_output(prepare_load_output, [1, 2])
-
-    # prepare store with no space ([2, 3] is being loaded)
-    assert cpu_manager.prepare_store(to_hashes([6, 7, 8])) is None
-
-    # complete load [2, 3]
-    cpu_manager.complete_load(to_hashes([2, 3]))
-
-    # prepare store [6, 7, 8] -> evicts [2, 3, 4] (oldest)
-    prepare_store_output = cpu_manager.prepare_store(to_hashes([6, 7, 8]))
-    verify_store_output(
-        prepare_store_output,
-        ExpectedPrepareStoreOutput(
-            block_hashes_to_store=[6, 7, 8],
-            store_block_ids=[3, 2, 1],
-            block_hashes_evicted=[2, 3, 4],
-        ),
-    )
-
-    # complete store [6, 7, 8]
-    cpu_manager.complete_store(to_hashes([6, 7, 8]))
-
-    # touch [5, 6, 7] (move to end of LRU order)
-    cpu_manager.touch(to_hashes([5, 6, 7]))
-
-    # prepare store [7, 9] -> evicts [8] (oldest following previous touch)
-    prepare_store_output = cpu_manager.prepare_store(to_hashes([9]))
-    verify_store_output(
-        prepare_store_output,
-        ExpectedPrepareStoreOutput(
-            block_hashes_to_store=[9],
-            store_block_ids=[1],
-            block_hashes_evicted=[8],
-        ),
-    )
-
-    # complete store [7, 9] with failure
-    cpu_manager.complete_store(to_hashes([7, 9]), success=False)
-
-    # assert [7] is still stored, but [9] is not
-    assert cpu_manager.lookup(to_hashes([7])) == 1
-    assert cpu_manager.lookup(to_hashes([9])) == 0
-
-    verify_events(
-        cpu_manager.take_events(),
-        block_size=block_size,
-        expected_stores=({3, 4, 5}, {6, 7, 8}),
-        expected_evictions=({2, 3, 4}, {8}),
-    )
-
-
-def test_arc_manager_basic():
-    """
-    Tests CPUOffloadingManager with arc policy.
-    Verifies that ARC handles store, load, and lookup operations correctly.
-    """
-    block_size = 256
-    arc_manager = CPUOffloadingManager(
-        block_size=block_size, num_blocks=4, cache_policy="arc", enable_events=True
-    )
-    arc_policy = arc_manager._policy
-    assert isinstance(arc_policy, ARCCachePolicy)
-
-    # prepare store [1, 2]
-    prepare_store_output = arc_manager.prepare_store(to_hashes([1, 2]))
-    verify_store_output(
-        prepare_store_output,
-        ExpectedPrepareStoreOutput(
-            block_hashes_to_store=[1, 2],
-            store_block_ids=[0, 1],
-            block_hashes_evicted=[],
-        ),
-    )
-
-    # lookup [1, 2] -> not ready
-    assert arc_manager.lookup(to_hashes([1, 2])) == 0
-
-    # no events so far
-    assert list(arc_manager.take_events()) == []
-
-    # complete store [1, 2]
-    arc_manager.complete_store(to_hashes([1, 2]))
-    verify_events(
-        arc_manager.take_events(), block_size=block_size, expected_stores=({1, 2},)
-    )
-
-    # lookup [1, 2]
-    assert arc_manager.lookup(to_hashes([1])) == 1
-    assert arc_manager.lookup(to_hashes([1, 2])) == 2
-    assert arc_manager.lookup(to_hashes([1, 2, 3])) == 2
-
-    # blocks should be in T1 (recent)
-    assert len(arc_policy.t1) == 2
-    assert len(arc_policy.t2) == 0
-
-
-def test_arc_manager_t1_to_t2_promotion():
-    """
-    Tests that accessing a block in T1 promotes it to T2 (frequent).
-    This is a key feature of ARC's adaptive behavior.
-    """
-    block_size = 256
-    arc_manager = CPUOffloadingManager(
-        block_size=block_size, num_blocks=4, cache_policy="arc", enable_events=False
-    )
-    arc_policy = arc_manager._policy
-    assert isinstance(arc_policy, ARCCachePolicy)
-
-    # store and complete block 1
-    arc_manager.prepare_store(to_hashes([1]))
-    arc_manager.complete_store(to_hashes([1]))
-
-    # block 1 starts in T1 (recent)
-    assert to_hashes([1])[0] in arc_policy.t1
-    assert to_hashes([1])[0] not in arc_policy.t2
-
-    # touch block 1 (simulate second access)
-    arc_manager.touch(to_hashes([1]))
-
-    # block 1 should now be in T2 (frequent)
-    assert to_hashes([1])[0] not in arc_policy.t1
-    assert to_hashes([1])[0] in arc_policy.t2
-
-
-def test_arc_manager_eviction_with_load():
-    """
-    Tests ARC eviction behavior similar to LRU test.
-    Verifies that blocks being loaded (ref_cnt > 0) cannot be evicted.
-    """
-    block_size = 256
-    arc_manager = CPUOffloadingManager(
-        block_size=block_size, num_blocks=4, cache_policy="arc", enable_events=True
-    )
-
-    # prepare and complete store [1, 2, 3, 4]
-    prepare_store_output = arc_manager.prepare_store(to_hashes([1, 2, 3, 4]))
-    verify_store_output(
-        prepare_store_output,
-        ExpectedPrepareStoreOutput(
-            block_hashes_to_store=[1, 2, 3, 4],
-            store_block_ids=[0, 1, 2, 3],
-            block_hashes_evicted=[],
-        ),
-    )
-    arc_manager.complete_store(to_hashes([1, 2, 3, 4]))
-
-    # prepare load [2, 3] (increases ref_cnt)
-    prepare_load_output = arc_manager.prepare_load(to_hashes([2, 3]))
-    verify_load_output(prepare_load_output, [1, 2])
-
-    # prepare store [5, 6, 7] with [2, 3] being loaded
-    # should fail because [2, 3] have ref_cnt > 0
-    assert arc_manager.prepare_store(to_hashes([5, 6, 7])) is None
-
-    # complete load [2, 3]
-    arc_manager.complete_load(to_hashes([2, 3]))
-
-    # now prepare store [5, 6, 7] should succeed
-    # ARC will evict blocks one at a time from T1 as needed
-    prepare_store_output = arc_manager.prepare_store(to_hashes([5, 6, 7]))
-    assert prepare_store_output is not None
-    # Should successfully evict enough blocks to make room (at least 1)
-    assert len(prepare_store_output.block_hashes_evicted) >= 1
-
-
-def test_arc_manager_adaptive_target():
-    """
-    Tests ARC's adaptive target adjustment via ghost lists.
-    When a block in B1 (ghost list) is accessed, target_t1_size increases.
-    When a block in B2 is accessed, target_t1_size decreases.
-    """
-    block_size = 256
-    arc_manager = CPUOffloadingManager(
-        block_size=block_size, num_blocks=2, cache_policy="arc", enable_events=False
-    )
-    arc_policy = arc_manager._policy
-    assert isinstance(arc_policy, ARCCachePolicy)
-
-    # store blocks 1, 2 (fills cache)
-    arc_manager.prepare_store(to_hashes([1, 2]))
-    arc_manager.complete_store(to_hashes([1, 2]))
-
-    initial_target = arc_policy.target_t1_size
-
-    # store block 3, evicting block 1 (moves to B1 ghost list)
-    arc_manager.prepare_store(to_hashes([3]))
-    arc_manager.complete_store(to_hashes([3]))
-
-    # block 1 should be in B1 (ghost list)
-    assert to_hashes([1])[0] in arc_policy.b1
-
-    # touch block 1 (cache miss, but in B1)
-    # this should increase target_t1_size (favor recency)
-    arc_manager.touch(to_hashes([1]))
-
-    # target should have increased
-    assert arc_policy.target_t1_size > initial_target
-
-
-def test_arc_manager_t1_t2_eviction_policy():
-    """
-    Tests that ARC evicts from T1 or T2 based on target_t1_size.
-    If |T1| >= target_t1_size, evict from T1, otherwise from T2.
-    """
-    block_size = 256
-    arc_manager = CPUOffloadingManager(
-        block_size=block_size, num_blocks=4, cache_policy="arc", enable_events=False
-    )
-    arc_policy = arc_manager._policy
-    assert isinstance(arc_policy, ARCCachePolicy)
-
-    # store blocks 1, 2, 3, 4
-    arc_manager.prepare_store(to_hashes([1, 2, 3, 4]))
-    arc_manager.complete_store(to_hashes([1, 2, 3, 4]))
-
-    # promote blocks 3, 4 to T2 by touching them
-    arc_manager.touch(to_hashes([3, 4]))
-
-    # now: T1 = {1, 2}, T2 = {3, 4}
-    assert len(arc_policy.t1) == 2
-    assert len(arc_policy.t2) == 2
-
-    # set target_t1_size to prefer evicting from T1
-    # (when |T1| >= target, evict from T1)
-    arc_policy.target_t1_size = 1
-
-    # store block 5, should evict from T1 (block 1, LRU in T1)
-    output = arc_manager.prepare_store(to_hashes([5]))
-    assert output is not None
-    assert to_hashes([1]) == output.block_hashes_evicted
-
-    arc_manager.complete_store(to_hashes([5]))
-
-    # block 1 should be in B1 (ghost list)
-    assert to_hashes([1])[0] in arc_policy.b1
-    # block 5 should be in T1
-    assert to_hashes([5])[0] in arc_policy.t1
-
-
-def test_arc_manager_ghost_list_bounds():
-    """
-    Tests that ghost lists (B1, B2) don't grow unbounded.
-    They should be capped at cache_capacity.
-    """
-    block_size = 256
-    arc_manager = CPUOffloadingManager(
-        block_size=block_size, num_blocks=2, cache_policy="arc", enable_events=False
-    )
-    arc_policy = arc_manager._policy
-    assert isinstance(arc_policy, ARCCachePolicy)
-
-    # fill cache with blocks 1, 2
-    arc_manager.prepare_store(to_hashes([1, 2]))
-    arc_manager.complete_store(to_hashes([1, 2]))
-
-    # store many blocks to fill ghost lists
-    for i in range(3, 20):
-        arc_manager.prepare_store(to_hashes([i]))
-        arc_manager.complete_store(to_hashes([i]))
-
-    # ghost lists should not exceed cache_capacity
-    assert len(arc_policy.b1) <= arc_policy.cache_capacity
-    assert len(arc_policy.b2) <= arc_policy.cache_capacity
-
-
-def test_arc_manager_touch_ordering():
-    """
-    Tests that touch() correctly updates access patterns.
-    Similar to LRU test but verifies T1/T2 ordering.
-    """
-    block_size = 256
-    arc_manager = CPUOffloadingManager(
-        block_size=block_size, num_blocks=4, cache_policy="arc", enable_events=True
-    )
-    arc_policy = arc_manager._policy
-    assert isinstance(arc_policy, ARCCachePolicy)
-
-    # store blocks 1, 2, 3, 4
-    arc_manager.prepare_store(to_hashes([1, 2, 3, 4]))
-    arc_manager.complete_store(to_hashes([1, 2, 3, 4]))
-
-    # promote 3, 4 to T2
-    arc_manager.touch(to_hashes([3, 4]))
-
-    # T1 = {1, 2}, T2 = {3, 4}
-    # touch [1, 3, 4] - should promote 1 to T2, and move 3,4 to end of T2
-    arc_manager.touch(to_hashes([1, 3, 4]))
-
-    # T1 = {2}, T2 = {1, 3, 4} (in that order, with 4 most recent)
-    assert len(arc_policy.t1) == 1
-    assert len(arc_policy.t2) == 3
-
-    # store block 5, should evict from T1 (block 2, only one in T1)
-    prepare_store_output = arc_manager.prepare_store(to_hashes([5]))
-    verify_store_output(
-        prepare_store_output,
-        ExpectedPrepareStoreOutput(
-            block_hashes_to_store=[5],
-            store_block_ids=[1],  # reuses block 2's storage
-            block_hashes_evicted=[2],
-        ),
-    )
-
-
-def test_arc_manager_failed_store():
-    """
-    Tests that failed store operations clean up correctly.
-    Similar to LRU test but for ARC.
-    """
-    block_size = 256
-    arc_manager = CPUOffloadingManager(
-        block_size=block_size, num_blocks=4, cache_policy="arc", enable_events=True
-    )
-    arc_policy = arc_manager._policy
-    assert isinstance(arc_policy, ARCCachePolicy)
-
-    # store blocks 1, 2, 3, 4
-    arc_manager.prepare_store(to_hashes([1, 2, 3, 4]))
-    arc_manager.complete_store(to_hashes([1, 2, 3, 4]))
-
-    # prepare store block 5 (will evict block 1)
-    prepare_store_output = arc_manager.prepare_store(to_hashes([5]))
-    assert prepare_store_output is not None
-    assert len(prepare_store_output.block_hashes_evicted) == 1
-
-    # complete store with failure
-    arc_manager.complete_store(to_hashes([5]), success=False)
-
-    # block 5 should not be in cache
-    assert arc_manager.lookup(to_hashes([5])) == 0
-    # block 5 should not be in T1 or T2
-    assert to_hashes([5])[0] not in arc_policy.t1
-    assert to_hashes([5])[0] not in arc_policy.t2
-
-    # evicted block should still be gone (in B1 ghost list)
-    evicted_hash = prepare_store_output.block_hashes_evicted[0]
-    assert evicted_hash in arc_policy.b1
-
-
-def test_arc_manager_full_scenario():
-    """
-    Comprehensive test covering multiple ARC operations in sequence.
-    Similar to the full LRU test but adapted for ARC behavior.
-    """
-    block_size = 256
-    arc_manager = CPUOffloadingManager(
-        block_size=block_size, num_blocks=4, cache_policy="arc", enable_events=True
-    )
-    arc_policy = arc_manager._policy
-    assert isinstance(arc_policy, ARCCachePolicy)
-
-    # store [1, 2]
-    arc_manager.prepare_store(to_hashes([1, 2]))
-    arc_manager.complete_store(to_hashes([1, 2]))
-
-    # store [3, 4, 5] -> evicts [1]
-    prepare_store_output = arc_manager.prepare_store(to_hashes([3, 4, 5]))
-    assert prepare_store_output is not None
-    assert len(prepare_store_output.block_hashes_evicted) == 1
-    arc_manager.complete_store(to_hashes([3, 4, 5]))
-
-    # promote some blocks to T2
-    arc_manager.touch(to_hashes([2, 3]))
-
-    # T1 has {4, 5}, T2 has {2, 3}
-    assert len(arc_policy.t1) == 2
-    assert len(arc_policy.t2) == 2
-
-    # store [6] -> should evict from T1 (4 is oldest in T1)
-    prepare_store_output = arc_manager.prepare_store(to_hashes([6]))
-    assert prepare_store_output is not None
-    arc_manager.complete_store(to_hashes([6]))
-
-    # verify blocks 2, 3 (in T2) are still present
-    assert arc_manager.lookup(to_hashes([2])) == 1
-    assert arc_manager.lookup(to_hashes([3])) == 1
-
-    # verify events
-    events = list(arc_manager.take_events())
-    assert len(events) > 0  # should have store and eviction events
-
-
-def test_filter_reused_manager():
-    """
-    Tests FilterReusedOffloadingManager with a CPUOffloadingManager.
-    """
-    block_size = 256
-    lru_manager = CPUOffloadingManager(
-        block_size=block_size, num_blocks=4, cache_policy="lru", enable_events=True
-    )
-
-    from vllm.v1.kv_offload.reuse_manager import FilterReusedOffloadingManager
-
-    manager = FilterReusedOffloadingManager(
-        backing=lru_manager, store_threshold=2, max_tracker_size=3
-    )
-
-    # Lookup [1, 2] -> 1st time, added to tracker but not eligible for store yet
-    assert manager.lookup(to_hashes([1, 2])) == 0
-
-    # prepare store [1, 2] -> should be filtered
-    prepare_store_output = manager.prepare_store(to_hashes([1, 2]))
-    assert prepare_store_output is not None
-    assert prepare_store_output.block_hashes_to_store == []
-
-    # Lookup [1] -> 2nd time, eligible now
-    assert manager.lookup(to_hashes([1])) == 0
-
-    # prepare store [1, 2] -> [1] should be eligible, [2] should be filtered
-    prepare_store_output = manager.prepare_store(to_hashes([1, 2]))
-    assert prepare_store_output is not None
-    assert prepare_store_output.block_hashes_to_store == to_hashes([1])
-
-    # Lookup [3, 4] -> 1st time
-    # (evicts [2] from tracker since max_size is 3 and tracker has [1])
-    assert manager.lookup(to_hashes([3, 4])) == 0
-    # Verify [2] was evicted from the tracker (tracker now has: [1], [3], [4])
-    assert to_hashes([2])[0] not in manager.counts
-
-    # Lookup [2] again -> (this adds [2] back to the tracker as 1st time)
-    assert manager.lookup(to_hashes([2])) == 0
-    # Verify [2] was re-added with count=1 (not eligible yet)
-    assert manager.counts.get(to_hashes([2])[0]) == 1
-
-    # prepare store [2] -> should still be filtered out since count was reset
-    prepare_store_output = manager.prepare_store(to_hashes([2]))
-    assert prepare_store_output is not None
-    assert prepare_store_output.block_hashes_to_store == []
-
-    manager.complete_store(to_hashes([1]))
diff --git a/tests/v1/kv_offload/test_cpu_offloading.py b/tests/v1/kv_offload/test_cpu_offloading.py
deleted file mode 100644
index d3db828dc60e..000000000000
--- a/tests/v1/kv_offload/test_cpu_offloading.py
+++ /dev/null
@@ -1,237 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import socket
-import time
-
-import msgspec
-import msgspec.msgpack
-import pytest
-import zmq
-from tqdm import tqdm
-
-from vllm import LLM, SamplingParams, TokensPrompt
-from vllm.config import KVEventsConfig, KVTransferConfig
-from vllm.distributed.kv_events import BlockStored, KVEventBatch
-from vllm.platforms import current_platform
-
-CPU_BLOCK_SIZES = [48]
-ATTN_BACKENDS = []
-
-if current_platform.is_cuda():
-    ATTN_BACKENDS = ["FLASH_ATTN", "FLASHINFER", "TRITON_ATTN"]
-elif current_platform.is_rocm():
-    ATTN_BACKENDS = ["TRITON_ATTN"]
-
-# Maximum time (seconds) to wait for the async CPU offload transfer
-# to complete before giving up.
-_RESET_CACHE_TIMEOUT = 30 if current_platform.is_rocm() else 10
-
-# ZMQ poll timeout (ms) for the first event.
-_FIRST_EVENT_POLL_MS = 10_000 if current_platform.is_rocm() else 1000
-
-# Hard ceiling (seconds) on how long get_new_cpu_stored_events may loop,
-# to prevent hangs if non-CPU events keep arriving indefinitely.
-_EVENT_DRAIN_TIMEOUT = 60
-
-
-class MockSubscriber:
-    """Helper class to receive and verify published events"""
-
-    def __init__(
-        self,
-        endpoint: str,
-        topic: str,
-    ):
-        self.ctx = zmq.Context.instance()
-        self.topic_bytes = topic.encode("utf-8")
-
-        # Set up subscriber socket
-        self.sub = self.ctx.socket(zmq.SUB)
-        self.sub.setsockopt(zmq.SUBSCRIBE, self.topic_bytes)
-        self.sub.connect(endpoint)
-
-        self.decoder = msgspec.msgpack.Decoder(type=KVEventBatch)
-
-    def get_new_cpu_stored_events(self) -> list[BlockStored]:
-        cpu_stored_events: list[BlockStored] = []
-
-        poller = zmq.Poller()
-        poller.register(self.sub, zmq.POLLIN)
-
-        poll_ms = _FIRST_EVENT_POLL_MS
-        deadline = time.monotonic() + _EVENT_DRAIN_TIMEOUT
-        while time.monotonic() < deadline:
-            events = dict(poller.poll(poll_ms))
-
-            if events.get(self.sub) != zmq.POLLIN:
-                return cpu_stored_events
-
-            topic_bytes, _, payload = self.sub.recv_multipart()
-
-            assert topic_bytes == self.topic_bytes
-
-            event_batch = self.decoder.decode(payload)
-            assert isinstance(event_batch, KVEventBatch)
-            for event in event_batch.events:
-                if isinstance(event, BlockStored) and event.medium == "CPU":
-                    cpu_stored_events.append(event)
-                    poll_ms = 100
-
-        return cpu_stored_events
-
-    def close(self):
-        """Clean up resources"""
-        self.sub.close()
-
-
-def _wait_for_prefix_cache_reset(llm: LLM) -> None:
-    """Wait for async offload transfers to finish so prefix cache can reset.
-
-    The GPU-to-CPU offload runs on a CUDA stream asynchronously.  While blocks
-    are still held by the offload worker, ``reset_prefix_cache`` returns
-    ``False``.  Retry with a short sleep until it succeeds or we time out.
-    """
-    deadline = time.monotonic() + _RESET_CACHE_TIMEOUT
-    while not llm.reset_prefix_cache():
-        if time.monotonic() > deadline:
-            raise TimeoutError(
-                "reset_prefix_cache did not succeed within "
-                f"{_RESET_CACHE_TIMEOUT}s - async offload may be stuck"
-            )
-        time.sleep(0.1)
-
-
-def _latency_test(llm: LLM, subscriber: MockSubscriber):
-    sampling_params = SamplingParams(max_tokens=1)
-
-    num_times_cpu_better_than_cold = 0
-    num_tests = 10
-    total_cold_time = 0.0
-    total_gpu_hit_time = 0.0
-    total_cpu_hit_time = 0.0
-    prompt_token_ids = [0] * 10001
-    for i in tqdm(range(num_tests), desc="Running tests"):
-        prompt_token_ids[0] = i
-        prompts = [TokensPrompt(prompt_token_ids=prompt_token_ids)]
-
-        # run generation - this should trigger saving KV cache
-        start_time = time.time()
-        llm.generate(prompts, sampling_params, use_tqdm=False)
-        cold_time = time.time() - start_time
-        total_cold_time += cold_time
-
-        # run generation again - should hit the GPU prefix cache
-        start_time = time.time()
-        llm.generate(prompts, sampling_params, use_tqdm=False)
-        gpu_hit_time = time.time() - start_time
-        total_gpu_hit_time += gpu_hit_time
-
-        # Wait for the async CPU offload to finish, then reset prefix cache
-        # so the next generate() must reload from CPU rather than GPU.
-        _wait_for_prefix_cache_reset(llm)
-
-        # Verify CPU stored events arrived (offload is done before we
-        # attempt to load from CPU).
-        assert subscriber.get_new_cpu_stored_events(), (
-            f"No CPU stored events received on iteration {i}; "
-            "async offload may not have completed in time"
-        )
-
-        # run generation again - this should trigger loading from CPU
-        start_time = time.time()
-        llm.generate(prompts, sampling_params, use_tqdm=False)
-        cpu_hit_time = time.time() - start_time
-        total_cpu_hit_time += cpu_hit_time
-
-        if cpu_hit_time < cold_time:
-            num_times_cpu_better_than_cold += 1
-
-    print("Average times:")
-    print(f"    Cold: {total_cold_time * 1000 / num_tests:.2f}ms")
-    print(f"    GPU hit: {total_gpu_hit_time * 1000 / num_tests:.2f}ms")
-    print(f"    CPU hit: {total_cpu_hit_time * 1000 / num_tests:.2f}ms")
-
-    assert num_times_cpu_better_than_cold >= 0.8 * num_tests
-
-
-def _accuracy_test(llm: LLM, subscriber: MockSubscriber):
-    sampling_params = SamplingParams(max_tokens=1)
-    cpu_block_size = (
-        llm.llm_engine.vllm_config.kv_transfer_config.kv_connector_extra_config[
-            "block_size"
-        ]
-    )
-
-    subscriber.get_new_cpu_stored_events()
-
-    # prepend prompt to be cpu block aligned
-    prompt = "Let's count to 10. One, two, three, four,"
-    while (
-        len(llm.generate(prompt, use_tqdm=False)[0].prompt_token_ids) % cpu_block_size
-        != 0
-    ):
-        prompt = ". " + prompt
-
-    assert subscriber.get_new_cpu_stored_events()
-
-    test_count = 100
-    success_count = 0
-    for i in range(test_count):
-        if (
-            llm.generate(prompt, sampling_params, use_tqdm=False)[0].outputs[0].text
-            == " five"
-        ):
-            success_count += 1
-
-    assert success_count >= 0.5 * test_count
-
-
-@pytest.mark.parametrize("cpu_block_size", CPU_BLOCK_SIZES)
-@pytest.mark.parametrize("attn_backend", ATTN_BACKENDS)
-def test_cpu_offloading(cpu_block_size: int, attn_backend: str) -> None:
-    """
-    Tests OffloadingConnector with CPUOffloadingSpec.
-    """
-
-    # configure OffloadingConnector (spec_name=CPUOffloadingSpec by default)
-    kv_transfer_config = KVTransferConfig(
-        kv_connector="OffloadingConnector",
-        kv_role="kv_both",
-        kv_connector_extra_config={
-            "cpu_bytes_to_use": 500 << 20,
-            "block_size": cpu_block_size,
-        },
-    )
-
-    port: int
-    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-        s.bind(("0.0.0.0", 0))
-        port = s.getsockname()[1]
-
-    events_endpoint = f"tcp://*:{port}"
-    kv_events_config = KVEventsConfig(
-        enable_kv_cache_events=True,
-        publisher="zmq",
-        endpoint=events_endpoint,
-        topic="test",
-    )
-
-    llm = LLM(
-        model="meta-llama/Llama-3.2-1B-Instruct",
-        gpu_memory_utilization=0.5,
-        kv_events_config=kv_events_config,
-        kv_transfer_config=kv_transfer_config,
-        attention_config={"backend": attn_backend},
-        # ROCm: batch size 1 to reduce variability
-        **({"max_num_seqs": 1} if current_platform.is_rocm() else {}),
-    )
-
-    events_endpoint = events_endpoint.replace("*", "127.0.0.1")
-    subscriber = MockSubscriber(events_endpoint, topic=kv_events_config.topic)
-
-    try:
-        _latency_test(llm, subscriber)
-        _accuracy_test(llm, subscriber)
-    finally:
-        subscriber.close()
-        del llm
diff --git a/tests/v1/kv_offload/test_file_mapper.py b/tests/v1/kv_offload/test_file_mapper.py
new file mode 100644
index 000000000000..920eea92d96d
--- /dev/null
+++ b/tests/v1/kv_offload/test_file_mapper.py
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for FileMapper."""
+
+from unittest.mock import MagicMock
+
+from vllm.v1.kv_offload.base import (
+    OffloadingSpec,
+    make_offload_key,
+)
+from vllm.v1.kv_offload.file_mapper import FileMapper
+
+# ---------------------------------------------------------------------------
+# Shared mocks (mirrors test_fs_tier.py pattern)
+# ---------------------------------------------------------------------------
+
+_MOCK_VLLM_CONFIG = MagicMock()
+_MOCK_VLLM_CONFIG.model_config.model = "test-model"
+_MOCK_VLLM_CONFIG.cache_config.block_size = 16
+_MOCK_VLLM_CONFIG.cache_config.cache_dtype = "torch.float32"
+_MOCK_VLLM_CONFIG.parallel_config.tensor_parallel_size = 1
+_MOCK_VLLM_CONFIG.parallel_config.pipeline_parallel_size = 1
+_MOCK_VLLM_CONFIG.parallel_config.prefill_context_parallel_size = 1
+_MOCK_VLLM_CONFIG.parallel_config.decode_context_parallel_size = 1
+_MOCK_VLLM_CONFIG.parallel_config.rank = 0
+
+_MOCK_KV_CACHE_CONFIG = MagicMock()
+_MOCK_KV_CACHE_CONFIG.kv_cache_groups = []
+
+_MOCK_OFFLOADING_SPEC = MagicMock(spec=OffloadingSpec)
+_MOCK_OFFLOADING_SPEC.vllm_config = _MOCK_VLLM_CONFIG
+_MOCK_OFFLOADING_SPEC.kv_cache_config = _MOCK_KV_CACHE_CONFIG
+_MOCK_OFFLOADING_SPEC.block_size_factor = 1
+
+
+# ---------------------------------------------------------------------------
+# Helper
+# ---------------------------------------------------------------------------
+
+
+def make_mapper_from_offloading_spec(**kwargs) -> FileMapper:
+    """Helper to create FileMapper with customizable mock config."""
+    # Create a copy of the mock config to avoid modifying the global one
+    mock_vllm_config = MagicMock()
+    mock_vllm_config.model_config.model = kwargs.get("model_name", "test-model")
+    mock_vllm_config.cache_config.block_size = kwargs.get("hash_block_size", 16)
+    mock_vllm_config.cache_config.cache_dtype = (
+        f"torch.{kwargs.get('dtype', 'float16')}"
+    )
+    mock_vllm_config.parallel_config.tensor_parallel_size = kwargs.get("tp_size", 1)
+    mock_vllm_config.parallel_config.pipeline_parallel_size = kwargs.get("pp_size", 1)
+    mock_vllm_config.parallel_config.prefill_context_parallel_size = kwargs.get(
+        "pcp_size", 1
+    )
+    mock_vllm_config.parallel_config.decode_context_parallel_size = kwargs.get(
+        "dcp_size", 1
+    )
+    mock_vllm_config.parallel_config.rank = kwargs.get("rank", 0)
+
+    mock_kv_cache_config = MagicMock()
+    mock_kv_cache_config.kv_cache_groups = []
+
+    mock_offloading_spec = MagicMock(spec=OffloadingSpec)
+    mock_offloading_spec.vllm_config = mock_vllm_config
+    mock_offloading_spec.kv_cache_config = mock_kv_cache_config
+    mock_offloading_spec.block_size_factor = kwargs.get("block_size_factor", 1)
+
+    return FileMapper.from_offloading_spec(
+        root_dir=kwargs.get("root_dir", "/tmp/cache"),
+        offloading_spec=mock_offloading_spec,
+        gpu_blocks_per_file=mock_offloading_spec.block_size_factor,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+def test_get_file_name_full_structure():
+    """
+    Path must match: <base_path>_r<rank>/<hhh>/<hh>_g<group_idx>/<hash_hex>.bin
+
+    Concretely:
+      - The segment immediately after base_path must end with `_r0`
+      - The next segment is the first 3 hex chars of the block hash
+      - The next segment is <2 hex chars>_g<group_idx>
+      - The final segment is <full hash hex>.bin
+    """
+    rank = 3
+    group_idx = 2
+    block_hash = bytes(range(8))  # deterministic, non-zero bytes
+    fm = make_mapper_from_offloading_spec(rank=rank)
+    key = make_offload_key(block_hash, group_idx)
+    path = fm.get_file_name(key)
+
+    expected_path = (
+        "/tmp/cache/test-model_588656ebcc66_r3/000/10_g2/0001020304050607.bin"
+    )
+    assert path == expected_path
+
+
+def test_get_run_config_fields():
+    fm = make_mapper_from_offloading_spec(
+        model_name="my-model",
+        dtype="bfloat16",
+        tp_size=2,
+    )
+    cfg = fm.get_run_config()
+    assert cfg == {
+        "model_name": "my-model",
+        "hash_block_size": 16,
+        "gpu_blocks_per_file": 1,
+        "tp_size": 2,
+        "pp_size": 1,
+        "pcp_size": 1,
+        "dcp_size": 1,
+        "dtype": "bfloat16",
+        "kv_cache_groups": [],
+        "inference_engine": "vllm",
+    }
+
+
+def test_get_config_file_path():
+    fm = make_mapper_from_offloading_spec()
+    config_path = fm.get_config_file_path()
+    assert config_path == f"{fm.base_path}/config.json"
diff --git a/tests/v1/kv_offload/test_fs_tier.py b/tests/v1/kv_offload/test_fs_tier.py
new file mode 100644
index 000000000000..fcb5879b9bdc
--- /dev/null
+++ b/tests/v1/kv_offload/test_fs_tier.py
@@ -0,0 +1,256 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Unit tests for FileSystemTierManager.
+
+These tests use real disk I/O to verify the Python filesystem tier implementation.
+The tier manager writes KV cache blocks to disk and reads them back, verifying
+data integrity throughout the process.
+"""
+
+import os
+import time
+from unittest.mock import MagicMock
+
+import numpy as np
+import pytest
+import torch
+
+from vllm.v1.kv_offload.base import OffloadKey, ReqContext, make_offload_key
+from vllm.v1.kv_offload.tiering.base import JobMetadata
+from vllm.v1.kv_offload.tiering.fs.manager import (
+    FileSystemTierManager,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+_BLOCK_ELEMENTS = 512 * 1024  # 2 MB per block (float32 × 512K = 2MB)
+_DTYPE = torch.float32
+_CTX = ReqContext(req_id="test")
+
+_MOCK_VLLM_CONFIG = MagicMock()
+_MOCK_VLLM_CONFIG.model_config.model = "test-model"
+_MOCK_VLLM_CONFIG.cache_config.block_size = 16
+_MOCK_VLLM_CONFIG.cache_config.cache_dtype = "torch.float32"
+_MOCK_VLLM_CONFIG.parallel_config.tensor_parallel_size = 1
+_MOCK_VLLM_CONFIG.parallel_config.pipeline_parallel_size = 1
+_MOCK_VLLM_CONFIG.parallel_config.prefill_context_parallel_size = 1
+_MOCK_VLLM_CONFIG.parallel_config.decode_context_parallel_size = 1
+_MOCK_VLLM_CONFIG.parallel_config.rank = 0
+
+_MOCK_KV_CACHE_CONFIG = MagicMock()
+_MOCK_KV_CACHE_CONFIG.kv_cache_groups = []
+
+_MOCK_OFFLOADING_SPEC = MagicMock()
+_MOCK_OFFLOADING_SPEC.vllm_config = _MOCK_VLLM_CONFIG
+_MOCK_OFFLOADING_SPEC.kv_cache_config = _MOCK_KV_CACHE_CONFIG
+_MOCK_OFFLOADING_SPEC.block_size_factor = 1
+
+
+def key(n: int) -> OffloadKey:
+    return make_offload_key(n.to_bytes(8, "big"), 0)
+
+
+def make_job(
+    job_id: int,
+    keys: list[OffloadKey],
+    block_ids: list[int] | None = None,
+    is_promotion: bool = False,
+) -> JobMetadata:
+    if block_ids is None:
+        block_ids = list(range(len(keys)))
+    return JobMetadata(
+        job_id=job_id,
+        keys=keys,
+        block_ids=np.array(block_ids, dtype=np.int64),
+        is_promotion=is_promotion,
+        req_context=_CTX,
+    )
+
+
+def drain(tier: FileSystemTierManager, max_rounds: int = 40) -> list:
+    """
+    Call get_finished() repeatedly until no new results arrive for 5
+    consecutive rounds or max_rounds is reached.
+    """
+    results = []
+    idle = 0
+    for _ in range(max_rounds):
+        time.sleep(0.01)
+        new = list(tier.get_finished())
+        results.extend(new)
+        if new:
+            idle = 0
+        else:
+            idle += 1
+            if idle >= 5:
+                break
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def fs_tier(tmp_path):
+    tensor = torch.zeros((4, _BLOCK_ELEMENTS), dtype=_DTYPE)
+    mock_view = memoryview(tensor.numpy())
+    tier = FileSystemTierManager(
+        offloading_spec=_MOCK_OFFLOADING_SPEC,
+        primary_kv_view=mock_view,
+        tier_type="fs_python",
+        root_dir=str(tmp_path),
+        n_read_threads=4,
+        n_write_threads=4,
+    )
+    yield tier, tensor
+    tier.shutdown()
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+def test_lookup_empty_tier(fs_tier):
+    tier, _ = fs_tier
+    assert tier.lookup(key(1), _CTX) is False
+    assert tier.lookup(key(2), _CTX) is False
+
+
+def test_store_creates_file_and_lookup_succeeds(fs_tier):
+    tier, _ = fs_tier
+    job = make_job(1, [key(1)], [0])
+    tier.submit_store(job)
+    results = drain(tier)
+    assert len(results) == 1
+    assert results[0].success
+    assert tier.lookup(key(1), _CTX) is True
+    dest = tier.file_mapper.get_file_name(key(1))
+    assert os.path.exists(dest), f"Expected file at {dest}"
+
+
+def test_store_then_load_roundtrip(fs_tier):
+    tier, _ = fs_tier
+    job_s = make_job(1, [key(1), key(2)], [0, 1])
+    tier.submit_store(job_s)
+    store_results = drain(tier)
+    assert all(r.success for r in store_results)
+
+    assert tier.lookup(key(1), _CTX) is True
+    assert tier.lookup(key(2), _CTX) is True
+
+    job_l = make_job(2, [key(1), key(2)], [2, 3], is_promotion=True)
+    tier.submit_load(job_l)
+    load_results = drain(tier)
+    assert all(r.success for r in load_results)
+    # Blocks stay on disk after load
+    assert tier.lookup(key(1), _CTX) is True
+    assert tier.lookup(key(2), _CTX) is True
+
+
+def test_invalid_path_raises_at_construction():
+    """Construction must fail immediately when the config file cannot be written."""
+    tensor = torch.zeros((32, _BLOCK_ELEMENTS), dtype=_DTYPE)
+    mock_view = memoryview(tensor.numpy())
+
+    with pytest.raises(OSError):
+        FileSystemTierManager(
+            offloading_spec=_MOCK_OFFLOADING_SPEC,
+            primary_kv_view=mock_view,
+            tier_type="fs_python",
+            root_dir="/dev/null/invalid_path",
+        )
+
+
+def test_failed_load_missing_file(fs_tier):
+    """Test that loading a block whose file does not exist results in a failed job."""
+    tier, _ = fs_tier
+    job = make_job(1, [key(99)], [0], is_promotion=True)
+    tier.submit_load(job)
+    results = drain(tier)
+    assert len(results) == 1
+    assert not results[0].success
+
+
+def test_multiple_jobs_tracked_independently(fs_tier):
+    tier, _ = fs_tier
+    job1 = make_job(1, [key(1)], [0])
+    job2 = make_job(2, [key(2)], [1])
+    tier.submit_store(job1)
+    tier.submit_store(job2)
+    results = drain(tier)
+    job_ids = {r.job_id for r in results}
+    assert job_ids == {1, 2}
+    assert tier.lookup(key(1), _CTX) is True
+    assert tier.lookup(key(2), _CTX) is True
+
+
+def test_multi_block_job_partial_failure(fs_tier):
+    """A load job where one block file is missing yields a single failed JobResult."""
+    tier, _ = fs_tier
+    # Store two of three keys
+    tier.submit_store(make_job(1, [key(10), key(11)], [0, 1]))
+    assert all(r.success for r in drain(tier))
+
+    # Load all three — key(99) was never stored
+    tier.submit_load(
+        make_job(2, [key(10), key(11), key(99)], [0, 1, 2], is_promotion=True)
+    )
+    results = drain(tier)
+
+    assert len(results) == 1
+    assert results[0].job_id == 2
+    assert not results[0].success
+
+
+def test_shutdown_discards_pending_tasks(fs_tier):
+    """Shutdown clears both queues and stops all worker threads without draining."""
+    tier, _ = fs_tier
+    # Submit many tasks to ensure some remain pending
+    for i in range(10):
+        tier.submit_store(make_job(i, [key(i)], [i % 4]))
+
+    # Shutdown immediately without draining
+    tier.shutdown()
+
+    # Verify queues are cleared and threads stopped
+    assert len(tier._pool._load_q) == 0
+    assert len(tier._pool._store_q) == 0
+    assert all(not t.is_alive() for t in tier._pool._threads)
+
+
+def test_store_load_data_integrity(fs_tier):
+    """Data written by store must be exactly recovered by load."""
+    tier, tensor = fs_tier
+    # Populate tensor with random data
+    tensor[:] = torch.rand((4, _BLOCK_ELEMENTS), dtype=_DTYPE)
+
+    # Store first 2 blocks
+    num_store = 2
+    expected = tensor[:num_store].clone()
+
+    store_ids = list(range(num_store))
+    keys = [key(i) for i in range(num_store)]
+
+    tier.submit_store(make_job(1, keys, store_ids))
+    results = drain(tier)
+    assert all(r.success for r in results)
+
+    # Overwrite source blocks to prove data is read from disk
+    tensor[:num_store] = 0.0
+
+    # Load into last 2 blocks
+    load_ids = [2, 3]
+    tier.submit_load(make_job(2, keys, load_ids, is_promotion=True))
+    results = drain(tier)
+    assert all(r.success for r in results)
+
+    for i, bid in enumerate(load_ids):
+        assert torch.allclose(tensor[bid], expected[i]), (
+            f"Block {bid} data mismatch after store+load"
+        )
diff --git a/tests/v1/kv_offload/test_tiering_offloading.py b/tests/v1/kv_offload/test_tiering_offloading.py
new file mode 100644
index 000000000000..61f7cdb7606b
--- /dev/null
+++ b/tests/v1/kv_offload/test_tiering_offloading.py
@@ -0,0 +1,441 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Unit tests for TieringOffloadingManager and ExampleSecondaryTierManager.
+
+These tests verify:
+1. Basic tiered offloading operations (store, load, lookup)
+2. Cascade behavior (blocks stored to all secondary tiers)
+3. Promotion behavior (blocks loaded from secondary to primary to GPU)
+4. ref_cnt management (blocks protected during async transfers)
+5. Eviction coordination between tiers
+"""
+
+from collections.abc import Iterable
+from unittest.mock import MagicMock
+
+import pytest
+import torch
+
+from vllm.v1.kv_offload.base import (
+    OffloadKey,
+    ReqContext,
+    make_offload_key,
+)
+from vllm.v1.kv_offload.tiering.example.manager import ExampleSecondaryTierManager
+from vllm.v1.kv_offload.tiering.manager import (
+    CPUPrimaryTierOffloadingManager,
+    TieringOffloadingManager,
+)
+
+_CTX = ReqContext(req_id="test")
+_MOCK_OFFLOADING_SPEC = MagicMock()
+
+
+def _mock_mmap_region(num_blocks: int, row_bytes: int = 16):
+    """Create a mock SharedOffloadRegion for testing."""
+    mock = MagicMock()
+    view = memoryview(torch.zeros((num_blocks, row_bytes), dtype=torch.int8).numpy())
+    mock.create_kv_memoryview.return_value = view
+    return mock
+
+
+def to_keys(int_ids: Iterable[int]) -> list[OffloadKey]:
+    return [make_offload_key(str(i).encode(), 0) for i in int_ids]
+
+
+def count_hits(manager, keys: list[OffloadKey]) -> int | None:
+    """Count consecutive lookup hits from the start of keys.
+
+    Returns the count of leading True results, or None if any lookup
+    returns None (retry-later signal).
+    """
+    count = 0
+    for key in keys:
+        result = manager.lookup(key, _CTX)
+        if result is None:
+            return None
+        if not result:
+            break
+        count += 1
+    return count
+
+
+class TestExampleSecondaryTierManager:
+    """Tests for ExampleSecondaryTierManager implementation."""
+
+    def test_basic_store_and_lookup(self):
+        """Test basic store and lookup operations."""
+        mock_view = memoryview(torch.zeros((10, 16), dtype=torch.int8).numpy())
+        tier = ExampleSecondaryTierManager(
+            offloading_spec=_MOCK_OFFLOADING_SPEC,
+            primary_kv_view=mock_view,
+            tier_type="example",
+            custom_param=67,
+        )
+
+        # Initially empty
+        blocks = to_keys(range(3))
+        assert tier.lookup(blocks[0], _CTX) is False
+
+        # Store blocks (simulate with direct insertion for testing)
+        tier.blocks[blocks[0]] = True
+        tier.blocks[blocks[1]] = True
+
+        # Lookup should find first two blocks
+        assert tier.lookup(blocks[0], _CTX) is True
+        assert tier.lookup(blocks[1], _CTX) is True
+
+        # Third block not present
+        assert tier.lookup(blocks[2], _CTX) is False
+
+
+class TestTieringOffloadingManager:
+    """Tests for TieringOffloadingManager."""
+
+    @pytest.fixture
+    def manager_setup(self):
+        # Create primary tier (CPU-based)
+        mock_region = _mock_mmap_region(5)
+        self.primary_tier = CPUPrimaryTierOffloadingManager(
+            num_blocks=5, mmap_region=mock_region
+        )
+
+        mock_view = mock_region.create_kv_memoryview()
+
+        # Create secondary tiers with the primary view
+        self.secondary_tier1 = ExampleSecondaryTierManager(
+            offloading_spec=_MOCK_OFFLOADING_SPEC,
+            primary_kv_view=mock_view,
+            tier_type="example",
+        )
+        self.secondary_tier2 = ExampleSecondaryTierManager(
+            offloading_spec=_MOCK_OFFLOADING_SPEC,
+            primary_kv_view=mock_view,
+            tier_type="example",
+        )
+
+        # Create tiered manager
+        self.manager = TieringOffloadingManager(
+            primary_tier=self.primary_tier,
+            secondary_tiers=[self.secondary_tier1, self.secondary_tier2],
+        )
+
+    def test_basic_store_to_primary(self, manager_setup):
+        """Test basic store operation to primary tier."""
+        blocks = to_keys(range(3))
+
+        # Prepare store
+        result = self.manager.prepare_store(blocks, _CTX)
+        assert result is not None
+        assert len(result.keys_to_store) == 3
+
+        # Complete store
+        self.manager.complete_store(blocks, _CTX, success=True)
+
+        # Blocks should be in primary tier
+        assert count_hits(self.primary_tier, blocks) == 3
+
+    def test_cascade_to_all_secondary_tiers(self, manager_setup):
+        """Test that blocks are cascaded to ALL secondary tiers."""
+        blocks = to_keys(range(3))
+
+        self.secondary_tier1.submit_store = MagicMock(
+            wraps=self.secondary_tier1.submit_store
+        )
+        self.secondary_tier2.submit_store = MagicMock(
+            wraps=self.secondary_tier2.submit_store
+        )
+
+        # Store to primary
+        result = self.manager.prepare_store(blocks, _CTX)
+        assert result is not None
+
+        # Complete store (triggers cascade via submit_store on each tier)
+        self.manager.complete_store(blocks, _CTX, success=True)
+
+        # submit_store was called once per secondary tier
+        self.secondary_tier1.submit_store.assert_called_once()
+        self.secondary_tier2.submit_store.assert_called_once()
+
+        # Blocks should be in both secondary tiers
+        assert self.secondary_tier1.get_num_blocks() == 3
+        assert self.secondary_tier2.get_num_blocks() == 3
+
+        # Verify blocks are present
+        assert all(self.secondary_tier1.lookup(b, _CTX) for b in blocks)
+        assert all(self.secondary_tier2.lookup(b, _CTX) for b in blocks)
+
+    def test_ref_cnt_protection_during_cascade(self, manager_setup):
+        """Test that ref_cnt protects blocks during cascade."""
+        blocks = to_keys(range(3))
+
+        # Store to primary
+        result = self.manager.prepare_store(blocks, _CTX)
+        assert result is not None
+        self.manager.complete_store(blocks, _CTX, success=True)
+
+        # After complete_store, blocks should have ref_cnt > 0
+        # (one for each secondary tier)
+        for block_hash in blocks:
+            block = self.primary_tier._policy.get(block_hash)
+            # ref_cnt should be 2 (one for each secondary tier)
+            assert block.ref_cnt == 2
+
+        # End of step 1: _maybe_process_finished_jobs() was already called by
+        # prepare_store() above (setting the per-step flag), so take_events()
+        # does NOT poll get_finished() again — cascade completions remain
+        # unprocessed until the next step.
+        list(self.manager.take_events())
+
+        # ref_cnt still held: cascade jobs finished (sync tier) but haven't
+        # been polled yet because the per-step guard skipped the second call.
+        for block_hash in blocks:
+            block = self.primary_tier._policy.get(block_hash)
+            assert block.ref_cnt == 2
+
+        # Secondary tiers have completed jobs waiting to be drained
+        assert len(self.secondary_tier1.completed_jobs) > 0
+        assert len(self.secondary_tier2.completed_jobs) > 0
+
+        # End of step 2: flag was reset, so _maybe_process_finished_jobs()
+        # runs and processes the cascade completions (complete_read → ref_cnt--)
+        list(self.manager.take_events())
+
+        # After cascade completes, ref_cnt should be 0
+        for block_hash in blocks:
+            block = self.primary_tier._policy.get(block_hash)
+            assert block.ref_cnt == 0
+
+        # All completed jobs have been drained
+        assert len(self.secondary_tier1.completed_jobs) == 0
+        assert len(self.secondary_tier2.completed_jobs) == 0
+
+    def test_lookup_from_primary(self, manager_setup):
+        """Test lookup when blocks are in primary tier."""
+        blocks = to_keys(range(3))
+
+        # Store blocks
+        self.manager.prepare_store(blocks, _CTX)
+        self.manager.complete_store(blocks, _CTX, success=True)
+
+        # Lookup should find all blocks in primary
+        assert count_hits(self.manager, blocks) == 3
+
+    def test_promotion_from_secondary(self, manager_setup):
+        """Test promotion of blocks from secondary to primary tier."""
+        blocks = to_keys(range(3))
+
+        # Manually add blocks to secondary tier (simulate previous cascade)
+        for block in blocks:
+            self.secondary_tier1.blocks[block] = True
+
+        # Lookup each block to initiate promotion for all of them
+        for block in blocks:
+            result = self.manager.lookup(block, _CTX)
+            assert result is None  # Retry later (promotion initiated)
+
+        # End of step 1: flushes deferred submit_load() calls
+        list(self.manager.take_events())
+
+        # End of step 2: processes the completed promotion jobs
+        list(self.manager.take_events())
+
+        # Now blocks should be in primary tier
+        assert count_hits(self.primary_tier, blocks) == 3
+
+        # Next lookup should succeed
+        assert count_hits(self.manager, blocks) == 3
+
+    def test_partial_lookup(self, manager_setup):
+        """Test lookup with partial hits."""
+        blocks = to_keys(range(5))
+
+        # Store first 3 blocks to primary
+        self.manager.prepare_store(blocks[:3], _CTX)
+        self.manager.complete_store(blocks[:3], _CTX, success=True)
+
+        # Lookup all 5 blocks should return 3 (first 3 found)
+        assert count_hits(self.manager, blocks) == 3
+
+    def test_eviction_in_primary_tier(self, manager_setup):
+        """Test eviction in primary tier when capacity is exceeded."""
+        # Primary tier has capacity of 5 blocks
+        # First, fill the primary tier
+        blocks = to_keys(range(5))
+        result = self.manager.prepare_store(blocks, _CTX)
+        assert result is not None
+        assert len(result.keys_to_store) == 5
+        self.manager.complete_store(blocks, _CTX, success=True)
+
+        # End of step: release ref_cnt from cascade
+        list(self.manager.take_events())
+
+        # Now try to store 2 more blocks (should trigger eviction)
+        more_blocks = to_keys(range(5, 7))
+        result = self.manager.prepare_store(more_blocks, _CTX)
+
+        # Should evict 2 blocks from primary tier
+        assert result is not None
+        assert len(result.evicted_keys) == 2
+        assert len(result.keys_to_store) == 2
+
+    def test_touch_propagates_to_all_tiers(self, manager_setup):
+        """Test that touch() propagates to all tiers."""
+        blocks = to_keys(range(3))
+
+        # Store blocks
+        self.manager.prepare_store(blocks, _CTX)
+        self.manager.complete_store(blocks, _CTX, success=True)
+        list(self.manager.take_events())
+
+        self.secondary_tier1.touch = MagicMock(wraps=self.secondary_tier1.touch)
+        self.secondary_tier2.touch = MagicMock(wraps=self.secondary_tier2.touch)
+
+        # Touch blocks
+        self.manager.touch(blocks, _CTX)
+
+        # Verify touch was called on primary tier (check LRU order)
+        primary_keys = list(self.primary_tier._policy.blocks.keys())
+        assert primary_keys[-3:] == list(reversed(blocks))
+
+        # Verify touch was propagated to all secondary tiers
+        self.secondary_tier1.touch.assert_called_once_with(blocks, _CTX)
+        self.secondary_tier2.touch.assert_called_once_with(blocks, _CTX)
+
+    def test_failed_store_no_cascade(self, manager_setup):
+        """Test that failed GPU→primary store doesn't cascade."""
+        blocks = to_keys(range(3))
+
+        self.secondary_tier1.submit_store = MagicMock(
+            wraps=self.secondary_tier1.submit_store
+        )
+        self.secondary_tier2.submit_store = MagicMock(
+            wraps=self.secondary_tier2.submit_store
+        )
+
+        # Prepare store
+        result = self.manager.prepare_store(blocks, _CTX)
+        assert result is not None
+
+        # Complete store with failure — cascade must not happen
+        self.manager.complete_store(blocks, _CTX, success=False)
+
+        # submit_store was never called on either secondary tier
+        self.secondary_tier1.submit_store.assert_not_called()
+        self.secondary_tier2.submit_store.assert_not_called()
+
+    def test_lookup_batches_submit_load_per_request(self, manager_setup):
+        """lookup() defers submit_load until take_events(), one call per request.
+
+        Blocks from different requests each get their own submit_load call, each
+        carrying the correct req_context.
+        """
+        blocks = to_keys(range(4))
+        for block in blocks:
+            self.secondary_tier1.blocks[block] = True
+
+        self.secondary_tier1.submit_load = MagicMock(
+            wraps=self.secondary_tier1.submit_load
+        )
+
+        ctx_a = ReqContext(req_id="req_a")
+        ctx_b = ReqContext(req_id="req_b")
+
+        # All lookups return None: secondary hit triggers promotion (in-flight)
+        assert self.manager.lookup(blocks[0], ctx_a) is None
+        assert self.manager.lookup(blocks[1], ctx_a) is None
+        assert self.manager.lookup(blocks[2], ctx_b) is None
+        assert self.manager.lookup(blocks[3], ctx_b) is None
+
+        # submit_load must not fire during lookup - only at end of step
+        self.secondary_tier1.submit_load.assert_not_called()
+
+        # simulate end of step
+        list(self.manager.take_events())
+
+        assert self.secondary_tier1.submit_load.call_count == 2
+        calls = self.secondary_tier1.submit_load.call_args_list
+        jm_a = calls[0].args[0]
+        jm_b = calls[1].args[0]
+        assert set(jm_a.keys) == {blocks[0], blocks[1]}
+        assert jm_a.req_context is ctx_a
+        assert set(jm_b.keys) == {blocks[2], blocks[3]}
+        assert jm_b.req_context is ctx_b
+
+    def test_lookup_shared_block_no_duplicate_promotion(self, manager_setup):
+        """A block looked up by two requests in the same step is promoted once.
+
+        The first lookup initiates promotion (returns None via secondary hit).
+        The second lookup sees ref_cnt=-1 on the primary slot and returns None
+        via the primary in-flight path — without triggering a second promotion.
+        """
+        shared_block = to_keys([0])[0]
+        self.secondary_tier1.blocks[shared_block] = True
+
+        self.secondary_tier1.submit_load = MagicMock(
+            wraps=self.secondary_tier1.submit_load
+        )
+
+        ctx_a = ReqContext(req_id="req_a")
+        ctx_b = ReqContext(req_id="req_b")
+
+        result_a = self.manager.lookup(shared_block, ctx_a)
+        result_b = self.manager.lookup(shared_block, ctx_b)
+
+        # Both see None (in-flight), but promotion is only queued once
+        assert result_a is None
+        assert result_b is None
+
+        list(self.manager.take_events())
+
+        # Only one submit_load call despite two lookups
+        self.secondary_tier1.submit_load.assert_called_once()
+        job_metadata = self.secondary_tier1.submit_load.call_args.args[0]
+        assert list(job_metadata.keys) == [shared_block]
+        assert job_metadata.req_context is ctx_a
+
+    def test_complete_store_forwards_req_context_to_submit_store(self, manager_setup):
+        """complete_store cascades to secondary tiers with the correct req_context."""
+        blocks = to_keys(range(2))
+
+        self.secondary_tier1.submit_store = MagicMock(
+            wraps=self.secondary_tier1.submit_store
+        )
+
+        ctx = ReqContext(req_id="req_ctx", kv_transfer_params={"key": "value"})
+
+        self.manager.prepare_store(blocks, ctx)
+        self.manager.complete_store(blocks, ctx, success=True)
+
+        assert self.secondary_tier1.submit_store.call_count == 1
+        job_metadata = self.secondary_tier1.submit_store.call_args.args[0]
+        assert job_metadata.req_context is ctx
+
+
+class TestTieringOffloadingWithoutSecondaryTiers:
+    """Test TieringOffloadingManager with no secondary tiers (backward compat)."""
+
+    def test_works_without_secondary_tiers(self):
+        """Test that manager works with empty secondary_tiers list."""
+        primary_tier = CPUPrimaryTierOffloadingManager(
+            num_blocks=5, mmap_region=_mock_mmap_region(5)
+        )
+
+        # Create manager with no secondary tiers
+        manager = TieringOffloadingManager(
+            primary_tier=primary_tier, secondary_tiers=[]
+        )
+
+        blocks = to_keys(range(3))
+
+        # Should work like a regular OffloadingManager
+        result = manager.prepare_store(blocks, _CTX)
+        assert result is not None
+        manager.complete_store(blocks, _CTX, success=True)
+
+        assert count_hits(manager, blocks) == 3
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/v1/kv_offload/test_worker.py b/tests/v1/kv_offload/test_worker.py
index fbdac5f9dc7c..b291fcf1b857 100644
--- a/tests/v1/kv_offload/test_worker.py
+++ b/tests/v1/kv_offload/test_worker.py
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from vllm.v1.kv_offload.abstract import LoadStoreSpec
+from vllm.v1.kv_offload.base import LoadStoreSpec
 from vllm.v1.kv_offload.worker.worker import (
     OffloadingHandler,
     OffloadingWorker,
diff --git a/tests/v1/logits_processors/test_correctness.py b/tests/v1/logits_processors/test_correctness.py
index 792168877663..e7ec66302087 100644
--- a/tests/v1/logits_processors/test_correctness.py
+++ b/tests/v1/logits_processors/test_correctness.py
@@ -30,25 +30,31 @@
     MinPLogitsProcessor,
     MinTokensLogitsProcessor,
     MoveDirectionality,
-    ThinkingTokenBudgetLogitsProcessor,
     build_logitsprocs,
 )
 from vllm.v1.sample.metadata import SamplingMetadata
+from vllm.v1.sample.thinking_budget_state import (
+    ThinkingBudgetStateHolder,
+    maybe_create_thinking_budget_state_holder,
+)
 
 PIN_MEMORY_AVAILABLE = is_pin_memory_available()
 MAX_NUM_REQS = 256
 VOCAB_SIZE = 1024
 NUM_OUTPUT_TOKENS = 20
-CUDA_DEVICES = [
-    f"{current_platform.device_type}:{i}"
+DEVICE_TYPE = current_platform.device_type
+DEVICES = [
+    f"{DEVICE_TYPE}:{i}"
     for i in range(1 if current_platform.device_count() == 1 else 2)
 ]
 MAX_NUM_PROMPT_TOKENS = 64
 MIN_TOKENS_LEN_THRESHOLD = 5
 REQS_PER_LOGITPROC = 50
 STR_NO_LOGITPROC = "none"
+# Thinking budget uses ``ThinkingBudgetStateHolder`` (not a logits processor).
+STR_THINKING_BUDGET = "thinking_budget"
 
-# ThinkingTokenBudgetLogitsProcessor testing constants
+# Thinking token budget testing constants
 THINKING_TOKEN_BUDGET = 5
 THINK_START_TOKEN_ID = 999
 THINK_END_TOKEN_ID = 998
@@ -79,15 +85,8 @@ def __init__(self, workload_index: int, logitproc_type: LogitprocType):
         if num_tokens > 0:
             # Use diverse random tokens
             self.out_tokens = [random.randint(1, 950) for _ in range(num_tokens)]
-            # Set first token for ThinkingTokenBudget testing
-            is_thinking_processor = (
-                logitproc_type is ThinkingTokenBudgetLogitsProcessor
-                or (
-                    hasattr(logitproc_type, "__name__")
-                    and logitproc_type.__name__ == "ThinkingTokenBudgetLogitsProcessor"
-                )
-            )
-            if is_thinking_processor:
+            # Think-start seed for ``STR_THINKING_BUDGET`` rows.
+            if logitproc_type == STR_THINKING_BUDGET:
                 self.out_tokens[0] = THINK_START_TOKEN_ID
         else:
             self.out_tokens = []
@@ -101,10 +100,11 @@ def __str__(self):
 
 
 class MockReasoningConfig:
-    """Mock reasoning config for testing ThinkingTokenBudgetLogitsProcessor."""
+    """Minimal reasoning config for ``ThinkingBudgetStateHolder`` tests."""
 
-    think_start_token_ids = [THINK_START_TOKEN_ID]
-    think_end_token_ids = [THINK_END_TOKEN_ID]
+    reasoning_start_token_ids = [THINK_START_TOKEN_ID]
+    reasoning_end_token_ids = [THINK_END_TOKEN_ID]
+    enabled = True
 
 
 def _generate_fake_sampling_metadata(
@@ -135,6 +135,18 @@ def _generate_fake_sampling_metadata(
         is_pin_memory=PIN_MEMORY_AVAILABLE,
         is_pooling_model=False,
     )
+    num_spec = (
+        vllm_config.speculative_config.num_speculative_tokens
+        if vllm_config.speculative_config
+        else 0
+    )
+    thinking_holder = maybe_create_thinking_budget_state_holder(
+        vllm_config.reasoning_config,
+        vllm_config.scheduler_config.max_num_seqs,
+        num_spec,
+        device,
+        PIN_MEMORY_AVAILABLE,
+    )
     fake_sampling_metadata = SamplingMetadata(
         temperature=torch.full((batch_size,), 0.0),
         all_greedy=True,
@@ -154,6 +166,7 @@ def _generate_fake_sampling_metadata(
         allowed_token_ids_mask=None,
         bad_words_token_ids={},
         logitsprocs=logitsprocs,
+        thinking_budget_state_holder=thinking_holder,
     )
     return fake_sampling_metadata
 
@@ -185,7 +198,7 @@ def _sampling_params_from_logitproc(logitproc_type: LogitprocType) -> SamplingPa
 
 def _generate_mixed_logitsprocs_batch_params(
     reqs_per_logitproc: int,
-    logitsprocs_types: list[str],
+    logitsprocs_types: list[LogitprocType],
 ) -> list[LogitsProcsRequestParams]:
     """Define key params for a batch of requests with a different
     logitproc enabled per request.
@@ -448,23 +461,21 @@ def _thinking_budget_validate(
     request_params: LogitsProcsRequestParams,
     step_idx: int,
 ) -> None:
-    """Validate thinking token budget processor behavior"""
-    # Get the ThinkingTokenBudgetLogitsProcessor instance
-    tb_processor: ThinkingTokenBudgetLogitsProcessor = next(
-        test_fakes.get_logitsprocs_by_cls(ThinkingTokenBudgetLogitsProcessor)
-    )
+    """Validate ``ThinkingBudgetStateHolder`` thinking-budget behavior.
 
-    # Get current request state
-    state = tb_processor._state.get(batch_index)
+    State is keyed by **batch slot** (same index space as logits rows), matching
+    ``sync_batch`` / sampler integration (see PR #34668 discussion).
+    """
+    holder = test_fakes.sampling_metadata.thinking_budget_state_holder
+    assert holder is not None
+    state = holder._state.get(batch_index)
     params = request_params.params
 
-    # Validate thinking token budget configuration
     if hasattr(params, "thinking_token_budget") and params.thinking_token_budget:
-        # State should exist for requests with thinking_token_budget
         if state is None:
             _raise_error_invalid(
                 msg_suffix=(
-                    f"Expected state for batch {batch_index} "
+                    f"Expected holder state for batch slot {batch_index} "
                     f"with thinking_token_budget={params.thinking_token_budget}"
                 ),
                 batch_index=batch_index,
@@ -472,10 +483,8 @@ def _thinking_budget_validate(
                 step_idx=step_idx,
             )
 
-        # Validate budget matches what was set
         expected_budget = params.thinking_token_budget
         actual_budget = state["thinking_token_budget"]
-
         if actual_budget != expected_budget:
             _raise_error_invalid(
                 msg_suffix=(
@@ -486,13 +495,9 @@ def _thinking_budget_validate(
                 step_idx=step_idx,
             )
 
-        # Check if we're in thinking mode and validate token counting
         output_tokens = request_params.out_tokens
-
-        # Find if thinking has started in output tokens
+        start_tokens = holder.think_start_token_ids
         thinking_started = False
-        start_tokens = tb_processor.think_start_token_ids
-
         if len(start_tokens) > 0:
             for i in range(len(output_tokens) - len(start_tokens) + 1):
                 if output_tokens[i : i + len(start_tokens)] == start_tokens:
@@ -500,61 +505,42 @@ def _thinking_budget_validate(
                     break
 
         if thinking_started:
-            # If budget is exceeded, validate end token forcing
             think_count = state["think_count"]
             budget = state["thinking_token_budget"]
+            if think_count >= budget and not state["in_end"]:
+                _raise_error_invalid(
+                    msg_suffix=(
+                        f"Budget exceeded ({think_count} >= {budget}) but "
+                        "in_end is false"
+                    ),
+                    batch_index=batch_index,
+                    request_params=request_params,
+                    step_idx=step_idx,
+                )
 
-            if think_count >= budget:
-                if not state["in_end"]:
+            end_tokens = holder.think_end_token_ids
+            if (
+                think_count >= budget
+                and state["in_end"]
+                and len(end_tokens) > 0
+                and holder.has_tracked_requests()
+            ):
+                expected_end_token_id = end_tokens[
+                    min(state["end_count"], len(end_tokens) - 1)
+                ]
+                # Holder bumps forced vocab positions to 1e9 (does not -inf others).
+                forced_logit = float(logits_new[batch_index, expected_end_token_id])
+                if forced_logit < 1.0e8:
                     _raise_error_invalid(
                         msg_suffix=(
-                            f"Budget exceeded ({think_count} >= "
-                            f"{budget}) but not "
-                            "forcing end tokens"
+                            f"Expected forced end token {expected_end_token_id} "
+                            f"with large logit, got {forced_logit}"
                         ),
                         batch_index=batch_index,
                         request_params=request_params,
                         step_idx=step_idx,
                     )
 
-                # Validate that only end tokens are allowed
-                end_tokens = tb_processor.think_end_token_ids
-                if len(end_tokens) > 0:
-                    expected_end_token_id = end_tokens[
-                        min(state["end_count"], len(end_tokens) - 1)
-                    ]
-
-                    # Check logits masking
-                    batch_logits = logits_new[batch_index]
-                    for token_id in range(len(batch_logits)):
-                        logit_value = batch_logits[token_id]
-
-                        if token_id == expected_end_token_id:
-                            # End token should not be masked
-                            if logit_value == -float("inf"):
-                                _raise_error_invalid(
-                                    msg_suffix=(
-                                        f"End token {token_id} should not be "
-                                        "masked but is"
-                                    ),
-                                    batch_index=batch_index,
-                                    request_params=request_params,
-                                    step_idx=step_idx,
-                                )
-                        else:
-                            # All other tokens should be masked when forcing end
-                            if logit_value != -float("inf"):
-                                _raise_error_invalid(
-                                    msg_suffix=(
-                                        f"Token {token_id} should be masked "
-                                        f"when forcing end tokens, but "
-                                        f"logit={logit_value}"
-                                    ),
-                                    batch_index=batch_index,
-                                    request_params=request_params,
-                                    step_idx=step_idx,
-                                )
-
 
 def _none_validate(
     test_fakes: LogitsprocsTestFakes,
@@ -602,7 +588,7 @@ class LogitsprocTestHelpers(NamedTuple):
     MinTokensLogitsProcessor: LogitsprocTestHelpers(
         gen_request_fxn=_min_tokens_params, eval_fxn=_min_tokens_validate
     ),
-    ThinkingTokenBudgetLogitsProcessor: LogitsprocTestHelpers(
+    STR_THINKING_BUDGET: LogitsprocTestHelpers(
         gen_request_fxn=_thinking_budget_params, eval_fxn=_thinking_budget_validate
     ),
 }
@@ -612,20 +598,17 @@ def _get_test_cases() -> list[list[str]]:
     """Each test case is a set of logitsprocs"""
     logitsprocs_types = list(logitsprocs_test_mapping.keys())
 
-    # Isolate ThinkingTokenBudgetLogitsProcessor from all other processors
-    # to avoid unexpected modification of logits interference
-    thinking_processor = ThinkingTokenBudgetLogitsProcessor
+    # Isolate thinking-budget handling from other processors to avoid cross-talk.
+    thinking_id: LogitprocType = STR_THINKING_BUDGET
     other_processors = [
-        p
-        for p in logitsprocs_types
-        if p != STR_NO_LOGITPROC and p != thinking_processor
+        p for p in logitsprocs_types if p != STR_NO_LOGITPROC and p != thinking_id
     ]
 
     return (
         [[STR_NO_LOGITPROC]]
         + [[logitproc_type, STR_NO_LOGITPROC] for logitproc_type in other_processors]
         + [other_processors]
-        + [[thinking_processor]]
+        + [[thinking_id]]
     )
 
 
@@ -800,12 +783,23 @@ def _assert_valid(
         )
 
 
+def _slot_outputs_for_metadata(
+    persistent_batch: list[LogitsProcsRequestParams], pad_len: int
+) -> list[list[int]]:
+    """Per-batch-slot output token ids aligned with ``SamplingMetadata`` rows."""
+    rows: list[list[int]] = [[] for _ in range(pad_len)]
+    for i, req in enumerate(persistent_batch):
+        if i < pad_len:
+            rows[i] = list(req.out_tokens)
+    return rows
+
+
 @create_new_process_for_each_test()
-@pytest.mark.parametrize("device", CUDA_DEVICES)
+@pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("reqs_per_logitproc", [REQS_PER_LOGITPROC])
 @pytest.mark.parametrize("logitsprocs_under_test", _get_test_cases())
 def test_logitsprocs(
-    device: str, reqs_per_logitproc: int, logitsprocs_under_test: list[str]
+    device: str, reqs_per_logitproc: int, logitsprocs_under_test: list[LogitprocType]
 ):
     random.seed(40)
     torch.set_default_device(device)
@@ -853,9 +847,10 @@ def test_logitsprocs(
         # Apply fake batch update to logitsprocs
         fake_update_logitsprocs_state(test_fakes, batch_update)
 
-        # Emulate application of logits processors in engine
+        # Emulate application of logits processors + thinking holder (sampler order).
         slice_idxs = [req.workload_index for req in persistent_batch]
-        logits_w_lp = fake_apply_logitsprocs(test_fakes, slice_idxs).cpu()
+        slot_rows = _slot_outputs_for_metadata(persistent_batch, workload_size)
+        logits_w_lp = fake_apply_logitsprocs(test_fakes, slice_idxs, slot_rows).cpu()
 
         _assert_valid(
             batch_size=batch_size,
@@ -867,3 +862,335 @@ def test_logitsprocs(
         )
 
         step_idx += 1
+
+
+class MockReasoningNoEndTokens:
+    """Reasoning config with no end token ids (disables enforcement in holder)."""
+
+    reasoning_start_token_ids = [THINK_START_TOKEN_ID]
+    reasoning_end_token_ids: list[int] = []
+
+
+def test_maybe_create_thinking_budget_holder_without_reasoning():
+    cfg = VllmConfig()
+    assert cfg.reasoning_config is None
+    assert (
+        maybe_create_thinking_budget_state_holder(
+            None,
+            cfg.scheduler_config.max_num_seqs,
+            0,
+            torch.device("cpu"),
+            False,
+        )
+        is None
+    )
+
+
+def test_thinking_budget_holder_has_tracked_after_sync_add():
+    vc = VllmConfig()
+    vc.reasoning_config = MockReasoningConfig()
+    h = ThinkingBudgetStateHolder(
+        vc.reasoning_config,
+        vc.scheduler_config.max_num_seqs,
+        0,
+        torch.device("cpu"),
+        False,
+    )
+    assert not h.has_tracked_requests()
+    h.sync_batch(
+        BatchUpdate(
+            batch_size=1,
+            removed=(),
+            added=[
+                (
+                    0,
+                    SamplingParams(thinking_token_budget=3),
+                    None,
+                    [THINK_START_TOKEN_ID],
+                )
+            ],
+            moved=(),
+        )
+    )
+    assert h.has_tracked_requests()
+    assert h._state[0]["thinking_token_budget"] == 3
+
+
+def test_thinking_budget_holder_sync_remove_clears_state():
+    vc = VllmConfig()
+    vc.reasoning_config = MockReasoningConfig()
+    h = ThinkingBudgetStateHolder(
+        vc.reasoning_config,
+        vc.scheduler_config.max_num_seqs,
+        0,
+        torch.device("cpu"),
+        False,
+    )
+    h.sync_batch(
+        BatchUpdate(
+            batch_size=1,
+            removed=(),
+            added=[
+                (
+                    0,
+                    SamplingParams(thinking_token_budget=3),
+                    None,
+                    [],
+                )
+            ],
+            moved=(),
+        )
+    )
+    assert h.has_tracked_requests()
+    h.sync_batch(BatchUpdate(batch_size=0, removed=(0,), added=(), moved=()))
+    assert not h.has_tracked_requests()
+
+
+def test_thinking_budget_holder_sync_add_without_budget_drops_row():
+    vc = VllmConfig()
+    vc.reasoning_config = MockReasoningConfig()
+    h = ThinkingBudgetStateHolder(
+        vc.reasoning_config,
+        vc.scheduler_config.max_num_seqs,
+        0,
+        torch.device("cpu"),
+        False,
+    )
+    h.sync_batch(
+        BatchUpdate(
+            batch_size=1,
+            removed=(),
+            added=[(0, SamplingParams(), None, [])],
+            moved=(),
+        )
+    )
+    assert not h.has_tracked_requests()
+
+
+def test_thinking_budget_holder_swap_exchanges_state():
+    vc = VllmConfig()
+    vc.reasoning_config = MockReasoningConfig()
+    h = ThinkingBudgetStateHolder(
+        vc.reasoning_config,
+        vc.scheduler_config.max_num_seqs,
+        0,
+        torch.device("cpu"),
+        False,
+    )
+    h.sync_batch(
+        BatchUpdate(
+            batch_size=2,
+            removed=(),
+            added=[
+                (
+                    0,
+                    SamplingParams(thinking_token_budget=3),
+                    None,
+                    [],
+                ),
+                (
+                    1,
+                    SamplingParams(thinking_token_budget=7),
+                    None,
+                    [],
+                ),
+            ],
+            moved=(),
+        )
+    )
+    b0, b1 = h._state[0]["thinking_token_budget"], h._state[1]["thinking_token_budget"]
+    h.sync_batch(
+        BatchUpdate(
+            batch_size=2,
+            removed=(),
+            added=(),
+            moved=[(0, 1, MoveDirectionality.SWAP)],
+        )
+    )
+    assert h._state[0]["thinking_token_budget"] == b1
+    assert h._state[1]["thinking_token_budget"] == b0
+
+
+def test_thinking_budget_holder_unidirectional_move():
+    vc = VllmConfig()
+    vc.reasoning_config = MockReasoningConfig()
+    h = ThinkingBudgetStateHolder(
+        vc.reasoning_config,
+        vc.scheduler_config.max_num_seqs,
+        0,
+        torch.device("cpu"),
+        False,
+    )
+    h.sync_batch(
+        BatchUpdate(
+            batch_size=2,
+            removed=(),
+            added=[
+                (
+                    1,
+                    SamplingParams(thinking_token_budget=4),
+                    None,
+                    [],
+                ),
+            ],
+            moved=(),
+        )
+    )
+    assert 1 in h._state and 0 not in h._state
+    h.sync_batch(
+        BatchUpdate(
+            batch_size=2,
+            removed=(),
+            added=(),
+            moved=[(1, 0, MoveDirectionality.UNIDIRECTIONAL)],
+        )
+    )
+    assert 0 in h._state and 1 not in h._state
+    assert h._state[0]["thinking_token_budget"] == 4
+
+
+def test_thinking_budget_holder_update_state_repeat_indices_last_row_wins():
+    vc = VllmConfig()
+    vc.reasoning_config = MockReasoningConfig()
+    h = ThinkingBudgetStateHolder(
+        vc.reasoning_config,
+        vc.scheduler_config.max_num_seqs,
+        0,
+        torch.device("cpu"),
+        False,
+    )
+    h.sync_batch(
+        BatchUpdate(
+            batch_size=1,
+            removed=(),
+            added=[
+                (
+                    0,
+                    SamplingParams(thinking_token_budget=5),
+                    None,
+                    [THINK_START_TOKEN_ID],
+                )
+            ],
+            moved=(),
+        )
+    )
+    out_lists = [[THINK_START_TOKEN_ID], [THINK_START_TOKEN_ID, 10, 11, 12, 13, 14]]
+    h.update_state(
+        out_lists,
+        None,
+        torch.tensor([0, 0], dtype=torch.long),
+    )
+    assert h._state[0]["output_tok_ids"] == out_lists[1]
+
+
+def test_thinking_budget_holder_spec_mode_tensor_layout():
+    h = ThinkingBudgetStateHolder(
+        MockReasoningConfig(),
+        8,
+        2,
+        torch.device("cpu"),
+        False,
+    )
+    assert h.in_spec_mode
+    assert h._mask_capacity == 8 * (2 + 1)
+
+
+def test_thinking_budget_holder_empty_end_tokens_disables_row():
+    vc = VllmConfig()
+    vc.reasoning_config = MockReasoningNoEndTokens()
+    h = ThinkingBudgetStateHolder(
+        vc.reasoning_config,
+        vc.scheduler_config.max_num_seqs,
+        0,
+        torch.device("cpu"),
+        False,
+    )
+    h.sync_batch(
+        BatchUpdate(
+            batch_size=1,
+            removed=(),
+            added=[
+                (
+                    0,
+                    SamplingParams(thinking_token_budget=5),
+                    None,
+                    [THINK_START_TOKEN_ID],
+                )
+            ],
+            moved=(),
+        )
+    )
+    h.update_state([[THINK_START_TOKEN_ID, 1]], None, None)
+    assert h._state[0]["thinking_token_budget"] == -1
+
+
+def test_thinking_budget_enforced_without_penalties():
+    """Regression test for gpu_input_batch.py bug.
+
+    When thinking_budget_tracks_reqs=True and no penalties/bad_words are set,
+    the old code computed needs_output_token_ids=False (inverted condition:
+    ``or not thinking_budget_tracks_reqs``), causing update_state to receive
+    an empty list and skip _update_think_state for every request.
+
+    Fix: changed ``or not thinking_budget_tracks_reqs`` to
+    ``or thinking_budget_tracks_reqs`` so that output_token_ids is populated
+    whenever the thinking budget state holder has tracked requests.
+
+    This test verifies that update_state correctly calls _update_think_state
+    (setting in_end=True) when given the real output_token_ids, and that
+    passing an empty list (the pre-fix behavior) prevents budget enforcement.
+    """
+    vc = VllmConfig()
+    vc.reasoning_config = MockReasoningConfig()
+    budget = 3  # allow 3 thinking tokens
+
+    h = ThinkingBudgetStateHolder(
+        vc.reasoning_config,
+        vc.scheduler_config.max_num_seqs,
+        0,
+        torch.device("cpu"),
+        False,
+    )
+    output_token_ids: list[int] = []
+    h.sync_batch(
+        BatchUpdate(
+            batch_size=1,
+            removed=(),
+            added=[
+                (
+                    0,
+                    SamplingParams(thinking_token_budget=budget),
+                    None,
+                    output_token_ids,
+                )
+            ],
+            moved=(),
+        )
+    )
+    assert h.has_tracked_requests()
+
+    # Simulate the buggy behavior: update_state receives empty list.
+    # _update_think_state is skipped → in_end stays False → no budget enforcement.
+    h.update_state([], None, None)
+    assert not h._state[0].get("in_end", False), (
+        "With empty output_token_ids, in_end should stay False (budget not yet tracked)"
+    )
+
+    # Simulate the correct behavior: output_token_ids is the live list.
+    # Step 1: think-start token appears.
+    output_token_ids.append(THINK_START_TOKEN_ID)
+    h.update_state([output_token_ids], None, None)
+    assert not h._state[0].get("in_end", False), (
+        "Still within budget after 0 think tokens"
+    )
+
+    # Steps 2–4: 3 thinking tokens (hits the budget exactly).
+    for tok in [1, 2, 3]:
+        output_token_ids.append(tok)
+        h.update_state([output_token_ids], None, None)
+
+    # After exactly `budget` thinking tokens the holder should force end token.
+    assert h._state[0].get("in_end", False), (
+        "Budget exceeded: in_end should be True so that apply_to_logits "
+        "forces the end token"
+    )
diff --git a/tests/v1/logits_processors/test_custom_offline.py b/tests/v1/logits_processors/test_custom_offline.py
index 29ec72186b8d..325ca48b597e 100644
--- a/tests/v1/logits_processors/test_custom_offline.py
+++ b/tests/v1/logits_processors/test_custom_offline.py
@@ -16,8 +16,8 @@
     DummyLogitsProcessor,
     WrappedPerReqLogitsProcessor,
     prompts,
+    setup_fake_entrypoint,
 )
-from tests.v1.logits_processors.utils import entry_points as fake_entry_points
 from vllm import LLM, SamplingParams
 from vllm.v1.sample.logits_processor import (
     STR_POOLING_REJECTS_LOGITSPROCS,
@@ -145,13 +145,9 @@ def test_custom_logitsprocs(monkeypatch, logitproc_source: CustomLogitprocSource
 
     if logitproc_source == CustomLogitprocSource.LOGITPROC_SOURCE_ENTRYPOINT:
         # Scenario: vLLM loads a logitproc from a preconfigured entrypoint
-        # To that end, mock a dummy logitproc entrypoint
-        import importlib.metadata
-
-        importlib.metadata.entry_points = fake_entry_points  # type: ignore
-
-        # fork is required for workers to see entrypoint patch
-        monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "fork")
+        # To that end, register a real dist-info package so spawned
+        # workers can discover the entrypoint via PYTHONPATH
+        setup_fake_entrypoint(monkeypatch)
         _run_test({}, logitproc_loaded=True)
         return
 
@@ -266,14 +262,9 @@ def test_rejects_custom_logitsprocs(
         # Scenario: vLLM loads a model and ignores a logitproc that is
         # available at a preconfigured entrypoint
 
-        # Patch in dummy logitproc entrypoint
-        import importlib.metadata
-
-        importlib.metadata.entry_points = fake_entry_points  # type: ignore
-
-        # fork is required for entrypoint patch to be visible to workers,
-        # although they should ignore the entrypoint patch anyway
-        monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "fork")
+        # Register real dist-info package so spawned workers can
+        # discover the entrypoint via PYTHONPATH (spawn-compatible)
+        setup_fake_entrypoint(monkeypatch)
 
         llm = LLM(**llm_kwargs)
         # Require that no custom logitsprocs have been loaded
diff --git a/tests/v1/logits_processors/test_custom_online.py b/tests/v1/logits_processors/test_custom_online.py
index 3dc6b8979015..3b7add3b80f4 100644
--- a/tests/v1/logits_processors/test_custom_online.py
+++ b/tests/v1/logits_processors/test_custom_online.py
@@ -10,7 +10,7 @@
 import pytest
 import pytest_asyncio
 
-from tests.utils import RemoteOpenAIServerCustom, create_new_process_for_each_test
+from tests.utils import RemoteOpenAIServerCustom
 from tests.v1.logits_processors.utils import (
     DUMMY_LOGITPROC_ARG,
     DUMMY_LOGITPROC_FQCN,
@@ -18,8 +18,8 @@
     MODEL_NAME,
     TEMP_GREEDY,
     prompts,
+    setup_fake_entrypoint,
 )
-from tests.v1.logits_processors.utils import entry_points as fake_entry_points
 
 
 def _server_with_logitproc_entrypoint(
@@ -27,16 +27,9 @@ def _server_with_logitproc_entrypoint(
     model: str,
     vllm_serve_args: list[str],
 ) -> None:
-    """Start vLLM server, inject dummy logitproc entrypoint"""
-
-    # Patch `entry_points` to inject logitproc entrypoint
-    import importlib.metadata
-
-    importlib.metadata.entry_points = fake_entry_points  # type: ignore
+    """Start vLLM server with dummy logitproc entrypoint."""
     from vllm.entrypoints.cli import main
 
-    # fork is required for workers to see entrypoint patch
-    os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "fork"
     if env_dict is not None:
         os.environ.update(env_dict)
 
@@ -50,7 +43,7 @@ def _server_with_logitproc_fqcn(
     model: str,
     vllm_serve_args: list[str],
 ) -> None:
-    """Start vLLM server, inject module with dummy logitproc"""
+    """Start vLLM server with dummy logitproc specified by FQCN."""
     from vllm.entrypoints.cli import main
 
     if env_dict is not None:
@@ -80,8 +73,8 @@ def default_server_args():
 def server(default_server_args, request, monkeypatch):
     """Consider two server configurations:
     (1) --logits-processors cli arg specifies dummy logits processor via fully-
-    qualified class name (FQCN); patch in a dummy logits processor module
-    (2) No --logits-processors cli arg; patch in a dummy logits processor
+    qualified class name (FQCN)
+    (2) No --logits-processors cli arg; inject a dummy logits processor
     entrypoint
     """
 
@@ -94,6 +87,7 @@ def server(default_server_args, request, monkeypatch):
         _server_fxn = _server_with_logitproc_fqcn
     else:
         # Launch server, inject dummy logitproc entrypoint
+        setup_fake_entrypoint(monkeypatch)
         args = default_server_args
         _server_fxn = _server_with_logitproc_entrypoint
 
@@ -119,13 +113,11 @@ async def client(server):
 }
 
 
-@create_new_process_for_each_test()
-@pytest.mark.asyncio
 @pytest.mark.parametrize(
     "model_name",
     [MODEL_NAME],
 )
-async def test_custom_logitsprocs(client: openai.AsyncOpenAI, model_name: str):
+def test_custom_logitsprocs(server, model_name: str):
     """Test custom logitsprocs when starting OpenAI server from CLI
 
     Launch vLLM OpenAI-compatible server, configured to load a custom logitproc
@@ -139,36 +131,45 @@ async def test_custom_logitsprocs(client: openai.AsyncOpenAI, model_name: str):
     token
     """
 
-    use_dummy_logitproc = True
-    for prompt in prompts:
-        # Build request arguments
-        request_keyword_args: dict[str, Any] = {
-            **api_keyword_args,
-        }
-        if use_dummy_logitproc:
-            # 50% of requests pass target_token custom arg
-            target_token = random.choice([128, 67])
-            # For requests which activate the dummy logitproc, choose one of
-            # two `target_token` values which are known not to be EOS tokens
-            request_keyword_args["extra_body"] = {
-                "vllm_xargs": {DUMMY_LOGITPROC_ARG: target_token}
-            }
-        batch = await client.completions.create(
-            model=model_name,
-            prompt=prompt,
-            **request_keyword_args,
-        )
+    import asyncio
 
-        if use_dummy_logitproc:
-            # Only for requests which activate dummy logitproc - validate that
-            # output token is repeated
-            choices: openai.types.CompletionChoice = batch.choices
-            toks = choices[0].logprobs.tokens
-            if not all([x == toks[0] for x in toks]):
-                raise AssertionError(f"Generated {toks} should all be {toks[0]}")
+    async def _async_main(srv, mn):
+        async with srv.get_async_client() as client:
+            await _run(client)
 
-        # Alternate whether to activate dummy logitproc for each request
-        use_dummy_logitproc = not use_dummy_logitproc
+    async def _run(client):
+        use_dummy_logitproc = True
+        for prompt in prompts:
+            # Build request arguments
+            request_keyword_args: dict[str, Any] = {
+                **api_keyword_args,
+            }
+            if use_dummy_logitproc:
+                # 50% of requests pass target_token custom arg
+                target_token = random.choice([128, 67])
+                # For requests which activate the dummy logitproc, choose one of
+                # two `target_token` values which are known not to be EOS tokens
+                request_keyword_args["extra_body"] = {
+                    "vllm_xargs": {DUMMY_LOGITPROC_ARG: target_token}
+                }
+            batch = await client.completions.create(
+                model=model_name,
+                prompt=prompt,
+                **request_keyword_args,
+            )
+
+            if use_dummy_logitproc:
+                # Only for requests which activate dummy logitproc - validate that
+                # output token is repeated
+                choices: openai.types.CompletionChoice = batch.choices
+                toks = choices[0].logprobs.tokens
+                if not all([x == toks[0] for x in toks]):
+                    raise AssertionError(f"Generated {toks} should all be {toks[0]}")
+
+            # Alternate whether to activate dummy logitproc for each request
+            use_dummy_logitproc = not use_dummy_logitproc
+
+    asyncio.run(_async_main(server, model_name))
 
 
 @pytest.mark.asyncio
diff --git a/tests/v1/logits_processors/utils.py b/tests/v1/logits_processors/utils.py
index e54da72e5e2e..fc8ce50c05fa 100644
--- a/tests/v1/logits_processors/utils.py
+++ b/tests/v1/logits_processors/utils.py
@@ -1,12 +1,15 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import types
+import os
+import tempfile
 from enum import Enum, auto
+from pathlib import Path
 from typing import Any
 
 import torch
 
+from tests.utils import requires_spawn_multiprocessing
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.sampling_params import SamplingParams
@@ -102,11 +105,6 @@ def apply(self, logits: torch.Tensor) -> torch.Tensor:
         return logits
 
 
-"""Dummy module with dummy logitproc class"""
-dummy_module = types.ModuleType(DUMMY_LOGITPROC_MODULE)
-dummy_module.DummyLogitsProcessor = DummyLogitsProcessor  # type: ignore
-
-
 class EntryPoint:
     """Dummy entrypoint class for logitsprocs testing"""
 
@@ -187,5 +185,59 @@ def new_req_logits_processor(
         return DummyPerReqLogitsProcessor(target_token)
 
 
-"""Fake version of importlib.metadata.entry_points"""
-entry_points = lambda group: EntryPoints(group)
+def register_fake_entrypoint(monkeypatch) -> str:
+    """Register the dummy logitsproc entrypoint in a way that is visible
+    to spawned subprocesses by creating a real dist-info directory on disk.
+
+    Unlike monkey-patching importlib.metadata.entry_points (which only works
+    with fork), this approach writes a real dist-info package that
+    importlib.metadata can discover in any subprocess via PYTHONPATH.
+
+    Returns the temp directory path.
+    """
+    tmpdir = Path(tempfile.mkdtemp(prefix="dummy-logitproc-"))
+    dist_info = tmpdir / "dummy_logitproc-0.1.dist-info"
+    dist_info.mkdir()
+
+    # Write METADATA file (required by importlib.metadata)
+    (dist_info / "METADATA").write_text(
+        "Metadata-Version: 2.1\nName: dummy-logitproc\nVersion: 0.1\n",
+        encoding="utf-8",
+    )
+
+    # Write entry_points.txt
+    (dist_info / "entry_points.txt").write_text(
+        f"[{LOGITSPROCS_GROUP}]\n"
+        f"{DUMMY_LOGITPROC_ENTRYPOINT} = {DUMMY_LOGITPROC_FQCN}\n",
+        encoding="utf-8",
+    )
+
+    # Add to PYTHONPATH so spawned subprocesses can discover it
+    existing = os.environ.get("PYTHONPATH", "")
+    monkeypatch.setenv(
+        "PYTHONPATH", str(tmpdir) + (os.pathsep + existing if existing else "")
+    )
+
+    # Also update sys.path for the current process so the driver can
+    # discover the entrypoint.
+    monkeypatch.syspath_prepend(str(tmpdir))
+
+    return str(tmpdir)
+
+
+def fake_entry_points(group: str) -> EntryPoints:
+    """Fake version of importlib.metadata.entry_points."""
+    return EntryPoints(group)
+
+
+def setup_fake_entrypoint(monkeypatch) -> None:
+    """Expose the dummy logitproc entrypoint for the current platform."""
+    if requires_spawn_multiprocessing():
+        register_fake_entrypoint(monkeypatch)
+        monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
+        return
+
+    import importlib.metadata
+
+    monkeypatch.setattr(importlib.metadata, "entry_points", fake_entry_points)
+    monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "fork")
diff --git a/tests/v1/metrics/test_ray_metrics.py b/tests/v1/metrics/test_ray_metrics.py
index f08d9f684921..6bad1299b61e 100644
--- a/tests/v1/metrics/test_ray_metrics.py
+++ b/tests/v1/metrics/test_ray_metrics.py
@@ -1,13 +1,21 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from unittest.mock import MagicMock
+
 import pytest
 import ray
 
 from vllm.config.model import ModelDType
 from vllm.sampling_params import SamplingParams
 from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM
-from vllm.v1.metrics.ray_wrappers import RayPrometheusMetric, RayPrometheusStatLogger
+from vllm.v1.metrics.ray_wrappers import (
+    RayCounterWrapper,
+    RayGaugeWrapper,
+    RayHistogramWrapper,
+    RayPrometheusMetric,
+    RayPrometheusStatLogger,
+)
 
 MODELS = [
     "distilbert/distilgpt2",
@@ -94,3 +102,148 @@ def test_sanitized_opentelemetry_name():
 
     # Test empty string
     assert RayPrometheusMetric._get_sanitized_opentelemetry_name("") == ""
+
+
+def _install_mock_metric(wrapper: RayPrometheusMetric) -> MagicMock:
+    """Swap the wrapper's underlying Ray metric for a MagicMock while
+    preserving the real metric's ``_tag_keys`` (labels() reads them to
+    validate arity)."""
+    real_metric = wrapper.metric
+    mock = MagicMock()
+    mock._tag_keys = real_metric._tag_keys
+    wrapper.metric = mock
+    return mock
+
+
+def test_ray_counter_labels_returns_independent_children():
+    """RayCounterWrapper.labels() must return distinct labeled children that
+    each carry their own tag set."""
+    base = RayCounterWrapper(
+        name="vllm_test_finish_reason",
+        documentation="",
+        labelnames=["reason"],
+    )
+
+    stop_child = base.labels("stop")
+    rep_child = base.labels("repetition")
+
+    assert stop_child is not rep_child
+    assert stop_child._tags["reason"] == "stop"
+    assert rep_child._tags["reason"] == "repetition"
+    # Mutating one child's tags must not leak into another.
+    stop_child._tags["reason"] = "mutated"
+    assert rep_child._tags["reason"] == "repetition"
+
+
+def test_ray_counter_inc_forwards_per_child_tags():
+    """.inc() on a labeled counter must forward that child's tags to the
+    underlying Ray metric (not rely on a shared set_default_tags)."""
+    wrapper = RayCounterWrapper(
+        name="vllm_test_counter_tag_forward",
+        documentation="",
+        labelnames=["reason"],
+    )
+    mock = _install_mock_metric(wrapper)
+
+    wrapper.labels("stop").inc()
+    wrapper.labels("repetition").inc(3)
+    wrapper.labels("stop").inc(0)  # zero increment must be a no-op.
+
+    # The zero-increment call should not reach the underlying metric.
+    assert mock.inc.call_count == 2
+    first, second = mock.inc.call_args_list
+    assert first.args == (1.0,)
+    assert first.kwargs["tags"]["reason"] == "stop"
+    assert second.args == (3,)
+    assert second.kwargs["tags"]["reason"] == "repetition"
+
+
+def test_ray_gauge_labels_returns_independent_children_and_forwards_tags():
+    wrapper = RayGaugeWrapper(
+        name="vllm_test_gauge_tag_forward",
+        documentation="",
+        labelnames=["kind"],
+    )
+    mock = _install_mock_metric(wrapper)
+
+    a = wrapper.labels("a")
+    b = wrapper.labels("b")
+    assert a is not b
+
+    a.set(1)
+    b.set(2)
+    assert mock.set.call_args_list[0].args == (1,)
+    assert mock.set.call_args_list[0].kwargs["tags"]["kind"] == "a"
+    assert mock.set.call_args_list[1].args == (2,)
+    assert mock.set.call_args_list[1].kwargs["tags"]["kind"] == "b"
+
+
+def test_ray_histogram_labels_returns_independent_children_and_forwards_tags():
+    wrapper = RayHistogramWrapper(
+        name="vllm_test_histogram_tag_forward",
+        documentation="",
+        labelnames=["bucket"],
+        buckets=[1.0, 2.0, 5.0],
+    )
+    mock = _install_mock_metric(wrapper)
+
+    x = wrapper.labels("x")
+    y = wrapper.labels("y")
+    assert x is not y
+
+    x.observe(0.5)
+    y.observe(4.0)
+    assert mock.observe.call_args_list[0].args == (0.5,)
+    assert mock.observe.call_args_list[0].kwargs["tags"]["bucket"] == "x"
+    assert mock.observe.call_args_list[1].args == (4.0,)
+    assert mock.observe.call_args_list[1].kwargs["tags"]["bucket"] == "y"
+
+
+def test_ray_counter_labels_accepts_non_string_label_values():
+    """RayPrometheusStatLogger passes ``str(idx)`` for engine indexes; this
+    covers the coercion path for any caller that passes a non-string label
+    value positionally."""
+    wrapper = RayCounterWrapper(
+        name="vllm_test_nonstr_label",
+        documentation="",
+        labelnames=["engine", "reason"],
+    )
+    child = wrapper.labels(0, "stop")
+    assert child._tags["engine"] == "0"
+    assert child._tags["reason"] == "stop"
+
+
+def test_ray_counter_labels_arity_validation():
+    wrapper = RayCounterWrapper(
+        name="vllm_test_arity",
+        documentation="",
+        labelnames=["a", "b"],
+    )
+    with pytest.raises(ValueError, match="Number of labels must match"):
+        wrapper.labels("only-one")
+
+
+def test_unlabeled_inc_carries_replica_id():
+    """Recording on an unlabeled metric must still pass ReplicaId — it's a
+    declared tag_key and Ray rejects updates that omit any declared key."""
+    wrapper = RayCounterWrapper(
+        name="vllm_test_unlabeled_replica_id",
+        documentation="",
+        labelnames=None,
+    )
+    mock = _install_mock_metric(wrapper)
+    wrapper.inc()
+    assert mock.inc.call_args.kwargs["tags"] == {"ReplicaId": ""}
+
+
+def test_double_labels_raises():
+    """labels() on an already-labeled child should raise, mirroring the
+    prometheus_client contract."""
+    wrapper = RayCounterWrapper(
+        name="vllm_test_double_labels",
+        documentation="",
+        labelnames=["reason"],
+    )
+    child = wrapper.labels("stop")
+    with pytest.raises(ValueError, match="already-labeled"):
+        child.labels("repetition")
diff --git a/tests/v1/metrics/test_stats.py b/tests/v1/metrics/test_stats.py
index d49874adc998..21f496ea4aea 100644
--- a/tests/v1/metrics/test_stats.py
+++ b/tests/v1/metrics/test_stats.py
@@ -1,7 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from vllm.v1.engine import FinishReason
-from vllm.v1.metrics.stats import IterationStats, PromptTokenStats, RequestStateStats
+from vllm.v1.metrics.stats import (
+    IterationStats,
+    PrefillStats,
+    PromptTokenStats,
+    RequestStateStats,
+)
 
 
 def test_iteration_stats_repr():
@@ -21,6 +26,7 @@ def test_prefill_kv_computed_with_cache():
     # Case 1: With prefix cache (1200 tokens cached)
     iteration_stats.update_from_finished_request(
         finish_reason=FinishReason.STOP,
+        request_id="test-req-001",
         num_prompt_tokens=10000,
         max_tokens_param=100,
         req_stats=req_stats,
@@ -30,6 +36,7 @@ def test_prefill_kv_computed_with_cache():
     finished_req = iteration_stats.finished_requests[0]
     assert finished_req.num_prompt_tokens == 10000
     assert finished_req.num_cached_tokens == 1200
+    assert finished_req.request_id == "test-req-001"
 
     # Verify calculation: prefill KV = prompt tokens - cached tokens
     prefill_kv_computed = finished_req.num_prompt_tokens - max(
@@ -50,6 +57,7 @@ def test_prefill_kv_computed_no_cache():
     # Case 2: No prefix cache
     iteration_stats.update_from_finished_request(
         finish_reason=FinishReason.STOP,
+        request_id="test-req-002",
         num_prompt_tokens=2000,
         max_tokens_param=100,
         req_stats=req_stats,
@@ -59,6 +67,7 @@ def test_prefill_kv_computed_no_cache():
     finished_req = iteration_stats.finished_requests[0]
     assert finished_req.num_prompt_tokens == 2000
     assert finished_req.num_cached_tokens == 0
+    assert finished_req.request_id == "test-req-002"
 
     # Verify calculation: prefill KV = full prompt when no cache
     prefill_kv_computed = finished_req.num_prompt_tokens - max(
@@ -79,6 +88,7 @@ def test_prefill_kv_computed_edge_cases():
     # Case 3: Negative num_cached_tokens (shouldn't happen, but handle gracefully)
     iteration_stats.update_from_finished_request(
         finish_reason=FinishReason.STOP,
+        request_id="test-req-003",
         num_prompt_tokens=100,
         max_tokens_param=10,
         req_stats=req_stats,
@@ -91,11 +101,13 @@ def test_prefill_kv_computed_edge_cases():
         finished_req.num_cached_tokens, 0
     )
     assert prefill_kv_computed == 100  # Should treat negative as 0
+    assert finished_req.request_id == "test-req-003"
 
     # Case 4: All tokens cached (shouldn't happen in practice)
     iteration_stats2 = IterationStats()
     iteration_stats2.update_from_finished_request(
         finish_reason=FinishReason.STOP,
+        request_id="test-req-004",
         num_prompt_tokens=100,
         max_tokens_param=10,
         req_stats=req_stats,
@@ -107,6 +119,7 @@ def test_prefill_kv_computed_edge_cases():
         finished_req2.num_cached_tokens, 0
     )
     assert prefill_kv_computed2 == 0  # All cached, nothing computed
+    assert finished_req2.request_id == "test-req-004"
 
 
 def test_prompt_token_stats_all_computed():
@@ -114,15 +127,18 @@ def test_prompt_token_stats_all_computed():
     stats = PromptTokenStats()
 
     # Case 1: No caching (All tokens computed locally)
-    stats.update_from_output(
-        num_cached_tokens=0,
-        num_external_computed_tokens=0,
-        prompt_len=1000,
+    prefill_stats = PrefillStats()
+    prefill_stats.set(
+        num_prompt_tokens=1000,
+        num_local_cached_tokens=0,
+        num_external_cached_tokens=0,
     )
+    stats.update_from_output(prefill_stats)
 
     assert stats.computed == 1000
     assert stats.local_cache_hit == 0
     assert stats.external_kv_transfer == 0
+    assert stats.cached_tokens == 0
     assert stats.total == 1000
 
 
@@ -131,15 +147,19 @@ def test_prompt_token_stats_partial_local_cache():
     stats = PromptTokenStats()
 
     # Case 2: Partial local cache
-    stats.update_from_output(
-        num_cached_tokens=300,
-        num_external_computed_tokens=0,
-        prompt_len=1000,
+    prefill_stats = PrefillStats()
+    prefill_stats.set(
+        num_prompt_tokens=1000,
+        num_local_cached_tokens=300,
+        num_external_cached_tokens=0,
     )
+    stats.update_from_output(prefill_stats)
 
     assert stats.computed == 700
     assert stats.local_cache_hit == 300
     assert stats.external_kv_transfer == 0
+    assert stats.cached_tokens == 300
+    assert stats.total == 1000
 
 
 def test_prompt_token_stats_partial_external_transfer():
@@ -147,15 +167,19 @@ def test_prompt_token_stats_partial_external_transfer():
     stats = PromptTokenStats()
 
     # Case 3: Partial external transfer
-    stats.update_from_output(
-        num_cached_tokens=500,
-        num_external_computed_tokens=500,
-        prompt_len=1000,
+    prefill_stats = PrefillStats()
+    prefill_stats.set(
+        num_prompt_tokens=1000,
+        num_local_cached_tokens=0,
+        num_external_cached_tokens=500,
     )
+    stats.update_from_output(prefill_stats)
 
     assert stats.computed == 500
     assert stats.local_cache_hit == 0
     assert stats.external_kv_transfer == 500
+    assert stats.cached_tokens == 500
+    assert stats.total == 1000
 
 
 def test_prompt_token_stats_mixed_sources():
@@ -163,49 +187,60 @@ def test_prompt_token_stats_mixed_sources():
     stats = PromptTokenStats()
 
     # Case 4: Mixed sources
-    stats.update_from_output(
-        num_cached_tokens=600,
-        num_external_computed_tokens=200,
-        prompt_len=1000,
+    prefill_stats = PrefillStats()
+    prefill_stats.set(
+        num_prompt_tokens=1000,
+        num_local_cached_tokens=400,
+        num_external_cached_tokens=200,
     )
+    stats.update_from_output(prefill_stats)
 
     assert stats.computed == 400
     assert stats.local_cache_hit == 400
     assert stats.external_kv_transfer == 200
+    assert stats.cached_tokens == 600
+    assert stats.total == 1000
 
 
 def test_prompt_token_stats_full_local_cache_recompute():
     """Test full local cache triggers last token recomputation.
 
-    When all tokens are cached, the scheduler reduces num_cached_tokens by 1
-    to force the model to recompute the last token.
+    When all tokens are cached, the scheduler forces the model to recompute
+    the last token (num_computed_tokens=1), with the rest from cache.
     """
     stats = PromptTokenStats()
 
-    # Case 5: Full local cache (999 cached after reduction, 1 recomputed)
-    stats.update_from_output(
-        num_cached_tokens=999,
-        num_external_computed_tokens=0,
-        prompt_len=1000,
+    # Case 5: Full local cache (999 cached, 1 recomputed)
+    prefill_stats = PrefillStats()
+    prefill_stats.set(
+        num_prompt_tokens=1000,
+        num_local_cached_tokens=999,
+        num_external_cached_tokens=0,
     )
+    stats.update_from_output(prefill_stats)
 
     assert stats.computed == 1
-    assert stats.local_cache_hit == 1000
-    assert stats.recomputed_tokens == 1
+    assert stats.local_cache_hit == 999
+    assert stats.external_kv_transfer == 0
+    assert stats.cached_tokens == 999
+    assert stats.total == 1000
 
 
 def test_prompt_token_stats_full_external_transfer_recompute():
     """Test full external transfer triggers last token recomputation."""
     stats = PromptTokenStats()
 
-    # Case 6: Full external transfer (999 cached after reduction, 1 recomputed)
-    stats.update_from_output(
-        num_cached_tokens=999,
-        num_external_computed_tokens=1000,
-        prompt_len=1000,
+    # Case 6: Full external transfer (999 from external, 1 recomputed)
+    prefill_stats = PrefillStats()
+    prefill_stats.set(
+        num_prompt_tokens=1000,
+        num_local_cached_tokens=0,
+        num_external_cached_tokens=999,
     )
+    stats.update_from_output(prefill_stats)
 
     assert stats.computed == 1
     assert stats.local_cache_hit == 0
-    assert stats.external_kv_transfer == 1000
-    assert stats.recomputed_tokens == 1
+    assert stats.external_kv_transfer == 999
+    assert stats.cached_tokens == 999
+    assert stats.total == 1000
diff --git a/tests/v1/sample/test_batched_count_greater_than.py b/tests/v1/sample/test_batched_count_greater_than.py
new file mode 100644
index 000000000000..c9ace93c6961
--- /dev/null
+++ b/tests/v1/sample/test_batched_count_greater_than.py
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Test that batched_count_greater_than does not trigger 0/1 specialization
+recompiles when batch_size varies."""
+
+import torch
+
+from vllm.platforms import current_platform
+from vllm.v1.sample.ops.logprobs import batched_count_greater_than
+from vllm.v1.sample.sampler import Sampler
+
+DEVICE = current_platform.device_type
+
+
+def test_batched_count_greater_than_correctness():
+    """Basic correctness: counts elements >= the corresponding value."""
+    x = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], device=DEVICE)
+    values = torch.tensor([[2.0], [5.0]], device=DEVICE)
+    result = batched_count_greater_than(x, values)
+    expected = torch.tensor([2, 2], device=DEVICE)
+    torch.testing.assert_close(result, expected)
+
+
+def test_gather_logprobs_no_recompile():
+    """Sampler.gather_logprobs with batch_size=1 then 2 must not recompile.
+
+    This guards against 0/1 specialization: dynamo normally specializes on
+    tensor sizes 0 and 1, causing a recompile when the size first exceeds 1.
+    The mark_unbacked calls in gather_logprobs prevent this.
+    """
+    torch._dynamo.reset()
+
+    compile_count = 0
+    orig_backend = current_platform.simple_compile_backend
+
+    def counting_backend(gm, example_inputs):
+        nonlocal compile_count
+        compile_count += 1
+        if orig_backend == "inductor":
+            return torch._inductor.compile(gm, example_inputs)
+        return gm
+
+    # Monkey-patch batched_count_greater_than with our counting backend
+    # so we can detect recompiles through the production code path.
+    import vllm.v1.sample.ops.logprobs as logprobs_module
+    import vllm.v1.sample.sampler as sampler_module
+
+    unwrapped = batched_count_greater_than._torchdynamo_orig_callable
+    patched = torch.compile(unwrapped, backend=counting_backend)
+    orig_fn = logprobs_module.batched_count_greater_than
+
+    logprobs_module.batched_count_greater_than = patched
+    sampler_module.batched_count_greater_than = patched
+
+    try:
+        vocab_size = 32
+        num_logprobs = 3
+
+        # Call 1: batch_size=1
+        logprobs1 = torch.randn(1, vocab_size, device=DEVICE)
+        token_ids1 = torch.randint(
+            0, vocab_size, (1,), device=DEVICE, dtype=torch.int64
+        )
+        Sampler.gather_logprobs(logprobs1, num_logprobs, token_ids1)
+        assert compile_count == 1, f"Expected 1 compile, got {compile_count}"
+
+        # Call 2: batch_size=2 — should NOT recompile
+        logprobs2 = torch.randn(2, vocab_size, device=DEVICE)
+        token_ids2 = torch.randint(
+            0, vocab_size, (2,), device=DEVICE, dtype=torch.int64
+        )
+        Sampler.gather_logprobs(logprobs2, num_logprobs, token_ids2)
+        assert compile_count == 1, (
+            f"Recompiled on batch_size 1->2 (0/1 specialization). "
+            f"Expected 1 compile, got {compile_count}"
+        )
+
+        # Call 3: batch_size=8 — should NOT recompile
+        logprobs3 = torch.randn(8, vocab_size, device=DEVICE)
+        token_ids3 = torch.randint(
+            0, vocab_size, (8,), device=DEVICE, dtype=torch.int64
+        )
+        Sampler.gather_logprobs(logprobs3, num_logprobs, token_ids3)
+        assert compile_count == 1, (
+            f"Recompiled on batch_size change. Expected 1 compile, got {compile_count}"
+        )
+    finally:
+        # Restore original function
+        logprobs_module.batched_count_greater_than = orig_fn
+        sampler_module.batched_count_greater_than = orig_fn
+        torch._dynamo.reset()
diff --git a/tests/v1/sample/test_logprobs.py b/tests/v1/sample/test_logprobs.py
index d029a6ce065c..460e0d685649 100644
--- a/tests/v1/sample/test_logprobs.py
+++ b/tests/v1/sample/test_logprobs.py
@@ -33,11 +33,10 @@
 SAMPLE_PROMPT = BatchLogprobsComposition.SAMPLE_PROMPT
 
 # On ROCm, floating-point reductions in attention and GEMM kernels are
-# non-associative and sensitive to batch geometry. The ref LLM (no spec
-# decode, default scheduling) and the spec-decode LLM (chunked prefill,
-# different effective batch sizes) follow different reduction orders,
-# producing numerically divergent logprobs that get misattributed to
-# spec-decode incorrectness.
+# non-associative and sensitive to batch geometry. If the ref LLM and
+# spec-decode LLM use different scheduling or batch geometry, they can
+# follow different reduction orders and produce numerically divergent
+# logprobs that get misattributed to spec-decode incorrectness.
 #
 # Force LLM instances into an identical, deterministic execution
 # mode so the test isolates spec-decode correctness only:
@@ -539,6 +538,10 @@ class TestCorrectDecodedToken:
     result in the Unicode replacement character "�" (U+FFFD). This commonly
     happens with byte-fallback tokenization when multi-byte UTF-8 characters
     are split across tokens.
+
+    The method signature is _correct_decoded_token(token_id, context_token_ids)
+    where token_id is the single token to correct and context_token_ids are
+    the preceding sampled tokens in sequential order.
     """
 
     @pytest.fixture
@@ -550,8 +553,8 @@ def mock_tokenizer(self):
         return tokenizer
 
     @pytest.fixture
-    def processor_with_empty_logprobs(self, mock_tokenizer):
-        """Create a LogprobsProcessor with empty logprobs."""
+    def processor(self, mock_tokenizer):
+        """Create a LogprobsProcessor."""
         from vllm.v1.engine.logprobs import LogprobsProcessor
 
         processor = LogprobsProcessor(
@@ -564,209 +567,191 @@ def processor_with_empty_logprobs(self, mock_tokenizer):
         )
         return processor
 
-    @pytest.fixture
-    def processor_with_previous_logprobs(self, mock_tokenizer):
-        """Create a LogprobsProcessor with previous logprobs."""
-        from vllm.v1.engine.logprobs import LogprobsProcessor
+    def test_correction_with_context(self, processor):
+        """Test correction using context from preceding sampled tokens.
 
-        processor = LogprobsProcessor(
-            tokenizer=mock_tokenizer,
-            logprobs=[{123: None}],  # Previous token ID is 123
-            prompt_logprobs=None,
-            cumulative_logprob=0.0,
-            num_logprobs=1,
-            num_prompt_logprobs=None,
-        )
-        return processor
+        Scenario: A byte-fallback token that completes a multi-byte
+        UTF-8 sequence when decoded with context.
+        """
 
-    def test_correction_with_previous_token_in_list(
-        self, processor_with_empty_logprobs
-    ):
-        """Test correction using previous token in the same list.
+        # Context is [101] (a preceding sampled token)
+        # Token 102 individually decodes to "�"
+        # decode([101, 102]) returns "valid" (complete sequence)
+        def mock_decode(ids):
+            if ids == [101, 102]:
+                return "hello valid"
+            if ids == [101]:
+                return "hello "
+            return "�"
 
-        Scenario: Token at idx=1 ends with "�", but when decoded with
-        the previous token (idx=0), it forms a valid UTF-8 sequence.
-        Example: token[0]="�", token[1]="�" -> together form "polarized"
-        """
-        processor = processor_with_empty_logprobs
-        tokens = [100, 101, 102]  # token IDs
-
-        # Mock tokenizer behavior:
-        # - decode([102]) returns "�" (ends with replacement char)
-        # - decode([101, 102]) returns "valid" (no replacement char)
-        processor.tokenizer.decode.side_effect = lambda ids: (
-            "valid" if ids == [101, 102] else "�"
-        )
+        processor.tokenizer.decode.side_effect = mock_decode
 
-        result = processor._correct_decoded_token(2, tokens)
+        result = processor._correct_decoded_token(102, [101])
         assert result == "valid"
-        processor.tokenizer.decode.assert_called_with([101, 102])
 
-    def test_correction_with_previous_logprob_token(
-        self, processor_with_previous_logprobs
-    ):
-        """Test correction using previous logprob token.
+    def test_correction_with_context_from_logprobs(self, processor):
+        """Test correction using context from previous logprob entries.
 
-        Scenario: Cannot correct with previous token in list (idx=0),
-        but can correct with previous logprob token.
+        Scenario: Token decoded with context from previously sampled
+        tokens completes a UTF-8 sequence.
         """
-        processor = processor_with_previous_logprobs
-        tokens = [100]  # single token
 
-        # Mock tokenizer behavior:
-        # - decode([100]) returns "�" (ends with replacement char)
-        # - decode([123, 100]) returns " "polarized" (no replacement char)
-        # Token 123 is from previous logprobs
+        # Token 123 was previously sampled (in context)
         def mock_decode(ids):
             if ids == [123, 100]:
-                return ' "polarized"'
+                return 'hello "polarized"'
+            if ids == [123]:
+                return "hello "
             return "�"
 
         processor.tokenizer.decode.side_effect = mock_decode
 
-        result = processor._correct_decoded_token(0, tokens)
-        assert result == ' "polarized"'
+        result = processor._correct_decoded_token(100, [123])
+        assert result == '"polarized"'
 
-    def test_correction_at_idx_zero_no_previous_logprobs(
-        self, processor_with_empty_logprobs
-    ):
-        """Test correction at idx=0 with no previous logprobs.
+    def test_correction_no_context(self, processor):
+        """Test correction with no context available.
 
-        Scenario: First token in list, no previous logprobs available.
         Should return empty string as fallback.
         """
-        processor = processor_with_empty_logprobs
-        tokens = [100]
-
-        # Mock tokenizer always returns "�"
         processor.tokenizer.decode.return_value = "�"
 
-        result = processor._correct_decoded_token(0, tokens)
+        result = processor._correct_decoded_token(100, [])
         assert result == ""
 
-    def test_correction_at_idx_zero_with_previous_logprobs(
-        self, processor_with_previous_logprobs
-    ):
-        """Test correction at idx=0 with previous logprobs available.
+    def test_correction_with_context_succeeds(self, processor):
+        """Test correction with context from previously sampled tokens."""
 
-        Scenario: First token in list, but previous logprobs exist.
-        Should try correction with previous logprob token.
-        """
-        processor = processor_with_previous_logprobs
-        tokens = [200]
-
-        # Mock tokenizer behavior
         def mock_decode(ids):
             if ids == [123, 200]:
-                return "corrected"
+                return "hello corrected"
+            if ids == [123]:
+                return "hello "
             return "�"
 
         processor.tokenizer.decode.side_effect = mock_decode
 
-        result = processor._correct_decoded_token(0, tokens)
+        result = processor._correct_decoded_token(200, [123])
         assert result == "corrected"
 
-    def test_no_correction_needed_returns_fallback(
-        self, processor_with_previous_logprobs
-    ):
-        """Test fallback to empty string when no correction works.
-
-        Scenario: All correction attempts still end with "�".
-        Should return empty string as final fallback.
-        """
-        processor = processor_with_previous_logprobs
-        tokens = [100, 101, 102]
-
-        # Mock tokenizer always returns text ending with "�"
+    def test_fallback_when_all_attempts_fail(self, processor):
+        """Test fallback to empty string when no correction works."""
         processor.tokenizer.decode.return_value = "still�"
 
-        result = processor._correct_decoded_token(2, tokens)
+        result = processor._correct_decoded_token(102, [100, 101])
         assert result == ""
 
-    def test_middle_token_correction(self, processor_with_previous_logprobs):
-        """Test correction for a token in the middle of the list.
+    def test_increasing_context_window(self, processor):
+        """Test that increasing context window finds the correction.
 
-        Scenario: Token at idx=5 in a longer list needs correction.
+        Scenario: 3-byte UTF-8 char. With 1 context token, still
+        incomplete. With 2 context tokens, completes the sequence.
         """
-        processor = processor_with_previous_logprobs
-        tokens = [10, 20, 30, 40, 50, 60, 70, 80]
 
-        # Mock tokenizer behavior for middle token
         def mock_decode(ids):
-            if ids == [50, 60]:
-                return "olar"
+            # 1 context token: still incomplete
+            if ids == [81, 82]:
+                return "�"
+            # 2 context tokens: complete
+            if ids == [80, 81, 82]:
+                return "\u201c"
+            # Context-only decodes
+            if ids == [81]:
+                return "�"
+            if ids == [80, 81]:
+                return "�"
             return "�"
 
         processor.tokenizer.decode.side_effect = mock_decode
 
-        result = processor._correct_decoded_token(5, tokens)
-        assert result == "olar"
+        # Context has 2 preceding tokens [80, 81]
+        result = processor._correct_decoded_token(82, [80, 81])
+        assert result == "\u201c"
 
-    def test_multiple_consecutive_replacement_chars(
-        self, processor_with_previous_logprobs
-    ):
+    def test_multiple_consecutive_replacement_chars(self, processor):
         """Test handling of multiple consecutive replacement characters.
 
-        Scenario: Sequence like ["�", "�", "p"] where first two should
-        become empty strings.
+        Scenario: Multi-byte sequence where intermediate bytes return
+        empty string and the final byte returns the complete character.
         """
-        processor = processor_with_previous_logprobs
-
-        # Test first replacement char
-        tokens = [100, 101, 102]
         processor.tokenizer.decode.return_value = "still�"
-        result1 = processor._correct_decoded_token(0, tokens)
+
+        # First byte with no useful context: returns ""
+        result1 = processor._correct_decoded_token(100, [50])
         assert result1 == ""
 
-        # Test second replacement char
-        result2 = processor._correct_decoded_token(1, tokens)
+        # Second byte with same context: still returns ""
+        result2 = processor._correct_decoded_token(101, [50])
         assert result2 == ""
 
-    def test_correction_with_multibyte_utf8(self, processor_with_previous_logprobs):
+    def test_correction_with_multibyte_utf8(self, processor):
         """Test correction involving multi-byte UTF-8 characters.
 
-        Scenario: Byte-fallback tokenization splits multi-byte UTF-8
-        characters (e.g., curly quotes, Chinese characters, emojis).
-        Example from user: "�", "�" -> "", "\""
+        Scenario: Byte-fallback tokenization splits curly quotes.
+        The last byte token should produce the complete character.
         """
-        processor = processor_with_previous_logprobs
-        tokens = [200, 201]
 
-        # Mock tokenizer behavior for multi-byte UTF-8 correction
         def mock_decode(ids):
-            # When decoding first token (idx=0) with previous logprob token
+            # Context [123] + first byte: completes to left curly quote
             if ids == [123, 200]:
-                return ' "'  # Space + left curly quote
-            # When decoding second token (idx=1) with previous token in list
-            elif ids == [200, 201]:
-                return '"'  # Right curly quote
-            # When decoding second token (idx=1) with previous logprob + prev token
-            elif ids == [123, 200, 201]:
-                return ' ""'  # Full sequence
-            return "�"
+                return "hello \u201c"
+            if ids == [123]:
+                return "hello "
+            # Context [123] + second byte: completes to right curly quote
+            if ids == [123, 201]:
+                return "hello \u201d"
+            return "\ufffd"
 
         processor.tokenizer.decode.side_effect = mock_decode
 
-        # First token correction (idx=0)
-        # Will call decode([123, 200]) since idx=0 uses previous logprob token
-        result1 = processor._correct_decoded_token(0, tokens)
-        assert result1 == ' "'
+        # Each top-k token is corrected independently with same context
+        result1 = processor._correct_decoded_token(200, [123])
+        assert result1 == "\u201c"
+
+        result2 = processor._correct_decoded_token(201, [123])
+        assert result2 == "\u201d"
 
-        # Second token correction (idx=1)
-        # Will call decode([200, 201]) since idx>0 uses previous token in list
-        result2 = processor._correct_decoded_token(1, tokens)
-        assert result2 == '"'
+    def test_topk_tokens_corrected_independently(self, processor):
+        """Test that top-k alternatives at the same position are each
+        corrected independently using only sequential context, not
+        each other.
+
+        This is the core fix for issue #27300: when logprobs > 0,
+        alternative tokens must not be combined with each other.
+        """
+        # Context: previously sampled token 50
+        context = [50]
+
+        def mock_decode(ids):
+            # Token 100 (sampled) with context
+            if ids == [50, 100]:
+                return "prefix \u201c"
+            # Token 200 (top-k alternative) with context
+            if ids == [50, 200]:
+                return "prefix \u2014"
+            # Context alone
+            if ids == [50]:
+                return "prefix "
+            return "\ufffd"
+
+        processor.tokenizer.decode.side_effect = mock_decode
+
+        # Both tokens at the same position use the SAME context [50]
+        result_sampled = processor._correct_decoded_token(100, context)
+        assert result_sampled == "\u201c"
+
+        result_alt = processor._correct_decoded_token(200, context)
+        assert result_alt == "\u2014"
 
     def test_real_world_opt125m_scenario(self, mock_tokenizer):
-        """Test the real-world scenario from user's example.
+        """Test the real-world scenario from the bug report.
 
-        User's example with facebook/opt-125m:
-        Before: [" the", " term", " �", "�", "p", "olar", "ized", "�", "�", ...]
-        After: [" the", " term", "", " "", "p", "olar", "ized", "", "\"", ...]
+        Simulates the OPT-125m sequence where curly quotes are split
+        into byte-fallback tokens. Each token is corrected using only
+        the preceding sampled tokens as context.
         """
         from vllm.v1.engine.logprobs import LogprobsProcessor
 
-        # Simulate the sequence of tokens
         processor = LogprobsProcessor(
             tokenizer=mock_tokenizer,
             logprobs=[],
@@ -776,47 +761,106 @@ def test_real_world_opt125m_scenario(self, mock_tokenizer):
             num_prompt_logprobs=None,
         )
 
-        # Token IDs representing the problematic sequence
-        tokens = [1, 2, 3, 4, 5, 6, 7, 8, 9]  # placeholder IDs
-
-        # Mock decode behavior simulating the real scenario
+        # Simulating: byte tokens 3, 4 form left curly quote "\u201c"
+        # byte tokens 8, 9 form right curly quote "\u201d"
         def mock_decode(ids):
-            # Simulate cases where individual tokens decode to "�"
-            # but combinations decode correctly
-            if len(ids) == 1:
-                if ids[0] in (3, 4, 8, 9):
-                    return "�"
-            elif len(ids) == 2:
-                if ids == [2, 3]:
-                    return " term�"  # Still ends with �, need more context
-                elif ids == [3, 4]:
-                    return ' "'  # Corrected to space + left curly quote
-                elif ids == [7, 8]:
-                    return "ized�"  # Still ends with �
-                elif ids == [8, 9]:
-                    return '"'  # Corrected to right curly quote
-            elif len(ids) == 3:
-                if ids == [1, 2, 3]:
-                    return " the term�"  # Still ends with issue
-                elif ids == [2, 3, 4]:
-                    return ' term "'  # With all context
+            # Context decodes
+            if ids == [2]:
+                return " term"
+            if ids == [1, 2]:
+                return " the term"
+            if ids == [3]:
+                return "\ufffd"
+            if ids == [2, 3]:
+                return " term\ufffd"
+            if ids == [1, 2, 3]:
+                return " the term\ufffd"
+            # Token 4 with context [2, 3] -> completes left curly quote
+            if ids == [3, 4]:
+                return "\u201c"
+            if ids == [2, 3, 4]:
+                return " term\u201c"
+            # Context for right curly quote
+            if ids == [7]:
+                return "ized"
+            if ids == [7, 8]:
+                return "ized\ufffd"
+            if ids == [8, 9]:
+                return "\u201d"
+            if ids == [7, 8, 9]:
+                return "ized\u201d"
             return "normal_text"
 
         mock_tokenizer.decode.side_effect = mock_decode
 
-        # Test token at index 2 (should fail to correct, return "")
-        # Token 3 individually is "�"
-        # decode([2, 3]) = " term�" (still ends with �)
-        # No previous logprobs, so fallback to ""
-        result = processor._correct_decoded_token(2, tokens)
+        # First byte (token 3) of left curly quote with no context
+        result = processor._correct_decoded_token(3, [])
         assert result == ""
 
-        # Test token at index 3 (should correct to " "")
-        # Token 4 individually is "�"
-        # decode([3, 4]) = " "" (corrected!)
-        processor.logprobs = [{2: None}]  # Add previous logprob
-        result = processor._correct_decoded_token(3, tokens)
-        assert result == ' "'
+        # First byte (token 3) with context [2] -> still incomplete
+        result = processor._correct_decoded_token(3, [2])
+        assert result == ""
+
+        # Second byte (token 4) of left curly quote with context [2, 3]
+        # Token 3 is byte-fallback, so clean context is [2] only.
+        # decode([2, 3, 4]) = " term\u201c", decode([2]) = " term"
+        # result = "\u201c"
+        result = processor._correct_decoded_token(4, [2, 3])
+        assert result == "\u201c"
+
+        # Second byte (token 9) of right curly quote with context [7, 8]
+        result = processor._correct_decoded_token(9, [7, 8])
+        assert result == "\u201d"
+
+    def test_byte_fallback_context_preserves_space(self, mock_tokenizer):
+        """Test that text from byte-fallback context tokens is preserved.
+
+        In OPT-125m, token 44 = space + 2 bytes of curly quote.
+        When token 44 returns "" (incomplete), the space it carried
+        must be attributed to the completing token (48).
+        """
+        from vllm.v1.engine.logprobs import LogprobsProcessor
+
+        processor = LogprobsProcessor(
+            tokenizer=mock_tokenizer,
+            logprobs=[],
+            prompt_logprobs=None,
+            cumulative_logprob=0.0,
+            num_logprobs=1,
+            num_prompt_logprobs=None,
+        )
+
+        def mock_decode(ids):
+            # Token 44 = space + 2 bytes (like OPT-125m's \u0120\u00e2\u0080)
+            if ids == [44]:
+                return " \ufffd"
+            if ids == [48]:
+                return "\ufffd"
+            # Together they form: space + left curly quote
+            if ids == [44, 48]:
+                return " \u201c"
+            # With preceding clean context
+            if ids == [1385]:
+                return " term"
+            if ids == [1385, 44]:
+                return " term \ufffd"
+            if ids == [1385, 44, 48]:
+                return " term \u201c"
+            return "\ufffd"
+
+        mock_tokenizer.decode.side_effect = mock_decode
+
+        # Token 44 with context [1385] -> still ends with replacement
+        result = processor._correct_decoded_token(44, [1385])
+        assert result == ""
+
+        # Token 48 with context [1385, 44]:
+        # Token 44 is byte-fallback, so clean context is [1385].
+        # decode([1385, 44, 48]) = " term \u201c"
+        # decode([1385]) = " term"
+        # result = " \u201c" (space preserved from token 44!)
+        result = processor._correct_decoded_token(48, [1385, 44])
+        assert result == " \u201c"
 
 
 def test_verify_tokens_integration():
@@ -1041,18 +1085,25 @@ def test_spec_decode_logprobs(
     )
 
     max_model_len = 256
-
-    # Run base LLM.
-    ref_llm = LLM(
-        model=model_name,
+    llm_kwargs = dict(
         max_logprobs=5,
         max_model_len=max_model_len,
         seed=42,
         logprobs_mode=logprobs_mode,
         gpu_memory_utilization=0.4,
+        # Force the same prefill chunking for both the base model and
+        # spec decode model so the comparison isolates spec decode.
+        enable_chunked_prefill=True,
+        max_num_batched_tokens=32,
         enable_prefix_caching=False,
         **ROCM_DETERMINISM_KWARGS,
     )
+
+    # Run base LLM.
+    ref_llm = LLM(
+        model=model_name,
+        **llm_kwargs,
+    )
     ref_results = ref_llm.generate(
         [prompt, prompt], [sampling_params, penalty_sampling_params]
     )
@@ -1072,16 +1123,7 @@ def test_spec_decode_logprobs(
     spec_llm = LLM(
         model_name,
         speculative_config=spec_config_with_len,
-        max_logprobs=5,
-        max_model_len=max_model_len,
-        seed=42,
-        logprobs_mode=logprobs_mode,
-        gpu_memory_utilization=0.4,
-        # Force prefill chunking
-        enable_chunked_prefill=True,
-        max_num_batched_tokens=32,
-        enable_prefix_caching=False,
-        **ROCM_DETERMINISM_KWARGS,
+        **llm_kwargs,
     )
     spec_results = spec_llm.generate(
         [prompt, prompt], [sampling_params, penalty_sampling_params]
diff --git a/tests/v1/sample/test_rejection_sampler.py b/tests/v1/sample/test_rejection_sampler.py
index 552a27fe22d6..ae0cbeab53b2 100644
--- a/tests/v1/sample/test_rejection_sampler.py
+++ b/tests/v1/sample/test_rejection_sampler.py
@@ -19,7 +19,7 @@
 from vllm.v1.sample.sampler import Sampler, SamplerOutput
 from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
 
-DEVICE = current_platform.device_type
+DEVICE_TYPE = current_platform.device_type
 
 
 @pytest.fixture
@@ -57,7 +57,7 @@ def create_logits_tensor(
     will produce desired token ids on argmax"""
     token_ids = [tokens[:-1] for tokens in output_token_ids]
     num_total_tokens = sum(len(tokens) for tokens in token_ids)
-    logits = torch.full((num_total_tokens, vocab_size), -100.0, device=DEVICE)
+    logits = torch.full((num_total_tokens, vocab_size), -100.0, device=DEVICE_TYPE)
     start_loc = 0
     for tokens in token_ids:
         for j, token_id in enumerate(tokens):
@@ -99,9 +99,9 @@ def create_sampling_metadata(
         assert output_token_ids
         assert len(output_token_ids) > 0
 
-        frequency_penalties = torch.tensor(frequency_penalties, device=DEVICE)
-        presence_penalties = torch.tensor(presence_penalties, device=DEVICE)
-        repetition_penalties = torch.tensor(repetition_penalties, device=DEVICE)
+        frequency_penalties = torch.tensor(frequency_penalties, device=DEVICE_TYPE)
+        presence_penalties = torch.tensor(presence_penalties, device=DEVICE_TYPE)
+        repetition_penalties = torch.tensor(repetition_penalties, device=DEVICE_TYPE)
     else:
         no_penalties = True
         frequency_penalties = torch.tensor([])
@@ -320,14 +320,27 @@ def test_deterministic_when_seeded(
     n_rep: int,
 ):
     num_tokens = batch_size * k
-    draft_probs = torch.rand(num_tokens, vocab_size, dtype=torch.float32, device=DEVICE)
+    draft_probs = torch.rand(
+        num_tokens,
+        vocab_size,
+        dtype=torch.float32,
+        device=DEVICE_TYPE,
+    )
     draft_probs = F.softmax(draft_probs, dim=-1)
     target_logits = torch.rand_like(draft_probs)
     bonus_token_ids = torch.randint(
-        low=0, high=vocab_size, size=(batch_size, 1), dtype=torch.int64, device=DEVICE
+        low=0,
+        high=vocab_size,
+        size=(batch_size, 1),
+        dtype=torch.int64,
+        device=DEVICE_TYPE,
     )
     draft_token_ids = torch.randint(
-        low=0, high=vocab_size, size=(batch_size, k), dtype=torch.int64, device=DEVICE
+        low=0,
+        high=vocab_size,
+        size=(batch_size, k),
+        dtype=torch.int64,
+        device=DEVICE_TYPE,
     )
 
     seeded_mask = torch.rand(batch_size, dtype=torch.float32) <= frac_seeded
@@ -335,12 +348,12 @@ def test_deterministic_when_seeded(
     results = []
     for _ in range(n_rep):
         seeded_seqs = {
-            i: torch.Generator(device=DEVICE).manual_seed(i)
+            i: torch.Generator(device=DEVICE_TYPE).manual_seed(i)
             for i in range(batch_size)
             if seeded_mask[i]
         }
 
-        temperature = torch.ones(batch_size, dtype=torch.float32, device=DEVICE)
+        temperature = torch.ones(batch_size, dtype=torch.float32, device=DEVICE_TYPE)
         sampling_metadata = create_sampling_metadata(
             all_greedy=False, temperature=temperature, generators=seeded_seqs
         )
@@ -387,7 +400,7 @@ def test_rejection_sampling_approximates_target_distribution():
     much more than the distance improvement between the observed
     distribution and the random distribution.
     """
-    torch.set_default_device(DEVICE)
+    torch.set_default_device(DEVICE_TYPE)
     vocab_size = 10
     k = 2
     num_reference_probs = 100
@@ -410,7 +423,7 @@ def test_rejection_sampling_approximates_target_distribution():
         rej_sample_probs = estimate_rejection_sampling_pdf(
             draft_probs, target_logits, k, vocab_size, num_samples
         )
-        rej_sample_probs = rej_sample_probs.to(DEVICE)
+        rej_sample_probs = rej_sample_probs.to(DEVICE_TYPE)
 
         # Average distance from reference probs.
         reference_vs_rejsample_dist = (
@@ -491,11 +504,11 @@ def estimate_rejection_sampling_pdf(
     draft_probs = draft_probs.view(num_tokens, vocab_size)
 
     # Bonus tokens not used but required.
-    bonus_token_ids = torch.zeros((1, 1), dtype=torch.int64, device=DEVICE).repeat(
+    bonus_token_ids = torch.zeros((1, 1), dtype=torch.int64, device=DEVICE_TYPE).repeat(
         num_samples, 1
     )
 
-    temperature = torch.ones(num_samples, dtype=torch.float32, device=DEVICE)
+    temperature = torch.ones(num_samples, dtype=torch.float32, device=DEVICE_TYPE)
     sampling_metadata = create_sampling_metadata(
         all_greedy=False, temperature=temperature
     )
@@ -600,7 +613,7 @@ def _test_masked_logits(
 
     # Create random draft probabilities.
     draft_probs = torch.rand(
-        (num_tokens, vocab_size), dtype=torch.float32, device=DEVICE
+        (num_tokens, vocab_size), dtype=torch.float32, device=DEVICE_TYPE
     )
     draft_probs = F.softmax(draft_probs, dim=-1)
 
@@ -610,7 +623,11 @@ def _test_masked_logits(
     draft_token_ids = draft_token_ids.tolist()
 
     # Bonus tokens not used but required
-    bonus_token_ids = torch.zeros((batch_size, 1), dtype=torch.int64, device=DEVICE)
+    bonus_token_ids = torch.zeros(
+        (batch_size, 1),
+        dtype=torch.int64,
+        device=DEVICE_TYPE,
+    )
 
     # Create spec decode metadata
     spec_decode_metadata = create_spec_decode_metadata(draft_token_ids, target_logits)
@@ -645,12 +662,13 @@ def test_top_k(rejection_sampler, top_k):
 
     # Randomly create top-k indices.
     top_k_indices = [
-        torch.randperm(vocab_size, device=DEVICE)[:top_k] for _ in range(num_tokens)
+        torch.randperm(vocab_size, device=DEVICE_TYPE)[:top_k]
+        for _ in range(num_tokens)
     ]
     top_k_indices = torch.stack(top_k_indices)
 
     # Create logits with the uniform distribution.
-    target_logits = torch.zeros((num_tokens, vocab_size), device=DEVICE)
+    target_logits = torch.zeros((num_tokens, vocab_size), device=DEVICE_TYPE)
 
     # Increment the logits for top-k indices, a little bit more than the other
     # ones. If the masking is effective, the non-topk indices will never be
@@ -659,11 +677,11 @@ def test_top_k(rejection_sampler, top_k):
         target_logits[i, top_k_indices[i]] += 0.1
 
     # Create sampling metadata
-    temperature = torch.ones(batch_size, dtype=torch.float32, device=DEVICE)
+    temperature = torch.ones(batch_size, dtype=torch.float32, device=DEVICE_TYPE)
     sampling_metadata = create_sampling_metadata(
         all_greedy=False,
         temperature=temperature,
-        top_k=torch.tensor([top_k] * batch_size, device=DEVICE, dtype=torch.int64),
+        top_k=torch.tensor([top_k] * batch_size, device=DEVICE_TYPE, dtype=torch.int64),
     )
 
     _test_masked_logits(
@@ -686,8 +704,8 @@ def test_top_p(rejection_sampler, top_p):
     num_tokens = batch_size * num_draft_tokens
 
     # Create logits with the uniform distribution.
-    target_logits = torch.randn((num_tokens, vocab_size), device=DEVICE)
-    temperature = torch.ones(batch_size, dtype=torch.float32, device=DEVICE)
+    target_logits = torch.randn((num_tokens, vocab_size), device=DEVICE_TYPE)
+    temperature = torch.ones(batch_size, dtype=torch.float32, device=DEVICE_TYPE)
     rescaled_logits = target_logits / temperature
 
     logits_sort, logits_idx = rescaled_logits.sort(dim=-1, descending=False)
@@ -706,7 +724,11 @@ def test_top_p(rejection_sampler, top_p):
     sampling_metadata = create_sampling_metadata(
         all_greedy=False,
         temperature=temperature,
-        top_p=torch.tensor([top_p] * batch_size, device=DEVICE, dtype=torch.float32),
+        top_p=torch.tensor(
+            [top_p] * batch_size,
+            device=DEVICE_TYPE,
+            dtype=torch.float32,
+        ),
     )
 
     _test_masked_logits(
@@ -732,7 +754,10 @@ def test_frequency_penalties(rejection_sampler):
         all_greedy=True,
         output_token_ids=[[2], [3], [4]],
         spec_token_ids=spec_tokens,
-        prompt_token_ids=torch.tensor([[5, 6, 7], [6, 7, 8], [7, 8, 9]], device=DEVICE),
+        prompt_token_ids=torch.tensor(
+            [[5, 6, 7], [6, 7, 8], [7, 8, 9]],
+            device=DEVICE_TYPE,
+        ),
         frequency_penalties=[1.5, 1.5, 0.7],
         presence_penalties=[0.0] * num_requests,
         repetition_penalties=[1.0] * num_requests,
@@ -858,21 +883,26 @@ def test_sample_recovered_tokens(
     num_tokens = batch_size * max_spec_len
 
     # Create random draft probabilities.
-    draft_probs = torch.rand(num_tokens, vocab_size, dtype=torch.float32, device=DEVICE)
+    draft_probs = torch.rand(
+        num_tokens,
+        vocab_size,
+        dtype=torch.float32,
+        device=DEVICE_TYPE,
+    )
     draft_probs = F.softmax(draft_probs, dim=-1)
 
     # Create random target probabilities.
     target_logits = torch.rand(
-        num_tokens, vocab_size, dtype=torch.float32, device=DEVICE
+        num_tokens, vocab_size, dtype=torch.float32, device=DEVICE_TYPE
     )
     target_probs = F.softmax(target_logits, dim=-1)
 
     # Randomly sample draft token ids from draft probs
     draft_token_ids = torch.multinomial(draft_probs, num_samples=1).to(torch.int32)
 
-    temperature = torch.ones(batch_size, dtype=torch.float32, device=DEVICE)
+    temperature = torch.ones(batch_size, dtype=torch.float32, device=DEVICE_TYPE)
     generators = {
-        i: torch.Generator(device=DEVICE).manual_seed(i) for i in range(batch_size)
+        i: torch.Generator(device=DEVICE_TYPE).manual_seed(i) for i in range(batch_size)
     }
     sampling_metadata = create_sampling_metadata(
         all_greedy=False, temperature=temperature, generators=generators
@@ -890,7 +920,7 @@ def test_sample_recovered_tokens(
         None if no_draft_probs else draft_probs,
         target_probs,
         sampling_metadata,
-        device=DEVICE,
+        device=DEVICE_TYPE,
     )
     recovered_token_ids = sample_recovered_tokens(
         max_spec_len,
@@ -900,6 +930,67 @@ def test_sample_recovered_tokens(
         None if no_draft_probs else draft_probs,
         target_probs,
         sampling_metadata,
-        device=DEVICE,
+        device=DEVICE_TYPE,
     )
     assert torch.equal(recovered_token_ids, ref_recovered_token_ids)
+
+
+########################### Tests for Synthetic Rejection Sampling #########
+
+
+def _make_synthetic_sampler(rates: list[float]) -> RejectionSampler:
+    mock_sampler = Mock(spec=Sampler)
+    mock_sampler.logprobs_mode = "raw_logprobs"
+    spec_config = Mock()
+    spec_config.rejection_sample_method = "synthetic"
+    spec_config.synthetic_acceptance_rates = rates
+    return RejectionSampler(mock_sampler, spec_config, torch.device(DEVICE_TYPE))
+
+
+def _make_sampling_metadata(all_greedy: bool) -> SamplingMetadata:
+    temperature = None if all_greedy else torch.tensor([1.0, 1.0], device=DEVICE_TYPE)
+    return create_sampling_metadata(all_greedy=all_greedy, temperature=temperature)
+
+
+@pytest.mark.parametrize("all_greedy", [True, False])
+def test_synthetic_all_accepted(all_greedy: bool):
+    """With all rates=1.0, every draft token is accepted."""
+    sampler = _make_synthetic_sampler([1.0, 1.0])
+    spec_tokens = [[1, 2], [3]]
+    output_tokens = [[10, 20, 50], [30, 40]]
+
+    metadata = _make_sampling_metadata(all_greedy)
+    logits = create_logits_tensor(output_tokens)
+    bonus = torch.tensor([50, 40], device=DEVICE_TYPE)
+    spec_decode_metadata = create_spec_decode_metadata(spec_tokens, logits)
+
+    mock_sampler_output(sampler, bonus)
+    output = sampler(spec_decode_metadata, None, logits, metadata)
+    expected = torch.tensor(
+        [[1, 2, 50], [3, 40, PLACEHOLDER_TOKEN_ID]],
+        dtype=torch.int,
+        device=DEVICE_TYPE,
+    )
+    assert torch.equal(output.sampled_token_ids, expected)
+
+
+@pytest.mark.parametrize("all_greedy", [True, False])
+def test_synthetic_all_rejected(all_greedy: bool):
+    """With all rates=0.0, the first token is always rejected."""
+    sampler = _make_synthetic_sampler([0.0, 0.0])
+    spec_tokens = [[1, 2], [3]]
+    output_tokens = [[10, 20, 50], [30, 40]]
+
+    metadata = _make_sampling_metadata(all_greedy)
+    logits = create_logits_tensor(output_tokens)
+    bonus = torch.tensor([50, 40], device=DEVICE_TYPE)
+    spec_decode_metadata = create_spec_decode_metadata(spec_tokens, logits)
+
+    mock_sampler_output(sampler, bonus)
+    output = sampler(spec_decode_metadata, None, logits, metadata)
+    result = output.sampled_token_ids
+    # Exactly one token emitted per sequence (the rejection fallback),
+    # followed by placeholders.
+    for row in result:
+        assert row[0] != PLACEHOLDER_TOKEN_ID
+        assert (row[1:] == PLACEHOLDER_TOKEN_ID).all()
diff --git a/tests/v1/sample/test_sampler.py b/tests/v1/sample/test_sampler.py
index 51f2bf5e753c..c67199fa4077 100644
--- a/tests/v1/sample/test_sampler.py
+++ b/tests/v1/sample/test_sampler.py
@@ -17,8 +17,9 @@
 MAX_NUM_REQS = 256
 VOCAB_SIZE = 1024
 NUM_OUTPUT_TOKENS = 20
-CUDA_DEVICES = [
-    f"{current_platform.device_type}:{i}"
+DEVICE_TYPE = current_platform.device_type
+DEVICES = [
+    f"{DEVICE_TYPE}:{i}"
     for i in range(1 if current_platform.device_count() == 1 else 2)
 ]
 MAX_NUM_PROMPT_TOKENS = 64
@@ -199,7 +200,7 @@ def _create_weighted_output_token_list(
     return output_token_ids, sorted_token_ids_in_output
 
 
-@pytest.mark.parametrize("device", CUDA_DEVICES)
+@pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("batch_size", [1, 2, 32])
 @pytest.mark.parametrize("presence_penalty", [-2.0, 2.0])
 def test_sampler_presence_penalty(
@@ -249,7 +250,7 @@ def test_sampler_presence_penalty(
             assert penalized_token_id not in output_token_ids[batch_idx]
 
 
-@pytest.mark.parametrize("device", CUDA_DEVICES)
+@pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("batch_size", [1, 2, 32])
 @pytest.mark.parametrize("frequency_penalty", [-2.0, 2.0])
 def test_sampler_frequency_penalty(
@@ -305,7 +306,7 @@ def test_sampler_frequency_penalty(
             assert penalized_token_id not in distinct_sorted_token_ids_in_output
 
 
-@pytest.mark.parametrize("device", CUDA_DEVICES)
+@pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("batch_size", [1, 2, 32])
 @pytest.mark.parametrize("repetition_penalty", [0.1, 1.9])
 def test_sampler_repetition_penalty(
@@ -363,7 +364,7 @@ def test_sampler_repetition_penalty(
             )
 
 
-@pytest.mark.parametrize("device", CUDA_DEVICES)
+@pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("batch_size", [1, 2, 32])
 @pytest.mark.parametrize("num_allowed_token_ids", [0, 1, 2])
 def test_sampler_allowed_token_ids(
@@ -409,7 +410,7 @@ def test_sampler_allowed_token_ids(
                 assert logits_for_req[token_id] != -float("inf")
 
 
-@pytest.mark.parametrize("device", CUDA_DEVICES)
+@pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("batch_size", [1, 2, 32])
 @pytest.mark.parametrize("bad_words_lengths", [(1,), (1, 3), (2, 2)])
 def test_sampler_bad_words(
diff --git a/tests/v1/sample/test_topk_topp_sampler.py b/tests/v1/sample/test_topk_topp_sampler.py
index ce1e288a2418..7d488aaabf2f 100644
--- a/tests/v1/sample/test_topk_topp_sampler.py
+++ b/tests/v1/sample/test_topk_topp_sampler.py
@@ -7,13 +7,36 @@
 from vllm.platforms import current_platform
 from vllm.v1.sample.ops.topk_topp_sampler import apply_top_k_top_p_pytorch
 
-CUDA_DEVICE = "cuda" if current_platform.is_cuda() else None
-DEVICE = current_platform.device_type
+DEVICE_TYPE = current_platform.device_type
 
 BATCH_SIZE = 1024
 VOCAB_SIZE = 128 * 1024
 
 
+def _flashinfer_topk_topp_supported() -> bool:
+    """True iff the FlashInfer top-k/top-p sampler is usable on this host.
+
+    Mirrors the gate in `TopKTopPSampler.__init__`: CUDA + flashinfer
+    importable + GPU compute capability supported by the FlashInfer
+    backend.
+    """
+    if not current_platform.is_cuda():
+        return False
+    try:
+        import flashinfer  # noqa: F401
+
+        from vllm.v1.attention.backends.flashinfer import FlashInferBackend
+    except ImportError:
+        return False
+    capability = current_platform.get_device_capability()
+    if capability is None:
+        return False
+    return FlashInferBackend.supports_compute_capability(capability)
+
+
+FLASHINFER_TOPK_TOPP_SUPPORTED = _flashinfer_topk_topp_supported()
+
+
 @pytest.fixture(autouse=True)
 def reset_default_device():
     """
@@ -26,8 +49,8 @@ def reset_default_device():
 
 
 def test_topk_impl_equivalence():
-    torch.set_default_device(DEVICE)
-    generator = Generator(device=DEVICE).manual_seed(33)
+    torch.set_default_device(DEVICE_TYPE)
+    generator = Generator(device=DEVICE_TYPE).manual_seed(33)
 
     logits = torch.rand((BATCH_SIZE, VOCAB_SIZE), generator=generator)
 
@@ -76,8 +99,8 @@ def test_flashinfer_sampler():
     if not FLASHINFER_ENABLED:
         pytest.skip("FlashInfer not installed or not available on this platform.")
 
-    torch.set_default_device(DEVICE)
-    generator = Generator(device=DEVICE).manual_seed(42)
+    torch.set_default_device(DEVICE_TYPE)
+    generator = Generator(device=DEVICE_TYPE).manual_seed(42)
 
     # Generate random logits
     logits = torch.rand((BATCH_SIZE, VOCAB_SIZE), generator=generator)
@@ -128,15 +151,15 @@ def test_flashinfer_sampler():
 # =============================================================================
 
 
-@pytest.mark.skipif(CUDA_DEVICE is None, reason="CUDA not available")
+@pytest.mark.skipif("cpu" in DEVICE_TYPE, reason="CUDA/XPU not available")
 class TestTritonTopkTopp:
     """Tests for the Triton top-k/top-p kernel."""
 
     @pytest.fixture(autouse=True)
     def setup(self):
         """Set up test fixtures."""
-        torch.set_default_device(CUDA_DEVICE)
-        self.generator = Generator(device=CUDA_DEVICE).manual_seed(42)
+        torch.set_default_device(DEVICE_TYPE)
+        self.generator = Generator(device=DEVICE_TYPE).manual_seed(42)
 
     def _compare_results(
         self,
@@ -297,6 +320,56 @@ def test_large_batch(self):
 
         self._compare_results(logits, k, p)
 
+    @pytest.mark.parametrize(
+        "mode",
+        ["topk_only", "topp_only", "topk_and_topp"],
+    )
+    def test_noncontiguous_logits_match_contiguous(self, mode: str):
+        """Non-contiguous logits views should behave like contiguous inputs."""
+        from vllm.v1.sample.ops.topk_topp_triton import apply_top_k_top_p_triton
+
+        device = torch.device(DEVICE_TYPE)
+        batch_size, vocab_size, pad = 16, 4096, 8
+        backing = torch.full(
+            (batch_size, vocab_size + pad),
+            -1000.0,
+            device=device,
+            dtype=torch.float32,
+        )
+        base = torch.linspace(
+            10.0, -10.0, vocab_size, device=device, dtype=torch.float32
+        )
+        source = base[None, :] + (
+            torch.arange(batch_size, device=device, dtype=torch.float32)[:, None]
+            / 1000.0
+        )
+
+        logits = backing[:, :vocab_size]
+        logits.copy_(source)
+        contig_logits = source.clone()
+        pytorch_logits = source.clone()
+
+        assert logits.shape == (batch_size, vocab_size)
+        assert logits.stride() == (vocab_size + pad, 1)
+        assert not logits.is_contiguous()
+
+        k: torch.Tensor | None = None
+        p: torch.Tensor | None = None
+        if mode in ("topk_only", "topk_and_topp"):
+            k = torch.full((batch_size,), 154, device=device, dtype=torch.int32)
+        if mode in ("topp_only", "topk_and_topp"):
+            p = torch.full((batch_size,), 0.95, device=device, dtype=torch.float32)
+
+        noncontig_out = apply_top_k_top_p_triton(logits, k, p)
+        contig_out = apply_top_k_top_p_triton(contig_logits, k, p)
+        pytorch_out = apply_top_k_top_p_pytorch(pytorch_logits, k, p)
+
+        assert noncontig_out.data_ptr() == logits.data_ptr()
+        assert not noncontig_out.is_contiguous()
+        assert torch.equal(logits, noncontig_out)
+        assert torch.equal(torch.isfinite(noncontig_out), torch.isfinite(contig_out))
+        assert torch.equal(torch.isfinite(noncontig_out), torch.isfinite(pytorch_out))
+
     # -----------------------------------------------------------------
     # Tests for -inf logits (e.g. from grammar / structured output masks)
     # -----------------------------------------------------------------
@@ -569,3 +642,280 @@ def test_mixed_neginf_and_normal_rows(self):
             finite_in = (logits[i] > float("-inf")).sum().item()
             if finite_in > 0:
                 assert kept > 0, f"Row {i}: no tokens kept"
+
+
+# =============================================================================
+# FlashInfer top-k/top-p robustness tests
+# =============================================================================
+
+
+@pytest.mark.skipif(
+    not FLASHINFER_TOPK_TOPP_SUPPORTED,
+    reason="FlashInfer top-k/top-p sampler requires CUDA "
+    "and a GPU with FlashInfer support.",
+)
+class TestFlashInferTopkToppRobustness:
+    """Robustness of FlashInfer top-k / top-p sampling to NaN / Inf logits.
+
+    The FlashInfer sampler is enabled by default on supported GPUs. A
+    single poisoned request (NaN / +Inf / -Inf in row 0) must not:
+
+    1. crash or hang the process;
+    2. produce out-of-range token ids (anything outside ``[0, vocab)``);
+    3. corrupt other batch rows — neighbours of a poisoned row must
+       still receive valid token ids (regression for cross-row
+       corruption in a DP batch where one bad request would otherwise
+       poison its peers).
+
+    The reference is "no crash + valid token ids", not bit-exact equality
+    against the PyTorch-native path.
+    """
+
+    BATCH = 8
+    VOCAB = 32768
+    TOPK = 50
+    TOPP = 0.9
+
+    @pytest.fixture(autouse=True)
+    def setup(self):
+        torch.set_default_device(DEVICE_TYPE)
+        self.generator = Generator(device=DEVICE_TYPE).manual_seed(1234)
+
+    def _make_logits(self, pattern: str) -> torch.Tensor:
+        """Build (BATCH, VOCAB) logits with `pattern` applied to row 0
+        (rows 1..B-1 stay clean so we can detect cross-row corruption)."""
+        logits = (
+            torch.randn(
+                self.BATCH,
+                self.VOCAB,
+                generator=self.generator,
+                dtype=torch.float32,
+            )
+            * 5.0
+        )
+        if pattern == "clean":
+            return logits
+        if pattern == "nan_one_row":
+            logits[0, :] = float("nan")
+        elif pattern == "nan_few":
+            # Scatter 16 NaNs across row 0, keep the rest finite.
+            idx = torch.randperm(self.VOCAB, generator=self.generator)[:16]
+            logits[0, idx] = float("nan")
+        elif pattern == "nan_at_top":
+            # Poison the top-32 highest-scoring positions of row 0 — worst
+            # case for top-k since these are exactly the tokens that would
+            # otherwise be selected. Use argsort instead of topk to avoid
+            # a known compute-sanitizer false positive in mbtopk.
+            top_idx = logits[0].argsort(descending=True)[:32]
+            logits[0, top_idx] = float("nan")
+        elif pattern == "nan_all_rows":
+            logits[:, :] = float("nan")
+        elif pattern == "pos_inf_one_row":
+            logits[0, :] = float("inf")
+        elif pattern == "neg_inf_one_row":
+            logits[0, :] = float("-inf")
+        elif pattern == "mixed_inf_nan":
+            assert self.BATCH >= 3
+            logits[0, :] = float("nan")
+            logits[1, :] = float("inf")
+            logits[2, :] = float("-inf")
+        elif pattern == "degenerate_flat":
+            logits[:, :] = 1.0
+        else:
+            raise ValueError(f"unknown pattern: {pattern}")
+        return logits
+
+    def _check_tokens(self, tokens: torch.Tensor, ctx: str):
+        assert tokens.dim() == 1, f"{ctx}: expected 1-D output, got {tokens.shape}"
+        assert tokens.shape[0] == self.BATCH, (
+            f"{ctx}: expected batch size {self.BATCH}, got {tokens.shape[0]}"
+        )
+        ids = tokens.tolist()
+        min_id, max_id = min(ids), max(ids)
+        assert 0 <= min_id < self.VOCAB and 0 <= max_id < self.VOCAB, (
+            f"{ctx}: token id(s) outside [0, {self.VOCAB}): min={min_id}, max={max_id}"
+        )
+
+    @pytest.mark.parametrize(
+        "pattern",
+        [
+            "clean",
+            "nan_one_row",
+            "nan_few",
+            "nan_at_top",
+            "nan_all_rows",
+            "pos_inf_one_row",
+            "neg_inf_one_row",
+            "mixed_inf_nan",
+            "degenerate_flat",
+        ],
+    )
+    @pytest.mark.parametrize("path", ["topk_only", "topp_only", "topk_topp"])
+    def test_flashinfer_handles_pathological_logits(self, pattern: str, path: str):
+        """flashinfer_sample must return valid ids even on poisoned logits.
+
+        Direct call into ``flashinfer_sample`` — exactly the code path
+        ``TopKTopPSampler.forward_cuda`` takes when FI is enabled.
+        """
+        from vllm.v1.sample.ops.topk_topp_sampler import flashinfer_sample
+
+        logits = self._make_logits(pattern)
+        k = (
+            torch.full(
+                (self.BATCH,),
+                self.TOPK,
+                device=DEVICE_TYPE,
+                dtype=torch.int32,
+            )
+            if path in ("topk_only", "topk_topp")
+            else None
+        )
+        p = (
+            torch.full(
+                (self.BATCH,),
+                self.TOPP,
+                device=DEVICE_TYPE,
+                dtype=torch.float32,
+            )
+            if path in ("topp_only", "topk_topp")
+            else None
+        )
+
+        # flashinfer_sample may mutate its input in-place; pass a clone so
+        # the parametrize iterations stay independent.
+        tokens = flashinfer_sample(logits.clone().contiguous(), k, p, {})
+        # Surface any async CUDA error synchronously (e.g. illegal memory
+        # access from a malformed FlashInfer call) so it's attributed to
+        # this test rather than a later, unrelated GPU op.
+        torch.accelerator.synchronize()
+        self._check_tokens(tokens, ctx=f"pattern={pattern}, path={path}")
+
+
+# =============================================================================
+# FlashInfer top-k/top-p distribution-match tests
+# =============================================================================
+
+
+@pytest.mark.skipif(
+    not FLASHINFER_TOPK_TOPP_SUPPORTED,
+    reason="FlashInfer top-k/top-p sampler requires CUDA "
+    "and a GPU with FlashInfer support.",
+)
+class TestFlashInferDistributionMatch:
+    """Chi-square goodness-of-fit: FlashInfer and PyTorch-native samplers
+    both reproduce the expected token distribution after top-k / top-p.
+
+    Regression guard against historical FlashInfer distribution-shift.
+    Each impl is compared to the theoretical distribution (softmax of
+    filtered logits); if both pass they are statistically equivalent
+    to each other by transitivity.
+    """
+
+    VOCAB = 32
+    N_SAMPLES = 50_000
+    ALPHA = 1e-6
+    SEED = 0
+
+    @pytest.mark.parametrize(
+        "topk,topp",
+        [
+            (8, None),
+            (16, None),
+            (None, 0.5),
+            (None, 0.7),
+            (None, 0.99),
+            (8, 0.9),
+            (4, 0.5),
+        ],
+    )
+    def test_distribution_matches_theoretical(self, topk, topp):
+        from scipy.stats import chisquare
+
+        from vllm.v1.sample.ops.topk_topp_sampler import (
+            apply_top_k_top_p,
+            flashinfer_sample,
+            random_sample,
+        )
+
+        torch.set_default_device(DEVICE_TYPE)
+        torch.manual_seed(self.SEED)
+
+        # Same logits row used for both impls so the comparison is fair.
+        logits_one = (
+            torch.randn(
+                (1, self.VOCAB),
+                dtype=torch.float32,
+            )
+            * 2.0
+        )
+
+        # Theoretical expected distribution from PyTorch-native filter.
+        k_one = torch.tensor([topk], dtype=torch.int32) if topk is not None else None
+        p_one = torch.tensor([topp], dtype=torch.float32) if topp is not None else None
+        masked = apply_top_k_top_p_pytorch(logits_one.clone(), k_one, p_one)
+        expected_probs = masked.softmax(dim=-1).flatten().cpu().numpy()
+        expected_counts = expected_probs * self.N_SAMPLES
+
+        # Build a batch of N identical rows for both impls.
+        batch = logits_one.expand(self.N_SAMPLES, self.VOCAB).contiguous()
+        k_batch = (
+            torch.full((self.N_SAMPLES,), topk, dtype=torch.int32)
+            if topk is not None
+            else None
+        )
+        p_batch = (
+            torch.full((self.N_SAMPLES,), topp, dtype=torch.float32)
+            if topp is not None
+            else None
+        )
+
+        # FlashInfer dispatch path.
+        fi_tokens = flashinfer_sample(batch.contiguous(), k_batch, p_batch, {})
+        fi_counts = torch.bincount(fi_tokens, minlength=self.VOCAB).cpu().numpy()
+        self._chi2_check(
+            fi_counts,
+            expected_counts,
+            chisquare,
+            label=f"flashinfer top-k={topk} top-p={topp}",
+        )
+
+        # PyTorch-native dispatch path (Triton-routed filter + Gumbel sample).
+        processed = apply_top_k_top_p(batch.clone(), k_batch, p_batch)
+        probs = processed.softmax(dim=-1, dtype=torch.float32)
+        pt_tokens = random_sample(probs, {})
+        pt_counts = torch.bincount(pt_tokens, minlength=self.VOCAB).cpu().numpy()
+        self._chi2_check(
+            pt_counts,
+            expected_counts,
+            chisquare,
+            label=f"native top-k={topk} top-p={topp}",
+        )
+
+    def _chi2_check(self, empirical, expected, chisquare_fn, *, label):
+        import numpy as np
+
+        # Hard check: the sampler must never produce a token outside the
+        # expected support (zero theoretical probability).
+        outside = (expected == 0) & (empirical > 0)
+        assert not outside.any(), (
+            f"{label}: sampled out-of-support tokens "
+            f"(zero expected prob): indices={outside.nonzero()[0].tolist()}"
+        )
+        # Skip chi-square in the degenerate case where the support
+        # collapses to a single token (e.g. very restrictive joint
+        # top-k + top-p): all samples must land there and the hard
+        # check above already verified they do.
+        in_support = expected > 0
+        if int(in_support.sum()) <= 1:
+            return
+        # Soft check: chi-square goodness-of-fit on in-support tokens.
+        # Cast to float64 so the rescaling step below stays within
+        # scipy.chisquare's strict 1.5e-8 sum-equality tolerance.
+        emp = empirical[in_support].astype(np.float64)
+        exp = expected[in_support].astype(np.float64)
+        exp = exp * (emp.sum() / exp.sum())
+        chi2, p_value = chisquare_fn(emp, exp)
+        assert p_value > self.ALPHA, (
+            f"{label}: distribution differs from theoretical: "
+            f"chi2={chi2:.2f} p_value={p_value:.2e} alpha={self.ALPHA}"
+        )
diff --git a/tests/v1/sample/utils.py b/tests/v1/sample/utils.py
index a0abb3b4c6ce..907be3614b9c 100644
--- a/tests/v1/sample/utils.py
+++ b/tests/v1/sample/utils.py
@@ -198,22 +198,43 @@ def get_logitsprocs(self) -> Iterator[LogitsProcessor]:
 
 def fake_update_logitsprocs_state(
     test_fakes: LogitsprocsTestFakes,
-    batch_update: BatchUpdate,
+    batch_update: BatchUpdate | None,
 ) -> None:
     """Imitate logits processors persistent batch state update
     in engine core"""
     for logitproc in test_fakes.get_logitsprocs():
         logitproc.update_state(batch_update)
+    holder = test_fakes.sampling_metadata.thinking_budget_state_holder
+    if holder is not None:
+        holder.sync_batch(batch_update)
 
 
 def fake_apply_logitsprocs(
     test_fakes: LogitsprocsTestFakes,
     slice_indices: list[int],
+    slot_output_token_ids: list[list[int]] | None = None,
 ) -> torch.Tensor:
-    """Imitate application of logits processors in engine core"""
+    """Imitate application of logits processors in engine core.
+
+    When ``thinking_budget_state_holder`` has tracked requests, this mirrors
+    :meth:`Sampler.apply_logits_processors` by refreshing per-slot
+    ``output_token_ids`` (if ``slot_output_token_ids`` is provided), then
+    ``update_state`` + ``apply_to_logits`` on the holder after built-in logits
+    processors.
+    """
     logits = test_fakes.logits[torch.tensor(slice_indices, dtype=torch.long)].clone()
     for processor in test_fakes.get_logitsprocs():
         logits = processor.apply(logits)
+
+    md = test_fakes.sampling_metadata
+    holder = md.thinking_budget_state_holder
+    if holder is not None and holder.has_tracked_requests():
+        if slot_output_token_ids is not None:
+            for i, toks in enumerate(slot_output_token_ids):
+                if i < len(md.output_token_ids):
+                    md.output_token_ids[i] = list(toks)
+        holder.update_state(md.output_token_ids, md.spec_token_ids, None)
+        logits = holder.apply_to_logits(logits, False, md.spec_token_ids)
     return logits
 
 
diff --git a/tests/v1/shutdown/test_delete.py b/tests/v1/shutdown/test_delete.py
index ee04dfad3906..adf99fb922da 100644
--- a/tests/v1/shutdown/test_delete.py
+++ b/tests/v1/shutdown/test_delete.py
@@ -11,8 +11,8 @@
 )
 from vllm import LLM, SamplingParams
 from vllm.engine.arg_utils import AsyncEngineArgs
+from vllm.platforms import current_platform
 from vllm.sampling_params import RequestOutputKind
-from vllm.utils.torch_utils import cuda_device_count_stateless
 from vllm.v1.engine.async_llm import AsyncLLM
 
 MODELS = ["hmellor/tiny-random-LlamaForCausalLM"]
@@ -34,7 +34,7 @@ async def test_async_llm_delete(
       tensor_parallel_size: degree of tensor parallelism
       send_one_request: send one request to engine before deleting
     """
-    if cuda_device_count_stateless() < tensor_parallel_size:
+    if current_platform.device_count() < tensor_parallel_size:
         pytest.skip(reason="Not enough CUDA devices")
 
     engine_args = AsyncEngineArgs(
@@ -83,7 +83,7 @@ def test_llm_delete(
       enable_multiprocessing: enable workers in separate process(es)
       send_one_request: send one request to engine before deleting
     """
-    if cuda_device_count_stateless() < tensor_parallel_size:
+    if current_platform.device_count() < tensor_parallel_size:
         pytest.skip(reason="Not enough CUDA devices")
 
     with monkeypatch.context() as m:
diff --git a/tests/v1/shutdown/test_forward_error.py b/tests/v1/shutdown/test_forward_error.py
index 4625bc174915..eadb1abb6d5d 100644
--- a/tests/v1/shutdown/test_forward_error.py
+++ b/tests/v1/shutdown/test_forward_error.py
@@ -15,7 +15,7 @@
 from vllm import LLM, AsyncEngineArgs, SamplingParams
 from vllm.distributed import get_tensor_model_parallel_rank
 from vllm.model_executor.models.llama import LlamaForCausalLM
-from vllm.utils.torch_utils import cuda_device_count_stateless
+from vllm.platforms import current_platform
 from vllm.v1.engine.async_llm import AsyncLLM
 from vllm.v1.engine.exceptions import EngineDeadError
 
@@ -60,7 +60,7 @@ async def test_async_llm_model_error(
 
     AsyncLLM always uses an MP client.
     """
-    if cuda_device_count_stateless() < tensor_parallel_size:
+    if current_platform.device_count() < tensor_parallel_size:
         pytest.skip(reason="Not enough CUDA devices")
 
     # Monkeypatch an error in the model.
@@ -126,7 +126,7 @@ def test_llm_model_error(
     TODO(andy) - LLM without multiprocessing; LLM with multiprocessing
     and >1 rank
     """
-    if cuda_device_count_stateless() < tensor_parallel_size:
+    if current_platform.device_count() < tensor_parallel_size:
         pytest.skip(reason="Not enough CUDA devices")
 
     with monkeypatch.context() as m:
diff --git a/tests/v1/shutdown/test_startup_error.py b/tests/v1/shutdown/test_startup_error.py
index 4b5661a52c1e..03982aa48e71 100644
--- a/tests/v1/shutdown/test_startup_error.py
+++ b/tests/v1/shutdown/test_startup_error.py
@@ -15,7 +15,7 @@
 from vllm.distributed import get_tensor_model_parallel_rank
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.model_executor.models.llama import LlamaForCausalLM
-from vllm.utils.torch_utils import cuda_device_count_stateless
+from vllm.platforms import current_platform
 from vllm.v1.engine.async_llm import AsyncLLM
 
 MODELS = ["hmellor/tiny-random-LlamaForCausalLM"]
@@ -57,7 +57,7 @@ def test_async_llm_startup_error(
     Test profiling (forward()) and load weights failures.
     AsyncLLM always uses an MP client.
     """
-    if cuda_device_count_stateless() < tensor_parallel_size:
+    if current_platform.device_count() < tensor_parallel_size:
         pytest.skip(reason="Not enough CUDA devices")
 
     # Monkeypatch an error in the model.
@@ -99,7 +99,7 @@ def test_llm_startup_error(
     # If MODELS list grows, each architecture needs its own test variant.
     if model != "JackFram/llama-68m":
         pytest.skip(reason="Only test JackFram/llama-68m")
-    if cuda_device_count_stateless() < tensor_parallel_size:
+    if current_platform.device_count() < tensor_parallel_size:
         pytest.skip(reason="Not enough CUDA devices")
 
     with monkeypatch.context() as m:
diff --git a/tests/v1/simple_kv_offload/__init__.py b/tests/v1/simple_kv_offload/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/v1/simple_kv_offload/test_integration.py b/tests/v1/simple_kv_offload/test_integration.py
new file mode 100644
index 000000000000..02f6360e08e8
--- /dev/null
+++ b/tests/v1/simple_kv_offload/test_integration.py
@@ -0,0 +1,193 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Integration tests for SimpleCPUOffloadConnector with real models."""
+
+import time
+
+import pytest
+
+from vllm import LLM, SamplingParams, TokensPrompt
+from vllm.config import KVTransferConfig
+from vllm.platforms import current_platform
+
+if not current_platform.is_cuda_alike():
+    pytest.skip("Requires CUDA or ROCm", allow_module_level=True)
+
+# Small models for default CI / local runs (accuracy only).
+SMALL_MODELS = [
+    "meta-llama/Llama-3.2-1B-Instruct",
+    "google/gemma-3-1b-it",
+]
+
+# Large models for optional perf runs only (slow to load and execute).
+PERF_MODELS = [
+    "meta-llama/Llama-3.1-8B",
+    "openai/gpt-oss-20b",
+]
+
+
+def _make_llm(model: str, lazy: bool, cpu_bytes_to_use: int) -> LLM:
+    kv_transfer_config = KVTransferConfig(
+        kv_connector="SimpleCPUOffloadConnector",
+        kv_role="kv_both",
+        kv_connector_extra_config={
+            "cpu_bytes_to_use": cpu_bytes_to_use,
+            "lazy_offload": lazy,
+        },
+    )
+    return LLM(
+        model=model,
+        kv_cache_memory_bytes=40 << 30,  # 40 GiB
+        disable_hybrid_kv_cache_manager=False,
+        enable_prefix_caching=True,
+        kv_transfer_config=kv_transfer_config,
+    )
+
+
+def _flush_gpu_cache(llm: LLM, sampling_params: SamplingParams, seed: int = 0):
+    """Generate enough filler requests to allocate the entire GPU KV cache.
+
+    This pushes all prior blocks through the free queue so that the lazy
+    cursor offloads them to CPU before they are evicted.
+    """
+    cache_config = llm.llm_engine.vllm_config.cache_config
+    num_gpu_blocks = cache_config.num_gpu_blocks
+    block_size = cache_config.block_size
+    # Use 1.2x GPU capacity to give the lazy cursor enough scheduling steps
+    # to walk past all target blocks near the tail of the free queue.
+    total_tokens_needed = int(num_gpu_blocks * block_size * 1.5)
+
+    # Use token-id prompts so each filler is unique (no prefix sharing).
+    # Split into multiple requests to stay under max_model_len.
+    max_tokens_per_req = 4096
+    num_fillers = (total_tokens_needed + max_tokens_per_req - 1) // max_tokens_per_req
+    batch_size = 10
+    for i in range(0, num_fillers, batch_size):
+        batch_end = min(i + batch_size, num_fillers)
+        filler_prompts = []
+        for j in range(i, batch_end):
+            ids = [seed * num_fillers + j + 1] * max_tokens_per_req
+            filler_prompts.append(TokensPrompt(prompt_token_ids=ids))
+        llm.generate(filler_prompts, sampling_params, use_tqdm=False)
+
+
+def _accuracy_test(llm: LLM, lazy: bool = False):
+    """Verify that CPU-loaded KV produces correct output."""
+    sampling_params = SamplingParams(max_tokens=1, temperature=0)
+    prompt = "hi " * 2000 + "Let's count to ten. One, two, three, "
+
+    # Cold run — populate GPU cache and trigger CPU offload
+    cold_output = llm.generate(prompt, sampling_params, use_tqdm=False)[0]
+
+    # CPU hit runs
+    test_count = 10
+    success_count = 0
+    expected = cold_output.outputs[0].text
+    for i in range(test_count):
+        if lazy:
+            _flush_gpu_cache(llm, sampling_params, seed=i)
+        time.sleep(2)  # let engine core drain pending transfers
+
+        # Reset GPU prefix cache so next run must load from CPU
+        if not llm.reset_prefix_cache():
+            print(f"GPU prefix cache reset failed for iteration {i}")
+
+        output = llm.generate(prompt, sampling_params, use_tqdm=False)[0]
+        if output.outputs[0].text == expected:
+            success_count += 1
+
+    assert success_count >= 0.5 * test_count, (
+        f"Accuracy too low: {success_count}/{test_count} matched '{expected}'"
+    )
+
+
+def _latency_test(llm: LLM, lazy: bool = False):
+    """Verify CPU cache hit is faster than cold compute."""
+    sampling_params = SamplingParams(max_tokens=1, seed=42)
+    prompt_token_ids = [0] * 10001
+
+    num_times_cpu_better = 0
+    num_tests = 10
+    for i in range(num_tests):
+        prompt_token_ids[0] = i
+        prompts = [TokensPrompt(prompt_token_ids=prompt_token_ids)]
+
+        # Cold
+        time.sleep(2)  # let engine core drain pending transfers
+        if not llm.reset_prefix_cache():
+            print(f"GPU prefix cache reset failed for iteration {i}")
+        start = time.time()
+        llm.generate(prompts, sampling_params, use_tqdm=False)
+        cold_time = time.time() - start
+
+        if lazy:
+            _flush_gpu_cache(llm, sampling_params, seed=i)
+        else:
+            # Eager mode: GPU hit ensures store completion is processed.
+            llm.generate(prompts, sampling_params, use_tqdm=False)
+
+        time.sleep(2)  # let engine core drain pending transfers
+        if not llm.reset_prefix_cache():
+            print(f"GPU prefix cache reset failed for iteration {i}")
+
+        # CPU hit
+        start = time.time()
+        llm.generate(prompts, sampling_params, use_tqdm=False)
+        cpu_time = time.time() - start
+
+        if cpu_time < cold_time:
+            num_times_cpu_better += 1
+
+    assert num_times_cpu_better >= 0.8 * num_tests, (
+        f"CPU hit only faster {num_times_cpu_better}/{num_tests} times"
+    )
+
+
+@pytest.mark.optional
+@pytest.mark.slow_test
+@pytest.mark.parametrize("model", SMALL_MODELS)
+def test_simple_cpu_offload_accuracy(model: str):
+    """Store to CPU, reset GPU, load from CPU; verify output matches baseline."""
+    llm = _make_llm(model, False, 1 << 30)  # 1GB
+    try:
+        _accuracy_test(llm, lazy=False)
+    finally:
+        del llm
+
+
+@pytest.mark.optional
+@pytest.mark.slow_test
+@pytest.mark.parametrize("model", PERF_MODELS)
+def test_simple_cpu_offload_perf_latency(model: str):
+    """CPU KV hit should beat cold prefill on long context (large models only)."""
+    llm = _make_llm(model, False, 10 << 30)  # 10GB
+    try:
+        _latency_test(llm, lazy=False)
+    finally:
+        del llm
+
+
+@pytest.mark.optional
+@pytest.mark.slow_test
+@pytest.mark.parametrize("model", SMALL_MODELS)
+def test_simple_cpu_offload_accuracy_lazy(model: str):
+    """Lazy mode: flush GPU cache to trigger CPU offload, then verify hit."""
+    # CPU must be larger than GPU KV cache to avoid evicting offloaded blocks.
+    llm = _make_llm(model, True, 80 << 30)  # 80GB
+    try:
+        _accuracy_test(llm, lazy=True)
+    finally:
+        del llm
+
+
+@pytest.mark.optional
+@pytest.mark.slow_test
+@pytest.mark.parametrize("model", PERF_MODELS)
+def test_simple_cpu_offload_perf_latency_lazy(model: str):
+    """Lazy mode: CPU KV hit should beat cold prefill (large models only)."""
+    # CPU must be larger than GPU KV cache to avoid evicting offloaded blocks.
+    llm = _make_llm(model, True, 80 << 30)  # 80GB
+    try:
+        _latency_test(llm, lazy=True)
+    finally:
+        del llm
diff --git a/tests/v1/simple_kv_offload/test_scheduler.py b/tests/v1/simple_kv_offload/test_scheduler.py
new file mode 100644
index 000000000000..970e16e52798
--- /dev/null
+++ b/tests/v1/simple_kv_offload/test_scheduler.py
@@ -0,0 +1,1350 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Unit tests for SimpleCPUOffloadScheduler."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import torch
+
+from vllm import SamplingParams
+from vllm.config import (
+    CacheConfig,
+    DeviceConfig,
+    KVTransferConfig,
+    ModelConfig,
+    SchedulerConfig,
+    VllmConfig,
+)
+from vllm.utils.hashing import sha256
+from vllm.v1.core.block_pool import BlockPool
+from vllm.v1.core.kv_cache_manager import KVCacheBlocks
+from vllm.v1.core.kv_cache_utils import (
+    get_request_block_hasher,
+    init_none_hash,
+    make_block_hash_with_group_id,
+)
+from vllm.v1.core.sched.output import (
+    CachedRequestData,
+    NewRequestData,
+    SchedulerOutput,
+)
+from vllm.v1.kv_cache_interface import (
+    FullAttentionSpec,
+    KVCacheConfig,
+    KVCacheGroupSpec,
+    KVCacheTensor,
+)
+from vllm.v1.outputs import KVConnectorOutput
+from vllm.v1.request import Request
+from vllm.v1.simple_kv_offload.manager import SimpleCPUOffloadScheduler
+from vllm.v1.simple_kv_offload.metadata import SimpleCPUOffloadWorkerMetadata
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+BLOCK_SIZE = 16
+HEAD_SIZE = 16
+NUM_KV_HEADS = 1
+DTYPE = torch.float16
+# bytes per block per tensor:
+# block_size * num_kv_heads * head_size * 2 (K+V) * element_size
+_BYTES_PER_BLOCK = BLOCK_SIZE * NUM_KV_HEADS * HEAD_SIZE * 2 * DTYPE.itemsize
+
+# Ensure none_hash is initialized once
+init_none_hash(sha256)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_kv_cache_config(
+    num_blocks: int,
+    num_groups: int = 1,
+) -> KVCacheConfig:
+    """Build a KVCacheConfig with non-empty kv_cache_tensors."""
+    groups = []
+    tensors = []
+    for g in range(num_groups):
+        layer_names = [f"layer_{g}"]
+        groups.append(
+            KVCacheGroupSpec(
+                layer_names,
+                FullAttentionSpec(
+                    block_size=BLOCK_SIZE,
+                    num_kv_heads=NUM_KV_HEADS,
+                    head_size=HEAD_SIZE,
+                    dtype=DTYPE,
+                ),
+            )
+        )
+        tensors.append(
+            KVCacheTensor(
+                size=_BYTES_PER_BLOCK * num_blocks,
+                shared_by=layer_names,
+            )
+        )
+    return KVCacheConfig(
+        num_blocks=num_blocks,
+        kv_cache_tensors=tensors,
+        kv_cache_groups=groups,
+    )
+
+
+def _make_vllm_config(block_size: int = BLOCK_SIZE) -> VllmConfig:
+    """Minimal VllmConfig for scheduler tests (no GPU)."""
+    model_config = ModelConfig(
+        model="facebook/opt-125m",
+        trust_remote_code=True,
+        dtype="float16",
+        seed=42,
+    )
+    scheduler_config = SchedulerConfig(
+        max_num_seqs=16,
+        max_num_batched_tokens=64,
+        max_model_len=10000,
+        enable_chunked_prefill=True,
+        is_encoder_decoder=False,
+    )
+    cache_config = CacheConfig(
+        block_size=block_size,
+        gpu_memory_utilization=0.9,
+        enable_prefix_caching=True,
+    )
+    kv_transfer_config = KVTransferConfig(
+        kv_connector="SimpleCPUOffloadConnector",
+        kv_role="kv_both",
+    )
+    return VllmConfig(
+        scheduler_config=scheduler_config,
+        model_config=model_config,
+        cache_config=cache_config,
+        kv_transfer_config=kv_transfer_config,
+        device_config=DeviceConfig("cpu"),
+    )
+
+
+@dataclass
+class SchedulerFixture:
+    """Bundle returned by make_scheduler for convenient access."""
+
+    scheduler: SimpleCPUOffloadScheduler
+    gpu_block_pool: BlockPool
+    vllm_config: VllmConfig
+    kv_cache_config: KVCacheConfig
+    num_groups: int = 1
+
+
+def make_scheduler(
+    num_cpu_blocks: int = 8,
+    num_gpu_blocks: int = 16,
+    num_groups: int = 1,
+    lazy: bool = False,
+) -> SchedulerFixture:
+    """Build a SimpleCPUOffloadScheduler with small block pools."""
+    kv_cache_config = _make_kv_cache_config(num_gpu_blocks, num_groups)
+    vllm_config = _make_vllm_config()
+    cpu_capacity_bytes = _BYTES_PER_BLOCK * num_cpu_blocks * num_groups
+
+    sched = SimpleCPUOffloadScheduler(
+        vllm_config=vllm_config,
+        kv_cache_config=kv_cache_config,
+        cpu_capacity_bytes=cpu_capacity_bytes,
+        scheduler_block_size=BLOCK_SIZE,
+        hash_block_size=BLOCK_SIZE,
+        lazy_offload=lazy,
+    )
+
+    # Build a real GPU block pool and bind it
+    gpu_block_pool = BlockPool(
+        num_gpu_blocks=num_gpu_blocks,
+        enable_caching=True,
+        hash_block_size=BLOCK_SIZE,
+    )
+    sched.bind_gpu_block_pool(gpu_block_pool)
+
+    return SchedulerFixture(
+        scheduler=sched,
+        gpu_block_pool=gpu_block_pool,
+        vllm_config=vllm_config,
+        kv_cache_config=kv_cache_config,
+        num_groups=num_groups,
+    )
+
+
+_req_counter = 0
+
+
+def make_request(
+    num_blocks: int = 2,
+    request_id: str | None = None,
+    extra_tokens: int = 1,
+) -> Request:
+    """Create a Request with deterministic block hashes."""
+    global _req_counter
+    _req_counter += 1
+    if request_id is None:
+        request_id = f"req-{_req_counter}"
+
+    num_tokens = num_blocks * BLOCK_SIZE + extra_tokens
+    start = _req_counter * 10000
+    prompt_token_ids = list(range(start, start + num_tokens))
+    sampling_params = SamplingParams(max_tokens=1)
+
+    req = Request(
+        request_id=request_id,
+        prompt_token_ids=prompt_token_ids,
+        sampling_params=sampling_params,
+        pooling_params=None,
+        mm_features=None,
+        block_hasher=get_request_block_hasher(BLOCK_SIZE, sha256),
+    )
+    return req
+
+
+def make_scheduler_output(
+    req_id_to_num_tokens: dict[str, int],
+    *,
+    new_reqs: dict[str, tuple[list[int], ...]] | None = None,
+    cached_req_new_blocks: dict[str, tuple[list[int], ...] | None] | None = None,
+) -> SchedulerOutput:
+    """Build a minimal SchedulerOutput with num_scheduled_tokens.
+
+    Args:
+        new_reqs: For first-time requests, maps req_id -> block_ids tuple.
+            These are placed into ``scheduled_new_reqs`` as ``NewRequestData``.
+        cached_req_new_blocks: For returning (cached) requests, maps
+            req_id -> new_block_ids (incremental) or None.
+            These are placed into ``scheduled_cached_reqs``.
+    """
+    scheduled_new_reqs: list[NewRequestData] = []
+    if new_reqs:
+        for req_id, block_ids in new_reqs.items():
+            scheduled_new_reqs.append(
+                NewRequestData(
+                    req_id=req_id,
+                    prompt_token_ids=None,
+                    mm_features=[],
+                    sampling_params=None,
+                    pooling_params=None,
+                    block_ids=block_ids,
+                    num_computed_tokens=0,
+                    lora_request=None,
+                )
+            )
+
+    if cached_req_new_blocks:
+        cached_req_ids = list(cached_req_new_blocks.keys())
+        cached_new_block_ids = [cached_req_new_blocks[rid] for rid in cached_req_ids]
+        cached_reqs = CachedRequestData(
+            req_ids=cached_req_ids,
+            resumed_req_ids=set(),
+            new_token_ids=[[] for _ in cached_req_ids],
+            all_token_ids={},
+            new_block_ids=cached_new_block_ids,
+            num_computed_tokens=[0] * len(cached_req_ids),
+            num_output_tokens=[0] * len(cached_req_ids),
+        )
+    else:
+        cached_reqs = CachedRequestData.make_empty()
+
+    return SchedulerOutput(
+        scheduled_new_reqs=scheduled_new_reqs,
+        scheduled_cached_reqs=cached_reqs,
+        num_scheduled_tokens=req_id_to_num_tokens,
+        total_num_scheduled_tokens=sum(req_id_to_num_tokens.values()),
+        scheduled_spec_decode_tokens={},
+        scheduled_encoder_inputs={},
+        num_common_prefix_blocks=[],
+        preempted_req_ids=set(),
+        finished_req_ids=set(),
+        free_encoder_mm_hashes=[],
+    )
+
+
+def simulate_store_completion(
+    scheduler: SimpleCPUOffloadScheduler,
+    event_idx: int,
+) -> None:
+    """Simulate worker reporting a store event completion."""
+    output = KVConnectorOutput(
+        finished_recving=set(),
+        kv_connector_worker_meta=SimpleCPUOffloadWorkerMetadata(
+            completed_store_events={event_idx: scheduler._expected_worker_count},
+        ),
+    )
+    scheduler.update_connector_output(output)
+
+
+def simulate_load_completion(
+    scheduler: SimpleCPUOffloadScheduler,
+    req_ids: set[str],
+) -> None:
+    """Simulate worker reporting load completions for requests."""
+    output = KVConnectorOutput(
+        finished_sending=set(),
+        finished_recving=req_ids,
+    )
+    scheduler.update_connector_output(output)
+
+
+def get_cpu_free_blocks(scheduler: SimpleCPUOffloadScheduler) -> int:
+    """Return number of free CPU blocks."""
+    return scheduler.cpu_block_pool.get_num_free_blocks()
+
+
+def _allocate_gpu_blocks(
+    gpu_block_pool: BlockPool,
+    request: Request,
+    num_blocks: int,
+    group_id: int = 0,
+) -> list:
+    """Allocate GPU blocks, cache them with hashes, return block list.
+
+    Mimics what KVCacheManager does: allocate blocks from pool, then
+    register them in the prefix cache via cache_full_blocks so that
+    re-allocation properly evicts stale hashes.
+    """
+    blocks = gpu_block_pool.get_new_blocks(num_blocks)
+    num_full = min(num_blocks, len(request.block_hashes))
+    if num_full > 0:
+        gpu_block_pool.cache_full_blocks(
+            request=request,
+            blocks=blocks,
+            num_cached_blocks=0,
+            num_full_blocks=num_full,
+            block_size=BLOCK_SIZE,
+            kv_cache_group_id=group_id,
+        )
+    return blocks
+
+
+def _alloc_and_register(
+    fix: SchedulerFixture,
+    request: Request,
+    num_blocks: int,
+    *,
+    confirmed: bool = True,
+    group_id: int = 0,
+) -> KVCacheBlocks:
+    """Allocate GPU blocks and return KVCacheBlocks.
+
+    Block IDs are no longer registered in a mock KVCacheManager; instead
+    tests pass them through ``make_scheduler_output`` so that
+    ``yield_req_data`` can pick them up.
+
+    If ``confirmed`` is True, advance ``request.num_computed_tokens`` to simulate
+    the scheduler's ``_update_after_schedule`` from a prior step.
+    """
+    gpu_blocks = _allocate_gpu_blocks(
+        fix.gpu_block_pool, request, num_blocks, group_id=group_id
+    )
+    kv_blocks = KVCacheBlocks(blocks=(gpu_blocks,))
+    if confirmed:
+        request.num_computed_tokens = num_blocks * BLOCK_SIZE
+    return kv_blocks
+
+
+# ---------------------------------------------------------------------------
+# Test 1a: Eager store-and-load roundtrip
+# ---------------------------------------------------------------------------
+def test_eager_store_and_load_roundtrip() -> None:
+    """Eager mode: store blocks on compute, complete store, verify cache hit."""
+    fix = make_scheduler(num_cpu_blocks=8, num_gpu_blocks=16, lazy=False)
+    sched = fix.scheduler
+
+    num_blocks = 2
+    req = make_request(num_blocks=num_blocks)
+
+    kv_blocks = _alloc_and_register(fix, req, num_blocks)
+    sched.update_state_after_alloc(req, kv_blocks, num_external_tokens=0)
+    block_ids = kv_blocks.get_block_ids()
+    sched_out = make_scheduler_output(
+        {req.request_id: num_blocks * BLOCK_SIZE},
+        new_reqs={req.request_id: block_ids},
+    )
+
+    meta = sched.build_connector_meta(sched_out)
+    assert meta.store_event >= 0, "Expected a store event to be scheduled"
+    assert len(meta.store_gpu_blocks) > 0
+    assert len(meta.store_cpu_blocks) == len(meta.store_gpu_blocks)
+    simulate_store_completion(sched, meta.store_event)
+
+    # New request with same tokens should get CPU cache hit
+    req2 = Request(
+        request_id="req-eager-load",
+        prompt_token_ids=req.prompt_token_ids,
+        sampling_params=req.sampling_params,
+        pooling_params=None,
+        mm_features=None,
+        block_hasher=req._block_hasher,
+    )
+    hit_tokens, is_async = sched.get_num_new_matched_tokens(req2, num_computed_tokens=0)
+    # make_request pads num_tokens by +1 beyond the last full block, so the
+    # manager's max_hit_len = num_tokens - 1 cap leaves all full blocks intact.
+    assert hit_tokens == num_blocks * BLOCK_SIZE
+    assert is_async is True
+
+    gpu_blocks2 = fix.gpu_block_pool.get_new_blocks(num_blocks)
+    kv_blocks2 = KVCacheBlocks(blocks=(gpu_blocks2,))
+    sched.update_state_after_alloc(req2, kv_blocks2, num_external_tokens=hit_tokens)
+
+    block_ids2 = kv_blocks2.get_block_ids()
+    sched_out2 = make_scheduler_output(
+        {req2.request_id: 1},
+        new_reqs={req2.request_id: block_ids2},
+    )
+    meta2 = sched.build_connector_meta(sched_out2)
+    assert meta2.load_event >= 0, "Expected a load event to be assigned"
+    assert len(meta2.load_gpu_blocks) > 0
+    assert len(meta2.load_cpu_blocks) == len(meta2.load_gpu_blocks)
+
+
+# ---------------------------------------------------------------------------
+# Test 1b: Boundary — max_hit_len cap drops the last full block when the
+# prompt is an exact multiple of BLOCK_SIZE.
+# ---------------------------------------------------------------------------
+def test_max_hit_len_cap_drops_last_full_block() -> None:
+    """When num_tokens is an exact multiple of BLOCK_SIZE, the manager's
+    ``max_hit_len = num_tokens - 1`` cap forces ``find_longest_cache_hit`` to
+    drop the final block (since ``max_length // block_size`` rounds down).
+    """
+    fix = make_scheduler(num_cpu_blocks=8, num_gpu_blocks=16, lazy=False)
+    sched = fix.scheduler
+
+    num_blocks = 2
+    req = make_request(num_blocks=num_blocks, extra_tokens=0)
+    assert req.num_tokens == num_blocks * BLOCK_SIZE
+
+    kv_blocks = _alloc_and_register(fix, req, num_blocks)
+    sched.update_state_after_alloc(req, kv_blocks, num_external_tokens=0)
+    sched_out = make_scheduler_output(
+        {req.request_id: num_blocks * BLOCK_SIZE},
+        new_reqs={req.request_id: kv_blocks.get_block_ids()},
+    )
+    meta = sched.build_connector_meta(sched_out)
+    simulate_store_completion(sched, meta.store_event)
+
+    req2 = Request(
+        request_id="req-cap-boundary",
+        prompt_token_ids=req.prompt_token_ids,
+        sampling_params=req.sampling_params,
+        pooling_params=None,
+        mm_features=None,
+        block_hasher=req._block_hasher,
+    )
+    hit_tokens, _ = sched.get_num_new_matched_tokens(req2, num_computed_tokens=0)
+    assert hit_tokens == (num_blocks - 1) * BLOCK_SIZE
+
+
+# ---------------------------------------------------------------------------
+# Test 1c: Lazy store-and-load roundtrip
+# ---------------------------------------------------------------------------
+def _flush_old_blocks_to_lru_head(
+    gpu_pool: BlockPool,
+    num_filler_blocks: int,
+) -> list:
+    """Allocate filler blocks so that previously-freed (hashed) blocks migrate
+    to the LRU head of the free queue.  Returns the filler blocks (caller must
+    free them later to restore pool capacity).
+
+    In a real engine the same thing happens naturally: after one request
+    finishes and frees its blocks, subsequent requests allocate from the LRU
+    head, consuming the unhashed blocks and leaving the old hashed blocks at
+    the front of the queue.
+    """
+    fillers = gpu_pool.get_new_blocks(num_filler_blocks)
+    return fillers
+
+
+def test_lazy_store_and_load_roundtrip() -> None:
+    """Lazy mode: schedule a request, finish it so its hashed blocks are freed,
+    then schedule new requests so the old blocks migrate to the LRU head.
+    The lazy scanner offloads them to CPU.  Re-scheduling the old request
+    triggers a CPU cache hit + load.
+
+    GPU pool: 8 blocks (7 usable).  _target_free = ceil(64/16) = 4.
+    """
+    fix = make_scheduler(num_cpu_blocks=8, num_gpu_blocks=8, lazy=True)
+    sched = fix.scheduler
+    gpu_pool = fix.gpu_block_pool
+
+    num_blocks = 2
+
+    # --- Step 1: Schedule req_old, compute, and finish ---
+    req_old = make_request(num_blocks=num_blocks)
+    gpu_blocks_old = _allocate_gpu_blocks(gpu_pool, req_old, num_blocks, group_id=0)
+    gpu_pool.free_blocks(gpu_blocks_old)
+
+    # Allocate filler blocks so req_old's hashed blocks move to LRU head.
+    # 7 usable - 2 (req_old freed) = 5 other free blocks to consume.
+    fillers = _flush_old_blocks_to_lru_head(gpu_pool, num_filler_blocks=5)
+
+    # --- Step 2: Lazy scanner should offload req_old's blocks ---
+    sched_out = make_scheduler_output({})
+    meta = sched.build_connector_meta(sched_out)
+    assert meta.store_event >= 0, "Expected lazy store to offload old blocks"
+    assert len(meta.store_gpu_blocks) == num_blocks
+    simulate_store_completion(sched, meta.store_event)
+
+    # Free fillers to restore pool capacity.
+    gpu_pool.free_blocks(fillers)
+
+    # --- Step 3: Re-schedule req_old — should get CPU cache hit ---
+    req_old2 = Request(
+        request_id="req-old-reload",
+        prompt_token_ids=req_old.prompt_token_ids,
+        sampling_params=req_old.sampling_params,
+        pooling_params=None,
+        mm_features=None,
+        block_hasher=req_old._block_hasher,
+    )
+    hit_tokens, is_async = sched.get_num_new_matched_tokens(
+        req_old2, num_computed_tokens=0
+    )
+    # make_request pads num_tokens by +1 beyond the last full block, so the
+    # manager's max_hit_len = num_tokens - 1 cap leaves all full blocks intact.
+    expected_hit = num_blocks * BLOCK_SIZE
+    assert hit_tokens == expected_hit, (
+        f"Expected {expected_hit} hit tokens, got {hit_tokens}"
+    )
+    assert is_async is True
+
+    # Allocate fresh GPU blocks for the load.
+    gpu_blocks_load = gpu_pool.get_new_blocks(num_blocks)
+    kv_blocks_load = KVCacheBlocks(blocks=(gpu_blocks_load,))
+    sched.update_state_after_alloc(
+        req_old2, kv_blocks_load, num_external_tokens=hit_tokens
+    )
+
+    sched_out2 = make_scheduler_output({req_old2.request_id: 1})
+    meta2 = sched.build_connector_meta(sched_out2)
+    assert meta2.load_event >= 0, "Expected a load event to be assigned"
+    assert len(meta2.load_gpu_blocks) > 0
+
+
+# ---------------------------------------------------------------------------
+# Test 2a: Eager duplicate store is skipped
+# ---------------------------------------------------------------------------
+def test_eager_duplicate_store_skipped() -> None:
+    """Eager: storing the same block hashes twice should not allocate new CPU blocks."""
+    fix = make_scheduler(num_cpu_blocks=8, num_gpu_blocks=16, lazy=False)
+    sched = fix.scheduler
+
+    num_blocks = 2
+    req = make_request(num_blocks=num_blocks)
+
+    kv_blocks = _alloc_and_register(fix, req, num_blocks)
+    sched.update_state_after_alloc(req, kv_blocks, num_external_tokens=0)
+    block_ids = kv_blocks.get_block_ids()
+    sched_out = make_scheduler_output(
+        {req.request_id: num_blocks * BLOCK_SIZE},
+        new_reqs={req.request_id: block_ids},
+    )
+
+    meta1 = sched.build_connector_meta(sched_out)
+    assert meta1.store_event >= 0
+    simulate_store_completion(sched, meta1.store_event)
+    cpu_free_after_first = get_cpu_free_blocks(sched)
+
+    # Second request with identical hashes — should skip store
+    req2 = Request(
+        request_id="req-dup-eager",
+        prompt_token_ids=req.prompt_token_ids,
+        sampling_params=req.sampling_params,
+        pooling_params=None,
+        mm_features=None,
+        block_hasher=req._block_hasher,
+    )
+    kv_blocks2 = _alloc_and_register(fix, req2, num_blocks)
+    sched.update_state_after_alloc(req2, kv_blocks2, num_external_tokens=0)
+    block_ids2 = kv_blocks2.get_block_ids()
+    sched_out2 = make_scheduler_output(
+        {req2.request_id: num_blocks * BLOCK_SIZE},
+        new_reqs={req2.request_id: block_ids2},
+    )
+
+    meta2 = sched.build_connector_meta(sched_out2)
+    if meta2.store_event >= 0:
+        assert len(meta2.store_cpu_blocks) == 0, (
+            "Expected no new CPU blocks for duplicate hashes"
+        )
+    assert get_cpu_free_blocks(sched) == cpu_free_after_first
+
+
+# ---------------------------------------------------------------------------
+# Test 2b: Eager dedup of in-flight stores across consecutive steps
+# ---------------------------------------------------------------------------
+def test_eager_in_flight_store_dedup_across_steps() -> None:
+    """Eager: a second request sharing a prefix with an in-flight store
+    must not re-offload the same GPU blocks before completion lands.
+
+    Simulates a GPU prefix-cache hit by reusing the first request's
+    GPU block IDs in the second scheduler step, which is the path the
+    real scheduler takes when two requests share a prefix.
+    """
+    fix = make_scheduler(num_cpu_blocks=8, num_gpu_blocks=16, lazy=False)
+    sched = fix.scheduler
+
+    num_blocks = 2
+    req = make_request(num_blocks=num_blocks)
+
+    kv_blocks = _alloc_and_register(fix, req, num_blocks)
+    sched.update_state_after_alloc(req, kv_blocks, num_external_tokens=0)
+    block_ids = kv_blocks.get_block_ids()
+    sched_out = make_scheduler_output(
+        {req.request_id: num_blocks * BLOCK_SIZE},
+        new_reqs={req.request_id: block_ids},
+    )
+
+    meta1 = sched.build_connector_meta(sched_out)
+    assert meta1.store_event >= 0
+    assert len(meta1.store_cpu_blocks) == num_blocks
+    # In-flight set tracks the scheduled GPU blocks until completion.
+    assert sched._in_flight_store_gpu_blocks == set(meta1.store_gpu_blocks)
+    cpu_free_after_first = get_cpu_free_blocks(sched)
+
+    # Second request shares the prefix and reuses the same GPU block IDs
+    # (the real scheduler path: GPU prefix cache returns the same blocks).
+    # Do NOT simulate completion — the first store is still in-flight.
+    req2 = Request(
+        request_id="req-dup-eager-inflight",
+        prompt_token_ids=req.prompt_token_ids,
+        sampling_params=req.sampling_params,
+        pooling_params=None,
+        mm_features=None,
+        block_hasher=req._block_hasher,
+    )
+    req2.num_computed_tokens = num_blocks * BLOCK_SIZE
+    sched.update_state_after_alloc(req2, kv_blocks, num_external_tokens=0)
+    sched_out2 = make_scheduler_output(
+        {req2.request_id: num_blocks * BLOCK_SIZE},
+        new_reqs={req2.request_id: block_ids},
+    )
+
+    meta2 = sched.build_connector_meta(sched_out2)
+    if meta2.store_event >= 0:
+        assert len(meta2.store_cpu_blocks) == 0, (
+            "Expected no new CPU blocks for in-flight duplicate hashes"
+        )
+    assert get_cpu_free_blocks(sched) == cpu_free_after_first, (
+        "Second request should not consume CPU blocks while the first "
+        "store is still in-flight"
+    )
+
+    # After completion, the in-flight set is cleared.
+    simulate_store_completion(sched, meta1.store_event)
+    assert sched._in_flight_store_gpu_blocks == set()
+
+
+# ---------------------------------------------------------------------------
+# Test 2c: Lazy duplicate store is skipped
+# ---------------------------------------------------------------------------
+def test_lazy_duplicate_store_skipped() -> None:
+    """Lazy: blocks already offloaded to CPU should not be offloaded again.
+
+    Same pattern as the lazy roundtrip: flush old blocks to LRU head, offload,
+    then repeat with the same hashes and verify no new CPU allocation.
+    """
+    fix = make_scheduler(num_cpu_blocks=8, num_gpu_blocks=8, lazy=True)
+    sched = fix.scheduler
+    gpu_pool = fix.gpu_block_pool
+
+    num_blocks = 2
+    req = make_request(num_blocks=num_blocks)
+
+    # Schedule + finish → hashed blocks in free queue
+    gpu_blocks = _allocate_gpu_blocks(gpu_pool, req, num_blocks, group_id=0)
+    gpu_pool.free_blocks(gpu_blocks)
+
+    # Flush old blocks to LRU head, then trigger lazy offload.
+    fillers = _flush_old_blocks_to_lru_head(gpu_pool, num_filler_blocks=5)
+    meta1 = sched.build_connector_meta(make_scheduler_output({}))
+    assert meta1.store_event >= 0
+    simulate_store_completion(sched, meta1.store_event)
+    gpu_pool.free_blocks(fillers)
+    cpu_free_after_first = get_cpu_free_blocks(sched)
+
+    # Allocate blocks with the same hashes and free them again.
+    # The scanner should see they are already in CPU cache and skip them.
+    req2 = Request(
+        request_id="req-dup-lazy",
+        prompt_token_ids=req.prompt_token_ids,
+        sampling_params=req.sampling_params,
+        pooling_params=None,
+        mm_features=None,
+        block_hasher=req._block_hasher,
+    )
+    gpu_blocks2 = _allocate_gpu_blocks(gpu_pool, req2, num_blocks, group_id=0)
+    gpu_pool.free_blocks(gpu_blocks2)
+
+    # Flush again so the hashed blocks are at LRU head for the scanner.
+    fillers2 = _flush_old_blocks_to_lru_head(gpu_pool, num_filler_blocks=5)
+    meta2 = sched.build_connector_meta(make_scheduler_output({}))
+    gpu_pool.free_blocks(fillers2)
+
+    # Either no store event, or zero new CPU blocks (already cached).
+    if meta2.store_event >= 0:
+        assert len(meta2.store_cpu_blocks) == 0, (
+            "Expected no new CPU blocks for duplicate hashes"
+        )
+    assert get_cpu_free_blocks(sched) == cpu_free_after_first
+
+
+# ---------------------------------------------------------------------------
+# Test 3: LRU eviction order
+# ---------------------------------------------------------------------------
+def test_lru_eviction_order() -> None:
+    """With limited CPU space, oldest blocks should be evicted first.
+
+    CPU block pool: num_cpu_blocks=5 -> 4 free usable blocks (1 taken by null_block).
+    After storing 4 blocks (2 req_a + 2 req_b), all free slots are occupied by
+    cached blocks (ref_cnt=0, in hash map).  When 2 more are stored (req_c),
+    2 LRU blocks from req_a get evicted from the cache to make room.
+    """
+    # 5 total = 4 usable (null_block takes 1), filling exactly with 4 blocks
+    fix = make_scheduler(num_cpu_blocks=5, num_gpu_blocks=16, lazy=False)
+    sched = fix.scheduler
+
+    # Fill CPU with 4 blocks: 2 requests x 2 blocks (in LRU insertion order)
+    req_a = make_request(num_blocks=2)
+    req_b = make_request(num_blocks=2)
+
+    kv_a = _alloc_and_register(fix, req_a, 2)
+    kv_b = _alloc_and_register(fix, req_b, 2)
+    sched.update_state_after_alloc(req_a, kv_a, num_external_tokens=0)
+    sched.update_state_after_alloc(req_b, kv_b, num_external_tokens=0)
+
+    ids_a = kv_a.get_block_ids()
+    ids_b = kv_b.get_block_ids()
+    sched_out = make_scheduler_output(
+        {
+            req_a.request_id: 2 * BLOCK_SIZE,
+            req_b.request_id: 2 * BLOCK_SIZE,
+        },
+        new_reqs={
+            req_a.request_id: ids_a,
+            req_b.request_id: ids_b,
+        },
+    )
+    meta = sched.build_connector_meta(sched_out)
+    assert meta.store_event >= 0
+    simulate_store_completion(sched, meta.store_event)
+
+    # Verify all 4 blocks are cached in CPU hash map
+    for i, bhash in enumerate(req_a.block_hashes[:2]):
+        bhash_with_group = make_block_hash_with_group_id(bhash, 0)
+        assert (
+            sched.cpu_block_pool.cached_block_hash_to_block.get_one_block(
+                bhash_with_group
+            )
+            is not None
+        ), f"req_a block {i} should be cached after store"
+    for i, bhash in enumerate(req_b.block_hashes[:2]):
+        bhash_with_group = make_block_hash_with_group_id(bhash, 0)
+        assert (
+            sched.cpu_block_pool.cached_block_hash_to_block.get_one_block(
+                bhash_with_group
+            )
+            is not None
+        ), f"req_b block {i} should be cached after store"
+
+    # Store 2 more blocks from a new request - must evict 2 LRU blocks (req_a)
+    req_c = make_request(num_blocks=2)
+    kv_c = _alloc_and_register(fix, req_c, 2)
+    sched.update_state_after_alloc(req_c, kv_c, num_external_tokens=0)
+
+    ids_c = kv_c.get_block_ids()
+    sched_out2 = make_scheduler_output(
+        {req_c.request_id: 2 * BLOCK_SIZE},
+        new_reqs={req_c.request_id: ids_c},
+    )
+    meta2 = sched.build_connector_meta(sched_out2)
+    assert meta2.store_event >= 0
+    simulate_store_completion(sched, meta2.store_event)
+
+    # req_a hashes should be evicted from CPU (they were LRU)
+    for i, bhash in enumerate(req_a.block_hashes[:2]):
+        bhash_with_group = make_block_hash_with_group_id(bhash, 0)
+        cache_map = sched.cpu_block_pool.cached_block_hash_to_block
+        cached = cache_map.get_one_block(bhash_with_group)
+        assert cached is None, f"req_a block {i} should have been evicted"
+
+    # req_b and req_c hashes should be present
+    for i, bhash in enumerate(req_b.block_hashes[:2]):
+        bhash_with_group = make_block_hash_with_group_id(bhash, 0)
+        cache_map = sched.cpu_block_pool.cached_block_hash_to_block
+        cached = cache_map.get_one_block(bhash_with_group)
+        assert cached is not None, f"req_b block {i} should still be cached"
+
+    for i, bhash in enumerate(req_c.block_hashes[:2]):
+        bhash_with_group = make_block_hash_with_group_id(bhash, 0)
+        cache_map = sched.cpu_block_pool.cached_block_hash_to_block
+        cached = cache_map.get_one_block(bhash_with_group)
+        assert cached is not None, f"req_c block {i} should still be cached"
+
+
+# ---------------------------------------------------------------------------
+# Test 4: Touched blocks survive eviction
+# ---------------------------------------------------------------------------
+def test_touched_blocks_survive_eviction() -> None:
+    """Touching CPU blocks updates their LRU position, protecting them from eviction."""
+    # 5 total = 4 usable (null_block takes 1)
+    fix = make_scheduler(num_cpu_blocks=5, num_gpu_blocks=16, lazy=False)
+    sched = fix.scheduler
+
+    # Fill CPU with 4 blocks (req_a: 2, req_b: 2) in LRU order
+    req_a = make_request(num_blocks=2)
+    req_b = make_request(num_blocks=2)
+
+    kv_a = _alloc_and_register(fix, req_a, 2)
+    kv_b = _alloc_and_register(fix, req_b, 2)
+    sched.update_state_after_alloc(req_a, kv_a, num_external_tokens=0)
+    sched.update_state_after_alloc(req_b, kv_b, num_external_tokens=0)
+
+    ids_a = kv_a.get_block_ids()
+    ids_b = kv_b.get_block_ids()
+    sched_out = make_scheduler_output(
+        {
+            req_a.request_id: 2 * BLOCK_SIZE,
+            req_b.request_id: 2 * BLOCK_SIZE,
+        },
+        new_reqs={
+            req_a.request_id: ids_a,
+            req_b.request_id: ids_b,
+        },
+    )
+    meta = sched.build_connector_meta(sched_out)
+    simulate_store_completion(sched, meta.store_event)
+
+    # Touch req_a's CPU blocks to make them most-recently-used
+    cpu_pool = sched.cpu_block_pool
+    for bhash in req_a.block_hashes[:2]:
+        bhash_with_group = make_block_hash_with_group_id(bhash, 0)
+        cached_blk = cpu_pool.cached_block_hash_to_block.get_one_block(bhash_with_group)
+        assert cached_blk is not None
+        cpu_pool.touch([cached_blk])
+        # Undo touch to return ref_cnt to 0
+        # (so it's a free candidate but at MRU position)
+        cpu_pool.free_blocks([cached_blk])
+
+    # Now store 2 more blocks; req_b (LRU front) should be evicted, not req_a
+    req_c = make_request(num_blocks=2)
+    kv_c = _alloc_and_register(fix, req_c, 2)
+    sched.update_state_after_alloc(req_c, kv_c, num_external_tokens=0)
+
+    ids_c = kv_c.get_block_ids()
+    sched_out2 = make_scheduler_output(
+        {req_c.request_id: 2 * BLOCK_SIZE},
+        new_reqs={req_c.request_id: ids_c},
+    )
+    meta2 = sched.build_connector_meta(sched_out2)
+    simulate_store_completion(sched, meta2.store_event)
+
+    # req_b should be evicted (LRU), req_a and req_c should survive
+    for i, bhash in enumerate(req_b.block_hashes[:2]):
+        bhash_with_group = make_block_hash_with_group_id(bhash, 0)
+        cached = cpu_pool.cached_block_hash_to_block.get_one_block(bhash_with_group)
+        assert cached is None, f"req_b block {i} should have been evicted (it was LRU)"
+
+    for i, bhash in enumerate(req_a.block_hashes[:2]):
+        bhash_with_group = make_block_hash_with_group_id(bhash, 0)
+        cached = cpu_pool.cached_block_hash_to_block.get_one_block(bhash_with_group)
+        assert cached is not None, f"req_a block {i} should survive (was touched/MRU)"
+
+
+# ---------------------------------------------------------------------------
+# Test 5: Preemption no CPU block leak
+# ---------------------------------------------------------------------------
+def test_preemption_no_cpu_block_leak() -> None:
+    """request_finished during in-flight load defers cleanup;
+    completes after load done."""
+    fix = make_scheduler(num_cpu_blocks=8, num_gpu_blocks=16, lazy=False)
+    sched = fix.scheduler
+
+    num_blocks = 2
+
+    # First: store blocks to CPU
+    req = make_request(num_blocks=num_blocks)
+    kv_blocks = _alloc_and_register(fix, req, num_blocks)
+    sched.update_state_after_alloc(req, kv_blocks, num_external_tokens=0)
+    block_ids = kv_blocks.get_block_ids()
+    sched_out = make_scheduler_output(
+        {req.request_id: num_blocks * BLOCK_SIZE},
+        new_reqs={req.request_id: block_ids},
+    )
+    meta = sched.build_connector_meta(sched_out)
+    simulate_store_completion(sched, meta.store_event)
+
+    # Create new request with same tokens, check hit
+    req2 = Request(
+        request_id="req-preempt-load",
+        prompt_token_ids=req.prompt_token_ids,
+        sampling_params=req.sampling_params,
+        pooling_params=None,
+        mm_features=None,
+        block_hasher=req._block_hasher,
+    )
+    hit_tokens, is_async = sched.get_num_new_matched_tokens(req2, num_computed_tokens=0)
+    assert hit_tokens > 0
+
+    gpu_blocks2 = fix.gpu_block_pool.get_new_blocks(num_blocks)
+    kv_blocks2 = KVCacheBlocks(blocks=(gpu_blocks2,))
+    sched.update_state_after_alloc(req2, kv_blocks2, num_external_tokens=hit_tokens)
+
+    # Assign load_event via build_connector_meta
+    block_ids2 = kv_blocks2.get_block_ids()
+    sched_out2 = make_scheduler_output(
+        {req2.request_id: 1},
+        new_reqs={req2.request_id: block_ids2},
+    )
+    meta2 = sched.build_connector_meta(sched_out2)
+    assert meta2.load_event >= 0
+
+    # Request finishes BEFORE load completes -> deferred
+    sched.request_finished(req2, block_ids=[])
+    assert req2.request_id in sched._reqs_to_load
+    assert sched._reqs_to_load[req2.request_id].finished is True
+
+    # Now simulate load completion -> cleanup fires
+    simulate_load_completion(sched, {req2.request_id})
+    assert req2.request_id not in sched._reqs_to_load
+
+
+# ---------------------------------------------------------------------------
+# Test 6: Eager store preemption cleanup
+# ---------------------------------------------------------------------------
+def test_eager_store_preemption_cleanup() -> None:
+    """In eager mode, finishing a request during in-flight store defers cleanup."""
+    fix = make_scheduler(num_cpu_blocks=8, num_gpu_blocks=16, lazy=False)
+    sched = fix.scheduler
+
+    num_blocks = 2
+    req = make_request(num_blocks=num_blocks)
+    kv_blocks = _alloc_and_register(fix, req, num_blocks)
+    sched.update_state_after_alloc(req, kv_blocks, num_external_tokens=0)
+
+    block_ids = kv_blocks.get_block_ids()
+    sched_out = make_scheduler_output(
+        {req.request_id: num_blocks * BLOCK_SIZE},
+        new_reqs={req.request_id: block_ids},
+    )
+    meta = sched.build_connector_meta(sched_out)
+    store_event = meta.store_event
+    assert store_event >= 0
+
+    # The request gets store_events populated
+    assert req.request_id in sched._reqs_to_store
+    store_state = sched._reqs_to_store[req.request_id]
+    assert store_event in store_state.store_events
+
+    # Finish request while store still in-flight -> deferred
+    sched.request_finished(req, block_ids=[])
+    assert req.request_id in sched._reqs_to_store
+    assert sched._reqs_to_store[req.request_id].finished is True
+
+    # Simulate store completion -> deferred cleanup fires
+    simulate_store_completion(sched, store_event)
+    assert req.request_id not in sched._reqs_to_store
+
+
+# ---------------------------------------------------------------------------
+# Test 7: In-flight finish deferred cleanup (load variant)
+# ---------------------------------------------------------------------------
+def test_inflight_finish_deferred_cleanup() -> None:
+    """Store, then start a load, request_finished defers,
+    load completion fires cleanup."""
+    fix = make_scheduler(num_cpu_blocks=8, num_gpu_blocks=16, lazy=False)
+    sched = fix.scheduler
+
+    num_blocks = 2
+
+    # Store
+    req = make_request(num_blocks=num_blocks)
+    kv_blocks = _alloc_and_register(fix, req, num_blocks)
+    sched.update_state_after_alloc(req, kv_blocks, num_external_tokens=0)
+    block_ids = kv_blocks.get_block_ids()
+    sched_out = make_scheduler_output(
+        {req.request_id: num_blocks * BLOCK_SIZE},
+        new_reqs={req.request_id: block_ids},
+    )
+    meta = sched.build_connector_meta(sched_out)
+    simulate_store_completion(sched, meta.store_event)
+
+    # Load
+    req2 = Request(
+        request_id="req-inflight-load",
+        prompt_token_ids=req.prompt_token_ids,
+        sampling_params=req.sampling_params,
+        pooling_params=None,
+        mm_features=None,
+        block_hasher=req._block_hasher,
+    )
+    hit_tokens, _ = sched.get_num_new_matched_tokens(req2, num_computed_tokens=0)
+    assert hit_tokens > 0
+
+    gpu_blocks2 = fix.gpu_block_pool.get_new_blocks(num_blocks)
+    kv_blocks2 = KVCacheBlocks(blocks=(gpu_blocks2,))
+    sched.update_state_after_alloc(req2, kv_blocks2, num_external_tokens=hit_tokens)
+
+    block_ids2 = kv_blocks2.get_block_ids()
+    sched_out2 = make_scheduler_output(
+        {req2.request_id: 1},
+        new_reqs={req2.request_id: block_ids2},
+    )
+    meta2 = sched.build_connector_meta(sched_out2)
+    assert meta2.load_event >= 0
+
+    # Finish before load completes
+    sched.request_finished(req2, block_ids=[])
+    assert req2.request_id in sched._reqs_to_load
+
+    # Simulate load completion -> request removed
+    simulate_load_completion(sched, {req2.request_id})
+    assert req2.request_id not in sched._reqs_to_load
+
+
+# ---------------------------------------------------------------------------
+# Test 8: Null GPU blocks are skipped in store and load transfer pairs
+# ---------------------------------------------------------------------------
+def test_multi_group_null_blocks_skipped() -> None:
+    """Null GPU blocks (no block_hash) must not appear in store or load pairs.
+
+    In eager store mode, _prepare_eager_store_specs skips blocks whose
+    block_hash is None (null blocks have no hash). We verify this by mixing
+    real hashed blocks with unhashed (null-like) blocks in a single group and
+    checking that only real blocks appear in the store list.
+    """
+    fix = make_scheduler(num_cpu_blocks=8, num_gpu_blocks=16, num_groups=1, lazy=False)
+    sched = fix.scheduler
+    gpu_pool = fix.gpu_block_pool
+
+    num_blocks = 2
+    req = make_request(num_blocks=num_blocks)
+
+    # Allocate real blocks (with hashes) and use the null_block as a placeholder
+    gpu_blocks = _allocate_gpu_blocks(gpu_pool, req, num_blocks, group_id=0)
+    null_block = gpu_pool.null_block
+
+    # Mix: [real_block, null_block] — null_block has no hash, should be skipped
+    mixed_blocks = [gpu_blocks[0], null_block]
+    kv_blocks = KVCacheBlocks(blocks=(mixed_blocks,))
+    req.num_computed_tokens = num_blocks * BLOCK_SIZE
+    sched.update_state_after_alloc(req, kv_blocks, num_external_tokens=0)
+
+    block_ids = kv_blocks.get_block_ids()
+    sched_out = make_scheduler_output(
+        {req.request_id: num_blocks * BLOCK_SIZE},
+        new_reqs={req.request_id: block_ids},
+    )
+    meta = sched.build_connector_meta(sched_out)
+
+    # Null block's ID should NOT appear in store_gpu_blocks
+    null_block_id = null_block.block_id
+    assert null_block_id not in meta.store_gpu_blocks, (
+        f"Null block id {null_block_id} should not appear in store transfer pairs"
+    )
+
+    # Only real block should be scheduled for store
+    assert len(meta.store_gpu_blocks) == 1
+    assert gpu_blocks[0].block_id in meta.store_gpu_blocks
+
+    # Complete the store
+    assert meta.store_event >= 0
+    simulate_store_completion(sched, meta.store_event)
+
+    # Create matching request and get load hit
+    req2 = Request(
+        request_id="req-null-load",
+        prompt_token_ids=req.prompt_token_ids,
+        sampling_params=req.sampling_params,
+        pooling_params=None,
+        mm_features=None,
+        block_hasher=req._block_hasher,
+    )
+    hit_tokens, is_async = sched.get_num_new_matched_tokens(req2, num_computed_tokens=0)
+    # Only 1 block was stored (the real one)
+    assert hit_tokens == BLOCK_SIZE
+    assert is_async is True
+
+    # Allocate new GPU blocks for the load
+    gpu_blocks2 = gpu_pool.get_new_blocks(1)
+    kv_blocks2 = KVCacheBlocks(blocks=([gpu_blocks2[0], null_block],))
+    sched.update_state_after_alloc(req2, kv_blocks2, num_external_tokens=hit_tokens)
+
+    sched_out2 = make_scheduler_output({req2.request_id: 1})
+    meta2 = sched.build_connector_meta(sched_out2)
+
+    # Null block's ID should NOT appear in load_gpu_blocks
+    assert null_block_id not in meta2.load_gpu_blocks, (
+        f"Null block id {null_block_id} should not appear in load transfer pairs"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Test 9: Chunked prefill accumulates block_ids across steps
+# ---------------------------------------------------------------------------
+def test_chunked_prefill_reads_live_block_ids() -> None:
+    """With chunked prefill, block IDs accumulate across scheduler steps.
+    _prepare_eager_store_specs reads block IDs from scheduler_output via
+    yield_req_data, so the store should reflect the updated (larger) block
+    list, not a stale snapshot."""
+    fix = make_scheduler(num_cpu_blocks=8, num_gpu_blocks=16, lazy=False)
+    sched = fix.scheduler
+
+    num_blocks = 4
+    req = make_request(num_blocks=num_blocks)
+
+    # First chunk: allocate 2 blocks
+    kv_blocks_first = _alloc_and_register(fix, req, 2)
+    sched.update_state_after_alloc(req, kv_blocks_first, num_external_tokens=0)
+
+    assert req.request_id in sched._reqs_to_store
+    # Should still be exactly 1 entry in _reqs_to_store
+    assert list(sched._reqs_to_store.keys()).count(req.request_id) == 1
+
+    # Build connector meta with 2 blocks — stores the first 2
+    ids_first = kv_blocks_first.get_block_ids()
+    sched_out1 = make_scheduler_output(
+        {req.request_id: 2 * BLOCK_SIZE},
+        new_reqs={req.request_id: ids_first},
+    )
+    meta1 = sched.build_connector_meta(sched_out1)
+    assert meta1.store_event >= 0
+    assert len(meta1.store_gpu_blocks) == 2
+    simulate_store_completion(sched, meta1.store_event)
+
+    # Second chunk: allocate 4 blocks total (2 new ones)
+    kv_blocks_second = _alloc_and_register(fix, req, num_blocks)
+    # update_state_after_alloc is idempotent for store registration
+    sched.update_state_after_alloc(req, kv_blocks_second, num_external_tokens=0)
+
+    # Still exactly 1 entry
+    assert list(sched._reqs_to_store.keys()).count(req.request_id) == 1
+
+    # The second chunk's NEW block IDs (positions 2,3) are passed as
+    # cached_req_new_blocks. The full block_ids include both old and new,
+    # but yield_req_data only appends the new_block_ids for cached reqs.
+    ids_second_full = kv_blocks_second.get_block_ids()
+    # New blocks are those beyond the first chunk
+    new_block_ids = tuple(ids_second_full[g][2:] for g in range(len(ids_second_full)))
+    sched_out2 = make_scheduler_output(
+        {req.request_id: 2 * BLOCK_SIZE},
+        cached_req_new_blocks={req.request_id: new_block_ids},
+    )
+    meta2 = sched.build_connector_meta(sched_out2)
+    assert meta2.store_event >= 0
+    # Only the 2 NEW blocks should be stored (first 2 already done)
+    assert len(meta2.store_gpu_blocks) == 2
+
+
+# ---------------------------------------------------------------------------
+# Test 10: Partial GPU prefix hit + CPU load + new compute blocks
+# ---------------------------------------------------------------------------
+def test_partial_gpu_prefix_plus_cpu_load() -> None:
+    """When GPU has a prefix cache hit for the first N blocks, CPU has a
+    hit for the next M blocks, and there are P new blocks needing fresh
+    compute, the block layout is:
+
+        | comp (N) | ext_comp (M) | new (P) |
+
+    External blocks sit in the middle — not at the beginning or end.
+    The load path must target hashes at positions [N, N+M).
+
+    Request: 6 blocks (0..5).
+    - Store all 6 to CPU.
+    - New request: GPU prefix cache hits blocks 0,1 (hashed).
+      CPU hits blocks 2,3. Blocks 4,5 are new (need compute).
+    - update_state_after_alloc receives 6 GPU blocks:
+      [0,1] hashed (comp), [2,3] unhashed (ext_comp), [4,5] unhashed (new).
+    - Load must target hash positions 2,3.
+    """
+    fix = make_scheduler(num_cpu_blocks=8, num_gpu_blocks=16, lazy=False)
+    sched = fix.scheduler
+    gpu_pool = fix.gpu_block_pool
+
+    num_blocks = 6
+    req = make_request(num_blocks=num_blocks)
+
+    # Store all 6 blocks to CPU via eager store.
+    kv_blocks = _alloc_and_register(fix, req, num_blocks)
+    sched.update_state_after_alloc(req, kv_blocks, num_external_tokens=0)
+    block_ids = kv_blocks.get_block_ids()
+    sched_out = make_scheduler_output(
+        {req.request_id: num_blocks * BLOCK_SIZE},
+        new_reqs={req.request_id: block_ids},
+    )
+    meta = sched.build_connector_meta(sched_out)
+    assert meta.store_event >= 0
+    simulate_store_completion(sched, meta.store_event)
+
+    # New request with same tokens — but only partial GPU prefix hit.
+    req2 = Request(
+        request_id="req-partial-gpu",
+        prompt_token_ids=req.prompt_token_ids,
+        sampling_params=req.sampling_params,
+        pooling_params=None,
+        mm_features=None,
+        block_hasher=req._block_hasher,
+    )
+
+    # GPU prefix cache hits the first 2 blocks.
+    gpu_local_computed = 2 * BLOCK_SIZE
+    hit_tokens, is_async = sched.get_num_new_matched_tokens(
+        req2, num_computed_tokens=gpu_local_computed
+    )
+    # CPU has all 6 blocks stored. make_request pads num_tokens by +1, so
+    # the manager's num_tokens - 1 cap leaves all full blocks intact:
+    # remaining hashable range = 6 - 2 = 4 blocks, all hit.
+    num_cpu_hit_blocks = 4
+    assert hit_tokens == num_cpu_hit_blocks * BLOCK_SIZE, (
+        f"Expected {num_cpu_hit_blocks * BLOCK_SIZE} CPU hit tokens, got {hit_tokens}"
+    )
+    assert is_async is True
+
+    # Simulate what the real scheduler does: only accept 2 of the 4 CPU hit
+    # blocks as external (e.g. due to budget constraints), leaving 2 new
+    # blocks for fresh compute.
+    num_ext_blocks = 2
+    num_new_blocks = 2
+    external_tokens = num_ext_blocks * BLOCK_SIZE
+
+    # Build block list matching real layout: | comp(2) | ext_comp(2) | new(2) |
+    # comp: GPU prefix cache hit — blocks with hashes
+    gpu_comp = _allocate_gpu_blocks(gpu_pool, req2, 2, group_id=0)
+    # ext_comp + new: freshly allocated, no hashes
+    gpu_ext_and_new = gpu_pool.get_new_blocks(num_ext_blocks + num_new_blocks)
+    all_gpu_blocks = gpu_comp + gpu_ext_and_new
+    kv_blocks2 = KVCacheBlocks(blocks=(all_gpu_blocks,))
+
+    # Critical call: with 2 hashed comp blocks and 2 external tokens worth
+    # of blocks, the manager must derive skipped=2 and load hashes [2,3].
+    sched.update_state_after_alloc(
+        req2, kv_blocks2, num_external_tokens=external_tokens
+    )
+
+    block_ids2 = kv_blocks2.get_block_ids()
+    sched_out2 = make_scheduler_output(
+        {req2.request_id: num_new_blocks * BLOCK_SIZE},
+        new_reqs={req2.request_id: block_ids2},
+    )
+    meta2 = sched.build_connector_meta(sched_out2)
+    assert meta2.load_event >= 0, "Expected a load event for partial GPU + CPU hit"
+    assert len(meta2.load_gpu_blocks) == num_ext_blocks
+    assert len(meta2.load_cpu_blocks) == num_ext_blocks
+
+    # Verify the load targets the ext_comp GPU blocks (positions 2,3),
+    # not the comp blocks (0,1) or new blocks (4,5).
+    ext_block_ids = [b.block_id for b in gpu_ext_and_new[:num_ext_blocks]]
+    for bid in meta2.load_gpu_blocks:
+        assert bid in ext_block_ids, (
+            f"Load GPU block {bid} should be an ext_comp block, not a comp or new block"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Test 11: TOCTOU between Phase A and Phase B (regression for #39702)
+# ---------------------------------------------------------------------------
+def test_toctou_cpu_hit_evicted_between_phases_no_crash() -> None:
+    """Regression for vllm-project/vllm#39702.
+
+    When ``get_num_new_matched_tokens`` (Phase A) reports a CPU cache hit
+    of ``N`` tokens but ``update_state_after_alloc`` (Phase B) runs after
+    other requests have caused LRU eviction of those exact blocks, the
+    second ``find_longest_cache_hit`` call returns 0 while
+    ``num_external_tokens`` is still ``N``, triggering
+    ``AssertionError: Expected N hit tokens, got 0``.
+
+    Setup: ``num_cpu_blocks=5`` (4 usable; null_block takes 1).
+        1. Store req_a's 2 blocks to CPU. CPU: [a0, a1, _, _].
+        2. Phase A on req_b (same prompt as req_a) reports a 2-block hit.
+           Without the fix this does NOT pin a0/a1 — they remain at LRU front.
+        3. Store req_c (2) + req_d (2) — 4 more blocks into 4 slots. With
+           a0/a1 unpinned and at LRU front, they get evicted. CPU: [c0, c1,
+           d0, d1].
+        4. Phase B on req_b: re-searches the CPU coordinator, finds 0
+           cached hashes, asserts.
+
+    After the fix, Phase A pins the hit blocks so step 3 evicts req_c's
+    blocks instead, and Phase B reads the cached ``(cpu_hit_blocks,
+    hit_length)`` tuple without re-searching.
+    """
+    # 5 total = 4 usable (null_block takes 1)
+    fix = make_scheduler(num_cpu_blocks=5, num_gpu_blocks=16, lazy=False)
+    sched = fix.scheduler
+
+    # --- Step 1: Store req_a's 2 blocks to CPU cache ---
+    req_a = make_request(num_blocks=2)
+    kv_a = _alloc_and_register(fix, req_a, 2)
+    sched.update_state_after_alloc(req_a, kv_a, num_external_tokens=0)
+    sched_out_a = make_scheduler_output(
+        {req_a.request_id: 2 * BLOCK_SIZE},
+        new_reqs={req_a.request_id: kv_a.get_block_ids()},
+    )
+    meta_a = sched.build_connector_meta(sched_out_a)
+    assert meta_a.store_event >= 0
+    simulate_store_completion(sched, meta_a.store_event)
+
+    # --- Step 2: Phase A — req_b (same prompt as req_a) reports a CPU hit ---
+    req_b = Request(
+        request_id="req-b-toctou",
+        prompt_token_ids=req_a.prompt_token_ids,
+        sampling_params=req_a.sampling_params,
+        pooling_params=None,
+        mm_features=None,
+        block_hasher=req_a._block_hasher,
+    )
+    hit_tokens, is_async = sched.get_num_new_matched_tokens(
+        req_b, num_computed_tokens=0
+    )
+    assert hit_tokens == 2 * BLOCK_SIZE, (
+        f"Phase A should report 2 blocks of CPU hit, got {hit_tokens}"
+    )
+    assert is_async is True
+
+    # --- Step 3: TOCTOU window — fill CPU cache so LRU evicts req_a's blocks
+    # (in production this corresponds to other concurrent requests landing
+    # between Phase A and Phase B for req_b). 4 usable slots, req_a occupies 2;
+    # req_c (2) + req_d (2) require evicting 2 LRU blocks. ---
+    req_c = make_request(num_blocks=2)
+    req_d = make_request(num_blocks=2)
+    kv_c = _alloc_and_register(fix, req_c, 2)
+    kv_d = _alloc_and_register(fix, req_d, 2)
+    sched.update_state_after_alloc(req_c, kv_c, num_external_tokens=0)
+    sched.update_state_after_alloc(req_d, kv_d, num_external_tokens=0)
+    sched_out_pressure = make_scheduler_output(
+        {
+            req_c.request_id: 2 * BLOCK_SIZE,
+            req_d.request_id: 2 * BLOCK_SIZE,
+        },
+        new_reqs={
+            req_c.request_id: kv_c.get_block_ids(),
+            req_d.request_id: kv_d.get_block_ids(),
+        },
+    )
+    meta_pressure = sched.build_connector_meta(sched_out_pressure)
+    assert meta_pressure.store_event >= 0
+    simulate_store_completion(sched, meta_pressure.store_event)
+
+    # --- Step 4: Phase B — must not crash ---
+    # Before fix: AssertionError: Expected 32 hit tokens, got 0
+    # After fix: Phase A pinned the hits → Step 3 evicted req_c instead,
+    # Phase B consumes the cached (cpu_hit_blocks, hit_length) tuple.
+    gpu_blocks_b = fix.gpu_block_pool.get_new_blocks(2)
+    kv_blocks_b = KVCacheBlocks(blocks=(gpu_blocks_b,))
+    sched.update_state_after_alloc(req_b, kv_blocks_b, num_external_tokens=hit_tokens)
+
+    # --- Step 5: with the fix, the load is queued correctly ---
+    sched_out_b = make_scheduler_output(
+        {req_b.request_id: 1},
+        new_reqs={req_b.request_id: kv_blocks_b.get_block_ids()},
+    )
+    meta_b = sched.build_connector_meta(sched_out_b)
+    assert meta_b.load_event >= 0, (
+        "Phase B should queue a load event using the pinned CPU hit blocks"
+    )
+    assert len(meta_b.load_gpu_blocks) == 2
+    assert len(meta_b.load_cpu_blocks) == 2
diff --git a/tests/v1/spec_decode/test_acceptance_length.py b/tests/v1/spec_decode/test_acceptance_length.py
index aa8e40a2de5e..62ff100fdbf8 100644
--- a/tests/v1/spec_decode/test_acceptance_length.py
+++ b/tests/v1/spec_decode/test_acceptance_length.py
@@ -43,7 +43,8 @@ class Eagle3ModelConfig:
 
 # Model configurations for EAGLE3 acceptance length tests.
 # Expected acceptance lengths are determined by running baseline benchmarks
-# using examples/offline_inference/spec_decode.py with the MT-Bench dataset.
+# using examples/features/speculative_decoding/spec_decode_offline.py
+# with the MT-Bench dataset.
 EAGLE3_MODEL_CONFIGS = [
     Eagle3ModelConfig(
         verifier="meta-llama/Llama-3.1-8B-Instruct",
@@ -165,6 +166,7 @@ def get_mt_bench_prompts(
         no_stream=True,
         disable_shuffle=False,
         skip_chat_template=False,
+        trust_remote_code=False,
     )
     samples = get_samples(args, tokenizer)
     prompt_ids = [
@@ -210,8 +212,8 @@ def extract_acceptance_metrics(metrics, num_spec_tokens: int) -> dict:
 
 @large_gpu_mark(min_gb=40)
 @pytest.mark.skipif(
-    not current_platform.is_cuda(),
-    reason="This test is only supported on CUDA platform.",
+    not current_platform.is_cuda_alike(),
+    reason="This test is only supported on CUDA-alike platforms.",
 )
 @pytest.mark.parametrize(
     "model_config",
diff --git a/tests/v1/spec_decode/test_backup_token_async_spec.py b/tests/v1/spec_decode/test_backup_token_async_spec.py
new file mode 100644
index 000000000000..9340503ea4f2
--- /dev/null
+++ b/tests/v1/spec_decode/test_backup_token_async_spec.py
@@ -0,0 +1,147 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Regression tests for the backup token fix in prepare_next_token_ids_padded.
+
+Fixes #38098: with async scheduling, seq_lens_cpu is inflated by unaccepted
+draft token placeholders, causing get_token_id() to return -1.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+import pytest
+import torch
+
+
+class _FakeRequest:
+    def __init__(self, prompt_tokens: list[int], output_tokens: list[int]):
+        self.num_prompt_tokens = len(prompt_tokens)
+        self._prompt = prompt_tokens
+        self._output = output_tokens
+
+    @property
+    def num_tokens(self) -> int:
+        return self.num_prompt_tokens + len(self._output)
+
+    def get_token_id(self, idx: int) -> int:
+        if idx < self.num_prompt_tokens:
+            return self._prompt[idx]
+        out_idx = idx - self.num_prompt_tokens
+        if out_idx < len(self._output):
+            return self._output[out_idx]
+        return -1  # out of range
+
+
+class _FakeInputBatch:
+    def __init__(
+        self,
+        req_ids: list[str],
+        num_tokens_no_spec: list[int],
+        vocab_size: int = 32000,
+    ):
+        self.req_ids = req_ids
+        self.num_reqs = len(req_ids)
+        self.vocab_size = vocab_size
+        self.num_tokens_no_spec = np.array(num_tokens_no_spec, dtype=np.int64)
+
+
+def _make_requests(
+    req_ids: list[str],
+    prompt_lens: list[int],
+    output_lens: list[int],
+) -> dict[str, _FakeRequest]:
+    requests = {}
+    for rid, plen, olen in zip(req_ids, prompt_lens, output_lens):
+        requests[rid] = _FakeRequest(list(range(plen)), list(range(1000, 1000 + olen)))
+    return requests
+
+
+def _backup_buggy(
+    seq_lens_cpu: torch.Tensor,
+    requests: dict[str, _FakeRequest],
+    batch: _FakeInputBatch,
+) -> list[int]:
+    """Old logic: uses seq_lens_cpu directly (may be inflated)."""
+    n = batch.num_reqs
+    return [
+        requests[batch.req_ids[i]].get_token_id(int(seq_lens_cpu[i])) for i in range(n)
+    ]
+
+
+def _backup_fixed(
+    requests: dict[str, _FakeRequest],
+    batch: _FakeInputBatch,
+) -> list[int]:
+    """New logic: uses num_tokens_no_spec - 1 (last committed token)."""
+    n = batch.num_reqs
+    idx = (batch.num_tokens_no_spec[:n] - 1).tolist()
+    return [requests[batch.req_ids[i]].get_token_id(int(idx[i])) for i in range(n)]
+
+
+class TestBackupTokenAsyncSpec:
+    def test_no_inflation_fixed_returns_last_token(self):
+        req_ids = ["r0", "r1"]
+        requests = _make_requests(req_ids, [3, 3], [2, 2])
+        batch = _FakeInputBatch(req_ids, [5, 5])
+        # idx = 5-1 = 4 → output[1] = 1001
+        assert _backup_fixed(requests, batch) == [1001, 1001]
+
+    def test_inflation_buggy_returns_placeholder(self):
+        req_ids = ["r0", "r1"]
+        requests = _make_requests(req_ids, [3, 3], [2, 2])
+        batch = _FakeInputBatch(req_ids, [5, 5])
+        # inflated by 3 spec tokens → idx 8 is out of range
+        seq_lens = torch.tensor([8, 8], dtype=torch.int64)
+        assert _backup_buggy(seq_lens, requests, batch) == [-1, -1]
+
+    def test_inflation_fixed_returns_correct_token(self):
+        req_ids = ["r0", "r1"]
+        requests = _make_requests(req_ids, [3, 3], [2, 2])
+        batch = _FakeInputBatch(req_ids, [5, 5])
+        assert _backup_fixed(requests, batch) == [1001, 1001]
+
+    def test_mixed_inflation_per_request(self):
+        req_ids = ["r0", "r1", "r2"]
+        requests = {
+            "r0": _FakeRequest([0, 1], [1000, 1001, 1002]),
+            "r1": _FakeRequest([0, 1, 2, 3], [2000]),
+            "r2": _FakeRequest([0], [3000, 3001, 3002, 3003]),
+        }
+        batch = _FakeInputBatch(req_ids, [5, 5, 5])
+        seq_lens = torch.tensor([7, 9, 5], dtype=torch.int64)
+
+        assert _backup_buggy(seq_lens, requests, batch) == [-1, -1, -1]
+        assert _backup_fixed(requests, batch) == [1002, 2000, 3003]
+
+    def test_prefill_only_request(self):
+        """No output tokens yet — backup should be the last prompt token."""
+        req_ids = ["r0"]
+        requests = {"r0": _FakeRequest([10, 20, 30], [])}
+        batch = _FakeInputBatch(req_ids, [3])
+        # idx = 3-1 = 2 → prompt[2] = 30
+        assert _backup_fixed(requests, batch) == [30]
+
+    @pytest.mark.parametrize("num_spec_tokens", [1, 2, 3, 4, 5])
+    def test_various_spec_token_counts(self, num_spec_tokens: int):
+        req_ids = ["r0"]
+        requests = {"r0": _FakeRequest([0, 1, 2], list(range(1000, 1005)))}
+        batch = _FakeInputBatch(req_ids, [8])
+        # idx = 8-1 = 7 → output[4] = 1004
+        assert _backup_fixed(requests, batch) == [1004]
+
+    def test_buggy_code_was_always_off_by_one(self):
+        """The original code used seq_len as index, which is always one past
+        the end of output_token_ids even without async inflation."""
+        req_ids = ["r0"]
+        requests = {"r0": _FakeRequest([0, 1, 2], [1000, 1001])}
+        batch = _FakeInputBatch(req_ids, [5])
+
+        # no inflation: seq_len == num_tokens == 5 → idx 5 is out of range
+        seq_lens = torch.tensor([5], dtype=torch.int64)
+        assert _backup_buggy(seq_lens, requests, batch) == [-1]
+        assert _backup_fixed(requests, batch) == [1001]
+
+        # with inflation: still -1, fixed still correct
+        seq_lens_inf = torch.tensor([8], dtype=torch.int64)
+        assert _backup_buggy(seq_lens_inf, requests, batch) == [-1]
+        assert _backup_fixed(requests, batch) == [1001]
diff --git a/tests/v1/spec_decode/test_eagle.py b/tests/v1/spec_decode/test_eagle.py
index fb4ea1bcecbd..c13de6d4f71f 100644
--- a/tests/v1/spec_decode/test_eagle.py
+++ b/tests/v1/spec_decode/test_eagle.py
@@ -3,6 +3,7 @@
 
 from unittest import mock
 
+import numpy as np
 import pytest
 import torch
 
@@ -27,6 +28,7 @@
 from vllm.model_executor.models.llama import LlamaForCausalLM
 from vllm.platforms import current_platform
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
+from vllm.v1.spec_decode.dflash import DFlashProposer
 from vllm.v1.spec_decode.draft_model import DraftModelProposer
 from vllm.v1.spec_decode.eagle import EagleProposer
 from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
@@ -36,33 +38,43 @@
 eagle_dir = "yuhuili/EAGLE-LLaMA3.1-Instruct-8B"
 eagle3_dir = "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"
 ar_draft_model_dir = "amd/PARD-Llama-3.2-1B"  # Compatible with parallel and AR drafting
+dflash_target_dir = "Qwen/Qwen3-8B"
+dflash_dir = "z-lab/Qwen3-8B-DFlash-b16"
 
 BLOCK_SIZE = 16
+DEVICE_TYPE = current_platform.device_type
 
 
 def _create_proposer(
     method: str,
     num_speculative_tokens: int,
     attention_backend: str | None = None,
-    speculative_token_tree: list[tuple[int, ...]] | None = None,
     parallel_drafting: bool = False,
+    rejection_sample_method: str = "standard",
+    draft_sample_method: str = "greedy",
 ) -> EagleProposer:
-    model_config = ModelConfig(model=model_dir, runner="generate", max_model_len=100)
-
     # Method-dependent setup
     if method == "eagle":
+        target_model_dir = model_dir
         draft_model_dir = eagle_dir
     elif method == "eagle3":
+        target_model_dir = model_dir
         draft_model_dir = eagle3_dir
     elif method == "draft_model":
+        target_model_dir = model_dir
         draft_model_dir = ar_draft_model_dir
+    elif method == "dflash":
+        target_model_dir = dflash_target_dir
+        draft_model_dir = dflash_dir
     else:
         raise ValueError(f"Unknown method: {method}")
 
-    spec_token_tree_str = None
-    if speculative_token_tree is not None:
-        assert num_speculative_tokens == len(speculative_token_tree)
-        spec_token_tree_str = str(speculative_token_tree)
+    model_config = ModelConfig(
+        model=target_model_dir,
+        runner="generate",
+        max_model_len=100,
+        trust_remote_code=(method == "dflash"),
+    )
 
     speculative_config = SpeculativeConfig(
         target_model_config=model_config,
@@ -70,14 +82,15 @@ def _create_proposer(
         model=draft_model_dir,
         method=method,
         num_speculative_tokens=num_speculative_tokens,
-        speculative_token_tree=spec_token_tree_str,
         parallel_drafting=parallel_drafting,
+        rejection_sample_method=rejection_sample_method,
+        draft_sample_method=draft_sample_method,
     )
     if parallel_drafting:
         # Overwrite pard_token to avoid crash during init
         speculative_config.draft_model_config.hf_config.pard_token = 0
 
-    device = current_platform.device_type
+    device = DEVICE_TYPE
     vllm_config = VllmConfig(
         model_config=model_config,
         cache_config=CacheConfig(block_size=16),
@@ -92,7 +105,9 @@ def _create_proposer(
         attention_config=AttentionConfig(backend=attention_backend),
     )
 
-    if "eagle" in method:
+    if method == "dflash":
+        proposer = DFlashProposer(vllm_config=vllm_config, device=device)
+    elif "eagle" in method:
         proposer = EagleProposer(vllm_config=vllm_config, device=device)
     else:
         proposer = DraftModelProposer(vllm_config=vllm_config, device=device)
@@ -107,20 +122,18 @@ def test_prepare_next_token_ids():
     either the GPU tensor of sampled_token_ids with -1 for rejected tokens,
     or the CPU python list[list[int]] with the rejected tokens removed.
     """
-    device = torch.device(current_platform.device_type)
+    device = torch.device(DEVICE_TYPE)
 
     num_requests = 4
     num_speculative_tokens = 4
-    batch_spec = BatchSpec(
-        seq_lens=[num_speculative_tokens + 1] * num_requests,
-        query_lens=[num_speculative_tokens + 1] * num_requests,
-    )
-
     req_ids = [f"req_{i + 1}" for i in range(num_requests)]
     mock_input_batch = mock.MagicMock(spec=InputBatch)
     mock_input_batch.req_ids = req_ids
     mock_input_batch.num_reqs = num_requests
     mock_input_batch.vocab_size = 100
+    mock_input_batch.num_tokens_no_spec = np.array(
+        [num_speculative_tokens + 1] * num_requests
+    )
 
     mock_num_scheduled_tokens = {req_id: 0 for req_id in req_ids}
     mock_requests = {}
@@ -165,19 +178,12 @@ def test_prepare_next_token_ids():
 
     assert torch.equal(next_token_ids_from_cpu, expected_next_token_ids_tensor)
 
-    common_attn_metadata = create_common_attn_metadata(
-        batch_spec,
-        block_size=BLOCK_SIZE,
-        device=device,
-    )
-
     expected_valid_sampled_tokens_count = torch.tensor(
         [2, 5, 0, 0], dtype=torch.int32, device=device
     )
 
     next_token_ids_from_padded, valid_sampled_tokens_count = (
         proposer.prepare_next_token_ids_padded(
-            common_attn_metadata.seq_lens_cpu,
             sampled_token_ids_tensor,
             mock_requests,
             mock_input_batch,
@@ -199,7 +205,7 @@ def test_prepare_inputs():
                     a, a + 1, ..., a + b - n2 - 1,
                     a + b, a + b + 1, ..., a + b + c - n3 - 1]
     """
-    device = torch.device(current_platform.device_type)
+    device = torch.device(DEVICE_TYPE)
 
     # q1 = 4, q2 = 7, q3 = 5
     # n1 = 1, n2 = 3, n3 = 2
@@ -292,7 +298,7 @@ def test_prepare_inputs_padded():
             from the original indices to sample from.
     """
 
-    device = torch.device(current_platform.device_type)
+    device = torch.device(DEVICE_TYPE)
 
     expected_token_indices_to_sample = torch.tensor(
         [1, 5, 6], dtype=torch.int32, device=device
@@ -362,7 +368,7 @@ def test_set_inputs_first_pass_default_eagle():
     - After inserting next_tokens [100, 200, 300]:
         [a2, a3, 100, b2, 200, c2, c3, c4, 300]
     """
-    device = torch.device(current_platform.device_type)
+    device = torch.device(DEVICE_TYPE)
 
     num_speculative_tokens = 3
     proposer = _create_proposer("eagle", num_speculative_tokens)
@@ -463,7 +469,7 @@ def test_set_inputs_first_pass_draft_model():
       - idx 5: token 21, pos 1
       - idx 6: token 200, pos 2 (bonus token)
     """
-    device = torch.device(current_platform.device_type)
+    device = torch.device(DEVICE_TYPE)
 
     num_speculative_tokens = 2
     block_size = BLOCK_SIZE
@@ -601,7 +607,7 @@ def test_set_inputs_first_pass_parallel_drafting():
       - idx 9: bonus token 200
       - idx 10-11: parallel_drafting_tokens, is_masked=True
     """
-    device = torch.device(current_platform.device_type)
+    device = torch.device(DEVICE_TYPE)
 
     num_speculative_tokens = 3
     block_size = BLOCK_SIZE
@@ -732,9 +738,9 @@ def test_set_inputs_first_pass_parallel_drafting():
 @pytest.mark.parametrize("pp_size", [1, 2])
 @pytest.mark.parametrize("use_distinct_embed_tokens", [True, False])
 @pytest.mark.parametrize("use_distinct_lm_head", [True, False])
-@mock.patch("vllm.v1.spec_decode.eagle.get_pp_group")
-@mock.patch("vllm.v1.spec_decode.eagle.get_layers_from_vllm_config")
-@mock.patch("vllm.v1.spec_decode.eagle.get_model")
+@mock.patch("vllm.v1.spec_decode.llm_base_proposer.get_pp_group")
+@mock.patch("vllm.v1.spec_decode.llm_base_proposer.get_layers_from_vllm_config")
+@mock.patch("vllm.v1.spec_decode.llm_base_proposer.get_model")
 def test_load_model(
     mock_get_model,
     mock_get_layers,
@@ -746,12 +752,6 @@ def test_load_model(
     use_distinct_lm_head,
     monkeypatch,
 ):
-    if attn_backend == "TRITON_ATTN" and not current_platform.is_rocm():
-        pytest.skip(
-            "TRITON_ATTN does not support "
-            "multi-token eagle spec decode on current platform"
-        )
-
     if attn_backend == "ROCM_AITER_FA" and current_platform.is_rocm():
         monkeypatch.setenv("VLLM_ROCM_USE_AITER", "1")
 
@@ -841,17 +841,11 @@ def test_propose(method, attn_backend, num_speculative_tokens, monkeypatch):
             "multi-token eagle spec decode on current platform"
         )
 
-    if attn_backend == "TREE_ATTN":
-        pytest.skip(
-            "TREE_ATTN is tested separately in test_propose_tree"
-            "because it requires special input mocking."
-        )
-
     if attn_backend == "ROCM_AITER_FA" and current_platform.is_rocm():
         monkeypatch.setenv("VLLM_ROCM_USE_AITER", "1")
 
     # Use GPU device
-    device = torch.device(current_platform.device_type)
+    device = torch.device(DEVICE_TYPE)
 
     # Setup test parameters
     batch_size = 2
@@ -952,10 +946,6 @@ def create_deterministic_logits(token_ids):
         attn_metadata_builder_cls, _ = try_get_attention_backend(
             AttentionBackendEnum.TRITON_ATTN
         )
-    elif attn_backend == "TREE_ATTN":
-        attn_metadata_builder_cls, _ = try_get_attention_backend(
-            AttentionBackendEnum.TREE_ATTN
-        )
     elif attn_backend == "ROCM_AITER_FA":
         attn_metadata_builder_cls, _ = try_get_attention_backend(
             AttentionBackendEnum.ROCM_AITER_FA
@@ -1011,91 +1001,57 @@ def create_deterministic_logits(token_ids):
     assert torch.equal(result, expected_tokens)
 
 
-@pytest.mark.parametrize(
-    "spec_token_tree",
-    [
-        [(0,)],  # A single token
-        [(0,), (0, 0), (0, 0, 0)],  # Chain
-        [(0,), (1,), (2,)],  # Parallel
-        [(0,), (1,), (2,), (0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)],  # Tree
-    ],
-)
-def test_propose_tree(spec_token_tree):
-    # Get GPU device.
-    device = torch.device(current_platform.device_type)
-
-    # Setup test parameters.
+def test_propose_stores_probabilistic_draft_probs(monkeypatch):
+    device = torch.device(DEVICE_TYPE)
     batch_size = 2
-    seq_len_1 = 5
-    seq_len_2 = 3
-    total_tokens = seq_len_1 + seq_len_2
-    vocab_size = 100
-    seq_lens = [seq_len_1, seq_len_2]
-    num_speculative_tokens = len(spec_token_tree)
+    seq_lens = [5, 3]
+    total_tokens = sum(seq_lens)
+    num_speculative_tokens = 3
+    vocab_size = 8
 
-    # Create proposer first so we can use its actual hidden_size.
     proposer = _create_proposer(
-        "eagle",
+        "draft_model",
         num_speculative_tokens,
-        speculative_token_tree=spec_token_tree,
+        rejection_sample_method="standard",
+        draft_sample_method="probabilistic",
     )
-    # Get the hidden_size from the proposer to ensure consistency.
     hidden_size = proposer.hidden_size
+    expanded_total_tokens = total_tokens + batch_size
 
-    # Helper to create deterministic logits that will produce specific tokens
-    def create_deterministic_logits(token_ids, k: int):
-        logits = torch.full((batch_size, vocab_size), -100.0, device=device)
-        for i, token_id in enumerate(token_ids):
-            # Assign decreasing values to the k, consecutive, tokens.
-            for j in range(k):
-                logits[i, token_id + j] = 100.0 - j
-        return logits
-
-    # Mock a model that returns deterministic logits.
-    base_token_ids = torch.tensor([42, 60], dtype=torch.int64, device=device)
-
-    # Skip loading the model and replace it with a mock that returns
-    # deterministic outputs.
     model_mock = mock.MagicMock()
+    forward_returns = []
+    logits_returns = []
+    for step in range(num_speculative_tokens):
+        token_count = expanded_total_tokens if step == 0 else batch_size
+        forward_returns.append(torch.zeros(token_count, hidden_size, device=device))
+        logits = torch.full((batch_size, vocab_size), -10.0, device=device)
+        logits[0, step + 1] = 5.0
+        logits[1, step + 3] = 4.0
+        logits_returns.append(logits)
 
-    # Mock the model forward calls.
-    forward_returns = [
-        (
-            torch.zeros(total_tokens, hidden_size, device=device),
-            torch.zeros(total_tokens, hidden_size, device=device),
-        )
-    ]
-    for cu_num_drafts in proposer.cu_drafts_per_level:
-        h_logits = torch.zeros(batch_size * cu_num_drafts, hidden_size, device=device)
-        h_states = torch.zeros(batch_size * cu_num_drafts, hidden_size, device=device)
-        forward_returns.append((h_logits, h_states))
     model_mock.side_effect = forward_returns
-
-    # Mock the compute_logits calls.
-    cu_num_drafts_tensor = torch.tensor(
-        [0] + proposer.cu_drafts_per_level, dtype=torch.int32, device=device
-    )
-    logits_returns = []
-    for level, num_children in enumerate(proposer.child_drafts_per_level):
-        token_ids = base_token_ids + cu_num_drafts_tensor[level]
-        level_num_drafts = cu_num_drafts_tensor[level + 1] - cu_num_drafts_tensor[level]
-        level_logits = []
-        for i in range(level_num_drafts // num_children):
-            level_logits.append(
-                create_deterministic_logits(token_ids + i * num_children, num_children)
-            )
-        logits_returns.append(torch.stack(level_logits, dim=1))
     model_mock.compute_logits.side_effect = logits_returns
-
-    # Assign the mock to the proposer
     proposer.model = model_mock
-
-    # Assign draft attn_layer_names since load_model is not invoked
     proposer._draft_attn_layer_names = {"layer.0"}
 
-    # Get the tree attention metadata builder.
+    def fake_compute_probs(logits, sampling_metadata):
+        probs = torch.softmax(logits, dim=-1)
+        return probs.argmax(dim=-1), probs
+
+    monkeypatch.setattr(
+        "vllm.v1.spec_decode.llm_base_proposer.compute_probs_and_sample_next_token",
+        fake_compute_probs,
+    )
+
+    batch_spec = BatchSpec(seq_lens=seq_lens, query_lens=seq_lens)
+    common_attn_metadata = create_common_attn_metadata(
+        batch_spec,
+        block_size=BLOCK_SIZE,
+        device=device,
+    )
+
     attn_metadata_builder_cls, _ = try_get_attention_backend(
-        AttentionBackendEnum.TREE_ATTN
+        AttentionBackendEnum.FLASH_ATTN
     )
     attn_metadata_builder = attn_metadata_builder_cls(
         kv_cache_spec=create_standard_kv_cache_spec(proposer.vllm_config),
@@ -1103,8 +1059,6 @@ def create_deterministic_logits(token_ids, k: int):
         vllm_config=proposer.vllm_config,
         device=device,
     )
-
-    # Mock runner and draft_attn_groups for attention metadata building.
     proposer.runner = mock.MagicMock()
     mock_attn_group = mock.MagicMock()
     mock_attn_group.get_metadata_builder.return_value = attn_metadata_builder
@@ -1112,43 +1066,166 @@ def create_deterministic_logits(token_ids, k: int):
     mock_attn_group.kv_cache_spec = attn_metadata_builder.kv_cache_spec
     proposer.draft_attn_groups = [mock_attn_group]
 
-    # Setup inputs for the proposer.
-    target_token_ids = torch.randint(0, vocab_size, (total_tokens,), device=device)
-    target_positions = torch.cat(
-        [torch.arange(seq_len_1, device=device), torch.arange(seq_len_2, device=device)]
-    )
-    target_hidden_states = torch.randn(total_tokens, hidden_size, device=device)
-    next_token_ids = torch.randint(
-        0, vocab_size, (batch_size,), dtype=torch.int32, device=device
+    sampling_metadata = mock.MagicMock()
+    sampling_metadata.all_greedy = False
+
+    result = proposer.propose(
+        target_token_ids=torch.randint(0, vocab_size, (total_tokens,), device=device),
+        target_positions=torch.cat(
+            [
+                torch.arange(seq_lens[0], device=device),
+                torch.arange(seq_lens[1], device=device),
+            ]
+        ),
+        target_hidden_states=torch.randn(total_tokens, hidden_size, device=device),
+        next_token_ids=torch.randint(
+            0, vocab_size, (batch_size,), dtype=torch.int32, device=device
+        ),
+        token_indices_to_sample=None,
+        common_attn_metadata=common_attn_metadata,
+        sampling_metadata=sampling_metadata,
     )
+
+    assert result.shape == (batch_size, num_speculative_tokens)
+
+    draft_probs = proposer.take_last_draft_probs()
+    assert draft_probs is not None
+    assert draft_probs.shape == (batch_size, num_speculative_tokens, vocab_size)
+    for step, expected_logits in enumerate(logits_returns):
+        assert torch.allclose(
+            draft_probs[:, step, :],
+            torch.softmax(expected_logits, dim=-1),
+        )
+
+
+def test_set_inputs_first_pass_dflash():
+    """
+    Test for DFlash set_inputs_first_pass.
+
+    DFlash uses cross-attention: context tokens become K/V and only
+    query tokens (bonus + mask) are Q. This tests the DFlash-specific
+    input preparation where:
+    - Context hidden states are stored by reference (no copy)
+    - Query input_ids are [next_token, mask, mask, ...] per request
+    - Context and query positions are written to separate buffers
+    - token_indices_to_sample points to mask token positions only
+    - A new CommonAttentionMetadata is returned with causal=False
+
+    Setup:
+    - 3 requests with query_lens [3, 2, 4]
+    - num_speculative_tokens = 3
+    - num_query_per_req = 4 (1 bonus + 3 mask tokens)
+    - next_token_ids: [100, 200, 300]
+
+    Expected output layout (query tokens only, 12 total):
+    Request 0 (indices 0-3): [100, mask, mask, mask]
+    Request 1 (indices 4-7): [200, mask, mask, mask]
+    Request 2 (indices 8-11): [300, mask, mask, mask]
+
+    Expected positions layout (separate buffers):
+    Context (_context_positions_buffer, 9 tokens): copied from target_positions
+    Query (positions, 12 tokens):
+      Request 0: last_pos=9, query=[10, 11, 12, 13]
+      Request 1: last_pos=7, query=[8, 9, 10, 11]
+      Request 2: last_pos=11, query=[12, 13, 14, 15]
+    """
+    device = torch.device(current_platform.device_type)
+
+    num_speculative_tokens = 3
+    proposer = _create_proposer("dflash", num_speculative_tokens)
+    mask_token_id = proposer.parallel_drafting_token_id
+
+    # Setup batch with 3 requests
     batch_spec = BatchSpec(
-        seq_lens=seq_lens,
-        query_lens=seq_lens,
+        seq_lens=[10, 8, 12],
+        query_lens=[3, 2, 4],
     )
+
     common_attn_metadata = create_common_attn_metadata(
         batch_spec,
         block_size=BLOCK_SIZE,
         device=device,
+        arange_block_indices=True,
     )
-    sampling_metadata = mock.MagicMock()
 
-    # Propose draft tokens.
-    result = proposer.propose(
+    # Input tensors
+    # Request 0: tokens [10, 11, 12] at positions [7, 8, 9]
+    # Request 1: tokens [20, 21] at positions [6, 7]
+    # Request 2: tokens [30, 31, 32, 33] at positions [8, 9, 10, 11]
+    target_token_ids = torch.tensor(
+        [10, 11, 12, 20, 21, 30, 31, 32, 33], dtype=torch.int32, device=device
+    )
+    target_positions = torch.tensor(
+        [7, 8, 9, 6, 7, 8, 9, 10, 11], dtype=torch.int64, device=device
+    )
+    target_hidden_states = torch.randn(
+        9, proposer.hidden_size, dtype=proposer.dtype, device=device
+    )
+    next_token_ids = torch.tensor([100, 200, 300], dtype=torch.int32, device=device)
+
+    num_tokens, token_indices_to_sample, output_cad = proposer.set_inputs_first_pass(
         target_token_ids=target_token_ids,
+        next_token_ids=next_token_ids,
         target_positions=target_positions,
         target_hidden_states=target_hidden_states,
-        next_token_ids=next_token_ids,
         token_indices_to_sample=None,
-        common_attn_metadata=common_attn_metadata,
-        sampling_metadata=sampling_metadata,
+        cad=common_attn_metadata,
+        num_rejected_tokens_gpu=None,
     )
-    assert result.shape == (batch_size, num_speculative_tokens)
 
-    # The tokens are expected to be consecutive integers starting
-    # from the base token IDs.
-    expected_tokens = base_token_ids[:, None] + torch.arange(
-        num_speculative_tokens, dtype=torch.int64, device=device
+    num_query_per_req = 1 + num_speculative_tokens  # 4
+    num_context = 9
+
+    # num_tokens is the query-only count
+    assert num_tokens == 3 * num_query_per_req  # 12
+
+    # Verify input_ids (query tokens only)
+    # Each request: [next_token, mask, mask, mask]
+    M = mask_token_id
+    expected_input_ids = torch.tensor(
+        [100, M, M, M, 200, M, M, M, 300, M, M, M],
+        dtype=torch.int32,
+        device=device,
+    )
+    assert torch.equal(proposer.input_ids[:num_tokens], expected_input_ids)
+
+    # Verify context positions (separate buffer): copied from target_positions
+    assert torch.equal(
+        proposer._context_positions_buffer[:num_context], target_positions
     )
 
-    # Verify that the draft tokens match our expectations.
-    assert torch.equal(result, expected_tokens)
+    # Verify query positions (separate buffer, starts at index 0):
+    # req0: last_pos=9,  query=[10, 11, 12, 13]
+    # req1: last_pos=7,  query=[8, 9, 10, 11]
+    # req2: last_pos=11, query=[12, 13, 14, 15]
+    expected_query_positions = torch.tensor(
+        [10, 11, 12, 13, 8, 9, 10, 11, 12, 13, 14, 15],
+        dtype=torch.int64,
+        device=device,
+    )
+    assert torch.equal(
+        proposer.positions[:num_tokens],
+        expected_query_positions,
+    )
+
+    # Verify token_indices_to_sample (mask tokens only, skip bonus at offset 0)
+    # req0: query indices 0-3, mask at 1,2,3
+    # req1: query indices 4-7, mask at 5,6,7
+    # req2: query indices 8-11, mask at 9,10,11
+    expected_token_indices_to_sample = torch.tensor(
+        [1, 2, 3, 5, 6, 7, 9, 10, 11], dtype=torch.int32, device=device
+    )
+    assert torch.equal(token_indices_to_sample, expected_token_indices_to_sample)
+
+    # Verify the new CAD has DFlash-specific properties
+    assert output_cad.causal is False  # DFlash requires non-causal attention
+    assert output_cad.num_actual_tokens == num_tokens  # query-only count
+    assert output_cad.max_query_len == num_query_per_req
+
+    expected_query_start_loc = torch.tensor(
+        [0, 4, 8, 12], dtype=torch.int32, device=device
+    )
+    assert torch.equal(output_cad.query_start_loc, expected_query_start_loc)
+
+    # Verify hidden states (stored by reference, not copied)
+    assert proposer._dflash_hidden_states is target_hidden_states
diff --git a/tests/v1/spec_decode/test_eagle_step_kernel.py b/tests/v1/spec_decode/test_eagle_step_kernel.py
index 319ab4a33ad1..83a29bc0469c 100644
--- a/tests/v1/spec_decode/test_eagle_step_kernel.py
+++ b/tests/v1/spec_decode/test_eagle_step_kernel.py
@@ -5,15 +5,18 @@
 import pytest
 import torch
 
+from vllm.platforms import current_platform
 from vllm.v1.spec_decode.utils import (
     PADDING_SLOT_ID,
     eagle_step_update_slot_mapping_and_metadata,
 )
 
+DEVICE_TYPE = current_platform.device_type
+
 # Skip if no CUDA - Triton kernel requires GPU
 pytest.importorskip("triton")
-if not torch.cuda.is_available():
-    pytest.skip("CUDA required for EAGLE kernel tests", allow_module_level=True)
+if not current_platform.is_cuda_alike() and not current_platform.is_xpu():
+    pytest.skip("CUDA/XPU required for EAGLE kernel tests", allow_module_level=True)
 
 
 def _reference_eagle_step_slot_mapping(
@@ -47,7 +50,7 @@ def _reference_eagle_step_slot_mapping(
 
 def test_eagle_step_slot_mapping_kernel():
     """Test fused kernel matches Python reference for slot mapping and metadata."""
-    device = torch.device("cuda")
+    device = torch.device(DEVICE_TYPE)
     batch_size = 32
     block_size = 16
     max_model_len = 4096
@@ -93,7 +96,7 @@ def test_eagle_step_slot_mapping_kernel():
 
 def test_eagle_step_slot_mapping_kernel_exceeds_max():
     """Test fused kernel when position exceeds max_model_len."""
-    device = torch.device("cuda")
+    device = torch.device(DEVICE_TYPE)
     batch_size = 4
     block_size = 16
     max_model_len = 100
@@ -130,7 +133,7 @@ def test_eagle_step_slot_mapping_kernel_exceeds_max():
 def test_eagle_step_slot_mapping_kernel_cudagraph_padding():
     """Test that padding threads write PADDING_SLOT_ID when
     input_batch_size > batch_size (cudagraph padding)."""
-    device = torch.device("cuda")
+    device = torch.device(DEVICE_TYPE)
     batch_size = 4
     input_batch_size = 8
     block_size = 16
diff --git a/tests/v1/spec_decode/test_extract_hidden_states.py b/tests/v1/spec_decode/test_extract_hidden_states.py
index 27b2a53c1849..b568d0b204fe 100644
--- a/tests/v1/spec_decode/test_extract_hidden_states.py
+++ b/tests/v1/spec_decode/test_extract_hidden_states.py
@@ -1,10 +1,13 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import json
 from unittest import mock
 
+import numpy as np
 import pytest
 import torch
+from transformers import CLIPVisionConfig, LlamaConfig, LlavaConfig, PretrainedConfig
 
 from tests.v1.attention.utils import (
     BatchSpec,
@@ -22,10 +25,15 @@
 )
 from vllm.config.load import LoadConfig
 from vllm.platforms import current_platform
+from vllm.transformers_utils.config import get_hf_text_config
+from vllm.transformers_utils.configs.extract_hidden_states import (
+    ExtractHiddenStatesConfig,
+)
 from vllm.v1.spec_decode.extract_hidden_states import ExtractHiddenStatesProposer
 from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch
 
 model_dir = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+DEVICE_TYPE = current_platform.device_type
 
 
 def _create_proposer(
@@ -50,7 +58,7 @@ def _create_proposer(
         },
     )
 
-    device = current_platform.device_type
+    device = DEVICE_TYPE
     vllm_config = VllmConfig(
         model_config=model_config,
         cache_config=CacheConfig(),
@@ -61,6 +69,7 @@ def _create_proposer(
         scheduler_config=SchedulerConfig(
             max_model_len=model_config.max_model_len,
             is_encoder_decoder=model_config.is_encoder_decoder,
+            enable_chunked_prefill=False,
         ),
         attention_config=AttentionConfig(),
     )
@@ -100,7 +109,7 @@ def test_proposer_initialization_missing_layer_ids():
         },
     )
 
-    device = current_platform.device_type
+    device = DEVICE_TYPE
     vllm_config = VllmConfig(
         model_config=model_config,
         cache_config=CacheConfig(),
@@ -111,6 +120,7 @@ def test_proposer_initialization_missing_layer_ids():
         scheduler_config=SchedulerConfig(
             max_model_len=model_config.max_model_len,
             is_encoder_decoder=model_config.is_encoder_decoder,
+            enable_chunked_prefill=False,
         ),
         attention_config=AttentionConfig(),
     )
@@ -129,19 +139,15 @@ def test_prepare_next_token_ids_padded():
     For each request we either use the sampled token (if valid and not discarded)
     or a backup token from the request state.
     """
-    device = torch.device(current_platform.device_type)
+    device = torch.device(DEVICE_TYPE)
 
     num_requests = 4
-    batch_spec = BatchSpec(
-        seq_lens=[5] * num_requests,
-        query_lens=[5] * num_requests,
-    )
-
     req_ids = [f"req_{i + 1}" for i in range(num_requests)]
     mock_input_batch = mock.MagicMock(spec=InputBatch)
     mock_input_batch.req_ids = req_ids
     mock_input_batch.num_reqs = num_requests
     mock_input_batch.vocab_size = 100
+    mock_input_batch.num_tokens_no_spec = np.array([5] * num_requests)
 
     mock_requests = {}
     for req_id in req_ids:
@@ -174,12 +180,6 @@ def test_prepare_next_token_ids_padded():
 
     proposer = _create_proposer(num_speculative_tokens=1)
 
-    common_attn_metadata = create_common_attn_metadata(
-        batch_spec,
-        block_size=16,
-        device=device,
-    )
-
     # valid_sampled_tokens_count tracks if token is valid (not -1 and in vocab range)
     # It doesn't depend on whether the request is discarded
     expected_valid_sampled_tokens_count = torch.tensor(
@@ -187,7 +187,6 @@ def test_prepare_next_token_ids_padded():
     )
 
     next_token_ids, valid_sampled_tokens_count = proposer.prepare_next_token_ids_padded(
-        common_attn_metadata.seq_lens_cpu,
         sampled_token_ids,
         mock_requests,
         mock_input_batch,
@@ -207,7 +206,7 @@ def test_propose():
     2. Return the sampled tokens as "draft" tokens (shape [batch_size, 1])
     3. Cache the hidden states in the model's KV cache
     """
-    device = torch.device(current_platform.device_type)
+    device = torch.device(DEVICE_TYPE)
 
     # Setup test parameters
     batch_size = 2
@@ -283,7 +282,7 @@ def test_propose():
 @pytest.mark.parametrize("num_hidden_layers", [1, 4, 8])
 def test_propose_different_layer_counts(num_hidden_layers):
     """Test that propose works correctly with different numbers of hidden layers."""
-    device = torch.device(current_platform.device_type)
+    device = torch.device(DEVICE_TYPE)
 
     batch_size = 2
     num_tokens = 5
@@ -332,3 +331,160 @@ def test_propose_different_layer_counts(num_hidden_layers):
 
     assert draft_tokens.shape == (batch_size, 1)
     assert torch.equal(draft_tokens, sampled_token_ids)
+
+
+# ---------------------------------------------------------------------------
+# VLM / composite config tests for ExtractHiddenStatesConfig
+# ---------------------------------------------------------------------------
+
+
+class _DummyVLMConfig(PretrainedConfig):
+    """Minimal composite config that mimics VLMs like Kimi-K2.5 or LLaVA.
+
+    The text model's parameters (hidden_size, num_attention_heads, …) live
+    exclusively under ``text_config``; the top-level config has none of them.
+    """
+
+    model_type = "test_vlm"
+
+    def __init__(self, text_config: PretrainedConfig, **kwargs):
+        self.text_config = text_config
+        super().__init__(architectures=["LlamaForCausalLM"], **kwargs)
+
+    def get_text_config(self, decoder: bool = False) -> PretrainedConfig:
+        del decoder
+        return self.text_config
+
+
+def test_extract_hidden_states_text_only_config_regression():
+    """Text-only models (no nested text_config) must keep working."""
+    model_config = ModelConfig(model=model_dir, runner="generate", max_model_len=100)
+
+    speculative_config = SpeculativeConfig(
+        target_model_config=model_config,
+        target_parallel_config=ParallelConfig(),
+        method="extract_hidden_states",
+        num_speculative_tokens=1,
+        draft_model_config={
+            "hf_config": {
+                "eagle_aux_hidden_state_layer_ids": [1, 2, 3, 4],
+            }
+        },
+    )
+
+    assert speculative_config.draft_model_config is not None
+    # For text-only models, hf_text_config should be the config itself.
+    assert speculative_config.draft_model_config.hf_text_config is (
+        speculative_config.draft_model_config.hf_config
+    )
+    assert (
+        speculative_config.draft_model_config.hf_text_config.num_attention_heads
+        == model_config.hf_text_config.num_attention_heads
+    )
+
+
+def test_extract_hidden_states_config_preserves_vlm_text_config():
+    """A real VLM config (LLaVA) with nested text_config must be preserved."""
+    text_config = LlamaConfig(
+        vocab_size=32000,
+        hidden_size=128,
+        intermediate_size=256,
+        num_hidden_layers=2,
+        num_attention_heads=8,
+    )
+    vlm_config = LlavaConfig(
+        vision_config=CLIPVisionConfig(),
+        text_config=text_config,
+    )
+
+    # Precondition: to_dict() flattens the nested config to a plain dict.
+    assert isinstance(vlm_config.to_dict()["text_config"], dict)
+
+    extract_config = ExtractHiddenStatesConfig(
+        vlm_config,
+        eagle_aux_hidden_state_layer_ids=[1, 2],
+    )
+
+    # The fix: text_config is still a PretrainedConfig, not a dict.
+    assert isinstance(extract_config.text_config, LlamaConfig)
+
+    extracted = get_hf_text_config(extract_config)
+    assert extracted is extract_config.text_config
+    assert extracted.num_attention_heads == text_config.num_attention_heads
+    assert extracted.hidden_size == text_config.hidden_size
+
+    # Serialization must still round-trip correctly.
+    serialized = extract_config.to_dict()
+    assert isinstance(serialized["text_config"], dict)
+    assert serialized["text_config"]["num_attention_heads"] == (
+        text_config.num_attention_heads
+    )
+
+    json_str = json.loads(extract_config.to_json_string())
+    assert json_str["text_config"]["num_attention_heads"] == (
+        text_config.num_attention_heads
+    )
+
+
+def test_extract_hidden_states_speculative_config_vlm():
+    """SpeculativeConfig with a VLM target must build without errors."""
+    nested_text_config = LlamaConfig(
+        vocab_size=32000,
+        hidden_size=128,
+        intermediate_size=256,
+        num_hidden_layers=2,
+        num_attention_heads=8,
+    )
+
+    target_model_config = ModelConfig(
+        model=model_dir,
+        runner="generate",
+        max_model_len=100,
+    )
+    # Replace the real text-only config with our composite VLM config.
+    target_model_config.hf_config = _DummyVLMConfig(
+        text_config=nested_text_config,
+    )
+    target_model_config.hf_text_config = nested_text_config
+
+    speculative_config = SpeculativeConfig(
+        target_model_config=target_model_config,
+        target_parallel_config=ParallelConfig(),
+        method="extract_hidden_states",
+        num_speculative_tokens=1,
+        draft_model_config={
+            "hf_config": {
+                "eagle_aux_hidden_state_layer_ids": [1, 2],
+            }
+        },
+    )
+
+    assert speculative_config.draft_model_config is not None
+    assert isinstance(
+        speculative_config.draft_model_config.hf_config.text_config,
+        LlamaConfig,
+    )
+    assert speculative_config.draft_model_config.hf_text_config is (
+        speculative_config.draft_model_config.hf_config.text_config
+    )
+    assert (
+        speculative_config.draft_model_config.hf_text_config.num_attention_heads
+        == nested_text_config.num_attention_heads
+    )
+
+
+def test_extract_hidden_states_config_invalid_text_config():
+    """A nested text_config missing required attrs must still be rejected."""
+    broken_text_config = PretrainedConfig(hidden_size=128)
+    vlm_config = _DummyVLMConfig(text_config=broken_text_config)
+
+    extract_config = ExtractHiddenStatesConfig(
+        vlm_config,
+        eagle_aux_hidden_state_layer_ids=[1],
+    )
+
+    # The object is preserved (not flattened), …
+    assert extract_config.text_config is broken_text_config
+    # … but validation still rejects the missing attribute.
+    with pytest.raises(ValueError, match="num_attention_heads"):
+        get_hf_text_config(extract_config)
diff --git a/tests/v1/spec_decode/test_max_len.py b/tests/v1/spec_decode/test_max_len.py
index 42991f9f1ae0..77c041d84a94 100644
--- a/tests/v1/spec_decode/test_max_len.py
+++ b/tests/v1/spec_decode/test_max_len.py
@@ -6,6 +6,7 @@
 
 from tests.utils import get_attn_backend_list_based_on_platform
 from vllm import LLM, SamplingParams
+from vllm.config import ModelConfig, ParallelConfig, SpeculativeConfig
 from vllm.platforms import current_platform
 from vllm.sampling_params import StructuredOutputsParams
 
@@ -38,12 +39,6 @@ def test_ngram_max_len(num_speculative_tokens: int):
 def test_eagle_max_len(
     monkeypatch: pytest.MonkeyPatch, num_speculative_tokens: int, attn_backend: str
 ):
-    if attn_backend == "TRITON_ATTN" and not current_platform.is_rocm():
-        pytest.skip(
-            "TRITON_ATTN does not support "
-            "multi-token eagle spec decode on current platform"
-        )
-
     if attn_backend == "ROCM_AITER_FA" and current_platform.is_rocm():
         monkeypatch.setenv("VLLM_ROCM_USE_AITER", "1")
 
@@ -83,3 +78,23 @@ def test_eagle_max_len(
             "is longer than the eagle max length"
         )
         assert o.outputs[0].text == "a b c d e " * 15
+
+
+@pytest.mark.parametrize("spec_max_model_len", [80, 150])
+def test_mtp_speculative_config_max_model_len(spec_max_model_len: int):
+    """Regression test for #41456: max_model_len in speculative config
+    should be respected for the draft model."""
+    model_config = ModelConfig(
+        model="XiaomiMiMo/MiMo-7B-Base",
+        runner="generate",
+        max_model_len=200,
+        trust_remote_code=True,
+    )
+    spec_config = SpeculativeConfig(
+        target_model_config=model_config,
+        target_parallel_config=ParallelConfig(),
+        method="mtp",
+        num_speculative_tokens=1,
+        max_model_len=spec_max_model_len,
+    )
+    assert spec_config.draft_model_config.max_model_len == spec_max_model_len
diff --git a/tests/v1/spec_decode/test_mtp.py b/tests/v1/spec_decode/test_mtp.py
index 0a48b0e7b98c..7c478f81d862 100644
--- a/tests/v1/spec_decode/test_mtp.py
+++ b/tests/v1/spec_decode/test_mtp.py
@@ -28,6 +28,7 @@
 from vllm.v1.spec_decode.eagle import EagleProposer
 
 mimo_7b_dir = "XiaomiMiMo/MiMo-7B-Base"
+DEVICE_TYPE = current_platform.device_type
 
 
 def _create_mtp_proposer(num_speculative_tokens: int) -> EagleProposer:
@@ -48,7 +49,7 @@ def _create_mtp_proposer(num_speculative_tokens: int) -> EagleProposer:
         model_config=model_config,
         cache_config=CacheConfig(),
         speculative_config=speculative_config,
-        device_config=DeviceConfig(device=current_platform.device_type),
+        device_config=DeviceConfig(device=DEVICE_TYPE),
         parallel_config=ParallelConfig(),
         load_config=LoadConfig(),
         scheduler_config=SchedulerConfig(
@@ -57,12 +58,12 @@ def _create_mtp_proposer(num_speculative_tokens: int) -> EagleProposer:
         ),
     )
 
-    return EagleProposer(vllm_config=vllm_config, device=current_platform.device_type)
+    return EagleProposer(vllm_config=vllm_config, device=DEVICE_TYPE)
 
 
-@mock.patch("vllm.v1.spec_decode.eagle.get_pp_group")
-@mock.patch("vllm.v1.spec_decode.eagle.get_layers_from_vllm_config")
-@mock.patch("vllm.v1.spec_decode.eagle.get_model")
+@mock.patch("vllm.v1.spec_decode.llm_base_proposer.get_pp_group")
+@mock.patch("vllm.v1.spec_decode.llm_base_proposer.get_layers_from_vllm_config")
+@mock.patch("vllm.v1.spec_decode.llm_base_proposer.get_model")
 def test_mtp_load_model_unified(mock_get_model, mock_get_layers, mock_get_pp_group):
     """Test MTP-specific model loading with unified model approach."""
 
@@ -118,7 +119,7 @@ class _TargetModelStub(LlamaForCausalLM):
 def test_mtp_propose(num_speculative_tokens, monkeypatch):
     """Test that MTP's forward method returns hidden states directly"""
 
-    device = torch.device(current_platform.device_type)
+    device = torch.device(DEVICE_TYPE)
     batch_size = 2
     seq_lens = [5, 3]
     total_tokens = sum(seq_lens)
diff --git a/tests/v1/spec_decode/test_rejection_sampler_utils.py b/tests/v1/spec_decode/test_rejection_sampler_utils.py
new file mode 100644
index 000000000000..28f6044de877
--- /dev/null
+++ b/tests/v1/spec_decode/test_rejection_sampler_utils.py
@@ -0,0 +1,282 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import math
+
+import pytest
+import torch
+
+from vllm.v1.worker.gpu.spec_decode.rejection_sampler_utils import (
+    rejection_sample,
+)
+
+VOCAB_SIZE = 4096
+
+# Skip if no CUDA - Triton kernel requires GPU
+pytest.importorskip("triton")
+if not torch.cuda.is_available():
+    pytest.skip("CUDA required for rejection sampler tests", allow_module_level=True)
+
+
+def _build_rejection_sample_inputs(
+    target_logits_1d: torch.Tensor,
+    draft_logits_1d: torch.Tensor,
+    num_speculative_steps: int,
+    temperature: float,
+    num_trials: int,
+) -> dict:
+    device = target_logits_1d.device
+    vocab_size = target_logits_1d.shape[0]
+    K = num_speculative_steps
+    num_logits = num_trials * (K + 1)
+
+    target_logits = target_logits_1d.unsqueeze(0).expand(num_logits, -1).contiguous()
+    draft_logits = (
+        draft_logits_1d.view(1, 1, vocab_size).expand(num_trials, K, -1).contiguous()
+    )
+
+    draft_probs = torch.softmax(draft_logits_1d, dim=0)
+    draft_tokens = torch.multinomial(
+        draft_probs.expand(num_trials, -1), K, replacement=True
+    )
+    draft_sampled_2d = torch.zeros(num_trials, K + 1, dtype=torch.int64, device=device)
+    draft_sampled_2d[:, 1:] = draft_tokens
+    draft_sampled = draft_sampled_2d.reshape(-1)
+
+    cu_num_logits = torch.arange(num_trials + 1, dtype=torch.int32, device=device) * (
+        K + 1
+    )
+    pos = torch.arange(num_logits, dtype=torch.int32, device=device)
+    idx_mapping = torch.arange(num_trials, dtype=torch.int32, device=device)
+    expanded_idx_mapping = torch.arange(
+        num_trials, dtype=torch.int32, device=device
+    ).repeat_interleave(K + 1)
+    expanded_local_pos = torch.arange(K + 1, dtype=torch.int32, device=device).repeat(
+        num_trials
+    )
+    temp_tensor = torch.full(
+        (num_trials,), temperature, dtype=torch.float32, device=device
+    )
+    seed = torch.arange(num_trials, dtype=torch.int64, device=device)
+
+    return dict(
+        target_logits=target_logits,
+        draft_logits=draft_logits,
+        draft_sampled=draft_sampled,
+        cu_num_logits=cu_num_logits,
+        pos=pos,
+        idx_mapping=idx_mapping,
+        expanded_idx_mapping=expanded_idx_mapping,
+        expanded_local_pos=expanded_local_pos,
+        temperature=temp_tensor,
+        seed=seed,
+    )
+
+
+def _assert_distribution_match(
+    sampled_tokens: torch.Tensor,
+    target_probs: torch.Tensor,
+    device: str,
+    label: str = "",
+    min_expected: float = 5.0,
+):
+    """
+    Assert sampled tokens match the target distribution via a
+    chi-squared goodness-of-fit test. This is done by computing
+    observed vs expected token counts (target_probs * num_samples),
+    then checking that the chi-squared statistic is below a conservative
+    threshold. The threshold is set at df + 10*sqrt(2*df), which
+    corresponds to ~10 sigma under the chi-squared distribution's
+    normal approximation, effectively disallowing false positives.
+
+    NOTE: Tokens with expected count < min_expected are merged into
+    a single "other" bin to minimize chi-squared noise.
+    """
+    num_samples = sampled_tokens.shape[0]
+    vocab_size = target_probs.shape[0]
+
+    observed = torch.zeros(vocab_size, device=device, dtype=torch.float32)
+    observed.scatter_add_(0, sampled_tokens, torch.ones(num_samples, device=device))
+    expected = target_probs * num_samples
+
+    sufficient = expected >= min_expected
+    obs_main = observed[sufficient]
+    exp_main = expected[sufficient]
+
+    obs_other = observed[~sufficient].sum().unsqueeze(0)
+    exp_other = expected[~sufficient].sum().unsqueeze(0)
+
+    if exp_other.item() >= min_expected:
+        obs_all = torch.cat([obs_main, obs_other])
+        exp_all = torch.cat([exp_main, exp_other])
+    else:
+        obs_all = obs_main
+        exp_all = exp_main
+
+    chi2 = ((obs_all - exp_all) ** 2 / exp_all).sum().item()
+    df = obs_all.shape[0] - 1
+    if df < 1:
+        # All samples were merged into < 2 bins, which is too
+        # few to evaluate.
+        return
+
+    threshold = df + 10 * math.sqrt(2 * df)
+    prefix = f"[{label}] " if label else ""
+    assert chi2 < threshold, (
+        f"{prefix}Chi-squared test failed: chi2={chi2:.1f}, "
+        f"df={df}, threshold={threshold:.1f}. "
+        f"Output distribution does not match target distribution."
+    )
+
+
+@pytest.mark.parametrize(
+    "num_speculative_steps,temperature",
+    [
+        (1, 0.6),
+        (3, 0.6),
+        (1, 1.0),
+        (3, 1.0),
+    ],
+)
+def test_stochastic_rejection_sample(num_speculative_steps: int, temperature: float):
+    """
+    Verify that rejection sampling produces the target distribution.
+    This is done by simulating many independent trials of speculative
+    decoding (from a fixed target and draft distribution). We then
+    run rejection sample on all of the trials (requests), and verify
+    that the sampled tokens at every position follow the target
+    distribution p(x).
+    """
+
+    torch.manual_seed(42)
+    device = "cuda"
+    num_trials = 10 * VOCAB_SIZE
+
+    target_logits_1d = torch.randn(VOCAB_SIZE, device=device, dtype=torch.float32)
+    draft_logits_1d = torch.randn(VOCAB_SIZE, device=device, dtype=torch.float32)
+
+    if temperature > 0:
+        target_logits_1d /= temperature
+        draft_logits_1d /= temperature
+
+    inputs = _build_rejection_sample_inputs(
+        target_logits_1d,
+        draft_logits_1d,
+        num_speculative_steps,
+        temperature=temperature,
+        num_trials=num_trials,
+    )
+
+    sampled, num_sampled = rejection_sample(
+        **inputs, num_speculative_steps=num_speculative_steps
+    )
+
+    target_probs = torch.softmax(target_logits_1d, dim=0)
+    for pos in range(num_speculative_steps + 1):
+        accepted_mask = num_sampled >= pos + 1
+        _assert_distribution_match(
+            sampled[accepted_mask, pos], target_probs, device, label=f"position {pos}"
+        )
+
+
+@pytest.mark.parametrize("num_speculative_steps", [1, 3])
+def test_greedy_rejection_sample(num_speculative_steps: int):
+    """
+    Verify that greedy (temperature=0) always outputs the target argmax
+    at every accepted position.
+    """
+
+    torch.manual_seed(42)
+    device = "cuda"
+    num_trials = 10 * VOCAB_SIZE
+
+    target_logits_1d = torch.randn(VOCAB_SIZE, device=device, dtype=torch.float32)
+    draft_logits_1d = torch.randn(VOCAB_SIZE, device=device, dtype=torch.float32)
+
+    inputs = _build_rejection_sample_inputs(
+        target_logits_1d,
+        draft_logits_1d,
+        num_speculative_steps,
+        temperature=0.0,
+        num_trials=num_trials,
+    )
+
+    sampled, num_sampled = rejection_sample(
+        **inputs, num_speculative_steps=num_speculative_steps
+    )
+
+    target_argmax = target_logits_1d.argmax().item()
+
+    steps = torch.arange(num_speculative_steps + 1, device=device).unsqueeze(0)
+    accepted_mask = steps < num_sampled.unsqueeze(1)
+
+    assert (sampled[accepted_mask] == target_argmax).all(), (
+        "Greedy sampling produced tokens that are not the target argmax"
+    )
+
+
+@pytest.mark.parametrize(
+    "num_speculative_steps,temperature,unconditional_rates",
+    [
+        (3, 1.0, [0.9, 0.5, 0.2]),
+        (3, 0.0, [0.9, 0.5, 0.2]),
+        (3, 1.0, [1.0, 1.0, 1.0]),
+        (3, 0.0, [1.0, 1.0, 1.0]),
+        (3, 1.0, [0.0, 0.0, 0.0]),
+        (3, 0.0, [0.0, 0.0, 0.0]),
+        (1, 1.0, [0.7]),
+        (1, 0.0, [0.7]),
+    ],
+)
+def test_synthetic_rejection_sample(
+    num_speculative_steps: int,
+    temperature: float,
+    unconditional_rates: list[float],
+):
+    """
+    Verify that synthetic rejection sampling produces the expected
+    per-position acceptance rates. The unconditional rate at position i
+    is P(all draft steps 0..i accepted) = product(conditional_rates[0:i+1]).
+    This is approximately mean(num accepted >= i + 1) over many trials.
+    """
+    from vllm.v1.spec_decode.utils import unconditional_to_conditional_rates
+
+    torch.manual_seed(42)
+    device = "cuda"
+    num_trials = 10 * VOCAB_SIZE
+    deviation_tol = 1e-2
+
+    target_logits_1d = torch.randn(VOCAB_SIZE, device=device, dtype=torch.float32)
+    draft_logits_1d = torch.randn(VOCAB_SIZE, device=device, dtype=torch.float32)
+
+    if temperature > 0:
+        target_logits_1d /= temperature
+        draft_logits_1d /= temperature
+
+    inputs = _build_rejection_sample_inputs(
+        target_logits_1d,
+        draft_logits_1d,
+        num_speculative_steps,
+        temperature=temperature,
+        num_trials=num_trials,
+    )
+
+    conditional_rates = unconditional_to_conditional_rates(unconditional_rates)
+    synthetic_conditional_rates = torch.tensor(
+        conditional_rates, dtype=torch.float32, device=device
+    )
+
+    _, num_sampled = rejection_sample(
+        **inputs,
+        num_speculative_steps=num_speculative_steps,
+        synthetic_conditional_rates=synthetic_conditional_rates,
+    )
+
+    # num_sampled includes the resampled/bonus token.
+    num_accepted = num_sampled - 1
+    for i, expected_rate in enumerate(unconditional_rates):
+        observed_rate = (num_accepted >= i + 1).float().mean().item()
+        assert abs(observed_rate - expected_rate) < deviation_tol, (
+            f"Step {i}: observed rate {observed_rate:.4f} deviates from "
+            f"expected rate {expected_rate:.4f} by more than {deviation_tol}."
+        )
diff --git a/tests/v1/spec_decode/test_speculators_correctness.py b/tests/v1/spec_decode/test_speculators_correctness.py
new file mode 100644
index 000000000000..e133d9eaf9e4
--- /dev/null
+++ b/tests/v1/spec_decode/test_speculators_correctness.py
@@ -0,0 +1,209 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import dataclasses
+
+import pytest
+import torch
+
+from tests.evals.gsm8k.gsm8k_eval import evaluate_gsm8k_offline
+from tests.utils import large_gpu_mark
+from vllm import LLM
+from vllm.config import SpeculativeConfig
+from vllm.distributed import cleanup_dist_env_and_memory
+
+
+@dataclasses.dataclass
+class SpeculatorTestConfig:
+    model_path: str
+    method: str
+    display_name: str
+    expected_gsm8k_accuracy: float
+    accuracy_rtol: float
+    expected_acceptance_len: float
+    acceptance_len_rtol: float
+    expected_per_pos_acceptance_rates: tuple[float, ...]
+    per_pos_rtol: float
+    quantization: str | None = None
+    parallel_drafting: bool | None = None
+
+
+DFLASH_CONFIG = SpeculatorTestConfig(
+    model_path="nm-testing/dflash-qwen3-8b-speculators",
+    method="dflash",
+    display_name="DFlash",
+    expected_gsm8k_accuracy=0.885,
+    accuracy_rtol=0.03,
+    expected_acceptance_len=3.45,
+    acceptance_len_rtol=0.15,
+    expected_per_pos_acceptance_rates=(0.795, 0.611, 0.429, 0.282),
+    per_pos_rtol=0.15,
+    quantization="fp8",
+)
+
+PEAGLE_CONFIG = SpeculatorTestConfig(
+    model_path="nm-testing/qwen3-8b-peagle-speculators",
+    method="eagle3",
+    display_name="PEagle",
+    expected_gsm8k_accuracy=0.88,
+    accuracy_rtol=0.05,
+    expected_acceptance_len=2.27,
+    acceptance_len_rtol=0.20,
+    expected_per_pos_acceptance_rates=(0.66, 0.36, 0.18, 0.09),
+    per_pos_rtol=0.20,
+    parallel_drafting=True,
+)
+
+SPECULATOR_CONFIGS = [
+    pytest.param(DFLASH_CONFIG, id="dflash"),
+    pytest.param(PEAGLE_CONFIG, id="peagle"),
+]
+
+
+def compute_spec_decode_stats(metrics) -> dict:
+    """Extract all spec-decode metrics and compute derived stats."""
+    name2metric = {m.name: m for m in metrics}
+
+    n_drafts = name2metric["vllm:spec_decode_num_drafts"].value
+    n_draft_tokens = name2metric["vllm:spec_decode_num_draft_tokens"].value
+    n_accepted = name2metric["vllm:spec_decode_num_accepted_tokens"].value
+
+    per_pos_vec = name2metric["vllm:spec_decode_num_accepted_tokens_per_pos"].values
+
+    acceptance_len = 1 + (n_accepted / n_drafts) if n_drafts > 0 else 1.0
+    draft_tokens_per_step = (n_draft_tokens / n_drafts) if n_drafts > 0 else 0
+    overall_acceptance_rate = (n_accepted / n_draft_tokens) if n_draft_tokens > 0 else 0
+    per_pos_rates = [v / n_drafts for v in per_pos_vec] if n_drafts > 0 else []
+
+    return {
+        "num_drafts": n_drafts,
+        "num_draft_tokens": n_draft_tokens,
+        "num_accepted_tokens": n_accepted,
+        "acceptance_len": acceptance_len,
+        "draft_tokens_per_step": draft_tokens_per_step,
+        "overall_acceptance_rate": overall_acceptance_rate,
+        "per_pos_accepted": list(per_pos_vec),
+        "per_pos_acceptance_rates": per_pos_rates,
+    }
+
+
+def print_spec_decode_stats(stats: dict) -> None:
+    """Print all spec-decode metrics and derived values."""
+    print("\n===== Spec Decode Metrics =====")
+    print(f"  num_drafts:              {stats['num_drafts']}")
+    print(f"  num_draft_tokens:        {stats['num_draft_tokens']}")
+    print(f"  num_accepted_tokens:     {stats['num_accepted_tokens']}")
+    print(f"  draft_tokens_per_step:   {stats['draft_tokens_per_step']:.2f}")
+    print(f"  overall_acceptance_rate: {stats['overall_acceptance_rate']:.4f}")
+    print(f"  acceptance_len (1+acc/drafts): {stats['acceptance_len']:.4f}")
+    print("  per-position accepted tokens:", stats["per_pos_accepted"])
+    print("  per-position acceptance rates:")
+    for i, rate in enumerate(stats["per_pos_acceptance_rates"]):
+        print(f"    pos {i}: {rate:.4f}")
+    print("===============================\n")
+
+
+@pytest.mark.parametrize("config", SPECULATOR_CONFIGS)
+def test_speculators_model(vllm_runner, example_prompts, monkeypatch, config):
+    """
+    Test speculators model properly initializes speculative decoding.
+
+    Verifies:
+    1. Speculative config is automatically initialized from speculators config
+    2. Method is detected correctly
+    3. parallel_drafting is set correctly (if applicable)
+    4. The draft model path is correctly set
+    5. Speculative tokens count is valid
+    6. Text generation works with speculative decoding enabled
+    """
+    monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
+
+    runner_kwargs = dict(dtype=torch.bfloat16, enforce_eager=True)
+    if config.quantization:
+        runner_kwargs["quantization"] = config.quantization
+
+    with vllm_runner(config.model_path, **runner_kwargs) as vllm_model:
+        vllm_config = vllm_model.llm.llm_engine.vllm_config
+
+        assert isinstance(vllm_config.speculative_config, SpeculativeConfig), (
+            "Speculative config should be initialized for speculators model"
+        )
+
+        spec_config = vllm_config.speculative_config
+        assert spec_config.method == config.method, (
+            f"Expected method='{config.method}', got '{spec_config.method}'"
+        )
+        if config.parallel_drafting is not None:
+            assert spec_config.parallel_drafting is config.parallel_drafting, (
+                f"Expected parallel_drafting={config.parallel_drafting} "
+                f"for {config.display_name} model"
+            )
+        assert spec_config.num_speculative_tokens > 0, (
+            f"Expected positive speculative tokens, "
+            f"got {spec_config.num_speculative_tokens}"
+        )
+        assert spec_config.model == config.model_path, (
+            f"Draft model should be {config.model_path}, got {spec_config.model}"
+        )
+
+        vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens=20)
+        assert vllm_outputs, (
+            f"No outputs generated for speculators model {config.model_path}"
+        )
+
+
+@pytest.mark.slow_test
+@large_gpu_mark(min_gb=40)
+@pytest.mark.parametrize("config", SPECULATOR_CONFIGS)
+def test_speculators_correctness(monkeypatch, config):
+    """
+    E2E correctness test via the speculators auto-detect path.
+
+    Evaluates GSM8k accuracy to ensure the speculators-format model produces
+    correct outputs, and checks that acceptance length does not collapse under
+    batched inference (lm-eval style).
+    """
+    monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
+
+    spec_llm = LLM(
+        model=config.model_path,
+        trust_remote_code=True,
+        max_model_len=4096,
+        max_num_seqs=128,
+        gpu_memory_utilization=0.85,
+        enforce_eager=False,
+        disable_log_stats=False,
+    )
+
+    results = evaluate_gsm8k_offline(spec_llm)
+    accuracy = results["accuracy"]
+    accuracy_threshold = config.expected_gsm8k_accuracy * (1 - config.accuracy_rtol)
+    assert accuracy >= accuracy_threshold, (
+        f"Expected GSM8K accuracy >= {accuracy_threshold:.3f}, got {accuracy:.3f}"
+    )
+
+    current_metrics = spec_llm.get_metrics()
+    stats = compute_spec_decode_stats(current_metrics)
+    print_spec_decode_stats(stats)
+
+    acceptance_len = stats["acceptance_len"]
+    al_threshold = config.expected_acceptance_len * (1 - config.acceptance_len_rtol)
+    assert acceptance_len >= al_threshold, (
+        f"{config.display_name} speculators acceptance length too low: "
+        f"{acceptance_len:.2f} < {al_threshold:.2f}"
+    )
+
+    per_pos_rates = stats["per_pos_acceptance_rates"]
+    for i, expected_rate in enumerate(config.expected_per_pos_acceptance_rates):
+        assert i < len(per_pos_rates), (
+            f"Missing per-position acceptance rate for position {i}"
+        )
+        threshold = expected_rate * (1 - config.per_pos_rtol)
+        assert per_pos_rates[i] >= threshold, (
+            f"Per-position acceptance rate at pos {i} too low: "
+            f"{per_pos_rates[i]:.4f} < {threshold:.4f} "
+            f"(expected ~{expected_rate:.4f})"
+        )
+
+    del spec_llm
+    torch.accelerator.empty_cache()
+    cleanup_dist_env_and_memory()
diff --git a/tests/v1/spec_decode/test_synthetic_rejection_sampler_utils.py b/tests/v1/spec_decode/test_synthetic_rejection_sampler_utils.py
index d817bc1b8fee..a5a23cf1b7e8 100644
--- a/tests/v1/spec_decode/test_synthetic_rejection_sampler_utils.py
+++ b/tests/v1/spec_decode/test_synthetic_rejection_sampler_utils.py
@@ -2,33 +2,48 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import pytest
 
-from vllm.v1.worker.gpu.spec_decode.synthetic_rejection_sampler_utils import (
-    compute_synthetic_rejection_sampler_params,
+from vllm.config.speculative import SpeculativeConfig
+from vllm.v1.spec_decode.utils import unconditional_to_conditional_rates
+
+
+def test_unconditional_to_conditional_rates_basic():
+    # c_0 = p_0; c_i = p_i / p_{i-1}
+    assert unconditional_to_conditional_rates([0.9, 0.5, 0.2]) == pytest.approx(
+        [0.9, 0.5 / 0.9, 0.2 / 0.5]
+    )
+
+
+def test_unconditional_to_conditional_rates_handles_zero():
+    # After a zero, subsequent conditional rates are clamped to 0 (the chain
+    # has already terminated in the kernel, so these values are unused).
+    assert unconditional_to_conditional_rates([1.0, 0.6, 0.0, 0.0]) == pytest.approx(
+        [1.0, 0.6, 0.0, 0.0]
+    )
+
+
+def test_unconditional_to_conditional_rates_all_ones():
+    assert unconditional_to_conditional_rates([1.0, 1.0, 1.0]) == pytest.approx(
+        [1.0, 1.0, 1.0]
+    )
+
+
+@pytest.mark.parametrize(
+    "length,n,expected",
+    [
+        (2.6, 3, [1.0, 0.6, 0.0]),
+        (1.0, 3, [0.0, 0.0, 0.0]),
+        (4.0, 3, [1.0, 1.0, 1.0]),
+        (2.0, 3, [1.0, 0.0, 0.0]),
+        (3.5, 4, [1.0, 1.0, 0.5, 0.0]),
+    ],
 )
+def test_acceptance_length_to_rates(length, n, expected):
+    assert SpeculativeConfig._acceptance_length_to_rates(length, n) == pytest.approx(
+        expected
+    )
+
 
-NUM_SPECULATIVE_STEPS = [1, 2, 3, 4, 5, 7, 10]
-ACCEPTANCE_RATES = [i / 100 for i in range(0, 100)]
-
-
-@pytest.mark.parametrize("num_speculative_steps", NUM_SPECULATIVE_STEPS)
-def test_compute_synthetic_rejection_sampler_params(num_speculative_steps: int):
-    """Test that the base acceptance rate and decay factor generated for
-    synthetic rejection sampling have a mean joint acceptance probability
-    that matches the desired acceptance rate."""
-    tol = 1e-9
-    for desired_acceptance_rate in ACCEPTANCE_RATES:
-        base_rate, decay_factor = compute_synthetic_rejection_sampler_params(
-            desired_acceptance_rate, num_speculative_steps, tol=tol
-        )
-
-        # Compute the mean of joint acceptance probabilities across
-        # all speculative positions.
-        joint_prob = 1.0
-        mean_joint = 0.0
-        for i in range(num_speculative_steps):
-            joint_prob *= base_rate * decay_factor**i
-            mean_joint += joint_prob
-        mean_joint /= num_speculative_steps
-
-        assert abs(desired_acceptance_rate - mean_joint) < 10 * tol
-        assert base_rate <= 1.0
+def test_resolve_length_produces_minvariance_schedule():
+    assert SpeculativeConfig._resolve_synthetic_acceptance_rates(
+        3, None, 2.6
+    ) == pytest.approx([1.0, 0.6, 0.0])
diff --git a/tests/v1/spec_decode/test_tree_attention.py b/tests/v1/spec_decode/test_tree_attention.py
deleted file mode 100644
index 52bc722cfcbd..000000000000
--- a/tests/v1/spec_decode/test_tree_attention.py
+++ /dev/null
@@ -1,502 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import math
-
-import pytest
-import torch
-
-from tests.v1.attention.utils import (
-    create_standard_kv_cache_spec,
-    create_vllm_config,
-    try_backend_includes_kv_cache_update,
-    try_get_attention_backend,
-)
-from vllm.config import ParallelConfig, SpeculativeConfig
-from vllm.platforms import current_platform
-from vllm.v1.attention.backend import CommonAttentionMetadata
-from vllm.v1.attention.backends.fa_utils import is_flash_attn_varlen_func_available
-from vllm.v1.attention.backends.registry import AttentionBackendEnum
-
-if not is_flash_attn_varlen_func_available():
-    pytest.skip(
-        "This test requires flash_attn_varlen_func, but it's not available.",
-        allow_module_level=True,
-    )
-
-# --------------------------------------------------------------------------- #
-#  KV cache layout adaptation
-# --------------------------------------------------------------------------- #
-# Two KV cache layouts exist across backends:
-#
-#   Flash layout: (2, num_blocks, block_size, num_kv_heads, head_size)
-#     - dim 0 separates key (index 0) and value (index 1)
-#     - Used by: FLASH_ATTN, TREE_ATTN, ROCM_AITER_FA, ROCM_ATTN
-#
-#   Block layout: (num_blocks, 2, block_size, num_kv_heads, head_size)
-#     - dim 1 separates key (index 0) and value (index 1)
-#     - Used by: TRITON_ATTN
-#
-# The test creates KV caches in flash layout (the canonical format used by
-# tree attention). When a reference backend needs block layout we transpose
-# dims 0 and 1.
-#
-# Note: ROCM_ATTN uses flash layout for storage but its forward path calls
-# PagedAttention.split_kv_cache which reinterprets the raw memory as paged
-# layout (num_blocks, num_kv_heads, head_size//x, block_size, x). This is
-# a view-level incompatibility, not a transpose - see the TODO in
-# _get_available_reference_backends for details.
-#
-# TODO: Replace this mapping with a `KV_CACHE_LAYOUT` class attribute on each
-# AttentionImpl so the layout is self-documented by the backend itself, e.g.:
-#     class TritonAttentionImpl(AttentionImpl):
-#         KV_CACHE_LAYOUT = "block"
-# --------------------------------------------------------------------------- #
-
-_BLOCK_KV_LAYOUT_BACKENDS = frozenset(
-    {
-        AttentionBackendEnum.TRITON_ATTN,
-    }
-)
-
-# Backends whose do_kv_cache_update requires engine-level state (e.g.
-# ForwardContext) that is not available in this test harness, but whose
-# KV cache is flash layout and can be written with reshape_and_cache_flash.
-# When a backend is listed here, forward_attention() bypasses
-# do_kv_cache_update and writes directly to the cache.
-_NEEDS_DIRECT_CACHE_UPDATE = frozenset(
-    {
-        AttentionBackendEnum.ROCM_AITER_FA,
-    }
-)
-
-# Backends with known test-harness incompatibilities - see the TODOs
-# inside _get_available_reference_backends for details.
-_INCOMPATIBLE_REFERENCE_BACKENDS = frozenset(
-    {
-        AttentionBackendEnum.ROCM_AITER_FA,
-        AttentionBackendEnum.ROCM_ATTN,
-    }
-)
-
-
-def _adapt_kv_cache_for_backend(
-    kv_cache: torch.Tensor,
-    backend: AttentionBackendEnum,
-) -> torch.Tensor:
-    """Convert kv_cache from flash layout ``(2, num_blocks, ...)`` to block
-    layout ``(num_blocks, 2, ...)`` if the backend requires it.  Returns the
-    original tensor unchanged when no conversion is needed."""
-    if backend in _BLOCK_KV_LAYOUT_BACKENDS:
-        return kv_cache.transpose(0, 1).contiguous()
-    return kv_cache
-
-
-def _get_platform_default_backend() -> AttentionBackendEnum:
-    """Ask the platform what backend it would auto-select at runtime."""
-    from vllm.v1.attention.selector import AttentionSelectorConfig
-
-    config = AttentionSelectorConfig(
-        block_size=32,
-        kv_cache_dtype="auto",
-        use_mla=False,
-        use_sparse=False,
-        head_size=128,
-        dtype=torch.bfloat16,
-    )
-    backend_path = current_platform.get_attn_backend_cls(
-        selected_backend=None,
-        attn_selector_config=config,
-    )
-    for backend in AttentionBackendEnum:
-        try:
-            if backend.get_path() == backend_path:
-                return backend
-        except ValueError:
-            continue
-    raise RuntimeError(
-        f"Platform returned backend path '{backend_path}' "
-        f"that doesn't match any AttentionBackendEnum member."
-    )
-
-
-def _get_available_reference_backends() -> list[AttentionBackendEnum]:
-    """Collect all reference backends the current platform can run.
-
-    On CUDA this is just FLASH_ATTN. On ROCm this includes the platform
-    default plus every backend the hardware supports, so the test validates
-    tree attention against all of them.
-    """
-    if current_platform.is_rocm():
-        backends: list[AttentionBackendEnum] = []
-
-        # 1. Whatever the platform would auto-select at runtime.
-        default_backend = _get_platform_default_backend()
-        if default_backend not in _INCOMPATIBLE_REFERENCE_BACKENDS:
-            backends.append(default_backend)
-
-        # 2. TRITON_ATTN - always available on ROCm.
-        if AttentionBackendEnum.TRITON_ATTN not in backends:
-            backends.append(AttentionBackendEnum.TRITON_ATTN)
-
-        # TODO: Enable ROCM_ATTN. Its forward path uses
-        # PagedAttention.split_kv_cache which reinterprets the raw
-        # cache memory as paged layout:
-        #   key:   (num_blocks, num_kv_heads, head_size//x, block_size, x)
-        #   value: (num_blocks, num_kv_heads, head_size, block_size)
-        # Tree attention writes prefix data in NHD flash layout, so the
-        # same bytes produce completely different values when read in
-        # paged format. Supporting ROCM_ATTN would require writing
-        # prefix data via PagedAttention.write_to_paged_cache into a
-        # separate paged-format KV cache.
-
-        # TODO: Enable ROCM_AITER_FA. Its metadata builder reads head
-        # counts from the model config at construction time and
-        # allocates extend_workspace with those dimensions. The test
-        # uses independent head count parameters (num_heads=2/4,
-        # num_kv_heads=2) that don't match the model config
-        # (Llama-3-8B: 32 q heads, 8 kv heads), causing a head count
-        # mismatch in flash_attn_varlen_func during extend_forward.
-        # Fixing this requires either matching test head counts to the
-        # model config or decoupling the builder from model config
-        # head geometry. The direct cache update path
-        # (_NEEDS_DIRECT_CACHE_UPDATE) is already in place for when
-        # this is resolved.
-
-        return backends
-
-    # CUDA: flash attention.
-    return [AttentionBackendEnum.FLASH_ATTN]
-
-
-class MockAttentionLayer(torch.nn.Module):
-    _q_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")
-    _k_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")
-    _v_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")
-    layer_name = "mock_layer"
-
-    def __init__(self):
-        super().__init__()
-
-    def forward(self, x):
-        return x
-
-
-def forward_attention(
-    q: torch.Tensor,
-    k: torch.Tensor,
-    v: torch.Tensor,
-    kv_cache: torch.Tensor,
-    block_table: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    seqlen_k: int,
-    backend: AttentionBackendEnum,
-    spec_token_tree: str | None = None,
-    num_spec_tokens: int = 0,
-) -> torch.Tensor:
-    """Run a single attention forward pass through the given backend.
-
-    ``kv_cache`` is expected in **flash layout**
-    ``(2, num_blocks, block_size, num_kv_heads, head_size)``.
-    It is automatically converted when the target backend needs a
-    different layout.
-    """
-    batch_size, q_len, num_heads, dim_per_head = q.shape
-    num_kv_heads = k.shape[-2]
-    # Initialize the query and KV sequence lengths.
-    query_start_loc = q_len * torch.arange(
-        batch_size + 1, device=q.device, dtype=torch.int32
-    )
-    query_lens = torch.diff(query_start_loc)
-    seq_lens = torch.full(
-        (batch_size,),
-        seqlen_k,
-        device=q.device,
-        dtype=torch.int32,
-    )
-    context_lens = seq_lens - query_lens
-    max_seq_len = int(seq_lens.max())
-    max_query_len = q_len
-    num_actual_tokens = query_start_loc[-1]
-
-    softmax_scale = q.shape[-1] ** (-0.5)
-    layer = MockAttentionLayer()
-
-    # Build common metadata.
-    model_name = "meta-llama/Meta-Llama-3-8B"
-    builder_cls, impl_cls = try_get_attention_backend(backend)
-    vllm_config = create_vllm_config(model_name=model_name, max_model_len=max(seq_lens))
-    if spec_token_tree is not None:
-        # Create speculative config if token tree is specified.
-        vllm_config.speculative_config = SpeculativeConfig(
-            target_model_config=vllm_config.model_config,
-            target_parallel_config=ParallelConfig(),
-            model=model_name,
-            method="eagle",
-            num_speculative_tokens=num_spec_tokens,
-            speculative_token_tree=spec_token_tree,
-        )
-    kv_cache_spec = create_standard_kv_cache_spec(vllm_config)
-    builder = builder_cls(kv_cache_spec, [], vllm_config, q.device)
-    common_attn_metadata = CommonAttentionMetadata(
-        query_start_loc=query_start_loc,
-        query_start_loc_cpu=query_start_loc.cpu(),
-        seq_lens=seq_lens,
-        _seq_lens_cpu=seq_lens.cpu(),
-        _num_computed_tokens_cpu=context_lens.cpu(),
-        num_reqs=batch_size,
-        num_actual_tokens=num_actual_tokens,
-        max_query_len=max_query_len,
-        max_seq_len=max_seq_len,
-        block_table_tensor=block_table,
-        slot_mapping=slot_mapping,
-    )
-
-    # Build attention metadata.
-    attn_metadata = builder.build(
-        common_prefix_len=0,
-        common_attn_metadata=common_attn_metadata,
-    )
-
-    # Initialize the backend implementation.
-    instance = impl_cls(
-        num_heads=num_heads,
-        head_size=dim_per_head,
-        scale=softmax_scale,
-        num_kv_heads=num_kv_heads,
-        alibi_slopes=None,
-        sliding_window=None,
-        kv_cache_dtype="auto",
-    )
-
-    # Adapt KV cache layout for this backend.
-    adapted_kv_cache = _adapt_kv_cache_for_backend(kv_cache, backend)
-
-    # Run forward pass and return output.
-    query = q.view(-1, num_heads, dim_per_head)
-    key = k.view(-1, num_kv_heads, dim_per_head)
-    value = v.view(-1, num_kv_heads, dim_per_head)
-    output = torch.empty_like(query)
-    if not try_backend_includes_kv_cache_update(backend):
-        if backend in _NEEDS_DIRECT_CACHE_UPDATE:
-            # This backend's do_kv_cache_update requires engine-level
-            # ForwardContext that isn't available in this test harness.
-            # Write directly using reshape_and_cache_flash since the
-            # KV cache layout is identical (flash layout, unbind on dim 0).
-            key_cache, value_cache = adapted_kv_cache.unbind(0)
-            torch.ops._C_cache_ops.reshape_and_cache_flash(
-                key,
-                value,
-                key_cache,
-                value_cache,
-                attn_metadata.slot_mapping,
-                "auto",
-                layer._k_scale,
-                layer._v_scale,
-            )
-        else:
-            instance.do_kv_cache_update(
-                layer=layer,
-                key=key,
-                value=value,
-                kv_cache=adapted_kv_cache,
-                slot_mapping=attn_metadata.slot_mapping,
-            )
-    return instance.forward(
-        layer=layer,
-        query=query,
-        key=key,
-        value=value,
-        kv_cache=adapted_kv_cache.clone(),
-        attn_metadata=attn_metadata,
-        output=output,
-    )
-
-
-@pytest.mark.parametrize(
-    "reference_backend",
-    _get_available_reference_backends(),
-    ids=lambda b: b.name,
-)
-def test_tree_attn_correctness(
-    reference_backend: AttentionBackendEnum,
-) -> None:
-    torch.manual_seed(42)
-    torch.cuda.manual_seed_all(42)
-
-    device = "cuda"
-    tree_attn_masks = {
-        # Chain.
-        "[(0,), (0, 0), (0, 0, 0)]": torch.tensor(
-            [
-                [1, 0, 0, 0],
-                [1, 1, 0, 0],
-                [1, 1, 1, 0],
-                [1, 1, 1, 1],
-            ],
-            device=device,
-            dtype=torch.int32,
-        ),
-        # Tree.
-        "[(0,), (1,), (0, 0), (0, 1), (1, 0), (1, 1)]": torch.tensor(
-            [
-                [1, 0, 0, 0, 0, 0, 0],
-                [1, 1, 0, 0, 0, 0, 0],
-                [1, 0, 1, 0, 0, 0, 0],
-                [1, 1, 0, 1, 0, 0, 0],
-                [1, 1, 0, 0, 1, 0, 0],
-                [1, 0, 1, 0, 0, 1, 0],
-                [1, 0, 1, 0, 0, 0, 1],
-            ],
-            device=device,
-            dtype=torch.int32,
-        ),
-    }
-
-    dim_per_head = 128
-    num_kv_heads = 2
-    block_size = 32
-    max_sequence_length = 8192
-    randomize_blocks = True
-    for batch_size in [1, 16, 32]:
-        for num_heads in [2, 4]:
-            for sequence_position in [16, 1024, 2048]:
-                for spec_token_tree, tree_attn_mask in tree_attn_masks.items():
-                    # Assert that the number of heads is divisible
-                    # by the number of KV heads.
-                    assert num_heads % num_kv_heads == 0
-
-                    # Initialize q, k, and v.
-                    tree_size_q = tree_attn_mask.shape[0]
-                    seqlen_k = sequence_position + tree_size_q
-                    q = torch.randn(
-                        (batch_size, tree_size_q, num_heads, dim_per_head),
-                        device=device,
-                        dtype=torch.bfloat16,
-                    )
-                    k = torch.randn(
-                        (batch_size, tree_size_q, num_kv_heads, dim_per_head),
-                        device=device,
-                        dtype=torch.bfloat16,
-                    )
-                    v = torch.randn(
-                        (batch_size, tree_size_q, num_kv_heads, dim_per_head),
-                        device=device,
-                        dtype=torch.bfloat16,
-                    )
-
-                    # KV cache in flash layout - the canonical format for
-                    # tree attention. forward_attention() handles conversion
-                    # when needed.
-                    assert max_sequence_length % block_size == 0
-                    max_blocks_per_batch = max_sequence_length // block_size
-                    kv_cache = torch.randn(
-                        (
-                            2,
-                            batch_size * max_blocks_per_batch,
-                            block_size,
-                            num_kv_heads,
-                            dim_per_head,
-                        ),
-                        device=q.device,
-                        dtype=torch.bfloat16,
-                    )
-                    num_alloc_blocks_per_batch = math.ceil(seqlen_k / block_size)
-                    block_table = torch.zeros(
-                        (batch_size, max_blocks_per_batch),
-                        device=q.device,
-                        dtype=torch.int32,
-                    )
-                    block_ids = torch.arange(
-                        0,
-                        batch_size * num_alloc_blocks_per_batch,
-                        device=q.device,
-                        dtype=torch.int32,
-                    )
-                    if randomize_blocks:
-                        # Randomize the block ids.
-                        block_ids = block_ids[torch.randperm(block_ids.numel())]
-                    block_table[:, :num_alloc_blocks_per_batch] = block_ids.view(
-                        -1, num_alloc_blocks_per_batch
-                    )
-
-                    # Set up the slot mapping for the input KVs.
-                    tree_positions = sequence_position + torch.arange(
-                        0,
-                        tree_size_q,
-                        device=q.device,
-                        dtype=torch.int64,
-                    ).repeat(batch_size, 1)
-                    tree_slot_mapping = _gen_slot_mapping(
-                        tree_positions, block_table, block_size
-                    )
-
-                    # Compute attention for the tree.
-                    tree_attn_output = forward_attention(
-                        q=q,
-                        k=k,
-                        v=v,
-                        kv_cache=kv_cache,
-                        block_table=block_table,
-                        slot_mapping=tree_slot_mapping,
-                        seqlen_k=seqlen_k,
-                        backend=AttentionBackendEnum.TREE_ATTN,
-                        spec_token_tree=spec_token_tree,
-                        num_spec_tokens=tree_size_q - 1,
-                    ).view(batch_size, -1, num_heads, dim_per_head)
-
-                    # Verify each branch against the reference backend.
-                    for q_index in range(tree_size_q):
-                        # Get the q, k, and v for the branch.
-                        branch_mask = tree_attn_mask[q_index, :]
-                        branch_indices = torch.nonzero(branch_mask, as_tuple=True)[0]
-                        q_len = branch_indices.shape[0]
-                        q_branch = q[:, branch_indices]
-                        k_branch = k[:, branch_indices]
-                        v_branch = v[:, branch_indices]
-
-                        # Setup slot mapping for the branch.
-                        branch_positions = sequence_position + torch.arange(
-                            0,
-                            q_len,
-                            device=q.device,
-                            dtype=torch.int64,
-                        ).repeat(batch_size, 1)
-                        branch_slot_mapping = _gen_slot_mapping(
-                            branch_positions, block_table, block_size
-                        )
-
-                        # Reference attention for this branch.
-                        ref_output = forward_attention(
-                            q=q_branch,
-                            k=k_branch,
-                            v=v_branch,
-                            kv_cache=kv_cache,
-                            block_table=block_table,
-                            slot_mapping=branch_slot_mapping,
-                            seqlen_k=sequence_position + q_len,
-                            backend=reference_backend,
-                        ).view(batch_size, -1, num_heads, dim_per_head)
-
-                        # Compare the outputs.
-                        assert torch.allclose(
-                            tree_attn_output[:, branch_indices],
-                            ref_output,
-                            atol=7.81e-3,
-                        ), (
-                            f"outputs are not close for "
-                            f"reference_backend: {reference_backend.name}, "
-                            f"batch_size: {batch_size}, "
-                            f"num_heads: {num_heads}, "
-                            f"sequence_position: {sequence_position}, "
-                            f"tree_attn_mask: {tree_attn_mask}, "
-                            f"q_index: {q_index}."
-                        )
-
-
-def _gen_slot_mapping(
-    positions: torch.Tensor, block_table: torch.Tensor, block_size: int
-):
-    block_indices = positions // block_size
-    blocks = block_table.gather(dim=1, index=block_indices)
-    return (blocks * block_size + positions % block_size).view(-1)
diff --git a/tests/v1/streaming_input/test_gpu_model_runner_streaming.py b/tests/v1/streaming_input/test_gpu_model_runner_streaming.py
index 0ed7b6cb3efc..946ca99507df 100644
--- a/tests/v1/streaming_input/test_gpu_model_runner_streaming.py
+++ b/tests/v1/streaming_input/test_gpu_model_runner_streaming.py
@@ -39,7 +39,6 @@ def mock_model_runner_with_input_batch():
         vocab_size=32000,
         block_sizes=[16],
         kernel_block_sizes=[16],
-        is_spec_decode=False,
         logitsprocs=None,
         is_pooling_model=False,
     )
diff --git a/tests/v1/streaming_input/test_scheduler_streaming.py b/tests/v1/streaming_input/test_scheduler_streaming.py
index fd9f6b17f9a9..7d680895b836 100644
--- a/tests/v1/streaming_input/test_scheduler_streaming.py
+++ b/tests/v1/streaming_input/test_scheduler_streaming.py
@@ -76,6 +76,7 @@ def create_scheduler() -> Scheduler:
         log_stats=True,
         structured_output_manager=StructuredOutputManager(vllm_config),
         block_size=16,
+        hash_block_size=16,
     )
 
 
diff --git a/tests/v1/structured_output/test_backend_guidance.py b/tests/v1/structured_output/test_backend_guidance.py
index 704ed8b9c9e9..ca8c9b0d7853 100644
--- a/tests/v1/structured_output/test_backend_guidance.py
+++ b/tests/v1/structured_output/test_backend_guidance.py
@@ -11,6 +11,7 @@
 from vllm.config.parallel import ParallelConfig
 from vllm.config.speculative import SpeculativeConfig
 from vllm.sampling_params import SamplingParams, StructuredOutputsParams
+from vllm.tokenizers import get_tokenizer
 from vllm.v1.request import Request
 from vllm.v1.structured_output import StructuredOutputManager
 from vllm.v1.structured_output.backend_guidance import GuidanceBackend
@@ -19,6 +20,14 @@
 TOKENIZER = "gpt2"
 
 
+@pytest.fixture(scope="module")
+def mistral_tokenizer():
+    return get_tokenizer(
+        tokenizer_name="mistralai/Mistral-Small-3.2-24B-Instruct-2506",
+        tokenizer_mode="mistral",
+    )
+
+
 def test_backend_guidance_rollback_terminated():
     # Test that the backend guidance successfully rollbacks from a
     # terminated state. This can happen with speculative decoding,
@@ -187,3 +196,38 @@ def test_grammar_init_async_and_sync(async_grammar):
 
     # Verify the grammar can accept valid tokens
     assert grammar.accept_tokens(request.request_id, prompt)
+
+
+@pytest.mark.parametrize(
+    "request_type,grammar_spec",
+    [
+        pytest.param(
+            StructuredOutputOptions.JSON,
+            '{"type": "object"}',
+            id="json",
+        ),
+        pytest.param(
+            StructuredOutputOptions.GRAMMAR,
+            'start: "hello" | "world"',
+            id="lark",
+        ),
+    ],
+)
+def test_mistral_tokenizer_compile_grammar(
+    mistral_tokenizer,
+    request_type: StructuredOutputOptions,
+    grammar_spec: str,
+) -> None:
+    vllm_config = VllmConfig(
+        structured_outputs_config=StructuredOutputsConfig(backend="guidance"),
+    )
+    backend = GuidanceBackend(
+        vllm_config,
+        tokenizer=mistral_tokenizer,
+        vocab_size=mistral_tokenizer.vocab_size,
+    )
+    assert backend.ll_tokenizer is mistral_tokenizer.llg_tokenizer
+
+    grammar = backend.compile_grammar(request_type, grammar_spec)
+    assert grammar is not None
+    assert not grammar.is_terminated()
diff --git a/tests/v1/structured_output/test_reasoning_structured_output.py b/tests/v1/structured_output/test_reasoning_structured_output.py
index 98a25e41dfe0..861e919c102a 100644
--- a/tests/v1/structured_output/test_reasoning_structured_output.py
+++ b/tests/v1/structured_output/test_reasoning_structured_output.py
@@ -8,9 +8,15 @@
 import pytest
 
 from vllm.config import ModelConfig, SchedulerConfig, VllmConfig
-from vllm.reasoning import ReasoningParser
 from vllm.v1.request import Request
 from vllm.v1.structured_output import StructuredOutputManager
+from vllm.v1.structured_output.backend_types import StructuredOutputOptions
+
+
+class MockReasoner:
+    def __init__(self, tokenizer):
+        self.is_reasoning_end = Mock(return_value=False)
+        self.is_reasoning_end_streaming = Mock(return_value=False)
 
 
 class TestReasoningStructuredOutput:
@@ -50,13 +56,6 @@ def mock_vllm_config(self, mock_model_config, mock_scheduler_config):
         config.speculative_config = None
         return config
 
-    @pytest.fixture
-    def mock_reasoning_parser(self):
-        """Create a mock ReasoningParser."""
-        parser = Mock(spec=ReasoningParser)
-        parser.is_reasoning_end = Mock(return_value=False)
-        return parser
-
     @pytest.fixture
     def mock_request_with_structured_output(self):
         """Create a mock request with structured output."""
@@ -64,6 +63,8 @@ def mock_request_with_structured_output(self):
         request.structured_output_request = Mock()
         request.structured_output_request.reasoning_ended = None
         request.structured_output_request.grammar = Mock()
+        request.structured_output_request.reasoning_parser_kwargs = None
+        request.structured_output_request.reasoner = None
         request.structured_output_request.grammar.is_terminated = Mock(
             return_value=False
         )
@@ -74,6 +75,13 @@ def mock_request_with_structured_output(self):
         request.num_output_placeholders = 0
         return request
 
+    @pytest.fixture
+    def manager_with_reasoner(self, mock_vllm_config):
+        manager = StructuredOutputManager(mock_vllm_config)
+        manager.reasoner_cls = MockReasoner
+        manager.tokenizer = Mock()
+        return manager
+
     def test_should_fill_bitmask_with_enable_in_reasoning(
         self, mock_vllm_config, mock_request_with_structured_output
     ):
@@ -89,22 +97,17 @@ def test_should_fill_bitmask_with_enable_in_reasoning(
 
     def test_should_fill_bitmask_without_enable_in_reasoning(
         self,
-        mock_vllm_config,
+        manager_with_reasoner,
         mock_request_with_structured_output,
-        mock_reasoning_parser,
     ):
         """Test should_fill_bitmask when enable_in_reasoning is False."""
         # Keep enable_in_reasoning as False (default)
-        config = mock_vllm_config.structured_outputs_config
+        config = manager_with_reasoner.vllm_config.structured_outputs_config
         assert config.enable_in_reasoning is False
 
-        manager = StructuredOutputManager(mock_vllm_config)
-        manager.reasoner = mock_reasoning_parser
-
-        # Mock reasoning not ended
-        mock_reasoning_parser.is_reasoning_end.return_value = False
-
-        result = manager.should_fill_bitmask(mock_request_with_structured_output)
+        result = manager_with_reasoner.should_fill_bitmask(
+            mock_request_with_structured_output
+        )
 
         # Should set reasoning_ended and return its value
         assert (
@@ -118,68 +121,92 @@ def test_should_fill_bitmask_no_reasoner(
     ):
         """Test should_fill_bitmask when no reasoner is configured."""
         manager = StructuredOutputManager(mock_vllm_config)
-        manager.reasoner = None
 
         result = manager.should_fill_bitmask(mock_request_with_structured_output)
 
         # Should default to True when no reasoner
         assert result is True
 
+    def test_should_fill_bitmask_uses_request_reasoning_parser_kwargs(
+        self, mock_vllm_config, mock_request_with_structured_output
+    ):
+        """Test request-level parser kwargs override the default reasoner."""
+
+        class KwargReasoner:
+            def __init__(self, tokenizer, chat_template_kwargs=None):
+                self.chat_template_kwargs = chat_template_kwargs or {}
+
+            def is_reasoning_end(self, input_ids):
+                return not self.chat_template_kwargs.get("enable_thinking", False)
+
+        manager = StructuredOutputManager(mock_vllm_config)
+        manager.reasoner_cls = KwargReasoner
+        manager.tokenizer = Mock()
+
+        structured_req = mock_request_with_structured_output.structured_output_request
+        structured_req.reasoning_parser_kwargs = {
+            "chat_template_kwargs": {"enable_thinking": True}
+        }
+
+        result = manager.should_fill_bitmask(mock_request_with_structured_output)
+
+        assert result is False
+        assert (
+            mock_request_with_structured_output.structured_output_request.reasoner
+            is not None
+        )
+
     def test_should_advance_with_enable_in_reasoning(
         self,
-        mock_vllm_config,
+        manager_with_reasoner,
         mock_request_with_structured_output,
-        mock_reasoning_parser,
     ):
         """Test should_advance when enable_in_reasoning is True."""
         # Enable enable_in_reasoning
-        mock_vllm_config.structured_outputs_config.enable_in_reasoning = True
-
-        manager = StructuredOutputManager(mock_vllm_config)
-        manager.reasoner = mock_reasoning_parser
+        manager_with_reasoner.enable_in_reasoning = True
 
         # Should always return True when enable_in_reasoning is enabled
-        result = manager.should_advance(mock_request_with_structured_output)
+        result = manager_with_reasoner.should_advance(
+            mock_request_with_structured_output
+        )
         assert result is True
 
     def test_should_advance_reasoning_not_ended(
         self,
-        mock_vllm_config,
+        manager_with_reasoner,
         mock_request_with_structured_output,
-        mock_reasoning_parser,
     ):
         """Test should_advance when reasoning has not ended."""
-        manager = StructuredOutputManager(mock_vllm_config)
-        manager.reasoner = mock_reasoning_parser
-
         # Set reasoning as not ended
         (
             mock_request_with_structured_output.structured_output_request
         ).reasoning_ended = False
-        mock_reasoning_parser.is_reasoning_end.return_value = False
 
-        result = manager.should_advance(mock_request_with_structured_output)
+        result = manager_with_reasoner.should_advance(
+            mock_request_with_structured_output
+        )
 
         # Should return False since reasoning hasn't ended
         assert result is False
 
     def test_should_advance_reasoning_just_ended(
         self,
-        mock_vllm_config,
+        manager_with_reasoner,
         mock_request_with_structured_output,
-        mock_reasoning_parser,
     ):
         """Test should_advance when reasoning ends in current step."""
-        manager = StructuredOutputManager(mock_vllm_config)
-        manager.reasoner = mock_reasoning_parser
-
         # Set reasoning as not ended initially, but ends in this step
         (
             mock_request_with_structured_output.structured_output_request
         ).reasoning_ended = False
-        mock_reasoning_parser.is_reasoning_end.return_value = True
+        reasoner = MockReasoner(tokenizer=Mock())
+        reasoner.is_reasoning_end_streaming.return_value = True
+        structured_req = mock_request_with_structured_output.structured_output_request
+        structured_req.reasoner = reasoner
 
-        result = manager.should_advance(mock_request_with_structured_output)
+        result = manager_with_reasoner.should_advance(
+            mock_request_with_structured_output
+        )
 
         # Should set reasoning_ended to True but return False for this step
         assert (
@@ -188,22 +215,46 @@ def test_should_advance_reasoning_just_ended(
         )
         assert result is False
 
+    def test_should_advance_reasoning_just_ended_with_spec_decode_structural_tag(
+        self,
+        manager_with_reasoner,
+        mock_request_with_structured_output,
+    ):
+        """When reasoning ends this step, advance immediately for structural
+        tags with speculative decoding."""
+        structured_req = mock_request_with_structured_output.structured_output_request
+        structured_req.reasoning_ended = False
+        structured_req.structured_output_key = (
+            StructuredOutputOptions.STRUCTURAL_TAG,
+            "{}",
+        )
+        reasoner = MockReasoner(tokenizer=Mock())
+        reasoner.is_reasoning_end_streaming.return_value = True
+        structured_req.reasoner = reasoner
+
+        manager_with_reasoner.vllm_config.speculative_config = Mock()
+
+        result = manager_with_reasoner.should_advance(
+            mock_request_with_structured_output
+        )
+
+        assert structured_req.reasoning_ended is True
+        assert result is True
+
     def test_should_advance_reasoning_already_ended(
         self,
-        mock_vllm_config,
+        manager_with_reasoner,
         mock_request_with_structured_output,
-        mock_reasoning_parser,
     ):
         """Test should_advance when reasoning has already ended."""
-        manager = StructuredOutputManager(mock_vllm_config)
-        manager.reasoner = mock_reasoning_parser
-
         # Set reasoning as already ended
         (
             mock_request_with_structured_output.structured_output_request
         ).reasoning_ended = True
 
-        result = manager.should_advance(mock_request_with_structured_output)
+        result = manager_with_reasoner.should_advance(
+            mock_request_with_structured_output
+        )
 
         # Should return True since reasoning has ended
         assert result is True
diff --git a/tests/v1/test_tensor_ipc_queue.py b/tests/v1/test_tensor_ipc_queue.py
index a3fcb97ca171..a70f5d48cc54 100644
--- a/tests/v1/test_tensor_ipc_queue.py
+++ b/tests/v1/test_tensor_ipc_queue.py
@@ -14,6 +14,7 @@
 import torch
 import torch.multiprocessing as torch_mp
 
+from vllm.platforms import current_platform
 from vllm.v1.engine.tensor_ipc import (
     TensorIpcData,
     TensorIpcReceiver,
@@ -21,6 +22,8 @@
 )
 from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder
 
+DEVICE_TYPE = current_platform.device_type
+
 
 @pytest.fixture(scope="module", autouse=True)
 def setup_multiprocessing():
@@ -53,7 +56,7 @@ def encoder_process(
         encoder = MsgpackEncoder(oob_tensor_consumer=sender)
 
         if torch.cuda.is_available():
-            device = "cuda:0"
+            device = f"{DEVICE_TYPE}:0"
             tensor = torch.randn(
                 *tensor_data["shape"], dtype=tensor_data["dtype"], device=device
             )
@@ -384,7 +387,7 @@ def mixed_tensor_encoder_process(
 
         # Create only CUDA tensor for IPC (CPU will be serialized)
         # But actually, let's just send CUDA tensor directly
-        cuda_tensor = torch.randn(4, 5, device="cuda:0")
+        cuda_tensor = torch.randn(4, 5, device=f"{DEVICE_TYPE}:0")
 
         # Manually send via IPC to test the mechanism
         cuda_tensor_shared = cuda_tensor.share_memory_()
@@ -651,7 +654,7 @@ def test_ipc_disabled_mode():
 
     # If CUDA is available, test with CUDA tensor too
     if torch.cuda.is_available():
-        cuda_tensor = torch.randn(4, 5, device="cuda:0")
+        cuda_tensor = torch.randn(4, 5, device=f"{DEVICE_TYPE}:0")
         encoded_cuda = encoder.encode({"cuda_tensor": cuda_tensor})
         assert len(encoded_cuda) > 0
         assert tensor_queues[0].empty(), (
diff --git a/tests/v1/worker/test_gpu_input_batch.py b/tests/v1/worker/test_gpu_input_batch.py
index d4eee19adab5..3a478d21013f 100644
--- a/tests/v1/worker/test_gpu_input_batch.py
+++ b/tests/v1/worker/test_gpu_input_batch.py
@@ -22,10 +22,8 @@
 VOCAB_SIZE = 1024
 NUM_OUTPUT_TOKENS = 20
 MAX_PROMPT_SIZE = 100
-CUDA_DEVICES = [
-    f"{current_platform.device_type}:{i}"
-    for i in range(min(current_platform.device_count(), 2))
-]
+DEVICE_TYPE = current_platform.device_type
+DEVICES = [f"{DEVICE_TYPE}:{i}" for i in range(min(current_platform.device_count(), 2))]
 MAX_NUM_PROMPT_TOKENS = 64
 
 
@@ -219,7 +217,7 @@ def _construct_cached_request_state(req_id_suffix: int):
     )
 
 
-@pytest.mark.parametrize("device", CUDA_DEVICES)
+@pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("batch_size", [1, 2, 32, 64])
 def test_sampling_metadata_in_input_batch(device: str, batch_size: int):
     """
@@ -313,7 +311,7 @@ def same(t1: torch.Tensor | None, t2: torch.Tensor | None) -> bool:
     )
 
 
-@pytest.mark.parametrize("device", CUDA_DEVICES)
+@pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("batch_size", [32])
 @pytest.mark.parametrize("swap_list", [((0, 1),)])
 def test_swap_states_in_input_batch(device: str, batch_size: int, swap_list: list):
@@ -400,7 +398,7 @@ def _construct_pooling_request(req_id_suffix: int, pooling_params=None):
     )
 
 
-@pytest.mark.parametrize("device", CUDA_DEVICES)
+@pytest.mark.parametrize("device", DEVICES)
 def test_pooling_prompt_lens_not_aliased(device: str):
     """Verify that prompt_lens in PoolingMetadata does not share memory
     with the internal num_prompt_tokens pinned buffer. Guards against possible
diff --git a/tests/v1/worker/test_gpu_model_runner.py b/tests/v1/worker/test_gpu_model_runner.py
index 93c5435e817b..1da5d9570737 100644
--- a/tests/v1/worker/test_gpu_model_runner.py
+++ b/tests/v1/worker/test_gpu_model_runner.py
@@ -1,10 +1,14 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from types import SimpleNamespace
+from unittest.mock import Mock
+
 import numpy as np
 import pytest
 import torch
 
+import vllm.v1.worker.gpu_model_runner as gpu_model_runner_module
 from vllm.config import (
     AttentionConfig,
     CacheConfig,
@@ -30,19 +34,21 @@
 from vllm.v1.core.kv_cache_utils import estimate_max_model_len, get_kv_cache_configs
 from vllm.v1.core.sched.output import CachedRequestData, NewRequestData, SchedulerOutput
 from vllm.v1.kv_cache_interface import (
+    AttentionSpec,
     FullAttentionSpec,
     KVCacheConfig,
     KVCacheGroupSpec,
     KVCacheTensor,
 )
 from vllm.v1.sample.metadata import SamplingMetadata
+from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
 from vllm.v1.worker.gpu_input_batch import InputBatch
 from vllm.v1.worker.gpu_model_runner import GPUModelRunner
-from vllm.v1.worker.utils import select_common_block_size
+from vllm.v1.worker.utils import AttentionGroup, select_common_block_size
 
 BLOCK_SIZE = 16
 NUM_BLOCKS = 10
-DEVICE = current_platform.device_type
+DEVICE_TYPE = current_platform.device_type
 
 
 def initialize_kv_cache(runner: GPUModelRunner):
@@ -118,7 +124,7 @@ def model_runner():
         vllm_config.compilation_config.static_forward_context["layer.0"] = Attention(
             num_heads, head_size, 0.1
         )
-        runner = GPUModelRunner(vllm_config, DEVICE)
+        runner = GPUModelRunner(vllm_config, DEVICE_TYPE)
         initialize_kv_cache(runner)
         yield runner
 
@@ -159,6 +165,34 @@ def _schedule_new_request(*req_ids: str) -> SchedulerOutput:
     )
 
 
+def _schedule_cached_requests(
+    req_ids: list[str],
+    num_scheduled_tokens: dict[str, int],
+    new_token_ids: list[list[int]],
+    num_computed_tokens: list[int],
+    num_output_tokens: list[int],
+) -> SchedulerOutput:
+    return SchedulerOutput(
+        scheduled_new_reqs=[],
+        scheduled_cached_reqs=CachedRequestData(
+            req_ids=req_ids,
+            resumed_req_ids=set(),
+            new_token_ids=new_token_ids,
+            all_token_ids={},
+            new_block_ids=[None] * len(req_ids),
+            num_computed_tokens=num_computed_tokens,
+            num_output_tokens=num_output_tokens,
+        ),
+        num_scheduled_tokens=num_scheduled_tokens,
+        total_num_scheduled_tokens=sum(num_scheduled_tokens.values()),
+        scheduled_spec_decode_tokens={},
+        scheduled_encoder_inputs={},
+        num_common_prefix_blocks=[],
+        finished_req_ids=set(),
+        free_encoder_mm_hashes=[],
+    )
+
+
 def _is_req_scheduled(model_runner, req_id: str) -> bool:
     return req_id in model_runner.input_batch.req_id_to_index
 
@@ -216,6 +250,58 @@ def test_select_common_block_size_uses_largest_shared_int():
     assert selected_size == 64
 
 
+@pytest.mark.skip_global_cleanup
+@pytest.mark.parametrize(
+    ("world_size", "is_last_rank", "expected_calls"),
+    [(1, True, 0), (2, True, 0), (2, False, 1)],
+)
+def test_sample_tokens_receives_pp_sampled_ids_only_on_non_last_rank(
+    monkeypatch: pytest.MonkeyPatch,
+    world_size: int,
+    is_last_rank: bool,
+    expected_calls: int,
+):
+    runner = GPUModelRunner.__new__(GPUModelRunner)
+    runner.execute_model_state = None
+    runner.kv_connector_output = None
+    runner.use_async_scheduling = True
+    receive_calls = 0
+
+    def receive_prev_sampled_token_ids():
+        nonlocal receive_calls
+        receive_calls += 1
+
+    runner._pp_receive_prev_sampled_token_ids_to_input_batch = (
+        receive_prev_sampled_token_ids
+    )
+    monkeypatch.setattr(
+        gpu_model_runner_module,
+        "get_pp_group",
+        lambda: SimpleNamespace(world_size=world_size, is_last_rank=is_last_rank),
+    )
+
+    assert GPUModelRunner.sample_tokens(runner, None) is None
+    assert receive_calls == expected_calls
+
+
+@pytest.mark.skip_global_cleanup
+def test_sample_tokens_skips_pp_group_lookup_without_async_scheduling(
+    monkeypatch: pytest.MonkeyPatch,
+):
+    runner = GPUModelRunner.__new__(GPUModelRunner)
+    runner.execute_model_state = None
+    runner.kv_connector_output = None
+    runner.use_async_scheduling = False
+
+    monkeypatch.setattr(
+        gpu_model_runner_module,
+        "get_pp_group",
+        pytest.fail,
+    )
+
+    assert GPUModelRunner.sample_tokens(runner, None) is None
+
+
 def test_select_common_block_size_no_valid_option():
     backend_a = _make_mock_backend_for_kernel_block_size([64])
     backend_b = _make_mock_backend_for_kernel_block_size([MultipleOf(16)])
@@ -337,7 +423,7 @@ def test_get_nans_in_logits(model_runner, dist_init):
             [1.0, 2.0, 3.0],
             [3.0, 2.0, 1.0],
         ],
-        device=DEVICE,
+        device=DEVICE_TYPE,
     )
     result = model_runner._get_nans_in_logits(logits)
     assert result == {"req_0": 0, "req_1": 0}
@@ -347,7 +433,7 @@ def test_get_nans_in_logits(model_runner, dist_init):
             [1.0, float("nan"), 3.0],
             [4.0, float("nan"), float("nan")],
         ],
-        device=DEVICE,
+        device=DEVICE_TYPE,
     )
     result = model_runner._get_nans_in_logits(logits)
     assert result == {"req_0": 1, "req_1": 2}
@@ -357,7 +443,7 @@ def test_get_nans_in_logits(model_runner, dist_init):
             [1.0, 2.0, 3.0],
             [4.0, float("nan"), float("nan")],
         ],
-        device=DEVICE,
+        device=DEVICE_TYPE,
     )
     result = model_runner._get_nans_in_logits(logits)
     assert result == {"req_0": 0, "req_1": 2}
@@ -369,7 +455,7 @@ def test_get_nans_in_logits(model_runner, dist_init):
         [
             [1.0, float("nan"), 3.0],
         ],
-        device=DEVICE,
+        device=DEVICE_TYPE,
     )
     result = model_runner._get_nans_in_logits(logits)
     assert result == {"req_0": 1, "req_1": 0}
@@ -380,7 +466,7 @@ def test_get_nans_in_logits(model_runner, dist_init):
             [1.0, 2.0, 3.0],
             [float("nan"), 2.0, 3.0],
         ],
-        device=DEVICE,
+        device=DEVICE_TYPE,
     )
     result = model_runner._get_nans_in_logits(logits)
     assert result == {"req_0": 2, "req_1": 0}
@@ -454,6 +540,135 @@ def test_update_states_request_unscheduled(model_runner, dist_init):
     assert not _is_req_scheduled(model_runner, req_ids[1])
 
 
+def test_update_states_pp_non_async_multi_request_keeps_token_buffers_consistent(
+    model_runner, model_runner_2, dist_init, monkeypatch
+):
+    req_ids = ["req_0", "req_1"]
+    non_last_runner = model_runner
+    last_runner = model_runner_2
+    non_last_runner.use_async_scheduling = False
+    last_runner.use_async_scheduling = False
+
+    # Both ranks start from the same request set.
+    monkeypatch.setattr(
+        "vllm.v1.worker.gpu_model_runner.get_pp_group",
+        lambda: SimpleNamespace(is_last_rank=False, world_size=2),
+    )
+    non_last_runner._update_states(_schedule_new_request(*req_ids))
+    last_runner._update_states(_schedule_new_request(*req_ids))
+
+    sampled_by_last_rank = {req_ids[0]: 101, req_ids[1]: 201}
+    # Emulate last-rank bookkeeping result from previous step:
+    # sampled tokens already cached in CPU token buffers.
+    for req_id, token_id in sampled_by_last_rank.items():
+        req_index = last_runner.input_batch.req_id_to_index[req_id]
+        start_idx = int(last_runner.input_batch.num_tokens_no_spec[req_index])
+        end_idx = start_idx + 1
+        last_runner.input_batch.token_ids_cpu[req_index, start_idx:end_idx] = [token_id]
+        last_runner.input_batch.is_token_ids[req_index, start_idx:end_idx] = True
+        last_runner.input_batch.num_tokens_no_spec[req_index] = end_idx
+        last_runner.requests[req_id].output_token_ids.append(token_id)
+
+    scheduler_output = _schedule_cached_requests(
+        req_ids=req_ids,
+        num_scheduled_tokens={req_ids[0]: 1, req_ids[1]: 1},
+        new_token_ids=[[101], [201]],
+        num_computed_tokens=[3, 3],  # prompt tokens only
+        num_output_tokens=[1, 1],
+    )
+    # non-last rank appends new_token_ids in _update_states.
+    monkeypatch.setattr(
+        "vllm.v1.worker.gpu_model_runner.get_pp_group",
+        lambda: SimpleNamespace(is_last_rank=False, world_size=2),
+    )
+    non_last_runner._update_states(scheduler_output)
+    # last rank should keep its already-bookkept CPU buffers unchanged.
+    monkeypatch.setattr(
+        "vllm.v1.worker.gpu_model_runner.get_pp_group",
+        lambda: SimpleNamespace(is_last_rank=True, world_size=2),
+    )
+    last_runner._update_states(scheduler_output)
+
+    # Verify consistency between PP ranks after _update_states.
+    for req_id in req_ids:
+        non_last_idx = non_last_runner.input_batch.req_id_to_index[req_id]
+        last_idx = last_runner.input_batch.req_id_to_index[req_id]
+        non_last_len = int(non_last_runner.input_batch.num_tokens_no_spec[non_last_idx])
+        last_len = int(last_runner.input_batch.num_tokens_no_spec[last_idx])
+        assert non_last_len == last_len
+        assert (
+            non_last_runner.input_batch.token_ids_cpu[
+                non_last_idx, :non_last_len
+            ].tolist()
+            == last_runner.input_batch.token_ids_cpu[last_idx, :last_len].tolist()
+        )
+
+
+def test_update_states_pp_async_multi_request_keeps_rank_state_consistent(
+    model_runner, model_runner_2, dist_init, monkeypatch
+):
+    req_ids = ["req_0", "req_1"]
+    non_last_runner = model_runner
+    last_runner = model_runner_2
+    non_last_runner.use_async_scheduling = True
+    last_runner.use_async_scheduling = True
+
+    # Both ranks start from the same request set.
+    monkeypatch.setattr(
+        "vllm.v1.worker.gpu_model_runner.get_pp_group",
+        lambda: SimpleNamespace(is_last_rank=False, world_size=2),
+    )
+    non_last_runner._update_states(_schedule_new_request(*req_ids))
+    last_runner._update_states(_schedule_new_request(*req_ids))
+
+    # Simulate async previous-step sampled tokens known on both ranks.
+    # non-last rank may receive them via PP communication; last rank has
+    # them from local sampling/bookkeeping.
+    sampled_by_last_rank = {req_ids[0]: 111, req_ids[1]: 222}
+    for runner in (non_last_runner, last_runner):
+        for req_id, token_id in sampled_by_last_rank.items():
+            req_index = runner.input_batch.req_id_to_index[req_id]
+            start_idx = int(runner.input_batch.num_tokens_no_spec[req_index])
+            end_idx = start_idx + 1
+            runner.input_batch.token_ids_cpu[req_index, start_idx:end_idx] = [token_id]
+            runner.input_batch.is_token_ids[req_index, start_idx:end_idx] = True
+            runner.input_batch.num_tokens_no_spec[req_index] = end_idx
+            runner.requests[req_id].output_token_ids.append(token_id)
+
+    scheduler_output = _schedule_cached_requests(
+        req_ids=req_ids,
+        num_scheduled_tokens={req_ids[0]: 1, req_ids[1]: 1},
+        new_token_ids=[],
+        num_computed_tokens=[4, 4],
+        num_output_tokens=[1, 1],
+    )
+    # non-last rank: async PP branch (new_token_ids empty).
+    monkeypatch.setattr(
+        "vllm.v1.worker.gpu_model_runner.get_pp_group",
+        lambda: SimpleNamespace(is_last_rank=False, world_size=2),
+    )
+    non_last_runner._update_states(scheduler_output)
+    # last rank: keep already-bookkept state aligned with scheduler view.
+    monkeypatch.setattr(
+        "vllm.v1.worker.gpu_model_runner.get_pp_group",
+        lambda: SimpleNamespace(is_last_rank=True, world_size=2),
+    )
+    last_runner._update_states(scheduler_output)
+
+    for req_id in req_ids:
+        non_last_idx = non_last_runner.input_batch.req_id_to_index[req_id]
+        last_idx = last_runner.input_batch.req_id_to_index[req_id]
+        non_last_len = int(non_last_runner.input_batch.num_tokens_no_spec[non_last_idx])
+        last_len = int(last_runner.input_batch.num_tokens_no_spec[last_idx])
+        assert non_last_len == last_len
+        assert (
+            non_last_runner.input_batch.token_ids_cpu[
+                non_last_idx, :non_last_len
+            ].tolist()
+            == last_runner.input_batch.token_ids_cpu[last_idx, :last_len].tolist()
+        )
+
+
 def test_kv_cache_stride_order(monkeypatch, model_runner):
     # This test checks if GPUModelRunner initializes correctly when an attention
     # backend enforces a non-default KV cache stride order.
@@ -534,6 +749,39 @@ def test_reload_weights_before_load_model(model_runner):
         model_runner.reload_weights()
 
 
+def test_sample_passes_reordered_draft_probs_to_rejection_sampler():
+    runner = object.__new__(GPUModelRunner)
+    runner.use_async_scheduling = False
+    runner.input_batch = SimpleNamespace(
+        sampling_metadata=Mock(spec=SamplingMetadata),
+        update_async_output_token_ids=Mock(),
+        req_ids=["req_a", "req_b", "req_c"],
+    )
+    runner.rejection_sampler = Mock(return_value="sampler_output")
+    runner.sampler = Mock()
+    runner._draft_prob_req_ids = ["req_c", "req_a", "req_b"]
+    runner._draft_probs = torch.arange(3 * 3 * 4, dtype=torch.float32).reshape(3, 3, 4)
+
+    spec_decode_metadata = SpecDecodeMetadata.make_dummy(
+        [[1, 2], [], [3]],
+        device=torch.device("cpu"),
+    )
+    logits = torch.randn(6, 4)
+
+    output = GPUModelRunner._sample(runner, logits, spec_decode_metadata)
+
+    assert output == "sampler_output"
+    passed_draft_probs = runner.rejection_sampler.call_args.args[1]
+    expected_draft_probs = torch.cat(
+        [
+            runner._draft_probs[1, :2],
+            runner._draft_probs[0, :1],
+        ],
+        dim=0,
+    )
+    assert torch.equal(passed_draft_probs, expected_draft_probs)
+
+
 def test_init_kv_cache_with_kv_sharing_invalid_target_layer_order(default_vllm_config):
     torch.set_default_dtype(torch.float16)
     layer_0 = "model.layers.0.self_attn.attn"
@@ -640,7 +888,7 @@ def test_init_kv_cache_without_kv_sharing(default_vllm_config):
     # Set high context length to test max context length estimation
     vllm_config.model_config.max_model_len = 3_000_000
     vllm_ctx = vllm_config.compilation_config.static_forward_context
-    runner = GPUModelRunner(vllm_config, DEVICE)
+    runner = GPUModelRunner(vllm_config, DEVICE_TYPE)
     kv_cache_spec = runner.get_kv_cache_spec()
     assert len(kv_cache_spec) == 2
     assert len(runner.shared_kv_cache_layers) == 0
@@ -708,7 +956,7 @@ def test_init_kv_cache_with_kv_sharing_valid(default_vllm_config):
     # Set high context length to test max context length estimation
     vllm_config.model_config.max_model_len = 3_000_000
     vllm_ctx = vllm_config.compilation_config.static_forward_context
-    runner = GPUModelRunner(vllm_config, DEVICE)
+    runner = GPUModelRunner(vllm_config, DEVICE_TYPE)
     kv_cache_spec = runner.get_kv_cache_spec()
     assert len(kv_cache_spec) == 1
     assert layer_0 in kv_cache_spec
@@ -847,7 +1095,8 @@ def test_hybrid_attention_mamba_tensor_shapes():
         assert fwd_context is not None
         vllm_ctx = vllm_config.compilation_config.static_forward_context
 
-        runner = GPUModelRunner(vllm_config, DEVICE)
+        runner = GPUModelRunner(vllm_config, DEVICE_TYPE)
+        current_platform.update_block_size_for_backend(vllm_config)
         kv_cache_spec = runner.get_kv_cache_spec()
 
         available_memory = 5 * GiB_bytes
@@ -892,13 +1141,13 @@ def test_hybrid_attention_mamba_tensor_shapes():
     ssm_constant_shape = ssm_shape[1:]
 
     attn_blocks_constant = torch.full(
-        (test_block_size, *attn_constant_shape), device=DEVICE, fill_value=3.33
+        (test_block_size, *attn_constant_shape), device=DEVICE_TYPE, fill_value=3.33
     )
     conv_blocks_constant = torch.full(
-        (test_block_size, *conv_constant_shape), device=DEVICE, fill_value=6.66
+        (test_block_size, *conv_constant_shape), device=DEVICE_TYPE, fill_value=6.66
     )
     ssm_blocks_constant = torch.full(
-        (test_block_size, *ssm_constant_shape), device=DEVICE, fill_value=9.99
+        (test_block_size, *ssm_constant_shape), device=DEVICE_TYPE, fill_value=9.99
     )
 
     # Fill attention blocks with constants using kv block indices
@@ -946,6 +1195,33 @@ def test_hybrid_attention_mamba_tensor_shapes():
             assert torch.equal(actual_ssm, expected_ssm)
 
 
+def test_update_hybrid_attention_mamba_layout_with_num_block_2_rewrites_stride():
+    from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend
+
+    ambiguous_cache = torch.empty((2, 2, BLOCK_SIZE, 1, 8), dtype=torch.float16)
+    """Ambiguous, because both dims[0=kv_dim] and dims[1=num_blocks] == 2"""
+    hidden_size = ambiguous_cache.shape[2:].numel()
+    assert ambiguous_cache.stride()[:2] == (2 * hidden_size, hidden_size)
+
+    attention_spec = AttentionSpec(
+        block_size=BLOCK_SIZE, num_kv_heads=1, head_size=8, dtype=torch.float16
+    )
+    runner_stub = SimpleNamespace(
+        cache_config=SimpleNamespace(cache_dtype="auto"),
+        _kv_cache_spec_attn_group_iterator=lambda: iter(
+            [AttentionGroup(FlashAttentionBackend, ["attn"], attention_spec, 0)]
+        ),
+    )
+    GPUModelRunner._update_hybrid_attention_mamba_layout(
+        runner_stub, {"attn": ambiguous_cache}, [BLOCK_SIZE]
+    )
+
+    assert ambiguous_cache.stride()[:2] == (hidden_size, 2 * hidden_size), """\
+        We expect _update_hybrid_attention_mamba_layout to re-stride the cache from:
+        (2, num_blocks) -> (num_blocks, 2), even when num_blocks==2, 
+        which was ambiguous before get_kv_cache_block_dim was used"""
+
+
 def test_hybrid_block_table_initialization():
     """Test hybrid block table with different kernel and kvcache_manager block
     sizes."""
@@ -966,7 +1242,7 @@ def test_hybrid_block_table_initialization():
         max_num_blocks_per_req=max_num_blocks_per_req,
         max_num_batched_tokens=max_num_batched_tokens,
         pin_memory=False,
-        device=torch.device(DEVICE),
+        device=torch.device(DEVICE_TYPE),
         kernel_block_size=kernel_block_sizes[0],
         cp_kv_cache_interleave_size=cp_kv_cache_interleave_size,
     )
@@ -1005,7 +1281,7 @@ def test_input_batch_with_kernel_block_sizes():
     max_num_reqs = 10
     max_model_len = 512
     max_num_batched_tokens = 512
-    device = torch.device(DEVICE)
+    device = torch.device(DEVICE_TYPE)
     pin_memory = False
     vocab_size = 50272
 
@@ -1052,7 +1328,7 @@ def test_hybrid_cache_integration(default_vllm_config, dist_init):
         num_heads, head_size, 0.1
     )
 
-    runner = GPUModelRunner(vllm_config, DEVICE)
+    runner = GPUModelRunner(vllm_config, DEVICE_TYPE)
 
     # Initialize KV cache with configuration
     attn_spec = FullAttentionSpec(
@@ -1191,9 +1467,9 @@ def test_is_uniform_decode() -> None:
     current_platform.is_rocm(),
     reason="Attention backend FLASHINFER is not supported on ROCm.",
 )
-def test_cudagraph_sizes_capped_for_mamba_cache():
-    """Test that cudagraph capture sizes are capped to num_blocks for
-    hybrid models with Mamba layers.
+def test_mamba_cache_raises_when_max_num_seqs_exceeds_blocks():
+    """Test that a ValueError is raised when max_num_seqs exceeds the
+    available Mamba cache blocks for hybrid models with FULL cudagraphs.
 
     See: https://github.com/vllm-project/vllm/issues/34094
     """
@@ -1275,7 +1551,8 @@ def test_cudagraph_sizes_capped_for_mamba_cache():
             )
         assert fwd_context is not None
 
-        runner = GPUModelRunner(vllm_config, DEVICE)
+        runner = GPUModelRunner(vllm_config, DEVICE_TYPE)
+        current_platform.update_block_size_for_backend(vllm_config)
         kv_cache_spec = runner.get_kv_cache_spec()
 
         available_memory = 5 * GiB_bytes
@@ -1284,23 +1561,8 @@ def test_cudagraph_sizes_capped_for_mamba_cache():
         )[0]
         num_blocks = kv_cache_config.num_blocks
 
-        # Set max_cudagraph_capture_size to a value larger than num_blocks
-        # to trigger the Mamba capping logic.
-        large_max = num_blocks + 100
-        compilation_config = vllm_config.compilation_config
-        compilation_config.max_cudagraph_capture_size = large_max
-        compilation_config.cudagraph_capture_sizes = [
-            s for s in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] if s <= large_max
-        ]
+        # Force max_num_seqs to exceed num_blocks so the check triggers.
+        runner.max_num_reqs = num_blocks + 100
 
-        runner.initialize_kv_cache(kv_cache_config)
-
-    # After initialization, cudagraph sizes should be capped
-    assert compilation_config.max_cudagraph_capture_size <= num_blocks
-    assert all(s <= num_blocks for s in compilation_config.cudagraph_capture_sizes)
-    # Invariant: last element == max
-    if compilation_config.cudagraph_capture_sizes:
-        assert (
-            compilation_config.cudagraph_capture_sizes[-1]
-            == compilation_config.max_cudagraph_capture_size
-        )
+        with pytest.raises(ValueError, match="max_num_seqs"):
+            runner.initialize_kv_cache(kv_cache_config)
diff --git a/tests/v1/worker/test_late_interaction_runner.py b/tests/v1/worker/test_late_interaction_runner.py
index 5be3f6e6f10d..9719485cd542 100644
--- a/tests/v1/worker/test_late_interaction_runner.py
+++ b/tests/v1/worker/test_late_interaction_runner.py
@@ -4,12 +4,12 @@
 import pytest
 import torch
 
+from vllm.entrypoints.pooling.scoring.utils import compute_maxsim_score
 from vllm.pooling_params import LateInteractionParams, PoolingParams
 from vllm.v1.pool.late_interaction import (
     LATE_INTERACTION_MODE_CACHE_QUERY,
     build_late_interaction_doc_params,
     build_late_interaction_query_params,
-    compute_maxsim_score,
 )
 from vllm.v1.worker.gpu.pool.late_interaction_runner import LateInteractionRunner
 
diff --git a/tests/v1/worker/test_mamba_utils.py b/tests/v1/worker/test_mamba_utils.py
index c5d0661476e3..dece9db00ce8 100644
--- a/tests/v1/worker/test_mamba_utils.py
+++ b/tests/v1/worker/test_mamba_utils.py
@@ -1,9 +1,91 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections.abc import Callable
+from dataclasses import dataclass
+from typing import Any
 from unittest.mock import MagicMock, patch
 
+import numpy as np
+import pytest
+import torch
+
+from vllm.model_executor.layers.mamba.mamba_utils import (
+    get_conv_copy_spec,
+    get_temporal_copy_spec,
+)
 from vllm.v1.core.sched.output import CachedRequestData, SchedulerOutput
-from vllm.v1.worker.mamba_utils import preprocess_mamba
+from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheGroupSpec, MambaSpec
+from vllm.v1.worker.mamba_utils import (
+    MambaCopyBuffers,
+    MambaSpecDecodeGPUContext,
+    collect_mamba_copy_meta,
+    do_mamba_copy_block,
+    preprocess_mamba,
+)
+
+MambaStateCopyFunc = Callable[..., Any]
+
+# Conv + temporal copy specs, in the order the tests' MambaSpec shapes expect.
+_COPY_FUNCS: tuple[MambaStateCopyFunc, ...] = (
+    get_conv_copy_spec,
+    get_temporal_copy_spec,
+)
+
+
+def postprocess_mamba(
+    scheduler_output: "SchedulerOutput",
+    kv_cache_config: "KVCacheConfig",
+    input_batch: Any,
+    requests: dict[str, Any],
+    forward_context: dict[str, Any],
+    mamba_state_copy_funcs: tuple[MambaStateCopyFunc, ...],
+    copy_bufs: "MambaCopyBuffers",
+):
+    """CPU reference for the align-mode postprocess.
+
+    Used as a golden against the GPU fused kernel (``postprocess_mamba_align_gpu``).
+    Mirrors what the production code did before the fused kernel replaced it;
+    kept here because production no longer has a CPU implementation.
+    """
+    assert input_batch.mamba_state_idx_cpu is not None
+    num_scheduled_tokens_dict = scheduler_output.num_scheduled_tokens
+    scheduled_spec_decode_tokens_dict = scheduler_output.scheduled_spec_decode_tokens
+    num_accepted_tokens_cpu = input_batch.num_accepted_tokens_cpu
+    mamba_state_idx_cpu = input_batch.mamba_state_idx_cpu
+    mamba_group_ids = copy_bufs.mamba_group_ids
+    mamba_spec = copy_bufs.mamba_spec
+    copy_bufs.offset = 0
+    for i, req_id in enumerate(input_batch.req_ids):
+        req_state = requests[req_id]
+        num_computed_tokens = req_state.num_computed_tokens
+        num_draft_tokens = len(scheduled_spec_decode_tokens_dict.get(req_id, []))
+        num_scheduled_tokens = num_scheduled_tokens_dict[req_id]
+        num_accepted_tokens = num_accepted_tokens_cpu[i]
+        num_tokens_running_state = (
+            num_computed_tokens + num_scheduled_tokens - num_draft_tokens
+        )
+        new_num_computed_tokens = num_tokens_running_state + num_accepted_tokens - 1
+        aligned_new_computed_tokens = (
+            new_num_computed_tokens // mamba_spec.block_size * mamba_spec.block_size
+        )
+        if aligned_new_computed_tokens >= num_tokens_running_state:
+            accept_token_bias = aligned_new_computed_tokens - num_tokens_running_state
+            src_block_idx = mamba_state_idx_cpu[i]
+            dest_block_idx = aligned_new_computed_tokens // mamba_spec.block_size - 1
+            collect_mamba_copy_meta(
+                copy_bufs,
+                kv_cache_config,
+                mamba_state_copy_funcs,
+                mamba_group_ids,
+                src_block_idx,
+                dest_block_idx,
+                accept_token_bias,
+                req_state,
+                forward_context,
+            )
+            if src_block_idx == dest_block_idx:
+                num_accepted_tokens_cpu[i] = 1
+    do_mamba_copy_block(copy_bufs)
 
 
 def _make_scheduler_output(
@@ -38,7 +120,7 @@ def test_resumed_req_ids_cleared_from_mamba_state_idx():
     input_batch = MagicMock(req_ids=[])
     copy_bufs = MagicMock(mamba_group_ids=[0], mamba_spec=spec)
 
-    mamba_state_idx = {
+    mamba_state_idx: dict[str, int] = {
         "finished": 1,
         "preempted": 2,
         "resumed": 3,  # only in resumed_req_ids, NOT in preempted
@@ -56,14 +138,1997 @@ def test_resumed_req_ids_cleared_from_mamba_state_idx():
     ):
         preprocess_mamba(
             sched,
-            MagicMock(),
+            MagicMock(),  # kv_cache_config
             cache_config,
             mamba_state_idx,
             input_batch,
-            {},
-            {},
-            (),
+            {},  # requests
+            {},  # forward_context
+            (),  # mamba_state_copy_funcs
             copy_bufs,
         )
 
     assert mamba_state_idx == {"keep": 99}
+
+
+# -----------------------------------------------------------------------------
+# Golden tests for postprocess_mamba_fused_kernel
+# -----------------------------------------------------------------------------
+
+
+@dataclass
+class _TestConfig:
+    """Common test configuration for fused kernel tests."""
+
+    block_size: int = 16
+    num_blocks: int = 32
+    num_layers: int = 2
+    num_reqs: int = 4
+    max_num_reqs: int = 8
+    # Conv state shape: [num_blocks, conv_width, inner_dim]
+    conv_width: int = 4
+    conv_inner_dim: int = 64
+    # Temporal state shape: [num_blocks, state_dim]
+    temporal_state_dim: int = 128
+    dtype: torch.dtype = torch.float16
+
+
+class _MockCpuGpuBuffer:
+    """Mock CpuGpuBuffer for testing without pinned memory."""
+
+    def __init__(self, size: int, dtype: torch.dtype, device: torch.device):
+        self.cpu = torch.zeros(size, dtype=dtype, device="cpu")
+        self.gpu = torch.zeros(size, dtype=dtype, device=device)
+        self.np = self.cpu.numpy()
+
+    def copy_to_gpu(self, n: int | None = None) -> torch.Tensor:
+        if n is None:
+            return self.gpu.copy_(self.cpu, non_blocking=True)
+        return self.gpu[:n].copy_(self.cpu[:n], non_blocking=True)
+
+
+def _make_postprocess_scheduler_output(
+    req_ids: list[str],
+    num_scheduled_tokens: dict[str, int],
+    scheduled_spec_decode_tokens: dict[str, list] | None = None,
+) -> SchedulerOutput:
+    """Create a minimal SchedulerOutput for postprocess testing."""
+    cached = CachedRequestData.make_empty()
+    return SchedulerOutput(
+        scheduled_new_reqs=[],
+        scheduled_cached_reqs=cached,
+        num_scheduled_tokens=num_scheduled_tokens,
+        total_num_scheduled_tokens=sum(num_scheduled_tokens.values()),
+        scheduled_spec_decode_tokens=scheduled_spec_decode_tokens or {},
+        scheduled_encoder_inputs={},
+        num_common_prefix_blocks=[],
+        finished_req_ids=set(),
+        free_encoder_mm_hashes=[],
+        preempted_req_ids=set(),
+    )
+
+
+def _make_mock_attention(
+    conv_state: torch.Tensor, temporal_state: torch.Tensor
+) -> MagicMock:
+    """Create a mock attention object with kv_cache."""
+    attention = MagicMock()
+    attention.kv_cache = [conv_state, temporal_state]
+    return attention
+
+
+def _make_dual_states(
+    cfg: "_TestConfig",
+    layer_names: list[str],
+    device: torch.device,
+    *,
+    num_blocks: int | None = None,
+) -> tuple[
+    list[torch.Tensor],
+    list[torch.Tensor],
+    list[torch.Tensor],
+    list[torch.Tensor],
+    dict[str, MagicMock],
+    dict[str, MagicMock],
+]:
+    """Allocate conv+temporal state tensors for the Python path, clone them for
+    the GPU path, and build matching ``forward_context`` dicts for both.
+
+    Returns ``(conv_py, temporal_py, conv_gpu, temporal_gpu, fwd_py, fwd_gpu)``
+    where the four state lists are parallel to ``layer_names``.
+    """
+    n_blocks = num_blocks if num_blocks is not None else cfg.num_blocks
+    conv_py = [
+        torch.randn(
+            n_blocks,
+            cfg.conv_width,
+            cfg.conv_inner_dim,
+            dtype=cfg.dtype,
+            device=device,
+        )
+        for _ in layer_names
+    ]
+    temporal_py = [
+        torch.randn(n_blocks, cfg.temporal_state_dim, dtype=cfg.dtype, device=device)
+        for _ in layer_names
+    ]
+    conv_gpu = [s.clone() for s in conv_py]
+    temporal_gpu = [s.clone() for s in temporal_py]
+    fwd_py = {
+        name: _make_mock_attention(c, t)
+        for name, c, t in zip(layer_names, conv_py, temporal_py)
+    }
+    fwd_gpu = {
+        name: _make_mock_attention(c, t)
+        for name, c, t in zip(layer_names, conv_gpu, temporal_gpu)
+    }
+    return conv_py, temporal_py, conv_gpu, temporal_gpu, fwd_py, fwd_gpu
+
+
+def _make_dual_layer_state(
+    cfg: "_TestConfig",
+    device: torch.device,
+    *,
+    num_blocks: int | None = None,
+    layer_name: str = "layer_0",
+) -> tuple[
+    torch.Tensor,
+    torch.Tensor,
+    torch.Tensor,
+    torch.Tensor,
+    dict[str, MagicMock],
+    dict[str, MagicMock],
+]:
+    """Single-layer convenience form of ``_make_dual_states``."""
+    conv_py, temporal_py, conv_gpu, temporal_gpu, fwd_py, fwd_gpu = _make_dual_states(
+        cfg, [layer_name], device, num_blocks=num_blocks
+    )
+    return conv_py[0], temporal_py[0], conv_gpu[0], temporal_gpu[0], fwd_py, fwd_gpu
+
+
+def _make_kv_cache_config(cfg: _TestConfig, layer_names: list[str]) -> KVCacheConfig:
+    """Create a KVCacheConfig with mamba groups."""
+    mamba_spec = MambaSpec(
+        block_size=cfg.block_size,
+        shapes=(
+            (cfg.conv_width, cfg.conv_inner_dim),
+            (cfg.temporal_state_dim,),
+        ),
+        dtypes=(cfg.dtype, cfg.dtype),
+        mamba_cache_mode="all",
+    )
+    group = KVCacheGroupSpec(
+        layer_names=layer_names,
+        kv_cache_spec=mamba_spec,
+    )
+    return KVCacheConfig(
+        num_blocks=cfg.num_blocks,
+        kv_cache_tensors=[],
+        kv_cache_groups=[group],
+    )
+
+
+def _make_input_batch(
+    req_ids: list[str],
+    num_accepted_tokens: list[int],
+    mamba_state_idx: list[int],
+) -> MagicMock:
+    """Create a mock GPUInputBatch."""
+    batch = MagicMock()
+    batch.req_ids = req_ids
+    batch.req_id_to_index = {rid: i for i, rid in enumerate(req_ids)}
+    # Use numpy arrays so modifications persist
+    batch.num_accepted_tokens_cpu = np.array(num_accepted_tokens, dtype=np.int32)
+    batch.mamba_state_idx_cpu = np.array(mamba_state_idx, dtype=np.int32)
+    return batch
+
+
+def _make_requests(
+    req_ids: list[str],
+    num_computed_tokens: list[int],
+    block_ids_per_req: list[list[int]],
+) -> dict[str, MagicMock]:
+    """Create mock CachedRequestState objects."""
+    requests = {}
+    for i, req_id in enumerate(req_ids):
+        req = MagicMock()
+        req.num_computed_tokens = num_computed_tokens[i]
+        req.block_ids = {0: block_ids_per_req[i]}  # group_id=0
+        requests[req_id] = req
+    return requests
+
+
+def _make_copy_bufs(
+    cfg: _TestConfig, kv_cache_config: KVCacheConfig, device: torch.device
+) -> MambaCopyBuffers:
+    """Create MambaCopyBuffers for the Python path."""
+
+    def make_buffer(n, dtype):
+        return _MockCpuGpuBuffer(n, dtype, device)
+
+    return MambaCopyBuffers.create(
+        max_num_reqs=cfg.max_num_reqs,
+        kv_cache_config=kv_cache_config,
+        copy_funcs=(get_conv_copy_spec, get_temporal_copy_spec),
+        make_buffer=make_buffer,
+    )
+
+
+def _make_gpu_ctx(
+    cfg: _TestConfig, kv_cache_config: KVCacheConfig, device: torch.device
+) -> MambaSpecDecodeGPUContext:
+    """Create MambaSpecDecodeGPUContext for the GPU path."""
+
+    def make_buffer(n, dtype):
+        return _MockCpuGpuBuffer(n, dtype, device)
+
+    return MambaSpecDecodeGPUContext.create(
+        max_num_reqs=cfg.max_num_reqs,
+        kv_cache_config=kv_cache_config,
+        num_state_types=2,
+        device=device,
+        make_buffer=make_buffer,
+    )
+
+
+def _run_gpu_postprocess(
+    gpu_ctx: MambaSpecDecodeGPUContext,
+    *,
+    kv_cache_config: KVCacheConfig,
+    forward_context: dict[str, Any],
+    copy_funcs: tuple,
+    block_table: torch.Tensor,
+    req_ids: list[str],
+    num_accepted_tokens: list[int],
+    mamba_state_idx: list[int],
+    num_scheduled_tokens: dict[str, int],
+    num_computed_tokens: list[int],
+    num_draft_tokens: dict[str, int],
+    device: torch.device,
+) -> None:
+    """Initialize the GPU context against `block_table`, run the fused
+    postprocess kernel for `req_ids`, and synchronize."""
+
+    def t(values):
+        return torch.tensor(values, dtype=torch.int32, device=device)
+
+    gpu_ctx.initialize_from_forward_context(
+        kv_cache_config, forward_context, copy_funcs, [block_table]
+    )
+    gpu_ctx.run_fused_postprocess(
+        num_reqs=len(req_ids),
+        num_accepted_tokens_gpu=t(num_accepted_tokens),
+        mamba_state_idx_gpu=t(mamba_state_idx),
+        num_scheduled_tokens_gpu=t([num_scheduled_tokens[r] for r in req_ids]),
+        num_computed_tokens_gpu=t(num_computed_tokens),
+        num_draft_tokens_gpu=t([num_draft_tokens.get(r, 0) for r in req_ids]),
+    )
+    torch.accelerator.synchronize()
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required")
+class TestPostprocessMambaFusedKernel:
+    """Tests for postprocess_mamba_fused_kernel comparing GPU vs CPU paths."""
+
+    @pytest.fixture
+    def device(self):
+        return torch.device("cuda:0")
+
+    @pytest.fixture
+    def test_config(self):
+        return _TestConfig()
+
+    def test_matches_python_postprocess_mamba(self, device, test_config):
+        """
+        Golden test: GPU kernel produces identical results to Python impl.
+
+        This test:
+        1. Sets up identical initial state for both paths
+        2. Runs Python postprocess_mamba (modifies states via batch_memcpy)
+        3. Runs GPU fused kernel (modifies states directly)
+        4. Compares resulting state tensors and num_accepted_tokens
+        """
+        cfg = test_config
+        torch.manual_seed(42)
+
+        # Test scenario: 4 requests with different copy conditions
+        # Copy needed when: aligned_new_computed >= num_tokens_running_state
+        # where: num_tokens_running_state = num_computed + num_scheduled - num_draft
+        #        new_num_computed = num_tokens_running_state + num_accepted - 1
+        #        aligned_new_computed = (new_num_computed // block_size) * block_size
+        req_ids = ["req_0", "req_1", "req_2", "req_3"]
+
+        # Configure requests so some need copies, some don't
+        # block_size = 16
+        # req_0: running=60+5-2=63, new=63+3-1=65, aligned=64 >= 63 -> COPY
+        # req_1: running=30+3-0=33, new=33+2-1=34, aligned=32 < 33 -> NO COPY
+        # req_2: running=45+8-3=50, new=50+4-1=53, aligned=48 < 50 -> NO COPY
+        # req_3: running=10+6-0=16, new=16+2-1=17, aligned=16 >= 16 -> COPY
+        num_computed_tokens = [60, 30, 45, 10]
+        num_scheduled_tokens = {"req_0": 5, "req_1": 3, "req_2": 8, "req_3": 6}
+        num_draft_tokens = {"req_0": 2, "req_1": 0, "req_2": 3, "req_3": 0}
+        num_accepted_tokens = [3, 2, 4, 2]
+        mamba_state_idx = [3, 1, 2, 0]  # source block indices
+
+        # Block IDs for each request (simulate block table)
+        block_ids_per_req = [
+            list(range(8)),  # req_0: blocks 0-7
+            list(range(8, 16)),  # req_1: blocks 8-15
+            list(range(16, 24)),  # req_2: blocks 16-23
+            list(range(24, 32)),  # req_3: blocks 24-31
+        ]
+
+        layer_names = [f"layer_{i}" for i in range(cfg.num_layers)]
+        kv_cache_config = _make_kv_cache_config(cfg, layer_names)
+
+        (
+            conv_states_py,
+            temporal_states_py,
+            conv_states_gpu,
+            temporal_states_gpu,
+            forward_context_py,
+            forward_context_gpu,
+        ) = _make_dual_states(cfg, layer_names, device)
+
+        # --- Run Python path ---
+        scheduler_output = _make_postprocess_scheduler_output(
+            req_ids,
+            num_scheduled_tokens,
+            {k: [None] * v for k, v in num_draft_tokens.items() if v > 0},
+        )
+        input_batch_py = _make_input_batch(
+            req_ids, num_accepted_tokens.copy(), mamba_state_idx.copy()
+        )
+        requests = _make_requests(req_ids, num_computed_tokens, block_ids_per_req)
+        copy_bufs = _make_copy_bufs(cfg, kv_cache_config, device)
+
+        postprocess_mamba(
+            scheduler_output,
+            kv_cache_config,
+            input_batch_py,
+            requests,
+            forward_context_py,
+            _COPY_FUNCS,
+            copy_bufs,
+        )
+        torch.accelerator.synchronize()
+
+        # --- Run GPU path ---
+        gpu_ctx = _make_gpu_ctx(cfg, kv_cache_config, device)
+
+        num_reqs = len(req_ids)
+        max_blocks = max(len(b) for b in block_ids_per_req)
+        block_table_gpu = torch.zeros(
+            num_reqs, max_blocks, dtype=torch.int32, device=device
+        )
+        for i, block_ids in enumerate(block_ids_per_req):
+            block_table_gpu[i, : len(block_ids)] = torch.tensor(
+                block_ids, dtype=torch.int32
+            )
+        _run_gpu_postprocess(
+            gpu_ctx,
+            kv_cache_config=kv_cache_config,
+            forward_context=forward_context_gpu,
+            copy_funcs=_COPY_FUNCS,
+            block_table=block_table_gpu,
+            req_ids=req_ids,
+            num_accepted_tokens=num_accepted_tokens,
+            mamba_state_idx=mamba_state_idx,
+            num_scheduled_tokens=num_scheduled_tokens,
+            num_computed_tokens=num_computed_tokens,
+            num_draft_tokens=num_draft_tokens,
+            device=device,
+        )
+
+        # --- Compare results ---
+        # 1. Compare state tensors
+        for i in range(cfg.num_layers):
+            torch.testing.assert_close(
+                conv_states_gpu[i],
+                conv_states_py[i],
+                msg=f"Conv state mismatch at layer {i}",
+            )
+            torch.testing.assert_close(
+                temporal_states_gpu[i],
+                temporal_states_py[i],
+                msg=f"Temporal state mismatch at layer {i}",
+            )
+
+        # 2. Compare num_accepted_tokens updates
+        expected_accepted = torch.tensor(
+            input_batch_py.num_accepted_tokens_cpu[:num_reqs],
+            dtype=torch.int32,
+            device=device,
+        )
+        torch.testing.assert_close(
+            gpu_ctx.num_accepted_tokens_out[:num_reqs],
+            expected_accepted,
+            msg="num_accepted_tokens mismatch",
+        )
+
+    def test_no_copy_when_not_needed(self, device, test_config):
+        """Kernel should not modify state when no copy is needed."""
+        cfg = test_config
+        torch.manual_seed(123)
+
+        # Single request where no copy is needed:
+        # running = 30 + 3 = 33, new = 33 + 1 - 1 = 33, aligned = 32 < 33
+        req_ids = ["req_0"]
+        num_computed_tokens = [30]
+        num_scheduled_tokens = {"req_0": 3}
+        num_draft_tokens: dict[str, int] = {}
+        num_accepted_tokens = [1]
+        mamba_state_idx = [1]
+        block_ids_per_req = [list(range(8))]
+
+        layer_names = ["layer_0"]
+        kv_cache_config = _make_kv_cache_config(cfg, layer_names)
+
+        # Create state tensor
+        conv_state = torch.randn(
+            cfg.num_blocks,
+            cfg.conv_width,
+            cfg.conv_inner_dim,
+            dtype=cfg.dtype,
+            device=device,
+        )
+        temporal_state = torch.randn(
+            cfg.num_blocks, cfg.temporal_state_dim, dtype=cfg.dtype, device=device
+        )
+
+        # Clone to verify no modification
+        conv_state_orig = conv_state.clone()
+        temporal_state_orig = temporal_state.clone()
+
+        forward_context = {"layer_0": _make_mock_attention(conv_state, temporal_state)}
+
+        gpu_ctx = _make_gpu_ctx(cfg, kv_cache_config, device)
+
+        num_reqs = len(req_ids)
+        block_table_gpu = torch.zeros(num_reqs, 8, dtype=torch.int32, device=device)
+        block_table_gpu[0, :8] = torch.tensor(block_ids_per_req[0], dtype=torch.int32)
+
+        _run_gpu_postprocess(
+            gpu_ctx,
+            kv_cache_config=kv_cache_config,
+            forward_context=forward_context,
+            copy_funcs=_COPY_FUNCS,
+            block_table=block_table_gpu,
+            req_ids=req_ids,
+            num_accepted_tokens=num_accepted_tokens,
+            mamba_state_idx=mamba_state_idx,
+            num_scheduled_tokens=num_scheduled_tokens,
+            num_computed_tokens=num_computed_tokens,
+            num_draft_tokens=num_draft_tokens,
+            device=device,
+        )
+
+        # State should be unchanged
+        torch.testing.assert_close(conv_state, conv_state_orig)
+        torch.testing.assert_close(temporal_state, temporal_state_orig)
+
+    @pytest.mark.parametrize("num_reqs", [1, 2, 8, 16])
+    def test_various_batch_sizes(self, device, test_config, num_reqs):
+        """Verify kernel works correctly with different batch sizes."""
+        cfg = _TestConfig(max_num_reqs=max(16, num_reqs))
+        torch.manual_seed(456)
+
+        req_ids = [f"req_{i}" for i in range(num_reqs)]
+        # All requests will trigger a copy
+        num_computed_tokens = [60] * num_reqs
+        num_scheduled_tokens = {r: 5 for r in req_ids}
+        num_draft_tokens = {r: 0 for r in req_ids}
+        num_accepted_tokens = [3] * num_reqs
+        mamba_state_idx = [3] * num_reqs
+        # Each request gets unique blocks
+        block_ids_per_req = [list(range(i * 8, (i + 1) * 8)) for i in range(num_reqs)]
+
+        # Ensure we have enough blocks
+        total_blocks = num_reqs * 8
+        cfg = _TestConfig(num_blocks=total_blocks, max_num_reqs=max(16, num_reqs))
+
+        layer_names = ["layer_0"]
+        kv_cache_config = _make_kv_cache_config(cfg, layer_names)
+
+        (
+            conv_state_py,
+            temporal_state_py,
+            conv_state_gpu,
+            temporal_state_gpu,
+            forward_context_py,
+            forward_context_gpu,
+        ) = _make_dual_layer_state(cfg, device)
+
+        # Run Python path
+        scheduler_output = _make_postprocess_scheduler_output(
+            req_ids,
+            num_scheduled_tokens,
+            {k: [None] * v for k, v in num_draft_tokens.items() if v > 0},
+        )
+        input_batch_py = _make_input_batch(
+            req_ids, num_accepted_tokens.copy(), mamba_state_idx.copy()
+        )
+        requests = _make_requests(req_ids, num_computed_tokens, block_ids_per_req)
+        copy_bufs = _make_copy_bufs(cfg, kv_cache_config, device)
+
+        postprocess_mamba(
+            scheduler_output,
+            kv_cache_config,
+            input_batch_py,
+            requests,
+            forward_context_py,
+            _COPY_FUNCS,
+            copy_bufs,
+        )
+        torch.accelerator.synchronize()
+
+        # Run GPU path
+        gpu_ctx = _make_gpu_ctx(cfg, kv_cache_config, device)
+
+        max_blocks_per_req = 8
+        block_table_gpu = torch.zeros(
+            num_reqs, max_blocks_per_req, dtype=torch.int32, device=device
+        )
+        for i, block_ids in enumerate(block_ids_per_req):
+            block_table_gpu[i, : len(block_ids)] = torch.tensor(
+                block_ids, dtype=torch.int32
+            )
+
+        _run_gpu_postprocess(
+            gpu_ctx,
+            kv_cache_config=kv_cache_config,
+            forward_context=forward_context_gpu,
+            copy_funcs=_COPY_FUNCS,
+            block_table=block_table_gpu,
+            req_ids=req_ids,
+            num_accepted_tokens=num_accepted_tokens,
+            mamba_state_idx=mamba_state_idx,
+            num_scheduled_tokens=num_scheduled_tokens,
+            num_computed_tokens=num_computed_tokens,
+            num_draft_tokens=num_draft_tokens,
+            device=device,
+        )
+
+        # Compare results
+        torch.testing.assert_close(
+            conv_state_gpu, conv_state_py, msg="Conv state mismatch"
+        )
+        torch.testing.assert_close(
+            temporal_state_gpu, temporal_state_py, msg="Temporal state mismatch"
+        )
+
+    def test_block_table_with_realistic_stride(self, device, test_config):
+        """
+        Test kernel with realistic block table strides.
+
+        In real usage, the block table is pre-allocated with shape
+        [max_num_reqs, max_num_blocks_per_req] and then sliced to
+        [:num_reqs]. This means stride(0) = max_num_blocks_per_req,
+        which is typically much larger than the actual blocks used.
+
+        This test verifies the kernel handles non-tight strides correctly,
+        catching bugs where stride is incorrectly treated as bytes vs elements.
+        """
+        cfg = test_config
+        torch.manual_seed(789)
+
+        # Use multiple requests to exercise stride-based indexing
+        num_reqs = 4
+        req_ids = [f"req_{i}" for i in range(num_reqs)]
+
+        # All requests trigger copies (same setup as test_various_batch_sizes)
+        num_computed_tokens = [60] * num_reqs
+        num_scheduled_tokens = {r: 5 for r in req_ids}
+        num_draft_tokens: dict[str, int] = {}
+        num_accepted_tokens = [3] * num_reqs
+        mamba_state_idx = [3] * num_reqs
+
+        # Each request uses only 8 blocks, but we allocate much more
+        blocks_used_per_req = 8
+        block_ids_per_req = [
+            list(range(i * blocks_used_per_req, (i + 1) * blocks_used_per_req))
+            for i in range(num_reqs)
+        ]
+
+        total_blocks = num_reqs * blocks_used_per_req
+        cfg = _TestConfig(num_blocks=total_blocks, max_num_reqs=max(16, num_reqs))
+
+        layer_names = ["layer_0"]
+        kv_cache_config = _make_kv_cache_config(cfg, layer_names)
+
+        (
+            conv_state_py,
+            temporal_state_py,
+            conv_state_gpu,
+            temporal_state_gpu,
+            forward_context_py,
+            forward_context_gpu,
+        ) = _make_dual_layer_state(cfg, device)
+
+        # Run Python path
+        scheduler_output = _make_postprocess_scheduler_output(
+            req_ids,
+            num_scheduled_tokens,
+            {k: [None] * v for k, v in num_draft_tokens.items() if v > 0},
+        )
+        input_batch_py = _make_input_batch(
+            req_ids, num_accepted_tokens.copy(), mamba_state_idx.copy()
+        )
+        requests = _make_requests(req_ids, num_computed_tokens, block_ids_per_req)
+        copy_bufs = _make_copy_bufs(cfg, kv_cache_config, device)
+
+        postprocess_mamba(
+            scheduler_output,
+            kv_cache_config,
+            input_batch_py,
+            requests,
+            forward_context_py,
+            _COPY_FUNCS,
+            copy_bufs,
+        )
+        torch.accelerator.synchronize()
+
+        # Run GPU path with REALISTIC block table stride
+        gpu_ctx = _make_gpu_ctx(cfg, kv_cache_config, device)
+
+        # KEY DIFFERENCE: Create a large block table like real code does
+        # Real system has max_num_blocks_per_req >> blocks actually used
+        max_num_reqs_full = 16
+        max_blocks_per_req_full = 512  # Much larger than blocks_used_per_req=8
+
+        # Allocate full-size table (simulates pre-allocated CpuGpuBuffer)
+        block_table_full = torch.zeros(
+            max_num_reqs_full, max_blocks_per_req_full, dtype=torch.int32, device=device
+        )
+
+        # Fill in actual block IDs (only first few columns used)
+        for i, block_ids in enumerate(block_ids_per_req):
+            block_table_full[i, : len(block_ids)] = torch.tensor(
+                block_ids, dtype=torch.int32
+            )
+
+        # Slice like real code: block_table.gpu[:num_reqs]
+        # This preserves stride(0) = 512, not 8!
+        block_table_gpu = block_table_full[:num_reqs]
+
+        # Verify stride is large (the key property we're testing)
+        assert block_table_gpu.stride(0) == max_blocks_per_req_full, (
+            f"Expected stride {max_blocks_per_req_full}, "
+            f"got {block_table_gpu.stride(0)}"
+        )
+
+        _run_gpu_postprocess(
+            gpu_ctx,
+            kv_cache_config=kv_cache_config,
+            forward_context=forward_context_gpu,
+            copy_funcs=_COPY_FUNCS,
+            block_table=block_table_gpu,
+            req_ids=req_ids,
+            num_accepted_tokens=num_accepted_tokens,
+            mamba_state_idx=mamba_state_idx,
+            num_scheduled_tokens=num_scheduled_tokens,
+            num_computed_tokens=num_computed_tokens,
+            num_draft_tokens=num_draft_tokens,
+            device=device,
+        )
+
+        # Compare results - this will fail if stride handling is incorrect
+        torch.testing.assert_close(
+            conv_state_gpu,
+            conv_state_py,
+            msg="Conv state mismatch - possible stride bug in kernel",
+        )
+        torch.testing.assert_close(
+            temporal_state_gpu,
+            temporal_state_py,
+            msg="Temporal state mismatch - possible stride bug in kernel",
+        )
+
+    def test_src_addr_equals_dst_addr_skips_copy_and_sets_accepted_to_1(
+        self, device, test_config
+    ):
+        """
+        Test the ``src_addr == dst_addr`` early-return path in
+        postprocess_mamba_fused_kernel matches Python behavior.
+
+        When src_addr == dst_addr (source and destination memory addresses are
+        identical), both implementations should:
+        1. Skip the copy (state unchanged)
+        2. Set num_accepted_tokens to 1
+
+        This condition occurs when:
+        - src_block_idx == dest_block_idx (same logical block)
+        - accept_token_bias == 0 (no offset within the block)
+
+        Python reference (collect_mamba_copy_meta):
+            if src_block_idx == dest_block_idx and accept_token_bias == 0:
+                return  # No copy added
+
+        Python reference (postprocess_mamba):
+            if src_block_idx == dest_block_idx:
+                num_accepted_tokens_cpu[i] = 1
+
+        Test setup (block_size=16):
+        - num_tokens_running_state = 30 + 2 - 0 = 32
+        - new_num_computed = 32 + 1 - 1 = 32
+        - aligned_new_computed = 32
+        - accept_token_bias = 32 - 32 = 0
+        - dest_block_idx = 32 // 16 - 1 = 1
+        - src_block_idx = 1 (set explicitly)
+        """
+        cfg = test_config
+        torch.manual_seed(1001)
+
+        req_ids = ["req_0"]
+        num_computed_tokens = [30]
+        num_scheduled_tokens = {"req_0": 2}
+        num_draft_tokens: dict[str, int] = {}
+        num_accepted_tokens = [5]  # Initial value, should become 1
+        mamba_state_idx = [1]  # src_block_idx = 1 = dest_block_idx
+        block_ids_per_req = [list(range(8))]
+
+        layer_names = ["layer_0"]
+        kv_cache_config = _make_kv_cache_config(cfg, layer_names)
+
+        (
+            conv_state_py,
+            temporal_state_py,
+            conv_state_gpu,
+            temporal_state_gpu,
+            forward_context_py,
+            forward_context_gpu,
+        ) = _make_dual_layer_state(cfg, device)
+
+        # Also clone to verify no modification
+        conv_state_orig = conv_state_py.clone()
+        temporal_state_orig = temporal_state_py.clone()
+
+        # --- Run Python path ---
+        scheduler_output = _make_postprocess_scheduler_output(
+            req_ids,
+            num_scheduled_tokens,
+            {k: [None] * v for k, v in num_draft_tokens.items() if v > 0},
+        )
+        input_batch_py = _make_input_batch(
+            req_ids, num_accepted_tokens.copy(), mamba_state_idx.copy()
+        )
+        requests = _make_requests(req_ids, num_computed_tokens, block_ids_per_req)
+        copy_bufs = _make_copy_bufs(cfg, kv_cache_config, device)
+
+        postprocess_mamba(
+            scheduler_output,
+            kv_cache_config,
+            input_batch_py,
+            requests,
+            forward_context_py,
+            _COPY_FUNCS,
+            copy_bufs,
+        )
+        torch.accelerator.synchronize()
+
+        # --- Run GPU path ---
+        gpu_ctx = _make_gpu_ctx(cfg, kv_cache_config, device)
+
+        num_reqs = len(req_ids)
+        block_table_gpu = torch.zeros(num_reqs, 8, dtype=torch.int32, device=device)
+        block_table_gpu[0, :8] = torch.tensor(block_ids_per_req[0], dtype=torch.int32)
+
+        _run_gpu_postprocess(
+            gpu_ctx,
+            kv_cache_config=kv_cache_config,
+            forward_context=forward_context_gpu,
+            copy_funcs=_COPY_FUNCS,
+            block_table=block_table_gpu,
+            req_ids=req_ids,
+            num_accepted_tokens=num_accepted_tokens,
+            mamba_state_idx=mamba_state_idx,
+            num_scheduled_tokens=num_scheduled_tokens,
+            num_computed_tokens=num_computed_tokens,
+            num_draft_tokens=num_draft_tokens,
+            device=device,
+        )
+
+        # --- Verify Python behavior (ground truth) ---
+        # State should be unchanged (no copy when src_addr == dst_addr)
+        torch.testing.assert_close(
+            conv_state_py,
+            conv_state_orig,
+            msg="Python: Conv state should be unchanged when src==dst",
+        )
+        torch.testing.assert_close(
+            temporal_state_py,
+            temporal_state_orig,
+            msg="Python: Temporal state should be unchanged when src==dst",
+        )
+        # num_accepted_tokens should be 1
+        assert input_batch_py.num_accepted_tokens_cpu[0] == 1, (
+            f"Python: num_accepted_tokens should be 1, "
+            f"got {input_batch_py.num_accepted_tokens_cpu[0]}"
+        )
+
+        # --- Verify GPU matches Python ---
+        torch.testing.assert_close(
+            conv_state_gpu,
+            conv_state_py,
+            msg="GPU conv state should match Python",
+        )
+        torch.testing.assert_close(
+            temporal_state_gpu,
+            temporal_state_py,
+            msg="GPU temporal state should match Python",
+        )
+        expected_accepted = torch.tensor(
+            input_batch_py.num_accepted_tokens_cpu[:num_reqs],
+            dtype=torch.int32,
+            device=device,
+        )
+        torch.testing.assert_close(
+            gpu_ctx.num_accepted_tokens_out[:num_reqs],
+            expected_accepted,
+            msg="GPU num_accepted_tokens should match Python",
+        )
+
+    def test_same_block_idx_with_offset_copies_then_sets_accepted_to_1(
+        self, device, test_config
+    ):
+        """
+        Test the ``src_block_idx == dest_block_idx`` post-copy update in
+        postprocess_mamba_fused_kernel matches Python behavior.
+
+        When src_block_idx == dest_block_idx but accept_token_bias > 0, both
+        implementations should:
+        1. Perform the copy (src_addr != dst_addr due to offset)
+        2. Set num_accepted_tokens to 1 AFTER the copy
+
+        Python reference (postprocess_mamba):
+            if src_block_idx == dest_block_idx:
+                num_accepted_tokens_cpu[i] = 1
+
+        For conv states: copies state[block, offset:] to
+            state[block, :] (shifted window)
+        For temporal states: copies state[block_ids[src_idx + offset]] to
+            state[block_ids[dest_idx]]
+
+        Test setup (block_size=16):
+        - num_tokens_running_state = 30 + 1 - 0 = 31
+        - new_num_computed = 31 + 2 - 1 = 32
+        - aligned_new_computed = 32
+        - accept_token_bias = 32 - 31 = 1 (> 0, so copy happens)
+        - dest_block_idx = 32 // 16 - 1 = 1
+        - src_block_idx = 1 (set explicitly, == dest_block_idx)
+        """
+        cfg = test_config
+        torch.manual_seed(1002)
+
+        req_ids = ["req_0"]
+        num_computed_tokens = [30]
+        num_scheduled_tokens = {"req_0": 1}
+        num_draft_tokens: dict[str, int] = {}
+        num_accepted_tokens = [2]  # Results in accept_token_bias = 1
+        mamba_state_idx = [1]  # src_block_idx = 1 = dest_block_idx
+        block_ids_per_req = [list(range(8))]
+
+        layer_names = ["layer_0"]
+        kv_cache_config = _make_kv_cache_config(cfg, layer_names)
+
+        (
+            conv_state_py,
+            temporal_state_py,
+            conv_state_gpu,
+            temporal_state_gpu,
+            forward_context_py,
+            forward_context_gpu,
+        ) = _make_dual_layer_state(cfg, device)
+
+        # Clone to verify modification
+        conv_state_orig = conv_state_py.clone()
+        temporal_state_orig = temporal_state_py.clone()
+
+        # --- Run Python path ---
+        scheduler_output = _make_postprocess_scheduler_output(
+            req_ids,
+            num_scheduled_tokens,
+            {k: [None] * v for k, v in num_draft_tokens.items() if v > 0},
+        )
+        input_batch_py = _make_input_batch(
+            req_ids, num_accepted_tokens.copy(), mamba_state_idx.copy()
+        )
+        requests = _make_requests(req_ids, num_computed_tokens, block_ids_per_req)
+        copy_bufs = _make_copy_bufs(cfg, kv_cache_config, device)
+
+        postprocess_mamba(
+            scheduler_output,
+            kv_cache_config,
+            input_batch_py,
+            requests,
+            forward_context_py,
+            _COPY_FUNCS,
+            copy_bufs,
+        )
+        torch.accelerator.synchronize()
+
+        # --- Run GPU path ---
+        gpu_ctx = _make_gpu_ctx(cfg, kv_cache_config, device)
+
+        num_reqs = len(req_ids)
+        block_table_gpu = torch.zeros(num_reqs, 8, dtype=torch.int32, device=device)
+        block_table_gpu[0, :8] = torch.tensor(block_ids_per_req[0], dtype=torch.int32)
+
+        _run_gpu_postprocess(
+            gpu_ctx,
+            kv_cache_config=kv_cache_config,
+            forward_context=forward_context_gpu,
+            copy_funcs=_COPY_FUNCS,
+            block_table=block_table_gpu,
+            req_ids=req_ids,
+            num_accepted_tokens=num_accepted_tokens,
+            mamba_state_idx=mamba_state_idx,
+            num_scheduled_tokens=num_scheduled_tokens,
+            num_computed_tokens=num_computed_tokens,
+            num_draft_tokens=num_draft_tokens,
+            device=device,
+        )
+
+        # --- Verify Python behavior (ground truth) ---
+        dest_block_id = block_ids_per_req[0][1]  # dest_block_idx = 1
+
+        # Conv state should be modified (shifted copy within block)
+        conv_changed = not torch.allclose(
+            conv_state_py[dest_block_id], conv_state_orig[dest_block_id]
+        )
+        assert conv_changed, (
+            "Python: Conv state should be modified when accept_token_bias > 0"
+        )
+
+        # Temporal state should be modified (copy from different block)
+        src_block_id_temporal = block_ids_per_req[0][2]  # actual_src_block_idx = 2
+        dest_block_id_temporal = block_ids_per_req[0][1]  # dest_block_idx = 1
+        torch.testing.assert_close(
+            temporal_state_py[dest_block_id_temporal],
+            temporal_state_orig[src_block_id_temporal],
+            msg="Python: Temporal state copy should have happened",
+        )
+
+        # num_accepted_tokens should be 1
+        assert input_batch_py.num_accepted_tokens_cpu[0] == 1, (
+            f"Python: num_accepted_tokens should be 1, "
+            f"got {input_batch_py.num_accepted_tokens_cpu[0]}"
+        )
+
+        # --- Verify GPU matches Python ---
+        torch.testing.assert_close(
+            conv_state_gpu,
+            conv_state_py,
+            msg="GPU conv state should match Python",
+        )
+        torch.testing.assert_close(
+            temporal_state_gpu,
+            temporal_state_py,
+            msg="GPU temporal state should match Python",
+        )
+        expected_accepted = torch.tensor(
+            input_batch_py.num_accepted_tokens_cpu[:num_reqs],
+            dtype=torch.int32,
+            device=device,
+        )
+        torch.testing.assert_close(
+            gpu_ctx.num_accepted_tokens_out[:num_reqs],
+            expected_accepted,
+            msg="GPU num_accepted_tokens should match Python",
+        )
+
+    def test_different_block_idx_copies_without_setting_accepted_to_1(
+        self, device, test_config
+    ):
+        """
+        Test that neither special-case path triggers when
+        src_block_idx != dest_block_idx, and GPU matches Python behavior.
+
+        When copying between different blocks:
+        1. src_addr != dst_addr (different blocks = different addresses)
+        2. src_block_idx != dest_block_idx
+
+        Therefore:
+        - The ``src_addr == dst_addr`` early-return does NOT trigger
+        - The ``src_block_idx == dest_block_idx`` post-copy update does NOT trigger
+        - Copy happens normally
+        - num_accepted_tokens remains UNCHANGED
+
+        Test setup (block_size=16):
+        - num_tokens_running_state = 60 + 3 - 0 = 63
+        - new_num_computed = 63 + 3 - 1 = 65
+        - aligned_new_computed = 64
+        - accept_token_bias = 64 - 63 = 1
+        - dest_block_idx = 64 // 16 - 1 = 3
+        - src_block_idx = 2 (set explicitly, != dest_block_idx)
+        """
+        cfg = test_config
+        torch.manual_seed(1003)
+
+        req_ids = ["req_0"]
+        num_computed_tokens = [60]
+        num_scheduled_tokens = {"req_0": 3}
+        num_draft_tokens: dict[str, int] = {}
+        num_accepted_tokens = [3]  # Should remain 3, NOT set to 1
+        mamba_state_idx = [2]  # src_block_idx = 2, dest_block_idx will be 3
+        block_ids_per_req = [list(range(8))]
+
+        layer_names = ["layer_0"]
+        kv_cache_config = _make_kv_cache_config(cfg, layer_names)
+
+        (
+            conv_state_py,
+            temporal_state_py,
+            conv_state_gpu,
+            temporal_state_gpu,
+            forward_context_py,
+            forward_context_gpu,
+        ) = _make_dual_layer_state(cfg, device)
+
+        # Clone to verify modification
+        conv_state_orig = conv_state_py.clone()
+
+        # --- Run Python path ---
+        scheduler_output = _make_postprocess_scheduler_output(
+            req_ids,
+            num_scheduled_tokens,
+            {k: [None] * v for k, v in num_draft_tokens.items() if v > 0},
+        )
+        input_batch_py = _make_input_batch(
+            req_ids, num_accepted_tokens.copy(), mamba_state_idx.copy()
+        )
+        requests = _make_requests(req_ids, num_computed_tokens, block_ids_per_req)
+        copy_bufs = _make_copy_bufs(cfg, kv_cache_config, device)
+
+        postprocess_mamba(
+            scheduler_output,
+            kv_cache_config,
+            input_batch_py,
+            requests,
+            forward_context_py,
+            _COPY_FUNCS,
+            copy_bufs,
+        )
+        torch.accelerator.synchronize()
+
+        # --- Run GPU path ---
+        gpu_ctx = _make_gpu_ctx(cfg, kv_cache_config, device)
+
+        num_reqs = len(req_ids)
+        block_table_gpu = torch.zeros(num_reqs, 8, dtype=torch.int32, device=device)
+        block_table_gpu[0, :8] = torch.tensor(block_ids_per_req[0], dtype=torch.int32)
+
+        _run_gpu_postprocess(
+            gpu_ctx,
+            kv_cache_config=kv_cache_config,
+            forward_context=forward_context_gpu,
+            copy_funcs=_COPY_FUNCS,
+            block_table=block_table_gpu,
+            req_ids=req_ids,
+            num_accepted_tokens=num_accepted_tokens,
+            mamba_state_idx=mamba_state_idx,
+            num_scheduled_tokens=num_scheduled_tokens,
+            num_computed_tokens=num_computed_tokens,
+            num_draft_tokens=num_draft_tokens,
+            device=device,
+        )
+
+        # --- Verify Python behavior (ground truth) ---
+        dest_block_id = block_ids_per_req[0][3]  # dest_block_idx = 3
+
+        # Copy DID happen (dest block should be modified)
+        conv_changed = not torch.allclose(
+            conv_state_py[dest_block_id], conv_state_orig[dest_block_id]
+        )
+        assert conv_changed, "Python: Conv state copy should have happened"
+
+        # num_accepted_tokens should NOT be changed to 1
+        assert input_batch_py.num_accepted_tokens_cpu[0] == num_accepted_tokens[0], (
+            f"Python: num_accepted_tokens should remain {num_accepted_tokens[0]}, "
+            f"got {input_batch_py.num_accepted_tokens_cpu[0]}"
+        )
+
+        # --- Verify GPU matches Python ---
+        torch.testing.assert_close(
+            conv_state_gpu,
+            conv_state_py,
+            msg="GPU conv state should match Python",
+        )
+        torch.testing.assert_close(
+            temporal_state_gpu,
+            temporal_state_py,
+            msg="GPU temporal state should match Python",
+        )
+        expected_accepted = torch.tensor(
+            input_batch_py.num_accepted_tokens_cpu[:num_reqs],
+            dtype=torch.int32,
+            device=device,
+        )
+        torch.testing.assert_close(
+            gpu_ctx.num_accepted_tokens_out[:num_reqs],
+            expected_accepted,
+            msg="GPU num_accepted_tokens should match Python",
+        )
+
+    def test_prefix_caching_shared_block_does_not_set_accepted_to_1(
+        self, device, test_config
+    ):
+        """
+        Regression test: with prefix caching, different logical block indices
+        can map to the same physical block. The kernel must NOT set
+        num_accepted_tokens to 1 in that case.
+
+        When src_block_idx != dest_block_idx but block_table maps both to the
+        same physical block ID, src_addr == dst_addr. The copy is correctly
+        skipped (self-copy is a no-op), but num_accepted_tokens must be
+        preserved — only logical-index equality justifies setting it to 1.
+
+        Test setup (block_size=16):
+        - num_tokens_running_state = 30 + 2 - 0 = 32
+        - new_num_computed = 32 + 3 - 1 = 34
+        - aligned_new_computed = 32
+        - accept_token_bias = 32 - 32 = 0
+        - dest_block_idx = 32 // 16 - 1 = 1
+        - src_block_idx = 0 (set explicitly, != dest_block_idx)
+        - block_ids = [5, 5, ...] — prefix caching: both logical indices
+          map to the same physical block 5
+        """
+        cfg = test_config
+        torch.manual_seed(2001)
+
+        req_ids = ["req_0"]
+        num_computed_tokens = [30]
+        num_scheduled_tokens = {"req_0": 2}
+        num_draft_tokens: dict[str, int] = {}
+        num_accepted_tokens = [3]  # Must stay 3, NOT become 1
+        mamba_state_idx = [0]  # src_block_idx = 0, dest_block_idx will be 1
+
+        # Prefix caching: logical blocks 0 and 1 share physical block 5
+        block_ids_per_req = [[5, 5, 2, 3, 4, 6, 7, 8]]
+
+        layer_names = ["layer_0"]
+        kv_cache_config = _make_kv_cache_config(cfg, layer_names)
+
+        (
+            conv_state_py,
+            temporal_state_py,
+            conv_state_gpu,
+            temporal_state_gpu,
+            forward_context_py,
+            forward_context_gpu,
+        ) = _make_dual_layer_state(cfg, device)
+
+        conv_state_orig = conv_state_py.clone()
+        temporal_state_orig = temporal_state_py.clone()
+
+        # --- Run Python path ---
+        scheduler_output = _make_postprocess_scheduler_output(
+            req_ids,
+            num_scheduled_tokens,
+            {k: [None] * v for k, v in num_draft_tokens.items() if v > 0},
+        )
+        input_batch_py = _make_input_batch(
+            req_ids, num_accepted_tokens.copy(), mamba_state_idx.copy()
+        )
+        requests = _make_requests(req_ids, num_computed_tokens, block_ids_per_req)
+        copy_bufs = _make_copy_bufs(cfg, kv_cache_config, device)
+
+        postprocess_mamba(
+            scheduler_output,
+            kv_cache_config,
+            input_batch_py,
+            requests,
+            forward_context_py,
+            _COPY_FUNCS,
+            copy_bufs,
+        )
+        torch.accelerator.synchronize()
+
+        # --- Run GPU path ---
+        gpu_ctx = _make_gpu_ctx(cfg, kv_cache_config, device)
+        num_reqs = len(req_ids)
+        block_table_gpu = torch.zeros(num_reqs, 8, dtype=torch.int32, device=device)
+        block_table_gpu[0, :8] = torch.tensor(block_ids_per_req[0], dtype=torch.int32)
+
+        _run_gpu_postprocess(
+            gpu_ctx,
+            kv_cache_config=kv_cache_config,
+            forward_context=forward_context_gpu,
+            copy_funcs=_COPY_FUNCS,
+            block_table=block_table_gpu,
+            req_ids=req_ids,
+            num_accepted_tokens=num_accepted_tokens,
+            mamba_state_idx=mamba_state_idx,
+            num_scheduled_tokens=num_scheduled_tokens,
+            num_computed_tokens=num_computed_tokens,
+            num_draft_tokens=num_draft_tokens,
+            device=device,
+        )
+
+        # --- Verify Python behavior (ground truth) ---
+        # Copy is self-to-self (same physical block), state unchanged
+        torch.testing.assert_close(
+            conv_state_py,
+            conv_state_orig,
+            msg="Python: Conv state should be unchanged (self-copy)",
+        )
+        torch.testing.assert_close(
+            temporal_state_py,
+            temporal_state_orig,
+            msg="Python: Temporal state should be unchanged (self-copy)",
+        )
+        # num_accepted_tokens must NOT be set to 1 (src_block_idx != dest_block_idx)
+        assert input_batch_py.num_accepted_tokens_cpu[0] == num_accepted_tokens[0], (
+            f"Python: num_accepted_tokens should remain {num_accepted_tokens[0]}, "
+            f"got {input_batch_py.num_accepted_tokens_cpu[0]}"
+        )
+
+        # --- Verify GPU matches Python ---
+        torch.testing.assert_close(
+            conv_state_gpu,
+            conv_state_py,
+            msg="GPU conv state should match Python",
+        )
+        torch.testing.assert_close(
+            temporal_state_gpu,
+            temporal_state_py,
+            msg="GPU temporal state should match Python",
+        )
+        expected_accepted = torch.tensor(
+            input_batch_py.num_accepted_tokens_cpu[:num_reqs],
+            dtype=torch.int32,
+            device=device,
+        )
+        torch.testing.assert_close(
+            gpu_ctx.num_accepted_tokens_out[:num_reqs],
+            expected_accepted,
+            msg="GPU num_accepted_tokens should match Python (must NOT be 1)",
+        )
+
+    def test_prefix_caching_nonsequential_block_ids_boundary(self, device, test_config):
+        """
+        Regression test: non-sequential physical block IDs under prefix caching
+        with the needs_copy boundary at exact equality.
+
+        Under PC, the block allocator assigns physical block IDs in arbitrary
+        order (e.g., [17, 3, 42, 9] instead of [0, 1, 2, 3]). The needs_copy
+        condition is purely token-count based and must evaluate identically
+        regardless of the physical block IDs assigned. This test verifies that
+        the kernel's address arithmetic (block_table lookup, stride computation)
+        produces correct copies when physical IDs are non-sequential.
+
+        Two requests exercise different boundary behaviors:
+        - req_0: aligned_new_computed == num_tokens_running_state (exact boundary)
+          This is the tightest edge: one fewer accepted token and no copy needed.
+        - req_1: aligned_new_computed == num_tokens_running_state (exact boundary)
+          Different block layout, src!=dest, real copy happens.
+
+        Both use non-sequential block IDs typical of PC reuse patterns.
+
+        Test setup (block_size=16):
+        req_0:
+        - num_tokens_running_state = 48 + 0 - 0 = 48
+        - new_num_computed = 48 + 1 - 1 = 48
+        - aligned_new_computed = 48
+        - needs_copy = (48 >= 48) = True (exact boundary!)
+        - accept_token_bias = 48 - 48 = 0
+        - dest_block_idx = 48 // 16 - 1 = 2
+        - src_block_idx = 2 (same as dest -> num_accepted = 1)
+
+        req_1:
+        - num_tokens_running_state = 31 + 1 - 0 = 32
+        - new_num_computed = 32 + 3 - 1 = 34
+        - aligned_new_computed = 32
+        - needs_copy = (32 >= 32) = True (exact boundary!)
+        - accept_token_bias = 32 - 32 = 0
+        - dest_block_idx = 32 // 16 - 1 = 1
+        - src_block_idx = 0 (diff from dest -> num_accepted unchanged)
+        """
+        cfg = test_config
+        torch.manual_seed(4001)
+
+        req_ids = ["req_0", "req_1"]
+        num_computed_tokens = [48, 31]
+        num_scheduled_tokens = {"req_0": 0, "req_1": 1}
+        num_draft_tokens: dict[str, int] = {}
+        num_accepted_tokens = [1, 3]
+        mamba_state_idx = [2, 0]
+
+        # Non-sequential block IDs typical of prefix caching allocation
+        block_ids_per_req = [
+            [17, 3, 42, 9, 25, 11, 30, 2],  # req_0: scattered physical blocks
+            [41, 7, 22, 15, 38, 19, 4, 28],  # req_1: different scattered blocks
+        ]
+
+        layer_names = [f"layer_{i}" for i in range(cfg.num_layers)]
+        # Need enough physical blocks for the scattered IDs
+        num_blocks = 50
+        local_cfg = _TestConfig(num_blocks=num_blocks, max_num_reqs=cfg.max_num_reqs)
+        kv_cache_config = _make_kv_cache_config(local_cfg, layer_names)
+
+        (
+            conv_states_py,
+            temporal_states_py,
+            conv_states_gpu,
+            temporal_states_gpu,
+            forward_context_py,
+            forward_context_gpu,
+        ) = _make_dual_states(local_cfg, layer_names, device)
+
+        # --- Run Python path ---
+        scheduler_output = _make_postprocess_scheduler_output(
+            req_ids,
+            num_scheduled_tokens,
+            {k: [None] * v for k, v in num_draft_tokens.items() if v > 0},
+        )
+        input_batch_py = _make_input_batch(
+            req_ids, num_accepted_tokens.copy(), mamba_state_idx.copy()
+        )
+        requests = _make_requests(req_ids, num_computed_tokens, block_ids_per_req)
+        copy_bufs = _make_copy_bufs(local_cfg, kv_cache_config, device)
+
+        postprocess_mamba(
+            scheduler_output,
+            kv_cache_config,
+            input_batch_py,
+            requests,
+            forward_context_py,
+            _COPY_FUNCS,
+            copy_bufs,
+        )
+        torch.accelerator.synchronize()
+
+        # --- Run GPU path ---
+        gpu_ctx = _make_gpu_ctx(local_cfg, kv_cache_config, device)
+        num_reqs = len(req_ids)
+        max_blocks = max(len(b) for b in block_ids_per_req)
+        block_table_gpu = torch.zeros(
+            num_reqs, max_blocks, dtype=torch.int32, device=device
+        )
+        for i, block_ids in enumerate(block_ids_per_req):
+            block_table_gpu[i, : len(block_ids)] = torch.tensor(
+                block_ids, dtype=torch.int32
+            )
+
+        _run_gpu_postprocess(
+            gpu_ctx,
+            kv_cache_config=kv_cache_config,
+            forward_context=forward_context_gpu,
+            copy_funcs=_COPY_FUNCS,
+            block_table=block_table_gpu,
+            req_ids=req_ids,
+            num_accepted_tokens=num_accepted_tokens,
+            mamba_state_idx=mamba_state_idx,
+            num_scheduled_tokens=num_scheduled_tokens,
+            num_computed_tokens=num_computed_tokens,
+            num_draft_tokens=num_draft_tokens,
+            device=device,
+        )
+
+        # --- Compare results ---
+        for i in range(cfg.num_layers):
+            torch.testing.assert_close(
+                conv_states_gpu[i],
+                conv_states_py[i],
+                msg=f"Conv state mismatch at layer {i} with non-sequential block IDs",
+            )
+            torch.testing.assert_close(
+                temporal_states_gpu[i],
+                temporal_states_py[i],
+                msg=(
+                    f"Temporal state mismatch at layer {i} "
+                    f"with non-sequential block IDs"
+                ),
+            )
+
+        expected_accepted = torch.tensor(
+            input_batch_py.num_accepted_tokens_cpu[:num_reqs],
+            dtype=torch.int32,
+            device=device,
+        )
+        torch.testing.assert_close(
+            gpu_ctx.num_accepted_tokens_out[:num_reqs],
+            expected_accepted,
+            msg="num_accepted_tokens mismatch with non-sequential block IDs",
+        )
+
+        # Verify req_0 had num_accepted set to 1 (src==dest) and req_1 unchanged
+        assert input_batch_py.num_accepted_tokens_cpu[0] == 1
+        assert input_batch_py.num_accepted_tokens_cpu[1] == num_accepted_tokens[1]
+
+    def test_prefix_caching_mixed_shared_and_distinct_blocks(self, device, test_config):
+        """
+        Regression test: mixed batch under prefix caching where some requests
+        have shared physical blocks (aliased) and others have distinct blocks,
+        with the needs_copy boundary at various positions.
+
+        This tests the interaction between:
+        1. PC block aliasing (src and dest map to same physical block)
+        2. The needs_copy boundary (exact equality vs well-past vs no-copy)
+        3. Non-sequential physical block IDs
+
+        Batch of 4 requests:
+        - req_0: needs_copy=True, src!=dest, shared physical block (PC aliased)
+                 -> copy skipped (src_addr==dst_addr), num_accepted PRESERVED
+        - req_1: needs_copy=True, src!=dest, distinct blocks, non-sequential IDs
+                 -> real copy happens, num_accepted PRESERVED
+        - req_2: needs_copy=False (below boundary)
+                 -> no action at all
+        - req_3: needs_copy=True, src==dest (exact boundary, zero bias)
+                 -> copy skipped (self-copy), num_accepted SET TO 1
+
+        Test setup (block_size=16):
+        req_0: running=30+2-0=32, new=32+3-1=34, aligned=32, 32>=32 -> COPY
+               bias=0, dest=32//16-1=1, src=0 (!=dest)
+               block_ids=[5,5,...] -> same physical -> skip, keep accepted=3
+
+        req_1: running=60+5-2=63, new=63+3-1=65, aligned=64, 64>=63 -> COPY
+               bias=1, dest=64//16-1=3, src=2 (!=dest)
+               block_ids=[41,7,22,15,...] -> distinct -> real copy, keep accepted=3
+
+        req_2: running=30+3-0=33, new=33+1-1=33, aligned=32, 32<33 -> NO COPY
+
+        req_3: running=48+0-0=48, new=48+1-1=48, aligned=48, 48>=48 -> COPY
+               bias=0, dest=48//16-1=2, src=2 (==dest)
+               block_ids=[10,20,30,...] -> distinct IDs, same logical idx
+               -> self-copy (src_addr==dst_addr), set accepted=1
+        """
+        cfg = test_config
+        torch.manual_seed(5001)
+
+        req_ids = ["req_0", "req_1", "req_2", "req_3"]
+        num_computed_tokens = [30, 60, 30, 48]
+        num_scheduled_tokens = {"req_0": 2, "req_1": 5, "req_2": 3, "req_3": 0}
+        num_draft_tokens = {"req_1": 2}
+        num_accepted_tokens = [3, 3, 1, 1]
+        mamba_state_idx = [0, 2, 1, 2]
+
+        # Block IDs with various PC patterns:
+        # req_0: shared blocks (PC alias: logical 0 and 1 -> physical 5)
+        # req_1: distinct non-sequential blocks
+        # req_2: doesn't matter (no copy)
+        # req_3: distinct sequential blocks (no aliasing)
+        block_ids_per_req = [
+            [5, 5, 12, 18, 23, 31, 44, 2],  # req_0: blocks 0,1 share phys 5
+            [41, 7, 22, 15, 38, 19, 4, 28],  # req_1: all distinct
+            [10, 20, 30, 40, 1, 6, 8, 14],  # req_2: irrelevant
+            [10, 20, 30, 40, 1, 6, 8, 14],  # req_3: distinct, dest=src=idx 2
+        ]
+
+        layer_names = [f"layer_{i}" for i in range(cfg.num_layers)]
+        num_blocks = 50
+        local_cfg = _TestConfig(num_blocks=num_blocks, max_num_reqs=cfg.max_num_reqs)
+        kv_cache_config = _make_kv_cache_config(local_cfg, layer_names)
+
+        (
+            conv_states_py,
+            temporal_states_py,
+            conv_states_gpu,
+            temporal_states_gpu,
+            forward_context_py,
+            forward_context_gpu,
+        ) = _make_dual_states(local_cfg, layer_names, device)
+
+        # --- Run Python path ---
+        scheduler_output = _make_postprocess_scheduler_output(
+            req_ids,
+            num_scheduled_tokens,
+            {k: [None] * v for k, v in num_draft_tokens.items() if v > 0},
+        )
+        input_batch_py = _make_input_batch(
+            req_ids, num_accepted_tokens.copy(), mamba_state_idx.copy()
+        )
+        requests = _make_requests(req_ids, num_computed_tokens, block_ids_per_req)
+        copy_bufs = _make_copy_bufs(local_cfg, kv_cache_config, device)
+
+        postprocess_mamba(
+            scheduler_output,
+            kv_cache_config,
+            input_batch_py,
+            requests,
+            forward_context_py,
+            _COPY_FUNCS,
+            copy_bufs,
+        )
+        torch.accelerator.synchronize()
+
+        # --- Run GPU path ---
+        gpu_ctx = _make_gpu_ctx(local_cfg, kv_cache_config, device)
+        num_reqs = len(req_ids)
+        max_blocks = max(len(b) for b in block_ids_per_req)
+        block_table_gpu = torch.zeros(
+            num_reqs, max_blocks, dtype=torch.int32, device=device
+        )
+        for i, block_ids in enumerate(block_ids_per_req):
+            block_table_gpu[i, : len(block_ids)] = torch.tensor(
+                block_ids, dtype=torch.int32
+            )
+
+        _run_gpu_postprocess(
+            gpu_ctx,
+            kv_cache_config=kv_cache_config,
+            forward_context=forward_context_gpu,
+            copy_funcs=_COPY_FUNCS,
+            block_table=block_table_gpu,
+            req_ids=req_ids,
+            num_accepted_tokens=num_accepted_tokens,
+            mamba_state_idx=mamba_state_idx,
+            num_scheduled_tokens=num_scheduled_tokens,
+            num_computed_tokens=num_computed_tokens,
+            num_draft_tokens=num_draft_tokens,
+            device=device,
+        )
+
+        # --- Compare all state tensors ---
+        for i in range(cfg.num_layers):
+            torch.testing.assert_close(
+                conv_states_gpu[i],
+                conv_states_py[i],
+                msg=(
+                    f"Conv state mismatch at layer {i} — "
+                    f"mixed PC batch with shared/distinct blocks"
+                ),
+            )
+            torch.testing.assert_close(
+                temporal_states_gpu[i],
+                temporal_states_py[i],
+                msg=(
+                    f"Temporal state mismatch at layer {i} — "
+                    f"mixed PC batch with shared/distinct blocks"
+                ),
+            )
+
+        # --- Compare num_accepted_tokens ---
+        expected_accepted = torch.tensor(
+            input_batch_py.num_accepted_tokens_cpu[:num_reqs],
+            dtype=torch.int32,
+            device=device,
+        )
+        torch.testing.assert_close(
+            gpu_ctx.num_accepted_tokens_out[:num_reqs],
+            expected_accepted,
+            msg="num_accepted_tokens mismatch in mixed PC batch",
+        )
+
+        # Verify per-request expectations:
+        # req_0: src!=dest, shared block -> preserved (3)
+        assert input_batch_py.num_accepted_tokens_cpu[0] == 3
+        # req_1: src!=dest, distinct blocks -> preserved (3)
+        assert input_batch_py.num_accepted_tokens_cpu[1] == 3
+        # req_2: no copy -> preserved (1)
+        assert input_batch_py.num_accepted_tokens_cpu[2] == 1
+        # req_3: src==dest -> set to 1
+        assert input_batch_py.num_accepted_tokens_cpu[3] == 1
+
+    def test_pc_aliased_blocks_skip_must_use_logical_idx_not_addr(
+        self, device, test_config
+    ):
+        """
+        Regression test for 6466ce0d vs 959ca0fd: the kernel's early-return
+        guard must compare logical block indices, not physical addresses.
+
+        Under prefix caching, different logical blocks (src_block_idx=0,
+        dest_block_idx=1) can map to the same physical block. When
+        accept_token_bias=0, this makes src_addr == dst_addr for BOTH conv
+        and temporal states. A buggy guard `if src_addr == dst_addr` would
+        incorrectly set num_accepted_tokens=1; the correct guard is
+        `if src_block_idx == dest_block_idx and accept_token_bias == 0`.
+
+        The Python reference only sets num_accepted_tokens=1 when
+        src_block_idx == dest_block_idx (line 79 of postprocess_mamba).
+        With src_block_idx=0, dest_block_idx=1, num_accepted_tokens must
+        be preserved even though the physical addresses match.
+
+        Test setup (block_size=16):
+        - num_tokens_running_state = 30 + 2 - 0 = 32
+        - new_num_computed = 32 + 3 - 1 = 34
+        - aligned_new_computed = 32
+        - needs_copy = (32 >= 32) = True
+        - accept_token_bias = 32 - 32 = 0
+        - dest_block_idx = 32 // 16 - 1 = 1
+        - src_block_idx = 0 (explicitly, != dest_block_idx)
+        - block_ids = [7, 7, ...] -> physical aliasing via prefix caching
+
+        Expected: num_accepted_tokens stays 3 (not set to 1).
+        Bug (959ca0fd): kernel saw src_addr == dst_addr, set it to 1.
+        """
+        cfg = test_config
+        torch.manual_seed(6001)
+
+        req_ids = ["req_0"]
+        num_computed_tokens = [30]
+        num_scheduled_tokens = {"req_0": 2}
+        num_draft_tokens: dict[str, int] = {}
+        num_accepted_tokens = [3]
+        mamba_state_idx = [0]  # src_block_idx = 0
+
+        # Prefix caching: logical blocks 0 and 1 both map to physical block 7.
+        block_ids_per_req = [[7, 7, 10, 11, 12, 13, 14, 15]]
+
+        layer_names = ["layer_0"]
+        kv_cache_config = _make_kv_cache_config(cfg, layer_names)
+
+        (
+            conv_state_py,
+            temporal_state_py,
+            conv_state_gpu,
+            temporal_state_gpu,
+            forward_context_py,
+            forward_context_gpu,
+        ) = _make_dual_layer_state(cfg, device)
+
+        # --- Run Python path ---
+        scheduler_output = _make_postprocess_scheduler_output(
+            req_ids,
+            num_scheduled_tokens,
+            {k: [None] * v for k, v in num_draft_tokens.items() if v > 0},
+        )
+        input_batch_py = _make_input_batch(
+            req_ids, num_accepted_tokens.copy(), mamba_state_idx.copy()
+        )
+        requests = _make_requests(req_ids, num_computed_tokens, block_ids_per_req)
+        copy_bufs = _make_copy_bufs(cfg, kv_cache_config, device)
+
+        postprocess_mamba(
+            scheduler_output,
+            kv_cache_config,
+            input_batch_py,
+            requests,
+            forward_context_py,
+            _COPY_FUNCS,
+            copy_bufs,
+        )
+        torch.accelerator.synchronize()
+
+        # Python reference: src_block_idx(0) != dest_block_idx(1) -> no change
+        assert input_batch_py.num_accepted_tokens_cpu[0] == 3, (
+            f"Python: num_accepted_tokens should remain 3, "
+            f"got {input_batch_py.num_accepted_tokens_cpu[0]}"
+        )
+
+        # --- Run GPU path ---
+        gpu_ctx = _make_gpu_ctx(cfg, kv_cache_config, device)
+        num_reqs = len(req_ids)
+        block_table_gpu = torch.zeros(num_reqs, 8, dtype=torch.int32, device=device)
+        block_table_gpu[0, :8] = torch.tensor(block_ids_per_req[0], dtype=torch.int32)
+
+        _run_gpu_postprocess(
+            gpu_ctx,
+            kv_cache_config=kv_cache_config,
+            forward_context=forward_context_gpu,
+            copy_funcs=_COPY_FUNCS,
+            block_table=block_table_gpu,
+            req_ids=req_ids,
+            num_accepted_tokens=num_accepted_tokens,
+            mamba_state_idx=mamba_state_idx,
+            num_scheduled_tokens=num_scheduled_tokens,
+            num_computed_tokens=num_computed_tokens,
+            num_draft_tokens=num_draft_tokens,
+            device=device,
+        )
+
+        # The critical assertion: kernel must NOT set num_accepted_tokens to 1
+        # when src_block_idx != dest_block_idx, even though src_addr == dst_addr
+        # due to prefix caching aliasing.
+        #
+        # Old kernel (959ca0fd): `if src_addr == dst_addr` -> FAILS here (sets 1)
+        # Fixed kernel (6466ce0d): `if src_block_idx == dest_block_idx and
+        #   accept_token_bias == 0` -> PASSES (preserves 3)
+        kernel_accepted = gpu_ctx.num_accepted_tokens_out[0].item()
+        assert kernel_accepted == 3, (
+            f"Kernel set num_accepted_tokens to {kernel_accepted} but expected 3. "
+            f"The early-return guard likely compared physical addresses "
+            f"(src_addr == dst_addr) instead of logical block indices "
+            f"(src_block_idx == dest_block_idx). Under prefix caching, "
+            f"different logical blocks can share the same physical block."
+        )
+
+        # Also verify state tensors match Python
+        torch.testing.assert_close(
+            conv_state_gpu,
+            conv_state_py,
+            msg="GPU conv state should match Python",
+        )
+        torch.testing.assert_close(
+            temporal_state_gpu,
+            temporal_state_py,
+            msg="GPU temporal state should match Python",
+        )
+
+    def test_as_strided_temporal_copy_size(self, device, test_config):
+        """
+        Regression test for 240723d46: temporal copy_size must be
+        inner_size * elem_size, not state_block_stride.
+
+        In production (gpu_model_runner.py), conv and temporal states share
+        a raw buffer via torch.as_strided where stride(0) equals
+        page_size_bytes / elem_size — larger than either state's natural
+        element count.  Using stride(0) as copy_size for temporal states
+        overwrites into the next block's conv region.
+
+        Layout per page (384 float16 elements = 768 bytes):
+            [conv: 256 elems | temporal: 128 elems]
+
+        The test triggers a temporal copy from block 4 to block 3.  With the
+        bug the kernel copies 768 bytes (page stride) instead of 256 bytes
+        (128 * 2), overwriting conv_state[4] with conv_state[5]'s data.
+
+        Test setup (block_size=16):
+        - running = 60 + 5 - 2 = 63
+        - new = 63 + 3 - 1 = 65
+        - aligned = 64 >= 63 -> COPY needed
+        - accept_token_bias = 64 - 63 = 1
+        - dest_block_idx = 64 // 16 - 1 = 3
+        - temporal: actual_src_block_idx = 3 + 1 = 4  (block_ids[4] = 4)
+        """
+        cfg = test_config
+        torch.manual_seed(7001)
+
+        req_ids = ["req_0"]
+        num_computed_tokens = [60]
+        num_scheduled_tokens = {"req_0": 5}
+        num_draft_tokens = {"req_0": 2}
+        num_accepted_tokens = [3]
+        mamba_state_idx = [3]
+        block_ids_per_req = [list(range(8))]
+
+        layer_names = ["layer_0"]
+        kv_cache_config = _make_kv_cache_config(cfg, layer_names)
+
+        # --- Production-like packed layout (mirrors gpu_model_runner.py) ---
+        conv_shape = (cfg.conv_width, cfg.conv_inner_dim)
+        temporal_shape = (cfg.temporal_state_dim,)
+        dtype = cfg.dtype
+        elem_size = torch.tensor([], dtype=dtype).element_size()
+
+        conv_natural_elems = cfg.conv_width * cfg.conv_inner_dim
+        temporal_natural_elems = cfg.temporal_state_dim
+        page_size_bytes = (conv_natural_elems + temporal_natural_elems) * elem_size
+        num_element_per_page = page_size_bytes // elem_size
+
+        assert num_element_per_page > temporal_natural_elems, (
+            "Test requires padded stride; page must be larger than one state"
+        )
+
+        raw_py = torch.randn(
+            cfg.num_blocks * num_element_per_page, dtype=dtype, device=device
+        )
+        raw_gpu = raw_py.clone()
+
+        def make_views(raw):
+            conv_tgt = (cfg.num_blocks, *conv_shape)
+            conv_nat_stride = torch.empty(conv_tgt).stride()
+            conv = torch.as_strided(
+                raw,
+                size=conv_tgt,
+                stride=(num_element_per_page, *conv_nat_stride[1:]),
+                storage_offset=0,
+            )
+
+            temp_tgt = (cfg.num_blocks, *temporal_shape)
+            temp_nat_stride = torch.empty(temp_tgt).stride()
+            temp = torch.as_strided(
+                raw,
+                size=temp_tgt,
+                stride=(num_element_per_page, *temp_nat_stride[1:]),
+                storage_offset=conv_natural_elems,
+            )
+            return conv, temp
+
+        conv_py, temp_py = make_views(raw_py)
+        conv_gpu, temp_gpu = make_views(raw_gpu)
+
+        fwd_py = {"layer_0": _make_mock_attention(conv_py, temp_py)}
+        fwd_gpu = {"layer_0": _make_mock_attention(conv_gpu, temp_gpu)}
+
+        # --- Python reference ---
+        sched = _make_postprocess_scheduler_output(
+            req_ids,
+            num_scheduled_tokens,
+            {k: [None] * v for k, v in num_draft_tokens.items() if v > 0},
+        )
+        batch_py = _make_input_batch(
+            req_ids, num_accepted_tokens.copy(), mamba_state_idx.copy()
+        )
+        requests = _make_requests(req_ids, num_computed_tokens, block_ids_per_req)
+        copy_bufs = _make_copy_bufs(cfg, kv_cache_config, device)
+
+        postprocess_mamba(
+            sched,
+            kv_cache_config,
+            batch_py,
+            requests,
+            fwd_py,
+            _COPY_FUNCS,
+            copy_bufs,
+        )
+        torch.accelerator.synchronize()
+
+        # --- GPU fused kernel ---
+        gpu_ctx = _make_gpu_ctx(cfg, kv_cache_config, device)
+        num_reqs = 1
+        block_table = torch.zeros(num_reqs, 8, dtype=torch.int32, device=device)
+        block_table[0, :8] = torch.tensor(block_ids_per_req[0], dtype=torch.int32)
+
+        _run_gpu_postprocess(
+            gpu_ctx,
+            kv_cache_config=kv_cache_config,
+            forward_context=fwd_gpu,
+            copy_funcs=_COPY_FUNCS,
+            block_table=block_table,
+            req_ids=req_ids,
+            num_accepted_tokens=num_accepted_tokens,
+            mamba_state_idx=mamba_state_idx,
+            num_scheduled_tokens=num_scheduled_tokens,
+            num_computed_tokens=num_computed_tokens,
+            num_draft_tokens=num_draft_tokens,
+            device=device,
+        )
+
+        # --- Assertions ---
+        # With the bug (pre-240723d46), the kernel copies page_size_bytes
+        # (768) for temporal state instead of 256 bytes, overwriting
+        # conv_state[4] with conv_state[5]'s data.
+        torch.testing.assert_close(
+            conv_gpu,
+            conv_py,
+            msg=(
+                "Conv state corrupted: temporal copy_size was likely "
+                "state_block_stride instead of inner_size * elem_size"
+            ),
+        )
+        torch.testing.assert_close(
+            temp_gpu,
+            temp_py,
+            msg="Temporal state mismatch",
+        )
+
+        expected_accepted = torch.tensor(
+            batch_py.num_accepted_tokens_cpu[:num_reqs],
+            dtype=torch.int32,
+            device=device,
+        )
+        torch.testing.assert_close(
+            gpu_ctx.num_accepted_tokens_out[:num_reqs],
+            expected_accepted,
+            msg="num_accepted_tokens mismatch",
+        )
+
+    def test_temporal_copy_with_bias_ge_2(self, device, test_config):
+        """
+        Coverage test for the temporal-state block-table stride arithmetic
+        when ``accept_token_bias >= 2``.
+
+        The kernel computes, for temporal (non-conv) states::
+
+            actual_src_block_idx = src_block_idx + accept_token_bias
+            actual_src_block_id = block_table[req, actual_src_block_idx]
+
+        All prior regression tests exercise only ``bias == 1``, i.e. they
+        only ever read one slot ahead of ``src_block_idx`` in the block
+        table. An off-by-one (or missing scale) in the address computation
+        on line 143 of ``mamba_utils.py`` would be invisible to every
+        existing test but would silently read the wrong physical block on
+        any speculative-decode cycle that accepts multiple tokens across a
+        block boundary, feeding a stale hidden state forward one step.
+
+        Setup (block_size=16):
+        - running   = 28 + 2 - 0 = 30
+        - new       = 30 + 3 - 1 = 32
+        - aligned   = 32 >= 30 -> COPY needed
+        - bias      = 32 - 30 = 2             (key: >= 2)
+        - dest_idx  = 32 // 16 - 1 = 1
+        - src_idx   = 1 (same as dest -> exercises post-copy accepted=1 write)
+        - temporal actual_src_block_idx = 1 + 2 = 3 (reads block_table[0, 3])
+
+        With identity block_ids = [0,1,2,3,...], an off-by-one that used
+        bias=1 would copy from block_ids[2]=2 instead of block_ids[3]=3,
+        producing a clear state-value mismatch against the Python
+        reference.
+        """
+        cfg = test_config
+        torch.manual_seed(7002)
+
+        req_ids = ["req_0"]
+        num_computed_tokens = [28]
+        num_scheduled_tokens = {"req_0": 2}
+        num_draft_tokens: dict[str, int] = {}
+        num_accepted_tokens = [3]  # -> accept_token_bias = 2
+        mamba_state_idx = [1]  # src_block_idx = 1 = dest_block_idx
+        block_ids_per_req = [list(range(8))]
+
+        layer_names = ["layer_0"]
+        kv_cache_config = _make_kv_cache_config(cfg, layer_names)
+
+        (
+            conv_state_py,
+            temporal_state_py,
+            conv_state_gpu,
+            temporal_state_gpu,
+            fwd_py,
+            fwd_gpu,
+        ) = _make_dual_layer_state(cfg, device)
+        temporal_state_orig = temporal_state_py.clone()
+
+        # --- Python reference ---
+        sched = _make_postprocess_scheduler_output(
+            req_ids,
+            num_scheduled_tokens,
+            {k: [None] * v for k, v in num_draft_tokens.items() if v > 0},
+        )
+        batch_py = _make_input_batch(
+            req_ids, num_accepted_tokens.copy(), mamba_state_idx.copy()
+        )
+        requests = _make_requests(req_ids, num_computed_tokens, block_ids_per_req)
+        copy_bufs = _make_copy_bufs(cfg, kv_cache_config, device)
+
+        postprocess_mamba(
+            sched,
+            kv_cache_config,
+            batch_py,
+            requests,
+            fwd_py,
+            _COPY_FUNCS,
+            copy_bufs,
+        )
+        torch.accelerator.synchronize()
+
+        # --- GPU fused kernel ---
+        gpu_ctx = _make_gpu_ctx(cfg, kv_cache_config, device)
+        num_reqs = 1
+        block_table = torch.zeros(num_reqs, 8, dtype=torch.int32, device=device)
+        block_table[0, :8] = torch.tensor(block_ids_per_req[0], dtype=torch.int32)
+
+        _run_gpu_postprocess(
+            gpu_ctx,
+            kv_cache_config=kv_cache_config,
+            forward_context=fwd_gpu,
+            copy_funcs=_COPY_FUNCS,
+            block_table=block_table,
+            req_ids=req_ids,
+            num_accepted_tokens=num_accepted_tokens,
+            mamba_state_idx=mamba_state_idx,
+            num_scheduled_tokens=num_scheduled_tokens,
+            num_computed_tokens=num_computed_tokens,
+            num_draft_tokens=num_draft_tokens,
+            device=device,
+        )
+
+        # --- Ground truth: Python must have sourced temporal from block 3 ---
+        actual_src_block_id = block_ids_per_req[0][3]  # == 3
+        dest_block_id = block_ids_per_req[0][1]  # == 1
+        torch.testing.assert_close(
+            temporal_state_py[dest_block_id],
+            temporal_state_orig[actual_src_block_id],
+            msg=(
+                "Python reference did not copy from block_ids[src+bias]=3; "
+                "test preconditions are wrong"
+            ),
+        )
+
+        # --- GPU kernel must match Python byte-for-byte ---
+        torch.testing.assert_close(
+            conv_state_gpu,
+            conv_state_py,
+            msg="Conv state mismatch at accept_token_bias=2",
+        )
+        torch.testing.assert_close(
+            temporal_state_gpu,
+            temporal_state_py,
+            msg=(
+                "Temporal state mismatch at accept_token_bias=2: the kernel "
+                "likely read the wrong slot of the block table "
+                "(actual_src_block_idx stride arithmetic)"
+            ),
+        )
+
+        expected_accepted = torch.tensor(
+            batch_py.num_accepted_tokens_cpu[:num_reqs],
+            dtype=torch.int32,
+            device=device,
+        )
+        torch.testing.assert_close(
+            gpu_ctx.num_accepted_tokens_out[:num_reqs],
+            expected_accepted,
+            msg="num_accepted_tokens mismatch at accept_token_bias=2",
+        )
diff --git a/tests/weight_loading/models-large.txt b/tests/weight_loading/models-large.txt
index ee98aed2684d..e701542569c5 100644
--- a/tests/weight_loading/models-large.txt
+++ b/tests/weight_loading/models-large.txt
@@ -2,7 +2,7 @@ compressed-tensors, nm-testing/Mixtral-8x7B-Instruct-v0.1-W4A16-quantized, main
 compressed-tensors, nm-testing/Mixtral-8x7B-Instruct-v0.1-W4A16-channel-quantized, main
 compressed-tensors, nm-testing/Mixtral-8x7B-Instruct-v0.1-W8A16-quantized, main
 compressed-tensors, nm-testing/test-w4a16-mixtral-actorder-group, main
-gptq_marlin, TheBloke/Mixtral-8x7B-v0.1-GPTQ, main
-gptq_marlin, TheBloke/Mixtral-8x7B-v0.1-GPTQ, gptq-8bit-128g-actorder_True
+gptq, TheBloke/Mixtral-8x7B-v0.1-GPTQ, main
+gptq, TheBloke/Mixtral-8x7B-v0.1-GPTQ, gptq-8bit-128g-actorder_True
 awq_marlin, casperhansen/deepseek-coder-v2-instruct-awq, main
 compressed-tensors, RedHatAI/Llama-4-Scout-17B-16E-Instruct-quantized.w4a16, main
\ No newline at end of file
diff --git a/tests/weight_loading/models.txt b/tests/weight_loading/models.txt
index a4691da8063d..398a12afd51c 100644
--- a/tests/weight_loading/models.txt
+++ b/tests/weight_loading/models.txt
@@ -1,9 +1,3 @@
-gptq_marlin, robertgshaw2/zephyr-7b-beta-channelwise-gptq, main
-gptq_marlin, TheBloke/Llama-2-7B-GPTQ, main
-gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, main
-gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, gptq-8bit--1g-actorder_True
-gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, gptq-8bit-32g-actorder_True
-gptq_marlin, TechxGenus/gemma-1.1-2b-it-GPTQ, main
 gptq, robertgshaw2/zephyr-7b-beta-channelwise-gptq, main
 gptq, TheBloke/Llama-2-7B-GPTQ, main
 gptq, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, main
diff --git a/tools/build_deepgemm_C.py b/tools/build_deepgemm_C.py
new file mode 100644
index 000000000000..67a527405e2d
--- /dev/null
+++ b/tools/build_deepgemm_C.py
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Build DeepGEMM's `_C` pybind11 extension for <TARGET_PY>.
+
+Driven from cmake/external_projects/deepgemm.cmake. The driver runs against
+the build interpreter's torch; <TARGET_PY> is only consulted for INCLUDEPY
+and SOABI, so target venvs don't need torch installed.
+
+Usage: python build_deepgemm_C.py <DEEPGEMM_SRC_DIR> <OUTPUT_DIR> <TARGET_PY>
+"""
+
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+import torch
+from torch.utils import cpp_extension
+
+if len(sys.argv) != 4:
+    sys.exit(f"usage: {sys.argv[0]} <SRC> <OUT> <TARGET_PY>")
+
+src = Path(sys.argv[1]).resolve()
+out = Path(sys.argv[2]).resolve()
+target_py = sys.argv[3]
+out.mkdir(parents=True, exist_ok=True)
+
+info = json.loads(
+    subprocess.check_output(
+        [
+            target_py,
+            "-c",
+            "import sysconfig, json; "
+            "print(json.dumps({k: sysconfig.get_config_var(k) "
+            "for k in ('EXT_SUFFIX', 'INCLUDEPY')}))",
+        ]
+    ).decode()
+)
+
+cuda_home = cpp_extension.CUDA_HOME
+if cuda_home is None:
+    sys.exit("CUDA_HOME not found; cannot build DeepGEMM _C")
+# CCCL lives outside the standard CUDAToolkit search (mirrors DeepGEMM's setup.py).
+includes = [
+    info["INCLUDEPY"],
+    f"{cuda_home}/include",
+    f"{cuda_home}/include/cccl",
+    str(src / "csrc"),
+    str(src / "deep_gemm/include"),
+    str(src / "third-party/cutlass/include"),
+    str(src / "third-party/cutlass/tools/util/include"),
+    str(src / "third-party/fmt/include"),
+    *cpp_extension.include_paths(device_type="cuda"),
+]
+
+cmd = [
+    os.environ.get("CXX", "g++"),
+    "-shared",
+    "-fPIC",
+    "-std=c++20",
+    "-O3",
+    "-g0",
+    "-Wno-psabi",
+    "-Wno-deprecated-declarations",
+    "-DTORCH_API_INCLUDE_EXTENSION_H",
+    "-DTORCH_EXTENSION_NAME=_C",
+    f"-D_GLIBCXX_USE_CXX11_ABI={int(torch.compiled_with_cxx11_abi())}",
+    *(f"-I{p}" for p in includes),
+    str(src / "csrc/python_api.cpp"),
+    *(f"-L{p}" for p in cpp_extension.library_paths(device_type="cuda")),
+    f"-L{cuda_home}/lib64",
+    "-ltorch",
+    "-ltorch_python",
+    "-ltorch_cpu",
+    "-ltorch_cuda",
+    "-lc10",
+    "-lc10_cuda",
+    "-lcudart",
+    "-lnvrtc",
+    "-o",
+    str(out / f"_C{info['EXT_SUFFIX']}"),
+]
+print("[build_deepgemm_C] " + " ".join(cmd), flush=True)
+subprocess.check_call(cmd)
diff --git a/tools/check_wheel_deepgemm.py b/tools/check_wheel_deepgemm.py
new file mode 100644
index 000000000000..6f8a03ffd3dc
--- /dev/null
+++ b/tools/check_wheel_deepgemm.py
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""Assert the installed vLLM has a `_C.cpython-X.Y-*.so` for every CPython
+covered by `requires-python`. Fails closed if a Python's `.so` is missing
+from the wheel — i.e. the regression that surfaced in #41476/#41512.
+
+Run from a CI test job after vLLM is installed, e.g. the H100 deepgemm
+kernel tests in .buildkite/test_areas/kernels.yaml.
+"""
+
+import importlib.util
+import os
+import sys
+from pathlib import Path
+
+import regex as re
+import tomllib
+
+SO_RE = re.compile(r"^_C\.cpython-(\d)(\d+)-")
+
+
+def required_pythons() -> list[str]:
+    pyproject = Path(__file__).resolve().parent.parent / "pyproject.toml"
+    spec = tomllib.loads(pyproject.read_text())["project"]["requires-python"]
+    m = re.match(r">=3\.(\d+),<3\.(\d+)", spec)
+    if not m:
+        sys.exit(f"unexpected requires-python format: {spec!r}")
+    return [f"3.{v}" for v in range(int(m[1]), int(m[2]))]
+
+
+spec = importlib.util.find_spec("vllm.third_party.deep_gemm")
+if spec is None or spec.origin is None:
+    sys.exit("vllm.third_party.deep_gemm not importable; is vllm installed?")
+pkg_dir = Path(spec.origin).parent
+
+found = {f"{m[1]}.{m[2]}" for f in os.listdir(pkg_dir) if (m := SO_RE.match(f))}
+required = required_pythons()
+missing = [v for v in required if v not in found]
+print(f"deepgemm _C: found {sorted(found)}, required {required}, missing {missing}")
+sys.exit(1 if missing else 0)
diff --git a/tools/ep_kernels/install_python_libraries.sh b/tools/ep_kernels/install_python_libraries.sh
index 3372dd10f4dc..f61aa868581e 100755
--- a/tools/ep_kernels/install_python_libraries.sh
+++ b/tools/ep_kernels/install_python_libraries.sh
@@ -101,8 +101,9 @@ NVSHMEM_URL="https://developer.download.nvidia.com/compute/nvshmem/redist/libnvs
 
 pushd "$WORKSPACE"
 echo "Downloading NVSHMEM ${NVSHMEM_VER} for ${NVSHMEM_SUBDIR} ..."
-curl -fSL "${NVSHMEM_URL}" -o "${NVSHMEM_FILE}"
+curl -fSL --retry 3 --retry-delay 2 "${NVSHMEM_URL}" -o "${NVSHMEM_FILE}"
 tar -xf "${NVSHMEM_FILE}"
+rm -rf nvshmem
 mv "${NVSHMEM_FILE%.tar.xz}" nvshmem
 rm -f "${NVSHMEM_FILE}"
 rm -rf nvshmem/lib/bin nvshmem/lib/share
diff --git a/tools/flashinfer-build.sh b/tools/flashinfer-build.sh
index 8bb630070241..fb148f056f64 100755
--- a/tools/flashinfer-build.sh
+++ b/tools/flashinfer-build.sh
@@ -35,7 +35,7 @@ elif [[ "${CUDA_VERSION}" == 12.[8-9]* ]]; then
     FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0a 10.3a 12.0"
 else
     # CUDA 13.0+
-    FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0f 12.0"
+    FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0f 11.0 12.0f"
 fi
 
 echo "🏗️ Building FlashInfer AOT for arches: ${FI_TORCH_CUDA_ARCH_LIST}"
diff --git a/tools/generate_cmake_presets.py b/tools/generate_cmake_presets.py
index 85847c2c0fe8..6bc8443c4473 100644
--- a/tools/generate_cmake_presets.py
+++ b/tools/generate_cmake_presets.py
@@ -128,7 +128,7 @@ def generate_presets(output_path="CMakeUserPresets.json", force_overwrite=False)
 
     presets = {
         "version": 6,
-        # Keep in sync with CMakeLists.txt and requirements/build.txt
+        # Keep in sync with CMakeLists.txt and requirements/build/cuda.txt
         "cmakeMinimumRequired": {"major": 3, "minor": 26, "patch": 1},
         "configurePresets": [configure_preset],
         "buildPresets": [
diff --git a/tools/install_deepgemm.sh b/tools/install_deepgemm.sh
index 0e1adda97b68..a7bf17331073 100755
--- a/tools/install_deepgemm.sh
+++ b/tools/install_deepgemm.sh
@@ -5,8 +5,9 @@
 set -e
 
 # Default values
+# Keep DEEPGEMM_GIT_REF in sync with cmake/external_projects/deepgemm.cmake
 DEEPGEMM_GIT_REPO="https://github.com/deepseek-ai/DeepGEMM.git"
-DEEPGEMM_GIT_REF="477618cd51baffca09c4b0b87e97c03fe827ef03"
+DEEPGEMM_GIT_REF="891d57b4db1071624b5c8fa0d1e51cb317fa709f"
 WHEEL_DIR=""
 
 # Parse command line arguments
diff --git a/tools/install_protoc.sh b/tools/install_protoc.sh
new file mode 100755
index 000000000000..a995fdb6f6a2
--- /dev/null
+++ b/tools/install_protoc.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Install a pinned protoc binary from upstream GitHub releases.
+#
+# Distro protobuf-compiler packages vary widely in version (e.g.
+# AlmaLinux/RHEL 8 ships protoc 3.5, predating the
+# --experimental_allow_proto3_optional flag the rust frontend's build.rs
+# passes), so we pin the protoc version here instead.
+#
+# Override the version via the PROTOC_VERSION env var.
+# Requires: curl, unzip, root privileges.
+
+if [[ $(id -u) -ne 0 ]]; then
+  echo "Must be run as root" >&2
+  exit 1
+fi
+
+VERSION="${PROTOC_VERSION:-34.2}"
+
+ARCH="$(uname -m)"
+case "${ARCH}" in
+    # protoc release archives use "aarch_64" (with an underscore), not
+    # "aarch64". Don't "fix" this.
+    aarch64|arm64) URL_ARCH="aarch_64" ;;
+    x86_64|amd64)  URL_ARCH="x86_64" ;;
+    *) echo "Unsupported arch for protoc binary: ${ARCH}" >&2; exit 1 ;;
+esac
+
+URL="https://github.com/protocolbuffers/protobuf/releases/download/v${VERSION}/protoc-${VERSION}-linux-${URL_ARCH}.zip"
+TMPDIR="$(mktemp -d)"
+trap 'rm -rf "${TMPDIR}"' EXIT
+
+echo "Downloading: ${URL}"
+curl -fsSL -o "${TMPDIR}/protoc.zip" "${URL}"
+unzip -q -o "${TMPDIR}/protoc.zip" -d /usr/local
+echo "Installed $(protoc --version)"
diff --git a/tools/pre_commit/check_forbidden_imports.py b/tools/pre_commit/check_forbidden_imports.py
index ac7d8b096ec4..365b2f5bb771 100644
--- a/tools/pre_commit/check_forbidden_imports.py
+++ b/tools/pre_commit/check_forbidden_imports.py
@@ -31,6 +31,7 @@ class ForbiddenImport:
             "vllm/transformers_utils/config.py",
             "vllm/model_executor/models/registry.py",
             "vllm/compilation/caching.py",
+            "vllm/env_override.py",
             "vllm/compilation/piecewise_backend.py",
             "vllm/distributed/utils.py",
             "vllm/distributed/parallel_state.py",
diff --git a/tools/pre_commit/check_torch_cuda.py b/tools/pre_commit/check_torch_cuda.py
index ea84618a0882..bfbb36ffbfff 100644
--- a/tools/pre_commit/check_torch_cuda.py
+++ b/tools/pre_commit/check_torch_cuda.py
@@ -9,10 +9,18 @@
 # --------------------------------------------------------------------------- #
 _TORCH_CUDA_PATTERNS = [
     r"\btorch\.cuda\.(empty_cache|synchronize|device_count|current_device|memory_reserved|memory_allocated|max_memory_allocated|max_memory_reserved|reset_peak_memory_stats|memory_stats|set_device|device\()\b",
+    r"\btorch\.cuda\.(manual_seed|manual_seed_all)\b",
     r"\bwith\storch\.cuda\.device\b",
+    # Calls torch.cuda.{_is_compiled/_device_count_amdsmi/_device_count_nvml} internally
+    r"\bcuda_device_count_stateless\(\)\b",
 ]
 
-ALLOWED_FILES = {"vllm/platforms/", "vllm/device_allocator/"}
+ALLOWED_FILES = {
+    "vllm/platforms/",
+    "vllm/device_allocator/",
+    "vllm/distributed/weight_transfer/ipc_engine.py",
+    "tests/distributed/test_packed_tensor.py",
+}
 
 
 def scan_file(path: str) -> int:
@@ -22,6 +30,14 @@ def scan_file(path: str) -> int:
         for match in re.finditer(pattern, content, re.MULTILINE):
             # Calculate line number from match position
             line_num = content[: match.start() + 1].count("\n") + 1
+            matched_text = match.group(0)
+            if "manual_seed" in matched_text:
+                print(
+                    f"{path}:{line_num}: "
+                    "\033[91merror:\033[0m "
+                    f"Found {matched_text} API call. Use set_random_seed instead."
+                )
+                return 1
             print(
                 f"{path}:{line_num}: "
                 "\033[91merror:\033[0m "  # red color
diff --git a/tools/pre_commit/generate_attention_backend_docs.py b/tools/pre_commit/generate_attention_backend_docs.py
index bbbf4f4b64f2..abbbdce09b49 100644
--- a/tools/pre_commit/generate_attention_backend_docs.py
+++ b/tools/pre_commit/generate_attention_backend_docs.py
@@ -30,7 +30,6 @@
 RELEVANT_PATTERNS = [
     "vllm/v1/attention/backends/*.py",
     "vllm/v1/attention/backends/**/*.py",
-    "vllm/v1/attention/backends/fa_utils.py",
     "vllm/model_executor/layers/attention/mla_attention.py",
     "vllm/platforms/cuda.py",
     "tools/pre_commit/generate_attention_backend_docs.py",
@@ -68,6 +67,11 @@ def is_relevant_file(filepath: str) -> bool:
     return any(fnmatch.fnmatch(path_str, pattern) for pattern in RELEVANT_PATTERNS)
 
 
+MLA_PREFILL_DIR = BACKENDS_DIR / "mla" / "prefill"
+MLA_PREFILL_REGISTRY_FILE = MLA_PREFILL_DIR / "registry.py"
+MLA_PREFILL_SELECTOR_FILE = MLA_PREFILL_DIR / "selector.py"
+
+
 # ---------------------------------------------------------------------------
 # AST utility helpers
 # ---------------------------------------------------------------------------
@@ -293,6 +297,242 @@ def get_file_from_class_path(class_path: str) -> Path | None:
     return py_file if py_file.exists() else None
 
 
+def parse_mla_prefill_registry() -> dict[str, str]:
+    """Parse MLAPrefillBackendEnum from the prefill registry.
+
+    Returns:
+        A dict mapping backend names to their class paths.
+    """
+    if not MLA_PREFILL_REGISTRY_FILE.exists():
+        return {}
+
+    try:
+        tree = ast.parse(MLA_PREFILL_REGISTRY_FILE.read_text())
+    except Exception:
+        return {}
+
+    for node in ast.walk(tree):
+        if isinstance(node, ast.ClassDef) and node.name == "MLAPrefillBackendEnum":
+            return _extract_enum_values(node)
+    return {}
+
+
+def parse_mla_prefill_priorities() -> dict[str, list[str]]:
+    """Parse MLA prefill backend priorities from selector.py.
+
+    Returns:
+        A dict with keys like 'blackwell' and 'default' containing
+        lists of backend enum names in priority order.
+    """
+    if not MLA_PREFILL_SELECTOR_FILE.exists():
+        return {}
+
+    try:
+        tree = ast.parse(MLA_PREFILL_SELECTOR_FILE.read_text())
+    except Exception:
+        return {}
+
+    priorities: dict[str, list[str]] = {}
+
+    for node in ast.walk(tree):
+        if not isinstance(node, ast.FunctionDef):
+            continue
+        if node.name != "_get_mla_prefill_backend_priorities":
+            continue
+
+        # Look for if statements checking device_capability.major
+        for stmt in ast.walk(node):
+            if not isinstance(stmt, ast.If):
+                continue
+
+            # Check if it's a capability.major == 10 check (Blackwell)
+            is_blackwell = (
+                isinstance(stmt.test, ast.Compare)
+                and isinstance(stmt.test.left, ast.Attribute)
+                and stmt.test.left.attr == "major"
+                and stmt.test.comparators
+                and isinstance(stmt.test.comparators[0], ast.Constant)
+                and stmt.test.comparators[0].value == 10
+            )
+
+            # Extract backends from return statements
+            for body_stmt in stmt.body:
+                if isinstance(body_stmt, ast.Return) and isinstance(
+                    body_stmt.value, ast.List
+                ):
+                    backends = []
+                    for elt in body_stmt.value.elts:
+                        if isinstance(elt, ast.Attribute):
+                            backends.append(elt.attr)
+                    if is_blackwell:
+                        priorities["blackwell"] = backends
+                    else:
+                        priorities["default"] = backends
+
+            # Extract from else branch
+            for else_stmt in stmt.orelse:
+                if isinstance(else_stmt, ast.Return) and isinstance(
+                    else_stmt.value, ast.List
+                ):
+                    backends = []
+                    for elt in else_stmt.value.elts:
+                        if isinstance(elt, ast.Attribute):
+                            backends.append(elt.attr)
+                    priorities["default"] = backends
+
+    return priorities
+
+
+def parse_mla_prefill_backend_file(class_path: str) -> dict[str, Any] | None:
+    """Parse a single MLA prefill backend file to extract its properties.
+
+    Args:
+        class_path: The fully qualified class path.
+
+    Returns:
+        A dict with backend properties, or None if parsing fails.
+    """
+    file_path = get_file_from_class_path(class_path)
+    if file_path is None:
+        return None
+
+    try:
+        tree = ast.parse(file_path.read_text())
+    except Exception:
+        return None
+
+    class_name = class_path.rsplit(".", 1)[1]
+    class_node = find_class_in_ast(tree, class_name)
+    if class_node is None:
+        return None
+
+    info: dict[str, Any] = {
+        "compute_capability": "Any",
+        "requires_r1_dims": False,
+        "dtypes": "fp16, bf16",  # Default from base class
+    }
+
+    # Parse class variables
+    for item in class_node.body:
+        if isinstance(item, ast.Assign):
+            for target in item.targets:
+                if (
+                    isinstance(target, ast.Name)
+                    and target.id == "requires_r1_mla_dimensions"
+                    and isinstance(item.value, ast.Constant)
+                ):
+                    info["requires_r1_dims"] = item.value.value
+
+        # Parse supported_dtypes class variable
+        if (
+            isinstance(item, ast.AnnAssign)
+            and isinstance(item.target, ast.Name)
+            and item.target.id == "supported_dtypes"
+            and isinstance(item.value, ast.List)
+        ):
+            dtype_map = {"float16": "fp16", "bfloat16": "bf16", "float32": "fp32"}
+            dtypes = []
+            for elt in item.value.elts:
+                if isinstance(elt, ast.Attribute):
+                    dtypes.append(dtype_map.get(elt.attr, elt.attr))
+            if dtypes:
+                info["dtypes"] = ", ".join(dtypes)
+
+    # Parse get_name static method
+    get_name_method = find_method(class_node, "get_name")
+    if get_name_method:
+        for n in ast.walk(get_name_method):
+            if isinstance(n, ast.Return) and isinstance(n.value, ast.Constant):
+                info["name"] = n.value.value
+
+    # Parse supports_compute_capability classmethod
+    cc_method = find_method(class_node, "supports_compute_capability")
+    if cc_method:
+        for n in ast.walk(cc_method):
+            # Look for capability.major == 10 style checks
+            if (
+                isinstance(n, ast.Compare)
+                and isinstance(n.left, ast.Attribute)
+                and n.left.attr == "major"
+                and n.comparators
+                and isinstance(n.comparators[0], ast.Constant)
+            ):
+                major = n.comparators[0].value
+                info["compute_capability"] = f"{major}.x"
+
+    return info
+
+
+def parse_mla_prefill_backends() -> list[dict[str, Any]]:
+    """Parse MLA prefill backend options from the prefill registry.
+
+    MLA uses different backends for prefill vs decode. The decode backends are
+    registered in the main registry, but prefill backends have their own
+    registry at vllm/v1/attention/backends/mla/prefill/registry.py.
+
+    Returns a list of prefill backend info dicts with their requirements.
+    """
+    registry = parse_mla_prefill_registry()
+    priorities = parse_mla_prefill_priorities()
+
+    if not registry:
+        return []
+
+    # Get the priority order (Blackwell order shows all backends)
+    priority_order = priorities.get("blackwell", list(registry.keys()))
+
+    prefill_backends: list[dict[str, Any]] = []
+
+    # Backend-specific metadata that can't be easily parsed from code
+    backend_metadata = {
+        "TRTLLM_RAGGED": {
+            "description": "TensorRT-LLM ragged attention",
+        },
+        "FLASHINFER": {
+            "description": "FlashInfer CUTLASS backend",
+        },
+        "FLASH_ATTN": {
+            "description": "FlashAttention varlen (FA2/FA3/FA4)",
+        },
+    }
+
+    for backend_name in priority_order:
+        if backend_name not in registry:
+            continue
+
+        class_path = registry[backend_name]
+        backend_info = parse_mla_prefill_backend_file(class_path)
+        if backend_info is None:
+            continue
+
+        metadata = backend_metadata.get(backend_name, {})
+        display_name = backend_info.get("name", backend_name)
+
+        # Add marker for default Blackwell backend
+        marker = ""
+        if backend_name == priority_order[0] and priorities.get("blackwell"):
+            marker = "‡"
+
+        notes = ""
+        if backend_info.get("requires_r1_dims"):
+            notes = "DeepSeek R1 dims only"
+        elif backend_name == "FLASH_ATTN":
+            notes = "FA4 on SM100+, FA3 on SM90, FA2 otherwise"
+
+        prefill_backends.append(
+            {
+                "name": display_name,
+                "marker": marker,
+                "description": metadata.get("description", ""),
+                "dtypes": backend_info.get("dtypes", "fp16, bf16"),
+                "compute_capability": backend_info.get("compute_capability", "Any"),
+                "notes": notes,
+            }
+        )
+
+    return prefill_backends
+
+
 # ---------------------------------------------------------------------------
 # Backend feature extraction from AST
 # ---------------------------------------------------------------------------
@@ -570,6 +810,9 @@ def analyze_backend(backend_name: str, class_path: str) -> dict[str, Any] | None
         "compute_capability": compute_cap,
         "is_mla": is_mla_backend or check_method_overrides(class_node, "is_mla"),
         "supports_sink": check_method_overrides(class_node, "supports_sink"),
+        "supports_non_causal": check_method_overrides(
+            class_node, "supports_non_causal"
+        ),
         "is_sparse": check_method_overrides(class_node, "is_sparse"),
         "supports_mm_prefix": check_method_overrides(class_node, "supports_mm_prefix"),
         "supports_dcp": supports_dcp,
@@ -634,9 +877,10 @@ def parse_flash_attn_features() -> dict[str, dict[str, Any]]:
     except Exception:
         return {}
 
-    # Analyze the functions to determine FA3-specific features
-    fa3_supports_fp8 = False
+    # Analyze the functions to determine FA3/FA4-specific features
+    fa3_supports_fp8 = True
     fa3_supports_sinks = False
+    fa4_supports_sinks = False
     fa3_compute_cap: str | None = None
     fa4_compute_cap: str | None = None
 
@@ -644,29 +888,49 @@ def parse_flash_attn_features() -> dict[str, dict[str, Any]]:
         if not isinstance(node, ast.FunctionDef):
             continue
 
-        # Check flash_attn_supports_fp8 - looks for `get_flash_attn_version() == 3`
-        if node.name == "flash_attn_supports_fp8":
+        # Check flash_attn_supports_sinks - looks for `fa_version == 3/4`
+        # or `get_flash_attn_version() == 3/4` (also accepts `in (3, 4)`)
+        if node.name == "flash_attn_supports_sinks":
             for n in ast.walk(node):
                 if (
                     isinstance(n, ast.Compare)
-                    and isinstance(n.left, ast.Call)
-                    and isinstance(n.left.func, ast.Name)
-                    and n.left.func.id == "get_flash_attn_version"
+                    and len(n.ops) == 1
+                    and isinstance(n.ops[0], ast.Eq)
+                    and isinstance(n.comparators[0], ast.Constant)
                 ):
-                    fa3_supports_fp8 = True
-                    break
-
-        # Check flash_attn_supports_sinks - looks for `get_flash_attn_version() == 3`
-        if node.name == "flash_attn_supports_sinks":
-            for n in ast.walk(node):
-                if (
+                    is_version_compare = (
+                        isinstance(n.left, ast.Name) and n.left.id == "fa_version"
+                    ) or (
+                        isinstance(n.left, ast.Call)
+                        and isinstance(n.left.func, ast.Name)
+                        and n.left.func.id == "get_flash_attn_version"
+                    )
+                    if is_version_compare:
+                        val = n.comparators[0].value
+                        if val == 3:
+                            fa3_supports_sinks = True
+                        elif val == 4:
+                            fa4_supports_sinks = True
+                elif (
                     isinstance(n, ast.Compare)
-                    and isinstance(n.left, ast.Call)
-                    and isinstance(n.left.func, ast.Name)
-                    and n.left.func.id == "get_flash_attn_version"
+                    and len(n.ops) == 1
+                    and isinstance(n.ops[0], ast.In)
+                    and isinstance(n.comparators[0], (ast.Tuple, ast.List, ast.Set))
                 ):
-                    fa3_supports_sinks = True
-                    break
+                    is_version_compare = (
+                        isinstance(n.left, ast.Name) and n.left.id == "fa_version"
+                    ) or (
+                        isinstance(n.left, ast.Call)
+                        and isinstance(n.left.func, ast.Name)
+                        and n.left.func.id == "get_flash_attn_version"
+                    )
+                    if is_version_compare:
+                        for elt in n.comparators[0].elts:
+                            if isinstance(elt, ast.Constant):
+                                if elt.value == 3:
+                                    fa3_supports_sinks = True
+                                elif elt.value == 4:
+                                    fa4_supports_sinks = True
 
         # Check get_flash_attn_version for FA3/FA4 compute capability
         if node.name == "get_flash_attn_version":
@@ -731,7 +995,7 @@ def parse_flash_attn_features() -> dict[str, dict[str, Any]]:
         "fa4": {
             "compute_capability": fa4_compute_cap,
             "supports_fp8": False,
-            "supports_sink": False,
+            "supports_sink": fa4_supports_sinks,
         },
     }
 
@@ -755,6 +1019,11 @@ def parse_flashinfer_trtllm_features() -> dict[str, dict[str, Any]]:
     if not trtllm_compute_cap:
         return {}
 
+    # KV cache dtypes that only work with a dedicated kernel (e.g. nvfp4
+    # requires the SM100 NVFP4 MHA kernel) and should not appear in the
+    # generic attention-backend feature matrix.
+    kernel_only_kv_dtypes = ["nvfp4"]
+
     return {
         "native": {
             # Native FlashInfer: everything except SM100
@@ -765,89 +1034,10 @@ def parse_flashinfer_trtllm_features() -> dict[str, dict[str, Any]]:
             "compute_capability": trtllm_compute_cap,
             "supports_sink": True,
         },
+        "exclude_kv_dtypes": kernel_only_kv_dtypes,
     }
 
 
-def parse_mla_prefill_backends() -> list[dict[str, Any]]:
-    """Parse MLA prefill backend options from mla_attention.py.
-
-    MLA uses different backends for prefill vs decode. The decode backends are
-    registered in the registry, but prefill backends are selected at runtime
-    based on conditions in MLACommonImpl.__init__.
-
-    Returns a list of prefill backend info dicts with their requirements.
-    """
-    if not MLA_ATTENTION_FILE.exists():
-        return []
-
-    try:
-        tree = ast.parse(MLA_ATTENTION_FILE.read_text())
-    except Exception:
-        return []
-
-    # Find compute capability requirements by parsing use_* functions
-    trtllm_cc = _find_cc_in_function(tree, "use_trtllm_ragged_deepseek_prefill")
-    flashinfer_cc = _find_cc_in_function(tree, "use_flashinfer_prefill")
-    cudnn_cc = _find_cc_in_function(tree, "use_cudnn_prefill")
-
-    # Build prefill backend list based on what we found
-    # Order matches the priority in MLACommonImpl.__init__
-    prefill_backends: list[dict[str, Any]] = []
-
-    # TRT-LLM Ragged (highest priority if available)
-    if trtllm_cc:
-        prefill_backends.append(
-            {
-                "name": "TRT-LLM Ragged‡",
-                "description": "TensorRT-LLM ragged attention",
-                "compute_capability": trtllm_cc,
-                "enable": "Default on SM100",
-                "disable": "`-ac.use_trtllm_ragged_deepseek_prefill=0`",
-                "notes": "DeepSeek R1 dims only",
-            }
-        )
-
-    # FlashInfer prefill
-    if flashinfer_cc:
-        prefill_backends.append(
-            {
-                "name": "FlashInfer",
-                "description": "FlashInfer CUTLASS backend",
-                "compute_capability": flashinfer_cc,
-                "enable": "`-ac.disable_flashinfer_prefill=0`",
-                "disable": "`-ac.disable_flashinfer_prefill=1`",
-                "notes": "DeepSeek R1 dims only",
-            }
-        )
-
-    # cuDNN prefill
-    if cudnn_cc:
-        prefill_backends.append(
-            {
-                "name": "cuDNN",
-                "description": "cuDNN-based attention",
-                "compute_capability": cudnn_cc,
-                "enable": "`-ac.use_cudnn_prefill=1`",
-                "disable": "`-ac.use_cudnn_prefill=0`",
-                "notes": "",
-            }
-        )
-
-    # FlashAttention is always available as fallback
-    prefill_backends.append(
-        {
-            "name": "FlashAttention",
-            "description": "FlashAttention varlen (FA2/FA3)",
-            "compute_capability": "Any",
-            "enable": "Default fallback",
-            "disable": "Use other backends",
-            "notes": "FA3 on SM90, FA2 otherwise",
-        }
-    )
-
-    return prefill_backends
-
-
 # ---------------------------------------------------------------------------
 # Backend variant expansion (FA2/FA3/FA4, FlashInfer native/TRTLLM)
 # ---------------------------------------------------------------------------
@@ -930,6 +1120,15 @@ def _expand_flashinfer_variants(
         native["supports_sink"] = fi_features["native"]["supports_sink"]
         native["compute_capability"] = f"{min_cc}.x-9.x"
 
+        # Remove KV dtypes only supported by SM100 kernels (e.g. nvfp4)
+        exclude = fi_features.get("exclude_kv_dtypes", [])
+        if exclude:
+            native["kv_cache_dtypes"] = ", ".join(
+                d
+                for d in (d.strip() for d in native["kv_cache_dtypes"].split(","))
+                if d not in exclude
+            )
+
         # Create TRTLLM entry
         trtllm = backend.copy()
         trtllm["version"] = "TRTLLM†"
@@ -1103,6 +1302,10 @@ def _extract_priorities(body: list, priorities: dict[str, list[str]], prefix: st
 _COL_BLOCK_SIZES: TableColumn = ("Block Sizes", lambda b: b["block_sizes"])
 _COL_HEAD_SIZES: TableColumn = ("Head Sizes", lambda b: b["head_sizes"])
 _COL_SINK: TableColumn = ("Sink", lambda b: bool_to_emoji(b["supports_sink"]))
+_COL_NON_CAUSAL: TableColumn = (
+    "Non-Causal",
+    lambda b: bool_to_emoji(b["supports_non_causal"]),
+)
 _COL_SPARSE: TableColumn = ("Sparse", lambda b: bool_to_emoji(b["is_sparse"]))
 _COL_MM_PREFIX: TableColumn = (
     "MM Prefix",
@@ -1136,6 +1339,7 @@ def _build_columns(is_mla: bool, has_versions: bool) -> list[TableColumn]:
         cols.append(_COL_VERSION)
     cols.extend([_COL_DTYPES, _COL_KV_DTYPES, _COL_BLOCK_SIZES, _COL_HEAD_SIZES])
     cols.append(_COL_SINK)
+    cols.append(_COL_NON_CAUSAL)
     if is_mla:
         cols.append(_COL_SPARSE)
     cols.extend([_COL_MM_PREFIX, _COL_DCP, _COL_ATTN_TYPES, _COL_COMPUTE_CAP])
@@ -1346,6 +1550,7 @@ def generate_legend() -> str:
 | **Block Sizes** | Supported KV cache block sizes (%N means multiples of N) |
 | **Head Sizes** | Supported attention head sizes |
 | **Sink** | Attention sink support (for StreamingLLM) |
+| **Non-Causal** | Non-causal (bidirectional) attention support for decoder models |
 | **Sparse** | Sparse attention support (MLA only) |
 | **MM Prefix** | Multimodal prefix full attention support |
 | **DCP** | Decode Context Parallelism support (`--decode-context-parallel-size`) |
@@ -1367,20 +1572,22 @@ def generate_mla_section(
         "",
         "### Prefill Backends",
         "",
-        "The prefill backend is selected at runtime based on hardware and",
-        "configuration.",
+        "To explicitly select a prefill backend, use",
+        "`-ac.mla_prefill_backend=<BACKEND>` (e.g., `FLASH_ATTN`, `FLASHINFER`).",
+        "Otherwise, the prefill backend is selected automatically at runtime based on",
+        "hardware and configuration.",
         "",
-        "| Backend | Description | Compute Cap. | Enable | Disable | Notes |",
-        "| ------- | ----------- | ------------ | ------ | ------- | ----- |",
+        "| Backend | Description | Dtypes | Compute Cap. | Notes |",
+        "| ------- | ----------- | ------ | ------------ | ----- |",
     ]
 
     for backend in prefill_backends:
-        row = "| {} | {} | {} | {} | {} | {} |".format(
+        row = "| `{}`{} | {} | {} | {} | {} |".format(
             backend["name"],
+            backend.get("marker", ""),
             backend["description"],
+            backend.get("dtypes", "fp16, bf16"),
             backend["compute_capability"],
-            backend["enable"],
-            backend["disable"],
             backend.get("notes", ""),
         )
         lines.append(row.replace("  ", " "))
@@ -1393,6 +1600,9 @@ def generate_mla_section(
             "",
             "### Decode Backends",
             "",
+            "MLA decode backends are selected using the standard",
+            "`-ac.backend=<BACKEND>` argument (e.g., `FLASHMLA`, `TRITON_MLA`).",
+            "",
         ]
     )
 
diff --git a/tools/pre_commit/generate_nightly_torch_test.py b/tools/pre_commit/generate_nightly_torch_test.py
index a3d7f7a609ba..354da54df8f4 100644
--- a/tools/pre_commit/generate_nightly_torch_test.py
+++ b/tools/pre_commit/generate_nightly_torch_test.py
@@ -3,15 +3,15 @@
 """
 Generates specialized requirements files for nightly PyTorch testing.
 
-This script reads the main test requirements input file (`requirements/test.in`)
+This script reads the main test requirements input file (`requirements/test/cuda.in`)
 and splits its content into two files:
-1.  `requirements/nightly_torch_test.txt`: Contains dependencies
+1.  `requirements/test/nightly-torch.txt`: Contains dependencies
 except PyTorch-related.
 2.  `torch_nightly_test.txt`: Contains only PyTorch-related packages.
 """
 
-input_file = "requirements/test.in"
-output_file = "requirements/nightly_torch_test.txt"
+input_file = "requirements/test/cuda.in"
+output_file = "requirements/test/nightly-torch.txt"
 
 # white list of packages that are not compatible with PyTorch nightly directly
 # with pip install. Please add your package to this list if it is not compatible
diff --git a/tools/pre_commit/mypy.py b/tools/pre_commit/mypy.py
index 1ba1f81564cc..228550808242 100755
--- a/tools/pre_commit/mypy.py
+++ b/tools/pre_commit/mypy.py
@@ -29,15 +29,12 @@
     "tests",
     # v0 related
     "vllm/lora",
-    "vllm/model_executor/layers",
 ]
 
 # TODO(woosuk): Include the code from Megatron and HuggingFace.
 EXCLUDE = [
     "vllm/model_executor/models",
     "vllm/model_executor/layers/fla/ops",
-    # Ignore triton kernels in ops.
-    "vllm/v1/attention/ops",
     # TODO: Remove these entries after fixing mypy errors.
     "vllm/benchmarks",
 ]
@@ -101,7 +98,6 @@ def mypy(
 
 
 def main():
-    ci = sys.argv[1] == "1"
     python_version = sys.argv[2]
     file_groups = group_files(sys.argv[3:])
 
@@ -110,7 +106,7 @@ def main():
 
     returncode = 0
     for file_group, changed_files in file_groups.items():
-        follow_imports = None if ci and file_group == "" else "skip"
+        follow_imports = None if file_group == "" else "skip"
         if changed_files:
             returncode |= mypy(
                 changed_files, python_version, follow_imports, file_group
diff --git a/tools/pre_commit/rust-check.sh b/tools/pre_commit/rust-check.sh
new file mode 100755
index 000000000000..2bff1bfacd2a
--- /dev/null
+++ b/tools/pre_commit/rust-check.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+# Wrapper for the rust-* pre-commit hooks.
+#
+# Skips (with a warning) when `cargo` or the requested cargo subcommand is
+# not installed, so contributors who don't touch the Rust code aren't forced
+# to install the Rust toolchain (or niche cargo extensions like cargo-sort
+# / cargo-autoinherit). Buildkite CI covers the rust hooks regardless.
+#
+# Usage: tools/pre_commit/rust-check.sh <cargo-subcommand> [extra cargo args...]
+
+set -euo pipefail
+
+# Pre-commit captures stdout/stderr and only replays on failure. Try to write
+# to /dev/tty so the warning is visible during a normal `git commit` even
+# though we exit 0; fall back to stderr where there's no controlling tty
+# (e.g. CI).
+#
+# The leading newline pushes the warning off pre-commit's dot-leader line so
+# the message doesn't mash into "Rust - ... ........WARNING:". The hook's
+# "Passed" still lands on its own line just below the warning.
+warn() {
+    { printf '\n%s\n' "$*" >/dev/tty; } 2>/dev/null || printf '\n%s\n' "$*" >&2
+}
+
+subcommand="$1"
+shift
+
+if ! command -v cargo >/dev/null 2>&1; then
+    warn "WARNING: 'cargo' not found in PATH; skipping rust pre-commit hook (cargo ${subcommand}).
+         Install the Rust toolchain via https://rustup.rs/ if you need to run rust hooks locally."
+    exit 0
+fi
+
+# Cargo subcommands resolve to a `cargo-<name>` binary on PATH. Check up-front
+# so a missing helper produces a friendly skip instead of a cargo error.
+if ! command -v "cargo-${subcommand}" >/dev/null 2>&1; then
+    case "${subcommand}" in
+        fmt) install_hint="rustup component add rustfmt" ;;
+        *)   install_hint="cargo install cargo-${subcommand}" ;;
+    esac
+    warn "WARNING: 'cargo ${subcommand}' is not installed; skipping rust pre-commit hook.
+         Install it with: ${install_hint}"
+    exit 0
+fi
+
+cd "$(git rev-parse --show-toplevel)/rust"
+exec cargo "${subcommand}" "$@"
diff --git a/tools/setup_deepgemm_pythons.sh b/tools/setup_deepgemm_pythons.sh
new file mode 100755
index 000000000000..d98ea7e0370d
--- /dev/null
+++ b/tools/setup_deepgemm_pythons.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# Provision one bare Python per `requires-python` entry (or per argument) and
+# print their paths as ":"-separated DEEPGEMM_PYTHON_INTERPRETERS. Skip this
+# entirely if you already have interpreter paths.
+#
+# Usage:
+#   export DEEPGEMM_PYTHON_INTERPRETERS=$(tools/setup_deepgemm_pythons.sh)
+#   python setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
+#
+# Optional: DEEPGEMM_VENV_PREFIX (default: /tmp/dgenv).
+set -euo pipefail
+
+if [ "$#" -eq 0 ]; then
+  # Derive the matrix from `requires-python = ">=3.X,<3.Y"` in pyproject.toml.
+  pyproject="$(dirname "$0")/../pyproject.toml"
+  spec=$(grep -E '^requires-python' "$pyproject" \
+         | grep -oE '>=3\.[0-9]+,<3\.[0-9]+')
+  lo=${spec#>=3.}; lo=${lo%%,*}
+  hi=${spec##*<3.}
+  set -- $(seq "$lo" $((hi - 1)) | sed 's/^/3./')
+fi
+
+prefix="${DEEPGEMM_VENV_PREFIX:-/tmp/dgenv}"
+mkdir -p "$prefix"
+
+paths=""
+for V in "$@"; do
+  venv="$prefix/$V"
+  # uv-managed Python ensures Python.h is present; system 3.X-dev packages
+  # on the manylinux / Ubuntu build bases are not always installed.
+  [ -x "$venv/bin/python" ] || \
+    uv venv --python "$V" "$venv" --python-preference only-managed --seed \
+      >/dev/null
+  paths="$paths:$venv/bin/python"
+done
+echo "${paths#:}"
diff --git a/tools/vllm-tpu/build.sh b/tools/vllm-tpu/build.sh
index aa46a5298bff..226439ce58fb 100755
--- a/tools/vllm-tpu/build.sh
+++ b/tools/vllm-tpu/build.sh
@@ -79,12 +79,15 @@ trap cleanup EXIT HUP INT QUIT PIPE TERM # Register cleanup function to run on s
 
 echo "Updating pyproject.toml completed. Proceeding with build..."
 
+echo "Install dependencies for no-isolation build"
+pip install -r "$VLLM_DIR/requirements/build/tpu.txt"
+
 echo "Building wheel for TPU..."
 rm -rf dist/
 mkdir -p dist/
 
 # User confirmed to use 'python -m build' directly
-if ! VLLM_TARGET_DEVICE=tpu python -m build; then
+if ! VLLM_TARGET_DEVICE=tpu python -m build --no-isolation; then
     echo "Error: Python build command failed. Check if 'python -m build' works and the 'build' module is installed."
     exit 1
 fi
diff --git a/use_existing_torch.py b/use_existing_torch.py
index 7c58a34d69dd..39c327e96708 100644
--- a/use_existing_torch.py
+++ b/use_existing_torch.py
@@ -30,8 +30,8 @@ def main(argv):
     args = parser.parse_args(argv)
 
     for file in (
-        *glob.glob("requirements/*.txt"),
-        *glob.glob("requirements/*.in"),
+        *glob.glob("requirements/**/*.txt", recursive=True),
+        *glob.glob("requirements/**/*.in", recursive=True),
         "pyproject.toml",
     ):
         with open(file) as f:
diff --git a/vllm/_aiter_ops.py b/vllm/_aiter_ops.py
index fefbf6a41170..ce4fc3cfbadb 100644
--- a/vllm/_aiter_ops.py
+++ b/vllm/_aiter_ops.py
@@ -2,18 +2,27 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import functools
 from collections.abc import Callable
+from contextlib import contextmanager
+from typing import Protocol
 
 import torch
 from torch._ops import OpOverload
+from torch.distributed import ProcessGroup
 
 import vllm.envs as envs
 from vllm.platforms import current_platform
+from vllm.utils.import_utils import PlaceholderModule
 from vllm.utils.torch_utils import direct_register_custom_op
 from vllm.v1.attention.ops.rocm_aiter_mla_sparse import (
     rocm_aiter_sparse_attn_indexer,
     rocm_aiter_sparse_attn_indexer_fake,
 )
 
+try:
+    import pandas as pd
+except ImportError:
+    pd = PlaceholderModule("pandas")
+
 # fp8_dtype is not cached.
 # on ROCm the fp8_dtype always calls is_fp8_fnuz
 # which is a host op, so we cache it once here.
@@ -33,6 +42,27 @@ def is_aiter_found() -> bool:
 IS_AITER_FOUND = is_aiter_found()
 
 
+class AiterCustomAllreduceProto(Protocol):
+    max_size: int
+    world_size: int
+    fully_connected: bool
+
+    @contextmanager
+    def capture(self): ...
+    def close(self) -> None: ...
+    def fused_ar_rms(
+        self,
+        inp: torch.Tensor,
+        res_inp: torch.Tensor,
+        *,
+        w: torch.Tensor,
+        eps: float,
+        registered: bool = False,
+        use_1stage: bool = False,
+    ) -> tuple[torch.Tensor, torch.Tensor]: ...
+    def should_custom_ar(self, inp: torch.Tensor) -> bool: ...
+
+
 def is_aiter_found_and_supported() -> bool:
     """Check if AITER library is available and platform supports it.
 
@@ -56,6 +86,29 @@ def is_aiter_found_and_supported() -> bool:
     return False
 
 
+@functools.cache
+def _load_gemm_tuned_configs(
+    q_dtype_w: torch.dtype, csv_path: str
+) -> set[tuple[int, int, int]]:
+    try:
+        df = pd.read_csv(csv_path).drop_duplicates()
+        df = df[df["q_dtype_w"] == str(q_dtype_w)]
+        return set(zip(df["N"].astype(int), df["K"].astype(int), df["M"].astype(int)))
+    except Exception:
+        return set()
+
+
+def _check_kernel_tuned(N: int, K: int, q_dtype_w: torch.dtype, csv_path: str) -> bool:
+    configs = _load_gemm_tuned_configs(q_dtype_w, csv_path)
+    l_m = (
+        [1, 2, 4]
+        + list(range(8, 513, 8))
+        + [1024, 1536]
+        + [2**i for i in range(11, 19)]
+    )
+    return any((N, K, M) in configs for M in l_m)
+
+
 def if_aiter_supported(func: Callable) -> Callable:
     """Decorator that only executes the function if
     ROCm AITER package is supported and enabled on gfx9 archs.
@@ -208,11 +261,19 @@ def _rocm_aiter_topk_softmax_impl(
     token_expert_indices: torch.Tensor,
     gating_output: torch.Tensor,
     renormalize: bool,
+    num_shared_experts: int = 0,
+    shared_expert_scoring_func: str = "",
 ) -> None:
     from aiter import topk_softmax
 
     topk_softmax(
-        topk_weights, topk_indices, token_expert_indices, gating_output, renormalize
+        topk_weights,
+        topk_indices,
+        token_expert_indices,
+        gating_output,
+        renormalize,
+        num_shared_experts,
+        shared_expert_scoring_func,
     )
 
 
@@ -222,6 +283,8 @@ def _rocm_aiter_topk_softmax_fake(
     token_expert_indices: torch.Tensor,
     gating_output: torch.Tensor,
     renormalize: bool,
+    num_shared_experts: int = 0,
+    shared_expert_scoring_func: str = "",
 ) -> None:
     pass
 
@@ -336,13 +399,48 @@ def _rocm_aiter_fused_topk_fake(
     router_logits: torch.Tensor,
     top_k: int,
     gate_up: bool,
-) -> None:
-    # tuple[torch.Tensor, torch.Tensor]:
-    pass
+) -> tuple[torch.Tensor, torch.Tensor]:
+    num_tokens = x.shape[0]
+    topk_weights = torch.empty(
+        (num_tokens, top_k), dtype=torch.float32, device=x.device
+    )
+    topk_indices = torch.empty((num_tokens, top_k), dtype=torch.int32, device=x.device)
+    return topk_weights, topk_indices
 
 
 # Cache whether aiter supports FP8 MLA parameters
 _AITER_MLA_SUPPORTS_FP8: bool | None = None
+_AITER_HAS_FUSED_QK_RMSNORM: bool | None = None
+
+
+def check_aiter_fused_qk_rmsnorm() -> bool:
+    """Check if aiter provides fused_qk_rmsnorm.
+
+    Supports both the new private name ``_fused_qk_rmsnorm``
+    (AITER >= PR #2958) and the old public name ``fused_qk_rmsnorm``
+    (AITER >= PR #2442).
+
+    TODO(rbrugaro-amd): remove the legacy fused_qk_rmsnorm path once
+    AITER stabilizes the API (https://github.com/ROCm/aiter/issues/3207).
+    """
+    global _AITER_HAS_FUSED_QK_RMSNORM
+    if _AITER_HAS_FUSED_QK_RMSNORM is None:
+        try:
+            from aiter.ops.fused_qk_norm_rope_cache_quant import (  # noqa: F401
+                _fused_qk_rmsnorm,
+            )
+
+            _AITER_HAS_FUSED_QK_RMSNORM = True
+        except (ImportError, ModuleNotFoundError, AttributeError):
+            try:
+                from aiter.ops.fused_qk_norm_rope_cache_quant import (  # noqa: F401
+                    fused_qk_rmsnorm,
+                )
+
+                _AITER_HAS_FUSED_QK_RMSNORM = True
+            except (ImportError, ModuleNotFoundError, AttributeError):
+                _AITER_HAS_FUSED_QK_RMSNORM = False
+    return _AITER_HAS_FUSED_QK_RMSNORM
 
 
 def _check_aiter_mla_fp8_support() -> bool:
@@ -464,7 +562,7 @@ def _rocm_aiter_mla_decode_fwd_fake(
     pass
 
 
-def _rocm_aiter_gemm_a8w8_impl(
+def _rocm_aiter_w8a8_gemm_impl(
     A: torch.Tensor,
     B: torch.Tensor,
     As: torch.Tensor,
@@ -481,7 +579,7 @@ def _rocm_aiter_gemm_a8w8_impl(
     return gemm_a8w8_CK(A, B, As, Bs, bias, output_dtype)
 
 
-def _rocm_aiter_gemm_a8w8_fake(
+def _rocm_aiter_w8a8_gemm_fake(
     A: torch.Tensor,
     B: torch.Tensor,
     As: torch.Tensor,
@@ -495,6 +593,35 @@ def _rocm_aiter_gemm_a8w8_fake(
     return Y
 
 
+def _rocm_aiter_preshuffled_per_token_w8a8_gemm_impl(
+    A: torch.Tensor,
+    B: torch.Tensor,
+    As: torch.Tensor,
+    Bs: torch.Tensor,
+    bias: torch.Tensor | None = None,
+    output_dtype: torch.dtype = torch.float16,
+) -> torch.Tensor:
+    from aiter import gemm_a8w8_bpreshuffle
+
+    output = gemm_a8w8_bpreshuffle(A, B, As, Bs, None, output_dtype)
+    if bias is not None:
+        output.add_(bias)
+    return output
+
+
+def _rocm_aiter_preshuffled_per_token_w8a8_gemm_fake(
+    A: torch.Tensor,
+    B: torch.Tensor,
+    As: torch.Tensor,
+    Bs: torch.Tensor,
+    bias: torch.Tensor | None = None,
+    output_dtype: torch.dtype = torch.float16,
+) -> torch.Tensor:
+    m = A.shape[0]
+    n = B.shape[0]
+    return torch.empty(m, n, dtype=output_dtype, device=A.device)
+
+
 def _rocm_aiter_triton_gemm_a8w8_blockscale_impl(
     A: torch.Tensor,
     B: torch.Tensor,
@@ -545,58 +672,6 @@ def _rocm_aiter_gemm_a8w8_blockscale_fake(
     return Y
 
 
-def _rocm_aiter_rms_norm_impl(
-    x: torch.Tensor, weight: torch.Tensor, variance_epsilon: float
-) -> torch.Tensor:
-    from aiter import rms_norm
-
-    if x.dim() > 2:
-        x_original_shape = x.shape
-        x = x.reshape(-1, x_original_shape[-1])
-        x = rms_norm(x, weight, variance_epsilon)
-        return x.reshape(x_original_shape)
-
-    return rms_norm(x, weight, variance_epsilon)
-
-
-def _rocm_aiter_rms_norm_fake(
-    x: torch.Tensor, weight: torch.Tensor, variance_epsilon: float
-) -> torch.Tensor:
-    return torch.empty_like(x)
-
-
-def _rocm_aiter_rmsnorm2d_fwd_with_add_impl(
-    x: torch.Tensor,
-    residual: torch.Tensor,
-    weight: torch.Tensor,
-    variance_epsilon: float,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    from aiter import rmsnorm2d_fwd_with_add
-
-    residual_out = torch.empty_like(residual)
-    out = torch.empty_like(x)
-    rmsnorm2d_fwd_with_add(
-        out,  # output
-        x,  # input
-        residual,  # residual input
-        residual_out,  # residual output
-        weight,
-        variance_epsilon,
-    )
-    return out, residual_out
-
-
-def _rocm_aiter_rmsnorm2d_fwd_with_add_fake(
-    x: torch.Tensor,
-    residual: torch.Tensor,
-    weight: torch.Tensor,
-    variance_epsilon: float,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    residual_out = torch.empty_like(residual)
-    out = torch.empty_like(x)
-    return out, residual_out
-
-
 def _rocm_aiter_rmsnorm_fused_add_dynamic_quant_impl(
     x: torch.Tensor,
     residual: torch.Tensor,
@@ -672,6 +747,59 @@ def _rocm_aiter_rmsnorm_fused_dynamic_quant_fake(
     return out, y_scale
 
 
+def _rocm_aiter_fused_allreduce_rmsnorm_impl(
+    input_: torch.Tensor,
+    residual: torch.Tensor,
+    weight: torch.Tensor,
+    epsilon: float,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    aiter_ar = rocm_aiter_ops.get_aiter_allreduce()
+    assert aiter_ar is not None, "aiter allreduce must be initialized"
+
+    total_bytes = input_.numel() * input_.element_size()
+    hidden_dim = input_.shape[-1]
+    token_num = input_.shape[0]
+    if input_.dtype in (torch.bfloat16, torch.float16):
+        pack_size = 16 // input_.element_size()
+        hidden_ok = hidden_dim % pack_size == 0 and hidden_dim // pack_size <= 1024
+    else:
+        hidden_ok = False
+    token_ok = token_num <= 80
+    world_size = aiter_ar.world_size
+    full_nvlink = aiter_ar.fully_connected
+
+    if world_size == 2:
+        size_ok = True
+    elif full_nvlink and world_size <= 4:
+        size_ok = total_bytes < 256 * 1024
+    elif full_nvlink and world_size <= 8:
+        size_ok = total_bytes < 128 * 1024
+    else:
+        size_ok = False
+
+    use_1stage = hidden_ok and token_ok and size_ok
+
+    result = aiter_ar.fused_ar_rms(
+        input_,
+        residual,
+        w=weight,
+        eps=epsilon,
+        registered=torch.cuda.is_current_stream_capturing(),
+        use_1stage=use_1stage,
+    )
+    assert result is not None
+    return result[0], result[1]
+
+
+def _rocm_aiter_fused_allreduce_rmsnorm_fake(
+    input_: torch.Tensor,
+    residual: torch.Tensor,
+    weight: torch.Tensor,
+    epsilon: float,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    return torch.empty_like(input_), torch.empty_like(residual)
+
+
 def _rocm_aiter_per_tensor_quant_impl(
     x: torch.Tensor,
     quant_dtype: torch.dtype,
@@ -700,7 +828,7 @@ def _rocm_aiter_per_token_quant_impl(
     assert quant_dtype in [torch.int8, FP8_DTYPE]
 
     out_shape = x.shape
-    out = torch.empty(x.shape, dtype=FP8_DTYPE, device=x.device)
+    out = torch.empty(x.shape, dtype=quant_dtype, device=x.device)
     if scale is None:
         scale = torch.empty((*out_shape[:-1], 1), dtype=torch.float32, device=x.device)
     dynamic_per_token_scaled_quant(
@@ -720,7 +848,7 @@ def _rocm_aiter_per_token_quant_fake(
 ) -> tuple[torch.Tensor, torch.Tensor]:
     out_shape = x.shape
     return (
-        torch.empty(x.shape, dtype=FP8_DTYPE, device=x.device),
+        torch.empty(x.shape, dtype=quant_dtype, device=x.device),
         torch.empty((*out_shape[:-1], 1), dtype=torch.float32, device=x.device),
     )
 
@@ -804,6 +932,50 @@ def _rocm_aiter_rmsnorm_fp8_group_quant_fake(
     )
 
 
+def _rocm_aiter_fused_rms_gated_fp8_group_quant_impl(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor | None,
+    z: torch.Tensor,
+    eps: float,
+    norm_before_gate: bool,
+    activation: str,
+    group_size: int,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Fused gated-RMSNorm + FP8 group quantization via aiter Triton kernel."""
+    from aiter.ops.triton.quant import fused_rms_gated_fp8_group_quant
+
+    return fused_rms_gated_fp8_group_quant(
+        x,
+        weight,
+        bias,
+        z,
+        eps,
+        norm_before_gate=norm_before_gate,
+        activation=activation,
+        out_dtype=FP8_DTYPE,
+        group_size=group_size,
+    )
+
+
+def _rocm_aiter_fused_rms_gated_fp8_group_quant_fake(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor | None,
+    z: torch.Tensor,
+    eps: float,
+    norm_before_gate: bool,
+    activation: str,
+    group_size: int,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    M, N = x.shape
+    scale_shape = (M, (N + group_size - 1) // group_size)
+    return (
+        torch.empty_like(x, dtype=FP8_DTYPE, device=x.device),
+        torch.empty(scale_shape, dtype=torch.float32, device=x.device),
+    )
+
+
 def _rocm_aiter_group_fp8_quant_impl(
     x: torch.Tensor,
     group_size: int,
@@ -900,6 +1072,65 @@ def _rocm_aiter_triton_add_rmsnorm_pad_fake(
     return out, residual_out
 
 
+def _fused_mla_dual_rms_norm_impl(
+    x1: torch.Tensor,
+    x1_weight: torch.Tensor,
+    x2: torch.Tensor,
+    x2_weight: torch.Tensor,
+    x1_epsilon: float,
+    x2_epsilon: float,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    try:
+        import aiter.ops.fused_qk_norm_rope_cache_quant as aiter_ops
+    except (ImportError, ModuleNotFoundError, AttributeError) as exc:
+        raise ImportError(
+            "fused_qk_rmsnorm requires AITer >= PR #2442. "
+            "Please upgrade aiter or disable the "
+            "fuse_mla_dual_rms_norm pass."
+        ) from exc
+
+    if hasattr(aiter_ops, "_fused_qk_rmsnorm"):
+        return aiter_ops._fused_qk_rmsnorm(
+            q_out=None,
+            q=x1,
+            q_weight=x1_weight,
+            q_eps=x1_epsilon,
+            k_out=None,
+            k=x2,
+            k_weight=x2_weight,
+            k_eps=x2_epsilon,
+        )
+
+    # TODO(rbrugaro-amd): remove the legacy fused_qk_rmsnorm path once
+    # AITER stabilizes the API (https://github.com/ROCm/aiter/issues/3207).
+    if hasattr(aiter_ops, "fused_qk_rmsnorm"):
+        return aiter_ops.fused_qk_rmsnorm(
+            q=x1,
+            q_weight=x1_weight,
+            q_eps=x1_epsilon,
+            k=x2,
+            k_weight=x2_weight,
+            k_eps=x2_epsilon,
+        )
+
+    raise ImportError(
+        "fused_qk_rmsnorm requires AITer >= PR #2442. "
+        "Please upgrade aiter or disable the "
+        "fuse_mla_dual_rms_norm pass."
+    )
+
+
+def _fused_mla_dual_rms_norm_fake(
+    x1: torch.Tensor,
+    x1_weight: torch.Tensor,
+    x2: torch.Tensor,
+    x2_weight: torch.Tensor,
+    x1_epsilon: float,
+    x2_epsilon: float,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    return (torch.empty_like(x1), torch.empty_like(x2))
+
+
 def _rocm_aiter_gemm_a8wfp4_impl(
     x: torch.Tensor,
     w: torch.Tensor,
@@ -1040,10 +1271,9 @@ class rocm_aiter_ops:
 
         # Check if aiter is enabled before using operations
         if rocm_aiter_ops.is_enabled():
-            result = rocm_aiter_ops.rms_norm(x, weight, epsilon)
+            result = rocm_aiter_ops.per_token_quant(x, FP8_DTYPE)
 
     Operations:
-        - RMS normalization: rms_norm, rms_norm2d_with_add
         - GEMM operations: gemm_a8w8, gemm_a8w8_blockscale
         - Fused MoE: fused_moe, asm_moe_tkw1
         - Routing: topk_softmax, biased_grouped_topk, grouped_topk
@@ -1055,7 +1285,6 @@ class rocm_aiter_ops:
     # Check if the env variable is set
     _AITER_ENABLED = envs.VLLM_ROCM_USE_AITER
     _LINEAR_ENABLED = envs.VLLM_ROCM_USE_AITER_LINEAR
-    _RMSNORM_ENABLED = envs.VLLM_ROCM_USE_AITER_RMSNORM
     _FMOE_ENABLED = envs.VLLM_ROCM_USE_AITER_MOE
     _MLA_ENABLED = envs.VLLM_ROCM_USE_AITER_MLA
     _MHA_ENABLED = envs.VLLM_ROCM_USE_AITER_MHA
@@ -1071,6 +1300,12 @@ class rocm_aiter_ops:
     _MOE_SHARED_EXPERTS_ENABLED = envs.VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS
     # TODO: Consolidate under _LINEAR_ENABLED
     _TRITON_UNQUANT_GEMM = envs.VLLM_ROCM_USE_AITER_TRITON_GEMM
+    # Lazily probed: whether aiter.topk_softmax supports the
+    # num_shared_experts / shared_expert_scoring_func args (7-arg form).
+    _TOPK_SOFTMAX_FUSED_SIGMOID: bool | None = None
+
+    _ALL_REDUCE_MAX_SIZE: int = 8192 * 1024 * 8 * 2
+    _CUSTOM_ALL_REDUCE: AiterCustomAllreduceProto | None = None
 
     @classmethod
     def refresh_env_variables(cls):
@@ -1083,7 +1318,6 @@ def refresh_env_variables(cls):
         """
         cls._AITER_ENABLED = envs.VLLM_ROCM_USE_AITER
         cls._LINEAR_ENABLED = envs.VLLM_ROCM_USE_AITER_LINEAR
-        cls._RMSNORM_ENABLED = envs.VLLM_ROCM_USE_AITER_RMSNORM
         cls._FMOE_ENABLED = envs.VLLM_ROCM_USE_AITER_MOE
         cls._MLA_ENABLED = envs.VLLM_ROCM_USE_AITER_MLA
         cls._MHA_ENABLED = envs.VLLM_ROCM_USE_AITER_MHA
@@ -1175,11 +1409,6 @@ def is_linear_enabled(cls) -> bool:
     def is_linear_fp8_enabled(cls) -> bool:
         return cls.is_linear_enabled()
 
-    @classmethod
-    @if_aiter_supported
-    def is_rmsnorm_enabled(cls) -> bool:
-        return cls._AITER_ENABLED and cls._RMSNORM_ENABLED
-
     @classmethod
     @if_aiter_supported
     def is_fused_moe_enabled(cls) -> bool:
@@ -1190,6 +1419,52 @@ def is_fused_moe_enabled(cls) -> bool:
     def is_fusion_moe_shared_experts_enabled(cls) -> bool:
         return cls.is_fused_moe_enabled() and cls._MOE_SHARED_EXPERTS_ENABLED
 
+    @classmethod
+    @if_aiter_supported
+    def topk_softmax_supports_fused_sigmoid(cls) -> bool:
+        """Check if topk_softmax supports fused shared expert activation."""
+        if cls._TOPK_SOFTMAX_FUSED_SIGMOID is None:
+            try:
+                import inspect
+
+                from aiter import topk_softmax
+
+                params = inspect.signature(topk_softmax).parameters
+                if "num_shared_experts" in params:
+                    cls._TOPK_SOFTMAX_FUSED_SIGMOID = True
+                else:
+                    # @compile_ops wrapper loses the original signature.
+                    # Fall back to the torch custom op schema.
+                    import torch
+
+                    schema = getattr(
+                        getattr(torch.ops.aiter, "topk_softmax", None), "default", None
+                    )
+                    schema_str = str(getattr(schema, "_schema", ""))
+                    cls._TOPK_SOFTMAX_FUSED_SIGMOID = "num_shared_experts" in schema_str
+            except (ImportError, ValueError):
+                cls._TOPK_SOFTMAX_FUSED_SIGMOID = False
+        return cls._TOPK_SOFTMAX_FUSED_SIGMOID
+
+    @classmethod
+    @if_aiter_supported
+    def fuse_sigmoid_in_kernel(cls, aiter_topK_meta_data: object) -> bool:
+        """Whether fused shared-expert sigmoid in the topk kernel is usable.
+
+        Combines the cached static capability checks (FSE enabled, fused-moe
+        enabled, topk_softmax supports fused sigmoid) with the runtime
+        readiness check (topK meta-data buffer initialized).
+
+        ``aiter_topK_meta_data`` is accepted as a parameter rather than
+        imported internally so callers cannot hit initialization-order
+        issues where the module-level global has not been set yet.
+        """
+        return (
+            cls.is_fusion_moe_shared_experts_enabled()
+            and cls.topk_softmax_supports_fused_sigmoid()
+            and aiter_topK_meta_data is not None
+        )
+
     @classmethod
     @if_aiter_supported
     def is_mla_enabled(cls) -> bool:
@@ -1239,6 +1514,64 @@ def is_triton_rotary_embed_enabled(cls) -> bool:
     def is_triton_gemm_enabled(cls) -> bool:
         return cls._AITER_ENABLED and cls._TRITON_UNQUANT_GEMM
 
+    @classmethod
+    @if_aiter_supported
+    def is_tgemm_enabled(cls) -> bool:
+        from vllm.platforms.rocm import on_gfx950
+
+        return cls.is_linear_enabled() and on_gfx950()
+
+    @classmethod
+    def initialize_aiter_allreduce(
+        cls, group: ProcessGroup, device: torch.device
+    ) -> None:
+        try:
+            from aiter.dist.device_communicators.custom_all_reduce import (
+                CustomAllreduce as AiterCustomAllreduce,
+            )
+
+            cls._CUSTOM_ALL_REDUCE = AiterCustomAllreduce(group, device)
+        except Exception:
+            cls._CUSTOM_ALL_REDUCE = None
+
+    @classmethod
+    def get_aiter_allreduce(cls) -> AiterCustomAllreduceProto | None:
+        return cls._CUSTOM_ALL_REDUCE
+
+    @classmethod
+    def destroy_aiter_allreduce(cls) -> None:
+        if cls._CUSTOM_ALL_REDUCE is not None:
+            cls._CUSTOM_ALL_REDUCE.close()
+            cls._CUSTOM_ALL_REDUCE = None
+
+    @classmethod
+    def get_aiter_allreduce_max_size(cls) -> int | None:
+        # effective max input size (based on upstream aiter version: v0.1.10.post3)
+        # https://github.com/ROCm/aiter/blob/6a0e7b26ccf33164785531212cc2ec2cde0b9243/aiter/dist/device_communicators/custom_all_reduce.py#L272-L273
+        return int(cls._ALL_REDUCE_MAX_SIZE / 2)
+
+    @classmethod
+    @if_aiter_supported
+    def are_gdn_triton_kernels_available(cls) -> bool:
+        """Check if AITER Triton kernels for GDN attention are importable.
+
+        These are optional Triton kernels (conv1d fast-path, gated delta net)
+        used by GatedDeltaNetAttention's decode fast-path.  They may be absent
+        in older aiter builds.
+        """
+        if not cls._AITER_ENABLED:
+            return False
+        try:
+            import aiter.ops.triton.causal_conv1d_update_single_token  # noqa: F401
+            import aiter.ops.triton.gated_delta_net  # noqa: F401
+            from aiter.ops.triton.quant import (  # noqa: F401
+                fused_rms_gated_fp8_group_quant,
+            )
+
+            return True
+        except (ImportError, ModuleNotFoundError):
+            return False
+
     @staticmethod
     @if_aiter_supported
     def register_ops_once() -> None:
@@ -1309,11 +1642,15 @@ def register_ops_once() -> None:
             )
 
             direct_register_custom_op(
-                op_name="rocm_aiter_gemm_a8w8",
-                op_func=_rocm_aiter_gemm_a8w8_impl,
-                mutates_args=[],
-                fake_impl=_rocm_aiter_gemm_a8w8_fake,
-                dispatch_key=current_platform.dispatch_key,
+                op_name="rocm_aiter_w8a8_gemm",
+                op_func=_rocm_aiter_w8a8_gemm_impl,
+                fake_impl=_rocm_aiter_w8a8_gemm_fake,
+            )
+
+            direct_register_custom_op(
+                op_name="_rocm_aiter_preshuffled_per_token_w8a8_gemm",
+                op_func=_rocm_aiter_preshuffled_per_token_w8a8_gemm_impl,
+                fake_impl=_rocm_aiter_preshuffled_per_token_w8a8_gemm_fake,
             )
 
             direct_register_custom_op(
@@ -1328,19 +1665,6 @@ def register_ops_once() -> None:
                 fake_impl=_rocm_aiter_gemm_a8w8_blockscale_fake,
             )
 
-            direct_register_custom_op(
-                op_name="rocm_aiter_rms_norm",
-                op_func=_rocm_aiter_rms_norm_impl,
-                fake_impl=_rocm_aiter_rms_norm_fake,
-            )
-
-            direct_register_custom_op(
-                op_name="rocm_aiter_rmsnorm2d_fwd_with_add",
-                op_func=_rocm_aiter_rmsnorm2d_fwd_with_add_impl,
-                fake_impl=_rocm_aiter_rmsnorm2d_fwd_with_add_fake,
-                dispatch_key=current_platform.dispatch_key,
-            )
-
             direct_register_custom_op(
                 op_name="rocm_aiter_rmsnorm_fused_dynamic_quant",
                 op_func=_rocm_aiter_rmsnorm_fused_dynamic_quant_impl,
@@ -1361,6 +1685,12 @@ def register_ops_once() -> None:
                 fake_impl=_rocm_aiter_rmsnorm_fp8_group_quant_fake,
             )
 
+            direct_register_custom_op(
+                op_name="rocm_aiter_fused_rms_gated_fp8_group_quant",
+                op_func=_rocm_aiter_fused_rms_gated_fp8_group_quant_impl,
+                fake_impl=_rocm_aiter_fused_rms_gated_fp8_group_quant_fake,
+            )
+
             direct_register_custom_op(
                 op_name="rocm_aiter_rmsnorm_with_add_fp8_group_quant",
                 op_func=_rocm_aiter_rmsnorm_with_add_fp8_group_quant_impl,
@@ -1425,15 +1755,20 @@ def register_ops_once() -> None:
                 fake_impl=_triton_rotary_embedding_fake,
             )
 
-            _OPS_REGISTERED = True
+            direct_register_custom_op(
+                op_name="rocm_aiter_fused_allreduce_rmsnorm",
+                op_func=_rocm_aiter_fused_allreduce_rmsnorm_impl,
+                fake_impl=_rocm_aiter_fused_allreduce_rmsnorm_fake,
+            )
 
-    @staticmethod
-    def get_rmsnorm_fused_add_op() -> OpOverload:
-        return torch.ops.vllm.rocm_aiter_rmsnorm2d_fwd_with_add.default
+            direct_register_custom_op(
+                op_name="fused_mla_dual_rms_norm",
+                op_func=_fused_mla_dual_rms_norm_impl,
+                mutates_args=[],
+                fake_impl=_fused_mla_dual_rms_norm_fake,
+            )
 
-    @staticmethod
-    def get_rmsnorm_op() -> OpOverload:
-        return torch.ops.vllm.rocm_aiter_rms_norm.default
+            _OPS_REGISTERED = True
 
     @staticmethod
     def get_rmsnorm_fused_add_dynamic_quant_op() -> OpOverload:
@@ -1447,6 +1782,11 @@ def get_rmsnorm_fused_dynamic_quant_op() -> OpOverload:
     def get_rmsnorm_group_fused_quant_op() -> OpOverload:
         return torch.ops.vllm.rocm_aiter_rmsnorm_fp8_group_quant.default
 
+    @staticmethod
+    def get_fused_rms_gated_fp8_group_quant_op() -> OpOverload:
+        """Return the fused gated-RMSNorm + FP8 group quant custom op."""
+        return torch.ops.vllm.rocm_aiter_fused_rms_gated_fp8_group_quant.default
+
     @staticmethod
     def get_rmsnorm_group_add_fused_quant_op() -> OpOverload:
         return torch.ops.vllm.rocm_aiter_rmsnorm_with_add_fp8_group_quant.default
@@ -1472,24 +1812,26 @@ def get_triton_rotary_embedding_op() -> OpOverload:
         return torch.ops.vllm.rocm_aiter_triton_rotary_embedding.default
 
     @staticmethod
-    def rms_norm(
-        x: torch.Tensor, weight: torch.Tensor, variance_epsilon: float
-    ) -> torch.Tensor:
-        return torch.ops.vllm.rocm_aiter_rms_norm(x, weight, variance_epsilon)
+    def get_fused_allreduce_rmsnorm_op() -> OpOverload:
+        return torch.ops.vllm.rocm_aiter_fused_allreduce_rmsnorm.default
 
     @staticmethod
-    def rms_norm2d_with_add(
-        x: torch.Tensor,
-        residual: torch.Tensor,
-        weight: torch.Tensor,
-        variance_epsilon: float,
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        return torch.ops.vllm.rocm_aiter_rmsnorm2d_fwd_with_add(
-            x, residual, weight, variance_epsilon
-        )
+    def get_fused_mla_dual_rms_norm_op() -> OpOverload:
+        return torch.ops.vllm.fused_mla_dual_rms_norm.default
+
+    @staticmethod
+    def w8a8_gemm(
+        A: torch.Tensor,
+        B: torch.Tensor,
+        As: torch.Tensor,
+        Bs: torch.Tensor,
+        bias: torch.Tensor | None = None,
+        output_dtype: torch.dtype = torch.float16,
+    ) -> torch.Tensor:
+        return torch.ops.vllm.rocm_aiter_w8a8_gemm(A, B, As, Bs, bias, output_dtype)
 
     @staticmethod
-    def gemm_a8w8(
+    def preshuffled_per_token_w8a8_gemm(
         A: torch.Tensor,
         B: torch.Tensor,
         As: torch.Tensor,
@@ -1497,7 +1839,9 @@ def gemm_a8w8(
         bias: torch.Tensor | None = None,
         output_dtype: torch.dtype = torch.float16,
     ) -> torch.Tensor:
-        return torch.ops.vllm.rocm_aiter_gemm_a8w8(A, B, As, Bs, bias, output_dtype)
+        return torch.ops.vllm._rocm_aiter_preshuffled_per_token_w8a8_gemm(
+            A, B, As, Bs, bias, output_dtype
+        )
 
     @staticmethod
     def triton_gemm_a8w8_blockscale(
@@ -1608,9 +1952,17 @@ def topk_softmax(
         token_expert_indices: torch.Tensor,
         gating_output: torch.Tensor,
         renormalize: bool,
+        num_shared_experts: int = 0,
+        shared_expert_scoring_func: str = "",
     ) -> tuple[torch.Tensor, ...]:
         torch.ops.vllm.rocm_aiter_topk_softmax(
-            topk_weights, topk_indices, token_expert_indices, gating_output, renormalize
+            topk_weights,
+            topk_indices,
+            token_expert_indices,
+            gating_output,
+            renormalize,
+            num_shared_experts,
+            shared_expert_scoring_func,
         )
         return topk_weights, topk_indices
 
@@ -1638,6 +1990,8 @@ def biased_grouped_topk(
         need_renorm: bool,
         routed_scaling_factor: float = 1.0,
     ) -> None:
+        if correction_bias.dtype != gating_output.dtype:
+            correction_bias = correction_bias.to(gating_output.dtype)
         torch.ops.vllm.rocm_aiter_biased_grouped_topk(
             gating_output,
             correction_bias,
@@ -1916,9 +2270,27 @@ def is_triton_gemm_afp4wfp4_presh_ws_tuned(n: int, k: int) -> bool:
             (8192, 3584),
         ]
 
+    @staticmethod
+    def is_shuffled_per_token_w8a8_gemm_tuned(
+        N: int, K: int, q_dtype_w: torch.dtype
+    ) -> bool:
+        import aiter.ops.gemm_op_a8w8 as aiter_gemm_a8w8_ops
+
+        csv_path = (
+            aiter_gemm_a8w8_ops.AITER_CONFIGS.AITER_CONFIG_GEMM_A8W8_BPRESHUFFLE_FILE
+        )
+        return _check_kernel_tuned(N, K, q_dtype_w, csv_path)
+
+    @staticmethod
+    def is_per_token_w8a8_gemm_tuned(N: int, K: int, q_dtype_w: torch.dtype) -> bool:
+        import aiter.ops.gemm_op_a8w8 as aiter_gemm_a8w8_ops
+
+        csv_path = aiter_gemm_a8w8_ops.AITER_CONFIGS.AITER_CONFIG_GEMM_A8W8_FILE
+        return _check_kernel_tuned(N, K, q_dtype_w, csv_path)
+
     @staticmethod
     def shuffle_weight(
-        self, tensor: torch.Tensor, layout: tuple[int, int] = (16, 16)
+        tensor: torch.Tensor, layout: tuple[int, int] = (16, 16)
     ) -> torch.Tensor:
         from aiter.ops.shuffle import shuffle_weight
 
@@ -2070,5 +2442,234 @@ def pa_fwd_asm(
             out_=out_,
         )
 
+    @staticmethod
+    def paged_attention_common(
+        Q: torch.Tensor,
+        K: torch.Tensor,
+        V: torch.Tensor,
+        tmp_out: torch.Tensor,
+        max_logits: torch.Tensor,
+        exp_sums: torch.Tensor,
+        max_seq_len: int,
+        block_tables: torch.Tensor,
+        context_lens: torch.Tensor,
+        block_tables_stride0: int,
+        scale: float,
+        K_QScale_hip: torch.Tensor,
+        V_QScale_hip: torch.Tensor,
+        K_QScale_asm: torch.Tensor,
+        V_QScale_asm: torch.Tensor,
+        out_: torch.Tensor,
+        kv_cache_dtype: str,
+    ):
+        """
+        Paged attention common function.
+
+        This function is NOT wrapped with @is_aiter_supported decorator
+        to allow explicit backend selection via attention_config to work
+        even when VLLM_ROCM_USE_AITER=0.
+
+        Note: This performs lazy import of aiter.paged_attention_common
+        """
+        from aiter import paged_attention_common
+
+        return paged_attention_common(
+            Q=Q,
+            K=K,
+            V=V,
+            tmp_out=tmp_out,
+            max_logits=max_logits,
+            exp_sums=exp_sums,
+            max_seq_len=max_seq_len,
+            block_tables=block_tables,
+            context_lens=context_lens,
+            block_tables_stride0=block_tables_stride0,
+            scale=scale,
+            K_QScale_hip=K_QScale_hip,
+            V_QScale_hip=V_QScale_hip,
+            K_QScale_asm=K_QScale_asm,
+            V_QScale_asm=V_QScale_asm,
+            out_=out_,
+            kv_cache_dtype=kv_cache_dtype,
+        )
+
+    @staticmethod
+    def mhc_pre(
+        residual: torch.Tensor,
+        fn: torch.Tensor,
+        hc_scale: torch.Tensor,
+        hc_base: torch.Tensor,
+        rms_eps: float,
+        hc_pre_eps: float,
+        hc_sinkhorn_eps: float,
+        hc_post_mult_value: float,
+        sinkhorn_repeat: int,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """
+        Forward pass for mHC pre block.
+
+        Args:
+            residual: shape (..., hc_mult, hidden_size), dtype torch.bfloat16
+            fn: shape (hc_mult3, hc_mult * hidden_size), dtype torch.float32
+            hc_scale: shape (3,), dtype torch.float32
+            hc_base: shape (hc_mult3,), dtype torch.float32
+            rms_eps: RMS normalization epsilon
+            hc_pre_eps: pre-mix epsilon
+            hc_sinkhorn_eps: sinkhorn epsilon
+            hc_post_mult_value: post-mix multiplier value
+            sinkhorn_repeat: number of sinkhorn iterations
+            n_splits: split-k factor;
+
+        Returns:
+            post_mix: shape (..., hc_mult), dtype torch.float32
+            comb_mix: shape (..., hc_mult, hc_mult), dtype torch.float32
+            layer_input: shape (..., hidden_size), dtype torch.bfloat16
+        """
+        from aiter.ops.mhc import mhc_pre
+
+        # Validate shapes
+        assert residual.dtype == torch.bfloat16
+        assert fn.dtype == torch.float32
+        assert hc_scale.dtype == torch.float32
+        assert hc_base.dtype == torch.float32
+
+        hc_mult = residual.shape[-2]
+        hidden_size = residual.shape[-1]
+        hc_mult2 = hc_mult * hc_mult
+        hc_mult3 = hc_mult * 2 + hc_mult2
+
+        hc_hidden_size = hc_mult * hidden_size
+        assert fn.shape[0] == hc_mult3
+        assert fn.shape[1] == hc_hidden_size
+        assert hc_scale.shape == (3,)
+        assert hc_base.shape == (hc_mult3,)
+
+        outer_shape = residual.shape[:-2]
+
+        residual_flat = residual.view(-1, hc_mult, hidden_size)
+
+        num_tokens = residual_flat.shape[0]
+        if num_tokens == 0:
+            return (
+                torch.empty(
+                    num_tokens,
+                    hc_mult,
+                    1,
+                    dtype=torch.float32,
+                    device=residual_flat.device,
+                ),
+                torch.empty(
+                    num_tokens,
+                    hc_mult,
+                    hc_mult,
+                    dtype=torch.float32,
+                    device=residual_flat.device,
+                ),
+                torch.empty(
+                    num_tokens,
+                    hidden_size,
+                    dtype=torch.bfloat16,
+                    device=residual_flat.device,
+                ),
+            )
+
+        # AITER's Python wrapper allocates intermediate/output tensors without
+        # explicit device arguments, so run it under the residual tensor's device.
+        with torch.device(residual_flat.device):
+            post_mix, comb_mix, layer_input = mhc_pre(
+                residual_flat,
+                fn,
+                hc_scale,
+                hc_base,
+                rms_eps,
+                hc_pre_eps,
+                hc_sinkhorn_eps,
+                hc_post_mult_value,
+                sinkhorn_repeat,
+            )
+        return (
+            post_mix.view(*outer_shape, hc_mult, 1),
+            comb_mix.view(*outer_shape, hc_mult, hc_mult),
+            layer_input.view(*outer_shape, hidden_size),
+        )
+
+    @staticmethod
+    def hc_head(
+        hs_flat: torch.Tensor,
+        fn: torch.Tensor,
+        hc_scale: torch.Tensor,
+        hc_base: torch.Tensor,
+        out: torch.Tensor,
+        hidden_size: int,
+        rms_eps: float,
+        hc_eps: float,
+        hc_mult: int,
+    ) -> None:
+        """Run hc_head through AITER mhc_pre and write the result to out."""
+        assert hs_flat.dtype == torch.bfloat16
+        assert fn.dtype == torch.float32
+        assert hc_scale.dtype == torch.float32
+        assert hc_base.dtype == torch.float32
+        assert hs_flat.shape[-2:] == (hc_mult, hidden_size)
+        assert fn.shape == (hc_mult, hc_mult * hidden_size)
+        assert hc_scale.shape == (1,)
+        assert hc_base.shape == (hc_mult,)
+
+        num_tokens = hs_flat.shape[0]
+        if num_tokens == 0:
+            return
+
+        hc_mult3 = hc_mult * 2 + hc_mult * hc_mult
+
+        full_fn = torch.zeros(
+            hc_mult3,
+            hc_mult * hidden_size,
+            dtype=fn.dtype,
+            device=fn.device,
+        )
+        full_fn[:hc_mult] = fn
+
+        full_base = torch.zeros(hc_mult3, dtype=hc_base.dtype, device=hc_base.device)
+        full_base[:hc_mult] = hc_base
+
+        full_scale = torch.zeros(3, dtype=hc_scale.dtype, device=hc_scale.device)
+        full_scale[0] = hc_scale[0]
+
+        _, _, layer_input = rocm_aiter_ops.mhc_pre(
+            hs_flat,
+            full_fn,
+            full_scale,
+            full_base,
+            rms_eps,
+            hc_eps,
+            0.0,
+            1.0,
+            0,
+        )
+        out.copy_(layer_input)
+
+    @staticmethod
+    def mhc_post(
+        x: torch.Tensor,
+        residual: torch.Tensor,
+        post_layer_mix: torch.Tensor,
+        comb_res_mix: torch.Tensor,
+    ) -> torch.Tensor:
+        from aiter.ops.mhc import mhc_post
+
+        hc_mult = residual.shape[-2]
+        hidden_size = residual.shape[-1]
+        residual_flat = residual.view(-1, hc_mult, hidden_size)
+        num_tokens = residual_flat.shape[0]
+        out = torch.empty_like(residual_flat)
+        mhc_post(
+            out,
+            x.view(num_tokens, hidden_size),
+            residual_flat,
+            post_layer_mix.view(num_tokens, hc_mult, 1),
+            comb_res_mix.view(num_tokens, hc_mult, hc_mult),
+        )
+        return out.view_as(residual)
+
 
 rocm_aiter_ops.register_ops_once()
diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py
index 6c9ca07dba9a..08170098700f 100644
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from enum import IntEnum
 from typing import TYPE_CHECKING, Literal
 
 import torch
@@ -68,15 +69,20 @@ def create_fp4_output_tensors(
     n: int,
     device: torch.device,
     is_sf_swizzled_layout: bool,
+    padded_n: int | None = None,
 ) -> tuple[torch.Tensor, torch.Tensor]:
     """
     Allocate both output tensors for scaled_fp4_quant:
     (quantized_output, output_scale).
 
-    Must match the C++ scaled_fp4_quant_func allocation exactly.
+    Must match the C++ scaled_fp4_quant_func allocation exactly when
+    ``padded_n`` is ``None``. When ``padded_n`` is provided, allocate a larger
+    packed-FP4 output/scale buffer so the quantization kernel can write
+    CUTLASS-compatible K padding directly
     """
-    output = torch.empty((m, n // 2), device=device, dtype=torch.uint8)
-    output_scale = create_fp4_scale_tensor(m, n, device, is_sf_swizzled_layout)
+    physical_n = padded_n if padded_n is not None else n
+    output = torch.empty((m, physical_n // 2), device=device, dtype=torch.uint8)
+    output_scale = create_fp4_scale_tensor(m, physical_n, device, is_sf_swizzled_layout)
     return output, output_scale
 
 
@@ -264,127 +270,19 @@ def merge_attn_states(
     suffix_output: torch.Tensor,
     suffix_lse: torch.Tensor,
     output_lse: torch.Tensor | None = None,
+    prefill_tokens_with_context: int | None = None,
+    output_scale: torch.Tensor | None = None,
 ) -> None:
     torch.ops._C.merge_attn_states(
-        output, output_lse, prefix_output, prefix_lse, suffix_output, suffix_lse
-    )
-
-
-def convert_vertical_slash_indexes(
-    q_seqlens: torch.Tensor,  # [BATCH, ]
-    kv_seqlens: torch.Tensor,  # [BATCH, ]
-    vertical_indexes: torch.Tensor,  # [BATCH, N_HEADS, NNZ_V]
-    slash_indexes: torch.Tensor,  # [BATCH, N_HEADS, NNZ_S]
-    context_size: int,
-    block_size_M: int,
-    block_size_N: int,
-    causal: bool = True,
-) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-    batch_size = slash_indexes.size(0)
-    num_heads = slash_indexes.size(1)
-    nnz_slash = slash_indexes.size(2)
-    nnz_vertical = vertical_indexes.size(2)
-    num_rows = (context_size + block_size_M - 1) // block_size_M
-
-    block_count = torch.zeros(
-        batch_size, num_heads, num_rows, dtype=q_seqlens.dtype, device=q_seqlens.device
-    )
-    block_offset = torch.zeros(
-        batch_size,
-        num_heads,
-        num_rows,
-        nnz_slash,
-        dtype=q_seqlens.dtype,
-        device=q_seqlens.device,
-    )
-    column_count = torch.zeros(
-        batch_size, num_heads, num_rows, dtype=q_seqlens.dtype, device=q_seqlens.device
-    )
-    column_index = torch.zeros(
-        batch_size,
-        num_heads,
-        num_rows,
-        nnz_vertical,
-        dtype=q_seqlens.dtype,
-        device=q_seqlens.device,
-    )
-
-    torch.ops._C.convert_vertical_slash_indexes(
-        block_count,
-        block_offset,
-        column_count,
-        column_index,
-        q_seqlens,
-        kv_seqlens,
-        vertical_indexes,
-        slash_indexes,
-        context_size,
-        block_size_M,
-        block_size_N,
-        causal,
-    )
-    return block_count, block_offset, column_count, column_index
-
-
-def convert_vertical_slash_indexes_mergehead(
-    q_seqlens: torch.Tensor,  # [BATCH, ]
-    kv_seqlens: torch.Tensor,  # [BATCH, ]
-    vertical_indexes: torch.Tensor,  # [BATCH, N_HEADS, NNZ_V]
-    slash_indexes: torch.Tensor,  # [BATCH, N_HEADS, NNZ_S]
-    # [N_HEADS] : different head use different number of indices
-    vertical_indices_count: torch.Tensor,
-    slash_indices_count: torch.Tensor,
-    context_size: int,
-    block_size_M: int,
-    block_size_N: int,
-    causal: bool = True,
-) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-    batch_size = slash_indexes.size(0)
-    num_heads = slash_indexes.size(1)
-    nnz_slash = slash_indexes.size(2)
-    nnz_vertical = vertical_indexes.size(2)
-    num_rows = (context_size + block_size_M - 1) // block_size_M
-
-    block_count = torch.empty(
-        batch_size, num_heads, num_rows, dtype=q_seqlens.dtype, device=q_seqlens.device
-    )
-    block_offset = torch.empty(
-        batch_size,
-        num_heads,
-        num_rows,
-        nnz_slash,
-        dtype=q_seqlens.dtype,
-        device=q_seqlens.device,
-    )
-    column_count = torch.empty(
-        batch_size, num_heads, num_rows, dtype=q_seqlens.dtype, device=q_seqlens.device
-    )
-    column_index = torch.empty(
-        batch_size,
-        num_heads,
-        num_rows,
-        nnz_vertical,
-        dtype=q_seqlens.dtype,
-        device=q_seqlens.device,
-    )
-
-    torch.ops._C.convert_vertical_slash_indexes_mergehead(
-        block_count,
-        block_offset,
-        column_count,
-        column_index,
-        q_seqlens,
-        kv_seqlens,
-        vertical_indexes,
-        slash_indexes,
-        vertical_indices_count,
-        slash_indices_count,
-        context_size,
-        block_size_M,
-        block_size_N,
-        causal,
+        output,
+        output_lse,
+        prefix_output,
+        prefix_lse,
+        suffix_output,
+        suffix_lse,
+        prefill_tokens_with_context,
+        output_scale,
     )
-    return block_count, block_offset, column_count, column_index
 
 
 # pos encoding ops
@@ -395,10 +293,24 @@ def rotary_embedding(
     head_size: int,
     cos_sin_cache: torch.Tensor,
     is_neox: bool,
+    rope_dim_offset: int = 0,
+    inverse: bool = False,
 ) -> None:
-    torch.ops._C.rotary_embedding(
-        positions, query, key, head_size, cos_sin_cache, is_neox
-    )
+    if rope_dim_offset == 0 and not inverse:
+        torch.ops._C.rotary_embedding(
+            positions, query, key, head_size, cos_sin_cache, is_neox
+        )
+    else:
+        torch.ops._C.rotary_embedding(
+            positions,
+            query,
+            key,
+            head_size,
+            cos_sin_cache,
+            is_neox,
+            rope_dim_offset,
+            inverse,
+        )
 
 
 # layer norm ops
@@ -411,6 +323,7 @@ def rms_norm(
 def fused_add_rms_norm(
     input: torch.Tensor, residual: torch.Tensor, weight: torch.Tensor, epsilon: float
 ) -> None:
+    # Note: this func is batch invariant
     torch.ops._C.fused_add_rms_norm(input, residual, weight, epsilon)
 
 
@@ -426,6 +339,7 @@ def fused_qk_norm_rope(
     cos_sin_cache: torch.Tensor,
     is_neox: bool,
     position_ids: torch.Tensor,
+    forced_token_heads_per_warp: int = -1,
 ) -> None:
     torch.ops._C.fused_qk_norm_rope(
         qkv,
@@ -439,6 +353,7 @@ def fused_qk_norm_rope(
         cos_sin_cache,
         is_neox,
         position_ids,
+        forced_token_heads_per_warp,
     )
 
 
@@ -572,6 +487,56 @@ def rms_norm_per_block_quant(
     return output, scales
 
 
+# fused silu_and_mul + block quant
+def silu_and_mul_per_block_quant(
+    input: torch.Tensor,
+    group_size: int,  # Changed from list[int]
+    quant_dtype: torch.dtype,
+    scale_ub: torch.Tensor | None = None,
+    is_scale_transposed: bool = False,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    assert input.ndim == 2, f"input must be 2D [batch, hidden*2], got {input.shape}"
+    assert input.shape[-1] % 2 == 0, (
+        f"input last dim must be even (gate||up layout), got {input.shape[-1]}"
+    )
+
+    # Output is half the width of input (after silu_and_mul)
+    num_tokens = input.shape[0]
+    hidden_size = input.shape[-1] // 2  # Divide by 2 because input is [gate || up]
+
+    # Allocate output tensor (FP8 or INT8)
+    output = torch.empty(
+        (num_tokens, hidden_size), device=input.device, dtype=quant_dtype
+    )
+
+    # Allocate scales tensor
+    num_groups = hidden_size // group_size  # Directly use group_size
+    if is_scale_transposed:
+        scales = torch.empty(
+            (num_groups, num_tokens),
+            device=input.device,
+            dtype=torch.float32,
+        ).t()
+    else:
+        scales = torch.empty(
+            (num_tokens, num_groups),
+            device=input.device,
+            dtype=torch.float32,
+        )
+
+    # Call the C++ kernel
+    torch.ops._C.silu_and_mul_per_block_quant(
+        output,
+        input,
+        scales,
+        group_size,  # Pass directly as int
+        scale_ub,
+        is_scale_transposed,
+    )
+
+    return output, scales
+
+
 # quantization ops
 # awq
 def awq_dequantize(
@@ -1089,6 +1054,38 @@ def cutlass_fp4_moe_mm(
     )
 
 
+def cutlass_mxfp4_moe_mm(
+    out_tensors: torch.Tensor,
+    a_tensors: torch.Tensor,
+    b_tensors: torch.Tensor,
+    a_scales: torch.Tensor,
+    b_scales: torch.Tensor,
+    problem_sizes: torch.Tensor,
+    expert_offsets: torch.Tensor,
+    sf_offsets: torch.Tensor,
+):
+    """
+    An MXFP4 Blockscaled Group Gemm for MoE (MXFP4 x MXFP4).
+
+    Uses mx_float4_t types with E8M0 scale factors and 32-element blocks.
+    - a/b_tensors: MXFP4 packed activations/weights (uint8, 2 E2M1 per byte)
+    - a_/b_scales: E8M0 blockscales (uint8, stored in swizzled layout)
+    - Epilogue uses scalar alpha=1, beta=0 inside the CUDA op (no global scales).
+    - expert_offsets/sf_offsets: expert boundary indices
+    - problem_sizes: (num_experts, 3) with (M, N, K) per expert
+    """
+    return torch.ops._C.cutlass_mxfp4_group_mm(
+        out_tensors,
+        a_tensors,
+        b_tensors,
+        a_scales,
+        b_scales,
+        problem_sizes,
+        expert_offsets,
+        sf_offsets,
+    )
+
+
 def mxfp8_experts_quant(
     input_tensor: torch.Tensor,
     problem_sizes: torch.Tensor,
@@ -1605,6 +1602,7 @@ def scaled_fp4_quant(
     input_global_scale: torch.Tensor,
     is_sf_swizzled_layout: bool = True,
     backend: str = "none",
+    padded_n: int | None = None,
 ) -> tuple[torch.Tensor, torch.Tensor]:
     """
     Quantize input tensor to FP4 and return quantized tensor and scale.
@@ -1619,6 +1617,8 @@ def scaled_fp4_quant(
         input: The input tensor to be quantized to FP4
         input_global_scale: A scalar scaling factor for the entire tensor.
         use_8x4_sf_layout: Whether to use the 8x4 or 128x4 layout for the scaling
+        padded_n: Optional padded K dimension. When provided, the quantized
+            output and scale tensors are allocated for ``padded_n``
 
     Returns:
         tuple[torch.Tensor, torch.Tensor]: The output tensor in FP4 but every
@@ -1636,9 +1636,16 @@ def scaled_fp4_quant(
     assert input.dtype in (torch.float16, torch.bfloat16), (
         f"input.dtype needs to be fp16 or bf16 but got {input.dtype}."
     )
+    if padded_n is not None:
+        assert padded_n >= n, f"padded_n must be >= n, got padded_n={padded_n}, n={n}."
+        assert padded_n % block_size == 0, (
+            f"padded_n has to be a multiple of {block_size}, but got {padded_n}."
+        )
 
     use_8x4_sf_layout = True if "trtllm" in backend and m <= 32 else False  # noqa: SIM210
-
+    if use_8x4_sf_layout and padded_n is not None and padded_n != n:
+        # TODO: support this case
+        raise ValueError("padded_n is not supported with TRTLLM 8x4 scale layout.")
     if use_8x4_sf_layout:
         output, output_scale = flashinfer_quant_nvfp4_8x4_sf_layout(
             input, input_global_scale
@@ -1646,7 +1653,11 @@ def scaled_fp4_quant(
     else:
         # Pre-allocate and call .out variant (same behavior as old in-place API)
         output, output_scale = create_fp4_output_tensors(
-            m, n, input.device, is_sf_swizzled_layout
+            m,
+            n,
+            input.device,
+            is_sf_swizzled_layout,
+            padded_n=padded_n,
         )
         torch.ops._C.scaled_fp4_quant.out(
             input,
@@ -1787,6 +1798,109 @@ def silu_and_mul_scaled_fp4_experts_quant(
     return output, output_scales
 
 
+def mxfp4_experts_quant(
+    input_tensor: torch.Tensor,
+    expert_offsets: torch.Tensor,
+    blockscale_offsets: torch.Tensor,
+    n_experts: int,
+    topk: int,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """
+    Quantize input tensor to MXFP4 for packed MoE inputs.
+    Uses 32-element blocks with E8M0 (power-of-two) scale factors.
+    MXFP4 has no global scale - only block-level E8M0 scale factors.
+
+    Args:
+        input_tensor: [m_topk, k] BF16/FP16 activations
+        expert_offsets: [n_experts+1] token boundaries per expert
+        blockscale_offsets: [n_experts+1] SF row boundaries per expert
+        n_experts: number of experts
+        topk: number of top-k experts
+    Returns:
+        output: [m_topk, k//2] packed E2M1 values (uint8)
+        output_scales: E8M0 blockscales in swizzled layout (uint8 view)
+    """
+    assert not current_platform.is_rocm()
+    assert input_tensor.ndim == 2
+
+    MAX_TOKENS_PER_EXPERT = envs.VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE
+    m_numtopk, k = input_tensor.shape
+
+    assert m_numtopk <= MAX_TOKENS_PER_EXPERT * topk, (
+        f"m_numtopk must be less than MAX_TOKENS_PER_EXPERT("
+        f"{MAX_TOKENS_PER_EXPERT})"
+        f" for cutlass_moe_mxfp4, observed m_numtopk = {m_numtopk}. Use"
+        f" VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE to set this value."
+    )
+    scales_k = k // 32
+    padded_k = (scales_k + (4 - 1)) // 4
+
+    output = torch.empty(
+        m_numtopk, k // 2, device=input_tensor.device, dtype=torch.uint8
+    )
+    output_scales = torch.empty(
+        MAX_TOKENS_PER_EXPERT * topk,
+        padded_k,
+        dtype=torch.int32,
+        device=input_tensor.device,
+    )
+    torch.ops._C.mxfp4_experts_quant(
+        output,
+        output_scales,
+        input_tensor,
+        expert_offsets,
+        blockscale_offsets,
+        n_experts,
+    )
+    # E8M0 SFs are stored as uint8
+    output_scales = output_scales.view(torch.uint8)
+    return output, output_scales
+
+
+def silu_and_mul_mxfp4_experts_quant(
+    input_tensor: torch.Tensor,
+    expert_offsets: torch.Tensor,
+    blockscale_offsets: torch.Tensor,
+    n_experts: int,
+    topk: int,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """
+    Fused SiLU+Mul+MXFP4 quantization for MoE intermediate activations.
+    MXFP4 has no global scale - only block-level E8M0 scale factors.
+    """
+    assert not current_platform.is_rocm()
+    assert input_tensor.ndim == 2
+
+    MAX_TOKENS_PER_EXPERT = envs.VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE
+    m_numtopk, k_times_2 = input_tensor.shape
+    assert k_times_2 % 2 == 0, "input width must be even (gate || up layout)"
+    k = k_times_2 // 2
+
+    assert m_numtopk <= MAX_TOKENS_PER_EXPERT * topk
+    scales_k = k // 32
+    padded_k = (scales_k + (4 - 1)) // 4
+
+    output = torch.empty(
+        m_numtopk, k // 2, device=input_tensor.device, dtype=torch.uint8
+    )
+    output_scales = torch.empty(
+        MAX_TOKENS_PER_EXPERT * topk,
+        padded_k,
+        dtype=torch.int32,
+        device=input_tensor.device,
+    )
+    torch.ops._C.silu_and_mul_mxfp4_experts_quant(
+        output,
+        output_scales,
+        input_tensor,
+        expert_offsets,
+        blockscale_offsets,
+        n_experts,
+    )
+    output_scales = output_scales.view(torch.uint8)
+    return output, output_scales
+
+
 # fp8
 def scaled_fp8_quant(
     input: torch.Tensor,
@@ -2062,7 +2176,7 @@ def selective_scan_fwd(
     cache_indices: torch.Tensor | None,
     has_initial_state: torch.Tensor | None,
     ssm_states: torch.Tensor,
-    pad_slot_id: int,
+    null_block_id: int,
     block_size: int = 1024,
     block_idx_first_scheduled_token: torch.Tensor | None = None,
     block_idx_last_scheduled_token: torch.Tensor | None = None,
@@ -2084,7 +2198,7 @@ def selective_scan_fwd(
         cache_indices,
         has_initial_state,
         ssm_states,
-        pad_slot_id,
+        null_block_id,
         block_size,
         block_idx_first_scheduled_token,
         block_idx_last_scheduled_token,
@@ -2238,23 +2352,6 @@ def moe_wna16_gemm(
     )
 
 
-def router_gemm_bf16_fp32(input: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
-    """bf16 x bf16 -> fp32 GEMM via cuBLAS. weight shape: (N, K)."""
-    return torch.ops._moe_C.router_gemm_bf16_fp32(input, weight)
-
-
-if hasattr(torch.ops, "_moe_C") and hasattr(torch.ops._moe_C, "router_gemm_bf16_fp32"):
-
-    @register_fake("_moe_C::router_gemm_bf16_fp32")
-    def router_gemm_bf16_fp32_fake(
-        input: torch.Tensor,
-        weight: torch.Tensor,
-    ) -> torch.Tensor:
-        return torch.empty(
-            input.shape[0], weight.shape[0], dtype=torch.float32, device=input.device
-        )
-
-
 def dsv3_router_gemm(
     hidden_states: torch.Tensor,
     router_weight: torch.Tensor,
@@ -2270,19 +2367,72 @@ def dsv3_router_gemm(
     return output
 
 
-def gpt_oss_router_gemm(
-    hidden_states: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor
+def dsv4_norm_router_gemm(
+    x: torch.Tensor,
+    norm_weight: torch.Tensor,
+    gate_weight: torch.Tensor,
+    eps: float,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Fused RMSNorm + router GEMV for DeepSeek V4.
+
+    Returns ``(normed_x, router_logits)`` where
+        normed_x[m,k]      = x[m,k] * rsqrt(mean(x[m]^2) + eps) * norm_weight[k]
+        router_logits[m,n] = sum_k(normed_x[m,k] * gate_weight[n,k])
+
+    DSV4-specific constraints (caller must check before dispatching here):
+      - x, norm_weight, gate_weight all bf16 contiguous
+      - x.shape == [num_tokens, 7168] with num_tokens in [1, 16]
+      - gate_weight.shape == [num_experts, 7168] with num_experts in {256, 384}
+      - SM 9.x or 10.x device
+
+    Logits output is fp32 (hard-coded by DSV4 router).
+    """
+    num_tokens, hidden = x.shape
+    num_experts = gate_weight.shape[0]
+    normed_x = torch.empty_like(x)
+    logits = torch.empty(num_tokens, num_experts, device=x.device, dtype=torch.float32)
+    torch.ops._moe_C.dsv4_norm_router_gemm(
+        logits, normed_x, x, norm_weight, gate_weight, float(eps)
+    )
+    return normed_x, logits
+
+
+def fp32_router_gemm(
+    hidden_states: torch.Tensor,
+    router_weight: torch.Tensor,
 ) -> torch.Tensor:
     output = torch.empty(
         hidden_states.shape[0],
-        weight.shape[0],
+        router_weight.shape[0],
         device=hidden_states.device,
-        dtype=hidden_states.dtype,
+        dtype=torch.float32,
     )
-    torch.ops._moe_C.gpt_oss_router_gemm(output, hidden_states, weight, bias)
+    torch.ops._moe_C.fp32_router_gemm(output, hidden_states, router_weight)
     return output
 
 
+if hasattr(torch.ops, "_moe_C") and hasattr(torch.ops._moe_C, "fp32_router_gemm"):
+
+    @register_fake("_moe_C::fp32_router_gemm")
+    def fp32_router_gemm_fake(
+        output: torch.Tensor,
+        mat_a: torch.Tensor,
+        mat_b: torch.Tensor,
+    ) -> None:
+        return
+
+
+if hasattr(torch.ops, "_moe_C") and hasattr(torch.ops._moe_C, "fp32_router_gemm"):
+
+    @register_fake("_moe_C::fp32_router_gemm")
+    def fp32_router_gemm_fake(
+        output: torch.Tensor,
+        mat_a: torch.Tensor,
+        mat_b: torch.Tensor,
+    ) -> None:
+        return
+
+
 def topk_softmax(
     topk_weights: torch.Tensor,
     topk_ids: torch.Tensor,
@@ -2290,6 +2440,7 @@ def topk_softmax(
     gating_output: torch.Tensor,
     renormalize: bool = False,
     e_score_correction_bias: torch.Tensor | None = None,
+    enable_pdl: bool = False,
 ) -> None:
     torch.ops._moe_C.topk_softmax(
         topk_weights,
@@ -2298,6 +2449,7 @@ def topk_softmax(
         gating_output,
         renormalize,
         e_score_correction_bias,
+        enable_pdl,
     )
 
 
@@ -2308,6 +2460,7 @@ def topk_sigmoid(
     gating_output: torch.Tensor,
     renormalize: bool = False,
     e_score_correction_bias: torch.Tensor | None = None,
+    enable_pdl: bool = False,
 ) -> None:
     torch.ops._moe_C.topk_sigmoid(
         topk_weights,
@@ -2316,6 +2469,31 @@ def topk_sigmoid(
         gating_output,
         renormalize,
         e_score_correction_bias,
+        enable_pdl,
+    )
+
+
+def topk_hash_softplus_sqrt(
+    topk_weights: torch.Tensor,
+    topk_indices: torch.Tensor,
+    token_expert_indices: torch.Tensor,
+    gating_output: torch.Tensor,
+    renormalize: bool = False,
+    routed_scaling_factor: float = 1.0,
+    e_score_correction_bias: torch.Tensor | None = None,
+    input_tokens: torch.Tensor | None = None,
+    hash_indices_table: torch.Tensor | None = None,
+) -> None:
+    torch.ops._moe_C.topk_softplus_sqrt(
+        topk_weights,
+        topk_indices,
+        token_expert_indices,
+        gating_output,
+        renormalize,
+        routed_scaling_factor,
+        e_score_correction_bias,
+        input_tokens,
+        hash_indices_table,
     )
 
 
@@ -2422,32 +2600,7 @@ def moe_wna16_marlin_gemm(
     )
 
 
-if hasattr(torch.ops, "_moe_C") and hasattr(torch.ops._moe_C, "marlin_gemm_moe"):
-
-    @register_fake("_moe_C::marlin_gemm_moe")
-    def marlin_gemm_moe_fake(
-        a: torch.Tensor,
-        b_q_weights: torch.Tensor,
-        sorted_ids: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        b_scales: torch.Tensor,
-        b_zero_points: torch.Tensor,
-        g_idx: torch.Tensor,
-        perm: torch.Tensor,
-        workspace: torch.Tensor,
-        b_q_type: ScalarType,
-        size_m: torch.SymInt,
-        size_n: torch.SymInt,
-        size_k: torch.SymInt,
-        is_k_full: bool,
-        num_experts: int,
-        topk: int,
-        moe_block_size: int,
-        replicate_input: bool,
-        apply_weights: bool,
-    ) -> torch.Tensor:
-        return torch.empty((size_m, topk, size_n), dtype=a.dtype, device=a.device)
+if hasattr(torch.ops, "_moe_C") and hasattr(torch.ops._moe_C, "moe_wna16_marlin_gemm"):
 
     @register_fake("_moe_C::moe_wna16_marlin_gemm")
     def moe_wna16_marlin_gemm_fake(
@@ -2595,6 +2748,31 @@ def swap_blocks(
     torch.ops._C_cache_ops.swap_blocks(src, dst, block_size_in_bytes, block_mapping)
 
 
+def swap_blocks_batch(
+    src_ptrs: torch.Tensor,
+    dst_ptrs: torch.Tensor,
+    sizes: torch.Tensor,
+    is_src_access_order_any: bool = False,
+) -> None:
+    """
+    Batch version of swap_blocks: submit all copies in a single driver call.
+
+    Each entry specifies a raw pointer copy: src_ptrs[i] -> dst_ptrs[i]
+    of sizes[i] bytes. All three tensors must be int64 CPU tensors.
+    On CUDA 12.8+ this uses cuMemcpyBatchAsync for minimal submission
+    overhead; on older CUDA it falls back to a loop of cudaMemcpyAsync.
+
+    is_src_access_order_any: if True, pass CU_MEMCPY_SRC_ACCESS_ORDER_ANY to
+        cuMemcpyBatchAsync, letting the DMA engine prefetch source bytes
+        out of stream order. Only safe when no GPU stream is concurrently
+        writing to the source. Defaults to False (STREAM ordering), which
+        is always safe.
+    """
+    torch.ops._C_cache_ops.swap_blocks_batch(
+        src_ptrs, dst_ptrs, sizes, is_src_access_order_any
+    )
+
+
 def convert_fp8(
     output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
 ) -> None:
@@ -2688,6 +2866,50 @@ def indexer_k_quant_and_cache(
     )
 
 
+def top_k_per_row_prefill(
+    logits: torch.Tensor,
+    cu_seqlen_ks: torch.Tensor,
+    cu_seqlen_ke: torch.Tensor,
+    raw_topk_indices: torch.Tensor,
+    num_rows: int,
+    stride0: int,
+    stride1: int,
+    topk_tokens: int,
+) -> None:
+    torch.ops._C.top_k_per_row_prefill(
+        logits,
+        cu_seqlen_ks,
+        cu_seqlen_ke,
+        raw_topk_indices,
+        num_rows,
+        stride0,
+        stride1,
+        topk_tokens,
+    )
+
+
+def top_k_per_row_decode(
+    logits: torch.Tensor,
+    next_n: int,
+    seq_lens: torch.Tensor,
+    raw_topk_indices: torch.Tensor,
+    num_rows: int,
+    stride0: int,
+    stride1: int,
+    topk_tokens: int,
+) -> None:
+    torch.ops._C.top_k_per_row_decode(
+        logits,
+        next_n,
+        seq_lens,
+        raw_topk_indices,
+        num_rows,
+        stride0,
+        stride1,
+        topk_tokens,
+    )
+
+
 def cp_gather_indexer_k_quant_cache(
     kv_cache: torch.Tensor,
     dst_k: torch.Tensor,
@@ -2929,6 +3151,14 @@ def weight_packed_linear_fake(
         )
 
 
+class CPUQuantMethod(IntEnum):
+    UNQUANT = 0
+    INT8_W8A8 = 1
+    FP8_W8A16 = 2
+    INT4_W4A8 = 3
+    MXFP4 = 4
+
+
 if hasattr(torch.ops._C, "fused_experts_cpu"):
 
     @register_fake("_C::fused_experts_cpu")
@@ -2939,18 +3169,61 @@ def fused_experts_cpu_fake(
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
         inplace: bool,
-        use_int8_w8a8: bool,
-        use_fp8_w8a16: bool,
+        moe_comp_method: CPUQuantMethod,
         w1_scale: torch.Tensor | None,
         w2_scale: torch.Tensor | None,
+        w1_zero: torch.Tensor | None,
+        w2_zero: torch.Tensor | None,
         block_size: list[int] | None,
-        a1_scale: torch.Tensor | None,
-        a2_scale: torch.Tensor | None,
+        w1_bias: torch.Tensor | None,
+        w2_bias: torch.Tensor | None,
+        alpha: float | None,
+        limit: float | None,
         is_vnni: bool,
     ) -> torch.Tensor:
         return torch.empty_like(hidden_states)
 
 
+def fused_experts_cpu(
+    hidden_states: torch.Tensor,
+    w1: torch.Tensor,
+    w2: torch.Tensor,
+    topk_weights: torch.Tensor,
+    topk_ids: torch.Tensor,
+    inplace: bool,
+    moe_comp_method: CPUQuantMethod,
+    w1_scale: torch.Tensor | None,
+    w2_scale: torch.Tensor | None,
+    w1_zero: torch.Tensor | None,
+    w2_zero: torch.Tensor | None,
+    block_size: list[int] | None,
+    w1_bias: torch.Tensor | None = None,
+    w2_bias: torch.Tensor | None = None,
+    alpha: float | None = None,
+    limit: float | None = None,
+    is_vnni: bool = True,
+) -> torch.Tensor:
+    return torch.ops._C.fused_experts_cpu(
+        hidden_states,
+        w1,
+        w2,
+        topk_weights,
+        topk_ids,
+        inplace,
+        moe_comp_method,
+        w1_scale,
+        w2_scale,
+        w1_zero,
+        w2_zero,
+        block_size,
+        w1_bias,
+        w2_bias,
+        alpha,
+        limit,
+        is_vnni,
+    )
+
+
 if hasattr(torch.ops._C, "int8_scaled_mm_with_quant"):
 
     @register_fake("_C::int8_scaled_mm_with_quant")
@@ -2967,6 +3240,239 @@ def int8_scaled_mm_with_quant_fake(
         return torch.empty((M, N), dtype=out_dtype)
 
 
+class CPUQuantAlgo(IntEnum):
+    AWQ = 0
+    GPTQ = 1
+
+
+if hasattr(torch.ops._C, "convert_weight_packed_scale_zp"):
+
+    @register_fake("_C::convert_weight_packed_scale_zp")
+    def convert_weight_packed_scale_zp_fake(
+        qweight: torch.Tensor,
+        qzeros: torch.Tensor,
+        scales: torch.Tensor,
+        quant_method_4bit: CPUQuantAlgo,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        return (
+            torch.empty_like(qweight),
+            torch.empty_like(qzeros),
+            torch.empty_like(scales),
+        )
+
+
+def convert_weight_packed_scale_zp(
+    qweight: torch.Tensor,
+    qzeros: torch.Tensor,
+    scales: torch.Tensor,
+    quant_method_4bit: CPUQuantAlgo,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    return torch.ops._C.convert_weight_packed_scale_zp(
+        qweight,
+        qzeros,
+        scales,
+        quant_method_4bit,
+    )
+
+
+if hasattr(torch.ops._C, "int4_scaled_mm_cpu"):
+
+    @register_fake("_C::int4_scaled_mm_cpu")
+    def int4_scaled_mm_cpu_fake(
+        x: torch.Tensor,
+        w: torch.Tensor,
+        w_zeros: torch.Tensor,
+        w_scales: torch.Tensor,
+        bias: torch.Tensor | None,
+    ) -> torch.Tensor:
+        N = w_scales.size(0) * w_scales.size(-1)
+        return torch.empty((x.size(0), N), dtype=x.dtype, device=x.device)
+
+
+def int4_scaled_mm_cpu(
+    x: torch.Tensor,
+    w: torch.Tensor,
+    w_zeros: torch.Tensor,
+    w_scales: torch.Tensor,
+    bias: torch.Tensor | None,
+) -> torch.Tensor:
+    x_shape = x.shape
+    x_2d = x.reshape(-1, x_shape[-1]) if len(x_shape) > 2 else x
+
+    out = torch.ops._C.int4_scaled_mm_cpu(
+        x_2d,
+        w,
+        w_zeros,
+        w_scales,
+        bias,
+    )
+    out = out.reshape(x_shape[:-1] + (out.size(-1),)) if len(x_shape) > 2 else out
+    return out
+
+
+if hasattr(torch.ops._C, "fp8_scaled_mm_cpu"):
+
+    @register_fake("_C::fp8_scaled_mm_cpu")
+    def fp8_scaled_mm_cpu_fake(
+        mat1: torch.Tensor,
+        mat2: torch.Tensor,
+        scales2: torch.Tensor,
+        block_size: list[int],
+        bias: torch.Tensor | None,
+        out_dtype: torch.dtype,
+        is_vnni: bool,
+    ) -> torch.Tensor:
+        M = mat1.size(0)
+        N = mat2.size(0)
+        return torch.empty((M, N), dtype=out_dtype, device=mat1.device)
+
+
+_supports_cpu_fp8_w8a16 = bool(hasattr(torch.ops._C, "fp8_scaled_mm_cpu"))
+
+
+def fp8_scaled_mm_cpu(
+    mat1: torch.Tensor,
+    mat2: torch.Tensor,
+    scales2: torch.Tensor,
+    block_size: list[int],
+    bias: torch.Tensor | None,
+    out_dtype: torch.dtype,
+    is_vnni: bool,
+) -> torch.Tensor:
+    return torch.ops._C.fp8_scaled_mm_cpu(
+        mat1, mat2, scales2, block_size, bias, out_dtype, is_vnni
+    )
+
+
+def chunk_gated_delta_rule_cpu(
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    g: torch.Tensor,
+    beta: torch.Tensor,
+    initial_state: torch.Tensor,
+    output_final_state: bool,
+    cu_seqlens: torch.Tensor,
+    head_first: bool,
+    use_qk_l2norm_in_kernel: bool,
+    eps: float = 1e-5,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    return torch.ops._C.chunk_gated_delta_rule_cpu(
+        query,
+        key,
+        value,
+        g,
+        beta,
+        initial_state,
+        output_final_state,
+        cu_seqlens,
+        head_first,
+        use_qk_l2norm_in_kernel,
+        eps,
+    )
+
+
+def fused_sigmoid_gating_delta_rule_update_cpu(
+    A_log: torch.Tensor,
+    dt_bias: torch.Tensor,
+    q: torch.Tensor,
+    k: torch.Tensor,
+    v: torch.Tensor,
+    a: torch.Tensor,
+    b: torch.Tensor,
+    initial_state_source: torch.Tensor,
+    initial_state_indices: torch.Tensor,
+    cu_seqlens: torch.Tensor,
+    use_qk_l2norm_in_kernel: bool,
+    softplus_beta: float = 1.0,
+    softplus_threshold: float = 20.0,
+) -> torch.Tensor:
+    return torch.ops._C.fused_sigmoid_gating_delta_rule_update_cpu(
+        A_log,
+        dt_bias,
+        q,
+        k,
+        v,
+        a,
+        b,
+        initial_state_source,
+        initial_state_indices,
+        cu_seqlens,
+        use_qk_l2norm_in_kernel,
+        softplus_beta,
+        softplus_threshold,
+    )
+
+
+def fused_gdn_gating_cpu(
+    A_log: torch.Tensor,
+    a: torch.Tensor,
+    b: torch.Tensor,
+    dt_bias: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    return torch.ops._C.fused_gdn_gating_cpu(
+        A_log,
+        a,
+        b,
+        dt_bias,
+    )
+
+
+def causal_conv1d_weight_pack(
+    weight: torch.Tensor,
+) -> torch.Tensor:
+    return torch.ops._C.causal_conv1d_weight_pack(
+        weight,
+    )
+
+
+def causal_conv1d_fwd_cpu(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor | None,
+    conv_states: torch.Tensor | None,
+    query_start_loc: torch.Tensor | None,
+    cache_indices: torch.Tensor | None,
+    has_initial_state: torch.Tensor | None,
+    silu_activation: bool,
+    is_vnni: bool,
+) -> torch.Tensor:
+    return torch.ops._C.causal_conv1d_fwd_cpu(
+        x,
+        weight,
+        bias,
+        conv_states,
+        query_start_loc,
+        cache_indices,
+        has_initial_state,
+        silu_activation,
+        -1,
+        is_vnni,
+    )
+
+
+def causal_conv1d_update_cpu(
+    x: torch.Tensor,
+    conv_states: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor | None,
+    silu_activation: bool,
+    conv_state_indices: torch.Tensor | None,
+    is_vnni: bool,
+) -> torch.Tensor:
+    return torch.ops._C.causal_conv1d_update_cpu(
+        x,
+        conv_states,
+        weight,
+        bias,
+        silu_activation,
+        None,
+        conv_state_indices,
+        -1,
+        is_vnni,
+    )
+
+
 class CPUDNNLGEMMHandler:
     def __init__(self) -> None:
         self.handler_tensor: torch.Tensor | None = None
@@ -3133,6 +3639,9 @@ def cpu_attn_reshape_and_cache(
     value_cache: torch.Tensor,
     slot_mapping: torch.Tensor,
     isa: str,
+    k_scale: float = 1.0,
+    v_scale: float = 1.0,
+    kv_cache_dtype: str = "auto",
 ) -> None:
     torch.ops._C.cpu_attn_reshape_and_cache(
         key,
@@ -3141,6 +3650,9 @@ def cpu_attn_reshape_and_cache(
         value_cache,
         slot_mapping,
         isa,
+        k_scale,
+        v_scale,
+        kv_cache_dtype,
     )
 
 
@@ -3159,6 +3671,9 @@ def cpu_attention_with_kv_cache(
     softcap: float,
     scheduler_metadata: torch.Tensor,
     s_aux: torch.Tensor | None,
+    k_scale: float = 1.0,
+    v_scale: float = 1.0,
+    kv_cache_dtype: str = "auto",
 ) -> None:
     torch.ops._C.cpu_attention_with_kv_cache(
         query,
@@ -3176,6 +3691,9 @@ def cpu_attention_with_kv_cache(
         softcap,
         scheduler_metadata,
         s_aux,
+        k_scale,
+        v_scale,
+        kv_cache_dtype,
     )
 
 
@@ -3204,6 +3722,12 @@ def cpu_gemm_wna16(
     return output
 
 
+def cpu_activation_lut_bf16(input: torch.Tensor, activation: str) -> torch.Tensor:
+    out = torch.empty_like(input)
+    torch.ops._C.activation_lut_bf16(out, input, activation)
+    return out
+
+
 def cpu_prepack_moe_weight(
     weight: torch.Tensor,
     isa: str,
@@ -3397,3 +3921,38 @@ def hadacore_transform(x: torch.Tensor, inplace: bool = True) -> torch.Tensor:
     @register_fake("_C::hadacore_transform")
     def _hadacore_transform_fake(x: torch.Tensor, inplace: bool) -> torch.Tensor:
         return torch.empty_like(x) if not inplace else x
+
+
+if hasattr(torch.ops._C, "minimax_allreduce_rms"):
+
+    @register_fake("_C::minimax_allreduce_rms")
+    def _minimax_allreduce_rms_fake(
+        input: torch.Tensor,
+        norm_weight: torch.Tensor,
+        workspace: torch.Tensor,
+        rank: int,
+        nranks: int,
+        eps: float,
+    ) -> torch.Tensor:
+        return torch.empty_like(input)
+
+
+if hasattr(torch.ops._C, "minimax_allreduce_rms_qk"):
+
+    @register_fake("_C::minimax_allreduce_rms_qk")
+    def _minimax_allreduce_rms_qk_fake(
+        qkv: torch.Tensor,
+        norm_weight_q: torch.Tensor,
+        norm_weight_k: torch.Tensor,
+        workspace: torch.Tensor,
+        q_size: int,
+        kv_size: int,
+        rank: int,
+        nranks: int,
+        eps: float,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        token_num = qkv.shape[0]
+        return (
+            torch.empty([token_num, q_size], dtype=qkv.dtype, device=qkv.device),
+            torch.empty([token_num, kv_size], dtype=qkv.dtype, device=qkv.device),
+        )
diff --git a/vllm/_oink_ops.py b/vllm/_oink_ops.py
deleted file mode 100644
index c7a055410b71..000000000000
--- a/vllm/_oink_ops.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""Small helper wrappers for external Oink Blackwell custom ops.
-
-vLLM does not depend on the external Oink repository/package. When an external
-plugin registers torch.library.custom_op entrypoints under the `oink::`
-namespace (e.g. via vLLM's general_plugins mechanism) and
-`VLLM_USE_OINK_OPS=1` is set, vLLM can route eligible calls to those ops.
-
-This module provides:
-- A single place to probe Oink op availability at module init time
-  (outside torch.compile tracing), and
-- Thin wrappers around the torch.ops entrypoints for use in CUDA fast paths,
-  without introducing graph breaks.
-
-Important:
-  Do not call the availability helpers in a compiled region. They may call
-  functions decorated with `torch._dynamo.disable` to safely check
-  conditions that should not be traced.
-"""
-
-from __future__ import annotations
-
-from collections.abc import Callable
-
-import torch
-
-try:
-    from torch._dynamo import disable as _dynamo_disable  # type: ignore[attr-defined]
-except Exception:  # pragma: no cover
-
-    def _dynamo_disable(fn: Callable):  # type: ignore[misc]
-        return fn
-
-
-def _has_oink_op(op_name: str) -> bool:
-    """Check if a specific oink op is registered."""
-    return hasattr(torch.ops, "oink") and hasattr(torch.ops.oink, op_name)
-
-
-@_dynamo_disable
-def is_oink_available_for_device(device_index: int) -> bool:
-    """Return True if Oink ops are registered and device is SM100+.
-
-    This function is intended to be called during module initialization
-    (e.g., in RMSNorm.__init__), not in the forward path.
-
-    External plugins are expected to gate registration on SM100+ and
-    VLLM_USE_OINK_OPS=1, so if the ops are present they should be usable.
-    """
-    if not torch.cuda.is_available():
-        return False
-
-    try:
-        major, minor = torch.cuda.get_device_capability(device_index)
-        sm = 10 * major + minor
-        if sm < 100:
-            return False
-    except Exception:
-        return False
-
-    return _has_oink_op("rmsnorm")
-
-
-def has_fused_add_rms_norm() -> bool:
-    """Return True if the in-place fused op is registered."""
-    return _has_oink_op("fused_add_rms_norm")
-
-
-def rmsnorm(x: torch.Tensor, weight: torch.Tensor, eps: float) -> torch.Tensor:
-    """Call `torch.ops.oink.rmsnorm`.
-
-    This wrapper is safe to call in torch.compile regions.
-    """
-    return torch.ops.oink.rmsnorm(x, weight, eps)
-
-
-def fused_add_rms_norm_(
-    x: torch.Tensor,
-    residual: torch.Tensor,
-    weight: torch.Tensor,
-    eps: float,
-) -> None:
-    """Call `torch.ops.oink.fused_add_rms_norm` (mutates x and residual)."""
-    torch.ops.oink.fused_add_rms_norm(x, residual, weight, eps)
-
-
-def fused_add_rms_norm(
-    x: torch.Tensor,
-    residual: torch.Tensor,
-    weight: torch.Tensor,
-    eps: float,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    """Convenience wrapper returning (x, residual) after in-place mutation."""
-    fused_add_rms_norm_(x, residual, weight, eps)
-    return x, residual
diff --git a/vllm/_tilelang_ops.py b/vllm/_tilelang_ops.py
new file mode 100644
index 000000000000..aa742fe50320
--- /dev/null
+++ b/vllm/_tilelang_ops.py
@@ -0,0 +1,627 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import math
+from functools import cache
+from typing import TYPE_CHECKING
+
+import torch
+
+from vllm.platforms import current_platform
+from vllm.utils.import_utils import has_tilelang
+from vllm.utils.math_utils import cdiv
+
+# tilelang is only available on CUDA platforms
+if TYPE_CHECKING or current_platform.is_cuda():
+    if not has_tilelang():
+        raise ImportError(
+            "tilelang is required for mhc but is not installed. Install it with "
+            "`pip install tilelang`."
+        )
+    import tilelang
+    import tilelang.language as T
+else:
+    tilelang = None  # type: ignore[assignment]
+    T = None  # type: ignore[assignment]
+
+
+@cache
+def compute_num_split(block_k: int, k: int | None, grid_size: int) -> int:
+    device_props = torch.cuda.get_device_properties(0)
+    n_sms = device_props.multi_processor_count
+    split_k = n_sms // grid_size
+    if k is not None:
+        # avoid split_k for small k
+        num_block_k = cdiv(k, block_k)
+        split_k = min(split_k, num_block_k // 4)
+    split_k = max(split_k, 1)
+    return split_k
+
+
+@tilelang.jit(
+    pass_configs={
+        tilelang.PassConfigKey.TL_DISABLE_WARP_SPECIALIZED: True,
+        tilelang.PassConfigKey.TL_DISABLE_TMA_LOWER: True,
+        tilelang.PassConfigKey.TL_PTXAS_REGISTER_USAGE_LEVEL: 10,
+    },
+)
+def mhc_pre_big_fuse_tilelang(
+    gemm_out_mul,
+    gemm_out_sqrsum,
+    hc_scale,
+    hc_base,
+    residual,
+    post_mix,
+    comb_mix,
+    layer_input,
+    hidden_size: int,
+    rms_eps: float,
+    hc_pre_eps: float,
+    hc_sinkhorn_eps: float,
+    hc_post_mult_value: float,
+    sinkhorn_repeat: int,
+    n_splits: int = 16,
+    hc_mult: int = 4,
+):
+    """Deeply fused kernels, everything other than gemm & sqrsum in mHC pre block."""
+    num_tokens = T.dynamic("num_tokens")
+    hc_mult3 = hc_mult * (2 + hc_mult)
+    hidden_block = math.gcd(512, hidden_size)
+
+    gemm_out_mul: T.Tensor[[n_splits, num_tokens, hc_mult3], T.float32]  # type: ignore[no-redef, valid-type]
+    gemm_out_sqrsum: T.Tensor[[n_splits, num_tokens], T.float32]  # type: ignore[no-redef, valid-type]
+    hc_scale: T.Tensor[[3], T.float32]  # type: ignore[no-redef, valid-type]
+    hc_base: T.Tensor[[hc_mult3], T.float32]  # type: ignore[no-redef, valid-type]
+    residual: T.Tensor[[num_tokens, hc_mult, hidden_size], T.bfloat16]  # type: ignore[no-redef, valid-type]
+    # outputs
+    post_mix: T.Tensor[[num_tokens, hc_mult], T.float32]  # type: ignore[no-redef, valid-type]
+    comb_mix: T.Tensor[[num_tokens, hc_mult * hc_mult], T.float32]  # type: ignore[no-redef, valid-type]
+    layer_input: T.Tensor[[num_tokens, hidden_size], T.bfloat16]  # type: ignore[no-redef, valid-type]
+
+    with T.Kernel(num_tokens, threads=96) as i:
+        T.pdl_sync()
+        ##################################################################
+        # _pre_norm_fn_fwd_norm
+        rms = T.alloc_fragment(1, T.float32)
+        mixes = T.alloc_fragment(hc_mult3, T.float32)
+        T.clear(mixes)
+        rms[0] = 0
+        for i_split in T.serial(n_splits):
+            rms[0] += gemm_out_sqrsum[i_split, i]
+        rms[0] = T.rsqrt(rms[0] / (hc_mult * hidden_size) + rms_eps)
+        for j in T.Parallel(hc_mult3):
+            mixes[j] = 0
+            for i_split in T.serial(n_splits):
+                mixes[j] += gemm_out_mul[i_split, i, j]
+            mixes[j] *= rms[0]
+        mixes_shared = T.alloc_shared(hc_mult3, T.float32)
+        T.copy(mixes, mixes_shared)
+
+        if T.get_thread_binding() < 32:
+            ##################################################################
+            # _pre_split_mixes_fwd (post & comb)
+            cm = T.alloc_fragment((hc_mult, hc_mult), T.float32)
+            for j in T.Parallel(hc_mult):
+                post_mix[i, j] = (
+                    T.sigmoid(
+                        mixes_shared[j + hc_mult] * hc_scale[1] + hc_base[j + hc_mult]
+                    )
+                    * hc_post_mult_value
+                )
+            for j, k in T.Parallel(hc_mult, hc_mult):
+                cm[j, k] = (
+                    mixes_shared[j * hc_mult + k + hc_mult * 2] * hc_scale[2]
+                    + hc_base[j * hc_mult + k + hc_mult * 2]
+                )
+
+            ##################################################################
+            # _sinkhorn_fwd
+            row_sum = T.alloc_fragment(hc_mult, T.float32)
+            col_sum = T.alloc_fragment(hc_mult, T.float32)
+
+            # comb = comb.softmax(-1) + eps
+            row_max = T.alloc_fragment(hc_mult, T.float32)
+            T.reduce_max(cm, row_max, dim=1)
+            for j, k in T.Parallel(hc_mult, hc_mult):
+                cm[j, k] = T.exp(cm[j, k] - row_max[j])
+            T.reduce_sum(cm, row_sum, dim=1)
+            for j, k in T.Parallel(hc_mult, hc_mult):
+                cm[j, k] = cm[j, k] / row_sum[j] + hc_sinkhorn_eps
+
+            # comb = comb / (comb.sum(-2) + eps)
+            T.reduce_sum(cm, col_sum, dim=0)
+            for j, k in T.Parallel(hc_mult, hc_mult):
+                cm[j, k] = cm[j, k] / (col_sum[k] + hc_sinkhorn_eps)
+
+            for _ in T.serial(sinkhorn_repeat - 1):
+                # comb = comb / (comb.sum(-1) + eps)
+                T.reduce_sum(cm, row_sum, dim=1)
+                for j, k in T.Parallel(hc_mult, hc_mult):
+                    cm[j, k] = cm[j, k] / (row_sum[j] + hc_sinkhorn_eps)
+
+                # comb = comb / (comb.sum(-2) + eps)
+                T.reduce_sum(cm, col_sum, dim=0)
+                for j, k in T.Parallel(hc_mult, hc_mult):
+                    cm[j, k] = cm[j, k] / (col_sum[k] + hc_sinkhorn_eps)
+
+            # save comb_mix to global memory
+            for j, k in T.Parallel(hc_mult, hc_mult):
+                comb_mix[i, j * hc_mult + k] = cm[j, k]
+        else:
+            ##################################################################
+            # _pre_split_mixes_fwd (pre)
+            pre_mix_shared = T.alloc_shared(hc_mult, T.float32)
+            for j in T.Parallel(hc_mult):
+                pre_mix_shared[j] = (
+                    T.sigmoid(
+                        mixes_shared[j] * hc_scale[0] + hc_base[j],
+                    )
+                    + hc_pre_eps
+                )
+            ###################################################################
+            # _pre_apply_mix_fwd
+            for i0_h in T.Pipelined(hidden_size // hidden_block, num_stages=2):
+                xs = T.alloc_shared((hc_mult, hidden_block), T.float32)
+                xl = T.alloc_fragment((hc_mult, hidden_block), T.float32)
+                T.copy(residual[i, 0, i0_h * hidden_block], xs)
+                T.copy(xs, xl)
+
+                ol = T.alloc_fragment(hidden_block, T.float32)
+                T.clear(ol)
+
+                for i_hc in T.serial(hc_mult):
+                    pre = pre_mix_shared[i_hc]
+                    for i1_h in T.Parallel(hidden_block):
+                        ol[i1_h] += pre * xl[i_hc, i1_h]
+
+                T.copy(ol, layer_input[i, i0_h * hidden_block])
+        T.pdl_trigger()
+
+
+# Copied from https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/layers/mhc.py#L478
+
+
+@tilelang.jit(
+    pass_configs={
+        tilelang.PassConfigKey.TL_DISABLE_WARP_SPECIALIZED: True,
+        tilelang.PassConfigKey.TL_DISABLE_TMA_LOWER: True,
+        tilelang.PassConfigKey.TL_PTXAS_REGISTER_USAGE_LEVEL: 10,
+    },
+)
+def mhc_pre_big_fuse_with_norm_tilelang(
+    gemm_out_mul,
+    gemm_out_sqrsum,
+    hc_scale,
+    hc_base,
+    residual,
+    post_mix,
+    comb_mix,
+    layer_input,
+    norm_weight,
+    hidden_size: int,
+    rms_eps: float,
+    hc_pre_eps: float,
+    hc_sinkhorn_eps: float,
+    hc_post_mult_value: float,
+    sinkhorn_repeat: int,
+    norm_eps: float,
+    n_splits: int = 16,
+    hc_mult: int = 4,
+    gemm_last_dim: int = -1,
+):
+    num_tokens = T.dynamic("num_tokens")
+    hc_mult3 = hc_mult * (2 + hc_mult)
+    if gemm_last_dim < 0:
+        gemm_last_dim = hc_mult3
+    hidden_block = math.gcd(1024, hidden_size)
+
+    gemm_out_mul: T.Tensor[[n_splits, num_tokens, gemm_last_dim], T.float32]  # type: ignore[no-redef, valid-type]
+    gemm_out_sqrsum: T.Tensor[[n_splits, num_tokens], T.float32]  # type: ignore[no-redef, valid-type]
+    hc_scale: T.Tensor[[3], T.float32]  # type: ignore[no-redef, valid-type]
+    hc_base: T.Tensor[[hc_mult3], T.float32]  # type: ignore[no-redef, valid-type]
+    residual: T.Tensor[[num_tokens, hc_mult, hidden_size], T.bfloat16]  # type: ignore[no-redef, valid-type]
+    post_mix: T.Tensor[[num_tokens, hc_mult], T.float32]  # type: ignore[no-redef, valid-type]
+    comb_mix: T.Tensor[[num_tokens, hc_mult * hc_mult], T.float32]  # type: ignore[no-redef, valid-type]
+    layer_input: T.Tensor[[num_tokens, hidden_size], T.bfloat16]  # type: ignore[no-redef, valid-type]
+    norm_weight: T.Tensor[[hidden_size], T.bfloat16]  # type: ignore[no-redef, valid-type]
+
+    with T.Kernel(num_tokens, threads=96) as i:
+        rms = T.alloc_fragment(1, T.float32)
+        mixes = T.alloc_fragment(hc_mult3, T.float32)
+        T.clear(mixes)
+        rms[0] = 0
+
+        T.pdl_sync()
+
+        for i_split in T.serial(n_splits):
+            rms[0] += gemm_out_sqrsum[i_split, i]
+        rms[0] = T.rsqrt(rms[0] / (hc_mult * hidden_size) + rms_eps)
+        for j in T.Parallel(hc_mult3):
+            mixes[j] = 0
+            for i_split in T.serial(n_splits):
+                mixes[j] += gemm_out_mul[i_split, i, j]
+            mixes[j] *= rms[0]
+        mixes_shared = T.alloc_shared(hc_mult3, T.float32)
+        T.copy(mixes, mixes_shared)
+
+        if T.get_thread_binding() < 32:
+            cm = T.alloc_fragment((hc_mult, hc_mult), T.float32)
+            for j in T.Parallel(hc_mult):
+                post_mix[i, j] = (
+                    T.sigmoid(
+                        mixes_shared[j + hc_mult] * hc_scale[1] + hc_base[j + hc_mult]
+                    )
+                    * hc_post_mult_value
+                )
+            for j, k in T.Parallel(hc_mult, hc_mult):
+                cm[j, k] = (
+                    mixes_shared[j * hc_mult + k + hc_mult * 2] * hc_scale[2]
+                    + hc_base[j * hc_mult + k + hc_mult * 2]
+                )
+
+            row_sum = T.alloc_fragment(hc_mult, T.float32)
+            col_sum = T.alloc_fragment(hc_mult, T.float32)
+
+            row_max = T.alloc_fragment(hc_mult, T.float32)
+            T.reduce_max(cm, row_max, dim=1)
+            for j, k in T.Parallel(hc_mult, hc_mult):
+                cm[j, k] = T.exp(cm[j, k] - row_max[j])
+            T.reduce_sum(cm, row_sum, dim=1)
+            for j, k in T.Parallel(hc_mult, hc_mult):
+                cm[j, k] = cm[j, k] / row_sum[j] + hc_sinkhorn_eps
+
+            T.reduce_sum(cm, col_sum, dim=0)
+            for j, k in T.Parallel(hc_mult, hc_mult):
+                cm[j, k] = cm[j, k] / (col_sum[k] + hc_sinkhorn_eps)
+
+            for _ in T.serial(sinkhorn_repeat - 1):
+                T.reduce_sum(cm, row_sum, dim=1)
+                for j, k in T.Parallel(hc_mult, hc_mult):
+                    cm[j, k] = cm[j, k] / (row_sum[j] + hc_sinkhorn_eps)
+
+                T.reduce_sum(cm, col_sum, dim=0)
+                for j, k in T.Parallel(hc_mult, hc_mult):
+                    cm[j, k] = cm[j, k] / (col_sum[k] + hc_sinkhorn_eps)
+
+            for j, k in T.Parallel(hc_mult, hc_mult):
+                comb_mix[i, j * hc_mult + k] = cm[j, k]
+        else:
+            pre_mix_shared = T.alloc_shared(hc_mult, T.float32)
+            for j in T.Parallel(hc_mult):
+                pre_mix_shared[j] = (
+                    T.sigmoid(
+                        mixes_shared[j] * hc_scale[0] + hc_base[j],
+                    )
+                    + hc_pre_eps
+                )
+
+            # Pass 1: stash unnormalized weighted-sum output in shared memory
+            # as bf16 (matches the rounding that RMSNorm would see) while
+            # accumulating the per-position squared sum.
+            output_shared = T.alloc_shared(hidden_size, T.bfloat16)
+            sumsq_per_pos = T.alloc_fragment(hidden_block, T.float32)
+            T.clear(sumsq_per_pos)
+
+            for i0_h in T.Pipelined(hidden_size // hidden_block, num_stages=3):
+                xs = T.alloc_shared((hc_mult, hidden_block), T.bfloat16)
+                xl = T.alloc_fragment((hc_mult, hidden_block), T.float32)
+                T.copy(residual[i, 0, i0_h * hidden_block], xs)
+                T.copy(xs, xl)
+
+                ol = T.alloc_fragment(hidden_block, T.float32)
+                T.clear(ol)
+
+                for i_hc in T.serial(hc_mult):
+                    pre = pre_mix_shared[i_hc]
+                    for i1_h in T.Parallel(hidden_block):
+                        ol[i1_h] += pre * xl[i_hc, i1_h]
+
+                for i1_h in T.Parallel(hidden_block):
+                    sumsq_per_pos[i1_h] += ol[i1_h] * ol[i1_h]
+                    output_shared[i0_h * hidden_block + i1_h] = T.bfloat16(ol[i1_h])
+
+            sumsq = T.alloc_fragment(1, T.float32)
+            T.reduce_sum(sumsq_per_pos, sumsq, dim=0)
+            rsqrt_norm = T.alloc_fragment(1, T.float32)
+            rsqrt_norm[0] = T.rsqrt(sumsq[0] / hidden_size + norm_eps)
+
+            # Pass 2: scale by rsqrt * norm_weight and write the result to HBM.
+            for i0_h in T.Pipelined(hidden_size // hidden_block, num_stages=2):
+                w_shared = T.alloc_shared(hidden_block, T.bfloat16)
+                w_local = T.alloc_fragment(hidden_block, T.float32)
+                T.copy(norm_weight[i0_h * hidden_block], w_shared)
+                T.copy(w_shared, w_local)
+
+                ol = T.alloc_fragment(hidden_block, T.float32)
+                for i1_h in T.Parallel(hidden_block):
+                    ol[i1_h] = (
+                        output_shared[i0_h * hidden_block + i1_h]
+                        * rsqrt_norm[0]
+                        * w_local[i1_h]
+                    )
+
+                T.copy(ol, layer_input[i, i0_h * hidden_block])
+
+        T.pdl_trigger()
+
+
+@tilelang.jit(
+    pass_configs={
+        tilelang.PassConfigKey.TL_DISABLE_WARP_SPECIALIZED: True,
+        tilelang.PassConfigKey.TL_DISABLE_TMA_LOWER: True,
+        tilelang.PassConfigKey.TL_PTXAS_REGISTER_USAGE_LEVEL: 10,
+    },
+)
+def mhc_fused_tilelang(
+    comb_mix,
+    residual_in,
+    post_mix,
+    x_in,
+    weight_t,
+    yp_out,
+    rp_out,
+    residual_out,
+    hc: int,
+    hidden: int,
+    n_out: int,
+    n_thr: int = 256,
+    h_blk: int = 256,
+    tile_n: int = 1,
+    split_k: int = 1,
+) -> tilelang.JITKernel:
+    """Fused mhc post-mapping + pre-norm GEMM FMA"""
+    m = T.dynamic("num_tokens")
+    split_k = T.dynamic("split_k")
+    h = hidden
+    h_blk = math.gcd(hidden, h_blk)
+    h_per_split = h // split_k
+    n_tiles = n_out // tile_n
+
+    comb_mix: T.Tensor((m, hc, hc), T.float32)  # type: ignore[no-redef, valid-type]
+    residual_in: T.Tensor((m, hc, h), T.bfloat16)  # type: ignore[no-redef, valid-type]
+    post_mix: T.Tensor((m, hc), T.float32)  # type: ignore[no-redef, valid-type]
+    x_in: T.Tensor((m, h), T.bfloat16)  # type: ignore[no-redef, valid-type]
+    weight_t: T.Tensor((n_out, hc, h), T.float32)  # type: ignore[no-redef, valid-type]
+    yp_out: T.Tensor((split_k, m, n_out), T.float32)  # type: ignore[no-redef, valid-type]
+    rp_out: T.Tensor((split_k, m), T.float32)  # type: ignore[no-redef, valid-type]
+    residual_out: T.Tensor((m, hc, h), T.bfloat16)  # type: ignore[no-redef, valid-type]
+
+    h_iters = h_per_split // n_thr
+    num_warps = n_thr // 32
+
+    with T.Kernel(m, n_tiles, split_k, threads=n_thr) as (i_n, i_nt, i_ks):
+        tid = T.get_thread_binding()
+        warp_id = T.get_warp_idx()
+        lane = T.get_lane_idx()
+
+        s_warp = T.alloc_shared((num_warps, tile_n + 1), T.float32)
+        s_post = T.alloc_shared((hc,), T.float32)
+        s_comb = T.alloc_shared((hc, hc), T.float32)
+
+        pm = T.alloc_local((hc,), T.float32)
+        cm = T.alloc_local((hc, hc), T.float32)
+        acc = T.alloc_local((tile_n,), T.float32)
+        sqr = T.alloc_local((1,), T.float32)
+        new_r = T.alloc_local((hc,), T.float32)
+
+        T.clear(acc)
+        T.clear(sqr)
+        h_split_start = i_ks * h_per_split
+
+        T.pdl_sync()
+
+        T.copy(post_mix[i_n, 0], s_post)
+        T.copy(comb_mix[i_n, 0, 0], s_comb)
+
+        for j in T.unroll(hc):
+            pm[j] = s_post[j]
+        for j in T.unroll(hc):
+            for k in T.unroll(hc):
+                cm[k, j] = s_comb[k, j]
+
+        # Each thread owns h_iters elements of the k-split's h slice.
+        for it in T.serial(h_iters):
+            h_idx = h_split_start + it * n_thr + tid
+
+            # Compute new residual from layer output and past residual
+            for j in T.unroll(hc):
+                new_r[j] = pm[j] * x_in[i_n, h_idx]
+                for k in T.unroll(hc):
+                    new_r[j] += cm[k, j] * residual_in[i_n, k, h_idx]
+
+            # populate residual_out and compute sqr sum
+            if i_nt == 0:
+                for j in T.unroll(hc):
+                    residual_out[i_n, j, h_idx] = new_r[j]
+                    sqr[0] += new_r[j] * new_r[j]
+
+            # Per-thread FMA into acc[n]
+            for n in T.unroll(tile_n):
+                for j in T.unroll(hc):
+                    acc[n] += weight_t[i_nt * tile_n + n, j, h_idx] * new_r[j]
+
+        for n in T.unroll(tile_n):
+            acc[n] = T.warp_reduce_sum(acc[n])
+        if i_nt == 0:
+            sqr[0] = T.warp_reduce_sum(sqr[0])
+
+        # Cross-warp reduce via shared mem
+        if lane == 0:
+            for n in T.unroll(tile_n):
+                s_warp[warp_id, n] = acc[n]
+            if i_nt == 0:
+                s_warp[warp_id, tile_n] = sqr[0]
+        T.sync_threads()
+
+        # Warp 0 does the final cross-warp sum and writes outputs
+        if warp_id == 0:
+            if lane < tile_n:
+                v = T.alloc_var(T.float32, init=0.0)
+                for w in T.unroll(num_warps):
+                    v += s_warp[w, lane]
+                yp_out[i_ks, i_n, i_nt * tile_n + lane] = v
+
+            if i_nt == 0 and lane == 0:
+                v2 = T.alloc_var(T.float32, init=0.0)
+                for w in T.unroll(num_warps):
+                    v2 += s_warp[w, tile_n]
+                rp_out[i_ks, i_n] = v2
+
+        T.pdl_trigger()
+
+
+@tilelang.jit(
+    pass_configs={
+        tilelang.PassConfigKey.TL_DISABLE_WARP_SPECIALIZED: True,
+        tilelang.PassConfigKey.TL_DISABLE_TMA_LOWER: True,
+        tilelang.PassConfigKey.TL_PTXAS_REGISTER_USAGE_LEVEL: 10,
+    },
+)
+def mhc_post_tilelang(
+    a,
+    b,
+    c,
+    d,
+    x,
+    hc: int,
+    hidden: int,
+    n_thr: int = 128,
+    h_blk: int = 1024,
+) -> tilelang.JITKernel:
+    # rename for shorter code
+    n = T.dynamic("num_tokens")
+    h = hidden
+
+    h_blk = math.gcd(hidden, h_blk)
+    a: T.Tensor((n, hc, hc), T.float32)  # type: ignore[no-redef, valid-type]
+    b: T.Tensor((n, hc, h), T.bfloat16)  # type: ignore[no-redef, valid-type]
+    c: T.Tensor((n, hc), T.float32)  # type: ignore[no-redef, valid-type]
+    d: T.Tensor((n, h), T.bfloat16)  # type: ignore[no-redef, valid-type]
+    x: T.Tensor((n, hc, h), T.bfloat16)  # type: ignore[no-redef, valid-type]
+    with T.Kernel(n, threads=n_thr) as i_n:
+        b_shared = T.alloc_shared((hc, h_blk), T.bfloat16)
+        d_shared = T.alloc_shared(h_blk, T.bfloat16)
+
+        x_local = T.alloc_fragment((hc, h_blk), T.float32)
+        b_local = T.alloc_fragment((hc, h_blk), T.float32)
+        d_local = T.alloc_fragment(h_blk, T.float32)
+
+        a_local = T.alloc_fragment((hc, hc), T.float32)
+        c_local = T.alloc_fragment(hc, T.float32)
+        T.pdl_sync()
+        T.copy(a[i_n, 0, 0], a_local)
+        T.copy(c[i_n, 0], c_local)
+
+        for i0_h in T.Serial(T.ceildiv(h, h_blk)):
+            T.copy(b[i_n, 0, i0_h * h_blk], b_shared)
+            T.copy(d[i_n, i0_h * h_blk], d_shared)
+
+            T.copy(b_shared, b_local)
+            T.copy(d_shared, d_local)
+            for i_hco, i1_h in T.Parallel(hc, h_blk):
+                x_local[i_hco, i1_h] = c_local[i_hco] * d_local[i1_h]
+                for i_hci in T.vectorized(hc):
+                    x_local[i_hco, i1_h] += a_local[i_hci, i_hco] * b_local[i_hci, i1_h]
+
+            T.copy(x_local, x[i_n, 0, i0_h * h_blk])
+        T.pdl_trigger()
+
+
+@tilelang.jit(
+    pass_configs={
+        tilelang.PassConfigKey.TL_DISABLE_WARP_SPECIALIZED: True,
+        tilelang.PassConfigKey.TL_DISABLE_TMA_LOWER: True,
+        tilelang.PassConfigKey.TL_PTXAS_REGISTER_USAGE_LEVEL: 10,
+    },
+)
+def hc_head_fuse_tilelang(
+    residual,
+    fn,
+    hc_scale,
+    hc_base,
+    out,
+    hidden_size: int,
+    rms_eps: float,
+    hc_eps: float,
+    hc_mult: int = 4,
+    n_thr: int = 128,
+    h_blk: int = 1024,
+):
+    """Two-pass fused kernel for hc_head.
+
+    Pass 1: accumulate per-token squared sum and hc_mult dot-products
+            (projections onto fn rows) using cross-thread reducers.
+    Pass 2: apply sigmoid-gated weighted sum of residual channels to output.
+
+    Avoids materialising mixes / rsqrt / pre tensors to global memory.
+    """
+    num_tokens = T.dynamic("num_tokens")
+    hc_dim = hc_mult * hidden_size
+    h_block = math.gcd(h_blk, hidden_size)
+    n_h = hidden_size // h_block
+
+    residual: T.Tensor[[num_tokens, hc_mult, hidden_size], T.bfloat16]  # type: ignore[no-redef,valid-type]
+    fn: T.Tensor[[hc_mult, hc_dim], T.float32]  # type: ignore[no-redef,valid-type]
+    hc_scale: T.Tensor[[1], T.float32]  # type: ignore[no-redef,valid-type]
+    hc_base: T.Tensor[[hc_mult], T.float32]  # type: ignore[no-redef,valid-type]
+    out: T.Tensor[[num_tokens, hidden_size], T.bfloat16]  # type: ignore[no-redef,valid-type]
+
+    with T.Kernel(num_tokens, threads=n_thr) as i:
+        T.pdl_sync()
+
+        # ------------------------------------------------------------------
+        # Pass 1 – for each residual channel m_c and h_block:
+        #   • accumulate squared sum (for RMS norm denominator)
+        #   • accumulate hc_mult dot-products with fn rows
+        # ------------------------------------------------------------------
+        sqrsum_r = T.alloc_reducer((1,), T.float32, replication="all")
+        mixes_r = T.alloc_reducer((hc_mult,), T.float32, replication="all")
+        T.fill(sqrsum_r, 0.0)
+        T.fill(mixes_r, 0.0)
+
+        for m_c in T.serial(hc_mult):
+            for i_h in T.serial(n_h):
+                x_local = T.alloc_fragment(h_block, T.float32)
+                T.copy(residual[i, m_c, i_h * h_block], x_local)
+
+                for k in T.Parallel(h_block):
+                    sqrsum_r[0] += x_local[k] * x_local[k]
+
+                for m_m in T.unroll(hc_mult):
+                    fn_local = T.alloc_fragment(h_block, T.float32)
+                    T.copy(fn[m_m, m_c * hidden_size + i_h * h_block], fn_local)
+                    for k in T.Parallel(h_block):
+                        mixes_r[m_m] += x_local[k] * fn_local[k]
+
+        T.finalize_reducer(sqrsum_r)
+        T.finalize_reducer(mixes_r)
+
+        # ------------------------------------------------------------------
+        # Compute pre_mix = sigmoid(mix * rsqrt * scale + base) + eps
+        # ------------------------------------------------------------------
+        pre_mix_shared = T.alloc_shared(hc_mult, T.float32)
+        rsqrt_val = T.alloc_fragment(1, T.float32)
+        rsqrt_val[0] = T.rsqrt(sqrsum_r[0] / hc_dim + rms_eps)
+        for m in T.Parallel(hc_mult):
+            pre_mix_shared[m] = (
+                T.sigmoid(mixes_r[m] * rsqrt_val[0] * hc_scale[0] + hc_base[m]) + hc_eps
+            )
+
+        # ------------------------------------------------------------------
+        # Pass 2 – apply_mix: pipelined weighted sum over residual channels
+        # ------------------------------------------------------------------
+        for i0_h in T.Pipelined(n_h, num_stages=2):
+            xs = T.alloc_shared((hc_mult, h_block), T.bfloat16)
+            xl = T.alloc_fragment((hc_mult, h_block), T.float32)
+            T.copy(residual[i, 0, i0_h * h_block], xs, disable_tma=True)
+            T.copy(xs, xl)
+
+            ol = T.alloc_fragment(h_block, T.float32)
+            T.clear(ol)
+            for i_hc in T.serial(hc_mult):
+                pre = pre_mix_shared[i_hc]
+                for i1_h in T.Parallel(h_block):
+                    ol[i1_h] += pre * xl[i_hc, i1_h]
+
+            T.copy(ol, out[i, i0_h * h_block], disable_tma=True)
+
+        T.pdl_trigger()
diff --git a/vllm/_xpu_ops.py b/vllm/_xpu_ops.py
index 376375550b1d..233c8fb632fd 100644
--- a/vllm/_xpu_ops.py
+++ b/vllm/_xpu_ops.py
@@ -22,6 +22,23 @@ def register_fake(fn):
     except ImportError:
         from torch.library import impl_abstract as register_fake
 
+if hasattr(torch.ops._xpu_C, "fp8_gemm"):
+
+    @register_fake("_xpu_C::fp8_gemm")
+    def _fp8_gemm_fake(
+        q_input: torch.Tensor,
+        q_weight: torch.Tensor,
+        out_dtype: torch.dtype,
+        input_scales: torch.Tensor,
+        weight_scale: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        input_2d = q_input.view(-1, q_input.shape[-1])
+        M = input_2d.size(0)
+        N = q_weight.size(1)
+        return torch.empty((M, N), dtype=out_dtype, device=q_input.device)
+
+
 if hasattr(torch.ops._xpu_C, "fp8_gemm_w8a16"):
 
     @register_fake("_xpu_C::fp8_gemm_w8a16")
@@ -75,6 +92,72 @@ def _int4_gemm_w4a16_fake(
         return torch.empty((M, N), dtype=input.dtype, device=input.device)
 
 
+def _gdn_attention_core_xpu_impl(
+    core_attn_out: torch.Tensor,
+    z: torch.Tensor,
+    projected_states_qkvz: torch.Tensor,
+    projected_states_ba: torch.Tensor,
+    layer_name: str,
+) -> None:
+    """Custom op wrapping the XPU SYCL GDN kernel for torch.compile."""
+    from vllm.forward_context import get_forward_context
+    from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadata
+
+    forward_context = get_forward_context()
+    self = forward_context.no_compile_layers[layer_name]
+    attn_metadata_raw = forward_context.attn_metadata
+
+    if attn_metadata_raw is None:
+        return
+
+    assert isinstance(attn_metadata_raw, dict)
+    attn_metadata = attn_metadata_raw[self.prefix]
+    assert isinstance(attn_metadata, GDNAttentionMetadata)
+
+    # TODO: xpu does not support speculative decoding yet
+    assert attn_metadata.spec_sequence_masks is None  # type: ignore[attr-defined]
+
+    conv_weights = self.conv1d.weight.view(
+        self.conv1d.weight.size(0), self.conv1d.weight.size(2)
+    )
+
+    torch.ops._xpu_C.gdn_attention(
+        core_attn_out,
+        z,
+        projected_states_qkvz,
+        projected_states_ba,
+        self.num_k_heads,
+        self.num_v_heads,
+        self.head_k_dim,
+        self.head_v_dim,
+        conv_state=self.kv_cache[0],
+        ssm_state=self.kv_cache[1],
+        conv_weights=conv_weights,
+        conv_bias=self.conv1d.bias,
+        activation=self.activation,
+        A_log=self.A_log,
+        dt_bias=self.dt_bias,
+        num_prefills=attn_metadata.num_prefills,  # type: ignore[attr-defined]
+        num_decodes=attn_metadata.num_decodes,  # type: ignore[attr-defined]
+        has_initial_state=attn_metadata.has_initial_state,  # type: ignore[attr-defined]
+        non_spec_query_start_loc=attn_metadata.non_spec_query_start_loc,  # type: ignore[attr-defined]
+        non_spec_state_indices_tensor=attn_metadata.non_spec_state_indices_tensor,  # type: ignore[attr-defined]
+        num_actual_tokens=attn_metadata.num_actual_tokens,  # type: ignore[attr-defined]
+        tp_size=self.tp_size,
+        reorder_input=not self.gqa_interleaved_layout,
+    )
+
+
+def _gdn_attention_core_xpu_fake(
+    core_attn_out: torch.Tensor,
+    z: torch.Tensor,
+    projected_states_qkvz: torch.Tensor,
+    projected_states_ba: torch.Tensor,
+    layer_name: str,
+) -> None:
+    return
+
+
 def _xpu_ops_deepseek_scaling_rope_impl(
     positions: torch.Tensor,
     query: torch.Tensor,
@@ -102,6 +185,187 @@ def _xpu_ops_deepseek_scaling_rope_fake(
     return query, key
 
 
+def _xpu_fp8_mqa_logits_impl(
+    q: torch.Tensor,
+    k_quant: torch.Tensor,
+    k_scale: torch.Tensor,
+    weights: torch.Tensor,
+    cu_seqlen_ks: torch.Tensor,
+    cu_seqlen_ke: torch.Tensor,
+) -> torch.Tensor:
+    return torch.ops._xpu_C.fp8_mqa_logits(
+        q,
+        k_quant,
+        k_scale,
+        weights,
+        cu_seqlen_ks,
+        cu_seqlen_ke,
+    )
+
+
+def _xpu_fp8_mqa_logits_fake(
+    q: torch.Tensor,
+    k_quant: torch.Tensor,
+    k_scale: torch.Tensor,
+    weights: torch.Tensor,
+    cu_seqlen_ks: torch.Tensor,
+    cu_seqlen_ke: torch.Tensor,
+) -> torch.Tensor:
+    return torch.empty(
+        (q.shape[0], k_quant.shape[0]),
+        dtype=torch.float32,
+        device=q.device,
+    )
+
+
+def _xpu_fp8_paged_mqa_logits_impl(
+    q: torch.Tensor,
+    kv_cache: torch.Tensor,
+    weights: torch.Tensor,
+    context_lens: torch.Tensor,
+    block_tables: torch.Tensor,
+    schedule_metadata: torch.Tensor,
+    max_model_len: int,
+) -> torch.Tensor:
+    return torch.ops._xpu_C.fp8_paged_mqa_logits(
+        q,
+        kv_cache,
+        weights,
+        context_lens,
+        block_tables,
+        schedule_metadata,
+        max_model_len,
+    )
+
+
+def _xpu_fp8_paged_mqa_logits_fake(
+    q: torch.Tensor,
+    kv_cache: torch.Tensor,
+    weights: torch.Tensor,
+    context_lens: torch.Tensor,
+    block_tables: torch.Tensor,
+    schedule_metadata: torch.Tensor,
+    max_model_len: int,
+) -> torch.Tensor:
+    batch_size, next_n = q.shape[:2]
+    return torch.empty(
+        (batch_size * next_n, max_model_len),
+        dtype=torch.float32,
+        device=q.device,
+    )
+
+
+def _topk_topp_sample_impl(
+    random_sampled: torch.Tensor,
+    logits_to_return: torch.Tensor | None,
+    logits: torch.Tensor,
+    k: torch.Tensor | None,
+    p: torch.Tensor | None,
+    logprobs_mode: str,
+    seeds: torch.Tensor | None,
+    lambda_: float = 1.0,
+) -> None:
+    torch.ops._xpu_C.topk_topp_sampler(
+        random_sampled, logits_to_return, logits, k, p, logprobs_mode, seeds, lambda_
+    )
+    return
+
+
+def _topk_topp_sample_fake(
+    random_sampled: torch.Tensor,
+    logits_to_return: torch.Tensor | None,
+    logits: torch.Tensor,
+    k: torch.Tensor | None,
+    p: torch.Tensor | None,
+    logprobs_mode: str,
+    seeds: torch.Tensor | None,
+    lambda_: float = 1.0,
+) -> None:
+    return
+
+
+def _xpu_mxfp8_quantize_impl(
+    x: torch.Tensor, dtype: torch.dtype | None = None
+) -> tuple[torch.Tensor, torch.Tensor]:
+    MXFP8_BLOCK_SIZE = 32
+    assert x.shape[-1] % MXFP8_BLOCK_SIZE == 0
+    if dtype is not None:
+        assert dtype in (torch.float8_e4m3fn, torch.float8_e5m2), (
+            f"Unsupported dtype for xpu_mxfp8_quantize: {dtype}. "
+            f"Expected torch.float8_e4m3fn or torch.float8_e5m2."
+        )
+    else:
+        dtype = current_platform.fp8_dtype()
+
+    finfo = torch.finfo(dtype)
+    fp8_min = finfo.min
+    fp8_max = finfo.max
+    eps = 1e-10
+
+    x_q = torch.empty_like(x, device=x.device, dtype=dtype)
+    shape = x.shape[:-1] + (x.shape[-1] // MXFP8_BLOCK_SIZE,)
+    x_s = torch.empty(shape, device=x.device, dtype=torch.float32)
+    torch.ops._C.per_token_group_fp8_quant(
+        x, x_q, x_s, MXFP8_BLOCK_SIZE, eps, fp8_min, fp8_max, True
+    )
+    x_s = x_s.to(torch.float8_e8m0fnu)
+    return x_q, x_s
+
+
+def _xpu_mxfp8_quantize_fake(
+    x: torch.Tensor, dtype: torch.dtype | None = None
+) -> tuple[torch.Tensor, torch.Tensor]:
+    if dtype is None:
+        dtype = current_platform.fp8_dtype()
+
+    MXFP8_BLOCK_SIZE = 32
+
+    shape = x.shape[:-1] + (x.shape[-1] // MXFP8_BLOCK_SIZE,)
+    x_s = torch.zeros(shape, device=x.device, dtype=torch.float32)
+
+    return x.to(dtype), x_s.to(torch.float8_e8m0fnu)
+
+
+def _xpu_mxfp4_quantize_impl(
+    x: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    MXFP4_BLOCK_SIZE = 32
+    eps = 1e-10
+    assert x.ndim == 2, "input must be 2-D"
+    assert x.shape[-1] % MXFP4_BLOCK_SIZE == 0, (
+        f"last dimension {x.shape[-1]} must be divisible by group_size "
+        f"{MXFP4_BLOCK_SIZE}"
+    )
+    assert x.is_contiguous(), "input groups must be contiguous"
+
+    M, N = x.shape
+
+    # Packed FP4 output: two nibbles per byte
+    x_q = torch.empty(M, N // 2, device=x.device, dtype=torch.uint8)
+    x_s = torch.empty(M, N // MXFP4_BLOCK_SIZE, device=x.device, dtype=torch.float32)
+
+    torch.ops._C.per_token_group_quant_mxfp4(x, x_q, x_s, MXFP4_BLOCK_SIZE, eps)
+
+    x_q = x_q.view(torch.float4_e2m1fn_x2)
+    x_s = x_s.to(dtype=torch.float8_e8m0fnu, memory_format=torch.preserve_format)
+    return x_q, x_s
+
+
+def _xpu_mxfp4_quantize_fake(
+    x: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    MXFP4_BLOCK_SIZE = 32
+    M, N = x.shape
+
+    # Packed FP4 output: two nibbles per byte
+    x_q = torch.empty(M, N // 2, device=x.device, dtype=torch.uint8)
+    x_s = torch.empty(M, N // MXFP4_BLOCK_SIZE, device=x.device, dtype=torch.float32)
+
+    x_q = x_q.view(torch.float4_e2m1fn_x2)
+    x_s = x_s.to(dtype=torch.float8_e8m0fnu, memory_format=torch.preserve_format)
+    return x_q, x_s
+
+
 # Global flag to ensure ops are registered only once
 _OPS_REGISTERED = False
 
@@ -193,14 +457,9 @@ def flash_attn_varlen_func(
             assert len(window_size) == 2
             real_window_size = (window_size[0], window_size[1])  # noqa: F841
 
-        # In encode attention, k and v maybe not contiguous and current
-        # kernel can't handle it
-        if block_table is None:
-            k = k.contiguous()
-            v = v.contiguous()
         return flash_attn_varlen_func(
             out=out,
-            q=q.contiguous(),
+            q=q,
             k=k,
             v=v,
             cu_seqlens_q=cu_seqlens_q,
@@ -216,6 +475,9 @@ def flash_attn_varlen_func(
             # alibi_slopes = alibi_slopes,
             # softcap=softcap,
             return_softmax_lse=return_softmax_lse,
+            q_descale=q_descale,
+            k_descale=k_descale,
+            v_descale=v_descale,
         )
 
     @staticmethod
@@ -246,251 +508,6 @@ def get_scheduler_metadata(
         )
         return None
 
-    @staticmethod
-    def indexer_k_quant_and_cache(
-        k: torch.Tensor,
-        kv_cache: torch.Tensor,
-        slot_mapping: torch.Tensor,
-        quant_block_size: int,
-        scale_fmt: str | None,
-    ) -> None:
-        head_dim = k.shape[-1]
-        k = k.view(-1, head_dim)  # [total_tokens, head_dim]
-
-        def group_quant_torch(
-            x: torch.Tensor,
-            group_size: int,
-            eps: float = 1e-10,
-            dtype: torch.dtype | None = None,
-            column_major_scales: bool = False,
-            out_q: torch.Tensor | None = None,
-            use_ue8m0: bool | None = None,
-        ) -> tuple[torch.Tensor, torch.Tensor]:
-            if use_ue8m0 is None:
-                # Default fallback - could import is_deep_gemm_e8m0_used if needed
-                use_ue8m0 = False
-
-            if dtype is None:
-                dtype = current_platform.fp8_dtype()
-
-            # Validate inputs
-            assert x.shape[-1] % group_size == 0, (
-                f"Last dimension {x.shape[-1]} must be divisible by "
-                f"group_size {group_size}"
-            )
-            assert x.stride(-1) == 1, "Input tensor groups must be contiguous"
-
-            # Prepare output tensor
-            if out_q is None:
-                x_q = torch.empty_like(x, dtype=dtype)
-            else:
-                assert out_q.shape == x.shape
-                x_q = out_q
-
-            # Reshape input for group processing
-            # Original shape: (..., last_dim)
-            # Target shape: (..., num_groups, group_size)
-            original_shape = x.shape
-            num_groups = original_shape[-1] // group_size
-
-            # Reshape to separate groups
-            group_shape = original_shape[:-1] + (num_groups, group_size)
-            x_grouped = x.view(group_shape)
-
-            # Compute per-group absolute maximum values
-            # Shape: (..., num_groups)
-            abs_max = torch.amax(torch.abs(x_grouped), dim=-1, keepdim=False)
-            abs_max = torch.maximum(
-                abs_max, torch.tensor(eps, device=x.device, dtype=x.dtype)
-            )
-
-            # Compute scales
-            FP8_MAX = torch.finfo(dtype).max
-            FP8_MIN = torch.finfo(dtype).min
-            scale_raw = abs_max / FP8_MAX
-
-            if use_ue8m0:
-                # For UE8M0 format, scales must be powers of 2
-                scales = torch.pow(2.0, torch.ceil(torch.log2(scale_raw)))
-            else:
-                scales = scale_raw
-
-            # Expand scales for broadcasting with grouped data
-            # Shape: (..., num_groups, 1)
-            scales_expanded = scales.unsqueeze(-1)
-
-            # Quantize the grouped data
-            x_scaled = x_grouped / scales_expanded
-            x_clamped = torch.clamp(x_scaled, FP8_MIN, FP8_MAX)
-            x_quantized = x_clamped.to(dtype)
-
-            # Reshape back to original shape
-            x_q.copy_(x_quantized.view(original_shape))
-
-            # Prepare scales tensor in requested format
-            if column_major_scales:
-                # Column-major: (num_groups,) + batch_dims
-                # Transpose the scales to put group dimension first
-                scales_shape = (num_groups,) + original_shape[:-1]
-                x_s = scales.permute(-1, *range(len(original_shape) - 1))
-                x_s = x_s.contiguous().view(scales_shape)
-            else:
-                # Row-major: batch_dims + (num_groups,)
-                x_s = scales.contiguous()
-
-            # Ensure scales are float32
-            return x_q, x_s.float()
-
-        k_fp8, k_scale = group_quant_torch(
-            k,
-            group_size=quant_block_size,
-            column_major_scales=False,
-            use_ue8m0=(scale_fmt == "ue8m0"),
-        )
-
-        k_fp8_bytes = k_fp8.view(-1, head_dim).view(torch.uint8)
-        scale_bytes = k_scale.view(torch.uint8).view(-1, 4)
-        k = torch.cat(
-            [k_fp8_bytes, scale_bytes], dim=-1
-        )  # [total_tokens, head_dim + 4]
-
-        slot_mapping = slot_mapping.flatten()
-        # kv_cache: [num_block, block_size, head_dim + 4]
-        kv_cache.view(-1, kv_cache.shape[-1]).index_copy_(0, slot_mapping, k)
-
-    @staticmethod
-    def cp_gather_indexer_k_quant_cache(
-        kv_cache: torch.Tensor,
-        dst_k: torch.Tensor,
-        dst_scale: torch.Tensor,
-        block_table: torch.Tensor,
-        cu_seq_lens: torch.Tensor,
-    ) -> None:
-        """
-        Args:
-            kv_cache: [num_blocks, block_size, cache_stride] - quantized KV cache
-                    Layout per block: [k_values, scale_values]
-                    - k_values: [block_size * head_dim]
-                    - scale_values: [block_size * head_dim * 4 / quant_block_size]
-            dst_k: [num_tokens, head_dim] - output tensor for K values
-            dst_scale: [num_tokens, head_dim / quant_block_size * 4]
-                - output tensor for scale values
-            block_table: [batch_size, num_blocks] - block table for indexing
-            cu_seq_lens: [batch_size + 1] - cumulative sequence lengths
-        """
-        batch_size = block_table.size(0)
-        num_tokens = dst_k.size(0)
-        head_dim = dst_k.size(1)
-        cache_block_size = kv_cache.size(1)
-        quant_block_size = head_dim * 4 // dst_scale.size(1)
-
-        # For each token, find which batch it belongs to using searchsorted
-        token_indices = torch.arange(num_tokens, device=dst_k.device) + 1
-        # cu_seq_lens is [batch_size + 1], we need to find which interval each
-        # token belongs to
-        batch_indices = torch.searchsorted(cu_seq_lens, token_indices) - 1
-        batch_indices = torch.clamp(batch_indices, 0, batch_size - 1)
-
-        # Calculate the in-batch sequence index for each token
-        inbatch_seq_indices = token_indices - cu_seq_lens[batch_indices]
-
-        # Find which block each token belongs to
-        block_indices_in_table = inbatch_seq_indices // cache_block_size
-        physical_block_indices = block_table[batch_indices, block_indices_in_table]
-
-        # Calculate the offset within each block
-        inblock_offsets = (inbatch_seq_indices - 1) % cache_block_size
-
-        # Calculate strides
-        block_stride = kv_cache.stride(0)  # stride for each block
-
-        # Flatten kv_cache for easier indexing
-        kv_cache_flat = kv_cache.view(-1)
-
-        # Calculate source offset for K values for all tokens (vectorized)
-        src_block_offsets = physical_block_indices * block_stride
-        src_k_offsets = src_block_offsets + inblock_offsets * head_dim
-
-        # Gather K values using advanced indexing
-        # Create indices for all elements we need to gather
-        k_indices = src_k_offsets.unsqueeze(1) + torch.arange(
-            head_dim, device=dst_k.device
-        )
-        dst_k[:] = kv_cache_flat[k_indices]
-
-        # Calculate source offset for scale values (vectorized)
-        # Scales are stored after all K values for each block
-        scale_size = head_dim * 4 // quant_block_size
-        src_scale_offsets = src_block_offsets + head_dim + inblock_offsets * scale_size
-
-        # Gather scale values
-        scale_indices = src_scale_offsets.unsqueeze(1) + torch.arange(
-            scale_size, device=dst_scale.device
-        )
-        dst_scale[:] = kv_cache_flat[scale_indices]
-
-    @staticmethod
-    def top_k_per_row_prefill(
-        logits: torch.Tensor,
-        cu_seqlen_ks: torch.Tensor,
-        cu_seqlen_ke: torch.Tensor,
-        raw_topk_indices: torch.Tensor,
-        num_rows: int,
-        stride0: int,
-        strdide1: int,
-        topk_tokens: int,
-    ) -> torch.Tensor:
-        real_topk = min(topk_tokens, logits.shape[-1])
-        topk_indices = logits.topk(real_topk, dim=-1)[1].to(torch.int32)
-        topk_indices -= cu_seqlen_ks[:, None]
-        mask_lo = topk_indices >= 0
-        mask_hi = topk_indices - (cu_seqlen_ke - cu_seqlen_ks)[:, None] < 0
-        mask = torch.full_like(
-            topk_indices, False, dtype=torch.bool, device=topk_indices.device
-        )
-        mask = mask_lo & mask_hi
-        topk_indices.masked_fill_(~mask, -1)
-        raw_topk_indices[: topk_indices.shape[0], : topk_indices.shape[1]] = (
-            topk_indices
-        )
-
-    @staticmethod
-    def top_k_per_row_decode(
-        logits: torch.Tensor,
-        next_n: int,
-        seq_lens: torch.Tensor,
-        raw_topk_indices: torch.Tensor,
-        num_rows: int,
-        stride0: int,
-        stride1: int,
-        topk_tokens: int,
-    ) -> torch.Tensor:
-        device = logits.device
-        batch_size = seq_lens.size(0)
-        # padded query len
-        padded_num_tokens = batch_size * next_n
-        positions = (
-            torch.arange(logits.shape[-1], device=device)
-            .unsqueeze(0)
-            .expand(batch_size * next_n, -1)
-        )
-        row_indices = torch.arange(padded_num_tokens, device=device) // next_n
-        next_n_offset = torch.arange(padded_num_tokens, device=device) % next_n
-        index_end_pos = (seq_lens[row_indices] - next_n + next_n_offset).unsqueeze(1)
-        # index_end_pos: [B * N, 1]
-        mask = positions <= index_end_pos
-        # mask: [B * N, L]
-        logits = logits.masked_fill(~mask, float("-inf"))
-        real_topk = min(topk_tokens, logits.shape[-1])
-        topk_indices = logits.topk(real_topk, dim=-1)[1].to(torch.int32)  # [B * N, K]
-        # ensure we don't set indices for the top k
-        # that is out of range(masked already)
-        # this will happen if context length is shorter than K
-        topk_indices[topk_indices > index_end_pos] = -1
-        raw_topk_indices[: topk_indices.shape[0], : topk_indices.shape[1]] = (
-            topk_indices
-        )
-
     @staticmethod
     def register_ops_once() -> None:
         global _OPS_REGISTERED
@@ -504,6 +521,43 @@ def register_ops_once() -> None:
                 dispatch_key=current_platform.dispatch_key,
             )
 
+            direct_register_custom_op(
+                op_name="xpu_mxfp8_quantize",
+                op_func=_xpu_mxfp8_quantize_impl,
+                fake_impl=_xpu_mxfp8_quantize_fake,
+            )
+
+            direct_register_custom_op(
+                op_name="xpu_mxfp4_quantize",
+                op_func=_xpu_mxfp4_quantize_impl,
+                fake_impl=_xpu_mxfp4_quantize_fake,
+            )
+
+            direct_register_custom_op(
+                op_name="xpu_fp8_mqa_logits",
+                op_func=_xpu_fp8_mqa_logits_impl,
+                fake_impl=_xpu_fp8_mqa_logits_fake,
+            )
+
+            direct_register_custom_op(
+                op_name="xpu_fp8_paged_mqa_logits",
+                op_func=_xpu_fp8_paged_mqa_logits_impl,
+                fake_impl=_xpu_fp8_paged_mqa_logits_fake,
+            )
+
+            direct_register_custom_op(
+                op_name="gdn_attention_core_xpu",
+                op_func=_gdn_attention_core_xpu_impl,
+                mutates_args=["core_attn_out", "z"],
+                fake_impl=_gdn_attention_core_xpu_fake,
+            )
+
+            direct_register_custom_op(
+                op_name="xpu_topk_topp_sampler",
+                op_func=_topk_topp_sample_impl,
+                fake_impl=_topk_topp_sample_fake,
+            )
+
             _OPS_REGISTERED = True
 
 
diff --git a/vllm/assets/video.py b/vllm/assets/video.py
index f5e443db978f..9ec2e4d16770 100644
--- a/vllm/assets/video.py
+++ b/vllm/assets/video.py
@@ -139,6 +139,6 @@ def get_audio(self, sampling_rate: float | None = None) -> npt.NDArray:
         """
         Read audio data from the video asset, used in Qwen2.5-Omni examples.
 
-        See also: examples/offline_inference/qwen2_5_omni/only_thinker.py
+        See also: examples/generate/multimodal/qwen2_5_omni/only_thinker.py
         """
         return load_audio_pyav(self.video_path, sr=sampling_rate)[0]
diff --git a/vllm/benchmarks/datasets/__init__.py b/vllm/benchmarks/datasets/__init__.py
new file mode 100644
index 000000000000..b989958edcf6
--- /dev/null
+++ b/vllm/benchmarks/datasets/__init__.py
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm.benchmarks.datasets.datasets import (
+    DEFAULT_NUM_PROMPTS,
+    AIMODataset,
+    ASRDataset,
+    BenchmarkDataset,
+    BlazeditDataset,
+    BurstGPTDataset,
+    ConversationDataset,
+    CustomAudioDataset,
+    CustomDataset,
+    CustomImageDataset,
+    HuggingFaceDataset,
+    InstructCoderDataset,
+    MLPerfDataset,
+    MMStarDataset,
+    MMVUDataset,
+    MTBenchDataset,
+    MultiModalConversationDataset,
+    NextEditPredictionDataset,
+    PrefixRepetitionRandomDataset,
+    RandomDataset,
+    RandomDatasetForReranking,
+    RandomMultiModalDataset,
+    SampleRequest,
+    ShareGPTDataset,
+    SonnetDataset,
+    SpecBench,
+    VisionArenaDataset,
+    add_dataset_parser,
+    add_random_dataset_base_args,
+    add_random_multimodal_dataset_args,
+    gen_prompt_decode_to_target_len,
+    get_samples,
+    is_valid_sequence,
+    lora_path_on_disk,
+    lora_tokenizer_cache,
+    process_audio,
+    process_image,
+    process_video,
+    zeta_prompt,
+)
+from vllm.benchmarks.datasets.utils import RangeRatio
+
+__all__ = [
+    "DEFAULT_NUM_PROMPTS",
+    "AIMODataset",
+    "ASRDataset",
+    "BenchmarkDataset",
+    "BlazeditDataset",
+    "BurstGPTDataset",
+    "ConversationDataset",
+    "CustomDataset",
+    "CustomAudioDataset",
+    "CustomImageDataset",
+    "HuggingFaceDataset",
+    "InstructCoderDataset",
+    "MLPerfDataset",
+    "MMStarDataset",
+    "MMVUDataset",
+    "MTBenchDataset",
+    "MultiModalConversationDataset",
+    "NextEditPredictionDataset",
+    "PrefixRepetitionRandomDataset",
+    "RandomDataset",
+    "RandomDatasetForReranking",
+    "RandomMultiModalDataset",
+    "SampleRequest",
+    "ShareGPTDataset",
+    "SonnetDataset",
+    "SpecBench",
+    "VisionArenaDataset",
+    "add_dataset_parser",
+    "add_random_dataset_base_args",
+    "add_random_multimodal_dataset_args",
+    "gen_prompt_decode_to_target_len",
+    "get_samples",
+    "is_valid_sequence",
+    "lora_path_on_disk",
+    "lora_tokenizer_cache",
+    "process_audio",
+    "process_image",
+    "process_video",
+    "RangeRatio",
+    "zeta_prompt",
+]
diff --git a/vllm/benchmarks/datasets/create_txt_slices_dataset.py b/vllm/benchmarks/datasets/create_txt_slices_dataset.py
new file mode 100644
index 000000000000..3f7c5028a205
--- /dev/null
+++ b/vllm/benchmarks/datasets/create_txt_slices_dataset.py
@@ -0,0 +1,209 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Convert a plain-text file (local path or URL) into a JSONL dataset
+compatible with ``CustomDataset`` (``--dataset-name custom``), by 
+randomly slicing the tokenized text into prompts.
+
+Each line of the output JSONL contains a ``prompt`` (decoded from a random
+slice of the tokenized source text) and an ``output_tokens`` count.
+
+Usage
+-----
+::
+
+    python -m vllm.benchmarks.datasets.create_txt_slices_dataset \\
+        --input  sonnet.txt \\
+        --output sonnet_dataset.jsonl \\
+        --tokenizer gpt2 \\
+        --num-prompts 1000 \\
+        --input-len 1024 \\
+        --output-len 128
+
+The resulting JSONL file can then be used with the serving benchmark::
+
+    python -m vllm.benchmarks.serve \\
+        --dataset-name custom \\
+        --dataset-path sonnet_dataset.jsonl \\
+        ...
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import logging
+import random
+import urllib.request
+
+import numpy as np
+from transformers import AutoTokenizer
+
+from vllm.benchmarks.datasets.utils import RangeRatio, get_sampling_params
+
+logger = logging.getLogger(__name__)
+
+
+def load_text(path: str) -> str:
+    """Load text from a local file or URL."""
+    if path.startswith(("http://", "https://")):
+        with urllib.request.urlopen(path) as response:
+            return response.read().decode("utf-8")
+    with open(path, encoding="utf-8") as f:
+        return f.read()
+
+
+def create_txt_slices_jsonl(
+    *,
+    input_path: str,
+    output_path: str,
+    tokenizer_name: str,
+    num_prompts: int,
+    input_len: int,
+    output_len: int,
+    range_ratio: RangeRatio = 0.0,
+    seed: int = 0,
+    trust_remote_code: bool = False,
+) -> None:
+    """Read *input_path*, slice it into prompts, and write JSONL to
+    *output_path*."""
+
+    tokenizer = AutoTokenizer.from_pretrained(
+        tokenizer_name, trust_remote_code=trust_remote_code
+    )
+
+    text = load_text(input_path)
+    if not text:
+        raise ValueError("The text file is empty and cannot be sampled from.")
+
+    token_ids = tokenizer(text, add_special_tokens=False).input_ids
+    if not token_ids:
+        raise ValueError("Tokenizing the text produced zero tokens; cannot sample.")
+
+    rng_np = np.random.default_rng(seed)
+    rng_py = random.Random(seed)
+
+    input_lens, output_lens, _ = get_sampling_params(
+        rng_np,
+        num_prompts,
+        range_ratio,
+        input_len,
+        output_len,
+        tokenizer,
+    )
+
+    num_available_tokens = len(token_ids)
+
+    records: list[dict[str, object]] = []
+    for i in range(num_prompts):
+        req_input_len = int(input_lens[i])
+        req_output_len = int(output_lens[i])
+
+        # Randomly select a start position and slice with cycling
+        start_pos = rng_py.randint(0, num_available_tokens - 1)
+        prompt_token_ids = [
+            token_ids[(start_pos + j) % num_available_tokens]
+            for j in range(req_input_len)
+        ]
+        prompt = tokenizer.decode(prompt_token_ids, skip_special_tokens=False)
+
+        records.append({"prompt": prompt, "output_tokens": req_output_len})
+
+    with open(output_path, "w", encoding="utf-8") as f:
+        for record in records:
+            f.write(json.dumps(record, ensure_ascii=False) + "\n")
+
+    logger.info(
+        "Wrote %d prompts to %s",
+        len(records),
+        output_path,
+    )
+
+
+def main(argv: list[str] | None = None) -> None:
+    parser = argparse.ArgumentParser(
+        description="Convert a plain-text file into a JSONL dataset "
+        "for CustomDataset (--dataset-name custom).",
+    )
+    parser.add_argument(
+        "--input",
+        required=True,
+        help="Path or URL to the source text file.",
+    )
+    parser.add_argument(
+        "--output",
+        required=True,
+        help="Path for the output JSONL file.",
+    )
+    parser.add_argument(
+        "--tokenizer",
+        required=True,
+        help="HuggingFace tokenizer name or path.",
+    )
+    parser.add_argument(
+        "--num-prompts",
+        type=int,
+        default=1000,
+        help="Number of prompt samples to generate (default: 1000).",
+    )
+    parser.add_argument(
+        "--input-len",
+        type=int,
+        default=1024,
+        help="Target number of input tokens per prompt (default: 1024).",
+    )
+    parser.add_argument(
+        "--output-len",
+        type=int,
+        default=128,
+        help="Target number of output tokens per prompt (default: 128).",
+    )
+    parser.add_argument(
+        "--range-ratio",
+        type=str,
+        default="0.0",
+        help="Range ratio for input/output length sampling (default: 0.0). "
+        "A single float applies to both ISL and OSL. "
+        'A JSON dict like \'{"input": 0.3, "output": 0.5}\' sets them '
+        "independently. Values must be in [0, 1).",
+    )
+    parser.add_argument(
+        "--seed",
+        type=int,
+        default=0,
+        help="Random seed for reproducibility (default: 0).",
+    )
+    parser.add_argument(
+        "--trust-remote-code",
+        action="store_true",
+        help="Trust remote code from HuggingFace.",
+    )
+
+    args = parser.parse_args(argv)
+
+    logging.basicConfig(level=logging.INFO)
+
+    # Parse --range-ratio: try float first, then JSON dict.
+    range_ratio: RangeRatio
+    try:
+        range_ratio = float(args.range_ratio)
+    except ValueError:
+        import json as _json
+
+        range_ratio = _json.loads(args.range_ratio)
+
+    create_txt_slices_jsonl(
+        input_path=args.input,
+        output_path=args.output,
+        tokenizer_name=args.tokenizer,
+        num_prompts=args.num_prompts,
+        input_len=args.input_len,
+        output_len=args.output_len,
+        range_ratio=range_ratio,
+        seed=args.seed,
+        trust_remote_code=args.trust_remote_code,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets/datasets.py
similarity index 87%
rename from vllm/benchmarks/datasets.py
rename to vllm/benchmarks/datasets/datasets.py
index dd71762b5ba7..3e0a5451fd60 100644
--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets/datasets.py
@@ -22,10 +22,10 @@
 from abc import ABC, abstractmethod
 from collections.abc import Callable, Iterator, Mapping
 from contextlib import suppress
-from copy import deepcopy
-from dataclasses import dataclass
+from dataclasses import dataclass, replace
 from functools import cache
 from io import BytesIO
+from pathlib import Path
 from tempfile import NamedTemporaryFile
 from typing import Any, cast
 
@@ -35,6 +35,11 @@
 from PIL import Image
 from typing_extensions import deprecated
 
+from vllm.benchmarks.datasets.utils import (
+    RangeRatio,
+    _resolve_range_ratios,
+    get_sampling_params,
+)
 from vllm.inputs import MultiModalDataDict
 from vllm.lora.request import LoRARequest
 from vllm.lora.utils import get_adapter_absolute_path
@@ -55,15 +60,16 @@
 except ImportError:
     pd = PlaceholderModule("pandas")
 
+try:
+    import soundfile as sf
+except ImportError:
+    sf = PlaceholderModule("soundfile")
+
 
 logger = logging.getLogger(__name__)
 
 DEFAULT_NUM_PROMPTS = 1000
 
-# -----------------------------------------------------------------------------
-# Data Classes
-# -----------------------------------------------------------------------------
-
 
 @dataclass
 class SampleRequest:
@@ -71,9 +77,9 @@ class SampleRequest:
     Represents a single inference request for benchmarking.
     """
 
-    prompt: str | list[str]
+    prompt: str | list[str] | list[dict]
     prompt_len: int
-    expected_output_len: int
+    expected_output_len: int | None
     multi_modal_data: MultiModalDataDict | dict | list[dict] | None = None
     lora_request: LoRARequest | None = None
     request_id: str | None = None
@@ -110,7 +116,7 @@ def __init__(
         # default seed.
         self.random_seed = random_seed if random_seed is not None else self.DEFAULT_SEED
         self.disable_shuffle = disable_shuffle
-        self.data = None
+        self.data: Any | None = None
 
     def apply_multimodal_chat_transformation(
         self,
@@ -249,6 +255,7 @@ def sample(
         num_requests: int,
         request_id_prefix: str = "",
         no_oversample: bool = False,
+        **kwargs,
     ) -> list[SampleRequest]:
         """
         Abstract method to generate sample requests from the dataset.
@@ -296,8 +303,10 @@ def maybe_oversample_requests(
             needed = num_requests - len(requests)
             additional = []
             for i in range(needed):
-                req = deepcopy(random.choice(requests))
-                req.request_id = request_id_prefix + str(len(requests) + i)
+                req = replace(
+                    random.choice(requests),
+                    request_id=request_id_prefix + str(len(requests) + i),
+                )
                 additional.append(req)
             requests.extend(additional)
             logger.info("Oversampled requests to reach %d total samples.", num_requests)
@@ -437,6 +446,27 @@ def process_video(video: Any) -> Mapping[str, Any]:
     )
 
 
+def process_audio(audio: Any) -> tuple:
+    """
+    Process a single audio input and return a (array, sample_rate) tuple.
+
+    Supports:
+    1. String: treated as a file path, loaded with soundfile.
+    2. Dict with 'array' and 'sampling_rate' keys: HuggingFace audio format.
+    3. Tuple (array, sr): passed through directly.
+    """
+    if isinstance(audio, str):
+        return sf.read(audio)
+    if isinstance(audio, dict) and "array" in audio and "sampling_rate" in audio:
+        return audio["array"], audio["sampling_rate"]
+    if isinstance(audio, tuple) and len(audio) == 2:
+        return audio
+    raise ValueError(
+        f"Invalid audio input {audio}. Must be a file path string, "
+        "a dict with 'array' and 'sampling_rate', or a (array, sr) tuple."
+    )
+
+
 def gen_prompt_decode_to_target_len(
     tokenizer: TokenizerLike,
     token_sequence: list[int],
@@ -533,7 +563,7 @@ def sample(
         request_id_prefix: str = "",
         no_oversample: bool = False,
         prefix_len: int = DEFAULT_PREFIX_LEN,
-        range_ratio: float = DEFAULT_RANGE_RATIO,
+        range_ratio: RangeRatio = DEFAULT_RANGE_RATIO,
         input_len: int = DEFAULT_INPUT_LEN,
         output_len: int = DEFAULT_OUTPUT_LEN,
         batchsize: int = 1,
@@ -542,24 +572,33 @@ def sample(
         lora_assignment: str = "random",
         **kwargs,
     ) -> list[SampleRequest]:
-        # validate total input tokens (prefix + sampled) is at least 1.
+        resolved_input_rr, _ = _resolve_range_ratios(range_ratio)
+
         num_special = int(tokenizer.num_special_tokens_to_add())
         real_input_len = max(0, int(input_len) - num_special)
-        min_sampled_input = math.floor(real_input_len * (1.0 - float(range_ratio)))
+        min_sampled_input = math.floor(
+            real_input_len * (1.0 - float(resolved_input_rr))
+        )
         min_total_input = int(prefix_len) + min_sampled_input
         if min_total_input < 1:
             raise ValueError(
                 "--random-input-len is too small: with tokenizer special "
-                f"tokens {num_special} and --random-range-ratio {range_ratio}, "
+                f"tokens {num_special} and "
+                f"input range ratio {resolved_input_rr}, "
                 "the minimum possible total input tokens (prefix + sampled) is "
                 f"{min_total_input}. Increase --random-input-len and/or "
-                "--random-prefix-len, or decrease --random-range-ratio so that "
-                "prefix_len + floor(max(0, random_input_len - num_special)) "
-                "* (1 - range_ratio) >= 1."
-            )
-
-        input_lens, output_lens, offsets = self.get_sampling_params(
-            num_requests, range_ratio, input_len, output_len, tokenizer
+                "--random-prefix-len, or decrease the input range ratio "
+                "so that prefix_len + floor(max(0, random_input_len - "
+                "num_special)) * (1 - input_range_ratio) >= 1."
+            )
+
+        input_lens, output_lens, offsets = get_sampling_params(
+            self._rng,
+            num_requests,
+            range_ratio,
+            input_len,
+            output_len,
+            tokenizer,
         )
 
         vocab_size = tokenizer.vocab_size
@@ -661,55 +700,6 @@ def get_prefix(
             )
         return adjusted_tokens
 
-    def get_sampling_params(
-        self,
-        num_requests: int,
-        range_ratio: float,
-        input_len: int,
-        output_len: int,
-        tokenizer: TokenizerLike,
-    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
-        """
-        Get the sampling parameters for the dataset.
-        """
-        # Enforce range_ratio < 1
-        if not (0.0 <= range_ratio < 1.0):
-            raise ValueError("range_ratio must be in [0, 1).")
-        num_special_tokens = int(tokenizer.num_special_tokens_to_add())
-        real_input_len = max(0, int(input_len) - num_special_tokens)
-        # Bounds use floor for low and ceil for high
-        input_low = math.floor(real_input_len * (1 - range_ratio))
-        input_high = math.ceil(real_input_len * (1 + range_ratio))
-        output_low = math.floor(output_len * (1 - range_ratio))
-        output_high = math.ceil(output_len * (1 + range_ratio))
-        # Ensure the lower bound for output length is at least 1 to
-        # prevent sampling 0 tokens.
-        output_low = max(output_low, 1)
-        output_high = max(output_high, 1)
-
-        if input_low > input_high:
-            raise ValueError(
-                f"Invalid input sampling interval: low={input_low} > high={input_high}"
-            )
-        if output_low > output_high:
-            raise ValueError(
-                "Invalid output sampling interval: "
-                f"low={output_low} > high={output_high}"
-            )
-
-        logger.info(
-            "Sampling input_len from [%s, %s] and output_len from [%s, %s]",
-            input_low,
-            input_high,
-            output_low,
-            output_high,
-        )
-
-        input_lens = self._rng.integers(input_low, input_high + 1, size=num_requests)
-        output_lens = self._rng.integers(output_low, output_high + 1, size=num_requests)
-        offsets = self._rng.integers(0, tokenizer.vocab_size, size=num_requests)
-        return input_lens, output_lens, offsets
-
     def generate_token_sequence(
         self,
         *,
@@ -776,8 +766,11 @@ def sample(
         tokenizer: TokenizerLike,
         num_requests: int,
         request_id_prefix: str = "",
-        range_ratio: float = RandomDataset.DEFAULT_RANGE_RATIO,
+        no_oversample: bool = False,
+        prefix_len: int = RandomDataset.DEFAULT_PREFIX_LEN,
+        range_ratio: RangeRatio = RandomDataset.DEFAULT_RANGE_RATIO,
         input_len: int = RandomDataset.DEFAULT_INPUT_LEN,
+        output_len: int = RandomDataset.DEFAULT_OUTPUT_LEN,
         batchsize: int = 1,
         is_reranker: bool = True,
         **kwargs,
@@ -786,8 +779,13 @@ def sample(
 
         query_len_param = (input_len // 2) - n_sep_tokens if is_reranker else input_len
 
-        query_lens, _, query_offsets = self.get_sampling_params(
-            1, range_ratio, query_len_param, 0, tokenizer
+        query_lens, _, query_offsets = get_sampling_params(
+            self._rng,
+            1,
+            range_ratio,
+            query_len_param,
+            0,
+            tokenizer,
         )
 
         query_len = int(query_lens[0])
@@ -800,8 +798,13 @@ def sample(
         else:
             doc_len_param = input_len - query_len - n_sep_tokens
 
-        doc_lens, _, doc_offsets = self.get_sampling_params(
-            num_requests, range_ratio, doc_len_param, 0, tokenizer
+        doc_lens, _, doc_offsets = get_sampling_params(
+            self._rng,
+            num_requests,
+            range_ratio,
+            doc_len_param,
+            0,
+            tokenizer,
         )
 
         vocab_size = tokenizer.vocab_size
@@ -1175,9 +1178,10 @@ def sample(
         request_id_prefix: str = "",
         no_oversample: bool = False,
         prefix_len: int = RandomDataset.DEFAULT_PREFIX_LEN,
-        range_ratio: float = RandomDataset.DEFAULT_RANGE_RATIO,
+        range_ratio: RangeRatio = RandomDataset.DEFAULT_RANGE_RATIO,
         input_len: int = RandomDataset.DEFAULT_INPUT_LEN,
         output_len: int = RandomDataset.DEFAULT_OUTPUT_LEN,
+        batchsize: int = 1,
         limit_mm_per_prompt: dict[str, int] = DEFAULT_LIMIT_MM_PER_PROMPT,
         base_items_per_request: int = DEFAULT_BASE_ITEMS_PER_REQUEST,
         num_mm_items_range_ratio: float = DEFAULT_NUM_MM_ITEMS_RANGE_RATIO,
@@ -1187,9 +1191,18 @@ def sample(
         enable_multimodal_chat: bool = DEFAULT_ENABLE_MULTIMODAL_CHAT,
         **kwargs,
     ) -> list[SampleRequest]:
-        # Get the sampling parameters for the dataset
-        input_lens, output_lens, offsets = self.get_sampling_params(
-            num_requests, range_ratio, input_len, output_len, tokenizer
+        if batchsize != 1:
+            raise NotImplementedError(
+                "batchsize > 1 is not supported for RandomMultiModalDataset."
+            )
+
+        input_lens, output_lens, offsets = get_sampling_params(
+            self._rng,
+            num_requests,
+            range_ratio,
+            input_len,
+            output_len,
+            tokenizer,
         )
 
         (
@@ -1326,16 +1339,16 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
+        request_id_prefix: str = "",
+        no_oversample: bool = False,
         lora_path: str | None = None,
         max_loras: int | None = None,
         output_len: int | None = None,
         enable_multimodal_chat: bool = False,
-        request_id_prefix: str = "",
-        no_oversample: bool = False,
         lora_assignment: str = "random",
         **kwargs,
-    ) -> list:
-        samples: list = []
+    ) -> list[SampleRequest]:
+        samples: list[SampleRequest] = []
         ind = 0
         for entry in self.data:
             if len(samples) >= num_requests:
@@ -1386,26 +1399,6 @@ def sample(
         return samples
 
 
-class _ValidateDatasetArgs(argparse.Action):
-    """Argparse action to validate dataset name and path compatibility."""
-
-    def __call__(self, parser, namespace, values, option_string=None):
-        setattr(namespace, self.dest, values)
-
-        # Get current values of both dataset_name and dataset_path
-        dataset_name = getattr(namespace, "dataset_name", "random")
-        dataset_path = getattr(namespace, "dataset_path", None)
-
-        # Validate the combination
-        if dataset_name == "random" and dataset_path is not None:
-            parser.error(
-                "Cannot use 'random' dataset with --dataset-path. "
-                "Please specify the appropriate --dataset-name (e.g., "
-                "'sharegpt', 'custom', 'sonnet') for your dataset file: "
-                f"{dataset_path}"
-            )
-
-
 def add_dataset_parser(parser: FlexibleArgumentParser):
     parser.add_argument(
         "--trust-remote-code",
@@ -1423,7 +1416,6 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
         "--dataset-name",
         type=str,
         default="random",
-        action=_ValidateDatasetArgs,
         choices=[
             "sharegpt",
             "burstgpt",
@@ -1433,9 +1425,12 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
             "random-rerank",
             "hf",
             "custom",
+            "custom_audio",
+            "custom_image",
             "custom_mm",
             "prefix_repetition",
             "spec_bench",
+            "speed_bench",
         ],
         help="Name of the dataset to benchmark on.",
     )
@@ -1448,9 +1443,8 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
         "--dataset-path",
         type=str,
         default=None,
-        action=_ValidateDatasetArgs,
-        help="Path to the sharegpt/sonnet dataset. "
-        "Or the huggingface dataset ID if using HF dataset.",
+        help="Path to the sharegpt/sonnet dataset or the HF dataset ID if "
+        "using HF dataset.",
     )
     parser.add_argument(
         "--no-oversample",
@@ -1620,6 +1614,36 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
         "repetition dataset.",
     )
 
+    speed_bench_group = parser.add_argument_group(
+        "speed bench dataset options", description=SpeedBench.__doc__
+    )
+    speed_bench_group.add_argument(
+        "--speed-bench-dataset-subset",
+        type=str,
+        default="qualitative",
+        choices={
+            "qualitative",
+            "throughput_1k",
+            "throughput_2k",
+            "throughput_8k",
+            "throughput_16k",
+            "throughput_32k",
+        },
+        help="Subset of the SPEED-Bench dataset.",
+    )
+    speed_bench_group.add_argument(
+        "--speed-bench-output-len",
+        type=int,
+        default=4096,
+        help="Num of output tokens per request, used only for speed bench dataset.",
+    )
+    speed_bench_group.add_argument(
+        "--speed-bench-category",
+        type=str,
+        default=None,
+        help="Category for speed bench dataset. If None, use all categories.",
+    )
+
 
 def add_random_dataset_base_args(
     parser_or_group: FlexibleArgumentParser | argparse._ArgumentGroup,
@@ -1648,12 +1672,12 @@ def add_random_dataset_base_args(
     )
     parser_or_group.add_argument(
         "--random-range-ratio",
-        type=float,
-        default=0.0,
+        type=str,
+        default="0.0",
         help="Range ratio for sampling input/output length, "
-        "used only for random sampling. Must be in the range [0, 1) to define "
-        "a symmetric sampling range"
-        "[length * (1 - range_ratio), length * (1 + range_ratio)].",
+        "used only for random sampling. A single float applies to both "
+        'ISL and OSL. A JSON dict like \'{"input": 0.3, "output": 0.5}\' '
+        "sets them independently. Values must be in [0, 1).",
     )
     parser_or_group.add_argument(
         "--random-prefix-len",
@@ -1786,13 +1810,30 @@ def normalize(d: dict) -> dict[tuple[int, int, int], float]:
     )
 
 
+def _parse_range_ratio(value: str) -> RangeRatio:
+    """Parse a ``--random-range-ratio`` CLI string.
+
+    Accepts either a plain float (``"0.3"``) or a JSON dict
+    (``'{"input": 0.3, "output": 0.5}'``).
+    """
+    try:
+        return float(value)
+    except ValueError:
+        return json.loads(value)
+
+
 def get_samples(args, tokenizer: TokenizerLike) -> list[SampleRequest]:
     if not hasattr(args, "request_id_prefix"):
         args.request_id_prefix = ""
 
+    if hasattr(args, "random_range_ratio") and isinstance(args.random_range_ratio, str):
+        args.random_range_ratio = _parse_range_ratio(args.random_range_ratio)
+
     if args.dataset_name == "custom":
         dataset = CustomDataset(
-            dataset_path=args.dataset_path, disable_shuffle=args.disable_shuffle
+            dataset_path=args.dataset_path,
+            disable_shuffle=args.disable_shuffle,
+            random_seed=args.seed,
         )
         input_requests = dataset.sample(
             num_requests=args.num_prompts,
@@ -1803,9 +1844,31 @@ def get_samples(args, tokenizer: TokenizerLike) -> list[SampleRequest]:
             no_oversample=args.no_oversample,
         )
 
-    elif args.dataset_name == "custom_mm":
-        dataset = CustomMMDataset(
-            dataset_path=args.dataset_path, disable_shuffle=args.disable_shuffle
+    elif args.dataset_name in ("custom_image", "custom_mm"):
+        if args.dataset_name == "custom_mm":
+            logger.warning(
+                "Dataset name 'custom_mm' is deprecated and will be removed in v0.24. "
+                "Use '--dataset-name custom_image' instead."
+            )
+        dataset = CustomImageDataset(
+            dataset_path=args.dataset_path,
+            disable_shuffle=args.disable_shuffle,
+            random_seed=args.seed,
+        )
+        input_requests = dataset.sample(
+            num_requests=args.num_prompts,
+            tokenizer=tokenizer,
+            output_len=args.custom_output_len,
+            enable_multimodal_chat=args.enable_multimodal_chat,
+            request_id_prefix=args.request_id_prefix,
+            no_oversample=args.no_oversample,
+        )
+
+    elif args.dataset_name == "custom_audio":
+        dataset = CustomAudioDataset(
+            dataset_path=args.dataset_path,
+            disable_shuffle=args.disable_shuffle,
+            random_seed=args.seed,
         )
         input_requests = dataset.sample(
             num_requests=args.num_prompts,
@@ -1877,6 +1940,19 @@ def get_samples(args, tokenizer: TokenizerLike) -> list[SampleRequest]:
         ):
             dataset_class = MTBenchDataset
             args.hf_split = args.hf_split if args.hf_split else "train"
+        elif (
+            args.dataset_path in HumanEvalDataset.SUPPORTED_DATASET_PATHS
+            or args.hf_name in HumanEvalDataset.SUPPORTED_DATASET_PATHS
+        ):
+            dataset_class = HumanEvalDataset
+            args.hf_split = args.hf_split if args.hf_split else "test"
+        elif (
+            args.dataset_path in GSM8KDataset.SUPPORTED_DATASET_PATHS
+            or args.hf_name in GSM8KDataset.SUPPORTED_DATASET_PATHS
+        ):
+            dataset_class = GSM8KDataset
+            args.hf_subset = args.hf_subset if args.hf_subset else "main"
+            args.hf_split = args.hf_split if args.hf_split else "test"
         elif (
             args.dataset_path in MultiModalConversationDataset.SUPPORTED_DATASET_PATHS
             or args.hf_name in MultiModalConversationDataset.SUPPORTED_DATASET_PATHS
@@ -2073,6 +2149,19 @@ def get_samples(args, tokenizer: TokenizerLike) -> list[SampleRequest]:
                 request_id_prefix=args.request_id_prefix,
                 no_oversample=args.no_oversample,
             ),
+            "speed_bench": lambda: SpeedBench(
+                dataset_path=args.dataset_path,
+                dataset_subset=args.speed_bench_dataset_subset,
+                category=args.speed_bench_category,
+                disable_shuffle=args.disable_shuffle,
+            ).sample(
+                num_requests=args.num_prompts,
+                tokenizer=tokenizer,
+                output_len=args.speed_bench_output_len,
+                enable_multimodal_chat=args.enable_multimodal_chat,
+                request_id_prefix=args.request_id_prefix,
+                no_oversample=args.no_oversample,
+            ),
         }
 
         try:
@@ -2120,7 +2209,7 @@ def load_data(self) -> None:
         # This will be the standardized format which load_data()
         # has to convert into depending on the filetype of dataset_path.
         # sample() will assume this standardized format of self.data
-        self.data = []
+        self.data: list[dict] = []
 
         # Load the JSONL file
         if self.dataset_path.endswith(".jsonl"):
@@ -2149,15 +2238,15 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
+        request_id_prefix: str = "",
+        no_oversample: bool = False,
         lora_path: str | None = None,
         max_loras: int | None = None,
         output_len: int | None = None,
         enable_multimodal_chat: bool = False,
         skip_chat_template: bool = False,
-        request_id_prefix: str = "",
-        no_oversample: bool = False,
         **kwargs,
-    ) -> list:
+    ) -> list[SampleRequest]:
         # load all data if needed
         self.num_available_samples = len(self.data)
         if num_requests <= 0:
@@ -2168,7 +2257,7 @@ def sample(
                 num_requests,
             )
 
-        sampled_requests = []
+        sampled_requests: list[SampleRequest] = []
         for i, item in enumerate(self.data):
             if len(sampled_requests) >= num_requests:
                 break
@@ -2221,9 +2310,9 @@ def sample(
         return sampled_requests
 
 
-class CustomMMDataset(CustomDataset):
+class CustomImageDataset(CustomDataset):
     """
-    Implements the Custom MultiModal dataset. Loads data from a JSONL file and generates
+    Implements the Custom image dataset. Loads data from a JSONL file and generates
     sample requests based on conversation turns. E.g.,
     ```
     {
@@ -2252,7 +2341,7 @@ def sample(
         request_id_prefix: str = "",
         no_oversample: bool = False,
         **kwargs,
-    ) -> list:
+    ) -> list[SampleRequest]:
         # load all data if needed
         self.num_available_samples = len(self.data)
         if num_requests <= 0:
@@ -2300,6 +2389,104 @@ def sample(
         return sampled_requests
 
 
+class CustomAudioDataset(CustomDataset):
+    """
+    Custom dataset for audio benchmarking. Loads data from a JSONL file. E.g.,
+    {"prompt": "Transcribe the audio.", "audio": "/path/to/audio.wav"}
+
+    Supports both:
+    - Dedicated ASR models (e.g. Whisper) via openai-audio & /v1/audio/transcriptions
+    - Chat-based audio models (e.g. Qwen2-Audio) via openai-chat & /v1/chat/completions
+    """
+
+    IS_MULTIMODAL = True
+
+    def sample(
+        self,
+        tokenizer: TokenizerLike,
+        num_requests: int,
+        output_len: int | None = None,
+        request_id_prefix: str = "",
+        no_oversample: bool = False,
+        skip_chat_template: bool = False,
+        enable_multimodal_chat: bool = False,
+        **kwargs,
+    ) -> list[SampleRequest]:
+        self.num_available_samples = len(self.data)
+        if num_requests <= 0:
+            num_requests = self.num_available_samples
+        sampled_requests = []
+        for i, item in enumerate(self.data):
+            if len(sampled_requests) >= num_requests:
+                break
+            prompt = item.get("prompt", "")
+            if tokenizer is None:
+                prompt_len = 1
+                new_output_len = output_len if output_len not in (None, -1) else 256
+                mm_content = None
+            else:
+                use_chat_template = (
+                    not skip_chat_template
+                    and hasattr(tokenizer, "chat_template")
+                    and tokenizer.chat_template is not None
+                )
+                if enable_multimodal_chat:
+                    # Chat-based audio models (e.g., Qwen2-Audio):
+                    # encode audio as base64; serve.py assembles the chat message
+                    # as: {"role": "user", "content": [
+                    #     {"type": "text", "text": prompt},
+                    #     {"type": "input_audio", "input_audio": {...}}
+                    # ]}
+                    y, sr = process_audio(item["audio"])
+                    buf = io.BytesIO()
+                    sf.write(buf, y, sr, format="WAV")
+                    audio_base64 = base64.b64encode(buf.getvalue()).decode("utf-8")
+                    mm_content = {
+                        "type": "input_audio",
+                        "input_audio": {
+                            "data": audio_base64,
+                            "format": "wav",
+                        },
+                    }
+                    # prompt stays as plain string; serve.py handles wrapping
+                else:
+                    # Whisper-style models: load audio array locally
+                    y, sr = process_audio(item["audio"])
+                    mm_content = {"audio": (y, sr)}
+                    if use_chat_template:
+                        # ASR models with a chat template but not multimodal chat
+                        prompt = tokenizer.apply_chat_template(
+                            [{"role": "user", "content": prompt}],
+                            add_generation_prompt=True,
+                            tokenize=False,
+                        )
+                    # else: plain prompt for Whisper-style models
+                prompt_len = (
+                    len(tokenizer(prompt).input_ids) if isinstance(prompt, str) else 1
+                )
+                new_output_len = output_len
+                if output_len is None or output_len == -1:
+                    if "output_tokens" not in item:
+                        raise ValueError(
+                            "If no output length is provided the "
+                            "custom dataset must contain an 'output_tokens' field."
+                        )
+                    new_output_len = int(item["output_tokens"])
+            sampled_requests.append(
+                SampleRequest(
+                    prompt=prompt,
+                    prompt_len=prompt_len,
+                    expected_output_len=new_output_len,
+                    multi_modal_data=mm_content,
+                    request_id=request_id_prefix + str(i),
+                )
+            )
+        self.maybe_oversample_requests(
+            sampled_requests, num_requests, request_id_prefix, no_oversample
+        )
+        return sampled_requests
+
+
 # -----------------------------------------------------------------------------
 # Spec Bench Dataset Implementation
 # -----------------------------------------------------------------------------
@@ -2340,9 +2527,14 @@ def load_data(self) -> None:
         if not getattr(self, "disable_shuffle", False):
             random.shuffle(self.data)
 
-    def sample(self, **kwargs) -> list:
+    def sample(
+        self,
+        **kwargs,
+    ) -> list[SampleRequest]:
         # leverage CustomDataset sample
-        return super().sample(**kwargs)
+        return super().sample(
+            **kwargs,
+        )
 
 
 # -----------------------------------------------------------------------------
@@ -2381,14 +2573,14 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
+        request_id_prefix: str = "",
+        no_oversample: bool = False,
         prefix_len: int = DEFAULT_PREFIX_LEN,
         input_len: int = DEFAULT_INPUT_LEN,
         output_len: int = DEFAULT_OUTPUT_LEN,
         return_prompt_formatted: bool = False,
-        request_id_prefix: str = "",
-        no_oversample: bool = False,
         **kwargs,
-    ) -> list:
+    ) -> list[SampleRequest]:
         # Calculate average token length for a poem line.
         tokenized_lines = [tokenizer(line).input_ids for line in self.data]
         avg_len = sum(len(tokens) for tokens in tokenized_lines) / len(tokenized_lines)
@@ -2411,7 +2603,7 @@ def sample(
         num_prefix_lines = max(round((prefix_len - base_offset) / avg_len), 0)
         prefix_lines = self.data[:num_prefix_lines]
 
-        samples = []
+        samples: list[SampleRequest] = []
         ind = 0
         while len(samples) < num_requests:
             extra_lines = random.choices(
@@ -2482,11 +2674,11 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
-        max_loras: int | None = None,
-        lora_path: str | None = None,
         request_id_prefix: str = "",
         no_oversample: bool = False,
         lora_assignment: str = "random",
+        max_loras: int | None = None,
+        lora_path: str | None = None,
         **kwargs,
     ) -> list[SampleRequest]:
         samples = []
@@ -2574,15 +2766,15 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
-        output_len: int | None = None,
-        enable_multimodal_chat: bool = False,
         request_id_prefix: str = "",
         no_oversample: bool = False,
+        output_len: int | None = None,
+        enable_multimodal_chat: bool = False,
         **kwargs,
-    ) -> list:
+    ) -> list[SampleRequest]:
         # Filter examples with at least 2 conversations
         filtered_data = self.data.filter(lambda x: len(x["conversations"]) >= 2)
-        sampled_requests = []
+        sampled_requests: list[SampleRequest] = []
         ind = 0
         dynamic_output = output_len is None
 
@@ -2634,15 +2826,15 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
-        output_len: int | None = None,
-        enable_multimodal_chat: bool = False,
         request_id_prefix: str = "",
         no_oversample: bool = False,
+        output_len: int | None = None,
+        enable_multimodal_chat: bool = False,
         **kwargs,
-    ) -> list:
+    ) -> list[SampleRequest]:
         # Filter examples with at least 2 conversations
         filtered_data = self.data.filter(lambda x: len(x["conversations"]) >= 2)
-        sampled_requests = []
+        sampled_requests: list[SampleRequest] = []
         ind = 0
         dynamic_output = output_len is None
 
@@ -2703,12 +2895,12 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
-        output_len: int | None = None,
-        enable_multimodal_chat: bool = False,
         request_id_prefix: str = "",
         no_oversample: bool = False,
+        output_len: int | None = None,
+        enable_multimodal_chat: bool = False,
         **kwargs,
-    ) -> list:
+    ) -> list[SampleRequest]:
         parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name)
         if parser_fn is None:
             raise ValueError(f"Unsupported dataset path: {self.hf_name}")
@@ -2753,9 +2945,11 @@ class MMVUDataset(HuggingFaceDataset):
 
     DEFAULT_OUTPUT_LEN = 128
     SUPPORTED_DATASET_PATHS = {
-        "yale-nlp/MMVU": lambda x: x["question"]
-        + " "
-        + (" ".join(f"{k}.{v}" for k, v in x["choices"].items())),
+        "yale-nlp/MMVU": lambda x: (
+            x["question"]
+            + " "
+            + (" ".join(f"{k}.{v}" for k, v in x["choices"].items()))
+        ),
     }
 
     def __init__(self, **kwargs) -> None:
@@ -2770,12 +2964,12 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
-        output_len: int | None = None,
-        enable_multimodal_chat: bool = False,
         request_id_prefix: str = "",
         no_oversample: bool = False,
+        output_len: int | None = None,
+        enable_multimodal_chat: bool = False,
         **kwargs,
-    ) -> list:
+    ) -> list[SampleRequest]:
         parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name)
         if parser_fn is None:
             raise ValueError(f"Unsupported dataset path: {self.hf_name}")
@@ -2838,15 +3032,15 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
+        request_id_prefix: str = "",
+        no_oversample: bool = False,
         output_len: int | None = None,
         enable_multimodal_chat: bool = False,
         skip_chat_template: bool = False,
-        request_id_prefix: str = "",
-        no_oversample: bool = False,
         **kwargs,
     ) -> list[SampleRequest]:
         output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN
-        sampled_requests = []
+        sampled_requests: list[SampleRequest] = []
         for i, prompt in enumerate(self.sample_prompts(n=num_requests)):
             # apply template
             if not skip_chat_template:
@@ -2903,20 +3097,140 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
+        request_id_prefix: str = "",
+        no_oversample: bool = False,
         output_len: int | None = None,
         enable_multimodal_chat: bool = False,
         skip_chat_template: bool = False,
+        **kwargs,
+    ) -> list[SampleRequest]:
+        output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN
+        sampled_requests: list[SampleRequest] = []
+
+        for i, item in enumerate(self.data):
+            if len(sampled_requests) >= num_requests:
+                break
+            prompt = item["turns"][0]
+
+            # apply template
+            if not skip_chat_template:
+                prompt = tokenizer.apply_chat_template(
+                    [{"role": "user", "content": prompt}],
+                    add_generation_prompt=True,
+                    tokenize=False,
+                )
+
+            prompt_len = len(tokenizer(prompt).input_ids)
+            sampled_requests.append(
+                SampleRequest(
+                    prompt=prompt,
+                    prompt_len=prompt_len,
+                    expected_output_len=output_len,
+                    request_id=request_id_prefix + str(i),
+                )
+            )
+        self.maybe_oversample_requests(
+            sampled_requests, num_requests, request_id_prefix, no_oversample
+        )
+        return sampled_requests
+
+
+# -----------------------------------------------------------------------------
+# HumanEval Dataset Implementation
+# -----------------------------------------------------------------------------
+
+
+class HumanEvalDataset(HuggingFaceDataset):
+    """
+    HumanEvalDataset Dataset.
+    https://huggingface.co/datasets/openai/openai_humaneval
+
+    We create a single turn dataset for HumanEval.
+    """
+
+    DEFAULT_OUTPUT_LEN = 256
+    SUPPORTED_DATASET_PATHS = {
+        "openai/openai_humaneval",
+    }
+
+    def sample(
+        self,
+        tokenizer: TokenizerLike,
+        num_requests: int,
         request_id_prefix: str = "",
         no_oversample: bool = False,
+        output_len: int | None = None,
+        enable_multimodal_chat: bool = False,
+        skip_chat_template: bool = False,
         **kwargs,
-    ) -> list:
+    ) -> list[SampleRequest]:
         output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN
         sampled_requests = []
 
         for i, item in enumerate(self.data):
             if len(sampled_requests) >= num_requests:
                 break
-            prompt = item["turns"][0]
+            prompt = item["prompt"]
+
+            # apply template
+            if not skip_chat_template:
+                prompt = tokenizer.apply_chat_template(
+                    [{"role": "user", "content": prompt}],
+                    add_generation_prompt=True,
+                    tokenize=False,
+                )
+
+            prompt_len = len(tokenizer(prompt).input_ids)
+            sampled_requests.append(
+                SampleRequest(
+                    prompt=prompt,
+                    prompt_len=prompt_len,
+                    expected_output_len=output_len,
+                    request_id=request_id_prefix + str(i),
+                )
+            )
+        self.maybe_oversample_requests(
+            sampled_requests, num_requests, request_id_prefix, no_oversample
+        )
+        return sampled_requests
+
+
+# -----------------------------------------------------------------------------
+# GSM8K Dataset Implementation
+# -----------------------------------------------------------------------------
+
+
+class GSM8KDataset(HuggingFaceDataset):
+    """
+    GSM8K Dataset.
+    https://huggingface.co/datasets/openai/gsm8k
+
+    We create a single turn dataset for GSM8K.
+    """
+
+    DEFAULT_OUTPUT_LEN = 256
+    SUPPORTED_DATASET_PATHS = {
+        "openai/gsm8k",
+    }
+
+    def sample(
+        self,
+        tokenizer: TokenizerLike,
+        num_requests: int,
+        request_id_prefix: str = "",
+        no_oversample: bool = False,
+        output_len: int | None = None,
+        enable_multimodal_chat: bool = False,
+        skip_chat_template: bool = False,
+        **kwargs,
+    ) -> list[SampleRequest]:
+        output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN
+        sampled_requests = []
+
+        for i, item in enumerate(self.data):
+            if len(sampled_requests) >= num_requests:
+                break
+            prompt = item["question"]
 
             # apply template
             if not skip_chat_template:
@@ -2976,7 +3290,7 @@ def sample(
         min_distance: float = 0.0,
         max_distance: float = 1.0,
         **kwargs,
-    ) -> list:
+    ) -> list[SampleRequest]:
         output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN
         sampled_requests = []
 
@@ -3050,12 +3364,12 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
-        output_len: int | None = None,
         request_id_prefix: str = "",
         no_oversample: bool = False,
+        output_len: int | None = None,
         **kwargs,
-    ) -> list:
-        sampled_requests = []
+    ) -> list[SampleRequest]:
+        sampled_requests: list[SampleRequest] = []
         ind = 0
         dynamic_output = output_len is None
 
@@ -3228,18 +3542,18 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
-        output_len: int | None = None,
         request_id_prefix: str = "",
         no_oversample: bool = False,
+        output_len: int | None = None,
         **kwargs,
-    ) -> list:
+    ) -> list[SampleRequest]:
         output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN
         if "openai" in getattr(tokenizer, "name_or_path", ""):
             prompt = "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>"
         else:
             prompt = ""
         prompt_len = len(tokenizer(prompt).input_ids)
-        sampled_requests = []
+        sampled_requests: list[SampleRequest] = []
         ind = 0
         skipped = 0
         asr_min_audio_len_sec = kwargs.get("asr_min_audio_len_sec")
@@ -3326,9 +3640,9 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
-        output_len: int | None = None,
         request_id_prefix: str = "",
         no_oversample: bool = False,
+        output_len: int | None = None,
         **kwargs,
     ) -> list[SampleRequest]:
         # Force dynamic output length based on reference completion.
@@ -3405,12 +3719,12 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
+        request_id_prefix: str = "",
+        no_oversample: bool = False,
         prefix_len: int = DEFAULT_PREFIX_LEN,
         suffix_len: int = DEFAULT_SUFFIX_LEN,
         num_prefixes: int = DEFAULT_NUM_PREFIXES,
         output_len: int = DEFAULT_OUTPUT_LEN,
-        request_id_prefix: str = "",
-        no_oversample: bool = False,
         **kwargs,
     ) -> list[SampleRequest]:
         vocab_size = tokenizer.vocab_size
@@ -3421,7 +3735,7 @@ def sample(
                 f"to num_prefixes ({num_prefixes})"
             )
 
-        def _generate_exact_length_tokens(target_length: int) -> list[int]:
+        def _generate_exact_length_tokens(target_length: int) -> tuple[list[int], int]:
             """Generate tokens that decode and re-encode to exactly
             target_length."""
             # Generate random tokens
@@ -3491,10 +3805,10 @@ def sample(
         self,
         tokenizer: TokenizerLike,
         num_requests: int,
-        output_len: int | None = None,
-        enable_multimodal_chat: bool = False,
         request_id_prefix: str = "",
         no_oversample: bool = False,
+        output_len: int | None = None,
+        enable_multimodal_chat: bool = False,
         **kwargs,
     ) -> list[SampleRequest]:
         # If --hf-output-len is not set, use the default output length.
@@ -3516,6 +3830,7 @@ def sample(
             # if enable_multimodal_chat is False).
             prompt_len = len(tokenizer(question_text).input_ids)
 
+            prompt: str | list[dict]
             if enable_multimodal_chat:
                 # If multimodal content should be embedded in the chat message,
                 # convert to [{"role":"user","content":[...]}]
@@ -3543,3 +3858,63 @@ def sample(
             sampled_requests, num_requests, request_id_prefix, no_oversample
         )
         return sampled_requests
+
+
+# -----------------------------------------------------------------------------
+# Speed Bench Dataset Implementation
+# -----------------------------------------------------------------------------
+
+
+class SpeedBench(CustomDataset):
+    """
+    SPEED-Bench dataset: https://huggingface.co/datasets/nvidia/SPEED-Bench
+
+    Download the dataset using:
+
+    `curl -LsSf https://raw.githubusercontent.com/NVIDIA-NeMo/Skills/refs/heads/main/nemo_skills/dataset/speed-bench/prepare.py | python3 -`
+    """  # noqa: E501
+
+    DOWNLOAD_SCRIPT_URL = "https://raw.githubusercontent.com/NVIDIA-NeMo/Skills/refs/heads/main/nemo_skills/dataset/speed-bench/prepare.py"
+
+    def __init__(self, **kwargs) -> None:
+        self.dataset_subset = kwargs.pop("dataset_subset", "qualitative")
+        self.category = kwargs.pop("category", None)
+        super().__init__(**kwargs)
+        self.load_data()
+
+    def load_data(self) -> None:
+        if self.dataset_path is None:
+            raise ValueError("dataset_path must be provided for loading data.")
+
+        if not Path(self.dataset_path).is_dir():
+            raise ValueError(
+                f"dataset_path {self.dataset_path} is not a directory. "
+                f"Please make sure to download the dataset from HuggingFace using "
+                f"`curl -LsSf {self.DOWNLOAD_SCRIPT_URL} | python3 -`"
+            )
+
+        self.data = []
+
+        # Load the JSONL file
+        jsonl_data = pd.read_json(
+            path_or_buf=Path(self.dataset_path) / f"{self.dataset_subset}.jsonl",
+            lines=True,
+        )
+
+        # check if the JSONL file has a 'turns' column
+        if "messages" not in jsonl_data.columns:
+            raise ValueError(
+                "JSONL file must contain a 'messages' column. "
+                "Please make sure to download the dataset from HuggingFace using "
+                f"`curl -LsSf {self.DOWNLOAD_SCRIPT_URL} | python3 -`"
+            )
+
+        for _, row in jsonl_data.iterrows():
+            # sample only from a specific category if specified
+            if (not self.category) or (self.category == row["category"]):
+                prompt = row["messages"][0]["content"]
+                self.data.append({"prompt": prompt})
+
+        random.seed(self.random_seed)
+        if not getattr(self, "disable_shuffle", False):
+            random.shuffle(self.data)
diff --git a/vllm/benchmarks/datasets/utils.py b/vllm/benchmarks/datasets/utils.py
new file mode 100644
index 000000000000..bc5a4340dd62
--- /dev/null
+++ b/vllm/benchmarks/datasets/utils.py
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Shared utilities for benchmark dataset sampling.
+"""
+
+import logging
+import math
+
+import numpy as np
+
+from vllm.tokenizers import TokenizerLike
+
+logger = logging.getLogger(__name__)
+
+# Type alias: a single float applies to both ISL and OSL; a dict allows
+# specifying them independently via ``{"input": …, "output": …}``.
+RangeRatio = float | dict[str, float]
+
+
+def _resolve_range_ratios(
+    range_ratio: RangeRatio,
+) -> tuple[float, float]:
+    """Return ``(input_range_ratio, output_range_ratio)`` from *range_ratio*.
+
+    *range_ratio* is either a single float (used for both input and output)
+    or a dict with ``"input"`` and ``"output"`` keys.
+    """
+    if isinstance(range_ratio, dict):
+        try:
+            return float(range_ratio["input"]), float(range_ratio["output"])
+        except KeyError as exc:
+            raise ValueError(
+                "When range_ratio is a dict it must contain 'input' and "
+                f"'output' keys, got: {sorted(range_ratio)}"
+            ) from exc
+    ratio = float(range_ratio)
+    return ratio, ratio
+
+
+def get_sampling_params(
+    rng: np.random.Generator,
+    num_requests: int,
+    range_ratio: RangeRatio,
+    input_len: int,
+    output_len: int,
+    tokenizer: TokenizerLike,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Sample per-request input/output token lengths and vocab offsets.
+
+    Lengths are drawn uniformly from integer ranges around the configured
+    means, controlled by *range_ratio*.  It may be a single ``float``
+    (applied to both input and output) or a ``dict`` with ``"input"`` and
+    ``"output"`` keys for independent control.
+
+    Tokenizer special tokens are subtracted from ``input_len`` before
+    computing the sampling interval.
+
+    Returns:
+        (input_lens, output_lens, offsets) – three 1-D ``np.ndarray`` of
+        shape ``(num_requests,)``.
+    """
+    input_range_ratio, output_range_ratio = _resolve_range_ratios(range_ratio)
+
+    if not (0.0 <= input_range_ratio < 1.0):
+        raise ValueError("input_range_ratio must be in [0, 1).")
+    if not (0.0 <= output_range_ratio < 1.0):
+        raise ValueError("output_range_ratio must be in [0, 1).")
+    num_special_tokens = int(tokenizer.num_special_tokens_to_add())
+    real_input_len = max(0, int(input_len) - num_special_tokens)
+    input_low = math.floor(real_input_len * (1 - input_range_ratio))
+    input_high = math.ceil(real_input_len * (1 + input_range_ratio))
+    output_low = math.floor(output_len * (1 - output_range_ratio))
+    output_high = math.ceil(output_len * (1 + output_range_ratio))
+    # Ensure the lower bound for output length is at least 1 to
+    # prevent sampling 0 tokens.
+    output_low = max(output_low, 1)
+    output_high = max(output_high, 1)
+
+    if input_low > input_high:
+        raise ValueError(
+            f"Invalid input sampling interval: low={input_low} > high={input_high}"
+        )
+    if output_low > output_high:
+        raise ValueError(
+            f"Invalid output sampling interval: low={output_low} > high={output_high}"
+        )
+
+    logger.info(
+        "Sampling input_len from [%s, %s] and output_len from [%s, %s]",
+        input_low,
+        input_high,
+        output_low,
+        output_high,
+    )
+
+    input_lens = rng.integers(input_low, input_high + 1, size=num_requests)
+    output_lens = rng.integers(output_low, output_high + 1, size=num_requests)
+    offsets = rng.integers(0, tokenizer.vocab_size, size=num_requests)
+    return input_lens, output_lens, offsets
diff --git a/vllm/benchmarks/lib/endpoint_request_func.py b/vllm/benchmarks/lib/endpoint_request_func.py
index b0ef67889d1d..9c85ad686d3e 100644
--- a/vllm/benchmarks/lib/endpoint_request_func.py
+++ b/vllm/benchmarks/lib/endpoint_request_func.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """The request function for API endpoints."""
 
+import codecs
 import io
 import json
 import os
@@ -25,11 +26,12 @@ class StreamedResponseHandler:
 
     def __init__(self):
         self.buffer = ""
+        self._decoder = codecs.getincrementaldecoder("utf-8")()
 
     def add_chunk(self, chunk_bytes: bytes) -> list[str]:
         """Add a chunk of bytes to the buffer and return any complete
         messages."""
-        chunk_str = chunk_bytes.decode("utf-8")
+        chunk_str = self._decoder.decode(chunk_bytes)
         self.buffer += chunk_str
 
         messages = []
@@ -237,6 +239,8 @@ async def async_request_openai_completions(
                                 generated_text += text or ""
                             elif usage := data.get("usage"):
                                 output.output_tokens = usage.get("completion_tokens")
+                                if (pt := usage.get("prompt_tokens")) is not None:
+                                    output.prompt_len = pt
                 if first_chunk_received:
                     output.success = True
                 else:
@@ -358,6 +362,8 @@ async def async_request_openai_chat_completions(
                                 generated_text += content or ""
                             elif usage := data.get("usage"):
                                 output.output_tokens = usage.get("completion_tokens")
+                                if (pt := usage.get("prompt_tokens")) is not None:
+                                    output.prompt_len = pt
 
                             most_recent_timestamp = timestamp
 
diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py
index 53ae6ca6a804..2c39e7d1878f 100644
--- a/vllm/benchmarks/serve.py
+++ b/vllm/benchmarks/serve.py
@@ -128,26 +128,30 @@ async def fetch_spec_decode_metrics(
                     continue
 
                 if line.startswith("vllm:spec_decode"):
+                    # Extract metric name (before labels) to avoid matching
+                    # substrings inside label values.
+                    parts = line.split(None, 1)
+                    metric_name = parts[0].split("{")[0]
+                    if not metric_name.endswith("_total"):
+                        continue
                     found_spec_decode = True
-                    parts = line.split()
-                    if parts:
-                        with contextlib.suppress(ValueError):
-                            if "num_drafts" in line:
-                                num_drafts += int(float(parts[-1]))
-                            elif "num_draft_tokens" in line:
-                                num_draft_tokens += int(float(parts[-1]))
-                            elif "num_accepted_tokens_per_pos" in line:
-                                pos_label = 'position="'
-                                if pos_label in line:
-                                    start = line.index(pos_label) + len(pos_label)
-                                    end = line.index('"', start)
-                                    pos = int(line[start:end])
-                                    val = int(float(parts[-1]))
-                                    accepted_per_pos[pos] = (
-                                        accepted_per_pos.get(pos, 0) + val
-                                    )
-                            elif "num_accepted_tokens" in line:
-                                num_accepted_tokens += int(float(parts[-1]))
+                    with contextlib.suppress(ValueError):
+                        if "num_drafts" in metric_name:
+                            num_drafts += int(float(parts[-1]))
+                        elif "num_draft_tokens" in metric_name:
+                            num_draft_tokens += int(float(parts[-1]))
+                        elif "num_accepted_tokens_per_pos" in metric_name:
+                            pos_label = 'position="'
+                            if pos_label in line:
+                                start = line.index(pos_label) + len(pos_label)
+                                end = line.index('"', start)
+                                pos = int(line[start:end])
+                                val = int(float(parts[-1]))
+                                accepted_per_pos[pos] = (
+                                    accepted_per_pos.get(pos, 0) + val
+                                )
+                        elif "num_accepted_tokens" in metric_name:
+                            num_accepted_tokens += int(float(parts[-1]))
 
             if not found_spec_decode:
                 return None
@@ -439,7 +443,7 @@ def calculate_metrics(
                         ).input_ids
                     )
             actual_output_lens.append(output_len)
-            total_input += input_requests[i].prompt_len
+            total_input += outputs[i].prompt_len
             tpot = 0
             if output_len > 1:
                 latency_minus_ttft = outputs[i].latency - outputs[i].ttft
@@ -1611,14 +1615,12 @@ def add_cli_args(parser: argparse.ArgumentParser):
     )
     parser.add_argument(
         "--timeline-itl-thresholds",
-        type=float,
-        nargs=2,
-        default=[25.0, 50.0],
-        metavar=("THRESHOLD1", "THRESHOLD2"),
+        type=str,
+        default="25,50",
         help="ITL thresholds in milliseconds for timeline plot coloring. "
-        "Specify two values to categorize inter-token latencies into three groups: "
-        "below first threshold (green), between thresholds (orange), "
-        "and above second threshold (red). Default: 25 50 (milliseconds).",
+        "Specify two comma-separated values to categorize inter-token "
+        "latencies into three groups: below first threshold (green), "
+        "between thresholds (orange), and above second threshold (red).",
     )
     parser.add_argument(
         "--plot-dataset-stats",
@@ -1637,6 +1639,19 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
     random.seed(args.seed)
     np.random.seed(args.seed)
 
+    # Validate timeline ITL thresholds
+    if args.plot_timeline:
+        try:
+            itl_thresholds = [
+                float(t.strip()) for t in args.timeline_itl_thresholds.split(",")
+            ]
+            if len(itl_thresholds) != 2:
+                raise ValueError(
+                    f"Expected 2 ITL threshold values, got {len(itl_thresholds)}"
+                )
+        except ValueError as e:
+            raise ValueError(f"Invalid --timeline-itl-thresholds format: {e}") from e
+
     # Validate ramp-up arguments
     if args.ramp_up_strategy is not None:
         if args.request_rate != float("inf"):
@@ -1711,12 +1726,25 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
             trust_remote_code=args.trust_remote_code,
         )
 
+    # Validate dataset name/path
     if args.dataset_name is None:
         raise ValueError(
             "Please specify '--dataset-name' and the corresponding "
             "'--dataset-path' if required."
         )
 
+    if (
+        args.dataset_name
+        in ["random", "random-mm", "random-rerank", "prefix_repetition"]
+        and args.dataset_path is not None
+    ):
+        raise ValueError(
+            f"Cannot use '{args.dataset_name}' dataset with --dataset-path. "
+            "Please specify the appropriate --dataset-name (e.g., "
+            "'sharegpt', 'custom', 'sonnet') for your dataset file: "
+            f"{args.dataset_path}"
+        )
+
     # Map general --input-len and --output-len to all dataset-specific arguments
     if args.input_len is not None:
         args.random_input_len = args.input_len
@@ -1893,7 +1921,9 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
 
                 timeline_path = Path(file_name).with_suffix(".timeline.html")
                 # Convert thresholds from milliseconds to seconds
-                itl_thresholds_sec = [t / 1000.0 for t in args.timeline_itl_thresholds]
+                itl_thresholds_sec = [
+                    float(t) / 1000.0 for t in args.timeline_itl_thresholds.split(",")
+                ]
                 generate_timeline_plot(
                     per_request_data, timeline_path, itl_thresholds=itl_thresholds_sec
                 )
diff --git a/vllm/benchmarks/startup.py b/vllm/benchmarks/startup.py
index 375b8f9fac33..095fdb073271 100644
--- a/vllm/benchmarks/startup.py
+++ b/vllm/benchmarks/startup.py
@@ -16,7 +16,7 @@
 import tempfile
 import time
 from contextlib import contextmanager
-from typing import Any
+from typing import Any, NamedTuple
 
 import numpy as np
 from tqdm import tqdm
@@ -27,6 +27,82 @@
 )
 from vllm.engine.arg_utils import EngineArgs
 
+PERCENTAGES = [10, 25, 50, 75, 90, 99]
+
+
+class MetricDesc(NamedTuple):
+    """Descriptor for a metric to collect from each iteration."""
+
+    iter_key: str  # key in the iteration result dict
+    suffix: str  # result key suffix, e.g. "startup", "compilation"
+    display_name: str
+
+
+class MetricStats(NamedTuple):
+    """Aggregated statistics for a single benchmark metric."""
+
+    key: str  # e.g. "cold_startup", "warm_encoder_compilation"
+    display_name: str
+    values: list[float]
+    avg: float
+    percentiles: dict[int, float]
+
+
+_BASE_METRICS = [
+    MetricDesc("total_startup_time", "startup", "Startup time"),
+    MetricDesc("compilation_time", "compilation", "Compilation time"),
+]
+_ENCODER_METRIC = MetricDesc(
+    "encoder_compilation_time",
+    "encoder_compilation",
+    "Encoder compilation time",
+)
+
+
+def _compute_metric(
+    phase: str,
+    desc: MetricDesc,
+    iterations: list[dict[str, float]],
+) -> MetricStats:
+    values = [m[desc.iter_key] for m in iterations]
+    arr = np.array(values)
+    return MetricStats(
+        key=f"{phase}_{desc.suffix}",
+        display_name=desc.display_name,
+        values=values,
+        avg=float(np.mean(arr)),
+        percentiles=dict(zip(PERCENTAGES, np.percentile(arr, PERCENTAGES).tolist())),
+    )
+
+
+def _collect_phase_metrics(
+    phase: str,
+    iterations: list[dict[str, float]],
+    has_encoder: bool,
+) -> list[MetricStats]:
+    metrics = [_compute_metric(phase, desc, iterations) for desc in _BASE_METRICS]
+    if has_encoder:
+        metrics.append(_compute_metric(phase, _ENCODER_METRIC, iterations))
+    return metrics
+
+
+def _print_phase(phase_name: str, metrics: list[MetricStats]) -> None:
+    print(f"\n{phase_name}:")
+    for m in metrics:
+        print(f"Avg {m.display_name.lower()}: {m.avg:.2f} seconds")
+    for m in metrics:
+        print(f"{m.display_name} percentiles:")
+        for pct, val in m.percentiles.items():
+            print(f"  {pct}%: {val:.2f} seconds")
+
+
+def _metric_to_json(m: MetricStats) -> dict[str, Any]:
+    return {
+        f"avg_{m.key}_time": m.avg,
+        f"{m.key}_times": m.values,
+        f"{m.key}_percentiles": m.percentiles,
+    }
+
 
 @contextmanager
 def cold_startup():
@@ -72,6 +148,7 @@ def run_startup_in_subprocess(engine_args, result_queue):
 
         # Extract compilation time if available
         compilation_time = 0.0
+        encoder_compilation_time = 0.0
         if hasattr(llm.llm_engine, "vllm_config"):
             vllm_config = llm.llm_engine.vllm_config
             if (
@@ -79,11 +156,15 @@ def run_startup_in_subprocess(engine_args, result_queue):
                 and vllm_config.compilation_config is not None
             ):
                 compilation_time = vllm_config.compilation_config.compilation_time
+                encoder_compilation_time = (
+                    vllm_config.compilation_config.encoder_compilation_time
+                )
 
         result_queue.put(
             {
                 "total_startup_time": total_startup_time,
                 "compilation_time": compilation_time,
+                "encoder_compilation_time": encoder_compilation_time,
             }
         )
 
@@ -93,65 +174,20 @@ def run_startup_in_subprocess(engine_args, result_queue):
 
 
 def save_to_pytorch_benchmark_format(
-    args: argparse.Namespace, results: dict[str, Any]
+    args: argparse.Namespace, metrics: list[MetricStats]
 ) -> None:
     base_name = os.path.splitext(args.output_json)[0]
-
-    cold_startup_records = convert_to_pytorch_benchmark_format(
-        args=args,
-        metrics={
-            "avg_cold_startup_time": [results["avg_cold_startup_time"]],
-        },
-        extra_info={
-            "cold_startup_times": results["cold_startup_times"],
-            "cold_startup_percentiles": results["cold_startup_percentiles"],
-        },
-    )
-    if cold_startup_records:
-        write_to_json(f"{base_name}.cold_startup.pytorch.json", cold_startup_records)
-
-    cold_compilation_records = convert_to_pytorch_benchmark_format(
-        args=args,
-        metrics={
-            "avg_cold_compilation_time": [results["avg_cold_compilation_time"]],
-        },
-        extra_info={
-            "cold_compilation_times": results["cold_compilation_times"],
-            "cold_compilation_percentiles": results["cold_compilation_percentiles"],
-        },
-    )
-    if cold_compilation_records:
-        write_to_json(
-            f"{base_name}.cold_compilation.pytorch.json", cold_compilation_records
-        )
-
-    warm_startup_records = convert_to_pytorch_benchmark_format(
-        args=args,
-        metrics={
-            "avg_warm_startup_time": [results["avg_warm_startup_time"]],
-        },
-        extra_info={
-            "warm_startup_times": results["warm_startup_times"],
-            "warm_startup_percentiles": results["warm_startup_percentiles"],
-        },
-    )
-    if warm_startup_records:
-        write_to_json(f"{base_name}.warm_startup.pytorch.json", warm_startup_records)
-
-    warm_compilation_records = convert_to_pytorch_benchmark_format(
-        args=args,
-        metrics={
-            "avg_warm_compilation_time": [results["avg_warm_compilation_time"]],
-        },
-        extra_info={
-            "warm_compilation_times": results["warm_compilation_times"],
-            "warm_compilation_percentiles": results["warm_compilation_percentiles"],
-        },
-    )
-    if warm_compilation_records:
-        write_to_json(
-            f"{base_name}.warm_compilation.pytorch.json", warm_compilation_records
+    for m in metrics:
+        records = convert_to_pytorch_benchmark_format(
+            args=args,
+            metrics={f"avg_{m.key}_time": [m.avg]},
+            extra_info={
+                f"{m.key}_times": m.values,
+                f"{m.key}_percentiles": m.percentiles,
+            },
         )
+        if records:
+            write_to_json(f"{base_name}.{m.key}.pytorch.json", records)
 
 
 def add_cli_args(parser: argparse.ArgumentParser):
@@ -224,97 +260,46 @@ def create_llm_and_measure_startup():
     os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
     print("Setting VLLM_ENABLE_V1_MULTIPROCESSING=0 to collect startup metrics.\n")
 
+    # Collect cold startup iterations
     print("Measuring cold startup time...\n")
-    cold_startup_times = []
-    cold_compilation_times = []
+    cold_iterations = []
     for i in tqdm(range(args.num_iters_cold), desc="Cold startup iterations"):
         with cold_startup():
-            metrics = create_llm_and_measure_startup()
-            cold_startup_times.append(metrics["total_startup_time"])
-            cold_compilation_times.append(metrics["compilation_time"])
+            cold_iterations.append(create_llm_and_measure_startup())
 
     # Warmup for warm startup
     print("\nWarming up for warm startup measurement...\n")
     for _ in tqdm(range(args.num_iters_warmup), desc="Warmup iterations"):
         create_llm_and_measure_startup()
 
+    # Collect warm startup iterations
     print("\nMeasuring warm startup time...\n")
-    warm_startup_times = []
-    warm_compilation_times = []
+    warm_iterations = []
     for i in tqdm(range(args.num_iters_warm), desc="Warm startup iterations"):
-        metrics = create_llm_and_measure_startup()
-        warm_startup_times.append(metrics["total_startup_time"])
-        warm_compilation_times.append(metrics["compilation_time"])
-
-    # Calculate statistics
-    cold_startup_array = np.array(cold_startup_times)
-    cold_compilation_array = np.array(cold_compilation_times)
-    warm_startup_array = np.array(warm_startup_times)
-    warm_compilation_array = np.array(warm_compilation_times)
-
-    avg_cold_startup = np.mean(cold_startup_array)
-    avg_cold_compilation = np.mean(cold_compilation_array)
-    avg_warm_startup = np.mean(warm_startup_array)
-    avg_warm_compilation = np.mean(warm_compilation_array)
-
-    percentages = [10, 25, 50, 75, 90, 99]
-    cold_startup_percentiles = np.percentile(cold_startup_array, percentages)
-    cold_compilation_percentiles = np.percentile(cold_compilation_array, percentages)
-    warm_startup_percentiles = np.percentile(warm_startup_array, percentages)
-    warm_compilation_percentiles = np.percentile(warm_compilation_array, percentages)
+        warm_iterations.append(create_llm_and_measure_startup())
 
+    # Determine if encoder compilation occurred in any iteration
+    has_encoder = any(
+        m["encoder_compilation_time"] > 0 for m in cold_iterations + warm_iterations
+    )
+
+    cold_metrics = _collect_phase_metrics("cold", cold_iterations, has_encoder)
+    warm_metrics = _collect_phase_metrics("warm", warm_iterations, has_encoder)
+    all_metrics = cold_metrics + warm_metrics
+
+    # Print results
     print("\n" + "=" * 60)
     print("STARTUP TIME BENCHMARK RESULTS")
     print("=" * 60)
-
-    # Cold startup statistics
-    print("\nCOLD STARTUP:")
-    print(f"Avg total startup time: {avg_cold_startup:.2f} seconds")
-    print(f"Avg compilation time:   {avg_cold_compilation:.2f} seconds")
-    print("Startup time percentiles:")
-    for percentage, percentile in zip(percentages, cold_startup_percentiles):
-        print(f"  {percentage}%: {percentile:.2f} seconds")
-    print("Compilation time percentiles:")
-    for percentage, percentile in zip(percentages, cold_compilation_percentiles):
-        print(f"  {percentage}%: {percentile:.2f} seconds")
-
-    # Warm startup statistics
-    print("\nWARM STARTUP:")
-    print(f"Avg total startup time: {avg_warm_startup:.2f} seconds")
-    print(f"Avg compilation time:   {avg_warm_compilation:.2f} seconds")
-    print("Startup time percentiles:")
-    for percentage, percentile in zip(percentages, warm_startup_percentiles):
-        print(f"  {percentage}%: {percentile:.2f} seconds")
-    print("Compilation time percentiles:")
-    for percentage, percentile in zip(percentages, warm_compilation_percentiles):
-        print(f"  {percentage}%: {percentile:.2f} seconds")
-
+    _print_phase("COLD STARTUP", cold_metrics)
+    _print_phase("WARM STARTUP", warm_metrics)
     print("=" * 60)
 
     # Output JSON results if specified
     if args.output_json:
-        results = {
-            "avg_cold_startup_time": float(avg_cold_startup),
-            "avg_cold_compilation_time": float(avg_cold_compilation),
-            "cold_startup_times": cold_startup_times,
-            "cold_compilation_times": cold_compilation_times,
-            "cold_startup_percentiles": dict(
-                zip(percentages, cold_startup_percentiles.tolist())
-            ),
-            "cold_compilation_percentiles": dict(
-                zip(percentages, cold_compilation_percentiles.tolist())
-            ),
-            "avg_warm_startup_time": float(avg_warm_startup),
-            "avg_warm_compilation_time": float(avg_warm_compilation),
-            "warm_startup_times": warm_startup_times,
-            "warm_compilation_times": warm_compilation_times,
-            "warm_startup_percentiles": dict(
-                zip(percentages, warm_startup_percentiles.tolist())
-            ),
-            "warm_compilation_percentiles": dict(
-                zip(percentages, warm_compilation_percentiles.tolist())
-            ),
-        }
+        results: dict[str, Any] = {}
+        for m in all_metrics:
+            results.update(_metric_to_json(m))
         with open(args.output_json, "w") as f:
             json.dump(results, f, indent=4)
-        save_to_pytorch_benchmark_format(args, results)
+        save_to_pytorch_benchmark_format(args, all_metrics)
diff --git a/vllm/benchmarks/sweep/plot.py b/vllm/benchmarks/sweep/plot.py
index 156e18f697f0..2d3692804446 100644
--- a/vllm/benchmarks/sweep/plot.py
+++ b/vllm/benchmarks/sweep/plot.py
@@ -8,7 +8,7 @@
 from functools import partial
 from pathlib import Path
 from types import TracebackType
-from typing import ClassVar
+from typing import TYPE_CHECKING, ClassVar
 
 from typing_extensions import Self, override
 
@@ -17,20 +17,8 @@
 
 from .utils import sanitize_filename
 
-try:
-    import matplotlib.pyplot as plt
-except ImportError:
-    plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")
-
-try:
+if TYPE_CHECKING:
     import pandas as pd
-except ImportError:
-    pd = PlaceholderModule("pandas")
-
-try:
-    import seaborn as sns
-except ImportError:
-    seaborn = PlaceholderModule("seaborn")
 
 
 @dataclass
@@ -265,6 +253,20 @@ def _plot_fig(
     fig_height: float,
     fig_dpi: int,
 ):
+    # Lazy-import matplotlib/pandas/seaborn
+    try:
+        import matplotlib.pyplot as plt
+    except ImportError:
+        plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")
+    try:
+        import pandas as pd
+    except ImportError:
+        pd = PlaceholderModule("pandas")
+    try:
+        import seaborn as sns
+    except ImportError:
+        sns = PlaceholderModule("seaborn")
+
     fig_group, fig_data = fig_group_data
 
     row_groups = full_groupby(
diff --git a/vllm/benchmarks/sweep/plot_pareto.py b/vllm/benchmarks/sweep/plot_pareto.py
index 365e87f757d1..8ec309a7a106 100644
--- a/vllm/benchmarks/sweep/plot_pareto.py
+++ b/vllm/benchmarks/sweep/plot_pareto.py
@@ -6,7 +6,7 @@
 from dataclasses import dataclass
 from functools import partial
 from pathlib import Path
-from typing import ClassVar
+from typing import TYPE_CHECKING, ClassVar
 
 from vllm.utils.collection_utils import full_groupby
 from vllm.utils.import_utils import PlaceholderModule
@@ -14,20 +14,8 @@
 from .plot import DummyExecutor, _json_load_bytes
 from .utils import sanitize_filename
 
-try:
-    import matplotlib.pyplot as plt
-except ImportError:
-    plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")
-
-try:
+if TYPE_CHECKING:
     import pandas as pd
-except ImportError:
-    pd = PlaceholderModule("pandas")
-
-try:
-    import seaborn as sns
-except ImportError:
-    seaborn = PlaceholderModule("seaborn")
 
 
 def _first_present(run_data: dict[str, object], keys: list[str]):
@@ -195,6 +183,20 @@ def _plot_fig(
         print("[END FIGURE]")
         return
 
+    # Lazy-import matplotlib/pandas/seaborn
+    try:
+        import matplotlib.pyplot as plt
+    except ImportError:
+        plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")
+    try:
+        import pandas as pd
+    except ImportError:
+        pd = PlaceholderModule("pandas")
+    try:
+        import seaborn as sns
+    except ImportError:
+        sns = PlaceholderModule("seaborn")
+
     df = pd.DataFrame.from_records(fig_data)
     df = df.dropna(subset=["tokens_per_user", "tokens_per_gpu"])
 
diff --git a/vllm/benchmarks/throughput.py b/vllm/benchmarks/throughput.py
index 6f878b275959..9f1bf4487e45 100644
--- a/vllm/benchmarks/throughput.py
+++ b/vllm/benchmarks/throughput.py
@@ -17,6 +17,7 @@
 
 from vllm.benchmarks.datasets import (
     AIMODataset,
+    ASRDataset,
     BurstGPTDataset,
     ConversationDataset,
     InstructCoderDataset,
@@ -49,21 +50,59 @@ def run_vllm(
     engine_args: EngineArgs,
     do_profile: bool,
     disable_detokenize: bool = False,
+    warmup_requests: list[SampleRequest] | None = None,
+    prequeue_requests: bool = False,
 ) -> tuple[float, list[RequestOutput] | None]:
-    from vllm import LLM, SamplingParams
+    from vllm import LLM
 
     llm = LLM.from_engine_args(engine_args)
+    all_requests = list(warmup_requests or []) + requests
     assert all(
         llm.llm_engine.model_config.max_model_len
         >= (request.prompt_len + request.expected_output_len)
-        for request in requests
+        for request in all_requests
     ), (
         "Please ensure that max_model_len is greater than the sum of"
         " prompt_len and expected_output_len for all requests."
     )
-    # Add the requests to the engine.
+
+    if warmup_requests:
+        print(f"Warming up with {len(warmup_requests)} requests...")
+        _run_vllm_requests(
+            llm,
+            warmup_requests,
+            n,
+            disable_detokenize,
+            do_profile=False,
+            prequeue_requests=prequeue_requests,
+            enable_lora=engine_args.enable_lora,
+        )
+
+    return _run_vllm_requests(
+        llm,
+        requests,
+        n,
+        disable_detokenize,
+        do_profile=do_profile,
+        prequeue_requests=prequeue_requests,
+        enable_lora=engine_args.enable_lora,
+    )
+
+
+def _run_vllm_requests(
+    llm: Any,
+    requests: list[SampleRequest],
+    n: int,
+    disable_detokenize: bool,
+    do_profile: bool,
+    prequeue_requests: bool,
+    enable_lora: bool,
+) -> tuple[float, list[RequestOutput] | None]:
+    from vllm import SamplingParams
+
     prompts: list[TextPrompt | TokensPrompt] = []
     sampling_params: list[SamplingParams] = []
+    lora_requests: list[LoRARequest] | None = [] if enable_lora else None
     for request in requests:
         prompt = (
             TokensPrompt(prompt_token_ids=request.prompt["prompt_token_ids"])
@@ -85,26 +124,42 @@ def run_vllm(
                 detokenize=not disable_detokenize,
             )
         )
-    lora_requests: list[LoRARequest] | None = None
-    if engine_args.enable_lora:
-        lora_requests = [request.lora_request for request in requests]
+        if lora_requests is not None:
+            lora_requests.append(request.lora_request)
 
     use_beam_search = False
 
     outputs = None
     if not use_beam_search:
+        if prequeue_requests:
+            llm.sleep(level=0, mode="abort")
+
         start = time.perf_counter()
         if do_profile:
             llm.start_profile()
-        outputs = llm.generate(
-            prompts, sampling_params, lora_request=lora_requests, use_tqdm=True
-        )
+
+        if prequeue_requests:
+            try:
+                llm.enqueue(
+                    prompts,
+                    sampling_params,
+                    lora_request=lora_requests,
+                    use_tqdm=True,
+                )
+            finally:
+                llm.wake_up(tags=["scheduling"])
+            outputs = llm.wait_for_completion(output_type=RequestOutput, use_tqdm=True)
+        else:
+            outputs = llm.generate(
+                prompts, sampling_params, lora_request=lora_requests, use_tqdm=True
+            )
+
         if do_profile:
             llm.stop_profile()
         end = time.perf_counter()
     else:
         assert lora_requests is None, "BeamSearch API does not support LoRA"
-        prompts = [request.prompt for request in requests]
+        beam_prompts = [request.prompt for request in requests]
         # output_len should be the same for all requests.
         output_len = requests[0].expected_output_len
         for request in requests:
@@ -113,7 +168,7 @@ def run_vllm(
         if do_profile:
             llm.start_profile()
         llm.beam_search(
-            prompts,
+            beam_prompts,
             BeamSearchParams(
                 beam_width=n,
                 max_tokens=output_len,
@@ -132,29 +187,62 @@ def run_vllm_chat(
     engine_args: EngineArgs,
     do_profile: bool,
     disable_detokenize: bool = False,
+    warmup_requests: list[SampleRequest] | None = None,
+    prequeue_requests: bool = False,
 ) -> tuple[float, list[RequestOutput]]:
     """
     Run vLLM chat benchmark. This function is recommended ONLY for benchmarking
     multimodal models as it properly handles multimodal inputs and chat
     formatting. For non-multimodal models, use run_vllm() instead.
     """
-    from vllm import LLM, SamplingParams
+    from vllm import LLM
 
     llm = LLM.from_engine_args(engine_args)
 
+    all_requests = list(warmup_requests or []) + requests
     assert all(
         llm.llm_engine.model_config.max_model_len
         >= (request.prompt_len + request.expected_output_len)
-        for request in requests
+        for request in all_requests
     ), (
         "Please ensure that max_model_len is greater than the sum of "
         "prompt_len and expected_output_len for all requests."
     )
 
-    prompts = []
+    if warmup_requests:
+        print(f"Warming up with {len(warmup_requests)} requests...")
+        _run_vllm_chat_requests(
+            llm,
+            warmup_requests,
+            n,
+            disable_detokenize,
+            do_profile=False,
+            prequeue_requests=prequeue_requests,
+        )
+
+    return _run_vllm_chat_requests(
+        llm,
+        requests,
+        n,
+        disable_detokenize,
+        do_profile=do_profile,
+        prequeue_requests=prequeue_requests,
+    )
+
+
+def _run_vllm_chat_requests(
+    llm: Any,
+    requests: list[SampleRequest],
+    n: int,
+    disable_detokenize: bool,
+    do_profile: bool,
+    prequeue_requests: bool,
+) -> tuple[float, list[RequestOutput]]:
+    from vllm import SamplingParams
+
+    prompts = [request.prompt for request in requests]
     sampling_params: list[SamplingParams] = []
     for request in requests:
-        prompts.append(request.prompt)
         sampling_params.append(
             SamplingParams(
                 n=n,
@@ -165,12 +253,26 @@ def run_vllm_chat(
                 detokenize=not disable_detokenize,
             )
         )
+
+    if prequeue_requests:
+        llm.sleep(level=0, mode="abort")
+
     start = time.perf_counter()
     if do_profile:
         llm.start_profile()
-    outputs = llm.chat(prompts, sampling_params, use_tqdm=True)
+
+    if prequeue_requests:
+        try:
+            llm.enqueue_chat(prompts, sampling_params, use_tqdm=True)
+        finally:
+            llm.wake_up(tags=["scheduling"])
+        outputs = llm.wait_for_completion(output_type=RequestOutput, use_tqdm=True)
+    else:
+        outputs = llm.chat(prompts, sampling_params, use_tqdm=True)
+
     if do_profile:
         llm.stop_profile()
+
     end = time.perf_counter()
     return end - start, outputs
 
@@ -181,8 +283,8 @@ async def run_vllm_async(
     engine_args: AsyncEngineArgs,
     do_profile: bool,
     disable_detokenize: bool = False,
+    warmup_requests: list[SampleRequest] | None = None,
 ) -> float:
-    from vllm import SamplingParams
     from vllm.entrypoints.openai.api_server import (
         build_async_engine_client_from_engine_args,
     )
@@ -191,59 +293,91 @@ async def run_vllm_async(
         engine_args,
     ) as llm:
         model_config = llm.model_config
+        all_requests = list(warmup_requests or []) + requests
         assert all(
             model_config.max_model_len
             >= (request.prompt_len + request.expected_output_len)
-            for request in requests
+            for request in all_requests
         ), (
             "Please ensure that max_model_len is greater than the sum of"
             " prompt_len and expected_output_len for all requests."
         )
 
-        # Add the requests to the engine.
-        prompts: list[TextPrompt | TokensPrompt] = []
-        sampling_params: list[SamplingParams] = []
-        lora_requests: list[LoRARequest | None] = []
-        for request in requests:
-            prompt = (
-                TokensPrompt(prompt_token_ids=request.prompt["prompt_token_ids"])
-                if "prompt_token_ids" in request.prompt
-                else TextPrompt(prompt=request.prompt)
+        if warmup_requests:
+            print(f"Warming up with {len(warmup_requests)} requests...")
+            await _run_vllm_async_requests(
+                llm,
+                warmup_requests,
+                n,
+                disable_detokenize,
+                do_profile=False,
+                request_id_prefix="warmup",
             )
 
-            if request.multi_modal_data:
-                assert isinstance(request.multi_modal_data, dict)
-                prompt["multi_modal_data"] = request.multi_modal_data
-
-            sampling_params.append(
-                SamplingParams(
-                    n=n,
-                    temperature=1.0,
-                    top_p=1.0,
-                    ignore_eos=True,
-                    max_tokens=request.expected_output_len,
-                    detokenize=not disable_detokenize,
-                )
+        elapsed_time, _ = await _run_vllm_async_requests(
+            llm,
+            requests,
+            n,
+            disable_detokenize,
+            do_profile=do_profile,
+            request_id_prefix="test",
+        )
+        return elapsed_time
+
+
+async def _run_vllm_async_requests(
+    llm: Any,
+    requests: list[SampleRequest],
+    n: int,
+    disable_detokenize: bool,
+    do_profile: bool,
+    request_id_prefix: str,
+) -> tuple[float, None]:
+    from vllm import SamplingParams
+
+    prompts: list[TextPrompt | TokensPrompt] = []
+    sampling_params: list[SamplingParams] = []
+    lora_requests: list[LoRARequest | None] = []
+    for request in requests:
+        prompt = (
+            TokensPrompt(prompt_token_ids=request.prompt["prompt_token_ids"])
+            if "prompt_token_ids" in request.prompt
+            else TextPrompt(prompt=request.prompt)
+        )
+
+        if request.multi_modal_data:
+            assert isinstance(request.multi_modal_data, dict)
+            prompt["multi_modal_data"] = request.multi_modal_data
+
+        sampling_params.append(
+            SamplingParams(
+                n=n,
+                temperature=1.0,
+                top_p=1.0,
+                ignore_eos=True,
+                max_tokens=request.expected_output_len,
+                detokenize=not disable_detokenize,
             )
-            prompts.append(prompt)
-            lora_requests.append(request.lora_request)
+        )
+        prompts.append(prompt)
+        lora_requests.append(request.lora_request)
 
-        generators = []
-        start = time.perf_counter()
-        if do_profile:
-            await llm.start_profile()
-        for i, (prompt, sp, lr) in enumerate(
-            zip(prompts, sampling_params, lora_requests)
-        ):
-            generator = llm.generate(prompt, sp, lora_request=lr, request_id=f"test{i}")
-            generators.append(generator)
-        all_gens = merge_async_iterators(*generators)
-        async for i, res in all_gens:
-            pass
-        if do_profile:
-            await llm.stop_profile()
-        end = time.perf_counter()
-        return end - start
+    generators = []
+    start = time.perf_counter()
+    if do_profile:
+        await llm.start_profile()
+    for i, (prompt, sp, lr) in enumerate(zip(prompts, sampling_params, lora_requests)):
+        generator = llm.generate(
+            prompt, sp, lora_request=lr, request_id=f"{request_id_prefix}{i}"
+        )
+        generators.append(generator)
+    all_gens = merge_async_iterators(*generators)
+    async for _i, _res in all_gens:
+        pass
+    if do_profile:
+        await llm.stop_profile()
+    end = time.perf_counter()
+    return end - start, None
 
 
 def run_hf(
@@ -256,6 +390,7 @@ def run_hf(
     disable_detokenize: bool = False,
     dtype: torch.dtype | None = torch.float16,
     enable_torch_compile: bool = False,
+    warmup_requests: list[SampleRequest] | None = None,
 ) -> float:
     assert isinstance(tokenizer, PreTrainedTokenizerBase), (
         "the hf backend only supports HF tokenizers"
@@ -270,6 +405,31 @@ def run_hf(
     if enable_torch_compile:
         llm = torch.compile(llm)
 
+    if warmup_requests:
+        print(f"Warming up with {len(warmup_requests)} requests...")
+        _run_hf_requests(
+            llm,
+            tokenizer,
+            warmup_requests,
+            n,
+            max_batch_size,
+            disable_detokenize,
+        )
+
+    elapsed_time, _ = _run_hf_requests(
+        llm, tokenizer, requests, n, max_batch_size, disable_detokenize
+    )
+    return elapsed_time
+
+
+def _run_hf_requests(
+    llm: Any,
+    tokenizer: PreTrainedTokenizerBase,
+    requests: list[SampleRequest],
+    n: int,
+    max_batch_size: int,
+    disable_detokenize: bool,
+) -> tuple[float, None]:
     pbar = tqdm(total=len(requests))
     start = time.perf_counter()
     batch: list[str] = []
@@ -314,8 +474,9 @@ def run_hf(
         batch = []
         max_prompt_len = 0
         max_output_len = 0
+    pbar.close()
     end = time.perf_counter()
-    return end - start
+    return end - start, None
 
 
 def save_to_pytorch_benchmark_format(
@@ -392,28 +553,53 @@ def get_requests(args, tokenizer):
     elif args.dataset_name == "hf":
         if args.output_len is not None:
             sample_kwargs["output_len"] = args.output_len
-        if args.dataset_path in VisionArenaDataset.SUPPORTED_DATASET_PATHS:
+        common_kwargs["hf_name"] = args.hf_name
+        if (
+            args.dataset_path in VisionArenaDataset.SUPPORTED_DATASET_PATHS
+            or args.hf_name in VisionArenaDataset.SUPPORTED_DATASET_PATHS
+        ):
             dataset_cls = VisionArenaDataset
             common_kwargs["dataset_subset"] = None
             common_kwargs["dataset_split"] = "train"
             sample_kwargs["enable_multimodal_chat"] = True
-        elif args.dataset_path in InstructCoderDataset.SUPPORTED_DATASET_PATHS:
+        elif (
+            args.dataset_path in InstructCoderDataset.SUPPORTED_DATASET_PATHS
+            or args.hf_name in InstructCoderDataset.SUPPORTED_DATASET_PATHS
+        ):
             dataset_cls = InstructCoderDataset
             common_kwargs["dataset_split"] = "train"
-        elif args.dataset_path in MultiModalConversationDataset.SUPPORTED_DATASET_PATHS:
+        elif (
+            args.dataset_path in MultiModalConversationDataset.SUPPORTED_DATASET_PATHS
+            or args.hf_name in MultiModalConversationDataset.SUPPORTED_DATASET_PATHS
+        ):
             dataset_cls = MultiModalConversationDataset
             common_kwargs["dataset_subset"] = args.hf_subset
             common_kwargs["dataset_split"] = args.hf_split
             sample_kwargs["enable_multimodal_chat"] = True
-        elif args.dataset_path in ConversationDataset.SUPPORTED_DATASET_PATHS:
+        elif (
+            args.dataset_path in ConversationDataset.SUPPORTED_DATASET_PATHS
+            or args.hf_name in ConversationDataset.SUPPORTED_DATASET_PATHS
+        ):
             dataset_cls = ConversationDataset
             common_kwargs["dataset_subset"] = args.hf_subset
             common_kwargs["dataset_split"] = args.hf_split
             sample_kwargs["enable_multimodal_chat"] = True
-        elif args.dataset_path in AIMODataset.SUPPORTED_DATASET_PATHS:
+        elif (
+            args.dataset_path in AIMODataset.SUPPORTED_DATASET_PATHS
+            or args.hf_name in AIMODataset.SUPPORTED_DATASET_PATHS
+        ):
             dataset_cls = AIMODataset
             common_kwargs["dataset_subset"] = None
             common_kwargs["dataset_split"] = "train"
+        elif (
+            args.dataset_path in ASRDataset.SUPPORTED_DATASET_PATHS
+            or args.hf_name in ASRDataset.SUPPORTED_DATASET_PATHS
+        ):
+            dataset_cls = ASRDataset
+            common_kwargs["dataset_subset"] = args.hf_subset
+            common_kwargs["dataset_split"] = args.hf_split
+            sample_kwargs["asr_min_audio_len_sec"] = args.asr_min_audio_len_sec
+            sample_kwargs["asr_max_audio_len_sec"] = args.asr_max_audio_len_sec
     elif args.dataset_name == "prefix_repetition":
         dataset_cls = PrefixRepetitionRandomDataset
         sample_kwargs["prefix_len"] = args.prefix_repetition_prefix_len
@@ -517,6 +703,10 @@ def validate_args(args):
     valid_backends = {"vllm", "hf", "mii", "vllm-chat"}
     if args.backend not in valid_backends:
         raise ValueError(f"Unsupported backend: {args.backend}")
+    if args.prequeue_requests and args.backend not in {"vllm", "vllm-chat"}:
+        raise ValueError("--prequeue-requests requires --backend vllm or vllm-chat")
+    if args.prequeue_requests and args.async_engine:
+        raise ValueError("--prequeue-requests is not supported with --async-engine")
 
     # === Dataset Configuration ===
     if (
@@ -550,6 +740,10 @@ def validate_args(args):
             VisionArenaDataset.SUPPORTED_DATASET_PATHS.keys()
             | MultiModalConversationDataset.SUPPORTED_DATASET_PATHS
             | ConversationDataset.SUPPORTED_DATASET_PATHS
+        ) or args.hf_name in (
+            VisionArenaDataset.SUPPORTED_DATASET_PATHS.keys()
+            | MultiModalConversationDataset.SUPPORTED_DATASET_PATHS
+            | ConversationDataset.SUPPORTED_DATASET_PATHS
         ):
             assert args.backend == "vllm-chat", (
                 f"{args.dataset_path} needs to use vllm-chat as the backend."
@@ -557,6 +751,11 @@ def validate_args(args):
         elif args.dataset_path in (
             InstructCoderDataset.SUPPORTED_DATASET_PATHS
             | AIMODataset.SUPPORTED_DATASET_PATHS
+            | ASRDataset.SUPPORTED_DATASET_PATHS
+        ) or args.hf_name in (
+            InstructCoderDataset.SUPPORTED_DATASET_PATHS
+            | AIMODataset.SUPPORTED_DATASET_PATHS
+            | ASRDataset.SUPPORTED_DATASET_PATHS
         ):
             assert args.backend == "vllm", (
                 f"{args.dataset_path} needs to use vllm as the backend."
@@ -730,6 +929,12 @@ def add_cli_args(parser: argparse.ArgumentParser):
     parser.add_argument(
         "--num-prompts", type=int, default=1000, help="Number of prompts to process."
     )
+    parser.add_argument(
+        "--num-warmups",
+        type=int,
+        default=0,
+        help="Number of warmup prompts to process before the timed benchmark.",
+    )
     parser.add_argument(
         "--hf-max-batch-size",
         type=int,
@@ -754,6 +959,20 @@ def add_cli_args(parser: argparse.ArgumentParser):
         default=False,
         help="Use vLLM async engine rather than LLM class.",
     )
+    parser.add_argument(
+        "--prequeue-requests",
+        action="store_true",
+        default=False,
+        help=(
+            "For the vLLM backends, enqueue all requests before allowing the "
+            "scheduler to process them. This can improve benchmark "
+            "reproducibility by removing overlap between request rendering "
+            "and engine scheduling, but may reduce measured throughput. "
+            "Request rendering is typically fast relative to scheduling and "
+            "processing; the intended use case of this flag is multimodal "
+            "benchmarks with time-consuming image rendering."
+        ),
+    )
     parser.add_argument(
         "--disable-detokenize",
         action="store_true",
@@ -787,7 +1006,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
         "context in a request (default: 0).",
     )
 
-    # hf dtaset
+    # hf dataset
     parser.add_argument(
         "--hf-subset",
         type=str,
@@ -800,6 +1019,17 @@ def add_cli_args(parser: argparse.ArgumentParser):
         default=None,
         help="Split of the HF dataset.",
     )
+    parser.add_argument(
+        "--hf-name",
+        type=str,
+        default=None,
+        help=(
+            "Name of the dataset on HuggingFace "
+            "(e.g., 'lmms-lab/LLaVA-OneVision-Data'). "
+            "Specify this when --dataset-path is a local filesystem path "
+            "so the benchmark can identify the correct dataset class."
+        ),
+    )
     parser.add_argument(
         "--profile",
         action="store_true",
@@ -841,6 +1071,20 @@ def add_cli_args(parser: argparse.ArgumentParser):
     add_random_dataset_base_args(parser)
     add_random_multimodal_dataset_args(parser)
 
+    # ASR dataset
+    parser.add_argument(
+        "--asr-min-audio-len-sec",
+        type=float,
+        default=0.0,
+        help="Minimum audio duration in seconds for ASR dataset filtering.",
+    )
+    parser.add_argument(
+        "--asr-max-audio-len-sec",
+        type=float,
+        default=float("inf"),
+        help="Maximum audio duration in seconds for ASR dataset filtering.",
+    )
+
     parser = AsyncEngineArgs.add_cli_args(parser)
 
 
@@ -861,6 +1105,14 @@ def main(args: argparse.Namespace):
         tokenizer_mode=args.tokenizer_mode,
         trust_remote_code=args.trust_remote_code,
     )
+    num_warmups = args.num_warmups
+    warmup_requests: list[SampleRequest] | None = None
+    if num_warmups > 0:
+        warmup_args = argparse.Namespace(**vars(args))
+        warmup_args.num_prompts = num_warmups
+        warmup_args.seed += 1
+        warmup_requests = get_requests(warmup_args, tokenizer)
+
     requests = get_requests(args, tokenizer)
     is_multi_modal = any(request.multi_modal_data is not None for request in requests)
     request_outputs: list[RequestOutput] | None = None
@@ -873,6 +1125,7 @@ def main(args: argparse.Namespace):
                     AsyncEngineArgs.from_cli_args(args),
                     disable_detokenize=args.disable_detokenize,
                     do_profile=args.profile,
+                    warmup_requests=warmup_requests,
                 )
             )
         else:
@@ -882,6 +1135,8 @@ def main(args: argparse.Namespace):
                 EngineArgs.from_cli_args(args),
                 disable_detokenize=args.disable_detokenize,
                 do_profile=args.profile,
+                warmup_requests=warmup_requests,
+                prequeue_requests=args.prequeue_requests,
             )
     elif args.backend == "hf":
         assert args.tensor_parallel_size == 1
@@ -897,6 +1152,7 @@ def main(args: argparse.Namespace):
             args.disable_detokenize,
             dtype=args.dtype,
             enable_torch_compile=args.hf_enable_torch_compile,
+            warmup_requests=warmup_requests,
         )
     elif args.backend == "vllm-chat":
         elapsed_time, request_outputs = run_vllm_chat(
@@ -905,6 +1161,8 @@ def main(args: argparse.Namespace):
             EngineArgs.from_cli_args(args),
             disable_detokenize=args.disable_detokenize,
             do_profile=args.profile,
+            warmup_requests=warmup_requests,
+            prequeue_requests=args.prequeue_requests,
         )
     else:
         raise ValueError(f"Unknown backend: {args.backend}")
diff --git a/vllm/collect_env.py b/vllm/collect_env.py
index 0cf5681bcf54..1b94adba87ee 100644
--- a/vllm/collect_env.py
+++ b/vllm/collect_env.py
@@ -46,6 +46,17 @@
         "nvidia_driver_version",
         "nvidia_gpu_models",
         "cudnn_version",
+        "xpu_available",
+        "xpu_runtime_version",
+        "intel_graphics_compiler_version",
+        "intel_gpu_models",
+        "oneapi_compiler_version",
+        "level_zero_loader_version",
+        "level_zero_driver_version",
+        "oneccl_version",
+        "libigdgmm_version",
+        "vllm_xpu_kernels_version",
+        "sycl_version",
         "pip_version",  # 'pip' or 'pip3'
         "pip_packages",
         "conda_packages",
@@ -277,6 +288,134 @@ def get_rocm_version(run_lambda):
     )
 
 
+def get_xpu_available():
+    if TORCH_AVAILABLE and hasattr(torch, "xpu") and torch.xpu.is_available():
+        return True
+    return False
+
+
+def get_xpu_runtime_version():
+    if TORCH_AVAILABLE and hasattr(torch.version, "xpu"):
+        return torch.version.xpu
+    return None
+
+
+def get_pkg_version(run_lambda, pkg):
+    assert get_platform() == "linux"
+
+    if pkg == "vllm_xpu_kernels":
+        rc, out, _ = run_lambda("pip show vllm-xpu-kernels")
+        if rc == 0:
+            match = re.search(r"Version: (.*)", out)
+            return match.group(1).strip() if match else None
+        return None
+
+    pkg_map = {
+        "igc": ["intel-igc-core", "libigc2", "libigc1"],
+        "level_zero_loader": ["level-zero", "libze1"],
+        "level_zero_driver": ["libze-intel-gpu1", "intel-level-zero-gpu"],
+        "oneccl": ["intel-oneapi-ccl", "oneccl"],
+        "libigdgmm": ["libigdgmm12", "libigdgmm"],
+    }
+
+    pkg_candidates = pkg_map.get(pkg, [])
+    if not pkg_candidates:
+        return None
+
+    mgr_name = None
+    for mgr in ["dpkg", "dnf", "yum", "zypper"]:
+        rc, _, _ = run_lambda(f"which {mgr}")
+        if rc == 0:
+            mgr_name = mgr
+            break
+
+    if not mgr_name:
+        return None
+
+    ret = ""
+    index = -1
+
+    for pkg_name in pkg_candidates:
+        if not pkg_name:
+            continue
+
+        cmd = ""
+        if mgr_name in ["dnf", "yum"]:
+            index = 1
+            cmd = f"{mgr_name} list | grep -w {pkg_name}"
+        elif mgr_name == "zypper":
+            index = 2
+            cmd = f"{mgr_name} info {pkg_name} | grep Version"
+        elif mgr_name == "dpkg":
+            index = 2
+            cmd = f"{mgr_name} -l | grep -w {pkg_name}"
+
+        if cmd:
+            out = run_and_read_all(run_lambda, cmd)
+            if out:
+                ret = out.splitlines()[0]
+                break
+
+    if not ret or index == -1:
+        return None
+
+    lst = re.sub(" +", " ", ret).strip().split(" ")
+    if len(lst) > index:
+        return lst[index]
+
+    return None
+
+
+def get_intel_graphics_compiler_version(run_lambda):
+    """Return Intel Graphics Compiler (IGC) version."""
+    return get_pkg_version(run_lambda, "igc")
+
+
+def get_level_zero_loader_version(run_lambda):
+    """Return Level Zero loader runtime version."""
+    return get_pkg_version(run_lambda, "level_zero_loader")
+
+
+def get_level_zero_driver_version(run_lambda):
+    """Return Level Zero driver version."""
+    return get_pkg_version(run_lambda, "level_zero_driver")
+
+
+def get_oneapi_ccl_version(run_lambda):
+    """Return oneAPI Collective Communications Library (oneCCL) version."""
+    return get_pkg_version(run_lambda, "oneccl")
+
+
+def get_libigdgmm_version(run_lambda):
+    return get_pkg_version(run_lambda, "libigdgmm")
+
+
+def get_vllm_xpu_kernels_version(run_lambda):
+    return get_pkg_version(run_lambda, "vllm_xpu_kernels")
+
+
+def get_intel_gpu_models():
+    if TORCH_AVAILABLE and hasattr(torch, "xpu") and torch.xpu.is_available():
+        device_count = torch.xpu.device_count()
+        return "\n".join(
+            "GPU {}: {}".format(i, torch.xpu.get_device_name(i))
+            for i in range(device_count)
+        )
+    return None
+
+
+def get_oneapi_compiler_version(run_lambda):
+    """Return Intel oneAPI DPC++/C++ Compiler version via icpx."""
+    return run_and_parse_first_match(
+        run_lambda, "icpx --version", r"oneAPI DPC\+\+/C\+\+ Compiler (\S+)"
+    )
+
+
+def get_sycl_version(run_lambda):
+    """Return SYCL/DPC++ compiler build version."""
+    return run_and_parse_first_match(run_lambda, "icpx --version", r"\((\d[\d.]+)\)")
+
+
 def get_vllm_version():
     from vllm import __version__, __version_tuple__
 
@@ -298,11 +437,12 @@ def get_vllm_version():
 
 
 def summarize_vllm_build_flags():
-    # This could be a static method if the flags are constant, or dynamic if you need to check environment variables, etc.
-    return "CUDA Archs: {}; ROCm: {}".format(
+    flags = "CUDA Archs: {}; ROCm: {}; XPU: {}".format(
         os.environ.get("TORCH_CUDA_ARCH_LIST", "Not Set"),
         "Enabled" if os.environ.get("ROCM_HOME") else "Disabled",
+        "Enabled" if get_xpu_available() else "Disabled",
     )
+    return flags
 
 
 def get_gpu_topo(run_lambda):
@@ -574,6 +714,13 @@ def get_env_vars():
         "OMP_",
         "MKL_",
         "NVIDIA",
+        "ZE_",
+        "ONEAPI_",
+        "SYCL_",
+        "NEOReadDebugKeys",
+        "IGC_",
+        "CCL_",
+        "I_MPI_",
     )
     for k, v in os.environ.items():
         if any(term in k.lower() for term in secret_terms):
@@ -637,6 +784,17 @@ def get_version_or_na(cfg, prefix):
         nvidia_gpu_models=get_gpu_info(run_lambda),
         nvidia_driver_version=get_nvidia_driver_version(run_lambda),
         cudnn_version=get_cudnn_version(run_lambda),
+        xpu_available=str(get_xpu_available()),
+        xpu_runtime_version=get_xpu_runtime_version(),
+        intel_graphics_compiler_version=get_intel_graphics_compiler_version(run_lambda),
+        intel_gpu_models=get_intel_gpu_models(),
+        oneapi_compiler_version=get_oneapi_compiler_version(run_lambda),
+        level_zero_loader_version=get_level_zero_loader_version(run_lambda),
+        level_zero_driver_version=get_level_zero_driver_version(run_lambda),
+        oneccl_version=get_oneapi_ccl_version(run_lambda),
+        libigdgmm_version=get_libigdgmm_version(run_lambda),
+        vllm_xpu_kernels_version=get_vllm_xpu_kernels_version(run_lambda),
+        sycl_version=get_sycl_version(run_lambda),
         hip_compiled_version=hip_compiled_version,
         hip_runtime_version=hip_runtime_version,
         miopen_runtime_version=miopen_runtime_version,
@@ -676,26 +834,15 @@ def get_version_or_na(cfg, prefix):
 Is debug build               : {is_debug_build}
 CUDA used to build PyTorch   : {cuda_compiled_version}
 ROCM used to build PyTorch   : {hip_compiled_version}
+XPU used to build PyTorch    : {xpu_runtime_version}
 
 ==============================
       Python Environment
 ==============================
 Python version               : {python_version}
 Python platform              : {python_platform}
-
-==============================
-       CUDA / GPU Info
-==============================
-Is CUDA available            : {is_cuda_available}
-CUDA runtime version         : {cuda_runtime_version}
-CUDA_MODULE_LOADING set to   : {cuda_module_loading}
-GPU models and configuration : {nvidia_gpu_models}
-Nvidia driver version        : {nvidia_driver_version}
-cuDNN version                : {cudnn_version}
-HIP runtime version          : {hip_runtime_version}
-MIOpen runtime version       : {miopen_runtime_version}
-Is XNNPACK available         : {is_xnnpack_available}
-
+    
+{gpu_info}
 ==============================
           CPU Info
 ==============================
@@ -790,6 +937,35 @@ def maybe_start_on_next_line(string):
         if envinfo.cuda_compiled_version is None:
             mutable_dict["cuda_compiled_version"] = "None"
 
+    # If the machine doesn't have XPU, report XPU fields as 'No XPU'
+    dynamic_xpu_fields = [
+        "intel_graphics_compiler_version",
+        "intel_gpu_models",
+        "level_zero_loader_version",
+        "level_zero_driver_version",
+        "oneccl_version",
+        "libigdgmm_version",
+        "vllm_xpu_kernels_version",
+    ]
+    all_xpu_fields = dynamic_xpu_fields + [
+        "oneapi_compiler_version",
+        "sycl_version",
+    ]
+    all_dynamic_xpu_fields_missing = all(
+        mutable_dict[field] is None for field in dynamic_xpu_fields
+    )
+    xpu_available = mutable_dict.get("xpu_available") == "True"
+    if not xpu_available and all_dynamic_xpu_fields_missing:
+        for field in all_xpu_fields:
+            mutable_dict[field] = "No XPU"
+    if envinfo.xpu_runtime_version is None or envinfo.xpu_runtime_version == "N/A":
+        mutable_dict["xpu_runtime_version"] = "N/A"
+
+    # If intel_gpu_models is multiline, start on the next line
+    mutable_dict["intel_gpu_models"] = maybe_start_on_next_line(
+        mutable_dict.get("intel_gpu_models")
+    )
+
     # Replace True with Yes, False with No
     mutable_dict = replace_bools(mutable_dict)
 
@@ -811,6 +987,62 @@ def maybe_start_on_next_line(string):
             mutable_dict["conda_packages"], "[conda] "
         )
     mutable_dict["cpu_info"] = envinfo.cpu_info
+
+    CUDA_FMT = """
+==============================
+       CUDA / GPU Info
+==============================
+Is CUDA available            : {is_cuda_available}
+CUDA runtime version         : {cuda_runtime_version}
+CUDA_MODULE_LOADING set to   : {cuda_module_loading}
+GPU models and configuration : {nvidia_gpu_models}
+Nvidia driver version        : {nvidia_driver_version}
+cuDNN version                : {cudnn_version}
+HIP runtime version          : {hip_runtime_version}
+MIOpen runtime version       : {miopen_runtime_version}
+Is XNNPACK available         : {is_xnnpack_available}
+""".strip()
+
+    XPU_FMT = """
+==============================
+      Intel XPU / GPU Info
+==============================
+Is XPU available             : {xpu_available}
+XPU runtime version          : {xpu_runtime_version}
+Intel GPU models             : {intel_gpu_models}
+
+--Compile time--
+oneAPI compiler version      : {oneapi_compiler_version}
+SYCL compiler build          : {sycl_version}
+oneCCL version               : {oneccl_version}
+
+--Runtime--
+Intel Graphics Compiler (IGC): {intel_graphics_compiler_version}
+Intel GMM (libigdgmm)        : {libigdgmm_version}
+Level Zero loader version    : {level_zero_loader_version}
+Level Zero driver version    : {level_zero_driver_version}
+vLLM XPU kernels version     : {vllm_xpu_kernels_version}
+""".strip()
+
+    invalid_vers = {"N/A", "Could not collect", "None"}
+    sections = []
+
+    if (
+        mutable_dict.get("is_cuda_available") in ("True", "Yes")
+        or mutable_dict.get("cuda_compiled_version") not in invalid_vers
+    ):
+        sections.append(CUDA_FMT)
+
+    if (
+        mutable_dict.get("xpu_available") in ("True", "Yes")
+        or mutable_dict.get("xpu_runtime_version") not in invalid_vers
+    ):
+        sections.append(XPU_FMT)
+
+    mutable_dict["gpu_info"] = (
+        ("\n\n".join(sections) + "\n").format(**mutable_dict) if sections else ""
+    )
+
     return env_info_fmt.format(**mutable_dict)
 
 
diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py
index dee7cdde744d..5a67415f1030 100644
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -23,6 +23,10 @@
 from torch.fx._lazy_graph_module import _use_lazy_graph_module
 
 import vllm.envs as envs
+from vllm.compilation.codegen import (
+    compile_execution_fn,
+    generate_execution_code,
+)
 from vllm.config import CompilationConfig, CUDAGraphMode, VllmConfig
 from vllm.config.compilation import DynamicShapesType
 from vllm.config.utils import Range, hash_factors
@@ -31,6 +35,7 @@
 from vllm.platforms import current_platform
 from vllm.tracing import instrument, instrument_manual
 from vllm.utils.import_utils import resolve_obj_by_qualname
+from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 from .compiler_interface import (
     CompilerInterface,
@@ -45,6 +50,7 @@
     should_split,
 )
 from .passes.inductor_pass import InductorPass, pass_context
+from .passes.ir.inplace_functionalization import VllmIRInplaceFunctionalizationPass
 from .passes.pass_manager import PostGradPassManager
 
 logger = init_logger(__name__)
@@ -264,6 +270,7 @@ def compile(
         compile_range: Range,
         graph_index: int = 0,
         num_graphs: int = 1,
+        is_encoder: bool = False,
     ) -> Any:
         if graph_index == 0:
             # before compiling the first graph, record the start time
@@ -273,6 +280,7 @@ def compile(
         compilation_counter.num_backend_compilations += 1
 
         compiled_graph = None
+        handle = None
 
         # try to load from the cache
         compiled_graph = self.load(graph, example_inputs, graph_index, compile_range)
@@ -281,13 +289,11 @@ def compile(
                 # after loading the last graph for this shape, record the time.
                 # there can be multiple graphs due to piecewise compilation.
                 elapsed = time.perf_counter() - compilation_start_time
-                compilation_config.compilation_time += elapsed
                 logger.info_once(
                     "Directly load the compiled graph(s) for compile range %s "
                     "from the cache, took %.3f s",
                     str(compile_range),
                     elapsed,
-                    scope="local",
                 )
             return compiled_graph
 
@@ -353,7 +359,7 @@ def autograd_cache_key(*args, **kwargs):
                     )
                 except StopCompiling:
                     assert cache_key is not None
-                    return self.loaded_artifacts[cache_key]
+                    compiled_graph = self.loaded_artifacts[cache_key]
             if cache_key is not None and compiled_graph is not None:
                 self.loaded_artifacts[cache_key] = compiled_graph
 
@@ -372,7 +378,6 @@ def autograd_cache_key(*args, **kwargs):
                 logger.info_once(
                     "Cache the graph of compile range %s for later use",
                     str(compile_range),
-                    scope="local",
                 )
             logger.debug_once(
                 "Store the %s-th graph for compile range%s from %s via handle %s",
@@ -380,18 +385,15 @@ def autograd_cache_key(*args, **kwargs):
                 str(compile_range),
                 self.compiler.name,
                 handle,
-                scope="local",
             )
 
         # after compiling the last graph, record the end time
         if graph_index == num_graphs - 1:
             elapsed = time.perf_counter() - compilation_start_time
-            compilation_config.compilation_time += elapsed
             logger.info_once(
                 "Compiling a graph for compile range %s takes %.2f s",
                 str(compile_range),
                 elapsed,
-                scope="local",
             )
 
         return compiled_graph
@@ -515,16 +517,31 @@ def _decompose_size_nodes(graph: fx.GraphModule) -> None:
                     )
 
         # Replace size node in each user's args.
-        # Dynamo always passes size as a direct arg: view(clone, size)
-        # → view(clone, d0, d1, ...)
         for user in list(node.users):
-            new_args = []
-            for arg in user.args:
-                if arg is node:
-                    new_args.extend(dims)
-                else:
-                    new_args.append(arg)
-            user.args = tuple(new_args)
+            if (
+                user.op == "call_function"
+                and user.target is operator.getitem
+                and len(user.args) == 2
+                and user.args[0] is node
+            ):
+                # getitem(size, idx) → replace with dims[idx] directly.
+                idx = user.args[1]
+                assert isinstance(idx, int), (
+                    f"Expected literal int index for getitem on size(), "
+                    f"got {type(idx).__name__}: {idx}"
+                )
+                user.replace_all_uses_with(dims[idx])
+                graph.graph.erase_node(user)
+            else:
+                # User consumes the full size tuple (e.g. view(clone, size))
+                # → view(clone, d0, d1, ...)
+                new_args = []
+                for arg in user.args:
+                    if arg is node:
+                        new_args.extend(dims)
+                    else:
+                        new_args.append(arg)
+                user.args = tuple(new_args)
         graph.graph.erase_node(node)
 
 
@@ -575,11 +592,14 @@ def split_graph(
     # the semantics of the graph will change when we
     # have mutations in the graph
     with _use_lazy_graph_module(True):
+        has_tuple_return = is_torch_equal_or_newer("2.12.0.dev")
+        tuple_return_kwarg = {"tuple_return": True} if has_tuple_return else {}
         split_gm = torch.fx.passes.split_module.split_module(
             graph,
             None,
             lambda node: node_to_subgraph_id[node],
             keep_original_order=True,
+            **tuple_return_kwarg,
         )
 
     outputs = []
@@ -907,6 +927,24 @@ def collect_standalone_compile_artifacts(
         return standalone_compile_artifacts, sym_shape_indices_map, returns_tuple_map
 
     def configure_post_pass(self) -> None:
+        # TODO proper PassManager?
+        pre_grad_pass_key = "pre_grad_custom_pass"
+        assert self.pass_key != pre_grad_pass_key
+        assert pre_grad_pass_key not in self.inductor_config
+        self.inductor_config[pre_grad_pass_key] = VllmIRInplaceFunctionalizationPass(
+            self.vllm_config
+        )
+
+        # Make sure pre_grad_custom_pass is not pickled
+        # as part of AOTAutograd built-in cache key
+        # TODO(luka) is there a cleaner way to do this
+        import torch._inductor.config as inductor_config
+
+        ignore = inductor_config._cache_config_ignore_prefix + [pre_grad_pass_key]
+        assert "_cache_config_ignore_prefix" not in self.inductor_config
+        self.inductor_config["_cache_config_ignore_prefix"] = ignore
+
+        # Configure the (nominally post-grad) pass manager
         self.pass_manager.configure(self.vllm_config)
 
         # Post-grad custom passes are run using the post_grad_custom_post_pass
@@ -998,11 +1036,11 @@ def __call__(self, graph: fx.GraphModule, example_inputs: Sequence[Any]) -> Any:
         )
         hash_content = []
         for filepath in forward_code_files:
-            hash_content.append(filepath)
             if filepath == "<string>":
                 # This means the function was dynamically generated, with
                 # e.g. exec(). We can't actually check these.
                 continue
+            hash_content.append(filepath)
             try:
                 with open(filepath) as f:
                     hash_content.append(f.read())
@@ -1046,12 +1084,11 @@ def __call__(self, graph: fx.GraphModule, example_inputs: Sequence[Any]) -> Any:
         disable_cache = disable_cache or is_ngram_gpu_enabled
 
         if disable_cache:
-            logger.info_once("vLLM's torch.compile cache is disabled.", scope="local")
+            logger.info_once("vLLM's torch.compile cache is disabled.")
         else:
             logger.info_once(
                 "Using cache directory: %s for vLLM's torch.compile",
                 local_cache_dir,
-                scope="local",
             )
 
         self.compiler_manager.initialize_cache(
@@ -1109,9 +1146,9 @@ def __call__(self, graph: fx.GraphModule, example_inputs: Sequence[Any]) -> Any:
 
         dynamo_time = time.perf_counter() - torch_compile_start_time
         logger.info_once(
-            "Dynamo bytecode transform time: %.2f s", dynamo_time, scope="local"
+            "Dynamo bytecode transform time: %.2f s",
+            dynamo_time,
         )
-        self.compilation_config.compilation_time += dynamo_time
 
         # Record Dynamo time in tracing if available
         start_time = int(torch_compile_start_time * 1e9)
@@ -1186,7 +1223,6 @@ def __call__(self, graph: fx.GraphModule, example_inputs: Sequence[Any]) -> Any:
             logger.info_once(
                 "Saved compiler manager cache in %.2f seconds.",
                 elapsed,
-                scope="local",
             )
 
         from torch._guards import detect_fake_mode
@@ -1225,15 +1261,25 @@ def __call__(self, graph: fx.GraphModule, example_inputs: Sequence[Any]) -> Any:
             with open(graph_path, "w") as f:
                 f.write(src)
 
-            logger.debug_once(
-                "Computation graph saved to %s", graph_path, scope="local"
-            )
+            logger.debug_once("Computation graph saved to %s", graph_path)
 
         self._called = True
         graph_to_serialize = (
             original_split_gm if envs.VLLM_USE_MEGA_AOT_ARTIFACT else self.graph
         )
 
+        execution_code, submod_names, consts = generate_execution_code(self.split_gm)
+        # Use getattr to get correct callables: __dict__ has PiecewiseBackend
+        # instances (from PiecewiseCompileInterpreter), _modules has originals.
+        # getattr checks __dict__ first, then falls back to _modules.
+        submod_callables = {
+            name: getattr(self.split_gm, name)
+            for name, _ in self.split_gm.named_children()
+        }
+        runtime_callable = compile_execution_fn(
+            execution_code, submod_callables, submod_names, consts
+        )
+
         if (
             self.compilation_config.cudagraph_mode == CUDAGraphMode.NONE
             or not self.compilation_config.cudagraph_copy_inputs
@@ -1242,9 +1288,12 @@ def __call__(self, graph: fx.GraphModule, example_inputs: Sequence[Any]) -> Any:
                 graph_to_serialize,
                 example_inputs,
                 self.prefix,
-                self.split_gm,
+                runtime_callable,
                 is_encoder=self.is_encoder,
                 vllm_backend=self,
+                execution_code=execution_code,
+                submod_names=submod_names,
+                consts=consts,
             )
 
         # index of tensors that have symbolic shapes (batch size)
@@ -1265,7 +1314,7 @@ def __call__(self, graph: fx.GraphModule, example_inputs: Sequence[Any]) -> Any:
         copy_and_call = make_copy_and_call(
             sym_tensor_indices,
             [example_inputs[x].clone() for x in sym_tensor_indices],
-            self.split_gm,
+            runtime_callable,
         )
 
         return VllmSerializableFunction(
@@ -1276,4 +1325,7 @@ def __call__(self, graph: fx.GraphModule, example_inputs: Sequence[Any]) -> Any:
             is_encoder=self.is_encoder,
             vllm_backend=self,
             sym_tensor_indices=sym_tensor_indices,
+            execution_code=execution_code,
+            submod_names=submod_names,
+            consts=consts,
         )
diff --git a/vllm/compilation/breakable_cudagraph.py b/vllm/compilation/breakable_cudagraph.py
new file mode 100644
index 000000000000..6da3ec717861
--- /dev/null
+++ b/vllm/compilation/breakable_cudagraph.py
@@ -0,0 +1,424 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Breakable CUDA graph capture/replay.
+
+This is an alternative to :class:`CUDAGraphWrapper` that replaces vLLM's
+torch.compile-based FX graph splitting with runtime stream-capture
+breaks.
+
+The idea (inspired by sgl-project/sglang#19102): instead of pre-splitting
+the model into many pieces at attention boundaries, a
+single capture context drives the whole forward and intercepts
+attention / kv-cache custom ops at the dispatcher to end the current
+stream capture, run the op eagerly, and resume capture.
+
+The captured artifact is a list of zero-arg callables -- the bound
+``CUDAGraph.replay`` for graph segments, or the user fn for eager
+segments -- replayed in order at inference time.
+
+Eager segments must operate on the same static buffers used during
+capture so subsequent graph segments read the same memory addresses.
+"""
+
+from __future__ import annotations
+
+import dataclasses
+import functools
+import gc
+import threading
+import weakref
+from collections.abc import Callable
+from typing import Any, ClassVar, TypeVar
+
+import torch
+
+import vllm.envs as envs
+from vllm.compilation.monitor import validate_cudagraph_capturing_enabled
+from vllm.config import CUDAGraphMode, VllmConfig
+from vllm.distributed.device_communicators.pynccl_allocator import set_graph_pool_id
+from vllm.forward_context import (
+    BatchDescriptor,
+    get_forward_context,
+    is_forward_context_available,
+)
+from vllm.logger import init_logger
+from vllm.model_executor.offloader.base import get_offloader
+from vllm.platforms import current_platform
+from vllm.utils.torch_utils import weak_ref_tensor, weak_ref_tensors
+
+logger = init_logger(__name__)
+
+
+def is_breakable_cudagraph_enabled() -> bool:
+    return bool(envs.VLLM_USE_BREAKABLE_CUDAGRAPH)
+
+
+F = TypeVar("F", bound=Callable[..., Any])
+
+
+def eager_break_during_capture(fn: F) -> F:
+    """Decorator that turns a custom-op Python kernel into a "break point"
+    for the breakable cudagraph capture.
+
+    When the decorated function is invoked outside of a
+    :class:`BreakableCUDAGraphCapture` context, it executes normally.
+
+    When invoked inside a capture context, it ends the current cudagraph
+    segment, runs the function eagerly on the capture stream, records the
+    callable for replay, and starts a fresh segment.
+
+    **In-place output buffer required.** Decorated ops must write into a
+    caller-provided output tensor; a fresh tensor returned by ``fn`` would
+    change address each replay and break downstream graph segments.
+
+    **Decorator order matters.** Apply as the *outermost* decorator if
+    there are other decorators that introduce host-side side effects
+    around the call -- the canonical example is
+    ``@maybe_transfer_kv_layer`` for PD-disaggregation, whose
+    ``wait_for_layer_load`` and ``save_kv_layer`` calls must run in the
+    eager segment, not inside the captured cudagraph. Putting
+    ``@eager_break_during_capture`` *inside* such a decorator would
+    record those side effects into the graph and hang on replay.
+
+    The correct order is::
+
+        @eager_break_during_capture   # outermost
+        @maybe_transfer_kv_layer
+        def unified_attention_with_output(...):
+            ...
+    """
+    if not is_breakable_cudagraph_enabled():
+        return fn
+
+    @functools.wraps(fn)
+    def wrapper(*args: Any, **kwargs: Any) -> Any:
+        capture = BreakableCUDAGraphCapture.current()
+        if capture is None:
+            return fn(*args, **kwargs)
+        if not capture._capturing:
+            return fn(*args, **kwargs)
+        if is_forward_context_available():
+            mode = get_forward_context().cudagraph_runtime_mode
+            if mode == CUDAGraphMode.FULL:
+                return fn(*args, **kwargs)
+
+        # Weak-ref args: strong refs in the replay lambda pin cudagraph-pool
+        # slots across batch descriptors. cudagraph owns the slot, so the
+        # weak_ref is safe to deref on replay.
+        weak_args = tuple(
+            weak_ref_tensor(a) if isinstance(a, torch.Tensor) else a for a in args
+        )
+        weak_kwargs = {
+            k: weak_ref_tensor(v) if isinstance(v, torch.Tensor) else v
+            for k, v in kwargs.items()
+        }
+        return capture.add_eager(lambda: fn(*weak_args, **weak_kwargs))
+
+    return wrapper  # type: ignore[return-value]
+
+
+# ---------------------------------------------------------------------------
+# Capture context
+# ---------------------------------------------------------------------------
+
+
+class BreakableCUDAGraphCapture:
+    """Stream-capture context that supports eager breaks via :meth:`add_eager`.
+
+    Usage::
+
+        cap = BreakableCUDAGraphCapture(pool=...)
+        with cap:
+            output = model(*static_inputs)
+        # Later, after copying new inputs into the static buffers:
+        cap.replay()
+        # Output tensors live at the same addresses as during capture.
+
+    Thread-local: only one capture may be active per thread.
+    """
+
+    _tls = threading.local()
+
+    @classmethod
+    def current(cls) -> BreakableCUDAGraphCapture | None:
+        return getattr(cls._tls, "active", None)
+
+    @classmethod
+    def is_active(cls) -> bool:
+        return cls.current() is not None
+
+    def __init__(self, pool: Any | None = None) -> None:
+        self.pool = pool
+        self.segments: list[Callable[[], Any]] = []
+        self._num_graphs: int = 0
+        self._num_eager_breaks: int = 0
+        self._current_graph: torch.cuda.CUDAGraph | None = None
+        self._capturing: bool = False
+
+    # --- context manager protocol ----------------------------------------
+
+    def __enter__(self) -> BreakableCUDAGraphCapture:
+        if getattr(BreakableCUDAGraphCapture._tls, "active", None) is not None:
+            raise RuntimeError("Nested BreakableCUDAGraphCapture is not supported.")
+        BreakableCUDAGraphCapture._tls.active = self
+        self._begin_segment()
+        return self
+
+    def __exit__(self, exc_type, exc, tb) -> None:
+        try:
+            self._end_segment()
+        finally:
+            BreakableCUDAGraphCapture._tls.active = None
+
+    # --- segment management ----------------------------------------------
+
+    def _begin_segment(self) -> None:
+        assert not self._capturing
+        g = torch.cuda.CUDAGraph()
+        if self.pool is not None:
+            g.capture_begin(pool=self.pool)
+        else:
+            g.capture_begin()
+        self._current_graph = g
+        self._capturing = True
+
+    def _end_segment(self) -> None:
+        if not self._capturing:
+            return
+        assert self._current_graph is not None
+        self._current_graph.capture_end()
+        self.segments.append(self._current_graph.replay)
+        self._num_graphs += 1
+        self._current_graph = None
+        self._capturing = False
+
+    def add_eager(self, fn: Callable[[], Any]) -> Any:
+        """End the current capture segment, run ``fn`` eagerly on the
+        capture stream, record ``fn`` for replay, and start a new segment.
+
+        Returns whatever ``fn`` returned during this (capture-time) call.
+        Replay does not return values; callers should propagate any
+        downstream dependencies via static output buffers.
+        """
+        self._end_segment()
+        result = fn()
+        self.segments.append(fn)
+        self._num_eager_breaks += 1
+        self._begin_segment()
+        return result
+
+    # --- replay ----------------------------------------------------------
+
+    def replay(self) -> None:
+        for r in self.segments:
+            r()
+
+    # --- introspection ---------------------------------------------------
+
+    @property
+    def num_graphs(self) -> int:
+        return self._num_graphs
+
+    @property
+    def num_eager_breaks(self) -> int:
+        return self._num_eager_breaks
+
+    def __repr__(self) -> str:
+        return (
+            f"BreakableCUDAGraphCapture(graphs={self.num_graphs}, "
+            f"eager_breaks={self.num_eager_breaks})"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Wrapper that mirrors CUDAGraphWrapper's interface
+# ---------------------------------------------------------------------------
+
+
+@dataclasses.dataclass
+class _BreakableEntry:
+    batch_descriptor: BatchDescriptor
+    capture: BreakableCUDAGraphCapture | None = None
+    output: Any = None
+    input_addresses: list[int] | None = None
+
+
+class BreakableCUDAGraphWrapper:
+    """Drop-in replacement for :class:`CUDAGraphWrapper` that uses
+    :class:`BreakableCUDAGraphCapture` instead of a single monolithic
+    ``torch.cuda.graph()`` capture.
+
+    Same dispatch contract as ``CUDAGraphWrapper``:
+        * If no ``forward_context`` is available, run the underlying
+          callable eagerly.
+        * If runtime mode mismatch / NONE, run eagerly.
+        * Otherwise, lazily capture per ``batch_descriptor`` and replay
+          on subsequent invocations with the same descriptor.
+    """
+
+    _all_instances: ClassVar[weakref.WeakSet[BreakableCUDAGraphWrapper]] = (
+        weakref.WeakSet()
+    )
+
+    @classmethod
+    def clear_all_graphs(cls) -> None:
+        for instance in list(cls._all_instances):
+            instance.clear_graphs()
+
+    def __init__(
+        self,
+        runnable: Callable[..., Any],
+        vllm_config: VllmConfig,
+    ) -> None:
+        # Unlike the original CUDAGraphWrapper which strictly matches a
+        # single runtime_mode, this wrapper captures whatever the
+        # dispatcher emits (any non-NONE runtime_mode) -- breakable's
+        # capture is identical for prefill and decode, so there's nothing
+        # to dispatch on at the runtime_mode level. Entries are keyed by
+        # BatchDescriptor which already encodes batch shape / uniformity.
+        self.runnable = runnable
+        self.vllm_config = vllm_config
+        self.compilation_config = vllm_config.compilation_config
+        self.graph_pool = current_platform.get_global_graph_pool()
+        self.is_debugging_mode = envs.VLLM_LOGGING_LEVEL == "DEBUG"
+
+        self.entries: dict[BatchDescriptor, _BreakableEntry] = {}
+        BreakableCUDAGraphWrapper._all_instances.add(self)
+
+        logger.info_once("Breakable CUDA graph enabled")
+
+    # --- vllm-style attribute forwarding ---------------------------------
+
+    def __getattr__(self, key: str) -> Any:
+        runnable = self.__dict__.get("runnable")
+        if runnable is not None and hasattr(runnable, key):
+            return getattr(runnable, key)
+        raise AttributeError(key)
+
+    def unwrap(self) -> Callable[..., Any]:
+        return self.runnable
+
+    @property
+    def cudagraph_wrapper(self) -> BreakableCUDAGraphWrapper:
+        return self
+
+    def clear_graphs(self) -> None:
+        self.entries.clear()
+
+    # --- dispatch --------------------------------------------------------
+
+    def __call__(self, *args: Any, **kwargs: Any) -> Any:
+        if not is_forward_context_available():
+            return self.runnable(*args, **kwargs)
+
+        forward_context = get_forward_context()
+        batch_descriptor = forward_context.batch_descriptor
+        cudagraph_runtime_mode = forward_context.cudagraph_runtime_mode
+
+        # Capture whenever the dispatcher says "some cudagraph mode" --
+        # breakable produces the same artifact regardless of PIECEWISE
+        # vs FULL, so we match either. Entries are keyed by batch
+        # descriptor, which already encodes prefill/decode distinctions.
+        if cudagraph_runtime_mode == CUDAGraphMode.NONE:
+            return self.runnable(*args, **kwargs)
+
+        assert batch_descriptor is not None
+        entry = self.entries.get(batch_descriptor)
+        if entry is None:
+            entry = _BreakableEntry(batch_descriptor=batch_descriptor)
+            self.entries[batch_descriptor] = entry
+
+        if entry.capture is None:
+            return self._capture(entry, args, kwargs)
+        return self._replay(entry, args, kwargs)
+
+    # --- capture / replay paths -----------------------------------------
+
+    @staticmethod
+    def _collect_tensor_addresses(
+        args: tuple[Any, ...], kwargs: dict[str, Any]
+    ) -> list[int]:
+        """Flatten tensor data_ptrs from positional and keyword args in a
+        stable order (positionals first, then kwargs in insertion order).
+
+        Used for the DEBUG-mode address-stability check; covers both call
+        styles since vLLM models are typically invoked with kwargs.
+        """
+        addrs = [x.data_ptr() for x in args if isinstance(x, torch.Tensor)]
+        addrs.extend(
+            v.data_ptr() for v in kwargs.values() if isinstance(v, torch.Tensor)
+        )
+        return addrs
+
+    def _capture(
+        self,
+        entry: _BreakableEntry,
+        args: tuple[Any, ...],
+        kwargs: dict[str, Any],
+    ) -> Any:
+        validate_cudagraph_capturing_enabled()
+
+        entry.input_addresses = self._collect_tensor_addresses(args, kwargs)
+
+        if self.graph_pool is not None:
+            set_graph_pool_id(self.graph_pool)
+        else:
+            set_graph_pool_id(current_platform.graph_pool_handle())
+
+        # Match torch.cuda.graph()'s pre-capture cleanup once per descriptor.
+        # We drive capture_begin/end directly and bypass torch.cuda.graph(),
+        # so its built-in gc + empty_cache never fire. Run them here once
+        # per _capture call -- NOT inside _begin_segment, since this capture
+        # session may issue many begin/end pairs (one per layer's break),
+        # and repeated gc would tank capture time the way it did for the
+        # pre-`gc_disable` piecewise path.
+        gc.collect()
+        torch.accelerator.empty_cache()
+        # Sync the offloader's copy stream before capture so any in-flight
+        # pre-capture prefetches are complete and don't leak into the graph.
+        get_offloader().sync_prev_onload()
+
+        capture = BreakableCUDAGraphCapture(pool=self.graph_pool)
+        with capture:
+            output = self.runnable(*args, **kwargs)
+            # Join the offloader's copy stream while we still hold the last
+            # segment open, so the join is captured into the graph (otherwise
+            # we get an "unjoined stream" error on subsequent forwards).
+            get_offloader().join_after_forward()
+            # Convert output to a weak ref *inside* the capture context so the
+            # strong ref is dropped before the last segment closes, letting
+            # the cudagraph pool reclaim/reuse that memory immediately for
+            # the next batch descriptor's capture.
+            output = weak_ref_tensors(output)
+
+        entry.capture = capture
+        entry.output = weak_ref_tensors(output)
+
+        logger.debug(
+            "Captured breakable cudagraph for %s: %r",
+            entry.batch_descriptor,
+            capture,
+        )
+        # Return the (already-weak) output from the captured run so the
+        # caller of model(...) gets a tensor pointing at the cudagraph pool's memory
+        return output
+
+    def _replay(
+        self,
+        entry: _BreakableEntry,
+        args: tuple[Any, ...],
+        kwargs: dict[str, Any],
+    ) -> Any:
+        if self.is_debugging_mode and entry.input_addresses is not None:
+            new_addresses = self._collect_tensor_addresses(args, kwargs)
+            assert new_addresses == entry.input_addresses, (
+                "Input tensor addresses changed between capture and replay "
+                f"for {entry.batch_descriptor}. Expected "
+                f"{entry.input_addresses}, got {new_addresses}."
+            )
+        # Sync the offloader's copy stream before replay so any external
+        # dependencies from pre-capture prefetches are satisfied.
+        get_offloader().sync_prev_onload()
+        assert entry.capture is not None
+        entry.capture.replay()
+        return entry.output
diff --git a/vllm/compilation/caching.py b/vllm/compilation/caching.py
index c089f02a37ff..62da2d9de35b 100644
--- a/vllm/compilation/caching.py
+++ b/vllm/compilation/caching.py
@@ -16,6 +16,7 @@
 from torch.utils import _pytree as pytree
 
 import vllm.envs as envs
+from vllm.compilation.codegen import compile_execution_fn
 from vllm.compilation.compiler_interface import get_inductor_factors
 from vllm.compilation.counter import compilation_counter
 from vllm.config import VllmConfig, get_current_vllm_config
@@ -176,7 +177,7 @@ class VllmSerializableFunction(SerializableCallable):  # type: ignore[misc]
 
     def __init__(
         self,
-        graph_module: torch.fx.GraphModule,
+        graph_module: torch.fx.GraphModule | bytes,
         example_inputs: Sequence[Any],
         prefix: str,
         optimized_call: Callable[..., Any],
@@ -184,8 +185,10 @@ def __init__(
         vllm_backend: Any | None = None,
         sym_tensor_indices: list[int] | None = None,
         aot_autograd_config: dict[str, Any] | None = None,
+        execution_code: str | None = None,
+        submod_names: list[str] | None = None,
+        consts: list[Any] | None = None,
     ) -> None:
-        assert isinstance(graph_module, torch.fx.GraphModule)
         self.graph_module = graph_module
         self.example_inputs = example_inputs
         self.prefix = prefix
@@ -194,6 +197,9 @@ def __init__(
         self.shape_env = None
         self.vllm_backend = vllm_backend
         self.sym_tensor_indices = sym_tensor_indices
+        self.execution_code = execution_code
+        self.submod_names = submod_names
+        self.consts = consts
         self._fake_mode: Any | None = None
 
         import torch._functorch.config as functorch_config
@@ -298,10 +304,6 @@ def deserialize_compile_artifacts(cls, data: bytes) -> "VllmSerializableFunction
         state = pickle.loads(data)
         fake_mode = FakeTensorMode(shape_env=ShapeEnv())
 
-        state["graph_module"] = cls.deserialize_graph_module(
-            state["graph_module"], fake_mode
-        )
-        state["graph_module"].recompile()
         state["example_inputs"] = GraphPickler.loads(state["example_inputs"], fake_mode)
 
         standalone_compile_artifacts = state.pop("standalone_compile_artifacts", None)
@@ -327,6 +329,7 @@ def deserialize_compile_artifacts(cls, data: bytes) -> "VllmSerializableFunction
                     vllm_config=get_current_vllm_config(),
                     sym_shape_indices_map=sym_shape_indices_map,
                     returns_tuple_map=returns_tuple_map,
+                    fake_mode=fake_mode,
                 )
 
             logger.info(
@@ -338,6 +341,11 @@ def deserialize_compile_artifacts(cls, data: bytes) -> "VllmSerializableFunction
 
             return fn
 
+        state["graph_module"] = cls.deserialize_graph_module(
+            state["graph_module"], fake_mode
+        )
+        state["graph_module"].recompile()
+
         # Fall back to standard VllmBackend.
         # Use a lazy closure: the backend needs traced_files for cache
         # dir computation, but those are only populated after
@@ -406,6 +414,7 @@ def reconstruct_serializable_fn_from_mega_artifact(
     vllm_config: VllmConfig,
     sym_shape_indices_map: dict[str, list[int]],
     returns_tuple_map: dict[str, bool],
+    fake_mode: FakeTensorMode,
 ) -> "VllmSerializableFunction":
     """Construct a VllmSerializableFunction from cached inductor artifacts.
 
@@ -448,12 +457,11 @@ def reconstruct_serializable_fn_from_mega_artifact(
 
     prefix = state["prefix"]
     is_encoder = state.get("is_encoder", False)
-    split_gm = state["graph_module"]
     compilation_config = vllm_config.compilation_config
 
     standalone_compile_artifacts.load_all()
 
-    submod_names = standalone_compile_artifacts.submodule_names()
+    piecewise_submod_names = standalone_compile_artifacts.submodule_names()
     compiled_callables: dict[str, dict[str, Callable[..., Any]]] = {}
 
     for cache_key in standalone_compile_artifacts.submodule_bytes:
@@ -472,14 +480,17 @@ def reconstruct_serializable_fn_from_mega_artifact(
     )
 
     # spot check that cached submodules exist in the graph structure
-    graph_children = {name for name, _ in split_gm.named_children()}
-    missing = set(submod_names) - graph_children
+    # if an old cache is used, this will fail but that's fine because
+    # we will just try this error and re-generate the new cache.
+    graph_children = set(state["submod_names"])
+    missing = set(piecewise_submod_names) - graph_children
     assert not missing, (
         f"artifacts reference submodules not in graph: {missing}. "
         f"graph has: {sorted(graph_children)}"
     )
 
-    for i, submod_name in enumerate(submod_names):
+    submod_callables = {}
+    for i, submod_name in enumerate(piecewise_submod_names):
         assert submod_name in sym_shape_indices_map and submod_name in returns_tuple_map
 
         sym_shape_indices = sym_shape_indices_map[submod_name]
@@ -490,7 +501,7 @@ def reconstruct_serializable_fn_from_mega_artifact(
             graph=None,  # not needed for cached artifacts
             vllm_config=vllm_config,
             piecewise_compile_index=i,
-            total_piecewise_compiles=len(submod_names),
+            total_piecewise_compiles=len(piecewise_submod_names),
             sym_shape_indices=sym_shape_indices,
             vllm_backend=vllm_backend,
             returns_tuple=returns_tuple,
@@ -498,7 +509,7 @@ def reconstruct_serializable_fn_from_mega_artifact(
         )
 
         is_first = i == 0
-        is_last = i == len(submod_names) - 1
+        is_last = i == len(piecewise_submod_names) - 1
         wrapped_backend = wrap_with_cudagraph_if_needed(
             piecewise_backend,
             vllm_config,
@@ -507,12 +518,28 @@ def reconstruct_serializable_fn_from_mega_artifact(
             is_last,
         )
 
-        split_gm.__dict__[submod_name] = wrapped_backend
+        submod_callables[submod_name] = wrapped_backend
         logger.debug(
             "Replaced submodule %s with piecewise backend from cache",
             submod_name,
         )
 
+    # Use codegen'd execution code if available, fall back to split_gm
+    execution_code = state.get("execution_code")
+    submod_names = state.get("submod_names")
+    if execution_code is not None and submod_names is not None:
+        consts = state.get("consts")
+        runtime_callable = compile_execution_fn(
+            execution_code, submod_callables, submod_names, consts
+        )
+    else:
+        logger.warning(
+            "No execution code found, falling back to graph module execution."
+        )
+        runtime_callable = GraphPickler.loads(
+            state["graph_module"], fake_mode=fake_mode
+        )
+
     if compilation_config.cudagraph_copy_inputs:
         sym_tensor_indices = state["sym_tensor_indices"]
         input_buffers = [
@@ -521,9 +548,11 @@ def reconstruct_serializable_fn_from_mega_artifact(
             )
             for idx in sym_tensor_indices
         ]
-        optimized_call = make_copy_and_call(sym_tensor_indices, input_buffers, split_gm)
+        optimized_call = make_copy_and_call(
+            sym_tensor_indices, input_buffers, runtime_callable
+        )
     else:
-        optimized_call = split_gm
+        optimized_call = runtime_callable
 
     fn = VllmSerializableFunction(
         **state,
diff --git a/vllm/compilation/codegen.py b/vllm/compilation/codegen.py
new file mode 100644
index 000000000000..67e9ac843a64
--- /dev/null
+++ b/vllm/compilation/codegen.py
@@ -0,0 +1,235 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Code generation for split_gm stitching graph execution.
+
+Generates a plain Python function that replaces the FX GraphModule's
+interpreter-based execution of the stitching graph, eliminating
+nn.Module.__call__ overhead and __getattr__ dispatch.
+"""
+
+import operator
+from collections.abc import Callable
+from functools import partial
+from typing import Any
+
+import torch.fx
+from torch._dynamo.utils import dynamo_timed
+from torch._logging import trace_structured
+from torch.fx.node import _get_qualified_name
+
+
+def generate_execution_code_with_name(
+    split_gm: torch.fx.GraphModule,
+    fn_name: str,
+    with_submod: bool,
+    consts: list[Any] | None = None,
+    const_index: dict[int, int] | None = None,
+) -> tuple[str, list[str], list[Any]]:
+    lines: list[str] = []
+    param_names: list[str] = []
+    submod_names: list[str] = []
+    submod_index: dict[str, int] = {}
+    if consts is None:
+        consts = []
+    if const_index is None:
+        const_index = {}
+
+    # Build node ordering for liveness analysis.
+    nodes = list(split_gm.graph.nodes)
+    node_order = {node: i for i, node in enumerate(nodes)}
+    inlined_submods: list[str] = []
+
+    # For each value-producing node, find the position of its last consumer.
+    # If the last consumer is the output node, skip (return handles cleanup).
+    # Otherwise, schedule a del after that consumer to free memory early.
+    del_after: dict[int, list[str]] = {}  # position -> names to delete
+    for node in nodes:
+        if node.op == "output":
+            continue
+        users = list(node.users.keys())
+        if not users:
+            continue
+        last_user = max(users, key=lambda u: node_order[u])
+        if last_user.op == "output":
+            continue
+        del_after.setdefault(node_order[last_user], []).append(node.name)
+
+    def ref(arg: Any) -> str:
+        return _node_ref(arg, consts, const_index)
+
+    for i, node in enumerate(nodes):
+        if node.op == "placeholder":
+            param_names.append(node.name)
+
+        elif node.op == "call_module":
+            target = node.target
+            if not with_submod:
+                raise RuntimeError(
+                    f"call_module is not allowed for codegen target {target}."
+                )
+            if target not in submod_index:
+                submod_index[target] = len(submod_names)
+                submod_names.append(target)
+            idx = submod_index[target]
+            args_str = ", ".join(ref(a) for a in node.args)
+            kwargs_str = ", ".join(f"{k}={ref(v)}" for k, v in node.kwargs.items())
+            all_args = ", ".join(filter(None, [args_str, kwargs_str]))
+            submod = getattr(split_gm, target)
+            if isinstance(submod, torch.fx.GraphModule):
+                callable_name = f"__vllm_inlined_submods__{idx}"
+                inlined_code, _, _ = generate_execution_code_with_name(
+                    submod,
+                    callable_name,
+                    with_submod=False,
+                    consts=consts,
+                    const_index=const_index,
+                )
+                inlined_submods.append(inlined_code)
+            else:
+                callable_name = f"__vllm_submods__[{idx}]"
+            lines.append(f"    {node.name} = {callable_name}({all_args})")
+
+        elif node.op == "call_function":
+            if node.target is operator.getitem:
+                source = ref(node.args[0])
+                index = node.args[1]
+                assert isinstance(index, int)
+                lines.append(f"    {node.name} = {source}[{index}]")
+            else:
+                args_str = ", ".join(ref(a) for a in node.args)
+                kwargs_str = ", ".join(f"{k}={ref(v)}" for k, v in node.kwargs.items())
+                all_args = ", ".join(filter(None, [args_str, kwargs_str]))
+                lines.append(
+                    f"    {node.name} = {_get_qualified_name(node.target)}({all_args})"
+                )
+
+        elif node.op == "output":
+            assert len(node.args) == 1
+            ret = ref(node.args[0])
+            lines.append(f"    return {ret}")
+
+        else:
+            raise RuntimeError(f"Unsupported node from codegen: {node.format_node()}")
+
+        # Emit del for variables whose last use was this node.
+        if i in del_after and i < len(nodes) - 2:
+            names = sorted(del_after[i])
+            lines.append(f"    del {', '.join(names)}")
+
+    assert len(param_names) > 0
+    params = ", ".join(param_names)
+    kw_params = ", *, __vllm_submods__" if with_submod else ""
+    header = f"\ndef {fn_name}({params}{kw_params}):"
+    return (
+        "".join(inlined_submods) + "\n".join([header] + lines) + "\n",
+        submod_names,
+        consts,
+    )
+
+
+@dynamo_timed("vllm.generate_execution_code")
+def generate_execution_code(
+    split_gm: torch.fx.GraphModule,
+) -> tuple[str, list[str], list[Any]]:
+    """Generate Python source code from a split_gm's stitching graph.
+
+    Walks split_gm.graph.nodes and produces a function that calls
+    submodules via a __vllm_submods__ list, avoiding FX GraphModule overhead
+    and dict lookup cost.
+
+    Non-primitive constant arguments (e.g. torch.device, DTensor placement
+    types) are collected into a constants list and referenced by index
+    in the generated code, avoiding reliance on repr() being eval-able.
+
+    If a submodule is a plain torch.fx.GraphModule, it is inlined directly
+    in the generated code and we do not need to serialize it in the artifact.
+
+    Args:
+        split_gm: The split graph module produced by split_graph().
+
+    Returns:
+        A tuple of (code, submod_names, consts) where code is the Python
+        source, submod_names is the ordered list of submodule target names
+        corresponding to list indices used in the generated code, and
+        consts is a list of non-primitive constant objects referenced
+        by the generated code via __vllm_consts__. These objects are
+        kept alive for the lifetime of the compiled function.
+    """
+    code, submod_names, consts = generate_execution_code_with_name(
+        split_gm, "execution_fn", with_submod=True
+    )
+    return "import torch\nimport operator\n" + code, submod_names, consts
+
+
+@dynamo_timed("vllm.compile_execution_fn")
+def compile_execution_fn(
+    code: str,
+    submod_callables: dict[str, Callable[..., Any]],
+    submod_names: list[str],
+    consts: list[Any] | None = None,
+) -> Callable[..., Any]:
+    """Compile execution code and bind submodule callables.
+
+    Args:
+        code: Python source from generate_execution_code().
+        submod_callables: Mapping of submodule names to their callables.
+        submod_names: Ordered list of submodule names matching the indices
+            used in the generated code.
+        consts: List of non-primitive constant objects referenced by the
+            generated code via __vllm_consts__. None for legacy cached
+            code that predates this feature.
+
+    Returns:
+        A callable that executes the stitching logic.
+    """
+    trace_structured(
+        "artifact",
+        metadata_fn=lambda: {
+            "name": "vllm_execution_code",
+            "encoding": "string",
+        },
+        payload_fn=lambda: code,
+    )
+    namespace: dict[str, Any] = {}
+    if consts is not None:
+        namespace["__vllm_consts__"] = consts
+    exec(code, namespace)  # noqa: S102
+    fn = namespace["execution_fn"]
+    # Using .get() is intentional here because only piecewise backend will
+    # be stored in submod_callables. The other submodules are inlined and
+    # we don't need to bind them to the execution function. Instead, we
+    # should use None as placeholder to ensure the list indices are preserved
+    # for better debuggability.
+    submods_list = [submod_callables.get(name) for name in submod_names]
+    return partial(fn, __vllm_submods__=submods_list)
+
+
+def _node_ref(arg: Any, consts: list[Any], const_index: dict[int, int]) -> str:
+    """Convert an FX node argument to a source code reference."""
+    if isinstance(arg, torch.fx.Node):
+        return arg.name
+    if isinstance(arg, list):
+        return f"[{', '.join(_node_ref(x, consts, const_index) for x in arg)}]"
+    if isinstance(arg, tuple):
+        items = ", ".join(_node_ref(x, consts, const_index) for x in arg)
+        return f"({items},)" if len(arg) == 1 else f"({items})"
+    if isinstance(arg, dict):
+        return (
+            "{"
+            + ", ".join(
+                f"{_node_ref(k, consts, const_index)}: "
+                f"{_node_ref(v, consts, const_index)}"
+                for k, v in arg.items()
+            )
+            + "}"
+        )
+    if isinstance(arg, (int, float, bool, str, bytes, type(None))):
+        return repr(arg)
+    # Dedup by identity, not equality: safe because FX graph args
+    # are live for the entire code-generation pass. Objects stored
+    # here must be picklable (for compile-artifact caching).
+    key = id(arg)
+    if key not in const_index:
+        const_index[key] = len(consts)
+        consts.append(arg)
+    return f"__vllm_consts__[{const_index[key]}]"
diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py
index bddacfbbc295..2348ff3191b7 100644
--- a/vllm/compilation/compiler_interface.py
+++ b/vllm/compilation/compiler_interface.py
@@ -16,6 +16,7 @@
 from vllm.compilation.counter import compilation_counter
 from vllm.config import VllmConfig
 from vllm.config.utils import Range
+from vllm.env_override import _apply_constrain_to_fx_strides_patch
 from vllm.logger import init_logger
 from vllm.utils.hashing import safe_hash
 from vllm.utils.torch_utils import is_torch_equal_or_newer
@@ -140,6 +141,10 @@ class AlwaysHitShapeEnv:
 
     def __init__(self) -> None:
         self.guards: list[Any] = []
+        # Read by torch._inductor.codecache.FxGraphHashDetails (torch>=2.11)
+        # to incorporate user-provided dynamic-shape hint overrides into the
+        # cache key. We never override hints, so an empty dict is correct.
+        self.var_to_hint_override: dict[Any, int] = {}
 
     def evaluate_guards_expression(self, *args: Any, **kwargs: Any) -> Literal[True]:
         return True
@@ -151,6 +156,17 @@ def produce_guards_expression(self, *args: Any, **kwargs: Any) -> Literal[""]:
         return ""
 
 
+def _get_vllm_functorch_config() -> dict[str, Any]:
+    """Return the functorch config overrides that vLLM applies at compile time.
+
+    Used by both set_functorch_config() and get_inductor_factors() to ensure
+    the compile-time config and cache key are always consistent."""
+    cfg: dict[str, Any] = {}
+    if not envs.VLLM_USE_MEGA_AOT_ARTIFACT:
+        cfg["bundled_autograd_cache"] = False
+    return cfg
+
+
 def get_inductor_factors() -> list[Any]:
     factors: list[Any] = []
     # summarize system state
@@ -164,6 +180,13 @@ def get_inductor_factors() -> list[Any]:
 
     torch_factors = torch_key()
     factors.append(torch_factors)
+
+    from torch._functorch import config as functorch_config
+    from torch._inductor import config as inductor_config
+
+    factors.append(inductor_config.save_config_portable())
+    with functorch_config.patch(_get_vllm_functorch_config()):
+        factors.append(functorch_config.save_config_portable())
     return factors
 
 
@@ -225,48 +248,6 @@ def _save(
     logger.debug("Patched %s.save for atomic writes (torch < 2.10)", cls.__name__)
 
 
-def _patch_constrain_to_fx_strides() -> contextlib.AbstractContextManager:
-    """Context manager that patches inductor's ``constrain_to_fx_strides``
-    to handle opaque (non-tensor) arguments.
-
-    The original calls ``.stride()`` on every FX arg's meta value, which
-    crashes on ``FakeScriptObject`` (the compile-time proxy for hoisted
-    opaque types).  The patched version skips args whose meta value is
-    not a ``torch.Tensor``.
-
-    Returns ``nullcontext`` on torch < 2.11.
-    Upstream issue: https://github.com/pytorch/pytorch/issues/175973
-    """
-    if not is_torch_equal_or_newer("2.11.0.dev"):
-        return contextlib.nullcontext()
-
-    import torch._inductor.ir as _ir
-    import torch._inductor.lowering as _lowering
-    from torch._inductor.virtualized import V as _V
-
-    def _patched(fx_node, *args, **kwargs):
-        def apply_constraint(arg, fx_arg):
-            if isinstance(arg, _ir.IRNode):
-                meta_val = fx_arg.meta.get("val")
-                if isinstance(meta_val, torch.Tensor):
-                    stride_order = _ir.get_stride_order(
-                        meta_val.stride(), _V.graph.sizevars.shape_env
-                    )
-                    return _ir.ExternKernel.require_stride_order(arg, stride_order)
-                return arg
-            if isinstance(arg, dict):
-                return {key: apply_constraint(arg[key], fx_arg[key]) for key in arg}
-            return arg
-
-        args = tuple(
-            apply_constraint(arg, fx_arg) for arg, fx_arg in zip(args, fx_node.args)
-        )
-        kwargs = {k: apply_constraint(v, fx_node.kwargs[k]) for k, v in kwargs.items()}
-        return args, kwargs
-
-    return patch.object(_lowering, "constrain_to_fx_strides", _patched)
-
-
 class InductorStandaloneAdaptor(CompilerInterface):
     """
     The adaptor for the Inductor compiler.
@@ -304,6 +285,7 @@ def compile(
         compile_range: Range,
         key: str | None = None,
     ) -> tuple[Callable[..., Any] | None, Any | None]:
+        _apply_constrain_to_fx_strides_patch()
         compilation_counter.num_inductor_compiles += 1
         current_config = {}
         if compiler_config is not None:
@@ -335,6 +317,9 @@ def compile(
             },
         }
 
+        if is_torch_equal_or_newer("2.13.0.dev"):
+            compile_kwargs["donate_graph_module"] = True  # type: ignore[assignment]
+
         use_aot: bool = supports_aot and envs.VLLM_USE_MEGA_AOT_ARTIFACT
         # only add 'aot' parameter if both supported and enabled...
         # this will set bundled_autograd_cache
@@ -345,9 +330,9 @@ def compile(
         # Inductor's pre-grad passes don't do anything for vLLM.
         # The pre-grad passes get run even on cache-hit and negatively impact
         # vllm cold compile times by O(1s)
-        # Can remove this after the following issue gets fixed
+        # Fixed upstream in PyTorch 2.12:
         # https://github.com/pytorch/pytorch/issues/174502
-        if envs.VLLM_ENABLE_PREGRAD_PASSES:
+        if is_torch_equal_or_newer("2.12.0.dev") or envs.VLLM_ENABLE_PREGRAD_PASSES:
             pregrad_ctx: Any = contextlib.nullcontext()
         else:
             pregrad_ctx = patch(
@@ -387,7 +372,7 @@ def compile(
         else:
             fake_mode_ctx = contextlib.nullcontext()
 
-        with pregrad_ctx, fake_mode_ctx, _patch_constrain_to_fx_strides():
+        with pregrad_ctx, fake_mode_ctx:
             compiled_graph = standalone_compile(graph, example_inputs, **compile_kwargs)
 
         if use_aot:
@@ -502,6 +487,7 @@ def compile(
         compile_range: Range,
         key: str | None = None,
     ) -> tuple[Callable[..., Any] | None, Any | None]:
+        _apply_constrain_to_fx_strides_patch()
         compilation_counter.num_inductor_compiles += 1
         from torch._inductor.compile_fx import compile_fx
 
@@ -630,7 +616,6 @@ def _get_shape_env() -> AlwaysHitShapeEnv:
             stack.enter_context(
                 torch._functorch.config.patch(enable_remote_autograd_cache=False)
             )
-            stack.enter_context(_patch_constrain_to_fx_strides())
 
             # Clear the tracing context before calling compile_fx.
             # vLLM calls compile_fx from within a PiecewiseCompileInterpreter
@@ -776,8 +761,8 @@ def set_inductor_config(config: dict[str, Any], compile_range: Range) -> None:
 
 
 def set_functorch_config() -> None:
-    if not envs.VLLM_USE_MEGA_AOT_ARTIFACT:
-        torch._functorch.config.bundled_autograd_cache = False
+    for k, v in _get_vllm_functorch_config().items():
+        setattr(torch._functorch.config, k, v)
 
 
 class EagerAdaptor(CompilerInterface):
diff --git a/vllm/compilation/cuda_graph.py b/vllm/compilation/cuda_graph.py
index 00bf4bbc71f1..b63d86199720 100644
--- a/vllm/compilation/cuda_graph.py
+++ b/vllm/compilation/cuda_graph.py
@@ -290,9 +290,14 @@ def __call__(self, *args: Any, **kwargs: Any) -> Any | None:
                     # across layers will make the cudagraph capture very slow.
                     # therefore, we only run gc for the first graph,
                     # and disable gc for the rest of the graphs.
-                    stack.enter_context(patch("gc.collect", lambda: None))
                     stack.enter_context(
-                        patch("torch.accelerator.empty_cache", lambda: None)
+                        patch("gc.collect", lambda *args, **kwargs: None)
+                    )
+                    stack.enter_context(
+                        patch(
+                            "torch.accelerator.empty_cache",
+                            lambda *args, **kwargs: None,
+                        )
                     )
 
                 if self.graph_pool is not None:
diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py
index ab52d544c61b..ad710a6fe3fc 100644
--- a/vllm/compilation/decorators.py
+++ b/vllm/compilation/decorators.py
@@ -32,6 +32,9 @@
 
 from .monitor import monitor_profiling_run, monitor_torch_compile
 
+# shape_id parameter was added to mark_unbacked in PyTorch 2.11.0
+_SUPPORTS_SHAPE_ID = is_torch_equal_or_newer("2.11.0")
+
 if TYPE_CHECKING:
     # Only added on nightly/2.10 so wrap
     try:
@@ -89,7 +92,7 @@ def support_torch_compile(
 @overload
 def support_torch_compile(
     *,
-    dynamic_arg_dims: dict[str, int | list[int]] | None,
+    dynamic_arg_dims: dict[str, int | list[int] | dict[int, str]] | None,
 ) -> Callable[[type[_T]], type[_T]]: ...
 
 
@@ -103,7 +106,7 @@ def support_torch_compile(
 @overload
 def support_torch_compile(
     *,
-    dynamic_arg_dims: dict[str, int | list[int]] | None,
+    dynamic_arg_dims: dict[str, int | list[int] | dict[int, str]] | None,
     mark_unbacked_dims: dict[str, int | list[int]] | None,
 ) -> Callable[[type[_T]], type[_T]]: ...
 
@@ -115,11 +118,10 @@ def support_torch_compile(cls: type[_T]) -> type[_T]: ...
 def support_torch_compile(
     cls: type[_T] | None = None,
     *,
-    dynamic_arg_dims: dict[str, int | list[int]] | None = None,
+    dynamic_arg_dims: dict[str, int | list[int] | dict[int, str]] | None = None,
     mark_unbacked_dims: dict[str, int | list[int]] | None = None,
     enable_if: Callable[[VllmConfig], bool] | None = None,
     is_encoder: bool = False,
-    shape_invariants: Callable[..., None] = lambda *args, **kwargs: None,
 ) -> Callable[[type[_T]], type[_T]] | type[_T]:
     """
     A decorator to add support for compiling the forward method of a class.
@@ -141,8 +143,12 @@ def forward(self, x: torch.Tensor, y: Optional[torch.Tensor]): ...
     ```
 
     `dynamic_arg_dims` is a dictionary that maps argument names to the dynamic
-    dimensions of the argument. The dynamic dimensions can be either a single
-    integer or a list of integers.
+    dimensions of the argument. The value can be:
+    - int: a single dimension index (e.g., 0)
+    - list[int]: multiple dimension indices (e.g., [0, 1])
+    - dict[int, str]: dimension to shape_id mapping for shape relations
+      (e.g., {0: "b"}). Dimensions with the same shape_id share the same
+      unbacked symbol.
 
     if `dynamic_arg_dims` is `None`, it is inferred from the type annotation
     of the `forward` method, based on the following default rules:
@@ -189,7 +195,7 @@ def forward(self, x: torch.Tensor, y: Optional[torch.Tensor]): ...
             torch._check(input_ids.size()[0] == inputs_embeds.size()[0])
     This enforces constraints on the symbolic shapes without hardcoding
     specific values. It is needed for some models to avoid data dependent
-    errors.
+    errors and maximize perf when unbacked shapes are used.
     """
 
     def cls_decorator_helper(cls: type[_T]) -> type[_T]:
@@ -205,6 +211,8 @@ def cls_decorator_helper(cls: type[_T]) -> type[_T]:
                 if v.annotation in [
                     torch.Tensor,
                     torch.Tensor | None,
+                    torch.FloatTensor,
+                    torch.FloatTensor | None,
                     IntermediateTensors,
                     IntermediateTensors | None,
                 ]:
@@ -227,13 +235,13 @@ def cls_decorator_helper(cls: type[_T]) -> type[_T]:
                 raise ValueError(
                     f"Argument {k} not found in the forward method of {cls}"
                 )
+
         return _support_torch_compile(
             cls,
             inferred_dynamic_arg_dims,
             mark_unbacked_dims,
             enable_if,
             is_encoder,
-            shape_invariants,
         )
 
     if cls is not None:
@@ -283,7 +291,7 @@ def _try_load_aot_compiled_fn(
     Re-raises on failure when ``VLLM_FORCE_AOT_LOAD`` is set.
     """
     try:
-        with monitor_torch_compile(model.vllm_config):
+        with monitor_torch_compile(model.vllm_config, is_encoder=model._is_encoder):
             with (
                 set_current_vllm_config(model.vllm_config),
                 open(aot_compilation_path, "rb") as f,
@@ -322,15 +330,13 @@ def _try_load_aot_compiled_fn(
 
 def _support_torch_compile(
     cls: type[_T],
-    dynamic_arg_dims: dict[str, int | list[int]],
+    dynamic_arg_dims: dict[str, int | list[int] | dict[int, str]],
     mark_unbacked_dims: dict[str, int | list[int]] | None = None,
     enable_if: Callable[[VllmConfig], bool] | None = None,
     is_encoder: bool = False,
-    shape_invariants: Callable[..., None] = lambda *args, **kwargs: None,
 ) -> type[_T]:
-    """
-    A decorator to add support for compiling the forward method of a class.
-    """
+    """Internal implementation of support_torch_compile decorator."""
+
     if TorchCompileWithNoGuardsWrapper in cls.__bases__:
         # support decorating multiple times
         return cls
@@ -346,7 +352,7 @@ def _support_torch_compile(
 
     def __init__(
         self: _T,
-        *,
+        *args,
         vllm_config: VllmConfig | None = None,
         prefix: str = "",
         **kwargs: Any,
@@ -357,11 +363,24 @@ def __init__(
         # NOTE: to support multimodal models (such as encoder),
         # we may not have vllm_config so we may need to patch it
         sig = inspect.signature(old_init)
+        # Check that any positional arguments match the old_init method signature
+        annotations = [p.annotation for p in sig.parameters.values()]
+        for arg, annotation in zip(args, annotations):
+            if annotation is inspect._empty:
+                continue
+            if not isinstance(arg, annotation):
+                init = f"'{type(self).__name__}.__init__'"
+                arg_type = f"'{type(arg).__name__}'"
+                raise TypeError(
+                    f"{init} received a positional argument of type {arg_type}, "
+                    "but no parameter of that type was found in the method signature. "
+                    f"Please either annotate {init} or pass it as a keyword argument."
+                )
         if "vllm_config" in sig.parameters:
             kwargs["vllm_config"] = vllm_config
         if "prefix" in sig.parameters:
             kwargs["prefix"] = prefix
-        old_init(self, **kwargs)
+        old_init(self, *args, **kwargs)
 
         self.vllm_config = vllm_config
         self.compilation_config = self.vllm_config.compilation_config
@@ -377,7 +396,8 @@ def __init__(
         if self.do_not_compile:
             return
 
-        self._check_shape_invariants = shape_invariants
+        self._dynamic_arg_dims = dynamic_arg_dims
+
         self.was_aot_compile_fn_loaded_from_disk = False
         compilation_counter.num_models_seen += 1
         self.compiled = False
@@ -394,48 +414,83 @@ def __init__(
     def _mark_dynamic_inputs(
         mod: type[_T], ds_type: DynamicShapesType, *args: Any, **kwargs: Any
     ) -> None:
-        def mark_dynamic(arg: torch.Tensor, dims: list[int]) -> None:
+        def mark_dynamic(
+            arg: torch.Tensor, dim_shape_pairs: list[tuple[int, str | None]]
+        ) -> None:
             if ds_type == DynamicShapesType.UNBACKED:
                 if is_torch_equal_or_newer("2.10.0"):
-                    for dim in dims:
-                        torch._dynamo.decorators.mark_unbacked(
-                            arg, dim, hint_override=arg.size()[dim]
-                        )
+                    for dim, shape_id in dim_shape_pairs:
+                        if shape_id is not None:
+                            if not _SUPPORTS_SHAPE_ID:
+                                raise RuntimeError(
+                                    f"shape_id='{shape_id}' requires PyTorch >= 2.11.0"
+                                )
+                            torch._dynamo.decorators.mark_unbacked(
+                                arg,
+                                dim,
+                                hint_override=arg.size()[dim],
+                                shape_id=shape_id,
+                            )
+                        else:
+                            torch._dynamo.decorators.mark_unbacked(
+                                arg,
+                                dim,
+                                hint_override=arg.size()[dim],
+                            )
                 else:
+                    # For older versions, we can't use hint_override or shape_id
+                    dims = [dim for dim, _ in dim_shape_pairs]
                     torch._dynamo.decorators.mark_unbacked(arg, dims)
             else:
+                dims = [dim for dim, _ in dim_shape_pairs]
                 torch._dynamo.mark_dynamic(arg, dims)
 
         sig = inspect.signature(mod.__class__.forward)  # type: ignore[attr-defined]
         bound_args = sig.bind(mod, *args, **kwargs)
         bound_args.apply_defaults()
-        for k, dims in dynamic_arg_dims.items():
+
+        # Normalize dynamic_arg_dims to dict[str, dict[int, str | None]]
+        normalized_dims: dict[str, dict[int, str | None]] = {}
+        for k, v in dynamic_arg_dims.items():
+            if isinstance(v, dict):
+                normalized_dims[k] = {dim: shape_id for dim, shape_id in v.items()}
+            elif isinstance(v, int):
+                normalized_dims[k] = {v: None}
+            else:
+                normalized_dims[k] = {d: None for d in v}
+
+        for k, dim_to_shape_id in normalized_dims.items():
             arg = bound_args.arguments.get(k)
 
             if arg is not None:
-                dims = [dims] if isinstance(dims, int) else dims
+                dims = list(dim_to_shape_id.keys())
+
                 if isinstance(arg, torch.Tensor):
-                    # In case dims is specified with negative indexing
-                    dims = [arg.ndim + dim if dim < 0 else dim for dim in dims]
-                    mark_dynamic(arg, dims)
+                    dim_shape_pairs = [
+                        (arg.ndim + d if d < 0 else d, dim_to_shape_id.get(d))
+                        for d in dims
+                    ]
+                    mark_dynamic(arg, dim_shape_pairs)
                 elif isinstance(arg, IntermediateTensors):
                     for tensor in arg.tensors.values():
-                        # In case dims is specified with negative indexing
-                        dims = [tensor.ndim + dim if dim < 0 else dim for dim in dims]
-                        mark_dynamic(tensor, dims)
+                        dim_shape_pairs = [
+                            (tensor.ndim + d if d < 0 else d, dim_to_shape_id.get(d))
+                            for d in dims
+                        ]
+                        mark_dynamic(tensor, dim_shape_pairs)
                 else:
                     raise ValueError(
-                        "Unsupported dynamic dimensions"
-                        f" {dims} for argument {k} with type {type(arg)}."
+                        f"Unsupported dynamic dimensions {dims} "
+                        f"for argument {k} with type {type(arg)}."
                     )
+
         if mark_unbacked_dims:
-            for k, dims in mark_unbacked_dims.items():
+            for k, dims_val in mark_unbacked_dims.items():
                 arg = bound_args.arguments.get(k)
                 if arg is not None:
-                    dims = [dims] if isinstance(dims, int) else dims
+                    dims = [dims_val] if isinstance(dims_val, int) else list(dims_val)
                     if isinstance(arg, torch.Tensor):
-                        # In case dims is specified with negative indexing
-                        dims = [arg.ndim + dim if dim < 0 else dim for dim in dims]
+                        dims = [arg.ndim + d if d < 0 else d for d in dims]
                         if is_torch_equal_or_newer("2.10.0"):
                             for dim in dims:
                                 torch._dynamo.decorators.mark_unbacked(
@@ -492,6 +547,16 @@ def __call__(self: type[_T], *args: Any, **kwargs: Any) -> Any:
                 hash_key,
             )
 
+            # Hash-level dir; shared across ranks on the same node.
+            self.compilation_config.local_cache_dir = cache_dir
+            inductor_cache = os.path.join(cache_dir, "inductor_cache")
+            os.makedirs(inductor_cache, exist_ok=True)
+            # Process-wide: post-load execution, CUDA-graph capture, and later
+            # autotune/recompile all need to write under {hash}/inductor_cache/.
+            # Unconditional because torch's cache_dir() may have pre-filled the
+            # /tmp default during import, making setdefault a no-op.
+            os.environ["TORCHINDUCTOR_CACHE_DIR"] = inductor_cache
+
             rank = self.vllm_config.parallel_config.rank
             dp_rank = self.vllm_config.parallel_config.data_parallel_index
             cache_dir = os.path.join(cache_dir, f"rank_{rank}_{dp_rank}")
@@ -592,7 +657,9 @@ def patched_inline_call(self_: Any) -> Any:
                 # store the path for saving after warmup
                 self._aot_compilation_path = aot_compilation_path
                 self._aot_cache_dir = cache_dir
-                with monitor_torch_compile(self.vllm_config):
+                with monitor_torch_compile(
+                    self.vllm_config, is_encoder=self._is_encoder
+                ):
                     self.aot_compiled_fn = self.aot_compile(*args, **kwargs)
                     compilation_counter.num_aot_compiles += 1
                     # All compilation is done at this point, save the
@@ -606,6 +673,7 @@ def patched_inline_call(self_: Any) -> Any:
                     self.vllm_config,
                     "torch.compile and initial profiling/warmup "
                     "run together took %.2f s in total",
+                    is_encoder=self._is_encoder,
                 ):
                     output = TorchCompileWithNoGuardsWrapper.__call__(
                         self,  # type: ignore[arg-type]
@@ -640,7 +708,6 @@ def save_aot_compiled_function(self: type[_T]) -> None:
             logger.info_once(
                 "saved AOT compiled function to %s",
                 self._aot_compilation_path,
-                scope="local",
             )
         except Exception as e:
             logger.warning(
diff --git a/vllm/compilation/monitor.py b/vllm/compilation/monitor.py
index f584f526f08f..c23a8f67228c 100644
--- a/vllm/compilation/monitor.py
+++ b/vllm/compilation/monitor.py
@@ -18,6 +18,7 @@
 def monitor_torch_compile(
     vllm_config: VllmConfig,
     message: str = "torch.compile took %.2f s in total",
+    is_encoder: bool = False,
 ) -> Generator[None, None, None]:
     """Context manager that times torch.compile and manages depyf debugging.
 
@@ -45,7 +46,11 @@ def monitor_torch_compile(
     else:
         total_compile_time = time.perf_counter() - torch_compile_start_time
         if compilation_config.mode == CompilationMode.VLLM_COMPILE:
-            logger.info_once(message, total_compile_time, scope="local")
+            if is_encoder:
+                compilation_config.encoder_compilation_time += total_compile_time
+            else:
+                compilation_config.compilation_time += total_compile_time
+            logger.info_once(message, total_compile_time)
     finally:
         if depyf_cm is not None:
             try:
@@ -76,7 +81,6 @@ def monitor_profiling_run() -> Generator[None, None, None]:
     logger.info_once(
         "Initial profiling/warmup run took %.2f s",
         elapsed,
-        scope="local",
     )
 
 
diff --git a/vllm/compilation/passes/fusion/act_quant_fusion.py b/vllm/compilation/passes/fusion/act_quant_fusion.py
index 911775f69967..e35fc5cd4084 100644
--- a/vllm/compilation/passes/fusion/act_quant_fusion.py
+++ b/vllm/compilation/passes/fusion/act_quant_fusion.py
@@ -1,29 +1,25 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from abc import ABC, abstractmethod
+import itertools
 from typing import Any
 
 import torch
 from torch._higher_order_ops.auto_functionalize import auto_functionalized
-from torch._inductor.pattern_matcher import (
-    PatternMatcherPass,
-    fwd_only,
-    register_replacement,
-)
 from torch._ops import OpOverload
 
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     QuantKey,
+    kFp8Dynamic64Sym,
+    kFp8Dynamic128Sym,
     kFp8StaticTensorSym,
     kNvfp4Dynamic,
 )
 from vllm.platforms import current_platform
 
-from ..inductor_pass import enable_fake_mode
-from ..vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass
+from ..vllm_inductor_pass import VllmFusionPatternMatcherPass, VllmPatternReplacement
 from .matcher_utils import MatcherQuantFP8, MatcherSiluAndMul
 from .rms_quant_fusion import QUANT_OPS, empty_bf16, empty_fp32, empty_i32
 
@@ -43,10 +39,14 @@
 if silu_and_mul_nvfp4_quant_supported:
     FUSED_OPS[kNvfp4Dynamic] = torch.ops._C.silu_and_mul_nvfp4_quant.default  # noqa: E501
 
+if current_platform.is_cuda_alike():
+    FUSED_OPS[kFp8Dynamic128Sym] = torch.ops._C.silu_and_mul_per_block_quant.default
+    FUSED_OPS[kFp8Dynamic64Sym] = torch.ops._C.silu_and_mul_per_block_quant.default
+
 
-class ActivationQuantPattern(ABC):
+class ActivationQuantPattern(VllmPatternReplacement):
     """
-    The base class for Activation+Quant fusions.
+    Base class for Activation+Quant fusions.
     Should not be used directly.
     """
 
@@ -73,10 +73,6 @@ def empty_quant(self, *args: Any, **kwargs: Any) -> torch.Tensor:
         kwargs = {"dtype": self.quant_dtype, "device": "cuda", **kwargs}
         return torch.empty(*args, **kwargs)
 
-    @abstractmethod
-    def register(self, pm_pass: PatternMatcherPass) -> None:
-        raise NotImplementedError
-
 
 class SiluMulFp8StaticQuantPattern(ActivationQuantPattern):
     """
@@ -94,8 +90,9 @@ def get_inputs(self) -> list[torch.Tensor]:
             scale,
         ]
 
-    def register(self, pm_pass: PatternMatcherPass) -> None:
-        def pattern(
+    @property
+    def pattern(self):
+        def _pattern(
             input: torch.Tensor,
             scale: torch.Tensor,
         ) -> torch.Tensor:
@@ -103,7 +100,11 @@ def pattern(
             result_quant = self.quant_matcher(result_silu_mul, scale)
             return result_quant[0]
 
-        def replacement(
+        return _pattern
+
+    @property
+    def replacement(self):
+        def _replacement(
             input: torch.Tensor,
             scale: torch.Tensor,
         ) -> torch.Tensor:
@@ -117,10 +118,7 @@ def replacement(
             )
             return at[1]
 
-        inps = self.get_inputs()
-        pattern(*inps)
-
-        register_replacement(pattern, replacement, inps, fwd_only, pm_pass)
+        return _replacement
 
 
 class SiluMulNvfp4QuantPattern(ActivationQuantPattern):
@@ -138,8 +136,9 @@ def get_inputs(self) -> list[torch.Tensor]:
         scale = empty_fp32(1, 1)
         return [result, output_scale, input_, scale]
 
-    def register(self, pm_pass: PatternMatcherPass) -> None:
-        def pattern(
+    @property
+    def pattern(self):
+        def _pattern(
             result: torch.Tensor,
             output_scale: torch.Tensor,
             input: torch.Tensor,
@@ -156,7 +155,11 @@ def pattern(
             )
             return at[1], at[2]
 
-        def replacement(
+        return _pattern
+
+    @property
+    def replacement(self):
+        def _replacement(
             result: torch.Tensor,
             output_scale: torch.Tensor,
             input: torch.Tensor,
@@ -171,10 +174,109 @@ def replacement(
             )
             return at[1], at[2]
 
-        register_replacement(pattern, replacement, self.get_inputs(), fwd_only, pm_pass)
+        return _replacement
+
+
+class SiluMulBlockQuantPattern(ActivationQuantPattern):
+    """
+    Fusion for SiluMul+BlockQuant (FP8 dynamic per-group) Pattern.
+    Supports group_size 128 and 64 via QuantKey.
+    Parameterized on is_scale_transposed for different scale layouts.
+    """
+
+    def __init__(
+        self,
+        quant_key: QuantKey,
+        is_scale_transposed: bool = False,
+        is_e8m0: bool = False,
+        is_tma_aligned: bool = False,
+        match_aiter: bool = False,
+    ) -> None:
+        super().__init__(quant_key)
+        self.quant_matcher = MatcherQuantFP8(
+            quant_key,
+            has_col_major_scales=is_scale_transposed,
+            is_e8m0=is_e8m0,
+            is_tma_aligned=is_tma_aligned,
+        )
+        self.group_size = quant_key.scale.group_shape[1]
+        self.is_scale_transposed = is_scale_transposed
+        self.is_e8m0 = is_e8m0
+        self.is_tma_aligned = is_tma_aligned
+
+    def get_inputs(self) -> list[torch.Tensor]:
+        scale = self.quant_matcher.empty_f32(1, 1)
+        return self.silu_and_mul_matcher.inputs() + [scale]
+
+    @property
+    def pattern(self):
+        def _pattern(
+            input: torch.Tensor,
+            scale: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+            silu_out = self.silu_and_mul_matcher(input)
+            result = torch.empty(
+                silu_out.shape,
+                device=silu_out.device,
+                dtype=self.quant_dtype,
+            )
+            assert scale is not None
+            finfo = torch.finfo(self.quant_dtype)
+            _, result, scale = auto_functionalized(
+                self.quant_matcher.QUANT_OP,
+                input=silu_out,
+                output_q=result,
+                output_s=scale,
+                group_size=self.group_size,
+                eps=1e-10,
+                fp8_min=finfo.min,
+                fp8_max=finfo.max,
+                scale_ue8m0=self.is_e8m0,
+                dummy_is_scale_transposed=self.is_scale_transposed,
+                dummy_is_tma_aligned=self.is_tma_aligned,
+            )
+            return result, scale
+
+        return _pattern
+
+    @property
+    def replacement(self):
+        def _replacement(
+            input: torch.Tensor,
+            scale: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+            d = input.shape[-1] // 2
+            output_shape = input.shape[:-1] + (d,)
+            result = torch.empty(
+                output_shape, device=input.device, dtype=self.quant_dtype
+            )
+            if self.is_scale_transposed:
+                scale = torch.empty(
+                    (d // self.group_size, input.shape[0]),
+                    device=input.device,
+                    dtype=torch.float32,
+                ).permute(-1, -2)
+            else:
+                scale = torch.empty(
+                    (input.shape[0], d // self.group_size),
+                    device=input.device,
+                    dtype=torch.float32,
+                )
+            at = auto_functionalized(
+                self.FUSED_OP,
+                out=result,
+                input=input,
+                scales=scale,
+                group_size=self.group_size,
+                scale_ub=None,
+                is_scale_transposed=self.is_scale_transposed,
+            )
+            return at[1], at[2]
+
+        return _replacement
 
 
-class ActivationQuantFusionPass(VllmPatternMatcherPass):
+class ActivationQuantFusionPass(VllmFusionPatternMatcherPass):
     """
     This pass fuses a pre-defined set of custom ops into fused ops.
     It uses the torch pattern matcher to find the patterns and replace them.
@@ -184,32 +286,33 @@ class ActivationQuantFusionPass(VllmPatternMatcherPass):
     https://github.com/pytorch/pytorch/pull/139321#issuecomment-2452354980
     """
 
-    @enable_fake_mode
     def __init__(self, config: VllmConfig) -> None:
-        super().__init__(config)
+        super().__init__(config, "activation_quant_fusion_pass")
 
-        self.patterns: PatternMatcherPass = PatternMatcherPass(
-            pass_name="activation_quant_fusion_pass"
-        )
-
-        pattern_silu_mul_fp8 = SiluMulFp8StaticQuantPattern()
-        pattern_silu_mul_fp8.register(self.patterns)
+        self.register(SiluMulFp8StaticQuantPattern())
 
         if silu_and_mul_nvfp4_quant_supported:
-            pattern_silu_mul_nvfp4 = SiluMulNvfp4QuantPattern()
-            pattern_silu_mul_nvfp4.register(self.patterns)
-
-        self.dump_patterns(config, self.patterns)
-
-    @VllmInductorPass.time_and_log
-    def __call__(self, graph: torch.fx.Graph) -> None:
-        self.matched_count = self.patterns.apply(graph)
-        logger.debug("Replaced %s patterns", self.matched_count)
-
-    def uuid(self) -> str:
-        return VllmInductorPass.hash_source(
-            self,
-            ActivationQuantPattern,
-            SiluMulFp8StaticQuantPattern,
-            SiluMulNvfp4QuantPattern,
-        )
+            self.register(SiluMulNvfp4QuantPattern())
+
+        if current_platform.is_cuda():
+            for (
+                quant_key,
+                is_scale_transposed,
+                is_e8m0,
+                is_tma_aligned,
+            ) in itertools.product(
+                [kFp8Dynamic128Sym, kFp8Dynamic64Sym],
+                [False, True],
+                [True, False],
+                [False, True],
+            ):
+                self.register(
+                    SiluMulBlockQuantPattern(
+                        quant_key,
+                        is_scale_transposed=is_scale_transposed,
+                        is_e8m0=is_e8m0,
+                        is_tma_aligned=is_tma_aligned,
+                    )
+                )
+
+        self.dump_patterns(config, self.pm_pass)
diff --git a/vllm/compilation/passes/fusion/allreduce_rms_fusion.py b/vllm/compilation/passes/fusion/allreduce_rms_fusion.py
index d55b305992e9..5406f611e87d 100644
--- a/vllm/compilation/passes/fusion/allreduce_rms_fusion.py
+++ b/vllm/compilation/passes/fusion/allreduce_rms_fusion.py
@@ -3,6 +3,7 @@
 import contextlib
 from importlib.util import find_spec
 from types import ModuleType
+from typing import Any
 
 import torch
 import torch._inductor.pattern_matcher as pm
@@ -10,9 +11,15 @@
 from torch._higher_order_ops.auto_functionalize import auto_functionalized
 from torch._inductor.pattern_matcher import PatternMatcherPass
 
+import vllm.ir.ops
+from vllm._aiter_ops import rocm_aiter_ops
+from vllm.compilation.passes.fusion.rms_quant_fusion import (
+    _rms_input_weight_dtype_match,
+)
 from vllm.config import VllmConfig
 from vllm.config.utils import Range
 from vllm.distributed import get_tp_group, tensor_model_parallel_all_reduce
+from vllm.distributed.device_communicators.custom_all_reduce import CustomAllreduce
 from vllm.distributed.parallel_state import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -27,11 +34,20 @@
 )
 
 from ..inductor_pass import enable_fake_mode
-from ..vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass
-from .matcher_utils import MatcherFusedAddRMSNorm, MatcherQuantFP8, MatcherRMSNorm
+from ..vllm_inductor_pass import (
+    VllmFusionPatternMatcherPass,
+    VllmInductorPass,
+    VllmPatternMatcherPass,
+    VllmPatternReplacement,
+)
+from .matcher_utils import MatcherQuantFP8
 
 FP8_DTYPE = current_platform.fp8_dtype()
 
+
+# The empirical value for small batch
+PDL_ADVANCE_LAUNCH_TOKENS = 16
+
 logger = init_logger(__name__)
 
 flashinfer_comm: ModuleType | None = None
@@ -192,6 +208,7 @@ def call_trtllm_fused_allreduce_norm(
             layout_code=layout_code,
             use_oneshot=use_oneshot,
             fp32_acc=fp32_acc,
+            trigger_completion_at_end=num_tokens > PDL_ADVANCE_LAUNCH_TOKENS,
         )
 
     def call_trtllm_fused_allreduce_norm_fake(
@@ -258,6 +275,12 @@ def __init__(self, dtype: torch.dtype, device: str | None) -> None:
         self.tp = get_tp_group()
         self.tp_size = get_tensor_model_parallel_world_size()
 
+    def empty(self, *args: Any, **kwargs: Any) -> torch.Tensor:
+        return torch.empty(*args, dtype=self.dtype, device=self.device, **kwargs)
+
+    def empty_f32(self, *args: Any, **kwargs: Any) -> torch.Tensor:
+        return torch.empty(*args, dtype=torch.float32, device=self.device, **kwargs)
+
 
 class AllReduceRMSNormPattern(BasePattern):
     """
@@ -276,20 +299,17 @@ def __init__(
         super().__init__(dtype, device)
         self.epsilon = epsilon
         self.allreduce_params = allreduce_params
-        self.rmsnorm_matcher = MatcherRMSNorm(epsilon)
 
     def get_inputs(self) -> list[torch.Tensor]:
-        input, weight = self.rmsnorm_matcher.inputs()
-
-        # input goes through allreduce first, always 16-bit
-        return [input.to(self.dtype), weight]
+        # input, weight
+        return [self.empty(5, 16), self.empty(16)]
 
     def register(self, pm_pass: PatternMatcherPass) -> None:
         def pattern(
             input: torch.Tensor, weight: torch.Tensor
         ) -> tuple[torch.Tensor, torch.Tensor]:
             allreduce_output = tensor_model_parallel_all_reduce(input)
-            rms = self.rmsnorm_matcher(allreduce_output, weight)
+            rms = vllm.ir.ops.rms_norm(allreduce_output, weight, self.epsilon)
 
             return rms, allreduce_output
 
@@ -315,7 +335,12 @@ def replacement(
             return allreduce[3], allreduce[1]
 
         pm.register_replacement(
-            pattern, replacement, self.get_inputs(), pm.fwd_only, pm_pass
+            pattern,
+            replacement,
+            self.get_inputs(),
+            pm.fwd_only,
+            pm_pass,
+            extra_check=_rms_input_weight_dtype_match,
         )
 
 
@@ -336,10 +361,11 @@ def __init__(
         super().__init__(dtype, device)
         self.epsilon = epsilon
         self.allreduce_params = allreduce_params
-        self.rmsnorm_matcher = MatcherFusedAddRMSNorm(epsilon)
 
     def get_inputs(self) -> list[torch.Tensor]:
-        input, residual, weight = self.rmsnorm_matcher.inputs()
+        input = self.empty(5, 16)
+        residual = self.empty(5, 16)
+        weight = self.empty(16)
 
         # input goes through allreduce first, always 16-bit
         return [residual, input.to(self.dtype), weight]
@@ -349,7 +375,9 @@ def pattern(
             residual: torch.Tensor, input: torch.Tensor, weight: torch.Tensor
         ) -> tuple[torch.Tensor, torch.Tensor]:
             allreduce_output = tensor_model_parallel_all_reduce(input)
-            rms, residual = self.rmsnorm_matcher(allreduce_output, weight, residual)
+            rms, residual = vllm.ir.ops.fused_add_rms_norm(
+                allreduce_output, residual, weight, self.epsilon
+            )
             return rms, residual
 
         def replacement(
@@ -407,15 +435,13 @@ def __init__(
         self.epsilon = epsilon
         self.allreduce_params = allreduce_params
         self.quant_dtype = torch.float8_e4m3fn
-        self.rmsnorm_matcher = MatcherRMSNorm(epsilon)
         self.quant_matcher = MatcherQuantFP8(kFp8StaticTensorSym)
 
     def get_inputs(self) -> list[torch.Tensor]:
-        input, weight = self.rmsnorm_matcher.inputs()
         _, scale = self.quant_matcher.inputs()
 
-        # input goes through allreduce first, always 16-bit
-        return [input.to(self.dtype), weight, scale]
+        # input, weight
+        return [self.empty(5, 16), self.empty(16), scale]
 
     def register(self, pm_pass: PatternMatcherPass) -> None:
         def pattern(
@@ -424,7 +450,7 @@ def pattern(
             scale: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
             all_reduce = tensor_model_parallel_all_reduce(input)
-            rms = self.rmsnorm_matcher(all_reduce, weight)
+            rms = vllm.ir.ops.rms_norm(all_reduce, weight, self.epsilon)
             quant, _ = self.quant_matcher(rms, scale)
             return quant, all_reduce
 
@@ -456,7 +482,12 @@ def replacement(
             return allreduce[4], allreduce[1]
 
         pm.register_replacement(
-            pattern, replacement, self.get_inputs(), pm.fwd_only, pm_pass
+            pattern,
+            replacement,
+            self.get_inputs(),
+            pm.fwd_only,
+            pm_pass,
+            extra_check=_rms_input_weight_dtype_match,
         )
 
 
@@ -480,11 +511,12 @@ def __init__(
         self.allreduce_params = allreduce_params
         self.quant_dtype = torch.float8_e4m3fn
 
-        self.rmsnorm_matcher = MatcherFusedAddRMSNorm(epsilon)
         self.quant_matcher = MatcherQuantFP8(kFp8StaticTensorSym)
 
     def get_inputs(self) -> list[torch.Tensor]:
-        input, residual, weight = self.rmsnorm_matcher.inputs()
+        input = self.empty(5, 16)
+        residual = self.empty(5, 16)
+        weight = self.empty(16)
         _, scale = self.quant_matcher.inputs()
 
         # input goes through allreduce first, always 16-bit
@@ -498,7 +530,9 @@ def pattern(
             scale: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
             allreduce_output = tensor_model_parallel_all_reduce(input)
-            rms, res = self.rmsnorm_matcher(allreduce_output, weight, residual)
+            rms, res = vllm.ir.ops.fused_add_rms_norm(
+                allreduce_output, residual, weight, self.epsilon
+            )
             quant, _ = self.quant_matcher(rms, scale)
 
             return quant, res
@@ -553,7 +587,6 @@ def __init__(
         super().__init__(dtype, device)
         self.epsilon = epsilon
         self.allreduce_params = allreduce_params
-        self.rmsnorm_matcher = MatcherRMSNorm(epsilon)
 
     def get_inputs(self) -> list[torch.Tensor]:
         input = torch.empty([1, 16, 16], device=self.device, dtype=self.dtype)
@@ -575,7 +608,7 @@ def pattern(
             output_scale: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
             all_reduce = tensor_model_parallel_all_reduce(input)
-            rms = self.rmsnorm_matcher(all_reduce, weight)
+            rms = vllm.ir.ops.rms_norm(all_reduce, weight, self.epsilon)
             quant_out_tuple = auto_functionalized(
                 STATIC_FP4_QUANT_OP,
                 input=rms,
@@ -619,7 +652,12 @@ def replacement(
             return allreduce[4], allreduce[1], allreduce[5]
 
         pm.register_replacement(
-            pattern, replacement, self.get_inputs(), pm.fwd_only, pm_pass
+            pattern,
+            replacement,
+            self.get_inputs(),
+            pm.fwd_only,
+            pm_pass,
+            extra_check=_rms_input_weight_dtype_match,
         )
 
 
@@ -641,7 +679,6 @@ def __init__(
         super().__init__(dtype, device)
         self.epsilon = epsilon
         self.allreduce_params = allreduce_params
-        self.rmsnorm_matcher = MatcherFusedAddRMSNorm(epsilon)
 
     def get_inputs(self) -> list[torch.Tensor]:
         input = torch.empty([16, 16], device=self.device, dtype=self.dtype)
@@ -673,7 +710,9 @@ def pattern(
             input_global_scale: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
             allreduce_output = tensor_model_parallel_all_reduce(input)
-            rms, residual = self.rmsnorm_matcher(allreduce_output, weight, residual)
+            rms, residual = vllm.ir.ops.fused_add_rms_norm(
+                allreduce_output, residual, weight, self.epsilon
+            )
             quant_out_tuple = auto_functionalized(
                 STATIC_FP4_QUANT_OP,
                 input=rms,
@@ -869,3 +908,201 @@ def __del__(self) -> None:
             return
         with contextlib.suppress(Exception):
             destroy_fi_ar_workspace()
+
+
+# TODO: make BasePattern to inherit from VllmPatternReplacement
+class AiterAllreduceFusedRMSNormPattern(BasePattern, VllmPatternReplacement):
+    def __init__(
+        self,
+        epsilon: float,
+        dtype: torch.dtype,
+        device: str | None,
+        use_aiter_rmsnorm: bool = True,
+    ) -> None:
+        super().__init__(dtype, device)
+        self.dtype = dtype
+        self.epsilon = epsilon
+        self.FUSED_AR_RMSNORM_OP = rocm_aiter_ops.get_fused_allreduce_rmsnorm_op()
+
+    def get_inputs(self) -> list[torch.Tensor]:
+        return [self.empty(5, 16), self.empty(16)]
+
+    @property
+    def pattern(self):
+        def _pattern(
+            input: torch.Tensor, weight: torch.Tensor
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+            allreduce_output = tensor_model_parallel_all_reduce(input)
+            rms = vllm.ir.ops.rms_norm(allreduce_output, weight, self.epsilon)
+
+            return rms, allreduce_output
+
+        return _pattern
+
+    @property
+    def replacement(self):
+        def _replacement(
+            input: torch.Tensor, weight: torch.Tensor
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+            residual = torch.zeros_like(input)
+            allreduce = self.FUSED_AR_RMSNORM_OP(
+                input_=input,
+                residual=residual,
+                weight=weight,
+                epsilon=self.epsilon,
+            )
+            return allreduce[0], allreduce[1]
+
+        return _replacement
+
+
+class AiterAllreduceFusedAddRMSNormPattern(BasePattern, VllmPatternReplacement):
+    def __init__(
+        self,
+        epsilon: float,
+        dtype: torch.dtype,
+        device: str | None,
+        use_aiter_rmsnorm: bool = True,
+    ) -> None:
+        super().__init__(dtype, device)
+        self.epsilon = epsilon
+        self.dtype = dtype
+        self.FUSED_AR_RMSNORM_OP = rocm_aiter_ops.get_fused_allreduce_rmsnorm_op()
+
+    def get_inputs(self) -> list[torch.Tensor]:
+        # input, residual, weight
+        return [self.empty(5, 16), self.empty(5, 16), self.empty(16)]
+
+    @property
+    def pattern(self):
+        def _pattern(
+            residual: torch.Tensor, input: torch.Tensor, weight: torch.Tensor
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+            allreduce_output = tensor_model_parallel_all_reduce(input)
+            rms, residual = vllm.ir.ops.fused_add_rms_norm(
+                allreduce_output, residual, weight, self.epsilon
+            )
+            return rms, residual
+
+        return _pattern
+
+    @property
+    def replacement(self):
+        def _replacement(
+            residual: torch.Tensor, input: torch.Tensor, weight: torch.Tensor
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+            allreduce = self.FUSED_AR_RMSNORM_OP(
+                input_=input,
+                residual=residual,
+                weight=weight,
+                epsilon=self.epsilon,
+            )
+            return allreduce[0], allreduce[1]
+
+        return _replacement
+
+
+class RocmAiterAllReduceFusionPass(VllmFusionPatternMatcherPass):
+    def __init__(self, config: VllmConfig) -> None:
+        super().__init__(config, "rocm_aiter_allreduce_fusion_pass")
+        self.disabled = True
+        self.tp_size = get_tensor_model_parallel_world_size()
+        if self.tp_size <= 1:
+            logger.warning_once("AllReduce fusion pass is disabled for tp_size <= 1.")
+            return
+
+        if config.model_config is None:
+            logger.warning_once(
+                "AllReduce fusion pass is disabled for missing model_config."
+            )
+            return
+
+        device_comm = get_tp_group().device_communicator
+        if device_comm is None:
+            logger.warning_once("Device communicator is required.")
+            return
+
+        ca_comm = getattr(device_comm, "ca_comm", None)
+        if ca_comm is None:
+            logger.warning_once("Custom Allreduce is required.")
+            return
+        self.ca_comm = ca_comm
+
+        assert isinstance(ca_comm, CustomAllreduce)
+
+        group = get_tp_group().cpu_group
+        rocm_aiter_ops.initialize_aiter_allreduce(group, self.device)
+        hidden_dim = config.model_config.get_hidden_size()
+        element_size = torch.tensor([], dtype=self.model_dtype).element_size()
+        max_size = rocm_aiter_ops.get_aiter_allreduce_max_size()
+        if max_size is None:
+            logger.warning("AITER allreduce fusion must be initialized")
+            return
+
+        # Aiter's fused_allreduce_rmsnorm kernel dispatches on hidden_dim.
+        # Before aiter v0.1.12 the launcher was template-specialized on HIDDEN_DIM
+        # and silently no-op'd for sizes outside {512, 1024, 2048, 4096}. From v0.1.12
+        # hidden_dim is a runtime argument. Detect the older API via the missing
+        # `_pool` attribute and skip fusion for unsupported sizes.
+        # Ref (old kernel): https://github.com/ROCm/aiter/blob/6a0e7b26ccf33164785531212cc2ec2cde0b9243/csrc/include/custom_all_reduce.cuh#L2590
+        aiter_ar = rocm_aiter_ops.get_aiter_allreduce()
+        _AITER_OLD_FUSED_AR_RMS_HIDDEN = (512, 1024, 2048, 4096)
+        if (
+            aiter_ar is not None
+            and not hasattr(aiter_ar, "_pool")
+            and hidden_dim not in _AITER_OLD_FUSED_AR_RMS_HIDDEN
+        ):
+            logger.warning_once(
+                "AITER allreduce-rmsnorm fusion disabled: aiter<0.1.12 "
+                "only supports hidden_dim in %s; got %d. Upgrade aiter to "
+                ">=0.1.12 to enable fusion for this model.",
+                _AITER_OLD_FUSED_AR_RMS_HIDDEN,
+                hidden_dim,
+            )
+            # Tear down aiter's custom-allreduce so its IPC handles don't
+            # race with vllm's ca_comm on the unfused fallback path.
+            with contextlib.suppress(Exception):
+                rocm_aiter_ops.destroy_aiter_allreduce()
+            return
+
+        max_token_num = max_size // (hidden_dim * element_size)
+        self.max_token_num = min(
+            max_token_num,
+            config.scheduler_config.max_num_batched_tokens,
+        )
+
+        for epsilon in [1e-5, 1e-6]:
+            self.register(
+                AiterAllreduceFusedRMSNormPattern(
+                    epsilon,
+                    self.model_dtype,
+                    self.device,
+                )
+            )
+            self.register(
+                AiterAllreduceFusedAddRMSNormPattern(
+                    epsilon,
+                    self.model_dtype,
+                    self.device,
+                )
+            )
+
+            # WARNING: This is a hack to clear the pattern matcher cache
+            # and allow multiple values of epsilon.
+            torch._inductor.pattern_matcher._seen_patterns.clear()
+
+        self.disabled = False
+
+        self.dump_patterns(config, self.pm_pass)
+
+    def is_applicable_for_range(self, compile_range: Range) -> bool:
+        if self.disabled:
+            logger.warning_once("AllReduce fusion pass is disabled.")
+            return False
+        return bool(compile_range.end <= self.max_token_num)
+
+    def __del__(self) -> None:
+        if getattr(self, "disabled", True):
+            return
+        with contextlib.suppress(Exception):
+            rocm_aiter_ops.destroy_aiter_allreduce()
diff --git a/vllm/compilation/passes/fusion/attn_quant_fusion.py b/vllm/compilation/passes/fusion/attn_quant_fusion.py
index 0e1b846af856..3e2ed2bc707e 100644
--- a/vllm/compilation/passes/fusion/attn_quant_fusion.py
+++ b/vllm/compilation/passes/fusion/attn_quant_fusion.py
@@ -1,15 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from abc import ABC, abstractmethod
+
 from collections.abc import Callable
-from typing import Any, ParamSpec
 
 import torch
-import torch._inductor.pattern_matcher as pm
-from torch import fx
 from torch._higher_order_ops.auto_functionalize import auto_functionalized
-from torch._inductor.pattern_matcher import PatternMatcherPass
 
 from vllm.config import VllmConfig, get_layers_from_vllm_config
 from vllm.logger import init_logger
@@ -21,15 +17,14 @@
 )
 from vllm.platforms import current_platform
 from vllm.utils.math_utils import round_up
+from vllm.utils.torch_utils import _USE_LAYERNAME, _encode_layer_name
 
-from ..fx_utils import is_func
-from ..inductor_pass import enable_fake_mode
-from ..vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass
+from ..vllm_inductor_pass import VllmFusionPatternMatcherPass, VllmPatternReplacement
 from .matcher_utils import MatcherQuantFP8
-from .rms_quant_fusion import QUANT_OPS, empty_bf16, empty_fp32, empty_i32
+from .rms_quant_fusion import QUANT_OPS
 
 logger = init_logger(__name__)
-P = ParamSpec("P")
+
 FP8_DTYPE = current_platform.fp8_dtype()
 FP4_DTYPE = torch.uint8
 
@@ -37,83 +32,10 @@
 RESHAPE_OP = torch.ops.aten.reshape.default
 
 
-class AttentionQuantPattern(ABC):
-    """
-    The base class for Attn+Quant fusions.
-    Should not be used directly.
-    """
+_FP8_QUANT_KEY = QuantKey(dtype=FP8_DTYPE, scale=kStaticTensorScale, symmetric=True)
 
-    def __init__(
-        self,
-        layer: Attention,
-        quant_key: QuantKey,
-        dtype: torch.dtype,
-    ) -> None:
-        self.layer = layer
-        self.layer_name = layer.layer_name
-        self.num_heads = layer.num_heads
-        self.head_size = layer.head_size
-        self.quant_key = quant_key
-        self.quant_dtype = quant_key.dtype
-        self.dtype = dtype
-
-        assert self.quant_key in QUANT_OPS, (
-            f"unsupported quantization scheme {self.quant_key}"
-        )
-        self.QUANT_OP = QUANT_OPS[self.quant_key]
-
-    def empty(self, *args: Any, **kwargs: Any) -> torch.Tensor:
-        kwargs = {"dtype": self.dtype, "device": "cuda", **kwargs}
-        return torch.empty(*args, **kwargs)
-
-    def empty_quant(self, *args: Any, **kwargs: Any) -> torch.Tensor:
-        kwargs = {"dtype": self.quant_dtype, "device": "cuda", **kwargs}
-        return torch.empty(*args, **kwargs)
-
-    @staticmethod
-    def wrap_trace_fn(
-        trace_fn: Callable[P, fx.GraphModule],
-        *process_fx_fns: Callable[[fx.GraphModule], None],
-    ) -> Callable[P, fx.GraphModule]:
-        def wrapped(*args: P.args, **kwargs: P.kwargs) -> fx.GraphModule:
-            gm = trace_fn(*args, **kwargs)
-            for process_fx in process_fx_fns:
-                process_fx(gm)
-
-            return gm
-
-        return wrapped
-
-    @staticmethod
-    def fx_view_to_reshape(gm: torch.fx.GraphModule) -> None:
-        from torch._inductor.fx_passes.post_grad import view_to_reshape
-
-        view_to_reshape(gm)
-
-    @staticmethod
-    def remove_noop_permutes(gm: torch.fx.GraphModule) -> None:
-        for node in gm.graph.nodes:
-            if not is_func(node, torch.ops.aten.permute.default):
-                continue
-
-            dims = node.args[1]
-            if any(dim != i for i, dim in enumerate(dims)):
-                continue
-
-            # this is now an identity op, remove
-            node.replace_all_uses_with(node.args[0])
-            gm.graph.erase_node(node)
-
-    def register_if_supported(self, pm_pass: PatternMatcherPass) -> None:
-        if self.layer.impl.fused_output_quant_supported(self.quant_key):
-            self._register(pm_pass)
-
-    @abstractmethod
-    def _register(self, pm_pass: PatternMatcherPass) -> None:
-        raise NotImplementedError
-
-
-class AttentionFp8StaticQuantPattern(AttentionQuantPattern):
+
+class AttnFp8StaticQuantPattern(VllmPatternReplacement[..., torch.Tensor]):
     """
     Fusion for Attention+Fp8StaticQuant.
 
@@ -123,56 +45,96 @@ class AttentionFp8StaticQuantPattern(AttentionQuantPattern):
     will be passed into Attention op as the `output_scale` argument.
     """
 
-    def __init__(
-        self,
-        layer: Attention,
-        dtype: torch.dtype,
-        symmetric: bool = True,
-    ) -> None:
-        quant_key = QuantKey(
-            dtype=FP8_DTYPE, scale=kStaticTensorScale, symmetric=symmetric
-        )
-        super().__init__(layer, quant_key, dtype)
-        self.quant_matcher = MatcherQuantFP8(quant_key)
-
-    def _register(self, pm_pass: PatternMatcherPass) -> None:
-        def pattern(
-            q: torch.Tensor,
-            k: torch.Tensor,
-            v: torch.Tensor,
-            output_attn: torch.Tensor,
-            scale: torch.Tensor,
-            kv_cache_dummy_dep: torch.Tensor,
-        ) -> torch.Tensor:
+    def __init__(self, layer: Attention, dtype: torch.dtype):
+        self._layer_name = layer.layer_name
+        self._num_heads = layer.num_heads
+        self._head_size = layer.head_size
+        self._dtype = dtype
+        self._quant_matcher = MatcherQuantFP8(_FP8_QUANT_KEY)
+
+    @property
+    def pattern(self) -> Callable[..., torch.Tensor]:
+        # When _USE_LAYERNAME is enabled (torch >= 2.11), layer_name is
+        # passed as an explicit pattern input so the pattern matcher
+        # treats it as a wildcard matching hoisted LayerName placeholders.
+        # Otherwise it stays as a closure constant (original behavior).
+        _ln = _encode_layer_name(self._layer_name)
+
+        if _USE_LAYERNAME:
+
+            def _pattern_with_ln(  # type: ignore[misc]
+                q, k, v, output_attn, scale, kv_cache_dummy_dep, layer_name
+            ):
+                at1 = auto_functionalized(
+                    ATTN_OP,
+                    query=q,
+                    key=k,
+                    value=v,
+                    output=output_attn,
+                    layer_name=layer_name,
+                    output_scale=None,
+                    output_block_scale=None,
+                    kv_cache_dummy_dep=kv_cache_dummy_dep,
+                )
+                attn_out_view = RESHAPE_OP(
+                    at1[1], [q.shape[0], self._num_heads * self._head_size]
+                )
+                return self._quant_matcher(attn_out_view, scale)[0]
+
+            return _pattern_with_ln
+
+        def _pattern(q, k, v, output_attn, scale, kv_cache_dummy_dep):
             at1 = auto_functionalized(
                 ATTN_OP,
                 query=q,
                 key=k,
                 value=v,
                 output=output_attn,
-                layer_name=self.layer_name,
+                layer_name=_ln,
                 output_scale=None,
                 output_block_scale=None,
                 kv_cache_dummy_dep=kv_cache_dummy_dep,
             )
             attn_out_view = RESHAPE_OP(
-                at1[1], [q.shape[0], self.num_heads * self.head_size]
+                at1[1], [q.shape[0], self._num_heads * self._head_size]
             )
+            return self._quant_matcher(attn_out_view, scale)[0]
+
+        return _pattern
+
+    @property
+    def replacement(self) -> Callable[..., torch.Tensor]:
+        _ln = _encode_layer_name(self._layer_name)
+
+        if _USE_LAYERNAME:
+
+            def _replacement_with_ln(  # type: ignore[misc]
+                q, k, v, output_attn, scale, kv_cache_dummy_dep, layer_name
+            ):
+                output_attn = torch.empty(
+                    [q.shape[0], self._num_heads, self._head_size],
+                    dtype=FP8_DTYPE,
+                    device=q.device,
+                )
+                at1 = auto_functionalized(
+                    ATTN_OP,
+                    query=q,
+                    key=k,
+                    value=v,
+                    output=output_attn,
+                    layer_name=layer_name,
+                    output_scale=scale,
+                    output_block_scale=None,
+                    kv_cache_dummy_dep=kv_cache_dummy_dep,
+                )
+                return RESHAPE_OP(at1[1], [-1, self._num_heads * self._head_size])
 
-            return self.quant_matcher(attn_out_view, scale)[0]
-
-        def replacement(
-            q: torch.Tensor,
-            k: torch.Tensor,
-            v: torch.Tensor,
-            output_attn: torch.Tensor,
-            scale: torch.Tensor,
-            kv_cache_dummy_dep: torch.Tensor,
-        ) -> torch.Tensor:
-            # attn output in quant_dtype
+            return _replacement_with_ln
+
+        def _replacement(q, k, v, output_attn, scale, kv_cache_dummy_dep):
             output_attn = torch.empty(
-                [q.shape[0], self.num_heads, self.head_size],
-                dtype=self.quant_dtype,
+                [q.shape[0], self._num_heads, self._head_size],
+                dtype=FP8_DTYPE,
                 device=q.device,
             )
             at1 = auto_functionalized(
@@ -181,36 +143,35 @@ def replacement(
                 key=k,
                 value=v,
                 output=output_attn,
-                layer_name=self.layer_name,
+                layer_name=_ln,
                 output_scale=scale,
                 output_block_scale=None,
                 kv_cache_dummy_dep=kv_cache_dummy_dep,
             )
-            return RESHAPE_OP(at1[1], [-1, self.num_heads * self.head_size])
-
-        inputs = [
-            self.empty(5, self.num_heads, self.head_size),  # q
-            self.empty(5, self.num_heads, self.head_size),  # k
-            self.empty(5, self.num_heads, self.head_size),  # v
-            self.empty(5, self.num_heads, self.head_size),  # attn_output
-            empty_fp32(1, 1),  # scale
-            self.empty(0),  # kv_cache_dummy_dep
+            return RESHAPE_OP(at1[1], [-1, self._num_heads * self._head_size])
+
+        return _replacement
+
+    def get_inputs(self):
+        dtype = self._dtype
+        num_heads = self._num_heads
+        head_size = self._head_size
+        inputs: list = [
+            self.empty(5, num_heads, head_size, dtype=dtype),  # q
+            self.empty(5, num_heads, head_size, dtype=dtype),  # k
+            self.empty(5, num_heads, head_size, dtype=dtype),  # v
+            self.empty(5, num_heads, head_size, dtype=dtype),  # attn_output
+            self.empty_fp32(1, 1),  # scale
+            self.empty(0, dtype=dtype),  # kv_cache_dummy_dep
         ]
-
-        pm.register_replacement(
-            pattern,
-            replacement,
-            inputs,
-            AttentionQuantPattern.wrap_trace_fn(
-                pm.fwd_only,
-                AttentionQuantPattern.fx_view_to_reshape,
-                AttentionQuantPattern.remove_noop_permutes,
-            ),
-            pm_pass,
-        )
+        if _USE_LAYERNAME:
+            inputs.append(_encode_layer_name(self._layer_name))
+        return inputs
 
 
-class AttentionNvfp4QuantPattern(AttentionQuantPattern):
+class AttnNvfp4QuantPattern(
+    VllmPatternReplacement[..., tuple[torch.Tensor, torch.Tensor]]
+):
     """
     Fusion for Attention+Nvfp4Quant.
 
@@ -220,104 +181,185 @@ class AttentionNvfp4QuantPattern(AttentionQuantPattern):
     will be passed into Attention op as the `output_scale` argument.
     """
 
-    def __init__(self, layer: Attention, dtype: torch.dtype) -> None:
-        super().__init__(layer, kNvfp4Dynamic, dtype)
-
-    def _register(self, pm_pass: PatternMatcherPass) -> None:
-        def pattern(
-            q: torch.Tensor,
-            k: torch.Tensor,
-            v: torch.Tensor,
-            output_attn: torch.Tensor,
-            output_quant: torch.Tensor,
-            output_scale: torch.Tensor,
-            input_scale: torch.Tensor,
-            kv_cache_dummy_dep: torch.Tensor,
-        ) -> tuple[torch.Tensor, torch.Tensor]:
+    def __init__(self, layer: Attention, dtype: torch.dtype):
+        self._layer_name = layer.layer_name
+        self._num_heads = layer.num_heads
+        self._head_size = layer.head_size
+        self._dtype = dtype
+        self._QUANT_OP = QUANT_OPS[kNvfp4Dynamic]
+
+    @property
+    def pattern(self) -> Callable[..., tuple[torch.Tensor, torch.Tensor]]:
+        _ln = _encode_layer_name(self._layer_name)
+
+        if _USE_LAYERNAME:
+
+            def _pattern_with_ln(  # type: ignore[misc]
+                q,
+                k,
+                v,
+                output_attn,
+                output_quant,
+                output_scale,
+                input_scale,
+                kv_cache_dummy_dep,
+                layer_name,
+            ):
+                at1 = auto_functionalized(
+                    ATTN_OP,
+                    query=q,
+                    key=k,
+                    value=v,
+                    output=output_attn,
+                    layer_name=layer_name,
+                    output_scale=None,
+                    output_block_scale=None,
+                    kv_cache_dummy_dep=kv_cache_dummy_dep,
+                )
+                attn_out_view = RESHAPE_OP(
+                    at1[1], [q.shape[0], self._num_heads * self._head_size]
+                )
+                at2 = auto_functionalized(
+                    self._QUANT_OP,
+                    input=attn_out_view,
+                    input_scale=input_scale,
+                    is_sf_swizzled_layout=True,
+                    output=output_quant,
+                    output_scale=output_scale,
+                )
+                return at2[1], torch.ops.aten.view.dtype(at2[2], FP8_DTYPE)
+
+            return _pattern_with_ln
+
+        def _pattern(
+            q,
+            k,
+            v,
+            output_attn,
+            output_quant,
+            output_scale,
+            input_scale,
+            kv_cache_dummy_dep,
+        ):
             at1 = auto_functionalized(
                 ATTN_OP,
                 query=q,
                 key=k,
                 value=v,
                 output=output_attn,
-                layer_name=self.layer_name,
+                layer_name=_ln,
                 output_scale=None,
                 output_block_scale=None,
                 kv_cache_dummy_dep=kv_cache_dummy_dep,
             )
             attn_out_view = RESHAPE_OP(
-                at1[1], [q.shape[0], self.num_heads * self.head_size]
+                at1[1], [q.shape[0], self._num_heads * self._head_size]
             )
             at2 = auto_functionalized(
-                self.QUANT_OP,
+                self._QUANT_OP,
                 input=attn_out_view,
                 input_scale=input_scale,
                 is_sf_swizzled_layout=True,
                 output=output_quant,
                 output_scale=output_scale,
             )
-            output_scale_view = torch.ops.aten.view.dtype(at2[2], FP8_DTYPE)
-            return at2[1], output_scale_view
-
-        def replacement(
-            q: torch.Tensor,
-            k: torch.Tensor,
-            v: torch.Tensor,
-            output_attn: torch.Tensor,
-            output_quant: torch.Tensor,
-            output_scale: torch.Tensor,
-            input_scale: torch.Tensor,
-            kv_cache_dummy_dep: torch.Tensor,
-        ) -> tuple[torch.Tensor, torch.Tensor]:
-            # attention output in quant_dtype
+            return at2[1], torch.ops.aten.view.dtype(at2[2], FP8_DTYPE)
+
+        return _pattern
+
+    @property
+    def replacement(self) -> Callable[..., tuple[torch.Tensor, torch.Tensor]]:
+        _ln = _encode_layer_name(self._layer_name)
+
+        if _USE_LAYERNAME:
+
+            def _replacement_with_ln(  # type: ignore[misc]
+                q,
+                k,
+                v,
+                output_attn,
+                _output_quant,
+                output_scale,
+                input_scale,
+                kv_cache_dummy_dep,
+                layer_name,
+            ):
+                output_attn = torch.empty(
+                    [q.shape[0], self._num_heads, self._head_size // 2],
+                    dtype=FP4_DTYPE,
+                    device=q.device,
+                )
+                osv = torch.ops.aten.view.dtype(output_scale, FP8_DTYPE)
+                at2 = auto_functionalized(
+                    ATTN_OP,
+                    query=q,
+                    key=k,
+                    value=v,
+                    output=output_attn,
+                    layer_name=layer_name,
+                    output_scale=input_scale,
+                    output_block_scale=osv,
+                    kv_cache_dummy_dep=kv_cache_dummy_dep,
+                )
+                return RESHAPE_OP(
+                    at2[1], [-1, self._num_heads * self._head_size // 2]
+                ), at2[2]
+
+            return _replacement_with_ln
+
+        def _replacement(
+            q,
+            k,
+            v,
+            output_attn,
+            _output_quant,
+            output_scale,
+            input_scale,
+            kv_cache_dummy_dep,
+        ):
             output_attn = torch.empty(
-                [q.shape[0], self.num_heads, self.head_size // 2],
-                dtype=self.quant_dtype,
+                [q.shape[0], self._num_heads, self._head_size // 2],
+                dtype=FP4_DTYPE,
                 device=q.device,
             )
-            # attention output block scale
-            output_scale_view = torch.ops.aten.view.dtype(output_scale, FP8_DTYPE)
+            osv = torch.ops.aten.view.dtype(output_scale, FP8_DTYPE)
             at2 = auto_functionalized(
                 ATTN_OP,
                 query=q,
                 key=k,
                 value=v,
                 output=output_attn,
-                layer_name=self.layer_name,
+                layer_name=_ln,
                 output_scale=input_scale,
-                output_block_scale=output_scale_view,
+                output_block_scale=osv,
                 kv_cache_dummy_dep=kv_cache_dummy_dep,
             )
-            output = RESHAPE_OP(at2[1], [-1, self.num_heads * self.head_size // 2])
-            return output, at2[2]
-
-        inputs = [
-            empty_bf16(5, self.num_heads, self.head_size),  # q
-            empty_bf16(5, self.num_heads, self.head_size),  # k
-            empty_bf16(5, self.num_heads, self.head_size),  # v
-            empty_bf16(5, self.num_heads, self.head_size),  # output_attn
-            self.empty_quant(5, self.num_heads * self.head_size // 2),  # output_quant
-            empty_i32(
-                128, round_up(self.num_heads * self.head_size // 16, 4)
-            ),  # output_scale
-            empty_fp32(1, 1),  # input_scale
-            self.empty(0),  # kv_cache_dummy_dep
+            return RESHAPE_OP(
+                at2[1], [-1, self._num_heads * self._head_size // 2]
+            ), at2[2]
+
+        return _replacement
+
+    def get_inputs(self):
+        dtype = self._dtype
+        num_heads = self._num_heads
+        head_size = self._head_size
+        inputs: list = [
+            self.empty_bf16(5, num_heads, head_size),  # q
+            self.empty_bf16(5, num_heads, head_size),  # k
+            self.empty_bf16(5, num_heads, head_size),  # v
+            self.empty_bf16(5, num_heads, head_size),  # output_attn
+            self.empty(5, num_heads * head_size // 2, dtype=FP4_DTYPE),
+            self.empty_i32(128, round_up(num_heads * head_size // 16, 4)),
+            self.empty_fp32(1, 1),  # input_scale
+            self.empty(0, dtype=dtype),  # kv_cache_dummy_dep
         ]
+        if _USE_LAYERNAME:
+            inputs.append(_encode_layer_name(self._layer_name))
+        return inputs
 
-        pm.register_replacement(
-            pattern,
-            replacement,
-            inputs,
-            AttentionQuantPattern.wrap_trace_fn(
-                pm.fwd_only,
-                AttentionQuantPattern.fx_view_to_reshape,
-                AttentionQuantPattern.remove_noop_permutes,
-            ),
-            pm_pass,
-        )
 
-
-class AttnFusionPass(VllmPatternMatcherPass):
+class AttnQuantFusionPass(VllmFusionPatternMatcherPass):
     """
     This pass fuses post-attention quantization onto attention if supported.
 
@@ -330,43 +372,32 @@ class AttnFusionPass(VllmPatternMatcherPass):
     support are attention kernels, which need to support fusing output quant.
     """
 
-    @enable_fake_mode
     def __init__(self, config: VllmConfig) -> None:
-        super().__init__(config)
-
-        self.patterns = PatternMatcherPass(pass_name="attn_fusion_pass")
+        super().__init__(config, "attn_quant_fusion")
 
-        attn_layers = get_layers_from_vllm_config(config, Attention)
-        for layer_name, layer in attn_layers.items():
-            pattern_fp8 = AttentionFp8StaticQuantPattern(
-                layer, config.model_config.dtype
-            )
-            pattern_fp8.register_if_supported(self.patterns)
+        dtype = config.model_config.dtype
+        layers = list(get_layers_from_vllm_config(config, Attention).values())
 
-            if current_platform.is_cuda() and hasattr(torch.ops._C, "scaled_fp4_quant"):
-                pattern_nvfp4 = AttentionNvfp4QuantPattern(
-                    layer, config.model_config.dtype
-                )
-                pattern_nvfp4.register_if_supported(self.patterns)
-
-        if len(attn_layers) == 0:
+        if len(layers) == 0:
             logger.warning(
                 "Attention + quant fusion is enabled, but no attention layers "
                 "were found in CompilationConfig.static_forward_context "
                 "so no fusion patterns were registered."
             )
 
-        self.dump_patterns(config, self.patterns)
-
-    @VllmInductorPass.time_and_log
-    def __call__(self, graph: torch.fx.graph.Graph) -> None:
-        self.matched_count = self.patterns.apply(graph)
-        logger.debug("Fused quant onto %s attention nodes", self.matched_count)
-
-    def uuid(self) -> str:
-        return VllmInductorPass.hash_source(
-            self,
-            AttentionQuantPattern,
-            AttentionFp8StaticQuantPattern,
-            AttentionNvfp4QuantPattern,
-        )
+        # When _USE_LAYERNAME is enabled, layer_name is a wildcard so all
+        # layers produce the same pattern — register once then break.
+        for layer in layers:
+            if layer.impl.fused_output_quant_supported(_FP8_QUANT_KEY):
+                self.register(AttnFp8StaticQuantPattern(layer, dtype))
+                if _USE_LAYERNAME:
+                    break
+
+        if current_platform.is_cuda() and hasattr(torch.ops._C, "scaled_fp4_quant"):
+            for layer in layers:
+                if layer.impl.fused_output_quant_supported(kNvfp4Dynamic):
+                    self.register(AttnNvfp4QuantPattern(layer, dtype))
+                    if _USE_LAYERNAME:
+                        break
+
+        self.dump_patterns(config, self.pm_pass)
diff --git a/vllm/compilation/passes/fusion/collective_fusion.py b/vllm/compilation/passes/fusion/collective_fusion.py
index a9b64adcb3f1..29d79c9b92ce 100644
--- a/vllm/compilation/passes/fusion/collective_fusion.py
+++ b/vllm/compilation/passes/fusion/collective_fusion.py
@@ -1,8 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from collections.abc import Callable
+from contextlib import suppress
+
 import torch
 import torch._inductor.pattern_matcher as pm
+import torch.distributed.distributed_c10d as c10d
 import torch.fx as fx
 from torch._inductor.pattern_matcher import PatternMatcherPass
 from torch.distributed._symmetric_memory import enable_symm_mem_for_group
@@ -15,15 +19,317 @@
 )
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import direct_register_custom_op
 
 from ..inductor_pass import enable_fake_mode
-from ..vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass
+from ..vllm_inductor_pass import (
+    VllmFusionPatternMatcherPass,
+    VllmInductorPass,
+    VllmPatternMatcherPass,
+    VllmPatternReplacement,
+)
 
 FP8_DTYPE = current_platform.fp8_dtype()
 
 logger = init_logger(__name__)
 
 
+def _flashinfer_scaled_mm_out(
+    A: torch.Tensor,
+    B: torch.Tensor,
+    *,
+    scale_a: torch.Tensor,
+    scale_b: torch.Tensor,
+    out: torch.Tensor,
+    bias: torch.Tensor | None = None,
+    scale_result: torch.Tensor | None = None,
+    out_dtype: torch.dtype | None = None,
+    use_fast_accum: bool = False,
+) -> None:
+    # Import lazily to avoid a circular import during module initialization
+    # when docs or other tooling import the pass without FlashInfer.
+    from vllm.utils.flashinfer import flashinfer_scaled_fp8_mm_out
+
+    assert bias is None, "FlashInfer symm_mem adapter does not support bias"
+    assert scale_result is None, (
+        "FlashInfer symm_mem adapter does not support result scaling"
+    )
+    assert not use_fast_accum, (
+        "FlashInfer symm_mem adapter does not support use_fast_accum"
+    )
+    assert A.ndim == 2 and B.ndim == 2 and out.ndim == 2, (
+        "FlashInfer symm_mem adapter expects 2D inputs and output"
+    )
+    assert scale_a.numel() == 1 and scale_b.numel() == 1, (
+        "FlashInfer symm_mem adapter only supports tensor-wise FP8 scales"
+    )
+
+    flashinfer_scaled_fp8_mm_out(
+        A,
+        B,
+        scale_a,
+        scale_b,
+        out=out,
+        out_dtype=out_dtype or out.dtype,
+    )
+
+
+def _flashinfer_fp4_mm_out(
+    A: torch.Tensor,
+    B: torch.Tensor,
+    *,
+    scale_a: torch.Tensor,
+    scale_b: torch.Tensor,
+    out: torch.Tensor,
+    alpha: torch.Tensor,
+    out_dtype: torch.dtype | None = None,
+    use_8x4_sf_layout: bool = False,
+    backend: str = "cutlass",
+) -> None:
+    from vllm.utils.flashinfer import flashinfer_scaled_fp4_mm_out
+
+    assert A.ndim == 2 and B.ndim == 2 and out.ndim == 2, (
+        "FlashInfer FP4 symm_mem adapter expects 2D inputs and output"
+    )
+    flashinfer_scaled_fp4_mm_out(
+        A,
+        B,
+        scale_a,
+        scale_b,
+        alpha,
+        out=out,
+        out_dtype=out_dtype or out.dtype,
+        use_8x4_sf_layout=use_8x4_sf_layout,
+        backend=backend,
+    )
+
+
+def fused_flashinfer_scaled_matmul_reduce_scatter_fake(
+    A: torch.Tensor,
+    B: torch.Tensor,
+    A_scale: torch.Tensor,
+    B_scale: torch.Tensor,
+    reduce_op: str,
+    orig_scatter_dim: int,
+    scatter_dim_after_maybe_reshape: int,
+    group_name: str,
+    output_shape: list[int],
+    out_dtype: torch.dtype | None = None,
+) -> torch.Tensor:
+    world_size = c10d._resolve_process_group(group_name).size()
+    result_shape = list(output_shape)
+    result_shape[orig_scatter_dim] //= world_size
+    return torch.empty(
+        result_shape,
+        dtype=out_dtype or torch.bfloat16,
+        device=A.device,
+    )
+
+
+def fused_flashinfer_scaled_matmul_reduce_scatter(
+    A: torch.Tensor,
+    B: torch.Tensor,
+    A_scale: torch.Tensor,
+    B_scale: torch.Tensor,
+    reduce_op: str,
+    orig_scatter_dim: int,
+    scatter_dim_after_maybe_reshape: int,
+    group_name: str,
+    output_shape: list[int],
+    out_dtype: torch.dtype | None = None,
+) -> torch.Tensor:
+    assert orig_scatter_dim == 0 and scatter_dim_after_maybe_reshape == 0, (
+        "FlashInfer symm_mem adapter currently only supports scatter_dim=0"
+    )
+    world_size = c10d._resolve_process_group(group_name).size()
+    assert A.ndim == 2 and B.ndim == 2, "FlashInfer symm_mem adapter expects 2D inputs"
+    assert A.is_contiguous(), "FlashInfer symm_mem adapter expects contiguous A"
+    assert A_scale.numel() == 1 and B_scale.numel() == 1, (
+        "FlashInfer symm_mem adapter only supports tensor-wise FP8 scales"
+    )
+    assert A.shape[0] % world_size == 0, (
+        "FlashInfer symm_mem adapter expects M divisible by world size"
+    )
+
+    kwargs = {
+        "scale_b": B_scale,
+        "bias": None,
+        "scale_result": None,
+        "out_dtype": out_dtype,
+        "use_fast_accum": False,
+    }
+    return torch.distributed._symmetric_memory._fused_scaled_matmul_reduce_scatter_impl(
+        mm_out_op=_flashinfer_scaled_mm_out,
+        A=A,
+        B=B,
+        A_scale=A_scale,
+        kwargs=kwargs,
+        out_dtype=out_dtype,
+        reduce_op=reduce_op,
+        orig_scatter_dim=orig_scatter_dim,
+        scatter_dim_after_maybe_reshape=scatter_dim_after_maybe_reshape,
+        group_name=group_name,
+        output_shape=output_shape,
+    )
+
+
+def fused_all_gather_flashinfer_scaled_matmul_fake(
+    A_shard: torch.Tensor,
+    B: torch.Tensor,
+    A_scale: torch.Tensor,
+    B_scale: torch.Tensor,
+    gather_dim: int,
+    group_name: str,
+    out_dtype: torch.dtype | None = None,
+) -> torch.Tensor:
+    world_size = c10d._resolve_process_group(group_name).size()
+    output_shape = list(A_shard.shape)
+    output_shape[gather_dim] *= world_size
+    output_shape[-1] = B.shape[1]
+    return torch.empty(
+        output_shape,
+        dtype=out_dtype or torch.bfloat16,
+        device=A_shard.device,
+    )
+
+
+def fused_all_gather_flashinfer_scaled_matmul(
+    A_shard: torch.Tensor,
+    B: torch.Tensor,
+    A_scale: torch.Tensor,
+    B_scale: torch.Tensor,
+    gather_dim: int,
+    group_name: str,
+    out_dtype: torch.dtype | None = None,
+) -> torch.Tensor:
+    assert gather_dim == 0, (
+        "FlashInfer symm_mem adapter currently only supports gather_dim=0"
+    )
+    _, outputs = torch.distributed._symmetric_memory._fused_all_gather_matmul_impl(
+        mm_out_op=_flashinfer_scaled_mm_out,
+        A_shard=A_shard,
+        Bs=[B],
+        A_scale=A_scale,
+        kwargs_list=[
+            {
+                "scale_b": B_scale,
+                "bias": None,
+                "scale_result": None,
+                "out_dtype": out_dtype,
+                "use_fast_accum": False,
+            }
+        ],
+        out_dtypes=[out_dtype],
+        gather_dim=gather_dim,
+        group_name=group_name,
+        return_A=False,
+    )
+    return outputs[0]
+
+
+def fused_all_gather_flashinfer_fp4_matmul_fake(
+    A_shard: torch.Tensor,
+    B: torch.Tensor,
+    A_scale_shard: torch.Tensor,
+    B_scale: torch.Tensor,
+    alpha: torch.Tensor,
+    gather_dim: int,
+    group_name: str,
+    out_dtype: torch.dtype | None = None,
+    view_a_scale_as_fp8: bool = False,
+    use_8x4_sf_layout: bool = False,
+    backend: str = "cutlass",
+) -> torch.Tensor:
+    world_size = c10d._resolve_process_group(group_name).size()
+    output_shape = list(A_shard.shape)
+    output_shape[gather_dim] *= world_size
+    output_shape[-1] = B.shape[1]
+    return torch.empty(
+        output_shape,
+        dtype=out_dtype or torch.bfloat16,
+        device=A_shard.device,
+    )
+
+
+def fused_all_gather_flashinfer_fp4_matmul(
+    A_shard: torch.Tensor,
+    B: torch.Tensor,
+    A_scale_shard: torch.Tensor,
+    B_scale: torch.Tensor,
+    alpha: torch.Tensor,
+    gather_dim: int,
+    group_name: str,
+    out_dtype: torch.dtype | None = None,
+    view_a_scale_as_fp8: bool = False,
+    use_8x4_sf_layout: bool = False,
+    backend: str = "cutlass",
+) -> torch.Tensor:
+    assert gather_dim == 0, (
+        "FlashInfer FP4 symm_mem adapter currently only supports gather_dim=0"
+    )
+    assert A_shard.ndim == 2 and A_scale_shard.ndim == 2 and B.ndim == 2, (
+        "FlashInfer FP4 symm_mem adapter expects 2D inputs"
+    )
+    if view_a_scale_as_fp8:
+        A_scale_shard = A_scale_shard.view(torch.float8_e4m3fn)
+
+    group = c10d._resolve_process_group(group_name)
+    world_size = group.size()
+    output = A_shard.new_empty(
+        A_shard.shape[0] * world_size,
+        B.shape[1],
+        dtype=out_dtype or torch.bfloat16,
+    )
+    output_shards = output.chunk(world_size)
+
+    A = A_shard.new_empty(A_shard.shape[0] * world_size, A_shard.shape[1])
+    A_scale = A_scale_shard.new_empty(
+        A_scale_shard.shape[0] * world_size,
+        A_scale_shard.shape[1],
+    )
+
+    def fp4_shard_consumer(shards: list[torch.Tensor], rank: int) -> None:
+        _flashinfer_fp4_mm_out(
+            shards[0],
+            B,
+            scale_a=shards[1],
+            scale_b=B_scale,
+            alpha=alpha,
+            out=output_shards[rank],
+            out_dtype=out_dtype,
+            use_8x4_sf_layout=use_8x4_sf_layout,
+            backend=backend,
+        )
+
+    torch.distributed._symmetric_memory._pipelined_multi_all_gather_and_consume(
+        [A_shard, A_scale_shard],
+        fp4_shard_consumer,
+        [A, A_scale],
+        group_name,
+        False,
+    )
+    return output
+
+
+direct_register_custom_op(
+    op_name="fused_flashinfer_scaled_matmul_reduce_scatter",
+    op_func=fused_flashinfer_scaled_matmul_reduce_scatter,
+    fake_impl=fused_flashinfer_scaled_matmul_reduce_scatter_fake,
+)
+
+direct_register_custom_op(
+    op_name="fused_all_gather_flashinfer_scaled_matmul",
+    op_func=fused_all_gather_flashinfer_scaled_matmul,
+    fake_impl=fused_all_gather_flashinfer_scaled_matmul_fake,
+)
+
+direct_register_custom_op(
+    op_name="fused_all_gather_flashinfer_fp4_matmul",
+    op_func=fused_all_gather_flashinfer_fp4_matmul,
+    fake_impl=fused_all_gather_flashinfer_fp4_matmul_fake,
+)
+
+
 class BasePattern:
     def __init__(self, dtype: torch.dtype, device: str | None) -> None:
         self.dtype = dtype
@@ -371,53 +677,304 @@ def replacement(
         )
 
 
-class AsyncTPPass(VllmPatternMatcherPass):
+class FlashInferBMMFP8ReduceScatterPattern(
+    BasePattern, VllmPatternReplacement[..., torch.Tensor]
+):
+    def get_inputs(self) -> list[torch.Tensor]:
+        a_2d = torch.empty([16, 16], device=self.device, dtype=FP8_DTYPE)
+        b_2d = (
+            torch.empty([16, 16], device=self.device, dtype=FP8_DTYPE)
+            .contiguous()
+            .transpose(0, 1)
+        )
+        a_scale = torch.empty([1], device=self.device, dtype=torch.float32)
+        b_scale = torch.empty([1], device=self.device, dtype=torch.float32)
+        return [a_2d, b_2d, a_scale, b_scale]
+
+    @property
+    def pattern(self) -> Callable[..., torch.Tensor]:
+        def _pattern(
+            a_2d: torch.Tensor,
+            b_2d: torch.Tensor,
+            a_scale: torch.Tensor,
+            b_scale: torch.Tensor,
+        ) -> torch.Tensor:
+            bmm = torch.ops.vllm.bmm_fp8.default(
+                torch.ops.aten.unsqueeze.default(a_2d, 0),
+                torch.ops.aten.unsqueeze.default(b_2d, 0),
+                a_scale,
+                b_scale,
+                self.dtype,
+                "auto",
+            )
+            output = torch.ops.aten.reshape.default(bmm, list(bmm.shape[1:]))
+            return torch.ops.vllm.reduce_scatter.default(
+                output,
+                dim=0,
+                world_size=self.tp_size,
+                group_name=self.tp.unique_name,
+            )
+
+        return _pattern
+
+    @property
+    def replacement(self) -> Callable[..., torch.Tensor]:
+        def _replacement(
+            a_2d: torch.Tensor,
+            b_2d: torch.Tensor,
+            a_scale: torch.Tensor,
+            b_scale: torch.Tensor,
+        ) -> torch.Tensor:
+            return torch.ops.vllm.fused_flashinfer_scaled_matmul_reduce_scatter.default(
+                a_2d,
+                b_2d,
+                a_scale,
+                b_scale,
+                "sum",
+                0,
+                0,
+                self.tp.device_group.group_name,
+                [a_2d.shape[0], b_2d.shape[1]],
+                self.dtype,
+            )
+
+        return _replacement
+
+
+class FlashInferAllGatherBMMFP8Pattern(
+    BasePattern, VllmPatternReplacement[..., torch.Tensor]
+):
+    def get_inputs(self) -> list[torch.Tensor]:
+        a_shard_2d = torch.empty([8, 16], device=self.device, dtype=FP8_DTYPE)
+        b_2d = (
+            torch.empty([16, 16], device=self.device, dtype=FP8_DTYPE)
+            .contiguous()
+            .transpose(0, 1)
+        )
+        a_scale = torch.empty([1], device=self.device, dtype=torch.float32)
+        b_scale = torch.empty([1], device=self.device, dtype=torch.float32)
+        return [a_shard_2d, b_2d, a_scale, b_scale]
+
+    @property
+    def pattern(self) -> Callable[..., torch.Tensor]:
+        def _pattern(
+            a_shard_2d: torch.Tensor,
+            b_2d: torch.Tensor,
+            a_scale: torch.Tensor,
+            b_scale: torch.Tensor,
+        ) -> torch.Tensor:
+            all_gather = torch.ops.vllm.all_gather.default(
+                a_shard_2d,
+                dim=0,
+                world_size=self.tp_size,
+                group_name=self.tp.unique_name,
+            )
+            return torch.ops.vllm.bmm_fp8.default(
+                torch.ops.aten.unsqueeze.default(all_gather, 0),
+                torch.ops.aten.unsqueeze.default(b_2d, 0),
+                a_scale,
+                b_scale,
+                self.dtype,
+                "auto",
+            )
+
+        return _pattern
+
+    @property
+    def replacement(self) -> Callable[..., torch.Tensor]:
+        def _replacement(
+            a_shard_2d: torch.Tensor,
+            b_2d: torch.Tensor,
+            a_scale: torch.Tensor,
+            b_scale: torch.Tensor,
+        ) -> torch.Tensor:
+            fused = torch.ops.vllm.fused_all_gather_flashinfer_scaled_matmul.default(
+                a_shard_2d,
+                b_2d,
+                a_scale,
+                b_scale,
+                0,
+                self.tp.device_group.group_name,
+                self.dtype,
+            )
+            return torch.ops.aten.unsqueeze.default(fused, 0)
+
+        return _replacement
+
+
+class FlashInferAllGatherFP4Pattern(
+    BasePattern, VllmPatternReplacement[..., torch.Tensor]
+):
+    def __init__(
+        self,
+        dtype: torch.dtype,
+        device: str | None,
+        backend: str,
+        use_8x4_sf_layout: bool,
+        a_scale_view: str,
+    ) -> None:
+        super().__init__(dtype, device)
+        self.backend = backend
+        self.use_8x4_sf_layout = use_8x4_sf_layout
+        self.a_scale_view = a_scale_view
+
+    def get_inputs(self) -> list[torch.Tensor]:
+        a_shard_2d = torch.empty([8, 8], device=self.device, dtype=torch.uint8)
+        b_2d = torch.empty([8, 16], device=self.device, dtype=torch.uint8)
+        a_scale_shard = torch.empty([128, 4], device=self.device, dtype=torch.int32)
+        b_scale = torch.empty([4, 128], device=self.device, dtype=torch.uint8)
+        alpha = torch.empty([], device=self.device, dtype=torch.float32)
+        return [
+            a_shard_2d,
+            b_2d,
+            a_scale_shard,
+            b_scale,
+            alpha,
+        ]
+
+    @property
+    def pattern(self) -> Callable[..., torch.Tensor]:
+        def _pattern(
+            a_shard_2d: torch.Tensor,
+            b_2d: torch.Tensor,
+            a_scale_shard: torch.Tensor,
+            b_scale: torch.Tensor,
+            alpha: torch.Tensor,
+        ) -> torch.Tensor:
+            all_gather_a = torch.ops.vllm.all_gather.default(
+                a_shard_2d,
+                dim=0,
+                world_size=self.tp_size,
+                group_name=self.tp.unique_name,
+            )
+            all_gather_a_scale = torch.ops.vllm.all_gather.default(
+                a_scale_shard,
+                dim=0,
+                world_size=self.tp_size,
+                group_name=self.tp.unique_name,
+            )
+            a_scale = all_gather_a_scale
+            if self.a_scale_view in ("float8", "float8_uint8"):
+                a_scale = torch.ops.aten.view.dtype(a_scale, torch.float8_e4m3fn)
+            if self.a_scale_view in ("uint8", "float8_uint8"):
+                a_scale = torch.ops.aten.view.dtype(a_scale, torch.uint8)
+            return torch.ops.vllm.flashinfer_mm_fp4.default(
+                all_gather_a,
+                b_2d,
+                a_scale,
+                b_scale,
+                alpha,
+                self.dtype,
+                self.use_8x4_sf_layout,
+                self.backend,
+            )
+
+        return _pattern
+
+    @property
+    def replacement(self) -> Callable[..., torch.Tensor]:
+        def _replacement(
+            a_shard_2d: torch.Tensor,
+            b_2d: torch.Tensor,
+            a_scale_shard: torch.Tensor,
+            b_scale: torch.Tensor,
+            alpha: torch.Tensor,
+        ) -> torch.Tensor:
+            return torch.ops.vllm.fused_all_gather_flashinfer_fp4_matmul.default(
+                a_shard_2d,
+                b_2d,
+                a_scale_shard,
+                b_scale,
+                alpha,
+                0,
+                self.tp.device_group.group_name,
+                self.dtype,
+                self.a_scale_view in ("float8", "float8_uint8"),
+                self.use_8x4_sf_layout,
+                self.backend,
+            )
+
+        return _replacement
+
+
+class AsyncTPPass(VllmFusionPatternMatcherPass):
     @enable_fake_mode
     def __init__(self, config: VllmConfig) -> None:
-        super().__init__(config)
+        super().__init__(config, pass_name="async_tp_pass")
 
-        # Enable symmetric memory for the TP process group
         enable_symm_mem_for_group(get_tp_group().device_group.group_name)
-        self.patterns: PatternMatcherPass = PatternMatcherPass(
-            pass_name="async_tp_pass"
-        )
-        GEMMReduceScatterPattern(self.model_dtype, self.device).register(self.patterns)
+        GEMMReduceScatterPattern(self.model_dtype, self.device).register(self.pm_pass)
 
-        AllGatherGEMMPattern(self.model_dtype, self.device).register(self.patterns)
+        AllGatherGEMMPattern(self.model_dtype, self.device).register(self.pm_pass)
 
         # These fusions are enabled only for bfloat16 models because
         # `scaled_mm` or `cutlass_scaled_mm` with per-token (row-wise) scaling
         # only supports bfloat16 as the output dtype.
         if self.model_dtype == torch.bfloat16:
             ScaledMMReduceScatterPattern(self.model_dtype, self.device).register(
-                self.patterns
+                self.pm_pass
             )
             AllGatherScaledMMPattern(self.model_dtype, self.device).register(
-                self.patterns
+                self.pm_pass
             )
 
             CutlassScaledMMReduceScatterPattern(self.model_dtype, self.device).register(
-                self.patterns
+                self.pm_pass
             )
             AllGatherCutlassScaledMMPattern(self.model_dtype, self.device).register(
-                self.patterns
+                self.pm_pass
             )
-
-        self.dump_patterns(config, self.patterns)
+            with suppress(ImportError):
+                import vllm.utils.flashinfer  # noqa: F401
+            if hasattr(torch.ops.vllm, "bmm_fp8"):
+                self.register(
+                    FlashInferAllGatherBMMFP8Pattern(self.model_dtype, self.device)
+                )
+                self.register(
+                    FlashInferBMMFP8ReduceScatterPattern(self.model_dtype, self.device)
+                )
+            if hasattr(torch.ops.vllm, "flashinfer_mm_fp4"):
+                for backend in ("cutlass", "cudnn"):
+                    for a_scale_view in ("float8_uint8", "uint8"):
+                        self.register(
+                            FlashInferAllGatherFP4Pattern(
+                                self.model_dtype,
+                                self.device,
+                                backend,
+                                use_8x4_sf_layout=False,
+                                a_scale_view=a_scale_view,
+                            )
+                        )
+                for use_8x4_sf_layout in (False, True):
+                    for a_scale_view in ("float8",):
+                        self.register(
+                            FlashInferAllGatherFP4Pattern(
+                                self.model_dtype,
+                                self.device,
+                                "trtllm",
+                                use_8x4_sf_layout=use_8x4_sf_layout,
+                                a_scale_view=a_scale_view,
+                            )
+                        )
+                # NVFP4 reduce-scatter does not need scale communication: FP4
+                # scales are consumed by the local GEMM and only BF16 partial
+                # outputs are reduced. Keep this PR scoped to the all-gather
+                # path; reduce-scatter needs a dedicated FP4 producer rather
+                # than the existing FP8-style helper.
+
+        self.dump_patterns(config, self.pm_pass)
 
     def is_applicable_for_range(self, compile_range: Range) -> bool:
-        # This pass is applied on top of the sequence parallelism pass.
-        # It inherits the same applicability condition as `SequenceParallelismPass`.
-        # See `SequenceParallelismPass.is_applicable` for more details.
-        if (
-            not self.compilation_config.splitting_ops
-            or self.compilation_config.use_inductor_graph_partition
-        ):
-            return True
-        tp_size = get_tensor_model_parallel_world_size()
-        return bool(compile_range.is_single_size() and compile_range.end % tp_size == 0)
+        # This pass is applied on top of the sequence parallelism pass,
+        # which is only supported in fullgraph compilation mode.
+        assert (
+            self.compilation_config.use_inductor_graph_partition
+            or not self.compilation_config.splitting_ops
+        ), "AsyncTPPass requires full-graph compilation"
+        return True
 
     @VllmInductorPass.time_and_log
     def __call__(self, graph: fx.Graph) -> None:
-        self.matched_count = self.patterns.apply(graph)
+        self.matched_count = self.pm_pass.apply(graph)
+        VllmPatternMatcherPass.match_table[self.pass_name] += self.matched_count
         logger.debug("Replaced %s patterns", self.matched_count)
diff --git a/vllm/compilation/passes/fusion/matcher_utils.py b/vllm/compilation/passes/fusion/matcher_utils.py
index ec36c12d1776..9f25a6805e93 100644
--- a/vllm/compilation/passes/fusion/matcher_utils.py
+++ b/vllm/compilation/passes/fusion/matcher_utils.py
@@ -10,7 +10,7 @@
 from vllm._aiter_ops import rocm_aiter_ops
 from vllm.config import get_current_vllm_config
 from vllm.model_executor.layers.activation import SiluAndMul
-from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.layernorm import RMSNormGated
 from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     GroupShape,
@@ -24,10 +24,11 @@
     kNvfp4Dynamic,
 )
 from vllm.model_executor.layers.rotary_embedding import RotaryEmbedding
+from vllm.model_executor.layers.rotary_embedding.deepseek_scaling_rope import (
+    DeepseekScalingRotaryEmbedding,
+)
 from vllm.platforms import current_platform
 
-RMS_OP = torch.ops._C.rms_norm.default
-RMS_ADD_OP = torch.ops._C.fused_add_rms_norm.default
 ROTARY_OP = torch.ops._C.rotary_embedding.default
 FLASHINFER_ROTARY_OP = torch.ops.vllm.flashinfer_rotary_embedding.default
 
@@ -160,131 +161,144 @@ def forward_native(
         return result
 
 
-class MatcherRMSNorm(MatcherCustomOp):
+class MatcherRMSNormGated(MatcherCustomOp):
+    """Matches RMSNormGated with norm_before_gate=True and group_size=None."""
+
     def __init__(
         self,
         epsilon: float,
         enabled: bool | None = None,
-        match_rocm_aiter: bool = False,
+        norm_before_gate: bool = True,
+        group_size: int | None = None,
     ) -> None:
         if enabled is None:
-            enabled = RMSNorm.enabled()
+            enabled = RMSNormGated.enabled()
 
         super().__init__(enabled)
         self.epsilon = epsilon
-        self._rmsnorm_op = RMS_OP
-        self.match_rocm_aiter = match_rocm_aiter
-
-        if match_rocm_aiter:
-            self._rmsnorm_op = rocm_aiter_ops.get_rmsnorm_op()
+        self.norm_before_gate = norm_before_gate
+        self.group_size = group_size
 
     def inputs(self) -> list[torch.Tensor]:
-        input = self.empty(5, 16) if self.enabled else self.empty_f32(5, 16)
+        x = self.empty(5, 16)
+        z = self.empty(5, 16)
         weight = self.empty(16)
-        return [input, weight]
-
-    def forward_rocm_aiter(
-        self,
-        input: torch.Tensor,
-        weight: torch.Tensor,
-    ) -> torch.Tensor:
-        return self._rmsnorm_op(
-            x=input,
-            weight=weight,
-            variance_epsilon=self.epsilon,
-        )
+        return [x, z, weight]
 
     def forward_custom(
         self,
-        input: torch.Tensor,
+        x: torch.Tensor,
+        z: torch.Tensor,
         weight: torch.Tensor,
     ) -> torch.Tensor:
-        if self.match_rocm_aiter:
-            return self.forward_rocm_aiter(input, weight)
-
-        result = torch.empty_like(input)
-        _, result = auto_functionalized(
-            self._rmsnorm_op,
-            result=result,
-            input=input,
-            weight=weight,
-            epsilon=self.epsilon,
+        from vllm.model_executor.layers.fla.ops.layernorm_guard import (
+            rmsnorm_fn,
         )
 
-        return result
+        return rmsnorm_fn(
+            x,
+            weight,
+            bias=None,
+            z=z,
+            eps=self.epsilon,
+            group_size=self.group_size,
+            norm_before_gate=self.norm_before_gate,
+        )
 
     def forward_native(
         self,
-        input: torch.Tensor,
+        x: torch.Tensor,
+        z: torch.Tensor,
         weight: torch.Tensor,
     ) -> torch.Tensor:
-        return RMSNorm.forward_static(
-            input, self.epsilon, input.size(-1), self.model_dtype, weight
+        return RMSNormGated.forward_static(
+            x,
+            z,
+            weight,
+            self.epsilon,
+            self.model_dtype,
+            group_size=self.group_size,
+            norm_before_gate=self.norm_before_gate,
         )
 
 
-class MatcherFusedAddRMSNorm(MatcherCustomOp):
+class MatcherDeepseekScalingRotaryEmbedding(MatcherCustomOp):
     def __init__(
         self,
-        epsilon: float,
+        is_neox: bool,
+        head_size: int,
+        num_heads: int,
+        num_kv_heads: int,
+        use_flashinfer: bool = False,
         enabled: bool | None = None,
-        match_rocm_aiter: bool = False,
     ) -> None:
         if enabled is None:
-            enabled = RMSNorm.enabled()
+            enabled = DeepseekScalingRotaryEmbedding.enabled()
 
         super().__init__(enabled)
-        self.epsilon = epsilon
-        self.match_rocm_aiter = match_rocm_aiter
-
-        self._rmsnorm_op = RMS_ADD_OP
-
-        if match_rocm_aiter:
-            self._rmsnorm_op = rocm_aiter_ops.get_rmsnorm_fused_add_op()
+        self.is_neox = is_neox
+        self.head_size = head_size
+        self.num_heads = num_heads
+        self.num_kv_heads = num_kv_heads
+        self.q_size = self.num_heads * self.head_size
+        self.kv_size = self.num_kv_heads * self.head_size
+        self.rotary_dim = head_size
+        self.use_flashinfer = use_flashinfer
 
     def inputs(self) -> list[torch.Tensor]:
-        input = self.empty(5, 16) if self.enabled else self.empty_f32(5, 16)
-        weight = self.empty(16)
-        residual = self.empty(5, 16)
-        return [input, weight, residual]
-
-    def forward_rocm_aiter(
-        self,
-        input: torch.Tensor,
-        weight: torch.Tensor,
-        residual: torch.Tensor,
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        return self._rmsnorm_op(  # type: ignore[no-any-return]
-            x=input, residual=residual, weight=weight, variance_epsilon=self.epsilon
-        )
+        positions = self.empty_int64(5)
+        query = self.empty(5, self.num_heads, self.head_size)
+        key = self.empty(5, self.num_kv_heads, self.head_size)
+        cos_sin_cache = self.empty(4096, self.rotary_dim)
+        return [positions, query, key, cos_sin_cache]
 
     def forward_custom(
         self,
-        input: torch.Tensor,
-        weight: torch.Tensor,
-        residual: torch.Tensor,
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        if self.match_rocm_aiter:
-            return self.forward_rocm_aiter(input, weight, residual)
-
-        _, result, residual = auto_functionalized(
-            self._rmsnorm_op,
-            input=input,
-            residual=residual,
-            weight=weight,
-            epsilon=self.epsilon,
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: torch.Tensor | None,
+        cos_sin_cache: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
+        if self.use_flashinfer:
+            torch.ops.vllm.flashinfer_rotary_embedding(
+                positions,
+                query,
+                key,
+                self.head_size,
+                cos_sin_cache,
+                self.is_neox,
+            )
+            return query, key
+        result: tuple[torch.Tensor, torch.Tensor | None] = (
+            DeepseekScalingRotaryEmbedding.forward_static(
+                positions,
+                query,
+                key,
+                self.head_size,
+                self.rotary_dim,
+                cos_sin_cache,
+                self.is_neox,
+            )
         )
-
-        return result, residual
+        return result
 
     def forward_native(
         self,
-        input: torch.Tensor,
-        weight: torch.Tensor,
-        residual: torch.Tensor,
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        result: tuple[torch.Tensor, torch.Tensor] = RMSNorm.forward_static(
-            input, self.epsilon, input.size(-1), self.model_dtype, weight, residual
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: torch.Tensor | None,
+        cos_sin_cache: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
+        result: tuple[torch.Tensor, torch.Tensor | None] = (
+            DeepseekScalingRotaryEmbedding.forward_static(
+                positions,
+                query,
+                key,
+                self.head_size,
+                self.rotary_dim,
+                cos_sin_cache,
+                self.is_neox,
+            )
         )
         return result
 
@@ -320,12 +334,7 @@ def __init__(
                     "ROCm aiter fusion pass currently supports "
                     "quantization operation with group_size 128"
                 )
-                if current_platform.is_fp8_fnuz():
-                    self.QUANT_OP = rocm_aiter_ops.get_group_quant_op()
-                else:
-                    self.QUANT_OP = (
-                        torch.ops.vllm.triton_per_token_group_quant_fp8.default
-                    )
+                self.QUANT_OP = rocm_aiter_ops.get_group_quant_op()
 
         else:
             assert quant_key in QUANT_OPS, (
diff --git a/vllm/compilation/passes/fusion/minimax_qk_norm_fusion.py b/vllm/compilation/passes/fusion/minimax_qk_norm_fusion.py
new file mode 100644
index 000000000000..7445028da63f
--- /dev/null
+++ b/vllm/compilation/passes/fusion/minimax_qk_norm_fusion.py
@@ -0,0 +1,340 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""
+Fusion pass: replace MiniMax QK allreduce + RMS norm with the Lamport
+fused kernel (minimax_allreduce_rms_qk) for decode-size batches.
+
+Pattern (inlined forward_qk in compiled graph):
+    q, k, v = qkv.split([q_size, kv_size, kv_size], -1)
+    q_fp32 = q.to(float32); k_fp32 = k.to(float32)
+    q_var = q_fp32.pow(2).mean(-1, keepdim=True)
+    k_var = k_fp32.pow(2).mean(-1, keepdim=True)
+    qk_var = cat([q_var, k_var], -1)
+    qk_var = allreduce(qk_var) / tp_world
+    q_var, k_var = qk_var.chunk(2, -1)
+    q_out = (q_fp32 * rsqrt(q_var + eps) * q_weight).to(orig_dtype)
+    k_out = (k_fp32 * rsqrt(k_var + eps) * k_weight).to(orig_dtype)
+    return q_out, k_out, v
+
+Replacement (pure, no in-place on qkv/q/k):
+    q_out, k_out = minimax_qk_norm_fused(qkv, q_weight, k_weight, workspace, ...)
+    v = qkv.split([q_size, kv_size, kv_size], -1)[2]
+    return q_out, k_out, v
+
+is_applicable_for_range: only fires for compile_range.end <= max_decode_tokens
+so that large prefill batches fall through to the original forward_qk (= main).
+"""
+
+import torch
+import torch._inductor.pattern_matcher as pm
+import torch.fx as fx
+from torch._inductor.pattern_matcher import PatternMatcherPass
+
+from vllm.config import VllmConfig
+from vllm.config.utils import Range
+from vllm.distributed import tensor_model_parallel_all_reduce
+from vllm.distributed.parallel_state import (
+    get_tensor_model_parallel_rank,
+    get_tensor_model_parallel_world_size,
+)
+from vllm.logger import init_logger
+from vllm.utils.torch_utils import direct_register_custom_op
+
+from ..inductor_pass import enable_fake_mode
+from ..vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass
+
+logger = init_logger(__name__)
+
+MAX_TOKEN_NUM = 2048
+
+_MINIMAX_QK_NORM_FUSED_OP = None
+if hasattr(torch.ops._C, "minimax_allreduce_rms_qk"):
+
+    def _minimax_qk_norm_fused(
+        qkv: torch.Tensor,
+        norm_weight_q: torch.Tensor,
+        norm_weight_k: torch.Tensor,
+        q_size: int,
+        kv_size: int,
+        rank: int,
+        nranks: int,
+        eps: float,
+        max_tokens: int,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        from vllm.distributed.parallel_state import get_tp_group
+        from vllm.model_executor.layers.mamba.lamport_workspace import (
+            get_allreduce_workspace,
+        )
+
+        workspace = get_allreduce_workspace(
+            rank=rank,
+            world_size=nranks,
+            max_tokens=max_tokens,
+            process_group=get_tp_group().cpu_group,
+        )
+        return torch.ops._C.minimax_allreduce_rms_qk(
+            qkv,
+            norm_weight_q,
+            norm_weight_k,
+            workspace,
+            q_size,
+            kv_size,
+            rank,
+            nranks,
+            eps,
+        )
+
+    def _minimax_qk_norm_fused_fake(
+        qkv: torch.Tensor,
+        norm_weight_q: torch.Tensor,
+        norm_weight_k: torch.Tensor,
+        q_size: int,
+        kv_size: int,
+        rank: int,
+        nranks: int,
+        eps: float,
+        max_tokens: int,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        T = qkv.shape[0]
+        return (
+            torch.empty([T, q_size], dtype=qkv.dtype, device=qkv.device),
+            torch.empty([T, kv_size], dtype=qkv.dtype, device=qkv.device),
+        )
+
+    direct_register_custom_op(
+        op_name="minimax_qk_norm_fused",
+        op_func=_minimax_qk_norm_fused,
+        fake_impl=_minimax_qk_norm_fused_fake,
+        mutates_args=[],
+    )
+    _MINIMAX_QK_NORM_FUSED_OP = torch.ops.vllm.minimax_qk_norm_fused.default
+
+
+class MiniMaxQKNormPattern:
+    """
+    Match the forward_qk allreduce+rms pattern and replace with Lamport kernel.
+    """
+
+    def __init__(
+        self,
+        q_size: int,
+        kv_size: int,
+        eps: float,
+        tp_world: int,
+        tp_rank: int,
+        max_tokens: int,
+        dtype: torch.dtype,
+        device: str | None,
+    ) -> None:
+        self.q_size = q_size
+        self.kv_size = kv_size
+        self.eps = eps
+        self.tp_world = tp_world
+        self.tp_rank = tp_rank
+        self.max_tokens = max_tokens
+        self.dtype = dtype
+        self.device = device
+
+    def get_inputs(self) -> list[torch.Tensor]:
+        T = 4
+        qkv = torch.empty(
+            [T, self.q_size + 2 * self.kv_size],
+            device=self.device,
+            dtype=self.dtype,
+        )
+        q_weight = torch.empty([self.q_size], device=self.device, dtype=self.dtype)
+        k_weight = torch.empty([self.kv_size], device=self.device, dtype=self.dtype)
+        return [qkv, q_weight, k_weight]
+
+    def register(self, pm_pass: PatternMatcherPass) -> None:
+        q_size = self.q_size
+        kv_size = self.kv_size
+        eps = self.eps
+        tp_world = self.tp_world
+        max_tokens = self.max_tokens
+        tp_rank = self.tp_rank
+        dtype = self.dtype
+
+        def pattern(
+            qkv: torch.Tensor,
+            q_weight: torch.Tensor,
+            k_weight: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+            q, k, v = qkv.split([q_size, kv_size, kv_size], dim=-1)
+            q_fp32 = q.to(torch.float32)
+            k_fp32 = k.to(torch.float32)
+            q_var = q_fp32.pow(2).mean(dim=-1, keepdim=True)
+            k_var = k_fp32.pow(2).mean(dim=-1, keepdim=True)
+            qk_var = torch.cat([q_var, k_var], dim=-1)
+            qk_var = tensor_model_parallel_all_reduce(qk_var) / tp_world
+            q_var, k_var = qk_var.chunk(2, dim=-1)
+            q_out = (q_fp32 * torch.rsqrt(q_var + eps) * q_weight).to(dtype)
+            k_out = (k_fp32 * torch.rsqrt(k_var + eps) * k_weight).to(dtype)
+            return q_out, k_out, v
+
+        def replacement(
+            qkv: torch.Tensor,
+            q_weight: torch.Tensor,
+            k_weight: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+            assert _MINIMAX_QK_NORM_FUSED_OP is not None
+            q_out, k_out = torch.ops.vllm.minimax_qk_norm_fused(
+                qkv,
+                q_weight,
+                k_weight,
+                q_size,
+                kv_size,
+                tp_rank,
+                tp_world,
+                eps,
+                max_tokens,
+            )
+            _, _, v = qkv.split([q_size, kv_size, kv_size], dim=-1)
+            return q_out, k_out, v
+
+        pm.register_replacement(
+            pattern, replacement, self.get_inputs(), pm.fwd_only, pm_pass
+        )
+
+        # Second pattern: three separate split_with_sizes nodes (one per output),
+        # each with _users=1. This occurs when the QKV projection uses a
+        # functional GEMM kernel (e.g. cutlass_scaled_mm via auto_functionalized),
+        # which causes inductor to generate one split per consumer.
+        def pattern_split3(
+            qkv: torch.Tensor,
+            q_weight: torch.Tensor,
+            k_weight: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+            q = qkv.split([q_size, kv_size, kv_size], dim=-1)[0]
+            k = qkv.split([q_size, kv_size, kv_size], dim=-1)[1]
+            v = qkv.split([q_size, kv_size, kv_size], dim=-1)[2]
+            q_fp32 = q.to(torch.float32)
+            k_fp32 = k.to(torch.float32)
+            q_var = q_fp32.pow(2).mean(dim=-1, keepdim=True)
+            k_var = k_fp32.pow(2).mean(dim=-1, keepdim=True)
+            qk_var = torch.cat([q_var, k_var], dim=-1)
+            qk_var = tensor_model_parallel_all_reduce(qk_var) / tp_world
+            q_var, k_var = qk_var.chunk(2, dim=-1)
+            q_out = (q_fp32 * torch.rsqrt(q_var + eps) * q_weight).to(dtype)
+            k_out = (k_fp32 * torch.rsqrt(k_var + eps) * k_weight).to(dtype)
+            return q_out, k_out, v
+
+        pm.register_replacement(
+            pattern_split3, replacement, self.get_inputs(), pm.fwd_only, pm_pass
+        )
+
+
+class MiniMaxQKNormPass(VllmPatternMatcherPass):
+    """
+    Replace forward_qk allreduce+norm with the Lamport fused kernel.
+    Only applied for decode-size compile ranges (small token counts).
+    """
+
+    def __init__(self, config: VllmConfig) -> None:
+        super().__init__(config)
+        self.disabled = True
+
+        if _MINIMAX_QK_NORM_FUSED_OP is None:
+            logger.warning_once(
+                "minimax_allreduce_rms_qk op not found, MiniMaxQKNormPass disabled."
+            )
+            return
+
+        tp_world = get_tensor_model_parallel_world_size()
+        if tp_world <= 1:
+            logger.warning_once("MiniMaxQKNormPass disabled: tp_size <= 1.")
+            return
+
+        if config.model_config is None:
+            logger.warning_once("MiniMaxQKNormPass disabled: no model_config.")
+            return
+
+        hf_cfg = config.model_config.hf_config
+
+        model_name = getattr(hf_cfg, "architectures", "")[0]
+        if model_name != "MiniMaxM2ForCausalLM":
+            return
+
+        num_attention_heads = getattr(hf_cfg, "num_attention_heads", 0)
+        num_key_value_heads = getattr(hf_cfg, "num_key_value_heads", 0)
+        hidden_size = getattr(hf_cfg, "hidden_size", 0)
+        head_dim = getattr(hf_cfg, "head_dim", 0)
+        eps: float = getattr(hf_cfg, "rms_norm_eps", 1e-6)
+
+        if (
+            num_attention_heads != 48
+            or num_key_value_heads != 8
+            or hidden_size != 3072
+            or head_dim != 128
+        ):
+            logger.warning_once(
+                "MiniMaxQKNormPass disabled: cannot infer model info from hf_config."
+            )
+            return
+
+        num_heads_per_rank = num_attention_heads // tp_world
+        num_kv_heads_per_rank = max(1, num_key_value_heads // tp_world)
+        q_size = num_heads_per_rank * head_dim
+        kv_size = num_kv_heads_per_rank * head_dim
+
+        self.max_token_num = min(
+            MAX_TOKEN_NUM, config.scheduler_config.max_num_batched_tokens
+        )
+
+        tp_rank = get_tensor_model_parallel_rank()
+        # Allocate Lamport workspace first.
+        from vllm.distributed.parallel_state import get_tp_group
+        from vllm.model_executor.layers.mamba.lamport_workspace import (
+            get_allreduce_workspace,
+        )
+
+        get_allreduce_workspace(
+            rank=tp_rank,
+            world_size=tp_world,
+            max_tokens=self.max_token_num,
+            process_group=get_tp_group().cpu_group,
+        )
+
+        self.patterns: PatternMatcherPass = PatternMatcherPass(
+            pass_name="minimax_qk_norm_pass"
+        )
+        self._register_patterns(q_size, kv_size, eps, tp_world, tp_rank)
+        self.dump_patterns(config, self.patterns)
+        self.disabled = False
+
+    @enable_fake_mode
+    def _register_patterns(
+        self,
+        q_size: int,
+        kv_size: int,
+        eps: float,
+        tp_world: int,
+        tp_rank: int,
+    ) -> None:
+        MiniMaxQKNormPattern(
+            q_size=q_size,
+            kv_size=kv_size,
+            eps=eps,
+            tp_world=tp_world,
+            tp_rank=tp_rank,
+            max_tokens=self.max_token_num,
+            dtype=self.model_dtype,
+            device=self.device,
+        ).register(self.patterns)
+
+    def is_applicable_for_range(self, compile_range: Range) -> bool:
+        if self.disabled:
+            return False
+
+        return bool(compile_range.end <= self.max_token_num)
+
+    @VllmInductorPass.time_and_log
+    def __call__(self, graph: fx.Graph) -> None:
+        if self.disabled:
+            return
+        self.matched_count = self.patterns.apply(graph)
+        logger.debug("MiniMaxQKNormPass replaced %s patterns", self.matched_count)
+
+    def uuid(self) -> str:
+        return VllmInductorPass.hash_source(self, MiniMaxQKNormPattern)
diff --git a/vllm/compilation/passes/fusion/mla_attn_quant_fusion.py b/vllm/compilation/passes/fusion/mla_attn_quant_fusion.py
new file mode 100644
index 000000000000..84c24bc60e53
--- /dev/null
+++ b/vllm/compilation/passes/fusion/mla_attn_quant_fusion.py
@@ -0,0 +1,633 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Callable
+
+import torch
+from torch._higher_order_ops.auto_functionalize import auto_functionalized
+
+from vllm.config import VllmConfig, get_layers_from_vllm_config
+from vllm.logger import init_logger
+from vllm.model_executor.layers.attention.mla_attention import MLAAttention
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    QuantKey,
+    kFp8Dynamic64Sym,
+    kFp8Dynamic128Sym,
+    kFp8StaticTensorSym,
+    kNvfp4Dynamic,
+)
+from vllm.platforms import current_platform
+from vllm.utils.math_utils import round_up
+from vllm.utils.torch_utils import _USE_LAYERNAME, _encode_layer_name
+
+from ..vllm_inductor_pass import VllmFusionPatternMatcherPass, VllmPatternReplacement
+from .matcher_utils import MatcherQuantFP8
+from .rms_quant_fusion import QUANT_OPS
+
+logger = init_logger(__name__)
+
+FP8_DTYPE = current_platform.fp8_dtype()
+FP4_DTYPE = torch.uint8
+
+MLA_ATTN_OP = torch.ops.vllm.unified_mla_attention_with_output.default
+
+
+class MLAAttnFp8StaticQuantPattern(VllmPatternReplacement[..., torch.Tensor]):
+    """
+    Fusion for MLA Attention+Fp8StaticQuant.
+
+    Matches the pattern: MLA attention -> static FP8 quant, and replaces
+    it with MLA attention(output_scale=scale, output=fp8_buffer).
+    """
+
+    def __init__(self, layer: MLAAttention, dtype: torch.dtype) -> None:
+        self._layer_name = layer.layer_name
+        self._num_heads = layer.num_heads
+        self._v_head_dim = layer.v_head_dim
+        self._kv_lora_rank = layer.kv_lora_rank
+        self._qk_rope_head_dim = layer.qk_rope_head_dim
+        self._qk_head_dim = layer.qk_nope_head_dim + layer.qk_rope_head_dim
+        self._output_dim = layer.num_heads * layer.v_head_dim
+        self._dtype = dtype
+        self._quant_matcher = MatcherQuantFP8(kFp8StaticTensorSym)
+
+    @property
+    def pattern(self) -> Callable[..., torch.Tensor]:
+        _ln = _encode_layer_name(self._layer_name)
+
+        if _USE_LAYERNAME:
+
+            def _pattern_with_ln(  # type: ignore[misc]
+                q,
+                kv_c_normed,
+                k_pe,
+                output_attn,
+                scale,
+                kv_cache_dummy_dep,
+                layer_name,
+            ):
+                at1 = auto_functionalized(
+                    MLA_ATTN_OP,
+                    q=q,
+                    kv_c_normed=kv_c_normed,
+                    k_pe=k_pe,
+                    output=output_attn,
+                    layer_name=layer_name,
+                    output_scale=None,
+                    output_block_scale=None,
+                    kv_cache_dummy_dep=kv_cache_dummy_dep,
+                )
+                # MLA output is already 2D (T, N*V), no reshape needed
+                return self._quant_matcher(at1[1], scale)[0]
+
+            return _pattern_with_ln
+
+        def _pattern(q, kv_c_normed, k_pe, output_attn, scale, kv_cache_dummy_dep):
+            at1 = auto_functionalized(
+                MLA_ATTN_OP,
+                q=q,
+                kv_c_normed=kv_c_normed,
+                k_pe=k_pe,
+                output=output_attn,
+                layer_name=_ln,
+                output_scale=None,
+                output_block_scale=None,
+                kv_cache_dummy_dep=kv_cache_dummy_dep,
+            )
+            # MLA output is already 2D (T, N*V), no reshape needed
+            return self._quant_matcher(at1[1], scale)[0]
+
+        return _pattern
+
+    @property
+    def replacement(self) -> Callable[..., torch.Tensor]:
+        _ln = _encode_layer_name(self._layer_name)
+
+        if _USE_LAYERNAME:
+
+            def _replacement_with_ln(  # type: ignore[misc]
+                q,
+                kv_c_normed,
+                k_pe,
+                output_attn,
+                scale,
+                kv_cache_dummy_dep,
+                layer_name,
+            ):
+                # MLA output in quant_dtype
+                output_attn = torch.empty(
+                    [q.shape[0], self._output_dim],
+                    dtype=FP8_DTYPE,
+                    device=q.device,
+                )
+                at1 = auto_functionalized(
+                    MLA_ATTN_OP,
+                    q=q,
+                    kv_c_normed=kv_c_normed,
+                    k_pe=k_pe,
+                    output=output_attn,
+                    layer_name=layer_name,
+                    output_scale=scale,
+                    output_block_scale=None,
+                    kv_cache_dummy_dep=kv_cache_dummy_dep,
+                )
+                return at1[1]
+
+            return _replacement_with_ln
+
+        def _replacement(q, kv_c_normed, k_pe, output_attn, scale, kv_cache_dummy_dep):
+            # MLA output in quant_dtype
+            output_attn = torch.empty(
+                [q.shape[0], self._output_dim],
+                dtype=FP8_DTYPE,
+                device=q.device,
+            )
+            at1 = auto_functionalized(
+                MLA_ATTN_OP,
+                q=q,
+                kv_c_normed=kv_c_normed,
+                k_pe=k_pe,
+                output=output_attn,
+                layer_name=_ln,
+                output_scale=scale,
+                output_block_scale=None,
+                kv_cache_dummy_dep=kv_cache_dummy_dep,
+            )
+            return at1[1]
+
+        return _replacement
+
+    def get_inputs(self) -> list[torch.Tensor]:
+        inputs: list = [
+            self.empty(5, self._num_heads, self._qk_head_dim, dtype=self._dtype),
+            self.empty(5, self._kv_lora_rank, dtype=self._dtype),
+            self.empty(5, 1, self._qk_rope_head_dim, dtype=self._dtype),
+            self.empty(5, self._output_dim, dtype=self._dtype),
+            self.empty_fp32(1, 1),
+            self.empty(0, dtype=self._dtype),
+        ]
+        if _USE_LAYERNAME:
+            inputs.append(_encode_layer_name(self._layer_name))
+        return inputs
+
+
+class MLAAttnNvfp4QuantPattern(
+    VllmPatternReplacement[..., tuple[torch.Tensor, torch.Tensor]]
+):
+    """
+    Fusion for MLA Attention+Nvfp4Quant.
+
+    Matches the pattern: MLA attention -> NVFP4 quant, and replaces
+    it with MLA attention(output_scale=scale, output_block_scale=block_scale,
+    output=fp4_buffer).
+    """
+
+    def __init__(self, layer: MLAAttention, dtype: torch.dtype) -> None:
+        self._layer_name = layer.layer_name
+        self._num_heads = layer.num_heads
+        self._v_head_dim = layer.v_head_dim
+        self._kv_lora_rank = layer.kv_lora_rank
+        self._qk_rope_head_dim = layer.qk_rope_head_dim
+        self._qk_head_dim = layer.qk_nope_head_dim + layer.qk_rope_head_dim
+        self._output_dim = layer.num_heads * layer.v_head_dim
+        self._dtype = dtype
+        self._QUANT_OP = QUANT_OPS[kNvfp4Dynamic]
+
+    @property
+    def pattern(
+        self,
+    ) -> Callable[..., tuple[torch.Tensor, torch.Tensor]]:
+        _ln = _encode_layer_name(self._layer_name)
+
+        if _USE_LAYERNAME:
+
+            def _pattern_with_ln(  # type: ignore[misc]
+                q,
+                kv_c_normed,
+                k_pe,
+                output_attn,
+                output_quant,
+                output_scale,
+                input_scale,
+                kv_cache_dummy_dep,
+                layer_name,
+            ):
+                at1 = auto_functionalized(
+                    MLA_ATTN_OP,
+                    q=q,
+                    kv_c_normed=kv_c_normed,
+                    k_pe=k_pe,
+                    output=output_attn,
+                    layer_name=layer_name,
+                    output_scale=None,
+                    output_block_scale=None,
+                    kv_cache_dummy_dep=kv_cache_dummy_dep,
+                )
+                at2 = auto_functionalized(
+                    self._QUANT_OP,
+                    input=at1[1],
+                    input_scale=input_scale,
+                    is_sf_swizzled_layout=True,
+                    output=output_quant,
+                    output_scale=output_scale,
+                )
+                output_scale_view = torch.ops.aten.view.dtype(at2[2], FP8_DTYPE)
+                return at2[1], output_scale_view
+
+            return _pattern_with_ln
+
+        def _pattern(
+            q,
+            kv_c_normed,
+            k_pe,
+            output_attn,
+            output_quant,
+            output_scale,
+            input_scale,
+            kv_cache_dummy_dep,
+        ):
+            at1 = auto_functionalized(
+                MLA_ATTN_OP,
+                q=q,
+                kv_c_normed=kv_c_normed,
+                k_pe=k_pe,
+                output=output_attn,
+                layer_name=_ln,
+                output_scale=None,
+                output_block_scale=None,
+                kv_cache_dummy_dep=kv_cache_dummy_dep,
+            )
+            at2 = auto_functionalized(
+                self._QUANT_OP,
+                input=at1[1],
+                input_scale=input_scale,
+                is_sf_swizzled_layout=True,
+                output=output_quant,
+                output_scale=output_scale,
+            )
+            output_scale_view = torch.ops.aten.view.dtype(at2[2], FP8_DTYPE)
+            return at2[1], output_scale_view
+
+        return _pattern
+
+    @property
+    def replacement(
+        self,
+    ) -> Callable[..., tuple[torch.Tensor, torch.Tensor]]:
+        _ln = _encode_layer_name(self._layer_name)
+
+        if _USE_LAYERNAME:
+
+            def _replacement_with_ln(  # type: ignore[misc]
+                q,
+                kv_c_normed,
+                k_pe,
+                output_attn,
+                _output_quant,
+                output_scale,
+                input_scale,
+                kv_cache_dummy_dep,
+                layer_name,
+            ):
+                # MLA output in quant_dtype (FP4 packed as uint8)
+                output_attn = torch.empty(
+                    [q.shape[0], self._output_dim // 2],
+                    dtype=FP4_DTYPE,
+                    device=q.device,
+                )
+                output_scale_view = torch.ops.aten.view.dtype(output_scale, FP8_DTYPE)
+                at2 = auto_functionalized(
+                    MLA_ATTN_OP,
+                    q=q,
+                    kv_c_normed=kv_c_normed,
+                    k_pe=k_pe,
+                    output=output_attn,
+                    layer_name=layer_name,
+                    output_scale=input_scale,
+                    output_block_scale=output_scale_view,
+                    kv_cache_dummy_dep=kv_cache_dummy_dep,
+                )
+                return at2[1], at2[2]
+
+            return _replacement_with_ln
+
+        def _replacement(
+            q,
+            kv_c_normed,
+            k_pe,
+            output_attn,
+            _output_quant,
+            output_scale,
+            input_scale,
+            kv_cache_dummy_dep,
+        ):
+            # MLA output in quant_dtype (FP4 packed as uint8)
+            output_attn = torch.empty(
+                [q.shape[0], self._output_dim // 2],
+                dtype=FP4_DTYPE,
+                device=q.device,
+            )
+            output_scale_view = torch.ops.aten.view.dtype(output_scale, FP8_DTYPE)
+            at2 = auto_functionalized(
+                MLA_ATTN_OP,
+                q=q,
+                kv_c_normed=kv_c_normed,
+                k_pe=k_pe,
+                output=output_attn,
+                layer_name=_ln,
+                output_scale=input_scale,
+                output_block_scale=output_scale_view,
+                kv_cache_dummy_dep=kv_cache_dummy_dep,
+            )
+            return at2[1], at2[2]
+
+        return _replacement
+
+    def get_inputs(self) -> list[torch.Tensor]:
+        inputs: list = [
+            self.empty(5, self._num_heads, self._qk_head_dim, dtype=self._dtype),
+            self.empty(5, self._kv_lora_rank, dtype=self._dtype),
+            self.empty(5, 1, self._qk_rope_head_dim, dtype=self._dtype),
+            self.empty(5, self._output_dim, dtype=self._dtype),
+            self.empty(5, self._output_dim // 2, dtype=FP4_DTYPE),
+            self.empty_i32(128, round_up(self._output_dim // 16, 4)),
+            self.empty_fp32(1, 1),
+            self.empty(0, dtype=self._dtype),
+        ]
+        if _USE_LAYERNAME:
+            inputs.append(_encode_layer_name(self._layer_name))
+        return inputs
+
+
+class MLAAttnFp8GroupQuantPattern(
+    VllmPatternReplacement[..., tuple[torch.Tensor, torch.Tensor]]
+):
+    """
+    Fusion for MLA Attention+Fp8GroupQuant (per-group dynamic FP8).
+
+    Matches the pattern: MLA attention -> per_token_group_fp8_quant, and
+    replaces it with MLA attention(output_block_scale=group_scale_buffer).
+    Used by models with block FP8 quantization (e.g. DeepSeek V3).
+    """
+
+    def __init__(
+        self,
+        layer: MLAAttention,
+        dtype: torch.dtype,
+        quant_key: QuantKey,
+        has_col_major_scales: bool,
+        is_e8m0: bool,
+        is_tma_aligned: bool,
+    ) -> None:
+        self._layer_name = layer.layer_name
+        self._num_heads = layer.num_heads
+        self._v_head_dim = layer.v_head_dim
+        self._kv_lora_rank = layer.kv_lora_rank
+        self._qk_rope_head_dim = layer.qk_rope_head_dim
+        self._qk_head_dim = layer.qk_nope_head_dim + layer.qk_rope_head_dim
+        self._output_dim = layer.num_heads * layer.v_head_dim
+        self._dtype = dtype
+        self._layer = layer
+        self._group_size = quant_key.scale.group_shape[1]
+        self._has_col_major_scales = has_col_major_scales
+        self._is_e8m0 = is_e8m0
+        self._is_tma_aligned = is_tma_aligned
+
+        self._quant_matcher = MatcherQuantFP8(
+            quant_key,
+            has_col_major_scales=has_col_major_scales,
+            is_e8m0=is_e8m0,
+            is_tma_aligned=is_tma_aligned,
+        )
+
+    @property
+    def pattern(
+        self,
+    ) -> Callable[..., tuple[torch.Tensor, torch.Tensor]]:
+        _ln = _encode_layer_name(self._layer_name)
+
+        if _USE_LAYERNAME:
+
+            def _pattern_with_ln(  # type: ignore[misc]
+                q,
+                kv_c_normed,
+                k_pe,
+                output_attn,
+                kv_cache_dummy_dep,
+                scale,
+                layer_name,
+            ):
+                at1 = auto_functionalized(
+                    MLA_ATTN_OP,
+                    q=q,
+                    kv_c_normed=kv_c_normed,
+                    k_pe=k_pe,
+                    output=output_attn,
+                    layer_name=layer_name,
+                    output_scale=None,
+                    output_block_scale=None,
+                    kv_cache_dummy_dep=kv_cache_dummy_dep,
+                )
+                attn_out = at1[1]
+                result = torch.empty(
+                    attn_out.shape, device=attn_out.device, dtype=FP8_DTYPE
+                )
+                finfo = torch.finfo(FP8_DTYPE)
+                _, result, scale = auto_functionalized(
+                    self._quant_matcher.QUANT_OP,
+                    input=attn_out,
+                    output_q=result,
+                    output_s=scale,
+                    group_size=self._group_size,
+                    eps=1e-10,
+                    fp8_min=finfo.min,
+                    fp8_max=finfo.max,
+                    scale_ue8m0=self._is_e8m0,
+                    dummy_is_scale_transposed=self._has_col_major_scales,
+                    dummy_is_tma_aligned=self._is_tma_aligned,
+                )
+                return result, scale
+
+            return _pattern_with_ln
+
+        def _pattern(
+            q: torch.Tensor,
+            kv_c_normed: torch.Tensor,
+            k_pe: torch.Tensor,
+            output_attn: torch.Tensor,
+            kv_cache_dummy_dep: torch.Tensor,
+            scale: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+            at1 = auto_functionalized(
+                MLA_ATTN_OP,
+                q=q,
+                kv_c_normed=kv_c_normed,
+                k_pe=k_pe,
+                output=output_attn,
+                layer_name=_ln,
+                output_scale=None,
+                output_block_scale=None,
+                kv_cache_dummy_dep=kv_cache_dummy_dep,
+            )
+            attn_out = at1[1]
+            result = torch.empty(
+                attn_out.shape, device=attn_out.device, dtype=FP8_DTYPE
+            )
+            finfo = torch.finfo(FP8_DTYPE)
+            _, result, scale = auto_functionalized(
+                self._quant_matcher.QUANT_OP,
+                input=attn_out,
+                output_q=result,
+                output_s=scale,
+                group_size=self._group_size,
+                eps=1e-10,
+                fp8_min=finfo.min,
+                fp8_max=finfo.max,
+                scale_ue8m0=self._is_e8m0,
+                dummy_is_scale_transposed=self._has_col_major_scales,
+                dummy_is_tma_aligned=self._is_tma_aligned,
+            )
+            return result, scale
+
+        return _pattern
+
+    @property
+    def replacement(
+        self,
+    ) -> Callable[..., tuple[torch.Tensor, torch.Tensor]]:
+        _ln = _encode_layer_name(self._layer_name)
+
+        if _USE_LAYERNAME:
+
+            def _replacement_with_ln(  # type: ignore[misc]
+                q,
+                kv_c_normed,
+                k_pe,
+                output_attn,
+                kv_cache_dummy_dep,
+                scale,
+                layer_name,
+            ):
+                output_attn = torch.empty(
+                    [q.shape[0], self._output_dim],
+                    dtype=FP8_DTYPE,
+                    device=q.device,
+                )
+                at1 = auto_functionalized(
+                    MLA_ATTN_OP,
+                    q=q,
+                    kv_c_normed=kv_c_normed,
+                    k_pe=k_pe,
+                    output=output_attn,
+                    layer_name=layer_name,
+                    output_scale=None,
+                    output_block_scale=scale,
+                    kv_cache_dummy_dep=kv_cache_dummy_dep,
+                    quant_group_size=self._group_size,
+                    quant_scale_ue8m0=self._is_e8m0,
+                    quant_col_major=self._has_col_major_scales,
+                    quant_tma_aligned=self._is_tma_aligned,
+                )
+                return at1[1], at1[2]
+
+            return _replacement_with_ln
+
+        def _replacement(q, kv_c_normed, k_pe, output_attn, kv_cache_dummy_dep, scale):
+            output_attn = torch.empty(
+                [q.shape[0], self._output_dim],
+                dtype=FP8_DTYPE,
+                device=q.device,
+            )
+            at1 = auto_functionalized(
+                MLA_ATTN_OP,
+                q=q,
+                kv_c_normed=kv_c_normed,
+                k_pe=k_pe,
+                output=output_attn,
+                layer_name=_ln,
+                output_scale=None,
+                output_block_scale=scale,
+                kv_cache_dummy_dep=kv_cache_dummy_dep,
+                quant_group_size=self._group_size,
+                quant_scale_ue8m0=self._is_e8m0,
+                quant_col_major=self._has_col_major_scales,
+                quant_tma_aligned=self._is_tma_aligned,
+            )
+            return at1[1], at1[2]
+
+        return _replacement
+
+    def get_inputs(self) -> list[torch.Tensor]:
+        inputs: list = [
+            self.empty(5, self._num_heads, self._qk_head_dim, dtype=self._dtype),
+            self.empty(5, self._kv_lora_rank, dtype=self._dtype),
+            self.empty(5, 1, self._qk_rope_head_dim, dtype=self._dtype),
+            self.empty(5, self._output_dim, dtype=self._dtype),
+            self.empty(0, dtype=self._dtype),
+            self._quant_matcher.empty_f32(1, 1),
+        ]
+        if _USE_LAYERNAME:
+            inputs.append(_encode_layer_name(self._layer_name))
+        return inputs
+
+
+class MLAAttnQuantFusionPass(VllmFusionPatternMatcherPass):
+    """
+    This pass fuses post-attention quantization onto MLA attention if supported.
+
+    It uses the pattern matcher and matches each MLA layer manually, as strings
+    cannot be wildcarded. This also lets us check support on attention layers
+    upon registration instead of during pattern matching.
+    """
+
+    def __init__(self, config: VllmConfig) -> None:
+        super().__init__(config, "mla_attn_quant_fusion")
+
+        dtype = config.model_config.dtype
+        layers = list(get_layers_from_vllm_config(config, MLAAttention).values())
+
+        if len(layers) == 0:
+            logger.warning(
+                "MLA attention + quant fusion is enabled, but no MLA "
+                "attention layers were found in "
+                "CompilationConfig.static_forward_context "
+                "so no fusion patterns were registered."
+            )
+
+        # When _USE_LAYERNAME is enabled, layer_name is a wildcard so all
+        # layers produce the same pattern — register once then break.
+        for layer in layers:
+            if layer.impl.fused_output_quant_supported(kFp8StaticTensorSym):
+                self.register(MLAAttnFp8StaticQuantPattern(layer, dtype))
+                if _USE_LAYERNAME:
+                    break
+
+        if current_platform.is_cuda() and hasattr(torch.ops._C, "scaled_fp4_quant"):
+            for layer in layers:
+                if layer.impl.fused_output_quant_supported(kNvfp4Dynamic):
+                    self.register(MLAAttnNvfp4QuantPattern(layer, dtype))
+                    if _USE_LAYERNAME:
+                        break
+
+        # Per-group FP8 (block quant) — register all flag combinations.
+        if current_platform.is_cuda():
+            for quant_key in [kFp8Dynamic128Sym, kFp8Dynamic64Sym]:
+                for col_major in [True, False]:
+                    for is_e8m0 in [True, False]:
+                        for tma_aligned in [False, True]:
+                            for layer in layers:
+                                if layer.impl.fused_output_quant_supported(quant_key):
+                                    self.register(
+                                        MLAAttnFp8GroupQuantPattern(
+                                            layer,
+                                            dtype,
+                                            quant_key,
+                                            col_major,
+                                            is_e8m0,
+                                            tma_aligned,
+                                        )
+                                    )
+                                    if _USE_LAYERNAME:
+                                        break
+
+        self.dump_patterns(config, self.pm_pass)
diff --git a/vllm/compilation/passes/fusion/mla_rope_kvcache_cat_fusion.py b/vllm/compilation/passes/fusion/mla_rope_kvcache_cat_fusion.py
new file mode 100644
index 000000000000..5a493149a9d5
--- /dev/null
+++ b/vllm/compilation/passes/fusion/mla_rope_kvcache_cat_fusion.py
@@ -0,0 +1,271 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+from torch._higher_order_ops.auto_functionalize import auto_functionalized
+
+import vllm._custom_ops as ops
+from vllm.config import VllmConfig, get_layers_from_vllm_config
+from vllm.logger import init_logger
+from vllm.model_executor.layers.attention import MLAAttention
+from vllm.model_executor.layers.attention.attention import get_attention_context
+from vllm.model_executor.layers.rotary_embedding import RotaryEmbedding
+from vllm.utils.torch_utils import (
+    _USE_LAYERNAME,
+    LayerNameType,
+    _encode_layer_name,
+    _resolve_layer_name,
+    direct_register_custom_op,
+)
+
+from ..vllm_inductor_pass import VllmFusionPatternMatcherPass, VllmPatternReplacement
+from .matcher_utils import MatcherDeepseekScalingRotaryEmbedding, MatcherRotaryEmbedding
+
+logger = init_logger(__name__)
+
+
+def fused_rope_unified_mla_kv_cache_update_impl(
+    positions: torch.Tensor,
+    q_pe: torch.Tensor,
+    k_pe: torch.Tensor,
+    kv_c: torch.Tensor,
+    cos_sin_cache: torch.Tensor,
+    is_neox: bool,
+    kv_cache_dtype: str,
+    kv_cache_scale: torch.Tensor,
+    layer_name: LayerNameType,
+) -> torch.Tensor:
+    layer_name = _resolve_layer_name(layer_name)
+    attn_metadata, _, kv_cache, layer_slot_mapping = get_attention_context(layer_name)
+    if layer_slot_mapping is not None:
+        ops.concat_and_cache_mla_rope_fused(
+            positions,
+            q_pe,
+            k_pe,
+            kv_c,
+            cos_sin_cache,
+            is_neox,
+            layer_slot_mapping,
+            kv_cache,
+            kv_cache_dtype,
+            kv_cache_scale,
+        )
+    return torch.empty(0, device=kv_c.device, dtype=kv_c.dtype)
+
+
+def fused_rope_unified_mla_kv_cache_update_fake(
+    positions: torch.Tensor,
+    q_pe: torch.Tensor,
+    k_pe: torch.Tensor,
+    kv_c: torch.Tensor,
+    cos_sin_cache: torch.Tensor,
+    is_neox: bool,
+    kv_cache_dtype: str,
+    kv_cache_scale: torch.Tensor,
+    layer_name: LayerNameType,
+) -> torch.Tensor:
+    return torch.empty(0, dtype=kv_c.dtype, device=kv_c.device)
+
+
+direct_register_custom_op(
+    op_name="fused_rope_unified_mla_kv_cache_update",
+    op_func=fused_rope_unified_mla_kv_cache_update_impl,
+    fake_impl=fused_rope_unified_mla_kv_cache_update_fake,
+    mutates_args=["q_pe", "k_pe"],
+)
+
+
+class MLARoPEKVCacheCatPattern(VllmPatternReplacement):
+    FUSED_OP = torch.ops.vllm.fused_rope_unified_mla_kv_cache_update.default
+
+    def __init__(
+        self,
+        layer: MLAAttention,
+        is_neox: bool,
+        use_flashinfer: bool = False,
+        use_deepseek_scaling: bool = False,
+    ) -> None:
+        self.layer_name = layer.layer_name
+        self.kv_cache_dtype = layer.kv_cache_dtype
+        self.num_heads = layer.num_heads
+        self.num_kv_heads = layer.num_kv_heads
+        self.kv_lora_rank = layer.kv_lora_rank
+        self.qk_rope_head_dim = layer.qk_rope_head_dim
+        self.is_neox = is_neox
+        self.use_flashinfer = use_flashinfer
+        self._ln = _encode_layer_name(self.layer_name)
+
+        if use_deepseek_scaling:
+            self.rope_matcher = MatcherDeepseekScalingRotaryEmbedding(
+                is_neox=self.is_neox,
+                head_size=self.qk_rope_head_dim,
+                num_heads=self.num_heads,
+                num_kv_heads=self.num_kv_heads,
+                use_flashinfer=self.use_flashinfer,
+            )
+        else:
+            self.rope_matcher = MatcherRotaryEmbedding(  # type: ignore
+                is_neox=self.is_neox,
+                head_size=self.qk_rope_head_dim,
+                num_heads=self.num_heads,
+                num_kv_heads=self.num_kv_heads,
+                use_flashinfer=self.use_flashinfer,
+            )
+
+    def get_inputs(self) -> list[torch.Tensor]:
+        T = 5
+        L = 4096
+        q_pe = self.empty_bf16(T, self.num_heads, self.qk_rope_head_dim)
+        k_pe = self.empty_bf16(T, self.qk_rope_head_dim)
+        kv_c_normed = self.empty_bf16(T, self.kv_lora_rank)
+        cos_sin_cache = self.empty_bf16(L, self.qk_rope_head_dim)
+        positions = self.empty(T, dtype=torch.int64)
+        k_scale = self.empty(0, dtype=torch.float32)
+        inputs = [
+            q_pe,
+            k_pe,
+            kv_c_normed,
+            positions,
+            cos_sin_cache,
+            k_scale,
+        ]
+        if _USE_LAYERNAME:
+            inputs.append(self._ln)
+        return inputs
+
+    @property
+    def pattern(self):
+        _ln = self._ln
+
+        if _USE_LAYERNAME:
+
+            def _pattern_with_ln(
+                q_pe: torch.Tensor,
+                k_pe: torch.Tensor,
+                kv_c_normed: torch.Tensor,
+                positions: torch.Tensor,
+                cos_sin_cache: torch.Tensor,
+                k_scale: torch.Tensor,
+                layer_name: LayerNameType,
+            ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+                k_pe_unsqueezed = k_pe.unsqueeze(1)
+                q_pe, k_pe = self.rope_matcher(
+                    positions, q_pe, k_pe_unsqueezed, cos_sin_cache
+                )
+                dummy = torch.ops.vllm.unified_mla_kv_cache_update(
+                    kv_c_normed, k_pe, layer_name, self.kv_cache_dtype, k_scale
+                )
+                return dummy, q_pe, k_pe
+
+            return _pattern_with_ln
+
+        def _pattern(
+            q_pe: torch.Tensor,
+            k_pe: torch.Tensor,
+            kv_c_normed: torch.Tensor,
+            positions: torch.Tensor,
+            cos_sin_cache: torch.Tensor,
+            k_scale: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+            k_pe_unsqueezed = k_pe.unsqueeze(1)
+            q_pe, k_pe = self.rope_matcher(
+                positions, q_pe, k_pe_unsqueezed, cos_sin_cache
+            )
+            dummy = torch.ops.vllm.unified_mla_kv_cache_update(
+                kv_c_normed, k_pe, _ln, self.kv_cache_dtype, k_scale
+            )
+            return dummy, q_pe, k_pe
+
+        return _pattern
+
+    @property
+    def replacement(self):
+        _ln = self._ln
+
+        if _USE_LAYERNAME:
+
+            def _replacement_with_ln(
+                q_pe: torch.Tensor,
+                k_pe: torch.Tensor,
+                kv_c_normed: torch.Tensor,
+                positions: torch.Tensor,
+                cos_sin_cache: torch.Tensor,
+                k_scale: torch.Tensor,
+                layer_name: LayerNameType,
+            ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+                at = auto_functionalized(
+                    self.FUSED_OP,
+                    positions=positions,
+                    q_pe=q_pe,
+                    k_pe=k_pe,
+                    kv_c=kv_c_normed,
+                    cos_sin_cache=cos_sin_cache,
+                    is_neox=self.is_neox,
+                    kv_cache_dtype=self.kv_cache_dtype,
+                    kv_cache_scale=k_scale,
+                    layer_name=layer_name,
+                )
+                dummy, q_pe, k_pe_squeezed = at
+                k_pe = k_pe_squeezed.unsqueeze(1)
+                return dummy, q_pe, k_pe
+
+            return _replacement_with_ln
+
+        def _replacement(
+            q_pe: torch.Tensor,
+            k_pe: torch.Tensor,
+            kv_c_normed: torch.Tensor,
+            positions: torch.Tensor,
+            cos_sin_cache: torch.Tensor,
+            k_scale: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+            at = auto_functionalized(
+                self.FUSED_OP,
+                positions=positions,
+                q_pe=q_pe,
+                k_pe=k_pe,
+                kv_c=kv_c_normed,
+                cos_sin_cache=cos_sin_cache,
+                is_neox=self.is_neox,
+                kv_cache_dtype=self.kv_cache_dtype,
+                kv_cache_scale=k_scale,
+                layer_name=_ln,
+            )
+            dummy, q_pe, k_pe_squeezed = at
+            k_pe = k_pe_squeezed.unsqueeze(1)
+            return dummy, q_pe, k_pe
+
+        return _replacement
+
+
+class MLARoPEKVCacheCatFusionPass(VllmFusionPatternMatcherPass):
+    def __init__(self, config: VllmConfig) -> None:
+        super().__init__(config, "mla_rope_kv_cache_fusion_pass")
+
+        attn_layers = get_layers_from_vllm_config(config, MLAAttention)
+
+        for _, layer in attn_layers.items():
+            for is_neox in [False, True]:
+                for use_deepseek_scaling in [False, True]:
+                    if RotaryEmbedding.enabled():
+                        for use_flashinfer in [False, True]:
+                            self.register(
+                                MLARoPEKVCacheCatPattern(
+                                    layer,
+                                    is_neox,
+                                    use_flashinfer,
+                                    use_deepseek_scaling,
+                                )
+                            )
+                    else:
+                        self.register(
+                            MLARoPEKVCacheCatPattern(
+                                layer,
+                                is_neox,
+                                use_deepseek_scaling=use_deepseek_scaling,
+                            )
+                        )
+
+            if _USE_LAYERNAME:
+                break
+
+        self.dump_patterns(config, self.pm_pass)
diff --git a/vllm/compilation/passes/fusion/qk_norm_rope_fusion.py b/vllm/compilation/passes/fusion/qk_norm_rope_fusion.py
index dd1f8245e108..b7e747a784eb 100644
--- a/vllm/compilation/passes/fusion/qk_norm_rope_fusion.py
+++ b/vllm/compilation/passes/fusion/qk_norm_rope_fusion.py
@@ -10,6 +10,7 @@
 from torch._higher_order_ops.auto_functionalize import auto_functionalized
 from torch._inductor.pattern_matcher import PatternMatcherPass
 
+import vllm.ir.ops
 from vllm.config import VllmConfig, get_layers_from_vllm_config
 from vllm.logger import init_logger
 from vllm.model_executor.layers.attention import Attention
@@ -17,7 +18,7 @@
 
 from ..inductor_pass import enable_fake_mode
 from ..vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass
-from .matcher_utils import MatcherRMSNorm, MatcherRotaryEmbedding
+from .matcher_utils import MatcherRotaryEmbedding
 from .rms_quant_fusion import empty_bf16, empty_fp32, empty_i64
 
 logger = init_logger(__name__)
@@ -64,7 +65,6 @@ def __init__(
         self.q_size = self.num_heads * self.head_dim
         self.kv_size = self.num_kv_heads * self.head_dim
         self.eps = eps
-        self.rmsnorm_matcher = MatcherRMSNorm(eps)
         self.is_neox = is_neox
         self.rope_flashinfer = rope_flashinfer
         self.rope_matcher = MatcherRotaryEmbedding(
@@ -129,14 +129,14 @@ def pattern(
             q_by_head = q.view(
                 *q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim
             )
-            q_normed_by_head = self.rmsnorm_matcher(q_by_head, q_weight)
+            q_normed_by_head = vllm.ir.ops.rms_norm(q_by_head, q_weight, self.eps)
             q_flat = q_normed_by_head.view(q.shape)
 
             # K path: view -> RMS -> view back to k.shape
             k_by_head = k.view(
                 *k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim
             )
-            k_normed_by_head = self.rmsnorm_matcher(k_by_head, k_weight)
+            k_normed_by_head = vllm.ir.ops.rms_norm(k_by_head, k_weight, self.eps)
             k_flat = k_normed_by_head.view(k.shape)
 
             # RoPE: apply to flattened q/k
@@ -164,6 +164,7 @@ def replacement(
                 cos_sin_cache=cos_sin_cache,
                 is_neox=self.is_neox,
                 position_ids=positions.view(-1),
+                forced_token_heads_per_warp=-1,
             )
             result_qkv = result[1]
 
diff --git a/vllm/compilation/passes/fusion/rms_quant_fusion.py b/vllm/compilation/passes/fusion/rms_quant_fusion.py
index 95ce7b22e0a3..cc986595d436 100644
--- a/vllm/compilation/passes/fusion/rms_quant_fusion.py
+++ b/vllm/compilation/passes/fusion/rms_quant_fusion.py
@@ -9,6 +9,7 @@
 from torch._inductor.pattern_matcher import PatternMatcherPass
 from torch._ops import OpOverload
 
+import vllm.ir.ops
 from vllm.config import VllmConfig, get_current_vllm_config
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
@@ -28,9 +29,7 @@
 from ..inductor_pass import enable_fake_mode
 from ..vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass
 from .matcher_utils import (
-    MatcherFusedAddRMSNorm,
     MatcherQuantFP8,
-    MatcherRMSNorm,
 )
 
 logger = init_logger(__name__)
@@ -38,6 +37,22 @@
 FP4_DTYPE = torch.uint8
 
 
+_RMS_NORM_OP = torch.ops.vllm_ir.rms_norm.default
+
+
+# TODO: extend rmsnorm quant kernels to support mixed input/weight dtypes,
+# and remove this check.
+def _rms_input_weight_dtype_match(match: pm.Match) -> bool:
+    """Prevent fusion when rms_norm input and weight dtypes differ."""
+    for node in match.nodes:
+        if node.target == _RMS_NORM_OP:
+            # rms_norm(x, weight, epsilon, variance_size)
+            x, weight = node.args[0], node.args[1]
+            if isinstance(x, fx.Node) and isinstance(weight, fx.Node):
+                return x.meta["val"].dtype == weight.meta["val"].dtype
+    return True
+
+
 def empty_bf16(*args: Any, **kwargs: Any) -> torch.Tensor:
     return torch.empty(*args, **kwargs, dtype=torch.bfloat16, device="cuda")
 
@@ -54,7 +69,6 @@ def empty_i64(*args: Any, **kwargs: Any) -> torch.Tensor:
     return torch.empty(*args, **kwargs, dtype=torch.int64, device="cuda")
 
 
-RMS_OP = torch.ops._C.rms_norm.default
 RMS_ADD_OP = torch.ops._C.fused_add_rms_norm.default
 
 QUANT_OPS: dict[QuantKey, OpOverload] = {
@@ -131,11 +145,6 @@ def __init__(
         assert key in FUSED_OPS, f"unsupported fused rmsnorm+quant op for {key}"
         self.FUSED_OP = FUSED_OPS[key]
 
-        self.rmsnorm_matcher = (
-            MatcherRMSNorm(epsilon)
-            if not key.fused_add
-            else MatcherFusedAddRMSNorm(epsilon)
-        )
         self.quant_matcher = MatcherQuantFP8(
             key.quant,
             has_col_major_scales=has_col_major_scales,
@@ -161,16 +170,12 @@ def register(self, pm_pass: PatternMatcherPass) -> None:
         def pattern(
             input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
         ) -> torch.Tensor:
-            result_rms = self.rmsnorm_matcher(input, weight)
+            result_rms = vllm.ir.ops.rms_norm(input, weight, self.epsilon)
             return self.quant_matcher(result_rms, scale)[0]
 
         def replacement(
             input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
         ) -> torch.Tensor:
-            # In case we're matching native rms-norm, conversions might be
-            # optimized out. We convert here just to be safe.
-            input = input.to(dtype=self.model_dtype)
-
             result = torch.empty(
                 input.shape, device=input.device, dtype=self.quant_dtype
             )
@@ -187,13 +192,20 @@ def replacement(
             return at[1]
 
         inputs = [
-            # input, weight
-            *self.rmsnorm_matcher.inputs(),
+            empty_bf16(5, 16),  # input
+            empty_bf16(16),  # weight
             self.quant_matcher.inputs()[1],  # scale
         ]
         pattern(*inputs)
 
-        pm.register_replacement(pattern, replacement, inputs, pm.fwd_only, pm_pass)
+        pm.register_replacement(
+            pattern,
+            replacement,
+            inputs,
+            pm.fwd_only,
+            pm_pass,
+            extra_check=_rms_input_weight_dtype_match,
+        )
 
 
 class FusedAddRMSNormStaticQuantPattern(RMSNormQuantPattern):
@@ -215,7 +227,9 @@ def pattern(
             residual: torch.Tensor,
             scale: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
-            result_rms, residual = self.rmsnorm_matcher(input, weight, residual)
+            result_rms, residual = vllm.ir.ops.fused_add_rms_norm(
+                input, residual, weight, self.epsilon
+            )
             result, _ = self.quant_matcher(result_rms, scale)
 
             return result, residual
@@ -245,8 +259,9 @@ def replacement(
             return at[1], at[2]
 
         inputs = [
-            # input, weight, residual
-            *self.rmsnorm_matcher.inputs(),
+            empty_bf16(5, 16),  # input
+            empty_bf16(16),  # weight
+            empty_bf16(5, 16),  # residual
             self.quant_matcher.inputs()[1],  # scale
         ]
 
@@ -256,6 +271,7 @@ def replacement(
             inputs,
             pm.fwd_only,
             pm_pass,
+            extra_check=_rms_input_weight_dtype_match,
         )
 
 
@@ -294,7 +310,9 @@ def pattern(
             residual: torch.Tensor,
             scale: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-            result_rms, residual = self.rmsnorm_matcher(input, weight, residual)
+            result_rms, residual = vllm.ir.ops.fused_add_rms_norm(
+                input, residual, weight, self.epsilon
+            )
             result = torch.empty(
                 result_rms.shape,
                 device=result_rms.device,
@@ -349,14 +367,20 @@ def replacement(
             # result, residual, scale
             return at[1], at[3], at[2]
 
-        scale = self.quant_matcher.empty_f32(1, 1)
+        inputs = [
+            empty_bf16(5, 16),  # input
+            empty_bf16(16),  # weight
+            empty_bf16(5, 16),  # residual
+            self.quant_matcher.empty_f32(1, 1),  # scale
+        ]
 
         pm.register_replacement(
             pattern,
             replacement,
-            self.rmsnorm_matcher.inputs() + [scale],
+            inputs,
             pm.fwd_only,
             pm_pass,
+            extra_check=_rms_input_weight_dtype_match,
         )
 
 
@@ -391,7 +415,7 @@ def register(self, pm_pass: PatternMatcherPass) -> None:
         def pattern(
             input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
         ) -> tuple[torch.Tensor, torch.Tensor]:
-            result_rms = self.rmsnorm_matcher(input, weight)
+            result_rms = vllm.ir.ops.rms_norm(input, weight, self.epsilon)
             result = torch.empty(
                 result_rms.shape,
                 device=result_rms.device,
@@ -442,14 +466,17 @@ def replacement(
             # result, scale
             return at[1], at[2]
 
-        scale = self.quant_matcher.empty_f32(1, 1)
-
         pm.register_replacement(
             pattern,
             replacement,
-            self.rmsnorm_matcher.inputs() + [scale],
+            [
+                empty_bf16(5, 16),  # input
+                empty_bf16(16),  # weight
+                self.quant_matcher.empty_f32(1, 1),  # scale
+            ],
             pm.fwd_only,
             pm_pass,
+            extra_check=_rms_input_weight_dtype_match,
         )
 
 
@@ -472,7 +499,7 @@ def register(self, pm_pass: PatternMatcherPass) -> None:
         def pattern(
             input: torch.Tensor, weight: torch.Tensor
         ) -> tuple[torch.Tensor, torch.Tensor]:
-            result_rms = self.rmsnorm_matcher(input, weight)
+            result_rms = vllm.ir.ops.rms_norm(input, weight, self.epsilon)
             # result, scale
             return self.quant_matcher(result_rms)  # type: ignore[no-any-return]
 
@@ -502,9 +529,13 @@ def replacement(
         pm.register_replacement(
             pattern,
             replacement,
-            self.rmsnorm_matcher.inputs(),
+            [
+                empty_bf16(5, 16),  # input
+                empty_bf16(16),  # weight
+            ],
             pm.fwd_only,
             pm_pass,
+            extra_check=_rms_input_weight_dtype_match,
         )
 
 
@@ -527,7 +558,9 @@ def register(self, pm_pass: PatternMatcherPass) -> None:
         def pattern(
             input: torch.Tensor, weight: torch.Tensor, residual: torch.Tensor
         ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-            result_rms, residual = self.rmsnorm_matcher(input, weight, residual)
+            result_rms, residual = vllm.ir.ops.fused_add_rms_norm(
+                input, residual, weight, self.epsilon
+            )
             result, scale = self.quant_matcher(result_rms)
 
             return result, residual, scale
@@ -555,12 +588,19 @@ def replacement(
             # result, residual, scale
             return at[1], at[3], at[2]
 
+        inputs = [
+            empty_bf16(5, 16),  # input
+            empty_bf16(16),  # weight
+            empty_bf16(5, 16),  # residual
+        ]
+
         pm.register_replacement(
             pattern,
             replacement,
-            self.rmsnorm_matcher.inputs(),
+            inputs,
             pm.fwd_only,
             pm_pass,
+            extra_check=_rms_input_weight_dtype_match,
         )
 
 
diff --git a/vllm/compilation/passes/fusion/rocm_aiter_fusion.py b/vllm/compilation/passes/fusion/rocm_aiter_fusion.py
index 59c94db5e812..e7ba3385725b 100644
--- a/vllm/compilation/passes/fusion/rocm_aiter_fusion.py
+++ b/vllm/compilation/passes/fusion/rocm_aiter_fusion.py
@@ -1,14 +1,18 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections.abc import Callable
+from typing import Any
 
 import torch
 import torch._inductor.pattern_matcher as pm
 from torch import fx
+from torch._inductor.fx_passes.post_grad import view_to_reshape
 from torch._inductor.pattern_matcher import PatternMatcherPass
 
+import vllm.ir.ops
 import vllm.model_executor.layers.quantization.utils.fp8_utils  # noqa: F401
 from vllm._aiter_ops import rocm_aiter_ops
-from vllm.config import VllmConfig
+from vllm.config import VllmConfig, get_layers_from_vllm_config
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     GroupShape,
@@ -19,12 +23,17 @@
 from vllm.platforms import current_platform
 
 from ..inductor_pass import enable_fake_mode
-from ..vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass
-from .act_quant_fusion import ActivationQuantPattern
+from ..vllm_inductor_pass import (
+    VllmFusionPatternMatcherPass,
+    VllmInductorPass,
+    VllmPatternMatcherPass,
+    VllmPatternReplacement,
+    _fx_view_to_reshape,
+    fold_consecutive_reshapes,
+)
 from .matcher_utils import (
-    MatcherFusedAddRMSNorm,
     MatcherQuantFP8,
-    MatcherRMSNorm,
+    MatcherRMSNormGated,
     MatcherSiluAndMul,
 )
 from .rms_quant_fusion import (
@@ -41,17 +50,19 @@ def __init__(
     ):
         self.epsilon = epsilon
         self.quant_dtype = key.quant.dtype
+        self.device = torch.device("cuda")
 
-        self.rmsnorm_matcher = (
-            MatcherRMSNorm(epsilon, match_rocm_aiter=True)
-            if not key.fused_add
-            else MatcherFusedAddRMSNorm(epsilon, match_rocm_aiter=True)
-        )
         self.quant_matcher = MatcherQuantFP8(
             key.quant,
             match_rocm_aiter=match_aiter_quant,
         )
 
+    def empty(self, *args: Any, **kwargs: Any) -> torch.Tensor:
+        return torch.empty(*args, dtype=torch.bfloat16, device=self.device, **kwargs)
+
+    def empty_f32(self, *args: Any, **kwargs: Any) -> torch.Tensor:
+        return torch.empty(*args, dtype=torch.float32, device=self.device, **kwargs)
+
 
 class AiterRMSNormDynamicQuantPattern(AiterRMSNormQuantPattern):
     """AITER RMSNorm + Dynamic Quantization pattern."""
@@ -79,7 +90,7 @@ def pattern(
             input: torch.Tensor,
             weight: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
-            result_rms = self.rmsnorm_matcher(input, weight)
+            result_rms = torch.ops.vllm_ir.rms_norm(input, weight, self.epsilon)
             result, scale = self.quant_matcher(result_rms)
             return result, scale
 
@@ -99,7 +110,8 @@ def replacement(
         pm.register_replacement(
             pattern,
             replacement,
-            self.rmsnorm_matcher.inputs(),
+            # input, weight
+            [self.empty(5, 16), self.empty(16)],
             pm.fwd_only,
             pm_pass,
         )
@@ -132,7 +144,9 @@ def pattern(
             weight: torch.Tensor,
             residual: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-            result_rms, residual_out = self.rmsnorm_matcher(input, weight, residual)
+            result_rms, residual_out = torch.ops.vllm_ir.fused_add_rms_norm(
+                input, residual, weight, self.epsilon
+            )
             result, scale = self.quant_matcher(result_rms)
 
             return result, residual_out, scale
@@ -150,10 +164,16 @@ def replacement(
 
             return result[0], result[1], result[2]
 
+        inputs = [
+            self.empty(5, 16),  # input
+            self.empty(16),  # weight
+            self.empty(5, 16),  # residual
+        ]
+
         pm.register_replacement(
             pattern,
             replacement,
-            self.rmsnorm_matcher.inputs(),
+            inputs,
             pm.fwd_only,
             pm_pass,
         )
@@ -188,7 +208,7 @@ def pattern(
             input: torch.Tensor,
             weight: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
-            result_rms = self.rmsnorm_matcher(input, weight)
+            result_rms = torch.ops.vllm_ir.rms_norm(input, weight, self.epsilon)
             result, scale = self.quant_matcher(result_rms)
             return result, scale
 
@@ -206,7 +226,12 @@ def replacement(
             return at[0], at[1]
 
         pm.register_replacement(
-            pattern, replacement, self.rmsnorm_matcher.inputs(), pm.fwd_only, pm_pass
+            pattern,
+            replacement,
+            # input, weight
+            [self.empty(5, 16), self.empty(16)],
+            pm.fwd_only,
+            pm_pass,
         )
 
 
@@ -240,7 +265,9 @@ def pattern(
             weight: torch.Tensor,
             residual: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-            result_rms, residual_out = self.rmsnorm_matcher(input, weight, residual)
+            result_rms, residual_out = torch.ops.vllm_ir.fused_add_rms_norm(
+                input, residual, weight, self.epsilon
+            )
             result, scale = self.quant_matcher(result_rms)
 
             return result, residual_out, scale
@@ -261,8 +288,258 @@ def replacement(
             # result, scale, residual
             return at[0], at[1], at[2]
 
+        inputs = [
+            self.empty(5, 16),  # input
+            self.empty(16),  # weight
+            self.empty(5, 16),  # residual
+        ]
+
+        pm.register_replacement(pattern, replacement, inputs, pm.fwd_only, pm_pass)
+
+
+class DoubleAiterRMSFp8GroupQuantPattern(AiterRMSNormQuantPattern):
+    """
+    Pattern matching ``rms_norm`` whose output feeds *two* distinct
+    ``rocm_aiter_group_fp8_quant`` consumers, replacing it with two
+    independent fused ``rms_norm_group_fp8_quant`` ops.
+
+    Repeating the rms_norm in the replacement is preferable to leaving
+    the fused 16-bit rms output materialized for two unfused quant
+    consumers, and matches what the previous manual graph surgery
+    achieved by cloning the rms_norm node.
+    """
+
+    FUSED_OP = rocm_aiter_ops.get_rmsnorm_group_fused_quant_op()
+
+    def __init__(
+        self,
+        epsilon: float,
+        quant_dtype: torch.dtype,
+        group_shape: GroupShape,
+        match_aiter_quant: bool = True,
+        symmetric: bool = True,
+    ) -> None:
+        scale = ScaleDesc(torch.float32, False, group_shape)
+        key = FusedRMSQuantKey(
+            fused_add=False,
+            quant=QuantKey(dtype=quant_dtype, scale=scale, symmetric=symmetric),
+        )
+
+        super().__init__(epsilon, key, match_aiter_quant)
+
+    def register(self, pm_pass: PatternMatcherPass) -> None:
+        def pattern(
+            input: torch.Tensor,
+            weight: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+            result_rms = torch.ops.vllm_ir.rms_norm(input, weight, self.epsilon)
+            result1, scale1 = self.quant_matcher(result_rms)
+            result2, scale2 = self.quant_matcher(result_rms)
+            return result1, scale1, result2, scale2
+
+        def replacement(
+            input: torch.Tensor,
+            weight: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+            at1 = self.FUSED_OP(
+                x=input,
+                weight=weight,
+                variance_epsilon=self.epsilon,
+                group_size=128,
+            )
+            at2 = self.FUSED_OP(
+                x=input,
+                weight=weight,
+                variance_epsilon=self.epsilon,
+                group_size=128,
+            )
+
+            return at1[0], at1[1], at2[0], at2[1]
+
         pm.register_replacement(
-            pattern, replacement, self.rmsnorm_matcher.inputs(), pm.fwd_only, pm_pass
+            pattern,
+            replacement,
+            # input, weight
+            [self.empty(5, 16), self.empty(16)],
+            pm.fwd_only,
+            pm_pass,
+        )
+
+
+class DoubleAiterRMSFp8GroupQuantViewPattern(AiterRMSNormQuantPattern):
+    """
+    View-tolerant variant of ``DoubleAiterRMSFp8GroupQuantPattern``.
+
+    Matches the same 1-to-2 fan-out, but with a ``view``/``reshape`` between
+    the ``rms_norm`` output and the two ``rocm_aiter_group_fp8_quant``
+    consumers::
+
+        rms_norm -> view -> rocm_aiter_group_fp8_quant
+                \\-> view -> rocm_aiter_group_fp8_quant
+
+    This shape arises in DeepSeek-V3.2's MLA indexer q_c norm, where the
+    FP8 linear path's 2D-flatten boilerplate
+    (``Fp8BlockScaledMMLinearKernel.apply_weights``) inserts a view between
+    the rms_norm output and each FP8 group quant op. The non-view sibling
+    pattern silently no-ops on this graph because the pattern matcher
+    requires the in-graph and in-pattern node shapes to align.
+
+    The trace_fn runs Inductor's ``view_to_reshape`` post-grad pass to
+    normalize ``view`` to ``reshape`` in both the pattern and the input
+    graph, widening the match without touching the no-view sibling.
+    """
+
+    FUSED_OP = rocm_aiter_ops.get_rmsnorm_group_fused_quant_op()
+
+    def __init__(
+        self,
+        epsilon: float,
+        quant_dtype: torch.dtype,
+        group_shape: GroupShape,
+        match_aiter_quant: bool = True,
+        symmetric: bool = True,
+    ) -> None:
+        scale = ScaleDesc(torch.float32, False, group_shape)
+        key = FusedRMSQuantKey(
+            fused_add=False,
+            quant=QuantKey(dtype=quant_dtype, scale=scale, symmetric=symmetric),
+        )
+
+        super().__init__(epsilon, key, match_aiter_quant)
+
+    def register(self, pm_pass: PatternMatcherPass) -> None:
+        def pattern(
+            input: torch.Tensor,
+            weight: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+            result_rms = torch.ops.vllm_ir.rms_norm(input, weight, self.epsilon)
+            view_rms = result_rms.view(-1, result_rms.shape[-1])
+            result1, scale1 = self.quant_matcher(view_rms)
+            result2, scale2 = self.quant_matcher(view_rms)
+            return result1, scale1, result2, scale2
+
+        def replacement(
+            input: torch.Tensor,
+            weight: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+            at1 = self.FUSED_OP(
+                x=input,
+                weight=weight,
+                variance_epsilon=self.epsilon,
+                group_size=128,
+            )
+            at2 = self.FUSED_OP(
+                x=input,
+                weight=weight,
+                variance_epsilon=self.epsilon,
+                group_size=128,
+            )
+
+            return at1[0], at1[1], at2[0], at2[1]
+
+        def trace_with_view_to_reshape(*args: Any, **kwargs: Any) -> fx.GraphModule:
+            gm = pm.fwd_only(*args, **kwargs)
+            view_to_reshape(gm)
+            return gm
+
+        pm.register_replacement(
+            pattern,
+            replacement,
+            # input, weight
+            [self.empty(5, 16), self.empty(16)],
+            trace_with_view_to_reshape,
+            pm_pass,
+        )
+
+
+class AiterRMSNormGatedFp8GroupQuantPattern(AiterRMSNormQuantPattern):
+    """
+    Matches decomposed RMSNormGated + reshape + group FP8 quant and replaces
+    with rocm_aiter_fused_rms_gated_fp8_group_quant.
+
+    The norm operates per-head on (N*H, D) tensors. The compiler folds the
+    reshape chain so after norm the result goes through reshape->merge->quant.
+    The pattern reshapes from (N*H, D) to (N, H*D) before calling
+    MatcherQuantFP8 so that _quantize_group_native sees the full hidden dim
+    and computes the correct num_groups.
+    """
+
+    FUSED_OP = rocm_aiter_ops.get_fused_rms_gated_fp8_group_quant_op()
+
+    def __init__(
+        self,
+        epsilon: float,
+        quant_dtype: torch.dtype,
+        group_shape: GroupShape,
+        num_heads: int,
+        head_dim: int,
+        match_aiter_quant: bool = True,
+        symmetric: bool = True,
+    ) -> None:
+        scale = ScaleDesc(torch.float32, False, group_shape)
+        key = FusedRMSQuantKey(
+            fused_add=False,
+            quant=QuantKey(dtype=quant_dtype, scale=scale, symmetric=symmetric),
+        )
+        super().__init__(epsilon, key, match_aiter_quant)
+        self.rmsnorm_gated_matcher = MatcherRMSNormGated(epsilon)
+        self.num_heads = num_heads
+        self.head_dim = head_dim
+
+    def register(self, pm_pass: PatternMatcherPass) -> None:
+        num_heads = self.num_heads
+        head_dim = self.head_dim
+        hidden_dim = num_heads * head_dim
+        quant_matcher = self.quant_matcher
+
+        def pattern(
+            x: torch.Tensor,
+            z: torch.Tensor,
+            weight: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+            normed = self.rmsnorm_gated_matcher(x, z, weight)
+            merged = normed.reshape(-1, hidden_dim)
+            quant_out, scales_out = quant_matcher(merged)
+            return quant_out, scales_out
+
+        def replacement(
+            x: torch.Tensor,
+            z: torch.Tensor,
+            weight: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+            fused = self.FUSED_OP(
+                x=x,
+                weight=weight,
+                bias=None,
+                z=z,
+                eps=self.epsilon,
+                norm_before_gate=True,
+                activation="silu",
+                group_size=head_dim,
+            )
+            fp8_out = fused[0]
+            scales_out = fused[1]
+            fp8_reshaped = fp8_out.reshape(-1, hidden_dim)
+            scales_reshaped = scales_out.reshape(-1, num_heads)
+            return fp8_reshaped, scales_reshaped
+
+        n_tokens = 2
+        x = self.empty(n_tokens * num_heads, head_dim)
+        z = self.empty(n_tokens * num_heads, head_dim)
+        w = self.empty(head_dim)
+
+        def trace_fn(*args, **kwargs):
+            gm = pm.fwd_only(*args, **kwargs)
+            _fx_view_to_reshape(gm)
+            fold_consecutive_reshapes(gm)
+            return gm
+
+        pm.register_replacement(
+            pattern,
+            replacement,
+            [x, z, w],
+            trace_fn,
+            pm_pass,
         )
 
 
@@ -281,9 +558,41 @@ def __init__(self, config: VllmConfig) -> None:
             pass_name="rocm_aiter_rms_norm_quant_fusion_pass"
         )
 
+        # Discover (num_heads, head_dim) pairs for gated RMSNorm patterns
+        # from GatedDeltaNetAttention layers in static_forward_context.
+        from vllm.model_executor.layers.mamba.gdn.base import (
+            GatedDeltaNetAttention,
+        )
+
+        gdn_layers = get_layers_from_vllm_config(
+            config,
+            GatedDeltaNetAttention,  # type: ignore[type-abstract]
+        )
+        gated_norm_shapes: set[tuple[int, int]] = set()
+        for layer in gdn_layers.values():
+            gated_norm_shapes.add(
+                (layer.num_v_heads // layer.tp_size, layer.head_v_dim)
+            )
+
         # Make sure fused add patterns are before simple rms norm,
-        # as the latter is a subset of the former in torch ops
+        # as the latter is a subset of the former in torch ops.
+        # The DoubleQuant patterns handle 1 rms_norm -> 2 group_fp8_quant
+        # fan-out (e.g. DSv3.2) and must be registered before the single
+        # group-quant pattern so they match first. The view-tolerant variant
+        # additionally covers the rms_norm -> view -> 2x quant shape that
+        # appears when the FP8 linear path inserts a 2D-flatten boilerplate
+        # (DSv3.2 MLA indexer q_c norm).
         for epsilon in [1e-5, 1e-6]:
+            # Fuse aiter rms_norm + 2x aiter group fp8 quant
+            DoubleAiterRMSFp8GroupQuantPattern(
+                epsilon, FP8_DTYPE, GroupShape(1, 128)
+            ).register(self.patterns)
+
+            # View-tolerant sibling for DSv3.2 q_c norm fan-out
+            DoubleAiterRMSFp8GroupQuantViewPattern(
+                epsilon, FP8_DTYPE, GroupShape(1, 128)
+            ).register(self.patterns)
+
             #  Fuse aiter rms_norm + aiter dynamic group fp8 quant
             AiterRMSFp8GroupQuantPattern(
                 epsilon, FP8_DTYPE, GroupShape(1, 128)
@@ -294,7 +603,18 @@ def __init__(self, config: VllmConfig) -> None:
                 epsilon, FP8_DTYPE, GroupShape(1, 128)
             ).register(self.patterns)
 
-            for match_aiter_quant in [True, False]:
+            # When quant_fp8 custom ops are disabled, both AITER and native
+            # quant matchers trace through QuantFP8's native implementation.
+            # Registering both variants would create duplicate Inductor
+            # patterns.
+            is_quant_fp8_enabled = config.compilation_config.is_custom_op_enabled(
+                "quant_fp8"
+            )
+            match_aiter_quant_options = (
+                [True, False] if is_quant_fp8_enabled else [False]
+            )
+
+            for match_aiter_quant in match_aiter_quant_options:
                 # Fuse aiter rms_norm + (aiter / vllm built-in)
                 # dynamic per-token fp8 quant
                 AiterRMSNormDynamicQuantPattern(
@@ -307,6 +627,21 @@ def __init__(self, config: VllmConfig) -> None:
                     epsilon, FP8_DTYPE, match_aiter_quant=match_aiter_quant
                 ).register(self.patterns)
 
+            # Fuse decomposed RMSNormGated + group fp8 quant.
+            # The replacement op (fused_rms_gated_fp8_group_quant) requires
+            # an aiter version that includes the GDN triton kernel renames.
+            if gated_norm_shapes and rocm_aiter_ops.are_gdn_triton_kernels_available():
+                for num_heads, head_dim in gated_norm_shapes:
+                    if head_dim != 128:
+                        continue
+                    AiterRMSNormGatedFp8GroupQuantPattern(
+                        epsilon,
+                        FP8_DTYPE,
+                        GroupShape(1, 128),
+                        num_heads=num_heads,
+                        head_dim=head_dim,
+                    ).register(self.patterns)
+
         self.dump_patterns(config, self.patterns)
 
     @VllmInductorPass.time_and_log
@@ -322,11 +657,14 @@ def uuid(self) -> str:
             AiterFusedAddRMSNormDynamicQuantPattern,
             AiterRMSFp8GroupQuantPattern,
             AiterFusedAddRMSFp8GroupQuantPattern,
+            DoubleAiterRMSFp8GroupQuantPattern,
+            DoubleAiterRMSFp8GroupQuantViewPattern,
+            AiterRMSNormGatedFp8GroupQuantPattern,
         ]
         return self.hash_source(self, *fusion_patterns)
 
 
-class AiterSiluMulFp8GroupQuantPattern(ActivationQuantPattern):
+class AiterSiluMulFp8GroupQuantPattern(VllmPatternReplacement):
     """
     This pattern fuses aiter silu_and_mul & group fp8 quant custom
     ops into an aiter silu_and_mul_group_fp8_quant op.
@@ -345,26 +683,29 @@ def get_inputs(self) -> list[torch.Tensor]:
             self.silu_and_mul_matcher.inputs()[0],
         ]
 
-    def register(self, pm_pass: PatternMatcherPass) -> None:
-        def pattern(
+    @property
+    def pattern(self):
+        def _pattern(
             input: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
             at1 = self.silu_and_mul_matcher(input)
             at2 = self.quant_matcher(at1)
             return at2[0], at2[1]
 
-        def replacement(
+        return _pattern
+
+    @property
+    def replacement(self):
+        def _replacement(
             input: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
             at = self.FUSED_SILU_MUL_QUANT_OP(x=input, group_size=128)
             return at[0], at[1]
 
-        pm.register_replacement(
-            pattern, replacement, self.get_inputs(), pm.fwd_only, pm_pass
-        )
+        return _replacement
 
 
-class RocmAiterSiluMulFp8GroupQuantFusionPass(VllmPatternMatcherPass):
+class RocmAiterSiluMulFp8GroupQuantFusionPass(VllmFusionPatternMatcherPass):
     """
     This pass fuses a pre-defined set of custom ops into fused ops.
     It uses the torch pattern matcher to find the patterns and replace them.
@@ -374,29 +715,12 @@ class RocmAiterSiluMulFp8GroupQuantFusionPass(VllmPatternMatcherPass):
     https://github.com/pytorch/pytorch/pull/139321#issuecomment-2452354980
     """
 
-    @enable_fake_mode
     def __init__(self, config: VllmConfig) -> None:
-        super().__init__(config)
+        super().__init__(config, "rocm_aiter_silu_mul_fp8_group_quant_fusion_pass")
 
-        self.patterns: PatternMatcherPass = PatternMatcherPass(
-            pass_name="rocm_aiter_silu_mul_fp8_group_quant_fusion_pass"
-        )
+        self.register(AiterSiluMulFp8GroupQuantPattern())
 
-        AiterSiluMulFp8GroupQuantPattern().register(self.patterns)
-
-        self.dump_patterns(config, self.patterns)
-
-    @VllmInductorPass.time_and_log
-    def __call__(self, graph: torch.fx.Graph) -> None:
-        self.matched_count = self.patterns.apply(graph)
-        logger.debug("Replaced %s patterns", self.matched_count)
-
-    def uuid(self) -> str:
-        fusion_patterns = [
-            ActivationQuantPattern,
-            AiterSiluMulFp8GroupQuantPattern,
-        ]
-        return VllmInductorPass.hash_source(self, *fusion_patterns)
+        self.dump_patterns(config, self.pm_pass)
 
 
 class AddAiterRMSNormPadPattern:
@@ -416,12 +740,15 @@ def __init__(
         self.epsilon = epsilon
         self.hidden_size = hidden_size
         self.x_pad_to_multiple = x_pad_to_multiple
-        self.rmsnorm_matcher = MatcherFusedAddRMSNorm(epsilon, match_rocm_aiter=True)
 
     def get_inputs(self) -> list[torch.Tensor]:
-        input, weight, residual = self.rmsnorm_matcher.inputs()
-        router_weight = torch.empty([8, 16], dtype=weight.dtype, device=weight.device)
-        router_bias = torch.empty([8], dtype=weight.dtype, device=weight.device)
+        device = torch.device("cuda")
+        dtype = torch.bfloat16
+        input = torch.empty(5, 16, dtype=dtype, device=device)
+        weight = torch.empty(16, dtype=dtype, device=device)
+        residual = torch.empty(5, 16, dtype=dtype, device=device)
+        router_weight = torch.empty([8, 16], dtype=dtype, device=device)
+        router_bias = torch.empty([8], dtype=dtype, device=device)
         return [input, weight, residual, router_weight, router_bias]
 
     def register(self, pm_pass: PatternMatcherPass) -> None:
@@ -435,7 +762,9 @@ def pattern(
             pad_size = self.x_pad_to_multiple - (
                 self.hidden_size % self.x_pad_to_multiple
             )
-            result_rms, residual_out = self.rmsnorm_matcher(input, weight, residual)
+            result_rms, residual_out = torch.ops.vllm_ir.fused_add_rms_norm(
+                input, residual, weight, self.epsilon
+            )
             router_logits = torch.ops.vllm.rocm_unquantized_gemm(
                 result_rms, router_weight, router_bias
             )
@@ -500,3 +829,101 @@ def __call__(self, graph: torch.fx.Graph) -> None:
 
     def uuid(self) -> str:
         return VllmInductorPass.hash_source(self, AddAiterRMSNormPadPattern)
+
+
+class MLADualRMSNormPattern(
+    VllmPatternReplacement[..., tuple[torch.Tensor, torch.Tensor, torch.Tensor]]
+):
+    """
+    Fuse paired q_a_layernorm + kv_a_layernorm in MLA attention into
+    AITER's ``fused_qk_rmsnorm`` HIP kernel.
+
+    Target FX-graph pattern (unfused, ``vllm_ir`` stage)::
+
+        gemm -> split_with_sizes([q_dim, kv_dim])
+            +-- q_c     -> vllm_ir.rms_norm(q_c, q_w, eps)
+            +-- kv_lora -> split_with_sizes([kv_c_dim, k_pe_dim])
+                            +-- kv_c -> vllm_ir.rms_norm(kv_c, kv_w, eps)
+                            +-- k_pe
+
+    The pattern covers the connected subgraph rooted at the first
+    ``split_with_sizes`` (which produces ``q_c`` and ``kv_lora``),
+    through the two ``rms_norm`` calls, and the ``k_pe`` passthrough.
+    """
+
+    def __init__(self, epsilon: float) -> None:
+        self._epsilon = epsilon
+
+    def get_inputs(self) -> list[torch.Tensor]:
+        q_dim, kv_c_dim, k_pe_dim = 8, 4, 2
+        return [
+            self.empty_bf16(5, q_dim + kv_c_dim + k_pe_dim),
+            self.empty_bf16(q_dim),
+            self.empty_bf16(kv_c_dim),
+        ]
+
+    @property
+    def pattern(
+        self,
+    ) -> Callable[..., tuple[torch.Tensor, torch.Tensor, torch.Tensor]]:
+        eps = self._epsilon
+
+        def _pattern(
+            projected: torch.Tensor,
+            q_weight: torch.Tensor,
+            kv_weight: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+            q_dim = q_weight.shape[0]
+            kv_dim = projected.shape[-1] - q_dim
+            kv_c_dim = kv_weight.shape[0]
+            k_pe_dim = kv_dim - kv_c_dim
+            q_c, kv_lora = projected.split([q_dim, kv_dim], dim=-1)
+            kv_c, k_pe = kv_lora.split([kv_c_dim, k_pe_dim], dim=-1)
+            q_normed = vllm.ir.ops.rms_norm(q_c, q_weight, eps)
+            kv_normed = vllm.ir.ops.rms_norm(kv_c, kv_weight, eps)
+            return q_normed, kv_normed, k_pe
+
+        return _pattern
+
+    @property
+    def replacement(
+        self,
+    ) -> Callable[..., tuple[torch.Tensor, torch.Tensor, torch.Tensor]]:
+        eps = self._epsilon
+
+        def _replacement(
+            projected: torch.Tensor,
+            q_weight: torch.Tensor,
+            kv_weight: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+            q_dim = q_weight.shape[0]
+            kv_dim = projected.shape[-1] - q_dim
+            kv_c_dim = kv_weight.shape[0]
+            k_pe_dim = kv_dim - kv_c_dim
+            q_c, kv_lora = projected.split([q_dim, kv_dim], dim=-1)
+            kv_c, k_pe = kv_lora.split([kv_c_dim, k_pe_dim], dim=-1)
+            q_normed, kv_normed = torch.ops.vllm.fused_mla_dual_rms_norm(
+                q_c,
+                q_weight,
+                kv_c,
+                kv_weight,
+                eps,
+                eps,
+            )
+            return q_normed, kv_normed, k_pe
+
+        return _replacement
+
+
+class MLADualRMSNormFusionPass(VllmFusionPatternMatcherPass):
+    """
+    Post-grad PatternMatcher pass that fuses paired q / kv RMS norms in
+    MLA attention into ``fused_mla_dual_rms_norm`` backed by aiter's
+    ``fused_qk_rmsnorm`` HIP kernel.
+    """
+
+    def __init__(self, config: VllmConfig) -> None:
+        super().__init__(config, "mla_dual_rms_norm_fusion_pass")
+
+        for epsilon in [1e-5, 1e-6]:
+            self.register(MLADualRMSNormPattern(epsilon))
diff --git a/vllm/compilation/passes/fusion/rope_kvcache_fusion.py b/vllm/compilation/passes/fusion/rope_kvcache_fusion.py
index 830a9640780c..bc6754188aa6 100644
--- a/vllm/compilation/passes/fusion/rope_kvcache_fusion.py
+++ b/vllm/compilation/passes/fusion/rope_kvcache_fusion.py
@@ -15,7 +15,13 @@
     Attention,
     get_attention_context,
 )
-from vllm.utils.torch_utils import direct_register_custom_op
+from vllm.utils.torch_utils import (
+    _USE_LAYERNAME,
+    LayerNameType,
+    _encode_layer_name,
+    _resolve_layer_name,
+    direct_register_custom_op,
+)
 
 from ..inductor_pass import enable_fake_mode
 from ..vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass
@@ -37,7 +43,7 @@ def fused_rope_and_unified_kv_cache_update_impl(
     positions: torch.Tensor,
     cos_sin_cache: torch.Tensor,
     is_neox: bool,
-    layer_name: str = "",
+    layer_name: LayerNameType,
 ) -> torch.Tensor:
     """
     This impl fetches the KV cache and slot mapping from the forward context,
@@ -46,6 +52,7 @@ def fused_rope_and_unified_kv_cache_update_impl(
     that is passed to unified_attention to signal a side effect and
     the data dependency between them to ensure torch.compile preserves ordering.
     """
+    layer_name = _resolve_layer_name(layer_name)
     _, attn_layer, kv_cache, layer_slot_mapping = get_attention_context(layer_name)
     if layer_slot_mapping is not None:
         attn_layer.impl.do_rope_and_kv_cache_update(
@@ -70,7 +77,7 @@ def fused_rope_and_unified_kv_cache_update_fake(
     positions: torch.Tensor,
     cos_sin_cache: torch.Tensor,
     is_neox: bool,
-    layer_name: str = "",
+    layer_name: LayerNameType,
 ) -> torch.Tensor:
     return torch.empty(0, device=query.device, dtype=query.dtype)
 
@@ -120,38 +127,30 @@ def __init__(
             num_kv_heads=self.num_kv_heads,
         )
 
-    def get_inputs(self) -> list[torch.Tensor]:
+    def get_inputs(self) -> list:
         # Sample inputs to help pattern tracing
         T = 5
         L = 4096
         qkv = empty_bf16(T, self.q_size + self.k_size + self.v_size)
         positions = empty_i64(T)
         cos_sin_cache = empty_bf16(L, self.head_size)
-        return [
-            qkv,
-            positions,
-            cos_sin_cache,
-        ]
+        inputs: list = [qkv, positions, cos_sin_cache]
+        if _USE_LAYERNAME:
+            inputs.append(_encode_layer_name(self.layer_name))
+        return inputs
 
-    def register(self, pm_pass: PatternMatcherPass) -> None:
-        def pattern(
-            qkv: torch.Tensor,
-            positions: torch.Tensor,
-            cos_sin_cache: torch.Tensor,
-        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    def _mk_pattern_with_layer_name_input(self, _ln):
+        """Pattern/replacement with layer_name as an explicit input."""
+
+        def pattern(qkv, positions, cos_sin_cache, layer_name):
             q, k, v = qkv.split([self.q_size, self.k_size, self.v_size], dim=-1)
             q, k = self.rope_matcher(positions, q, k, cos_sin_cache)
             q = q.view(-1, self.num_heads, self.head_size)
             k = k.view(-1, self.num_kv_heads, self.head_size)
             v = v.view(-1, self.num_kv_heads, self.head_size_v)
-            dummy = torch.ops.vllm.unified_kv_cache_update(k, v, self.layer_name)
-            return dummy, q, k, v
-
-        def replacement(
-            qkv: torch.Tensor,
-            positions: torch.Tensor,
-            cos_sin_cache: torch.Tensor,
-        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+            return torch.ops.vllm.unified_kv_cache_update(k, v, layer_name), q, k, v
+
+        def replacement(qkv, positions, cos_sin_cache, layer_name):
             q, k, v = qkv.split([self.q_size, self.k_size, self.v_size], dim=-1)
             q = q.view(-1, self.num_heads, self.head_size)
             k = k.view(-1, self.num_kv_heads, self.head_size)
@@ -164,10 +163,50 @@ def replacement(
                 positions=positions,
                 cos_sin_cache=cos_sin_cache,
                 is_neox=self.is_neox,
-                layer_name=self.layer_name,
+                layer_name=layer_name,
             )
             return results[0], results[1], results[2], v
 
+        return pattern, replacement
+
+    def _mk_pattern_with_layer_name_closure(self, _ln):
+        """Pattern/replacement with layer_name as a closure constant."""
+
+        def pattern(qkv, positions, cos_sin_cache):
+            q, k, v = qkv.split([self.q_size, self.k_size, self.v_size], dim=-1)
+            q, k = self.rope_matcher(positions, q, k, cos_sin_cache)
+            q = q.view(-1, self.num_heads, self.head_size)
+            k = k.view(-1, self.num_kv_heads, self.head_size)
+            v = v.view(-1, self.num_kv_heads, self.head_size_v)
+            return torch.ops.vllm.unified_kv_cache_update(k, v, _ln), q, k, v
+
+        def replacement(qkv, positions, cos_sin_cache):
+            q, k, v = qkv.split([self.q_size, self.k_size, self.v_size], dim=-1)
+            q = q.view(-1, self.num_heads, self.head_size)
+            k = k.view(-1, self.num_kv_heads, self.head_size)
+            v = v.view(-1, self.num_kv_heads, self.head_size_v)
+            results = auto_functionalized(
+                self.FUSED_OP,
+                query=q,
+                key=k,
+                value=v,
+                positions=positions,
+                cos_sin_cache=cos_sin_cache,
+                is_neox=self.is_neox,
+                layer_name=_ln,
+            )
+            return results[0], results[1], results[2], v
+
+        return pattern, replacement
+
+    def register(self, pm_pass: PatternMatcherPass) -> None:
+        _ln = _encode_layer_name(self.layer_name)
+
+        if _USE_LAYERNAME:
+            pattern, replacement = self._mk_pattern_with_layer_name_input(_ln)
+        else:
+            pattern, replacement = self._mk_pattern_with_layer_name_closure(_ln)
+
         # NOTE: use view_to_reshape to unify view/reshape to simplify
         # pattern and increase matching opportunities
         def fwd_and_view_to_reshape(*args, **kwargs) -> fx.GraphModule:
@@ -176,7 +215,11 @@ def fwd_and_view_to_reshape(*args, **kwargs) -> fx.GraphModule:
             return gm
 
         pm.register_replacement(
-            pattern, replacement, self.get_inputs(), fwd_and_view_to_reshape, pm_pass
+            pattern,
+            replacement,
+            self.get_inputs(),
+            fwd_and_view_to_reshape,
+            pm_pass,
         )
 
 
@@ -205,6 +248,8 @@ def __init__(self, config: VllmConfig) -> None:
         self.max_token_num = cc.pass_config.rope_kvcache_fusion_max_token_num
 
         attn_layers = get_layers_from_vllm_config(config, Attention)
+        # When _USE_LAYERNAME is enabled, layer_name is a wildcard so all
+        # layers produce the same pattern — register once then break.
         for _, layer in attn_layers.items():
             if layer.impl.fused_rope_kvcache_supported():
                 for is_neox in [True, False]:
@@ -212,6 +257,8 @@ def __init__(self, config: VllmConfig) -> None:
                         layer=layer,
                         is_neox=is_neox,
                     ).register(self.patterns)
+                if _USE_LAYERNAME:
+                    break
 
         self.dump_patterns(config, self.patterns)
 
diff --git a/vllm/compilation/passes/fusion/sequence_parallelism.py b/vllm/compilation/passes/fusion/sequence_parallelism.py
index b7ae3dc626ee..8d0f40e2c775 100644
--- a/vllm/compilation/passes/fusion/sequence_parallelism.py
+++ b/vllm/compilation/passes/fusion/sequence_parallelism.py
@@ -8,12 +8,17 @@
 import torch
 import torch._inductor.pattern_matcher as pm
 import torch.fx as fx
+from torch._higher_order_ops.auto_functionalize import auto_functionalized
 from torch._inductor.pattern_matcher import PatternMatcherPass
 
+import vllm.ir.ops
 from vllm.config import VllmConfig
 from vllm.config.utils import Range
 from vllm.distributed import get_tp_group, tensor_model_parallel_all_reduce
-from vllm.distributed.parallel_state import get_tensor_model_parallel_world_size
+from vllm.distributed.parallel_state import (
+    get_tensor_model_parallel_rank,
+    get_tensor_model_parallel_world_size,
+)
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     kFp8StaticTensorSym,
@@ -22,14 +27,19 @@
 from ..inductor_pass import enable_fake_mode
 from ..utility.noop_elimination import NoOpEliminationPass
 from ..vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass
-from .matcher_utils import MatcherFusedAddRMSNorm, MatcherQuantFP8, MatcherRMSNorm
+from .matcher_utils import MatcherQuantFP8
 
 logger = init_logger(__name__)
 
+if hasattr(torch.ops._C, "scaled_fp4_quant"):
+    SCALED_FP4_QUANT_OUT_OVERLOAD = torch.ops._C.scaled_fp4_quant.out
+    SCALED_FP4_QUANT_DEFAULT_OVERLOAD = torch.ops._C.scaled_fp4_quant.default
+
 # Min hidden size per device capability for sequence parallelism
 # Only apply sequence parallelism for models with hidden_size >= threshold
 SP_MIN_HIDDEN_SIZE: dict[int, int] = {
     90: 8192,  # H100: only for models with hidden_size >= 8192
+    100: 8192,  # Blackwell family: only for models with hidden_size >= 8192
 }
 
 # Min size per GPU per device capability for sequence parallelism
@@ -37,6 +47,8 @@
 # This ensures the threshold scales appropriately with tensor parallelism
 SP_MIN_PER_GPU_SIZE_MB: dict[int, float] = {
     90: 8,  # 8MB per GPU for H100
+    # Use a more conservative threshold on Blackwell so TP8 starts later.
+    100: 32,
 }
 
 
@@ -66,7 +78,12 @@ def get_sequence_parallelism_threshold(
     capability = current_platform.get_device_capability()
     if capability is None:
         return None
-    device_capability = capability.to_int()
+
+    # Collapse Blackwell variants (sm100/sm103/...) into one policy bucket.
+    if current_platform.is_device_capability_family(100):
+        device_capability = 100
+    else:
+        device_capability = capability.to_int()
 
     # Check if device has configured thresholds
     min_hidden_size = SP_MIN_HIDDEN_SIZE.get(device_capability)
@@ -108,6 +125,7 @@ def __init__(
         self.device = device
         self.tp_group = get_tp_group()
         self.tp_size = get_tensor_model_parallel_world_size()
+        self.tp_rank = get_tensor_model_parallel_rank()
 
     def _all_reduce(self, x: torch.Tensor) -> torch.Tensor:
         return tensor_model_parallel_all_reduce(x)
@@ -122,35 +140,38 @@ def _all_gather(self, x: torch.Tensor) -> torch.Tensor:
             x, dim=0, world_size=self.tp_size, group_name=self.tp_group.unique_name
         )
 
+    def empty(self, *args: Any, **kwargs: Any) -> torch.Tensor:
+        return torch.empty(*args, dtype=self.dtype, device=self.device, **kwargs)
+
+    def empty_f32(self, *args: Any, **kwargs: Any) -> torch.Tensor:
+        return torch.empty(*args, dtype=torch.float32, device=self.device, **kwargs)
+
 
 class FirstAllReduceRMSNormPattern(_SequenceParallelPatternHelper):
     def __init__(self, epsilon: float, dtype: torch.dtype, device: str | None) -> None:
         super().__init__(epsilon, dtype, device)
-        self.rmsnorm_matcher = MatcherRMSNorm(epsilon)
 
     def get_inputs(self) -> list[torch.Tensor]:
-        input = torch.empty([1, 8, 4], device=self.device, dtype=self.dtype)
-        arg3_1 = torch.empty([4], device=self.device, dtype=self.dtype)
-
-        return [input, arg3_1]
+        # input, weight
+        return [self.empty([1, 8, 4]), self.empty([4])]
 
     def register(self, pm_pass: PatternMatcherPass) -> None:
         def pattern(
             input: torch.Tensor,
-            arg3_1: torch.Tensor,
+            weight: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
             all_reduce = self._all_reduce(input)
-            rmsnorm = self.rmsnorm_matcher(all_reduce, arg3_1)
+            rmsnorm = vllm.ir.ops.rms_norm(all_reduce, weight, self.epsilon)
 
             return rmsnorm, all_reduce
 
         def replacement(
             input: torch.Tensor,
-            arg3_1: torch.Tensor,
+            weight: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
             reduce_scatter = self._reduce_scatter(input)
 
-            rmsnorm = self.rmsnorm_matcher(reduce_scatter, arg3_1)
+            rmsnorm = vllm.ir.ops.rms_norm(reduce_scatter, weight, self.epsilon)
             all_gather = self._all_gather(rmsnorm)
             return all_gather, reduce_scatter
 
@@ -162,7 +183,6 @@ def replacement(
 class MiddleAllReduceRMSNormPattern(_SequenceParallelPatternHelper):
     def __init__(self, epsilon: float, dtype: torch.dtype, device: str | None) -> None:
         super().__init__(epsilon, dtype, device)
-        self.rmsnorm_matcher = MatcherFusedAddRMSNorm(epsilon)
 
     def get_inputs(self) -> list[torch.Tensor]:
         mm_1 = torch.empty([4, 4], device=self.device, dtype=self.dtype)
@@ -183,7 +203,9 @@ def pattern(
             rms_norm_weights: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
             all_reduce = self._all_reduce(mm_1)
-            rmsnorm = self.rmsnorm_matcher(all_reduce, rms_norm_weights, residual)
+            rmsnorm = vllm.ir.ops.fused_add_rms_norm(
+                all_reduce, residual, rms_norm_weights, self.epsilon
+            )
             return rmsnorm[0], rmsnorm[1]
 
         def replacement(
@@ -191,15 +213,35 @@ def replacement(
             mm_1: torch.Tensor,
             rms_norm_weights: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
-            # pattern matcher replaces from top-to-bottom,
-            # so residual is still the full size here.
-            # once the seqpar pattern with the previous rmsnorm is replaced
+            # The pattern matcher replaces from the end of the graph
+            # (last layer first). At the time each match is replaced,
+            # the preceding layer has NOT been replaced yet, so
+            # `residual` is still full-size and the slice below is
+            # correct. Once the preceding layer IS replaced, its
+            # residual output shrinks to [local_len, H], and this
+            # slice becomes semantically incorrect (e.g. for rank > 0,
+            # the indices would be out of bounds). However, since the
+            # symbolic output shape equals the input shape,
+            # NoOpEliminationPass (called at the end of
+            # SequenceParallelismPass.__call__) removes these slices
+            # before the graph is ever executed or compiled.
             reduce_scatter = self._reduce_scatter(mm_1)
-            residual = residual[0 : reduce_scatter.size(0), ...]
-            rmsnorm = self.rmsnorm_matcher(reduce_scatter, rms_norm_weights, residual)
+            local_len = reduce_scatter.size(0)
+            # when the preceding VocabParallelEmbedding is excluded
+            # from the FX graph (e.g., passing `inputs_embeds` directly in VLMs),
+            # the FirstAllReduceRMSNorm pattern is never matched. we must
+            # perform a proper TP-aware slice here. simply using `[0:local_len]`
+            # would incorrectly cause all ranks to process rank 0's chunk.
+            residual = residual[
+                self.tp_rank * local_len : self.tp_rank * local_len + local_len, ...
+            ]
+            rmsnorm = vllm.ir.ops.fused_add_rms_norm(
+                reduce_scatter, residual, rms_norm_weights, self.epsilon
+            )
             all_gather = self._all_gather(rmsnorm[0])
-            # shape of residual changes but that's fine,
-            # next node is already slicing it, now becomes a noop
+            # residual output is now [local_len, H]; the next layer's
+            # slice on it is semantically incorrect until
+            # NoOpEliminationPass removes it.
             return all_gather, rmsnorm[1]
 
         pm.register_replacement(
@@ -222,14 +264,11 @@ def __init__(
         device: str | None,
     ) -> None:
         super().__init__(epsilon, dtype, device)
-        self.rmsnorm_matcher = MatcherRMSNorm(epsilon)
         self.quant_matcher = MatcherQuantFP8(kFp8StaticTensorSym)
 
     def get_inputs(self) -> list[torch.Tensor]:
-        input = torch.zeros([1, 8, 4], device=self.device, dtype=self.dtype)
-        weight = torch.empty([4], device=self.device, dtype=self.dtype)
-        scale = torch.tensor(1.0, device=self.device, dtype=torch.float32)
-        return [input, weight, scale]
+        # input, weight, scale
+        return [self.empty([1, 8, 4]), self.empty([4]), self.empty_f32([1, 1])]
 
     def register(self, pm_pass: PatternMatcherPass) -> None:
         def pattern(
@@ -238,7 +277,7 @@ def pattern(
             scale: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
             all_reduce = self._all_reduce(input)
-            rms = self.rmsnorm_matcher(all_reduce, weight)
+            rms = vllm.ir.ops.rms_norm(all_reduce, weight, self.epsilon)
             quant, _ = self.quant_matcher(rms, scale)
             return quant, all_reduce
 
@@ -248,7 +287,7 @@ def replacement(
             scale: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
             reduce_scatter = self._reduce_scatter(input)
-            rms = self.rmsnorm_matcher(reduce_scatter, weight)
+            rms = vllm.ir.ops.rms_norm(reduce_scatter, weight, self.epsilon)
             quant, _ = self.quant_matcher(rms, scale)
             all_gather = self._all_gather(quant)
 
@@ -262,7 +301,6 @@ def replacement(
 class MiddleAllReduceRMSNormStaticFP8Pattern(_SequenceParallelPatternHelper):
     def __init__(self, epsilon: float, dtype: torch.dtype, device: str | None) -> None:
         super().__init__(epsilon, dtype, device)
-        self.rmsnorm_matcher = MatcherFusedAddRMSNorm(epsilon)
         self.quant_matcher = MatcherQuantFP8(kFp8StaticTensorSym)
 
     def get_inputs(self) -> list[torch.Tensor]:
@@ -281,8 +319,8 @@ def pattern(
             scale: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
             all_reduce = self._all_reduce(mm_1)
-            rms, residual_out = self.rmsnorm_matcher(
-                all_reduce, rms_norm_weights, residual
+            rms, residual_out = vllm.ir.ops.fused_add_rms_norm(
+                all_reduce, residual, rms_norm_weights, self.epsilon
             )
             quant, _ = self.quant_matcher(rms, scale)
             return quant, residual_out
@@ -293,19 +331,29 @@ def replacement(
             rms_norm_weights: torch.Tensor,
             scale: torch.Tensor,
         ) -> tuple[torch.Tensor, torch.Tensor]:
-            # pattern matcher replaces from top-to-bottom,
-            # so residual is still the full size here.
-            # add a temporary slice which will become a noop
-            # once the seqpar pattern with the previous rmsnorm is replaced
+            # See MiddleAllReduceRMSNormPattern.replacement for a
+            # detailed explanation of the temporary slice below:
+            # it is correct when first inserted, becomes semantically
+            # incorrect after the preceding layer is replaced, and is
+            # removed by NoOpEliminationPass before the graph is compiled.
             reduce_scatter = self._reduce_scatter(mm_1)
-            residual = residual[0 : reduce_scatter.size(0), ...]
-            rms, residual_out = self.rmsnorm_matcher(
-                reduce_scatter, rms_norm_weights, residual
+            local_len = reduce_scatter.size(0)
+            # when the preceding VocabParallelEmbedding is excluded
+            # from the FX graph (e.g., passing `inputs_embeds` directly in VLMs),
+            # the FirstAllReduceRMSNorm pattern is never matched. we must
+            # perform a proper TP-aware slice here. simply using `[0:local_len]`
+            # would incorrectly cause all ranks to process rank 0's chunk.
+            residual = residual[
+                self.tp_rank * local_len : self.tp_rank * local_len + local_len, ...
+            ]
+            rms, residual_out = vllm.ir.ops.fused_add_rms_norm(
+                reduce_scatter, residual, rms_norm_weights, self.epsilon
             )
             quant, _ = self.quant_matcher(rms, scale)
             all_gather = self._all_gather(quant)
-            # shape of residual changes but that's fine,
-            # next node is already slicing it, now becomes a noop
+            # residual output is now [local_len, H]; the next layer's
+            # slice on it is semantically incorrect until
+            # NoOpEliminationPass removes it.
             return all_gather, residual_out
 
         pm.register_replacement(
@@ -321,6 +369,129 @@ def replacement(
         )
 
 
+class FirstAllReduceRMSNormStaticNVFP4Pattern(_SequenceParallelPatternHelper):
+    def get_inputs(self) -> list[torch.Tensor]:
+        input = self.empty([8, 16])
+        weight = self.empty([16])
+        input_global_scale = self.empty_f32([1, 1])
+        quant_output = torch.empty([8, 8], device=self.device, dtype=torch.uint8)
+        output_scale = torch.empty([128, 4], device=self.device, dtype=torch.int32)
+        return [input, weight, input_global_scale, quant_output, output_scale]
+
+    def register(self, pm_pass: PatternMatcherPass) -> None:
+        def pattern(
+            input: torch.Tensor,
+            weight: torch.Tensor,
+            input_global_scale: torch.Tensor,
+            quant_output: torch.Tensor,
+            output_scale: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+            all_reduce = self._all_reduce(input)
+            rms = vllm.ir.ops.rms_norm(all_reduce, weight, self.epsilon)
+            quant = auto_functionalized(
+                SCALED_FP4_QUANT_OUT_OVERLOAD,
+                input=rms,
+                input_scale=input_global_scale,
+                is_sf_swizzled_layout=True,
+                output=quant_output,
+                output_scale=output_scale,
+            )
+            return quant[1], all_reduce, quant[2]
+
+        def replacement(
+            input: torch.Tensor,
+            weight: torch.Tensor,
+            input_global_scale: torch.Tensor,
+            quant_output: torch.Tensor,
+            output_scale: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+            reduce_scatter = self._reduce_scatter(input)
+            rms = vllm.ir.ops.rms_norm(reduce_scatter, weight, self.epsilon)
+            rms = torch.ops.aten.view.default(rms, [-1, rms.shape[-1]])
+            quant = SCALED_FP4_QUANT_DEFAULT_OVERLOAD(
+                rms,
+                input_global_scale,
+                True,
+            )
+            return (
+                self._all_gather(quant[0]),
+                reduce_scatter,
+                self._all_gather(quant[1]),
+            )
+
+        pm.register_replacement(
+            pattern, replacement, self.get_inputs(), pm.fwd_only, pm_pass
+        )
+
+
+class MiddleAllReduceRMSNormStaticNVFP4Pattern(_SequenceParallelPatternHelper):
+    def get_inputs(self) -> list[torch.Tensor]:
+        mm_1 = self.empty([8, 16])
+        residual = self.empty([8, 16])
+        rms_norm_weights = self.empty([16])
+        input_global_scale = self.empty_f32([1, 1])
+        quant_output = torch.empty([8, 8], device=self.device, dtype=torch.uint8)
+        output_scale = torch.empty([128, 4], device=self.device, dtype=torch.int32)
+        return [
+            residual,
+            mm_1,
+            rms_norm_weights,
+            input_global_scale,
+            quant_output,
+            output_scale,
+        ]
+
+    def register(self, pm_pass: PatternMatcherPass) -> None:
+        def pattern(
+            residual: torch.Tensor,
+            mm_1: torch.Tensor,
+            rms_norm_weights: torch.Tensor,
+            input_global_scale: torch.Tensor,
+            quant_output: torch.Tensor,
+            output_scale: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+            all_reduce = self._all_reduce(mm_1)
+            rms, residual_out = vllm.ir.ops.fused_add_rms_norm(
+                all_reduce, residual, rms_norm_weights, self.epsilon
+            )
+            quant = auto_functionalized(
+                SCALED_FP4_QUANT_OUT_OVERLOAD,
+                input=rms,
+                input_scale=input_global_scale,
+                is_sf_swizzled_layout=True,
+                output=quant_output,
+                output_scale=output_scale,
+            )
+            return quant[1], residual_out, quant[2]
+
+        def replacement(
+            residual: torch.Tensor,
+            mm_1: torch.Tensor,
+            rms_norm_weights: torch.Tensor,
+            input_global_scale: torch.Tensor,
+            quant_output: torch.Tensor,
+            output_scale: torch.Tensor,
+        ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+            # Keep this slice in sync with the non-quantized SP replacement:
+            # once the previous SP pattern fires, it becomes a no-op.
+            reduce_scatter = self._reduce_scatter(mm_1)
+            residual = residual[0 : reduce_scatter.size(0), ...]
+            rms, residual_out = vllm.ir.ops.fused_add_rms_norm(
+                reduce_scatter, residual, rms_norm_weights, self.epsilon
+            )
+            rms = torch.ops.aten.view.default(rms, [-1, rms.shape[-1]])
+            quant = SCALED_FP4_QUANT_DEFAULT_OVERLOAD(
+                rms,
+                input_global_scale,
+                True,
+            )
+            return self._all_gather(quant[0]), residual_out, self._all_gather(quant[1])
+
+        pm.register_replacement(
+            pattern, replacement, self.get_inputs(), pm.fwd_only, pm_pass
+        )
+
+
 class SequenceParallelismPass(VllmPatternMatcherPass):
     """
     This pass enables sequence parallelism for models.
@@ -340,22 +511,21 @@ class SequenceParallelismPass(VllmPatternMatcherPass):
     significantly reduce communication overhead and improve overall model
     performance.
 
-
-    This pass splits up the residual tensor across TP ranks and hence divides its size.
-    Because the pattern matcher starts at the end of the graph, the replacement
-    contains a slice that temporarily conforms the input residual to the correct size.
-    After all patterns have been matched, we use a NoOpEliminationPass to clean up
-    what have now become no-op slices.
-
-    Note that an older version of the pass did not need this as it operated only on
-    custom rms_norm and fused_rms_norm_add custom ops which did not complain about
-    mismatched shapes during replacement. So this approach has the same assumption that
-    correctness is only maintained if all rms_norm operations are split across ranks.
-
-    Correctness-wise, this is approach strictly better than before - before,
-    the graph was incorrect semantically and shape-wise during the pass.
-    With this approach there's only semantic incorrectness during the pass.
-    Both approaches restore a correct graph once all patterns are matched.
+    This pass is only supported when compiling the whole graph (fullgraph
+    mode, i.e. using Inductor graph partition or empty splitting_ops).
+    Piecewise compilation is not supported because the residual tensor
+    gets split across TP ranks, causing size mismatches at subgraph
+    boundaries.
+
+    This pass splits up the residual tensor across TP ranks and hence divides
+    its size. The pattern matcher starts at the end of the graph (last layer
+    first), so when each replacement inserts a residual slice, the preceding
+    layer has not been replaced yet and the slice is correct. Once the
+    preceding layer IS replaced, its residual output shrinks and the slice
+    becomes semantically incorrect (out-of-bounds indices for rank > 0).
+    The graph is never executed in this intermediate state —
+    NoOpEliminationPass removes these slices based on symbolic shape equality
+    (input shape == output shape) before the graph is compiled.
     """
 
     @enable_fake_mode
@@ -397,6 +567,14 @@ def __init__(self, config: VllmConfig) -> None:
                 epsilon, self.model_dtype, self.device
             ).register(self.patterns)
 
+            if "SCALED_FP4_QUANT_OUT_OVERLOAD" in globals():
+                FirstAllReduceRMSNormStaticNVFP4Pattern(
+                    epsilon, self.model_dtype, self.device
+                ).register(self.patterns)
+                MiddleAllReduceRMSNormStaticNVFP4Pattern(
+                    epsilon, self.model_dtype, self.device
+                ).register(self.patterns)
+
             # Normal RMSNorm patterns
             FirstAllReduceRMSNormPattern(
                 epsilon, self.model_dtype, self.device
@@ -418,19 +596,13 @@ def is_applicable_for_range(self, compile_range: Range) -> bool:
         and gathering tensors across TP ranks outweighs the benefits.
 
         Returns False (SP disabled) when:
-        - Using piecewise compilation with non-concrete or TP-indivisible sizes
         - min_token_num is None (SP disabled for this device/config)
         - The compile range starts below the minimum token threshold
         """
-        # For piecewise compilation (not using inductor graph partition),
-        # we need concrete sizes that are divisible by TP for correct splitting
-        if (
-            not self.compilation_config.use_inductor_graph_partition
-            and self.compilation_config.splitting_ops
-        ):
-            tp_size = get_tensor_model_parallel_world_size()
-            if not compile_range.is_single_size() or compile_range.end % tp_size != 0:
-                return False
+        assert (
+            self.compilation_config.use_inductor_graph_partition
+            or not self.compilation_config.splitting_ops
+        ), "SequenceParallelismPass requires full-graph compilation"
 
         # min_token_num is None when SP is disabled for this device/config
         # (e.g., non-CUDA platform, unsupported GPU, or small hidden_size)
diff --git a/vllm/compilation/passes/inductor_pass.py b/vllm/compilation/passes/inductor_pass.py
index 4610c62d1771..8a0d5326dd92 100644
--- a/vllm/compilation/passes/inductor_pass.py
+++ b/vllm/compilation/passes/inductor_pass.py
@@ -30,6 +30,9 @@ class PassContext:
     def __init__(self, compile_range: Range):
         self.compile_range: Range = compile_range
 
+        # set of arg indices
+        self.donated_input_ids: set[int] = set()
+
 
 def get_pass_context() -> PassContext:
     """Get the current pass context."""
@@ -51,6 +54,15 @@ def pass_context(compile_range: Range) -> Generator[None, None, None]:
         _pass_context = prev_context
 
 
+@functools.cache
+def _hash_source_cached(*srcs: str | type | types.FunctionType) -> str:
+    hasher = hashlib.sha256()
+    for src in srcs:
+        src_str = src if isinstance(src, str) else inspect.getsource(src)
+        hasher.update(src_str.encode("utf-8"))
+    return hasher.hexdigest()
+
+
 class InductorPass(CustomGraphPass):  # type: ignore[misc]
     """
     A custom graph pass that uses a hash of its source as the UUID.
@@ -72,19 +84,16 @@ def hash_source(*srcs: str | Any) -> str:
         Utility method to hash the sources of functions or objects.
         :param srcs: strings or objects to add to the hash.
         Objects and functions have their source inspected.
+        Results are cached by resolved types to avoid repeated
+        inspect.getsource() calls.
         :return:
         """
-        hasher = hashlib.sha256()
-        for src in srcs:
-            if isinstance(src, str):
-                src_str = src
-            elif isinstance(src, (types.FunctionType, type)):
-                src_str = inspect.getsource(src)
-            else:
-                # object instance
-                src_str = inspect.getsource(src.__class__)
-            hasher.update(src_str.encode("utf-8"))
-        return hasher.hexdigest()
+        # Resolve instances to their class for a hashable cache key.
+        cache_key = tuple(
+            src if isinstance(src, (str, type, types.FunctionType)) else src.__class__
+            for src in srcs
+        )
+        return _hash_source_cached(*cache_key)
 
     @staticmethod
     def hash_dict(dict_: dict[Any, Any]) -> str:
diff --git a/vllm/compilation/passes/ir/__init__.py b/vllm/compilation/passes/ir/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/compilation/passes/ir/clone_elimination.py b/vllm/compilation/passes/ir/clone_elimination.py
new file mode 100644
index 000000000000..61ba750a6c4e
--- /dev/null
+++ b/vllm/compilation/passes/ir/clone_elimination.py
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+from torch import fx
+from torch._higher_order_ops.auto_functionalize import auto_functionalized
+from torch._higher_order_ops.triton_kernel_wrap import TritonKernelWrapperFunctional
+from torch._ops import HigherOrderOperator, OpOverload
+
+from vllm.config import VllmConfig
+from vllm.logger import init_logger
+
+from ..fx_utils import is_func
+from ..inductor_pass import get_pass_context
+from ..vllm_inductor_pass import VllmInductorPass
+
+logger = init_logger(__name__)
+
+
+def user_writes_to_node(user: fx.Node, node: fx.Node) -> bool:
+    if user.op == "output":
+        return False
+
+    if is_func(user, auto_functionalized):
+        # While autofunc writes to the node,
+        # this is a follow-up use we're not interested in.
+        # It is also guaranteed to be the final use,
+        # as auto_functionalized returns the tensor back for follow-up use.
+        return False
+    elif user.op == "call_function" and isinstance(user.target, HigherOrderOperator):
+        # By default, be conservative, assume this could be a write
+        # (except functional HOPs)
+        return not isinstance(user.target, TritonKernelWrapperFunctional)
+
+    assert isinstance(user.target, OpOverload), (
+        f"{node=} {user=} {user.op=} {user.target=}"
+    )
+    schema = user.target._schema
+    assert len(user.args) <= len(schema.arguments)
+    for i, arg in enumerate(user.args):
+        # Only interested in writes to node
+        if arg is not node:
+            continue
+
+        # If not a write, next arg could be
+        if schema.arguments[i].is_write:
+            return True
+
+    # No writes found
+    return False
+
+
+class UnsafeCloneEliminationPass(VllmInductorPass):
+    """
+    This pass removes clone nodes that are no longer needed after vLLM IR lowering.
+    It uses donated_input_ids to eliminate clones of donated graph inputs, preserving
+    contents of non-donated graph inputs.
+
+    It is "unsafe" because it does not (yet) take aliasing into account. Solving
+    aliasing is an open problem, so this pass intends to support known vLLM cases
+    and not guarantee soundness on general graphs. In the future, this pass will likely
+    support basic forms of aliasing to handle simple views (e.g. qkv -> q,k,v).
+    """
+
+    def __init__(self, vllm_config: VllmConfig) -> None:
+        super().__init__(vllm_config)
+
+    @VllmInductorPass.time_and_log
+    def __call__(self, graph: fx.Graph) -> None:
+        count = 0
+        node_to_idx = {node: i for i, node in enumerate(graph.nodes)}
+        pass_context = get_pass_context()
+        donated_input_ids = pass_context.donated_input_ids
+        logger.debug("Donated input ids: %s", donated_input_ids)
+
+        for node in graph.nodes:
+            if not is_func(node, torch.ops.aten.clone.default):
+                continue
+
+            original_node = node.args[0]
+            assert isinstance(original_node, fx.Node)
+
+            # Clone needs to be preserved if node is getting written to and
+            # the old value is used again.
+            # This could only happen if an inplace implementation was lowered.
+            # Then node (the clone) will have one write.
+            # TODO(luka) hopefully this can be removed once we lower functional graphs.
+            write_idxs = [
+                node_to_idx[u] for u in node.users if user_writes_to_node(u, node)
+            ]
+            assert len(write_idxs) in (0, 1)
+            if write_idxs:
+                # Check if a user of original_node occurs after a write
+                write_idx = write_idxs[0]
+                if any(
+                    node_to_idx[orig_user] > write_idx
+                    for orig_user in original_node.users
+                ):
+                    logger.debug(
+                        "Clone removal not possible, "
+                        "original_node=%s used after mutation on node=%s",
+                        original_node,
+                        node,
+                    )
+                    continue
+
+                # Check if a node is a (non-donated) graph input
+                if (
+                    original_node.op == "placeholder"
+                    and node_to_idx[original_node] not in donated_input_ids
+                ):
+                    logger.debug(
+                        "Graph input %s not donated, cannot eliminate its clone",
+                        original_node,
+                    )
+                    continue
+
+            logger.debug(
+                "Node %s is a redundant clone node of %s, removing it",
+                node,
+                original_node,
+            )
+            node.replace_all_uses_with(original_node)
+            graph.erase_node(node)
+            count += 1
+
+        logger.debug("CloneCleanupPass removed %d clone nodes", count)
diff --git a/vllm/compilation/passes/ir/inplace_functionalization.py b/vllm/compilation/passes/ir/inplace_functionalization.py
new file mode 100644
index 000000000000..e69351075bca
--- /dev/null
+++ b/vllm/compilation/passes/ir/inplace_functionalization.py
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections import defaultdict
+
+from torch import fx
+from torch._inductor.pattern_matcher import (
+    PatternMatcherPass,
+)
+
+from vllm.config import VllmConfig
+from vllm.logger import init_logger
+
+from ..inductor_pass import get_pass_context
+from ..vllm_inductor_pass import VllmInductorPass
+from .lowering_pass import get_ir_op
+from .utils import overload_or_default
+
+logger = init_logger(__name__)
+
+
+class VllmIRInplaceFunctionalizationPass(VllmInductorPass):
+    """
+    This pass functionalizes maybe_inplace vLLM IR ops to the default overload.
+    The maybe_inplace overloads have the same signature as the default overload
+    so the pass simply replaces the called overload.
+    That makes the graph properly functional.
+    The pass also validates that activations passed to maybe_inplace have no later
+    uses in the graph: they are donated to the maybe_inplace op call,
+    and their contents are not defined afterward.
+
+    This pass operates pre-AOTAutograd,
+    so it must handle non-normalized and non-functional IR.
+    """
+
+    def __init__(self, vllm_config: VllmConfig) -> None:
+        super().__init__(vllm_config)
+        self.patterns = PatternMatcherPass(self.pass_name)
+        self.functionalized_ops: dict[str, int] = defaultdict(lambda: 0)
+
+    @VllmInductorPass.time_and_log
+    def __call__(self, graph: fx.Graph) -> None:
+        # clear at the beginning instead of end, so that tests can inspect
+        self.functionalized_ops.clear()
+        assert graph.owning_module is not None
+        node_to_idx = {node: i for i, node in enumerate(graph.nodes)}
+
+        # Pass donated input via vLLM's pass context
+        pass_context = get_pass_context()
+        pass_context.donated_input_ids = set[int]()
+
+        for node in graph.nodes:
+            if (ir_op := get_ir_op(node)) is None:
+                continue
+
+            op_overload = overload_or_default(node.target)
+            overload_name = op_overload._overloadname
+            if overload_name != "maybe_inplace":
+                assert overload_name == "default", (
+                    f"Found overload {overload_name} for op {ir_op.name}, "
+                    f"expected maybe_inplace or default"
+                )
+                continue
+
+            # must have maybe_inplace overload and allow_inplace
+            assert ir_op.allow_inplace and hasattr(ir_op, "maybe_inplace")
+
+            # Check that activation inputs are not used after this op
+            for arg_idx in ir_op.activation_indices:
+                arg = node.args[arg_idx]
+                assert isinstance(arg, fx.Node), "Activation inputs must be fx.Node"
+                for user in arg.users:
+                    if node_to_idx[user] > node_to_idx[node]:
+                        raise ValueError(
+                            f"Input {arg} to maybe_inplace node {node} "
+                            f"is used again after the node. "
+                            f"This is not allowed; activation inputs to maybe_inplace "
+                            f"ops are donated to the op, meaning their memory may be "
+                            f"recycled for outputs.\n\n"
+                            f"To preserve the inputs, use the default overload or "
+                            f"clone them manually beforehand."
+                        )
+
+                if arg.op == "placeholder":
+                    # Graph input that maybe_inplace might modify.
+                    # Mark it so downstream passes know it's donated.
+                    # TODO(luka) store in placeholder node meta once supported
+                    pass_context.donated_input_ids.add(node_to_idx[arg])
+
+            # Same signature, just replace the overload that's called.
+            node.target = ir_op.torch_op
+            self.functionalized_ops[ir_op.name] += 1
+
+        count = sum(self.functionalized_ops.values())
+        ops = ",".join(self.functionalized_ops.keys())
+        logger.debug("Donated input IDs: %s", pass_context.donated_input_ids)
+        logger.debug(
+            "%s functionalized %d vLLM IR nodes for op(s) %s",
+            self.pass_name,
+            count,
+            ops,
+        )
diff --git a/vllm/compilation/passes/ir/lowering_pass.py b/vllm/compilation/passes/ir/lowering_pass.py
new file mode 100644
index 000000000000..f34f1c64b76e
--- /dev/null
+++ b/vllm/compilation/passes/ir/lowering_pass.py
@@ -0,0 +1,131 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections import defaultdict
+from collections.abc import Iterable
+
+from torch import fx
+from torch._inductor.pattern_matcher import (
+    CallFunctionVarArgs,
+    Match,
+    PatternMatcherPass,
+    register_graph_pattern,
+)
+
+from vllm.config import VllmConfig
+from vllm.ir.op import IrOp
+from vllm.logger import init_logger
+from vllm.logging_utils import lazy
+
+from ..vllm_inductor_pass import VllmInductorPass
+from .utils import get_ir_op
+
+logger = init_logger(__name__)
+
+
+class VllmIRLoweringPass(VllmInductorPass):
+    """
+    This pass lowers vLLM IR ops to their implementations the priority list.
+    """
+
+    def __init__(self, vllm_config: VllmConfig) -> None:
+        super().__init__(vllm_config)
+        self.patterns = PatternMatcherPass(self.pass_name)
+        self.selected_impls: dict[str, dict[str, str]] = defaultdict(lambda: {})
+        self.ops = [ir_op.torch_op for ir_op in IrOp.registry.values()]
+
+        # Look for any call_function node where the target is a vLLM IR op.
+        # Then, lower_matched_op will select, trace, and insert the implementation.
+        register_graph_pattern(
+            CallFunctionVarArgs(self.ops),
+            pass_dict=self.patterns,
+        )(self.lower_matched_op)
+
+    def lower_matched_op(self, match: Match, *args, **kwargs):
+        # TODO(luka) I think args and kwargs are for the match, but just use the node?
+
+        assert len(match.nodes) == 1, "Expected single node match"
+        node = match.nodes[0]
+        ir_op = get_ir_op(node)  # TODO is node.target always an overload?
+        assert ir_op is not None, "Expected vLLM IR op"
+        assert not node.kwargs  # I think there should never be kwargs here
+
+        # Select and record the implementation, using fake args
+        fake_args = fx.map_arg(node.args, lambda arg: arg.meta["val"])
+        ir_op_impl = ir_op.dispatch(*fake_args)
+        self.selected_impls[ir_op.name][node.name] = ir_op_impl.provider
+
+        # replace_by_example wants node args, not the fake tensors
+        # use func_impl_fn to properly handle in-place implementations
+        # TODO(luka): Use aot_export_module to get functionalized graph
+        # TODO(luka): Cache the fx_replacement to avoid re-tracing the same impl
+
+        # Defaults not present on node.args but required for replacement tracing
+        bound_args = ir_op._py_signature.bind(*node.args)
+        bound_args.apply_defaults()
+        # It is not safe to run functional passes (like DCE) on the replacements
+        # as they might not be functional.
+        match.replace_by_example(
+            ir_op_impl.func_impl_fn, bound_args.args, run_functional_passes=False
+        )
+
+    @VllmInductorPass.time_and_log
+    def __call__(self, graph: fx.Graph) -> None:
+        # clear at the beginning instead of end, so that tests can inspect
+        self.selected_impls.clear()
+
+        count = self.patterns.apply(graph)
+        logger.debug("VllmIRLoweringPass lowered %d vLLM IR nodes", count)
+
+        # TODO write self.selected_impls to depyf/tlparse dir
+        def count_items(impls: Iterable[str]) -> dict[str, int]:
+            counts: dict[str, int] = defaultdict(lambda: 0)
+            for impl in impls:
+                counts[impl] += 1
+            return counts
+
+        def print_count(counts: dict[str, int]) -> str:
+            # e.g., "impl1*3,impl2"
+            impl_count = lambda i, c: f"{i}" if c == 1 else f"{i}*{c}"
+            return ",".join(impl_count(impl, count) for impl, count in counts.items())
+
+        logger.debug(
+            "Selected implementations: %s",
+            lazy(
+                lambda: ", ".join(
+                    f"{op}={print_count(count_items(impls_by_node.values()))}"
+                    for op, impls_by_node in self.selected_impls.items()
+                )
+            ),
+        )
+
+        failed_nodes: list[fx.Node] = []
+        failed_ops: set[str] = set()
+        # Check no vllm_ir nodes were left in the graph
+        for node in graph.nodes:
+            if (ir_op := get_ir_op(node)) is None:
+                continue
+
+            failed_nodes.append(node)
+            failed_ops.add(ir_op.name)
+
+        if failed_nodes or failed_ops:
+            logger.warning("Failed to lower vLLM IR ops: %s", ",".join(failed_ops))
+            logger.warning("Full node list: %s", ",".join(str(n) for n in failed_nodes))
+
+    def uuid(self) -> str:
+        """
+        IR op priority & impl sources affect lowering pass output,
+        so we include them in the cache key.
+        """
+        priorities = {name: op.get_priority() for name, op in IrOp.registry.items()}
+        priorities_str = ";".join(
+            f"{name}={','.join(p)}" for name, p in priorities.items()
+        )
+
+        impl_uuids_str = ";".join(
+            f"{name}="
+            + ",".join(IrOp.registry[name].impls[provider].uuid() for provider in p)
+            for name, p in priorities.items()
+        )
+
+        return f"{super().uuid()}|{priorities_str}|{impl_uuids_str}"
diff --git a/vllm/compilation/passes/ir/utils.py b/vllm/compilation/passes/ir/utils.py
new file mode 100644
index 000000000000..50b4773ce523
--- /dev/null
+++ b/vllm/compilation/passes/ir/utils.py
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from torch import fx
+from torch._ops import OpOverload, OpOverloadPacket
+
+from vllm.ir.op import IrOp
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+def overload_or_default(op: OpOverload | OpOverloadPacket) -> OpOverload:
+    if isinstance(op, OpOverloadPacket):
+        return op.default
+    assert isinstance(op, OpOverload), "Expected an OpOverload or OpOverloadPacket"
+    return op
+
+
+def get_ir_op(node: fx.Node) -> IrOp | None:
+    if node.op != "call_function":
+        return None
+
+    if not isinstance(node.target, (OpOverload, OpOverloadPacket)):
+        return None
+
+    op_overload = overload_or_default(node.target)
+    if op_overload.namespace != "vllm_ir":
+        return None
+
+    op_name = op_overload._opname
+    if op_name not in IrOp.registry:
+        logger.warning(
+            "Unknown vLLM IR op %s, there's likely an issue with torch registration, "
+            "or a torch custom op was registered in the vllm_ir namespace by mistake.",
+            op_name,
+        )
+        return None
+
+    ir_op = IrOp.registry[op_name]
+    return ir_op
diff --git a/vllm/compilation/passes/pass_manager.py b/vllm/compilation/passes/pass_manager.py
index 70f86c8d2ae3..9c86518a946e 100644
--- a/vllm/compilation/passes/pass_manager.py
+++ b/vllm/compilation/passes/pass_manager.py
@@ -7,17 +7,23 @@
 from torch import fx as fx
 
 from vllm import envs
-from vllm._aiter_ops import rocm_aiter_ops
+from vllm._aiter_ops import check_aiter_fused_qk_rmsnorm, rocm_aiter_ops
 from vllm.compilation.passes.utility.post_cleanup import PostCleanupPass
 from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.utils.system_utils import set_env_var
 
-from .vllm_inductor_pass import VllmInductorPass
+from .ir.clone_elimination import UnsafeCloneEliminationPass
+from .ir.lowering_pass import VllmIRLoweringPass
+from .vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass
 
 if rocm_aiter_ops.is_enabled():
+    from .fusion.allreduce_rms_fusion import (
+        RocmAiterAllReduceFusionPass,
+    )
     from .fusion.rocm_aiter_fusion import (
+        MLADualRMSNormFusionPass,
         RocmAiterRMSNormQuantFusionPass,
         RocmAiterSiluMulFp8GroupQuantFusionPass,
         RocmAiterTritonAddRMSNormPadFusionPass,
@@ -25,7 +31,9 @@
 
 if current_platform.is_cuda_alike():
     from .fusion.act_quant_fusion import ActivationQuantFusionPass
-    from .fusion.attn_quant_fusion import AttnFusionPass
+    from .fusion.attn_quant_fusion import AttnQuantFusionPass
+    from .fusion.mla_attn_quant_fusion import MLAAttnQuantFusionPass
+    from .fusion.mla_rope_kvcache_cat_fusion import MLARoPEKVCacheCatFusionPass
     from .fusion.qk_norm_rope_fusion import QKNormRoPEFusionPass
     from .fusion.rms_quant_fusion import RMSNormQuantFusionPass
     from .fusion.rope_kvcache_fusion import RopeKVCacheFusionPass
@@ -36,6 +44,7 @@
 if current_platform.is_cuda():
     from .fusion.allreduce_rms_fusion import AllReduceFusionPass
     from .fusion.collective_fusion import AsyncTPPass
+    from .fusion.minimax_qk_norm_fusion import MiniMaxQKNormPass
 
 from .inductor_pass import (
     CustomGraphPass,
@@ -99,8 +108,19 @@ def __call__(self, graph: fx.Graph) -> None:
             else:
                 logger.debug("Skipping %s with compile range %s", pass_, compile_range)
 
-        # post-cleanup goes before fix_functionalization
-        # because it requires a functional graph
+        # perform the first post-cleanup before IR lowering to clean up fusion artifacts
+        # and make sure no dead IR ops are lowered.
+        self.post_cleanup(graph)
+        VllmInductorPass.dump_prefix += 1
+
+        # lowering before cleanup so DCE can clean up lowered ops.
+        # DCE handles mutating ops correctly as well.
+        self.ir_lowering(graph)
+        VllmInductorPass.dump_prefix += 1
+        self.clone_elimination(graph)
+        VllmInductorPass.dump_prefix += 1
+
+        # clean up after lowering again
         self.post_cleanup(graph)
         VllmInductorPass.dump_prefix += 1
 
@@ -108,6 +128,8 @@ def __call__(self, graph: fx.Graph) -> None:
         self.fix_functionalization(graph)
         VllmInductorPass.dump_prefix = None  # Cleanup index
 
+        VllmPatternMatcherPass.log_match_summary()
+
     def configure(self, config: VllmConfig) -> None:
         self.pass_config = config.compilation_config.pass_config
 
@@ -121,36 +143,57 @@ def configure(self, config: VllmConfig) -> None:
                 if self.pass_config.fuse_gemm_comms:
                     self.passes += [AsyncTPPass(config)]
 
+            if self.pass_config.fuse_act_padding and rocm_aiter_ops.is_enabled():
+                # Run the more specific RMSNorm+router-pad fusion before
+                # AR+RMS, since both consume fused_add_rms_norm.
+                self.passes += [RocmAiterTritonAddRMSNormPadFusionPass(config)]
+
             if self.pass_config.fuse_allreduce_rms:
-                self.passes += [AllReduceFusionPass(config)]
+                if rocm_aiter_ops.is_enabled():
+                    self.passes += [RocmAiterAllReduceFusionPass(config)]
+                else:
+                    self.passes += [AllReduceFusionPass(config)]
+
+            if self.pass_config.fuse_minimax_qk_norm:
+                self.passes += [MiniMaxQKNormPass(config)]
 
             if self.pass_config.fuse_norm_quant:
-                self.passes += [RMSNormQuantFusionPass(config)]
                 if rocm_aiter_ops.is_enabled():
                     self.passes += [
                         RocmAiterRMSNormQuantFusionPass(config),
                     ]
+                self.passes += [RMSNormQuantFusionPass(config)]
+
             if self.pass_config.fuse_act_quant:
                 self.passes += [ActivationQuantFusionPass(config)]
                 if rocm_aiter_ops.is_enabled():
                     self.passes += [RocmAiterSiluMulFp8GroupQuantFusionPass(config)]
 
-            if self.pass_config.fuse_act_padding and rocm_aiter_ops.is_enabled():
-                self.passes += [RocmAiterTritonAddRMSNormPadFusionPass(config)]
+            if (
+                self.pass_config.fuse_mla_dual_rms_norm
+                and rocm_aiter_ops.is_enabled()
+                and check_aiter_fused_qk_rmsnorm()
+            ):
+                self.passes += [MLADualRMSNormFusionPass(config)]
 
             if self.pass_config.fuse_rope_kvcache:
                 self.passes += [SplitCoalescingPass(config)]
                 self.passes += [ScatterSplitReplacementPass(config)]
                 self.passes += [RopeKVCacheFusionPass(config)]
 
+            if self.pass_config.fuse_rope_kvcache_cat_mla:
+                self.passes += [MLARoPEKVCacheCatFusionPass(config)]
+
             if self.pass_config.fuse_attn_quant:
-                self.passes += [AttnFusionPass(config)]
+                self.passes += [AttnQuantFusionPass(config)]
+                self.passes += [MLAAttnQuantFusionPass(config)]
 
             if self.pass_config.enable_qk_norm_rope_fusion:
                 self.passes += [SplitCoalescingPass(config)]
                 self.passes += [QKNormRoPEFusionPass(config)]
 
-            # needs a functional graph
+            self.ir_lowering = VllmIRLoweringPass(config)
+            self.clone_elimination = UnsafeCloneEliminationPass(config)
             self.post_cleanup = PostCleanupPass(config)
             self.fix_functionalization = FixFunctionalizationPass(config)
 
@@ -169,6 +212,11 @@ def uuid(self) -> str:
         state: dict[str, Any] = {"pass_config": self.pass_config.compute_hash()}
         for pass_ in self.passes:
             passes.append(pass_.uuid())
+
+        passes.append(self.post_cleanup.uuid())
+        passes.append(self.ir_lowering.uuid())
+        passes.append(self.clone_elimination.uuid())
+        passes.append(self.post_cleanup.uuid())
         passes.append(self.fix_functionalization.uuid())
 
         # Include the compile range in the uuid to ensure that inductor
diff --git a/vllm/compilation/passes/utility/fix_functionalization.py b/vllm/compilation/passes/utility/fix_functionalization.py
index 1b656d0c890e..2887c19ad4a2 100644
--- a/vllm/compilation/passes/utility/fix_functionalization.py
+++ b/vllm/compilation/passes/utility/fix_functionalization.py
@@ -168,6 +168,7 @@ def __call__(self, graph: torch.fx.Graph) -> None:
                     "cos_sin_cache",
                     "is_neox",
                     "position_ids",
+                    "forced_token_heads_per_warp",
                 )
                 self.defunctionalize(graph, node, mutated_args=mutated_args, args=args)
             elif (
@@ -180,6 +181,45 @@ def __call__(self, graph: torch.fx.Graph) -> None:
                     2: "key",
                 }
                 self.defunctionalize(graph, node, mutated_args=mutated_args)
+            elif (
+                hasattr(torch.ops.vllm, "fused_rope_unified_mla_kv_cache_update")
+                and at_target
+                == torch.ops.vllm.fused_rope_unified_mla_kv_cache_update.default
+            ):
+                # AOTAutograd functionalizes `q[..., nope_dim:] = rope_result` into
+                # a sequence of aten ops on q: view+slice+copy+slice_scatter.
+                # Since the fused MLA RoPE op mutates q_pe in-place, we can remove
+                # the redundant copy and slice_scatter ops during defunctionalization.
+                getitem_nodes = self.getitem_users(node)
+                q_pe_out = getitem_nodes[1]
+
+                for user in list(q_pe_out.users):
+                    if is_func(user, torch.ops.aten.copy.default):
+                        copy_temp = user
+                slice_temp = copy_temp.args[0]
+                for user in list(copy_temp.users):
+                    if is_func(user, torch.ops.aten.slice_scatter.default):
+                        slice_scatter_temp = user
+                view_temp = slice_scatter_temp.args[0]
+
+                view_orig = slice_temp.args[0]
+                slice_scatter_temp.replace_all_uses_with(view_orig)
+                self._remove(slice_scatter_temp)
+                self._remove(copy_temp)
+                self._remove(slice_temp)
+                self._remove(view_temp)
+                self._remove(q_pe_out)
+
+                # defunctionalize k_pe manually; self.replace_users_with_mutated_args
+                # does not support only replacing specific kwargs
+                k_pe_in = node.kwargs["k_pe"]
+                k_pe_out = getitem_nodes[2]
+                k_pe_out.replace_all_uses_with(k_pe_in)
+                self._remove(k_pe_out)
+
+                self.insert_defunctionalized(graph, node)
+                self._remove(node)
+
             # only used for test_functionalization::TestFunctionWithMutatedArgsAndReturn
             elif (
                 hasattr(torch.ops.vllm, "function_with_mutated_args_and_return")
diff --git a/vllm/compilation/passes/vllm_inductor_pass.py b/vllm/compilation/passes/vllm_inductor_pass.py
index b64c892881f5..4f90b2a27e1b 100644
--- a/vllm/compilation/passes/vllm_inductor_pass.py
+++ b/vllm/compilation/passes/vllm_inductor_pass.py
@@ -3,19 +3,24 @@
 import functools
 import operator
 import time
+from abc import ABC, abstractmethod
+from collections import defaultdict
 from collections.abc import Callable
 from dataclasses import dataclass
-from typing import ClassVar
+from typing import Any, ClassVar, Generic, ParamSpec, TypeVar
 
 import regex as re
 import torch
+import torch._inductor.pattern_matcher as pm
+from torch import fx
 from torch._dynamo.utils import lazy_format_graph_code
 from torch._inductor.pattern_matcher import PatternMatcherPass, PatternPrettyPrinter
 
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 
-from .inductor_pass import InductorPass
+from .fx_utils import is_func
+from .inductor_pass import InductorPass, enable_fake_mode
 
 logger = init_logger(__name__)
 
@@ -79,18 +84,23 @@ def end_and_log(self) -> None:
         logger.debug("%s completed in %.1f ms", self.pass_name, duration_ms)
 
 
+def get_match_table() -> dict[str, int]:
+    """Return a snapshot of the match table."""
+    return dict(VllmPatternMatcherPass.match_table)
+
+
 class VllmPatternMatcherPass(VllmInductorPass):
     """
     A VllmInductorPass that uses the Inductor pattern matcher.
-    Its main use is providing the dump_patterns utility that dumps the
-    Inductor pattern matcher patterns into a file, which greatly aids debugging.
-
-    TODO(luka) move more utilities to this pass.
+    Provides pattern registration with match counting, debug dumping, and logging.
     """
 
     matched_count: int = 0
     """The number of matched patterns in the pass."""
 
+    match_table: ClassVar[defaultdict[str, int]] = defaultdict(int)
+    """Global table mapping pass name to its total match count."""
+
     _OP_OVERLOAD_PATTERN: ClassVar[re.Pattern] = re.compile(
         r"<OpOverload\(op='([^']*)', overload='([^']*)'\)>"
     )
@@ -104,6 +114,11 @@ def _replace_op_overloads(self, string: str) -> str:
             )
         )
 
+    @classmethod
+    def log_match_summary(cls) -> None:
+        if cls.match_table:
+            logger.debug("fusion pass matches: %s", dict(cls.match_table))
+
     def dump_patterns(self, config: VllmConfig, pm_pass: PatternMatcherPass) -> None:
         """
         If debug dumping is enabled, dump the Inductor pattern-matcher patterns
@@ -137,6 +152,7 @@ def dump_patterns(self, config: VllmConfig, pm_pass: PatternMatcherPass) -> None
                 f"auto_functionalized as auto_functionalized\n"
                 f"from torch._inductor.pattern_matcher import *\n"
                 f"vllm = torch.ops.vllm",
+                "vllm_ir = torch.ops.vllm_ir",
                 file=f,
             )
 
@@ -171,6 +187,151 @@ def dump_patterns(self, config: VllmConfig, pm_pass: PatternMatcherPass) -> None
                     print(f"{pattern_repr}\n", file=f)
 
 
+P = ParamSpec("P")
+R = TypeVar("R")
+
+
+class VllmPatternReplacement(ABC, Generic[P, R]):
+    """
+    A pattern/replacement pair for FX graph fusion.
+
+    Implement the three abstract members below, then pass
+    instances to VllmFusionPatternMatcherPass.register(). The pass will
+    find every occurrence of `pattern` in the graph and substitute it
+    with `replacement`.
+    """
+
+    # TODO(Badr): bound methods work for pattern registration since
+    # PyTorch 2.10. Once vLLM requires torch>=2.11, replace these properties
+    # with plain methods and drop the closure indirection.
+    @property
+    @abstractmethod
+    def pattern(self) -> Callable[P, R]:
+        """Returns a closure defining the FX subgraph to search for."""
+        ...
+
+    @property
+    @abstractmethod
+    def replacement(self) -> Callable[P, R]:
+        """
+        Returns a closure defining the FX subgraph to
+        substitute in place of each match.
+        """
+        ...
+
+    @abstractmethod
+    def get_inputs(self) -> list[torch.Tensor]:
+        """Example tensors used to trace pattern and replacement."""
+        ...
+
+    # Helpers for get_inputs: uninitialized tensors of common dtypes.
+    @staticmethod
+    def empty(*args, **kwargs) -> torch.Tensor:
+        return torch.empty(*args, device="cuda", **kwargs)
+
+    @staticmethod
+    def empty_bf16(*args, **kwargs) -> torch.Tensor:
+        return torch.empty(*args, dtype=torch.bfloat16, device="cuda", **kwargs)
+
+    @staticmethod
+    def empty_fp16(*args, **kwargs) -> torch.Tensor:
+        return torch.empty(*args, dtype=torch.float16, device="cuda", **kwargs)
+
+    @staticmethod
+    def empty_fp32(*args, **kwargs) -> torch.Tensor:
+        return torch.empty(*args, dtype=torch.float32, device="cuda", **kwargs)
+
+    @staticmethod
+    def empty_i32(*args, **kwargs) -> torch.Tensor:
+        return torch.empty(*args, dtype=torch.int32, device="cuda", **kwargs)
+
+
+def _fx_view_to_reshape(gm: fx.GraphModule) -> None:
+    from torch._inductor.fx_passes.post_grad import view_to_reshape
+
+    view_to_reshape(gm)
+
+
+def fold_consecutive_reshapes(gm: fx.GraphModule) -> None:
+    """Fold consecutive reshape ops into a single reshape.
+
+    ``make_fx`` faithfully records every view/reshape the Python code performs,
+    so patterns like ``x.reshape(a, b).reshape(c, d)`` produce two reshape
+    nodes.  Inductor's own optimisation would fold these, but
+    ``pm.register_replacement``'s ``trace_fn`` runs before Inductor, so we
+    must fold them ourselves for the pattern to match the compiled graph.
+
+    When reshape(A, shape1) feeds only into reshape(result, shape2),
+    the first reshape is redundant -- replace with reshape(A, shape2).
+    """
+    aten_reshape = torch.ops.aten.reshape.default
+    for node in list(gm.graph.nodes):
+        if not is_func(node, aten_reshape):
+            continue
+        inp = node.args[0]
+        if not isinstance(inp, fx.Node) or not is_func(inp, aten_reshape):
+            continue
+        if len(inp.users) != 1:
+            continue
+        original_input = inp.args[0]
+        node.args = (original_input, node.args[1])
+        inp.replace_all_uses_with(original_input)
+        gm.graph.erase_node(inp)
+
+
+def _remove_noop_permutes(gm: fx.GraphModule) -> None:
+    for node in gm.graph.nodes:
+        if not is_func(node, torch.ops.aten.permute.default):
+            continue
+        dims = node.args[1]
+        if any(dim != i for i, dim in enumerate(dims)):
+            continue
+        node.replace_all_uses_with(node.args[0])
+        gm.graph.erase_node(node)
+
+
+class VllmFusionPatternMatcherPass(VllmPatternMatcherPass):
+    """
+    A VllmPatternMatcherPass for passes that use VllmPatternReplacement objects.
+    Subclasses register patterns via self.register() in their own __init__.
+    """
+
+    def __init__(self, config: VllmConfig, pass_name: str) -> None:
+        super().__init__(config)
+        self.pass_name = pass_name
+        self.pm_pass = PatternMatcherPass(pass_name=pass_name)
+        self._pattern_replacements: list[VllmPatternReplacement] = []
+
+    @enable_fake_mode
+    def register(self, pr: VllmPatternReplacement) -> None:
+        pm.register_replacement(
+            pr.pattern,
+            pr.replacement,
+            pr.get_inputs(),
+            self._trace_fn,
+            self.pm_pass,
+        )
+        self._pattern_replacements.append(pr)
+
+    def uuid(self) -> str:
+        return VllmInductorPass.hash_source(
+            type(self),
+            *[type(pr) for pr in self._pattern_replacements],
+        )
+
+    @staticmethod
+    def _trace_fn(*args: Any, **kwargs: Any) -> fx.GraphModule:
+        gm = pm.fwd_only(*args, **kwargs)
+        _fx_view_to_reshape(gm)
+        _remove_noop_permutes(gm)
+        return gm
+
+    @VllmInductorPass.time_and_log
+    def __call__(self, graph: torch.fx.Graph) -> None:
+        self.matched_count = self.pm_pass.apply(graph)
+        VllmPatternMatcherPass.match_table[self.pass_name] += self.matched_count
+
+
 class PrinterInductorPass(VllmInductorPass):
     def __init__(self, name: str, config: VllmConfig) -> None:
         super().__init__(config)
diff --git a/vllm/compilation/piecewise_backend.py b/vllm/compilation/piecewise_backend.py
index 7474d0bf841b..b647e0d8581a 100644
--- a/vllm/compilation/piecewise_backend.py
+++ b/vllm/compilation/piecewise_backend.py
@@ -11,6 +11,7 @@
 
 import torch._functorch.config
 import torch.fx as fx
+from torch._dynamo.utils import dynamo_timed
 from torch._inductor.runtime.triton_heuristics import CachingAutotuner
 from torch._logging._internal import trace_structured
 
@@ -270,10 +271,12 @@ def compile_all_ranges(self) -> None:
                 compile_range=range_entry.compile_range,
                 graph_index=self.piecewise_compile_index,
                 num_graphs=self.total_piecewise_compiles,
+                is_encoder=self.vllm_backend.is_encoder,
             )
 
             range_entry.compiled = True
 
+    @dynamo_timed("vllm_log_compile_start_torch_trace_only")
     def _log_compile_start(self, compile_range: Range):
         """Log compilation event for TORCH_TRACE/tlparse."""
         is_cudagraph_size = (
@@ -353,12 +356,22 @@ def _find_range_for_shape(self, runtime_shape: int) -> RangeEntry | None:
         return None
 
     def __call__(self, *args: Any) -> Any:
-        runtime_shape = args[self.sym_shape_indices[0]]
-        range_entry = self._find_range_for_shape(runtime_shape)
+        if self.sym_shape_indices:
+            runtime_shape = args[self.sym_shape_indices[0]]
+            range_entry = self._find_range_for_shape(runtime_shape)
+            assert range_entry is not None, (
+                f"Shape: {runtime_shape} out of considered ranges: "
+                f"{self.compile_ranges}"
+            )
+        else:
+            # All inputs have static shapes; use the only compiled range_entry
+            compiled_entries = [re for re in self.range_entries.values() if re.compiled]
+            assert len(compiled_entries) == 1, (
+                f"Expected exactly one compiled range_entry for static shape "
+                f"compilation, but found {len(compiled_entries)}"
+            )
+            range_entry = compiled_entries[0]
 
-        assert range_entry is not None, (
-            f"Shape: {runtime_shape} out of considered ranges: {self.compile_ranges}"
-        )
         assert range_entry.compiled, (
             "All ranges should be compiled or loaded up front in "
             "PiecewiseBackend.__init__. "
diff --git a/vllm/compilation/wrapper.py b/vllm/compilation/wrapper.py
index d5eb35e210ca..5635fe03ae2f 100644
--- a/vllm/compilation/wrapper.py
+++ b/vllm/compilation/wrapper.py
@@ -53,12 +53,6 @@ class TorchCompileWithNoGuardsWrapper:
     since we drop all guards.
     """
 
-    def check_invariants_and_forward(self, *args: Any, **kwargs: Any) -> Any:
-        assert hasattr(self, "_check_shape_invariants")
-        self._check_shape_invariants(*args, **kwargs)
-
-        return self.forward(*args, **kwargs)
-
     def _call_with_optional_nvtx_range(
         self, callable_fn: Callable[P, R], *args: P.args, **kwargs: P.kwargs
     ) -> Any:
@@ -115,6 +109,9 @@ def __init__(
                     "compilation_config.dynamic_shapes_config.evaluate_guards "
                     "requires VLLM_USE_BYTECODE_HOOK=0. "
                 )
+                assert ds_type != DynamicShapesType.UNBACKED, (
+                    "UNBACKED dynamic shapes do not add guards"
+                )
 
                 options["guard_filter_fn"] = lambda x: [
                     entry.guard_type == "SHAPE_ENV" for entry in x
@@ -130,18 +127,12 @@ def __init__(
         compiled_ptr: Any = self.forward
         # Validate that unbacked dynamic shapes require VLLM_USE_BYTECODE_HOOK=False
 
-        if ds_type == DynamicShapesType.UNBACKED:
-            # reason is that bytecode does torch._dynamo.eval_frame.
-            # remove_from_cache(self.original_code_object()) to force a new
-            # re-compilation. And if we use
-            # compiled_ptr = self.check_invariants_and_forward
-            # it will reset all entries.
-            assert not envs.VLLM_USE_BYTECODE_HOOK, (
-                "UNBACKED dynamic shapes requires VLLM_USE_BYTECODE_HOOK=0. "
-            )
-            assert not self.evaluate_guards, "UNBACKED dynamic shapes do not add guards"
+        # Apply the constrain_to_fx_strides patch before first compilation.
+        # This covers STOCK_TORCH_COMPILE and DYNAMO_ONCE paths. The VLLM
+        # compile paths call this from their own compile() methods too.
+        from vllm.env_override import _apply_constrain_to_fx_strides_patch
 
-            compiled_ptr = self.check_invariants_and_forward
+        _apply_constrain_to_fx_strides_patch()
 
         aot_context = nullcontext()
         if envs.VLLM_USE_AOT_COMPILE:
diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
index d5a3e9bfd960..b189c45c8d7a 100644
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -16,6 +16,7 @@
 from vllm.config.kv_transfer import KVTransferConfig
 from vllm.config.load import LoadConfig
 from vllm.config.lora import LoRAConfig
+from vllm.config.mamba import MambaConfig
 from vllm.config.model import (
     ModelConfig,
     iter_architecture_defaults,
@@ -36,7 +37,7 @@
 from vllm.config.reasoning import ReasoningConfig
 from vllm.config.scheduler import SchedulerConfig
 from vllm.config.speculative import SpeculativeConfig
-from vllm.config.speech_to_text import SpeechToTextConfig
+from vllm.config.speech_to_text import SpeechToTextConfig, SpeechToTextParams
 from vllm.config.structured_outputs import StructuredOutputsConfig
 from vllm.config.utils import (
     ConfigType,
@@ -83,6 +84,8 @@
     "LoadConfig",
     # From vllm.config.lora
     "LoRAConfig",
+    # From vllm.config.mamba
+    "MambaConfig",
     # From vllm.config.model
     "ModelConfig",
     "iter_architecture_defaults",
@@ -110,6 +113,7 @@
     "SpeculativeConfig",
     # From vllm.config.speech_to_text
     "SpeechToTextConfig",
+    "SpeechToTextParams",
     # From vllm.config.structured_outputs
     "StructuredOutputsConfig",
     # From vllm.config.profiler
diff --git a/vllm/config/attention.py b/vllm/config/attention.py
index 1da647a6d6ff..52ce9f102a6c 100644
--- a/vllm/config/attention.py
+++ b/vllm/config/attention.py
@@ -6,6 +6,7 @@
 from pydantic import field_validator
 
 from vllm.config.utils import config
+from vllm.v1.attention.backends.mla.prefill.registry import MLAPrefillBackendEnum
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
 
 
@@ -27,25 +28,52 @@ class AttentionConfig:
     flash_attn_max_num_splits_for_cuda_graph: int = 32
     """Flash Attention max number splits for cuda graph decode."""
 
-    use_cudnn_prefill: bool = False
-    """Whether to use cudnn prefill."""
-
-    use_trtllm_ragged_deepseek_prefill: bool = False
-    """Whether to use TRTLLM ragged deepseek prefill."""
+    tq_max_kv_splits_for_cuda_graph: int = 32
+    """TurboQuant max NUM_KV_SPLITS for cuda graph decode.
+    Fixes the split count so grid dimensions are constant across captures,
+    and buffers can be pre-allocated to avoid inflating the memory estimate."""
 
     use_trtllm_attention: bool | None = None
     """If set to True/False, use or don't use the TRTLLM attention backend
     in flashinfer. If None, auto-detect the attention backend in flashinfer."""
 
-    disable_flashinfer_prefill: bool = True
-    """Whether to disable flashinfer prefill."""
-
     disable_flashinfer_q_quantization: bool = False
     """If set, when using fp8 kv, do not quantize Q to fp8."""
 
+    mla_prefill_backend: MLAPrefillBackendEnum | None = None
+    """MLA prefill backend to use. If None, will be selected automatically.
+    Valid options: FLASH_ATTN (FA3/FA4), FLASHINFER, TRTLLM_RAGGED."""
+
     use_prefill_query_quantization: bool = False
     """If set, quantize query for attention in prefill."""
 
+    use_fp4_indexer_cache: bool = False
+    """If set, use fp4 indexer cache for dsv32 family model (not support yet)"""
+
+    use_non_causal: bool = False
+    """Whether to use non-causal (bidirectional) attention."""
+
+    flex_attn_block_m: int | None = None
+    """Triton kernel BLOCK_M tile size for flex attention.
+    Must be a power of 2 >= 16. If None and VLLM_BATCH_INVARIANT=1,
+    defaults to 16."""
+
+    flex_attn_block_n: int | None = None
+    """Triton kernel BLOCK_N tile size for flex attention.
+    Must be a power of 2 >= 16. If None and VLLM_BATCH_INVARIANT=1,
+    defaults to 16."""
+
+    flex_attn_q_block_size: int | None = None
+    """Logical Q block size for the flex attention block mask.
+    Must be a power of 2 and divisible by flex_attn_block_m.
+    If None, uses the default (16 on PyTorch >= 2.9, 128 otherwise)."""
+
+    flex_attn_kv_block_size: int | None = None
+    """Logical KV block size for the flex attention block mask.
+    Must be a power of 2 and divisible by flex_attn_block_n.
+    If None, uses the default (kv_cache_block_size on PyTorch >= 2.9,
+    128 otherwise)."""
+
     def compute_hash(self) -> str:
         """
         Provide a hash that uniquely identifies all the configs
@@ -73,3 +101,11 @@ def validate_backend_before(cls, value: Any) -> Any:
                 return None
             return AttentionBackendEnum[value.upper()]
         return value
+
+    @field_validator("mla_prefill_backend", mode="before")
+    @classmethod
+    def validate_mla_prefill_backend_before(cls, value: Any) -> Any:
+        """Enable parsing of the `mla_prefill_backend` enum type from string."""
+        if isinstance(value, str):
+            return MLAPrefillBackendEnum[value.upper()]
+        return value
diff --git a/vllm/config/cache.py b/vllm/config/cache.py
index dcc93d987eda..4fa7e1cfcbb9 100644
--- a/vllm/config/cache.py
+++ b/vllm/config/cache.py
@@ -8,6 +8,10 @@
 
 from vllm.config.utils import config
 from vllm.logger import init_logger
+from vllm.utils.torch_utils import (
+    is_quantized_kv_cache,
+    kv_cache_uses_per_token_head_scales,
+)
 
 logger = init_logger(__name__)
 
@@ -20,8 +24,15 @@
     "fp8_e5m2",
     "fp8_inc",
     "fp8_ds_mla",
+    "turboquant_k8v4",
+    "turboquant_4bit_nc",
+    "turboquant_k3v4_nc",
+    "turboquant_3bit_nc",
+    "int8_per_token_head",
+    "fp8_per_token_head",
+    "nvfp4",
 ]
-MambaDType = Literal["auto", "float32", "float16"]
+MambaDType = Literal["auto", "float32", "float16", "bfloat16"]
 MambaCacheMode = Literal["all", "align", "none"]
 PrefixCachingHashAlgo = Literal["sha256", "sha256_cbor", "xxhash", "xxhash_cbor"]
 KVOffloadingBackend = Literal["native", "lmcache"]
@@ -38,10 +49,24 @@ class CacheConfig:
     Accepts None (meaning "use default"). After construction, always int."""
     user_specified_block_size: bool = field(default=False, init=False)
     """Whether block_size was explicitly provided. Derived automatically."""
-    gpu_memory_utilization: float = Field(default=0.9, gt=0, le=1)
+    user_specified_mamba_block_size: bool = field(default=False, init=False)
+    """Whether mamba_block_size was explicitly provided. Derived automatically."""
+    hash_block_size: SkipValidation[int] | None = None  # type: ignore
+    """Block size (in tokens) used for computing Request's block_hashes.
+
+    This can be set to a finer granularity than the physical KV cache block
+    sizes (e.g. 8) as long as every KV cache group's `block_size` is divisible
+    by it. This enables prefix-caching keys to be computed at the finest common
+    granularity and then merged for larger physical block sizes.
+
+    This config is not static default. If left unspecified, vLLM will choose a
+    default based on the resolved KV cache groups (typically the smallest KV
+    cache block size when there are multiple groups).
+    """
+    gpu_memory_utilization: float = Field(default=0.92, gt=0, le=1)
     """The fraction of GPU memory to be used for the model executor, which can
     range from 0 to 1. For example, a value of 0.5 would imply 50% GPU memory
-    utilization. If unspecified, will use the default value of 0.9. This is a
+    utilization. If unspecified, will use the default value of 0.92. This is a
     per-instance limit, and only applies to the current vLLM instance. It does
     not matter if you have another vLLM instance running on the same GPU. For
     example, if you have two vLLM instances running on the same GPU, you can
@@ -89,8 +114,6 @@ class CacheConfig:
     kv_cache_dtype_skip_layers: list[str] = field(default_factory=list)
     """Layer patterns to skip KV cache quantization. Accepts layer indices
     (e.g., '0', '2', '4') or attention type names (e.g., 'sliding_window')."""
-    cpu_kvcache_space_bytes: int | None = None
-    """(CPU backend only) CPU key-value cache space."""
     mamba_page_size_padded: int | None = None
     """ Optional override for mamba page size; used by hybrid mamba/attention
     models to ensure exact alignment with attention page size."""
@@ -109,7 +132,7 @@ class CacheConfig:
     mamba_cache_mode: MambaCacheMode = "none"
     """The cache strategy for Mamba layers.
     - "none": set when prefix caching is disabled.
-    - "all": cache the mamba state of all tokens at position i * block_size. This is 
+    - "all": cache the mamba state of all tokens at position i * block_size. This is
            the default behavior (for models that support it) when prefix caching is
            enabled.
     - "align": only cache the mamba state of the last token of each scheduler step and
@@ -171,9 +194,11 @@ def compute_hash(self) -> str:
             "num_gpu_blocks_override",
             "enable_prefix_caching",
             "prefix_caching_hash_algo",
-            "cpu_kvcache_space_bytes",
+            # Prefix-caching implementation detail (doesn't affect compiled graph).
+            "hash_block_size",
             "mamba_page_size_padded",
             "user_specified_block_size",
+            "user_specified_mamba_block_size",
             "_block_size_resolved",
             # Post-init/derived counters
             "num_gpu_blocks",
@@ -206,6 +231,8 @@ def _apply_block_size_default(self) -> "CacheConfig":
             object.__setattr__(self, "block_size", self.DEFAULT_BLOCK_SIZE)
         else:
             object.__setattr__(self, "user_specified_block_size", True)
+        if self.mamba_block_size is not None:
+            object.__setattr__(self, "user_specified_mamba_block_size", True)
         return self
 
     @field_validator("calculate_kv_scales", mode="after")
@@ -223,11 +250,19 @@ def _warn_deprecated_calculate_kv_scales(cls, calculate_kv_scales: bool) -> bool
     @field_validator("cache_dtype", mode="after")
     @classmethod
     def _validate_cache_dtype(cls, cache_dtype: CacheDType) -> CacheDType:
-        if cache_dtype.startswith("fp8"):
+        if kv_cache_uses_per_token_head_scales(cache_dtype):
+            logger.info(
+                "Using %s data type to store kv cache. It reduces the GPU "
+                "memory footprint and boosts the performance. "
+                "Dynamic per-token-head scales will be computed at runtime.",
+                str(cache_dtype),
+            )
+        elif is_quantized_kv_cache(cache_dtype):
             logger.info(
-                "Using fp8 data type to store kv cache. It reduces the GPU "
+                "Using %s data type to store kv cache. It reduces the GPU "
                 "memory footprint and boosts the performance. "
                 "Meanwhile, it may cause accuracy drop without a proper "
-                "scaling factor."
+                "scaling factor",
+                str(cache_dtype),
             )
         return cache_dtype
diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py
index 5b6648908dd6..7b9478035ece 100644
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -26,6 +26,8 @@
 
 if TYPE_CHECKING:
     from vllm.config import VllmConfig
+    from vllm.v1.attention.backend import AttentionCGSupport
+    from vllm.v1.kv_cache_interface import KVCacheConfig
 else:
     VllmConfig = object
 
@@ -121,7 +123,7 @@ class PassConfig:
     fuse_act_quant: bool = None  # type: ignore[assignment]
     """Fuse the custom SiluMul + quant ops."""
     fuse_attn_quant: bool = None  # type: ignore[assignment]
-    """Fuse the custom attention + quant ops."""
+    """Fuse the custom Attention and MLAAttention + quant ops."""
     eliminate_noops: bool = Field(default=True)
     """Eliminate no-op ops."""
     enable_sp: bool = None  # type: ignore[assignment]
@@ -132,12 +134,18 @@ class PassConfig:
     """Enable async TP."""
     fuse_allreduce_rms: bool = None  # type: ignore[assignment]
     """Enable flashinfer allreduce fusion."""
-    enable_qk_norm_rope_fusion: bool = False
+    fuse_minimax_qk_norm: bool = None  # type: ignore[assignment]
+    """Enable fused allreduce+RMSNorm for MiniMax QK norm."""
+    enable_qk_norm_rope_fusion: bool = None  # type: ignore[assignment]
     """Enable fused Q/K RMSNorm + RoPE pass."""
+    fuse_rope_kvcache_cat_mla: bool = None  # type: ignore[assignment]
+    """Enable fused MLA KV cache update with RoPE."""
 
     # ROCm/AITER specific fusions
     fuse_act_padding: bool = None  # type: ignore[assignment]
     """Fuse the custom RMSNorm + padding ops."""
+    fuse_mla_dual_rms_norm: bool = None  # type: ignore[assignment]
+    """Fuse paired q/kv RMS norms in MLA attention."""
     fuse_rope_kvcache: bool = None  # type: ignore[assignment]
     """Fuse the QK rope + KV cache ops."""
 
@@ -220,7 +228,9 @@ def compute_hash(self) -> str:
         "fuse_gemm_comms",
         "fuse_allreduce_rms",
         "fuse_act_padding",
+        "fuse_mla_dual_rms_norm",
         "fuse_rope_kvcache",
+        "fuse_rope_kvcache_cat_mla",
         mode="wrap",
     )
     @classmethod
@@ -266,12 +276,24 @@ def __post_init__(self) -> None:
                 "The fusion will be disabled."
             )
             self.fuse_act_padding = False
+        if self.fuse_mla_dual_rms_norm and not current_platform.is_rocm():
+            logger.warning_once(
+                "MLA dual RMS norm fusion requires ROCm/AITER. "
+                "The fusion will be disabled."
+            )
+            self.fuse_mla_dual_rms_norm = False
         if self.fuse_rope_kvcache and not current_platform.is_rocm():
             logger.warning_once(
                 "KV cache fusion currently only enabled on ROCm. "
                 "The fusion will be disabled."
             )
             self.fuse_rope_kvcache = False
+        if self.fuse_rope_kvcache_cat_mla and not current_platform.is_cuda_alike():
+            logger.warning_once(
+                "MLA KV cache update with RoPE fusion enabled but the "
+                "current platform is not CUDA or ROCm. The fusion will be disabled."
+            )
+            self.fuse_rope_kvcache_cat_mla = False
 
     def log_enabled_passes(self) -> None:
         """
@@ -282,7 +304,7 @@ def log_enabled_passes(self) -> None:
         """
         enabled_fusions = [
             f.name[len("fuse_") :]
-            for f in fields(self)
+            for f in fields(self)  # type: ignore[arg-type]
             if getattr(self, f.name) and f.name.startswith("fuse_")
         ]
 
@@ -466,6 +488,15 @@ class CompilationConfig:
     disabled when running with Inductor: mode>CompilationMode.NONE and
     backend="inductor".
     Inductor generates (fused) Triton kernels for disabled custom ops."""
+
+    ir_enable_torch_wrap: bool = None  # type: ignore[assignment]
+    """If True, enable vllm_ir torch custom op wrapping during the forward pass.
+    When False, torch custom op wrapping is disabled, allowing Dynamo to trace the
+    selected implementation directly or avoiding torch custom op overhead in eager mode.
+    Defaults to True when using Inductor with vllm-compile
+    (backend=="inductor" and mode == VLLM_COMPILE), False otherwise.
+    """
+
     splitting_ops: list[str] | None = None
     """A list of ops to exclude from cudagraphs, used in piecewise compilation.
 
@@ -486,9 +517,10 @@ class CompilationConfig:
     If empty list [], no ops are excluded (suitable for full cudagraphs)."""
     compile_mm_encoder: bool = False
     """Whether or not to compile the multimodal encoder.
-    Currently, this only works for `Qwen2_5_vl` and `mLLaMa4` models
-    on selected platforms. Disabled by default until more models
-    are supported/tested to work."""
+    Currently, this only works for `Qwen2_5_vl` and `mLLaMa4` models on selected
+    platforms. It may also work for models loaded with the Transformers modeling backend
+    if the encoder is compilable. Disabled by default until more models are
+    supported/tested to work."""
 
     # Vision encoder CUDA graph
     cudagraph_mm_encoder: bool = False
@@ -505,13 +537,23 @@ class CompilationConfig:
     User-provided values override auto-inference.
     Example: [2048, 4096, 8192, 13824]"""
 
-    encoder_cudagraph_max_images_per_batch: int = 0
-    """Maximum number of images per batch for encoder CUDA graph capture.
+    encoder_cudagraph_max_vision_items_per_batch: int = 0
+    """Maximum number of images/videos per batch for encoder CUDA graph capture.
     Determines the fixed batch size used during graph capture.
     If 0 (default), auto-inferred as max_budget // min_budget from the
     model's budget range. User-provided positive value overrides
     auto-inference."""
 
+    encoder_cudagraph_max_frames_per_batch: int | None = None
+    """Maximum total video frames per batch for encoder CUDA graph capture.
+    Controls the cu_seqlens buffer size (one entry per attention sequence,
+    i.e. one per video frame).
+    If None (default), auto-inferred as encoder_cudagraph_max_vision_items_per_batch
+    * max_frames_per_video (model-specific value according to processing_info).
+    Positive value overrides auto-inference and applies to all budget levels.
+    If we limit the video count per prompt to `0`, it will also be set to `0`
+    (i.e., fall back to image-only mode)."""
+
     # Inductor capture
     compile_sizes: list[int | str] | None = None
     """Sizes to compile for inductor. In addition
@@ -688,6 +730,8 @@ class CompilationConfig:
     """files that are traced for compilation"""
     compilation_time: float = field(default=0.0, init=False)
     """time taken for compilation"""
+    encoder_compilation_time: float = field(default=0.0, init=False)
+    """time taken for multimodal encoder compilation"""
 
     static_forward_context: dict[str, Any] = field(default_factory=dict, init=False)
     """Per-model forward context
@@ -701,20 +745,20 @@ class CompilationConfig:
     # Attention ops; used for piecewise cudagraphs
     # Use PyTorch operator format: "namespace::name"
     _attention_ops: ClassVar[list[str]] = [
-        "vllm::unified_attention",
         "vllm::unified_attention_with_output",
-        "vllm::unified_mla_attention",
         "vllm::unified_mla_attention_with_output",
         "vllm::mamba_mixer2",
         "vllm::mamba_mixer",
         "vllm::short_conv",
         "vllm::linear_attention",
         "vllm::plamo2_mamba_mixer",
-        "vllm::gdn_attention_core",
+        "vllm::qwen_gdn_attention_core",
+        "vllm::gdn_attention_core_xpu",
         "vllm::olmo_hybrid_gdn_full_forward",
         "vllm::kda_attention",
         "vllm::sparse_attn_indexer",
         "vllm::rocm_aiter_sparse_attn_indexer",
+        "vllm::deepseek_v4_attention",
     ]
 
     def compute_hash(self) -> str:
@@ -736,6 +780,7 @@ def compute_hash(self) -> str:
             "local_cache_dir",
             "traced_files",
             "compilation_time",
+            "encoder_compilation_time",
             "static_forward_context",
             "pass_config",  # handled separately below
             "dynamic_shapes_config",  # handled separately below
@@ -755,6 +800,7 @@ def __repr__(self) -> str:
             "enabled_custom_ops": True,
             "disabled_custom_ops": True,
             "compilation_time": True,
+            "encoder_compilation_time": True,
             "traced_files": True,
             "inductor_compile_config": {
                 "post_grad_custom_post_pass": True,
@@ -830,6 +876,7 @@ def validate_compile_cache_save_format(cls, value: str) -> str:
         "cudagraph_mode",
         "max_cudagraph_capture_size",
         "use_inductor_graph_partition",
+        "ir_enable_torch_wrap",
         mode="wrap",
     )
     @classmethod
@@ -951,12 +998,29 @@ def __post_init__(self) -> None:
         # Validate encoder CUDA graph configuration
         if (
             self.cudagraph_mm_encoder
-            and self.encoder_cudagraph_max_images_per_batch < 0
+            and self.encoder_cudagraph_max_vision_items_per_batch < 0
         ):
             raise ValueError(
-                "encoder_cudagraph_max_images_per_batch must be "
+                "encoder_cudagraph_max_vision_items_per_batch must be "
                 "non-negative (0 = auto-infer)"
             )
+        if (
+            self.cudagraph_mm_encoder
+            and self.encoder_cudagraph_max_frames_per_batch is not None
+            and self.encoder_cudagraph_max_frames_per_batch < 0
+        ):
+            raise ValueError(
+                "encoder_cudagraph_max_frames_per_batch must be "
+                "non-negative (None = auto-infer)"
+            )
+
+        if self.encoder_cudagraph_token_budgets and any(
+            b <= 0 for b in self.encoder_cudagraph_token_budgets
+        ):
+            raise ValueError(
+                f"All encoder_cudagraph_token_budgets must be positive, "
+                f"got {self.encoder_cudagraph_token_budgets}"
+            )
 
         if self.backend == "":
             self.backend = current_platform.get_compile_backend()
@@ -1102,6 +1166,25 @@ def set_splitting_ops_for_v1(
                     self.cudagraph_mode = CUDAGraphMode.FULL
                 self.splitting_ops = []
 
+        if (
+            not self.use_inductor_graph_partition
+            and (self.pass_config.enable_sp or self.pass_config.fuse_gemm_comms)
+            and self.splitting_ops
+        ):
+            logger.warning_once(
+                "Sequence parallelism requires full-graph compilation when "
+                "use_inductor_graph_partition is off. Setting splitting_ops "
+                "to an empty list to preserve SP and async TP."
+            )
+            self.splitting_ops = []
+            if self.cudagraph_mode.has_piecewise_cudagraphs():
+                logger.warning_once(
+                    "Sequence parallelism is incompatible with piecewise "
+                    "cudagraph when use_inductor_graph_partition is off. "
+                    "Setting cudagraph_mode to FULL."
+                )
+                self.cudagraph_mode = CUDAGraphMode.FULL
+
         # Disable CUDA graphs for DeepEP high-throughput since its not CG compatible
         if (
             all2all_backend == "deepep_high_throughput"
@@ -1232,6 +1315,152 @@ def is_custom_op_enabled(self, op: str) -> bool:
         assert "none" in self.custom_ops
         return f"+{op}" in self.custom_ops
 
+    def resolve_cudagraph_mode_and_sizes(
+        self,
+        min_cg_support: "AttentionCGSupport",
+        min_cg_attn_backend: str | None,
+        uniform_decode_query_len: int = 1,
+        tensor_parallel_size: int = 1,
+        kv_cache_config: "KVCacheConfig | None" = None,
+        max_num_reqs: int | None = None,
+        is_profiling: bool = False,
+    ) -> CUDAGraphMode:
+        from vllm.v1.attention.backend import AttentionCGSupport
+
+        cudagraph_mode = self.cudagraph_mode
+        if cudagraph_mode is None or cudagraph_mode == CUDAGraphMode.NONE:
+            self.cudagraph_mode = CUDAGraphMode.NONE
+            return CUDAGraphMode.NONE
+
+        # Check cudagraph for mixed batch is supported
+        if (
+            cudagraph_mode.mixed_mode() == CUDAGraphMode.FULL
+            and min_cg_support != AttentionCGSupport.ALWAYS
+        ):
+            msg = (
+                f"CUDAGraphMode.{cudagraph_mode.name} is not supported "
+                f"with {min_cg_attn_backend} backend (support: "
+                f"{min_cg_support})"
+            )
+            if min_cg_support == AttentionCGSupport.NEVER:
+                # if not supported any full cudagraphs, just raise it.
+                msg += (
+                    "; please try cudagraph_mode=PIECEWISE, and "
+                    "make sure compilation mode is VLLM_COMPILE"
+                )
+                raise ValueError(msg)
+
+            # attempt to resolve the full cudagraph related mode
+            if self.splitting_ops_contain_attention():
+                msg += "; setting cudagraph_mode=FULL_AND_PIECEWISE"
+                cudagraph_mode = CUDAGraphMode.FULL_AND_PIECEWISE
+            else:
+                msg += "; setting cudagraph_mode=FULL_DECODE_ONLY"
+                cudagraph_mode = CUDAGraphMode.FULL_DECODE_ONLY
+            logger.warning(msg)
+
+        # check that if we are doing decode full-cudagraphs it is supported
+        if (
+            cudagraph_mode.decode_mode() == CUDAGraphMode.FULL
+            and min_cg_support == AttentionCGSupport.NEVER
+        ):
+            msg = (
+                f"CUDAGraphMode.{cudagraph_mode.name} is not supported "
+                f"with {min_cg_attn_backend} backend (support: "
+                f"{min_cg_support})"
+            )
+            if self.mode == CompilationMode.VLLM_COMPILE and (
+                self.splitting_ops_contain_attention()
+                or self.use_inductor_graph_partition
+            ):
+                msg += (
+                    "; setting cudagraph_mode=PIECEWISE because "
+                    "attention is compiled piecewise"
+                )
+                cudagraph_mode = CUDAGraphMode.PIECEWISE
+            else:
+                msg += (
+                    "; setting cudagraph_mode=NONE because "
+                    "attention is not compiled piecewise"
+                )
+                cudagraph_mode = CUDAGraphMode.NONE
+            logger.warning(msg)
+
+        # check that if we are doing spec-decode + decode full-cudagraphs it is
+        # supported
+        if (
+            cudagraph_mode.decode_mode() == CUDAGraphMode.FULL
+            and uniform_decode_query_len > 1
+            and min_cg_support.value < AttentionCGSupport.UNIFORM_BATCH.value
+        ):
+            msg = (
+                f"CUDAGraphMode.{cudagraph_mode.name} is not supported"
+                f" with spec-decode for attention backend "
+                f"{min_cg_attn_backend} (support: {min_cg_support})"
+            )
+            if self.splitting_ops_contain_attention():
+                msg += "; setting cudagraph_mode=PIECEWISE"
+                cudagraph_mode = CUDAGraphMode.PIECEWISE
+            else:
+                msg += "; setting cudagraph_mode=NONE"
+                cudagraph_mode = CUDAGraphMode.NONE
+            logger.warning(msg)
+
+        # double check that we can support full cudagraph if they are requested
+        # even after automatic downgrades
+        if (
+            cudagraph_mode.has_full_cudagraphs()
+            and min_cg_support == AttentionCGSupport.NEVER
+        ):
+            raise ValueError(
+                f"CUDAGraphMode.{cudagraph_mode.name} is not "
+                f"supported with {min_cg_attn_backend} backend ("
+                f"support:{min_cg_support}) "
+                "; please try cudagraph_mode=PIECEWISE, "
+                "and make sure compilation mode is VLLM_COMPILE"
+            )
+
+        # Adjust cudagraph sizes to be a multiple of uniform_decode_query_len
+        # to avoid: https://github.com/vllm-project/vllm/issues/28207 and temp-fix:
+        # https://github.com/vllm-project/vllm/issues/28207#issuecomment-3504004536
+        # Will be removed in the near future when we have separate cudagraph capture
+        # sizes for decode and mixed prefill-decode.
+        if (
+            cudagraph_mode.decode_mode() == CUDAGraphMode.FULL
+            and uniform_decode_query_len > 1
+        ):
+            self.adjust_cudagraph_sizes_for_spec_decode(
+                uniform_decode_query_len,
+                tensor_parallel_size,
+            )
+
+        # For Mamba models with FULL decode cudagraphs, each decode
+        # sequence needs one Mamba cache block. The decode cudagraph
+        # dispatcher already caps batch sizes at max_num_seqs, so we just
+        # need to verify that enough blocks exist. Raising here instead
+        # of silently capping cudagraph_capture_sizes avoids unintended
+        # restrictions on PIECEWISE (prefill) cudagraphs.
+        # See: https://github.com/vllm-project/vllm/issues/34094
+        if (
+            kv_cache_config is not None
+            and max_num_reqs is not None
+            and cudagraph_mode.has_full_cudagraphs()
+            and not is_profiling
+            and kv_cache_config.has_mamba_layers
+            and max_num_reqs > kv_cache_config.num_blocks
+        ):
+            raise ValueError(
+                f"max_num_seqs ({max_num_reqs}) exceeds available Mamba cache "
+                f"blocks ({kv_cache_config.num_blocks}). Each decode sequence "
+                "requires one Mamba cache block, so CUDA graph capture cannot "
+                "proceed. Please lower max_num_seqs to at most "
+                f"{kv_cache_config.num_blocks} or increase "
+                "gpu_memory_utilization."
+            )
+
+        self.cudagraph_mode = cudagraph_mode
+        return cudagraph_mode
+
     def adjust_cudagraph_sizes_for_spec_decode(
         self, uniform_decode_query_len: int, tensor_parallel_size: int
     ):
@@ -1279,58 +1508,6 @@ def adjust_cudagraph_sizes_for_spec_decode(
         self.max_cudagraph_capture_size = rounded_sizes[-1]
         self.cudagraph_capture_sizes = rounded_sizes
 
-    def adjust_cudagraph_sizes_for_mamba_cache(
-        self, num_mamba_cache_blocks: int
-    ) -> None:
-        """Cap cudagraph capture sizes to available Mamba cache blocks.
-
-        For hybrid Mamba/attention models, the Mamba conv_state and
-        ssm_state tensors have their first dimension equal to num_blocks
-        (from KVCacheConfig). During CUDA graph capture the decode batch
-        size equals num_tokens, so capture sizes exceeding num_blocks
-        would cause out-of-bounds access in Mamba kernels.
-
-        See: https://github.com/vllm-project/vllm/issues/34094
-        """
-        if not self.cudagraph_capture_sizes or num_mamba_cache_blocks <= 0:
-            return
-
-        assert self.max_cudagraph_capture_size is not None
-
-        if num_mamba_cache_blocks >= self.max_cudagraph_capture_size:
-            return
-
-        capped_sizes = [
-            s for s in self.cudagraph_capture_sizes if s <= num_mamba_cache_blocks
-        ]
-
-        if len(capped_sizes) == 0:
-            logger.warning(
-                "No valid cudagraph capture sizes remain after capping "
-                "to Mamba cache blocks (%d). The smallest capture size "
-                "was %d. Disabling cudagraph capture. Consider reducing "
-                "max_num_seqs or increasing available GPU memory.",
-                num_mamba_cache_blocks,
-                self.cudagraph_capture_sizes[0],
-            )
-            self.cudagraph_capture_sizes = []
-            self.max_cudagraph_capture_size = 0
-            return
-
-        logger.warning(
-            "Capping cudagraph capture sizes from max %d to %d to fit "
-            "Mamba cache blocks (%d blocks available). This limits the "
-            "maximum batch size that can use CUDA graphs. To increase "
-            "this limit, reduce max_num_seqs or increase available GPU "
-            "memory.",
-            self.max_cudagraph_capture_size,
-            capped_sizes[-1],
-            num_mamba_cache_blocks,
-        )
-
-        self.max_cudagraph_capture_size = capped_sizes[-1]
-        self.cudagraph_capture_sizes = capped_sizes
-
     def get_compile_ranges(self) -> list[Range]:
         """Get the compile ranges for the compilation config."""
         if self.compile_ranges_endpoints is None:
diff --git a/vllm/config/device.py b/vllm/config/device.py
index c20e4d0f288b..2d46dcdb117e 100644
--- a/vllm/config/device.py
+++ b/vllm/config/device.py
@@ -65,8 +65,13 @@ def __post_init__(self):
             elif isinstance(self.device, torch.device):
                 self.device_type = self.device.type
 
-        # Some device types require processing inputs on CPU
-        if self.device_type in ["tpu"]:
+        # Some platforms require processing inputs on CPU.
+        from vllm.platforms import current_platform
+
+        if (
+            current_platform.uses_host_device_handling()
+            and self.device_type == current_platform.device_type
+        ):
             self.device = None
         else:
             # Set device with device type
diff --git a/vllm/config/kernel.py b/vllm/config/kernel.py
index 4476cd125265..c5f44e1563d0 100644
--- a/vllm/config/kernel.py
+++ b/vllm/config/kernel.py
@@ -1,24 +1,157 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
+import contextlib
 from collections.abc import Callable
-from typing import Any, Literal
+from dataclasses import asdict, fields
+from typing import TYPE_CHECKING, Any, Literal
+
+from pydantic import Field, field_validator
+
+from vllm.config.utils import config, get_hash_factors, hash_factors
+from vllm.logger import init_logger
+
+if TYPE_CHECKING:
+    from vllm.config import VllmConfig
+
+logger = init_logger(__name__)
+
+
+@config
+class IrOpPriorityConfig:
+    """
+    Configuration for vLLM IR op priority for dispatching/lowering during the
+    forward pass. Each member is a list of strings, which will be installed
+    in worker init via vllm.ir.ops.<op_name>.set_default().
+    A single comma-separated string is accepted as well,
+
+    If specified manually, platform defaults will be appended to the lists.
+    See KernelConfig.set_platform_defaults().
+    """
+
+    rms_norm: list[str] = Field(default_factory=list)
+    """Priority list for vllm.ir.ops.rms_norm"""
+
+    fused_add_rms_norm: list[str] = Field(default_factory=list)
+    """Priority list for vllm.ir.ops.fused_add_rms_norm"""
+
+    def compute_hash(self) -> str:
+        """
+        Produces a hash unique to the pass configuration.
+        Any new fields that affect compilation should be added to the hash.
+        Any future fields that don't affect compilation should be excluded.
+
+        Also, manually add IR op impl UUIDs to make sure they affect the compile cache.
+        """
+        factors = get_hash_factors(self, set())
+
+        # Implementations are hidden from Dynamo,
+        # so they don't show up in the traced files list.
+        from vllm.ir.op import IrOp
+
+        assert "_impls" not in factors
+        factors["_impls"] = {
+            name: {
+                provider: IrOp.registry[name].impls[provider].uuid() for provider in p
+            }
+            for name, p in asdict(self).items()  # type: ignore[call-overload]
+        }
+
+        return hash_factors(factors)
+
+    @field_validator("*", mode="before")
+    @classmethod
+    def _to_list_str(cls, value: str | list[str]):
+        if isinstance(value, str):
+            value = value.replace(" ", "").split(",")
+
+        assert all(isinstance(v, str) for v in value)
+        return value
+
+    def _iter_op_priorities(self):
+        """
+        Yield (IrOp, priority_list) for each field, after importing platform
+        kernels and validating each entry.
+        """
+        from vllm.ir.op import IrOp
+        from vllm.platforms import current_platform
+
+        current_platform.import_ir_kernels()
+
+        for field in fields(self):  # type: ignore[arg-type]
+            op_priority = getattr(self, field.name)
+            assert op_priority is not None, (
+                f"IR op priority for {field.name} must be set"
+            )
+            logger.debug("Setting IR op priority for %s to %s", field.name, op_priority)
+            yield IrOp.registry[field.name], op_priority
+
+    def set_default(self) -> None:
+        """
+        Permanently set the IR op priority for all op members.
+        """
+        for ir_op, op_priority in self._iter_op_priorities():
+            ir_op.set_default(op_priority)
+
+    @contextlib.contextmanager
+    def set_priority(self):
+        """
+        Context manager to set the IR op priority for all op members.
+        It also imports IR kernel implementations for the current platform
+        to ensure all implementations are made available.
+        """
+        with contextlib.ExitStack() as stack:
+            for ir_op, op_priority in self._iter_op_priorities():
+                stack.enter_context(ir_op.set_priority(op_priority))
+            yield
+
+    @classmethod
+    def with_default(
+        cls, default: list[str], /, **kwargs: list[str]
+    ) -> "IrOpPriorityConfig":
+        """
+        A helper to create an IrOpPriorityConfig where fields not specified in kwargs
+        use the given default list.
+        """
+        for field in fields(cls):  # type: ignore[arg-type]
+            if field.name not in kwargs:
+                kwargs[field.name] = list(default)
 
-from pydantic import field_validator
+        return cls(**kwargs)
 
-from vllm.config.utils import config
-from vllm.utils.hashing import safe_hash
 
 MoEBackend = Literal[
     "auto",
     "triton",
     "deep_gemm",
+    "deep_gemm_mega_moe",
     "cutlass",
     "flashinfer_trtllm",
     "flashinfer_cutlass",
     "flashinfer_cutedsl",
+    "flashinfer_b12x",
     "marlin",
+    "humming",
+    "triton_unfused",
     "aiter",
+    "emulation",
+]
+
+LinearBackend = Literal[
+    "auto",
+    "cutlass",
+    "flashinfer_cutlass",
+    "flashinfer_trtllm",
+    "flashinfer_cudnn",
+    "marlin",
+    "triton",
+    "deep_gemm",
+    "torch",
+    "aiter",
+    "machete",
+    "fbgemm",
+    "conch",
+    "exllama",
+    "emulation",
 ]
 
 
@@ -26,6 +159,12 @@
 class KernelConfig:
     """Configuration for kernel selection and warmup behavior."""
 
+    ir_op_priority: IrOpPriorityConfig = Field(default_factory=IrOpPriorityConfig)
+    """
+    vLLM IR op priority for dispatching/lowering during the forward pass.
+    Platform defaults appended automatically during VllmConfig.__post_init__.
+    """
+
     enable_flashinfer_autotune: bool = None  # type: ignore[assignment]
     """If True, run FlashInfer autotuning during kernel warmup."""
 
@@ -35,12 +174,39 @@ class KernelConfig:
     - "auto": Automatically select the best backend based on model and hardware
     - "triton": Use Triton-based fused MoE kernels
     - "deep_gemm": Use DeepGEMM kernels (FP8 block-quantized only)
+    - "deep_gemm_mega_moe": Use DeepGEMM mega MoE kernels
     - "cutlass": Use vLLM CUTLASS kernels
     - "flashinfer_trtllm": Use FlashInfer with TRTLLM-GEN kernels
     - "flashinfer_cutlass": Use FlashInfer with CUTLASS kernels
     - "flashinfer_cutedsl": Use FlashInfer with CuteDSL kernels (FP4 only)
+    - "flashinfer_b12x": Use FlashInfer CuteDSL fused MoE for SM12x
+      (RTX Pro 6000 / DGX Spark)
     - "marlin": Use Marlin kernels (weight-only quantization)
-    - "aiter": Use AMD AITer kernels (ROCm only)"""
+    - "humming": Use Humming Mixed Precision kernels
+    - "triton_unfused": Use Triton unfused MoE kernels
+    - "aiter": Use AMD AITer kernels (ROCm only)
+    - "emulation": use BF16/FP16 GEMM, dequantizing weights and
+                   running QDQ on activations.
+    """
+
+    linear_backend: LinearBackend = "auto"
+    """Backend for quantized linear layer GEMM kernels. Available options:
+
+    - "auto": Automatically select the best backend based on model and hardware
+    - "cutlass": Use CUTLASS-based kernels
+    - "flashinfer_cutlass": Use FlashInfer with CUTLASS kernels
+    - "flashinfer_trtllm": Use FlashInfer with TensorRT-LLM kernels
+    - "flashinfer_cudnn": Use FlashInfer with cuDNN kernels
+    - "marlin": Use Marlin kernels
+    - "triton": Use Triton-based kernels
+    - "deep_gemm": Use DeepGEMM kernels
+    - "torch": Use PyTorch native scaled_mm kernels
+    - "aiter": Use AMD AITer kernels (ROCm only)
+    - "machete": Use Machete kernels (mixed-precision)
+    - "fbgemm": Use FBGEMM kernels
+    - "conch": Use Conch mixed-precision kernels
+    - "exllama": Use Exllama mixed-precision kernels
+    - "emulation": Use slow dequant-to-BF16 emulation (for testing only)"""
 
     @field_validator("moe_backend", mode="before")
     @classmethod
@@ -49,23 +215,26 @@ def _normalize_moe_backend(cls, value: Any) -> Any:
             return value.lower().replace("-", "_")
         return value
 
+    @field_validator("linear_backend", mode="before")
+    @classmethod
+    def _normalize_linear_backend(cls, value: Any) -> Any:
+        if isinstance(value, str):
+            return value.lower().replace("-", "_")
+        return value
+
     def compute_hash(self) -> str:
         """
-        WARNING: Whenever a new field is added to this config,
-        ensure that it is included in the factors list if
-        it affects the computation graph.
-
-        Provide a hash that uniquely identifies all the configs
-        that affect the structure of the computation
-        graph from input ids/embeddings to the final hidden states,
-        excluding anything before input ids/embeddings and after
-        the final hidden states.
+        Produces a hash unique to the pass configuration.
+        Any new fields that affect compilation should be added to the hash.
+        Any future fields that don't affect compilation should be excluded.
         """
-        # no factors to consider.
-        # this config will not affect the computation graph.
-        factors: list[Any] = []
-        hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest()
-        return hash_str
+        ignored_factors = {
+            "enable_flashinfer_autotune",
+            "ir_op_priority",  # handled separately below
+        }
+        factors = get_hash_factors(self, ignored_factors)
+        factors["ir_op_priority"] = self.ir_op_priority.compute_hash()
+        return hash_factors(factors)
 
     @field_validator("enable_flashinfer_autotune", mode="wrap")
     @classmethod
@@ -74,3 +243,31 @@ def _skip_none_validation(cls, value: Any, handler: Callable) -> Any:
         if value is None:
             return value
         return handler(value)
+
+    def set_platform_defaults(self, vllm_config: "VllmConfig") -> None:
+        """Set platform-specific defaults for the kernel config."""
+        from vllm.platforms import current_platform
+
+        platform_op_priority = current_platform.get_default_ir_op_priority(vllm_config)
+        logger.debug(
+            "Setting platform-specific IR op priority defaults: %s, user-defined: %s",
+            platform_op_priority,
+            self.ir_op_priority,
+        )
+        for op_name, op_priority in asdict(platform_op_priority).items():
+            current_op_priority: list[str] = getattr(self.ir_op_priority, op_name)
+            if current_op_priority is None:
+                setattr(self.ir_op_priority, op_name, op_priority)
+            else:
+                # Append platform-specific priorities
+                # Must be idempotent because vllm_config.set_platform_defaults() may be
+                # called multiple times (due to VllmConfig.__post_init__ manual call).
+                unique_op_priority = [
+                    op for op in op_priority if op not in current_op_priority
+                ]
+                current_op_priority.extend(unique_op_priority)
+
+        logger.info(
+            "Final IR op priority after setting platform defaults: %s",
+            self.ir_op_priority,
+        )
diff --git a/vllm/config/load.py b/vllm/config/load.py
index 93240ec5fc0f..90d906dafb91 100644
--- a/vllm/config/load.py
+++ b/vllm/config/load.py
@@ -9,6 +9,9 @@
 from vllm.logger import init_logger
 from vllm.utils.hashing import safe_hash
 
+DEFAULT_SAFETENSORS_PREFETCH_NUM_THREADS = 8
+DEFAULT_SAFETENSORS_PREFETCH_BLOCK_SIZE = 16 * 1024 * 1024
+
 if TYPE_CHECKING:
     from vllm.model_executor.model_loader import LoadFormats
     from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
@@ -52,6 +55,7 @@ class LoadConfig:
       https://github.com/ggml-org/ggml/blob/master/docs/gguf.md).
     - "mistral" will load weights from consolidated safetensors files used by
       Mistral models.
+    - "modelexpress" will load weights using ModelExpress.
     - Other custom values can be supported via plugins.
     """
     download_dir: str | None = None
@@ -79,6 +83,15 @@ class LoadConfig:
       was quantized using torchao and saved using safetensors.
       Needs `torchao >= 0.14.0`.
     """
+    safetensors_prefetch_num_threads: int = Field(
+        default=DEFAULT_SAFETENSORS_PREFETCH_NUM_THREADS, ge=1
+    )
+    """Number of worker threads used to prefetch safetensors checkpoint files
+    into the OS page cache when safetensors prefetching is enabled."""
+    safetensors_prefetch_block_size: int = Field(
+        default=DEFAULT_SAFETENSORS_PREFETCH_BLOCK_SIZE, ge=1
+    )
+    """Read size in bytes for each safetensors checkpoint file prefetch."""
     model_loader_extra_config: dict | TensorizerConfig = Field(default_factory=dict)
     """Extra config for model loader. This will be passed to the model loader
     corresponding to the chosen load_format."""
diff --git a/vllm/config/lora.py b/vllm/config/lora.py
index bfef0efa3df0..94a679941c85 100644
--- a/vllm/config/lora.py
+++ b/vllm/config/lora.py
@@ -7,8 +7,10 @@
 from pydantic import ConfigDict, Field, model_validator
 from typing_extensions import Self
 
+from vllm import envs
 from vllm.config.utils import config
 from vllm.logger import init_logger
+from vllm.platforms import current_platform
 from vllm.utils.hashing import safe_hash
 
 if TYPE_CHECKING:
@@ -69,6 +71,12 @@ class LoRAConfig:
     for variable LoRA usage patterns at the cost of increased startup time and
     memory usage. Only takes effect when cudagraph_specialize_lora is True.
     """
+    enable_mixed_moe_lora_format: bool = False
+    """If True, force the engine to use the universal 2D MoE LoRA wrapper
+    (`FusedMoEWithLoRA`) regardless of the model's `is_3d_moe_weight` flag, so
+    that 2D-format and 3D-format MoE LoRA adapters can be served in the same
+    deployment. Only meaningful forMoE models; ignored otherwise. Default False 
+    keeps the existing model-driven behavior."""
 
     def compute_hash(self) -> str:
         """
@@ -88,6 +96,7 @@ def compute_hash(self) -> str:
         factors.append(self.fully_sharded_loras)
         factors.append(self.lora_dtype)
         factors.append(self.enable_tower_connector_lora)
+        factors.append(self.enable_mixed_moe_lora_format)
         # target_modules affects which modules get LoRA applied
         factors.append(
             tuple(sorted(self.target_modules)) if self.target_modules else None
@@ -105,7 +114,14 @@ def _validate_lora_config(self) -> Self:
                 f"max_cpu_loras ({self.max_cpu_loras}) must be >= "
                 f"max_loras ({self.max_loras})."
             )
-
+        if envs.VLLM_LORA_ENABLE_DUAL_STREAM and not current_platform.is_cuda_alike():
+            raise ValueError("Dual CUDA streams are only supported on CUDA platforms.")
+        if envs.VLLM_LORA_ENABLE_DUAL_STREAM and self.fully_sharded_loras:
+            logger.warning_once(
+                "fully_sharded_loras isn't compatible with "
+                "VLLM_LORA_ENABLE_DUAL_STREAM, set VLLM_LORA_ENABLE_DUAL_STREAM=False"
+            )
+            envs.VLLM_LORA_ENABLE_DUAL_STREAM = False
         return self
 
     def verify_with_model_config(self, model_config: ModelConfig):
diff --git a/vllm/config/mamba.py b/vllm/config/mamba.py
new file mode 100644
index 000000000000..996478c36760
--- /dev/null
+++ b/vllm/config/mamba.py
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from enum import Enum, EnumMeta
+from typing import Any
+
+from pydantic import field_validator
+
+from vllm.config.utils import config
+
+
+class _MambaBackendEnumMeta(EnumMeta):
+    """Metaclass for MambaBackendEnum to provide better error messages."""
+
+    def __getitem__(cls, name: str):
+        try:
+            return super().__getitem__(name)
+        except KeyError:
+            valid = ", ".join(cls.__members__.keys())
+            raise ValueError(
+                f"Unknown Mamba SSU backend: '{name}'. Valid options are: {valid}"
+            ) from None
+
+
+class MambaBackendEnum(Enum, metaclass=_MambaBackendEnumMeta):
+    """Enumeration of supported Mamba SSU (selective state update) backends."""
+
+    TRITON = "triton"
+    FLASHINFER = "flashinfer"
+
+
+@config
+class MambaConfig:
+    """Configuration for Mamba SSM backends."""
+
+    backend: MambaBackendEnum = MambaBackendEnum.TRITON
+    """Mamba SSU backend to use."""
+
+    enable_stochastic_rounding: bool = False
+    """Enable stochastic rounding when writing SSM state to fp16 cache.
+    Uses random bits to unbias the rounding error, which can improve
+    numerical stability for long sequences."""
+    stochastic_rounding_philox_rounds: int = 0
+    """Number of Philox PRNG rounds for stochastic rounding random number
+    generation. 0 uses the Triton default. Higher values improve randomness
+    quality at the cost of compute."""
+
+    @field_validator("backend", mode="before")
+    @classmethod
+    def validate_backend_before(cls, value: Any) -> Any:
+        """Enable parsing of the `backend` enum type from string."""
+        if isinstance(value, str):
+            return MambaBackendEnum[value.upper()]
+        return value
+
+    def __post_init__(self):
+        if self.enable_stochastic_rounding:
+            from vllm.platforms import current_platform
+
+            if not current_platform.is_cuda():
+                raise ValueError(
+                    "Stochastic rounding for Mamba cache is only supported "
+                    "on NVIDIA CUDA platforms. Please do not specify  "
+                    "`--enable-mamba-cache-stochastic-rounding`."
+                )
+            if (
+                self.backend == MambaBackendEnum.TRITON
+                and not current_platform.is_device_capability_family(100)
+            ):
+                raise ValueError(
+                    "Stochastic rounding for Mamba cache with triton backend requires "
+                    "compute capability 10.0 (data center Blackwell). The `cvt.rs` "
+                    "PTX instruction is not supported on your GPU. Please do not "
+                    "specify `--enable-mamba-cache-stochastic-rounding`, "
+                    "or set `--mamba-backend flashinfer`."
+                )
diff --git a/vllm/config/model.py b/vllm/config/model.py
index b8c601334a1d..547ec342f967 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -21,6 +21,7 @@
     MultiModalConfig,
 )
 from vllm.config.pooler import PoolerConfig
+from vllm.config.quantization import QuantizationConfigArgs
 from vllm.config.scheduler import RunnerType
 from vllm.config.utils import config, getattr_iter
 from vllm.logger import init_logger
@@ -79,10 +80,20 @@
 
 logger = init_logger(__name__)
 
+
+def is_cumem_allocator_available() -> bool:
+    try:
+        from vllm.device_allocator.cumem import cumem_available
+    except ImportError:
+        return False
+
+    return cumem_available
+
+
 RunnerOption = Literal["auto", RunnerType]
 ConvertType = Literal["none", "embed", "classify"]
 ConvertOption = Literal["auto", ConvertType]
-TokenizerMode = Literal["auto", "hf", "slow", "mistral", "deepseek_v32"]
+TokenizerMode = Literal["auto", "hf", "slow", "mistral", "deepseek_v32", "deepseek_v4"]
 ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"]
 LogprobsMode = Literal[
     "raw_logits", "raw_logprobs", "processed_logits", "processed_logprobs"
@@ -133,8 +144,15 @@ class ModelConfig:
     - "slow" will always use the slow tokenizer.
     - "mistral" will always use the tokenizer from `mistral_common`.
     - "deepseek_v32" will always use the tokenizer from `deepseek_v32`.
+    - "deepseek_v4" will always use the tokenizer from `deepseek_v4`.
     - "qwen_vl" will always use the tokenizer from `qwen_vl`.
-    - Other custom values can be supported via plugins."""
+    - Other custom values can be supported via plugins.
+
+    To swap the Rust BPE backend that powers HF fast tokenizers for the
+    [fastokens](https://github.com/crusoecloud/fastokens) implementation, set
+    `VLLM_USE_FASTOKENS=1` instead — that override applies to any mode that
+    loads an HF fast tokenizer (`hf`, `deepseek_v32`, `deepseek_v4`,
+    `qwen_vl`, …)."""
     trust_remote_code: bool = False
     """Trust remote code (e.g., from HuggingFace) when downloading the model
     and tokenizer."""
@@ -199,6 +217,11 @@ class ModelConfig:
     `quantization_config` attribute in the model config file. If that is
     `None`, we assume the model weights are not quantized and use `dtype` to
     determine the data type of the weights."""
+    quantization_config: dict[str, Any] | QuantizationConfigArgs | None = None
+    """User-facing quantization configuration. Carries per-layer-kind specs
+    (linear, moe) and ignore patterns; see :class:`QuantizationConfigArgs`.
+    Auto-populated from the matching online shorthand when `quantization` is
+    one of the values in `ONLINE_QUANT_SHORTHAND_NAMES`."""
     allow_deprecated_quantization: bool = False
     """Whether to allow deprecated quantization methods."""
     enforce_eager: bool = False
@@ -221,6 +244,10 @@ class ModelConfig:
     Processed means the values after applying all processors, including
     temperature and top_k/top_p.
     """
+    use_fp64_gumbel: bool = False
+    """Whether to use FP64 (instead of FP32) for the Gumbel noise used by the
+    sampler. FP64 reduces the chance of ties in Gumbel-max sampling at the cost
+    of significantly lower kernel throughput on most GPUs."""
     disable_sliding_window: bool = False
     """Whether to disable sliding window. If True, we will disable the sliding
     window functionality of the model, capping to sliding window size. If the
@@ -279,6 +306,13 @@ class ModelConfig:
     enable_sleep_mode: bool = False
     """Enable sleep mode for the engine (only cuda and
     hip platforms are supported)."""
+    enable_cumem_allocator: bool = False
+    """Enable the custom cumem allocator to leverage advanced GPU memory
+    allocation features such as multi-node NVLink support.
+
+    Sleep mode automatically enables this allocator. Only cuda and hip
+    platforms are supported.
+    """
     model_impl: str | ModelImpl = "auto"
     """Which implementation of the model to use:
 
@@ -296,9 +330,15 @@ class ModelConfig:
     io_processor_plugin: str | None = None
     """IOProcessor plugin name to load at model startup"""
     renderer_num_workers: int = 1
-    """Number of worker threads in the renderer thread pool. This pool
-    handles async tokenization, chat template rendering, and multimodal
-    preprocessing."""
+    """Number of worker threads in the renderer thread pool. The pool is
+    consumed by the async renderer path (e.g. the OpenAI-compatible API
+    server started by `vllm serve`) to parallelize tokenization, chat
+    template rendering, and multimodal preprocessing across concurrent
+    requests.
+
+    The offline `LLM` entrypoint uses the synchronous renderer path and
+    processes prompts (including multimodal preprocessing) serially, so
+    this setting has no effect there."""
 
     # Pooler config
     pooler_config: PoolerConfig | None = None
@@ -320,6 +360,10 @@ class ModelConfig:
     mm_encoder_only: InitVar[bool | None] = None
     mm_encoder_tp_mode: InitVar[MMEncoderTPMode | None] = None
     mm_encoder_attn_backend: InitVar[AttentionBackendEnum | str | None] = None
+    mm_encoder_attn_dtype: InitVar[str | None] = None
+    mm_encoder_fp8_scale_path: InitVar[str | None] = None
+    mm_encoder_fp8_scale_save_path: InitVar[str | None] = None
+    mm_encoder_fp8_scale_save_margin: InitVar[float | None] = None
     interleave_mm_strings: InitVar[bool | None] = None
     skip_mm_profiling: InitVar[bool | None] = None
     video_pruning_rate: InitVar[float | None] = None
@@ -349,6 +393,7 @@ def compute_hash(self) -> str:
             "spec_target_max_model_len",
             "enforce_eager",
             "logprobs_mode",
+            "use_fp64_gumbel",
             "disable_cascade_attn",
             "skip_tokenizer_init",
             "served_model_name",
@@ -441,6 +486,10 @@ def __post_init__(
         mm_encoder_only: bool | None,
         mm_encoder_tp_mode: MMEncoderTPMode | None,
         mm_encoder_attn_backend: AttentionBackendEnum | str | None,
+        mm_encoder_attn_dtype: str | None,
+        mm_encoder_fp8_scale_path: str | None,
+        mm_encoder_fp8_scale_save_path: str | None,
+        mm_encoder_fp8_scale_save_margin: float | None,
         interleave_mm_strings: bool | None,
         skip_mm_profiling: bool | None,
         video_pruning_rate: float | None,
@@ -485,8 +534,16 @@ def __post_init__(
                 stacklevel=2,
             )
 
-        if self.enable_sleep_mode and not current_platform.is_sleep_mode_available():
-            raise ValueError("Sleep mode is not supported on current platform.")
+        if self.enable_sleep_mode:
+            if not current_platform.is_sleep_mode_available():
+                raise ValueError("Sleep mode is not supported on current platform.")
+            if not self.enable_cumem_allocator:
+                logger.info_once(
+                    "Enabling cumem allocator because sleep mode requires it."
+                )
+                self.enable_cumem_allocator = True
+        if self.enable_cumem_allocator and not is_cumem_allocator_available():
+            raise ValueError("cumem allocator is not supported on current platform.")
 
         hf_config = get_config(
             self.hf_config_path or self.model,
@@ -507,6 +564,7 @@ def __post_init__(
         if dict_overrides:
             self._apply_dict_overrides(hf_config, dict_overrides)
         self.hf_text_config = get_hf_text_config(self.hf_config)
+        self.model_arch_config = self.get_model_arch_config()
         self.attention_chunk_size = getattr(
             self.hf_text_config, "attention_chunk_size", None
         )
@@ -514,18 +572,28 @@ def __post_init__(
         self.hf_image_processor_config = get_hf_image_processor_config(
             self.model, hf_token=self.hf_token, revision=self.revision
         )
-        self.model_arch_config = self.get_model_arch_config()
 
         architectures = self.architectures
         registry = self.registry
         is_generative_model = registry.is_text_generation_model(architectures, self)
         is_pooling_model = registry.is_pooling_model(architectures, self)
 
-        self.runner_type = self._get_runner_type(architectures, self.runner)
+        self.runner_type = self._get_runner_type(
+            architectures, self.runner, self.convert
+        )
         self.convert_type = self._get_convert_type(
             architectures, self.runner_type, self.convert
         )
 
+        if (
+            is_pooling_model
+            and not is_generative_model
+            and self.runner_type in ("draft", "generate")
+        ):
+            raise ValueError(
+                f"Embedding models do not support `--runner {self.runner_type}`. "
+                "Use `--runner pooling` or `--runner auto` for embedding models."
+            )
         if self.runner_type == "generate" and not is_generative_model:
             generate_converts = _RUNNER_CONVERTS["generate"]
             if self.convert_type not in generate_converts:
@@ -560,6 +628,8 @@ def __post_init__(
                 self.tokenizer_mode = "qwen_vl"
             elif arch == "DeepseekV32ForCausalLM":
                 self.tokenizer_mode = "deepseek_v32"
+            elif arch == "DeepseekV4ForCausalLM":
+                self.tokenizer_mode = "deepseek_v4"
 
             if self.tokenizer_mode != "auto":
                 logger.info(
@@ -635,6 +705,10 @@ def __post_init__(
                 mm_encoder_only=mm_encoder_only,
                 mm_encoder_tp_mode=mm_encoder_tp_mode,
                 mm_encoder_attn_backend=mm_encoder_attn_backend,
+                mm_encoder_attn_dtype=mm_encoder_attn_dtype,
+                mm_encoder_fp8_scale_path=mm_encoder_fp8_scale_path,
+                mm_encoder_fp8_scale_save_path=mm_encoder_fp8_scale_save_path,
+                mm_encoder_fp8_scale_save_margin=mm_encoder_fp8_scale_save_margin,
                 interleave_mm_strings=interleave_mm_strings,
                 skip_mm_profiling=skip_mm_profiling,
                 video_pruning_rate=video_pruning_rate,
@@ -853,11 +927,15 @@ def _get_runner_type(
         self,
         architectures: list[str],
         runner: RunnerOption,
+        convert: ConvertOption,
     ) -> RunnerType:
         if runner != "auto":
             return runner
 
-        runner_type = self._get_default_runner_type(architectures)
+        if convert in {"auto", "none"}:
+            runner_type = self._get_default_runner_type(architectures)
+        else:
+            runner_type = "pooling"
 
         # Don't log the most common case
         if runner_type != "generate":
@@ -935,6 +1013,8 @@ def _verify_quantization(self) -> None:
             # `override_quantization_method` method) must be checked in order
             # of preference (this is particularly important for GPTQ).
             overrides = [
+                "auto_gptq",
+                "gptq",
                 "gptq_marlin",
                 "awq_marlin",
                 "inc",
@@ -943,12 +1023,18 @@ def _verify_quantization(self) -> None:
                 "modelopt_fp4",
                 "modelopt_mxfp8",
                 "modelopt_mixed",
-                "petit_nvfp4",
                 # Ensure heavy backends are probed last to avoid unnecessary
                 # imports during override detection (e.g., MXFP4 imports Triton)
                 "mxfp4",
+                "gpt_oss_mxfp4",
+                "deepseek_v4_fp8",
                 "cpu_awq",
+                "humming",
+                "gguf",
             ]
+            # if the user specifies humming, we should always use humming
+            if self.quantization == "humming":
+                overrides = ["humming"] + overrides
             quantization_methods = [
                 q for q in supported_quantization if q not in overrides
             ]
@@ -961,7 +1047,7 @@ def _verify_quantization(self) -> None:
             for name in quantization_methods:
                 method = me_quant.get_quantization_config(name)
                 quantization_override = method.override_quantization_method(
-                    quant_cfg, self.quantization
+                    quant_cfg, self.quantization, hf_config=self.hf_config
                 )
                 if quantization_override is not None:
                     # Raise error if the override is not custom (custom would
@@ -1191,21 +1277,9 @@ def get_inputs_embeds_size(self) -> int:
     def is_deepseek_mla(self) -> bool:
         return self.model_arch_config.is_deepseek_mla
 
-    @cached_property
+    @property
     def is_mm_prefix_lm(self) -> bool:
-        """Whether to use bidirectional attention for mm positions."""
-        if hasattr(self.hf_config, "is_mm_prefix_lm"):
-            return bool(self.hf_config.is_mm_prefix_lm)
-        # fallback to list of known models
-        MM_PREFIX_LM_MODELS = (
-            "bagel",
-            "gemma3",
-            "molmo2",
-            "paligemma",
-        )
-        if not hasattr(self.hf_config, "model_type"):
-            return False
-        return self.hf_config.model_type in MM_PREFIX_LM_MODELS
+        return self.model_arch_config.is_mm_prefix_lm
 
     def get_head_size(self) -> int:
         return self.model_arch_config.head_size
@@ -1326,7 +1400,7 @@ def get_num_layers_by_block_type(
                 )
             raise AssertionError(f"Unsupported block type: {block_type}")
 
-    def get_mamba_chunk_size(self) -> int | None:
+    def get_mamba_chunk_size(self) -> int:
         """
         Returns the mamba chunk size if it exists
         """
@@ -1337,7 +1411,7 @@ def get_mamba_chunk_size(self) -> int | None:
             chunk_size = getattr(self.hf_text_config, "chunk_size", None)
 
         # Since Mamba1 does not have a chunk notion
-        # we use a default chunk size of 1024.
+        # we use a default chunk size of 2048.
         if chunk_size is None:
             chunk_size = 2048
 
diff --git a/vllm/config/model_arch.py b/vllm/config/model_arch.py
index 24d1baea0a9e..0b99df22b880 100644
--- a/vllm/config/model_arch.py
+++ b/vllm/config/model_arch.py
@@ -53,5 +53,8 @@ class ModelArchitectureConfig:
     is_deepseek_mla: bool
     """Whether the model is a DeepSeek MLA model."""
 
+    is_mm_prefix_lm: bool
+    """Whether the model uses image bidirectional attention."""
+
     derived_max_model_len_and_key: tuple[float, str | None]
     """Derived maximum model length and key from the hf config."""
diff --git a/vllm/config/multimodal.py b/vllm/config/multimodal.py
index e66511c92ab2..56333b1116c0 100644
--- a/vllm/config/multimodal.py
+++ b/vllm/config/multimodal.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from collections.abc import Mapping
+from pathlib import Path
 from typing import Any, Literal, TypeAlias, TypedDict, final
 
 from pydantic import ConfigDict, Field, field_validator, model_validator
@@ -158,6 +159,24 @@ class MultiModalConfig:
     """Optional override for the multi-modal encoder attention backend when
     using vision transformers. Accepts any value from
     `vllm.v1.attention.backends.registry.AttentionBackendEnum` (e.g. `FLASH_ATTN`)."""
+    mm_encoder_attn_dtype: Literal["fp8"] | None = None
+    """Optional dtype override for ViT encoder attention. Set to `"fp8"` to
+    enable FP8 quantization via the FlashInfer cuDNN backend. When set to
+    `"fp8"` without a scale file, dynamic scaling is used automatically.
+    See docs/features/quantization/fp8_vit_attn.md for details."""
+    mm_encoder_fp8_scale_path: str | None = None
+    """Path to a JSON file containing per-layer FP8 Q/K/V scales for ViT
+    encoder attention. When provided (with `mm_encoder_attn_dtype="fp8"`),
+    static scaling is used. When omitted, dynamic scaling is used."""
+    mm_encoder_fp8_scale_save_path: str | None = None
+    """When set with dynamic FP8 scaling (`mm_encoder_attn_dtype="fp8"`
+    and no `mm_encoder_fp8_scale_path`), saves the calibrated scales to
+    this file after the amax history buffer is full. The saved file can
+    then be used as `mm_encoder_fp8_scale_path` in subsequent runs."""
+    mm_encoder_fp8_scale_save_margin: float = Field(default=1.5, gt=0.0)
+    """Safety margin multiplied onto scales when auto-saving. A value > 1
+    leaves headroom so that inputs with larger activations than the
+    calibration set do not overflow FP8 range. Default 1.5."""
     interleave_mm_strings: bool = False
     """Enable fully interleaved support for multimodal prompts, while using
     --chat-template-content-format=string."""
@@ -233,6 +252,36 @@ def _validate_multimodal_config(self):
                 "'mm_shm_cache_max_object_size_mb' should only be set when "
                 "'mm_processor_cache_type' is 'shm'."
             )
+        # Validate FP8 scale path combinations.
+        if self.mm_encoder_attn_dtype != "fp8" and (
+            self.mm_encoder_fp8_scale_path is not None
+            or self.mm_encoder_fp8_scale_save_path is not None
+        ):
+            raise ValueError(
+                "'mm_encoder_fp8_scale_path' and "
+                "'mm_encoder_fp8_scale_save_path' require "
+                "'mm_encoder_attn_dtype' to be 'fp8'."
+            )
+        if (
+            self.mm_encoder_fp8_scale_path is not None
+            and self.mm_encoder_fp8_scale_save_path is not None
+        ):
+            raise ValueError(
+                "'mm_encoder_fp8_scale_save_path' cannot be used with "
+                "'mm_encoder_fp8_scale_path' (saving requires dynamic scaling)."
+            )
+
+        # Validate file paths exist.
+        if self.mm_encoder_fp8_scale_path is not None:
+            scale_path = Path(self.mm_encoder_fp8_scale_path)
+            if not scale_path.is_file():
+                raise FileNotFoundError(f"FP8 scale file not found: {scale_path}")
+        if self.mm_encoder_fp8_scale_save_path is not None:
+            save_parent = Path(self.mm_encoder_fp8_scale_save_path).parent
+            if not save_parent.is_dir():
+                raise FileNotFoundError(
+                    f"Parent directory for FP8 scale save path not found: {save_parent}"
+                )
         return self
 
     def compute_hash(self) -> str:
@@ -252,6 +301,8 @@ def compute_hash(self) -> str:
             if self.mm_encoder_attn_backend is not None
             else None,
             self.mm_encoder_tp_mode,
+            self.mm_encoder_attn_dtype,
+            self.mm_encoder_fp8_scale_path,
         ]
         hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest()
         return hash_str
diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py
index 8afff3af258e..6e8f78e4ee30 100644
--- a/vllm/config/parallel.py
+++ b/vllm/config/parallel.py
@@ -6,6 +6,7 @@
 from collections.abc import Callable
 from typing import TYPE_CHECKING, Any, Literal, overload
 
+import regex as re
 import torch
 from pydantic import Field, field_validator, model_validator
 from torch.distributed import ProcessGroup, ReduceOp, Store
@@ -16,7 +17,6 @@
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.utils.network_utils import get_open_ports_list
-from vllm.utils.torch_utils import cuda_device_count_stateless
 
 if TYPE_CHECKING:
     from ray.runtime_env import RuntimeEnv
@@ -29,12 +29,14 @@
     Executor = Any
 
 logger = init_logger(__name__)
+_NUMACTL_CPUSET_PATTERN = re.compile(r"^\d+(?:-\d+)?(?:,\d+(?:-\d+)?)*$")
 
 ExpertPlacementStrategy = Literal["linear", "round_robin"]
 DistributedExecutorBackend = Literal["ray", "mp", "uni", "external_launcher"]
 DataParallelBackend = Literal["ray", "mp"]
 EPLBPolicyOption = Literal["default"]
 DCPCommBackend = Literal["ag_rs", "a2a"]
+EPLBCommunicatorBackend = Literal["torch_nccl", "torch_gloo", "nixl", "pynccl"]
 All2AllBackend = Literal[
     "naive",
     "pplx",
@@ -53,9 +55,9 @@
 class EPLBConfig:
     """Configuration for Expert Parallel Load Balancing (EP)."""
 
-    window_size: int = 1000
+    window_size: int = Field(default=1000, gt=0)
     """Window size for expert load recording."""
-    step_interval: int = 3000
+    step_interval: int = Field(default=3000, gt=0)
     """
     Interval for rearranging experts in expert parallelism.
 
@@ -71,7 +73,7 @@ class EPLBConfig:
     Log the balancedness each step of expert parallelism.
     This is turned off by default since it will cause communication overhead.
     """
-    log_balancedness_interval: int = 1
+    log_balancedness_interval: int = Field(default=1, gt=0)
     """
     Interval for logging the balancedness.
     """
@@ -83,6 +85,16 @@ class EPLBConfig:
     policy: EPLBPolicyOption = "default"
     """The policy type for expert parallel load balancing (EPLB)."""
 
+    communicator: EPLBCommunicatorBackend | None = None
+    """
+    Backend for EPLB expert weight communication:
+    - "torch_nccl": Use torch.distributed on the device process group
+    - "torch_gloo": Use torch.distributed gloo with CPU staging
+    - "nixl": Use NIXL/ RIXL with staged send/recv buffers
+    - "pynccl": Use PyNccl send/recv
+    - None: Auto-select backend ("torch_gloo" for async, "torch_nccl" for sync)
+    """
+
     @model_validator(mode="after")
     def _validate_eplb_config(self) -> Self:
         if self.use_async and self.policy != "default":
@@ -123,8 +135,10 @@ class ParallelConfig:
     data_parallel_external_lb: bool = False
     """Whether to use "external" DP LB mode. Applies only to online serving
     and when data_parallel_size > 0. This is useful for a "one-pod-per-rank"
-    wide-EP setup in Kubernetes. Set implicitly when --data-parallel-rank
-    is provided explicitly to vllm serve."""
+    wide-EP setup in Kubernetes. Supported only for MoE deployments; non-MoE
+    models should use independent vLLM instances without --data-parallel-*
+    arguments. Set implicitly when --data-parallel-rank is provided explicitly
+    to vllm serve."""
     data_parallel_hybrid_lb: bool = False
     """Whether to use "hybrid" DP LB mode. Applies only to online serving
     and when data_parallel_size > 0. Enables running an AsyncLLM
@@ -214,14 +228,18 @@ class ParallelConfig:
     distributed_executor_backend: (
         str | DistributedExecutorBackend | type[Executor] | None
     ) = None
-    """Backend to use for distributed model workers, either "ray" or "mp"
+    """
+    Backend to use for distributed model workers, either "ray" or "mp"
     (multiprocessing). If the product of pipeline_parallel_size and tensor_parallel_size
     is less than or equal to the number of GPUs available, "mp" will be used to
     keep processing on a single host. Otherwise, an error will be raised. To use "mp"
     you must also set nnodes, and to use "ray" you must manually set
     distributed_executor_backend to "ray".
 
-    Note that tpu only support Ray for distributed inference."""
+    Note:
+        [TPU](https://docs.vllm.ai/projects/tpu/en/latest/) platform only supports Ray
+        for distributed inference.
+    """
 
     worker_cls: str = "auto"
     """The full name of the worker class to use. If "auto", the worker class
@@ -246,6 +264,27 @@ class is dynamically inherited by the worker class. This is used to inject
     nnodes: int = 1
     """num of nodes for multi-node distributed
     inference when distributed_executor_backend is mp."""
+    numa_bind: bool = False
+    """Enable NUMA binding for GPU worker subprocesses."""
+    numa_bind_nodes: list[int] | None = None
+    """NUMA node to bind each GPU worker to.
+
+    Specify one NUMA node per visible GPU, for example `[0, 0, 1, 1]`
+    for a 4-GPU system with GPUs 0-1 on NUMA node 0 and GPUs 2-3 on
+    NUMA node 1. If unset and `numa_bind=True`, vLLM auto-detects the
+    GPU-to-NUMA topology. The values are passed to `numactl --membind`
+    and `--cpunodebind`, so they must be valid `numactl` NUMA node indices.
+    """
+    numa_bind_cpus: list[str] | None = None
+    """Optional CPU lists to bind each GPU worker to.
+
+    Specify one CPU list per visible GPU, for example
+    `["0-3", "4-7", "8-11", "12-15"]`. When set, vLLM uses
+    `numactl --physcpubind` instead of `--cpunodebind`. This is useful
+    for custom policies such as binding to PCT or other high-frequency cores.
+    Each entry must use `numactl --physcpubind` CPU-list syntax, for example
+    `"0-3"` or `"0,2,4-7"`.
+    """
 
     distributed_timeout_seconds: int | None = None
     """Timeout in seconds for distributed operations (e.g., init_process_group).
@@ -253,6 +292,10 @@ class is dynamically inherited by the worker class. This is used to inject
     timeout parameter. If None, PyTorch's default timeout is used (600s for NCCL).
     Increase this for multi-node setups where model downloads may be slow."""
 
+    cpu_distributed_timeout_seconds: int | None = None
+    """Timeout (in seconds) for cpu communication groups. If None, PyTorch's
+    default timeout is used (1800s for gloo)."""
+
     world_size: int = Field(init=False)
     """world_size is TPxPP, it affects the number of workers we create."""
 
@@ -332,6 +375,43 @@ def _skip_none_validation(cls, value: Any, handler: Callable) -> Any:
         """Skip validation if the value is `None` when initialisation is delayed."""
         return None if value is None else handler(value)
 
+    @field_validator("numa_bind_nodes")
+    @classmethod
+    def _validate_numa_bind_nodes(cls, value: list[int] | None) -> list[int] | None:
+        if value is None:
+            return None
+        if not value:
+            raise ValueError("numa_bind_nodes must not be empty.")
+        if any(node < 0 for node in value):
+            raise ValueError("numa_bind_nodes must contain non-negative integers.")
+        return value
+
+    @field_validator("numa_bind_cpus")
+    @classmethod
+    def _validate_numa_bind_cpus(cls, value: list[str] | None) -> list[str] | None:
+        if value is None:
+            return None
+        if not value:
+            raise ValueError("numa_bind_cpus must not be empty.")
+
+        for cpuset in value:
+            if not cpuset:
+                raise ValueError("numa_bind_cpus entries must not be empty.")
+            if not _NUMACTL_CPUSET_PATTERN.fullmatch(cpuset):
+                raise ValueError(
+                    "numa_bind_cpus entries must use numactl CPU list syntax, "
+                    "for example '0-3' or '0,2,4-7'."
+                )
+            for part in cpuset.split(","):
+                if "-" not in part:
+                    continue
+                start_str, end_str = part.split("-", 1)
+                if int(start_str) > int(end_str):
+                    raise ValueError(
+                        f"numa_bind_cpus ranges must be ascending, but got '{cpuset}'."
+                    )
+        return value
+
     @model_validator(mode="after")
     def _validate_parallel_config(self) -> Self:
         if self._api_process_rank >= self._api_process_count:
@@ -360,6 +440,13 @@ def _validate_parallel_config(self) -> Self:
                 "data_parallel_external_lb can only be set when data_parallel_size > 1"
             )
 
+        if not self.numa_bind and (
+            self.numa_bind_nodes is not None or self.numa_bind_cpus is not None
+        ):
+            raise ValueError(
+                "numa_bind_nodes and numa_bind_cpus require numa_bind=True."
+            )
+
         if self.enable_eplb:
             if not current_platform.is_cuda_alike():
                 raise ValueError(
@@ -542,6 +629,18 @@ def use_sequence_parallel_moe(self) -> bool:
             and self.data_parallel_size > 1
         )
 
+    @property
+    def use_batched_dp_moe(self) -> bool:
+        return (
+            self.all2all_backend
+            in (
+                "deepep_low_latency",
+                "nixl_ep",
+            )
+            and self.enable_expert_parallel
+            and self.data_parallel_size > 1
+        )
+
     @property
     def node_rank_within_dp(self) -> int:
         return self.node_rank % self.nnodes_within_dp
@@ -570,6 +669,33 @@ def has_unfinished_dp(dp_group: ProcessGroup, has_unfinished: bool) -> bool:
         aggregated_has_unfinished = bool(tensor.item())
         return aggregated_has_unfinished
 
+    @staticmethod
+    def sync_dp_state(
+        dp_group: ProcessGroup, has_unfinished: bool, pending_pause: bool
+    ) -> tuple[bool, bool]:
+        """Combined all-reduce for DP state synchronization.
+
+        Uses a single SUM all-reduce on a 2-element tensor:
+          [0] = 1 if this rank has unfinished work, else 0.
+                SUM > 0 ≡ logical OR across ranks → any rank has work.
+          [1] = 1 if this rank has a pending pause request, else 0.
+                SUM == dp_size ≡ all ranks reached pause consensus.
+
+        has_unfinished_global is true if any rank has unfinished work,
+        or if some ranks are waiting for a pause consensus.
+
+        Returns:
+            (has_unfinished_global, pause_consensus)
+        """
+        tensor = torch.tensor(
+            [int(has_unfinished), int(pending_pause)], dtype=torch.int32, device="cpu"
+        )
+        torch.distributed.all_reduce(tensor, op=ReduceOp.SUM, group=dp_group)
+        dp_size = dp_group.size()
+        pause_count = tensor[1].item()
+        has_unfinished_global = tensor[0].item() > 0 or pause_count % dp_size != 0
+        return has_unfinished_global, pause_count == dp_size
+
     @staticmethod
     def sync_kv_cache_memory_size(dp_group: ProcessGroup, kv_cache_memory: int) -> int:
         if kv_cache_memory == -1:
@@ -620,6 +746,14 @@ def compute_hash(self):
             "worker_extension_cls",
             "_api_process_count",
             "_api_process_rank",
+            # NUMA binding is per-rank host-side memory locality; it does
+            # not affect collective-communication semantics. When numa_bind
+            # is enabled with auto-detection, each DP rank stores its own
+            # NUMA node in numa_bind_nodes (see vllm/utils/numa_utils.py
+            # `_get_numa_node`), which would otherwise diverge the DP hash.
+            "numa_bind",
+            "numa_bind_nodes",
+            "numa_bind_cpus",
         }
 
         from vllm.config.utils import get_hash_factors, hash_factors
@@ -712,9 +846,9 @@ def __post_init__(self) -> None:
                 backend = "mp"
             elif (
                 current_platform.is_cuda()
-                and cuda_device_count_stateless() < self.world_size
+                and current_platform.device_count() < self.world_size
             ):
-                gpu_count = cuda_device_count_stateless()
+                gpu_count = current_platform.device_count()
                 raise ValueError(
                     f"World size ({self.world_size}) is larger than the number of "
                     f"available GPUs ({gpu_count}) in this node. If this is "
@@ -760,16 +894,24 @@ def __post_init__(self) -> None:
                 "backend is mp, uni or external_launcher."
             )
 
-        if (
-            self.all2all_backend in ("allgather_reducescatter")
-            and self.eplb_config.use_async
-        ):
-            logger.warning(
-                "Async EPLB causes hangs with the '%s' all2all backend. "
-                "Forcing synchronous EPLB.",
-                self.all2all_backend,
-            )
-            self.eplb_config.use_async = False
+        if self.enable_eplb and self.eplb_config.communicator is None:
+            if self.enable_elastic_ep:
+                # Elastic EP requires stateless mode
+                # (torch.distributed.batch_isend_irecv doesn't
+                # support stateless mode), so we use PyNCCL backend
+                self.eplb_config.communicator = "pynccl"
+            else:
+                # Avoid torch_nccl: NCCL is fundamentally incompatible
+                # with async EPLB due to multi-stream conflicts, and
+                # batched isend/irecv hangs under high load.
+                # See https://github.com/pytorch/pytorch/issues/174288
+                # Prefer nixl when available; fall back to torch_gloo.
+                from vllm.distributed.nixl_utils import is_nixl_available
+
+                if is_nixl_available():
+                    self.eplb_config.communicator = "nixl"
+                else:
+                    self.eplb_config.communicator = "torch_gloo"
 
     @property
     def use_ray(self) -> bool:
diff --git a/vllm/config/pooler.py b/vllm/config/pooler.py
index 24368c3494e7..f8eefb7c2ba4 100644
--- a/vllm/config/pooler.py
+++ b/vllm/config/pooler.py
@@ -77,10 +77,31 @@ class PoolerConfig:
     Defaults to None (i.e. set to max_model_len).
     """
 
-    ## for classification models
+    ## for classification models — affine score calibration
+    logit_mean: float | None = None
+    """
+    If provided, subtract this value from classification logits before
+    activation. Used for affine score calibration (Platt scaling):
+    activation((logit - logit_mean) / logit_sigma). Defaults to None.
+    """
+
+    logit_sigma: float | None = None
+    """
+    If provided, divide the classification logits by this value after
+    mean subtraction. Used for affine score calibration (Platt scaling):
+    activation((logit - logit_mean) / logit_sigma). Defaults to None.
+    """
+
+    # Deprecated aliases — will be removed in v0.21
     logit_bias: float | None = None
     """
-    If provided, apply classification logit biases. Defaults to None.
+    Deprecated: Use logit_mean instead. Will be removed in v0.21.
+    """
+
+    logit_scale: float | None = None
+    """
+    Deprecated: Use logit_sigma instead (note: logit_sigma = 1/logit_scale).
+    Will be removed in v0.21.
     """
 
     ## for reward models
@@ -98,6 +119,39 @@ class PoolerConfig:
     """
 
     def __post_init__(self) -> None:
+        # Handle deprecated logit_bias → logit_mean
+        if self.logit_bias is not None:
+            if self.logit_mean is not None:
+                raise ValueError(
+                    "Cannot set both `logit_bias` and `logit_mean`. "
+                    "`logit_bias` is deprecated, use `logit_mean` instead."
+                )
+            logger.warning(
+                "`logit_bias` is deprecated and will be removed in v0.21. "
+                "Use `logit_mean` instead."
+            )
+            self.logit_mean = self.logit_bias
+            self.logit_bias = None
+
+        # Handle deprecated logit_scale → logit_sigma
+        if self.logit_scale is not None:
+            if self.logit_sigma is not None:
+                raise ValueError(
+                    "Cannot set both `logit_scale` and `logit_sigma`. "
+                    "`logit_scale` is deprecated, use `logit_sigma` instead."
+                )
+            logger.warning(
+                "`logit_scale` is deprecated and will be removed in v0.21. "
+                "Use `logit_sigma` instead (logit_sigma = 1/logit_scale)."
+            )
+            if self.logit_scale == 0:
+                raise ValueError("logit_scale cannot be 0 (division by zero)")
+            self.logit_sigma = 1.0 / self.logit_scale
+            self.logit_scale = None
+
+        if self.logit_sigma is not None and self.logit_sigma == 0:
+            raise ValueError("logit_sigma cannot be 0 (division by zero)")
+
         if pooling_type := self.pooling_type:
             if self.seq_pooling_type is not None:
                 raise ValueError(
diff --git a/vllm/config/quantization.py b/vllm/config/quantization.py
new file mode 100644
index 000000000000..b726d4ac239c
--- /dev/null
+++ b/vllm/config/quantization.py
@@ -0,0 +1,175 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import Annotated, Any
+
+from pydantic import Field, GetPydanticSchema, ValidationInfo, field_validator
+from pydantic_core import core_schema
+
+from vllm.config.utils import config
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    QuantKey,
+    kFp8Dynamic128Sym,
+    kFp8DynamicTensorSym,
+    kFp8DynamicTokenSym,
+    kFp8Static128BlockSym,
+    kFp8StaticTensorSym,
+    kInt8StaticChannelSym,
+    kMxfp4Dynamic,
+    kMxfp8Dynamic,
+)
+
+# User-facing names addressable from quantization_config.
+QUANT_KEY_NAMES: dict[str, QuantKey] = {
+    "fp8_per_tensor_static": kFp8StaticTensorSym,
+    "fp8_per_tensor_dynamic": kFp8DynamicTensorSym,
+    "fp8_per_token": kFp8DynamicTokenSym,
+    "fp8_per_block_static": kFp8Static128BlockSym,
+    "fp8_per_block_dynamic": kFp8Dynamic128Sym,
+    "mxfp8": kMxfp8Dynamic,
+    "mxfp4": kMxfp4Dynamic,
+    "int8_per_channel_static": kInt8StaticChannelSym,
+}
+
+
+def _coerce_quant_key(v: Any) -> QuantKey | None:
+    if v is None or isinstance(v, QuantKey):
+        return v
+    if not isinstance(v, str):
+        raise TypeError(f"expected str or QuantKey, got {type(v).__name__}")
+    try:
+        return QUANT_KEY_NAMES[v]
+    except KeyError:
+        raise ValueError(
+            f"unknown quantization name {v!r}; "
+            f"expected one of {sorted(QUANT_KEY_NAMES)}"
+        ) from None
+
+
+# Stop pydantic from introspecting QuantKey: it transitively contains a
+# NamedTuple with `ClassVar[GroupShape]` declarations that pydantic refuses.
+QuantKeyField = Annotated[
+    QuantKey | None,
+    GetPydanticSchema(
+        lambda _src, _handler: core_schema.no_info_plain_validator_function(
+            _coerce_quant_key
+        )
+    ),
+]
+
+
+@config
+class QuantSpec:
+    """Quantization spec for one layer kind (linear or MoE).
+
+    `None` on either side means the method class falls back to its own default
+    (typically inherited from the checkpoint, or unquantized for online).
+    """
+
+    weight: QuantKeyField = None
+    """Weight quantization key, or a name from QUANT_KEY_NAMES."""
+
+    activation: QuantKeyField = None
+    """Activation quantization key, or a name from QUANT_KEY_NAMES."""
+
+
+@config
+class QuantizationConfigArgs:
+    """User-facing quantization configuration.
+
+    See `docs/features/quantization/online.md` for the schema and shorthand
+    string forms accepted on `linear` and `moe`.
+    """
+
+    linear: QuantSpec | None = None
+    """Spec applied to ``LinearBase`` layers."""
+
+    moe: QuantSpec | None = None
+    """Spec applied to ``FusedMoE`` layers."""
+
+    ignore: list[str] = Field(default_factory=list)
+    """Layers to skip quantization for."""
+
+    @field_validator("linear", "moe", mode="before")
+    @classmethod
+    def _coerce_spec(cls, v: Any, info: ValidationInfo) -> Any:
+        if not isinstance(v, str):
+            return v
+        field_name = info.field_name
+        assert field_name is not None
+        if v in _ONLINE_SHORTHANDS:
+            spec = getattr(_ONLINE_SHORTHANDS[v], field_name)
+            if spec is None:
+                raise ValueError(
+                    f"online shorthand {v!r} does not define a {field_name} spec"
+                )
+            return spec
+        return QuantSpec(weight=_coerce_quant_key(v))
+
+
+# CLI shorthands accepted by `--quantization`. Each desugars to a full
+# QuantizationConfigArgs; activation overrides go through quantization_config.
+_ONLINE_SHORTHANDS: dict[str, QuantizationConfigArgs] = {
+    "fp8_per_tensor": QuantizationConfigArgs(
+        linear=QuantSpec(weight=kFp8StaticTensorSym),
+        moe=QuantSpec(weight=kFp8StaticTensorSym),
+    ),
+    "fp8_per_block": QuantizationConfigArgs(
+        linear=QuantSpec(weight=kFp8Static128BlockSym),
+        moe=QuantSpec(weight=kFp8Static128BlockSym),
+    ),
+    "mxfp8": QuantizationConfigArgs(
+        linear=QuantSpec(weight=kMxfp8Dynamic),
+        moe=QuantSpec(weight=kMxfp8Dynamic),
+    ),
+    # INT8 weight-only on MoE; linear stays unquantized (no `linear` field).
+    "int8_per_channel_weight_only": QuantizationConfigArgs(
+        moe=QuantSpec(weight=kInt8StaticChannelSym),
+    ),
+}
+
+
+# Names accepted by `--quantization`; "online" means "use quantization_config".
+ONLINE_QUANT_SHORTHAND_NAMES: tuple[str, ...] = (
+    *_ONLINE_SHORTHANDS.keys(),
+    "online",
+)
+
+
+def resolve_quantization_config(
+    quantization: str | None,
+    quantization_config: dict[str, Any] | QuantizationConfigArgs | None,
+) -> QuantizationConfigArgs | None:
+    """Resolve `--quantization` shorthand and `--quantization-config` into a
+    QuantizationConfigArgs.
+
+    `quantization` is a CLI shorthand that desugars into a base config via
+    `_ONLINE_SHORTHANDS`. `quantization_config` is a dict or pre-built args
+    object. When both are given, fields explicitly set in `quantization_config`
+    take precedence over the shorthand.
+    """
+    if quantization is not None and quantization not in ONLINE_QUANT_SHORTHAND_NAMES:
+        if quantization_config is not None:
+            raise ValueError(
+                f"quantization_config is only supported when quantization is "
+                f"one of {sorted(ONLINE_QUANT_SHORTHAND_NAMES)}, "
+                f"got quantization={quantization!r}"
+            )
+        return None
+
+    base = _ONLINE_SHORTHANDS.get(quantization) if quantization else None
+
+    if quantization_config is None:
+        return base
+
+    if isinstance(quantization_config, dict):
+        quantization_config = QuantizationConfigArgs(**quantization_config)
+
+    if base is None:
+        return quantization_config
+
+    return QuantizationConfigArgs(
+        linear=quantization_config.linear or base.linear,
+        moe=quantization_config.moe or base.moe,
+        ignore=quantization_config.ignore or base.ignore,
+    )
diff --git a/vllm/config/reasoning.py b/vllm/config/reasoning.py
index 872e05580908..ff5546e05ebf 100644
--- a/vllm/config/reasoning.py
+++ b/vllm/config/reasoning.py
@@ -5,6 +5,7 @@
 
 from vllm.config.model import ModelConfig
 from vllm.config.utils import config
+from vllm.reasoning import ReasoningParserManager
 from vllm.tokenizers import cached_tokenizer_from_config
 
 
@@ -12,61 +13,95 @@
 class ReasoningConfig:
     """Configuration for reasoning models.
 
-    Set `think_start_str` and `think_end_str` to the strings that delimit
+    Set `reasoning_start_str` and `reasoning_end_str` to the strings that delimit
     the reasoning block (e.g. `"<think>"` and `"</think>"`).  The
     corresponding token IDs are derived automatically via
     `initialize_token_ids` and are not intended to be set directly.
     """
 
-    # NOTE: These parameters are temporary, the intent is to derive them
-    # automatically from the reasoning parser in a future version.
-    think_start_str: str = "<think>"
+    reasoning_parser: str = ""
+    """The name of the ReasoningParser to use for this model."""
+    reasoning_start_str: str = ""
     """String that indicates the start of reasoning."""
-    think_end_str: str = "</think>"
+    reasoning_end_str: str = ""
     """String that indicates the end of reasoning content."""
 
-    _think_start_token_ids: list[int] | None = field(
+    _reasoning_start_token_ids: list[int] | None = field(
         default=None, init=False, repr=False
     )
-    """Private backing field for `think_start_token_ids`. Set by
+    """Private backing field for `reasoning_start_token_ids`. Set by
     `initialize_token_ids`. Not intended to be configured directly."""
-    _think_end_token_ids: list[int] | None = field(default=None, init=False, repr=False)
-    """Private backing field for `think_end_token_ids`. Set by
+    _reasoning_end_token_ids: list[int] | None = field(
+        default=None, init=False, repr=False
+    )
+    """Private backing field for `reasoning_end_token_ids`. Set by
     `initialize_token_ids`. Not intended to be configured directly."""
 
+    _enabled: bool = field(default=False, init=False, repr=False)
+    """Private field indicating whether reasoning token IDs have been initialized.
+    Set to True by `initialize_token_ids` once token IDs are initialized."""
+
+    @property
+    def enabled(self) -> bool:
+        """Returns True if reasoning is enabled (i.e. if token IDs have been
+        initialized), False otherwise."""
+        return self._enabled
+
     @property
-    def think_start_token_ids(self) -> list[int] | None:
-        """Token IDs derived from `think_start_str`. Set automatically by
+    def reasoning_start_token_ids(self) -> list[int] | None:
+        """Token IDs derived from `reasoning_start_str`. Set automatically by
         `initialize_token_ids`. Not intended to be configured directly."""
-        return self._think_start_token_ids
+        return self._reasoning_start_token_ids
 
     @property
-    def think_end_token_ids(self) -> list[int] | None:
-        """Token IDs derived from `think_end_str`. Set automatically by
+    def reasoning_end_token_ids(self) -> list[int] | None:
+        """Token IDs derived from `reasoning_end_str`. Set automatically by
         `initialize_token_ids`. Not intended to be configured directly."""
-        return self._think_end_token_ids
+        return self._reasoning_end_token_ids
 
     def initialize_token_ids(self, model_config: ModelConfig) -> None:
         """Initialize reasoning token IDs from strings using the tokenizer."""
         if (
-            self._think_start_token_ids is not None
-            and self._think_end_token_ids is not None
+            self._reasoning_start_token_ids is not None
+            and self._reasoning_end_token_ids is not None
         ):
-            return
+            self._enabled = True
+            return  # Already initialized
 
         tokenizer = cached_tokenizer_from_config(model_config=model_config)
+        reasoning_start_str = self.reasoning_start_str
+        reasoning_end_str = self.reasoning_end_str
+        if self.reasoning_parser is not None and (
+            not reasoning_start_str or not reasoning_end_str
+        ):
+            parser_cls = ReasoningParserManager.get_reasoning_parser(
+                self.reasoning_parser
+            )
+            reasoning_parser = parser_cls(tokenizer)
+            start_token = reasoning_parser.reasoning_start_str
+            if start_token and not reasoning_start_str:
+                reasoning_start_str = start_token
 
-        self._think_start_token_ids = tokenizer.encode(
-            self.think_start_str, add_special_tokens=False
+            end_token = reasoning_parser.reasoning_end_str
+            if end_token and not reasoning_end_str:
+                reasoning_end_str = end_token
+
+        if not reasoning_start_str or not reasoning_end_str:
+            # If we don't have valid strings to tokenize,
+            # we can't initialize the token IDs.
+            return
+        self._reasoning_start_token_ids = tokenizer.encode(
+            reasoning_start_str, add_special_tokens=False
         )
-        self._think_end_token_ids = tokenizer.encode(
-            self.think_end_str, add_special_tokens=False
+        self._reasoning_end_token_ids = tokenizer.encode(
+            reasoning_end_str, add_special_tokens=False
         )
 
-        if not self._think_start_token_ids or not self._think_end_token_ids:
+        if not self._reasoning_start_token_ids or not self._reasoning_end_token_ids:
             raise ValueError(
                 f"ReasoningConfig: failed to tokenize reasoning strings: "
-                f"think_start_str='{self.think_start_str}', "
-                f"think_end_str='{self.think_end_str}'. "
+                f"reasoning_start_str='{self.reasoning_start_str}', "
+                f"reasoning_end_str='{self.reasoning_end_str}'. "
                 "Ensure the strings are valid tokens in the model's vocabulary."
             )
+        self._enabled = True
diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py
index 3cd99bb082eb..fb6951ea7dd1 100644
--- a/vllm/config/scheduler.py
+++ b/vllm/config/scheduler.py
@@ -40,6 +40,7 @@ class SchedulerConfig:
     """
 
     DEFAULT_MAX_NUM_BATCHED_TOKENS: ClassVar[int] = 2048
+    DEFAULT_MAX_NUM_BATCHED_TOKENS_FOR_BATCHED_DP: ClassVar[int] = 256
     DEFAULT_MAX_NUM_SEQS: ClassVar[int] = 128
 
     runner_type: RunnerType = "generate"
@@ -238,7 +239,6 @@ def __post_init__(self, max_model_len: int, is_encoder_decoder: bool) -> None:
             logger.info_once(
                 "Chunked prefill is enabled with max_num_batched_tokens=%d.",
                 self.max_num_batched_tokens,
-                scope="local",
             )
 
         if self.max_num_partial_prefills > 1:
diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py
index 3757939414dc..47d35f4ff4b5 100644
--- a/vllm/config/speculative.py
+++ b/vllm/config/speculative.py
@@ -1,11 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import ast
 import copy
 from typing import TYPE_CHECKING, Any, Literal, get_args
 
-from pydantic import Field, SkipValidation, model_validator
+from pydantic import Field, SkipValidation, field_validator, model_validator
 from typing_extensions import Self
 
 from vllm.config import LoadConfig
@@ -17,6 +16,7 @@
 from vllm.transformers_utils.config import get_hf_text_config
 from vllm.utils.hashing import safe_hash
 from vllm.utils.import_utils import LazyLoader, has_arctic_inference
+from vllm.v1.attention.backends.registry import AttentionBackendEnum
 
 if TYPE_CHECKING:
     from transformers import PretrainedConfig
@@ -34,31 +34,40 @@
 MTPModelTypes = Literal[
     "deepseek_mtp",
     "mimo_mtp",
+    "mimo_v2_mtp",
     "glm4_moe_mtp",
     "glm4_moe_lite_mtp",
     "glm_ocr_mtp",
     "ernie_mtp",
     "nemotron_h_mtp",
     "exaone_moe_mtp",
+    "exaone4_5_mtp",
     "qwen3_next_mtp",
     "qwen3_5_mtp",
     "longcat_flash_mtp",
     "mtp",
     "pangu_ultra_moe_mtp",
     "step3p5_mtp",
+    "hy_v3_mtp",
+    "gemma4_mtp",
 ]
-EagleModelTypes = Literal["eagle", "eagle3", "extract_hidden_states", MTPModelTypes]
 NgramGPUTypes = Literal["ngram_gpu"]
+DFlashModelTypes = Literal["dflash"]
+EagleModelTypes = Literal[
+    "eagle", "eagle3", "extract_hidden_states", MTPModelTypes, DFlashModelTypes
+]
 SpeculativeMethod = Literal[
     "ngram",
     "medusa",
     "mlp_speculator",
     "draft_model",
     "suffix",
+    "custom_class",
     EagleModelTypes,
     NgramGPUTypes,
 ]
-RejectionSampleMethod = Literal["strict", "probabilistic", "synthetic"]
+RejectionSampleMethod = Literal["standard", "synthetic"]
+DraftSampleMethod = Literal["greedy", "probabilistic"]
 
 
 @config
@@ -99,6 +108,10 @@ class SpeculativeConfig:
     inherits the target model's `--moe-backend` setting. Useful when the
     drafter and generator require different MoE kernels (e.g. quantized
     generator with unquantized drafter)."""
+    attention_backend: AttentionBackendEnum | None = None
+    """Attention backend to use for the draft model. When `None`, the backend is
+    automatically selected. Useful when the drafter requires a different attention
+    backend (e.g. DFlash needs a non-causal-capable backend like FLASH_ATTN)."""
     max_model_len: int | None = Field(default=None, ge=1)
     """The maximum model length of the draft model. Used when testing the
     ability to skip speculation for some sequences."""
@@ -132,9 +145,6 @@ class SpeculativeConfig:
     provided. Defaults to 1."""
 
     # Alternative drafting strategies
-    speculative_token_tree: str | None = None
-    """Specifies the tree structure for speculative token generation.
-    """
     parallel_drafting: bool = False
     """Enable parallel drafting, where all speculative tokens are generated
     in parallel rather than sequentially. This can improve performance but
@@ -178,18 +188,78 @@ class SpeculativeConfig:
     """Load config for the draft model. If not specified, will use the load
     config from the target model."""
 
-    rejection_sample_method: RejectionSampleMethod = "strict"
-    """Whether to use strict (target and draft sampled tokens match exactly)
-    or probabilistic rejection sampling. Both respect the target model
-    distribution, but the latter yields a higher acceptance rate at the cost
-    of more memory to cache draft logits."""
+    rejection_sample_method: RejectionSampleMethod = "standard"
+    """The rejection sampling method to use. 'standard' uses probabilistic
+    rejection sampling (with or without cached draft logits, controlled by
+    draft_sample_method). 'synthetic' accepts draft tokens with a decaying
+    probability calibrated to synthetic_acceptance_rate."""
+
+    synthetic_acceptance_rates: list[float] | None = None
+    """Per-position *unconditional* acceptance rates for synthetic rejection
+    sampling. Position i's entry is the marginal probability that the first
+    i+1 draft tokens are all accepted; the list must have length
+    num_speculative_tokens, each entry in [0, 1], and be monotonically
+    non-increasing. Only valid when rejection_sample_method is 'synthetic'.
+    Mutually exclusive with synthetic_acceptance_length."""
+
+    synthetic_acceptance_length: float | None = None
+    """Target mean acceptance length for synthetic rejection sampling, in
+    [1, num_speculative_tokens + 1]. Resolved internally to
+    synthetic_acceptance_rates. Only valid when rejection_sample_method is 'synthetic'.
+    Mutually exclusive with synthetic_acceptance_rates."""
+
+    @staticmethod
+    def _acceptance_length_to_rates(length: float, n: int) -> list[float]:
+        """Mean acceptance length to unconditional per-position rates, using
+        the minimum-variance schedule."""
+        num_drafts = length - 1  # expected number of accepted draft tokens
+        num_full = int(num_drafts)
+        return (
+            [1.0] * num_full + [num_drafts - num_full] + [0.0] * (n - num_full - 1)
+        )[:n]
+
+    @staticmethod
+    def _resolve_synthetic_acceptance_rates(
+        n: int,
+        rates: list[float] | None,
+        length: float | None,
+    ) -> list[float]:
+        """Return per-position unconditional acceptance rates from exactly one
+        of `rates` or `length` (validates range, length, and monotonicity)."""
+        if (rates is None) == (length is None):
+            raise ValueError(
+                "rejection_sample_method='synthetic' requires exactly one of "
+                "synthetic_acceptance_rates or synthetic_acceptance_length."
+            )
+        if rates is not None:
+            if len(rates) != n:
+                raise ValueError(
+                    f"synthetic_acceptance_rates must have length {n}, got {rates}."
+                )
+            if not all(0.0 <= r <= 1.0 for r in rates):
+                raise ValueError(
+                    f"synthetic_acceptance_rates entries must be in [0, 1], "
+                    f"got {rates}."
+                )
+            if any(rates[i] > rates[i - 1] for i in range(1, n)):
+                raise ValueError(
+                    f"synthetic_acceptance_rates must be non-increasing, got {rates}."
+                )
+            return list(rates)
+        assert length is not None
+        if not 1.0 <= length <= float(n + 1):
+            raise ValueError(
+                f"synthetic_acceptance_length must be in [1, {n + 1}], got {length}."
+            )
+        return SpeculativeConfig._acceptance_length_to_rates(length, n)
 
-    synthetic_acceptance_rate: float | None = None
-    """Average acceptance rate for synthetic rejection sampling. Draft
-    tokens are accepted with a position-dependent probability that decays
-    geometrically, calibrated so that the mean rate across all speculative
-    positions equals this value. Only used when rejection_sample_method
-    is 'synthetic'. Must be in [0, 1]."""
+    draft_sample_method: DraftSampleMethod = "greedy"
+    """How the draft model samples tokens. 'greedy' always picks the argmax
+    token, and the draft probabilities are treated as one-hot during rejection
+    sampling. 'probabilistic' samples stochastically from the draft
+    distribution and uses the full draft logits for the probability ratio test
+    during rejection sampling. This comes at the cost of additional GPU memory
+    usage."""
 
     def compute_hash(self) -> str:
         """
@@ -206,7 +276,11 @@ def compute_hash(self) -> str:
         factors: list[Any] = []
         # Eagle3 and extract_hidden_states affect the computation graph because
         # they return intermediate hidden states in addition to the final hidden state.
-        uses_aux_hidden_states = self.method in ("eagle3", "extract_hidden_states")
+        uses_aux_hidden_states = self.method in (
+            "eagle3",
+            "extract_hidden_states",
+            "dflash",
+        )
         factors.append(uses_aux_hidden_states)
 
         # The specific layers used also affect the computation graph
@@ -226,13 +300,23 @@ def compute_hash(self) -> str:
     @staticmethod
     def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
         initial_architecture = hf_config.architectures[0]
-        if hf_config.model_type in ("deepseek_v3", "deepseek_v32", "glm_moe_dsa"):
+        if hf_config.model_type in (
+            "deepseek_v3",
+            "deepseek_v32",
+            "glm_moe_dsa",
+        ):
             hf_config.model_type = "deepseek_mtp"
         if hf_config.model_type == "deepseek_mtp":
             n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
             hf_config.update(
                 {"n_predict": n_predict, "architectures": ["DeepSeekMTPModel"]}
             )
+        if hf_config.model_type == "deepseek_v4":
+            hf_config.model_type = "deepseek_mtp"
+            n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
+            hf_config.update(
+                {"n_predict": n_predict, "architectures": ["DeepSeekV4MTPModel"]}
+            )
         if hf_config.model_type in ("pangu_ultra_moe"):
             hf_config.model_type = "pangu_ultra_moe_mtp"
         if hf_config.model_type == "pangu_ultra_moe_mtp":
@@ -252,6 +336,48 @@ def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
                 }
             )
 
+        if (arch := hf_config.architectures[0]) in (
+            "MiMoV2ForCausalLM",
+            "MiMoV2OmniForCausalLM",
+        ):
+            from vllm.model_executor.models.mimo_v2_mtp import (
+                _MIMO_V2_PRO_NUM_MTP_LAYERS,
+            )
+
+            mtp_arch_maps = {
+                "MiMoV2ForCausalLM": "MiMoV2MTPModel",
+                "MiMoV2OmniForCausalLM": "MiMoV2OmniMTPModel",
+            }
+
+            hf_config.model_type = "mimo_v2_mtp"
+            # vLLM currently supports only the first MiMo-V2 MTP layer.
+            n_predict = _MIMO_V2_PRO_NUM_MTP_LAYERS
+            hf_config.update(
+                {
+                    "num_hidden_layers": 0,
+                    "n_predict": n_predict,
+                    "num_nextn_predict_layers": n_predict,
+                    "architectures": [mtp_arch_maps[arch]],
+                }
+            )
+
+        if hf_config.architectures[0] == "MiMoV2FlashForCausalLM":
+            from vllm.model_executor.models.mimo_v2_mtp import (
+                _MIMO_V2_FLASH_NUM_MTP_LAYERS,
+            )
+
+            hf_config.model_type = "mimo_v2_mtp"
+            # vLLM currently supports only the first MiMo-V2 MTP layer.
+            n_predict = _MIMO_V2_FLASH_NUM_MTP_LAYERS
+            hf_config.update(
+                {
+                    "num_hidden_layers": 0,
+                    "n_predict": n_predict,
+                    "num_nextn_predict_layers": n_predict,
+                    "architectures": ["MiMoV2MTPModel"],
+                }
+            )
+
         if hf_config.architectures[0] == "Glm4MoeForCausalLM":
             hf_config.model_type = "glm4_moe_mtp"
             n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
@@ -292,6 +418,10 @@ def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
                 {"n_predict": n_predict, "architectures": ["ErnieMTPModel"]}
             )
 
+        if hf_config.architectures[0] == "NemotronH_Super_Omni_Reasoning_V3":
+            # Promote VLM's text_config so MTP detection below fires correctly
+            hf_config = hf_config.text_config
+
         if (
             hf_config.model_type in {"nemotron_h", "nemotron_h_puzzle"}
             and hasattr(hf_config, "num_nextn_predict_layers")
@@ -320,7 +450,13 @@ def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
             hf_config.update(
                 {"n_predict": n_predict, "architectures": ["ExaoneMoeMTP"]}
             )
-
+        if "exaone4_5" in hf_config.model_type:
+            hf_config.model_type = "exaone4_5_mtp"
+        if hf_config.model_type == "exaone4_5_mtp":
+            n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
+            hf_config.update(
+                {"n_predict": n_predict, "architectures": ["Exaone4_5_MTP"]}
+            )
         if hf_config.model_type in ("qwen3_5", "qwen3_5_moe"):
             is_moe = hf_config.model_type == "qwen3_5_moe"
             hf_config.model_type = "qwen3_5_mtp"
@@ -331,6 +467,17 @@ def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
                     "architectures": ["Qwen3_5MoeMTP" if is_moe else "Qwen3_5MTP"],
                 }
             )
+        if hf_config.model_type == "intern_s2_preview":
+            text_config = getattr(hf_config, "text_config", None)
+            is_moe = getattr(text_config, "model_type", None) == "qwen3_5_moe_text"
+            hf_config.model_type = "qwen3_5_mtp"
+            n_predict = getattr(text_config, "mtp_num_hidden_layers", None)
+            hf_config.update(
+                {
+                    "n_predict": n_predict,
+                    "architectures": ["Qwen3_5MoeMTP" if is_moe else "Qwen3_5MTP"],
+                }
+            )
         if hf_config.model_type == "longcat_flash":
             hf_config.model_type = "longcat_flash_mtp"
             n_predict = getattr(hf_config, "num_nextn_predict_layers", 1)
@@ -346,6 +493,24 @@ def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
         if initial_architecture == "MistralLarge3ForCausalLM":
             hf_config.update({"architectures": ["EagleMistralLarge3ForCausalLM"]})
 
+        if hf_config.model_type == "hy_v3":
+            hf_config.model_type = "hy_v3_mtp"
+            n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
+            hf_config.update(
+                {"n_predict": n_predict, "architectures": ["HYV3MTPModel"]}
+            )
+
+        if hf_config.model_type == "gemma4_assistant":
+            hf_config.model_type = "gemma4_mtp"
+            text_config = getattr(hf_config, "text_config", hf_config)
+            # The assistant runs all decoder layers in a single forward
+            # call to produce one draft token, so n_predict=1.
+            # num_kv_shared_layers must be 0: cross-model KV sharing is
+            # set up by the proposer after model construction.
+            if hasattr(text_config, "num_kv_shared_layers"):
+                text_config.num_kv_shared_layers = 0
+            hf_config.update({"n_predict": 1, "architectures": ["Gemma4MTPModel"]})
+
         return hf_config
 
     def __post_init__(self):
@@ -358,7 +523,16 @@ def __post_init__(self):
         # default.
 
         # infer method from user args
-        if self.method is None:
+        # Check if the model field contains a custom module path (e.g., 'pkg.Mod')
+        if (
+            self.model is not None
+            and "." in self.model
+            and not self.model.startswith(("http://", "https://", "file://"))
+            and "/" not in self.model  # not a HuggingFace repo (org/model)
+        ):
+            # Treat as a custom class path
+            self.method = "custom_class"
+        elif self.method is None:
             if self.model in ("ngram", "[ngram]"):
                 self.method = "ngram"
             else:
@@ -392,6 +566,14 @@ def __post_init__(self):
                 self.model = "suffix"
             elif self.method == "extract_hidden_states":
                 self.model = "extract_hidden_states"
+            elif self.method == "custom_class":
+                # method was set explicitly, but model should already contain the
+                # custom module path. If not, this is a configuration error.
+                if self.model is None:
+                    raise ValueError(
+                        "method='custom_class' requires 'model' to contain the "
+                        "custom proposer module path (e.g., 'my_module.MyProposer')."
+                    )
             else:
                 raise ValueError(
                     "num_speculative_tokens was provided but without speculative model."
@@ -435,6 +617,18 @@ def __post_init__(self):
             self.draft_parallel_config = self.target_parallel_config
         elif self.method == "suffix":
             self._validate_suffix_decoding()
+        elif self.method == "custom_class":
+            # Custom class proposer does not need a draft model.
+            # It will dynamically load the user-provided class at runtime.
+            logger.warning_once(
+                "Using a custom class-based proposer backend. This is an "
+                "experimental feature and the proposer interface is subject to "
+                "breaking changes in future vLLM releases."
+            )
+            self.prompt_lookup_max = 0
+            self.prompt_lookup_min = 0
+            self.draft_model_config = self.target_model_config
+            self.draft_parallel_config = self.target_parallel_config
         elif self.method == "extract_hidden_states":
             from vllm.transformers_utils.configs.extract_hidden_states import (
                 ExtractHiddenStatesConfig,
@@ -481,6 +675,7 @@ def __post_init__(self):
                     revision=self.revision,
                     code_revision=self.code_revision,
                     tokenizer_revision=self.target_model_config.tokenizer_revision,
+                    max_model_len=self.max_model_len,  # type: ignore[arg-type]
                     spec_target_max_model_len=self.target_model_config.max_model_len,
                     quantization=self.quantization,
                     enforce_eager=self.target_model_config.enforce_eager,
@@ -490,7 +685,7 @@ def __post_init__(self):
                 )
 
                 # Automatically detect the method
-                if self.method in ("eagle", "eagle3"):
+                if self.method in ("eagle", "eagle3", "dflash"):
                     pass
                 # examples:
                 # yuhuili/EAGLE-LLaMA3-Instruct-8B
@@ -500,6 +695,8 @@ def __post_init__(self):
                     self.method = "eagle"
                 elif "eagle3" in self.draft_model_config.model.lower():
                     self.method = "eagle3"
+                elif "dflash" in self.draft_model_config.model.lower():
+                    self.method = "dflash"
                 elif self.draft_model_config.hf_config.model_type == "medusa":
                     self.method = "medusa"
                 elif self.draft_model_config.hf_config.model_type == "mlp_speculator":
@@ -532,7 +729,7 @@ def __post_init__(self):
                     )
 
                 # Replace hf_config for EAGLE draft_model
-                if self.method in ("eagle", "eagle3"):
+                if self.method in ("eagle", "eagle3", "dflash"):
                     from vllm.transformers_utils.configs.eagle import EAGLEConfig
                     from vllm.transformers_utils.configs.speculators import (
                         SpeculatorsConfig,
@@ -552,6 +749,9 @@ def __post_init__(self):
                         self.draft_model_config.hf_config = eagle_config
                         self.update_arch_()
 
+                if self.method == "dflash":
+                    self.parallel_drafting = True
+
                 if self.num_speculative_tokens is not None and hasattr(
                     self.draft_model_config.hf_config, "num_lookahead_tokens"
                 ):
@@ -576,23 +776,10 @@ def __post_init__(self):
                             f" must be divisible by {n_predict=}"
                         )
 
-                if self.speculative_token_tree is None:
-                    if self.num_speculative_tokens is None:
-                        raise ValueError(
-                            "A speculative model was provided, but neither "
-                            "`speculative_token_tree` nor `num_speculative_tokens` "
-                            "was provided"
-                        )
-
-                    # Generate chain of tokens.
-                    self.speculative_token_tree = str(
-                        [(i + 1) * (0,) for i in range(self.num_speculative_tokens)]
-                    )
-                else:
-                    # Sort the token tree breadth-first.
-                    tree_choices = ast.literal_eval(self.speculative_token_tree)
-                    self.speculative_token_tree = str(
-                        sorted(tree_choices, key=lambda t: (len(t), t))
+                if self.num_speculative_tokens is None:
+                    raise ValueError(
+                        "A speculative model was provided, but "
+                        "`num_speculative_tokens` was not provided"
                     )
 
                 self.draft_tensor_parallel_size = (
@@ -687,10 +874,17 @@ def _maybe_override_draft_max_model_len(
 
             return speculative_max_model_len
 
-        return min(
+        result = min(
             draft_max_model_len,
             target_max_model_len,
         )
+        if result != draft_max_model_len:
+            logger.info(
+                "Overriding draft model max model len from %d to %d",
+                draft_max_model_len,
+                result,
+            )
+        return result
 
     @staticmethod
     def _verify_and_get_draft_tp(
@@ -766,6 +960,15 @@ def create_draft_parallel_config(
 
         return draft_parallel_config
 
+    @field_validator("attention_backend", mode="before")
+    @classmethod
+    def _parse_attention_backend(cls, value: Any) -> Any:
+        if isinstance(value, str):
+            if value.lower() == "auto":
+                return None
+            return AttentionBackendEnum[value.upper()]
+        return value
+
     @model_validator(mode="after")
     def _verify_args(self) -> Self:
         if self.tensor_parallel_size is not None:
@@ -787,37 +990,28 @@ def _verify_args(self) -> Self:
                 f"than zero ({self.num_speculative_tokens})."
             )
 
+        if self.rejection_sample_method == "synthetic":
+            # Consolidate to per-position rates
+            self.synthetic_acceptance_rates = self._resolve_synthetic_acceptance_rates(
+                self.num_speculative_tokens,
+                self.synthetic_acceptance_rates,
+                self.synthetic_acceptance_length,
+            )
+            self.synthetic_acceptance_length = None
+        elif (
+            self.synthetic_acceptance_rates is not None
+            or self.synthetic_acceptance_length is not None
+        ):
+            raise ValueError(
+                "synthetic_acceptance_rates / synthetic_acceptance_length "
+                "are only valid with rejection_sample_method='synthetic'."
+            )
+
         if self.draft_model_config:
             self.draft_model_config.verify_with_parallel_config(
                 self.draft_parallel_config
             )
 
-        aux_hidden_states_supported = [
-            "llama",
-            "qwen",
-            "minicpm",
-            "gpt_oss",
-            "hunyuan_vl",
-            "hunyuan_v1_dense",
-            "afmoe",
-            "nemotron_h",
-            "deepseek_v2",
-            "deepseek_v3",
-            "kimi_k2",
-            "kimi_k25",
-        ]
-        if (
-            self.method in ("eagle3", "extract_hidden_states")
-            and self.target_model_config
-            and not any(
-                supported_model in self.target_model_config.hf_text_config.model_type
-                for supported_model in aux_hidden_states_supported
-            )
-        ):
-            raise ValueError(
-                f"{self.method} is only supported for {aux_hidden_states_supported}"
-                f" models. Got {self.target_model_config.hf_text_config.model_type=}"
-            )
         self.verify_equal_vocab_size_if_draft_model()
         return self
 
@@ -854,8 +1048,19 @@ def max_num_new_slots_for_drafting(self) -> int:
             slots_per_req += 1
         return slots_per_req
 
+    def use_gemma4_mtp(self) -> bool:
+        return (
+            self.method == "mtp"
+            and self.draft_model_config is not None
+            and getattr(self.draft_model_config.hf_config, "model_type", None)
+            == "gemma4_mtp"
+        )
+
     def use_eagle(self) -> bool:
-        return self.method in ("eagle", "eagle3", "mtp")
+        return self.method in ("eagle", "eagle3", "mtp", "dflash")
+
+    def use_dflash(self) -> bool:
+        return self.method == "dflash"
 
     def uses_draft_model(self) -> bool:
         return self.method == "draft_model"
@@ -870,7 +1075,13 @@ def __repr__(self) -> str:
         method = self.method
         model = (
             None
-            if method in ("ngram", "suffix", "extract_hidden_states")
+            if method
+            in (
+                "ngram",
+                "suffix",
+                "extract_hidden_states",
+                "custom_class",
+            )
             else self.draft_model_config.model
         )
         num_spec_tokens = self.num_speculative_tokens
diff --git a/vllm/config/speech_to_text.py b/vllm/config/speech_to_text.py
index e0d72eb203af..6d31713c8667 100644
--- a/vllm/config/speech_to_text.py
+++ b/vllm/config/speech_to_text.py
@@ -1,9 +1,55 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from __future__ import annotations
 
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
 
 from vllm.config.utils import config
 
+if TYPE_CHECKING:
+    import numpy as np
+
+    from vllm.config.model import ModelConfig
+
+
+@dataclass
+class SpeechToTextParams:
+    """All parameters consumed by ``get_generation_prompt()``.
+
+    ``TranscriptionRequest.build_stt_params()`` constructs this object,
+    mapping API-level fields into typed attributes.  Models only receive
+    this object, so new parameters can be added here without changing the
+    ``get_generation_prompt`` signature.
+    """
+
+    audio: np.ndarray
+    """Resampled audio waveform for a single chunk."""
+
+    stt_config: SpeechToTextConfig
+    """Server-level speech-to-text configuration."""
+
+    model_config: ModelConfig
+    """Model configuration."""
+
+    language: str | None = None
+    """ISO 639-1 language code (validated / auto-detected)."""
+
+    hotwords: str | None = None
+    """
+    hotwords refers to a list of important words or phrases that the model
+    should pay extra attention to during transcription.
+    """
+
+    task_type: str = "transcribe"
+    """``"transcribe"`` or ``"translate"``."""
+
+    request_prompt: str = ""
+    """Optional text prompt to guide the model."""
+
+    to_language: str | None = None
+    """Target language for translation (model-dependent)."""
+
 
 @config
 class SpeechToTextConfig:
diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index b6be7f10bdb0..f009dd6f154e 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -8,6 +8,7 @@
 import tempfile
 import threading
 import time
+from collections.abc import Iterable
 from contextlib import contextmanager
 from dataclasses import is_dataclass
 from datetime import datetime
@@ -24,6 +25,7 @@
 import vllm.envs as envs
 from vllm.logger import enable_trace_function_call, init_logger
 from vllm.transformers_utils.runai_utils import is_runai_obj_uri
+from vllm.triton_utils import HAS_TRITON
 from vllm.utils import random_uuid
 from vllm.utils.hashing import safe_hash
 
@@ -37,6 +39,7 @@
 from .kv_transfer import KVTransferConfig
 from .load import LoadConfig
 from .lora import LoRAConfig
+from .mamba import MambaConfig
 from .model import ModelConfig
 from .observability import ObservabilityConfig
 from .offload import OffloadConfig
@@ -63,6 +66,8 @@
 
 logger = init_logger(__name__)
 
+DEFAULT_V2_MODEL_RUNNER_ARCHITECTURES = frozenset({"Qwen3ForCausalLM"})
+
 
 class OptimizationLevel(IntEnum):
     """Optimization level enum."""
@@ -95,9 +100,11 @@ def enable_norm_fusion(cfg: "VllmConfig") -> bool:
     """Enable if either RMS norm or quant FP8 custom op is active;
     otherwise Inductor handles fusion."""
 
-    return cfg.compilation_config.is_custom_op_enabled(
-        "rms_norm"
-    ) or cfg.compilation_config.is_custom_op_enabled("quant_fp8")
+    return (
+        cfg.compilation_config.is_custom_op_enabled("rms_norm")
+        or cfg.compilation_config.is_custom_op_enabled("quant_fp8")
+        or cfg.kernel_config.ir_op_priority.rms_norm[0] != "native"
+    )
 
 
 def enable_act_fusion(cfg: "VllmConfig") -> bool:
@@ -118,6 +125,13 @@ def enable_allreduce_rms_fusion(cfg: "VllmConfig") -> bool:
     from vllm.platforms import current_platform
     from vllm.utils.flashinfer import has_flashinfer
 
+    if current_platform.is_rocm():
+        from vllm._aiter_ops import rocm_aiter_ops
+
+        return (
+            rocm_aiter_ops.is_enabled() and cfg.parallel_config.tensor_parallel_size > 1
+        )
+
     return (
         cfg.parallel_config.tensor_parallel_size > 1
         and current_platform.is_cuda()
@@ -126,12 +140,6 @@ def enable_allreduce_rms_fusion(cfg: "VllmConfig") -> bool:
             current_platform.is_device_capability_family(100)
             or current_platform.is_device_capability(90)
         )
-        # tp-dp combination broken:
-        # https://github.com/vllm-project/vllm/issues/34458
-        and cfg.parallel_config.data_parallel_size == 1
-        # tp-pp combination broken:
-        # https://github.com/vllm-project/vllm/issues/35426
-        and cfg.parallel_config.pipeline_parallel_size == 1
     )
 
 
@@ -151,17 +159,32 @@ def enable_rope_kvcache_fusion(cfg: "VllmConfig") -> bool:
     )
 
 
+def enable_rope_kvcache_mla_fusion(cfg: "VllmConfig") -> bool:
+    """Enable if use_inductor_graph_partition is enabled."""
+
+    return (
+        cfg.compilation_config.use_inductor_graph_partition
+        or not cfg.compilation_config.splitting_ops_contain_kv_cache_update()
+    )
+
+
 def enable_norm_pad_fusion(cfg: "VllmConfig") -> bool:
     """Enable if using AITER RMSNorm and hidden size is 2880 i.e. gpt-oss."""
-    from vllm._aiter_ops import rocm_aiter_ops
 
     return (
-        rocm_aiter_ops.is_rmsnorm_enabled()
+        cfg.kernel_config.ir_op_priority.fused_add_rms_norm[0] == "aiter"
         and cfg.model_config is not None
         and cfg.model_config.get_hidden_size() == 2880
     )
 
 
+def enable_mla_dual_rms_norm_fusion(cfg: "VllmConfig") -> bool:
+    """Enable MLA dual RMS norm fusion when AITer has fused_qk_rmsnorm."""
+    from vllm._aiter_ops import check_aiter_fused_qk_rmsnorm, rocm_aiter_ops
+
+    return rocm_aiter_ops.is_enabled() and check_aiter_fused_qk_rmsnorm()
+
+
 OPTIMIZATION_LEVEL_00 = {
     "compilation_config": {
         "pass_config": {
@@ -172,7 +195,9 @@ def enable_norm_pad_fusion(cfg: "VllmConfig") -> bool:
             "enable_sp": False,
             "fuse_gemm_comms": False,
             "fuse_act_padding": False,
+            "fuse_mla_dual_rms_norm": False,
             "fuse_rope_kvcache": False,
+            "fuse_rope_kvcache_cat_mla": False,
         },
         "cudagraph_mode": CUDAGraphMode.NONE,
         "use_inductor_graph_partition": False,
@@ -191,7 +216,9 @@ def enable_norm_pad_fusion(cfg: "VllmConfig") -> bool:
             "enable_sp": False,
             "fuse_gemm_comms": False,
             "fuse_act_padding": enable_norm_pad_fusion,
+            "fuse_mla_dual_rms_norm": enable_mla_dual_rms_norm_fusion,
             "fuse_rope_kvcache": False,
+            "fuse_rope_kvcache_cat_mla": False,
         },
         "cudagraph_mode": CUDAGraphMode.PIECEWISE,
         "use_inductor_graph_partition": False,
@@ -210,7 +237,9 @@ def enable_norm_pad_fusion(cfg: "VllmConfig") -> bool:
             "enable_sp": IS_DENSE,
             "fuse_gemm_comms": IS_DENSE,
             "fuse_act_padding": enable_norm_pad_fusion,
+            "fuse_mla_dual_rms_norm": enable_mla_dual_rms_norm_fusion,
             "fuse_rope_kvcache": enable_rope_kvcache_fusion,
+            "fuse_rope_kvcache_cat_mla": enable_rope_kvcache_mla_fusion,
         },
         "cudagraph_mode": CUDAGraphMode.FULL_AND_PIECEWISE,
         "use_inductor_graph_partition": False,
@@ -229,7 +258,9 @@ def enable_norm_pad_fusion(cfg: "VllmConfig") -> bool:
             "enable_sp": IS_DENSE,
             "fuse_gemm_comms": IS_DENSE,
             "fuse_act_padding": enable_norm_pad_fusion,
+            "fuse_mla_dual_rms_norm": enable_mla_dual_rms_norm_fusion,
             "fuse_rope_kvcache": enable_rope_kvcache_fusion,
+            "fuse_rope_kvcache_cat_mla": enable_rope_kvcache_mla_fusion,
         },
         "cudagraph_mode": CUDAGraphMode.FULL_AND_PIECEWISE,
         "use_inductor_graph_partition": False,
@@ -273,6 +304,8 @@ class VllmConfig:
     """Model weight offloading configuration."""
     attention_config: AttentionConfig = Field(default_factory=AttentionConfig)
     """Attention configuration."""
+    mamba_config: MambaConfig = Field(default_factory=MambaConfig)
+    """Mamba configuration."""
     kernel_config: KernelConfig = Field(default_factory=KernelConfig)
     """Kernel configuration."""
     lora_config: LoRAConfig | None = None
@@ -417,6 +450,10 @@ def compute_hash(self) -> str:
             vllm_factors.append(self.compilation_config.compute_hash())
         else:
             vllm_factors.append("None")
+        if self.kernel_config:
+            vllm_factors.append(self.kernel_config.compute_hash())
+        else:
+            vllm_factors.append(None)
         if self.kv_transfer_config:
             vllm_factors.append(self.kv_transfer_config.compute_hash())
         else:
@@ -452,6 +489,48 @@ def num_speculative_tokens(self) -> int:
             return self.speculative_config.num_speculative_tokens
         return 0
 
+    @property
+    def use_v2_model_runner(self) -> bool:
+        use_v2_model_runner = envs.VLLM_USE_V2_MODEL_RUNNER
+        if use_v2_model_runner is not None:
+            return use_v2_model_runner
+
+        if not self._is_default_v2_model_runner_model():
+            return False
+
+        if not HAS_TRITON:
+            logger.warning_once(
+                "Model runner v2 requires Triton; using the v1 model runner instead."
+            )
+            return False
+
+        unsupported = self._get_v2_model_runner_unsupported_features()
+        if unsupported:
+            logger.warning_once(
+                "Model runner v2 does not yet support %s; using the v1 model "
+                "runner instead.",
+                ", ".join(unsupported),
+            )
+            return False
+
+        return True
+
+    def _is_default_v2_model_runner_model(self) -> bool:
+        model_config = self.model_config
+        if model_config is None:
+            return False
+
+        if model_config.runner_type != "generate":
+            return False
+
+        architectures = getattr(model_config, "architectures", [])
+        if not any(
+            arch in DEFAULT_V2_MODEL_RUNNER_ARCHITECTURES for arch in architectures
+        ):
+            return False
+
+        return not model_config.is_moe and not model_config.is_quantized
+
     @property
     def needs_dp_coordinator(self) -> bool:
         """
@@ -526,7 +605,10 @@ def _get_quantization_config(
                     f"method {model_config.quantization}. Supported dtypes: "
                     f"{supported_dtypes}"
                 )
-            quant_config.maybe_update_config(model_config.model)
+            quant_config.maybe_update_config(
+                model_config.model,
+                hf_config=model_config.hf_config,
+            )
             return quant_config
         return None
 
@@ -550,6 +632,16 @@ def with_hf_config(
         if architectures is not None:
             hf_config = copy.deepcopy(hf_config)
             hf_config.architectures = architectures
+        elif hf_config.architectures is None:
+            from transformers.models.auto.modeling_auto import (
+                MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,
+            )
+
+            if hf_config.model_type in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
+                hf_config = copy.deepcopy(hf_config)
+                hf_config.architectures = [
+                    MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[hf_config.model_type]
+                ]
 
         model_config = copy.deepcopy(self.model_config)
 
@@ -642,6 +734,17 @@ def _post_init_kv_transfer_config(self) -> None:
         Right now, this function reads the offloading settings from
         CacheConfig and configures the KVTransferConfig accordingly.
         """
+        # Check if KV connector requires chunked prefill to be disabled.
+        if (
+            self.kv_transfer_config is not None
+            and self.kv_transfer_config.kv_connector == "ExampleHiddenStatesConnector"
+            and self.scheduler_config.enable_chunked_prefill
+        ):
+            raise ValueError(
+                "ExampleHiddenStatesConnector does not support chunked prefill. "
+                "Please disable chunked prefill (--no-enable-chunked-prefill)."
+            )
+
         # KV offloading is only activated when kv_offloading_size is set.
         if (kv_offloading_size := self.cache_config.kv_offloading_size) is None:
             return
@@ -657,7 +760,11 @@ def _post_init_kv_transfer_config(self) -> None:
         )
 
         if kv_offloading_backend == "native":
-            self.kv_transfer_config.kv_connector = "OffloadingConnector"
+            if envs.VLLM_USE_SIMPLE_KV_OFFLOAD:
+                config_connector = "SimpleCPUOffloadConnector"
+            else:
+                config_connector = "OffloadingConnector"
+            self.kv_transfer_config.kv_connector = config_connector
             self.kv_transfer_config.kv_connector_extra_config.update(
                 {"cpu_bytes_to_use": kv_offloading_size * (1 << 30)}
             )
@@ -672,6 +779,49 @@ def _post_init_kv_transfer_config(self) -> None:
         # This is the same for all backends
         self.kv_transfer_config.kv_role = "kv_both"
 
+    def _verify_kv_transfer_compat(self) -> None:
+        """Reject configurations that silently corrupt KV transfers."""
+        if (
+            self.kv_transfer_config is None
+            or self.kv_transfer_config.kv_connector is None
+        ):
+            return
+
+        # PyTorch's expandable_segments allocator uses CUDA VMM, which can
+        # remap a virtual address range to different physical pages over the
+        # engine's lifetime. KV connectors that pin KV cache memory (e.g.
+        # NixlConnector via ibv_reg_mr, MooncakeConnector) end up with their
+        # registrations pointing at stale physical pages after any remap,
+        # producing RDMA failures like IBV_WC_REM_ACCESS_ERR /
+        # NIXL_ERR_REMOTE_DISCONNECT at the first inter-node KV transfer.
+        # We can't enumerate every in-tree and out-of-tree connector that
+        # pins memory, so we conservatively reject the combination whenever
+        # any KV connector is configured.
+        #
+        # CuMem allocator is exempt: CuMemAllocator.use_memory_pool toggles
+        # expandable_segments off around its pool (see #40812), so the KV
+        # cache allocated within that context lands on stable physical pages
+        # even when the env var is set.
+        if "expandable_segments:True" not in os.environ.get(
+            "PYTORCH_CUDA_ALLOC_CONF", ""
+        ):
+            return
+        if self.model_config is not None and (self.model_config.enable_cumem_allocator):
+            return
+
+        raise ValueError(
+            f"KV connector {self.kv_transfer_config.kv_connector} is "
+            "incompatible with PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True "
+            "unless enable_cumem_allocator is also enabled. PyTorch's CUDA VMM "
+            "allocator can remap KV cache virtual addresses to different "
+            "physical pages, invalidating any pinned/registered KV memory "
+            "(e.g. IB memory regions registered by NIXL or Mooncake). Either "
+            "unset expandable_segments:True or enable the cumem allocator "
+            "(sleep mode does this automatically and also "
+            "routes KV allocations through CuMemAllocator's pool, where "
+            "expandable_segments is automatically disabled)."
+        )
+
     def __post_init__(self):
         """Verify configs are valid & consistent with each other."""
 
@@ -679,9 +829,7 @@ def __post_init__(self):
         self.instance_id = f"{time.time_ns()}"
 
         if self.performance_mode != "balanced":
-            logger.info_once(
-                "Performance mode set to '%s'.", self.performance_mode, scope="local"
-            )
+            logger.info_once("Performance mode set to '%s'.", self.performance_mode)
 
         self.try_verify_and_update_config()
 
@@ -691,9 +839,45 @@ def __post_init__(self):
 
             self.parallel_config.is_moe_model = self.model_config.is_moe
 
+        if (
+            self.model_config is not None
+            and self.model_config.enable_return_routed_experts
+        ):
+            if self.parallel_config.pipeline_parallel_size > 1:
+                raise ValueError(
+                    "--enable-return-routed-experts is incompatible with "
+                    "pipeline parallelism (PP > 1)."
+                )
+
+            # Incompatible with any KV connector — covers both PD disaggregation
+            # (kv_producer/kv_consumer: routing captured on P can't reach D) and
+            # single-instance KV offload/sharing (kv_both: slot_mapping semantics
+            # change when KV blocks live outside local GPU memory, breaking the
+            # slot-indexed routed_experts buffer).
+            if (
+                self.kv_transfer_config is not None
+                and self.kv_transfer_config.is_kv_transfer_instance
+            ):
+                raise ValueError(
+                    "--enable-return-routed-experts is incompatible with KV "
+                    "connectors (PD disaggregation, KV cache offload)."
+                )
+
         if self.lora_config is not None:
             self.lora_config.verify_with_model_config(self.model_config)
 
+        if (
+            self.mamba_config.enable_stochastic_rounding
+            and self.cache_config.mamba_ssm_cache_dtype != "float16"
+        ):
+            raise ValueError(
+                "Stochastic rounding for Mamba cache requires "
+                "the SSM cache to be float16. Please set it explicitly, "
+                "by specifying `--mamba-ssm-cache-dtype float16`, or disable "
+                "stochastic rounding by not specifying "
+                "`--enable-mamba-cache-stochastic-rounding`."
+            )
+
         if self.quant_config is None and self.model_config is not None:
             self.quant_config = VllmConfig._get_quantization_config(
                 self.model_config, self.load_config
@@ -751,6 +935,16 @@ def __post_init__(self):
         elif self.scheduler_config.async_scheduling is None:
             # Enable async scheduling unless there is an incompatible option.
             if (
+                self.model_config is not None
+                and self.model_config.runner_type == "pooling"
+            ):
+                # The current implementation of asynchronous scheduling negatively
+                # impacts performance of pooling models, so we disable by default.
+                logger.debug(
+                    "Disabling asynchronous scheduling by default for pooling model."
+                )
+                self.scheduler_config.async_scheduling = False
+            elif (
                 self.speculative_config is not None
                 and self.speculative_config.method not in get_args(EagleModelTypes)
                 and self.speculative_config.method not in get_args(NgramGPUTypes)
@@ -759,7 +953,6 @@ def __post_init__(self):
                     "Async scheduling not supported with %s-based "
                     "speculative decoding and will be disabled.",
                     self.speculative_config.method,
-                    scope="local",
                 )
                 self.scheduler_config.async_scheduling = False
             elif (
@@ -769,7 +962,6 @@ def __post_init__(self):
                 logger.warning_once(
                     "Async scheduling is not compatible with "
                     "disable_padded_drafter_batch=True and will be disabled.",
-                    scope="local",
                 )
                 self.scheduler_config.async_scheduling = False
             elif not executor_supports_async_sched:
@@ -777,7 +969,6 @@ def __post_init__(self):
                     "Async scheduling will be disabled because it is not supported "
                     "with the `%s` distributed executor backend. ",
                     executor_backend,
-                    scope="local",
                 )
                 self.scheduler_config.async_scheduling = False
             else:
@@ -796,7 +987,6 @@ def __post_init__(self):
                     logger.info_once(
                         "Disabling NCCL for DP synchronization "
                         "when using async scheduling.",
-                        scope="local",
                     )
                 self.parallel_config.disable_nccl_for_dp_synchronization = True
             else:
@@ -811,7 +1001,6 @@ def __post_init__(self):
             logger.warning_once(
                 "Disabling cascade attention (not yet compatible with "
                 "async speculative decoding).",
-                scope="local",
             )
             self.model_config.disable_cascade_attn = True
 
@@ -848,6 +1037,20 @@ def __post_init__(self):
             self.compilation_config.mode = CompilationMode.NONE
             self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE
 
+        if os.environ.get("TORCH_COMPILE_DISABLE") == "1":
+            logger.warning(
+                "TORCH_COMPILE_DISABLE is set, disabling torch.compile. "
+                "This is equivalent to setting -cc.mode=none"
+            )
+            self.compilation_config.mode = CompilationMode.NONE
+
+        if envs.VLLM_USE_BREAKABLE_CUDAGRAPH:
+            logger.warning_once(
+                "VLLM_USE_BREAKABLE_CUDAGRAPH is set, disabling vLLM's "
+                "torch.compile pipeline. Equivalent to -cc.mode=none."
+            )
+            self.compilation_config.mode = CompilationMode.NONE
+
         if self.compilation_config.backend == "eager" or (
             self.compilation_config.mode is not None
             and self.compilation_config.mode != CompilationMode.VLLM_COMPILE
@@ -883,6 +1086,13 @@ def has_blocked_weights():
             else:
                 self.compilation_config.mode = CompilationMode.NONE
 
+        # By default, enable torch wrapping only when using custom Inductor lowering
+        if self.compilation_config.ir_enable_torch_wrap is None:
+            self.compilation_config.ir_enable_torch_wrap = (
+                self.compilation_config.mode == CompilationMode.VLLM_COMPILE
+                and self.compilation_config.backend == "inductor"
+            )
+
         if all(s not in self.compilation_config.custom_ops for s in ("all", "none")):
             if (
                 self.compilation_config.backend == "inductor"
@@ -892,6 +1102,11 @@ def has_blocked_weights():
             else:
                 self.compilation_config.custom_ops.append("all")
 
+        # This populates IR op priorities,
+        # must happen after compilation mode and backend are decided,
+        # but before fusion defaults are applied as those may depend on op priority.
+        self.kernel_config.set_platform_defaults(self)
+
         default_config = OPTIMIZATION_LEVEL_TO_CONFIG[self.optimization_level]
         self._apply_optimization_level_defaults(default_config)
         if self.kernel_config.enable_flashinfer_autotune is None:
@@ -903,6 +1118,7 @@ def has_blocked_weights():
         if (
             self.compilation_config.cudagraph_mode.requires_piecewise_compilation()
             and self.compilation_config.mode != CompilationMode.VLLM_COMPILE
+            and not envs.VLLM_USE_BREAKABLE_CUDAGRAPH
         ):
             logger.info(
                 "Cudagraph mode %s is not compatible with compilation mode %s."
@@ -912,19 +1128,16 @@ def has_blocked_weights():
             )
             self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE
 
-        # async tp is built on top of sequence parallelism
-        # and requires it to be enabled.
-        if self.compilation_config.pass_config.fuse_gemm_comms:
-            self.compilation_config.pass_config.enable_sp = True
-        if self.compilation_config.pass_config.enable_sp:
+        # async tp is built on top of sequence parallelism and requires it.
+        pass_config = self.compilation_config.pass_config
+        if pass_config.fuse_gemm_comms:
+            pass_config.enable_sp = True
+        if pass_config.enable_sp:
             if self.parallel_config.tensor_parallel_size == 1:
                 logger.warning("Sequence Parallelism requires TP>1, disabling")
-                self.compilation_config.pass_config.enable_sp = False
-                self.compilation_config.pass_config.fuse_gemm_comms = False
+                pass_config.enable_sp = False
+                pass_config.fuse_gemm_comms = False
             else:
-                # Compute SP threshold early; disable if None (model too
-                # small for SP to be beneficial).
-                pass_config = self.compilation_config.pass_config
                 if pass_config.sp_min_token_num is None:
                     from vllm.compilation.passes.fusion.sequence_parallelism import (
                         get_sequence_parallelism_threshold,
@@ -944,8 +1157,8 @@ def has_blocked_weights():
                         "threshold heuristic, disabling. To force SP, "
                         "set pass_config.sp_min_token_num manually."
                     )
-                    self.compilation_config.pass_config.enable_sp = False
-                    self.compilation_config.pass_config.fuse_gemm_comms = False
+                    pass_config.enable_sp = False
+                    pass_config.fuse_gemm_comms = False
 
         from vllm.utils.torch_utils import HAS_OPAQUE_TYPE
 
@@ -1027,6 +1240,7 @@ def has_blocked_weights():
                 self.compilation_config.cudagraph_num_of_warmups = 1
 
             self._set_cudagraph_sizes()
+
         else:
             self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE
 
@@ -1081,6 +1295,9 @@ def has_blocked_weights():
             )
         current_platform.check_and_update_config(self)
 
+        if self.use_v2_model_runner:
+            self._validate_v2_model_runner()
+
         # Re-compute compile ranges after platform-specific config updates
         # (e.g., XPU may lower max_num_batched_tokens when MLA is enabled)
         self._set_compile_ranges()
@@ -1097,8 +1314,8 @@ def has_blocked_weights():
         )
 
         if self.compilation_config.pass_config.enable_sp:
-            # With pipeline parallelism or dynamo partitioning,
-            # native rms norm tracing errors due to incorrect residual shape.
+            # With pipeline parallelism, native rms norm tracing errors due to
+            # incorrect residual shape.
             # Use custom rms norm to unblock. In the future,
             # the pass will operate on higher-level IR to avoid the issue.
             # TODO: https://github.com/vllm-project/vllm/issues/27894
@@ -1109,24 +1326,15 @@ def has_blocked_weights():
                     self.compilation_config.mode,
                 )
 
-            is_fullgraph = (
-                self.compilation_config.use_inductor_graph_partition
-                or len(self.compilation_config.splitting_ops or []) == 0
-            )
-            if self.parallel_config.pipeline_parallel_size > 1 or not is_fullgraph:
+            if self.parallel_config.pipeline_parallel_size > 1:
                 if "-rms_norm" not in self.compilation_config.custom_ops:
                     self.compilation_config.custom_ops.append("+rms_norm")
                 else:
-                    regime = (
-                        "Dynamo partition"
-                        if not is_fullgraph
-                        else "pipeline parallelism"
-                    )
                     logger.warning_once(
                         "Sequence parallelism not supported with "
                         "native rms_norm when using %s, "
                         "this will likely lead to an error.",
-                        regime,
+                        "pipeline parallelism",
                     )
 
         # final check of cudagraph mode after all possible updates
@@ -1138,13 +1346,16 @@ def has_blocked_weights():
                 and not self.compilation_config.cudagraph_mode.has_piecewise_cudagraphs()  # noqa: E501
             ):
                 logger.warning_once(
-                    "No piecewise cudagraph for executing cascade attention."
-                    " Will fall back to eager execution if a batch runs "
-                    "into cascade attentions."
+                    "No piecewise cudagraph for executing cascade attention. "
+                    "Will fall back to eager execution if a batch runs into "
+                    "cascade attentions."
                 )
 
             if self.compilation_config.cudagraph_mode.requires_piecewise_compilation():
-                assert self.compilation_config.mode == CompilationMode.VLLM_COMPILE, (
+                assert (
+                    self.compilation_config.mode == CompilationMode.VLLM_COMPILE
+                    or envs.VLLM_USE_BREAKABLE_CUDAGRAPH
+                ), (
                     "Compilation mode should be CompilationMode.VLLM_COMPILE "
                     "when cudagraph_mode piecewise cudagraphs is used, "
                     f"cudagraph_mode={self.compilation_config.cudagraph_mode}"
@@ -1157,7 +1368,6 @@ def has_blocked_weights():
             self.model_config.disable_cascade_attn = True
             logger.warning_once(
                 "Disabling cascade attention when VLLM_BATCH_INVARIANT is enabled.",
-                scope="local",
             )
 
         if self.parallel_config.use_ubatching:
@@ -1182,6 +1392,16 @@ def has_blocked_weights():
 
         if self.reasoning_config is not None and self.model_config is not None:
             self.reasoning_config.initialize_token_ids(self.model_config)
+            if not self.reasoning_config.enabled:
+                logger.warning_once(
+                    "Auto-initialization of reasoning token IDs failed. "
+                    "Please check whether your reasoning parser has implemented "
+                    "the `reasoning_start_str` and `reasoning_end_str`."
+                )
+
+        # Resolve kv_offloading-derived connector name into kv_transfer_config
+        # before the HMA check below, which inspects the connector class.
+        self._post_init_kv_transfer_config()
 
         # Hybrid KV cache manager (HMA) runtime rules:
         # - Explicit enable (--no-disable-kv-cache-manager): error if runtime
@@ -1195,9 +1415,6 @@ def has_blocked_weights():
         if not current_platform.support_hybrid_kv_cache():
             # Hybrid KV cache manager is not supported on non-GPU platforms.
             need_disable_hybrid_kv_cache_manager = True
-        if self.kv_events_config is not None:
-            # Hybrid KV cache manager is not compatible with KV events.
-            need_disable_hybrid_kv_cache_manager = True
         if (
             self.model_config is not None
             and self.model_config.attention_chunk_size is not None
@@ -1223,18 +1440,42 @@ def has_blocked_weights():
         if self.scheduler_config.disable_hybrid_kv_cache_manager is None:
             # Default to disable HMA, but only if the user didn't express a preference.
             if self.kv_transfer_config is not None:
-                # NOTE(Kuntai): turn HMA off for connector unless specifically enabled.
-                need_disable_hybrid_kv_cache_manager = True
-                logger.warning(
-                    "Turning off hybrid kv cache manager because "
-                    "`--kv-transfer-config` is set. This will reduce the "
-                    "performance of vLLM on LLMs with sliding window attention "
-                    "or Mamba attention. If you are a developer of kv connector"
-                    ", please consider supporting hybrid kv cache manager for "
-                    "your connector by making sure your connector is a subclass"
-                    " of `SupportsHMA` defined in kv_connector/v1/base.py and"
-                    " use --no-disable-hybrid-kv-cache-manager to start vLLM."
+                from vllm.config.kv_transfer import KVTransferConfig
+                from vllm.distributed.kv_transfer.kv_connector.factory import (
+                    KVConnectorFactory,
+                )
+                from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+                    supports_hma,
                 )
+
+                connector_cls = KVConnectorFactory.get_connector_class(
+                    self.kv_transfer_config
+                )
+                all_support_hma = supports_hma(connector_cls)
+                # MultiConnector subclasses SupportsHMA; only effectively
+                # supports HMA when every sub-connector does.
+                if all_support_hma and connector_cls.__name__ == "MultiConnector":
+                    sub_ktcs = self.kv_transfer_config.kv_connector_extra_config.get(
+                        "connectors", []
+                    )
+                    all_support_hma = all(
+                        supports_hma(
+                            KVConnectorFactory.get_connector_class(
+                                KVTransferConfig(**sub)
+                            )
+                        )
+                        for sub in sub_ktcs
+                    )
+                if not all_support_hma:
+                    need_disable_hybrid_kv_cache_manager = True
+                    logger.warning(
+                        "Turning off hybrid kv cache manager because "
+                        "connector %s does not subclass `SupportsHMA`. "
+                        "This will reduce performance on models with "
+                        "sliding window or Mamba attention. See "
+                        "kv_connector/v1/base.py for details.",
+                        connector_cls.__name__,
+                    )
             self.scheduler_config.disable_hybrid_kv_cache_manager = (
                 need_disable_hybrid_kv_cache_manager
             )
@@ -1276,9 +1517,7 @@ def has_blocked_weights():
             if "-quant_fp8" not in custom_ops:
                 custom_ops.append("+quant_fp8")
 
-        # Handle the KV connector configs
-        self._post_init_kv_transfer_config()
-
+        self._verify_kv_transfer_compat()
         # Log the custom passes that are enabled
         self.compilation_config.pass_config.log_enabled_passes()
 
@@ -1324,6 +1563,25 @@ def _set_max_num_scheduled_tokens(self):
                     max_num_batched_tokens - scheduled_token_delta
                 )
 
+            if self.scheduler_config.max_num_scheduled_tokens <= 0:
+                raise ValueError(
+                    "max_num_scheduled_tokens is set to"
+                    f" {self.scheduler_config.max_num_scheduled_tokens} based on"
+                    " the speculative decoding settings, which does not allow"
+                    " any tokens to be scheduled. Increase max_num_batched_tokens"
+                    " to accommodate the additional draft token slots, or decrease"
+                    " num_speculative_tokens or max_num_seqs."
+                )
+            if self.scheduler_config.max_num_scheduled_tokens < 8192:
+                logger.warning_once(
+                    "max_num_scheduled_tokens is set to"
+                    f" {self.scheduler_config.max_num_scheduled_tokens} based on"
+                    " the speculative decoding settings. This may lead to suboptimal"
+                    " performance. Consider increasing max_num_batched_tokens to"
+                    " accommodate the additional draft token slots, or decrease"
+                    " num_speculative_tokens or max_num_seqs.",
+                )
+
             max_num_scheduled_tokens = self.scheduler_config.max_num_scheduled_tokens
             if max_num_batched_tokens < max_num_scheduled_tokens + (
                 self.speculative_config.max_num_new_slots_for_drafting
@@ -1348,6 +1606,10 @@ def _set_cudagraph_sizes(self):
         cudagraph_capture_sizes = [1, 2, 4] + list(range(8, 256, 8)) + list(
             range(256, max_graph_size + 1, 16))
 
+        `max_num_batched_tokens` is also appended to the list if it fits
+        within `max_cudagraph_capture_size`, so the max batch size is captured
+        even when off-stride.
+
         In the end, `vllm_config.compilation_config.cudagraph_capture_sizes`
         will be the final sizes to capture cudagraph (in ascending order).
 
@@ -1436,6 +1698,12 @@ def _set_cudagraph_sizes(self):
                     cudagraph_capture_sizes += list(
                         range(256, max_cudagraph_capture_size + 1, 16)
                     )
+                # ensure max_num_tokens is captured if within max capture size
+                if (
+                    max_num_tokens <= max_cudagraph_capture_size
+                    and max_num_tokens not in cudagraph_capture_sizes
+                ):
+                    cudagraph_capture_sizes.append(max_num_tokens)
                 # de-duplicate and sort the sizes
                 cudagraph_capture_sizes = sorted(set(cudagraph_capture_sizes))
 
@@ -1510,11 +1778,16 @@ def _set_compile_ranges(self):
         if compile_range_end is not None:
             computed_compile_ranges_endpoints.append(compile_range_end)
 
-        # Add the compile ranges for flashinfer
+        # Add the compile ranges for flashinfer/aiter.
         if compilation_config.pass_config.fuse_allreduce_rms:
             tp_size = self.parallel_config.tensor_parallel_size
-            max_size = compilation_config.pass_config.flashinfer_max_size(tp_size)
-            if max_size is not None:
+            from vllm._aiter_ops import rocm_aiter_ops
+
+            if rocm_aiter_ops.is_enabled():
+                max_size = rocm_aiter_ops.get_aiter_allreduce_max_size()
+            else:
+                max_size = compilation_config.pass_config.flashinfer_max_size(tp_size)
+            if max_size is not None and self.model_config is not None:
                 assert isinstance(self.model_config.dtype, torch.dtype)
                 max_token_num = max_size // (
                     self.model_config.get_hidden_size()
@@ -1573,6 +1846,22 @@ def _set_compile_ranges(self):
                         compile_range_end,
                     )
 
+        if compilation_config.pass_config.fuse_minimax_qk_norm:
+            from vllm.compilation.passes.fusion.minimax_qk_norm_fusion import (
+                MAX_TOKEN_NUM,
+            )
+
+            max_token_num = min(
+                MAX_TOKEN_NUM, self.scheduler_config.max_num_batched_tokens
+            )
+            if compile_range_end is not None and max_token_num < compile_range_end:
+                computed_compile_ranges_endpoints.append(max_token_num)
+            else:
+                logger.debug(
+                    "Max num batched tokens below MiniMax QK norm fusion threshold, "
+                    "MiniMax QK norm fusion enabled for all num_tokens."
+                )
+
         if compilation_config.compile_ranges_endpoints is not None:
             for x in compilation_config.compile_ranges_endpoints:
                 assert isinstance(x, int)
@@ -1624,12 +1913,13 @@ def try_verify_and_update_config(self):
                 )
                 self.load_config.load_format = "runai_streamer"
             elif self.load_config.load_format not in (
+                "modelexpress",
                 "runai_streamer",
                 "runai_streamer_sharded",
             ):
                 raise ValueError(
                     f"To load a model from object storage (S3/GCS/Azure), "
-                    f"'load_format' must be 'runai_streamer' or "
+                    f"'load_format' must be 'modelexpress', 'runai_streamer' or "
                     f"'runai_streamer_sharded', "
                     f"but got '{self.load_config.load_format}'. "
                     f"Model: {self.model_config.model}"
@@ -1668,6 +1958,7 @@ def __str__(self):
             f"dcp_comm_backend={self.parallel_config.dcp_comm_backend}, "  # noqa
             f"disable_custom_all_reduce={self.parallel_config.disable_custom_all_reduce}, "  # noqa
             f"quantization={self.model_config.quantization}, "
+            f"quantization_config={self.model_config.quantization_config}, "  # noqa
             f"enforce_eager={self.model_config.enforce_eager}, "
             f"enable_return_routed_experts={self.model_config.enable_return_routed_experts}, "  # noqa
             f"kv_cache_dtype={self.cache_config.cache_dtype}, "
@@ -1679,9 +1970,102 @@ def __str__(self):
             f"enable_prefix_caching={self.cache_config.enable_prefix_caching}, "
             f"enable_chunked_prefill={self.scheduler_config.enable_chunked_prefill}, "  # noqa
             f"pooler_config={self.model_config.pooler_config!r}, "
-            f"compilation_config={self.compilation_config!r}"
+            f"compilation_config={self.compilation_config!r}, "
+            f"kernel_config={self.kernel_config!r}"
         )
 
+    def _get_v2_model_runner_unsupported_features(self) -> list[str]:
+        """Collect features not yet supported by the V2 model runner."""
+        unsupported: list[str] = []
+        model_config = self.model_config
+        speculative_config = self.speculative_config
+
+        if model_config is not None and model_config.has_inner_state:
+            unsupported.append("hybrid/mamba models")
+
+        if self.parallel_config.prefill_context_parallel_size > 1:
+            unsupported.append("prefill context parallelism")
+
+        if self.compilation_config.mode == CompilationMode.STOCK_TORCH_COMPILE:
+            unsupported.append("stock torch.compile")
+
+        if (
+            self.compilation_config.pass_config.enable_sp
+            and self.parallel_config.tensor_parallel_size > 1
+        ):
+            unsupported.append("sequence parallelism")
+
+        if speculative_config is not None:
+            # TODO: ngram / ngram_gpu are not supported by the v2 model runner yet
+            if speculative_config.method in ("ngram", "ngram_gpu"):
+                unsupported.append("ngram/ngram_gpu speculative decoding")
+            elif speculative_config.method not in ("eagle", "eagle3", "mtp"):
+                unsupported.append(f"speculative method '{speculative_config.method}'")
+
+            # V2 EagleSpeculator does not support parallel_drafting (required by PEagle)
+            if speculative_config.parallel_drafting:
+                unsupported.append("parallel drafting for speculative decoding")
+
+            if (
+                speculative_config.method == "eagle3"
+                and self.parallel_config.pipeline_parallel_size > 1
+            ):
+                unsupported.append("EAGLE3 with pipeline parallelism")
+
+        if self.reasoning_config is not None:
+            # TODO: add reasoning budget enforcement to ModelRunnerV2.
+            unsupported.append("reasoning budget enforcement")
+
+        if self.parallel_config.enable_dbo:
+            unsupported.append("dual batch overlap")
+
+        if model_config is not None and model_config.enable_return_routed_experts:
+            # Will be added by https://github.com/vllm-project/vllm/pull/38163
+            unsupported.append("routed experts capture")
+
+        has_logitsproc_plugins = False
+        if model_config is not None:
+            from importlib.metadata import entry_points
+
+            has_logitsproc_plugins = bool(entry_points(group="vllm.logits_processors"))
+
+        if model_config is not None and (
+            model_config.logits_processors or has_logitsproc_plugins
+        ):
+            unsupported.append("custom logits processors")
+
+        if model_config is not None and model_config.enable_prompt_embeds:
+            unsupported.append("prompt embeds")
+
+        if (
+            model_config is not None
+            and model_config.runner_type == "generate"
+            and model_config.logprobs_mode in ("raw_logits", "processed_logits")
+        ):
+            unsupported.append(f"logprobs mode '{model_config.logprobs_mode}'")
+
+        if self.cache_config.kv_sharing_fast_prefill:
+            # Will be added by https://github.com/vllm-project/vllm/pull/35045
+            unsupported.append("KV sharing fast prefill")
+
+        if self.ec_transfer_config is not None:
+            # Will be added by https://github.com/vllm-project/vllm/pull/38390
+            unsupported.append("EC transfer")
+
+        return unsupported
+
+    def _validate_v2_model_runner(self) -> None:
+        """Check for features not yet supported by the V2 model runner."""
+        if not HAS_TRITON:
+            raise ValueError("VLLM_USE_V2_MODEL_RUNNER requires Triton.")
+
+        unsupported = self._get_v2_model_runner_unsupported_features()
+        if unsupported:
+            raise ValueError(
+                "VLLM_USE_V2_MODEL_RUNNER does not yet support: "
+                + ", ".join(unsupported)
+            )
+
     def validate_block_size(self) -> None:
         """Validate block_size against DCP and mamba constraints.
 
@@ -1728,6 +2112,22 @@ def validate_block_size(self) -> None:
                 "to schedule a multiple of block_size tokens even if they are "
                 "in the middle of a mm input"
             )
+            # TODO: support align mamba cache mode for model runner v2
+            assert not envs.VLLM_USE_V2_MODEL_RUNNER, (
+                "Model Runner V2 has not yet supported mamba_cache_mode='align'. "
+            )
+
+    @model_validator(mode="after")
+    def validate_nvfp4_kv_cache_with_mla(self) -> "VllmConfig":
+        if self.model_config is None:
+            return self
+        if self.cache_config.cache_dtype == "nvfp4" and self.model_config.use_mla:
+            raise ValueError(
+                "nvfp4 KV cache is not supported with MLA (Multi-head Latent "
+                "Attention) backends. Please use a different --kv-cache-dtype "
+                "(e.g., 'fp8' or 'auto') for MLA models such as DeepSeek."
+            )
+        return self
 
     @model_validator(mode="after")
     def validate_mamba_block_size(self) -> "VllmConfig":
@@ -1832,7 +2232,7 @@ def get_current_vllm_config_or_none() -> VllmConfig | None:
 def get_layers_from_vllm_config(
     vllm_config: VllmConfig,
     layer_type: type[T],
-    layer_names: list[str] | None = None,
+    layer_names: Iterable[str] | None = None,
 ) -> dict[str, T]:
     """
     Get layers from the vLLM config.
@@ -1843,14 +2243,12 @@ def get_layers_from_vllm_config(
         layer_names: The names of the layers to get. If None, return all layers.
     """
 
-    if layer_names is None:
-        layer_names = list(vllm_config.compilation_config.static_forward_context.keys())
-
     forward_context = vllm_config.compilation_config.static_forward_context
+    if layer_names is None:
+        layer_names = forward_context.keys()
 
     return {
-        layer_name: forward_context[layer_name]
+        layer_name: layer
         for layer_name in layer_names
-        if layer_name in forward_context
-        and isinstance(forward_context[layer_name], layer_type)
+        if isinstance(layer := forward_context.get(layer_name), layer_type)
     }
diff --git a/vllm/config/weight_transfer.py b/vllm/config/weight_transfer.py
index 1da1f96cb7e4..e2ac6e71a95f 100644
--- a/vllm/config/weight_transfer.py
+++ b/vllm/config/weight_transfer.py
@@ -1,7 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import Literal
-
 from vllm.config.utils import config
 
 
@@ -9,5 +7,7 @@
 class WeightTransferConfig:
     """Configuration for weight transfer during RL training."""
 
-    backend: Literal["nccl", "ipc"] = "nccl"
-    """The backend to use for weight transfer."""
+    backend: str = "nccl"
+    """The backend to use for weight transfer. Validated against the
+    `WeightTransferEngineFactory` registry at engine creation time.
+    """
diff --git a/vllm/device_allocator/cumem.py b/vllm/device_allocator/cumem.py
index 554a34b6a68e..6edd69a949e7 100644
--- a/vllm/device_allocator/cumem.py
+++ b/vllm/device_allocator/cumem.py
@@ -128,13 +128,6 @@ def get_instance() -> "CuMemAllocator":
         return CuMemAllocator.instance
 
     def __init__(self):
-        conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "")
-        assert "expandable_segments:True" not in conf, (
-            "Expandable segments are not compatible with memory pool. "
-            "Please track https://github.com/pytorch/pytorch/issues/147851 "
-            "for the latest updates."
-        )
-
         self.pointer_to_data: dict[int, AllocationData] = {}
         self.current_tag: str = CuMemAllocator.default_tag
         self.allocator_and_pools: dict[str, Any] = {}
@@ -167,6 +160,12 @@ def _python_free_callback(self, ptr: int) -> HandleType:
         data = self.pointer_to_data.pop(ptr)
         if data.cpu_backup_tensor is not None:
             data.cpu_backup_tensor = None
+        # Drain pending kernels before the C extension's cuMemUnmap.
+        # The pluggable allocator path doesn't defer reclaim like the
+        # regular caching allocator, so without this, in-flight work
+        # (e.g. quant helpers' transient tensors during weight loading)
+        # races the unmap and surfaces as CUDA_ERROR_ILLEGAL_ADDRESS.
+        torch.cuda.synchronize(data.handle[0])
         logger.debug(
             "Freed %s bytes for %s with address %s from cumem allocator",
             data.handle[1],
@@ -264,34 +263,49 @@ def use_memory_pool(self, tag: str | None = None):
 
         assert isinstance(tag, str)
 
+        # Expandable segments are incompatible with the memory pool used for
+        # sleep mode (see https://github.com/pytorch/pytorch/issues/147851).
+        # If the user has enabled expandable segments via
+        # PYTORCH_CUDA_ALLOC_CONF, temporarily disable them for the duration
+        # of the memory pool context and restore on exit.
+        conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "")
+        expandable_was_enabled = "expandable_segments:True" in conf
+        if expandable_was_enabled:
+            torch.cuda.memory._set_allocator_settings("expandable_segments:False")
+
         old_tag = self.current_tag
         self.current_tag = tag
-        with use_memory_pool_with_allocator(
-            self.python_malloc_callback, self.python_free_callback
-        ) as data:
-            # start to hit another PyTorch bug in PyTorch 2.6,
-            # possibly because of gc-related issue w.r.t. the allocator and
-            # the memory pool.
-            # to avoid the issue, we keep a reference of the data.
-            # see https://github.com/pytorch/pytorch/issues/146431 .
-            self.allocator_and_pools[tag] = data
-            yield
-            # PyTorch's bug, calling torch.cuda.empty_cache() will error
-            # when using pluggable allocator, see
-            # https://github.com/pytorch/pytorch/issues/145168 .
-            # if we have some memory allocated and then freed,
-            # the memory will not be released, e.g. in online quantization,
-            # where the model is created in higher precision, and then
-            # quantized in lower precision.
-            # Find all unused allocations and manually release them.
-            # TODO: we should expose `empty_cache` method in the memory pool.
-            # TODO: ask for help from PyTorch team to expose this method.
-            allocations = data[0].snapshot()
-            for allocation in allocations:
-                if allocation["allocated_size"] == 0:
-                    handle = self._python_free_callback(allocation["address"])
-                    unmap_and_release(handle)
+        try:
+            with use_memory_pool_with_allocator(
+                self.python_malloc_callback, self.python_free_callback
+            ) as data:
+                # start to hit another PyTorch bug in PyTorch 2.6,
+                # possibly because of gc-related issue w.r.t. the allocator
+                # and the memory pool.
+                # to avoid the issue, we keep a reference of the data.
+                # see https://github.com/pytorch/pytorch/issues/146431 .
+                self.allocator_and_pools[tag] = data
+                yield
+                # PyTorch's bug, calling torch.cuda.empty_cache() will error
+                # when using pluggable allocator, see
+                # https://github.com/pytorch/pytorch/issues/145168 .
+                # if we have some memory allocated and then freed,
+                # the memory will not be released, e.g. in online
+                # quantization, where the model is created in higher
+                # precision, and then quantized in lower precision.
+                # Find all unused allocations and manually release them.
+                # TODO: we should expose `empty_cache` method in the memory
+                # pool.
+                # TODO: ask for help from PyTorch team to expose this method.
+                allocations = data[0].snapshot()
+                for allocation in allocations:
+                    if allocation["allocated_size"] == 0:
+                        handle = self._python_free_callback(allocation["address"])
+                        unmap_and_release(handle)
+        finally:
             self.current_tag = old_tag
+            if expandable_was_enabled:
+                torch.cuda.memory._set_allocator_settings("expandable_segments:True")
 
     def get_current_usage(self) -> int:
         """
diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py
index 075f4e0859e4..33ff55a64e66 100644
--- a/vllm/distributed/device_communicators/all2all.py
+++ b/vllm/distributed/device_communicators/all2all.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import threading
+from dataclasses import dataclass
 from typing import Any
 
 import torch
@@ -10,7 +11,6 @@
 from vllm.distributed import get_dp_group, get_ep_group
 from vllm.forward_context import get_forward_context
 from vllm.logger import init_logger
-from vllm.platforms import current_platform
 from vllm.utils.flashinfer import (
     has_flashinfer_nvlink_one_sided,
     has_flashinfer_nvlink_two_sided,
@@ -38,115 +38,6 @@
 logger = init_logger(__name__)
 
 
-class NaiveAll2AllManager(All2AllManagerBase):
-    """
-    A naive implementation of all2all communication.
-    It uses all-reduce under the hood, which is not
-    efficient at all. The main purpose is for testing and
-    debugging.
-    """
-
-    def __init__(self, cpu_group, tcp_store_group=None):
-        super().__init__(cpu_group, tcp_store_group)
-
-    def naive_multicast(
-        self,
-        x: torch.Tensor,
-        cu_tokens_across_sp_cpu: torch.Tensor,
-        is_sequence_parallel: bool,
-    ) -> torch.Tensor:
-        assert len(x.shape) == 2
-        buffer = torch.empty(
-            (cu_tokens_across_sp_cpu[-1], x.size(1)), device=x.device, dtype=x.dtype
-        )
-
-        rank = self.rank if is_sequence_parallel else self.dp_rank
-        world_size = self.world_size if is_sequence_parallel else self.dp_world_size
-
-        start = 0 if rank == 0 else cu_tokens_across_sp_cpu[rank - 1]
-        end = cu_tokens_across_sp_cpu[rank]
-        buffer[start:end, :].copy_(x)
-        for idx in range(world_size):
-            start = 0 if idx == 0 else cu_tokens_across_sp_cpu[idx - 1]
-            end = cu_tokens_across_sp_cpu[idx]
-            get_ep_group().broadcast(buffer[start:end, :], idx)
-
-        return buffer
-
-    def dispatch_router_logits(
-        self,
-        hidden_states: torch.Tensor,
-        router_logits: torch.Tensor,
-        is_sequence_parallel: bool = False,
-        extra_tensors: list[torch.Tensor] | None = None,
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        if extra_tensors is not None:
-            raise NotImplementedError(
-                "extra_tensors is not supported for NaiveAll2AllManager"
-            )
-        sp_size = self.tp_group.world_size if is_sequence_parallel else 1
-        dp_metadata = get_forward_context().dp_metadata
-        assert dp_metadata is not None
-        cu_tokens_across_sp_cpu = dp_metadata.cu_tokens_across_sp(sp_size)
-
-        hidden_states = self.naive_multicast(
-            hidden_states, cu_tokens_across_sp_cpu, is_sequence_parallel
-        )
-        router_logits = self.naive_multicast(
-            router_logits, cu_tokens_across_sp_cpu, is_sequence_parallel
-        )
-
-        return hidden_states, router_logits
-
-    def dispatch(
-        self,
-        hidden_states: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        is_sequence_parallel: bool = False,
-        extra_tensors: list[torch.Tensor] | None = None,
-    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-        if extra_tensors is not None:
-            raise NotImplementedError(
-                "extra_tensors is not supported for NaiveAll2AllManager"
-            )
-        sp_size = self.tp_group.world_size if is_sequence_parallel else 1
-        dp_metadata = get_forward_context().dp_metadata
-        assert dp_metadata is not None
-        cu_tokens_across_sp_cpu = dp_metadata.cu_tokens_across_sp(sp_size)
-
-        hidden_states = self.naive_multicast(
-            hidden_states, cu_tokens_across_sp_cpu, is_sequence_parallel
-        )
-        topk_weights = self.naive_multicast(
-            topk_weights, cu_tokens_across_sp_cpu, is_sequence_parallel
-        )
-        topk_ids = self.naive_multicast(
-            topk_ids, cu_tokens_across_sp_cpu, is_sequence_parallel
-        )
-        return hidden_states, topk_weights, topk_ids
-
-    def combine(
-        self, hidden_states: torch.Tensor, is_sequence_parallel: bool = False
-    ) -> torch.Tensor:
-        ep_rank = self.rank if is_sequence_parallel else self.dp_rank
-
-        dp_metadata = get_forward_context().dp_metadata
-        assert dp_metadata is not None
-        sp_size = self.tp_group.world_size if is_sequence_parallel else 1
-        cu_tokens_across_sp_cpu = dp_metadata.cu_tokens_across_sp(sp_size)
-
-        start = 0 if ep_rank == 0 else cu_tokens_across_sp_cpu[ep_rank - 1]
-        end = cu_tokens_across_sp_cpu[ep_rank]
-
-        all_hidden_states = get_ep_group().all_reduce(hidden_states)
-        hidden_states = all_hidden_states[start:end, :]
-        return hidden_states
-
-    def destroy(self):
-        pass
-
-
 class AgRsAll2AllManager(All2AllManagerBase):
     """
     An implementation of all2all communication based on
@@ -334,11 +225,8 @@ def _make_all2all_kwargs(self) -> dict[Any, Any]:
             num_rdma_bytes=num_rdma_bytes,
             low_latency_mode=False,
             num_qps_per_rank=num_qps_per_rank,
+            explicitly_destroy=True,
         )
-        if not current_platform.is_rocm():
-            kwargs.update(
-                explicitly_destroy=True,
-            )
         return kwargs
 
     def get_handle(self, kwargs):
@@ -412,13 +300,10 @@ def _make_all2all_kwargs(
             num_rdma_bytes=num_rdma_bytes,
             low_latency_mode=True,
             num_qps_per_rank=num_qps_per_rank,
+            allow_nvlink_for_low_latency_mode=True,
+            allow_mnnvl=envs.VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL,
+            explicitly_destroy=True,
         )
-        if not current_platform.is_rocm():
-            kwargs.update(
-                allow_nvlink_for_low_latency_mode=True,
-                allow_mnnvl=envs.VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL,
-                explicitly_destroy=True,
-            )
         return kwargs
 
     def get_handle(self, kwargs):
@@ -440,17 +325,24 @@ def max_sms_used(self) -> int | None:
         return 0
 
 
+@dataclass
+class _NixlEPBufferState:
+    buffer: Any
+    connected_ep_size: int
+    active_ep_size: int
+
+
 class NixlEPAll2AllManager(All2AllManagerBase):
     """
     All2All communication based on NIXL EP kernels.
     This backend supports elastic EP with dynamic rank connection/disconnection.
     """
 
-    # (nixl_ep_buffer, ep_size)
-    _buffer: tuple[Any, int] | None = None
-    _lock = threading.Lock()
+    _buffer: _NixlEPBufferState | None = None
+    _lock = threading.RLock()
 
     def __init__(self, cpu_group, tcp_store_group=None):
+        assert tcp_store_group is not None
         super().__init__(cpu_group, tcp_store_group)
 
         self.max_num_ep_ranks = envs.VLLM_NIXL_EP_MAX_NUM_RANKS
@@ -482,47 +374,103 @@ def _init_buffer(
             num_experts_per_rank=num_experts_per_rank,
             num_rdma_bytes=num_rdma_bytes,
         )
-        ranks_to_connect = list(range(self.cpu_group.size()))
+        ranks_to_connect = list(range(self.world_size))
         buffer.connect_ranks(ranks_to_connect)
-        NixlEPAll2AllManager._buffer = (buffer, self.cpu_group.size())
+        NixlEPAll2AllManager._buffer = _NixlEPBufferState(
+            buffer=buffer,
+            connected_ep_size=self.world_size,
+            active_ep_size=self.world_size,
+        )
+
+    def _connect_to_ep_size(self, ep_size: int, *, make_active: bool) -> None:
+        assert NixlEPAll2AllManager._buffer is not None
+        state = NixlEPAll2AllManager._buffer
+        if ep_size <= state.connected_ep_size:
+            return
+
+        state.buffer.set_tcp_store_group(self.tcp_store_group.store)
+        ranks_to_connect = list(range(state.connected_ep_size, ep_size))
+        state.buffer.connect_ranks(ranks_to_connect, activate=make_active)
+        state.connected_ep_size = ep_size
+        if make_active:
+            state.active_ep_size = ep_size
+
+    def _disconnect_to_ep_size(self, ep_size: int) -> None:
+        assert NixlEPAll2AllManager._buffer is not None
+        state = NixlEPAll2AllManager._buffer
+        if ep_size >= state.connected_ep_size:
+            return
+
+        state.buffer.set_tcp_store_group(self.tcp_store_group.store)
+        ranks_to_disconnect = list(range(ep_size, state.connected_ep_size))
+        state.buffer.disconnect_ranks(ranks_to_disconnect)
+        state.connected_ep_size = ep_size
+        state.active_ep_size = min(state.active_ep_size, ep_size)
+
+    def _unmask_connected_ranks(self, target_ep_size: int) -> None:
+        assert NixlEPAll2AllManager._buffer is not None
+        state = NixlEPAll2AllManager._buffer
+        state.buffer.set_tcp_store_group(self.tcp_store_group.store)
+        if target_ep_size <= state.active_ep_size:
+            return
+        assert state.connected_ep_size >= target_ep_size
+
+        for rank in range(state.active_ep_size, target_ep_size):
+            state.buffer.update_mask_buffer(rank, mask=False)
+        state.active_ep_size = target_ep_size
 
-    def _update_buffer(self):
+    def _stage_ep_size(self) -> None:
         assert NixlEPAll2AllManager._buffer is not None
-        buffer, current_ep_size = NixlEPAll2AllManager._buffer
-        current_ranks = list(range(current_ep_size))
-        new_ep_size = self.cpu_group.size()
-        buffer.set_tcp_store_group(self.tcp_store_group.store)
-        if new_ep_size > len(current_ranks):
-            ranks_to_connect = list(range(len(current_ranks), new_ep_size))
-            buffer.connect_ranks(ranks_to_connect)
+        state = NixlEPAll2AllManager._buffer
+        target_ep_size = self.world_size
+
+        # Scale-up can safely connect standby ranks while leaving them masked.
+        # Scale-down must not disconnect active ranks until commit.
+        if target_ep_size > state.connected_ep_size:
+            self._connect_to_ep_size(target_ep_size, make_active=False)
+
+    def commit_staged_state(self) -> None:
+        """Commit staged NIXL EP state to the active communication set."""
+        with NixlEPAll2AllManager._lock:
+            assert NixlEPAll2AllManager._buffer is not None
+            state = NixlEPAll2AllManager._buffer
+            target_ep_size = self.world_size
+
+            if target_ep_size < state.connected_ep_size:
+                self._disconnect_to_ep_size(target_ep_size)
+            elif target_ep_size > state.connected_ep_size:
+                self._connect_to_ep_size(target_ep_size, make_active=True)
+
+            self._unmask_connected_ranks(target_ep_size)
+
+    def _ensure_ep_size(self, *, stage: bool) -> None:
+        if stage:
+            self._stage_ep_size()
         else:
-            ranks_to_disconnect = current_ranks[new_ep_size:]
-            buffer.disconnect_ranks(ranks_to_disconnect)
-        NixlEPAll2AllManager._buffer = (buffer, new_ep_size)
+            self.commit_staged_state()
 
     def get_handle(self, kwargs):
         with NixlEPAll2AllManager._lock:
-            if (
-                NixlEPAll2AllManager._buffer is not None
-                and NixlEPAll2AllManager._buffer[1] == self.cpu_group.size()
-            ):
-                return NixlEPAll2AllManager._buffer[0]
-
-            num_experts_per_rank = (
-                kwargs["num_global_experts"] // kwargs["num_ep_ranks"]
-            )
-            nixl_kwargs = dict(
-                max_num_tokens_per_dp_rank=kwargs["max_num_tokens_per_dp_rank"],
-                token_hidden_size=kwargs["token_hidden_size"],
-                num_experts_per_rank=num_experts_per_rank,
-            )
-            if NixlEPAll2AllManager._buffer is None:
-                self._init_buffer(**nixl_kwargs)
+            stage = bool(kwargs.get("stage", False))
+            state = NixlEPAll2AllManager._buffer
+            if state is None:
+                assert not stage, (
+                    "NIXL EP staged initialization requires an existing buffer"
+                )
+                max_num_tokens_per_dp_rank = kwargs["max_num_tokens_per_dp_rank"]
+                num_experts_per_rank = (
+                    kwargs["num_global_experts"] // kwargs["num_ep_ranks"]
+                )
+                self._init_buffer(
+                    max_num_tokens_per_dp_rank=max_num_tokens_per_dp_rank,
+                    token_hidden_size=kwargs["token_hidden_size"],
+                    num_experts_per_rank=num_experts_per_rank,
+                )
             else:
-                self._update_buffer()
+                self._ensure_ep_size(stage=stage)
 
             assert NixlEPAll2AllManager._buffer is not None
-            handle = NixlEPAll2AllManager._buffer[0]
+            handle = NixlEPAll2AllManager._buffer.buffer
             return handle
 
     def dispatch(
@@ -547,7 +495,7 @@ def destroy(self):
         # NOTE(yongji): NIXLEPAll2AllManager instance is recreated during
         # scale-up/down, so we cannot destroy the persistent buffer here.
         assert NixlEPAll2AllManager._buffer is not None
-        buffer = NixlEPAll2AllManager._buffer[0]
+        buffer = NixlEPAll2AllManager._buffer.buffer
         buffer.set_tcp_store_group(None)
 
     # NIXL EP uses RDMA so no SMs are used for communication
@@ -601,15 +549,18 @@ def initialize(
             CustomCommunicator,
         )
 
-        dp_config = MnnvlConfig(
-            comm_backend=CustomCommunicator(get_dp_group().cpu_group),
+        # MNNVL workspace is allocated per rank in the comm_backend's group; the
+        # flashinfer kernel asserts workspace.size(0) == moe_ep_size, so the backend
+        # must span the EP group (= DP*PCP*TP), not the DP group.
+        ep_config = MnnvlConfig(
+            comm_backend=CustomCommunicator(self.cpu_group),
             fabric_page_size=1 << 29,  # 512MB
             allocation_granularity=0,  # Auto-detect
         )
 
-        self.workspace_tensor = MnnvlMoe.get_moe_workspaces(self.mapping, dp_config)
+        self.workspace_tensor = MnnvlMoe.get_moe_workspaces(self.mapping, ep_config)
         self.prepare_workspace_tensor = MnnvlMoe.get_moe_prepare_workspace(
-            self.mapping, dp_config
+            self.mapping, ep_config
         )
 
         self.world_size = world_size
@@ -683,6 +634,10 @@ def __init__(self, cpu_group):
         self.initialized = False
         self.moe_alltoall: MoeAlltoAll | None = None
         self.mapping = None
+        self.workspace_size = 0
+        self.max_num_tokens = 0
+        self.top_k = 0
+        self.num_experts = 0
 
     def initialize(
         self,
@@ -690,10 +645,57 @@ def initialize(
         top_k: int,
         num_experts: int,
         hidden_size: int,
+        dispatch_dtype_bytes_per_elem: int = 0,
+        dispatch_scale_bytes_per_token: int = 0,
     ):
-        """Initialize the MoeAlltoAll workspace."""
+        """Initialize (or grow) the MoeAlltoAll workspace."""
+        if dispatch_dtype_bytes_per_elem == 0:
+            hidden_bytes = hidden_size // 2
+        else:
+            hidden_bytes = hidden_size * dispatch_dtype_bytes_per_elem
+        total_dispatch_payload_size_per_token = (
+            hidden_bytes
+            + dispatch_scale_bytes_per_token
+            + top_k * 4  # int32 topks ids
+            + top_k * 4  # float32 topk weights
+        )
+        combine_payload_size_per_token = hidden_size * 2  # bf16 hidden states
+        needed_workspace_size = moe_a2a_get_workspace_size_per_rank(
+            ep_size=self.world_size,
+            max_num_tokens=max_num_tokens,
+            total_dispatch_payload_size_per_token=total_dispatch_payload_size_per_token,
+            combine_payload_size_per_token=combine_payload_size_per_token,
+        )
+        # workspace_size and max_num_tokens are kernel-side max-bounds, so
+        # heterogeneous MoE layers (e.g. NVFP4 base + bf16 MTP head) only
+        # need the shared workspace grown to the union. top_k and num_experts
+        # must match across layers: top_k is a strict-equality assert at
+        # dispatch (FlashInfer csrc/trtllm_moe_alltoall.cu), and num_experts
+        # feeds the expert-to-rank routing math, so any mismatch would crash
+        # or silently corrupt routing. All ranks see the same MoE layers in
+        # the same order with identical shapes, so the skip / rebuild
+        # branches are taken consistently across ranks.
         if self.initialized:
-            return
+            assert top_k == self.top_k, (
+                "FlashInfer one-sided MoeAlltoAll does not support "
+                f"heterogeneous top_k across MoE layers (got {top_k}, "
+                f"was built with {self.top_k})"
+            )
+            assert num_experts == self.num_experts, (
+                "FlashInfer one-sided MoeAlltoAll does not support "
+                f"heterogeneous num_experts across MoE layers (got "
+                f"{num_experts}, was built with {self.num_experts})"
+            )
+            if (
+                needed_workspace_size <= self.workspace_size
+                and max_num_tokens <= self.max_num_tokens
+            ):
+                return
+
+        self.workspace_size = max(self.workspace_size, needed_workspace_size)
+        self.max_num_tokens = max(self.max_num_tokens, max_num_tokens)
+        self.top_k = top_k
+        self.num_experts = num_experts
 
         self.cleanup()
         gpus_per_node = torch.accelerator.device_count()
@@ -714,37 +716,23 @@ def initialize(
             CustomCommunicator,
         )
 
-        dp_config = MnnvlConfig(
-            comm_backend=CustomCommunicator(get_dp_group().cpu_group),
-        )
-        total_dispatch_payload_size_per_token = (
-            hidden_size // 2  # nvfp4 hidden states
-            + hidden_size // 16  # fp8 scaling factors
-            + top_k * 4  # int32 topks ids
-            + top_k * 4  # float32 topk weights
-        )
-        combine_payload_size_per_token = hidden_size * 2  # bf16 hidden states
-        self.workspace_size = moe_a2a_get_workspace_size_per_rank(
-            ep_size=self.world_size,
-            max_num_tokens=max_num_tokens,
-            total_dispatch_payload_size_per_token=total_dispatch_payload_size_per_token,
-            combine_payload_size_per_token=combine_payload_size_per_token,
+        # MNNVL workspace is allocated per rank in the comm_backend's group; the
+        # flashinfer kernel asserts workspace.size(0) == moe_ep_size, so the backend
+        # must span the EP group (= DP*PCP*TP), not the DP group.
+        ep_config = MnnvlConfig(
+            comm_backend=CustomCommunicator(self.cpu_group),
         )
 
         self.moe_alltoall = MoeAlltoAll(
             mapping=self.mapping,
-            max_num_tokens=max_num_tokens,
-            top_k=top_k,
-            num_experts=num_experts,
+            max_num_tokens=self.max_num_tokens,
+            top_k=self.top_k,
+            num_experts=self.num_experts,
             workspace_size_per_rank=self.workspace_size,
-            mnnvl_config=dp_config,
+            mnnvl_config=ep_config,
         )
 
         self.gpus_per_node = gpus_per_node
-        self.max_num_tokens = max_num_tokens
-        self.top_k = top_k
-        self.num_experts = num_experts
-        self.hidden_size = hidden_size
         self.initialized = True
 
         logger.info(
@@ -752,7 +740,10 @@ def initialize(
             self.rank,
             self.world_size,
         )
-        dist.barrier()
+        # Scope barrier to the EP group: with PP, different EP groups can
+        # rebuild a different number of times if their MoE layers have
+        # different shape sequences, so a world-level barrier would deadlock.
+        dist.barrier(group=self.cpu_group)
 
     def get_handle(self, kwargs):
         return self
diff --git a/vllm/distributed/device_communicators/all_reduce_utils.py b/vllm/distributed/device_communicators/all_reduce_utils.py
index 108afa195f63..cebf2c49b44e 100644
--- a/vllm/distributed/device_communicators/all_reduce_utils.py
+++ b/vllm/distributed/device_communicators/all_reduce_utils.py
@@ -19,8 +19,8 @@
 import vllm.envs as envs
 from vllm.distributed.device_communicators.cuda_wrapper import CudaRTLibrary
 from vllm.logger import init_logger
+from vllm.platforms import current_platform
 from vllm.utils.system_utils import update_environment_variables
-from vllm.utils.torch_utils import cuda_device_count_stateless
 
 logger = init_logger(__name__)
 
@@ -320,7 +320,7 @@ def gpu_p2p_access_check(src: int, tgt: int) -> bool:
 
     is_distributed = dist.is_initialized()
 
-    num_dev = cuda_device_count_stateless()
+    num_dev = current_platform.device_count()
     cuda_visible_devices = envs.CUDA_VISIBLE_DEVICES
     if cuda_visible_devices is None:
         cuda_visible_devices = ",".join(str(i) for i in range(num_dev))
diff --git a/vllm/distributed/device_communicators/base_device_communicator.py b/vllm/distributed/device_communicators/base_device_communicator.py
index 2125f7381fe2..0b4b81f93bb4 100644
--- a/vllm/distributed/device_communicators/base_device_communicator.py
+++ b/vllm/distributed/device_communicators/base_device_communicator.py
@@ -7,6 +7,8 @@
 import torch.distributed as dist
 from torch.distributed import ProcessGroup
 
+from vllm.utils import is_moe_layer
+
 
 class Cache:
     def __init__(self):
@@ -317,16 +319,7 @@ def prepare_communication_buffer_for_model(self, model: torch.nn.Module) -> None
         if not self.is_ep_communicator:
             return
 
-        moe_modules = [
-            module
-            for module in model.modules()
-            # TODO(bnell): Should use isinstance but can't.  Maybe search for
-            # presence of quant_method.maybe_init_modular_kernel?
-            if (
-                module.__class__.__name__ == "FusedMoE"
-                or module.__class__.__name__ == "SharedFusedMoE"
-            )
-        ]
+        moe_modules = [module for module in model.modules() if is_moe_layer(module)]
         for module in moe_modules:
             module.maybe_init_modular_kernel()
 
diff --git a/vllm/distributed/device_communicators/cpu_communicator.py b/vllm/distributed/device_communicators/cpu_communicator.py
index 2bce5faa8b66..067cdad7348a 100644
--- a/vllm/distributed/device_communicators/cpu_communicator.py
+++ b/vllm/distributed/device_communicators/cpu_communicator.py
@@ -45,19 +45,23 @@ def __init__(
                 unique_name,
             )
 
+        # send/recv tensor_dict is only supported through the SHM communicator backend
+        self.supports_tensor_dict = isinstance(self.dist_module, _CPUSHMDistributed)
+
         if self.use_all2all:
-            if self.all2all_backend != "naive":  # type: ignore[has-type]
+            if self.all2all_backend not in (
+                "naive",
+                "allgather_reducescatter",
+            ):  # type: ignore[has-type]
                 logger.warning(
                     "`%s` all2all manager is not supported on CPU. "
-                    "Falling back to `naive` all2all manager for CPU.",
+                    "Falling back to `allgather_reducescatter` manager.",
                     self.all2all_backend,  # type: ignore[has-type]
                 )
-                self.all2all_backend = "naive"
-            if self.all2all_backend == "naive":
-                from .all2all import NaiveAll2AllManager
+            from .all2all import AgRsAll2AllManager
 
-                self.all2all_manager = NaiveAll2AllManager(self.cpu_group)
-                logger.info("Using naive all2all manager.")
+            self.all2all_manager = AgRsAll2AllManager(self.cpu_group)
+            logger.info("Using allgather_reducescatter all2all manager.")
 
     def _all_group_ranks_share_shm_group_name(self) -> bool:
         """
@@ -143,12 +147,22 @@ def send_tensor_dict(
         tensor_dict: dict[str, torch.Tensor | Any],
         dst: int,
     ) -> None:
+        if not self.supports_tensor_dict:
+            raise NotImplementedError(
+                "CpuCommunicator does not support tensor dict fastpath with "
+                "torch.distributed backend."
+            )
         return self.dist_module.send_tensor_dict(tensor_dict, dst)
 
     def recv_tensor_dict(
         self,
         src: int,
     ) -> dict[str, torch.Tensor | Any]:
+        if not self.supports_tensor_dict:
+            raise NotImplementedError(
+                "CpuCommunicator does not support tensor dict fastpath with "
+                "torch.distributed backend."
+            )
         return self.dist_module.recv_tensor_dict(src)
 
     def dispatch_router_logits(
diff --git a/vllm/distributed/device_communicators/cuda_communicator.py b/vllm/distributed/device_communicators/cuda_communicator.py
index 4550bdb25629..ee81bc20f3c2 100644
--- a/vllm/distributed/device_communicators/cuda_communicator.py
+++ b/vllm/distributed/device_communicators/cuda_communicator.py
@@ -7,6 +7,7 @@
 
 import vllm.envs as envs
 from vllm.distributed.device_communicators.all_reduce_utils import (
+    NCCL_SYMM_MEM_ALL_REDUCE_CONFIG,
     should_nccl_symm_mem_allreduce,
 )
 from vllm.distributed.device_communicators.pynccl import register_nccl_symmetric_ops
@@ -114,14 +115,11 @@ def __init__(
                 # currently be an MI300 series.
                 self.qr_comm = QuickAllReduce(group=self.cpu_group, device=self.device)
 
-        if self.use_all2all:
-            if self.all2all_backend == "naive":
-                from .all2all import NaiveAll2AllManager
+        if self.world_size > 1:
+            self._log_all_reduce_backend_selection()
 
-                self.all2all_manager = NaiveAll2AllManager(
-                    self.cpu_group, tcp_store_group
-                )
-            elif self.all2all_backend == "allgather_reducescatter":
+        if self.use_all2all:
+            if self.all2all_backend in ("naive", "allgather_reducescatter"):
                 from .all2all import AgRsAll2AllManager
 
                 self.all2all_manager = AgRsAll2AllManager(
@@ -177,6 +175,69 @@ def __init__(
                 scope="global",
             )
 
+    def _log_all_reduce_backend_selection(self) -> None:
+        """Log the all-reduce backends that are active for this group.
+
+        The dispatch chain in ``all_reduce`` tries backends in this order and
+        falls through to the next one if the current backend rejects the
+        input (size/dtype gates) or is disabled. The list of "enabled"
+        backends below is the subset of potential backends that may be
+        chosen at dispatch time for this group; the actual per-call choice
+        depends on the input tensor.
+        """
+        all_potential_ar_backends = [
+            "NCCL_SYMM_MEM",
+            "QUICK_REDUCE",
+            "FLASHINFER",
+            "CUSTOM",
+            "SYMM_MEM",
+            "PYNCCL",
+        ]
+        enabled_ar_backends: list[str] = []
+        # Mirror the static preconditions of `should_nccl_symm_mem_allreduce`:
+        # VLLM_BATCH_INVARIANT off, NCCL symm mem enabled, world_size meets
+        # min_world_size, and world_size either has a tuned entry in
+        # `custom_ar_preferred_ranges` or is greater than
+        # `always_use_above_world_size`. World sizes that fail the latter (e.g.
+        # 5/6/7 with the default config) never dispatch NCCL symm mem
+        # regardless of input. The per-tensor-size check inside the function
+        # stays as a runtime decision.
+        nccl_symm_ws_ok = self.world_size >= NCCL_SYMM_MEM_ALL_REDUCE_CONFIG[
+            "min_world_size"
+        ] and (
+            self.world_size
+            in NCCL_SYMM_MEM_ALL_REDUCE_CONFIG["custom_ar_preferred_ranges"]
+            or self.world_size
+            > NCCL_SYMM_MEM_ALL_REDUCE_CONFIG["always_use_above_world_size"]
+        )
+        if (
+            self.pynccl_comm is not None
+            and not self.pynccl_comm.disabled
+            and is_symmetric_memory_enabled()
+            and not envs.VLLM_BATCH_INVARIANT
+            and nccl_symm_ws_ok
+        ):
+            enabled_ar_backends.append("NCCL_SYMM_MEM")
+        if self.qr_comm is not None and not self.qr_comm.disabled:
+            enabled_ar_backends.append("QUICK_REDUCE")
+        if self.fi_ar_comm is not None and not self.fi_ar_comm.disabled:
+            enabled_ar_backends.append("FLASHINFER")
+        if self.ca_comm is not None and not self.ca_comm.disabled:
+            enabled_ar_backends.append("CUSTOM")
+        if self.symm_mem_comm is not None and not self.symm_mem_comm.disabled:
+            enabled_ar_backends.append("SYMM_MEM")
+        if self.pynccl_comm is not None and not self.pynccl_comm.disabled:
+            enabled_ar_backends.append("PYNCCL")
+
+        logger.info_once(
+            "Using %s all-reduce backends (in dispatch order) for group "
+            "'%s' out of potential backends: %s.",
+            "[" + ", ".join(f"'{b}'" for b in enabled_ar_backends) + "]",
+            self.unique_name or "<unnamed>",
+            "[" + ", ".join(f"'{b}'" for b in all_potential_ar_backends) + "]",
+            scope="global",
+        )
+
     def all_reduce(self, input_):
         # since currently we perform copy input -> symm_input -> out-of-place AR
         # return symm_output, we don't need to check if input is symmetric
@@ -276,7 +337,7 @@ def reduce_scatterv(
         input_tensor = input_.movedim(0, dim).contiguous()
 
         if sizes is not None:
-            assert len(sizes) == world_size
+            assert len(sizes) == world_size, f"{len(sizes)} == {world_size}"
             assert input_tensor.shape[0] == sum(sizes)
             chunk_size = sizes[self.rank_in_group]
         else:
diff --git a/vllm/distributed/device_communicators/custom_all_reduce.py b/vllm/distributed/device_communicators/custom_all_reduce.py
index 02591805a796..65a196264684 100644
--- a/vllm/distributed/device_communicators/custom_all_reduce.py
+++ b/vllm/distributed/device_communicators/custom_all_reduce.py
@@ -17,7 +17,6 @@
 from vllm.distributed.parallel_state import in_the_same_node_as
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils.torch_utils import cuda_device_count_stateless
 
 try:
     ops.meta_size()
@@ -135,7 +134,7 @@ def __init__(
         if cuda_visible_devices:
             device_ids = list(map(int, cuda_visible_devices.split(",")))
         else:
-            device_ids = list(range(cuda_device_count_stateless()))
+            device_ids = list(range(current_platform.device_count()))
 
         physical_device_id = device_ids[device.index]
         tensor = torch.tensor([physical_device_id], dtype=torch.int, device="cpu")
diff --git a/vllm/distributed/device_communicators/flashinfer_all_reduce.py b/vllm/distributed/device_communicators/flashinfer_all_reduce.py
index a65789a28338..2594c0cf160a 100644
--- a/vllm/distributed/device_communicators/flashinfer_all_reduce.py
+++ b/vllm/distributed/device_communicators/flashinfer_all_reduce.py
@@ -19,6 +19,8 @@
 
 logger = init_logger(__name__)
 
+# The empirical value for small batch
+PDL_ADVANCE_LAUNCH_TOKENS = 16
 
 fi_ar_available = False
 try:
@@ -312,7 +314,7 @@ def should_use_fi_ar(self, input_tensor: torch.Tensor) -> bool:
         return self._ensure_workspace(hidden_dim, input_tensor.dtype)
 
     def all_reduce(self, input_tensor: torch.Tensor) -> torch.Tensor:
-        _, hidden_dim = input_tensor.shape
+        num_tokens, hidden_dim = input_tensor.shape
         workspace = get_fi_ar_workspace(
             world_size=self.world_size,
             rank=self.rank,
@@ -325,6 +327,8 @@ def all_reduce(self, input_tensor: torch.Tensor) -> torch.Tensor:
             input=input_tensor,
             workspace=workspace,
             pattern=flashinfer_comm.AllReduceFusionPattern.kAllReduce,
+            launch_with_pdl=True,
+            trigger_completion_at_end=num_tokens > PDL_ADVANCE_LAUNCH_TOKENS,
         )
 
     def destroy(self):
diff --git a/vllm/distributed/device_communicators/pynccl.py b/vllm/distributed/device_communicators/pynccl.py
index 6ac3b9ea3c7c..9f305c718f9d 100644
--- a/vllm/distributed/device_communicators/pynccl.py
+++ b/vllm/distributed/device_communicators/pynccl.py
@@ -3,6 +3,8 @@
 
 
 # ===================== import region =====================
+import threading
+
 import torch
 import torch.distributed as dist
 from torch.distributed import ProcessGroup, ReduceOp
@@ -108,9 +110,7 @@ def __init__(
         if self.rank == 0:
             # get the unique id from NCCL
             self.unique_id = self.nccl.ncclGetUniqueId()
-            logger.info_once(
-                "vLLM is using nccl==%s", self.nccl.ncclGetVersion(), scope="local"
-            )
+            logger.info_once("vLLM is using nccl==%s", self.nccl.ncclGetVersion())
         else:
             # construct an empty unique id
             self.unique_id = ncclUniqueId()
@@ -147,8 +147,19 @@ def __init__(
 
     def destroy(self):
         if self.available and not self.disabled:
-            with torch.accelerator.device_index(self.device.index):
-                self.nccl.ncclCommDestroy(self.comm)
+            # ncclCommAbort can block until all CUDA graphs that
+            # captured NCCL ops on this comm are destroyed — and
+            # those graphs are released later in this same main-
+            # thread teardown, so a direct call here self-deadlocks.
+            # Run it in a daemon thread with a timeout: the main
+            # thread proceeds, the graphs drop, and the abort returns.
+            def _abort():
+                with torch.accelerator.device_index(self.device.index):
+                    self.nccl.ncclCommAbort(self.comm)
+
+            abort_thread = threading.Thread(target=_abort, daemon=True)
+            abort_thread.start()
+            abort_thread.join(timeout=5.0)
             self.available = False
             self.disabled = True
 
diff --git a/vllm/distributed/device_communicators/pynccl_wrapper.py b/vllm/distributed/device_communicators/pynccl_wrapper.py
index 78b3328f48d9..5ca8cc7c77f4 100644
--- a/vllm/distributed/device_communicators/pynccl_wrapper.py
+++ b/vllm/distributed/device_communicators/pynccl_wrapper.py
@@ -23,6 +23,7 @@
 # variable in the code.
 
 import ctypes
+import functools
 import platform
 from dataclasses import dataclass
 from typing import Any
@@ -75,26 +76,34 @@ class ncclDataTypeEnum:
     ncclFloat8e4m3 = 10
     ncclNumTypes = 11
 
+    @classmethod
+    @functools.lru_cache(maxsize=1)
+    def _torch_to_nccl_map(cls) -> dict[torch.dtype, int]:
+        return {
+            torch.int8: cls.ncclInt8,
+            torch.uint8: cls.ncclUint8,
+            torch.int32: cls.ncclInt32,
+            torch.int64: cls.ncclInt64,
+            torch.float16: cls.ncclFloat16,
+            torch.float32: cls.ncclFloat32,
+            torch.float64: cls.ncclFloat64,
+            torch.bfloat16: cls.ncclBfloat16,
+            current_platform.fp8_dtype(): cls.ncclFloat8e4m3,
+        }
+
+    @classmethod
+    def supports_torch_dtype(cls, dtype: torch.dtype) -> bool:
+        return dtype in cls._torch_to_nccl_map()
+
+    @classmethod
+    def try_from_torch(cls, dtype: torch.dtype) -> int | None:
+        return cls._torch_to_nccl_map().get(dtype)
+
     @classmethod
     def from_torch(cls, dtype: torch.dtype) -> int:
-        if dtype == torch.int8:
-            return cls.ncclInt8
-        if dtype == torch.uint8:
-            return cls.ncclUint8
-        if dtype == torch.int32:
-            return cls.ncclInt32
-        if dtype == torch.int64:
-            return cls.ncclInt64
-        if dtype == torch.float16:
-            return cls.ncclFloat16
-        if dtype == torch.float32:
-            return cls.ncclFloat32
-        if dtype == torch.float64:
-            return cls.ncclFloat64
-        if dtype == torch.bfloat16:
-            return cls.ncclBfloat16
-        if dtype == current_platform.fp8_dtype():
-            return cls.ncclFloat8e4m3
+        nccl_dtype = cls.try_from_torch(dtype)
+        if nccl_dtype is not None:
+            return nccl_dtype
         raise ValueError(
             f"Unsupported dtype {dtype}: should be one of "
             f"int8, uint8, int32, int64, float16, float32, float64, bfloat16,"
@@ -281,6 +290,12 @@ class NCCLLibrary:
         # it is better not to call it at all.
         # ncclResult_t  ncclCommDestroy(ncclComm_t comm);
         Function("ncclCommDestroy", ncclResult_t, [ncclComm_t]),
+        # ncclCommAbort frees resources associated with the communicator
+        # without requiring a collective synchronization. Unlike
+        # ncclCommDestroy, it is safe to call during an uncoordinated
+        # shutdown when peer ranks may already be gone.
+        # ncclResult_t  ncclCommAbort(ncclComm_t comm);
+        Function("ncclCommAbort", ncclResult_t, [ncclComm_t]),
         # ncclResult_t ncclGroupStart();
         Function("ncclGroupStart", ncclResult_t, []),
         # ncclResult_t ncclGroupEnd();
@@ -539,6 +554,9 @@ def ncclBroadcast(
     def ncclCommDestroy(self, comm: ncclComm_t) -> None:
         self.NCCL_CHECK(self._funcs["ncclCommDestroy"](comm))
 
+    def ncclCommAbort(self, comm: ncclComm_t) -> None:
+        self.NCCL_CHECK(self._funcs["ncclCommAbort"](comm))
+
     def ncclGroupStart(self) -> None:
         self.NCCL_CHECK(self._funcs["ncclGroupStart"]())
 
diff --git a/vllm/distributed/device_communicators/quick_all_reduce.py b/vllm/distributed/device_communicators/quick_all_reduce.py
index 7670ec134b53..9c9d39a91a97 100644
--- a/vllm/distributed/device_communicators/quick_all_reduce.py
+++ b/vllm/distributed/device_communicators/quick_all_reduce.py
@@ -13,7 +13,6 @@
 from vllm.distributed.parallel_state import in_the_same_node_as
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils.torch_utils import cuda_device_count_stateless
 
 logger = init_logger(__name__)
 
@@ -40,7 +39,8 @@ class QuickReduceRegime(Enum):
     NONE = 4
 
 
-MB = 1024 * 1024
+KB = 1024
+MB = 1024 * KB
 
 
 class QuickAllReduce:
@@ -137,7 +137,7 @@ def __init__(self, group: ProcessGroup, device: int | str | torch.device) -> Non
         if cuda_visible_devices:
             device_ids = list(map(int, cuda_visible_devices.split(",")))
         else:
-            device_ids = list(range(cuda_device_count_stateless()))
+            device_ids = list(range(current_platform.device_count()))
         physical_device_id = device_ids[device.index]
         tensor = torch.tensor([physical_device_id], dtype=torch.int, device="cpu")
         gather_list = [
@@ -184,6 +184,7 @@ def init_quick_all_reduce(self):
             )
             return
         self.qr_quant_level = QuickReduceRegime[regime_str]
+        self.qr_quantization_min_size = self._get_qr_quantization_min_size()
         vllm_config = get_current_vllm_config_or_none()
         if (
             vllm_config is not None
@@ -216,11 +217,56 @@ def init_quick_all_reduce(self):
                     "lead to error or degradation to custom allreduce or rccl."
                 )
             qr_max_size = qr_max_size * MB
+        effective_qr_max_size = (
+            qr_max_size if qr_max_size is not None else ops.qr_max_size()
+        )
+        qr_min_size = self._get_qr_min_size(effective_qr_max_size)
         self._ptr = ops.init_custom_qr(self.rank, self.world_size, qr_max_size)
-        self.qr_max_size = qr_max_size if qr_max_size is not None else ops.qr_max_size()
+        self.qr_max_size = effective_qr_max_size
+        self.qr_min_size = qr_min_size
+        if qr_min_size is not None:
+            logger.info(
+                "Custom quick allreduce: min size override = %d MB",
+                qr_min_size // MB,
+            )
+        if self.qr_quantization_min_size is not None:
+            logger.info(
+                "Custom quick allreduce: quantization codec threshold = %d KB",
+                self.qr_quantization_min_size // KB,
+            )
         self.create_shared_buffer()
         self.disabled = False
 
+    @staticmethod
+    def _get_qr_min_size(qr_max_size: int | None) -> int | None:
+        qr_min_size = envs.VLLM_ROCM_QUICK_REDUCE_MIN_SIZE_BYTES_MB
+        if qr_min_size is None:
+            return None
+        if qr_min_size < 0:
+            raise ValueError(
+                "VLLM_ROCM_QUICK_REDUCE_MIN_SIZE_BYTES_MB must be non-negative, "
+                f"got {qr_min_size}"
+            )
+        qr_min_size *= MB
+        if qr_max_size is not None and qr_min_size > qr_max_size:
+            raise ValueError(
+                "VLLM_ROCM_QUICK_REDUCE_MIN_SIZE_BYTES_MB must be less than or "
+                "equal to the effective QuickReduce max size"
+            )
+        return qr_min_size
+
+    @staticmethod
+    def _get_qr_quantization_min_size() -> int | None:
+        quantization_min_size = envs.VLLM_ROCM_QUICK_REDUCE_QUANTIZATION_MIN_SIZE_KB
+        if quantization_min_size is None:
+            return None
+        if quantization_min_size < 0:
+            raise ValueError(
+                "VLLM_ROCM_QUICK_REDUCE_QUANTIZATION_MIN_SIZE_KB must be "
+                f"non-negative, got {quantization_min_size}"
+            )
+        return quantization_min_size * KB
+
     def _rocm_arch_available(self):
         if not current_platform.is_rocm():
             return False
@@ -262,11 +308,12 @@ def should_quick_allreduce(self, inp: torch.Tensor):
         dtype = inp.dtype
         if self.use_fp16_kernels:
             dtype = torch.float16
-        return (
-            inp_size <= self.qr_max_size
-            and inp_size
-            >= self._QR_MIN_SIZE[(dtype, self.world_size)][self.qr_quant_level.value]
-        )
+        min_size = self.qr_min_size
+        if min_size is None:
+            min_size = self._QR_MIN_SIZE[(dtype, self.world_size)][
+                self.qr_quant_level.value
+            ]
+        return inp_size <= self.qr_max_size and inp_size >= min_size
 
     def quick_all_reduce(self, inp: torch.Tensor, *, out: torch.Tensor = None):
         """Performs an out-of-place custom quick all reduce."""
@@ -275,10 +322,19 @@ def quick_all_reduce(self, inp: torch.Tensor, *, out: torch.Tensor = None):
         if out is None:
             out = torch.empty_like(inp)
         ops.qr_all_reduce(
-            self._ptr, inp, out, self.qr_quant_level.value, self.use_fp16_kernels
+            self._ptr, inp, out, self._get_qr_quant_level(inp), self.use_fp16_kernels
         )
         return out
 
+    def _get_qr_quant_level(self, inp: torch.Tensor) -> int:
+        quantization_min_size = self.qr_quantization_min_size
+        if (
+            quantization_min_size is not None
+            and inp.numel() * inp.element_size() < quantization_min_size
+        ):
+            return QuickReduceRegime.FP.value
+        return self.qr_quant_level.value
+
     def close(self):
         if not self.disabled and getattr(self, "_ptr", None):
             if ops is not None:
diff --git a/vllm/distributed/device_communicators/shm_broadcast.py b/vllm/distributed/device_communicators/shm_broadcast.py
index 9c8bf3ad165c..dc7e6d151a48 100644
--- a/vllm/distributed/device_communicators/shm_broadcast.py
+++ b/vllm/distributed/device_communicators/shm_broadcast.py
@@ -38,6 +38,11 @@
     is_valid_ipv6_address,
 )
 
+if envs.VLLM_USE_SPINLOOP_EXT:
+    from vllm.spinloop import spinloop
+
+SPINLOOP_TIMEOUT_SECONDS = 0.1
+
 if TYPE_CHECKING:
     from _typeshed import SizedBuffer
 
@@ -540,13 +545,17 @@ def acquire_write(self, timeout: float | None = None):
         n_warning = 1
         while True:
             with self.buffer.get_metadata(self.current_idx) as metadata_buffer:
-                # Memory fence ensures we see the latest read flags from readers.
-                # Without this, we may read stale flags from our CPU cache and
-                # spin indefinitely even though readers have completed.
-                memory_fence()
-                read_count = sum(metadata_buffer[1:])
-                written_flag = metadata_buffer[0]
-                if written_flag and read_count != self.buffer.n_reader:
+
+                def check():
+                    memory_fence()
+                    read_count = sum(metadata_buffer[1:])
+                    written_flag = metadata_buffer[0]
+                    return not (written_flag and read_count != self.buffer.n_reader)
+
+                if envs.VLLM_USE_SPINLOOP_EXT and not check():
+                    spinloop(metadata_buffer, check, timeout=SPINLOOP_TIMEOUT_SECONDS)
+
+                if not check():
                     # this block is written and not read by all readers
                     # for writers, `self.current_idx` is the next block to write
                     # if this block is not ready to write,
@@ -657,13 +666,21 @@ def acquire_read(
         )
         with self.buffer.get_metadata(self.current_idx) as metadata_buffer:
             while True:
-                # Memory fence ensures we see the latest writes from the writer.
-                # Without this, we may read stale flags from our CPU cache
-                # and spin indefinitely even though writer has updated them.
-                memory_fence()
-                read_flag = metadata_buffer[self.local_reader_rank + 1]
-                written_flag = metadata_buffer[0]
-                if not written_flag or read_flag:
+
+                def check():
+                    memory_fence()
+                    read_flag = metadata_buffer[self.local_reader_rank + 1]
+                    written_flag = metadata_buffer[0]
+                    return not (not written_flag or read_flag)
+
+                if envs.VLLM_USE_SPINLOOP_EXT and not check():
+                    spinloop(
+                        metadata_buffer[0 : self.local_reader_rank + 1],
+                        check,
+                        timeout=SPINLOOP_TIMEOUT_SECONDS,
+                    )
+
+                if not check():
                     # this block is either
                     # (1) not written
                     # (2) already read by this reader
diff --git a/vllm/distributed/device_communicators/symm_mem.py b/vllm/distributed/device_communicators/symm_mem.py
index 3d964c640d3c..8c174602c3c6 100644
--- a/vllm/distributed/device_communicators/symm_mem.py
+++ b/vllm/distributed/device_communicators/symm_mem.py
@@ -121,7 +121,7 @@ def should_use_symm_mem(self, inp: torch.Tensor):
         inp_size = inp.numel() * inp.element_size()
         if inp_size % 4 != 0:
             return False
-        return inp_size < self.max_size
+        return inp_size <= self.max_size
 
     def all_reduce(
         self, inp: torch.Tensor, *, out: torch.Tensor | None = None
diff --git a/vllm/distributed/device_communicators/xpu_communicator.py b/vllm/distributed/device_communicators/xpu_communicator.py
index d2e9e89e535d..1b6ce9e8aae4 100644
--- a/vllm/distributed/device_communicators/xpu_communicator.py
+++ b/vllm/distributed/device_communicators/xpu_communicator.py
@@ -22,14 +22,9 @@ def __init__(
         unique_name: str = "",
     ):
         super().__init__(cpu_group, device, device_group, unique_name)
+        self.ca_comm: None = None
         if self.use_all2all:
-            if self.all2all_backend == "naive":
-                from .all2all import NaiveAll2AllManager
-
-                self.all2all_manager = NaiveAll2AllManager(self.cpu_group)
-                logger.info("Using naive all2all manager.")
-
-            elif self.all2all_backend == "allgather_reducescatter":
+            if self.all2all_backend in ("naive", "allgather_reducescatter"):
                 from .all2all import AgRsAll2AllManager
 
                 self.all2all_manager = AgRsAll2AllManager(self.cpu_group)
@@ -47,9 +42,10 @@ def __init__(
                 self.all2all_manager = AgRsAll2AllManager(self.cpu_group)
                 logger.info("Using AgRs manager on XPU device.")
 
-    def all_reduce(self, input_) -> torch.Tensor:
-        dist.all_reduce(input_, group=self.device_group)
-        return input_
+    def all_reduce(self, input_: torch.Tensor) -> torch.Tensor:
+        output = input_.clone()
+        dist.all_reduce(output, group=self.device_group)
+        return output
 
     def reduce_scatter(self, input_: torch.Tensor, dim: int = -1):
         world_size = self.world_size
diff --git a/vllm/distributed/elastic_ep/elastic_execute.py b/vllm/distributed/elastic_ep/elastic_execute.py
index 8b05c58eaec5..2cd6decb3a58 100644
--- a/vllm/distributed/elastic_ep/elastic_execute.py
+++ b/vllm/distributed/elastic_ep/elastic_execute.py
@@ -4,6 +4,8 @@
 import gc
 import weakref
 from collections.abc import Iterable, Sequence
+from dataclasses import replace
+from typing import TYPE_CHECKING
 
 import torch
 import torch.nn as nn
@@ -29,19 +31,30 @@
     get_standby_ep_group,
     pop_standby_groups,
 )
+from vllm.distributed.eplb.eplb_communicator import create_eplb_communicator
 from vllm.distributed.parallel_state import (
     _replace_active_groups,
+    get_eplb_group,
     prepare_communication_buffer_for_model,
 )
 from vllm.distributed.stateless_coordinator import StatelessGroupCoordinator
 from vllm.logger import init_logger
-from vllm.model_executor.layers.fused_moe.layer import FusedMoEParallelConfig
+from vllm.model_executor.layers.fused_moe.config import FusedMoEParallelConfig
+from vllm.model_executor.layers.fused_moe.eep_reconfigure import (
+    make_eep_staged_quant_method,
+)
+from vllm.utils import is_moe_layer
 from vllm.v1.engine import ReconfigureDistributedRequest, ReconfigureRankType
 from vllm.v1.worker.gpu_ubatch_wrapper import UBatchWrapper
 from vllm.v1.worker.workspace import lock_workspace, unlock_workspace
 
 logger = init_logger(__name__)
 
+if TYPE_CHECKING:
+    from vllm.model_executor.layers.fused_moe.fused_moe_method_base import (
+        FusedMoEMethodBase,
+    )
+
 
 def batch_transfer_weights(
     model: nn.Module,
@@ -131,6 +144,7 @@ class ElasticEPScalingExecutor:
     def __init__(self, worker):
         self.worker_ref = weakref.ref(worker)
         self.reconfig_request = None
+        self._staged_moe_quant_methods: dict[nn.Module, FusedMoEMethodBase] = {}
 
     @property
     def worker(self):
@@ -193,6 +207,8 @@ def create_standby_groups(
             )
         if new_dp_size > old_dp_size:
             self._set_eplb_suppressed(True)
+        elif new_dp_size < old_dp_size:
+            self._stage_standby_moe_quant_methods()
 
     def transfer_weights(self, old_dp_size: int, new_dp_size: int) -> None:
         standby_dp_group = get_standby_dp_group()
@@ -259,6 +275,58 @@ def broadcast_expert_mapping(self) -> None:
             src_rank=0,
             device=self.worker.device,
         )
+        # New workers enter load_model after receiving the expert mapping.
+        # Stage replacement MoE kernels before returning to the state machine
+        # so existing ranks can participate in collective EP comm creation.
+        self._stage_standby_moe_quant_methods()
+
+    def _make_eep_moe_config(self, module, dp_group, ep_group):
+        parallel_config = self.worker.vllm_config.parallel_config
+        tp_size = get_tp_group().world_size
+        sp_size = tp_size if parallel_config.use_sequence_parallel_moe else 1
+        moe_parallel_config = FusedMoEParallelConfig.make(
+            tp_size_=tp_size,
+            pcp_size_=get_pcp_group().world_size,
+            dp_size_=dp_group.world_size,
+            sp_size_=sp_size,
+            vllm_parallel_config=parallel_config,
+        )
+        return replace(
+            module.moe_config,
+            num_experts=module.moe_config.num_local_experts * ep_group.world_size,
+            moe_parallel_config=moe_parallel_config,
+        )
+
+    def _stage_standby_moe_quant_methods(self) -> None:
+        standby_dp_group = get_standby_dp_group()
+        standby_ep_group = get_standby_ep_group()
+        model = self.worker.model_runner.get_model()
+        moe_modules = [module for module in model.modules() if is_moe_layer(module)]
+        self._staged_moe_quant_methods.clear()
+        with set_current_vllm_config(self.worker.vllm_config):
+            for module in moe_modules:
+                staged_quant_method = make_eep_staged_quant_method(
+                    module,
+                    self._make_eep_moe_config(
+                        module,
+                        standby_dp_group,
+                        standby_ep_group,
+                    ),
+                )
+                if staged_quant_method is not None:
+                    self._staged_moe_quant_methods[module] = staged_quant_method
+
+    def _commit_staged_moe_quant_methods(self) -> None:
+        model = self.worker.model_runner.get_model()
+        moe_modules = [module for module in model.modules() if is_moe_layer(module)]
+        for module in moe_modules:
+            staged_quant_method = self._staged_moe_quant_methods.pop(module, None)
+            if staged_quant_method is None:
+                continue
+            assert staged_quant_method.moe_kernel is not None
+            module._replace_quant_method(staged_quant_method)
+            staged_quant_method.moe_kernel.prepare_finalize.on_commit()
+        self._staged_moe_quant_methods.clear()
 
     def _release_cuda_graphs(self) -> None:
         if isinstance(self.worker.model_runner.model, CUDAGraphWrapper):
@@ -317,29 +385,20 @@ def switch_and_prepare(self) -> None:
         moe_modules = [
             module
             for module in self.worker.model_runner.model.modules()
-            if (
-                module.__class__.__name__ == "FusedMoE"
-                or module.__class__.__name__ == "SharedFusedMoE"
-            )
+            if is_moe_layer(module)
         ]
         num_local_experts = moe_modules[0].moe_config.num_local_experts
         assert all(
             module.moe_config.num_local_experts == num_local_experts
             for module in moe_modules
         ), "All MoE modules must have the same number of experts"
+        dp_group = get_dp_group()
+        ep_group = get_ep_group()
         for module in moe_modules:
-            module.moe_config.num_experts = num_local_experts * new_ep_size
+            new_moe_config = self._make_eep_moe_config(module, dp_group, ep_group)
+            module.moe_config.num_experts = new_moe_config.num_experts
             module.global_num_experts = module.moe_config.num_experts
-            tp_size = get_tp_group().world_size
-            is_sequence_parallel = parallel_config.use_sequence_parallel_moe
-            sp_size = tp_size if is_sequence_parallel else 1
-            module.moe_parallel_config = FusedMoEParallelConfig.make(
-                tp_size_=tp_size,
-                pcp_size_=get_pcp_group().world_size,
-                dp_size_=get_dp_group().world_size,
-                sp_size_=sp_size,
-                vllm_parallel_config=parallel_config,
-            )
+            module.moe_parallel_config = new_moe_config.moe_parallel_config
             module.moe_config.moe_parallel_config = module.moe_parallel_config
 
         # Update EPLB state
@@ -399,17 +458,24 @@ def switch_and_prepare(self) -> None:
                 eplb_model_state.logical_to_physical_map,
                 eplb_model_state.logical_replica_count,
             )
+            eplb_state._init_should_record_tensor(model)
             model.update_physical_experts_metadata(
                 num_physical_experts=num_physical_experts,
                 num_local_physical_experts=num_local_experts,
             )
-            # Force re-creation of the modular kernel (and all2all manager)
-            # for the new EP size by resetting quant_method to base
+            self._commit_staged_moe_quant_methods()
+            # Legacy modular methods need to be recreated for the new EP size.
             for module in moe_modules:
-                if hasattr(module.quant_method, "old_quant_method"):
-                    module.quant_method = module.quant_method.old_quant_method
-                    module.runner = module._init_runner()
+                if getattr(module.quant_method, "wraps_legacy_quant_method", False):
+                    module._replace_quant_method(module.quant_method.old_quant_method)
             prepare_communication_buffer_for_model(self.worker.model_runner.model)
+
+        eplb_model_state.communicator = create_eplb_communicator(
+            group_coordinator=get_eplb_group(),
+            backend=parallel_config.eplb_config.communicator,
+            expert_weights=model.expert_weights[0],
+        )
+
         if (
             self.worker.vllm_config.compilation_config.mode
             == CompilationMode.STOCK_TORCH_COMPILE
@@ -559,3 +625,45 @@ def receive_expert_mapping(self) -> tuple[torch.Tensor, int, int]:
     def prepare_new_worker(self) -> None:
         with set_current_vllm_config(self.worker.vllm_config):
             prepare_communication_buffer_for_model(self.worker.model_runner.get_model())
+
+    def rewarm_workspace(self) -> None:
+        # Must run on every DP sibling in lockstep: _dummy_run calls
+        # coordinate_batch_across_dp whenever data_parallel_size > 1
+        # (gpu_model_runner.py:3663), which deadlocks if any rank skips it.
+
+        # Save and clear block tables so profile_run/compile_or_warm_up_model
+        # don't write dummy slot mappings into real KV-cache blocks (mirrors
+        # switch_and_prepare's pattern).
+        multi_block_table = self.worker.model_runner.input_batch.block_table
+        saved_block_tables: list[tuple[torch.Tensor, torch.Tensor]] = []
+        for bt in multi_block_table.block_tables:
+            saved_block_tables.append(
+                (bt.block_table.gpu.clone(), bt.block_table.cpu.clone())
+            )
+        multi_block_table.clear()
+
+        # _ensure_workspace_size allocates a fresh tensor on grow, leaving
+        # captured CUDA graphs with stale data pointers; drop graphs before
+        # re-warm so captures realign with the resized buffer.
+        self._release_cuda_graphs()
+        unlock_workspace()
+
+        # Grow the MoE workspace at max_num_tokens.
+        # compile_or_warm_up_model alone only exercises cudagraph-capture
+        # sizes (≤64 tokens for this test) and leaves the workspace at
+        # ~10-14 MB; the post-all-to-all per-rank token count under real
+        # post-reshuffle routing needs hundreds of MB. Use _dummy_run
+        # directly (rather than profile_run) with skip_eplb=True so dummy
+        # routing doesn't pollute the just-rebalanced EPLB stats — same
+        # convention compile_or_warm_up_model itself uses.
+        runner = self.worker.model_runner
+        runner._dummy_run(runner.max_num_tokens, is_profile=True, skip_eplb=True)
+        self.worker.compile_or_warm_up_model()
+
+        lock_workspace()
+
+        for bt, (saved_gpu, saved_cpu) in zip(
+            multi_block_table.block_tables, saved_block_tables
+        ):
+            bt.block_table.gpu.copy_(saved_gpu)
+            bt.block_table.cpu.copy_(saved_cpu)
diff --git a/vllm/distributed/elastic_ep/elastic_state.py b/vllm/distributed/elastic_ep/elastic_state.py
index bace771a2ab6..256efe46a4a4 100644
--- a/vllm/distributed/elastic_ep/elastic_state.py
+++ b/vllm/distributed/elastic_ep/elastic_state.py
@@ -538,6 +538,11 @@ def _eplb_reshuffle(self):
         self.model_executor.collective_rpc(
             "elastic_ep_execute", args=("perform_eplb_reshuffle",)
         )
+        # Reshuffle changes per-rank token routing; the locked MoE workspace
+        # may now be too small. Rewarm covers both new and existing engines.
+        self.model_executor.collective_rpc(
+            "elastic_ep_execute", args=("rewarm_workspace",)
+        )
         assert self.new_dp_group is not None
         if self.new_dp_group.rank() == 0:
             logger.info("[Elastic EP] EPLB reshuffle completed")
diff --git a/vllm/distributed/eplb/async_worker.py b/vllm/distributed/eplb/async_worker.py
index 7cb8805f4117..542606fe7417 100644
--- a/vllm/distributed/eplb/async_worker.py
+++ b/vllm/distributed/eplb/async_worker.py
@@ -4,7 +4,6 @@
 The async worker that transfers experts in the background.
 """
 
-import asyncio
 import threading
 from typing import TYPE_CHECKING
 
@@ -14,7 +13,8 @@
 from vllm.distributed.parallel_state import get_eplb_group
 from vllm.logger import init_logger
 
-from .rebalance_execute import transfer_layer
+from .eplb_utils import CpuGpuEvent
+from .rebalance_execute import AsyncEplbLayerResult, transfer_layer
 
 if TYPE_CHECKING:
     from .eplb_state import EplbModelState, EplbState
@@ -35,21 +35,15 @@ def thread_target() -> None:
         assert device_index is not None
         torch.accelerator.set_device_index(device_index)
         cuda_stream = torch.cuda.Stream(device=device_index)
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
         try:
-            loop.run_until_complete(
-                transfer_run_periodically(
-                    state=state,
-                    eplb_group=eplb_group,
-                    cuda_stream=cuda_stream,
-                    is_profile=is_profile,
-                )
+            transfer_run_periodically(
+                state=state,
+                eplb_group=eplb_group,
+                cuda_stream=cuda_stream,
+                is_profile=is_profile,
             )
         except Exception as exc:  # pragma: no cover - diagnostic path
             logger.exception("async loop error (Rank %d): %s", rank, str(exc))
-        finally:
-            loop.close()
 
     thread = threading.Thread(target=thread_target, daemon=True)
     thread.start()
@@ -60,18 +54,14 @@ def run_rebalance_experts(
     model_state: "EplbModelState",
     eplb_state: "EplbState",
     physical_to_logical_map_cpu: torch.Tensor,
-) -> None:
+    cuda_stream: torch.cuda.Stream,
+) -> torch.Tensor:
     assert model_state.eplb_stats is not None
     eplb_stats = model_state.eplb_stats
 
-    # Wait for the main thread's all-reduce and clone to complete before
-    # accessing the global_expert_load_window tensor.
-    assert model_state.window_ready_event is not None
-    model_state.window_ready_event.wait()
-    model_state.window_ready_event = None
-
     # Move the global expert load window to CPU for computation.
-    global_expert_load_window = eplb_stats.global_expert_load_window.cpu()
+    with torch.cuda.stream(cuda_stream):
+        global_expert_load_window = eplb_stats.global_expert_load_window.cpu()
     # Compute new expert mappings for the model
     new_physical_to_logical_map = eplb_state.policy.rebalance_experts(
         global_expert_load_window,
@@ -83,92 +73,75 @@ def run_rebalance_experts(
     )
     assert new_physical_to_logical_map.device == torch.device("cpu")
 
-    model_state.new_physical_to_logical_map = new_physical_to_logical_map
+    return new_physical_to_logical_map
 
 
-async def transfer_run_periodically(
+def transfer_run_periodically(
     state: "EplbState",
     eplb_group: ProcessGroup,
     cuda_stream: torch.cuda.Stream,
     is_profile: bool = False,
 ) -> None:
     while True:
-        await asyncio.to_thread(state.rearrange_event.wait)
+        state.rearrange_event.wait(stream=cuda_stream)
         logger.info("async worker woke up for EPLB transfer")
 
         assert state.is_async
         for model_state in state.model_states.values():
-            rebalancing_algorithm_executed = False
-            physical_to_logical_map_cpu = None
-            current_num_layers = model_state.model.num_moe_layers
-            while (
-                model_state.rebalanced
-                and model_state.layer_to_transfer < current_num_layers
-            ):
-                if not model_state.ep_buffer_ready and model_state.rebalanced:
-                    # Polling the lock directly in the async thread avoids
-                    # the thread switch overhead of asyncio.to_thread.
-                    # This is typically faster than offloading to a worker thread.
-                    while not model_state.buffer_lock.acquire(blocking=False):
-                        await asyncio.sleep(0)
-                    try:
-                        if model_state.layer_to_transfer >= current_num_layers:
-                            break
-                        if (
-                            not rebalancing_algorithm_executed
-                            or model_state.new_physical_to_logical_map is None
-                        ):
-                            # Move the physical_to_logical_map to CPU
-                            # for rebalancing and transfer_layer.
-                            physical_to_logical_map_cpu = (
-                                model_state.physical_to_logical_map.cpu()
-                            )
-                            run_rebalance_experts(
-                                model_state, state, physical_to_logical_map_cpu
-                            )
-                            rebalancing_algorithm_executed = True
-                            logger.info(
-                                "Async worker computed new indices for model %s",
-                                model_state.model_name,
-                            )
-
-                        assert model_state.new_physical_to_logical_map is not None
-                        assert physical_to_logical_map_cpu is not None
-
-                        layer_idx = model_state.layer_to_transfer
-                        old_layer_indices = physical_to_logical_map_cpu[layer_idx]
-                        new_layer_indices = model_state.new_physical_to_logical_map[
-                            layer_idx
-                        ]
-
-                        # Wait for the main thread to finish consuming the buffer
-                        # before initiating an EPLB transfer on another layer.
-                        if model_state.buffer_consumed_event is not None:
-                            cuda_stream.wait_event(model_state.buffer_consumed_event)
-                            model_state.buffer_consumed_event = None
-
-                        (
-                            model_state.is_unchanged,
-                            model_state.is_received_locally,
-                            model_state.recv_metadata,
-                        ) = await transfer_layer(
-                            old_layer_indices=old_layer_indices,
-                            new_layer_indices=new_layer_indices,
-                            expert_weights=model_state.model.expert_weights[layer_idx],
-                            expert_weights_buffer=model_state.expert_buffer,
-                            ep_group=eplb_group,
-                            is_profile=is_profile,
-                            cuda_stream=cuda_stream,
-                        )
-                        # block the async thread until the transfer to
-                        # the intermediate buffer is complete.
-                        cuda_stream.synchronize()
-                        model_state.ep_buffer_ready = 1
-                    finally:
-                        model_state.buffer_lock.release()
-                else:
-                    if not model_state.rebalanced:
-                        break
-                    await asyncio.sleep(0.001)
-
-        state.rearrange_event.clear()
+            layer_idx = 0
+            # Set the async worker's CUDA stream on the communicator
+            model_state.communicator.set_stream(cuda_stream)
+            num_layers = model_state.model.num_moe_layers
+
+            # Snapshot the physical_to_logical_map (synchronized with
+            # rearrange_event) and copy it to CPU
+            with torch.cuda.stream(cuda_stream):
+                physical_to_logical_map_cpu = model_state.physical_to_logical_map.cpu()
+
+            new_physical_to_logical_map = run_rebalance_experts(
+                model_state, state, physical_to_logical_map_cpu, cuda_stream
+            )
+            logger.info(
+                "Async worker computed new indices for model %s",
+                model_state.model_name,
+            )
+
+            # Execute one EPLB layer transfer per model forward pass. Each iteration
+            # of this loop will copy the new set of expert weights into
+            # model_state.expert_buffer, which will be consumed by the main thread in
+            # move_to_workspace
+            while model_state.rebalanced and layer_idx < num_layers:
+                transfer_metadata = transfer_layer(
+                    old_layer_indices=physical_to_logical_map_cpu[layer_idx],
+                    new_layer_indices=new_physical_to_logical_map[layer_idx],
+                    expert_weights=model_state.model.expert_weights[layer_idx],
+                    expert_weights_buffer=model_state.expert_buffer,
+                    communicator=model_state.communicator,
+                    ep_group=eplb_group,
+                    is_profile=is_profile,
+                    cuda_stream=cuda_stream,
+                )
+
+                # Wait until all writes to expert_buffer have finished before making the
+                # AsyncEplbLayerResult visible to the main thread.
+                cuda_stream.synchronize()
+
+                # This event guarantees that expert_buffer will not be overwritten by
+                # subsequent iterations of this loop until the main thread has consumed
+                # it. Record is called by the main thread after move_from_buffer().
+                consumed_event = CpuGpuEvent()
+
+                model_state.pending_result = AsyncEplbLayerResult(
+                    layer_idx=layer_idx,
+                    new_physical_to_logical_map=new_physical_to_logical_map[layer_idx],
+                    transfer_metadata=transfer_metadata,
+                    consumed_event=consumed_event,
+                )
+
+                # Block this thread until the main thread and main stream
+                # finish copying model_state.expert_buffer into
+                # model_state.model.expert_weights[layer_idx]
+                consumed_event.wait(stream=cuda_stream)
+                logger.debug("Layer %d transfer complete", layer_idx)
+                assert model_state.pending_result is None
+                layer_idx += 1
diff --git a/vllm/distributed/eplb/eplb_communicator.py b/vllm/distributed/eplb/eplb_communicator.py
new file mode 100644
index 000000000000..f8ee90b934fb
--- /dev/null
+++ b/vllm/distributed/eplb/eplb_communicator.py
@@ -0,0 +1,721 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+EPLB communicator implementations and factory.
+"""
+
+import contextlib
+import time
+import uuid
+from abc import ABC, abstractmethod
+from collections.abc import Sequence
+from datetime import timedelta
+
+import numpy as np
+import torch
+from torch.distributed import (
+    P2POp,
+    ProcessGroup,
+    batch_isend_irecv,
+)
+
+import vllm.distributed.nixl_utils as nixl_utils
+from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator
+from vllm.distributed.device_communicators.pynccl_wrapper import (
+    ncclDataTypeEnum,
+)
+from vllm.distributed.parallel_state import (
+    GroupCoordinator,
+    get_pp_group,
+    is_local_first_rank,
+)
+from vllm.distributed.stateless_coordinator import StatelessGroupCoordinator
+from vllm.logger import init_logger
+from vllm.platforms import current_platform
+
+logger = init_logger(__name__)
+
+
+def has_nixl() -> bool:
+    """Whether the optional NIXL / RIXL package is available."""
+    return nixl_utils.NixlWrapper is not None
+
+
+class EplbCommunicator(ABC):
+    """Abstract EPLB communicator for expert weight transfers."""
+
+    @abstractmethod
+    def add_send(
+        self,
+        tensors: list[torch.Tensor],
+        dst_rank: int,
+        expert_id: int,
+    ) -> None:
+        pass
+
+    @abstractmethod
+    def add_recv(
+        self,
+        tensors: list[torch.Tensor],
+        src_rank: int,
+        expert_id: int,
+    ) -> None:
+        pass
+
+    @abstractmethod
+    def execute(self, old_indices: np.ndarray | None = None) -> None:
+        pass
+
+    @property
+    def needs_profile_buffer_reservation(self) -> bool:
+        """Whether the profile path must run a dummy collective operation to reserve
+        communication buffers."""
+        return True
+
+    def set_stream(self, cuda_stream: torch.cuda.Stream | None) -> None:
+        self._cuda_stream = cuda_stream
+
+    def _log_initialized(self) -> None:
+        if is_local_first_rank():
+            logger.info("Initialized EPLB communicator: %s.", self.__class__.__name__)
+
+
+class TorchDistNcclEplbCommunicator(EplbCommunicator):
+    """EPLB communicator backed by torch.distributed isend/irecv."""
+
+    def __init__(
+        self,
+        ep_group: ProcessGroup,
+        cuda_stream: torch.cuda.Stream | None = None,
+    ) -> None:
+        self._ep_group = ep_group
+        self._cuda_stream = cuda_stream
+        self._p2p_ops: list[P2POp] = []
+        self._log_initialized()
+
+    def add_send(
+        self,
+        tensors: list[torch.Tensor],
+        dst_rank: int,
+        expert_id: int,  # unused by this backend
+    ) -> None:
+        for tensor in tensors:
+            self._p2p_ops.append(
+                P2POp(
+                    torch.distributed.isend,
+                    tensor,
+                    dst_rank,
+                    self._ep_group,
+                )
+            )
+
+    def add_recv(
+        self,
+        tensors: list[torch.Tensor],
+        src_rank: int,
+        expert_id: int,  # unused by this backend
+    ) -> None:
+        for tensor in tensors:
+            self._p2p_ops.append(
+                P2POp(
+                    torch.distributed.irecv,
+                    tensor,
+                    src_rank,
+                    self._ep_group,
+                )
+            )
+
+    def execute(self, old_indices: np.ndarray | None = None) -> None:
+        if not self._p2p_ops:
+            return
+        try:
+            with torch.cuda.stream(self._cuda_stream):
+                reqs = batch_isend_irecv(self._p2p_ops)
+                for req in reqs:
+                    req.wait()
+        finally:
+            self._p2p_ops.clear()
+
+
+class TorchDistGlooStagedEplbCommunicator(EplbCommunicator):
+    """EPLB communicator using gloo P2P with CPU staging."""
+
+    def __init__(
+        self,
+        cpu_group: ProcessGroup,
+        cuda_stream: torch.cuda.Stream | None = None,
+    ) -> None:
+        self._cpu_group = cpu_group
+        self._cuda_stream = cuda_stream
+        self._ops: list[tuple[str, torch.Tensor, int]] = []
+        self._log_initialized()
+
+    def add_send(
+        self,
+        tensors: list[torch.Tensor],
+        dst_rank: int,
+        expert_id: int,  # unused by this backend
+    ) -> None:
+        for tensor in tensors:
+            self._ops.append(("send", tensor, dst_rank))
+
+    def add_recv(
+        self,
+        tensors: list[torch.Tensor],
+        src_rank: int,
+        expert_id: int,  # unused by this backend
+    ) -> None:
+        for tensor in tensors:
+            self._ops.append(("recv", tensor, src_rank))
+
+    def execute(self, old_indices: np.ndarray | None = None) -> None:
+        if not self._ops:
+            return
+
+        p2p_ops: list[P2POp] = []
+        recv_staging: list[tuple[torch.Tensor, torch.Tensor]] = []
+
+        def build_ops() -> None:
+            for op, tensor, peer_rank in self._ops:
+                if op == "send":
+                    cpu_tensor = tensor.to(device="cpu", non_blocking=True)
+                    p2p_ops.append(
+                        P2POp(
+                            torch.distributed.isend,
+                            cpu_tensor,
+                            peer_rank,
+                            self._cpu_group,
+                        )
+                    )
+                    continue
+                cpu_tensor = torch.empty_like(tensor, device="cpu")
+                p2p_ops.append(
+                    P2POp(
+                        torch.distributed.irecv,
+                        cpu_tensor,
+                        peer_rank,
+                        self._cpu_group,
+                    )
+                )
+                recv_staging.append((tensor, cpu_tensor))
+
+        try:
+            with torch.cuda.stream(self._cuda_stream):
+                build_ops()
+        finally:
+            self._ops.clear()
+
+        # Wait for all D2H copies to finish
+        # before issuing gloo batch_isend_irecv operations.
+        if self._cuda_stream is not None:
+            self._cuda_stream.synchronize()
+        else:
+            torch.cuda.current_stream().synchronize()
+
+        reqs = batch_isend_irecv(p2p_ops)
+        for req in reqs:
+            req.wait()
+
+        if not recv_staging:
+            return
+        with torch.cuda.stream(self._cuda_stream):
+            for dst_tensor, cpu_tensor in recv_staging:
+                dst_tensor.copy_(cpu_tensor, non_blocking=True)
+
+
+class NixlEplbCommunicator(EplbCommunicator):
+    """EPLB communicator backed by NIXL READ transfers."""
+
+    def __init__(
+        self,
+        cpu_group: ProcessGroup,
+        expert_weights: Sequence[torch.Tensor],
+        cuda_stream: torch.cuda.Stream | None = None,
+    ) -> None:
+        assert expert_weights, "NixlEplbCommunicator requires non-empty expert_weights."
+        nixl_wrapper_cls = nixl_utils.NixlWrapper
+        if nixl_wrapper_cls is None:
+            raise RuntimeError("NIXL/ RIXL is unavailable.")
+        self._cpu_group = cpu_group
+        self._cuda_stream = cuda_stream
+        self._world_size = cpu_group.size()
+        self._rank = cpu_group.rank()
+        # expert_id -> weight tensors to pack into the send buffer.
+        self._expert_send_map: dict[int, list[torch.Tensor]] = {}
+        # src_rank -> expert_id -> weight tensors to unpack after transfer.
+        self._recv_map: dict[int, dict[int, list[torch.Tensor]]] = {}
+        self._num_local_experts: int = expert_weights[0].shape[0]
+        self._device = expert_weights[0].device
+        for tensor in expert_weights:
+            assert tensor.device == self._device, (
+                "All local EPLB tensors are expected to be on the same device: "
+                f"expected={self._device}, got={tensor.device}"
+            )
+
+        nixl_agent_config = nixl_utils.nixl_agent_config
+        config = (
+            nixl_agent_config(capture_telemetry=False)
+            if nixl_agent_config is not None
+            else None
+        )
+        self._nixl_wrapper = nixl_wrapper_cls(self._make_agent_name(), config)
+        self._nixl_memory_type = "VRAM"
+        self._registered_desc: object | None = None
+        self._remote_agents: dict[int, str] = {}
+        self._remote_send_meta: dict[int, tuple[int, int]] = {}
+        self._send_buffer: torch.Tensor = torch.empty(0)
+        self._recv_buffer: torch.Tensor = torch.empty(0)
+        self._expert_bytes: int = 0
+
+        self._cuda_device_id = int(self._device.index or 0)
+        self._init_step("buffers", self._init_registered_buffers, expert_weights)
+        self._init_step("agents", self._init_remote_agents)
+        self._init_step("send meta", self._exchange_remote_send_meta)
+        self._log_initialized()
+
+    @property
+    def needs_profile_buffer_reservation(self) -> bool:
+        return False
+
+    @staticmethod
+    def _init_step(name: str, fn: object, *args: object, **kwargs: object) -> None:
+        try:
+            fn(*args, **kwargs)  # type: ignore[operator]
+        except Exception as exc:
+            raise RuntimeError(f"NIXL EPLB init failed: {name}") from exc
+
+    def _make_agent_name(self) -> str:
+        """Build a deployment-unique nixl agent name."""
+        pp_size = get_pp_group().world_size
+        pp_suffix = f"-pp{get_pp_group().rank_in_group}" if pp_size > 1 else ""
+        uid = uuid.uuid4().hex[:8]
+        return f"eplb-{self._rank}{pp_suffix}-{uid}"
+
+    def add_send(
+        self,
+        tensors: list[torch.Tensor],
+        dst_rank: int,
+        expert_id: int,
+    ) -> None:
+        assert dst_rank != self._rank, (
+            "EPLB communicator should not enqueue same-rank sends: "
+            f"rank={self._rank}, dst_rank={dst_rank}"
+        )
+        # An expert sent to multiple peers is packed only once; skip duplicates.
+        if expert_id not in self._expert_send_map:
+            self._expert_send_map[expert_id] = tensors
+
+    def add_recv(
+        self,
+        tensors: list[torch.Tensor],
+        src_rank: int,
+        expert_id: int,
+    ) -> None:
+        assert src_rank != self._rank, (
+            "EPLB communicator should not enqueue same-rank recvs: "
+            f"rank={self._rank}, src_rank={src_rank}"
+        )
+        recv_experts = self._recv_map.setdefault(src_rank, {})
+        if expert_id not in recv_experts:
+            recv_experts[expert_id] = tensors
+
+    def _init_remote_agents(self) -> None:
+        local_metadata = self._nixl_wrapper.get_agent_metadata()
+        gathered_metadata: list[bytes | None] = [None] * self._world_size
+        torch.distributed.all_gather_object(
+            gathered_metadata, local_metadata, group=self._cpu_group
+        )
+        for peer in range(self._world_size):
+            if peer == self._rank:
+                continue
+            peer_metadata = gathered_metadata[peer]
+            assert peer_metadata is not None
+            self._remote_agents[peer] = self._nixl_wrapper.add_remote_agent(
+                peer_metadata
+            )
+
+    def _init_registered_buffers(self, expert_weights: Sequence[torch.Tensor]) -> None:
+        total_bytes = max(sum(t.nbytes for t in expert_weights), 1)
+        assert total_bytes % self._num_local_experts == 0, (
+            f"Number of bytes in moe layer {total_bytes} is not divisible "
+            f"by number of local experts {self._num_local_experts}"
+        )
+        self._expert_bytes = total_bytes // self._num_local_experts
+
+        self._send_buffer = torch.empty(
+            total_bytes, device=self._device, dtype=torch.uint8
+        )
+        self._recv_buffer = torch.empty(
+            total_bytes, device=self._device, dtype=torch.uint8
+        )
+
+        descs = self._nixl_wrapper.get_reg_descs([self._send_buffer, self._recv_buffer])
+        self._nixl_wrapper.register_memory(descs)
+        self._registered_desc = descs
+
+    def _exchange_remote_send_meta(self) -> None:
+        """Exchange send-buffer metadata so each rank can build dynamic
+        descriptors at execute time."""
+        local_meta: tuple[int, int] = (
+            self._send_buffer.data_ptr(),
+            self._cuda_device_id,
+        )
+        gathered_meta: list[tuple[int, int] | None] = [None] * self._world_size
+        torch.distributed.all_gather_object(
+            gathered_meta, local_meta, group=self._cpu_group
+        )
+
+        for peer in self._remote_agents:
+            peer_meta = gathered_meta[peer]
+            assert peer_meta is not None
+            self._remote_send_meta[peer] = peer_meta
+
+    @staticmethod
+    def _pack_send_buffer(
+        in_tensors: list[torch.Tensor],
+        send_buffer: torch.Tensor,
+        byte_offset: int,
+    ) -> None:
+        for tensor in in_tensors:
+            raw = tensor.reshape(-1).view(torch.uint8)
+            if raw.numel() == 0:
+                continue
+            send_buffer[byte_offset : byte_offset + raw.numel()].copy_(
+                raw, non_blocking=True
+            )
+            byte_offset += raw.numel()
+
+    @staticmethod
+    def _unpack_recv_buffer(
+        recv_buffer: torch.Tensor,
+        out_tensors: list[torch.Tensor],
+        byte_offset: int,
+    ) -> None:
+        for tensor in out_tensors:
+            num_bytes = tensor.numel() * tensor.element_size()
+            if num_bytes == 0:
+                continue
+            tensor.reshape(-1).view(torch.uint8).copy_(
+                recv_buffer[byte_offset : byte_offset + num_bytes],
+                non_blocking=True,
+            )
+            byte_offset += num_bytes
+
+    def _wait_for_all_transfers(self, handles: list[int]) -> None:
+        pending = set(handles)
+        while pending:
+            completed: list[int] = []
+            for handle in pending:
+                state = self._nixl_wrapper.check_xfer_state(handle)
+                if state == "DONE":
+                    completed.append(handle)
+                    continue
+                if state != "PROC":
+                    raise RuntimeError(f"NIXL transfer failed with state={state}")
+            for handle in completed:
+                pending.remove(handle)
+            if pending:
+                time.sleep(0.0005)
+
+    def _create_peer_xfer(
+        self,
+        src: int,
+        local_descs: list[tuple[int, int, int]],
+        remote_descs: list[tuple[int, int, int]],
+    ) -> tuple[int, int, int]:
+        """Create a batched xfer for multiple descriptors from one peer.
+
+        Each element in *local_descs* / *remote_descs* is an
+        ``(address, size, device_id)`` tuple.
+
+        Returns ``(local_dlist, remote_dlist, xfer_handle)``.
+        """
+        local_desc = self._nixl_wrapper.get_xfer_descs(
+            local_descs, self._nixl_memory_type
+        )
+        local_handle = self._nixl_wrapper.prep_xfer_dlist(
+            "NIXL_INIT_AGENT",
+            local_desc,
+        )
+
+        remote_desc = self._nixl_wrapper.get_xfer_descs(
+            remote_descs, self._nixl_memory_type
+        )
+        remote_handle = self._nixl_wrapper.prep_xfer_dlist(
+            self._remote_agents[src],
+            remote_desc,
+        )
+
+        indices = list(range(len(local_descs)))
+        xfer_handle = self._nixl_wrapper.make_prepped_xfer(
+            "READ",
+            local_handle,
+            indices,
+            remote_handle,
+            indices,
+        )
+        return (local_handle, remote_handle, xfer_handle)
+
+    def execute(self, old_indices: np.ndarray | None = None) -> None:
+        assert old_indices is not None, (
+            "NixlEplbCommunicator.execute requires old_indices"
+        )
+
+        xfer_entries: list[tuple[int, int, int]] = []
+        try:
+            n = self._num_local_experts
+            rank_experts = old_indices[: self._world_size * n].reshape(
+                self._world_size, n
+            )
+            # Build expert_id -> send slot mapping per rank.
+            expert_to_send_slot: list[dict[int, int]] = [
+                {int(eid): i for i, eid in enumerate(row) if eid != -1}
+                for row in rank_experts
+            ]
+
+            # Phase 1: pack each expert at its slot offset in the send buffer.
+            with torch.cuda.stream(self._cuda_stream):
+                for expert_id, tensors in self._expert_send_map.items():
+                    slot = expert_to_send_slot[self._rank][expert_id]
+                    byte_offset = slot * self._expert_bytes
+                    self._pack_send_buffer(tensors, self._send_buffer, byte_offset)
+
+            # Ensure all packed data is visible in device memory before pulls.
+            if self._cuda_stream is not None:
+                self._cuda_stream.synchronize()
+            else:
+                torch.cuda.current_stream().synchronize()
+            # READ is receiver-initiated; synchronize all ranks before transfer.
+            # We use monitored_barrier so a rank that crashes or exits early
+            # produces a diagnostic timeout instead of a silent hang.
+            torch.distributed.monitored_barrier(
+                group=self._cpu_group,
+                timeout=timedelta(minutes=5),
+            )
+
+            # Phase 2: issue one batched READ per peer.
+            recv_offsets: dict[tuple[int, int], int] = {}
+            recv_offset = 0
+            recv_base = self._recv_buffer.data_ptr()
+            for src in range(self._world_size):
+                if src == self._rank:
+                    continue
+                recv_experts = self._recv_map.get(src)
+                if not recv_experts:
+                    continue
+                expert_ids = list(recv_experts.keys())
+                remote_base, remote_dev = self._remote_send_meta[src]
+                local_descs: list[tuple[int, int, int]] = []
+                remote_descs: list[tuple[int, int, int]] = []
+                for expert_id in expert_ids:
+                    slot = expert_to_send_slot[src][expert_id]
+                    remote_off = slot * self._expert_bytes
+                    recv_offsets[(src, expert_id)] = recv_offset
+                    local_descs.append(
+                        (
+                            recv_base + recv_offset,
+                            self._expert_bytes,
+                            self._cuda_device_id,
+                        )
+                    )
+                    remote_descs.append(
+                        (remote_base + remote_off, self._expert_bytes, remote_dev)
+                    )
+                    recv_offset += self._expert_bytes
+                    assert recv_offset <= self._recv_buffer.nbytes
+                local_h, remote_h, xfer_h = self._create_peer_xfer(
+                    src, local_descs, remote_descs
+                )
+                self._nixl_wrapper.transfer(xfer_h)
+                xfer_entries.append((local_h, remote_h, xfer_h))
+
+            # Phase 3: wait for all in-flight transfers, then unpack.
+            self._wait_for_all_transfers([x[2] for x in xfer_entries])
+
+            with torch.cuda.stream(self._cuda_stream):
+                for (src, expert_id), offset in recv_offsets.items():
+                    self._unpack_recv_buffer(
+                        self._recv_buffer,
+                        self._recv_map[src][expert_id],
+                        offset,
+                    )
+        finally:
+            for local_h, remote_h, xfer_h in xfer_entries:
+                with contextlib.suppress(Exception):
+                    self._nixl_wrapper.release_xfer_handle(xfer_h)
+                with contextlib.suppress(Exception):
+                    self._nixl_wrapper.release_dlist_handle(local_h)
+                with contextlib.suppress(Exception):
+                    self._nixl_wrapper.release_dlist_handle(remote_h)
+            self._expert_send_map.clear()
+            self._recv_map.clear()
+
+    def __del__(self) -> None:
+        try:
+            if self._registered_desc is not None:
+                self._nixl_wrapper.deregister_memory(self._registered_desc)
+                self._registered_desc = None
+            for agent_name in self._remote_agents.values():
+                self._nixl_wrapper.remove_remote_agent(agent_name)
+            self._remote_agents.clear()
+        except Exception as e:
+            logger.warning("Error during NixlEplbCommunicator cleanup: %s", e)
+
+
+class PyNcclEplbCommunicator(EplbCommunicator):
+    """EPLB communicator backed by PyNcclCommunicator using ncclSend/ncclRecv."""
+
+    def __init__(
+        self,
+        pynccl_comm: PyNcclCommunicator,
+        cuda_stream: torch.cuda.Stream | None = None,
+    ) -> None:
+        self._pynccl_comm = pynccl_comm
+        self._cuda_stream = cuda_stream
+        self._group_started = False
+        self._log_initialized()
+
+    def _ensure_group_started(self) -> None:
+        if not self._group_started:
+            self._pynccl_comm.group_start()
+            self._group_started = True
+
+    def add_send(
+        self,
+        tensors: list[torch.Tensor],
+        dst_rank: int,
+        expert_id: int,  # unused by this backend
+    ) -> None:
+        self._ensure_group_started()
+        for tensor in tensors:
+            self._pynccl_comm.send(tensor, dst_rank, stream=self._cuda_stream)
+
+    def add_recv(
+        self,
+        tensors: list[torch.Tensor],
+        src_rank: int,
+        expert_id: int,  # unused by this backend
+    ) -> None:
+        self._ensure_group_started()
+        for tensor in tensors:
+            self._pynccl_comm.recv(tensor, src_rank, stream=self._cuda_stream)
+
+    def execute(self, old_indices: np.ndarray | None = None) -> None:
+        if self._group_started:
+            self._pynccl_comm.group_end()
+            self._group_started = False
+
+
+def create_eplb_communicator(
+    group_coordinator: GroupCoordinator,
+    backend: str | None,
+    expert_weights: Sequence[torch.Tensor],
+) -> EplbCommunicator:
+    """Create an EPLB communicator for the given backend.
+
+    Args:
+        group_coordinator: Process-group coordinator that provides the
+            device and CPU communication groups.
+        backend: Communicator backend name (``"torch_nccl"``,
+            ``"torch_gloo"``, ``"pynccl"``, or ``"nixl"``).
+            Falls back to ``"torch_nccl"`` when *None*.
+            Stateless (elastic EP) groups only support ``"torch_nccl"``
+            and ``"pynccl"``; ``"torch_nccl"`` is silently promoted to
+            ``"pynccl"`` in that case.  When tensors reside on CPU,
+            ``"torch_gloo"`` or ``"torch_nccl"`` are used via the CPU
+            process group.
+        expert_weights: Expert weight tensors from *one* MoE layer.
+            NixlEplbCommunicator pre-allocates send/recv buffers sized
+            to this layer, so all other MoE layers must have the same
+            tensor count, shapes, and dtypes.
+    """
+    # Keep a safe default for callers that have not resolved communicator yet.
+    if backend is None:
+        backend = "torch_nccl"
+
+    tensor_device_type = expert_weights[0].device.type if expert_weights else "cpu"
+    torch_group = (
+        group_coordinator.cpu_group
+        if tensor_device_type == "cpu"
+        else group_coordinator.device_group
+    )
+
+    def _create_pynccl() -> EplbCommunicator:
+        if tensor_device_type == "cpu":
+            raise RuntimeError(
+                "EPLB communicator 'pynccl' supports only cuda-like devices "
+                f"(got {tensor_device_type})."
+            )
+        unsupported_dtypes = sorted(
+            {
+                tensor.dtype
+                for tensor in expert_weights
+                if not ncclDataTypeEnum.supports_torch_dtype(tensor.dtype)
+            },
+            key=str,
+        )
+        if unsupported_dtypes:
+            raise RuntimeError(
+                "EPLB communicator 'pynccl' requested but expert weights contain "
+                "unsupported dtypes: "
+                f"({', '.join(str(dtype) for dtype in unsupported_dtypes)})."
+            )
+
+        device_comm = group_coordinator.device_communicator
+        pynccl_comm = (
+            getattr(device_comm, "pynccl_comm", None)
+            if device_comm is not None
+            else None
+        )
+        if pynccl_comm is None or pynccl_comm.disabled or not pynccl_comm.available:
+            raise RuntimeError("EPLB communicator 'pynccl' requested but unavailable.")
+        try:
+            return PyNcclEplbCommunicator(pynccl_comm=pynccl_comm)
+        except Exception as exc:
+            raise RuntimeError(
+                f"Failed to initialize PyNcclEplbCommunicator ({exc})."
+            ) from exc
+
+    is_stateless = isinstance(group_coordinator, StatelessGroupCoordinator)
+    if is_stateless:
+        if backend not in ("torch_nccl", "pynccl"):
+            raise ValueError(
+                f"Elastic EP requires 'torch_nccl' or 'pynccl' EPLB communicator "
+                f"(got '{backend}')."
+            )
+        if backend == "torch_nccl":
+            logger.warning(
+                "Stateless elastic EP requires PyNCCL backend. "
+                "Forcing EPLB communicator to 'pynccl'."
+            )
+            backend = "pynccl"
+        return _create_pynccl()
+
+    if backend == "nixl":
+        if not has_nixl():
+            raise RuntimeError(
+                "EPLB communicator 'nixl' requested but NIXL is unavailable."
+            )
+        if not (current_platform.is_cuda_alike() and tensor_device_type != "cpu"):
+            raise RuntimeError(
+                "EPLB communicator 'nixl' supports only cuda-like devices "
+                f"(got {tensor_device_type})."
+            )
+        try:
+            return NixlEplbCommunicator(
+                cpu_group=group_coordinator.cpu_group,
+                expert_weights=expert_weights,
+            )
+        except Exception as exc:
+            raise RuntimeError(
+                f"Failed to initialize NixlEplbCommunicator ({exc})."
+            ) from exc
+    elif backend == "torch_gloo":
+        return TorchDistGlooStagedEplbCommunicator(
+            cpu_group=group_coordinator.cpu_group,
+        )
+    elif backend == "torch_nccl":
+        return TorchDistNcclEplbCommunicator(ep_group=torch_group)
+    elif backend == "pynccl":
+        return _create_pynccl()
+    raise ValueError(f"Unknown EPLB communicator backend: {backend}")
diff --git a/vllm/distributed/eplb/eplb_state.py b/vllm/distributed/eplb/eplb_state.py
index 180c12abc73b..319a5f22c922 100644
--- a/vllm/distributed/eplb/eplb_state.py
+++ b/vllm/distributed/eplb/eplb_state.py
@@ -30,13 +30,13 @@
 from collections.abc import Sequence
 from dataclasses import dataclass
 
-import numpy as np
 import torch
 from torch.distributed import ProcessGroup, all_reduce
 
 from vllm.config import ModelConfig, ParallelConfig
 from vllm.distributed.parallel_state import (
     get_ep_group,
+    get_eplb_group,
     get_node_count,
     in_the_same_node_as,
 )
@@ -46,9 +46,11 @@
 from vllm.model_executor.models.interfaces import MixtureOfExperts
 
 from .async_worker import start_async_worker
+from .eplb_communicator import EplbCommunicator, create_eplb_communicator
+from .eplb_utils import CpuGpuEvent
 from .policy import EPLB_POLICIES, AbstractEplbPolicy, DefaultEplbPolicy
 from .rebalance_execute import (
-    RecvMetadata,
+    AsyncEplbLayerResult,
     move_from_buffer,
     rearrange_expert_weights_inplace,
 )
@@ -172,63 +174,36 @@ class EplbModelState:
     """
     The buffer to store the expert weights during transfer.
     """
-    buffer_lock: threading.Lock
-    """
-    The lock to protect the expert buffer.
-    """
-    buffer_consumed_event: torch.cuda.Event | None
-    """
-    CUDA event recorded after the main thread finishes consuming the buffer.
-    The async worker waits on this before writing to the buffer again.
-    """
-    window_ready_event: torch.cuda.Event | None
-    """
-    CUDA event recorded after all-reduce and clone on the main thread.
-    The async worker waits on this before accessing global_expert_load_window.
-    """
-    ep_buffer_ready: int
-    """
-    The flag indicates whether the expert buffer is ready for transfer.
-    0 or 1.
-    """
-    layer_to_transfer: int
-    """
-    The layer index to transfer in async mode.
-    """
     rebalanced: bool
     """
-    The flag indicates whether the experts rebalance have been computed.
-    """
-    pending_global_ready_check: bool
-    """
-    Whether the async EPLB needs to poll peers for buffer readiness.
+    This flag is only used when running Async EPLB. It is set to True by the main thread
+    after the new expert maps have been computed. This indicates that the async worker
+    should start transferring weights. move_to_workspace sets this flag to False when
+    all weights have been transferred and the new map has been successfully committed.
+
+    rebalanced relies on the GIL to synchronize access between the main thread and
+    the async worker.
     """
     eplb_stats: EplbStats | None
     """
     EPLB stats for the model.
     """
-    is_unchanged: np.ndarray
-    """
-    intermediate variable between `move_to_buffer` and `move_to_workspace`.
-    The size is same as the num of physical experts in the current layer.
-    """
-    is_received_locally: np.ndarray
-    """
-    intermediate variable between `move_to_buffer` and `move_to_workspace`.
-    The size is same as the num of physical experts in the current layer.
-    """
-    recv_metadata: RecvMetadata
-    """
-    intermediate variable between `move_to_buffer` and `move_to_workspace`.
-    """
     cuda_device_index: int | None
     """
     CUDA device index for the async EPLB worker thread.
     """
-    new_physical_to_logical_map: torch.Tensor | None = None
+    communicator: EplbCommunicator
+    """
+    The communicator for expert weight transfers.
     """
-    intermediate variable between `move_to_buffer` and `move_to_workspace`.
-    the size is same as physical_to_logical_map
+    pending_result: AsyncEplbLayerResult | None = None
+    """
+    Set by the async worker after all writes to expert_buffer are done. Consumed
+    and reset to None by the main thread in move_to_workspace() after the contents of
+    expert_buffer have been transferred out. At most one result is pending at a time.
+
+    pending_result relies on the GIL to synchronize access between the main thread and
+    the async worker.
     """
 
 
@@ -272,11 +247,18 @@ def __init__(self, parallel_config: ParallelConfig, device: torch.device):
         Interval for expert rearrangement steps.
         This is a constant and is taken from the config.
         """
+        self.should_record_tensor: torch.Tensor | None = None
+        """
+        Shared scalar bool tensor for all layers.  Every
+        :class:`EplbLayerState` holds a reference to the **same** object so
+        a single ``.fill_()`` updates all layers at once.  Allocated on the
+        first call to :meth:`_init_should_record_tensor`.
+        """
         self.is_async: bool = False
         """
         The flag indicates whether the EPLB is running in async mode.
         """
-        self.rearrange_event = threading.Event()
+        self.rearrange_event: CpuGpuEvent = CpuGpuEvent()
         """
         Event to signal when a new rearrangement is needed for the async thread.
         """
@@ -462,9 +444,15 @@ def add_model(
             logical_to_physical_map,
             logical_replica_count,
         )
-
+        self._init_should_record_tensor(model)
         expert_buffer = [torch.empty_like(w) for w in model.expert_weights[0]]
 
+        communicator = create_eplb_communicator(
+            group_coordinator=get_eplb_group(),
+            backend=self.parallel_config.eplb_config.communicator,
+            expert_weights=model.expert_weights[0],
+        )
+
         model_state = EplbModelState(
             physical_to_logical_map=physical_to_logical_map,
             logical_to_physical_map=logical_to_physical_map,
@@ -474,24 +462,10 @@ def add_model(
             model_name=model_config.model,
             model=model,
             expert_buffer=expert_buffer,
-            buffer_lock=threading.Lock(),
-            buffer_consumed_event=None,
-            window_ready_event=None,
-            ep_buffer_ready=0,
-            layer_to_transfer=0,
             rebalanced=False,
-            pending_global_ready_check=False,
             eplb_stats=None,
-            is_unchanged=np.array([]),
-            is_received_locally=np.array([]),
-            recv_metadata=RecvMetadata(
-                recv_primary_mask=np.array([]),
-                recv_count=0,
-                recv_expert_ids=np.array([]),
-                recv_dst_rows=np.array([]),
-            ),
             cuda_device_index=self.cuda_device_index,
-            new_physical_to_logical_map=None,
+            communicator=communicator,
         )
         self.model_states[model_config.compute_hash()] = model_state
         self.num_valid_physical_experts = model.num_physical_experts
@@ -582,15 +556,18 @@ def step(
 
         # Update the expert load sliding window
         if not is_dummy:
+            should_record = self._should_record_current_step(log_stats=log_stats)
             for eplb_model_state in self.model_states.values():
-                eplb_model_state.expert_load_window[self.expert_load_window_step] = (
-                    eplb_model_state.expert_load_pass.clone()
-                )
-                eplb_model_state.expert_load_pass.zero_()
+                if should_record:
+                    eplb_model_state.expert_load_window[
+                        self.expert_load_window_step
+                    ].copy_(eplb_model_state.expert_load_pass)
+                    eplb_model_state.expert_load_pass.zero_()
 
-            self.expert_load_window_step += 1
-            if self.expert_load_window_step >= self.expert_load_window_size:
-                self.expert_load_window_step = 0
+            if should_record:
+                self.expert_load_window_step += 1
+                if self.expert_load_window_step >= self.expert_load_window_size:
+                    self.expert_load_window_step = 0
 
         # Step the expert rearrangement step
         # Note that even if this is a dummy step, we still increment the
@@ -599,17 +576,17 @@ def step(
         self.expert_rearrangement_step += 1
 
         if self.is_async:
+            # Run _move_to_workspace if all ranks have finished transferring the
+            # new weights to the intermediate buffer
             for eplb_model_state in self.model_states.values():
-                all_ranks_buffer_ready = False
-                if eplb_model_state.pending_global_ready_check:
-                    all_ranks_buffer_ready = self._all_ranks_buffer_ready(
-                        eplb_model_state
-                    )
-                if eplb_model_state.ep_buffer_ready and all_ranks_buffer_ready:
-                    self.move_to_workspace(
+                # rebalanced must remain consistent amongst all ranks otherwise the
+                # all_reduce in _all_ranks_result_ready will hang
+                if eplb_model_state.rebalanced and self._all_ranks_result_ready(
+                    eplb_model_state
+                ):
+                    _move_to_workspace(
                         model_state=eplb_model_state,
-                        ep_group=ep_group,
-                        is_profile=is_profile,
+                        ep_rank=ep_group.rank(),
                     )
 
         if self.expert_rearrangement_step >= self.expert_rearrangement_step_interval:
@@ -617,11 +594,66 @@ def step(
                 eplb_model_state.rebalanced
                 for eplb_model_state in self.model_states.values()
             ):
-                # Still performing asynchronous rearrangement
+                # Still performing asynchronous rearrangement; update
+                # should_record (step > step_interval, so always True) and
+                # bail out before the step counter is reset.
+                self._update_layer_should_record(log_stats=log_stats)
                 return
             self.expert_rearrangement_step = 0
             self.rearrange()
 
+        self._update_layer_should_record(log_stats=log_stats)
+
+    def _should_record_current_step(self, log_stats: bool = False) -> bool:
+        """Return whether expert-load recording should be enabled this step.
+
+        Recording is enabled when we are close to either:
+        1) The next rearrangement step, so the sliding window is ready.
+        2) The next balancedness logging step, when log_stats is enabled.
+        """
+        steps_remaining = (
+            self.expert_rearrangement_step_interval - self.expert_rearrangement_step
+        )
+        should_record_for_rearrange = steps_remaining <= self.expert_load_window_size
+
+        if not log_stats:
+            return should_record_for_rearrange
+
+        log_interval = self.parallel_config.eplb_config.log_balancedness_interval
+        steps_until_next_log = (
+            log_interval - (self.expert_rearrangement_step % log_interval)
+        ) % log_interval
+        should_record_for_log = steps_until_next_log <= self.expert_load_window_size
+        return should_record_for_rearrange or should_record_for_log
+
+    def _update_layer_should_record(self, log_stats: bool = False) -> None:
+        """Update the shared ``should_record_tensor`` for all layers."""
+        if self.should_record_tensor is not None:
+            self.should_record_tensor.fill_(
+                self._should_record_current_step(log_stats=log_stats)
+            )
+
+    def _init_should_record_tensor(self, model: "MixtureOfExperts") -> None:  # type: ignore[name-defined]
+        """Allocate (once) and propagate the shared ``should_record_tensor``.
+
+        Must be called after :meth:`model.set_eplb_state` so that each
+        layer's ``eplb_state`` is already populated with the tensor views.
+        """
+        layer_states = [
+            layer.eplb_state
+            for layer in model.moe_layers
+            if hasattr(layer, "eplb_state")
+            and isinstance(layer.eplb_state, EplbLayerState)
+        ]
+
+        if self.should_record_tensor is None and layer_states:
+            self.should_record_tensor = torch.ones(
+                (), dtype=torch.bool, device=self.device
+            )
+
+        for ls in layer_states:
+            ls.should_record_tensor = self.should_record_tensor
+
     def rearrange(
         self,
         is_profile: bool = False,
@@ -735,6 +767,7 @@ def rearrange(
                     new_physical_to_logical_map,
                     eplb_model_state.model.expert_weights,
                     ep_group,
+                    eplb_model_state.communicator,
                     is_profile,
                     rank_mapping,
                 )
@@ -767,18 +800,10 @@ def rearrange(
                     num_nodes=num_nodes,
                     num_gpus=num_gpus,
                 )
-                # Record event after clone to signal async worker
-                # that load stats data is ready
-                sync_event = torch.cuda.Event()
-                sync_event.record()
-                eplb_model_state.window_ready_event = sync_event
-
                 eplb_model_state.rebalanced = True
-                eplb_model_state.layer_to_transfer = 0
-                eplb_model_state.pending_global_ready_check = True
         # Signal async thread to start transferring layers
         if self.is_async and (not is_profile):
-            self.rearrange_event.set()
+            self.rearrange_event.record()
         return None
 
     def start_async_loop(
@@ -794,117 +819,27 @@ def start_async_loop(
                 is_profile=is_profile,
             )
 
-    def _all_ranks_buffer_ready(self, model_state: EplbModelState) -> bool:
+    def _all_ranks_result_ready(self, model_state: EplbModelState) -> bool:
         parallel_state = get_ep_group()
+        has_result = int(model_state.pending_result is not None)
+
         cpu_group = getattr(parallel_state, "cpu_group", None)
         if cpu_group is not None and cpu_group.size() > 1:
-            flag = torch.tensor(
-                (int(model_state.ep_buffer_ready),), dtype=torch.int32, device="cpu"
-            )
+            flag = torch.tensor((has_result,), dtype=torch.int32, device="cpu")
             all_reduce(flag, group=cpu_group)
             return int(flag.item()) == cpu_group.size()
 
         device_group = parallel_state.device_group
         if device_group.size() <= 1:
-            return bool(model_state.ep_buffer_ready)
+            return bool(has_result)
 
         device = getattr(
             parallel_state, "device", model_state.physical_to_logical_map.device
         )
-        flag = torch.tensor(
-            (int(model_state.ep_buffer_ready),), dtype=torch.int32, device=device
-        )
+        flag = torch.tensor((has_result,), dtype=torch.int32, device=device)
         all_reduce(flag, group=device_group)
         return int(flag.item()) == device_group.size()
 
-    def move_to_workspace(
-        self,
-        model_state: EplbModelState,
-        ep_group: ProcessGroup,
-        is_profile: bool = False,
-    ):
-        # We call move_to_workspace only when ep_buffer_ready is 1.
-        # It means we only need to wait for the lock for a short time.
-        max_retries = 6  # 1 minute max
-        retries = 0
-        while not model_state.buffer_lock.acquire(blocking=True, timeout=10.0):
-            retries += 1
-            if retries >= max_retries:
-                raise RuntimeError(
-                    f"Rank {ep_group.rank()}: buffer_lock timeout after "
-                    "{max_retries * 10}s"
-                )
-            logger.warning(
-                "Rank %d: EPLB buffer_lock acquire failed, retrying (%d/%d)",
-                ep_group.rank(),
-                retries,
-                max_retries,
-            )
-        try:
-            assert model_state.new_physical_to_logical_map is not None
-            expert_weights = model_state.model.expert_weights[
-                model_state.layer_to_transfer
-            ]
-            expert_weights_buffer = model_state.expert_buffer
-            new_indices = model_state.new_physical_to_logical_map[
-                model_state.layer_to_transfer
-            ].numpy()
-            move_from_buffer(
-                expert_weights=expert_weights,
-                expert_weights_buffers=expert_weights_buffer,
-                is_unchanged=model_state.is_unchanged,
-                is_received_locally=model_state.is_received_locally,
-                recv_metadata=model_state.recv_metadata,
-                new_indices=new_indices,
-                ep_rank=ep_group.rank(),
-            )
-            # Record event after consuming buffer to signal async thread
-            # that it's safe to overwrite the intermediate buffer
-            consumed_event = torch.cuda.Event()
-            consumed_event.record()
-            model_state.buffer_consumed_event = consumed_event
-
-            transferred_layer = model_state.layer_to_transfer
-            assert model_state.new_physical_to_logical_map is not None
-            _commit_eplb_maps_for_layer(
-                model_state,
-                new_physical_to_logical_map=model_state.new_physical_to_logical_map,
-                layer=transferred_layer,
-            )
-            # After the main thread consumes, advance layer_to_transfer
-            model_state.layer_to_transfer += 1
-            model_state.ep_buffer_ready = 0
-            logger.debug(
-                "model %s successfully move_to_workspace layer %d",
-                model_state.model_name,
-                transferred_layer,
-            )
-            if model_state.layer_to_transfer >= model_state.model.num_moe_layers:
-                self.post_eplb(model_state)
-                model_state.rebalanced = False
-                model_state.layer_to_transfer = 0
-                model_state.pending_global_ready_check = False
-                logger.info(
-                    "finish async transfer for model %s rank %d layer %d",
-                    model_state.model_name,
-                    ep_group.rank(),
-                    model_state.model.num_moe_layers,
-                )
-
-        finally:
-            try:
-                model_state.buffer_lock.release()
-            except Exception as e:
-                logger.error(
-                    "Rank %d: buffer_lock release failed in move_to_workspace: %s",
-                    ep_group.rank(),
-                    str(e),
-                )
-
-    def post_eplb(self, model_state: EplbModelState) -> None:
-        assert model_state.new_physical_to_logical_map is not None
-        model_state.new_physical_to_logical_map = None
-
     def _allreduce_list(self, tensor_list: list[torch.Tensor]) -> list[torch.Tensor]:
         """
         All-reduce a list of tensors.
@@ -993,6 +928,28 @@ class EplbLayerState:
     expert_load_view: torch.Tensor | None = None
     logical_to_physical_map: torch.Tensor | None = None
     logical_replica_count: torch.Tensor | None = None
+    should_record_tensor: torch.Tensor | None = None
+    """
+    Shared scalar bool tensor controlling whether to accumulate expert load
+    metrics during this forward pass.  All layers reference the **same**
+    tensor object, which is owned and updated by :class:`EplbState`.
+
+    Set to ``False`` for the first ``step_interval - window_size`` steps of
+    each rearrangement period: those steps would be overwritten in the
+    sliding window before the next rearrangement, so recording them wastes
+    GPU work.
+    """
+
+    def set_layer_state(
+        self,
+        moe_layer_idx: int,
+        expert_load_view: torch.Tensor,
+        logical_to_physical_map: torch.Tensor,
+        logical_replica_count: torch.Tensor,
+    ) -> None:
+        self.expert_load_view = expert_load_view[moe_layer_idx]
+        self.logical_to_physical_map = logical_to_physical_map[moe_layer_idx]
+        self.logical_replica_count = logical_replica_count[moe_layer_idx]
 
 
 def _node_count_with_rank_mapping(
@@ -1131,7 +1088,7 @@ def _commit_eplb_maps_for_layer(
     """
 
     # Commit physical_to_logical_map
-    src = new_physical_to_logical_map[layer]
+    src = new_physical_to_logical_map
     dst = model_state.physical_to_logical_map[layer]
     assert src.shape == dst.shape, (
         "The number of physical experts must stay the same while running Async EPLB. "
@@ -1190,3 +1147,31 @@ def _commit_eplb_maps(
     src = new_replica_count
     dst = model_state.logical_replica_count
     dst.copy_(src, non_blocking=True)
+
+
+def _move_to_workspace(
+    model_state: EplbModelState,
+    ep_rank: int,
+) -> None:
+    result = model_state.pending_result
+    assert result is not None
+    move_from_buffer(
+        expert_weights=model_state.model.expert_weights[result.layer_idx],
+        expert_weights_buffers=model_state.expert_buffer,
+        transfer_metadata=result.transfer_metadata,
+        new_indices=result.new_physical_to_logical_map.numpy(),
+        ep_rank=ep_rank,
+    )
+
+    _commit_eplb_maps_for_layer(
+        model_state,
+        new_physical_to_logical_map=result.new_physical_to_logical_map,
+        layer=result.layer_idx,
+    )
+
+    if result.layer_idx == model_state.model.num_moe_layers - 1:
+        model_state.rebalanced = False
+
+    # Reset pending_result before unblocking the async worker
+    model_state.pending_result = None
+    result.consumed_event.record()
diff --git a/vllm/distributed/eplb/eplb_utils.py b/vllm/distributed/eplb/eplb_utils.py
index 455848341a47..92fffd229771 100644
--- a/vllm/distributed/eplb/eplb_utils.py
+++ b/vllm/distributed/eplb/eplb_utils.py
@@ -3,6 +3,9 @@
 """Utility functions for EPLB (Expert Parallel Load Balancing)."""
 
 import os
+import threading
+
+import torch
 
 from vllm.config import ParallelConfig
 from vllm.logger import init_logger
@@ -10,6 +13,54 @@
 logger = init_logger(__name__)
 
 
+class CpuGpuEvent:
+    """
+    Combines a CUDA event with a CPU threading event to enforce record->wait
+    ordering across two threads.
+
+    This class is designed for exactly two threads: one producer that calls
+    record() and one consumer that calls wait(). Using it with more than two
+    threads is not supported and will produce undefined behavior.
+
+    CUDA events alone are insufficient for cross-thread synchronization because
+    waiting on an unrecorded CUDA event is a no-op. The wait will return
+    immediately instead of blocking. This class adds a threading.Event so
+    that the waiting thread blocks on the CPU side until record() is called, at
+    which point the CUDA event is guaranteed to be in-flight and event.wait() will
+    correctly synchronize the GPU stream.
+    """
+
+    def __init__(self):
+        self._event = torch.cuda.Event()
+        self._recorded = threading.Event()
+
+    def wait(self, stream: torch.cuda.Stream | None = None):
+        """
+        Blocks the calling thread until record finishes. Used to guarantee that the
+        record kernel is called before wait.
+
+        Should only be called by the Async Eplb thread.
+        """
+        self._recorded.wait()
+        self._event.wait(stream)
+        self._recorded.clear()
+
+    def record(self, stream: torch.cuda.Stream | None = None):
+        """
+        Unblocks the waiting thread after calling event.record().
+
+        Should only be called by the main thread.
+        """
+        if self._recorded.is_set():
+            raise RuntimeError(
+                "CpuGpuEvent.record() called before the previous event was "
+                "consumed by wait()"
+            )
+        self._event = torch.cuda.Event()
+        self._event.record(stream)
+        self._recorded.set()
+
+
 def override_envs_for_eplb(parallel_config: ParallelConfig) -> None:
     """
     Override environment variables for EPLB when specific conditions are met.
@@ -21,6 +72,10 @@ def override_envs_for_eplb(parallel_config: ParallelConfig) -> None:
     is_eplb_enabled = parallel_config.enable_eplb
     async_eplb = parallel_config.eplb_config.use_async
     is_deepep_ll = parallel_config.all2all_backend == "deepep_low_latency"
+    is_nccl_based_eplb_communicator = parallel_config.eplb_config.communicator in (
+        "torch_nccl",
+        "pynccl",
+    )
 
     # Override NCCL_MAX_CTAS to avoid hangs when using async EPLB with the
     # DeepEP low-latency backend.
@@ -39,7 +94,13 @@ def override_envs_for_eplb(parallel_config: ParallelConfig) -> None:
     # Limiting NCCL occupancy via NCCL_MAX_CTAS leaves space for the DeepEP
     # cooperative kernel to launch and complete, breaking the deadlock.
     # See: https://github.com/deepseek-ai/DeepEP/issues/496
-    if is_data_parallel and is_eplb_enabled and is_deepep_ll and async_eplb:
+    if (
+        is_data_parallel
+        and is_eplb_enabled
+        and is_deepep_ll
+        and async_eplb
+        and is_nccl_based_eplb_communicator
+    ):
         current_value_str = os.getenv("NCCL_MAX_CTAS")
 
         if current_value_str and current_value_str.isdigit():
@@ -49,6 +110,7 @@ def override_envs_for_eplb(parallel_config: ParallelConfig) -> None:
         os.environ["NCCL_MAX_CTAS"] = str(override_value)
         logger.info_once(
             f"EPLB: Setting NCCL_MAX_CTAS={override_value} "
-            "for expert parallel with EPLB and deepep_low_latency backend",
+            "for expert parallel with NCCL-based EPLB communicator and "
+            "deepep_low_latency backend",
             scope="global",
         )
diff --git a/vllm/distributed/eplb/rebalance_execute.py b/vllm/distributed/eplb/rebalance_execute.py
index 7823ce4a35e3..50b7013295c6 100644
--- a/vllm/distributed/eplb/rebalance_execute.py
+++ b/vllm/distributed/eplb/rebalance_execute.py
@@ -11,25 +11,23 @@
 
 import numpy as np
 import torch
-from torch.distributed import (
-    P2POp,
-    ProcessGroup,
-    all_gather,
-    batch_isend_irecv,
-    get_global_rank,
-)
-
-from vllm.distributed.parallel_state import get_ep_group
-from vllm.distributed.stateless_coordinator import StatelessGroupCoordinator
+from torch.distributed import ProcessGroup, all_gather
+
+from vllm.distributed.eplb.eplb_communicator import EplbCommunicator
+from vllm.distributed.eplb.eplb_utils import CpuGpuEvent
 from vllm.logger import init_logger
 
 logger = init_logger(__name__)
 
 
 @dataclass
-class RecvMetadata:
-    """Metadata describing remote receives during EPLB rebalancing."""
+class TransferMetadata:
+    """Metadata describing a completed EPLB buffer transfer."""
 
+    is_unchanged: np.ndarray
+    """Mask of (num_local_experts,) indicating experts unchanged after rebalance."""
+    is_received_locally: np.ndarray
+    """Mask of (num_local_experts,) indicating experts received from local data."""
     recv_primary_mask: np.ndarray
     """Mask of (num_local_experts,) indicating primary experts received."""
     recv_count: int
@@ -40,8 +38,28 @@ class RecvMetadata:
     """Target expert indices (num_local_experts,) in local tensors to send."""
 
 
-# Type alias for the result of move_to_buffer or transfer_layer
-MoveToBufferResult = tuple[np.ndarray, np.ndarray, RecvMetadata]
+@dataclass
+class AsyncEplbLayerResult:
+    """
+    The result of one completed async EPLB layer transfer.
+    """
+
+    layer_idx: int
+    """Index of the MoE layer that was transferred."""
+    new_physical_to_logical_map: torch.Tensor
+    """
+    New physical→logical mapping for layers_idx, on CPU.
+    Shape: (num_physical_experts)
+    """
+    transfer_metadata: TransferMetadata
+    """Metadata describing what was received during transfer_layer."""
+    consumed_event: CpuGpuEvent
+    """
+    Event used to synchronize access to the intermediate buffer. The async worker calls
+    wait() after it finishes transferring weights to the intermediate buffer. The main
+    thread calls record() after it finishes transferring weights out of the intermediate
+    buffer in _move_to_workspace()
+    """
 
 
 def get_ep_ranks_with_experts_batch(
@@ -158,8 +176,9 @@ def move_to_buffer(
     expert_weights: Sequence[torch.Tensor],
     expert_weights_buffers: Sequence[torch.Tensor],
     cuda_stream: torch.cuda.Stream | None,
-    ep_group: ProcessGroup,
-) -> MoveToBufferResult:
+    ep_rank: int,
+    communicator: EplbCommunicator,
+) -> TransferMetadata:
     """
     Rearranges expert weights during EPLB rebalancing.
 
@@ -172,18 +191,13 @@ def move_to_buffer(
         expert_weights: Original expert weights for the layer.
         expert_weights_buffers: Intermediate buffers (one per tensor).
         cuda_stream: CUDA stream for async copies (can be None for sync mode).
-        ep_group: Distributed process group for expert parallel comms.
+        ep_rank: Rank of this process in expert parallel group.
+        communicator: EplbCommunicator instance for P2P communication.
 
     Returns:
-        is_unchanged (np.ndarray): (num_local_experts,), True where an expert row
-            is unchanged after rebalance.
-        is_received_locally (np.ndarray): (num_local_experts,), True where a row
-            can be updated from local data.
-        RecvMetadata: Metadata needed for completing remote weight transfers.
+        TransferMetadata: Metadata needed for completing remote weight transfers.
     """
     assert old_indices.shape == new_indices.shape
-    ep_rank = ep_group.rank()
-
     recv_primary_mask = np.zeros((num_local_experts,), dtype=np.bool_)
     send_expert_ids = np.full((num_local_experts,), -1, dtype=np.int64)
     send_src_rows = np.full((num_local_experts,), -1, dtype=np.int32)
@@ -247,22 +261,9 @@ def move_to_buffer(
             expert = new_local_expert_ids[dst]
             src_local = expert_to_src_map.get(expert, -1)
             if src_local != -1:
-                for w, b in zip(expert_weights, expert_weights_buffers):
-                    b[dst].copy_(w[src_local], non_blocking=True)
-
-    p2p_ops: list[P2POp] = []
-    if isinstance(get_ep_group(), StatelessGroupCoordinator):
-        ep_group = get_ep_group()
-        is_stateless = True
-    else:
-        is_stateless = False
-
-    # Pre-compute global ranks mapping (only needed for non-stateless groups)
-    ep_size = ep_group.size()
-    if not is_stateless:
-        rank_to_global = {
-            rank: get_global_rank(ep_group, rank) for rank in range(ep_size)
-        }
+                with torch.cuda.stream(cuda_stream):
+                    for w, b in zip(expert_weights, expert_weights_buffers):
+                        b[dst].copy_(w[src_local], non_blocking=True)
 
     # 2. Post sends
     if send_count > 0:
@@ -293,24 +294,9 @@ def move_to_buffer(
             recver_pos = remainder_start + sender_pos
             if recver_pos < len(ranks_to_recv):
                 recv_ranks.append(ranks_to_recv[recver_pos])
+            expert_tensors = [w[src] for w in expert_weights]
             for dst in recv_ranks:
-                if is_stateless:
-                    for w in expert_weights:
-                        op = object.__new__(P2POp)
-                        op.op = torch.distributed.isend
-                        op.tensor = w[src]
-                        op.group_peer = dst
-                        p2p_ops.append(op)
-                else:
-                    dst_global = rank_to_global[dst]
-                    p2p_ops += [
-                        P2POp(
-                            torch.distributed.isend,
-                            w[src],
-                            dst_global,
-                        )
-                        for w in expert_weights
-                    ]
+                communicator.add_send(expert_tensors, dst, expert_id=int(expert))
 
     # 3. Post recvs
     if recv_count > 0:
@@ -339,59 +325,29 @@ def move_to_buffer(
                 src = ranks_to_send[recver_pos // num_dst_per_sender]
             else:
                 src = ranks_to_send[recver_pos - remainder_start]
-            if is_stateless:
-                for b in expert_weights_buffers:
-                    op = object.__new__(P2POp)
-                    op.op = torch.distributed.irecv
-                    op.tensor = b[dst]
-                    op.group_peer = src
-                    p2p_ops.append(op)
-            else:
-                src_global = rank_to_global[src]
-                p2p_ops += [
-                    P2POp(
-                        torch.distributed.irecv,
-                        b[dst],
-                        src_global,
-                    )
-                    for b in expert_weights_buffers
-                ]
+            communicator.add_recv(
+                [b[dst] for b in expert_weights_buffers],
+                src,
+                expert_id=int(expert),
+            )
 
     # 4. Execute the P2P operations. The real communication happens here.
-    if p2p_ops and cuda_stream is not None:
-        with torch.cuda.stream(cuda_stream):
-            if is_stateless:
-                ep_group.device_communicator.batch_isend_irecv(p2p_ops)
-            else:
-                reqs = batch_isend_irecv(p2p_ops)
-                for req in reqs:
-                    req.wait()
-    elif p2p_ops:
-        if is_stateless:
-            ep_group.device_communicator.batch_isend_irecv(p2p_ops)
-        else:
-            reqs = batch_isend_irecv(p2p_ops)
-            for req in reqs:
-                req.wait()
+    communicator.execute(old_indices=old_indices)
     # wait for the communication to finish
-    return (
-        is_unchanged,
-        is_received_locally,
-        RecvMetadata(
-            recv_primary_mask=recv_primary_mask,
-            recv_count=recv_count,
-            recv_expert_ids=recv_expert_ids,
-            recv_dst_rows=recv_dst_rows,
-        ),
+    return TransferMetadata(
+        is_unchanged=is_unchanged,
+        is_received_locally=is_received_locally,
+        recv_primary_mask=recv_primary_mask,
+        recv_count=recv_count,
+        recv_expert_ids=recv_expert_ids,
+        recv_dst_rows=recv_dst_rows,
     )
 
 
 def move_from_buffer(
     expert_weights: Sequence[torch.Tensor],
     expert_weights_buffers: list[torch.Tensor],
-    is_unchanged: np.ndarray,
-    is_received_locally: np.ndarray,
-    recv_metadata: RecvMetadata,
+    transfer_metadata: TransferMetadata,
     new_indices: np.ndarray,
     ep_rank: int,
 ) -> None:
@@ -403,17 +359,17 @@ def move_from_buffer(
         expert_weights: List of the actual MoE layer weights used in the execution.
         expert_weights_buffers: Intermediate buffers containing the experts weights
             after the transfer is completed.
-        is_unchanged: (num_local_experts,), True where an expert row is unchanged.
-        is_received_locally: (num_local_experts,), True where a row is updated locally.
-        recv_metadata: RecvMetadata containing remote receive metadata.
+        transfer_metadata: TransferMetadata containing transfer metadata.
         new_indices: (num_experts_total,) mapping from local rows to desired
             (possibly global) expert id, after rebalance.
         ep_rank: Rank of the process in the expert parallel group.
     """
-    recv_primary_mask = recv_metadata.recv_primary_mask
-    recv_count = recv_metadata.recv_count
-    recv_expert_ids = recv_metadata.recv_expert_ids
-    recv_dst_rows = recv_metadata.recv_dst_rows
+    is_unchanged = transfer_metadata.is_unchanged
+    is_received_locally = transfer_metadata.is_received_locally
+    recv_primary_mask = transfer_metadata.recv_primary_mask
+    recv_count = transfer_metadata.recv_count
+    recv_expert_ids = transfer_metadata.recv_expert_ids
+    recv_dst_rows = transfer_metadata.recv_dst_rows
     num_local_experts = is_unchanged.shape[0]
 
     # Mask for rows to copy back from buffers:
@@ -465,16 +421,17 @@ def move_from_buffer(
             w[dst].copy_(w[src], non_blocking=True)
 
 
-async def transfer_layer(
+def transfer_layer(
     old_layer_indices: torch.Tensor,
     new_layer_indices: torch.Tensor,
     expert_weights: Sequence[torch.Tensor],
     expert_weights_buffer: Sequence[torch.Tensor],
     ep_group: ProcessGroup,
+    communicator: EplbCommunicator,
     is_profile: bool = False,
     cuda_stream: torch.cuda.Stream | None = None,
     rank_mapping: dict[int, int] | None = None,
-) -> MoveToBufferResult:
+) -> TransferMetadata:
     """
     Rearranges the expert weights in place according to the new expert indices.
 
@@ -489,6 +446,7 @@ async def transfer_layer(
             For example, a linear layer may have up and down projection.
         expert_weights_buffer: Intermediate buffers (one per weight tensor).
         ep_group: The device process group for expert parallelism.
+        communicator: EplbCommunicator instance for P2P communication.
         is_profile (bool): If `True`, do not perform any actual weight copy.
             This is used during profile run, where we only perform dummy
             communications to reserve enough memory for the buffers.
@@ -496,11 +454,8 @@ async def transfer_layer(
         rank_mapping: Optional rank mapping for elastic expert parallelism.
 
     Returns:
-        is_unchanged (np.ndarray): (num_local_experts,), True where expert
-            is left unchanged.
-        is_received_locally (np.ndarray): (num_local_experts,), True where expert
-            can be received locally.
-        RecvMetadata: Metadata needed for completing remote weight transfers.
+        TransferMetadata: Metadata needed for completing remote weight transfers,
+            including is_unchanged and is_received_locally masks.
     """
     ep_size = ep_group.size()
     if rank_mapping is not None:
@@ -535,16 +490,16 @@ async def transfer_layer(
     old_layer_indices_np = old_layer_indices.cpu().numpy()
     new_layer_indices_np = new_layer_indices.cpu().numpy()
 
-    is_unchanged, is_received_locally, recv_metadata = move_to_buffer(
+    return move_to_buffer(
         num_local_experts=num_local_physical_experts,
         old_indices=old_layer_indices_np,
         new_indices=new_layer_indices_np,
         expert_weights=expert_weights,
         expert_weights_buffers=expert_weights_buffer,
         cuda_stream=cuda_stream,
-        ep_group=ep_group,
+        ep_rank=ep_group.rank(),
+        communicator=communicator,
     )
-    return is_unchanged, is_received_locally, recv_metadata
 
 
 def rearrange_expert_weights_inplace(
@@ -552,6 +507,7 @@ def rearrange_expert_weights_inplace(
     new_global_expert_indices: torch.Tensor,
     expert_weights: Sequence[Sequence[torch.Tensor]],
     ep_group: ProcessGroup,
+    communicator: EplbCommunicator,
     is_profile: bool = False,
     rank_mapping: dict[int, int] | None = None,
 ) -> None:
@@ -569,6 +525,7 @@ def rearrange_expert_weights_inplace(
             For example, a linear layer may have up and down projection,
             so weight_count = 2. Each weight's hidden size can be different.
         ep_group: The device process group for expert parallelism.
+        communicator: EplbCommunicator instance for P2P communication.
         is_profile (bool): If `True`, do not perform any actual weight copy.
             This is used during profile run, where we only perform dummy
             communications to reserve enough memory for the buffers.
@@ -599,53 +556,55 @@ def rearrange_expert_weights_inplace(
     assert new_global_expert_indices.shape == (num_moe_layers, num_physical_experts)
 
     ep_size = ep_group.size()
+    ep_rank = ep_group.rank()
     assert num_physical_experts == ep_size * num_local_physical_experts
 
     first_layer_weights = list(expert_weights[0])
-    # Buffers to hold the expert weights during the exchange.
-    # NOTE: Currently we assume the same weights across different layers
-    # have the same shape.
-    weights_buffer: list[torch.Tensor] = [
-        torch.empty_like(w) for w in first_layer_weights
-    ]
+
     if is_profile:
-        # Reserve communication buffers via a minimal dummy all_gather on first layer
-        for weight, buffer in zip(expert_weights[0], weights_buffer):
-            dummy_recv_buffer = [buffer for _ in range(ep_size)]
-            torch.distributed.barrier()
-            all_gather(
-                dummy_recv_buffer,
-                weight,
-                group=ep_group,
-            )
+        if communicator.needs_profile_buffer_reservation:
+            # Reserve NCCL communication buffers via a dummy all_gather.
+            # Backends that pre-allocate their own transfer buffers
+            # skip this to avoid the extra memory spike during profiling.
+            weights_buffer: list[torch.Tensor] = [
+                torch.empty_like(w) for w in first_layer_weights
+            ]
+            for weight, buffer in zip(expert_weights[0], weights_buffer):
+                dummy_recv_buffer = [buffer for _ in range(ep_size)]
+                torch.distributed.barrier()
+                all_gather(
+                    dummy_recv_buffer,
+                    weight,
+                    group=ep_group,
+                )
         return
 
-    # NOTE(bowen): We need this synchronize to run, but I don't know why.
-    # If you figure out the reason, please let me know -- thank you!
-    torch.accelerator.synchronize()
+    # Buffers to hold the expert weights during the exchange.
+    # NOTE: Currently we assume the same weights across different layers
+    # have the same shape.
+    weights_buffer = [torch.empty_like(w) for w in first_layer_weights]
 
     old_global_expert_indices_cpu = old_global_expert_indices.cpu().numpy()
     new_global_expert_indices_cpu = new_global_expert_indices.cpu().numpy()
 
     for layer_idx in range(num_moe_layers):
-        is_unchanged, is_received_locally, recv_metadata = move_to_buffer(
+        transfer_metadata = move_to_buffer(
             num_local_experts=num_local_physical_experts,
             old_indices=old_global_expert_indices_cpu[layer_idx],
             new_indices=new_global_expert_indices_cpu[layer_idx],
             expert_weights=expert_weights[layer_idx],
             expert_weights_buffers=weights_buffer,
             cuda_stream=None,
-            ep_group=ep_group,
+            ep_rank=ep_rank,
+            communicator=communicator,
         )
 
         move_from_buffer(
             expert_weights=expert_weights[layer_idx],
             expert_weights_buffers=weights_buffer,
-            is_unchanged=is_unchanged,
-            is_received_locally=is_received_locally,
-            recv_metadata=recv_metadata,
+            transfer_metadata=transfer_metadata,
             new_indices=new_global_expert_indices_cpu[layer_idx],
-            ep_rank=ep_group.rank(),
+            ep_rank=ep_rank,
         )
 
 
@@ -737,4 +696,4 @@ def _map_new_expert_indices_with_rank_mapping(
     return mapped_expert_indices
 
 
-__all__ = ["transfer_layer", "move_from_buffer", "RecvMetadata"]
+__all__ = ["transfer_layer", "move_from_buffer", "TransferMetadata"]
diff --git a/vllm/distributed/kv_events.py b/vllm/distributed/kv_events.py
index 21ec7a36e984..ee21185969f3 100644
--- a/vllm/distributed/kv_events.py
+++ b/vllm/distributed/kv_events.py
@@ -67,6 +67,12 @@ class BlockStored(KVCacheEvent):
     KV cache consumers to reconstruct block hashes.
     """
 
+    group_idx: int | None = None
+    # Store events carry cache-spec metadata so consumers can classify and
+    # filter groups as they are learned. Remove events only need group_idx+hash.
+    kv_cache_spec_kind: str | None = None
+    kv_cache_spec_sliding_window: int | None = None
+
     def __hash__(self) -> int:
         return hash(
             (
@@ -77,6 +83,9 @@ def __hash__(self) -> int:
                 self.lora_id,
                 self.medium,
                 tuple(self.extra_keys) if self.extra_keys else None,
+                self.group_idx,
+                self.kv_cache_spec_kind,
+                self.kv_cache_spec_sliding_window,
             )
         )
 
@@ -84,9 +93,16 @@ def __hash__(self) -> int:
 class BlockRemoved(KVCacheEvent):
     block_hashes: list[ExternalBlockHash]
     medium: str | None
+    group_idx: int | None = None
 
     def __hash__(self) -> int:
-        return hash((tuple(self.block_hashes), self.medium))
+        return hash(
+            (
+                tuple(self.block_hashes),
+                self.medium,
+                self.group_idx,
+            )
+        )
 
 
 class AllBlocksCleared(KVCacheEvent):
diff --git a/vllm/distributed/kv_transfer/README.md b/vllm/distributed/kv_transfer/README.md
index 39377aabcce3..64be074cf9f8 100644
--- a/vllm/distributed/kv_transfer/README.md
+++ b/vllm/distributed/kv_transfer/README.md
@@ -22,7 +22,7 @@ NOTE: If you want to not only transfer KV caches, but adjust the model execution
 
 ## Disaggregated prefilling
 
-The example usage is in [this file](../../../examples/online_serving/disaggregated_prefill.sh).
+The example usage is in [this file](../../../examples/disaggregated/disaggregated_prefill.sh).
 
 Here is the diagram of how we run disaggregated prefilling.
 
diff --git a/vllm/distributed/kv_transfer/kv_connector/factory.py b/vllm/distributed/kv_transfer/kv_connector/factory.py
index b677c5885bb0..30df07c0918a 100644
--- a/vllm/distributed/kv_transfer/kv_connector/factory.py
+++ b/vllm/distributed/kv_transfer/kv_connector/factory.py
@@ -44,14 +44,12 @@ def create_connector(
         cls,
         config: "VllmConfig",
         role: KVConnectorRole,
-        kv_cache_config: "KVCacheConfig | None" = None,
+        kv_cache_config: "KVCacheConfig",
     ) -> KVConnectorBase:
         kv_transfer_config = config.kv_transfer_config
         if kv_transfer_config is None:
             raise ValueError("kv_transfer_config must be set to create a connector")
-        connector_cls, compat_sig = cls._get_connector_class_with_compat(
-            kv_transfer_config
-        )
+        connector_cls = cls.get_connector_class(kv_transfer_config)
 
         # check if the connector supports HMA
         hma_enabled = not config.scheduler_config.disable_hybrid_kv_cache_manager
@@ -74,12 +72,7 @@ def create_connector(
         # - Co-locate with worker process
         # - Should only be used inside the forward context & attention layer
         # We build separately to enforce strict separation
-        if compat_sig:
-            # Old signature: __init__(self, vllm_config, role)
-            return connector_cls(config, role)
-        else:
-            # New signature: __init__(self, vllm_config, role, kv_cache_config)
-            return connector_cls(config, role, kv_cache_config)
+        return connector_cls(config, role, kv_cache_config)
 
     @classmethod
     def get_connector_class_by_name(
@@ -100,19 +93,17 @@ def get_connector_class_by_name(
         return cls._registry[connector_name]()
 
     @classmethod
-    def _get_connector_class_with_compat(
+    def get_connector_class(
         cls, kv_transfer_config: "KVTransferConfig"
-    ) -> tuple[type[KVConnectorBaseType], bool]:
+    ) -> type[KVConnectorBaseType]:
         connector_name = kv_transfer_config.kv_connector
         if connector_name is None:
             raise ValueError("Connector name is not set in KVTransferConfig")
-        compat_sig = False
-        if connector_name in cls._registry:
-            connector_cls = cls._registry[connector_name]()
-        else:
-            connector_module_path = kv_transfer_config.kv_connector_module_path
-            if connector_module_path is None:
-                raise ValueError(f"Unsupported connector type: {connector_name}")
+        connector_module_path = kv_transfer_config.kv_connector_module_path
+        if connector_module_path is not None and not connector_module_path:
+            raise ValueError("kv_connector_module_path cannot be an empty string.")
+        if connector_module_path:
+            # External module path takes priority over internal registry.
             connector_module = importlib.import_module(connector_module_path)
             try:
                 connector_cls = getattr(connector_module, connector_name)
@@ -122,20 +113,18 @@ def _get_connector_class_with_compat(
                 ) from e
             connector_cls = cast(type[KVConnectorBaseType], connector_cls)
             if not supports_kw(connector_cls, "kv_cache_config"):
-                compat_sig = True
-                logger.warning(
-                    "Connector %s uses deprecated signature with 2 required arguments. "
-                    "Please update to include kv_cache_config as the second argument.",
-                    connector_cls.__name__,
+                msg = (
+                    f"Connector {connector_cls.__name__} uses deprecated "
+                    "2-argument constructor signature. External v1 KV "
+                    "connectors must accept kv_cache_config as the third "
+                    "constructor argument and pass it to super().__init__()."
                 )
-        return connector_cls, compat_sig
-
-    @classmethod
-    def get_connector_class(
-        cls, kv_transfer_config: "KVTransferConfig"
-    ) -> type[KVConnectorBaseType]:
-        """Get the connector class by name."""
-        connector_cls, _ = cls._get_connector_class_with_compat(kv_transfer_config)
+                logger.error(msg)
+                raise ValueError(msg)
+        elif connector_name in cls._registry:
+            connector_cls = cls._registry[connector_name]()
+        else:
+            raise ValueError(f"Unsupported connector type: {connector_name}")
         return connector_cls
 
 
@@ -175,7 +164,7 @@ def get_connector_class(
 
 KVConnectorFactory.register_connector(
     "NixlConnector",
-    "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector",
+    "vllm.distributed.kv_transfer.kv_connector.v1.nixl",
     "NixlConnector",
 )
 
@@ -202,14 +191,29 @@ def get_connector_class(
     "vllm.distributed.kv_transfer.kv_connector.v1.decode_bench_connector",
     "DecodeBenchConnector",
 )
+
 KVConnectorFactory.register_connector(
     "MooncakeConnector",
     "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.mooncake_connector",
     "MooncakeConnector",
 )
-
+KVConnectorFactory.register_connector(
+    "MooncakeStoreConnector",
+    "vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.connector",
+    "MooncakeStoreConnector",
+)
 KVConnectorFactory.register_connector(
     "FlexKVConnectorV1",
     "vllm.distributed.kv_transfer.kv_connector.v1.flexkv_connector",
     "FlexKVConnectorV1",
 )
+KVConnectorFactory.register_connector(
+    "SimpleCPUOffloadConnector",
+    "vllm.distributed.kv_transfer.kv_connector.v1.simple_cpu_offload_connector",
+    "SimpleCPUOffloadConnector",
+)
+KVConnectorFactory.register_connector(
+    "HF3FSKVConnector",
+    "vllm.distributed.kv_transfer.kv_connector.v1.hf3fs.hf3fs_connector",
+    "HF3FSKVConnector",
+)
diff --git a/vllm/distributed/kv_transfer/kv_connector/utils.py b/vllm/distributed/kv_transfer/kv_connector/utils.py
index 1f889c6c838a..a86b25c75a4f 100644
--- a/vllm/distributed/kv_transfer/kv_connector/utils.py
+++ b/vllm/distributed/kv_transfer/kv_connector/utils.py
@@ -301,7 +301,7 @@ def kv_postprocess_blksize_and_layout_on_receive(cache, indices, block_size_rati
 
 def yield_req_data(
     scheduler_output,
-) -> Iterator[tuple[str, tuple[list[int], ...], bool]]:
+) -> Iterator[tuple[str, tuple[list[int], ...] | None, bool]]:
     """
     Yields:
         (req_id, new_block_id_groups, preempted)
@@ -319,31 +319,105 @@ def yield_req_data(
     )
 
 
-@dataclass
-class TpKVTopology:
+def get_current_attn_backends(
+    vllm_config: VllmConfig, layer_names: list[str] | None = None
+) -> list[type[AttentionBackend]]:
+    """Get all distinct attention backends for the given layers.
+
+    Args:
+        vllm_config: The current vLLM configuration.
+        layer_names: Optional list of layer names to scope the lookup.
+            When None, all attention layers are considered.
+
+    Returns:
+        Deduplicated list of attention backend classes.
     """
-    Helper class for tensor parallel and KV topology information for
-    mapping between local and remote TP workers.
+    layer_type = cast(type[Any], AttentionLayerBase)
+    layers = get_layers_from_vllm_config(vllm_config, layer_type, layer_names)
+    if layers:
+        seen: dict[str, type[AttentionBackend]] = {}
+        for layer in layers.values():
+            backend = layer.get_attn_backend()
+            seen[backend.full_cls_name()] = backend
+        return list(seen.values())
+
+    # Fallback for tests, when static_forward_context is empty.
+    logger.debug(
+        "No layers found in the vLLM config. Falling back to default attention backend."
+    )
+    from vllm.v1.attention.selector import get_attn_backend
+
+    return [
+        get_attn_backend(
+            head_size=vllm_config.model_config.get_head_size(),
+            dtype=vllm_config.model_config.dtype,
+            kv_cache_dtype=vllm_config.cache_config.cache_dtype,
+            use_mla=vllm_config.model_config.use_mla,
+        )
+    ]
+
+
+def get_current_attn_backend(
+    vllm_config: VllmConfig, layer_names: list[str] | None = None
+) -> type[AttentionBackend]:
+    """Get the first attention backend for the given layers."""
+    return get_current_attn_backends(vllm_config, layer_names)[0]
+
+
+# ---- Per-engine transfer info ----
+
+
+@dataclass(frozen=True)
+class EngineTransferInfo:
+    """Common per-remote-engine transfer state, computed at handshake.
+
+    Stored per ``engine_id`` inside ``TransferTopology._engines``.
     """
 
+    remote_tp_size: int
+
+    remote_block_len: int
+    """Block length (bytes)"""
+
+    remote_block_size: int
+    """Tokens per block."""
+
+    remote_physical_blocks_per_logical: int
+    """Physical blocks per logical block."""
+
+
+# ---- Transfer topology ----
+
+
+@dataclass
+class TransferTopology:
+    """Single source of truth for local TP identity and per-engine remote info."""
+
     tp_rank: int
-    remote_tp_size: dict[EngineId, int]
+    tp_size: int
+    block_size: int
+    engine_id: EngineId
     is_mla: bool
+    is_mamba: bool
     total_num_kv_heads: int
     attn_backends: list[type[AttentionBackend]]
-    engine_id: EngineId
-    remote_block_size: dict[EngineId, int]
     tensor_shape: torch.Size | None = None
-    is_mamba: bool = False
 
     def __post_init__(self):
+        self.local_physical_heads = max(1, self.total_num_kv_heads // self.tp_size)
+
+        self._engines: dict[EngineId, EngineTransferInfo] = {}
+
         # Figure out whether the first dimension of the cache is K/V
-        # or num_blocks. This is used to register the memory regions correctly.
+        # or num_blocks.
         attn_backend = self.attn_backends[0]
         if not self.is_mamba:
             _MOCK_BLOCK_SIZE = 16
             kv_cache_shape: tuple[int, ...] = attn_backend.get_kv_cache_shape(
-                num_blocks=1, block_size=_MOCK_BLOCK_SIZE, num_kv_heads=1, head_size=1
+                num_blocks=1,
+                block_size=_MOCK_BLOCK_SIZE,
+                num_kv_heads=1,
+                head_size=1,
             )
             logger.debug("Test kv_cache_shape: %s", kv_cache_shape)
         # Non-MLA backends caches have 5 dims [2, num_blocks, H,N,D],
@@ -358,11 +432,9 @@ def __post_init__(self):
             self._cross_layers_blocks = (
                 len(self.tensor_shape) == len(kv_cache_shape) + 1
             )
-            self.tensor_shape: torch.Size
 
         if self._cross_layers_blocks:
             logger.debug("Using cross-layer KV cache")
-            # prepend layers dimension
             _MOCK_NUM_LAYERS = 80
             kv_cache_shape = (_MOCK_NUM_LAYERS,) + kv_cache_shape
             try:
@@ -372,15 +444,46 @@ def __post_init__(self):
             except (AttributeError, NotImplementedError):
                 assert self.tensor_shape is not None
                 kv_cache_stride_order = tuple(range(len(self.tensor_shape)))
-
-            # In case of cross layers permute kv_cache_shape according to
-            # stride_order to retrieve physical position of block_size
             kv_cache_shape = tuple(kv_cache_shape[i] for i in kv_cache_stride_order)
 
+    # ============================================================
+    # Engine registration
+    # ============================================================
+
+    def register_remote_engine(
+        self,
+        remote_engine_id: EngineId,
+        info: EngineTransferInfo,
+    ) -> EngineTransferInfo:
+        """Register a remote engine, unifying worker dicts state.
+
+        The caller (worker) is responsible for computing the info via
+        the transfer policy.  This method only stores and deduplicates.
+        """
+        assert remote_engine_id != self.engine_id, (
+            f"Cannot register local engine {self.engine_id} as remote. "
+            f"Local identity is set via __init__ params."
+        )
+        if remote_engine_id in self._engines:
+            return self._engines[remote_engine_id]
+        self._engines[remote_engine_id] = info
+        return info
+
+    def get_engine_info(self, remote_engine_id: EngineId) -> EngineTransferInfo:
+        return self._engines[remote_engine_id]
+
+    # ============================================================
+    # Layout properties
+    # ============================================================
+
     @property
     def is_kv_layout_blocks_first(self) -> bool:
         return self._is_kv_layout_blocks_first
 
+    @property
+    def cross_layers_blocks(self) -> bool:
+        return self._cross_layers_blocks
+
     @property
     def split_k_and_v(self) -> bool:
         # Whether to register regions for K and V separately (when present).
@@ -388,29 +491,16 @@ def split_k_and_v(self) -> bool:
             self._cross_layers_blocks or self.is_mla or self.is_kv_layout_blocks_first
         )
 
-    @property
-    def tp_size(self) -> int:
-        return self.remote_tp_size[self.engine_id]
+    # ============================================================
+    # Common methods
+    # ============================================================
 
-    @property
-    def block_size(self) -> int:
-        return self.remote_block_size[self.engine_id]
-
-    @property
-    def cross_layers_blocks(self) -> bool:
-        return self._cross_layers_blocks
+    def tp_ratio(self, remote_tp_size: int) -> int:
+        """Calculate the tensor parallel ratio between local and remote TP.
 
-    def tp_ratio(
-        self,
-        remote_tp_size: int,
-    ) -> int:
-        """
-        Calculate the tensor parallel ratio between local and remote TP.
-        We can think of it as the number of local TP workers-per-remote TP
-        workers. Local workers will read from the same remote TP worker in
-        groups of size `tp_ratio`.If remote tp_size > local tp_size, the
-        ratio is flipped (remote_size/local_size) and the returned value is
-        negative.
+        Positive when local_tp >= remote_tp (local workers read from the
+        same remote worker in groups of size ``tp_ratio``).  Negative when
+        remote_tp > local_tp (ratio is flipped).
         """
         if self.tp_size >= remote_tp_size:
             assert self.tp_size % remote_tp_size == 0, (
@@ -418,76 +508,59 @@ def tp_ratio(
                 f"by remote tensor parallel size {remote_tp_size}."
             )
             return self.tp_size // remote_tp_size
-
         assert remote_tp_size % self.tp_size == 0, (
             f"Remote tensor parallel size {remote_tp_size} is not divisible "
             f"by local tensor parallel size {self.tp_size}."
         )
-        # P TP > D TP case, return the ratio as negative
-        return -remote_tp_size // self.tp_size
+        return -(remote_tp_size // self.tp_size)
 
-    def block_size_ratio(
-        self,
-        remote_block_size: int,
-    ) -> int:
-        """
-        Calculate the block size ratio between local and remote TP.
-        """
+    def block_size_ratio(self, remote_block_size: int) -> int:
+        """Calculate the block size ratio between local and remote."""
         assert self.block_size % remote_block_size == 0, (
             f"Local block size {self.block_size} is not divisible "
             f"by remote block size {remote_block_size} or vice versa."
         )
         return self.block_size // remote_block_size
 
-    def tp_ratio_from_engine_id(
-        self,
-        remote_engine_id: EngineId,
-    ) -> int:
-        remote_tp_size = self.remote_tp_size[remote_engine_id]
-        return self.tp_ratio(remote_tp_size)
-
-    def block_size_ratio_from_engine_id(
-        self,
-        remote_engine_id: EngineId,
-    ) -> int:
-        remote_block_size = self.remote_block_size[remote_engine_id]
-        return self.block_size_ratio(remote_block_size)
-
-    def is_kv_replicated(self, engine_id: EngineId) -> bool:
-        """
-        Whether the KV cache is replicated across TP workers due to the
+    def is_kv_replicated(self, remote_engine_id: EngineId) -> bool:
+        """Whether the KV cache is replicated across TP workers due to the
         number of TP workers being greater than the number of KV heads.
         """
-        tp_size = self.remote_tp_size[engine_id]
-        return tp_size // self.total_num_kv_heads >= 1
+        return self._engines[remote_engine_id].remote_tp_size > self.total_num_kv_heads
 
     def replicates_kv_cache(self, remote_engine_id: EngineId) -> bool:
         # MLA is always replicated as the hidden dim can't be split.
         return self.is_mla or self.is_kv_replicated(remote_engine_id)
 
-    def get_target_remote_ranks(
-        self,
-        remote_tp_size: int,
-    ) -> list[int]:
-        """
-        Get the remote TP rank (on P) that the current local TP rank
-        (on D) will read from. When remote tp_size > local tp_size, we
-        read from multiple remote ranks.
+    @property
+    def local_replicates_kv_cache(self) -> bool:
+        """Whether the local engine's KV cache is replicated."""
+        return self.is_mla or self.tp_size > self.total_num_kv_heads
+
+    def handshake_target_ranks(self, remote_tp_size: int) -> list[int]:
+        """Pre-registration: compute which remote TP ranks to handshake with.
+
+        Pure math based on local/remote TP sizes — does not require
+        the remote engine to be registered yet.
         """
         tp_ratio = self.tp_ratio(remote_tp_size)
         if tp_ratio > 0:
             return [self.tp_rank // tp_ratio]
+        abs_ratio = -tp_ratio
+        return [self.tp_rank * abs_ratio + i for i in range(abs_ratio)]
 
-        # P TP > D TP case, D reads from |tp_ratio| remote workers.
-        tp_ratio = -tp_ratio
-        return [self.tp_rank * tp_ratio + i for i in range(tp_ratio)]
-
-    def get_target_remote_ranks_from_engine_id(
-        self,
-        remote_engine_id: EngineId,
-    ) -> list[int]:
-        remote_tp_size = self.remote_tp_size[remote_engine_id]
-        return self.get_target_remote_ranks(remote_tp_size)
+    def target_remote_ranks(self, remote_engine_id: EngineId) -> list[int]:
+        """Get the remote TP rank(s) that the current local TP rank will
+        read from.  When remote tp_size > local tp_size, reads from
+        multiple remote ranks.
+        """
+        info = self._engines[remote_engine_id]
+        tp_ratio = self.tp_ratio(info.remote_tp_size)
+        if tp_ratio > 0:
+            return [self.tp_rank // tp_ratio]
+        # remote TP > local TP: read from |tp_ratio| remote workers
+        abs_ratio = -tp_ratio
+        return [self.tp_rank * abs_ratio + i for i in range(abs_ratio)]
 
     def get_transfer_cache_regions(
         self, cache: torch.Tensor, layer_spec: "KVCacheSpec"
@@ -496,64 +569,33 @@ def get_transfer_cache_regions(
         also accounting for hybrid SSM models specificities.
         """
         if isinstance(layer_spec, MambaSpec):
-            # Register the whole kv cache shared tensor, including SSM/Conv. This is
-            # similar to FI with the difference that SSM/Conv have different sizes
+            # Register the whole kv cache shared tensor, including
+            # SSM/Conv.
             conv, ssm = cache
             return [conv]
 
-        # Check may be hacky but it's matching `_update_hybrid_attention_mamba_layout`.
+        # Check may be hacky but it's matching
+        # `_update_hybrid_attention_mamba_layout`.
         if self.is_mamba and cache.shape[0] == 2:
-            # When MAMBA is present, all backends are blocks first, so that blocks
-            # can be shared between attention layers and mamba layers. Runner
-            # `_update_hybrid_attention_mamba_layout` already adjusted strides
-            # for FlashAttn-like backends so its num_blocks first.
-            # Swap [2<>num_blocks] dims to get required layout for hybrid SSM.
+            # When MAMBA is present, all backends are blocks first, so
+            # that blocks can be shared between attention layers and mamba
+            # layers.  Runner already adjusted strides for FlashAttn-like
+            # backends so its num_blocks first.
+            # Swap [2<>num_blocks] dims for hybrid SSM layout.
             cache = cache.transpose(0, 1)
 
         # Regular case: backends like FA register K/V in separate regions
         return cache if self.split_k_and_v else [cache]
 
-
-def get_current_attn_backends(
-    vllm_config: VllmConfig, layer_names: list[str] | None = None
-) -> list[type[AttentionBackend]]:
-    """Get all distinct attention backends for the given layers.
-
-    Args:
-        vllm_config: The current vLLM configuration.
-        layer_names: Optional list of layer names to scope the lookup.
-            When None, all attention layers are considered.
-
-    Returns:
-        Deduplicated list of attention backend classes.
-    """
-    layer_type = cast(type[Any], AttentionLayerBase)
-    layers = get_layers_from_vllm_config(vllm_config, layer_type, layer_names)
-    if layers:
-        seen: dict[str, type[AttentionBackend]] = {}
-        for layer in layers.values():
-            backend = layer.get_attn_backend()
-            seen[backend.full_cls_name()] = backend
-        return list(seen.values())
-
-    # Fallback for tests, when static_forward_context is empty.
-    logger.debug(
-        "No layers found in the vLLM config. Falling back to default attention backend."
-    )
-    from vllm.v1.attention.selector import get_attn_backend
-
-    return [
-        get_attn_backend(
-            head_size=vllm_config.model_config.get_head_size(),
-            dtype=vllm_config.model_config.dtype,
-            kv_cache_dtype=vllm_config.cache_config.cache_dtype,
-            use_mla=vllm_config.model_config.use_mla,
+    def describe(self, remote_engine_id: EngineId) -> str:
+        """One-line summary of transfer config for logging."""
+        info = self._engines[remote_engine_id]
+        return (
+            f"TransferTopology("
+            f"tp_ratio={self.tp_ratio(info.remote_tp_size)}, "
+            f"num_kv_heads={self.total_num_kv_heads if not self.is_mla else 1}, "
+            f"local_tp={self.tp_size}, "
+            f"remote_tp={info.remote_tp_size}, "
+            f"local_rank={self.tp_rank}, "
+            f"remote_block_len={info.remote_block_len})"
         )
-    ]
-
-
-def get_current_attn_backend(
-    vllm_config: VllmConfig, layer_names: list[str] | None = None
-) -> type[AttentionBackend]:
-    """Get the first attention backend for the given layers."""
-    return get_current_attn_backends(vllm_config, layer_names)[0]
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/base.py b/vllm/distributed/kv_transfer/kv_connector/v1/base.py
index ef143cba7fb5..fb5658da887a 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/base.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/base.py
@@ -62,6 +62,7 @@
         PromMetricT,
     )
     from vllm.forward_context import ForwardContext
+    from vllm.v1.core.block_pool import BlockPool
     from vllm.v1.core.kv_cache_manager import KVCacheBlocks
     from vllm.v1.kv_cache_interface import KVCacheConfig
     from vllm.v1.request import Request
@@ -184,7 +185,7 @@ def __init__(
         self,
         vllm_config: "VllmConfig",
         role: KVConnectorRole,
-        kv_cache_config: "KVCacheConfig | None" = None,
+        kv_cache_config: "KVCacheConfig",
     ):
         logger.warning(
             "Initializing KVConnectorBase_V1. This API is experimental and "
@@ -197,13 +198,6 @@ def __init__(
         else:
             raise ValueError("kv_transfer_config must be set for KVConnectorBase_V1")
         self._kv_cache_config = kv_cache_config
-        if self._kv_cache_config is None:
-            logger.warning(
-                "KVConnectorBase_V1 initialized without kv_cache_config. "
-                "This is deprecated - please update your connector to accept "
-                "kv_cache_config as the third constructor argument and pass it "
-                "to super().__init__()."
-            )
         self._role = role
 
     @property
@@ -446,6 +440,16 @@ def build_connector_worker_meta(self) -> KVConnectorWorkerMetadata | None:
     # Scheduler-side methods
     # ==============================
 
+    def bind_gpu_block_pool(self, gpu_block_pool: "BlockPool") -> None:
+        """
+        Bind the GPU block pool to the connector for per-GPU block status tracking.
+        For example, inc/dec ref counts, or iterate over the prefix cache blocks.
+
+        Args:
+            gpu_block_pool: the GPU block pool.
+        """
+        return
+
     @abstractmethod
     def get_num_new_matched_tokens(
         self,
@@ -517,6 +521,14 @@ def build_connector_meta(
         """
         pass
 
+    def on_new_request(self, request: "Request") -> None:
+        """Called by the scheduler when a new request is added.
+
+        Connectors can override this to inspect the request and perform
+        bookkeeping. The default implementation is a no-op.
+        """
+        return
+
     def update_connector_output(self, connector_output: KVConnectorOutput):
         """
         Update KVConnector state from worker-side connectors output.
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/decode_bench_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/decode_bench_connector.py
index 6e9e757ffbd5..0f835b1eebba 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/decode_bench_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/decode_bench_connector.py
@@ -40,7 +40,10 @@
     KVConnectorBase_V1,
     KVConnectorRole,
 )
-from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorMetadata
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    KVConnectorMetadata,
+    SupportsHMA,
+)
 from vllm.logger import init_logger
 from vllm.utils.math_utils import cdiv
 from vllm.v1.attention.backend import AttentionMetadata
@@ -71,7 +74,7 @@ class DecodeBenchConnectorMetadata(KVConnectorMetadata):
     reqs_to_fill: dict[str, tuple[tuple[list[int], ...], int]]
 
 
-class DecodeBenchConnector(KVConnectorBase_V1):
+class DecodeBenchConnector(KVConnectorBase_V1, SupportsHMA):
     """
     A KV Connector for decode instance performance testing.
 
@@ -84,7 +87,7 @@ def __init__(
         self,
         vllm_config: "VllmConfig",
         role: KVConnectorRole,
-        kv_cache_config: "KVCacheConfig | None" = None,
+        kv_cache_config: "KVCacheConfig",
     ):
         super().__init__(vllm_config, role, kv_cache_config)
 
@@ -164,6 +167,17 @@ def request_finished(
         self.connector_scheduler.request_finished(request)
         return False, None
 
+    def request_finished_all_groups(
+        self,
+        request: "Request",
+        block_ids: tuple[list[int], ...],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        # HMA-enabled path: same cleanup as the single-group variant since
+        # this connector owns no external state per block.
+        assert self.connector_scheduler is not None
+        self.connector_scheduler.request_finished(request)
+        return False, None
+
 
 class DecodeBenchConnectorScheduler:
     """Scheduler-side implementation for DecodeBenchConnector."""
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/example_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/example_connector.py
index 24e156561dfb..8ed18894e8a3 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/example_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/example_connector.py
@@ -92,7 +92,7 @@ def __init__(
         self,
         vllm_config: "VllmConfig",
         role: KVConnectorRole,
-        kv_cache_config: "KVCacheConfig | None" = None,
+        kv_cache_config: "KVCacheConfig",
     ):
         super().__init__(
             vllm_config=vllm_config,
@@ -188,7 +188,9 @@ def inject_kv_into_layer(
                 filename = self._generate_filename_debug(
                     layer_name, request.token_ids, request.mm_hashes
                 )
-                kv_cache = safetensors.torch.load_file(filename)["kv_cache"].cuda()
+                kv_cache = safetensors.torch.load_file(
+                    filename, device=str(kv_cache_layer.device)
+                )["kv_cache"]
                 if isinstance(attn_metadata, dict):
                     inject_kv_into_layer(
                         kv_cache_layer,
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/example_hidden_states_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/example_hidden_states_connector.py
index fcd1f365a715..3e4e6750858a 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/example_hidden_states_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/example_hidden_states_connector.py
@@ -1,18 +1,25 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import fcntl
 import os
+from concurrent.futures import Future, ThreadPoolExecutor
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any, Optional
+from functools import partial
+from importlib.metadata import version
+from typing import TYPE_CHECKING, Any
 
-import safetensors
 import torch
+from packaging.version import Version
+from safetensors.torch import load_file, save_file
 
 from vllm.config import VllmConfig, get_layers_from_vllm_config
 from vllm.distributed.kv_transfer.kv_connector.v1.base import (
     KVConnectorBase_V1,
     KVConnectorMetadata,
     KVConnectorRole,
+    SupportsHMA,
 )
+from vllm.forward_context import get_forward_context
 from vllm.logger import init_logger
 from vllm.v1.attention.backend import AttentionMetadata
 from vllm.v1.core.sched.output import NewRequestData, SchedulerOutput
@@ -30,13 +37,42 @@ def extract_from_kv_cache(
     slot_mapping: torch.Tensor,
     num_tokens: int,
 ) -> torch.Tensor:
-    """Extract data from KV cache
-    Assume the shape of the kv_cache is (num_pages, page_size, num_heads, head_size)
+    """Extract data from KV cache."""
+    block_size = kv_cache.shape[1]
+    return kv_cache[slot_mapping // block_size, slot_mapping % block_size][:num_tokens]
+
+
+def load_hidden_states(path: str) -> dict[str, torch.Tensor]:
+    """Load hidden states written by ExampleHiddenStatesConnector.
+
+    Blocks (without polling) until the async write is complete by
+    acquiring a shared flock on the companion lock file.  The kernel
+    puts the caller to sleep until the writer releases its exclusive lock.
+
+    Args:
+        path: The file path returned in kv_transfer_params["hidden_states_path"].
+
+    Returns:
+        Dict with "hidden_states" and "token_ids" tensors.
     """
+    lock_path = path + ".lock"
+    with open(lock_path) as lf:
+        fcntl.flock(lf, fcntl.LOCK_SH)  # sleeps until writer releases LOCK_EX
+        data = load_file(path, device="cpu")
+    return data
+
 
-    padded_kv = kv_cache.flatten(0, 1)[slot_mapping]
-    # shape: [len(slot_mapping), num_heads, head_size]
-    return padded_kv[:num_tokens]  # shape: [num_tokens, num_heads, head_size]
+def cleanup_hidden_states(path: str, keep_hidden_states: bool = False) -> None:
+    """Clean up hidden states file and lock file after loading.
+
+    If keep_hidden_states is True, only removes the lock file
+    and keeps the hidden states file.
+    """
+    lock_path = path + ".lock"
+    if os.path.exists(lock_path):
+        os.remove(lock_path)
+    if not keep_hidden_states and os.path.exists(path):
+        os.remove(path)
 
 
 @dataclass
@@ -47,8 +83,6 @@ class ReqMeta:
     filename: str
     # Request tokens
     token_ids: torch.Tensor
-    # Slot mappings, should have the same length as token_ids
-    slot_mapping: torch.Tensor
     # Whether this request is a new request or partially computed already
     new_req: bool
 
@@ -57,24 +91,12 @@ def make_meta(
         req_id: str,
         filename: str,
         token_ids: list[int],
-        block_ids: list[int],
-        block_size: int,
         new_req: bool,
     ) -> "ReqMeta":
-        token_ids_tensor = torch.tensor(token_ids)
-        block_ids_tensor = torch.tensor(block_ids)
-        num_blocks = block_ids_tensor.shape[0]
-        block_offsets = torch.arange(0, block_size)
-        slot_mapping = (
-            block_offsets.reshape((1, block_size))
-            + block_ids_tensor.reshape((num_blocks, 1)) * block_size
-        )
-        slot_mapping = slot_mapping.flatten()
         return ReqMeta(
             req_id=req_id,
             filename=filename,
-            token_ids=token_ids_tensor,
-            slot_mapping=slot_mapping,
+            token_ids=torch.tensor(token_ids),
             new_req=new_req,
         )
 
@@ -88,18 +110,12 @@ def add_request(
         req_id: str,
         filename: str,
         token_ids: list[int],
-        block_ids: list[int],
-        block_size: int,
         new_req: bool = True,
     ) -> None:
-        self.requests.append(
-            ReqMeta.make_meta(
-                req_id, filename, token_ids, block_ids, block_size, new_req
-            )
-        )
+        self.requests.append(ReqMeta.make_meta(req_id, filename, token_ids, new_req))
 
 
-class ExampleHiddenStatesConnector(KVConnectorBase_V1):
+class ExampleHiddenStatesConnector(KVConnectorBase_V1, SupportsHMA):
     """
     Simple debug implementation of a HiddenStatesConnector.
 
@@ -120,7 +136,7 @@ def __init__(
         self,
         vllm_config: "VllmConfig",
         role: KVConnectorRole,
-        kv_cache_config: Optional["KVCacheConfig"] = None,
+        kv_cache_config: "KVCacheConfig",
     ):
         super().__init__(
             vllm_config=vllm_config,
@@ -135,6 +151,13 @@ def __init__(
         logger.info(self._kv_transfer_config)
         logger.info("Shared storage path is %s", self._storage_path)
 
+        if Version(version("safetensors")) < Version("0.8.0"):
+            logger.warning(
+                "safetensors < 0.8.0 holds the GIL during save_file, which "
+                "serializes the writer thread pool and hurts throughput. "
+                "Upgrade to safetensors >= 0.8.0 for better performance."
+            )
+
         assert self._vllm_config.speculative_config is not None, (
             "ExampleHiddenStatesConnector only works when using "
             "'extract_hidden_states' speculative method"
@@ -148,17 +171,97 @@ def __init__(
         self._active_requests: dict[str, NewRequestData] = {}
         self._req_blocks: dict[str, list[int]] = {}
 
+        # Async write infrastructure (worker-side).
+        # Dedicated CUDA stream for DtoH copies so they don't block
+        # the default stream (model forward). Thread pool for disk writes.
+        self._copy_stream: torch.cuda.Stream | None = None  # lazy init
+        self._executor = ThreadPoolExecutor(
+            max_workers=self._kv_transfer_config.get_from_extra_config(
+                "num_writer_threads", 8
+            ),
+            thread_name_prefix="vllm-hs-save",
+        )
+        # Whether to use a filesystem lock when writing files to shared storage.
+        # This is necessary for online transfer clients to avoid incomplete reads,
+        # but can be disabled for offline tasks that run tasks in batches to completion
+        self.use_lock = self._kv_transfer_config.get_from_extra_config(
+            "use_synchronization_lock", True
+        )
+        # (tensors_dict, copy_done_event, filename, req_id) queued by
+        # save_kv_layer, submitted to thread pool by wait_for_save.
+        self._pending_copies: list[
+            tuple[dict[str, torch.Tensor], torch.cuda.Event, str, str]
+        ] = []
+        # req_id → in-flight disk-write Future for that req_id.
+        self._req_futures: dict[str, Future] = {}
+        # req_id → CUDA event marking completion of the DtoH copy. Once
+        # this event is complete the request is considered "done sending"
+        # by get_finished; clients block on the per-file flock to wait for
+        # the disk write itself.
+        self._req_copy_events: dict[str, torch.cuda.Event] = {}
+        # req_ids reported as finished-generating by the scheduler,
+        # accumulated across get_finished calls.
+        self._accumulated_finished_req_ids: set[str] = set()
+
+    def _get_copy_stream(self) -> torch.cuda.Stream:
+        """Lazily create the copy stream (CUDA must be initialized)."""
+        if self._copy_stream is None:
+            self._copy_stream = torch.cuda.Stream()
+        return self._copy_stream
+
     # ==============================
     # Worker-side methods
     # ==============================
     def start_load_kv(self, *args, **kwargs: Any) -> None:
-        pass  # Empty implementation of abstract method
+        pass  # Store-only connector — nothing to load
 
     def wait_for_layer_load(self, layer_name: str) -> None:
-        pass  # Empty implementation of abstract method
+        pass  # Store-only connector — nothing to load
 
     def wait_for_save(self):
-        pass  # Empty implementation of abstract method
+        """Submit pending async copies to the thread pool for disk write.
+
+        For each pending write we acquire an exclusive flock on a
+        companion ``.lock`` file **before** submitting to the thread pool.
+        The thread worker releases the lock after the data file is fully
+        written.  Clients call :func:`load_hidden_states` which takes a
+        shared flock — the kernel sleeps the client until the writer is
+        done.  Because ``wait_for_save`` runs before the worker returns
+        output to the scheduler, the lock file is guaranteed to exist
+        (and be held) by the time the client receives the path.
+
+        The lock can be disabled via the "use_synchronization_lock" extra config.
+        """
+        for tensors, event, filename, req_id in self._pending_copies:
+            prior = self._req_futures.get(req_id)
+            assert prior is None, "Found another KV transfer request with same req_id!"
+
+            lock_fd = None
+            if self.use_lock:
+                # Create/open the lock file and acquire an exclusive lock.
+                # The lock is held by this fd; the thread worker will close
+                # the fd after writing, which releases the lock.
+                lock_path = filename + ".lock"
+                lock_fd = os.open(
+                    lock_path, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o644
+                )
+                fcntl.flock(lock_fd, fcntl.LOCK_EX)
+
+            future = self._executor.submit(
+                self._write_tensors, tensors, event, filename, lock_fd
+            )
+            self._req_copy_events[req_id] = event
+            self._req_futures[req_id] = future
+            future.add_done_callback(partial(self._on_write_done, req_id))
+        self._pending_copies.clear()
+
+    def _on_write_done(self, req_id: str, future: Future) -> None:
+        """Surface any exception from the disk-write thread and drop the
+        completed future from the in-flight tracking dict."""
+        self._req_futures.pop(req_id, None)
+        exc = future.exception()
+        if exc is not None:
+            logger.error("Hidden-states write failed for req_id=%s: %r", req_id, exc)
 
     def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
         from vllm.model_executor.models.extract_hidden_states import (
@@ -174,6 +277,26 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
             f"Expected 1 CacheOnlyAttentionLayer, got {len(self.cache_layers)}"
         )
 
+    @staticmethod
+    def _write_tensors(
+        tensors: dict[str, torch.Tensor],
+        event: torch.cuda.Event,
+        filename: str,
+        lock_fd: int | None,
+    ) -> None:
+        """Thread worker: wait for async DtoH copy, write to disk, release lock.
+
+        ``lock_fd`` is an open file descriptor on the companion ``.lock``
+        file with ``LOCK_EX`` already held.  Closing it releases the lock,
+        which unblocks any client sleeping on ``LOCK_SH``.
+        """
+        try:
+            event.synchronize()
+            save_file(tensors, filename)
+        finally:
+            if lock_fd is not None:
+                os.close(lock_fd)  # releases LOCK_EX
+
     def save_kv_layer(
         self,
         layer_name: str,
@@ -184,6 +307,10 @@ def save_kv_layer(
         """Start saving the KV cache of the layer from vLLM's paged buffer
         to the connector.
 
+        Launches an async DtoH copy on a dedicated CUDA stream.  The
+        actual disk write is deferred to wait_for_save() which submits
+        it to a thread pool.
+
         Args:
             layer_name (str): the name of the layer.
             kv_layer (torch.Tensor): the paged KV buffer of the current
@@ -206,15 +333,47 @@ def save_kv_layer(
         assert isinstance(connector_metadata, ExampleHiddenStatesConnectorMetadata)
 
         os.makedirs(self._storage_path, exist_ok=True)
+
+        copy_stream = self._get_copy_stream()
+
+        # Ensure the copy stream sees all prior writes on the default stream.
+        ready_event = torch.cuda.Event()
+        ready_event.record()
+        copy_stream.wait_event(ready_event)
+
+        slot_mapping = get_forward_context().slot_mapping[layer_name]  # type: ignore
+        offset = 0
         for request in connector_metadata.requests:
-            hidden_states = extract_from_kv_cache(
-                kv_layer, request.slot_mapping, request.token_ids.shape[0]
+            num_tokens = request.token_ids.shape[0]
+            with torch.cuda.stream(copy_stream):
+                req_slot_mapping_gpu = slot_mapping[offset : offset + num_tokens]
+                assert req_slot_mapping_gpu.device == kv_layer.device
+                offset += num_tokens
+
+                hidden_states_gpu = extract_from_kv_cache(
+                    kv_layer, req_slot_mapping_gpu, num_tokens
+                )
+                # Async DtoH copy into pinned host memory.
+                pinned_hs = torch.empty_like(
+                    hidden_states_gpu, device="cpu", pin_memory=True
+                )
+                pinned_hs.copy_(hidden_states_gpu, non_blocking=True)
+
+            # Record completion of this copy on the copy stream.
+            copy_done = torch.cuda.Event()
+            copy_done.record(copy_stream)
+
+            # token_ids is already on CPU (created in ReqMeta.make_meta).
+            assert not request.token_ids.is_cuda, (
+                "Expected token_ids on CPU, got CUDA tensor"
             )
             tensors = {
-                "hidden_states": hidden_states.detach().cpu(),
-                "token_ids": request.token_ids.detach().cpu(),
+                "hidden_states": pinned_hs,
+                "token_ids": request.token_ids.clone(),
             }
-            safetensors.torch.save_file(tensors, request.filename)
+            self._pending_copies.append(
+                (tensors, copy_done, request.filename, request.req_id)
+            )
 
     # ==============================
     # Scheduler-side methods
@@ -269,40 +428,11 @@ def build_connector_meta(
                 new_req.req_id,
                 filename=filename,
                 token_ids=token_ids,
-                block_ids=new_req.block_ids[0],
-                block_size=self._block_size,
             )
             self._request_filenames[new_req.req_id] = filename
             self._active_requests[new_req.req_id] = new_req
             self._req_blocks[new_req.req_id] = list(new_req.block_ids[0])
 
-        cached_reqs = scheduler_output.scheduled_cached_reqs
-        for i, req_id in enumerate(cached_reqs.req_ids):
-            if req_id not in self._active_requests:
-                continue
-
-            new_block_ids = cached_reqs.new_block_ids[i]
-
-            cached_req = self._active_requests[req_id]
-            req_block_ids = self._req_blocks[req_id]
-
-            if new_block_ids is None:
-                continue
-
-            block_ids = new_block_ids[0]
-
-            req_block_ids.extend(block_ids)
-            filename = os.path.join(self._storage_path, f"{req_id}.safetensors")
-
-            meta.add_request(
-                req_id=req_id,
-                filename=filename,
-                token_ids=cached_req.prompt_token_ids or [],
-                block_ids=req_block_ids,
-                block_size=self._block_size,
-                new_req=False,
-            )
-
         return meta
 
     def request_finished(
@@ -329,7 +459,38 @@ def request_finished(
         _ = self._active_requests.pop(req_id, None)
         _ = self._req_blocks.pop(req_id, None)
 
-        return False, {"hidden_states_path": req_filename}
+        return True, {"hidden_states_path": req_filename}
+
+    def get_finished(
+        self, finished_req_ids: set[str]
+    ) -> tuple[set[str] | None, set[str] | None]:
+        """Poll DtoH-copy completion for requests that finished generating.
+
+        The scheduler passes finished_req_ids to tell the worker which
+        requests are done generating.  We accumulate these across calls
+        and return a request as "finished sending" once its DtoH copy
+        event is complete (or if it never had a pending copy).  The
+        subsequent disk write may still be in flight; clients block on
+        the per-file flock to wait for it.
+        """
+        self._accumulated_finished_req_ids.update(finished_req_ids)
+
+        done_sending: set[str] = set()
+        for req_id in list(self._accumulated_finished_req_ids):
+            event = self._req_copy_events.get(req_id)
+            if event is None or event.query():
+                self._req_copy_events.pop(req_id, None)
+                done_sending.add(req_id)
+                self._accumulated_finished_req_ids.discard(req_id)
+
+        return done_sending or None, None
+
+    def request_finished_all_groups(
+        self,
+        request: "Request",
+        block_ids: tuple[list[int], ...],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        return self.request_finished(request, block_ids[0])
 
     @classmethod
     def get_required_kvcache_layout(cls, vllm_config: "VllmConfig") -> str | None:
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/__init__.py b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/hf3fs_client.py b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/hf3fs_client.py
new file mode 100644
index 000000000000..a54233453bb4
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/hf3fs_client.py
@@ -0,0 +1,298 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import logging
+import multiprocessing
+import os
+import threading
+from functools import wraps
+from pathlib import Path
+
+import torch
+import torch.utils.cpp_extension
+from torch.utils.cpp_extension import load
+
+root = Path(__file__).parent.resolve()
+cuda_include_path = os.path.join(torch.utils.cpp_extension.CUDA_HOME, "include")
+hf3fs_utils = load(
+    name="hf3fs_utils",
+    sources=[f"{root}/utils/hf3fs_utils.cpp"],
+    extra_include_paths=[cuda_include_path],
+)
+
+logger = logging.getLogger(__name__)
+
+HF3FS_AVAILABLE = True
+try:
+    from hf3fs_fuse.io import (
+        deregister_fd,
+        extract_mount_point,
+        make_ioring,
+        make_iovec,
+        register_fd,
+    )
+except ImportError:
+    HF3FS_AVAILABLE = False
+
+
+def rsynchronized():
+    def _decorator(func):
+        @wraps(func)
+        def wrapper(self, *args, **kwargs):
+            with self.rlock:
+                return func(self, *args, **kwargs)
+
+        return wrapper
+
+    return _decorator
+
+
+def wsynchronized():
+    def _decorator(func):
+        @wraps(func)
+        def wrapper(self, *args, **kwargs):
+            with self.wlock:
+                return func(self, *args, **kwargs)
+
+        return wrapper
+
+    return _decorator
+
+
+class Hf3fsClient:
+    def __init__(self, path: str, size: int, bytes_per_page: int, entries: int):
+        """Initialize the HF3FS client with hf3fs_fuse.
+
+        Args:
+            path: Path to the file used for storage
+            size: Total size of the storage file in bytes
+            bytes_per_page: Size of each page in bytes
+            entries: Maximum number of concurrent operations
+        """
+        if not HF3FS_AVAILABLE:
+            raise ImportError(
+                "hf3fs_fuse.io is not available. Please install the hf3fs_fuse package."
+            )
+
+        self.path = path
+        self.size = size
+        self.bytes_per_page = bytes_per_page
+        self.entries = entries
+
+        self._closed = False
+
+        self.file = None
+        self.shm_r = None
+        self.shm_w = None
+        self.ior_r = None
+        self.ior_w = None
+        self.iov_r = None
+        self.iov_w = None
+        try:
+            # Create the file if it doesn't exist and set its size
+            self.file = os.open(self.path, os.O_RDWR | os.O_CREAT)
+            os.ftruncate(self.file, size)
+            register_fd(self.file)
+
+            self.hf3fs_mount_point = extract_mount_point(path)
+            self.bs = self.bytes_per_page
+            self.shm_r = multiprocessing.shared_memory.SharedMemory(
+                size=self.bs * self.entries, create=True
+            )
+            self.shm_w = multiprocessing.shared_memory.SharedMemory(
+                size=self.bs * self.entries, create=True
+            )
+
+            self.shm_r_tensor = torch.frombuffer(self.shm_r.buf, dtype=torch.uint8)
+            self.shm_w_tensor = torch.frombuffer(self.shm_w.buf, dtype=torch.uint8)
+
+            numel = self.bs * self.entries
+            self.r_pinned = torch.empty(
+                numel,
+                dtype=torch.uint8,
+                device="cpu",
+                pin_memory=True,
+            )
+            self.w_pinned = torch.empty(
+                numel,
+                dtype=torch.uint8,
+                device="cpu",
+                pin_memory=True,
+            )
+
+            self.numa = -1
+            self.ior_r = make_ioring(
+                self.hf3fs_mount_point,
+                self.entries,
+                for_read=True,
+                timeout=1,
+                numa=self.numa,
+            )
+            self.ior_w = make_ioring(
+                self.hf3fs_mount_point,
+                self.entries,
+                for_read=False,
+                timeout=1,
+                numa=self.numa,
+            )
+            self.iov_r = make_iovec(self.shm_r, self.hf3fs_mount_point)
+            self.iov_w = make_iovec(self.shm_w, self.hf3fs_mount_point)
+            self.shm_r.unlink()
+            self.shm_w.unlink()
+
+            self.rlock = threading.RLock()
+            self.wlock = threading.RLock()
+
+            self.stream = torch.cuda.Stream()
+            self.stream_ptr_int = self.stream.cuda_stream
+
+        except Exception:
+            self._release_resources()
+            raise
+
+        logger.debug(
+            "Initialized HF3FS client with file: %s, size: %s bytes", path, size
+        )
+
+    def _release_resources(self) -> None:
+        """Release all acquired resources safely"""
+        # iov must be released before ioring and shm
+        for attr in ("iov_r", "iov_w", "ior_r", "ior_w"):
+            obj = getattr(self, attr, None)
+            if obj is not None:
+                del obj
+                setattr(self, attr, None)
+
+        for attr in ("shm_r", "shm_w"):
+            shm = getattr(self, attr, None)
+            if shm is not None:
+                try:
+                    shm.close()
+                except Exception as e:
+                    logger.warning("Failed to close %s: %s", attr, e)
+                setattr(self, attr, None)
+
+        if self.file is not None:
+            try:
+                deregister_fd(self.file)
+            except Exception as e:
+                logger.warning("deregister_fd failed: %s", e)
+            try:
+                os.close(self.file)
+            except OSError as e:
+                logger.warning("os.close failed: %s", e)
+            self.file = None
+
+    @rsynchronized()
+    def batch_read(self, offsets: list[int], tensors: list[torch.Tensor]) -> list[int]:
+        """Read data from the file at specified offsets into tensors.
+
+        Args:
+            offsets: List of byte offsets to read from
+            tensors: List of tensors to read data into
+
+        Returns:
+            List of operation results (0 for success, non-zero for error)
+        """
+        self.check(offsets, tensors)
+        assert self.ior_r is not None
+        assert self.iov_r is not None
+
+        # prepare
+        current = 0
+        for offset, tensor in zip(offsets, tensors):
+            size = tensor.numel() * tensor.itemsize
+            self.ior_r.prepare(
+                self.iov_r[current : current + size], True, self.file, offset
+            )
+            current += size
+
+        # submit
+        ionum = len(offsets)
+        resv = self.ior_r.submit().wait(min_results=ionum)
+
+        # results
+        with torch.cuda.stream(self.stream):
+            hf3fs_utils.read_shm(
+                self.shm_r_tensor, self.r_pinned, tensors, self.stream_ptr_int
+            )
+        results = [res.result for res in resv]
+
+        return results
+
+    @wsynchronized()
+    def batch_write(
+        self, offsets: list[int], tensors: list[torch.Tensor], event: torch.cuda.Event
+    ) -> list[int]:
+        """Write data from tensors to the file at specified offsets.
+
+        Args:
+            offsets: List of byte offsets to write to
+            tensors: List of tensors containing data to write
+
+        Returns:
+            List of operation results (0 for success, non-zero for error)
+        """
+
+        self.check(offsets, tensors)
+        assert self.ior_w is not None
+        assert self.iov_w is not None
+
+        # prepare
+        with torch.cuda.stream(self.stream):
+            self.stream.wait_event(event)
+            hf3fs_utils.write_shm(
+                tensors, self.shm_w_tensor, self.w_pinned, self.stream_ptr_int
+            )
+
+        current = 0
+        for offset, tensor in zip(offsets, tensors):
+            size = tensor.numel() * tensor.itemsize
+            self.ior_w.prepare(
+                self.iov_w[current : current + size], False, self.file, offset
+            )
+            current += size
+
+        # submit
+        ionum = len(offsets)
+        resv = self.ior_w.submit().wait(min_results=ionum)
+
+        # results
+        results = [res.result for res in resv]
+
+        return results
+
+    def check(self, offsets: list[int], tensors: list[torch.Tensor]) -> None:
+        sizes = [t.numel() * t.itemsize for t in tensors]
+        if any(
+            [
+                len(offsets) > self.entries,
+                len(offsets) != len(sizes),
+                any(
+                    offset < 0 or offset + size > self.size
+                    for offset, size in zip(offsets, sizes)
+                ),
+                any(size > self.bytes_per_page for size in sizes),
+            ]
+        ):
+            self.close()
+            raise ValueError("Hf3fsClient.check Failed")
+
+    def get_size(self) -> int:
+        """Get the total size of the storage file.
+
+        Returns:
+            Size of the file in bytes
+        """
+        return self.size
+
+    def close(self) -> None:
+        """Close the client and clean up resources."""
+        if self._closed:
+            return
+        self._closed = True
+        self._release_resources()
+
+    def flush(self) -> None:
+        """Flush any pending writes to disk."""
+        if not self._closed and self.file is not None:
+            os.fsync(self.file)
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/hf3fs_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/hf3fs_connector.py
new file mode 100644
index 000000000000..526375952fe6
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/hf3fs_connector.py
@@ -0,0 +1,1195 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+HF3FS KV Connector Implementation for vLLM.
+
+This module implements a KV connector that uses
+the 3FS for storing and retrieving KV cache data.
+
+Key components:
+1. HF3FSConnector: Main connector implementation
+   2.1 AsyncOperationManager: Manages async save/load operations with background threads
+   2.2 HF3FSConnectorMetadata: Container for connector metadata
+3. HF3FSMetadataServer: Mini Metadata server for HF3FS connector
+4. HF3FSClient: 3FS Client Implementation
+"""
+
+import atexit
+import concurrent
+import copy
+import hashlib
+import os
+import queue
+import signal
+import threading
+import time
+from concurrent.futures import Future
+from dataclasses import dataclass
+from queue import Empty
+from typing import Any, Optional
+
+import numpy as np
+import torch
+
+from vllm.config import VllmConfig
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    KVConnectorBase_V1,
+    KVConnectorMetadata,
+    KVConnectorRole,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.hf3fs.hf3fs_metadata_server import (
+    Hf3fsGlobalMetadataClient as Hf3fsMetadataClient,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.hf3fs.utils import (
+    gather_scatter_helper,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.hf3fs.utils.common import (
+    AtomicCounter,
+    HF3FSConnectorMetadata,
+    HF3FSRequestMetadata,
+    LoadBlockInfo,
+    RequestSchedulingState,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.hf3fs.utils.gather_scatter_helper import (  # noqa: E501
+    CopyBufferAllocator,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
+    KVConnectorPromMetrics,
+    KVConnectorStats,
+    PromMetric,
+    PromMetricT,
+)
+from vllm.distributed.parallel_state import get_tensor_model_parallel_rank
+from vllm.forward_context import ForwardContext
+from vllm.logger import init_logger
+from vllm.v1.attention.backend import AttentionMetadata
+from vllm.v1.core.kv_cache_manager import KVCacheBlocks
+from vllm.v1.core.sched.output import SchedulerOutput
+from vllm.v1.kv_cache_interface import KVCacheConfig
+from vllm.v1.metrics.utils import create_metric_per_engine
+from vllm.v1.request import Request
+
+HF3FS_AVAILABLE = True
+Hf3fsClient = None
+try:
+    from hf3fs_fuse.io import deregister_fd  # noqa: F401
+
+    from vllm.distributed.kv_transfer.kv_connector.v1.hf3fs.hf3fs_client import (
+        Hf3fsClient as _RealClient,
+    )
+
+    Hf3fsClient = _RealClient
+except Exception:
+    HF3FS_AVAILABLE = False
+    from vllm.distributed.kv_transfer.kv_connector.v1.hf3fs.utils.hf3fs_mock_client import (  # noqa: E501
+        Hf3fsClient as _MockClient,
+    )
+
+    Hf3fsClient = _MockClient  # type: ignore
+
+# Constants
+DEFAULT_MAX_IO_ENTRIES = 8
+
+logger = init_logger(__name__)
+
+
+# ============================================================================
+# Async Operation Management
+# ============================================================================
+
+
+class AsyncOperationManager:
+    """
+    Manages async save/load operations with background threads.
+    """
+
+    def __init__(self, connector: "HF3FSKVConnector"):
+        # Store connector reference and extract commonly used attributes
+        self._connector = connector
+        self._device = connector._device
+        self._dtype = connector._dtype
+        self._shape_per_page = connector._shape_per_page
+        self._bytes_per_page = connector._bytes_per_page
+        self._rank = connector._rank
+        self._numjobs = connector._numjobs
+        self._max_device_buffer_count = connector._max_device_buffer_count
+
+        # Operation tracking
+        self._save_futures: dict[str, list[Future]] = {}
+        self._load_futures: dict[str, Future] = {}
+        self._pending_finished_requests: set[str] = set()
+
+        # Initialize resources
+        self._init_cuda_resources()
+        self._init_worker_threads()
+
+        # Metrics
+        self.hf3fs_stats = HF3FSKVConnectorStats()
+
+        logger.info("AsyncOperationManager initialized for rank %d", self._rank)
+
+    def _init_cuda_resources(self) -> None:
+        """Initialize CUDA streams, events and buffer allocators."""
+        # CUDA streams for async operations
+        self._save_stream = torch.cuda.Stream()
+        self._load_stream = torch.cuda.Stream()
+        self._save_event = torch.cuda.Event()
+
+        # Buffer allocators for data copying
+        self._save_buffer_allocator = CopyBufferAllocator(
+            self._device,
+            self._dtype,
+            self._shape_per_page,
+            self._max_device_buffer_count,
+        )
+        self._load_buffer_allocator = CopyBufferAllocator(
+            self._device,
+            self._dtype,
+            self._shape_per_page,
+            self._max_device_buffer_count,
+        )
+
+    def _init_worker_threads(self) -> None:
+        """Initialize worker threads and I/O executor."""
+        # Thread synchronization
+        self._stop_event = threading.Event()
+        self._save_queue: queue.Queue[Any] = queue.Queue()
+        self._load_queue: queue.Queue[Any] = queue.Queue()
+
+        # I/O thread pool
+        self._io_executor = concurrent.futures.ThreadPoolExecutor(
+            max_workers=self._numjobs,
+            thread_name_prefix=f"HF3FS-Rank{self._rank}",
+        )
+
+        # Background worker threads
+        self._save_thread = threading.Thread(target=self._save_worker, daemon=True)
+        self._load_thread = threading.Thread(target=self._load_worker, daemon=True)
+        self._save_thread.start()
+        self._load_thread.start()
+
+    def submit_save_operation(self, request_id: str, block_ids, block_hashes) -> Future:
+        """Submit a save operation for async execution."""
+        future: Future[Any] = Future()
+        main_stream_event = torch.cuda.Event()
+        main_stream_event.record()
+        task = (request_id, block_ids, block_hashes, future, main_stream_event)
+        self._save_queue.put(task)
+
+        if request_id not in self._save_futures:
+            self._save_futures[request_id] = []
+        self._save_futures[request_id].append(future)
+        return future
+
+    def submit_load_operation(self, request_id: str, block_ids, block_hashes) -> Future:
+        """Submit a load operation for async execution."""
+        future: Future[Any] = Future()
+        task = (request_id, block_ids, block_hashes, future)
+        self._load_queue.put(task)
+        self._load_futures[request_id] = future
+        return future
+
+    def get_finished_operations(
+        self, finished_req_ids: set[str]
+    ) -> tuple[set[str], set[str]]:
+        completed_saves = self._check_completed_saves(finished_req_ids)
+        completed_loads = self._check_completed_loads()
+
+        if completed_saves or completed_loads:
+            logger.info(
+                "HF3FS Connector Completed: %d saves, %d loads operations",
+                len(completed_saves),
+                len(completed_loads),
+            )
+
+        return completed_saves, completed_loads
+
+    def _check_completed_saves(self, finished_req_ids: set[str]) -> set[str]:
+        """Check for completed save operations."""
+        completed = set()
+
+        # Check pending finished requests first
+        for request_id in list(self._pending_finished_requests):
+            if request_id in self._save_futures and self._all_saves_done(request_id):
+                completed.add(request_id)
+                self._save_futures.pop(request_id)
+                self._pending_finished_requests.remove(request_id)
+
+        # Process newly finished requests
+        for request_id in finished_req_ids:
+            if request_id in self._save_futures:
+                if self._all_saves_done(request_id):
+                    completed.add(request_id)
+                    self._save_futures.pop(request_id)
+                else:
+                    self._pending_finished_requests.add(request_id)
+            else:
+                completed.add(request_id)
+
+        return completed
+
+    def _check_completed_loads(self) -> set[str]:
+        """Check for completed load operations."""
+        completed = set()
+        for request_id in list(self._load_futures):
+            if self._load_futures[request_id].done():
+                completed.add(request_id)
+                self._load_futures.pop(request_id)
+        return completed
+
+    def _all_saves_done(self, request_id: str) -> bool:
+        """Check if all save operations for a request are completed."""
+        return all(future.done() for future in self._save_futures[request_id])
+
+    def _save_worker(self) -> None:
+        """Background worker for handling save operations."""
+        torch.accelerator.set_device_index(self._device)
+        while not self._stop_event.is_set():
+            try:
+                task = self._save_queue.get(block=True, timeout=1)
+                self._handle_save_task(task)
+            except Empty:
+                continue
+            except Exception as e:
+                logger.error("Save worker error: %s", e)
+
+    def _load_worker(self) -> None:
+        """Background worker for handling load operations."""
+        torch.accelerator.set_device_index(self._device)
+        while not self._stop_event.is_set():
+            try:
+                task = self._load_queue.get(block=True, timeout=1)
+                self._handle_load_task(task)
+            except Empty:
+                continue
+            except Exception as e:
+                logger.error("Load worker error: %s", e)
+
+    def _handle_save_task(self, task) -> None:
+        """Handle individual save task with proper stream synchronization."""
+        request_id, block_ids, block_hashes, future, main_stream_event = task
+        start_time = time.perf_counter()
+        buffers = None
+        try:
+            # Step1: Allocate storage pages
+            key_pairs = [(hash_val, "") for hash_val in block_hashes]
+            allocation_results = (
+                self._connector._metadata_client.allocate_pages_for_keys(
+                    self._rank, key_pairs
+                )
+            )
+
+            if any(result[1] < 0 for result in allocation_results):
+                return self._fail_task(
+                    "Saved", "Page allocation failed", request_id, future
+                )
+
+            page_indices = [result[1] for result in allocation_results]
+            offsets = [idx * self._bytes_per_page for idx in page_indices]
+
+            # Step2: Allocate buffers and gather KV cache data
+            buffers = self._save_buffer_allocator.alloc_buffer(len(block_ids))
+            if buffers is None:
+                return self._fail_task(
+                    "Saved",
+                    f"Buffer allocation failed for {len(block_ids)} blocks",
+                    request_id,
+                    future,
+                )
+
+            # Synchronize streams and gather data
+            with torch.cuda.stream(self._save_stream):
+                self._save_stream.wait_event(main_stream_event)  # Wait for main stream
+                self._connector._gather_or_scatter_kv_caches(
+                    block_ids, buffers, "gather"
+                )
+
+                save_stream_event = torch.cuda.Event()
+                save_stream_event.record(self._save_stream)  # Record gather completion
+
+            # Step3: Write data in batches
+            write_futures = []
+            for i in range(0, len(offsets), DEFAULT_MAX_IO_ENTRIES):
+                batch_offsets = offsets[i : i + DEFAULT_MAX_IO_ENTRIES]
+                batch_buffers = buffers[i : i + DEFAULT_MAX_IO_ENTRIES]
+                client = self._connector._clients[self._connector._ac.next()]
+                write_future = self._io_executor.submit(
+                    client.batch_write, batch_offsets, batch_buffers, save_stream_event
+                )
+                write_futures.append(write_future)
+
+            # Check write results
+            write_success = all(
+                result == self._bytes_per_page
+                for write_future in write_futures
+                for result in write_future.result()
+            )
+
+            # Step4: Confirm writes to metadata server
+            if write_success:
+                written_keys = list(zip(block_hashes, page_indices))
+                self._connector._metadata_client.confirm_write_for_keys(
+                    self._rank, written_keys, []
+                )
+                self._save_buffer_allocator.free_buffer(buffers)
+                return self._succeed_task(
+                    "Saved", start_time, request_id, len(block_ids), future
+                )
+            else:
+                self._connector._metadata_client.confirm_write_for_keys(
+                    self._rank, [], page_indices
+                )
+                self._save_buffer_allocator.free_buffer(buffers)
+                return self._fail_task(
+                    "Saved", "Write operation failed", request_id, future
+                )
+
+        except Exception as e:
+            if buffers is not None:
+                self._save_buffer_allocator.free_buffer(buffers)
+            return self._fail_task(
+                "Saved", f"Task execution error: {e}", request_id, future
+            )
+
+    def _handle_load_task(self, task) -> None:
+        """Handle individual load task."""
+        request_id, block_ids, block_hashes, future = task
+        start_time = time.perf_counter()
+        buffers = None
+        try:
+            # Step1: Get block locations from metadata server
+            page_indices = self._connector._metadata_client.get_key_locations(
+                self._rank, block_hashes
+            )
+
+            if any(idx is None for idx in page_indices):
+                return self._fail_task("Loaded", "Blocks not found", request_id, future)
+
+            # Allocate read buffer
+            buffers = self._load_buffer_allocator.alloc_buffer(len(block_ids))
+            if buffers is None:
+                return self._fail_task(
+                    "Loaded",
+                    f"Buffer allocation failed for {len(block_ids)} blocks",
+                    request_id,
+                    future,
+                )
+
+            # Step2: Read data in batches
+            offsets = [idx * self._bytes_per_page for idx in page_indices]
+            read_futures = []
+            for i in range(0, len(offsets), DEFAULT_MAX_IO_ENTRIES):
+                batch_offsets = offsets[i : i + DEFAULT_MAX_IO_ENTRIES]
+                batch_buffers = buffers[i : i + DEFAULT_MAX_IO_ENTRIES]
+                client = self._connector._clients[self._connector._ac.next()]
+                read_future = self._io_executor.submit(
+                    client.batch_read, batch_offsets, batch_buffers
+                )
+                read_futures.append(read_future)
+
+            # Check read results
+            read_success = all(
+                result == self._bytes_per_page
+                for read_future in read_futures
+                for result in read_future.result()
+            )
+
+            if not read_success:
+                self._load_buffer_allocator.free_buffer(buffers)
+                return self._fail_task(
+                    "Loaded", "Read operation failed", request_id, future
+                )
+
+            # Step3: Scatter data back to KV cache
+            with torch.cuda.stream(self._load_stream):
+                self._connector._gather_or_scatter_kv_caches(
+                    block_ids, buffers, "scatter"
+                )
+
+            self._load_stream.synchronize()
+            self._load_buffer_allocator.free_buffer(buffers)
+            return self._succeed_task(
+                "Loaded", start_time, request_id, len(block_ids), future
+            )
+
+        except Exception as e:
+            if buffers is not None:
+                self._load_buffer_allocator.free_buffer(buffers)
+            return self._fail_task(
+                "Loaded", f"Task execution error: {e}", request_id, future
+            )
+
+    def _fail_task(
+        self, operation: str, error_msg: str, request_id: str, future: Future
+    ) -> None:
+        """Helper to fail task with error logging."""
+        logger.error(
+            "%s for %s request %s",
+            error_msg,
+            operation,
+            request_id,
+        )
+        self.hf3fs_stats.record_failed_task_count(operation)
+        future.set_result(False)
+
+    def _succeed_task(
+        self,
+        operation: str,
+        start_time: float,
+        request_id: str,
+        block_count: int,
+        future: Future,
+    ) -> None:
+        """Helper to succeed task with logging."""
+        duration = time.perf_counter() - start_time
+        logger.info(
+            "%s %s: %d blocks in %.2fs",
+            operation,
+            request_id,
+            block_count,
+            duration,
+        )
+        self.hf3fs_stats.record_success_task_duration(operation, duration)
+        future.set_result(True)
+
+    def shutdown(self) -> None:
+        """Clean shutdown of all background threads and resources."""
+        self._stop_event.set()
+        self._save_thread.join()
+        self._load_thread.join()
+        self._io_executor.shutdown(wait=True)
+        logger.info("AsyncOperationManager shutdown completed")
+
+
+# ============================================================================
+# HF3FS Connector
+# ============================================================================
+
+
+class HF3FSKVConnector(KVConnectorBase_V1):
+    """HF3FS KV Connector implementation."""
+
+    def __init__(
+        self,
+        vllm_config: "VllmConfig",
+        role: KVConnectorRole,
+        kv_cache_config: "KVCacheConfig",
+    ):
+        super().__init__(
+            vllm_config=vllm_config, role=role, kv_cache_config=kv_cache_config
+        )
+
+        # Core configuration
+        self._vllm_config = vllm_config
+        self._role = role
+        self._block_size = vllm_config.cache_config.block_size
+        self._use_mla = vllm_config.model_config.use_mla
+        self._model_config = vllm_config.model_config
+
+        logger.info("Using MLA: %s", self._use_mla)
+
+        # HF3FS configuration
+        kv_config = vllm_config.kv_transfer_config
+        assert kv_config is not None
+
+        self._storage_path = kv_config.get_from_extra_config(
+            "hf3fs_storage_path", "/vllm-workspace/mnt/hf3fs"
+        )
+        self._metadata_server_url = kv_config.get_from_extra_config(
+            "hf3fs_metadata_server_url", "http://localhost:18000"
+        )
+        self._file_size = kv_config.get_from_extra_config(
+            "hf3fs_file_size", 1024 * 1024 * 1024
+        )
+        self._numjobs = kv_config.get_from_extra_config("hf3fs_client_numjobs", 16)
+        self._max_device_buffer_count = kv_config.get_from_extra_config(
+            "hf3fs_max_device_buffer_count", 128
+        )
+        self._max_device_buffer_count = max(
+            self._max_device_buffer_count, self._numjobs * DEFAULT_MAX_IO_ENTRIES
+        )
+
+        if self._role == KVConnectorRole.SCHEDULER:
+            self._scheduling_states: dict[str, RequestSchedulingState] = {}
+            self._metadata_client = Hf3fsMetadataClient()
+            self._metadata_client.initialize(0, role="scheduler")
+
+        atexit.register(self.close)
+        signal.signal(signal.SIGINT, lambda sig, frame: self.close())
+        signal.signal(signal.SIGTERM, lambda sig, frame: self.close())
+        signal.signal(signal.SIGQUIT, lambda sig, frame: self.close())
+
+        logger.info(
+            "HF3FSKVConnector initialized: path=%s, role=%s",
+            self._storage_path,
+            self._role.name,
+        )
+
+    ############################################################
+    # Worker Side Methods
+    ############################################################
+
+    def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]) -> None:
+        self._kv_caches = kv_caches
+        self._setup_kv_cache_config()
+        self._setup_storage_clients()
+        self._async_manager = AsyncOperationManager(self)
+
+    def _setup_kv_cache_config(self):
+        first_cache = next(iter(self._kv_caches.values()))
+        self._device = first_cache.device
+        self._dtype = first_cache.dtype
+        element_size = first_cache.element_size()
+
+        if self._use_mla:
+            assert len(first_cache.shape) == 3, "MLA format should have 3 dimensions"
+            # MLA format: [num_blocks, block_size, head_size]
+            num_blocks, block_size, head_size = first_cache.shape
+            num_heads = 1
+        else:
+            # MHA format: [2, num_blocks, block_size, num_heads, head_size]
+            _, num_blocks, block_size, num_heads, head_size = first_cache.shape
+
+        self._local_total_tokens = num_blocks * block_size
+        self._local_block_size = block_size
+
+        if self._use_mla:
+            layer_block_size = block_size * head_size * element_size
+            self._bytes_per_page = layer_block_size * len(self._kv_caches)
+            self._shape_per_page = [
+                len(self._kv_caches),
+                block_size,
+                head_size,
+            ]
+        else:
+            layer_block_size = 2 * block_size * num_heads * head_size * element_size
+            self._bytes_per_page = layer_block_size * len(self._kv_caches)
+            self._shape_per_page = [
+                len(self._kv_caches),
+                2,
+                block_size,
+                num_heads * head_size,
+            ]
+
+        self._kvcache_ptrs = torch.tensor(
+            [cache.data_ptr() for cache in self._kv_caches.values()],
+            dtype=torch.int64,
+            device=self._device,
+        )
+
+    def _setup_storage_clients(self):
+        os.makedirs(self._storage_path, exist_ok=True)
+
+        self._rank = get_tensor_model_parallel_rank()
+        file_path = os.path.join(
+            self._storage_path, f"hf3fs_vllm_data_file_{self._rank}"
+        )
+
+        try:
+            # Initialize HF3FS clients
+            self._ac = AtomicCounter(self._numjobs)
+            assert Hf3fsClient is not None
+            self._clients = [
+                Hf3fsClient(
+                    path=file_path,
+                    size=self._file_size,
+                    bytes_per_page=self._bytes_per_page,
+                    entries=DEFAULT_MAX_IO_ENTRIES,
+                )
+                for _ in range(self._numjobs)
+            ]
+
+            # Initialize metadata client
+            num_pages = self._file_size // self._bytes_per_page
+            self._metadata_client = Hf3fsMetadataClient()
+            self._metadata_client.initialize(self._rank, num_pages, role="worker")
+        except Exception as e:
+            logger.error("HF3FS client initialization failed: %s", e)
+            raise
+
+    def save_kv_layer(
+        self,
+        layer_name: str,
+        kv_layer: torch.Tensor,
+        attn_metadata: "AttentionMetadata",
+        **kwargs,
+    ) -> None:
+        """HF3FSConnector does not do layerwise saving."""
+        pass
+
+    def wait_for_save(self) -> None:
+        metadata = self._get_connector_metadata()
+        if not isinstance(metadata, HF3FSConnectorMetadata):
+            logger.error("Invalid metadata type: %s", type(metadata))
+            return
+
+        for request in metadata.requests:
+            if request.save_block_op is None:
+                continue
+
+            skip_blocks = request.save_block_op.skip_leading_blocks
+            block_hashes = self._generate_block_hashes(request.token_ids, skip_blocks)
+            block_ids = request.block_ids[skip_blocks : skip_blocks + len(block_hashes)]
+
+            for i in range(0, len(block_ids), self._max_device_buffer_count):
+                batch_block_ids = block_ids[i : i + self._max_device_buffer_count]
+                batch_block_hashes = block_hashes[i : i + self._max_device_buffer_count]
+                self._async_manager.submit_save_operation(
+                    request.request_id, batch_block_ids, batch_block_hashes
+                )
+
+    def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
+        metadata = self._get_connector_metadata()
+        if not isinstance(metadata, HF3FSConnectorMetadata):
+            logger.error("Invalid metadata type for loading")
+            return
+
+        for request in metadata.requests:
+            if request.load_block_op is None:
+                continue
+
+            load_op = request.load_block_op
+            block_ids = request.block_ids[: load_op.num_blocks_to_load]
+            block_hashes = self._generate_block_hashes(
+                request.token_ids, load_op.num_computed_blocks, len(block_ids)
+            )
+
+            for i in range(0, len(block_ids), self._max_device_buffer_count):
+                batch_block_ids = block_ids[i : i + self._max_device_buffer_count]
+                batch_block_hashes = block_hashes[i : i + self._max_device_buffer_count]
+                self._async_manager.submit_load_operation(
+                    request.request_id, batch_block_ids, batch_block_hashes
+                )
+
+    def wait_for_layer_load(self, layer_name: str) -> None:
+        pass
+
+    def get_finished(
+        self, finished_req_ids: set[str]
+    ) -> tuple[set[str] | None, set[str] | None]:
+        return self._async_manager.get_finished_operations(finished_req_ids)
+
+    def get_kv_connector_stats(self) -> Optional["KVConnectorStats"]:
+        """
+        Get the KV connector stats collected during the last interval.
+        """
+        # Clear stats for next iteration
+        if (
+            hasattr(self, "_async_manager")
+            and not self._async_manager.hf3fs_stats.is_empty()
+        ):
+            return self._async_manager.hf3fs_stats.clone_and_reset()
+        return None
+
+    ############################################################
+    # Scheduler Side Methods
+    ############################################################
+
+    def request_finished(
+        self,
+        request: "Request",
+        block_ids: list[int],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        return True, None
+
+    def get_num_new_matched_tokens(
+        self, request: "Request", num_computed_tokens: int
+    ) -> tuple[int, bool]:
+        """Get number of new tokens that can be loaded from external cache."""
+        try:
+            state = self._get_or_create_scheduling_state(request.request_id)
+            state.request = request
+            assert request.prompt_token_ids is not None
+
+            num_tokens_to_check = self._align_to_block_size(
+                len(request.prompt_token_ids) - 1
+            )
+
+            if num_tokens_to_check <= num_computed_tokens:
+                state.load_op = LoadBlockInfo(
+                    num_computed_blocks=num_computed_tokens // self._block_size,
+                    num_blocks_to_load=0,
+                    need_fetch_block_ids=[],
+                )
+                return 0, False
+
+            token_ids_to_check = request.prompt_token_ids[:num_tokens_to_check]
+            block_hashes = self._generate_block_hashes(token_ids_to_check, 0)
+
+            # Check existence
+            exists_results = self._metadata_client.batch_key_exists(block_hashes)
+
+            # Count consecutive matches
+            matched_blocks = next(
+                (i for i, exists in enumerate(exists_results) if not exists),
+                len(exists_results),
+            )
+            matched_tokens = matched_blocks * self._block_size
+            new_hit_tokens = max(0, matched_tokens - num_computed_tokens)
+
+            # Store load operation
+            state.load_op = LoadBlockInfo(
+                num_computed_blocks=num_computed_tokens // self._block_size,
+                num_blocks_to_load=new_hit_tokens // self._block_size,
+                need_fetch_block_ids=[],
+            )
+
+            logger.info(
+                (
+                    "Token matching for %s: "
+                    "%d matched (%d blocks), "
+                    "%d new hits, "
+                    "prompt len %d"
+                ),
+                request.request_id,
+                matched_tokens,
+                matched_blocks,
+                new_hit_tokens,
+                len(request.prompt_token_ids),
+            )
+            return new_hit_tokens, new_hit_tokens > 0
+
+        except Exception as e:
+            logger.error(
+                "Error calculating matches for request %s: %s", request.request_id, e
+            )
+            return 0, False
+
+    def update_state_after_alloc(
+        self, request: "Request", blocks: "KVCacheBlocks", num_external_tokens: int
+    ) -> None:
+        """Update state after block allocation."""
+        state = self._get_or_create_scheduling_state(request.request_id)
+        state.request = request
+
+        if num_external_tokens <= 0 or not state.needs_loading():
+            return
+
+        # Validate block allocation
+        assert state.load_op is not None
+        expected_blocks = state.load_op.num_blocks_to_load
+        actual_blocks = num_external_tokens // self._block_size
+        assert actual_blocks == expected_blocks, (
+            f"Block count mismatch for {request.request_id}: "
+            f"expected {expected_blocks}, got {actual_blocks}"
+        )
+
+        # Update load operation with allocated block IDs
+        if actual_blocks > 0:
+            local_block_ids = blocks.get_unhashed_block_ids()
+            state.load_op.need_fetch_block_ids.extend(local_block_ids)
+            state.phase = "WAITING_TO_LOAD"
+
+    def build_connector_meta(
+        self, scheduler_output: SchedulerOutput
+    ) -> KVConnectorMetadata:
+        """Build connector metadata for scheduling step."""
+        metadata = HF3FSConnectorMetadata()
+
+        for request_id in scheduler_output.finished_req_ids:
+            self._scheduling_states.pop(request_id, None)
+
+        # Process requests by phase
+        self._process_waiting_to_load_requests(metadata)
+        self._process_new_requests(scheduler_output, metadata)
+        self._process_cached_requests(scheduler_output, metadata)
+
+        return metadata
+
+    def _process_waiting_to_load_requests(
+        self, metadata: HF3FSConnectorMetadata
+    ) -> None:
+        """Process requests waiting to load."""
+        for state in list(self._scheduling_states.values()):
+            if not state.is_ready_to_load():
+                continue
+            assert state.load_op is not None
+            assert (
+                state.request is not None and state.request.prompt_token_ids is not None
+            )
+            # Create load request metadata
+            num_cached_blocks = (
+                state.load_op.num_computed_blocks + state.load_op.num_blocks_to_load
+            )
+            num_tokens_to_compute = num_cached_blocks * self._block_size
+
+            # Initialize token_ids and allocated_block_ids for loading
+            state.token_ids = state.request.prompt_token_ids[
+                :num_tokens_to_compute
+            ].copy()
+            state.allocated_block_ids = state.load_op.need_fetch_block_ids.copy()
+
+            request_metadata = HF3FSRequestMetadata.from_scheduling_state(
+                state, self._block_size, state.load_op, num_cached_blocks
+            )
+
+            if request_metadata:
+                metadata.add_request(request_metadata)
+                state.phase = "ACTIVE"
+
+    def _process_new_requests(
+        self, scheduler_output: SchedulerOutput, metadata: HF3FSConnectorMetadata
+    ) -> None:
+        """Process new requests."""
+        for request in scheduler_output.scheduled_new_reqs:
+            state = self._get_or_create_scheduling_state(request.req_id)
+
+            # Calculate tokens to compute
+            num_tokens_to_compute = (
+                request.num_computed_tokens
+                + scheduler_output.num_scheduled_tokens[request.req_id]
+            )
+            self._initialize_state_from_new_request(
+                state, request, num_tokens_to_compute
+            )
+
+            # Create save metadata (skip cached blocks if any)
+            num_cached_blocks = None
+            if state.load_op:
+                num_cached_blocks = (
+                    state.load_op.num_computed_blocks + state.load_op.num_blocks_to_load
+                )
+
+            request_metadata = HF3FSRequestMetadata.from_scheduling_state(
+                state, self._block_size, None, num_cached_blocks
+            )
+
+            if request_metadata:
+                metadata.add_request(request_metadata)
+                state.phase = "ACTIVE"
+
+    def _process_cached_requests(
+        self, scheduler_output: SchedulerOutput, metadata: HF3FSConnectorMetadata
+    ) -> None:
+        """Process cached requests."""
+        cached_reqs = scheduler_output.scheduled_cached_reqs
+        for i, request_id in enumerate(cached_reqs.req_ids):
+            state = self._get_or_create_scheduling_state(request_id)
+            assert state.request is not None
+
+            # Update with new tokens and blocks
+            num_new_tokens = scheduler_output.num_scheduled_tokens[request_id]
+            num_current_tokens = len(state.token_ids)
+            new_token_ids = state.request.all_token_ids[
+                num_current_tokens : num_current_tokens + num_new_tokens
+            ]
+            new_block_ids = cached_reqs.new_block_ids[i]
+
+            state.update_tokens_and_blocks(new_token_ids, new_block_ids)
+
+            # Create save metadata
+            request_metadata = HF3FSRequestMetadata.from_scheduling_state(
+                state, self._block_size, None
+            )
+
+            if request_metadata:
+                metadata.add_request(request_metadata)
+
+    @classmethod
+    def build_kv_connector_stats(
+        cls, data: dict[str, Any] | None = None
+    ) -> Optional["KVConnectorStats"]:
+        """
+        KVConnectorStats resolution method. This method allows dynamically
+        registered connectors to return their own KVConnectorStats object,
+        which can implement custom aggregation logic on the data dict.
+        """
+        return (
+            HF3FSKVConnectorStats(data=data)
+            if data is not None
+            else HF3FSKVConnectorStats()
+        )
+
+    @classmethod
+    def build_prom_metrics(
+        cls,
+        vllm_config: VllmConfig,
+        metric_types: dict[type[PromMetric], type[PromMetricT]],
+        labelnames: list[str],
+        per_engine_labelvalues: dict[int, list[object]],
+    ) -> KVConnectorPromMetrics:
+        return HF3FSPromMetrics(
+            vllm_config, metric_types, labelnames, per_engine_labelvalues
+        )
+
+    def close(self) -> None:
+        try:
+            if hasattr(self, "_async_manager"):
+                self._async_manager.shutdown()
+
+            if hasattr(self, "_clients"):
+                for client in self._clients:
+                    client.close()
+                logger.info("HF3FS clients closed")
+        except Exception as e:
+            logger.error("Connector shutdown error: %s", e)
+
+    ############################################################
+    # Utility Methods
+    ############################################################
+
+    def _get_or_create_scheduling_state(
+        self, request_id: str
+    ) -> RequestSchedulingState:
+        """Get existing or create new scheduling state."""
+        if request_id not in self._scheduling_states:
+            self._scheduling_states[request_id] = RequestSchedulingState(
+                request_id=request_id
+            )
+        return self._scheduling_states[request_id]
+
+    def _initialize_state_from_new_request(
+        self, state: RequestSchedulingState, request, num_tokens_to_compute: int
+    ) -> None:
+        """Initialize state from new request data."""
+        # Handle different block_ids formats in vLLM 0.9.0+
+        if isinstance(request.block_ids[0], list):
+            unfolded_block_ids = request.block_ids[0].copy()
+        else:
+            unfolded_block_ids = request.block_ids.copy()
+
+        state.token_ids = request.prompt_token_ids[:num_tokens_to_compute].copy()
+        state.allocated_block_ids = unfolded_block_ids
+        state.num_saved_blocks = 0
+
+    def _generate_block_hashes(
+        self,
+        token_ids: list[int],
+        start_block_id: int,
+        max_blocks_count: int | None = None,
+    ) -> list[str]:
+        """Generate block hashes for token sequence."""
+        block_hashes = []
+        previous_hash = ""
+
+        for start_idx in range(0, len(token_ids), self._block_size):
+            if start_idx + self._block_size > len(token_ids):
+                break
+
+            end_idx = start_idx + self._block_size
+            block_hash = self._compute_prefix_hash(
+                token_ids[start_idx:end_idx], previous_hash
+            )
+
+            block_index = start_idx // self._block_size
+            if block_index >= start_block_id:
+                block_hashes.append(block_hash)
+
+            if max_blocks_count and len(block_hashes) >= max_blocks_count:
+                break
+            previous_hash = block_hash
+
+        return block_hashes
+
+    def _gather_or_scatter_kv_caches(
+        self, block_ids: list[int], block_buffers, operation: str
+    ):
+        for buffer_tensor, block_id in zip(block_buffers, block_ids):
+            start_idx = block_id * self._local_block_size
+            token_indices = list(range(start_idx, start_idx + self._local_block_size))
+            if operation == "gather":
+                gather_scatter_helper.gather_kv_caches(
+                    self._kvcache_ptrs,
+                    self._local_total_tokens,
+                    buffer_tensor,
+                    token_indices,
+                    is_mla=self._use_mla,
+                )
+            else:
+                gather_scatter_helper.scatter_kv_caches(
+                    self._kvcache_ptrs,
+                    self._local_total_tokens,
+                    buffer_tensor,
+                    token_indices,
+                    is_mla=self._use_mla,
+                )
+
+    def _compute_prefix_hash(
+        self, token_ids: list[int], previous_hash: str = ""
+    ) -> str:
+        """Compute prefix hash for token block."""
+        combined_string = f"{previous_hash}_{token_ids}"
+        return hashlib.md5(combined_string.encode()).hexdigest()
+
+    def _align_to_block_size(self, num_tokens: int) -> int:
+        """Align token count to block size."""
+        return (num_tokens // self._block_size) * self._block_size
+
+
+@dataclass
+class HF3FSKVConnectorStats(KVConnectorStats):
+    """Container for transfer performance metrics"""
+
+    def __post_init__(self):
+        if not self.data:
+            # Empty container init, no data is passed in.
+            self.reset()
+
+    def reset(self):
+        # Must be serializable
+        self.data: dict[str, Any] = {
+            "save_duration": [],
+            "load_duration": [],
+            "num_failed_save": 0,
+            "num_failed_load": 0,
+            "num_transfer_task": 0,
+        }
+
+    def aggregate(self, other: "KVConnectorStats") -> "KVConnectorStats":
+        if not other.is_empty():
+            for k, v in other.data.items():
+                accumulator = self.data[k]
+                if isinstance(accumulator, list):
+                    accumulator.extend(v)
+                else:  # int
+                    self.data[k] += v
+        return self
+
+    def reduce(self) -> dict[str, int | float]:
+        # Compute compact representative stats suitable for CLI logging
+        if self.is_empty():
+            return {
+                "Num transfers task": 0,
+                "Num save task success": 0,
+                "Num save task failed": 0,
+                "Num load task success": 0,
+                "Num load task failed": 0,
+                "Avg save duration (ms)": 0,
+                "P90 save duration (ms)": 0,
+                "Avg load duration (ms)": 0,
+                "P90 load duration (ms)": 0,
+            }
+        num_success_save = len(self.data["save_duration"] or [])
+        num_success_load = len(self.data["load_duration"] or [])
+        num_failed_save = self.data["num_failed_save"]
+        num_failed_load = self.data["num_failed_load"]
+        if num_success_save == 0:
+            save_duration = np.zeros(1)
+        else:
+            save_duration = np.asarray(self.data["save_duration"])
+        if num_success_load == 0:
+            load_duration = np.zeros(1)
+        else:
+            load_duration = np.asarray(self.data["load_duration"])
+
+        return {
+            "Num transfers task": self.data["num_transfer_task"],
+            "Num save task success": num_success_save,
+            "Num save task failed": num_failed_save,
+            "Num load task success": num_success_load,
+            "Num load task failed": num_failed_load,
+            "Avg save duration (ms)": round(save_duration.mean() * 1e3, 3),
+            "P90 save duration (ms)": round(np.percentile(save_duration, 90) * 1e3, 3),
+            "Avg load duration (ms)": round(load_duration.mean() * 1e3, 3),
+            "P90 load duration (ms)": round(np.percentile(load_duration, 90) * 1e3, 3),
+        }
+
+    def is_empty(self) -> bool:
+        return self.data["num_transfer_task"] == 0
+
+    def record_success_task_duration(self, operation, duration):
+        if operation == "Saved":
+            self.data["save_duration"].append(duration)
+        elif operation == "Loaded":
+            self.data["load_duration"].append(duration)
+        self.data["num_transfer_task"] += 1
+
+    def record_failed_task_count(self, operation):
+        if operation == "Saved":
+            self.data["num_failed_save"] += 1
+        elif operation == "Loaded":
+            self.data["num_failed_load"] += 1
+        self.data["num_transfer_task"] += 1
+
+    def clone_and_reset(self):
+        old = copy.copy(self)
+        self.reset()
+        return old
+
+
+class HF3FSPromMetrics(KVConnectorPromMetrics):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        metric_types: dict[type[PromMetric], type[PromMetricT]],
+        labelnames: list[str],
+        per_engine_labelvalues: dict[int, list[object]],
+    ):
+        super().__init__(vllm_config, metric_types, labelnames, per_engine_labelvalues)
+        buckets = [
+            0.001,
+            0.005,
+            0.01,
+            0.025,
+            0.05,
+            0.075,
+            0.1,
+            0.2,
+            0.3,
+            0.5,
+            0.75,
+            1.0,
+            5.0,
+        ]
+        hf3fs_save_duration = self._histogram_cls(
+            name="vllm:hf3fs_save_duration_seconds",
+            documentation="Histogram of save duration for HF3FSKVConnector.",
+            buckets=buckets,
+            labelnames=labelnames,
+        )
+        self.hf3fs_save_duration = create_metric_per_engine(
+            hf3fs_save_duration, self.per_engine_labelvalues
+        )
+
+        hf3fs_load_duration = self._histogram_cls(
+            name="vllm:hf3fs_load_duration_seconds",
+            documentation="Histogram of load duration for HF3FSKVConnector.",
+            buckets=buckets,
+            labelnames=labelnames,
+        )
+        self.hf3fs_load_duration = create_metric_per_engine(
+            hf3fs_load_duration, self.per_engine_labelvalues
+        )
+
+        hf3fs_num_failed_save = self._counter_cls(
+            name="vllm:hf3fs_num_failed_save",
+            documentation="Number of failed HF3FS KV save.",
+            labelnames=labelnames,
+        )
+        self.hf3fs_num_failed_save = create_metric_per_engine(
+            hf3fs_num_failed_save, self.per_engine_labelvalues
+        )
+
+        hf3fs_num_failed_load = self._counter_cls(
+            name="vllm:hf3fs_num_failed_load",
+            documentation="Number of failed HF3FS KV load.",
+            labelnames=labelnames,
+        )
+        self.hf3fs_num_failed_load = create_metric_per_engine(
+            hf3fs_num_failed_load, self.per_engine_labelvalues
+        )
+
+    def observe(self, transfer_stats_data: dict[str, Any], engine_idx: int = 0):
+        for prom_obj, list_item_key in zip(
+            [
+                self.hf3fs_save_duration,
+                self.hf3fs_load_duration,
+            ],
+            [
+                "save_duration",
+                "load_duration",
+            ],
+        ):
+            for list_item in transfer_stats_data[list_item_key]:
+                prom_obj[engine_idx].observe(list_item)
+        for counter_obj, counter_item_key in zip(
+            [
+                self.hf3fs_num_failed_save,
+                self.hf3fs_num_failed_load,
+            ],
+            [
+                "num_failed_save",
+                "num_failed_load",
+            ],
+        ):
+            counter_obj[engine_idx].inc(transfer_stats_data[counter_item_key])
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/hf3fs_metadata_server.py b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/hf3fs_metadata_server.py
new file mode 100644
index 000000000000..72792e5eb260
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/hf3fs_metadata_server.py
@@ -0,0 +1,530 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+HF3FS Metadata Server with key-based organization.
+"""
+
+import argparse
+import logging
+import threading
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+
+try:
+    import orjson
+
+    HAS_ORJSON = True
+except ImportError:
+    import json as orjson  # type: ignore
+
+    HAS_ORJSON = False
+
+import requests
+from fastapi import FastAPI, HTTPException, Request, Response
+from fastapi.responses import ORJSONResponse
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
+
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class RankFileMetadata:
+    """Manages file page allocation for a single rank."""
+
+    rank_id: int
+    num_pages: int
+    free_pages: list[int]
+
+    def allocate_pages(self, num_pages: int) -> list[int]:
+        """Allocate specified number of free pages."""
+        if len(self.free_pages) < num_pages:
+            return []
+
+        allocated = self.free_pages[:num_pages]
+        self.free_pages = self.free_pages[num_pages:]
+        return allocated
+
+    def release_pages(self, page_indices: list[int]) -> None:
+        """Release pages back to free pool."""
+        for page_idx in page_indices:
+            if page_idx not in self.free_pages:
+                self.free_pages.append(page_idx)
+
+    def get_free_page_count(self) -> int:
+        """Get current number of free pages."""
+        return len(self.free_pages)
+
+
+@dataclass
+class KeyMetadata:
+    """Manages metadata for a single key across multiple ranks."""
+
+    key: str
+    rank_to_page: dict[int, int]  # rank -> allocated page index
+    tp_world_size: int
+
+    def add_rank_page(self, rank: int, page_index: int) -> None:
+        """Add page allocation for a specific rank."""
+        self.rank_to_page[rank] = page_index
+
+    def get_all_pages(self) -> list[tuple[int, int]]:
+        """Get all (rank, page) pairs for this key."""
+        return [(rank, page) for rank, page in self.rank_to_page.items()]
+
+    def get_rank_page(self, rank: int) -> int | None:
+        """Get page index for a specific rank."""
+        return self.rank_to_page.get(rank)
+
+    def is_complete(self) -> bool:
+        """Check if all ranks in the TP world have allocated pages."""
+        return len(self.rank_to_page) == self.tp_world_size
+
+
+class GlobalMetadataState:
+    """Manages global metadata state across all ranks and keys."""
+
+    def __init__(self):
+        self.global_lock = threading.RLock()
+        self.rank_metadata: dict[int, RankFileMetadata] = {}
+        self.key_metadata: dict[str, KeyMetadata] = {}
+
+    def clear(self) -> None:
+        """Clear all metadata state."""
+        with self.global_lock:
+            self.rank_metadata.clear()
+            self.key_metadata.clear()
+            logger.info("Cleared all metadata state")
+
+    def initialize_rank(self, rank: int, num_pages: int) -> None:
+        """Initialize a new rank with specified number of pages."""
+        with self.global_lock:
+            if rank not in self.rank_metadata:
+                self.rank_metadata[rank] = RankFileMetadata(
+                    rank, num_pages, list(range(num_pages))
+                )
+                logger.info("Initialized rank %s with %s pages", rank, num_pages)
+
+    def allocate_pages_for_keys(
+        self, rank: int, keys: list[tuple[str, str]]
+    ) -> dict[str, int]:
+        """Allocate one page for each key on the specified rank.
+
+        Args:
+            rank: Rank ID to allocate pages on
+            keys: List of keys to allocate pages for
+
+        Returns:
+            Dictionary mapping key -> allocated page index
+        """
+        with self.global_lock:
+            if rank not in self.rank_metadata:
+                raise ValueError(f"Rank {rank} not initialized")
+
+            # Batch allocate pages for all keys
+            num_pages_needed = len(keys)
+            allocated_pages = self.rank_metadata[rank].allocate_pages(num_pages_needed)
+
+            if len(allocated_pages) < num_pages_needed:
+                logger.warning(
+                    "Rank %s only allocated %s pages for %s keys",
+                    rank,
+                    len(allocated_pages),
+                    num_pages_needed,
+                )
+
+            allocation_results = {}
+            for i, (key, prefix_key) in enumerate(keys):
+                if key in self.key_metadata:
+                    key_meta = self.key_metadata[key]
+                    if key_meta.is_complete() and rank in key_meta.rank_to_page:
+                        # key is already fully written, reuse the existing page
+                        # and release the allocated pages back to the free pool.
+                        if i < len(allocated_pages):
+                            self.rank_metadata[rank].release_pages([allocated_pages[i]])
+                        allocation_results[key] = key_meta.rank_to_page[rank]
+                        continue
+
+                if i < len(allocated_pages):
+                    allocation_results[key] = allocated_pages[i]
+                else:
+                    allocation_results[key] = -1  # No pages available
+
+            return allocation_results
+
+    def confirm_write_for_keys(
+        self,
+        rank: int,
+        key_confirmations: list[tuple[str, int]],
+        pages_to_release: list[int] | None = None,
+    ) -> None:
+        """Confirm write operations for keys and update metadata.
+
+        Args:
+            rank: Rank ID that confirmed the writes
+            key_confirmations: List of (key, page_index) tuples
+            pages_to_release: List of page indices to release back to free pool
+        """
+        with self.global_lock:
+            # Confirm successful writes
+            for key, page_index in key_confirmations:
+                if key not in self.key_metadata:
+                    # Need to determine tp_world_size from rank_metadata
+                    tp_world_size = len(self.rank_metadata)
+                    self.key_metadata[key] = KeyMetadata(key, {}, tp_world_size)
+
+                # Add confirmed page to key metadata
+                self.key_metadata[key].add_rank_page(rank, page_index)
+
+            # Release specified pages back to free pool
+            if pages_to_release:
+                self.rank_metadata[rank].release_pages(pages_to_release)
+                logger.debug(
+                    "Released %s pages on rank %s: %s",
+                    len(pages_to_release),
+                    rank,
+                    pages_to_release,
+                )
+
+    def batch_key_exists(self, keys: list[str]) -> list[bool]:
+        """Check if keys exist in metadata and all ranks have confirmed writes.
+
+        Args:
+            keys: List of keys to check
+
+        Returns:
+            List of boolean values indicating key existence and completion
+        """
+        with self.global_lock:
+            results = []
+            for key in keys:
+                if key not in self.key_metadata:
+                    results.append(False)
+                else:
+                    # Check if all ranks in the TP world have confirmed writes
+                    key_meta = self.key_metadata[key]
+                    results.append(key_meta.is_complete())
+            return results
+
+    def get_key_locations(self, rank: int, keys: list[str]) -> list[int | None]:
+        """Get page indices for keys on a specific rank.
+
+        Args:
+            rank: Rank ID to query
+            keys: List of keys to look up
+
+        Returns:
+            List of page indices in the same order as input keys (None if key not found)
+        """
+        with self.global_lock:
+            if rank not in self.rank_metadata:
+                raise ValueError(f"Rank {rank} not initialized")
+
+            results = []
+            for key in keys:
+                if key in self.key_metadata:
+                    key_meta = self.key_metadata[key]
+                    if key_meta.is_complete():
+                        page_index = key_meta.get_rank_page(rank)
+                    else:
+                        page_index = None
+
+                    results.append(page_index)
+                else:
+                    results.append(None)
+
+            return results
+
+
+class Hf3fsMetadataServer:
+    """HF3FS Metadata Server with improved key-based organization."""
+
+    def __init__(self, persistence_path: str | None = None, save_interval: int = 60):
+        self.state = GlobalMetadataState()
+        if HAS_ORJSON:
+            self.app = FastAPI(default_response_class=ORJSONResponse)
+        else:
+            self.app = FastAPI()
+        self._setup_routes()
+
+    async def _read_json(self, request: Request) -> dict:
+        """Parse request JSON using orjson if available."""
+        body = await request.body()
+        return orjson.loads(body)
+
+    def _json_response(self, content: dict):
+        """Return ORJSONResponse when available to bypass jsonable_encoder."""
+        if HAS_ORJSON:
+            return ORJSONResponse(content)
+        else:
+            return content
+
+    def _setup_routes(self):
+        """Setup FastAPI routes for new API design."""
+        self.app.post("/rank/{rank}/initialize")(self.initialize_rank)
+        self.app.post("/keys/batch_allocate")(self.batch_allocate_pages_for_keys)
+        self.app.post("/keys/confirm_write")(self.confirm_write_for_keys)
+        self.app.post("/keys/batch_exists")(self.batch_key_exists)
+        self.app.post("/keys/get_locations")(self.get_key_locations)
+        self.app.post("/clear")(self.clear)
+
+    async def initialize_rank(self, rank: int, request: Request):
+        """Initialize a rank with specified number of pages."""
+        data = await self._read_json(request)
+        role = data.get("role", "worker")
+        num_pages = data.get("num_pages", 0)
+
+        if role == "scheduler":
+            return self._json_response(
+                {"message": "Scheduler role does not require initialization"}
+            )
+
+        if role == "worker" and num_pages > 0:
+            self.state.initialize_rank(rank, num_pages)
+            return self._json_response(
+                {"message": f"Rank {rank} initialized with {num_pages} pages"}
+            )
+        else:
+            raise HTTPException(
+                status_code=400, detail="Invalid initialization parameters"
+            )
+
+    async def batch_allocate_pages_for_keys(self, request: Request):
+        """Allocate one page for each key on a specific rank."""
+        data = await self._read_json(request)
+        rank = data.get("rank")
+        keys = data.get("keys", [])
+
+        # Validate input format
+        if rank is None or not isinstance(keys, list):
+            raise HTTPException(
+                status_code=400, detail="Invalid request format: need 'rank' and 'keys'"
+            )
+
+        try:
+            # Perform allocation
+            results = self.state.allocate_pages_for_keys(rank, keys)
+
+            # Convert results to response format
+            response = {"rank": rank, "results": list(results.items())}
+            return self._json_response(response)
+        except Exception as e:
+            raise HTTPException(
+                status_code=500, detail=f"Allocation failed: {str(e)}"
+            ) from e
+
+    async def confirm_write_for_keys(self, request: Request):
+        """Confirm write operations for keys."""
+        data = await self._read_json(request)
+        rank = data.get("rank")
+        confirmations = data.get("confirmations", [])
+        pages_to_release = data.get("pages_to_release", [])
+
+        # Validate input format
+        if rank is None or not isinstance(confirmations, list):
+            raise HTTPException(
+                status_code=400,
+                detail="Invalid request format: need 'rank' and 'confirmations'",
+            )
+
+        try:
+            self.state.confirm_write_for_keys(rank, confirmations, pages_to_release)
+
+            return Response(status_code=204)
+
+        except Exception as e:
+            logger.error("Confirm write for keys failed: %s", e)
+            raise HTTPException(
+                status_code=500, detail=f"Confirmation failed: {str(e)}"
+            ) from e
+
+    async def batch_key_exists(self, request: Request):
+        """Check if multiple keys exist in metadata."""
+        data = await self._read_json(request)
+        keys = data.get("keys", [])
+
+        if not isinstance(keys, list):
+            raise HTTPException(status_code=400, detail="Invalid keys format")
+
+        try:
+            exists_results = self.state.batch_key_exists(keys)
+            return self._json_response({"exists": exists_results})
+        except Exception as e:
+            raise HTTPException(
+                status_code=500, detail=f"Key existence check failed: {str(e)}"
+            ) from e
+
+    async def get_key_locations(self, request: Request):
+        """Get page indices for keys on a specific rank."""
+        data = await self._read_json(request)
+        rank = data.get("rank")
+        keys = data.get("keys", [])
+
+        # Validate input format
+        if rank is None or not isinstance(keys, list):
+            raise HTTPException(
+                status_code=400, detail="Invalid request format: need 'rank' and 'keys'"
+            )
+
+        try:
+            # Get key locations
+            locations = self.state.get_key_locations(rank, keys)
+            return self._json_response({"locations": locations})
+        except Exception as e:
+            raise HTTPException(
+                status_code=500, detail=f"Failed to get key locations: {str(e)}"
+            ) from e
+
+    async def clear(self, request: Request):
+        """Clear the metadata server."""
+        self.state.clear()
+        return Response(status_code=204)
+
+    def run(self, host: str = "0.0.0.0", port: int = 18000):
+        """Run the metadata server."""
+        import uvicorn
+
+        logger.info("Starting improved metadata server on http://%s:%s", host, port)
+        uvicorn.run(self.app, host=host, port=port)
+
+
+# --- Client implementation ---
+class Hf3fsMetadataInterface(ABC):
+    """Interface for HF3FS metadata operations."""
+
+    @abstractmethod
+    def initialize(self, rank: int, num_pages: int = 0, role: str = "worker") -> None:
+        """Initialize the metadata service with specified number of pages."""
+        pass
+
+    @abstractmethod
+    def allocate_pages_for_keys(
+        self, rank: int, keys: list[tuple[str, str]]
+    ) -> list[tuple[str, int]]:
+        """Allocate one page for each key on the specified rank."""
+        pass
+
+    @abstractmethod
+    def confirm_write_for_keys(
+        self,
+        rank: int,
+        key_confirmations: list[tuple[str, int]],
+        pages_to_release: list[int] | None = None,
+    ) -> None:
+        """Confirm write operations for keys and optionally release pages."""
+        pass
+
+    @abstractmethod
+    def batch_key_exists(self, keys: list[str]) -> list[bool]:
+        """Check if keys exist and are complete across all ranks."""
+        pass
+
+    @abstractmethod
+    def get_key_locations(self, rank: int, keys: list[str]) -> list[int]:
+        """Get page indices for keys on a specific rank."""
+        pass
+
+
+class Hf3fsGlobalMetadataClient(Hf3fsMetadataInterface):
+    """Global HTTP metadata client for HF3FS."""
+
+    def __init__(self, base_url: str = "http://localhost:18000", max_retries: int = 3):
+        self.base_url = base_url.rstrip("/")
+        self._session = requests.Session()
+
+        retry_strategy = Retry(
+            total=max_retries,
+            backoff_factor=0.3,
+            status_forcelist=[500, 502, 503, 504],
+            allowed_methods=["GET", "POST"],
+        )
+        adapter = HTTPAdapter(max_retries=retry_strategy)
+        self._session.mount("http://", adapter)
+
+    def _post(self, endpoint: str, json_data: dict) -> dict:
+        """Make POST request to metadata server."""
+        try:
+            url = f"{self.base_url}/{endpoint}"
+            headers = {"Content-Type": "application/json"}
+            if HAS_ORJSON:
+                payload = orjson.dumps(json_data)
+            else:
+                import json
+
+                payload = json.dumps(json_data).encode("utf-8")
+            response = self._session.post(url, data=payload, headers=headers)
+            response.raise_for_status()
+
+            if response.status_code == 204 or not response.content:
+                return {}
+            if HAS_ORJSON:
+                return orjson.loads(response.content)
+            else:
+                return response.json()
+        except requests.exceptions.RequestException as e:
+            logger.error("Failed to POST to %s after retries: %s", endpoint, e)
+            raise RuntimeError(f"Failed to connect to metadata server: {e}") from e
+
+    def initialize(self, rank: int, num_pages: int = 0, role: str = "worker") -> None:
+        """Initialize a rank with specified number of pages."""
+        self._post(f"rank/{rank}/initialize", {"num_pages": num_pages, "role": role})
+
+    def allocate_pages_for_keys(
+        self, rank: int, keys: list[tuple[str, str]]
+    ) -> list[tuple[str, int]]:
+        """Allocate pages for keys on the specified rank."""
+        response = self._post("keys/batch_allocate", {"rank": rank, "keys": keys})
+
+        # Convert response to expected format
+        return response.get("results", {})
+
+    def confirm_write_for_keys(
+        self,
+        rank: int,
+        key_confirmations: list[tuple[str, int]],
+        pages_to_release: list[int] | None = None,
+    ) -> None:
+        """Confirm write operations for keys and optionally release pages."""
+        payload = {
+            "rank": rank,
+            "confirmations": key_confirmations,
+            "pages_to_release": pages_to_release or [],
+        }
+
+        self._post("keys/confirm_write", payload)
+
+    def batch_key_exists(self, keys: list[str]) -> list[bool]:
+        """Check if keys exist and are complete across all ranks."""
+        response = self._post("keys/batch_exists", {"keys": keys})
+        return response.get("exists", [])
+
+    def get_key_locations(self, rank: int, keys: list[str]) -> list[int]:
+        """Get page indices for keys on a specific rank."""
+        response = self._post("keys/get_locations", {"rank": rank, "keys": keys})
+        return response.get("locations", [])
+
+
+def run_metadata_server(
+    host: str = "0.0.0.0",
+    port: int = 18000,
+):
+    """Run the improved HF3FS metadata server."""
+    server = Hf3fsMetadataServer()
+    server.run(host=host, port=port)
+
+
+# --- Main Execution ---
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Improved HF3FS Metadata Server")
+    parser.add_argument(
+        "--host", type=str, default="0.0.0.0", help="Host to bind the server to."
+    )
+    parser.add_argument(
+        "--port", type=int, default=18000, help="Port to run the server on."
+    )
+    args = parser.parse_args()
+
+    run_metadata_server(args.host, args.port)
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/utils/__init__.py b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/utils/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/utils/common.py b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/utils/common.py
new file mode 100644
index 000000000000..b47de73c9927
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/utils/common.py
@@ -0,0 +1,139 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import threading
+from dataclasses import dataclass, field
+from typing import Optional
+
+from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorMetadata
+from vllm.v1.request import Request
+
+
+class AtomicCounter:
+    """Thread-safe atomic counter for round-robin operations."""
+
+    def __init__(self, n: int):
+        assert n > 0, "Counter size must be positive"
+        self._n = n
+        self._value = 0
+        self._lock = threading.Lock()
+
+    def next(self) -> int:
+        """Get next value in round-robin fashion."""
+        with self._lock:
+            current = self._value
+            self._value = (current + 1) % self._n
+            return current
+
+
+@dataclass
+class LoadBlockInfo:
+    """Operation for loading blocks from external storage."""
+
+    num_computed_blocks: int
+    num_blocks_to_load: int
+    need_fetch_block_ids: list[int]
+
+
+@dataclass
+class SaveBlockInfo:
+    """Operation for saving blocks to external storage."""
+
+    skip_leading_blocks: int
+
+
+@dataclass
+class RequestSchedulingState:
+    """Unified request scheduling state management."""
+
+    request_id: str
+    request: Request | None = None
+
+    # Token and block tracking
+    token_ids: list[int] = field(default_factory=list)
+    allocated_block_ids: list[int] = field(default_factory=list)
+    num_saved_blocks: int = 0
+
+    # Load operation info
+    load_op: LoadBlockInfo | None = None
+
+    # Scheduling phase
+    phase: str = "NEW"  # NEW -> WAITING_TO_LOAD -> ACTIVE -> FINISHED
+
+    def needs_loading(self) -> bool:
+        """Check if request needs loading."""
+        return self.load_op is not None and self.load_op.num_blocks_to_load > 0
+
+    def is_ready_to_load(self) -> bool:
+        """Check if request is ready for loading."""
+        return self.phase == "WAITING_TO_LOAD" and self.needs_loading()
+
+    def update_tokens_and_blocks(self, new_token_ids: list[int], new_block_ids) -> None:
+        """Update with new tokens and blocks."""
+        if new_token_ids:
+            self.token_ids.extend(new_token_ids)
+
+        if new_block_ids is not None:
+            normalized_block_ids = self._normalize_block_ids(new_block_ids)
+            self.allocated_block_ids.extend(normalized_block_ids)
+
+    def _normalize_block_ids(self, block_ids) -> list[int]:
+        """Normalize block_ids to list format."""
+        if not block_ids:
+            return []
+        if isinstance(block_ids, tuple):
+            return block_ids[0] if block_ids else []
+        if isinstance(block_ids, list):
+            return block_ids
+        return []
+
+
+@dataclass
+class HF3FSRequestMetadata:
+    """Metadata for a single request in HF3FS connector."""
+
+    request_id: str
+    token_ids: list[int]
+    block_ids: list[int]
+    load_block_op: LoadBlockInfo | None = None
+    save_block_op: SaveBlockInfo | None = None
+
+    @staticmethod
+    def from_scheduling_state(
+        state: "RequestSchedulingState",
+        block_size: int,
+        load_op: LoadBlockInfo | None = None,
+        skip_leading_blocks: int | None = None,
+    ) -> Optional["HF3FSRequestMetadata"]:
+        """Create request metadata from scheduling state."""
+        token_count = len(state.token_ids)
+        total_blocks = token_count // block_size
+
+        skip_blocks = (
+            state.num_saved_blocks
+            if skip_leading_blocks is None
+            else skip_leading_blocks
+        )
+
+        new_blocks_to_save = total_blocks - state.num_saved_blocks
+        if new_blocks_to_save <= 0 and load_op is None:
+            return None
+
+        state.num_saved_blocks = total_blocks
+        return HF3FSRequestMetadata(
+            request_id=state.request_id,
+            token_ids=state.token_ids,
+            block_ids=state.allocated_block_ids,
+            load_block_op=load_op,
+            save_block_op=SaveBlockInfo(skip_leading_blocks=skip_blocks),
+        )
+
+
+class HF3FSConnectorMetadata(KVConnectorMetadata):
+    """Container for HF3FS connector metadata."""
+
+    def __init__(self):
+        self.requests: list[HF3FSRequestMetadata] = []
+
+    def add_request(self, request_metadata: HF3FSRequestMetadata) -> None:
+        """Add request to metadata."""
+        self.requests.append(request_metadata)
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/utils/gather_scatter_helper.py b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/utils/gather_scatter_helper.py
new file mode 100644
index 000000000000..39d852dae637
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/utils/gather_scatter_helper.py
@@ -0,0 +1,288 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+
+from vllm.logger import init_logger
+from vllm.triton_utils import tl, triton
+
+
+@triton.jit
+def kv_cache_scatter_kernel(
+    kv_cache_ptrs_ptr,
+    source_ptr,
+    token_indices_ptr,
+    num_tokens_in_block,
+    hidden_size,
+    total_token_in_kvcache,
+    num_layers,
+    is_mla,
+    BLOCK_SIZE: tl.constexpr,
+):
+    layer_idx = tl.program_id(0)
+    token_pos = tl.program_id(1)
+
+    if layer_idx >= num_layers or token_pos >= num_tokens_in_block:
+        return
+
+    token_idx = tl.load(token_indices_ptr + token_pos)
+    kv_cache_ptr = tl.cast(tl.load(kv_cache_ptrs_ptr + layer_idx), source_ptr.dtype)
+
+    if token_idx >= total_token_in_kvcache:
+        return
+
+    if is_mla:
+        # MLA format: source [num_layers, num_tokens_in_block, hidden_size]
+        # MLA format: target [total_token_in_kvcache, hidden_size] (per layer)
+        source_offset = (layer_idx * num_tokens_in_block + token_pos) * hidden_size
+        target_offset = token_idx * hidden_size
+
+        for i in range(0, hidden_size, BLOCK_SIZE):
+            offset = i + tl.arange(0, BLOCK_SIZE)
+            mask = offset < hidden_size
+            val = tl.load(source_ptr + source_offset + offset, mask=mask)
+            tl.store(kv_cache_ptr + target_offset + offset, val, mask=mask)
+    else:
+        # MHA format: source [num_layers, 2, num_tokens_in_block, hidden_size]
+        # MHA format: target [2, total_token_in_kvcache, hidden_size]
+        source_offset_k = (
+            layer_idx * num_tokens_in_block * 2 + token_pos
+        ) * hidden_size
+        source_offset_v = (
+            layer_idx * num_tokens_in_block * 2 + num_tokens_in_block + token_pos
+        ) * hidden_size
+
+        target_offset_k = token_idx * hidden_size
+        target_offset_v = (total_token_in_kvcache + token_idx) * hidden_size
+
+        for i in range(0, hidden_size, BLOCK_SIZE):
+            offset = i + tl.arange(0, BLOCK_SIZE)
+            mask = offset < hidden_size
+
+            val_k = tl.load(source_ptr + source_offset_k + offset, mask=mask)
+            val_v = tl.load(source_ptr + source_offset_v + offset, mask=mask)
+
+            tl.store(kv_cache_ptr + target_offset_k + offset, val_k, mask=mask)
+            tl.store(kv_cache_ptr + target_offset_v + offset, val_v, mask=mask)
+
+
+@triton.jit
+def kv_cache_gather_kernel(
+    kv_cache_ptrs_ptr,
+    dst_ptr,
+    token_indices_ptr,
+    num_tokens_in_block,
+    hidden_size,
+    total_token_in_kvcache,
+    num_layers,
+    is_mla,
+    BLOCK_SIZE: tl.constexpr,
+):
+    layer_idx = tl.program_id(0)
+    token_pos = tl.program_id(1)
+
+    if layer_idx >= num_layers or token_pos >= num_tokens_in_block:
+        return
+
+    token_idx = tl.load(token_indices_ptr + token_pos)
+    kv_cache_ptr = tl.cast(tl.load(kv_cache_ptrs_ptr + layer_idx), dst_ptr.dtype)
+
+    if token_idx >= total_token_in_kvcache:
+        return
+
+    if is_mla:
+        # MLA format: source [total_token_in_kvcache, hidden_size] (per layer)
+        # MLA format: dst [num_layers, num_tokens_in_block, hidden_size]
+        kvcache_offset = token_idx * hidden_size
+        dst_offset = (layer_idx * num_tokens_in_block + token_pos) * hidden_size
+
+        for i in range(0, hidden_size, BLOCK_SIZE):
+            offset = i + tl.arange(0, BLOCK_SIZE)
+            mask = offset < hidden_size
+            val = tl.load(kv_cache_ptr + kvcache_offset + offset, mask=mask)
+            tl.store(dst_ptr + dst_offset + offset, val, mask=mask)
+    else:
+        # MHA format: source [2, total_token_in_kvcache, hidden_size]
+        # MHA format: dst [num_layers, 2, num_tokens_in_block, hidden_size]
+        dst_offset_k = (layer_idx * num_tokens_in_block * 2 + token_pos) * hidden_size
+        dst_offset_v = (
+            layer_idx * num_tokens_in_block * 2 + num_tokens_in_block + token_pos
+        ) * hidden_size
+
+        kvcache_offset_k = token_idx * hidden_size
+        kvcache_offset_v = (total_token_in_kvcache + token_idx) * hidden_size
+
+        for i in range(0, hidden_size, BLOCK_SIZE):
+            offset = i + tl.arange(0, BLOCK_SIZE)
+            mask = offset < hidden_size
+
+            val_k = tl.load(kv_cache_ptr + kvcache_offset_k + offset, mask=mask)
+            val_v = tl.load(kv_cache_ptr + kvcache_offset_v + offset, mask=mask)
+
+            tl.store(dst_ptr + dst_offset_k + offset, val_k, mask=mask)
+            tl.store(dst_ptr + dst_offset_v + offset, val_v, mask=mask)
+
+
+def scatter_kv_caches(
+    kv_caches_ptrs: torch.Tensor,
+    total_token_in_kvcache: int,
+    src_tensor: torch.Tensor,
+    token_indices: list[int],
+    is_mla: bool = False,
+) -> None:
+    """Scatter KV cache data from source tensor to KV cache storage.
+
+    Args:
+        kv_caches_ptrs: Tensor of KV cache pointers (one per layer)
+        total_token_in_kvcache: Total number of tokens in KV cache
+        src_tensor: Source tensor containing data to scatter
+            - MHA format: [num_layers, 2, num_tokens_in_block, hidden_size]
+            - MLA format: [num_layers, num_tokens_in_block, hidden_size]
+        token_indices: List of token positions to update
+        is_mla: Whether using MLA model format
+    """
+    num_layers = len(kv_caches_ptrs)
+    num_tokens_in_block = len(token_indices)
+
+    if is_mla:
+        # MLA: src_tensor is [num_layers, num_tokens_in_block, hidden_size]
+        assert len(src_tensor.shape) == 3, (
+            f"MLA src_tensor should be 3D, got {src_tensor.shape}"
+        )
+        hidden_size = src_tensor.shape[2]
+    else:
+        # MHA: src_tensor is [num_layers, 2, num_tokens_in_block, hidden_size]
+        assert len(src_tensor.shape) == 4, (
+            f"MHA src_tensor should be 4D, got {src_tensor.shape}"
+        )
+        hidden_size = src_tensor.shape[3]
+
+    device = src_tensor.device
+    token_indices_tensor = torch.tensor(
+        token_indices, dtype=torch.int32, device="cpu"
+    ).to(device, non_blocking=True)
+
+    grid = (num_layers, num_tokens_in_block)
+    BLOCK_SIZE = 128
+
+    kv_cache_scatter_kernel[grid](
+        kv_caches_ptrs,
+        src_tensor,
+        token_indices_tensor,
+        num_tokens_in_block,
+        hidden_size,
+        total_token_in_kvcache,
+        num_layers,
+        is_mla,
+        BLOCK_SIZE=BLOCK_SIZE,
+    )
+
+
+def gather_kv_caches(
+    kv_caches_ptrs: torch.Tensor,
+    total_token_in_kvcache: int,
+    dst_tensor: torch.Tensor,
+    token_indices: list[int],
+    is_mla: bool = False,
+) -> None:
+    """Gather KV cache data from KV cache storage to destination tensor.
+
+    Args:
+        kv_caches_ptrs: Tensor of KV cache pointers (one per layer)
+        total_token_in_kvcache: Total number of tokens in KV cache
+        dst_tensor: Destination tensor to store gathered data
+            - MHA format: [num_layers, 2, num_tokens_in_block, hidden_size]
+            - MLA format: [num_layers, num_tokens_in_block, hidden_size]
+        token_indices: List of token positions to gather
+        is_mla: Whether using MLA model format
+    """
+    num_layers = kv_caches_ptrs.shape[0]
+    num_tokens_in_block = len(token_indices)
+
+    if is_mla:
+        # MLA: dst_tensor is [num_layers, num_tokens_in_block, hidden_size]
+        assert len(dst_tensor.shape) == 3, (
+            f"MLA dst_tensor should be 3D, got {dst_tensor.shape}"
+        )
+        assert dst_tensor.shape[0] == num_layers, (
+            f"Layer count mismatch: {dst_tensor.shape[0]} vs {num_layers}"
+        )
+        assert dst_tensor.shape[1] == num_tokens_in_block, (
+            f"Token count mismatch: {dst_tensor.shape[1]} vs {num_tokens_in_block}"
+        )
+        hidden_size = dst_tensor.shape[2]
+    else:
+        # MHA: dst_tensor is [num_layers, 2, num_tokens_in_block, hidden_size]
+        assert len(dst_tensor.shape) == 4, (
+            f"MHA dst_tensor should be 4D, got {dst_tensor.shape}"
+        )
+        assert dst_tensor.shape[0] == num_layers, (
+            f"Layer count mismatch: {dst_tensor.shape[0]} vs {num_layers}"
+        )
+        assert dst_tensor.shape[1] == 2, (
+            f"MHA should have 2 (K,V) components, got {dst_tensor.shape[1]}"
+        )
+        assert dst_tensor.shape[2] == num_tokens_in_block, (
+            f"Token count mismatch: {dst_tensor.shape[2]} vs {num_tokens_in_block}"
+        )
+        hidden_size = dst_tensor.shape[3]
+
+    device = dst_tensor.device
+    token_indices_tensor = torch.tensor(
+        token_indices, dtype=torch.int32, device="cpu"
+    ).to(device, non_blocking=True)
+
+    grid = (num_layers, num_tokens_in_block)
+    BLOCK_SIZE = 128
+
+    kv_cache_gather_kernel[grid](
+        kv_caches_ptrs,
+        dst_tensor,
+        token_indices_tensor,
+        num_tokens_in_block,
+        hidden_size,
+        total_token_in_kvcache,
+        num_layers,
+        is_mla,
+        BLOCK_SIZE=BLOCK_SIZE,
+    )
+
+
+class CopyBufferAllocator:
+    """Memory pool for tensor buffers to avoid frequent allocation/deallocation."""
+
+    def __init__(
+        self, device: torch.device, dtype: torch.dtype, shape: list, max_count: int
+    ):
+        self._shape = shape
+        self._max_count = max_count
+        self._device = device
+        self._free_buffers = [
+            torch.empty(shape, dtype=dtype, device=device) for _ in range(max_count)
+        ]
+        self._inuse_count = 0
+
+    def alloc_buffer(self, count: int) -> list[torch.Tensor] | None:
+        """Allocate buffers from the pool."""
+        if count == 0:
+            return []
+
+        if self._inuse_count + count <= self._max_count:
+            self._inuse_count += count
+            result = self._free_buffers[-count:]
+            del self._free_buffers[-count:]
+            return result
+        return None
+
+    def free_buffer(self, buffers: list[torch.Tensor]) -> None:
+        """Return buffers to the pool."""
+        if not buffers:
+            return
+
+        if self._inuse_count >= len(buffers):
+            self._inuse_count -= len(buffers)
+            self._free_buffers.extend(buffers)
+        else:
+            raise RuntimeError("Attempted to free more buffers than allocated")
+
+
+logger = init_logger(__name__)
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/utils/hf3fs_mock_client.py b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/utils/hf3fs_mock_client.py
new file mode 100644
index 000000000000..3914663a62d8
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/utils/hf3fs_mock_client.py
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import logging
+import os
+
+import torch
+
+logger = logging.getLogger(__name__)
+HF3FS_AVAILABLE = True
+
+
+class Hf3fsClient:
+    """Mock HF3FS client using file backend for debugging and testing."""
+
+    def __init__(self, path: str, size: int, bytes_per_page: int, entries: int):
+        self._size = size
+        self._bytes_per_page = bytes_per_page
+        self._entries = entries
+        self._file_path = path
+
+        self._ensure_file_exists()
+        logger.debug("Initialized mock HF3FS client: %s (%d bytes)", path, size)
+
+    def _ensure_file_exists(self) -> None:
+        """Create file if it doesn't exist."""
+        if not os.path.exists(self._file_path):
+            with open(self._file_path, "w+b") as f:
+                f.truncate(self._size)
+
+    def batch_read(self, offsets: list[int], tensors: list[torch.Tensor]) -> list[int]:
+        """Read data from file at specified offsets into tensors."""
+        results = []
+
+        try:
+            with open(self._file_path, "rb") as f:
+                for offset, tensor in zip(offsets, tensors):
+                    num_bytes = tensor.numel() * tensor.element_size()
+
+                    if offset < 0 or offset + num_bytes > self._size:
+                        results.append(-1)
+                        continue
+
+                    f.seek(offset)
+                    buffer_data = f.read(num_bytes)
+
+                    if len(buffer_data) == num_bytes == self._bytes_per_page:
+                        tensor_data = self._convert_buffer_to_tensor(
+                            buffer_data, tensor.dtype
+                        )
+                        tensor.copy_(
+                            tensor_data.reshape(tensor.shape).to(tensor.device)
+                        )
+                        results.append(self._bytes_per_page)
+                    else:
+                        logger.error(
+                            "Read size mismatch: got %d, expected %d",
+                            len(buffer_data),
+                            num_bytes,
+                        )
+                        results.append(-1)
+        except Exception as e:
+            logger.error("Batch read error: %s", e)
+            results.extend([-1] * (len(offsets) - len(results)))
+
+        return results
+
+    def _convert_buffer_to_tensor(
+        self, buffer_data: bytes, dtype: torch.dtype
+    ) -> torch.Tensor:
+        """Convert buffer data to tensor with proper dtype handling."""
+        if dtype == torch.bfloat16:
+            tensor_data = torch.frombuffer(buffer_data, dtype=torch.uint16)
+            return tensor_data.view(dtype=torch.bfloat16)
+        else:
+            return torch.frombuffer(buffer_data, dtype=dtype)
+
+    def batch_write(
+        self, offsets: list[int], tensors: list[torch.Tensor], event: torch.cuda.Event
+    ) -> list[int]:
+        """Write data from tensors to file at specified offsets."""
+        results = []
+
+        try:
+            torch.cuda.current_stream().wait_event(event)
+
+            # Convert tensors to bytes
+            data_bytes_list = [self._tensor_to_bytes(tensor) for tensor in tensors]
+
+            # Write to file
+            with open(self._file_path, "r+b") as f:
+                for offset, data_bytes in zip(offsets, data_bytes_list):
+                    if offset < 0 or offset + len(data_bytes) > self._size:
+                        results.append(-1)
+                        continue
+
+                    f.seek(offset)
+                    bytes_written = f.write(data_bytes)
+
+                    if bytes_written == len(data_bytes) == self._bytes_per_page:
+                        results.append(self._bytes_per_page)
+                    else:
+                        logger.error(
+                            "Write size mismatch: wrote %d, expected %d",
+                            bytes_written,
+                            self._bytes_per_page,
+                        )
+                        results.append(-1)
+
+        except Exception as e:
+            logger.error("Batch write error: %s", e)
+            results.extend([-1] * (len(offsets) - len(results)))
+
+        return results
+
+    def _tensor_to_bytes(self, tensor: torch.Tensor) -> bytes:
+        """Convert tensor to bytes with proper dtype handling."""
+        cpu_tensor = tensor.cpu()
+        if cpu_tensor.dtype == torch.bfloat16:
+            return cpu_tensor.view(dtype=torch.uint16).numpy().tobytes()
+        else:
+            return cpu_tensor.numpy().tobytes()
+
+    def get_size(self) -> int:
+        """Get the total size of the storage file."""
+        return self._size
+
+    def close(self) -> None:
+        """Close the client (no-op for file backend)."""
+        pass
+
+    def flush(self) -> None:
+        """Flush any pending writes (no-op for file backend)."""
+        pass
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/utils/hf3fs_utils.cpp b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/utils/hf3fs_utils.cpp
new file mode 100644
index 000000000000..9dbeb251d04f
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/hf3fs/utils/hf3fs_utils.cpp
@@ -0,0 +1,57 @@
+#include <cuda_runtime.h>
+#include <torch/extension.h>
+#include <cstring>
+#include <vector>
+
+void read_shm(const torch::Tensor& shm, const torch::Tensor& pin,
+              std::vector<torch::Tensor> dst, uint64_t stream_ptr) {
+  py::gil_scoped_release release;
+
+  cudaStream_t stream = reinterpret_cast<cudaStream_t>(stream_ptr);
+
+  // Copy from shared memory to pinned memory
+  char* shm_ptr = static_cast<char*>(shm.data_ptr());
+  char* src_ptr = static_cast<char*>(pin.data_ptr());
+  std::memcpy(src_ptr, shm_ptr, shm.numel() * shm.element_size());
+
+  // Copy from pinned memory to GPU tensors
+  size_t current = 0;
+  for (size_t i = 0; i < dst.size(); ++i) {
+    auto& t = dst[i];
+    size_t t_bytes = t.numel() * t.element_size();
+    char* dst_ptr = static_cast<char*>(t.data_ptr());
+    cudaMemcpyAsync(dst_ptr, src_ptr + current, t_bytes, cudaMemcpyHostToDevice,
+                    stream);
+    current += t_bytes;
+  }
+  cudaStreamSynchronize(stream);
+}
+
+void write_shm(const std::vector<torch::Tensor> src, torch::Tensor& shm,
+               const torch::Tensor& pin, uint64_t stream_ptr) {
+  py::gil_scoped_release release;
+
+  cudaStream_t stream = reinterpret_cast<cudaStream_t>(stream_ptr);
+
+  // Copy from GPU tensors to pinned memory
+  char* dst_ptr = static_cast<char*>(pin.data_ptr());
+  size_t current = 0;
+  for (size_t i = 0; i < src.size(); ++i) {
+    auto& t = src[i];
+    size_t t_bytes = t.numel() * t.element_size();
+    char* src_ptr = static_cast<char*>(t.data_ptr());
+    cudaMemcpyAsync(dst_ptr + current, src_ptr, t_bytes, cudaMemcpyDeviceToHost,
+                    stream);
+    current += t_bytes;
+  }
+  cudaStreamSynchronize(stream);
+
+  // Copy from pinned memory to shared memory
+  char* shm_ptr = static_cast<char*>(shm.data_ptr());
+  std::memcpy(shm_ptr, dst_ptr, shm.numel() * shm.element_size());
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+  m.def("read_shm", &read_shm, "Read tensors from shared memory");
+  m.def("write_shm", &write_shm, "Write tensors to shared memory");
+}
\ No newline at end of file
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/__init__.py b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/__init__.py
index 07e05cc8f893..3d3a093820e4 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/__init__.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/__init__.py
@@ -7,6 +7,7 @@
     LMCacheMPSchedulerAdapter,
     LMCacheMPWorkerAdapter,
     LoadStoreOp,
+    ParallelStrategy,
 )
 
 __all__ = [
@@ -15,4 +16,5 @@
     "LMCacheMPSchedulerAdapter",
     "LMCacheMPWorkerAdapter",
     "LoadStoreOp",
+    "ParallelStrategy",
 ]
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/multi_process_adapter.py b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/multi_process_adapter.py
index eff580df9022..2e75519df125 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/multi_process_adapter.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/multi_process_adapter.py
@@ -79,6 +79,39 @@ def get_lmcache_chunk_size(
     return chunk_size
 
 
+@dataclass
+class ParallelStrategy:
+    use_mla: bool
+    """Whether to use the MLA."""
+
+    kv_world_size: int
+    """
+    The kv world size, kv_world_size may not be equal to the actual_world_size, 
+    in the case of mla, it will 'exclude' the effect of TP, the value is 
+    calculated by `extract_world_size_and_kv_rank` in `lmcache_mp_connector.py`.
+    """
+
+    kv_worker_id: int
+    """
+    The kv worker id of the sub-process, kv_worker_id may not be equal to the 
+    actual_worker_id, in the case of mla, it will 'exclude' the effect of TP, 
+    the value is calculated by `extract_world_size_and_kv_rank` in 
+    `lmcache_mp_connector.py`.
+    """
+
+    actual_world_size: int
+    """The actual world size."""
+
+    actual_worker_id: int
+    """The actual worker id of the sub-process."""
+
+    tp_size: int
+    """The tensor parallel size."""
+
+    pp_size: int
+    """The pipeline parallel size."""
+
+
 @dataclass
 class LoadStoreOp:
     block_ids: list[int]
@@ -111,10 +144,8 @@ def __init__(
         server_url: str,
         context: zmq.Context,
         model_name: str,
-        world_size: int,
-        kv_rank: int,
         vllm_block_size: int,
-        tp_size: int = 1,
+        parallel_strategy: ParallelStrategy,
     ):
         """
         Args:
@@ -122,11 +153,10 @@ def __init__(
             context: The ZMQ context
 
             model_name: The model name used for LMCache keys
-            world_size: The world size used for LMCache keys
-            kv_rank: The kv rank used for LMCache keys
             vllm_block_size: The block size used in vLLM
-            tp_size: Tensor-parallel size for MLA
-                multi-reader locking (default 1).
+            parallel_strategy:
+                The parallel strategy, which includes `use_mla`,
+                `world_size`, `worker_id` and so on
         """
         self.mq_client = MessageQueueClient(server_url, context)
 
@@ -134,9 +164,7 @@ def __init__(
         self.lookup_futures: dict[str, MessagingFuture[LookupResult]] = {}
 
         self.model_name = model_name
-        self.world_size = world_size
-        self.worker_id = kv_rank
-        self.tp_size = tp_size
+        self.parallel_strategy = parallel_strategy
 
         # Read chunk size from lmcache
         self.chunk_size = get_lmcache_chunk_size(self.mq_client)
@@ -145,6 +173,21 @@ def __init__(
         )
         self.blocks_in_chunk = self.chunk_size // vllm_block_size
 
+    @property
+    def world_size(self) -> int:
+        """The world size."""
+        return self.parallel_strategy.kv_world_size
+
+    @property
+    def worker_id(self) -> int:
+        """The worker id."""
+        return self.parallel_strategy.kv_worker_id
+
+    @property
+    def tp_size(self) -> int:
+        """The tensor parallel size."""
+        return self.parallel_strategy.tp_size
+
     @_lmcache_nvtx_annotate
     def maybe_submit_lookup_request(
         self,
@@ -308,9 +351,8 @@ def __init__(
         server_url: str,
         context: zmq.Context,
         model_name: str,
-        world_size: int,
-        kv_rank: int,
         vllm_block_size: int,
+        parallel_strategy: ParallelStrategy,
     ):
         self.mq_client = MessageQueueClient(server_url, context)
 
@@ -336,8 +378,7 @@ def __init__(
         self.previously_finished: set[str] = set()
 
         self.model_name = model_name
-        self.world_size = world_size
-        self.worker_id = kv_rank
+        self.parallel_strategy = parallel_strategy
 
         # Read chunk size from lmcache
         chunk_size = get_lmcache_chunk_size(self.mq_client)
@@ -346,6 +387,29 @@ def __init__(
         )
         self.blocks_in_chunk = chunk_size // vllm_block_size
 
+    @property
+    def world_size(self) -> int:
+        """The world size."""
+        return self.parallel_strategy.kv_world_size
+
+    @property
+    def worker_id(self) -> int:
+        """The worker id."""
+        return self.parallel_strategy.kv_worker_id
+
+    @property
+    def use_mla(self) -> bool:
+        """Whether to use MLA."""
+        return self.parallel_strategy.use_mla
+
+    @property
+    def is_first_rank_of_pp_group(self) -> bool:
+        """Is the first rank of the pipeline parallel group."""
+        return (
+            self.parallel_strategy.actual_worker_id % self.parallel_strategy.tp_size
+            == 0
+        )
+
     def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
         """
         Register the kv caches with LMCache server
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py
index 5f14c733a8b0..8786e91a5a14 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import enum
-import inspect
+import os
 from collections.abc import Iterable
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Literal
@@ -28,12 +28,25 @@
         LMCacheMPSchedulerAdapter,
         LMCacheMPWorkerAdapter,
         LoadStoreOp,
+        ParallelStrategy,
     )
+
+    try:
+        from lmcache.v1.multiprocess.custom_types import RequestAllocationRecord
+    except ImportError:
+        from lmcache.v1.multiprocess.custom_types import (
+            BlockAllocationRecord as RequestAllocationRecord,
+        )
 except ImportError:
+    from lmcache.v1.multiprocess.custom_types import (
+        BlockAllocationRecord as RequestAllocationRecord,
+    )
+
     from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_integration import (
         LMCacheMPSchedulerAdapter,
         LMCacheMPWorkerAdapter,
         LoadStoreOp,
+        ParallelStrategy,
     )
 
 if TYPE_CHECKING:
@@ -53,12 +66,6 @@
 logger = lmcache_init_logger(__name__)
 
 
-def _adapter_accepts_tp_size() -> bool:
-    """Check if the imported adapter accepts tp_size."""
-    sig = inspect.signature(LMCacheMPSchedulerAdapter.__init__)
-    return "tp_size" in sig.parameters
-
-
 # Helper functions
 def reformat_block_ids(block_ids: tuple[list[int], ...] | None) -> list[int]:
     if block_ids is None:
@@ -94,8 +101,8 @@ def extract_world_size_and_kv_rank(
         # vLLM constructs TP groups first, and then construct other
         # parallel groups on top of TP groups.
         # for example, TP=4, PP=2,
-        # TP group: [0, 1, 2, 3], [4, 5, 6, 7]
-        # PP group: [0, 4], [1, 5], [2, 6], [3, 7]
+        # PP group: [0, 1, 2, 3], [4, 5, 6, 7]
+        # TP group: [0, 4], [1, 5], [2, 6], [3, 7]
         # So we can "exclude" the effect of TP by rank // tp_size.
         return world_size // tp_size, rank // tp_size
 
@@ -112,24 +119,24 @@ def create_scheduler_adapter(
         vllm_config.parallel_config.rank,
         vllm_config,
     )
-    tp_size = vllm_config.parallel_config.tensor_parallel_size
-
-    # Pass tp_size only when the adapter accepts it so that
-    # a newer vllm can still work with an older LMCache.
-    kwargs: dict[str, Any] = {}
-    if _adapter_accepts_tp_size():
-        kwargs["tp_size"] = tp_size
-
-    return LMCacheMPSchedulerAdapter(
-        server_url,
-        zmq_context,
-        vllm_config.model_config.model,
+    parallel_strategy = ParallelStrategy(
+        mla_enabled(vllm_config.model_config),
         world_size,
         kv_rank,
-        vllm_config.cache_config.block_size,
+        vllm_config.parallel_config.world_size,
+        vllm_config.parallel_config.rank,
+        vllm_config.parallel_config.tensor_parallel_size,
+        vllm_config.parallel_config.pipeline_parallel_size,
+    )
+
+    return LMCacheMPSchedulerAdapter(
+        server_url=server_url,
+        context=zmq_context,
+        model_name=vllm_config.model_config.model,
+        vllm_block_size=vllm_config.cache_config.block_size,
+        parallel_strategy=parallel_strategy,
         mq_timeout=mq_timeout,
         heartbeat_interval=heartbeat_interval,
-        **kwargs,
     )
 
 
@@ -145,13 +152,22 @@ def create_worker_adapter(
         vllm_config.parallel_config.rank,
         vllm_config,
     )
-    return LMCacheMPWorkerAdapter(
-        server_url,
-        zmq_context,
-        vllm_config.model_config.model,
+    parallel_strategy = ParallelStrategy(
+        mla_enabled(vllm_config.model_config),
         world_size,
         kv_rank,
-        vllm_config.cache_config.block_size,
+        vllm_config.parallel_config.world_size,
+        vllm_config.parallel_config.rank,
+        vllm_config.parallel_config.tensor_parallel_size,
+        vllm_config.parallel_config.pipeline_parallel_size,
+    )
+
+    return LMCacheMPWorkerAdapter(
+        server_url=server_url,
+        context=zmq_context,
+        model_name=vllm_config.model_config.model,
+        vllm_block_size=vllm_config.cache_config.block_size,
+        parallel_strategy=parallel_strategy,
         mq_timeout=mq_timeout,
         heartbeat_interval=heartbeat_interval,
     )
@@ -200,8 +216,11 @@ class LMCacheMPRequestTracker:
     # Main state
     state: LMCacheMPRequestState = LMCacheMPRequestState.PREFETCHING
 
+    cache_salt: str = ""
+
     def __init__(self, request: "Request"):
         self.request_id = request.request_id
+        self.cache_salt: str = request.cache_salt or ""
         self.all_token_ids = request.all_token_ids
         self.block_hashes = ConstantList(request.block_hashes)
         self.allocated_block_ids = []
@@ -274,6 +293,7 @@ class LMCacheMPRequestMetadata:
     request_id: str
     direction: Literal["STORE", "RETRIEVE"]
     op: LoadStoreOp
+    cache_salt: str = ""
 
     @staticmethod
     def GetStoreMetadata(
@@ -293,10 +313,31 @@ def GetStoreMetadata(
         # NOTE: the invariant here is that `num_stored_blocks` should
         # always be a multiple of `blocks_in_chunk`
         # TODO: This should be checked everytime we update the num_stored_blocks
+        #
+        # Why computed_blocks uses max(num_vllm_hit_blocks, num_lmcache_hit_blocks):
+        #
+        # Both values represent a prefix of blocks whose KV data is already
+        # available (either from vLLM APC or from LMCache), so they must NOT
+        # be summed (that would double-count the overlapping prefix).
+        #
+        # * num_lmcache_hit_blocks: LMCache-hit blocks are already counted in
+        #   num_stored_blocks (set during lookup), so they must be included
+        #   here to keep the upper bound consistent.  They are NOT re-stored.
+        # * num_vllm_hit_blocks: LMCache stores in units of chunks (N blocks),
+        #   so num_lmcache_hit_blocks is rounded DOWN to the nearest chunk
+        #   boundary.  When vLLM APC hits more blocks than that rounded value
+        #   (e.g. APC=44 blocks, LMCache=32 blocks after chunk alignment),
+        #   using only num_lmcache_hit_blocks would set the upper bound too
+        #   low and silently skip the APC-hit blocks that fall between the
+        #   two values, causing under-storing.  Taking the max ensures we
+        #   always use the tighter (larger) of the two hit counts.
+        computed_blocks = tracker.num_scheduled_tokens // vllm_block_size + max(
+            tracker.num_vllm_hit_blocks, tracker.num_lmcache_hit_blocks
+        )
         min_available_blocks = min(
             len(tracker.block_hashes),
             len(tracker.allocated_block_ids),
-            tracker.num_scheduled_tokens // vllm_block_size,
+            computed_blocks,
         )
         num_staging_blocks = min_available_blocks - tracker.num_stored_blocks
         num_chunks = num_staging_blocks // blocks_in_chunk
@@ -319,6 +360,7 @@ def GetStoreMetadata(
                 request_id=tracker.request_id,
                 direction="STORE",
                 op=op,
+                cache_salt=tracker.cache_salt,
             )
 
             # Update the request tracker
@@ -385,6 +427,7 @@ def GetRetrieveMetadata(
                 request_id=tracker.request_id,
                 direction="RETRIEVE",
                 op=op,
+                cache_salt=tracker.cache_salt,
             )
             return ret
 
@@ -418,7 +461,7 @@ def __repr__(self):
         return self.__str__()
 
 
-class LMCacheMPConnector(KVConnectorBase_V1):
+class LMCacheMPConnectorUpstream(KVConnectorBase_V1):
     """
     The connector for LMCache multi-process mode.
 
@@ -434,7 +477,7 @@ def __init__(
         self,
         vllm_config: "VllmConfig",
         role: KVConnectorRole,
-        kv_cache_config: "KVCacheConfig | None" = None,
+        kv_cache_config: "KVCacheConfig",
     ):
         super().__init__(vllm_config, role, kv_cache_config)
 
@@ -533,12 +576,14 @@ def start_load_kv(self, forward_context: "ForwardContext", **kwargs: Any) -> Non
 
         request_ids = []
         ops = []
+        cache_salts = []
 
         for meta in metadata.requests:
             if meta.direction != "RETRIEVE":
                 continue
             request_ids.append(meta.request_id)
             ops.append(meta.op)
+            cache_salts.append(meta.cache_salt)
 
         if len(request_ids) == 0:
             return
@@ -547,7 +592,9 @@ def start_load_kv(self, forward_context: "ForwardContext", **kwargs: Any) -> Non
             event = torch.cuda.Event(interprocess=True)
             event.record()
 
-        self.worker_adapter.batched_submit_retrieve_requests(request_ids, ops, event)
+        self.worker_adapter.batched_submit_retrieve_requests(
+            request_ids, ops, event, cache_salts=cache_salts
+        )
 
     def wait_for_layer_load(self, layer_name: str) -> None:
         """
@@ -591,16 +638,26 @@ def wait_for_save(self):
 
         This prevents overwrites of paged KV buffer before saving done.
         """
+        # In MLA scenario, only the first rank of the pipeline group
+        # needs to save the KV cache.
+        if (
+            self.worker_adapter.use_mla
+            and not self.worker_adapter.is_first_rank_of_pp_group
+        ):
+            return
+
         metadata = self._get_connector_metadata()
         assert isinstance(metadata, LMCacheMPConnectorMetadata)
 
         request_ids = []
         ops = []
+        cache_salts = []
         for meta in metadata.requests:
             if meta.direction != "STORE":
                 continue
             request_ids.append(meta.request_id)
             ops.append(meta.op)
+            cache_salts.append(meta.cache_salt)
 
         if len(request_ids) == 0:
             return
@@ -609,7 +666,9 @@ def wait_for_save(self):
             event = torch.cuda.Event(interprocess=True)
             event.record()
 
-        self.worker_adapter.batched_submit_store_requests(request_ids, ops, event)
+        self.worker_adapter.batched_submit_store_requests(
+            request_ids, ops, event, cache_salts=cache_salts
+        )
 
     def get_finished(
         self, finished_req_ids: set[str]
@@ -711,6 +770,7 @@ def get_num_new_matched_tokens(
         self.scheduler_adapter.maybe_submit_lookup_request(
             request.request_id,
             token_ids=list(request.all_token_ids),
+            cache_salt=tracker.cache_salt,
         )
 
         ret = self.scheduler_adapter.check_lookup_result(request.request_id)
@@ -837,6 +897,9 @@ def build_connector_meta(
         if len(metadata) > 0:
             logger.debug("Final connector metadata: %s", metadata)
 
+        # Report block allocation deltas to LMCache for observability
+        self._report_block_allocation_deltas(scheduler_output)
+
         return metadata
 
     def update_connector_output(self, connector_output: KVConnectorOutput):
@@ -868,12 +931,31 @@ def request_finished(
             Optional KVTransferParams to be included in the request outputs
             returned by the engine.
         """
+
+        params: dict[str, Any] | None = getattr(request, "kv_transfer_params", None)
+        return_params: dict[str, Any] | None = {} if params is not None else None
+
+        if (
+            params is not None
+            and return_params is not None
+            and "num_lmcache_extra_cached_tokens" in params
+        ):
+            request_tracker = self._get_request_tracker(request.request_id)
+            num_extra_cached_blocks = max(
+                0,
+                request_tracker.num_lmcache_hit_blocks
+                - request_tracker.num_vllm_hit_blocks,
+            )
+            return_params["num_lmcache_extra_cached_tokens"] = (
+                num_extra_cached_blocks * self.vllm_block_size
+            )
+
         # Clean up request tracker to prevent memory leak
         self._cleanup_request_tracker(request.request_id)
         # Notify LMCache to end the session for this request
         self.scheduler_adapter.end_session(request.request_id)
 
-        return True, None
+        return True, return_params
 
     def take_events(self) -> Iterable["KVCacheEvent"]:
         """
@@ -996,8 +1078,9 @@ def _process_cached_requests(
             if request_id not in cached_reqs.resumed_req_ids:
                 request_tracker.append_block_ids(new_block_ids)
 
-            # Update new scheduled tokens
-            num_new_tokens = cached_reqs.num_computed_tokens[idx]
+            # Use the incremental num_scheduled_tokens to
+            # stay consistent with _process_new_requests.
+            num_new_tokens = scheduler_output.num_scheduled_tokens[request_id]
             request_tracker.increase_num_scheduled_tokens(num_new_tokens)
 
             r_meta = LMCacheMPRequestMetadata.GetStoreMetadata(
@@ -1007,6 +1090,64 @@ def _process_cached_requests(
             if r_meta is not None:
                 metadata.add_request_metadata(r_meta)
 
+    def _report_block_allocation_deltas(
+        self,
+        scheduler_output: SchedulerOutput,
+    ) -> None:
+        """Gather per-request block allocation deltas and report to LMCache.
+
+        For new requests: all allocated_block_ids and token_ids are new.
+        For cached requests: only newly appended block_ids and token_ids.
+        """
+        records: list[RequestAllocationRecord] = []
+
+        # New requests: send all tokens covering all allocated blocks so
+        # the L0 metrics subscriber can correctly map each block to its
+        # actual token content (not just the newly-scheduled slice).
+        for new_request in scheduler_output.scheduled_new_reqs:
+            tracker = self.request_trackers.get(new_request.req_id)
+            if tracker is None:
+                continue
+            num_blocks = len(tracker.allocated_block_ids)
+            total_tokens = num_blocks * self.vllm_block_size
+            records.append(
+                RequestAllocationRecord(
+                    req_id=new_request.req_id,
+                    new_block_ids=list(tracker.allocated_block_ids),
+                    new_token_ids=list(tracker.all_token_ids[:total_tokens]),
+                )
+            )
+
+        # Cached requests: only the newly added blocks and their full
+        # token content.  We send all tokens covered by the new blocks
+        # (not just the tokens scheduled this step) so the L0 subscriber
+        # can correctly identify block content.
+        cached_reqs = scheduler_output.scheduled_cached_reqs
+        for idx, request_id in enumerate(cached_reqs.req_ids):
+            new_block_ids = reformat_block_ids(cached_reqs.new_block_ids[idx])
+            if not new_block_ids:
+                continue
+            tracker = self.request_trackers.get(request_id)
+            if tracker is None:
+                continue
+            # The new blocks sit at the end of the request's block list.
+            # Compute the token range they cover.
+            total_blocks = len(tracker.allocated_block_ids)
+            num_new_blocks = len(new_block_ids)
+            start_token = (total_blocks - num_new_blocks) * self.vllm_block_size
+            end_token = total_blocks * self.vllm_block_size
+            new_token_ids = list(tracker.all_token_ids[start_token:end_token])
+            records.append(
+                RequestAllocationRecord(
+                    req_id=request_id,
+                    new_block_ids=new_block_ids,
+                    new_token_ids=new_token_ids,
+                )
+            )
+
+        if records:
+            self.scheduler_adapter.report_block_allocations(records)
+
     def _get_request_tracker(self, request_id: str) -> LMCacheMPRequestTracker:
         assert request_id in self.request_trackers, (
             f"Request tracker for request_id {request_id} not found. "
@@ -1048,3 +1189,38 @@ def _cleanup_request_tracker(self, request_id: str) -> None:
                 "[KVConnector] Cleaned up request_tracker for request %s",
                 request_id,
             )
+
+
+# At module load time, prefer the external LMCacheMPConnector shipped with the
+# ``lmcache`` package. This avoids forcing users to set
+# ``kv_connector_module_path`` when they only configure ``kv_connector``. If
+# the external module is unavailable (e.g. older lmcache version that does
+# not ship this submodule, or any import error), fall back to the builtin
+# implementation defined above.
+def _resolve_lmcache_mp_connector() -> type[KVConnectorBase_V1]:
+    if os.environ.get("LMCACHE_USE_UPSTREAM_MP"):
+        logger.info(
+            "Force use builtin LMCacheMPConnectorUpstream in vLLM.",
+        )
+        return LMCacheMPConnectorUpstream
+
+    try:
+        from lmcache.integration.vllm.lmcache_mp_connector import (
+            LMCacheMPConnector as _ExternalLMCacheMPConnector,
+        )
+
+        logger.info(
+            "Using external LMCacheMPConnector from "
+            "lmcache.integration.vllm.lmcache_mp_connector"
+        )
+        return _ExternalLMCacheMPConnector
+    except ImportError as e:
+        logger.info(
+            "External LMCacheMPConnector is not available (%s), "
+            "falling back to builtin implementation in vLLM.",
+            e,
+        )
+        return LMCacheMPConnectorUpstream
+
+
+LMCacheMPConnector = _resolve_lmcache_mp_connector()
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py
index b49a016641ef..c693902e6132 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py
@@ -21,18 +21,24 @@
 from vllm.config import VllmConfig
 from vllm.distributed.kv_transfer.kv_connector.utils import (
     EngineId,
-    TpKVTopology,
+    TransferTopology,
     get_current_attn_backend,
+    get_current_attn_backends,
 )
 from vllm.distributed.kv_transfer.kv_connector.v1.base import (
     KVConnectorBase_V1,
     KVConnectorMetadata,
     KVConnectorRole,
+    SupportsHMA,
 )
+from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
 from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.mooncake_utils import (
     MooncakeBootstrapServer,
     RegisterWorkerPayload,
 )
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.stats import (
+    MooncakeKVConnectorStats,
+)
 from vllm.distributed.parallel_state import (
     get_pp_group,
     get_tensor_model_parallel_rank,
@@ -41,11 +47,15 @@
 )
 from vllm.forward_context import ForwardContext
 from vllm.logger import init_logger
+from vllm.platforms import current_platform
+from vllm.utils.math_utils import cdiv
 from vllm.utils.network_utils import get_ip, make_zmq_path, make_zmq_socket
 from vllm.v1.attention.backend import AttentionMetadata
 from vllm.v1.attention.backends.utils import get_kv_cache_layout
 from vllm.v1.core.sched.output import SchedulerOutput
+from vllm.v1.kv_cache_interface import FullAttentionSpec, SlidingWindowSpec
 from vllm.v1.request import RequestStatus
+from vllm.v1.worker.utils import select_common_block_size
 
 logger = init_logger(__name__)
 
@@ -249,7 +259,7 @@ class MooncakeXferMetadata(
     remote_port: int
     remote_tp_size: int
     remote_tp_rank: int
-    req_blocks: dict[ReqId, tuple[TransferId, list[int]]]
+    req_blocks: dict[ReqId, tuple[TransferId, list[list[int]]]]
     kv_caches_base_addr: list[int]
     block_lens: list[int]
 
@@ -277,7 +287,7 @@ class MooncakeXferResponse(
 class PullReqMeta:
     d_req_id: ReqId
     transfer_id: TransferId
-    local_block_ids: list[int]
+    local_block_ids: list[list[int]]
     remote_engine_id: EngineId
     remote_bootstrap_addr: str
     # Set expire time to avoid infinitely sending requests.
@@ -290,7 +300,7 @@ class PullReqMeta:
 class SendBlockMeta:
     p_req_id: ReqId
     transfer_id: TransferId
-    local_block_ids: list[int]
+    local_block_ids: list[list[int]]
     ready: asyncio.Event
     expire_time: float = float("inf")
     need_send: int = 0
@@ -303,13 +313,13 @@ def __init__(self):
         # Use (engine_id, dp_rank) to group reqs with same dp.
         # See comments in MooncakeBootstrapServer.
         self.reqs_to_recv: dict[EngineId, dict[ReqId, PullReqMeta]] = defaultdict(dict)
-        self.reqs_to_send: dict[ReqId, tuple[TransferId, list[int]]] = {}
+        self.reqs_to_send: dict[ReqId, tuple[TransferId, list[list[int]]]] = {}
         self.reqs_not_processed: set[TransferId] = set()
 
     def add_new_req(
         self,
         request_id: ReqId,
-        local_block_ids: list[int],
+        local_block_ids: list[list[int]],
         kv_transfer_params: dict[str, Any],
         load_remote_cache: bool = True,
     ):
@@ -327,12 +337,12 @@ def add_new_req(
             self.reqs_to_send[request_id] = (transfer_id, local_block_ids)
 
 
-class MooncakeConnector(KVConnectorBase_V1):
+class MooncakeConnector(KVConnectorBase_V1, SupportsHMA):
     def __init__(
         self,
         vllm_config: VllmConfig,
         role: KVConnectorRole,
-        kv_cache_config: "KVCacheConfig | None" = None,
+        kv_cache_config: "KVCacheConfig",
     ):
         super().__init__(vllm_config, role, kv_cache_config)
 
@@ -341,13 +351,18 @@ def __init__(
         self.engine_id: EngineId = vllm_config.kv_transfer_config.engine_id
 
         if role == KVConnectorRole.SCHEDULER:
+            assert kv_cache_config is not None, (
+                "kv_cache_config is required for SCHEDULER role"
+            )
             self.connector_scheduler: MooncakeConnectorScheduler | None = (
-                MooncakeConnectorScheduler(vllm_config, self.engine_id)
+                MooncakeConnectorScheduler(vllm_config, self.engine_id, kv_cache_config)
             )
             self.connector_worker: MooncakeConnectorWorker | None = None
         elif role == KVConnectorRole.WORKER:
             self.connector_scheduler = None
-            self.connector_worker = MooncakeConnectorWorker(vllm_config, self.engine_id)
+            self.connector_worker = MooncakeConnectorWorker(
+                vllm_config, self.engine_id, kv_cache_config
+            )
 
     @classmethod
     def get_required_kvcache_layout(cls, vllm_config: VllmConfig):
@@ -398,6 +413,14 @@ def request_finished(
         self,
         request: "Request",
         block_ids: list[int],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        assert self.connector_scheduler is not None
+        return self.connector_scheduler.request_finished(request, (block_ids,))
+
+    def request_finished_all_groups(
+        self,
+        request: "Request",
+        block_ids: tuple[list[int], ...],
     ) -> tuple[bool, dict[str, Any] | None]:
         assert self.connector_scheduler is not None
         return self.connector_scheduler.request_finished(request, block_ids)
@@ -438,12 +461,37 @@ def save_kv_layer(
     def wait_for_save(self):
         pass
 
+    def get_kv_connector_stats(self) -> KVConnectorStats | None:
+        """Return worker-local transfer stats since the last call.
+
+        Note the P/D asymmetry: because Mooncake is P-push (P calls
+        batch_transfer_sync_write), P records successful transfer latency,
+        bytes, and descriptor counts, while D only records failures
+        (recv/ZMQ errors). Aggregated NIXL-style dashboards will find
+        successful-transfer metrics on the P worker, not D.
+        """
+        if self.connector_worker is None:
+            return None
+        return self.connector_worker.get_kv_connector_stats()
+
+    @classmethod
+    def build_kv_connector_stats(
+        cls, data: dict[str, Any] | None = None
+    ) -> KVConnectorStats | None:
+        return MooncakeKVConnectorStats(data=data or {})
+
 
 class MooncakeConnectorScheduler:
     """Implementation of Scheduler side methods"""
 
-    def __init__(self, vllm_config: VllmConfig, engine_id: str):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        engine_id: str,
+        kv_cache_config: "KVCacheConfig",
+    ):
         self.vllm_config = vllm_config
+        self.block_size = vllm_config.cache_config.block_size
 
         assert vllm_config.kv_transfer_config
         self.is_kv_producer: bool = (
@@ -454,15 +502,49 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str):
         )
         logger.info("Initializing Mooncake Transfer Engine Scheduler %s", engine_id)
 
+        self._is_hma_required = (
+            not vllm_config.scheduler_config.disable_hybrid_kv_cache_manager
+            and any(
+                not isinstance(g.kv_cache_spec, FullAttentionSpec)
+                for g in kv_cache_config.kv_cache_groups
+            )
+        )
+
         # Requests that need to start recv/send.
         # New requests are added by update_state_after_alloc in
         # the scheduler. Used to make metadata passed to Worker.
-        self._reqs_need_recv: dict[ReqId, tuple[Request, list[int]]] = {}
-        self._reqs_need_send: dict[ReqId, tuple[Request, list[int]]] = {}
+        self._reqs_need_recv: dict[ReqId, tuple[Request, list[list[int]]]] = {}
+        self._reqs_need_send: dict[ReqId, tuple[Request, list[list[int]]]] = {}
         # Reqs to remove from processed set because they're not to send after
         # remote prefill or aborted.
         self._reqs_not_processed: set[TransferId] = set()
 
+        # Compute sliding window block counts per KV cache group.
+        sw_sizes_tokens: list[tuple[int, int]] = [
+            (g.kv_cache_spec.sliding_window, g.kv_cache_spec.block_size)
+            if isinstance(g.kv_cache_spec, SlidingWindowSpec)
+            else (0, self.block_size)
+            for g in kv_cache_config.kv_cache_groups
+        ]
+        # cdiv(n_tokens, block_size) gives blocks/window; add 1 to
+        # conservatively account for boundary overlap.
+        self.blocks_per_sw = [
+            cdiv(n_tokens, block_size) + 1 if n_tokens else 0
+            for n_tokens, block_size in sw_sizes_tokens
+        ]
+
+    def get_sw_clipped_blocks(
+        self,
+        block_ids: tuple[list[int], ...] | list[list[int]],
+    ) -> list[list[int]]:
+        """Clip per-group block IDs to sliding window size."""
+        if len(block_ids) == 0 or not self._is_hma_required:
+            return list(block_ids)
+        return [
+            blocks[-self.blocks_per_sw[i] :] if self.blocks_per_sw[i] > 0 else blocks
+            for i, blocks in enumerate(block_ids)
+        ]
+
     def get_num_new_matched_tokens(
         self, request: "Request", num_computed_tokens: int
     ) -> tuple[int, bool]:
@@ -527,9 +609,12 @@ def update_state_after_alloc(
                 # If remote_blocks and num_external_tokens = 0, we have
                 # a full prefix cache hit on the D worker. We need to call
                 # send_notif in _read_blocks to free the memory on the P.
-                local_block_ids = (
-                    blocks.get_unhashed_block_ids() if num_external_tokens > 0 else []
+                unhashed_block_ids = (
+                    blocks.get_unhashed_block_ids_all_groups()
+                    if num_external_tokens > 0
+                    else ()
                 )
+                local_block_ids = self.get_sw_clipped_blocks(unhashed_block_ids)
                 # Get unhashed blocks to pull from remote.
                 self._reqs_need_recv[request.request_id] = (request, local_block_ids)
             else:
@@ -584,7 +669,7 @@ def build_connector_meta(
     def request_finished(
         self,
         request: "Request",
-        block_ids: list[int],
+        block_ids: tuple[list[int], ...],
     ) -> tuple[bool, dict[str, Any] | None]:
         """
         Once a request is finished, determine whether request blocks
@@ -627,10 +712,13 @@ def request_finished(
 
         # TODO: check whether block_ids actually ever be 0. If not we could
         # remove the conditional below
-        delay_free_blocks = len(block_ids) > 0
+        delay_free_blocks = any(len(group) > 0 for group in block_ids)
 
         if delay_free_blocks:
-            self._reqs_need_send[request.request_id] = (request, block_ids)
+            self._reqs_need_send[request.request_id] = (
+                request,
+                self.get_sw_clipped_blocks(block_ids),
+            )
 
         return delay_free_blocks, None
 
@@ -638,13 +726,22 @@ def request_finished(
 class MooncakeConnectorWorker:
     """Implementation of Worker side methods"""
 
-    def __init__(self, vllm_config: VllmConfig, engine_id: str):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        engine_id: str,
+        kv_cache_config: "KVCacheConfig | None" = None,
+    ):
         if TransferEngine is None:
             logger.error("Mooncake is not available")
             raise RuntimeError("Mooncake is not available")
         logger.info("Initializing Mooncake Transfer Engine worker %s", engine_id)
 
         self.vllm_config = vllm_config
+        # Capture device BEFORE TransferEngine init — MNNVL's NVLink allocator
+        # may change the current CUDA device during engine.initialize().
+        self.device_id = torch.accelerator.current_device_index()
+        current_platform.set_device(self.device_id)
 
         self.engine = TransferEngine()
         self.hostname = get_ip()
@@ -705,9 +802,12 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str):
         # For kv_both, we will act both prefiller and decoder.
         if not self.is_kv_consumer:
             # Background threads for sending kvcaches to D.
+            # Each pool thread must be bound to the correct CUDA device
+            # because CUDA device selection is thread-local.
             self._sender_executor = ThreadPoolExecutor(
                 max_workers=self.num_sender_workers,
                 thread_name_prefix="vllm-mooncake-sender",
+                initializer=self._bind_sender_thread_device,
             )
             logger.debug(
                 "Mooncake Prefiller: use %d workers to send kvcaches",
@@ -739,10 +839,14 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str):
         self.finished_sending_reqs: set[ReqId] = set()
         self.finished_recving_reqs: set[ReqId] = set()
 
+        self.xfer_stats = MooncakeKVConnectorStats()
+
         self.block_size = vllm_config.cache_config.block_size
         self.model_config = vllm_config.model_config
         self.cache_config = vllm_config.cache_config
+        self.kv_cache_config = kv_cache_config
         self.use_mla = self.model_config.use_mla
+        self._sync_block_size_with_kernel()
 
         # Get the attention backend from the first layer
         # NOTE (NickLucche) models with multiple backends are not supported yet
@@ -753,13 +857,13 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str):
         logger.debug("Detected kv cache layout %s", self.kv_cache_layout)
 
         self._tp_size: dict[EngineId, int] = {self.engine_id: self.tp_size}
-        self._block_size: dict[EngineId, int] = {self.engine_id: self.block_size}
-        self.kv_topo = TpKVTopology(
+        self.transfer_topo = TransferTopology(
             tp_rank=self.tp_rank,
+            tp_size=self.tp_size,
+            block_size=self.block_size,
             engine_id=self.engine_id,
-            remote_tp_size=self._tp_size,  # shared state
-            remote_block_size=self._block_size,  # shared state
             is_mla=self.use_mla,
+            is_mamba=False,
             total_num_kv_heads=self.model_config.get_total_num_kv_heads(),
             attn_backends=[backend],
         )
@@ -769,6 +873,23 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str):
         self._xfer_meta_decoder = msgspec.msgpack.Decoder(MooncakeXferMetadata)
         self._xfer_resp_decoder = msgspec.msgpack.Decoder(MooncakeXferResponse)
 
+    def _sync_block_size_with_kernel(self) -> None:
+        # When speculative decoding (e.g. Eagle) is enabled, the main model
+        # and draft model may use different attention backends with different
+        # physical block sizes. Pick the common (smallest) block size so that
+        # KV-cache registration and transfer work correctly for both models.
+        backends = get_current_attn_backends(self.vllm_config)
+        kernel_block_size = select_common_block_size(self.block_size, backends)
+        if self.block_size != kernel_block_size:
+            logger.info_once(
+                "User-specified logical block size (%s) does not match"
+                " physical kernel block size (%s). Using the latter.",
+                self.block_size,
+                kernel_block_size,
+            )
+            assert self.block_size > kernel_block_size
+            self.block_size = kernel_block_size
+
     def __del__(self):
         self.shutdown()
 
@@ -883,7 +1004,7 @@ async def send_kv_to_decode(
         self, identity: bytes, sock: zmq.asyncio.Socket, meta: MooncakeXferMetadata
     ):
         pending_reqs: dict[ReqId, SendBlockMeta] = {}
-        remote_tp_ranks = self.kv_topo.get_target_remote_ranks(meta.remote_tp_size)
+        remote_tp_ranks = self.transfer_topo.handshake_target_ranks(meta.remote_tp_size)
         if meta.remote_tp_rank not in remote_tp_ranks:
             # This D worker does not pair with the P worker.
             msg = (
@@ -1075,27 +1196,61 @@ async def _build_transfer_params(
         remote_session = f"{agent_meta.remote_hostname}:{agent_meta.remote_port}"
 
         for d_req_id, send_meta in ready_reqs:
-            _, remote_block_ids = agent_meta.req_blocks[d_req_id]
-            num_remote_blocks = len(remote_block_ids)
-            if num_remote_blocks == 0:
+            _, remote_block_ids_per_group = agent_meta.req_blocks[d_req_id]
+
+            if not remote_block_ids_per_group or all(
+                len(g) == 0 for g in remote_block_ids_per_group
+            ):
                 continue
 
-            local_block_ids = send_meta.local_block_ids
-            # Partial prefix cache hit: just read uncomputed blocks.
-            num_local_blocks = len(local_block_ids)
-            if num_local_blocks < num_remote_blocks:
+            # Per-group partial hit trimming, then flatten.
+            # With HMA, groups share the same KV tensor but use different
+            # block ranges.  We trim and concatenate so the coalescer and
+            # address math see one flat block list — same as non-HMA, but
+            # now including blocks from every group.
+            local_block_ids: list[int] = []
+            remote_block_ids: list[int] = []
+            has_block_error = False
+            if len(send_meta.local_block_ids) != len(remote_block_ids_per_group):
                 logger.error(
-                    "req %s: local blocks(%d) less than remote blocks(%d)!",
+                    "req %s: KV group count mismatch: local=%d, remote=%d",
                     d_req_id,
-                    num_local_blocks,
-                    num_remote_blocks,
+                    len(send_meta.local_block_ids),
+                    len(remote_block_ids_per_group),
                 )
+                err_reqs.append(d_req_id)
+                if err_msg is None:
+                    err_msg = "KV group count mismatch"
+                continue
+            for local_group, remote_group in zip(
+                send_meta.local_block_ids, remote_block_ids_per_group
+            ):
+                n_local = len(local_group)
+                n_remote = len(remote_group)
+                if n_local < n_remote:
+                    logger.error(
+                        "req %s: local blocks(%d) < remote blocks(%d) "
+                        "in a KV cache group",
+                        d_req_id,
+                        n_local,
+                        n_remote,
+                    )
+                    has_block_error = True
+                    break
+                if n_local > n_remote:
+                    # Partial prefix cache hit: just read uncomputed blocks.
+                    local_group = local_group[-n_remote:]
+                local_block_ids.extend(local_group)
+                remote_block_ids.extend(remote_group)
+
+            if has_block_error:
                 err_reqs.append(d_req_id)
                 if err_msg is None:
                     err_msg = "P num blocks less than D"
                 continue
-            if num_local_blocks > num_remote_blocks:
-                local_block_ids = local_block_ids[-num_remote_blocks:]
+
+            if not local_block_ids:
+                continue
 
             # Group by indices
             group_local_block_ids, group_remote_block_ids = group_concurrent_contiguous(
@@ -1187,12 +1342,18 @@ async def _build_transfer_params(
             logger.debug(
                 "Sending kv_caches for request %s (%d blocks) to %s",
                 d_req_id,
-                num_remote_blocks,
+                len(local_block_ids),
                 remote_session,
             )
 
         return src_ptrs, dst_ptrs, lengths, err_reqs, err_msg
 
+    def _bind_sender_thread_device(self) -> None:
+        """ThreadPoolExecutor initializer — binds each pool thread to the
+        correct CUDA device.  CUDA device selection is thread-local, so
+        without this, NVLink transfers fail for TP ranks > 0."""
+        current_platform.set_device(self.device_id)
+
     def _send_blocks(
         self,
         remote_session: str,
@@ -1204,11 +1365,23 @@ def _send_blocks(
         ret_value = self.engine.batch_transfer_sync_write(
             remote_session, src_ptrs, dst_ptrs, lengths
         )
+        duration = time.perf_counter() - start_time
         if ret_value == 0:
-            logger.debug(
-                "Sending to %s done, took %s",
+            self.xfer_stats.record_transfer(
+                duration_s=duration,
+                total_bytes=sum(lengths),
+                num_descs=len(src_ptrs),
+            )
+            logger.debug("Sending to %s done, took %s", remote_session, duration)
+        else:
+            self.xfer_stats.record_failed_transfer()
+            logger.warning(
+                "Sending to %s failed (ret=%s) after %s (%d descriptors, %d bytes)",
                 remote_session,
-                time.perf_counter() - start_time,
+                ret_value,
+                duration,
+                len(src_ptrs),
+                sum(lengths),
             )
         return ret_value
 
@@ -1222,7 +1395,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
         seen_base_addresses = []
         self.block_len_per_layer = []
 
-        split_k_and_v = self.kv_topo.split_k_and_v
+        split_k_and_v = self.transfer_topo.split_k_and_v
         tensor_size_bytes = None
         for layer_name, cache_or_caches in kv_caches.items():
             cache_list = cache_or_caches if split_k_and_v else [cache_or_caches]
@@ -1239,26 +1412,24 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
                     continue
 
                 seen_base_addresses.append(base_addr)
-                curr_tensor_size_bytes = cache.nbytes
 
                 if tensor_size_bytes is None:
-                    tensor_size_bytes = curr_tensor_size_bytes
+                    tensor_size_bytes = cache.nbytes
                     self.num_blocks = cache.shape[0]
                 assert cache.shape[0] == self.num_blocks, (
                     "All kv cache tensors must have the same number of blocks"
                 )
-                assert curr_tensor_size_bytes % self.num_blocks == 0, (
-                    "Mooncake expects each kv cache tensor size to be "
-                    "divisible by the number of blocks."
-                )
-                self.block_len_per_layer.append(
-                    curr_tensor_size_bytes // self.num_blocks
-                )
 
-                kernel_block_size = cache.shape[-2 if self.use_mla else -3]
-                assert self.block_size == kernel_block_size
+                # Use stride-based block length so RDMA reaches the last
+                # block's padding (e.g. DeepseekV4 MLA alignment). stride(0)
+                # reflects the actual byte distance between consecutive
+                # blocks in GPU memory, which matches or exceeds the
+                # shape-based size.
+                block_len = cache.stride(0) * cache.element_size()
+
+                self.block_len_per_layer.append(block_len)
                 kv_data_ptrs.append(base_addr)
-                kv_data_lens.append(curr_tensor_size_bytes)
+                kv_data_lens.append(self.num_blocks * block_len)
 
         self.kv_caches_base_addr = seen_base_addresses
         self.seen_base_addresses = seen_base_addresses
@@ -1311,6 +1482,7 @@ async def fetch_finished_sending_reqs(self) -> set[ReqId]:
                     send_meta.p_req_id,
                     envs.VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT,
                 )
+                self.xfer_stats.record_kv_expired_req()
                 finished_sending_reqs.add(send_meta.p_req_id)
                 expired_transfer_id.append(transfer_id)
 
@@ -1351,6 +1523,13 @@ def get_finished(self) -> tuple[set[str] | None, set[str] | None]:
 
         return finished_sending_reqs or None, finished_recving_reqs or None
 
+    def get_kv_connector_stats(self) -> KVConnectorStats | None:
+        """Return transfer stats collected since the last call, or None
+        if nothing has been recorded in this interval."""
+        if self.xfer_stats.is_empty():
+            return None
+        return self.xfer_stats.clone_and_reset()
+
     async def receive_kv_from_single_worker(
         self,
         worker_addr: str,
@@ -1397,6 +1576,7 @@ async def receive_kv_from_single_worker(
                             req_ids,
                             response.err_msg,
                         )
+                        self.xfer_stats.record_failed_recv()
                         return
                     self.process_pulling_result(response, pull_metas)
                     if response.status == MooncakeXferResponseStatus.FINISH:
@@ -1405,6 +1585,7 @@ async def receive_kv_from_single_worker(
             logger.debug("ZMQ context terminated, exiting Mooncake receiver thread.")
         except Exception as e:
             logger.error("MooncakeXferMetadata transfer failed for %s: %s", req_ids, e)
+            self.xfer_stats.record_failed_recv()
             return
 
     def process_pulling_result(
@@ -1464,8 +1645,8 @@ def receive_kv(
         remote_engine_id: EngineId,
         pull_metas: dict[ReqId, PullReqMeta],
     ):
-        remote_tp_ranks = self.kv_topo.get_target_remote_ranks_from_engine_id(
-            remote_engine_id
+        remote_tp_ranks = self.transfer_topo.handshake_target_ranks(
+            self._tp_size[remote_engine_id]
         )
         count = len(remote_tp_ranks)
         logger.debug(
@@ -1556,7 +1737,7 @@ def start_load_kv(self, metadata: MooncakeConnectorMetadata):
             )
 
     def _producer_cache_is_replicated(self) -> bool:
-        return self.kv_topo.replicates_kv_cache(self.engine_id)
+        return self.transfer_topo.local_replicates_kv_cache
 
     def _get_transfer_regions(
         self, base_addrs: list[int], block_lens: list[int]
@@ -1564,7 +1745,7 @@ def _get_transfer_regions(
         return _expand_transfer_regions(
             base_addrs=base_addrs,
             block_lens=block_lens,
-            is_kv_layout_blocks_first=self.kv_topo.is_kv_layout_blocks_first,
+            is_kv_layout_blocks_first=self.transfer_topo.is_kv_layout_blocks_first,
         )
 
     def _get_sender_transfer_plan(
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_utils.py b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_utils.py
index 2d158387f060..a3c9200e8261 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_utils.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_utils.py
@@ -8,6 +8,7 @@
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 
+from vllm.config import ParallelConfig
 from vllm.distributed.kv_transfer.kv_connector.utils import EngineId
 from vllm.logger import init_logger
 
@@ -16,6 +17,15 @@
 logger = init_logger(__name__)
 
 
+def get_mooncake_dp_engine_index(parallel_config: ParallelConfig) -> int:
+    """Return the per-engine DP index used for Mooncake side channels."""
+    if parallel_config.local_engines_only:
+        assert parallel_config.data_parallel_rank_local is not None
+        return parallel_config.data_parallel_rank_local
+
+    return parallel_config.data_parallel_index
+
+
 class RegisterWorkerPayload(BaseModel):
     engine_id: EngineId
     dp_rank: int
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/rdma_utils.py b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/rdma_utils.py
new file mode 100644
index 000000000000..9ee0d2cc5423
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/rdma_utils.py
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Mooncake requester config helpers."""
+
+from collections.abc import Mapping
+from typing import Any
+
+import torch
+
+import vllm.envs as envs
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+def normalize_string_override(value: Any) -> str | None:
+    if not isinstance(value, str):
+        return None
+    normalized = value.strip()
+    return normalized or None
+
+
+def get_current_physical_gpu_index() -> int | None:
+    try:
+        from vllm.platforms import current_platform
+    except ImportError:
+        return None
+
+    try:
+        device_index = torch.accelerator.current_device_index()
+        physical_device_id = current_platform.device_id_to_physical_device_id(
+            device_index
+        )
+        return int(physical_device_id)
+    except Exception:
+        return None
+
+
+def get_requester_local_hostname(local_ip: str) -> str:
+    override = normalize_string_override(envs.MOONCAKE_REQUESTER_LOCAL_HOSTNAME)
+    if override is not None:
+        return override
+    return local_ip
+
+
+def get_configured_preferred_segment(
+    extra_config: Mapping[str, Any],
+) -> str | None:
+    preferred_segment = normalize_string_override(extra_config.get("preferred_segment"))
+    if preferred_segment is not None:
+        return preferred_segment
+    if extra_config.get("preferred_segment") is not None:
+        raise ValueError(
+            "Mooncake preferred_segment override must be a non-empty string"
+        )
+
+    env_value = normalize_string_override(envs.MOONCAKE_PREFERRED_SEGMENT)
+    if env_value is not None:
+        logger.info(
+            "Mooncake preferred_segment from MOONCAKE_PREFERRED_SEGMENT: %s",
+            env_value,
+        )
+        return env_value
+    return None
+
+
+def _get_explicit_worker_rnic(device_list: str) -> str:
+    entries = [entry.strip() for entry in device_list.split(",")]
+    if any(not entry for entry in entries):
+        raise ValueError(
+            "Mooncake worker device_name contains an empty RDMA device entry"
+        )
+    if len(entries) == 1:
+        return entries[0]
+
+    gpu_index = get_current_physical_gpu_index()
+    if gpu_index is None:
+        raise RuntimeError(
+            "Mooncake RDMA requester could not determine the local physical GPU index"
+        )
+    if gpu_index >= len(entries):
+        raise ValueError(
+            "Mooncake worker device list does not cover local GPU "
+            f"{gpu_index}: {device_list}"
+        )
+    device_name = entries[gpu_index]
+    logger.info(
+        "Mooncake selected worker RNIC %s from explicit device list for local GPU %s",
+        device_name,
+        gpu_index,
+    )
+    return device_name
+
+
+def get_configured_worker_rnic(
+    *,
+    protocol: str,
+    configured_device: str,
+) -> str:
+    normalized_device = normalize_string_override(configured_device)
+    if normalized_device is not None:
+        return _get_explicit_worker_rnic(normalized_device)
+
+    if protocol not in {"rdma", "efa"}:
+        return ""
+
+    logger.warning(
+        "No RDMA devices specified for Mooncake backend (protocol=%s). "
+        "Set 'device_name' in mooncake_config.json to a single RNIC name "
+        "or a comma-separated CSV indexed by physical GPU; falling back to "
+        "Mooncake's built-in auto-selection, which may converge on the same "
+        "NIC across all DP ranks and saturate bandwidth.",
+        protocol,
+    )
+    return ""
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/stats.py b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/stats.py
new file mode 100644
index 000000000000..d177f55cc720
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/stats.py
@@ -0,0 +1,146 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Stats container for the Mooncake connector."""
+
+import threading
+from dataclasses import dataclass
+from typing import Any
+
+import numpy as np
+
+from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
+    KVConnectorStats,
+)
+
+# TODO(mooncake-stats): add MooncakePromMetrics (mirror NixlPromMetrics)
+# and wire it via MooncakeConnector.build_prom_metrics in a follow-up PR.
+
+
+@dataclass
+class MooncakeKVConnectorStats(KVConnectorStats):
+    """Container for Mooncake KV transfer performance metrics.
+
+    `_lock` serializes record_* against clone_and_reset so each row's
+    appends are atomic and column lengths stay aligned. Writers run on
+    the sender pool / receiver loop / sender loop; reader runs on the
+    main worker thread.
+    """
+
+    def __post_init__(self):
+        self._lock = threading.Lock()
+        if not self.data:
+            self.reset()
+
+    # threading.Lock is not picklable; strip it from the wire form and
+    # rebuild a fresh per-process lock on the receiver side.
+    def __getstate__(self) -> dict[str, Any]:
+        state = self.__dict__.copy()
+        state.pop("_lock", None)
+        return state
+
+    def __setstate__(self, state: dict[str, Any]) -> None:
+        self.__dict__.update(state)
+        self._lock = threading.Lock()
+
+    def reset(self):
+        self.data: dict[str, list[float | int]] = {
+            "transfer_duration": [],
+            "bytes_transferred": [],
+            "num_descriptors": [],
+            "num_failed_transfers": [],
+            "num_failed_recvs": [],
+            "num_kv_expired_reqs": [],
+        }
+
+    def record_transfer(self, duration_s: float, total_bytes: int, num_descs: int):
+        with self._lock:
+            self.data["transfer_duration"].append(duration_s)
+            self.data["bytes_transferred"].append(total_bytes)
+            self.data["num_descriptors"].append(num_descs)
+
+    # Failure counters store a list of 1s so a future Prom counter can iterate
+    # with .inc(list_item), mirroring NIXL's NixlPromMetrics.observe.
+    def record_failed_transfer(self):
+        with self._lock:
+            self.data["num_failed_transfers"].append(1)
+
+    def record_failed_recv(self):
+        with self._lock:
+            self.data["num_failed_recvs"].append(1)
+
+    def record_kv_expired_req(self):
+        with self._lock:
+            self.data["num_kv_expired_reqs"].append(1)
+
+    def clone_and_reset(self) -> "MooncakeKVConnectorStats":
+        # Copy lists under the lock for length alignment; return a fresh
+        # instance so the snapshot has its own _lock.
+        with self._lock:
+            snapshot_data: dict[str, list[float | int]] = {
+                k: list(v) for k, v in self.data.items()
+            }
+            self.reset()
+        return MooncakeKVConnectorStats(data=snapshot_data)
+
+    def is_empty(self) -> bool:
+        return (
+            self.num_successful_transfers == 0
+            and len(self.data["num_failed_transfers"]) == 0
+            and len(self.data["num_failed_recvs"]) == 0
+            and len(self.data["num_kv_expired_reqs"]) == 0
+        )
+
+    def aggregate(self, other: KVConnectorStats) -> KVConnectorStats:
+        if not other.is_empty():
+            for k, v in other.data.items():
+                accumulator = self.data[k]
+                assert isinstance(accumulator, list)
+                accumulator.extend(v)
+        return self
+
+    def reduce(self) -> dict[str, int | float]:
+        num_failed_transfers = len(self.data["num_failed_transfers"])
+        num_failed_recvs = len(self.data["num_failed_recvs"])
+        num_kv_expired_reqs = len(self.data["num_kv_expired_reqs"])
+
+        if self.num_successful_transfers == 0:
+            return {
+                "Num successful transfers": 0,
+                "Avg xfer time (ms)": 0,
+                "P90 xfer time (ms)": 0,
+                "Avg MB per transfer": 0,
+                "Throughput (MB/s)": 0,
+                "Avg number of descriptors": 0,
+                "Num failed transfers": num_failed_transfers,
+                "Num failed recvs": num_failed_recvs,
+                "Num KV expired reqs": num_kv_expired_reqs,
+            }
+
+        xfer_time = np.asarray(self.data["transfer_duration"])
+        mb = np.asarray(self.data["bytes_transferred"]) / 2**20
+        descs = np.asarray(self.data["num_descriptors"], dtype=np.uint32)
+        n = len(descs)
+        assert n == self.num_successful_transfers
+
+        total_mb = mb.sum()
+        avg_mb = total_mb / n
+        total_time_seconds = xfer_time.sum()
+        throughput_mb_s = (
+            total_mb / total_time_seconds if total_time_seconds > 0 else 0.0
+        )
+
+        return {
+            "Num successful transfers": n,
+            "Avg xfer time (ms)": round(xfer_time.mean() * 1e3, 3),
+            "P90 xfer time (ms)": round(np.percentile(xfer_time, 90).item() * 1e3, 3),
+            "Avg MB per transfer": round(avg_mb, 3),
+            "Throughput (MB/s)": round(throughput_mb_s, 3),
+            "Avg number of descriptors": round(descs.mean(), 1),
+            "Num failed transfers": num_failed_transfers,
+            "Num failed recvs": num_failed_recvs,
+            "Num KV expired reqs": num_kv_expired_reqs,
+        }
+
+    @property
+    def num_successful_transfers(self) -> int:
+        return len(self.data["transfer_duration"])
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/__init__.py b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/__init__.py
new file mode 100644
index 000000000000..208f01a7cb5e
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/connector.py
new file mode 100644
index 000000000000..bf0be63d5e4a
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/connector.py
@@ -0,0 +1,310 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#
+# Adapted from vllm-project/vllm-ascend
+# (vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/).
+"""MooncakeStoreConnector - KV cache connector using MooncakeDistributedStore.
+
+Unlike MooncakeConnector which does direct P2P transfer, this connector
+uses MooncakeDistributedStore as a shared KV cache pool. Both producer
+and consumer instances read/write KV to/from the store independently,
+enabling prefix caching via hash-based deduplication.
+"""
+
+from collections.abc import Iterable
+from typing import Any
+
+import torch
+
+from vllm.config import VllmConfig
+from vllm.distributed.kv_events import (
+    KVCacheEvent,
+    KVConnectorKVEvents,
+    KVEventAggregator,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    KVConnectorBase_V1,
+    KVConnectorMetadata,
+    KVConnectorRole,
+    SupportsHMA,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
+    KVConnectorPromMetrics,
+    KVConnectorStats,
+    PromMetric,
+    PromMetricT,
+)
+from vllm.forward_context import ForwardContext
+from vllm.logger import init_logger
+from vllm.v1.attention.backend import AttentionMetadata
+from vllm.v1.core.kv_cache_manager import KVCacheBlocks
+from vllm.v1.core.sched.output import SchedulerOutput
+from vllm.v1.kv_cache_interface import KVCacheConfig
+from vllm.v1.outputs import KVConnectorOutput
+from vllm.v1.request import Request
+
+from .data import MooncakeStoreConnectorMetadata
+from .metrics import MooncakeStoreConnectorStats, MooncakeStorePromMetrics
+from .scheduler import MooncakeStoreScheduler
+from .worker import MooncakeStoreWorker
+
+logger = init_logger(__name__)
+
+
+class MooncakeStoreKVEvents(KVConnectorKVEvents):
+    """KV event aggregation for MooncakeStoreConnector."""
+
+    def __init__(self, num_workers: int) -> None:
+        self._aggregator = KVEventAggregator(num_workers)
+
+    def add_events(self, events: list[KVCacheEvent]) -> None:
+        self._aggregator.add_events(events)
+
+    def aggregate(self) -> "MooncakeStoreKVEvents":
+        common_events = self._aggregator.get_common_events()
+        self._aggregator.clear_events()
+        self._aggregator.add_events(common_events)
+        self._aggregator.reset_workers()
+        return self
+
+    def increment_workers(self, count: int = 1) -> None:
+        self._aggregator.increment_workers(count)
+
+    def get_all_events(self) -> list[KVCacheEvent]:
+        return self._aggregator.get_all_events()
+
+    def get_number_of_workers(self) -> int:
+        return self._aggregator.get_number_of_workers()
+
+    def clear_events(self) -> None:
+        self._aggregator.clear_events()
+        self._aggregator.reset_workers()
+
+    def __repr__(self) -> str:
+        return f"<MooncakeStoreKVEvents events={self.get_all_events()}>"
+
+
+class MooncakeStoreConnector(KVConnectorBase_V1, SupportsHMA):
+    """KV connector using MooncakeDistributedStore as shared KV pool."""
+
+    @property
+    def prefer_cross_layer_blocks(self) -> bool:
+        extra_config = self._kv_transfer_config.kv_connector_extra_config
+        return (
+            str(extra_config.get("enable_cross_layers_blocks", "False")).lower()
+            == "true"
+        )
+
+    @staticmethod
+    def _validate_kv_cache_config(
+        vllm_config: VllmConfig, kv_cache_config: KVCacheConfig
+    ) -> None:
+        from vllm.v1.kv_cache_interface import CrossAttentionSpec, MambaSpec
+
+        unsupported: list[str] = []
+        cache_block_size = vllm_config.cache_config.block_size
+        for g_idx, g in enumerate(kv_cache_config.kv_cache_groups):
+            spec = g.kv_cache_spec
+            if isinstance(spec, CrossAttentionSpec):
+                unsupported.append(f"group {g_idx}: CrossAttentionSpec")
+            # Enforce Mamba align mode
+            if isinstance(spec, MambaSpec) and spec.block_size != cache_block_size:
+                unsupported.append(
+                    f"group {g_idx}: MambaSpec with block_size="
+                    f"{spec.block_size} != cache_config.block_size="
+                    f"{cache_block_size} (mamba_cache_mode != 'align')"
+                )
+        pcp = vllm_config.parallel_config.prefill_context_parallel_size
+        dcp = vllm_config.parallel_config.decode_context_parallel_size
+        if len(kv_cache_config.kv_cache_groups) > 1 and pcp * dcp > 1:
+            unsupported.append(
+                f"PCP/DCP > 1 (pcp={pcp}, dcp={dcp}) with hybrid attention"
+            )
+        if unsupported:
+            raise ValueError(
+                "MooncakeStoreConnector does not support: " + "; ".join(unsupported)
+            )
+
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        role: KVConnectorRole,
+        kv_cache_config: KVCacheConfig | None = None,
+    ):
+        super().__init__(
+            vllm_config=vllm_config,
+            role=role,
+            kv_cache_config=kv_cache_config,  # type: ignore[arg-type]
+        )
+        assert vllm_config.kv_transfer_config is not None
+        assert kv_cache_config is not None, "kv_cache_config is required"
+        self._validate_kv_cache_config(vllm_config, kv_cache_config)
+        self._kv_cache_config = kv_cache_config
+        self.kv_role = vllm_config.kv_transfer_config.kv_role
+        self._kv_cache_events: MooncakeStoreKVEvents | None = None
+
+        self.connector_scheduler: MooncakeStoreScheduler | None = None
+        self.connector_worker: MooncakeStoreWorker | None = None
+
+        if role == KVConnectorRole.SCHEDULER:
+            self.connector_scheduler = MooncakeStoreScheduler(
+                vllm_config, kv_cache_config
+            )
+        else:
+            self.connector_worker = MooncakeStoreWorker(vllm_config, kv_cache_config)
+
+    # ============================================================
+    # Scheduler-side methods
+    # ============================================================
+
+    def get_num_new_matched_tokens(
+        self,
+        request: Request,
+        num_computed_tokens: int,
+    ) -> tuple[int, bool]:
+        assert self.connector_scheduler is not None
+        return self.connector_scheduler.get_num_new_matched_tokens(
+            request, num_computed_tokens
+        )
+
+    def update_state_after_alloc(
+        self,
+        request: Request,
+        blocks: KVCacheBlocks,
+        num_external_tokens: int,
+    ):
+        assert self.connector_scheduler is not None
+        return self.connector_scheduler.update_state_after_alloc(
+            request, blocks, num_external_tokens
+        )
+
+    def build_connector_meta(
+        self,
+        scheduler_output: SchedulerOutput,
+    ) -> KVConnectorMetadata:
+        assert self.connector_scheduler is not None
+        return self.connector_scheduler.build_connector_meta(scheduler_output)
+
+    def request_finished(
+        self,
+        request: Request,
+        block_ids: list[int],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        return self.request_finished_all_groups(request, (block_ids,))
+
+    def request_finished_all_groups(
+        self,
+        request: Request,
+        block_ids: tuple[list[int], ...],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        assert self.connector_scheduler is not None
+        return self.connector_scheduler.request_finished(request, block_ids)
+
+    def update_connector_output(self, connector_output: KVConnectorOutput):
+        kv_cache_events = connector_output.kv_cache_events
+        if not kv_cache_events or not isinstance(
+            kv_cache_events, MooncakeStoreKVEvents
+        ):
+            return
+
+        if self._kv_cache_events is None:
+            self._kv_cache_events = kv_cache_events
+        else:
+            self._kv_cache_events.add_events(kv_cache_events.get_all_events())
+            self._kv_cache_events.increment_workers(
+                kv_cache_events.get_number_of_workers()
+            )
+
+    def take_events(self) -> Iterable[KVCacheEvent]:
+        if self._kv_cache_events is not None:
+            self._kv_cache_events.aggregate()
+            yield from self._kv_cache_events.get_all_events()
+            self._kv_cache_events.clear_events()
+            self._kv_cache_events = None
+
+    # ============================================================
+    # Worker-side methods
+    # ============================================================
+
+    def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
+        assert self.connector_worker is not None
+        self.connector_worker.register_kv_caches(kv_caches)
+
+    def register_cross_layers_kv_cache(
+        self, kv_cache: torch.Tensor, attn_backend: type
+    ):
+        assert self.connector_worker is not None
+        assert (
+            self._kv_cache_config is not None
+            and len(self._kv_cache_config.kv_cache_groups) == 1
+        ), "Cross-layer KV cache does not supported with hybrid models"
+        self.connector_worker.register_cross_layers_kv_caches(kv_cache)
+
+    def start_load_kv(self, forward_context: ForwardContext, **kwargs: Any) -> None:
+        # No-op: loads are issued in get_finished() for compute overlap.
+        pass
+
+    def wait_for_layer_load(self, layer_name: str) -> None:
+        # No layerwise support - no-op
+        return
+
+    def save_kv_layer(
+        self,
+        layer_name: str,
+        kv_layer: torch.Tensor,
+        attn_metadata: AttentionMetadata,
+        **kwargs: Any,
+    ) -> None:
+        # No layerwise support - no-op
+        return
+
+    def wait_for_save(self):
+        # No-op: stores are issued in get_finished() for compute overlap.
+        pass
+
+    def get_finished(
+        self, finished_req_ids: set[str]
+    ) -> tuple[set[str] | None, set[str] | None]:
+        assert self.connector_worker is not None
+        metadata = self._get_connector_metadata()
+        assert isinstance(metadata, MooncakeStoreConnectorMetadata)
+        return self.connector_worker.get_finished(finished_req_ids, metadata)
+
+    def get_kv_connector_kv_cache_events(
+        self,
+    ) -> MooncakeStoreKVEvents | None:
+        assert self.connector_worker is not None
+        events = self.connector_worker.get_kv_events()
+        if not events:
+            return None
+
+        kv_events = MooncakeStoreKVEvents(num_workers=1)
+        kv_events.add_events(events)
+        return kv_events
+
+    def get_kv_connector_stats(self) -> KVConnectorStats | None:
+        if self.connector_worker is None:
+            return None
+        return self.connector_worker.get_kv_connector_stats()
+
+    @classmethod
+    def build_kv_connector_stats(
+        cls, data: dict[str, Any] | None = None
+    ) -> KVConnectorStats | None:
+        return (
+            MooncakeStoreConnectorStats(data=data)
+            if data is not None
+            else MooncakeStoreConnectorStats()
+        )
+
+    @classmethod
+    def build_prom_metrics(
+        cls,
+        vllm_config: VllmConfig,
+        metric_types: dict[type[PromMetric], type[PromMetricT]],
+        labelnames: list[str],
+        per_engine_labelvalues: dict[int, list[object]],
+    ) -> KVConnectorPromMetrics:
+        return MooncakeStorePromMetrics(
+            vllm_config, metric_types, labelnames, per_engine_labelvalues
+        )
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/coordinator.py b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/coordinator.py
new file mode 100644
index 000000000000..26f3aa263206
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/coordinator.py
@@ -0,0 +1,290 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""External-store cache-hit coordinator for MooncakeStoreConnector."""
+
+from typing import cast
+
+from vllm.v1.core.block_pool import BlockPool
+from vllm.v1.core.kv_cache_utils import (
+    BlockHash,
+    BlockHashList,
+    BlockHashListWithBlockSize,
+    KVCacheBlock,
+)
+from vllm.v1.core.single_type_kv_cache_manager import (
+    SingleTypeKVCacheManager,
+    spec_manager_map,
+)
+from vllm.v1.kv_cache_interface import (
+    FullAttentionSpec,
+    KVCacheGroupSpec,
+    KVCacheSpec,
+    UniformTypeKVCacheSpecs,
+)
+
+# Dummy placeholder hash for store_mask's template computation.
+_DUMMY_BLOCK_HASH = BlockHash(b"\x00" * 32)
+
+
+class ExternalCachedBlockPool:
+    """Duck-typed BlockPool backed by a ``(group_id, hash)`` exists set."""
+
+    def __init__(self, exists: set[tuple[int, bytes]] | None = None) -> None:
+        # ``exists=None`` is used on the recv side where hit_length is already
+        # determined and we just want each spec's manager to apply its own mask.
+        self._exists = exists
+        self.null_block = KVCacheBlock(block_id=0)
+        # Dummy ID 1 for present block for duck-typing.
+        self._present_block = KVCacheBlock(block_id=1)
+
+    def get_cached_block(
+        self,
+        block_hash: BlockHash,
+        group_ids: list[int],
+    ) -> list[KVCacheBlock] | None:
+        # Mirrors BlockPool.get_cached_block: hit only when every group_id
+        # (groups sharing a spec) has the hash cached.
+        if self._exists is None:
+            return [self._present_block] * len(group_ids)
+        h = bytes(block_hash)
+        if all((g, h) in self._exists for g in group_ids):
+            return [self._present_block] * len(group_ids)
+        return None
+
+
+class MooncakeStoreCoordinator:
+    """Mirror of ``HybridKVCacheCoordinator.find_longest_cache_hit`` over an
+    ``ExternalCachedBlockPool``."""
+
+    def __init__(
+        self,
+        kv_cache_groups: list[KVCacheGroupSpec],
+        scheduler_block_size: int,
+        hash_block_size: int,
+        use_eagle: bool = False,
+    ) -> None:
+        assert all(
+            g.kv_cache_spec.block_size % hash_block_size == 0 for g in kv_cache_groups
+        ), "block_size must be divisible by hash_block_size"
+        assert scheduler_block_size % hash_block_size == 0, (
+            f"scheduler_block_size ({scheduler_block_size}) must be a multiple of "
+            f"hash_block_size ({hash_block_size})"
+        )
+        assert all(
+            scheduler_block_size % g.kv_cache_spec.block_size == 0
+            for g in kv_cache_groups
+        ), "scheduler_block_size must be a multiple of each group's block_size"
+        self.kv_cache_groups = kv_cache_groups
+        self.hash_block_size = hash_block_size
+        self.lcm_block_size = scheduler_block_size
+        self.use_eagle = use_eagle
+        self._verify_and_split_kv_cache_groups()
+
+    def _verify_and_split_kv_cache_groups(self) -> None:
+        """Mirrors KVCacheCoordinator.verify_and_split_kv_cache_groups but
+        dispatches via spec_manager_map (we don't allocate managers).
+        """
+        attention_groups: list[
+            tuple[KVCacheSpec, list[int], type[SingleTypeKVCacheManager]]
+        ] = []
+        for i, g in enumerate(self.kv_cache_groups):
+            spec = _unwrap_spec(g.kv_cache_spec)
+            manager_cls = spec_manager_map[type(spec)]
+            for existing_spec, group_ids, existing_cls in attention_groups:
+                if existing_spec == spec:
+                    assert manager_cls is existing_cls
+                    group_ids.append(i)
+                    break
+            else:
+                attention_groups.append((spec, [i], manager_cls))
+        # Full attention first (matches upstream convergence ordering).
+        self.attention_groups = sorted(
+            attention_groups,
+            key=lambda x: not isinstance(x[0], FullAttentionSpec),
+        )
+        self.eagle_attn_group_indices: set[int] = {
+            i
+            for i, (_, group_ids, _) in enumerate(self.attention_groups)
+            if any(self.kv_cache_groups[gid].is_eagle_group for gid in group_ids)
+        }
+        if self.use_eagle and not self.eagle_attn_group_indices:
+            self.eagle_attn_group_indices = set(range(len(self.attention_groups)))
+
+    def find_longest_cache_hit(
+        self,
+        block_hashes: list[BlockHash],
+        max_length: int,
+        cached_block_pool: ExternalCachedBlockPool,
+    ) -> tuple[tuple[list[bool], ...], int]:
+        """Returns ``(load_mask_per_group, hit_length)``. ``mask[g][i]`` is True iff
+        group ``g`` populates chunk ``i`` locally (e.g. SWA and Mamba tail-only);
+        recv-side callers skip False slots."""
+        blocks_per_group, hit_length = self._find_hit_blocks(
+            block_hashes, max_length, cached_block_pool
+        )
+        masks = tuple(
+            [blk is not cached_block_pool.null_block for blk in blocks]
+            for blocks in blocks_per_group
+        )
+        return masks, hit_length
+
+    def load_mask(
+        self,
+        block_hashes: list[BlockHash],
+        token_len: int,
+    ) -> tuple[list[bool], ...]:
+        """Per-group load masks: ``mask[g][i]`` is True iff group ``g``'s
+        spec would populate chunk ``i`` locally at length ``token_len``
+        (e.g. SWA / Mamba tail-only).
+        """
+        masks, _ = self.find_longest_cache_hit(
+            block_hashes, token_len, ExternalCachedBlockPool()
+        )
+        return masks
+
+    def store_mask(self, aligned_token_len: int) -> tuple[list[bool], ...]:
+        """Per-group store masks: ``mask[g][i]`` is True iff chunk ``i`` of
+        group ``g`` would be populated by some future cache hit at length
+        ``L = N * lcm_block_size <= aligned_token_len``.
+        """
+        assert aligned_token_len % self.lcm_block_size == 0, (
+            f"aligned_token_len ({aligned_token_len}) must be a multiple of "
+            f"lcm_block_size ({self.lcm_block_size})"
+        )
+        if aligned_token_len == 0:
+            return tuple([] for _ in self.kv_cache_groups)
+
+        num_chunks_per_group = [
+            aligned_token_len // g.kv_cache_spec.block_size
+            for g in self.kv_cache_groups
+        ]
+
+        # Fast path: single group or full attn groups or uniform block_sizes
+        if all(
+            isinstance(spec, FullAttentionSpec)
+            or spec.block_size == self.lcm_block_size
+            for spec, _, _ in self.attention_groups
+        ):
+            return tuple([True] * n for n in num_chunks_per_group)
+
+        n_segments = aligned_token_len // self.lcm_block_size
+        dummy_hashes: list[BlockHash] = [_DUMMY_BLOCK_HASH] * (
+            self.lcm_block_size // self.hash_block_size
+        )
+        template_masks, _ = self.find_longest_cache_hit(
+            dummy_hashes,
+            max_length=self.lcm_block_size,
+            cached_block_pool=ExternalCachedBlockPool(),
+        )
+        return tuple(
+            list(template_masks[g]) * n_segments
+            for g in range(len(self.kv_cache_groups))
+        )
+
+    def block_hashes_for_spec(
+        self, block_hashes: list[BlockHash], spec: KVCacheSpec
+    ) -> BlockHashList:
+        if spec.block_size == self.hash_block_size:
+            return block_hashes
+        return BlockHashListWithBlockSize(
+            block_hashes, self.hash_block_size, spec.block_size
+        )
+
+    def _find_hit_blocks(
+        self,
+        block_hashes: list[BlockHash],
+        max_length: int,
+        cached_block_pool: ExternalCachedBlockPool,
+    ) -> tuple[tuple[list[KVCacheBlock], ...], int]:
+        """Mirrors HybridKVCacheCoordinator.find_longest_cache_hit but
+        dispatches via spec_manager_map (we don't allocate managers).
+        """
+        if len(self.attention_groups) == 1:
+            spec, group_ids, manager_cls = self.attention_groups[0]
+            hashes = self.block_hashes_for_spec(block_hashes, spec)
+            hit_blocks = manager_cls.find_longest_cache_hit(
+                block_hashes=hashes,
+                max_length=max_length,
+                kv_cache_group_ids=group_ids,
+                block_pool=cast(BlockPool, cached_block_pool),
+                kv_cache_spec=spec,
+                use_eagle=(0 in self.eagle_attn_group_indices),
+                alignment_tokens=spec.block_size,
+            )
+            num_groups = len(self.kv_cache_groups)
+            blocks_by_group: list[list[KVCacheBlock]] = [[] for _ in range(num_groups)]
+            for gid, blks in zip(group_ids, hit_blocks, strict=True):
+                blocks_by_group[gid] = blks
+            return tuple(blocks_by_group), len(hit_blocks[0]) * spec.block_size
+
+        num_groups = len(self.kv_cache_groups)
+        hit_length = max_length
+        hit_blocks_by_group: list[list[KVCacheBlock] | None] = [None] * num_groups
+
+        is_simple_hybrid = len(self.attention_groups) == 2 and isinstance(
+            self.attention_groups[0][0], FullAttentionSpec
+        )
+        eagle_verified: set[int] = set()
+
+        while True:
+            curr_hit_length = hit_length
+
+            for idx, (spec, group_ids, manager_cls) in enumerate(self.attention_groups):
+                cached = hit_blocks_by_group[group_ids[0]]
+                if isinstance(spec, FullAttentionSpec) and cached is not None:
+                    curr_hit_length = (
+                        curr_hit_length // spec.block_size * spec.block_size
+                    )
+                    continue
+
+                use_eagle = (
+                    idx in self.eagle_attn_group_indices and idx not in eagle_verified
+                )
+                _max_length = curr_hit_length
+                if use_eagle:
+                    _max_length = min(curr_hit_length + spec.block_size, max_length)
+                hashes = self.block_hashes_for_spec(block_hashes, spec)
+                hit_blocks = manager_cls.find_longest_cache_hit(
+                    block_hashes=hashes,
+                    max_length=_max_length,
+                    kv_cache_group_ids=group_ids,
+                    block_pool=cast(BlockPool, cached_block_pool),
+                    kv_cache_spec=spec,
+                    use_eagle=use_eagle,
+                    alignment_tokens=self.lcm_block_size,
+                )
+                _new_hit_length = len(hit_blocks[0]) * spec.block_size
+                if use_eagle:
+                    eagle_verified.add(idx)
+                elif _new_hit_length < curr_hit_length:
+                    eagle_verified.clear()
+                curr_hit_length = _new_hit_length
+                for gid, blocks in zip(group_ids, hit_blocks, strict=True):
+                    hit_blocks_by_group[gid] = blocks
+
+            if curr_hit_length >= hit_length:
+                break
+            hit_length = curr_hit_length
+            if is_simple_hybrid:
+                break
+
+        # Truncate full-attention hit_blocks to final converged length;
+        # other specs already trim themselves inside their hit logic.
+        spec0, group_ids0, _ = self.attention_groups[0]
+        if isinstance(spec0, FullAttentionSpec):
+            num_blocks = hit_length // spec0.block_size
+            for gid in group_ids0:
+                full_blks = hit_blocks_by_group[gid]
+                assert full_blks is not None
+                del full_blks[num_blocks:]
+
+        return (
+            tuple(blks if blks is not None else [] for blks in hit_blocks_by_group),
+            hit_length,
+        )
+
+
+def _unwrap_spec(spec: KVCacheSpec) -> KVCacheSpec:
+    if isinstance(spec, UniformTypeKVCacheSpecs):
+        return next(iter(spec.kv_cache_specs.values()))
+    return spec
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/data.py b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/data.py
new file mode 100644
index 000000000000..f3e9a2e64469
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/data.py
@@ -0,0 +1,296 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#
+# Adapted from vllm-project/vllm-ascend
+# (vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/).
+"""Data classes for MooncakeStoreConnector."""
+
+from collections.abc import Iterable
+from dataclasses import dataclass
+
+import torch
+
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    KVConnectorMetadata,
+)
+from vllm.logger import init_logger
+from vllm.utils.math_utils import cdiv
+from vllm.v1.core.kv_cache_utils import (
+    BlockHash,
+    BlockHashListWithBlockSize,
+)
+
+logger = init_logger(__name__)
+
+
+@dataclass
+class KeyMetadata:
+    """Metadata for constructing pool keys."""
+
+    model_name: str
+    tp_rank: int
+    pcp_rank: int
+    dcp_rank: int
+    pp_rank: int
+    group_id: int = 0
+
+
+@dataclass(order=True)
+class PoolKey:
+    """Key for addressing KV cache blocks in the distributed store."""
+
+    key_metadata: KeyMetadata
+    chunk_hash: str
+
+    def __hash__(self):
+        return hash(
+            (
+                self.key_metadata.model_name,
+                self.key_metadata.tp_rank,
+                self.key_metadata.pcp_rank,
+                self.key_metadata.dcp_rank,
+                self.key_metadata.pp_rank,
+                self.key_metadata.group_id,
+                self.chunk_hash,
+            )
+        )
+
+    def to_string(self) -> str:
+        return (
+            f"{self.key_metadata.model_name}"
+            f"@tp_rank:{self.key_metadata.tp_rank}"
+            f"@pcp{self.key_metadata.pcp_rank}"
+            f"@dcp{self.key_metadata.dcp_rank}"
+            f"@pp_rank:{self.key_metadata.pp_rank}"
+            f"@group:{self.key_metadata.group_id}"
+            f"@{self.chunk_hash}"
+        )
+
+
+class ChunkedTokenDatabase:
+    """Maps token positions to store keys and GPU memory addresses."""
+
+    def __init__(
+        self,
+        metadata: KeyMetadata,
+        block_size: int,
+        hash_block_size: int | None = None,
+    ):
+        self.metadata = metadata
+        self.block_size = block_size
+        self.hash_block_size = hash_block_size or block_size
+        if self.block_size % self.hash_block_size != 0:
+            raise ValueError(
+                f"block_size ({self.block_size}) must be a multiple of "
+                f"hash_block_size ({self.hash_block_size})"
+            )
+        self.kv_caches_base_addr: list[int] = []
+        self.block_len: list[int] = []
+
+    def _make_key_by_hash(self, chunk_hash: str) -> PoolKey:
+        return PoolKey(self.metadata, chunk_hash)
+
+    def set_kv_caches_base_addr(self, kv_caches_base_addr: list[int]):
+        self.kv_caches_base_addr = kv_caches_base_addr
+
+    def set_block_len(self, block_len: list[int]):
+        self.block_len = block_len
+
+    def prepare_value(
+        self, start: int, end: int, block_ids: list[int]
+    ) -> tuple[list[int], list[int], int]:
+        """Compute memory addresses and sizes for a token range.
+
+        Returns:
+            (addr_list, size_list, block_id)
+        """
+        addr_list = []
+        size_list = []
+        block_id = block_ids[start // self.block_size]
+        length = len(self.block_len)
+        for index, base_addr in enumerate(self.kv_caches_base_addr):
+            addr = base_addr + block_id * self.block_len[index % length]
+            assert (end - start) % self.block_size == 0
+            size = self.block_len[index % length] * cdiv(end - start, self.block_size)
+            addr_list.append(addr)
+            size_list.append(size)
+        return addr_list, size_list, block_id
+
+    def process_tokens(
+        self,
+        token_len: int,
+        block_hashes: list[BlockHash],
+        mask_num: int = 0,
+    ) -> Iterable[tuple[int, int, PoolKey]]:
+        """Process tokens and yield (start_idx, end_idx, pool_key) tuples.
+
+        Args:
+            token_len: Total number of tokens.
+            block_hashes: Block hashes computed at ``hash_block_size`` granularity.
+                When ``block_size > hash_block_size`` consecutive hashes are merged
+                up to the group's ``block_size`` via ``BlockHashListWithBlockSize``.
+            mask_num: Number of tokens to skip from the beginning.
+        """
+        if not block_hashes:
+            return
+        if self.block_size == self.hash_block_size:
+            chunk_hashes: Iterable[BlockHash] = block_hashes
+        else:
+            chunk_hashes = BlockHashListWithBlockSize(
+                block_hashes, self.hash_block_size, self.block_size
+            )
+        for chunk_id, h in enumerate(chunk_hashes):
+            start_idx = chunk_id * self.block_size
+            if start_idx >= token_len:
+                break
+            end_idx = min(start_idx + self.block_size, token_len)
+            if start_idx < mask_num:
+                continue
+            yield start_idx, end_idx, self._make_key_by_hash(h.hex())
+
+
+@dataclass
+class LoadSpec:
+    """Specification for loading KV cache from external store."""
+
+    vllm_cached_tokens: int
+    kvpool_cached_tokens: int
+    can_load: bool
+    token_len: int = 0
+
+
+@dataclass
+class RequestTracker:
+    """Tracks per-request state across scheduler ticks."""
+
+    req_id: str
+    token_len: int
+    allocated_block_ids: tuple[list[int], ...]
+    num_saved_tokens: int = 0
+    token_ids: list[int] | None = None
+    # Snapshot of the prefill range length at tracker creation time.
+    # For a fresh request this is len(prompt). For a resumed-from-preemption
+    # request it includes previously-generated tokens, which are re-prefilled.
+    prefill_end_tokens: int = 0
+
+    def update(
+        self,
+        new_block_ids: tuple[list[int], ...] | list[int],
+    ) -> None:
+        # Backward-compat: accept a single list (broadcast to single group).
+        if isinstance(new_block_ids, list):
+            new_block_ids = (new_block_ids,)
+        if len(new_block_ids) != len(self.allocated_block_ids):
+            raise ValueError(
+                f"Group count mismatch: tracker has "
+                f"{len(self.allocated_block_ids)} groups, update has "
+                f"{len(new_block_ids)}"
+            )
+        for existing, new in zip(self.allocated_block_ids, new_block_ids, strict=True):
+            if new:
+                existing.extend(new)
+
+
+@dataclass
+class ReqMeta:
+    """Per-request metadata for store put/get operations."""
+
+    req_id: str
+    token_len_chunk: int
+    block_ids: tuple[list[int], ...]
+    block_hashes: list[BlockHash]
+
+    can_save: bool | None = None
+    load_spec: LoadSpec | None = None
+    is_last_chunk: bool | None = None
+    current_event: torch.cuda.Event | None = None
+
+    token_ids: list[int] | None = None
+    original_block_size: int | None = None
+
+    @staticmethod
+    def from_request_tracker(
+        tracker: RequestTracker,
+        block_size: int,
+        load_spec: LoadSpec | None = None,
+        skip_save: bool | None = False,
+        block_hashes: list[BlockHash] | None = None,
+        is_last_chunk: bool | None = None,
+        discard_partial_chunks: bool = True,
+        original_block_size: int | None = None,
+    ) -> "ReqMeta | None":
+        """Create ReqMeta from a RequestTracker."""
+        if block_hashes is None:
+            block_hashes = []
+        input_token_len = tracker.token_len
+
+        chunk_boundary = (
+            cdiv(tracker.num_saved_tokens + 1, block_size) * block_size
+            if discard_partial_chunks
+            else 0
+        )
+        num_tokens_to_save = (
+            (input_token_len // block_size * block_size)
+            if discard_partial_chunks
+            else input_token_len
+        )
+
+        skip_save = skip_save or num_tokens_to_save < chunk_boundary
+        # A ReqMeta must never carry both a save AND a load.
+        # The save would also be wasted work — the bytes are being looked up
+        # in the store right now. Later cached_reqs steps save new tokens
+        # normally.
+        if load_spec is not None and load_spec.can_load:
+            skip_save = True
+        if skip_save and load_spec is None:
+            return None
+
+        if not skip_save:
+            tracker.num_saved_tokens = num_tokens_to_save
+
+        token_ids = None
+        if tracker.token_ids:
+            token_ids = tracker.token_ids
+
+        if load_spec is not None and load_spec.can_load:
+            logger.debug(
+                "Scheduled to load %d tokens for request %s",
+                load_spec.kvpool_cached_tokens,
+                tracker.req_id,
+            )
+        else:
+            load_spec = None
+
+        logger.debug(
+            "request:%s, meta save spec:%s, meta load spec:%s",
+            tracker.req_id,
+            not skip_save,
+            load_spec,
+        )
+        return ReqMeta(
+            req_id=tracker.req_id,
+            token_len_chunk=num_tokens_to_save,
+            block_ids=tracker.allocated_block_ids,
+            can_save=not skip_save,
+            load_spec=load_spec,
+            block_hashes=block_hashes,
+            is_last_chunk=is_last_chunk,
+            token_ids=token_ids,
+            original_block_size=original_block_size,
+        )
+
+
+class MooncakeStoreConnectorMetadata(KVConnectorMetadata):
+    """Metadata passed from scheduler to worker."""
+
+    def __init__(
+        self,
+        unfinished_request_ids: set[str],
+        preempted_req_ids: set[str],
+    ):
+        self.requests: list[ReqMeta] = []
+        self.unfinished_request_ids = unfinished_request_ids
+        self.preempted_req_ids = preempted_req_ids
+
+    def add_request(self, req_meta: ReqMeta) -> None:
+        self.requests.append(req_meta)
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/metrics.py b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/metrics.py
new file mode 100644
index 000000000000..8dc949cf326b
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/metrics.py
@@ -0,0 +1,189 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Per-operation telemetry for MooncakeStoreConnector.
+
+Records one row per Mooncake RPC (``save_exists``, ``save_put``, ``load_get``,
+``lookup_exists``) with duration, key/byte counts, status, and failed-key
+count. Exposed to the logger via ``KVConnectorLogging`` and to Prometheus
+via ``MooncakeStorePromMetrics``.
+"""
+
+from dataclasses import dataclass
+from statistics import fmean
+from typing import Any
+
+from vllm.config import VllmConfig
+from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
+    KVConnectorPromMetrics,
+    KVConnectorStats,
+    PromMetric,
+    PromMetricT,
+)
+
+
+def _nearest_rank_percentile(values: list[float], percentile: float) -> float:
+    if not values:
+        return 0.0
+    sorted_values = sorted(values)
+    rank = max(
+        0, min(len(sorted_values) - 1, int(percentile * len(sorted_values) - 1e-12))
+    )
+    return sorted_values[rank]
+
+
+@dataclass
+class MooncakeStoreConnectorStats(KVConnectorStats):
+    """Serializable Mooncake store communication telemetry."""
+
+    def __post_init__(self):
+        if not self.data:
+            self.reset()
+
+    def reset(self):
+        self.data: dict[str, list[dict[str, int | float | str]]] = {}
+
+    def is_empty(self) -> bool:
+        return not self.data
+
+    def aggregate(self, other: KVConnectorStats) -> KVConnectorStats:
+        if other.is_empty():
+            return self
+        for operation, records in other.data.items():
+            self.data.setdefault(operation, []).extend(records)
+        return self
+
+    def reduce(self) -> dict[str, int | float]:
+        reduced: dict[str, int | float] = {}
+        for operation, records in sorted(self.data.items()):
+            if not records:
+                continue
+            durations = [float(record["duration_seconds"]) for record in records]
+            reduced[f"{operation}_count"] = len(records)
+            reduced[f"{operation}_avg_ms"] = round(fmean(durations) * 1e3, 3)
+            reduced[f"{operation}_p90_ms"] = round(
+                _nearest_rank_percentile(durations, 0.9) * 1e3, 3
+            )
+            reduced[f"{operation}_total_keys"] = sum(
+                int(record["num_keys"]) for record in records
+            )
+            reduced[f"{operation}_total_bytes"] = sum(
+                int(record["num_bytes"]) for record in records
+            )
+            reduced[f"{operation}_failed_keys"] = sum(
+                int(record["num_failed_keys"]) for record in records
+            )
+            reduced[f"{operation}_error_count"] = sum(
+                1 for record in records if record["status"] == "error"
+            )
+        return reduced
+
+    def record_operation(
+        self,
+        operation: str,
+        duration_seconds: float,
+        num_keys: int,
+        *,
+        num_bytes: int = 0,
+        status: str = "ok",
+        num_failed_keys: int = 0,
+    ) -> None:
+        self.data.setdefault(operation, []).append(
+            {
+                "duration_seconds": duration_seconds,
+                "num_keys": num_keys,
+                "num_bytes": num_bytes,
+                "status": status,
+                "num_failed_keys": num_failed_keys,
+            }
+        )
+
+
+class MooncakeStorePromMetrics(KVConnectorPromMetrics):
+    """Prometheus metrics for Mooncake store communication."""
+
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        metric_types: dict[type[PromMetric], type[PromMetricT]],
+        labelnames: list[str],
+        per_engine_labelvalues: dict[int, list[object]],
+    ):
+        super().__init__(vllm_config, metric_types, labelnames, per_engine_labelvalues)
+        metric_labelnames = labelnames + ["operation", "status"]
+        self._metric_cache: dict[tuple[int, str, str], dict[str, PromMetric]] = {}
+
+        self._histogram_operation_time = self._histogram_cls(
+            name="vllm:mooncake_store_operation_time_seconds",
+            documentation="Histogram of Mooncake store communication time.",
+            buckets=[
+                1e-3,
+                5e-3,
+                1e-2,
+                5e-2,
+                1e-1,
+                2e-1,
+                3e-1,
+                4e-1,
+                5e-1,
+                7.5e-1,
+                1.0,
+                1.5,
+                2.0,
+                3.0,
+                4.0,
+            ],
+            labelnames=metric_labelnames,
+        )
+        self._counter_operation_calls = self._counter_cls(
+            name="vllm:mooncake_store_operation_total",
+            documentation="Number of Mooncake store communication operations.",
+            labelnames=metric_labelnames,
+        )
+        self._counter_operation_keys = self._counter_cls(
+            name="vllm:mooncake_store_operation_keys_total",
+            documentation="Number of Mooncake store keys touched by operations.",
+            labelnames=metric_labelnames,
+        )
+        self._counter_operation_bytes = self._counter_cls(
+            name="vllm:mooncake_store_operation_bytes_total",
+            documentation="Number of bytes transferred by Mooncake store operations.",
+            labelnames=metric_labelnames,
+        )
+        self._counter_failed_keys = self._counter_cls(
+            name="vllm:mooncake_store_operation_failed_keys_total",
+            documentation="Number of Mooncake store keys that failed in operations.",
+            labelnames=metric_labelnames,
+        )
+
+    def _get_metrics(
+        self,
+        engine_idx: int,
+        operation: str,
+        status: str,
+    ) -> dict[str, PromMetric]:
+        cache_key = (engine_idx, operation, status)
+        if cache_key not in self._metric_cache:
+            label_values = self.per_engine_labelvalues[engine_idx] + [operation, status]
+            self._metric_cache[cache_key] = {
+                "time": self._histogram_operation_time.labels(*label_values),
+                "calls": self._counter_operation_calls.labels(*label_values),
+                "keys": self._counter_operation_keys.labels(*label_values),
+                "bytes": self._counter_operation_bytes.labels(*label_values),
+                "failed_keys": self._counter_failed_keys.labels(*label_values),
+            }
+        return self._metric_cache[cache_key]
+
+    def observe(self, transfer_stats_data: dict[str, Any] | None, engine_idx: int = 0):
+        if not transfer_stats_data:
+            return
+        for operation, records in transfer_stats_data.items():
+            assert isinstance(records, list)
+            for record in records:
+                assert isinstance(record, dict)
+                status = str(record["status"])
+                metrics = self._get_metrics(engine_idx, operation, status)
+                metrics["time"].observe(float(record["duration_seconds"]))
+                metrics["calls"].inc()
+                metrics["keys"].inc(int(record["num_keys"]))
+                metrics["bytes"].inc(int(record["num_bytes"]))
+                metrics["failed_keys"].inc(int(record["num_failed_keys"]))
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/scheduler.py b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/scheduler.py
new file mode 100644
index 000000000000..452f909d31bd
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/scheduler.py
@@ -0,0 +1,369 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#
+# Adapted from vllm-project/vllm-ascend
+# (vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/).
+"""Scheduler-side logic for MooncakeStoreConnector."""
+
+from typing import Any
+
+from vllm.config import VllmConfig
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    KVConnectorMetadata,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.data import (  # noqa: E501
+    LoadSpec,
+    MooncakeStoreConnectorMetadata,
+    ReqMeta,
+    RequestTracker,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.worker import (  # noqa: E501
+    LookupKeyClient,
+)
+from vllm.logger import init_logger
+from vllm.v1.core.kv_cache_manager import KVCacheBlocks
+from vllm.v1.core.kv_cache_utils import resolve_kv_cache_block_sizes
+from vllm.v1.core.sched.output import NewRequestData, SchedulerOutput
+from vllm.v1.kv_cache_interface import KVCacheConfig
+from vllm.v1.request import Request
+
+logger = init_logger(__name__)
+
+
+def _new_req_prefill_tokens(request: NewRequestData) -> list[int]:
+    """Tokens this prefill will compute KV for.
+
+    Under the v2 model runner, resumed-from-preemption requests appear in
+    ``scheduled_new_reqs`` with ``prefill_token_ids`` set to the request's full
+    token list (prompt + previously-generated). For all other cases this falls
+    back to the original prompt.
+    """
+    if request.prefill_token_ids is not None:
+        return request.prefill_token_ids
+    assert request.prompt_token_ids is not None
+    return request.prompt_token_ids
+
+
+class MooncakeStoreScheduler:
+    """Scheduler-side component for MooncakeStoreConnector."""
+
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        kv_cache_config: KVCacheConfig,
+    ):
+        assert vllm_config.kv_transfer_config is not None
+        self.kv_role = vllm_config.kv_transfer_config.kv_role
+        self.load_async = vllm_config.kv_transfer_config.kv_connector_extra_config.get(
+            "load_async", True
+        )
+        self.client = LookupKeyClient(vllm_config)
+
+        self.pcp_size = vllm_config.parallel_config.prefill_context_parallel_size
+        self.dcp_size = vllm_config.parallel_config.decode_context_parallel_size
+        self.original_block_size = vllm_config.cache_config.block_size
+        # LCM for multi-group HMA; bs * pcp * dcp for single-group. Matches
+        # the engine's own scheduler block size by construction.
+        self._block_size, self._hash_block_size = resolve_kv_cache_block_sizes(
+            kv_cache_config, vllm_config
+        )
+
+        # Per-request state
+        self.load_specs: dict[str, LoadSpec] = {}  # to be loaded
+        self._request_trackers: dict[str, RequestTracker] = {}  # scheduled new requests
+        self._preempted_req_ids: set[str] = set()  # preempted requests
+        self._unfinished_requests: dict[str, tuple[Request, tuple[list[int], ...]]] = {}
+        self._unfinished_request_ids: set[str] = set()
+
+    def get_num_new_matched_tokens(
+        self,
+        request: Request,
+        num_computed_tokens: int,
+    ) -> tuple[int, bool]:
+        """Check for external KV cache hit."""
+        # Look up against the full prefill range, not just the prompt.
+        token_len = request.num_tokens // self._block_size * self._block_size
+        if token_len < self._block_size:
+            return 0, False
+
+        num_external_hit_tokens = self.client.lookup(token_len, request.block_hashes)
+
+        if num_external_hit_tokens == request.num_tokens:
+            # Leave a sub-block tail uncomputed for sampling, on a block
+            # boundary so the recv-side load mask covers every yielded chunk.
+            num_external_hit_tokens = max(
+                0,
+                (request.num_tokens - 1) // self._block_size * self._block_size,
+            )
+
+        if num_external_hit_tokens < num_computed_tokens:
+            need_to_allocate = 0
+        else:
+            need_to_allocate = num_external_hit_tokens - num_computed_tokens
+
+        logger.debug(
+            "Reqid: %s, Total tokens %d, kvpool hit tokens: %d, need to load: %d",
+            request.request_id,
+            request.num_tokens,
+            num_external_hit_tokens,
+            need_to_allocate,
+        )
+
+        if need_to_allocate <= 0:
+            return 0, False
+
+        self.load_specs[request.request_id] = LoadSpec(
+            vllm_cached_tokens=num_computed_tokens,
+            kvpool_cached_tokens=num_external_hit_tokens,
+            can_load=False,
+        )
+
+        return need_to_allocate, self.load_async
+
+    def update_state_after_alloc(
+        self,
+        request: Request,
+        blocks: KVCacheBlocks,
+        num_external_tokens: int,
+    ):
+        """Update state after block allocation."""
+        local_block_ids: tuple[list[int], ...] = ()
+        if num_external_tokens > 0:
+            local_block_ids = blocks.get_block_ids()
+
+        self._unfinished_requests[request.request_id] = (request, local_block_ids)
+        self._unfinished_request_ids.add(request.request_id)
+
+        if request.request_id not in self.load_specs:
+            return
+
+        if num_external_tokens == 0:
+            self.load_specs[request.request_id].can_load = False
+            return
+
+        assert (
+            num_external_tokens > 0
+            and num_external_tokens
+            == self.load_specs[request.request_id].kvpool_cached_tokens
+            - self.load_specs[request.request_id].vllm_cached_tokens
+        ), (
+            f"Mismatch in number of tokens: {num_external_tokens} vs "
+            f"{self.load_specs[request.request_id].kvpool_cached_tokens} - "
+            f"{self.load_specs[request.request_id].vllm_cached_tokens}"
+            f" for request {request.request_id}"
+        )
+
+        self.load_specs[request.request_id].can_load = True
+
+    def build_connector_meta(
+        self, scheduler_output: SchedulerOutput
+    ) -> KVConnectorMetadata:
+        """Build connector metadata for this scheduler step."""
+        force_skip_save = self.kv_role == "kv_consumer"
+
+        for finished_req_id in scheduler_output.finished_req_ids:
+            self.load_specs.pop(finished_req_id, None)
+            self._request_trackers.pop(finished_req_id, None)
+            self._unfinished_requests.pop(finished_req_id, None)
+            self._unfinished_request_ids.discard(finished_req_id)
+            self._preempted_req_ids.discard(finished_req_id)
+
+        preempted_ids = scheduler_output.preempted_req_ids or set()
+        self._preempted_req_ids.update(preempted_ids)
+        for req_id in preempted_ids:
+            self._request_trackers.pop(req_id, None)
+            self._unfinished_requests.pop(req_id, None)
+
+        meta = MooncakeStoreConnectorMetadata(
+            self._unfinished_request_ids,
+            preempted_ids,
+        )
+
+        # Handle new requests
+        for request in scheduler_output.scheduled_new_reqs:
+            load_spec = self.load_specs.pop(request.req_id, None)
+            num_tokens_to_compute = (
+                request.num_computed_tokens
+                + scheduler_output.num_scheduled_tokens[request.req_id]
+            )
+            assert request.req_id in self._unfinished_requests
+            request_tuple = self._unfinished_requests.get(request.req_id)
+            request_real = request_tuple[0]  # type: ignore[index]
+
+            if isinstance(request.block_ids, tuple):
+                # Multi-group: preserve per-group structure.
+                unfolded_block_ids = tuple(b.copy() for b in request.block_ids)
+            else:
+                # Single-group legacy: list[int] -> 1-tuple.
+                unfolded_block_ids = (request.block_ids.copy(),)
+
+            prefill_tokens = _new_req_prefill_tokens(request)
+            request_tracker = RequestTracker(
+                req_id=request.req_id,
+                token_len=num_tokens_to_compute,
+                allocated_block_ids=unfolded_block_ids,
+                num_saved_tokens=0,
+                token_ids=prefill_tokens[:num_tokens_to_compute],
+                prefill_end_tokens=len(prefill_tokens),
+            )
+            self._request_trackers[request.req_id] = request_tracker
+
+            last_chunk_tokens_num = (
+                len(prefill_tokens) // self._block_size * self._block_size
+            )
+
+            req_meta = ReqMeta.from_request_tracker(
+                request_tracker,
+                self._block_size,
+                load_spec=load_spec,
+                skip_save=force_skip_save,
+                block_hashes=request_real.block_hashes,
+                is_last_chunk=(request_tracker.token_len >= last_chunk_tokens_num),
+                original_block_size=self.original_block_size,
+            )
+            if req_meta is not None:
+                meta.add_request(req_meta)
+
+        # Handle cached (running, or MRV1 resumed-from-preemption) requests
+        cached_reqs = scheduler_output.scheduled_cached_reqs
+        if not force_skip_save:
+            for i, req_id in enumerate(cached_reqs.req_ids):
+                new_block_ids = cached_reqs.new_block_ids[i]
+                if not new_block_ids:
+                    continue
+
+                req_meta = None
+                if req_id in self._preempted_req_ids:
+                    # Resumed after preemption
+                    if isinstance(new_block_ids, tuple):
+                        new_block_ids = tuple(b.copy() for b in new_block_ids)
+                    else:
+                        new_block_ids = (new_block_ids.copy(),)
+                    self._preempted_req_ids.discard(req_id)
+                    load_spec = self.load_specs.pop(req_id, None)
+                    request_tuple = self._unfinished_requests.get(req_id)
+                    request_real = request_tuple[0]  # type: ignore[index]
+                    num_tokens_to_compute = (
+                        request_real.num_computed_tokens
+                        + scheduler_output.num_scheduled_tokens[req_id]
+                    )
+                    # On resume, the request re-prefills prompt + previously
+                    # generated tokens (all_token_ids).
+                    prefill_tokens = list(request_real.all_token_ids)
+                    request_tracker = RequestTracker(
+                        req_id=req_id,
+                        token_len=num_tokens_to_compute,
+                        allocated_block_ids=new_block_ids,
+                        num_saved_tokens=0,
+                        token_ids=prefill_tokens[:num_tokens_to_compute].copy(),
+                        prefill_end_tokens=len(prefill_tokens),
+                    )
+                    self._request_trackers[req_id] = request_tracker
+
+                    last_chunk_tokens_num = (
+                        len(prefill_tokens) // self._block_size * self._block_size
+                    )
+                    req_meta = ReqMeta.from_request_tracker(
+                        request_tracker,
+                        self._block_size,
+                        load_spec=load_spec,
+                        skip_save=force_skip_save,
+                        block_hashes=request_real.block_hashes,
+                        is_last_chunk=(
+                            request_tracker.token_len >= last_chunk_tokens_num
+                        ),
+                        original_block_size=self.original_block_size,
+                    )
+                else:
+                    # Decode/chunked request
+                    request_tracker = self._request_trackers[req_id]
+                    num_new_tokens = scheduler_output.num_scheduled_tokens[req_id]
+                    req_tuple = self._unfinished_requests.get(req_id)
+                    if req_tuple:
+                        unfinished_req = req_tuple[0]
+                        num_current_tokens = request_tracker.token_len
+                        new_token_ids = unfinished_req.all_token_ids[
+                            num_current_tokens : num_current_tokens + num_new_tokens
+                        ]
+                        request_tracker.token_len += len(new_token_ids)
+                    else:
+                        raise ValueError(
+                            f"Request {req_id} is not in _unfinished_requests"
+                        )
+                    num_computed_token = cached_reqs.num_computed_tokens[i]
+                    # Use the tracker's snapshot of the prefill range so resumed
+                    # requests keep saving past the original prompt boundary.
+                    prefill_end = request_tracker.prefill_end_tokens
+                    if num_computed_token >= prefill_end:
+                        continue
+                    request_tracker.update(new_block_ids)
+
+                    last_chunk_tokens_num = (
+                        prefill_end // self._block_size * self._block_size
+                    )
+                    req_meta = ReqMeta.from_request_tracker(
+                        request_tracker,
+                        self._block_size,
+                        load_spec=None,
+                        skip_save=force_skip_save,
+                        block_hashes=unfinished_req.block_hashes,
+                        is_last_chunk=(
+                            request_tracker.token_len >= last_chunk_tokens_num
+                        ),
+                        original_block_size=self.original_block_size,
+                    )
+
+                if req_meta is not None:
+                    meta.add_request(req_meta)
+
+        # Handle requests with pending load specs not yet scheduled
+        request_ids = [req.req_id for req in scheduler_output.scheduled_new_reqs]
+        for request_id, (
+            unfinished_req,
+            block_ids,
+        ) in self._unfinished_requests.items():
+            if request_id not in request_ids and request_id not in cached_reqs.req_ids:
+                load_spec = self.load_specs.pop(request_id, None)
+                if not load_spec:
+                    continue
+                num_tokens_to_compute = load_spec.kvpool_cached_tokens
+                request_tracker = RequestTracker(
+                    req_id=request_id,
+                    token_len=num_tokens_to_compute,
+                    allocated_block_ids=block_ids,
+                    num_saved_tokens=0,
+                )
+                self._request_trackers[request_id] = request_tracker
+                req_meta = ReqMeta.from_request_tracker(
+                    request_tracker,
+                    self._block_size,
+                    load_spec=load_spec,
+                    skip_save=None,
+                    block_hashes=unfinished_req.block_hashes,
+                )
+                if req_meta is not None:
+                    meta.add_request(req_meta)
+
+        return meta
+
+    def request_finished(
+        self,
+        request: Request,
+        block_ids: tuple[list[int], ...],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        """Determine whether to delay freeing blocks for async save."""
+        if self.kv_role == "kv_consumer":
+            return False, None
+        tracker = self._request_trackers.get(request.request_id)
+        assert tracker is not None
+        if tracker.num_saved_tokens <= 0:
+            return False, None
+        total_blocks = sum(len(g) for g in block_ids)
+        delay_free_blocks = total_blocks > 0
+        if delay_free_blocks:
+            logger.debug(
+                "Delaying free of %d blocks for request %s",
+                total_blocks,
+                request.request_id,
+            )
+        return delay_free_blocks, None
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/worker.py b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/worker.py
new file mode 100644
index 000000000000..bdbbc3a866af
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/mooncake/store/worker.py
@@ -0,0 +1,1464 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#
+# The transfer-thread scaffolding (KVTransferThread, KVCacheStoreSendingThread,
+# KVCacheStoreRecvingThread) is adapted from vllm-project/vllm-ascend
+# (vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/).
+"""Worker-side logic for MooncakeStoreConnector.
+
+Includes the store worker, transfer threads, lookup server,
+and MooncakeDistributedStore integration.
+"""
+
+import dataclasses
+import json
+import os
+import queue
+import socket
+import threading
+import time
+from collections import defaultdict
+from collections.abc import Callable
+from dataclasses import dataclass
+from typing import Any, Literal
+
+import regex as re
+import torch
+import zmq
+
+import vllm.envs as envs
+from vllm.config import VllmConfig
+from vllm.distributed import (
+    get_dcp_group,
+    get_pcp_group,
+    get_tensor_model_parallel_rank,
+    get_tensor_model_parallel_world_size,
+)
+from vllm.distributed.kv_events import BlockStored
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake import rdma_utils
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.mooncake_utils import (
+    get_mooncake_dp_engine_index,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.coordinator import (  # noqa: E501
+    ExternalCachedBlockPool,
+    MooncakeStoreCoordinator,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.mooncake.store.data import (  # noqa: E501
+    ChunkedTokenDatabase,
+    KeyMetadata,
+    MooncakeStoreConnectorMetadata,
+    PoolKey,
+    ReqMeta,
+)
+from vllm.logger import init_logger
+from vllm.utils.network_utils import get_ip, make_zmq_socket
+from vllm.v1.core.kv_cache_utils import (
+    BlockHash,
+    maybe_convert_block_hash,
+    resolve_kv_cache_block_sizes,
+)
+from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheGroupSpec
+from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder
+
+from .metrics import MooncakeStoreConnectorStats
+
+logger = init_logger(__name__)
+
+DEFAULT_GLOBAL_SEGMENT_SIZE = 4 * 1024 * 1024 * 1024  # 4 GiB
+DEFAULT_LOCAL_BUFFER_SIZE = 4 * 1024 * 1024 * 1024  # 4 GiB
+
+MOONCAKE_NO_AVAILABLE_HANDLE = -200
+
+# Mirrors FileStorageConfig::local_buffer_size in Mooncake C++.
+DEFAULT_MOONCAKE_DISK_STAGING_BUFFER_BYTES = 1280 * 1024 * 1024
+
+# Mirrors DirectIO alignment in Mooncake's AllocateBatch.
+_DIRECT_IO_ALIGNMENT = 4096
+_DIRECT_IO_PADDING_BYTES = 2 * _DIRECT_IO_ALIGNMENT
+
+
+MooncakeMode = Literal["embedded", "standalone-store"]
+
+
+@dataclass
+class MooncakeStoreConfig:
+    """Configuration for MooncakeDistributedStore.
+
+    ``mode`` selects the topology: ``embedded`` (each rank contributes
+    ``global_segment_size`` in-process) or ``standalone-store`` (rank
+    contributes 0; an external ``mooncake_client`` process owns the pool
+    and the SSD tier).
+    """
+
+    metadata_server: str
+    master_server_address: str
+    protocol: str
+    device_name: str
+    mode: MooncakeMode = "embedded"
+    global_segment_size: int = DEFAULT_GLOBAL_SEGMENT_SIZE
+    local_buffer_size: int = DEFAULT_LOCAL_BUFFER_SIZE
+    enable_offload: bool = False
+
+    def __post_init__(self) -> None:
+        if self.mode not in ("embedded", "standalone-store"):
+            raise ValueError(f"unknown Mooncake mode: {self.mode!r}")
+        if self.local_buffer_size <= 0:
+            raise ValueError("local_buffer_size must be > 0")
+        if self.mode == "embedded" and self.global_segment_size == 0:
+            raise ValueError("embedded mode requires global_segment_size > 0")
+        if self.mode == "standalone-store" and self.global_segment_size != 0:
+            raise ValueError("standalone-store mode requires global_segment_size == 0")
+
+    @staticmethod
+    def from_file(file_path: str) -> "MooncakeStoreConfig":
+        with open(file_path) as file:
+            config = json.load(file)
+        return MooncakeStoreConfig(
+            metadata_server=config.get("metadata_server", ""),
+            master_server_address=config.get("master_server_address", ""),
+            protocol=config.get("protocol", "rdma"),
+            device_name=config.get("device_name", ""),
+            mode=config.get("mode", "embedded"),
+            global_segment_size=_parse_size(
+                config.get("global_segment_size", DEFAULT_GLOBAL_SEGMENT_SIZE)
+            ),
+            local_buffer_size=_parse_size(
+                config.get("local_buffer_size", DEFAULT_LOCAL_BUFFER_SIZE)
+            ),
+            enable_offload=bool(config.get("enable_offload", False)),
+        )
+
+    @staticmethod
+    def load_from_env() -> "MooncakeStoreConfig":
+        config_path = os.getenv("MOONCAKE_CONFIG_PATH")
+        if not config_path:
+            raise ValueError(
+                "The environment variable 'MOONCAKE_CONFIG_PATH' is not set."
+            )
+        return MooncakeStoreConfig.from_file(config_path)
+
+
+def _parse_size(value: Any) -> int:
+    """Parse storage size strings with units: GB, MB, KB, B."""
+    if isinstance(value, int):
+        return value
+    if not isinstance(value, str):
+        try:
+            return int(value)
+        except (TypeError, ValueError) as e:
+            raise TypeError(f"Unsupported type for size: {type(value)}") from e
+
+    cleaned = value.strip().lower()
+    if not cleaned:
+        raise ValueError("Size cannot be empty.")
+
+    unit_multipliers = {
+        "gb": 1024**3,
+        "mb": 1024**2,
+        "kb": 1024,
+        "b": 1,
+    }
+    match = re.match(r"^\s*([\d.]+)\s*(gb|mb|kb|b)?\s*$", cleaned)
+    if not match:
+        raise ValueError(f"Invalid format: '{value}'")
+
+    number_str = match.group(1)
+    unit = match.group(2) or "b"
+    multiplier = unit_multipliers[unit]
+
+    try:
+        numeric_value = float(number_str)
+    except ValueError as exc:
+        raise ValueError(f"Invalid numeric value '{number_str}' in: '{value}'") from exc
+    return int(numeric_value * multiplier)
+
+
+def _align_up(value: int, alignment: int) -> int:
+    return ((value + alignment - 1) // alignment) * alignment
+
+
+def _estimate_disk_offload_staging_bytes(size_list: list[int]) -> int:
+    data_size = sum(size_list)
+    return _align_up(data_size, _DIRECT_IO_ALIGNMENT) + _DIRECT_IO_PADDING_BYTES
+
+
+def _sum_batch_bytes(sizes: list[list[int]]) -> int:
+    return sum(sum(size) for size in sizes)
+
+
+def _get_usable_disk_offload_buffer_budget_bytes(raw_budget_bytes: int) -> int:
+    return max(1, int(raw_budget_bytes * envs.VLLM_MOONCAKE_DISK_STAGING_USABLE_RATIO))
+
+
+def _split_disk_offload_load_batches(
+    keys: list[str],
+    addrs: list[list[int]],
+    sizes: list[list[int]],
+    usable_budget_bytes: int,
+    raw_budget_bytes: int,
+) -> tuple[list[tuple[list[str], list[list[int]], list[list[int]]]], str | None]:
+    """Split a GET into sub-batches that fit the owner's staging buffer.
+
+    ``addrs[i]`` / ``sizes[i]`` are scatter-gather lists (K/V or multi-layer
+    segments) for key ``i``. ``usable_budget_bytes`` caps a multi-key batch;
+    ``raw_budget_bytes`` is the hard per-key cap.
+
+    Returns ``(batches, oversize_key)``. Aborts with ``([], key)`` if any
+    single key exceeds ``raw_budget_bytes``; otherwise ``oversize_key`` is
+    ``None``.
+    """
+    batches: list[tuple[list[str], list[list[int]], list[list[int]]]] = []
+    batch_keys: list[str] = []
+    batch_addrs: list[list[int]] = []
+    batch_sizes: list[list[int]] = []
+    batch_bytes = 0
+
+    for key, addr, size in zip(keys, addrs, sizes, strict=True):
+        key_bytes = _estimate_disk_offload_staging_bytes(size)
+        if key_bytes > raw_budget_bytes:
+            return [], key
+        if key_bytes > usable_budget_bytes:
+            if batch_keys:
+                batches.append((batch_keys, batch_addrs, batch_sizes))
+                batch_keys, batch_addrs, batch_sizes = [], [], []
+                batch_bytes = 0
+            batches.append(([key], [addr], [size]))
+            continue
+        if batch_keys and batch_bytes + key_bytes > usable_budget_bytes:
+            batches.append((batch_keys, batch_addrs, batch_sizes))
+            batch_keys, batch_addrs, batch_sizes = [], [], []
+            batch_bytes = 0
+        batch_keys.append(key)
+        batch_addrs.append(addr)
+        batch_sizes.append(size)
+        batch_bytes += key_bytes
+
+    if batch_keys:
+        batches.append((batch_keys, batch_addrs, batch_sizes))
+    return batches, None
+
+
+def _call_replica_predicate(replica_desc: Any, method_name: str) -> bool:
+    method = getattr(replica_desc, method_name, None)
+    if method is None:
+        return False
+    try:
+        return bool(method())
+    except Exception:
+        return False
+
+
+def _classify_replica_tier(replica_descs: Any) -> str:
+    if not replica_descs:
+        return "unknown"
+    try:
+        replica_desc = replica_descs[0]
+    except (IndexError, KeyError, TypeError):
+        return "unknown"
+
+    if _call_replica_predicate(replica_desc, "is_memory_replica"):
+        return "memory"
+    if _call_replica_predicate(
+        replica_desc, "is_disk_replica"
+    ) or _call_replica_predicate(replica_desc, "is_local_disk_replica"):
+        return "disk"
+    return "unknown"
+
+
+def _get_replica_tiers_by_key(store: Any, keys: list[str]) -> dict[str, str]:
+    tiers_by_key = {key: "unknown" for key in keys}
+    try:
+        replica_descs_by_key = store.batch_get_replica_desc(keys)
+    except Exception as e:
+        logger.warning(
+            "Failed to get Mooncake replica descriptors for tier logging "
+            "(batch_keys=%d, error=%s); marking tiers unknown",
+            len(keys),
+            e,
+        )
+        return tiers_by_key
+
+    for key in keys:
+        if hasattr(replica_descs_by_key, "get"):
+            replica_descs = replica_descs_by_key.get(key)
+        else:
+            try:
+                replica_descs = replica_descs_by_key[key]
+            except (KeyError, TypeError):
+                replica_descs = None
+        tiers_by_key[key] = _classify_replica_tier(replica_descs)
+    return tiers_by_key
+
+
+def _log_mooncake_load_tier_summary(
+    req_id: str,
+    batch_keys: list[str],
+    load_results: list[int],
+    tiers_by_key: dict[str, str],
+) -> None:
+    tier_counts = {"memory": 0, "disk": 0, "unknown": 0}
+    bytes_by_tier = {"memory": 0, "disk": 0, "unknown": 0}
+    success_keys = 0
+    failed_keys = 0
+
+    for index, key in enumerate(batch_keys):
+        tier = tiers_by_key.get(key, "unknown")
+        if tier not in tier_counts:
+            tier = "unknown"
+        tier_counts[tier] += 1
+
+        value = load_results[index] if index < len(load_results) else -1
+        if value >= 0:
+            success_keys += 1
+            bytes_by_tier[tier] += int(value)
+        else:
+            failed_keys += 1
+
+    logger.info(
+        "Mooncake load tier summary: req_id=%s batch_keys=%d "
+        "memory_keys=%d disk_keys=%d unknown_keys=%d "
+        "success_keys=%d failed_keys=%d bytes_by_tier=%s",
+        req_id,
+        len(batch_keys),
+        tier_counts["memory"],
+        tier_counts["disk"],
+        tier_counts["unknown"],
+        success_keys,
+        failed_keys,
+        bytes_by_tier,
+    )
+
+
+# ============================================================
+# Transfer Threads
+# ============================================================
+
+
+class KVTransferThread(threading.Thread):
+    """Base class for async KV cache transfer threads."""
+
+    def __init__(
+        self,
+        store: Any,
+        token_databases: list[ChunkedTokenDatabase],
+        block_size: int,
+        tp_rank: int,
+        ready_event: threading.Event,
+        name: str,
+        record_operation: Callable[..., None] | None = None,
+    ):
+        super().__init__(daemon=True, name=name)
+        self.store = store
+        self.ready_event = ready_event
+        self.block_size = block_size
+        self.tp_rank = tp_rank
+        self.token_databases = token_databases
+        self._record_operation_cb = record_operation
+        self.done_task_lock = threading.Lock()
+        self.request_queue: queue.Queue[Any] = queue.Queue()
+        self.finished_requests: set[str] = set()
+        self.kv_event_lock = threading.Lock()
+        self.kv_events: list[BlockStored] = []
+
+    def add_request(self, request: ReqMeta) -> None:
+        self.request_queue.put(request)
+
+    def get_and_clear_finished_requests(self) -> set[str]:
+        with self.done_task_lock:
+            finished = self.finished_requests.copy()
+            self.finished_requests.clear()
+        return finished
+
+    def set_finished_request(self, req_id: str):
+        with self.done_task_lock:
+            self.finished_requests.add(req_id)
+
+    def run(self):
+        self.ready_event.set()
+        while True:
+            try:
+                request_data = self.request_queue.get()
+                if request_data is None:
+                    logger.warning("Received a None request!")
+                    self.request_queue.task_done()
+                    continue
+                self._handle_request(request_data)
+            except Exception as e:
+                logger.error("Error in %s: %s", self.name, e)
+
+    def _handle_request(self, req_meta: Any):
+        pass
+
+    def _record_operation(
+        self,
+        operation: str,
+        start_time: float,
+        num_keys: int,
+        *,
+        num_bytes: int = 0,
+        status: str = "ok",
+        num_failed_keys: int = 0,
+    ) -> None:
+        if self._record_operation_cb is None:
+            return
+        self._record_operation_cb(
+            operation=operation,
+            duration_seconds=time.perf_counter() - start_time,
+            num_keys=num_keys,
+            num_bytes=num_bytes,
+            status=status,
+            num_failed_keys=num_failed_keys,
+        )
+
+    def update_kv_event(self, events: list[BlockStored]):
+        with self.kv_event_lock:
+            self.kv_events.extend(events)
+
+    def get_kv_events(self) -> list[BlockStored]:
+        with self.kv_event_lock:
+            events = self.kv_events.copy()
+            self.kv_events.clear()
+        return events
+
+
+class KVCacheStoreSendingThread(KVTransferThread):
+    """Background thread for storing KV cache blocks to the store."""
+
+    def __init__(
+        self,
+        store: Any,
+        coord: MooncakeStoreCoordinator,
+        token_databases: list[ChunkedTokenDatabase],
+        block_size: int,
+        tp_rank: int,
+        put_step: int,
+        kv_role: str,
+        ready_event: threading.Event,
+        enable_kv_event: bool = False,
+        replicate_config: Any = None,
+        record_operation: Callable[..., None] | None = None,
+    ):
+        super().__init__(
+            store,
+            token_databases,
+            block_size,
+            tp_rank,
+            ready_event,
+            name="KVCacheStoreSendingThread",
+            record_operation=record_operation,
+        )
+        self.put_step = put_step
+        self.coord = coord
+        self.kv_role = kv_role
+        self.stored_requests: defaultdict[str, int] = defaultdict(int)
+        self.enable_kv_event = enable_kv_event
+        # Caller always passes a non-None ReplicateConfig — see
+        # MooncakeStoreWorker.__init__ where store_replicate_config is built.
+        self.replicate_config = replicate_config
+
+        # Pause store requests when CPU/disk offloading is under pressure.
+        self._store_pressure_active = False
+        self._skip_store_requests: set[str] = set()
+
+    def add_stored_request(self, req_id: str):
+        with self.done_task_lock:
+            self.stored_requests[req_id] += 1
+
+    def dec_stored_request(self, req_id: str):
+        with self.done_task_lock:
+            if req_id in self.stored_requests:
+                self.stored_requests[req_id] -= 1
+
+    def delete_finished_stored_request(self, req_id: str):
+        with self.done_task_lock:
+            if req_id in self.stored_requests:
+                del self.stored_requests[req_id]
+            self._skip_store_requests.discard(req_id)
+
+    def _should_skip_request(self, req_id: str) -> bool:
+        with self.done_task_lock:
+            return self._store_pressure_active and req_id in self._skip_store_requests
+
+    def _mark_request_skipped_for_pressure(self, req_id: str) -> bool:
+        with self.done_task_lock:
+            already_skipped = req_id in self._skip_store_requests
+            self._store_pressure_active = True
+            self._skip_store_requests.add(req_id)
+        return already_skipped
+
+    def _clear_store_pressure(self) -> bool:
+        with self.done_task_lock:
+            if not self._store_pressure_active and not self._skip_store_requests:
+                return False
+            self._store_pressure_active = False
+            self._skip_store_requests.clear()
+        return True
+
+    def _handle_request(self, req_meta: ReqMeta):
+        # Cache hits are always a multiple of ``lcm_block_size`` tokens
+        lcm_block_size = self.coord.lcm_block_size
+        token_len = req_meta.token_len_chunk // lcm_block_size * lcm_block_size
+        block_ids_per_group = req_meta.block_ids
+        req_id = req_meta.req_id
+        current_event = req_meta.current_event
+
+        if req_id not in self.stored_requests:
+            self.request_queue.task_done()
+            return
+        if token_len == 0:
+            self.dec_stored_request(req_id)
+            self.request_queue.task_done()
+            return
+        if self._should_skip_request(req_id):
+            logger.debug(
+                "Skipping Mooncake store for request %s while CPU/disk offloading "
+                "is under pressure",
+                req_id,
+            )
+            self.dec_stored_request(req_id)
+            self.request_queue.task_done()
+            return
+
+        # Within each lcm region only per-spec relevant chunks are loaded
+        # (e.g., SWA or linear attn), so mask out irrelevant chunks
+        store_masks = self.coord.store_mask(token_len)
+        starts: list[int] = []
+        ends: list[int] = []
+        keys: list[str] = []
+        block_hashes: list[BlockHash] = []
+        group_indices: list[int] = []
+        for g_idx, db in enumerate(self.token_databases):
+            mask = store_masks[g_idx]
+            for chunk_idx, (start, end, key) in enumerate(
+                db.process_tokens(token_len, req_meta.block_hashes)
+            ):
+                if chunk_idx >= len(mask) or not mask[chunk_idx]:
+                    continue
+                starts.append(start)
+                ends.append(end)
+                keys.append(key.to_string())
+                block_hashes.append(req_meta.block_hashes[chunk_idx])
+                group_indices.append(g_idx)
+
+        # Apply put_step striding for TP
+        sl = slice(self.tp_rank % self.put_step, None, self.put_step)
+        starts = starts[sl]
+        ends = ends[sl]
+        keys = keys[sl]
+        block_hashes = block_hashes[sl]
+        group_indices = group_indices[sl]
+
+        if not keys:
+            self.dec_stored_request(req_id)
+            return
+
+        # Check which blocks already exist (dedup)
+        save_exists_start = time.perf_counter()
+        try:
+            exists_states = self.store.batch_is_exist(keys)
+        except Exception:
+            self._record_operation(
+                "save_exists",
+                save_exists_start,
+                len(keys),
+                status="error",
+                num_failed_keys=len(keys),
+            )
+            raise
+        self._record_operation(
+            "save_exists",
+            save_exists_start,
+            len(keys),
+        )
+        missing_indices = [i for i, exists in enumerate(exists_states) if exists != 1]
+
+        if not missing_indices:
+            self.dec_stored_request(req_id)
+            return
+
+        starts = [starts[i] for i in missing_indices]
+        ends = [ends[i] for i in missing_indices]
+        keys = [keys[i] for i in missing_indices]
+        block_hashes = [block_hashes[i] for i in missing_indices]
+        group_indices = [group_indices[i] for i in missing_indices]
+
+        logger.debug(
+            "Storing KV cache for %d blocks (groups=%s) for request %s",
+            len(keys),
+            set(group_indices),
+            req_id,
+        )
+
+        addrs: list[list[int]] = []
+        sizes: list[list[int]] = []
+        stored_events: list[BlockStored] = []
+        # parent_block_hash chains live within a group, not across.
+        prev_key_per_group: dict[int, Any] = {}
+        new_block_hashes = [maybe_convert_block_hash(bh) for bh in block_hashes]
+
+        for idx, (s, e, g_idx) in enumerate(
+            zip(starts, ends, group_indices, strict=True)
+        ):
+            db = self.token_databases[g_idx]
+            addr, size, _ = db.prepare_value(s, e, block_ids_per_group[g_idx])
+            addrs.append(addr)
+            sizes.append(size)
+
+            if self.enable_kv_event:
+                token_ids = (
+                    req_meta.token_ids[s:e] if req_meta.token_ids is not None else None
+                )
+                stored_event = BlockStored(
+                    block_hashes=[new_block_hashes[idx]],
+                    parent_block_hash=prev_key_per_group.get(g_idx),
+                    token_ids=token_ids,
+                    block_size=req_meta.original_block_size,
+                    lora_id=None,
+                    medium="cpu",
+                    lora_name=None,
+                )
+                stored_events.append(stored_event)
+                prev_key_per_group[g_idx] = new_block_hashes[idx]
+
+        if current_event is not None:
+            current_event.synchronize()
+
+        batch_bytes = _sum_batch_bytes(sizes)
+        put_start = time.perf_counter()
+        try:
+            res = self.store.batch_put_from_multi_buffers(
+                keys,
+                addrs,
+                sizes,
+                self.replicate_config,
+            )
+            failed = [i for i, v in enumerate(res) if v < 0]
+            self._record_operation(
+                "save_put",
+                put_start,
+                len(keys),
+                num_bytes=batch_bytes,
+                status="partial_failure" if failed else "ok",
+                num_failed_keys=len(failed),
+            )
+            if failed:
+                failed_codes = set(res[i] for i in failed)
+                logger.warning(
+                    "batch_put failed: %d/%d keys failed "
+                    "(codes=%s, batch_bytes=%d, num_keys=%d), "
+                    "first_key=%s",
+                    len(failed),
+                    len(keys),
+                    failed_codes,
+                    batch_bytes,
+                    len(keys),
+                    keys[0] if keys else "N/A",
+                )
+                if (
+                    MOONCAKE_NO_AVAILABLE_HANDLE in failed_codes
+                    and not self._mark_request_skipped_for_pressure(req_id)
+                ):
+                    logger.warning(
+                        "Detected Mooncake CPU/disk offloading pressure "
+                        "(NO_AVAILABLE_HANDLE); skipping future store "
+                        "batches for request %s until a later store "
+                        "batch succeeds",
+                        req_id,
+                    )
+            elif self._clear_store_pressure():
+                logger.info(
+                    "Mooncake CPU/disk offloading pressure cleared after a "
+                    "successful store batch"
+                )
+        except Exception as e:
+            self._record_operation(
+                "save_put",
+                put_start,
+                len(keys),
+                num_bytes=batch_bytes,
+                status="error",
+                num_failed_keys=len(keys),
+            )
+            logger.error("Failed to put key %s, error: %s", keys, e)
+
+        if self.enable_kv_event and stored_events:
+            self.update_kv_event(stored_events)
+
+        self.dec_stored_request(req_id)
+        self.request_queue.task_done()
+
+
+class KVCacheStoreRecvingThread(KVTransferThread):
+    """Background thread for loading KV cache blocks from the store."""
+
+    def __init__(
+        self,
+        store: Any,
+        coord: MooncakeStoreCoordinator,
+        token_databases: list[ChunkedTokenDatabase],
+        block_size: int,
+        tp_rank: int,
+        ready_event: threading.Event,
+        disk_offload_buffer_budget_bytes: int | None = None,
+        record_operation: Callable[..., None] | None = None,
+    ):
+        super().__init__(
+            store,
+            token_databases,
+            block_size,
+            tp_rank,
+            ready_event,
+            name="KVCacheStoreRecvingThread",
+            record_operation=record_operation,
+        )
+        self.disk_offload_buffer_budget_bytes = disk_offload_buffer_budget_bytes
+        self.usable_disk_offload_buffer_budget_bytes = (
+            None
+            if disk_offload_buffer_budget_bytes is None
+            else _get_usable_disk_offload_buffer_budget_bytes(
+                disk_offload_buffer_budget_bytes
+            )
+        )
+        self.coord = coord
+
+    def _handle_request(self, req_meta: ReqMeta):
+        token_len = req_meta.load_spec.token_len  # type: ignore[union-attr]
+        req_id = req_meta.req_id
+        mask_num = (
+            req_meta.load_spec.vllm_cached_tokens  # type: ignore[union-attr]
+            // self.block_size
+            * self.block_size
+        )
+
+        # Skip chunks the consumer's per-group spec wouldn't populate
+        # locally (e.g. SWA pre-window) even if the producer stored them.
+        load_mask_per_group = self.coord.load_mask(req_meta.block_hashes, token_len)
+
+        addr_list: list[list[int]] = []
+        size_list: list[list[int]] = []
+        key_list: list[str] = []
+        for g_idx, db in enumerate(self.token_databases):
+            mask = load_mask_per_group[g_idx]
+            for start, end, key in db.process_tokens(
+                token_len, req_meta.block_hashes, mask_num
+            ):
+                chunk_idx = start // db.block_size
+                if chunk_idx >= len(mask) or not mask[chunk_idx]:
+                    continue
+                addr, size, _ = db.prepare_value(start, end, req_meta.block_ids[g_idx])
+                key_list.append(key.to_string())
+                addr_list.append(addr)
+                size_list.append(size)
+
+        # Rotate lists by tp_rank for load balancing
+        rotation = self.tp_rank % len(key_list)
+        key_list_c = key_list[rotation:] + key_list[:rotation]
+        addr_list_c = addr_list[rotation:] + addr_list[:rotation]
+        size_list_c = size_list[rotation:] + size_list[:rotation]
+
+        load_batches = [(key_list_c, addr_list_c, size_list_c)]
+        if self.usable_disk_offload_buffer_budget_bytes is not None:
+            total_staging_bytes = sum(
+                _estimate_disk_offload_staging_bytes(size) for size in size_list_c
+            )
+            if total_staging_bytes > self.usable_disk_offload_buffer_budget_bytes:
+                assert self.disk_offload_buffer_budget_bytes is not None
+                load_batches, oversized_key = _split_disk_offload_load_batches(
+                    key_list_c,
+                    addr_list_c,
+                    size_list_c,
+                    self.usable_disk_offload_buffer_budget_bytes,
+                    self.disk_offload_buffer_budget_bytes,
+                )
+                if oversized_key is not None:
+                    oversized_key_index = key_list_c.index(oversized_key)
+                    oversized_key_bytes = _estimate_disk_offload_staging_bytes(
+                        size_list_c[oversized_key_index]
+                    )
+                    logger.warning(
+                        "Skipping Mooncake load for request %s because key %s "
+                        "requires %d staging bytes, exceeding budget %d",
+                        req_id,
+                        oversized_key,
+                        oversized_key_bytes,
+                        self.disk_offload_buffer_budget_bytes,
+                    )
+                    self.set_finished_request(req_id)
+                    self.request_queue.task_done()
+                    return
+
+        current_batch_keys: list[str] = key_list_c
+        batch_bytes = 0
+        try:
+            for batch_keys, batch_addrs, batch_sizes in load_batches:
+                current_batch_keys = batch_keys
+                batch_bytes = _sum_batch_bytes(batch_sizes)
+                tiers_by_key: dict[str, str] | None = None
+                if envs.VLLM_MOONCAKE_STORE_TIER_LOG:
+                    tiers_by_key = _get_replica_tiers_by_key(self.store, batch_keys)
+                # Reset so the recorded RPC duration excludes tier lookup.
+                load_get_start = time.perf_counter()
+                res = self.store.batch_get_into_multi_buffers(
+                    batch_keys, batch_addrs, batch_sizes
+                )
+                if tiers_by_key is not None:
+                    _log_mooncake_load_tier_summary(
+                        req_id, batch_keys, res, tiers_by_key
+                    )
+                failed = [
+                    (key, value)
+                    for key, value in zip(batch_keys, res, strict=True)
+                    if value < 0
+                ]
+                self._record_operation(
+                    "load_get",
+                    load_get_start,
+                    len(batch_keys),
+                    num_bytes=batch_bytes,
+                    status="partial_failure" if failed else "ok",
+                    num_failed_keys=len(failed),
+                )
+                if failed:
+                    logger.warning(
+                        "Failed to get %d Mooncake keys from sub-batch "
+                        "(batch_keys=%d, first_failures=%s)",
+                        len(failed),
+                        len(batch_keys),
+                        failed[:3],
+                    )
+                    break
+        except Exception as e:
+            self._record_operation(
+                "load_get",
+                load_get_start,
+                len(current_batch_keys),
+                num_bytes=batch_bytes,
+                status="error",
+                num_failed_keys=len(current_batch_keys),
+            )
+            logger.warning(
+                "Failed to get Mooncake sub-batch %s, error: %s",
+                current_batch_keys[:3],
+                e,
+            )
+
+        self.set_finished_request(req_id)
+        self.request_queue.task_done()
+
+
+# ============================================================
+# Store Worker
+# ============================================================
+
+
+class MooncakeStoreWorker:
+    """Worker-side component for MooncakeStoreConnector."""
+
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        kv_cache_config: KVCacheConfig,
+    ):
+        try:
+            from mooncake.store import (  # type: ignore
+                MooncakeDistributedStore,
+                ReplicateConfig,
+            )
+        except ImportError as e:
+            raise ImportError(
+                "Please install mooncake by following the instructions at "
+                "https://github.com/kvcache-ai/Mooncake/blob/main/doc/"
+                "en/build.md to run vLLM with MooncakeStoreConnector."
+            ) from e
+
+        model_config = vllm_config.model_config
+        parallel_config = vllm_config.parallel_config
+
+        self.dp_rank = get_mooncake_dp_engine_index(parallel_config)
+        self.tp_rank = get_tensor_model_parallel_rank()
+        self.tp_size = get_tensor_model_parallel_world_size()
+        self.pp_size = parallel_config.pipeline_parallel_size
+        self.pp_rank = (parallel_config.rank // self.tp_size) % self.pp_size
+
+        self.pcp_size = get_pcp_group().world_size
+        self.pcp_rank = get_pcp_group().rank_in_group if self.pcp_size > 1 else 0
+        self.dcp_size = get_dcp_group().world_size
+        self.dcp_rank = get_dcp_group().rank_in_group if self.dcp_size > 1 else 0
+
+        assert vllm_config.kv_transfer_config is not None
+        self.kv_role = vllm_config.kv_transfer_config.kv_role
+        self.load_async = vllm_config.kv_transfer_config.kv_connector_extra_config.get(
+            "load_async", True
+        )
+        self.cache_config = vllm_config.cache_config
+        self.original_block_size = self.cache_config.block_size
+        self.block_size, self.hash_block_size = resolve_kv_cache_block_sizes(
+            kv_cache_config, vllm_config
+        )
+        self.num_layers = model_config.get_num_layers(parallel_config)
+
+        self.use_mla = False
+        if (
+            hasattr(model_config, "use_mla")
+            and isinstance(model_config.use_mla, bool)
+            and model_config.use_mla
+        ):
+            self.use_mla = True
+
+        if self.use_mla:
+            self.num_kv_head = 1
+        else:
+            self.num_kv_head = model_config.get_total_num_kv_heads()
+
+        if self.num_kv_head < self.tp_size:
+            self.put_step = self.tp_size // self.num_kv_head
+            self.head_or_tp_rank = self.tp_rank // self.put_step
+        else:
+            self.head_or_tp_rank = self.tp_rank
+            self.put_step = 1
+
+        self.metadata = KeyMetadata(
+            model_name=model_config.model.rstrip("/").split("/")[-1],
+            tp_rank=self.head_or_tp_rank,
+            pcp_rank=self.pcp_rank,
+            dcp_rank=self.dcp_rank,
+            pp_rank=self.pp_rank,
+        )
+
+        # Initialize MooncakeDistributedStore with its own TransferEngine
+        store_config = MooncakeStoreConfig.load_from_env()
+        extra_config = (
+            vllm_config.kv_transfer_config.kv_connector_extra_config
+            if vllm_config.kv_transfer_config
+            else {}
+        )
+        store_config.device_name = rdma_utils.get_configured_worker_rnic(
+            protocol=store_config.protocol,
+            configured_device=store_config.device_name,
+        )
+        self.store = MooncakeDistributedStore()
+        local_ip = get_ip()
+        local_hostname = rdma_utils.get_requester_local_hostname(local_ip)
+        ret = self.store.setup(
+            local_hostname,
+            store_config.metadata_server,
+            store_config.global_segment_size,
+            store_config.local_buffer_size,
+            store_config.protocol,
+            store_config.device_name,
+            store_config.master_server_address,
+        )
+        if ret != 0:
+            msg = "Initialize MooncakeDistributedStore failed."
+            logger.error(msg)
+            raise RuntimeError(msg)
+
+        preferred_segment = rdma_utils.get_configured_preferred_segment(extra_config)
+        self.preferred_segment = preferred_segment
+        self.store_replicate_config = ReplicateConfig()
+        if preferred_segment is not None:
+            self.store_replicate_config.preferred_segment = preferred_segment
+
+        logger.info(
+            "Mooncake mode=%s (global_segment_size=%d, local_buffer_size=%d, "
+            "preferred_segment=%s, enable_offload=%s)",
+            store_config.mode,
+            store_config.global_segment_size,
+            store_config.local_buffer_size,
+            preferred_segment or "<none>",
+            store_config.enable_offload,
+        )
+        if store_config.mode == "embedded":
+            if store_config.enable_offload and preferred_segment is None:
+                logger.warning(
+                    "enable_offload is set in embedded mode without "
+                    "preferred_segment; SSD tier will only see puts that "
+                    "happen to land on the owner segment."
+                )
+            if preferred_segment is not None:
+                logger.warning(
+                    "preferred_segment=%s with mode=embedded: rank-"
+                    "contributed segments will be idle.",
+                    preferred_segment,
+                )
+        elif (
+            store_config.mode == "standalone-store" and not store_config.enable_offload
+        ):
+            logger.warning(
+                "standalone-store mode without enable_offload: large prefills "
+                "may exceed the owner DirectIO budget."
+            )
+
+        self.disk_offload_buffer_budget_bytes = (
+            DEFAULT_MOONCAKE_DISK_STAGING_BUFFER_BYTES
+            if store_config.enable_offload
+            else None
+        )
+
+        # Start lookup server on rank 0 for scheduler-side prefix queries
+        self.lookup_server: LookupKeyServer | None = None
+        if vllm_config.parallel_config.rank == 0:
+            self.lookup_server = LookupKeyServer(self, vllm_config)
+
+        kv_event_config = vllm_config.kv_events_config
+        self.enable_kv_events = False
+        if kv_event_config and kv_event_config.enable_kv_cache_events:
+            self.enable_kv_events = True
+
+        self.kv_send_thread: KVCacheStoreSendingThread | None = None
+        self.kv_recv_thread: KVCacheStoreRecvingThread | None = None
+        self.finished_store_req: set[str] = set()
+        self._kv_connector_stats_lock = threading.Lock()
+        self.kv_connector_stats = MooncakeStoreConnectorStats()
+
+        self._kv_cache_config = kv_cache_config
+        # Single-group + PCP/DCP > 1: scale the lone group's spec.block_size to
+        # self.block_size (= scheduler_block_size) so the coordinator's
+        # ``block_size % hash_block_size == 0`` invariant holds.
+        groups = list(kv_cache_config.kv_cache_groups)
+        if len(groups) == 1 and groups[0].kv_cache_spec.block_size != self.block_size:
+            g = groups[0]
+            groups = [
+                dataclasses.replace(
+                    g,
+                    kv_cache_spec=dataclasses.replace(
+                        g.kv_cache_spec, block_size=self.block_size
+                    ),
+                )
+            ]
+        self._kv_cache_groups: list[KVCacheGroupSpec] = groups
+        spec_cfg = getattr(vllm_config, "speculative_config", None)
+        use_eagle = bool(
+            spec_cfg.use_eagle()
+            if spec_cfg is not None and callable(getattr(spec_cfg, "use_eagle", None))
+            else False
+        )
+        self.coord = MooncakeStoreCoordinator(
+            self._kv_cache_groups,
+            scheduler_block_size=self.block_size,
+            hash_block_size=self.hash_block_size,
+            use_eagle=use_eagle,
+        )
+        # One ChunkedTokenDatabase per group; addresses populated in
+        # register_kv_caches once the kv-cache layout is known.
+        self.token_dbs: list[ChunkedTokenDatabase] = [
+            ChunkedTokenDatabase(
+                dataclasses.replace(self.metadata, group_id=g_idx),
+                g.kv_cache_spec.block_size,
+                hash_block_size=self.hash_block_size,
+            )
+            for g_idx, g in enumerate(self._kv_cache_groups)
+        ]
+
+    def register_cross_layers_kv_caches(self, kv_cache: torch.Tensor) -> None:
+        """Register a cross-layers KV cache tensor.
+
+        Wraps the unified tensor in a single-entry dict so that the
+        existing stride-based logic in register_kv_caches() produces
+        the correct single-segment result (block_len = page_size * num_layers).
+        """
+        self.register_kv_caches({"__cross_layer__": kv_cache})
+
+    def register_kv_caches(
+        self,
+        kv_caches: dict[str, torch.Tensor | list[torch.Tensor]],
+    ) -> None:
+        """Register KV cache tensors and start transfer threads."""
+        if not kv_caches:
+            logger.warning("No KV caches to offload.")
+            return
+
+        # Resolve each entry to a representative tensor for storage
+        # deduplication. For attention layers the value is already a tensor;
+        # for Mamba layers it is a list of tensors that all share the same
+        # underlying raw storage, so we take the first one.
+        def _repr_tensor(v: torch.Tensor | list[torch.Tensor]) -> torch.Tensor:
+            assert isinstance(v, torch.Tensor | list)
+            return v if isinstance(v, torch.Tensor) else v[0]
+
+        assert self.cache_config.num_gpu_blocks is not None
+        self.num_blocks = self.cache_config.num_gpu_blocks
+
+        seen_ptrs: set[int] = set()
+        addrs: list[int] = []
+        block_lens: list[int] = []
+
+        for value in kv_caches.values():
+            cache = _repr_tensor(value)
+            cache_storage = cache.untyped_storage()
+            base_addr = cache_storage.data_ptr()
+            if base_addr in seen_ptrs:
+                continue
+            seen_ptrs.add(base_addr)
+            region_len = cache_storage.nbytes()
+
+            ret = self.store.register_buffer(base_addr, region_len)
+            if ret != 0:
+                logger.error(
+                    "register_buffer failed for addr %#x len %d: %d",
+                    base_addr,
+                    region_len,
+                    ret,
+                )
+
+            # Detect layout via stride: a dim whose byte-stride exceeds
+            # page_size_bytes is an outer segment dim (e.g. the K/V dim of
+            # FlashAttn's (2, num_blocks, ...)). FlashInfer/MLA's blocks-
+            # outermost layout has no such dim and yields a single segment.
+            el = cache.element_size()
+            page_size_bytes = region_len // self.num_blocks
+            outer_dims = [
+                d for d in range(cache.ndim) if cache.stride(d) * el > page_size_bytes
+            ]
+            if not outer_dims:
+                # Blocks-first layout (FlashInfer / MLA): one segment.
+                addrs.append(base_addr)
+                block_lens.append(page_size_bytes)
+            else:
+                # K/V-first layout (FlashAttn / ROCm): split segments.
+                seg_stride = cache.stride(outer_dims[0]) * el
+                for idx in range(cache.shape[outer_dims[0]]):
+                    addrs.append(base_addr + idx * seg_stride)
+                    block_lens.append(seg_stride // self.num_blocks)
+
+        logger.info(
+            "Registered KV caches: num_groups=%d, num_segments=%d, num_blocks=%d",
+            len(self.token_dbs),
+            len(addrs),
+            self.num_blocks,
+        )
+
+        for db in self.token_dbs:
+            db.set_kv_caches_base_addr(addrs)
+            db.set_block_len(block_lens)
+
+        # Start transfer threads
+        if self.kv_role in ["kv_producer", "kv_both"]:
+            ready_event_sending = threading.Event()
+            self.kv_send_thread = KVCacheStoreSendingThread(
+                self.store,
+                self.coord,
+                self.token_dbs,
+                self.block_size,
+                self.tp_rank,
+                self.put_step,
+                self.kv_role,
+                ready_event_sending,
+                self.enable_kv_events,
+                self.store_replicate_config,
+                record_operation=self._record_kv_connector_operation,
+            )
+            self.kv_send_thread.start()
+
+        ready_event_recving = threading.Event()
+        self.kv_recv_thread = KVCacheStoreRecvingThread(
+            self.store,
+            self.coord,
+            self.token_dbs,
+            self.block_size,
+            self.tp_rank,
+            ready_event_recving,
+            disk_offload_buffer_budget_bytes=self.disk_offload_buffer_budget_bytes,
+            record_operation=self._record_kv_connector_operation,
+        )
+        self.kv_recv_thread.start()
+        ready_event_recving.wait()
+
+    def start_load_kv(
+        self,
+        metadata: MooncakeStoreConnectorMetadata,
+    ):
+        """No-op: loads are issued in get_finished() for overlap."""
+        pass
+
+    def wait_for_save(
+        self,
+        metadata: MooncakeStoreConnectorMetadata,
+    ):
+        """No-op: stores are issued in get_finished() for overlap."""
+        pass
+
+    def get_finished(
+        self,
+        finished_req_ids: set[str],
+        meta: MooncakeStoreConnectorMetadata,
+    ) -> tuple[set[str], set[str]]:
+        """Issue all I/O and get completed send/recv request IDs.
+
+        All load and store I/O requests are issued here (after model
+        compute is launched on the compute stream) for better
+        compute-I/O overlap.
+        """
+        # Issue async loads
+        for request in meta.requests:
+            load_spec = request.load_spec
+            if load_spec is None or not load_spec.can_load:
+                continue
+
+            load_spec.token_len = load_spec.kvpool_cached_tokens
+
+            assert self.kv_recv_thread is not None
+            self.kv_recv_thread.add_request(request)
+
+        assert self.load_async, "load_async must be True for better performance."
+        # Issue stores with CUDA event synchronization
+        if self.kv_role in ["kv_producer", "kv_both"]:
+            current_event = None
+            for request in meta.requests:
+                if request.can_save:
+                    current_event = torch.cuda.Event()
+                    current_event.record()
+                    break
+
+            for request in meta.requests:
+                if not request.can_save:
+                    continue
+                request.current_event = current_event
+                assert self.kv_send_thread is not None
+                self.kv_send_thread.add_stored_request(request.req_id)
+                self.kv_send_thread.add_request(request)
+
+        # Check completion of previously queued transfers
+        done_sending = (
+            self._get_and_clear_finished_sending(finished_req_ids, meta)
+            if self.kv_role in ["kv_producer", "kv_both"]
+            else set()
+        )
+
+        done_recving = (
+            self.kv_recv_thread.get_and_clear_finished_requests()
+            if self.load_async and self.kv_recv_thread is not None
+            else set()
+        )
+
+        logger.debug(
+            "Completed send: %d, recv: %d, tp_rank: %d",
+            len(done_sending),
+            len(done_recving),
+            self.tp_rank,
+        )
+        return done_sending, done_recving
+
+    def _record_kv_connector_operation(
+        self,
+        operation: str,
+        duration_seconds: float,
+        num_keys: int,
+        *,
+        num_bytes: int = 0,
+        status: str = "ok",
+        num_failed_keys: int = 0,
+    ) -> None:
+        with self._kv_connector_stats_lock:
+            self.kv_connector_stats.record_operation(
+                operation=operation,
+                duration_seconds=duration_seconds,
+                num_keys=num_keys,
+                num_bytes=num_bytes,
+                status=status,
+                num_failed_keys=num_failed_keys,
+            )
+
+    def get_kv_connector_stats(self) -> MooncakeStoreConnectorStats | None:
+        with self._kv_connector_stats_lock:
+            if self.kv_connector_stats.is_empty():
+                return None
+            kv_connector_stats = self.kv_connector_stats
+            self.kv_connector_stats = MooncakeStoreConnectorStats()
+            return kv_connector_stats
+
+    def _get_and_clear_finished_sending(
+        self,
+        finished_req_ids: set[str],
+        meta: MooncakeStoreConnectorMetadata,
+    ) -> set[str]:
+        assert self.kv_send_thread is not None
+        finished_sending: set[str] = set()
+
+        for req_id in meta.preempted_req_ids:
+            self.kv_send_thread.delete_finished_stored_request(req_id)
+
+        for req_id in self.kv_send_thread.stored_requests.copy():
+            if (
+                self.kv_send_thread.stored_requests[req_id] == 0
+                and req_id in self.finished_store_req
+            ):
+                self.finished_store_req.remove(req_id)
+                finished_sending.add(req_id)
+                self.kv_send_thread.delete_finished_stored_request(req_id)
+
+        for req_id in finished_req_ids:
+            req_remain_jobs = self.kv_send_thread.stored_requests.get(req_id)
+            if req_remain_jobs == 0:
+                finished_sending.add(req_id)
+                self.kv_send_thread.delete_finished_stored_request(req_id)
+            elif req_remain_jobs is not None:
+                self.finished_store_req.add(req_id)
+
+        return finished_sending
+
+    def lookup(self, token_len: int, block_hashes: list[BlockHash]) -> int:
+        """Check how many prefix tokens exist in the store.
+
+        Checks across all TP ranks and PP ranks.
+        """
+        if not block_hashes or token_len <= 0:
+            return 0
+
+        # Build per-(group, hash) candidate keys expanded across TP/PP.
+        # candidate_meta[i] is the (group_id, hash_bytes) for candidate_keys[i].
+        candidate_keys: list[str] = []
+        candidate_meta: list[tuple[int, bytes]] = []
+        tp_count = min(self.tp_size, self.num_kv_head)
+        for g_idx, db in enumerate(self.token_dbs):
+            spec_block_size = db.block_size
+            group_hashes = self.coord.block_hashes_for_spec(
+                block_hashes, self._kv_cache_groups[g_idx].kv_cache_spec
+            )
+            for chunk_id, h in enumerate(group_hashes):
+                start_idx = chunk_id * spec_block_size
+                if start_idx >= token_len:
+                    break
+                for tp in range(tp_count):
+                    for pp in range(self.pp_size):
+                        md = dataclasses.replace(db.metadata, tp_rank=tp, pp_rank=pp)
+                        candidate_keys.append(PoolKey(md, h.hex()).to_string())
+                        candidate_meta.append((g_idx, bytes(h)))
+
+        if not candidate_keys:
+            return 0
+
+        lookup_start = time.perf_counter()
+        try:
+            res = self.store.batch_is_exist(candidate_keys)
+            self._record_kv_connector_operation(
+                "lookup_exists",
+                time.perf_counter() - lookup_start,
+                len(candidate_keys),
+            )
+        except Exception as e:
+            self._record_kv_connector_operation(
+                "lookup_exists",
+                time.perf_counter() - lookup_start,
+                len(candidate_keys),
+                status="error",
+                num_failed_keys=len(candidate_keys),
+            )
+            logger.error("Remote connection failed in lookup: %s", e)
+            return 0
+
+        # A (group, hash) is "present" only when every TP*PP rank has it.
+        expected_per_key = max(1, tp_count * self.pp_size)
+        present_count: dict[tuple[int, bytes], int] = {}
+        for gh, exists in zip(candidate_meta, res, strict=True):
+            if exists == 1:
+                present_count[gh] = present_count.get(gh, 0) + 1
+        exists_set = {gh for gh, c in present_count.items() if c >= expected_per_key}
+
+        _masks, hit_length = self.coord.find_longest_cache_hit(
+            block_hashes, token_len, ExternalCachedBlockPool(exists_set)
+        )
+        return hit_length
+
+    def get_kv_events(self) -> list[BlockStored]:
+        if self.enable_kv_events and self.kv_send_thread is not None:
+            return self.kv_send_thread.get_kv_events()
+        return []
+
+
+# ============================================================
+# Lookup Key Server
+# ============================================================
+
+
+class LookupKeyServer:
+    """ZMQ server on worker rank 0 for handling prefix lookup queries."""
+
+    def __init__(
+        self,
+        store_worker: MooncakeStoreWorker,
+        vllm_config: VllmConfig,
+    ):
+        self.decoder = MsgpackDecoder()
+        self.ctx = zmq.Context()  # type: ignore[attr-defined]
+        socket_path = get_zmq_rpc_path_lookup(vllm_config)
+        self._ipc_path = socket_path.removeprefix("ipc://")
+        if os.path.exists(self._ipc_path):
+            os.unlink(self._ipc_path)
+        self.socket = make_zmq_socket(
+            self.ctx,
+            socket_path,
+            zmq.REP,  # type: ignore[attr-defined]
+            bind=True,
+        )
+
+        self.store_worker = store_worker
+        self.running = True
+
+        def process_request():
+            while self.running:
+                all_frames = self.socket.recv_multipart(copy=False)
+                token_len = int.from_bytes(all_frames[0], byteorder="big")
+                hash_frames = all_frames[1:]
+                hashes_str = self.decoder.decode(hash_frames)
+                block_hashes = [BlockHash(bytes.fromhex(s)) for s in hashes_str]
+                result = self.store_worker.lookup(token_len, block_hashes)
+                response = result.to_bytes(4, "big")
+                self.socket.send(response)
+
+        self.thread = threading.Thread(target=process_request, daemon=True)
+        self.thread.start()
+
+    def close(self):
+        self.socket.close(linger=0)
+        if os.path.exists(self._ipc_path):
+            os.unlink(self._ipc_path)
+
+
+# ============================================================
+# Lookup Key Client
+# ============================================================
+
+
+class LookupKeyClient:
+    """ZMQ client for querying prefix cache hits from worker."""
+
+    def __init__(self, vllm_config: VllmConfig):
+        self.encoder = MsgpackEncoder()
+        self.ctx = zmq.Context()  # type: ignore[attr-defined]
+        socket_path = get_zmq_rpc_path_lookup(vllm_config)
+        self.socket = make_zmq_socket(
+            self.ctx,
+            socket_path,
+            zmq.REQ,  # type: ignore[attr-defined]
+            bind=False,
+        )
+
+    def lookup(self, token_len: int, block_hashes: list[BlockHash]) -> int:
+        hash_strs = [h.hex() for h in block_hashes]
+        hash_frames = self.encoder.encode(hash_strs)
+        token_len_bytes = token_len.to_bytes(4, byteorder="big")
+        all_frames = [token_len_bytes] + list(hash_frames)
+        self.socket.send_multipart(all_frames, copy=False)
+        resp = self.socket.recv()
+        result = int.from_bytes(resp, "big")
+        return result
+
+    def close(self):
+        self.socket.close(linger=0)
+
+
+def get_zmq_rpc_path_lookup(vllm_config: VllmConfig) -> str:
+    """Construct IPC path for ZMQ lookup socket."""
+    assert vllm_config.kv_transfer_config is not None
+    dp_rank = get_mooncake_dp_engine_index(vllm_config.parallel_config)
+    base_url = envs.VLLM_RPC_BASE_PATH
+    rpc_port = 0
+    hostname = socket.gethostname()
+    extra_config = vllm_config.kv_transfer_config.kv_connector_extra_config
+    if "lookup_rpc_port" in extra_config:
+        rpc_port = extra_config["lookup_rpc_port"]
+    logger.debug("Base URL: %s, RPC Port: %s", base_url, rpc_port)
+    return (
+        f"ipc://{base_url}/lookup_rpc_port_{rpc_port}_host_{hostname}_dp_rank{dp_rank}"
+    )
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/moriio/moriio_common.py b/vllm/distributed/kv_transfer/kv_connector/v1/moriio/moriio_common.py
index f3b2ce3b5bec..2733b2e0a878 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/moriio/moriio_common.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/moriio/moriio_common.py
@@ -8,6 +8,7 @@
 from typing import TYPE_CHECKING, Any
 
 import msgspec
+import regex as re
 import torch
 import zmq
 
@@ -188,6 +189,7 @@ class MoRIIOConfig:
     dp_rank: int
     dp_size: int
     tp_size: int
+    backend: str = "rdma"
 
     @classmethod
     def from_vllm_config(cls, vllm_config: VllmConfig) -> "MoRIIOConfig":
@@ -212,6 +214,12 @@ def from_vllm_config(cls, vllm_config: VllmConfig) -> "MoRIIOConfig":
         dp_size = vllm_config.parallel_config.data_parallel_size
         tp_size = get_tensor_model_parallel_world_size()
         port_offset = get_port_offset(dp_rank, tp_rank)
+        backend = str(extra_config.get("backend", "rdma")).lower()
+        if backend not in ("rdma", "xgmi"):
+            raise ValueError(
+                f"Invalid MoRIIO backend {backend!r} in kv_connector_extra_config; "
+                "must be one of 'rdma' or 'xgmi'."
+            )
 
         return cls(
             local_ip=get_ip(),
@@ -226,6 +234,7 @@ def from_vllm_config(cls, vllm_config: VllmConfig) -> "MoRIIOConfig":
             dp_rank=dp_rank,
             dp_size=dp_size,
             tp_size=tp_size,
+            backend=backend,
         )
 
 
@@ -239,7 +248,7 @@ class MoRIIOConstants:
     COMPLETION_PREFIX = "cmpl"
     TRANSFER_PREFIX = "tx"
 
-    PING_INTERVAL = 5
+    PING_INTERVAL = 3
     MAX_PING_RETRIES = 100
     DEFAULT_HANDSHAKE_PORT = "6301"
     DEFAULT_NOTIFY_PORT = "61005"
@@ -247,6 +256,64 @@ class MoRIIOConstants:
     VLLM_MORI_READ_ABORT_REQUEST_TIMEOUT = 3600
 
 
+# The router embeds both zmq_addresses in the request_id (similar to P2pNcclConnector):
+#   "___prefill_addr_{zmq}___decode_addr_{zmq}_{32-hex-uuid}"
+# MoRIIO zmq_address format: "host:IP,handshake:PORT,notify:PORT"
+#
+# This lets each connector side parse the peer's connection info without
+# requiring the router to pass it explicitly in kv_transfer_params.
+_PREFILL_ZMQ_RE = re.compile(r"___prefill_addr_(.+?)___decode_addr_")
+# vLLM wraps the router's X-Request-Id as "cmpl-<id>-<seq>-<hex>" so there may
+# be a trailing "-<seq>-<hex>" suffix after the 32-char UUID.  Allow it.
+_DECODE_ZMQ_RE = re.compile(r"___decode_addr_(.+)_[0-9a-f]{32}(?:-.*)?$")
+
+
+def parse_moriio_zmq_address(
+    zmq_address: str,
+) -> tuple[str, int, int]:
+    """Parse the MoRI-IO zmq address into its components.
+
+    Parses ``"host:IP,handshake:PORT,notify:PORT"`` into
+        (host, handshake_port, notify_port).
+
+    Each key-value pair is split on the *first* colon so that IPv6 addresses
+    (e.g. ``host:::1``) are handled correctly.  Raises ``ValueError`` if any
+    of ``host``, ``handshake``, or ``notify`` keys are absent or if the port
+    values are non-numeric.
+    """
+    parts: dict[str, str] = {}
+    for segment in zmq_address.split(","):
+        key, _, val = segment.partition(":")
+        parts[key.strip()] = val.strip()
+    try:
+        host = parts["host"]
+        handshake_port = int(parts["handshake"])
+        notify_port = int(parts["notify"])
+    except (KeyError, ValueError) as e:
+        raise ValueError(
+            f"Malformed zmq_address {zmq_address!r}: expected "
+            f"'host:IP,handshake:PORT,notify:PORT' format"
+        ) from e
+    return host, handshake_port, notify_port
+
+
+def get_peer_zmq_from_request_id(request_id: str, is_producer: bool) -> str:
+    """Extract the *peer's* zmq_address from the vLLM router request_id.
+
+    The producer (prefill) needs the decode's address; the consumer (decode)
+    needs the prefill's address.
+    """
+    if is_producer:
+        m = _DECODE_ZMQ_RE.search(request_id)
+    else:
+        m = _PREFILL_ZMQ_RE.search(request_id)
+    if m is None:
+        raise ValueError(
+            f"Cannot parse peer zmq_address from request_id: {request_id!r}"
+        )
+    return m.group(1)
+
+
 @dataclass
 class ReqMeta:
     """Metadata for a single request."""
@@ -286,15 +353,23 @@ def add_new_req(
         write_mode=False,
     ):
         transfer_id = kv_transfer_params["transfer_id"]
+
+        # Parse host/ports from the request_id. The router embeds both zmq_addresses
+        # in the request_id
+        peer_zmq = get_peer_zmq_from_request_id(request_id, is_producer=write_mode)
+        remote_host, remote_handshake_port, remote_notify_port = (
+            parse_moriio_zmq_address(peer_zmq)
+        )
+
         _req = ReqMeta(
             transfer_id=transfer_id,
             local_block_ids=local_block_ids,
             remote_block_ids=kv_transfer_params["remote_block_ids"],
             remote_engine_id=kv_transfer_params["remote_engine_id"],
-            remote_host=kv_transfer_params["remote_host"],
-            remote_port=kv_transfer_params["remote_port"],
-            remote_handshake_port=kv_transfer_params["remote_handshake_port"],
-            remote_notify_port=kv_transfer_params["remote_notify_port"],
+            remote_host=remote_host,
+            remote_port=remote_handshake_port,
+            remote_handshake_port=remote_handshake_port,
+            remote_notify_port=remote_notify_port,
             tp_size=kv_transfer_params.get("tp_size", 1),
             remote_dp_size=kv_transfer_params.get("remote_dp_size", 1),
         )
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/moriio/moriio_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/moriio/moriio_connector.py
index dcde7665f344..804103275ae3 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/moriio/moriio_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/moriio/moriio_connector.py
@@ -35,8 +35,10 @@
     TransferId,
     WriteTask,
     get_moriio_mode,
+    get_peer_zmq_from_request_id,
     get_port_offset,
     get_role,
+    parse_moriio_zmq_address,
     set_role,
     zmq_ctx,
 )
@@ -91,9 +93,9 @@ def __init__(
         self,
         vllm_config: VllmConfig,
         role: KVConnectorRole,
-        kv_cache_config: "KVCacheConfig | None" = None,
+        kv_cache_config: "KVCacheConfig",
     ):
-        super().__init__(vllm_config, role)
+        super().__init__(vllm_config, role, kv_cache_config)
         assert vllm_config.kv_transfer_config is not None, (
             "kv_transfer_config must be set for MoRIIOConnector"
         )
@@ -379,13 +381,12 @@ def update_state_after_alloc(
         if params is not None and params.get("do_remote_prefill"):
             if self.mode == MoRIIOMode.READ:
                 if remote_block_ids := params.get("remote_block_ids"):
-                    if all(
-                        p in params
-                        for p in ("remote_engine_id", "remote_host", "remote_port")
-                    ):
-                        # If remote_blocks and num_external_tokens = 0, we
+                    # remote_engine_id is returned by the prefill's request_finished.
+                    # host/ports come from the request_id (parsed in add_new_req).
+                    if "remote_engine_id" in params:
+                        # If remote_blocks and num_external_tokens = 0, we have
                         # a full prefix cache hit on the D worker. We need to call
-                        # send_notif in _read_blocks to free the memory on the P.
+                        # send_notify in _read_blocks to free the memory on the P.
 
                         # Get unhashed blocks to pull from remote.
                         local_block_ids = blocks.get_block_ids()[0]
@@ -407,22 +408,30 @@ def update_state_after_alloc(
                         )
 
             else:
+                # WRITE mode: prefill scheduler notifies the decode side that
+                # blocks are ready.  Parse the decode's host/notify_port from
+                # the request_id
                 assert request.kv_transfer_params is not None, (
                     "kv_transfer_params should not be None"
                 )
 
                 remote_dp_rank = request.kv_transfer_params.get("remote_dp_rank", 0)
 
+                peer_zmq = get_peer_zmq_from_request_id(
+                    request.request_id, is_producer=True
+                )
+                remote_host, _, remote_notify_port = parse_moriio_zmq_address(peer_zmq)
+
                 for tp_index in range(self.tp_size):
-                    target_port = request.kv_transfer_params[
-                        "remote_notify_port"
-                    ] + get_port_offset(remote_dp_rank, tp_index)
+                    target_port = remote_notify_port + get_port_offset(
+                        remote_dp_rank, tp_index
+                    )
 
                     self.send_notify_block(
                         req_id=request.request_id,
                         transfer_id=request.kv_transfer_params["transfer_id"],
                         block_notify_list=blocks.get_block_ids()[0],
-                        host=params.get("remote_host"),
+                        host=remote_host,
                         port=target_port,
                     )
 
@@ -584,15 +593,15 @@ def request_finished(
                 + MoRIIOConstants.VLLM_MORI_READ_ABORT_REQUEST_TIMEOUT
             )
 
-        # If we execute in P-D serial mode, no notification port is needed.
+        # Return KV transfer params forwarded verbatim to the decode instance by
+        # the router.
         return delay_free_blocks, dict(
             do_remote_prefill=True,
             do_remote_decode=False,
             remote_block_ids=computed_block_ids,
             remote_engine_id=self.engine_id,
-            remote_host=self.host_ip,
-            remote_port=self.handshake_port,
             tp_size=self.vllm_config.parallel_config.tensor_parallel_size,
+            transfer_id=params["transfer_id"],
         )
 
 
@@ -686,7 +695,12 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str):
         # Agent.
         self.moriio_wrapper = MoRIIOWrapper(tp_rank=self.tp_rank, dp_rank=self.dp_rank)
         self.moriio_wrapper.set_moriio_engine(self.moriio_engine)
-        self.moriio_wrapper.set_backend_type(BackendType.RDMA)
+        backend = (
+            BackendType.XGMI
+            if self.moriio_config.backend == "xgmi"
+            else BackendType.RDMA
+        )
+        self.moriio_wrapper.set_backend_type(backend)
         self.moriio_wrapper.notify_port = self.moriio_config.notify_port
         self.local_kv_cache_metadata: list[bytes] = []
         self.local_kv_cache_size: list[int] = []
@@ -846,7 +860,15 @@ def _get_built_session(self, remote_engine_id):
         ]
 
     def _ping(self, zmq_context):
-        http_request_address = f"http://{self.request_address}/v1/completions"
+        # Use host:port format for http_address (compatible with official router)
+        http_address = f"{self.request_address}"
+        # Include host so the router embeds it in the request_id; the connector
+        # on the other side parses host/ports from there.
+        zmq_address = (
+            f"host:{self.local_ip},"
+            f"handshake:{self.handshake_port},"
+            f"notify:{self.notify_port}"
+        )
         role = "P" if self.is_producer else "D"
 
         retry_count = 0
@@ -857,14 +879,17 @@ def _ping(self, zmq_context):
             while True:
                 try:
                     data = {
-                        "type": "register",
-                        "role": role,
-                        "index": str(index),
-                        "request_address": http_request_address,
-                        "handshake_port": self.handshake_port,
-                        "notify_port": self.notify_port,
+                        "type": role,  # "P" or "D"
+                        "http_address": http_address,
+                        "zmq_address": zmq_address,
+                        # dp_size/tp_size are not used by the official vLLM router
+                        # (routing operates at the http_address level); they are
+                        # consumed only by the toy proxy server.
                         "dp_size": self.moriio_config.dp_size,
                         "tp_size": self.moriio_config.tp_size,
+                        # transfer_mode is included so the router can distinguish
+                        # READ (prefill-then-decode, sequential) from WRITE (concurrent)
+                        # scheduling.
                         "transfer_mode": self.mode.name,
                     }
 
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/moriio/moriio_engine.py b/vllm/distributed/kv_transfer/kv_connector/v1/moriio/moriio_engine.py
index 973c0bb801c8..78c8d4860c1b 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/moriio/moriio_engine.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/moriio/moriio_engine.py
@@ -44,11 +44,13 @@
 logger = init_logger(__name__)
 try:
     from mori.io import (
+        BackendType,
         EngineDesc,
         IOEngine,
         MemoryDesc,
         PollCqMode,
         RdmaBackendConfig,
+        XgmiBackendConfig,
     )
 
     logger.info("MoRIIO is available")
@@ -376,17 +378,24 @@ def set_moriio_engine(self, moriio_engine):
 
     def set_backend_type(self, backend_type):
         assert self.moriio_engine is not None, "MoRIIO engine must be set first"
-        qp_per_transfer = envs.VLLM_MORIIO_QP_PER_TRANSFER
-        post_batch_size = envs.VLLM_MORIIO_POST_BATCH_SIZE
-        num_worker_threads = envs.VLLM_MORIIO_NUM_WORKERS
-        poll_mode = PollCqMode.POLLING
-        rdma_cfg = RdmaBackendConfig(
-            qp_per_transfer,
-            post_batch_size,
-            num_worker_threads,
-            poll_mode,
-        )
-        self.moriio_engine.create_backend(backend_type, rdma_cfg)
+        if backend_type == BackendType.XGMI:
+            logger.info("Using MoRIIO backend: XGMI")
+            self.moriio_engine.create_backend(backend_type, XgmiBackendConfig())
+        else:
+            logger.info(
+                "Using MoRIIO backend: RDMA "
+                "(qp_per_transfer=%d, post_batch_size=%d, num_workers=%d)",
+                envs.VLLM_MORIIO_QP_PER_TRANSFER,
+                envs.VLLM_MORIIO_POST_BATCH_SIZE,
+                envs.VLLM_MORIIO_NUM_WORKERS,
+            )
+            rdma_cfg = RdmaBackendConfig(
+                envs.VLLM_MORIIO_QP_PER_TRANSFER,
+                envs.VLLM_MORIIO_POST_BATCH_SIZE,
+                envs.VLLM_MORIIO_NUM_WORKERS,
+                PollCqMode.POLLING,
+            )
+            self.moriio_engine.create_backend(backend_type, rdma_cfg)
 
     def get_agent_metadata(self):
         assert self.moriio_engine is not None, "MoRIIO engine must be set first"
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
index 3888d2e0f44c..8f01c3f84f96 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import copy
-from collections.abc import Iterable
+from collections.abc import Callable, Iterable
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 import torch
 
@@ -18,6 +18,8 @@
     KVConnectorMetadata,
     KVConnectorRole,
     KVConnectorWorkerMetadata,
+    SupportsHMA,
+    supports_hma,
 )
 from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
     KVConnectorPromMetrics,
@@ -33,6 +35,7 @@
 if TYPE_CHECKING:
     from vllm.distributed.kv_events import KVCacheEvent
     from vllm.forward_context import ForwardContext
+    from vllm.v1.core.block_pool import BlockPool
     from vllm.v1.core.kv_cache_manager import KVCacheBlocks
     from vllm.v1.kv_cache_interface import KVCacheConfig
     from vllm.v1.request import Request
@@ -123,7 +126,7 @@ def observe(self, transfer_stats_data: dict[str, Any], engine_idx: int = 0):
             self._prom_metrics[connector_id].observe(stats_data["data"], engine_idx)
 
 
-class MultiConnector(KVConnectorBase_V1):
+class MultiConnector(KVConnectorBase_V1, SupportsHMA):
     """
     A wrapper for using multiple KVConnectors at the same time.
 
@@ -166,6 +169,12 @@ def __init__(
             self._connectors.append(connector_cls(temp_config, role, kv_cache_config))
             self._ktc_kv_transfer_config.append(temp_config.kv_transfer_config)
 
+        self._all_support_hma = all(supports_hma(c) for c in self._connectors)
+        assert (
+            vllm_config.scheduler_config.disable_hybrid_kv_cache_manager
+            or self._all_support_hma
+        ), "HMA should not be enabled unless all sub-connectors support it"
+
         # A mapping from request id to the index of the connector chosen to
         # load the request from (if any).
         self._requests_to_connector: dict[str, int] = {}
@@ -219,6 +228,10 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
         for c in self._connectors:
             c.register_kv_caches(kv_caches)
 
+    def bind_gpu_block_pool(self, gpu_block_pool: "BlockPool") -> None:
+        for c in self._connectors:
+            c.bind_gpu_block_pool(gpu_block_pool)
+
     # We must override the base class method here because we need to bind
     # the metadata to each connector in the order of the connectors in the
     # MultiKVConnectorMetadata.
@@ -381,6 +394,10 @@ def update_state_after_alloc(
                 # Call with empty blocks for other connectors.
                 c.update_state_after_alloc(request, empty_blocks, 0)
 
+    def on_new_request(self, request: "Request") -> None:
+        for c in self._connectors:
+            c.on_new_request(request)
+
     def build_connector_meta(
         self, scheduler_output: SchedulerOutput
     ) -> MultiKVConnectorMetadata:
@@ -436,15 +453,17 @@ def set_xfer_handshake_metadata(
         for c in self._connectors:
             c.set_xfer_handshake_metadata(metadata)
 
-    def request_finished(
+    def _aggregate_request_finished(
         self,
         request: "Request",
-        blocks: list[int],
+        per_connector_fn: Callable[
+            [KVConnectorBase_V1], tuple[bool, dict[str, Any] | None]
+        ],
     ) -> tuple[bool, dict[str, Any] | None]:
         async_saves = 0
         kv_txfer_params = None
         for c in self._connectors:
-            async_save, txfer_params = c.request_finished(request, blocks)
+            async_save, txfer_params = per_connector_fn(c)
             if async_save:
                 async_saves += 1
             if txfer_params is not None:
@@ -458,11 +477,39 @@ def request_finished(
         if async_saves > 1:
             self._extra_async_saves[request.request_id] = async_saves - 1
 
-        # Clean up other state for this request.
         self._requests_to_connector.pop(request.request_id, None)
 
         return async_saves > 0, kv_txfer_params
 
+    def request_finished(
+        self,
+        request: "Request",
+        blocks: list[int],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        return self._aggregate_request_finished(
+            request,
+            lambda c: c.request_finished(request, blocks),
+        )
+
+    def request_finished_all_groups(
+        self,
+        request: "Request",
+        block_ids: tuple[list[int], ...],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        if not self._all_support_hma:
+            assert len(block_ids) == 1, (
+                "HMA with multiple kv_cache_groups requires all "
+                "sub-connectors to support HMA"
+            )
+            return self.request_finished(request, block_ids[0])
+
+        return self._aggregate_request_finished(
+            request,
+            lambda c: cast(SupportsHMA, c).request_finished_all_groups(
+                request, block_ids
+            ),
+        )
+
     def take_events(self) -> Iterable["KVCacheEvent"]:
         for c in self._connectors:
             yield from c.take_events()
@@ -548,7 +595,13 @@ def get_kv_connector_stats(self) -> MultiKVConnectorStats | None:
             if stats_by_connector is None:
                 # Lazy init to allow optional return value.
                 stats_by_connector = MultiKVConnectorStats()
-            stats_by_connector[c.__class__.__name__] = stats
+            connector_id = c.__class__.__name__
+            if connector_id in stats_by_connector.data:
+                stats_by_connector[connector_id] = stats_by_connector[
+                    connector_id
+                ].aggregate(stats)
+            else:
+                stats_by_connector[connector_id] = stats
         return stats_by_connector
 
     @classmethod
@@ -560,9 +613,13 @@ def build_prom_metrics(
         per_engine_labelvalues: dict[int, list[object]],
     ) -> KVConnectorPromMetrics:
         prom_metrics: dict[str, KVConnectorPromMetrics] = {}
+        seen_classes: set[type] = set()
         for connector_cls, temp_config in cls._get_connector_classes_and_configs(
             vllm_config
         ):
+            if connector_cls in seen_classes:
+                continue
+            seen_classes.add(connector_cls)
             connector_prom = connector_cls.build_prom_metrics(
                 temp_config, metric_types, labelnames, per_engine_labelvalues
             )
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl/__init__.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/__init__.py
new file mode 100644
index 000000000000..ed5c892fb9df
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/__init__.py
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""NIXL KV-cache transfer connector (disaggregated prefill / decode)."""
+
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.connector import (
+    NixlConnector,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.metadata import (
+    NixlAgentMetadata,
+    NixlConnectorMetadata,
+    NixlHandshakePayload,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.scheduler import (
+    NixlConnectorScheduler,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.stats import (
+    NixlKVConnectorStats,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker import (
+    NixlConnectorWorker,
+)
+
+__all__ = [
+    "NixlAgentMetadata",
+    "NixlConnector",
+    "NixlConnectorMetadata",
+    "NixlConnectorScheduler",
+    "NixlConnectorWorker",
+    "NixlHandshakePayload",
+    "NixlKVConnectorStats",
+]
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl/connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/connector.py
new file mode 100644
index 000000000000..187322b4ae4e
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/connector.py
@@ -0,0 +1,292 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""NixlConnector – thin facade that delegates to scheduler / worker."""
+
+from typing import TYPE_CHECKING, Any
+
+import torch
+
+from vllm.config import VllmConfig
+from vllm.distributed.kv_transfer.kv_connector.utils import (
+    EngineId,
+    get_current_attn_backend,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    CopyBlocksOp,
+    KVConnectorBase_V1,
+    KVConnectorHandshakeMetadata,
+    KVConnectorMetadata,
+    KVConnectorRole,
+    SupportsHMA,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
+    KVConnectorPromMetrics,
+    KVConnectorStats,
+    PromMetric,
+    PromMetricT,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.metadata import (
+    NixlConnectorMetadata,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.scheduler import (
+    NixlConnectorScheduler,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.stats import (
+    NixlKVConnectorStats,
+    NixlPromMetrics,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.worker import (
+    NixlConnectorWorker,
+)
+from vllm.forward_context import ForwardContext
+from vllm.logger import init_logger
+from vllm.v1.attention.backend import AttentionBackend, AttentionMetadata
+from vllm.v1.attention.backends.utils import get_kv_cache_layout
+from vllm.v1.core.sched.output import SchedulerOutput
+from vllm.v1.kv_cache_interface import MambaSpec
+from vllm.v1.outputs import KVConnectorOutput
+
+if TYPE_CHECKING:
+    from vllm.v1.core.kv_cache_manager import KVCacheBlocks
+    from vllm.v1.kv_cache_interface import KVCacheConfig
+    from vllm.v1.request import Request
+
+logger = init_logger(__name__)
+
+
+class NixlConnector(KVConnectorBase_V1, SupportsHMA):
+    @property
+    def prefer_cross_layer_blocks(self) -> bool:
+        if any(
+            [
+                isinstance(group.kv_cache_spec, MambaSpec)
+                for group in self.kv_cache_config.kv_cache_groups
+            ]
+        ):
+            # Hybrid SSM models do not yet support cross-layer layout
+            return False
+
+        backend = get_current_attn_backend(self._vllm_config)
+        if backend.get_name() not in (
+            "FLASH_ATTN",
+            "FLASHINFER",
+            "TRITON_ATTN",
+        ):
+            return False
+
+        # For now there is no benefit to run cross layers when backend
+        # does not support on HND
+        if get_kv_cache_layout() != "HND":
+            return False
+
+        extra_config = self.kv_transfer_config.kv_connector_extra_config
+        return (
+            str(extra_config.get("enable_cross_layers_blocks", "False")).lower()
+            == "true"
+        )
+
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        role: KVConnectorRole,
+        kv_cache_config: "KVCacheConfig",
+    ):
+        super().__init__(vllm_config, role, kv_cache_config)
+        assert vllm_config.kv_transfer_config is not None
+        assert vllm_config.kv_transfer_config.engine_id is not None
+        self.kv_cache_config = kv_cache_config
+        self.engine_id: EngineId = vllm_config.kv_transfer_config.engine_id
+        self.kv_transfer_config = vllm_config.kv_transfer_config
+        if role == KVConnectorRole.SCHEDULER:
+            self.connector_scheduler: NixlConnectorScheduler | None = (
+                NixlConnectorScheduler(vllm_config, self.engine_id, kv_cache_config)
+            )
+            self.connector_worker: NixlConnectorWorker | None = None
+        elif role == KVConnectorRole.WORKER:
+            self.connector_scheduler = None
+            self.connector_worker = NixlConnectorWorker(
+                vllm_config, self.engine_id, kv_cache_config
+            )
+
+    ############################################################
+    # Class Methods
+    ############################################################
+    @classmethod
+    def get_required_kvcache_layout(cls, vllm_config: VllmConfig):
+        if vllm_config.model_config is None:
+            logger.warning_once(
+                "Unable to detect current VLLM config. "
+                "Fallback to default kv cache layout."
+            )
+            return None
+        use_mla = vllm_config.model_config.use_mla
+        if use_mla:
+            # return None when we have mla
+            # as the layout should not matter in that case,
+            # which fallback to the default behavior.
+            return None
+        logger.info_once(
+            "NixlConnector setting KV cache layout to HND for better xfer performance."
+        )
+        return "HND"
+
+    ############################################################
+    # Scheduler Side Methods
+    ############################################################
+
+    def get_num_new_matched_tokens(
+        self, request: "Request", num_computed_tokens: int
+    ) -> tuple[int | None, bool]:
+        assert self.connector_scheduler is not None
+        return self.connector_scheduler.get_num_new_matched_tokens(
+            request, num_computed_tokens
+        )
+
+    def update_state_after_alloc(
+        self, request: "Request", blocks: "KVCacheBlocks", num_external_tokens: int
+    ):
+        assert self.connector_scheduler is not None
+        return self.connector_scheduler.update_state_after_alloc(
+            request, blocks, num_external_tokens
+        )
+
+    def build_connector_meta(
+        self,
+        scheduler_output: SchedulerOutput,
+    ) -> KVConnectorMetadata:
+        assert self.connector_scheduler is not None
+        return self.connector_scheduler.build_connector_meta(scheduler_output)
+
+    def on_new_request(self, request: "Request") -> None:
+        assert self.connector_scheduler is not None
+        self.connector_scheduler.on_new_request(request)
+
+    def update_connector_output(self, connector_output: KVConnectorOutput):
+        assert self.connector_scheduler is not None
+        self.connector_scheduler.update_connector_output(connector_output)
+
+    def request_finished(
+        self,
+        request: "Request",
+        block_ids: list[int],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        assert self.connector_scheduler is not None
+        return self.connector_scheduler.request_finished(request, (block_ids,))
+
+    def request_finished_all_groups(
+        self,
+        request: "Request",
+        block_ids: tuple[list[int], ...],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        assert self.connector_scheduler is not None
+        return self.connector_scheduler.request_finished(request, block_ids)
+
+    def set_xfer_handshake_metadata(
+        self, metadata: dict[int, KVConnectorHandshakeMetadata]
+    ) -> None:
+        """
+        Set the KV connector handshake metadata for this connector.
+
+        Args:
+            metadata (dict): the handshake metadata to set.
+        """
+        assert self.connector_scheduler is not None
+        self.connector_scheduler.set_xfer_handshake_metadata(metadata)
+
+    ############################################################
+    # Worker Side Methods
+    ############################################################
+    def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
+        assert self.connector_worker is not None
+        self.connector_worker.register_kv_caches(kv_caches)
+
+    def register_cross_layers_kv_cache(
+        self, kv_cache: torch.Tensor, attn_backend: type[AttentionBackend]
+    ):
+        assert self.connector_worker is not None
+        self.connector_worker.register_cross_layers_kv_caches(kv_cache)
+
+    def set_host_xfer_buffer_ops(self, copy_operation: CopyBlocksOp):
+        assert self.connector_worker is not None
+        self.connector_worker.set_host_xfer_buffer_ops(copy_operation)
+
+    def get_finished(self, finished_req_ids: set[str]) -> tuple[set[str], set[str]]:
+        """Get the finished recving and sending requests."""
+        assert self.connector_worker is not None
+        return self.connector_worker.get_finished()
+
+    def get_block_ids_with_load_errors(self) -> set[int]:
+        """Get block IDs that failed to load via NIXL."""
+        assert self.connector_worker is not None
+        return self.connector_worker.get_block_ids_with_load_errors()
+
+    def get_kv_connector_stats(self) -> KVConnectorStats | None:
+        if self.connector_worker is None:
+            return None
+        return self.connector_worker.get_kv_connector_stats()
+
+    @classmethod
+    def build_kv_connector_stats(
+        cls, data: dict[str, Any] | None = None
+    ) -> KVConnectorStats | None:
+        return (
+            NixlKVConnectorStats(data=data)
+            if data is not None
+            else NixlKVConnectorStats()
+        )
+
+    @classmethod
+    def build_prom_metrics(
+        cls,
+        vllm_config: VllmConfig,
+        metric_types: dict[type[PromMetric], type[PromMetricT]],
+        labelnames: list[str],
+        per_engine_labelvalues: dict[int, list[object]],
+    ) -> KVConnectorPromMetrics:
+        return NixlPromMetrics(
+            vllm_config, metric_types, labelnames, per_engine_labelvalues
+        )
+
+    def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
+        assert self.connector_worker is not None
+        assert isinstance(self._connector_metadata, NixlConnectorMetadata)
+        self.connector_worker.start_load_kv(self._connector_metadata)
+
+    def wait_for_layer_load(self, layer_name: str) -> None:
+        """NixlConnector does not do layerwise saving."""
+        pass
+
+    def save_kv_layer(
+        self,
+        layer_name: str,
+        kv_layer: torch.Tensor,
+        attn_metadata: AttentionMetadata,
+        **kwargs,
+    ) -> None:
+        """NixlConnector does not save explicitly."""
+        pass
+
+    def wait_for_save(self):
+        assert self.connector_worker is not None
+        assert isinstance(self._connector_metadata, NixlConnectorMetadata)
+        if self.connector_worker.use_host_buffer and self.connector_worker.copy_blocks:
+            self.connector_worker.save_kv_to_host(self._connector_metadata)
+
+    def shutdown(self):
+        if self.connector_worker is not None:
+            self.connector_worker.shutdown()
+        if self.connector_scheduler is not None:
+            self.connector_scheduler.shutdown()
+
+    def get_handshake_metadata(self) -> KVConnectorHandshakeMetadata | None:
+        """
+        Get the KVConnector handshake metadata for this connector.
+        This metadata is used for out-of-band connector handshake
+        between P/D workers.
+
+        Returns:
+            KVConnectorHandshakeMetadata: the handshake metadata.
+            None if no handshake metadata is available.
+        """
+        assert self.connector_worker is not None
+        return self.connector_worker.xfer_handshake_metadata
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl/metadata.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/metadata.py
new file mode 100644
index 000000000000..b9e3436f5019
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/metadata.py
@@ -0,0 +1,211 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Metadata dataclasses and helpers for the NIXL connector."""
+
+from dataclasses import dataclass
+from typing import Any
+
+from vllm.config import VllmConfig
+from vllm.distributed.kv_transfer.kv_connector.utils import BlockIds, EngineId
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    KVConnectorHandshakeMetadata,
+    KVConnectorMetadata,
+)
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
+TransferHandle = int
+ReqId = str
+
+GET_META_MSG = b"get_meta_msg"
+#
+# NIXL Connector Version
+#
+# Increment this version whenever there is an incompatible change to:
+#   - NixlAgentMetadata schema
+#   - kv_transfer_params schema or semantics
+#   - NIXL transfer protocol or wire format
+#   - KV cache memory layout or block organization
+#   - Any other change that breaks P/D interoperability
+#
+# Version History:
+#   1: Initial version with compatibility checking
+#   2: Add remote_request_id to kv_transfer_params
+#   3: Add physical_blocks_per_logical_kv_block to NixlAgentMetadata
+#   4: Add KV block lease renewal through heartbeats
+#
+NIXL_CONNECTOR_VERSION: int = 4
+
+
+@dataclass
+class NixlAgentMetadata:
+    engine_id: str
+    agent_metadata: bytes
+    kv_caches_base_addr: list[int]
+    device_id: int
+    num_blocks: int
+    block_lens: list[int]
+    kv_cache_layout: str
+    block_size: int
+    ssm_sizes: tuple[int, int]
+    attn_backend_name: str
+    physical_blocks_per_logical_kv_block: int
+
+
+@dataclass
+class NixlHandshakePayload(KVConnectorHandshakeMetadata):
+    """
+    Wrapper for NIXL handshake sent over the wire.
+
+    Enables two-phase decoding for graceful compatibility checking:
+    1. Decode NixlHandshakePayload to get compatibility_hash
+    2. Compute local hash and compare
+    3. Only if hashes match, decode agent_metadata_bytes
+
+    This prevents decoder errors when NixlAgentMetadata schema is
+    incompatible, allowing graceful failure with clear error message.
+    """
+
+    compatibility_hash: str
+    agent_metadata_bytes: bytes  # NixlAgentMetadata encoded
+
+
+def compute_nixl_compatibility_hash(
+    vllm_config: VllmConfig, attn_backend_name: str, cross_layers_blocks: bool
+) -> str:
+    """
+    Compute compatibility hash for NIXL KV transfer.
+
+    Hash only the factors that affect whether two NIXL instances can
+    successfully transfer KV cache data.
+
+    Factors included:
+    - vLLM version and NIXL connector version
+    - Model architecture (name, dtype, KV heads, layers)
+    - KV cache format (dtype, sliding window)
+    - Attention backend
+
+    Note: Factors like tensor_parallel_size, block_size, and kv_cache_layout
+    are validated at runtime in _validate_remote_agent_handshake and are not
+    included in this hash to support heterogeneous deployments.
+
+    Note - the set of factors are likely to evolve significantly over
+    time to be more or less permissive.
+
+    Returns:
+        SHA-256 hex digest
+    """
+    from vllm import __version__ as vllm_version
+    from vllm.config.utils import hash_factors
+
+    model_config = vllm_config.model_config
+    cache_config = vllm_config.cache_config
+    is_hma_enabled = not vllm_config.scheduler_config.disable_hybrid_kv_cache_manager
+
+    factors = {
+        # Version compatibility
+        "vllm_version": vllm_version,
+        "nixl_connector_version": NIXL_CONNECTOR_VERSION,
+        # Model architecture - affects KV cache shape
+        "model": model_config.model,
+        "dtype": str(model_config.dtype),
+        "num_kv_heads": model_config.get_total_num_kv_heads(),
+        "head_size": model_config.get_head_size(),
+        "num_hidden_layers": model_config.get_total_num_hidden_layers(),
+        # Attention backend and KV cache dtype affect memory layout
+        "attn_backend_name": attn_backend_name,
+        "cache_dtype": str(cache_config.cache_dtype),
+        "cross_layers_blocks": cross_layers_blocks,
+        "is_hma_enabled": is_hma_enabled,
+    }
+
+    compat_hash = hash_factors(factors)
+    logger.debug(
+        "NIXL compatibility hash: %s (model=%s, dtype=%s, num_kv_heads=%d, "
+        "cache_dtype=%s, attn_backend=%s)",
+        compat_hash,
+        factors["model"],
+        factors["dtype"],
+        factors["num_kv_heads"],
+        factors["cache_dtype"],
+        attn_backend_name,
+    )
+    return compat_hash
+
+
+@dataclass
+class HeartbeatInfo:
+    """Heartbeat data for a single remote engine, sent from D worker to P."""
+
+    req_ids: set[ReqId]
+    host: str
+    port: int
+    tp_size: int
+
+
+@dataclass
+class RemoteMeta:
+    block_ids: BlockIds
+    host: str
+    port: int
+    engine_id: str
+    request_id: str
+
+
+@dataclass
+class ReqMeta:
+    local_block_ids: BlockIds
+    # To be used when logical block size does not match the kernel block size
+    local_physical_block_ids: BlockIds
+    tp_size: int
+    remote: RemoteMeta | None = None
+
+
+class NixlConnectorMetadata(KVConnectorMetadata):
+    def __init__(self):
+        self.reqs_to_recv: dict[ReqId, ReqMeta] = {}
+        self.reqs_to_save: dict[ReqId, ReqMeta] = {}
+        self.reqs_to_send: dict[ReqId, float] = {}
+        self.reqs_in_batch: set[ReqId] = set()
+        self.reqs_not_processed: set[ReqId] = set()
+        # Heartbeat data grouped by remote engine, sent by D worker to P.
+        self.heartbeat_by_engine: dict[EngineId, HeartbeatInfo] = {}
+
+    def _add_new_req(
+        self,
+        local_block_ids: BlockIds,
+        kv_transfer_params: dict[str, Any],
+    ) -> ReqMeta:
+        return ReqMeta(
+            local_block_ids=local_block_ids,
+            local_physical_block_ids=local_block_ids,
+            # P workers don't need to receive tp_size from proxy here.
+            tp_size=kv_transfer_params.get("tp_size", 1),
+        )
+
+    def add_new_req_to_save(
+        self,
+        request_id: ReqId,
+        local_block_ids: BlockIds,
+        kv_transfer_params: dict[str, Any],
+    ):
+        self.reqs_to_save[request_id] = self._add_new_req(
+            local_block_ids, kv_transfer_params
+        )
+
+    def add_new_req_to_recv(
+        self,
+        request_id: ReqId,
+        local_block_ids: BlockIds,
+        kv_transfer_params: dict[str, Any],
+    ):
+        req = self._add_new_req(local_block_ids, kv_transfer_params)
+        req.remote = RemoteMeta(
+            block_ids=kv_transfer_params["remote_block_ids"],
+            engine_id=kv_transfer_params["remote_engine_id"],
+            request_id=kv_transfer_params["remote_request_id"],
+            host=kv_transfer_params["remote_host"],
+            port=kv_transfer_params["remote_port"],
+        )
+        self.reqs_to_recv[request_id] = req
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl/scheduler.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/scheduler.py
new file mode 100644
index 000000000000..b2122ed0d30b
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/scheduler.py
@@ -0,0 +1,674 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Scheduler-side logic for the NIXL connector."""
+
+import threading
+import time
+from typing import TYPE_CHECKING, Any
+
+import msgspec
+import zmq
+
+from vllm import envs
+from vllm.distributed.kv_transfer.kv_connector.utils import (
+    BlockIds,
+    EngineId,
+    yield_req_data,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    KVConnectorHandshakeMetadata,
+    KVConnectorMetadata,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.metadata import (
+    GET_META_MSG,
+    HeartbeatInfo,
+    NixlConnectorMetadata,
+    NixlHandshakePayload,
+    ReqId,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.utils import zmq_ctx
+from vllm.logger import init_logger
+from vllm.platforms import current_platform
+from vllm.utils.math_utils import cdiv
+from vllm.utils.network_utils import make_zmq_path
+from vllm.v1.core.sched.output import SchedulerOutput
+from vllm.v1.kv_cache_interface import (
+    FullAttentionSpec,
+    MambaSpec,
+    SlidingWindowSpec,
+)
+
+if TYPE_CHECKING:
+    from vllm.config import VllmConfig
+    from vllm.v1.core.kv_cache_manager import KVCacheBlocks
+    from vllm.v1.kv_cache_interface import KVCacheConfig
+    from vllm.v1.outputs import KVConnectorOutput
+    from vllm.v1.request import Request
+
+logger = init_logger(__name__)
+
+
+class NixlConnectorScheduler:
+    """Implementation of Scheduler side methods"""
+
+    def __init__(
+        self,
+        vllm_config: "VllmConfig",
+        engine_id: str,
+        kv_cache_config: "KVCacheConfig",
+    ):
+        self.vllm_config = vllm_config
+        self.block_size = vllm_config.cache_config.block_size
+        self.engine_id: EngineId = engine_id
+        self.kv_cache_config = kv_cache_config
+        self.side_channel_host = envs.VLLM_NIXL_SIDE_CHANNEL_HOST
+        self.side_channel_port = (
+            envs.VLLM_NIXL_SIDE_CHANNEL_PORT
+            + vllm_config.parallel_config.data_parallel_index
+        )
+        assert vllm_config.kv_transfer_config is not None
+        self._kv_lease_duration: int = (
+            vllm_config.kv_transfer_config.get_from_extra_config(
+                "kv_lease_duration", 30
+            )
+        )
+        # NOTE (NickLucche): For now we use a hardcoded value for a simpler interface.
+        self._heartbeat_interval = self._kv_lease_duration // 6
+        if current_platform.device_type == "cpu":
+            self.use_host_buffer = False
+        else:
+            self.use_host_buffer = (
+                vllm_config.kv_transfer_config.kv_buffer_device == "cpu"
+            )
+        self._is_hma_required = (
+            not vllm_config.scheduler_config.disable_hybrid_kv_cache_manager
+            # Also handle unlikely SW-only model case instead of checking num_groups>1.
+            and any(
+                not isinstance(g.kv_cache_spec, FullAttentionSpec)
+                for g in kv_cache_config.kv_cache_groups
+            )
+        )
+        self._has_mamba = any(
+            isinstance(g.kv_cache_spec, MambaSpec)
+            for g in kv_cache_config.kv_cache_groups
+        )
+
+        logger.info("Initializing NIXL Scheduler %s", engine_id)
+        if vllm_config.scheduler_config.disable_hybrid_kv_cache_manager:
+            logger.info("Hybrid Memory Allocator is enabled with NIXL")
+
+        # Background thread for handling new handshake requests.
+        self._nixl_handshake_listener_t: threading.Thread | None = None
+        self._stop_event = threading.Event()
+
+        # Requests that need to start recv/send.
+        # New requests are added by update_state_after_alloc in
+        # the scheduler. Used to make metadata passed to Worker.
+        self._reqs_need_recv: dict[ReqId, tuple[Request, BlockIds]] = {}
+        self._reqs_need_save: dict[ReqId, Request] = {}
+        # Reqs to send and their expiration time
+        self._reqs_need_send: dict[ReqId, float] = {}
+        self._reqs_in_batch: set[ReqId] = set()
+        # Reqs to remove from processed set because they're not to send after
+        # remote prefill or aborted.
+        self._reqs_not_processed: set[ReqId] = set()
+
+        # Heartbeat tracking: requests needing periodic lease-renewal heartbeats to
+        # remote P-side, stored as ready-to-send HeartbeatInfo grouped by remote engine
+        self._heartbeat_by_engine: dict[EngineId, HeartbeatInfo] = {}
+        # Reverse lookup: local req_id -> (engine_id, remote_req_id) for O(1) removal
+        self._heartbeat_req_engine: dict[ReqId, tuple[EngineId, ReqId]] = {}
+        self._last_heartbeat_time: float = 0.0
+
+        # Gather Sliding Window sizes for each kv cache group (if any) in number of
+        # blocks per KV cache group. This is used to clip the local attention window.
+        sw_sizes_tokens: list[tuple[int, int]] = [
+            (g.kv_cache_spec.sliding_window, g.kv_cache_spec.block_size)
+            if isinstance(g.kv_cache_spec, SlidingWindowSpec)
+            else (0, self.block_size)
+            for g in kv_cache_config.kv_cache_groups
+        ]
+        # cdiv(n_tokens, block_size) gives blocks/window; add 1 to conservatively
+        # account for boundary overlap eg window isn't fully aligned with blocks.
+        self.blocks_per_sw = [
+            cdiv(n_tokens, block_size) + 1 if n_tokens else 0
+            for n_tokens, block_size in sw_sizes_tokens
+        ]
+
+        # Threshold to decide whether to compute kv cache locally
+        # or pull from a remote node: minimum number of remote
+        # tokens to amortize the xfer latencies
+        self.kv_recompute_threshold: int = int(
+            vllm_config.kv_transfer_config.get_from_extra_config(
+                "kv_recompute_threshold", 64
+            )
+        )
+
+        # Bi-directional KV transfer feature supports KV block
+        # transfers from D node to P node
+        self.is_bidirectional_kv_xfer_enabled = (
+            vllm_config.kv_transfer_config.get_from_extra_config(
+                "bidirectional_kv_xfer", False
+            )
+        )
+        self.decoder_kv_blocks_ttl = (
+            vllm_config.kv_transfer_config.get_from_extra_config(
+                "decoder_kv_blocks_ttl", 480
+            )
+        )
+
+        if self.is_bidirectional_kv_xfer_enabled and self.kv_recompute_threshold > 0:
+            logger.info(
+                "Bidirectional KV transfer is enabled and the kv "
+                "recompute threshold is set to %d tokens."
+                "KV blocks on D are released after a TTL of %d seconds.",
+                self.kv_recompute_threshold,
+                self.decoder_kv_blocks_ttl,
+            )
+
+    def shutdown(self):
+        self._stop_event.set()
+        if self._nixl_handshake_listener_t is not None:
+            self._nixl_handshake_listener_t.join()
+            self._nixl_handshake_listener_t = None
+
+    def on_new_request(self, request: "Request") -> None:
+        """Track a request that may need heartbeats."""
+        params = request.kv_transfer_params
+        # NOTE (NickLucche) This excludes request meant for P, ie heartbeats are
+        # effectively disabled for Bidirectional KV transfer.
+        if params is None or not params.get("do_remote_prefill"):
+            return
+        # Only track if all required remote fields are present.
+        remote_engine_id = params.get("remote_engine_id")
+        remote_request_id = params.get("remote_request_id")
+        host = params.get("remote_host")
+        port = params.get("remote_port")
+        tp_size = params.get("tp_size")
+        if (
+            remote_engine_id is None
+            or remote_request_id is None
+            or host is None
+            or port is None
+            or tp_size is None
+        ):
+            return
+        if remote_engine_id not in self._heartbeat_by_engine:
+            self._heartbeat_by_engine[remote_engine_id] = HeartbeatInfo(
+                req_ids=set(),
+                host=host,
+                port=port,
+                tp_size=tp_size,
+            )
+        self._heartbeat_by_engine[remote_engine_id].req_ids.add(remote_request_id)
+        self._heartbeat_req_engine[request.request_id] = (
+            remote_engine_id,
+            remote_request_id,
+        )
+
+    def _stop_heartbeat(self, req_id: ReqId) -> None:
+        """Remove *req_id* from heartbeat tracking (if tracked)."""
+        if key := self._heartbeat_req_engine.pop(req_id, None):
+            engine_id, remote_id = key
+            if info := self._heartbeat_by_engine.get(engine_id):
+                info.req_ids.discard(remote_id)
+                if not info.req_ids:
+                    # Clean up empty engines so we don't leak a key when remote dies.
+                    del self._heartbeat_by_engine[engine_id]
+
+    def get_sw_clipped_blocks(self, block_ids: BlockIds) -> BlockIds:
+        """
+        Clip the number of blocks to the sliding window size for each kv cache group
+        that employs SWA.
+        This is necessary because the KV Cache manager initially allocates blocks for
+        the entire sequence length, and successively cleans up blocks that are outside
+        the window prior to the `request_finished_all_groups` hook.
+        """
+        if len(block_ids) == 0 or not self._is_hma_required:
+            # No blocks to clip eg Full prefix cache hit or not a hybrid model.
+            return block_ids
+        # NOTE (NickLucche) This logic is currently handled at the connector level
+        # because offloading connectors might want to receive the whole sequence even
+        # for SWA groups. We will abstract this logic once the interface is more stable
+        assert len(block_ids) == len(self.blocks_per_sw), (
+            "Number of KV cache groups must match"
+        )
+        # For non-SWA groups, blocks_per_sw is 0 so we return all block_ids unchanged
+        return tuple(
+            [
+                blocks[-self.blocks_per_sw[i] :]
+                if self.blocks_per_sw[i] > 0
+                else blocks
+                for i, blocks in enumerate(block_ids)
+            ]
+        )
+
+    def set_xfer_handshake_metadata(
+        self, metadata: dict[int, KVConnectorHandshakeMetadata]
+    ) -> None:
+        """
+        Set the KV connector handshake metadata for this connector.
+
+        Args:
+            metadata (dict): the handshake metadata to set.
+        """
+        encoded_data: dict[int, bytes] = {}
+        encoder = msgspec.msgpack.Encoder()
+        for tp_rank, rank_metadata in metadata.items():
+            if not isinstance(rank_metadata, NixlHandshakePayload):
+                raise ValueError(
+                    "NixlConnectorScheduler expects NixlHandshakePayload for "
+                    "handshake metadata."
+                )
+            encoded_data[tp_rank] = encoder.encode(rank_metadata)
+            logger.debug(
+                "Tp rank %d: encoded NixlHandshakePayload size: %s bytes",
+                tp_rank,
+                str(len(encoded_data[tp_rank])),
+            )
+
+        # Only start the listener when we have metadata to serve.
+        if self._nixl_handshake_listener_t is None:
+            ready_event = threading.Event()
+            self._nixl_handshake_listener_t = threading.Thread(
+                target=self._nixl_handshake_listener,
+                args=(
+                    encoded_data,
+                    ready_event,
+                    self._stop_event,
+                    self.side_channel_host,
+                    self.side_channel_port,
+                ),
+                daemon=True,
+                name="nixl_handshake_listener",
+            )
+            self._nixl_handshake_listener_t.start()
+            ready_event.wait()  # Wait for listener ZMQ socket to be ready.
+
+    @staticmethod
+    def _nixl_handshake_listener(
+        encoded_data: dict[int, Any],
+        ready_event: threading.Event,
+        stop_event: threading.Event,
+        host: str,
+        port: int,
+    ):
+        """Background thread for getting new NIXL handshakes."""
+        # NOTE(rob): this is a simple implementation. We will move
+        # to a better approach via HTTP endpoint soon.
+
+        # Listen for new requests for metadata.
+        path = make_zmq_path("tcp", host, port)
+        logger.debug("Starting listening on path: %s", path)
+        with zmq_ctx(zmq.ROUTER, path) as sock:
+            sock.setsockopt(zmq.RCVTIMEO, 1000)
+            ready_event.set()
+            while True:
+                try:
+                    identity, _, msg = sock.recv_multipart()
+                except zmq.Again:
+                    if stop_event.is_set():
+                        break
+                    continue
+                # Decode the message which contains (GET_META_MSG, rank)
+                msg, target_tp_rank = msgspec.msgpack.decode(msg)
+                logger.debug(
+                    "Received message for tp rank %s",
+                    target_tp_rank,
+                )
+                if msg != GET_META_MSG:
+                    logger.warning("Connection listener got unexpected message %s", msg)
+                sock.send_multipart((identity, b"", encoded_data[target_tp_rank]))
+
+    def _mamba_prefill_token_count(self, num_prompt_tokens: int) -> int:
+        """D-side only. Returns N-1 for Mamba models since the decoder
+        always recomputes the last token and must start from h(N-1)."""
+        if self._has_mamba and num_prompt_tokens > 1:
+            return num_prompt_tokens - 1
+        return num_prompt_tokens
+
+    def _truncate_mamba_request_for_prefill(self, request: "Request") -> None:
+        """P-side only: drop the last prompt token so the prefiller computes
+        h(N-1) instead of h(N). The decoder recomputes the last token to
+        derive h(N) correctly.
+
+        Guarded by ``_p_side_truncated`` to avoid repeated truncation if the
+        request is preempted and rescheduled."""
+        params = request.kv_transfer_params
+        if (
+            params is not None
+            # Guard against repeated truncation after preemption/reschedule.
+            and not params.get("_p_side_truncated")
+            and request.num_prompt_tokens > 1
+        ):
+            if request.prompt_token_ids is not None:
+                request.prompt_token_ids.pop()
+            elif request.prompt_embeds is not None:
+                request.prompt_embeds = request.prompt_embeds[:-1]
+            else:
+                return
+
+            request._all_token_ids.pop()
+            request.num_prompt_tokens -= 1
+            request.max_tokens = 1
+            params["_p_side_truncated"] = True
+
+    def get_num_new_matched_tokens(
+        self, request: "Request", num_computed_tokens: int
+    ) -> tuple[int, bool]:
+        """
+        For remote prefill, pull all prompt blocks from remote
+        asynchronously relative to engine execution.
+
+        Args:
+            request (Request): the request object.
+            num_computed_tokens (int): the number of locally
+                computed tokens for this request
+        Returns:
+            * the number of tokens that can be loaded from the
+              external KV cache beyond what is already computed.
+            * true if the external KV cache tokens will be loaded
+              asynchronously (between scheduler steps).
+        """
+
+        params = request.kv_transfer_params
+        logger.debug(
+            "NIXLConnector get_num_new_matched_tokens: "
+            "num_computed_tokens=%s, kv_transfer_params=%s",
+            num_computed_tokens,
+            params,
+        )
+
+        if params is not None and params.get("do_remote_prefill"):
+            # Remote prefill: get all prompt blocks from remote.
+            token_ids = request.prompt_token_ids or []
+            actual = self._mamba_prefill_token_count(len(token_ids))
+            count = actual - num_computed_tokens
+            if count > 0:
+                return count, True
+
+        if params is not None and params.get("do_remote_decode") and self._has_mamba:
+            self._truncate_mamba_request_for_prefill(request)
+
+        if (
+            params is not None
+            and params.get("do_remote_decode")
+            and params.get("remote_block_ids")
+            and all(
+                p in params
+                for p in (
+                    "remote_engine_id",
+                    "remote_request_id",
+                    "remote_host",
+                    "remote_port",
+                )
+            )
+        ):
+            # Decode node has kv blocks for part of prefill request, so, provide them
+            # as an external token count to scheduler.
+            # The tokens will be loaded if not already present
+            # in the prefill node local cache
+            remote_num_tokens = params.get("remote_num_tokens") or 0
+            count = (
+                min(remote_num_tokens, request.num_prompt_tokens) - num_computed_tokens
+            )
+            if count > 0:
+                # Check kv_recompute_threshold: skip pull if
+                # remote tokens are below the threshold.
+                if (
+                    self.kv_recompute_threshold > 0
+                    and count < self.kv_recompute_threshold
+                ):
+                    logger.debug(
+                        "Skipping remote pull for %s: %d remote tokens < threshold %d",
+                        request.request_id,
+                        count,
+                        self.kv_recompute_threshold,
+                    )
+                    return 0, False
+                return count, True
+
+        # No remote prefill for this request.
+        return 0, False
+
+    def update_state_after_alloc(
+        self, request: "Request", blocks: "KVCacheBlocks", num_external_tokens: int
+    ):
+        params = request.kv_transfer_params
+        logger.debug(
+            "NIXLConnector update_state_after_alloc: "
+            "num_external_tokens=%s, kv_transfer_params=%s",
+            num_external_tokens,
+            params,
+        )
+
+        if not params:
+            return
+
+        if params.get("do_remote_decode") or (
+            params.get("do_remote_prefill") and self.is_bidirectional_kv_xfer_enabled
+        ):
+            self._reqs_in_batch.add(request.request_id)
+        if self.use_host_buffer and params.get("do_remote_decode"):
+            # NOTE: when accelerator is not directly supported by Nixl,
+            # prefilled blocks need to be saved to host memory before transfer.
+            self._reqs_need_save[request.request_id] = request
+        elif params.get("do_remote_prefill") or (
+            params.get("do_remote_decode")
+            and self.is_bidirectional_kv_xfer_enabled
+            and not params.get("_remote_blocks_processed")
+        ):
+            if params.get("remote_block_ids"):
+                if all(
+                    p in params
+                    for p in (
+                        "remote_engine_id",
+                        "remote_request_id",
+                        "remote_host",
+                        "remote_port",
+                    )
+                ):
+                    # If remote_blocks and num_external_tokens = 0, we have
+                    # a full prefix cache hit on the local node. We need to call
+                    # send_notif in _read_blocks to free the memory on the remote node.
+
+                    unhashed_local_block_ids: BlockIds = (
+                        blocks.get_unhashed_block_ids_all_groups()
+                        if num_external_tokens > 0
+                        else ()
+                    )
+                    local_block_ids = self.get_sw_clipped_blocks(
+                        unhashed_local_block_ids
+                    )
+
+                    # Get unhashed blocks to pull from remote. Mind that a full prefix
+                    # cache hit is indicated with an empty list.
+                    self._reqs_need_recv[request.request_id] = (
+                        request,
+                        local_block_ids,
+                    )
+
+                else:
+                    logger.warning(
+                        "Got invalid KVTransferParams: %s. This "
+                        "request will not utilize KVTransfer",
+                        params,
+                    )
+            else:
+                assert num_external_tokens == 0
+            # Only trigger 1 KV transfer per request.
+            params["do_remote_prefill"] = False
+            params["_remote_blocks_processed"] = True
+
+    def _build_save_meta(
+        self,
+        meta: NixlConnectorMetadata,
+        scheduler_output: SchedulerOutput,
+    ) -> None:
+        # only called when use_host_buffer is True to build the save metadata
+
+        # NOTE: For the prefill side, there might be a chance that an early added
+        # request is a chunked prefill, so we need to check if new blocks are added
+        for req_id, new_block_id_groups, _ in yield_req_data(scheduler_output):
+            req_to_save = self._reqs_need_save.get(req_id)
+            if req_to_save is None or new_block_id_groups is None:
+                continue
+            req = req_to_save
+
+            assert req.kv_transfer_params is not None
+            clipped_block_id_groups = self.get_sw_clipped_blocks(new_block_id_groups)
+            meta.add_new_req_to_save(
+                request_id=req_id,
+                local_block_ids=clipped_block_id_groups,
+                kv_transfer_params=req.kv_transfer_params,
+            )
+            assert scheduler_output.num_scheduled_tokens is not None
+            num_scheduled_tokens = scheduler_output.num_scheduled_tokens[req_id]
+            is_partial = (
+                req.num_computed_tokens + num_scheduled_tokens
+            ) < req.num_prompt_tokens
+            if not is_partial:
+                # For non-partial prefills, once new req_meta is scheduled, it
+                # can be removed from _reqs_need_save.
+                # For partial prefill case, we will retain the request in
+                # _reqs_need_save until all blocks are scheduled with req_meta.
+                # Therefore, only pop if `not is_partial`.
+                self._reqs_need_save.pop(req_id)
+
+    def build_connector_meta(
+        self,
+        scheduler_output: SchedulerOutput,
+    ) -> KVConnectorMetadata:
+        meta = NixlConnectorMetadata()
+
+        # Loop through scheduled reqs and convert to ReqMeta.
+        for req_id, (req, block_ids) in self._reqs_need_recv.items():
+            assert req.kv_transfer_params is not None
+            meta.add_new_req_to_recv(
+                request_id=req_id,
+                local_block_ids=block_ids,
+                kv_transfer_params=req.kv_transfer_params,
+            )
+
+        if self.use_host_buffer:
+            self._build_save_meta(meta, scheduler_output)
+
+        meta.reqs_to_send = self._reqs_need_send
+        meta.reqs_in_batch = self._reqs_in_batch
+        meta.reqs_not_processed = self._reqs_not_processed
+
+        # Package heartbeats, throttled by heartbeat_interval.
+        if self._heartbeat_by_engine:
+            now = time.perf_counter()
+            if now - self._last_heartbeat_time >= self._heartbeat_interval:
+                self._last_heartbeat_time = now
+                meta.heartbeat_by_engine = self._heartbeat_by_engine
+
+        # Clear the list once workers start the transfers
+        self._reqs_need_recv.clear()
+        self._reqs_in_batch = set()
+        self._reqs_not_processed = set()
+        self._reqs_need_send = {}
+
+        return meta
+
+    def update_connector_output(self, connector_output: "KVConnectorOutput") -> None:
+        """Stop heartbeating for requests whose KV transfer completed."""
+        for req_id in connector_output.finished_recving or ():
+            self._stop_heartbeat(req_id)
+
+    def request_finished(
+        self,
+        request: "Request",
+        block_ids: BlockIds,
+    ) -> tuple[bool, dict[str, Any] | None]:
+        """
+        Once a request is finished, determine whether request blocks
+        should be freed now or will be sent asynchronously and freed later.
+        """
+        from vllm.v1.request import RequestStatus
+
+        params = request.kv_transfer_params
+        logger.debug(
+            "NIXLConnector request_finished(%s), request_status=%s, "
+            "kv_transfer_params=%s",
+            request.request_id,
+            request.status,
+            params,
+        )
+        if not params:
+            return False, None
+
+        is_p_node = bool(params.get("do_remote_decode"))
+        is_d_node = not is_p_node
+
+        # Stop heartbeating for aborted requests that never reached finished_recving:
+        # normal path cleans up in update_connector_output.
+        self._stop_heartbeat(request.request_id)
+
+        if params.get("do_remote_prefill"):
+            # If do_remote_prefill is still True when the request is finished,
+            # update_state_after_alloc must not have been called (the request
+            # must have been aborted before it was scheduled, e.g. via the
+            # abort_immediately path used to clean up KV-transfer requests
+            # rejected at the D-side serving layer).
+            # To avoid stranding the prefill blocks in the prefill instance,
+            # we must add empty block_ids to _reqs_need_recv so that our
+            # worker side will notify and free blocks in the prefill instance.
+            self._reqs_need_recv[request.request_id] = (request, [])
+            params["do_remote_prefill"] = False
+            return False, None
+
+        if is_d_node and not self.is_bidirectional_kv_xfer_enabled:
+            return False, None
+
+        if request.status not in (
+            RequestStatus.FINISHED_LENGTH_CAPPED,
+            RequestStatus.FINISHED_STOPPED,
+        ):
+            # Also include the case of a P/D Prefill request with immediate
+            # block free (eg abort). Stop tracking this request.
+            self._reqs_not_processed.add(request.request_id)
+            # Clear _reqs_need_save if a request is aborted as partial prefill.
+            self._reqs_need_save.pop(request.request_id, None)
+            return False, None
+
+        # TODO: check whether block_ids actually ever be 0. If not we could
+        # remove the conditional below
+        delay_free_blocks = any(len(group) > 0 for group in block_ids)
+        remote_num_tokens = 0
+        if delay_free_blocks:
+            # Prefill request on remote. It will be read from D upon completion
+            request_kv_blocks_ttl = self._kv_lease_duration
+            if is_d_node:
+                # For blocks pinned on D, use a simpler timeout for now instead of a
+                # lease mechanism as turn2 request is client-driven.
+                request_kv_blocks_ttl = self.decoder_kv_blocks_ttl
+            logger.debug(
+                "NIXLConnector request_finished(%s) waiting for %d seconds "
+                "before releasing blocks",
+                request.request_id,
+                request_kv_blocks_ttl,
+            )
+            self._reqs_need_send[request.request_id] = (
+                time.perf_counter() + request_kv_blocks_ttl
+            )
+            # NOTE HMA will "mark" empty/null blocks in groups with 0s (eg SWA ones),
+            # trimming down after allocating for the whole sequence length. Empty
+            # blocks are always at the start of the list.
+            # Here we "unpad" blocks to send the actual remote blocks to be read.
+            block_ids = self.get_sw_clipped_blocks(block_ids)
+
+            remote_num_tokens = request.num_computed_tokens
+
+        return delay_free_blocks, dict(
+            do_remote_prefill=is_p_node,
+            do_remote_decode=is_d_node,
+            remote_block_ids=block_ids,
+            remote_engine_id=self.engine_id,
+            remote_request_id=request.request_id,
+            remote_host=self.side_channel_host,
+            remote_port=self.side_channel_port,
+            tp_size=self.vllm_config.parallel_config.tensor_parallel_size,
+            remote_num_tokens=remote_num_tokens,
+        )
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl/stats.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/stats.py
new file mode 100644
index 000000000000..1e4f5c48e0f7
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/stats.py
@@ -0,0 +1,266 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Stats and Prometheus metrics for the NIXL connector."""
+
+import copy
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any
+
+import numpy as np
+
+from vllm.config import VllmConfig
+from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
+    KVConnectorPromMetrics,
+    KVConnectorStats,
+    PromMetric,
+    PromMetricT,
+)
+from vllm.v1.metrics.utils import create_metric_per_engine
+
+if TYPE_CHECKING:
+    from vllm.distributed.nixl_utils import nixlXferTelemetry
+
+
+@dataclass
+class NixlKVConnectorStats(KVConnectorStats):
+    """Container for transfer performance metrics"""
+
+    def __post_init__(self):
+        if not self.data:
+            # Empty container init, no data is passed in.
+            self.reset()
+
+    def reset(self):
+        # Must be serializable
+        self.data: dict[str, list[float | int]] = {
+            "transfer_duration": [],
+            "post_duration": [],
+            "bytes_transferred": [],
+            "num_descriptors": [],
+            "num_failed_transfers": [],
+            "num_failed_notifications": [],
+            "num_kv_expired_reqs": [],
+        }
+
+    def record_transfer(self, res: "nixlXferTelemetry"):
+        # Keep metrics units consistent with rest of the code: time us->s
+        self.data["transfer_duration"].append(res.xferDuration / 1e6)
+        self.data["post_duration"].append(res.postDuration / 1e6)
+        self.data["bytes_transferred"].append(res.totalBytes)
+        self.data["num_descriptors"].append(res.descCount)
+
+    def record_failed_transfer(self):
+        """Record a failed NIXL transfer operation."""
+        self.data["num_failed_transfers"].append(1)
+
+    def record_failed_notification(self):
+        """Record a failed NIXL notification (send_notif)."""
+        self.data["num_failed_notifications"].append(1)
+
+    def record_kv_expired_req(self):
+        """Record a request that had its KV blocks expire."""
+        self.data["num_kv_expired_reqs"].append(1)
+
+    def clone_and_reset(self) -> "NixlKVConnectorStats":
+        old = copy.copy(self)
+        self.reset()
+        return old
+
+    def is_empty(self) -> bool:
+        # Do not discard metrics update that are entirely failures related.
+        return (
+            self.num_successful_transfers == 0
+            and len(self.data["num_failed_transfers"]) == 0
+            and len(self.data["num_failed_notifications"]) == 0
+            and len(self.data["num_kv_expired_reqs"]) == 0
+        )
+
+    def aggregate(self, other: KVConnectorStats) -> KVConnectorStats:
+        if not other.is_empty():
+            for k, v in other.data.items():
+                accumulator = self.data[k]
+                assert isinstance(accumulator, list)
+                accumulator.extend(v)
+        return self
+
+    def reduce(self) -> dict[str, int | float]:
+        # Compute compact representative stats suitable for CLI logging
+        if self.num_successful_transfers == 0:
+            # CLI logging only reports successful transfers stats. If all requests in
+            # the interval were unsuccessful, Prom will report failures stats instead.
+            return {
+                "Num successful transfers": 0,
+                "Avg xfer time (ms)": 0,
+                "P90 xfer time (ms)": 0,
+                "Avg post time (ms)": 0,
+                "P90 post time (ms)": 0,
+                "Avg MB per transfer": 0,
+                "Throughput (MB/s)": 0,
+                "Avg number of descriptors": 0,
+            }
+
+        xfer_time = np.asarray(self.data["transfer_duration"])
+        post_time = np.asarray(self.data["post_duration"])
+        # Convert to MB for CLI logging.
+        mb = np.asarray(self.data["bytes_transferred"]) / 2**20
+        descs = np.asarray(self.data["num_descriptors"], dtype=np.uint32)
+        n = len(descs)
+        assert n == self.num_successful_transfers
+
+        total_mb = mb.sum()
+        avg_mb = total_mb / n
+
+        total_time_seconds = xfer_time.sum()
+        throughput_mb_s = total_mb / total_time_seconds
+
+        return {
+            "Num successful transfers": n,
+            "Avg xfer time (ms)": round(xfer_time.mean() * 1e3, 3),
+            "P90 xfer time (ms)": round(np.percentile(xfer_time, 90).item() * 1e3, 3),
+            "Avg post time (ms)": round(post_time.mean() * 1e3, 3),
+            "P90 post time (ms)": round(np.percentile(post_time, 90).item() * 1e3, 3),
+            "Avg MB per transfer": round(avg_mb, 3),
+            "Throughput (MB/s)": round(throughput_mb_s, 3),
+            "Avg number of descriptors": round(descs.mean(), 1),
+        }
+
+    @property
+    def num_successful_transfers(self) -> int:
+        return len(self.data["transfer_duration"])
+
+
+class NixlPromMetrics(KVConnectorPromMetrics):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        metric_types: dict[type[PromMetric], type[PromMetricT]],
+        labelnames: list[str],
+        per_engine_labelvalues: dict[int, list[object]],
+    ):
+        super().__init__(vllm_config, metric_types, labelnames, per_engine_labelvalues)
+
+        buckets = [
+            0.001,
+            0.005,
+            0.01,
+            0.025,
+            0.05,
+            0.075,
+            0.1,
+            0.2,
+            0.3,
+            0.5,
+            0.75,
+            1.0,
+            5.0,
+        ]
+        nixl_histogram_xfer_time = self._histogram_cls(
+            name="vllm:nixl_xfer_time_seconds",
+            documentation="Histogram of transfer duration for NIXL KV Cache transfers.",
+            buckets=buckets[1:],
+            labelnames=labelnames,
+        )
+        self.nixl_histogram_xfer_time = create_metric_per_engine(
+            nixl_histogram_xfer_time, self.per_engine_labelvalues
+        )
+        nixl_histogram_post_time = self._histogram_cls(
+            name="vllm:nixl_post_time_seconds",
+            documentation="Histogram of transfer post time for NIXL KV"
+            " Cache transfers.",
+            buckets=buckets,
+            labelnames=labelnames,
+        )
+        self.nixl_histogram_post_time = create_metric_per_engine(
+            nixl_histogram_post_time, self.per_engine_labelvalues
+        )
+        # uniform 2kb to 16gb range
+        buckets = [2 ** (10 + i) for i in range(1, 25, 2)]
+        nixl_histogram_bytes_transferred = self._histogram_cls(
+            name="vllm:nixl_bytes_transferred",
+            documentation="Histogram of bytes transferred per NIXL KV Cache transfers.",
+            buckets=buckets,
+            labelnames=labelnames,
+        )
+        self.nixl_histogram_bytes_transferred = create_metric_per_engine(
+            nixl_histogram_bytes_transferred, self.per_engine_labelvalues
+        )
+        buckets = [
+            10,
+            20,
+            30,
+            50,
+            75,
+            100,
+            200,
+            400,
+            1000,
+            2000,
+            4000,
+            10000,
+            20000,
+            50000,
+        ]
+        nixl_histogram_num_descriptors = self._histogram_cls(
+            name="vllm:nixl_num_descriptors",
+            documentation="Histogram of number of descriptors per NIXL"
+            "  KV Cache transfers.",
+            buckets=buckets,
+            labelnames=labelnames,
+        )
+        self.nixl_histogram_num_descriptors = create_metric_per_engine(
+            nixl_histogram_num_descriptors, self.per_engine_labelvalues
+        )
+        counter_nixl_num_failed_transfers = self._counter_cls(
+            name="vllm:nixl_num_failed_transfers",
+            documentation="Number of failed NIXL KV Cache transfers.",
+            labelnames=labelnames,
+        )
+        self.counter_nixl_num_failed_transfers = create_metric_per_engine(
+            counter_nixl_num_failed_transfers, self.per_engine_labelvalues
+        )
+        counter_nixl_num_failed_notifications = self._counter_cls(
+            name="vllm:nixl_num_failed_notifications",
+            documentation="Number of failed NIXL KV Cache notifications.",
+            labelnames=labelnames,
+        )
+        self.counter_nixl_num_failed_notifications = create_metric_per_engine(
+            counter_nixl_num_failed_notifications, self.per_engine_labelvalues
+        )
+
+        counter_nixl_num_kv_expired_reqs = self._counter_cls(
+            name="vllm:nixl_num_kv_expired_reqs",
+            documentation="Number of requests that had their KV expire. "
+            "NOTE: This metric is tracked on the P instance.",
+            labelnames=labelnames,
+        )
+        self.counter_nixl_num_kv_expired_reqs = create_metric_per_engine(
+            counter_nixl_num_kv_expired_reqs, self.per_engine_labelvalues
+        )
+
+    def observe(self, transfer_stats_data: dict[str, Any], engine_idx: int = 0):
+        for prom_obj, list_item_key in zip(
+            [
+                self.nixl_histogram_xfer_time,
+                self.nixl_histogram_post_time,
+                self.nixl_histogram_bytes_transferred,
+                self.nixl_histogram_num_descriptors,
+            ],
+            [
+                "transfer_duration",
+                "post_duration",
+                "bytes_transferred",
+                "num_descriptors",
+            ],
+        ):
+            for list_item in transfer_stats_data[list_item_key]:
+                prom_obj[engine_idx].observe(list_item)
+        for counter_obj, counter_item_key in zip(
+            [
+                self.counter_nixl_num_failed_transfers,
+                self.counter_nixl_num_failed_notifications,
+                self.counter_nixl_num_kv_expired_reqs,
+            ],
+            ["num_failed_transfers", "num_failed_notifications", "num_kv_expired_reqs"],
+        ):
+            for list_item in transfer_stats_data[counter_item_key]:
+                counter_obj[engine_idx].inc(list_item)
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl/tp_mapping.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/tp_mapping.py
new file mode 100644
index 000000000000..b034b7605087
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/tp_mapping.py
@@ -0,0 +1,142 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""TP mapping computation for NIXL KV cache transfers."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+
+from vllm.distributed.kv_transfer.kv_connector.utils import (
+    BlockIds,
+    TransferTopology,
+)
+from vllm.v1.kv_cache_interface import AttentionSpec, KVCacheSpec, MambaSpec
+
+# ======================================================================
+# Data structures
+# ======================================================================
+
+
+@dataclass(frozen=True)
+class ReadSpec:
+    """Specification for a single remote block read operation."""
+
+    remote_rank: int
+    local_block_ids: BlockIds
+    remote_block_ids: BlockIds
+
+
+def _is_attention_spec(spec_type: type[KVCacheSpec]) -> bool:
+    return issubclass(spec_type, AttentionSpec)
+
+
+def _is_ssm_spec(spec_type: type[KVCacheSpec]) -> bool:
+    return issubclass(spec_type, MambaSpec)
+
+
+@dataclass(frozen=True)
+class TPMapping:
+    """Complete local-to-remote TP mapping for one remote engine.
+
+    Generated once per remote engine during handshake.
+    """
+
+    # Remote TP ranks that this local rank reads from, per group.
+    # Position = local piece index.
+    source_ranks_per_group: tuple[tuple[int, ...], ...]
+
+    # Superset of all source ranks (union of all groups).
+    all_source_ranks: tuple[int, ...]
+
+    # Maps each source rank to its FA head slot index.
+    rank_to_attention_slot: dict[int, int]
+
+    # FA head offset factor for hetero-TP (D_TP > P_TP).
+    rank_offset_factor: int
+
+
+# ======================================================================
+# TP mapping computation
+# ======================================================================
+
+
+def compute_tp_mapping(
+    transfer_topology: TransferTopology,
+    remote_tp_size: int,
+    group_spec_types: tuple[type[KVCacheSpec], ...],
+) -> TPMapping:
+    """Build the complete local-to-remote TP mapping.
+
+    Computes source ranks, head slot assignments, and the rank offset
+    factor in a single pass.
+    """
+    tp_rank = transfer_topology.tp_rank
+    tp_size = transfer_topology.tp_size
+    total_num_kv_heads = transfer_topology.total_num_kv_heads
+    # --- Attention source ranks ---
+    if transfer_topology.is_mla or tp_size >= remote_tp_size:
+        # D (local TP) > P (remote TP): multiple local ranks read different chunks from
+        # *one* remote rank, corresponding to different kv heads.
+        # For MLA, we only need one remote since cache is duplicated. When P TP=k*TP k,
+        # this will spread mla ranks to read from remote k*tp_rank.
+        attn_ranks = [tp_rank * remote_tp_size // tp_size]
+    else:
+        # P (remote TP) > D (local TP): one local rank
+        # reads from multiple remote ranks.
+        # GQA dedup: when K < remote_tp_size, several remote ranks
+        # hold the same KV head.  np.unique keeps only the first
+        # rank per unique head so we don't issue redundant reads.
+        abs_tp = remote_tp_size // tp_size
+        start = tp_rank * abs_tp
+        heads = np.arange(start, start + abs_tp) * total_num_kv_heads // remote_tp_size
+        _, unique_idx = np.unique(heads, return_index=True)
+        attn_ranks = (start + np.sort(unique_idx)).tolist()
+
+    # --- SSM source ranks ---
+    has_ssm = any(_is_ssm_spec(t) for t in group_spec_types)
+    if has_ssm:
+        if tp_size < remote_tp_size:
+            abs_tp = remote_tp_size // tp_size
+            ssm_ranks = list(range(tp_rank * abs_tp, (tp_rank + 1) * abs_tp))
+        else:
+            ssm_ranks = list(attn_ranks)
+    else:
+        ssm_ranks = []
+
+    all_ranks = sorted(set(attn_ranks) | set(ssm_ranks))
+
+    # --- Per-group ordered source ranks ---
+    source_ranks_per_group = tuple(
+        tuple(ssm_ranks) if _is_ssm_spec(t) else tuple(attn_ranks)
+        for t in group_spec_types
+    )
+
+    # --- Attention head slots ---
+    head_to_slot: dict[int, int] = {}
+    for i, r in enumerate(attn_ranks):
+        head_to_slot[r * total_num_kv_heads // remote_tp_size] = i
+    rank_to_attention_slot = {
+        r: head_to_slot.get(r * total_num_kv_heads // remote_tp_size, 0)
+        for r in all_ranks
+    }
+
+    # --- Rank offset factor ---
+    if transfer_topology.is_mla or tp_size <= remote_tp_size:
+        # We don't index into remote for reading, no offset needed.
+        rank_offset_factor = 0
+    elif tp_size > total_num_kv_heads:
+        local_head = tp_rank * total_num_kv_heads // tp_size
+        p_start = attn_ranks[0] * total_num_kv_heads // remote_tp_size
+        rank_offset_factor = local_head - p_start
+    else:
+        # D TP > P TP: we index into remote to read different heads depending on rank.
+        rank_offset_factor = tp_rank % (tp_size // remote_tp_size)
+
+    return TPMapping(
+        source_ranks_per_group=source_ranks_per_group,
+        all_source_ranks=tuple(all_ranks),
+        rank_to_attention_slot=rank_to_attention_slot,
+        rank_offset_factor=rank_offset_factor,
+    )
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl/utils.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/utils.py
new file mode 100644
index 000000000000..2fa3829eaecb
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/utils.py
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Shared constants, lazy imports and helpers for the NIXL connector."""
+
+import contextlib
+from collections.abc import Iterator
+from typing import Any
+
+import zmq
+
+from vllm.platforms import current_platform
+from vllm.utils.network_utils import make_zmq_socket
+from vllm.v1.kv_cache_interface import KVCacheSpec, UniformTypeKVCacheSpecs
+
+# Supported platforms and types of kv transfer buffer.
+# {device: tuple of supported kv buffer types}
+_NIXL_SUPPORTED_DEVICE = {
+    "cuda": (
+        "cuda",
+        "cpu",
+    ),
+    "tpu": ("cpu",),
+    "xpu": (
+        "cpu",
+        "xpu",
+    ),
+    "cpu": ("cpu",),
+}
+# support for oot platform by providing mapping in current_platform
+_NIXL_SUPPORTED_DEVICE.update(current_platform.get_nixl_supported_devices())
+
+
+# TODO: merge with vllm.utils.network_utils.zmq_socket_ctx
+@contextlib.contextmanager
+def zmq_ctx(socket_type: Any, addr: str) -> Iterator[zmq.Socket]:
+    """Context manager for a ZMQ socket"""
+
+    if socket_type not in (zmq.ROUTER, zmq.REQ):
+        raise ValueError(f"Unexpected socket type: {socket_type}")
+
+    ctx: zmq.Context | None = None
+    try:
+        ctx = zmq.Context()  # type: ignore[attr-defined]
+        yield make_zmq_socket(
+            ctx=ctx, path=addr, socket_type=socket_type, bind=socket_type == zmq.ROUTER
+        )
+    finally:
+        if ctx is not None:
+            ctx.destroy(linger=0)
+
+
+def get_representative_spec_type(spec: KVCacheSpec) -> type[KVCacheSpec]:
+    if isinstance(spec, UniformTypeKVCacheSpecs):
+        # All inner specs are the same type; pick any.
+        inner = next(iter(spec.kv_cache_specs.values()))
+        return type(inner)
+    return type(spec)
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py
similarity index 53%
rename from vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
rename to vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py
index a86a52a6a6fb..ea8b46c28f9c 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py
@@ -1,18 +1,16 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import contextlib
-import copy
+"""Worker-side logic for the NIXL connector."""
+
 import logging
 import os
 import queue
-import sys
 import threading
 import time
 import uuid
 from collections import defaultdict
 from collections.abc import Iterator
 from concurrent.futures import Future, ThreadPoolExecutor
-from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, cast
 
 import msgspec
@@ -20,991 +18,187 @@
 import torch
 import zmq
 
-from vllm import envs
-from vllm.config import VllmConfig
 from vllm.distributed.kv_transfer.kv_connector.utils import (
     BlockIds,
     EngineId,
-    TpKVTopology,
-    get_current_attn_backend,
+    EngineTransferInfo,
+    TransferTopology,
     get_current_attn_backends,
     kv_postprocess_blksize_and_layout_on_receive,
     kv_postprocess_blksize_on_receive,
     kv_postprocess_layout_on_receive,
-    yield_req_data,
 )
-from vllm.distributed.kv_transfer.kv_connector.v1.base import (
-    CopyBlocksOp,
-    KVConnectorBase_V1,
-    KVConnectorHandshakeMetadata,
-    KVConnectorMetadata,
-    KVConnectorRole,
-    SupportsHMA,
+from vllm.distributed.kv_transfer.kv_connector.v1.base import CopyBlocksOp
+from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.metadata import (
+    GET_META_MSG,
+    NixlAgentMetadata,
+    NixlConnectorMetadata,
+    NixlHandshakePayload,
+    ReqId,
+    ReqMeta,
+    TransferHandle,
+    compute_nixl_compatibility_hash,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.stats import (
+    NixlKVConnectorStats,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.tp_mapping import (
+    ReadSpec,
+    TPMapping,
+    _is_attention_spec,
+    _is_ssm_spec,
+    compute_tp_mapping,
+)
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl.utils import (
+    _NIXL_SUPPORTED_DEVICE,
+    get_representative_spec_type,
+    zmq_ctx,
 )
-from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
-    KVConnectorPromMetrics,
-    KVConnectorStats,
-    PromMetric,
-    PromMetricT,
+from vllm.distributed.kv_transfer.kv_connector.v1.ssm_conv_transfer_utils import (
+    MambaConvSplitInfo,
+    derive_mamba_conv_split,
 )
+from vllm.distributed.nixl_utils import NixlWrapper, nixl_agent_config
 from vllm.distributed.parallel_state import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
 )
-from vllm.forward_context import ForwardContext
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils.math_utils import cdiv
-from vllm.utils.network_utils import make_zmq_path, make_zmq_socket
-from vllm.v1.attention.backend import AttentionBackend, AttentionMetadata
+from vllm.utils.network_utils import make_zmq_path
 from vllm.v1.attention.backends.utils import get_kv_cache_layout
-from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.kv_cache_interface import (
     FullAttentionSpec,
     MambaSpec,
-    SlidingWindowSpec,
     UniformTypeKVCacheSpecs,
 )
-from vllm.v1.metrics.utils import create_metric_per_engine
 from vllm.v1.worker.block_table import BlockTable
 from vllm.v1.worker.utils import select_common_block_size
 
 if TYPE_CHECKING:
-    from vllm.v1.core.kv_cache_manager import KVCacheBlocks
+    from vllm.config import VllmConfig
     from vllm.v1.kv_cache_interface import KVCacheConfig
-    from vllm.v1.request import Request
-
-TransferHandle = int
-ReqId = str
-
-#
-# NIXL Connector Version
-#
-# Increment this version whenever there is an incompatible change to:
-#   - NixlAgentMetadata schema
-#   - kv_transfer_params schema or semantics
-#   - NIXL transfer protocol or wire format
-#   - KV cache memory layout or block organization
-#   - Any other change that breaks P/D interoperability
-#
-# Version History:
-#   1: Initial version with compatibility checking
-#   2: Add remote_request_id to kv_transfer_params
-#
-NIXL_CONNECTOR_VERSION: int = 2
-
-GET_META_MSG = b"get_meta_msg"
 
 logger = init_logger(__name__)
 
-# Lazy import nixl_wrapper to avoid loading nixl_bindings if nixl is not used
-try:
-    if "UCX_RCACHE_MAX_UNRELEASED" not in os.environ:
-        # avoid a memory leak in UCX when using NIXL on some models
-        # see: https://github.com/vllm-project/vllm/issues/24264
-        if "nixl" in sys.modules or "rixl" in sys.modules:
-            logger.warning(
-                "NIXL was already imported, we can't reset UCX_RCACHE_MAX_UNRELEASED. "
-                "Please set it to '1024' manually."
-            )
-        else:
-            logger.info(
-                "Setting UCX_RCACHE_MAX_UNRELEASED to '1024' to avoid a rare "
-                "memory leak in UCX when using NIXL."
-            )
-            os.environ["UCX_RCACHE_MAX_UNRELEASED"] = "1024"
-
-    if not current_platform.is_rocm():
-        from nixl._api import nixl_agent as NixlWrapper
-        from nixl._bindings import nixlXferTelemetry
-    else:
-        from rixl._api import nixl_agent as NixlWrapper
-        from rixl._bindings import nixlXferTelemetry
-
-    logger.info("NIXL is available")
-except ImportError:
-    logger.warning("NIXL is not available")
-    NixlWrapper = None
-    nixlXferTelemetry = None
-
-
-try:
-    if not current_platform.is_rocm():
-        from nixl._api import nixl_agent_config
-    else:
-        from rixl._api import nixl_agent_config
-except ImportError:
-    nixl_agent_config = None
-    logger.warning("NIXL agent config is not available")
-
-# Supported platforms and types of kv transfer buffer.
-# {device: tuple of supported kv buffer types}
-_NIXL_SUPPORTED_DEVICE = {
-    "cuda": (
-        "cuda",
-        "cpu",
-    ),
-    "tpu": ("cpu",),
-    "xpu": (
-        "cpu",
-        "xpu",
-    ),
-    "cpu": ("cpu",),
-}
-# support for oot platform by providing mapping in current_platform
-_NIXL_SUPPORTED_DEVICE.update(current_platform.get_nixl_supported_devices())
-
-
-@dataclass
-class NixlAgentMetadata:
-    engine_id: str
-    agent_metadata: bytes
-    kv_caches_base_addr: list[int]
-    device_id: int
-    num_blocks: int
-    block_lens: list[int]
-    kv_cache_layout: str
-    block_size: int
-    ssm_sizes: tuple[int, int]
-
-
-@dataclass
-class NixlHandshakePayload(KVConnectorHandshakeMetadata):
-    """
-    Wrapper for NIXL handshake sent over the wire.
-
-    Enables two-phase decoding for graceful compatibility checking:
-    1. Decode NixlHandshakePayload to get compatibility_hash
-    2. Compute local hash and compare
-    3. Only if hashes match, decode agent_metadata_bytes
-
-    This prevents decoder errors when NixlAgentMetadata schema is
-    incompatible, allowing graceful failure with clear error message.
-    """
-
-    compatibility_hash: str
-    agent_metadata_bytes: bytes  # NixlAgentMetadata encoded
-
-
-def compute_nixl_compatibility_hash(
-    vllm_config: VllmConfig, attn_backend_name: str, cross_layers_blocks: bool
-) -> str:
-    """
-    Compute compatibility hash for NIXL KV transfer.
-
-    Hash only the factors that affect whether two NIXL instances can
-    successfully transfer KV cache data.
-
-    Factors included:
-    - vLLM version and NIXL connector version
-    - Model architecture (name, dtype, KV heads, layers)
-    - KV cache format (dtype, sliding window)
-    - Attention backend
-
-    Note: Factors like tensor_parallel_size, block_size, and kv_cache_layout
-    are validated at runtime in _validate_remote_agent_handshake and are not
-    included in this hash to support heterogeneous deployments.
-
-    Note - the set of factors are likely to evolve significantly over
-    time to be more or less permissive.
-
-    Returns:
-        SHA-256 hex digest
-    """
-    from vllm import __version__ as vllm_version
-    from vllm.config.utils import hash_factors
-
-    model_config = vllm_config.model_config
-    cache_config = vllm_config.cache_config
-    is_hma_enabled = not vllm_config.scheduler_config.disable_hybrid_kv_cache_manager
-
-    factors = {
-        # Version compatibility
-        "vllm_version": vllm_version,
-        "nixl_connector_version": NIXL_CONNECTOR_VERSION,
-        # Model architecture - affects KV cache shape
-        "model": model_config.model,
-        "dtype": str(model_config.dtype),
-        "num_kv_heads": model_config.get_total_num_kv_heads(),
-        "head_size": model_config.get_head_size(),
-        "num_hidden_layers": model_config.get_total_num_hidden_layers(),
-        # Attention backend and KV cache dtype affect memory layout
-        "attn_backend_name": attn_backend_name,
-        "cache_dtype": str(cache_config.cache_dtype),
-        "cross_layers_blocks": cross_layers_blocks,
-        "is_hma_enabled": is_hma_enabled,
-    }
-
-    compat_hash = hash_factors(factors)
-    logger.debug(
-        "NIXL compatibility hash: %s (model=%s, dtype=%s, num_kv_heads=%d, "
-        "cache_dtype=%s, attn_backend=%s)",
-        compat_hash,
-        factors["model"],
-        factors["dtype"],
-        factors["num_kv_heads"],
-        factors["cache_dtype"],
-        attn_backend_name,
-    )
-    return compat_hash
-
-
-@dataclass
-class RemoteMeta:
-    block_ids: BlockIds
-    host: str
-    port: int
-    engine_id: str
-    request_id: str
-
-
-@dataclass
-class ReqMeta:
-    local_block_ids: BlockIds
-    # To be used when logical block size does not match the kernel block size
-    local_physical_block_ids: BlockIds
-    tp_size: int
-    remote: RemoteMeta | None = None
-
-
-class NixlConnectorMetadata(KVConnectorMetadata):
-    def __init__(self):
-        self.reqs_to_recv: dict[ReqId, ReqMeta] = {}
-        self.reqs_to_save: dict[ReqId, ReqMeta] = {}
-        self.reqs_to_send: dict[ReqId, float] = {}
-        self.reqs_in_batch: set[ReqId] = set()
-        self.reqs_not_processed: set[ReqId] = set()
-
-    def _add_new_req(
-        self,
-        local_block_ids: BlockIds,
-        kv_transfer_params: dict[str, Any],
-    ) -> ReqMeta:
-        return ReqMeta(
-            local_block_ids=local_block_ids,
-            local_physical_block_ids=local_block_ids,
-            # P workers don't need to receive tp_size from proxy here.
-            tp_size=kv_transfer_params.get("tp_size", 1),
-        )
-
-    def add_new_req_to_save(
-        self,
-        request_id: ReqId,
-        local_block_ids: BlockIds,
-        kv_transfer_params: dict[str, Any],
-    ):
-        self.reqs_to_save[request_id] = self._add_new_req(
-            local_block_ids, kv_transfer_params
-        )
-
-    def add_new_req_to_recv(
-        self,
-        request_id: ReqId,
-        local_block_ids: BlockIds,
-        kv_transfer_params: dict[str, Any],
-    ):
-        req = self._add_new_req(local_block_ids, kv_transfer_params)
-        req.remote = RemoteMeta(
-            block_ids=kv_transfer_params["remote_block_ids"],
-            engine_id=kv_transfer_params["remote_engine_id"],
-            request_id=kv_transfer_params["remote_request_id"],
-            host=kv_transfer_params["remote_host"],
-            port=kv_transfer_params["remote_port"],
-        )
-        self.reqs_to_recv[request_id] = req
-
-
-class NixlConnector(KVConnectorBase_V1, SupportsHMA):
-    @property
-    def prefer_cross_layer_blocks(self) -> bool:
-        if any(
-            [
-                isinstance(group.kv_cache_spec, MambaSpec)
-                for group in self.kv_cache_config.kv_cache_groups
-            ]
-        ):
-            # Hybrid SSM models do not yet support cross-layer layout
-            return False
-
-        backend = get_current_attn_backend(self._vllm_config)
-        if backend.get_name() not in (
-            "FLASH_ATTN",
-            "FLASHINFER",
-        ):
-            return False
-
-        # For now there is no benefit to run cross layers when backend
-        # does not support on HND
-        if get_kv_cache_layout() != "HND":
-            return False
-
-        extra_config = self.kv_transfer_config.kv_connector_extra_config
-        return (
-            str(extra_config.get("enable_cross_layers_blocks", "False")).lower()
-            == "true"
-        )
-
-    def __init__(
-        self,
-        vllm_config: VllmConfig,
-        role: KVConnectorRole,
-        kv_cache_config: "KVCacheConfig",
-    ):
-        super().__init__(vllm_config, role, kv_cache_config)
-        assert vllm_config.kv_transfer_config is not None
-        assert vllm_config.kv_transfer_config.engine_id is not None
-        self.kv_cache_config = kv_cache_config
-        self.engine_id: EngineId = vllm_config.kv_transfer_config.engine_id
-        self.kv_transfer_config = vllm_config.kv_transfer_config
-        if role == KVConnectorRole.SCHEDULER:
-            self.connector_scheduler: NixlConnectorScheduler | None = (
-                NixlConnectorScheduler(vllm_config, self.engine_id, kv_cache_config)
-            )
-            self.connector_worker: NixlConnectorWorker | None = None
-        elif role == KVConnectorRole.WORKER:
-            self.connector_scheduler = None
-            self.connector_worker = NixlConnectorWorker(
-                vllm_config, self.engine_id, kv_cache_config
-            )
-
-    ############################################################
-    # Class Methods
-    ############################################################
-    @classmethod
-    def get_required_kvcache_layout(cls, vllm_config: VllmConfig):
-        if vllm_config.model_config is None:
-            logger.warning_once(
-                "Unable to detect current VLLM config. "
-                "Fallback to default kv cache layout."
-            )
-            return None
-        use_mla = vllm_config.model_config.use_mla
-        if use_mla:
-            # return None when we have mla
-            # as the layout should not matter in that case,
-            # which fallback to the default behavior.
-            return None
-        logger.info_once(
-            "NixlConnector setting KV cache layout to HND for better xfer performance."
-        )
-        return "HND"
-
-    ############################################################
-    # Scheduler Side Methods
-    ############################################################
-
-    def get_num_new_matched_tokens(
-        self, request: "Request", num_computed_tokens: int
-    ) -> tuple[int | None, bool]:
-        assert self.connector_scheduler is not None
-        return self.connector_scheduler.get_num_new_matched_tokens(
-            request, num_computed_tokens
-        )
-
-    def update_state_after_alloc(
-        self, request: "Request", blocks: "KVCacheBlocks", num_external_tokens: int
-    ):
-        assert self.connector_scheduler is not None
-        return self.connector_scheduler.update_state_after_alloc(
-            request, blocks, num_external_tokens
-        )
-
-    def build_connector_meta(
-        self,
-        scheduler_output: SchedulerOutput,
-    ) -> KVConnectorMetadata:
-        assert self.connector_scheduler is not None
-        return self.connector_scheduler.build_connector_meta(scheduler_output)
-
-    def request_finished(
-        self,
-        request: "Request",
-        block_ids: list[int],
-    ) -> tuple[bool, dict[str, Any] | None]:
-        assert self.connector_scheduler is not None
-        return self.connector_scheduler.request_finished(request, (block_ids,))
-
-    def request_finished_all_groups(
-        self,
-        request: "Request",
-        block_ids: tuple[list[int], ...],
-    ) -> tuple[bool, dict[str, Any] | None]:
-        assert self.connector_scheduler is not None
-        return self.connector_scheduler.request_finished(request, block_ids)
-
-    def set_xfer_handshake_metadata(
-        self, metadata: dict[int, KVConnectorHandshakeMetadata]
-    ) -> None:
-        """
-        Set the KV connector handshake metadata for this connector.
-
-        Args:
-            metadata (dict): the handshake metadata to set.
-        """
-        assert self.connector_scheduler is not None
-        self.connector_scheduler.set_xfer_handshake_metadata(metadata)
-
-    ############################################################
-    # Worker Side Methods
-    ############################################################
-    def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
-        assert self.connector_worker is not None
-        self.connector_worker.register_kv_caches(kv_caches)
-
-    def register_cross_layers_kv_cache(
-        self, kv_cache: torch.Tensor, attn_backend: type[AttentionBackend]
-    ):
-        assert self.connector_worker is not None
-        self.connector_worker.register_cross_layers_kv_caches(kv_cache)
-
-    def set_host_xfer_buffer_ops(self, copy_operation: CopyBlocksOp):
-        assert self.connector_worker is not None
-        self.connector_worker.set_host_xfer_buffer_ops(copy_operation)
-
-    def get_finished(self, finished_req_ids: set[str]) -> tuple[set[str], set[str]]:
-        """Get the finished recving and sending requests."""
-        assert self.connector_worker is not None
-        return self.connector_worker.get_finished()
-
-    def get_block_ids_with_load_errors(self) -> set[int]:
-        """Get block IDs that failed to load via NIXL."""
-        assert self.connector_worker is not None
-        return self.connector_worker.get_block_ids_with_load_errors()
-
-    def get_kv_connector_stats(self) -> KVConnectorStats | None:
-        if self.connector_worker is None:
-            return None
-        return self.connector_worker.get_kv_connector_stats()
-
-    @classmethod
-    def build_kv_connector_stats(
-        cls, data: dict[str, Any] | None = None
-    ) -> KVConnectorStats | None:
-        return (
-            NixlKVConnectorStats(data=data)
-            if data is not None
-            else NixlKVConnectorStats()
-        )
-
-    @classmethod
-    def build_prom_metrics(
-        cls,
-        vllm_config: VllmConfig,
-        metric_types: dict[type[PromMetric], type[PromMetricT]],
-        labelnames: list[str],
-        per_engine_labelvalues: dict[int, list[object]],
-    ) -> KVConnectorPromMetrics:
-        return NixlPromMetrics(
-            vllm_config, metric_types, labelnames, per_engine_labelvalues
-        )
-
-    def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
-        assert self.connector_worker is not None
-        assert isinstance(self._connector_metadata, NixlConnectorMetadata)
-        self.connector_worker.start_load_kv(self._connector_metadata)
 
-    def wait_for_layer_load(self, layer_name: str) -> None:
-        """NixlConnector does not do layerwise saving."""
-        pass
+class NixlConnectorWorker:
+    """Implementation of Worker side methods"""
 
-    def save_kv_layer(
+    def _compute_desc_ids(
         self,
-        layer_name: str,
-        kv_layer: torch.Tensor,
-        attn_metadata: AttentionMetadata,
-        **kwargs,
-    ) -> None:
-        """NixlConnector does not save explicitly."""
-        pass
-
-    def wait_for_save(self):
-        assert self.connector_worker is not None
-        assert isinstance(self._connector_metadata, NixlConnectorMetadata)
-        if self.connector_worker.use_host_buffer and self.connector_worker.copy_blocks:
-            self.connector_worker.save_kv_to_host(self._connector_metadata)
-
-    def shutdown(self):
-        if self.connector_worker is not None:
-            self.connector_worker.shutdown()
-        if self.connector_scheduler is not None:
-            self.connector_scheduler.shutdown()
-
-    def get_handshake_metadata(self) -> KVConnectorHandshakeMetadata | None:
-        """
-        Get the KVConnector handshake metadata for this connector.
-        This metadata is used for out-of-band connector handshake
-        between P/D workers.
-
-        Returns:
-            KVConnectorHandshakeMetadata: the handshake metadata.
-            None if no handshake metadata is available.
-        """
-        assert self.connector_worker is not None
-        return self.connector_worker.xfer_handshake_metadata
-
-
-class NixlConnectorScheduler:
-    """Implementation of Scheduler side methods"""
-
-    def __init__(
-        self, vllm_config: VllmConfig, engine_id: str, kv_cache_config: "KVCacheConfig"
-    ):
-        self.vllm_config = vllm_config
-        self.block_size = vllm_config.cache_config.block_size
-        self.engine_id: EngineId = engine_id
-        self.kv_cache_config = kv_cache_config
-        self.side_channel_host = envs.VLLM_NIXL_SIDE_CHANNEL_HOST
-        self.side_channel_port = (
-            envs.VLLM_NIXL_SIDE_CHANNEL_PORT
-            + vllm_config.parallel_config.data_parallel_index
-        )
-        assert vllm_config.kv_transfer_config is not None
-        if current_platform.device_type == "cpu":
-            self.use_host_buffer = False
-        else:
-            self.use_host_buffer = (
-                vllm_config.kv_transfer_config.kv_buffer_device == "cpu"
-            )
-        self._is_hma_required = (
-            not vllm_config.scheduler_config.disable_hybrid_kv_cache_manager
-            # Also handle unlikely SW-only model case instead of checking num_groups>1.
-            and any(
-                not isinstance(g.kv_cache_spec, FullAttentionSpec)
-                for g in kv_cache_config.kv_cache_groups
-            )
-        )
-        self._has_mamba = any(
-            isinstance(g.kv_cache_spec, MambaSpec)
-            for g in kv_cache_config.kv_cache_groups
-        )
-
-        logger.info("Initializing NIXL Scheduler %s", engine_id)
-        if vllm_config.scheduler_config.disable_hybrid_kv_cache_manager:
-            logger.info("Hybrid Memory Allocator is enabled with NIXL")
-
-        # Background thread for handling new handshake requests.
-        self._nixl_handshake_listener_t: threading.Thread | None = None
-        self._stop_event = threading.Event()
-
-        # Requests that need to start recv/send.
-        # New requests are added by update_state_after_alloc in
-        # the scheduler. Used to make metadata passed to Worker.
-        self._reqs_need_recv: dict[ReqId, tuple[Request, BlockIds]] = {}
-        self._reqs_need_save: dict[ReqId, Request] = {}
-        # Reqs to send and their expiration time
-        self._reqs_need_send: dict[ReqId, float] = {}
-        self._reqs_in_batch: set[ReqId] = set()
-        # Reqs to remove from processed set because they're not to send after
-        # remote prefill or aborted.
-        self._reqs_not_processed: set[ReqId] = set()
-
-        # Gather Sliding Window sizes for each kv cache group (if any) in number of
-        # blocks per KV cache group. This is used to clip the local attention window.
-        sw_sizes_tokens: list[tuple[int, int]] = [
-            (g.kv_cache_spec.sliding_window, g.kv_cache_spec.block_size)
-            if isinstance(g.kv_cache_spec, SlidingWindowSpec)
-            else (0, self.block_size)
-            for g in kv_cache_config.kv_cache_groups
-        ]
-        # cdiv(n_tokens, block_size) gives blocks/window; add 1 to conservatively
-        # account for boundary overlap eg window isn't fully aligned with blocks.
-        self.blocks_per_sw = [
-            cdiv(n_tokens, block_size) + 1 if n_tokens else 0
-            for n_tokens, block_size in sw_sizes_tokens
-        ]
-
-    def shutdown(self):
-        self._stop_event.set()
-        if self._nixl_handshake_listener_t is not None:
-            self._nixl_handshake_listener_t.join()
-            self._nixl_handshake_listener_t = None
-
-    def get_sw_clipped_blocks(self, block_ids: BlockIds) -> BlockIds:
-        """
-        Clip the number of blocks to the sliding window size for each kv cache group
-        that employs SWA.
-        This is necessary because the KV Cache manager initially allocates blocks for
-        the entire sequence length, and successively cleans up blocks that are outside
-        the window prior to the `request_finished_all_groups` hook.
-        """
-        if len(block_ids) == 0 or not self._is_hma_required:
-            # No blocks to clip eg Full prefix cache hit or not a hybrid model.
-            return block_ids
-        # NOTE (NickLucche) This logic is currently handled at the connector level
-        # because offloading connectors might want to receive the whole sequence even
-        # for SWA groups. We will abstract this logic once the interface is more stable
-        assert len(block_ids) == len(self.blocks_per_sw), (
-            "Number of KV cache groups must match"
-        )
-        # For non-SWA groups, blocks_per_sw is 0 so we return all block_ids unchanged
-        return tuple(
-            [
-                blocks[-self.blocks_per_sw[i] :]
-                if self.blocks_per_sw[i] > 0
-                else blocks
-                for i, blocks in enumerate(block_ids)
-            ]
-        )
+        block_ids: BlockIds,
+        dst_num_blocks: int,
+        block_size_ratio: float | None,
+        physical_blocks_per_logical: int,
+    ) -> np.ndarray:
+        """Compute NIXL descriptor IDs for given block IDs."""
+        num_fa_regions = self.num_regions
+        num_ssm_regions = len(self.block_len_per_layer) * 4 if self._has_mamba else 0
 
-    def set_xfer_handshake_metadata(
-        self, metadata: dict[int, KVConnectorHandshakeMetadata]
-    ) -> None:
-        """
-        Set the KV connector handshake metadata for this connector.
+        num_blocks = dst_num_blocks
+        if block_size_ratio is not None:
+            num_blocks = int(num_blocks * block_size_ratio)
+        num_fa_descs = num_fa_regions * num_blocks
+
+        # All-attention fast path: single vectorized broadcast.
+        if num_ssm_regions == 0:
+            # NOTE (NickLucche) With HMA, every kv group has the same number of layers
+            # and layers from different groups share the same kv tensor.
+            # eg block_ids=[[1, 2], [3]]->blocks [1, 2] need to be
+            # read across all regions, same for [3], but group0-group1 blocks will
+            # always differ (different areas). Therefore we can just flatten the
+            # block_ids and compute the descs ids for all groups at once.
+            block_arr = np.concatenate(block_ids)[None, :]
+            region_ids = np.arange(num_fa_regions)[:, None]
+            return (region_ids * num_blocks + block_arr).flatten()
 
-        Args:
-            metadata (dict): the handshake metadata to set.
-        """
-        encoded_data: dict[int, bytes] = {}
-        encoder = msgspec.msgpack.Encoder()
-        for tp_rank, rank_metadata in metadata.items():
-            if not isinstance(rank_metadata, NixlHandshakePayload):
-                raise ValueError(
-                    "NixlConnectorScheduler expects NixlHandshakePayload for "
-                    "handshake metadata."
+        # Compute desc ids per group using the right stride: FA descs have
+        # num_blocks entries per region (kernel granularity), SSM descs have
+        # logical_blocks entries per region (no kernel splitting).
+        logical_blocks = num_blocks // physical_blocks_per_logical
+        all_descs: list[np.ndarray] = []
+        for i, group in enumerate(block_ids):
+            group_arr = np.asarray(group)
+            if _is_attention_spec(self._group_spec_types[i]):
+                fa_region_ids = np.arange(num_fa_regions)[:, None]
+                all_descs.append(
+                    (fa_region_ids * num_blocks + group_arr[None, :]).flatten()
                 )
-            encoded_data[tp_rank] = encoder.encode(rank_metadata)
-            logger.debug(
-                "Tp rank %d: encoded NixlHandshakePayload size: %s bytes",
-                tp_rank,
-                str(len(encoded_data[tp_rank])),
-            )
-
-        # Only start the listener when we have metadata to serve.
-        if self._nixl_handshake_listener_t is None:
-            ready_event = threading.Event()
-            self._nixl_handshake_listener_t = threading.Thread(
-                target=self._nixl_handshake_listener,
-                args=(
-                    encoded_data,
-                    ready_event,
-                    self._stop_event,
-                    self.side_channel_port,
-                ),
-                daemon=True,
-                name="nixl_handshake_listener",
-            )
-            self._nixl_handshake_listener_t.start()
-            ready_event.wait()  # Wait for listener ZMQ socket to be ready.
-
-    @staticmethod
-    def _nixl_handshake_listener(
-        encoded_data: dict[int, Any],
-        ready_event: threading.Event,
-        stop_event: threading.Event,
-        port: int,
-    ):
-        """Background thread for getting new NIXL handshakes."""
-        # NOTE(rob): this is a simple implementation. We will move
-        # to a better approach via HTTP endpoint soon.
-
-        # Listen for new requests for metadata.
-        host = envs.VLLM_NIXL_SIDE_CHANNEL_HOST
-        path = make_zmq_path("tcp", host, port)
-        logger.debug("Starting listening on path: %s", path)
-        with zmq_ctx(zmq.ROUTER, path) as sock:
-            sock.setsockopt(zmq.RCVTIMEO, 1000)
-            ready_event.set()
-            while True:
-                try:
-                    identity, _, msg = sock.recv_multipart()
-                except zmq.Again:
-                    if stop_event.is_set():
-                        break
-                    continue
-                # Decode the message which contains (GET_META_MSG, rank)
-                msg, target_tp_rank = msgspec.msgpack.decode(msg)
-                logger.debug(
-                    "Received message for tp rank %s",
-                    target_tp_rank,
+            elif _is_ssm_spec(self._group_spec_types[i]):
+                # NOTE (NickLucche) SSM and Attention block regions can
+                # be exchanged arbitrarily by manager.  Therefore, descs
+                # are laid out as:
+                #   [descs_fa (all regions) | descs_ssm (all regions)].
+                # num_fa_descs offset must be computed per-engine since
+                # P and D can have different num_blocks (and thus
+                # different FA desc counts).
+                ssm_region_ids = np.arange(num_ssm_regions)[:, None]
+                all_descs.append(
+                    (
+                        ssm_region_ids * logical_blocks
+                        + group_arr[None, :]
+                        + num_fa_descs
+                    ).flatten()
                 )
-                if msg != GET_META_MSG:
-                    logger.warning("Connection listener got unexpected message %s", msg)
-                sock.send_multipart((identity, b"", encoded_data[target_tp_rank]))
-
-    def _mamba_prefill_token_count(self, num_prompt_tokens: int) -> int:
-        """D-side only. Returns N-1 for Mamba models since the decoder
-        always recomputes the last token and must start from h(N-1)."""
-        if self._has_mamba and num_prompt_tokens > 1:
-            return num_prompt_tokens - 1
-        return num_prompt_tokens
-
-    def _truncate_mamba_request_for_prefill(self, request: "Request") -> None:
-        """P-side only: drop the last prompt token so the prefiller computes
-        h(N-1) instead of h(N). The decoder recomputes the last token to
-        derive h(N) correctly.
-
-        Guarded by ``_p_side_truncated`` to avoid repeated truncation if the
-        request is preempted and rescheduled."""
-        params = request.kv_transfer_params
-        if (
-            params is not None
-            # Guard against repeated truncation after preemption/reschedule.
-            and not params.get("_p_side_truncated")
-            and request.num_prompt_tokens > 1
-        ):
-            if request.prompt_token_ids is not None:
-                request.prompt_token_ids.pop()
-            elif request.prompt_embeds is not None:
-                request.prompt_embeds = request.prompt_embeds[:-1]
-            else:
-                return
-
-            request._all_token_ids.pop()
-            request.num_prompt_tokens -= 1
-            request.max_tokens = 1
-            params["_p_side_truncated"] = True
-
-    def get_num_new_matched_tokens(
-        self, request: "Request", num_computed_tokens: int
-    ) -> tuple[int, bool]:
-        """
-        For remote prefill, pull all prompt blocks from remote
-        asynchronously relative to engine execution.
-
-        Args:
-            request (Request): the request object.
-            num_computed_tokens (int): the number of locally
-                computed tokens for this request
-        Returns:
-            * the number of tokens that can be loaded from the
-              external KV cache beyond what is already computed.
-            * true if the external KV cache tokens will be loaded
-              asynchronously (between scheduler steps).
-        """
-
-        params = request.kv_transfer_params
-        logger.debug(
-            "NIXLConnector get_num_new_matched_tokens: "
-            "num_computed_tokens=%s, kv_transfer_params=%s",
-            num_computed_tokens,
-            params,
-        )
-
-        if params is not None and params.get("do_remote_prefill"):
-            # Remote prefill: get all prompt blocks from remote.
-            token_ids = request.prompt_token_ids or []
-            actual = self._mamba_prefill_token_count(len(token_ids))
-            count = actual - num_computed_tokens
-            if count > 0:
-                return count, True
-
-        if params is not None and params.get("do_remote_decode") and self._has_mamba:
-            self._truncate_mamba_request_for_prefill(request)
-
-        # No remote prefill for this request.
-        return 0, False
-
-    def update_state_after_alloc(
-        self, request: "Request", blocks: "KVCacheBlocks", num_external_tokens: int
-    ):
-        params = request.kv_transfer_params
-        logger.debug(
-            "NIXLConnector update_state_after_alloc: "
-            "num_external_tokens=%s, kv_transfer_params=%s",
-            num_external_tokens,
-            params,
-        )
-
-        if not params:
-            return
-
-        if params.get("do_remote_decode"):
-            self._reqs_in_batch.add(request.request_id)
-        if self.use_host_buffer and params.get("do_remote_decode"):
-            # NOTE: when accelerator is not directly supported by Nixl,
-            # prefilled blocks need to be saved to host memory before transfer.
-            self._reqs_need_save[request.request_id] = request
-        elif params.get("do_remote_prefill"):
-            if params.get("remote_block_ids"):
-                if all(
-                    p in params
-                    for p in (
-                        "remote_engine_id",
-                        "remote_request_id",
-                        "remote_host",
-                        "remote_port",
-                    )
-                ):
-                    # If remote_blocks and num_external_tokens = 0, we have
-                    # a full prefix cache hit on the D worker. We need to call
-                    # send_notif in _read_blocks to free the memory on the P.
-
-                    unhashed_local_block_ids: BlockIds = (
-                        blocks.get_unhashed_block_ids_all_groups()
-                        if num_external_tokens > 0
-                        else ()
-                    )
-                    local_block_ids = self.get_sw_clipped_blocks(
-                        unhashed_local_block_ids
-                    )
-
-                    # Get unhashed blocks to pull from remote. Mind that a full prefix
-                    # cache hit is indicated with an empty list.
-                    self._reqs_need_recv[request.request_id] = (
-                        request,
-                        local_block_ids,
-                    )
-
-                else:
-                    logger.warning(
-                        "Got invalid KVTransferParams: %s. This "
-                        "request will not utilize KVTransfer",
-                        params,
-                    )
             else:
-                assert num_external_tokens == 0
-            # Only trigger 1 KV transfer per request.
-            params["do_remote_prefill"] = False
-
-    def _build_save_meta(
-        self,
-        meta: NixlConnectorMetadata,
-        scheduler_output: SchedulerOutput,
-    ) -> None:
-        # only called when use_host_buffer is True to build the save metadata
-
-        # NOTE: For the prefill side, there might be a chance that an early added
-        # request is a chunked prefill, so we need to check if new blocks are added
-        for req_id, new_block_id_groups, _ in yield_req_data(scheduler_output):
-            req_to_save = self._reqs_need_save.get(req_id)
-            if req_to_save is None or new_block_id_groups is None:
-                continue
-            req = req_to_save
-
-            assert req.kv_transfer_params is not None
-            clipped_block_id_groups = self.get_sw_clipped_blocks(new_block_id_groups)
-            meta.add_new_req_to_save(
-                request_id=req_id,
-                local_block_ids=clipped_block_id_groups,
-                kv_transfer_params=req.kv_transfer_params,
-            )
-            assert scheduler_output.num_scheduled_tokens is not None
-            num_scheduled_tokens = scheduler_output.num_scheduled_tokens[req_id]
-            is_partial = (
-                req.num_computed_tokens + num_scheduled_tokens
-            ) < req.num_prompt_tokens
-            if not is_partial:
-                # For non-partial prefills, once new req_meta is scheduled, it
-                # can be removed from _reqs_need_save.
-                # For partial prefill case, we will retain the request in
-                # _reqs_need_save until all blocks are scheduled with req_meta.
-                # Therefore, only pop if `not is_partial`.
-                self._reqs_need_save.pop(req_id)
-
-    def build_connector_meta(
-        self,
-        scheduler_output: SchedulerOutput,
-    ) -> KVConnectorMetadata:
-        meta = NixlConnectorMetadata()
-
-        # Loop through scheduled reqs and convert to ReqMeta.
-        for req_id, (req, block_ids) in self._reqs_need_recv.items():
-            assert req.kv_transfer_params is not None
-            meta.add_new_req_to_recv(
-                request_id=req_id,
-                local_block_ids=block_ids,
-                kv_transfer_params=req.kv_transfer_params,
-            )
-
-        if self.use_host_buffer:
-            self._build_save_meta(meta, scheduler_output)
-
-        meta.reqs_to_send = self._reqs_need_send
-        meta.reqs_in_batch = self._reqs_in_batch
-        meta.reqs_not_processed = self._reqs_not_processed
-
-        # Clear the list once workers start the transfers
-        self._reqs_need_recv.clear()
-        self._reqs_in_batch = set()
-        self._reqs_not_processed = set()
-        self._reqs_need_send = {}
+                raise ValueError(
+                    f"Unknown spec type {self._group_spec_types[i]} at index {i}"
+                )
 
-        return meta
+        return np.concatenate(all_descs)
 
-    def request_finished(
+    def _build_local_splits_from_plan(
         self,
-        request: "Request",
-        block_ids: BlockIds,
-    ) -> tuple[bool, dict[str, Any] | None]:
-        """
-        Once a request is finished, determine whether request blocks
-        should be freed now or will be sent asynchronously and freed later.
+        plan: TPMapping,
+        src_blocks_data: list[tuple[int, int, int]],
+        num_fa_descs: int,
+    ) -> Iterator[list[tuple[int, int, int]]]:
+        """Build split handle data for P_TP > D_TP scenario.
+
+        num_fa_descs is the boundary between FA and SSM descriptors.
+        Split counts are derived from source_ranks_per_group lengths.
+        FA uses rank_to_attention_slot for the slot offset;
+        SSM uses the rank's positional index.
         """
-        from vllm.v1.request import RequestStatus
+        fa_idx = next(
+            i for i, t in enumerate(self._group_spec_types) if _is_attention_spec(t)
+        )
+        fa_num_splits = len(plan.source_ranks_per_group[fa_idx])
 
-        params = request.kv_transfer_params
-        logger.debug(
-            "NIXLConnector request_finished(%s), request_status=%s, "
-            "kv_transfer_params=%s",
-            request.request_id,
-            request.status,
-            params,
+        has_ssm_descs = num_fa_descs < len(src_blocks_data)
+        ssm_idx = next(
+            (i for i, t in enumerate(self._group_spec_types) if _is_ssm_spec(t)),
+            None,
         )
-        if not params:
-            return False, None
-
-        if params.get("do_remote_prefill"):
-            # If do_remote_prefill is still True when the request is finished,
-            # update_state_after_alloc must not have been called (the request
-            # must have been aborted before it was scheduled).
-            # To avoid stranding the prefill blocks in the prefill instance,
-            # we must add empty block_ids to _reqs_need_recv so that our
-            # worker side will notify and free blocks in the prefill instance.
-            self._reqs_need_recv[request.request_id] = (request, [])
-            params["do_remote_prefill"] = False
-            return False, None
-
-        if not params.get("do_remote_decode"):
-            return False, None
-        if request.status != RequestStatus.FINISHED_LENGTH_CAPPED:
-            # Also include the case of a P/D Prefill request with immediate
-            # block free (eg abort). Stop tracking this request.
-            self._reqs_not_processed.add(request.request_id)
-            # Clear _reqs_need_save if a request is aborted as partial prefill.
-            self._reqs_need_save.pop(request.request_id, None)
-            return False, None
-
-        # TODO: check whether block_ids actually ever be 0. If not we could
-        # remove the conditional below
-        delay_free_blocks = any(len(group) > 0 for group in block_ids)
-
-        if delay_free_blocks:
-            # Prefill request on remote. It will be read from D upon completion
-            logger.debug(
-                "NIXLConnector request_finished(%s) waiting for %d seconds "
-                "for remote decode to fetch blocks",
-                request.request_id,
-                envs.VLLM_NIXL_ABORT_REQUEST_TIMEOUT,
-            )
-            self._reqs_need_send[request.request_id] = (
-                time.perf_counter() + envs.VLLM_NIXL_ABORT_REQUEST_TIMEOUT
-            )
-            # NOTE HMA will "mark" empty/null blocks in groups with 0s (eg SWA ones),
-            # trimming down after allocating for the whole sequence length. Empty
-            # blocks are always at the start of the list.
-            # Here we "unpad" blocks to send the actual remote blocks to be read.
-            block_ids = self.get_sw_clipped_blocks(block_ids)
-
-        return delay_free_blocks, dict(
-            do_remote_prefill=True,
-            do_remote_decode=False,
-            remote_block_ids=block_ids,
-            remote_engine_id=self.engine_id,
-            remote_request_id=request.request_id,
-            remote_host=self.side_channel_host,
-            remote_port=self.side_channel_port,
-            tp_size=self.vllm_config.parallel_config.tensor_parallel_size,
+        ssm_num_splits = (
+            len(plan.source_ranks_per_group[ssm_idx])
+            if has_ssm_descs and ssm_idx is not None
+            else 0
         )
 
+        for p_idx, p_rank in enumerate(plan.all_source_ranks):
+            fa_slot = plan.rank_to_attention_slot.get(p_rank, 0)
 
-class NixlConnectorWorker:
-    """Implementation of Worker side methods"""
+            handle: list[tuple[int, int, int]] = []
+            for j, (addr, local_len, dev) in enumerate(src_blocks_data):
+                if j < num_fa_descs:
+                    chunk = local_len // fa_num_splits
+                    handle.append((addr + fa_slot * chunk, chunk, dev))
+                else:
+                    chunk = local_len // ssm_num_splits
+                    handle.append((addr + p_idx * chunk, chunk, dev))
+            yield handle
 
     def __init__(
-        self, vllm_config: VllmConfig, engine_id: str, kv_cache_config: "KVCacheConfig"
+        self,
+        vllm_config: "VllmConfig",
+        engine_id: str,
+        kv_cache_config: "KVCacheConfig",
     ):
-        if NixlWrapper is None:
+        nixl_wrapper_cls = NixlWrapper
+        if nixl_wrapper_cls is None:
             logger.error("NIXL is not available")
             raise RuntimeError("NIXL is not available")
         logger.info("Initializing NIXL wrapper")
@@ -1022,6 +216,12 @@ def __init__(
         self.nixl_backends = vllm_config.kv_transfer_config.get_from_extra_config(
             "backends", ["UCX"]
         )
+        kv_lease_duration: int = vllm_config.kv_transfer_config.get_from_extra_config(
+            "kv_lease_duration", 30
+        )
+        # NOTE (NickLucche): For now we use a hardcoded value for a simpler interface.
+        self._lease_extension = kv_lease_duration * 2 // 3
+
         self._is_hma_required = (
             not vllm_config.scheduler_config.disable_hybrid_kv_cache_manager
             and any(
@@ -1037,32 +237,36 @@ def __init__(
         }
         self.hma_group_size = len(kv_cache_config.kv_cache_tensors)
 
-        # Mamba metadata
-        self._is_mamba_group = [
-            isinstance(group.kv_cache_spec, MambaSpec)
-            for group in kv_cache_config.kv_cache_groups
-        ]
+        # ---- Model state (derived from model config) ----
         mamba_ssm_size = (0, 0)
-        self._has_mamba = any(self._is_mamba_group)
+        # Conv state sub-projection decomposition (None when no Mamba).
+        # The 3-read transfer requires DS (dim, state_len) conv layout so
+        # that x/B/C sub-projections are contiguous in memory.
+        self._conv_decomp: MambaConvSplitInfo | None = None
+        self._has_mamba = any(
+            isinstance(g.kv_cache_spec, MambaSpec)
+            for g in kv_cache_config.kv_cache_groups
+        )
         if self._has_mamba:
             assert self._is_hma_required
+            from vllm.model_executor.layers.mamba.mamba_utils import (
+                is_conv_state_dim_first,
+            )
+
+            assert is_conv_state_dim_first(), (
+                "3-read Mamba conv transfer requires DS conv state layout. "
+                "Set VLLM_SSM_CONV_STATE_LAYOUT=DS"
+            )
             mamba_spec = next(
                 spec
                 for spec in self._layer_specs.values()
                 if isinstance(spec, MambaSpec)
             )
-            conv_nbytes, ssm_nbytes = (
-                torch.tensor([], dtype=mamba_spec.dtypes[0]).element_size(),  # type: ignore[misc]
-                torch.tensor([], dtype=mamba_spec.dtypes[1]).element_size(),  # type: ignore[misc]
-            )
-            conv_shape, ssm_shape = (
-                torch.Size(mamba_spec.shapes[0]),
-                torch.Size(mamba_spec.shapes[1]),
-            )
-            mamba_ssm_size = (
-                conv_shape.numel() * conv_nbytes,
-                ssm_shape.numel() * ssm_nbytes,
+            self._conv_decomp = derive_mamba_conv_split(
+                mamba_spec,
+                vllm_config.parallel_config.tensor_parallel_size,
             )
+            mamba_ssm_size = self._conv_decomp.ssm_sizes
         self._mamba_ssm_size = mamba_ssm_size
 
         # Agent.
@@ -1086,7 +290,7 @@ def __init__(
                 else nixl_agent_config(num_threads=num_threads, capture_telemetry=True)
             )
 
-        self.nixl_wrapper = NixlWrapper(str(uuid.uuid4()), config)
+        self.nixl_wrapper = nixl_wrapper_cls(str(uuid.uuid4()), config)
         # Map of engine_id -> {rank0: agent_name0, rank1: agent_name1..}.
         self._remote_agents: dict[EngineId, dict[int, str]] = defaultdict(dict)
 
@@ -1097,6 +301,7 @@ def __init__(
 
         self.num_blocks = kv_cache_config.num_blocks
         self.enable_permute_local_kv = False
+        self.enable_heterogeneous_attn_post_process = False
 
         # KV Caches and nixl tracking data.
         self.device_type = current_platform.device_type
@@ -1183,10 +388,12 @@ def __init__(
         # Set of requests that have been part of a batch, regardless of status.
         self._reqs_to_process: set[ReqId] = set()
 
-        # invalid blocks from failed NIXL operations
-        self._invalid_block_ids: set[int] = set()
+        # Invalid blocks from failed NIXL operations (thread-safe queue of block ids)
+        self._invalid_block_ids: queue.Queue[set[int]] = queue.Queue()
         # requests that skipped transfer (handshake or transfer failures)
-        self._failed_recv_reqs: set[ReqId] = set()
+        # Uses Queue for thread-safe cross-thread coordination with the
+        # background handshake thread, matching the _ready_requests pattern.
+        self._failed_recv_reqs: queue.Queue[ReqId] = queue.Queue()
 
         # Handshake metadata of this worker for NIXL transfers.
         self.xfer_handshake_metadata: NixlHandshakePayload | None = None
@@ -1213,15 +420,16 @@ def __init__(
 
         self.kv_cache_layout = get_kv_cache_layout()
         self.host_buffer_kv_cache_layout = self.kv_cache_layout
-        logger.info("Detected attention backend %s", self.backend_name)
+        logger.info(
+            "Detected attention backend(s) %s",
+            [backend.get_name() for backend in self.attn_backends],
+        )
         logger.info("Detected kv cache layout %s", self.kv_cache_layout)
 
         # lazy initialized in register_kv_caches
         self.compat_hash: str | None = None
-        self.kv_topo: TpKVTopology | None = None
+        self.transfer_topo: TransferTopology | None = None
 
-        self._tp_size: dict[EngineId, int] = {self.engine_id: self.world_size}
-        self._block_size: dict[EngineId, int] = {self.engine_id: self.block_size}
         # With heterogeneous TP, P must wait for all assigned D TP workers to
         # finish reading before safely freeing the blocks.
         self.consumer_notification_counts_by_req = defaultdict[ReqId, int](int)
@@ -1230,6 +438,15 @@ def __init__(
         self._physical_blocks_per_logical_kv_block = 1
         self._sync_block_size_with_kernel()
 
+        # Unwrap UniformTypeKVCacheSpecs to get the representative spec type
+        self._group_spec_types = tuple(
+            get_representative_spec_type(g.kv_cache_spec)
+            for g in self.kv_cache_config.kv_cache_groups
+        )
+
+        # Per-engine TP mappings. Generated during handshake.
+        self.tp_mappings: dict[EngineId, TPMapping] = {}
+
         self.enforce_compat_hash = self.kv_transfer_config.get_from_extra_config(
             "enforce_handshake_compat", True
         )
@@ -1251,7 +468,6 @@ def _sync_block_size_with_kernel(self) -> None:
                 self.block_size // kernel_block_size
             )
             self.block_size = kernel_block_size
-            self._block_size[self.engine_id] = kernel_block_size
             self.num_blocks *= self._physical_blocks_per_logical_kv_block
 
     def _nixl_handshake(
@@ -1280,8 +496,8 @@ def _nixl_handshake(
         # Regardless, only handshake with the remote TP rank(s) that current
         # local rank will read from. Note that With homogeneous TP,
         # this happens to be the same single rank_i.
-        assert self.kv_topo is not None
-        p_remote_ranks = self.kv_topo.get_target_remote_ranks(remote_tp_size)
+        assert self.transfer_topo is not None
+        p_remote_ranks = self.transfer_topo.handshake_target_ranks(remote_tp_size)
         remote_rank_to_agent_name = {}
         path = make_zmq_path("tcp", host, port)
 
@@ -1479,23 +695,38 @@ def _log_failure(
             stacklevel=2,
         )
 
-    def _background_nixl_handshake(
-        self, req_id: str, remote_engine_id: EngineId, meta: ReqMeta
-    ):
-        # Do NIXL handshake in background and add to _ready_requests when done.
-        fut = self._handshake_futures.get(remote_engine_id)
-        if fut is None:
-            assert meta.remote is not None
+    def _ensure_handshake(
+        self,
+        engine_id: EngineId,
+        host: str,
+        port: int,
+        tp_size: int,
+    ) -> Future[dict[int, str]] | None:
+        """
+        Ensure a handshake is in-flight (or already done) for *engine_id*.
+
+        Returns the ``Future`` if a handshake is pending (or was just
+        started), or ``None`` if the handshake already completed
+        successfully.  Callers can attach per-request callbacks to the
+        returned future.
+        Failures to handshake are logged and the request is marked as failed.
+        """
+        with self._handshake_lock:
+            if engine_id in self._remote_agents:
+                return None
+            fut = self._handshake_futures.get(engine_id)
+            if fut is not None:
+                return fut
             fut = self._handshake_initiation_executor.submit(
                 self._nixl_handshake,
-                meta.remote.host,
-                meta.remote.port,
-                meta.tp_size,
-                remote_engine_id,
+                host,
+                port,
+                tp_size,
+                engine_id,
             )
-            self._handshake_futures[remote_engine_id] = fut
+            self._handshake_futures[engine_id] = fut
 
-            def done_callback(f: Future[dict[int, str]], eid=remote_engine_id):
+            def done_callback(f: Future[dict[int, str]], eid=engine_id):
                 with self._handshake_lock:
                     del self._handshake_futures[eid]
                     try:
@@ -1509,26 +740,37 @@ def done_callback(f: Future[dict[int, str]], eid=remote_engine_id):
                         )
 
             fut.add_done_callback(done_callback)
+            return fut
 
-        # check handshake success before proceeding with request
+    def _background_nixl_handshake(
+        self, req_id: str, remote_engine_id: EngineId, meta: ReqMeta
+    ):
+        # Do NIXL handshake in background and add to _ready_requests when done.
+        assert meta.remote is not None
+        fut = self._ensure_handshake(
+            remote_engine_id,
+            meta.remote.host,
+            meta.remote.port,
+            meta.tp_size,
+        )
+        if fut is None:
+            # Already handshaked — only happens if caller does not pre-check.
+            self._ready_requests.put((req_id, meta))
+            return
+
+        # Check handshake success before proceeding with request.
         def request_ready(f: Future[Any], entry=(req_id, meta)):
             try:
-                # check if handshake succeeded
                 f.result()
                 self._ready_requests.put(entry)
             except Exception as e:
-                # handshake failed - mark blocks as invalid
                 self._log_failure(
                     failure_type="handshake_failed",
                     req_id=req_id,
                     error=e,
                     meta=meta,
                 )
-                if (
-                    req_meta := self._recving_metadata.get(req_id)
-                ) and not self._is_hma_required:
-                    self._invalid_block_ids.update(req_meta.local_block_ids[0])
-                self._failed_recv_reqs.add(req_id)
+                self._handle_failed_transfer(req_id, None)
 
         fut.add_done_callback(request_ready)
 
@@ -1545,11 +787,11 @@ def register_cross_layers_kv_caches(self, kv_cache: torch.Tensor) -> None:
 
     def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
         """Register the KV Cache data in nixl."""
-        self.kv_topo = TpKVTopology(
+        self.transfer_topo = TransferTopology(
             tp_rank=self.tp_rank,
+            tp_size=self.world_size,
+            block_size=self.block_size,
             engine_id=self.engine_id,
-            remote_tp_size=self._tp_size,  # shared state
-            remote_block_size=self._block_size,  # shared state
             is_mla=self.use_mla,
             total_num_kv_heads=self.model_config.get_total_num_kv_heads(),
             attn_backends=self.attn_backends,
@@ -1560,7 +802,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
             is_mamba=self._has_mamba,
         )
         self.compat_hash = compute_nixl_compatibility_hash(
-            self.vllm_config, self.backend_name, self.kv_topo.cross_layers_blocks
+            self.vllm_config, self.backend_name, self.transfer_topo.cross_layers_blocks
         )
 
         if self.use_host_buffer:
@@ -1612,7 +854,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
             if isinstance(layer_spec, UniformTypeKVCacheSpecs):
                 # MLA DSv32 Indexer case: UniformTypeKVCacheSpecs merges kv_cache_specs
                 layer_spec = layer_spec.kv_cache_specs[layer_name]
-            cache_list = self.kv_topo.get_transfer_cache_regions(
+            cache_list = self.transfer_topo.get_transfer_cache_regions(
                 cache_or_caches, layer_spec
             )
             # `layer_spec.page_size_bytes` only accounts for logical page_size, that is
@@ -1625,7 +867,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
             )
             # For when registering multiple tensors eg K/V in separate regions.
             physical_page_size = physical_page_size // len(cache_list)
-            if self.kv_topo._cross_layers_blocks:
+            if self.transfer_topo._cross_layers_blocks:
                 # When cross-layers blocks are used, multiply by number of layers
                 physical_page_size = physical_page_size * len(
                     self.kv_cache_config.kv_cache_tensors
@@ -1664,9 +906,21 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
                 else:
                     self.block_len_per_layer.append(physical_page_size)
 
-                assert cache.shape[0] == num_blocks, (
-                    "All kv cache tensors must have the same number of blocks"
-                )
+                if cache.shape[0] != num_blocks:
+                    raise AssertionError(
+                        "All kv cache tensors must have the same number of "
+                        f"blocks; layer={layer_name}, "
+                        f"expected_num_blocks={num_blocks}, "
+                        f"cache_shape={tuple(cache.shape)}, "
+                        f"cache_stride={tuple(cache.stride())}, "
+                        f"layer_spec={type(layer_spec).__name__}, "
+                        f"backend={self.backend_name}, "
+                        "all_backends="
+                        f"{[backend.get_name() for backend in self.attn_backends]}, "
+                        f"kv_cache_layout={self.kv_cache_layout}, "
+                        "blocks_first="
+                        f"{self.transfer_topo.is_kv_layout_blocks_first}"
+                    )
 
                 if not self.use_mla:
                     # Different kv cache shape is not supported by HeteroTP.
@@ -1689,7 +943,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
         self.kv_caches_base_addr[self.engine_id][self.tp_rank] = seen_base_addresses
         self.num_regions = len(caches_data)
 
-        if self.kv_topo.is_kv_layout_blocks_first:
+        if self.transfer_topo.is_kv_layout_blocks_first:
             # NOTE (NickLucche) When FlashInfer is used, memory is registered
             # with joint KV for each block. This minimizes the overhead in
             # registerMem allowing faster descs queries. In order to be able to
@@ -1700,8 +954,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
             # then duplicate it logically to be able to index SSM/Conv separately.
             self.num_regions *= 2
 
-        # TODO (NickLucche) Adapt to different descs views (engine_id->tp_rank) to
-        # support heterogeneous TP.
+        # Total local FA descriptors (boundary between FA and mamba descs).
         self.num_descs = self.num_regions * self.num_blocks
 
         descs = self.nixl_wrapper.get_reg_descs(caches_data, self.nixl_memory_type)
@@ -1745,6 +998,10 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
             else self.host_buffer_kv_cache_layout,
             block_size=self.block_size,
             ssm_sizes=self._mamba_ssm_size,
+            attn_backend_name=self.backend_name,
+            physical_blocks_per_logical_kv_block=(
+                self._physical_blocks_per_logical_kv_block
+            ),
         )
         # Wrap metadata in payload with hash for defensive decoding
         assert self.compat_hash is not None
@@ -1754,6 +1011,176 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
             agent_metadata_bytes=encoder.encode(agent_metadata),
         )
 
+    def _build_mamba_local(
+        self,
+        base_addresses: list[int],
+        block_size_ratio: int,
+    ) -> list[tuple[int, int, int]]:
+        """Build 4 desc regions (x, B, C, ssm) per layer for local mamba
+        blocks, enabling the 3-read transfer with DS conv layout."""
+        assert block_size_ratio == 1, (
+            "Mamba 3-read transfer with block_size_ratio != 1 is not tested. "
+            f"Got block_size_ratio={block_size_ratio}."
+        )
+        assert self._conv_decomp is not None
+        conv_offsets = self._conv_decomp.local_conv_offsets
+        conv_size, ssm_size = self._mamba_ssm_size
+        num_blocks = self._logical_num_blocks * block_size_ratio
+        physical_per_logical = self._physical_blocks_per_logical_kv_block
+
+        result: list[tuple[int, int, int]] = []
+        for i, base_addr in enumerate(base_addresses):
+            # Jump one page_size, but ssm page_size may be bigger when kernel
+            # locks block size to a specific value (physical_per_logical scale).
+            page_stride = (
+                self.block_len_per_layer[i] // block_size_ratio * physical_per_logical
+            )
+            for off, sz in conv_offsets:
+                for blk in range(num_blocks):
+                    result.append(
+                        (base_addr + blk * page_stride + off, sz, self.device_id)
+                    )
+            # SSM temporal state follows the conv state.
+            for blk in range(num_blocks):
+                result.append(
+                    (
+                        base_addr + blk * page_stride + conv_size,
+                        ssm_size,
+                        self.device_id,
+                    )
+                )
+        return result
+
+    def _build_mamba_remote(
+        self,
+        nixl_agent_meta: NixlAgentMetadata,
+        tp_ratio: int,
+        transfer_info: EngineTransferInfo,
+    ) -> list[tuple[int, int, int]]:
+        """Build 4 remote desc regions (proj0, proj1, proj2, ssm) per layer
+        for the 3-read transfer.  For hetero-TP, each D rank reads only its
+        sub-projection slice from the P rank."""
+        assert self._conv_decomp is not None
+        effective_ratio = max(tp_ratio, 1)
+        # Mamba conv state is always TP-sharded, even when attention KV
+        # is replicated (num_kv_heads < tp_size).
+        local_offset = self.tp_rank % effective_ratio
+        conv_size_remote = nixl_agent_meta.ssm_sizes[0]
+
+        conv_offsets = self._conv_decomp.remote_conv_offsets(local_offset, tp_ratio)
+        if tp_ratio >= 1:
+            ssm_read_size = self._mamba_ssm_size[1]
+        else:
+            ssm_read_size = nixl_agent_meta.ssm_sizes[1]
+
+        remote_physical_per_logical = transfer_info.remote_physical_blocks_per_logical
+        num_blocks = nixl_agent_meta.num_blocks // remote_physical_per_logical
+        device_id = nixl_agent_meta.device_id
+
+        result: list[tuple[int, int, int]] = []
+        # NOTE (ZhanqiuHu): use per-layer block_lens[i], not [0], in case
+        # block lengths vary across layers (e.g. MLA).
+        for i, base_addr in enumerate(nixl_agent_meta.kv_caches_base_addr):
+            page_stride = nixl_agent_meta.block_lens[i] * remote_physical_per_logical
+            for off, sz in conv_offsets:
+                for blk in range(num_blocks):
+                    result.append((base_addr + blk * page_stride + off, sz, device_id))
+            # SSM temporal state is also TP-sharded on the heads dimension.
+            for blk in range(num_blocks):
+                ssm_addr = (
+                    base_addr
+                    + blk * page_stride
+                    + conv_size_remote
+                    + local_offset * ssm_read_size
+                )
+                result.append((ssm_addr, ssm_read_size, device_id))
+        return result
+
+    def _build_fa_local(
+        self,
+        base_addresses: list[int],
+        block_size_ratio: int,
+    ) -> list[tuple[int, int, int]]:
+        """Build local FA descriptors for all layers."""
+        assert self.transfer_topo is not None
+        num_blocks = self.num_blocks * block_size_ratio
+        result: list[tuple[int, int, int]] = []
+        for i, base_addr in enumerate(base_addresses):
+            kv_block_len = (
+                self.get_backend_aware_kv_block_len(
+                    layer_idx=i, first_split=True, mamba_view=False
+                )
+                // block_size_ratio
+            )
+            page_stride = self.block_len_per_layer[i] // block_size_ratio
+            for block_id in range(num_blocks):
+                block_offset = block_id * page_stride
+                addr = base_addr + block_offset
+                result.append((addr, kv_block_len, self.device_id))
+
+            if self.transfer_topo.is_kv_layout_blocks_first:
+                # Separate and interleave K/V regions to maintain the same
+                # descs ordering. This is needed for selecting contiguous heads
+                # when split across TP ranks.
+                second_split = self.get_backend_aware_kv_block_len(
+                    layer_idx=i, first_split=False, mamba_view=False
+                )
+                for block_id in range(num_blocks):
+                    block_offset = block_id * page_stride
+                    addr = base_addr + block_offset
+                    v_addr = addr + kv_block_len
+                    result.append((v_addr, second_split, self.device_id))
+        return result
+
+    def _build_fa_remote(
+        self,
+        plan: TPMapping,
+        nixl_agent_meta: NixlAgentMetadata,
+        block_size_ratio: int,
+    ) -> list[tuple[int, int, int]]:
+        """Build remote FA descriptors for all layers."""
+        assert self.transfer_topo is not None
+        fa_group_idx = next(
+            i for i, t in enumerate(self._group_spec_types) if _is_attention_spec(t)
+        )
+        num_attn_reads = len(plan.source_ranks_per_group[fa_group_idx])
+        num_blocks = nixl_agent_meta.num_blocks
+        result: list[tuple[int, int, int]] = []
+        for i, base_addr in enumerate(nixl_agent_meta.kv_caches_base_addr):
+            # Read our whole local region size from remote..
+            local_block_len = self.get_backend_aware_kv_block_len(
+                layer_idx=i, first_split=True, mamba_view=False
+            )
+            remote_kv_block_len = local_block_len // block_size_ratio
+            if block_size_ratio > 1:
+                # ..using remote kv_block_len as transfer unit
+                local_block_len = remote_kv_block_len
+
+            local_block_len = local_block_len // num_attn_reads
+            rank_offset = plan.rank_offset_factor * remote_kv_block_len
+
+            page_size = nixl_agent_meta.block_lens[i]
+            for block_id in range(num_blocks):
+                block_offset = block_id * page_size
+                # For each block, grab the kv heads chunk belonging to current local
+                # tp rank of size local_block_len.
+                addr = base_addr + block_offset + rank_offset
+                result.append((addr, local_block_len, nixl_agent_meta.device_id))
+
+            if self.transfer_topo.is_kv_layout_blocks_first:
+                # With FlashInfer index V separately to allow head splitting.
+                second_split = self.get_backend_aware_kv_block_len(
+                    layer_idx=i, first_split=False, mamba_view=False
+                )
+                second_split = second_split // num_attn_reads
+                for block_id in range(num_blocks):
+                    block_offset = block_id * page_size
+                    addr = base_addr + block_offset + rank_offset
+                    # Hop over the first split of remote page, K, to read V.
+                    v_addr = addr + nixl_agent_meta.block_lens[i] // 2
+                    result.append((v_addr, second_split, nixl_agent_meta.device_id))
+        return result
+
     def register_local_xfer_handler(
         self,
         block_size: int,
@@ -1769,66 +1196,30 @@ def register_local_xfer_handler(
         register another local_xfer_handler using remote block len to ensure
         data copy correctness.
         """
-        assert self.kv_topo is not None
-        kv_topo = self.kv_topo
-
+        assert self.transfer_topo is not None
         block_size_ratio = self.block_size // block_size
-        blocks_data: list[tuple[int, int, int]] = []
         local_base_addresses = self.kv_caches_base_addr[self.engine_id][self.tp_rank]
 
-        def register_blocks(blocks_data: list[tuple[int, int, int]], mamba: bool):
-            for i, base_addr in enumerate(local_base_addresses):
-                # The new block_len is using prefill block_len;
-                # and num_blocks is multiple with N
-                kv_block_len = (
-                    self.get_backend_aware_kv_block_len(
-                        layer_idx=i, first_split=True, mamba_view=mamba
-                    )
-                    // block_size_ratio
-                )
-                # Jump one page_size, but ssm page_size may be bigger when kernel
-                # locks block size to a specific value.
-                block_len_per_layer = (
-                    self.block_len_per_layer[i]
-                    // block_size_ratio
-                    * (1 if not mamba else self._physical_blocks_per_logical_kv_block)
-                )
-                num_blocks = self._logical_num_blocks if mamba else self.num_blocks
-                num_blocks = num_blocks * block_size_ratio
-                for block_id in range(num_blocks):
-                    block_offset = block_id * block_len_per_layer
-                    addr = base_addr + block_offset
-                    # (addr, len, device id)
-                    blocks_data.append((addr, kv_block_len, self.device_id))
-
-                if kv_topo.is_kv_layout_blocks_first:
-                    second_split = self.get_backend_aware_kv_block_len(
-                        layer_idx=i, first_split=False, mamba_view=mamba
-                    )
-                    # Separate and interleave K/V regions to maintain the same
-                    # descs ordering. This is needed for selecting contiguous heads
-                    # when split across TP ranks.
-                    for block_id in range(num_blocks):
-                        block_offset = block_id * block_len_per_layer
-                        addr = base_addr + block_offset
-                        # Register addresses for V cache (K registered first).
-                        v_addr = addr + kv_block_len
-                        blocks_data.append((v_addr, second_split, self.device_id))
-            logger.debug(
-                "Created %s blocks for src engine %s and rank %s on device id %s",
-                len(blocks_data),
-                self.engine_id,
-                self.tp_rank,
-                self.device_id,
-            )
-
-        register_blocks(blocks_data, mamba=False)
+        blocks_data = self._build_fa_local(local_base_addresses, block_size_ratio)
+        logger.debug(
+            "Created %s blocks for src engine %s and rank %s on device id %s",
+            len(blocks_data),
+            self.engine_id,
+            self.tp_rank,
+            self.device_id,
+        )
         if self._has_mamba:
             assert self.num_descs == len(blocks_data)
-            logger.debug(
-                "Registering additional %s local Mamba blocks", len(blocks_data)
+            # TODO (ZhanqiuHu): For homogeneous TP (tp_ratio == 1), the 3-descs split
+            # is unnecessary — a single conv desc per block suffices.  Consider
+            # adding a fast path that falls back to the standard 2-region
+            # registration (_build_fa_local mamba=True) when no hetero-TP
+            # remote has been seen.  Currently we always register 4 regions
+            # because local descs are created before knowing the remote TP.
+            logger.debug("Registering local Mamba descriptors (4 regions/layer)")
+            blocks_data.extend(
+                self._build_mamba_local(local_base_addresses, block_size_ratio)
             )
-            register_blocks(blocks_data, mamba=True)
 
         descs = self.nixl_wrapper.get_xfer_descs(blocks_data, self.nixl_memory_type)
         # NIXL_INIT_AGENT to be used for preparations of local descs.
@@ -1879,6 +1270,9 @@ def add_remote_agent(
 
         Regarding MLA case, the cache is replicated across TP workers so the rank_offset will just always be 0
         so that the whole cache is shared by "tp_ratio" D TP workers.
+
+        For Mamba hetero-TP, both tp_ratio > 0 (D_TP > P_TP) and
+        tp_ratio < 0 (P_TP > D_TP) are supported by the 3-read transfer.
         """  # noqa: E501
         engine_id = nixl_agent_meta.engine_id
         # TODO re-evaluate refreshing for scaling/recovery
@@ -1891,11 +1285,26 @@ def add_remote_agent(
             )
             return self._remote_agents[engine_id][remote_tp_rank]
 
-        ### Register remote agent metadata
-        if engine_id not in self._tp_size:
-            self._tp_size[engine_id] = remote_tp_size
-        if engine_id not in self._block_size:
-            self._block_size[engine_id] = nixl_agent_meta.block_size
+        ### Register remote engine in TransferTopology (idempotent).
+        assert self.transfer_topo is not None
+        transfer_topo = self.transfer_topo
+        physical_blocks_per_logical = (
+            nixl_agent_meta.physical_blocks_per_logical_kv_block
+        )
+        transfer_info = EngineTransferInfo(
+            remote_tp_size=remote_tp_size,
+            remote_block_size=nixl_agent_meta.block_size,
+            remote_block_len=nixl_agent_meta.block_lens[0],
+            remote_physical_blocks_per_logical=physical_blocks_per_logical,
+        )
+        transfer_topo.register_remote_engine(engine_id, transfer_info)
+        logger.info("Transfer plan: %s", transfer_topo.describe(engine_id))
+
+        self.tp_mappings[engine_id] = compute_tp_mapping(
+            transfer_topology=transfer_topo,
+            remote_tp_size=remote_tp_size,
+            group_spec_types=self._group_spec_types,
+        )
 
         remote_agent_name = self.nixl_wrapper.add_remote_agent(
             nixl_agent_meta.agent_metadata
@@ -1908,9 +1317,7 @@ def add_remote_agent(
         # remote:               | 0| 1| 2| 3| 4| 5| 6| 7| 8| 9|10|11|12|
         # local origin:|          0|          1|          8|         12|
         # local mapped:| 0| 1| 2| 3| 4| 5| 6| 7| 8| 9|10|11|12|13|14|15|
-        assert self.kv_topo is not None
-        kv_topo = self.kv_topo
-        block_size_ratio = kv_topo.block_size_ratio_from_engine_id(engine_id)
+        block_size_ratio = transfer_topo.block_size_ratio(nixl_agent_meta.block_size)
 
         if engine_id not in self.dst_num_blocks:
             self.dst_num_blocks[engine_id] = nixl_agent_meta.num_blocks
@@ -1923,12 +1330,7 @@ def add_remote_agent(
 
         # This is 1 when P and D `--tensor-parallel-size` match. Otherwise,
         # this is the ratio between the two sizes.
-        tp_ratio = self.kv_topo.tp_ratio_from_engine_id(engine_id)
-
-        # Handle tp_size>num_kv_heads: replicate KV cache.
-        indexes_into_remote = (
-            not self.kv_topo.replicates_kv_cache(engine_id) and tp_ratio > 0
-        )
+        tp_ratio = transfer_topo.tp_ratio(remote_tp_size)
 
         logger.debug(
             "Registering remote agent (%s, rank %s) memory regions with tp_ratio %s",
@@ -1937,6 +1339,8 @@ def add_remote_agent(
             tp_ratio,
         )
 
+        plan = self.tp_mappings[engine_id]
+
         ### (Optional) Register local agent memory regions. MLA is not split.
         if (
             tp_ratio < 0
@@ -1947,109 +1351,50 @@ def add_remote_agent(
             # Logically "split" own regions into |tp_ratio| chunks. Mind that
             # we only do this once per remote tp_size (replica-friendly).
             self.src_xfer_handles_by_tp_ratio[tp_ratio] = []
-            for i in range(-tp_ratio):
-                blocks_data = []
-                for memory_region in self.src_blocks_data:
-                    addr, local_block_len, own_tp_rank = memory_region
-                    # Computing block len layer by layer allows for different
-                    # block sizes to be used.
-                    remote_block_len = local_block_len // (-tp_ratio)
-                    addr = addr + i * remote_block_len
-                    blocks_data.append((addr, remote_block_len, own_tp_rank))
+
+            for handle_data in self._build_local_splits_from_plan(
+                plan,
+                self.src_blocks_data,
+                self.num_descs,
+            ):
                 descs = self.nixl_wrapper.get_xfer_descs(
-                    blocks_data, self.nixl_memory_type
+                    handle_data, self.nixl_memory_type
                 )
                 handle = self.nixl_wrapper.prep_xfer_dlist("NIXL_INIT_AGENT", descs)
                 self.src_xfer_handles_by_tp_ratio[tp_ratio].append(handle)
 
         ### Register remote agent memory regions
-        blocks_data = []
-        # With homogeneous TP, D pulls the whole kv cache from corresponding
-        # rank. With heterogeneous TP, prepare the descriptors by splitting the
-        # P KV cache along kv_head dim, of D worker's kv_head size (D>P).
+        # With homogeneous TP, D pulls the whole kv cache from corresponding rank. With
+        # heterogeneous TP, prepare the descriptors by splitting the P KV cache along
+        # kv_head dim, of D worker's kv_head size (D>P).
         # Eg. PTP1 DTP2 => P0 KV:[block0-KV_0 | block0-KV_1..].
 
         # Register all remote blocks, but only the corresponding kv heads.
-        def register_remote_blocks(
-            blocks_data: list[tuple[int, int, int]], mamba: bool
-        ):
-            for i, base_addr in enumerate(nixl_agent_meta.kv_caches_base_addr):
-                # Read our whole local region size from remote.
-                local_block_len = self.get_backend_aware_kv_block_len(
-                    layer_idx=i, first_split=True, mamba_view=mamba
-                )
-                remote_kv_block_len = local_block_len // block_size_ratio
-                if block_size_ratio > 1:
-                    # using remote kv_block_len as transfer unit
-                    local_block_len = remote_kv_block_len
-
-                if tp_ratio < 0 and not self.use_mla:
-                    # Remote tp is bigger: read a chunk of local region from remote
-                    local_block_len = local_block_len // (-tp_ratio)
-                rank_offset = (
-                    self.tp_rank % tp_ratio * remote_kv_block_len
-                    if indexes_into_remote
-                    else 0
-                )
-
-                # Assume same num_blocks for mamba and fa
-                num_blocks = (
-                    nixl_agent_meta.num_blocks
-                    if not mamba
-                    else nixl_agent_meta.num_blocks
-                    // self._physical_blocks_per_logical_kv_block
-                )
-                page_size = nixl_agent_meta.block_lens[i] * (
-                    1 if not mamba else self._physical_blocks_per_logical_kv_block
-                )
-                for block_id in range(num_blocks):
-                    block_offset = block_id * page_size
-                    # For each block, grab the heads chunk belonging to rank_i
-                    # of size remote_nheads // tp_ratio, which correspond to
-                    # self.block_len == remote_block_len//tp_ratio bytes.
-                    addr = base_addr + block_offset + rank_offset
-                    # (addr, len, device id)
-                    blocks_data.append(
-                        (addr, local_block_len, nixl_agent_meta.device_id)
-                    )
-
-                if kv_topo.is_kv_layout_blocks_first:
-                    # With FlashInfer index V separately to allow head splitting.
-                    second_split = self.get_backend_aware_kv_block_len(
-                        layer_idx=i, first_split=False, mamba_view=mamba
-                    )
-                    # Apply the same scaling as local_block_len above for when we read
-                    # a chunk of local V from `tp_ratio` separate remote workers.
-                    if tp_ratio < 0 and not self.use_mla:
-                        second_split = second_split // (-tp_ratio)
-                    for block_id in range(num_blocks):
-                        block_offset = block_id * page_size
-                        addr = base_addr + block_offset + rank_offset
-                        # Hop over the first split of remote page: either K or Conv.
-                        if mamba:
-                            v_addr = addr + nixl_agent_meta.ssm_sizes[0]
-                        else:
-                            v_addr = addr + nixl_agent_meta.block_lens[i] // 2
-                        blocks_data.append(
-                            (v_addr, second_split, nixl_agent_meta.device_id)
-                        )
-
+        blocks_data = self._build_fa_remote(
+            plan,
+            nixl_agent_meta,
+            block_size_ratio,
+        )
+        logger.debug(
+            "Created %s blocks for dst engine %s with remote rank %s and local rank %s",
+            len(blocks_data),
+            engine_id,
+            remote_tp_rank,
+            self.tp_rank,
+        )
+        if self._has_mamba:
             logger.debug(
-                "Created %s blocks for dst engine %s"
-                " with remote rank %s and local rank %s",
-                len(blocks_data),
+                "Registering remote Mamba blocks for engine %s rank %s",
                 engine_id,
                 remote_tp_rank,
-                self.tp_rank,
             )
-
-        register_remote_blocks(blocks_data, mamba=False)
-        if self._has_mamba:
-            # Create extra descs for the Mamba "view" of the same KV cache tensors.
-            logger.debug(
-                "Registering additional %s remote Mamba blocks", len(blocks_data)
+            blocks_data.extend(
+                self._build_mamba_remote(
+                    nixl_agent_meta,
+                    tp_ratio,
+                    transfer_info,
+                )
             )
-            register_remote_blocks(blocks_data, mamba=True)
 
         # Register with NIXL.
         descs = self.nixl_wrapper.get_xfer_descs(blocks_data, self.nixl_memory_type)
@@ -2075,24 +1420,43 @@ def _validate_remote_agent_handshake(
         """
         remote_engine_id = nixl_agent_meta.engine_id
 
-        assert self._tp_size[remote_engine_id] == remote_tp_size
-        assert self.kv_topo is not None
+        assert self.transfer_topo is not None
+        remote_info = self.transfer_topo.get_engine_info(remote_engine_id)
+        assert remote_info.remote_tp_size == remote_tp_size
 
-        tp_ratio = self.kv_topo.tp_ratio_from_engine_id(remote_engine_id)
-        block_size_ratio = self.kv_topo.block_size_ratio_from_engine_id(
-            remote_engine_id
+        tp_ratio = self.transfer_topo.tp_ratio(remote_tp_size)
+        block_size_ratio = self.transfer_topo.block_size_ratio(
+            nixl_agent_meta.block_size
         )
-        # Num kv_heads > tp_size and P TP > D TP case, not supported
-        assert not (tp_ratio < 0 and self.kv_topo.is_kv_replicated(remote_engine_id))
+        # num_kv_heads > tp_size with P_TP > D_TP not supported for non-mamba.
+        # Mamba models can have replicated FA KV with tp_ratio < 0.
+        # MLA models do not need to handle kv replication.
+        if not self.use_mla and not self._has_mamba:
+            assert not (
+                tp_ratio < 0 and self.transfer_topo.is_kv_replicated(remote_engine_id)
+            )
+
+        remote_physical_per_logical = (
+            nixl_agent_meta.physical_blocks_per_logical_kv_block
+        )
+        if (
+            self._has_mamba
+            and remote_physical_per_logical
+            != self._physical_blocks_per_logical_kv_block
+            and self.vllm_config.cache_config.enable_prefix_caching
+        ):
+            raise RuntimeError(
+                "Prefix caching with heterogeneous physical_blocks_per_logical "
+                "is not supported for Mamba hybrid models. "
+                f"Local: {self._physical_blocks_per_logical_kv_block}, "
+                f"Remote: {remote_physical_per_logical}. "
+                "Disable prefix caching with --no-enable-prefix-caching."
+            )
 
         if self._is_hma_required:
             assert block_size_ratio == 1, (
                 "HMA does not support different remote block size yet"
             )
-        # Mamba additional constraints
-        if self._has_mamba:
-            assert tp_ratio == 1, "Mamba does not support heterogeneous TP yet"
-
         kv_cache_layout = (
             self.kv_cache_layout
             if not self.use_host_buffer
@@ -2117,16 +1481,49 @@ def _validate_remote_agent_handshake(
                     "Or enable experimental feature to use HND to NHD support by "
                     "setting 'enable_permute_local_kv'=True in --kv-transfer-config."
                 )
+        # if remote_agent used attn is not same as local,
+        # hint heterogenuous attn post process
+        if (
+            nixl_agent_meta.attn_backend_name != self.backend_name
+            and self.backend_name in ["CPU_ATTN"]
+        ):
+            if self._is_hma_required:
+                raise RuntimeError(
+                    "heterogeneous attn post process is not supported with HMA"
+                )
+            logger.info(
+                "[Experimental] CPU_ATTN backend is used, "
+                "hint heterogeneous attn post process"
+            )
+            self.enable_heterogeneous_attn_post_process = True
+
+        # Heterogeneous TP requires head-splitting, which only works with
+        # HND layout. MLA and replicated-KV cases don't split on heads.
+        # Mamba doesn't support heterogeneous TP.
+        if (
+            abs(tp_ratio) != 1
+            and not self.use_mla
+            and not self.transfer_topo.is_kv_replicated(remote_engine_id)
+            and kv_cache_layout != "HND"
+            and not self.enable_permute_local_kv
+        ):
+            raise RuntimeError(
+                "Heterogeneous TP head-dimension splitting requires contiguous heads. "
+                "Use HND layout on the prefill side."
+            )
 
         # Block len can only vary across layers when using MLA.
         remote_block_len = nixl_agent_meta.block_lens[0]
-        if self.use_mla or self.kv_topo.is_kv_replicated(remote_engine_id):
+        if self.use_mla or self.transfer_topo.is_kv_replicated(remote_engine_id):
             # With replicated KV cache, only the number of blocks can differ.
-            for i in range(len(self.block_len_per_layer)):
-                assert (
-                    self.block_len_per_layer[i] // block_size_ratio
-                    == nixl_agent_meta.block_lens[i]
-                ), "KV cache sizes must match between P and D when replicated"
+            # TODO (ZhanqiuHu): For mamba models, validate FA and mamba
+            # block_lens separately.
+            if not self._has_mamba:
+                for i in range(len(self.block_len_per_layer)):
+                    assert (
+                        self.block_len_per_layer[i] // block_size_ratio
+                        == nixl_agent_meta.block_lens[i]
+                    ), "KV cache sizes must match between P and D when replicated"
         else:
             # When MLA is not used, this is a list of the same block length
             for block_len in nixl_agent_meta.block_lens:
@@ -2134,25 +1531,31 @@ def _validate_remote_agent_handshake(
                     "All remote layers must have the same block size"
                 )
 
-            if tp_ratio > 0:
-                # Remote tp is smaller: remote block_len size is bigger
-                assert (
-                    remote_block_len
-                    == (self.block_len_per_layer[0] * tp_ratio) // block_size_ratio
-                ), (
-                    "Remote P worker KV layer cache must be of shape [2, N, "
-                    "local_kv_heads*tp_ratio, page_size, head_dim] and same dtype."
-                )  # noqa: E501
-            else:
-                assert block_size_ratio == 1, (
-                    "Different local/remote block sizes are not supported when"
-                    " P TP > D TP."
-                )
-                # Remote tp is bigger: remote block_len size is smaller
-                assert remote_block_len == self.block_len_per_layer[0] // (-tp_ratio), (
-                    "Remote P worker KV layer cache must be of shape [2, N, "
-                    "local_kv_heads/tp_ratio, page_size, head_dim] and same dtype."
-                )  # noqa: E501
+            # HMA hybrid models (mamba+attention) pad block_len to
+            # max(attn_page, mamba_page), so the linear tp_ratio scaling
+            # assumption only holds for pure-attention models.
+            if not self._has_mamba:
+                if tp_ratio > 0:
+                    assert (
+                        remote_block_len
+                        == (self.block_len_per_layer[0] * tp_ratio) // block_size_ratio
+                    ), (
+                        "Remote P worker KV layer cache must be of shape [2, N,"
+                        " local_kv_heads*tp_ratio, page_size, head_dim] and "
+                        "same dtype."
+                    )
+                else:
+                    assert block_size_ratio == 1, (
+                        "Different local/remote block sizes are not supported"
+                        " when P TP > D TP."
+                    )
+                    assert remote_block_len == self.block_len_per_layer[0] // (
+                        -tp_ratio
+                    ), (
+                        "Remote P worker KV layer cache must be of shape [2, N,"
+                        " local_kv_heads/tp_ratio, page_size, head_dim] and "
+                        "same dtype."
+                    )
 
         # TP workers that handhshake with same remote have same #blocks.
         assert self.dst_num_blocks[remote_engine_id] == nixl_agent_meta.num_blocks
@@ -2227,7 +1630,7 @@ def post_process_device_kv_on_receive(
         if len(self.device_kv_caches) == 0:
             return
         assert block_size_ratio >= 1, "Only nP < nD supported currently."
-        assert self.kv_topo is not None
+        assert self.transfer_topo is not None
         if self.enable_permute_local_kv and block_size_ratio > 1:
             logger.debug(
                 "Post-processing device kv cache on receive by converting "
@@ -2247,7 +1650,7 @@ def post_process_device_kv_on_receive(
                 block_size_ratio,
             )
 
-        split_k_and_v = self.kv_topo.split_k_and_v
+        split_k_and_v = self.transfer_topo.split_k_and_v
 
         for block_ids in block_ids_list:
             indices = torch.tensor(block_ids, device=self.device_type, dtype=torch.long)
@@ -2266,41 +1669,83 @@ def post_process_device_kv_on_receive(
                             cache, indices, block_size_ratio
                         )
 
+    def post_process_device_kv_on_receive_heterogeneous_attn(
+        self, block_ids: list[int]
+    ):
+        """
+        Post process device kv cache after receiving from remote
+        for heterogeneous attention.
+        """
+        assert self.enable_heterogeneous_attn_post_process
+
+        indices = torch.tensor(block_ids, device=self.device_type, dtype=torch.long)
+
+        for _, cache_or_caches in self.device_kv_caches.items():
+            blocks_to_update = cache_or_caches.index_select(1, indices)
+            current_platform.pack_kv_cache(
+                key=blocks_to_update[0],
+                value=blocks_to_update[1],
+                key_cache=cache_or_caches[0],
+                value_cache=cache_or_caches[1],
+                block_ids=block_ids,
+                indices=indices,
+            )
+
     def get_finished(self) -> tuple[set[str], set[str]]:
         """
         Get requests that are done sending or recving on this specific worker.
         The scheduler process (via the MultiprocExecutor) will use this output
         to track which workers are done.
         """
-        assert self.kv_topo is not None
+        assert self.transfer_topo is not None
         done_sending = self._get_new_notifs()
         done_recving = self._pop_done_transfers(self._recving_transfers)
 
-        # add requests that skipped transfer to done_recving
-        done_recving.update(self._failed_recv_reqs)
-        self._failed_recv_reqs.clear()
+        # Drain queue of requests where handshake or transfer setup failed.
+        failed_recv_reqs = set[ReqId]()
+        while not self._failed_recv_reqs.empty():
+            try:
+                failed_recv_reqs.add(self._failed_recv_reqs.get_nowait())
+            except queue.Empty:
+                break
+
+        # Add failed requests to done_recving for scheduler tracking
+        # (blocks are already marked invalid, scheduler will handle recompute)
+        done_recving.update(failed_recv_reqs)
 
         if len(done_sending) > 0 or len(done_recving) > 0:
             logger.debug(
                 "Rank %s, get_finished: %s requests done sending "
-                "and %s requests done recving",
+                "and %s requests done recving (%s failed)",
                 self.tp_rank,
                 len(done_sending),
                 len(done_recving),
+                len(failed_recv_reqs),
             )
 
         block_ids_for_blocksize_post_process = defaultdict(list)
+        block_ids_for_heterogeneous_attn_post_process = list[list[int]]()
         for req_id in done_recving:
             # clean up metadata for completed requests
             meta = self._recving_metadata.pop(req_id, None)
             assert meta is not None, f"{req_id} not found in recving_metadata list"
+
+            # Skip KV sync and post-processing for failed requests
+            if req_id in failed_recv_reqs:
+                logger.warning(
+                    "Skipping KV post-processing for failed request %s",
+                    req_id,
+                )
+                continue
+
             assert meta.remote is not None
             if self.use_host_buffer:
                 self.sync_recved_kv_to_device(req_id, meta)
 
             # post processing for heteroblocksize
-            block_size_ratio = self.kv_topo.block_size_ratio_from_engine_id(
-                meta.remote.engine_id
+            remote_info = self.transfer_topo.get_engine_info(meta.remote.engine_id)
+            block_size_ratio = self.transfer_topo.block_size_ratio(
+                remote_info.remote_block_size
             )
             if not self.use_mla and (
                 block_size_ratio > 1 or self.enable_permute_local_kv
@@ -2309,12 +1754,20 @@ def get_finished(self) -> tuple[set[str], set[str]]:
                 block_ids_for_blocksize_post_process[block_size_ratio].append(
                     meta.local_physical_block_ids[0]
                 )
+            # post processing for heterogeneous attention
+            if self.enable_heterogeneous_attn_post_process:
+                block_ids_for_heterogeneous_attn_post_process.append(
+                    meta.local_physical_block_ids[0]
+                )
         for (
             block_size_ratio,
             block_ids_list,
         ) in block_ids_for_blocksize_post_process.items():
             self.post_process_device_kv_on_receive(block_size_ratio, block_ids_list)
 
+        for block_ids in block_ids_for_heterogeneous_attn_post_process:
+            self.post_process_device_kv_on_receive_heterogeneous_attn(block_ids)
+
         # Handle timeout to avoid stranding blocks on remote.
         now = time.perf_counter()
         while self._reqs_to_send:
@@ -2326,10 +1779,9 @@ def get_finished(self) -> tuple[set[str], set[str]]:
             self.xfer_stats.record_kv_expired_req()
             logger.warning(
                 "Releasing expired KV blocks for request %s which were "
-                "retrieved by %d decode worker(s) within %d seconds.",
+                "retrieved by %d remote worker(s) before lease expired.",
                 req_id,
                 count,
-                envs.VLLM_NIXL_ABORT_REQUEST_TIMEOUT,
             )
             self._reqs_to_process.remove(req_id)
             del self._reqs_to_send[req_id]
@@ -2342,12 +1794,22 @@ def _get_new_notifs(self) -> set[str]:
         Get req_ids which got a remote xfer message. When multiple consumers
         are reading from the same producer (heterogeneous TP scenario), wait
         for all consumers to be done pulling.
+
+        Also handles heartbeat notifications ("HB:req1,req2,...") by
+        extending the lease on the referenced requests.
         """
-        assert self.kv_topo is not None
+        assert self.transfer_topo is not None
         notified_req_ids: set[str] = set()
         for notifs in self.nixl_wrapper.get_new_notifs().values():
             for notif in notifs:
-                req_id, tp_size = notif.decode("utf-8").rsplit(":", 1)
+                msg = notif.decode("utf-8")
+
+                # Handle heartbeat messages from D-side.
+                if msg.startswith("HB:"):
+                    self._handle_heartbeat(msg[3:])
+                    continue
+
+                req_id, tp_size = msg.rsplit(":", 1)
                 if (
                     req_id not in self._reqs_to_send
                     and req_id not in self._reqs_to_process
@@ -2362,7 +1824,7 @@ def _get_new_notifs(self) -> set[str]:
 
                 # NOTE: `tp_ratio` is the opposite when swapping local<>remote
                 n_consumers = int(tp_size)
-                tp_ratio = self.kv_topo.tp_ratio(n_consumers)
+                tp_ratio = self.transfer_topo.tp_ratio(n_consumers)
 
                 # Number of reads *per producer* to wait for.
                 # When remote D TP > local P TP we expect `tp_ratio` reads.
@@ -2382,6 +1844,27 @@ def _get_new_notifs(self) -> set[str]:
                     self._reqs_to_send.pop(req_id, None)
         return notified_req_ids
 
+    def _handle_heartbeat(self, payload: str) -> None:
+        """Extend leases for requests referenced in a heartbeat.
+
+        Args:
+            payload: comma-separated P-side request IDs, e.g.
+                     "req_abc,req_def".
+        """
+        new_expiry = time.perf_counter() + self._lease_extension
+        for req_id in payload.split(","):
+            if req_id in self._reqs_to_send:
+                old = self._reqs_to_send[req_id]
+                self._reqs_to_send[req_id] = max(old, new_expiry)
+                logger.debug(
+                    "Heartbeat extended lease for request %s "
+                    "by %ds (old_expiry=%.1f, new_expiry=%.1f)",
+                    req_id,
+                    self._lease_extension,
+                    old,
+                    new_expiry,
+                )
+
     def _pop_done_transfers(self, transfers: dict[str, list[int]]) -> set[str]:
         """
         Pop completed xfers by checking for DONE state.
@@ -2429,7 +1912,7 @@ def _pop_done_transfers(self, transfers: dict[str, list[int]]) -> set[str]:
                 transfers[req_id] = in_progress
         return done_req_ids
 
-    def _handle_failed_transfer(self, req_id: str, handle: int):
+    def _handle_failed_transfer(self, req_id: str, handle: int | None):
         """
         Handle a failed transfer by marking all (logical) blocks as invalid and
         recording the failure.
@@ -2441,8 +1924,10 @@ def _handle_failed_transfer(self, req_id: str, handle: int):
         # Use .get() here as the metadata cleanup is handled by get_finished()
         # TODO (NickLucche) handle failed transfer for HMA.
         if (meta := self._recving_metadata.get(req_id)) and not self._is_hma_required:
-            self._invalid_block_ids.update(meta.local_block_ids[0])
-        self.nixl_wrapper.release_xfer_handle(handle)
+            self._invalid_block_ids.put(set(meta.local_block_ids[0]))
+        self._failed_recv_reqs.put(req_id)
+        if handle is not None:
+            self.nixl_wrapper.release_xfer_handle(handle)
         self.xfer_stats.record_failed_transfer()
 
     def start_load_kv(self, metadata: NixlConnectorMetadata):
@@ -2455,9 +1940,8 @@ def start_load_kv(self, metadata: NixlConnectorMetadata):
                 meta.local_block_ids
             )
             assert meta.remote is not None
-            meta.remote.block_ids = self._logical_to_kernel_block_ids(
-                meta.remote.block_ids
-            )
+            # Remote block IDs are kept logical here; expanded in
+            # _read_blocks_for_req using the remote engine's phys ratio.
             remote_engine_id = meta.remote.engine_id
             logger.debug(
                 "start_load_kv for request %s from remote engine %s. "
@@ -2503,25 +1987,83 @@ def start_load_kv(self, metadata: NixlConnectorMetadata):
             if req_id in self._reqs_to_process:
                 self._reqs_to_send[req_id] = expiration_time
 
+        # Send heartbeats to P-side engines to keep KV blocks alive while
+        # requests sit in the D scheduler WAITING queue.
+        self._send_heartbeats(metadata)
+
+    def _send_heartbeats(self, metadata: NixlConnectorMetadata) -> None:
+        """
+        Send heartbeat notifications to remote engines, extending lease on KV blocks.
+        """
+        for engine_id, hb_info in metadata.heartbeat_by_engine.items():
+            # Proactive handshake (this request may still be in waiting queue) so
+            # the **next** heartbeat for this remote can go through.
+            if (
+                self._ensure_handshake(
+                    engine_id, hb_info.host, hb_info.port, hb_info.tp_size
+                )
+                is not None
+            ):
+                continue  # handshake is still pending
+
+            # Build the heartbeat message: "HB:req1,req2,..."
+            hb_msg = ("HB:" + ",".join(hb_info.req_ids)).encode()
+            for agent_name in self._remote_agents[engine_id].values():
+                try:
+                    self.nixl_wrapper.send_notif(agent_name, notif_msg=hb_msg)
+                except Exception:
+                    logger.debug(
+                        "Failed to send heartbeat to engine %s",
+                        engine_id,
+                        exc_info=True,
+                    )
+
     def _read_blocks_for_req(self, req_id: str, meta: ReqMeta):
-        assert meta.remote is not None and self.kv_topo is not None
-        remote_ranks = self.kv_topo.get_target_remote_ranks_from_engine_id(
-            meta.remote.engine_id
+        assert meta.remote is not None and self.transfer_topo is not None
+        engine_id = meta.remote.engine_id
+        plan = self.tp_mappings[engine_id]
+        remote_info = self.transfer_topo.get_engine_info(engine_id)
+        tp_ratio = self.transfer_topo.tp_ratio(remote_info.remote_tp_size)
+
+        meta.remote.block_ids = self._logical_to_remote_kernel_block_ids(
+            meta.remote.block_ids,
+            remote_info.remote_physical_blocks_per_logical,
         )
-        tp_ratio = self.kv_topo.tp_ratio_from_engine_id(meta.remote.engine_id)
+        remote_block_ids = meta.remote.block_ids
+        local_block_ids = meta.local_physical_block_ids
+        num_groups = len(local_block_ids)
+        read_specs = [
+            ReadSpec(
+                remote_rank=rank,
+                local_block_ids=[
+                    list(local_block_ids[g])
+                    if rank in plan.source_ranks_per_group[g]
+                    else []
+                    for g in range(num_groups)
+                ],
+                remote_block_ids=[
+                    list(remote_block_ids[g])
+                    if rank in plan.source_ranks_per_group[g]
+                    else []
+                    for g in range(num_groups)
+                ],
+            )
+            for rank in plan.all_source_ranks
+        ]
+
         # D may have to perform multiple reads from different remote ranks.
-        for i, remote_rank in enumerate(remote_ranks):
-            if self.use_mla and tp_ratio < 0 and i > 0:
-                # MLA opt: when P TP > D TP, only a single read is executed for
-                # the first remote rank (cache is duplicated)..
-                break
+        # MLA opt: when P TP > D TP, only a single read is executed for
+        # the first remote rank (cache is duplicated)..
+        if self.use_mla and tp_ratio < 0:
+            assert len(read_specs) == 1
 
-            remote_block_size = self.kv_topo.remote_block_size[meta.remote.engine_id]
+        for i, spec in enumerate(read_specs):
+            remote_block_size = remote_info.remote_block_size
             logger.debug(
                 "Remote agent %s available, calling _read_blocks"
                 " on remote rank %s with remote block size %s for req %s",
                 meta.remote.engine_id,
-                remote_rank,
+                spec.remote_rank,
                 remote_block_size,
                 req_id,
             )
@@ -2540,36 +2082,33 @@ def _read_blocks_for_req(self, req_id: str, meta: ReqMeta):
 
             # Destination handle: remote_engine_id -> remote_rank -> handle.
             remote_xfer_side_handle = self.dst_xfer_side_handles[meta.remote.engine_id][
-                remote_rank
+                spec.remote_rank
             ]
+
             self._read_blocks(
+                read_spec=spec,
                 request_id=req_id,
                 dst_engine_id=meta.remote.engine_id,
                 remote_request_id=meta.remote.request_id,
-                local_block_ids=meta.local_physical_block_ids,
-                remote_block_ids=meta.remote.block_ids,
-                remote_rank=remote_rank,
                 local_xfer_side_handle=local_xfer_side_handle,
                 remote_xfer_side_handle=remote_xfer_side_handle,
             )
 
-            if self.use_mla and tp_ratio < 0:
-                # ..but we still need to notify the other remote ranks that we
-                # have the blocks we need so they can update the request state.
-                notif_id = f"{req_id}:{self.world_size}".encode()
-                remote_agents = self._remote_agents[meta.remote.engine_id]
-                for rank_to_notify, agent in remote_agents.items():
-                    if rank_to_notify != remote_rank:
-                        self.nixl_wrapper.send_notif(agent, notif_msg=notif_id)
+        if self.use_mla and tp_ratio < 0 and read_specs:
+            # ..but we still need to notify the other remote ranks that we
+            # have the blocks we need so they can update the request state.
+            notif_id = f"{meta.remote.request_id}:{self.world_size}".encode()
+            remote_agents = self._remote_agents[meta.remote.engine_id]
+            for rank_to_notify, agent in remote_agents.items():
+                if rank_to_notify != read_specs[0].remote_rank:
+                    self.nixl_wrapper.send_notif(agent, notif_msg=notif_id)
 
     def _read_blocks(
         self,
-        local_block_ids: BlockIds,
-        remote_block_ids: BlockIds,
+        read_spec: ReadSpec,
         dst_engine_id: str,
         request_id: str,
         remote_request_id: str,
-        remote_rank: int,
         local_xfer_side_handle: int,
         remote_xfer_side_handle: int,
     ):
@@ -2577,8 +2116,15 @@ def _read_blocks(
         Post a READ point-to-point xfer request from a single local worker to
         a single remote worker.
         """
-        assert self.kv_topo is not None
-        block_size_ratio = self.kv_topo.block_size_ratio_from_engine_id(dst_engine_id)
+        assert self.transfer_topo is not None
+        remote_rank = read_spec.remote_rank
+        local_block_ids = read_spec.local_block_ids
+        remote_block_ids = read_spec.remote_block_ids
+
+        remote_info = self.transfer_topo.get_engine_info(dst_engine_id)
+        block_size_ratio = self.transfer_topo.block_size_ratio(
+            remote_info.remote_block_size
+        )
         if block_size_ratio > 1:
             # TODO (NickLucche) assume HMA is off. Change to handle multiple KV groups.
             assert not self._is_hma_required
@@ -2643,28 +2189,27 @@ def _read_blocks(
             == len(local_block_ids)
             == len(self.kv_cache_config.kv_cache_groups)
         )
-        remote_block_ids = list(remote_block_ids)
-        for i, remote_group in enumerate(remote_block_ids):
-            num_remote_blocks = len(remote_group)
-            num_local_blocks = len(local_block_ids[i])
-            assert num_local_blocks <= num_remote_blocks
-            # Partial prefix cache hit: just read uncomputed blocks.
-            if num_local_blocks < num_remote_blocks:
-                remote_block_ids[i] = remote_group[-num_local_blocks:]
+        remote_physical_per_logical = remote_info.remote_physical_blocks_per_logical
+        local_block_ids, remote_block_ids = self._apply_prefix_caching(
+            local_block_ids, remote_block_ids, remote_physical_per_logical
+        )
 
         # NOTE (nicolo) With homogeneous TP, each TP worker loads KV from
         # corresponding rank. With heterogeneous TP, fixing D>P, the D tp
         # workers will issue xfers to parts of the P worker remote kv caches.
 
         # Get descs ids.
-        remote_block_descs_ids = self._get_block_descs_ids(
-            dst_engine_id,
-            remote_block_ids,
+        remote_block_descs_ids = self._compute_desc_ids(
+            block_ids=remote_block_ids,
+            dst_num_blocks=self.dst_num_blocks[dst_engine_id],
+            block_size_ratio=None,
+            physical_blocks_per_logical=remote_info.remote_physical_blocks_per_logical,
         )
-        local_block_descs_ids = self._get_block_descs_ids(
-            self.engine_id,
-            local_block_ids,
+        local_block_descs_ids = self._compute_desc_ids(
+            block_ids=local_block_ids,
+            dst_num_blocks=self.dst_num_blocks[self.engine_id],
             block_size_ratio=block_size_ratio,
+            physical_blocks_per_logical=self._physical_blocks_per_logical_kv_block,
         )
 
         assert len(local_block_descs_ids) == len(remote_block_descs_ids)
@@ -2696,14 +2241,7 @@ def _read_blocks(
                 dst_engine_id=dst_engine_id,
                 remote_rank=remote_rank,
             )
-            if (
-                meta := self._recving_metadata.get(request_id)
-            ) and not self._is_hma_required:
-                self._invalid_block_ids.update(meta.local_block_ids[0])
-            self.xfer_stats.record_failed_transfer()
-            if handle is not None:
-                self.nixl_wrapper.release_xfer_handle(handle)
-            self._failed_recv_reqs.add(request_id)
+            self._handle_failed_transfer(request_id, handle)
 
     def get_mapped_blocks(
         self, block_ids: np.ndarray, block_size_ratio: int
@@ -2726,57 +2264,6 @@ def get_mapped_blocks(
 
         return mapped_2d.flatten().astype(np.int64)
 
-    def _get_block_descs_ids(
-        self,
-        engine_id: str,
-        block_ids: BlockIds,
-        block_size_ratio: float | None = None,
-    ) -> np.ndarray:
-        """
-        Get the descs ids for a set of block ids.
-        When HMA is enabled number of descriptors across kv cache groups might differ.
-        A single flattened array is returned for all groups anyway.
-        """
-        region_ids = np.arange(self.num_regions)
-
-        # NOTE (NickLucche) With HMA, every kv group has the same number of layers and
-        # layers from different groups share the same kv tensor.
-        # eg block_ids=[[1, 2], [3]]->blocks [1, 2] need to be read across all regions,
-        # same for [3], but group0-group1 blocks will always differ (different areas).
-        # Therefore we can just flatten the block_ids and compute the descs ids for all
-        # groups at once.
-        num_blocks = self.dst_num_blocks[engine_id]
-        if block_size_ratio is not None:
-            num_blocks = int(num_blocks * block_size_ratio)
-
-        # Compute desc ids per group using the right stride: FA descs have
-        # num_blocks entries per region (kernel granularity), SSM descs have
-        # logical_blocks entries per region (no kernel splitting).
-        region_ids = region_ids[:, None]
-        if not self._has_mamba:
-            block_ids = np.concatenate(block_ids)[None, :]
-            descs_ids = region_ids * num_blocks + block_ids
-            return descs_ids.flatten()
-        else:
-            # NOTE (NickLucche) SSM and Attention blocks regions can be exchanged
-            # arbitrarily by manager. Therefore, descs are duplicated for SSM and
-            # Attention like so:
-            # desc_handle->[descs_fa (all regions) | descs_ssm (all regions)].
-            # This is like having two "low-level views" of the same storage.
-            # `num_fa_descs` offset must be computed per-engine since P and D can
-            # have different num_blocks (and thus different FA descs counts).
-            ratio = self._physical_blocks_per_logical_kv_block
-            # SSM may register fewer num_blocks than FA
-            logical_blocks = num_blocks // ratio
-            num_fa_descs = self.num_regions * num_blocks
-            all_descs = []
-            for i, group in enumerate(block_ids):
-                stride = logical_blocks if self._is_mamba_group[i] else num_blocks
-                group_arr = np.asarray(group)[None, :]
-                offset = num_fa_descs if self._is_mamba_group[i] else 0
-                all_descs.append((region_ids * stride + group_arr + offset).flatten())
-            return np.concatenate(all_descs)
-
     def _logical_to_kernel_block_ids(self, block_ids: BlockIds) -> BlockIds:
         """
         Convert logical block ids to kernel physical block ids.
@@ -2802,6 +2289,99 @@ def _logical_to_kernel_block_ids(self, block_ids: BlockIds) -> BlockIds:
             for i, group in enumerate(block_ids)
         ]
 
+    def _apply_prefix_caching(
+        self,
+        local_block_ids: BlockIds,
+        remote_block_ids: BlockIds,
+        remote_physical_per_logical: int,
+    ) -> tuple[BlockIds, list]:
+        """Apply prefix caching by trimming local/remote block ID lists.
+
+        For non-Mamba models: end-trim remote to match local count, so that
+        already-cached prefix blocks are skipped in the transfer.
+
+        For Mamba hybrid (prefix caching not yet supported): front-trim both
+        to the minimum count to handle kernel block count discrepancies from
+        logical block rounding in heterogeneous TP.
+        """
+        # Partial prefix cache hit: just read uncomputed blocks.
+        # Skip mamba groups — their blocks represent full state (conv+ssm),
+        # not per-token data, so trimming would corrupt the transfer.
+        remote_block_ids = list(remote_block_ids)
+        if not self._has_mamba:
+            for i, remote_group in enumerate(remote_block_ids):
+                num_local_blocks = len(local_block_ids[i])
+                assert num_local_blocks <= len(remote_group)
+                if num_local_blocks < len(remote_group):
+                    remote_block_ids[i] = remote_group[-num_local_blocks:]
+        else:
+            # (NOTE: ZhanqiuHu) Mamba hybrid: no prefix caching support so far.HeteroTP
+            # can cause different kernel block counts due to logical block rounding.
+            # Example: 640 prompt tokens, kernel_block_size=64
+            #   remote physical_per_logical=10, local physical_per_logical=6
+            #   remote logical ids from kv_transfer_params = [0]
+            #   local logical ids allocated = [0, 1]
+            #   remote kernel blocks: [0..9]  (1*10=10)
+            #   local kernel blocks:  [0..11] (2*6=12)
+            #   actual data blocks = ceil(640/64) = 10, trim both to 10
+            # Vice versa (remote physical_per_logical=6, local=10):
+            #   remote logical ids = [0, 1], local logical ids = [0]
+            #   remote kernel blocks: [0..11] (2*6=12)
+            #   local kernel blocks:  [0..9]  (1*10=10)
+            #   actual data blocks = ceil(640/64) = 10, trim both to 10
+            local_block_ids = list(local_block_ids)
+            for i, remote_group in enumerate(remote_block_ids):
+                num_local_blocks = len(local_block_ids[i])
+                num_remote_blocks = len(remote_group)
+                if _is_ssm_spec(self._group_spec_types[i]):
+                    assert num_local_blocks == num_remote_blocks
+                else:
+                    max_padding = max(
+                        self._physical_blocks_per_logical_kv_block,
+                        remote_physical_per_logical,
+                    )
+                    assert abs(num_local_blocks - num_remote_blocks) < max_padding, (
+                        f"Group {i}: |{num_local_blocks} - "
+                        f"{num_remote_blocks}| >= {max_padding}"
+                    )
+                    num_blocks = min(num_local_blocks, num_remote_blocks)
+                    local_block_ids[i] = local_block_ids[i][:num_blocks]
+                    remote_block_ids[i] = remote_group[:num_blocks]
+        return local_block_ids, remote_block_ids
+
+    def _logical_to_remote_kernel_block_ids(
+        self, block_ids: BlockIds, remote_physical_per_logical: int
+    ) -> BlockIds:
+        """Map logical block IDs to physical kernel block IDs on the remote.
+
+        Args:
+            block_ids: per-group lists of logical block IDs.
+            remote_physical_per_logical: remote engine's physical blocks
+                per logical block.
+
+        Returns:
+            Same structure with FA groups expanded (each logical block L
+            becomes kernel blocks [L*remote_physical_per_logical, ..
+            L*remote_physical_per_logical +
+            remote_physical_per_logical - 1]).
+            Mamba groups are passed through unchanged.
+        """
+        if remote_physical_per_logical == 1:
+            return block_ids
+        remote_arange = np.arange(remote_physical_per_logical).reshape(1, -1)
+        group_specs = self.kv_cache_config.kv_cache_groups
+        result = [
+            BlockTable.map_to_kernel_blocks(
+                np.array(group),
+                remote_physical_per_logical,
+                remote_arange,
+            ).tolist()
+            if not isinstance(group_specs[i].kv_cache_spec, MambaSpec)
+            else group
+            for i, group in enumerate(block_ids)
+        ]
+        return result
+
     def get_backend_aware_kv_block_len(
         self, layer_idx: int, first_split: bool = True, mamba_view: bool = False
     ) -> int:
@@ -2816,9 +2396,9 @@ def get_backend_aware_kv_block_len(
         the their size differs.
         Reference diagram:
                             KVCacheTensor (Shared)
-                               /       \
-                              /         \
-                             /           \
+                               /       \\
+                              /         \\
+                             /           \\
         Attention (FlashInfer) View      Mamba View
                   |                          |
                   |                          |
@@ -2835,12 +2415,9 @@ def get_backend_aware_kv_block_len(
            +-------------------+         +--------------------+
            |1st_split-2nd_split|         |1st_split-2nd_split |
         """
-        assert self.kv_topo is not None
-        if self.kv_topo.is_kv_layout_blocks_first:
-            # For indexing only half (either just the K or V part).
+        assert self.transfer_topo is not None
+        if self.transfer_topo.is_kv_layout_blocks_first:
             if mamba_view:
-                # NOTE (NickLucche) Mamba Opt: this is already skipping the padding so
-                # we're only transferring the minimum required bytes.
                 block_len = self._mamba_ssm_size[not first_split]
             else:
                 block_len = self.block_len_per_layer[layer_idx] // 2
@@ -2864,8 +2441,13 @@ def get_block_ids_with_load_errors(self) -> set[int]:
         This is called by the scheduler to identify blocks that need
         to be retried after a NIXL transfer failure.
         """
-        result = self._invalid_block_ids
-        self._invalid_block_ids = set()
+        # Drain the queue (thread-safe, no lock needed).
+        result: set[int] = set()
+        while not self._invalid_block_ids.empty():
+            try:
+                result.update(self._invalid_block_ids.get_nowait())
+            except queue.Empty:
+                break
         return result
 
     def __del__(self):
@@ -2899,266 +2481,3 @@ def shutdown(self):
         for desc in self._registered_descs:
             self.nixl_wrapper.deregister_memory(desc)
         self._registered_descs.clear()
-
-
-@contextlib.contextmanager
-def zmq_ctx(socket_type: Any, addr: str) -> Iterator[zmq.Socket]:
-    """Context manager for a ZMQ socket"""
-
-    if socket_type not in (zmq.ROUTER, zmq.REQ):
-        raise ValueError(f"Unexpected socket type: {socket_type}")
-
-    ctx: zmq.Context | None = None
-    try:
-        ctx = zmq.Context()  # type: ignore[attr-defined]
-        yield make_zmq_socket(
-            ctx=ctx, path=addr, socket_type=socket_type, bind=socket_type == zmq.ROUTER
-        )
-    finally:
-        if ctx is not None:
-            ctx.destroy(linger=0)
-
-
-@dataclass
-class NixlKVConnectorStats(KVConnectorStats):
-    """Container for transfer performance metrics"""
-
-    def __post_init__(self):
-        if not self.data:
-            # Empty container init, no data is passed in.
-            self.reset()
-
-    def reset(self):
-        # Must be serializable
-        self.data: dict[str, list[float | int]] = {
-            "transfer_duration": [],
-            "post_duration": [],
-            "bytes_transferred": [],
-            "num_descriptors": [],
-            "num_failed_transfers": [],
-            "num_failed_notifications": [],
-            "num_kv_expired_reqs": [],
-        }
-
-    def record_transfer(self, res: nixlXferTelemetry):
-        # Keep metrics units consistent with rest of the code: time us->s
-        self.data["transfer_duration"].append(res.xferDuration / 1e6)
-        self.data["post_duration"].append(res.postDuration / 1e6)
-        self.data["bytes_transferred"].append(res.totalBytes)
-        self.data["num_descriptors"].append(res.descCount)
-
-    def record_failed_transfer(self):
-        """Record a failed NIXL transfer operation."""
-        self.data["num_failed_transfers"].append(1)
-
-    def record_failed_notification(self):
-        """Record a failed NIXL notification (send_notif)."""
-        self.data["num_failed_notifications"].append(1)
-
-    def record_kv_expired_req(self):
-        """Record a request that had its KV blocks expire."""
-        self.data["num_kv_expired_reqs"].append(1)
-
-    def clone_and_reset(self) -> "NixlKVConnectorStats":
-        old = copy.copy(self)
-        self.reset()
-        return old
-
-    def is_empty(self) -> bool:
-        # Do not discard metrics update that are entirely failures related.
-        return (
-            self.num_successful_transfers == 0
-            and len(self.data["num_failed_transfers"]) == 0
-            and len(self.data["num_failed_notifications"]) == 0
-            and len(self.data["num_kv_expired_reqs"]) == 0
-        )
-
-    def aggregate(self, other: KVConnectorStats) -> KVConnectorStats:
-        if not other.is_empty():
-            for k, v in other.data.items():
-                accumulator = self.data[k]
-                assert isinstance(accumulator, list)
-                accumulator.extend(v)
-        return self
-
-    def reduce(self) -> dict[str, int | float]:
-        # Compute compact representative stats suitable for CLI logging
-        if self.num_successful_transfers == 0:
-            # CLI logging only reports successful transfers stats. If all requests in
-            # the interval were unsuccessful, Prom will report failures stats instead.
-            return {
-                "Num successful transfers": 0,
-                "Avg xfer time (ms)": 0,
-                "P90 xfer time (ms)": 0,
-                "Avg post time (ms)": 0,
-                "P90 post time (ms)": 0,
-                "Avg MB per transfer": 0,
-                "Throughput (MB/s)": 0,
-                "Avg number of descriptors": 0,
-            }
-
-        xfer_time = np.asarray(self.data["transfer_duration"])
-        post_time = np.asarray(self.data["post_duration"])
-        # Convert to MB for CLI logging.
-        mb = np.asarray(self.data["bytes_transferred"]) / 2**20
-        descs = np.asarray(self.data["num_descriptors"], dtype=np.uint32)
-        n = len(descs)
-        assert n == self.num_successful_transfers
-
-        total_mb = mb.sum()
-        avg_mb = total_mb / n
-
-        total_time_seconds = xfer_time.sum()
-        throughput_mb_s = total_mb / total_time_seconds
-
-        return {
-            "Num successful transfers": n,
-            "Avg xfer time (ms)": round(xfer_time.mean() * 1e3, 3),
-            "P90 xfer time (ms)": round(np.percentile(xfer_time, 90).item() * 1e3, 3),
-            "Avg post time (ms)": round(post_time.mean() * 1e3, 3),
-            "P90 post time (ms)": round(np.percentile(post_time, 90).item() * 1e3, 3),
-            "Avg MB per transfer": round(avg_mb, 3),
-            "Throughput (MB/s)": round(throughput_mb_s, 3),
-            "Avg number of descriptors": round(descs.mean(), 1),
-        }
-
-    @property
-    def num_successful_transfers(self) -> int:
-        return len(self.data["transfer_duration"])
-
-
-class NixlPromMetrics(KVConnectorPromMetrics):
-    def __init__(
-        self,
-        vllm_config: VllmConfig,
-        metric_types: dict[type[PromMetric], type[PromMetricT]],
-        labelnames: list[str],
-        per_engine_labelvalues: dict[int, list[object]],
-    ):
-        super().__init__(vllm_config, metric_types, labelnames, per_engine_labelvalues)
-
-        buckets = [
-            0.001,
-            0.005,
-            0.01,
-            0.025,
-            0.05,
-            0.075,
-            0.1,
-            0.2,
-            0.3,
-            0.5,
-            0.75,
-            1.0,
-            5.0,
-        ]
-        nixl_histogram_xfer_time = self._histogram_cls(
-            name="vllm:nixl_xfer_time_seconds",
-            documentation="Histogram of transfer duration for NIXL KV Cache transfers.",
-            buckets=buckets[1:],
-            labelnames=labelnames,
-        )
-        self.nixl_histogram_xfer_time = create_metric_per_engine(
-            nixl_histogram_xfer_time, self.per_engine_labelvalues
-        )
-        nixl_histogram_post_time = self._histogram_cls(
-            name="vllm:nixl_post_time_seconds",
-            documentation="Histogram of transfer post time for NIXL KV"
-            " Cache transfers.",
-            buckets=buckets,
-            labelnames=labelnames,
-        )
-        self.nixl_histogram_post_time = create_metric_per_engine(
-            nixl_histogram_post_time, self.per_engine_labelvalues
-        )
-        # uniform 2kb to 16gb range
-        buckets = [2 ** (10 + i) for i in range(1, 25, 2)]
-        nixl_histogram_bytes_transferred = self._histogram_cls(
-            name="vllm:nixl_bytes_transferred",
-            documentation="Histogram of bytes transferred per NIXL KV Cache transfers.",
-            buckets=buckets,
-            labelnames=labelnames,
-        )
-        self.nixl_histogram_bytes_transferred = create_metric_per_engine(
-            nixl_histogram_bytes_transferred, self.per_engine_labelvalues
-        )
-        buckets = [
-            10,
-            20,
-            30,
-            50,
-            75,
-            100,
-            200,
-            400,
-            1000,
-            2000,
-            4000,
-            10000,
-            20000,
-            50000,
-        ]
-        nixl_histogram_num_descriptors = self._histogram_cls(
-            name="vllm:nixl_num_descriptors",
-            documentation="Histogram of number of descriptors per NIXL"
-            "  KV Cache transfers.",
-            buckets=buckets,
-            labelnames=labelnames,
-        )
-        self.nixl_histogram_num_descriptors = create_metric_per_engine(
-            nixl_histogram_num_descriptors, self.per_engine_labelvalues
-        )
-        counter_nixl_num_failed_transfers = self._counter_cls(
-            name="vllm:nixl_num_failed_transfers",
-            documentation="Number of failed NIXL KV Cache transfers.",
-            labelnames=labelnames,
-        )
-        self.counter_nixl_num_failed_transfers = create_metric_per_engine(
-            counter_nixl_num_failed_transfers, self.per_engine_labelvalues
-        )
-        counter_nixl_num_failed_notifications = self._counter_cls(
-            name="vllm:nixl_num_failed_notifications",
-            documentation="Number of failed NIXL KV Cache notifications.",
-            labelnames=labelnames,
-        )
-        self.counter_nixl_num_failed_notifications = create_metric_per_engine(
-            counter_nixl_num_failed_notifications, self.per_engine_labelvalues
-        )
-
-        counter_nixl_num_kv_expired_reqs = self._counter_cls(
-            name="vllm:nixl_num_kv_expired_reqs",
-            documentation="Number of requests that had their KV expire. "
-            "NOTE: This metric is tracked on the P instance.",
-            labelnames=labelnames,
-        )
-        self.counter_nixl_num_kv_expired_reqs = create_metric_per_engine(
-            counter_nixl_num_kv_expired_reqs, self.per_engine_labelvalues
-        )
-
-    def observe(self, transfer_stats_data: dict[str, Any], engine_idx: int = 0):
-        for prom_obj, list_item_key in zip(
-            [
-                self.nixl_histogram_xfer_time,
-                self.nixl_histogram_post_time,
-                self.nixl_histogram_bytes_transferred,
-                self.nixl_histogram_num_descriptors,
-            ],
-            [
-                "transfer_duration",
-                "post_duration",
-                "bytes_transferred",
-                "num_descriptors",
-            ],
-        ):
-            for list_item in transfer_stats_data[list_item_key]:
-                prom_obj[engine_idx].observe(list_item)
-        for counter_obj, counter_item_key in zip(
-            [
-                self.counter_nixl_num_failed_transfers,
-                self.counter_nixl_num_failed_notifications,
-                self.counter_nixl_num_kv_expired_reqs,
-            ],
-            ["num_failed_transfers", "num_failed_notifications", "num_kv_expired_reqs"],
-        ):
-            for list_item in transfer_stats_data[counter_item_key]:
-                counter_obj[engine_idx].inc(list_item)
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/offloading/common.py b/vllm/distributed/kv_transfer/kv_connector/v1/offloading/common.py
index 06a727a27b55..c5a251a2a515 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/offloading/common.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/offloading/common.py
@@ -1,15 +1,60 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 
-from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorMetadata
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    KVConnectorMetadata,
+    KVConnectorWorkerMetadata,
+)
 from vllm.v1.kv_offload.worker.worker import TransferSpec
 
 ReqId = str
 
 
+@dataclass
+class TransferJob:
+    """A transfer job bundling request context with transfer spec.
+
+    Used for both loads and stores, keyed by scheduler-assigned job ID.
+    The worker reports the job ID back when the transfer finishes,
+    and the scheduler processes the completion.
+    """
+
+    req_id: ReqId
+    transfer_spec: TransferSpec
+
+
 @dataclass
 class OffloadingConnectorMetadata(KVConnectorMetadata):
-    reqs_to_load: dict[ReqId, TransferSpec]
-    reqs_to_store: dict[ReqId, TransferSpec]
-    reqs_to_flush: set[str] | None = None
+    # Keyed by scheduler-assigned job IDs.
+    load_jobs: dict[int, TransferJob]
+    store_jobs: dict[int, TransferJob]
+    jobs_to_flush: set[int] | None = None
+
+
+@dataclass
+class OffloadingWorkerMetadata(KVConnectorWorkerMetadata):
+    """Worker -> Scheduler metadata for completed transfer jobs.
+
+    Each worker reports {job_id: 1} for newly completed transfer jobs
+    (load or store). aggregate() sums counts across workers within a step.
+    The scheduler accumulates across steps and processes
+    a transfer completion only when count reaches num_workers.
+    """
+
+    completed_jobs: dict[int, int] = field(default_factory=dict)
+
+    def mark_completed(self, job_id: int) -> None:
+        """Record a transfer job completion from this worker."""
+        self.completed_jobs[job_id] = 1
+
+    def aggregate(
+        self, other: "KVConnectorWorkerMetadata"
+    ) -> "KVConnectorWorkerMetadata":
+        assert isinstance(other, OffloadingWorkerMetadata)
+
+        merged = dict(self.completed_jobs)
+        for job_id, v in other.completed_jobs.items():
+            merged[job_id] = merged.get(job_id, 0) + v
+
+        return OffloadingWorkerMetadata(completed_jobs=merged)
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/offloading/scheduler.py b/vllm/distributed/kv_transfer/kv_connector/v1/offloading/scheduler.py
index c28fe5e96593..678f4f0c89e6 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/offloading/scheduler.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/offloading/scheduler.py
@@ -1,72 +1,500 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from collections import defaultdict
-from collections.abc import Iterable
+from collections.abc import Iterable, Sequence
+from dataclasses import dataclass, field
 from itertools import islice
-from typing import Any
+from typing import Any, NamedTuple
 
 from vllm.distributed.kv_events import BlockRemoved, BlockStored, KVCacheEvent
 from vllm.distributed.kv_transfer.kv_connector.utils import yield_req_data
 from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorMetadata
 from vllm.distributed.kv_transfer.kv_connector.v1.offloading.common import (
     OffloadingConnectorMetadata,
+    OffloadingWorkerMetadata,
     ReqId,
+    TransferJob,
 )
 from vllm.logger import init_logger
+from vllm.utils.math_utils import cdiv
 from vllm.v1.core.kv_cache_manager import KVCacheBlocks
-from vllm.v1.core.kv_cache_utils import BlockHash
 from vllm.v1.core.sched.output import SchedulerOutput
-from vllm.v1.kv_offload.abstract import OffloadingManager
-from vllm.v1.kv_offload.mediums import GPULoadStoreSpec
-from vllm.v1.kv_offload.spec import OffloadingSpec
-from vllm.v1.kv_offload.worker.worker import TransferSpec
+from vllm.v1.kv_cache_interface import (
+    FullAttentionSpec,
+    KVCacheSpec,
+    MambaSpec,
+    SlidingWindowSpec,
+)
+from vllm.v1.kv_offload.base import (
+    GPULoadStoreSpec,
+    OffloadingManager,
+    OffloadingSpec,
+    OffloadKey,
+    ReqContext,
+    get_offload_block_hash,
+    make_offload_key,
+)
 from vllm.v1.outputs import KVConnectorOutput
 from vllm.v1.request import Request
 
 logger = init_logger(__name__)
 
 
+@dataclass(slots=True)
+class TransferJobStatus:
+    """Tracks scheduler-side state for a single transfer job."""
+
+    req_id: ReqId
+    # Number of workers still pending. Starts at num_workers,
+    # decremented as each worker reports completion. Job is done at 0.
+    pending_count: int
+    # Offload keys this job covers; passed to manager.complete_*().
+    keys: set[OffloadKey]
+    is_store: bool
+    # Store src block IDs whose ref_cnt protects them while the request
+    # runs. Only registered in _block_id_to_pending_jobs on request_finished.
+    non_sliding_window_block_ids: list[int] | None = None
+    # Store src block IDs that may be freed before the request finishes.
+    # Registered in _block_id_to_pending_jobs at store creation time.
+    sliding_window_block_ids: list[int] | None = None
+
+
+class GroupOffloadConfig(NamedTuple):
+    group_idx: int
+    gpu_block_size: int
+    offloaded_block_size: int
+    hash_block_size_factor: int
+    # None below means full attention
+    sliding_window_size_in_blocks: int | None
+    # Number of this group's offloaded blocks per full-attention alignment
+    # segment. Used to skip storing SWA blocks that can never serve a load
+    # hit (e.g. DeepSeek V4 where SWA groups have much smaller block sizes
+    # than the MLA full-attention group).
+    # None for full-attention groups or when the optimization doesn't apply.
+    alignment_block_count: int | None = None
+
+
+def get_sliding_window_size_in_blocks(
+    kv_cache_spec: KVCacheSpec, offloaded_block_size: int
+) -> int | None:
+    if isinstance(kv_cache_spec, SlidingWindowSpec):
+        assert kv_cache_spec.sliding_window > 0
+        return cdiv(kv_cache_spec.sliding_window, offloaded_block_size)
+
+    if isinstance(kv_cache_spec, MambaSpec):
+        # Mamba depends on a single state
+        return 1
+
+    assert isinstance(kv_cache_spec, FullAttentionSpec)
+    return None
+
+
+class SchedulerOffloadConfig(NamedTuple):
+    kv_group_configs: tuple[GroupOffloadConfig, ...]
+    block_size_factor: int
+    num_workers: int
+
+    @classmethod
+    def from_spec(cls, spec: OffloadingSpec) -> "SchedulerOffloadConfig":
+        # Determine the alignment token count from the full-attention group(s).
+        # This is the offloaded_block_size of the full-attention group; load
+        # hits are always aligned to this boundary, so SWA blocks earlier in
+        # each segment can never serve a load hit. Relevant for hybrid
+        # architectures like DeepSeek V4 (MLA + SWA groups).
+        full_attn_offloaded_block_sizes: set[int] = set()
+        for idx, gpu_block_size in enumerate(spec.gpu_block_size):
+            kv_spec = spec.kv_cache_config.kv_cache_groups[idx].kv_cache_spec
+            sw = get_sliding_window_size_in_blocks(
+                kv_spec, gpu_block_size * spec.block_size_factor
+            )
+            if sw is None:
+                full_attn_offloaded_block_sizes.add(
+                    gpu_block_size * spec.block_size_factor
+                )
+
+        # Only apply the optimization if there's a single consistent
+        # full-attention alignment size.
+        alignment_tokens: int | None = None
+        if len(full_attn_offloaded_block_sizes) == 1:
+            alignment_tokens = full_attn_offloaded_block_sizes.pop()
+
+        def _alignment_block_count(
+            offloaded_block_size: int,
+            sliding_window_size_in_blocks: int | None,
+        ) -> int | None:
+            if alignment_tokens is None or sliding_window_size_in_blocks is None:
+                return None
+            if alignment_tokens <= offloaded_block_size:
+                return None
+            per_segment = alignment_tokens // offloaded_block_size
+            if sliding_window_size_in_blocks >= per_segment:
+                return None
+            return per_segment
+
+        return cls(
+            num_workers=spec.vllm_config.parallel_config.world_size,
+            kv_group_configs=tuple(
+                GroupOffloadConfig(
+                    group_idx=idx,
+                    gpu_block_size=gpu_block_size,
+                    offloaded_block_size=gpu_block_size * spec.block_size_factor,
+                    hash_block_size_factor=(
+                        (gpu_block_size * spec.block_size_factor)
+                        // spec.hash_block_size
+                    ),
+                    sliding_window_size_in_blocks=(
+                        sw := get_sliding_window_size_in_blocks(
+                            spec.kv_cache_config.kv_cache_groups[idx].kv_cache_spec,
+                            gpu_block_size * spec.block_size_factor,
+                        )
+                    ),
+                    alignment_block_count=_alignment_block_count(
+                        gpu_block_size * spec.block_size_factor, sw
+                    ),
+                )
+                for idx, gpu_block_size in enumerate(spec.gpu_block_size)
+            ),
+            block_size_factor=spec.block_size_factor,
+        )
+
+
+@dataclass
+class RequestGroupState:
+    offload_keys: list[OffloadKey] = field(default_factory=list)
+    block_ids: list[int] = field(default_factory=list)
+    # index of next block (of size offloaded_block_size) to offload
+    next_stored_block_idx: int = 0
+    # number of offloaded blocks hit (including GPU prefix cache)
+    # when the request first started
+    num_hit_blocks: int = 0
+
+
+@dataclass(slots=True)
+class RequestOffloadState:
+    config: SchedulerOffloadConfig
+    req: Request
+    group_states: tuple[RequestGroupState, ...] = field(init=False)
+    req_context: ReqContext = field(init=False)
+    # number of hits in the GPU cache
+    num_locally_computed_tokens: int = 0
+    # In-flight job IDs. Per the connector's invariant, at any given time
+    # this contains either a single load job, or one or more store jobs.
+    transfer_jobs: set[int] = field(default_factory=set)
+
+    def __post_init__(self) -> None:
+        self.group_states = tuple(
+            RequestGroupState() for _ in self.config.kv_group_configs
+        )
+        self.req_context = ReqContext(
+            req_id=self.req.request_id,
+            kv_transfer_params=self.req.kv_transfer_params,
+        )
+
+    def update_offload_keys(self) -> None:
+        for group_config, group_state in zip(
+            self.config.kv_group_configs, self.group_states
+        ):
+            for req_block_hash in islice(
+                self.req.block_hashes,
+                group_config.hash_block_size_factor * len(group_state.offload_keys)
+                + group_config.hash_block_size_factor
+                - 1,
+                None,
+                group_config.hash_block_size_factor,
+            ):
+                group_state.offload_keys.append(
+                    make_offload_key(req_block_hash, group_config.group_idx)
+                )
+
+    def update_block_id_groups(
+        self, new_block_id_groups: tuple[list[int], ...] | None
+    ) -> None:
+        if new_block_id_groups is None:
+            return
+
+        assert len(new_block_id_groups) == len(self.group_states)
+        for group_state, new_blocks in zip(self.group_states, new_block_id_groups):
+            group_state.block_ids.extend(new_blocks)
+
+    def advance_stored_idx(self, num_offloadable_tokens: int) -> None:
+        for group_config, group_state in zip(
+            self.config.kv_group_configs, self.group_states
+        ):
+            num_blocks = num_offloadable_tokens // group_config.offloaded_block_size
+            group_state.next_stored_block_idx = num_blocks
+
+    def update_num_hit_blocks(self, num_cached_tokens: int) -> None:
+        for group_config, group_state in zip(
+            self.config.kv_group_configs, self.group_states
+        ):
+            group_state.num_hit_blocks = (
+                num_cached_tokens // group_config.offloaded_block_size
+            )
+
+
 class OffloadingConnectorScheduler:
     """Implementation of Scheduler side methods"""
 
     def __init__(self, spec: OffloadingSpec):
-        assert len(spec.gpu_block_size) == 1
-        self.gpu_block_size = spec.gpu_block_size[0]
-        self.offloaded_block_size = self.gpu_block_size * spec.block_size_factor
-        self.block_size_factor = spec.block_size_factor
+        self.config = SchedulerOffloadConfig.from_spec(spec)
         self.manager: OffloadingManager = spec.get_manager()
 
-        self._requests: dict[ReqId, Request] = {}
-        # list of GPU block IDs per request
-        self._request_block_ids: dict[ReqId, list[int]] = {}
-        # requests to load for the current scheduler step
-        self._reqs_to_load: dict[ReqId, TransferSpec] = {}
-        # request blocks are stored in order
-        # index of next block (of size offloaded_block_size) to offload
-        self._next_stored_block_idx: dict[ReqId, int] = {}
+        full_attention_groups: list[int] = []
+        sliding_window_groups: list[int] = []
+        for group_config in self.config.kv_group_configs:
+            if group_config.sliding_window_size_in_blocks is None:
+                full_attention_groups.append(group_config.group_idx)
+            else:
+                sliding_window_groups.append(group_config.group_idx)
+
+        # sort sliding window groups by window size in decreasing order
+        def _sliding_window_sort_key(i: int) -> int:
+            val = self.config.kv_group_configs[i].sliding_window_size_in_blocks
+            assert val is not None
+            return val
+
+        sliding_window_groups.sort(key=_sliding_window_sort_key, reverse=True)
+
+        # used by _lookup
+        self._sliding_window_groups: tuple[int, ...] = tuple(sliding_window_groups)
+        self._lookup_groups = tuple(full_attention_groups) + self._sliding_window_groups
+
+        self._req_status: dict[ReqId, RequestOffloadState] = {}
+        self._current_batch_load_jobs: dict[int, TransferJob] = {}
+        self._current_batch_jobs_to_flush: set[int] = set()
         # if GPU prefix caching is enabled,
         # track loaded blocks to avoid redundant loads
-        self._blocks_being_loaded: set[BlockHash] | None = (
+        self._blocks_being_loaded: set[OffloadKey] | None = (
             set() if spec.vllm_config.cache_config.enable_prefix_caching else None
         )
 
-        # request ID -> set(block hashes being stored/load)
-        self._reqs_being_stored = defaultdict[ReqId, set[BlockHash]](set)
-        self._reqs_being_loaded = defaultdict[ReqId, set[BlockHash]](set)
-
-    def _get_block_hashes(
+        # Job ID counter shared by loads and stores.
+        self._job_counter: int = 0
+        # Threshold value for stale jobs. All job ids >= _stale_job_threshold are
+        # active jobs.
+        self._stale_job_threshold: int = 0
+        self._jobs: dict[int, TransferJobStatus] = {}
+
+        # block_id -> pending store job_ids. Used to track jobs that needs
+        # flushing in case a block is re-allocated by the KV cache manager.
+        # Populated only for finished requests (running-request blocks are
+        # protected by their ref_cnt) and for sliding window blocks (which can
+        # be freed before a request finishes).
+        self._block_id_to_pending_jobs: dict[int, set[int]] = {}
+
+    def _generate_job_id(self) -> int:
+        job_id = self._job_counter
+        self._job_counter += 1
+        return job_id
+
+    def _remove_pending_job(self, job_id: int, block_ids: list[int] | None) -> None:
+        for bid in block_ids or ():
+            pending = self._block_id_to_pending_jobs[bid]
+            pending.remove(job_id)
+            if not pending:
+                del self._block_id_to_pending_jobs[bid]
+
+    def _maximal_prefix_lookup(
+        self, keys: Iterable[OffloadKey], req_context: ReqContext
+    ) -> int | None:
+        """Return the number of consecutive offloaded blocks from the start,
+        or None if the backend deferred a lookup."""
+        hit_count = 0
+        defer_lookup = False
+        for key in keys:
+            result = self.manager.lookup(key, req_context)
+            if result is None:
+                defer_lookup = True
+                # continue lookup to allow manager to kick-off async lookups
+                # for all blocks (until a miss is detected)
+                result = True
+            if not result:
+                break
+            hit_count += 1
+        return hit_count if not defer_lookup else None
+
+    def _sliding_window_lookup(
         self,
-        req: Request,
-        start_idx: int = 0,
-        end_idx: int | None = None,
-    ) -> Iterable[BlockHash]:
-        return islice(
-            req.block_hashes,
-            self.block_size_factor * start_idx + self.block_size_factor - 1,
-            self.block_size_factor * end_idx if end_idx else None,
-            self.block_size_factor,
+        keys: Sequence[OffloadKey],
+        sliding_window_size: int,
+        req_context: ReqContext,
+    ) -> int | None:
+        """Return the end index (in `keys`) of the last run of
+        `sliding_window_size` consecutive hits, scanning from the end.
+        Returns 0 on miss, None if the backend deferred a lookup."""
+        defer_lookup = False
+        consecutive_hits = 0
+        for idx in range(len(keys) - 1, -1, -1):
+            result = self.manager.lookup(keys[idx], req_context)
+            if result is None:
+                defer_lookup = True
+                # continue lookup to allow manager to kick-off async lookups
+                # for all blocks (until a hit is detected)
+                result = False
+            if not result:
+                consecutive_hits = 0
+            else:
+                consecutive_hits += 1
+                if consecutive_hits == sliding_window_size:
+                    return idx + sliding_window_size if not defer_lookup else None
+        return consecutive_hits if not defer_lookup else None
+
+    def _touch(self, req_status: RequestOffloadState):
+        for group_config, group_state in zip(
+            self.config.kv_group_configs, req_status.group_states
+        ):
+            if group_config.sliding_window_size_in_blocks is None:
+                self.manager.touch(group_state.offload_keys, req_status.req_context)
+            else:
+                # we aim to keep just blocks that are necessary to hit
+                # the original request (+ decoded blocks)
+                blocks_to_skip = max(
+                    0,
+                    group_state.num_hit_blocks
+                    - group_config.sliding_window_size_in_blocks,
+                )
+                self.manager.touch(
+                    group_state.offload_keys[blocks_to_skip:],
+                    req_status.req_context,
+                )
+
+    def _lookup(self, req_status: RequestOffloadState) -> int | None:
+        """
+        Find how many tokens beyond num_locally_computed_tokens can be loaded.
+
+        Iterates full-attention groups first (prefix lookup), then sliding-window
+        groups (suffix lookup). Each group may tighten max_hit_size_tokens, which
+        can invalidate an earlier group's result, so the loop re-runs when that
+        happens until num_hit_tokens converges.
+        """
+        num_computed_tokens = req_status.num_locally_computed_tokens
+        max_hit_size_tokens: int = req_status.req.num_tokens
+        if self._sliding_window_groups:
+            # the last prompt token has to be recomputed to get the logprobs
+            # for sliding window attention, we must reduce by 1 to make sure
+            # we still have a hit after reduction
+            max_hit_size_tokens -= 1
+        num_hit_tokens: int = 0
+        defer_lookup = False
+        lookup_groups = self._lookup_groups
+        while lookup_groups:
+            looked_up_sliding_window: bool = False
+            groups_iter = iter(lookup_groups)
+            lookup_groups = ()
+            for group_idx in groups_iter:
+                group_config: GroupOffloadConfig = self.config.kv_group_configs[
+                    group_idx
+                ]
+                group_state: RequestGroupState = req_status.group_states[group_idx]
+                offloaded_block_size = group_config.offloaded_block_size
+                offload_keys = group_state.offload_keys
+
+                assert (
+                    len(offload_keys)
+                    >= req_status.req.num_tokens // offloaded_block_size
+                )
+
+                # Constrain to block-aligned boundary for this group
+                max_hit_size_tokens = min(
+                    max_hit_size_tokens, len(offload_keys) * offloaded_block_size
+                )
+                if max_hit_size_tokens - num_computed_tokens < offloaded_block_size:
+                    # we can only load less than a block, better skip
+                    return 0
+
+                num_blocks = min(
+                    cdiv(max_hit_size_tokens, offloaded_block_size), len(offload_keys)
+                )
+                start_block_idx = num_computed_tokens // offloaded_block_size
+                offload_keys = offload_keys[start_block_idx:num_blocks]
+                sliding_window_size_in_blocks = (
+                    group_config.sliding_window_size_in_blocks
+                )
+
+                # end index (in the sliced offload_keys) up to which we
+                # have backend-confirmed hits
+                num_hit_blocks: int | None
+                if sliding_window_size_in_blocks is None:
+                    num_hit_blocks = self._maximal_prefix_lookup(
+                        offload_keys, req_status.req_context
+                    )
+                else:
+                    num_hit_blocks = self._sliding_window_lookup(
+                        offload_keys,
+                        sliding_window_size_in_blocks,
+                        req_status.req_context,
+                    )
+                if num_hit_blocks == 0:
+                    return 0
+
+                if num_hit_blocks is None:
+                    defer_lookup = True
+                else:
+                    max_hit_size_tokens = min(
+                        max_hit_size_tokens,
+                        offloaded_block_size * (start_block_idx + num_hit_blocks),
+                    )
+
+                new_num_hit_tokens = max_hit_size_tokens - num_computed_tokens
+                if new_num_hit_tokens < offloaded_block_size:
+                    # we can only load less than a block, better skip
+                    return 0
+
+                if new_num_hit_tokens < num_hit_tokens:
+                    if defer_lookup:
+                        # make another iteration on all groups to check
+                        # if we still need to defer lookup
+                        defer_lookup = False
+                        lookup_groups = self._lookup_groups
+                    elif looked_up_sliding_window and not lookup_groups:
+                        # we need another iteration to confirm previously looked up
+                        # sliding window works with the new_num_hit_tokens
+                        lookup_groups = self._sliding_window_groups
+
+                looked_up_sliding_window |= sliding_window_size_in_blocks is not None
+                num_hit_tokens = new_num_hit_tokens
+
+        if defer_lookup:
+            logger.debug(
+                "Offloading manager delayed request %s as backend requested",
+                req_status.req.request_id,
+            )
+            return None
+
+        # possibly delay request if any of the hit blocks is already being loaded
+        if self._blocks_being_loaded:
+            for group_config, group_state in zip(
+                self.config.kv_group_configs, req_status.group_states
+            ):
+                offloaded_block_size = group_config.offloaded_block_size
+                sliding_window_size_in_blocks = (
+                    group_config.sliding_window_size_in_blocks
+                )
+                offload_keys = group_state.offload_keys
+                num_blocks = cdiv(
+                    num_computed_tokens + num_hit_tokens, offloaded_block_size
+                )
+                start_block_idx = num_computed_tokens // offloaded_block_size
+                offload_keys = offload_keys[start_block_idx:num_blocks]
+                if sliding_window_size_in_blocks is not None:
+                    offload_keys = offload_keys[-sliding_window_size_in_blocks:]
+                if any(key in self._blocks_being_loaded for key in offload_keys):
+                    # hit blocks are being loaded, delay request
+                    logger.debug(
+                        "Delaying request %s since some of its"
+                        " blocks are already being loaded",
+                        req_status.req.request_id,
+                    )
+                    return None
+
+        logger.debug(
+            "Request %s hit %s offloaded tokens after %s GPU hit tokens",
+            req_status.req.request_id,
+            num_hit_tokens,
+            num_computed_tokens,
         )
 
+        return num_hit_tokens
+
     def get_num_new_matched_tokens(
         self, request: Request, num_computed_tokens: int
     ) -> tuple[int | None, bool]:
@@ -89,201 +517,350 @@ def get_num_new_matched_tokens(
                 - `True` if tokens will be loaded asynchronously
                   (between scheduler steps).
         """
-        num_blocks = request.num_tokens // self.offloaded_block_size
-
-        assert len(request.block_hashes) // self.block_size_factor == num_blocks
-        block_hashes = self._get_block_hashes(request)
-
-        self.manager.touch(block_hashes)
-
-        full_block_tokens = self.offloaded_block_size * num_blocks
-        if full_block_tokens - num_computed_tokens < self.offloaded_block_size:
-            # we can load less than a block, skip
-            return 0, False
-
-        start_block_idx = num_computed_tokens // self.offloaded_block_size
-        hits = self.manager.lookup(
-            self._get_block_hashes(request, start_idx=start_block_idx)
-        )
-        if hits is None:
-            # indicates a lookup that should be tried later
-            return None, False
-        if hits == 0:
-            return 0, False
-
-        num_hit_tokens = (
-            self.offloaded_block_size * (start_block_idx + hits) - num_computed_tokens
-        )
-        logger.debug(
-            "Request %s hit %s offloaded tokens after %s GPU hit tokens",
-            request.request_id,
-            num_hit_tokens,
-            num_computed_tokens,
-        )
-        if num_hit_tokens < self.offloaded_block_size:
-            return 0, False
-
-        if self._blocks_being_loaded:
-            block_hashes = self._get_block_hashes(
-                request, start_idx=start_block_idx, end_idx=start_block_idx + hits
+        is_new_request = False
+        if req_status := self._req_status.get(request.request_id):
+            # make sure block IDs are cleared
+            for group_state in req_status.group_states:
+                group_state.block_ids.clear()
+        else:
+            is_new_request = True
+            req_status = RequestOffloadState(config=self.config, req=request)
+            self._req_status[request.request_id] = req_status
+
+        req_status.update_offload_keys()
+        req_status.num_locally_computed_tokens = num_computed_tokens
+
+        num_hit_tokens = self._lookup(req_status)
+        if is_new_request:
+            req_status.update_num_hit_blocks(
+                num_computed_tokens + (num_hit_tokens or 0)
             )
 
-            if any(
-                block_hash in self._blocks_being_loaded for block_hash in block_hashes
-            ):
-                # hit blocks are being loaded, delay request
-                logger.debug(
-                    "Delaying request %s since some of its blocks are already"
-                    " being loaded",
-                    request.request_id,
-                )
-                return None, False
+        self._touch(req_status)
 
-        return num_hit_tokens, True
+        return num_hit_tokens, bool(num_hit_tokens)
 
     def update_state_after_alloc(
         self, request: Request, blocks: KVCacheBlocks, num_external_tokens: int
     ):
-        self._requests[request.request_id] = request
-        # the block ids are updated in _get_reqs_to_store
-        self._request_block_ids[request.request_id] = []
-
         if num_external_tokens == 0:
             return
 
-        block_groups = blocks.get_block_ids()
-        block_ids = block_groups[0]
-
-        num_computed_gpu_blocks = sum(
-            block.block_hash is not None for block in blocks.blocks[0]
-        )
-        num_computed_tokens = num_computed_gpu_blocks * self.gpu_block_size
-        full_block_tokens = num_computed_tokens + num_external_tokens
-        assert full_block_tokens % self.offloaded_block_size == 0
+        req_status = self._req_status[request.request_id]
+
+        num_locally_computed_tokens = req_status.num_locally_computed_tokens
+        num_cached_tokens = num_locally_computed_tokens + num_external_tokens
+
+        params = req_status.req_context.kv_transfer_params
+        do_remote_decode = params is not None and params.get("do_remote_decode")
+
+        keys_to_load: list[OffloadKey] = []
+        dst_block_ids: list[int] = []
+        # per group
+        group_sizes: list[int] = []
+        block_indices: list[int] = []
+        for group_config, group_state, group_blocks in zip(
+            self.config.kv_group_configs,
+            req_status.group_states,
+            blocks.blocks,
+        ):
+            gpu_block_size = group_config.gpu_block_size
+            offloaded_block_size = group_config.offloaded_block_size
+            offload_keys = group_state.offload_keys
+            num_gpu_blocks = cdiv(num_cached_tokens, gpu_block_size)
+
+            assert len(group_blocks) >= num_gpu_blocks
+            num_locally_computed_gpu_blocks = num_gpu_blocks
+            # Skip null placeholder blocks (used for sliding window or mamba padding).
+            for i, block in enumerate(group_blocks[:num_gpu_blocks]):
+                if not block.is_null and block.block_hash is None:
+                    num_locally_computed_gpu_blocks = i
+                    break
+
+            assert (
+                num_locally_computed_tokens
+                <= num_locally_computed_gpu_blocks * gpu_block_size
+            )
+            num_pending_gpu_blocks = num_gpu_blocks - num_locally_computed_gpu_blocks
 
-        num_pending_gpu_blocks = len(block_ids) - num_computed_gpu_blocks
-        assert num_external_tokens == num_pending_gpu_blocks * self.gpu_block_size
+            if group_config.sliding_window_size_in_blocks is not None:
+                assert (
+                    num_pending_gpu_blocks
+                    <= group_config.sliding_window_size_in_blocks
+                    * self.config.block_size_factor
+                )
 
-        start_block_idx = num_computed_tokens // self.offloaded_block_size
-        num_blocks = full_block_tokens // self.offloaded_block_size
+            num_blocks = cdiv(num_cached_tokens, offloaded_block_size)
+            assert len(offload_keys) >= num_blocks
+            if num_pending_gpu_blocks:
+                start_block_idx = (
+                    num_locally_computed_gpu_blocks // self.config.block_size_factor
+                )
+                keys_to_load.extend(offload_keys[start_block_idx:num_blocks])
 
-        assert len(request.block_hashes) // self.block_size_factor >= num_blocks
-        block_hashes = self._get_block_hashes(
-            request, start_idx=start_block_idx, end_idx=num_blocks
-        )
+            dst_block_ids.extend(
+                block.block_id
+                for block in group_blocks[
+                    num_locally_computed_gpu_blocks:num_gpu_blocks
+                ]
+            )
+            group_sizes.append(num_pending_gpu_blocks)
+            block_indices.append(num_locally_computed_gpu_blocks)
+
+            if not do_remote_decode:
+                # For P/D prefill requests (do_remote_decode=True), we do
+                # NOT skip saving the hit prefix, as we need to stream the
+                # entire KV cache so a remote decode node can consume it.
+                group_state.next_stored_block_idx = num_blocks
+
+        # Fence dst blocks against finished-request pending stores.
+        if (
+            self._block_id_to_pending_jobs
+            and not self._block_id_to_pending_jobs.keys().isdisjoint(dst_block_ids)
+        ):
+            self._current_batch_jobs_to_flush.update(
+                jid
+                for bid in dst_block_ids
+                for jid in self._block_id_to_pending_jobs.get(bid, ())
+            )
 
-        src_spec = self.manager.prepare_load(block_hashes)
+        src_spec = self.manager.prepare_load(keys_to_load, req_status.req_context)
         dst_spec = GPULoadStoreSpec(
-            block_ids[num_computed_gpu_blocks:],
-            group_sizes=(num_pending_gpu_blocks,),
-            block_indices=(num_computed_gpu_blocks,),
+            dst_block_ids, group_sizes=group_sizes, block_indices=block_indices
         )
 
-        block_hashes = self._get_block_hashes(
-            request, start_idx=start_block_idx, end_idx=num_blocks
+        load_job_id = self._generate_job_id()
+        self._current_batch_load_jobs[load_job_id] = TransferJob(
+            req_id=request.request_id,
+            transfer_spec=(src_spec, dst_spec),
+        )
+        # a load can only be issued when no other jobs are pending.
+        assert not req_status.transfer_jobs
+        req_status.transfer_jobs.add(load_job_id)
+        self._jobs[load_job_id] = TransferJobStatus(
+            req_id=request.request_id,
+            pending_count=self.config.num_workers,
+            keys=set(keys_to_load),
+            is_store=False,
         )
-
-        self._reqs_to_load[request.request_id] = (src_spec, dst_spec)
-        req_blocks_being_loaded = self._reqs_being_loaded[request.request_id]
-        req_blocks_being_loaded.update(block_hashes)
-        self._next_stored_block_idx[request.request_id] = num_blocks
 
         if self._blocks_being_loaded is not None:
-            self._blocks_being_loaded.update(req_blocks_being_loaded)
+            self._blocks_being_loaded.update(keys_to_load)
 
-    def _get_reqs_to_store(self, scheduler_output: SchedulerOutput):
-        reqs_to_store: dict[ReqId, TransferSpec] = {}
+    def _build_store_jobs(
+        self,
+        scheduler_output: SchedulerOutput,
+    ) -> dict[int, TransferJob]:
+        block_size_factor = self.config.block_size_factor
+        store_jobs: dict[int, TransferJob] = {}
         # iterate over both new and cached requests
         for req_id, new_block_id_groups, preempted in yield_req_data(scheduler_output):
+            req_status = self._req_status[req_id]
+            req_status.update_offload_keys()
+            req = req_status.req
+
             if preempted:
-                self._request_block_ids[req_id] = []
+                for group_state in req_status.group_states:
+                    group_state.block_ids.clear()
 
             if new_block_id_groups:
-                new_block_ids = new_block_id_groups[0]
-                self._request_block_ids[req_id] += new_block_ids
-
-            block_ids = self._request_block_ids[req_id]
-
-            req = self._requests[req_id]
-            new_tokens = scheduler_output.num_scheduled_tokens[req_id]
-            expected_tokens = req.num_computed_tokens + new_tokens
+                req_status.update_block_id_groups(new_block_id_groups)
+                # Fence new blocks against in-flight stores.
+                if self._block_id_to_pending_jobs:
+                    new_blocks_flat = [
+                        bid for new_blocks in new_block_id_groups for bid in new_blocks
+                    ]
+                    if not self._block_id_to_pending_jobs.keys().isdisjoint(
+                        new_blocks_flat
+                    ):
+                        self._current_batch_jobs_to_flush.update(
+                            jid
+                            for bid in new_blocks_flat
+                            for jid in self._block_id_to_pending_jobs.get(bid, ())
+                        )
+
+            num_scheduled_tokens = scheduler_output.num_scheduled_tokens[req_id]
+            num_tokens_after_batch = req.num_computed_tokens + num_scheduled_tokens
             # with async scheduling, some tokens may be missing
-            total_tokens = min(expected_tokens, req.num_tokens)
-            num_blocks = total_tokens // self.offloaded_block_size
-            start_block_idx = self._next_stored_block_idx.get(req_id, 0)
-            num_new_blocks = num_blocks - start_block_idx
+            num_offloadable_tokens = min(num_tokens_after_batch, req.num_tokens)
 
-            if num_new_blocks <= 0:
+            # Filter out blocks skipped due to sliding window attention / SSM
+            # or unreachable by the load path's alignment constraints.
+            new_offload_keys: list[OffloadKey] = []
+            for group_config, group_state in zip(
+                self.config.kv_group_configs, req_status.group_states
+            ):
+                num_blocks = num_offloadable_tokens // group_config.offloaded_block_size
+                start_block_idx = group_state.next_stored_block_idx
+                if num_blocks <= start_block_idx:
+                    continue
+                offload_keys = group_state.offload_keys[start_block_idx:num_blocks]
+                # For each block to offload, take the last corresponding GPU block.
+                # e.g. if block size factor is 3 and GPU block IDs are
+                # 1 5 6 7 2 4 9 3 8 then we'll take blocks 6 4 8.
+                # We will use these GPU blocks to determine if the block needs
+                # offloading, or (if the GPU block ID is 0) this block should
+                # be skipped due to sliding window attention / SSM.
+                # We know that if a block is skipped, then all the previous blocks
+                # are skipped as well. This is why we take the last of each block.
+                offload_block_ids = group_state.block_ids[
+                    start_block_idx * block_size_factor
+                    + block_size_factor
+                    - 1 : num_blocks * block_size_factor : block_size_factor
+                ]
+                assert len(offload_keys) == len(offload_block_ids)
+
+                alignment_block_count = group_config.alignment_block_count
+                tail = group_config.sliding_window_size_in_blocks
+
+                for key_idx, (offload_key, block_id) in enumerate(
+                    zip(offload_keys, offload_block_ids)
+                ):
+                    if block_id == 0:
+                        continue
+                    # Skip SWA blocks that can never serve a load hit:
+                    # within each full-attention alignment segment, only the
+                    # trailing `tail` blocks are reachable by
+                    # _sliding_window_lookup. For DeepSeek V4 with 100K
+                    # tokens this reduces SWA stores by ~78%.
+                    if alignment_block_count is not None:
+                        assert tail is not None
+                        abs_block_idx = start_block_idx + key_idx
+                        pos_in_segment = abs_block_idx % alignment_block_count
+                        if pos_in_segment < alignment_block_count - tail:
+                            continue
+                    new_offload_keys.append(offload_key)
+
+            if not new_offload_keys:
+                req_status.advance_stored_idx(num_offloadable_tokens)
                 continue
 
-            num_gpu_blocks = num_blocks * self.block_size_factor
-            assert len(req.block_hashes) >= num_gpu_blocks
-
-            new_block_hashes = self._get_block_hashes(
-                req, start_idx=start_block_idx, end_idx=num_blocks
+            store_output = self.manager.prepare_store(
+                new_offload_keys, req_status.req_context
             )
-            store_output = self.manager.prepare_store(new_block_hashes)
             if store_output is None:
-                logger.warning(
-                    "Request %s: cannot store %s blocks", req_id, num_new_blocks
-                )
+                logger.warning("Request %s: cannot store blocks", req_id)
                 continue
 
-            self._next_stored_block_idx[req_id] = num_blocks
-
-            if not store_output.block_hashes_to_store:
+            if not store_output.keys_to_store:
+                req_status.advance_stored_idx(num_offloadable_tokens)
                 continue
-            block_hashes_to_store = set(store_output.block_hashes_to_store)
 
-            block_hashes = self._get_block_hashes(req, end_idx=num_blocks)
-            self.manager.touch(block_hashes)
+            self._touch(req_status)
 
-            new_block_hashes = self._get_block_hashes(
-                req, start_idx=start_block_idx, end_idx=num_blocks
-            )
-            dst_spec = store_output.store_spec
+            keys_to_store = set(store_output.keys_to_store)
+
+            group_sizes: list[int] = []
+            block_indices: list[int] = []
             src_block_ids: list[int] = []
-            for idx, blk_hash in enumerate(new_block_hashes):
-                if blk_hash not in block_hashes_to_store:
-                    continue
-                offloaded_block_idx = start_block_idx + idx
-                gpu_block_idx = offloaded_block_idx * self.block_size_factor
-                for i in range(self.block_size_factor):
-                    src_block_ids.append(block_ids[gpu_block_idx + i])
+            sliding_window_block_ids: list[int] = []
+            non_sliding_window_block_ids: list[int] = []
+            for group_config, group_state in zip(
+                self.config.kv_group_configs, req_status.group_states
+            ):
+                is_sliding_window = (
+                    group_config.sliding_window_size_in_blocks is not None
+                )
+                num_blocks = num_offloadable_tokens // group_config.offloaded_block_size
+                start_block_idx = group_state.next_stored_block_idx
+                block_ids = group_state.block_ids
+                num_group_blocks = 0
+                start_gpu_block_idx: int | None = None
+                for idx, offload_key in enumerate(
+                    group_state.offload_keys[start_block_idx:num_blocks]
+                ):
+                    if offload_key not in keys_to_store:
+                        continue
+
+                    offloaded_block_idx = start_block_idx + idx
+                    gpu_block_idx = offloaded_block_idx * block_size_factor
+                    for i in range(block_size_factor):
+                        block_id = block_ids[gpu_block_idx + i]
+                        if block_id == 0:
+                            # skipped blocks cannot appear after non-skipped blocks
+                            assert start_gpu_block_idx is None
+                            continue
+                        elif start_gpu_block_idx is None:
+                            start_gpu_block_idx = gpu_block_idx + i
+                        src_block_ids.append(block_id)
+                        num_group_blocks += 1
+                        if is_sliding_window:
+                            sliding_window_block_ids.append(block_id)
+                        else:
+                            non_sliding_window_block_ids.append(block_id)
+
+                group_sizes.append(num_group_blocks)
+                block_indices.append(start_gpu_block_idx or 0)
+                group_state.next_stored_block_idx = num_blocks
+
             src_spec = GPULoadStoreSpec(
-                src_block_ids, group_sizes=(len(src_block_ids),)
+                src_block_ids, group_sizes=group_sizes, block_indices=block_indices
+            )
+            dst_spec = store_output.store_spec
+
+            job_id = self._generate_job_id()
+            # a store can only be issued when no load is pending.
+            if req_status.transfer_jobs:
+                any_jid = next(iter(req_status.transfer_jobs))
+                assert self._jobs[any_jid].is_store
+            req_status.transfer_jobs.add(job_id)
+
+            # Watch sliding window blocks as they may get evicted
+            # before the request finishes
+            for bid in sliding_window_block_ids or ():
+                self._block_id_to_pending_jobs.setdefault(bid, set()).add(job_id)
+
+            # the non-sliding window blocks will be watched only
+            # when the request finishes
+            self._jobs[job_id] = TransferJobStatus(
+                req_id=req_id,
+                pending_count=self.config.num_workers,
+                keys=set(keys_to_store),
+                is_store=True,
+                non_sliding_window_block_ids=non_sliding_window_block_ids,
+                sliding_window_block_ids=sliding_window_block_ids or None,
             )
 
-            reqs_to_store[req_id] = (src_spec, dst_spec)
-            self._reqs_being_stored[req_id] |= block_hashes_to_store
+            store_jobs[job_id] = TransferJob(
+                req_id=req_id, transfer_spec=(src_spec, dst_spec)
+            )
 
             logger.debug(
-                "Request %s offloading %s blocks starting from block #%d",
+                "Request %s offloading %s blocks upto %d tokens (job %d)",
                 req_id,
-                len(block_hashes_to_store),
-                start_block_idx,
+                len(keys_to_store),
+                num_offloadable_tokens,
+                job_id,
             )
 
-        return reqs_to_store
+        return store_jobs
 
     def build_connector_meta(
         self, scheduler_output: SchedulerOutput
     ) -> KVConnectorMetadata:
-        meta = OffloadingConnectorMetadata(
-            reqs_to_load=self._reqs_to_load,
-            reqs_to_store=self._get_reqs_to_store(scheduler_output),
-            reqs_to_flush=scheduler_output.preempted_req_ids,
-        )
-        self._reqs_to_load = {}
-
-        # NOTE (orozery): we should move this logic to update_connector_output
-        # once KVConnectorOutput allows us to report completed transfers
         for req_id in scheduler_output.preempted_req_ids or ():
-            block_hashes = self._reqs_being_stored.get(req_id)
-            if block_hashes:
-                self.manager.complete_store(block_hashes)
-                block_hashes.clear()
+            req_status = self._req_status.get(req_id)
+            if req_status is None or not req_status.transfer_jobs:
+                continue
+            any_jid = next(iter(req_status.transfer_jobs))
+            assert self._jobs[any_jid].is_store
+            self._current_batch_jobs_to_flush.update(req_status.transfer_jobs)
+
+        # If all tracked requests are finished, flush all pending jobs
+        # (both store and load) - there might not be a future scheduler
+        # step to trigger their completion.
+        if self._req_status and all(
+            rs.req.is_finished() for rs in self._req_status.values()
+        ):
+            self._current_batch_jobs_to_flush.update(self._jobs.keys())
 
+        meta = OffloadingConnectorMetadata(
+            load_jobs=self._current_batch_load_jobs,
+            store_jobs=self._build_store_jobs(scheduler_output),
+            jobs_to_flush=self._current_batch_jobs_to_flush,
+        )
+        self._current_batch_load_jobs = {}
+        self._current_batch_jobs_to_flush = set()
         return meta
 
     def update_connector_output(self, connector_output: KVConnectorOutput):
@@ -294,22 +871,51 @@ def update_connector_output(self, connector_output: KVConnectorOutput):
             connector_output (KVConnectorOutput): the worker-side
                 connectors output.
         """
-        for req_id in connector_output.finished_sending or []:
-            block_hashes = self._reqs_being_stored.pop(req_id, None)
-            if block_hashes:
-                self.manager.complete_store(block_hashes)
-
-        for req_id in connector_output.finished_recving or []:
-            block_hashes = self._reqs_being_loaded.pop(req_id, None)
-            if block_hashes:
+        meta = connector_output.kv_connector_worker_meta
+        if not isinstance(meta, OffloadingWorkerMetadata):
+            assert meta is None
+            meta = OffloadingWorkerMetadata()
+        for job_id, count in meta.completed_jobs.items():
+            assert count > 0
+            if job_id < self._stale_job_threshold:
+                logger.debug(
+                    "Skipping stale completed job %d (pre-reset counter: %d)",
+                    job_id,
+                    self._stale_job_threshold,
+                )
+                continue
+            job_status = self._jobs[job_id]
+            job_status.pending_count -= count
+            if job_status.pending_count > 0:
+                continue
+            assert job_status.pending_count == 0
+
+            req_status = self._req_status[job_status.req_id]
+            if job_status.is_store:
+                self.manager.complete_store(job_status.keys, req_status.req_context)
+            else:
+                self.manager.complete_load(job_status.keys, req_status.req_context)
                 if self._blocks_being_loaded:
-                    self._blocks_being_loaded.difference_update(block_hashes)
-                self.manager.complete_load(block_hashes)
+                    self._blocks_being_loaded.difference_update(job_status.keys)
+            if self._block_id_to_pending_jobs:
+                # Sliding window blocks are tracked from store creation
+                # and must be cleaned up unconditionally.
+                self._remove_pending_job(job_id, job_status.sliding_window_block_ids)
+                # Non-sliding-window blocks are only tracked after
+                # request_finished, so only clean up for finished requests.
+                if req_status.req.is_finished():
+                    self._remove_pending_job(
+                        job_id, job_status.non_sliding_window_block_ids
+                    )
+
+            del self._jobs[job_id]
+            req_status.transfer_jobs.remove(job_id)
+            if not req_status.transfer_jobs and req_status.req.is_finished():
+                del self._req_status[job_status.req_id]
 
     def request_finished(
         self,
         request: Request,
-        block_ids: list[int],
     ) -> tuple[bool, dict[str, Any] | None]:
         """
         Called when a request has finished, before its blocks are freed.
@@ -321,16 +927,21 @@ def request_finished(
             Optional KVTransferParams to be included in the request outputs
             returned by the engine.
         """
-        req_id = request.request_id
-        self._requests.pop(req_id, None)
-        self._request_block_ids.pop(req_id, None)
-
         # TODO(orozery): possibly kickoff offload for last block
         # which may have been deferred due to async scheduling
-        self._next_stored_block_idx.pop(req_id, None)
-
-        request_being_stored = req_id in self._reqs_being_stored
-        return request_being_stored, None
+        req_status = self._req_status.get(request.request_id)
+        if req_status is None:
+            return False, None
+        if not req_status.transfer_jobs:
+            del self._req_status[request.request_id]
+            return False, None
+        # Pending stores will outlive the request's block ownership.
+        # Register them so future block reuse triggers a flush.
+        for job_id in req_status.transfer_jobs:
+            job_status = self._jobs[job_id]
+            for bid in job_status.non_sliding_window_block_ids or ():
+                self._block_id_to_pending_jobs.setdefault(bid, set()).add(job_id)
+        return False, None
 
     def take_events(self) -> Iterable[KVCacheEvent]:
         """Take the KV cache events from the connector.
@@ -339,15 +950,47 @@ def take_events(self) -> Iterable[KVCacheEvent]:
             A list of KV cache events.
         """
         for event in self.manager.take_events():
+            block_hashes = [get_offload_block_hash(key) for key in event.keys]
             if event.removed:
-                yield BlockRemoved(block_hashes=event.block_hashes, medium=event.medium)
+                yield BlockRemoved(block_hashes=block_hashes, medium=event.medium)
             else:
                 yield BlockStored(
-                    block_hashes=event.block_hashes,
+                    block_hashes=block_hashes,
                     parent_block_hash=None,
                     token_ids=[],
                     lora_id=None,
-                    block_size=event.block_size,
+                    block_size=0,
                     medium=event.medium,
                     lora_name=None,
                 )
+
+    def reset_cache(self) -> None:
+        """Reset the offloading manager cache, evicting all stored blocks."""
+
+        # reset_cache cannot be called in the middle of a schedule step
+        assert not self._current_batch_load_jobs
+        assert not self._current_batch_jobs_to_flush
+
+        # Flush all in-flight jobs
+        self._current_batch_jobs_to_flush.update(self._jobs.keys())
+
+        # Reset offloading manager cache
+        self.manager.reset_cache()
+
+        # Reset store progress so active requests re-offload from block 0
+        for status in self._req_status.values():
+            for group_state in status.group_states:
+                group_state.next_stored_block_idx = 0
+
+        # Discard jobs and save job_counter to be able to discard worker responses
+        self._stale_job_threshold = self._job_counter
+        self._jobs.clear()
+        self._block_id_to_pending_jobs.clear()
+
+        # Note: _current_batch_jobs_to_flush is intentionally NOT cleared.
+        # The load flush IDs collected above must be delivered to workers.
+        if self._blocks_being_loaded is not None:
+            self._blocks_being_loaded.clear()
+
+    def shutdown(self) -> None:
+        self.manager.shutdown()
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/offloading/worker.py b/vllm/distributed/kv_transfer/kv_connector/v1/offloading/worker.py
index 77398eee8885..6c074bd2a608 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/offloading/worker.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/offloading/worker.py
@@ -11,6 +11,7 @@
 )
 from vllm.distributed.kv_transfer.kv_connector.v1.offloading.common import (
     OffloadingConnectorMetadata,
+    OffloadingWorkerMetadata,
     ReqId,
 )
 from vllm.distributed.kv_transfer.kv_connector.v1.offloading.metrics import (
@@ -24,7 +25,7 @@
     MambaSpec,
     UniformTypeKVCacheSpecs,
 )
-from vllm.v1.kv_offload.spec import (
+from vllm.v1.kv_offload.base import (
     CanonicalKVCacheRef,
     CanonicalKVCaches,
     CanonicalKVCacheTensor,
@@ -45,24 +46,11 @@ def __init__(self, spec: OffloadingSpec):
         self.spec = spec
         self.worker = OffloadingWorker()
 
-        self._job_counter = 0
-
         self.kv_connector_stats = OffloadingConnectorStats()
-        # req_id -> (job_id, store)
-        self._jobs: dict[int, tuple[ReqId, bool]] = {}
-        # req_id -> active job IDs
-        self._load_job: dict[ReqId, int] = {}
-        # req_id -> set(active job IDs)
-        self._store_jobs = defaultdict[ReqId, set[int]](set)
-        # list of store jobs pending submission (job_id, transfer_spec)
+        # job_id -> req_id for in-flight loads.
+        self._load_jobs: dict[int, ReqId] = {}
         self._unsubmitted_store_jobs: list[tuple[int, TransferSpec]] = []
-
-        self._finished_reqs_waiting_for_store: set[ReqId] = set()
-
-    def _generate_job_id(self) -> int:
-        job_id = self._job_counter
-        self._job_counter = job_id + 1
-        return job_id
+        self._connector_worker_meta = OffloadingWorkerMetadata()
 
     def _register_handlers(self, kv_caches: CanonicalKVCaches):
         for src_cls, dst_cls, handler in self.spec.get_handlers(kv_caches):
@@ -83,6 +71,8 @@ def register_kv_caches(
             if layer_name in layers
         }
 
+        num_blocks = self.spec.kv_cache_config.num_blocks
+
         # layer_name -> list of matching KV cache tensors
         # such that each tensor starts with the num_blocks dimension.
         # FlashAttention layers which use the (2, num_blocks, ...) layout
@@ -132,7 +122,6 @@ def register_kv_caches(
                         num_blocks_logical_dim
                     )
                     if num_blocks_physical_dim == 0:
-                        num_blocks = layer_kv_cache.shape[num_blocks_logical_dim]
                         storage = layer_kv_cache.untyped_storage()
                         page = layer_kv_cache_spec.page_size_bytes
                         tensors_per_block[layer_name] = (
@@ -147,6 +136,9 @@ def register_kv_caches(
                         page_size_bytes[layer_name] = (
                             layer_kv_cache_spec.page_size_bytes
                         )
+                        unpadded_page_size_bytes[layer_name] = (
+                            layer_kv_cache_spec.real_page_size_bytes
+                        )
                     else:
                         # Flash Attention case: (2, num_blocks, ...)
                         assert test_shape[0] == 2
@@ -154,7 +146,6 @@ def register_kv_caches(
                         assert num_blocks_physical_dim == 1
 
                         # unbind the tensor to separate K and V tensors
-                        num_blocks = layer_kv_cache.shape[num_blocks_logical_dim]
                         half_page_size = layer_kv_cache_spec.page_size_bytes // 2
                         storage = layer_kv_cache.untyped_storage()
                         raw = (
@@ -169,8 +160,9 @@ def register_kv_caches(
                         tensors_per_block[layer_name] = tuple(raw.unbind(0))
 
                         page_size_bytes[layer_name] = half_page_size
-
-                    unpadded_page_size_bytes[layer_name] = page_size_bytes[layer_name]
+                        unpadded_page_size_bytes[layer_name] = (
+                            layer_kv_cache_spec.real_page_size_bytes // 2
+                        )
 
                 elif isinstance(layer_kv_cache_spec, MambaSpec):
                     state_tensors = kv_caches[layer_name]
@@ -181,7 +173,6 @@ def register_kv_caches(
                     assert len(state_tensors) > 0
                     first_state_tensor = state_tensors[0]
                     assert first_state_tensor.storage_offset() == 0
-                    num_blocks = first_state_tensor.shape[0]
                     tensor = (
                         torch.tensor(
                             [],
@@ -204,7 +195,16 @@ def register_kv_caches(
         block_tensors: list[CanonicalKVCacheTensor] = []
         block_data_refs: dict[str, list[CanonicalKVCacheRef]] = defaultdict(list)
         for kv_cache_tensor in self.spec.kv_cache_config.kv_cache_tensors:
-            tensor_layer_names = kv_cache_tensor.shared_by
+            # Filter to layers that were actually processed above.
+            # _get_kv_cache_config_deepseek_v4 emits KVCacheTensor entries for
+            # every (tuple_idx, page_size) slot; slots where no group has a
+            # layer at that index produce an empty shared_by (reserved memory
+            # with no corresponding model layer).
+            tensor_layer_names = [
+                n for n in kv_cache_tensor.shared_by if n in tensors_per_block
+            ]
+            if not tensor_layer_names:
+                continue
 
             # verify all layers in the group reference the exact same tensors
             assert len({len(tensors_per_block[n]) for n in tensor_layer_names}) == 1
@@ -302,10 +302,8 @@ def handle_preemptions(self, kv_connector_metadata: OffloadingConnectorMetadata)
             assert success
         self._unsubmitted_store_jobs.clear()
 
-        for req_id in kv_connector_metadata.reqs_to_flush or ():
-            job_ids = self._store_jobs.get(req_id)
-            if job_ids:
-                self.worker.wait(job_ids)
+        if kv_connector_metadata.jobs_to_flush:
+            self.worker.wait(kv_connector_metadata.jobs_to_flush)
 
     def start_kv_transfers(self, metadata: OffloadingConnectorMetadata):
         for job_id, transfer_spec in self._unsubmitted_store_jobs:
@@ -313,41 +311,33 @@ def start_kv_transfers(self, metadata: OffloadingConnectorMetadata):
             assert success
         self._unsubmitted_store_jobs.clear()
 
-        for req_id, transfer_spec in metadata.reqs_to_load.items():
-            job_id = self._generate_job_id()
-            self._jobs[job_id] = (req_id, False)
-            assert req_id not in self._load_job
-            self._load_job[req_id] = job_id
-            success = self.worker.transfer_async(job_id, transfer_spec)
+        for job_id, entry in metadata.load_jobs.items():
+            self._load_jobs[job_id] = entry.req_id
+            success = self.worker.transfer_async(job_id, entry.transfer_spec)
             assert success
 
     def prepare_store_kv(self, metadata: OffloadingConnectorMetadata):
-        for req_id, transfer_spec in metadata.reqs_to_store.items():
-            job_id = self._generate_job_id()
-            self._jobs[job_id] = (req_id, True)
-            self._store_jobs[req_id].add(job_id)
-            # NOTE(orozery): defer the store to the beginning of the next engine step,
-            # so that offloading starts AFTER transfers related to token sampling,
-            # thereby avoiding delays to token generation due to offloading.
-            self._unsubmitted_store_jobs.append((job_id, transfer_spec))
+        for job_id, entry in metadata.store_jobs.items():
+            # NOTE(orozery): defer the store to the beginning of the next
+            # engine step, so that offloading starts AFTER transfers related
+            # to token sampling, thereby avoiding delays to token generation.
+            self._unsubmitted_store_jobs.append((job_id, entry.transfer_spec))
 
     def get_finished(self, finished_req_ids: set[str]) -> tuple[set[str], set[str]]:
         """
-        Notifies worker-side connector ids of requests that have
-        finished generating tokens.
-        Returns a list of request IDs that finished loading or storing.
-
         Returns:
-            ids of requests that have finished asynchronous transfer
-            tuple of (sending/saving ids, recving/loading ids).
+            tuple of (finished_sending, finished_recving). Stores never
+            emit finished_sending — the scheduler tracks store completion
+            via kv_connector_worker_meta.completed_jobs and fences any
+            block reuse via jobs_to_flush. Loads still emit
+            finished_recving so the base scheduler can resume requests
+            blocked on remote KV (and free aborted-during-load reqs).
         """
-        finished_sending = set()
-        finished_recving = set()
+        finished_recving: set[str] = set()
         for transfer_result in self.worker.get_finished():
             # we currently do not support job failures
             job_id = transfer_result.job_id
             assert transfer_result.success
-            req_id, store = self._jobs.pop(job_id)
             if (
                 transfer_result.transfer_time
                 and transfer_result.transfer_size is not None
@@ -358,31 +348,21 @@ def get_finished(self, finished_req_ids: set[str]) -> tuple[set[str], set[str]]:
                     time=transfer_result.transfer_time,
                     transfer_type=transfer_result.transfer_type,
                 )
-            if store:
-                req_jobs = self._store_jobs[req_id]
-                req_jobs.remove(job_id)
-                if req_jobs:
-                    continue
-
-                if req_id in self._finished_reqs_waiting_for_store:
-                    self._finished_reqs_waiting_for_store.remove(req_id)
-                    finished_sending.add(req_id)
-                    del self._store_jobs[req_id]
-            else:
-                req_job = self._load_job[req_id]
-                assert job_id == req_job
-                del self._load_job[req_id]
+
+            self._connector_worker_meta.mark_completed(job_id)
+            req_id = self._load_jobs.pop(job_id, None)
+            if req_id is not None:
                 finished_recving.add(req_id)
 
-        for req_id in finished_req_ids:
-            pending_req_jobs = self._store_jobs.get(req_id)
-            if pending_req_jobs:
-                self._finished_reqs_waiting_for_store.add(req_id)
-            elif pending_req_jobs is not None:
-                finished_sending.add(req_id)
-                del self._store_jobs[req_id]
+        return set(), finished_recving
 
-        return finished_sending, finished_recving
+    def build_connector_worker_meta(self) -> OffloadingWorkerMetadata | None:
+        """Return completed transfer job IDs since the last call."""
+        if not self._connector_worker_meta.completed_jobs:
+            return None
+        meta = self._connector_worker_meta
+        self._connector_worker_meta = OffloadingWorkerMetadata()
+        return meta
 
     def get_kv_connector_stats(self) -> KVConnectorStats | None:
         """
@@ -395,3 +375,9 @@ def get_kv_connector_stats(self) -> KVConnectorStats | None:
         kv_connector_stats = self.kv_connector_stats
         self.kv_connector_stats = OffloadingConnectorStats()
         return kv_connector_stats
+
+    def shutdown(self) -> None:
+        self._unsubmitted_store_jobs.clear()
+        self._load_jobs.clear()
+        self._connector_worker_meta = OffloadingWorkerMetadata()
+        self.worker.shutdown()
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
index 547ee2578a12..6c75bda0c4cf 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
@@ -10,6 +10,7 @@
 from vllm.distributed.kv_transfer.kv_connector.v1 import (
     KVConnectorBase_V1,
     KVConnectorRole,
+    SupportsHMA,
 )
 from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorMetadata
 from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
@@ -20,6 +21,7 @@
 )
 from vllm.distributed.kv_transfer.kv_connector.v1.offloading.common import (
     OffloadingConnectorMetadata,
+    OffloadingWorkerMetadata,
 )
 from vllm.distributed.kv_transfer.kv_connector.v1.offloading.metrics import (
     OffloadingConnectorStats,
@@ -41,7 +43,7 @@
 from vllm.v1.request import Request
 
 
-class OffloadingConnector(KVConnectorBase_V1):
+class OffloadingConnector(KVConnectorBase_V1, SupportsHMA):
     @property
     def prefer_cross_layer_blocks(self) -> bool:
         return True
@@ -50,11 +52,10 @@ def __init__(
         self,
         vllm_config: VllmConfig,
         role: KVConnectorRole,
-        kv_cache_config: KVCacheConfig | None = None,
+        kv_cache_config: KVCacheConfig,
     ):
         super().__init__(vllm_config, role, kv_cache_config)
 
-        assert kv_cache_config is not None
         spec = OffloadingSpecFactory.create_spec(vllm_config, kv_cache_config)
 
         self.connector_scheduler: OffloadingConnectorScheduler | None = None
@@ -64,6 +65,12 @@ def __init__(
         elif role == KVConnectorRole.WORKER:
             self.connector_worker = OffloadingConnectorWorker(spec)
 
+    def shutdown(self) -> None:
+        if self.connector_worker is not None:
+            self.connector_worker.shutdown()
+        if self.connector_scheduler is not None:
+            self.connector_scheduler.shutdown()
+
     def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
         assert self.connector_worker is not None
         self.connector_worker.register_kv_caches(kv_caches)
@@ -97,14 +104,26 @@ def save_kv_layer(
         pass
 
     def wait_for_save(self):
+        # Store deferral is handled in get_finished(), which always runs even
+        # when wait_for_save() is skipped (e.g. kv_connector_no_forward).
+        pass
+
+    def get_finished(self, finished_req_ids: set[str]) -> tuple[set[str], set[str]]:
         assert self.connector_worker is not None
         assert isinstance(self._connector_metadata, OffloadingConnectorMetadata)
+
+        # Defer store jobs to the next step's start_kv_transfers. Done here
+        # (rather than wait_for_save) so stores are queued even on steps where
+        # wait_for_save is skipped.
         self.connector_worker.prepare_store_kv(self._connector_metadata)
 
-    def get_finished(self, finished_req_ids: set[str]) -> tuple[set[str], set[str]]:
-        assert self.connector_worker is not None
         return self.connector_worker.get_finished(finished_req_ids)
 
+    def build_connector_worker_meta(self) -> OffloadingWorkerMetadata | None:
+        if self.connector_worker is not None:
+            return self.connector_worker.build_connector_worker_meta()
+        return None
+
     def get_num_new_matched_tokens(
         self, request: "Request", num_computed_tokens: int
     ) -> tuple[int | None, bool]:
@@ -137,12 +156,29 @@ def request_finished(
         block_ids: list[int],
     ) -> tuple[bool, dict[str, Any] | None]:
         assert self.connector_scheduler is not None
-        return self.connector_scheduler.request_finished(request, block_ids)
+        return self.connector_scheduler.request_finished(request)
+
+    def request_finished_all_groups(
+        self,
+        request: "Request",
+        block_ids: tuple[list[int], ...],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        assert self.connector_scheduler is not None
+        return self.connector_scheduler.request_finished(request)
 
     def take_events(self) -> Iterable[KVCacheEvent]:
         assert self.connector_scheduler is not None
         return self.connector_scheduler.take_events()
 
+    @classmethod
+    def get_required_kvcache_layout(cls, vllm_config: VllmConfig) -> str | None:
+        return "HND"
+
+    def reset_cache(self) -> bool | None:
+        assert self.connector_scheduler is not None
+        self.connector_scheduler.reset_cache()
+        return True
+
     def get_kv_connector_stats(self) -> KVConnectorStats | None:
         if self.connector_worker is None:
             return None  # We only emit stats from the worker-side
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
index ce228b3c6f23..aa791921be1a 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
@@ -76,7 +76,7 @@ def __init__(
         self,
         vllm_config: "VllmConfig",
         role: KVConnectorRole,
-        kv_cache_config: "KVCacheConfig | None" = None,
+        kv_cache_config: "KVCacheConfig",
     ):
         super().__init__(
             vllm_config=vllm_config,
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/simple_cpu_offload_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/simple_cpu_offload_connector.py
new file mode 100644
index 000000000000..15904da9e531
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/simple_cpu_offload_connector.py
@@ -0,0 +1,254 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""SimpleCPUOffloadConnector: minimal CPU KV cache offloading."""
+
+from collections.abc import Iterable
+from typing import TYPE_CHECKING, Any
+
+import torch
+
+from vllm.config import VllmConfig
+from vllm.distributed.kv_events import KVCacheEvent
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    KVConnectorBase_V1,
+    KVConnectorMetadata,
+    KVConnectorRole,
+    SupportsHMA,
+)
+from vllm.logger import init_logger
+from vllm.v1.core.sched.output import SchedulerOutput
+from vllm.v1.outputs import KVConnectorOutput
+from vllm.v1.simple_kv_offload.manager import (
+    SimpleCPUOffloadScheduler,
+)
+from vllm.v1.simple_kv_offload.metadata import (
+    SimpleCPUOffloadMetadata,
+)
+from vllm.v1.simple_kv_offload.worker import (
+    SimpleCPUOffloadWorker,
+)
+
+if TYPE_CHECKING:
+    from vllm.forward_context import ForwardContext
+    from vllm.v1.attention.backend import AttentionMetadata
+    from vllm.v1.core.block_pool import BlockPool
+    from vllm.v1.core.kv_cache_manager import KVCacheBlocks
+    from vllm.v1.kv_cache_interface import KVCacheConfig
+    from vllm.v1.request import Request
+
+logger = init_logger(__name__)
+
+# Default CPU capacity: 8 GB
+DEFAULT_CPU_CAPACITY_BYTES = 8 * (1024**3)
+
+
+class SimpleCPUOffloadConnector(KVConnectorBase_V1, SupportsHMA):
+    """CPU KV cache offloading with custom kernel transfers and BlockPool LRU."""
+
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        role: KVConnectorRole,
+        kv_cache_config: "KVCacheConfig",
+    ):
+        super().__init__(vllm_config, role, kv_cache_config)
+
+        enable_prefix_caching = vllm_config.cache_config.enable_prefix_caching
+        extra_config = self._kv_transfer_config.kv_connector_extra_config or {}
+
+        cpu_capacity_bytes = int(
+            extra_config.get("cpu_bytes_to_use", DEFAULT_CPU_CAPACITY_BYTES)
+        )
+        # cpu_bytes_to_use is server-wide for compatibility;
+        # cpu_bytes_to_use_per_rank overrides for per-rank capacity.
+        world_size = vllm_config.parallel_config.world_size
+        cpu_capacity_per_rank = cpu_capacity_bytes // world_size
+        if "cpu_bytes_to_use_per_rank" in extra_config:
+            explicit = int(extra_config["cpu_bytes_to_use_per_rank"])
+            if explicit != cpu_capacity_per_rank:
+                logger.warning(
+                    "cpu_bytes_to_use_per_rank (%.2f GB) != "
+                    "cpu_bytes_to_use/world_size (%.2f GB). Using per-rank value.",
+                    explicit / (1024**3),
+                    cpu_capacity_per_rank / (1024**3),
+                )
+            cpu_capacity_per_rank = explicit
+
+        lazy_offload = bool(extra_config.get("lazy_offload", False))
+
+        self.scheduler_manager: SimpleCPUOffloadScheduler | None = None
+        self.worker_handler: SimpleCPUOffloadWorker | None = None
+
+        if not enable_prefix_caching:
+            logger.warning(
+                "Detected prefix caching disabled, disabling CPU offload "
+                "since it requires prefix caching."
+            )
+            return
+
+        logger.info(
+            "SimpleCPUOffloadConnector: role=%s, "
+            "per_rank=%.2f GB, world_size=%d, mode=%s",
+            role.name,
+            cpu_capacity_per_rank / (1024**3),
+            world_size,
+            "lazy" if lazy_offload else "eager",
+        )
+
+        if role == KVConnectorRole.SCHEDULER:
+            from vllm.v1.core.kv_cache_utils import resolve_kv_cache_block_sizes
+
+            assert kv_cache_config is not None
+            scheduler_block_size, hash_block_size = resolve_kv_cache_block_sizes(
+                kv_cache_config, vllm_config
+            )
+            self.scheduler_manager = SimpleCPUOffloadScheduler(
+                vllm_config,
+                kv_cache_config,
+                cpu_capacity_per_rank,
+                scheduler_block_size=scheduler_block_size,
+                hash_block_size=hash_block_size,
+                lazy_offload=lazy_offload,
+            )
+        elif role == KVConnectorRole.WORKER:
+            self.worker_handler = SimpleCPUOffloadWorker(
+                vllm_config, kv_cache_config, cpu_capacity_per_rank
+            )
+
+    # --- Worker-side methods ---
+
+    def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]) -> None:
+        if self.worker_handler is not None:
+            self.worker_handler.register_kv_caches(kv_caches)
+
+    def bind_connector_metadata(
+        self,
+        connector_metadata: KVConnectorMetadata,
+    ) -> None:
+        super().bind_connector_metadata(connector_metadata)
+        if self.worker_handler is not None:
+            assert isinstance(connector_metadata, SimpleCPUOffloadMetadata)
+            self.worker_handler.bind_connector_metadata(connector_metadata)
+
+    def clear_connector_metadata(self) -> None:
+        super().clear_connector_metadata()
+        if self.worker_handler is not None:
+            self.worker_handler.clear_connector_metadata()
+
+    def handle_preemptions(self, kv_connector_metadata: KVConnectorMetadata) -> None:
+        if self.worker_handler is not None:
+            assert isinstance(kv_connector_metadata, SimpleCPUOffloadMetadata)
+            self.worker_handler.handle_preemptions(kv_connector_metadata)
+
+    def start_load_kv(self, forward_context: "ForwardContext", **kwargs: Any) -> None:
+        pass  # Launch loads ops in get_finished() after launching model execution
+
+    def wait_for_layer_load(self, layer_name: str) -> None:
+        pass  # Always load asynchronously and deferred to get_finished()
+
+    def save_kv_layer(
+        self,
+        layer_name: str,
+        kv_layer: torch.Tensor,
+        attn_metadata: "AttentionMetadata",
+        **kwargs: Any,
+    ) -> None:
+        pass  # Always save asynchronously and deferred to get_finished()
+
+    def wait_for_save(self) -> None:
+        pass  # All stores are driven by get_finished() and no wait needed
+
+    def get_finished(
+        self,
+        finished_req_ids: set[str],
+    ) -> tuple[set[str] | None, set[str] | None]:
+        if self.worker_handler is not None:
+            return self.worker_handler.get_finished(finished_req_ids)
+        return None, None
+
+    def build_connector_worker_meta(self):
+        if self.worker_handler is not None:
+            return self.worker_handler.build_connector_worker_meta()
+        return None
+
+    # --- Scheduler-side methods ---
+
+    def bind_gpu_block_pool(self, gpu_block_pool: "BlockPool") -> None:
+        if self.scheduler_manager is not None:
+            self.scheduler_manager.bind_gpu_block_pool(gpu_block_pool)
+
+    def get_num_new_matched_tokens(
+        self,
+        request: "Request",
+        num_computed_tokens: int,
+    ) -> tuple[int | None, bool]:
+        if self.scheduler_manager is not None:
+            return self.scheduler_manager.get_num_new_matched_tokens(
+                request, num_computed_tokens
+            )
+        return 0, False
+
+    def update_state_after_alloc(
+        self,
+        request: "Request",
+        blocks: "KVCacheBlocks",
+        num_external_tokens: int,
+    ) -> None:
+        if self.scheduler_manager is not None:
+            self.scheduler_manager.update_state_after_alloc(
+                request, blocks, num_external_tokens
+            )
+
+    def build_connector_meta(
+        self,
+        scheduler_output: SchedulerOutput,
+    ) -> KVConnectorMetadata:
+        if self.scheduler_manager is not None:
+            return self.scheduler_manager.build_connector_meta(scheduler_output)
+        return SimpleCPUOffloadMetadata()
+
+    def update_connector_output(
+        self,
+        connector_output: KVConnectorOutput,
+    ) -> None:
+        if self.scheduler_manager is not None:
+            self.scheduler_manager.update_connector_output(connector_output)
+
+    def request_finished(
+        self,
+        request: "Request",
+        block_ids: list[int],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        if self.scheduler_manager is not None:
+            return self.scheduler_manager.request_finished(request, block_ids)
+        return False, None
+
+    def request_finished_all_groups(
+        self,
+        request: "Request",
+        block_ids: tuple[list[int], ...],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        if self.scheduler_manager is not None:
+            return self.scheduler_manager.request_finished_all_groups(
+                request, block_ids
+            )
+        return False, None
+
+    # NOTE: New API only for SimpleCPUOffloadConnector.
+    def has_pending_transfers(self) -> bool:
+        if self.scheduler_manager is not None:
+            return self.scheduler_manager.has_pending_stores()
+        return False
+
+    def take_events(self) -> Iterable[KVCacheEvent]:
+        if self.scheduler_manager is not None:
+            return self.scheduler_manager.take_events()
+        return []
+
+    def reset_cache(self) -> bool | None:
+        raise NotImplementedError(
+            "SimpleCPUOffloadConnector does not support reset_cache(). "
+            "reset_prefix_cache() requires synchronizing all pending "
+            "CPU offload transfers before clearing GPU prefix cache blocks, "
+            "which is not yet implemented."
+        )
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/ssm_conv_transfer_utils.py b/vllm/distributed/kv_transfer/kv_connector/v1/ssm_conv_transfer_utils.py
new file mode 100644
index 000000000000..2a5510656bca
--- /dev/null
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/ssm_conv_transfer_utils.py
@@ -0,0 +1,217 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Mamba conv-state sub-projection decomposition for the 3-read transfer.
+
+With DS conv state layout (dim, state_len), sub-projections are
+contiguous in memory.  Each D rank reads its slices via 3 separate
+RDMA transfers — no P-side permutation needed.
+
+Supported model types:
+  - Mamba2: conv = [x, B, C], temporal = (num_heads, head_dim)
+  - GDN (Gated Delta Net): conv = [Q, K, V] (dim(Q)==dim(K)),
+    temporal = (num_v_heads, v_dim, k_dim)
+"""
+
+import math
+from dataclasses import dataclass
+
+import torch
+
+from vllm.model_executor.layers.mamba.mamba_utils import is_conv_state_dim_first
+from vllm.v1.attention.backends.registry import MambaAttentionBackendEnum
+from vllm.v1.kv_cache_interface import MambaSpec
+
+
+@dataclass(frozen=True)
+class MambaConvSplitInfo:
+    """Per-rank byte sizes of the 3 conv sub-projections.
+
+    Used by both P and D sides for NIXL descriptor registration.
+    All fields are LOCAL to this engine's TP (already divided by TP size).
+
+    DS memory layout within one page (contiguous):
+      Mamba2: |-- x --|- B -|- C -|  (B == C)
+      GDN:    |- Q -|- K -|-- V --|  (dim(Q)==dim(K), V may differ)
+    """
+
+    conv_rows: int  # conv_kernel - 1 (typically 3)
+    local_proj_dims: tuple[int, int, int]  # per-rank column counts per sub-proj
+    conv_dtype_size: int  # bytes per element (e.g. 2 for float16)
+    ssm_sizes: tuple[int, int]  # (conv_state_bytes, ssm_state_bytes)
+
+    @property
+    def local_conv_dim(self) -> int:
+        """Total conv columns per rank."""
+        return sum(self.local_proj_dims)
+
+    @property
+    def proj_bytes(self) -> tuple[int, int, int]:
+        """Byte sizes of the 3 sub-projections for one rank."""
+        row_bytes = self.conv_rows * self.conv_dtype_size
+        return tuple(d * row_bytes for d in self.local_proj_dims)  # type: ignore[return-value]
+
+    @property
+    def local_conv_offsets(self) -> list[tuple[int, int]]:
+        """(byte_offset, byte_size) of each sub-projection within this
+        engine's page.
+
+        Used by both P and D for local descriptor registration.
+        """
+        conv0, conv1, conv2 = self.proj_bytes
+        return [(0, conv0), (conv0, conv1), (conv0 + conv1, conv2)]
+
+    def remote_conv_offsets(
+        self, local_rank_offset: int, tp_ratio: int
+    ) -> list[tuple[int, int]]:
+        """(byte_offset, byte_size) of this D rank's sub-projection slices
+        within one P page.
+
+        Used by D side only, during remote descriptor registration.
+
+        Args:
+            local_rank_offset: which slice this D rank reads.
+            tp_ratio: signed TP ratio.
+                >= 1:  D_TP >= P_TP — P page is larger, D reads its slice.
+                < 0:   P_TP > D_TP — P pages are smaller, D reads entire
+                       P page.  Local dims are scaled down by |tp_ratio|
+                       to get P-sized offsets.
+        """
+        conv0, conv1, conv2 = self.proj_bytes
+        if tp_ratio >= 1:
+            remote_conv0 = conv0 * tp_ratio
+            remote_conv1 = conv1 * tp_ratio
+            return [
+                (local_rank_offset * conv0, conv0),
+                (remote_conv0 + local_rank_offset * conv1, conv1),
+                (remote_conv0 + remote_conv1 + local_rank_offset * conv2, conv2),
+            ]
+        else:
+            # NOTE (ZhanqiuHu): tp_ratio < 0 means P_TP > D_TP, so P pages
+            # are smaller than D's. Local dims are D-sized, but we need
+            # P-sized offsets. Scale down by |tp_ratio|.
+            abs_ratio = -tp_ratio
+            remote_conv0 = conv0 // abs_ratio
+            remote_conv1 = conv1 // abs_ratio
+            remote_conv2 = conv2 // abs_ratio
+            return [
+                (0, remote_conv0),
+                (remote_conv0, remote_conv1),
+                (remote_conv0 + remote_conv1, remote_conv2),
+            ]
+
+
+def derive_mamba_conv_split(
+    mamba_spec: MambaSpec,
+    local_tp: int,
+) -> MambaConvSplitInfo:
+    """Derive per-rank sub-projection byte sizes from a MambaSpec.
+
+    Called once at init on both P and D.  Decomposes the conv dimension
+    into its sub-projection parts based on the model type.
+
+    Args:
+        mamba_spec: MambaSpec whose shapes are:
+            shapes[0] = conv state: (conv_dim_local, conv_rows) in DS layout.
+            shapes[1] = temporal state (model-specific shape).
+        local_tp: this engine's tensor-parallel size.
+
+    Returns:
+        MambaConvSplitInfo with per-rank sub-projection dims, conv_rows,
+        conv_dtype_size, and ssm_sizes (conv_state_bytes, ssm_state_bytes).
+    """
+    _supported = (
+        MambaAttentionBackendEnum.MAMBA2,
+        MambaAttentionBackendEnum.GDN_ATTN,
+    )
+    if mamba_spec.mamba_type not in _supported:
+        raise NotImplementedError(
+            f"3-read conv transfer only supports Mamba2 and GDN models, "
+            f"got mamba_type={mamba_spec.mamba_type!r}."
+        )
+
+    conv_shape = mamba_spec.shapes[0]
+    assert len(conv_shape) == 2, f"Expected 2D conv state shape, got {conv_shape}"
+
+    # NOTE (ZhanqiuHu): 3-read requires DS layout, which is already asserted
+    # in nixl worker __init__.  Use it directly instead of heuristic detection.
+    assert is_conv_state_dim_first(), "3-read requires DS conv state layout"
+    local_conv_dim = conv_shape[0]  # DS: (conv_dim_local, conv_rows)
+    conv_rows = conv_shape[1]
+
+    conv_dtype_size = torch.tensor(
+        [],
+        dtype=mamba_spec.dtypes[0],  # type: ignore[misc]
+    ).element_size()
+    ssm_dtype_size = torch.tensor(
+        [],
+        dtype=mamba_spec.dtypes[1],  # type: ignore[misc]
+    ).element_size()
+    conv_state_bytes = torch.Size(mamba_spec.shapes[0]).numel() * conv_dtype_size
+    ssm_state_bytes = torch.Size(mamba_spec.shapes[1]).numel() * ssm_dtype_size
+
+    if mamba_spec.mamba_type == MambaAttentionBackendEnum.MAMBA2:
+        # NOTE (ZhanqiuHu): intermediate_size (= global x dim) is not stored
+        # in MambaSpec, so we reconstruct it from the SSM temporal state shape:
+        #   shapes[1] = (local_num_heads, head_dim), already divided by TP.
+        head_dim = mamba_spec.shapes[1][1]
+        local_num_heads = mamba_spec.shapes[1][0]
+        intermediate_size = local_num_heads * local_tp * head_dim
+
+        # NOTE (ZhanqiuHu): global conv dim = intermediate_size + 2 * groups_ss,
+        # where groups_ss is the B (= C) dimension.  B and C are always the same
+        # size, so we recover groups_ss from the remainder after subtracting x.
+        remainder = local_conv_dim * local_tp - intermediate_size
+        assert remainder > 0 and remainder % 2 == 0, (
+            f"Conv dim ({local_conv_dim}*tp={local_tp}) doesn't decompose "
+            f"into intermediate_size={intermediate_size} + 2*groups_ss. "
+            f"remainder={remainder}"
+        )
+        groups_ss = remainder // 2
+
+        # Divide by TP to get per-rank column counts.
+        x_local = intermediate_size // local_tp
+        b_local = groups_ss // local_tp
+        local_proj_dims = (x_local, b_local, b_local)
+    elif mamba_spec.mamba_type == MambaAttentionBackendEnum.GDN_ATTN:
+        # GDN: conv = [Q, K, V] where dim(Q) == dim(K) == key_dim.
+        # conv_dim = key_dim*2 + value_dim (all global, divided by TP).
+        # Temporal state shape is (num_v_heads/TP, head_v_dim, head_k_dim).
+        temporal_shape = mamba_spec.shapes[1]
+        num_v_heads_local = temporal_shape[0]
+        head_v_dim = temporal_shape[1]
+        value_dim_local = num_v_heads_local * head_v_dim
+
+        remainder = local_conv_dim - value_dim_local
+        assert remainder > 0 and remainder % 2 == 0, (
+            f"GDN conv dim ({local_conv_dim}) doesn't decompose into "
+            f"2*key_dim_local + value_dim_local={value_dim_local}. "
+            f"remainder={remainder}"
+        )
+        key_dim_local = remainder // 2
+        local_proj_dims = (key_dim_local, key_dim_local, value_dim_local)
+    else:
+        raise NotImplementedError(
+            f"Conv split not supported for mamba_type={mamba_spec.mamba_type!r}"
+        )
+
+    return MambaConvSplitInfo(
+        conv_rows=conv_rows,
+        local_proj_dims=local_proj_dims,
+        conv_dtype_size=conv_dtype_size,
+        ssm_sizes=(conv_state_bytes, ssm_state_bytes),
+    )
+
+
+def compute_physical_blocks_per_logical(
+    ssm_sizes: tuple[int, ...], block_len: int
+) -> int:
+    """Derive _physical_blocks_per_logical_kv_block from remote metadata.
+
+    The remote engine's ratio is not sent directly in the handshake, so we
+    reconstruct it: total mamba state per logical block / block_len.
+
+    Args:
+        ssm_sizes: (conv_state_bytes, ssm_state_bytes) from NixlAgentMetadata.
+        block_len: the engine's block_len in bytes (from block_lens[0]).
+    """
+    return math.ceil((ssm_sizes[0] + ssm_sizes[1]) / block_len)
diff --git a/vllm/distributed/kv_transfer/kv_transfer_state.py b/vllm/distributed/kv_transfer/kv_transfer_state.py
index 2cc074bded6f..67a6b4ca7a67 100644
--- a/vllm/distributed/kv_transfer/kv_transfer_state.py
+++ b/vllm/distributed/kv_transfer/kv_transfer_state.py
@@ -48,8 +48,22 @@ def is_v1_kv_transfer_group(connector: KVConnectorBaseType | None = None) -> boo
     return isinstance(connector, KVConnectorBase_V1)
 
 
+def _sync_engine_id_across_tp(vllm_config: "VllmConfig") -> None:
+    """Broadcast engine_id from TP rank 0 so all workers in a
+    multi-node TP group share the same value."""
+    from vllm.distributed.parallel_state import (
+        get_tp_group,
+    )
+
+    assert vllm_config.kv_transfer_config is not None
+    synced_id = get_tp_group().broadcast_object(
+        vllm_config.kv_transfer_config.engine_id, src=0
+    )
+    vllm_config.kv_transfer_config.engine_id = synced_id
+
+
 def ensure_kv_transfer_initialized(
-    vllm_config: "VllmConfig", kv_cache_config: "KVCacheConfig | None" = None
+    vllm_config: "VllmConfig", kv_cache_config: "KVCacheConfig"
 ) -> None:
     """
     Initialize KV cache transfer parallel group.
@@ -64,6 +78,8 @@ def ensure_kv_transfer_initialized(
         vllm_config.kv_transfer_config.is_kv_transfer_instance
         and _KV_CONNECTOR_AGENT is None
     ):
+        _sync_engine_id_across_tp(vllm_config)
+
         _KV_CONNECTOR_AGENT = KVConnectorFactory.create_connector(
             config=vllm_config,
             role=KVConnectorRole.WORKER,
diff --git a/vllm/distributed/nixl_utils.py b/vllm/distributed/nixl_utils.py
new file mode 100644
index 000000000000..634d59976f70
--- /dev/null
+++ b/vllm/distributed/nixl_utils.py
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import importlib
+import os
+import sys
+from typing import Any
+
+from vllm.logger import init_logger
+from vllm.platforms import current_platform
+
+logger = init_logger(__name__)
+
+# declaration for static analyzers
+NixlWrapper: Any
+nixl_agent_config: Any
+nixlXferTelemetry: Any
+
+
+def _maybe_set_ucx_rcache_limit() -> None:
+    if "UCX_RCACHE_MAX_UNRELEASED" in os.environ:
+        return
+
+    if "nixl" in sys.modules or "rixl" in sys.modules:
+        logger.warning_once(
+            "NIXL was already imported, we can't reset "
+            "UCX_RCACHE_MAX_UNRELEASED. "
+            "Please set it to '1024' manually."
+        )
+        return
+
+    logger.info_once(
+        "Setting UCX_RCACHE_MAX_UNRELEASED to '1024' to avoid a rare "
+        "memory leak in UCX when using NIXL."
+    )
+    os.environ["UCX_RCACHE_MAX_UNRELEASED"] = "1024"
+
+
+def _get_nixl_module_name(name: str) -> str:
+    package_name = "rixl" if current_platform.is_rocm() else "nixl"
+    if name == "nixlXferTelemetry":
+        return f"{package_name}._bindings"
+    return f"{package_name}._api"
+
+
+def _load_nixl_attr(name: str) -> Any:
+    attr_name = {
+        "NixlWrapper": "nixl_agent",
+        "nixl_agent_config": "nixl_agent_config",
+        "nixlXferTelemetry": "nixlXferTelemetry",
+    }[name]
+
+    _maybe_set_ucx_rcache_limit()
+    try:
+        module = importlib.import_module(_get_nixl_module_name(name))
+    except ImportError:
+        if name == "NixlWrapper":
+            logger.warning_once("NIXL is not available")
+        elif name == "nixl_agent_config":
+            logger.warning_once("NIXL agent config is not available")
+        value = None
+    else:
+        value = getattr(module, attr_name, None)
+        if name == "NixlWrapper":
+            if value is None:
+                logger.warning_once("NIXL is not available")
+            else:
+                logger.info_once("NIXL is available")
+        elif name == "nixl_agent_config" and value is None:
+            logger.warning_once("NIXL agent config is not available")
+
+    globals()[name] = value
+    return value
+
+
+def __getattr__(name: str) -> Any:
+    if name in __all__:
+        return _load_nixl_attr(name)
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+def is_nixl_available() -> bool:
+    """Lightweight check for nixl/rixl package without importing it."""
+    import importlib.util
+
+    pkg = "rixl" if current_platform.is_rocm() else "nixl"
+    return importlib.util.find_spec(pkg) is not None
+
+
+__all__ = [
+    "NixlWrapper",
+    "nixl_agent_config",
+    "nixlXferTelemetry",
+    "is_nixl_available",
+]
diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index dbe673b331ce..712167c601cf 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -335,6 +335,10 @@ def __init__(
         self_device_group = None
         self_cpu_group = None
 
+        from vllm.distributed.utils import get_cpu_distributed_timeout_or_none
+
+        timeout = get_cpu_distributed_timeout_or_none()
+
         for ranks in group_ranks:
             device_group = torch.distributed.new_group(
                 ranks, backend=torch_distributed_backend
@@ -342,7 +346,9 @@ def __init__(
             # a group with `gloo` backend, to allow direct coordination between
             # processes through the CPU.
             with suppress_stdout():
-                cpu_group = torch.distributed.new_group(ranks, backend="gloo")
+                cpu_group = torch.distributed.new_group(
+                    ranks, backend="gloo", timeout=timeout
+                )
             if self.rank in ranks:
                 self.ranks = ranks
                 self.world_size = len(ranks)
@@ -394,8 +400,10 @@ def __init__(
             current_platform.is_tpu() or current_platform.use_custom_op_collectives()
         )
 
-        self.use_cpu_custom_send_recv = current_platform.is_cpu() and hasattr(
-            torch.ops._C, "init_shm_manager"
+        self.use_cpu_custom_send_recv = (
+            current_platform.is_cpu()
+            and self.device_communicator
+            and getattr(self.device_communicator, "supports_tensor_dict", False)
         )
 
     def create_mq_broadcaster(
@@ -470,23 +478,37 @@ def graph_capture(self, graph_capture_context: GraphCaptureContext | None = None
         # only cuda uses this function,
         # so we don't abstract it into the base class
         maybe_ca_context = nullcontext()
+        maybe_aiter_context = nullcontext()
         from vllm.distributed.device_communicators.cuda_communicator import (
             CudaCommunicator,
         )
+        from vllm.distributed.device_communicators.xpu_communicator import (
+            XpuCommunicator,
+        )
 
         if self.device_communicator is not None:
-            assert isinstance(self.device_communicator, CudaCommunicator)
+            assert isinstance(
+                self.device_communicator,
+                (CudaCommunicator, XpuCommunicator),
+            )
             ca_comm = self.device_communicator.ca_comm
             if ca_comm is not None:
                 maybe_ca_context = ca_comm.capture()  # type: ignore
 
+            from vllm._aiter_ops import rocm_aiter_ops
+
+            if rocm_aiter_ops.is_enabled():
+                aiter_ar = rocm_aiter_ops.get_aiter_allreduce()
+                if aiter_ar is not None:
+                    maybe_aiter_context = aiter_ar.capture()  # type: ignore
+
         # ensure all initialization operations complete before attempting to
         # capture the graph on another stream
         curr_stream = torch.cuda.current_stream()
         if curr_stream != stream:
             stream.wait_stream(curr_stream)
 
-        with torch.cuda.stream(stream), maybe_ca_context:
+        with torch.cuda.stream(stream), maybe_ca_context, maybe_aiter_context:
             yield graph_capture_context
 
     def all_reduce(self, input_: torch.Tensor) -> torch.Tensor:
@@ -1445,6 +1467,7 @@ def init_distributed_environment(
         # local rank not set, this usually happens in single-node
         # setting, where we can use rank as local rank
         local_rank = envs.LOCAL_RANK if distributed_init_method == "env://" else rank
+
     global _WORLD, _NODE_COUNT, _INNER_DP_WORLD
     if enable_elastic_ep:
         _init_elastic_ep_world(config, local_rank, backend, rank, world_size)
@@ -1690,11 +1713,7 @@ def initialize_model_parallel(
         # using torch.distributed in execution with torch.distributed in EPLB.
         global _EPLB
         assert _EPLB is None, "EPLB group is already initialized"
-        if (
-            config is not None
-            and config.parallel_config is not None
-            and config.parallel_config.enable_eplb
-        ):
+        if config.parallel_config.enable_eplb:
             if enable_elastic_ep:
                 _EPLB = _init_stateless_group(
                     group_ranks,
diff --git a/vllm/distributed/utils.py b/vllm/distributed/utils.py
index 9991ab1ddc23..ba0419a2800d 100644
--- a/vllm/distributed/utils.py
+++ b/vllm/distributed/utils.py
@@ -491,6 +491,16 @@ def get_cached_tcp_store_client(host: str, port: int) -> TCPStore:
     return TCPStore(host, port, is_master=False, wait_for_workers=False)
 
 
+def get_cpu_distributed_timeout_or_none() -> timedelta | None:
+    from vllm.config import get_current_vllm_config_or_none
+
+    vllm_config = get_current_vllm_config_or_none()
+    if vllm_config is None:
+        return None
+    timeout_seconds = vllm_config.parallel_config.cpu_distributed_timeout_seconds
+    return timedelta(seconds=timeout_seconds) if timeout_seconds is not None else None
+
+
 def init_gloo_process_group(
     prefix_store: PrefixStore,
     group_rank: int,
@@ -570,6 +580,10 @@ def stateless_init_torch_distributed_process_group(
     init_method = get_tcp_uri(host, port)
     backend = Backend(backend)  # it is basically string
     timeout = _get_default_timeout(backend)
+    if backend == "gloo":
+        gloo_timeout = get_cpu_distributed_timeout_or_none()
+        if gloo_timeout is not None:
+            timeout = gloo_timeout
 
     if listen_socket is not None:
         store = create_tcp_store(
diff --git a/vllm/distributed/weight_transfer/__init__.py b/vllm/distributed/weight_transfer/__init__.py
index c96ad0e3bb4f..af3322e0cbb7 100644
--- a/vllm/distributed/weight_transfer/__init__.py
+++ b/vllm/distributed/weight_transfer/__init__.py
@@ -5,8 +5,10 @@
 to inference workers.
 """
 
+from vllm.distributed.weight_transfer.base import WeightTransferEngine
 from vllm.distributed.weight_transfer.factory import WeightTransferEngineFactory
 
 __all__ = [
+    "WeightTransferEngine",
     "WeightTransferEngineFactory",
 ]
diff --git a/vllm/distributed/weight_transfer/base.py b/vllm/distributed/weight_transfer/base.py
index 788dcef128e5..6e99adde1ca7 100644
--- a/vllm/distributed/weight_transfer/base.py
+++ b/vllm/distributed/weight_transfer/base.py
@@ -4,7 +4,7 @@
 
 from abc import ABC, abstractmethod
 from collections.abc import Callable, Iterator
-from dataclasses import KW_ONLY, dataclass, field
+from dataclasses import dataclass, field
 from typing import Any, Generic, TypeVar
 
 import torch
@@ -28,11 +28,7 @@ class WeightTransferInitInfo(ABC):  # noqa: B024
 class WeightTransferUpdateInfo(ABC):  # noqa: B024
     """Base class for backend-specific weight update info."""
 
-    _: KW_ONLY
-    is_checkpoint_format: bool = True
-    """Set to True if weights are in checkpoint/original model format and need
-    layerwise processing. Set to False if weights have already been processed
-    into kernel format (repacking, renaming, etc.)."""
+    pass
 
 
 # API-level request classes (accept dicts for backend-agnostic serialization)
@@ -69,7 +65,10 @@ class WeightTransferEngine(ABC, Generic[TInitInfo, TUpdateInfo]):
     update_info_cls: type[TUpdateInfo]
 
     def __init__(
-        self, config: WeightTransferConfig, parallel_config: ParallelConfig
+        self,
+        config: WeightTransferConfig,
+        parallel_config: ParallelConfig,
+        model: torch.nn.Module,
     ) -> None:
         """
         Initialize the weight transfer engine.
@@ -77,9 +76,11 @@ def __init__(
         Args:
             config: The configuration for the weight transfer engine
             parallel_config: The configuration for the parallel setup
+            model: The local model instance which will receive the weights
         """
         self.config = config
         self.parallel_config = parallel_config
+        self.model = model
 
     def parse_init_info(self, init_dict: dict[str, Any]) -> TInitInfo:
         """
diff --git a/vllm/distributed/weight_transfer/factory.py b/vllm/distributed/weight_transfer/factory.py
index f8e9c864fcc1..791401c7c2de 100644
--- a/vllm/distributed/weight_transfer/factory.py
+++ b/vllm/distributed/weight_transfer/factory.py
@@ -10,6 +10,8 @@
 from vllm.logger import init_logger
 
 if TYPE_CHECKING:
+    import torch
+
     from vllm.config.parallel import ParallelConfig
     from vllm.config.weight_transfer import WeightTransferConfig
 
@@ -75,12 +77,14 @@ def create_engine(
         cls,
         config: "WeightTransferConfig",
         parallel_config: "ParallelConfig",
+        model: "torch.nn.Module",
     ) -> WeightTransferEngine:
         """Create a weight transfer engine instance.
 
         Args:
             config: Weight transfer configuration containing the backend name
             parallel_config: Parallel configuration for the engine
+            model: The local model instance which will receive the weights
 
         Returns:
             An initialized weight transfer engine instance
@@ -102,7 +106,7 @@ def create_engine(
             engine_cls.__name__,
         )
 
-        return engine_cls(config, parallel_config)
+        return engine_cls(config, parallel_config, model)
 
 
 # Register built-in weight transfer engines here.
diff --git a/vllm/distributed/weight_transfer/ipc_engine.py b/vllm/distributed/weight_transfer/ipc_engine.py
index 43b23be544c1..b138c7dd9374 100644
--- a/vllm/distributed/weight_transfer/ipc_engine.py
+++ b/vllm/distributed/weight_transfer/ipc_engine.py
@@ -8,9 +8,10 @@
 from typing import Any
 
 import pybase64 as base64
+import ray
 import requests
 import torch
-from torch.multiprocessing.reductions import reduce_tensor
+from torch.multiprocessing.reductions import rebuild_cuda_tensor, reduce_tensor
 
 from vllm import envs
 from vllm.config.parallel import ParallelConfig
@@ -20,27 +21,43 @@
     WeightTransferInitInfo,
     WeightTransferUpdateInfo,
 )
+from vllm.distributed.weight_transfer.packed_tensor import (
+    DEFAULT_PACKED_BUFFER_SIZE_BYTES,
+    packed_ipc_consumer,
+    packed_ipc_producer,
+)
 
 
 @dataclass
 class IPCTrainerSendWeightsArgs:
     """Arguments for IPC trainer_send_weights method."""
 
-    mode: str
-    """Transport mode: 'http' or 'ray'."""
+    send_mode: str | Callable[["IPCWeightTransferUpdateInfo"], None]
+    """How to send updates to vLLM. Either a string ('ray' or 'http') for
+    built-in transports, or a callable that receives an
+    IPCWeightTransferUpdateInfo and performs the send."""
     llm_handle: Any = None
-    """Ray ObjectRef to LLM handle (required for 'ray' mode)."""
+    """Ray actor handle or list of handles (required for 'ray' send_mode)."""
     url: str | None = None
-    """Base URL for HTTP endpoint (required for 'http' mode)."""
+    """Base URL for HTTP endpoint (required for 'http' send_mode)."""
+    packed: bool = False
+    """Whether to use packed tensor transfer for bounded-memory chunking."""
+    packed_buffer_size_bytes: int = DEFAULT_PACKED_BUFFER_SIZE_BYTES
+    """Size in bytes for each packed tensor buffer when packed=True."""
 
     def __post_init__(self):
         """Validate that required arguments are provided for the selected mode."""
-        if self.mode == "ray" and self.llm_handle is None:
-            raise ValueError("llm_handle is required for 'ray' mode")
-        if self.mode == "http" and self.url is None:
-            raise ValueError("url is required for 'http' mode")
-        if self.mode not in ("ray", "http"):
-            raise ValueError(f"mode must be 'ray' or 'http', got {self.mode}")
+        if callable(self.send_mode):
+            return
+        if self.send_mode == "ray" and self.llm_handle is None:
+            raise ValueError("llm_handle is required for 'ray' send_mode")
+        if self.send_mode == "http" and self.url is None:
+            raise ValueError("url is required for 'http' send_mode")
+        if self.send_mode not in ("ray", "http"):
+            raise ValueError(
+                f"send_mode must be 'ray', 'http', or a callable, "
+                f"got {self.send_mode!r}"
+            )
 
 
 @dataclass
@@ -52,44 +69,22 @@ class IPCWeightTransferInitInfo(WeightTransferInitInfo):
 
 @dataclass
 class IPCWeightTransferUpdateInfo(WeightTransferUpdateInfo):
-    """Update info for IPC weight transfer backend.
-
-    Accepts IPC handles either directly via ``ipc_handles`` (Ray transport)
-    or as a base64-encoded pickle via ``ipc_handles_pickled`` (HTTP transport).
-    Exactly one of the two must be provided; if ``ipc_handles_pickled`` is set
-    it is unpickled into ``ipc_handles`` during ``__post_init__``.
-    """
+    """Update info for IPC weight transfer backend."""
 
     names: list[str]
     dtype_names: list[str]
     shapes: list[list[int]]
-    ipc_handles: list[dict[str, tuple[Callable, tuple]]] | None = None
-    """IPC handles mapping physical GPU UUID to (func, args) tuple.
-    Each handle is a dictionary mapping GPU UUID strings to IPC handle tuples."""
-    ipc_handles_pickled: str | None = None
-    """Base64-encoded pickled IPC handles, used for HTTP transport."""
+    ipc_handles: list[dict[str, tuple]] | dict[str, tuple]
+    """IPC handles mapping physical GPU UUID to rebuild_cuda_tensor args.
+    For non-packed mode: list of per-parameter handle dicts.
+    For packed mode: single handle dict for the packed buffer."""
+    tensor_sizes: list[int] | None = None
+    """Per-parameter sizes in bytes within the packed buffer.
+    Required when packed=True, unused otherwise."""
+    packed: bool = False
+    """Whether this update uses packed tensor format."""
 
     def __post_init__(self):
-        if self.ipc_handles_pickled is not None:
-            if self.ipc_handles is not None:
-                raise ValueError(
-                    "Cannot specify both `ipc_handles` and `ipc_handles_pickled`"
-                )
-
-            if not envs.VLLM_ALLOW_INSECURE_SERIALIZATION:
-                raise ValueError(
-                    "Refusing to deserialize `ipc_handles_pickled` without "
-                    "VLLM_ALLOW_INSECURE_SERIALIZATION=1"
-                )
-
-            self.ipc_handles = pickle.loads(base64.b64decode(self.ipc_handles_pickled))
-            self.ipc_handles_pickled = None
-
-        if self.ipc_handles is None:
-            raise ValueError(
-                "Either `ipc_handles` or `ipc_handles_pickled` must be provided"
-            )
-
         num_params = len(self.names)
         if len(self.dtype_names) != num_params:
             raise ValueError(
@@ -101,11 +96,17 @@ def __post_init__(self):
                 f"`shapes` should be of the same size as `names`: "
                 f"got {len(self.shapes)} and {len(self.names)}"
             )
-        if len(self.ipc_handles) != num_params:
+        if (
+            not self.packed
+            and isinstance(self.ipc_handles, list)
+            and len(self.ipc_handles) != num_params
+        ):
             raise ValueError(
                 f"`ipc_handles` should be of the same size as `names`: "
                 f"got {len(self.ipc_handles)} and {len(self.names)}"
             )
+        if self.packed and self.tensor_sizes is None:
+            raise ValueError("`tensor_sizes` is required when packed=True")
 
 
 class IPCWeightTransferEngine(
@@ -124,7 +125,10 @@ class IPCWeightTransferEngine(
     update_info_cls = IPCWeightTransferUpdateInfo
 
     def __init__(
-        self, config: WeightTransferConfig, parallel_config: ParallelConfig
+        self,
+        config: WeightTransferConfig,
+        parallel_config: ParallelConfig,
+        model: torch.nn.Module,
     ) -> None:
         """
         Initialize the IPC weight transfer engine.
@@ -132,8 +136,39 @@ def __init__(
         Args:
             config: The configuration for the weight transfer engine
             parallel_config: The configuration for the parallel setup
+            model: The local model instance which will receive the weights
         """
-        super().__init__(config, parallel_config)
+        super().__init__(config, parallel_config, model)
+
+    def parse_update_info(
+        self, update_dict: dict[str, Any]
+    ) -> IPCWeightTransferUpdateInfo:
+        """Parse update dict, deserializing pickled IPC handles if present.
+
+        HTTP transport sends IPC handles as a base64-encoded pickle under the
+        key ``ipc_handles_pickled``. This method deserializes them back into
+        ``ipc_handles`` before constructing the typed dataclass, keeping
+        serialization concerns out of the dataclass itself.
+
+        Requires ``VLLM_ALLOW_INSECURE_SERIALIZATION=1`` because the
+        payload is deserialized via ``pickle.loads``.
+        """
+        if "ipc_handles_pickled" in update_dict:
+            if "ipc_handles" in update_dict:
+                raise ValueError(
+                    "Cannot specify both `ipc_handles` and `ipc_handles_pickled`"
+                )
+
+            if not envs.VLLM_ALLOW_INSECURE_SERIALIZATION:
+                raise ValueError(
+                    "Refusing to deserialize `ipc_handles_pickled` without "
+                    "VLLM_ALLOW_INSECURE_SERIALIZATION=1"
+                )
+
+            pickled = update_dict.pop("ipc_handles_pickled")
+            update_dict["ipc_handles"] = pickle.loads(base64.b64decode(pickled))
+
+        return super().parse_update_info(update_dict)
 
     def init_transfer_engine(self, init_info: IPCWeightTransferInitInfo) -> None:
         """
@@ -157,46 +192,52 @@ def receive_weights(
         Args:
             update_info: IPC update info containing parameter names, dtypes, shapes,
                         and IPC handles. Each IPC handle is a mapping between physical
-                        GPU UUID and the IPC handle tuple (func, args).
+                        GPU UUID and the rebuild_cuda_tensor args tuple.
             load_weights: Callable that loads weights into the model. Called
                          incrementally for each weight to avoid OOM.
         """
-        assert update_info.ipc_handles is not None
-        weights = []
-        for name, _dtype_name, _shape, ipc_handle in zip(
-            update_info.names,
-            update_info.dtype_names,
-            update_info.shapes,
-            update_info.ipc_handles,
-        ):
-            device_index = torch.accelerator.current_device_index()
-            props = torch.cuda.get_device_properties(device_index)
-            physical_gpu_id = str(props.uuid)
-
-            if physical_gpu_id not in ipc_handle:
-                raise ValueError(
-                    f"IPC handle not found for GPU UUID {physical_gpu_id}. "
-                    f"Available UUIDs: {list(ipc_handle.keys())}"
-                )
-
-            handle = ipc_handle[physical_gpu_id]
-
-            func, args = handle
-            list_args = list(args)  # type: ignore
-            # Index 6 is the device_index parameter in torch's
-            # IPC handle tuple (rebuild_cuda_tensor). Update it
-            # to the current device since the logical index can
-            # differ between sender and receiver.
-            list_args[6] = device_index
-            weight = func(*list_args)  # type: ignore
-            weights.append((name, weight))
+        device_index = torch.accelerator.current_device_index()
 
-        load_weights(weights)
+        if update_info.packed:
+            assert update_info.tensor_sizes is not None
+            assert isinstance(update_info.ipc_handles, dict)
+            weights = packed_ipc_consumer(
+                ipc_handle=update_info.ipc_handles,
+                names=update_info.names,
+                shapes=update_info.shapes,
+                dtype_names=update_info.dtype_names,
+                tensor_sizes=update_info.tensor_sizes,
+                device_index=device_index,
+            )
+            load_weights(weights)
+        else:
+            assert isinstance(update_info.ipc_handles, list)
+            weights = []
+            for name, ipc_handle in zip(
+                update_info.names,
+                update_info.ipc_handles,
+            ):
+                props = torch.cuda.get_device_properties(device_index)
+                physical_gpu_id = str(props.uuid)
+
+                if physical_gpu_id not in ipc_handle:
+                    raise ValueError(
+                        f"IPC handle not found for GPU UUID "
+                        f"{physical_gpu_id}. "
+                        f"Available UUIDs: {list(ipc_handle.keys())}"
+                    )
+
+                args = ipc_handle[physical_gpu_id]
+                list_args = list(args)
+                # Index 6 of the args from reduce_tensor is the device_index.
+                # We need to overwrite it with the receiver's device index.
+                list_args[6] = device_index
+                weight = rebuild_cuda_tensor(*list_args)
+                weights.append((name, weight))
+
+            load_weights(weights)
 
     def shutdown(self) -> None:
-        """
-        Shutdown the weight transfer engine.
-        """
         pass
 
     @staticmethod
@@ -204,96 +245,213 @@ def trainer_send_weights(
         iterator: Iterator[tuple[str, torch.Tensor]],
         trainer_args: dict[str, Any] | IPCTrainerSendWeightsArgs,
     ) -> None:
-        """
-        Send weights from trainer to inference workers via CUDA IPC.
+        """Send weights from trainer to inference workers via CUDA IPC.
+
+        Supports two transport modes ('ray' and 'http') and two transfer
+        strategies:
+        - Non-packed (default): all weights in a single API call.
+        - Packed (packed=True): chunked transfer with bounded GPU memory.
+
+        For multi-GPU training, all ranks must call this method in
+        parallel. IPC handles are all-gathered across ranks and merged
+        so that each vLLM worker can find its own GPU UUID. Only rank 0
+        sends the payload to vLLM.
 
-        Supports two modes:
-        - 'ray': Sends weights via Ray RPC to a Ray-based LLM handle
-        - 'http': Sends weights via HTTP POST to a vLLM HTTP server
+        .. note::
+            This method calls ``update_weights`` internally. The caller must
+            call ``start_weight_update`` before and ``finish_weight_update``
+            after this method.
 
         Args:
-            iterator: Iterator of model parameters. Returns (name, tensor) tuples.
-                     Tensors should be on the same GPU as the inference workers.
-            trainer_args: Dictionary containing IPC-specific arguments.
-                         Should contain keys from IPCTrainerSendWeightsArgs:
-                         - mode: 'ray' or 'http'
-                         - llm_handle: Ray ObjectRef (for 'ray' mode)
-                         - url: Base URL string (for 'http' mode)
-
-        Example (Ray mode):
-            >>> from vllm.distributed.weight_transfer.ipc_engine import (
-            ...     IPCWeightTransferEngine,
-            ...     IPCTrainerSendWeightsArgs,
-            ... )
-            >>> param_iter = ((n, p) for n, p in model.named_parameters())
-            >>> args = IPCTrainerSendWeightsArgs(mode="ray", llm_handle=llm_handle)
-            >>> IPCWeightTransferEngine.trainer_send_weights(param_iter, asdict(args))
-
-        Example (HTTP mode):
-            >>> args = IPCTrainerSendWeightsArgs(
-            ...     mode="http", url="http://localhost:8000"
-            ... )
-            >>> IPCWeightTransferEngine.trainer_send_weights(param_iter, asdict(args))
+            iterator: Iterator of (name, tensor) pairs. For multi-GPU,
+                     each rank should yield the full tensor on its own GPU
+                     (e.g. via FSDP full_tensor()).
+            trainer_args: IPCTrainerSendWeightsArgs or equivalent dict.
         """
-        # Parse trainer args - accept either dict or dataclass instance
-        if isinstance(trainer_args, dict):
-            args = IPCTrainerSendWeightsArgs(**trainer_args)
+        args = (
+            IPCTrainerSendWeightsArgs(**trainer_args)
+            if isinstance(trainer_args, dict)
+            else trainer_args
+        )
+        device_index = torch.accelerator.current_device_index()
+        gpu_uuid = str(torch.cuda.get_device_properties(device_index).uuid)
+        if args.packed:
+            IPCWeightTransferEngine._send_packed(iterator, args, gpu_uuid)
         else:
-            args = trainer_args
+            IPCWeightTransferEngine._send_unpacked(iterator, args, gpu_uuid)
 
-        # Get physical GPU UUID
-        device_index = torch.accelerator.current_device_index()
-        props = torch.cuda.get_device_properties(device_index)
-        gpu_uuid = str(props.uuid)
+    @staticmethod
+    def _is_rank_zero() -> bool:
+        """Return True if this is rank 0 or no distributed group exists."""
+        if not torch.distributed.is_initialized():
+            return True
+        return torch.distributed.get_rank() == 0
 
-        # Collect weight metadata and create IPC handles
-        names = []
-        dtype_names = []
-        shapes = []
-        ipc_handles = []
+    @staticmethod
+    def _all_gather_and_merge_handles(
+        handles: list[dict[str, tuple]],
+    ) -> list[dict[str, tuple]]:
+        """All-gather and merge IPC handle dicts across ranks in one call.
+
+        Each rank contributes a list of {gpu_uuid: ipc_args} dicts (one
+        per parameter or one per chunk). A single all_gather_object
+        collects every rank's full list, then rank 0 merges per-index so
+        each dict maps every GPU UUID to its args.
+
+        Non-rank-0 returns a list of empty dicts.
+        No-op (returns handles unchanged) when no distributed group exists.
+        """
+        if (
+            not torch.distributed.is_initialized()
+            or torch.distributed.get_world_size() == 1
+        ):
+            return handles
+
+        world_size = torch.distributed.get_world_size()
+        gathered: list[list[dict[str, tuple]] | None] = [None] * world_size
+        torch.distributed.all_gather_object(gathered, handles)
+        torch.distributed.barrier()
+        torch.cuda.synchronize()
+
+        if torch.distributed.get_rank() == 0:
+            merged: list[dict[str, tuple]] = []
+            for param_idx in range(len(handles)):
+                m: dict[str, tuple] = {}
+                for rank_handles in gathered:
+                    if rank_handles is not None:
+                        m.update(rank_handles[param_idx])
+                merged.append(m)
+            return merged
+        return [{} for _ in handles]
+
+    @staticmethod
+    def _post_send_sync() -> None:
+        """Barrier + ipc_collect after a send; no-op if single-GPU."""
+        if (
+            torch.distributed.is_initialized()
+            and torch.distributed.get_world_size() > 1
+        ):
+            torch.distributed.barrier()
+        torch.cuda.ipc_collect()
+
+    @staticmethod
+    def _send_unpacked(
+        iterator: Iterator[tuple[str, torch.Tensor]],
+        args: IPCTrainerSendWeightsArgs,
+        gpu_uuid: str,
+    ) -> None:
+        """Send all weights in a single API call (non-packed mode)."""
+        names: list[str] = []
+        dtype_names: list[str] = []
+        shapes: list[list[int]] = []
+        ipc_handles: list[dict[str, tuple]] = []
+        # Hold strong refs to every contiguous copy until the send + post-send
+        # sync completes. reduce_tensor's returned args do NOT keep storage
+        # alive, and non-contiguous inputs allocate fresh storage in
+        # .contiguous() that would otherwise be GC'd before the consumer opens
+        # the IPC handle.
+        weight_refs: list[torch.Tensor] = []
 
         for name, tensor in iterator:
             names.append(name)
             dtype_names.append(str(tensor.dtype).split(".")[-1])
             shapes.append(list(tensor.shape))
 
-            # Create IPC handle for this weight tensor
-            # The tensor must remain in memory for IPC to work
             weight = tensor.detach().contiguous()
-            ipc_handle = reduce_tensor(weight)
-            ipc_handles.append({gpu_uuid: ipc_handle})
-
-        # Send weights based on mode
-        if args.mode == "ray":
-            # Ray mode: send via Ray RPC
-            import ray
-
-            update_info = asdict(
-                IPCWeightTransferUpdateInfo(
-                    names=names,
-                    dtype_names=dtype_names,
-                    shapes=shapes,
-                    ipc_handles=ipc_handles,
+            weight_refs.append(weight)
+            _, ipc_args = reduce_tensor(weight)
+            ipc_handles.append({gpu_uuid: ipc_args})
+
+        ipc_handles = IPCWeightTransferEngine._all_gather_and_merge_handles(ipc_handles)
+
+        if IPCWeightTransferEngine._is_rank_zero():
+            IPCWeightTransferEngine._do_send(
+                args=args,
+                names=names,
+                dtype_names=dtype_names,
+                shapes=shapes,
+                ipc_handles=ipc_handles,
+            )
+
+        IPCWeightTransferEngine._post_send_sync()
+
+    @staticmethod
+    def _send_packed(
+        iterator: Iterator[tuple[str, torch.Tensor]],
+        args: IPCTrainerSendWeightsArgs,
+        gpu_uuid: str,
+    ) -> None:
+        """Send weights in bounded-memory chunks (packed mode)."""
+        post_iter_func: Callable = lambda item: item[1]
+
+        for chunk in packed_ipc_producer(
+            iterator=iterator,
+            gpu_uuid=gpu_uuid,
+            post_iter_func=post_iter_func,
+            buffer_size_bytes=args.packed_buffer_size_bytes,
+        ):
+            ipc_handle = IPCWeightTransferEngine._all_gather_and_merge_handles(
+                [chunk.ipc_handle]
+            )[0]
+
+            if IPCWeightTransferEngine._is_rank_zero():
+                IPCWeightTransferEngine._do_send(
+                    args=args,
+                    names=chunk.names,
+                    dtype_names=chunk.dtype_names,
+                    shapes=chunk.shapes,
+                    ipc_handles=ipc_handle,
+                    tensor_sizes=chunk.tensor_sizes,
+                    packed=True,
                 )
+
+            IPCWeightTransferEngine._post_send_sync()
+
+    @staticmethod
+    def _do_send(
+        args: IPCTrainerSendWeightsArgs,
+        names: list[str],
+        dtype_names: list[str],
+        shapes: list[list[int]],
+        ipc_handles: list[dict[str, tuple]] | dict[str, tuple],
+        tensor_sizes: list[int] | None = None,
+        packed: bool = False,
+    ) -> None:
+        """Send a single update payload via the configured transport."""
+        update_fields: dict[str, Any] = {
+            "names": names,
+            "dtype_names": dtype_names,
+            "shapes": shapes,
+            "packed": packed,
+        }
+        if tensor_sizes is not None:
+            update_fields["tensor_sizes"] = tensor_sizes
+
+        update_fields["ipc_handles"] = ipc_handles
+        update_info = IPCWeightTransferUpdateInfo(**update_fields)
+
+        if callable(args.send_mode):
+            args.send_mode(update_info)
+        elif args.send_mode == "ray":
+            handles = (
+                args.llm_handle
+                if isinstance(args.llm_handle, list)
+                else [args.llm_handle]
             )
             ray.get(
-                args.llm_handle.update_weights.remote(dict(update_info=update_info))
+                [
+                    h.update_weights.remote(dict(update_info=asdict(update_info)))
+                    for h in handles
+                ]
             )
-        elif args.mode == "http":
-            # HTTP mode: send via HTTP POST with pickled handles
-            # Pickle and base64 encode IPC handles for HTTP transmission
+        elif args.send_mode == "http":
             pickled_handles = base64.b64encode(pickle.dumps(ipc_handles)).decode(
                 "utf-8"
             )
+            http_fields = {k: v for k, v in update_fields.items() if k != "ipc_handles"}
+            http_fields["ipc_handles_pickled"] = pickled_handles
 
             url = f"{args.url}/update_weights"
-            payload = {
-                "update_info": {
-                    "names": names,
-                    "dtype_names": dtype_names,
-                    "shapes": shapes,
-                    "ipc_handles_pickled": pickled_handles,
-                }
-            }
+            payload = {"update_info": http_fields}
             response = requests.post(url, json=payload, timeout=300)
             response.raise_for_status()
diff --git a/vllm/distributed/weight_transfer/nccl_engine.py b/vllm/distributed/weight_transfer/nccl_engine.py
index fbfe7a0df618..3b04a5f65ba3 100644
--- a/vllm/distributed/weight_transfer/nccl_engine.py
+++ b/vllm/distributed/weight_transfer/nccl_engine.py
@@ -21,7 +21,7 @@
 from vllm.distributed.weight_transfer.packed_tensor import (
     DEFAULT_PACKED_BUFFER_SIZE_BYTES,
     DEFAULT_PACKED_NUM_BUFFERS,
-    packed_broadcast_consumer,
+    packed_nccl_broadcast_consumer,
 )
 
 
@@ -109,7 +109,10 @@ class NCCLWeightTransferEngine(
     update_info_cls = NCCLWeightTransferUpdateInfo
 
     def __init__(
-        self, config: WeightTransferConfig, parallel_config: ParallelConfig
+        self,
+        config: WeightTransferConfig,
+        parallel_config: ParallelConfig,
+        model: torch.nn.Module,
     ) -> None:
         """
         Initialize the NCCL weight transfer engine.
@@ -117,8 +120,9 @@ def __init__(
         Args:
             config: The configuration for the weight transfer engine
             parallel_config: The configuration for the parallel setup
+            model: The local model instance which will receive the weights
         """
-        super().__init__(config, parallel_config)
+        super().__init__(config, parallel_config, model)
         self.model_update_group: PyNcclCommunicator | None = None
 
     def init_transfer_engine(self, init_info: NCCLWeightTransferInitInfo) -> None:
@@ -184,7 +188,7 @@ def state_dict_info_iterator():
                     dtype = getattr(torch, dtype_name)
                     yield (name, (shape, dtype))
 
-            packed_broadcast_consumer(
+            packed_nccl_broadcast_consumer(
                 iterator=state_dict_info_iterator(),
                 group=self.model_update_group,
                 src=0,
@@ -247,10 +251,10 @@ def trainer_send_weights(
         if args.packed:
             # Use packed tensor broadcasting for efficiency
             from vllm.distributed.weight_transfer.packed_tensor import (
-                packed_broadcast_producer,
+                packed_nccl_broadcast_producer,
             )
 
-            packed_broadcast_producer(
+            packed_nccl_broadcast_producer(
                 iterator=iterator,
                 group=args.group,
                 src=args.src,
diff --git a/vllm/distributed/weight_transfer/packed_tensor.py b/vllm/distributed/weight_transfer/packed_tensor.py
index 1c96d72edac7..1001eba2e81e 100644
--- a/vllm/distributed/weight_transfer/packed_tensor.py
+++ b/vllm/distributed/weight_transfer/packed_tensor.py
@@ -4,9 +4,11 @@
 
 import math
 from collections.abc import Callable, Iterator
+from dataclasses import dataclass
 from typing import Any
 
 import torch
+from torch.multiprocessing.reductions import reduce_tensor
 
 # Default values for packed tensor configuration.
 # These are imported by NCCLWeightTransferUpdateInfo and trainer_send_weights.
@@ -14,7 +16,124 @@
 DEFAULT_PACKED_NUM_BUFFERS = 2
 
 
-def packed_broadcast_producer(
+def unpack_tensor(
+    packed_tensor: torch.Tensor,
+    names: list[str],
+    shapes: list[list[int]],
+    dtypes: list[torch.dtype],
+    tensor_sizes: list[int],
+) -> list[tuple[str, torch.Tensor]]:
+    """Unpack a packed uint8 tensor into a list of named tensors.
+
+    The returned tensors are **views** of ``packed_tensor`` (the
+    ``.contiguous()`` call is a no-op on already-contiguous row-slices).
+    If ``packed_tensor`` lives in storage that may be reused — e.g. a
+    reused CUDA IPC buffer — callers must clone the results before the
+    underlying storage is overwritten.
+
+    Args:
+        packed_tensor: The packed torch.uint8 tensor to unpack
+        names: List of tensor names
+        shapes: List of tensor shapes
+        dtypes: List of tensor dtypes
+        tensor_sizes: List of tensor sizes in bytes
+    """
+    unpacked_tensors = packed_tensor.split(tensor_sizes)
+
+    return [
+        (name, tensor.contiguous().view(dtype).view(*shape))
+        for name, shape, dtype, tensor in zip(names, shapes, dtypes, unpacked_tensors)
+    ]
+
+
+@dataclass
+class PackedChunk:
+    """Result of packing tensors into a single contiguous uint8 buffer."""
+
+    packed_tensor: torch.Tensor
+    names: list[str]
+    shapes: list[list[int]]
+    dtypes: list[torch.dtype]
+    tensor_sizes: list[int]
+
+
+def pack_tensors(
+    iterator: Iterator[tuple[str, torch.Tensor]],
+    post_iter_func: Callable[[tuple[str, torch.Tensor]], torch.Tensor],
+    buffer_size_bytes: int,
+    tensor_list: list[torch.Tensor] | None = None,
+    current_size: int = 0,
+) -> PackedChunk | None:
+    """Pack tensors from an iterator into a single contiguous uint8 buffer.
+
+    Consumes from the iterator until the accumulated size exceeds
+    buffer_size_bytes or the iterator is exhausted, then returns a
+    PackedChunk. Returns None if no tensors were consumed.
+
+    Args:
+        iterator: Iterator of (name, tensor) pairs
+        post_iter_func: Applied to each item before linearizing to uint8
+        buffer_size_bytes: Max bytes before flushing
+        tensor_list: Pre-existing tensor list to append to (for NCCL
+                    multi-buffer reuse). If None, a fresh list is created.
+        current_size: Byte count already accumulated in tensor_list
+    """
+    if tensor_list is None:
+        tensor_list = []
+
+    names: list[str] = []
+    shapes: list[list[int]] = []
+    dtypes: list[torch.dtype] = []
+    tensor_sizes: list[int] = []
+    total_bytes = current_size
+
+    while True:
+        try:
+            item = next(iterator)
+        except StopIteration:
+            break
+
+        name, orig_tensor = item
+        # Apply post processing and convert to linearized uint8 tensor
+        tensor = post_iter_func(item).contiguous().view(torch.uint8).view(-1)
+
+        if tensor.numel() > buffer_size_bytes:
+            import warnings
+
+            warnings.warn(
+                f"Tensor '{name}' has size {tensor.numel()} bytes, which "
+                f"exceeds buffer_size_bytes={buffer_size_bytes}.",
+                stacklevel=2,
+            )
+
+        tensor_list.append(tensor)
+        names.append(name)
+        shapes.append(list(orig_tensor.shape))
+        dtypes.append(orig_tensor.dtype)
+        tensor_sizes.append(tensor.numel())
+        total_bytes += tensor.numel()
+
+        if total_bytes > buffer_size_bytes:
+            break
+
+    if not tensor_list:
+        return None
+
+    packed = torch.cat(tensor_list, dim=0)
+    del tensor_list
+    return PackedChunk(
+        packed_tensor=packed,
+        names=names,
+        shapes=shapes,
+        dtypes=dtypes,
+        tensor_sizes=tensor_sizes,
+    )
+
+
+# ── NCCL packed broadcast ──────────────────────────────────────────────
+
+
+def packed_nccl_broadcast_producer(
     iterator: Iterator[tuple[str, torch.Tensor]],
     group: Any,
     src: int,
@@ -36,57 +155,31 @@ def packed_broadcast_producer(
                     Both producer and consumer must use the same value.
 
     """
-    target_packed_tensor_size = buffer_size_bytes
-
     streams = [torch.cuda.Stream() for _ in range(num_buffers)]
+    # Keep references to in-flight chunks so their packed_tensors
+    # aren't freed while an async broadcast is still reading them.
+    in_flight: list[PackedChunk | None] = [None] * num_buffers
     buffer_idx = 0
 
-    packing_tensor_list: list[list[torch.Tensor]] = [[] for _ in range(num_buffers)]
-    packing_tensor_sizes: list[int] = [0 for _ in range(num_buffers)]
-    packed_tensors: list[torch.Tensor] = [
-        torch.empty(0, dtype=torch.uint8, device="cuda") for _ in range(num_buffers)
-    ]
-
     while True:
         # Synchronize the current stream
         streams[buffer_idx].synchronize()
+        # Previous chunk on this buffer slot is now safe to free
+        in_flight[buffer_idx] = None
         # Start tasks for the new buffer in a new stream
         with torch.cuda.stream(streams[buffer_idx]):
-            try:
-                # Initialize the packing tensor list and sizes
-                packing_tensor_list[buffer_idx] = []
-                packing_tensor_sizes[buffer_idx] = 0
-                # Pack the tensors
-                while True:
-                    # Apply post processing and convert to linearized uint8 tensor
-                    tensor = (
-                        post_iter_func(next(iterator))
-                        .contiguous()
-                        .view(torch.uint8)
-                        .view(-1)
-                    )
-                    packing_tensor_list[buffer_idx].append(tensor)
-                    packing_tensor_sizes[buffer_idx] += tensor.numel()
-                    if packing_tensor_sizes[buffer_idx] > target_packed_tensor_size:
-                        break
-                # Pack the tensors and call broadcast collective
-                packed_tensors[buffer_idx] = torch.cat(
-                    packing_tensor_list[buffer_idx], dim=0
-                )
-                group.broadcast(packed_tensors[buffer_idx], src=src)
-                # Move to the next buffer
-                buffer_idx = (buffer_idx + 1) % num_buffers
-            except StopIteration:
-                # Do the last broadcast if there are remaining tensors
-                if len(packing_tensor_list[buffer_idx]) > 0:
-                    packed_tensors[buffer_idx] = torch.cat(
-                        packing_tensor_list[buffer_idx], dim=0
-                    )
-                    group.broadcast(packed_tensors[buffer_idx], src=src)
+            chunk = pack_tensors(iterator, post_iter_func, buffer_size_bytes)
+            if chunk is None:
                 break
+            # Pack the tensors and call broadcast collective
+            group.broadcast(chunk.packed_tensor, src=src)
+            # Hold reference until this stream is synchronized
+            in_flight[buffer_idx] = chunk
+            # Move to the next buffer
+            buffer_idx = (buffer_idx + 1) % num_buffers
 
 
-def packed_broadcast_consumer(
+def packed_nccl_broadcast_consumer(
     iterator: Iterator[tuple[str, tuple[list[int], torch.dtype]]],
     group: Any,
     src: int,
@@ -108,37 +201,6 @@ def packed_broadcast_consumer(
                     Both producer and consumer must use the same value.
 
     """
-
-    def unpack_tensor(
-        packed_tensor: torch.Tensor,
-        names: list[str],
-        shapes: list[list[int]],
-        dtypes: list[torch.dtype],
-        tensor_sizes: list[int],
-    ) -> list[tuple[str, torch.Tensor]]:
-        """Unpack a single tensor into a list of tensors.
-
-        Args:
-            packed_tensor: The packed torch.uint8 tensor to unpack
-            names: List of tensor names
-            shapes: List of tensor shapes
-            dtypes: List of tensor dtypes
-            tensor_sizes: List of tensor sizes in bytes
-
-        Returns:
-            unpacked List[(name, tensor)]
-        """
-        unpacked_tensors = packed_tensor.split(tensor_sizes)
-
-        unpacked_list = [
-            (name, tensor.contiguous().view(dtype).view(*shape))
-            for name, shape, dtype, tensor in zip(
-                names, shapes, dtypes, unpacked_tensors
-            )
-        ]
-
-        return unpacked_list
-
     target_packed_tensor_size = buffer_size_bytes
 
     streams = [torch.cuda.Stream() for _ in range(num_buffers)]
@@ -214,3 +276,152 @@ def unpack_tensor(
                         )
                     )
                 break
+
+
+# ── IPC packed transfer ────────────────────────────────────────────────
+
+
+@dataclass
+class PackedIpcChunk:
+    """Metadata and IPC handle for a single packed chunk."""
+
+    names: list[str]
+    shapes: list[list[int]]
+    dtype_names: list[str]
+    tensor_sizes: list[int]
+    ipc_handle: dict[str, tuple]
+
+
+def packed_ipc_producer(
+    iterator: Iterator[tuple[str, torch.Tensor]],
+    gpu_uuid: str,
+    post_iter_func: Callable[[tuple[str, torch.Tensor]], torch.Tensor],
+    buffer_size_bytes: int = DEFAULT_PACKED_BUFFER_SIZE_BYTES,
+) -> Iterator[PackedIpcChunk]:
+    """Pack tensors into a reusable IPC buffer and yield handles.
+
+    Allocates a single GPU buffer of ``buffer_size_bytes`` and registers
+    it for IPC once via ``reduce_tensor``.  Each chunk's packed data is
+    copied into this buffer before yielding, so only one IPC-shared
+    allocation exists for the lifetime of the transfer.
+
+    Callers **must** ensure the consumer has finished reading the buffer
+    (e.g. ``ray.get`` returned) before resuming the generator for the
+    next chunk.
+
+    Args:
+        iterator: Iterator of (name, tensor) pairs.
+        gpu_uuid: Physical GPU UUID string for this rank.
+        post_iter_func: Applied to each (name, tensor) before packing.
+        buffer_size_bytes: Exact capacity of the reusable IPC buffer.
+            Every chunk is guaranteed to fit within this size.  A
+            ``ValueError`` is raised if any single tensor exceeds it.
+    """
+    ipc_buffer = torch.empty(buffer_size_bytes, dtype=torch.uint8, device="cuda")
+    _, ipc_args = reduce_tensor(ipc_buffer)
+
+    names: list[str] = []
+    shapes: list[list[int]] = []
+    dtypes: list[torch.dtype] = []
+    tensor_sizes: list[int] = []
+    total_bytes = 0
+
+    for name, orig_tensor in iterator:
+        flat = (
+            post_iter_func((name, orig_tensor)).contiguous().view(torch.uint8).view(-1)
+        )
+
+        if flat.numel() > buffer_size_bytes:
+            raise ValueError(
+                f"Tensor '{name}' has size {flat.numel()} bytes, "
+                f"which exceeds buffer_size_bytes={buffer_size_bytes}. "
+                f"Increase buffer_size_bytes to at least {flat.numel()}."
+            )
+
+        if total_bytes and total_bytes + flat.numel() > buffer_size_bytes:
+            # Drain queued copies so the consumer sees a fully-written buffer.
+            torch.cuda.current_stream().synchronize()
+            yield PackedIpcChunk(
+                names=names,
+                shapes=shapes,
+                dtype_names=[str(d).split(".")[-1] for d in dtypes],
+                tensor_sizes=tensor_sizes,
+                ipc_handle={gpu_uuid: ipc_args},
+            )
+            # Rebind to fresh lists so the yielded chunk's metadata is
+            # not mutated while the consumer is still reading.
+            names, shapes, dtypes, tensor_sizes = [], [], [], []
+            total_bytes = 0
+
+        ipc_buffer[total_bytes : total_bytes + flat.numel()].copy_(flat)
+        names.append(name)
+        shapes.append(list(orig_tensor.shape))
+        dtypes.append(orig_tensor.dtype)
+        tensor_sizes.append(flat.numel())
+        total_bytes += flat.numel()
+
+    if total_bytes:
+        torch.cuda.current_stream().synchronize()
+        yield PackedIpcChunk(
+            names=names,
+            shapes=shapes,
+            dtype_names=[str(d).split(".")[-1] for d in dtypes],
+            tensor_sizes=tensor_sizes,
+            ipc_handle={gpu_uuid: ipc_args},
+        )
+
+
+def packed_ipc_consumer(
+    ipc_handle: dict[str, tuple],
+    names: list[str],
+    shapes: list[list[int]],
+    dtype_names: list[str],
+    tensor_sizes: list[int],
+    device_index: int,
+) -> list[tuple[str, torch.Tensor]]:
+    """Unpack a single packed IPC chunk into named tensors.
+
+    Reconstructs the packed buffer via rebuild_cuda_tensor, unpacks
+    into individual tensors, and clones each into independent storage
+    before returning.
+
+    The clone is intentional: the producer reuses one IPC buffer across
+    chunks, so any tensor view that aliases the buffer would observe the
+    *next* chunk's bytes as soon as the producer's generator is resumed.
+    Callers that retain references past their own update_weights call
+    (notably vLLM's layerwise reload, which buffers ``bound_args`` for
+    replay in ``_layerwise_process``) would otherwise replay against
+    stale data and silently corrupt multi-chunk weight transfers.
+
+    Args:
+        ipc_handle: Mapping of GPU UUID to rebuild_cuda_tensor args tuple
+        names: Parameter names in the packed buffer
+        shapes: Parameter shapes
+        dtype_names: Parameter dtype name strings (e.g. "float16")
+        tensor_sizes: Size in bytes of each parameter in the packed buffer
+        device_index: Local CUDA device index
+    """
+    from torch.multiprocessing.reductions import rebuild_cuda_tensor
+
+    props = torch.cuda.get_device_properties(device_index)
+    physical_gpu_id = str(props.uuid)
+
+    if physical_gpu_id not in ipc_handle:
+        raise ValueError(
+            f"IPC handle not found for GPU UUID {physical_gpu_id}. "
+            f"Available UUIDs: {list(ipc_handle.keys())}"
+        )
+
+    args = ipc_handle[physical_gpu_id]
+    list_args = list(args)
+    list_args[6] = device_index
+    packed = rebuild_cuda_tensor(*list_args)
+
+    content_size = sum(tensor_sizes)
+    packed = packed[:content_size]
+
+    dtypes = [getattr(torch, dn) for dn in dtype_names]
+    return [
+        (name, t.clone())
+        for name, t in unpack_tensor(packed, names, shapes, dtypes, tensor_sizes)
+    ]
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index e1772ab1d427..3d1c6f2e9b6e 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -8,7 +8,7 @@
 import json
 import sys
 from collections.abc import Callable
-from dataclasses import MISSING, dataclass, fields, is_dataclass
+from dataclasses import MISSING, asdict, dataclass, fields, is_dataclass
 from itertools import permutations
 from types import UnionType
 from typing import (
@@ -45,6 +45,7 @@
     KVTransferConfig,
     LoadConfig,
     LoRAConfig,
+    MambaConfig,
     ModelConfig,
     MultiModalConfig,
     ObservabilityConfig,
@@ -70,8 +71,9 @@
     PrefixCachingHashAlgo,
 )
 from vllm.config.device import Device
-from vllm.config.kernel import MoEBackend
+from vllm.config.kernel import IrOpPriorityConfig, LinearBackend, MoEBackend
 from vllm.config.lora import MaxLoRARanks
+from vllm.config.mamba import MambaBackendEnum
 from vllm.config.model import (
     ConvertOption,
     HfOverrides,
@@ -103,7 +105,11 @@
 from vllm.transformers_utils.gguf_utils import is_gguf
 from vllm.transformers_utils.repo_utils import get_model_path
 from vllm.transformers_utils.utils import is_cloud_storage
-from vllm.utils.argparse_utils import FlexibleArgumentParser
+from vllm.utils.argparse_utils import (
+    FlexibleArgumentParser,
+    human_readable_int,
+    human_readable_int_or_auto,
+)
 from vllm.utils.mem_constants import GiB_bytes
 from vllm.utils.network_utils import get_ip
 from vllm.utils.torch_utils import resolve_kv_cache_dtype_string
@@ -112,6 +118,7 @@
 from vllm.version import __version__ as VLLM_VERSION
 
 if TYPE_CHECKING:
+    from vllm.config.quantization import QuantizationConfigArgs
     from vllm.model_executor.layers.quantization import QuantizationMethods
     from vllm.model_executor.model_loader import LoadFormats
     from vllm.usage.usage_lib import UsageContext
@@ -253,6 +260,28 @@ def _maybe_add_docs_url(cls: Any) -> str:
     return f"\n\nAPI docs: https://docs.vllm.ai/en/{version}/api/vllm/config/#vllm.config.{cls.__name__}"
 
 
+def _expand_json_human_readable_numbers(val: str) -> str:
+    """Expand human-readable number suffixes in a JSON string.
+
+    Based on :func:`human_readable_int` so that the ``k/m/g/t`` (decimal) and
+    ``K/M/G/T`` (binary) conventions work out the box.
+    Also works inside JSON config arguments such
+    as ``--kv-transfer-config '{"cpu_bytes_to_use": 80m}'``.
+
+    Only bare (unquoted) tokens are replaced so that JSON string values
+    like ``"model_name"`` are never modified.
+    """
+    # Split on quoted strings so we only touch non-string regions.
+    parts = re.split(r'("(?:[^"\\]|\\.)*")', val)
+    for i in range(0, len(parts), 2):  # even indices = outside strings
+        parts[i] = re.sub(
+            r"\b\d+(?:\.\d+)?[kKmMgGtT]\b",
+            lambda m: str(human_readable_int(m.group())),
+            parts[i],
+        )
+    return "".join(parts)
+
+
 @functools.lru_cache(maxsize=30)
 def _compute_kwargs(cls: ConfigType) -> dict[str, dict[str, Any]]:
     # Save time only getting attr docs if we're generating help text
@@ -298,6 +327,7 @@ def _compute_kwargs(cls: ConfigType) -> dict[str, dict[str, Any]]:
 
             def parse_dataclass(val: str, cls=dataclass_cls) -> Any:
                 try:
+                    val = _expand_json_human_readable_numbers(val)
                     return TypeAdapter(cls).validate_json(val)
                 except ValidationError as e:
                     raise argparse.ArgumentTypeError(repr(e)) from e
@@ -305,6 +335,11 @@ def parse_dataclass(val: str, cls=dataclass_cls) -> Any:
             kwargs[name]["type"] = parse_dataclass
             kwargs[name]["help"] += _maybe_add_docs_url(dataclass_cls)
             kwargs[name]["help"] += f"\n\n{json_tip}"
+        elif type_hints == {bool, str, type(None)}:
+            # Optional-valued flag: bare flag -> True, value -> str.
+            kwargs[name]["type"] = str
+            kwargs[name]["nargs"] = "?"
+            kwargs[name]["const"] = True
         elif contains_type(type_hints, bool):
             # Creates --no-<name> and --<name> flags
             kwargs[name]["action"] = argparse.BooleanOptionalAction
@@ -320,7 +355,11 @@ def parse_dataclass(val: str, cls=dataclass_cls) -> Any:
             if name == "max_model_len":
                 kwargs[name]["type"] = human_readable_int_or_auto
                 kwargs[name]["help"] += f"\n\n{human_readable_int_or_auto.__doc__}"
-            elif name in ("max_num_batched_tokens", "kv_cache_memory_bytes"):
+            elif name in (
+                "max_num_batched_tokens",
+                "kv_cache_memory_bytes",
+                "safetensors_prefetch_block_size",
+            ):
                 kwargs[name]["type"] = human_readable_int
                 kwargs[name]["help"] += f"\n\n{human_readable_int.__doc__}"
             else:
@@ -389,6 +428,8 @@ class EngineArgs:
     allowed_media_domains: list[str] | None = ModelConfig.allowed_media_domains
     download_dir: str | None = LoadConfig.download_dir
     safetensors_load_strategy: str | None = LoadConfig.safetensors_load_strategy
+    safetensors_prefetch_num_threads: int = LoadConfig.safetensors_prefetch_num_threads
+    safetensors_prefetch_block_size: int = LoadConfig.safetensors_prefetch_block_size
     load_format: str | LoadFormats = LoadConfig.load_format
     config_format: str = ModelConfig.config_format
     dtype: ModelDType = ModelConfig.dtype
@@ -401,6 +442,7 @@ class EngineArgs:
     max_cudagraph_capture_size: int | None = get_field(
         CompilationConfig, "max_cudagraph_capture_size"
     )
+    ir_op_priority: IrOpPriorityConfig = get_field(KernelConfig, "ir_op_priority")
     # Note: Specifying a custom executor backend by passing a class
     # is intended for expert use only. The API may change without
     # notice.
@@ -414,6 +456,12 @@ class EngineArgs:
     nnodes: int = ParallelConfig.nnodes
     node_rank: int = ParallelConfig.node_rank
     distributed_timeout_seconds: int | None = ParallelConfig.distributed_timeout_seconds
+    cpu_distributed_timeout_seconds: int | None = (
+        ParallelConfig.cpu_distributed_timeout_seconds
+    )
+    numa_bind: bool = ParallelConfig.numa_bind
+    numa_bind_nodes: list[int] | None = ParallelConfig.numa_bind_nodes
+    numa_bind_cpus: list[str] | None = ParallelConfig.numa_bind_cpus
     tensor_parallel_size: int = ParallelConfig.tensor_parallel_size
     prefill_context_parallel_size: int = ParallelConfig.prefill_context_parallel_size
     decode_context_parallel_size: int = ParallelConfig.decode_context_parallel_size
@@ -428,10 +476,12 @@ class EngineArgs:
     data_parallel_rpc_port: int | None = None
     data_parallel_hybrid_lb: bool = False
     data_parallel_external_lb: bool = False
+    data_parallel_multi_port_external_lb: bool = False
     data_parallel_backend: DataParallelBackend = ParallelConfig.data_parallel_backend
     enable_expert_parallel: bool = ParallelConfig.enable_expert_parallel
     enable_ep_weight_filter: bool = ParallelConfig.enable_ep_weight_filter
     moe_backend: MoEBackend = KernelConfig.moe_backend
+    linear_backend: LinearBackend = KernelConfig.linear_backend
     all2all_backend: All2AllBackend = ParallelConfig.all2all_backend
     enable_elastic_ep: bool = ParallelConfig.enable_elastic_ep
     enable_dbo: bool = ParallelConfig.enable_dbo
@@ -474,6 +524,7 @@ class EngineArgs:
     max_num_seqs: int | None = None
     max_logprobs: int = ModelConfig.max_logprobs
     logprobs_mode: LogprobsMode = ModelConfig.logprobs_mode
+    use_fp64_gumbel: bool = ModelConfig.use_fp64_gumbel
     disable_log_stats: bool = False
     aggregate_engine_logging: bool = False
     revision: str | None = ModelConfig.revision
@@ -482,6 +533,12 @@ class EngineArgs:
     hf_overrides: HfOverrides = get_field(ModelConfig, "hf_overrides")
     tokenizer_revision: str | None = ModelConfig.tokenizer_revision
     quantization: QuantizationMethods | str | None = ModelConfig.quantization
+    quantization_config: "dict[str, Any] | QuantizationConfigArgs | None" = None
+    """User-facing quantization configuration. Carries per-layer-kind
+    QuantSpecs (linear, moe) and ignore patterns; see
+    :class:`QuantizationConfigArgs`. Auto-populated from the matching online
+    shorthand when `quantization` is one of the values in
+    `ONLINE_QUANT_SHORTHAND_NAMES`."""
     allow_deprecated_quantization: bool = ModelConfig.allow_deprecated_quantization
     enforce_eager: bool = ModelConfig.enforce_eager
     disable_custom_all_reduce: bool = ParallelConfig.disable_custom_all_reduce
@@ -507,6 +564,14 @@ class EngineArgs:
     mm_encoder_attn_backend: AttentionBackendEnum | str | None = (
         MultiModalConfig.mm_encoder_attn_backend
     )
+    mm_encoder_attn_dtype: str | None = MultiModalConfig.mm_encoder_attn_dtype
+    mm_encoder_fp8_scale_path: str | None = MultiModalConfig.mm_encoder_fp8_scale_path
+    mm_encoder_fp8_scale_save_path: str | None = (
+        MultiModalConfig.mm_encoder_fp8_scale_save_path
+    )
+    mm_encoder_fp8_scale_save_margin: float = (
+        MultiModalConfig.mm_encoder_fp8_scale_save_margin
+    )
     io_processor_plugin: str | None = None
     renderer_num_workers: int = 1
     skip_mm_profiling: bool = MultiModalConfig.skip_mm_profiling
@@ -523,6 +588,7 @@ class EngineArgs:
     lora_target_modules: list[str] | None = LoRAConfig.target_modules
     enable_tower_connector_lora: bool = LoRAConfig.enable_tower_connector_lora
     specialize_active_lora: bool = LoRAConfig.specialize_active_lora
+    enable_mixed_moe_lora_format: bool = LoRAConfig.enable_mixed_moe_lora_format
 
     ray_workers_use_nsight: bool = ParallelConfig.ray_workers_use_nsight
     num_gpu_blocks_override: int | None = CacheConfig.num_gpu_blocks_override
@@ -545,6 +611,9 @@ class EngineArgs:
     reasoning_parser_plugin: str | None = None
 
     speculative_config: dict[str, Any] | None = None
+    spec_method: str | None = None
+    spec_model: str | None = None
+    spec_tokens: int | None = None
 
     show_hidden_metrics_for_version: str | None = (
         ObservabilityConfig.show_hidden_metrics_for_version
@@ -572,6 +641,7 @@ class EngineArgs:
     pooler_config: PoolerConfig | None = ModelConfig.pooler_config
     compilation_config: CompilationConfig = get_field(VllmConfig, "compilation_config")
     attention_config: AttentionConfig = get_field(VllmConfig, "attention_config")
+    mamba_config: MambaConfig = get_field(VllmConfig, "mamba_config")
     kernel_config: KernelConfig = get_field(VllmConfig, "kernel_config")
     enable_flashinfer_autotune: bool = get_field(
         KernelConfig, "enable_flashinfer_autotune"
@@ -589,6 +659,7 @@ class EngineArgs:
 
     generation_config: str = ModelConfig.generation_config
     enable_sleep_mode: bool = ModelConfig.enable_sleep_mode
+    enable_cumem_allocator: bool = ModelConfig.enable_cumem_allocator
     override_generation_config: dict[str, Any] = get_field(
         ModelConfig, "override_generation_config"
     )
@@ -605,6 +676,12 @@ class EngineArgs:
     mamba_block_size: int | None = get_field(CacheConfig, "mamba_block_size")
     mamba_cache_mode: MambaCacheMode = CacheConfig.mamba_cache_mode
 
+    mamba_backend: MambaBackendEnum = MambaBackendEnum.TRITON
+    enable_mamba_cache_stochastic_rounding: bool = (
+        MambaConfig.enable_stochastic_rounding
+    )
+    mamba_cache_philox_rounds: int = MambaConfig.stochastic_rounding_philox_rounds
+
     additional_config: dict[str, Any] = get_field(VllmConfig, "additional_config")
 
     use_tqdm_on_load: bool = LoadConfig.use_tqdm_on_load
@@ -645,6 +722,8 @@ def __post_init__(self):
             self.compilation_config = CompilationConfig(**self.compilation_config)
         if isinstance(self.attention_config, dict):
             self.attention_config = AttentionConfig(**self.attention_config)
+        if isinstance(self.mamba_config, dict):
+            self.mamba_config = MambaConfig(**self.mamba_config)
         if isinstance(self.kernel_config, dict):
             self.kernel_config = KernelConfig(**self.kernel_config)
         if isinstance(self.eplb_config, dict):
@@ -653,6 +732,15 @@ def __post_init__(self):
             self.weight_transfer_config = WeightTransferConfig(
                 **self.weight_transfer_config
             )
+        if isinstance(self.ir_op_priority, dict):
+            self.ir_op_priority = IrOpPriorityConfig(**self.ir_op_priority)
+
+        from vllm.config.quantization import resolve_quantization_config
+
+        self.quantization_config = resolve_quantization_config(
+            self.quantization, self.quantization_config
+        )
+
         # Setup plugins
         from vllm.plugins import load_general_plugins
 
@@ -713,6 +801,9 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         )
         model_group.add_argument("--max-model-len", **model_kwargs["max_model_len"])
         model_group.add_argument("--quantization", "-q", **model_kwargs["quantization"])
+        model_group.add_argument(
+            "--quantization-config", **model_kwargs["quantization_config"]
+        )
         model_group.add_argument(
             "--allow-deprecated-quantization",
             **model_kwargs["allow_deprecated_quantization"],
@@ -724,6 +815,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         )
         model_group.add_argument("--max-logprobs", **model_kwargs["max_logprobs"])
         model_group.add_argument("--logprobs-mode", **model_kwargs["logprobs_mode"])
+        model_group.add_argument("--use-fp64-gumbel", **model_kwargs["use_fp64_gumbel"])
         model_group.add_argument(
             "--disable-sliding-window", **model_kwargs["disable_sliding_window"]
         )
@@ -740,16 +832,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             "--served-model-name", **model_kwargs["served_model_name"]
         )
         model_group.add_argument("--config-format", **model_kwargs["config_format"])
-        # This one is a special case because it can bool
-        # or str. TODO: Handle this in get_kwargs
-        model_group.add_argument(
-            "--hf-token",
-            type=str,
-            nargs="?",
-            const=True,
-            default=model_kwargs["hf_token"]["default"],
-            help=model_kwargs["hf_token"]["help"],
-        )
+        model_group.add_argument("--hf-token", **model_kwargs["hf_token"])
         model_group.add_argument("--hf-overrides", **model_kwargs["hf_overrides"])
         model_group.add_argument("--pooler-config", **model_kwargs["pooler_config"])
         model_group.add_argument(
@@ -761,6 +844,9 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         model_group.add_argument(
             "--enable-sleep-mode", **model_kwargs["enable_sleep_mode"]
         )
+        model_group.add_argument(
+            "--enable-cumem-allocator", **model_kwargs["enable_cumem_allocator"]
+        )
         model_group.add_argument("--model-impl", **model_kwargs["model_impl"])
         model_group.add_argument(
             "--override-attention-dtype", **model_kwargs["override_attention_dtype"]
@@ -787,6 +873,14 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         load_group.add_argument(
             "--safetensors-load-strategy", **load_kwargs["safetensors_load_strategy"]
         )
+        load_group.add_argument(
+            "--safetensors-prefetch-num-threads",
+            **load_kwargs["safetensors_prefetch_num_threads"],
+        )
+        load_group.add_argument(
+            "--safetensors-prefetch-block-size",
+            **load_kwargs["safetensors_prefetch_block_size"],
+        )
         load_group.add_argument(
             "--model-loader-extra-config", **load_kwargs["model_loader_extra_config"]
         )
@@ -806,6 +900,22 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             "--attention-backend", **attention_kwargs["backend"]
         )
 
+        # Mamba arguments
+        mamba_kwargs = get_kwargs(MambaConfig)
+        mamba_group = parser.add_argument_group(
+            title="MambaConfig",
+            description=MambaConfig.__doc__,
+        )
+        mamba_group.add_argument("--mamba-backend", **mamba_kwargs["backend"])
+        mamba_group.add_argument(
+            "--enable-mamba-cache-stochastic-rounding",
+            **mamba_kwargs["enable_stochastic_rounding"],
+        )
+        mamba_group.add_argument(
+            "--mamba-cache-philox-rounds",
+            **mamba_kwargs["stochastic_rounding_philox_rounds"],
+        )
+
         # Structured outputs arguments
         structured_outputs_kwargs = get_kwargs(StructuredOutputsConfig)
         structured_outputs_group = parser.add_argument_group(
@@ -845,6 +955,17 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             "--distributed-timeout-seconds",
             **parallel_kwargs["distributed_timeout_seconds"],
         )
+        parallel_group.add_argument(
+            "--cpu-distributed-timeout-seconds",
+            **parallel_kwargs["cpu_distributed_timeout_seconds"],
+        )
+        parallel_group.add_argument("--numa-bind", **parallel_kwargs["numa_bind"])
+        parallel_group.add_argument(
+            "--numa-bind-nodes", **parallel_kwargs["numa_bind_nodes"]
+        )
+        parallel_group.add_argument(
+            "--numa-bind-cpus", **parallel_kwargs["numa_bind_cpus"]
+        )
         parallel_group.add_argument(
             "--tensor-parallel-size", "-tp", **parallel_kwargs["tensor_parallel_size"]
         )
@@ -878,7 +999,9 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             "-dpn",
             type=int,
             help="Data parallel rank of this instance. "
-            "When set, enables external load balancer mode.",
+            "When set, enables external load balancer mode for MoE "
+            "data-parallel deployments. Unsupported for non-MoE models; "
+            "launch independent vLLM instances instead.",
         )
         parallel_group.add_argument(
             "--data-parallel-start-rank",
@@ -921,6 +1044,15 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             "-dpe",
             **parallel_kwargs["data_parallel_external_lb"],
         )
+        parallel_group.add_argument(
+            "--data-parallel-multi-port-external-lb",
+            "-dpm",
+            action="store_true",
+            default=False,
+            help="Run a node-local supervisor that launches one external-LB API "
+            "server per local data parallel rank and exposes aggregated health on "
+            "a supervisor port.",
+        )
         parallel_group.add_argument(
             "--enable-expert-parallel",
             "-ep",
@@ -1103,6 +1235,22 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             "--mm-encoder-attn-backend",
             **multimodal_kwargs["mm_encoder_attn_backend"],
         )
+        multimodal_group.add_argument(
+            "--mm-encoder-attn-dtype",
+            **multimodal_kwargs["mm_encoder_attn_dtype"],
+        )
+        multimodal_group.add_argument(
+            "--mm-encoder-fp8-scale-path",
+            **multimodal_kwargs["mm_encoder_fp8_scale_path"],
+        )
+        multimodal_group.add_argument(
+            "--mm-encoder-fp8-scale-save-path",
+            **multimodal_kwargs["mm_encoder_fp8_scale_save_path"],
+        )
+        multimodal_group.add_argument(
+            "--mm-encoder-fp8-scale-save-margin",
+            **multimodal_kwargs["mm_encoder_fp8_scale_save_margin"],
+        )
         multimodal_group.add_argument(
             "--interleave-mm-strings", **multimodal_kwargs["interleave_mm_strings"]
         )
@@ -1149,6 +1297,10 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         lora_group.add_argument(
             "--specialize-active-lora", **lora_kwargs["specialize_active_lora"]
         )
+        lora_group.add_argument(
+            "--enable-mixed-moe-lora-format",
+            **lora_kwargs["enable_mixed_moe_lora_format"],
+        )
 
         # Observability arguments
         observability_kwargs = get_kwargs(ObservabilityConfig)
@@ -1282,6 +1434,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             title="KernelConfig",
             description=KernelConfig.__doc__,
         )
+        kernel_group.add_argument("--ir-op-priority", **kernel_kwargs["ir_op_priority"])
         kernel_group.add_argument(
             "--enable-flashinfer-autotune",
             **kernel_kwargs["enable_flashinfer_autotune"],
@@ -1289,6 +1442,9 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         moe_backend_kwargs = kernel_kwargs["moe_backend"]
         moe_backend_kwargs["type"] = lambda s: s.lower().replace("-", "_")
         kernel_group.add_argument("--moe-backend", **moe_backend_kwargs)
+        linear_backend_kwargs = kernel_kwargs["linear_backend"]
+        linear_backend_kwargs["type"] = lambda s: s.lower().replace("-", "_")
+        kernel_group.add_argument("--linear-backend", **linear_backend_kwargs)
 
         # vLLM arguments
         vllm_kwargs = get_kwargs(VllmConfig)
@@ -1303,6 +1459,12 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         vllm_group.add_argument(
             "--speculative-config", "-sc", **vllm_kwargs["speculative_config"]
         )
+        speculative_kwargs = get_kwargs(SpeculativeConfig)
+        vllm_group.add_argument("--spec-method", **speculative_kwargs["method"])
+        vllm_group.add_argument("--spec-model", **speculative_kwargs["model"])
+        vllm_group.add_argument(
+            "--spec-tokens", **speculative_kwargs["num_speculative_tokens"]
+        )
         vllm_group.add_argument(
             "--kv-transfer-config", **vllm_kwargs["kv_transfer_config"]
         )
@@ -1415,11 +1577,13 @@ def create_model_config(self) -> ModelConfig:
             tokenizer_revision=self.tokenizer_revision,
             max_model_len=self.max_model_len,
             quantization=self.quantization,
+            quantization_config=self.quantization_config,
             allow_deprecated_quantization=self.allow_deprecated_quantization,
             enforce_eager=self.enforce_eager,
             enable_return_routed_experts=self.enable_return_routed_experts,
             max_logprobs=self.max_logprobs,
             logprobs_mode=self.logprobs_mode,
+            use_fp64_gumbel=self.use_fp64_gumbel,
             disable_sliding_window=self.disable_sliding_window,
             disable_cascade_attn=self.disable_cascade_attn,
             skip_tokenizer_init=self.skip_tokenizer_init,
@@ -1439,10 +1603,15 @@ def create_model_config(self) -> ModelConfig:
             mm_encoder_only=self.mm_encoder_only,
             mm_encoder_tp_mode=self.mm_encoder_tp_mode,
             mm_encoder_attn_backend=self.mm_encoder_attn_backend,
+            mm_encoder_attn_dtype=self.mm_encoder_attn_dtype,
+            mm_encoder_fp8_scale_path=self.mm_encoder_fp8_scale_path,
+            mm_encoder_fp8_scale_save_path=self.mm_encoder_fp8_scale_save_path,
+            mm_encoder_fp8_scale_save_margin=self.mm_encoder_fp8_scale_save_margin,
             pooler_config=self.pooler_config,
             generation_config=self.generation_config,
             override_generation_config=self.override_generation_config,
             enable_sleep_mode=self.enable_sleep_mode,
+            enable_cumem_allocator=self.enable_cumem_allocator,
             model_impl=self.model_impl,
             override_attention_dtype=self.override_attention_dtype,
             logits_processors=self.logits_processors,
@@ -1480,6 +1649,8 @@ def create_load_config(self) -> LoadConfig:
             load_format=self.load_format,
             download_dir=self.download_dir,
             safetensors_load_strategy=self.safetensors_load_strategy,
+            safetensors_prefetch_num_threads=self.safetensors_prefetch_num_threads,
+            safetensors_prefetch_block_size=self.safetensors_prefetch_block_size,
             model_loader_extra_config=self.model_loader_extra_config,
             ignore_patterns=self.ignore_patterns,
             use_tqdm_on_load=self.use_tqdm_on_load,
@@ -1493,12 +1664,22 @@ def create_speculative_config(
     ) -> SpeculativeConfig | None:
         """Initializes and returns a SpeculativeConfig object based on
         `speculative_config`.
-
-        This function utilizes `speculative_config` to create a
-        SpeculativeConfig object. The `speculative_config` can either be
-        provided as a JSON string input via CLI arguments or directly as a
-        dictionary from the engine.
         """
+        for flag, key, value in (
+            ("--spec-method", "method", self.spec_method),
+            ("--spec-model", "model", self.spec_model),
+            ("--spec-tokens", "num_speculative_tokens", self.spec_tokens),
+        ):
+            if value is None:
+                continue
+            if self.speculative_config is None:
+                self.speculative_config = {}
+            if key in self.speculative_config:
+                raise ValueError(
+                    f"{flag} and --speculative-config['{key}'] are mutually exclusive"
+                )
+            self.speculative_config[key] = value
+
         if self.speculative_config is None:
             return None
 
@@ -1553,10 +1734,7 @@ def create_engine_config(
 
         self._check_feature_supported()
         self._set_default_chunked_prefill_and_prefix_caching_args(model_config)
-        self._set_default_max_num_seqs_and_batched_tokens_args(
-            usage_context, model_config
-        )
-
+        self._set_default_reasoning_config_args()
         sliding_window: int | None = None
         if not is_interleaved(model_config.hf_text_config):
             # Only set CacheConfig.sliding_window if the model is all sliding
@@ -1594,6 +1772,17 @@ def create_engine_config(
             kv_offloading_backend=self.kv_offloading_backend,
         )
 
+        if resolved_cache_dtype.startswith("turboquant_"):
+            from vllm.model_executor.layers.quantization.turboquant.config import (
+                TurboQuantConfig,
+            )
+
+            boundary = TurboQuantConfig.get_boundary_skip_layers(model_config)
+            existing = set(cache_config.kv_cache_dtype_skip_layers)
+            cache_config.kv_cache_dtype_skip_layers = sorted(
+                existing | set(boundary), key=int
+            )
+
         ray_runtime_env = None
         if is_ray_initialized():
             # Ray Serve LLM calls `create_engine_config` in the context
@@ -1665,6 +1854,16 @@ def create_engine_config(
         data_parallel_external_lb = (
             self.data_parallel_external_lb or self.data_parallel_rank is not None
         )
+        if (
+            self.data_parallel_size > 1
+            and data_parallel_external_lb
+            and not model_config.is_moe
+        ):
+            raise ValueError(
+                "Non-MoE models do not support external data parallel mode. "
+                "For external load balancing, launch independent vLLM "
+                "instances without --data-parallel-* arguments."
+            )
         # Local DP rank = 1, use pure-external LB.
         if data_parallel_external_lb:
             assert self.data_parallel_rank is not None, (
@@ -1768,6 +1967,7 @@ def create_engine_config(
             nnodes=self.nnodes,
             node_rank=self.node_rank,
             distributed_timeout_seconds=self.distributed_timeout_seconds,
+            cpu_distributed_timeout_seconds=self.cpu_distributed_timeout_seconds,
             data_parallel_master_ip=data_parallel_address,
             data_parallel_rpc_port=data_parallel_rpc_port,
             data_parallel_backend=self.data_parallel_backend,
@@ -1799,6 +1999,9 @@ def create_engine_config(
             cp_kv_cache_interleave_size=self.cp_kv_cache_interleave_size,
             _api_process_count=self._api_process_count,
             _api_process_rank=self._api_process_rank,
+            numa_bind=self.numa_bind,
+            numa_bind_nodes=self.numa_bind_nodes,
+            numa_bind_cpus=self.numa_bind_cpus,
         )
 
         speculative_config = self.create_speculative_config(
@@ -1806,6 +2009,12 @@ def create_engine_config(
             target_parallel_config=parallel_config,
         )
 
+        self._set_default_max_num_seqs_and_batched_tokens_args(
+            usage_context,
+            model_config,
+            parallel_config,
+        )
+
         assert self.max_num_batched_tokens is not None, (
             "max_num_batched_tokens must be set by this point"
         )
@@ -1852,6 +2061,7 @@ def create_engine_config(
                 target_modules=self.lora_target_modules,
                 enable_tower_connector_lora=self.enable_tower_connector_lora,
                 specialize_active_lora=self.specialize_active_lora,
+                enable_mixed_moe_lora_format=self.enable_mixed_moe_lora_format,
                 max_cpu_loras=self.max_cpu_loras
                 if self.max_cpu_loras and self.max_cpu_loras > 0
                 else None,
@@ -1891,6 +2101,35 @@ def create_engine_config(
                 self.attention_backend
             )
 
+        # TurboQuant requires FlashAttention 2 — FA3 boundary layers assert
+        # FlashAttentionImpl which fails with TurboQuantAttentionImpl.
+        if resolved_cache_dtype.startswith("turboquant_") and (
+            attention_config.flash_attn_version is None
+            or attention_config.flash_attn_version >= 3
+        ):
+            logger.warning(
+                "TurboQuant is not yet compatible with FlashAttention >= 3. "
+                "Overriding flash_attn_version to 2. To silence this "
+                "warning, pass --attention-config.flash_attn_version=2"
+            )
+            attention_config.flash_attn_version = 2
+
+        # Mamba config overrides
+        mamba_config = copy.deepcopy(self.mamba_config)
+        # Convert string to enum if needed (CLI parsing returns a string)
+        if isinstance(self.mamba_backend, str):
+            mamba_config.backend = MambaBackendEnum[self.mamba_backend.upper()]
+        else:
+            mamba_config.backend = self.mamba_backend
+        if self.enable_mamba_cache_stochastic_rounding:
+            mamba_config.enable_stochastic_rounding = (
+                self.enable_mamba_cache_stochastic_rounding
+            )
+        if self.mamba_cache_philox_rounds:
+            mamba_config.stochastic_rounding_philox_rounds = (
+                self.mamba_cache_philox_rounds
+            )
+
         # Kernel config overrides
         kernel_config = copy.deepcopy(self.kernel_config)
         if self.enable_flashinfer_autotune is not None:
@@ -1903,6 +2142,24 @@ def create_engine_config(
             kernel_config.enable_flashinfer_autotune = self.enable_flashinfer_autotune
         if self.moe_backend != "auto":
             kernel_config.moe_backend = self.moe_backend
+        if self.linear_backend != "auto":
+            kernel_config.linear_backend = self.linear_backend
+
+        # Transfer top-level ir_op_priority into KernelConfig.ir_op_priority
+        for op_name, op_priority in asdict(self.ir_op_priority).items():
+            # Empty means unset
+            if not op_priority:
+                continue
+
+            # Priority cannot be set 2x for the same op
+            if getattr(kernel_config.ir_op_priority, op_name):
+                raise ValueError(
+                    f"Op priority for {op_name} specified via both ir_op_priority "
+                    f"and KernelConfig.ir_op_priority, only one allowed at a time."
+                )
+
+            # Set the attribute
+            setattr(kernel_config.ir_op_priority, op_name, op_priority)
 
         load_config = self.create_load_config()
 
@@ -1973,6 +2230,7 @@ def create_engine_config(
             load_config=load_config,
             offload_config=offload_config,
             attention_config=attention_config,
+            mamba_config=mamba_config,
             kernel_config=kernel_config,
             lora_config=lora_config,
             speculative_config=speculative_config,
@@ -2126,7 +2384,6 @@ def _set_default_chunked_prefill_and_prefix_caching_args(
                 "This model does not officially support disabling chunked prefill. "
                 "Disabling this manually may cause the engine to crash "
                 "or produce incorrect outputs.",
-                scope="local",
             )
         elif (
             model_config.runner_type == "pooling"
@@ -2137,7 +2394,6 @@ def _set_default_chunked_prefill_and_prefix_caching_args(
                 "This model does not officially support chunked prefill. "
                 "Enabling this manually may cause the engine to crash "
                 "or produce incorrect outputs.",
-                scope="local",
             )
 
         if self.enable_prefix_caching is None:
@@ -2156,7 +2412,6 @@ def _set_default_chunked_prefill_and_prefix_caching_args(
                 "This model does not officially support prefix caching. "
                 "Enabling this manually may cause the engine to crash "
                 "or produce incorrect outputs.",
-                scope="local",
             )
 
         # Disable chunked prefill and prefix caching for:
@@ -2177,10 +2432,51 @@ def _set_default_chunked_prefill_and_prefix_caching_args(
             )
             self.enable_prefix_caching = False
 
+    def _set_default_reasoning_config_args(self):
+        if not self.reasoning_parser:
+            return
+        if self.reasoning_config is None:
+            self.reasoning_config = ReasoningConfig()
+        self.reasoning_config.reasoning_parser = self.reasoning_parser
+
+    @staticmethod
+    def _get_min_mm_batched_tokens(
+        model_config: ModelConfig,
+    ) -> tuple[int, str] | None:
+        """Get the minimum max_num_batched_tokens needed for a multimodal
+        prefix-LM model to process at least one item of any supported modality.
+
+        Returns (token_count, modality_name) for the most expensive modality,
+        or None if the value cannot be determined at this stage.
+        """
+        try:
+            from vllm.multimodal import MULTIMODAL_REGISTRY
+
+            # get_processing_info returns the model's multimodal processing
+            # metadata (supported modalities, token limits) without loading
+            # model weights or generating dummy data.
+            info = MULTIMODAL_REGISTRY.get_processing_info(model_config)
+            mm_counts = {modality: 1 for modality in info.supported_mm_limits}
+            # get_mm_max_tokens_per_item returns pre-computed per-item token
+            # ceilings for models that override it (e.g., Gemma4), or None
+            # for models that rely on dummy-input profiling. When None is
+            # returned we bail out — no dummy generation is triggered here.
+            max_tokens = info.get_mm_max_tokens_per_item(
+                seq_len=model_config.max_model_len,
+                mm_counts=mm_counts,
+            )
+            if max_tokens is not None:
+                modality = max(max_tokens, key=max_tokens.__getitem__)
+                return (max_tokens[modality], modality)
+        except Exception as e:
+            logger.warning("Failed to determine min multimodal batched tokens: %s", e)
+        return None
+
     def _set_default_max_num_seqs_and_batched_tokens_args(
         self,
         usage_context: UsageContext | None,
         model_config: ModelConfig,
+        parallel_config: ParallelConfig,
     ):
         world_size = self.pipeline_parallel_size * self.tensor_parallel_size
         (
@@ -2192,10 +2488,15 @@ def _set_default_max_num_seqs_and_batched_tokens_args(
         orig_max_num_seqs = self.max_num_seqs
 
         if self.max_num_batched_tokens is None:
-            self.max_num_batched_tokens = default_max_num_batched_tokens.get(
-                usage_context,
-                SchedulerConfig.DEFAULT_MAX_NUM_BATCHED_TOKENS,
-            )
+            if parallel_config.use_batched_dp_moe:
+                self.max_num_batched_tokens = (
+                    SchedulerConfig.DEFAULT_MAX_NUM_BATCHED_TOKENS_FOR_BATCHED_DP
+                )
+            else:
+                self.max_num_batched_tokens = default_max_num_batched_tokens.get(
+                    usage_context,
+                    SchedulerConfig.DEFAULT_MAX_NUM_BATCHED_TOKENS,
+                )
 
         if self.max_num_seqs is None:
             self.max_num_seqs = default_max_num_seqs.get(
@@ -2221,6 +2522,23 @@ def _set_default_max_num_seqs_and_batched_tokens_args(
                     self.max_num_batched_tokens,
                 )
 
+            # For multimodal prefix-LM models (e.g., Gemma 4) that disable
+            # chunked MM input, a single multimodal item must fit in one batch.
+            # Raise the floor to accommodate the largest per-item token count.
+            if model_config.is_multimodal_model and model_config.is_mm_prefix_lm:
+                result = self._get_min_mm_batched_tokens(model_config)
+                if result is not None and result[0] > self.max_num_batched_tokens:
+                    mm_min, modality = result
+                    logger.info(
+                        "Raising max_num_batched_tokens from %d to %d to "
+                        "accommodate '%s' input for prefix-LM model %s.",
+                        self.max_num_batched_tokens,
+                        mm_min,
+                        modality,
+                        model_config.model,
+                    )
+                    self.max_num_batched_tokens = mm_min
+
             # When using default settings,
             # Ensure max_num_batched_tokens does not exceed model limit.
             # Some models (e.g., Whisper) have embeddings tied to max length.
@@ -2281,68 +2599,3 @@ def _raise_unsupported_error(feature_name: str):
         f"remove {feature_name} from your config."
     )
     raise NotImplementedError(msg)
-
-
-def human_readable_int(value: str) -> int:
-    """Parse human-readable integers like '1k', '2M', etc.
-    Including decimal values with decimal multipliers.
-
-    Examples:
-    - '1k' -> 1,000
-    - '1K' -> 1,024
-    - '25.6k' -> 25,600
-    """
-    value = value.strip()
-
-    match = re.fullmatch(r"(\d+(?:\.\d+)?)([kKmMgGtT])", value)
-    if match:
-        decimal_multiplier = {
-            "k": 10**3,
-            "m": 10**6,
-            "g": 10**9,
-            "t": 10**12,
-        }
-        binary_multiplier = {
-            "K": 2**10,
-            "M": 2**20,
-            "G": 2**30,
-            "T": 2**40,
-        }
-
-        number, suffix = match.groups()
-        if suffix in decimal_multiplier:
-            mult = decimal_multiplier[suffix]
-            return int(float(number) * mult)
-        elif suffix in binary_multiplier:
-            mult = binary_multiplier[suffix]
-            # Do not allow decimals with binary multipliers
-            try:
-                return int(number) * mult
-            except ValueError as e:
-                raise argparse.ArgumentTypeError(
-                    "Decimals are not allowed "
-                    f"with binary suffixes like {suffix}. Did you mean to use "
-                    f"{number}{suffix.lower()} instead?"
-                ) from e
-
-    # Regular plain number.
-    return int(value)
-
-
-def human_readable_int_or_auto(value: str) -> int:
-    """Parse human-readable integers like '1k', '2M', etc.
-    Including decimal values with decimal multipliers.
-    Also accepts -1 or 'auto' as a special value for auto-detection.
-
-    Examples:
-    - '1k' -> 1,000
-    - '1K' -> 1,024
-    - '25.6k' -> 25,600
-    - '-1' or 'auto' -> -1 (special value for auto-detection)
-    """
-    value = value.strip()
-
-    if value == "-1" or value.lower() == "auto":
-        return -1
-
-    return human_readable_int(value)
diff --git a/vllm/engine/protocol.py b/vllm/engine/protocol.py
index 3d466e3fc2af..3f83734a5b78 100644
--- a/vllm/engine/protocol.py
+++ b/vllm/engine/protocol.py
@@ -14,7 +14,6 @@
 from vllm.inputs import EngineInput, PromptType
 from vllm.lora.request import LoRARequest
 from vllm.outputs import PoolingRequestOutput, RequestOutput
-from vllm.plugins.io_processors import IOProcessor
 from vllm.pooling_params import PoolingParams
 from vllm.renderers import BaseRenderer
 from vllm.sampling_params import SamplingParams
@@ -44,7 +43,6 @@ class EngineClient(ABC):
     vllm_config: VllmConfig
     model_config: ModelConfig
     renderer: BaseRenderer
-    io_processor: IOProcessor | None
     input_processor: InputProcessor
 
     @property
@@ -80,6 +78,7 @@ def generate(
         priority: int = 0,
         data_parallel_rank: int | None = None,
         reasoning_ended: bool | None = None,
+        reasoning_parser_kwargs: dict[str, Any] | None = None,
     ) -> AsyncGenerator[RequestOutput, None]:
         """Generate outputs for a request."""
         ...
@@ -109,6 +108,20 @@ async def abort(self, request_id: str | Iterable[str]) -> None:
         """
         ...
 
+    @abstractmethod
+    async def notify_kv_transfer_request_rejected(
+        self,
+        request_id: str,
+        kv_transfer_params: dict[str, Any],
+        *,
+        data_parallel_rank: int | None = None,
+    ) -> None:
+        """Notify the engine that a KV-transfer request was rejected before
+        engine admission, so connector-side cleanup can run (e.g. free
+        prefill blocks pinned on the P node).
+        """
+        ...
+
     @abstractmethod
     async def is_tracing_enabled(self) -> bool: ...
 
@@ -231,6 +244,14 @@ async def init_weight_transfer_engine(
         """Initialize weight transfer for RL training."""
         raise NotImplementedError
 
+    async def start_weight_update(self, is_checkpoint_format: bool = True) -> None:
+        """Start a new weight update."""
+        raise NotImplementedError
+
     async def update_weights(self, request: WeightTransferUpdateRequest) -> None:
         """Batched weight update for RL training."""
         raise NotImplementedError
+
+    async def finish_weight_update(self) -> None:
+        """Finish the current weight update."""
+        raise NotImplementedError
diff --git a/vllm/entrypoints/anthropic/protocol.py b/vllm/entrypoints/anthropic/protocol.py
index 3445f709109f..f3c4dd7f3e32 100644
--- a/vllm/entrypoints/anthropic/protocol.py
+++ b/vllm/entrypoints/anthropic/protocol.py
@@ -39,6 +39,7 @@ class AnthropicContentBlock(BaseModel):
         "image",
         "tool_use",
         "tool_result",
+        "tool_reference",
         "thinking",
         "redacted_thinking",
     ]
@@ -52,6 +53,8 @@ class AnthropicContentBlock(BaseModel):
     input: dict[str, Any] | None = None
     content: str | list[dict[str, Any]] | None = None
     is_error: bool | None = None
+    # For tool_reference content
+    tool_name: str | None = None
     # For thinking content
     thinking: str | None = None
     signature: str | None = None
@@ -72,6 +75,7 @@ class AnthropicTool(BaseModel):
     name: str
     description: str | None = None
     input_schema: dict[str, Any]
+    defer_loading: bool | None = None
 
     @field_validator("input_schema")
     @classmethod
@@ -117,6 +121,13 @@ class AnthropicMessagesRequest(BaseModel):
         default=None,
         description="KVTransfer parameters used for disaggregated serving.",
     )
+    chat_template_kwargs: dict[str, Any] | None = Field(
+        default=None,
+        description=(
+            "Additional keyword args to pass to the chat template renderer. "
+            "Will be accessible by the template."
+        ),
+    )
 
     @field_validator("model")
     @classmethod
@@ -212,6 +223,15 @@ class AnthropicCountTokensRequest(BaseModel):
     tool_choice: AnthropicToolChoice | None = None
     tools: list[AnthropicTool] | None = None
 
+    # vLLM-specific fields that are not in Anthropic spec
+    chat_template_kwargs: dict[str, Any] | None = Field(
+        default=None,
+        description=(
+            "Additional keyword args to pass to the chat template renderer. "
+            "Will be accessible by the template."
+        ),
+    )
+
     @field_validator("model")
     @classmethod
     def validate_model(cls, v):
diff --git a/vllm/entrypoints/anthropic/serving.py b/vllm/entrypoints/anthropic/serving.py
index 9270a49d1d1b..867ee73948ff 100644
--- a/vllm/entrypoints/anthropic/serving.py
+++ b/vllm/entrypoints/anthropic/serving.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 # Adapted from
-# https://github.com/vllm/vllm/entrypoints/openai/serving_chat.py
+# https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/chat_completion/serving.py
 
 """Anthropic Messages API serving handler"""
 
@@ -170,7 +170,8 @@ def _convert_messages(
             else:
                 cls._convert_message_content(msg, openai_msg, openai_messages)
 
-            openai_messages.append(openai_msg)
+            if not (msg.role == "user" and "content" not in openai_msg):
+                openai_messages.append(openai_msg)
 
     @classmethod
     def _convert_message_content(
@@ -236,6 +237,10 @@ def _convert_block(
             cls._convert_tool_use_block(block, tool_calls)
         elif block.type == "tool_result":
             cls._convert_tool_result_block(block, role, openai_messages, content_parts)
+        elif block.type == "tool_reference":
+            # Tool references are expanded during tool_result processing
+            # when they appear inside tool_result content.
+            pass
 
     @classmethod
     def _convert_tool_use_block(cls, block, tool_calls: list[dict[str, Any]]) -> None:
@@ -274,6 +279,7 @@ def _convert_user_tool_result(
         """Convert user tool_result with text and image support"""
         tool_text = ""
         tool_image_urls: list[str] = []
+        tool_reference: list[dict[str, Any]] = []
 
         if isinstance(block.content, str):
             tool_text = block.content
@@ -290,6 +296,12 @@ def _convert_user_tool_result(
                     url = cls._convert_image_source_to_url(source)
                     if url:
                         tool_image_urls.append(url)
+                elif item_type == "tool_reference":
+                    ref_name = item.get("tool_name") or item.get("name")
+                    if ref_name:
+                        tool_reference.append(
+                            {"type": "tool_reference", "name": ref_name}
+                        )
             tool_text = "\n".join(text_parts)
 
         openai_messages.append(
@@ -311,6 +323,15 @@ def _convert_user_tool_result(
                 }
             )
 
+        if tool_reference:
+            openai_messages.append(
+                {
+                    "role": "tool",
+                    "tool_call_id": block.tool_use_id or "",
+                    "content": tool_reference,  # type: ignore[dict-item]
+                }
+            )
+
     @classmethod
     def _build_base_request(
         cls,
@@ -322,6 +343,7 @@ def _build_base_request(
             return ChatCompletionRequest(
                 model=anthropic_request.model,
                 messages=openai_messages,
+                chat_template_kwargs=anthropic_request.chat_template_kwargs,
             )
 
         return ChatCompletionRequest(
@@ -334,6 +356,7 @@ def _build_base_request(
             top_p=anthropic_request.top_p,
             top_k=anthropic_request.top_k,
             kv_transfer_params=anthropic_request.kv_transfer_params,
+            chat_template_kwargs=anthropic_request.chat_template_kwargs,
         )
 
     @classmethod
@@ -397,6 +420,7 @@ def _convert_tools(
                             "name": tool.name,
                             "description": tool.description,
                             "parameters": tool.input_schema,
+                            "defer_loading": tool.defer_loading,
                         },
                     }
                 )
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 51e62042f3e6..35256bc647d2 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -3,7 +3,6 @@
 
 import asyncio
 import json
-import warnings
 from abc import ABC, abstractmethod
 from collections import Counter, defaultdict
 from collections.abc import Awaitable, Callable, Iterable
@@ -11,7 +10,7 @@
 from functools import cached_property, lru_cache, partial
 from itertools import accumulate
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Generic, Literal, TypeAlias, TypeVar, cast
+from typing import TYPE_CHECKING, Any, Final, Generic, Literal, TypeAlias, TypeVar, cast
 
 from openai.types.chat import (
     ChatCompletionAssistantMessageParam,
@@ -36,10 +35,11 @@
 from pydantic import BaseModel, ConfigDict, TypeAdapter
 
 # pydantic needs the TypedDict from typing_extensions
-from typing_extensions import Required, TypedDict
+from typing_extensions import Required, TypedDict, override
 
 from vllm import envs
 from vllm.config import ModelConfig
+from vllm.exceptions import VLLMValidationError
 from vllm.inputs import MultiModalDataDict, MultiModalUUIDDict
 from vllm.logger import init_logger
 from vllm.model_executor.models import SupportsMultiModal
@@ -54,6 +54,10 @@
 )
 from vllm.multimodal.media import MEDIA_CONNECTOR_REGISTRY, MediaConnector
 from vllm.multimodal.processing import BaseMultiModalProcessor
+from vllm.renderers.embed_utils import (
+    safe_load_prompt_embeds,
+    safe_load_prompt_embeds_async,
+)
 from vllm.utils import random_uuid
 from vllm.utils.collection_utils import is_list_of
 from vllm.utils.import_utils import LazyLoader
@@ -68,23 +72,6 @@
 logger = init_logger(__name__)
 
 
-def __getattr__(name: str):
-    if name == "resolve_hf_chat_template":
-        from vllm.renderers.hf import resolve_chat_template
-
-        warnings.warn(
-            "`vllm.entrypoints.chat_utils.resolve_hf_chat_template` has been moved to "
-            "`vllm.renderers.hf.resolve_chat_template`. "
-            "The old name will be removed in v0.16.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-
-        return resolve_chat_template
-
-    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
-
-
 class ChatTemplateResolutionError(ValueError):
     """Raised when chat template resolution fails.
 
@@ -97,9 +84,40 @@ class ChatTemplateResolutionError(ValueError):
     "image": "<##IMAGE##>",
     "audio": "<##AUDIO##>",
     "video": "<##VIDEO##>",
+    "prompt_embeds": "<##PROMPT_EMBEDS##>",
 }
 
 
+PROMPT_EMBEDS_PLACEHOLDER_TOKEN: Final[str] = "<prompt_embeds>"
+"""The special token used as a placeholder for each embedding
+position during chat template rendering.
+
+Registered as an additional special token when `--enable-prompt-embeds` is set.
+See `_ensure_prompt_embeds_placeholder_token` in `vllm/renderers/hf.py`.
+"""
+
+
+_REQUIRE_MM_PROCESSOR_ERROR: Final[str] = (
+    "Resolving modality {modality!r} requires a multimodal processor "
+    "but none is available."
+)
+
+_ENABLE_PROMPT_EMBEDS_ERROR: Final[str] = (
+    "You must set `--enable-prompt-embeds` to input `prompt_embeds`"
+)
+
+_PROMPT_EMBEDS_MISSING_DATA_ERROR: Final[str] = (
+    "prompt_embeds content part requires a non-empty `data` field "
+    "with base64-encoded tensor bytes."
+)
+
+_RESERVED_PLACEHOLDER_IN_TEXT_ERROR: Final[str] = (
+    "Text content may not contain the reserved placeholder {token!r}. "
+    "This placeholder is used internally to mark `prompt_embeds` splice "
+    "positions in the tokenized prompt."
+)
+
+
 class AudioURL(TypedDict, total=False):
     url: Required[str]
     """
@@ -146,6 +164,17 @@ class ChatCompletionContentPartAudioEmbedsParam(TypedDict, total=False):
     """
 
 
+class ChatCompletionContentPartPromptEmbedsParam(TypedDict, total=False):
+    data: Required[str]
+    """
+    Base64-encoded bytes of a serialized `torch.Tensor` of shape
+    `(num_tokens, hidden_size)`. The tensor's `dtype` and `hidden_size` must
+    match the model's input embedding layer.
+    """
+    type: Required[Literal["prompt_embeds"]]
+    """The type of the content part."""
+
+
 class VideoURL(TypedDict, total=False):
     url: Required[str]
     """
@@ -254,6 +283,23 @@ class CustomThinkCompletionContentParam(TypedDict, total=False):
     """The thinking type."""
 
 
+class CustomChatCompletionContentToolReferenceParam(TypedDict, total=False):
+    """A tool reference content param that only accepts a plain tool name.
+
+    Example:
+    {
+        "name": "get_weather",
+        "type": "tool_reference"
+    }
+    """
+
+    name: str
+    """The name of the tool being referenced."""
+
+    type: Literal["tool_reference"]
+    """The content type."""
+
+
 ChatCompletionContentPartParam: TypeAlias = (
     OpenAIChatCompletionContentPartParam
     | ChatCompletionContentPartAudioParam
@@ -264,8 +310,10 @@ class CustomThinkCompletionContentParam(TypedDict, total=False):
     | CustomChatCompletionContentSimpleImageParam
     | ChatCompletionContentPartImageEmbedsParam
     | ChatCompletionContentPartAudioEmbedsParam
+    | ChatCompletionContentPartPromptEmbedsParam
     | CustomChatCompletionContentSimpleAudioParam
     | CustomChatCompletionContentSimpleVideoParam
+    | CustomChatCompletionContentToolReferenceParam
     | str
     | CustomThinkCompletionContentParam
 )
@@ -290,7 +338,7 @@ class CustomChatCompletionMessageParam(TypedDict, total=False):
     tool_call_id: str | None
     """Tool call that this message is responding to."""
 
-    tool_calls: Iterable[ChatCompletionMessageToolCallParam] | None
+    tool_calls: list[ChatCompletionMessageToolCallParam] | None
     """The tool calls generated by the model, such as function calls."""
 
     reasoning: str | None
@@ -299,6 +347,9 @@ class CustomChatCompletionMessageParam(TypedDict, total=False):
     tools: list[ChatCompletionFunctionToolParam] | None
     """The tools for developer role."""
 
+    task: str | None
+    """Model-specific task marker. Currently passed through for DeepSeek V4."""
+
 
 ChatCompletionMessageParam: TypeAlias = (
     OpenAIChatCompletionMessageParam
@@ -321,7 +372,7 @@ class ConversationMessage(TypedDict, total=False):
     name: str | None
     """The name of the function to call"""
 
-    tool_calls: Iterable[ChatCompletionMessageToolCallParam] | None
+    tool_calls: list[ChatCompletionMessageToolCallParam] | None
     """The tool calls generated by the model, such as function calls."""
 
     reasoning: str | None
@@ -333,6 +384,9 @@ class ConversationMessage(TypedDict, total=False):
     tools: list[ChatCompletionFunctionToolParam] | None
     """The tools for developer role."""
 
+    task: str | None
+    """Model-specific task marker. Currently passed through for DeepSeek V4."""
+
 
 # Passed in by user
 ChatTemplateContentFormatOption = Literal["auto", "string", "openai"]
@@ -342,7 +396,13 @@ class ConversationMessage(TypedDict, total=False):
 
 
 ModalityStr = Literal[
-    "image", "audio", "video", "image_embeds", "audio_embeds", "vision_chunk"
+    "image",
+    "audio",
+    "video",
+    "image_embeds",
+    "audio_embeds",
+    "vision_chunk",
+    "prompt_embeds",
 ]
 _T = TypeVar("_T")
 
@@ -524,7 +584,17 @@ def add(self, modality: ModalityStr, item: _T) -> str | None:
 
         An optional uuid can be added which serves as a unique identifier of the
         media.
+
+        Note:
+            `prompt_embeds` bypass MM-processor validation because they are
+            pre-computed embeddings that do not go through any HF processor, encoder,
+            or model-specific placeholder logic. The corresponding placeholder string is
+            managed by the parser via `_add_placeholder`, so we return None here.
         """
+        if modality == "prompt_embeds":
+            self._items_by_modality["prompt_embeds"].append(item)
+            return None
+
         input_modality = modality.replace("_embeds", "")
         original_modality = modality
         use_vision_chunk = (
@@ -635,17 +705,32 @@ def _resolve_vision_chunk_items(
 
 def _resolve_items(
     items_by_modality: dict[str, list[tuple[object, str | None]]],
-    mm_processor: BaseMultiModalProcessor,
+    mm_processor: BaseMultiModalProcessor | None,
     modality_order: dict[str, list[str]],
 ) -> tuple[MultiModalDataDict, MultiModalUUIDDict]:
+    """
+    Materialize the tracker's per-modality items into `mm_data` / `mm_uuids`.
+
+    Note:
+        `mm_processor` is `None` for text-only models (no registered HF
+        processor) whose only modality is `prompt_embeds`. Every other
+        modality requires a processor, enforced by the guard below.
+    """
     if "image" in items_by_modality and "image_embeds" in items_by_modality:
         raise ValueError("Mixing raw image and embedding inputs is not allowed")
     if "audio" in items_by_modality and "audio_embeds" in items_by_modality:
         raise ValueError("Mixing raw audio and embedding inputs is not allowed")
+    # `prompt_embeds` bypasses HF MM processors. Every other modality requires one.
+    processor_modalities = items_by_modality.keys() - {"prompt_embeds"}
+    if processor_modalities and mm_processor is None:
+        raise RuntimeError(
+            _REQUIRE_MM_PROCESSOR_ERROR.format(modality=processor_modalities)
+        )
 
     mm_data = {}
     mm_uuids = {}
     if "image_embeds" in items_by_modality:
+        assert mm_processor is not None
         mm_data["image"] = _get_embeds_data(
             "image",
             [data for data, uuid in items_by_modality["image_embeds"]],
@@ -656,6 +741,7 @@ def _resolve_items(
         mm_data["image"] = [data for data, uuid in items_by_modality["image"]]
         mm_uuids["image"] = [uuid for data, uuid in items_by_modality["image"]]
     if "audio_embeds" in items_by_modality:
+        assert mm_processor is not None
         mm_data["audio"] = _get_embeds_data(
             "audio",
             [data for data, uuid in items_by_modality["audio_embeds"]],
@@ -669,6 +755,7 @@ def _resolve_items(
         mm_data["video"] = [data for data, uuid in items_by_modality["video"]]
         mm_uuids["video"] = [uuid for data, uuid in items_by_modality["video"]]
     if "vision_chunk" in items_by_modality:
+        assert mm_processor is not None
         # Process vision_chunk items - extract from (data, modality) tuples
         # and convert to VisionChunk types with proper UUID handling
         processed_chunks, vision_chunk_uuids = _resolve_vision_chunk_items(
@@ -678,6 +765,10 @@ def _resolve_items(
         )
         mm_data["vision_chunk"] = processed_chunks
         mm_uuids["vision_chunk"] = vision_chunk_uuids
+    if "prompt_embeds" in items_by_modality:
+        mm_data["prompt_embeds"] = [
+            data for data, _uuid in items_by_modality["prompt_embeds"]
+        ]
 
     return mm_data, mm_uuids
 
@@ -689,8 +780,16 @@ def resolve_items(
         if not self._items_by_modality:
             return None, None
 
+        # Text-only models (`is_multimodal_model=False`) with inputs of
+        # modality `prompt_embeds` have no MM processor since `prompt_embeds` are
+        # pre-computed and require no processing, so we pass `None`.
+        mm_processor = (
+            self.mm_processor if self._model_config.is_multimodal_model else None
+        )
         return _resolve_items(
-            dict(self._items_by_modality), self.mm_processor, self._modality_order
+            dict(self._items_by_modality),
+            mm_processor,
+            self._modality_order,
         )
 
     def create_parser(
@@ -713,8 +812,13 @@ async def resolve_items(
             for modality, coros in self._items_by_modality.items()
         }
 
+        mm_processor = (
+            self.mm_processor if self._model_config.is_multimodal_model else None
+        )
         return _resolve_items(
-            resolved_items_by_modality, self.mm_processor, self._modality_order
+            resolved_items_by_modality,
+            mm_processor,
+            self._modality_order,
         )
 
     def create_parser(
@@ -733,10 +837,16 @@ def __init__(self) -> None:
         # general MM placeholder:
         # {
         #   "<##IMAGE##>": ["<image>", "<image>", "<image>"],
-        #   "<##AUDIO##>": ["<audio>", "<audio>"]
+        #   "<##AUDIO##>": ["<audio>", "<audio>"],
+        #   "<##PROMPT_EMBEDS##>": ["<prompt_embeds>", "<prompt_embeds>"]
         # }
         self._placeholder_storage: dict[str, list] = defaultdict(list)
 
+    @property
+    @abstractmethod
+    def model_config(self) -> ModelConfig:
+        raise NotImplementedError
+
     def _add_placeholder(self, modality: ModalityStr, placeholder: str | None):
         mod_placeholder = MODALITY_PLACEHOLDERS_MAP[modality]
         if placeholder:
@@ -781,6 +891,10 @@ def parse_audio_embeds(
     ) -> None:
         raise NotImplementedError
 
+    @abstractmethod
+    def parse_prompt_embeds(self, data: str) -> None:
+        raise NotImplementedError
+
     @abstractmethod
     def parse_video(self, video_url: str | None, uuid: str | None = None) -> None:
         raise NotImplementedError
@@ -809,6 +923,21 @@ def __init__(
     def model_config(self) -> ModelConfig:
         return self._tracker.model_config
 
+    @override
+    def parse_prompt_embeds(self, data: str) -> None:
+        """Decode a base64 prompt embeds tensor and store it in the tracker.
+
+        Emits a single `PROMPT_EMBEDS_PLACEHOLDER_TOKEN` sentinel per
+        content part. The renderer later expands each sentinel to a span of
+        `tensor.shape[0]` placeholder tokens after tokenization.
+        """
+        if not self.model_config.enable_prompt_embeds:
+            raise ValueError(_ENABLE_PROMPT_EMBEDS_ERROR)
+
+        tensor = safe_load_prompt_embeds(self.model_config, data.encode())
+        self._tracker.add("prompt_embeds", (tensor, None))
+        self._add_placeholder("prompt_embeds", PROMPT_EMBEDS_PLACEHOLDER_TOKEN)
+
     def parse_image(self, image_url: str | None, uuid: str | None = None) -> None:
         image = self._connector.fetch_image(image_url) if image_url else None
 
@@ -933,6 +1062,29 @@ def __init__(
     def model_config(self) -> ModelConfig:
         return self._tracker.model_config
 
+    @override
+    def parse_prompt_embeds(self, data: str) -> None:
+        """Schedule async prompt embeds decode and store the coroutine in the tracker.
+
+        Like the sync variant, emits a single sentinel `PROMPT_EMBEDS_PLACEHOLDER_TOKEN`
+        per content part. Unlike the sync variant, the tensor decode is deferred to a
+        thread-pool executor via `safe_load_prompt_embeds_async`.
+        """
+        if not self.model_config.enable_prompt_embeds:
+            raise ValueError(_ENABLE_PROMPT_EMBEDS_ERROR)
+
+        coro = self._load_prompt_embeds_async(data.encode())
+        self._tracker.add("prompt_embeds", coro)
+        self._add_placeholder("prompt_embeds", PROMPT_EMBEDS_PLACEHOLDER_TOKEN)
+
+    async def _load_prompt_embeds_async(
+        self, data_bytes: bytes
+    ) -> tuple[torch.Tensor, None]:
+        # Second tuple slot fills the tracker's generic `(item, uuid | None)`
+        # contract. prompt_embeds has no UUID concept, so it's always `None`.
+        tensor = await safe_load_prompt_embeds_async(self.model_config, data_bytes)
+        return tensor, None
+
     async def _image_with_uuid_async(self, image_url: str | None, uuid: str | None):
         image = (
             await self._connector.fetch_image_async(image_url) if image_url else None
@@ -1187,6 +1339,7 @@ def _get_full_multimodal_text_prompt(
     placeholder_storage: dict[str, list],
     texts: list[str],
     interleave_strings: bool,
+    multimodal_content_part_separator: str = "\n",
 ) -> str:
     """Combine multimodal prompts for a multimodal language model."""
 
@@ -1232,15 +1385,18 @@ def _get_full_multimodal_text_prompt(
     # NOTE: Default behaviour: we always add missing placeholders
     # at the front of the prompt, if interleave_strings=False
     if text_prompt:
-        return "\n".join(missing_placeholders + [text_prompt])
+        return multimodal_content_part_separator.join(
+            missing_placeholders + [text_prompt]
+        )
     else:
-        return "\n".join(missing_placeholders)
+        return multimodal_content_part_separator.join(missing_placeholders)
 
 
 # No need to validate using Pydantic again
 _TextParser = partial(cast, ChatCompletionContentPartTextParam)
 _ImageEmbedsParser = partial(cast, ChatCompletionContentPartImageEmbedsParam)
 _AudioEmbedsParser = partial(cast, ChatCompletionContentPartAudioEmbedsParam)
+_PromptEmbedsParser = partial(cast, ChatCompletionContentPartPromptEmbedsParam)
 _InputAudioParser = partial(cast, ChatCompletionContentPartInputAudioParam)
 _RefusalParser = partial(cast, ChatCompletionContentPartRefusalParam)
 _PILImageParser = partial(cast, CustomChatCompletionContentPILImageParam)
@@ -1266,11 +1422,15 @@ def _get_full_multimodal_text_prompt(
     "image_url": lambda part: _ImageParser(part).get("image_url", {}).get("url", None),
     "image_embeds": lambda part: _ImageEmbedsParser(part).get("image_embeds", None),
     "audio_embeds": lambda part: _AudioEmbedsParser(part).get("audio_embeds", None),
+    "prompt_embeds": lambda part: _PromptEmbedsParser(part).get("data", None),
     "image_pil": lambda part: _PILImageParser(part).get("image_pil", None),
     "audio_url": lambda part: _AudioParser(part).get("audio_url", {}).get("url", None),
     "input_audio": lambda part: _InputAudioParser(part).get("input_audio", None),
     "refusal": lambda part: _RefusalParser(part).get("refusal", None),
     "video_url": lambda part: _VideoParser(part).get("video_url", {}).get("url", None),
+    "tool_reference": lambda part: cast(
+        CustomChatCompletionContentToolReferenceParam, part
+    ).get("name", None),
 }
 
 
@@ -1341,6 +1501,11 @@ def _parse_chat_message_content_mm_part(
             )
             audio_embeds = audio_params.get("audio_embeds", None)
             return "audio_embeds", audio_embeds
+        if "prompt_embeds" in part:
+            prompt_embeds_params = cast(  # type: ignore[assignment]
+                ChatCompletionContentPartPromptEmbedsParam, part
+            )
+            return "prompt_embeds", prompt_embeds_params.get("data", None)
         if "audio_url" in part:
             audio_params = cast(  # type: ignore[assignment]
                 CustomChatCompletionContentSimpleAudioParam, part
@@ -1352,7 +1517,7 @@ def _parse_chat_message_content_mm_part(
                 audio_url = audio_url.get("url", None)
             return "audio_url", audio_url
         if part.get("input_audio") is not None:
-            input_audio_params = cast(dict[str, str], part)
+            input_audio_params = _InputAudioParser(part).get("input_audio", None)
             return "input_audio", input_audio_params
         if "video_url" in part:
             video_params = cast(CustomChatCompletionContentSimpleVideoParam, part)
@@ -1362,6 +1527,12 @@ def _parse_chat_message_content_mm_part(
                 # with url as a dict of {"url": url}
                 video_url = video_url.get("url", None)
             return "video_url", video_url
+        if "tool_reference" in part:
+            tool_reference_params = cast(
+                CustomChatCompletionContentToolReferenceParam, part
+            )
+            tool_reference = tool_reference_params.get("name", None)
+            return "tool_reference", tool_reference
         # Raise an error if no 'type' or direct URL is found.
         raise ValueError("Missing 'type' field in multimodal part.")
 
@@ -1384,6 +1555,7 @@ def _parse_chat_message_content_parts(
     wrap_dicts: bool,
     interleave_strings: bool,
     mm_processor_kwargs: dict[str, Any] | None = None,
+    multimodal_content_part_separator="\n",
 ) -> list[ConversationMessage]:
     content = list[_ContentPart]()
 
@@ -1406,7 +1578,10 @@ def _parse_chat_message_content_parts(
     mm_placeholder_storage = mm_parser.mm_placeholder_storage()
     if mm_placeholder_storage:
         text_prompt = _get_full_multimodal_text_prompt(
-            mm_placeholder_storage, texts, interleave_strings
+            mm_placeholder_storage,
+            texts,
+            interleave_strings,
+            multimodal_content_part_separator=multimodal_content_part_separator,
         )
     else:
         text_prompt = "\n".join(texts)
@@ -1414,6 +1589,24 @@ def _parse_chat_message_content_parts(
     return [ConversationMessage(role=role, content=text_prompt)]
 
 
+def _reject_reserved_placeholder_in_text(text: str, model_config: ModelConfig) -> None:
+    """Reject user-supplied text parts that contains the reserved `prompt_embeds`
+    placeholder sentinel.
+
+    When the server accepts `prompt_embeds`, the placeholder token is
+    registered as a single unsplittable special token on the tokenizer. Any
+    user text that happens to contain the literal sequence would tokenize to
+    the same ID and be mistaken for a splice point by the renderer, letting a
+    caller move or inject splice positions via plain text content.
+    """
+    if model_config.enable_prompt_embeds and PROMPT_EMBEDS_PLACEHOLDER_TOKEN in text:
+        raise ValueError(
+            _RESERVED_PLACEHOLDER_IN_TEXT_ERROR.format(
+                token=PROMPT_EMBEDS_PLACEHOLDER_TOKEN
+            )
+        )
+
+
 def _parse_chat_message_content_part(
     part: ChatCompletionContentPartParam,
     mm_parser: BaseMultiModalContentParser,
@@ -1429,6 +1622,7 @@ def _parse_chat_message_content_part(
     with multimodal placeholders.
     """
     if isinstance(part, str):  # Handle plain text parts
+        _reject_reserved_placeholder_in_text(part, mm_parser.model_config)
         if wrap_dicts:
             return {"type": "text", "text": part}
         return part
@@ -1447,6 +1641,7 @@ def _parse_chat_message_content_part(
 
     if part_type in ("text", "input_text", "output_text", "refusal", "thinking"):
         str_content = cast(str, content)
+        _reject_reserved_placeholder_in_text(str_content, mm_parser.model_config)
         if wrap_dicts:
             return {"type": "text", "text": str_content}
         else:
@@ -1475,6 +1670,11 @@ def _parse_chat_message_content_part(
         content = cast(str | dict[str, str], content) if content is not None else None
         mm_parser.parse_audio_embeds(content, uuid)
         modality = "audio"
+    elif part_type == "prompt_embeds":
+        if not content:
+            raise ValueError(_PROMPT_EMBEDS_MISSING_DATA_ERROR)
+        mm_parser.parse_prompt_embeds(cast(str, content))
+        modality = "prompt_embeds"
     elif part_type == "audio_url":
         str_content = cast(str, content)
         mm_parser.parse_audio(str_content, uuid)
@@ -1487,11 +1687,34 @@ def _parse_chat_message_content_part(
         str_content = cast(str, content)
         mm_parser.parse_video(str_content, uuid)
         modality = "video"
+    elif part_type == "tool_reference":
+        # Tool references are not multimodal data — they reference deferred
+        # tools and are passed through as-is for the chat template to expand.
+        if wrap_dicts:
+            return {"type": "tool_reference", "name": cast(str, content)}
+        return cast(str, content)
     else:
-        raise NotImplementedError(f"Unknown part type: {part_type}")
+        supported = sorted(MM_PARSER_MAP.keys() | set(PART_TYPES_TO_SKIP_NONE_CONTENT))
+        raise VLLMValidationError(
+            f"Unsupported chat content part type: {part_type!r}. "
+            f"Supported types: {', '.join(supported)}.",
+            parameter="type",
+            value=part_type,
+        )
 
     if wrap_dicts:
+        if modality == "prompt_embeds":
+            # Chat templates don't know about the "prompt_embeds" modality,
+            # emit the single sentinel token as text so the template renders
+            # it inline. The renderer later expands it to N tokens post-tokenize.
+            return {"type": "text", "text": PROMPT_EMBEDS_PLACEHOLDER_TOKEN}
         return {"type": modality}
+    if modality == "prompt_embeds":
+        # Emit the renderer token inline regardless of `interleave_strings`,
+        # prompt_embeds are spliced at the token offset so position matters.
+        # Falling back to front-padding via `missing_placeholders` would
+        # reorder them relative to surrounding text.
+        return PROMPT_EMBEDS_PLACEHOLDER_TOKEN
     return MODALITY_PLACEHOLDERS_MAP[modality] if interleave_strings else None
 
 
@@ -1543,10 +1766,35 @@ def _parse_chat_message_content(
             parsed_msg = _ToolParser(message)
             if "tool_call_id" in parsed_msg:
                 result_msg["tool_call_id"] = parsed_msg["tool_call_id"]
+            # Normalize tool message content from OpenAI array format to plain
+            # string. Clients like Claude Code / Cursor send tool results as
+            # [{"type": "text", "text": "..."}], but most chat templates only
+            # handle string content for tool messages.
+            # However, tool_reference items must be preserved as structured
+            # dicts for the chat template to expand them.
+            msg_content = result_msg.get("content")
+            if isinstance(msg_content, list):
+                has_non_text = any(
+                    isinstance(item, dict) and item.get("type") != "text"
+                    for item in msg_content
+                )
+                if has_non_text:
+                    # Keep structured content (e.g., tool_reference)
+                    result_msg["content"] = msg_content
+                else:
+                    texts = [
+                        item.get("text", "")
+                        for item in msg_content
+                        if isinstance(item, dict) and item.get("type") == "text"
+                    ]
+                    result_msg["content"] = "\n".join(texts) if texts else ""
 
         if "name" in message and isinstance(message["name"], str):
             result_msg["name"] = message["name"]
 
+        if "task" in message and isinstance(message["task"], str):
+            result_msg["task"] = message["task"]
+
         if role == "developer":
             result_msg["tools"] = message.get("tools", None)
     return result
@@ -1570,12 +1818,28 @@ def _postprocess_messages(messages: list[ConversationMessage]) -> None:
                 continue
 
             for item in tool_calls:
+                if not isinstance(item, dict):
+                    raise VLLMValidationError(
+                        "assistant tool_calls entries must be objects.",
+                        parameter="tool_calls",
+                    )
+
+                function = item.get("function")
+                if item.get("type", "function") != "function" or not isinstance(
+                    function, dict
+                ):
+                    raise VLLMValidationError(
+                        "chat completions only support assistant tool_calls "
+                        "of type 'function'.",
+                        parameter="tool_calls",
+                    )
+
                 # if arguments is None or empty string, set to {}
-                if content := item["function"].get("arguments"):
+                if content := function.get("arguments"):
                     if not isinstance(content, (dict, list)):
-                        item["function"]["arguments"] = json.loads(content)
+                        function["arguments"] = json.loads(content)
                 else:
-                    item["function"]["arguments"] = {}
+                    function["arguments"] = {}
 
 
 def parse_chat_messages(
@@ -1590,7 +1854,10 @@ def parse_chat_messages(
     MultiModalUUIDDict | None,
 ]:
     conversation: list[ConversationMessage] = []
-    mm_tracker = MultiModalItemTracker(model_config, media_io_kwargs=media_io_kwargs)
+    mm_tracker = MultiModalItemTracker(
+        model_config,
+        media_io_kwargs=media_io_kwargs,
+    )
 
     for msg in messages:
         sub_messages = _parse_chat_message_content(
@@ -1627,7 +1894,8 @@ async def parse_chat_messages_async(
 ]:
     conversation: list[ConversationMessage] = []
     mm_tracker = AsyncMultiModalItemTracker(
-        model_config, media_io_kwargs=media_io_kwargs
+        model_config,
+        media_io_kwargs=media_io_kwargs,
     )
 
     for msg in messages:
diff --git a/vllm/entrypoints/cli/__init__.py b/vllm/entrypoints/cli/__init__.py
index 704d94d36f70..208f01a7cb5e 100644
--- a/vllm/entrypoints/cli/__init__.py
+++ b/vllm/entrypoints/cli/__init__.py
@@ -1,19 +1,2 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from vllm.entrypoints.cli.benchmark.latency import BenchmarkLatencySubcommand
-from vllm.entrypoints.cli.benchmark.mm_processor import (
-    BenchmarkMMProcessorSubcommand,
-)
-from vllm.entrypoints.cli.benchmark.serve import BenchmarkServingSubcommand
-from vllm.entrypoints.cli.benchmark.startup import BenchmarkStartupSubcommand
-from vllm.entrypoints.cli.benchmark.sweep import BenchmarkSweepSubcommand
-from vllm.entrypoints.cli.benchmark.throughput import BenchmarkThroughputSubcommand
-
-__all__: list[str] = [
-    "BenchmarkLatencySubcommand",
-    "BenchmarkMMProcessorSubcommand",
-    "BenchmarkServingSubcommand",
-    "BenchmarkStartupSubcommand",
-    "BenchmarkSweepSubcommand",
-    "BenchmarkThroughputSubcommand",
-]
diff --git a/vllm/entrypoints/cli/benchmark/main.py b/vllm/entrypoints/cli/benchmark/main.py
index 48f34fce1d44..f64de4cf6732 100644
--- a/vllm/entrypoints/cli/benchmark/main.py
+++ b/vllm/entrypoints/cli/benchmark/main.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import argparse
+import sys
 import typing
 
 from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase
@@ -14,6 +15,17 @@
     FlexibleArgumentParser = argparse.ArgumentParser
 
 
+def _import_bench_subcommand_modules() -> None:
+    # Imported lazily so `BenchmarkSubcommandBase` subclasses register only
+    # when `vllm bench` is actually invoked.
+    import vllm.entrypoints.cli.benchmark.latency  # noqa: F401
+    import vllm.entrypoints.cli.benchmark.mm_processor  # noqa: F401
+    import vllm.entrypoints.cli.benchmark.serve  # noqa: F401
+    import vllm.entrypoints.cli.benchmark.startup  # noqa: F401
+    import vllm.entrypoints.cli.benchmark.sweep  # noqa: F401
+    import vllm.entrypoints.cli.benchmark.throughput  # noqa: F401
+
+
 class BenchmarkSubcommand(CLISubcommand):
     """The `bench` subcommand for the vLLM CLI."""
 
@@ -38,18 +50,28 @@ def subparser_init(
         )
         bench_subparsers = bench_parser.add_subparsers(required=True, dest="bench_type")
 
-        for cmd_cls in BenchmarkSubcommandBase.__subclasses__():
-            cmd_subparser = bench_subparsers.add_parser(
-                cmd_cls.name,
-                help=cmd_cls.help,
-                description=cmd_cls.help,
-                usage=f"vllm {self.name} {cmd_cls.name} [options]",
-            )
-            cmd_subparser.set_defaults(dispatch_function=cmd_cls.cmd)
-            cmd_cls.add_cli_args(cmd_subparser)
-            cmd_subparser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format(
-                subcmd=f"{self.name} {cmd_cls.name}"
-            )
+        # Only build the nested bench subparsers when the user is actually
+        # invoking `bench`; otherwise we'd drag in imports
+        # unnecessarily on every `vllm --help` and `vllm serve`.
+        # Scan for the first positional arg so global flags (e.g. `-v`)
+        # before the subcommand don't break detection.
+        first_positional = next(
+            (arg for arg in sys.argv[1:] if not arg.startswith("-")), None
+        )
+        if first_positional == self.name:
+            _import_bench_subcommand_modules()
+            for cmd_cls in BenchmarkSubcommandBase.__subclasses__():
+                cmd_subparser = bench_subparsers.add_parser(
+                    cmd_cls.name,
+                    help=cmd_cls.help,
+                    description=cmd_cls.help,
+                    usage=f"vllm {self.name} {cmd_cls.name} [options]",
+                )
+                cmd_subparser.set_defaults(dispatch_function=cmd_cls.cmd)
+                cmd_cls.add_cli_args(cmd_subparser)
+                cmd_subparser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format(
+                    subcmd=f"{self.name} {cmd_cls.name}"
+                )
         return bench_parser
 
 
diff --git a/vllm/entrypoints/cli/launch.py b/vllm/entrypoints/cli/launch.py
index 9871a27da381..0af9f32c3ee3 100644
--- a/vllm/entrypoints/cli/launch.py
+++ b/vllm/entrypoints/cli/launch.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import argparse
+import signal
 
 import uvloop
 
@@ -110,6 +111,14 @@ def cmd_init() -> list[CLISubcommand]:
 
 async def run_launch_fastapi(args: argparse.Namespace) -> None:
     """Run the online serving layer with FastAPI (no GPU inference)."""
+
+    # Interrupt initialization if SIGTERM arrives before uvicorn installs
+    # its own signal handlers. Once uvicorn is running it replaces this.
+    def _interrupt_init(*_) -> None:
+        raise KeyboardInterrupt("terminated")
+
+    signal.signal(signal.SIGTERM, _interrupt_init)
+
     # 1. Socket binding
     listen_address, sock = setup_server(args)
 
diff --git a/vllm/entrypoints/cli/main.py b/vllm/entrypoints/cli/main.py
index 2261ef233134..ac7f9e0a7e02 100644
--- a/vllm/entrypoints/cli/main.py
+++ b/vllm/entrypoints/cli/main.py
@@ -7,6 +7,7 @@
 
 import importlib.metadata
 import sys
+from importlib.util import find_spec
 
 from vllm.logger import init_logger
 
@@ -34,47 +35,63 @@ def main():
 
     cli_env_setup()
 
-    # For 'vllm bench *': use CPU instead of UnspecifiedPlatform by default
-    if len(sys.argv) > 1 and sys.argv[1] == "bench":
-        logger.debug(
-            "Bench command detected, must ensure current platform is not "
-            "UnspecifiedPlatform to avoid device type inference error"
-        )
-        from vllm import platforms
+    # If `--omni` arg is passed to the CLI, delegate to vLLM Omni's entrypoint handling
+    if "--omni" in sys.argv:
+        # NOTE: Check the spec instead of importing directly here, since things could
+        # fail with ImportError due to mismatched versions if things are moved around.
+        spec = find_spec("vllm_omni")
+        if spec is None:
+            logger.error(
+                "--omni flag requires a valid instance of vllm-omni to be installed."
+            )
+            sys.exit(1)
 
-        if platforms.current_platform.is_unspecified():
-            from vllm.platforms.cpu import CpuPlatform
+        from vllm_omni.entrypoints.cli.main import main as omni_main
 
-            platforms.current_platform = CpuPlatform()
-            logger.info(
-                "Unspecified platform detected, switching to CPU Platform instead."
+        logger.info("Delegating entrypoint handling to vllm-omni")
+        omni_main()
+    else:
+        # For 'vllm bench *': use CPU instead of UnspecifiedPlatform by default
+        if len(sys.argv) > 1 and sys.argv[1] == "bench":
+            logger.debug(
+                "Bench command detected, must ensure current platform is not "
+                "UnspecifiedPlatform to avoid device type inference error"
             )
+            from vllm import platforms
 
-    parser = FlexibleArgumentParser(
-        description="vLLM CLI",
-        epilog=VLLM_SUBCMD_PARSER_EPILOG.format(subcmd="[subcommand]"),
-    )
-    parser.add_argument(
-        "-v",
-        "--version",
-        action="version",
-        version=importlib.metadata.version("vllm"),
-    )
-    subparsers = parser.add_subparsers(required=False, dest="subparser")
-    cmds = {}
-    for cmd_module in CMD_MODULES:
-        new_cmds = cmd_module.cmd_init()
-        for cmd in new_cmds:
-            cmd.subparser_init(subparsers).set_defaults(dispatch_function=cmd.cmd)
-            cmds[cmd.name] = cmd
-    args = parser.parse_args()
-    if args.subparser in cmds:
-        cmds[args.subparser].validate(args)
-
-    if hasattr(args, "dispatch_function"):
-        args.dispatch_function(args)
-    else:
-        parser.print_help()
+            if platforms.current_platform.is_unspecified():
+                from vllm.platforms.cpu import CpuPlatform
+
+                platforms.current_platform = CpuPlatform()
+                logger.info(
+                    "Unspecified platform detected, switching to CPU Platform instead."
+                )
+
+        parser = FlexibleArgumentParser(
+            description="vLLM CLI",
+            epilog=VLLM_SUBCMD_PARSER_EPILOG.format(subcmd="[subcommand]"),
+        )
+        parser.add_argument(
+            "-v",
+            "--version",
+            action="version",
+            version=importlib.metadata.version("vllm"),
+        )
+        subparsers = parser.add_subparsers(required=False, dest="subparser")
+        cmds = {}
+        for cmd_module in CMD_MODULES:
+            new_cmds = cmd_module.cmd_init()
+            for cmd in new_cmds:
+                cmd.subparser_init(subparsers).set_defaults(dispatch_function=cmd.cmd)
+                cmds[cmd.name] = cmd
+        args = parser.parse_args()
+        if args.subparser in cmds:
+            cmds[args.subparser].validate(args)
+
+        if hasattr(args, "dispatch_function"):
+            args.dispatch_function(args)
+        else:
+            parser.print_help()
 
 
 if __name__ == "__main__":
diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py
index 1ece3e4df18c..5972bd48aa46 100644
--- a/vllm/entrypoints/cli/serve.py
+++ b/vllm/entrypoints/cli/serve.py
@@ -12,6 +12,9 @@
 from vllm.entrypoints.cli.types import CLISubcommand
 from vllm.entrypoints.openai.api_server import run_server, setup_server
 from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
+from vllm.entrypoints.openai.dp_supervisor import (
+    run_dp_supervisor,
+)
 from vllm.entrypoints.utils import VLLM_SUBCMD_PARSER_EPILOG
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
@@ -21,7 +24,11 @@
 from vllm.v1.executor import Executor
 from vllm.v1.executor.multiproc_executor import MultiprocExecutor
 from vllm.v1.metrics.prometheus import setup_multiprocess_prometheus
-from vllm.v1.utils import APIServerProcessManager, wait_for_completion_or_failure
+from vllm.v1.utils import (
+    APIServerProcessManager,
+    RustFrontendProcessManager,
+    wait_for_completion_or_failure,
+)
 
 logger = init_logger(__name__)
 
@@ -62,30 +69,41 @@ def cmd(args: argparse.Namespace) -> None:
             args.api_server_count = 0
 
         # Detect LB mode for defaulting api_server_count.
+        # Multi-port: --data-parallel-multi-port-external-lb
         # External LB: --data-parallel-external-lb or --data-parallel-rank
         # Hybrid LB: --data-parallel-hybrid-lb or --data-parallel-start-rank
         is_external_lb = (
             args.data_parallel_external_lb or args.data_parallel_rank is not None
         )
-        is_hybrid_lb = (
-            args.data_parallel_hybrid_lb or args.data_parallel_start_rank is not None
-        )
 
-        if is_external_lb and is_hybrid_lb:
+        # If --data_parallel_multi_port_external_lb and --data_parallel_hybrid_lb
+        # are unset, default to hybrid if --data-parallel-start-rank is set
+        is_hybrid_lb = is_multi_port = False
+        if (
+            not args.data_parallel_hybrid_lb
+            and not args.data_parallel_multi_port_external_lb
+        ):
+            is_hybrid_lb = args.data_parallel_start_rank is not None
+        else:
+            is_hybrid_lb = args.data_parallel_hybrid_lb
+            is_multi_port = args.data_parallel_multi_port_external_lb
+
+        if sum([is_multi_port, is_external_lb, is_hybrid_lb]) > 1:
             raise ValueError(
-                "Cannot use both external and hybrid data parallel load "
-                "balancing modes. External LB is enabled via "
-                "--data-parallel-external-lb or --data-parallel-rank. "
-                "Hybrid LB is enabled via --data-parallel-hybrid-lb or "
-                "--data-parallel-start-rank. Use one mode or the other."
+                "Cannot use more than one data parallel load balancing mode. "
+                "Choose one of: --data-parallel-multi-port-external-lb, "
+                "--data-parallel-external-lb (or --data-parallel-rank), "
+                "--data-parallel-hybrid-lb (or --data-parallel-start-rank)."
             )
 
         # Default api_server_count if not explicitly set.
-        # - External LB: Leave as 1 (external LB handles distribution)
+        # - Multi-port: 1 (supervisor spawns one server per local DP rank)
+        # - Rust frontend: 1 (not applicable as it's multithreaded)
+        # - External LB: 1 (external LB handles distribution)
         # - Hybrid LB: Use local DP size (internal LB for local ranks only)
         # - Internal LB: Use full DP size
         if args.api_server_count is None:
-            if is_external_lb:
+            if is_multi_port or is_external_lb or envs.VLLM_RUST_FRONTEND_PATH:
                 args.api_server_count = 1
             elif is_hybrid_lb:
                 args.api_server_count = args.data_parallel_size_local or 1
@@ -102,6 +120,12 @@ def cmd(args: argparse.Namespace) -> None:
                         "Defaulting api_server_count to data_parallel_size (%d).",
                         args.api_server_count,
                     )
+        elif envs.VLLM_RUST_FRONTEND_PATH and args.api_server_count > 1:
+            logger.warning(
+                "Ignoring --api-server-count=%d when using rust front-end process",
+                args.api_server_count,
+            )
+            args.api_server_count = 1
 
         # Elastic EP currently only supports running with at most one API server.
         if getattr(args, "enable_elastic_ep", False) and args.api_server_count > 1:
@@ -112,9 +136,11 @@ def cmd(args: argparse.Namespace) -> None:
             )
             args.api_server_count = 1
 
-        if args.api_server_count < 1:
+        if is_multi_port:
+            run_dp_supervisor(args)
+        elif args.api_server_count < 1:
             run_headless(args)
-        elif args.api_server_count > 1:
+        elif args.api_server_count > 1 or envs.VLLM_RUST_FRONTEND_PATH:
             run_multi_api_server(args)
         else:
             # Single API server (this process).
@@ -136,13 +162,6 @@ def subparser_init(
         )
 
         serve_parser = make_arg_parser(serve_parser)
-        serve_parser.add_argument(
-            "--grpc",
-            action="store_true",
-            default=False,
-            help="Launch a gRPC server instead of the HTTP OpenAI-compatible "
-            "server. Requires: pip install vllm[grpc].",
-        )
         serve_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format(subcmd=self.name)
         return serve_parser
 
@@ -225,7 +244,7 @@ def signal_handler(signum, frame):
     )
 
     try:
-        engine_manager.join_first()
+        engine_manager.monitor_engine_liveness()
     finally:
         timeout = None
         if shutdown_requested:
@@ -237,9 +256,15 @@ def signal_handler(signum, frame):
 
 def run_multi_api_server(args: argparse.Namespace):
     assert not args.headless
+    rust_frontend_path = envs.VLLM_RUST_FRONTEND_PATH
     num_api_servers: int = args.api_server_count
     assert num_api_servers > 0
 
+    if rust_frontend_path and num_api_servers > 1:
+        raise ValueError(
+            "VLLM_RUST_FRONTEND_PATH does not support api_server_count > 1"
+        )
+
     if num_api_servers > 1:
         setup_multiprocess_prometheus()
 
@@ -277,7 +302,9 @@ def signal_handler(signum, frame):
     dp_rank = parallel_config.data_parallel_rank
     assert parallel_config.local_engines_only or dp_rank == 0
 
-    api_server_manager: APIServerProcessManager | None = None
+    api_server_manager: APIServerProcessManager | RustFrontendProcessManager | None = (
+        None
+    )
 
     from vllm.v1.engine.utils import get_engine_zmq_addresses
 
@@ -286,37 +313,35 @@ def signal_handler(signum, frame):
     with launch_core_engines(
         vllm_config, executor_class, log_stats, addresses, num_api_servers
     ) as (local_engine_manager, coordinator, addresses, tensor_queue):
-        # Construct common args for the APIServerProcessManager up-front.
-        api_server_manager_kwargs = dict(
-            listen_address=listen_address,
-            sock=sock,
-            args=args,
-            num_servers=num_api_servers,
-            input_addresses=addresses.inputs,
-            output_addresses=addresses.outputs,
-            stats_update_address=coordinator.get_stats_publish_address()
-            if coordinator
-            else None,
-            tensor_queue=tensor_queue,
+        stats_update_address = (
+            coordinator.get_stats_publish_address() if coordinator else None
         )
 
-        # For dp ranks > 0 in external/hybrid DP LB modes, we must delay the
-        # start of the API servers until the local engine is started
-        # (after the launcher context manager exits),
-        # since we get the front-end stats update address from the coordinator
-        # via the handshake with the local engine.
-        if dp_rank == 0 or not parallel_config.local_engines_only:
-            # Start API servers using the manager.
-            api_server_manager = APIServerProcessManager(**api_server_manager_kwargs)
-
-    # Start API servers now if they weren't already started.
-    if api_server_manager is None:
-        api_server_manager_kwargs["stats_update_address"] = (
-            addresses.frontend_stats_publish_address
-        )
-        api_server_manager = APIServerProcessManager(**api_server_manager_kwargs)
+        if rust_frontend_path:
+            # Start rust front-end process.
+            api_server_manager = RustFrontendProcessManager(
+                binary_path=rust_frontend_path,
+                sock=sock,
+                args=args,
+                input_address=addresses.inputs[0],
+                output_address=addresses.outputs[0],
+                engine_count=parallel_config.data_parallel_size,
+                stats_update_address=stats_update_address,
+            )
+        else:
+            # Start API server(s).
+            api_server_manager = APIServerProcessManager(
+                listen_address=listen_address,
+                sock=sock,
+                args=args,
+                num_servers=num_api_servers,
+                input_addresses=addresses.inputs,
+                output_addresses=addresses.outputs,
+                stats_update_address=stats_update_address,
+                tensor_queue=tensor_queue,
+            )
 
-    # Wait for API servers
+    # Wait for API servers.
     try:
         wait_for_completion_or_failure(
             api_server_manager=api_server_manager,
diff --git a/vllm/entrypoints/generate/__init__.py b/vllm/entrypoints/generate/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/entrypoints/generate/beam_search/__init__.py b/vllm/entrypoints/generate/beam_search/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/entrypoints/generate/beam_search/offline.py b/vllm/entrypoints/generate/beam_search/offline.py
new file mode 100644
index 000000000000..29a0402fa13e
--- /dev/null
+++ b/vllm/entrypoints/generate/beam_search/offline.py
@@ -0,0 +1,226 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import itertools
+from abc import ABC, abstractmethod
+from collections.abc import Callable, Iterable, Sequence
+from typing import Any
+
+from tqdm import tqdm
+
+from vllm import PromptType, RequestOutput, TextPrompt, TokensPrompt
+from vllm.inputs import EngineInput
+from vllm.logger import init_logger
+from vllm.lora.request import LoRARequest
+from vllm.renderers import BaseRenderer
+from vllm.sampling_params import BeamSearchParams, SamplingParams
+
+from .utils import (
+    BeamSearchInstance,
+    BeamSearchOutput,
+    BeamSearchSequence,
+    create_sort_beams_key_function,
+)
+
+logger = init_logger(__name__)
+
+
+class BeamSearchOfflineMixin(ABC):
+    """Offline inference for beam search"""
+
+    renderer: BaseRenderer
+
+    def beam_search(
+        self,
+        prompts: list[TokensPrompt | TextPrompt],
+        params: BeamSearchParams,
+        lora_request: list[LoRARequest] | LoRARequest | None = None,
+        use_tqdm: bool = False,
+        concurrency_limit: int | None = None,
+    ) -> list[BeamSearchOutput]:
+        """
+        Generate sequences using beam search.
+
+        Args:
+            prompts: A list of prompts. Each prompt can be a string or a list
+                of token IDs.
+            params: The beam search parameters.
+            lora_request: LoRA request to use for generation, if any.
+            use_tqdm: Whether to use tqdm to display the progress bar.
+            concurrency_limit: The maximum number of concurrent requests.
+                If None, the number of concurrent requests is unlimited.
+        """
+        # TODO: how does beam search work together with length penalty,
+        # frequency, penalty, and stopping criteria, etc.?
+        beam_width = params.beam_width
+        max_tokens = params.max_tokens
+        temperature = params.temperature
+        ignore_eos = params.ignore_eos
+        length_penalty = params.length_penalty
+
+        tokenizer = self.renderer.get_tokenizer()
+        eos_token_id = tokenizer.eos_token_id
+        sort_beams_key = create_sort_beams_key_function(eos_token_id, length_penalty)
+
+        engine_inputs = self._preprocess_cmpl(prompts)
+        lora_requests = self._lora_request_to_seq(lora_request, len(engine_inputs))
+
+        if use_tqdm and concurrency_limit is not None:
+            logger.warning(
+                "Progress bar is not supported when using concurrency_limit. "
+                "Disabling progress bar."
+            )
+            use_tqdm = False
+
+        if concurrency_limit is None:
+            concurrency_limit = len(engine_inputs)
+
+        # generate 2 * beam_width candidates at each step
+        # following the huggingface transformers implementation
+        # at https://github.com/huggingface/transformers/blob/e15687fffe5c9d20598a19aeab721ae0a7580f8a/src/transformers/generation/beam_search.py#L534 # noqa
+        sampling_params = SamplingParams(
+            logprobs=2 * beam_width,
+            max_tokens=1,
+            temperature=temperature,
+            skip_clone=True,  # Internal beam search, safe to skip clone
+        )
+        instances: list[BeamSearchInstance] = []
+
+        for lora_req, prompt in zip(lora_requests, engine_inputs):
+            if prompt["type"] == "embeds":
+                raise NotImplementedError(
+                    "Embedding prompt not supported for beam search"
+                )
+
+            instances.append(
+                BeamSearchInstance(
+                    prompt,
+                    lora_request=lora_req,
+                    logprobs=None,
+                ),
+            )
+
+        for prompt_start in range(0, len(instances), concurrency_limit):
+            instances_batch = instances[prompt_start : prompt_start + concurrency_limit]
+
+            token_iter = range(max_tokens)
+            if use_tqdm:
+                token_iter = tqdm(
+                    token_iter, desc="Beam search", unit="token", unit_scale=False
+                )
+                logger.warning(
+                    "The progress bar shows the upper bound on token steps and "
+                    "may finish early due to stopping conditions. It does not "
+                    "reflect instance-level progress."
+                )
+            for _ in token_iter:
+                all_beams: list[BeamSearchSequence] = list(
+                    sum((instance.beams for instance in instances_batch), [])
+                )
+                pos = [0] + list(
+                    itertools.accumulate(
+                        len(instance.beams) for instance in instances_batch
+                    )
+                )
+                instance_start_and_end: list[tuple[int, int]] = list(
+                    zip(pos[:-1], pos[1:])
+                )
+
+                if len(all_beams) == 0:
+                    break
+
+                # only runs for one step
+                # we don't need to use tqdm here
+                output = self._render_and_run_requests(
+                    prompts=(beam.get_prompt() for beam in all_beams),
+                    params=self._params_to_seq(sampling_params, len(all_beams)),
+                    output_type=RequestOutput,
+                    lora_requests=[beam.lora_request for beam in all_beams],
+                    use_tqdm=False,
+                )
+
+                for (start, end), instance in zip(
+                    instance_start_and_end, instances_batch
+                ):
+                    instance_new_beams = []
+                    for i in range(start, end):
+                        current_beam = all_beams[i]
+                        result = output[i]
+
+                        if result.outputs[0].logprobs is not None:
+                            # if `result.outputs[0].logprobs` is None, it means
+                            # the sequence is completed because of the
+                            # max-model-len or abortion. we don't need to add
+                            # it to the new beams.
+                            logprobs = result.outputs[0].logprobs[0]
+                            for token_id, logprob_obj in logprobs.items():
+                                new_beam = BeamSearchSequence(
+                                    current_beam.orig_prompt,
+                                    tokens=current_beam.tokens + [token_id],
+                                    logprobs=current_beam.logprobs + [logprobs],
+                                    lora_request=current_beam.lora_request,
+                                    cum_logprob=current_beam.cum_logprob
+                                    + logprob_obj.logprob,
+                                )
+
+                                if token_id == eos_token_id and not ignore_eos:
+                                    instance.completed.append(new_beam)
+                                else:
+                                    instance_new_beams.append(new_beam)
+                    sorted_beams = sorted(
+                        instance_new_beams, key=sort_beams_key, reverse=True
+                    )
+                    instance.beams = sorted_beams[:beam_width]
+
+        outputs = []
+        for instance in instances:
+            instance.completed.extend(instance.beams)
+            sorted_completed = sorted(
+                instance.completed, key=sort_beams_key, reverse=True
+            )
+            best_beams = sorted_completed[:beam_width]
+
+            for beam in best_beams:
+                beam.text = tokenizer.decode(beam.tokens)
+
+            outputs.append(BeamSearchOutput(sequences=best_beams))
+
+        return outputs
+
+    @abstractmethod
+    def _preprocess_cmpl(
+        self,
+        prompts: Sequence[PromptType],
+        tokenization_kwargs: dict[str, Any] | None = None,
+        mm_processor_kwargs: dict[str, Any] | None = None,
+    ) -> Sequence[EngineInput]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def _lora_request_to_seq(
+        self,
+        lora_request: LoRARequest | None | Sequence[LoRARequest | None],
+        num_requests: int,
+    ) -> Sequence[LoRARequest | None]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def _params_to_seq(
+        self,
+        params: SamplingParams,
+        num_requests: int,
+    ) -> Sequence[SamplingParams]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def _render_and_run_requests(
+        self,
+        prompts: Iterable[EngineInput],
+        params: Sequence[SamplingParams],
+        output_type: type[RequestOutput],
+        *,
+        lora_requests: Sequence[LoRARequest | None] | None = None,
+        priorities: Sequence[int] | None = None,
+        use_tqdm: bool | Callable[..., tqdm] = True,
+    ):
+        raise NotImplementedError
diff --git a/vllm/entrypoints/generate/beam_search/online.py b/vllm/entrypoints/generate/beam_search/online.py
new file mode 100644
index 000000000000..1daef9529bee
--- /dev/null
+++ b/vllm/entrypoints/generate/beam_search/online.py
@@ -0,0 +1,225 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import asyncio
+from abc import ABC
+from collections.abc import AsyncGenerator, Mapping
+
+import numpy as np
+
+from vllm import CompletionOutput, RequestOutput
+from vllm.engine.protocol import EngineClient
+from vllm.inputs import EngineInput
+from vllm.lora.request import LoRARequest
+from vllm.renderers import BaseRenderer
+from vllm.sampling_params import BeamSearchParams, SamplingParams
+from vllm.utils import random_uuid
+from vllm.utils.async_utils import collect_from_async_generator
+
+from .utils import BeamSearchSequence, create_sort_beams_key_function
+
+
+class BeamSearchOnlineMixin(ABC):
+    """online serving for beam search"""
+
+    renderer: BaseRenderer
+    engine_client: EngineClient
+
+    async def beam_search(
+        self,
+        prompt: EngineInput,
+        request_id: str,
+        params: BeamSearchParams,
+        lora_request: LoRARequest | None = None,
+        trace_headers: Mapping[str, str] | None = None,
+    ) -> AsyncGenerator[RequestOutput, None]:
+        beam_width = params.beam_width
+        max_tokens = params.max_tokens
+        ignore_eos = params.ignore_eos
+        temperature = params.temperature
+        length_penalty = params.length_penalty
+        include_stop_str_in_output = params.include_stop_str_in_output
+
+        tokenizer = self.renderer.get_tokenizer()
+        eos_token_id = tokenizer.eos_token_id
+        sort_beams_key = create_sort_beams_key_function(eos_token_id, length_penalty)
+
+        if prompt["type"] == "embeds":
+            raise NotImplementedError("Embedding prompt not supported for beam search")
+
+        # Extract prompt tokens and text based on model type
+        decoder_prompt = (
+            prompt if prompt["type"] != "enc_dec" else prompt["decoder_prompt"]
+        )
+        prompt_text = decoder_prompt.get("prompt")
+        prompt_token_ids = decoder_prompt["prompt_token_ids"]
+
+        tokenized_length = len(prompt_token_ids)
+
+        logprobs_num = 2 * beam_width
+        sampling_params = SamplingParams(
+            logprobs=logprobs_num,
+            max_tokens=1,
+            temperature=temperature,
+        )
+        all_beams = [
+            BeamSearchSequence(
+                orig_prompt=prompt,
+                tokens=prompt_token_ids,
+                cum_logprob=0,
+                logprobs=[],
+                lora_request=lora_request,
+            )
+        ]
+        completed = []
+
+        for _ in range(max_tokens):
+            tasks = []
+            request_id_batch = f"{request_id}-{random_uuid()}"
+
+            for i, beam in enumerate(all_beams):
+                prompt_item = beam.get_prompt()
+                lora_request_item = beam.lora_request
+                request_id_item = f"{request_id_batch}-beam-{i}"
+                task = asyncio.create_task(
+                    collect_from_async_generator(
+                        self.engine_client.generate(
+                            prompt_item,
+                            sampling_params,
+                            request_id_item,
+                            lora_request=lora_request_item,
+                            trace_headers=trace_headers,
+                        )
+                    )
+                )
+                tasks.append(task)
+
+            output = [x[0] for x in await asyncio.gather(*tasks)]
+
+            new_beams = []
+            # Store all new tokens generated by beam
+            all_beams_token_id = []
+            # Store the cumulative probability of all tokens
+            # generated by beam search
+            all_beams_logprob = []
+            # Iterate through all beam inference results
+            for i, result in enumerate(output):
+                current_beam = all_beams[i]
+
+                # check for error finish reason and abort beam search
+                if result.outputs[0].finish_reason == "error":
+                    # yield error output and terminate beam search
+                    yield RequestOutput(
+                        request_id=request_id,
+                        prompt=prompt_text,
+                        outputs=[
+                            CompletionOutput(
+                                index=0,
+                                text="",
+                                token_ids=[],
+                                cumulative_logprob=None,
+                                logprobs=None,
+                                finish_reason="error",
+                            )
+                        ],
+                        finished=True,
+                        prompt_token_ids=prompt_token_ids,
+                        prompt_logprobs=None,
+                    )
+                    return
+
+                if result.outputs[0].logprobs is not None:
+                    logprobs = result.outputs[0].logprobs[0]
+                    all_beams_token_id.extend(list(logprobs.keys()))
+                    all_beams_logprob.extend(
+                        [
+                            current_beam.cum_logprob + obj.logprob
+                            for obj in logprobs.values()
+                        ]
+                    )
+
+            # Handle the token for the end of sentence (EOS)
+            all_beams_token_id = np.array(all_beams_token_id)
+            all_beams_logprob = np.array(all_beams_logprob)
+
+            if not ignore_eos:
+                # Get the index position of eos token in all generated results
+                eos_idx = np.where(all_beams_token_id == eos_token_id)[0]
+                for idx in eos_idx:
+                    current_beam = all_beams[idx // logprobs_num]
+                    result = output[idx // logprobs_num]
+                    assert result.outputs[0].logprobs is not None
+                    logprobs_entry = result.outputs[0].logprobs[0]
+                    completed.append(
+                        BeamSearchSequence(
+                            orig_prompt=prompt,
+                            tokens=current_beam.tokens + [eos_token_id]
+                            if include_stop_str_in_output
+                            else current_beam.tokens,
+                            logprobs=current_beam.logprobs + [logprobs_entry],
+                            cum_logprob=float(all_beams_logprob[idx]),
+                            finish_reason="stop",
+                            stop_reason=eos_token_id,
+                        )
+                    )
+                # After processing, set the log probability of the eos condition
+                # to negative infinity.
+                all_beams_logprob[eos_idx] = -np.inf
+
+            # Processing non-EOS tokens
+            # Get indices of the top beam_width probabilities
+            topn_idx = np.argpartition(np.negative(all_beams_logprob), beam_width)[
+                :beam_width
+            ]
+
+            for idx in topn_idx:
+                current_beam = all_beams[idx // logprobs_num]
+                result = output[idx // logprobs_num]
+                token_id = int(all_beams_token_id[idx])
+                assert result.outputs[0].logprobs is not None
+                logprobs_entry = result.outputs[0].logprobs[0]
+                new_beams.append(
+                    BeamSearchSequence(
+                        orig_prompt=prompt,
+                        tokens=current_beam.tokens + [token_id],
+                        logprobs=current_beam.logprobs + [logprobs_entry],
+                        lora_request=current_beam.lora_request,
+                        cum_logprob=float(all_beams_logprob[idx]),
+                    )
+                )
+
+            all_beams = new_beams
+
+        completed.extend(all_beams)
+        sorted_completed = sorted(completed, key=sort_beams_key, reverse=True)
+        best_beams = sorted_completed[:beam_width]
+
+        for beam in best_beams:
+            if beam.tokens[-1] == eos_token_id and not ignore_eos:
+                # Skip the eos token in the text.
+                tokens = beam.tokens[tokenized_length:-1]
+            else:
+                tokens = beam.tokens[tokenized_length:]
+            beam.text = tokenizer.decode(tokens)
+
+        yield RequestOutput(
+            request_id=request_id,
+            prompt=prompt_text,
+            outputs=[
+                CompletionOutput(
+                    text=beam.text,  # type: ignore
+                    cumulative_logprob=beam.cum_logprob,
+                    token_ids=beam.tokens[tokenized_length:],
+                    index=i,
+                    logprobs=beam.logprobs,
+                    finish_reason=beam.finish_reason
+                    if beam.finish_reason is not None
+                    else "length",
+                    stop_reason=beam.stop_reason,
+                )
+                for (i, beam) in enumerate(best_beams)
+            ],
+            finished=True,
+            prompt_token_ids=prompt_token_ids,
+            prompt_logprobs=None,
+        )
diff --git a/vllm/beam_search.py b/vllm/entrypoints/generate/beam_search/utils.py
similarity index 100%
rename from vllm/beam_search.py
rename to vllm/entrypoints/generate/beam_search/utils.py
diff --git a/vllm/entrypoints/grpc_server.py b/vllm/entrypoints/grpc_server.py
index 5bb8ea1b4567..b9173b302ca3 100644
--- a/vllm/entrypoints/grpc_server.py
+++ b/vllm/entrypoints/grpc_server.py
@@ -26,17 +26,22 @@
 
 try:
     import grpc
+    from grpc_health.v1 import health_pb2_grpc
     from grpc_reflection.v1alpha import reflection
     from smg_grpc_proto import vllm_engine_pb2, vllm_engine_pb2_grpc
+    from smg_grpc_servicer.vllm.health_servicer import VllmHealthServicer
     from smg_grpc_servicer.vllm.servicer import VllmEngineServicer
-except ImportError:
+except ImportError as e:
     raise ImportError(
-        "smg-grpc-servicer is required for gRPC mode. "
-        "Install it with: pip install vllm[grpc]"
-    ) from None
+        "gRPC mode requires smg-grpc-servicer. "
+        "If not installed, run: pip install vllm[grpc]. "
+        "If already installed, there may be a broken import due to a "
+        "version mismatch — see the chained exception above for details."
+    ) from e
 
 import uvloop
 
+from vllm import envs
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.entrypoints.utils import log_version_and_model
 from vllm.logger import init_logger
@@ -95,9 +100,14 @@ async def serve_grpc(args: argparse.Namespace):
     # Add servicer to server
     vllm_engine_pb2_grpc.add_VllmEngineServicer_to_server(servicer, server)
 
+    # Add standard gRPC health service for Kubernetes probes
+    health_servicer = VllmHealthServicer(async_llm)
+    health_pb2_grpc.add_HealthServicer_to_server(health_servicer, server)
+
     # Enable reflection for grpcurl and other tools
     service_names = (
         vllm_engine_pb2.DESCRIPTOR.services_by_name["VllmEngine"].full_name,
+        "grpc.health.v1.Health",
         reflection.SERVICE_NAME,
     )
     reflection.enable_server_reflection(service_names, server)
@@ -113,6 +123,18 @@ async def serve_grpc(args: argparse.Namespace):
         logger.info("vLLM gRPC server started on %s", address)
         logger.info("Server is ready to accept requests")
 
+        # Start periodic stats logging (mirrors the HTTP server's lifespan task)
+        if not args.disable_log_stats:
+
+            async def _force_log():
+                while True:
+                    await asyncio.sleep(envs.VLLM_LOG_STATS_INTERVAL)
+                    await async_llm.do_log_stats()
+
+            stats_task = asyncio.create_task(_force_log())
+        else:
+            stats_task = None
+
         # Handle shutdown signals
         loop = asyncio.get_running_loop()
         stop_event = asyncio.Event()
@@ -130,6 +152,12 @@ def signal_handler():
             logger.info("Interrupted by user")
     finally:
         logger.info("Shutting down vLLM gRPC server...")
+        if stats_task is not None:
+            stats_task.cancel()
+        try:
+            health_servicer.set_not_serving()
+        except Exception:  # broad: must not prevent server.stop() / shutdown()
+            logger.warning("Failed to set health status to NOT_SERVING", exc_info=True)
         await server.stop(grace=5.0)
         logger.info("gRPC server stopped")
         async_llm.shutdown()
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 3e9e2f6d40de..cb3f3444ebac 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import itertools
 from collections.abc import Callable, Iterable, Sequence
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
@@ -12,12 +11,6 @@
 from tqdm.auto import tqdm
 from typing_extensions import TypeVar, overload
 
-from vllm.beam_search import (
-    BeamSearchInstance,
-    BeamSearchOutput,
-    BeamSearchSequence,
-    create_sort_beams_key_function,
-)
 from vllm.config import (
     AttentionConfig,
     CompilationConfig,
@@ -34,6 +27,7 @@
     RunnerOption,
     TokenizerMode,
 )
+from vllm.config.quantization import QuantizationConfigArgs
 from vllm.distributed.weight_transfer.base import (
     WeightTransferInitRequest,
     WeightTransferUpdateRequest,
@@ -41,40 +35,20 @@
 from vllm.engine.arg_utils import EngineArgs
 from vllm.entrypoints.chat_utils import (
     ChatCompletionMessageParam,
-    ChatTemplateConfig,
     ChatTemplateContentFormatOption,
     load_chat_template,
 )
-from vllm.entrypoints.pooling.io_processor_factories import init_pooling_io_processors
-from vllm.entrypoints.pooling.score.utils import (
-    ScoreData,
-    ScoreMultiModalParam,
-    _cosine_similarity,
-    compress_token_type_ids,
-    compute_maxsim_score,
-    get_score_prompt,
-    score_data_to_prompts,
-    validate_score_input,
-)
+from vllm.entrypoints.generate.beam_search.offline import BeamSearchOfflineMixin
+from vllm.entrypoints.pooling.offline import PoolingOfflineMixin
 from vllm.entrypoints.utils import log_non_default_args
 from vllm.inputs import (
-    DataPrompt,
     EngineInput,
     PromptType,
-    SingletonPrompt,
-    TextPrompt,
-    TokensPrompt,
 )
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.model_executor.layers.quantization import QuantizationMethods
-from vllm.outputs import (
-    ClassificationRequestOutput,
-    EmbeddingRequestOutput,
-    PoolingRequestOutput,
-    RequestOutput,
-    ScoringRequestOutput,
-)
+from vllm.outputs import PoolingRequestOutput, RequestOutput
 from vllm.platforms import current_platform
 from vllm.pooling_params import PoolingParams
 from vllm.renderers import ChatParams, merge_kwargs
@@ -83,8 +57,7 @@
     parse_model_prompt,
     prompt_to_seq,
 )
-from vllm.sampling_params import BeamSearchParams, RequestOutputKind, SamplingParams
-from vllm.tasks import PoolingTask
+from vllm.sampling_params import RequestOutputKind, SamplingParams
 from vllm.tokenizers import TokenizerLike
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils.counter import Counter
@@ -108,7 +81,7 @@
 _R = TypeVar("_R", default=Any)
 
 
-class LLM:
+class LLM(BeamSearchOfflineMixin, PoolingOfflineMixin):
     """An LLM for generating texts from given prompts and sampling parameters.
 
     This class includes a tokenizer, a language model (possibly distributed
@@ -207,6 +180,10 @@ class LLM:
             dictionary or an AttentionConfig instance. If a dictionary, it will
             be converted to an AttentionConfig. Allows specifying the attention
             backend and other attention-related settings.
+        spec_method: Top-level alias for `speculative_config["method"]`.
+        spec_model: Top-level alias for `speculative_config["model"]`.
+        spec_tokens: Top-level alias for
+            `speculative_config["num_speculative_tokens"]`.
         **kwargs: Arguments for [`EngineArgs`][vllm.EngineArgs].
 
     Note:
@@ -233,7 +210,7 @@ def __init__(
         tokenizer_revision: str | None = None,
         chat_template: Path | str | None = None,
         seed: int = 0,
-        gpu_memory_utilization: float = 0.9,
+        gpu_memory_utilization: float = 0.92,
         cpu_offload_gb: float = 0,
         offload_group_size: int = 0,
         offload_num_in_group: int = 1,
@@ -253,7 +230,11 @@ def __init__(
         attention_config: dict[str, Any] | AttentionConfig | None = None,
         kv_cache_memory_bytes: int | None = None,
         compilation_config: int | dict[str, Any] | CompilationConfig | None = None,
+        quantization_config: dict[str, Any] | QuantizationConfigArgs | None = None,
         logits_processors: list[str | type[LogitsProcessor]] | None = None,
+        spec_method: str | None = None,
+        spec_model: str | None = None,
+        spec_tokens: int | None = None,
         **kwargs: Any,
     ) -> None:
         """LLM constructor."""
@@ -336,7 +317,7 @@ def _make_config(value: Any, cls: type[_R]) -> _R:
                 f"LLM(data_parallel_size={_dp_size}) is not supported for single-"
                 "process usage and may hang. Please use "
                 "the explicit multi-process data-parallel example at "
-                "'examples/offline_inference/data_parallel.py'."
+                "'examples/features/data_parallel/data_parallel_offline.py'."
             )
 
         engine_args = EngineArgs(
@@ -373,7 +354,11 @@ def _make_config(value: Any, cls: type[_R]) -> _R:
             profiler_config=profiler_config_instance,
             attention_config=attention_config_instance,
             compilation_config=compilation_config_instance,
+            quantization_config=quantization_config,
             logits_processors=logits_processors,
+            spec_method=spec_method,
+            spec_model=spec_model,
+            spec_tokens=spec_tokens,
             **kwargs,
         )
 
@@ -390,22 +375,29 @@ def _make_config(value: Any, cls: type[_R]) -> _R:
 
         supported_tasks = self.llm_engine.get_supported_tasks()
         self.supported_tasks = supported_tasks
-        self.pooling_task = self.model_config.get_pooling_task(supported_tasks)
-        if self.pooling_task is not None:
-            logger.info("Supported pooling task: %s", self.pooling_task)
 
         self.runner_type = self.model_config.runner_type
         self.renderer = self.llm_engine.renderer
         self.chat_template = load_chat_template(chat_template)
-        self.io_processor = self.llm_engine.io_processor
         self.input_processor = self.llm_engine.input_processor
-        self.chat_template_config = ChatTemplateConfig(chat_template=self.chat_template)
-        self.pooling_io_processors = init_pooling_io_processors(
-            supported_tasks=supported_tasks,
-            model_config=self.model_config,
-            renderer=self.renderer,
-            chat_template_config=self.chat_template_config,
-        )
+
+        # The renderer thread pool is only consumed by the async renderer
+        # path; the synchronous `LLM` entrypoint runs multimodal
+        # preprocessing serially. Warn so the setting is not a silent
+        # no-op. See vllm-project/vllm#42901.
+        if self.model_config.renderer_num_workers > 1:
+            logger.warning_once(
+                "`renderer_num_workers=%d` was set, but the offline `LLM` "
+                "entrypoint uses the synchronous renderer path and runs "
+                "multimodal preprocessing serially across prompts. The "
+                "renderer thread pool is only consumed by the async "
+                "renderer path used by `vllm serve` / `AsyncLLM`, so this "
+                "setting has no effect here.",
+                self.model_config.renderer_num_workers,
+            )
+
+        PoolingOfflineMixin.__init__(self)
+
         # Cache for __repr__ to avoid repeated collective_rpc calls
         self._cached_repr: str | None = None
 
@@ -454,6 +446,7 @@ def generate(
         lora_request: Sequence[LoRARequest] | LoRARequest | None = None,
         priority: list[int] | None = None,
         tokenization_kwargs: dict[str, Any] | None = None,
+        mm_processor_kwargs: dict[str, Any] | None = None,
     ) -> list[RequestOutput]:
         """Generates the completions for the input prompts.
 
@@ -481,6 +474,7 @@ def generate(
                 of `prompts`, where each priority value corresponds to the prompt
                 at the same index.
             tokenization_kwargs: Overrides for `tokenizer.encode`.
+            mm_processor_kwargs: Overrides for `processor.__call__`.
 
         Returns:
             A list of `RequestOutput` objects containing the
@@ -505,6 +499,7 @@ def generate(
             lora_request=lora_request,
             tokenization_kwargs=tokenization_kwargs,
             priority=priority,
+            mm_processor_kwargs=mm_processor_kwargs,
         )
 
     def enqueue(
@@ -515,6 +510,7 @@ def enqueue(
         priority: list[int] | None = None,
         use_tqdm: bool | Callable[..., tqdm] = True,
         tokenization_kwargs: dict[str, Any] | None = None,
+        mm_processor_kwargs: dict[str, Any] | None = None,
     ) -> list[str]:
         """Enqueue prompts for generation without waiting for completion.
 
@@ -529,6 +525,7 @@ def enqueue(
             priority: The priority of the requests, if any.
             use_tqdm: If True, shows a tqdm progress bar while adding requests.
             tokenization_kwargs: Overrides for `tokenizer.encode`.
+            mm_processor_kwargs: Overrides for `processor.__call__`.
 
         Returns:
             A list of request IDs for the enqueued requests.
@@ -547,6 +544,7 @@ def enqueue(
             lora_request=lora_request,
             priority=priority,
             tokenization_kwargs=tokenization_kwargs,
+            mm_processor_kwargs=mm_processor_kwargs,
         )
 
     @overload
@@ -684,167 +682,11 @@ def apply_model(self, func: Callable[[nn.Module], _R]) -> list[_R]:
         """
         return self.llm_engine.apply_model(func)
 
-    def beam_search(
-        self,
-        prompts: list[TokensPrompt | TextPrompt],
-        params: BeamSearchParams,
-        lora_request: list[LoRARequest] | LoRARequest | None = None,
-        use_tqdm: bool = False,
-        concurrency_limit: int | None = None,
-    ) -> list[BeamSearchOutput]:
-        """
-        Generate sequences using beam search.
-
-        Args:
-            prompts: A list of prompts. Each prompt can be a string or a list
-                of token IDs.
-            params: The beam search parameters.
-            lora_request: LoRA request to use for generation, if any.
-            use_tqdm: Whether to use tqdm to display the progress bar.
-            concurrency_limit: The maximum number of concurrent requests.
-                If None, the number of concurrent requests is unlimited.
-        """
-        # TODO: how does beam search work together with length penalty,
-        # frequency, penalty, and stopping criteria, etc.?
-        beam_width = params.beam_width
-        max_tokens = params.max_tokens
-        temperature = params.temperature
-        ignore_eos = params.ignore_eos
-        length_penalty = params.length_penalty
-
-        tokenizer = self.renderer.get_tokenizer()
-        eos_token_id = tokenizer.eos_token_id
-        sort_beams_key = create_sort_beams_key_function(eos_token_id, length_penalty)
-
-        engine_inputs = self._preprocess_cmpl(prompts)
-        lora_requests = self._lora_request_to_seq(lora_request, len(engine_inputs))
-
-        if use_tqdm and concurrency_limit is not None:
-            logger.warning(
-                "Progress bar is not supported when using concurrency_limit. "
-                "Disabling progress bar."
-            )
-            use_tqdm = False
-
-        if concurrency_limit is None:
-            concurrency_limit = len(engine_inputs)
-
-        # generate 2 * beam_width candidates at each step
-        # following the huggingface transformers implementation
-        # at https://github.com/huggingface/transformers/blob/e15687fffe5c9d20598a19aeab721ae0a7580f8a/src/transformers/generation/beam_search.py#L534 # noqa
-        sampling_params = SamplingParams(
-            logprobs=2 * beam_width,
-            max_tokens=1,
-            temperature=temperature,
-            skip_clone=True,  # Internal beam search, safe to skip clone
-        )
-        instances: list[BeamSearchInstance] = []
-
-        for lora_req, prompt in zip(lora_requests, engine_inputs):
-            if prompt["type"] == "embeds":
-                raise NotImplementedError(
-                    "Embedding prompt not supported for beam search"
-                )
-
-            instances.append(
-                BeamSearchInstance(
-                    prompt,
-                    lora_request=lora_req,
-                    logprobs=None,
-                ),
-            )
-
-        for prompt_start in range(0, len(instances), concurrency_limit):
-            instances_batch = instances[prompt_start : prompt_start + concurrency_limit]
-
-            token_iter = range(max_tokens)
-            if use_tqdm:
-                token_iter = tqdm(
-                    token_iter, desc="Beam search", unit="token", unit_scale=False
-                )
-                logger.warning(
-                    "The progress bar shows the upper bound on token steps and "
-                    "may finish early due to stopping conditions. It does not "
-                    "reflect instance-level progress."
-                )
-            for _ in token_iter:
-                all_beams: list[BeamSearchSequence] = list(
-                    sum((instance.beams for instance in instances_batch), [])
-                )
-                pos = [0] + list(
-                    itertools.accumulate(
-                        len(instance.beams) for instance in instances_batch
-                    )
-                )
-                instance_start_and_end: list[tuple[int, int]] = list(
-                    zip(pos[:-1], pos[1:])
-                )
-
-                if len(all_beams) == 0:
-                    break
-
-                # only runs for one step
-                # we don't need to use tqdm here
-                output = self._render_and_run_requests(
-                    prompts=(beam.get_prompt() for beam in all_beams),
-                    params=self._params_to_seq(sampling_params, len(all_beams)),
-                    output_type=RequestOutput,
-                    lora_requests=[beam.lora_request for beam in all_beams],
-                    use_tqdm=False,
-                )
-
-                for (start, end), instance in zip(
-                    instance_start_and_end, instances_batch
-                ):
-                    instance_new_beams = []
-                    for i in range(start, end):
-                        current_beam = all_beams[i]
-                        result = output[i]
-
-                        if result.outputs[0].logprobs is not None:
-                            # if `result.outputs[0].logprobs` is None, it means
-                            # the sequence is completed because of the
-                            # max-model-len or abortion. we don't need to add
-                            # it to the new beams.
-                            logprobs = result.outputs[0].logprobs[0]
-                            for token_id, logprob_obj in logprobs.items():
-                                new_beam = BeamSearchSequence(
-                                    current_beam.orig_prompt,
-                                    tokens=current_beam.tokens + [token_id],
-                                    logprobs=current_beam.logprobs + [logprobs],
-                                    lora_request=current_beam.lora_request,
-                                    cum_logprob=current_beam.cum_logprob
-                                    + logprob_obj.logprob,
-                                )
-
-                                if token_id == eos_token_id and not ignore_eos:
-                                    instance.completed.append(new_beam)
-                                else:
-                                    instance_new_beams.append(new_beam)
-                    sorted_beams = sorted(
-                        instance_new_beams, key=sort_beams_key, reverse=True
-                    )
-                    instance.beams = sorted_beams[:beam_width]
-
-        outputs = []
-        for instance in instances:
-            instance.completed.extend(instance.beams)
-            sorted_completed = sorted(
-                instance.completed, key=sort_beams_key, reverse=True
-            )
-            best_beams = sorted_completed[:beam_width]
-
-            for beam in best_beams:
-                beam.text = tokenizer.decode(beam.tokens)
-
-            outputs.append(BeamSearchOutput(sequences=best_beams))
-
-        return outputs
-
     def _preprocess_cmpl(
         self,
         prompts: Sequence[PromptType],
         tokenization_kwargs: dict[str, Any] | None = None,
+        mm_processor_kwargs: dict[str, Any] | None = None,
     ) -> Sequence[EngineInput]:
         """
         Convert prompt inputs from LLM APIs (other than [LLM.chat][]) into
@@ -864,15 +706,29 @@ def _preprocess_cmpl(
         tok_params = renderer.default_cmpl_tok_params.with_kwargs(
             **(tokenization_kwargs or {})
         )
+        prompt_extras = (
+            None
+            if mm_processor_kwargs is None
+            else {"mm_processor_kwargs": mm_processor_kwargs}
+        )
 
-        return renderer.render_cmpl(parsed_prompts, tok_params)
+        return renderer.render_cmpl(
+            parsed_prompts,
+            tok_params,
+            prompt_extras=prompt_extras,
+        )
 
     def _preprocess_cmpl_one(
         self,
         prompt: PromptType,
         tokenization_kwargs: dict[str, Any] | None = None,
+        mm_processor_kwargs: dict[str, Any] | None = None,
     ) -> EngineInput:
-        (engine_input,) = self._preprocess_cmpl([prompt], tokenization_kwargs)
+        (engine_input,) = self._preprocess_cmpl(
+            [prompt],
+            tokenization_kwargs,
+            mm_processor_kwargs=mm_processor_kwargs,
+        )
         return engine_input
 
     def _preprocess_chat(
@@ -907,19 +763,28 @@ def _preprocess_chat(
                     add_generation_prompt=add_generation_prompt,
                     continue_final_message=continue_final_message,
                     tools=tools,
-                    tokenize=is_mistral_tokenizer(renderer.tokenizer),
+                    tokenize=(
+                        is_mistral_tokenizer(renderer.tokenizer)
+                        or self.model_config.enable_prompt_embeds
+                    ),
                 ),
             ),
+            mm_processor_kwargs=mm_processor_kwargs,
         )
         tok_params = renderer.default_chat_tok_params.with_kwargs(
             **(tokenization_kwargs or {})
         )
+        prompt_extras = (
+            None
+            if mm_processor_kwargs is None
+            else {"mm_processor_kwargs": mm_processor_kwargs}
+        )
 
         _, engine_inputs = renderer.render_chat(
             conversations,
             chat_params,
             tok_params,
-            prompt_extras={"mm_processor_kwargs": mm_processor_kwargs},
+            prompt_extras=prompt_extras,
         )
 
         return engine_inputs
@@ -1044,646 +909,83 @@ def chat(
             mm_processor_kwargs=mm_processor_kwargs,
         )
 
-    def encode(
+    def enqueue_chat(
         self,
-        prompts: PromptType | Sequence[PromptType] | DataPrompt,
-        pooling_params: PoolingParams | Sequence[PoolingParams] | None = None,
-        *,
+        messages: list[ChatCompletionMessageParam]
+        | Sequence[list[ChatCompletionMessageParam]],
+        sampling_params: SamplingParams | Sequence[SamplingParams] | None = None,
         use_tqdm: bool | Callable[..., tqdm] = True,
-        lora_request: list[LoRARequest] | LoRARequest | None = None,
-        pooling_task: PoolingTask | None = None,
+        lora_request: Sequence[LoRARequest] | LoRARequest | None = None,
+        priority: list[int] | None = None,
+        chat_template: str | None = None,
+        chat_template_content_format: ChatTemplateContentFormatOption = "auto",
+        add_generation_prompt: bool = True,
+        continue_final_message: bool = False,
+        tools: list[dict[str, Any]] | None = None,
+        chat_template_kwargs: dict[str, Any] | None = None,
         tokenization_kwargs: dict[str, Any] | None = None,
-    ) -> list[PoolingRequestOutput]:
-        """Apply pooling to the hidden states corresponding to the input
-        prompts.
+        mm_processor_kwargs: dict[str, Any] | None = None,
+    ) -> list[str]:
+        """Enqueue chat conversations for generation without waiting.
 
-        This class automatically batches the given prompts, considering
-        the memory constraint. For the best performance, put all of your prompts
-        into a single list and pass it to this method.
+        This method renders chat conversations and adds the resulting requests
+        to the engine queue. Use wait_for_completion() to get results. To
+        guarantee that all requests are queued before scheduling starts, pause
+        scheduling with sleep(level=0) before calling this method and resume it
+        with wake_up(tags=["scheduling"]) afterward.
 
         Args:
-            prompts: The prompts to the LLM. You may pass a sequence of prompts
-                for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompt.
-            pooling_params: The pooling parameters for pooling. If None, we
-                use the default pooling parameters.
-            use_tqdm: If `True`, shows a tqdm progress bar.
-                If a callable (e.g., `functools.partial(tqdm, leave=False)`),
-                it is used to create the progress bar.
-                If `False`, no progress bar is created.
+            messages: A sequence of conversations or a single conversation.
+                Each conversation is represented as a list of messages.
+            sampling_params: The sampling parameters for text generation.
+                If None, we use the default sampling parameters.
+            use_tqdm: If `True`, shows a tqdm progress bar while rendering
+                conversations.
             lora_request: LoRA request to use for generation, if any.
-            pooling_task: Override the pooling task to use.
+            priority: The priority of the requests, if any.
+            chat_template: The template to use for structuring the chat.
+            chat_template_content_format: The format to render message content.
+            add_generation_prompt: If True, adds a generation template
+                to each message.
+            continue_final_message: If True, continues the final message in
+                the conversation instead of starting a new one.
+            tools: Tools to make available to the model, if any.
+            chat_template_kwargs: Additional kwargs to pass to the chat
+                template.
             tokenization_kwargs: Overrides for `tokenizer.encode`.
+            mm_processor_kwargs: Overrides for `processor.__call__`.
 
         Returns:
-            A list of `PoolingRequestOutput` objects containing the
-            pooled hidden states in the same order as the input prompts.
+            A list of request IDs for the enqueued requests.
         """
-
-        self._verify_pooling_task(pooling_task)
-
-        if isinstance(prompts, dict) and "data" in prompts:
-            if self.io_processor is None:
-                raise ValueError(
-                    "No IOProcessor plugin installed. Please refer "
-                    "to the documentation and to the "
-                    "'prithvi_geospatial_mae_io_processor' "
-                    "offline inference example for more details."
-                )
-
-            # Validate the request data is valid for the loaded plugin
-            prompt_data = prompts.get("data")
-            if prompt_data is None:
-                raise ValueError(
-                    "The 'data' field of the prompt is expected to contain "
-                    "the prompt data and it cannot be None. "
-                    "Refer to the documentation of the IOProcessor "
-                    "in use for more details."
-                )
-            validated_prompt = self.io_processor.parse_data(prompt_data)
-
-            # obtain the actual model prompts from the pre-processor
-            prompts = self.io_processor.pre_process(prompt=validated_prompt)
-            prompts_seq = prompt_to_seq(prompts)
-
-            params_seq: Sequence[PoolingParams] = [
-                self.io_processor.merge_pooling_params(param)
-                for param in self._params_to_seq(
-                    pooling_params,
-                    len(prompts_seq),
-                )
-            ]
-            for p in params_seq:
-                if p.task is None:
-                    p.task = "plugin"
-
-            outputs = self._run_completion(
-                prompts=prompts_seq,
-                params=params_seq,
-                output_type=PoolingRequestOutput,
-                use_tqdm=use_tqdm,
-                lora_request=lora_request,
-                tokenization_kwargs=tokenization_kwargs,
-            )
-
-            # get the post-processed model outputs
-            assert self.io_processor is not None
-            processed_outputs = self.io_processor.post_process(outputs)
-
-            return [
-                PoolingRequestOutput[Any](
-                    request_id="",
-                    outputs=processed_outputs,
-                    num_cached_tokens=getattr(
-                        processed_outputs, "num_cached_tokens", 0
-                    ),
-                    prompt_token_ids=[],
-                    finished=True,
-                )
-            ]
-        else:
-            if pooling_params is None:
-                # Use default pooling params.
-                pooling_params = PoolingParams()
-
-            prompts_seq = prompt_to_seq(prompts)
-            params_seq = self._params_to_seq(pooling_params, len(prompts_seq))
-
-            for param in params_seq:
-                if param.task is None:
-                    param.task = pooling_task
-                elif param.task != pooling_task:
-                    msg = (
-                        f"You cannot overwrite {param.task=!r} with {pooling_task=!r}!"
-                    )
-                    raise ValueError(msg)
-
-            if pooling_task in self.pooling_io_processors:
-                io_processor = self.pooling_io_processors[pooling_task]
-                processor_inputs = io_processor.pre_process_offline(
-                    prompts_seq, tokenization_kwargs
-                )
-                seq_lora_requests = self._lora_request_to_seq(
-                    lora_request, len(prompts_seq)
-                )
-                seq_priority = self._priority_to_seq(None, len(prompts))
-
-                self._render_and_add_requests(
-                    prompts=processor_inputs,
-                    params=params_seq,
-                    lora_requests=seq_lora_requests,
-                    priorities=seq_priority,
-                )
-
-                outputs = self._run_engine(
-                    use_tqdm=use_tqdm, output_type=PoolingRequestOutput
-                )
-                outputs = io_processor.post_process_offline(outputs)
-            else:
-                outputs = self._run_completion(
-                    prompts=prompts_seq,
-                    params=params_seq,
-                    output_type=PoolingRequestOutput,
-                    use_tqdm=use_tqdm,
-                    lora_request=lora_request,
-                    tokenization_kwargs=tokenization_kwargs,
-                )
-        return outputs
-
-    def _verify_pooling_task(self, pooling_task: PoolingTask | None):
-        if self.runner_type != "pooling":
-            raise ValueError(
-                "LLM.encode() is only supported for pooling models. "
-                "Try passing `--runner pooling` to use the model as a "
-                "pooling model."
-            )
-
-        if pooling_task is None:
-            raise ValueError(
-                "pooling_task required for `LLM.encode`\n"
-                "Please use one of the more specific methods or set the "
-                "pooling_task when using `LLM.encode`:\n"
-                "  - For embeddings, use `LLM.embed(...)` "
-                'or `pooling_task="embed"`.\n'
-                "  - For classification logits, use `LLM.classify(...)` "
-                'or `pooling_task="classify"`.\n'
-                "  - For similarity scores, use `LLM.score(...)`.\n"
-                "  - For rewards, use `LLM.reward(...)` "
-                'or `pooling_task="token_classify"`\n'
-                "  - For token classification, "
-                'use `pooling_task="token_classify"`\n'
-                '  - For multi-vector retrieval, use `pooling_task="token_embed"`'
-            )
-
-        if (
-            pooling_task in ("embed", "token_embed")
-            and pooling_task not in self.supported_tasks
-        ):
-            raise ValueError(
-                "Embedding API is not supported by this model. "
-                "Try converting the model using `--convert embed`."
-            )
-
-        if (
-            pooling_task in ("classify", "token_classify")
-            and pooling_task not in self.supported_tasks
-        ):
+        model_config = self.model_config
+        runner_type = model_config.runner_type
+        if runner_type != "generate":
             raise ValueError(
-                "Classification API is not supported by this model. "
-                "Try converting the model using `--convert classify`."
+                "LLM.enqueue_chat() is only supported for generative models. "
+                "Try passing `--runner generate` to use the model as a "
+                "generative model."
             )
 
-        # plugin task uses io_processor.parse_request to verify inputs
-        if pooling_task != "plugin" and pooling_task != self.pooling_task:
-            if pooling_task not in self.supported_tasks:
-                raise ValueError(
-                    f"Unsupported task: {pooling_task!r} "
-                    f"Supported tasks: {self.supported_tasks}"
-                )
-            else:
-                logger.warning_once(
-                    "Pooling multitask support is deprecated and will "
-                    "be removed in v0.20. When the default pooling task is "
-                    "not what you want, you need to manually specify it "
-                    'via PoolerConfig(task="%s"). ',
-                    pooling_task,
-                )
-
-    def embed(
-        self,
-        prompts: PromptType | Sequence[PromptType],
-        *,
-        use_tqdm: bool | Callable[..., tqdm] = True,
-        pooling_params: PoolingParams | Sequence[PoolingParams] | None = None,
-        lora_request: list[LoRARequest] | LoRARequest | None = None,
-        tokenization_kwargs: dict[str, Any] | None = None,
-    ) -> list[EmbeddingRequestOutput]:
-        """
-        Generate an embedding vector for each prompt.
-
-        This class automatically batches the given prompts, considering
-        the memory constraint. For the best performance, put all of your prompts
-        into a single list and pass it to this method.
-
-        Args:
-            prompts: The prompts to the LLM. You may pass a sequence of prompts
-                for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompt.
-            pooling_params: The pooling parameters for pooling. If None, we
-                use the default pooling parameters.
-            use_tqdm: If `True`, shows a tqdm progress bar.
-                If a callable (e.g., `functools.partial(tqdm, leave=False)`),
-                it is used to create the progress bar.
-                If `False`, no progress bar is created.
-            lora_request: LoRA request to use for generation, if any.
-            tokenization_kwargs: Overrides for `tokenizer.encode`.
-
-        Returns:
-            A list of `EmbeddingRequestOutput` objects containing the
-            embedding vectors in the same order as the input prompts.
-        """
-
-        items = self.encode(
-            prompts,
-            use_tqdm=use_tqdm,
-            pooling_params=pooling_params,
-            lora_request=lora_request,
-            pooling_task="embed",
-            tokenization_kwargs=tokenization_kwargs,
-        )
-
-        return [EmbeddingRequestOutput.from_base(item) for item in items]
-
-    def classify(
-        self,
-        prompts: PromptType | Sequence[PromptType],
-        *,
-        pooling_params: PoolingParams | Sequence[PoolingParams] | None = None,
-        use_tqdm: bool | Callable[..., tqdm] = True,
-        lora_request: list[LoRARequest] | LoRARequest | None = None,
-        tokenization_kwargs: dict[str, Any] | None = None,
-    ) -> list[ClassificationRequestOutput]:
-        """
-        Generate class logits for each prompt.
-
-        This class automatically batches the given prompts, considering
-        the memory constraint. For the best performance, put all of your prompts
-        into a single list and pass it to this method.
-
-        Args:
-            prompts: The prompts to the LLM. You may pass a sequence of prompts
-                for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompt.
-            pooling_params: The pooling parameters for pooling. If None, we
-                use the default pooling parameters.
-            use_tqdm: If `True`, shows a tqdm progress bar.
-                If a callable (e.g., `functools.partial(tqdm, leave=False)`),
-                it is used to create the progress bar.
-                If `False`, no progress bar is created.
-            lora_request: LoRA request to use for generation, if any.
-            tokenization_kwargs: Overrides for `tokenizer.encode`.
-
-        Returns:
-            A list of `ClassificationRequestOutput` objects containing the
-            embedding vectors in the same order as the input prompts.
-        """
-
-        items = self.encode(
-            prompts,
-            use_tqdm=use_tqdm,
-            pooling_params=pooling_params,
-            lora_request=lora_request,
-            pooling_task="classify",
-            tokenization_kwargs=tokenization_kwargs,
-        )
-
-        return [ClassificationRequestOutput.from_base(item) for item in items]
-
-    def reward(
-        self,
-        prompts: PromptType | Sequence[PromptType],
-        /,
-        *,
-        pooling_params: PoolingParams | Sequence[PoolingParams] | None = None,
-        use_tqdm: bool | Callable[..., tqdm] = True,
-        lora_request: list[LoRARequest] | LoRARequest | None = None,
-        tokenization_kwargs: dict[str, Any] | None = None,
-    ) -> list[PoolingRequestOutput]:
-        """
-        Generate rewards for each prompt.
-
-        Args:
-            prompts: The prompts to the LLM. You may pass a sequence of prompts
-                for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompt.
-            pooling_params: The pooling parameters for pooling. If None, we
-                use the default pooling parameters.
-            use_tqdm: If `True`, shows a tqdm progress bar.
-                If a callable (e.g., `functools.partial(tqdm, leave=False)`),
-                it is used to create the progress bar.
-                If `False`, no progress bar is created.
-            lora_request: LoRA request to use for generation, if any.
-            tokenization_kwargs: Overrides for `tokenizer.encode`.
-
-        Returns:
-            A list of `PoolingRequestOutput` objects containing the
-            pooled hidden states in the same order as the input prompts.
-        """
-        return self.encode(
-            prompts,
-            use_tqdm=use_tqdm,
-            lora_request=lora_request,
-            pooling_params=pooling_params,
-            pooling_task="token_classify",
-            tokenization_kwargs=tokenization_kwargs,
-        )
-
-    def _embedding_score(
-        self,
-        data_1: list[ScoreData],
-        data_2: list[ScoreData],
-        *,
-        use_tqdm: bool | Callable[..., tqdm],
-        pooling_params: PoolingParams | None,
-        lora_request: list[LoRARequest] | LoRARequest | None,
-        tokenization_kwargs: dict[str, Any],
-    ) -> list[ScoringRequestOutput]:
-        tokenizer = self.get_tokenizer()
-
-        input_texts: list[str] = []
-        for text in data_1 + data_2:
-            if not isinstance(text, str):
-                raise NotImplementedError(
-                    "Embedding scores currently do not support multimodal input."
-                )
-            input_texts.append(text)
-
-        encoded_output = self.encode(
-            input_texts,
-            use_tqdm=use_tqdm,
-            lora_request=lora_request,
-            pooling_params=pooling_params,
-            pooling_task="embed",
-            tokenization_kwargs=tokenization_kwargs,
-        )
-
-        encoded_output_1 = encoded_output[0 : len(data_1)]
-        encoded_output_2 = encoded_output[len(data_1) :]
-
-        if len(encoded_output_1) == 1:
-            encoded_output_1 = encoded_output_1 * len(encoded_output_2)
-
-        scores = _cosine_similarity(
-            tokenizer=tokenizer,
-            embed_1=encoded_output_1,
-            embed_2=encoded_output_2,
-        )
-
-        return [ScoringRequestOutput.from_base(item) for item in scores]
-
-    def _late_interaction_score(
-        self,
-        data_1: list[ScoreData],
-        data_2: list[ScoreData],
-        *,
-        use_tqdm: bool | Callable[..., tqdm],
-        pooling_params: PoolingParams | None,
-        lora_request: list[LoRARequest] | LoRARequest | None,
-        tokenization_kwargs: dict[str, Any],
-    ) -> list[ScoringRequestOutput]:
-        """
-        Late interaction scoring (ColBERT MaxSim).
-
-        Encodes queries and documents into per-token embeddings, then computes
-        MaxSim: sum over query tokens of max similarity to any document token.
-        """
-        from vllm.outputs import PoolingOutput
-
-        tokenizer = self.get_tokenizer()
-
-        # Convert ScoreData to PromptType (handles both text and multimodal)
-        model_config = self.model_config
-        prompts_1 = score_data_to_prompts(data_1, "query", model_config)
-        prompts_2 = score_data_to_prompts(data_2, "document", model_config)
+        if sampling_params is None:
+            sampling_params = self.get_default_sampling_params()
 
-        encoded_output: list[PoolingRequestOutput] = self.encode(
-            prompts_1 + prompts_2,
+        return self._add_chat_requests(
+            messages=messages,
+            params=sampling_params,
             use_tqdm=use_tqdm,
             lora_request=lora_request,
-            pooling_params=pooling_params,
-            pooling_task="token_embed",
+            priority=priority,
+            chat_template=chat_template,
+            chat_template_content_format=chat_template_content_format,
+            chat_template_kwargs=chat_template_kwargs,
+            add_generation_prompt=add_generation_prompt,
+            continue_final_message=continue_final_message,
+            tools=tools,
             tokenization_kwargs=tokenization_kwargs,
+            mm_processor_kwargs=mm_processor_kwargs,
         )
 
-        encoded_output_1: list[PoolingRequestOutput] = encoded_output[: len(prompts_1)]
-        encoded_output_2: list[PoolingRequestOutput] = encoded_output[len(prompts_1) :]
-
-        if len(encoded_output_1) == 1:
-            encoded_output_1 = encoded_output_1 * len(encoded_output_2)
-
-        # Compute MaxSim scores
-        scores: list[PoolingRequestOutput] = []
-        padding: list[int] = []
-        if (pad_token_id := tokenizer.pad_token_id) is not None:
-            padding = [pad_token_id]
-
-        for emb_1, emb_2 in zip(encoded_output_1, encoded_output_2):
-            # emb_1.outputs.data: [query_len, dim]
-            # emb_2.outputs.data: [doc_len, dim]
-            q_emb = emb_1.outputs.data
-            d_emb = emb_2.outputs.data
-
-            maxsim_score = compute_maxsim_score(q_emb, d_emb)
-
-            tokens = emb_1.prompt_token_ids + padding + emb_2.prompt_token_ids
-
-            scores.append(
-                PoolingRequestOutput(
-                    request_id=f"{emb_1.request_id}_{emb_2.request_id}",
-                    outputs=PoolingOutput(data=maxsim_score),
-                    prompt_token_ids=tokens,
-                    num_cached_tokens=emb_1.num_cached_tokens + emb_2.num_cached_tokens,
-                    finished=True,
-                )
-            )
-
-        return [ScoringRequestOutput.from_base(item) for item in scores]
-
-    def _cross_encoding_score(
-        self,
-        data_1: list[ScoreData],
-        data_2: list[ScoreData],
-        *,
-        use_tqdm: bool | Callable[..., tqdm],
-        pooling_params: PoolingParams | None,
-        lora_request: list[LoRARequest] | LoRARequest | None,
-        tokenization_kwargs: dict[str, Any],
-        score_template: str | None,
-    ) -> list[ScoringRequestOutput]:
-        model_config = self.model_config
-        tokenizer = self.get_tokenizer()
-
-        if is_mistral_tokenizer(tokenizer):
-            raise ValueError("Score API is not supported for Mistral tokenizer")
-
-        if len(data_1) == 1:
-            data_1 = data_1 * len(data_2)
-
-        if pooling_params is None:
-            pooling_params = PoolingParams(task="classify")
-        elif pooling_params.task is None:
-            pooling_params.task = "classify"
-
-        pooling_params_list = list[PoolingParams]()
-
-        prompts = list[PromptType]()
-
-        input_pairs = [(t1, t2) for t1, t2 in zip(data_1, data_2)]
-
-        for q, d in input_pairs:
-            _, engine_prompt = get_score_prompt(
-                model_config=model_config,
-                data_1=q,
-                data_2=d,
-                tokenizer=tokenizer,
-                tokenization_kwargs=tokenization_kwargs,
-                score_template=score_template,
-            )
-
-            if token_type_ids := engine_prompt.pop("token_type_ids", None):
-                params = pooling_params.clone()
-                compressed = compress_token_type_ids(token_type_ids)
-                params.extra_kwargs = {"compressed_token_type_ids": compressed}
-                pooling_params_list.append(params)
-            else:
-                pooling_params_list.append(pooling_params)
-
-            prompts.append(engine_prompt)
-
-        outputs = self._run_completion(
-            prompts=prompts,
-            params=pooling_params_list,
-            output_type=PoolingRequestOutput,
-            use_tqdm=use_tqdm,
-            lora_request=lora_request,
-        )
-
-        return [ScoringRequestOutput.from_base(item) for item in outputs]
-
-    def score(
-        self,
-        data_1: SingletonPrompt
-        | Sequence[SingletonPrompt]
-        | ScoreMultiModalParam
-        | list[ScoreMultiModalParam],
-        data_2: SingletonPrompt
-        | Sequence[SingletonPrompt]
-        | ScoreMultiModalParam
-        | list[ScoreMultiModalParam],
-        /,
-        *,
-        use_tqdm: bool | Callable[..., tqdm] = True,
-        pooling_params: PoolingParams | None = None,
-        lora_request: list[LoRARequest] | LoRARequest | None = None,
-        tokenization_kwargs: dict[str, Any] | None = None,
-        chat_template: str | None = None,
-    ) -> list[ScoringRequestOutput]:
-        """Generate similarity scores for all pairs `<text,text_pair>` or
-          `<multi-modal data, multi-modal data pair>`.
-
-        The inputs can be `1 -> 1`, `1 -> N` or `N -> N`.
-        In the `1 - N` case the `data_1` input will be replicated `N`
-        times to pair with the `data_2` inputs.
-        The input pairs are used to build a list of prompts for the
-        cross encoder model. This class automatically batches the prompts,
-        considering the memory constraint. For the best performance, put all
-        of your inputs into a single list and pass it to this method.
-
-        Supports both text and multi-modal data (images, etc.) when used with
-        appropriate multi-modal models. For multi-modal inputs, ensure the
-        prompt structure matches the model's expected input format.
-
-        Args:
-            data_1: Can be a single prompt, a list of prompts or
-                `ScoreMultiModalParam`, which can contain either text or
-                multi-modal data. When a list, it must have the same length as
-                the `data_2` list.
-            data_2: The data to pair with the query to form the input to
-                the LLM. Can be text or multi-modal data. See [PromptType]
-                [vllm.inputs.PromptType] for more details about the format of
-                each prompt.
-            pooling_params: The pooling parameters for pooling. If None, we
-                use the default pooling parameters.
-            use_tqdm: If `True`, shows a tqdm progress bar.
-                If a callable (e.g., `functools.partial(tqdm, leave=False)`),
-                it is used to create the progress bar.
-                If `False`, no progress bar is created.
-            lora_request: LoRA request to use for generation, if any.
-            chat_template: The chat template to use for the scoring. If None, we
-                use the model's default chat template.
-            tokenization_kwargs: Overrides for `tokenizer.encode`.
-        Returns:
-            A list of `ScoringRequestOutput` objects containing the
-            generated scores in the same order as the input prompts.
-        """
-        model_config = self.model_config
-
-        runner_type = model_config.runner_type
-        if runner_type != "pooling":
-            raise ValueError(
-                "LLM.score() is only supported for pooling models. "
-                "Try passing `--runner pooling` to use the model as a "
-                "pooling model."
-            )
-
-        supported_tasks = self.supported_tasks
-        score_type = self.model_config.score_type
-        is_late_interaction = score_type == "late-interaction"
-        is_cross_encoder = score_type == "cross-encoder"
-
-        # Late interaction models (e.g., ColBERT) use token_embed for scoring
-        if not is_late_interaction and all(
-            t not in supported_tasks for t in ("embed", "classify")
-        ):
-            raise ValueError(
-                "Score API is not supported by this model. "
-                "Try converting the model using "
-                "`--convert embed` or `--convert classify`."
-            )
-
-        if is_cross_encoder and getattr(model_config.hf_config, "num_labels", 0) != 1:
-            raise ValueError("Score API is only enabled for num_labels == 1.")
-
-        if not is_cross_encoder and chat_template is not None:
-            raise ValueError(
-                "chat_template is only supported for cross-encoder models."
-            )
-
-        is_multimodal_model = model_config.is_multimodal_model
-        architecture = model_config.architecture
-
-        score_data_1, score_data_2 = validate_score_input(
-            data_1,  # type: ignore[arg-type]
-            data_2,  # type: ignore[arg-type]
-            is_multimodal_model=is_multimodal_model,
-            architecture=architecture,
-        )
-
-        renderer = self.renderer
-        tok_params = renderer.default_cmpl_tok_params.with_kwargs(
-            **(tokenization_kwargs or {})
-        )
-        encode_kwargs = tok_params.get_encode_kwargs()
-
-        if is_cross_encoder:
-            return self._cross_encoding_score(
-                score_data_1,
-                score_data_2,
-                use_tqdm=use_tqdm,
-                pooling_params=pooling_params,
-                lora_request=lora_request,
-                tokenization_kwargs=encode_kwargs,
-                score_template=chat_template,
-            )
-        elif is_late_interaction:
-            return self._late_interaction_score(
-                score_data_1,
-                score_data_2,
-                use_tqdm=use_tqdm,
-                pooling_params=pooling_params,
-                lora_request=lora_request,
-                tokenization_kwargs=encode_kwargs,
-            )
-        else:
-            return self._embedding_score(
-                score_data_1,
-                score_data_2,
-                use_tqdm=use_tqdm,
-                pooling_params=pooling_params,
-                lora_request=lora_request,
-                tokenization_kwargs=encode_kwargs,
-            )
-
     def start_profile(self, profile_prefix: str | None = None) -> None:
         """Start profiling with optional custom trace prefix.
 
@@ -1764,7 +1066,7 @@ def _params_to_seq(
         if isinstance(params, Sequence):
             if len(params) != num_requests:
                 raise ValueError(
-                    f"The lengths of prompts ({params}) "
+                    f"The lengths of prompts ({num_requests}) "
                     f"and params ({len(params)}) must be the same."
                 )
 
@@ -1815,15 +1117,20 @@ def _add_completion_requests(
         lora_request: Sequence[LoRARequest] | LoRARequest | None = None,
         priority: list[int] | None = None,
         tokenization_kwargs: dict[str, Any] | None = None,
+        mm_processor_kwargs: dict[str, Any] | None = None,
     ) -> list[str]:
         seq_prompts = prompt_to_seq(prompts)
         seq_params = self._params_to_seq(params, len(seq_prompts))
         seq_lora_requests = self._lora_request_to_seq(lora_request, len(seq_prompts))
-        seq_priority = self._priority_to_seq(priority, len(prompts))
+        seq_priority = self._priority_to_seq(priority, len(seq_prompts))
 
         return self._render_and_add_requests(
             prompts=(
-                self._preprocess_cmpl_one(prompt, tokenization_kwargs)
+                self._preprocess_cmpl_one(
+                    prompt,
+                    tokenization_kwargs,
+                    mm_processor_kwargs=mm_processor_kwargs,
+                )
                 for prompt in maybe_tqdm(
                     seq_prompts,
                     use_tqdm=use_tqdm,
@@ -1847,6 +1154,7 @@ def _run_completion(
         lora_request: Sequence[LoRARequest] | LoRARequest | None = None,
         priority: list[int] | None = None,
         tokenization_kwargs: dict[str, Any] | None = None,
+        mm_processor_kwargs: dict[str, Any] | None = None,
     ):
         self._add_completion_requests(
             prompts=prompts,
@@ -1855,6 +1163,7 @@ def _run_completion(
             lora_request=lora_request,
             priority=priority,
             tokenization_kwargs=tokenization_kwargs,
+            mm_processor_kwargs=mm_processor_kwargs,
         )
         return self._run_engine(use_tqdm=use_tqdm, output_type=output_type)
 
@@ -1878,11 +1187,59 @@ def _run_chat(
         tokenization_kwargs: dict[str, Any] | None = None,
         mm_processor_kwargs: dict[str, Any] | None = None,
     ):
+        self._add_chat_requests(
+            messages=messages,
+            params=params,
+            use_tqdm=use_tqdm,
+            lora_request=lora_request,
+            chat_template=chat_template,
+            chat_template_content_format=chat_template_content_format,
+            chat_template_kwargs=chat_template_kwargs,
+            add_generation_prompt=add_generation_prompt,
+            continue_final_message=continue_final_message,
+            tools=tools,
+            tokenization_kwargs=tokenization_kwargs,
+            mm_processor_kwargs=mm_processor_kwargs,
+        )
+        return self._run_engine(output_type=output_type, use_tqdm=use_tqdm)
+
+    def _add_chat_requests(
+        self,
+        messages: list[ChatCompletionMessageParam]
+        | Sequence[list[ChatCompletionMessageParam]],
+        params: SamplingParams
+        | PoolingParams
+        | Sequence[SamplingParams | PoolingParams],
+        *,
+        use_tqdm: bool | Callable[..., tqdm] = True,
+        lora_request: Sequence[LoRARequest] | LoRARequest | None = None,
+        priority: list[int] | None = None,
+        chat_template: str | None = None,
+        chat_template_content_format: ChatTemplateContentFormatOption = "auto",
+        add_generation_prompt: bool = True,
+        continue_final_message: bool = False,
+        tools: list[dict[str, Any]] | None = None,
+        chat_template_kwargs: dict[str, Any] | None = None,
+        tokenization_kwargs: dict[str, Any] | None = None,
+        mm_processor_kwargs: dict[str, Any] | None = None,
+    ) -> list[str]:
         seq_convs = conversation_to_seq(messages)
         seq_params = self._params_to_seq(params, len(seq_convs))
         seq_lora_requests = self._lora_request_to_seq(lora_request, len(seq_convs))
+        seq_priority = self._priority_to_seq(priority, len(seq_convs))
+
+        # When thinking is enabled or tools are provided, and the model
+        # uses special tokens for structured output (e.g. Gemma4's
+        # <|channel>, <|tool_call>, <|"|>), automatically set
+        # skip_special_tokens=False so these tokens are preserved in
+        # output.text for downstream parsing.
+        needs_parsing = (
+            chat_template_kwargs and chat_template_kwargs.get("enable_thinking")
+        ) or tools
+        if needs_parsing:
+            self._adjust_params_for_parsing(seq_params)
 
-        return self._render_and_run_requests(
+        return self._render_and_add_requests(
             prompts=(
                 self._preprocess_chat_one(
                     conversation,
@@ -1902,11 +1259,57 @@ def _run_chat(
                 )
             ),
             params=seq_params,
-            output_type=output_type,
             lora_requests=seq_lora_requests,
-            use_tqdm=use_tqdm,
+            priorities=seq_priority,
         )
 
+    def _adjust_params_for_parsing(
+        self, params: Sequence[SamplingParams | PoolingParams]
+    ) -> None:
+        """Set ``skip_special_tokens=False`` when the model encodes
+        structured output syntax as special tokens.
+
+        Models like Gemma4 register thinking delimiters
+        (``<|channel>``/``<channel|>``) and tool call tokens
+        (``<|tool_call>``/``<tool_call|>``/``<|"|>``) as special tokens.
+        The default ``skip_special_tokens=True`` strips them from
+        ``output.text``, breaking parsing of both reasoning blocks and
+        tool calls.
+
+        This is a no-op for models whose structured tokens are regular
+        text tokens (e.g. DeepSeek's ``<think>``/``</think>``).
+        """
+        # The offline API currently lacks a unified rendering pipeline.
+        # Until the planned Renderer refactor is complete, we hardcode
+        # this token preservation logic specifically for Gemma4 models
+        # to avoid regressions on other models.
+        hf_config = getattr(self.model_config, "hf_config", None)
+        architectures = getattr(hf_config, "architectures", [])
+
+        if any("Gemma4" in arch for arch in architectures):
+            tokenizer = self.renderer.get_tokenizer()
+            vocab = tokenizer.get_vocab()
+            special_ids = set(getattr(tokenizer, "all_special_ids", []))
+
+            # Tokens used for thinking delimiters and tool call syntax
+            # that some models (Gemma4) register as special tokens.
+            structured_tokens = (
+                "<|channel>",
+                "<channel|>",  # thinking delimiters
+                "<|tool_call>",
+                "<tool_call|>",  # tool call delimiters
+                '<|"|>',  # string quoting in tool args
+            )
+            needs_special = any(
+                vocab.get(tok) in special_ids
+                for tok in structured_tokens
+                if tok in vocab
+            )
+            if needs_special:
+                for sp in params:
+                    if isinstance(sp, SamplingParams) and sp.skip_special_tokens:
+                        sp.skip_special_tokens = False
+
     def _render_and_run_requests(
         self,
         prompts: Iterable[EngineInput],
@@ -2058,6 +1461,20 @@ def init_weight_transfer_engine(
             "init_weight_transfer_engine", kwargs={"init_info": init_info_dict}
         )
 
+    def start_weight_update(self, is_checkpoint_format: bool = True) -> None:
+        """
+        Start a new weight update.
+
+        Args:
+            is_checkpoint_format: Whether incoming weights are in checkpoint
+                format (need layerwise processing) or kernel format (direct
+                copy).
+        """
+        self.llm_engine.collective_rpc(
+            "start_weight_update",
+            kwargs={"is_checkpoint_format": is_checkpoint_format},
+        )
+
     def update_weights(self, request: WeightTransferUpdateRequest | dict) -> None:
         """
         Update the weights of the model.
@@ -2073,6 +1490,12 @@ def update_weights(self, request: WeightTransferUpdateRequest | dict) -> None:
             "update_weights", kwargs={"update_info": update_info_dict}
         )
 
+    def finish_weight_update(self) -> None:
+        """
+        Finish the current weight update.
+        """
+        self.llm_engine.collective_rpc("finish_weight_update")
+
     def __repr__(self) -> str:
         """Return a transformers-style hierarchical view of the model."""
         # Cache the result to avoid repeated collective_rpc calls
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 95e831b51ec0..461128ed9053 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -151,7 +151,7 @@ async def build_async_engine_client_from_engine_args(
         yield async_llm
     finally:
         if async_llm:
-            async_llm.shutdown()
+            async_llm.shutdown(timeout=vllm_config.shutdown_timeout)
 
 
 def build_app(
@@ -220,6 +220,12 @@ def build_app(
 
         elastic_ep_attach_router(app)
 
+        from vllm.entrypoints.openai.generative_scoring.api_router import (
+            register_generative_scoring_api_router,
+        )
+
+        register_generative_scoring_api_router(app)
+
     if "generate" in supported_tasks or "render" in supported_tasks:
         from vllm.entrypoints.serve.render.api_router import (
             attach_router as attach_render_router,
@@ -227,22 +233,15 @@ def build_app(
 
         attach_render_router(app)
 
-    if "transcription" in supported_tasks:
-        from vllm.entrypoints.openai.speech_to_text.api_router import (
-            attach_router as register_speech_to_text_api_router,
-        )
-
-        register_speech_to_text_api_router(app)
-
-    if "realtime" in supported_tasks:
-        from vllm.entrypoints.openai.realtime.api_router import (
-            attach_router as register_realtime_api_router,
+    if "transcription" in supported_tasks or "realtime" in supported_tasks:
+        from vllm.entrypoints.speech_to_text.factories import (
+            register_speech_to_text_api_routers,
         )
 
-        register_realtime_api_router(app)
+        register_speech_to_text_api_routers(app, supported_tasks)
 
     if any(task in POOLING_TASKS for task in supported_tasks):
-        from vllm.entrypoints.pooling import register_pooling_api_routers
+        from vllm.entrypoints.pooling.factories import register_pooling_api_routers
 
         register_pooling_api_routers(app, supported_tasks, model_config)
 
@@ -278,11 +277,11 @@ def build_app(
 
     if "realtime" in supported_tasks:
         # Add WebSocket metrics middleware
-        from vllm.entrypoints.openai.realtime.metrics import (
-            WebSocketMetricsMiddleware,
+        from vllm.entrypoints.speech_to_text.factories import (
+            add_websocket_metrics_middleware,
         )
 
-        app.add_middleware(WebSocketMetricsMiddleware)
+        add_websocket_metrics_middleware(app)
 
     if envs.VLLM_DEBUG_LOG_API_SERVER_RESPONSE:
         logger.warning(
@@ -315,6 +314,21 @@ async def init_app_state(
     supported_tasks: tuple["SupportedTask", ...] | None = None,
 ) -> None:
     vllm_config = engine_client.vllm_config
+
+    # Propagate enable_in_reasoning to the API-server process. The engine core
+    # runs in a separate process, so the contextvar that backs
+    # `get_current_vllm_config_or_none()` is None on this stack. Tool parsers
+    # call `get_enable_structured_outputs_in_reasoning()` during request
+    # handling and need to see the real flag, otherwise they silently fall
+    # back to False and mismatch the engine-side bitmask gating.
+    from vllm.tool_parsers.structural_tag_registry import (
+        set_enable_structured_outputs_in_reasoning,
+    )
+
+    set_enable_structured_outputs_in_reasoning(
+        vllm_config.structured_outputs_config.enable_in_reasoning
+    )
+
     if supported_tasks is None:
         warnings.warn(
             "The 'supported_tasks' parameter was not provided to "
@@ -363,7 +377,6 @@ async def init_app_state(
     state.openai_serving_render = OpenAIServingRender(
         model_config=engine_client.model_config,
         renderer=engine_client.renderer,
-        io_processor=engine_client.io_processor,
         model_registry=state.openai_serving_models.registry,
         request_logger=request_logger,
         chat_template=resolved_chat_template,
@@ -372,6 +385,7 @@ async def init_app_state(
         enable_auto_tools=args.enable_auto_tool_choice,
         exclude_tools_when_tool_choice_none=args.exclude_tools_when_tool_choice_none,
         tool_parser=args.tool_call_parser,
+        reasoning_parser=args.structured_outputs_config.reasoning_parser,
         default_chat_template_kwargs=args.default_chat_template_kwargs,
         log_error_stack=args.log_error_stack,
     )
@@ -394,22 +408,21 @@ async def init_app_state(
             engine_client, state, args, request_logger, supported_tasks
         )
 
-    if "transcription" in supported_tasks:
-        from vllm.entrypoints.openai.speech_to_text.api_router import (
-            init_transcription_state,
+        from vllm.entrypoints.openai.generative_scoring.api_router import (
+            init_generative_scoring_state,
         )
 
-        init_transcription_state(
-            engine_client, state, args, request_logger, supported_tasks
-        )
+        await init_generative_scoring_state(engine_client, state, args, request_logger)
 
-    if "realtime" in supported_tasks:
-        from vllm.entrypoints.openai.realtime.api_router import init_realtime_state
+    if "transcription" in supported_tasks or "realtime" in supported_tasks:
+        from vllm.entrypoints.speech_to_text.factories import init_speech_to_text_state
 
-        init_realtime_state(engine_client, state, args, request_logger, supported_tasks)
+        init_speech_to_text_state(
+            engine_client, state, args, request_logger, supported_tasks
+        )
 
     if any(task in POOLING_TASKS for task in supported_tasks):
-        from vllm.entrypoints.pooling import init_pooling_state
+        from vllm.entrypoints.pooling.factories import init_pooling_state
 
         init_pooling_state(engine_client, state, args, request_logger, supported_tasks)
 
@@ -426,13 +439,12 @@ async def init_render_app_state(
 
     Unlike :func:`init_app_state` this function does not require an
     :class:`~vllm.engine.protocol.EngineClient`; it bootstraps the
-    preprocessing pipeline (renderer, io_processor, input_processor)
+    preprocessing pipeline (renderer, input_processor)
     directly from the :class:`~vllm.config.VllmConfig`.
     """
     from vllm.entrypoints.chat_utils import load_chat_template
     from vllm.entrypoints.openai.models.serving import OpenAIModelRegistry
     from vllm.entrypoints.serve.render.serving import OpenAIServingRender
-    from vllm.plugins.io_processors import get_io_processor
     from vllm.renderers import renderer_from_config
 
     served_model_names = args.served_model_name or [args.model]
@@ -450,15 +462,11 @@ async def init_render_app_state(
         request_logger = None
 
     renderer = renderer_from_config(vllm_config)
-    io_processor = get_io_processor(
-        vllm_config, renderer, vllm_config.model_config.io_processor_plugin
-    )
     resolved_chat_template = load_chat_template(args.chat_template)
 
     state.openai_serving_render = OpenAIServingRender(
         model_config=vllm_config.model_config,
         renderer=renderer,
-        io_processor=io_processor,
         model_registry=model_registry,
         request_logger=request_logger,
         chat_template=resolved_chat_template,
@@ -467,6 +475,7 @@ async def init_render_app_state(
         enable_auto_tools=args.enable_auto_tool_choice,
         exclude_tools_when_tool_choice_none=args.exclude_tools_when_tool_choice_none,
         tool_parser=args.tool_call_parser,
+        reasoning_parser=args.structured_outputs_config.reasoning_parser,
         default_chat_template_kwargs=args.default_chat_template_kwargs,
         log_error_stack=args.log_error_stack,
     )
@@ -524,8 +533,7 @@ def validate_api_server_args(args):
 
 @instrument(span_name="API server setup")
 def setup_server(args):
-    """Validate API server args, set up signal handler, create socket
-    ready to serve."""
+    """Validate API server args and create the server socket."""
 
     log_version_and_model(logger, VLLM_VERSION, args.model)
     log_non_default_args(args)
@@ -551,12 +559,6 @@ def setup_server(args):
     # many concurrent requests active
     set_ulimit()
 
-    def signal_handler(*_) -> None:
-        # Interrupt server on sigterm while initializing
-        raise KeyboardInterrupt("terminated")
-
-    signal.signal(signal.SIGTERM, signal_handler)
-
     if args.uds:
         listen_address = f"unix:{args.uds}"
     else:
@@ -663,8 +665,14 @@ async def build_and_serve_renderer(
 async def run_server(args, **uvicorn_kwargs) -> None:
     """Run a single-worker API server."""
 
-    # Add process-specific prefix to stdout and stderr.
-    decorate_logs("APIServer")
+    decorate_logs("APIServer", skip_if_decorated=True)
+
+    # Interrupt initialization if SIGTERM arrives before uvicorn installs its
+    # own signal handlers. Once uvicorn is running it replaces this.
+    def _interrupt_init(*_) -> None:
+        raise KeyboardInterrupt("terminated")
+
+    signal.signal(signal.SIGTERM, _interrupt_init)
 
     listen_address, sock = setup_server(args)
     await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
diff --git a/vllm/entrypoints/openai/chat_completion/batch_serving.py b/vllm/entrypoints/openai/chat_completion/batch_serving.py
index f97c93bb03c8..0dfcdd925158 100644
--- a/vllm/entrypoints/openai/chat_completion/batch_serving.py
+++ b/vllm/entrypoints/openai/chat_completion/batch_serving.py
@@ -114,12 +114,15 @@ async def create_batch_chat_completion(
         """
         tokenizer = self.renderer.tokenizer
         assert tokenizer is not None
+        single_requests = [
+            request.to_chat_completion_request(messages)
+            for messages in request.messages
+        ]
 
         reasoning_parser: ReasoningParser | None = None
         if self.reasoning_parser_cls:
-            chat_template_kwargs = self._prepare_extra_chat_template_kwargs(
-                request.chat_template_kwargs,
-                self.default_chat_template_kwargs,
+            chat_template_kwargs = self._effective_chat_template_kwargs(
+                single_requests[0]
             )
             reasoning_parser = self.reasoning_parser_cls(
                 tokenizer,
@@ -155,7 +158,7 @@ async def create_batch_chat_completion(
                 self.default_sampling_params,
                 self.override_max_tokens,
             )
-            single_request = request.to_chat_completion_request(request.messages[i])
+            single_request = single_requests[i]
             sampling_params = single_request.to_sampling_params(
                 max_tokens, self.default_sampling_params
             )
@@ -215,8 +218,6 @@ async def chat_completion_full_generator_batch(
         ``check_batch_mode`` validator, so neither needs to be handled here.
         """
         created_time = int(time.time())
-        role = self.get_chat_request_role(request)  # type: ignore[arg-type]
-
         final_results: dict[int, RequestOutput] = {}
         try:
             async for prompt_idx, res in merge_async_iterators(*generators):
@@ -272,6 +273,12 @@ async def chat_completion_full_generator_batch(
                     reasoning = None
                     content = output.text
 
+                role = (
+                    self.response_role
+                    if request.add_generation_prompt
+                    else request.messages[prompt_idx][-1]["role"]
+                )
+
                 message = ChatMessage(role=role, reasoning=reasoning, content=content)
 
                 if request.echo:
@@ -314,4 +321,5 @@ async def chat_completion_full_generator_batch(
             model=model_name,
             choices=choices,
             usage=usage,
+            system_fingerprint=self.system_fingerprint,
         )
diff --git a/vllm/entrypoints/openai/chat_completion/protocol.py b/vllm/entrypoints/openai/chat_completion/protocol.py
index 533959df6094..921f4a84a2c9 100644
--- a/vllm/entrypoints/openai/chat_completion/protocol.py
+++ b/vllm/entrypoints/openai/chat_completion/protocol.py
@@ -11,7 +11,7 @@
     ChatCompletionAudio as OpenAIChatCompletionAudio,
 )
 from openai.types.chat.chat_completion_message import Annotation as OpenAIAnnotation
-from pydantic import Field, model_validator
+from pydantic import Field, PrivateAttr, model_serializer, model_validator
 
 from vllm.config import ModelConfig
 from vllm.config.utils import replace
@@ -92,6 +92,16 @@ class ChatCompletionResponseChoice(OpenAIBaseModel):
     # not part of the OpenAI spec but is useful for tracing the tokens
     # in agent scenarios
     token_ids: list[int] | None = None
+    # Per-token expert routing decisions, base64-encoded ``.npy`` bytes
+    # (numpy serialization). Shape after decode:
+    #   (num_tokens - 1, num_layers, num_experts_per_tok)  dtype uint8/uint16
+    # ``num_tokens - 1`` because the last sampled token has not been
+    # forwarded yet and therefore has no routing data.
+    # Decode:
+    #   np.load(io.BytesIO(base64.b64decode(s)))
+    # ``None`` if (a) the request was aborted before any forward pass,
+    # or (b) ``enable_return_routed_experts`` is off server-side.
+    routed_experts: str | None = None
 
 
 class ChatCompletionResponse(OpenAIBaseModel):
@@ -107,6 +117,9 @@ class ChatCompletionResponse(OpenAIBaseModel):
     # vLLM-specific fields that are not in OpenAI spec
     prompt_logprobs: list[dict[int, Logprob] | None] | None = None
     prompt_token_ids: list[int] | None = None
+    # Rendered prompt text from chat templating (only set when
+    # ``return_prompt_text=True`` on the request).
+    prompt_text: str | None = None
     kv_transfer_params: dict[str, Any] | None = Field(
         default=None, description="KVTransfer parameters."
     )
@@ -129,13 +142,33 @@ class ChatCompletionStreamResponse(OpenAIBaseModel):
     model: str
     choices: list[ChatCompletionResponseStreamChoice]
     usage: UsageInfo | None = Field(default=None)
+    # Set only on the final chunk of a stream to mirror non-streaming responses
+    # without the per-chunk serialization overhead.
+    system_fingerprint: str | None = None
     # not part of the OpenAI spec but for tracing the tokens
     prompt_token_ids: list[int] | None = None
+    # Rendered prompt text from chat templating (only set when
+    # ``return_prompt_text=True`` on the request); only sent on the first chunk.
+    prompt_text: str | None = None
 
 
 class ChatCompletionToolsParam(OpenAIBaseModel):
     type: Literal["function"] = "function"
     function: FunctionDefinition
+    defer_loading: bool | None = None
+
+    @model_validator(mode="after")
+    def _propagate_defer_loading(self) -> "ChatCompletionToolsParam":
+        if self.defer_loading is not None and self.function.defer_loading is None:
+            self.function.defer_loading = self.defer_loading
+        return self
+
+    @model_serializer(mode="wrap")
+    def _serialize(self, handler):
+        data = handler(self)
+        if self.defer_loading is None:
+            data.pop("defer_loading", None)
+        return data
 
 
 class ChatCompletionNamedFunction(OpenAIBaseModel):
@@ -179,7 +212,19 @@ class ChatCompletionRequest(OpenAIBaseModel):
         | ChatCompletionNamedToolChoiceParam
         | None
     ) = "none"
-    reasoning_effort: Literal["none", "low", "medium", "high"] | None = None
+    reasoning_effort: (
+        Literal["none", "minimal", "low", "medium", "high", "xhigh", "max"] | None
+    ) = Field(
+        default=None,
+        description=(
+            "Constrains effort on reasoning for reasoning models. "
+            "Currently supported values are none, minimal, low, medium, "
+            "high, xhigh, and max. Reducing reasoning effort can result in "
+            "faster responses and fewer tokens used on reasoning in a response. "
+            "Note that 'max' is specific to the DeepSeek V4 series and is not "
+            "part of the standard OpenAI API specification."
+        ),
+    )
     thinking_token_budget: int | None = None
     include_reasoning: bool = True
     parallel_tool_calls: bool | None = True
@@ -200,6 +245,14 @@ class ChatCompletionRequest(OpenAIBaseModel):
     skip_special_tokens: bool = True
     spaces_between_special_tokens: bool = True
     truncate_prompt_tokens: Annotated[int, Field(ge=-1, le=_INT64_MAX)] | None = None
+    truncation_side: Literal["left", "right"] | None = Field(
+        default=None,
+        description=(
+            "Which side to truncate from when truncate_prompt_tokens is active. "
+            "'right' keeps the first N tokens. "
+            "'left' keeps the last N tokens."
+        ),
+    )
     prompt_logprobs: int | None = None
     allowed_token_ids: list[int] | None = None
     bad_words: list[str] = Field(default_factory=list)
@@ -319,6 +372,15 @@ class ChatCompletionRequest(OpenAIBaseModel):
             "need to map generated text back to input tokens."
         ),
     )
+    return_prompt_text: bool | None = Field(
+        default=None,
+        description=(
+            "If true, the response will include ``prompt_text`` containing the "
+            "prompt string produced by chat templating. In streaming mode it "
+            "is sent only on the first chunk. This is useful for inspecting "
+            "exactly what was fed into the model."
+        ),
+    )
 
     cache_salt: str | None = Field(
         default=None,
@@ -357,6 +419,54 @@ class ChatCompletionRequest(OpenAIBaseModel):
 
     # --8<-- [end:chat-completion-extra-params]
 
+    @model_validator(mode="before")
+    @classmethod
+    def _normalize_messages_before(cls, data: Any) -> Any:
+        """Pre-process message dicts before Pydantic field validation.
+
+        Performs two normalizations in a single pass:
+        - Converts tool_calls generators/iterators to lists so one-shot
+          generators are not consumed during union type matching.
+        - Renames the deprecated ``reasoning_content`` field to
+          ``reasoning`` so downstream code only needs to check one field.
+        """
+        if not isinstance(data, dict):
+            return data
+        messages = data.get("messages")
+        if not isinstance(messages, list):
+            return data
+        for msg in messages:
+            if not isinstance(msg, dict):
+                continue
+            tool_calls = msg.get("tool_calls")
+            if tool_calls is not None and not isinstance(tool_calls, list):
+                msg["tool_calls"] = list(tool_calls)
+            reasoning_content = msg.pop("reasoning_content", None)
+            if reasoning_content is not None and msg.get("reasoning") is None:
+                msg["reasoning"] = reasoning_content
+        return data
+
+    @model_validator(mode="after")
+    def _materialize_tool_calls_after(self) -> "ChatCompletionRequest":
+        """Convert Pydantic ValidatorIterator wrappers back to lists.
+
+        Even after the "before" validator converts iterables to lists,
+        Pydantic re-wraps them in a ValidatorIterator when validating
+        against ChatCompletionAssistantMessageParam's Iterable[...] type.
+        This "after" pass materialises those wrappers so downstream code
+        (tokenizers, model_dump_json) always sees plain lists.
+        """
+        for msg in self.messages:
+            if not isinstance(msg, dict):
+                continue
+            tool_calls = msg.get("tool_calls")
+            if tool_calls is not None and not isinstance(tool_calls, list):
+                msg["tool_calls"] = list(tool_calls)
+        return self
+
+    _grammar_from_tool_parser: bool = PrivateAttr(default=False)
+    """CAUTION: Should only be set by ``ToolParser.adjust_request``."""
+
     def build_chat_params(
         self,
         default_template: str | None,
@@ -389,6 +499,7 @@ def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
             max_total_tokens=model_config.max_model_len,
             max_output_tokens=max_output_tokens or 0,
             truncate_prompt_tokens=self.truncate_prompt_tokens,
+            truncation_side=self.truncation_side,
             add_special_tokens=self.add_special_tokens,
             needs_detokenization=bool(self.echo and not self.return_token_ids),
             max_total_tokens_param="max_model_len",
@@ -637,6 +748,18 @@ def check_structured_outputs_count(cls, data):
     @model_validator(mode="before")
     @classmethod
     def check_tool_usage(cls, data):
+        if isinstance(data, ValueError):
+            raise data
+        if not isinstance(data, dict):
+            return data
+
+        # Reject empty tools array, matching OpenAI API behavior
+        if data.get("tools") == []:
+            raise ValueError(
+                "`tools` must not be an empty array. "
+                "Either provide at least one tool or omit the field entirely."
+            )
+
         # if "tool_choice" is not specified but tools are provided,
         # default to "auto" tool_choice
         if "tool_choice" not in data and data.get("tools"):
@@ -663,18 +786,6 @@ def check_tool_usage(cls, data):
                     "are supported."
                 )
 
-            # if tool_choice is "required" but the "tools" list is empty,
-            # override the data to behave like "none" to align with
-            # OpenAI’s behavior.
-            if (
-                data["tool_choice"] == "required"
-                and isinstance(data["tools"], list)
-                and len(data["tools"]) == 0
-            ):
-                data["tool_choice"] = "none"
-                del data["tools"]
-                return data
-
             # ensure that if "tool_choice" is specified as an object,
             # it matches a valid tool
             correct_usage_message = (
@@ -781,13 +892,6 @@ def check_system_message_content_type(cls, data):
 
         return data
 
-    @model_validator(mode="before")
-    @classmethod
-    def set_include_reasoning_for_none_effort(cls, data: Any) -> Any:
-        if data.get("reasoning_effort") == "none":
-            data["include_reasoning"] = False
-        return data
-
 
 class BatchChatCompletionRequest(OpenAIBaseModel):
     """Request model for the /v1/chat/completions/batch endpoint.
@@ -805,7 +909,9 @@ class BatchChatCompletionRequest(OpenAIBaseModel):
     - The ``n`` parameter must be 1 (or omitted).
     """
 
-    messages: list[list[ChatCompletionMessageParam]] = Field(..., min_length=1)
+    messages: list[Annotated[list[ChatCompletionMessageParam], Field(min_length=1)]] = (
+        Field(..., min_length=1)
+    )
     model: str | None = None
 
     # Shared sampling / generation fields — mirror ChatCompletionRequest.
diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py
index 493c26d3aed9..8f6d76d78526 100644
--- a/vllm/entrypoints/openai/chat_completion/serving.py
+++ b/vllm/entrypoints/openai/chat_completion/serving.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import asyncio
+import io
 import json
 import time
 from collections.abc import AsyncGenerator, AsyncIterator
@@ -9,10 +10,9 @@
 from http import HTTPStatus
 from typing import TYPE_CHECKING, Any, Final
 
-import partial_json_parser
-import regex as re
+import numpy as np
+import pybase64 as base64
 from fastapi import Request
-from partial_json_parser.core.options import Allow
 
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.chat_utils import (
@@ -68,15 +68,13 @@
 from vllm.logprobs import Logprob
 from vllm.outputs import CompletionOutput, RequestOutput
 from vllm.parser import ParserManager
+from vllm.parser.abstract_parser import Parser
 from vllm.reasoning import ReasoningParser
 from vllm.renderers import ChatParams
 from vllm.sampling_params import BeamSearchParams, SamplingParams
 from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers import ToolParser
-from vllm.tool_parsers.mistral_tool_parser import MistralToolCall
-from vllm.tool_parsers.utils import partial_json_loads
 from vllm.utils.collection_utils import as_list
-from vllm.utils.mistral import is_mistral_tokenizer
+from vllm.utils.mistral import is_mistral_tokenizer, is_mistral_tool_parser
 
 if TYPE_CHECKING:
     from vllm.entrypoints.serve.render.serving import OpenAIServingRender
@@ -134,6 +132,20 @@ def __init__(
             enable_auto_tools=enable_auto_tools,
             model_name=self.model_config.model,
         )
+        self.parser_cls = ParserManager.get_parser(
+            tool_parser_name=tool_parser,
+            reasoning_parser_name=reasoning_parser,
+            enable_auto_tools=enable_auto_tools,
+            model_name=self.model_config.model,
+        )
+        if (
+            is_mistral_tool_parser(self.tool_parser)
+            and self.reasoning_parser_cls is not None
+        ):
+            from vllm.tool_parsers.mistral_tool_parser import MistralToolParser
+
+            MistralToolParser.model_can_reason = True
+
         self.exclude_tools_when_tool_choice_none = exclude_tools_when_tool_choice_none
 
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
@@ -174,6 +186,18 @@ def warmup(self) -> None:
             )
         )
 
+    def _effective_chat_template_kwargs(
+        self, request: ChatCompletionRequest
+    ) -> dict[str, Any]:
+        return (
+            request.build_chat_params(
+                self.chat_template,
+                self.chat_template_content_format,
+            )
+            .with_defaults(self.default_chat_template_kwargs)
+            .chat_template_kwargs
+        )
+
     async def render_chat_request(
         self,
         request: ChatCompletionRequest,
@@ -213,16 +237,21 @@ async def create_chat_completion(
         for the API specification. This API mimics the OpenAI
         Chat Completion API.
         """
+        return await self._with_kv_transfer_rejection_cleanup(
+            self._create_chat_completion(request, raw_request), request, raw_request
+        )
+
+    async def _create_chat_completion(
+        self,
+        request: ChatCompletionRequest,
+        raw_request: Request | None = None,
+    ) -> AsyncGenerator[str, None] | ChatCompletionResponse | ErrorResponse:
         # Streaming response
         tokenizer = self.renderer.tokenizer
         assert tokenizer is not None
+        chat_template_kwargs = self._effective_chat_template_kwargs(request)
         reasoning_parser: ReasoningParser | None = None
         if self.reasoning_parser_cls:
-            # Pass the same chat template kwargs as used in tokenization
-            chat_template_kwargs = self._prepare_extra_chat_template_kwargs(
-                request.chat_template_kwargs,
-                self.default_chat_template_kwargs,
-            )
             reasoning_parser = self.reasoning_parser_cls(
                 tokenizer,
                 chat_template_kwargs=chat_template_kwargs,  # type: ignore[call-arg]
@@ -268,6 +297,7 @@ async def create_chat_completion(
                 self._extract_prompt_len(engine_input),
                 self.default_sampling_params,
                 self.override_max_tokens,
+                truncate_prompt_tokens=request.truncate_prompt_tokens,
             )
 
             sampling_params: SamplingParams | BeamSearchParams
@@ -305,6 +335,11 @@ async def create_chat_completion(
             else:
                 if not request.include_reasoning:
                     reasoning_ended = True
+                elif request._grammar_from_tool_parser:
+                    # The Mistral grammar already includes an optional
+                    # `think?` rule that handles both reasoning and
+                    # non-reasoning outputs.
+                    reasoning_ended = True
                 elif reasoning_parser:
                     reasoning_ended = reasoning_parser.is_reasoning_end(
                         prompt_token_ids or []
@@ -321,6 +356,11 @@ async def create_chat_completion(
                     priority=request.priority,
                     data_parallel_rank=data_parallel_rank,
                     reasoning_ended=reasoning_ended,
+                    reasoning_parser_kwargs={
+                        "chat_template_kwargs": chat_template_kwargs,
+                    }
+                    if reasoning_parser
+                    else None,
                 )
 
             generators.append(generator)
@@ -338,6 +378,7 @@ async def create_chat_completion(
                 tokenizer,
                 request_metadata,
                 reasoning_parser,
+                chat_template_kwargs=chat_template_kwargs,
             )
 
         return await self.chat_completion_full_generator(
@@ -356,145 +397,6 @@ def get_chat_request_role(self, request: ChatCompletionRequest) -> str:
             return self.response_role
         return request.messages[-1]["role"]
 
-    @staticmethod
-    def _bracket_level(s: str, opening="{", closing="}") -> int:
-        """
-        Calculate the current level of nested brackets in a given string.
-        """
-        level = 0
-        for char in s:
-            if char == opening:
-                level += 1
-            elif char == closing:
-                level -= 1
-        return level
-
-    @staticmethod
-    def _filter_delta_text(delta_text: str, previous_text: str) -> tuple[str, bool]:
-        # remove last '},' of the tool definition stemming from the
-        # "name"/"parameters" outer object or closing ']' of the tool list
-        # count occurrences of opening and closing curly braces and
-        # once level 0 is reached stop outputting text
-        # if 0 is reached while parsing the delta_text we know the current
-        # tool will finish in this current iteration
-        bracket_level = OpenAIServingChat._bracket_level(previous_text)
-        updated_delta, passed_zero = "", False
-        for c in delta_text:
-            if c == "{":
-                bracket_level += 1
-                passed_zero = bracket_level == 0
-            elif c == "}":
-                bracket_level -= 1
-                passed_zero = bracket_level == 0
-
-            if bracket_level != 0:
-                updated_delta += c
-            else:
-                # if a comma is reached at level 0 we can stop
-                if c == ",":
-                    break
-        return updated_delta, passed_zero
-
-    def extract_tool_call_required_streaming(
-        self,
-        previous_text: str,
-        current_text: str | None,
-        delta_text: str,
-        function_name_returned: bool,
-        tool_call_idx: int | None = None,
-    ) -> tuple[DeltaMessage | None, bool]:
-        if current_text is None or current_text == "":
-            # if the current text is empty, we cannot parse it
-            return None, function_name_returned
-        try:
-            flags = Allow.ALL
-            obj, _ = partial_json_loads(current_text, flags)
-        except (
-            partial_json_parser.core.exceptions.MalformedJSON,
-            json.JSONDecodeError,
-        ):
-            logger.debug("not enough tokens to parse into JSON yet")
-            obj = None
-
-        # check if the current text is a valid array
-        # containing a partial tool calling object
-        # if not repeat
-        if obj is None or not isinstance(obj, list) or not len(obj) > 0:
-            function_name_returned = False
-            delta_message = None
-        else:
-            _, finishes_previous_tool = OpenAIServingChat._filter_delta_text(
-                delta_text, previous_text
-            )
-            # take the last tool call from the generated list
-            current_tool_call = obj[-1]
-
-            # once parameters have been generated the name is complete as well
-            if not finishes_previous_tool and (
-                "name" not in current_tool_call or "parameters" not in current_tool_call
-            ):
-                function_name_returned = False
-                delta_message = None
-            else:
-                if not function_name_returned:
-                    # get partly generated arguments from the latest tool call
-                    param_match = re.search(
-                        r'.*"parameters":\s*(.*)', current_text, re.DOTALL
-                    )
-                    arguments = param_match.group(1) if param_match else ""
-                    arguments, _ = OpenAIServingChat._filter_delta_text(
-                        arguments, previous_text
-                    )
-
-                    # if this iteration finishes a previous tool call but a
-                    # new incomplete tool is already generated, take the
-                    # previous from the list
-                    if finishes_previous_tool and "parameters" not in current_tool_call:
-                        current_tool_call = obj[-2]
-
-                    function_name_returned = True
-                    tool_call_id = make_tool_call_id(
-                        id_type=self.tool_call_id_type,
-                        func_name=current_tool_call["name"],
-                        idx=tool_call_idx,
-                    )
-                    delta_message = DeltaMessage(
-                        tool_calls=[
-                            DeltaToolCall(
-                                id=tool_call_id,
-                                function=DeltaFunctionCall(
-                                    name=current_tool_call["name"], arguments=arguments
-                                ),
-                                index=len(obj) - 1,
-                                type="function",
-                            )
-                        ]
-                    )
-
-                else:
-                    delta_text, _ = OpenAIServingChat._filter_delta_text(
-                        delta_text, previous_text
-                    )
-
-                    if delta_text != "":
-                        delta_message = DeltaMessage(
-                            tool_calls=[
-                                DeltaToolCall(
-                                    function=DeltaFunctionCall(
-                                        # OpenAI API returns None
-                                        # instead of name every time
-                                        name=None,
-                                        arguments=delta_text,
-                                    ),
-                                    index=len(obj) - 1,
-                                )
-                            ]
-                        )
-                    else:
-                        delta_message = None
-
-        return delta_message, function_name_returned
-
     async def chat_completion_stream_generator(
         self,
         request: ChatCompletionRequest,
@@ -505,6 +407,7 @@ async def chat_completion_stream_generator(
         tokenizer: TokenizerLike,
         request_metadata: RequestResponseMetadata,
         reasoning_parser: ReasoningParser | None = None,
+        chat_template_kwargs: dict[str, Any] | None = None,
     ) -> AsyncGenerator[str, None]:
         created_time = int(time.time())
         chunk_object_type: Final = "chat.completion.chunk"
@@ -523,6 +426,8 @@ async def chat_completion_stream_generator(
             harmony_tools_streamed = [False] * num_choices
         tools_streamed = [False] * num_choices
 
+        is_mistral_grammar_path = request._grammar_from_tool_parser
+
         if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
             tool_choice_function_name = request.tool_choice.function.name
         else:
@@ -535,7 +440,6 @@ async def chat_completion_stream_generator(
         )
 
         all_previous_token_ids: list[list[int]] | None
-        function_name_returned = [False] * num_choices
         if self.tool_call_id_type == "kimi_k2":
             history_tool_call_cnt = get_history_tool_calls_cnt(conversation)
         else:
@@ -546,31 +450,41 @@ async def chat_completion_stream_generator(
 
         # Only one of these will be used, thus previous_texts and
         # all_previous_token_ids will not be used twice in the same iteration.
-        if tool_choice_auto or reasoning_parser:
-            # These are only required in "auto" tool choice case
-            all_previous_token_ids = [[]] * num_choices
-            # For reasoning parser and tool call all enabled
-            added_content_delta_arr = [False] * num_choices
+        if (
+            is_mistral_grammar_path
+            or tool_choice_auto
+            or tool_choice_function_name
+            or request.tool_choice == "required"
+            or reasoning_parser
+        ):
+            all_previous_token_ids = [[] for _ in range(num_choices)]
             reasoning_end_arr = [False] * num_choices
             prompt_is_reasoning_end_arr: list[bool | None] = [None] * num_choices
         else:
             all_previous_token_ids = None
 
-        # Prepare the tool parser if it's needed
         try:
-            if tool_choice_auto and self.tool_parser:
+            if self.parser_cls is not None:
                 if tokenizer is None:
                     raise ValueError(
                         "Tokenizer not available when `skip_tokenizer_init=True`"
                     )
-
-                tool_parsers: list[ToolParser | None] = [
-                    self.tool_parser(tokenizer, request.tools)
-                ] * num_choices
+                parsers: list[Parser | None] = [
+                    self.parser_cls(
+                        tokenizer,
+                        request.tools,
+                        chat_template_kwargs=chat_template_kwargs,
+                    )
+                    for _ in range(num_choices)
+                ]
+                for p in parsers:
+                    if p is not None:
+                        p._stream_state.tool_call_id_type = self.tool_call_id_type
+                        p._stream_state.history_tool_call_cnt = history_tool_call_cnt
             else:
-                tool_parsers = [None] * num_choices
+                parsers = [None] * num_choices
         except Exception as e:
-            logger.exception("Error in tool parser creation.")
+            logger.exception("Error in parser creation.")
             data = self.create_streaming_error_response(e)
             yield f"data: {data}\n\n"
             yield "data: [DONE]\n\n"
@@ -597,6 +511,9 @@ async def chat_completion_stream_generator(
                     # the role
                     role = self.get_chat_request_role(request)
 
+                    # ``res.prompt`` is the rendered chat-templated prompt
+                    prompt_text = res.prompt if request.return_prompt_text else None
+
                     # NOTE num_choices defaults to 1 so this usually executes
                     # once per request
                     for i in range(num_choices):
@@ -622,6 +539,7 @@ async def chat_completion_stream_generator(
                                 if request.return_token_ids
                                 else None
                             ),
+                            prompt_text=prompt_text,
                         )
 
                         # if continuous usage stats are requested, add it
@@ -674,7 +592,8 @@ async def chat_completion_stream_generator(
 
                 for output in res.outputs:
                     i = output.index
-                    tool_parser = tool_parsers[i]
+                    parser = parsers[i]
+                    tool_parser = parser.tool_parser if parser is not None else None
 
                     if (
                         reasoning_parser
@@ -739,7 +658,13 @@ async def chat_completion_stream_generator(
                     delta_message: DeltaMessage | None
 
                     # just update previous_texts and previous_token_ids
-                    if tool_choice_auto or reasoning_parser:
+                    if (
+                        is_mistral_grammar_path
+                        or tool_choice_auto
+                        or tool_choice_function_name
+                        or request.tool_choice == "required"
+                        or reasoning_parser
+                    ):
                         assert previous_texts is not None
                         assert all_previous_token_ids is not None
                         previous_text = previous_texts[i]
@@ -763,253 +688,56 @@ async def chat_completion_stream_generator(
                             )
                         )
                         harmony_tools_streamed[i] |= tools_streamed_flag
-                    # handle streaming deltas for tools with named tool_choice
-                    elif tool_choice_function_name:
-                        # When encountering think end id in prompt_token_ids
-                        # i.e {"enable_thinking": False},
-                        # check BEFORE calling the parser to avoid a spurious
-                        # reasoning delta on the first chunk.
-                        if (
-                            reasoning_parser
-                            and not reasoning_end_arr[i]
-                            and prompt_is_reasoning_end_arr[i]
-                        ):
-                            reasoning_end_arr[i] = True
-
-                        if (
-                            reasoning_parser
-                            and not reasoning_end_arr[i]
-                            and not reasoning_parser.is_reasoning_end(
-                                previous_token_ids
-                            )
-                        ):
-                            assert reasoning_parser is not None
-                            delta_message = (
-                                reasoning_parser.extract_reasoning_streaming(
-                                    previous_text,
-                                    current_text,
-                                    delta_text,
-                                    previous_token_ids,
-                                    current_token_ids,
-                                    output.token_ids,
-                                )
-                            )
-                            # When encountering think end id in delta_token_ids,
-                            # set reasoning status to end.
-                            # Only keep 'content', remove 'reasoning'.
-                            if reasoning_parser.is_reasoning_end(
-                                as_list(output.token_ids)
-                            ):
-                                reasoning_end_arr[i] = True
-                                if delta_message and delta_message.content:
-                                    # This need to be added to next `delta_text`
-                                    current_text = delta_message.content
-                                    delta_message.content = None
-                                else:
-                                    current_text = ""
-                        else:
-                            # Just to add remaining `content`
-                            if reasoning_parser:
-                                delta_text = previous_text + delta_text
-                                current_text = ""
-
-                            if function_name_returned[i]:
-                                delta_tool_call = DeltaToolCall(
-                                    function=DeltaFunctionCall(arguments=delta_text),
-                                    index=i,
-                                )
-                            else:
-                                # Generate ID based on tokenizer type
-                                if is_mistral_tokenizer(tokenizer):
-                                    tool_call_id = MistralToolCall.generate_random_id()
-                                else:
-                                    tool_call_id = make_tool_call_id(
-                                        id_type=self.tool_call_id_type,
-                                        func_name=tool_choice_function_name,
-                                        idx=history_tool_call_cnt,
-                                    )
-                                delta_tool_call = DeltaToolCall(
-                                    id=tool_call_id,
-                                    type="function",
-                                    function=DeltaFunctionCall(
-                                        name=tool_choice_function_name,
-                                        arguments=delta_text,
-                                    ),
-                                    index=i,
-                                )
-                                function_name_returned[i] = True
-                                history_tool_call_cnt += 1
-
-                            delta_message = DeltaMessage(
-                                tool_calls=[
-                                    delta_tool_call,
-                                ]
-                            )
-                            tools_streamed[i] = True
-
-                    elif request.tool_choice == "required":
-                        assert previous_texts is not None
-                        previous_text = previous_texts[i]
-                        current_text = previous_text + delta_text
-                        fn_name_returned = function_name_returned[i]
-                        output_token_ids = as_list(output.token_ids)
-
-                        if (
-                            reasoning_parser is not None
-                            and not reasoning_end_arr[i]
-                            and prompt_is_reasoning_end_arr[i]
-                        ):
-                            reasoning_end_arr[i] = True
-
-                        if reasoning_parser and not reasoning_end_arr[i]:
-                            delta_message = (
-                                reasoning_parser.extract_reasoning_streaming(
-                                    previous_text,
-                                    current_text,
-                                    delta_text,
-                                    previous_token_ids,
-                                    current_token_ids,
-                                    output_token_ids,
-                                )
-                            )
-                            if reasoning_parser.is_reasoning_end(output_token_ids):
-                                reasoning_end_arr[i] = True
-                                if delta_message and delta_message.content:
-                                    current_text = delta_message.content
-                                    delta_message.content = None
-                                else:
-                                    # reasoning ended
-                                    current_text = ""
-
-                        else:
-                            # either finished reasoning or no reasoning at all
-                            content = current_text
-
-                            delta_message, function_name_returned[i] = (
-                                self.extract_tool_call_required_streaming(
-                                    previous_text=previous_text,
-                                    current_text=content,
-                                    delta_text=delta_text,
-                                    function_name_returned=fn_name_returned,
-                                    tool_call_idx=history_tool_call_cnt,
-                                )
-                            )
-                            if (
-                                delta_message
-                                and delta_message.tool_calls
-                                and delta_message.tool_calls[0].id is not None
-                            ):
-                                history_tool_call_cnt += 1
-                                tools_streamed[i] = True
+                    # Mistral grammar path: combined reasoning + tool streaming
+                    elif is_mistral_grammar_path:
+                        from vllm.tool_parsers.mistral_tool_parser import (
+                            MistralToolParser,
+                        )
 
-                    # handle streaming deltas for tools with "auto" tool choice
-                    # and reasoning parser
-                    elif tool_choice_auto and reasoning_parser:
                         assert tool_parser is not None
-                        assert added_content_delta_arr is not None
+                        assert isinstance(tool_parser, MistralToolParser)
                         assert reasoning_end_arr is not None
                         output_token_ids = as_list(output.token_ids)
-                        if not reasoning_end_arr[i]:
-                            # When encountering think end id in prompt_token_ids
-                            # i.e {"enable_thinking": False},
-                            # set reasoning status to end.
-                            if prompt_is_reasoning_end_arr[i]:
-                                reasoning_end_arr[i] = True
-                                current_token_ids = output_token_ids
-                                # Don't update current_text, keep it as is from delta
-                            else:
-                                delta_message = (
-                                    reasoning_parser.extract_reasoning_streaming(
-                                        previous_text,
-                                        current_text,
-                                        delta_text,
-                                        previous_token_ids,
-                                        current_token_ids,
-                                        output_token_ids,
-                                    )
-                                )
-
-                                # When encountering think end id in delta_token_ids,
-                                # set reasoning status to end.
-                                # Remove the text and token ids related
-                                # to 'reasoning'.
-                                if reasoning_parser.is_reasoning_end(output_token_ids):
-                                    reasoning_end_arr[i] = True
-                                    current_token_ids = (
-                                        reasoning_parser.extract_content_ids(
-                                            output_token_ids
-                                        )
-                                    )
-                                    if delta_message and delta_message.content:
-                                        current_text = delta_message.content
-                                        delta_message.content = None
-                                    else:
-                                        current_text = ""
-
-                        # handle tool calls only after reasoning is done,
-                        if reasoning_end_arr[i]:
-                            delta_token_ids = output_token_ids
-                            # First time to tool call,
-                            # add the remaining text and token ids
-                            # to delta from previous
-                            if not added_content_delta_arr[i]:
-                                added_content_delta_arr[i] = True
-                                previous_text = ""
-                                previous_token_ids = []
-                                delta_text = current_text
-                                delta_token_ids = current_token_ids
-
-                            delta_message = tool_parser.extract_tool_calls_streaming(
-                                previous_text=previous_text,
-                                current_text=current_text,
-                                delta_text=delta_text,
-                                previous_token_ids=previous_token_ids,
-                                current_token_ids=current_token_ids,
-                                delta_token_ids=delta_token_ids,
-                                request=request,
-                            )
-                            if delta_message and delta_message.tool_calls:
-                                tools_streamed[i] = True
-                    # when only tool calls
-                    elif tool_choice_auto:
-                        assert tool_parser is not None
-                        delta_message = tool_parser.extract_tool_calls_streaming(
+                        result = tool_parser.extract_maybe_reasoning_and_tool_streaming(
+                            reasoning_parser=reasoning_parser,
                             previous_text=previous_text,
                             current_text=current_text,
                             delta_text=delta_text,
                             previous_token_ids=previous_token_ids,
                             current_token_ids=current_token_ids,
-                            delta_token_ids=output.token_ids,
+                            output_token_ids=output_token_ids,
+                            reasoning_ended=reasoning_end_arr[i],
+                            prompt_is_reasoning_end=(prompt_is_reasoning_end_arr[i]),
                             request=request,
                         )
-                        if delta_message and delta_message.tool_calls:
+                        delta_message = result.delta_message
+                        reasoning_end_arr[i] = result.reasoning_ended
+                        current_text = result.current_text
+                        current_token_ids = result.current_token_ids
+                        if result.tools_called:
                             tools_streamed[i] = True
 
-                    # when only reasoning
-                    elif reasoning_parser:
-                        # When encountering think end id in prompt_token_ids
-                        # i.e {"enable_thinking": False},
-                        # set reasoning status to end.
-                        # Route all generated tokens as content directly.
-                        if prompt_is_reasoning_end_arr[i]:
-                            delta_message = DeltaMessage(content=delta_text)
-                        else:
-                            delta_message = (
-                                reasoning_parser.extract_reasoning_streaming(
-                                    previous_text,
-                                    current_text,
-                                    delta_text,
-                                    previous_token_ids,
-                                    current_token_ids,
-                                    output.token_ids,
-                                )
-                            )
-                    # handle streaming just a content delta
+                    elif parser is not None:
+                        delta_message = parser.parse_delta(
+                            delta_text=delta_text,
+                            delta_token_ids=as_list(output.token_ids),
+                            request=request,
+                            prompt_token_ids=res.prompt_token_ids,
+                        )
+                        if delta_message and delta_message.tool_calls:
+                            tools_streamed[i] = True
+                    # handle streaming just a content delta (no parsers)
                     else:
                         delta_message = DeltaMessage(content=delta_text)
 
                     # update the previous values for the next iteration
-                    if (tool_choice_auto or reasoning_parser) and not self.use_harmony:
+                    if (
+                        is_mistral_grammar_path
+                        or tool_choice_auto
+                        or tool_choice_function_name
+                        or request.tool_choice == "required"
+                        or reasoning_parser
+                    ) and not self.use_harmony:
                         assert previous_texts is not None
                         assert all_previous_token_ids is not None
                         previous_texts[i] = current_text
@@ -1189,6 +917,16 @@ async def chat_completion_stream_generator(
                         choices=[choice_data],
                         model=model_name,
                     )
+                    # Stamp the fingerprint on terminal chunks only (those with
+                    # finish_reason set). When ``include_usage`` is on, the
+                    # trailing usage chunk below overrides this as the true
+                    # final message.
+                    if (
+                        not include_usage
+                        and self.system_fingerprint is not None
+                        and choice_data.finish_reason is not None
+                    ):
+                        chunk.system_fingerprint = self.system_fingerprint
 
                     # handle usage stats if requested & if continuous
                     if include_continuous_usage:
@@ -1223,6 +961,7 @@ async def chat_completion_stream_generator(
                     choices=[],
                     model=model_name,
                     usage=final_usage,
+                    system_fingerprint=self.system_fingerprint,
                 )
                 final_usage_data = final_usage_chunk.model_dump_json(
                     exclude_unset=True, exclude_none=True
@@ -1275,8 +1014,6 @@ async def chat_completion_full_generator(
         request_metadata: RequestResponseMetadata,
         reasoning_parser: ReasoningParser | None = None,
     ) -> ErrorResponse | ChatCompletionResponse:
-        from vllm.tokenizers.mistral import MistralTokenizer
-
         created_time = int(time.time())
         final_res: RequestOutput | None = None
 
@@ -1352,6 +1089,18 @@ async def chat_completion_full_generator(
                         content=content,
                     )
 
+                # Encode routed_experts for transport. JSON can't carry raw
+                # bytes, so we write the ndarray as a ``.npy`` byte stream
+                # and base64-encode it. ``pybase64`` is ~3x faster than the
+                # stdlib ``base64`` on large payloads thanks to SIMD.
+                routed_experts_b64 = None
+                if output.routed_experts is not None:
+                    buf = io.BytesIO()
+                    np.save(buf, output.routed_experts)
+                    routed_experts_b64 = base64.b64encode(buf.getvalue()).decode(
+                        "ascii"
+                    )
+
                 choice_data = ChatCompletionResponseChoice(
                     index=output.index,
                     message=message,
@@ -1367,6 +1116,7 @@ async def chat_completion_full_generator(
                     token_ids=(
                         as_list(output.token_ids) if request.return_token_ids else None
                     ),
+                    routed_experts=routed_experts_b64,
                 )
                 choices.append(choice_data)
                 continue
@@ -1393,10 +1143,32 @@ async def chat_completion_full_generator(
                 enable_auto_tools=self.enable_auto_tools,
                 tool_parser_cls=self.tool_parser,
             )
-            tool_call_class = (
-                MistralToolCall if is_mistral_tokenizer(tokenizer) else ToolCall
-            )
-            if (not self.enable_auto_tools or not self.tool_parser) and (
+            if is_mistral_tokenizer(tokenizer):
+                from vllm.tool_parsers.mistral_tool_parser import MistralToolCall
+
+                tool_call_class: type[ToolCall] = MistralToolCall
+            else:
+                tool_call_class = ToolCall
+
+            use_mistral_tool_parser = request._grammar_from_tool_parser
+            if use_mistral_tool_parser:
+                from vllm.tool_parsers.mistral_tool_parser import MistralToolParser
+
+                tool_call_items = MistralToolParser.build_non_streaming_tool_calls(
+                    tool_calls
+                )
+                if tool_call_items:
+                    auto_tools_called = (
+                        request.tool_choice is None or request.tool_choice == "auto"
+                    )
+                message = ChatMessage(
+                    role=role,
+                    reasoning=reasoning,
+                    content=content,
+                    tool_calls=tool_call_items,
+                )
+
+            elif (not self.enable_auto_tools or not self.tool_parser) and (
                 not isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam)
                 and request.tool_choice != "required"
             ):
@@ -1406,8 +1178,8 @@ async def chat_completion_full_generator(
                 request.tool_choice
                 and type(request.tool_choice) is ChatCompletionNamedToolChoiceParam
             ):
-                assert tool_calls is not None and len(tool_calls) > 0
                 tool_call_class_items = []
+                tool_calls = tool_calls or []
                 for idx, tc in enumerate(tool_calls):
                     # Use native ID if available (e.g., Kimi K2),
                     # otherwise generate ID with correct id_type
@@ -1419,7 +1191,7 @@ async def chat_completion_full_generator(
                         # Generate ID using the correct format (kimi_k2 or random),
                         # but leave it to the class if it's Mistral to preserve
                         # 9-char IDs
-                        if isinstance(tokenizer, MistralTokenizer):
+                        if is_mistral_tokenizer(tokenizer):
                             tool_call_class_items.append(tool_call_class(function=tc))
                         else:
                             generated_id = make_tool_call_id(
@@ -1452,7 +1224,7 @@ async def chat_completion_full_generator(
                         # Generate ID using the correct format (kimi_k2 or random),
                         # but leave it to the class if it's Mistral to preserve
                         # 9-char IDs
-                        if isinstance(tokenizer, MistralTokenizer):
+                        if is_mistral_tokenizer(tokenizer):
                             tool_call_class_items.append(
                                 tool_call_class(function=tool_call)
                             )
@@ -1502,7 +1274,7 @@ async def chat_completion_full_generator(
                             # Generate ID using the correct format (kimi_k2 or random),
                             # but leave it to the class if it's Mistral to preserve
                             # 9-char IDs
-                            if isinstance(tokenizer, MistralTokenizer):
+                            if is_mistral_tokenizer(tokenizer):
                                 tool_call_items.append(tool_call_class(function=tc))
                             else:
                                 generated_id = make_tool_call_id(
@@ -1553,6 +1325,16 @@ async def chat_completion_full_generator(
                 and output.finish_reason == "stop"
             )
 
+            # Encode routed_experts for transport. JSON can't carry raw
+            # bytes, so we write the ndarray as a ``.npy`` byte stream
+            # and base64-encode it. ``pybase64`` is ~3x faster than the
+            # stdlib ``base64`` on large payloads thanks to SIMD.
+            routed_experts_b64 = None
+            if output.routed_experts is not None:
+                buf = io.BytesIO()
+                np.save(buf, output.routed_experts)
+                routed_experts_b64 = base64.b64encode(buf.getvalue()).decode("ascii")
+
             choice_data = ChatCompletionResponseChoice(
                 index=output.index,
                 message=message,
@@ -1566,6 +1348,7 @@ async def chat_completion_full_generator(
                 token_ids=(
                     as_list(output.token_ids) if request.return_token_ids else None
                 ),
+                routed_experts=routed_experts_b64,
             )
             choice_data = maybe_filter_parallel_tool_calls(choice_data, request)
 
@@ -1605,16 +1388,21 @@ async def chat_completion_full_generator(
 
         request_metadata.final_usage_info = usage
 
+        # ``final_res.prompt`` is the rendered chat-templated prompt text
+        prompt_text = final_res.prompt if request.return_prompt_text else None
+
         response = ChatCompletionResponse(
             id=request_id,
             created=created_time,
             model=model_name,
             choices=choices,
             usage=usage,
+            system_fingerprint=self.system_fingerprint,
             prompt_logprobs=clamp_prompt_logprobs(final_res.prompt_logprobs),
             prompt_token_ids=(
                 final_res.prompt_token_ids if request.return_token_ids else None
             ),
+            prompt_text=prompt_text,
             kv_transfer_params=final_res.kv_transfer_params,
         )
 
diff --git a/vllm/entrypoints/openai/chat_completion/stream_harmony.py b/vllm/entrypoints/openai/chat_completion/stream_harmony.py
index 87f2f9b92275..271f8e8c85a5 100644
--- a/vllm/entrypoints/openai/chat_completion/stream_harmony.py
+++ b/vllm/entrypoints/openai/chat_completion/stream_harmony.py
@@ -17,6 +17,10 @@
     DeltaMessage,
     DeltaToolCall,
 )
+from vllm.entrypoints.openai.parser.harmony_utils import (
+    extract_function_from_recipient,
+    is_function_recipient,
+)
 
 
 class TokenState(NamedTuple):
@@ -79,16 +83,12 @@ def extract_harmony_streaming_delta(
     # This counts completed tool calls in messages
     base_index = 0
     for msg in harmony_parser.messages:
-        if (
-            (msg.channel == "commentary" or msg.channel == "analysis")
-            and msg.recipient
-            and msg.recipient.startswith("functions.")
-        ):
+        if msg.recipient and is_function_recipient(msg.recipient):
             base_index += 1
 
     # If there's an ongoing tool call from previous chunk,
     # the next new tool call starts at base_index + 1
-    if prev_recipient and prev_recipient.startswith("functions."):
+    if prev_recipient and is_function_recipient(prev_recipient):
         next_tool_index = base_index + 1
         # Ongoing call is at base_index
         ongoing_tool_index = base_index
@@ -101,15 +101,11 @@ def extract_harmony_streaming_delta(
         if group.channel == "final":
             combined_content += group.text
             content_encountered = True
-        elif (
-            (group.channel == "commentary" or group.channel == "analysis")
-            and group.recipient
-            and group.recipient.startswith("functions.")
-        ):
+        elif group.recipient and is_function_recipient(group.recipient):
             opened_new_call = False
             if prev_recipient != group.recipient:
                 # New tool call - emit the opening message
-                tool_name = group.recipient.split("functions.", 1)[1]
+                tool_name = extract_function_from_recipient(group.recipient)
                 tool_messages.append(
                     DeltaToolCall(
                         id=make_tool_call_id(),
diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py
index 2bd991b0010e..d130e83422a9 100644
--- a/vllm/entrypoints/openai/cli_args.py
+++ b/vllm/entrypoints/openai/cli_args.py
@@ -153,9 +153,21 @@ class BaseFrontendArgs:
     """If set to True, log the stack trace of error responses"""
     tokens_only: bool = False
     """
-    If set to True, only enable the Tokens In<>Out endpoint. 
+    If set to True, only enable the Tokens In<>Out endpoint.
     This is intended for use in a Disaggregated Everything setup.
     """
+    fingerprint_mode: Literal["full", "hash", "custom", "none"] = "full"
+    """Controls the ``system_fingerprint`` field on responses.
+
+    - ``full`` (default): ``vllm-<version>[-<parallelism>]-<hash8>``. Encodes
+      server version, non-trivial parallelism degrees (tp/pp/dp/ep), and an
+      8-char config hash.
+    - ``hash``: ``vllm-<version>-<hash8>``. Parallelism stripped.
+    - ``custom``: emits the literal string from ``--fingerprint-value``.
+    - ``none``: the field is omitted (serialized as ``null``).
+    """
+    fingerprint_value: str | None = None
+    """Literal fingerprint string used when ``--fingerprint-mode=custom``."""
 
     @classmethod
     def _customize_cli_kwargs(
@@ -217,6 +229,17 @@ class FrontendArgs(BaseFrontendArgs):
     """Host name."""
     port: int = 8000
     """Port number."""
+    data_parallel_supervisor_port: int = 9256
+    """HTTP port for aggregated health endpoints in multi-port external LB
+    mode."""
+    dp_supervisor_probe_interval_s: float = 5.0
+    """Seconds between aggregated health probes in multi-port external LB mode."""
+    dp_supervisor_probe_timeout_s: float = 5.0
+    """Seconds to wait between retries when a child health probe fails with a
+    connection error in multi-port external LB mode."""
+    dp_supervisor_probe_failure_threshold: int = 3
+    """Number of consecutive connection-error retries before a child health
+    probe is declared failed in multi-port external LB mode."""
     uds: str | None = None
     """Unix domain socket path. If set, host and port arguments are ignored."""
     uvicorn_log_level: Literal[
@@ -278,6 +301,9 @@ class FrontendArgs(BaseFrontendArgs):
     Enable offline FastAPI documentation for air-gapped environments.
     Uses vendored static assets bundled with vLLM.
     """
+    enable_flash_late_interaction: bool = True
+    """If set, run pooling score MaxSim on GPU in the API server process.
+    Can significantly improve late-interaction scoring performance."""
 
     @classmethod
     def _customize_cli_kwargs(
@@ -345,6 +371,13 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         "Must be a YAML with the following options: "
         "https://docs.vllm.ai/en/latest/configuration/serve_args.html",
     )
+    parser.add_argument(
+        "--grpc",
+        action="store_true",
+        default=False,
+        help="Launch a gRPC server instead of the HTTP OpenAI-compatible "
+        "server. Requires: pip install vllm[grpc].",
+    )
     parser = FrontendArgs.add_cli_args(parser)
     parser = AsyncEngineArgs.add_cli_args(parser)
 
@@ -365,6 +398,13 @@ def validate_parsed_serve_args(args: argparse.Namespace):
     if args.enable_log_outputs and not args.enable_log_requests:
         raise TypeError("Error: --enable-log-outputs requires --enable-log-requests")
 
+    if args.data_parallel_multi_port_external_lb:
+        from vllm.entrypoints.openai.dp_supervisor import (
+            validate_multi_port_external_lb_args,
+        )
+
+        validate_multi_port_external_lb_args(args)
+
 
 def create_parser_for_docs() -> FlexibleArgumentParser:
     parser_for_docs = FlexibleArgumentParser(
diff --git a/vllm/entrypoints/openai/completion/protocol.py b/vllm/entrypoints/openai/completion/protocol.py
index c785d254084d..cd8d9b89dfcc 100644
--- a/vllm/entrypoints/openai/completion/protocol.py
+++ b/vllm/entrypoints/openai/completion/protocol.py
@@ -79,6 +79,14 @@ class CompletionRequest(OpenAIBaseModel):
     skip_special_tokens: bool = True
     spaces_between_special_tokens: bool = True
     truncate_prompt_tokens: Annotated[int, Field(ge=-1, le=_INT64_MAX)] | None = None
+    truncation_side: Literal["left", "right"] | None = Field(
+        default=None,
+        description=(
+            "Which side to truncate from when truncate_prompt_tokens is active. "
+            "'right' keeps the first N tokens. "
+            "'left' keeps the last N tokens."
+        ),
+    )
     allowed_token_ids: list[int] | None = None
     prompt_logprobs: int | None = None
     # --8<-- [end:completion-sampling-params]
@@ -177,6 +185,14 @@ class CompletionRequest(OpenAIBaseModel):
         "can detect such behavior and terminate early, saving time and tokens.",
     )
 
+    thinking_token_budget: int | None = Field(
+        default=None,
+        description=(
+            "Maximum number of tokens allowed for thinking operations "
+            "(reasoning models). -1 = unlimited."
+        ),
+    )
+
     # --8<-- [end:completion-extra-params]
 
     def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
@@ -184,6 +200,7 @@ def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
             max_total_tokens=model_config.max_model_len,
             max_output_tokens=self.max_tokens or 0,
             truncate_prompt_tokens=self.truncate_prompt_tokens,
+            truncation_side=self.truncation_side,
             add_special_tokens=self.add_special_tokens,
             needs_detokenization=bool(self.echo and not self.return_token_ids),
             max_total_tokens_param="max_model_len",
@@ -322,6 +339,7 @@ def to_sampling_params(
             extra_args=extra_args or None,
             skip_clone=True,  # Created fresh per request, safe to skip clone
             repetition_detection=self.repetition_detection,
+            thinking_token_budget=self.thinking_token_budget,
         )
 
     @model_validator(mode="before")
@@ -468,6 +486,16 @@ class CompletionResponseChoice(OpenAIBaseModel):
     token_ids: list[int] | None = None  # For response
     prompt_logprobs: list[dict[int, Logprob] | None] | None = None
     prompt_token_ids: list[int] | None = None  # For prompt
+    # Per-token expert routing decisions, base64-encoded ``.npy`` bytes
+    # (numpy serialization). Shape after decode:
+    #   (num_tokens - 1, num_layers, num_experts_per_tok)  dtype uint8/uint16
+    # ``num_tokens - 1`` because the last sampled token has not been
+    # forwarded yet and therefore has no routing data.
+    # Decode:
+    #   np.load(io.BytesIO(base64.b64decode(s)))
+    # ``None`` if (a) the request was aborted before any forward pass,
+    # or (b) ``enable_return_routed_experts`` is off server-side.
+    routed_experts: str | None = None
 
 
 class CompletionResponse(OpenAIBaseModel):
@@ -512,3 +540,6 @@ class CompletionStreamResponse(OpenAIBaseModel):
     model: str
     choices: list[CompletionResponseStreamChoice]
     usage: UsageInfo | None = Field(default=None)
+    # Set only on the final chunk of a stream to mirror non-streaming responses
+    # without the per-chunk serialization overhead.
+    system_fingerprint: str | None = None
diff --git a/vllm/entrypoints/openai/completion/serving.py b/vllm/entrypoints/openai/completion/serving.py
index fb7f253c7ea3..f393954e2a05 100644
--- a/vllm/entrypoints/openai/completion/serving.py
+++ b/vllm/entrypoints/openai/completion/serving.py
@@ -2,11 +2,14 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import asyncio
+import io
 import time
 from collections.abc import AsyncGenerator, AsyncIterator
 from collections.abc import Sequence as GenericSequence
 from typing import TYPE_CHECKING, cast
 
+import numpy as np
+import pybase64 as base64
 from fastapi import Request
 
 from vllm.engine.protocol import EngineClient
@@ -118,6 +121,15 @@ async def create_completion(
             - suffix (the language models we currently support do not support
             suffix)
         """
+        return await self._with_kv_transfer_rejection_cleanup(
+            self._create_completion(request, raw_request), request, raw_request
+        )
+
+    async def _create_completion(
+        self,
+        request: CompletionRequest,
+        raw_request: Request | None = None,
+    ) -> AsyncGenerator[str, None] | CompletionResponse | ErrorResponse:
         if request.stream and request.use_beam_search:
             return self.create_error_response(
                 "Streaming is not currently supported with beam search"
@@ -151,6 +163,7 @@ async def create_completion(
                 self._extract_prompt_len(engine_input),
                 self.default_sampling_params,
                 self.override_max_tokens,
+                truncate_prompt_tokens=request.truncate_prompt_tokens,
             )
 
             sampling_params: SamplingParams | BeamSearchParams
@@ -383,6 +396,7 @@ async def completion_stream_generator(
 
                     chunk = CompletionStreamResponse(
                         id=request_id,
+                        object="text_completion",
                         created=created_time,
                         model=model_name,
                         choices=[
@@ -401,6 +415,14 @@ async def completion_stream_generator(
                             )
                         ],
                     )
+                    # Stamp on terminal chunk only when no trailing usage chunk
+                    # will follow (that one is the true final message).
+                    if (
+                        not include_usage
+                        and self.system_fingerprint is not None
+                        and finish_reason is not None
+                    ):
+                        chunk.system_fingerprint = self.system_fingerprint
                     if include_continuous_usage:
                         prompt_tokens = num_prompt_tokens[prompt_idx]
                         completion_tokens = previous_num_tokens[i]
@@ -410,7 +432,7 @@ async def completion_stream_generator(
                             total_tokens=prompt_tokens + completion_tokens,
                         )
 
-                    response_json = chunk.model_dump_json(exclude_unset=False)
+                    response_json = chunk.model_dump_json(exclude_unset=True)
                     yield f"data: {response_json}\n\n"
 
             total_prompt_tokens = sum(num_prompt_tokens)
@@ -433,6 +455,7 @@ async def completion_stream_generator(
                     model=model_name,
                     choices=[],
                     usage=final_usage_info,
+                    system_fingerprint=self.system_fingerprint,
                 )
                 final_usage_data = final_usage_chunk.model_dump_json(
                     exclude_unset=False, exclude_none=True
@@ -518,6 +541,18 @@ def request_output_to_completion_response(
                 else:
                     logprobs = None
 
+                # Encode routed_experts for transport. JSON can't carry raw
+                # bytes, so we write the ndarray as a ``.npy`` byte stream
+                # and base64-encode it. ``pybase64`` is ~3x faster than the
+                # stdlib ``base64`` on large payloads thanks to SIMD.
+                routed_experts_b64 = None
+                if output.routed_experts is not None:
+                    buf = io.BytesIO()
+                    np.save(buf, output.routed_experts)
+                    routed_experts_b64 = base64.b64encode(buf.getvalue()).decode(
+                        "ascii"
+                    )
+
                 choice_data = CompletionResponseChoice(
                     index=len(choices),
                     text=output_text,
@@ -531,6 +566,7 @@ def request_output_to_completion_response(
                     token_ids=(
                         as_list(output.token_ids) if request.return_token_ids else None
                     ),
+                    routed_experts=routed_experts_b64,
                 )
                 choices.append(choice_data)
 
@@ -562,6 +598,7 @@ def request_output_to_completion_response(
             model=model_name,
             choices=choices,
             usage=usage,
+            system_fingerprint=self.system_fingerprint,
             kv_transfer_params=kv_transfer_params,
         )
 
diff --git a/vllm/entrypoints/openai/dp_supervisor.py b/vllm/entrypoints/openai/dp_supervisor.py
new file mode 100644
index 000000000000..2dff91fa7942
--- /dev/null
+++ b/vllm/entrypoints/openai/dp_supervisor.py
@@ -0,0 +1,493 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import contextlib
+import copy
+import multiprocessing
+import os
+import signal
+import time
+from functools import partial
+from http import HTTPStatus
+from multiprocessing.process import BaseProcess
+
+import aiohttp
+import psutil
+import uvicorn
+import uvloop
+from fastapi import FastAPI, Response
+
+from vllm.logger import init_logger
+from vllm.platforms import current_platform
+from vllm.utils.system_utils import (
+    decorate_logs,
+    kill_process_tree,
+    set_process_title,
+    update_environment_variables,
+)
+
+logger = init_logger(__name__)
+
+CHILD_EXIT_GRACE_S = 5.0
+
+
+def infer_multi_port_external_lb_start_rank(args: argparse.Namespace) -> int:
+    start_rank = getattr(args, "data_parallel_start_rank", None)
+    if start_rank is not None:
+        return start_rank
+
+    node_rank = getattr(args, "node_rank", 0) or 0
+    local_size = getattr(args, "data_parallel_size_local", 0) or 0
+    return node_rank * local_size
+
+
+def validate_multi_port_external_lb_args(args: argparse.Namespace) -> None:
+    if getattr(args, "grpc", False):
+        raise ValueError(
+            "Error: --data-parallel-multi-port-external-lb does not support --grpc"
+        )
+    if args.uds is not None:
+        raise ValueError(
+            "Error: --data-parallel-multi-port-external-lb does not support --uds"
+        )
+    if any((args.ssl_keyfile, args.ssl_certfile, args.ssl_ca_certs)):
+        raise ValueError(
+            "Error: --data-parallel-multi-port-external-lb does not support HTTPS yet"
+        )
+    if args.api_server_count not in (None, 1):
+        raise ValueError(
+            "Error: --data-parallel-multi-port-external-lb currently requires "
+            "--api-server-count=1"
+        )
+    if args.data_parallel_rank is not None:
+        raise ValueError(
+            "Error: --data-parallel-multi-port-external-lb manages child "
+            "--data-parallel-rank values internally"
+        )
+    if args.data_parallel_external_lb or args.data_parallel_hybrid_lb:
+        raise ValueError(
+            "Error: --data-parallel-multi-port-external-lb cannot be combined with "
+            "--data-parallel-external-lb or --data-parallel-hybrid-lb"
+        )
+    if args.data_parallel_size < 2:
+        raise ValueError(
+            "Error: --data-parallel-multi-port-external-lb requires "
+            "--data-parallel-size > 1"
+        )
+
+    local_size = args.data_parallel_size_local
+    if local_size is None or local_size < 2:
+        raise ValueError(
+            "Error: --data-parallel-multi-port-external-lb requires "
+            "--data-parallel-size-local >= 2"
+        )
+    if local_size > args.data_parallel_size:
+        raise ValueError(
+            "Error: --data-parallel-size-local cannot exceed --data-parallel-size"
+        )
+    if args.data_parallel_size % local_size != 0:
+        raise ValueError(
+            "Error: --data-parallel-size must be divisible by "
+            "--data-parallel-size-local"
+        )
+
+    start_rank = infer_multi_port_external_lb_start_rank(args)
+    if start_rank + local_size > args.data_parallel_size:
+        raise ValueError(
+            "Error: multi-port supervised ranks would exceed --data-parallel-size"
+        )
+
+    supervisor_port = args.data_parallel_supervisor_port
+    child_port_min = args.port
+    child_port_max = args.port + local_size - 1
+    if child_port_min <= supervisor_port <= child_port_max:
+        raise ValueError(
+            f"Error: --data-parallel-supervisor-port {supervisor_port} "
+            f"overlaps with child rank ports {child_port_min}-{child_port_max}"
+        )
+
+
+def _build_vllm_dp_server_args(
+    args: argparse.Namespace, local_rank: int
+) -> argparse.Namespace:
+    child_args = copy.copy(args)
+    child_args.port = args.port + local_rank
+    child_args.data_parallel_rank = (
+        infer_multi_port_external_lb_start_rank(args) + local_rank
+    )
+    child_args.data_parallel_start_rank = None
+    child_args.data_parallel_size_local = 1
+    child_args.data_parallel_external_lb = True
+    child_args.data_parallel_hybrid_lb = False
+    child_args.data_parallel_multi_port_external_lb = False
+    child_args.data_parallel_supervisor_port = None
+    child_args.api_server_count = 1
+    return child_args
+
+
+def _build_vllm_dp_server_env(
+    args: argparse.Namespace, local_rank: int
+) -> dict[str, str]:
+    # set visible devices for the child process
+    devices_per_rank = args.tensor_parallel_size * args.pipeline_parallel_size
+    start = local_rank * devices_per_rank
+    stop = start + devices_per_rank
+    device_env = current_platform.device_control_env_var
+    visible_devices = ",".join(
+        str(current_platform.device_id_to_physical_device_id(idx))
+        for idx in range(start, stop)
+    )
+    return {device_env: visible_devices}
+
+
+def _child_base_url(args: argparse.Namespace, port: int) -> str:
+    host = args.host or "127.0.0.1"
+    if host == "0.0.0.0":
+        host = "127.0.0.1"
+    elif host == "::":
+        host = "::1"
+    return f"http://{host}:{port}"
+
+
+def _join_processes_with_timeout(processes: list[BaseProcess], timeout: float) -> None:
+    deadline = time.monotonic() + timeout
+    for process in processes:
+        remaining = deadline - time.monotonic()
+        if remaining <= 0:
+            break
+        process.join(timeout=remaining)
+
+
+async def _probe_endpoint(
+    session: aiohttp.ClientSession,
+    args: argparse.Namespace,
+    port: int,
+    path: str,
+    conn_err_failure_threshold: int = 3,
+    conn_err_retry_delay: float = 5.0,
+) -> bool:
+    """
+    Probe /health endpoint for 200 status.
+
+    If there is a connection error, retry every N seconds.
+    """
+    for iteration in range(conn_err_failure_threshold):
+        try:
+            async with session.get(_child_base_url(args, port) + path) as response:
+                # vLLM returns 503 on EngineDeadError, so we should return
+                # immediately if vLLM responds with a non-200 status code.
+                return response.status == HTTPStatus.OK
+        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+            # Allow retry of connection errors.
+            logger.debug(
+                "Probe attempt %d/%d failed on port %d: %r",
+                iteration + 1,
+                conn_err_failure_threshold,
+                port,
+                e,
+            )
+
+        if iteration < conn_err_failure_threshold - 1:
+            await asyncio.sleep(conn_err_retry_delay)
+
+    return False
+
+
+def _build_dp_supervisor_app(supervisor: DPSupervisor) -> FastAPI:
+    app = FastAPI(openapi_url=None, docs_url=None, redoc_url=None)
+    app.state.supervisor = supervisor
+
+    def _status_response(ok: bool) -> Response:
+        return Response(
+            status_code=(HTTPStatus.OK if ok else HTTPStatus.SERVICE_UNAVAILABLE)
+        )
+
+    @app.get("/health", include_in_schema=False)
+    async def health() -> Response:
+        return _status_response(app.state.supervisor.is_ready)
+
+    @app.get("/ready", include_in_schema=False)
+    @app.get("/readyz", include_in_schema=False)
+    async def ready() -> Response:
+        return _status_response(app.state.supervisor.is_ready)
+
+    return app
+
+
+def _run_vllm_dp_server(
+    child_args: argparse.Namespace, env_updates: dict[str, str]
+) -> None:
+    """
+    Entrypoint function for the vLLM DP Server.
+    """
+    from vllm.entrypoints.openai.api_server import run_server
+
+    # Create a fresh process group for the vLLM DP Server,
+    # so that CTRL-C is propagated cleanly.
+    os.setpgrp()
+
+    name = f"APIServer_DP{child_args.data_parallel_rank}"
+    update_environment_variables(env_updates)
+    set_process_title(name)
+    decorate_logs(name)
+    uvloop.run(run_server(child_args))
+
+
+class DPSupervisor:
+    def __init__(self, args: argparse.Namespace):
+        validate_multi_port_external_lb_args(args)
+        self.args = args
+        self.supervisor_port = args.data_parallel_supervisor_port
+        self.child_ports = [
+            args.port + local_rank
+            for local_rank in range(args.data_parallel_size_local)
+        ]
+        self._is_ready = False
+        self._processes: list[BaseProcess] = []
+        self._shutdown_event = asyncio.Event()
+        self._shutdown_signal = signal.SIGTERM
+
+    @property
+    def is_ready(self) -> bool:
+        return self._is_ready and not self._shutdown_event.is_set()
+
+    async def run(self) -> None:
+        loop = asyncio.get_running_loop()
+
+        # K8s sends SIGTERM for shutdown - begin graceful termination.
+        for sig in (signal.SIGTERM, signal.SIGINT):
+            loop.add_signal_handler(sig, partial(self._handle_signal, sig))
+
+        # Launch DPSupervisor Server.
+        app = _build_dp_supervisor_app(self)
+        decorate_logs("DPSupervisor")
+        host = self.args.host or "0.0.0.0"
+        config = uvicorn.Config(
+            app,
+            host=host,
+            port=self.supervisor_port,
+            log_level=self.args.uvicorn_log_level,
+        )
+        supervisor_server = uvicorn.Server(config)
+        supervisor_server_task = asyncio.create_task(
+            supervisor_server.serve(),
+            name="dp-supervisor",
+        )
+        supervisor_server_task.add_done_callback(
+            lambda _task: self._shutdown_event.set()
+        )
+
+        # Ensure DPSupervisor task starts on the event loop.
+        while not supervisor_server.started:
+            if supervisor_server_task.done():
+                supervisor_server_task.result()
+                raise RuntimeError("DPSupervisor exited before startup.")
+            await asyncio.sleep(0.05)
+        logger.info("Started DPSupervisor on %s:%d", host, self.supervisor_port)
+
+        # Launch and Monitor vLLM Server Processes.
+        try:
+            self._start_children()
+            await self._monitor_children()
+        finally:
+            self._is_ready = False
+            await self._shutdown_children()
+
+            # Shutdown the DP Supervisor server.
+            supervisor_server.should_exit = True
+            await supervisor_server_task
+
+    def _handle_signal(self, signum: int) -> None:
+        """
+        Signal handler that is added to the event loop.
+
+        This catches the SIGTERM from K8s and begins graceful shutdown,
+        by setting the _shutdown_event(), which is watched by the main
+        coroutine monitoring the vLLM DP Servers.
+        """
+
+        if self._shutdown_event.is_set():
+            return
+
+        self._shutdown_signal = signal.Signals(signum)
+        logger.info(
+            "DPSupervisor received %s, shutting down.",
+            self._shutdown_signal.name,
+        )
+
+        self._shutdown_event.set()
+        self._is_ready = False
+
+    def _start_children(self) -> None:
+        """
+        Launch vLLM DP Servers on separate GPUs.
+        """
+        logger.info("Launching vLLM DP Servers")
+        context = multiprocessing.get_context("spawn")
+        for local_rank in range(self.args.data_parallel_size_local):
+            child_args = _build_vllm_dp_server_args(self.args, local_rank)
+            child_env = _build_vllm_dp_server_env(self.args, local_rank)
+            process = context.Process(
+                target=_run_vllm_dp_server,
+                name=f"APIServer_DPRank_{child_args.data_parallel_rank}",
+                args=(child_args, child_env),
+            )
+            process.start()
+            self._processes.append(process)
+
+    async def _probe_all_children(self) -> None:
+        """
+        Background coroutine: probes all child endpoints on each interval.
+
+        Exits when any server becomes unhealthy after being ready, signalling
+        _monitor_children to initiate shutdown.
+        """
+        timeout = aiohttp.ClientTimeout(total=self.args.dp_supervisor_probe_timeout_s)
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            while not self._shutdown_event.is_set():
+                threshold = (
+                    self.args.dp_supervisor_probe_failure_threshold
+                    if self._is_ready
+                    else 1
+                )
+                results = await asyncio.gather(
+                    *(
+                        _probe_endpoint(
+                            session,
+                            self.args,
+                            port,
+                            "/health",
+                            conn_err_failure_threshold=threshold,
+                            conn_err_retry_delay=self.args.dp_supervisor_probe_interval_s,
+                        )
+                        for port in self.child_ports
+                    ),
+                    return_exceptions=True,
+                )
+                all_healthy = all(r is True for r in results)
+
+                if all_healthy:
+                    # If all healthy, we are ready to receive requests.
+                    # This conditional avoids a potential race condition
+                    # where shutdown is set, THEN the probe returns true.
+                    if not self._shutdown_event.is_set():
+                        self._is_ready = True
+                elif self._is_ready:
+                    # Once ready, any failure in the probe means vLLM is dead.
+                    num_unhealthy = sum(1 for r in results if r is not True)
+                    logger.info(
+                        "DPSupervisor probe found %s unhealthy DP Servers.",
+                        num_unhealthy,
+                    )
+                    self._is_ready = False
+                    self._shutdown_event.set()
+                    return
+
+                with contextlib.suppress(asyncio.TimeoutError):
+                    logger.debug(
+                        "Waiting for %s seconds before next probe",
+                        self.args.dp_supervisor_probe_interval_s,
+                    )
+                    await asyncio.wait_for(
+                        self._shutdown_event.wait(),
+                        timeout=self.args.dp_supervisor_probe_interval_s,
+                    )
+
+    async def _monitor_children(self) -> None:
+        """
+        Main coroutine task that monitors the children vLLM servers.
+
+        Before the vLLM servers are /ready:
+        - if the pid is dead, we will shut down
+        - if the probe fails, we try again after dp_supervisor_probe_interval_s
+
+        After the vLLM servers are /ready:
+        - if the pid is dead, we will shut down
+        - if the probe fails, we will shut down
+        """
+        probe_task = asyncio.create_task(
+            self._probe_all_children(), name="dp-health-probe"
+        )
+
+        try:
+            while not self._shutdown_event.is_set():
+                # 1. Check for dead processes
+                n_failed = len([p for p in self._processes if not p.is_alive()])
+                if n_failed > 0:
+                    logger.info("DPSupervisor found %s exited DP Servers.", n_failed)
+                    break
+
+                # 2. Check if the probe background task crashed or failed.
+                if probe_task.done():
+                    # Extract exception if it crashed, or log failure
+                    exc = probe_task.exception() if not probe_task.cancelled() else None
+                    logger.info("DPSupervisor probe task stopped. Exception: %s", exc)
+                    break
+
+                # Sleep for probe_interval seconds or until a shutdown.
+                with contextlib.suppress(asyncio.TimeoutError):
+                    logger.debug(
+                        "Waiting for %s seconds before next monitor",
+                        self.args.dp_supervisor_probe_interval_s,
+                    )
+                    await asyncio.wait_for(
+                        self._shutdown_event.wait(),
+                        timeout=self.args.dp_supervisor_probe_interval_s,
+                    )
+
+        finally:
+            # Cleanup probe task if needed.
+            if not probe_task.done():
+                probe_task.cancel()
+                with contextlib.suppress(asyncio.CancelledError):
+                    await probe_task
+
+    async def _shutdown_children(self) -> None:
+        """Terminate the vLLM DP servers."""
+        timeout = self.args.shutdown_timeout + CHILD_EXIT_GRACE_S
+
+        try:
+            logger.info(
+                "DPSupervisor forwarding %s to DP Servers.",
+                self._shutdown_signal.name,
+            )
+            for process in self._processes:
+                pid = process.pid
+                if not process.is_alive() or pid is None:
+                    continue
+                with contextlib.suppress(ProcessLookupError, OSError):
+                    os.kill(pid, self._shutdown_signal)
+
+            try:
+                await asyncio.to_thread(
+                    _join_processes_with_timeout, self._processes, timeout
+                )
+            except asyncio.CancelledError:
+                logger.warning("Shutdown await cancelled")
+                raise
+        finally:
+            for process in self._processes:
+                pid = process.pid
+                if not process.is_alive() or pid is None:
+                    continue
+                logger.warning(
+                    "DP server %s did not exit within %.1fs; force killing.",
+                    process.name,
+                    timeout,
+                )
+                with contextlib.suppress(
+                    ProcessLookupError,
+                    OSError,
+                    psutil.NoSuchProcess,
+                    psutil.AccessDenied,
+                ):
+                    kill_process_tree(pid)
+
+
+def run_dp_supervisor(args: argparse.Namespace) -> None:
+    uvloop.run(DPSupervisor(args).run())
diff --git a/vllm/entrypoints/openai/engine/protocol.py b/vllm/entrypoints/openai/engine/protocol.py
index 8f6cdb3e6241..890af0300efc 100644
--- a/vllm/entrypoints/openai/engine/protocol.py
+++ b/vllm/entrypoints/openai/engine/protocol.py
@@ -12,6 +12,7 @@
     BaseModel,
     ConfigDict,
     Field,
+    model_serializer,
     model_validator,
 )
 
@@ -166,6 +167,14 @@ class FunctionDefinition(OpenAIBaseModel):
     name: str
     description: str | None = None
     parameters: dict[str, Any] | None = None
+    defer_loading: bool | None = None
+
+    @model_serializer(mode="wrap")
+    def _serialize(self, handler):
+        data = handler(self)
+        if self.defer_loading is None:
+            data.pop("defer_loading", None)
+        return data
 
 
 # extra="forbid" is a workaround to have kwargs as a field,
diff --git a/vllm/entrypoints/openai/engine/serving.py b/vllm/entrypoints/openai/engine/serving.py
index cab5a536c2f0..ff67575fcc6c 100644
--- a/vllm/entrypoints/openai/engine/serving.py
+++ b/vllm/entrypoints/openai/engine/serving.py
@@ -1,29 +1,23 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import asyncio
 import contextlib
 import json
 import time
-from collections.abc import AsyncGenerator, Mapping
+from collections.abc import Awaitable, Mapping
 from dataclasses import dataclass, field
 from http import HTTPStatus
 from typing import Any, ClassVar, Generic, Protocol, TypeAlias, TypeVar
 
-import numpy as np
 from fastapi import Request
-from openai.types.responses import (
-    ToolChoiceFunction,
-)
+from openai.types.responses import ToolChoiceFunction
 from pydantic import ConfigDict, TypeAdapter, ValidationError
 from starlette.datastructures import Headers
 
 import vllm.envs as envs
-from vllm.beam_search import BeamSearchSequence, create_sort_beams_key_function
 from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
-from vllm.entrypoints.chat_utils import (
-    ChatTemplateContentFormatOption,
-)
+from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
+from vllm.entrypoints.generate.beam_search.online import BeamSearchOnlineMixin
 from vllm.entrypoints.logger import RequestLogger
 from vllm.entrypoints.openai.chat_completion.protocol import (
     BatchChatCompletionRequest,
@@ -42,28 +36,7 @@
     GenerationError,
 )
 from vllm.entrypoints.openai.models.serving import OpenAIServingModels
-from vllm.entrypoints.openai.responses.protocol import (
-    ResponsesRequest,
-)
-from vllm.entrypoints.openai.speech_to_text.protocol import (
-    TranscriptionRequest,
-    TranscriptionResponse,
-    TranslationRequest,
-)
-from vllm.entrypoints.pooling.pooling.protocol import (
-    IOProcessorRequest,
-    PoolingChatRequest,
-    PoolingCompletionRequest,
-    PoolingResponse,
-)
-from vllm.entrypoints.pooling.score.protocol import (
-    RerankRequest,
-    ScoreDataRequest,
-    ScoreQueriesDocumentsRequest,
-    ScoreRequest,
-    ScoreResponse,
-    ScoreTextRequest,
-)
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
 from vllm.entrypoints.serve.disagg.protocol import GenerateRequest, GenerateResponse
 from vllm.entrypoints.serve.tokenize.protocol import (
     DetokenizeRequest,
@@ -71,14 +44,16 @@
     TokenizeCompletionRequest,
     TokenizeResponse,
 )
+from vllm.entrypoints.speech_to_text.transcription.protocol import (
+    TranscriptionRequest,
+    TranscriptionResponse,
+)
+from vllm.entrypoints.speech_to_text.translation.protocol import TranslationRequest
 from vllm.entrypoints.utils import create_error_response
-from vllm.exceptions import VLLMValidationError
-from vllm.inputs import EngineInput, PromptType, TokensPrompt
+from vllm.inputs import EngineInput, PromptType
 from vllm.logger import init_logger
 from vllm.logprobs import Logprob, PromptLogprobs
 from vllm.lora.request import LoRARequest
-from vllm.outputs import CompletionOutput, PoolingRequestOutput, RequestOutput
-from vllm.pooling_params import PoolingParams
 from vllm.renderers import ChatParams, TokenizeParams
 from vllm.renderers.inputs.preprocess import (
     extract_prompt_components,
@@ -93,10 +68,7 @@
     log_tracing_disabled_warning,
 )
 from vllm.utils import random_uuid
-from vllm.utils.async_utils import (
-    collect_from_async_generator,
-    merge_async_iterators,
-)
+from vllm.utils.mistral import is_mistral_tool_parser
 
 logger = init_logger(__name__)
 
@@ -116,19 +88,11 @@ def build_chat_params(
 
 
 CompletionLikeRequest: TypeAlias = (
-    CompletionRequest
-    | TokenizeCompletionRequest
-    | DetokenizeRequest
-    | RerankRequest
-    | ScoreRequest
-    | PoolingCompletionRequest
+    CompletionRequest | TokenizeCompletionRequest | DetokenizeRequest
 )
 
 ChatLikeRequest: TypeAlias = (
-    ChatCompletionRequest
-    | BatchChatCompletionRequest
-    | TokenizeChatRequest
-    | PoolingChatRequest
+    ChatCompletionRequest | BatchChatCompletionRequest | TokenizeChatRequest
 )
 
 SpeechToTextRequest: TypeAlias = TranscriptionRequest | TranslationRequest
@@ -138,7 +102,6 @@ def build_chat_params(
     | ChatLikeRequest
     | SpeechToTextRequest
     | ResponsesRequest
-    | IOProcessorRequest
     | GenerateRequest
 )
 
@@ -147,12 +110,11 @@ def build_chat_params(
     | ChatCompletionResponse
     | TranscriptionResponse
     | TokenizeResponse
-    | PoolingResponse
-    | ScoreResponse
     | GenerateResponse
 )
 
 RequestT = TypeVar("RequestT", bound=AnyRequest)
+_T = TypeVar("_T")
 
 
 @dataclass(kw_only=True)
@@ -164,16 +126,10 @@ class ServeContext(Generic[RequestT]):
     created_time: int = field(default_factory=lambda: int(time.time()))
     lora_request: LoRARequest | None = None
     engine_inputs: list[EngineInput] | None = None
-
-    result_generator: AsyncGenerator[tuple[int, PoolingRequestOutput], None] | None = (
-        None
-    )
-    final_res_batch: list[PoolingRequestOutput] = field(default_factory=list)
-
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
 
-class OpenAIServing:
+class OpenAIServing(BeamSearchOnlineMixin):
     request_id_prefix: ClassVar[str] = """
     A short string prepended to every request’s ID.
     """
@@ -189,7 +145,6 @@ def __init__(
         super().__init__()
 
         self.engine_client = engine_client
-
         self.models = models
 
         self.request_logger = request_logger
@@ -197,356 +152,23 @@ def __init__(
 
         self.model_config = engine_client.model_config
         self.renderer = engine_client.renderer
-        self.io_processor = engine_client.io_processor
         self.input_processor = engine_client.input_processor
+        vllm_config = getattr(engine_client, "vllm_config", None)
+        kv_transfer_config = getattr(vllm_config, "kv_transfer_config", None)
+        self.has_kv_connector = kv_transfer_config is not None
 
-    async def beam_search(
-        self,
-        prompt: EngineInput,
-        request_id: str,
-        params: BeamSearchParams,
-        lora_request: LoRARequest | None = None,
-        trace_headers: Mapping[str, str] | None = None,
-    ) -> AsyncGenerator[RequestOutput, None]:
-        beam_width = params.beam_width
-        max_tokens = params.max_tokens
-        ignore_eos = params.ignore_eos
-        temperature = params.temperature
-        length_penalty = params.length_penalty
-        include_stop_str_in_output = params.include_stop_str_in_output
-
-        tokenizer = self.renderer.get_tokenizer()
-        eos_token_id = tokenizer.eos_token_id
-        sort_beams_key = create_sort_beams_key_function(eos_token_id, length_penalty)
-
-        if prompt["type"] == "embeds":
-            raise NotImplementedError("Embedding prompt not supported for beam search")
-
-        # Extract prompt tokens and text based on model type
-        decoder_prompt = (
-            prompt if prompt["type"] != "enc_dec" else prompt["decoder_prompt"]
-        )
-        prompt_text = decoder_prompt.get("prompt")
-        prompt_token_ids = decoder_prompt["prompt_token_ids"]
-
-        tokenized_length = len(prompt_token_ids)
-
-        logprobs_num = 2 * beam_width
-        sampling_params = SamplingParams(
-            logprobs=logprobs_num,
-            max_tokens=1,
-            temperature=temperature,
-        )
-        all_beams = [
-            BeamSearchSequence(
-                orig_prompt=prompt,
-                tokens=prompt_token_ids,
-                cum_logprob=0,
-                logprobs=[],
-                lora_request=lora_request,
-            )
-        ]
-        completed = []
-
-        for _ in range(max_tokens):
-            tasks = []
-            request_id_batch = f"{request_id}-{random_uuid()}"
-
-            for i, beam in enumerate(all_beams):
-                prompt_item = beam.get_prompt()
-                lora_request_item = beam.lora_request
-                request_id_item = f"{request_id_batch}-beam-{i}"
-                task = asyncio.create_task(
-                    collect_from_async_generator(
-                        self.engine_client.generate(
-                            prompt_item,
-                            sampling_params,
-                            request_id_item,
-                            lora_request=lora_request_item,
-                            trace_headers=trace_headers,
-                        )
-                    )
-                )
-                tasks.append(task)
-
-            output = [x[0] for x in await asyncio.gather(*tasks)]
-
-            new_beams = []
-            # Store all new tokens generated by beam
-            all_beams_token_id = []
-            # Store the cumulative probability of all tokens
-            # generated by beam search
-            all_beams_logprob = []
-            # Iterate through all beam inference results
-            for i, result in enumerate(output):
-                current_beam = all_beams[i]
-
-                # check for error finish reason and abort beam search
-                if result.outputs[0].finish_reason == "error":
-                    # yield error output and terminate beam search
-                    yield RequestOutput(
-                        request_id=request_id,
-                        prompt=prompt_text,
-                        outputs=[
-                            CompletionOutput(
-                                index=0,
-                                text="",
-                                token_ids=[],
-                                cumulative_logprob=None,
-                                logprobs=None,
-                                finish_reason="error",
-                            )
-                        ],
-                        finished=True,
-                        prompt_token_ids=prompt_token_ids,
-                        prompt_logprobs=None,
-                    )
-                    return
-
-                if result.outputs[0].logprobs is not None:
-                    logprobs = result.outputs[0].logprobs[0]
-                    all_beams_token_id.extend(list(logprobs.keys()))
-                    all_beams_logprob.extend(
-                        [
-                            current_beam.cum_logprob + obj.logprob
-                            for obj in logprobs.values()
-                        ]
-                    )
-
-            # Handle the token for the end of sentence (EOS)
-            all_beams_token_id = np.array(all_beams_token_id)
-            all_beams_logprob = np.array(all_beams_logprob)
-
-            if not ignore_eos:
-                # Get the index position of eos token in all generated results
-                eos_idx = np.where(all_beams_token_id == eos_token_id)[0]
-                for idx in eos_idx:
-                    current_beam = all_beams[idx // logprobs_num]
-                    result = output[idx // logprobs_num]
-                    assert result.outputs[0].logprobs is not None
-                    logprobs_entry = result.outputs[0].logprobs[0]
-                    completed.append(
-                        BeamSearchSequence(
-                            orig_prompt=prompt,
-                            tokens=current_beam.tokens + [eos_token_id]
-                            if include_stop_str_in_output
-                            else current_beam.tokens,
-                            logprobs=current_beam.logprobs + [logprobs_entry],
-                            cum_logprob=float(all_beams_logprob[idx]),
-                            finish_reason="stop",
-                            stop_reason=eos_token_id,
-                        )
-                    )
-                # After processing, set the log probability of the eos condition
-                # to negative infinity.
-                all_beams_logprob[eos_idx] = -np.inf
-
-            # Processing non-EOS tokens
-            # Get indices of the top beam_width probabilities
-            topn_idx = np.argpartition(np.negative(all_beams_logprob), beam_width)[
-                :beam_width
-            ]
-
-            for idx in topn_idx:
-                current_beam = all_beams[idx // logprobs_num]
-                result = output[idx // logprobs_num]
-                token_id = int(all_beams_token_id[idx])
-                assert result.outputs[0].logprobs is not None
-                logprobs_entry = result.outputs[0].logprobs[0]
-                new_beams.append(
-                    BeamSearchSequence(
-                        orig_prompt=prompt,
-                        tokens=current_beam.tokens + [token_id],
-                        logprobs=current_beam.logprobs + [logprobs_entry],
-                        lora_request=current_beam.lora_request,
-                        cum_logprob=float(all_beams_logprob[idx]),
-                    )
-                )
-
-            all_beams = new_beams
-
-        completed.extend(all_beams)
-        sorted_completed = sorted(completed, key=sort_beams_key, reverse=True)
-        best_beams = sorted_completed[:beam_width]
-
-        for beam in best_beams:
-            if beam.tokens[-1] == eos_token_id and not ignore_eos:
-                # Skip the eos token in the text.
-                tokens = beam.tokens[tokenized_length:-1]
-            else:
-                tokens = beam.tokens[tokenized_length:]
-            beam.text = tokenizer.decode(tokens)
-
-        yield RequestOutput(
-            request_id=request_id,
-            prompt=prompt_text,
-            outputs=[
-                CompletionOutput(
-                    text=beam.text,  # type: ignore
-                    cumulative_logprob=beam.cum_logprob,
-                    token_ids=beam.tokens[tokenized_length:],
-                    index=i,
-                    logprobs=beam.logprobs,
-                    finish_reason=beam.finish_reason
-                    if beam.finish_reason is not None
-                    else "length",
-                    stop_reason=beam.stop_reason,
-                )
-                for (i, beam) in enumerate(best_beams)
-            ],
-            finished=True,
-            prompt_token_ids=prompt_token_ids,
-            prompt_logprobs=None,
-        )
-
-    async def _preprocess(
-        self,
-        ctx: ServeContext,
-    ) -> ErrorResponse | None:
-        """
-        Default preprocessing hook. Subclasses may override to prepare `ctx`.
-        """
-        return None
-
-    def _build_response(
-        self,
-        ctx: ServeContext,
-    ) -> AnyResponse | ErrorResponse:
-        """
-        Default response builder. Subclass may override this method
-        to return the appropriate response object.
-        """
-        return self.create_error_response("unimplemented endpoint")
-
-    async def handle(
-        self,
-        ctx: ServeContext,
-    ) -> AnyResponse | ErrorResponse:
-        async for response in self._pipeline(ctx):
-            return response
-
-        return self.create_error_response("No response yielded from pipeline")
-
-    async def _pipeline(
-        self,
-        ctx: ServeContext,
-    ) -> AsyncGenerator[AnyResponse | ErrorResponse, None]:
-        """Execute the request processing pipeline yielding responses."""
-        if error := await self._check_model(ctx.request):
-            yield error
-        if error := self._validate_request(ctx):
-            yield error
-
-        preprocess_ret = await self._preprocess(ctx)
-        if isinstance(preprocess_ret, ErrorResponse):
-            yield preprocess_ret
-
-        generators_ret = await self._prepare_generators(ctx)
-        if isinstance(generators_ret, ErrorResponse):
-            yield generators_ret
+        # Computed once at startup (cached by ``vllm_config`` identity) and
+        # stamped on non-streaming responses. Streaming chunks deliberately
+        # omit it to avoid per-chunk overhead.
+        from vllm.entrypoints.openai.fingerprint import get_system_fingerprint
 
-        collect_ret = await self._collect_batch(ctx)
-        if isinstance(collect_ret, ErrorResponse):
-            yield collect_ret
-
-        yield self._build_response(ctx)
-
-    def _validate_request(self, ctx: ServeContext) -> ErrorResponse | None:
-        truncate_prompt_tokens = getattr(ctx.request, "truncate_prompt_tokens", None)
-
-        if (
-            truncate_prompt_tokens is not None
-            and truncate_prompt_tokens > self.model_config.max_model_len
-        ):
-            return self.create_error_response(
-                "truncate_prompt_tokens value is "
-                "greater than max_model_len."
-                " Please request a smaller truncation size."
-            )
-        return None
-
-    def _create_pooling_params(
-        self,
-        ctx: ServeContext,
-    ) -> PoolingParams | ErrorResponse:
-        if not hasattr(ctx.request, "to_pooling_params"):
-            return self.create_error_response(
-                "Request type does not support pooling parameters"
-            )
-
-        return ctx.request.to_pooling_params()
-
-    async def _prepare_generators(
-        self,
-        ctx: ServeContext,
-    ) -> ErrorResponse | None:
-        """Schedule the request and get the result generator."""
-        generators: list[AsyncGenerator[PoolingRequestOutput, None]] = []
-
-        trace_headers = (
-            None
-            if ctx.raw_request is None
-            else await self._get_trace_headers(ctx.raw_request.headers)
-        )
-
-        pooling_params = self._create_pooling_params(ctx)
-        if isinstance(pooling_params, ErrorResponse):
-            return pooling_params
-
-        if ctx.engine_inputs is None:
-            return self.create_error_response("Engine prompts not available")
-
-        for i, engine_input in enumerate(ctx.engine_inputs):
-            request_id_item = f"{ctx.request_id}-{i}"
-
-            self._log_inputs(
-                request_id_item,
-                engine_input,
-                params=pooling_params,
-                lora_request=ctx.lora_request,
-            )
-
-            generator = self.engine_client.encode(
-                engine_input,
-                pooling_params,
-                request_id_item,
-                lora_request=ctx.lora_request,
-                trace_headers=trace_headers,
-                priority=getattr(ctx.request, "priority", 0),
-            )
-
-            generators.append(generator)
-
-        ctx.result_generator = merge_async_iterators(*generators)
-
-        return None
-
-    async def _collect_batch(
-        self,
-        ctx: ServeContext,
-    ) -> ErrorResponse | None:
-        """Collect batch results from the result generator."""
-        if ctx.engine_inputs is None:
-            return self.create_error_response("Engine prompts not available")
-
-        num_prompts = len(ctx.engine_inputs)
-        final_res_batch: list[PoolingRequestOutput | None]
-        final_res_batch = [None] * num_prompts
-
-        if ctx.result_generator is None:
-            return self.create_error_response("Result generator not available")
-
-        async for i, res in ctx.result_generator:
-            final_res_batch[i] = res
-
-        if None in final_res_batch:
-            return self.create_error_response(
-                "Failed to generate results for all prompts"
+        try:
+            self.system_fingerprint: str | None = get_system_fingerprint(
+                engine_client.vllm_config
             )
-
-        ctx.final_res_batch = [res for res in final_res_batch if res is not None]
-
-        return None
+        except Exception:
+            # Never fail server startup over the fingerprint.
+            self.system_fingerprint = None
 
     @staticmethod
     def create_error_response(
@@ -692,88 +314,6 @@ def _get_message_types(self, request: AnyRequest) -> set[str]:
                         message_types.add(content_dict["type"].split("_")[0])
         return message_types
 
-    def _validate_input(
-        self,
-        request: object,
-        input_ids: list[int],
-        input_text: str,
-    ) -> TokensPrompt:
-        token_num = len(input_ids)
-        max_model_len = self.model_config.max_model_len
-
-        # Note: ScoreRequest doesn't have max_tokens
-        if isinstance(
-            request,
-            (
-                ScoreDataRequest,
-                ScoreTextRequest,
-                ScoreQueriesDocumentsRequest,
-                RerankRequest,
-            ),
-        ):
-            # Note: input length can be up to the entire model context length
-            # since these requests don't generate tokens.
-            if token_num > max_model_len:
-                operations: dict[type[AnyRequest], str] = {
-                    ScoreDataRequest: "score",
-                    ScoreTextRequest: "score",
-                    ScoreQueriesDocumentsRequest: "score",
-                }
-                operation = operations.get(type(request), "embedding generation")
-                raise VLLMValidationError(
-                    f"This model's maximum context length is "
-                    f"{max_model_len} tokens. However, you requested "
-                    f"{token_num} tokens in the input for {operation}. "
-                    f"Please reduce the length of the input prompt.",
-                    parameter="input_tokens",
-                    value=token_num,
-                )
-            return TokensPrompt(prompt=input_text, prompt_token_ids=input_ids)
-
-        # Note: TokenizeRequest and DetokenizeRequest doesn't have max_tokens
-        # and does not require model context length validation
-        if isinstance(
-            request,
-            (TokenizeCompletionRequest, TokenizeChatRequest, DetokenizeRequest),
-        ):
-            return TokensPrompt(prompt=input_text, prompt_token_ids=input_ids)
-
-        # chat completion endpoint supports max_completion_tokens
-        if isinstance(request, ChatCompletionRequest):
-            # TODO(#9845): remove max_tokens when field dropped from OpenAI API
-            max_tokens = request.max_completion_tokens or request.max_tokens
-        else:
-            max_tokens = getattr(request, "max_tokens", None)
-
-        # Note: input length can be up to model context length - 1 for
-        # completion-like requests.
-        if token_num >= max_model_len:
-            raise VLLMValidationError(
-                f"This model's maximum context length is "
-                f"{max_model_len} tokens. However, your request has "
-                f"{token_num} input tokens. Please reduce the length of "
-                "the input messages.",
-                parameter="input_tokens",
-                value=token_num,
-            )
-
-        if max_tokens is not None and token_num + max_tokens > max_model_len:
-            raise VLLMValidationError(
-                f"This model's maximum context length is "
-                f"{max_model_len} tokens. However, you requested "
-                f"{max_tokens} output tokens and your prompt contains "
-                f"{token_num} input tokens, for a total of "
-                f"{token_num + max_tokens} tokens "
-                f"({token_num} + {max_tokens} = "
-                f"{token_num + max_tokens} > {max_model_len}). "
-                f"Please reduce the length of the input prompt or the "
-                f"number of requested output tokens.",
-                parameter="max_tokens",
-                value=max_tokens,
-            )
-
-        return TokensPrompt(prompt=input_text, prompt_token_ids=input_ids)
-
     def _validate_chat_template(
         self,
         request_chat_template: str | None,
@@ -819,7 +359,7 @@ def _log_inputs(
         self,
         request_id: str,
         inputs: PromptType | EngineInput,
-        params: SamplingParams | PoolingParams | BeamSearchParams | None,
+        params: SamplingParams | BeamSearchParams | None,
         lora_request: LoRARequest | None,
     ) -> None:
         if self.request_logger is None:
@@ -877,6 +417,40 @@ def _get_data_parallel_rank(raw_request: Request | None) -> int | None:
         except ValueError:
             return None
 
+    async def _with_kv_transfer_rejection_cleanup(
+        self,
+        awaitable: Awaitable[_T],
+        request: ChatCompletionRequest | CompletionRequest | ResponsesRequest,
+        raw_request: Request | None,
+    ) -> _T:
+        """Wrap a `create_*` coroutine so that, if it raises or returns an
+        ErrorResponse (i.e. the request never reached the engine), the KV
+        connector is notified to free any pinned remote-prefill blocks."""
+        kv_transfer_params = self.has_kv_connector and request.kv_transfer_params
+        if not kv_transfer_params or not kv_transfer_params.get("do_remote_prefill"):
+            return await awaitable
+
+        notify = True
+        try:
+            result = await awaitable
+            if not isinstance(result, ErrorResponse):
+                notify = False
+            return result
+        finally:
+            if notify:
+                try:
+                    await self.engine_client.notify_kv_transfer_request_rejected(
+                        request.request_id,
+                        kv_transfer_params,
+                        data_parallel_rank=self._get_data_parallel_rank(raw_request),
+                    )
+                except Exception:
+                    logger.warning(
+                        "Failed to notify KV connector about rejected request %s",
+                        request.request_id,
+                        exc_info=True,
+                    )
+
     @staticmethod
     def _parse_tool_calls_from_content(
         request: ResponsesRequest | ChatCompletionRequest,
@@ -885,24 +459,46 @@ def _parse_tool_calls_from_content(
         tool_parser_cls: type[ToolParser] | None,
         content: str | None = None,
     ) -> tuple[list[FunctionCall] | None, str | None]:
+        # When the Mistral grammar factory injected structured outputs,
+        # let the parser handle the output.
+        use_mistral_tool_parser = (
+            isinstance(request, ChatCompletionRequest)
+            and is_mistral_tool_parser(tool_parser_cls)
+            and request._grammar_from_tool_parser
+        )
+
         function_calls = list[FunctionCall]()
-        if request.tool_choice and isinstance(request.tool_choice, ToolChoiceFunction):
-            assert content is not None
-            # Forced Function Call
+        if (
+            not use_mistral_tool_parser
+            and request.tool_choice
+            and isinstance(request.tool_choice, ToolChoiceFunction)
+        ):
+            # Forced Function Call (Responses API)
+            if content is None:
+                return [], None
             function_calls.append(
                 FunctionCall(name=request.tool_choice.name, arguments=content)
             )
             content = None  # Clear content since tool is called.
-        elif request.tool_choice and isinstance(
-            request.tool_choice, ChatCompletionNamedToolChoiceParam
+        elif (
+            not use_mistral_tool_parser
+            and request.tool_choice
+            and isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam)
+            and (tool_parser_cls is None or tool_parser_cls.supports_required_and_named)
         ):
-            assert content is not None
-            # Forced Function Call
+            # Named function with standard JSON-based parsing
+            if content is None:
+                return [], None
             function_calls.append(
                 FunctionCall(name=request.tool_choice.function.name, arguments=content)
             )
             content = None  # Clear content since tool is called.
-        elif request.tool_choice == "required":
+        elif (
+            not use_mistral_tool_parser
+            and request.tool_choice == "required"
+            and (tool_parser_cls is None or tool_parser_cls.supports_required_and_named)
+        ):
+            # "required" with standard JSON-based parsing
             tool_calls = []
             with contextlib.suppress(ValidationError):
                 content = content or ""
@@ -917,17 +513,34 @@ def _parse_tool_calls_from_content(
                     )
                 )
             content = None  # Clear content since tool is called.
-        elif (
-            tool_parser_cls
-            and enable_auto_tools
-            and (request.tool_choice == "auto" or request.tool_choice is None)
+        elif tool_parser_cls and (
+            use_mistral_tool_parser
+            or (
+                enable_auto_tools
+                and (
+                    request.tool_choice == "auto"
+                    or request.tool_choice is None
+                    or (
+                        not tool_parser_cls.supports_required_and_named
+                        and request.tools
+                        and (
+                            request.tool_choice == "required"
+                            or isinstance(
+                                request.tool_choice,
+                                ChatCompletionNamedToolChoiceParam,
+                            )
+                        )
+                    )
+                )
+            )
         ):
+            # Automatic Tool Call Parsing (also used as fallback for
+            # required/named when supports_required_and_named=False)
             if tokenizer is None:
                 raise ValueError(
                     "Tokenizer not available when `skip_tokenizer_init=True`"
                 )
 
-            # Automatic Tool Call Parsing
             try:
                 tool_parser = tool_parser_cls(tokenizer, request.tools)
             except RuntimeError as e:
@@ -979,6 +592,8 @@ def _get_decoded_token(
     def _is_model_supported(self, model_name: str | None) -> bool:
         if not model_name:
             return True
+        if envs.VLLM_SKIP_MODEL_NAME_VALIDATION:
+            return True
         return self.models.is_base_model(model_name)
 
 
diff --git a/vllm/entrypoints/openai/fingerprint.py b/vllm/entrypoints/openai/fingerprint.py
new file mode 100644
index 000000000000..e858e667d06c
--- /dev/null
+++ b/vllm/entrypoints/openai/fingerprint.py
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Build the ``system_fingerprint`` string returned by the OpenAI-compatible
+server.
+
+Four modes, configured via ``--fingerprint-mode``:
+
+* ``full`` (default): ``vllm-<version>[-<parallelism>]-<hash8>`` — encodes
+  server version, any non-trivial parallelism degree (tp/pp/dp/ep), and an
+  8-char prefix of ``vllm_config.compute_hash()`` (covers model identity,
+  quant config, speculative, attention backend, etc.).
+* ``hash``: ``vllm-<version>-<hash8>`` — parallelism stripped.
+* ``custom``: user-provided literal via ``--fingerprint-value``.
+* ``none``: the field is omitted (serialized as ``null``).
+
+``get_system_fingerprint`` is only called at serving-class init (a handful
+of times per server); each subclass caches the returned string on
+``self.system_fingerprint``, so per-request cost is one attribute read.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Literal
+
+FingerprintMode = Literal["full", "hash", "custom", "none"]
+
+_DEFAULT_MODE: FingerprintMode = "full"
+_CUSTOM_VALUE: str | None = None
+
+
+def set_default_fingerprint_mode(
+    mode: FingerprintMode,
+    custom_value: str | None = None,
+) -> None:
+    """Configure the fingerprint mode for subsequent ``get_system_fingerprint``
+    calls. Called once at server startup."""
+    global _DEFAULT_MODE, _CUSTOM_VALUE
+    _DEFAULT_MODE = mode
+    _CUSTOM_VALUE = custom_value
+
+
+def get_system_fingerprint(vllm_config: Any) -> str | None:
+    """Return the fingerprint for ``vllm_config`` using the mode configured by
+    ``set_default_fingerprint_mode``."""
+    return build_system_fingerprint(vllm_config, _DEFAULT_MODE, _CUSTOM_VALUE)
+
+
+def build_system_fingerprint(
+    vllm_config: Any,
+    mode: FingerprintMode = "full",
+    custom_value: str | None = None,
+) -> str | None:
+    if mode == "none":
+        return None
+    if mode == "custom":
+        return custom_value
+
+    from vllm import __version__ as vllm_version
+
+    try:
+        hash8 = vllm_config.compute_hash()[:8]
+    except Exception:
+        hash8 = "nohash"
+
+    if mode == "hash":
+        return f"vllm-{vllm_version}-{hash8}"
+
+    # mode == "full"
+    parts: list[str] = [f"vllm-{vllm_version}"]
+    pc = getattr(vllm_config, "parallel_config", None)
+    if pc is not None:
+        tp = getattr(pc, "tensor_parallel_size", 1)
+        if tp > 1:
+            parts.append(f"tp{tp}")
+        pp = getattr(pc, "pipeline_parallel_size", 1)
+        if pp > 1:
+            parts.append(f"pp{pp}")
+        dp = getattr(pc, "data_parallel_size", 1)
+        if dp > 1:
+            parts.append(f"dp{dp}")
+        if getattr(pc, "enable_expert_parallel", False):
+            parts.append("ep")
+    parts.append(hash8)
+    return "-".join(parts)
diff --git a/vllm/entrypoints/openai/generate/api_router.py b/vllm/entrypoints/openai/generate/api_router.py
index 9a64db929e8f..84a7fddeabe3 100644
--- a/vllm/entrypoints/openai/generate/api_router.py
+++ b/vllm/entrypoints/openai/generate/api_router.py
@@ -61,9 +61,17 @@ async def init_generate_state(
     )
     from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
     from vllm.entrypoints.openai.completion.serving import OpenAIServingCompletion
+    from vllm.entrypoints.openai.fingerprint import set_default_fingerprint_mode
     from vllm.entrypoints.openai.responses.serving import OpenAIServingResponses
     from vllm.entrypoints.serve.disagg.serving import ServingTokens
 
+    # Applied before any serving class is constructed so that each one picks
+    # up the chosen mode on its first cache miss.
+    set_default_fingerprint_mode(
+        getattr(args, "fingerprint_mode", "full"),
+        getattr(args, "fingerprint_value", None),
+    )
+
     if args.tool_server == "demo":
         tool_server: ToolServer | None = DemoToolServer()
         assert isinstance(tool_server, DemoToolServer)
@@ -95,6 +103,7 @@ async def init_generate_state(
             enable_prompt_tokens_details=args.enable_prompt_tokens_details,
             enable_force_include_usage=args.enable_force_include_usage,
             enable_log_outputs=args.enable_log_outputs,
+            default_chat_template_kwargs=args.default_chat_template_kwargs,
         )
         if "generate" in supported_tasks
         else None
diff --git a/vllm/entrypoints/openai/generate/factories.py b/vllm/entrypoints/openai/generate/factories.py
new file mode 100644
index 000000000000..899601db3ca0
--- /dev/null
+++ b/vllm/entrypoints/openai/generate/factories.py
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import TYPE_CHECKING
+
+from vllm.config import ModelConfig
+from vllm.tasks import SupportedTask
+
+if TYPE_CHECKING:
+    from vllm.entrypoints.sagemaker.api_router import (
+        EndpointFn,
+        GetHandlerFn,
+        RequestType,
+    )
+
+
+def get_generate_invocation_types(
+    supported_tasks: tuple["SupportedTask", ...],
+    model_config: ModelConfig | None = None,
+):
+    # NOTE: Items defined earlier take higher priority
+    invocation_types: list[tuple[RequestType, tuple[GetHandlerFn, EndpointFn]]] = []
+
+    if "generate" in supported_tasks:
+        from vllm.entrypoints.openai.chat_completion.api_router import (
+            chat,
+            create_chat_completion,
+        )
+        from vllm.entrypoints.openai.chat_completion.protocol import (
+            ChatCompletionRequest,
+        )
+        from vllm.entrypoints.openai.completion.api_router import (
+            completion,
+            create_completion,
+        )
+        from vllm.entrypoints.openai.completion.protocol import CompletionRequest
+
+        invocation_types += [
+            (ChatCompletionRequest, (chat, create_chat_completion)),
+            (CompletionRequest, (completion, create_completion)),
+        ]
+
+    return invocation_types
diff --git a/vllm/entrypoints/openai/generative_scoring/__init__.py b/vllm/entrypoints/openai/generative_scoring/__init__.py
new file mode 100644
index 000000000000..208f01a7cb5e
--- /dev/null
+++ b/vllm/entrypoints/openai/generative_scoring/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
diff --git a/vllm/entrypoints/openai/generative_scoring/api_router.py b/vllm/entrypoints/openai/generative_scoring/api_router.py
new file mode 100644
index 000000000000..ed0a81d149cc
--- /dev/null
+++ b/vllm/entrypoints/openai/generative_scoring/api_router.py
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from http import HTTPStatus
+from typing import TYPE_CHECKING
+
+from fastapi import APIRouter, Depends, FastAPI, Request
+from fastapi.responses import JSONResponse
+
+from vllm.entrypoints.openai.engine.protocol import ErrorResponse
+from vllm.entrypoints.openai.generative_scoring.serving import (
+    GenerativeScoringResponse,
+    OpenAIServingGenerativeScoring,
+)
+from vllm.entrypoints.openai.utils import validate_json_request
+from vllm.entrypoints.utils import load_aware_call, with_cancellation
+from vllm.logger import init_logger
+
+if TYPE_CHECKING:
+    from argparse import Namespace
+
+    from starlette.datastructures import State
+
+    from vllm.engine.protocol import EngineClient
+    from vllm.entrypoints.logger import RequestLogger
+
+router = APIRouter()
+
+logger = init_logger(__name__)
+
+
+def generative_scoring(request: Request) -> OpenAIServingGenerativeScoring | None:
+    return request.app.state.serving_generative_scoring
+
+
+@router.post(
+    "/generative_scoring",
+    dependencies=[Depends(validate_json_request)],
+    responses={
+        HTTPStatus.BAD_REQUEST.value: {"model": ErrorResponse},
+        HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": ErrorResponse},
+    },
+)
+@with_cancellation
+@load_aware_call
+async def create_generative_scoring(raw_request: Request):
+    handler = generative_scoring(raw_request)
+    if handler is None:
+        raise NotImplementedError(
+            "The model does not support the Generative Scoring API"
+        )
+
+    raw_body = await raw_request.json()
+
+    from vllm.entrypoints.openai.generative_scoring.serving import (
+        GenerativeScoringRequest,
+    )
+
+    gen_request = GenerativeScoringRequest(**raw_body)
+    result = await handler.create_generative_scoring(gen_request, raw_request)
+
+    if isinstance(result, ErrorResponse):
+        return JSONResponse(content=result.model_dump(), status_code=result.error.code)
+    elif isinstance(result, GenerativeScoringResponse):
+        return JSONResponse(content=result.model_dump())
+
+    raise ValueError(f"Unexpected response type: {type(result)}")
+
+
+def register_generative_scoring_api_router(app: FastAPI):
+    app.include_router(router)
+
+
+async def init_generative_scoring_state(
+    engine_client: "EngineClient",
+    state: "State",
+    args: "Namespace",
+    request_logger: "RequestLogger | None",
+):
+    from vllm.entrypoints.openai.generative_scoring.serving import (
+        OpenAIServingGenerativeScoring,
+    )
+
+    state.serving_generative_scoring = OpenAIServingGenerativeScoring(
+        engine_client,
+        state.openai_serving_models,
+        request_logger=request_logger,
+    )
diff --git a/vllm/entrypoints/openai/generative_scoring/serving.py b/vllm/entrypoints/openai/generative_scoring/serving.py
new file mode 100644
index 000000000000..fd8f89cadadf
--- /dev/null
+++ b/vllm/entrypoints/openai/generative_scoring/serving.py
@@ -0,0 +1,491 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Generative Scoring implementation for generative models.
+
+This module implements generative scoring functionality that computes the
+probability of specified token IDs appearing as the next token after a
+given query+item prompt. This works on any generative model that produces
+logits (task="generate").
+"""
+
+import asyncio
+import math
+import time
+from collections.abc import AsyncGenerator, Mapping
+from typing import Literal
+
+from fastapi import Request
+from pydantic import Field
+
+from vllm.engine.protocol import EngineClient
+from vllm.entrypoints.logger import RequestLogger
+from vllm.entrypoints.openai.engine.protocol import (
+    ErrorResponse,
+    OpenAIBaseModel,
+    UsageInfo,
+)
+from vllm.entrypoints.openai.engine.serving import OpenAIServing
+from vllm.entrypoints.openai.models.serving import OpenAIServingModels
+from vllm.inputs import EngineInput, tokens_input
+from vllm.logger import init_logger
+from vllm.outputs import RequestOutput
+from vllm.sampling_params import SamplingParams
+from vllm.tokenizers import TokenizerLike
+from vllm.tracing import (
+    contains_trace_headers,
+    extract_trace_headers,
+    log_tracing_disabled_warning,
+)
+from vllm.utils import random_uuid
+from vllm.utils.async_utils import merge_async_iterators
+
+logger = init_logger(__name__)
+
+
+# ============================================================================
+# Protocol definitions
+# ============================================================================
+
+
+class GenerativeScoringRequest(OpenAIBaseModel):
+    """Request for computing generative scoring.
+
+    Attributes:
+        model: The model to use for scoring. Optional, follows existing patterns.
+        query: The query text or pre-tokenized query token IDs.
+        items: The item text(s) or pre-tokenized item token IDs.
+        label_token_ids: List of token IDs to compute probabilities for.
+        apply_softmax: Whether to normalize probabilities using softmax over only
+            the label_token_ids (True) or return true model probabilities over
+            the full vocab for those ids (False).
+        item_first: If True, prepend items to query. Otherwise append items to query.
+        add_special_tokens: Whether to add special tokens when tokenizing.
+    """
+
+    model: str | None = None
+    query: str | list[int] = Field(
+        ...,
+        description="The query text or pre-tokenized query token IDs.",
+    )
+    items: list[str] | list[list[int]] = Field(
+        ...,
+        description="List of item texts or pre-tokenized item token IDs.",
+    )
+    label_token_ids: list[int] = Field(
+        ...,
+        description="List of token IDs to compute probabilities for.",
+    )
+    apply_softmax: bool = Field(
+        default=True,
+        description=(
+            "If True, normalize probabilities using softmax over only the "
+            "label_token_ids. If False, return the true model probabilities "
+            "over the full vocab for those ids."
+        ),
+    )
+    item_first: bool = Field(
+        default=False,
+        description="If True, prepend items to query. Otherwise append items to query.",
+    )
+    add_special_tokens: bool = Field(
+        default=True,
+        description="Whether to add special tokens when tokenizing.",
+    )
+    priority: int = Field(
+        default=0,
+        description=(
+            "The priority of the request (lower means earlier handling; default: 0)."
+        ),
+    )
+    request_id: str = Field(
+        default_factory=random_uuid,
+        description="The request_id related to this request.",
+    )
+
+
+class GenerativeScoringItemResult(OpenAIBaseModel):
+    """Result for a single item in the generative scoring response.
+
+    Attributes:
+        index: The index of this item in the input items list.
+        object: Type of object, always "score".
+        score: The probability score for the first label token.
+    """
+
+    index: int
+    object: Literal["score"] = "score"
+    score: float
+
+
+class GenerativeScoringResponse(OpenAIBaseModel):
+    """Response from the generative scoring computation.
+
+    Attributes:
+        id: Unique identifier for this response.
+        object: Type of object, always "list".
+        created: Unix timestamp of when the response was created.
+        model: The model used for scoring.
+        data: List of scoring results, one per input item.
+        usage: Token usage information.
+    """
+
+    id: str = Field(default="")
+    object: Literal["list"] = "list"
+    created: int = Field(default_factory=lambda: int(time.time()))
+    model: str
+    data: list[GenerativeScoringItemResult]
+    usage: UsageInfo
+
+
+# ============================================================================
+# Serving class
+# ============================================================================
+
+
+class OpenAIServingGenerativeScoring(OpenAIServing):
+    """Serving class for generative scoring computation.
+
+    This class handles computing the probability of specified token IDs
+    appearing as the next token after concatenating query and item prompts.
+
+    The key operation is:
+    1. For each item, build a prompt: query + item (or item + query if item_first)
+    2. Run a forward pass to get the next token distribution
+    3. Extract probabilities for the specified label_token_ids
+    4. Normalize either over the full vocab (apply_softmax=False) or
+       over just the label_token_ids (apply_softmax=True)
+    """
+
+    def __init__(
+        self,
+        engine_client: EngineClient,
+        models: OpenAIServingModels,
+        *,
+        request_logger: RequestLogger | None,
+    ) -> None:
+        super().__init__(
+            engine_client=engine_client,
+            models=models,
+            request_logger=request_logger,
+        )
+
+    async def create_generative_scoring(
+        self,
+        request: GenerativeScoringRequest,
+        raw_request: Request | None = None,
+    ) -> GenerativeScoringResponse | ErrorResponse:
+        """Create generative scoring for the given request.
+
+        Args:
+            request: The GenerativeScoringRequest containing query, items, and
+                label_token_ids.
+            raw_request: The raw FastAPI request object.
+
+        Returns:
+            GenerativeScoringResponse with probabilities for each item, or
+            ErrorResponse if an error occurred.
+        """
+        # Check model
+        error_check_ret = await self._check_model(request)  # type: ignore[arg-type]
+        if error_check_ret is not None:
+            return error_check_ret
+
+        # Check if engine is alive
+        if self.engine_client.errored:
+            raise self.engine_client.dead_error
+
+        # Get tokenizer
+        tokenizer = self.renderer.tokenizer
+        if tokenizer is None:
+            return self.create_error_response(
+                "Tokenizer not available. Cannot process generative scoring request."
+            )
+
+        # Validate label_token_ids
+        vocab_size = self.model_config.get_vocab_size()
+        for token_id in request.label_token_ids:
+            if token_id < 0 or token_id >= vocab_size:
+                return self.create_error_response(
+                    f"label_token_id {token_id} is out of vocabulary range "
+                    f"[0, {vocab_size}). Please provide valid token IDs."
+                )
+
+        if len(request.label_token_ids) == 0:
+            return self.create_error_response(
+                "label_token_ids must contain at least one token ID."
+            )
+
+        # Validate items
+        if len(request.items) == 0:
+            return self.create_error_response("items must contain at least one item.")
+
+        # Note: Mixed item types (string and token list) are validated by
+        # Pydantic at request parsing time, so we don't need to check here.
+
+        try:
+            lora_request = self._maybe_get_adapters(request)  # type: ignore[arg-type]
+        except (ValueError, TypeError, RuntimeError) as e:
+            logger.exception("Error preparing request components")
+            return self.create_error_response(e)
+
+        base_id = self._base_request_id(raw_request, default=request.request_id)
+        request_id = f"generative-scoring-{base_id}"
+        created_time = int(time.time())
+
+        # Build prompts for each item
+        try:
+            engine_inputs, prompt_token_counts = await self._build_prompts(
+                request, tokenizer, self.model_config.max_model_len
+            )
+        except (ValueError, TypeError) as e:
+            logger.exception("Error building prompts")
+            return self.create_error_response(e)
+
+        # Create sampling params for scoring
+        # We use max_tokens=1 with logprob_token_ids to efficiently get
+        # logprobs for only the specified label tokens (not full vocab)
+        # Note: temperature/top_k/top_p don't affect logprobs - they only
+        # affect the sampling distribution. Logprobs are computed from raw
+        # logits via log_softmax before any sampling transformations.
+        sampling_params = SamplingParams(
+            max_tokens=1,
+            logprobs=len(request.label_token_ids),
+            logprob_token_ids=request.label_token_ids,
+            n=1,
+        )
+
+        # Get trace headers
+        trace_headers = (
+            None
+            if raw_request is None
+            else await self._get_trace_headers(raw_request.headers)
+        )
+
+        # Schedule requests for all inputs
+        generators: list[AsyncGenerator[RequestOutput, None]] = []
+        for i, engine_input in enumerate(engine_inputs):
+            request_id_item = f"{request_id}-{i}"
+
+            self._log_inputs(
+                request_id_item,
+                engine_input,
+                params=sampling_params,
+                lora_request=lora_request,
+            )
+
+            generator = self.engine_client.generate(
+                engine_input,
+                sampling_params,
+                request_id_item,
+                lora_request=lora_request,
+                trace_headers=trace_headers,
+                priority=request.priority,
+            )
+            generators.append(generator)
+
+        # Collect results
+        result_generator = merge_async_iterators(*generators)
+        results: list[RequestOutput | None] = [None] * len(engine_inputs)
+
+        try:
+            async for i, res in result_generator:
+                results[i] = res
+        except asyncio.CancelledError:
+            return self.create_error_response("Client disconnected")
+        except Exception as e:
+            logger.exception("Error during generation")
+            return self.create_error_response(e)
+
+        # Process results to extract label token probabilities
+        item_results: list[GenerativeScoringItemResult] = []
+        total_prompt_tokens = 0
+        total_completion_tokens = 0
+
+        for i, result in enumerate(results):
+            if result is None:
+                return self.create_error_response(
+                    f"Failed to generate result for item {i}"
+                )
+
+            # Check for errors
+            if result.outputs and result.outputs[0].finish_reason == "error":
+                return self.create_error_response(f"Generation error for item {i}")
+
+            # Get logprobs from the generated token
+            if not result.outputs or len(result.outputs) == 0:
+                return self.create_error_response(f"No output generated for item {i}")
+
+            output = result.outputs[0]
+            if output.logprobs is None or len(output.logprobs) == 0:
+                return self.create_error_response(
+                    f"No logprobs available for item {i}. "
+                    "This might indicate an issue with logprobs configuration."
+                )
+
+            # The logprobs dict maps token_id -> Logprob object
+            # For logprobs=-1, this contains all vocab tokens
+            logprobs_dict = output.logprobs[0]
+
+            # Extract logprobs for label tokens
+            label_logprobs: dict[int, float] = {}
+            missing_tokens = []
+            for token_id in request.label_token_ids:
+                if token_id in logprobs_dict:
+                    label_logprobs[token_id] = logprobs_dict[token_id].logprob
+                else:
+                    missing_tokens.append(token_id)
+
+            if missing_tokens:
+                return self.create_error_response(
+                    f"Token IDs {missing_tokens} not found in logprobs for item {i}. "
+                    "This might indicate the tokens are outside the model's vocabulary."
+                )
+
+            # Compute probabilities based on apply_softmax setting
+            token_probs = self._compute_probabilities(
+                label_logprobs,
+                apply_softmax=request.apply_softmax,
+            )
+
+            # Use the first label token's probability as the score
+            first_label_id = request.label_token_ids[0]
+            score = token_probs[first_label_id]
+
+            item_results.append(
+                GenerativeScoringItemResult(
+                    index=i,
+                    score=score,
+                )
+            )
+
+            # Update token counts
+            total_prompt_tokens += prompt_token_counts[i]
+            total_completion_tokens += len(output.token_ids)
+
+        # Build response
+        model_name = self.models.model_name(lora_request)
+        response = GenerativeScoringResponse(
+            id=request_id,
+            created=created_time,
+            model=model_name,
+            data=item_results,
+            usage=UsageInfo(
+                prompt_tokens=total_prompt_tokens,
+                total_tokens=total_prompt_tokens + total_completion_tokens,
+                completion_tokens=total_completion_tokens,
+            ),
+        )
+
+        return response
+
+    async def _build_prompts(
+        self,
+        request: GenerativeScoringRequest,
+        tokenizer: TokenizerLike,
+        max_model_len: int,
+    ) -> tuple[list[EngineInput], list[int]]:
+        """Build prompts by concatenating query and items.
+
+        Uses the Renderer's tokenizer to tokenize text inputs, then
+        creates EngineInput via tokens_input() for engine consumption.
+
+        Args:
+            request: The request containing query, items, and settings.
+            tokenizer: The tokenizer to use.
+            max_model_len: Maximum model context length for truncation.
+
+        Returns:
+            Tuple of (list of EngineInput, list of prompt token counts).
+        """
+        # Tokenize query if it's a string
+        if isinstance(request.query, str):
+            query_token_ids = tokenizer.encode(
+                request.query,
+                add_special_tokens=request.add_special_tokens,
+            )
+        else:
+            query_token_ids = request.query
+
+        engine_inputs: list[EngineInput] = []
+        prompt_token_counts: list[int] = []
+
+        for item in request.items:
+            # Tokenize item if it's a string
+            if isinstance(item, str):
+                # Don't add special tokens for items to avoid duplicate BOS/EOS
+                item_token_ids = tokenizer.encode(
+                    item,
+                    add_special_tokens=False,
+                )
+            else:
+                item_token_ids = item
+
+            # Concatenate based on item_first setting
+            if request.item_first:
+                prompt_token_ids = item_token_ids + query_token_ids
+            else:
+                prompt_token_ids = query_token_ids + item_token_ids
+
+            # Truncate to max_model_len - 1 to leave room for 1 output token
+            max_prompt_len = max_model_len - 1
+            if len(prompt_token_ids) > max_prompt_len:
+                prompt_token_ids = prompt_token_ids[:max_prompt_len]
+
+            engine_inputs.append(tokens_input(prompt_token_ids))
+            prompt_token_counts.append(len(prompt_token_ids))
+
+        return engine_inputs, prompt_token_counts
+
+    def _compute_probabilities(
+        self,
+        label_logprobs: dict[int, float],
+        apply_softmax: bool,
+    ) -> dict[int, float]:
+        """Compute probabilities from logprobs.
+
+        Args:
+            label_logprobs: Dictionary mapping token_id to logprob.
+            apply_softmax: If True, normalize over only the label tokens.
+                If False, return true model probabilities (exp(logprob)).
+
+        Returns:
+            Dictionary mapping token_id to probability.
+        """
+        if apply_softmax:
+            # Normalize over only the label tokens (subset softmax)
+            # softmax(gathered_logits) over the subset
+            logprobs_list = list(label_logprobs.values())
+            max_logprob = max(logprobs_list)
+
+            # Compute exp(logprob - max) for numerical stability
+            exp_values = {
+                token_id: math.exp(logprob - max_logprob)
+                for token_id, logprob in label_logprobs.items()
+            }
+            sum_exp = sum(exp_values.values())
+
+            return {
+                token_id: exp_val / sum_exp for token_id, exp_val in exp_values.items()
+            }
+        else:
+            # Return true model probabilities
+            # Since logprobs are already log(softmax(logits)),
+            # we just need to exp() them
+            return {
+                token_id: math.exp(logprob)
+                for token_id, logprob in label_logprobs.items()
+            }
+
+    async def _get_trace_headers(
+        self,
+        headers: Mapping[str, str],
+    ) -> Mapping[str, str] | None:
+        """Extract trace headers from request headers."""
+        if not contains_trace_headers(headers):
+            return None
+
+        if not await self.engine_client.is_tracing_enabled():
+            log_tracing_disabled_warning()
+            return None
+
+        return extract_trace_headers(headers)
diff --git a/vllm/entrypoints/openai/models/protocol.py b/vllm/entrypoints/openai/models/protocol.py
index e7b96476c6ea..b383b018c39c 100644
--- a/vllm/entrypoints/openai/models/protocol.py
+++ b/vllm/entrypoints/openai/models/protocol.py
@@ -16,3 +16,4 @@ class LoRAModulePath:
     name: str
     path: str
     base_model_name: str | None = None
+    is_3d_lora_weight: bool = False
diff --git a/vllm/entrypoints/openai/models/serving.py b/vllm/entrypoints/openai/models/serving.py
index dd7a8687f2b5..347752c912ca 100644
--- a/vllm/entrypoints/openai/models/serving.py
+++ b/vllm/entrypoints/openai/models/serving.py
@@ -112,7 +112,6 @@ def __init__(
 
         self.model_config = self.engine_client.model_config
         self.renderer = self.engine_client.renderer
-        self.io_processor = self.engine_client.io_processor
         self.input_processor = self.engine_client.input_processor
 
     async def init_static_loras(self):
@@ -122,7 +121,9 @@ async def init_static_loras(self):
             return
         for lora in self.static_lora_modules:
             load_request = LoadLoRAAdapterRequest(
-                lora_path=lora.path, lora_name=lora.name
+                lora_path=lora.path,
+                lora_name=lora.name,
+                is_3d_lora_weight=lora.is_3d_lora_weight,
             )
             load_result = await self.load_lora_adapter(
                 request=load_request, base_model_name=lora.base_model_name
@@ -178,6 +179,7 @@ async def load_lora_adapter(
                 lora_int_id=lora_int_id,
                 lora_path=lora_path,
                 load_inplace=request.load_inplace,
+                is_3d_lora_weight=request.is_3d_lora_weight,
             )
             if base_model_name is not None and self.is_base_model(base_model_name):
                 lora_request.base_model_name = base_model_name
diff --git a/vllm/entrypoints/openai/parser/harmony_utils.py b/vllm/entrypoints/openai/parser/harmony_utils.py
index 9b4264456c51..7dc3704cea90 100644
--- a/vllm/entrypoints/openai/parser/harmony_utils.py
+++ b/vllm/entrypoints/openai/parser/harmony_utils.py
@@ -3,7 +3,6 @@
 
 import datetime
 from collections.abc import Iterable, Sequence
-from typing import Literal
 
 from openai.types.responses.tool import Tool
 from openai_harmony import (
@@ -27,6 +26,42 @@
 
 logger = init_logger(__name__)
 
+
+def is_function_recipient(
+    recipient: str,
+    allowed_function_tool_names: frozenset[str] | None = None,
+) -> bool:
+    """Check whether *recipient* refers to a function tool call.
+
+    The optional *allowed_function_tool_names* parameter is used by the
+    Responses API to distinguish bare function-call recipients (missing the
+    ``functions.`` prefix) from MCP tool calls.  When provided, a bare
+    recipient is only treated as a function call if it appears in the set.
+    The Chat Completions path omits this parameter so that all bare
+    recipients are accepted as function calls (the heuristic fallback).
+    """
+    if not recipient:
+        return False
+    if recipient.startswith("<|"):
+        return False
+    if recipient.startswith("functions."):
+        return len(recipient) > len("functions.")
+    if recipient == "assistant":
+        return False
+    if recipient in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
+        return False
+    first_segment = recipient.split(".", 1)[0]
+    if first_segment in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
+        return False
+    if allowed_function_tool_names is not None:
+        return recipient in allowed_function_tool_names
+    return True
+
+
+def extract_function_from_recipient(recipient: str) -> str:
+    return recipient.removeprefix("functions.")
+
+
 REASONING_EFFORT = {
     "high": ReasoningEffort.HIGH,
     "medium": ReasoningEffort.MEDIUM,
@@ -52,7 +87,7 @@
 def has_custom_tools(tool_types: set[str]) -> bool:
     """
     Checks if the given tool types are custom tools
-    (i.e. any tool other than MCP buildin tools)
+    (i.e. any tool other than MCP builtin tools)
     """
     return not tool_types.issubset(MCP_BUILTIN_TOOLS)
 
@@ -66,7 +101,7 @@ def get_encoding():
 
 def get_system_message(
     model_identity: str | None = None,
-    reasoning_effort: Literal["high", "medium", "low"] | None = None,
+    reasoning_effort: str | None = None,
     start_date: str | None = None,
     browser_description: str | None = None,
     python_description: str | None = None,
@@ -84,6 +119,12 @@ def get_system_message(
         )
         sys_msg_content = sys_msg_content.with_model_identity(new_identity)
     if reasoning_effort is not None:
+        if reasoning_effort not in REASONING_EFFORT:
+            supported_values = ", ".join(REASONING_EFFORT)
+            raise ValueError(
+                f"reasoning_effort={reasoning_effort!r} is not supported by "
+                f"Harmony. Supported values are: {supported_values}."
+            )
         sys_msg_content = sys_msg_content.with_reasoning_effort(
             REASONING_EFFORT[reasoning_effort]
         )
diff --git a/vllm/entrypoints/openai/parser/responses_parser.py b/vllm/entrypoints/openai/parser/responses_parser.py
index a31f20501e0b..1868a31ca285 100644
--- a/vllm/entrypoints/openai/parser/responses_parser.py
+++ b/vllm/entrypoints/openai/parser/responses_parser.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import logging
-from collections.abc import Callable
+from typing import Any
 
 from openai.types.responses import ResponseFunctionToolCall, ResponseOutputItem
 from openai.types.responses.response_function_tool_call_output_item import (
@@ -15,6 +15,7 @@
     ResponseReasoningItem,
 )
 
+from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
 from vllm.entrypoints.constants import MCP_PREFIX
 from vllm.entrypoints.openai.responses.protocol import (
     ResponseInputOutputItem,
@@ -36,10 +37,12 @@ def __init__(
         self,
         *,
         tokenizer: TokenizerLike,
-        reasoning_parser_cls: Callable[[TokenizerLike], ReasoningParser],
+        reasoning_parser_cls: type[ReasoningParser],
         response_messages: list[ResponseInputOutputItem],
         request: ResponsesRequest,
         tool_parser_cls: type[ToolParser] | None,
+        chat_template: str | None,
+        chat_template_content_format: ChatTemplateContentFormatOption,
     ):
         self.response_messages: list[ResponseInputOutputItem] = (
             # TODO: initial messages may not be properly typed
@@ -49,7 +52,14 @@ def __init__(
         self.tokenizer = tokenizer
         self.request = request
 
-        self.reasoning_parser_instance = reasoning_parser_cls(tokenizer)
+        self.reasoning_parser_instance = reasoning_parser_cls(
+            tokenizer,
+            chat_template_kwargs=_effective_chat_template_kwargs(
+                request,
+                chat_template=chat_template,
+                chat_template_content_format=chat_template_content_format,
+            ),
+        )
         self.tool_parser_instance = None
         if tool_parser_cls is not None:
             self.tool_parser_instance = tool_parser_cls(tokenizer, request.tools)
@@ -159,10 +169,12 @@ def make_response_output_items_from_parsable_context(
 def get_responses_parser_for_simple_context(
     *,
     tokenizer: TokenizerLike,
-    reasoning_parser_cls: Callable[[TokenizerLike], ReasoningParser],
+    reasoning_parser_cls: type[ReasoningParser],
     response_messages: list[ResponseInputOutputItem],
     request: ResponsesRequest,
     tool_parser_cls,
+    chat_template: str | None,
+    chat_template_content_format: ChatTemplateContentFormatOption,
 ) -> ResponsesParser:
     """Factory function to create a ResponsesParser with
     optional reasoning parser.
@@ -176,4 +188,17 @@ def get_responses_parser_for_simple_context(
         response_messages=response_messages,
         request=request,
         tool_parser_cls=tool_parser_cls,
+        chat_template=chat_template,
+        chat_template_content_format=chat_template_content_format,
     )
+
+
+def _effective_chat_template_kwargs(
+    request: ResponsesRequest,
+    chat_template: str | None,
+    chat_template_content_format: ChatTemplateContentFormatOption,
+) -> dict[str, Any]:
+    return request.build_chat_params(
+        default_template=chat_template,
+        default_template_content_format=chat_template_content_format,
+    ).chat_template_kwargs
diff --git a/vllm/entrypoints/openai/responses/api_router.py b/vllm/entrypoints/openai/responses/api_router.py
index 88d821260940..61077f1a7c53 100644
--- a/vllm/entrypoints/openai/responses/api_router.py
+++ b/vllm/entrypoints/openai/responses/api_router.py
@@ -39,7 +39,8 @@ async def _convert_stream_to_sse_events(
         event_type = getattr(event, "type", "unknown")
         # https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format
         event_data = (
-            f"event: {event_type}\ndata: {event.model_dump_json(indent=None)}\n\n"
+            f"event: {event_type}\ndata: "
+            f"{event.model_dump_json(indent=None, by_alias=True)}\n\n"
         )
         yield event_data
 
@@ -65,10 +66,11 @@ async def create_responses(request: ResponsesRequest, raw_request: Request):
 
     if isinstance(generator, ErrorResponse):
         return JSONResponse(
-            content=generator.model_dump(), status_code=generator.error.code
+            content=generator.model_dump(mode="json", by_alias=True),
+            status_code=generator.error.code,
         )
     elif isinstance(generator, ResponsesResponse):
-        return JSONResponse(content=generator.model_dump())
+        return JSONResponse(content=generator.model_dump(mode="json", by_alias=True))
 
     return StreamingResponse(
         content=_convert_stream_to_sse_events(generator), media_type="text/event-stream"
@@ -95,10 +97,11 @@ async def retrieve_responses(
 
     if isinstance(response, ErrorResponse):
         return JSONResponse(
-            content=response.model_dump(), status_code=response.error.code
+            content=response.model_dump(mode="json", by_alias=True),
+            status_code=response.error.code,
         )
     elif isinstance(response, ResponsesResponse):
-        return JSONResponse(content=response.model_dump())
+        return JSONResponse(content=response.model_dump(mode="json", by_alias=True))
     return StreamingResponse(
         content=_convert_stream_to_sse_events(response), media_type="text/event-stream"
     )
@@ -115,9 +118,10 @@ async def cancel_responses(response_id: str, raw_request: Request):
 
     if isinstance(response, ErrorResponse):
         return JSONResponse(
-            content=response.model_dump(), status_code=response.error.code
+            content=response.model_dump(mode="json", by_alias=True),
+            status_code=response.error.code,
         )
-    return JSONResponse(content=response.model_dump())
+    return JSONResponse(content=response.model_dump(mode="json", by_alias=True))
 
 
 def attach_router(app: FastAPI):
diff --git a/vllm/entrypoints/openai/responses/context.py b/vllm/entrypoints/openai/responses/context.py
index 48360173cf48..644dc8cfaaa6 100644
--- a/vllm/entrypoints/openai/responses/context.py
+++ b/vllm/entrypoints/openai/responses/context.py
@@ -6,7 +6,6 @@
 import json
 import logging
 from abc import ABC, abstractmethod
-from collections.abc import Callable
 from contextlib import AsyncExitStack
 from dataclasses import replace
 from typing import TYPE_CHECKING, Any, Final, Union
@@ -273,7 +272,7 @@ def __init__(
         *,
         response_messages: list[ResponseInputOutputItem],
         tokenizer: TokenizerLike,
-        reasoning_parser_cls: Callable[[TokenizerLike], ReasoningParser] | None,
+        reasoning_parser_cls: type[ReasoningParser] | None,
         request: ResponsesRequest,
         available_tools: list[str] | None,
         tool_parser_cls: type[ToolParser] | None,
@@ -296,6 +295,8 @@ def __init__(
             response_messages=response_messages,
             request=request,
             tool_parser_cls=tool_parser_cls,
+            chat_template=chat_template,
+            chat_template_content_format=chat_template_content_format,
         )
         self.tool_parser_cls = tool_parser_cls
         self.request = request
@@ -523,10 +524,12 @@ def __init__(
         self,
         messages: list,
         available_tools: list[str],
+        function_tool_names: frozenset[str] | None = None,
     ):
         self._messages = messages
         self.finish_reason: str | None = None
         self.available_tools = available_tools
+        self.function_tool_names = function_tool_names
         self._tool_sessions: dict[str, ClientSession | Tool] = {}
         self.called_tools: set[str] = set()
 
diff --git a/vllm/entrypoints/openai/responses/harmony.py b/vllm/entrypoints/openai/responses/harmony.py
index faab2f7f4cc7..cfe5fb67bd24 100644
--- a/vllm/entrypoints/openai/responses/harmony.py
+++ b/vllm/entrypoints/openai/responses/harmony.py
@@ -31,7 +31,9 @@
 
 from vllm.entrypoints.openai.parser.harmony_utils import (
     BUILTIN_TOOL_TO_MCP_SERVER_LABEL,
+    extract_function_from_recipient,
     flatten_chat_text_content,
+    is_function_recipient,
 )
 from vllm.entrypoints.openai.responses.protocol import (
     ResponseInputOutputItem,
@@ -175,6 +177,8 @@ def response_input_to_harmony(
             Author.new(Role.TOOL, f"functions.{call_response.name}"),
             response_msg["output"],
         )
+        msg = msg.with_channel("commentary")
+        msg = msg.with_recipient("assistant")
     elif response_msg["type"] == "reasoning":
         content = response_msg.get("content")
         if content and len(content) >= 1:
@@ -292,7 +296,7 @@ def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutput
 
 def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
     """Parse function calls into function tool call items."""
-    function_name = recipient.split(".")[-1]
+    function_name = extract_function_from_recipient(recipient)
     output_items = []
     for content in message.content:
         random_id = random_uuid()
@@ -408,7 +412,10 @@ def _parse_message_no_recipient(
 # ---------------------------------------------------------------------------
 
 
-def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
+def harmony_to_response_output(
+    message: Message,
+    function_tool_names: frozenset[str] | None = None,
+) -> list[ResponseOutputItem]:
     """Parse a Harmony message into a list of output response items.
 
     This is the main dispatcher that routes based on channel and recipient.
@@ -427,8 +434,8 @@ def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
         if recipient.startswith("browser."):
             output_items.append(_parse_browser_tool_call(message, recipient))
 
-        # Function calls (should only happen on commentary channel)
-        elif message.channel == "commentary" and recipient.startswith("functions."):
+        # Function calls (with or without "functions." prefix)
+        elif is_function_recipient(recipient, function_tool_names):
             output_items.extend(_parse_function_call(message, recipient))
 
         # Built-in MCP tools (python, browser, container)
@@ -448,6 +455,7 @@ def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
 
 def parser_state_to_response_output(
     parser: StreamableParser,
+    function_tool_names: frozenset[str] | None = None,
 ) -> list[ResponseOutputItem]:
     """Extract in-progress response items from incomplete parser state.
 
@@ -462,15 +470,15 @@ def parser_state_to_response_output(
     if current_recipient is not None and current_recipient.startswith("browser."):
         return []
 
-    if current_recipient and parser.current_channel in ("commentary", "analysis"):
-        if current_recipient.startswith("functions."):
+    if current_recipient:
+        if is_function_recipient(current_recipient, function_tool_names):
             rid = random_uuid()
             return [
                 ResponseFunctionToolCall(
                     arguments=parser.current_content,
                     call_id=f"call_{rid}",
                     type="function_call",
-                    name=current_recipient.split(".")[-1],
+                    name=extract_function_from_recipient(current_recipient),
                     id=f"fc_{rid}",
                     status="in_progress",
                 )
diff --git a/vllm/entrypoints/openai/responses/protocol.py b/vllm/entrypoints/openai/responses/protocol.py
index 609d8e40f8db..10aa5bde392b 100644
--- a/vllm/entrypoints/openai/responses/protocol.py
+++ b/vllm/entrypoints/openai/responses/protocol.py
@@ -23,7 +23,9 @@
     ResponseOutputItem,
     ResponseOutputItemAddedEvent,
     ResponseOutputItemDoneEvent,
+    ResponseOutputMessage,
     ResponsePrompt,
+    ResponseReasoningItem,
     ResponseReasoningTextDeltaEvent,
     ResponseReasoningTextDoneEvent,
     ResponseStatus,
@@ -106,7 +108,7 @@ def serialize_message(msg):
         return msg.to_dict()
     else:
         # fallback to pydantic dump
-        return msg.model_dump_json(by_alias=True)
+        return msg.model_dump(mode="json", by_alias=True)
 
 
 def serialize_messages(msgs):
@@ -173,6 +175,24 @@ class ResponsesRequest(OpenAIBaseModel):
     user: str | None = None
     skip_special_tokens: bool = True
     include_stop_str_in_output: bool = False
+    presence_penalty: float | None = Field(
+        default=None,
+        ge=-2.0,
+        le=2.0,
+        description=(
+            "The presence penalty that was used to penalize new tokens based on "
+            "whether they appear in the text so far."
+        ),
+    )
+    frequency_penalty: float | None = Field(
+        default=None,
+        ge=-2.0,
+        le=2.0,
+        description=(
+            "The frequency penalty that was used to penalize new tokens based on "
+            "their frequency in the text so far."
+        ),
+    )
     prompt_cache_key: str | None = Field(
         default=None,
         description=(
@@ -256,6 +276,13 @@ class ResponsesRequest(OpenAIBaseModel):
         default=None,
         description="KVTransfer parameters used for disaggregated serving.",
     )
+    chat_template_kwargs: dict[str, Any] | None = Field(
+        default=None,
+        description=(
+            "Additional keyword args to pass to the chat template renderer. "
+            "Will be accessible by the template."
+        ),
+    )
     # --8<-- [end:responses-extra-params]
 
     def build_chat_params(
@@ -276,7 +303,7 @@ def build_chat_params(
             chat_template=default_template,
             chat_template_content_format=default_template_content_format,
             chat_template_kwargs=merge_kwargs(  # To remove unset values
-                {},
+                self.chat_template_kwargs,
                 dict(
                     add_generation_prompt=not continue_final,
                     continue_final_message=continue_final,
@@ -328,6 +355,12 @@ def to_sampling_params(
         if (repetition_penalty := self.repetition_penalty) is None:
             repetition_penalty = default_sampling_params.get("repetition_penalty", 1.0)
 
+        if (presence_penalty := self.presence_penalty) is None:
+            presence_penalty = default_sampling_params.get("presence_penalty", 0.0)
+
+        if (frequency_penalty := self.frequency_penalty) is None:
+            frequency_penalty = default_sampling_params.get("frequency_penalty", 0.0)
+
         stop_token_ids = default_sampling_params.get("stop_token_ids")
 
         # Structured output
@@ -367,6 +400,8 @@ def to_sampling_params(
             logprobs=self.top_logprobs if self.is_include_output_logprobs() else None,
             stop_token_ids=stop_token_ids,
             stop=stop,
+            frequency_penalty=frequency_penalty,
+            presence_penalty=presence_penalty,
             repetition_penalty=repetition_penalty,
             seed=self.seed,
             ignore_eos=self.ignore_eos,
@@ -425,18 +460,21 @@ def check_cache_salt_support(cls, data):
 
     @model_validator(mode="before")
     @classmethod
-    def function_call_parsing(cls, data):
-        """Parse function_call dictionaries into ResponseFunctionToolCall objects.
-        This ensures Pydantic can properly resolve union types in the input field.
-        Function calls provided as dicts are converted to ResponseFunctionToolCall
-        objects before validation, while invalid structures are left for Pydantic
-        to reject with appropriate error messages.
-        """
+    def input_item_parsing(cls, data):
+        """Parse input items that are missing required fields or that Pydantic
+        cannot disambiguate in a Union of TypedDict / BaseModel types.
 
+        Specifically handles:
+        - function_call -> ResponseFunctionToolCall
+        - reasoning     -> ResponseReasoningItem (auto-generates id)
+        - message(role=assistant) -> ResponseOutputMessage (auto-generates
+          id/status and annotations)
+
+        Invalid structures are left for Pydantic to reject.
+        """
         input_data = data.get("input")
 
         # Early return for None, strings, or bytes
-        # (strings are iterable but shouldn't be processed)
         if input_data is None or isinstance(input_data, (str, bytes)):
             return data
 
@@ -450,22 +488,107 @@ def function_call_parsing(cls, data):
 
         processed_input = []
         for item in input_data:
-            if isinstance(item, dict) and item.get("type") == "function_call":
+            if not isinstance(item, dict):
+                processed_input.append(item)
+                continue
+
+            item_type = item.get("type")
+
+            if item_type == "function_call":
                 try:
                     processed_input.append(ResponseFunctionToolCall(**item))
                 except ValidationError:
-                    # Let Pydantic handle validation for malformed function calls
                     logger.debug(
                         "Failed to parse function_call to ResponseFunctionToolCall, "
                         "leaving for Pydantic validation"
                     )
                     processed_input.append(item)
+
+            elif item_type == "reasoning":
+                if "id" not in item:
+                    item = {**item, "id": f"rs_{random_uuid()}"}
+                try:
+                    processed_input.append(ResponseReasoningItem(**item))
+                except ValidationError:
+                    logger.debug(
+                        "Failed to parse reasoning to ResponseReasoningItem, "
+                        "leaving for Pydantic validation"
+                    )
+                    processed_input.append(item)
+
+            elif item_type == "message" and item.get("role") == "assistant":
+                item = dict(item)
+                if "id" not in item:
+                    item["id"] = f"msg_{random_uuid()}"
+                if "status" not in item:
+                    item["status"] = "completed"
+                # ResponseOutputText requires annotations
+                if isinstance(item.get("content"), list):
+                    new_content = []
+                    for c in item["content"]:
+                        if (
+                            isinstance(c, dict)
+                            and c.get("type") == "output_text"
+                            and "annotations" not in c
+                        ):
+                            c = {**c, "annotations": []}
+                        new_content.append(c)
+                    item["content"] = new_content
+                try:
+                    processed_input.append(ResponseOutputMessage(**item))
+                except ValidationError:
+                    logger.debug(
+                        "Failed to parse assistant message to ResponseOutputMessage, "
+                        "leaving for Pydantic validation"
+                    )
+                    processed_input.append(item)
+
             else:
                 processed_input.append(item)
 
         data["input"] = processed_input
         return data
 
+    @model_validator(mode="before")
+    @classmethod
+    def check_tool_usage(cls, data):
+        if not isinstance(data, dict):
+            return data
+
+        tools = data.get("tools")
+        tool_choice = data.get("tool_choice", "auto")
+        has_tools = tools is not None and len(tools) > 0
+        is_named_tool_choice = (
+            isinstance(tool_choice, dict) and tool_choice.get("type") == "function"
+        )
+
+        if not has_tools:
+            if tool_choice in ("auto", "none"):
+                data["tool_choice"] = "none"
+            elif tool_choice == "required":
+                raise VLLMValidationError(
+                    "Tool choice 'required' must be specified with 'tools' parameter.",
+                    parameter="tool_choice",
+                )
+            elif is_named_tool_choice:
+                raise VLLMValidationError(
+                    "Tool choice 'function' not found in 'tools' parameter.",
+                    parameter="tool_choice",
+                )
+        elif is_named_tool_choice and tools is not None:
+            tool_name = tool_choice.get("name")
+            tool_names = {
+                t.get("name") if isinstance(t, dict) else getattr(t, "name", None)
+                for t in tools
+            }
+            if not tool_name or tool_name not in tool_names:
+                raise VLLMValidationError(
+                    "Tool choice 'function' not found in 'tools' parameter.",
+                    parameter="tool_choice",
+                )
+
+        return data
+
 
 class ResponsesResponse(OpenAIBaseModel):
     id: str = Field(default_factory=lambda: f"resp_{random_uuid()}")
@@ -496,6 +619,25 @@ class ResponsesResponse(OpenAIBaseModel):
     usage: ResponseUsage | None = None
     user: str | None = None
 
+    presence_penalty: float | None = Field(
+        default=None,
+        ge=-2.0,
+        le=2.0,
+        description=(
+            "The presence penalty that was used to penalize new tokens based on "
+            "whether they appear in the text so far."
+        ),
+    )
+    frequency_penalty: float | None = Field(
+        default=None,
+        ge=-2.0,
+        le=2.0,
+        description=(
+            "The frequency penalty that was used to penalize new tokens based on "
+            "their frequency in the text so far."
+        ),
+    )
+
     # vLLM-specific fields that are not in OpenAI spec
     kv_transfer_params: dict[str, Any] | None = Field(
         default=None, description="KVTransfer parameters."
@@ -574,6 +716,8 @@ def from_request(
             prompt=request.prompt,
             reasoning=request.reasoning,
             service_tier=request.service_tier,
+            presence_penalty=sampling_params.presence_penalty,
+            frequency_penalty=sampling_params.frequency_penalty,
             status=status,
             text=request.text,
             top_logprobs=sampling_params.logprobs,
diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py
index df94848e3b9b..68f71bd5d41b 100644
--- a/vllm/entrypoints/openai/responses/serving.py
+++ b/vllm/entrypoints/openai/responses/serving.py
@@ -3,7 +3,6 @@
 
 import asyncio
 import time
-import uuid
 from collections import deque
 from collections.abc import AsyncGenerator, AsyncIterator, Callable, Mapping, Sequence
 from contextlib import AsyncExitStack
@@ -13,29 +12,14 @@
 
 from fastapi import Request
 from openai.types.responses import (
-    ResponseContentPartAddedEvent,
-    ResponseContentPartDoneEvent,
-    ResponseFunctionCallArgumentsDeltaEvent,
-    ResponseFunctionCallArgumentsDoneEvent,
     ResponseFunctionToolCall,
-    ResponseFunctionToolCallItem,
     ResponseOutputItem,
-    ResponseOutputItemAddedEvent,
-    ResponseOutputItemDoneEvent,
     ResponseOutputMessage,
     ResponseOutputText,
-    ResponseReasoningItem,
-    ResponseReasoningTextDeltaEvent,
-    ResponseReasoningTextDoneEvent,
     ResponseStatus,
-    ResponseTextDeltaEvent,
-    ResponseTextDoneEvent,
     response_text_delta_event,
 )
 from openai.types.responses.response_output_text import Logprob, LogprobTopLogprob
-from openai.types.responses.response_reasoning_item import (
-    Content as ResponseReasoningTextContent,
-)
 from openai.types.responses.tool import Mcp, Tool
 from openai_harmony import Message as OpenAIHarmonyMessage
 from pydantic import TypeAdapter
@@ -89,15 +73,15 @@
     ResponseInProgressEvent,
     ResponseInputOutputItem,
     ResponseInputOutputMessage,
-    ResponseReasoningPartAddedEvent,
-    ResponseReasoningPartDoneEvent,
     ResponsesRequest,
     ResponsesResponse,
     ResponseUsage,
     StreamingResponsesResponse,
 )
 from vllm.entrypoints.openai.responses.streaming_events import (
+    SimpleStreamingEventProcessor,
     StreamingState,
+    _StateType,
     emit_content_delta_events,
     emit_previous_item_done_events,
     emit_tool_action_events,
@@ -105,6 +89,7 @@
 from vllm.entrypoints.openai.responses.utils import (
     construct_input_messages,
     construct_tool_dicts,
+    extract_function_tool_names,
     extract_tool_types,
 )
 from vllm.entrypoints.serve.render.serving import OpenAIServingRender
@@ -183,6 +168,7 @@ def __init__(
         enable_prompt_tokens_details: bool = False,
         enable_force_include_usage: bool = False,
         enable_log_outputs: bool = False,
+        default_chat_template_kwargs: dict[str, Any] | None = None,
     ) -> None:
         super().__init__(
             engine_client=engine_client,
@@ -194,6 +180,7 @@ def __init__(
         self.openai_serving_render = openai_serving_render
         self.chat_template = chat_template
         self.chat_template_content_format: Final = chat_template_content_format
+        self.chat_template_kwargs = default_chat_template_kwargs or {}
         self.enable_log_outputs = enable_log_outputs
 
         # Set up the unified parser - either a unified parser or fall back to
@@ -267,6 +254,18 @@ def __init__(
 
         self.tool_server = tool_server
 
+    def _effective_chat_template_kwargs(
+        self, request: ResponsesRequest
+    ) -> dict[str, Any]:
+        return (
+            request.build_chat_params(
+                self.chat_template,
+                self.chat_template_content_format,
+            )
+            .with_defaults(self.chat_template_kwargs)
+            .chat_template_kwargs
+        )
+
     def _validate_generator_input(
         self,
         engine_input: EngineInput,
@@ -331,6 +330,17 @@ async def create_responses(
         AsyncGenerator[StreamingResponsesResponse, None]
         | ResponsesResponse
         | ErrorResponse
+    ):
+        return await self._with_kv_transfer_rejection_cleanup(
+            self._create_responses(request, raw_request), request, raw_request
+        )
+
+    async def _create_responses(
+        self, request: ResponsesRequest, raw_request: Request | None = None
+    ) -> (
+        AsyncGenerator[StreamingResponsesResponse, None]
+        | ResponsesResponse
+        | ErrorResponse
     ):
         error_check_ret = await self._check_model(request)
         if error_check_ret is not None:
@@ -424,6 +434,9 @@ async def create_responses(
                 self._extract_prompt_len(engine_input),
                 self.default_sampling_params,
                 self.override_max_tokens,
+                truncate_prompt_tokens=(
+                    -1 if request.truncation != "disabled" else None
+                ),
             )
 
             sampling_params = request.to_sampling_params(
@@ -437,11 +450,16 @@ async def create_responses(
             )
 
             context: ConversationContext
+            function_tool_names = extract_function_tool_names(request.tools)
             if self.use_harmony:
                 if request.stream:
-                    context = StreamingHarmonyContext(messages, available_tools)
+                    context = StreamingHarmonyContext(
+                        messages, available_tools, function_tool_names
+                    )
                 else:
-                    context = HarmonyContext(messages, available_tools)
+                    context = HarmonyContext(
+                        messages, available_tools, function_tool_names
+                    )
             else:
                 if envs.VLLM_USE_EXPERIMENTAL_PARSER_CONTEXT:
                     # This is a feature in development for parsing
@@ -464,7 +482,14 @@ async def create_responses(
                     context = SimpleContext()
 
             if self.parser and self.parser.reasoning_parser_cls is not None:
-                reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
+                chat_template_kwargs = self._effective_chat_template_kwargs(request)
+                reasoning_parser_kwargs = {
+                    "chat_template_kwargs": chat_template_kwargs,
+                }
+                reasoning_parser = self.parser.reasoning_parser_cls(
+                    tokenizer,
+                    chat_template_kwargs=chat_template_kwargs,
+                )
                 if (
                     isinstance(
                         struct_out := sampling_params.structured_outputs,
@@ -486,6 +511,9 @@ async def create_responses(
                 lora_request=lora_request,
                 priority=request.priority,
                 trace_headers=trace_headers,
+                reasoning_parser_kwargs=reasoning_parser_kwargs
+                if self.parser and self.parser.reasoning_parser_cls is not None
+                else None,
             )
             generators.append(generator)
 
@@ -585,15 +613,16 @@ async def _make_request(
             prev_msg=self.msg_store.get(prev_response.id) if prev_response else None,
             prev_response_output=prev_response.output if prev_response else None,
         )
-
+        chat_template_kwargs = self._effective_chat_template_kwargs(request)
         _, engine_inputs = await self.openai_serving_render.preprocess_chat(
             request,
             messages,
             default_template=self.chat_template,
             default_template_content_format=self.chat_template_content_format,
-            default_template_kwargs=None,
+            default_template_kwargs=chat_template_kwargs,
             tool_dicts=tool_dicts,
             tool_parser=self.parser.tool_parser_cls if self.parser else None,
+            reasoning_parser=self.parser.reasoning_parser_cls if self.parser else None,
         )
         return messages, engine_inputs
 
@@ -609,15 +638,16 @@ async def _render_next_turn(
         new_messages = construct_input_messages(
             request_input=messages,
         )
-
+        chat_template_kwargs = self._effective_chat_template_kwargs(request)
         _, engine_inputs = await self.openai_serving_render.preprocess_chat(
             request,
             new_messages,
             default_template=chat_template,
             default_template_content_format=chat_template_content_format,
-            default_template_kwargs=None,
+            default_template_kwargs=chat_template_kwargs,
             tool_dicts=tool_dicts,
             tool_parser=tool_parser,
+            reasoning_parser=self.parser.reasoning_parser_cls if self.parser else None,
         )
         return engine_inputs
 
@@ -630,6 +660,7 @@ async def _generate_with_builtin_tools(
         lora_request: LoRARequest | None = None,
         priority: int = 0,
         trace_headers: Mapping[str, str] | None = None,
+        reasoning_parser_kwargs: dict[str, Any] | None = None,
     ):
         max_model_len = self.model_config.max_model_len
 
@@ -653,6 +684,7 @@ async def _generate_with_builtin_tools(
                 lora_request=lora_request,
                 trace_headers=trace_headers,
                 priority=priority,
+                reasoning_parser_kwargs=reasoning_parser_kwargs,
             )
 
             async for res in generator:
@@ -694,6 +726,9 @@ async def _generate_with_builtin_tools(
                     self._extract_prompt_len(engine_input),
                     self.default_sampling_params,  # type: ignore
                     self.override_max_tokens,  # type: ignore
+                    truncate_prompt_tokens=(
+                        -1 if context.request.truncation != "disabled" else None
+                    ),
                 )
 
             # OPTIMIZATION
@@ -705,9 +740,10 @@ def _make_request_with_harmony(
         request: ResponsesRequest,
         prev_response: ResponsesResponse | None,
     ):
-        if request.tool_choice != "auto":
+        if request.tool_choice not in ("auto", "none"):
             raise NotImplementedError(
-                "Only 'auto' tool_choice is supported in response API with Harmony"
+                "Only 'auto' or 'none' tool_choice is supported "
+                "in response API with Harmony"
             )
 
         arrival_time = time.time()
@@ -833,7 +869,10 @@ async def responses_full_generator(
             and self.parser.reasoning_parser_cls is not None
             and isinstance(context, (SimpleContext, ParsableContext))
         ):
-            reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
+            reasoning_parser = self.parser.reasoning_parser_cls(
+                tokenizer,
+                chat_template_kwargs=self._effective_chat_template_kwargs(request),
+            )
             accumulated = getattr(context, "_accumulated_token_ids", []) or []
             num_reasoning_tokens = reasoning_parser.count_reasoning_tokens(accumulated)
 
@@ -1001,7 +1040,7 @@ def _make_response_output_items(
 
         # Use parser to extract and create response output items
         if self.parser:
-            parser = self.parser(tokenizer)
+            parser = self.parser(tokenizer, request.tools)
             return parser.extract_response_outputs(
                 model_output=final_output.text,
                 model_output_token_ids=final_output.token_ids,
@@ -1037,10 +1076,11 @@ def _make_response_output_items_with_harmony(
     ) -> list[ResponseOutputItem]:
         output_items: list[ResponseOutputItem] = []
         num_init_messages = context.num_init_messages
+        fn_names = context.function_tool_names
         for msg in context.messages[num_init_messages:]:
-            output_items.extend(harmony_to_response_output(msg))
+            output_items.extend(harmony_to_response_output(msg, fn_names))
         # Handle the generation stopped in the middle (if any).
-        last_items = parser_state_to_response_output(context.parser)
+        last_items = parser_state_to_response_output(context.parser, fn_names)
         if last_items:
             output_items.extend(last_items)
         return output_items
@@ -1336,563 +1376,59 @@ async def _process_simple_streaming_events(
             [StreamingResponsesResponse], StreamingResponsesResponse
         ],
     ) -> AsyncGenerator[StreamingResponsesResponse, None]:
-        current_content_index = 0
-        current_output_index = 0
-        current_item_id = ""
-        reasoning_parser = None
-        if self.parser and self.parser.reasoning_parser_cls:
-            reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
-        tool_parser = None
-        if self.parser and self.parser.tool_parser_cls:
-            tool_parser = self.parser.tool_parser_cls(tokenizer, request.tools)
-        reasoning_ended = False
-        tool_call_text_started = False
-        previous_text = ""
-        previous_token_ids: list[int] = []
-        prompt_is_reasoning_end = None
-        first_delta_sent = False
-        previous_delta_messages: list[DeltaMessage] = []
+        processor = SimpleStreamingEventProcessor()
+        parser = self.parser(tokenizer, request.tools) if self.parser else None
+
+        def _get_logprobs(
+            output: CompletionOutput,
+        ) -> list[response_text_delta_event.Logprob]:
+            if not request.is_include_output_logprobs():
+                return []
+            return self._create_stream_response_logprobs(
+                token_ids=output.token_ids,
+                logprobs=output.logprobs,
+                tokenizer=tokenizer,
+                top_logprobs=request.top_logprobs,
+            )
+
         async for ctx in result_generator:
             assert isinstance(ctx, SimpleContext)
-            if ctx.last_output is None:
+            if ctx.last_output is None or not ctx.last_output.outputs:
                 continue
-            if reasoning_parser and prompt_is_reasoning_end is None:
-                prompt_is_reasoning_end = reasoning_parser.is_reasoning_end(
-                    ctx.last_output.prompt_token_ids
-                )
-            if ctx.last_output.outputs:
-                output = ctx.last_output.outputs[0]
-                # finish_reason='error' indicates a retryable error
-                self._raise_if_error(output.finish_reason, request.request_id)
-                delta_text = output.text
-                delta_token_ids = as_list(output.token_ids)
-                current_text = previous_text + delta_text
-                current_token_ids = previous_token_ids + delta_token_ids
-
-                if reasoning_parser and tool_parser:
-                    if prompt_is_reasoning_end:
-                        reasoning_ended = True
-                    if not reasoning_ended:
-                        delta_message = reasoning_parser.extract_reasoning_streaming(
-                            previous_text=previous_text,
-                            current_text=current_text,
-                            delta_text=delta_text,
-                            previous_token_ids=previous_token_ids,
-                            current_token_ids=current_token_ids,
-                            delta_token_ids=delta_token_ids,
-                        )
-                        if reasoning_parser.is_reasoning_end(delta_token_ids):
-                            reasoning_ended = True
-                            current_token_ids = reasoning_parser.extract_content_ids(
-                                delta_token_ids
-                            )
-                            if delta_message and delta_message.content:
-                                current_text = delta_message.content
-                                delta_message.content = None
-                            else:
-                                current_text = ""
-
-                    if reasoning_ended:
-                        if not tool_call_text_started:
-                            tool_call_text_started = True
-                            previous_text = ""
-                            previous_token_ids = []
-                            delta_text = current_text
-                            delta_token_ids = current_token_ids
-
-                        delta_message = tool_parser.extract_tool_calls_streaming(
-                            previous_text=previous_text,
-                            current_text=current_text,
-                            delta_text=delta_text,
-                            previous_token_ids=previous_token_ids,
-                            current_token_ids=current_token_ids,
-                            delta_token_ids=delta_token_ids,
-                            request=request,  # type: ignore[arg-type]
-                        )
-                elif reasoning_parser:
-                    delta_message = reasoning_parser.extract_reasoning_streaming(
-                        previous_text=previous_text,
-                        current_text=current_text,
-                        delta_text=delta_text,
-                        previous_token_ids=previous_token_ids,
-                        current_token_ids=current_token_ids,
-                        delta_token_ids=delta_token_ids,
-                    )
-                elif tool_parser:
-                    delta_message = tool_parser.extract_tool_calls_streaming(
-                        previous_text=previous_text,
-                        current_text=current_text,
-                        delta_text=delta_text,
-                        previous_token_ids=previous_token_ids,
-                        current_token_ids=current_token_ids,
-                        delta_token_ids=delta_token_ids,
-                        request=request,  # type: ignore[arg-type]
-                    )
-                else:
-                    delta_message = DeltaMessage(
-                        content=output.text,
-                    )
-                previous_text = current_text
-                previous_token_ids = current_token_ids
-                if not delta_message:
-                    continue
-                if not first_delta_sent:
-                    current_item_id = random_uuid()
-                    if delta_message.tool_calls:
-                        current_tool_call_id = f"call_{random_uuid()}"
-                        assert len(delta_message.tool_calls) == 1, (
-                            "Multiple tool calls in one delta is not supported"
-                        )
-                        assert delta_message.tool_calls[0].function is not None, (
-                            "Tool call without function is not supported"
-                        )
-                        assert delta_message.tool_calls[0].function.name is not None, (
-                            "Tool call without function name is not supported"
-                        )
-                        current_tool_call_name = delta_message.tool_calls[
-                            0
-                        ].function.name
-                        yield _increment_sequence_number_and_return(
-                            ResponseOutputItemAddedEvent(
-                                type="response.output_item.added",
-                                sequence_number=-1,
-                                output_index=current_output_index,
-                                item=ResponseFunctionToolCallItem(
-                                    type="function_call",
-                                    id=current_item_id,
-                                    call_id=current_tool_call_id,
-                                    name=current_tool_call_name,
-                                    arguments=delta_message.tool_calls[
-                                        0
-                                    ].function.arguments,
-                                    status="in_progress",
-                                ),
-                            )
-                        )
-                    elif delta_message.reasoning:
-                        yield _increment_sequence_number_and_return(
-                            ResponseOutputItemAddedEvent(
-                                type="response.output_item.added",
-                                sequence_number=-1,
-                                output_index=current_output_index,
-                                item=ResponseReasoningItem(
-                                    type="reasoning",
-                                    id=current_item_id,
-                                    summary=[],
-                                    status="in_progress",
-                                ),
-                            )
-                        )
-                        yield _increment_sequence_number_and_return(
-                            ResponseReasoningPartAddedEvent(
-                                type="response.reasoning_part.added",
-                                sequence_number=-1,
-                                output_index=current_output_index,
-                                item_id=current_item_id,
-                                content_index=current_content_index,
-                                part=ResponseReasoningTextContent(
-                                    text="",
-                                    type="reasoning_text",
-                                ),
-                            )
-                        )
-                    elif not delta_message.tool_calls:
-                        yield _increment_sequence_number_and_return(
-                            ResponseOutputItemAddedEvent(
-                                type="response.output_item.added",
-                                sequence_number=-1,
-                                output_index=current_output_index,
-                                item=ResponseOutputMessage(
-                                    id=current_item_id,
-                                    type="message",
-                                    role="assistant",
-                                    content=[],
-                                    status="in_progress",
-                                ),
-                            )
-                        )
-                        yield _increment_sequence_number_and_return(
-                            ResponseContentPartAddedEvent(
-                                type="response.content_part.added",
-                                sequence_number=-1,
-                                output_index=current_output_index,
-                                item_id=current_item_id,
-                                content_index=current_content_index,
-                                part=ResponseOutputText(
-                                    type="output_text",
-                                    text="",
-                                    annotations=[],
-                                    logprobs=[],
-                                ),
-                            )
-                        )
-                    first_delta_sent = True
 
-                # check delta message and previous delta message are
-                # same as content or reasoning content
-                if (
-                    previous_delta_messages
-                    and previous_delta_messages[-1].reasoning is not None
-                    and delta_message.content is not None
-                ):
-                    # from reasoning to normal content, send done
-                    # event for reasoning
-                    reason_content = "".join(
-                        pm.reasoning
-                        for pm in previous_delta_messages
-                        if pm.reasoning is not None
-                    )
+            output = ctx.last_output.outputs[0]
+            self._raise_if_error(output.finish_reason, request.request_id)
+            delta_text = output.text
+            delta_token_ids = as_list(output.token_ids)
+
+            if parser:
+                delta_message = parser.parse_delta(
+                    delta_text=delta_text,
+                    delta_token_ids=delta_token_ids,
+                    request=request,
+                    prompt_token_ids=ctx.last_output.prompt_token_ids,
+                )
+            else:
+                delta_message = DeltaMessage(content=output.text)
 
-                    # delta message could have both reasoning and
-                    # content. Include current delta's reasoning in the
-                    # finalization since it may carry the tail end of
-                    # reasoning text (e.g. when reasoning end and
-                    # content start arrive in the same delta).
-                    if delta_message.reasoning is not None:
-                        yield _increment_sequence_number_and_return(
-                            ResponseReasoningTextDeltaEvent(
-                                type="response.reasoning_text.delta",
-                                sequence_number=-1,
-                                content_index=current_content_index,
-                                output_index=current_output_index,
-                                item_id=current_item_id,
-                                delta=delta_message.reasoning,
-                            )
-                        )
-                        reason_content += delta_message.reasoning
-                        delta_message = DeltaMessage(content=delta_message.content)
-
-                    yield _increment_sequence_number_and_return(
-                        ResponseReasoningTextDoneEvent(
-                            type="response.reasoning_text.done",
-                            item_id=current_item_id,
-                            sequence_number=-1,
-                            output_index=current_output_index,
-                            content_index=current_content_index,
-                            text=reason_content,
-                        )
-                    )
-                    yield _increment_sequence_number_and_return(
-                        ResponseReasoningPartDoneEvent(
-                            type="response.reasoning_part.done",
-                            sequence_number=-1,
-                            item_id=current_item_id,
-                            output_index=current_output_index,
-                            content_index=current_content_index,
-                            part=ResponseReasoningTextContent(
-                                text=reason_content,
-                                type="reasoning_text",
-                            ),
-                        )
-                    )
-                    current_content_index = 0
-                    reasoning_item = ResponseReasoningItem(
-                        type="reasoning",
-                        content=[
-                            ResponseReasoningTextContent(
-                                text=reason_content,
-                                type="reasoning_text",
-                            ),
-                        ],
-                        status="completed",
-                        id=current_item_id,
-                        summary=[],
-                    )
-                    yield _increment_sequence_number_and_return(
-                        ResponseOutputItemDoneEvent(
-                            type="response.output_item.done",
-                            sequence_number=-1,
-                            output_index=current_output_index,
-                            item=reasoning_item,
-                        )
-                    )
-                    current_output_index += 1
-                    current_item_id = str(uuid.uuid4())
-                    yield _increment_sequence_number_and_return(
-                        ResponseOutputItemAddedEvent(
-                            type="response.output_item.added",
-                            sequence_number=-1,
-                            output_index=current_output_index,
-                            item=ResponseOutputMessage(
-                                id=current_item_id,
-                                type="message",
-                                role="assistant",
-                                content=[],
-                                status="in_progress",
-                            ),
-                        )
-                    )
-                    yield _increment_sequence_number_and_return(
-                        ResponseContentPartAddedEvent(
-                            type="response.content_part.added",
-                            sequence_number=-1,
-                            output_index=current_output_index,
-                            item_id=current_item_id,
-                            content_index=current_content_index,
-                            part=ResponseOutputText(
-                                type="output_text",
-                                text="",
-                                annotations=[],
-                                logprobs=[],
-                            ),
-                        )
-                    )
-                    # reset previous delta messages
-                    previous_delta_messages = []
-                if delta_message.tool_calls and delta_message.tool_calls[0].function:
-                    if delta_message.tool_calls[0].function.arguments:
-                        yield _increment_sequence_number_and_return(
-                            ResponseFunctionCallArgumentsDeltaEvent(
-                                type="response.function_call_arguments.delta",
-                                sequence_number=-1,
-                                output_index=current_output_index,
-                                item_id=current_item_id,
-                                delta=delta_message.tool_calls[0].function.arguments,
-                            )
-                        )
-                    # tool call initiated with no arguments
-                    elif delta_message.tool_calls[0].function.name:
-                        # send done with current content part
-                        # and add new function call item
-                        yield _increment_sequence_number_and_return(
-                            ResponseTextDoneEvent(
-                                type="response.output_text.done",
-                                sequence_number=-1,
-                                output_index=current_output_index,
-                                content_index=current_content_index,
-                                text="",
-                                logprobs=[],
-                                item_id=current_item_id,
-                            )
-                        )
-                        yield _increment_sequence_number_and_return(
-                            ResponseContentPartDoneEvent(
-                                type="response.content_part.done",
-                                sequence_number=-1,
-                                item_id=current_item_id,
-                                output_index=current_output_index,
-                                content_index=current_content_index,
-                                part=ResponseOutputText(
-                                    type="output_text",
-                                    text="",
-                                    annotations=[],
-                                    logprobs=[],
-                                ),
-                            )
-                        )
-                        yield _increment_sequence_number_and_return(
-                            ResponseOutputItemDoneEvent(
-                                type="response.output_item.done",
-                                sequence_number=-1,
-                                output_index=current_output_index,
-                                item=ResponseOutputMessage(
-                                    id=current_item_id,
-                                    type="message",
-                                    role="assistant",
-                                    content=[],
-                                    status="completed",
-                                ),
-                            )
-                        )
-                        current_output_index += 1
-                        current_item_id = random_uuid()
-                        assert delta_message.tool_calls[0].function is not None
-                        current_tool_call_name = delta_message.tool_calls[
-                            0
-                        ].function.name
-                        current_tool_call_id = f"call_{random_uuid()}"
-                        yield _increment_sequence_number_and_return(
-                            ResponseOutputItemAddedEvent(
-                                type="response.output_item.added",
-                                sequence_number=-1,
-                                output_index=current_output_index,
-                                item=ResponseFunctionToolCallItem(
-                                    type="function_call",
-                                    id=current_item_id,
-                                    call_id=current_tool_call_id,
-                                    name=current_tool_call_name,
-                                    arguments="",
-                                    status="in_progress",
-                                ),
-                            )
-                        )
-                        # skip content part for tool call
-                        current_content_index = 1
-                        continue
-                elif delta_message.reasoning is not None:
-                    yield _increment_sequence_number_and_return(
-                        ResponseReasoningTextDeltaEvent(
-                            type="response.reasoning_text.delta",
-                            sequence_number=-1,
-                            content_index=current_content_index,
-                            output_index=current_output_index,
-                            item_id=current_item_id,
-                            delta=delta_message.reasoning,
-                        )
-                    )
-                elif delta_message.content:
-                    yield _increment_sequence_number_and_return(
-                        ResponseTextDeltaEvent(
-                            type="response.output_text.delta",
-                            sequence_number=-1,
-                            content_index=current_content_index,
-                            output_index=current_output_index,
-                            item_id=current_item_id,
-                            delta=delta_message.content,
-                            logprobs=(
-                                self._create_stream_response_logprobs(
-                                    token_ids=output.token_ids,
-                                    logprobs=output.logprobs,
-                                    tokenizer=tokenizer,
-                                    top_logprobs=request.top_logprobs,
-                                )
-                                if request.is_include_output_logprobs()
-                                else []
-                            ),
-                        )
-                    )
+            if not delta_message:
+                continue
 
-                previous_delta_messages.append(delta_message)
+            target_state, tool_call = processor.resolve_target_state(delta_message)
+            if target_state == _StateType.NONE:
+                continue
 
-        if previous_delta_messages:
-            parts = []
-            for pm in previous_delta_messages:
-                if pm.tool_calls:
-                    assert len(pm.tool_calls) == 1, (
-                        "Multiple tool calls in one delta is not supported"
-                    )
-                    assert pm.tool_calls[0].function is not None, (
-                        "Tool call without function is not supported"
-                    )
-                    parts.append(pm.tool_calls[0].function.arguments or "")
+            if processor.needs_transition(target_state, tool_call):
+                for event in processor.close_current():
+                    yield _increment_sequence_number_and_return(event)
+                for event in processor.open(target_state, tool_call):
+                    yield _increment_sequence_number_and_return(event)
 
-            tool_call_arguments = "".join(parts)
-            if tool_call_arguments:
-                yield _increment_sequence_number_and_return(
-                    ResponseFunctionCallArgumentsDoneEvent(
-                        type="response.function_call_arguments.done",
-                        sequence_number=-1,
-                        output_index=current_output_index,
-                        item_id=current_item_id,
-                        arguments=tool_call_arguments,
-                        name=current_tool_call_name,
-                    )
-                )
-                current_content_index = 0
-                function_call_item = ResponseFunctionToolCall(
-                    type="function_call",
-                    name=current_tool_call_name,
-                    arguments=tool_call_arguments,
-                    status="completed",
-                    id=current_item_id,
-                    call_id=current_tool_call_id,
-                )
-                yield _increment_sequence_number_and_return(
-                    ResponseOutputItemDoneEvent(
-                        type="response.output_item.done",
-                        sequence_number=-1,
-                        output_index=current_output_index,
-                        item=function_call_item,
-                    )
-                )
+            for event in processor.emit_delta(delta_message, output, _get_logprobs):
+                yield _increment_sequence_number_and_return(event)
 
-            elif previous_delta_messages[-1].reasoning is not None:
-                reason_content = "".join(
-                    pm.reasoning
-                    for pm in previous_delta_messages
-                    if pm.reasoning is not None
-                )
-                yield _increment_sequence_number_and_return(
-                    ResponseReasoningTextDoneEvent(
-                        type="response.reasoning_text.done",
-                        item_id=current_item_id,
-                        sequence_number=-1,
-                        output_index=current_output_index,
-                        content_index=current_content_index,
-                        text=reason_content,
-                    )
-                )
-                yield _increment_sequence_number_and_return(
-                    ResponseReasoningPartDoneEvent(
-                        type="response.reasoning_part.done",
-                        sequence_number=-1,
-                        item_id=current_item_id,
-                        output_index=current_output_index,
-                        content_index=current_content_index,
-                        part=ResponseReasoningTextContent(
-                            text=reason_content,
-                            type="reasoning_text",
-                        ),
-                    )
-                )
-                reasoning_item = ResponseReasoningItem(
-                    type="reasoning",
-                    content=[
-                        ResponseReasoningTextContent(
-                            text=reason_content,
-                            type="reasoning_text",
-                        ),
-                    ],
-                    status="completed",
-                    id=current_item_id,
-                    summary=[],
-                )
-                yield _increment_sequence_number_and_return(
-                    ResponseOutputItemDoneEvent(
-                        type="response.output_item.done",
-                        sequence_number=-1,
-                        output_index=current_output_index,
-                        item=reasoning_item,
-                    )
-                )
-            elif previous_delta_messages[-1].content:
-                final_content = "".join(
-                    pm.content for pm in previous_delta_messages if pm.content
-                )
-                yield _increment_sequence_number_and_return(
-                    ResponseTextDoneEvent(
-                        type="response.output_text.done",
-                        sequence_number=-1,
-                        output_index=current_output_index,
-                        content_index=current_content_index,
-                        text=final_content,
-                        logprobs=[],
-                        item_id=current_item_id,
-                    )
-                )
-                part = ResponseOutputText(
-                    text=final_content,
-                    type="output_text",
-                    annotations=[],
-                )
-                yield _increment_sequence_number_and_return(
-                    ResponseContentPartDoneEvent(
-                        type="response.content_part.done",
-                        sequence_number=-1,
-                        item_id=current_item_id,
-                        output_index=current_output_index,
-                        content_index=current_content_index,
-                        part=part,
-                    )
-                )
-                item = ResponseOutputMessage(
-                    type="message",
-                    role="assistant",
-                    content=[
-                        part,
-                    ],
-                    status="completed",
-                    id=current_item_id,
-                    summary=[],
-                )
-                yield _increment_sequence_number_and_return(
-                    ResponseOutputItemDoneEvent(
-                        type="response.output_item.done",
-                        sequence_number=-1,
-                        output_index=current_output_index,
-                        item=item,
-                    )
-                )
+        for event in processor.close_current():
+            yield _increment_sequence_number_and_return(event)
 
     async def _process_harmony_streaming_events(
         self,
@@ -1919,7 +1455,9 @@ async def _process_harmony_streaming_events(
             if ctx.is_expecting_start():
                 if len(ctx.parser.messages) > 0:
                     previous_item = ctx.parser.messages[-1]
-                    for event in emit_previous_item_done_events(previous_item, state):
+                    for event in emit_previous_item_done_events(
+                        previous_item, state, ctx.function_tool_names
+                    ):
                         yield _increment_sequence_number_and_return(event)
                 state.reset_for_new_item()
 
@@ -1977,7 +1515,7 @@ def _increment_sequence_number_and_return(
                 output=[],
                 status="in_progress",
                 usage=None,
-            ).model_dump()
+            ).model_dump(mode="json", by_alias=True)
             yield _increment_sequence_number_and_return(
                 ResponseCreatedEvent(
                     type="response.created",
diff --git a/vllm/entrypoints/openai/responses/streaming_events.py b/vllm/entrypoints/openai/responses/streaming_events.py
index cc242e7baa83..b49c46fc5390 100644
--- a/vllm/entrypoints/openai/responses/streaming_events.py
+++ b/vllm/entrypoints/openai/responses/streaming_events.py
@@ -16,8 +16,10 @@
 """
 
 import json
-from dataclasses import dataclass
-from typing import Final
+from collections.abc import Callable
+from dataclasses import dataclass, field
+from enum import Enum, auto
+from typing import Any, ClassVar, Final, NamedTuple
 
 from openai.types.responses import (
     ResponseCodeInterpreterCallCodeDeltaEvent,
@@ -31,6 +33,7 @@
     ResponseFunctionCallArgumentsDeltaEvent,
     ResponseFunctionCallArgumentsDoneEvent,
     ResponseFunctionToolCall,
+    ResponseFunctionToolCallItem,
     ResponseFunctionWebSearch,
     ResponseMcpCallArgumentsDeltaEvent,
     ResponseMcpCallArgumentsDoneEvent,
@@ -49,6 +52,7 @@
     ResponseWebSearchCallInProgressEvent,
     ResponseWebSearchCallSearchingEvent,
     response_function_web_search,
+    response_text_delta_event,
 )
 from openai.types.responses.response_output_item import McpCall
 from openai.types.responses.response_reasoning_item import (
@@ -57,12 +61,18 @@
 from openai_harmony import Message as HarmonyMessage
 
 from vllm.entrypoints.mcp.tool_server import ToolServer
+from vllm.entrypoints.openai.engine.protocol import DeltaMessage
+from vllm.entrypoints.openai.parser.harmony_utils import (
+    extract_function_from_recipient,
+    is_function_recipient,
+)
 from vllm.entrypoints.openai.responses.context import StreamingHarmonyContext
 from vllm.entrypoints.openai.responses.protocol import (
     ResponseReasoningPartAddedEvent,
     ResponseReasoningPartDoneEvent,
     StreamingResponsesResponse,
 )
+from vllm.outputs import CompletionOutput
 from vllm.utils import random_uuid
 
 TOOL_NAME_TO_MCP_SERVER_LABEL: Final[dict[str, str]] = {
@@ -105,19 +115,19 @@ def reset_for_new_item(self) -> None:
         self.current_call_id = ""
 
 
-def is_mcp_tool_by_namespace(recipient: str | None) -> bool:
+def is_mcp_tool_by_namespace(
+    recipient: str | None,
+    allowed_function_tool_names: frozenset[str] | None = None,
+) -> bool:
     """
     Determine if a tool call is an MCP tool based on recipient prefix.
 
-    - Tools starting with "functions." are function calls
-    - Everything else is an MCP tool
+    Inverse of :func:`is_function_recipient` — everything that is not
+    a function call is an MCP tool.
     """
     if recipient is None:
         return False
-
-    # Function calls have "functions." prefix
-    # Everything else is an MCP tool
-    return not recipient.startswith("functions.")
+    return not is_function_recipient(recipient, allowed_function_tool_names)
 
 
 # =====================================================================
@@ -569,16 +579,16 @@ def emit_content_delta_events(
         return emit_text_delta_events(delta, state)
     elif channel == "analysis" and recipient is None:
         return emit_reasoning_delta_events(delta, state)
-    # built-in tools will be triggered on the analysis channel
-    # However, occasionally built-in tools will
-    # still be output to commentary.
-    elif channel in ("commentary", "analysis") and recipient is not None:
-        if recipient.startswith("functions."):
-            function_name = recipient[len("functions.") :]
+    elif recipient is not None:
+        fn_names = ctx.function_tool_names
+        if is_function_recipient(recipient, fn_names):
+            function_name = extract_function_from_recipient(recipient)
             return emit_function_call_delta_events(delta, function_name, state)
         elif recipient == "python":
             return emit_code_interpreter_delta_events(delta, state)
-        elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(recipient):
+        elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(
+            recipient, fn_names
+        ):
             return emit_mcp_delta_events(delta, state, recipient)
 
     return []
@@ -587,6 +597,7 @@ def emit_content_delta_events(
 def emit_previous_item_done_events(
     previous_item: HarmonyMessage,
     state: StreamingState,
+    function_tool_names: frozenset[str] | None = None,
 ) -> list[StreamingResponsesResponse]:
     """Emit done events for the previous item when expecting a new start.
 
@@ -596,13 +607,13 @@ def emit_previous_item_done_events(
     text = previous_item.content[0].text
     if previous_item.recipient is not None:
         # Deal with tool call
-        if previous_item.recipient.startswith("functions."):
-            function_name = previous_item.recipient[len("functions.") :]
+        if is_function_recipient(previous_item.recipient, function_tool_names):
+            function_name = extract_function_from_recipient(previous_item.recipient)
             return emit_function_call_done_events(function_name, text, state)
         elif previous_item.recipient == "python":
             return emit_code_interpreter_completion_events(previous_item, state)
         elif (
-            is_mcp_tool_by_namespace(previous_item.recipient)
+            is_mcp_tool_by_namespace(previous_item.recipient, function_tool_names)
             and state.current_item_id is not None
             and state.current_item_id.startswith("mcp_")
         ):
@@ -786,9 +797,12 @@ def emit_tool_action_events(
         and state.sent_output_item_added
     ):
         recipient = previous_item.recipient
+        fn_names = ctx.function_tool_names
         if recipient == "python":
             events.extend(emit_code_interpreter_completion_events(previous_item, state))
-        elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(recipient):
+        elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(
+            recipient, fn_names
+        ):
             events.extend(
                 emit_mcp_completion_events(
                     recipient, previous_item.content[0].text, state
@@ -796,3 +810,457 @@ def emit_tool_action_events(
             )
 
     return events
+
+
+# =====================================================================
+# Simple streaming helpers
+# =====================================================================
+
+
+class _StateType(Enum):
+    NONE = auto()
+    CONTENT = auto()
+    REASONING = auto()
+    TOOL_CALL = auto()
+
+
+@dataclass
+class SimpleStreamingState:
+    output_index: int = 0
+    current_item_id: str = ""
+    content_index: int = 0
+    accumulated_text: str = ""
+    tool_call_id: str = ""
+    tool_call_name: str = ""
+    tool_call_index: int | None = None
+    has_emitted_tool_call_delta: bool = False
+    current_state: _StateType = field(default_factory=lambda: _StateType.NONE)
+
+
+def emit_simple_content_open(
+    state: SimpleStreamingState,
+) -> list[StreamingResponsesResponse]:
+    state.current_state = _StateType.CONTENT
+    state.current_item_id = random_uuid()
+    state.content_index = 0
+    state.accumulated_text = ""
+    return [
+        ResponseOutputItemAddedEvent(
+            type="response.output_item.added",
+            sequence_number=-1,
+            output_index=state.output_index,
+            item=ResponseOutputMessage(
+                id=state.current_item_id,
+                type="message",
+                role="assistant",
+                content=[],
+                status="in_progress",
+            ),
+        ),
+        ResponseContentPartAddedEvent(
+            type="response.content_part.added",
+            sequence_number=-1,
+            output_index=state.output_index,
+            item_id=state.current_item_id,
+            content_index=state.content_index,
+            part=ResponseOutputText(
+                type="output_text",
+                text="",
+                annotations=[],
+                logprobs=[],
+            ),
+        ),
+    ]
+
+
+def emit_simple_content_delta(
+    state: SimpleStreamingState,
+    delta: str,
+    logprobs: list[response_text_delta_event.Logprob] | None = None,
+) -> list[StreamingResponsesResponse]:
+    state.accumulated_text += delta
+    return [
+        ResponseTextDeltaEvent(
+            type="response.output_text.delta",
+            sequence_number=-1,
+            content_index=state.content_index,
+            output_index=state.output_index,
+            item_id=state.current_item_id,
+            delta=delta,
+            logprobs=logprobs or [],
+        )
+    ]
+
+
+def emit_simple_content_done(
+    state: SimpleStreamingState,
+) -> list[StreamingResponsesResponse]:
+    part = ResponseOutputText(
+        type="output_text",
+        text=state.accumulated_text,
+        annotations=[],
+    )
+    events: list[StreamingResponsesResponse] = [
+        ResponseTextDoneEvent(
+            type="response.output_text.done",
+            sequence_number=-1,
+            output_index=state.output_index,
+            content_index=state.content_index,
+            text=state.accumulated_text,
+            logprobs=[],
+            item_id=state.current_item_id,
+        ),
+        ResponseContentPartDoneEvent(
+            type="response.content_part.done",
+            sequence_number=-1,
+            item_id=state.current_item_id,
+            output_index=state.output_index,
+            content_index=state.content_index,
+            part=part,
+        ),
+        ResponseOutputItemDoneEvent(
+            type="response.output_item.done",
+            sequence_number=-1,
+            output_index=state.output_index,
+            item=ResponseOutputMessage(
+                id=state.current_item_id,
+                type="message",
+                role="assistant",
+                content=[part] if state.accumulated_text else [],
+                status="completed",
+                summary=[],
+            ),
+        ),
+    ]
+    state.output_index += 1
+    state.current_state = _StateType.NONE
+    return events
+
+
+def emit_simple_reasoning_open(
+    state: SimpleStreamingState,
+) -> list[StreamingResponsesResponse]:
+    state.current_state = _StateType.REASONING
+    state.current_item_id = random_uuid()
+    state.content_index = 0
+    state.accumulated_text = ""
+    return [
+        ResponseOutputItemAddedEvent(
+            type="response.output_item.added",
+            sequence_number=-1,
+            output_index=state.output_index,
+            item=ResponseReasoningItem(
+                type="reasoning",
+                id=state.current_item_id,
+                summary=[],
+                status="in_progress",
+            ),
+        ),
+        ResponseReasoningPartAddedEvent(
+            type="response.reasoning_part.added",
+            sequence_number=-1,
+            output_index=state.output_index,
+            item_id=state.current_item_id,
+            content_index=state.content_index,
+            part=ResponseReasoningTextContent(
+                text="",
+                type="reasoning_text",
+            ),
+        ),
+    ]
+
+
+def emit_simple_reasoning_delta(
+    state: SimpleStreamingState,
+    delta: str,
+) -> list[StreamingResponsesResponse]:
+    state.accumulated_text += delta
+    return [
+        ResponseReasoningTextDeltaEvent(
+            type="response.reasoning_text.delta",
+            item_id=state.current_item_id,
+            sequence_number=-1,
+            output_index=state.output_index,
+            content_index=state.content_index,
+            delta=delta,
+        )
+    ]
+
+
+def emit_simple_reasoning_done(
+    state: SimpleStreamingState,
+) -> list[StreamingResponsesResponse]:
+    part = ResponseReasoningTextContent(
+        text=state.accumulated_text,
+        type="reasoning_text",
+    )
+    events: list[StreamingResponsesResponse] = [
+        ResponseReasoningTextDoneEvent(
+            type="response.reasoning_text.done",
+            item_id=state.current_item_id,
+            sequence_number=-1,
+            output_index=state.output_index,
+            content_index=state.content_index,
+            text=state.accumulated_text,
+        ),
+        ResponseReasoningPartDoneEvent(
+            type="response.reasoning_part.done",
+            sequence_number=-1,
+            item_id=state.current_item_id,
+            output_index=state.output_index,
+            content_index=state.content_index,
+            part=part,
+        ),
+        ResponseOutputItemDoneEvent(
+            type="response.output_item.done",
+            sequence_number=-1,
+            output_index=state.output_index,
+            item=ResponseReasoningItem(
+                type="reasoning",
+                content=[part],
+                status="completed",
+                id=state.current_item_id,
+                summary=[],
+            ),
+        ),
+    ]
+    state.output_index += 1
+    state.current_state = _StateType.NONE
+    return events
+
+
+def emit_simple_tool_call_open(
+    state: SimpleStreamingState,
+    name: str,
+    index: int | None,
+) -> list[StreamingResponsesResponse]:
+    state.current_state = _StateType.TOOL_CALL
+    state.current_item_id = random_uuid()
+    state.tool_call_id = f"call_{random_uuid()}"
+    state.tool_call_name = name
+    state.tool_call_index = index
+    state.accumulated_text = ""
+    state.has_emitted_tool_call_delta = False
+    return [
+        ResponseOutputItemAddedEvent(
+            type="response.output_item.added",
+            sequence_number=-1,
+            output_index=state.output_index,
+            item=ResponseFunctionToolCallItem(
+                type="function_call",
+                id=state.current_item_id,
+                call_id=state.tool_call_id,
+                name=name,
+                arguments="",
+                status="in_progress",
+            ),
+        ),
+    ]
+
+
+def emit_simple_tool_call_delta(
+    state: SimpleStreamingState,
+    delta: str,
+) -> list[StreamingResponsesResponse]:
+    state.accumulated_text += delta
+    state.has_emitted_tool_call_delta = True
+    return [
+        ResponseFunctionCallArgumentsDeltaEvent(
+            type="response.function_call_arguments.delta",
+            sequence_number=-1,
+            output_index=state.output_index,
+            item_id=state.current_item_id,
+            delta=delta,
+        )
+    ]
+
+
+def emit_simple_tool_call_done(
+    state: SimpleStreamingState,
+) -> list[StreamingResponsesResponse]:
+    events: list[StreamingResponsesResponse] = []
+    if state.has_emitted_tool_call_delta:
+        events.append(
+            ResponseFunctionCallArgumentsDoneEvent(
+                type="response.function_call_arguments.done",
+                sequence_number=-1,
+                output_index=state.output_index,
+                item_id=state.current_item_id,
+                arguments=state.accumulated_text,
+                name=state.tool_call_name,
+            )
+        )
+    events.append(
+        ResponseOutputItemDoneEvent(
+            type="response.output_item.done",
+            sequence_number=-1,
+            output_index=state.output_index,
+            item=ResponseFunctionToolCall(
+                type="function_call",
+                name=state.tool_call_name,
+                arguments=state.accumulated_text,
+                status="completed",
+                id=state.current_item_id,
+                call_id=state.tool_call_id,
+            ),
+        ),
+    )
+    state.output_index += 1
+    state.current_state = _StateType.NONE
+    return events
+
+
+class _StateHandlers(NamedTuple):
+    """Tuple for each state: open(start), delta(chunk), done(finish)."""
+
+    open_fn: Callable[..., list[StreamingResponsesResponse]]
+    delta_fn: Callable[..., list[StreamingResponsesResponse]]
+    done_fn: Callable[..., list[StreamingResponsesResponse]]
+
+
+class SimpleStreamingEventProcessor:
+    """
+    State-machine processor for the simple (non-Harmony) streaming path.
+
+    Core flow:
+      1. Resolve the target state from the delta_message
+         (CONTENT / REASONING / TOOL_CALL).
+      2. If the target state differs from the current one,
+         close_current() then open() the new state.
+      3. emit_delta() produces the incremental events for the state.
+
+    State lifecycle:
+      open()  ->  repeated emit_delta()  ->  close_current()
+    """
+
+    _STATE_HANDLERS: ClassVar[dict[_StateType, _StateHandlers]] = {
+        _StateType.CONTENT: _StateHandlers(
+            emit_simple_content_open,
+            emit_simple_content_delta,
+            emit_simple_content_done,
+        ),
+        _StateType.REASONING: _StateHandlers(
+            emit_simple_reasoning_open,
+            emit_simple_reasoning_delta,
+            emit_simple_reasoning_done,
+        ),
+        _StateType.TOOL_CALL: _StateHandlers(
+            emit_simple_tool_call_open,
+            emit_simple_tool_call_delta,
+            emit_simple_tool_call_done,
+        ),
+    }
+
+    def __init__(self, state: SimpleStreamingState | None = None) -> None:
+        self.state = state or SimpleStreamingState()
+
+    def resolve_target_state(
+        self, delta_message: DeltaMessage
+    ) -> tuple[_StateType, Any]:
+        """
+        Decide which state the next delta belongs to.
+
+        Priority: TOOL_CALL > REASONING > CONTENT, fallback to NONE.
+        For TOOL_CALL the first tool_call object is also returned so
+        callers can detect a switch between consecutive tools.
+        """
+        if (
+            delta_message.tool_calls
+            and delta_message.tool_calls[0].function is not None
+        ):
+            return _StateType.TOOL_CALL, delta_message.tool_calls[0]
+        if delta_message.reasoning is not None:
+            return _StateType.REASONING, None
+        if delta_message.content:
+            return _StateType.CONTENT, None
+        return _StateType.NONE, None
+
+    def needs_transition(self, target_state: _StateType, tool_call: Any) -> bool:
+        """
+        Return True when we must close the current state and open a new one.
+
+        Two cases trigger a transition:
+          1. The target state differs from the current state
+             (e.g. CONTENT -> TOOL_CALL).
+          2. We are already in TOOL_CALL but the next tool_call has a
+             different index (multiple consecutive tool calls).
+        """
+        if self.state.current_state != target_state:
+            return True
+        return (
+            target_state == _StateType.TOOL_CALL
+            and tool_call is not None
+            and self.state.tool_call_index is not None
+            and tool_call.index is not None
+            and self.state.tool_call_index != tool_call.index
+        )
+
+    def close_current(self) -> list[StreamingResponsesResponse]:
+        """Close the current state and emit its 'done' event sequence."""
+        handlers = self._STATE_HANDLERS.get(self.state.current_state)
+        if handlers is None:
+            return []
+        return handlers.done_fn(self.state)
+
+    def open(
+        self, target_state: _StateType, tool_call: Any = None
+    ) -> list[StreamingResponsesResponse]:
+        """Open a new state and emit its 'added' / 'open' event sequence."""
+        handlers = self._STATE_HANDLERS[target_state]
+        if target_state == _StateType.TOOL_CALL:
+            assert tool_call is not None
+            return handlers.open_fn(
+                self.state, tool_call.function.name, tool_call.index
+            )
+        return handlers.open_fn(self.state)
+
+    def emit_delta(
+        self,
+        delta_message: DeltaMessage,
+        output: CompletionOutput,
+        get_logprobs: Callable[
+            [CompletionOutput], list[response_text_delta_event.Logprob]
+        ]
+        | None = None,
+    ) -> list[StreamingResponsesResponse]:
+        """
+        Emit incremental events for the current state from the delta.
+
+        Special case: when already in REASONING and the same delta also
+        carries content, we emit the reasoning delta, close reasoning,
+        open content, and then emit the content delta.
+        """
+        handlers = self._STATE_HANDLERS[self.state.current_state]
+        events: list[StreamingResponsesResponse] = []
+
+        # Special case: reasoning -> content inside a single delta.
+        if (
+            self.state.current_state == _StateType.REASONING
+            and delta_message.reasoning is not None
+            and delta_message.content is not None
+        ):
+            events.extend(handlers.delta_fn(self.state, delta_message.reasoning))
+            events.extend(self.close_current())
+            events.extend(self.open(_StateType.CONTENT))
+            content_handlers = self._STATE_HANDLERS[_StateType.CONTENT]
+            logprobs = get_logprobs(output) if get_logprobs else []
+            events.extend(
+                content_handlers.delta_fn(self.state, delta_message.content, logprobs)
+            )
+            return events
+
+        if self.state.current_state == _StateType.TOOL_CALL:
+            assert delta_message.tool_calls is not None
+            tool_call_function = delta_message.tool_calls[0].function
+            assert tool_call_function is not None
+            if tool_call_function.arguments:
+                return handlers.delta_fn(self.state, tool_call_function.arguments)
+            return []
+        elif self.state.current_state == _StateType.REASONING:
+            assert delta_message.reasoning is not None
+            return handlers.delta_fn(self.state, delta_message.reasoning)
+        elif self.state.current_state == _StateType.CONTENT:
+            assert delta_message.content is not None
+            logprobs = get_logprobs(output) if get_logprobs else []
+            return handlers.delta_fn(self.state, delta_message.content, logprobs)
+        return []
diff --git a/vllm/entrypoints/openai/responses/utils.py b/vllm/entrypoints/openai/responses/utils.py
index 789a0e0b6be6..9556867f5c36 100644
--- a/vllm/entrypoints/openai/responses/utils.py
+++ b/vllm/entrypoints/openai/responses/utils.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from collections.abc import Iterable
 from typing import Any
 
 from openai.types.chat import (
@@ -21,7 +22,6 @@
 from openai.types.responses.tool import Tool
 
 from vllm import envs
-from vllm.entrypoints.constants import MCP_PREFIX
 from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionMessageParam
 from vllm.entrypoints.openai.responses.protocol import ResponseInputOutputItem
 from vllm.logger import init_logger
@@ -94,8 +94,10 @@ def construct_input_messages(
 
     # Prepend the conversation history.
     if prev_msg is not None:
-        # Add the previous messages.
-        messages.extend(prev_msg)
+        # Filter out system messages from previous conversation -- per the
+        # OpenAI spec, instructions should NOT carry over across responses.
+        # The current request's instructions (if any) were already added above.
+        messages.extend(m for m in prev_msg if m.get("role") != "system")
     if prev_response_output is not None:
         # Add the previous output.
         for output_item in prev_response_output:
@@ -119,76 +121,69 @@ def construct_input_messages(
     return messages
 
 
-def _maybe_combine_reasoning_and_tool_call(
-    item: ResponseInputOutputItem, messages: list[ChatCompletionMessageParam]
-) -> ChatCompletionMessageParam | None:
-    """Many models treat MCP calls and reasoning as a single message.
-    This function checks if the last message is a reasoning message and
-    the current message is a tool call"""
-    if not (
-        isinstance(item, ResponseFunctionToolCall)
-        and item.id
-        and item.id.startswith(MCP_PREFIX)
-    ):
-        return None
-    if len(messages) == 0:
-        return None
-    last_message = messages[-1]
-    if not (
-        last_message.get("role") == "assistant"
-        and last_message.get("reasoning") is not None
-    ):
-        return None
-
-    last_message["tool_calls"] = [
-        ChatCompletionMessageToolCallParam(
-            id=item.call_id,
-            function=FunctionCallTool(
-                name=item.name,
-                arguments=item.arguments,
-            ),
-            type="function",
-        )
-    ]
-    return last_message
-
-
 def construct_chat_messages_with_tool_call(
     input_messages: list[ResponseInputOutputItem],
 ) -> list[ChatCompletionMessageParam]:
-    """This function wraps _construct_single_message_from_response_item
-    Because some chatMessages come from multiple response items
-    for example a reasoning item and a MCP tool call are two response items
-    but are one chat message
+    """Build chat messages from response items.
+
+    Some chat messages span multiple response items (e.g., reasoning + tool calls).
     """
     messages: list[ChatCompletionMessageParam] = []
     for item in input_messages:
-        maybe_combined_message = _maybe_combine_reasoning_and_tool_call(item, messages)
-        if maybe_combined_message is not None:
-            messages[-1] = maybe_combined_message
-        else:
-            messages.append(_construct_single_message_from_response_item(item))
+        message = _construct_message_from_response_item(
+            item, prev_msg=messages[-1] if messages else None
+        )
+        if message is not None:
+            messages.append(message)
 
     return messages
 
 
-def _construct_single_message_from_response_item(
+def _construct_message_from_response_item(
     item: ResponseInputOutputItem,
-) -> ChatCompletionMessageParam:
+    prev_msg: ChatCompletionMessageParam | None = None,
+) -> ChatCompletionMessageParam | None:
+    """
+    Returns a new message or None. If `None`, `prev_msg` might be updated.
+    If `prev_msg` is `None`, a new message is always returned.
+    """
+    prev_assistant_msg = (
+        prev_msg if prev_msg and prev_msg.get("role") == "assistant" else None
+    )
+
     if isinstance(item, ResponseFunctionToolCall):
-        # Append the function call as a tool call.
+        tool_call = ChatCompletionMessageToolCallParam(
+            id=item.call_id,
+            function=FunctionCallTool(
+                name=item.name,
+                arguments=item.arguments,
+            ),
+            type="function",
+        )
+        if prev_assistant_msg:
+            tool_calls = prev_assistant_msg.get("tool_calls")
+            if tool_calls is None:
+                prev_assistant_msg["tool_calls"] = [tool_call]
+                return None
+            if isinstance(tool_calls, list):
+                tool_calls.append(tool_call)
+                return None
+            if isinstance(tool_calls, Iterable) and not isinstance(
+                tool_calls, (dict, str)
+            ):
+                tool_calls = list(tool_calls)
+                tool_calls.append(tool_call)
+                prev_assistant_msg["tool_calls"] = tool_calls
+                return None
+            logger.warning(
+                "Previous assistant message has unknown tool_calls format. "
+                "Tool call merging is skipped and a new assistant message is created. "
+                "Item %s",
+                item.id,
+            )
         return ChatCompletionAssistantMessageParam(
             role="assistant",
-            tool_calls=[
-                ChatCompletionMessageToolCallParam(
-                    id=item.call_id,
-                    function=FunctionCallTool(
-                        name=item.name,
-                        arguments=item.arguments,
-                    ),
-                    type="function",
-                )
-            ],
+            tool_calls=[tool_call],
         )
     elif isinstance(item, ResponseReasoningItem):
         reasoning = ""
@@ -204,14 +199,26 @@ def _construct_single_message_from_response_item(
                 "reasoning items.",
                 item.id,
             )
+
+        if prev_assistant_msg:
+            previous_reasoning = prev_assistant_msg.get("reasoning")
+            if previous_reasoning is None:
+                prev_assistant_msg["reasoning"] = reasoning
+                return None
         return {
             "role": "assistant",
             "reasoning": reasoning,
         }
     elif isinstance(item, ResponseOutputMessage):
+        output_text = item.content[0].text
+        if prev_assistant_msg:
+            previous_content = prev_assistant_msg.get("content")
+            if previous_content is None:
+                prev_assistant_msg["content"] = output_text
+                return None
         return {
             "role": "assistant",
-            "content": item.content[0].text,
+            "content": output_text,
         }
     elif isinstance(item, ResponseFunctionToolCallOutputItem):
         return ChatCompletionToolMessageParam(
@@ -226,7 +233,11 @@ def _construct_single_message_from_response_item(
             content=item.get("output"),
             tool_call_id=item.get("call_id"),
         )
-    return item  # type: ignore
+    return item  # type: ignore[arg-type]
+
+
+def extract_function_tool_names(tools: list[Tool]) -> frozenset[str]:
+    return frozenset(tool.name for tool in tools if tool.type == "function")
 
 
 def extract_tool_types(tools: list[Tool]) -> set[str]:
@@ -262,7 +273,7 @@ def convert_tool_responses_to_completions_format(tool: dict) -> dict:
 def construct_tool_dicts(
     tools: list[Tool], tool_choice: ToolChoice
 ) -> list[dict[str, Any]] | None:
-    if tools is None or (tool_choice == "none"):
+    if not tools or (tool_choice == "none"):
         tool_dicts = None
     else:
         tool_dicts = [
diff --git a/vllm/entrypoints/openai/run_batch.py b/vllm/entrypoints/openai/run_batch.py
index 03a15991d858..327254e3acc1 100644
--- a/vllm/entrypoints/openai/run_batch.py
+++ b/vllm/entrypoints/openai/run_batch.py
@@ -2,6 +2,8 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import asyncio
+import contextlib
+import json
 import sys
 import tempfile
 from argparse import Namespace
@@ -13,14 +15,18 @@
 
 import aiohttp
 import pybase64 as base64
+import pydantic
 import torch
 from fastapi import UploadFile
 from prometheus_client import start_http_server
 from pydantic import Field, TypeAdapter, field_validator, model_validator
 from pydantic_core.core_schema import ValidationInfo
 from starlette.datastructures import State
+from starlette.responses import JSONResponse
 from tqdm import tqdm
+from urllib3.util import parse_url
 
+import vllm.envs as envs
 from vllm.config import config
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.protocol import EngineClient
@@ -35,24 +41,26 @@
     ErrorResponse,
     OpenAIBaseModel,
 )
-from vllm.entrypoints.openai.speech_to_text.protocol import (
-    TranscriptionRequest,
-    TranscriptionResponse,
-    TranscriptionResponseVerbose,
-    TranslationRequest,
-    TranslationResponse,
-    TranslationResponseVerbose,
-)
 from vllm.entrypoints.pooling.embed.protocol import (
     EmbeddingRequest,
     EmbeddingResponse,
 )
-from vllm.entrypoints.pooling.score.protocol import (
+from vllm.entrypoints.pooling.scoring.protocol import (
     RerankRequest,
     RerankResponse,
     ScoreRequest,
     ScoreResponse,
 )
+from vllm.entrypoints.speech_to_text.transcription.protocol import (
+    TranscriptionRequest,
+    TranscriptionResponse,
+    TranscriptionResponseVerbose,
+)
+from vllm.entrypoints.speech_to_text.translation.protocol import (
+    TranslationRequest,
+    TranslationResponse,
+    TranslationResponseVerbose,
+)
 from vllm.entrypoints.utils import create_error_response
 from vllm.exceptions import VLLMValidationError
 from vllm.logger import init_logger
@@ -178,6 +186,18 @@ def check_type_for_url(cls, value: Any, info: ValidationInfo):
         return TypeAdapter(BatchRequestInputBody).validate_python(value)
 
 
+AllResponse: TypeAlias = (
+    ChatCompletionResponse
+    | EmbeddingResponse
+    | ScoreResponse
+    | RerankResponse
+    | TranscriptionResponse
+    | TranscriptionResponseVerbose
+    | TranslationResponse
+    | TranslationResponseVerbose
+)
+
+
 class BatchResponseData(OpenAIBaseModel):
     # HTTP status code of the response.
     status_code: int = 200
@@ -186,17 +206,7 @@ class BatchResponseData(OpenAIBaseModel):
     request_id: str
 
     # The body of the response.
-    body: (
-        ChatCompletionResponse
-        | EmbeddingResponse
-        | ScoreResponse
-        | RerankResponse
-        | TranscriptionResponse
-        | TranscriptionResponseVerbose
-        | TranslationResponse
-        | TranslationResponseVerbose
-        | None
-    ) = None
+    body: AllResponse | None = None
 
 
 class BatchRequestOutput(OpenAIBaseModel):
@@ -439,19 +449,25 @@ async def write_file(
         await write_local_file(path_or_url, batch_outputs)
 
 
-async def download_bytes_from_url(url: str) -> bytes:
+async def download_bytes_from_url(
+    url: str,
+    allowed_media_domains: list[str] | None = None,
+) -> bytes:
     """
     Download data from a URL or decode from a data URL.
 
     Args:
         url: Either an HTTP/HTTPS URL or a data URL (data:...;base64,...)
+        allowed_media_domains: If set, only HTTP/HTTPS URLs whose hostname
+            is in this list are permitted. data: URLs are not subject to
+            this restriction.
 
     Returns:
         Data as bytes
     """
     parsed = urlparse(url)
 
-    # Handle data URLs (base64 encoded)
+    # Handle data URLs (base64 encoded) - not subject to domain restrictions
     if parsed.scheme == "data":
         # Format: data:...;base64,<base64_data>
         if "," in url:
@@ -465,9 +481,24 @@ async def download_bytes_from_url(url: str) -> bytes:
 
     # Handle HTTP/HTTPS URLs
     elif parsed.scheme in ("http", "https"):
+        if allowed_media_domains is not None:
+            url_spec = parse_url(url)
+            if url_spec.hostname not in allowed_media_domains:
+                raise ValueError(
+                    f"The URL must be from one of the allowed domains: "
+                    f"{allowed_media_domains}. Input URL domain: "
+                    f"{url_spec.hostname}"
+                )
+            # Use the normalized URL to prevent parsing discrepancies
+            # between urllib3 and aiohttp (e.g. backslash-@ attacks).
+            url = url_spec.url
+
         async with (
             aiohttp.ClientSession() as session,
-            session.get(url) as resp,
+            session.get(
+                url,
+                allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
+            ) as resp,
         ):
             if resp.status != 200:
                 raise Exception(
@@ -513,19 +544,13 @@ async def run_request(
     except Exception as e:
         response = create_error_response(e)
 
-    if isinstance(
-        response,
-        (
-            ChatCompletionResponse,
-            EmbeddingResponse,
-            ScoreResponse,
-            RerankResponse,
-            TranscriptionResponse,
-            TranscriptionResponseVerbose,
-            TranslationResponse,
-            TranslationResponseVerbose,
-        ),
-    ):
+    if isinstance(response, JSONResponse):
+        with contextlib.suppress(pydantic.ValidationError):
+            response = TypeAdapter(AllResponse | ErrorResponse).validate_python(
+                json.loads(response.body)
+            )
+
+    if isinstance(response, AllResponse):
         batch_output = BatchRequestOutput(
             id=f"vllm-{random_uuid()}",
             custom_id=request.custom_id,
@@ -593,7 +618,10 @@ def handle_endpoint_request(
     return run_request(handler_fn, request, tracker)
 
 
-def make_transcription_wrapper(is_translation: bool) -> WrapperFn:
+def make_transcription_wrapper(
+    is_translation: bool,
+    allowed_media_domains: list[str] | None = None,
+) -> WrapperFn:
     """
     Factory function to create a wrapper for transcription/translation handlers.
     The wrapper converts BatchTranscriptionRequest or BatchTranslationRequest
@@ -602,6 +630,8 @@ def make_transcription_wrapper(is_translation: bool) -> WrapperFn:
     Args:
         is_translation: If True, process as translation; otherwise process
             as transcription
+        allowed_media_domains: If set, only URLs from these domains are
+            permitted for HTTP/HTTPS fetches.
 
     Returns:
         A function that takes a handler and returns a wrapped handler
@@ -619,7 +649,10 @@ async def transcription_wrapper(
         ):
             try:
                 # Download data from URL
-                audio_data = await download_bytes_from_url(batch_request_body.file_url)
+                audio_data = await download_bytes_from_url(
+                    batch_request_body.file_url,
+                    allowed_media_domains=allowed_media_domains,
+                )
 
                 # Create a mock file from the downloaded audio data
                 mock_file = UploadFile(
@@ -691,6 +724,8 @@ async def build_endpoint_registry(
     serving_embedding = getattr(state, "serving_embedding", None)
     serving_scores = getattr(state, "serving_scores", None)
 
+    allowed_media_domains = getattr(args, "allowed_media_domains", None)
+
     # Registry of endpoint configurations
     endpoint_registry: dict[str, dict[str, Any]] = {
         "completions": {
@@ -712,14 +747,14 @@ async def build_endpoint_registry(
         "score": {
             "url_matcher": lambda url: url.endswith("/score"),
             "handler_getter": lambda: (
-                serving_scores.create_score if serving_scores is not None else None
+                serving_scores if serving_scores is not None else None
             ),
             "wrapper_fn": None,
         },
         "rerank": {
             "url_matcher": lambda url: url.endswith("/rerank"),
             "handler_getter": lambda: (
-                serving_scores.do_rerank if serving_scores is not None else None
+                serving_scores if serving_scores is not None else None
             ),
             "wrapper_fn": None,
         },
@@ -730,7 +765,10 @@ async def build_endpoint_registry(
                 if openai_serving_transcription is not None
                 else None
             ),
-            "wrapper_fn": make_transcription_wrapper(is_translation=False),
+            "wrapper_fn": make_transcription_wrapper(
+                is_translation=False,
+                allowed_media_domains=allowed_media_domains,
+            ),
         },
         "translations": {
             "url_matcher": lambda url: url == "/v1/audio/translations",
@@ -739,7 +777,10 @@ async def build_endpoint_registry(
                 if openai_serving_translation is not None
                 else None
             ),
-            "wrapper_fn": make_transcription_wrapper(is_translation=True),
+            "wrapper_fn": make_transcription_wrapper(
+                is_translation=True,
+                allowed_media_domains=allowed_media_domains,
+            ),
         },
     }
 
diff --git a/vllm/entrypoints/openai/server_utils.py b/vllm/entrypoints/openai/server_utils.py
index 02b8c3352621..269c33549e84 100644
--- a/vllm/entrypoints/openai/server_utils.py
+++ b/vllm/entrypoints/openai/server_utils.py
@@ -15,7 +15,7 @@
 from fastapi.exceptions import RequestValidationError
 from fastapi.responses import JSONResponse
 from starlette.concurrency import iterate_in_threadpool
-from starlette.datastructures import URL, Headers, MutableHeaders
+from starlette.datastructures import Headers, MutableHeaders
 from starlette.types import ASGIApp, Message, Receive, Scope, Send
 
 from vllm import envs
@@ -35,6 +35,9 @@
 logger = init_logger("vllm.entrypoints.openai.server_utils")
 
 
+GUARDED_PREFIX = ("/v1", "/v2", "/inference")
+
+
 class AuthenticationMiddleware:
     """
     Pure ASGI middleware that authenticates each request by checking
@@ -44,7 +47,7 @@ class AuthenticationMiddleware:
     -----
     There are two cases in which authentication is skipped:
         1. The HTTP method is OPTIONS.
-        2. The request path doesn't start with /v1 (e.g. /health).
+        2. The request path doesn't start with GUARDED_PREFIX (e.g. /health).
     """
 
     def __init__(self, app: ASGIApp, tokens: list[str]) -> None:
@@ -69,15 +72,18 @@ def verify_token(self, headers: Headers) -> bool:
         return token_match
 
     def __call__(self, scope: Scope, receive: Receive, send: Send) -> Awaitable[None]:
-        if scope["type"] not in ("http", "websocket") or scope["method"] == "OPTIONS":
+        if (
+            scope["type"] not in ("http", "websocket")
+            or scope.get("method") == "OPTIONS"
+        ):
             # scope["type"] can be "lifespan" or "startup" for example,
             # in which case we don't need to do anything
             return self.app(scope, receive, send)
         root_path = scope.get("root_path", "")
-        url_path = URL(scope=scope).path.removeprefix(root_path)
+        url_path = scope["path"].removeprefix(root_path)
         headers = Headers(scope=scope)
         # Type narrow to satisfy mypy.
-        if url_path.startswith("/v1") and not self.verify_token(headers):
+        if url_path.startswith(GUARDED_PREFIX) and not self.verify_token(headers):
             response = JSONResponse(content={"error": "Unauthorized"}, status_code=401)
             return response(scope, receive, send)
         return self.app(scope, receive, send)
diff --git a/vllm/entrypoints/openai/speech_to_text/api_router.py b/vllm/entrypoints/openai/speech_to_text/api_router.py
deleted file mode 100644
index b940a97e4dff..000000000000
--- a/vllm/entrypoints/openai/speech_to_text/api_router.py
+++ /dev/null
@@ -1,148 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-
-from http import HTTPStatus
-from typing import TYPE_CHECKING, Annotated
-
-from fastapi import APIRouter, FastAPI, Form, Request
-from fastapi.responses import JSONResponse, StreamingResponse
-
-from vllm.entrypoints.openai.engine.protocol import ErrorResponse
-from vllm.entrypoints.openai.speech_to_text.protocol import (
-    TranscriptionRequest,
-    TranscriptionResponseVariant,
-    TranslationRequest,
-    TranslationResponseVariant,
-)
-from vllm.entrypoints.openai.speech_to_text.serving import (
-    OpenAIServingTranscription,
-    OpenAIServingTranslation,
-)
-from vllm.entrypoints.utils import (
-    load_aware_call,
-    with_cancellation,
-)
-from vllm.logger import init_logger
-
-if TYPE_CHECKING:
-    from argparse import Namespace
-
-    from starlette.datastructures import State
-
-    from vllm.engine.protocol import EngineClient
-    from vllm.entrypoints.logger import RequestLogger
-    from vllm.tasks import SupportedTask
-else:
-    RequestLogger = object
-
-logger = init_logger(__name__)
-
-router = APIRouter()
-
-
-def transcription(request: Request) -> OpenAIServingTranscription:
-    return request.app.state.openai_serving_transcription
-
-
-def translation(request: Request) -> OpenAIServingTranslation:
-    return request.app.state.openai_serving_translation
-
-
-@router.post(
-    "/v1/audio/transcriptions",
-    responses={
-        HTTPStatus.OK.value: {"content": {"text/event-stream": {}}},
-        HTTPStatus.BAD_REQUEST.value: {"model": ErrorResponse},
-        HTTPStatus.UNPROCESSABLE_ENTITY.value: {"model": ErrorResponse},
-        HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": ErrorResponse},
-    },
-)
-@with_cancellation
-@load_aware_call
-async def create_transcriptions(
-    raw_request: Request, request: Annotated[TranscriptionRequest, Form()]
-):
-    handler = transcription(raw_request)
-    if handler is None:
-        raise NotImplementedError("The model does not support Transcriptions API")
-
-    audio_data = await request.file.read()
-
-    generator = await handler.create_transcription(audio_data, request, raw_request)
-
-    if isinstance(generator, ErrorResponse):
-        return JSONResponse(
-            content=generator.model_dump(), status_code=generator.error.code
-        )
-
-    elif isinstance(generator, TranscriptionResponseVariant):
-        return JSONResponse(content=generator.model_dump())
-
-    return StreamingResponse(content=generator, media_type="text/event-stream")
-
-
-@router.post(
-    "/v1/audio/translations",
-    responses={
-        HTTPStatus.OK.value: {"content": {"text/event-stream": {}}},
-        HTTPStatus.BAD_REQUEST.value: {"model": ErrorResponse},
-        HTTPStatus.UNPROCESSABLE_ENTITY.value: {"model": ErrorResponse},
-        HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": ErrorResponse},
-    },
-)
-@with_cancellation
-@load_aware_call
-async def create_translations(
-    request: Annotated[TranslationRequest, Form()], raw_request: Request
-):
-    handler = translation(raw_request)
-    if handler is None:
-        raise NotImplementedError("The model does not support Translations API")
-
-    audio_data = await request.file.read()
-
-    generator = await handler.create_translation(audio_data, request, raw_request)
-
-    if isinstance(generator, ErrorResponse):
-        return JSONResponse(
-            content=generator.model_dump(), status_code=generator.error.code
-        )
-
-    elif isinstance(generator, TranslationResponseVariant):
-        return JSONResponse(content=generator.model_dump())
-
-    return StreamingResponse(content=generator, media_type="text/event-stream")
-
-
-def attach_router(app: FastAPI):
-    app.include_router(router)
-
-
-def init_transcription_state(
-    engine_client: "EngineClient",
-    state: "State",
-    args: "Namespace",
-    request_logger: RequestLogger | None,
-    supported_tasks: tuple["SupportedTask", ...],
-):
-    state.openai_serving_transcription = (
-        OpenAIServingTranscription(
-            engine_client,
-            state.openai_serving_models,
-            request_logger=request_logger,
-            enable_force_include_usage=args.enable_force_include_usage,
-        )
-        if "transcription" in supported_tasks
-        else None
-    )
-    state.openai_serving_translation = (
-        OpenAIServingTranslation(
-            engine_client,
-            state.openai_serving_models,
-            request_logger=request_logger,
-            enable_force_include_usage=args.enable_force_include_usage,
-        )
-        if "transcription" in supported_tasks
-        else None
-    )
diff --git a/vllm/entrypoints/pooling/__init__.py b/vllm/entrypoints/pooling/__init__.py
index 6d72bb1a8e2a..e69de29bb2d1 100644
--- a/vllm/entrypoints/pooling/__init__.py
+++ b/vllm/entrypoints/pooling/__init__.py
@@ -1,144 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-from typing import TYPE_CHECKING
-
-from fastapi import FastAPI
-
-from vllm.config import ModelConfig
-from vllm.logger import init_logger
-
-if TYPE_CHECKING:
-    from argparse import Namespace
-
-    from starlette.datastructures import State
-
-    from vllm.engine.protocol import EngineClient
-    from vllm.entrypoints.logger import RequestLogger
-    from vllm.tasks import SupportedTask
-else:
-    RequestLogger = object
-    SupportedTask = object
-
-logger = init_logger(__name__)
-
-
-def enable_scoring_api(
-    supported_tasks: tuple["SupportedTask", ...],
-    model_config: ModelConfig | None = None,
-) -> bool:
-    if any(t in supported_tasks for t in ("embed", "token_embed")):
-        return True
-
-    if model_config is not None and "classify" in supported_tasks:
-        num_labels = getattr(model_config.hf_config, "num_labels", 0)
-        if num_labels != 1:
-            logger.debug_once("Score API is only enabled for num_labels == 1.")
-            return False
-        return True
-
-    return False
-
-
-def register_pooling_api_routers(
-    app: FastAPI,
-    supported_tasks: tuple["SupportedTask", ...],
-    model_config: ModelConfig | None = None,
-):
-    if model_config is None:
-        return
-
-    pooling_task = model_config.get_pooling_task(supported_tasks)
-
-    if pooling_task is not None:
-        from vllm.entrypoints.pooling.pooling.api_router import router as pooling_router
-
-        app.include_router(pooling_router)
-
-    if "classify" in supported_tasks:
-        from vllm.entrypoints.pooling.classify.api_router import (
-            router as classify_router,
-        )
-
-        app.include_router(classify_router)
-
-    if "embed" in supported_tasks:
-        from vllm.entrypoints.pooling.embed.api_router import router as embed_router
-
-        app.include_router(embed_router)
-
-    if enable_scoring_api(supported_tasks, model_config):
-        from vllm.entrypoints.pooling.score.api_router import router as score_router
-
-        app.include_router(score_router)
-
-
-def init_pooling_state(
-    engine_client: "EngineClient",
-    state: "State",
-    args: "Namespace",
-    request_logger: RequestLogger | None,
-    supported_tasks: tuple["SupportedTask", ...],
-):
-    from vllm.entrypoints.chat_utils import load_chat_template
-    from vllm.entrypoints.pooling.classify.serving import ServingClassification
-    from vllm.entrypoints.pooling.embed.serving import ServingEmbedding
-    from vllm.entrypoints.pooling.pooling.serving import OpenAIServingPooling
-    from vllm.entrypoints.pooling.score.serving import ServingScores
-    from vllm.tasks import POOLING_TASKS
-
-    model_config = engine_client.model_config
-
-    resolved_chat_template = load_chat_template(args.chat_template)
-
-    state.serving_pooling = (
-        (
-            OpenAIServingPooling(
-                engine_client,
-                state.openai_serving_models,
-                state.openai_serving_render,
-                supported_tasks=supported_tasks,
-                request_logger=request_logger,
-                chat_template=resolved_chat_template,
-                chat_template_content_format=args.chat_template_content_format,
-                trust_request_chat_template=args.trust_request_chat_template,
-            )
-        )
-        if any(t in supported_tasks for t in POOLING_TASKS)
-        else None
-    )
-    state.serving_embedding = (
-        ServingEmbedding(
-            engine_client,
-            state.openai_serving_models,
-            request_logger=request_logger,
-            chat_template=resolved_chat_template,
-            chat_template_content_format=args.chat_template_content_format,
-            trust_request_chat_template=args.trust_request_chat_template,
-        )
-        if "embed" in supported_tasks
-        else None
-    )
-    state.serving_classification = (
-        ServingClassification(
-            engine_client,
-            state.openai_serving_models,
-            request_logger=request_logger,
-            chat_template=resolved_chat_template,
-            chat_template_content_format=args.chat_template_content_format,
-            trust_request_chat_template=args.trust_request_chat_template,
-        )
-        if "classify" in supported_tasks
-        else None
-    )
-    state.serving_scores = (
-        ServingScores(
-            engine_client,
-            state.openai_serving_models,
-            request_logger=request_logger,
-            score_template=resolved_chat_template,
-            log_error_stack=args.log_error_stack,
-        )
-        if enable_scoring_api(supported_tasks, model_config)
-        else None
-    )
diff --git a/vllm/entrypoints/pooling/base/io_processor.py b/vllm/entrypoints/pooling/base/io_processor.py
index 09e22156e6da..fc24bc657800 100644
--- a/vllm/entrypoints/pooling/base/io_processor.py
+++ b/vllm/entrypoints/pooling/base/io_processor.py
@@ -4,8 +4,8 @@
 from collections.abc import Sequence
 from typing import Any, Final
 
-from vllm import PoolingRequestOutput, PromptType
-from vllm.config import ModelConfig
+from vllm import PoolingParams, PoolingRequestOutput, PromptType
+from vllm.config import VllmConfig
 from vllm.entrypoints.chat_utils import (
     ChatCompletionMessageParam,
     ChatTemplateConfig,
@@ -13,28 +13,39 @@
     ConversationMessage,
 )
 from vllm.entrypoints.openai.engine.serving import RendererChatRequest, RendererRequest
-from vllm.entrypoints.pooling.typing import (
-    PoolingChatLikeRequest,
-    PoolingCompletionLikeRequest,
-    PoolingServeContext,
-)
 from vllm.inputs import EngineInput, SingletonPrompt
-from vllm.renderers import BaseRenderer, merge_kwargs
+from vllm.renderers import BaseRenderer, TokenizeParams, merge_kwargs
 from vllm.renderers.inputs.preprocess import parse_model_prompt, prompt_to_seq
 from vllm.tool_parsers import ToolParser
 from vllm.utils.mistral import is_mistral_tokenizer
 
+from ..scoring.typing import ScoringData
+from ..typing import (
+    OfflineInputsContext,
+    OfflineOutputsContext,
+    PoolingChatLikeRequest,
+    PoolingCompletionLikeRequest,
+    PoolingServeContext,
+)
+
 
 class PoolingIOProcessor:
+    """Processor for handling preprocessing & postprocessing ops for pooling requests.
+
+    This class manages both online (serving) and offline (batch) processing of pooling
+    requests, handling chat and completion formats.
+    """
+
     name: str
 
     def __init__(
         self,
-        model_config: ModelConfig,
+        vllm_config: VllmConfig,
         renderer: BaseRenderer,
         chat_template_config: ChatTemplateConfig,
     ):
-        self.model_config = model_config
+        self.vllm_config = vllm_config
+        self.model_config = vllm_config.model_config
         self.renderer = renderer
 
         self.chat_template = chat_template_config.chat_template
@@ -45,16 +56,16 @@ def __init__(
             chat_template_config.trust_request_chat_template
         )
 
-    def create_pooling_params(self, request):
-        return request.to_pooling_params()
-
     #######################################
     # online APIs
 
+    def create_pooling_params(self, request):
+        return request.to_pooling_params()
+
     def pre_process_online(self, ctx: PoolingServeContext):
         request = ctx.request
 
-        if isinstance(ctx.request, PoolingChatLikeRequest):
+        if isinstance(request, PoolingChatLikeRequest):
             self._validate_chat_template(
                 request_chat_template=request.chat_template,
                 chat_template_kwargs=request.chat_template_kwargs,
@@ -68,7 +79,7 @@ def pre_process_online(self, ctx: PoolingServeContext):
                 default_template_kwargs=None,
             )
         elif isinstance(request, PoolingCompletionLikeRequest):
-            engine_inputs = self._preprocess_completion_online(
+            engine_inputs = self._preprocess_cmpl_online(
                 request,
                 prompt_input=request.input,
                 prompt_embeds=None,
@@ -78,52 +89,36 @@ def pre_process_online(self, ctx: PoolingServeContext):
 
         ctx.engine_inputs = engine_inputs
 
-    async def pre_process_online_async(self, ctx: PoolingServeContext):
-        self.pre_process_online(ctx)
-
     def post_process_online(
         self,
         ctx: PoolingServeContext,
     ):
         pass
 
-    async def post_process_online_async(
-        self,
-        ctx: PoolingServeContext,
-    ):
-        self.post_process_online(ctx)
-
     #######################################
     # offline APIs
 
-    def pre_process_offline(
-        self,
-        prompts: PromptType | Sequence[PromptType],
-        tokenization_kwargs: dict[str, Any] | None = None,
-    ) -> Sequence[EngineInput]:
-        return self._preprocess_completion_offline(
-            prompts=prompts, tokenization_kwargs=tokenization_kwargs
+    def pre_process_offline(self, ctx: OfflineInputsContext) -> Sequence[EngineInput]:
+        assert not isinstance(ctx.prompts, ScoringData) and not (
+            isinstance(ctx.prompts, dict) and "data" in ctx.prompts
         )
 
-    async def pre_process_offline_async(self, *args, **kwargs):
-        return self.pre_process_offline(*args, **kwargs)
+        prompts_seq = prompt_to_seq(ctx.prompts)
+        tok_params = self.renderer.default_cmpl_tok_params.with_kwargs(
+            **(ctx.tokenization_kwargs or {})
+        )
+        return self._preprocess_cmpl_offline(prompts=prompts_seq, tok_params=tok_params)
 
     def post_process_offline(
         self,
-        outputs: list[PoolingRequestOutput],
+        ctx: OfflineOutputsContext,
     ) -> list[PoolingRequestOutput]:
-        return outputs
-
-    async def post_process_offline_async(
-        self,
-        outputs: list[PoolingRequestOutput],
-    ) -> list[PoolingRequestOutput]:
-        return self.post_process_offline(outputs)
+        return ctx.outputs
 
     #######################################
     # helpers
 
-    def _preprocess_completion_online(
+    def _preprocess_cmpl_online(
         self,
         request: RendererRequest,
         prompt_input: str | list[str] | list[int] | list[list[int]] | None,
@@ -201,31 +196,24 @@ def _preprocess_chat_online(
 
         return conversation, [engine_input]
 
-    def _preprocess_completion_offline(
+    def _preprocess_cmpl_offline(
         self,
         prompts: PromptType | Sequence[PromptType],
-        tokenization_kwargs: dict[str, Any] | None = None,
+        tok_params: TokenizeParams,
+        prompt_extras: dict[str, Any] | None = None,
     ) -> Sequence[EngineInput]:
-        renderer = self.renderer
-        model_config = self.model_config
-
         prompts = prompt_to_seq(prompts)
-
         parsed_prompts = [
             (
                 prompt
                 if isinstance(prompt, bytes)
-                else parse_model_prompt(model_config, prompt)
+                else parse_model_prompt(self.model_config, prompt)
             )
             for prompt in prompts
         ]
-        tok_params = renderer.default_cmpl_tok_params.with_kwargs(
-            **(tokenization_kwargs or {})
-        )
 
-        return renderer.render_cmpl(
-            parsed_prompts,
-            tok_params,
+        return self.renderer.render_cmpl(
+            parsed_prompts, tok_params, prompt_extras=prompt_extras
         )
 
     def _validate_chat_template(
@@ -247,3 +235,19 @@ def _validate_chat_template(
                 "Refused request with untrusted chat template."
             )
         return None
+
+    def _params_to_seq(
+        self,
+        params: PoolingParams | Sequence[PoolingParams],
+        num_requests: int,
+    ) -> Sequence[PoolingParams]:
+        if isinstance(params, Sequence):
+            if len(params) != num_requests:
+                raise ValueError(
+                    f"The lengths of prompts ({num_requests}) "
+                    f"and params ({len(params)}) must be the same."
+                )
+
+            return params
+
+        return [params] * num_requests
diff --git a/vllm/entrypoints/pooling/base/protocol.py b/vllm/entrypoints/pooling/base/protocol.py
index 2ce89e4bf2fc..9e410a2b540d 100644
--- a/vllm/entrypoints/pooling/base/protocol.py
+++ b/vllm/entrypoints/pooling/base/protocol.py
@@ -6,13 +6,14 @@
 
 from pydantic import Field, model_validator
 
+from vllm.config import ModelConfig
 from vllm.entrypoints.chat_utils import (
     ChatCompletionMessageParam,
     ChatTemplateContentFormatOption,
 )
 from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel
 from vllm.exceptions import VLLMValidationError
-from vllm.renderers import ChatParams, merge_kwargs
+from vllm.renderers import ChatParams, TokenizeParams, merge_kwargs
 from vllm.utils import random_uuid
 from vllm.utils.serial_utils import EmbedDType, EncodingFormat, Endianness
 
@@ -68,6 +69,88 @@ class PoolingBasicRequestMixin(OpenAIBaseModel):
     )
     # --8<-- [end:pooling-common-extra-params]
 
+    def _build_pooling_tok_params(
+        self,
+        model_config: ModelConfig,
+        *,
+        add_special_tokens: bool,
+        max_total_tokens: int | None,
+        max_output_tokens: int,
+        max_total_tokens_param: str = "max_model_len",
+        max_output_tokens_param: str | None = None,
+    ) -> TokenizeParams:
+        encoder_config = model_config.encoder_config or {}
+        if max_output_tokens_param is None:
+            return TokenizeParams(
+                max_total_tokens=max_total_tokens,
+                max_output_tokens=max_output_tokens,
+                truncate_prompt_tokens=self.truncate_prompt_tokens,
+                truncation_side=self.truncation_side,
+                do_lower_case=encoder_config.get("do_lower_case", False),
+                add_special_tokens=add_special_tokens,
+                max_total_tokens_param=max_total_tokens_param,
+            )
+
+        return TokenizeParams(
+            max_total_tokens=max_total_tokens,
+            max_output_tokens=max_output_tokens,
+            truncate_prompt_tokens=self.truncate_prompt_tokens,
+            truncation_side=self.truncation_side,
+            do_lower_case=encoder_config.get("do_lower_case", False),
+            add_special_tokens=add_special_tokens,
+            max_total_tokens_param=max_total_tokens_param,
+            max_output_tokens_param=max_output_tokens_param,
+        )
+
+
+class PoolingTokenizeParamsMixin:
+    add_special_tokens: bool
+
+    def _build_pooling_tok_params(
+        self,
+        model_config: ModelConfig,
+        *,
+        add_special_tokens: bool,
+        max_total_tokens: int | None,
+        max_output_tokens: int,
+        max_total_tokens_param: str = "max_model_len",
+        max_output_tokens_param: str | None = None,
+    ) -> TokenizeParams:
+        raise NotImplementedError
+
+
+class FixedMaxLenTokenizeParamsMixin(PoolingTokenizeParamsMixin):
+    def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
+        return self._build_pooling_tok_params(
+            model_config,
+            add_special_tokens=self.add_special_tokens,
+            max_total_tokens=model_config.max_model_len,
+            max_output_tokens=0,
+        )
+
+
+class EmbeddingTokenizeParamsMixin(PoolingTokenizeParamsMixin):
+    def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
+        default_max_total_tokens = model_config.max_model_len
+        max_total_tokens: int | None = default_max_total_tokens
+        max_output_tokens = 0
+
+        pooler_config = model_config.pooler_config
+        if pooler_config is not None:
+            if pooler_config.enable_chunked_processing:
+                max_total_tokens = None
+            else:
+                max_embed_len = pooler_config.max_embed_len or default_max_total_tokens
+                max_output_tokens = default_max_total_tokens - max_embed_len
+
+        return self._build_pooling_tok_params(
+            model_config,
+            add_special_tokens=self.add_special_tokens,
+            max_total_tokens=max_total_tokens,
+            max_output_tokens=max_output_tokens,
+            max_output_tokens_param="max_model_len - max_embed_len",
+        )
+
 
 class CompletionRequestMixin(OpenAIBaseModel):
     # --8<-- [start:completion-params]
diff --git a/vllm/entrypoints/pooling/base/serving.py b/vllm/entrypoints/pooling/base/serving.py
index 60685e90e03a..4a9ef4a06286 100644
--- a/vllm/entrypoints/pooling/base/serving.py
+++ b/vllm/entrypoints/pooling/base/serving.py
@@ -1,24 +1,24 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from abc import ABC, abstractmethod
 from collections.abc import AsyncGenerator, Mapping
+from concurrent.futures import Executor
 from http import HTTPStatus
 from typing import ClassVar
 
+import torch
 from fastapi import Request
 from fastapi.responses import Response
 from starlette.datastructures import Headers
 
 from vllm import PoolingParams, PoolingRequestOutput, envs
-from vllm.config import ModelConfig
+from vllm.config import VllmConfig
 from vllm.engine.protocol import EngineClient
-from vllm.entrypoints.chat_utils import (
-    ChatTemplateConfig,
-    ChatTemplateContentFormatOption,
-)
+from vllm.entrypoints.chat_utils import ChatTemplateConfig
 from vllm.entrypoints.logger import RequestLogger
 from vllm.entrypoints.openai.engine.protocol import ErrorResponse
 from vllm.entrypoints.openai.models.serving import OpenAIServingModels
-from vllm.entrypoints.pooling.typing import AnyPoolingRequest, PoolingServeContext
 from vllm.exceptions import VLLMNotFoundError
 from vllm.inputs import EngineInput
 from vllm.lora.request import LoRARequest
@@ -30,12 +30,13 @@
     log_tracing_disabled_warning,
 )
 from vllm.utils import random_uuid
-from vllm.utils.async_utils import merge_async_iterators
+from vllm.utils.async_utils import make_async, merge_async_iterators
 
+from ..typing import AnyPoolingRequest, PoolingServeContext
 from .io_processor import PoolingIOProcessor
 
 
-class PoolingServing:
+class PoolingServingBase(ABC):
     request_id_prefix: ClassVar[str]
 
     def __init__(
@@ -44,63 +45,81 @@ def __init__(
         models: OpenAIServingModels,
         *,
         request_logger: RequestLogger | None,
-        chat_template: str | None = None,
-        chat_template_content_format: ChatTemplateContentFormatOption = "auto",
-        trust_request_chat_template: bool = False,
+        chat_template_config: ChatTemplateConfig,
         return_tokens_as_token_ids: bool = False,
         log_error_stack: bool = False,
     ):
-        super().__init__()
         self.engine_client = engine_client
         self.models = models
         self.model_config = models.model_config
+        self.renderer = models.renderer
+        self.vllm_config = engine_client.vllm_config
         self.max_model_len = self.model_config.max_model_len
         self.request_logger = request_logger
         self.return_tokens_as_token_ids = return_tokens_as_token_ids
         self.log_error_stack = log_error_stack
-        self.chat_template_config = ChatTemplateConfig(
-            chat_template=chat_template,
-            chat_template_content_format=chat_template_content_format,
-            trust_request_chat_template=trust_request_chat_template,
+        self.chat_template_config = chat_template_config
+
+        # Shared thread pool executor for preprocessing and postprocessing.
+        self._executor: Executor = models.renderer._executor
+        self._preprocessing_async = make_async(
+            self._preprocessing, executor=self._executor
         )
-        self.io_processor = self.init_io_processor(
-            model_config=models.model_config,
-            renderer=models.renderer,
-            chat_template_config=self.chat_template_config,
+        self._postprocessing_async = make_async(
+            self._postprocessing, executor=self._executor
         )
 
-    def init_io_processor(
+    async def __call__(
         self,
-        model_config: ModelConfig,
-        renderer: BaseRenderer,
-        chat_template_config: ChatTemplateConfig,
-    ) -> PoolingIOProcessor:
+        request: AnyPoolingRequest,
+        raw_request: Request | None = None,
+    ) -> Response:
+        io_processor = self.get_io_processor(request)
+        ctx = await self._init_ctx(io_processor, request, raw_request)
+        await self._preprocessing_async(io_processor, ctx)
+        await self._prepare_generators(ctx)
+        await self._collect_batch(ctx)
+        return await self._postprocessing_async(io_processor, ctx)
+
+    @abstractmethod
+    def get_io_processor(self, request: AnyPoolingRequest) -> PoolingIOProcessor:
         raise NotImplementedError
 
-    async def __call__(
+    @torch.inference_mode()
+    def _preprocessing(
+        self, io_processor: PoolingIOProcessor, ctx: PoolingServeContext
+    ):
+        return io_processor.pre_process_online(ctx)
+
+    @torch.inference_mode()
+    def _postprocessing(
+        self, io_processor: PoolingIOProcessor, ctx: PoolingServeContext
+    ):
+        io_processor.post_process_online(ctx)
+        return self._build_response(ctx)
+
+    async def _init_ctx(
         self,
+        io_processor: PoolingIOProcessor,
         request: AnyPoolingRequest,
         raw_request: Request | None = None,
-    ) -> Response:
+    ):
         model_name = self.models.model_name()
         request_id = f"{self.request_id_prefix}-{self._base_request_id(raw_request)}"
-
         await self._check_model(request)
 
+        pooling_params = io_processor.create_pooling_params(request)
         ctx = PoolingServeContext(
             request=request,
             raw_request=raw_request,
             model_name=model_name,
+            pooling_params=pooling_params,
             request_id=request_id,
         )
 
         self._validate_request(ctx)
         self._maybe_get_adapters(ctx)
-        await self.io_processor.pre_process_online_async(ctx)
-        await self._prepare_generators(ctx)
-        await self._collect_batch(ctx)
-        await self.io_processor.post_process_online_async(ctx)
-        return await self._build_response(ctx)
+        return ctx
 
     async def _prepare_generators(
         self,
@@ -117,8 +136,14 @@ async def _prepare_generators(
             else await self._get_trace_headers(ctx.raw_request.headers)
         )
 
-        pooling_params = self.io_processor.create_pooling_params(ctx.request)
-        pooling_params.verify(self.model_config)
+        assert ctx.pooling_params is not None
+        pooling_params = ctx.pooling_params
+
+        if isinstance(pooling_params, list):
+            for params in pooling_params:
+                params.verify(self.model_config)
+        else:
+            pooling_params.verify(self.model_config)
 
         for i, engine_input in enumerate(ctx.engine_inputs):
             prompt_request_id = (
@@ -127,16 +152,22 @@ async def _prepare_generators(
                 else ctx.prompt_request_ids[i]
             )
 
+            params = (
+                pooling_params[i]
+                if isinstance(pooling_params, list)
+                else pooling_params
+            )
+
             self._log_inputs(
                 prompt_request_id,
                 engine_input,
-                params=pooling_params,
+                params=params,
                 lora_request=ctx.lora_request,
             )
 
             generator = self.engine_client.encode(
                 engine_input,
-                pooling_params,
+                params,
                 prompt_request_id,
                 lora_request=ctx.lora_request,
                 trace_headers=trace_headers,
@@ -169,7 +200,8 @@ async def _collect_batch(
 
         ctx.final_res_batch = [res for res in final_res_batch if res is not None]
 
-    async def _build_response(
+    @abstractmethod
+    def _build_response(
         self,
         ctx: PoolingServeContext,
     ) -> Response:
@@ -226,6 +258,7 @@ def _validate_request(self, ctx: PoolingServeContext) -> None:
                 "greater than max_model_len."
                 " Please request a smaller truncation size."
             )
+
         return None
 
     async def _get_trace_headers(
@@ -334,3 +367,26 @@ def _log_inputs(
             params=params,
             lora_request=lora_request,
         )
+
+
+class PoolingServing(PoolingServingBase, ABC):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.io_processor = self.init_io_processor(
+            vllm_config=self.vllm_config,
+            renderer=self.renderer,
+            chat_template_config=self.chat_template_config,
+        )
+
+    @abstractmethod
+    def init_io_processor(
+        self,
+        vllm_config: VllmConfig,
+        renderer: BaseRenderer,
+        chat_template_config: ChatTemplateConfig,
+    ) -> PoolingIOProcessor:
+        raise NotImplementedError
+
+    def get_io_processor(self, request: AnyPoolingRequest) -> PoolingIOProcessor:
+        return self.io_processor
diff --git a/vllm/entrypoints/pooling/classify/api_router.py b/vllm/entrypoints/pooling/classify/api_router.py
index f254a6c2b399..2d27628bc692 100644
--- a/vllm/entrypoints/pooling/classify/api_router.py
+++ b/vllm/entrypoints/pooling/classify/api_router.py
@@ -5,13 +5,14 @@
 from fastapi.responses import Response
 
 from vllm.entrypoints.openai.utils import validate_json_request
-from vllm.entrypoints.pooling.classify.protocol import ClassificationRequest
-from vllm.entrypoints.pooling.classify.serving import ServingClassification
 from vllm.entrypoints.utils import (
     load_aware_call,
     with_cancellation,
 )
 
+from .protocol import ClassificationRequest
+from .serving import ServingClassification
+
 router = APIRouter()
 
 
diff --git a/vllm/entrypoints/pooling/classify/io_processor.py b/vllm/entrypoints/pooling/classify/io_processor.py
index ee73207dff5f..1b2cd0ffa9db 100644
--- a/vllm/entrypoints/pooling/classify/io_processor.py
+++ b/vllm/entrypoints/pooling/classify/io_processor.py
@@ -1,8 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from vllm.entrypoints.pooling.base.io_processor import PoolingIOProcessor
+from ..base.io_processor import PoolingIOProcessor
 
 
 class ClassifyIOProcessor(PoolingIOProcessor):
-    name = "classification"
+    name = "classify"
+
+
+class TokenClassifyIOProcessor(PoolingIOProcessor):
+    name = "token_classify"
diff --git a/vllm/entrypoints/pooling/classify/protocol.py b/vllm/entrypoints/pooling/classify/protocol.py
index fe8c898e0945..39cacdd835e2 100644
--- a/vllm/entrypoints/pooling/classify/protocol.py
+++ b/vllm/entrypoints/pooling/classify/protocol.py
@@ -7,37 +7,27 @@
 from pydantic import Field
 
 from vllm import PoolingParams
-from vllm.config import ModelConfig
 from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo
-from vllm.entrypoints.pooling.base.protocol import (
+from vllm.logger import init_logger
+from vllm.utils import random_uuid
+
+from ..base.protocol import (
     ChatRequestMixin,
     ClassifyRequestMixin,
     CompletionRequestMixin,
+    FixedMaxLenTokenizeParamsMixin,
     PoolingBasicRequestMixin,
 )
-from vllm.logger import init_logger
-from vllm.renderers import TokenizeParams
-from vllm.utils import random_uuid
 
 logger = init_logger(__name__)
 
 
 class ClassificationCompletionRequest(
-    PoolingBasicRequestMixin, CompletionRequestMixin, ClassifyRequestMixin
+    PoolingBasicRequestMixin,
+    CompletionRequestMixin,
+    ClassifyRequestMixin,
+    FixedMaxLenTokenizeParamsMixin,
 ):
-    def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
-        encoder_config = model_config.encoder_config or {}
-
-        return TokenizeParams(
-            max_total_tokens=model_config.max_model_len,
-            max_output_tokens=0,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
-            truncation_side=self.truncation_side,
-            do_lower_case=encoder_config.get("do_lower_case", False),
-            add_special_tokens=self.add_special_tokens,
-            max_total_tokens_param="max_model_len",
-        )
-
     def to_pooling_params(self):
         return PoolingParams(
             task="classify",
@@ -46,21 +36,11 @@ def to_pooling_params(self):
 
 
 class ClassificationChatRequest(
-    PoolingBasicRequestMixin, ChatRequestMixin, ClassifyRequestMixin
+    PoolingBasicRequestMixin,
+    ChatRequestMixin,
+    ClassifyRequestMixin,
+    FixedMaxLenTokenizeParamsMixin,
 ):
-    def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
-        encoder_config = model_config.encoder_config or {}
-
-        return TokenizeParams(
-            max_total_tokens=model_config.max_model_len,
-            max_output_tokens=0,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
-            truncation_side=self.truncation_side,
-            do_lower_case=encoder_config.get("do_lower_case", False),
-            add_special_tokens=self.add_special_tokens,
-            max_total_tokens_param="max_model_len",
-        )
-
     def to_pooling_params(self):
         return PoolingParams(
             task="classify",
diff --git a/vllm/entrypoints/pooling/classify/serving.py b/vllm/entrypoints/pooling/classify/serving.py
index 24d4f9aacffc..232483ca3aec 100644
--- a/vllm/entrypoints/pooling/classify/serving.py
+++ b/vllm/entrypoints/pooling/classify/serving.py
@@ -6,15 +6,12 @@
 import numpy as np
 from fastapi.responses import JSONResponse
 
-from vllm.config import ModelConfig
-from vllm.entrypoints.chat_utils import ChatTemplateConfig
 from vllm.entrypoints.openai.engine.protocol import UsageInfo
-from vllm.entrypoints.pooling.base.serving import PoolingServing
-from vllm.entrypoints.pooling.typing import PoolingServeContext
 from vllm.logger import init_logger
 from vllm.outputs import ClassificationOutput
-from vllm.renderers import BaseRenderer
 
+from ..base.serving import PoolingServing
+from ..typing import PoolingServeContext
 from .io_processor import ClassifyIOProcessor
 from .protocol import (
     ClassificationData,
@@ -31,19 +28,10 @@
 class ServingClassification(PoolingServing):
     request_id_prefix = "classify"
 
-    def init_io_processor(
-        self,
-        model_config: ModelConfig,
-        renderer: BaseRenderer,
-        chat_template_config: ChatTemplateConfig,
-    ) -> ClassifyIOProcessor:
-        return ClassifyIOProcessor(
-            model_config=model_config,
-            renderer=renderer,
-            chat_template_config=chat_template_config,
-        )
+    def init_io_processor(self, *args, **kwargs) -> ClassifyIOProcessor:
+        return ClassifyIOProcessor(*args, **kwargs)
 
-    async def _build_response(
+    def _build_response(
         self,
         ctx: ClassificationServeContext,
     ) -> JSONResponse:
diff --git a/vllm/entrypoints/pooling/embed/api_router.py b/vllm/entrypoints/pooling/embed/api_router.py
index 390efc6a13ab..4eb86e4e2d29 100644
--- a/vllm/entrypoints/pooling/embed/api_router.py
+++ b/vllm/entrypoints/pooling/embed/api_router.py
@@ -7,13 +7,11 @@
 
 from vllm.entrypoints.openai.engine.protocol import ErrorResponse
 from vllm.entrypoints.openai.utils import validate_json_request
-from vllm.entrypoints.pooling.embed.protocol import (
-    CohereEmbedRequest,
-    EmbeddingRequest,
-)
-from vllm.entrypoints.pooling.embed.serving import ServingEmbedding
 from vllm.entrypoints.utils import load_aware_call, with_cancellation
 
+from .protocol import CohereEmbedRequest, EmbeddingRequest
+from .serving import ServingEmbedding
+
 router = APIRouter()
 
 
diff --git a/vllm/entrypoints/pooling/embed/io_processor.py b/vllm/entrypoints/pooling/embed/io_processor.py
index 614f8e0d9d02..8c28f9f3d4e7 100644
--- a/vllm/entrypoints/pooling/embed/io_processor.py
+++ b/vllm/entrypoints/pooling/embed/io_processor.py
@@ -16,15 +16,6 @@
     ChatCompletionMessageParam,
     CustomChatCompletionMessageParam,
 )
-from vllm.entrypoints.pooling.base.io_processor import PoolingIOProcessor
-from vllm.entrypoints.pooling.embed.protocol import (
-    CohereEmbedContent,
-    CohereEmbedInput,
-    CohereEmbedRequest,
-    EmbeddingChatRequest,
-    EmbeddingCompletionRequest,
-)
-from vllm.entrypoints.pooling.typing import PoolingServeContext
 from vllm.inputs import EngineInput, tokens_input
 from vllm.logger import init_logger
 from vllm.outputs import PoolingOutput, PoolingRequestOutput
@@ -33,11 +24,27 @@
 from vllm.utils.collection_utils import chunk_list
 from vllm.utils.mistral import is_mistral_tokenizer
 
+from ..base.io_processor import PoolingIOProcessor
+from ..scoring.io_processor import JinaRankingIOProcessorMixin
+from ..typing import (
+    OfflineInputsContext,
+    PoolingChatLikeRequest,
+    PoolingCompletionLikeRequest,
+    PoolingServeContext,
+)
+from .protocol import (
+    CohereEmbedContent,
+    CohereEmbedInput,
+    CohereEmbedRequest,
+    EmbeddingChatRequest,
+    EmbeddingCompletionRequest,
+)
+
 logger = init_logger(__name__)
 
 
 class EmbedIOProcessor(PoolingIOProcessor):
-    name = "embedding"
+    name = "embed"
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -88,7 +95,7 @@ def _pre_process_chunked(self, ctx: PoolingServeContext) -> None:
         if ctx.engine_inputs is None:
             raise ValueError("Engine prompts not available")
 
-        ctx.intermediates = ctx.engine_inputs
+        ctx.original_engine_inputs = ctx.engine_inputs
         request_id = ctx.request_id
         max_model_len = self.model_config.max_model_len
         chunked_engine_inputs: list[EngineInput] = []
@@ -183,10 +190,10 @@ def _post_process_chunked(self, ctx: PoolingServeContext) -> None:
                 aggregator["total_weight"] += weight
                 aggregator["chunk_count"] += 1
 
-        if ctx.intermediates is None:
-            raise ValueError("Original prompts inputs not available")
+        if ctx.original_engine_inputs is None:
+            raise ValueError("Original engine inputs not available")
 
-        original_engine_inputs = cast(list[EngineInput], ctx.intermediates)
+        original_engine_inputs = ctx.original_engine_inputs
         num_prompts = len(original_engine_inputs)
 
         # Finalize aggregated results
@@ -464,7 +471,7 @@ def _preprocess_cohere_text_completion(
             truncate_prompt_tokens=truncate_prompt_tokens,
             truncation_side=truncation_side,
         )
-        return self._preprocess_completion_online(
+        return self._preprocess_cmpl_online(
             proxy, prompt_input=proxy.input, prompt_embeds=None
         )
 
@@ -549,3 +556,52 @@ def _enforce_cohere_max_tokens(self, ctx: PoolingServeContext) -> None:
             request = ctx.request
             if request.truncate == "NONE" and request.max_tokens is not None:
                 self._check_cohere_max_tokens(ctx.final_res_batch, request.max_tokens)
+
+
+class TokenEmbedIOProcessor(PoolingIOProcessor):
+    name = "token_embed"
+
+
+class JinaRankingTokenEmbedIOProcessor(
+    TokenEmbedIOProcessor, JinaRankingIOProcessorMixin
+):
+    def pre_process_online(self, ctx: PoolingServeContext):
+        request = ctx.request
+        if isinstance(request, PoolingCompletionLikeRequest):
+            prompts = request.input
+            if not isinstance(prompts, Sequence) or len(prompts) < 2:
+                raise ValueError("The JinaForRanking model requires at least 2 inputs.")
+
+            text_prompts = self.ensure_str(prompts)
+
+            # The JinaForRanking model concatenates docs first, then query.
+            # Let's stay consistent with this novel design.
+            prompt_input = self.format_docs_prompts_func(
+                query=text_prompts[-1], docs=text_prompts[:-1]
+            )
+
+            engine_inputs = self._preprocess_cmpl_online(
+                request,
+                prompt_input=prompt_input,
+                prompt_embeds=None,
+            )
+        elif isinstance(request, PoolingChatLikeRequest):
+            raise ValueError("The JinaForRanking does not support chat Request.")
+        else:
+            raise ValueError(f"Invalid {self.name} request type")
+
+        ctx.engine_inputs = engine_inputs
+
+    def pre_process_offline(self, ctx: OfflineInputsContext) -> Sequence[EngineInput]:
+        if not isinstance(ctx.prompts, Sequence) or len(ctx.prompts) < 2:
+            raise ValueError("The JinaForRanking model requires at least 2 inputs.")
+
+        text_prompts = self.ensure_str(ctx.prompts)
+
+        # The JinaForRanking model concatenates docs first, then query.
+        # Let's stay consistent with this novel design.
+        ctx.prompts = self.format_docs_prompts_func(
+            query=text_prompts[-1], docs=text_prompts[:-1]
+        )
+
+        return super().pre_process_offline(ctx)
diff --git a/vllm/entrypoints/pooling/embed/protocol.py b/vllm/entrypoints/pooling/embed/protocol.py
index 9b39b41df286..d886e3199f7c 100644
--- a/vllm/entrypoints/pooling/embed/protocol.py
+++ b/vllm/entrypoints/pooling/embed/protocol.py
@@ -16,61 +16,24 @@
 from pydantic import BaseModel, Field
 
 from vllm import PoolingParams
-from vllm.config import ModelConfig
 from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo
-from vllm.entrypoints.pooling.base.protocol import (
+from vllm.utils import random_uuid
+
+from ..base.protocol import (
     ChatRequestMixin,
     CompletionRequestMixin,
+    EmbeddingTokenizeParamsMixin,
     EmbedRequestMixin,
     PoolingBasicRequestMixin,
 )
-from vllm.renderers import TokenizeParams
-from vllm.utils import random_uuid
-
-# ---------------------------------------------------------------------------
-# OpenAI /v1/embeddings — request models
-# ---------------------------------------------------------------------------
-
-
-def _get_max_total_output_tokens(
-    model_config: ModelConfig,
-) -> tuple[int | None, int]:
-    max_total_tokens = model_config.max_model_len
-    pooler_config = model_config.pooler_config
-
-    if pooler_config is None:
-        return max_total_tokens, 0
-
-    if pooler_config.enable_chunked_processing:
-        return None, 0
-
-    max_embed_len = pooler_config.max_embed_len or max_total_tokens
-    max_output_tokens = max_total_tokens - max_embed_len
-    return max_total_tokens, max_output_tokens
 
 
 class EmbeddingCompletionRequest(
-    PoolingBasicRequestMixin, CompletionRequestMixin, EmbedRequestMixin
+    PoolingBasicRequestMixin,
+    CompletionRequestMixin,
+    EmbedRequestMixin,
+    EmbeddingTokenizeParamsMixin,
 ):
-    def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
-        encoder_config = model_config.encoder_config or {}
-
-        (
-            max_total_tokens,
-            max_output_tokens,
-        ) = _get_max_total_output_tokens(model_config)
-
-        return TokenizeParams(
-            max_total_tokens=max_total_tokens,
-            max_output_tokens=max_output_tokens,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
-            truncation_side=self.truncation_side,
-            do_lower_case=encoder_config.get("do_lower_case", False),
-            add_special_tokens=self.add_special_tokens,
-            max_total_tokens_param="max_model_len",
-            max_output_tokens_param="max_model_len - max_embed_len",
-        )
-
     def to_pooling_params(self):
         return PoolingParams(
             task="embed",
@@ -80,27 +43,11 @@ def to_pooling_params(self):
 
 
 class EmbeddingChatRequest(
-    PoolingBasicRequestMixin, ChatRequestMixin, EmbedRequestMixin
+    PoolingBasicRequestMixin,
+    ChatRequestMixin,
+    EmbedRequestMixin,
+    EmbeddingTokenizeParamsMixin,
 ):
-    def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
-        encoder_config = model_config.encoder_config or {}
-
-        (
-            max_total_tokens,
-            max_output_tokens,
-        ) = _get_max_total_output_tokens(model_config)
-
-        return TokenizeParams(
-            max_total_tokens=max_total_tokens,
-            max_output_tokens=max_output_tokens,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
-            truncation_side=self.truncation_side,
-            do_lower_case=encoder_config.get("do_lower_case", False),
-            add_special_tokens=self.add_special_tokens,
-            max_total_tokens_param="max_model_len",
-            max_output_tokens_param="max_model_len - max_embed_len",
-        )
-
     def to_pooling_params(self):
         return PoolingParams(
             task="embed",
diff --git a/vllm/entrypoints/pooling/embed/serving.py b/vllm/entrypoints/pooling/embed/serving.py
index f0c331645910..5d9616f00c0f 100644
--- a/vllm/entrypoints/pooling/embed/serving.py
+++ b/vllm/entrypoints/pooling/embed/serving.py
@@ -1,44 +1,40 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import json
-from collections.abc import Callable
-from functools import partial
-from typing import Literal, TypeAlias, cast
+from typing import TypeAlias, cast
 
 from fastapi.responses import JSONResponse, Response, StreamingResponse
 from typing_extensions import assert_never
 
-from vllm.config import ModelConfig
-from vllm.entrypoints.chat_utils import ChatTemplateConfig
-from vllm.entrypoints.openai.engine.protocol import UsageInfo
-from vllm.entrypoints.pooling.base.serving import PoolingServing
-from vllm.entrypoints.pooling.embed.io_processor import EmbedIOProcessor
-from vllm.entrypoints.pooling.embed.protocol import (
+from vllm.logger import init_logger
+from vllm.outputs import PoolingRequestOutput
+from vllm.utils.serial_utils import EmbedDType, Endianness
+
+from ..base.serving import PoolingServing
+from ..typing import PoolingServeContext
+from ..utils import (
+    BytesEncodingFormat,
+    JsonEncodingFormat,
+    build_pooling_bytes_streaming_response,
+    encode_pooling_output_float,
+    encode_pooling_output_float_or_ndarray,
+    get_json_response_cls,
+    get_pooling_output_encoder,
+    get_pooling_usage,
+)
+from .io_processor import EmbedIOProcessor
+from .protocol import (
     CohereBilledUnits,
     CohereEmbedRequest,
     CohereEmbedResponse,
     CohereMeta,
-    EmbeddingBytesResponse,
     EmbeddingRequest,
     EmbeddingResponse,
     EmbeddingResponseData,
     build_typed_embeddings,
 )
-from vllm.entrypoints.pooling.typing import PoolingServeContext
-from vllm.entrypoints.pooling.utils import (
-    encode_pooling_bytes,
-    encode_pooling_output_base64,
-    encode_pooling_output_float,
-    get_json_response_cls,
-)
-from vllm.logger import init_logger
-from vllm.outputs import PoolingRequestOutput
-from vllm.renderers import BaseRenderer
-from vllm.utils.serial_utils import EmbedDType, Endianness
 
 logger = init_logger(__name__)
 
-JSONResponseCLS = get_json_response_cls()
 
 EmbeddingServeContext: TypeAlias = PoolingServeContext[EmbeddingRequest]
 
@@ -49,27 +45,23 @@ class ServingEmbedding(PoolingServing):
     request_id_prefix = "embd"
     io_processor: EmbedIOProcessor
 
-    def init_io_processor(
-        self,
-        model_config: ModelConfig,
-        renderer: BaseRenderer,
-        chat_template_config: ChatTemplateConfig,
-    ) -> EmbedIOProcessor:
-        return EmbedIOProcessor(
-            model_config=model_config,
-            renderer=renderer,
-            chat_template_config=chat_template_config,
-        )
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
 
-    async def _build_response(
+        self.json_response_cls = get_json_response_cls()
+
+    def init_io_processor(self, *args, **kwargs) -> EmbedIOProcessor:
+        return EmbedIOProcessor(*args, **kwargs)
+
+    def _build_response(
         self,
         ctx: PoolingServeContext,
     ) -> Response:
         if isinstance(ctx.request, CohereEmbedRequest):
             return self._build_cohere_response_from_ctx(ctx)
-        return await self._build_openai_response(ctx)
+        return self._build_openai_response(ctx)
 
-    async def _build_openai_response(
+    def _build_openai_response(
         self,
         ctx: EmbeddingServeContext,
     ) -> JSONResponse | StreamingResponse:
@@ -107,49 +99,61 @@ def _openai_json_response(
         request_id: str,
         created_time: int,
         model_name: str,
-        encoding_format: Literal["float", "base64"],
+        encoding_format: JsonEncodingFormat,
         embed_dtype: EmbedDType,
         endianness: Endianness,
     ) -> JSONResponse:
-        encode_fn = cast(
-            Callable[[PoolingRequestOutput], list[float] | str],
-            (
-                encode_pooling_output_float
-                if encoding_format == "float"
-                else partial(
-                    encode_pooling_output_base64,
-                    embed_dtype=embed_dtype,
-                    endianness=endianness,
+        use_ndarray_response = (
+            encoding_format == "float"
+            and self.json_response_cls.__name__ == "ORJSONResponse"
+        )
+        if use_ndarray_response:
+            ndarray_items: list[dict[str, object]] = []
+
+            for idx, final_res in enumerate(final_res_batch):
+                item_dict = EmbeddingResponseData(
+                    index=idx,
+                    embedding=[],
+                ).model_dump()
+                item_dict["embedding"] = encode_pooling_output_float_or_ndarray(
+                    final_res
                 )
-            ),
+                ndarray_items.append(item_dict)
+            ndarray_response = EmbeddingResponse(
+                id=request_id,
+                created=created_time,
+                model=model_name,
+                data=[],  # type: ignore[arg-type]
+                usage=get_pooling_usage(final_res_batch),
+            ).model_dump()
+            ndarray_response["data"] = ndarray_items
+
+            return self.json_response_cls(content=ndarray_response)
+
+        encode_fn = get_pooling_output_encoder(
+            encoding_format=encoding_format,
+            embed_dtype=embed_dtype,
+            endianness=endianness,
         )
 
         items: list[EmbeddingResponseData] = []
-        num_prompt_tokens = 0
 
         for idx, final_res in enumerate(final_res_batch):
             item = EmbeddingResponseData(
                 index=idx,
-                embedding=encode_fn(final_res),
+                embedding=cast(list[float] | str, encode_fn(final_res)),
             )
-            prompt_token_ids = final_res.prompt_token_ids
 
             items.append(item)
-            num_prompt_tokens += len(prompt_token_ids)
-
-        usage = UsageInfo(
-            prompt_tokens=num_prompt_tokens,
-            total_tokens=num_prompt_tokens,
-        )
 
         response = EmbeddingResponse(
             id=request_id,
             created=created_time,
             model=model_name,
             data=items,
-            usage=usage,
+            usage=get_pooling_usage(final_res_batch),
         )
-        return JSONResponseCLS(content=response.model_dump())
+        return self.json_response_cls(content=response.model_dump())
 
     def _openai_bytes_response(
         self,
@@ -157,48 +161,32 @@ def _openai_bytes_response(
         request_id: str,
         created_time: int,
         model_name: str,
-        encoding_format: Literal["bytes", "bytes_only"],
+        encoding_format: BytesEncodingFormat,
         embed_dtype: EmbedDType,
         endianness: Endianness,
     ) -> StreamingResponse:
-        content, items, usage = encode_pooling_bytes(
+        return build_pooling_bytes_streaming_response(
             pooling_outputs=final_res_batch,
+            request_id=request_id,
+            created_time=created_time,
+            model_name=model_name,
+            encoding_format=encoding_format,
             embed_dtype=embed_dtype,
             endianness=endianness,
         )
 
-        headers = (
-            None
-            if encoding_format == "bytes_only"
-            else {
-                "metadata": json.dumps(
-                    {
-                        "id": request_id,
-                        "created": created_time,
-                        "model": model_name,
-                        "data": items,
-                        "usage": usage,
-                    }
-                )
-            }
-        )
-
-        response = EmbeddingBytesResponse(content=content, headers=headers)
-        return StreamingResponse(
-            content=response.content,
-            headers=response.headers,
-            media_type=response.media_type,
-        )
-
-    @staticmethod
     def _build_cohere_response_from_ctx(
+        self,
         ctx: PoolingServeContext,
     ) -> JSONResponse:
         request = ctx.request
         assert isinstance(request, CohereEmbedRequest)
 
-        all_floats = [encode_pooling_output_float(out) for out in ctx.final_res_batch]
-        total_tokens = sum(len(out.prompt_token_ids) for out in ctx.final_res_batch)
+        all_floats = [
+            cast(list[float], encode_pooling_output_float(out))
+            for out in ctx.final_res_batch
+        ]
+        total_tokens = get_pooling_usage(ctx.final_res_batch).prompt_tokens
 
         image_tokens = total_tokens if request.images is not None else 0
         texts_echo = request.texts
@@ -218,4 +206,4 @@ def _build_cohere_response_from_ctx(
                 ),
             ),
         )
-        return JSONResponse(content=response.model_dump(exclude_none=True))
+        return self.json_response_cls(content=response.model_dump(exclude_none=True))
diff --git a/vllm/entrypoints/pooling/factories.py b/vllm/entrypoints/pooling/factories.py
new file mode 100644
index 000000000000..62f76a7aa285
--- /dev/null
+++ b/vllm/entrypoints/pooling/factories.py
@@ -0,0 +1,265 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import TYPE_CHECKING
+
+from fastapi import FastAPI
+
+from vllm.config import ModelConfig, VllmConfig
+from vllm.entrypoints.chat_utils import ChatTemplateConfig
+from vllm.logger import init_logger
+from vllm.plugins.io_processors import has_io_processor
+from vllm.renderers import BaseRenderer
+from vllm.tasks import POOLING_TASKS, SCORE_TYPE_MAP, SupportedTask
+
+from .base.io_processor import PoolingIOProcessor
+from .utils import enable_scoring_api
+
+if TYPE_CHECKING:
+    from argparse import Namespace
+
+    from starlette.datastructures import State
+
+    from vllm.engine.protocol import EngineClient
+    from vllm.entrypoints.logger import RequestLogger
+    from vllm.entrypoints.sagemaker.api_router import (
+        EndpointFn,
+        GetHandlerFn,
+        RequestType,
+    )
+
+else:
+    RequestLogger = object
+
+
+logger = init_logger(__name__)
+
+
+def init_pooling_io_processors(
+    supported_tasks: tuple[SupportedTask, ...],
+    vllm_config: VllmConfig,
+    renderer: BaseRenderer,
+    chat_template_config: ChatTemplateConfig,
+) -> dict[str, PoolingIOProcessor]:
+    model_config = vllm_config.model_config
+    processors: dict[str, type[PoolingIOProcessor]] = {}
+    pooling_task = model_config.get_pooling_task(supported_tasks)
+
+    if pooling_task == "classify":
+        from .classify.io_processor import ClassifyIOProcessor
+
+        processors["classify"] = ClassifyIOProcessor
+
+    if pooling_task == "token_classify":
+        from .classify.io_processor import TokenClassifyIOProcessor
+
+        processors["token_classify"] = TokenClassifyIOProcessor
+
+    if pooling_task == "embed":
+        from .embed.io_processor import EmbedIOProcessor
+
+        processors["embed"] = EmbedIOProcessor
+
+    if pooling_task == "token_embed":
+        from .embed.io_processor import TokenEmbedIOProcessor
+
+        processors["token_embed"] = TokenEmbedIOProcessor
+
+    if has_io_processor(
+        vllm_config,
+        model_config.io_processor_plugin,
+    ):
+        from .pooling.io_processor import PluginWithIOProcessorPlugins
+
+        processors["plugin"] = PluginWithIOProcessorPlugins
+    elif pooling_task == "plugin":
+        from .pooling.io_processor import PluginWithoutIOProcessorPlugins
+
+        processors["plugin"] = PluginWithoutIOProcessorPlugins
+
+    if enable_scoring_api(supported_tasks, model_config):
+        from .scoring.io_processor import ScoringIOProcessors
+
+        score_type: str | None = SCORE_TYPE_MAP.get(pooling_task, None)  # type: ignore[arg-type]
+        if score_type is not None and score_type in ScoringIOProcessors:
+            processors[score_type] = ScoringIOProcessors[score_type]
+
+    if model_config.architecture == "JinaForRanking":
+        from .embed.io_processor import JinaRankingTokenEmbedIOProcessor
+        from .scoring.io_processor import ScoringIOProcessors
+
+        processors["token_embed"] = JinaRankingTokenEmbedIOProcessor
+        processors["late-interaction"] = ScoringIOProcessors["jina-reranking-scoring"]
+
+    return {
+        task: processor_cls(
+            vllm_config=vllm_config,
+            renderer=renderer,
+            chat_template_config=chat_template_config,
+        )
+        for task, processor_cls in processors.items()
+    }
+
+
+def register_pooling_api_routers(
+    app: FastAPI,
+    supported_tasks: tuple["SupportedTask", ...],
+    model_config: ModelConfig | None = None,
+):
+    if model_config is None:
+        return
+
+    pooling_task = model_config.get_pooling_task(supported_tasks)
+
+    if pooling_task is not None:
+        from .pooling.api_router import router as pooling_router
+
+        app.include_router(pooling_router)
+
+    if "classify" in supported_tasks:
+        from .classify.api_router import (
+            router as classify_router,
+        )
+
+        app.include_router(classify_router)
+
+    if "embed" in supported_tasks:
+        from .embed.api_router import router as embed_router
+
+        app.include_router(embed_router)
+
+    if enable_scoring_api(supported_tasks, model_config):
+        from .scoring.api_router import router as score_router
+
+        app.include_router(score_router)
+
+
+def init_pooling_state(
+    engine_client: "EngineClient",
+    state: "State",
+    args: "Namespace",
+    request_logger: RequestLogger | None,
+    supported_tasks: tuple["SupportedTask", ...],
+):
+    model_config = engine_client.model_config
+    if model_config is None:
+        return
+
+    from vllm.entrypoints.chat_utils import load_chat_template
+    from vllm.tasks import POOLING_TASKS
+
+    from .classify.serving import ServingClassification
+    from .embed.serving import ServingEmbedding
+    from .pooling.serving import ServingPooling
+    from .scoring.serving import ServingScores
+
+    resolved_chat_template = load_chat_template(args.chat_template)
+    pooling_task = model_config.get_pooling_task(supported_tasks)
+
+    chat_template_config = ChatTemplateConfig(
+        chat_template=resolved_chat_template,
+        chat_template_content_format=args.chat_template_content_format,
+        trust_request_chat_template=args.trust_request_chat_template,
+    )
+
+    state.serving_pooling = (
+        (
+            ServingPooling(
+                engine_client,
+                state.openai_serving_models,
+                supported_tasks=supported_tasks,
+                request_logger=request_logger,
+                chat_template_config=chat_template_config,
+            )
+        )
+        if any(t in supported_tasks for t in POOLING_TASKS)
+        else None
+    )
+    state.serving_embedding = (
+        ServingEmbedding(
+            engine_client,
+            state.openai_serving_models,
+            request_logger=request_logger,
+            chat_template_config=chat_template_config,
+        )
+        if pooling_task == "embed"
+        else None
+    )
+    state.serving_classification = (
+        ServingClassification(
+            engine_client,
+            state.openai_serving_models,
+            request_logger=request_logger,
+            chat_template_config=chat_template_config,
+        )
+        if pooling_task == "classify"
+        else None
+    )
+    state.serving_scores = (
+        ServingScores(
+            engine_client,
+            state.openai_serving_models,
+            supported_tasks=supported_tasks,
+            request_logger=request_logger,
+            chat_template_config=chat_template_config,
+            enable_flash_late_interaction=getattr(
+                args, "enable_flash_late_interaction", True
+            ),
+        )
+        if enable_scoring_api(supported_tasks, model_config)
+        else None
+    )
+
+
+def get_pooling_invocation_types(
+    supported_tasks: tuple["SupportedTask", ...],
+    model_config: ModelConfig | None = None,
+):
+    # NOTE: Items defined earlier take higher priority
+    invocation_types: list[tuple[RequestType, tuple[GetHandlerFn, EndpointFn]]] = []
+
+    if model_config is None:
+        return invocation_types
+
+    pooling_task = model_config.get_pooling_task(supported_tasks)
+
+    if pooling_task == "embed":
+        from .embed.api_router import create_embedding, embedding
+        from .embed.protocol import EmbeddingRequest
+
+        invocation_types += [
+            (EmbeddingRequest, (embedding, create_embedding)),
+        ]
+
+    if pooling_task == "classify":
+        from .classify.api_router import classify, create_classify
+        from .classify.protocol import ClassificationRequest
+
+        invocation_types += [
+            (ClassificationRequest, (classify, create_classify)),
+        ]
+
+    if enable_scoring_api(supported_tasks, model_config):
+        from .scoring.api_router import do_rerank, rerank
+        from .scoring.protocol import RerankRequest
+
+        invocation_types += [
+            (RerankRequest, (rerank, do_rerank)),
+        ]
+
+        from .scoring.api_router import create_score, score
+        from .scoring.protocol import ScoreRequest
+
+        invocation_types += [
+            (ScoreRequest, (score, create_score)),
+        ]
+
+    if any(task in POOLING_TASKS for task in supported_tasks):
+        from .pooling.api_router import create_pooling, pooling
+        from .pooling.protocol import PoolingRequest
+
+        invocation_types += [
+            (PoolingRequest, (pooling, create_pooling)),
+        ]
+
+    return invocation_types
diff --git a/vllm/entrypoints/pooling/io_processor_factories.py b/vllm/entrypoints/pooling/io_processor_factories.py
deleted file mode 100644
index f0c0f5490313..000000000000
--- a/vllm/entrypoints/pooling/io_processor_factories.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-
-from vllm.config import ModelConfig
-from vllm.entrypoints.chat_utils import ChatTemplateConfig
-from vllm.entrypoints.pooling.base.io_processor import PoolingIOProcessor
-from vllm.renderers import BaseRenderer
-from vllm.tasks import SupportedTask
-
-
-def init_pooling_io_processors(
-    supported_tasks: tuple[SupportedTask, ...],
-    model_config: ModelConfig,
-    renderer: BaseRenderer,
-    chat_template_config: ChatTemplateConfig,
-) -> dict[str, PoolingIOProcessor]:
-    processors: list[tuple[str, type[PoolingIOProcessor]]] = []
-    if "classify" in supported_tasks:
-        from vllm.entrypoints.pooling.classify.io_processor import ClassifyIOProcessor
-
-        processors.append(("classify", ClassifyIOProcessor))
-    if "embed" in supported_tasks:
-        from vllm.entrypoints.pooling.embed.io_processor import EmbedIOProcessor
-
-        processors.append(("embed", EmbedIOProcessor))
-
-    return {
-        task: processor_cls(
-            model_config=model_config,
-            renderer=renderer,
-            chat_template_config=chat_template_config,
-        )
-        for task, processor_cls in processors
-    }
diff --git a/vllm/entrypoints/pooling/offline.py b/vllm/entrypoints/pooling/offline.py
new file mode 100644
index 000000000000..f3ea08bf42a0
--- /dev/null
+++ b/vllm/entrypoints/pooling/offline.py
@@ -0,0 +1,510 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from abc import ABC, abstractmethod
+from collections.abc import Callable, Iterable, Sequence
+from typing import Any
+
+from tqdm.auto import tqdm
+from typing_extensions import TypeVar
+
+from vllm.config import ModelConfig
+from vllm.entrypoints.chat_utils import ChatTemplateConfig
+from vllm.inputs import (
+    DataPrompt,
+    EngineInput,
+    PromptType,
+)
+from vllm.logger import init_logger
+from vllm.lora.request import LoRARequest
+from vllm.outputs import (
+    ClassificationRequestOutput,
+    EmbeddingRequestOutput,
+    PoolingRequestOutput,
+    RequestOutput,
+    ScoringRequestOutput,
+)
+from vllm.pooling_params import PoolingParams
+from vllm.renderers import BaseRenderer
+from vllm.sampling_params import SamplingParams
+from vllm.tasks import SCORE_TYPE_MAP, PoolingTask, SupportedTask
+from vllm.v1.engine.llm_engine import LLMEngine
+
+from .factories import init_pooling_io_processors
+from .scoring.io_processor import ScoringIOProcessor
+from .scoring.typing import ScoreInput
+from .typing import OfflineInputsContext, OfflineOutputsContext
+
+logger = init_logger(__name__)
+
+_P = TypeVar("_P", bound=SamplingParams | PoolingParams | None)
+_O = TypeVar(
+    "_O",
+    bound=RequestOutput | PoolingRequestOutput,
+    default=RequestOutput | PoolingRequestOutput,
+)
+
+
+class PoolingOfflineMixin(ABC):
+    """Offline inference for pooling models"""
+
+    renderer: BaseRenderer
+    llm_engine: "LLMEngine"
+    model_config: ModelConfig
+    runner_type: str
+    chat_template: str | None
+    supported_tasks: tuple[SupportedTask, ...]
+
+    def __init__(self):
+        self.pooling_task = self.model_config.get_pooling_task(self.supported_tasks)
+        if self.pooling_task is not None:
+            logger.info("Supported pooling task: %s", self.pooling_task)
+
+        self.chat_template_config = ChatTemplateConfig(chat_template=self.chat_template)
+        self.pooling_io_processors = init_pooling_io_processors(
+            supported_tasks=self.supported_tasks,
+            vllm_config=self.llm_engine.vllm_config,
+            renderer=self.renderer,
+            chat_template_config=self.chat_template_config,
+        )
+
+    def encode(
+        self,
+        prompts: PromptType | Sequence[PromptType] | DataPrompt,
+        pooling_params: PoolingParams | Sequence[PoolingParams] | None = None,
+        *,
+        use_tqdm: bool | Callable[..., tqdm] = True,
+        lora_request: list[LoRARequest] | LoRARequest | None = None,
+        pooling_task: PoolingTask | None = None,
+        tokenization_kwargs: dict[str, Any] | None = None,
+    ) -> list[PoolingRequestOutput]:
+        """Apply pooling to the hidden states corresponding to the input
+        prompts.
+
+        This class automatically batches the given prompts, considering
+        the memory constraint. For the best performance, put all of your prompts
+        into a single list and pass it to this method.
+
+        Args:
+            prompts: The prompts to the LLM. You may pass a sequence of prompts
+                for batch inference. See [PromptType][vllm.inputs.PromptType]
+                for more details about the format of each prompt.
+            pooling_params: The pooling parameters for pooling. If None, we
+                use the default pooling parameters.
+            use_tqdm: If `True`, shows a tqdm progress bar.
+                If a callable (e.g., `functools.partial(tqdm, leave=False)`),
+                it is used to create the progress bar.
+                If `False`, no progress bar is created.
+            lora_request: LoRA request to use for generation, if any.
+            pooling_task: Override the pooling task to use.
+            tokenization_kwargs: Overrides for `tokenizer.encode`.
+
+        Returns:
+            A list of `PoolingRequestOutput` objects containing the
+            pooled hidden states in the same order as the input prompts.
+        """
+
+        if isinstance(prompts, dict) and "data" in prompts and pooling_task != "plugin":
+            raise ValueError(
+                "The 'data' field is only supported for the 'plugin' pooling task."
+            )
+        self._verify_pooling_task(pooling_task)
+        assert pooling_task is not None and pooling_task in self.pooling_io_processors
+
+        io_processor = self.pooling_io_processors[pooling_task]
+
+        if pooling_params is None:
+            pooling_params = PoolingParams()
+
+        ctx = OfflineInputsContext(
+            prompts=prompts,
+            pooling_params=pooling_params,
+            tokenization_kwargs=tokenization_kwargs,
+        )
+
+        engine_inputs = io_processor.pre_process_offline(ctx)
+        n_inputs = len(engine_inputs)
+        assert ctx.pooling_params is not None
+
+        params_seq = self._params_to_seq(ctx.pooling_params, n_inputs)
+
+        for param in params_seq:
+            if param.task is None:
+                param.task = pooling_task
+            elif pooling_task == "plugin":
+                # `plugin` task uses io_processor.parse_request to verify inputs.
+                # We actually allow plugin to overwrite pooling_task.
+                pass
+            elif param.task != pooling_task:
+                msg = f"You cannot overwrite {param.task=!r} with {pooling_task=!r}!"
+                raise ValueError(msg)
+
+        seq_lora_requests = self._lora_request_to_seq(lora_request, n_inputs)
+        seq_priority = self._priority_to_seq(None, n_inputs)
+
+        self._render_and_add_requests(
+            prompts=engine_inputs,
+            params=params_seq,
+            lora_requests=seq_lora_requests,
+            priorities=seq_priority,
+        )
+
+        outputs = self._run_engine(use_tqdm=use_tqdm, output_type=PoolingRequestOutput)
+        outputs = io_processor.post_process_offline(
+            ctx=OfflineOutputsContext(outputs=outputs)
+        )
+        return outputs
+
+    def _verify_pooling_task(self, pooling_task: PoolingTask | None):
+        if self.runner_type != "pooling":
+            raise ValueError(
+                "LLM.encode() is only supported for pooling models. "
+                "Try passing `--runner pooling` to use the model as a "
+                "pooling model."
+            )
+
+        if pooling_task is None:
+            raise ValueError(
+                """
+                pooling_task required for `LLM.encode`.
+                Please use one of the more specific methods or set the pooling_task when using `LLM.encode`:
+                  - For embeddings, use `LLM.embed(...)` or `pooling_task="embed"`.
+                  - For classification logits, use `LLM.classify(...)` or `pooling_task="classify"`.
+                  - For similarity scores, use `LLM.score(...)`.
+                  - For rewards, `pooling_task="classify"` or `pooling_task="token_classify"`.
+                  - For token classification, use `pooling_task="token_classify"`.
+                  - For multi-vector retrieval, use `pooling_task="token_embed"`.
+                """  # noqa: E501
+            )
+
+        if (
+            pooling_task in ("embed", "token_embed")
+            and pooling_task not in self.supported_tasks
+        ):
+            raise ValueError(
+                "Embedding API is not supported by this model. "
+                "Try converting the model using `--convert embed`."
+            )
+
+        if (
+            pooling_task in ("classify", "token_classify")
+            and pooling_task not in self.supported_tasks
+        ):
+            raise ValueError(
+                "Classification API is not supported by this model. "
+                "Try converting the model using `--convert classify`."
+            )
+
+        # plugin task uses io_processor.parse_request to verify inputs
+        if pooling_task != "plugin" and pooling_task != self.pooling_task:
+            if pooling_task not in self.supported_tasks:
+                raise ValueError(
+                    f"Unsupported task: {pooling_task!r} "
+                    f"Supported tasks: {self.supported_tasks}"
+                )
+            else:
+                raise ValueError(
+                    f"Try switching the model's pooling_task "
+                    f'via `PoolerConfig(task="{pooling_task}")`'
+                )
+
+        if pooling_task == "plugin" and "plugin" not in self.pooling_io_processors:
+            raise ValueError(
+                "No IOProcessor plugin installed. Please refer "
+                "to the documentation and to the "
+                "'prithvi_geospatial_mae_io_processor' "
+                "offline inference example for more details."
+            )
+
+    def embed(
+        self,
+        prompts: PromptType | Sequence[PromptType],
+        *,
+        use_tqdm: bool | Callable[..., tqdm] = True,
+        pooling_params: PoolingParams | Sequence[PoolingParams] | None = None,
+        lora_request: list[LoRARequest] | LoRARequest | None = None,
+        tokenization_kwargs: dict[str, Any] | None = None,
+    ) -> list[EmbeddingRequestOutput]:
+        """
+        Generate an embedding vector for each prompt.
+
+        This class automatically batches the given prompts, considering
+        the memory constraint. For the best performance, put all of your prompts
+        into a single list and pass it to this method.
+
+        Args:
+            prompts: The prompts to the LLM. You may pass a sequence of prompts
+                for batch inference. See [PromptType][vllm.inputs.PromptType]
+                for more details about the format of each prompt.
+            pooling_params: The pooling parameters for pooling. If None, we
+                use the default pooling parameters.
+            use_tqdm: If `True`, shows a tqdm progress bar.
+                If a callable (e.g., `functools.partial(tqdm, leave=False)`),
+                it is used to create the progress bar.
+                If `False`, no progress bar is created.
+            lora_request: LoRA request to use for generation, if any.
+            tokenization_kwargs: Overrides for `tokenizer.encode`.
+
+        Returns:
+            A list of `EmbeddingRequestOutput` objects containing the
+            embedding vectors in the same order as the input prompts.
+        """
+
+        items = self.encode(
+            prompts,
+            use_tqdm=use_tqdm,
+            pooling_params=pooling_params,
+            lora_request=lora_request,
+            pooling_task="embed",
+            tokenization_kwargs=tokenization_kwargs,
+        )
+
+        return [EmbeddingRequestOutput.from_base(item) for item in items]
+
+    def classify(
+        self,
+        prompts: PromptType | Sequence[PromptType],
+        *,
+        pooling_params: PoolingParams | Sequence[PoolingParams] | None = None,
+        use_tqdm: bool | Callable[..., tqdm] = True,
+        lora_request: list[LoRARequest] | LoRARequest | None = None,
+        tokenization_kwargs: dict[str, Any] | None = None,
+    ) -> list[ClassificationRequestOutput]:
+        """
+        Generate class logits for each prompt.
+
+        This class automatically batches the given prompts, considering
+        the memory constraint. For the best performance, put all of your prompts
+        into a single list and pass it to this method.
+
+        Args:
+            prompts: The prompts to the LLM. You may pass a sequence of prompts
+                for batch inference. See [PromptType][vllm.inputs.PromptType]
+                for more details about the format of each prompt.
+            pooling_params: The pooling parameters for pooling. If None, we
+                use the default pooling parameters.
+            use_tqdm: If `True`, shows a tqdm progress bar.
+                If a callable (e.g., `functools.partial(tqdm, leave=False)`),
+                it is used to create the progress bar.
+                If `False`, no progress bar is created.
+            lora_request: LoRA request to use for generation, if any.
+            tokenization_kwargs: Overrides for `tokenizer.encode`.
+
+        Returns:
+            A list of `ClassificationRequestOutput` objects containing the
+            embedding vectors in the same order as the input prompts.
+        """
+
+        items = self.encode(
+            prompts,
+            use_tqdm=use_tqdm,
+            pooling_params=pooling_params,
+            lora_request=lora_request,
+            pooling_task="classify",
+            tokenization_kwargs=tokenization_kwargs,
+        )
+
+        return [ClassificationRequestOutput.from_base(item) for item in items]
+
+    def reward(
+        self,
+        prompts: PromptType | Sequence[PromptType],
+        /,
+        *,
+        pooling_params: PoolingParams | Sequence[PoolingParams] | None = None,
+        use_tqdm: bool | Callable[..., tqdm] = True,
+        lora_request: list[LoRARequest] | LoRARequest | None = None,
+        tokenization_kwargs: dict[str, Any] | None = None,
+    ) -> list[PoolingRequestOutput]:
+        """
+        Generate rewards for each prompt.
+
+        Args:
+            prompts: The prompts to the LLM. You may pass a sequence of prompts
+                for batch inference. See [PromptType][vllm.inputs.PromptType]
+                for more details about the format of each prompt.
+            pooling_params: The pooling parameters for pooling. If None, we
+                use the default pooling parameters.
+            use_tqdm: If `True`, shows a tqdm progress bar.
+                If a callable (e.g., `functools.partial(tqdm, leave=False)`),
+                it is used to create the progress bar.
+                If `False`, no progress bar is created.
+            lora_request: LoRA request to use for generation, if any.
+            tokenization_kwargs: Overrides for `tokenizer.encode`.
+
+        Returns:
+            A list of `PoolingRequestOutput` objects containing the
+            pooled hidden states in the same order as the input prompts.
+        """
+        logger.warning_once(
+            "`llm.reward` api is deprecated and will be removed in v0.23. "
+            'Please use `LLM.encode` with `pooling_task="classify"` or '
+            '`pooling_task="token_classify"` instead.'
+        )
+        return self.encode(
+            prompts,
+            use_tqdm=use_tqdm,
+            lora_request=lora_request,
+            pooling_params=pooling_params,
+            pooling_task="token_classify",
+            tokenization_kwargs=tokenization_kwargs,
+        )
+
+    def score(
+        self,
+        data_1: ScoreInput | list[ScoreInput],
+        data_2: ScoreInput | list[ScoreInput],
+        /,
+        *,
+        use_tqdm: bool | Callable[..., tqdm] = True,
+        pooling_params: PoolingParams | None = None,
+        lora_request: list[LoRARequest] | LoRARequest | None = None,
+        tokenization_kwargs: dict[str, Any] | None = None,
+        chat_template: str | None = None,
+    ) -> list[ScoringRequestOutput]:
+        """Generate similarity scores for all pairs `<text,text_pair>` or
+          `<multi-modal data, multi-modal data pair>`.
+
+        The inputs can be `1 -> 1`, `1 -> N` or `N -> N`.
+        In the `1 - N` case the `data_1` input will be replicated `N`
+        times to pair with the `data_2` inputs.
+        The input pairs are used to build a list of prompts for the
+        cross encoder model. This class automatically batches the prompts,
+        considering the memory constraint. For the best performance, put all
+        of your inputs into a single list and pass it to this method.
+
+        Supports both text and multi-modal data (images, etc.) when used with
+        appropriate multi-modal models. For multi-modal inputs, ensure the
+        prompt structure matches the model's expected input format.
+
+        Args:
+            data_1: Can be a single prompt, a list of prompts or
+                `ScoreMultiModalParam`, which can contain either text or
+                multi-modal data. When a list, it must have the same length as
+                the `data_2` list.
+            data_2: The data to pair with the query to form the input to
+                the LLM. Can be text or multi-modal data. See [PromptType]
+                [vllm.inputs.PromptType] for more details about the format of
+                each prompt.
+            pooling_params: The pooling parameters for pooling. If None, we
+                use the default pooling parameters.
+            use_tqdm: If `True`, shows a tqdm progress bar.
+                If a callable (e.g., `functools.partial(tqdm, leave=False)`),
+                it is used to create the progress bar.
+                If `False`, no progress bar is created.
+            lora_request: LoRA request to use for generation, if any.
+            chat_template: The chat template to use for the scoring. If None, we
+                use the model's default chat template.
+            tokenization_kwargs: Overrides for `tokenizer.encode`.
+        Returns:
+            A list of `ScoringRequestOutput` objects containing the
+            generated scores in the same order as the input prompts.
+        """
+
+        if self.runner_type != "pooling":
+            raise ValueError(
+                "LLM.score() is only supported for pooling models. "
+                "Try passing `--runner pooling` to use the model as a "
+                "pooling model."
+            )
+
+        score_type: str | None = SCORE_TYPE_MAP.get(self.pooling_task, None)  # type: ignore[arg-type]
+        if (
+            score_type == "cross-encoder"
+            and getattr(self.model_config.hf_config, "num_labels", 0) != 1
+        ):
+            raise ValueError("Scoring API is only enabled for num_labels == 1.")
+
+        if score_type is None or score_type not in self.pooling_io_processors:
+            raise ValueError("This model does not support the Scoring API.")
+
+        io_processor = self.pooling_io_processors[score_type]
+        assert isinstance(io_processor, ScoringIOProcessor)
+
+        pooling_task = io_processor.pooling_task
+        scoring_data = io_processor.valid_inputs(data_1, data_2)
+        n_queries = len(scoring_data.data_1)
+
+        if pooling_params is None:
+            pooling_params = PoolingParams()
+
+        ctx = OfflineInputsContext(
+            prompts=scoring_data,
+            pooling_params=pooling_params,
+            tokenization_kwargs=tokenization_kwargs,
+            chat_template=chat_template,
+            n_queries=n_queries,
+        )
+
+        engine_inputs = io_processor.pre_process_offline(ctx)
+        n_inputs = len(engine_inputs)
+
+        seq_lora_requests = self._lora_request_to_seq(lora_request, n_inputs)
+        params_seq = self._params_to_seq(ctx.pooling_params, n_inputs)
+
+        for param in params_seq:
+            if param.task is None:
+                param.task = pooling_task
+            elif param.task != pooling_task:
+                msg = f"You cannot overwrite {param.task=!r} with {pooling_task=!r}!"
+                raise ValueError(msg)
+
+        seq_priority = self._priority_to_seq(None, n_inputs)
+
+        self._render_and_add_requests(
+            prompts=engine_inputs,
+            params=params_seq,
+            lora_requests=seq_lora_requests,
+            priorities=seq_priority,
+        )
+
+        outputs = self._run_engine(use_tqdm=use_tqdm, output_type=PoolingRequestOutput)
+        outputs = io_processor.post_process_offline(
+            ctx=OfflineOutputsContext(outputs=outputs, n_queries=n_queries),
+        )
+
+        return [ScoringRequestOutput.from_base(item) for item in outputs]
+
+    @abstractmethod
+    def _params_to_seq(
+        self,
+        params: _P | Sequence[_P],
+        num_requests: int,
+    ) -> Sequence[_P]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def _lora_request_to_seq(
+        self,
+        lora_request: LoRARequest | None | Sequence[LoRARequest | None],
+        num_requests: int,
+    ) -> Sequence[LoRARequest | None]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def _priority_to_seq(
+        self,
+        priority: list[int] | None,
+        num_requests: int,
+    ) -> Sequence[int]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def _render_and_add_requests(
+        self,
+        prompts: Iterable[EngineInput],
+        params: Sequence[SamplingParams | PoolingParams],
+        *,
+        lora_requests: Sequence[LoRARequest | None] | None = None,
+        priorities: Sequence[int] | None = None,
+    ) -> list[str]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def _run_engine(
+        self,
+        output_type: type[_O] | tuple[type[_O], ...],
+        *,
+        use_tqdm: bool | Callable[..., tqdm] = True,
+    ) -> list[_O]:
+        raise NotImplementedError
diff --git a/vllm/entrypoints/pooling/pooling/api_router.py b/vllm/entrypoints/pooling/pooling/api_router.py
index f63a8edf6ca8..0c77c050dc0d 100644
--- a/vllm/entrypoints/pooling/pooling/api_router.py
+++ b/vllm/entrypoints/pooling/pooling/api_router.py
@@ -3,24 +3,18 @@
 from http import HTTPStatus
 
 from fastapi import APIRouter, Depends, Request
-from fastapi.responses import JSONResponse, StreamingResponse
-from typing_extensions import assert_never
 
 from vllm.entrypoints.openai.engine.protocol import ErrorResponse
 from vllm.entrypoints.openai.utils import validate_json_request
-from vllm.entrypoints.pooling.pooling.protocol import (
-    IOProcessorResponse,
-    PoolingBytesResponse,
-    PoolingRequest,
-    PoolingResponse,
-)
-from vllm.entrypoints.pooling.pooling.serving import OpenAIServingPooling
 from vllm.entrypoints.utils import load_aware_call, with_cancellation
 
+from .protocol import PoolingRequest
+from .serving import ServingPooling
+
 router = APIRouter()
 
 
-def pooling(request: Request) -> OpenAIServingPooling | None:
+def pooling(request: Request) -> ServingPooling | None:
     return request.app.state.serving_pooling
 
 
@@ -39,19 +33,4 @@ async def create_pooling(request: PoolingRequest, raw_request: Request):
     if handler is None:
         raise NotImplementedError("The model does not support Pooling API")
 
-    generator = await handler.create_pooling(request, raw_request)
-
-    if isinstance(generator, ErrorResponse):
-        return JSONResponse(
-            content=generator.model_dump(), status_code=generator.error.code
-        )
-    elif isinstance(generator, (PoolingResponse, IOProcessorResponse)):
-        return JSONResponse(content=generator.model_dump())
-    elif isinstance(generator, PoolingBytesResponse):
-        return StreamingResponse(
-            content=generator.content,
-            headers=generator.headers,
-            media_type=generator.media_type,
-        )
-
-    assert_never(generator)
+    return await handler(request, raw_request)
diff --git a/vllm/entrypoints/pooling/pooling/io_processor.py b/vllm/entrypoints/pooling/pooling/io_processor.py
new file mode 100644
index 000000000000..b07ceeede32b
--- /dev/null
+++ b/vllm/entrypoints/pooling/pooling/io_processor.py
@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections.abc import Sequence
+from typing import Any
+
+from vllm import PoolingParams, PoolingRequestOutput
+from vllm.inputs import EngineInput
+from vllm.logger import init_logger
+from vllm.plugins.io_processors import get_io_processor
+from vllm.renderers.inputs.preprocess import parse_model_prompt, prompt_to_seq
+
+from ..base.io_processor import PoolingIOProcessor
+from ..typing import OfflineInputsContext, OfflineOutputsContext, PoolingServeContext
+from .protocol import IOProcessorRequest, IOProcessorResponse
+
+logger = init_logger(__name__)
+
+
+class PluginWithoutIOProcessorPlugins(PoolingIOProcessor):
+    # Some models, such as Terratorch (tests/models/test_terratorch.py),
+    # use plugin tasks in the pooler but do not use IO Processor plugins.
+    name = "plugin"
+
+
+class PluginWithIOProcessorPlugins(PoolingIOProcessor):
+    """IO Processor plugins are a feature that allows pre- and post-processing
+    of the model input and output for pooling models."""
+
+    name = "plugin"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        io_processor = get_io_processor(
+            self.vllm_config,
+            self.renderer,
+            self.model_config.io_processor_plugin,
+        )
+
+        assert io_processor is not None
+        self.io_processor = io_processor
+
+    #######################################
+    # online APIs
+
+    def pre_process_online(self, ctx: PoolingServeContext):
+        assert isinstance(ctx.request, IOProcessorRequest)
+
+        validated_prompt = self.io_processor.parse_data(ctx.request.data)
+
+        raw_prompts = self.io_processor.pre_process(
+            prompt=validated_prompt, request_id=ctx.request_id
+        )
+
+        parsed_prompts = [
+            (
+                prompt
+                if isinstance(prompt, bytes)
+                else parse_model_prompt(self.model_config, prompt)
+            )
+            for prompt in prompt_to_seq(raw_prompts)
+        ]
+
+        tok_params = ctx.request.build_tok_params(self.model_config)
+
+        ctx.engine_inputs = self.renderer.render_cmpl(
+            parsed_prompts,
+            tok_params,
+            prompt_extras={
+                k: v
+                for k in ("mm_processor_kwargs", "cache_salt")
+                if (v := getattr(ctx.request, k, None)) is not None
+            },
+        )
+
+        pooling_params = self.io_processor.merge_pooling_params()
+        if pooling_params.task is None:
+            pooling_params.task = "plugin"
+        ctx.pooling_params = pooling_params
+
+    def post_process_online(
+        self,
+        ctx: PoolingServeContext,
+    ):
+        output = self.io_processor.post_process(
+            ctx.final_res_batch,
+            request_id=ctx.request_id,
+        )
+
+        if callable(
+            output_to_response := getattr(self.io_processor, "output_to_response", None)
+        ):
+            logger.warning_once(
+                "`IOProcessor.output_to_response` is deprecated. To ensure "
+                "consistency between offline and online APIs, "
+                "`IOProcessorResponse` will become a transparent wrapper "
+                "around output data from v0.19 onwards.",
+            )
+
+            if hasattr(output, "request_id") and output.request_id is None:
+                output.request_id = ctx.request_id  # type: ignore
+
+            ctx.response = output_to_response(output)  # type: ignore
+        else:
+            ctx.response = IOProcessorResponse(request_id=ctx.request_id, data=output)
+
+    #######################################
+    # offline APIs
+
+    def pre_process_offline(self, ctx: OfflineInputsContext) -> Sequence[EngineInput]:
+        assert isinstance(ctx.prompts, dict) and "data" in ctx.prompts
+        assert ctx.pooling_params is not None
+
+        # Validate the request data is valid for the loaded plugin
+        prompt_data = ctx.prompts.get("data")
+        if prompt_data is None:
+            raise ValueError(
+                "The 'data' field of the prompt is expected to contain "
+                "the prompt data and it cannot be None. "
+                "Refer to the documentation of the IOProcessor "
+                "in use for more details."
+            )
+        validated_prompt = self.io_processor.parse_data(prompt_data)
+
+        # obtain the actual model prompts from the pre-processor
+        prompts = self.io_processor.pre_process(prompt=validated_prompt)
+        prompts_seq = prompt_to_seq(prompts)
+
+        params_seq: list[PoolingParams] = [
+            self.io_processor.merge_pooling_params(param)
+            for param in self._params_to_seq(
+                ctx.pooling_params,
+                len(prompts_seq),
+            )
+        ]
+        for p in params_seq:
+            if p.task is None:
+                p.task = "plugin"
+
+        ctx.pooling_params = params_seq
+        ctx.prompts = prompts_seq
+        return super().pre_process_offline(ctx)
+
+    def post_process_offline(
+        self,
+        ctx: OfflineOutputsContext,
+    ) -> list[PoolingRequestOutput]:
+        processed_outputs = self.io_processor.post_process(ctx.outputs)
+
+        return [
+            PoolingRequestOutput[Any](
+                request_id="",
+                outputs=processed_outputs,
+                num_cached_tokens=getattr(processed_outputs, "num_cached_tokens", 0),
+                prompt_token_ids=[],
+                finished=True,
+            )
+        ]
diff --git a/vllm/entrypoints/pooling/pooling/protocol.py b/vllm/entrypoints/pooling/pooling/protocol.py
index 098690db262d..b2a43b1935ec 100644
--- a/vllm/entrypoints/pooling/pooling/protocol.py
+++ b/vllm/entrypoints/pooling/pooling/protocol.py
@@ -8,17 +8,19 @@
 from vllm import PoolingParams
 from vllm.config import ModelConfig
 from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo
-from vllm.entrypoints.pooling.base.protocol import (
+from vllm.renderers import TokenizeParams
+from vllm.tasks import PoolingTask
+from vllm.utils import random_uuid
+
+from ..base.protocol import (
     ChatRequestMixin,
     ClassifyRequestMixin,
     CompletionRequestMixin,
     EmbedRequestMixin,
     EncodingRequestMixin,
+    FixedMaxLenTokenizeParamsMixin,
     PoolingBasicRequestMixin,
 )
-from vllm.renderers import TokenizeParams
-from vllm.tasks import PoolingTask
-from vllm.utils import random_uuid
 
 
 class PoolingCompletionRequest(
@@ -26,22 +28,10 @@ class PoolingCompletionRequest(
     CompletionRequestMixin,
     EmbedRequestMixin,
     ClassifyRequestMixin,
+    FixedMaxLenTokenizeParamsMixin,
 ):
     task: PoolingTask | None = None
 
-    def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
-        encoder_config = model_config.encoder_config or {}
-
-        return TokenizeParams(
-            max_total_tokens=model_config.max_model_len,
-            max_output_tokens=0,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
-            truncation_side=self.truncation_side,
-            do_lower_case=encoder_config.get("do_lower_case", False),
-            add_special_tokens=self.add_special_tokens,
-            max_total_tokens_param="max_model_len",
-        )
-
     def to_pooling_params(self):
         return PoolingParams(
             task=self.task,
@@ -51,23 +41,14 @@ def to_pooling_params(self):
 
 
 class PoolingChatRequest(
-    PoolingBasicRequestMixin, ChatRequestMixin, EmbedRequestMixin, ClassifyRequestMixin
+    PoolingBasicRequestMixin,
+    ChatRequestMixin,
+    EmbedRequestMixin,
+    ClassifyRequestMixin,
+    FixedMaxLenTokenizeParamsMixin,
 ):
     task: PoolingTask | None = None
 
-    def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
-        encoder_config = model_config.encoder_config or {}
-
-        return TokenizeParams(
-            max_total_tokens=model_config.max_model_len,
-            max_output_tokens=0,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
-            truncation_side=self.truncation_side,
-            do_lower_case=encoder_config.get("do_lower_case", False),
-            add_special_tokens=self.add_special_tokens,
-            max_total_tokens_param="max_model_len",
-        )
-
     def to_pooling_params(self):
         return PoolingParams(
             task=self.task,
@@ -84,16 +65,16 @@ class IOProcessorRequest(PoolingBasicRequestMixin, EncodingRequestMixin, Generic
     task: PoolingTask = "plugin"
 
     def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
-        encoder_config = model_config.encoder_config or {}
-
-        return TokenizeParams(
+        return self._build_pooling_tok_params(
+            model_config,
+            add_special_tokens=not model_config.is_encoder_decoder,
             max_total_tokens=model_config.max_model_len,
             max_output_tokens=0,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
-            truncation_side=self.truncation_side,
-            do_lower_case=encoder_config.get("do_lower_case", False),
-            add_special_tokens=not model_config.is_encoder_decoder,
-            max_total_tokens_param="max_model_len",
+        )
+
+    def to_pooling_params(self):
+        return PoolingParams(
+            task=self.task,
         )
 
 
diff --git a/vllm/entrypoints/pooling/pooling/serving.py b/vllm/entrypoints/pooling/pooling/serving.py
index 4706684f3637..1783c861fcc7 100644
--- a/vllm/entrypoints/pooling/pooling/serving.py
+++ b/vllm/entrypoints/pooling/pooling/serving.py
@@ -1,252 +1,132 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import asyncio
-import json
-import time
-from collections.abc import AsyncGenerator, Callable, Sequence
-from functools import partial
-from typing import Final, Literal, cast
-
-from fastapi import Request
+from fastapi.responses import JSONResponse, Response, StreamingResponse
 from typing_extensions import assert_never
 
-from vllm.engine.protocol import EngineClient
-from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
-from vllm.entrypoints.logger import RequestLogger
-from vllm.entrypoints.openai.engine.protocol import ErrorResponse, UsageInfo
-from vllm.entrypoints.openai.engine.serving import OpenAIServing
-from vllm.entrypoints.openai.models.serving import OpenAIServingModels
-from vllm.entrypoints.pooling.pooling.protocol import (
+from vllm.logger import init_logger
+from vllm.outputs import PoolingRequestOutput
+from vllm.tasks import SupportedTask
+from vllm.utils.serial_utils import EmbedDType, Endianness
+
+from ..base.io_processor import PoolingIOProcessor
+from ..base.serving import PoolingServingBase
+from ..factories import init_pooling_io_processors
+from ..typing import AnyPoolingRequest, PoolingServeContext
+from ..utils import (
+    BytesEncodingFormat,
+    JsonEncodingFormat,
+    build_pooling_bytes_streaming_response,
+    get_json_response_cls,
+    get_pooling_output_encoder,
+    get_pooling_usage,
+)
+from .protocol import (
     IOProcessorRequest,
-    IOProcessorResponse,
-    PoolingBytesResponse,
-    PoolingChatRequest,
-    PoolingCompletionRequest,
     PoolingRequest,
     PoolingResponse,
     PoolingResponseData,
 )
-from vllm.entrypoints.pooling.utils import (
-    encode_pooling_bytes,
-    encode_pooling_output_base64,
-    encode_pooling_output_float,
-)
-from vllm.entrypoints.serve.render.serving import OpenAIServingRender
-from vllm.inputs import EngineInput
-from vllm.logger import init_logger
-from vllm.outputs import PoolingRequestOutput
-from vllm.renderers.inputs.preprocess import prompt_to_seq
-from vllm.tasks import SupportedTask
-from vllm.utils.async_utils import merge_async_iterators
-from vllm.utils.serial_utils import EmbedDType, EncodingFormat, Endianness
 
 logger = init_logger(__name__)
 
 
-class OpenAIServingPooling(OpenAIServing):
+class ServingPooling(PoolingServingBase):
+    request_id_prefix = "pooling"
+
     def __init__(
         self,
-        engine_client: EngineClient,
-        models: OpenAIServingModels,
-        openai_serving_render: OpenAIServingRender,
+        *args,
         supported_tasks: tuple[SupportedTask, ...],
-        *,
-        request_logger: RequestLogger | None,
-        chat_template: str | None,
-        chat_template_content_format: ChatTemplateContentFormatOption,
-        trust_request_chat_template: bool = False,
-    ) -> None:
-        super().__init__(
-            engine_client=engine_client,
-            models=models,
-            request_logger=request_logger,
-        )
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+
         self.supported_tasks = supported_tasks
         self.pooling_task = self.model_config.get_pooling_task(supported_tasks)
-        self.openai_serving_render = openai_serving_render
-        self.chat_template = chat_template
-        self.chat_template_content_format: Final = chat_template_content_format
-        self.trust_request_chat_template = trust_request_chat_template
-
-    async def create_pooling(
-        self,
-        request: PoolingRequest,
-        raw_request: Request | None = None,
-    ) -> PoolingResponse | IOProcessorResponse | PoolingBytesResponse | ErrorResponse:
-        """
-        See https://platform.openai.com/docs/api-reference/embeddings/create
-        for the API specification. This API mimics the OpenAI Embedding API.
-        """
-        error_check_ret = await self._check_model(request)
-        if error_check_ret is not None:
-            return error_check_ret
-
-        model_name = self.models.model_name()
+        self.io_processors = init_pooling_io_processors(
+            supported_tasks=supported_tasks,
+            vllm_config=self.vllm_config,
+            renderer=self.renderer,
+            chat_template_config=self.chat_template_config,
+        )
+        self.json_response_cls = get_json_response_cls()
 
-        request_id = f"pool-{self._base_request_id(raw_request)}"
-        created_time = int(time.time())
+    def get_io_processor(self, request: AnyPoolingRequest) -> PoolingIOProcessor:
+        assert isinstance(request, PoolingRequest)
+        pooling_task = self._verify_pooling_task(request)
+        return self.io_processors[pooling_task]
 
-        lora_request = self._maybe_get_adapters(request)
+    def _verify_pooling_task(self, request: PoolingRequest) -> str:
+        if getattr(request, "dimensions", None) is not None:
+            raise ValueError("dimensions is currently not supported")
 
         if request.task is None:
             request.task = self.pooling_task
 
-        if getattr(request, "dimensions", None) is not None:
-            return self.create_error_response("dimensions is currently not supported")
+        if isinstance(request, IOProcessorRequest):
+            request.task = "plugin"
+
+        assert request.task is not None
+        pooling_task = request.task
 
         # plugin task uses io_processor.parse_request to verify inputs
-        if request.task != "plugin" and request.task != self.pooling_task:
-            if request.task not in self.supported_tasks:
+        if pooling_task != "plugin" and pooling_task != self.pooling_task:
+            if pooling_task not in self.supported_tasks:
                 raise ValueError(
-                    f"Unsupported task: {request.task!r} "
+                    f"Unsupported task: {pooling_task!r} "
                     f"Supported tasks: {self.supported_tasks}"
                 )
             else:
-                logger.warning_once(
-                    "Pooling multitask support is deprecated and will be removed "
-                    "in v0.20. When the default pooling task is not what you want, you "
-                    'need to manually specify it via --pooler-config.task "%s". ',
-                    request.task,
-                )
-
-        engine_inputs: Sequence[EngineInput]
-        if use_io_processor := isinstance(request, IOProcessorRequest):
-            if self.io_processor is None:
                 raise ValueError(
-                    "No IOProcessor plugin installed. Please refer "
-                    "to the documentation and to the "
-                    "'prithvi_geospatial_mae_io_processor' "
-                    "offline inference example for more details."
+                    "Try switching the model's pooling_task "
+                    f"via --pooler-config.task {request.task}."
                 )
 
-            validated_prompt = self.io_processor.parse_data(request.data)
-
-            raw_prompts = await self.io_processor.pre_process_async(
-                prompt=validated_prompt, request_id=request_id
-            )
-            engine_inputs = await self.openai_serving_render.preprocess_cmpl(
-                request,
-                prompt_to_seq(raw_prompts),
-            )
-        elif isinstance(request, PoolingChatRequest):
-            error_check_ret = self.openai_serving_render.validate_chat_template(
-                request_chat_template=request.chat_template,
-                chat_template_kwargs=request.chat_template_kwargs,
-                trust_request_chat_template=self.trust_request_chat_template,
-            )
-            if error_check_ret is not None:
-                return error_check_ret
-
-            _, engine_inputs = await self.openai_serving_render.preprocess_chat(
-                request,
-                request.messages,
-                default_template=self.chat_template,
-                default_template_content_format=self.chat_template_content_format,
-                default_template_kwargs=None,
-            )
-        elif isinstance(request, PoolingCompletionRequest):
-            engine_inputs = await self.openai_serving_render.preprocess_completion(
-                request,
-                prompt_input=request.input,
-                prompt_embeds=None,
-            )
-        else:
-            raise ValueError(f"Unsupported request of type {type(request)}")
-
-        # Schedule the request and get the result generator.
-        generators: list[AsyncGenerator[PoolingRequestOutput, None]] = []
-        if use_io_processor:
-            assert self.io_processor is not None
-
-            pooling_params = self.io_processor.merge_pooling_params()
-            if pooling_params.task is None:
-                pooling_params.task = "plugin"
-        else:
-            pooling_params = request.to_pooling_params()  # type: ignore
-
-        for i, engine_input in enumerate(engine_inputs):
-            request_id_item = f"{request_id}-{i}"
-
-            self._log_inputs(
-                request_id_item,
-                engine_input,
-                params=pooling_params,
-                lora_request=lora_request,
-            )
-
-            trace_headers = (
-                None
-                if raw_request is None
-                else await self._get_trace_headers(raw_request.headers)
+        if pooling_task == "plugin" and "plugin" not in self.io_processors:
+            raise ValueError(
+                "No IOProcessor plugin installed. Please refer "
+                "to the documentation and to the "
+                "'prithvi_geospatial_mae_io_processor' "
+                "offline inference example for more details."
             )
 
-            generator = self.engine_client.encode(
-                engine_input,
-                pooling_params,
-                request_id_item,
-                lora_request=lora_request,
-                trace_headers=trace_headers,
-                priority=request.priority,
-            )
+        return pooling_task
 
-            generators.append(generator)
+    def _build_response(
+        self,
+        ctx: PoolingServeContext,
+    ) -> Response:
+        if ctx.response is not None:
+            # for IOProcessorResponse
+            return self.json_response_cls(content=ctx.response.model_dump())
 
-        result_generator = merge_async_iterators(*generators)
+        encoding_format = ctx.request.encoding_format
+        embed_dtype = ctx.request.embed_dtype
+        endianness = ctx.request.endianness
 
-        if use_io_processor:
-            assert self.io_processor is not None
-            output = await self.io_processor.post_process_async(
-                result_generator,
-                request_id=request_id,
+        if encoding_format == "float" or encoding_format == "base64":
+            return self.request_output_to_pooling_json_response(
+                ctx.final_res_batch,
+                ctx.request_id,
+                ctx.created_time,
+                ctx.model_name,
+                encoding_format,
+                embed_dtype,
+                endianness,
             )
 
-            if callable(
-                output_to_response := getattr(
-                    self.io_processor, "output_to_response", None
-                )
-            ):
-                logger.warning_once(
-                    "`IOProcessor.output_to_response` is deprecated. To ensure "
-                    "consistency between offline and online APIs, "
-                    "`IOProcessorResponse` will become a transparent wrapper "
-                    "around output data from v0.19 onwards.",
-                )
-
-                if hasattr(output, "request_id") and output.request_id is None:
-                    output.request_id = request_id  # type: ignore
-
-                return output_to_response(output)  # type: ignore
-
-            return IOProcessorResponse(request_id=request_id, data=output)
-
-        assert isinstance(request, (PoolingCompletionRequest, PoolingChatRequest))
-        num_prompts = len(engine_inputs)
-
-        # Non-streaming response
-        final_res_batch: list[PoolingRequestOutput | None]
-        final_res_batch = [None] * num_prompts
-        try:
-            async for i, res in result_generator:
-                final_res_batch[i] = res
-
-            assert all(final_res is not None for final_res in final_res_batch)
-
-            final_res_batch_checked = cast(list[PoolingRequestOutput], final_res_batch)
-
-            response = self.request_output_to_pooling_response(
-                final_res_batch_checked,
-                request_id,
-                created_time,
-                model_name,
-                request.encoding_format,
-                request.embed_dtype,
-                request.endianness,
+        if encoding_format == "bytes" or encoding_format == "bytes_only":
+            return self.request_output_to_pooling_bytes_response(
+                ctx.final_res_batch,
+                ctx.request_id,
+                ctx.created_time,
+                ctx.model_name,
+                encoding_format,
+                embed_dtype,
+                endianness,
             )
-        except asyncio.CancelledError:
-            return self.create_error_response("Client disconnected")
 
-        return response
+        assert_never(encoding_format)
 
     def request_output_to_pooling_json_response(
         self,
@@ -254,48 +134,34 @@ def request_output_to_pooling_json_response(
         request_id: str,
         created_time: int,
         model_name: str,
-        encoding_format: Literal["float", "base64"],
+        encoding_format: JsonEncodingFormat,
         embed_dtype: EmbedDType,
         endianness: Endianness,
-    ) -> PoolingResponse:
-        encode_fn = cast(
-            Callable[[PoolingRequestOutput], list[float] | str],
-            (
-                encode_pooling_output_float
-                if encoding_format == "float"
-                else partial(
-                    encode_pooling_output_base64,
-                    embed_dtype=embed_dtype,
-                    endianness=endianness,
-                )
-            ),
+    ) -> JSONResponse:
+        encode_fn = get_pooling_output_encoder(
+            encoding_format=encoding_format,
+            embed_dtype=embed_dtype,
+            endianness=endianness,
         )
 
         items: list[PoolingResponseData] = []
-        num_prompt_tokens = 0
 
         for idx, final_res in enumerate(final_res_batch):
             item = PoolingResponseData(
                 index=idx,
                 data=encode_fn(final_res),
             )
-            prompt_token_ids = final_res.prompt_token_ids
 
             items.append(item)
-            num_prompt_tokens += len(prompt_token_ids)
-
-        usage = UsageInfo(
-            prompt_tokens=num_prompt_tokens,
-            total_tokens=num_prompt_tokens,
-        )
 
-        return PoolingResponse(
+        response = PoolingResponse(
             id=request_id,
             created=created_time,
             model=model_name,
             data=items,
-            usage=usage,
+            usage=get_pooling_usage(final_res_batch),
         )
+        return self.json_response_cls(content=response.model_dump())
 
     def request_output_to_pooling_bytes_response(
         self,
@@ -303,64 +169,16 @@ def request_output_to_pooling_bytes_response(
         request_id: str,
         created_time: int,
         model_name: str,
-        encoding_format: Literal["bytes", "bytes_only"],
+        encoding_format: BytesEncodingFormat,
         embed_dtype: EmbedDType,
         endianness: Endianness,
-    ) -> PoolingBytesResponse:
-        content, items, usage = encode_pooling_bytes(
+    ) -> StreamingResponse:
+        return build_pooling_bytes_streaming_response(
             pooling_outputs=final_res_batch,
+            request_id=request_id,
+            created_time=created_time,
+            model_name=model_name,
+            encoding_format=encoding_format,
             embed_dtype=embed_dtype,
             endianness=endianness,
         )
-
-        headers = (
-            None
-            if encoding_format == "bytes_only"
-            else {
-                "metadata": json.dumps(
-                    {
-                        "id": request_id,
-                        "created": created_time,
-                        "model": model_name,
-                        "data": items,
-                        "usage": usage,
-                    }
-                )
-            }
-        )
-
-        return PoolingBytesResponse(content=content, headers=headers)
-
-    def request_output_to_pooling_response(
-        self,
-        final_res_batch: list[PoolingRequestOutput],
-        request_id: str,
-        created_time: int,
-        model_name: str,
-        encoding_format: EncodingFormat,
-        embed_dtype: EmbedDType,
-        endianness: Endianness,
-    ) -> PoolingResponse | PoolingBytesResponse:
-        if encoding_format == "float" or encoding_format == "base64":
-            return self.request_output_to_pooling_json_response(
-                final_res_batch,
-                request_id,
-                created_time,
-                model_name,
-                encoding_format,
-                embed_dtype,
-                endianness,
-            )
-
-        if encoding_format == "bytes" or encoding_format == "bytes_only":
-            return self.request_output_to_pooling_bytes_response(
-                final_res_batch,
-                request_id,
-                created_time,
-                model_name,
-                encoding_format,
-                embed_dtype,
-                endianness,
-            )
-
-        assert_never(encoding_format)
diff --git a/vllm/entrypoints/pooling/score/protocol.py b/vllm/entrypoints/pooling/score/protocol.py
deleted file mode 100644
index bb633fc28b3c..000000000000
--- a/vllm/entrypoints/pooling/score/protocol.py
+++ /dev/null
@@ -1,156 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import time
-from typing import TypeAlias
-
-from pydantic import BaseModel, Field
-
-from vllm import PoolingParams
-from vllm.config import ModelConfig
-from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo
-from vllm.entrypoints.pooling.base.protocol import (
-    ClassifyRequestMixin,
-    PoolingBasicRequestMixin,
-)
-from vllm.entrypoints.pooling.score.utils import (
-    ScoreContentPartParam,
-    ScoreInput,
-    ScoreInputs,
-)
-from vllm.renderers import TokenizeParams
-from vllm.tasks import PoolingTask
-from vllm.utils import random_uuid
-
-
-class ScoreRequestMixin(PoolingBasicRequestMixin, ClassifyRequestMixin):
-    def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
-        encoder_config = model_config.encoder_config or {}
-
-        return TokenizeParams(
-            max_total_tokens=model_config.max_model_len,
-            max_output_tokens=0,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
-            truncation_side=self.truncation_side,
-            do_lower_case=encoder_config.get("do_lower_case", False),
-            max_total_tokens_param="max_model_len",
-        )
-
-    def to_pooling_params(self, task: PoolingTask = "classify"):
-        return PoolingParams(
-            task=task,
-            use_activation=self.use_activation,
-        )
-
-
-class ScoreDataRequest(ScoreRequestMixin):
-    data_1: ScoreInputs
-    data_2: ScoreInputs
-
-
-class ScoreQueriesDocumentsRequest(ScoreRequestMixin):
-    queries: ScoreInputs
-    documents: ScoreInputs
-
-    @property
-    def data_1(self):
-        return self.queries
-
-    @property
-    def data_2(self):
-        return self.documents
-
-
-class ScoreQueriesItemsRequest(ScoreRequestMixin):
-    queries: ScoreInputs
-    items: ScoreInputs
-
-    @property
-    def data_1(self):
-        return self.queries
-
-    @property
-    def data_2(self):
-        return self.items
-
-
-class ScoreTextRequest(ScoreRequestMixin):
-    text_1: ScoreInputs
-    text_2: ScoreInputs
-
-    @property
-    def data_1(self):
-        return self.text_1
-
-    @property
-    def data_2(self):
-        return self.text_2
-
-
-ScoreRequest: TypeAlias = (
-    ScoreQueriesDocumentsRequest
-    | ScoreQueriesItemsRequest
-    | ScoreDataRequest
-    | ScoreTextRequest
-)
-
-
-class RerankRequest(PoolingBasicRequestMixin, ClassifyRequestMixin):
-    query: ScoreInput
-    documents: ScoreInputs
-    top_n: int = Field(default_factory=lambda: 0)
-
-    def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
-        encoder_config = model_config.encoder_config or {}
-
-        return TokenizeParams(
-            max_total_tokens=model_config.max_model_len,
-            max_output_tokens=0,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
-            truncation_side=self.truncation_side,
-            do_lower_case=encoder_config.get("do_lower_case", False),
-            max_total_tokens_param="max_model_len",
-        )
-
-    def to_pooling_params(self, task: PoolingTask = "classify"):
-        return PoolingParams(
-            task=task,
-            use_activation=self.use_activation,
-        )
-
-
-class RerankDocument(BaseModel):
-    text: str | None = None
-    multi_modal: list[ScoreContentPartParam] | None = None
-
-
-class RerankResult(BaseModel):
-    index: int
-    document: RerankDocument
-    relevance_score: float
-
-
-class RerankUsage(BaseModel):
-    prompt_tokens: int
-    total_tokens: int
-
-
-class RerankResponse(OpenAIBaseModel):
-    id: str
-    model: str
-    usage: RerankUsage
-    results: list[RerankResult]
-
-
-class ScoreResponseData(OpenAIBaseModel):
-    index: int
-    object: str = "score"
-    score: float
-
-
-class ScoreResponse(OpenAIBaseModel):
-    id: str = Field(default_factory=lambda: f"embd-{random_uuid()}")
-    object: str = "list"
-    created: int = Field(default_factory=lambda: int(time.time()))
-    model: str
-    data: list[ScoreResponseData]
-    usage: UsageInfo
diff --git a/vllm/entrypoints/pooling/score/serving.py b/vllm/entrypoints/pooling/score/serving.py
deleted file mode 100644
index d6b70c7ac4a8..000000000000
--- a/vllm/entrypoints/pooling/score/serving.py
+++ /dev/null
@@ -1,667 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import asyncio
-import time
-from collections.abc import AsyncGenerator, Mapping
-from concurrent.futures import ThreadPoolExecutor
-from typing import Any
-
-from fastapi import Request
-
-from vllm.engine.protocol import EngineClient
-from vllm.entrypoints.logger import RequestLogger
-from vllm.entrypoints.openai.engine.protocol import (
-    ErrorResponse,
-    UsageInfo,
-)
-from vllm.entrypoints.openai.engine.serving import OpenAIServing
-from vllm.entrypoints.openai.models.serving import OpenAIServingModels
-from vllm.entrypoints.pooling.score.protocol import (
-    RerankDocument,
-    RerankRequest,
-    RerankResponse,
-    RerankResult,
-    RerankUsage,
-    ScoreRequest,
-    ScoreResponse,
-    ScoreResponseData,
-)
-from vllm.entrypoints.pooling.score.utils import (
-    ScoreData,
-    ScoreInputs,
-    _cosine_similarity,
-    compress_token_type_ids,
-    get_score_prompt,
-    parse_score_data_single,
-    validate_score_input,
-)
-from vllm.inputs import EngineInput, TokensPrompt, tokens_input
-from vllm.logger import init_logger
-from vllm.lora.request import LoRARequest
-from vllm.outputs import PoolingRequestOutput, ScoringRequestOutput
-from vllm.tokenizers import TokenizerLike
-from vllm.utils.async_utils import make_async, merge_async_iterators
-from vllm.utils.mistral import is_mistral_tokenizer
-from vllm.v1.pool.late_interaction import (
-    build_late_interaction_doc_params,
-    build_late_interaction_query_params,
-)
-
-logger = init_logger(__name__)
-
-
-class ServingScores(OpenAIServing):
-    def __init__(
-        self,
-        engine_client: EngineClient,
-        models: OpenAIServingModels,
-        *,
-        request_logger: RequestLogger | None,
-        score_template: str | None = None,
-        log_error_stack: bool = False,
-    ) -> None:
-        super().__init__(
-            engine_client=engine_client,
-            models=models,
-            request_logger=request_logger,
-        )
-        self.score_template = score_template
-
-        self._tokenizer_executor = ThreadPoolExecutor(max_workers=1)
-
-        self.score_type = self.model_config.score_type
-        self.architecture = self.model_config.architecture
-        self.is_multimodal_model = self.model_config.is_multimodal_model
-
-        if self.score_type == "cross-encoder":
-            self._score_func = self._cross_encoding_score
-        elif self.score_type == "late-interaction":
-            self._score_func = self._late_interaction_score
-        else:  # "bi-encoder"
-            self._score_func = self._embedding_score
-
-    async def _embedding_score(
-        self,
-        data_1: list[ScoreData],
-        data_2: list[ScoreData],
-        request: RerankRequest | ScoreRequest,
-        request_id: str,
-        lora_request: LoRARequest | None | None = None,
-        trace_headers: Mapping[str, str] | None = None,
-    ) -> list[PoolingRequestOutput] | ErrorResponse:
-        input_texts: list[str] = []
-        for text in data_1 + data_2:
-            if not isinstance(text, str):
-                raise NotImplementedError(
-                    "Embedding scores currently do not support multimodal input."
-                )
-            input_texts.append(text)
-
-        model_config = self.model_config
-        tokenizer = self.renderer.get_tokenizer()
-
-        encode_async = make_async(
-            tokenizer.encode,
-            executor=self._tokenizer_executor,
-        )
-
-        tokenization_kwargs = request.build_tok_params(model_config).get_encode_kwargs()
-        tokenized_prompts = await asyncio.gather(
-            *(encode_async(t, **tokenization_kwargs) for t in input_texts)
-        )
-
-        engine_inputs: list[EngineInput] = []
-        for tok_result, input_text in zip(tokenized_prompts, input_texts):
-            text_token_prompt = self._validate_input(request, tok_result, input_text)
-
-            engine_inputs.append(
-                tokens_input(
-                    text_token_prompt["prompt_token_ids"],
-                    prompt=input_text,
-                )
-            )
-
-        # Schedule the request and get the result generator.
-        generators: list[AsyncGenerator[PoolingRequestOutput, None]] = []
-        pooling_params = request.to_pooling_params("embed")
-
-        for i, engine_input in enumerate(engine_inputs):
-            request_id_item = f"{request_id}-{i}"
-
-            self._log_inputs(
-                request_id_item,
-                engine_input,
-                params=pooling_params,
-                lora_request=lora_request,
-            )
-
-            generators.append(
-                self.engine_client.encode(
-                    engine_input,
-                    pooling_params,
-                    request_id_item,
-                    lora_request=lora_request,
-                    trace_headers=trace_headers,
-                    priority=request.priority,
-                )
-            )
-
-        result_generator = merge_async_iterators(*generators)
-
-        # Non-streaming response
-        final_res_batch: list[PoolingRequestOutput] = []
-
-        embeddings: list[PoolingRequestOutput | None] = [None] * len(engine_inputs)
-
-        async for i, res in result_generator:
-            embeddings[i] = res
-
-        emb_data_1: list[PoolingRequestOutput] = []
-        emb_data_2: list[PoolingRequestOutput] = []
-
-        for i in range(0, len(data_1)):
-            assert (emb := embeddings[i]) is not None
-            emb_data_1.append(emb)
-
-        for i in range(len(data_1), len(embeddings)):
-            assert (emb := embeddings[i]) is not None
-            emb_data_2.append(emb)
-
-        if len(emb_data_1) == 1:
-            emb_data_1 = emb_data_1 * len(emb_data_2)
-
-        final_res_batch = _cosine_similarity(
-            tokenizer=tokenizer, embed_1=emb_data_1, embed_2=emb_data_2
-        )
-
-        return final_res_batch
-
-    def _preprocess_late_interaction_item(
-        self,
-        data: ScoreData,
-        role: str,
-        request: RerankRequest | ScoreRequest,
-        tokenizer: TokenizerLike,
-        tokenization_kwargs: dict[str, Any],
-    ) -> TokensPrompt:
-        """Parse a single ScoreData into a text + optional multimodal
-        TokensPrompt for late-interaction encoding.
-
-        For plain strings, tokenises directly.
-        For multimodal content parts, extracts text and multi_modal_data.
-        """
-        model_config = self.model_config
-
-        if isinstance(data, str):
-            text, mm_data, mm_uuids = data, None, None
-        else:
-            text, mm_data, mm_uuids = parse_score_data_single(data, role, model_config)
-
-        prompt_ids = tokenizer.encode(text, **tokenization_kwargs)
-        self._validate_input(request, prompt_ids, text)
-
-        tok_prompt = TokensPrompt(
-            prompt_token_ids=prompt_ids,
-            prompt=text,
-        )
-
-        if mm_data is not None:
-            tok_prompt["multi_modal_data"] = mm_data
-        if mm_uuids is not None:
-            tok_prompt["multi_modal_uuids"] = mm_uuids
-        if request.mm_processor_kwargs is not None:
-            tok_prompt["mm_processor_kwargs"] = request.mm_processor_kwargs
-
-        return tok_prompt
-
-    async def _late_interaction_score(
-        self,
-        data_1: list[ScoreData],
-        data_2: list[ScoreData],
-        request: RerankRequest | ScoreRequest,
-        request_id: str,
-        lora_request: LoRARequest | None = None,
-        trace_headers: Mapping[str, str] | None = None,
-    ) -> list[PoolingRequestOutput] | ErrorResponse:
-        """
-        Late interaction scoring (ColBERT MaxSim).
-
-        Encodes queries and documents into per-token embeddings, then computes
-        MaxSim: sum over query tokens of max similarity to any document token.
-        """
-        model_config = self.model_config
-        tokenizer = self.renderer.get_tokenizer()
-        tokenization_kwargs = request.build_tok_params(model_config).get_encode_kwargs()
-
-        all_data = data_1 + data_2
-        roles = ["query"] * len(data_1) + ["document"] * len(data_2)
-
-        preprocess_async = make_async(
-            self._preprocess_late_interaction_item,
-            executor=self._tokenizer_executor,
-        )
-
-        tok_prompts = await asyncio.gather(
-            *(
-                preprocess_async(
-                    data=d,
-                    role=r,
-                    request=request,
-                    tokenizer=tokenizer,
-                    tokenization_kwargs=tokenization_kwargs,
-                )
-                for d, r in zip(all_data, roles)
-            )
-        )
-
-        query_prompts = tok_prompts[: len(data_1)]
-        doc_prompts = tok_prompts[len(data_1) :]
-
-        default_pooling_params = request.to_pooling_params("token_embed")
-
-        # stage 1: encode queries and cache token embeddings on workers.
-        query_keys = [f"{request_id}-query-{i}" for i in range(len(query_prompts))]
-        query_uses = [len(doc_prompts) if len(query_prompts) == 1 else 1] * len(
-            query_prompts
-        )
-        query_generators: list[AsyncGenerator[PoolingRequestOutput, None]] = []
-        for i, tok_prompt in enumerate(query_prompts):
-            request_id_item = f"{request_id}-query-{i}"
-            pooling_params = default_pooling_params.clone()
-            pooling_params.late_interaction_params = (
-                build_late_interaction_query_params(
-                    query_key=query_keys[i],
-                    query_uses=query_uses[i],
-                )
-            )
-
-            self._log_inputs(
-                request_id_item,
-                tok_prompt,
-                params=pooling_params,
-                lora_request=lora_request,
-            )
-
-            query_generators.append(
-                self.engine_client.encode(
-                    tok_prompt,
-                    pooling_params,
-                    request_id_item,
-                    lora_request=lora_request,
-                    trace_headers=trace_headers,
-                    priority=request.priority,
-                )
-            )
-
-        query_outputs: list[PoolingRequestOutput | None] = [None] * len(query_prompts)
-        if query_generators:
-            async for i, res in merge_async_iterators(*query_generators):
-                query_outputs[i] = res
-
-        assert all(res is not None for res in query_outputs)
-        query_results = [res for res in query_outputs if res is not None]
-
-        # stage 2: encode docs and return scalar scores from workers.
-        doc_generators: list[AsyncGenerator[PoolingRequestOutput, None]] = []
-        for i, tok_prompt in enumerate(doc_prompts):
-            request_id_item = f"{request_id}-doc-{i}"
-            query_idx = 0 if len(query_prompts) == 1 else i
-            pooling_params = default_pooling_params.clone()
-            pooling_params.late_interaction_params = build_late_interaction_doc_params(
-                query_key=query_keys[query_idx]
-            )
-
-            self._log_inputs(
-                request_id_item,
-                tok_prompt,
-                params=pooling_params,
-                lora_request=lora_request,
-            )
-
-            doc_generators.append(
-                self.engine_client.encode(
-                    tok_prompt,
-                    pooling_params,
-                    request_id_item,
-                    lora_request=lora_request,
-                    trace_headers=trace_headers,
-                    priority=request.priority,
-                )
-            )
-
-        doc_outputs: list[PoolingRequestOutput | None] = [None] * len(doc_prompts)
-        if doc_generators:
-            async for i, res in merge_async_iterators(*doc_generators):
-                doc_outputs[i] = res
-
-        assert all(res is not None for res in doc_outputs)
-        doc_results = [res for res in doc_outputs if res is not None]
-
-        scores: list[PoolingRequestOutput] = []
-        padding: list[int] = []
-        if (pad_token_id := tokenizer.pad_token_id) is not None:
-            padding = [pad_token_id]
-
-        if len(query_results) == 1:
-            query_results = query_results * len(doc_results)
-
-        for query_result, doc_result in zip(query_results, doc_results):
-            tokens = (
-                query_result.prompt_token_ids + padding + doc_result.prompt_token_ids
-            )
-
-            scores.append(
-                PoolingRequestOutput(
-                    request_id=f"{query_result.request_id}_{doc_result.request_id}",
-                    outputs=doc_result.outputs,
-                    prompt_token_ids=tokens,
-                    num_cached_tokens=(
-                        query_result.num_cached_tokens + doc_result.num_cached_tokens
-                    ),
-                    finished=True,
-                )
-            )
-
-        return scores
-
-    async def _cross_encoding_score(
-        self,
-        data_1: list[ScoreData],
-        data_2: list[ScoreData],
-        request: RerankRequest | ScoreRequest,
-        request_id: str,
-        lora_request: LoRARequest | None | None = None,
-        trace_headers: Mapping[str, str] | None = None,
-    ) -> list[PoolingRequestOutput] | ErrorResponse:
-        tokenizer = self.renderer.get_tokenizer()
-        if is_mistral_tokenizer(tokenizer):
-            raise ValueError("MistralTokenizer not supported for cross-encoding")
-
-        model_config = self.model_config
-
-        if len(data_1) == 1:
-            data_1 = data_1 * len(data_2)
-
-        tok_kwargs = request.build_tok_params(model_config).get_encode_kwargs()
-        input_pairs = [(t1, t2) for t1, t2 in zip(data_1, data_2)]
-        preprocess_async = make_async(
-            self._preprocess_score,
-            executor=self._tokenizer_executor,
-        )
-        preprocessed_prompts = await asyncio.gather(
-            *(
-                preprocess_async(
-                    request=request,
-                    tokenizer=tokenizer,
-                    tokenization_kwargs=tok_kwargs,
-                    data_1=t1,
-                    data_2=t2,
-                )
-                for t1, t2 in input_pairs
-            )
-        )
-
-        # Schedule the request and get the result generator.
-        generators: list[AsyncGenerator[PoolingRequestOutput, None]] = []
-
-        default_pooling_params = request.to_pooling_params("classify")
-
-        for i, (full_prompt, tok_prompt) in enumerate(preprocessed_prompts):
-            request_id_item = f"{request_id}-{i}"
-
-            self._log_inputs(
-                request_id_item,
-                full_prompt,
-                params=default_pooling_params,
-                lora_request=lora_request,
-            )
-
-            if token_type_ids := tok_prompt.pop("token_type_ids", None):
-                pooling_params = default_pooling_params.clone()
-                compressed = compress_token_type_ids(token_type_ids)
-                pooling_params.extra_kwargs = {"compressed_token_type_ids": compressed}
-            else:
-                pooling_params = default_pooling_params
-
-            generator = self.engine_client.encode(
-                tok_prompt,
-                pooling_params,
-                request_id_item,
-                lora_request=lora_request,
-                trace_headers=trace_headers,
-                priority=request.priority,
-            )
-
-            generators.append(generator)
-
-        result_generator = merge_async_iterators(*generators)
-
-        # Non-streaming response
-        final_res_batch: list[PoolingRequestOutput | None] = [None] * len(
-            preprocessed_prompts
-        )
-
-        async for i, res in result_generator:
-            final_res_batch[i] = res
-
-        return [out for out in final_res_batch if out is not None]
-
-    def _preprocess_score(
-        self,
-        request: RerankRequest | ScoreRequest,
-        tokenizer: TokenizerLike,
-        tokenization_kwargs: dict[str, Any],
-        data_1: ScoreData,
-        data_2: ScoreData,
-    ) -> tuple[str, TokensPrompt]:
-        model_config = self.model_config
-        full_prompt, engine_input = get_score_prompt(
-            model_config=model_config,
-            data_1=data_1,
-            data_2=data_2,
-            tokenizer=tokenizer,
-            tokenization_kwargs=tokenization_kwargs,
-            score_template=self.score_template,
-        )
-        self._validate_input(request, engine_input["prompt_token_ids"], full_prompt)
-        if request.mm_processor_kwargs is not None:
-            engine_input["mm_processor_kwargs"] = request.mm_processor_kwargs
-
-        return full_prompt, engine_input
-
-    async def _run_scoring(
-        self,
-        data_1: ScoreInputs,
-        data_2: ScoreInputs,
-        request: ScoreRequest | RerankRequest,
-        request_id: str,
-        raw_request: Request | None = None,
-    ) -> list[PoolingRequestOutput] | ErrorResponse:
-        lora_request = self._maybe_get_adapters(request)
-
-        trace_headers = (
-            None
-            if raw_request is None
-            else await self._get_trace_headers(raw_request.headers)
-        )
-
-        score_data_1, score_data_2 = validate_score_input(
-            data_1,
-            data_2,
-            is_multimodal_model=self.is_multimodal_model,
-            architecture=self.architecture,
-        )
-
-        return await self._score_func(
-            data_1=score_data_1,
-            data_2=score_data_2,
-            request=request,
-            request_id=request_id,
-            lora_request=lora_request,
-            trace_headers=trace_headers,
-        )
-
-    async def create_score(
-        self,
-        request: ScoreRequest,
-        raw_request: Request | None = None,
-    ) -> ScoreResponse | ErrorResponse:
-        """
-        Score API similar to Sentence Transformers cross encoder
-
-        See https://sbert.net/docs/package_reference/cross_encoder
-        """
-        error_check_ret = await self._check_model(request)
-        if error_check_ret is not None:
-            return error_check_ret
-
-        request_id = f"score-{self._base_request_id(raw_request)}"
-        created_time = int(time.time())
-
-        try:
-            final_res_batch = await self._run_scoring(
-                request.data_1,
-                request.data_2,
-                request,
-                request_id,
-                raw_request,
-            )
-            if isinstance(final_res_batch, ErrorResponse):
-                return final_res_batch
-
-            return self.request_output_to_score_response(
-                final_res_batch,
-                request_id,
-                created_time,
-                self.models.model_name(),
-            )
-        except asyncio.CancelledError:
-            return self.create_error_response("Client disconnected")
-
-    async def do_rerank(
-        self, request: RerankRequest, raw_request: Request | None = None
-    ) -> RerankResponse | ErrorResponse:
-        """
-        Rerank API based on JinaAI's rerank API; implements the same
-        API interface. Designed for compatibility with off-the-shelf
-        tooling, since this is a common standard for reranking APIs
-
-        See example client implementations at
-        https://github.com/infiniflow/ragflow/blob/main/rag/llm/rerank_model.py
-        numerous clients use this standard.
-        """
-        error_check_ret = await self._check_model(request)
-        if error_check_ret is not None:
-            return error_check_ret
-
-        request_id = f"rerank-{self._base_request_id(raw_request)}"
-        documents = request.documents
-
-        try:
-            final_res_batch = await self._run_scoring(
-                request.query,
-                documents,
-                request,
-                request_id,
-                raw_request,
-            )
-            if isinstance(final_res_batch, ErrorResponse):
-                return final_res_batch
-
-            top_n = request.top_n if request.top_n > 0 else len(final_res_batch)
-
-            return self.request_output_to_rerank_response(
-                final_res_batch,
-                request_id,
-                self.models.model_name(),
-                documents,
-                top_n,
-            )
-        except asyncio.CancelledError:
-            return self.create_error_response("Client disconnected")
-
-    def request_output_to_score_response(
-        self,
-        final_res_batch: list[PoolingRequestOutput],
-        request_id: str,
-        created_time: int,
-        model_name: str,
-    ) -> ScoreResponse:
-        items: list[ScoreResponseData] = []
-        num_prompt_tokens = 0
-
-        for idx, final_res in enumerate(final_res_batch):
-            classify_res = ScoringRequestOutput.from_base(final_res)
-
-            item = ScoreResponseData(
-                index=idx,
-                score=classify_res.outputs.score,
-            )
-            prompt_token_ids = final_res.prompt_token_ids
-
-            items.append(item)
-            num_prompt_tokens += len(prompt_token_ids)
-
-        usage = UsageInfo(
-            prompt_tokens=num_prompt_tokens,
-            total_tokens=num_prompt_tokens,
-        )
-
-        return ScoreResponse(
-            id=request_id,
-            created=created_time,
-            model=model_name,
-            data=items,
-            usage=usage,
-        )
-
-    def request_output_to_rerank_response(
-        self,
-        final_res_batch: list[PoolingRequestOutput],
-        request_id: str,
-        model_name: str,
-        documents: ScoreInputs,
-        top_n: int,
-    ) -> RerankResponse:
-        """
-        Convert the output of do_rank to a RerankResponse
-        """
-
-        if not isinstance(documents, list):
-            documents = [documents]
-
-        results: list[RerankResult] = []
-        num_prompt_tokens = 0
-        for idx, final_res in enumerate(final_res_batch):
-            classify_res = ScoringRequestOutput.from_base(final_res)
-
-            document = documents[idx]
-            if isinstance(document, str):
-                rerank_document = RerankDocument(text=document)
-            else:
-                rerank_document = RerankDocument(
-                    multi_modal=document.get("content", [])
-                )
-
-            result = RerankResult(
-                index=idx,
-                document=rerank_document,
-                relevance_score=classify_res.outputs.score,
-            )
-            results.append(result)
-            prompt_token_ids = final_res.prompt_token_ids
-            num_prompt_tokens += len(prompt_token_ids)
-
-        # sort by relevance, then return the top n if set
-        results.sort(key=lambda x: x.relevance_score, reverse=True)
-        if top_n < len(documents):
-            results = results[:top_n]
-
-        return RerankResponse(
-            id=request_id,
-            model=model_name,
-            results=results,
-            usage=RerankUsage(
-                total_tokens=num_prompt_tokens, prompt_tokens=num_prompt_tokens
-            ),
-        )
diff --git a/vllm/entrypoints/pooling/scoring/__init__.py b/vllm/entrypoints/pooling/scoring/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/entrypoints/pooling/score/api_router.py b/vllm/entrypoints/pooling/scoring/api_router.py
similarity index 76%
rename from vllm/entrypoints/pooling/score/api_router.py
rename to vllm/entrypoints/pooling/scoring/api_router.py
index a9a8641e9214..cf583293eace 100644
--- a/vllm/entrypoints/pooling/score/api_router.py
+++ b/vllm/entrypoints/pooling/scoring/api_router.py
@@ -3,21 +3,15 @@
 from http import HTTPStatus
 
 from fastapi import APIRouter, Depends, Request
-from fastapi.responses import JSONResponse
-from typing_extensions import assert_never
 
 from vllm.entrypoints.openai.engine.protocol import ErrorResponse
 from vllm.entrypoints.openai.utils import validate_json_request
-from vllm.entrypoints.pooling.score.protocol import (
-    RerankRequest,
-    RerankResponse,
-    ScoreRequest,
-    ScoreResponse,
-)
-from vllm.entrypoints.pooling.score.serving import ServingScores
 from vllm.entrypoints.utils import load_aware_call, with_cancellation
 from vllm.logger import init_logger
 
+from .protocol import RerankRequest, ScoreRequest
+from .serving import ServingScores
+
 router = APIRouter()
 
 logger = init_logger(__name__)
@@ -46,16 +40,7 @@ async def create_score(request: ScoreRequest, raw_request: Request):
     if handler is None:
         raise NotImplementedError("The model does not support Score API")
 
-    generator = await handler.create_score(request, raw_request)
-
-    if isinstance(generator, ErrorResponse):
-        return JSONResponse(
-            content=generator.model_dump(), status_code=generator.error.code
-        )
-    elif isinstance(generator, ScoreResponse):
-        return JSONResponse(content=generator.model_dump())
-
-    assert_never(generator)
+    return await handler(request, raw_request)
 
 
 @router.post(
@@ -92,16 +77,7 @@ async def do_rerank(request: RerankRequest, raw_request: Request):
     if handler is None:
         raise NotImplementedError("The model does not support Rerank (Score) API")
 
-    generator = await handler.do_rerank(request, raw_request)
-
-    if isinstance(generator, ErrorResponse):
-        return JSONResponse(
-            content=generator.model_dump(), status_code=generator.error.code
-        )
-    elif isinstance(generator, RerankResponse):
-        return JSONResponse(content=generator.model_dump())
-
-    assert_never(generator)
+    return await handler(request, raw_request)
 
 
 @router.post(
diff --git a/vllm/entrypoints/pooling/scoring/io_processor.py b/vllm/entrypoints/pooling/scoring/io_processor.py
new file mode 100644
index 000000000000..7c58e7e9aba9
--- /dev/null
+++ b/vllm/entrypoints/pooling/scoring/io_processor.py
@@ -0,0 +1,739 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import time
+from collections.abc import Sequence
+from typing import Any, TypeAlias
+
+import torch.nn.functional as F
+
+from vllm import PoolingParams, PoolingRequestOutput, TokensPrompt
+from vllm.inputs import EngineInput
+from vllm.renderers import TokenizeParams
+from vllm.renderers.hf import safe_apply_chat_template
+from vllm.tasks import PoolingTask
+from vllm.utils.mistral import is_mistral_tokenizer
+
+from ...chat_utils import ChatTemplateResolutionError
+from ..base.io_processor import PoolingIOProcessor
+from ..typing import (
+    OfflineInputsContext,
+    OfflineOutputsContext,
+    PoolingServeContext,
+)
+from .protocol import RerankRequest, ScoreRequest, ScoringRequest
+from .typing import ScoreData, ScoreInput, ScoringData
+from .utils import (
+    compress_token_type_ids,
+    compute_maxsim_score,
+    get_num_special_tokens_for_pair,
+    parse_score_data,
+    score_data_to_prompts,
+    truncate_text_to_tokens,
+    validate_score_input,
+)
+
+ScoringServeContext: TypeAlias = PoolingServeContext[ScoringRequest]
+
+
+class ScoringIOProcessor(PoolingIOProcessor):
+    name: str
+    pooling_task: PoolingTask
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.tokenizer = self.renderer.get_tokenizer()
+        self.architecture = self.model_config.architecture
+        self.is_multimodal_model = self.model_config.is_multimodal_model
+        self.pad_token_id = self.tokenizer.pad_token_id
+
+    def create_pooling_params(self, request):
+        return request.to_pooling_params(self.pooling_task)
+
+    def _validate_token_limit(self, value: int, name: str) -> None:
+        if value < 0:
+            raise ValueError(f"{name} must be a non-negative integer")
+        if value >= self.model_config.max_model_len:
+            raise ValueError(
+                f"{name} ({value}) must be less "
+                f"than max_model_len ({self.model_config.max_model_len})."
+            )
+
+    def _get_token_limits(
+        self,
+        request: ScoringRequest | None = None,
+        pooling_params: PoolingParams | None = None,
+    ) -> tuple[int, int]:
+        """Extract and validate token limits from request or pooling_params."""
+        if request is not None:
+            max_tokens_per_query = getattr(request, "max_tokens_per_query", 0)
+            max_tokens_per_doc = getattr(request, "max_tokens_per_doc", 0)
+        else:
+            extra = (
+                (pooling_params.extra_kwargs or {})
+                if pooling_params is not None
+                else {}
+            )
+            max_tokens_per_query = extra.get("max_tokens_per_query", 0)
+            max_tokens_per_doc = extra.get("max_tokens_per_doc", 0)
+
+        if max_tokens_per_query != 0:
+            self._validate_token_limit(max_tokens_per_query, "max_tokens_per_query")
+        if max_tokens_per_doc != 0:
+            self._validate_token_limit(max_tokens_per_doc, "max_tokens_per_doc")
+        return max_tokens_per_query, max_tokens_per_doc
+
+    def _truncate_scoring_data(
+        self,
+        scoring_data: ScoringData,
+        max_tokens_per_query: int = 0,
+        max_tokens_per_doc: int = 0,
+    ) -> ScoringData:
+        """Truncate query/document texts to token limits."""
+        data_1 = scoring_data.data_1
+        data_2 = scoring_data.data_2
+        if max_tokens_per_query > 0:
+            data_1 = [
+                truncate_text_to_tokens(d, self.tokenizer, max_tokens_per_query)
+                if isinstance(d, str)
+                else d
+                for d in data_1
+            ]
+        if max_tokens_per_doc > 0:
+            data_2 = [
+                truncate_text_to_tokens(d, self.tokenizer, max_tokens_per_doc)
+                if isinstance(d, str)
+                else d
+                for d in data_2
+            ]
+        return ScoringData(data_1=data_1, data_2=data_2)
+
+    def valid_inputs(
+        self,
+        data_1: ScoreInput | list[ScoreInput],
+        data_2: ScoreInput | list[ScoreInput],
+    ) -> ScoringData:
+        scoring_data = validate_score_input(
+            data_1,
+            data_2,
+            is_multimodal_model=self.is_multimodal_model,
+            architecture=self.architecture,
+        )
+        return scoring_data
+
+
+class BiEncoderIOProcessor(ScoringIOProcessor):
+    name = "bi-encoder"
+    pooling_task: PoolingTask = "embed"
+
+    #######################################
+    # online APIs
+
+    def pre_process_online(self, ctx: ScoringServeContext):
+        request = ctx.request
+
+        if isinstance(request, ScoreRequest):
+            data_1 = request.data_1
+            data_2 = request.data_2
+        elif isinstance(request, RerankRequest):
+            data_1 = request.query
+            data_2 = request.documents
+        else:
+            raise ValueError(f"Invalid {self.name} request type")
+
+        scoring_data = self.valid_inputs(data_1, data_2)
+
+        max_tokens_per_query, max_tokens_per_doc = self._get_token_limits(
+            request=request
+        )
+        if max_tokens_per_query > 0 or max_tokens_per_doc > 0:
+            scoring_data = self._truncate_scoring_data(
+                scoring_data, max_tokens_per_query, max_tokens_per_doc
+            )
+
+        tok_params = request.build_tok_params(self.model_config)
+        engine_inputs = self._pre_process(
+            scoring_data,
+            tok_params,
+            prompt_extras={
+                k: v
+                for k in ("mm_processor_kwargs", "cache_salt", "chat_template_kwargs")
+                if (v := getattr(request, k, None)) is not None
+            },
+        )
+
+        ctx.engine_inputs = engine_inputs
+        ctx.n_queries = len(scoring_data.data_1)
+
+    def post_process_online(
+        self,
+        ctx: ScoringServeContext,
+    ):
+        assert ctx.final_res_batch is not None
+        assert isinstance(ctx.n_queries, int)
+
+        ctx.final_res_batch = self._post_process(
+            outputs=ctx.final_res_batch, n_queries=ctx.n_queries
+        )
+
+    #######################################
+    # offline APIs
+
+    def pre_process_offline(self, ctx: OfflineInputsContext) -> Sequence[EngineInput]:
+        assert isinstance(ctx.prompts, ScoringData)
+        assert not isinstance(ctx.pooling_params, Sequence)
+
+        tok_params = self.renderer.default_cmpl_tok_params.with_kwargs(
+            **(ctx.tokenization_kwargs or {})
+        )
+
+        max_tokens_per_query, max_tokens_per_doc = self._get_token_limits(
+            pooling_params=ctx.pooling_params
+        )
+
+        scoring_data = ctx.prompts
+        if max_tokens_per_query > 0 or max_tokens_per_doc > 0:
+            scoring_data = self._truncate_scoring_data(
+                scoring_data, max_tokens_per_query, max_tokens_per_doc
+            )
+
+        return self._pre_process(scoring_data, tok_params)
+
+    def post_process_offline(
+        self,
+        ctx: OfflineOutputsContext,
+    ) -> list[PoolingRequestOutput]:
+        assert ctx.n_queries is not None
+        return self._post_process(outputs=ctx.outputs, n_queries=ctx.n_queries)
+
+    #######################################
+    # helpers
+
+    def _pre_process(
+        self,
+        scoring_data: ScoringData,
+        tok_params: TokenizeParams,
+        prompt_extras: dict[str, Any] | None = None,
+    ) -> Sequence[EngineInput]:
+        data_1 = score_data_to_prompts(scoring_data.data_1, "query", self.model_config)
+        data_2 = score_data_to_prompts(
+            scoring_data.data_2, "document", self.model_config
+        )
+
+        return self._preprocess_cmpl_offline(
+            prompts=data_1 + data_2, tok_params=tok_params, prompt_extras=prompt_extras
+        )
+
+    def _post_process(self, outputs: list[PoolingRequestOutput], n_queries: int):
+        emb_data_1 = outputs[:n_queries]
+        emb_data_2 = outputs[n_queries:]
+
+        if len(emb_data_1) == 1:
+            emb_data_1 = emb_data_1 * len(emb_data_2)
+
+        final_res_batch: list[PoolingRequestOutput] = []
+        for emb_1, emb_2 in zip(emb_data_1, emb_data_2):
+            pair_score = F.cosine_similarity(
+                emb_1.outputs.data.float(), emb_2.outputs.data.float(), dim=0
+            )
+
+            padding: list[int] = []
+            if self.pad_token_id is not None:
+                padding = [self.pad_token_id]
+
+            tokens = emb_1.prompt_token_ids + padding + emb_2.prompt_token_ids
+
+            final_res_batch.append(
+                PoolingRequestOutput(
+                    request_id=f"{emb_1.request_id}_{emb_2.request_id}",
+                    outputs=pair_score,
+                    prompt_token_ids=tokens,
+                    num_cached_tokens=emb_1.num_cached_tokens + emb_2.num_cached_tokens,
+                    finished=True,
+                )
+            )
+        return final_res_batch
+
+
+class LateInteractionIOProcessor(BiEncoderIOProcessor):
+    name = "late-interaction"
+    pooling_task: PoolingTask = "token_embed"
+
+    def _post_process(self, outputs: list[PoolingRequestOutput], n_queries: int):
+        # Split into query and document embeddings
+        emb_data_1 = outputs[:n_queries]
+        emb_data_2 = outputs[n_queries:]
+
+        # Expand queries if 1:N scoring
+        if len(emb_data_1) == 1:
+            emb_data_1 = emb_data_1 * len(emb_data_2)
+
+        final_res_batch: list[PoolingRequestOutput] = []
+        padding: list[int] = []
+        if (pad_token_id := self.pad_token_id) is not None:
+            padding = [pad_token_id]
+
+        # Compute MaxSim scores
+        for emb_1, emb_2 in zip(emb_data_1, emb_data_2):
+            # emb_1.outputs.data: [query_len, dim]
+            # emb_2.outputs.data: [doc_len, dim]
+            q_emb = emb_1.outputs.data
+            d_emb = emb_2.outputs.data
+
+            maxsim_score = compute_maxsim_score(q_emb, d_emb)
+
+            tokens = emb_1.prompt_token_ids + padding + emb_2.prompt_token_ids
+
+            final_res_batch.append(
+                PoolingRequestOutput(
+                    request_id=f"{emb_1.request_id}_{emb_2.request_id}",
+                    outputs=maxsim_score,
+                    prompt_token_ids=tokens,
+                    num_cached_tokens=emb_1.num_cached_tokens + emb_2.num_cached_tokens,
+                    finished=True,
+                )
+            )
+        return final_res_batch
+
+
+class FlashLateInteractionIOProcessor(LateInteractionIOProcessor):
+    name = "flash-late-interaction"
+
+    def post_process_online(
+        self,
+        ctx: ScoringServeContext,
+    ):
+        assert ctx.query_final_res_batch is not None
+        assert ctx.final_res_batch is not None
+        assert isinstance(ctx.n_queries, int)
+
+        # Expand queries if 1:N scoring
+        if len(ctx.query_final_res_batch) == 1:
+            ctx.query_final_res_batch = ctx.query_final_res_batch * len(
+                ctx.final_res_batch
+            )
+
+        final_res_batch: list[PoolingRequestOutput] = []
+        for d1, d2 in zip(ctx.query_final_res_batch, ctx.final_res_batch):
+            padding: list[int] = []
+            if (pad_token_id := self.pad_token_id) is not None:
+                padding = [pad_token_id]
+
+            tokens = d1.prompt_token_ids + padding + d2.prompt_token_ids
+
+            final_res_batch.append(
+                PoolingRequestOutput(
+                    request_id=f"{d1.request_id}_{d2.request_id}",
+                    outputs=d2.outputs,
+                    prompt_token_ids=tokens,
+                    num_cached_tokens=d1.num_cached_tokens + d2.num_cached_tokens,
+                    finished=True,
+                )
+            )
+        ctx.final_res_batch = final_res_batch
+
+
+class CrossEncoderIOProcessor(ScoringIOProcessor):
+    name = "cross-encoder"
+    pooling_task: PoolingTask = "classify"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        if is_mistral_tokenizer(self.tokenizer):
+            raise ValueError("MistralTokenizer not supported for cross-encoding")
+
+        from vllm.model_executor.model_loader import get_model_cls
+        from vllm.model_executor.models.interfaces import supports_score_template
+
+        model = get_model_cls(self.model_config)
+        self.supports_score_template = supports_score_template(model)
+        self.model = model if self.supports_score_template else None
+        self.use_sep_token = self.model_config.use_sep_token
+
+    #######################################
+    # online APIs
+
+    def pre_process_online(self, ctx: ScoringServeContext):
+        request = ctx.request
+
+        if isinstance(request, ScoreRequest):
+            data_1 = request.data_1
+            data_2 = request.data_2
+        elif isinstance(request, RerankRequest):
+            data_1 = request.query
+            data_2 = request.documents
+        else:
+            raise ValueError(f"Invalid {self.name} request type")
+
+        scoring_data = self.valid_inputs(data_1, data_2)
+
+        max_tokens_per_query, max_tokens_per_doc = self._get_token_limits(
+            request=request
+        )
+
+        tok_params = request.build_tok_params(self.model_config)
+        pooling_params = self.create_pooling_params(request)
+
+        engine_inputs, pooling_params_list = self._pre_process(
+            scoring_data,
+            tok_params,
+            pooling_params,
+            chat_template=self.chat_template,
+            max_tokens_per_query=max_tokens_per_query,
+            max_tokens_per_doc=max_tokens_per_doc,
+            prompt_extras={
+                k: v
+                for k in ("mm_processor_kwargs", "cache_salt", "chat_template_kwargs")
+                if (v := getattr(request, k, None)) is not None
+            },
+        )
+
+        ctx.engine_inputs = engine_inputs
+        ctx.pooling_params = pooling_params_list
+
+    #######################################
+    # offline APIs
+
+    def pre_process_offline(self, ctx: OfflineInputsContext) -> Sequence[EngineInput]:
+        assert isinstance(ctx.prompts, ScoringData)
+        assert not isinstance(ctx.pooling_params, Sequence)
+
+        tok_params = self.renderer.default_cmpl_tok_params.with_kwargs(
+            **(ctx.tokenization_kwargs or {})
+        )
+
+        max_tokens_per_query, max_tokens_per_doc = self._get_token_limits(
+            pooling_params=ctx.pooling_params
+        )
+
+        prompt_extras = ctx.pooling_params.extra_kwargs if ctx.pooling_params else None
+        engine_inputs, pooling_params_list = self._pre_process(
+            ctx.prompts,
+            tok_params,
+            ctx.pooling_params,
+            ctx.chat_template,
+            max_tokens_per_query=max_tokens_per_query,
+            max_tokens_per_doc=max_tokens_per_doc,
+            prompt_extras=prompt_extras,
+        )
+        ctx.pooling_params = pooling_params_list
+        return engine_inputs
+
+    #######################################
+    # helpers
+
+    def _pre_process(
+        self,
+        scoring_data: ScoringData,
+        tok_params: TokenizeParams,
+        pooling_params: PoolingParams | None,
+        chat_template: str | None = None,
+        max_tokens_per_query: int = 0,
+        max_tokens_per_doc: int = 0,
+        prompt_extras: dict[str, Any] | None = None,
+    ) -> tuple[Sequence[EngineInput], list[PoolingParams]]:
+        # todo: support prompt_extras
+        arrival_time = time.time()
+
+        data_1 = scoring_data.data_1
+        data_2 = scoring_data.data_2
+
+        if len(data_1) == 1:
+            data_1 = data_1 * len(data_2)
+
+        if pooling_params is None:
+            pooling_params = PoolingParams(task="classify")
+
+        pooling_params_list = list[PoolingParams]()
+        engine_inputs = list[EngineInput]()
+        for q, d in zip(data_1, data_2):
+            _, engine_prompt = self.get_score_prompt(
+                data_1=q,
+                data_2=d,
+                encode_kwargs=tok_params.get_encode_kwargs(),
+                chat_template=chat_template,
+                max_tokens_per_query=max_tokens_per_query,
+                max_tokens_per_doc=max_tokens_per_doc,
+                chat_template_kwargs=prompt_extras.get("chat_template_kwargs")
+                if prompt_extras
+                else None,
+            )
+
+            if token_type_ids := engine_prompt.pop("token_type_ids", None):
+                params = pooling_params.clone()
+                compressed = compress_token_type_ids(token_type_ids)
+                params.extra_kwargs = {"compressed_token_type_ids": compressed}
+                pooling_params_list.append(params)
+            else:
+                pooling_params_list.append(pooling_params)
+
+            tok_params.apply_post_tokenization(self.tokenizer, engine_prompt)
+            engine_inputs.append(
+                self.renderer.process_for_engine(engine_prompt, arrival_time)
+            )
+        return engine_inputs, pooling_params_list
+
+    def get_score_prompt(
+        self,
+        data_1: ScoreData,
+        data_2: ScoreData,
+        encode_kwargs: dict[str, Any],
+        chat_template: str | None = None,
+        max_tokens_per_query: int = 0,
+        max_tokens_per_doc: int = 0,
+        chat_template_kwargs: dict[str, Any] | None = None,
+    ):
+        model_config = self.model_config
+        tokenizer = self.tokenizer
+
+        prompt_1, prompt_2, mm_data, mm_uuids = parse_score_data(
+            data_1,
+            data_2,
+            model_config,
+        )
+
+        # Apply truncation before defining closures
+        if max_tokens_per_query > 0 and isinstance(prompt_1, str):
+            prompt_1 = truncate_text_to_tokens(
+                prompt_1, tokenizer, max_tokens_per_query
+            )
+        if max_tokens_per_doc > 0 and isinstance(prompt_2, str):
+            prompt_2 = truncate_text_to_tokens(prompt_2, tokenizer, max_tokens_per_doc)
+
+        def default_tokenizer_encode():
+            local_kwargs = encode_kwargs.copy()
+
+            if self.supports_score_template:
+                assert self.model is not None
+                full_prompt = self.model.get_score_template(prompt_1, prompt_2)
+                if full_prompt is None:
+                    raise ValueError("Get empty score template from model")
+
+                prompt_inputs = tokenizer(full_prompt, **local_kwargs)
+            else:
+                if self.use_sep_token:
+                    # cross_encoder models defaults to using separating token.
+                    if max_tokens_per_doc > 0 and isinstance(prompt_2, str):
+                        query_tokens = tokenizer.encode(
+                            prompt_1, add_special_tokens=False
+                        )
+                        num_special = get_num_special_tokens_for_pair(tokenizer)
+                        doc_limit_max_length = (
+                            len(query_tokens) + max_tokens_per_doc + num_special
+                        )
+                        existing_max_length = local_kwargs.get("max_length")
+                        if existing_max_length is not None:
+                            effective_max_length = min(
+                                doc_limit_max_length, existing_max_length
+                            )
+                        else:
+                            effective_max_length = doc_limit_max_length
+                        local_kwargs["truncation"] = "only_second"
+                        local_kwargs["max_length"] = effective_max_length
+
+                    prompt_inputs = tokenizer(
+                        text=prompt_1, text_pair=prompt_2, **local_kwargs
+                    )
+                    full_prompt = tokenizer.decode(prompt_inputs["input_ids"])
+                else:
+                    # `llm as reranker` defaults to not using separating token.
+                    if max_tokens_per_doc > 0 and isinstance(prompt_2, str):
+                        query_ids = tokenizer.encode(prompt_1, add_special_tokens=False)
+                        doc_ids = tokenizer.encode(prompt_2, add_special_tokens=False)
+                        doc_ids = doc_ids[:max_tokens_per_doc]
+                        input_ids = query_ids + doc_ids
+                        full_prompt = tokenizer.decode(input_ids)
+                        prompt_inputs = {"input_ids": input_ids}
+                    else:
+                        full_prompt = prompt_1 + prompt_2
+                        prompt_inputs = tokenizer(text=full_prompt, **local_kwargs)
+            return full_prompt, prompt_inputs
+
+        # FIXME: For now, we only apply a template when one is explicitly provided.
+        # We cannot rely on the tokenizer's chat template because many models
+        # inherit junk templates from their base LLM, which breaks both the models
+        # and the tests that use them.
+        if chat_template is None:
+            full_prompt, prompt_inputs = default_tokenizer_encode()
+        else:
+            # FIXME:
+            # Try applying a score template from the CLI arg or tokenizer_config.json
+            # If that fails because there is no such template,
+            # fall back to the default implementation.
+            try:
+                _safe_kwargs = chat_template_kwargs or {}
+                _reserved = {"chat_template", "tools", "tokenize"}
+                _unexpected = _reserved & _safe_kwargs.keys()
+                if _unexpected:
+                    raise ValueError(
+                        "chat_template_kwargs contains reserved keys that "
+                        f"conflict with fixed scorer arguments: {_unexpected}"
+                    )
+                full_prompt = safe_apply_chat_template(
+                    model_config,
+                    tokenizer,
+                    [
+                        {"role": "query", "content": prompt_1},
+                        {"role": "document", "content": prompt_2},
+                    ],
+                    chat_template=chat_template,
+                    tools=None,
+                    tokenize=False,
+                    **_safe_kwargs,
+                )
+                prompt_inputs = tokenizer(full_prompt, **encode_kwargs)
+            except ChatTemplateResolutionError:
+                full_prompt, prompt_inputs = default_tokenizer_encode()
+
+        engine_prompt = TokensPrompt(prompt_token_ids=prompt_inputs["input_ids"])
+
+        if (token_type_ids := prompt_inputs.get("token_type_ids")) is not None:
+            engine_prompt["token_type_ids"] = token_type_ids
+
+        if self.model is not None:
+            self.model.post_process_tokens(engine_prompt)
+
+        if mm_data is not None:
+            engine_prompt["multi_modal_data"] = mm_data
+        if mm_uuids is not None:
+            engine_prompt["multi_modal_uuids"] = mm_uuids
+
+        return full_prompt, engine_prompt
+
+
+class JinaRankingIOProcessorMixin:
+    @staticmethod
+    def sanitize_input(text: str, special_tokens: dict[str, str]) -> str:
+        for token in special_tokens.values():
+            text = text.replace(token, "")
+        return text
+
+    @staticmethod
+    def format_docs_prompts_func(
+        query: str,
+        docs: list[str],
+        special_tokens: dict[str, str] | None = None,
+        instruction: str | None = None,
+        no_thinking: bool = True,
+    ) -> str:
+        # TODO: Try converting the code below into a chat template.
+
+        default_special_tokens = {
+            "query_embed_token": "<|rerank_token|>",
+            "doc_embed_token": "<|embed_token|>",
+        }
+        if special_tokens is None:
+            special_tokens = default_special_tokens
+
+        query = JinaRankingIOProcessorMixin.sanitize_input(query, special_tokens)
+        docs = [
+            JinaRankingIOProcessorMixin.sanitize_input(doc, special_tokens)
+            for doc in docs
+        ]
+
+        prefix = (
+            "<|im_start|>system\n"
+            "You are a search relevance expert who can determine a ranking of the passages based on how relevant they are to the query. "  # noqa: E501
+            "If the query is a question, how relevant a passage is depends on how well it answers the question. "  # noqa: E501
+            "If not, try to analyze the intent of the query and assess how well each passage satisfies the intent. "  # noqa: E501
+            "If an instruction is provided, you should follow the instruction when determining the ranking."  # noqa: E501
+            "<|im_end|>\n<|im_start|>user\n"
+        )
+        suffix = "<|im_end|>\n<|im_start|>assistant\n"
+        if no_thinking:
+            suffix += "<think>\n\n</think>\n\n"
+
+        doc_emb_token = special_tokens["doc_embed_token"]
+        query_emb_token = special_tokens["query_embed_token"]
+
+        prompt = (
+            f"I will provide you with {len(docs)} passages, each indicated by a numerical identifier. "  # noqa: E501
+            f"Rank the passages based on their relevance to query: {query}\n"
+        )
+
+        if instruction:
+            prompt += f"<instruct>\n{instruction}\n</instruct>\n"
+
+        doc_prompts = [
+            f'<passage id="{i}">\n{doc}{doc_emb_token}\n</passage>'
+            for i, doc in enumerate(docs)
+        ]
+        prompt += "\n".join(doc_prompts) + "\n"
+        prompt += f"<query>\n{query}{query_emb_token}\n</query>"
+
+        return prefix + prompt + suffix
+
+    @staticmethod
+    def ensure_str(data: Sequence[Any]) -> list[str]:
+        text: list[str] = []
+        for prompt in data:
+            if not isinstance(prompt, str):
+                raise ValueError(
+                    "The JinaForRanking model only supports text as input."
+                )
+            text.append(prompt)
+        return text
+
+
+class JinaRankingIOProcessor(LateInteractionIOProcessor, JinaRankingIOProcessorMixin):
+    name = "jina-reranking-scoring"
+    pooling_task: PoolingTask = "token_embed"
+
+    def _pre_process(
+        self,
+        scoring_data: ScoringData,
+        tok_params: TokenizeParams,
+        prompt_extras: dict[str, Any] | None = None,
+    ) -> Sequence[EngineInput]:
+        queries = self.ensure_str(scoring_data.data_1)
+        docs = self.ensure_str(scoring_data.data_2)
+
+        if len(queries) == 1:
+            prompts = [self.format_docs_prompts_func(query=queries[0], docs=docs)]
+        else:
+            prompts = [
+                self.format_docs_prompts_func(query=q, docs=[d])
+                for q, d in zip(queries, docs)
+            ]
+
+        return self._preprocess_cmpl_offline(
+            prompts=prompts, tok_params=tok_params, prompt_extras=prompt_extras
+        )
+
+    def _post_process(self, outputs: list[PoolingRequestOutput], n_queries: int):
+        final_res_batch: list[PoolingRequestOutput] = []
+
+        for i in range(len(outputs)):
+            embeds = outputs[i].outputs.data.float()
+
+            # The JinaForRanking model concatenates docs first, then query.
+            # Let's stay consistent with this novel design.
+            query_embeds = embeds[-1]
+            doc_embeds = embeds[:-1]
+
+            scores = F.cosine_similarity(query_embeds, doc_embeds)
+
+            for score in scores:
+                final_res_batch.append(
+                    PoolingRequestOutput(
+                        request_id=outputs[i].request_id,
+                        outputs=score,
+                        prompt_token_ids=outputs[i].prompt_token_ids,
+                        num_cached_tokens=outputs[i].num_cached_tokens,
+                        finished=True,
+                    )
+                )
+        return final_res_batch
+
+
+ScoringIOProcessors: dict[str, type[ScoringIOProcessor]] = {
+    p.name: p
+    for p in [
+        BiEncoderIOProcessor,
+        LateInteractionIOProcessor,
+        JinaRankingIOProcessor,
+        FlashLateInteractionIOProcessor,
+        CrossEncoderIOProcessor,
+    ]
+}
diff --git a/vllm/entrypoints/pooling/scoring/protocol.py b/vllm/entrypoints/pooling/scoring/protocol.py
new file mode 100644
index 000000000000..49cce7a4ee23
--- /dev/null
+++ b/vllm/entrypoints/pooling/scoring/protocol.py
@@ -0,0 +1,187 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import time
+from typing import Any, TypeAlias
+
+from pydantic import BaseModel, Field, model_validator
+
+from vllm import PoolingParams
+from vllm.config import ModelConfig
+from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo
+from vllm.renderers import TokenizeParams
+from vllm.tasks import PoolingTask
+from vllm.utils import random_uuid
+
+from ..base.protocol import ClassifyRequestMixin, PoolingBasicRequestMixin
+from .typing import ScoreContentPartParam, ScoreInput
+
+
+class ScoringRequestMixin(PoolingBasicRequestMixin, ClassifyRequestMixin):
+    # --8<-- [start:scoring-common-params]
+    max_tokens_per_query: int = Field(
+        default=0,
+        description=(
+            "Maximum number of tokens per query. Queries longer than "
+            "this will be truncated to this length. 0 means no "
+            "query-level truncation is applied."
+        ),
+    )
+    max_tokens_per_doc: int = Field(
+        default=0,
+        description=(
+            "Maximum number of tokens per document. Documents longer than "
+            "this will be truncated to this length. 0 means no "
+            "document-level truncation is applied (only truncate_prompt_tokens "
+            "applies to the combined query+document)."
+        ),
+    )
+    instruction: str | None = Field(
+        default=None,
+        description=(
+            "Task instruction prepended to each scored pair via the chat "
+            "template. Equivalent to passing "
+            "chat_template_kwargs={'instruction': ...}."
+        ),
+    )
+    chat_template_kwargs: dict[str, Any] | None = Field(
+        default=None,
+        description=(
+            "Additional keyword args to pass to the chat template renderer. "
+            "Will be accessible by the score/rerank chat template."
+        ),
+    )
+    # --8<-- [end:scoring-common-params]
+
+    @model_validator(mode="after")
+    def _merge_instruction_into_kwargs(self) -> "ScoringRequestMixin":
+        """Fold the top-level `instruction` field into `chat_template_kwargs`.
+
+        This allows callers to use either the convenience field or the generic
+        dict.  Explicit keys inside `chat_template_kwargs` take precedence over
+        the top-level `instruction` field.
+        """
+        if self.instruction is not None:
+            merged = dict(self.chat_template_kwargs or {})
+            merged.setdefault("instruction", self.instruction)
+            self.chat_template_kwargs = merged
+        return self
+
+    def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
+        return self._build_pooling_tok_params(
+            model_config,
+            add_special_tokens=True,
+            max_total_tokens=model_config.max_model_len,
+            max_output_tokens=0,
+        )
+
+    def to_pooling_params(self, task: PoolingTask = "classify"):
+        return PoolingParams(
+            task=task,
+            use_activation=self.use_activation,
+        )
+
+
+class ScoreDataRequest(ScoringRequestMixin):
+    data_1: ScoreInput | list[ScoreInput]
+    data_2: ScoreInput | list[ScoreInput]
+
+
+class ScoreQueriesDocumentsRequest(ScoringRequestMixin):
+    # --8<-- [start:score-request-params]
+    queries: ScoreInput | list[ScoreInput]
+    documents: ScoreInput | list[ScoreInput]
+    # --8<-- [end:score-request-params]
+
+    @property
+    def data_1(self):
+        return self.queries
+
+    @property
+    def data_2(self):
+        return self.documents
+
+
+class ScoreQueriesItemsRequest(ScoringRequestMixin):
+    queries: ScoreInput | list[ScoreInput]
+    items: ScoreInput | list[ScoreInput]
+
+    @property
+    def data_1(self):
+        return self.queries
+
+    @property
+    def data_2(self):
+        return self.items
+
+
+class ScoreTextRequest(ScoringRequestMixin):
+    text_1: ScoreInput | list[ScoreInput]
+    text_2: ScoreInput | list[ScoreInput]
+
+    @property
+    def data_1(self):
+        return self.text_1
+
+    @property
+    def data_2(self):
+        return self.text_2
+
+
+ScoreRequest: TypeAlias = (
+    ScoreQueriesDocumentsRequest
+    | ScoreQueriesItemsRequest
+    | ScoreDataRequest
+    | ScoreTextRequest
+)
+
+
+class RerankRequest(ScoringRequestMixin):
+    # --8<-- [start:rerank-request-params]
+    query: ScoreInput
+    documents: ScoreInput | list[ScoreInput]
+    top_n: int = Field(default_factory=lambda: 0)
+    # --8<-- [end:rerank-request-params]
+
+
+ScoringRequest: TypeAlias = ScoreRequest | RerankRequest
+
+
+class RerankDocument(BaseModel):
+    text: str | None = None
+    multi_modal: list[ScoreContentPartParam] | None = None
+
+
+class RerankResult(BaseModel):
+    index: int
+    document: RerankDocument
+    relevance_score: float
+
+
+class RerankUsage(BaseModel):
+    prompt_tokens: int
+    total_tokens: int
+
+
+class RerankResponse(OpenAIBaseModel):
+    id: str
+    model: str
+    usage: RerankUsage
+    results: list[RerankResult]
+
+
+class ScoreResponseData(OpenAIBaseModel):
+    index: int
+    object: str = "score"
+    score: float
+
+
+class ScoreResponse(OpenAIBaseModel):
+    id: str = Field(default_factory=lambda: f"embd-{random_uuid()}")
+    object: str = "list"
+    created: int = Field(default_factory=lambda: int(time.time()))
+    model: str
+    data: list[ScoreResponseData]
+    usage: UsageInfo
+
+
+ScoringResponse: TypeAlias = RerankResponse | ScoreResponse
diff --git a/vllm/entrypoints/pooling/scoring/serving.py b/vllm/entrypoints/pooling/scoring/serving.py
new file mode 100644
index 000000000000..5937664d5687
--- /dev/null
+++ b/vllm/entrypoints/pooling/scoring/serving.py
@@ -0,0 +1,287 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from fastapi.responses import JSONResponse, Response
+
+from vllm import PoolingParams
+from vllm.engine.protocol import EngineClient
+from vllm.entrypoints.openai.engine.protocol import UsageInfo
+from vllm.logger import init_logger
+from vllm.outputs import PoolingRequestOutput, ScoringRequestOutput
+from vllm.tasks import SCORE_TYPE_MAP, SupportedTask
+from vllm.v1.pool.late_interaction import (
+    build_late_interaction_doc_params,
+    build_late_interaction_query_params,
+)
+
+from ..base.io_processor import PoolingIOProcessor
+from ..base.serving import PoolingServing
+from .io_processor import ScoringIOProcessors, ScoringServeContext
+from .protocol import (
+    RerankDocument,
+    RerankRequest,
+    RerankResponse,
+    RerankResult,
+    RerankUsage,
+    ScoreRequest,
+    ScoreResponse,
+    ScoreResponseData,
+)
+from .typing import ScoreInput
+
+logger = init_logger(__name__)
+
+
+class ServingScores(PoolingServing):
+    request_id_prefix = "score"
+
+    def __init__(
+        self,
+        engine_client: EngineClient,
+        *args,
+        supported_tasks: tuple[SupportedTask, ...],
+        enable_flash_late_interaction: bool = True,
+        **kwargs,
+    ):
+        pooling_task = engine_client.model_config.get_pooling_task(supported_tasks)
+        score_type = SCORE_TYPE_MAP.get(pooling_task, None)  # type: ignore[arg-type]
+        assert score_type is not None
+
+        self.io_processor_name: str = score_type
+        self.enable_flash_late_interaction = (
+            self.io_processor_name == "late-interaction"
+            and enable_flash_late_interaction
+        )
+
+        if self.enable_flash_late_interaction:
+            self.io_processor_name = "flash-late-interaction"
+
+        if engine_client.model_config.architecture == "JinaForRanking":
+            self.io_processor_name = "jina-reranking-scoring"
+            self.enable_flash_late_interaction = False
+
+        super().__init__(engine_client, *args, **kwargs)
+
+    def init_io_processor(self, *args, **kwargs) -> PoolingIOProcessor:
+        return ScoringIOProcessors[self.io_processor_name](*args, **kwargs)
+
+    async def __call__(self, *args, **kwargs) -> Response:
+        if not self.enable_flash_late_interaction:
+            return await super().__call__(*args, **kwargs)
+
+        return await self.flash_late_interaction(*args, **kwargs)
+
+    def _build_response(
+        self,
+        ctx: ScoringServeContext,
+    ) -> JSONResponse:
+        final_res_batch = ctx.final_res_batch
+        request_id = ctx.request_id
+        created_time = ctx.created_time
+        model_name = self.models.model_name()
+
+        if isinstance(ctx.request, ScoreRequest):
+            return self._request_output_to_score_response(
+                final_res_batch,
+                request_id,
+                created_time,
+                model_name,
+            )
+        elif isinstance(ctx.request, RerankRequest):
+            return self._request_output_to_rerank_response(
+                final_res_batch,
+                request_id,
+                model_name,
+                ctx.request.documents,
+                ctx.request.top_n if ctx.request.top_n > 0 else len(final_res_batch),
+            )
+        else:
+            raise ValueError(f"Invalid {self.request_id_prefix} request type")
+
+    def _request_output_to_score_response(
+        self,
+        final_res_batch: list[PoolingRequestOutput],
+        request_id: str,
+        created_time: int,
+        model_name: str,
+    ) -> JSONResponse:
+        items: list[ScoreResponseData] = []
+        num_prompt_tokens = 0
+
+        for idx, final_res in enumerate(final_res_batch):
+            classify_res = ScoringRequestOutput.from_base(final_res)
+
+            item = ScoreResponseData(
+                index=idx,
+                score=classify_res.outputs.score,
+            )
+            prompt_token_ids = final_res.prompt_token_ids
+
+            items.append(item)
+            num_prompt_tokens += len(prompt_token_ids)
+
+        usage = UsageInfo(
+            prompt_tokens=num_prompt_tokens,
+            total_tokens=num_prompt_tokens,
+        )
+
+        response = ScoreResponse(
+            id=request_id,
+            created=created_time,
+            model=model_name,
+            data=items,
+            usage=usage,
+        )
+
+        return JSONResponse(content=response.model_dump())
+
+    def _request_output_to_rerank_response(
+        self,
+        final_res_batch: list[PoolingRequestOutput],
+        request_id: str,
+        model_name: str,
+        documents: ScoreInput | list[ScoreInput],
+        top_n: int,
+    ) -> JSONResponse:
+        if not isinstance(documents, list):
+            documents = [documents]
+
+        results: list[RerankResult] = []
+        num_prompt_tokens = 0
+        for idx, final_res in enumerate(final_res_batch):
+            classify_res = ScoringRequestOutput.from_base(final_res)
+
+            document = documents[idx]
+            if isinstance(document, str):
+                rerank_document = RerankDocument(text=document)
+            else:
+                rerank_document = RerankDocument(
+                    multi_modal=document.get("content", [])
+                )
+
+            result = RerankResult(
+                index=idx,
+                document=rerank_document,
+                relevance_score=classify_res.outputs.score,
+            )
+            results.append(result)
+            prompt_token_ids = final_res.prompt_token_ids
+            num_prompt_tokens += len(prompt_token_ids)
+
+        # sort by relevance, then return the top n if set
+        results.sort(key=lambda x: x.relevance_score, reverse=True)
+        if top_n < len(documents):
+            results = results[:top_n]
+
+        response = RerankResponse(
+            id=request_id,
+            model=model_name,
+            results=results,
+            usage=RerankUsage(
+                total_tokens=num_prompt_tokens, prompt_tokens=num_prompt_tokens
+            ),
+        )
+
+        return JSONResponse(content=response.model_dump())
+
+    ###################################################################################
+    ### Run pooling score MaxSim on worker side (GPU) in the API server process
+    ### Can significantly improve late-interaction scoring performance.
+
+    async def flash_late_interaction(self, *args, **kwargs) -> Response:
+        ctx = await self._init_ctx(self.io_processor, *args, **kwargs)
+        await self._preprocessing_async(self.io_processor, ctx)
+
+        # stage 1: encode queries and cache token embeddings on workers.
+        await self._flash_late_interaction_encode_queries(ctx)
+        # stage 2: encode docs and return scalar scores from workers.
+        await self._flash_late_interaction_encode_docs(ctx)
+
+        return await self._postprocessing_async(self.io_processor, ctx)
+
+    async def _flash_late_interaction_encode_queries(self, ctx: ScoringServeContext):
+        assert ctx.n_queries is not None
+        assert ctx.engine_inputs is not None
+        assert isinstance(ctx.pooling_params, PoolingParams)
+
+        n_queries = ctx.n_queries
+        n_docs = len(ctx.engine_inputs) - n_queries
+        query_engine_inputs = ctx.engine_inputs[:n_queries]
+
+        query_keys = [f"{ctx.request_id}-query-{i}" for i in range(n_queries)]
+        query_uses = [n_docs if n_queries == 1 else 1] * n_queries
+
+        query_pooling_params_list = []
+        for i in range(n_queries):
+            pooling_params = ctx.pooling_params.clone()
+            pooling_params.late_interaction_params = (
+                build_late_interaction_query_params(
+                    query_key=query_keys[i],
+                    query_uses=query_uses[i],
+                )
+            )
+            query_pooling_params_list.append(pooling_params)
+
+        assert (
+            n_queries
+            == len(query_pooling_params_list)
+            == len(query_engine_inputs)
+            == len(query_keys)
+        )
+
+        query_ctx = ScoringServeContext(
+            request=ctx.request,
+            raw_request=ctx.raw_request,
+            model_name=ctx.model_name,
+            request_id=ctx.request_id,
+            pooling_params=query_pooling_params_list,
+            prompt_request_ids=query_keys,
+            engine_inputs=query_engine_inputs,
+        )
+
+        await self._prepare_generators(query_ctx)
+        await self._collect_batch(query_ctx)
+        ctx.query_final_res_batch = query_ctx.final_res_batch
+
+    async def _flash_late_interaction_encode_docs(self, ctx: ScoringServeContext):
+        assert ctx.n_queries is not None
+        assert ctx.engine_inputs is not None
+        assert isinstance(ctx.pooling_params, PoolingParams)
+
+        n_queries = ctx.n_queries
+        n_docs = len(ctx.engine_inputs) - n_queries
+        doc_engine_inputs = ctx.engine_inputs[n_queries:]
+
+        query_keys = [f"{ctx.request_id}-query-{i}" for i in range(n_queries)]
+        doc_keys = [f"{ctx.request_id}-doc-{i}" for i in range(n_docs)]
+
+        doc_pooling_params_list = []
+        for i in range(n_docs):
+            query_idx = 0 if n_queries == 1 else i
+            pooling_params = ctx.pooling_params.clone()
+            pooling_params.late_interaction_params = build_late_interaction_doc_params(
+                query_key=query_keys[query_idx]
+            )
+            doc_pooling_params_list.append(pooling_params)
+
+        assert (
+            n_docs
+            == len(doc_pooling_params_list)
+            == len(doc_engine_inputs)
+            == len(doc_keys)
+        )
+
+        doc_ctx = ScoringServeContext(
+            request=ctx.request,
+            raw_request=ctx.raw_request,
+            model_name=ctx.model_name,
+            request_id=ctx.request_id,
+            pooling_params=doc_pooling_params_list,
+            prompt_request_ids=doc_keys,
+            engine_inputs=doc_engine_inputs,
+        )
+
+        await self._prepare_generators(doc_ctx)
+        await self._collect_batch(doc_ctx)
+
+        ctx.final_res_batch = doc_ctx.final_res_batch
diff --git a/vllm/entrypoints/pooling/scoring/typing.py b/vllm/entrypoints/pooling/scoring/typing.py
new file mode 100644
index 000000000000..fd6191b6af8f
--- /dev/null
+++ b/vllm/entrypoints/pooling/scoring/typing.py
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from dataclasses import dataclass
+from typing import TypeAlias
+
+from typing_extensions import Required, TypedDict
+
+from vllm.entrypoints.chat_utils import (
+    ChatCompletionContentPartImageEmbedsParam,
+    ChatCompletionContentPartImageParam,
+    ChatCompletionContentPartTextParam,
+    ChatCompletionContentPartVideoParam,
+)
+
+ScoreContentPartParam: TypeAlias = (
+    ChatCompletionContentPartImageParam
+    | ChatCompletionContentPartImageEmbedsParam
+    | ChatCompletionContentPartTextParam
+    | ChatCompletionContentPartVideoParam
+)
+
+
+class ScoreMultiModalParam(TypedDict, total=False):
+    """
+    A specialized parameter type for scoring multimodal content
+
+    The reasons why don't reuse `CustomChatCompletionMessageParam` directly:
+    1. Score tasks don't need the 'role' field (user/assistant/system) that's required in chat completions
+    2. Including chat-specific fields would confuse users about their purpose in scoring
+    3. This is a more focused interface that only exposes what's needed for scoring
+    """  # noqa: E501
+
+    content: Required[list[ScoreContentPartParam]]
+    """The multimodal contents"""
+
+
+# Raw input data with content key in ScoreMultiModalParam.
+ScoreInput = str | ScoreMultiModalParam
+# Score data without content key.
+ScoreData = str | list[ScoreContentPartParam]
+
+
+@dataclass
+class ScoringData:
+    data_1: list[ScoreData]
+    data_2: list[ScoreData]
diff --git a/vllm/entrypoints/pooling/score/utils.py b/vllm/entrypoints/pooling/scoring/utils.py
similarity index 50%
rename from vllm/entrypoints/pooling/score/utils.py
rename to vllm/entrypoints/pooling/scoring/utils.py
index f620e37902b7..13db389b8233 100644
--- a/vllm/entrypoints/pooling/score/utils.py
+++ b/vllm/entrypoints/pooling/scoring/utils.py
@@ -1,45 +1,61 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from collections.abc import Iterable
-from typing import Any, TypeAlias, cast
+from typing import cast
 
 import torch
-from torch.nn import CosineSimilarity
-from typing_extensions import Required, TypedDict
 
+from vllm import PromptType, TextPrompt
 from vllm.config import ModelConfig
 from vllm.entrypoints.chat_utils import (
     BaseMultiModalItemTracker,
-    ChatCompletionContentPartImageEmbedsParam,
-    ChatCompletionContentPartImageParam,
     ChatCompletionContentPartParam,
     ChatCompletionContentPartTextParam,
-    ChatCompletionContentPartVideoParam,
-    ChatTemplateResolutionError,
     ConversationMessage,
     MultiModalItemTracker,
     _parse_chat_message_content_parts,
 )
-from vllm.inputs import (
-    MultiModalDataDict,
-    MultiModalUUIDDict,
-    PromptType,
-    TextPrompt,
-    TokensPrompt,
-)
-from vllm.model_executor.models.interfaces import supports_score_template
-from vllm.outputs import PoolingRequestOutput
-from vllm.renderers.hf import safe_apply_chat_template
-from vllm.tokenizers import TokenizerLike
-
-ScoreContentPartParam: TypeAlias = (
-    ChatCompletionContentPartImageParam
-    | ChatCompletionContentPartImageEmbedsParam
-    | ChatCompletionContentPartTextParam
-    | ChatCompletionContentPartVideoParam
+from vllm.inputs import MultiModalDataDict, MultiModalUUIDDict
+
+from .typing import (
+    ScoreContentPartParam,
+    ScoreData,
+    ScoreInput,
+    ScoringData,
 )
 
 
+def get_num_special_tokens_for_pair(tokenizer) -> int:
+    """Get number of special tokens added for a text pair encoding."""
+    method = getattr(tokenizer, "num_special_tokens_to_add", None)
+    if method is not None:
+        try:
+            return method(pair=True)
+        except TypeError:
+            pass
+    # Fallback: compute by tokenizing empty strings
+    empty_encoding = tokenizer("", text_pair="", add_special_tokens=True)
+    return len(empty_encoding["input_ids"])
+
+
+def truncate_text_to_tokens(
+    text: str,
+    tokenizer,
+    max_tokens: int,
+) -> str:
+    """Truncate text to a maximum number of content tokens.
+
+    Uses offset_mapping to slice the original text at the exact character
+    boundary, avoiding lossy encode→decode round-trips that can shift
+    the token count by 1-3 tokens due to BPE merge boundary changes.
+    """
+    encoding = tokenizer(text, add_special_tokens=False, return_offsets_mapping=True)
+    if len(encoding["input_ids"]) <= max_tokens:
+        return text
+    char_end = encoding["offset_mapping"][max_tokens - 1][1]
+    return text[:char_end]
+
+
 def compute_maxsim_score(q_emb: torch.Tensor, d_emb: torch.Tensor) -> torch.Tensor:
     """
     Compute ColBERT MaxSim score.
@@ -51,78 +67,13 @@ def compute_maxsim_score(q_emb: torch.Tensor, d_emb: torch.Tensor) -> torch.Tens
     Returns:
         MaxSim score (sum over query tokens of max similarity to any doc token)
     """
+    # compute in float32 for numerical stability
     # [query_len, doc_len]
-    token_scores = torch.matmul(q_emb, d_emb.T)
+    token_scores = torch.matmul(q_emb.float(), d_emb.float().T)
     # Max over document tokens, sum over query tokens
     return token_scores.amax(dim=-1).sum()
 
 
-class ScoreMultiModalParam(TypedDict, total=False):
-    """
-    A specialized parameter type for scoring multimodal content
-
-    The reasons why don't reuse `CustomChatCompletionMessageParam` directly:
-    1. Score tasks don't need the 'role' field (user/assistant/system) that's required in chat completions
-    2. Including chat-specific fields would confuse users about their purpose in scoring
-    3. This is a more focused interface that only exposes what's needed for scoring
-    """  # noqa: E501
-
-    content: Required[list[ScoreContentPartParam]]
-    """The multimodal contents"""
-
-
-# Raw input data with content key in ScoreMultiModalParam.
-ScoreInput = str | ScoreMultiModalParam
-ScoreInputs = ScoreInput | list[ScoreInput]
-# Score data without content key.
-ScoreData = str | list[ScoreContentPartParam]
-
-
-def _cosine_similarity(
-    tokenizer: TokenizerLike,
-    embed_1: list[PoolingRequestOutput],
-    embed_2: list[PoolingRequestOutput],
-) -> list[PoolingRequestOutput]:
-    scorer = CosineSimilarity(0)
-    scores: list[PoolingRequestOutput] = []
-
-    for emb_1, emb_2 in zip(embed_1, embed_2):
-        pair_score = scorer(emb_1.outputs.data, emb_2.outputs.data)
-
-        padding: list[int] = []
-        if (pad_token_id := tokenizer.pad_token_id) is not None:
-            padding = [pad_token_id]
-
-        tokens = emb_1.prompt_token_ids + padding + emb_2.prompt_token_ids
-
-        scores.append(
-            PoolingRequestOutput(
-                request_id=f"{emb_1.request_id}_{emb_2.request_id}",
-                outputs=pair_score,
-                prompt_token_ids=tokens,
-                num_cached_tokens=emb_1.num_cached_tokens + emb_2.num_cached_tokens,
-                finished=True,
-            )
-        )
-
-    return scores
-
-
-def _validate_score_input_lens(
-    data_1: list[ScoreData],
-    data_2: list[ScoreData],
-):
-    len_1 = len(data_1)
-    len_2 = len(data_2)
-
-    if len_1 > 1 and len_1 != len_2:
-        raise ValueError("Input lengths must be either 1:1, 1:N or N:N")
-    if len_1 == 0:
-        raise ValueError("At least one text element must be given")
-    if len_2 == 0:
-        raise ValueError("At least one text_pair element must be given")
-
-
 def _validate_mm_score_input(
     data: list[ScoreInput],
     is_multimodal_model: bool,
@@ -140,12 +91,27 @@ def _validate_mm_score_input(
     return out
 
 
+def _validate_score_input_lens(
+    data_1: list[ScoreData],
+    data_2: list[ScoreData],
+):
+    len_1 = len(data_1)
+    len_2 = len(data_2)
+
+    if len_1 > 1 and len_1 != len_2:
+        raise ValueError("Input lengths must be either 1:1, 1:N or N:N")
+    if len_1 == 0:
+        raise ValueError("At least one text element must be given")
+    if len_2 == 0:
+        raise ValueError("At least one text_pair element must be given")
+
+
 def validate_score_input(
-    data_1: ScoreInputs,
-    data_2: ScoreInputs,
+    data_1: ScoreInput | list[ScoreInput],
+    data_2: ScoreInput | list[ScoreInput],
     is_multimodal_model: bool,
     architecture: str,
-) -> tuple[list[ScoreData], list[ScoreData]]:
+) -> ScoringData:
     if not isinstance(data_1, list):
         data_1 = [data_1]
 
@@ -155,62 +121,7 @@ def validate_score_input(
     score_input_1 = _validate_mm_score_input(data_1, is_multimodal_model, architecture)
     score_input_2 = _validate_mm_score_input(data_2, is_multimodal_model, architecture)
     _validate_score_input_lens(score_input_1, score_input_2)
-    return score_input_1, score_input_2
-
-
-def _ensure_str(content: list[ConversationMessage]) -> str:
-    """Extract a single string prompt from parsed conversation content."""
-    assert len(content) == 1
-    prompt = content[0]["content"]
-    if prompt is not None and isinstance(prompt, str):
-        return cast(str, prompt)
-    raise ValueError(f"Only string content is supported, but got {content}.")
-
-
-def parse_score_data(
-    data_1: ScoreData,
-    data_2: ScoreData,
-    model_config: ModelConfig,
-) -> tuple[str, str, MultiModalDataDict | None, MultiModalUUIDDict | None]:
-    """Parse a query-document pair into text prompts and shared multi-modal
-    data.
-
-    Uses a **single** :class:`MultiModalItemTracker` so that multi-modal
-    items from both inputs are merged into one ``mm_data`` dict.  This is
-    the correct behaviour for cross-encoder scoring, where query and
-    document are concatenated into a single model prompt.
-    """
-    mm_tracker = MultiModalItemTracker(model_config)
-
-    content_1 = _parse_score_content("query", data_1, mm_tracker)
-    content_2 = _parse_score_content("document", data_2, mm_tracker)
-
-    prompt_1 = _ensure_str(content_1)
-    prompt_2 = _ensure_str(content_2)
-    mm_items, mm_uuids = mm_tracker.resolve_items()
-
-    return prompt_1, prompt_2, mm_items, mm_uuids
-
-
-def parse_score_data_single(
-    data: ScoreData,
-    role: str,
-    model_config: ModelConfig,
-) -> tuple[str, MultiModalDataDict | None, MultiModalUUIDDict | None]:
-    """Parse **one** ScoreData into a text prompt and its own multi-modal
-    data.
-
-    Unlike :func:`parse_score_data`, each call creates an **independent**
-    :class:`MultiModalItemTracker` so multi-modal items are kept separate.
-    This is the correct behaviour for late-interaction scoring, where
-    query and document are encoded independently.
-    """
-    mm_tracker = MultiModalItemTracker(model_config)
-    content = _parse_score_content(role, data, mm_tracker)
-
-    prompt = _ensure_str(content)
-    mm_items, mm_uuids = mm_tracker.resolve_items()
-    return prompt, mm_items, mm_uuids
+    return ScoringData(data_1=score_input_1, data_2=score_input_2)
 
 
 def score_data_to_prompts(
@@ -243,6 +154,15 @@ def score_data_to_prompts(
     return prompts
 
 
+def _ensure_str(content: list[ConversationMessage]) -> str:
+    """Extract a single string prompt from parsed conversation content."""
+    assert len(content) == 1
+    prompt = content[0]["content"]
+    if prompt is not None and isinstance(prompt, str):
+        return cast(str, prompt)
+    raise ValueError(f"Only string content is supported, but got {content}.")
+
+
 def _parse_score_content(
     role: str,
     data: ScoreData,
@@ -262,6 +182,7 @@ def _parse_score_content(
         mm_tracker=mm_tracker,
         wrap_dicts=False,
         interleave_strings=False,
+        multimodal_content_part_separator="",
     )
 
     if parse_res:
@@ -278,113 +199,50 @@ def _parse_score_content(
     return next(iter(mm_placeholder_storage.values()))[0]
 
 
-def _apply_model_score_template(
-    model_config: ModelConfig, prompt_1: str, prompt_2: str
-) -> str:
-    # NOTE(Simon): lazy import to avoid bring in all dependencies (e.g. gguf)
-    from vllm.model_executor.model_loader import get_model_cls
-
-    model = get_model_cls(model_config)
-    if supports_score_template(model):
-        full_prompt = model.get_score_template(prompt_1, prompt_2)
-        if full_prompt is None:
-            raise ValueError("Get empty score template from model")
-        return full_prompt
-
-    raise ValueError(f"Unsupported model architecture: {model_config.architecture}")
-
-
-def post_process_tokens(
+def parse_score_data_single(
+    data: ScoreData,
+    role: str,
     model_config: ModelConfig,
-    prompt: TokensPrompt,
-) -> None:
-    """
-    Perform architecture-specific manipulations on the input tokens.
+) -> tuple[str, MultiModalDataDict | None, MultiModalUUIDDict | None]:
+    """Parse **one** ScoreData into a text prompt and its own multi-modal
+    data.
 
-    Note:
-        This is an in-place operation.
+    Unlike :func:`parse_score_data`, each call creates an **independent**
+    :class:`MultiModalItemTracker` so multi-modal items are kept separate.
+    This is the correct behaviour for late-interaction scoring, where
+    query and document are encoded independently.
     """
-    # NOTE(Simon): lazy import to avoid bring in all dependencies (e.g. gguf)
-    from vllm.model_executor.model_loader import get_model_cls
+    mm_tracker = MultiModalItemTracker(model_config)
+    content = _parse_score_content(role, data, mm_tracker)
 
-    model = get_model_cls(model_config)
-    if supports_score_template(model):
-        model.post_process_tokens(prompt)
+    prompt = _ensure_str(content)
+    mm_items, mm_uuids = mm_tracker.resolve_items()
+    return prompt, mm_items, mm_uuids
 
 
-def get_score_prompt(
-    model_config: ModelConfig,
-    tokenizer: TokenizerLike,
-    tokenization_kwargs: dict[str, Any],
+def parse_score_data(
     data_1: ScoreData,
     data_2: ScoreData,
-    score_template: str | None = None,
-) -> tuple[str, TokensPrompt]:
-    prompt_1, prompt_2, mm_data, mm_uuids = parse_score_data(
-        data_1,
-        data_2,
-        model_config,
-    )
-    from vllm.model_executor.model_loader import get_model_cls
+    model_config: ModelConfig,
+) -> tuple[str, str, MultiModalDataDict | None, MultiModalUUIDDict | None]:
+    """Parse a query-document pair into text prompts and shared multi-modal
+    data.
 
-    model = get_model_cls(model_config)
+    Uses a **single** :class:`MultiModalItemTracker` so that multi-modal
+    items from both inputs are merged into one ``mm_data`` dict.  This is
+    the correct behaviour for cross-encoder scoring, where query and
+    document are concatenated into a single model prompt.
+    """
+    mm_tracker = MultiModalItemTracker(model_config)
 
-    def default_tokenizer_encode():
-        if supports_score_template(model):
-            full_prompt = _apply_model_score_template(model_config, prompt_1, prompt_2)
-            prompt_inputs = tokenizer(full_prompt, **tokenization_kwargs)
-        else:
-            if model_config.use_sep_token:
-                # cross_encoder models defaults to using separating token.
-                prompt_inputs = tokenizer(
-                    text=prompt_1, text_pair=prompt_2, **tokenization_kwargs
-                )
-                full_prompt = tokenizer.decode(prompt_inputs["input_ids"])
-            else:
-                # `llm as reranker` defaults to not using separating token.
-                full_prompt = prompt_1 + prompt_2
-                prompt_inputs = tokenizer(text=full_prompt, **tokenization_kwargs)
-        return full_prompt, prompt_inputs
-
-    # FIXME: For now, we only apply a template when one is explicitly provided.
-    # We cannot rely on the tokenizer's chat template because many models
-    # inherit junk templates from their base LLM, which breaks both the models
-    # and the tests that use them.
-    if score_template is None:
-        full_prompt, prompt_inputs = default_tokenizer_encode()
-    else:
-        # FIXME: Try applying a score template from the CLI arg or tokenizer_config.json
-        # If that fails because there is no such template,
-        # fall back to the default implementation.
-        try:
-            full_prompt = safe_apply_chat_template(
-                model_config,
-                tokenizer,
-                [
-                    {"role": "query", "content": prompt_1},
-                    {"role": "document", "content": prompt_2},
-                ],
-                chat_template=score_template,
-                tools=None,
-                tokenize=False,
-            )
-            prompt_inputs = tokenizer(full_prompt, **tokenization_kwargs)
-        except ChatTemplateResolutionError:
-            full_prompt, prompt_inputs = default_tokenizer_encode()
-
-    engine_prompt = TokensPrompt(prompt_token_ids=prompt_inputs["input_ids"])
-
-    if (token_type_ids := prompt_inputs.get("token_type_ids")) is not None:
-        engine_prompt["token_type_ids"] = token_type_ids
-
-    post_process_tokens(model_config, engine_prompt)
-
-    if mm_data is not None:
-        engine_prompt["multi_modal_data"] = mm_data
-    if mm_uuids is not None:
-        engine_prompt["multi_modal_uuids"] = mm_uuids
-
-    return full_prompt, engine_prompt
+    content_1 = _parse_score_content("query", data_1, mm_tracker)
+    content_2 = _parse_score_content("document", data_2, mm_tracker)
+
+    prompt_1 = _ensure_str(content_1)
+    prompt_2 = _ensure_str(content_2)
+    mm_items, mm_uuids = mm_tracker.resolve_items()
+
+    return prompt_1, prompt_2, mm_items, mm_uuids
 
 
 def compress_token_type_ids(token_type_ids: list[int]) -> int:
diff --git a/vllm/entrypoints/pooling/typing.py b/vllm/entrypoints/pooling/typing.py
index 1df72ca5c704..ffcd3e7be434 100644
--- a/vllm/entrypoints/pooling/typing.py
+++ b/vllm/entrypoints/pooling/typing.py
@@ -1,39 +1,38 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import time
-from collections.abc import AsyncGenerator
+from collections.abc import AsyncGenerator, Sequence
 from dataclasses import dataclass, field
 from typing import Any, Generic, TypeAlias, TypeVar
 
 from fastapi import Request
 from pydantic import ConfigDict
 
-from vllm import PoolingRequestOutput
-from vllm.entrypoints.pooling.classify.protocol import (
+from vllm import PoolingParams, PoolingRequestOutput, PromptType
+from vllm.inputs import DataPrompt, EngineInput
+from vllm.lora.request import LoRARequest
+
+from .classify.protocol import (
     ClassificationChatRequest,
     ClassificationCompletionRequest,
     ClassificationResponse,
 )
-from vllm.entrypoints.pooling.embed.protocol import (
+from .embed.protocol import (
     CohereEmbedRequest,
     EmbeddingBytesResponse,
     EmbeddingChatRequest,
     EmbeddingCompletionRequest,
     EmbeddingResponse,
 )
-from vllm.entrypoints.pooling.pooling.protocol import (
+from .pooling.protocol import (
     IOProcessorRequest,
+    PoolingBytesResponse,
     PoolingChatRequest,
     PoolingCompletionRequest,
     PoolingResponse,
 )
-from vllm.entrypoints.pooling.score.protocol import (
-    RerankRequest,
-    ScoreRequest,
-    ScoreResponse,
-)
-from vllm.inputs import EngineInput
-from vllm.lora.request import LoRARequest
+from .scoring.protocol import ScoringRequest, ScoringResponse
+from .scoring.typing import ScoringData
 
 PoolingCompletionLikeRequest: TypeAlias = (
     EmbeddingCompletionRequest
@@ -49,8 +48,7 @@
     PoolingCompletionLikeRequest
     | PoolingChatLikeRequest
     | IOProcessorRequest
-    | RerankRequest
-    | ScoreRequest
+    | ScoringRequest
     | CohereEmbedRequest
 )
 
@@ -59,7 +57,8 @@
     | EmbeddingResponse
     | EmbeddingBytesResponse
     | PoolingResponse
-    | ScoreResponse
+    | PoolingBytesResponse
+    | ScoringResponse
 )
 
 PoolingRequestT = TypeVar("PoolingRequestT", bound=AnyPoolingRequest)
@@ -67,20 +66,50 @@
 
 @dataclass(kw_only=True)
 class PoolingServeContext(Generic[PoolingRequestT]):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
     request: PoolingRequestT
     raw_request: Request | None = None
     model_name: str
     request_id: str
+    pooling_params: PoolingParams | list[PoolingParams]
     created_time: int = field(default_factory=lambda: int(time.time()))
     lora_request: LoRARequest | None = None
-
-    engine_inputs: list[EngineInput] | None = None
+    engine_inputs: Sequence[EngineInput] | None = None
     prompt_request_ids: list[str] | None = None
-    intermediates: Any | None = None
 
     result_generator: AsyncGenerator[tuple[int, PoolingRequestOutput], None] | None = (
         None
     )
     final_res_batch: list[PoolingRequestOutput] = field(default_factory=list)
 
-    model_config = ConfigDict(arbitrary_types_allowed=True)
+    ## for Long Text Embedding with Chunked Processing
+    original_engine_inputs: Sequence[EngineInput] | None = None
+
+    ## for bi-encoder & late-interaction
+    n_queries: int | None = None
+
+    ## for IOProcessorResponse
+    response: Any | None = None
+
+    ## for flash-late-interaction
+    query_final_res_batch: list[PoolingRequestOutput] | None = None
+
+
+@dataclass
+class OfflineInputsContext:
+    prompts: PromptType | Sequence[PromptType] | DataPrompt | ScoringData
+    pooling_params: PoolingParams | Sequence[PoolingParams]
+    tokenization_kwargs: dict[str, Any] | None = None
+    chat_template: str | None = None
+
+    ## for bi-encoder & late-interaction
+    n_queries: int | None = None
+
+
+@dataclass
+class OfflineOutputsContext:
+    outputs: list[PoolingRequestOutput]
+
+    ## for bi-encoder & late-interaction
+    n_queries: int | None = None
diff --git a/vllm/entrypoints/pooling/utils.py b/vllm/entrypoints/pooling/utils.py
index 1af6b35088bf..b6d91332efd5 100644
--- a/vllm/entrypoints/pooling/utils.py
+++ b/vllm/entrypoints/pooling/utils.py
@@ -2,17 +2,22 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import importlib.util
+import json
 import math
+from collections.abc import Callable, Sequence
 from dataclasses import dataclass
-from functools import lru_cache
-from typing import Any
+from functools import lru_cache, partial
+from typing import Any, Literal, cast
 
 import pybase64
 import torch
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse, StreamingResponse
 
+from vllm.config import ModelConfig
+from vllm.entrypoints.openai.engine.protocol import UsageInfo
 from vllm.logger import init_logger
 from vllm.outputs import PoolingRequestOutput
+from vllm.tasks import SupportedTask
 from vllm.utils.serial_utils import (
     EMBED_DTYPES,
     EmbedDType,
@@ -23,6 +28,11 @@
 
 logger = init_logger(__name__)
 
+JsonEncodingFormat = Literal["float", "base64"]
+BytesEncodingFormat = Literal["bytes", "bytes_only"]
+FloatEncodedPoolingOutput = list[float] | list[list[float]]
+JsonEncodedPoolingOutput = FloatEncodedPoolingOutput | str
+
 
 @dataclass
 class MetadataItem:
@@ -56,10 +66,23 @@ def build_metadata_items(
     ]
 
 
-def encode_pooling_output_float(output: PoolingRequestOutput) -> list[float]:
+def encode_pooling_output_float(
+    output: PoolingRequestOutput,
+) -> FloatEncodedPoolingOutput:
     return output.outputs.data.tolist()
 
 
+def encode_pooling_output_float_or_ndarray(output: PoolingRequestOutput) -> Any:
+    """Return an ndarray when the response renderer can serialize NumPy."""
+    try:
+        data = output.outputs.data
+        if not data.is_contiguous():
+            data = data.contiguous()
+        return data.numpy()
+    except (RuntimeError, TypeError):
+        return output.outputs.data.tolist()
+
+
 def encode_pooling_output_base64(
     output: PoolingRequestOutput,
     embed_dtype: EmbedDType,
@@ -73,8 +96,7 @@ def encode_pooling_bytes(
     pooling_outputs: list[PoolingRequestOutput],
     embed_dtype: EmbedDType,
     endianness: Endianness,
-) -> tuple[list[bytes], list[dict[str, Any]], dict[str, Any]]:
-    num_prompt_tokens = 0
+) -> tuple[list[bytes], list[dict[str, Any]]]:
     items: list[dict[str, Any]] = []
     body: list[bytes] = []
     offset = 0
@@ -98,17 +120,89 @@ def encode_pooling_bytes(
 
         body.append(binary)
         items.append(item)
-        prompt_token_ids = output.prompt_token_ids
-        num_prompt_tokens += len(prompt_token_ids)
         offset += size
 
-    # Dictionary form of UsageInfo
-    usage = dict(
+    return body, items
+
+
+def get_pooling_output_encoder(
+    encoding_format: JsonEncodingFormat,
+    embed_dtype: EmbedDType,
+    endianness: Endianness,
+) -> Callable[[PoolingRequestOutput], JsonEncodedPoolingOutput]:
+    return cast(
+        Callable[[PoolingRequestOutput], JsonEncodedPoolingOutput],
+        (
+            encode_pooling_output_float
+            if encoding_format == "float"
+            else partial(
+                encode_pooling_output_base64,
+                embed_dtype=embed_dtype,
+                endianness=endianness,
+            )
+        ),
+    )
+
+
+def get_pooling_usage(
+    pooling_outputs: Sequence[PoolingRequestOutput],
+) -> UsageInfo:
+    num_prompt_tokens = sum(
+        len(output.prompt_token_ids) if output.prompt_token_ids is not None else 0
+        for output in pooling_outputs
+    )
+    return UsageInfo(
         prompt_tokens=num_prompt_tokens,
         total_tokens=num_prompt_tokens,
     )
 
-    return body, items, usage
+
+def get_pooling_usage_payload(
+    pooling_outputs: Sequence[PoolingRequestOutput],
+) -> dict[str, int]:
+    usage = get_pooling_usage(pooling_outputs)
+    return {
+        "prompt_tokens": usage.prompt_tokens,
+        "total_tokens": usage.total_tokens,
+    }
+
+
+def build_pooling_bytes_streaming_response(
+    pooling_outputs: list[PoolingRequestOutput],
+    request_id: str,
+    created_time: int,
+    model_name: str,
+    encoding_format: BytesEncodingFormat,
+    embed_dtype: EmbedDType,
+    endianness: Endianness,
+) -> StreamingResponse:
+    content, items = encode_pooling_bytes(
+        pooling_outputs=pooling_outputs,
+        embed_dtype=embed_dtype,
+        endianness=endianness,
+    )
+
+    headers = (
+        None
+        if encoding_format == "bytes_only"
+        else {
+            "metadata": json.dumps(
+                {
+                    "id": request_id,
+                    "created": created_time,
+                    "model": model_name,
+                    "data": items,
+                    "usage": get_pooling_usage_payload(pooling_outputs),
+                }
+            )
+        }
+    )
+
+    return StreamingResponse(
+        content=content,
+        headers=headers,
+        media_type="application/octet-stream",
+    )
 
 
 def decode_pooling_output(items: list[MetadataItem], body: bytes) -> list[torch.Tensor]:
@@ -133,3 +227,24 @@ def get_json_response_cls() -> type[JSONResponse]:
         "To make v1/embeddings API fast, please install orjson by `pip install orjson`"
     )
     return JSONResponse
+
+
+def enable_scoring_api(
+    supported_tasks: tuple["SupportedTask", ...],
+    model_config: ModelConfig | None = None,
+) -> bool:
+    if model_config is None:
+        return False
+
+    pooling_task = model_config.get_pooling_task(supported_tasks)
+    if pooling_task in ("embed", "token_embed"):
+        return True
+
+    if pooling_task == "classify":
+        num_labels = getattr(model_config.hf_config, "num_labels", 0)
+        if num_labels != 1:
+            logger.debug_once("Scoring API is only enabled for num_labels == 1.")
+            return False
+        return True
+
+    return False
diff --git a/vllm/entrypoints/sagemaker/api_router.py b/vllm/entrypoints/sagemaker/api_router.py
index e8c48d1c6d53..b3b11cd07b4c 100644
--- a/vllm/entrypoints/sagemaker/api_router.py
+++ b/vllm/entrypoints/sagemaker/api_router.py
@@ -13,94 +13,21 @@
 from vllm.config import ModelConfig
 from vllm.entrypoints.openai.engine.protocol import ErrorResponse
 from vllm.entrypoints.openai.engine.serving import OpenAIServing
+from vllm.entrypoints.openai.generate.factories import get_generate_invocation_types
 from vllm.entrypoints.openai.utils import validate_json_request
-from vllm.entrypoints.pooling import enable_scoring_api
-from vllm.entrypoints.pooling.base.serving import PoolingServing
+from vllm.entrypoints.pooling.base.serving import PoolingServingBase
+from vllm.entrypoints.pooling.factories import get_pooling_invocation_types
 from vllm.entrypoints.serve.instrumentator.basic import base
 from vllm.entrypoints.serve.instrumentator.health import health
-from vllm.tasks import POOLING_TASKS, SupportedTask
+from vllm.tasks import SupportedTask
 
 # TODO: RequestType = TypeForm[BaseModel] when recognized by type checkers
 # (requires typing_extensions >= 4.13)
 RequestType = Any
-GetHandlerFn = Callable[[Request], OpenAIServing | PoolingServing | None]
+GetHandlerFn = Callable[[Request], OpenAIServing | PoolingServingBase | None]
 EndpointFn = Callable[[RequestType, Request], Awaitable[Any]]
 
 
-def get_invocation_types(
-    supported_tasks: tuple["SupportedTask", ...],
-    model_config: ModelConfig | None = None,
-):
-    # NOTE: Items defined earlier take higher priority
-    INVOCATION_TYPES: list[tuple[RequestType, tuple[GetHandlerFn, EndpointFn]]] = []
-
-    if "generate" in supported_tasks:
-        from vllm.entrypoints.openai.chat_completion.api_router import (
-            chat,
-            create_chat_completion,
-        )
-        from vllm.entrypoints.openai.chat_completion.protocol import (
-            ChatCompletionRequest,
-        )
-        from vllm.entrypoints.openai.completion.api_router import (
-            completion,
-            create_completion,
-        )
-        from vllm.entrypoints.openai.completion.protocol import CompletionRequest
-
-        INVOCATION_TYPES += [
-            (ChatCompletionRequest, (chat, create_chat_completion)),
-            (CompletionRequest, (completion, create_completion)),
-        ]
-
-    if "embed" in supported_tasks:
-        from vllm.entrypoints.pooling.embed.api_router import (
-            create_embedding,
-            embedding,
-        )
-        from vllm.entrypoints.pooling.embed.protocol import EmbeddingRequest
-
-        INVOCATION_TYPES += [
-            (EmbeddingRequest, (embedding, create_embedding)),
-        ]
-
-    if "classify" in supported_tasks:
-        from vllm.entrypoints.pooling.classify.api_router import (
-            classify,
-            create_classify,
-        )
-        from vllm.entrypoints.pooling.classify.protocol import ClassificationRequest
-
-        INVOCATION_TYPES += [
-            (ClassificationRequest, (classify, create_classify)),
-        ]
-
-    if enable_scoring_api(supported_tasks, model_config):
-        from vllm.entrypoints.pooling.score.api_router import do_rerank, rerank
-        from vllm.entrypoints.pooling.score.protocol import RerankRequest
-
-        INVOCATION_TYPES += [
-            (RerankRequest, (rerank, do_rerank)),
-        ]
-
-        from vllm.entrypoints.pooling.score.api_router import create_score, score
-        from vllm.entrypoints.pooling.score.protocol import ScoreRequest
-
-        INVOCATION_TYPES += [
-            (ScoreRequest, (score, create_score)),
-        ]
-
-    if any(task in POOLING_TASKS for task in supported_tasks):
-        from vllm.entrypoints.pooling.pooling.api_router import create_pooling, pooling
-        from vllm.entrypoints.pooling.pooling.protocol import PoolingRequest
-
-        INVOCATION_TYPES += [
-            (PoolingRequest, (pooling, create_pooling)),
-        ]
-
-    return INVOCATION_TYPES
-
-
 def attach_router(
     app: FastAPI,
     supported_tasks: tuple["SupportedTask", ...],
@@ -109,7 +36,10 @@ def attach_router(
     router = APIRouter()
 
     # NOTE: Construct the TypeAdapters only once
-    INVOCATION_TYPES = get_invocation_types(supported_tasks, model_config)
+    INVOCATION_TYPES = get_generate_invocation_types(
+        supported_tasks, model_config
+    ) + get_pooling_invocation_types(supported_tasks, model_config)
+
     INVOCATION_VALIDATORS = [
         (pydantic.TypeAdapter(request_type), (get_handler, endpoint))
         for request_type, (get_handler, endpoint) in INVOCATION_TYPES
diff --git a/vllm/entrypoints/serve/disagg/mm_serde.py b/vllm/entrypoints/serve/disagg/mm_serde.py
new file mode 100644
index 000000000000..60a3560ad737
--- /dev/null
+++ b/vllm/entrypoints/serve/disagg/mm_serde.py
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Encode/decode utilities for multimodal tensors and field metadata
+over JSON/HTTP, used by the disaggregated generate endpoint."""
+
+from __future__ import annotations
+
+import pybase64
+
+from vllm.multimodal.inputs import MultiModalKwargsItem
+from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder
+
+_encoder = MsgpackEncoder(size_threshold=2**62)  # force all tensors inline
+_decoder = MsgpackDecoder(t=MultiModalKwargsItem)
+
+
+def encode_mm_kwargs_item(item: MultiModalKwargsItem) -> str:
+    """Serialize a MultiModalKwargsItem to a base64 string."""
+    bufs = _encoder.encode(item)
+    assert len(bufs) == 1, "All tensors should be inline"
+    return pybase64.b64encode(bufs[0]).decode("ascii")
+
+
+def decode_mm_kwargs_item(data: str) -> MultiModalKwargsItem:
+    """Deserialize a base64 string back to a MultiModalKwargsItem."""
+    raw = pybase64.b64decode(data)
+    return _decoder.decode(raw)
diff --git a/vllm/entrypoints/serve/disagg/protocol.py b/vllm/entrypoints/serve/disagg/protocol.py
index af4e8c20c14c..60d2a6424a00 100644
--- a/vllm/entrypoints/serve/disagg/protocol.py
+++ b/vllm/entrypoints/serve/disagg/protocol.py
@@ -2,11 +2,17 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from typing import Any
 
-from pydantic import BaseModel, Field, field_validator
+from pydantic import (
+    BaseModel,
+    Field,
+    PrivateAttr,
+    field_validator,
+    model_validator,
+)
 
 from vllm.config import ModelConfig
 from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionLogProbs
-from vllm.entrypoints.openai.engine.protocol import StreamOptions
+from vllm.entrypoints.openai.engine.protocol import StreamOptions, UsageInfo
 from vllm.logprobs import Logprob
 from vllm.renderers import TokenizeParams
 from vllm.sampling_params import SamplingParams
@@ -35,14 +41,6 @@ class MultiModalFeatures(BaseModel):
     Carries hashes (for cache lookup / identification) and placeholder
     positions so the downstream `/generate` service knows *where* in
     the token sequence each multimodal item lives.
-
-    Note:
-        Phase 1 — metadata only.
-        Phase 2 should add `mm_kwargs` (processed tensor data) using a
-        binary transport so the ``/generate` side can skip re-processing.
-        The `/generate` endpoint must also be updated to inject these
-        features into `EngineInput` before passing to
-        `InputProcessor.process_inputs`.
     """
 
     mm_hashes: dict[str, list[str]]
@@ -51,6 +49,15 @@ class MultiModalFeatures(BaseModel):
     mm_placeholders: dict[str, list[PlaceholderRangeInfo]]
     """Per-modality placeholder ranges in the token sequence."""
 
+    kwargs_data: dict[str, list[str | None]] | None = None
+    """Per-modality serialized tensor data.
+
+    Each value is a list parallel to ``mm_hashes[modality]``.  A ``str``
+    entry is a base64-encoded ``MultiModalKwargsItem``; ``None`` means
+    the item should be resolved from cache.  The entire field is
+    ``None`` for metadata-only (cache-hit) responses.
+    """
+
 
 class GenerateRequest(BaseModel):
     request_id: str = Field(
@@ -61,7 +68,7 @@ class GenerateRequest(BaseModel):
             "through out the inference process and return in response."
         ),
     )
-    token_ids: list[int]
+    token_ids: list[int] = Field(min_length=1)
     """The token ids to generate text from."""
 
     @field_validator("token_ids")
@@ -107,6 +114,39 @@ def validate_token_ids(cls, v: list[int]) -> list[int]:
         description="KVTransfer parameters used for disaggregated serving.",
     )
 
+    # Tracks which keys the caller explicitly set inside ``sampling_params``
+    # when the request was parsed from a JSON body. Lets the server tell
+    # "client said max_tokens=16" from "client said nothing → dataclass
+    # default 16" so it can apply server-side defaulting only in the latter
+    # case. ``None`` means the request was constructed with a pre-built
+    # ``SamplingParams`` instance (e.g. from internal callers that have
+    # already resolved values), in which case all fields are considered set.
+    _sampling_params_provided_keys: set[str] | None = PrivateAttr(default=None)
+
+    @model_validator(mode="wrap")
+    @classmethod
+    def _capture_sampling_params_provided_keys(cls, data: Any, handler):
+        provided: set[str] | None = None
+        if isinstance(data, dict):
+            sp = data.get("sampling_params")
+            if isinstance(sp, dict):
+                provided = set(sp.keys())
+        instance = handler(data)
+        instance._sampling_params_provided_keys = provided
+        return instance
+
+    def is_sampling_param_provided(self, name: str) -> bool:
+        """Whether the caller explicitly set ``sampling_params.<name>``.
+
+        For requests parsed from a JSON body, this reflects the raw input
+        dict. For requests constructed with a pre-built ``SamplingParams``
+        instance, all fields are considered provided so server-side defaults
+        do not clobber values already resolved upstream.
+        """
+        if self._sampling_params_provided_keys is None:
+            return True
+        return name in self._sampling_params_provided_keys
+
     def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
         return TokenizeParams(
             max_total_tokens=None,
@@ -120,6 +160,36 @@ class GenerateResponseChoice(BaseModel):
     # per OpenAI spec this is the default
     finish_reason: str | None = "stop"
     token_ids: list[int] | None = None
+    # Per-token expert routing decisions, base64-encoded ``.npy`` bytes
+    # (numpy serialization). Shape after decode:
+    #   (num_tokens - 1, num_layers, num_experts_per_tok)  dtype uint8/uint16
+    # ``num_tokens - 1`` because the last sampled token has not been
+    # forwarded yet and therefore has no routing data.
+    # Decode:
+    #   np.load(io.BytesIO(base64.b64decode(s)))
+    # ``None`` if (a) the request was aborted before any forward pass,
+    # or (b) ``enable_return_routed_experts`` is off server-side.
+    routed_experts: str | None = None
+
+
+class GenerateResponseStreamChoice(BaseModel):
+    index: int
+    logprobs: ChatCompletionLogProbs | None = None
+    finish_reason: str | None = None
+    token_ids: list[int] | None = None
+
+
+class GenerateStreamResponse(BaseModel):
+    request_id: str = Field(
+        default_factory=lambda: f"{random_uuid()}",
+        description=(
+            "The request_id related to this request. If the caller does "
+            "not set it, a random_uuid will be generated. This id is used "
+            "through out the inference process and return in response."
+        ),
+    )
+    choices: list[GenerateResponseStreamChoice]
+    usage: UsageInfo | None = Field(default=None)
 
 
 class GenerateResponse(BaseModel):
diff --git a/vllm/entrypoints/serve/disagg/serving.py b/vllm/entrypoints/serve/disagg/serving.py
index 79367622c369..0cc227ee74db 100644
--- a/vllm/entrypoints/serve/disagg/serving.py
+++ b/vllm/entrypoints/serve/disagg/serving.py
@@ -3,10 +3,14 @@
 
 
 import asyncio
+import io
 import time
 from collections.abc import AsyncGenerator
 from collections.abc import Sequence as GenericSequence
 
+import msgspec
+import numpy as np
+import pybase64 as base64
 from fastapi import Request
 
 from vllm.engine.protocol import EngineClient
@@ -18,22 +22,33 @@
 )
 from vllm.entrypoints.openai.engine.protocol import (
     ErrorResponse,
+    GenerationError,
     PromptTokenUsageInfo,
     RequestResponseMetadata,
     UsageInfo,
 )
 from vllm.entrypoints.openai.engine.serving import OpenAIServing, clamp_prompt_logprobs
 from vllm.entrypoints.openai.models.serving import OpenAIServingModels
+from vllm.entrypoints.serve.disagg.mm_serde import decode_mm_kwargs_item
 from vllm.entrypoints.serve.disagg.protocol import (
     GenerateRequest,
     GenerateResponse,
     GenerateResponseChoice,
+    GenerateResponseStreamChoice,
+    GenerateStreamResponse,
 )
 from vllm.entrypoints.serve.render.serving import OpenAIServingRender
+from vllm.entrypoints.utils import get_max_tokens, should_include_usage
+from vllm.inputs import EngineInput, mm_input
 from vllm.logger import init_logger
 from vllm.logprobs import Logprob
+from vllm.multimodal.inputs import (
+    MultiModalKwargsItem,
+    MultiModalKwargsItems,
+    PlaceholderRange,
+)
 from vllm.outputs import RequestOutput
-from vllm.sampling_params import SamplingParams
+from vllm.sampling_params import RequestOutputKind, SamplingParams
 from vllm.utils.collection_utils import as_list
 
 logger = init_logger(__name__)
@@ -70,11 +85,23 @@ def __init__(
                 "step for incoming requests."
             )
 
+        # Mirrors ``OpenAIServingChat`` so we can apply server-side
+        # ``max_tokens`` defaulting when the client omits it. Without this,
+        # ``SamplingParams.max_tokens`` falls back to its dataclass default
+        # of 16 and silently truncates every generation.
+        self.default_sampling_params = self.model_config.get_diff_sampling_param()
+        mc = self.model_config
+        self.override_max_tokens = (
+            self.default_sampling_params.get("max_tokens")
+            if mc.generation_config not in ("auto", "vllm")
+            else getattr(mc, "override_generation_config", {}).get("max_new_tokens")
+        )
+
     async def serve_tokens(
         self,
         request: GenerateRequest,
         raw_request: Request | None = None,
-    ) -> GenerateResponse | ErrorResponse:
+    ) -> GenerateResponse | ErrorResponse | AsyncGenerator[str, None]:
         error_check_ret = await self._check_model(request)
         if error_check_ret is not None:
             logger.error("Error with model %s", error_check_ret)
@@ -99,17 +126,75 @@ async def serve_tokens(
         if raw_request:
             raw_request.state.request_metadata = request_metadata
 
-        (engine_input,) = await self.openai_serving_render.preprocess_completion(
-            request,
-            prompt_input=request.token_ids,
-            prompt_embeds=None,
-        )
+        sampling_params = request.sampling_params
+        max_num_seqs = self.engine_client.vllm_config.scheduler_config.max_num_seqs
+        if sampling_params.n > max_num_seqs:
+            return self.create_error_response(
+                f"sampling_params.n must be at most the server's max_num_seqs "
+                f"({max_num_seqs}), got {sampling_params.n}."
+            )
+        try:
+            msgspec.msgpack.encode(sampling_params)
+        except (OverflowError, TypeError, ValueError) as e:
+            return self.create_error_response(e)
+
+        engine_input: EngineInput
+        if features := request.features:
+            # Convert PlaceholderRangeInfo → PlaceholderRange per modality.
+            mm_placeholders: dict[str, list[PlaceholderRange]] = {
+                modality: [
+                    PlaceholderRange(offset=p.offset, length=p.length) for p in ranges
+                ]
+                for modality, ranges in features.mm_placeholders.items()
+            }
+
+            # Deserialize tensor data when present; None → cache hit.
+            mm_kwargs: dict[str, list[MultiModalKwargsItem | None]] = {}
+            if features.kwargs_data is not None:
+                for modality, items in features.kwargs_data.items():
+                    mm_kwargs[modality] = [
+                        decode_mm_kwargs_item(item) if item is not None else None
+                        for item in items
+                    ]
+            else:
+                for modality, hashes in features.mm_hashes.items():
+                    mm_kwargs[modality] = [None] * len(hashes)
+
+            engine_input = mm_input(
+                prompt_token_ids=request.token_ids,
+                mm_kwargs=MultiModalKwargsItems(mm_kwargs),
+                mm_hashes=features.mm_hashes,
+                mm_placeholders=mm_placeholders,
+                cache_salt=request.cache_salt,
+            )
+        else:
+            (engine_input,) = await self.openai_serving_render.preprocess_completion(
+                request,
+                prompt_input=request.token_ids,
+                prompt_embeds=None,
+                skip_mm_cache=True,
+            )
 
         # Schedule the request and get the result generator.
         result_generator: AsyncGenerator[RequestOutput, None] | None = None
-        sampling_params = request.sampling_params
+
+        # Apply server-side ``max_tokens`` defaulting when the client did
+        # not set it, matching the OpenAI-compat endpoints. ``SamplingParams``
+        # defaults ``max_tokens`` to 16, which would otherwise silently cap
+        # every generation that omits the field.
+        if not request.is_sampling_param_provided("max_tokens"):
+            sampling_params.max_tokens = get_max_tokens(
+                max_model_len=self.model_config.max_model_len,
+                max_tokens=None,
+                input_length=self._extract_prompt_len(engine_input),
+                default_sampling_params=self.default_sampling_params,
+                override_max_tokens=self.override_max_tokens,
+            )
+
         if self.force_no_detokenize:
             sampling_params.detokenize = False
+        if request.stream:
+            sampling_params.output_kind = RequestOutputKind.DELTA
 
         self._log_inputs(
             request_id,
@@ -124,6 +209,9 @@ async def serve_tokens(
             else await self._get_trace_headers(raw_request.headers)
         )
 
+        # Extract data_parallel_rank from header (router can inject it)
+        data_parallel_rank = self._get_data_parallel_rank(raw_request)
+
         result_generator = self.engine_client.generate(
             engine_input,
             sampling_params,
@@ -131,11 +219,20 @@ async def serve_tokens(
             lora_request=lora_request,
             trace_headers=trace_headers,
             priority=request.priority,
+            data_parallel_rank=data_parallel_rank,
         )
 
-        # TODO(NickLucche): Implement streaming response
-
         assert result_generator is not None
+
+        if request.stream:
+            return self.serve_tokens_stream_generator(
+                request,
+                result_generator,
+                request_id,
+                model_name,
+                request_metadata,
+            )
+
         return await self.serve_tokens_full_generator(
             request, result_generator, request_id, model_name, request_metadata
         )
@@ -177,11 +274,24 @@ async def serve_tokens_full_generator(
             else:
                 logprobs = None
 
+            # Encode routed_experts for transport. JSON can't carry raw
+            # bytes, so we write the ndarray as a ``.npy`` byte stream
+            # and base64-encode it. ``pybase64`` is ~3x faster than the
+            # stdlib ``base64`` on large payloads thanks to SIMD.
+            # This is the only base64 hop in the pipeline -- the
+            # engine<->API-server link is binary msgpack + zmq.
+            routed_experts_b64 = None
+            if output.routed_experts is not None:
+                buf = io.BytesIO()
+                np.save(buf, output.routed_experts)
+                routed_experts_b64 = base64.b64encode(buf.getvalue()).decode("ascii")
+
             choice_data = GenerateResponseChoice(
                 index=output.index,
                 logprobs=logprobs,
                 finish_reason=output.finish_reason if output.finish_reason else "stop",
                 token_ids=as_list(output.token_ids),
+                routed_experts=routed_experts_b64,
             )
 
             choices.append(choice_data)
@@ -236,6 +346,109 @@ async def serve_tokens_full_generator(
 
         return response
 
+    async def serve_tokens_stream_generator(
+        self,
+        request: GenerateRequest,
+        result_generator: AsyncGenerator[RequestOutput, None],
+        request_id: str,
+        model_name: str,
+        request_metadata: RequestResponseMetadata,
+    ) -> AsyncGenerator[str, None]:
+        num_prompt_tokens = 0
+        num_generated_tokens: list[int] = []
+        first_iteration = True
+        num_cached_tokens = None
+        sampling_params: SamplingParams = request.sampling_params
+
+        include_usage, include_continuous_usage = should_include_usage(
+            request.stream_options, False
+        )
+
+        try:
+            async for res in result_generator:
+                if first_iteration:
+                    if res.prompt_token_ids is not None:
+                        num_prompt_tokens = len(res.prompt_token_ids)
+                    if res.encoder_prompt_token_ids is not None:
+                        num_prompt_tokens += len(res.encoder_prompt_token_ids)
+                    num_cached_tokens = res.num_cached_tokens
+                    num_generated_tokens = [0] * len(res.outputs)
+                    first_iteration = False
+
+                for output in res.outputs:
+                    i = output.index
+                    delta_token_ids = output.token_ids
+                    num_generated_tokens[i] += len(delta_token_ids)
+
+                    finish_reason = output.finish_reason
+                    self._raise_if_error(finish_reason, request_id)
+
+                    if not delta_token_ids:
+                        continue
+
+                    if sampling_params.logprobs is not None:
+                        out_logprobs = output.logprobs
+                        assert out_logprobs is not None, "Did not output logprobs"
+                        logprobs = self._create_tokens_logprobs(
+                            token_ids=delta_token_ids,
+                            top_logprobs=out_logprobs,
+                            num_output_top_logprobs=sampling_params.logprobs,
+                        )
+                    else:
+                        logprobs = None
+
+                    chunk = GenerateStreamResponse(
+                        request_id=request_id,
+                        choices=[
+                            GenerateResponseStreamChoice(
+                                index=i,
+                                logprobs=logprobs,
+                                finish_reason=finish_reason,
+                                token_ids=as_list(delta_token_ids),
+                            )
+                        ],
+                    )
+                    if include_continuous_usage:
+                        chunk.usage = UsageInfo(
+                            prompt_tokens=num_prompt_tokens,
+                            completion_tokens=num_generated_tokens[i],
+                            total_tokens=(num_prompt_tokens + num_generated_tokens[i]),
+                        )
+
+                    yield f"data: {chunk.model_dump_json()}\n\n"
+
+            total_completion_tokens = sum(num_generated_tokens)
+            final_usage_info = UsageInfo(
+                prompt_tokens=num_prompt_tokens,
+                completion_tokens=total_completion_tokens,
+                total_tokens=num_prompt_tokens + total_completion_tokens,
+            )
+
+            if self.enable_prompt_tokens_details and num_cached_tokens:
+                final_usage_info.prompt_tokens_details = PromptTokenUsageInfo(
+                    cached_tokens=num_cached_tokens
+                )
+
+            if include_usage:
+                final_chunk = GenerateStreamResponse(
+                    request_id=request_id,
+                    choices=[],
+                    usage=final_usage_info,
+                )
+                yield f"data: {final_chunk.model_dump_json(exclude_none=True)}\n\n"
+
+            request_metadata.final_usage_info = final_usage_info
+
+        except GenerationError as e:
+            yield (
+                f"data: {self._convert_generation_error_to_streaming_response(e)}\n\n"
+            )
+        except Exception as e:
+            logger.exception("Error in token generation stream.")
+            data = self.create_streaming_error_response(e)
+            yield f"data: {data}\n\n"
+        yield "data: [DONE]\n\n"
+
     def _create_tokens_logprobs(
         self,
         token_ids: GenericSequence[int],
@@ -263,10 +476,12 @@ def _create_tokens_logprobs(
                         logprob=max(step_token.logprob, -9999.0),
                         top_logprobs=[
                             ChatCompletionLogProb(
-                                token=token,
-                                logprob=max(p[1].logprob, -9999.0),
+                                token=f"token_id:{token_id}",
+                                logprob=max(logprob.logprob, -9999.0),
+                            )
+                            for i, (token_id, logprob) in enumerate(
+                                step_top_logprobs.items()
                             )
-                            for i, p in enumerate(step_top_logprobs.items())
                             if num_output_top_logprobs is not None
                             and i < max(num_output_top_logprobs, 1)
                         ],
diff --git a/vllm/entrypoints/serve/lora/api_router.py b/vllm/entrypoints/serve/lora/api_router.py
index 057bf5c2e2c8..39ca0ec91b21 100644
--- a/vllm/entrypoints/serve/lora/api_router.py
+++ b/vllm/entrypoints/serve/lora/api_router.py
@@ -37,6 +37,7 @@ def attach_router(app: FastAPI):
             "lora_name": "body.name",
             "lora_path": "body.src",
             "load_inplace": "body.load_inplace || `false`",
+            "is_3d_lora_weight": "body.is_3d_lora_weight || `false`",
         },
     )
     @router.post("/v1/load_lora_adapter", dependencies=[Depends(validate_json_request)])
diff --git a/vllm/entrypoints/serve/lora/protocol.py b/vllm/entrypoints/serve/lora/protocol.py
index 3e3a30cf3f2e..e0786717d67b 100644
--- a/vllm/entrypoints/serve/lora/protocol.py
+++ b/vllm/entrypoints/serve/lora/protocol.py
@@ -8,6 +8,7 @@ class LoadLoRAAdapterRequest(BaseModel):
     lora_name: str
     lora_path: str
     load_inplace: bool = False
+    is_3d_lora_weight: bool = False
 
 
 class UnloadLoRAAdapterRequest(BaseModel):
diff --git a/vllm/entrypoints/serve/render/serving.py b/vllm/entrypoints/serve/render/serving.py
index 52f03447dcaa..782b2eaea24b 100644
--- a/vllm/entrypoints/serve/render/serving.py
+++ b/vllm/entrypoints/serve/render/serving.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from collections.abc import Sequence
 from http import HTTPStatus
-from typing import Any
+from typing import Any, cast
 
 from openai_harmony import Message as OpenAIMessage
 
@@ -25,6 +25,7 @@
     render_for_completion,
 )
 from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+from vllm.entrypoints.serve.disagg.mm_serde import encode_mm_kwargs_item
 from vllm.entrypoints.serve.disagg.protocol import (
     GenerateRequest,
     MultiModalFeatures,
@@ -37,6 +38,7 @@
 from vllm.inputs import (
     EngineInput,
     MultiModalHashes,
+    MultiModalInput,
     MultiModalPlaceholders,
     PromptType,
     SingletonPrompt,
@@ -44,6 +46,7 @@
 )
 from vllm.logger import init_logger
 from vllm.parser import ParserManager
+from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
 from vllm.renderers import BaseRenderer, merge_kwargs
 from vllm.renderers.inputs.preprocess import (
     extract_prompt_components,
@@ -53,7 +56,7 @@
 )
 from vllm.tool_parsers import ToolParser
 from vllm.utils import random_uuid
-from vllm.utils.mistral import is_mistral_tokenizer
+from vllm.utils.mistral import is_mistral_tokenizer, is_mistral_tool_parser
 from vllm.utils.mistral import mt as _mt
 
 logger = init_logger(__name__)
@@ -64,7 +67,6 @@ def __init__(
         self,
         model_config: ModelConfig,
         renderer: BaseRenderer,
-        io_processor: Any,
         model_registry: OpenAIModelRegistry,
         *,
         request_logger: RequestLogger | None,
@@ -74,12 +76,12 @@ def __init__(
         enable_auto_tools: bool = False,
         exclude_tools_when_tool_choice_none: bool = False,
         tool_parser: str | None = None,
+        reasoning_parser: str | None = None,
         default_chat_template_kwargs: dict[str, Any] | None = None,
         log_error_stack: bool = False,
     ) -> None:
         self.model_config = model_config
         self.renderer = renderer
-        self.io_processor = io_processor
         self.model_registry = model_registry
         self.request_logger = request_logger
         self.chat_template = chat_template
@@ -94,6 +96,11 @@ def __init__(
             enable_auto_tools=enable_auto_tools,
             model_name=model_config.model,
         )
+        self.reasoning_parser: type[ReasoningParser] | None = (
+            ParserManager.get_reasoning_parser(
+                reasoning_parser_name=reasoning_parser,
+            )
+        )
         self.default_chat_template_kwargs: dict[str, Any] = (
             default_chat_template_kwargs or {}
         )
@@ -129,7 +136,7 @@ async def render_chat_request(
                 "Beam search is not supported by the render endpoint"
             )
 
-        result = await self.render_chat(request)
+        result = await self.render_chat(request, skip_mm_cache=True)
         if isinstance(result, ErrorResponse):
             return result
 
@@ -157,6 +164,7 @@ async def render_chat_request(
             input_length,
             self.default_sampling_params,
             self.override_max_tokens,
+            truncate_prompt_tokens=request.truncate_prompt_tokens,
         )
         params = request.to_sampling_params(max_tokens, self.default_sampling_params)
 
@@ -177,6 +185,8 @@ async def render_chat_request(
     async def render_chat(
         self,
         request: ChatCompletionRequest,
+        *,
+        skip_mm_cache: bool = False,
     ) -> tuple[list[ConversationMessage], list[EngineInput]] | ErrorResponse:
         """Core preprocessing logic for chat requests (no model/engine check).
 
@@ -245,6 +255,8 @@ async def render_chat(
                 default_template_kwargs=self.default_chat_template_kwargs,
                 tool_dicts=tool_dicts,
                 tool_parser=tool_parser,
+                skip_mm_cache=skip_mm_cache,
+                reasoning_parser=self.reasoning_parser,
             )
         else:
             # For GPT-OSS.
@@ -267,7 +279,7 @@ async def render_completion_request(
         error_check_ret = await self._check_model(request)
         if error_check_ret is not None:
             return error_check_ret
-        result = await self.render_completion(request)
+        result = await self.render_completion(request, skip_mm_cache=True)
         if isinstance(result, ErrorResponse):
             return result
         generate_requests: list[GenerateRequest] = []
@@ -287,6 +299,7 @@ async def render_completion_request(
                 input_length,
                 self.default_sampling_params,
                 self.override_max_tokens,
+                truncate_prompt_tokens=request.truncate_prompt_tokens,
             )
             params = request.to_sampling_params(
                 max_tokens, self.default_sampling_params
@@ -313,6 +326,8 @@ async def render_completion_request(
     async def render_completion(
         self,
         request: CompletionRequest,
+        *,
+        skip_mm_cache: bool = False,
     ) -> list[EngineInput] | ErrorResponse:
         """Core preprocessing logic for completion requests (no model/engine check).
 
@@ -335,6 +350,7 @@ async def render_completion(
             request,
             prompt_input=request.prompt,
             prompt_embeds=request.prompt_embeds,
+            skip_mm_cache=skip_mm_cache,
         )
 
         return engine_inputs
@@ -350,9 +366,10 @@ def _extract_mm_features(
         if engine_input.get("type") != "multimodal":
             return None
 
-        # At this point engine_input is a MultiModalInputs TypedDict.
-        mm_hashes: MultiModalHashes = engine_input["mm_hashes"]  # type: ignore[typeddict-item]
-        raw_placeholders: MultiModalPlaceholders = engine_input["mm_placeholders"]  # type: ignore[typeddict-item]
+        # At this point engine_input is a MultiModalInput TypedDict.
+        mm_engine_input = cast(MultiModalInput, engine_input)
+        mm_hashes: MultiModalHashes = mm_engine_input["mm_hashes"]
+        raw_placeholders: MultiModalPlaceholders = mm_engine_input["mm_placeholders"]
 
         mm_placeholders = {
             modality: [
@@ -361,9 +378,20 @@ def _extract_mm_features(
             for modality, ranges in raw_placeholders.items()
         }
 
+        # Serialize tensor data per modality.
+        kwargs_data: dict[str, list[str | None]] | None = None
+        if raw_mm_kwargs := mm_engine_input.get("mm_kwargs"):
+            kwargs_data = {}
+            for modality, items in raw_mm_kwargs.items():
+                kwargs_data[modality] = [
+                    encode_mm_kwargs_item(item) if item is not None else None
+                    for item in items
+                ]
+
         return MultiModalFeatures(
             mm_hashes=mm_hashes,
             mm_placeholders=mm_placeholders,
+            kwargs_data=kwargs_data,
         )
 
     def _make_request_with_harmony(
@@ -451,6 +479,8 @@ async def preprocess_completion(
         request: Any,
         prompt_input: str | list[str] | list[int] | list[list[int]] | None,
         prompt_embeds: bytes | list[bytes] | None,
+        *,
+        skip_mm_cache: bool = False,
     ) -> list[EngineInput]:
         """Copied from OpenAIServing._preprocess_completion."""
         prompts = list[SingletonPrompt | bytes]()
@@ -458,12 +488,14 @@ async def preprocess_completion(
             prompts.extend(prompt_to_seq(prompt_embeds))
         if prompt_input is not None:
             prompts.extend(prompt_to_seq(prompt_input))
-        return await self.preprocess_cmpl(request, prompts)
+        return await self.preprocess_cmpl(request, prompts, skip_mm_cache=skip_mm_cache)
 
     async def preprocess_cmpl(
         self,
         request: Any,
         prompts: Sequence[PromptType | bytes],
+        *,
+        skip_mm_cache: bool = False,
     ) -> list[EngineInput]:
         """Copied from OpenAIServing._preprocess_cmpl."""
         renderer = self.renderer
@@ -487,6 +519,7 @@ async def preprocess_cmpl(
                 for k in ("mm_processor_kwargs", "cache_salt")
                 if (v := getattr(request, k, None)) is not None
             },
+            skip_mm_cache=skip_mm_cache,
         )
 
     async def preprocess_chat(
@@ -498,6 +531,9 @@ async def preprocess_chat(
         default_template_kwargs: dict[str, Any] | None,
         tool_dicts: list[dict[str, Any]] | None = None,
         tool_parser: type[ToolParser] | None = None,
+        reasoning_parser: type[ReasoningParser] | None = None,
+        *,
+        skip_mm_cache: bool = False,
     ) -> tuple[list[ConversationMessage], list[EngineInput]]:
         """Copied from OpenAIServing._preprocess_chat."""
         renderer = self.renderer
@@ -507,7 +543,10 @@ async def preprocess_chat(
             default_template_kwargs,
             dict(
                 tools=tool_dicts,
-                tokenize=is_mistral_tokenizer(renderer.tokenizer),
+                tokenize=(
+                    is_mistral_tokenizer(renderer.tokenizer)
+                    or self.model_config.enable_prompt_embeds
+                ),
             ),
         )
 
@@ -529,14 +568,33 @@ async def preprocess_chat(
                 for k in ("mm_processor_kwargs", "cache_salt")
                 if (v := getattr(request, k, None)) is not None
             },
+            skip_mm_cache=skip_mm_cache,
         )
 
+        if reasoning_parser is not None:
+            tokenizer = renderer.get_tokenizer()
+            request = reasoning_parser(
+                tokenizer,
+                model_config=self.model_config,
+                chat_template_kwargs=chat_params.chat_template_kwargs,
+            ).adjust_request(request=request)
+
         # tool parsing is done only if a tool_parser has been set and if
         # tool_choice is not "none" (if tool_choice is "none" but a tool_parser
         # is set, we want to prevent parsing a tool_call hallucinated by the LLM
+        #
+        # Exception: Mistral grammar-capable tokenizers always call
+        # adjust_request — even for tool_choice="none" — so that the grammar
+        # factory can prevent special-token leakage.
         if tool_parser is not None:
             tool_choice = getattr(request, "tool_choice", "none")
-            if tool_choice != "none":
+            tokenizer = renderer.get_tokenizer()
+            is_mistral_grammar_eligible = (
+                is_mistral_tool_parser(tool_parser)
+                and is_mistral_tokenizer(tokenizer)
+                and tokenizer.supports_grammar
+            )
+            if tool_choice != "none" or is_mistral_grammar_eligible:
                 if not isinstance(request, ChatCompletionRequest | ResponsesRequest):
                     msg = (
                         "Tool usage is only supported "
@@ -544,9 +602,8 @@ async def preprocess_chat(
                         f"but got {type(request).__name__}"
                     )
                     raise NotImplementedError(msg)
-                tokenizer = renderer.get_tokenizer()
                 request = tool_parser(tokenizer, request.tools).adjust_request(
-                    request=request  # type: ignore[arg-type]
+                    request=request
                 )
 
         return conversation, [engine_input]
diff --git a/vllm/entrypoints/serve/rlhf/api_router.py b/vllm/entrypoints/serve/rlhf/api_router.py
index 64a1dd20fdc7..dcae3889dc7c 100644
--- a/vllm/entrypoints/serve/rlhf/api_router.py
+++ b/vllm/entrypoints/serve/rlhf/api_router.py
@@ -128,6 +128,19 @@ async def init_weight_transfer_engine(raw_request: Request):
     return JSONResponse(content={"message": "Weight transfer initialized"})
 
 
+@router.post("/start_weight_update")
+async def start_weight_update(raw_request: Request):
+    try:
+        body = await raw_request.json()
+    except json.JSONDecodeError as e:
+        raise HTTPException(status_code=400, detail="Invalid JSON format") from e  # noqa: B904
+    is_checkpoint_format = body.get("is_checkpoint_format", True)
+    await engine_client(raw_request).start_weight_update(
+        is_checkpoint_format=is_checkpoint_format
+    )
+    return JSONResponse(content={"message": "Weight update started"})
+
+
 @router.post("/update_weights")
 async def update_weights(raw_request: Request):
     try:
@@ -146,6 +159,12 @@ async def update_weights(raw_request: Request):
     return JSONResponse(content={"message": "Weights updated"})
 
 
+@router.post("/finish_weight_update")
+async def finish_weight_update(raw_request: Request):
+    await engine_client(raw_request).finish_weight_update()
+    return JSONResponse(content={"message": "Weight update finished"})
+
+
 @router.get("/get_world_size")
 async def get_world_size(
     raw_request: Request,
diff --git a/vllm/entrypoints/serve/tokenize/serving.py b/vllm/entrypoints/serve/tokenize/serving.py
index 22b852d2778e..9b573b69eb83 100644
--- a/vllm/entrypoints/serve/tokenize/serving.py
+++ b/vllm/entrypoints/serve/tokenize/serving.py
@@ -86,12 +86,14 @@ async def create_tokenize(
                 default_template_content_format=self.chat_template_content_format,
                 default_template_kwargs=self.default_chat_template_kwargs,
                 tool_dicts=tool_dicts,
+                skip_mm_cache=True,
             )
         else:
             engine_inputs = await self.openai_serving_render.preprocess_completion(
                 request,
                 prompt_input=request.prompt,
                 prompt_embeds=None,
+                skip_mm_cache=True,
             )
 
         input_ids: list[int] = []
diff --git a/vllm/entrypoints/speech_to_text/__init__.py b/vllm/entrypoints/speech_to_text/__init__.py
new file mode 100644
index 000000000000..208f01a7cb5e
--- /dev/null
+++ b/vllm/entrypoints/speech_to_text/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
diff --git a/vllm/entrypoints/speech_to_text/base/__init__.py b/vllm/entrypoints/speech_to_text/base/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/entrypoints/speech_to_text/base/protocol.py b/vllm/entrypoints/speech_to_text/base/protocol.py
new file mode 100644
index 000000000000..e8cb61a41472
--- /dev/null
+++ b/vllm/entrypoints/speech_to_text/base/protocol.py
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+from typing import Literal, TypeAlias
+
+import torch
+
+## Protocols for Audio
+AudioResponseFormat: TypeAlias = Literal["json", "text", "srt", "verbose_json", "vtt"]
+_LONG_INFO = torch.iinfo(torch.long)
diff --git a/vllm/entrypoints/openai/speech_to_text/speech_to_text.py b/vllm/entrypoints/speech_to_text/base/serving.py
similarity index 81%
rename from vllm/entrypoints/openai/speech_to_text/speech_to_text.py
rename to vllm/entrypoints/speech_to_text/base/serving.py
index e0a3cf0dc0da..a0f02a2c7830 100644
--- a/vllm/entrypoints/openai/speech_to_text/speech_to_text.py
+++ b/vllm/entrypoints/speech_to_text/base/serving.py
@@ -5,7 +5,7 @@
 import math
 import time
 import zlib
-from collections.abc import AsyncGenerator, Callable
+from collections.abc import AsyncGenerator, Callable, Set
 from functools import cached_property
 from typing import Final, Literal, TypeAlias, TypeVar, cast
 
@@ -24,18 +24,6 @@
 )
 from vllm.entrypoints.openai.engine.serving import OpenAIServing, SpeechToTextRequest
 from vllm.entrypoints.openai.models.serving import OpenAIServingModels
-from vllm.entrypoints.openai.speech_to_text.protocol import (
-    TranscriptionResponse,
-    TranscriptionResponseStreamChoice,
-    TranscriptionResponseVerbose,
-    TranscriptionSegment,
-    TranscriptionStreamResponse,
-    TranslationResponse,
-    TranslationResponseStreamChoice,
-    TranslationResponseVerbose,
-    TranslationSegment,
-    TranslationStreamResponse,
-)
 from vllm.entrypoints.utils import get_max_tokens
 from vllm.exceptions import VLLMValidationError
 from vllm.inputs import EncoderDecoderInput, EngineInput
@@ -49,6 +37,22 @@
 from vllm.renderers.inputs.preprocess import parse_enc_dec_prompt, parse_model_prompt
 from vllm.sampling_params import BeamSearchParams, SamplingParams
 from vllm.tokenizers import get_tokenizer
+from vllm.utils.async_utils import merge_async_iterators
+
+from ..transcription.protocol import (
+    TranscriptionResponse,
+    TranscriptionResponseStreamChoice,
+    TranscriptionResponseVerbose,
+    TranscriptionSegment,
+    TranscriptionStreamResponse,
+)
+from ..translation.protocol import (
+    TranslationResponse,
+    TranslationResponseStreamChoice,
+    TranslationResponseVerbose,
+    TranslationSegment,
+    TranslationStreamResponse,
+)
 
 SpeechToTextResponse: TypeAlias = TranscriptionResponse | TranslationResponse
 SpeechToTextResponseVerbose: TypeAlias = (
@@ -69,6 +73,17 @@
 logger = init_logger(__name__)
 
 
+def asr_inter_chunk_separator(
+    language: str | None, no_space_languages: Set[str]
+) -> str:
+    """Space to insert between ASR text chunks for streaming and non-streaming join.
+
+    Languages in ``no_space_languages`` (e.g. Chinese, Japanese) use an empty
+    separator; others use a single ASCII space.
+    """
+    return "" if language and language.lower() in no_space_languages else " "
+
+
 class OpenAISpeechToText(OpenAIServing):
     """Base class for speech-to-text operations like transcription and
     translation."""
@@ -152,10 +167,17 @@ async def _detect_language(
             request_id,
         )
 
-        final_output: RequestOutput
-        async for final_output in result_generator:
-            if final_output.finished:
-                break
+        try:
+            final_output: RequestOutput
+            async for final_output in result_generator:
+                if final_output.finished:
+                    break
+        except asyncio.CancelledError:
+            await asyncio.gather(
+                self.engine_client.abort(request_id),
+                return_exceptions=True,
+            )
+            raise
 
         token_ids = list(final_output.outputs[0].token_ids)
         lang = self.model_cls.parse_language_detection_output(
@@ -173,9 +195,8 @@ async def _preprocess_speech_to_text(
         request_id: str,
     ) -> tuple[list[EngineInput], float]:
         # Validate request
-        language = self.model_cls.validate_language(request.language)
-        # Skip to_language validation to avoid extra logging for Whisper.
-        to_language = (
+        request.language = self.model_cls.validate_language(request.language)
+        request.to_language = (
             self.model_cls.validate_language(request.to_language)
             if request.to_language
             else None
@@ -218,28 +239,23 @@ async def _preprocess_speech_to_text(
                 min_energy_window_size=self.asr_config.min_energy_split_window_size,
             )
 
-        if language is None and getattr(
+        if request.language is None and getattr(
             self.model_cls, "supports_explicit_language_detection", False
         ):
             # Auto-detect language from the first chunk.
-            language = await self._detect_language(
+            request.language = await self._detect_language(
                 chunks[0], f"{request_id}-lang_detect"
             )
-            request.language = language
 
         parsed_prompts: list[DictPrompt] = []
         for chunk in chunks:
-            # The model has control over the construction, as long as it
-            # returns a valid PromptType.
-            prompt = self.model_cls.get_generation_prompt(
+            stt_params = request.build_stt_params(
                 audio=chunk,
                 stt_config=self.asr_config,
                 model_config=self.model_config,
-                language=language,
                 task_type=self.task_type,
-                request_prompt=request.prompt,
-                to_language=to_language,
             )
+            prompt = self.model_cls.get_generation_prompt(stt_params)
 
             parsed_prompt: DictPrompt
             if request.response_format == "verbose_json":
@@ -378,6 +394,9 @@ async def _create_speech_to_text(
         if error_check_ret is not None:
             return error_check_ret
 
+        if not request.model:
+            request.model = self.models.model_name()
+
         # If the engine is dead, raise the engine's DEAD_ERROR.
         # This is required for the streaming case, where we return a
         # success status before we actually start generating text :).
@@ -450,85 +469,115 @@ async def _create_speech_to_text(
         if request.response_format == "verbose_json":
             sampling_params.logprobs = 1
 
+        engine_request_ids = [
+            request_id if len(engine_inputs) == 1 else f"{request_id}-{idx}"
+            for idx in range(len(engine_inputs))
+        ]
         list_result_generator = []
-        for i, engine_input in enumerate(engine_inputs):
-            request_id_item = f"{request_id}_{i}"
-
-            self._log_inputs(
-                request_id_item,
-                engine_input,
-                params=sampling_params,
-                lora_request=lora_request,
-            )
-
-            trace_headers = (
-                None
-                if raw_request is None
-                else await self._get_trace_headers(raw_request.headers)
-            )
-
-            if isinstance(sampling_params, BeamSearchParams):
-                generator = self.beam_search(
-                    prompt=engine_input,
+        try:
+            for request_id_item, engine_input in zip(engine_request_ids, engine_inputs):
+                self._log_inputs(
+                    request_id_item,
+                    engine_input,
                     params=sampling_params,
-                    request_id=request_id_item,
                     lora_request=lora_request,
-                    trace_headers=trace_headers,
                 )
-            else:
-                generator = self.engine_client.generate(
-                    engine_input,
-                    sampling_params,
-                    request_id_item,
-                    lora_request=lora_request,
-                    trace_headers=trace_headers,
+
+                trace_headers = (
+                    None
+                    if raw_request is None
+                    else await self._get_trace_headers(raw_request.headers)
                 )
 
-            list_result_generator.append(generator)
+                if isinstance(sampling_params, BeamSearchParams):
+                    generator = self.beam_search(
+                        prompt=engine_input,
+                        params=sampling_params,
+                        request_id=request_id_item,
+                        lora_request=lora_request,
+                        trace_headers=trace_headers,
+                    )
+                else:
+                    generator = self.engine_client.generate(
+                        engine_input,
+                        sampling_params,
+                        request_id_item,
+                        lora_request=lora_request,
+                        trace_headers=trace_headers,
+                    )
+
+                list_result_generator.append(generator)
+        except asyncio.CancelledError:
+            logger.info(
+                "Request %s cancelled; aborting %d transcription engine request(s).",
+                request_id,
+                len(engine_request_ids),
+            )
+            await asyncio.gather(
+                self.engine_client.abort(engine_request_ids),
+                return_exceptions=True,
+            )
+            raise
+
+        separator = asr_inter_chunk_separator(
+            request.language, self.model_cls.no_space_languages
+        )
 
         if request.stream:
             return stream_generator_method(
-                request, list_result_generator, request_id, request_metadata, duration_s
+                request,
+                list_result_generator,
+                request_id,
+                request_metadata,
+                duration_s,
+                separator,
             )
         # Non-streaming response.
-        total_segments = []
-        text_parts = []
         try:
             assert list_result_generator is not None
+            chunk_segment_parts: list[list[SpeechToTextSegment]] = [
+                [] for _ in list_result_generator
+            ]
+            chunk_text_parts: list[list[str]] = [[] for _ in list_result_generator]
             segments_types: dict[str, type[SpeechToTextSegment]] = {
                 "transcribe": TranscriptionSegment,
                 "translate": TranslationSegment,
             }
             segment_class: type[SpeechToTextSegment] = segments_types[self.task_type]
-            text = ""
             chunk_size_in_s = self.asr_config.max_audio_clip_s
             if chunk_size_in_s is None:
                 assert len(list_result_generator) == 1, (
                     "`max_audio_clip_s` is set to None, audio cannot be chunked"
                 )
-            for idx, result_generator in enumerate(list_result_generator):
+            result_generator = merge_async_iterators(*list_result_generator)
+            async for idx, op in result_generator:
                 start_time = (
                     float(idx * chunk_size_in_s) if chunk_size_in_s is not None else 0.0
                 )
-                async for op in result_generator:
-                    if request.response_format == "verbose_json":
-                        assert op.outputs[0].logprobs
-                        segments: list[SpeechToTextSegment] = (
-                            self._get_verbose_segments(
-                                tokens=tuple(op.outputs[0].token_ids),
-                                segment_class=segment_class,
-                                request=request,
-                                start_time=start_time,
-                                log_probs=op.outputs[0].logprobs,
-                            )
-                        )
+                if request.response_format == "verbose_json":
+                    assert op.outputs[0].logprobs
+                    segments: list[SpeechToTextSegment] = self._get_verbose_segments(
+                        tokens=tuple(op.outputs[0].token_ids),
+                        segment_class=segment_class,
+                        request=request,
+                        start_time=start_time,
+                        log_probs=op.outputs[0].logprobs,
+                    )
 
-                        total_segments.extend(segments)
-                        text_parts.extend([seg.text for seg in segments])
-                    else:
-                        raw_text = op.outputs[0].text
-                        text_parts.append(self.model_cls.post_process_output(raw_text))
-            text = "".join(text_parts)
+                    chunk_segment_parts[idx].extend(segments)
+                    chunk_text_parts[idx].extend([seg.text for seg in segments])
+                else:
+                    raw_text = op.outputs[0].text
+                    chunk_text_parts[idx].append(
+                        self.model_cls.post_process_output(raw_text)
+                    )
+            total_segments = [
+                segment
+                for segment_parts in chunk_segment_parts
+                for segment in segment_parts
+            ]
+            text_parts = [text for text_part in chunk_text_parts for text in text_part]
+            text = separator.join(text_parts)
             if self.task_type == "transcribe":
                 final_response: ResponseType
                 # add usage in TranscriptionResponse.
@@ -567,7 +616,16 @@ async def _create_speech_to_text(
                     )
             return final_response
         except asyncio.CancelledError:
-            return self.create_error_response("Client disconnected")
+            logger.info(
+                "Request %s cancelled; aborting %d transcription engine request(s).",
+                request_id,
+                len(engine_request_ids),
+            )
+            await asyncio.gather(
+                self.engine_client.abort(engine_request_ids),
+                return_exceptions=True,
+            )
+            raise
 
     async def _speech_to_text_stream_generator(
         self,
@@ -581,6 +639,7 @@ async def _speech_to_text_stream_generator(
         | type[TranslationResponseStreamChoice],
         stream_response_class: type[TranscriptionStreamResponse]
         | type[TranslationStreamResponse],
+        separator: str,
     ) -> AsyncGenerator[str, None]:
         created_time = int(time.time())
         model_name = request.model
@@ -597,6 +656,7 @@ async def _speech_to_text_stream_generator(
 
         try:
             for result_generator in list_result_generator:
+                beginning_of_chunk = True
                 async for res in result_generator:
                     # On first result.
                     if res.prompt_token_ids is not None:
@@ -614,6 +674,14 @@ async def _speech_to_text_stream_generator(
                     assert len(res.outputs) == 1
                     output = res.outputs[0]
 
+                    # dont add separator to the first chunk
+                    if (
+                        result_generator is not list_result_generator[0]
+                        and beginning_of_chunk
+                    ):
+                        output.text = separator + output.text
+                        beginning_of_chunk = False
+
                     # TODO: For models that output structured formats (e.g.,
                     # Qwen3-ASR with "language X<asr_text>" prefix), streaming
                     # would need buffering to strip the prefix properly since
diff --git a/vllm/entrypoints/speech_to_text/factories.py b/vllm/entrypoints/speech_to_text/factories.py
new file mode 100644
index 000000000000..3625f6d2a8d5
--- /dev/null
+++ b/vllm/entrypoints/speech_to_text/factories.py
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+from typing import TYPE_CHECKING
+
+from fastapi import FastAPI
+
+if TYPE_CHECKING:
+    from argparse import Namespace
+
+    from starlette.datastructures import State
+
+    from vllm.engine.protocol import EngineClient
+    from vllm.entrypoints.logger import RequestLogger
+    from vllm.tasks import SupportedTask
+else:
+    RequestLogger = object
+
+
+def register_speech_to_text_api_routers(
+    app: FastAPI,
+    supported_tasks: tuple["SupportedTask", ...],
+):
+    if "realtime" in supported_tasks:
+        from .realtime.api_router import router as realtime_router
+
+        app.include_router(realtime_router)
+
+    if "transcription" in supported_tasks:
+        from .transcription.api_router import router as transcription_router
+
+        app.include_router(transcription_router)
+
+        from .translation.api_router import router as translation_router
+
+        app.include_router(translation_router)
+
+
+def add_websocket_metrics_middleware(app: FastAPI):
+    from .realtime.metrics import WebSocketMetricsMiddleware
+
+    app.add_middleware(WebSocketMetricsMiddleware)
+
+
+def init_speech_to_text_state(
+    engine_client: "EngineClient",
+    state: "State",
+    args: "Namespace",
+    request_logger: RequestLogger | None,
+    supported_tasks: tuple["SupportedTask", ...],
+):
+    if "transcription" in supported_tasks:
+        from .transcription.serving import OpenAIServingTranscription
+
+        state.openai_serving_transcription = OpenAIServingTranscription(
+            engine_client,
+            state.openai_serving_models,
+            request_logger=request_logger,
+            enable_force_include_usage=args.enable_force_include_usage,
+        )
+
+        from .translation.serving import OpenAIServingTranslation
+
+        state.openai_serving_translation = OpenAIServingTranslation(
+            engine_client,
+            state.openai_serving_models,
+            request_logger=request_logger,
+            enable_force_include_usage=args.enable_force_include_usage,
+        )
+
+    if "realtime" in supported_tasks:
+        from .realtime.serving import OpenAIServingRealtime
+
+        state.openai_serving_realtime = OpenAIServingRealtime(
+            engine_client,
+            state.openai_serving_models,
+            request_logger=request_logger,
+        )
diff --git a/vllm/entrypoints/speech_to_text/realtime/__init__.py b/vllm/entrypoints/speech_to_text/realtime/__init__.py
new file mode 100644
index 000000000000..208f01a7cb5e
--- /dev/null
+++ b/vllm/entrypoints/speech_to_text/realtime/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
diff --git a/vllm/entrypoints/openai/realtime/api_router.py b/vllm/entrypoints/speech_to_text/realtime/api_router.py
similarity index 51%
rename from vllm/entrypoints/openai/realtime/api_router.py
rename to vllm/entrypoints/speech_to_text/realtime/api_router.py
index c48191d14cd4..2529b28633eb 100644
--- a/vllm/entrypoints/openai/realtime/api_router.py
+++ b/vllm/entrypoints/speech_to_text/realtime/api_router.py
@@ -1,26 +1,15 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from typing import TYPE_CHECKING
 
-from fastapi import APIRouter, FastAPI, WebSocket
+from fastapi import APIRouter, WebSocket
 
-from vllm.entrypoints.openai.realtime.connection import RealtimeConnection
-from vllm.entrypoints.openai.realtime.serving import OpenAIServingRealtime
 from vllm.logger import init_logger
 
-logger = init_logger(__name__)
-
-if TYPE_CHECKING:
-    from argparse import Namespace
+from .connection import RealtimeConnection
 
-    from starlette.datastructures import State
+logger = init_logger(__name__)
 
-    from vllm.engine.protocol import EngineClient
-    from vllm.entrypoints.logger import RequestLogger
-    from vllm.tasks import SupportedTask
-else:
-    RequestLogger = object
 
 router = APIRouter()
 
@@ -48,27 +37,3 @@ async def realtime_endpoint(websocket: WebSocket):
 
     connection = RealtimeConnection(websocket, serving)
     await connection.handle_connection()
-
-
-def attach_router(app: FastAPI):
-    """Attach the realtime router to the FastAPI app."""
-    app.include_router(router)
-    logger.info("Realtime API router attached")
-
-
-def init_realtime_state(
-    engine_client: "EngineClient",
-    state: "State",
-    args: "Namespace",
-    request_logger: RequestLogger | None,
-    supported_tasks: tuple["SupportedTask", ...],
-):
-    state.openai_serving_realtime = (
-        OpenAIServingRealtime(
-            engine_client,
-            state.openai_serving_models,
-            request_logger=request_logger,
-        )
-        if "realtime" in supported_tasks
-        else None
-    )
diff --git a/vllm/entrypoints/openai/realtime/connection.py b/vllm/entrypoints/speech_to_text/realtime/connection.py
similarity index 98%
rename from vllm/entrypoints/openai/realtime/connection.py
rename to vllm/entrypoints/speech_to_text/realtime/connection.py
index 58af329054e1..c7d1af92990e 100644
--- a/vllm/entrypoints/openai/realtime/connection.py
+++ b/vllm/entrypoints/speech_to_text/realtime/connection.py
@@ -14,7 +14,10 @@
 
 from vllm import envs
 from vllm.entrypoints.openai.engine.protocol import ErrorResponse, UsageInfo
-from vllm.entrypoints.openai.realtime.protocol import (
+from vllm.exceptions import VLLMValidationError
+from vllm.logger import init_logger
+
+from .protocol import (
     ErrorEvent,
     InputAudioBufferAppend,
     InputAudioBufferCommit,
@@ -22,9 +25,7 @@
     TranscriptionDelta,
     TranscriptionDone,
 )
-from vllm.entrypoints.openai.realtime.serving import OpenAIServingRealtime
-from vllm.exceptions import VLLMValidationError
-from vllm.logger import init_logger
+from .serving import OpenAIServingRealtime
 
 logger = init_logger(__name__)
 
diff --git a/vllm/entrypoints/openai/realtime/metrics.py b/vllm/entrypoints/speech_to_text/realtime/metrics.py
similarity index 100%
rename from vllm/entrypoints/openai/realtime/metrics.py
rename to vllm/entrypoints/speech_to_text/realtime/metrics.py
diff --git a/vllm/entrypoints/openai/realtime/protocol.py b/vllm/entrypoints/speech_to_text/realtime/protocol.py
similarity index 100%
rename from vllm/entrypoints/openai/realtime/protocol.py
rename to vllm/entrypoints/speech_to_text/realtime/protocol.py
diff --git a/vllm/entrypoints/openai/realtime/serving.py b/vllm/entrypoints/speech_to_text/realtime/serving.py
similarity index 100%
rename from vllm/entrypoints/openai/realtime/serving.py
rename to vllm/entrypoints/speech_to_text/realtime/serving.py
diff --git a/vllm/entrypoints/speech_to_text/transcription/__init__.py b/vllm/entrypoints/speech_to_text/transcription/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/entrypoints/speech_to_text/transcription/api_router.py b/vllm/entrypoints/speech_to_text/transcription/api_router.py
new file mode 100644
index 000000000000..c4de6810ca66
--- /dev/null
+++ b/vllm/entrypoints/speech_to_text/transcription/api_router.py
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+from http import HTTPStatus
+from typing import Annotated
+
+from fastapi import APIRouter, Form, Request
+from fastapi.responses import JSONResponse, StreamingResponse
+
+from vllm.entrypoints.openai.engine.protocol import ErrorResponse
+from vllm.entrypoints.utils import (
+    load_aware_call,
+    with_cancellation,
+)
+from vllm.logger import init_logger
+
+from .protocol import TranscriptionRequest, TranscriptionResponseVariant
+from .serving import OpenAIServingTranscription
+
+logger = init_logger(__name__)
+
+router = APIRouter()
+
+
+def transcription(request: Request) -> OpenAIServingTranscription:
+    return request.app.state.openai_serving_transcription
+
+
+@router.post(
+    "/v1/audio/transcriptions",
+    responses={
+        HTTPStatus.OK.value: {"content": {"text/event-stream": {}}},
+        HTTPStatus.BAD_REQUEST.value: {"model": ErrorResponse},
+        HTTPStatus.UNPROCESSABLE_ENTITY.value: {"model": ErrorResponse},
+        HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": ErrorResponse},
+    },
+)
+@with_cancellation
+@load_aware_call
+async def create_transcriptions(
+    raw_request: Request, request: Annotated[TranscriptionRequest, Form()]
+):
+    handler = transcription(raw_request)
+    if handler is None:
+        raise NotImplementedError("The model does not support Transcriptions API")
+
+    audio_data = await request.file.read()
+
+    generator = await handler.create_transcription(audio_data, request, raw_request)
+
+    if isinstance(generator, ErrorResponse):
+        return JSONResponse(
+            content=generator.model_dump(), status_code=generator.error.code
+        )
+
+    elif isinstance(generator, TranscriptionResponseVariant):
+        return JSONResponse(content=generator.model_dump())
+
+    return StreamingResponse(content=generator, media_type="text/event-stream")
diff --git a/vllm/entrypoints/openai/speech_to_text/protocol.py b/vllm/entrypoints/speech_to_text/transcription/protocol.py
similarity index 58%
rename from vllm/entrypoints/openai/speech_to_text/protocol.py
rename to vllm/entrypoints/speech_to_text/transcription/protocol.py
index a8d978e33eb2..abf1a11a0eea 100644
--- a/vllm/entrypoints/openai/speech_to_text/protocol.py
+++ b/vllm/entrypoints/speech_to_text/transcription/protocol.py
@@ -1,17 +1,18 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import json
 import time
 from http import HTTPStatus
-from typing import Literal, TypeAlias
+from typing import TYPE_CHECKING, Literal, TypeAlias
 
-import torch
 from fastapi import HTTPException, UploadFile
 from pydantic import (
     Field,
     model_validator,
 )
 
+from vllm.config.speech_to_text import SpeechToTextParams
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaMessage,
     OpenAIBaseModel,
@@ -26,8 +27,14 @@
 )
 from vllm.utils import random_uuid
 
+from ..base.protocol import _LONG_INFO, AudioResponseFormat
+
+if TYPE_CHECKING:
+    import numpy as np
+
+    from vllm.config import ModelConfig, SpeechToTextConfig
+
 logger = init_logger(__name__)
-_LONG_INFO = torch.iinfo(torch.long)
 
 
 class TranscriptionResponseStreamChoice(OpenAIBaseModel):
@@ -45,10 +52,6 @@ class TranscriptionStreamResponse(OpenAIBaseModel):
     usage: UsageInfo | None = Field(default=None)
 
 
-## Protocols for Audio
-AudioResponseFormat: TypeAlias = Literal["json", "text", "srt", "verbose_json", "vtt"]
-
-
 class TranscriptionRequest(OpenAIBaseModel):
     # Ordered by official OpenAI API documentation
     # https://platform.openai.com/docs/api-reference/audio/createTranscription
@@ -71,6 +74,12 @@ class TranscriptionRequest(OpenAIBaseModel):
     will improve accuracy and latency.
     """
 
+    hotwords: str | None = None
+    """
+    hotwords refers to a list of important words or phrases that the model
+    should pay extra attention to during transcription.
+    """
+
     prompt: str = Field(default="")
     """An optional text to guide the model's style or continue a previous audio
     segment.
@@ -183,6 +192,24 @@ class TranscriptionRequest(OpenAIBaseModel):
         "min_p": 0.0,
     }
 
+    def build_stt_params(
+        self,
+        audio: "np.ndarray",
+        stt_config: "SpeechToTextConfig",
+        model_config: "ModelConfig",
+        task_type: str,
+    ) -> SpeechToTextParams:
+        return SpeechToTextParams(
+            audio=audio,
+            stt_config=stt_config,
+            model_config=model_config,
+            language=self.language,
+            task_type=task_type,
+            request_prompt=self.prompt,
+            to_language=self.to_language,
+            hotwords=self.hotwords,
+        )
+
     def to_beam_search_params(
         self,
         default_max_tokens: int,
@@ -277,6 +304,17 @@ def validate_transcription_request(cls, data):
                 parameter=invalid_param,
             )
 
+        # Parse vllm_xargs from JSON string (form data sends it as a string)
+        xargs = data.get("vllm_xargs")
+        if isinstance(xargs, str):
+            try:
+                data["vllm_xargs"] = json.loads(xargs)
+            except json.JSONDecodeError as e:
+                raise VLLMValidationError(
+                    f"Failed to parse vllm_xargs. Must be valid JSON: {e}",
+                    parameter="vllm_xargs",
+                ) from e
+
         return data
 
 
@@ -365,252 +403,3 @@ class TranscriptionResponseVerbose(OpenAIBaseModel):
 TranscriptionResponseVariant: TypeAlias = (
     TranscriptionResponse | TranscriptionResponseVerbose
 )
-
-
-class TranslationResponseStreamChoice(OpenAIBaseModel):
-    delta: DeltaMessage
-    finish_reason: str | None = None
-    stop_reason: int | str | None = None
-
-
-class TranslationStreamResponse(OpenAIBaseModel):
-    id: str = Field(default_factory=lambda: f"trsl-{random_uuid()}")
-    object: Literal["translation.chunk"] = "translation.chunk"
-    created: int = Field(default_factory=lambda: int(time.time()))
-    model: str
-    choices: list[TranslationResponseStreamChoice]
-    usage: UsageInfo | None = Field(default=None)
-
-
-class TranslationRequest(OpenAIBaseModel):
-    # Ordered by official OpenAI API documentation
-    # https://platform.openai.com/docs/api-reference/audio/createTranslation
-
-    file: UploadFile
-    """
-    The audio file object (not file name) to translate, in one of these
-    formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
-    """
-
-    model: str | None = None
-    """ID of the model to use.
-    """
-
-    prompt: str = Field(default="")
-    """An optional text to guide the model's style or continue a previous audio
-    segment.
-
-    The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
-    should match the audio language.
-    """
-
-    response_format: AudioResponseFormat = Field(default="json")
-    """
-    The format of the output, in one of these options: `json`, `text`, `srt`,
-    `verbose_json`, or `vtt`.
-    """
-
-    # TODO support additional sampling parameters
-    # --8<-- [start:translation-sampling-params]
-    use_beam_search: bool = False
-    """Whether or not beam search should be used."""
-
-    n: int = 1
-    """The number of beams to be used in beam search."""
-
-    length_penalty: float = 1.0
-    """Length penalty to be used for beam search."""
-
-    include_stop_str_in_output: bool = False
-    """Whether to include the stop strings in output text."""
-
-    seed: int | None = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max)
-    """The seed to use for sampling."""
-
-    temperature: float = Field(default=0.0)
-    """The sampling temperature, between 0 and 1.
-
-    Higher values like 0.8 will make the output more random, while lower values
-    like 0.2 will make it more focused / deterministic. If set to 0, the model
-    will use [log probability](https://en.wikipedia.org/wiki/Log_probability)
-    to automatically increase the temperature until certain thresholds are hit.
-    """
-    # --8<-- [end:translation-sampling-params]
-
-    # --8<-- [start:translation-extra-params]
-    language: str | None = None
-    """The language of the input audio we translate from.
-
-    Supplying the input language in
-    [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format
-    will improve accuracy.
-    """
-
-    to_language: str | None = None
-    """The language of the input audio we translate to.
-
-    Please note that this is not supported by all models, refer to the specific
-    model documentation for more details.
-    For instance, Whisper only supports `to_language=en`.
-    """
-
-    stream: bool | None = False
-    """Custom field not present in the original OpenAI definition. When set,
-    it will enable output to be streamed in a similar fashion as the Chat
-    Completion endpoint.
-    """
-    # Flattened stream option to simplify form data.
-    stream_include_usage: bool | None = False
-    stream_continuous_usage_stats: bool | None = False
-
-    max_completion_tokens: int | None = None
-    """The maximum number of tokens to generate."""
-    # --8<-- [end:translation-extra-params]
-
-    # Default sampling parameters for translation requests.
-    _DEFAULT_SAMPLING_PARAMS: dict = {
-        "temperature": 0,
-    }
-
-    def to_beam_search_params(
-        self,
-        default_max_tokens: int,
-        default_sampling_params: dict | None = None,
-    ) -> BeamSearchParams:
-        if default_sampling_params is None:
-            default_sampling_params = {}
-
-        max_tokens = default_max_tokens
-        n = self.n if self.n is not None else 1
-
-        # NOTE: Temp 0 is a different fallback than completions
-        if (temperature := self.temperature) is None:
-            temperature = default_sampling_params.get("temperature", 0)
-
-        return BeamSearchParams(
-            beam_width=n,
-            max_tokens=max_tokens,
-            temperature=temperature,
-            length_penalty=self.length_penalty,
-            include_stop_str_in_output=self.include_stop_str_in_output,
-        )
-
-    def to_sampling_params(
-        self, default_max_tokens: int, default_sampling_params: dict | None = None
-    ) -> SamplingParams:
-        max_tokens = default_max_tokens
-
-        if default_sampling_params is None:
-            default_sampling_params = {}
-        # Default parameters
-        if (temperature := self.temperature) is None:
-            temperature = default_sampling_params.get(
-                "temperature", self._DEFAULT_SAMPLING_PARAMS["temperature"]
-            )
-
-        return SamplingParams.from_optional(
-            temperature=temperature,
-            max_tokens=max_tokens,
-            seed=self.seed,
-            output_kind=RequestOutputKind.DELTA
-            if self.stream
-            else RequestOutputKind.FINAL_ONLY,
-            skip_clone=True,  # Created fresh per request, safe to skip clone
-        )
-
-    @model_validator(mode="before")
-    @classmethod
-    def validate_stream_options(cls, data):
-        stream_opts = ["stream_include_usage", "stream_continuous_usage_stats"]
-        stream = data.get("stream", False)
-        if any(bool(data.get(so, False)) for so in stream_opts) and not stream:
-            # Find which specific stream option was set
-            invalid_param = next(
-                (so for so in stream_opts if data.get(so, False)),
-                "stream_include_usage",
-            )
-            raise VLLMValidationError(
-                "Stream options can only be defined when `stream=True`.",
-                parameter=invalid_param,
-            )
-
-        return data
-
-
-# Translation response objects
-class TranslationResponse(OpenAIBaseModel):
-    text: str
-    """The translated text."""
-
-
-class TranslationWord(OpenAIBaseModel):
-    end: float
-    """End time of the word in seconds."""
-
-    start: float
-    """Start time of the word in seconds."""
-
-    word: str
-    """The text content of the word."""
-
-
-class TranslationSegment(OpenAIBaseModel):
-    id: int
-    """Unique identifier of the segment."""
-
-    avg_logprob: float
-    """Average logprob of the segment.
-
-    If the value is lower than -1, consider the logprobs failed.
-    """
-
-    compression_ratio: float
-    """Compression ratio of the segment.
-
-    If the value is greater than 2.4, consider the compression failed.
-    """
-
-    end: float
-    """End time of the segment in seconds."""
-
-    no_speech_prob: float | None = None
-    """Probability of no speech in the segment.
-
-    If the value is higher than 1.0 and the `avg_logprob` is below -1, consider
-    this segment silent.
-    """
-
-    seek: int
-    """Seek offset of the segment."""
-
-    start: float
-    """Start time of the segment in seconds."""
-
-    temperature: float
-    """Temperature parameter used for generating the segment."""
-
-    text: str
-    """Text content of the segment."""
-
-    tokens: list[int]
-    """Array of token IDs for the text content."""
-
-
-class TranslationResponseVerbose(OpenAIBaseModel):
-    duration: str
-    """The duration of the input audio."""
-
-    language: str
-    """The language of the input audio."""
-
-    text: str
-    """The translated text."""
-
-    segments: list[TranslationSegment] | None = None
-    """Segments of the translated text and their corresponding details."""
-
-    words: list[TranslationWord] | None = None
-    """Extracted words and their corresponding timestamps."""
-
-
-TranslationResponseVariant: TypeAlias = TranslationResponse | TranslationResponseVerbose
diff --git a/vllm/entrypoints/openai/speech_to_text/serving.py b/vllm/entrypoints/speech_to_text/transcription/serving.py
similarity index 53%
rename from vllm/entrypoints/openai/speech_to_text/serving.py
rename to vllm/entrypoints/speech_to_text/transcription/serving.py
index 28e798a986f7..123c4c234ecb 100644
--- a/vllm/entrypoints/openai/speech_to_text/serving.py
+++ b/vllm/entrypoints/speech_to_text/transcription/serving.py
@@ -11,21 +11,17 @@
     RequestResponseMetadata,
 )
 from vllm.entrypoints.openai.models.serving import OpenAIServingModels
-from vllm.entrypoints.openai.speech_to_text.protocol import (
+from vllm.logger import init_logger
+from vllm.outputs import RequestOutput
+
+from ..base.serving import OpenAISpeechToText
+from .protocol import (
     TranscriptionRequest,
     TranscriptionResponse,
     TranscriptionResponseStreamChoice,
     TranscriptionResponseVerbose,
     TranscriptionStreamResponse,
-    TranslationRequest,
-    TranslationResponse,
-    TranslationResponseStreamChoice,
-    TranslationResponseVerbose,
-    TranslationStreamResponse,
 )
-from vllm.entrypoints.openai.speech_to_text.speech_to_text import OpenAISpeechToText
-from vllm.logger import init_logger
-from vllm.outputs import RequestOutput
 
 logger = init_logger(__name__)
 
@@ -86,6 +82,7 @@ async def transcription_stream_generator(
         request_id: str,
         request_metadata: RequestResponseMetadata,
         audio_duration_s: float,
+        separator: str,
     ) -> AsyncGenerator[str, None]:
         generator = self._speech_to_text_stream_generator(
             request=request,
@@ -96,77 +93,7 @@ async def transcription_stream_generator(
             chunk_object_type="transcription.chunk",
             response_stream_choice_class=TranscriptionResponseStreamChoice,
             stream_response_class=TranscriptionStreamResponse,
-        )
-        async for chunk in generator:
-            yield chunk
-
-
-class OpenAIServingTranslation(OpenAISpeechToText):
-    """Handles translation requests."""
-
-    def __init__(
-        self,
-        engine_client: EngineClient,
-        models: OpenAIServingModels,
-        *,
-        request_logger: RequestLogger | None,
-        return_tokens_as_token_ids: bool = False,
-        enable_force_include_usage: bool = False,
-    ):
-        super().__init__(
-            engine_client=engine_client,
-            models=models,
-            request_logger=request_logger,
-            return_tokens_as_token_ids=return_tokens_as_token_ids,
-            task_type="translate",
-            enable_force_include_usage=enable_force_include_usage,
-        )
-
-    async def create_translation(
-        self,
-        audio_data: bytes,
-        request: TranslationRequest,
-        raw_request: Request | None = None,
-    ) -> (
-        TranslationResponse
-        | TranslationResponseVerbose
-        | AsyncGenerator[str, None]
-        | ErrorResponse
-    ):
-        """Translation API similar to OpenAI's API.
-
-        See https://platform.openai.com/docs/api-reference/audio/createTranslation
-        for the API specification. This API mimics the OpenAI translation API.
-        """
-        return await self._create_speech_to_text(
-            audio_data=audio_data,
-            request=request,
-            raw_request=raw_request,
-            response_class=(
-                TranslationResponseVerbose
-                if request.response_format == "verbose_json"
-                else TranslationResponse
-            ),
-            stream_generator_method=self.translation_stream_generator,
-        )
-
-    async def translation_stream_generator(
-        self,
-        request: TranslationRequest,
-        result_generator: list[AsyncGenerator[RequestOutput, None]],
-        request_id: str,
-        request_metadata: RequestResponseMetadata,
-        audio_duration_s: float,
-    ) -> AsyncGenerator[str, None]:
-        generator = self._speech_to_text_stream_generator(
-            request=request,
-            list_result_generator=result_generator,
-            request_id=request_id,
-            request_metadata=request_metadata,
-            audio_duration_s=audio_duration_s,
-            chunk_object_type="translation.chunk",
-            response_stream_choice_class=TranslationResponseStreamChoice,
-            stream_response_class=TranslationStreamResponse,
+            separator=separator,
         )
         async for chunk in generator:
             yield chunk
diff --git a/vllm/entrypoints/speech_to_text/translation/__init__.py b/vllm/entrypoints/speech_to_text/translation/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/entrypoints/speech_to_text/translation/api_router.py b/vllm/entrypoints/speech_to_text/translation/api_router.py
new file mode 100644
index 000000000000..a68b098834bf
--- /dev/null
+++ b/vllm/entrypoints/speech_to_text/translation/api_router.py
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+from http import HTTPStatus
+from typing import Annotated
+
+from fastapi import APIRouter, Form, Request
+from fastapi.responses import JSONResponse, StreamingResponse
+
+from vllm.entrypoints.openai.engine.protocol import ErrorResponse
+from vllm.entrypoints.utils import (
+    load_aware_call,
+    with_cancellation,
+)
+from vllm.logger import init_logger
+
+from .protocol import TranslationRequest, TranslationResponseVariant
+from .serving import OpenAIServingTranslation
+
+logger = init_logger(__name__)
+
+router = APIRouter()
+
+
+def translation(request: Request) -> OpenAIServingTranslation:
+    return request.app.state.openai_serving_translation
+
+
+@router.post(
+    "/v1/audio/translations",
+    responses={
+        HTTPStatus.OK.value: {"content": {"text/event-stream": {}}},
+        HTTPStatus.BAD_REQUEST.value: {"model": ErrorResponse},
+        HTTPStatus.UNPROCESSABLE_ENTITY.value: {"model": ErrorResponse},
+        HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": ErrorResponse},
+    },
+)
+@with_cancellation
+@load_aware_call
+async def create_translations(
+    request: Annotated[TranslationRequest, Form()], raw_request: Request
+):
+    handler = translation(raw_request)
+    if handler is None:
+        raise NotImplementedError("The model does not support Translations API")
+
+    audio_data = await request.file.read()
+
+    generator = await handler.create_translation(audio_data, request, raw_request)
+
+    if isinstance(generator, ErrorResponse):
+        return JSONResponse(
+            content=generator.model_dump(), status_code=generator.error.code
+        )
+
+    elif isinstance(generator, TranslationResponseVariant):
+        return JSONResponse(content=generator.model_dump())
+
+    return StreamingResponse(content=generator, media_type="text/event-stream")
diff --git a/vllm/entrypoints/speech_to_text/translation/protocol.py b/vllm/entrypoints/speech_to_text/translation/protocol.py
new file mode 100644
index 000000000000..6e457682c2f6
--- /dev/null
+++ b/vllm/entrypoints/speech_to_text/translation/protocol.py
@@ -0,0 +1,308 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import time
+from typing import TYPE_CHECKING, Literal, TypeAlias
+
+from fastapi import UploadFile
+from pydantic import (
+    Field,
+    model_validator,
+)
+
+from vllm.config.speech_to_text import SpeechToTextParams
+from vllm.entrypoints.openai.engine.protocol import (
+    DeltaMessage,
+    OpenAIBaseModel,
+    UsageInfo,
+)
+from vllm.exceptions import VLLMValidationError
+from vllm.logger import init_logger
+from vllm.sampling_params import (
+    BeamSearchParams,
+    RequestOutputKind,
+    SamplingParams,
+)
+from vllm.utils import random_uuid
+
+from ..base.protocol import _LONG_INFO, AudioResponseFormat
+
+if TYPE_CHECKING:
+    import numpy as np
+
+    from vllm.config import ModelConfig, SpeechToTextConfig
+
+logger = init_logger(__name__)
+
+
+class TranslationResponseStreamChoice(OpenAIBaseModel):
+    delta: DeltaMessage
+    finish_reason: str | None = None
+    stop_reason: int | str | None = None
+
+
+class TranslationStreamResponse(OpenAIBaseModel):
+    id: str = Field(default_factory=lambda: f"trsl-{random_uuid()}")
+    object: Literal["translation.chunk"] = "translation.chunk"
+    created: int = Field(default_factory=lambda: int(time.time()))
+    model: str
+    choices: list[TranslationResponseStreamChoice]
+    usage: UsageInfo | None = Field(default=None)
+
+
+class TranslationRequest(OpenAIBaseModel):
+    # Ordered by official OpenAI API documentation
+    # https://platform.openai.com/docs/api-reference/audio/createTranslation
+
+    file: UploadFile
+    """
+    The audio file object (not file name) to translate, in one of these
+    formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+    """
+
+    model: str | None = None
+    """ID of the model to use.
+    """
+
+    prompt: str = Field(default="")
+    """An optional text to guide the model's style or continue a previous audio
+    segment.
+
+    The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+    should match the audio language.
+    """
+
+    response_format: AudioResponseFormat = Field(default="json")
+    """
+    The format of the output, in one of these options: `json`, `text`, `srt`,
+    `verbose_json`, or `vtt`.
+    """
+
+    # TODO support additional sampling parameters
+    # --8<-- [start:translation-sampling-params]
+    use_beam_search: bool = False
+    """Whether or not beam search should be used."""
+
+    n: int = 1
+    """The number of beams to be used in beam search."""
+
+    length_penalty: float = 1.0
+    """Length penalty to be used for beam search."""
+
+    include_stop_str_in_output: bool = False
+    """Whether to include the stop strings in output text."""
+
+    seed: int | None = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max)
+    """The seed to use for sampling."""
+
+    temperature: float = Field(default=0.0)
+    """The sampling temperature, between 0 and 1.
+
+    Higher values like 0.8 will make the output more random, while lower values
+    like 0.2 will make it more focused / deterministic. If set to 0, the model
+    will use [log probability](https://en.wikipedia.org/wiki/Log_probability)
+    to automatically increase the temperature until certain thresholds are hit.
+    """
+    # --8<-- [end:translation-sampling-params]
+
+    # --8<-- [start:translation-extra-params]
+    language: str | None = None
+    """The language of the input audio we translate from.
+
+    Supplying the input language in
+    [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format
+    will improve accuracy.
+    """
+
+    hotwords: str | None = None
+    """
+    hotwords refers to a list of important words or phrases that the model
+    should pay extra attention to during transcription.
+    """
+
+    to_language: str | None = None
+    """The language of the input audio we translate to.
+
+    Please note that this is not supported by all models, refer to the specific
+    model documentation for more details.
+    For instance, Whisper only supports `to_language=en`.
+    """
+
+    stream: bool | None = False
+    """Custom field not present in the original OpenAI definition. When set,
+    it will enable output to be streamed in a similar fashion as the Chat
+    Completion endpoint.
+    """
+    # Flattened stream option to simplify form data.
+    stream_include_usage: bool | None = False
+    stream_continuous_usage_stats: bool | None = False
+
+    max_completion_tokens: int | None = None
+    """The maximum number of tokens to generate."""
+    # --8<-- [end:translation-extra-params]
+
+    # Default sampling parameters for translation requests.
+    _DEFAULT_SAMPLING_PARAMS: dict = {
+        "temperature": 0,
+    }
+
+    def build_stt_params(
+        self,
+        audio: "np.ndarray",
+        stt_config: "SpeechToTextConfig",
+        model_config: "ModelConfig",
+        task_type: str,
+    ) -> SpeechToTextParams:
+        return SpeechToTextParams(
+            audio=audio,
+            stt_config=stt_config,
+            model_config=model_config,
+            language=self.language,
+            task_type=task_type,
+            request_prompt=self.prompt,
+            to_language=self.to_language,
+            hotwords=self.hotwords,
+        )
+
+    def to_beam_search_params(
+        self,
+        default_max_tokens: int,
+        default_sampling_params: dict | None = None,
+    ) -> BeamSearchParams:
+        if default_sampling_params is None:
+            default_sampling_params = {}
+
+        max_tokens = default_max_tokens
+        n = self.n if self.n is not None else 1
+
+        # NOTE: Temp 0 is a different fallback than completions
+        if (temperature := self.temperature) is None:
+            temperature = default_sampling_params.get("temperature", 0)
+
+        return BeamSearchParams(
+            beam_width=n,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            length_penalty=self.length_penalty,
+            include_stop_str_in_output=self.include_stop_str_in_output,
+        )
+
+    def to_sampling_params(
+        self, default_max_tokens: int, default_sampling_params: dict | None = None
+    ) -> SamplingParams:
+        max_tokens = default_max_tokens
+
+        if default_sampling_params is None:
+            default_sampling_params = {}
+        # Default parameters
+        if (temperature := self.temperature) is None:
+            temperature = default_sampling_params.get(
+                "temperature", self._DEFAULT_SAMPLING_PARAMS["temperature"]
+            )
+
+        return SamplingParams.from_optional(
+            temperature=temperature,
+            max_tokens=max_tokens,
+            seed=self.seed,
+            output_kind=RequestOutputKind.DELTA
+            if self.stream
+            else RequestOutputKind.FINAL_ONLY,
+            skip_clone=True,  # Created fresh per request, safe to skip clone
+        )
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_stream_options(cls, data):
+        stream_opts = ["stream_include_usage", "stream_continuous_usage_stats"]
+        stream = data.get("stream", False)
+        if any(bool(data.get(so, False)) for so in stream_opts) and not stream:
+            # Find which specific stream option was set
+            invalid_param = next(
+                (so for so in stream_opts if data.get(so, False)),
+                "stream_include_usage",
+            )
+            raise VLLMValidationError(
+                "Stream options can only be defined when `stream=True`.",
+                parameter=invalid_param,
+            )
+
+        return data
+
+
+# Translation response objects
+class TranslationResponse(OpenAIBaseModel):
+    text: str
+    """The translated text."""
+
+
+class TranslationWord(OpenAIBaseModel):
+    end: float
+    """End time of the word in seconds."""
+
+    start: float
+    """Start time of the word in seconds."""
+
+    word: str
+    """The text content of the word."""
+
+
+class TranslationSegment(OpenAIBaseModel):
+    id: int
+    """Unique identifier of the segment."""
+
+    avg_logprob: float
+    """Average logprob of the segment.
+
+    If the value is lower than -1, consider the logprobs failed.
+    """
+
+    compression_ratio: float
+    """Compression ratio of the segment.
+
+    If the value is greater than 2.4, consider the compression failed.
+    """
+
+    end: float
+    """End time of the segment in seconds."""
+
+    no_speech_prob: float | None = None
+    """Probability of no speech in the segment.
+
+    If the value is higher than 1.0 and the `avg_logprob` is below -1, consider
+    this segment silent.
+    """
+
+    seek: int
+    """Seek offset of the segment."""
+
+    start: float
+    """Start time of the segment in seconds."""
+
+    temperature: float
+    """Temperature parameter used for generating the segment."""
+
+    text: str
+    """Text content of the segment."""
+
+    tokens: list[int]
+    """Array of token IDs for the text content."""
+
+
+class TranslationResponseVerbose(OpenAIBaseModel):
+    duration: str
+    """The duration of the input audio."""
+
+    language: str
+    """The language of the input audio."""
+
+    text: str
+    """The translated text."""
+
+    segments: list[TranslationSegment] | None = None
+    """Segments of the translated text and their corresponding details."""
+
+    words: list[TranslationWord] | None = None
+    """Extracted words and their corresponding timestamps."""
+
+
+TranslationResponseVariant: TypeAlias = TranslationResponse | TranslationResponseVerbose
diff --git a/vllm/entrypoints/speech_to_text/translation/serving.py b/vllm/entrypoints/speech_to_text/translation/serving.py
new file mode 100644
index 000000000000..257f8f74396e
--- /dev/null
+++ b/vllm/entrypoints/speech_to_text/translation/serving.py
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections.abc import AsyncGenerator
+
+from fastapi import Request
+
+from vllm.engine.protocol import EngineClient
+from vllm.entrypoints.logger import RequestLogger
+from vllm.entrypoints.openai.engine.protocol import (
+    ErrorResponse,
+    RequestResponseMetadata,
+)
+from vllm.entrypoints.openai.models.serving import OpenAIServingModels
+from vllm.logger import init_logger
+from vllm.outputs import RequestOutput
+
+from ..base.serving import OpenAISpeechToText
+from .protocol import (
+    TranslationRequest,
+    TranslationResponse,
+    TranslationResponseStreamChoice,
+    TranslationResponseVerbose,
+    TranslationStreamResponse,
+)
+
+logger = init_logger(__name__)
+
+
+class OpenAIServingTranslation(OpenAISpeechToText):
+    """Handles translation requests."""
+
+    def __init__(
+        self,
+        engine_client: EngineClient,
+        models: OpenAIServingModels,
+        *,
+        request_logger: RequestLogger | None,
+        return_tokens_as_token_ids: bool = False,
+        enable_force_include_usage: bool = False,
+    ):
+        super().__init__(
+            engine_client=engine_client,
+            models=models,
+            request_logger=request_logger,
+            return_tokens_as_token_ids=return_tokens_as_token_ids,
+            task_type="translate",
+            enable_force_include_usage=enable_force_include_usage,
+        )
+
+    async def create_translation(
+        self,
+        audio_data: bytes,
+        request: TranslationRequest,
+        raw_request: Request | None = None,
+    ) -> (
+        TranslationResponse
+        | TranslationResponseVerbose
+        | AsyncGenerator[str, None]
+        | ErrorResponse
+    ):
+        """Translation API similar to OpenAI's API.
+
+        See https://platform.openai.com/docs/api-reference/audio/createTranslation
+        for the API specification. This API mimics the OpenAI translation API.
+        """
+        return await self._create_speech_to_text(
+            audio_data=audio_data,
+            request=request,
+            raw_request=raw_request,
+            response_class=(
+                TranslationResponseVerbose
+                if request.response_format == "verbose_json"
+                else TranslationResponse
+            ),
+            stream_generator_method=self.translation_stream_generator,
+        )
+
+    async def translation_stream_generator(
+        self,
+        request: TranslationRequest,
+        result_generator: list[AsyncGenerator[RequestOutput, None]],
+        request_id: str,
+        request_metadata: RequestResponseMetadata,
+        audio_duration_s: float,
+        separator: str,
+    ) -> AsyncGenerator[str, None]:
+        generator = self._speech_to_text_stream_generator(
+            request=request,
+            list_result_generator=result_generator,
+            request_id=request_id,
+            request_metadata=request_metadata,
+            audio_duration_s=audio_duration_s,
+            chunk_object_type="translation.chunk",
+            response_stream_choice_class=TranslationResponseStreamChoice,
+            stream_response_class=TranslationStreamResponse,
+            separator=separator,
+        )
+        async for chunk in generator:
+            yield chunk
diff --git a/vllm/entrypoints/utils.py b/vllm/entrypoints/utils.py
index e3682280ec50..8ec41098ad20 100644
--- a/vllm/entrypoints/utils.py
+++ b/vllm/entrypoints/utils.py
@@ -9,6 +9,7 @@
 from http import HTTPStatus
 from logging import Logger
 from string import Template
+from typing import Any
 
 import regex as re
 from fastapi import Request
@@ -177,7 +178,14 @@ def get_max_tokens(
     input_length: int,
     default_sampling_params: dict,
     override_max_tokens: int | None = None,
+    truncate_prompt_tokens: int | None = None,
 ) -> int:
+    if truncate_prompt_tokens is not None:
+        limit = truncate_prompt_tokens
+        input_length = min(
+            input_length,
+            max_model_len if limit == -1 else limit,
+        )
     if max_model_len < input_length:
         raise ValueError(
             f"Input length ({input_length}) exceeds model's maximum "
@@ -203,7 +211,7 @@ def get_max_tokens(
     )
 
 
-def log_non_default_args(args: Namespace | EngineArgs):
+def get_non_default_args(args: Namespace | EngineArgs) -> dict[str, Any]:
     from vllm.entrypoints.openai.cli_args import make_arg_parser
 
     non_default_args = {}
@@ -230,6 +238,43 @@ def log_non_default_args(args: Namespace | EngineArgs):
             "Unsupported argument type. Must be Namespace or EngineArgs instance."
         )
 
+    return non_default_args
+
+
+def _jsonify_arg_value(value: Any) -> Any:
+    if value is None or isinstance(value, bool | int | float | str):
+        return value
+    if dataclasses.is_dataclass(value) and not isinstance(value, type):
+        return {
+            key: _jsonify_arg_value(val)
+            for key, val in dataclasses.asdict(value).items()
+        }
+    if isinstance(value, dict):
+        return {str(key): _jsonify_arg_value(val) for key, val in value.items()}
+    if isinstance(value, tuple | list):
+        return [_jsonify_arg_value(item) for item in value]
+    if (model_dump := getattr(value, "model_dump", None)) is not None:
+        return _jsonify_arg_value(model_dump(mode="json"))
+    if (to_dict := getattr(value, "dict", None)) is not None:
+        return _jsonify_arg_value(to_dict())
+    return repr(value)
+
+
+def jsonify_non_default_args(
+    args: Namespace | EngineArgs,
+    *,
+    exclude: set[str] | None = None,
+) -> dict[str, Any]:
+    non_default_args = get_non_default_args(args)
+    if exclude is not None:
+        for key in exclude:
+            non_default_args.pop(key, None)
+
+    return {key: _jsonify_arg_value(value) for key, value in non_default_args.items()}
+
+
+def log_non_default_args(args: Namespace | EngineArgs):
+    non_default_args = get_non_default_args(args)
     logger.info("non-default args: %s", non_default_args)
 
 
diff --git a/vllm/env_override.py b/vllm/env_override.py
index 5358568fc180..78270c2bee37 100644
--- a/vllm/env_override.py
+++ b/vllm/env_override.py
@@ -87,7 +87,7 @@ def _maybe_set_cuda_compatibility_path():
 import torch
 
 from vllm.logger import init_logger
-from vllm.utils.torch_utils import is_torch_equal
+from vllm.utils.torch_utils import is_torch_equal, is_torch_equal_or_newer
 
 logger = init_logger(__name__)
 
@@ -100,10 +100,9 @@ def _maybe_set_cuda_compatibility_path():
 # it avoids unintentional cuda initialization from torch.cuda.is_available()
 os.environ["PYTORCH_NVML_BASED_CUDA_CHECK"] = "1"
 
-# see https://github.com/vllm-project/vllm/issues/10480
+# see https://github.com/vllm-project/vllm/issues/10480 and
+# https://github.com/vllm-project/vllm/issues/10619.
 os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1"
-# see https://github.com/vllm-project/vllm/issues/10619
-torch._inductor.config.compile_threads = 1
 
 # Enable Triton autotuning result caching to disk by default.
 # Without this, Triton re-runs autotuning on every process restart,
@@ -113,6 +112,13 @@ def _maybe_set_cuda_compatibility_path():
 # in the environment.
 os.environ.setdefault("TRITON_CACHE_AUTOTUNING", "1")
 
+# When unset, TileLang routes JIT temp dirs through a world-shared
+# /tmp/tvm-debug-mode-tempdirs/ whose ownership is pinned to whichever
+# user compiled first, breaking every other user on a shared host.
+# Opt into per-process tempdirs unless the user explicitly chose the
+# debug layout (see https://github.com/vllm-project/vllm/issues/41410).
+os.environ.setdefault("TILELANG_CLEANUP_TEMP_FILES", "1")
+
 # ===================================================
 # torch 2.9 Inductor PythonWrapperCodegen monkeypatch
 # ===================================================
@@ -490,3 +496,265 @@ def _patch_get_raw_stream_if_needed():
 
     PythonWrapperCodegen.memory_plan_reuse = memory_plan_reuse_patched
     GraphLowering._update_scheduler = _update_scheduler_patched
+
+# ===================================================
+# torch <2.12 GraphCaptureOutput.get_runtime_env monkeypatch
+# ===================================================
+# PyTorch's AOT compile path omits builtins from used_globals, causing
+# 'Missing required external references' errors for refs like 'type'.
+# (which happens in transformers code)
+# This mirrors the fix in https://github.com/pytorch/pytorch/pull/177558
+# and can be removed once torch >=2.12 is the minimum supported version.
+
+# ===================================================
+# torch >= 2.11 Inductor constrain_to_fx_strides monkeypatch
+# ===================================================
+# Inductor's constrain_to_fx_strides calls .stride() on every FX arg's meta
+# value, which crashes on FakeScriptObject (the compile-time proxy for hoisted
+# opaque types). The patched version skips args whose meta value is not a
+# torch.Tensor.
+# Upstream issue: https://github.com/pytorch/pytorch/issues/175973
+
+
+_constrain_to_fx_strides_patched = False
+
+
+def _apply_constrain_to_fx_strides_patch():
+    """Patch lowering.constrain_to_fx_strides globally. Safe to call
+    multiple times; only the first call does anything.
+    Only applies for torch >= 2.11 and < 2.12."""
+    global _constrain_to_fx_strides_patched
+    if _constrain_to_fx_strides_patched:
+        return
+    _constrain_to_fx_strides_patched = True
+
+    if not is_torch_equal_or_newer("2.11.0.dev") or is_torch_equal_or_newer(
+        "2.12.0.dev"
+    ):
+        return
+
+    import torch._inductor.ir as _ir
+    import torch._inductor.lowering as _lowering
+    from torch._inductor.virtualized import V as _V
+
+    def _patched(fx_node, *args, **kwargs):
+        def apply_constraint(arg, fx_arg):
+            if isinstance(arg, _ir.IRNode):
+                meta_val = fx_arg.meta.get("val")
+                if isinstance(meta_val, torch.Tensor):
+                    stride_order = _ir.get_stride_order(
+                        meta_val.stride(), _V.graph.sizevars.shape_env
+                    )
+                    return _ir.ExternKernel.require_stride_order(arg, stride_order)
+                return arg
+            if isinstance(arg, dict):
+                return {key: apply_constraint(arg[key], fx_arg[key]) for key in arg}
+            return arg
+
+        args = tuple(
+            apply_constraint(arg, fx_arg) for arg, fx_arg in zip(args, fx_node.args)
+        )
+        kwargs = {k: apply_constraint(v, fx_node.kwargs[k]) for k, v in kwargs.items()}
+        return args, kwargs
+
+    _lowering.constrain_to_fx_strides = _patched
+
+
+if is_torch_equal_or_newer("2.10.0") and not is_torch_equal_or_newer("2.12.0.dev"):
+    import builtins as _builtins
+    import pickle
+
+    from torch._dynamo.convert_frame import GraphCaptureOutput
+
+    _original_get_runtime_env = GraphCaptureOutput.get_runtime_env
+
+    def _safe_builtins_dict(builtins_dict: dict) -> dict:
+        """Filter a builtins dict to only picklable entries for serialization."""
+        result = {}
+        for k, v in builtins_dict.items():
+            try:
+                pickle.dumps(v)
+                result[k] = v
+            except Exception:
+                pass
+        return result
+
+    def _patched_get_runtime_env(self):  # type: ignore[no-untyped-def]
+        runtime_env = _original_get_runtime_env(self)
+        for ref in runtime_env.external_refs:
+            if ref not in runtime_env.used_globals:
+                if ref.startswith("__builtins_dict__") and ref in self.f_globals:
+                    runtime_env.used_globals[ref] = _safe_builtins_dict(
+                        self.f_globals[ref]
+                    )
+                elif hasattr(_builtins, ref):
+                    runtime_env.used_globals[ref] = getattr(_builtins, ref)
+        return runtime_env
+
+    GraphCaptureOutput.get_runtime_env = _patched_get_runtime_env
+
+# ===================================================
+# torch 2.10 FxGraphCachePickler.dumps ValueError fix
+# ===================================================
+# PyTorch 2.10's FxGraphCachePickler.dumps() doesn't catch ValueError,
+# causing torch.compile cache failures when tensors with non-standard
+# layouts (e.g. blocked-layout prepacked weights) are serialized.
+# PyTorch mainline fixed this in pytorch/pytorch#176557 (merged 2026-03-04).
+# This is a thin backport for 2.10 users; remove once 2.10 is dropped.
+
+
+def _apply_fxgraphcache_pickle_patch(pickler_cls, bypass_cls):
+    """Wrap pickler_cls.dumps to convert ValueError into bypass_cls.
+
+    Idempotent: sets `_vllm_fxgraph_dumps_patched` on the class after the
+    first apply to prevent re-application. The wrapper function is also
+    marked with `_vllm_patched` as an additional safeguard.
+    """
+    if getattr(pickler_cls, "_vllm_fxgraph_dumps_patched", False):
+        return
+
+    original_dumps = pickler_cls.dumps
+    if hasattr(original_dumps, "_vllm_patched"):
+        return
+
+    def patched_dumps(self, obj):
+        try:
+            return original_dumps(self, obj)
+        except ValueError as e:
+            raise bypass_cls("Failed to pickle cache key") from e
+
+    patched_dumps._vllm_patched = True  # type: ignore[attr-defined]
+    pickler_cls.dumps = patched_dumps
+    pickler_cls._vllm_fxgraph_dumps_patched = True  # type: ignore[attr-defined]
+
+
+def _patch_fxgraphcache_pickle_if_needed():
+    """Apply FxGraphCachePickler.dumps ValueError backport when on torch 2.10.x."""
+    from vllm.utils.torch_utils import is_torch_equal_or_newer
+
+    if not is_torch_equal_or_newer("2.10.0") or is_torch_equal_or_newer("2.11.0"):
+        return
+
+    from torch._inductor.codecache import BypassFxGraphCache, FxGraphCachePickler
+
+    _apply_fxgraphcache_pickle_patch(FxGraphCachePickler, BypassFxGraphCache)
+
+
+_patch_fxgraphcache_pickle_if_needed()
+
+# ===================================================
+# torch 2.11 Inductor cpp codegen indirect_assert scalar-mask fix
+# ===================================================
+# CppVecKernel.indirect_assert wraps a scalar mask with
+# `VecMask<...>(scalar)`, which is not a valid constructor and triggers a
+# C++ compile error during torch.compile of any model that does indirect
+# indexing inside a tail-vectorized loop (e.g. Qwen3-VL).
+# Failure looks like:
+#   no matching function for call to 'VecMask<int64_t,2>::VecMask(int&)'
+# Upstream fix in PyTorch mainline replaces the call with
+# `VecMask<...>::from(scalar)`, see pytorch/pytorch#178148 (lands in 2.12).
+# This is a thin backport for torch >= 2.11 and < 2.12; remove once the
+# minimum supported torch is 2.12.
+
+
+def _apply_cpp_indirect_assert_patch():
+    """Replace CppVecKernel.indirect_assert with a fixed copy that uses
+    `VecMask<...>::from(scalar)` for scalar masks.
+
+    Idempotent: marks the class with `_vllm_indirect_assert_patched` after
+    the first apply.
+    """
+    from torch._inductor.codegen.cpp import CppVecKernel
+
+    if getattr(CppVecKernel, "_vllm_indirect_assert_patched", False):
+        return
+
+    from torch._inductor.codegen.cpp import CppCSEVariable, cexpr_index
+
+    def patched_indirect_assert(self, var, lower, upper, mask=None):
+        assert isinstance(var, CppCSEVariable)
+        assert var.dtype is not None
+        if not var.is_vec:
+            if isinstance(mask, CppCSEVariable) and mask.is_vec:
+                mask = f"({mask}).all_masked()"
+            return super(CppVecKernel, self).indirect_assert(var, lower, upper, mask)
+        lower_scalar = lower
+        upper_scalar = upper
+        if lower:
+            lower = f"{self._get_vec_type(var.dtype)}({lower})"
+        if upper:
+            upper = f"{self._get_vec_type(var.dtype)}({upper})"
+        if lower and upper:
+            cond = f"({lower} <= {var}) & ({var} < {upper})"
+            cond_print = f"{lower_scalar} <= {var} < {upper_scalar}"
+        elif lower:
+            cond = f"{lower} <= {var}"
+            cond_print = f"{lower_scalar} <= {var}"
+        else:
+            assert upper
+            cond = f"{var} < {upper}"
+            cond_print = f"{var} < {upper_scalar}"
+        cond = f"{self._get_mask_type(var.dtype)}({cond})"
+        if mask:
+            if not mask.is_vec:
+                # Backport of pytorch/pytorch#178148 -- use ::from for
+                # scalar masks so g++ picks the correct overload.
+                mask = f"{self._get_mask_type(var.dtype)}::from({mask})"
+            cond = f"({cond}) | ~({mask})"
+        if self.tail_size:
+            cond = (
+                f"{self._get_mask_type(var.dtype)}::set("
+                f"{self._get_mask_type(var.dtype)}::from(1)"
+                f", ({cond}), {cexpr_index(self.tail_size)})"
+            )
+        cond = f"({cond}).all_masked()"
+        return f'{self.assert_function}({cond}, "index out of bounds: {cond_print}")'
+
+    CppVecKernel.indirect_assert = patched_indirect_assert
+    CppVecKernel._vllm_indirect_assert_patched = True  # type: ignore[attr-defined]
+
+
+def _patch_cpp_indirect_assert_if_needed():
+    """Apply cpp codegen indirect_assert backport when on torch 2.11.x.
+
+    Defers application until torch._inductor.codegen.cpp is naturally
+    imported by Inductor. Importing it eagerly during vllm.__init__ pulls
+    in torch._inductor.scheduler, whose top-level
+    `import torch._inductor.async_compile` can fail with
+    `ModuleNotFoundError: import of torch._inductor.async_compile halted;
+    None in sys.modules` depending on the import order on the runner
+    (observed in vLLM CPU CI).
+    """
+    if not is_torch_equal_or_newer("2.11.0") or is_torch_equal_or_newer("2.12.0.dev"):
+        return
+
+    import sys
+
+    target_name = "torch._inductor.codegen.cpp"
+    if target_name in sys.modules:
+        _apply_cpp_indirect_assert_patch()
+        return
+
+    import importlib.abc
+
+    class _CppCodegenPatchFinder(importlib.abc.MetaPathFinder):
+        def find_spec(self, fullname, path, target=None):
+            if fullname != target_name:
+                return None
+            sys.meta_path.remove(self)
+            spec = importlib.util.find_spec(fullname)
+            if spec is None or spec.loader is None:
+                return None
+            original_exec = spec.loader.exec_module
+
+            def _exec_then_patch(module):
+                original_exec(module)
+                _apply_cpp_indirect_assert_patch()
+
+            spec.loader.exec_module = _exec_then_patch  # type: ignore[method-assign]
+            return spec
+
+    sys.meta_path.insert(0, _CppCodegenPatchFinder())
+
+
+_patch_cpp_indirect_assert_if_needed()
diff --git a/vllm/envs.py b/vllm/envs.py
index 2944bb111d24..564982d473ec 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -8,6 +8,7 @@
 import sys
 import tempfile
 import uuid
+import warnings
 from collections.abc import Callable
 from typing import TYPE_CHECKING, Any, Literal
 
@@ -16,6 +17,7 @@
     VLLM_PORT: int | None = None
     VLLM_RPC_BASE_PATH: str = tempfile.gettempdir()
     VLLM_USE_MODELSCOPE: bool = False
+    VLLM_USE_FASTOKENS: bool = False
     VLLM_RINGBUFFER_WARNING_INTERVAL: int = 60
     VLLM_NCCL_SO_PATH: str | None = None
     LD_LIBRARY_PATH: str | None = None
@@ -45,18 +47,22 @@
     NO_COLOR: bool = False
     VLLM_LOG_STATS_INTERVAL: float = 10.0
     VLLM_TRACE_FUNCTION: int = 0
-    VLLM_USE_FLASHINFER_SAMPLER: bool | None = None
+    VLLM_USE_FLASHINFER_SAMPLER: bool = True
     VLLM_PP_LAYER_PARTITION: str | None = None
     VLLM_CPU_KVCACHE_SPACE: int | None = 0
     VLLM_CPU_OMP_THREADS_BIND: str = "auto"
     VLLM_CPU_NUM_OF_RESERVED_CPU: int | None = None
     VLLM_CPU_SGL_KERNEL: bool = False
+    VLLM_CPU_ATTN_SPLIT_KV: bool = True
     VLLM_ZENTORCH_WEIGHT_PREPACK: bool = True
+    VLLM_CPU_INT4_W4A8: bool = True
     VLLM_XLA_CACHE_PATH: str = os.path.join(VLLM_CACHE_ROOT, "xla_cache")
     VLLM_XLA_CHECK_RECOMPILATION: bool = False
+    VLLM_SPARSE_INDEXER_MAX_LOGITS_MB: int = 512
     VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE: Literal["auto", "nccl", "shm"] = "auto"
     VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM: bool = False
     VLLM_USE_RAY_WRAPPED_PP_COMM: bool = True
+    VLLM_USE_RAY_V2_EXECUTOR_BACKEND: bool = False
     VLLM_XLA_USE_SPMD: bool = False
     VLLM_WORKER_MULTIPROC_METHOD: Literal["fork", "spawn"] = "fork"
     VLLM_ASSETS_CACHE: str = os.path.join(VLLM_CACHE_ROOT, "assets")
@@ -64,6 +70,9 @@
     VLLM_IMAGE_FETCH_TIMEOUT: int = 5
     VLLM_VIDEO_FETCH_TIMEOUT: int = 30
     VLLM_AUDIO_FETCH_TIMEOUT: int = 10
+    VLLM_MEDIA_CACHE: str = ""
+    VLLM_MEDIA_CACHE_MAX_SIZE_MB: int = 5120
+    VLLM_MEDIA_CACHE_TTL_HOURS: float = 24
     VLLM_MEDIA_FETCH_MAX_RETRIES: int = 3
     VLLM_MEDIA_URL_ALLOW_REDIRECTS: bool = True
     VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8
@@ -72,12 +81,15 @@
     VLLM_MEDIA_CONNECTOR: str = "http"
     VLLM_MM_HASHER_ALGORITHM: str = "blake3"
     VLLM_TARGET_DEVICE: str = "cuda"
-    VLLM_MAIN_CUDA_VERSION: str = "12.9"
+    VLLM_MAIN_CUDA_VERSION: str = "13.0"
     VLLM_FLOAT32_MATMUL_PRECISION: Literal["highest", "high", "medium"] = "highest"
     VLLM_BATCH_INVARIANT: bool = False
+    VLLM_TRITON_ATTN_USE_TD: bool | None = None
+    TRTLLM_ENABLE_PDL: bool = False
     MAX_JOBS: str | None = None
     NVCC_THREADS: str | None = None
     VLLM_USE_PRECOMPILED: bool = False
+    VLLM_USE_PRECOMPILED_RUST: bool = False
     VLLM_SKIP_PRECOMPILED_VERSION_SUFFIX: bool = False
     VLLM_DOCKER_BUILD_CONTEXT: bool = False
     VLLM_KEEP_ALIVE_ON_ENGINE_DEATH: bool = False
@@ -122,9 +134,12 @@
     VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
     VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
     VLLM_DISABLE_COMPILE_CACHE: bool = False
+    VLLM_USE_LAYERNAME: bool = True
     Q_SCALE_CONSTANT: int = 200
     K_SCALE_CONSTANT: int = 200
     V_SCALE_CONSTANT: int = 100
+    VLLM_USE_RUST_FRONTEND: bool = False
+    VLLM_RUST_FRONTEND_PATH: str | None = "auto"
     VLLM_SERVER_DEV_MODE: bool = False
     VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: int = 128
     VLLM_MLA_DISABLE: bool = False
@@ -135,17 +150,20 @@
     VLLM_DP_RANK_LOCAL: int = -1
     VLLM_DP_SIZE: int = 1
     VLLM_USE_STANDALONE_COMPILE: bool = True
-    VLLM_ENABLE_PREGRAD_PASSES: bool = False
+    VLLM_ENABLE_PREGRAD_PASSES: bool = True
+    VLLM_USE_BREAKABLE_CUDAGRAPH: bool = False
     VLLM_DP_MASTER_IP: str = ""
     VLLM_DP_MASTER_PORT: int = 0
-    VLLM_MOE_DP_CHUNK_SIZE: int = 256
-    VLLM_ENABLE_MOE_DP_CHUNK: bool = True
     VLLM_RANDOMIZE_DP_DUMMY_INPUTS: bool = False
     VLLM_RAY_DP_PACK_STRATEGY: Literal["strict", "fill", "span"] = "strict"
     VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY: str = ""
     VLLM_RAY_EXTRA_ENV_VARS_TO_COPY: str = ""
     VLLM_MARLIN_USE_ATOMIC_ADD: bool = False
     VLLM_MARLIN_INPUT_DTYPE: Literal["int8", "fp8"] | None = None
+    VLLM_HUMMING_ONLINE_QUANT_CONFIG: dict[str, Any] | None = None
+    VLLM_HUMMING_INPUT_QUANT_CONFIG: dict[str, Any] | None = None
+    VLLM_HUMMING_USE_F16_ACCUM: bool = False
+    VLLM_HUMMING_MOE_GEMM_TYPE: Literal["indexed", "grouped", "auto"] | None = None
     VLLM_MXFP4_USE_MARLIN: bool | None = None
     VLLM_DEEPEPLL_NVFP4_DISPATCH: bool = False
     VLLM_V1_USE_OUTLINES_CACHE: bool = False
@@ -170,6 +188,7 @@
     VLLM_FLASHINFER_MOE_BACKEND: Literal["throughput", "latency", "masked_gemm"] = (
         "latency"
     )
+    VLLM_FLASHINFER_AUTOTUNE_CACHE_DIR: str | None = None
     VLLM_FLASHINFER_ALLREDUCE_BACKEND: Literal["auto", "trtllm", "mnnvl"] = "auto"
     VLLM_FLASHINFER_WORKSPACE_BUFFER_SIZE: int = 394 * 1024 * 1024
     VLLM_XGRAMMAR_CACHE_MB: int = 0
@@ -179,11 +198,16 @@
     VLLM_NIXL_SIDE_CHANNEL_HOST: str = "localhost"
     VLLM_NIXL_SIDE_CHANNEL_PORT: int = 5600
     VLLM_MOONCAKE_BOOTSTRAP_PORT: int = 8998
+    VLLM_MOONCAKE_STORE_TIER_LOG: bool = False
+    VLLM_MOONCAKE_DISK_STAGING_USABLE_RATIO: float = 0.9
+    MOONCAKE_PREFERRED_SEGMENT: str | None = None
+    MOONCAKE_REQUESTER_LOCAL_HOSTNAME: str | None = None
     VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE: int = 163840
     VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS: int = 1
     VLLM_MQ_MAX_CHUNK_BYTES_MB: int = 16
     VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS: int = 300
     VLLM_KV_CACHE_LAYOUT: Literal["NHD", "HND"] | None = None
+    VLLM_SSM_CONV_STATE_LAYOUT: Literal["SD", "DS"] | None = None
     VLLM_COMPUTE_NANS_IN_LOGITS: bool = False
     VLLM_USE_NVFP4_CT_EMULATIONS: bool = False
     VLLM_ROCM_QUICK_REDUCE_QUANTIZATION: Literal[
@@ -191,7 +215,8 @@
     ] = "NONE"
     VLLM_ROCM_QUICK_REDUCE_CAST_BF16_TO_FP16: bool = True
     VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB: int | None = None
-    VLLM_NIXL_ABORT_REQUEST_TIMEOUT: int = 480
+    VLLM_ROCM_QUICK_REDUCE_MIN_SIZE_BYTES_MB: int | None = None
+    VLLM_ROCM_QUICK_REDUCE_QUANTIZATION_MIN_SIZE_KB: int | None = None
     VLLM_MORIIO_CONNECTOR_READ_MODE: bool = False
     VLLM_MORIIO_QP_PER_TRANSFER: int = 1
     VLLM_MORIIO_POST_BATCH_SIZE: int = -1
@@ -215,6 +240,7 @@
     VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False
     VLLM_SYSTEM_START_DATE: str | None = None
     VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY: bool = False
+    VLLM_ENFORCE_STRICT_TOOL_CALLING: bool = False
     VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
     VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False
     VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True
@@ -234,8 +260,9 @@
     VLLM_DEBUG_WORKSPACE: bool = False
     VLLM_DISABLE_SHARED_EXPERTS_STREAM: bool = False
     VLLM_SHARED_EXPERTS_STREAM_TOKEN_THRESHOLD: int = 256
+    VLLM_MULTI_STREAM_GEMM_TOKEN_THRESHOLD: int = 1024
     VLLM_COMPILE_CACHE_SAVE_FORMAT: Literal["binary", "unpacked"] = "binary"
-    VLLM_USE_V2_MODEL_RUNNER: bool = False
+    VLLM_USE_V2_MODEL_RUNNER: bool | None = None
     VLLM_LOG_MODEL_INSPECTION: bool = False
     VLLM_DEBUG_MFU_METRICS: bool = False
     VLLM_WEIGHT_OFFLOADING_DISABLE_PIN_MEMORY: bool = False
@@ -244,11 +271,17 @@
     VLLM_LORA_DISABLE_PDL: bool = False
     VLLM_ENABLE_CUDA_COMPATIBILITY: bool = False
     VLLM_CUDA_COMPATIBILITY_PATH: str | None = None
+    VLLM_SKIP_MODEL_NAME_VALIDATION: bool = False
+    """If set, vLLM will skip model name validation in API requests.
+    This allows any model name to be accepted in the 'model' field of requests,
+    making the server model-name agnostic. Useful for proxy/gateway scenarios."""
     VLLM_ELASTIC_EP_SCALE_UP_LAUNCH: bool = False
     VLLM_ELASTIC_EP_DRAIN_REQUESTS: bool = False
-    VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS: bool = False
+    VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS: bool = True
     VLLM_NIXL_EP_MAX_NUM_RANKS: int = 32
     VLLM_XPU_ENABLE_XPU_GRAPH: bool = False
+    VLLM_XPU_USE_SAMPLER_KERNEL: bool = True
+    VLLM_LORA_ENABLE_DUAL_STREAM: bool = False
 
 
 def get_default_cache_root():
@@ -277,6 +310,15 @@ def maybe_convert_bool(value: str | None) -> bool | None:
     return bool(int(value))
 
 
+def maybe_convert_json_str_or_file(value: str | None) -> dict[str, Any] | None:
+    if value is None:
+        return None
+    if os.path.exists(value):
+        with open(value) as f:
+            return json.load(f)
+    return json.loads(value)
+
+
 def disable_compile_cache() -> bool:
     return bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0")))
 
@@ -290,10 +332,7 @@ def use_aot_compile() -> bool:
         else "0"
     )
 
-    return (
-        not bool(int(os.getenv("VLLM_BATCH_INVARIANT", "0")))
-        and os.environ.get("VLLM_USE_AOT_COMPILE", default_value) == "1"
-    )
+    return os.environ.get("VLLM_USE_AOT_COMPILE", default_value) == "1"
 
 
 def use_mega_aot_artifact():
@@ -306,6 +345,27 @@ def use_mega_aot_artifact():
     return os.environ.get("VLLM_USE_MEGA_AOT_ARTIFACT", default_value) == "1"
 
 
+def deprecated_env(
+    env_name: str,
+    removal_version: str,
+    replacement: str,
+    getter: Callable[[], Any],
+) -> Callable[[], Any]:
+    """Wrap an env-var getter to emit a FutureWarning when the var is set."""
+
+    def _read() -> Any:
+        if env_name in os.environ:
+            warnings.warn(
+                f"{env_name} is deprecated and will be removed in "
+                f"{removal_version}. {replacement}",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return getter()
+
+    return _read
+
+
 def env_with_choices(
     env_name: str,
     default: str | None,
@@ -481,14 +541,49 @@ def _get_or_set_default() -> str:
 
 logger = logging.getLogger(__name__)
 
+
+def _resolve_rust_frontend_path() -> str | None:
+    """Resolve the Rust frontend binary path.
+
+    Returns None if VLLM_USE_RUST_FRONTEND is not enabled.
+    When enabled, resolves VLLM_RUST_FRONTEND_PATH ("auto" by default)
+    to the actual binary path.
+    """
+    use_rust = bool(int(os.environ.get("VLLM_USE_RUST_FRONTEND", "0")))
+    raw = os.environ.get("VLLM_RUST_FRONTEND_PATH", "auto")
+
+    if not use_rust:
+        if os.environ.get("VLLM_RUST_FRONTEND_PATH") is not None:
+            logger.warning(
+                "VLLM_RUST_FRONTEND_PATH is set but VLLM_USE_RUST_FRONTEND "
+                "is not enabled. The Rust frontend will not be used. "
+                "Set VLLM_USE_RUST_FRONTEND=1 to enable it."
+            )
+        return None
+
+    if raw.lower() in ("auto", "1", "true"):
+        pkg_dir = os.path.dirname(os.path.abspath(__file__))
+        candidate = os.path.join(pkg_dir, "vllm-rs")
+        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
+            return candidate
+
+        raise FileNotFoundError(
+            "VLLM_RUST_FRONTEND_PATH=auto but the vllm-rs binary was "
+            f"not found at {candidate}. "
+            "Build with setuptools-rust or set the path explicitly."
+        )
+    return raw
+
+
 environment_variables: dict[str, Callable[[], Any]] = {
     # ================== Installation Time Env Vars ==================
     # Target device of vLLM, supporting [cuda (by default),
     # rocm, cpu]
     "VLLM_TARGET_DEVICE": lambda: os.getenv("VLLM_TARGET_DEVICE", "cuda").lower(),
     # Main CUDA version of vLLM. This follows PyTorch but can be overridden.
-    "VLLM_MAIN_CUDA_VERSION": lambda: os.getenv("VLLM_MAIN_CUDA_VERSION", "").lower()
-    or "12.9",
+    "VLLM_MAIN_CUDA_VERSION": lambda: (
+        os.getenv("VLLM_MAIN_CUDA_VERSION", "").lower() or "13.0"
+    ),
     # Controls PyTorch float32 matmul precision mode within vLLM workers.
     # Valid options mirror torch.set_float32_matmul_precision
     "VLLM_FLOAT32_MATMUL_PRECISION": env_with_choices(
@@ -500,6 +595,19 @@ def _get_or_set_default() -> str:
     # Enable batch-invariant mode: deterministic results regardless of
     # batch composition. Requires NVIDIA GPU with compute capability >= 9.0.
     "VLLM_BATCH_INVARIANT": lambda: bool(int(os.getenv("VLLM_BATCH_INVARIANT", "0"))),
+    # Use tensor descriptors for Q/K/V loads and output stores in the
+    # Triton unified-attention kernel.  Enables HW 2D block reads on
+    # Intel Xe2/Xe3; the non-TD branch is dead-code-eliminated at Triton
+    # compile time so other platforms see no overhead.  Tri-state override:
+    # unset (default) lets the `triton_attn` backend auto-select per
+    # platform (currently auto-enabled on XPU only); ``1`` forces TD on;
+    # ``0`` forces TD off.  Useful for A/B benchmarking the TD path.
+    "VLLM_TRITON_ATTN_USE_TD": lambda: {"1": True, "0": False}.get(
+        os.getenv("VLLM_TRITON_ATTN_USE_TD", "").strip()
+    ),
+    # Enable Programmatic Dependent Launch for supported NVIDIA MoE router
+    # kernels. Requires CUDA >= 12.0 and compute capability >= 9.0.
+    "TRTLLM_ENABLE_PDL": lambda: bool(int(os.getenv("TRTLLM_ENABLE_PDL", "0"))),
     # Maximum number of compilation jobs to run in parallel.
     # By default this is the number of CPUs
     "MAX_JOBS": lambda: os.getenv("MAX_JOBS", None),
@@ -507,22 +615,24 @@ def _get_or_set_default() -> str:
     # By default this is 1.
     # If set, `MAX_JOBS` will be reduced to avoid oversubscribing the CPU.
     "NVCC_THREADS": lambda: os.getenv("NVCC_THREADS", None),
-    # If set, vllm will use precompiled binaries (*.so)
-    "VLLM_USE_PRECOMPILED": lambda: os.environ.get("VLLM_USE_PRECOMPILED", "")
-    .strip()
-    .lower()
-    in ("1", "true")
-    or bool(os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION")),
+    # If set, vllm will use precompiled native binaries (*.so and vllm-rs).
+    "VLLM_USE_PRECOMPILED": lambda: (
+        os.environ.get("VLLM_USE_PRECOMPILED", "").strip().lower() in ("1", "true")
+        or bool(os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION"))
+    ),
+    # If set, vllm will use the precompiled Rust frontend binary (vllm-rs).
+    "VLLM_USE_PRECOMPILED_RUST": lambda: (
+        os.environ.get("VLLM_USE_PRECOMPILED_RUST", "").strip().lower() in ("1", "true")
+    ),
     # If set, skip adding +precompiled suffix to version string
     "VLLM_SKIP_PRECOMPILED_VERSION_SUFFIX": lambda: bool(
         int(os.environ.get("VLLM_SKIP_PRECOMPILED_VERSION_SUFFIX", "0"))
     ),
     # Used to mark that setup.py is running in a Docker build context,
     # in order to force the use of precompiled binaries.
-    "VLLM_DOCKER_BUILD_CONTEXT": lambda: os.environ.get("VLLM_DOCKER_BUILD_CONTEXT", "")
-    .strip()
-    .lower()
-    in ("1", "true"),
+    "VLLM_DOCKER_BUILD_CONTEXT": lambda: (
+        os.environ.get("VLLM_DOCKER_BUILD_CONTEXT", "").strip().lower() in ("1", "true")
+    ),
     # CMake build type
     # If not set, defaults to "Debug" or "RelWithDebInfo"
     # Available options: "Debug", "Release", "RelWithDebInfo"
@@ -568,10 +678,15 @@ def _get_or_set_default() -> str:
     ),
     # If true, will load models from ModelScope instead of Hugging Face Hub.
     # note that the value is true or false, not numbers
-    "VLLM_USE_MODELSCOPE": lambda: os.environ.get(
-        "VLLM_USE_MODELSCOPE", "False"
-    ).lower()
-    == "true",
+    "VLLM_USE_MODELSCOPE": lambda: (
+        os.environ.get("VLLM_USE_MODELSCOPE", "False").lower() == "true"
+    ),
+    # If true, replace the Rust BPE backend that powers HF fast tokenizers
+    # with the `fastokens` (https://github.com/crusoecloud/fastokens) shim.
+    # Applies to any tokenizer mode that loads an HF fast tokenizer
+    # (`hf`, `deepseek_v32`, `deepseek_v4`, `qwen_vl`, …). The `fastokens`
+    # Python package must be installed.
+    "VLLM_USE_FASTOKENS": lambda: bool(int(os.getenv("VLLM_USE_FASTOKENS", "0"))),
     # Interval in seconds to log a warning message when the ring buffer is full
     "VLLM_RINGBUFFER_WARNING_INTERVAL": lambda: int(
         os.environ.get("VLLM_RINGBUFFER_WARNING_INTERVAL", "60")
@@ -592,19 +707,22 @@ def _get_or_set_default() -> str:
     # Feature flag to enable/disable Inductor standalone compile.
     # In torch <= 2.7 we ignore this flag; in torch >= 2.9 this is
     # enabled by default.
-    "VLLM_USE_STANDALONE_COMPILE": lambda: os.environ.get(
-        "VLLM_USE_STANDALONE_COMPILE", "1"
-    )
-    == "1",
+    "VLLM_USE_STANDALONE_COMPILE": lambda: (
+        os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1"
+    ),
     # Inductor's pre-grad passes don't do anything for vLLM.
     # The pre-grad passes get run even on cache-hit and negatively impact
     # vllm cold compile times by O(1s)
     # Can remove this after the following issue gets fixed
+    # TODO(luka): maybe_inplace requires this
     # https://github.com/pytorch/pytorch/issues/174502
-    "VLLM_ENABLE_PREGRAD_PASSES": lambda: os.environ.get(
-        "VLLM_ENABLE_PREGRAD_PASSES", "0"
-    )
-    == "1",
+    "VLLM_ENABLE_PREGRAD_PASSES": lambda: (
+        os.environ.get("VLLM_ENABLE_PREGRAD_PASSES", "1") == "1"
+    ),
+    # Experimental: breakable cudagraph does not rely on torch.compile
+    "VLLM_USE_BREAKABLE_CUDAGRAPH": lambda: (
+        os.environ.get("VLLM_USE_BREAKABLE_CUDAGRAPH", "0") == "1"
+    ),
     # Debug pattern matching inside custom passes.
     # Should be set to the fx.Node name (e.g. 'getitem_34' or 'scaled_mm_3').
     "VLLM_PATTERN_MATCH_DEBUG": lambda: os.environ.get(
@@ -647,10 +765,9 @@ def _get_or_set_default() -> str:
     # API key for vLLM API server
     "VLLM_API_KEY": lambda: os.environ.get("VLLM_API_KEY", None),
     # Whether to log responses from API Server for debugging
-    "VLLM_DEBUG_LOG_API_SERVER_RESPONSE": lambda: os.environ.get(
-        "VLLM_DEBUG_LOG_API_SERVER_RESPONSE", "False"
-    ).lower()
-    == "true",
+    "VLLM_DEBUG_LOG_API_SERVER_RESPONSE": lambda: (
+        os.environ.get("VLLM_DEBUG_LOG_API_SERVER_RESPONSE", "False").lower() == "true"
+    ),
     # S3 access information, used for tensorizer to load model from S3
     "S3_ACCESS_KEY_ID": lambda: os.environ.get("S3_ACCESS_KEY_ID", None),
     "S3_SECRET_ACCESS_KEY": lambda: os.environ.get("S3_SECRET_ACCESS_KEY", None),
@@ -661,11 +778,13 @@ def _get_or_set_default() -> str:
     ),
     "VLLM_NO_USAGE_STATS": lambda: os.environ.get("VLLM_NO_USAGE_STATS", "0") == "1",
     "VLLM_DO_NOT_TRACK": lambda: (
-        os.environ.get("VLLM_DO_NOT_TRACK", None)
-        or os.environ.get("DO_NOT_TRACK", None)
-        or "0"
-    )
-    == "1",
+        (
+            os.environ.get("VLLM_DO_NOT_TRACK", None)
+            or os.environ.get("DO_NOT_TRACK", None)
+            or "0"
+        )
+        == "1"
+    ),
     "VLLM_USAGE_SOURCE": lambda: os.environ.get("VLLM_USAGE_SOURCE", "production"),
     # Logging configuration
     # If set to 0, vllm will not configure logging
@@ -688,43 +807,55 @@ def _get_or_set_default() -> str:
     "NO_COLOR": lambda: os.getenv("NO_COLOR", "0") != "0",
     # If set, vllm will log stats at this interval in seconds
     # If not set, vllm will log stats every 10 seconds.
-    "VLLM_LOG_STATS_INTERVAL": lambda: val
-    if (val := float(os.getenv("VLLM_LOG_STATS_INTERVAL", "10."))) > 0.0
-    else 10.0,
+    "VLLM_LOG_STATS_INTERVAL": lambda: (
+        val
+        if (val := float(os.getenv("VLLM_LOG_STATS_INTERVAL", "10."))) > 0.0
+        else 10.0
+    ),
     # Trace function calls
     # If set to 1, vllm will trace function calls
     # Useful for debugging
     "VLLM_TRACE_FUNCTION": lambda: int(os.getenv("VLLM_TRACE_FUNCTION", "0")),
-    # If set, vllm will use flashinfer sampler
-    "VLLM_USE_FLASHINFER_SAMPLER": lambda: bool(
-        int(os.environ["VLLM_USE_FLASHINFER_SAMPLER"])
-    )
-    if "VLLM_USE_FLASHINFER_SAMPLER" in os.environ
-    else None,
+    # Whether to use the FlashInfer top-k / top-p sampler on CUDA. Enabled
+    # by default when the hardware supports it — set to 0 to opt out
+    # explicitly, which forces the PyTorch-native (Triton for bs>=8) path.
+    "VLLM_USE_FLASHINFER_SAMPLER": lambda: (
+        bool(int(os.environ["VLLM_USE_FLASHINFER_SAMPLER"]))
+        if "VLLM_USE_FLASHINFER_SAMPLER" in os.environ
+        else True
+    ),
     # Pipeline stage partition strategy
     "VLLM_PP_LAYER_PARTITION": lambda: os.getenv("VLLM_PP_LAYER_PARTITION", None),
     # (CPU backend only) CPU key-value cache space.
     # default is None and will be set as 4 GB
-    "VLLM_CPU_KVCACHE_SPACE": lambda: int(os.getenv("VLLM_CPU_KVCACHE_SPACE", "0"))
-    if "VLLM_CPU_KVCACHE_SPACE" in os.environ
-    else None,
+    "VLLM_CPU_KVCACHE_SPACE": lambda: (
+        int(os.getenv("VLLM_CPU_KVCACHE_SPACE", "0"))
+        if "VLLM_CPU_KVCACHE_SPACE" in os.environ
+        else None
+    ),
     # (CPU backend only) CPU core ids bound by OpenMP threads, e.g., "0-31",
     # "0,1,2", "0-31,33". CPU cores of different ranks are separated by '|'.
     "VLLM_CPU_OMP_THREADS_BIND": lambda: os.getenv("VLLM_CPU_OMP_THREADS_BIND", "auto"),
     # (CPU backend only) CPU cores not used by OMP threads .
     # Those CPU cores will not be used by OMP threads of a rank.
-    "VLLM_CPU_NUM_OF_RESERVED_CPU": lambda: int(
-        os.getenv("VLLM_CPU_NUM_OF_RESERVED_CPU", "0")
-    )
-    if "VLLM_CPU_NUM_OF_RESERVED_CPU" in os.environ
-    else None,
+    "VLLM_CPU_NUM_OF_RESERVED_CPU": lambda: (
+        int(os.getenv("VLLM_CPU_NUM_OF_RESERVED_CPU", "0"))
+        if "VLLM_CPU_NUM_OF_RESERVED_CPU" in os.environ
+        else None
+    ),
     # (CPU backend only) whether to use SGL kernels, optimized for small batch.
     "VLLM_CPU_SGL_KERNEL": lambda: bool(int(os.getenv("VLLM_CPU_SGL_KERNEL", "0"))),
+    # (CPU backend only) whether to enable attention spilt KV.
+    "VLLM_CPU_ATTN_SPLIT_KV": lambda: bool(
+        int(os.getenv("VLLM_CPU_ATTN_SPLIT_KV", "1"))
+    ),
     # (Zen CPU backend) eagerly prepack weights into ZenDNN blocked layout
     # at model load time. Eliminates per-inference layout conversion overhead.
     "VLLM_ZENTORCH_WEIGHT_PREPACK": lambda: bool(
         int(os.getenv("VLLM_ZENTORCH_WEIGHT_PREPACK", "1"))
     ),
+    # (CPU backend only) whether to use SGLang INT4 W4A8 kernels for AWQ.
+    "VLLM_CPU_INT4_W4A8": lambda: bool(int(os.getenv("VLLM_CPU_INT4_W4A8", "1"))),
     # If the env var is set, Ray Compiled Graph uses the specified
     # channel type to communicate between workers belonging to
     # different pipeline-parallel stages.
@@ -746,6 +877,11 @@ def _get_or_set_default() -> str:
     "VLLM_USE_RAY_WRAPPED_PP_COMM": lambda: bool(
         int(os.getenv("VLLM_USE_RAY_WRAPPED_PP_COMM", "1"))
     ),
+    # When True and distributed_executor_backend="ray", use RayExecutorV2
+    # (MQ-based) instead of RayDistributedExecutor (compiled-graph backend).
+    "VLLM_USE_RAY_V2_EXECUTOR_BACKEND": lambda: bool(
+        int(os.getenv("VLLM_USE_RAY_V2_EXECUTOR_BACKEND", "1"))
+    ),
     # Use dedicated multiprocess context for workers.
     # Both spawn and fork work
     "VLLM_WORKER_MULTIPROC_METHOD": env_with_choices(
@@ -776,6 +912,19 @@ def _get_or_set_default() -> str:
     "VLLM_AUDIO_FETCH_TIMEOUT": lambda: int(
         os.getenv("VLLM_AUDIO_FETCH_TIMEOUT", "10")
     ),
+    # Directory for caching media downloads (images, video, audio fetched
+    # from URLs during inference). Empty string disables caching.
+    "VLLM_MEDIA_CACHE": lambda: os.getenv("VLLM_MEDIA_CACHE", ""),
+    # Maximum cache size in MB. When exceeded, least-recently-used entries
+    # are evicted. Default is 5120 (5 GB).
+    "VLLM_MEDIA_CACHE_MAX_SIZE_MB": lambda: int(
+        os.getenv("VLLM_MEDIA_CACHE_MAX_SIZE_MB", "5120")
+    ),
+    # Time-to-live in hours for cached media files. Entries older than this
+    # are evicted regardless of cache size. Default is 24 hours.
+    "VLLM_MEDIA_CACHE_TTL_HOURS": lambda: float(
+        os.getenv("VLLM_MEDIA_CACHE_TTL_HOURS", "24")
+    ),
     # Maximum number of retries for fetching media (images, audio, video)
     # from URLs. Each retry quadruples the timeout. Default is 3.
     "VLLM_MEDIA_FETCH_MAX_RETRIES": lambda: int(
@@ -798,9 +947,10 @@ def _get_or_set_default() -> str:
     "VLLM_MAX_AUDIO_CLIP_FILESIZE_MB": lambda: int(
         os.getenv("VLLM_MAX_AUDIO_CLIP_FILESIZE_MB", "25")
     ),
-    # Backend for Video IO
-    # - "opencv": Default backend that uses OpenCV stream buffered backend.
-    # - "identity": Returns raw video bytes for model processor to handle.
+    # Backend for Video IO — selects the frame-sampling algorithm.
+    # - "opencv": uniform sampling.
+    # - "opencv_dynamic": duration-aware dynamic sampling.
+    # - "identity": returns raw video bytes for model processor to handle.
     #
     # Custom backend implementations can be registered
     # via `@VIDEO_LOADER_REGISTRY.register("my_custom_video_loader")` and
@@ -842,6 +992,12 @@ def _get_or_set_default() -> str:
     ),
     # Enable SPMD mode for TPU backend.
     "VLLM_XLA_USE_SPMD": lambda: bool(int(os.getenv("VLLM_XLA_USE_SPMD", "0"))),
+    # Maximum size (in MB) for logits tensor in sparse MLA indexer prefill chunks.
+    # Bounds the [M, N] float32 logits tensor to prevent CUDA OOM.
+    # Default: 512 MB
+    "VLLM_SPARSE_INDEXER_MAX_LOGITS_MB": lambda: int(
+        os.getenv("VLLM_SPARSE_INDEXER_MAX_LOGITS_MB", "512")
+    ),
     # If set, the OpenAI API server will stay alive even after the underlying
     # AsyncLLMEngine errors and stops serving requests
     "VLLM_KEEP_ALIVE_ON_ENGINE_DEATH": lambda: bool(
@@ -880,9 +1036,11 @@ def _get_or_set_default() -> str:
     # a list of plugin names to load, separated by commas.
     # if this is not set, it means all plugins will be loaded
     # if this is set to an empty string, no plugins will be loaded
-    "VLLM_PLUGINS": lambda: None
-    if "VLLM_PLUGINS" not in os.environ
-    else os.environ["VLLM_PLUGINS"].split(","),
+    "VLLM_PLUGINS": lambda: (
+        None
+        if "VLLM_PLUGINS" not in os.environ
+        else os.environ["VLLM_PLUGINS"].split(",")
+    ),
     # a local directory to look in for unrecognized LoRA adapters.
     # only works if plugins are enabled and
     # VLLM_ALLOW_RUNTIME_LORA_UPDATING is enabled.
@@ -914,9 +1072,11 @@ def _get_or_set_default() -> str:
     # and performance comparisons. Currently only affects MPLinearKernel
     # selection
     # (kernels: MacheteLinearKernel, MarlinLinearKernel, ExllamaLinearKernel)
-    "VLLM_DISABLED_KERNELS": lambda: []
-    if "VLLM_DISABLED_KERNELS" not in os.environ
-    else os.environ["VLLM_DISABLED_KERNELS"].split(","),
+    "VLLM_DISABLED_KERNELS": lambda: (
+        []
+        if "VLLM_DISABLED_KERNELS" not in os.environ
+        else os.environ["VLLM_DISABLED_KERNELS"].split(",")
+    ),
     "VLLM_ENABLE_FLA_PACKED_RECURRENT_DECODE": lambda: bool(
         int(os.getenv("VLLM_ENABLE_FLA_PACKED_RECURRENT_DECODE", "1"))
     ),
@@ -942,6 +1102,7 @@ def _get_or_set_default() -> str:
     # use aiter linear op if aiter ops are enabled
     # The following list of related ops
     # - scaled_mm (per-tensor / rowwise)
+    # - use aiter tuned gemms for unquantized gemms
     "VLLM_ROCM_USE_AITER_LINEAR": lambda: (
         os.getenv("VLLM_ROCM_USE_AITER_LINEAR", "True").lower() in ("true", "1")
     ),
@@ -1037,6 +1198,19 @@ def _get_or_set_default() -> str:
     "VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB": lambda: maybe_convert_int(
         os.environ.get("VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB", None)
     ),
+    # Custom quick allreduce kernel for MI3* cards.
+    # Controls the minimum allowed number of data bytes(MB) required to use
+    # custom quick allreduce communication.
+    # If unset, use the built-in threshold table.
+    "VLLM_ROCM_QUICK_REDUCE_MIN_SIZE_BYTES_MB": lambda: maybe_convert_int(
+        os.environ.get("VLLM_ROCM_QUICK_REDUCE_MIN_SIZE_BYTES_MB", None)
+    ),
+    # Controls the minimum tensor size (KB, where 1 KB = 1024 bytes) required
+    # to use the configured QuickReduce codec. Smaller tensors use FP
+    # QuickReduce. This does not affect QuickReduce eligibility.
+    "VLLM_ROCM_QUICK_REDUCE_QUANTIZATION_MIN_SIZE_KB": lambda: maybe_convert_int(
+        os.environ.get("VLLM_ROCM_QUICK_REDUCE_QUANTIZATION_MIN_SIZE_KB", None)
+    ),
     # Divisor for dynamic query scale factor calculation for FP8 KV Cache
     "Q_SCALE_CONSTANT": lambda: int(os.getenv("Q_SCALE_CONSTANT", "200")),
     # Divisor for dynamic key scale factor calculation for FP8 KV Cache
@@ -1051,6 +1225,18 @@ def _get_or_set_default() -> str:
         os.getenv("VLLM_LOG_BATCHSIZE_INTERVAL", "-1")
     ),
     "VLLM_DISABLE_COMPILE_CACHE": disable_compile_cache,
+    # If set to "0", disable LayerName opaque type for layer_name
+    # parameters in custom ops.  Defaults to enabled on torch >= 2.11.
+    "VLLM_USE_LAYERNAME": lambda: bool(int(os.getenv("VLLM_USE_LAYERNAME", "1"))),
+    # If set, use the Rust frontend binary instead of the Python API server
+    # process(es).
+    "VLLM_USE_RUST_FRONTEND": lambda: bool(
+        int(os.getenv("VLLM_USE_RUST_FRONTEND", "0"))
+    ),
+    # Path to the Rust frontend binary. Defaults to "auto" which discovers
+    # the binary installed with the vllm package. Only used when
+    # VLLM_USE_RUST_FRONTEND=1.
+    "VLLM_RUST_FRONTEND_PATH": lambda: _resolve_rust_frontend_path(),
     # If set, vllm will run in development mode, which will enable
     # some additional endpoints for developing and debugging,
     # e.g. `/reset_prefix_cache`
@@ -1094,20 +1280,10 @@ def _get_or_set_default() -> str:
     "VLLM_DP_MASTER_IP": lambda: os.getenv("VLLM_DP_MASTER_IP", "127.0.0.1"),
     # Port of the master node in the data parallel setting
     "VLLM_DP_MASTER_PORT": lambda: int(os.getenv("VLLM_DP_MASTER_PORT", "0")),
-    # In the context of executing MoE models with Data-Parallel, Expert-Parallel
-    # and Batched All-to-All dispatch/combine kernels, VLLM_MOE_DP_CHUNK_SIZE
-    # dictates the quantum of tokens that can be dispatched from a DP
-    # rank. All DP ranks process the activations in VLLM_MOE_DP_CHUNK_SIZE
-    # units.
-    "VLLM_MOE_DP_CHUNK_SIZE": lambda: int(os.getenv("VLLM_MOE_DP_CHUNK_SIZE", "256")),
-    "VLLM_ENABLE_MOE_DP_CHUNK": lambda: bool(
-        int(os.getenv("VLLM_ENABLE_MOE_DP_CHUNK", "1"))
-    ),
     # Randomize inputs during dummy runs when using Data Parallel
-    "VLLM_RANDOMIZE_DP_DUMMY_INPUTS": lambda: os.environ.get(
-        "VLLM_RANDOMIZE_DP_DUMMY_INPUTS", "0"
-    )
-    == "1",
+    "VLLM_RANDOMIZE_DP_DUMMY_INPUTS": lambda: (
+        os.environ.get("VLLM_RANDOMIZE_DP_DUMMY_INPUTS", "0") == "1"
+    ),
     # Strategy to pack the data parallel ranks for Ray.
     # Available options:
     # - "fill":
@@ -1147,18 +1323,41 @@ def _get_or_set_default() -> str:
         "VLLM_MODEL_REDIRECT_PATH", None
     ),
     # Whether to use atomicAdd reduce in gptq/awq marlin kernel.
-    "VLLM_MARLIN_USE_ATOMIC_ADD": lambda: os.environ.get(
-        "VLLM_MARLIN_USE_ATOMIC_ADD", "0"
-    )
-    == "1",
+    "VLLM_MARLIN_USE_ATOMIC_ADD": lambda: (
+        os.environ.get("VLLM_MARLIN_USE_ATOMIC_ADD", "0") == "1"
+    ),
     # Whether to use marlin kernel in mxfp4 quantization method
-    "VLLM_MXFP4_USE_MARLIN": lambda: maybe_convert_bool(
-        os.environ.get("VLLM_MXFP4_USE_MARLIN", None)
+    # Deprecated: use --moe-backend marlin (MoE) or --linear-backend marlin
+    # (linear) instead.
+    "VLLM_MXFP4_USE_MARLIN": deprecated_env(
+        "VLLM_MXFP4_USE_MARLIN",
+        "v0.23",
+        "Use --moe-backend marlin or --linear-backend marlin.",
+        lambda: maybe_convert_bool(os.environ.get("VLLM_MXFP4_USE_MARLIN", None)),
     ),
     # The activation dtype for marlin kernel
     "VLLM_MARLIN_INPUT_DTYPE": env_with_choices(
         "VLLM_MARLIN_INPUT_DTYPE", None, ["int8", "fp8"]
     ),
+    # The online quantization dtype for humming kernel
+    "VLLM_HUMMING_ONLINE_QUANT_CONFIG": lambda: maybe_convert_json_str_or_file(
+        os.environ.get("VLLM_HUMMING_ONLINE_QUANT_CONFIG", None)
+    ),
+    # The activation dtype config for humming kernel
+    "VLLM_HUMMING_INPUT_QUANT_CONFIG": lambda: maybe_convert_json_str_or_file(
+        os.environ.get("VLLM_HUMMING_INPUT_QUANT_CONFIG", None)
+    ),
+    # Whether to use fp16 accumulator mma
+    "VLLM_HUMMING_USE_F16_ACCUM": lambda: maybe_convert_bool(
+        os.environ.get("VLLM_HUMMING_USE_F16_ACCUM", "0")
+    ),
+    # Whether to use indexed gemm for humming moe
+    # if 1, force use indexed gemm
+    # if 0, force use grouped gemm
+    # if None, choose better gemm type automatically
+    "VLLM_HUMMING_MOE_GEMM_TYPE": lambda: os.environ.get(
+        "VLLM_HUMMING_MOE_GEMM_TYPE", None
+    ),
     # Whether to use DeepEPLL kernels for NVFP4 quantization and dispatch method
     # only supported on Blackwell GPUs and with
     # https://github.com/deepseek-ai/DeepEP/pull/341
@@ -1168,17 +1367,16 @@ def _get_or_set_default() -> str:
     # Whether to turn on the outlines cache for V1
     # This cache is unbounded and on disk, so it's not safe to use in
     # an environment with potentially malicious users.
-    "VLLM_V1_USE_OUTLINES_CACHE": lambda: os.environ.get(
-        "VLLM_V1_USE_OUTLINES_CACHE", "0"
-    )
-    == "1",
+    "VLLM_V1_USE_OUTLINES_CACHE": lambda: (
+        os.environ.get("VLLM_V1_USE_OUTLINES_CACHE", "0") == "1"
+    ),
     # Gap between padding buckets for the forward pass. So we have
     # 8, we will run forward pass with [16, 24, 32, ...].
-    "VLLM_TPU_BUCKET_PADDING_GAP": lambda: int(
-        os.environ["VLLM_TPU_BUCKET_PADDING_GAP"]
-    )
-    if "VLLM_TPU_BUCKET_PADDING_GAP" in os.environ
-    else 0,
+    "VLLM_TPU_BUCKET_PADDING_GAP": lambda: (
+        int(os.environ["VLLM_TPU_BUCKET_PADDING_GAP"])
+        if "VLLM_TPU_BUCKET_PADDING_GAP" in os.environ
+        else 0
+    ),
     "VLLM_TPU_MOST_MODEL_LEN": lambda: maybe_convert_int(
         os.environ.get("VLLM_TPU_MOST_MODEL_LEN", None)
     ),
@@ -1230,16 +1428,29 @@ def _get_or_set_default() -> str:
         int(os.getenv("VLLM_BLOCKSCALE_FP8_GEMM_FLASHINFER", "1"))
     ),
     # Allow use of FlashInfer BF16 MoE kernels for fused moe ops.
-    "VLLM_USE_FLASHINFER_MOE_FP16": lambda: bool(
-        int(os.getenv("VLLM_USE_FLASHINFER_MOE_FP16", "0"))
+    # Deprecated: use --moe-backend to select a kernel explicitly.
+    "VLLM_USE_FLASHINFER_MOE_FP16": deprecated_env(
+        "VLLM_USE_FLASHINFER_MOE_FP16",
+        "v0.23",
+        "Use --moe-backend (e.g. flashinfer_trtllm, flashinfer_cutlass).",
+        lambda: bool(int(os.getenv("VLLM_USE_FLASHINFER_MOE_FP16", "0"))),
     ),
     # Allow use of FlashInfer FP8 MoE kernels for fused moe ops.
-    "VLLM_USE_FLASHINFER_MOE_FP8": lambda: bool(
-        int(os.getenv("VLLM_USE_FLASHINFER_MOE_FP8", "0"))
+    # Deprecated: use --moe-backend to select a kernel explicitly.
+    "VLLM_USE_FLASHINFER_MOE_FP8": deprecated_env(
+        "VLLM_USE_FLASHINFER_MOE_FP8",
+        "v0.23",
+        "Use --moe-backend (e.g. flashinfer_trtllm, flashinfer_cutlass).",
+        lambda: bool(int(os.getenv("VLLM_USE_FLASHINFER_MOE_FP8", "0"))),
     ),
     # Allow use of FlashInfer NVFP4 MoE kernels for fused moe ops.
-    "VLLM_USE_FLASHINFER_MOE_FP4": lambda: bool(
-        int(os.getenv("VLLM_USE_FLASHINFER_MOE_FP4", "0"))
+    # Deprecated: use --moe-backend to select a kernel explicitly.
+    "VLLM_USE_FLASHINFER_MOE_FP4": deprecated_env(
+        "VLLM_USE_FLASHINFER_MOE_FP4",
+        "v0.23",
+        "Use --moe-backend (e.g. flashinfer_trtllm, flashinfer_cutlass, "
+        "flashinfer_cutedsl).",
+        lambda: bool(int(os.getenv("VLLM_USE_FLASHINFER_MOE_FP4", "0"))),
     ),
     # Allow use of FlashInfer MxInt4 MoE kernels for fused moe ops.
     "VLLM_USE_FLASHINFER_MOE_INT4": lambda: bool(
@@ -1247,20 +1458,36 @@ def _get_or_set_default() -> str:
     ),
     # If set to 1, use the FlashInfer
     # MXFP8 (activation) x MXFP4 (weight) MoE backend.
-    "VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8": lambda: bool(
-        int(os.getenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8", "0"))
+    # Deprecated: use --moe-backend flashinfer_trtllm combined with
+    # --quantization_config.moe.activation mxfp8.
+    "VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8": deprecated_env(
+        "VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8",
+        "v0.23",
+        "Use --moe-backend flashinfer_trtllm with "
+        "--quantization_config.moe.activation mxfp8.",
+        lambda: bool(int(os.getenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8", "0"))),
     ),
     # If set to 1, use the FlashInfer CUTLASS backend for
     # MXFP8 (activation) x MXFP4 (weight) MoE.
-    # This is separate from the TRTLLMGEN path controlled by
-    # VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8.
-    "VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS": lambda: bool(
-        int(os.getenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS", "0"))
+    # Deprecated: use --moe-backend flashinfer_cutlass combined with
+    # --quantization_config.moe.activation mxfp8.
+    "VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS": deprecated_env(
+        "VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS",
+        "v0.23",
+        "Use --moe-backend flashinfer_cutlass with "
+        "--quantization_config.moe.activation mxfp8.",
+        lambda: bool(
+            int(os.getenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS", "0"))
+        ),
     ),
     # If set to 1, use the FlashInfer
     # BF16 (activation) x MXFP4 (weight) MoE backend.
-    "VLLM_USE_FLASHINFER_MOE_MXFP4_BF16": lambda: bool(
-        int(os.getenv("VLLM_USE_FLASHINFER_MOE_MXFP4_BF16", "0"))
+    # Deprecated: use --moe-backend to select a kernel explicitly.
+    "VLLM_USE_FLASHINFER_MOE_MXFP4_BF16": deprecated_env(
+        "VLLM_USE_FLASHINFER_MOE_MXFP4_BF16",
+        "v0.23",
+        "Use --moe-backend (e.g. flashinfer_trtllm, flashinfer_cutlass).",
+        lambda: bool(int(os.getenv("VLLM_USE_FLASHINFER_MOE_MXFP4_BF16", "0"))),
     ),
     # Control the cache sized used by the xgrammar compiler. The default
     # of 512 MB should be enough for roughly 1000 JSON schemas.
@@ -1299,6 +1526,20 @@ def _get_or_set_default() -> str:
     "VLLM_MOONCAKE_BOOTSTRAP_PORT": lambda: int(
         os.getenv("VLLM_MOONCAKE_BOOTSTRAP_PORT", "8998")
     ),
+    # Log per-batch memory/disk tier breakdown on external GETs.
+    "VLLM_MOONCAKE_STORE_TIER_LOG": lambda: (
+        os.getenv("VLLM_MOONCAKE_STORE_TIER_LOG", "False").lower() in ("true", "1")
+    ),
+    # Fraction of the owner's DirectIO staging buffer to fill per GET batch.
+    "VLLM_MOONCAKE_DISK_STAGING_USABLE_RATIO": lambda: float(
+        os.getenv("VLLM_MOONCAKE_DISK_STAGING_USABLE_RATIO", "0.9")
+    ),
+    # Pin this rank to a specific owner segment ("host:port").
+    "MOONCAKE_PREFERRED_SEGMENT": lambda: os.getenv("MOONCAKE_PREFERRED_SEGMENT"),
+    # Override the hostname the rank registers as a Mooncake requester.
+    "MOONCAKE_REQUESTER_LOCAL_HOSTNAME": lambda: os.getenv(
+        "MOONCAKE_REQUESTER_LOCAL_HOSTNAME"
+    ),
     # Flashinfer MoE backend for vLLM's fused Mixture-of-Experts support.
     # Both require compute capability 10.0 or above.
     # Available options:
@@ -1306,10 +1547,21 @@ def _get_or_set_default() -> str:
     #     Uses CUTLASS kernels optimized for high-throughput batch inference.
     # - "latency":
     #     Uses TensorRT-LLM kernels optimized for low-latency inference.
-    "VLLM_FLASHINFER_MOE_BACKEND": env_with_choices(
+    # Deprecated: pass --moe-backend flashinfer_{trtllm,cutlass,cutedsl} directly.
+    "VLLM_FLASHINFER_MOE_BACKEND": deprecated_env(
         "VLLM_FLASHINFER_MOE_BACKEND",
-        "latency",
-        ["throughput", "latency", "masked_gemm"],
+        "v0.23",
+        "Use --moe-backend flashinfer_trtllm, flashinfer_cutlass, or "
+        "flashinfer_cutedsl.",
+        env_with_choices(
+            "VLLM_FLASHINFER_MOE_BACKEND",
+            "latency",
+            ["throughput", "latency", "masked_gemm"],
+        ),
+    ),
+    # Override the directory for the FlashInfer autotune config cache.
+    "VLLM_FLASHINFER_AUTOTUNE_CACHE_DIR": lambda: os.getenv(
+        "VLLM_FLASHINFER_AUTOTUNE_CACHE_DIR", None
     ),
     # Flashinfer fused allreduce backend.
     "VLLM_FLASHINFER_ALLREDUCE_BACKEND": env_with_choices(
@@ -1371,6 +1623,13 @@ def _get_or_set_default() -> str:
     "VLLM_KV_CACHE_LAYOUT": env_with_choices(
         "VLLM_KV_CACHE_LAYOUT", None, ["NHD", "HND"]
     ),
+    # SSM conv state layout used for Mamba models.
+    # - SD: (state_len, dim) — dim contiguous (default)
+    # - DS: (dim, state_len) — TP-sharded dim on dim1,
+    #   consistent with SSM temporal state and HND KV cache layout.
+    "VLLM_SSM_CONV_STATE_LAYOUT": env_with_choices(
+        "VLLM_SSM_CONV_STATE_LAYOUT", None, ["SD", "DS"]
+    ),
     # Enable checking whether the generated logits contain NaNs,
     # indicating corrupted output. Useful for debugging low level bugs
     # or bad hardware but it may add compute overhead.
@@ -1380,15 +1639,12 @@ def _get_or_set_default() -> str:
     # Controls whether or not emulations are used for NVFP4
     # generations on machines < 100 for compressed-tensors
     # models
-    "VLLM_USE_NVFP4_CT_EMULATIONS": lambda: bool(
-        int(os.getenv("VLLM_USE_NVFP4_CT_EMULATIONS", "0"))
-    ),
-    # Time (in seconds) after which the KV cache on the producer side is
-    # automatically cleared if no READ notification is received from the
-    # consumer. This is only applicable when using NixlConnector in a
-    # disaggregated decode-prefill setup.
-    "VLLM_NIXL_ABORT_REQUEST_TIMEOUT": lambda: int(
-        os.getenv("VLLM_NIXL_ABORT_REQUEST_TIMEOUT", "480")
+    # Deprecated: use --linear-backend emulation instead.
+    "VLLM_USE_NVFP4_CT_EMULATIONS": deprecated_env(
+        "VLLM_USE_NVFP4_CT_EMULATIONS",
+        "v0.23",
+        "Use --linear-backend emulation.",
+        lambda: bool(int(os.getenv("VLLM_USE_NVFP4_CT_EMULATIONS", "0"))),
     ),
     # Controls the read mode for the Mori-IO connector
     "VLLM_MORIIO_CONNECTOR_READ_MODE": lambda: (
@@ -1418,17 +1674,29 @@ def _get_or_set_default() -> str:
     # - "flashinfer-trtllm": use flashinfer trtllm GEMM backend
     # - "flashinfer-cutlass": use flashinfer cutlass GEMM backend
     # - "marlin": use marlin GEMM backend (for GPUs without native FP4 support)
+    # - "emulation":
+    #     use BF16/FP16 GEMM, dequantizing weights and running QDQ on activations.
+    #     This is only meant for research purposes to run on devices where NVFP4
+    #     GEMM kernels are not available.
     # - <none>: automatically pick an available backend
-    "VLLM_NVFP4_GEMM_BACKEND": env_with_choices(
+    # Deprecated: use --linear-backend instead.
+    "VLLM_NVFP4_GEMM_BACKEND": deprecated_env(
         "VLLM_NVFP4_GEMM_BACKEND",
-        None,
-        [
-            "flashinfer-cudnn",
-            "flashinfer-trtllm",
-            "flashinfer-cutlass",
-            "cutlass",
-            "marlin",
-        ],
+        "v0.23",
+        "Use --linear-backend.",
+        env_with_choices(
+            "VLLM_NVFP4_GEMM_BACKEND",
+            None,
+            [
+                "flashinfer-b12x",
+                "flashinfer-cudnn",
+                "flashinfer-trtllm",
+                "flashinfer-cutlass",
+                "cutlass",
+                "marlin",
+                "emulation",
+            ],
+        ),
     ),
     # Controls garbage collection during CUDA graph capture.
     # If set to 0 (default), enables GC freezing to speed up capture time.
@@ -1480,7 +1748,10 @@ def _get_or_set_default() -> str:
     "VLLM_USE_EXPERIMENTAL_PARSER_CONTEXT": lambda: bool(
         int(os.getenv("VLLM_USE_EXPERIMENTAL_PARSER_CONTEXT", "0"))
     ),
-    # Allows vllm to find tuned config under customized folder
+    # User override folder for tuned Triton-kernel configs. Shared by MoE,
+    # Mamba SSU, and LoRA. Filenames are distinct so one folder can hold all.
+    # Each component first checks this folder, then the configs shipped with
+    # vLLM (if any). If no JSON matches, it uses a hard-coded heuristic.
     "VLLM_TUNED_CONFIG_FOLDER": lambda: os.getenv("VLLM_TUNED_CONFIG_FOLDER", None),
     # Valid values are container,code_interpreter,web_search_preview
     # ex VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS=container,code_interpreter
@@ -1507,6 +1778,12 @@ def _get_or_set_default() -> str:
     "VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY": lambda: bool(
         int(os.getenv("VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY", "0"))
     ),
+    # When 1,the model structural tags will be used to enforce the model
+    # output conforming to the model's tool-calling format and schema.
+    # Default 0 (off).
+    "VLLM_ENFORCE_STRICT_TOOL_CALLING": lambda: bool(
+        int(os.getenv("VLLM_ENFORCE_STRICT_TOOL_CALLING", "0"))
+    ),
     # Add optional custom scopes for profiling, disable to avoid overheads
     "VLLM_CUSTOM_SCOPES_FOR_PROFILING": lambda: bool(
         int(os.getenv("VLLM_CUSTOM_SCOPES_FOR_PROFILING", "0"))
@@ -1543,9 +1820,18 @@ def _get_or_set_default() -> str:
     "VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL": lambda: bool(
         int(os.getenv("VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL", "0"))
     ),
-    # The number of SMs to allocate for communication kernels when running DBO
-    # the rest of the SMs on the device will be allocated to compute
-    "VLLM_DBO_COMM_SMS": lambda: int(os.getenv("VLLM_DBO_COMM_SMS", "20")),
+    # The number of SMs/CUs to allocate for communication kernels when
+    # running DBO; the rest will be allocated to compute.
+    # Default: 20 on CUDA (SMs), 64 on ROCm (CUs).
+    "VLLM_DBO_COMM_SMS": lambda: int(
+        os.getenv(
+            "VLLM_DBO_COMM_SMS",
+            "64"
+            if hasattr(__import__("torch").version, "hip")
+            and __import__("torch").version.hip is not None
+            else "20",
+        )
+    ),
     # Enable max_autotune & coordinate_descent_tuning in inductor_config
     # to compile static shapes passed from compile_sizes in compilation_config
     # If set to 1, enable max_autotune; By default, this is enabled (1)
@@ -1564,7 +1850,13 @@ def _get_or_set_default() -> str:
     # NCCL header path
     "VLLM_NCCL_INCLUDE_PATH": lambda: os.environ.get("VLLM_NCCL_INCLUDE_PATH", None),
     # Flag to enable FBGemm kernels on model execution
-    "VLLM_USE_FBGEMM": lambda: bool(int(os.getenv("VLLM_USE_FBGEMM", "0"))),
+    # Deprecated: use --linear-backend fbgemm instead.
+    "VLLM_USE_FBGEMM": deprecated_env(
+        "VLLM_USE_FBGEMM",
+        "v0.23",
+        "Use --linear-backend fbgemm.",
+        lambda: bool(int(os.getenv("VLLM_USE_FBGEMM", "0"))),
+    ),
     # GC debug config
     # - VLLM_GC_DEBUG=0: disable GC debugger
     # - VLLM_GC_DEBUG=1: enable GC debugger with gc.collect elpased times
@@ -1585,6 +1877,17 @@ def _get_or_set_default() -> str:
     "VLLM_SHARED_EXPERTS_STREAM_TOKEN_THRESHOLD": lambda: int(
         int(os.getenv("VLLM_SHARED_EXPERTS_STREAM_TOKEN_THRESHOLD", 256))
     ),
+    # Token-count cutoff for multi-stream overlap of the attention input
+    # GEMM with auxiliary GEMMs (e.g. fused_wqa_wkv overlapped with indexer
+    # weights / kv-score projections in DeepSeek-V4). At or below this many
+    # tokens the FP8 main GEMM has idle SMs to share with the bf16 aux GEMMs
+    # and overlap is a 5-45% win; above it the FP8 GEMM saturates the device
+    # and the cross-stream sync becomes pure overhead. Set to 0 to disable
+    # the multi-stream path entirely. See #PR 41526 for the empirical result
+    # for the default value of 1024 tokens.
+    "VLLM_MULTI_STREAM_GEMM_TOKEN_THRESHOLD": lambda: int(
+        os.getenv("VLLM_MULTI_STREAM_GEMM_TOKEN_THRESHOLD", "1024")
+    ),
     # Format for saving torch.compile cache artifacts
     # - "binary": saves as binary file
     #     Safe for multiple vllm serve processes accessing the same torch compile cache.
@@ -1594,9 +1897,9 @@ def _get_or_set_default() -> str:
     "VLLM_COMPILE_CACHE_SAVE_FORMAT": env_with_choices(
         "VLLM_COMPILE_CACHE_SAVE_FORMAT", "binary", ["binary", "unpacked"]
     ),
-    # Flag to enable v2 model runner.
-    "VLLM_USE_V2_MODEL_RUNNER": lambda: bool(
-        int(os.getenv("VLLM_USE_V2_MODEL_RUNNER", "0"))
+    # Flag to control the v2 model runner. If unset, use config defaults.
+    "VLLM_USE_V2_MODEL_RUNNER": lambda: maybe_convert_bool(
+        os.getenv("VLLM_USE_V2_MODEL_RUNNER", None)
     ),
     # Log model inspection after loading.
     # If enabled, logs a transformers-style hierarchical view of the model
@@ -1631,6 +1934,14 @@ def _get_or_set_default() -> str:
     "VLLM_CUDA_COMPATIBILITY_PATH": lambda: os.environ.get(
         "VLLM_CUDA_COMPATIBILITY_PATH", None
     ),
+    # Skip model name validation in OpenAI API requests.
+    # When set to 1, any model name will be accepted in the 'model' field
+    # of API requests. This is useful for proxy/gateway scenarios where
+    # the actual model is served but different names may be used in requests.
+    "VLLM_SKIP_MODEL_NAME_VALIDATION": lambda: (
+        os.getenv("VLLM_SKIP_MODEL_NAME_VALIDATION", "0").strip().lower()
+        in ("1", "true")
+    ),
     # Whether it is a scale up launch engine for elastic EP,
     # Should only be set by EngineCoreClient.
     "VLLM_ELASTIC_EP_SCALE_UP_LAUNCH": lambda: bool(
@@ -1643,9 +1954,9 @@ def _get_or_set_default() -> str:
     ),
     # If set to 1, enable CUDA graph memory estimation during memory profiling.
     # This profiles CUDA graph memory usage to provide more accurate KV cache
-    # memory allocation. Disabled by default to preserve existing behavior.
+    # memory allocation. Enabled by default as of v0.21.0
     "VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS": lambda: bool(
-        int(os.getenv("VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS", "0"))
+        int(os.getenv("VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS", "1"))
     ),
     # NIXL EP environment variables
     "VLLM_NIXL_EP_MAX_NUM_RANKS": lambda: int(
@@ -1655,6 +1966,21 @@ def _get_or_set_default() -> str:
     "VLLM_XPU_ENABLE_XPU_GRAPH": lambda: bool(
         int(os.getenv("VLLM_XPU_ENABLE_XPU_GRAPH", "0"))
     ),
+    # whether use xpu specific sample kernel
+    "VLLM_XPU_USE_SAMPLER_KERNEL": lambda: bool(
+        int(os.getenv("VLLM_XPU_USE_SAMPLER_KERNEL", "1"))
+    ),
+    # Enable simple KV offload.
+    "VLLM_USE_SIMPLE_KV_OFFLOAD": lambda: bool(
+        int(os.getenv("VLLM_USE_SIMPLE_KV_OFFLOAD", "0"))
+    ),
+    # Whether to enable dual cuda streams for LoRA computation
+    "VLLM_LORA_ENABLE_DUAL_STREAM": lambda: bool(
+        int(os.getenv("VLLM_LORA_ENABLE_DUAL_STREAM", "0"))
+    ),
+    # If set to 1, use Python spinloop extension to poll in a more efficient
+    # way when using the mp backend.
+    "VLLM_USE_SPINLOOP_EXT": lambda: bool(int(os.getenv("VLLM_USE_SPINLOOP_EXT", "0"))),
 }
 
 
@@ -1751,6 +2077,7 @@ def compile_factors() -> dict[str, object]:
         "VLLM_SERVER_DEV_MODE",
         "VLLM_DP_MASTER_IP",
         "VLLM_DP_MASTER_PORT",
+        "VLLM_NIXL_SIDE_CHANNEL_HOST",
         "VLLM_RANDOMIZE_DP_DUMMY_INPUTS",
         "VLLM_CI_USE_S3",
         "VLLM_MODEL_REDIRECT_PATH",
@@ -1770,6 +2097,7 @@ def compile_factors() -> dict[str, object]:
         "VLLM_LOG_STATS_INTERVAL",
         "VLLM_DEBUG_LOG_API_SERVER_RESPONSE",
         "VLLM_TUNED_CONFIG_FOLDER",
+        "VLLM_FLASHINFER_AUTOTUNE_CACHE_DIR",
         "VLLM_ENGINE_ITERATION_TIMEOUT_S",
         "VLLM_HTTP_TIMEOUT_KEEP_ALIVE",
         "VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS",
@@ -1777,6 +2105,9 @@ def compile_factors() -> dict[str, object]:
         "VLLM_IMAGE_FETCH_TIMEOUT",
         "VLLM_VIDEO_FETCH_TIMEOUT",
         "VLLM_AUDIO_FETCH_TIMEOUT",
+        "VLLM_MEDIA_CACHE",
+        "VLLM_MEDIA_CACHE_MAX_SIZE_MB",
+        "VLLM_MEDIA_CACHE_TTL_HOURS",
         "VLLM_MEDIA_FETCH_MAX_RETRIES",
         "VLLM_MEDIA_URL_ALLOW_REDIRECTS",
         "VLLM_MEDIA_LOADING_THREAD_COUNT",
@@ -1795,6 +2126,7 @@ def compile_factors() -> dict[str, object]:
         "VLLM_TEST_FORCE_LOAD_FORMAT",
         "VLLM_ENABLE_CUDA_COMPATIBILITY",
         "VLLM_CUDA_COMPATIBILITY_PATH",
+        "VLLM_SKIP_MODEL_NAME_VALIDATION",
         "LOCAL_RANK",
         "CUDA_VISIBLE_DEVICES",
         "NO_COLOR",
diff --git a/vllm/forward_context.py b/vllm/forward_context.py
index a7aaeff4fc85..5527ec13b061 100644
--- a/vllm/forward_context.py
+++ b/vllm/forward_context.py
@@ -69,27 +69,8 @@ def _compute_sp_num_tokens(
     return sp_tokens.tolist()
 
 
-def _compute_chunked_local_num_tokens(
-    num_tokens_across_dp_cpu: torch.Tensor,
-    sequence_parallel_size: int,
-    max_num_tokens: int,
-    chunk_idx: int,
-) -> list[int]:
-    sp_tokens = _compute_sp_num_tokens(num_tokens_across_dp_cpu, sequence_parallel_size)
-    sp_size = len(sp_tokens)
-
-    local_size = [-1] * sp_size
-    for i in range(sp_size):
-        # Take into account sharding if MoE activation is sequence parallel.
-        local_size[i] = min(max_num_tokens, sp_tokens[i] - (max_num_tokens * chunk_idx))
-        if local_size[i] <= 0:
-            local_size[i] = 1  # ensure lockstep even if done
-    return local_size
-
-
 @dataclass
 class DPMetadata:
-    max_tokens_across_dp_cpu: torch.Tensor
     num_tokens_across_dp_cpu: torch.Tensor
 
     # NOTE: local_sizes should only be set by the chunked_sizes context manager
@@ -112,47 +93,7 @@ def make(
         assert num_tokens_across_dp_cpu[dp_rank] == batchsize, (
             f"{num_tokens_across_dp_cpu[dp_rank]} {batchsize}"
         )
-        max_tokens_across_dp_cpu = torch.max(num_tokens_across_dp_cpu)
-        return DPMetadata(max_tokens_across_dp_cpu, num_tokens_across_dp_cpu)
-
-    @contextmanager
-    def chunked_sizes(
-        self, sequence_parallel_size: int, max_chunk_size_per_rank: int, chunk_idx: int
-    ):
-        """
-        Context manager to compute and temporarily set the per-rank local token
-        sizes for a specific chunk during chunked forward execution.
-
-        This is necessary to ensure each DP (data parallel) rank processes its
-        designated portion of tokens in lockstep with others, even when the
-        token counts are uneven or some ranks have completed their input early.
-
-        For chunked execution, we break up the total tokens on each rank into
-        multiple chunks (of at most `max_chunk_size_per_rank`), and for a given
-        `chunk_idx`, this context manager sets `self.local_sizes` to the number
-        of tokens to process in that chunk on each rank.
-
-        `self.local_sizes` is only valid inside the context.
-
-        Args:
-            sequence_parallel_size: When Attn is TP and MoE layers are EP,
-                                    we use SP between the layers to avoid
-                                    redundant ops. We need this value to
-                                    compute the chunked sizes.
-            max_chunk_size_per_rank: The max number of tokens each rank is
-                                     allowed to process in this chunk.
-            chunk_idx: The index of the chunk to compute sizes for.
-        """
-        self.local_sizes = _compute_chunked_local_num_tokens(
-            self.num_tokens_across_dp_cpu,
-            sequence_parallel_size,
-            max_chunk_size_per_rank,
-            chunk_idx,
-        )
-        try:
-            yield self.local_sizes
-        finally:
-            self.local_sizes = None
+        return DPMetadata(num_tokens_across_dp_cpu)
 
     @contextmanager
     def sp_local_sizes(self, sequence_parallel_size: int):
diff --git a/vllm/inputs/engine.py b/vllm/inputs/engine.py
index 2b426eba8f00..1c12fbc2c552 100644
--- a/vllm/inputs/engine.py
+++ b/vllm/inputs/engine.py
@@ -71,12 +71,27 @@ class EmbedsInput(_InputOptions):
     prompt: NotRequired[str]
     """The prompt text corresponding to the token IDs, if available."""
 
+    prompt_token_ids: NotRequired[list[int]]
+    """Token IDs of the rendered prompt. Only set for mixed-mode inputs
+    (chat completion with `prompt_embeds` content parts). When present,
+    `is_token_ids` MUST also be present and have the same length. 
+    For pure-embeds inputs this field is absent."""
+
+    is_token_ids: NotRequired[list[bool]]
+    """Per-position mask for mixed-mode inputs. `True` means the position
+    is a real token ID (use the model's embedding layer); `False` means
+    the position uses a pre-computed embedding row from `prompt_embeds`.
+    Length MUST equal `len(prompt_token_ids)`.
+    For pure-embeds inputs this field is absent."""
+
 
 def embeds_input(
     prompt_embeds: "torch.Tensor",
     *,
     prompt: str | None = None,
     cache_salt: str | None = None,
+    prompt_token_ids: list[int] | None = None,
+    is_token_ids: list[bool] | None = None,
 ) -> EmbedsInput:
     """
     Construct [`EmbedsInput`][vllm.inputs.engine.EmbedsInput]
@@ -88,6 +103,10 @@ def embeds_input(
         inputs["prompt"] = prompt
     if cache_salt is not None:
         inputs["cache_salt"] = cache_salt
+    if prompt_token_ids is not None:
+        inputs["prompt_token_ids"] = prompt_token_ids
+    if is_token_ids is not None:
+        inputs["is_token_ids"] = is_token_ids
 
     return inputs
 
diff --git a/vllm/inputs/llm.py b/vllm/inputs/llm.py
index ff22af819a28..918098b758ca 100644
--- a/vllm/inputs/llm.py
+++ b/vllm/inputs/llm.py
@@ -125,6 +125,17 @@ class EmbedsPrompt(_PromptOptions):
     prompt: NotRequired[str]
     """The prompt text corresponding to the token embeddings, if available."""
 
+    prompt_token_ids: NotRequired[list[int]]
+    """Token IDs for mixed-mode inputs (chat completion with
+    `prompt_embeds` content parts). The tokens at positions where 
+    `prompt_is_token_ids` is `False` are placeholder tokens that 
+    get replaced by entries from `prompt_embeds` in the forward pass."""
+
+    prompt_is_token_ids: NotRequired[list[bool]]
+    """Per-position mask, `True` uses the real token ID, `False` uses
+    the corresponding entry from `prompt_embeds`. 
+    Must be the same length as `prompt_token_ids` when both are set."""
+
 
 DecoderOnlyPrompt: TypeAlias = (
     str | TextPrompt | list[int] | TokensPrompt | EmbedsPrompt
diff --git a/vllm/ir/__init__.py b/vllm/ir/__init__.py
new file mode 100644
index 000000000000..78e6e1acba04
--- /dev/null
+++ b/vllm/ir/__init__.py
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from . import ops
+from .op import enable_torch_wrap, register_op, set_default_torch_wrap
+
+__all__ = ["enable_torch_wrap", "register_op", "set_default_torch_wrap", "ops"]
diff --git a/vllm/ir/op.py b/vllm/ir/op.py
new file mode 100644
index 000000000000..8e82b5d8c7ec
--- /dev/null
+++ b/vllm/ir/op.py
@@ -0,0 +1,658 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import contextlib
+import inspect
+import traceback
+from collections.abc import Callable
+from pathlib import Path
+from typing import Any, ClassVar, Literal, overload
+
+import regex as re
+import torch
+from torch.library import Library, infer_schema
+
+from vllm.ir.tolerances import DEFAULT_TOLERANCES, ToleranceSpec
+from vllm.ir.util import hash_source, weak_cache
+from vllm.logger import init_logger
+from vllm.logging_utils import lazy, tensors_str_no_data
+
+InputGenerator = Callable[..., tuple[Any, ...]]
+
+vllm_ir_torch_lib = Library("vllm_ir", "FRAGMENT")  # IR op lib; monkeypatch in tests.
+
+logger = init_logger(__name__)
+
+
+def _torch_ops_subtree(lib: Any) -> Any:
+    """``torch.ops`` subtree for ``lib.ns``; fall back if doc mocks replace ``ns``."""
+    ns = getattr(lib, "ns", None)
+    if isinstance(ns, str):
+        return getattr(torch.ops, ns)
+    return torch.ops.vllm_ir
+
+
+_NAME_PATTERN = re.compile(r"^[a-z_][a-z_0-9]*$")
+
+RESERVED_PROVIDERS = ["native", "unfused"]
+"""Providers that are reserved and cannot be used for custom implementations."""
+
+
+def _validate_name(name: str, entity_type: str) -> None:
+    """Validate that a name matches the required pattern `[a-z_][a-z_0-9]*`."""
+    if not _NAME_PATTERN.match(name):
+        raise ValueError(
+            f"{entity_type} name '{name}' is invalid. "
+            f"Names must start with a letter or underscore, "
+            f"followed by lowercase letters, underscores, or digits only."
+        )
+
+
+_ENABLE_TORCH_WRAP: bool = True
+"""Global override flag to control torch op layer wrapping."""
+
+
+def set_default_torch_wrap(enable: bool = True) -> None:
+    """
+    Permanently set the torch wrap flag.
+    """
+    global _ENABLE_TORCH_WRAP
+    _ENABLE_TORCH_WRAP = enable
+
+
+@contextlib.contextmanager
+def enable_torch_wrap(enable: bool = True):
+    """
+    Context manager to enable/disable torch custom op wrapping for vLLM IR ops.
+    When torch wrapping is disabled, the torch custom op layer is skipped
+    and IR ops dispatch directly to the implementation.
+    Helpful for avoiding torch dispatch overhead in eager mode
+    and avoiding the need for lowering for platforms not using Inductor.
+    """
+
+    global _ENABLE_TORCH_WRAP
+    old = _ENABLE_TORCH_WRAP
+    try:
+        _ENABLE_TORCH_WRAP = enable
+        yield
+    finally:
+        _ENABLE_TORCH_WRAP = old
+
+
+# 0-param decorator overload (no inplace)
+@overload
+def register_op(f: Callable[..., Any]) -> "IrOp": ...
+
+
+# parametrized decorator with allow_inplace=False (default)
+@overload
+def register_op(
+    *,
+    name: str | None = None,
+    activations: list[str] | None = None,
+    allow_inplace: Literal[False] = False,
+) -> Callable[[Callable[..., Any]], "IrOp"]: ...
+
+
+# parametrized decorator with allow_inplace=True
+@overload
+def register_op(
+    *,
+    name: str | None = None,
+    activations: list[str] | None = None,
+    allow_inplace: Literal[True],
+) -> Callable[[Callable[..., Any]], "IrOpInplace"]: ...
+
+
+def register_op(
+    f: Callable | None = None,
+    *,
+    name: str | None = None,
+    activations: list[str] | None = None,
+    allow_inplace: bool = False,
+) -> "IrOp | Callable[[Callable], IrOp]":
+    """
+    Register a new vLLM IR op.
+
+    :param f: the native implementation of the op
+    :param name: the name of the op, defaults to the function name
+    :param activations: list of activation params, defaults to params starting with 'x'
+    :param allow_inplace: add a maybe_inplace overload that allows inplace impls
+    :return: the IrOp object if f is provided, otherwise a decorator
+
+    Example usage:
+    ```python
+    @vllm.ir.register_op
+    def my_add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        return x + y
+
+
+    @vllm.ir.register_op(name="custom_mul")
+    def multiply(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        return x * y"""
+
+    def decorator(_f: Callable):
+        op_name: str = _f.__name__ if name is None else name
+        _validate_name(op_name, "Op")
+        assert op_name not in IrOp.registry, f"Op '{op_name}' is already registered."
+        # Slice out the decorator function frames from the stack
+        stack = traceback.format_stack()[:-2]
+        if allow_inplace:
+            op: IrOp = IrOpInplace(op_name, _f, activations, stack)
+        else:
+            op = IrOp(op_name, _f, activations, stack)
+        IrOp.registry[op_name] = op
+        return op
+
+    if f is not None:
+        return decorator(f)
+
+    return decorator
+
+
+class IrOp:
+    registry: ClassVar[dict[str, "IrOp"]] = {}
+
+    name: str
+    impls: dict[str, "IrOpImpl"]
+    allow_inplace: bool = False
+
+    def __init__(
+        self,
+        name: str,
+        native_impl: Callable,
+        activations: list[str] | None = None,
+        registration_stack: list[str] | None = None,
+    ):
+        self._py_signature = inspect.signature(native_impl)
+        if any(
+            p.kind == inspect.Parameter.KEYWORD_ONLY
+            for p in self._py_signature.parameters.values()
+        ):
+            raise ValueError(
+                f"Op {name} has keyword-only arguments which are not currently "
+                f"supported. That's because kwargs are not allowed during lowering."
+            )
+
+        # By convention, we consider parameters starting with 'x' as activations.
+        if activations is None:
+            activations = [
+                p.name
+                for p in self._py_signature.parameters.values()
+                if p.name.startswith("x")
+            ]
+
+        self.name = name
+        self._docstring = inspect.getdoc(native_impl) or ""
+        self._registration_stack = registration_stack or []
+        self.impls: dict[str, IrOpImpl] = {}
+        self.activations = activations
+        self.activation_indices = [
+            i
+            for i, p in enumerate(self._py_signature.parameters.values())
+            if p.name in activations
+        ]
+        self._priority_impls: list[IrOpImpl] = []
+        self._schema_str = infer_schema(native_impl, mutates_args=[])
+        self._input_generator: InputGenerator | None = None
+        self._tolerance_overrides: ToleranceSpec = {}
+
+        # native implementation
+        self.impls["native"] = IrOpImpl(
+            self,
+            "native",
+            native_impl,
+            # always supported
+            supported=True,
+            supports_args=None,
+            registration_stack=self._registration_stack,
+        )
+
+        # By default, fake routes directly to native,
+        # can be overridden by register_fake
+        self._fake_fn = native_impl
+
+        # torch registration (resolve ``torch.ops`` subtree from ``lib.ns``)
+        lib = vllm_ir_torch_lib
+        lib.define(self.name + self._schema_str)
+        # CompositeExplicitAutograd is not decomposed
+        # by ATen IR normalization in AOTAutograd
+        lib.impl(self.name, self._inner_call, dispatch_key="CompositeExplicitAutograd")
+        lib._register_fake(self.name, self._fake_call)
+        torch_ops = _torch_ops_subtree(lib)
+        assert hasattr(torch_ops, name)
+        self.torch_op: torch._ops.OpOverload = getattr(torch_ops, name).default
+
+    def register_fake(self, fn: Callable) -> Callable:
+        """
+        Register a fake impl for the torch custom op. If this method is not called,
+        the native implementation is used directly for the fake implementation.
+        """
+        self._fake_fn = fn
+        return fn
+
+    def _fake_call(self, *args, **kwargs) -> Any:
+        """
+        Call to the fake implementation of the op. We use indirection because we want
+        users to be able to register fake later but also want it to fall back to native
+        directly by default, instead of going through the dispatching mechanism.
+        """
+        return self._fake_fn(*args, **kwargs)
+
+    def register_impl(
+        self,
+        provider: str,
+        *,
+        supported: bool = True,
+        supports_args: Callable[..., bool] | None = None,
+        inplace: bool = False,
+    ):
+        """
+        Register an implementation for this custom op.
+        :param provider: The name of the provider, must be unique.
+        :param supported: Static support check, use this to check platform support.
+        :param supports_args: Dynamic arg support check, used for types and shapes.
+        :param inplace: Does this op reuse activation input memory for outputs
+        :return: A decorator that registers the implementation.
+
+        The decorated function must have the same semantics and signature as
+        the native implementation.
+
+        The provider name must be unique and not one of the RESERVED_PROVIDERS.
+        The supported and supports_args parameters should not be used to implement
+        custom enablement logic based on global state (e.g. environment variables).
+        Instead, supported param should only be used to check for platform support
+        (e.g. whether a specific hardware or library is available).
+        supports_args should be used to check whether the provided arguments are
+        compatible with the implementation.
+        For custom enablement logic, set op impl priority.
+
+        Example:
+        ```python
+        @my_op.register_impl("my_provider", supported=torch.cuda.is_available())
+        def my_provider_impl(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: ...
+        ```
+
+        """
+        assert provider not in RESERVED_PROVIDERS, (
+            f"Provider name {provider} is reserved."
+        )
+        _validate_name(provider, "Provider")
+
+        def _register_impl(f: Callable):
+            # Slice out the decorator function from the stack
+            stack = traceback.format_stack()[:-1]
+            impl = IrOpImpl(self, provider, f, supported, supports_args, inplace, stack)
+            self.impls[provider] = impl
+
+            if self.get_priority():
+                logger.warning(
+                    "Warning: registering new impl %s for op %s while priority is set.",
+                    provider,
+                    self.name,
+                )
+
+            return impl
+
+        return _register_impl
+
+    def _inner_call(self, *args, **kwargs) -> Any:
+        """
+        Eager call to torch op lands here. When torch wrapping is disabled,
+        __call__ routes straight here instead of going through torch op dispatching.
+        """
+        impl = self.dispatch(*args, **kwargs)
+
+        # Default overload must be functional,
+        # use func_impl_fn to correctly handle inplace impls.
+        return impl.func_impl_fn(*args, **kwargs)
+
+    def apply_arg_defaults(self, args) -> tuple:
+        """
+        Return args with default values applied.
+        Defaults are taken from the native implementation signature.
+
+        SHOULD NOT BE USED IN THE DISPATCH PATH (SLOW).
+        Only for Inductor lowering.
+        """
+        bound_args = self._py_signature.bind(*args)
+        bound_args.apply_defaults()
+        return bound_args.args
+
+    def dispatch(self, *args, **kwargs) -> "IrOpImpl":
+        """
+        Dispatch to the appropriate implementation based on current priority
+        and argument support checks. Returns the selected IrOpImpl.
+
+        THIS FUNCTION IS ON THE HOT PATH (OP DISPATCH), MUST BE FAST.
+        """
+        if not self._priority_impls:
+            if not torch.compiler.is_compiling():
+                # Logging not compatible with Dynamo tracing
+                # (this code is exposed when torch wrapping is disabled)
+                logger.warning_once(
+                    "Priority not set for op %s, using native implementation.",
+                    self.name,
+                )
+            return self.impls["native"]
+
+        for impl in self._priority_impls:
+            if not impl.supported:
+                raise ValueError(
+                    f"Implementation {impl.provider} for op {self.name} not supported. "
+                    f"All implementations in priority list must be supported."
+                )
+            if impl.supports_args(*args, **kwargs):
+                return impl
+
+            if not torch.compiler.is_compiling():
+                logger.debug(
+                    "Skipping provider %s because it does not support "
+                    "%s with args=%s kwargs=%s",
+                    impl.provider,
+                    self.name,
+                    lazy(lambda: tensors_str_no_data(args)),
+                    lazy(lambda: tensors_str_no_data(kwargs)),
+                )
+
+        raise RuntimeError(
+            "Priority set incorrectly: the last implementation must "
+            "support all args (can be native). This is likely an internal bug"
+        )
+
+    def __call__(self, *args, **kwargs) -> Any:
+        if not _ENABLE_TORCH_WRAP:
+            return self._inner_call(*args, **kwargs)
+
+        return self.torch_op(*args, **kwargs)
+
+    def __repr__(self) -> str:
+        """Return unambiguous string representation."""
+        return f"IrOp('{self.name}')"
+
+    def __str__(self) -> str:
+        """Return human-readable string representation using docstring."""
+        if not self._docstring:
+            return f"IrOp('{self.name}')"
+        first_line = self._docstring.split("\n")[0].strip()
+        return f"IrOp('{self.name}') - {first_line}"
+
+    def get_priority(self) -> list[str]:
+        """Get the current dispatch priority for implementations for this op."""
+        return [p.provider for p in self._priority_impls]
+
+    def _filter_priority_impls(self, priority: list[str]) -> list["IrOpImpl"]:
+        assert all(p in self.impls for p in priority), (
+            "All providers in priority must be registered implementations."
+        )
+        filtered_impls: list[IrOpImpl] = []
+        for p in priority:
+            impl = self.impls[p]
+            if not impl.supported:
+                # Skip unsupported implementations
+                continue
+
+            filtered_impls.append(impl)
+
+            # If all args are supported, skip other implementations
+            if impl.supports_all_args:
+                return filtered_impls
+
+        logger.warning_once(
+            "Op %s: No implementation in priority list supports all args, "
+            "execution fallback to native is possible. To silence this warning, "
+            "explicitly add 'native' to the end of the priority list",
+            self.name,
+        )
+        filtered_impls.append(self.impls["native"])
+        return filtered_impls
+
+    def set_default(self, priority: list[str]) -> None:
+        """
+        Permanently set the dispatch priority for this op. Use this for
+        process-lifetime setup (e.g., worker startup). For scoped overrides,
+        use ``set_priority`` instead.
+        """
+        self._priority_impls = self._filter_priority_impls(priority)
+        logger.debug(
+            "Priority for vllm.ir.%s set to %s",
+            self.name,
+            lazy(lambda: [p.provider for p in self._priority_impls]),
+        )
+
+    @contextlib.contextmanager
+    def set_priority(self, priority: list[str]):
+        """
+        Context manager to set the dispatch priority for implementations for this op.
+        """
+        old_priority_impls = self._priority_impls
+        try:
+            self._priority_impls = self._filter_priority_impls(priority)
+            logger.debug(
+                "Priority for vllm.ir.%s set to %s",
+                self.name,
+                lazy(lambda: [p.provider for p in self._priority_impls]),
+            )
+            yield
+        finally:
+            self._priority_impls = old_priority_impls
+
+    def supported_providers(self) -> list[str]:
+        return [p.provider for p in self.impls.values() if p.supported]
+
+    @property
+    def has_input_generator(self) -> bool:
+        return self._input_generator is not None
+
+    def register_input_generator(self, fn: InputGenerator) -> InputGenerator:
+        self._input_generator = fn
+        return fn
+
+    def generate_inputs(self, **kwargs: Any) -> tuple[Any, ...]:
+        if self._input_generator is None:
+            raise RuntimeError(
+                f"No input generator registered for op '{self.name}'. "
+                f"Use @ir.ops.{self.name}.register_input_generator"
+            )
+        return self._input_generator(**kwargs)
+
+    def override_tolerance(
+        self, dtype: torch.dtype, *, atol: float, rtol: float
+    ) -> None:
+        self._tolerance_overrides[dtype] = {"atol": atol, "rtol": rtol}
+
+    def get_tolerance(self, dtype: torch.dtype) -> dict[str, float]:
+        if dtype in self._tolerance_overrides:
+            return self._tolerance_overrides[dtype]
+        if dtype in DEFAULT_TOLERANCES:
+            return DEFAULT_TOLERANCES[dtype]
+        raise ValueError(
+            f"No tolerance defined for dtype {dtype} in op '{self.name}'. "
+            f"Use op.override_tolerance({dtype}, atol=..., rtol=...) "
+            f"or add {dtype} to DEFAULT_TOLERANCES."
+        )
+
+
+class IrOpInplace(IrOp):
+    """IR op with inplace support via maybe_inplace."""
+
+    maybe_inplace: "IrOpInplaceOverload"
+    allow_inplace: bool = True
+
+    def __init__(
+        self,
+        name: str,
+        native_impl: Callable,
+        activations: list[str] | None = None,
+        registration_stack: list[str] | None = None,
+    ):
+        super().__init__(name, native_impl, activations, registration_stack)
+
+        # Create the inplace overload
+        self.maybe_inplace = IrOpInplaceOverload(self)
+
+
+class IrOpInplaceOverload:
+    def __init__(self, op: IrOp):
+        params, returns = op._schema_str.split(" -> ")
+        n_outputs = returns.count("Tensor")
+
+        assert returns.count("Tensor") == len(op.activations), (
+            "Inplace overload requires the same number of outputs as activations."
+        )
+
+        assert returns.count(",") == n_outputs - 1, (
+            "Inplace overload only supports Tensor outputs for now."
+        )
+
+        self.op = op
+        self.name = f"{op.name}.maybe_inplace"
+        self._schema_str = infer_schema(
+            op.impls["native"].impl_fn, mutates_args=op.activations
+        )
+
+        # torch registration (resolve ``torch.ops`` subtree from ``lib.ns``)
+        lib = vllm_ir_torch_lib
+        lib.define(self.name + self._schema_str)
+        lib.impl(self.name, self._inner_call, dispatch_key="CompositeExplicitAutograd")
+        # fake goes to default overload for now
+        lib._register_fake(self.name, self.op._fake_call)
+
+        torch_ops = _torch_ops_subtree(lib)
+        assert hasattr(getattr(torch_ops, self.op.name), "maybe_inplace")
+        self.torch_op = getattr(torch_ops, self.op.name).maybe_inplace
+
+    def __call__(self, *args, **kwargs) -> Any:
+        if not _ENABLE_TORCH_WRAP:
+            return self._inner_call(*args, **kwargs)
+
+        return self.torch_op(*args, **kwargs)
+
+    def _inner_call(self, *args, **kwargs) -> Any:
+        # Calling the maybe_inplace overload means we can use inplace impls directly.
+        impl = self.op.dispatch(*args, **kwargs)
+        return impl.impl_fn(*args, **kwargs)
+
+
+class IrOpImpl:
+    def __init__(
+        self,
+        op: IrOp,
+        provider: str,
+        impl_fn: Callable,
+        supported: bool,
+        supports_args: Callable[..., bool] | None,
+        inplace: bool = False,
+        registration_stack: list[str] | None = None,
+    ):
+        assert provider not in op.impls, (
+            f"Implementation for provider {provider} already registered."
+        )
+        # Native also uses this path, so we allow it here.
+        assert provider == "native" or provider not in RESERVED_PROVIDERS, (
+            f"Provider name {provider} is reserved."
+        )
+
+        # Enforce the exact same schema as the native implementation.
+        # This takes care of names, types, and defaults.
+        schema = infer_schema(impl_fn, mutates_args=[])
+        if schema != op._schema_str:
+            raise ValueError(
+                f"Implementation for provider {provider} has schema '{schema}' which "
+                f"does not match native schema '{op._schema_str}' for op {op.name}."
+            )
+
+        if supports_args is not None:
+            if not callable(supports_args):
+                raise ValueError(
+                    f"supports_args for provider {provider} must be a callable"
+                )
+
+            # We also manually validate the supports_args signature.
+            # Matching signatures allow faster dispatch on the hotpath.
+
+            # Check that supports_args does not have keyword-only parameters
+            supports_args_signature = inspect.signature(supports_args)
+            params = supports_args_signature.parameters
+            if any(p.kind == inspect.Parameter.KEYWORD_ONLY for p in params.values()):
+                raise ValueError(
+                    f"supports_args for provider {provider} "
+                    f"cannot have keyword-only parameters"
+                )
+
+            # Check that supports_args has the same total number of parameters
+            op_params = op._py_signature.parameters
+            if len(params) != len(op_params):
+                raise ValueError(
+                    f"supports_args for provider {provider} must have the same number "
+                    f"of parameters ({len(params)}) as the native implementation "
+                    f"({len(op_params)})"
+                )
+
+            # Check that names and defaults match for supports_args
+            for p, op_p in zip(params.values(), op_params.values()):
+                if p.name != op_p.name:
+                    raise ValueError(
+                        f"supports_args for provider {provider} has parameter "
+                        f"'{p.name}' which does not match native parameter "
+                        f"'{op_p.name}'"
+                    )
+                if p.default != op_p.default:
+                    raise ValueError(
+                        f"supports_args for provider {provider} has parameter "
+                        f"'{p.name}' with default {p.default} which does not match "
+                        f"native default {op_p.default}'"
+                    )
+
+        if inplace:
+            assert op.allow_inplace, (
+                f"Inplace implementation cannot be registered for op {op.name}"
+                f" that does not allow inplace."
+            )
+
+        self.op = op
+        self.provider = provider
+        self.impl_fn = impl_fn
+        self.supported = supported
+        self._supports_args = supports_args
+        self.inplace = inplace
+        self._registration_stack = registration_stack or []
+
+    @property
+    def supports_all_args(self) -> bool:
+        """Check if this implementation supports all args unconditionally."""
+        return self._supports_args is None
+
+    def supports_args(self, *args, **kwargs) -> bool:
+        if self._supports_args is None:
+            return True
+
+        return self._supports_args(*args, **kwargs)
+
+    @weak_cache
+    def uuid(self):
+        """
+        Compile-time hash to uniquely determine whether the implementation has changed.
+        Used by vllm-compile hash mechanism and torch.compile lowering pass uuid to
+        control the vLLM compile cache and AOTAutograd/Inductor caches respectively.
+
+        Source file contents do not change so we cache uuid.
+        TODO(luka): Cache the file hash as multiple impls are likely in the same file.
+        """
+        sources = [Path(inspect.getfile(self.impl_fn))]
+        return hash_source(*sources)
+
+    def func_impl_fn(self, *args, **kwargs) -> Any:
+        """
+        Copy any inputs in activations if this is an inplace impl,
+        to ensure functional semantics.
+        """
+        if not self.inplace:
+            return self.impl_fn(*args, **kwargs)
+
+        # copy activations to ensure functional semantics
+        new_args = list(args)
+        for i in self.op.activation_indices:
+            assert isinstance(args[i], torch.Tensor)
+            new_args[i] = args[i].clone()
+
+        return self.impl_fn(*new_args, **kwargs)
diff --git a/vllm/ir/ops/__init__.py b/vllm/ir/ops/__init__.py
new file mode 100644
index 000000000000..d4d71afef723
--- /dev/null
+++ b/vllm/ir/ops/__init__.py
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from .layernorm import fused_add_rms_norm, rms_norm
+
+__all__ = ["rms_norm", "fused_add_rms_norm"]
diff --git a/vllm/ir/ops/layernorm.py b/vllm/ir/ops/layernorm.py
new file mode 100644
index 000000000000..33a71b8f853f
--- /dev/null
+++ b/vllm/ir/ops/layernorm.py
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+from torch import Tensor
+
+from ..op import register_op
+
+
+@register_op
+def rms_norm(
+    x: Tensor, weight: Tensor | None, epsilon: float, variance_size: int | None = None
+) -> Tensor:
+    """Weighted root-mean-square layer normalization"""
+    orig_dtype = x.dtype
+    x = x.to(torch.float32)
+    x_var = x if variance_size is None else x[..., :variance_size]
+    variance = x_var.pow(2).mean(dim=-1, keepdim=True)
+    x = x * torch.rsqrt(variance + epsilon)
+    if weight is not None:
+        x = x.to(weight.dtype) * weight
+    return x.to(orig_dtype)
+
+
+@rms_norm.register_input_generator
+def _rms_norm_input_generator(
+    num_tokens: int, hidden_size: int, dtype: torch.dtype, epsilon: float = 1e-5
+) -> tuple:
+    x = torch.randn(num_tokens, hidden_size, dtype=dtype)
+    weight = torch.randn(hidden_size, dtype=dtype)
+    return x, weight, epsilon
+
+
+# Reductions in rms_norm accumulate rounding error at large shapes
+# (e.g. 32768x16384), causing a few elements out of millions to exceed
+# the default float16 tolerance.
+rms_norm.override_tolerance(torch.float16, atol=1e-2, rtol=2e-3)
+
+
+@register_op(allow_inplace=True)
+def fused_add_rms_norm(
+    x: Tensor,
+    x_residual: Tensor,
+    weight: Tensor | None,
+    epsilon: float,
+    variance_size: int | None = None,
+) -> tuple[Tensor, Tensor]:
+    """Fused add and weighted root-mean-square layer normalization"""
+    orig_dtype = x.dtype
+    x = x.to(torch.float32)
+    x = x + x_residual.to(torch.float32)
+    x_residual = x.to(orig_dtype)
+
+    x_var = x if variance_size is None else x[..., :variance_size]
+    variance = x_var.pow(2).mean(dim=-1, keepdim=True)
+    x = x * torch.rsqrt(variance + epsilon)
+    if weight is not None:
+        x = x.to(weight.dtype) * weight
+    return x.to(orig_dtype), x_residual
+
+
+# fused_add_rms_norm has similar rounding error accumulation as rms_norm
+fused_add_rms_norm.override_tolerance(torch.float16, atol=1e-2, rtol=2e-3)
+
+
+@fused_add_rms_norm.register_input_generator
+def _fused_add_rms_norm_input_generator(
+    num_tokens: int, hidden_size: int, dtype: torch.dtype, epsilon: float = 1e-5
+) -> tuple:
+    x = torch.randn(num_tokens, hidden_size, dtype=dtype)
+    x_residual = torch.randn(num_tokens, hidden_size, dtype=dtype)
+    weight = torch.randn(hidden_size, dtype=dtype)
+    return x, x_residual, weight, epsilon
diff --git a/vllm/ir/tolerances.py b/vllm/ir/tolerances.py
new file mode 100644
index 000000000000..a794a939765a
--- /dev/null
+++ b/vllm/ir/tolerances.py
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+ToleranceSpec = dict[torch.dtype, dict[str, float]]
+
+# Default tolerances for comparing IR op implementations against native.
+# These are intentionally conservative (permissive) to avoid false failures
+# across different hardware and kernel implementations. Ops that need tighter
+# or looser bounds should use override_tolerance.
+DEFAULT_TOLERANCES: ToleranceSpec = {
+    # 52-bit mantissa; machine epsilon ~1.1e-16
+    torch.float64: {"atol": 1e-8, "rtol": 1e-8},
+    # 23-bit mantissa; machine epsilon ~1.2e-7.
+    # Values from PyTorch test_transformers.py reference defaults.
+    torch.float32: {"atol": 1e-5, "rtol": 1.3e-6},
+    # 10-bit mantissa; machine epsilon ~9.8e-4.
+    # Standard tolerance used across vLLM kernel tests.
+    torch.float16: {"atol": 1e-3, "rtol": 1e-3},
+    # 7-bit mantissa; machine epsilon ~7.8e-3.
+    # Wider rtol than float16 to account for the coarser mantissa.
+    torch.bfloat16: {"atol": 1e-3, "rtol": 1.6e-2},
+    # 3-bit mantissa; machine epsilon ~6.25e-2.
+    # Derived from vLLM fp8 kernel tests (merge_attn_states, silu_mul_fp8).
+    torch.float8_e4m3fn: {"atol": 1e-1, "rtol": 1e-1},
+    # 2-bit mantissa; machine epsilon ~1.25e-1.
+    # Wider than e4m3fn due to the smaller mantissa.
+    torch.float8_e5m2: {"atol": 2e-1, "rtol": 2e-1},
+    # 1-bit mantissa; machine epsilon ~2.5e-1. Packed pair format (x2).
+    # Derived from vLLM fp4 tests (test_silu_mul_nvfp4_quant: atol=3e-1).
+    torch.float4_e2m1fn_x2: {"atol": 3e-1, "rtol": 3e-1},
+    # Integer quantized; off-by-one from rounding is expected.
+    # rtol=0 because relative error is meaningless for small integers.
+    torch.int8: {"atol": 1, "rtol": 0},
+}
diff --git a/vllm/ir/util.py b/vllm/ir/util.py
new file mode 100644
index 000000000000..ac8a06155da6
--- /dev/null
+++ b/vllm/ir/util.py
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import functools
+import hashlib
+import inspect
+import types
+import weakref
+from pathlib import Path
+from typing import Any
+
+
+def hash_source(*srcs: str | Any) -> str:
+    """
+    Utility method to hash the sources of functions or objects.
+    :param srcs: strings or objects to add to the hash.
+    Objects and functions have their source inspected.
+    :return:
+    """
+    hasher = hashlib.sha256()
+    for src in srcs:
+        if src is None:
+            src_str = "None"
+        elif isinstance(src, str):
+            src_str = src
+        elif isinstance(src, Path):
+            src_str = src.read_text()
+        elif isinstance(src, (types.FunctionType, type)):
+            src_str = inspect.getsource(src)
+        else:
+            # object instance
+            src_str = inspect.getsource(src.__class__)
+        hasher.update(src_str.encode("utf-8"))
+    return hasher.hexdigest()
+
+
+def weak_lru_cache(maxsize: int | None = 128, typed: bool = False):
+    """
+    LRU Cache decorator that keeps a weak reference to 'self'.
+    This avoids memory leakage, which happens when functools.lru_cache
+    stores a reference to self in the global cache.
+
+    Taken from: https://stackoverflow.com/a/68052994/5082708
+    """
+
+    def wrapper(func):
+        @functools.lru_cache(maxsize, typed)
+        def _func(_self, *args, **kwargs):
+            return func(_self(), *args, **kwargs)
+
+        @functools.wraps(func)
+        def inner(self, *args, **kwargs):
+            return _func(weakref.ref(self), *args, **kwargs)
+
+        return inner
+
+    return wrapper
+
+
+def weak_cache(user_function, /):
+    """Simple weak equivalent to functools.cache"""
+    return weak_lru_cache(maxsize=None)(user_function)
diff --git a/vllm/kernels/__init__.py b/vllm/kernels/__init__.py
index 3d0c9805e9ab..075bc01f3ba3 100644
--- a/vllm/kernels/__init__.py
+++ b/vllm/kernels/__init__.py
@@ -1,3 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Kernel implementations for vLLM."""
+
+from . import aiter_ops, oink_ops, vllm_c, xpu_ops
+
+__all__ = ["vllm_c", "aiter_ops", "oink_ops", "xpu_ops"]
diff --git a/vllm/kernels/aiter_ops.py b/vllm/kernels/aiter_ops.py
new file mode 100644
index 000000000000..273bc58935b7
--- /dev/null
+++ b/vllm/kernels/aiter_ops.py
@@ -0,0 +1,146 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import functools
+
+import torch
+from torch import Tensor
+from torch.library import Library
+
+from vllm import ir
+from vllm.platforms import current_platform
+from vllm.utils.torch_utils import direct_register_custom_op
+
+current_platform.import_kernels()
+
+
+def is_aiter_found() -> bool:
+    from importlib.util import find_spec
+
+    return find_spec("aiter") is not None
+
+
+aiter_lib = Library("vllm_aiter", "FRAGMENT")
+"""
+This library holds torch custom ops for wrapped AITER ops.
+Many AITER ops want to remain invisible to torch.compile even after lowering.
+They are thus wrapped into torch custom ops inside the IR op implementations.
+"""
+
+direct_register_aiter_op = functools.partial(
+    direct_register_custom_op, target_lib=aiter_lib
+)
+"""Syntactic sugar for registering AITER custom ops."""
+
+AITER_SUPPORTED = is_aiter_found()
+"""Most kernels in this file are supported if AITER is installed."""
+
+rms_no_var_16bit_only = (
+    lambda x, weight, epsilon, variance_size=None: variance_size is None
+    and x.dtype in (torch.float16, torch.bfloat16)
+    and (weight is None or weight.dtype == x.dtype)
+)
+"""AITER rms_norm only supports float16 and bfloat16 acts, no var_size override,
+and requires weight dtype to match x dtype."""
+
+
+@ir.ops.rms_norm.register_impl(
+    "aiter", supports_args=rms_no_var_16bit_only, supported=AITER_SUPPORTED
+)
+def rms_norm(
+    x: Tensor, weight: Tensor | None, epsilon: float, variance_size: int | None = None
+) -> Tensor:
+    assert variance_size is None
+    assert x.dtype in (torch.float16, torch.bfloat16)
+    if weight is None:
+        weight = torch.ones(x.shape[-1], device=x.device, dtype=x.dtype)
+    return torch.ops.vllm_aiter.rms_norm(x, weight, epsilon)
+
+
+def _rms_norm_impl(x: Tensor, weight: Tensor, variance_epsilon: float) -> Tensor:
+    from aiter import rms_norm
+
+    if x.dim() > 2:
+        x_original_shape = x.shape
+        x = x.reshape(-1, x_original_shape[-1])
+        x = rms_norm(x, weight, variance_epsilon)
+        return x.reshape(x_original_shape)
+
+    return rms_norm(x, weight, variance_epsilon)
+
+
+def _rms_norm_fake(x: Tensor, weight: Tensor, variance_epsilon: float) -> Tensor:
+    return torch.empty_like(x)
+
+
+direct_register_aiter_op(
+    op_name="rms_norm", op_func=_rms_norm_impl, fake_impl=_rms_norm_fake
+)
+
+rms_add_no_var_16bit_only = (
+    lambda x, x_residual, weight, epsilon, variance_size=None: variance_size is None
+    and x.dtype in (torch.float16, torch.bfloat16)
+    and (weight is None or weight.dtype == x.dtype)
+)
+"""
+AITER fused_add_rms_norm only supports 16-bit activations and no var_size override.
+Requires weight dtype to match x dtype.
+"""
+
+
+@ir.ops.fused_add_rms_norm.register_impl(
+    "aiter", supports_args=rms_add_no_var_16bit_only, supported=AITER_SUPPORTED
+)
+def fused_add_rms_norm(
+    x: Tensor,
+    x_residual: Tensor,
+    weight: Tensor | None,
+    epsilon: float,
+    variance_size: int | None = None,
+) -> tuple[Tensor, Tensor]:
+    assert variance_size is None
+    assert x.dtype in (torch.float16, torch.bfloat16)
+    if weight is None:
+        weight = torch.ones(x.shape[-1], device=x.device, dtype=x.dtype)
+    return torch.ops.vllm_aiter.fused_add_rms_norm(x, x_residual, weight, epsilon)
+
+
+def _rocm_aiter_rmsnorm2d_fwd_with_add_impl(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    weight: torch.Tensor,
+    variance_epsilon: float,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    from aiter import rmsnorm2d_fwd_with_add
+
+    # TODO can out = x and residual_out = residual to save memory?
+    #  Need to check if the kernel supports in-place residual output
+    #  (if yes set mutates_args and inplace)
+    residual_out = torch.empty_like(residual)
+    out = torch.empty_like(x)
+    rmsnorm2d_fwd_with_add(
+        out,  # output
+        x,  # input
+        residual,  # residual input
+        residual_out,  # residual output
+        weight,
+        variance_epsilon,
+    )
+    return out, residual_out
+
+
+def _rocm_aiter_rmsnorm2d_fwd_with_add_fake(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    weight: torch.Tensor,
+    variance_epsilon: float,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    residual_out = torch.empty_like(residual)
+    out = torch.empty_like(x)
+    return out, residual_out
+
+
+direct_register_aiter_op(
+    op_name="fused_add_rms_norm",
+    op_func=_rocm_aiter_rmsnorm2d_fwd_with_add_impl,
+    fake_impl=_rocm_aiter_rmsnorm2d_fwd_with_add_fake,
+)
diff --git a/vllm/kernels/helion/__init__.py b/vllm/kernels/helion/__init__.py
index 2568baa20dae..8c05c428bb07 100644
--- a/vllm/kernels/helion/__init__.py
+++ b/vllm/kernels/helion/__init__.py
@@ -3,11 +3,13 @@
 """Helion integration for vLLM."""
 
 import vllm.kernels.helion.ops  # noqa: F401  Auto-register all Helion ops
+from vllm.kernels.helion.case_key import CaseKey
 from vllm.kernels.helion.config_manager import (
     ConfigManager,
     ConfigSet,
 )
 from vllm.kernels.helion.register import (
+    ConfigPicker,
     ConfiguredHelionKernel,
     HelionKernelWrapper,
     get_kernel_by_name,
@@ -19,9 +21,11 @@
 
 __all__ = [
     # Config management
+    "CaseKey",
     "ConfigManager",
     "ConfigSet",
     # Kernel registration
+    "ConfigPicker",
     "ConfiguredHelionKernel",
     "HelionKernelWrapper",
     "get_kernel_by_name",
diff --git a/vllm/kernels/helion/case_key.py b/vllm/kernels/helion/case_key.py
new file mode 100644
index 000000000000..32b544de39cc
--- /dev/null
+++ b/vllm/kernels/helion/case_key.py
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Structured key for identifying kernel config/autotune/benchmark cases.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any
+
+
+class CaseKey(dict[str, Any]):
+    """Immutable, hashable dict for identifying kernel cases.
+
+    Used as the key for config lookup, autotuning, benchmarking, and
+    input generation.  Behaves like a read-only dict and can be used
+    as a dict key or in sets.
+
+    The canonical string form (``__str__``) is stable JSON with sorted
+    keys.  Use ``CaseKey.default()`` for the default/fallback key.
+    The regular constructor requires at least one key-value pair::
+
+        CaseKey({"intermediate": 2048, "numtokens": 256})
+        CaseKey.default()  # default/fallback
+    """
+
+    def __init__(self, *args: Any, _allow_empty: bool = False, **kwargs: Any):
+        super().__init__(*args, **kwargs)
+        if not self and not _allow_empty:
+            raise TypeError(
+                "CaseKey requires at least one key-value pair. "
+                "Use CaseKey.default() for the default config key."
+            )
+        self._str: str | None = None
+        self._hash: int | None = None
+
+    @classmethod
+    def default(cls) -> CaseKey:
+        """Create a default case key (empty)."""
+        return cls(_allow_empty=True)
+
+    def __hash__(self) -> int:  # type: ignore[override]
+        if self._hash is None:
+            self._hash = hash(str(self))
+        return self._hash
+
+    def __str__(self) -> str:
+        if self._str is None:
+            self._str = json.dumps(dict(self), sort_keys=True, separators=(",", ":"))
+        return self._str
+
+    def __repr__(self) -> str:
+        if not self:
+            return "CaseKey.default()"
+        return f"CaseKey({dict(self)})"
+
+    def is_default(self) -> bool:
+        """Return True if this is the default case key (empty)."""
+        return not self
+
+    def _readonly(self, *args: Any, **kwargs: Any) -> Any:
+        raise TypeError("CaseKey is immutable")
+
+    __setitem__ = _readonly  # type: ignore[assignment]
+    __delitem__ = _readonly  # type: ignore[assignment]
+    __ior__ = _readonly  # type: ignore[assignment]
+    update = _readonly  # type: ignore[assignment]
+    pop = _readonly  # type: ignore[assignment]
+    popitem = _readonly  # type: ignore[assignment]
+    setdefault = _readonly  # type: ignore[assignment]
+    clear = _readonly  # type: ignore[assignment]
diff --git a/vllm/kernels/helion/config_manager.py b/vllm/kernels/helion/config_manager.py
index f34d936041f4..ca37a68e8101 100644
--- a/vllm/kernels/helion/config_manager.py
+++ b/vllm/kernels/helion/config_manager.py
@@ -11,25 +11,25 @@
 Each kernel has a directory: {kernel_name}/
 Inside, each GPU platform has its own JSON file: {kernel_name}/{platform}.json
 
-For example:
-    silu_mul_fp8/
-        nvidia_h100.json    # { "default": {...}, "batch_32_hidden_4096": {...} }
-        nvidia_h200.json    # { "batch_16_hidden_2048": {...} }
-
-Each platform file maps config keys to Helion config objects.
-Config keys should be structured strings that encode the relevant
-parameters (e.g., "batch_32_hidden_4096", "seq_512_heads_16", "fp8_batch_64", etc.).
-
-Classes
--------
-- ConfigSet: In-memory collection of configs for a kernel with lookup/query APIs.
-- ConfigManager: File-level operations for config persistence.
+Platform files store config entries as a JSON array::
+
+    [
+        {"key": {}, "config": {...}},
+        {"key": {"intermediate": 2048, "numtokens": 256}, "config": {...}},
+        ...,
+    ]
+
+Config keys are ``CaseKey`` instances mapping parameter names to
+values.  The default config uses ``CaseKey.default()``.
 """
 
+from __future__ import annotations
+
 import json
 from pathlib import Path
 from typing import Any
 
+from vllm.kernels.helion.case_key import CaseKey
 from vllm.logger import init_logger
 from vllm.utils.import_utils import has_helion
 
@@ -45,11 +45,13 @@
 
 
 class ConfigSet:
-    """In-memory collection of Helion configs with lookup/query capabilities."""
+    """In-memory collection of Helion configs with lookup/query capabilities.
 
-    # Type alias for nested config structure:
-    # platform -> config_key -> helion.Config
-    _ConfigDict = dict[str, dict[str, "helion.Config"]]
+    Configs are stored keyed by ``CaseKey``.  The default config
+    uses ``CaseKey.default()`` as its key.
+    """
+
+    _ConfigDict = dict[str, dict[CaseKey, "helion.Config"]]
 
     def __init__(self, kernel_name: str):
         self._kernel_name = kernel_name
@@ -59,7 +61,7 @@ def __init__(self, kernel_name: str):
     def kernel_name(self) -> str:
         return self._kernel_name
 
-    def get_config(self, platform: str, config_key: str) -> helion.Config:
+    def get_config(self, platform: str, config_key: CaseKey) -> helion.Config:
         platform_dict = self._configs.get(platform)
         if platform_dict is None:
             avail_platforms = self.get_platforms()
@@ -82,7 +84,8 @@ def get_config(self, platform: str, config_key: str) -> helion.Config:
             avail_keys = self.get_config_keys(platform)
             raise KeyError(
                 f"Config not found for kernel '{self._kernel_name}': "
-                f"config_key '{config_key}' not found for platform '{platform}'. "
+                f"config_key '{config_key}' not found for "
+                f"platform '{platform}'. "
                 f"Available config_keys: {avail_keys or '(none)'}"
             )
 
@@ -91,25 +94,34 @@ def get_config(self, platform: str, config_key: str) -> helion.Config:
     def get_platforms(self) -> list[str]:
         return sorted(self._configs.keys())
 
-    def get_config_keys(self, platform: str) -> list[str]:
+    def get_config_keys(self, platform: str) -> list[CaseKey]:
         platform_dict = self._configs.get(platform.lower())
         if platform_dict is None:
             return []
-        return sorted(platform_dict.keys())
-
-    def to_dict(self) -> dict[str, Any]:
-        result: dict[str, Any] = {}
-
-        for platform, config_keys_dict in self._configs.items():
-            result[platform] = {}
-
-            for config_key, config in config_keys_dict.items():
-                result[platform][config_key] = json.loads(config.to_json())
+        return sorted(platform_dict.keys(), key=str)
+
+    def to_config_entries(self) -> dict[str, list[dict[str, Any]]]:
+        """Serialize to config entries format for JSON output."""
+        result: dict[str, list[dict[str, Any]]] = {}
+        for platform, config_dict in self._configs.items():
+            pairs: list[dict[str, Any]] = []
+            for config_key, config in config_dict.items():
+                config_data = json.loads(config.to_json())
+                pairs.append({"key": dict(config_key), "config": config_data})
+            result[platform] = pairs
+        return result
 
+    def to_dict(self) -> dict[str, dict[CaseKey, Any]]:
+        """Return configs as a nested dict (platform -> key -> config)."""
+        result: dict[str, dict[CaseKey, Any]] = {}
+        for platform, config_dict in self._configs.items():
+            result[platform] = {
+                k: json.loads(v.to_json()) for k, v in config_dict.items()
+            }
         return result
 
     @classmethod
-    def from_dict(cls, kernel_name: str, data: dict[str, Any]) -> "ConfigSet":
+    def from_dict(cls, kernel_name: str, data: dict[str, Any]) -> ConfigSet:
         config_set = cls(kernel_name)
         count = 0
 
@@ -117,9 +129,11 @@ def from_dict(cls, kernel_name: str, data: dict[str, Any]) -> "ConfigSet":
             if platform not in config_set._configs:
                 config_set._configs[platform] = {}
 
-            for config_key, config_data in platform_data.items():
-                config = helion.Config(**config_data)
-                config_set._configs[platform][config_key] = config
+            for entry in platform_data:
+                raw_key = entry["key"]
+                key = CaseKey.default() if not raw_key else CaseKey(raw_key)
+                config = helion.Config(**entry["config"])
+                config_set._configs[platform][key] = config
                 count += 1
 
         if count > 0:
@@ -132,7 +146,10 @@ def from_dict(cls, kernel_name: str, data: dict[str, Any]) -> "ConfigSet":
         return config_set
 
     def set_config(
-        self, platform: str, config_key: str, config: "helion.Config"
+        self,
+        platform: str,
+        config_key: CaseKey,
+        config: helion.Config,
     ) -> None:
         platform = platform.lower()
         if platform not in self._configs:
@@ -145,7 +162,7 @@ def set_config(
             config_key,
         )
 
-    def has_config(self, platform: str, config_key: str) -> bool:
+    def has_config(self, platform: str, config_key: CaseKey) -> bool:
         platform = platform.lower()
         platform_dict = self._configs.get(platform)
         if platform_dict is None:
@@ -156,18 +173,18 @@ def has_config(self, platform: str, config_key: str) -> bool:
 class ConfigManager:
     """File-level configuration management for Helion kernels (global singleton)."""
 
-    _instance: "ConfigManager | None" = None
+    _instance: ConfigManager | None = None
     _instance_base_dir: Path | None = None
 
-    def __new__(cls, base_dir: str | Path | None = None) -> "ConfigManager":
+    def __new__(cls, base_dir: str | Path | None = None) -> ConfigManager:
         resolved_base_dir = cls._resolve_base_dir(base_dir)
 
         if cls._instance is not None:
             if cls._instance_base_dir != resolved_base_dir:
                 raise ValueError(
                     f"ConfigManager singleton already exists with base_dir "
-                    f"'{cls._instance_base_dir}', cannot create with different "
-                    f"base_dir '{resolved_base_dir}'"
+                    f"'{cls._instance_base_dir}', cannot create with "
+                    f"different base_dir '{resolved_base_dir}'"
                 )
             return cls._instance
 
@@ -190,7 +207,7 @@ def _resolve_base_dir(base_dir: str | Path | None) -> Path:
         return (Path(__file__).parent / "configs").resolve()
 
     @classmethod
-    def get_instance(cls) -> "ConfigManager":
+    def get_instance(cls) -> ConfigManager:
         if cls._instance is None:
             raise RuntimeError(
                 "ConfigManager instance has not been created. "
@@ -229,16 +246,16 @@ def ensure_base_dir_writable(self) -> None:
                 f"Config directory '{self._base_dir}' is not writable: {e}"
             ) from e
 
-    def _load_platform_file(self, kernel_name: str, platform: str) -> dict[str, Any]:
+    def _load_platform_file(self, kernel_name: str, platform: str) -> Any:
         config_path = self.get_config_file_path(kernel_name, platform)
         if not config_path.exists():
-            return {}
+            return []
         try:
             with open(config_path) as f:
                 return json.load(f)
         except (json.JSONDecodeError, OSError) as e:
             logger.error("Failed to load config file %s: %s", config_path, e)
-            return {}
+            return []
 
     def load_config_set(self, kernel_name: str) -> ConfigSet:
         kernel_dir = self.get_kernel_dir(kernel_name)
@@ -253,32 +270,36 @@ def load_config_set(self, kernel_name: str) -> ConfigSet:
                     platform_data = json.load(f)
                 data[platform] = platform_data
             except (json.JSONDecodeError, OSError) as e:
-                logger.error("Failed to load config file %s: %s", platform_file, e)
+                logger.error(
+                    "Failed to load config file %s: %s",
+                    platform_file,
+                    e,
+                )
 
         return ConfigSet.from_dict(kernel_name, data)
 
     def get_platform_configs(
         self, kernel_name: str, platform: str
-    ) -> dict[str, helion.Config]:
+    ) -> dict[CaseKey, helion.Config]:
         platform_data = self._load_platform_file(kernel_name, platform)
         if not platform_data:
             return {}
         config_set = ConfigSet.from_dict(kernel_name, {platform: platform_data})
-        config_keys = config_set.get_config_keys(platform)
         return {
-            config_key: config_set.get_config(platform, config_key)
-            for config_key in config_keys
+            k: config_set.get_config(platform, k)
+            for k in config_set.get_config_keys(platform)
         }
 
     def save_config_set(self, config_set: ConfigSet) -> Path:
         kernel_dir = self.get_kernel_dir(config_set.kernel_name)
         kernel_dir.mkdir(parents=True, exist_ok=True)
 
-        full_data = config_set.to_dict()
-        for platform, platform_data in full_data.items():
+        full_data = config_set.to_config_entries()
+        for platform, pairs in full_data.items():
             platform_path = kernel_dir / f"{platform}.json"
             with open(platform_path, "w") as f:
-                json.dump(platform_data, f, indent=2)
+                json.dump(pairs, f, indent=2)
+                f.write("\n")
             logger.info("Saved config to: %s", platform_path)
 
         return kernel_dir
@@ -287,21 +308,34 @@ def save_configs(
         self,
         kernel_name: str,
         platform: str,
-        configs: dict[str, "helion.Config"],
+        configs: dict[CaseKey, helion.Config],
     ) -> Path:
         """Save configs for a kernel/platform, merging with existing."""
-        platform_data = self._load_platform_file(kernel_name, platform)
-        for config_key, config in configs.items():
-            platform_data[config_key] = json.loads(config.to_json())
+        config_set = ConfigSet.from_dict(
+            kernel_name,
+            {platform: self._load_platform_file(kernel_name, platform)},
+        )
+        for key, config in configs.items():
+            config_set.set_config(platform, key, config)
 
+        pairs = config_set.to_config_entries().get(platform, [])
         platform_path = self.get_config_file_path(kernel_name, platform)
         platform_path.parent.mkdir(parents=True, exist_ok=True)
         with open(platform_path, "w") as f:
-            json.dump(platform_data, f, indent=2)
+            json.dump(pairs, f, indent=2)
+            f.write("\n")
 
         logger.info("Saved config to: %s", platform_path)
         return platform_path
 
-    def config_exists(self, kernel_name: str, platform: str, config_key: str) -> bool:
+    def config_exists(
+        self,
+        kernel_name: str,
+        platform: str,
+        config_key: CaseKey,
+    ) -> bool:
         platform_data = self._load_platform_file(kernel_name, platform)
-        return config_key in platform_data
+        if not platform_data:
+            return False
+        target = dict(config_key)
+        return any(entry["key"] == target for entry in platform_data)
diff --git a/vllm/kernels/helion/configs/silu_mul_fp8/nvidia_h100.json b/vllm/kernels/helion/configs/silu_mul_fp8/nvidia_h100.json
index c314eb2dab86..4dc5c2cab308 100644
--- a/vllm/kernels/helion/configs/silu_mul_fp8/nvidia_h100.json
+++ b/vllm/kernels/helion/configs/silu_mul_fp8/nvidia_h100.json
@@ -1,13866 +1,15711 @@
-{
-  "intermediate_2048_numtokens_256": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_256": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "default": {
-    "block_sizes": [
-      1,
-      512
-    ],
-    "loop_orders": [
-      [
+[
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 256
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 256
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {},
+    "config": {
+      "block_sizes": [
+        1,
+        512
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 256
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 256
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 256
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 7688,
+      "numtokens": 256
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 256
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 1
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 1
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 1
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 1
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 1
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 1
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 2
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 2
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 2
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 2
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 2
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        16384
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "xyz"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 2
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 4
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 4
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 7,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "xyz"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 4
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 4
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 4
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 6,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "xyz"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 4
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "xyz"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 8
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 8
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "xyz"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 8
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 8
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        ""
+      ],
+      "num_warps": 1,
+      "num_stages": 8,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 8
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "first"
+      ],
+      "num_warps": 2,
+      "num_stages": 5,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 8
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 16
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "xyz"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 16
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 16
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 16
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 16
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 16
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        256
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 24
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 4,
+      "num_stages": 8,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 24
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 24
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 24
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 24
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 24
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 32
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 32
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 32
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 32
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 32
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 32
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 40
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 40
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 40
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 40
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 2,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 40
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 40
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        1
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 5,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "persistent_interleaved",
+      "num_sm_multiplier": 32,
+      "maxnreg": 32
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 48
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 48
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 48
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 48
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 48
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 48
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 56
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 56
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 56
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 56
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 56
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 56
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 64
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 64
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 64
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 64
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 64
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 64
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 72
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 72
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 72
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 72
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 72
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 72
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 80
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 80
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 80
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 80
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 80
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 80
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 88
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 88
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 88
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 88
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 88
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 88
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 96
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 96
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 96
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 6,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 96
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 96
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 96
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 104
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 104
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 104
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 104
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 104
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 104
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 112
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 112
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 112
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 112
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 112
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 112
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 120
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 120
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 120
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 120
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 120
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 1,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 120
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 128
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 128
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 128
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 128
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 128
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "last"
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 128
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 136
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 136
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 136
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 136
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 136
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 136
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        16384
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 144
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 7,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 144
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 144
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 144
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 144
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 144
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 152
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 152
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 152
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 152
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 152
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 4,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 152
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        16384
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 160
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 1,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 160
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 160
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 160
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 160
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 160
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 8,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 168
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 168
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 168
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 168
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 168
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 168
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "first"
+      ],
+      "num_warps": 2,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 176
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 176
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 176
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        4
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 4,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 176
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 5,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 176
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 176
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 184
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 6,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 184
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 184
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 184
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 8,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 184
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 184
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 192
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 192
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 192
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 192
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 192
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 192
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 200
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 200
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 200
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 200
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 200
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 200
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 6,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 208
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 208
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 208
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 208
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 208
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 5,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 208
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 216
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 216
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 216
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 216
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 2,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 216
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        16384
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 216
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 224
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 224
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 224
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 224
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 224
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 2,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 224
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 232
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 232
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        8
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 232
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 232
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        8
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 232
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        1024
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 232
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 240
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        8
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 240
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 240
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 240
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 240
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 7,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 240
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 248
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 248
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 248
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 248
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 248
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 248
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 272
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 272
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 272
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 272
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 2,
+      "num_stages": 6,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 272
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 272
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 288
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 288
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 288
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        4
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 288
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 288
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 288
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 1,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 304
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 304
+    },
+    "config": {
+      "block_sizes": [
         1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        2
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        2
+      ],
+      "range_multi_buffers": [
+        false
+      ],
+      "range_flattens": [
+        true
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "persistent_blocked",
+      "num_sm_multiplier": 2,
+      "maxnreg": 64
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 304
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_256": {
-    "block_sizes": [
-      256,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_256": {
-    "block_sizes": [
-      8,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_256": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_7688_numtokens_256": {
-    "block_sizes": [
-      32,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_256": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_1": {
-    "block_sizes": [
-      1,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_1": {
-    "block_sizes": [
-      1,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_1": {
-    "block_sizes": [
-      1,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_1": {
-    "block_sizes": [
-      1,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_1": {
-    "block_sizes": [
-      1,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_1": {
-    "block_sizes": [
-      1,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_2": {
-    "block_sizes": [
-      2,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_2": {
-    "block_sizes": [
-      2,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_2": {
-    "block_sizes": [
-      2,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_2": {
-    "block_sizes": [
-      2,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_2": {
-    "block_sizes": [
-      1,
-      16384
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 304
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 304
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 304
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 6,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 320
+    },
+    "config": {
+      "block_sizes": [
         1,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "xyz"
-  },
-  "intermediate_14336_numtokens_2": {
-    "block_sizes": [
-      2,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_4": {
-    "block_sizes": [
-      4,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_4": {
-    "block_sizes": [
-      4,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 7,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "xyz"
-  },
-  "intermediate_4096_numtokens_4": {
-    "block_sizes": [
-      4,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_4": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_4": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 320
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 320
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 320
+    },
+    "config": {
+      "block_sizes": [
         1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 6,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "xyz"
-  },
-  "intermediate_14336_numtokens_4": {
-    "block_sizes": [
-      4,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "xyz"
-  },
-  "intermediate_2048_numtokens_8": {
-    "block_sizes": [
-      8,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_8": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "xyz"
-  },
-  "intermediate_4096_numtokens_8": {
-    "block_sizes": [
-      8,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_8": {
-    "block_sizes": [
-      2,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      ""
-    ],
-    "num_warps": 1,
-    "num_stages": 8,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_8": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 320
+    },
+    "config": {
+      "block_sizes": [
         1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "first"
-    ],
-    "num_warps": 2,
-    "num_stages": 5,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_8": {
-    "block_sizes": [
-      8,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_16": {
-    "block_sizes": [
-      8,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "xyz"
-  },
-  "intermediate_2880_numtokens_16": {
-    "block_sizes": [
-      2,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_16": {
-    "block_sizes": [
-      16,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_16": {
-    "block_sizes": [
-      16,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_16": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_16": {
-    "block_sizes": [
-      2,
-      256
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 320
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 336
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 336
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 336
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "first"
+      ],
+      "num_warps": 2,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 336
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 336
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 336
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        8
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 8,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 352
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        1
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 352
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 352
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        4
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 352
+    },
+    "config": {
+      "block_sizes": [
         1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 352
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 352
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 368
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 368
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 368
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 368
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_24": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 4,
-    "num_stages": 8,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_24": {
-    "block_sizes": [
-      4,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_24": {
-    "block_sizes": [
-      16,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_24": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_24": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "last"
+      ],
+      "num_warps": 1,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 368
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 368
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 384
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 384
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        2
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 384
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 5,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 384
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "first"
+      ],
+      "num_warps": 4,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 384
+    },
+    "config": {
+      "block_sizes": [
         1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "first"
+      ],
+      "num_warps": 4,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 384
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_24": {
-    "block_sizes": [
-      8,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_32": {
-    "block_sizes": [
-      32,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_32": {
-    "block_sizes": [
-      4,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_32": {
-    "block_sizes": [
-      4,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_32": {
-    "block_sizes": [
-      4,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_32": {
-    "block_sizes": [
-      2,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_32": {
-    "block_sizes": [
-      1,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_40": {
-    "block_sizes": [
-      32,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_40": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 400
+    },
+    "config": {
+      "block_sizes": [
         1,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_40": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_40": {
-    "block_sizes": [
-      2,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 2,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_40": {
-    "block_sizes": [
-      16,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_40": {
-    "block_sizes": [
-      4,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      1
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 5,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "persistent_interleaved",
-    "num_sm_multiplier": 32,
-    "maxnreg": 32
-  },
-  "intermediate_2048_numtokens_48": {
-    "block_sizes": [
-      32,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_48": {
-    "block_sizes": [
-      16,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_48": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_48": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_48": {
-    "block_sizes": [
-      16,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_48": {
-    "block_sizes": [
-      32,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_56": {
-    "block_sizes": [
-      32,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_56": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_56": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_56": {
-    "block_sizes": [
-      32,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_56": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_56": {
-    "block_sizes": [
-      2,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_64": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_64": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_64": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_64": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_64": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_64": {
-    "block_sizes": [
-      16,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_72": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_72": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_72": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_72": {
-    "block_sizes": [
-      64,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_72": {
-    "block_sizes": [
-      4,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_72": {
-    "block_sizes": [
-      32,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_80": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_80": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_80": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_80": {
-    "block_sizes": [
-      4,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_80": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_80": {
-    "block_sizes": [
-      2,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_88": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_88": {
-    "block_sizes": [
-      8,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_88": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_88": {
-    "block_sizes": [
-      64,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_88": {
-    "block_sizes": [
-      16,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_88": {
-    "block_sizes": [
-      4,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_96": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_96": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_96": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 400
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 400
+    },
+    "config": {
+      "block_sizes": [
         1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 6,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_96": {
-    "block_sizes": [
-      64,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_96": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 400
+    },
+    "config": {
+      "block_sizes": [
         1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 400
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 400
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_96": {
-    "block_sizes": [
-      4,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_104": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_104": {
-    "block_sizes": [
-      8,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_104": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_104": {
-    "block_sizes": [
-      64,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_104": {
-    "block_sizes": [
-      2,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_104": {
-    "block_sizes": [
-      8,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_112": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_112": {
-    "block_sizes": [
-      2,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_112": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_112": {
-    "block_sizes": [
-      4,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_112": {
-    "block_sizes": [
-      4,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_112": {
-    "block_sizes": [
-      64,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_120": {
-    "block_sizes": [
-      8,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_120": {
-    "block_sizes": [
-      2,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_120": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_120": {
-    "block_sizes": [
-      64,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_120": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 1,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_120": {
-    "block_sizes": [
-      32,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_128": {
-    "block_sizes": [
-      128,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_128": {
-    "block_sizes": [
-      2,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_128": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_128": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_128": {
-    "block_sizes": [
-      2,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "last"
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_128": {
-    "block_sizes": [
-      4,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_136": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_136": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_136": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_136": {
-    "block_sizes": [
-      2,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_136": {
-    "block_sizes": [
-      4,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_136": {
-    "block_sizes": [
-      4,
-      16384
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_144": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 7,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_144": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_144": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_144": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 416
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 416
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 416
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        8
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 7,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 416
+    },
+    "config": {
+      "block_sizes": [
         1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_144": {
-    "block_sizes": [
-      256,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_144": {
-    "block_sizes": [
-      64,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_152": {
-    "block_sizes": [
-      4,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_152": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_152": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_152": {
-    "block_sizes": [
-      64,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_152": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 4,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_152": {
-    "block_sizes": [
-      2,
-      16384
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_160": {
-    "block_sizes": [
-      4,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 1,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_160": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_160": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_160": {
-    "block_sizes": [
-      64,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_160": {
-    "block_sizes": [
-      128,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_160": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 8,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_168": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_168": {
-    "block_sizes": [
-      8,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_168": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_168": {
-    "block_sizes": [
-      128,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_168": {
-    "block_sizes": [
-      64,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_168": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "first"
-    ],
-    "num_warps": 2,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_176": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_176": {
-    "block_sizes": [
-      16,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_176": {
-    "block_sizes": [
-      128,
-      4
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 4,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_176": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 5,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_176": {
-    "block_sizes": [
-      64,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_176": {
-    "block_sizes": [
-      128,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_184": {
-    "block_sizes": [
-      2,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 6,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_184": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_184": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_184": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 8,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_184": {
-    "block_sizes": [
-      64,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_184": {
-    "block_sizes": [
-      64,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_192": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_192": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_192": {
-    "block_sizes": [
-      8,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_192": {
-    "block_sizes": [
-      32,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_192": {
-    "block_sizes": [
-      16,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_192": {
-    "block_sizes": [
-      128,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_200": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_200": {
-    "block_sizes": [
-      8,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_200": {
-    "block_sizes": [
-      4,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_200": {
-    "block_sizes": [
-      128,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_200": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 416
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        8
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 416
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 432
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 432
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 432
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 432
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 5,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 432
+    },
+    "config": {
+      "block_sizes": [
         1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 432
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        4
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "last"
+      ],
+      "num_warps": 1,
+      "num_stages": 7,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 448
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_200": {
-    "block_sizes": [
-      16,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 6,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_208": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_208": {
-    "block_sizes": [
-      256,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_208": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_208": {
-    "block_sizes": [
-      128,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_208": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 5,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_208": {
-    "block_sizes": [
-      128,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_216": {
-    "block_sizes": [
-      32,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_216": {
-    "block_sizes": [
-      4,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_216": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_216": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 2,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_216": {
-    "block_sizes": [
-      1,
-      16384
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_216": {
-    "block_sizes": [
-      128,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_224": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_224": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_224": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_224": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_224": {
-    "block_sizes": [
-      32,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 2,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_224": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_232": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_232": {
-    "block_sizes": [
-      256,
-      8
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_232": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_232": {
-    "block_sizes": [
-      256,
-      8
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_232": {
-    "block_sizes": [
-      4,
-      1024
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 448
+    },
+    "config": {
+      "block_sizes": [
         1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 6,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 448
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_232": {
-    "block_sizes": [
-      8,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_240": {
-    "block_sizes": [
-      64,
-      8
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_240": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_240": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_240": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 448
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        8
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 448
+    },
+    "config": {
+      "block_sizes": [
         1,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 448
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_240": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 7,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_240": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_248": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_248": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_248": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_248": {
-    "block_sizes": [
-      256,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_248": {
-    "block_sizes": [
-      4,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_248": {
-    "block_sizes": [
-      8,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_272": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_272": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_272": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 8,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 464
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 464
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 464
+    },
+    "config": {
+      "block_sizes": [
         1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 1,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 464
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_272": {
-    "block_sizes": [
-      8,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 2,
-    "num_stages": 6,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_272": {
-    "block_sizes": [
-      8,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_272": {
-    "block_sizes": [
-      512,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_288": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_288": {
-    "block_sizes": [
-      8,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_288": {
-    "block_sizes": [
-      512,
-      4
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_288": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_288": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_288": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 1,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_304": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_304": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      2
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      2
-    ],
-    "range_multi_buffers": [
-      false
-    ],
-    "range_flattens": [
-      true
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "persistent_blocked",
-    "num_sm_multiplier": 2,
-    "maxnreg": 64
-  },
-  "intermediate_4096_numtokens_304": {
-    "block_sizes": [
-      16,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_304": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_304": {
-    "block_sizes": [
-      128,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_304": {
-    "block_sizes": [
-      4,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 6,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_320": {
-    "block_sizes": [
-      1,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_320": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_320": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_320": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_320": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_320": {
-    "block_sizes": [
-      8,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_336": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_336": {
-    "block_sizes": [
-      16,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_336": {
-    "block_sizes": [
-      16,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "first"
-    ],
-    "num_warps": 2,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_336": {
-    "block_sizes": [
-      256,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_336": {
-    "block_sizes": [
-      4,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_336": {
-    "block_sizes": [
-      256,
-      8
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 8,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_352": {
-    "block_sizes": [
-      512,
-      1
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_352": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_352": {
-    "block_sizes": [
-      512,
-      4
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_352": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_352": {
-    "block_sizes": [
-      16,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_352": {
-    "block_sizes": [
-      32,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_368": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_368": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_368": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_368": {
-    "block_sizes": [
-      2,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "last"
-    ],
-    "num_warps": 1,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_368": {
-    "block_sizes": [
-      128,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_368": {
-    "block_sizes": [
-      32,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_384": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_384": {
-    "block_sizes": [
-      512,
-      2
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_384": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 5,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_384": {
-    "block_sizes": [
-      128,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "first"
-    ],
-    "num_warps": 4,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_384": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 464
+    },
+    "config": {
+      "block_sizes": [
         1,
+        16384
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "first"
-    ],
-    "num_warps": 4,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_384": {
-    "block_sizes": [
-      128,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_400": {
-    "block_sizes": [
-      1,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_400": {
-    "block_sizes": [
-      16,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_400": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 464
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 480
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 480
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 480
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 480
+    },
+    "config": {
+      "block_sizes": [
         1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_400": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        ""
+      ],
+      "num_warps": 1,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 480
+    },
+    "config": {
+      "block_sizes": [
         1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_400": {
-    "block_sizes": [
-      2,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_400": {
-    "block_sizes": [
-      4,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_416": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_416": {
-    "block_sizes": [
-      32,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_416": {
-    "block_sizes": [
-      512,
-      8
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 7,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_416": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 480
+    },
+    "config": {
+      "block_sizes": [
         1,
+        16384
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_416": {
-    "block_sizes": [
-      256,
-      8
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_416": {
-    "block_sizes": [
-      128,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_432": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_432": {
-    "block_sizes": [
-      8,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_432": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_432": {
-    "block_sizes": [
-      256,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 5,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_432": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 496
+    },
+    "config": {
+      "block_sizes": [
         1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 7,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 496
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 496
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_432": {
-    "block_sizes": [
-      512,
-      4
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "last"
-    ],
-    "num_warps": 1,
-    "num_stages": 7,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_448": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_448": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 496
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 496
+    },
+    "config": {
+      "block_sizes": [
         1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 496
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "first"
+      ],
+      "num_warps": 4,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 512
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 6,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_448": {
-    "block_sizes": [
-      8,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_448": {
-    "block_sizes": [
-      128,
-      8
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_448": {
-    "block_sizes": [
-      1,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_448": {
-    "block_sizes": [
-      64,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 8,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_464": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_464": {
-    "block_sizes": [
-      8,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_464": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 1,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_464": {
-    "block_sizes": [
-      256,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_464": {
-    "block_sizes": [
-      1,
-      16384
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_464": {
-    "block_sizes": [
-      64,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_480": {
-    "block_sizes": [
-      16,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_480": {
-    "block_sizes": [
-      128,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_480": {
-    "block_sizes": [
-      64,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_480": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      ""
-    ],
-    "num_warps": 1,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_480": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_480": {
-    "block_sizes": [
-      1,
-      16384
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_496": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 512
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 512
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 512
+    },
+    "config": {
+      "block_sizes": [
         1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 7,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_496": {
-    "block_sizes": [
-      8,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_496": {
-    "block_sizes": [
-      256,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_496": {
-    "block_sizes": [
-      256,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_496": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_496": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "first"
-    ],
-    "num_warps": 4,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_512": {
-    "block_sizes": [
-      512,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_512": {
-    "block_sizes": [
-      8,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_512": {
-    "block_sizes": [
-      8,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_512": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 512
+    },
+    "config": {
+      "block_sizes": [
         1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 512
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_512": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_512": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
   }
-}
\ No newline at end of file
+]
diff --git a/vllm/kernels/helion/configs/silu_mul_fp8/nvidia_h200.json b/vllm/kernels/helion/configs/silu_mul_fp8/nvidia_h200.json
index c314eb2dab86..4dc5c2cab308 100644
--- a/vllm/kernels/helion/configs/silu_mul_fp8/nvidia_h200.json
+++ b/vllm/kernels/helion/configs/silu_mul_fp8/nvidia_h200.json
@@ -1,13866 +1,15711 @@
-{
-  "intermediate_2048_numtokens_256": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_256": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "default": {
-    "block_sizes": [
-      1,
-      512
-    ],
-    "loop_orders": [
-      [
+[
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 256
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 256
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {},
+    "config": {
+      "block_sizes": [
+        1,
+        512
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 256
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 256
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 256
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 7688,
+      "numtokens": 256
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 256
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 1
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 1
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 1
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 1
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 1
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 1
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 2
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 2
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 2
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 2
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 2
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        16384
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "xyz"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 2
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 4
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 4
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 7,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "xyz"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 4
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 4
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 4
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 6,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "xyz"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 4
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "xyz"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 8
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 8
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "xyz"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 8
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 8
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        ""
+      ],
+      "num_warps": 1,
+      "num_stages": 8,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 8
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "first"
+      ],
+      "num_warps": 2,
+      "num_stages": 5,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 8
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 16
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "xyz"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 16
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 16
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 16
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 16
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 16
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        256
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 24
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 4,
+      "num_stages": 8,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 24
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 24
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 24
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 24
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 24
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 32
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 32
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 32
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 32
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 32
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 32
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 40
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 40
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 40
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 40
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 2,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 40
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 40
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        1
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 5,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "persistent_interleaved",
+      "num_sm_multiplier": 32,
+      "maxnreg": 32
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 48
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 48
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 48
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 48
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 48
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 48
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 56
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 56
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 56
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 56
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 56
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 56
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 64
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 64
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 64
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 64
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 64
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 64
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 72
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 72
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 72
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 72
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 72
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 72
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 80
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 80
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 80
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 80
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 80
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 80
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 88
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 88
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 88
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 88
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 88
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 88
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 96
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 96
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 96
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 6,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 96
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 96
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 96
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 104
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 104
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 104
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 104
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 104
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 104
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 112
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 112
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 112
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 112
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 112
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 112
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 120
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 120
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 120
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 120
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 120
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 1,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 120
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 128
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 128
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 128
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 128
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 128
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "last"
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 128
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 136
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 136
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 136
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 136
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 136
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 136
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        16384
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 144
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 7,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 144
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 144
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 144
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 144
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 144
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 152
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 152
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 152
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 152
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 152
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 4,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 152
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        16384
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 160
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 1,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 160
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 160
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 160
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 160
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 160
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 8,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 168
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 168
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 168
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 168
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 168
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 168
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "first"
+      ],
+      "num_warps": 2,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 176
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 176
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 176
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        4
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 4,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 176
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 5,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 176
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 176
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 184
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 6,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 184
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 184
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 184
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 8,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 184
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 184
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 192
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 192
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 192
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 192
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 192
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 192
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 200
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 200
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 200
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 200
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 200
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 200
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 6,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 208
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 208
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 208
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 208
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 208
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 5,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 208
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 216
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 216
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 216
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 216
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 2,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 216
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        16384
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 216
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 224
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 224
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 224
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 224
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 224
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 2,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 224
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 232
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 232
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        8
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 232
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 232
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        8
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 232
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        1024
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 232
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 240
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        8
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 240
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 240
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 240
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 240
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 7,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 240
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 248
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 248
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 248
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 248
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 248
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 248
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 272
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 272
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 272
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 272
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 2,
+      "num_stages": 6,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 272
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 272
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 288
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 288
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 288
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        4
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 288
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 288
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 288
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 1,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 304
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 304
+    },
+    "config": {
+      "block_sizes": [
         1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        2
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        2
+      ],
+      "range_multi_buffers": [
+        false
+      ],
+      "range_flattens": [
+        true
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "persistent_blocked",
+      "num_sm_multiplier": 2,
+      "maxnreg": 64
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 304
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_256": {
-    "block_sizes": [
-      256,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_256": {
-    "block_sizes": [
-      8,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_256": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_7688_numtokens_256": {
-    "block_sizes": [
-      32,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_256": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_1": {
-    "block_sizes": [
-      1,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_1": {
-    "block_sizes": [
-      1,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_1": {
-    "block_sizes": [
-      1,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_1": {
-    "block_sizes": [
-      1,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_1": {
-    "block_sizes": [
-      1,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_1": {
-    "block_sizes": [
-      1,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_2": {
-    "block_sizes": [
-      2,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_2": {
-    "block_sizes": [
-      2,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_2": {
-    "block_sizes": [
-      2,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_2": {
-    "block_sizes": [
-      2,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_2": {
-    "block_sizes": [
-      1,
-      16384
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 304
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 304
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 304
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 6,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 320
+    },
+    "config": {
+      "block_sizes": [
         1,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "xyz"
-  },
-  "intermediate_14336_numtokens_2": {
-    "block_sizes": [
-      2,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_4": {
-    "block_sizes": [
-      4,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_4": {
-    "block_sizes": [
-      4,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 7,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "xyz"
-  },
-  "intermediate_4096_numtokens_4": {
-    "block_sizes": [
-      4,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_4": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_4": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 320
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 320
+    },
+    "config": {
+      "block_sizes": [
+        1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 320
+    },
+    "config": {
+      "block_sizes": [
         1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 6,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "xyz"
-  },
-  "intermediate_14336_numtokens_4": {
-    "block_sizes": [
-      4,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "xyz"
-  },
-  "intermediate_2048_numtokens_8": {
-    "block_sizes": [
-      8,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_8": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "xyz"
-  },
-  "intermediate_4096_numtokens_8": {
-    "block_sizes": [
-      8,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_8": {
-    "block_sizes": [
-      2,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      ""
-    ],
-    "num_warps": 1,
-    "num_stages": 8,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_8": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 320
+    },
+    "config": {
+      "block_sizes": [
         1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "first"
-    ],
-    "num_warps": 2,
-    "num_stages": 5,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_8": {
-    "block_sizes": [
-      8,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_16": {
-    "block_sizes": [
-      8,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "xyz"
-  },
-  "intermediate_2880_numtokens_16": {
-    "block_sizes": [
-      2,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_16": {
-    "block_sizes": [
-      16,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_16": {
-    "block_sizes": [
-      16,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_16": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_16": {
-    "block_sizes": [
-      2,
-      256
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 320
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 336
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 336
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 336
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "first"
+      ],
+      "num_warps": 2,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 336
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 336
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 336
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        8
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 8,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 352
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        1
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 352
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 352
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        4
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 352
+    },
+    "config": {
+      "block_sizes": [
         1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 352
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 352
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 368
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 368
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 368
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 368
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_24": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 4,
-    "num_stages": 8,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_24": {
-    "block_sizes": [
-      4,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_24": {
-    "block_sizes": [
-      16,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_24": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_24": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "last"
+      ],
+      "num_warps": 1,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 368
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 368
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 384
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 384
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        2
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 384
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 5,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 384
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "first"
+      ],
+      "num_warps": 4,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 384
+    },
+    "config": {
+      "block_sizes": [
         1,
+        8192
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "first"
+      ],
+      "num_warps": 4,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 384
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_24": {
-    "block_sizes": [
-      8,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_32": {
-    "block_sizes": [
-      32,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_32": {
-    "block_sizes": [
-      4,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_32": {
-    "block_sizes": [
-      4,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_32": {
-    "block_sizes": [
-      4,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_32": {
-    "block_sizes": [
-      2,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_32": {
-    "block_sizes": [
-      1,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_40": {
-    "block_sizes": [
-      32,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_40": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 400
+    },
+    "config": {
+      "block_sizes": [
         1,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_40": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_40": {
-    "block_sizes": [
-      2,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 2,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_40": {
-    "block_sizes": [
-      16,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_40": {
-    "block_sizes": [
-      4,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      1
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 5,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "persistent_interleaved",
-    "num_sm_multiplier": 32,
-    "maxnreg": 32
-  },
-  "intermediate_2048_numtokens_48": {
-    "block_sizes": [
-      32,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_48": {
-    "block_sizes": [
-      16,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_48": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_48": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_48": {
-    "block_sizes": [
-      16,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_48": {
-    "block_sizes": [
-      32,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_56": {
-    "block_sizes": [
-      32,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_56": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_56": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_56": {
-    "block_sizes": [
-      32,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_56": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_56": {
-    "block_sizes": [
-      2,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_64": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_64": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_64": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_64": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_64": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_64": {
-    "block_sizes": [
-      16,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_72": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_72": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_72": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_72": {
-    "block_sizes": [
-      64,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_72": {
-    "block_sizes": [
-      4,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_72": {
-    "block_sizes": [
-      32,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_80": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_80": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_80": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_80": {
-    "block_sizes": [
-      4,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_80": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_80": {
-    "block_sizes": [
-      2,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_88": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_88": {
-    "block_sizes": [
-      8,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_88": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_88": {
-    "block_sizes": [
-      64,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_88": {
-    "block_sizes": [
-      16,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_88": {
-    "block_sizes": [
-      4,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_96": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_96": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_96": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 400
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 400
+    },
+    "config": {
+      "block_sizes": [
         1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 6,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_96": {
-    "block_sizes": [
-      64,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_96": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 400
+    },
+    "config": {
+      "block_sizes": [
         1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 400
+    },
+    "config": {
+      "block_sizes": [
+        2,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 400
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_96": {
-    "block_sizes": [
-      4,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_104": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_104": {
-    "block_sizes": [
-      8,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_104": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_104": {
-    "block_sizes": [
-      64,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_104": {
-    "block_sizes": [
-      2,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_104": {
-    "block_sizes": [
-      8,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_112": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_112": {
-    "block_sizes": [
-      2,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_112": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_112": {
-    "block_sizes": [
-      4,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_112": {
-    "block_sizes": [
-      4,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_112": {
-    "block_sizes": [
-      64,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_120": {
-    "block_sizes": [
-      8,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_120": {
-    "block_sizes": [
-      2,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_120": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_120": {
-    "block_sizes": [
-      64,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_120": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 1,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_120": {
-    "block_sizes": [
-      32,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_128": {
-    "block_sizes": [
-      128,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_128": {
-    "block_sizes": [
-      2,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_128": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_128": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_128": {
-    "block_sizes": [
-      2,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "last"
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_128": {
-    "block_sizes": [
-      4,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_136": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_136": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_136": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_136": {
-    "block_sizes": [
-      2,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_136": {
-    "block_sizes": [
-      4,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_136": {
-    "block_sizes": [
-      4,
-      16384
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_144": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 7,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_144": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_144": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_144": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 416
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 416
+    },
+    "config": {
+      "block_sizes": [
+        32,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 416
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        8
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 7,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 416
+    },
+    "config": {
+      "block_sizes": [
         1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_144": {
-    "block_sizes": [
-      256,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_144": {
-    "block_sizes": [
-      64,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_152": {
-    "block_sizes": [
-      4,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_152": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_152": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_152": {
-    "block_sizes": [
-      64,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_152": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 4,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_152": {
-    "block_sizes": [
-      2,
-      16384
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_160": {
-    "block_sizes": [
-      4,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 1,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_160": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_160": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_160": {
-    "block_sizes": [
-      64,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_160": {
-    "block_sizes": [
-      128,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_160": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 8,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_168": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_168": {
-    "block_sizes": [
-      8,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_168": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_168": {
-    "block_sizes": [
-      128,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_168": {
-    "block_sizes": [
-      64,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_168": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "first"
-    ],
-    "num_warps": 2,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_176": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_176": {
-    "block_sizes": [
-      16,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_176": {
-    "block_sizes": [
-      128,
-      4
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 4,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_176": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 5,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_176": {
-    "block_sizes": [
-      64,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_176": {
-    "block_sizes": [
-      128,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_184": {
-    "block_sizes": [
-      2,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 6,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_184": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_184": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_184": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 8,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_184": {
-    "block_sizes": [
-      64,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_184": {
-    "block_sizes": [
-      64,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_192": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_192": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_192": {
-    "block_sizes": [
-      8,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_192": {
-    "block_sizes": [
-      32,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_192": {
-    "block_sizes": [
-      16,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_192": {
-    "block_sizes": [
-      128,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_200": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_200": {
-    "block_sizes": [
-      8,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_200": {
-    "block_sizes": [
-      4,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_200": {
-    "block_sizes": [
-      128,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_200": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 8,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 416
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        8
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 416
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 432
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 432
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 432
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 432
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 5,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 432
+    },
+    "config": {
+      "block_sizes": [
         1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        "first"
+      ],
+      "num_warps": 1,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 432
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        4
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "last"
+      ],
+      "num_warps": 1,
+      "num_stages": 7,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 448
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_200": {
-    "block_sizes": [
-      16,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 6,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_208": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_208": {
-    "block_sizes": [
-      256,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_208": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_208": {
-    "block_sizes": [
-      128,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_208": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 5,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_208": {
-    "block_sizes": [
-      128,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_216": {
-    "block_sizes": [
-      32,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_216": {
-    "block_sizes": [
-      4,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_216": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_216": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 2,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_216": {
-    "block_sizes": [
-      1,
-      16384
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_216": {
-    "block_sizes": [
-      128,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_224": {
-    "block_sizes": [
-      32,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_224": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_224": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_224": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_224": {
-    "block_sizes": [
-      32,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 2,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_224": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_232": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_232": {
-    "block_sizes": [
-      256,
-      8
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_232": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_232": {
-    "block_sizes": [
-      256,
-      8
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_232": {
-    "block_sizes": [
-      4,
-      1024
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 448
+    },
+    "config": {
+      "block_sizes": [
         1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 6,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 448
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_232": {
-    "block_sizes": [
-      8,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_240": {
-    "block_sizes": [
-      64,
-      8
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_240": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_240": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_240": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 448
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        8
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 448
+    },
+    "config": {
+      "block_sizes": [
         1,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 448
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        16
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_240": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 7,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_240": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_248": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_248": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_248": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_248": {
-    "block_sizes": [
-      256,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_248": {
-    "block_sizes": [
-      4,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_248": {
-    "block_sizes": [
-      8,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_272": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_272": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_272": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 32,
+      "num_stages": 8,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 464
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 464
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 464
+    },
+    "config": {
+      "block_sizes": [
         1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 1,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 464
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_272": {
-    "block_sizes": [
-      8,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 2,
-    "num_stages": 6,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_272": {
-    "block_sizes": [
-      8,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_272": {
-    "block_sizes": [
-      512,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_288": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_288": {
-    "block_sizes": [
-      8,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_288": {
-    "block_sizes": [
-      512,
-      4
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_288": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_288": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_288": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 1,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_304": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_304": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      2
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      2
-    ],
-    "range_multi_buffers": [
-      false
-    ],
-    "range_flattens": [
-      true
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "persistent_blocked",
-    "num_sm_multiplier": 2,
-    "maxnreg": 64
-  },
-  "intermediate_4096_numtokens_304": {
-    "block_sizes": [
-      16,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_304": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_304": {
-    "block_sizes": [
-      128,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_304": {
-    "block_sizes": [
-      4,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 6,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_320": {
-    "block_sizes": [
-      1,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_320": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_320": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_320": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_320": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_320": {
-    "block_sizes": [
-      8,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_336": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_336": {
-    "block_sizes": [
-      16,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_336": {
-    "block_sizes": [
-      16,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "first"
-    ],
-    "num_warps": 2,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_336": {
-    "block_sizes": [
-      256,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_336": {
-    "block_sizes": [
-      4,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_336": {
-    "block_sizes": [
-      256,
-      8
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 8,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_352": {
-    "block_sizes": [
-      512,
-      1
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_352": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_352": {
-    "block_sizes": [
-      512,
-      4
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_352": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_352": {
-    "block_sizes": [
-      16,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_352": {
-    "block_sizes": [
-      32,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_368": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_368": {
-    "block_sizes": [
-      128,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_368": {
-    "block_sizes": [
-      64,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_368": {
-    "block_sizes": [
-      2,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "last"
-    ],
-    "num_warps": 1,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_368": {
-    "block_sizes": [
-      128,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_368": {
-    "block_sizes": [
-      32,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_384": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_384": {
-    "block_sizes": [
-      512,
-      2
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_384": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 5,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_384": {
-    "block_sizes": [
-      128,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "first"
-    ],
-    "num_warps": 4,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_384": {
-    "block_sizes": [
-      1,
-      8192
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 464
+    },
+    "config": {
+      "block_sizes": [
         1,
+        16384
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "first"
-    ],
-    "num_warps": 4,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_384": {
-    "block_sizes": [
-      128,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_400": {
-    "block_sizes": [
-      1,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_400": {
-    "block_sizes": [
-      16,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_400": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 6,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 464
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        512
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 32,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 480
+    },
+    "config": {
+      "block_sizes": [
+        16,
+        32
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "first",
+        ""
+      ],
+      "num_warps": 16,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 480
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 5,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 480
+    },
+    "config": {
+      "block_sizes": [
+        64,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        8
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 2,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 480
+    },
+    "config": {
+      "block_sizes": [
         1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_400": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "first",
+        ""
+      ],
+      "num_warps": 1,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 480
+    },
+    "config": {
+      "block_sizes": [
         1,
+        1024
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_400": {
-    "block_sizes": [
-      2,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_400": {
-    "block_sizes": [
-      4,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_416": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_416": {
-    "block_sizes": [
-      32,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_416": {
-    "block_sizes": [
-      512,
-      8
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 7,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_416": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 4,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 480
+    },
+    "config": {
+      "block_sizes": [
         1,
+        16384
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 8,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_416": {
-    "block_sizes": [
-      256,
-      8
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_416": {
-    "block_sizes": [
-      128,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_432": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_432": {
-    "block_sizes": [
-      8,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_432": {
-    "block_sizes": [
-      64,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_432": {
-    "block_sizes": [
-      256,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 5,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_432": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "last",
+        "first"
+      ],
+      "num_warps": 32,
+      "num_stages": 3,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 496
+    },
+    "config": {
+      "block_sizes": [
         1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 7,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 496
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        256
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 8,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 496
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      "first"
-    ],
-    "num_warps": 1,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_432": {
-    "block_sizes": [
-      512,
-      4
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "last"
-    ],
-    "num_warps": 1,
-    "num_stages": 7,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_448": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_448": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 496
+    },
+    "config": {
+      "block_sizes": [
+        256,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 496
+    },
+    "config": {
+      "block_sizes": [
         1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        4
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "last",
+        "last"
+      ],
+      "num_warps": 8,
+      "num_stages": 4,
+      "indexing": [
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 496
+    },
+    "config": {
+      "block_sizes": [
+        4,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "first"
+      ],
+      "num_warps": 4,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2048,
+      "numtokens": 512
+    },
+    "config": {
+      "block_sizes": [
+        512,
+        16
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 6,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_448": {
-    "block_sizes": [
-      8,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_448": {
-    "block_sizes": [
-      128,
-      8
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_448": {
-    "block_sizes": [
-      1,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_448": {
-    "block_sizes": [
-      64,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      16
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 32,
-    "num_stages": 8,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_464": {
-    "block_sizes": [
-      256,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_464": {
-    "block_sizes": [
-      8,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_464": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 1,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_464": {
-    "block_sizes": [
-      256,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_464": {
-    "block_sizes": [
-      1,
-      16384
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 6,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_464": {
-    "block_sizes": [
-      64,
-      512
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 32,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_480": {
-    "block_sizes": [
-      16,
-      32
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "first",
-      ""
-    ],
-    "num_warps": 16,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_480": {
-    "block_sizes": [
-      128,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 5,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_480": {
-    "block_sizes": [
-      64,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      8
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 2,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_480": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "first",
-      ""
-    ],
-    "num_warps": 1,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_480": {
-    "block_sizes": [
-      1,
-      1024
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 4,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_480": {
-    "block_sizes": [
-      1,
-      16384
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "last",
-      "first"
-    ],
-    "num_warps": 32,
-    "num_stages": 3,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_496": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 2880,
+      "numtokens": 512
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        2048
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        ""
+      ],
+      "num_warps": 8,
+      "num_stages": 1,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 4096,
+      "numtokens": 512
+    },
+    "config": {
+      "block_sizes": [
+        8,
+        128
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        2
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "last",
+        "last",
+        "last"
+      ],
+      "num_warps": 16,
+      "num_stages": 2,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 8192,
+      "numtokens": 512
+    },
+    "config": {
+      "block_sizes": [
         1,
+        2048
+      ],
+      "loop_orders": [
+        [
+          1,
+          0
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        64
+      ],
+      "range_unroll_factors": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 7,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_496": {
-    "block_sizes": [
-      8,
-      256
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 8,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_496": {
-    "block_sizes": [
-      256,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_496": {
-    "block_sizes": [
-      256,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_496": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      4
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "last",
-      "last"
-    ],
-    "num_warps": 8,
-    "num_stages": 4,
-    "indexing": [
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_496": {
-    "block_sizes": [
-      4,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "first"
-    ],
-    "num_warps": 4,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2048_numtokens_512": {
-    "block_sizes": [
-      512,
-      16
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_2880_numtokens_512": {
-    "block_sizes": [
-      8,
-      2048
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      ""
-    ],
-    "num_warps": 8,
-    "num_stages": 1,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_4096_numtokens_512": {
-    "block_sizes": [
-      8,
-      128
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      2
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "last",
-      "last",
-      "last"
-    ],
-    "num_warps": 16,
-    "num_stages": 2,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_8192_numtokens_512": {
-    "block_sizes": [
-      1,
-      2048
-    ],
-    "loop_orders": [
-      [
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "",
+        "last"
+      ],
+      "num_warps": 4,
+      "num_stages": 4,
+      "indexing": [
+        "pointer",
+        "pointer",
+        "pointer",
+        "pointer"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 11008,
+      "numtokens": 512
+    },
+    "config": {
+      "block_sizes": [
         1,
+        4096
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        false
+      ],
+      "l2_groupings": [
+        1
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
+        0
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "first",
+        "",
+        "first"
+      ],
+      "num_warps": 16,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "pointer",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
+  },
+  {
+    "key": {
+      "intermediate": 14336,
+      "numtokens": 512
+    },
+    "config": {
+      "block_sizes": [
+        128,
+        64
+      ],
+      "loop_orders": [
+        [
+          0,
+          1
+        ]
+      ],
+      "flatten_loops": [
+        true
+      ],
+      "l2_groupings": [
+        32
+      ],
+      "range_unroll_factors": [
+        0
+      ],
+      "range_warp_specializes": [],
+      "range_num_stages": [
         0
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      64
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "",
-      "last"
-    ],
-    "num_warps": 4,
-    "num_stages": 4,
-    "indexing": [
-      "pointer",
-      "pointer",
-      "pointer",
-      "pointer"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_11008_numtokens_512": {
-    "block_sizes": [
-      1,
-      4096
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      false
-    ],
-    "l2_groupings": [
-      1
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "first",
-      "",
-      "first"
-    ],
-    "num_warps": 16,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "pointer",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
-  },
-  "intermediate_14336_numtokens_512": {
-    "block_sizes": [
-      128,
-      64
-    ],
-    "loop_orders": [
-      [
-        0,
-        1
-      ]
-    ],
-    "flatten_loops": [
-      true
-    ],
-    "l2_groupings": [
-      32
-    ],
-    "range_unroll_factors": [
-      0
-    ],
-    "range_warp_specializes": [],
-    "range_num_stages": [
-      0
-    ],
-    "range_multi_buffers": [
-      null
-    ],
-    "range_flattens": [
-      null
-    ],
-    "load_eviction_policies": [
-      "",
-      "first",
-      ""
-    ],
-    "num_warps": 2,
-    "num_stages": 7,
-    "indexing": [
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor",
-      "tensor_descriptor"
-    ],
-    "pid_type": "flat"
+      ],
+      "range_multi_buffers": [
+        null
+      ],
+      "range_flattens": [
+        null
+      ],
+      "load_eviction_policies": [
+        "",
+        "first",
+        ""
+      ],
+      "num_warps": 2,
+      "num_stages": 7,
+      "indexing": [
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor",
+        "tensor_descriptor"
+      ],
+      "pid_type": "flat"
+    }
   }
-}
\ No newline at end of file
+]
diff --git a/vllm/kernels/helion/ops/silu_mul_fp8.py b/vllm/kernels/helion/ops/silu_mul_fp8.py
index 1399b15d0092..e092efccc1ec 100644
--- a/vllm/kernels/helion/ops/silu_mul_fp8.py
+++ b/vllm/kernels/helion/ops/silu_mul_fp8.py
@@ -1,11 +1,13 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from __future__ import annotations
+
 from typing import Any
 
-import regex as re
 import torch
 
+from vllm.kernels.helion.case_key import CaseKey
 from vllm.logger import init_logger
 from vllm.utils.import_utils import has_helion
 
@@ -22,14 +24,14 @@
 logger = init_logger(__name__)
 
 
-def generate_silu_mul_fp8_inputs() -> dict[str, tuple[Any, ...]]:
+def generate_silu_mul_fp8_inputs() -> dict[CaseKey, tuple[Any, ...]]:
     intermediate_sizes = [2048, 2880, 4096, 8192, 11008, 14336]
 
     # Use the same num_tokens values as vLLM's default cudagraph capture sizes.
     # See vllm/config/vllm.py _set_cudagraph_sizes() for the canonical formula.
     num_tokens_list = [1, 2, 4] + list(range(8, 256, 8)) + list(range(256, 513, 16))
 
-    inputs = {}
+    inputs: dict[CaseKey, tuple[Any, ...]] = {}
     for num_tokens in num_tokens_list:
         for intermediate_size in intermediate_sizes:
             input_tensor = torch.randn(
@@ -40,15 +42,18 @@ def generate_silu_mul_fp8_inputs() -> dict[str, tuple[Any, ...]]:
             )
             scale = torch.tensor([1.0], device="cuda", dtype=torch.float32)
 
-            config_key = f"intermediate_{intermediate_size}_numtokens_{num_tokens}"
-            inputs[config_key] = (input_tensor, scale)
+            key = CaseKey({"intermediate": intermediate_size, "numtokens": num_tokens})
+            inputs[key] = (input_tensor, scale)
 
     return inputs
 
 
+_pick_cache: dict[tuple[int, int], CaseKey | None] = {}
+
+
 def pick_silu_mul_fp8_config(
-    args: tuple[Any, ...], config_keys: list[str]
-) -> str | None:
+    args: tuple[Any, ...], config_keys: list[CaseKey]
+) -> CaseKey | None:
     """Pick the best pre-tuned config for the given input shape.
 
     Selection strategy:
@@ -57,39 +62,35 @@ def pick_silu_mul_fp8_config(
       2. Among the num_tokens values tuned for that intermediate_size, pick
          the smallest num_tokens >= the input's num_tokens. If the input is
          larger than all available num_tokens, fall back to the largest.
-
-    Config keys must be "default" or follow the format
-    "intermediate_{int}_numtokens_{int}".
     """
     if not config_keys:
         return None
 
     input_tensor, _scale = args
-    intermediate_size = input_tensor.shape[-1] // 2
-    num_tokens = input_tensor.view(-1, input_tensor.shape[-1]).shape[0]
-    configs: dict[int, list[int]] = {}
-    for key in config_keys:
-        if key == "default":
+    intermediate_size = int(input_tensor.shape[-1]) // 2
+    num_tokens = int(input_tensor.view(-1, input_tensor.shape[-1]).shape[0])
+
+    cache_key = (num_tokens, intermediate_size)
+    cached = _pick_cache.get(cache_key)
+    if cached is not None:
+        return cached
+
+    by_isize: dict[int, list[int]] = {}
+    for k in config_keys:
+        if k.is_default():
             continue
-        match = re.fullmatch(r"intermediate_(\d+)_numtokens_(\d+)", key)
-        if not match:
-            raise ValueError(
-                f"Malformed config key '{key}', "
-                f"expected format 'intermediate_{{int}}_numtokens_{{int}}'"
-            )
-        isize_str, ntokens_str = match.groups()
-        configs.setdefault(int(isize_str), []).append(int(ntokens_str))
+        by_isize.setdefault(k["intermediate"], []).append(k["numtokens"])
 
-    if not configs:
-        return "default" if "default" in config_keys else None
+    if not by_isize:
+        return None
 
-    best_isize = min(configs, key=lambda s: abs(s - intermediate_size))
-    available_ntokens = sorted(configs[best_isize])
-    best_ntokens = next(
-        (n for n in available_ntokens if n >= num_tokens), available_ntokens[-1]
-    )
+    best_isize = min(by_isize, key=lambda s: abs(s - intermediate_size))
+    available = sorted(by_isize[best_isize])
+    best_ntokens = next((n for n in available if n >= num_tokens), available[-1])
 
-    return f"intermediate_{best_isize}_numtokens_{best_ntokens}"
+    result = CaseKey({"intermediate": best_isize, "numtokens": best_ntokens})
+    _pick_cache[cache_key] = result
+    return result
 
 
 @register_kernel(
diff --git a/vllm/kernels/helion/register.py b/vllm/kernels/helion/register.py
index ba98e87ca09a..f18120da45f9 100644
--- a/vllm/kernels/helion/register.py
+++ b/vllm/kernels/helion/register.py
@@ -36,12 +36,15 @@
 - PresetConfigSearch: Custom autotuner that returns pre-tuned configs
 """
 
+from __future__ import annotations
+
 from collections.abc import Callable
-from typing import Any, cast
+from typing import Any
 
 import torch
 from torch.library import Library
 
+from vllm.kernels.helion.case_key import CaseKey
 from vllm.logger import init_logger
 from vllm.utils.import_utils import has_helion
 from vllm.utils.torch_utils import direct_register_custom_op
@@ -53,34 +56,34 @@
     )
 
 import helion
-from helion._compat import requires_torch_version
 from helion.autotuner.base_search import BaseAutotuner
 from helion.runtime.config import Config
 from helion.runtime.settings import default_autotuner_fn
 
 # TODO(gmagogsfm): Remove CustomOp fallback path (_get_or_register_custom_op,
 # vllm_helion_lib, direct_register_custom_op) once vLLM requires PyTorch >= 2.11.
-_HOP_AVAILABLE = requires_torch_version("2.11")
+# FIXME(gmagogsfm): Re-enable HOP path once performance regression is fixed.
+# _HOP_AVAILABLE = requires_torch_version("2.11")
+_HOP_AVAILABLE = False
 
 if _HOP_AVAILABLE:
-    import torch.utils._pytree as pytree
-    from helion._compiler._dynamo.higher_order_ops import (
-        helion_kernel_side_table,
-        helion_kernel_wrapper_mutation,
-    )
-    from helion._compiler._dynamo.variables import infer_output_spec
-    from torch.fx.experimental.proxy_tensor import (
-        disable_proxy_modes_tracing,
-        get_proxy_mode,
-    )
+    from helion._compat import supports_torch_compile_fusion
+    from helion._compiler._dynamo.higher_order_ops import helion_kernel_side_table
+    from helion._compiler._dynamo.variables import HelionKernelVariable
+    from helion.runtime.kernel import Kernel
+    from torch._dynamo.guards import GuardBuilder
+    from torch._dynamo.variables.builder import VariableBuilder
+
 
 logger = init_logger(__name__)
 
 vllm_helion_lib = Library("vllm_helion", "FRAGMENT")  # noqa
 
+ConfigPicker = Callable[[tuple[Any, ...], list[CaseKey]], CaseKey | None]
+
 
 def validate_helion_settings(
-    helion_settings: "helion.Settings | None", op_name: str
+    helion_settings: helion.Settings | None, op_name: str
 ) -> None:
     if helion_settings is None:
         return
@@ -109,7 +112,7 @@ def validate_helion_settings(
 
 def create_helion_decorated_kernel(
     raw_kernel_func: Callable,
-    helion_settings: "helion.Settings | None" = None,
+    helion_settings: helion.Settings | None = None,
     extra_kwargs: dict[str, Any] | None = None,
 ) -> Any:
     kernel_kwargs: dict[str, Any] = {}
@@ -146,9 +149,9 @@ class ConfiguredHelionKernel:
     def __init__(
         self,
         op_name: str,
-        config_picker: Callable[[tuple[Any, ...], list[str]], str | None] | None,
+        config_picker: ConfigPicker | None,
         raw_kernel_func: Callable,
-        helion_settings: "helion.Settings | None" = None,
+        helion_settings: helion.Settings | None = None,
     ):
         self.op_name = op_name
         self.config_picker = config_picker
@@ -172,41 +175,44 @@ def _create_key_computer(self):
                 f"A config_picker must be provided to register_kernel()."
             )
 
-        # After None check, config_picker is guaranteed to be non-None
-        assert self.config_picker is not None
+        picker = self.config_picker
+        all_keys = list(self.configs.keys())
+        default = CaseKey.default()
+        has_default = default in self.configs
 
         def key_computer(*args):
-            config_keys = list(self.configs.keys())
-            # Cast is safe because we checked for None above
-            config_picker = cast(
-                Callable[[tuple[Any, ...], list[str]], str | None], self.config_picker
-            )
-            selected_key = config_picker(args, config_keys)
-            if selected_key:
-                return selected_key
-            return "default" if "default" in self.configs else None
+            selected = picker(args, all_keys)
+            if selected is not None:
+                return str(selected)
+            if has_default:
+                return str(default)
+            return None
 
         return key_computer
 
     def _create_config_selector(self, key_computer):
+        str_to_key = {str(k): k for k in self.configs}
+
         def config_selector(args):
-            # args is a tuple; key_computer expects unpacked args
-            selected_config_key = key_computer(*args)
+            selected_str = key_computer(*args)
 
-            if selected_config_key is None:
+            if selected_str is None:
                 raise ValueError(
-                    f"Config picker returned None for kernel '{self.op_name}' "
-                    f"with available config keys: {list(self.configs.keys())}"
+                    f"Config picker returned None for kernel "
+                    f"'{self.op_name}' with available config keys: "
+                    f"{list(self.configs.keys())}"
                 )
 
-            if selected_config_key not in self.configs:
+            config_key = str_to_key.get(selected_str)
+            if config_key is None:
                 raise ValueError(
                     f"Config picker returned invalid config key "
-                    f"'{selected_config_key}' for kernel '{self.op_name}'. "
+                    f"'{selected_str}' for kernel "
+                    f"'{self.op_name}'. "
                     f"Available keys: {list(self.configs.keys())}"
                 )
 
-            return self.configs[selected_config_key]
+            return self.configs[config_key]
 
         return config_selector
 
@@ -253,9 +259,9 @@ def __init__(
         raw_kernel_func: Callable,
         op_name: str,
         fake_impl: Callable,
-        config_picker: Callable[[tuple[Any, ...], list[str]], str | None],
-        helion_settings: "helion.Settings | None" = None,
-        input_generator: Callable[[], dict[str, tuple[Any, ...]]] | None = None,
+        config_picker: ConfigPicker,
+        helion_settings: helion.Settings | None = None,
+        input_generator: (Callable[[], dict[CaseKey, tuple[Any, ...]]] | None) = None,
     ):
         # Validate helion_settings doesn't conflict with our custom autotuner
         validate_helion_settings(helion_settings, op_name)
@@ -298,77 +304,13 @@ def __call__(self, *args, **kwargs):
             f"Kernel '{self.op_name}' was not initialized. "
             "Please open an issue on GitHub."
         )
-        if get_proxy_mode() is not None:
-            return self._call_via_hop(args, kwargs)
-        return self._configured_kernel(*args, **kwargs)
-
-    def _call_via_hop(
-        self,
-        args: tuple[Any, ...],
-        kwargs: dict[str, Any],
-    ) -> Any:
-        kernel = self.get_configured_op()._decorated_kernel
-        kernel_idx = helion_kernel_side_table.add_kernel(kernel)
-
-        constant_args, tensor_args = self._partition_args(kernel, args, kwargs)
-
-        all_named = {**constant_args, **tensor_args}
-        full_args = tuple(
-            all_named.get(n, p.default)
-            for n, p in kernel.signature.parameters.items()  # type: ignore[attr-defined]
-            if n in all_named or p.default is not p.empty
-        )
-
-        with disable_proxy_modes_tracing():
-            output_spec = infer_output_spec(kernel, full_args)
-
-        hop_result = helion_kernel_wrapper_mutation(
-            kernel_idx=kernel_idx,
-            constant_args=constant_args,
-            tensor_args=tensor_args,
-            output_spec=output_spec,
-        )
-
-        tree_spec_str = output_spec.get("tree_spec_str")
-        if tree_spec_str is None:
-            return None
-        tree_spec = pytree.treespec_loads(tree_spec_str)
 
-        hop_iter = iter(hop_result)
-        reconstructed = []
-        for spec in output_spec["leaf_specs"]:
-            is_constant_scalar = spec["type"] == "scalar" and not isinstance(
-                spec.get("scalar_value"), torch.SymInt
-            )
-            if is_constant_scalar:
-                reconstructed.append(spec["scalar_value"])
-            else:
-                reconstructed.append(next(hop_iter))
-        return pytree.tree_unflatten(reconstructed, tree_spec)
-
-    @staticmethod
-    def _partition_args(
-        kernel: Any,
-        args: tuple[Any, ...],
-        kwargs: dict[str, Any],
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        constant_args: dict[str, Any] = {}
-        tensor_args: dict[str, Any] = {}
-        params = list(kernel.signature.parameters.keys())
-        for i, val in enumerate(args):
-            name = params[i]
-            if isinstance(val, torch.Tensor):
-                tensor_args[name] = val
-            else:
-                constant_args[name] = val
-        for name, val in kwargs.items():
-            if isinstance(val, torch.Tensor):
-                tensor_args[name] = val
-            else:
-                constant_args[name] = val
-        return constant_args, tensor_args
+        # During Dynamo tracing, this call will be intercepted by our custom
+        # HelionKernelWrapperVariable and handled via proper HOP emission.
+        # During eager execution, call the kernel directly.
+        return self._configured_kernel(*args, **kwargs)
 
-    def get_inputs(self) -> dict[str, tuple[Any, ...]]:
+    def get_inputs(self) -> dict[CaseKey, tuple[Any, ...]]:
         if self._input_generator is None:
             raise NotImplementedError(
                 f"No input generator registered for kernel '{self.op_name}'. "
@@ -436,7 +378,7 @@ def get_kernel_by_name(kernel_name: str) -> HelionKernelWrapper | None:
 
 def infer_fake_impl(
     kernel_func: Callable,
-    helion_settings: "helion.Settings | None" = None,
+    helion_settings: helion.Settings | None = None,
 ) -> Callable:
     def helion_fake_kernel(*args, **kwargs):
         kernel_kwargs = {}
@@ -458,37 +400,29 @@ def helion_fake_kernel(*args, **kwargs):
 def register_kernel(
     op_name: str | None = None,
     *,
-    config_picker: Callable[[tuple[Any, ...], list[str]], str | None],
+    config_picker: ConfigPicker,
     fake_impl: Callable | None = None,
-    helion_settings: "helion.Settings | None" = None,
-    input_generator: Callable[[], dict[str, tuple[Any, ...]]] | None = None,
+    helion_settings: helion.Settings | None = None,
+    input_generator: (Callable[[], dict[CaseKey, tuple[Any, ...]]] | None) = None,
 ) -> Callable[[Callable], HelionKernelWrapper]:
     """Register a Helion kernel with pre-tuned config selection.
 
-    Wraps the kernel function in a HelionKernelWrapper that eagerly builds
-    the configured kernel and (on older PyTorch) registers a custom op.
-
     Args:
-        config_picker: Required. Function with signature
-            ``(args: tuple, config_keys: list[str]) -> str | None``
-            that picks the best config key from available options.
-            Return ``None`` to fall back to ``"default"``.
+        config_picker: Required. Receives ``(args, config_keys)``
+            where each config key is a ``dict[str, Any]`` mapping
+            parameter names to values.  Return the best-matching
+            dict, or ``None`` to fall back to the default config.
 
             Example::
 
                 def pick_config(args, config_keys):
                     x = args[0]
-                    hidden_size = x.shape[-1]
-                    batch_size = x.shape[0]
-                    for key in config_keys:
-                        if key == f"hiddensize_{hidden_size}_batchsize_{batch_size}":
-                            return key
-                    return "default" if "default" in config_keys else None
-
-        input_generator: Optional. Function that returns
-            ``dict[str, tuple]`` where each key is a configuration
-            identifier (e.g. ``"4096"``, ``"hidden_4096"``) and each
-            value is a tuple of arguments to pass to the kernel.
+                    best = min(config_keys, key=lambda k: abs(k["size"] - x.shape[0]))
+                    return best
+
+        input_generator: Optional. Returns ``dict[str, tuple]`` where
+            each key is a serialized config key and each value is a
+            tuple of arguments to pass to the kernel.
 
             Example::
 
@@ -535,3 +469,33 @@ def decorator(kernel_func: Callable) -> HelionKernelWrapper:
         return kernel_wrapper
 
     return decorator
+
+
+# Register HelionKernelWrapper with Dynamo's variable tracker system
+if _HOP_AVAILABLE:
+
+    def _register_vllm_helion_dynamo_variable():
+        """Register HelionKernelWrapper with Dynamo's VariableBuilder.
+
+        When Dynamo encounters a HelionKernelWrapper during tracing, this
+        extracts the underlying Helion Kernel and delegates to Helion's own
+        registered Kernel handler, which handles HOP emission, side table
+        registration, and inductor lowering setup.
+        """
+
+        def wrap_helion_kernel_wrapper(
+            builder: VariableBuilder, value: HelionKernelWrapper
+        ):
+            kernel = value.get_configured_op()._decorated_kernel
+            if supports_torch_compile_fusion():
+                helion_handler = VariableBuilder._type_dispatch()[Kernel]
+                return helion_handler(builder, kernel)
+            kernel_idx = helion_kernel_side_table.add_kernel(kernel)
+            builder.install_guards(GuardBuilder.ID_MATCH)
+            return HelionKernelVariable(kernel, kernel_idx, source=builder.source)
+
+        dispatch = VariableBuilder._type_dispatch()
+        dispatch[HelionKernelWrapper] = wrap_helion_kernel_wrapper
+
+    # Register immediately when the module is imported
+    _register_vllm_helion_dynamo_variable()
diff --git a/vllm/kernels/helion/utils.py b/vllm/kernels/helion/utils.py
index 5ff8046c73c5..130d79093b7f 100644
--- a/vllm/kernels/helion/utils.py
+++ b/vllm/kernels/helion/utils.py
@@ -2,11 +2,10 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Utility functions for Helion kernel management."""
 
-import logging
-
+from vllm.logger import init_logger
 from vllm.platforms import current_platform
 
-logger = logging.getLogger(__name__)
+logger = init_logger(__name__)
 
 # Maps known variant GPU names (after lowercase/underscore normalization)
 # to their canonical form.
@@ -49,7 +48,7 @@
 
 def get_gpu_name(device_id: int | None = None) -> str:
     if device_id is None:
-        logger.warning(
+        logger.warning_once(
             "get_gpu_name() called without device_id, defaulting to 0. "
             "This may return the wrong device name in multi-node setups."
         )
diff --git a/vllm/kernels/oink_ops.py b/vllm/kernels/oink_ops.py
new file mode 100644
index 000000000000..835cd062d037
--- /dev/null
+++ b/vllm/kernels/oink_ops.py
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""This file registers Oink implementations for vLLM IR ops.
+
+vLLM does not depend on the external Oink repository/package. When an external
+plugin registers torch.library.custom_op entrypoints under the `oink::`
+namespace (e.g. via vLLM's general_plugins mechanism), these ops will be marked
+ as supported. To dispatch to those ops, set kernel_config.ir_op_priority.<op> to oink.
+Alternatively, `VLLM_USE_OINK_OPS=1` will add this to priority by default.
+"""
+
+import torch
+from torch import Tensor
+
+from vllm import ir
+from vllm.platforms import current_platform
+
+OINK_AVAILABLE = current_platform.has_device_capability(100) and hasattr(
+    torch.ops, "oink"
+)
+
+
+def has_oink_op(name: str) -> bool:
+    """Check if a specific oink op is registered."""
+    return OINK_AVAILABLE and hasattr(torch.ops.oink, name)
+
+
+def _can_view_as_2d(x: Tensor) -> bool:
+    """Return True if x.view(-1, x.shape[-1]) is viewable (no copy)."""
+    if x.dim() < 2:
+        return False
+    if x.dim() == 2:
+        return True
+    # For a view(-1, N) to be valid, all leading dims must be contiguous with
+    # respect to each other (size-1 dims are ignored).
+    for dim in range(x.dim() - 1):
+        # Strides for size-1 dims are irrelevant and can be arbitrary.
+        if x.size(dim + 1) != 1 and x.stride(dim) != x.stride(dim + 1) * x.size(
+            dim + 1
+        ):
+            return False
+    return True
+
+
+def _is_oink_stride_compatible_2d(x_2d: Tensor) -> bool:
+    """Return True if x_2d meets Oink's pointer-path stride constraints."""
+    if x_2d.dim() != 2:
+        return False
+    if x_2d.stride(1) != 1:
+        return False
+    # Match Oink's vectorization constraint: stride(0) divisible by 256b.
+    if x_2d.dtype in (torch.float16, torch.bfloat16):
+        divby = 16
+    elif x_2d.dtype == torch.float32:
+        divby = 8
+    else:
+        return False
+    return (x_2d.stride(0) % divby) == 0
+
+
+oink_rms_supported = (
+    lambda x, weight, epsilon, variance_size=None: variance_size is None
+    and weight is not None
+    and x.dim() >= 2
+    and x.dtype == weight.dtype
+    and weight.is_contiguous()
+    and _can_view_as_2d(x)
+    and _is_oink_stride_compatible_2d(x.view(-1, x.shape[-1]))
+)
+"""
+Oink rms only supports 2d-like inputs with contiguous weight 
+and no variance_size override.
+"""
+
+
+@ir.ops.rms_norm.register_impl(
+    "oink", supports_args=oink_rms_supported, supported=has_oink_op("rmsnorm")
+)
+def rms_norm(
+    x: Tensor,
+    weight: Tensor | None,
+    epsilon: float,
+    variance_size: int | None = None,
+) -> Tensor:
+    assert variance_size is None
+    x_2d = x.view(-1, x.shape[-1])
+    return torch.ops.oink.rmsnorm(x_2d, weight, epsilon).view_as(x)
+
+
+oink_add_rms_supported = (
+    lambda x, x_residual, weight, epsilon, variance_size=None: variance_size is None
+    and weight is not None
+    and x.dim() >= 2
+    and x.dtype == weight.dtype
+    and weight.is_contiguous()
+    and _can_view_as_2d(x)
+    and _is_oink_stride_compatible_2d(x.view(-1, x.shape[-1]))
+    # residual must have 2d-compatible strides and match x shape/dtype
+    and x.dtype == x_residual.dtype
+    and x.shape == x_residual.shape
+    and _can_view_as_2d(x_residual)
+    and _is_oink_stride_compatible_2d(x_residual.view(-1, x_residual.shape[-1]))
+)
+"""
+Oink fused_add_rms_norm has the same constraints as rms_norm,
+and residual must be 2d-like with compatible strides.
+"""
+
+
+@ir.ops.fused_add_rms_norm.register_impl(
+    "oink",
+    supports_args=oink_add_rms_supported,
+    supported=has_oink_op("fused_add_rms_norm"),
+    inplace=True,
+)
+def fused_add_rms_norm(
+    x: Tensor,
+    x_residual: Tensor,
+    weight: Tensor | None,
+    epsilon: float,
+    variance_size: int | None = None,
+) -> tuple[Tensor, Tensor]:
+    assert variance_size is None
+    x_2d = x.view(-1, x.shape[-1])
+    residual_2d = x_residual.view(-1, x_residual.shape[-1])
+    torch.ops.oink.fused_add_rms_norm(x_2d, residual_2d, weight, epsilon)
+    return x, x_residual
diff --git a/vllm/kernels/triton/__init__.py b/vllm/kernels/triton/__init__.py
new file mode 100644
index 000000000000..6626213145ca
--- /dev/null
+++ b/vllm/kernels/triton/__init__.py
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Triton kernel implementations."""
diff --git a/vllm/kernels/triton/qkv_padded_fp8_quant.py b/vllm/kernels/triton/qkv_padded_fp8_quant.py
new file mode 100644
index 000000000000..74dfe1043638
--- /dev/null
+++ b/vllm/kernels/triton/qkv_padded_fp8_quant.py
@@ -0,0 +1,180 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Stride-aware FP8 quantization with head_dim padding for ViT attention.
+
+Reads directly from non-contiguous QKV views using 3D strides and pads
+head_dim to a multiple of 16 for cuDNN compatibility.
+"""
+
+import torch
+
+from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    get_fp8_min_max,
+)
+from vllm.platforms import current_platform
+from vllm.triton_utils import HAS_TRITON, tl, triton
+from vllm.utils.math_utils import round_up
+
+_FP8_MIN, _FP8_MAX = get_fp8_min_max()
+
+
+@triton.jit
+def _quantize_pad_fp8_kernel(
+    x_ptr,
+    y_ptr,
+    scale_ptr,
+    stride_xs,
+    stride_xh,
+    stride_xd,
+    stride_ys,
+    stride_yh,
+    stride_yd,
+    num_heads,
+    n_rows,
+    n_cols,
+    n_cols_padded,
+    fp8_min,
+    fp8_max,
+    SKIP_SCALE: tl.constexpr,
+    BLOCK_M: tl.constexpr,
+    BLOCK_N: tl.constexpr,
+):
+    pid_m = tl.program_id(0)
+    pid_n = tl.program_id(1)
+
+    offs_m = pid_m * BLOCK_M + tl.arange(0, BLOCK_M)
+    offs_n = pid_n * BLOCK_N + tl.arange(0, BLOCK_N)
+    mask_m = offs_m < n_rows
+    mask_out = mask_m[:, None] & (offs_n[None, :] < n_cols_padded)
+    mask_in = mask_m[:, None] & (offs_n[None, :] < n_cols)
+
+    # Decompose flattened row into (token, head) for 3D stride indexing.
+    s = offs_m // num_heads
+    h = offs_m % num_heads
+
+    x_ptrs = (
+        x_ptr
+        + s[:, None] * stride_xs
+        + h[:, None] * stride_xh
+        + offs_n[None, :] * stride_xd
+    )
+    x = tl.load(x_ptrs, mask=mask_in, other=0.0).to(tl.float32)
+    if SKIP_SCALE:
+        x_q = x
+    else:
+        scale = tl.load(scale_ptr)
+        x_q = x / scale
+    x_q = tl.clamp(x_q, fp8_min, fp8_max).to(y_ptr.dtype.element_ty)
+
+    y_ptrs = (
+        y_ptr
+        + s[:, None] * stride_ys
+        + h[:, None] * stride_yh
+        + offs_n[None, :] * stride_yd
+    )
+    tl.store(y_ptrs, x_q, mask=mask_out)
+
+
+def _get_fp8_pad_quant_config(padded_head_dim: int) -> tuple[int, int, int]:
+    block_n = triton.next_power_of_2(padded_head_dim)
+    block_n = max(16, min(block_n, 128))
+    block_m = 16
+    num_warps = 4
+    return block_m, block_n, num_warps
+
+
+def quantize_fp8_pad_head_dim_triton(
+    tensor: torch.Tensor,
+    scale: torch.Tensor,
+    skip_scale: bool = False,
+    block_m: int | None = None,
+    block_n: int | None = None,
+    num_warps: int | None = None,
+) -> torch.Tensor:
+    """Quantize a 3D/4D tensor to FP8, padding head_dim to a multiple of 16.
+
+    Reads directly from the input using its 3D strides, so non-contiguous
+    views (e.g. Q/K/V slices from an interleaved QKV buffer) are handled
+    without an extra copy.  Output is always a fresh contiguous tensor
+    with shape (S, H, padded_D).
+    """
+    if not HAS_TRITON:
+        raise RuntimeError("Triton is required to quantize with head_dim padding.")
+
+    original_shape = tensor.shape
+    if tensor.dim() == 4:
+        tensor = tensor.view(-1, tensor.shape[-2], tensor.shape[-1])
+    assert tensor.dim() == 3, f"Expected 3D input (S, H, D), got {tensor.dim()}D"
+    S, H, D = tensor.shape
+    padded_head_dim = round_up(D, 16)
+    out_dtype = current_platform.fp8_dtype()
+    output = torch.empty(
+        (S, H, padded_head_dim),
+        device=tensor.device,
+        dtype=out_dtype,
+    )
+
+    scale_1d = scale.reshape(-1)
+    n_rows = S * H
+
+    if block_m is None or block_n is None or num_warps is None:
+        block_m, block_n, num_warps = _get_fp8_pad_quant_config(padded_head_dim)
+
+    grid = (
+        triton.cdiv(n_rows, block_m),
+        triton.cdiv(padded_head_dim, block_n),
+    )
+
+    _quantize_pad_fp8_kernel[grid](
+        tensor,
+        output,
+        scale_1d,
+        tensor.stride(0),
+        tensor.stride(1),
+        tensor.stride(2),
+        output.stride(0),
+        output.stride(1),
+        output.stride(2),
+        H,
+        n_rows,
+        D,
+        padded_head_dim,
+        _FP8_MIN,
+        _FP8_MAX,
+        SKIP_SCALE=skip_scale,
+        BLOCK_M=block_m,
+        BLOCK_N=block_n,
+        num_warps=num_warps,
+    )
+
+    return output.view((*original_shape[:-1], padded_head_dim))
+
+
+def quantize_fp8_maybe_pad_head_dim(
+    tensor: torch.Tensor,
+    scale: torch.Tensor,
+    fp8_quant: QuantFP8,
+    skip_scale: bool = False,
+) -> torch.Tensor:
+    """Quantize a 3D/4D tensor to FP8, padding head_dim to a multiple of 16
+    only when needed.
+
+    Accepts (S, H, D) or (B, S, H, D) input. Uses ``fp8_quant`` (a
+    :class:`QuantFP8` CustomOp) when head_dim is already aligned to 16
+    (no padding); otherwise falls back to a stride-aware Triton kernel
+    that pads head_dim to a multiple of 16.
+    """
+    head_dim = tensor.shape[-1]
+    if head_dim % 16 != 0:
+        return quantize_fp8_pad_head_dim_triton(tensor, scale, skip_scale=skip_scale)
+
+    if skip_scale:
+        return tensor.to(current_platform.fp8_dtype())
+
+    # QuantFP8 expects 2D: flatten all dims except (H, D).
+    orig_shape = tensor.shape
+    total_tokens = tensor.numel() // (orig_shape[-1] * orig_shape[-2])
+    tensor_2d = tensor.reshape(total_tokens, -1)
+    fp8_tensor, _ = fp8_quant(tensor_2d, scale=scale)
+    return fp8_tensor.reshape(orig_shape)
diff --git a/vllm/kernels/vllm_c.py b/vllm/kernels/vllm_c.py
new file mode 100644
index 000000000000..3b194b2ab935
--- /dev/null
+++ b/vllm/kernels/vllm_c.py
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+from torch import Tensor
+
+from vllm import ir
+from vllm.platforms import current_platform
+
+current_platform.import_kernels()
+
+CUDA_ALIKE = current_platform.is_cuda_alike()
+"""Most kernels in this file are supported on all CUDA-alike platforms."""
+IS_ROCM = current_platform.is_rocm()
+"""ROCm needs shape normalization before calling some vLLM C kernels."""
+
+rms_no_var_size = lambda x, weight, epsilon, variance_size=None: (
+    variance_size is None and (weight is None or weight.dtype == x.dtype)
+)
+"""vLLM kernel requires no variance_size override and matching input/weight dtype."""
+
+
+@ir.ops.rms_norm.register_impl(
+    "vllm_c", supports_args=rms_no_var_size, supported=CUDA_ALIKE
+)
+def rms_norm(
+    x: Tensor, weight: Tensor | None, epsilon: float, variance_size: int | None = None
+) -> Tensor:
+    if weight is None:
+        # Kernel requires weight tensor, pass ones
+        weight = torch.ones(x.shape[-1], device=x.device, dtype=x.dtype)
+    assert variance_size is None
+    # ROCm's vLLM C RMSNorm kernel operates on contiguous 2D tensors.
+    # Higher-rank callers still normalize over the last dimension, so flatten
+    # all leading dims. reshape handles strided views from q/k/v splits.
+    if IS_ROCM and (x.dim() > 2 or not x.is_contiguous()):
+        original_shape = x.shape
+        x = x.reshape(-1, original_shape[-1])
+        output = torch.empty_like(x)
+        torch.ops._C.rms_norm(output, x, weight, epsilon)
+        return output.reshape(original_shape)
+
+    output = torch.empty(x.shape, device=x.device, dtype=x.dtype)
+    torch.ops._C.rms_norm(output, x, weight, epsilon)
+    return output
+
+
+rms_add_no_var_size = lambda x, x_residual, weight, epsilon, variance_size=None: (
+    variance_size is None and (weight is None or weight.dtype == x.dtype)
+)
+"""vLLM Kernel does not support variance_size parameter and requires
+matching input/weight dtype."""
+
+
+@ir.ops.fused_add_rms_norm.register_impl(
+    "vllm_c",
+    supports_args=rms_add_no_var_size,
+    supported=CUDA_ALIKE,
+    inplace=True,
+)
+def fused_add_rms_norm(
+    x: Tensor,
+    x_residual: Tensor,
+    weight: Tensor | None,
+    epsilon: float,
+    variance_size: int | None = None,
+) -> tuple[Tensor, Tensor]:
+    if weight is None:
+        # Kernel requires weight tensor, pass ones
+        weight = torch.ones(x.shape[-1], device=x.device, dtype=x.dtype)
+
+    assert variance_size is None
+    if IS_ROCM and (not x.is_contiguous() or not x_residual.is_contiguous()):
+        output, residual = ir.ops.fused_add_rms_norm.impls["native"].impl_fn(
+            x, x_residual, weight, epsilon
+        )
+        x.copy_(output)
+        x_residual.copy_(residual)
+        return x, x_residual
+
+    # ROCm's vLLM C RMSNorm kernel operates on contiguous 2D tensors.
+    # Higher-rank callers still normalize over the last dimension, so flatten
+    # all leading dims.
+    if IS_ROCM and x.dim() > 2:
+        original_shape = x.shape
+        x = x.view(-1, original_shape[-1])
+        x_residual = x_residual.view(-1, original_shape[-1])
+        torch.ops._C.fused_add_rms_norm(x, x_residual, weight, epsilon)
+        return x.view(original_shape), x_residual.view(original_shape)
+
+    torch.ops._C.fused_add_rms_norm(x, x_residual, weight, epsilon)
+    return x, x_residual
diff --git a/vllm/kernels/xpu_ops.py b/vllm/kernels/xpu_ops.py
new file mode 100644
index 000000000000..5e7f90f70868
--- /dev/null
+++ b/vllm/kernels/xpu_ops.py
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+from torch import Tensor
+
+from vllm import ir
+from vllm.platforms import current_platform
+
+current_platform.import_kernels()
+
+
+def is_xpu_kernels_found() -> bool:
+    from importlib.util import find_spec
+
+    return find_spec("vllm_xpu_kernels") is not None
+
+
+XPU_KERNELS_SUPPORTED = is_xpu_kernels_found()
+"""Kernels in this file are supported if vLLM XPU kernels are installed."""
+
+rms_no_var = lambda x, weight, epsilon, variance_size=None: variance_size is None and (
+    weight is None or weight.dtype == x.dtype
+)
+
+
+@ir.ops.rms_norm.register_impl(
+    "xpu_kernels", supports_args=rms_no_var, supported=XPU_KERNELS_SUPPORTED
+)
+def rms_norm(
+    x: Tensor, weight: Tensor | None, epsilon: float, variance_size: int | None = None
+) -> Tensor:
+    if weight is None:
+        # Kernel requires weight tensor, pass ones
+        weight = torch.ones(x.shape[-1], device=x.device, dtype=x.dtype)
+    assert variance_size is None
+    output = torch.empty(x.shape, device=x.device, dtype=x.dtype)
+    torch.ops._C.rms_norm(output, x, weight, epsilon)
+    return output
+
+
+rms_add_no_var_size = (
+    lambda x, x_residual, weight, epsilon, variance_size=None: variance_size is None
+    and (weight is None or weight.dtype == x.dtype)
+)
+
+
+@ir.ops.fused_add_rms_norm.register_impl(
+    "xpu_kernels",
+    supports_args=rms_add_no_var_size,
+    supported=XPU_KERNELS_SUPPORTED,
+    inplace=True,
+)
+def fused_add_rms_norm(
+    x: Tensor,
+    x_residual: Tensor,
+    weight: Tensor | None,
+    epsilon: float,
+    variance_size: int | None = None,
+) -> tuple[Tensor, Tensor]:
+    if weight is None:
+        # Kernel requires weight tensor, pass ones
+        weight = torch.ones(x.shape[-1], device=x.device, dtype=x.dtype)
+
+    assert variance_size is None
+    torch.ops._C.fused_add_rms_norm(x, x_residual, weight, epsilon)
+    return x, x_residual
diff --git a/vllm/logging_utils/__init__.py b/vllm/logging_utils/__init__.py
index 94dee07ed8ca..b83d499dbbb8 100644
--- a/vllm/logging_utils/__init__.py
+++ b/vllm/logging_utils/__init__.py
@@ -8,6 +8,7 @@
 from vllm.logging_utils.formatter import ColoredFormatter, NewLineFormatter
 from vllm.logging_utils.lazy import lazy
 from vllm.logging_utils.log_time import logtime
+from vllm.logging_utils.torch_tensor import tensors_str_no_data
 
 __all__ = [
     "NewLineFormatter",
@@ -16,4 +17,5 @@
     "create_uvicorn_log_config",
     "lazy",
     "logtime",
+    "tensors_str_no_data",
 ]
diff --git a/vllm/logging_utils/torch_tensor.py b/vllm/logging_utils/torch_tensor.py
new file mode 100644
index 000000000000..7af4326ba311
--- /dev/null
+++ b/vllm/logging_utils/torch_tensor.py
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import Any
+
+
+def tensors_str_no_data(arg: Any):
+    from torch._tensor_str import printoptions
+
+    with printoptions(threshold=1, edgeitems=0):
+        return str(arg)
diff --git a/vllm/lora/layers/__init__.py b/vllm/lora/layers/__init__.py
index 235f40b73852..1f3fdea2cdaf 100644
--- a/vllm/lora/layers/__init__.py
+++ b/vllm/lora/layers/__init__.py
@@ -13,7 +13,6 @@
     QKVParallelLinearWithShardedLoRA,
 )
 from vllm.lora.layers.fused_moe import FusedMoE3DWithLoRA, FusedMoEWithLoRA
-from vllm.lora.layers.gate_linear import GateLinearWithLoRA
 from vllm.lora.layers.logits_processor import LogitsProcessorWithLoRA
 from vllm.lora.layers.replicated_linear import ReplicatedLinearWithLoRA
 from vllm.lora.layers.row_parallel_linear import (
@@ -39,7 +38,6 @@
     "RowParallelLinearWithLoRA",
     "RowParallelLinearWithShardedLoRA",
     "ReplicatedLinearWithLoRA",
-    "GateLinearWithLoRA",
     "LoRAMapping",
     "LoRAMappingType",
     "FusedMoEWithLoRA",
diff --git a/vllm/lora/layers/base_linear.py b/vllm/lora/layers/base_linear.py
index 1b666dcb790c..68783ae50d4b 100644
--- a/vllm/lora/layers/base_linear.py
+++ b/vllm/lora/layers/base_linear.py
@@ -5,8 +5,15 @@
 import torch
 from transformers import PretrainedConfig
 
+from vllm import envs
+from vllm.config import get_current_vllm_config
 from vllm.config.lora import LoRAConfig
 from vllm.distributed.utils import divide
+from vllm.forward_context import (
+    ForwardContext,
+    get_forward_context,
+    is_forward_context_available,
+)
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
     LinearBase,
@@ -14,24 +21,88 @@
     RowParallelLinear,
 )
 from vllm.platforms import current_platform
+from vllm.utils.multi_stream_utils import maybe_execute_in_parallel
+from vllm.utils.torch_utils import direct_register_custom_op
 
 from .base import BaseLayerWithLoRA
 from .utils import _get_lora_device
 
+if envs.VLLM_LORA_ENABLE_DUAL_STREAM:
+    _lora_aux_cuda_stream: torch.cuda.Stream | None = None
+
+    def _get_lora_aux_cuda_stream() -> torch.cuda.Stream | None:
+        global _lora_aux_cuda_stream
+        if _lora_aux_cuda_stream is None and current_platform.is_cuda_alike():
+            _lora_aux_cuda_stream = torch.cuda.Stream()
+        return _lora_aux_cuda_stream
+
+    def lora_linear_async(
+        layer_name: str,
+        output_size: int,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        forward_context: ForwardContext = get_forward_context()
+        self = forward_context.no_compile_layers[layer_name]
+        return self._apply_async_impl(x, bias)
+
+    def lora_linear_async_fake(
+        layer_name: str,
+        output_size: int,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        # The real function reshapes output back to the original 3D shape
+        # when the input has an extra batch dimension (transformers backend).
+        if x.ndim == 3:
+            return torch.empty(
+                (x.size(0), x.size(1), output_size),
+                device=x.device,
+                dtype=x.dtype,
+            )
+        return torch.empty(
+            (x.size(0), output_size),
+            device=x.device,
+            dtype=x.dtype,
+        )
+
+    direct_register_custom_op(
+        op_name="lora_linear_async",
+        op_func=lora_linear_async,
+        fake_impl=lora_linear_async_fake,
+    )
+
 
 class BaseLinearLayerWithLoRA(BaseLayerWithLoRA):
     def __init__(self, base_layer: LinearBase):
         super().__init__()
+
+        self._enable_aux_cuda_stream = envs.VLLM_LORA_ENABLE_DUAL_STREAM
         self.base_layer = base_layer
         self.input_size = self.base_layer.input_size
         # Ensure tp_size and tp_rank consistency with the base_layer.
         self.tp_size = self.base_layer.tp_size
         self.tp_rank = self.base_layer.tp_rank
         self.device = _get_lora_device(self.base_layer)
+        self._init_lora_stream_context()
         self.output_slices: tuple[int, ...]
         self.output_size: int
         self.n_slices: int
 
+    def _init_lora_stream_context(self) -> None:
+        if not self._enable_aux_cuda_stream:
+            return
+        vllm_config = get_current_vllm_config()
+        self._lora_stream = _get_lora_aux_cuda_stream()
+        assert current_platform.is_cuda_alike()
+        self._events = [torch.cuda.Event(), torch.cuda.Event()]
+        # lora_linear avoids prefix conflicts with the base layer
+        self.layer_name = self.base_layer.prefix + ".lora_linear_async"
+        compilation_config = vllm_config.compilation_config
+        if self.layer_name in compilation_config.static_forward_context:
+            raise ValueError("Duplicate layer name: {}".format(self.layer_name))
+        compilation_config.static_forward_context[self.layer_name] = self
+
     def create_lora_weights(
         self,
         max_loras: int,
@@ -39,7 +110,6 @@ def create_lora_weights(
         model_config: PretrainedConfig | None = None,
     ) -> None:
         self.lora_config = lora_config
-        #
         if isinstance(self.base_layer, ReplicatedLinear):
             lora_a_out_size = lora_config.max_lora_rank
             lora_b_out_size = self.output_size
@@ -120,8 +190,29 @@ def set_lora(
         )
 
     def apply(self, x: torch.Tensor, bias: torch.Tensor | None = None) -> torch.Tensor:
+        # is_forward_context_available for tower modules
+        if self._enable_aux_cuda_stream and is_forward_context_available():
+            output_size = sum(self.output_slices)
+            return torch.ops.vllm.lora_linear_async(
+                self.layer_name, output_size, x, bias
+            )
+        else:
+            return self._apply_sync(x, bias)
+
+    def _apply_sync(
+        self, x: torch.Tensor, bias: torch.Tensor | None = None
+    ) -> torch.Tensor:
         output = self.base_layer.quant_method.apply(self.base_layer, x, bias)
+        return self._apply_lora_to_output(x, output)
 
+    def _apply_base_forward(self, x: torch.Tensor) -> torch.Tensor:
+        base_output = self.base_layer(x)
+        output = base_output[0] if isinstance(base_output, tuple) else base_output
+        return self._apply_lora_to_output(x, output)
+
+    def _apply_lora_to_output(
+        self, x: torch.Tensor, output: torch.Tensor
+    ) -> torch.Tensor:
         original_shape = output.shape if output.ndim == 3 else None
 
         # In transformers backend, x and output have extra batch dimension like
@@ -144,6 +235,72 @@ def apply(self, x: torch.Tensor, bias: torch.Tensor | None = None) -> torch.Tens
 
         return output
 
+    def _apply_async_impl(
+        self, x: torch.Tensor, bias: torch.Tensor | None = None
+    ) -> torch.Tensor:
+        """
+        Forward pass with base linear and LoRA on separate CUDA streams
+        for overlap, using maybe_execute_in_parallel.
+        Base layer runs on default stream; LoRA runs on aux stream.
+        """
+        assert envs.VLLM_LORA_ENABLE_DUAL_STREAM
+        assert x.ndim in (2, 3)
+        num_tokens = x.size(0) if x.ndim == 2 else x.size(1)
+        output_size = sum(self.output_slices)
+
+        def base_fn() -> torch.Tensor:
+            return self.base_layer.quant_method.apply(self.base_layer, x, bias)
+
+        def lora_fn() -> torch.Tensor:
+            # Must be zeros, not empty: _lora_expand_kernel exits early (without
+            # writing) when lora_id == -1 (no active LoRA). If uninitialized,
+            # output.add_(lora_result) below would corrupt the base output.
+            lora_output = torch.zeros(
+                (num_tokens, output_size),
+                device=self.device,
+                dtype=x.dtype,
+            )
+
+            # Flatten the batch dimension for the transformers backend
+            # (which uses shape (1, seq_len, hidden)), matching _apply_sync.
+            x_2d = x.flatten(0, 1) if x.ndim == 3 else x
+            self.punica_wrapper.add_lora_linear(
+                lora_output,
+                x_2d,
+                self.lora_a_stacked,
+                self.lora_b_stacked,
+                1.0,
+                self.output_slices,
+                add_inputs=False,
+            )
+            return lora_output
+
+        output, lora_result = maybe_execute_in_parallel(
+            base_fn,
+            lora_fn,
+            self._events[0],
+            self._events[1],
+            self._lora_stream,
+        )
+
+        original_shape = output.shape if output.ndim == 3 else None
+
+        # In transformers backend, x and output have extra batch dimension like
+        # (1, seq_len, hidden_dim), while punica expects (seq_len, hidden_dim),
+        # therefore we need to flatten the batch dimensions.
+        if x.ndim == 3 and output.ndim == 3:
+            output = output.flatten(0, 1)
+            x = x.flatten(0, 1)
+
+        output.add_(lora_result)
+
+        # Reshape the flattened output back to its original shape,
+        # as some MM encoders cannot handle flattened inputs.
+        if original_shape is not None:
+            output = output.reshape(original_shape)
+
+        return output
+
     @property
     def weight(self) -> torch.Tensor:
         # unquantizedLinear
diff --git a/vllm/lora/layers/column_parallel_linear.py b/vllm/lora/layers/column_parallel_linear.py
index f49a3fcbb941..8a86191b8918 100644
--- a/vllm/lora/layers/column_parallel_linear.py
+++ b/vllm/lora/layers/column_parallel_linear.py
@@ -40,11 +40,19 @@ def _mcp_apply(x, bias, layer: "ColumnParallelLinearWithLoRA"):
 
     # Since communication is needed, the buffer is directly initialized as a
     # tensor rather than a tuple of tensor.
-    buffers = torch.zeros(
-        (layer.n_slices, x.shape[0], layer.lora_a_stacked[0].shape[2]),
+    local_lora_rank = layer.lora_a_stacked[0].shape[2]
+    buffer_shape = (layer.n_slices, x.shape[0], local_lora_rank)
+    # Under torch.compile, the local-rank-1 fully-sharded path can otherwise
+    # get lowered to a reinterpret view with a non-canonical layout. The
+    # Triton shrink op mutates this buffer in place and expects the standard
+    # contiguous [slice, token, rank] stride contract.
+    buffers = torch.empty_strided(
+        buffer_shape,
+        (x.shape[0] * local_lora_rank, local_lora_rank, 1),
         dtype=torch.float32,
         device=x.device,
     )
+    buffers.zero_()
 
     shrunk_buffers: torch.Tensor | None = layer.punica_wrapper.add_shrink(
         buffers, x, layer.lora_a_stacked, 1.0
@@ -86,7 +94,7 @@ def __init__(self, base_layer: ColumnParallelLinear) -> None:
         # The base_layer type is ColumnParallelLinear or
         # MergedColumnParallelLinear, their weight sharding logic is
         # inconsistent when TP is greater than 1.
-        self.is_merged_col_linear = type(base_layer) is MergedColumnParallelLinear
+        self.is_merged_col_linear = isinstance(base_layer, MergedColumnParallelLinear)
         self.output_size = self.base_layer.output_size_per_partition
         # There is only one LoRA layer
         self.n_slices = 1
@@ -158,7 +166,7 @@ def can_replace_layer(
     ) -> bool:
         if type(source_layer) is maybe_get_oot_by_class(ColumnParallelLinear):
             return True
-        if type(source_layer) is maybe_get_oot_by_class(MergedColumnParallelLinear):
+        if isinstance(source_layer, maybe_get_oot_by_class(MergedColumnParallelLinear)):
             if len(packed_modules_list) != 1:
                 return False
             # Exclude layers with 3+ output sizes - those are handled by
@@ -187,9 +195,9 @@ def __init__(
         # There are two LoRA layers
         # the output_sizes in MergedColumnParallelLinear is not sharded by tp
         # we need to divide it by the tp_size to get correct slices size
-        output_sizes = self.base_layer.output_sizes
+        self.output_sizes = self.base_layer.output_sizes
         self.output_slices = tuple(
-            divide(output_size, self.tp_size) for output_size in output_sizes
+            divide(output_size, self.tp_size) for output_size in self.output_sizes
         )
         self.n_slices = len(self.output_slices)
         self.output_ids = (self.tp_rank,) * self.n_slices
@@ -253,6 +261,42 @@ def slice_lora_b(
                 ]
         return sliced_lora_b
 
+    def expand_packed_lora(
+        self,
+        lora_a: list[torch.Tensor],
+        lora_b: list[torch.Tensor],
+    ) -> tuple[list[torch.Tensor], list[torch.Tensor]]:
+        """
+        Expand packed adapter groups when they don't match n_slices.
+        E.g. in_proj_qkv (covers Q+K+V) + in_proj_z
+        """
+        expanded_a: list[torch.Tensor] = []
+        expanded_b: list[torch.Tensor] = []
+        start_idx = 0
+        for a_i, b_i in zip(lora_a, lora_b):
+            # Determine which output slices this b_i covers.
+            b_rows, cu_rows, covered = b_i.shape[0], 0, 0
+            for i in range(start_idx, self.n_slices):
+                cu_rows += self.output_sizes[i]
+                if cu_rows == b_rows:
+                    covered = i - start_idx + 1
+                    break
+            else:
+                raise ValueError(
+                    f"Cannot determine how to split lora_b with {b_rows} rows "
+                    f"into {self.n_slices} slices with output sizes "
+                    f"{self.output_sizes} starting from index {start_idx}."
+                )
+            # Split b_i into per-slice tensors and replicate a_i for each.
+            start = 0
+            for j in range(covered):
+                size = self.output_sizes[start_idx + j]
+                expanded_b.append(b_i[start : start + size, :])
+                expanded_a.append(a_i)
+                start += size
+            start_idx += covered
+        return expanded_a, expanded_b
+
     def set_lora(
         self,
         index: int,
@@ -261,6 +305,12 @@ def set_lora(
     ):
         self.reset_lora(index)
 
+        # Expand packed adapter groups when they don't match n_slices.
+        # E.g. in_proj_qkv (covers Q+K+V) + in_proj_z as 2 groups for a
+        # 4-slice layer: split b_qkv by output_sizes and replicate a_qkv.
+        if isinstance(lora_b, list) and len(lora_b) != self.n_slices:
+            lora_a, lora_b = self.expand_packed_lora(lora_a, lora_b)
+
         if self.tp_size > 1:
             lora_a = self.slice_lora_a(lora_a)
             lora_b = self.slice_lora_b(lora_b)
@@ -275,19 +325,41 @@ def set_lora(
                     index, 0, : lora_b_i.shape[0], : lora_b_i.shape[1]
                 ].copy_(lora_b_i, non_blocking=True)
 
+    def apply(self, x: torch.Tensor, bias: torch.Tensor | None = None) -> torch.Tensor:
+        merged_cls = maybe_get_oot_by_class(MergedColumnParallelLinear)
+        # Effectively unsharded subclasses can safely reuse their custom
+        # forward() implementation before applying the LoRA delta.
+        if (
+            self.tp_size == 1
+            and type(self.base_layer) is not merged_cls
+            and type(self.base_layer).forward is not merged_cls.forward
+        ):
+            return self._apply_base_forward(x)
+        return _mcp_apply(x, bias, self)
+
     @classmethod
-    @_not_fully_sharded_can_replace
     def can_replace_layer(
         cls,
         source_layer: nn.Module,
         lora_config: LoRAConfig,
         packed_modules_list: list,
         model_config: PretrainedConfig | None = None,
+        decorate: bool = True,
     ) -> bool:
-        return (
-            type(source_layer) is MergedColumnParallelLinear
-            and len(packed_modules_list) == 2
-        )
+        merged_cls = maybe_get_oot_by_class(MergedColumnParallelLinear)
+        if not isinstance(source_layer, merged_cls) or len(packed_modules_list) != 2:
+            return False
+
+        tp_size = getattr(source_layer, "tp_size", 1)
+        if type(source_layer) is merged_cls:
+            if not decorate:
+                return True
+            return not lora_config.fully_sharded_loras or tp_size == 1
+
+        # Only support effectively unsharded subclasses here. Sharded
+        # subclasses may have custom communication semantics that the generic
+        # merged-column LoRA path does not know how to preserve.
+        return tp_size == 1
 
 
 class QKVParallelLinearWithLoRA(ColumnParallelLinearWithLoRA):
@@ -350,7 +422,10 @@ def can_replace_layer(
         packed_modules_list: list,
         model_config: PretrainedConfig | None = None,
     ) -> bool:
-        return type(source_layer) is QKVParallelLinear and len(packed_modules_list) == 1
+        return (
+            type(source_layer) is maybe_get_oot_by_class(QKVParallelLinear)
+            and len(packed_modules_list) == 1
+        )
 
 
 class MergedQKVParallelLinearWithLoRA(MergedColumnParallelLinearWithLoRA):
@@ -408,7 +483,10 @@ def can_replace_layer(
         packed_modules_list: list,
         model_config: PretrainedConfig | None = None,
     ) -> bool:
-        return type(source_layer) is QKVParallelLinear and len(packed_modules_list) == 3
+        return (
+            type(source_layer) is maybe_get_oot_by_class(QKVParallelLinear)
+            and len(packed_modules_list) == 3
+        )
 
 
 # These following layers are based on the tensor parallelism strategy given in
@@ -467,18 +545,14 @@ class MergedColumnParallelLinearWithShardedLoRA(MergedColumnParallelLinearWithLo
     def slice_lora_a(
         self, lora_a: list[torch.Tensor | None]
     ) -> list[torch.Tensor | None]:
-        # NOTE: lora_a contains 2 subloras, and each sublora could be None.
         output_shard_size = self.lora_a_stacked[0].shape[2]
         output_start_idx = self.tp_rank * output_shard_size
-        lora_a = [
-            lora_a[0][output_start_idx : output_start_idx + output_shard_size, :]
-            if lora_a[0] is not None
-            else None,
-            lora_a[1][output_start_idx : output_start_idx + output_shard_size, :]
-            if lora_a[1] is not None
-            else None,
+        return [
+            lora_a_i[output_start_idx : output_start_idx + output_shard_size, :]
+            if (lora_a_i := lora_a[i]) is not None
+            else None
+            for i in range(len(lora_a))
         ]
-        return lora_a
 
     def apply(self, x: torch.Tensor, bias: torch.Tensor | None = None) -> torch.Tensor:
         return _mcp_apply(x, bias, self)
@@ -607,7 +681,9 @@ def can_replace_layer(
     ) -> bool:
         # Support MergedColumnParallelLinear with 3 or more slices
         # (2 slices are handled by MergedColumnParallelLinearWithLoRA)
-        if type(source_layer) is not maybe_get_oot_by_class(MergedColumnParallelLinear):
+        if not isinstance(
+            source_layer, maybe_get_oot_by_class(MergedColumnParallelLinear)
+        ):
             return False
 
         # If packed_modules_list has 3+ items, use this class
diff --git a/vllm/lora/layers/fused_moe.py b/vllm/lora/layers/fused_moe.py
index a8bc559ce747..dc83143a751a 100644
--- a/vllm/lora/layers/fused_moe.py
+++ b/vllm/lora/layers/fused_moe.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import functools
 
 import torch
 import torch.nn as nn
@@ -8,346 +7,80 @@
 
 from vllm import envs
 from vllm.config.lora import LoRAConfig
-from vllm.distributed.parallel_state import (
-    get_tensor_model_parallel_rank,
-    get_tensor_model_parallel_world_size,
-)
 from vllm.distributed.utils import divide
 from vllm.lora.layers.base import BaseLayerWithLoRA
-from vllm.lora.ops.triton_ops.utils import get_lora_op_configs
+from vllm.model_executor.custom_op import maybe_get_oot_by_class
 from vllm.model_executor.layers.fused_moe import FusedMoE
-from vllm.model_executor.layers.fused_moe.config import (
-    _get_config_dtype_str,
-)
-from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
-    MarlinExperts,
-)
-from vllm.model_executor.layers.fused_moe.fused_moe import (
-    TritonExperts,
-)
+from vllm.model_executor.layers.fused_moe.experts.lora_context import MoELoRAContext
 from vllm.model_executor.layers.fused_moe.fused_moe_modular_method import (
     FusedMoEModularMethod,
 )
-from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (
-    UnfusedOAITritonExperts,
-)
-from vllm.model_executor.layers.fused_moe.modular_kernel import (
-    FusedMoEKernel,
-)
+from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEKernel
 from vllm.model_executor.layers.fused_moe.prepare_finalize import (
     MoEPrepareAndFinalizeNoDPEPModular,
 )
 
-from .utils import _get_lora_device, try_get_optimal_moe_lora_config
+from .utils import _get_lora_device
 
 
 class FusedMoEWithLoRA(BaseLayerWithLoRA):
     def __init__(self, base_layer: FusedMoE) -> None:
         super().__init__()
         self.base_layer = base_layer
-
-        assert not self.base_layer.use_ep, (
-            "EP support for Fused MoE LoRA is not implemented yet."
-        )
-        self.tp_size = get_tensor_model_parallel_world_size()
-        self.tp_rank = get_tensor_model_parallel_rank()
+        self._ep_check()
+        # Use the MoE-aware TP rank/size: when EP is active, FusedMoE collapses
+        # moe_parallel_config.tp_size to 1 (experts are sharded across the
+        # TP group instead).
+        self.tp_size = self.base_layer.tp_size
+        self.tp_rank = self.base_layer.tp_rank
         self.device = _get_lora_device(base_layer)
         # For non-gated MoE (is_act_and_mul=False), only 1 slice is needed
         # since there's only up_proj (w1), not gate_proj + up_proj (w1 + w3)
         self._w13_slices = 2 if base_layer.moe_config.is_act_and_mul else 1
-        self._inject_lora_into_fused_moe()
-
-    def _normalize_keys(self, config: dict[str, int | None]) -> dict[str, int | None]:
-        normalized_config = {}
-        for key, value in config.items():
-            if key.islower():
-                if key.startswith("block_"):
-                    normalized_key = "BLOCK_SIZE_" + key.split("_")[-1].upper()
-                else:
-                    normalized_key = key.upper()
-            else:
-                normalized_key = key
-            normalized_config[normalized_key] = value
-        return normalized_config
-
-    def _get_lora_moe_configs(
-        self,
-        op_prefix: str,
-        num_loras: int,
-        rank: int,
-        num_slices: int,
-        M: int,
-        layer: FusedMoE,
-        top_k: int,
-        config_dtype: str,
-    ):
-        if envs.VLLM_TUNED_CONFIG_FOLDER:
-            hidden_size = layer.hidden_size
-            intermediate_size = (
-                self.w2_lora_a_stacked[0].shape[-1]
-                if op_prefix == "w2"
-                else self.w13_lora_b_stacked[0].shape[-2]
-            )
-            shrink_config = get_lora_op_configs(
-                op_type=f"fused_moe_lora_{op_prefix}_shrink",
-                max_loras=num_loras,
-                batch=M,
-                hidden_size=hidden_size,
-                rank=rank,
-                num_slices=num_slices,
-                moe_intermediate_size=intermediate_size,
-            )
-            expand_config = get_lora_op_configs(
-                op_type=f"fused_moe_lora_{op_prefix}_expand",
-                max_loras=num_loras,
-                batch=M,
-                hidden_size=hidden_size,  # lora_a_stacked.shape[-1],
-                rank=rank,
-                num_slices=num_slices,
-                moe_intermediate_size=intermediate_size,  # lora_b_stacked.shape[-2],
-            )
-        else:  # fall back to the default config
-            get_config_func = functools.partial(
-                try_get_optimal_moe_lora_config,
-                w1_shape=layer.w13_weight.shape,
-                w2_shape=layer.w2_weight.shape,
-                rank=rank,
-                top_k=top_k,
-                dtype=config_dtype,
-                M=M,
-                block_shape=layer.quant_method.moe_quant_config.block_shape,
-            )
-            shrink_config = get_config_func(
-                op_type=f"fused_moe_lora_{op_prefix}_shrink"
-            )
-            expand_config = get_config_func(
-                op_type=f"fused_moe_lora_{op_prefix}_expand"
-            )
-        shrink_config = self._normalize_keys(shrink_config)
-        expand_config = self._normalize_keys(expand_config)
-        return shrink_config, expand_config
-
-    def _inject_lora_into_fused_moe(self):
-        moe_state_dict = {}
-        top_k = self.base_layer.top_k
+        # Mirrors per-(lora_id) layout of `self.lora_a_stacked` (built in
+        # `create_lora_weights`) so `create_dummy_lora`'s n_slices fallback
+        # matches `lora_a_stacked` length under EP.
+        self.n_slices = base_layer.local_num_experts * (self._w13_slices + 1)
 
         self.base_layer.ensure_moe_quant_config_init()
-        quant_config = self.base_layer.quant_method.moe_quant_config
-
         if getattr(self.base_layer.quant_method, "supports_internal_mk", False):
-            # Use the existing modular kernel from the quant method
-            m_fused_moe_fn = self.base_layer.quant_method.moe_kernel
-            # Don't let the kernel own shared experts so the runner can
-            # overlap them with routed experts via a separate CUDA stream.
-            m_fused_moe_fn.shared_experts = None
+            moe_kernel = self.base_layer.quant_method.moe_kernel
         else:
-            # Create a new modular kernel via select_gemm_impl.
-            # Don't pass shared_experts to the kernel so the runner can
-            # overlap them with routed experts via a separate CUDA stream.
             prepare_finalize = MoEPrepareAndFinalizeNoDPEPModular()
-            m_fused_moe_fn = FusedMoEKernel(
+            moe_kernel = FusedMoEKernel(
                 prepare_finalize,
                 self.base_layer.quant_method.select_gemm_impl(
                     prepare_finalize, self.base_layer
                 ),
             )
-
-        if quant_config.use_mxfp4_w4a16:
-            assert isinstance(
-                m_fused_moe_fn.impl.fused_experts,
-                (MarlinExperts, UnfusedOAITritonExperts),
-            )
-        else:
-            assert isinstance(m_fused_moe_fn.impl.fused_experts, TritonExperts)
-
-        def fwd_decorator(layer, func):
-            def wrapper(*args, **kwargs):
-                moe_state_dict["hidden_states"] = kwargs["hidden_states"]
-                moe_state_dict["topk_ids"] = kwargs["topk_ids"]
-                moe_state_dict["topk_weights"] = kwargs["topk_weights"]
-                moe_state_dict["expert_map"] = kwargs["expert_map"]
-                moe_state_dict["apply_router_weight_on_input"] = kwargs[
-                    "apply_router_weight_on_input"
-                ]
-                result = func(*args, **kwargs)
-                return result
-
-            return wrapper
-
-        def act_decorator(layer, func):
-            def wrapper(*args, **kwargs):
-                _, output, input = args
-
-                hidden_states = moe_state_dict["hidden_states"]
-                topk_weights = moe_state_dict["topk_weights"]
-                curr_topk_ids = moe_state_dict["topk_ids"]
-
-                expert_map = moe_state_dict["expert_map"]
-
-                config_dtype = _get_config_dtype_str(
-                    dtype=hidden_states.dtype,
-                    use_fp8_w8a8=False,
-                    use_int8_w8a16=False,
-                    use_int4_w4a16=False,
-                )
-                num_tokens = hidden_states.size(0)
-                M = num_tokens
-                max_lora_rank = self.w13_lora_a_stacked[0].shape[-2]
-                shrink_config, expand_config = self._get_lora_moe_configs(
-                    op_prefix="w13",
-                    num_loras=self.max_loras,
-                    rank=max_lora_rank,
-                    num_slices=self._w13_slices,
-                    M=M,
-                    layer=layer,
-                    top_k=top_k,
-                    config_dtype=config_dtype,
-                )
-
-                # SPARSITY_FACTOR is a heuristic margin ensuring tokens * top_k
-                # activates only a small fraction of total experts * loras.
-                SPARSITY_FACTOR = 8
-                naive_block_assignment = (
-                    expert_map is None
-                    and num_tokens * top_k * SPARSITY_FACTOR
-                    <= self.base_layer.local_num_experts * self.max_loras
-                )
-
-                # get the block size of m from customized config or default config
-                (
-                    token_lora_mapping,
-                    sorted_token_ids_lora,
-                    expert_ids_lora,
-                    num_tokens_post_padded_lora,
-                ) = self.punica_wrapper.moe_lora_align_block_size(
-                    curr_topk_ids,
-                    num_tokens,
-                    shrink_config["BLOCK_SIZE_M"],
-                    self.base_layer.local_num_experts,
-                    self.max_loras,
-                    self.adapter_enabled,
-                    expert_map,
-                    naive_block_assignment=naive_block_assignment,
-                )
-
-                moe_state_dict["sorted_token_ids_lora"] = sorted_token_ids_lora
-                moe_state_dict["expert_ids_lora"] = expert_ids_lora
-                moe_state_dict["num_tokens_post_padded_lora"] = (
-                    num_tokens_post_padded_lora
-                )
-                moe_state_dict["token_lora_mapping"] = token_lora_mapping
-
-                if sorted_token_ids_lora is not None:
-                    expert_ids_lora = expert_ids_lora.view(self.max_loras, -1)
-                    sorted_token_ids_lora = sorted_token_ids_lora.view(
-                        self.max_loras, -1
-                    )
-                #
-
-                self.punica_wrapper.add_lora_fused_moe(
-                    input.view(-1, top_k, input.shape[-1]),
-                    hidden_states,
-                    self.w13_lora_a_stacked,
-                    self.w13_lora_b_stacked,
-                    topk_weights,
-                    sorted_token_ids_lora,
-                    expert_ids_lora,
-                    num_tokens_post_padded_lora,
-                    max_lora_rank,
-                    top_k,
-                    shrink_config,  ## pass the shrink config
-                    expand_config,  ## pass the expand config
-                    self.adapter_enabled,
-                    fully_sharded=self.fully_sharded,
-                    token_lora_mapping=token_lora_mapping,
-                )
-
-                result = func(*args, **kwargs)
-
-                moe_state_dict["intermediate_cache2"] = output
-                return result
-
-            return wrapper
-
-        def moe_sum_decorator(layer, func):
-            def wrapper(*args, **kwargs):
-                hidden_states = moe_state_dict["hidden_states"]
-                topk_weights = moe_state_dict["topk_weights"]
-
-                config_dtype = _get_config_dtype_str(
-                    dtype=hidden_states.dtype,
-                    use_fp8_w8a8=False,
-                    use_int8_w8a16=False,
-                    use_int4_w4a16=False,
-                )
-                num_tokens = hidden_states.size(0)
-                M = num_tokens
-                max_lora_rank = self.w2_lora_a_stacked[0].shape[-2]
-                shrink_config, expand_config = self._get_lora_moe_configs(
-                    op_prefix="w2",
-                    num_loras=self.max_loras,
-                    rank=max_lora_rank,
-                    num_slices=1,
-                    M=M,
-                    layer=layer,
-                    top_k=top_k,
-                    config_dtype=config_dtype,
-                )
-
-                sorted_token_ids_lora = moe_state_dict["sorted_token_ids_lora"]
-                expert_ids_lora = moe_state_dict["expert_ids_lora"]
-                num_tokens_post_padded_lora = moe_state_dict[
-                    "num_tokens_post_padded_lora"
-                ]
-                token_lora_mapping = moe_state_dict.get("token_lora_mapping")
-
-                if sorted_token_ids_lora is not None:
-                    expert_ids_lora = expert_ids_lora.view(self.max_loras, -1)
-                    sorted_token_ids_lora = sorted_token_ids_lora.view(
-                        self.max_loras, -1
-                    )
-                intermediate_cache2 = moe_state_dict["intermediate_cache2"]
-                intermediate_cache3 = args[0]
-
-                shard_size_w2 = divide(self.base_layer.hidden_size, self.tp_size)
-
-                self.punica_wrapper.add_lora_fused_moe(
-                    intermediate_cache3,
-                    intermediate_cache2,
-                    self.w2_lora_a_stacked,
-                    self.w2_lora_b_stacked,
-                    topk_weights,
-                    sorted_token_ids_lora,
-                    expert_ids_lora,
-                    num_tokens_post_padded_lora,
-                    max_lora_rank,
-                    top_k,
-                    shrink_config,  ## pass the shrink config
-                    expand_config,  ## pass the expand config
-                    self.adapter_enabled,
-                    True,
-                    fully_sharded=self.fully_sharded,
-                    offset=shard_size_w2 * self.tp_rank if self.fully_sharded else 0,
-                    token_lora_mapping=token_lora_mapping,
-                )
-
-                result = func(*args, **kwargs)
-                return result
-
-            return wrapper
-
-        fused_experts = m_fused_moe_fn.impl.fused_experts
-
-        m_fused_moe_fn.apply = fwd_decorator(self.base_layer, m_fused_moe_fn.apply)
-        fused_experts.activation = act_decorator(
-            self.base_layer, fused_experts.activation
-        )
-        fused_experts.moe_sum = moe_sum_decorator(
-            self.base_layer, fused_experts.moe_sum
+        assert moe_kernel.supports_lora(), (
+            f"{type(moe_kernel.fused_experts).__name__} does not support LoRA. "
+            "For unquantized MoE, set moe_backend='triton' or moe_backend='auto' "
+            "(auto selects Triton automatically when LoRA is enabled). "
+            "For quantized MoE, mix LoRAExpertsMixin into the experts class "
+            "and consume self._lora_context in apply()."
         )
-        # TODO(bnell): find a less intrusive way to handle this.
+        self._moe_kernel = moe_kernel
         self.base_layer._replace_quant_method(
-            FusedMoEModularMethod(self.base_layer.quant_method, m_fused_moe_fn)
+            FusedMoEModularMethod(self.base_layer.quant_method, moe_kernel)
+        )
+
+    def _build_lora_context(self):
+        return MoELoRAContext(
+            w13_lora_a_stacked=self.w13_lora_a_stacked,
+            w13_lora_b_stacked=self.w13_lora_b_stacked,
+            w2_lora_a_stacked=self.w2_lora_a_stacked,
+            w2_lora_b_stacked=self.w2_lora_b_stacked,
+            adapter_enabled=self.adapter_enabled,
+            max_loras=self.max_loras,
+            top_k=self.base_layer.top_k,
+            w13_num_slices=self._w13_slices,
+            fully_sharded=self.fully_sharded,
+            tp_rank=self.tp_rank,
+            tp_size=self.tp_size,
+            local_num_experts=self.base_layer.local_num_experts,
+            punica_wrapper=self.punica_wrapper,
+            use_tuned_config=bool(envs.VLLM_TUNED_CONFIG_FOLDER),
         )
 
     def _create_lora_a_weights(
@@ -412,6 +145,26 @@ def _create_lora_b_weights(self, max_loras: int, lora_config: LoRAConfig):
             ),
         )
 
+    def _ep_check(self):
+        if self.base_layer.use_ep:
+            moe_config = self.base_layer.moe_config
+            all2all_backend = moe_config.moe_parallel_config.all2all_backend
+            assert all2all_backend == "allgather_reducescatter", (
+                "Fused MoE LoRA with EP currently only supports "
+                f"all2all_backend='allgather_reducescatter', got '{all2all_backend}'."
+            )
+            assert not moe_config.moe_parallel_config.is_sequence_parallel
+
+    def _verify_ep_fs(self, lora_config: LoRAConfig):
+        # EP and fully_sharded LoRA both partition along the same TP group —
+        # EP on the expert dim, fully_sharded on the LoRA rank dim — with
+        # mutually contradictory assumptions about which rank holds which
+        # expert's rank-shard.
+        assert not (self.base_layer.use_ep and lora_config.fully_sharded_loras), (
+            "Fused MoE LoRA does not support enable_expert_parallel=True "
+            "together with fully_sharded_loras=True. Disable one of them."
+        )
+
     def create_lora_weights(
         self,
         max_loras: int,
@@ -419,6 +172,8 @@ def create_lora_weights(
         model_config: PretrainedConfig | None = None,
     ) -> None:
         """Initializes lora matrices."""
+
+        self._verify_ep_fs(lora_config)
         self.max_loras = lora_config.max_loras
         self.fully_sharded = lora_config.fully_sharded_loras
 
@@ -544,6 +299,10 @@ def set_lora(
 
         w1_lora_a, w2_lora_a, w3_lora_a = lora_a
         w1_lora_b, w2_lora_b, w3_lora_b = lora_b
+
+        # EP slicing is done once at add time in
+        # LoRAModelManager._slice_moe_lora_ep, so by here the cached
+        # tensors already match the local-expert dim of the stacked buffers.
         assert (
             num_experts
             == w1_lora_a.shape[0]
@@ -586,20 +345,25 @@ def set_lora(
             index, :, : sliced_w2_lora_b.shape[1], : sliced_w2_lora_b.shape[2]
         ].copy_(sliced_w2_lora_b, non_blocking=True)
 
+    def set_mapping(self, punica_wrapper):
+        super().set_mapping(punica_wrapper)
+        lora_context = self._build_lora_context()
+        self._moe_kernel.fused_experts.set_lora_context(lora_context)
+        prepare_finalize = self._moe_kernel.prepare_finalize
+        if hasattr(prepare_finalize, "set_lora_context"):
+            prepare_finalize.set_lora_context(lora_context)
+
     def forward(self, *args, **kwargs):
         return self.base_layer.forward(*args, **kwargs)
 
-    def maybe_all_reduce_tensor_model_parallel(self, *args, **kwargs):
-        return self.base_layer.maybe_all_reduce_tensor_model_parallel(*args, **kwargs)
-
-    @property
-    def _shared_experts(self):
-        return self.base_layer._shared_experts
-
     @property
     def quant_method(self):
         return self.base_layer.quant_method
 
+    @property
+    def runner(self):
+        return self.base_layer.runner
+
     @property
     def is_internal_router(self) -> bool:
         return self.base_layer.is_internal_router
@@ -614,8 +378,9 @@ def can_replace_layer(
     ) -> bool:
         """Returns True if the layer can be replaced by this LoRA layer."""
 
-        # source_layer is FusedMoE or SharedFusedMoE
-        return isinstance(source_layer, FusedMoE) and len(packed_modules_list) == 2
+        # source_layer is FusedMoE
+        moe_cls = maybe_get_oot_by_class(FusedMoE)
+        return isinstance(source_layer, moe_cls) and len(packed_modules_list) == 2
 
 
 class FusedMoE3DWithLoRA(FusedMoEWithLoRA):
@@ -661,6 +426,7 @@ def create_lora_weights(
         """Initializes lora matrices."""
 
         assert isinstance(model_config, PretrainedConfig)
+        self._verify_ep_fs(lora_config)
         self._base_model = model_config.architectures[0]
         self.max_loras = lora_config.max_loras
         self.fully_sharded = lora_config.fully_sharded_loras
@@ -776,5 +542,6 @@ def can_replace_layer(
         model_config: PretrainedConfig | None = None,
     ) -> bool:
         """Returns True if the layer can be replaced by this LoRA layer."""
-        # source_layer is FusedMoE or SharedFusedMoE
-        return isinstance(source_layer, FusedMoE) and len(packed_modules_list) == 1
+        # source_layer is FusedMoE
+        moe_cls = maybe_get_oot_by_class(FusedMoE)
+        return isinstance(source_layer, moe_cls) and len(packed_modules_list) == 1
diff --git a/vllm/lora/layers/gate_linear.py b/vllm/lora/layers/gate_linear.py
deleted file mode 100644
index 9bcaaa5b8e20..000000000000
--- a/vllm/lora/layers/gate_linear.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import torch.nn as nn
-from transformers import PretrainedConfig
-
-from vllm.config.lora import LoRAConfig
-from vllm.model_executor.custom_op import maybe_get_oot_by_class
-from vllm.model_executor.layers.fused_moe.router.gate_linear import GateLinear
-
-from .replicated_linear import ReplicatedLinearWithLoRA
-
-
-class GateLinearWithLoRA(ReplicatedLinearWithLoRA):
-    def __init__(self, base_layer: GateLinear) -> None:
-        super().__init__(
-            base_layer,
-        )
-
-    # GateLinearWithLoRA should always be replaced, regardless of the fully
-    # sharded LoRAs setting, because it is, by definition, copied per GPU.
-    @classmethod
-    def can_replace_layer(
-        cls,
-        source_layer: nn.Module,
-        lora_config: LoRAConfig,
-        packed_modules_list: list,
-        model_config: PretrainedConfig | None = None,
-    ) -> bool:
-        return type(source_layer) is maybe_get_oot_by_class(GateLinear)
diff --git a/vllm/lora/layers/replicated_linear.py b/vllm/lora/layers/replicated_linear.py
index f1f499b841ba..53ae26be4c36 100644
--- a/vllm/lora/layers/replicated_linear.py
+++ b/vllm/lora/layers/replicated_linear.py
@@ -46,6 +46,12 @@ def forward(
 
         return output, output_bias
 
+    def apply(self, x: torch.Tensor, bias: torch.Tensor | None = None) -> torch.Tensor:
+        # ReplicatedLinear subclasses such as GateLinear override forward() to
+        # dispatch custom kernels and/or adjust the output dtype. Apply LoRA on
+        # top of the actual base-layer output instead of bypassing that path.
+        return self._apply_base_forward(x)
+
     # ReplicatedLinear should always be replaced, regardless of the fully
     # sharded LoRAs setting, because it is, by definition, copied per GPU.
     @classmethod
@@ -56,7 +62,7 @@ def can_replace_layer(
         packed_modules_list: list,
         model_config: PretrainedConfig | None = None,
     ) -> bool:
-        return type(source_layer) is maybe_get_oot_by_class(ReplicatedLinear)
+        return isinstance(source_layer, maybe_get_oot_by_class(ReplicatedLinear))
 
     def slice_lora_a(
         self, lora_a: torch.Tensor | list[torch.Tensor | None]
diff --git a/vllm/lora/layers/utils.py b/vllm/lora/layers/utils.py
index c19b097586f5..1b8083f5c4d1 100644
--- a/vllm/lora/layers/utils.py
+++ b/vllm/lora/layers/utils.py
@@ -90,11 +90,12 @@ def try_get_optimal_moe_lora_config(
     top_k: int,
     dtype: str | None,
     M: int,
-    block_shape: list[int] | None = None,
 ) -> dict[str, int | None]:
-    config = try_get_optimal_moe_config(
-        w1_shape, w2_shape, top_k, dtype, M, block_shape
-    ).copy()
+    # LoRA shrink/expand operates on bf16/fp16 adapters regardless of the
+    # base MoE weight's block-wise quantization, so block_shape is omitted
+    # from the config lookup — the non-quantized branch in get_default_config
+    # ignores it anyway.
+    config = try_get_optimal_moe_config(w1_shape, w2_shape, top_k, dtype, M).copy()
     if op_type in [
         "fused_moe_lora_w13_shrink",
         "fused_moe_lora_w2_shrink",
diff --git a/vllm/lora/lora_model.py b/vllm/lora/lora_model.py
index 7c1dd39bb5e3..e3cb82e35694 100644
--- a/vllm/lora/lora_model.py
+++ b/vllm/lora/lora_model.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import os
+from dataclasses import dataclass
 
 import safetensors
 import torch
@@ -21,6 +22,41 @@
 logger = init_logger(__name__)
 
 
+@dataclass(frozen=True)
+class MoEEPLoadSpec:
+    """Per-expert-parallel slicing metadata for one FusedMoE LoRA module.
+
+    Threaded into the LoRA loader so per-expert weights from EP ranks
+    other than this one can be skipped before they ever hit CPU memory.
+    """
+
+    ep_rank: int
+    local_num_experts: int
+    global_num_experts: int
+
+
+_EXPERTS_SEPARATOR = ".experts."
+
+
+def _is_remote_expert_key(raw_name: str, spec: "MoEEPLoadSpec") -> bool:
+    """
+    Decide whether a checkpoint key belongs to a non-local expert.
+    """
+    pos = raw_name.find(_EXPERTS_SEPARATOR)
+    if pos < 0:
+        return False
+    idx_start = pos + len(_EXPERTS_SEPARATOR)
+    idx_end = raw_name.find(".", idx_start)
+    if idx_end < 0:
+        return False
+    idx_str = raw_name[idx_start:idx_end]
+    if not idx_str.isdigit():
+        return False
+    expert_idx = int(idx_str)
+    local_start = spec.ep_rank * spec.local_num_experts
+    return not (local_start <= expert_idx < local_start + spec.local_num_experts)
+
+
 class LoRAModel:
     """A LoRA fine-tuned model."""
 
@@ -29,12 +65,17 @@ def __init__(
         lora_model_id: int,
         rank: int,
         loras: dict[str, LoRALayerWeights],
+        is_3d_lora_weight: bool = False,
     ) -> None:
         """
         Args:
             lora_model_id: The integer id for the lora model.
             rank: lora rank.
             loras: module name -> weights for lora-replaced layers.
+            is_3d_lora_weight: Whether the on-disk MoE adapter is in the 3D
+                fused (gate_up_proj / down_proj) layout. Propagated from the
+                originating LoRARequest. Only consulted by the LoRA model
+                manager when enable_mixed_moe_lora_format is on.
 
         """
         self.id = lora_model_id
@@ -44,6 +85,7 @@ def __init__(
         )
         self.rank = rank
         self.loras: dict[str, LoRALayerWeights] = loras
+        self.is_3d_lora_weight = is_3d_lora_weight
 
     def clone(self, lora_model_id: int) -> "LoRAModel":
         """Return a copy of the object with different ids.
@@ -53,6 +95,7 @@ def clone(self, lora_model_id: int) -> "LoRAModel":
             lora_model_id,
             rank=self.rank,
             loras=self.loras.copy(),
+            is_3d_lora_weight=self.is_3d_lora_weight,
         )
 
     def get_lora(self, module_name: str) -> LoRALayerWeights | None:
@@ -134,6 +177,7 @@ def from_local_checkpoint(
         weights_mapper: WeightsMapper | None = None,
         tensorizer_config_dict: dict | None = None,
         skip_prefixes: list[str] | None = None,
+        moe_ep_spec: MoEEPLoadSpec | None = None,
     ) -> "LoRAModel":
         """Create a LoRAModel from a local checkpoint.
 
@@ -149,6 +193,11 @@ def from_local_checkpoint(
             skip_prefixes: List of module name prefixes to skip during loading.
                 Models can define this to skip modules not used in inference
                 (e.g., MTP layers). Format: ["mtp."]
+            moe_ep_spec: When 2D FusedMoE LoRA modules are present with
+                expert parallelism enabled, the (ep_rank, local, global)
+                slicing metadata shared across all MoE layers. Non-local
+                expert weights are skipped at read time instead of being
+                loaded and discarded later.
 
         Returns:
             Loaded LoRA Model.
@@ -203,6 +252,7 @@ def check_unexpected_modules(modules: dict):
             tensors = TensorDeserializer(
                 lora_tensor_path,
                 dtype=tensorizer_config.dtype,
+                device=device,
                 **tensorizer_args.deserialization_kwargs,
             )
             check_unexpected_modules(tensors)
@@ -219,6 +269,10 @@ def check_unexpected_modules(modules: dict):
                 # Load tensors if there are only expected modules.
                 check_unexpected_modules(f)
                 for module in f.keys():  # noqa
+                    if moe_ep_spec is not None and _is_remote_expert_key(
+                        module, moe_ep_spec
+                    ):
+                        continue
                     tensors[module] = f.get_tensor(module)
         elif os.path.isfile(lora_bin_file_path) or os.path.isfile(lora_pt_file_path):
             lora_file_path = (
@@ -228,6 +282,15 @@ def check_unexpected_modules(modules: dict):
             )
             tensors = torch.load(lora_file_path, map_location=device, weights_only=True)
             check_unexpected_modules(tensors)
+            if moe_ep_spec is not None:
+                # `.bin`/`.pt` adapters can't be lazy-loaded, but pruning
+                # the dict here still frees the non-local expert tensors
+                # before the dtype cast / pin_memory work that follows.
+                tensors = {
+                    k: v
+                    for k, v in tensors.items()
+                    if not _is_remote_expert_key(k, moe_ep_spec)
+                }
         else:
             raise ValueError(f"{lora_dir} doesn't contain tensors")
 
diff --git a/vllm/lora/model_manager.py b/vllm/lora/model_manager.py
index 9d3772560433..07df1b53da17 100644
--- a/vllm/lora/model_manager.py
+++ b/vllm/lora/model_manager.py
@@ -14,10 +14,11 @@
 from vllm.lora.layers import (
     BaseLayerWithLoRA,
     FusedMoE3DWithLoRA,
+    FusedMoEWithLoRA,
     LoRAMapping,
     LoRAMappingType,
 )
-from vllm.lora.lora_model import LoRAModel
+from vllm.lora.lora_model import LoRAModel, MoEEPLoadSpec
 from vllm.lora.lora_weights import LoRALayerWeights, PackedLoRALayerWeights
 from vllm.lora.punica_wrapper import PunicaWrapperBase, get_punica_wrapper
 from vllm.lora.utils import (
@@ -101,7 +102,6 @@ def __init__(
         self.max_num_batched_tokens = math.ceil(max_num_batched_tokens / 8) * 8
         self.lora_index_to_id: list[int | None] = [None] * self.lora_slots
         self.vocab_size = vocab_size
-        self.packed_modules_mapping = process_packed_modules_mapping(self.model)
 
         self.is_pooling_model = is_pooling_model(self.model)
         self.packed_modules: dict[str, list[str]] = {}
@@ -109,11 +109,31 @@ def __init__(
         # Dict instead of a set for compatibility with LRUCache.
         self._last_mapping: LoRAMapping | None = None
         is_moe = is_moe_model(self.model)
-        self._is_3d_moe_model = is_moe and self.model.is_3d_moe_weight
+        self._is_moe = is_moe
+
+        # When the engine is started with enable_mixed_moe_lora_format=True
+        # we force the universal 2D wrapper (FusedMoEWithLoRA) regardless of
+        # the model's 3D flag, so 2D and 3D adapters can coexist.
+        self._enable_mixed_moe_lora_format = (
+            is_moe and lora_config.enable_mixed_moe_lora_format
+        )
+        self._is_3d_moe_model = (
+            self._is_moe
+            and self.model.is_3d_moe_weight
+            and not self._enable_mixed_moe_lora_format
+        )
+        self.packed_modules_mapping = process_packed_modules_mapping(
+            self.model, force_2d_moe=self._enable_mixed_moe_lora_format
+        )
         self._is_non_gated_moe = is_moe and self.model.is_non_gated_moe
+        self._use_ep = bool(
+            vllm_config and vllm_config.parallel_config.enable_expert_parallel
+        )
         self._init_punica_wrapper(max_num_batched_tokens, vllm_config)
         self._create_lora_modules()
 
+        self.moe_ep_load_spec: MoEEPLoadSpec | None = self._build_moe_ep_load_spec()
+
         self.model.lora_manager = self
 
     def _init_punica_wrapper(
@@ -360,6 +380,8 @@ def _parent_module(module_name: str) -> str:
             #  - given an input 'x' return ''
             return module_name.rpartition(".")[0]
 
+        wrapped_by_id: dict[int, BaseLayerWithLoRA] = {}
+
         for module_name, module in self.model.named_modules(remove_duplicate=False):
             if isinstance(module, PPMissingLayer):
                 continue
@@ -387,10 +409,26 @@ def _parent_module(module_name: str) -> str:
                     "LoRA is not supported for non-gated MoE gate module."
                     " %s will be ignored.",
                     module_name,
-                    scope="local",
                 )
                 continue
 
+            existing_wrapper = wrapped_by_id.get(id(module))
+            if existing_wrapper is not None and "lm_head" not in module_name:
+                # Same underlying module was already wrapped under another
+                # path (e.g. a MoE gate held both directly on the block and
+                # inside the MoE runner). The adapter targets the canonical
+                # path (`mlp.gate`); rewire the alias attribute
+                # (`runner.gate`) to the SAME wrapper so the forward path
+                # through the alias still applies LoRA, but do NOT add a
+                # second entry to self.modules — otherwise `activate_adapter`
+                # would call `reset_lora` on the alias and wipe the weights
+                # just set under the canonical name,  because the alias can't
+                # load LoRA weights due to name mismatch.
+                parent = self.model.get_submodule(_parent_module(module_name))
+                # reference
+                setattr(parent, module_name.rpartition(".")[-1], existing_wrapper)
+                continue
+
             parts = module_name.split(".")[-1]
             packed_moduled_lst = self.packed_modules_mapping.get(parts, [])
             if isinstance(module, FusedMoE):
@@ -411,6 +449,8 @@ def _parent_module(module_name: str) -> str:
                     self.model.config,
                 ),
             )
+            if isinstance(new_module, BaseLayerWithLoRA):
+                wrapped_by_id[id(module)] = new_module
 
             # (yard1): TODO make this more robust
             if "lm_head" in module_name:
@@ -437,12 +477,21 @@ def _parent_module(module_name: str) -> str:
                     ),
                 )
 
-            # In some models, especially multimodal ones, layers with the same
-            # name may have different types, such as nn.Linear and
-            # ReplicatedLinear. The nn.Linear layers cannot be replaced with
-            # LoRA layers, leading to assertion error. The following check
-            # aims to prevent this error
-            if self.supports_mm and not isinstance(new_module, BaseLayerWithLoRA):
+            # Some matched modules can be unsupported by LoRA wrappers
+            # (e.g. subclasses with specialized forward behavior).
+            if not isinstance(new_module, BaseLayerWithLoRA):
+                error_msg = (
+                    "LoRA target module "
+                    f"{module_name} ({type(module).__name__}) matched the "
+                    "deployment configuration but could not be wrapped by any "
+                    "LoRA layer implementation."
+                )
+                if self.lora_config.target_modules is not None:
+                    raise ValueError(
+                        f"{error_msg} target_modules="
+                        f"{sorted(self.lora_config.target_modules)}"
+                    )
+                logger.warning_once("%s It will be ignored.", error_msg)
                 continue
             self.register_module(module_name, new_module)
 
@@ -554,7 +603,16 @@ def create_dummy_lora(
             else:
                 parts = module_name.split(".")
                 replacements = self.packed_modules_mapping[parts[-1]]
+                n_slices = getattr(module, "n_slices", len(replacements))
+                if module.__class__.__name__ == "FusedMoEWithLoRA":
+                    replacements = replacements[
+                        : len(module.lora_a_stacked) // self.lora_slots
+                    ]
                 subloras: list[LoRALayerWeights | None] = []
+                # HACK: overrides replacements for qkvz = qkv + z case.
+                # Any better methods to handle this case?
+                if n_slices != len(replacements):
+                    replacements = [f"slice_{i}" for i in range(n_slices)]
                 for i, r in enumerate(replacements):
                     lora = LoRALayerWeights.create_dummy_lora_weights(
                         module_name + "." + r,
@@ -578,6 +636,38 @@ def create_dummy_lora(
                 model.loras[module_name] = lora
         return model
 
+    def get_dummy_lora_warmup_rank(self, default_rank: int) -> int:
+        """Return a dummy LoRA rank compatible with wrapped modules.
+
+        Dummy LoRAs keep warmup memory low by using a small rank. Fully
+        sharded MoE wrappers additionally require the dummy rank to be divisible
+        by tensor parallel size because they shard W13 along the rank axis.
+        """
+        if not self.lora_config.fully_sharded_loras:
+            return default_rank
+
+        required_multiple = 1
+        for module in self.modules.values():
+            if not getattr(module, "fully_sharded", False):
+                continue
+            required_multiple = math.lcm(required_multiple, module.tp_size)
+
+        if required_multiple == 1 or default_rank % required_multiple == 0:
+            return default_rank
+
+        adjusted_rank = (
+            (default_rank + required_multiple - 1) // required_multiple
+        ) * required_multiple
+        if adjusted_rank > self.lora_config.max_lora_rank:
+            raise ValueError(
+                "Unable to choose a dummy LoRA warmup rank compatible with "
+                "fully sharded MoE modules: "
+                f"default_rank={default_rank}, "
+                f"required_multiple={required_multiple}, "
+                f"max_lora_rank={self.lora_config.max_lora_rank}"
+            )
+        return adjusted_rank
+
     def _match_target_modules(self, module_name: str) -> bool:
         """Check if a module should have LoRA applied.
 
@@ -594,7 +684,11 @@ def _match_target_modules(self, module_name: str) -> bool:
         """
         if not is_supported_lora_module(module_name, self.supported_lora_modules):
             return False
-        return is_in_target_modules(module_name, self.lora_config.target_modules)
+        return is_in_target_modules(
+            module_name,
+            self.lora_config.target_modules,
+            self.packed_modules_mapping,
+        )
 
     def _get_punica_wrapper(self, module_name: str) -> PunicaWrapperBase | None:
         """
@@ -628,15 +722,21 @@ def _register_packed_modules(self, module_full_name: str) -> None:
 
     def _create_merged_loras_inplace(self, lora_model: LoRAModel) -> None:
         for module_name, new_module_names in self.packed_modules.items():
+            # For 2D FusedMoE modules with EP, narrow the per-expert
+            # sub-module list to this rank's owned experts so pack_moe
+            # produces a tensor sized to local_num_experts directly.
+            packed_module_names = new_module_names
+            if module_name.endswith(".experts"):
+                new_module_names = self._restrict_to_local_experts(
+                    module_name, new_module_names
+                )
             replacement_loras: list[LoRALayerWeights | None] = []
-            replaced_module: set[str] = set()
             has_replacement = False
             for r in new_module_names:
                 lora = self._get_lora_layer_weights(lora_model, r)
                 replacement_loras.append(lora)
                 if lora:
                     has_replacement = True
-                    replaced_module.add(r)
             if not has_replacement:
                 continue
             for i in range(len(replacement_loras)):
@@ -662,8 +762,11 @@ def _create_merged_loras_inplace(self, lora_model: LoRAModel) -> None:
                 lora_model.loras[module_name] = PackedLoRALayerWeights.pack(
                     replacement_loras
                 )
-            # Remove the modules that have been replaced.
-            for module in replaced_module:
+            # Drop every candidate sub-module, including non-local expert
+            # entries that were loaded but did not contribute to the
+            # packed result. Without this they would keep extra CPU
+            # memory alive after pack_moe.
+            for module in packed_module_names:
                 lora_model.loras.pop(module, None)
 
         for lora in lora_model.loras.values():
@@ -672,6 +775,17 @@ def _create_merged_loras_inplace(self, lora_model: LoRAModel) -> None:
         for module_name, module in self.modules.items():
             if isinstance(module, FusedMoE3DWithLoRA):
                 self._stack_moe_lora_weights(lora_model, module, module_name)
+            elif isinstance(module, FusedMoEWithLoRA):
+                # When mixed mode is enabled the universal 2D wrapper has to
+                # absorb both 2D and 3D-format adapters. 3D-format adapters
+                # need to be split into per-(w1, w2, w3) tensors before the
+                # 2D set_lora can copy them into the stacked buffers.
+                if self._enable_mixed_moe_lora_format and getattr(
+                    lora_model, "is_3d_lora_weight", False
+                ):
+                    self._convert_3d_to_2d_moe_lora(lora_model, module, module_name)
+                else:
+                    self._slice_moe_lora_ep(lora_model, module, module_name)
 
         first_lora: LoRALayerWeights = next(iter(lora_model.loras.values()))
         assert first_lora.lora_a is not None
@@ -718,23 +832,33 @@ def _stack_moe_lora_weights(
             assert gate_up_proj_lora is not None
             assert down_proj_lora is not None
             if self._is_3d_moe_model:
-                num_experts = module.w13_lora_a_stacked[0].shape[1]
+                local_num_experts = module.w13_lora_a_stacked[0].shape[1]
+                # The checkpoint holds weights for all global experts, but
+                # each EP rank owns only local_num_experts. Reshape against
+                # the adapter's actual expert count, then slice this rank's
+                # owned expert range before it gets copied into the local
+                # stacked buffer. For non-EP (local == global) this is a
+                # no-op slice.
+                global_num_experts = module.base_layer.global_num_experts
+                ep_rank = module.base_layer.ep_rank
+                expert_start = ep_rank * local_num_experts
+                expert_end = expert_start + local_num_experts
 
                 # (num_experts,rank,input_size)
                 gate_up_proj_lora.lora_a = gate_up_proj_lora.lora_a.reshape(
-                    num_experts, -1, gate_up_proj_lora.lora_a.shape[-1]
-                )
+                    global_num_experts, -1, gate_up_proj_lora.lora_a.shape[-1]
+                )[expert_start:expert_end].contiguous()
                 down_proj_lora.lora_a = down_proj_lora.lora_a.reshape(
-                    num_experts, -1, down_proj_lora.lora_a.shape[-1]
-                )
+                    global_num_experts, -1, down_proj_lora.lora_a.shape[-1]
+                )[expert_start:expert_end].contiguous()
 
                 # (output_size,rank,num_experts)
                 gate_up_proj_lora.lora_b = gate_up_proj_lora.lora_b.reshape(
-                    gate_up_proj_lora.lora_b.shape[0], -1, num_experts
-                )
+                    gate_up_proj_lora.lora_b.shape[0], -1, global_num_experts
+                )[..., expert_start:expert_end]
                 down_proj_lora.lora_b = down_proj_lora.lora_b.reshape(
-                    down_proj_lora.lora_b.shape[0], -1, num_experts
-                )
+                    down_proj_lora.lora_b.shape[0], -1, global_num_experts
+                )[..., expert_start:expert_end]
 
                 # (num_experts,output_size,rank)
                 gate_up_proj_lora.lora_b = gate_up_proj_lora.lora_b.permute(
@@ -784,6 +908,202 @@ def _stack_moe_lora_weights(
                 module_lora.lora_a = lora_a
                 module_lora.lora_b = lora_b
 
+    def _convert_3d_to_2d_moe_lora(
+        self,
+        lora_model: LoRAModel,
+        module: FusedMoEWithLoRA,
+        module_name: str,
+    ) -> None:
+        """Convert a 3D-format MoE LoRA checkpoint into the 2D pack layout
+        that `FusedMoEWithLoRA.set_lora` expects.
+
+        On disk the 3D PEFT layout stores two flat tensor pairs per layer:
+          - `{module}.base_layer.lora_{A,B}`: gate_up_proj, with shapes
+                `(rank * num_experts, hidden)` / `(intermediate * 2,
+                rank * num_experts)`
+          - `{module}.lora_{A,B}`: down_proj, with shapes
+                `(rank * num_experts, intermediate)` / `(hidden,
+                rank * num_experts)`
+        The 2D wrapper expects three stacked per-expert tensors,
+        `[w1, w2, w3]`, with `(num_experts, rank, in)` for lora_a and
+        `(num_experts, out, rank)` for lora_b. In the 3D layout w1
+        (gate_proj) and w3 (up_proj) share the rank-r intermediate
+        representation, so both halves use the same lora_a tensor.
+
+        Only invoked when `enable_mixed_moe_lora_format=True` and the
+        source LoRARequest declares `is_3d_lora_weight=True`.
+        """
+        gate_up_proj_lora = self._get_lora_layer_weights(
+            lora_model, module_name + ".base_layer"
+        )
+        down_proj_lora = self._get_lora_layer_weights(lora_model, module_name)
+        if gate_up_proj_lora is None or down_proj_lora is None:
+            # Either the adapter omits the experts entirely or the file
+            # layout differs from what this path supports; leave the entry
+            # untouched so set_lora can raise a clear error if needed.
+            return
+
+        local_num_experts = module.base_layer.local_num_experts
+        global_num_experts = module.base_layer.global_num_experts
+        ep_rank = module.base_layer.ep_rank
+        expert_start = ep_rank * local_num_experts
+        expert_end = expert_start + local_num_experts
+
+        # Reshape and EP-slice into per-expert 3D tensors. This mirrors
+        # `_stack_moe_lora_weights`; for non-EP runs the slice is a no-op.
+        gate_up_a = gate_up_proj_lora.lora_a.reshape(
+            global_num_experts, -1, gate_up_proj_lora.lora_a.shape[-1]
+        )[expert_start:expert_end].contiguous()
+        gate_up_b = (
+            gate_up_proj_lora.lora_b.reshape(
+                gate_up_proj_lora.lora_b.shape[0], -1, global_num_experts
+            )[..., expert_start:expert_end]
+            .permute(2, 0, 1)
+            .contiguous()
+        )
+        down_a = down_proj_lora.lora_a.reshape(
+            global_num_experts, -1, down_proj_lora.lora_a.shape[-1]
+        )[expert_start:expert_end].contiguous()
+        down_b = (
+            down_proj_lora.lora_b.reshape(
+                down_proj_lora.lora_b.shape[0], -1, global_num_experts
+            )[..., expert_start:expert_end]
+            .permute(2, 0, 1)
+            .contiguous()
+        )
+
+        # Split the fused gate_up_proj output dim into separate w1 / w3
+        # halves. GPT-OSS interleaves them along the output dim, all other
+        # 3D MoE checkpoints we know about concatenate them.
+        intermediate_x2 = gate_up_b.shape[1]
+        if intermediate_x2 % 2 != 0:
+            raise ValueError(
+                "Expected gate_up_proj LoRA-B output dim to be 2 * intermediate, "
+                f"got {intermediate_x2}."
+            )
+        intermediate = intermediate_x2 // 2
+        base_arch = self.model.config.architectures[0]
+        if base_arch == "GptOssForCausalLM":
+            w1_b = gate_up_b[:, ::2, :].contiguous()
+            w3_b = gate_up_b[:, 1::2, :].contiguous()
+        else:
+            w1_b = gate_up_b[:, :intermediate, :].contiguous()
+            w3_b = gate_up_b[:, intermediate:, :].contiguous()
+
+        # In the 3D layout w1 and w3 share the same rank-r mid
+        # representation, so they reuse the same lora_a tensor. The 2D
+        # wrapper's set_lora copies whatever it gets here into independent
+        # per-slice buffers, so the sharing is purely a CPU-side memory
+        # optimization and does not affect numerics.
+        down_proj_lora.lora_a = [gate_up_a, down_a, gate_up_a]
+        down_proj_lora.lora_b = [w1_b, down_b, w3_b]
+        # Drop the redundant base_layer entry to avoid double pin_memory
+        # and to keep the activation path looking up only the wrapper key.
+        lora_model.loras.pop(module_name + ".base_layer", None)
+
+    def _slice_moe_lora_ep(
+        self,
+        lora_model: LoRAModel,
+        module: FusedMoEWithLoRA,
+        module_name: str,
+    ) -> None:
+        """Slice the cached LoRA tensors down to this rank's local experts.
+
+        The 2D MoE checkpoint enters as a list of per-(w1/w2/w3) tensors of
+        shape (num_experts, rank, in) / (num_experts, out, rank). When EP
+        is active each rank only owns local_num_experts; without this slice
+        the CPU LoRAModel keeps the full global weight and set_lora has to
+        re-slice on every activation.
+
+        With the load-time / pack-time slicing in
+        ``_restrict_to_local_experts``, the stacked tensors already match
+        ``local_num_experts`` and the inner branch becomes a no-op. The
+        guard remains so checkpoints that bypassed the pre-slicing (e.g.
+        ``.bin``/``.pt`` adapters with weights mappers we don't recognize)
+        still get sliced here.
+        """
+        if not module.base_layer.use_ep:
+            return
+        module_lora = self._get_lora_layer_weights(lora_model, module_name)
+        if module_lora is None or not isinstance(module_lora.lora_a, list):
+            return
+
+        local_num_experts = module.base_layer.local_num_experts
+        global_num_experts = module.base_layer.global_num_experts
+        ep_rank = module.base_layer.ep_rank
+        expert_start = ep_rank * local_num_experts
+        expert_end = expert_start + local_num_experts
+
+        new_lora_a: list[torch.Tensor | None] = []
+        new_lora_b: list[torch.Tensor | None] = []
+        for a, b in zip(module_lora.lora_a, module_lora.lora_b):
+            if a is not None and b is not None and a.shape[0] == global_num_experts:
+                a = a[expert_start:expert_end].contiguous()
+                b = b[expert_start:expert_end].contiguous()
+            new_lora_a.append(a)
+            new_lora_b.append(b)
+        module_lora.lora_a = new_lora_a
+        module_lora.lora_b = new_lora_b
+
+    def _restrict_to_local_experts(
+        self, module_name: str, new_module_names: list[str]
+    ) -> list[str]:
+        """Narrow a flat expert-major sub-module list to this rank's experts.
+
+        ``new_module_names`` is produced by
+        ``FusedMoE.make_expert_params_mapping`` and is ordered
+        ``[e=0,w1, e=0,w2, e=0,w3, e=1,w1, ...]`` (non-gated MoE has 2
+        entries per expert instead of 3). When the module is a 2D
+        ``FusedMoEWithLoRA`` with EP enabled, we slice the list to the
+        contiguous block of experts owned by this rank so the downstream
+        ``pack_moe`` call only consumes local weights and produces a
+        tensor sized to ``local_num_experts`` directly.
+
+        Returns the original list unchanged for non-MoE modules, the 3D
+        MoE path (handled separately by ``_stack_moe_lora_weights``),
+        modules without EP, or layouts we cannot cleanly partition.
+        """
+        module = self.modules.get(module_name)
+        if not isinstance(module, FusedMoEWithLoRA):
+            return new_module_names
+        if isinstance(module, FusedMoE3DWithLoRA):
+            return new_module_names
+        if not getattr(module.base_layer, "use_ep", False):
+            return new_module_names
+        global_num_experts = module.base_layer.global_num_experts
+        local_num_experts = module.base_layer.local_num_experts
+        ep_rank = module.base_layer.ep_rank
+        if global_num_experts <= 0 or len(new_module_names) % global_num_experts != 0:
+            return new_module_names
+        per_expert = len(new_module_names) // global_num_experts
+        start = ep_rank * local_num_experts * per_expert
+        end = start + local_num_experts * per_expert
+        return new_module_names[start:end]
+
+    def _build_moe_ep_load_spec(self) -> MoEEPLoadSpec | None:
+        """
+        Per-rank slicing metadata for 2D FusedMoE LoRA modules.
+        """
+        if not self._use_ep or not self._is_moe:
+            return None
+        module = next(
+            (
+                m
+                for m in self.modules.values()
+                if isinstance(m, FusedMoEWithLoRA)
+                and not isinstance(m, FusedMoE3DWithLoRA)
+            ),
+            None,
+        )
+        if module is None:
+            return None
+        base = module.base_layer
+        return MoEEPLoadSpec(
+            ep_rank=base.ep_rank,
+            local_num_experts=base.local_num_experts,
+            global_num_experts=base.global_num_experts,
+        )
+
     def _get_lora_layer_weights(
         self, lora_model: LoRAModel, module_name: str
     ) -> LoRALayerWeights | None:
diff --git a/vllm/lora/ops/triton_ops/lora_expand_op.py b/vllm/lora/ops/triton_ops/lora_expand_op.py
index 343e0c81080d..7f8ed577ecb6 100644
--- a/vllm/lora/ops/triton_ops/lora_expand_op.py
+++ b/vllm/lora/ops/triton_ops/lora_expand_op.py
@@ -9,8 +9,13 @@
 
 import torch
 
+from vllm import envs
 from vllm.lora.ops.triton_ops.kernel_utils import do_expand_kernel
-from vllm.lora.ops.triton_ops.utils import _get_lora_b_ptr, get_lora_op_configs
+from vllm.lora.ops.triton_ops.utils import (
+    _get_lora_b_ptr,
+    get_lora_op_configs,
+    supports_pdl,
+)
 from vllm.triton_utils import tl, triton
 from vllm.utils.torch_utils import direct_register_custom_op
 
@@ -237,9 +242,9 @@ def _lora_expand(
         NUM_SLICES,
         num_active_loras.item(),
     )
-    # We disable PDL temporarily because LoRA kernels are not launching back-to-back,
-    # making PDL invalid and affecting the kernel performance.
-    use_gdc = False  # supports_pdl(inputs.device)
+
+    # PDL only works when dual-stream is being used.
+    use_gdc = supports_pdl(inputs.device) and envs.VLLM_LORA_ENABLE_DUAL_STREAM
     _lora_expand_kernel[grid](
         inputs,
         lora_ptr_tensor,
diff --git a/vllm/lora/ops/triton_ops/lora_shrink_op.py b/vllm/lora/ops/triton_ops/lora_shrink_op.py
index ea850baa2535..88c24c740db5 100644
--- a/vllm/lora/ops/triton_ops/lora_shrink_op.py
+++ b/vllm/lora/ops/triton_ops/lora_shrink_op.py
@@ -9,8 +9,13 @@
 
 import torch
 
+from vllm import envs
 from vllm.lora.ops.triton_ops.kernel_utils import do_shrink_kernel
-from vllm.lora.ops.triton_ops.utils import _get_lora_a_ptr, get_lora_op_configs
+from vllm.lora.ops.triton_ops.utils import (
+    _get_lora_a_ptr,
+    get_lora_op_configs,
+    supports_pdl,
+)
 from vllm.triton_utils import tl, triton
 from vllm.utils.torch_utils import direct_register_custom_op
 
@@ -220,9 +225,9 @@ def _lora_shrink(
         NUM_SLICES,
         num_active_loras.item(),
     )
-    # We disable PDL temporarily because LoRA kernels are not launching back-to-back,
-    # making PDL invalid and affecting the kernel performance.
-    use_gdc = False  # supports_pdl(inputs.device)
+
+    # PDL only works when dual-stream is being used.
+    use_gdc = supports_pdl(inputs.device) and envs.VLLM_LORA_ENABLE_DUAL_STREAM
     _lora_shrink_kernel[grid](
         inputs,
         lora_ptr_tensor,
diff --git a/vllm/lora/ops/triton_ops/utils.py b/vllm/lora/ops/triton_ops/utils.py
index 0ab52e698318..dd4c0d2097cf 100644
--- a/vllm/lora/ops/triton_ops/utils.py
+++ b/vllm/lora/ops/triton_ops/utils.py
@@ -13,6 +13,7 @@
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.utils.math_utils import next_power_of_2
+from vllm.utils.torch_utils import async_tensor_h2d
 
 logger = init_logger(__name__)
 is_batch_invariant = envs.VLLM_BATCH_INVARIANT
@@ -49,7 +50,9 @@ def _get_lora_a_ptr(lora_a_weights: list[torch.Tensor], device: torch.device):
         lora_strides_d1.append(lora_a_weight.stride(1))
         lora_strides_d2.append(lora_a_weight.stride(2))
     if len(lora_a_weights) > 1:
-        lora_ptr_tensor = torch.tensor(tensor_ptrs, device=device, dtype=torch.uint64)
+        lora_ptr_tensor = async_tensor_h2d(
+            tensor_ptrs, dtype=torch.uint64, device=device
+        )
     else:
         lora_ptr_tensor = lora_a_weights[0]
 
@@ -106,10 +109,11 @@ def _get_lora_b_ptr(
         hidden_sizes.append(lora_b_weight.size(1))
 
     if len(lora_weights) > 1:
-        # note these are device tensors
-        lora_ptr_tensor = torch.tensor(tensor_ptrs, device=device, dtype=torch.uint64)
-        slice_start_tensor = torch.tensor(
-            slice_offset_lst, device=device, dtype=torch.uint64
+        lora_ptr_tensor = async_tensor_h2d(
+            tensor_ptrs, dtype=torch.uint64, device=device
+        )
+        slice_start_tensor = async_tensor_h2d(
+            slice_offset_lst, dtype=torch.uint64, device=device
         )
     else:
         slice_start_tensor = slice_offset_lst[0]
@@ -129,10 +133,18 @@ def _get_lora_b_ptr(
         same_stride = True
 
     else:
-        lora_strides_d0_tensor = torch.tensor(lora_strides_d0, device=device)
-        lora_strides_d1_tensor = torch.tensor(lora_strides_d1, device=device)
-        lora_strides_d2_tensor = torch.tensor(lora_strides_d2, device=device)
-        hidden_sizes_tensor = torch.tensor(hidden_sizes, device=device)
+        lora_strides_d0_tensor = async_tensor_h2d(
+            lora_strides_d0, dtype=torch.int64, device=device
+        )
+        lora_strides_d1_tensor = async_tensor_h2d(
+            lora_strides_d1, dtype=torch.int64, device=device
+        )
+        lora_strides_d2_tensor = async_tensor_h2d(
+            lora_strides_d2, dtype=torch.int64, device=device
+        )
+        hidden_sizes_tensor = async_tensor_h2d(
+            hidden_sizes, dtype=torch.int64, device=device
+        )
         same_stride = False
     # MAX_N is the maximum hidden size among all the lora_b weights
     MAX_N = max(hidden_sizes)
@@ -321,3 +333,20 @@ def supports_pdl(device: torch.device | None = None) -> bool:
 def supports_tma(device: torch.device | None = None) -> bool:
     # TMA requires compute capability SM90 or above
     return current_platform.is_cuda() and current_platform.has_device_capability(90)
+
+
+def _normalize_lora_config_keys(
+    config: dict[str, int | None],
+) -> dict[str, int | None]:
+    """Normalize Triton config dict keys to uppercase BLOCK_SIZE_* format."""
+    out: dict[str, int | None] = {}
+    for key, val in config.items():
+        if key.islower():
+            if key.startswith("block_"):
+                nk = "BLOCK_SIZE_" + key.split("_")[-1].upper()
+            else:
+                nk = key.upper()
+        else:
+            nk = key
+        out[nk] = val
+    return out
diff --git a/vllm/lora/ops/xpu_ops/lora_ops.py b/vllm/lora/ops/xpu_ops/lora_ops.py
index 6d1751c3738e..070fd8645821 100644
--- a/vllm/lora/ops/xpu_ops/lora_ops.py
+++ b/vllm/lora/ops/xpu_ops/lora_ops.py
@@ -27,9 +27,42 @@ def bgmv_expand(
     lora_indices_tensor: torch.Tensor,
     add_inputs: bool = True,
 ) -> None:
-    torch.ops._xpu_C.bgmv_expand(
-        output_tensor, inputs, lora_b_weights, lora_indices_tensor, add_inputs
-    )
+    weight_out_dim = lora_b_weights.size(-2)
+    output_dim = output_tensor.size(1)
+
+    if weight_out_dim == output_dim:
+        torch.ops._xpu_C.bgmv_expand(
+            output_tensor,
+            inputs,
+            lora_b_weights,
+            lora_indices_tensor,
+            add_inputs,
+        )
+    elif weight_out_dim < output_dim:
+        # LoRA weight output dim can be smaller than the output tensor
+        # (e.g. vocab_size vs padded logits). Use expand_slice to write
+        # only the matching portion, mirroring torch_ops common_len logic.
+        torch.ops._xpu_C.bgmv_expand_slice(
+            output_tensor,
+            inputs,
+            lora_b_weights,
+            lora_indices_tensor,
+            0,
+            weight_out_dim,
+            add_inputs,
+        )
+    else:
+        # Weight output dim larger than output tensor: truncate weights.
+        lora_b_weights = lora_b_weights[..., :output_dim, :].contiguous()
+        torch.ops._xpu_C.bgmv_expand_slice(
+            output_tensor,
+            inputs,
+            lora_b_weights,
+            lora_indices_tensor,
+            0,
+            output_dim,
+            add_inputs,
+        )
 
 
 def bgmv_expand_slice(
diff --git a/vllm/lora/punica_wrapper/punica_base.py b/vllm/lora/punica_wrapper/punica_base.py
index facbd681a09a..0448a6d00cda 100644
--- a/vllm/lora/punica_wrapper/punica_base.py
+++ b/vllm/lora/punica_wrapper/punica_base.py
@@ -493,3 +493,70 @@ def add_lora_fused_moe(
         """
         # TODO: implement it based on torch ops
         raise NotImplementedError
+
+    def add_lora_w13(
+        self,
+        y: torch.Tensor,
+        x: torch.Tensor,
+        lora_a_stacked: tuple[torch.Tensor, ...],
+        lora_b_stacked: tuple[torch.Tensor, ...],
+        topk_ids: torch.Tensor,
+        topk_weights: torch.Tensor,
+        expert_map: torch.Tensor | None,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        num_tokens: int,
+        top_k_num: int,
+        max_loras: int,
+        adapter_enabled: torch.Tensor,
+        local_num_experts: int,
+        top_k: int,
+        num_slices: int,
+        fully_sharded: bool,
+        use_tuned_config: bool,
+        token_lora_mapping: torch.Tensor | None = None,
+    ) -> tuple[
+        torch.Tensor | None,
+        torch.Tensor | None,
+        torch.Tensor | None,
+        torch.Tensor | None,
+    ]:
+        """Apply w13 LoRA to y (intermediate_cache1) in-place before activation.
+
+        When `token_lora_mapping` is provided it overrides the punica_wrapper's
+        global mapping — used by EP+LoRA to pass the per-rank-local mapping
+        after all-to-all dispatch.
+
+        Returns (sorted_token_ids_lora, expert_ids_lora,
+                 num_tokens_post_padded_lora, token_lora_mapping)
+        for reuse by add_lora_w2.
+        """
+        raise NotImplementedError
+
+    def add_lora_w2(
+        self,
+        y: torch.Tensor,
+        x: torch.Tensor,
+        lora_a_stacked: tuple[torch.Tensor, ...],
+        lora_b_stacked: tuple[torch.Tensor, ...],
+        topk_weights: torch.Tensor,
+        sorted_token_ids_lora: torch.Tensor | None,
+        expert_ids_lora: torch.Tensor | None,
+        num_tokens_post_padded_lora: torch.Tensor | None,
+        token_lora_mapping: torch.Tensor | None,
+        num_tokens: int,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        top_k_num: int,
+        max_loras: int,
+        adapter_enabled: torch.Tensor,
+        top_k: int,
+        fully_sharded: bool,
+        tp_rank: int,
+        use_tuned_config: bool,
+    ) -> None:
+        """Apply w2 LoRA to y (intermediate_cache3) in-place before moe_sum.
+
+        Reuses routing tensors returned by add_lora_w13.
+        """
+        raise NotImplementedError
diff --git a/vllm/lora/punica_wrapper/punica_gpu.py b/vllm/lora/punica_wrapper/punica_gpu.py
index 5f2604892ce9..bf951e074949 100644
--- a/vllm/lora/punica_wrapper/punica_gpu.py
+++ b/vllm/lora/punica_wrapper/punica_gpu.py
@@ -144,7 +144,9 @@ def add_expand(
             x (torch.Tensor): Input tensors
             lora_b_stacked (tuple[torch.Tensor, ...]): lora_b's weight
             output_slices (tuple[int, ...]): Every slice's size
-            add_inputs (bool): Defaults to True.
+            add_inputs (bool): If True, add LoRA output to y; if False, write
+                LoRA-only output to y (used for dual-stream when base and LoRA
+                run on different CUDA streams). Defaults to True.
         """
         y_org = y
         y = y.view(-1, y.shape[-1])
@@ -161,7 +163,7 @@ def add_expand(
                 num_tokens, self.lora_config.specialize_active_lora
             ),
             offset_start=offset_start,
-            add_inputs=True,
+            add_inputs=add_inputs,
         )
 
         y = y.view_as(y_org)
@@ -244,7 +246,7 @@ def add_lora_linear(
         buffer = torch.empty(
             (len(output_slices), x.size(0), r), dtype=torch.float32, device=x.device
         )
-
+        add_inputs = kwargs.pop("add_inputs", True)
         self.add_shrink(
             buffer,  # type: ignore
             x,
@@ -257,7 +259,7 @@ def add_lora_linear(
             buffer,  # type: ignore
             lora_b_stacked,
             output_slices,
-            add_inputs=True,
+            add_inputs=add_inputs,
             **kwargs,
         )
 
@@ -333,25 +335,49 @@ def moe_lora_align_block_size(
         expert_map: torch.Tensor | None = None,
         pad_sorted_ids: bool = False,
         naive_block_assignment: bool = False,
+        token_lora_mapping: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         """
         Aligns tokens and experts into block-sized chunks for LoRA-based
         mixture-of-experts (MoE) execution.
+
+        When `token_lora_mapping` is provided, it overrides the global mapping
+        read from `self.token_mapping_meta`. This is how EP+LoRA injects the
+        per-rank-local token→LoRA map after all-to-all dispatch.
         """
-        (token_lora_mapping, _, _, _, lora_ids, _, _) = (
-            self.token_mapping_meta.meta_args(
-                num_tokens, self.lora_config.specialize_active_lora
-            )
+        (
+            token_lora_mapping_meta,
+            _,
+            _,
+            _,
+            lora_ids,
+            _,
+            _,
+        ) = self.token_mapping_meta.meta_args(
+            num_tokens, self.lora_config.specialize_active_lora
+        )
+        if token_lora_mapping is None:
+            token_lora_mapping = token_lora_mapping_meta
+        # Under EP the caller passes local_num_experts but topk_ids carries
+        # GLOBAL expert indices. The CUDA kernel uses num_experts to size
+        # its bucketing table; with EP we must size by global_num_experts
+        # so global topk_ids don't overflow. expert_map inside the kernel
+        # then translates global→local so the output expert_ids are local
+        # (mirrors the non-LoRA moe_align_block_size behavior).
+        kernel_num_experts = (
+            expert_map.numel() if expert_map is not None else num_experts
         )
         if naive_block_assignment:
             expert_ids = topk_ids.reshape(-1)
             sorted_ids = None
             num_tokens_post_pad = None
         else:
-            max_num_tokens_padded = topk_ids.numel() + num_experts * (block_size - 1)
+            max_num_tokens_padded = topk_ids.numel() + kernel_num_experts * (
+                block_size - 1
+            )
             if pad_sorted_ids:
                 max_num_tokens_padded = round_up(max_num_tokens_padded, block_size)
-            if topk_ids.numel() < num_experts:
+            if topk_ids.numel() < kernel_num_experts:
                 max_num_tokens_padded = topk_ids.numel() * block_size
             sorted_ids = torch.empty(
                 (max_loras * max_num_tokens_padded,),
@@ -359,9 +385,12 @@ def moe_lora_align_block_size(
                 device=topk_ids.device,
             )
             max_num_m_blocks = triton.cdiv(max_num_tokens_padded, block_size)
-            # Expert ids must be set default to -1 to prevent a blank block
-            expert_ids = torch.empty(
+            # Expert ids are initialized to -1 so unused (lora, expert)
+            # slots don't drive the LoRA Triton kernel into the wrong bucket.
+            # The kernel overwrites only active slots.
+            expert_ids = torch.full(
                 (max_loras * max_num_m_blocks,),
+                -1,
                 dtype=torch.int32,
                 device=topk_ids.device,
             )
@@ -372,7 +401,7 @@ def moe_lora_align_block_size(
             ops.moe_lora_align_block_size(
                 topk_ids,
                 token_lora_mapping,
-                num_experts,
+                kernel_num_experts,
                 block_size,
                 max_loras,
                 max_num_tokens_padded,
@@ -382,11 +411,10 @@ def moe_lora_align_block_size(
                 num_tokens_post_pad,
                 adapter_enabled,
                 lora_ids,
+                expert_map,
             )
-            if expert_map is not None:
-                expert_ids = expert_map[expert_ids]
 
-        return None, sorted_ids, expert_ids, num_tokens_post_pad
+        return token_lora_mapping, sorted_ids, expert_ids, num_tokens_post_pad
 
     def add_lora_fused_moe(
         self,
@@ -457,3 +485,241 @@ def add_lora_fused_moe(
             fully_sharded,
             offset,
         )
+
+    def add_lora_w13(
+        self,
+        y: torch.Tensor,
+        x: torch.Tensor,
+        lora_a_stacked: tuple[torch.Tensor, ...],
+        lora_b_stacked: tuple[torch.Tensor, ...],
+        topk_ids: torch.Tensor,
+        topk_weights: torch.Tensor,
+        expert_map: torch.Tensor | None,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        num_tokens: int,
+        top_k_num: int,
+        max_loras: int,
+        adapter_enabled: torch.Tensor,
+        local_num_experts: int,
+        top_k: int,
+        num_slices: int,
+        fully_sharded: bool,
+        use_tuned_config: bool,
+        token_lora_mapping: torch.Tensor | None = None,
+    ) -> tuple[
+        torch.Tensor | None,
+        torch.Tensor | None,
+        torch.Tensor | None,
+        torch.Tensor | None,
+    ]:
+        import functools
+
+        from vllm.lora.layers.utils import try_get_optimal_moe_lora_config
+        from vllm.lora.ops.triton_ops.utils import (
+            _normalize_lora_config_keys,
+            get_lora_op_configs,
+        )
+        from vllm.model_executor.layers.fused_moe.config import _get_config_dtype_str
+
+        config_dtype = _get_config_dtype_str(
+            dtype=x.dtype,
+            use_fp8_w8a8=False,
+            use_int8_w8a16=False,
+            use_int4_w4a16=False,
+        )
+        max_lora_rank = lora_a_stacked[0].shape[-2]
+
+        if use_tuned_config:
+            shrink_config = get_lora_op_configs(
+                op_type="fused_moe_lora_w13_shrink",
+                max_loras=max_loras,
+                batch=num_tokens,
+                hidden_size=x.shape[-1],
+                rank=max_lora_rank,
+                num_slices=num_slices,
+                moe_intermediate_size=lora_b_stacked[0].shape[-2],
+            )
+            expand_config = get_lora_op_configs(
+                op_type="fused_moe_lora_w13_expand",
+                max_loras=max_loras,
+                batch=num_tokens,
+                hidden_size=x.shape[-1],
+                rank=max_lora_rank,
+                num_slices=num_slices,
+                moe_intermediate_size=lora_b_stacked[0].shape[-2],
+            )
+        else:
+            get_config = functools.partial(
+                try_get_optimal_moe_lora_config,
+                w1_shape=w1.shape,
+                w2_shape=w2.shape,
+                rank=max_lora_rank,
+                top_k=top_k,
+                dtype=config_dtype,
+                M=num_tokens,
+            )
+            shrink_config = get_config(op_type="fused_moe_lora_w13_shrink")
+            expand_config = get_config(op_type="fused_moe_lora_w13_expand")
+
+        shrink_config = _normalize_lora_config_keys(shrink_config)
+        expand_config = _normalize_lora_config_keys(expand_config)
+
+        SPARSITY_FACTOR = 8
+        naive_block_assignment = (
+            expert_map is None
+            and num_tokens * top_k * SPARSITY_FACTOR <= local_num_experts * max_loras
+        )
+
+        (
+            token_lora_mapping,
+            sorted_token_ids_lora,
+            expert_ids_lora,
+            num_tokens_post_padded_lora,
+        ) = self.moe_lora_align_block_size(
+            topk_ids,
+            num_tokens,
+            int(shrink_config.get("BLOCK_SIZE_M") or 64),
+            local_num_experts,
+            max_loras,
+            adapter_enabled,
+            expert_map,
+            naive_block_assignment=naive_block_assignment,
+            token_lora_mapping=token_lora_mapping,
+        )
+
+        _sorted = sorted_token_ids_lora
+        _eids = expert_ids_lora
+        if _sorted is not None:
+            _eids = _eids.view(max_loras, -1)
+            _sorted = _sorted.view(max_loras, -1)
+
+        self.add_lora_fused_moe(
+            y.view(-1, top_k_num, y.shape[-1]),
+            x,
+            lora_a_stacked,
+            lora_b_stacked,
+            topk_weights,
+            _sorted,
+            _eids,
+            num_tokens_post_padded_lora,
+            max_lora_rank,
+            top_k,
+            shrink_config,
+            expand_config,
+            adapter_enabled,
+            fully_sharded=fully_sharded,
+            token_lora_mapping=token_lora_mapping,
+        )
+
+        return (
+            sorted_token_ids_lora,
+            expert_ids_lora,
+            num_tokens_post_padded_lora,
+            token_lora_mapping,
+        )
+
+    def add_lora_w2(
+        self,
+        y: torch.Tensor,
+        x: torch.Tensor,
+        lora_a_stacked: tuple[torch.Tensor, ...],
+        lora_b_stacked: tuple[torch.Tensor, ...],
+        topk_weights: torch.Tensor,
+        sorted_token_ids_lora: torch.Tensor | None,
+        expert_ids_lora: torch.Tensor | None,
+        num_tokens_post_padded_lora: torch.Tensor | None,
+        token_lora_mapping: torch.Tensor | None,
+        num_tokens: int,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        top_k_num: int,
+        max_loras: int,
+        adapter_enabled: torch.Tensor,
+        top_k: int,
+        fully_sharded: bool,
+        tp_rank: int,
+        use_tuned_config: bool,
+    ) -> None:
+        import functools
+
+        from vllm.lora.layers.utils import try_get_optimal_moe_lora_config
+        from vllm.lora.ops.triton_ops.utils import (
+            _normalize_lora_config_keys,
+            get_lora_op_configs,
+        )
+        from vllm.model_executor.layers.fused_moe.config import _get_config_dtype_str
+
+        config_dtype = _get_config_dtype_str(
+            dtype=x.dtype,
+            use_fp8_w8a8=False,
+            use_int8_w8a16=False,
+            use_int4_w4a16=False,
+        )
+        max_lora_rank = lora_a_stacked[0].shape[-2]
+
+        if use_tuned_config:
+            shrink_config = get_lora_op_configs(
+                op_type="fused_moe_lora_w2_shrink",
+                max_loras=max_loras,
+                batch=num_tokens,
+                hidden_size=y.shape[-1],
+                rank=max_lora_rank,
+                num_slices=1,
+                moe_intermediate_size=lora_a_stacked[0].shape[-1],
+            )
+            expand_config = get_lora_op_configs(
+                op_type="fused_moe_lora_w2_expand",
+                max_loras=max_loras,
+                batch=num_tokens,
+                hidden_size=y.shape[-1],
+                rank=max_lora_rank,
+                num_slices=1,
+                moe_intermediate_size=lora_a_stacked[0].shape[-1],
+            )
+        else:
+            get_config = functools.partial(
+                try_get_optimal_moe_lora_config,
+                w1_shape=w1.shape,
+                w2_shape=w2.shape,
+                rank=max_lora_rank,
+                top_k=top_k,
+                dtype=config_dtype,
+                M=num_tokens,
+            )
+            shrink_config = get_config(op_type="fused_moe_lora_w2_shrink")
+            expand_config = get_config(op_type="fused_moe_lora_w2_expand")
+
+        shrink_config = _normalize_lora_config_keys(shrink_config)
+        expand_config = _normalize_lora_config_keys(expand_config)
+
+        _sorted = sorted_token_ids_lora
+        _eids = expert_ids_lora
+        if _sorted is not None:
+            assert _eids is not None
+            _eids = _eids.view(max_loras, -1)
+            _sorted = _sorted.view(max_loras, -1)
+
+        # w2_lora_b shape[-2] is hidden_size // tp_size when fully_sharded
+        shard_size = lora_b_stacked[0].shape[-2]
+        offset = shard_size * tp_rank if fully_sharded else 0
+
+        self.add_lora_fused_moe(
+            y,
+            x,
+            lora_a_stacked,
+            lora_b_stacked,
+            topk_weights,
+            _sorted,
+            _eids,
+            num_tokens_post_padded_lora,
+            max_lora_rank,
+            top_k,
+            shrink_config,
+            expand_config,
+            adapter_enabled,
+            True,  # mul_routed_weight
+            fully_sharded=fully_sharded,
+            offset=offset,
+            token_lora_mapping=token_lora_mapping,
+        )
diff --git a/vllm/lora/punica_wrapper/punica_xpu.py b/vllm/lora/punica_wrapper/punica_xpu.py
old mode 100644
new mode 100755
index f031e1bfa341..58316cb75970
--- a/vllm/lora/punica_wrapper/punica_xpu.py
+++ b/vllm/lora/punica_wrapper/punica_xpu.py
@@ -14,6 +14,7 @@
 from vllm import _custom_ops as ops
 from vllm.lora.layers import LoRAMapping
 from vllm.lora.ops.xpu_ops import bgmv_expand, bgmv_expand_slice, bgmv_shrink
+from vllm.lora.utils import get_captured_lora_counts
 from vllm.triton_utils import HAS_TRITON, triton
 from vllm.utils.math_utils import round_up
 
@@ -48,8 +49,24 @@ def __init__(
 
         self.lora_config = kwargs["lora_config"]
         self.max_loras = self.lora_config.max_loras
+
+        # Compute captured LoRA counts for cudagraph specialization.
+        captured_lora_counts = get_captured_lora_counts(
+            self.max_loras, self.lora_config.specialize_active_lora
+        )
+
         self.token_mapping_meta = LoRAKernelMeta.make(
-            self.max_loras, max_num_batched_tokens, device=device
+            self.max_loras,
+            max_num_batched_tokens,
+            device=device,
+            captured_lora_counts=captured_lora_counts,
+        )
+
+        self.prompt_mapping_meta = LoRAKernelMeta.make(
+            self.max_loras,
+            max_num_batched_tokens,
+            device=device,
+            captured_lora_counts=captured_lora_counts,
         )
 
     def update_metadata(
@@ -63,6 +80,10 @@ def update_metadata(
         self.is_prefill = mapping.is_prefill
         self._update_base_metadata(mapping, lora_index_to_id, max_loras, vocab_size)
 
+        # Prepare kernel metadata tensors
+        self.token_mapping_meta.prepare_tensors(self.token_lora_indices)
+        self.prompt_mapping_meta.prepare_tensors(self.sampler_indices)
+
     def _get_token_lora_indices(self, x: torch.Tensor) -> torch.IntTensor:
         return torch.narrow(self._token_lora_indices, 0, 0, x.size(0))
 
@@ -419,3 +440,240 @@ def add_lora_fused_moe(
             fully_sharded,
             offset,
         )
+
+    def add_lora_w13(
+        self,
+        y: torch.Tensor,
+        x: torch.Tensor,
+        lora_a_stacked: tuple[torch.Tensor, ...],
+        lora_b_stacked: tuple[torch.Tensor, ...],
+        topk_ids: torch.Tensor,
+        topk_weights: torch.Tensor,
+        expert_map: torch.Tensor | None,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        num_tokens: int,
+        top_k_num: int,
+        max_loras: int,
+        adapter_enabled: torch.Tensor,
+        local_num_experts: int,
+        top_k: int,
+        num_slices: int,
+        fully_sharded: bool,
+        use_tuned_config: bool,
+        token_lora_mapping: torch.Tensor | None = None,
+    ) -> tuple[
+        torch.Tensor | None,
+        torch.Tensor | None,
+        torch.Tensor | None,
+        torch.Tensor | None,
+    ]:
+        import functools
+
+        from vllm.lora.layers.utils import try_get_optimal_moe_lora_config
+        from vllm.lora.ops.triton_ops.utils import (
+            _normalize_lora_config_keys,
+            get_lora_op_configs,
+        )
+        from vllm.model_executor.layers.fused_moe.config import _get_config_dtype_str
+
+        config_dtype = _get_config_dtype_str(
+            dtype=x.dtype,
+            use_fp8_w8a8=False,
+            use_int8_w8a16=False,
+            use_int4_w4a16=False,
+        )
+        max_lora_rank = lora_a_stacked[0].shape[-2]
+
+        if use_tuned_config:
+            shrink_config = get_lora_op_configs(
+                op_type="fused_moe_lora_w13_shrink",
+                max_loras=max_loras,
+                batch=num_tokens,
+                hidden_size=x.shape[-1],
+                rank=max_lora_rank,
+                num_slices=num_slices,
+                moe_intermediate_size=lora_b_stacked[0].shape[-2],
+            )
+            expand_config = get_lora_op_configs(
+                op_type="fused_moe_lora_w13_expand",
+                max_loras=max_loras,
+                batch=num_tokens,
+                hidden_size=x.shape[-1],
+                rank=max_lora_rank,
+                num_slices=num_slices,
+                moe_intermediate_size=lora_b_stacked[0].shape[-2],
+            )
+        else:
+            get_config = functools.partial(
+                try_get_optimal_moe_lora_config,
+                w1_shape=w1.shape,
+                w2_shape=w2.shape,
+                rank=max_lora_rank,
+                top_k=top_k,
+                dtype=config_dtype,
+                M=num_tokens,
+            )
+            shrink_config = get_config(op_type="fused_moe_lora_w13_shrink")
+            expand_config = get_config(op_type="fused_moe_lora_w13_expand")
+
+        shrink_config = _normalize_lora_config_keys(shrink_config)
+        expand_config = _normalize_lora_config_keys(expand_config)
+
+        SPARSITY_FACTOR = 8
+        naive_block_assignment = (
+            expert_map is None
+            and num_tokens * top_k * SPARSITY_FACTOR <= local_num_experts * max_loras
+        )
+
+        (
+            token_lora_mapping,
+            sorted_token_ids_lora,
+            expert_ids_lora,
+            num_tokens_post_padded_lora,
+        ) = self.moe_lora_align_block_size(
+            topk_ids,
+            num_tokens,
+            int(shrink_config.get("BLOCK_SIZE_M") or 64),
+            local_num_experts,
+            max_loras,
+            adapter_enabled,
+            expert_map,
+            naive_block_assignment=naive_block_assignment,
+        )
+
+        _sorted = sorted_token_ids_lora
+        _eids = expert_ids_lora
+        if _sorted is not None:
+            _eids = _eids.view(max_loras, -1)
+            _sorted = _sorted.view(max_loras, -1)
+
+        self.add_lora_fused_moe(
+            y.view(-1, top_k_num, y.shape[-1]),
+            x,
+            lora_a_stacked,
+            lora_b_stacked,
+            topk_weights,
+            _sorted,
+            _eids,
+            num_tokens_post_padded_lora,
+            max_lora_rank,
+            top_k,
+            shrink_config,
+            expand_config,
+            adapter_enabled,
+            fully_sharded=fully_sharded,
+            token_lora_mapping=token_lora_mapping,
+        )
+
+        return (
+            sorted_token_ids_lora,
+            expert_ids_lora,
+            num_tokens_post_padded_lora,
+            token_lora_mapping,
+        )
+
+    def add_lora_w2(
+        self,
+        y: torch.Tensor,
+        x: torch.Tensor,
+        lora_a_stacked: tuple[torch.Tensor, ...],
+        lora_b_stacked: tuple[torch.Tensor, ...],
+        topk_weights: torch.Tensor,
+        sorted_token_ids_lora: torch.Tensor | None,
+        expert_ids_lora: torch.Tensor | None,
+        num_tokens_post_padded_lora: torch.Tensor | None,
+        token_lora_mapping: torch.Tensor | None,
+        num_tokens: int,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        top_k_num: int,
+        max_loras: int,
+        adapter_enabled: torch.Tensor,
+        top_k: int,
+        fully_sharded: bool,
+        tp_rank: int,
+        use_tuned_config: bool,
+    ) -> None:
+        import functools
+
+        from vllm.lora.layers.utils import try_get_optimal_moe_lora_config
+        from vllm.lora.ops.triton_ops.utils import (
+            _normalize_lora_config_keys,
+            get_lora_op_configs,
+        )
+        from vllm.model_executor.layers.fused_moe.config import _get_config_dtype_str
+
+        config_dtype = _get_config_dtype_str(
+            dtype=x.dtype,
+            use_fp8_w8a8=False,
+            use_int8_w8a16=False,
+            use_int4_w4a16=False,
+        )
+        max_lora_rank = lora_a_stacked[0].shape[-2]
+
+        if use_tuned_config:
+            shrink_config = get_lora_op_configs(
+                op_type="fused_moe_lora_w2_shrink",
+                max_loras=max_loras,
+                batch=num_tokens,
+                hidden_size=y.shape[-1],
+                rank=max_lora_rank,
+                num_slices=1,
+                moe_intermediate_size=lora_a_stacked[0].shape[-1],
+            )
+            expand_config = get_lora_op_configs(
+                op_type="fused_moe_lora_w2_expand",
+                max_loras=max_loras,
+                batch=num_tokens,
+                hidden_size=y.shape[-1],
+                rank=max_lora_rank,
+                num_slices=1,
+                moe_intermediate_size=lora_a_stacked[0].shape[-1],
+            )
+        else:
+            get_config = functools.partial(
+                try_get_optimal_moe_lora_config,
+                w1_shape=w1.shape,
+                w2_shape=w2.shape,
+                rank=max_lora_rank,
+                top_k=top_k,
+                dtype=config_dtype,
+                M=num_tokens,
+            )
+            shrink_config = get_config(op_type="fused_moe_lora_w2_shrink")
+            expand_config = get_config(op_type="fused_moe_lora_w2_expand")
+
+        shrink_config = _normalize_lora_config_keys(shrink_config)
+        expand_config = _normalize_lora_config_keys(expand_config)
+
+        _sorted = sorted_token_ids_lora
+        _eids = expert_ids_lora
+        if _sorted is not None:
+            assert _eids is not None
+            _eids = _eids.view(max_loras, -1)
+            _sorted = _sorted.view(max_loras, -1)
+
+        # w2_lora_b shape[-2] is hidden_size // tp_size when fully_sharded
+        shard_size = lora_b_stacked[0].shape[-2]
+        offset = shard_size * tp_rank if fully_sharded else 0
+
+        self.add_lora_fused_moe(
+            y,
+            x,
+            lora_a_stacked,
+            lora_b_stacked,
+            topk_weights,
+            _sorted,
+            _eids,
+            num_tokens_post_padded_lora,
+            max_lora_rank,
+            top_k,
+            shrink_config,
+            expand_config,
+            adapter_enabled,
+            True,  # mul_routed_weight
+            fully_sharded=fully_sharded,
+            offset=offset,
+            token_lora_mapping=token_lora_mapping,
+        )
diff --git a/vllm/lora/punica_wrapper/utils.py b/vllm/lora/punica_wrapper/utils.py
index 584745f86b1a..8cf5f1a176ef 100644
--- a/vllm/lora/punica_wrapper/utils.py
+++ b/vllm/lora/punica_wrapper/utils.py
@@ -5,6 +5,8 @@
 
 import torch
 
+from vllm.utils.torch_utils import async_tensor_h2d
+
 if TYPE_CHECKING:
     # avoid circuit import
     from vllm.lora.layers import LoRAMapping
@@ -110,8 +112,8 @@ def convert_mapping(
         embedding_indices,
     ]
 
-    indices = torch.tensor(indices_list, dtype=torch.long, device=device)
-    prompt_mapping_tensor = torch.tensor(
+    indices = async_tensor_h2d(indices_list, dtype=torch.long, device=device)
+    prompt_mapping_tensor = async_tensor_h2d(
         prompt_mapping, dtype=torch.long, device=device
     )
     embeddings_indices = torch.stack(
diff --git a/vllm/lora/request.py b/vllm/lora/request.py
index 008ade5e5f1f..7ba1725fa2e8 100644
--- a/vllm/lora/request.py
+++ b/vllm/lora/request.py
@@ -28,6 +28,13 @@ class LoRARequest(
     base_model_name: str | None = msgspec.field(default=None)
     tensorizer_config_dict: dict | None = None
     load_inplace: bool = False
+    is_3d_lora_weight: bool = False
+    """Whether this adapter's MoE weights are stored in the 3D fused
+    `gate_up_proj` / `down_proj` layout (one fused tensor per layer) or the
+    2D per-expert split layout (separate `gate_proj` / `up_proj` / `down_proj`
+    tensors per expert). Only consulted when the engine is started with
+    `enable_mixed_moe_lora_format=True`; otherwise it is ignored and the
+    on-disk format is inferred from the base model."""
 
     def __post_init__(self):
         if self.lora_int_id < 1:
diff --git a/vllm/lora/utils.py b/vllm/lora/utils.py
index 75ed9674af56..7b68c1f952aa 100644
--- a/vllm/lora/utils.py
+++ b/vllm/lora/utils.py
@@ -21,7 +21,6 @@
     ColumnParallelLinearWithShardedLoRA,
     FusedMoE3DWithLoRA,
     FusedMoEWithLoRA,
-    GateLinearWithLoRA,
     LogitsProcessorWithLoRA,
     MergedColumnParallelLinearVariableSliceWithLoRA,
     MergedColumnParallelLinearWithLoRA,
@@ -74,7 +73,9 @@ def get_lora_id():
     return _GLOBAL_LORA_ID
 
 
-_all_lora_classes: set[type[BaseLayerWithLoRA]] = {
+# Order matters here: more specific wrappers must be checked before generic
+# merged/column-parallel wrappers in from_layer().
+_all_lora_classes: tuple[type[BaseLayerWithLoRA], ...] = (
     VocabParallelEmbeddingWithLoRA,
     ColumnParallelLinearWithLoRA,
     MergedColumnParallelLinearWithLoRA,
@@ -82,7 +83,6 @@ def get_lora_id():
     MergedQKVParallelLinearWithLoRA,
     RowParallelLinearWithLoRA,
     ReplicatedLinearWithLoRA,
-    GateLinearWithLoRA,
     LogitsProcessorWithLoRA,
     ColumnParallelLinearWithShardedLoRA,
     QKVParallelLinearWithShardedLoRA,
@@ -92,7 +92,7 @@ def get_lora_id():
     RowParallelLinearWithShardedLoRA,
     FusedMoEWithLoRA,
     FusedMoE3DWithLoRA,
-}
+)
 
 
 def is_moe_model(model: nn.Module) -> bool:
@@ -260,6 +260,7 @@ def is_supported_lora_module(
 def is_in_target_modules(
     module_name: str,
     target_modules: list[str] | None,
+    packed_modules_mapping: dict[str, list[str]] | None = None,
 ) -> bool:
     """Check if a module passes the deployment-time target_modules filter.
 
@@ -270,14 +271,33 @@ def is_in_target_modules(
         module_name: Full dot-separated module name.
         target_modules: Optional deployment-time restriction list from
             LoRAConfig.target_modules.
+        packed_modules_mapping: Optional model-defined mapping from packed
+            runtime module names to their adapter-visible submodule names
+            (e.g. ``{"gate_up_proj": ["gate_proj", "up_proj"]}``).
 
     Returns:
         True if the module passes the filter, False otherwise.
     """
     if target_modules is None:
         return True
+    target_module_set = set(target_modules)
     module_suffix = module_name.split(".")[-1]
-    return module_suffix in set(target_modules)
+    if module_suffix in target_module_set or module_name in target_module_set:
+        return True
+
+    if not packed_modules_mapping:
+        return False
+
+    # Runtime packed parent matched by deployment-time child targets.
+    packed_children = packed_modules_mapping.get(module_suffix)
+    if packed_children and any(child in target_module_set for child in packed_children):
+        return True
+
+    # Adapter-visible packed child matched by deployment-time parent target.
+    return any(
+        module_suffix in children and packed_parent in target_module_set
+        for packed_parent, children in packed_modules_mapping.items()
+    )
 
 
 def get_adapter_absolute_path(lora_path: str) -> str:
@@ -335,7 +355,9 @@ def get_adapter_absolute_path(lora_path: str) -> str:
     return local_snapshot_path
 
 
-def process_packed_modules_mapping(model: nn.Module) -> dict[str, list[str]]:
+def process_packed_modules_mapping(
+    model: nn.Module, force_2d_moe: bool = False
+) -> dict[str, list[str]]:
     if is_moe_model(model):
         if moe_packed_mapping := get_moe_expert_mapping(model):
             # This method generates and returns a dictionary mapping packed module
@@ -344,8 +366,11 @@ def process_packed_modules_mapping(model: nn.Module) -> dict[str, list[str]]:
             # the expert indices are expanded based on the configured number
             # of routed experts.
             packed_modules_mapping = get_packed_modules_mapping(model)
-            if not model.is_3d_moe_weight:
-                # 3D MoE LoRA does not need `packed_modules_mapping`
+            # The 2D mapping is needed when the model itself is 2D, or when
+            # the engine forces the universal 2D wrapper via
+            # enable_mixed_moe_lora_format (so 3D models can also load 2D
+            # adapters through FusedMoEWithLoRA).
+            if (not model.is_3d_moe_weight) or force_2d_moe:
                 # Filter out malformed entries: non-gated MoE has empty
                 # ckpt_up_proj_name which results in weight_name containing ".."
                 # (e.g., "experts.0.." instead of "experts.0.layer_name.")
diff --git a/vllm/lora/worker_manager.py b/vllm/lora/worker_manager.py
index bea6d015e0a6..166d5c36ba57 100644
--- a/vllm/lora/worker_manager.py
+++ b/vllm/lora/worker_manager.py
@@ -17,11 +17,7 @@
 )
 from vllm.lora.peft_helper import PEFTHelper
 from vllm.lora.request import LoRARequest
-from vllm.lora.utils import (
-    get_adapter_absolute_path,
-    is_in_target_modules,
-    is_supported_lora_module,
-)
+from vllm.lora.utils import get_adapter_absolute_path
 
 logger = init_logger(__name__)
 
@@ -144,31 +140,12 @@ def _load_adapter(self, lora_request: LoRARequest) -> LoRAModel:
                 tensorizer_config_dict=lora_request.tensorizer_config_dict,
                 weights_mapper=hf_to_vllm_mapper,
                 skip_prefixes=lora_skip_prefixes,
+                moe_ep_spec=self._adapter_manager.moe_ep_load_spec,
             )
-
-            # Warn about adapter modules that will be ignored.
-            target_modules = self.lora_config.target_modules
-            expected_lora_modules_lst = list(expected_lora_modules)
-            for module_name in lora.loras:
-                if not is_supported_lora_module(module_name, expected_lora_modules_lst):
-                    logger.warning_once(
-                        "LoRA module '%s' in adapter '%s' is not in the "
-                        "model's supported LoRA target modules [%s]. "
-                        "These parameters will be ignored, which may "
-                        "cause abnormal model behavior.",
-                        module_name,
-                        lora_request.lora_path,
-                        ", ".join(sorted(expected_lora_modules_lst)),
-                    )
-                elif not is_in_target_modules(module_name, target_modules):
-                    logger.warning_once(
-                        "LoRA module '%s' in adapter '%s' is not in the "
-                        "deployment-time target_modules restriction [%s]."
-                        " These parameters will be ignored.",
-                        module_name,
-                        lora_request.lora_path,
-                        ", ".join(sorted(target_modules)),
-                    )
+            # Stamp the on-disk MoE layout onto the loaded model so the
+            # adapter manager can route 3D-format checkpoints through the
+            # 3D->2D conversion when running under the universal 2D wrapper.
+            lora.is_3d_lora_weight = lora_request.is_3d_lora_weight
 
         except FileNotFoundError as e:
             # FileNotFoundError should be raised if both
@@ -197,6 +174,9 @@ def add_dummy_lora(self, lora_request: LoRARequest, rank: int) -> bool:
                 self._cached_dummy_lora = dummy_lora
         return self._adapter_manager.add_adapter(dummy_lora)
 
+    def get_dummy_lora_warmup_rank(self, default_rank: int) -> int:
+        return self._adapter_manager.get_dummy_lora_warmup_rank(default_rank)
+
     def pin_adapter(self, adapter_id: int) -> bool:
         return self._adapter_manager.pin_adapter(adapter_id)
 
diff --git a/vllm/model_executor/kernels/linear/__init__.py b/vllm/model_executor/kernels/linear/__init__.py
index 282208502c59..b99fcc7d34b2 100644
--- a/vllm/model_executor/kernels/linear/__init__.py
+++ b/vllm/model_executor/kernels/linear/__init__.py
@@ -19,6 +19,10 @@
 
 import vllm.envs as envs
 from vllm.logger import init_logger
+from vllm.model_executor.kernels.linear.base import (
+    MMLinearKernel,
+    MMLinearLayerConfig,
+)
 from vllm.model_executor.kernels.linear.mixed_precision import (
     MPLinearKernel,
     MPLinearLayerConfig,
@@ -47,29 +51,92 @@
 from vllm.model_executor.kernels.linear.mixed_precision.marlin import (
     MarlinLinearKernel,
 )
+from vllm.model_executor.kernels.linear.mixed_precision.triton_w4a16 import (
+    TritonW4A16LinearKernel,
+)
 from vllm.model_executor.kernels.linear.mixed_precision.xpu import (
     XPUW4A8IntLinearKernel,
     XPUwNa16LinearKernel,
 )
+from vllm.model_executor.kernels.linear.mxfp4 import (
+    MxFp4LinearKernel,
+    MxFp4LinearLayerConfig,
+)
+from vllm.model_executor.kernels.linear.mxfp4.flashinfer import (
+    FlashInferMxFp4LinearKernel,
+)
+from vllm.model_executor.kernels.linear.mxfp4.marlin import (
+    MarlinMxFp4LinearKernel,
+)
+from vllm.model_executor.kernels.linear.mxfp4.xpu import (
+    XPUMxFp4LinearKernel,
+)
+from vllm.model_executor.kernels.linear.mxfp8 import (
+    Mxfp8LinearKernel,
+    Mxfp8LinearLayerConfig,
+)
+from vllm.model_executor.kernels.linear.mxfp8.emulation import (
+    EmulationMxfp8LinearKernel,
+)
+from vllm.model_executor.kernels.linear.mxfp8.flashinfer import (
+    FlashInferCutlassMxfp8LinearKernel,
+)
+from vllm.model_executor.kernels.linear.mxfp8.marlin import (
+    MarlinMxfp8LinearKernel,
+)
+from vllm.model_executor.kernels.linear.mxfp8.xpu import (
+    XPUMxFp8LinearKernel,
+)
+from vllm.model_executor.kernels.linear.nvfp4 import (
+    NvFp4LinearKernel,
+    NvFp4LinearLayerConfig,
+)
+from vllm.model_executor.kernels.linear.nvfp4.cutlass import (
+    CutlassNvFp4LinearKernel,
+)
+from vllm.model_executor.kernels.linear.nvfp4.emulation import (
+    EmulationNvFp4LinearKernel,
+)
+from vllm.model_executor.kernels.linear.nvfp4.fbgemm import (
+    FbgemmNvFp4LinearKernel,
+)
+from vllm.model_executor.kernels.linear.nvfp4.flashinfer import (
+    FlashInferB12xNvFp4LinearKernel,
+    FlashInferCudnnNvFp4LinearKernel,
+    FlashInferCutlassNvFp4LinearKernel,
+    FlashInferTrtllmNvFp4LinearKernel,
+)
+from vllm.model_executor.kernels.linear.nvfp4.marlin import (
+    MarlinNvFp4LinearKernel,
+)
 from vllm.model_executor.kernels.linear.scaled_mm import (
+    Fp8BlockScaledMMLinearKernel,
     FP8ScaledMMLinearKernel,
     FP8ScaledMMLinearLayerConfig,
     Int8ScaledMMLinearKernel,
     Int8ScaledMMLinearLayerConfig,
     ScaledMMLinearKernel,
-    ScaledMMLinearLayerConfig,
 )
 from vllm.model_executor.kernels.linear.scaled_mm.aiter import (
+    AiterFp8BlockScaledMMKernel,
     AiterInt8ScaledMMLinearKernel,
+    AiterPerTokenFp8ScaledMMLinearKernel,
+    AiterPreshuffledPerTokenFp8ScaledMMLinearKernel,
 )
 from vllm.model_executor.kernels.linear.scaled_mm.cpu import (
+    CPUFp8BlockScaledMMKernel,
     CPUInt8ScaledMMLinearKernel,
 )
 from vllm.model_executor.kernels.linear.scaled_mm.cutlass import (
+    CutlassFp8BlockScaledMMKernel,
     CutlassFP8ScaledMMLinearKernel,
     CutlassInt8ScaledMMLinearKernel,
 )
+from vllm.model_executor.kernels.linear.scaled_mm.deep_gemm import (
+    DeepGemmFp8BlockScaledMMKernel,
+)
 from vllm.model_executor.kernels.linear.scaled_mm.flashinfer import (
+    FlashInferFp8DeepGEMMDynamicBlockScaledKernel,
     FlashInferFP8ScaledMMLinearKernel,
 )
 from vllm.model_executor.kernels.linear.scaled_mm.marlin import (
@@ -84,6 +151,7 @@
     ROCmFP8ScaledMMLinearKernel,
 )
 from vllm.model_executor.kernels.linear.scaled_mm.triton import (
+    TritonFp8BlockScaledMMKernel,
     TritonInt8ScaledMMLinearKernel,
 )
 from vllm.model_executor.kernels.linear.scaled_mm.xpu import (
@@ -94,6 +162,96 @@
 
 logger = init_logger(__name__)
 
+
+def _get_linear_backend() -> str:
+    """Get the linear_backend setting from the current vllm config."""
+    from vllm.config import get_current_vllm_config_or_none
+
+    config = get_current_vllm_config_or_none()
+    if config is not None:
+        return config.kernel_config.linear_backend
+    return "auto"
+
+
+# Mapping from linear_backend name to the set of kernel classes it covers.
+# When a user sets --linear-backend <name>, only kernels in the corresponding
+# set are considered candidates. If none can implement the layer config,
+# an error is raised to respect the user's explicit intent.
+_LINEAR_BACKEND_KERNEL_MAP: dict[str, set[type]] = {
+    "cutlass": {
+        CutlassInt8ScaledMMLinearKernel,
+        CutlassFP8ScaledMMLinearKernel,
+        CutlassFp8BlockScaledMMKernel,
+        CutlassW4A8LinearKernel,
+        CutlassNvFp4LinearKernel,
+    },
+    "flashinfer_cutlass": {
+        FlashInferFP8ScaledMMLinearKernel,
+        FlashInferFp8DeepGEMMDynamicBlockScaledKernel,
+        FlashInferCutlassMxfp8LinearKernel,
+        FlashInferCutlassNvFp4LinearKernel,
+        FlashInferMxFp4LinearKernel,
+    },
+    "flashinfer_trtllm": {
+        FlashInferTrtllmNvFp4LinearKernel,
+    },
+    "flashinfer_cudnn": {
+        FlashInferCudnnNvFp4LinearKernel,
+    },
+    "marlin": {
+        MarlinFP8ScaledMMLinearKernel,
+        MarlinLinearKernel,
+        MarlinMxfp8LinearKernel,
+        MarlinNvFp4LinearKernel,
+        MarlinMxFp4LinearKernel,
+    },
+    "triton": {
+        TritonInt8ScaledMMLinearKernel,
+        TritonFp8BlockScaledMMKernel,
+        TritonW4A16LinearKernel,
+    },
+    "deep_gemm": {
+        DeepGemmFp8BlockScaledMMKernel,
+    },
+    "torch": {
+        PerTensorTorchFP8ScaledMMLinearKernel,
+        ChannelWiseTorchFP8ScaledMMLinearKernel,
+        RowWiseTorchFP8ScaledMMLinearKernel,
+    },
+    "aiter": {
+        AiterInt8ScaledMMLinearKernel,
+        AiterFp8BlockScaledMMKernel,
+        AiterPerTokenFp8ScaledMMLinearKernel,
+        AiterPreshuffledPerTokenFp8ScaledMMLinearKernel,
+    },
+    "machete": {
+        MacheteLinearKernel,
+    },
+    "fbgemm": {
+        FbgemmNvFp4LinearKernel,
+    },
+    "conch": {
+        ConchLinearKernel,
+    },
+    "exllama": {
+        ExllamaLinearKernel,
+    },
+    "emulation": {
+        EmulationMxfp8LinearKernel,
+        EmulationNvFp4LinearKernel,
+    },
+}
+
+
+def _filter_kernels_by_backend(
+    backend: str,
+    kernels: list[type],
+) -> list[type]:
+    """Filter a kernel priority list to only those matching the backend."""
+    backend_kernels = _LINEAR_BACKEND_KERNEL_MAP.get(backend, set())
+    return [k for k in kernels if k in backend_kernels]
+
+
 # in priority/performance order (when available)
 _POSSIBLE_INT8_KERNELS: dict[PlatformEnum, list[type[Int8ScaledMMLinearKernel]]] = {
     PlatformEnum.CPU: [CPUInt8ScaledMMLinearKernel],
@@ -114,6 +272,8 @@
         ChannelWiseTorchFP8ScaledMMLinearKernel,
     ],
     PlatformEnum.ROCM: [
+        AiterPreshuffledPerTokenFp8ScaledMMLinearKernel,
+        AiterPerTokenFp8ScaledMMLinearKernel,
         ROCmFP8ScaledMMLinearKernel,
         PerTensorTorchFP8ScaledMMLinearKernel,
         RowWiseTorchFP8ScaledMMLinearKernel,
@@ -128,6 +288,45 @@
     ],
 }
 
+
+# in priority/performance order (when available)
+_POSSIBLE_FP8_BLOCK_KERNELS: dict[
+    PlatformEnum, list[type[Fp8BlockScaledMMLinearKernel | FP8ScaledMMLinearKernel]]
+] = {
+    PlatformEnum.CUDA: [
+        FlashInferFp8DeepGEMMDynamicBlockScaledKernel,
+        DeepGemmFp8BlockScaledMMKernel,
+        CutlassFp8BlockScaledMMKernel,
+        MarlinFP8ScaledMMLinearKernel,
+        TritonFp8BlockScaledMMKernel,
+    ],
+    PlatformEnum.ROCM: [
+        AiterFp8BlockScaledMMKernel,
+        TritonFp8BlockScaledMMKernel,
+    ],
+    PlatformEnum.CPU: [
+        CPUFp8BlockScaledMMKernel,
+    ],
+    PlatformEnum.XPU: [
+        TritonFp8BlockScaledMMKernel,
+    ],
+}
+
+_POSSIBLE_WFP8A16_KERNELS: dict[PlatformEnum, list[type[FP8ScaledMMLinearKernel]]] = {
+    PlatformEnum.CUDA: [
+        MarlinFP8ScaledMMLinearKernel,
+    ],
+    PlatformEnum.ROCM: [
+        # To be added
+    ],
+    PlatformEnum.CPU: [
+        # To be added
+    ],
+    PlatformEnum.XPU: [
+        XPUFP8ScaledMMLinearKernel,
+    ],
+}
+
 # in priority/performance order (when available)
 _POSSIBLE_KERNELS: dict[PlatformEnum, list[type[MPLinearKernel]]] = {
     PlatformEnum.CUDA: [
@@ -139,6 +338,7 @@
         ExllamaLinearKernel,
     ],
     PlatformEnum.ROCM: [
+        TritonW4A16LinearKernel,
         ConchLinearKernel,
         ExllamaLinearKernel,
     ],
@@ -152,8 +352,54 @@
     ],
 }
 
-_KernelT = TypeVar("_KernelT", bound=ScaledMMLinearKernel)
-_KernelConfigT = TypeVar("_KernelConfigT", bound=ScaledMMLinearLayerConfig)
+# in priority/performance order (when available)
+_POSSIBLE_MXFP8_KERNELS: dict[PlatformEnum, list[type[Mxfp8LinearKernel]]] = {
+    PlatformEnum.CUDA: [
+        FlashInferCutlassMxfp8LinearKernel,
+        MarlinMxfp8LinearKernel,
+        EmulationMxfp8LinearKernel,
+    ],
+    PlatformEnum.ROCM: [
+        EmulationMxfp8LinearKernel,
+    ],
+    PlatformEnum.XPU: [
+        XPUMxFp8LinearKernel,
+        EmulationMxfp8LinearKernel,
+    ],
+}
+
+_POSSIBLE_NVFP4_KERNELS: dict[PlatformEnum, list[type[NvFp4LinearKernel]]] = {
+    PlatformEnum.CUDA: [
+        # FlashInferB12xNvFp4LinearKernel excluded from auto-selection until
+        # upstream CUTLASS SM121 MMA op guard is resolved; use
+        # VLLM_NVFP4_GEMM_BACKEND=flashinfer-b12x to opt in explicitly.
+        FlashInferCutlassNvFp4LinearKernel,
+        CutlassNvFp4LinearKernel,
+        MarlinNvFp4LinearKernel,
+        FlashInferTrtllmNvFp4LinearKernel,
+        FlashInferCudnnNvFp4LinearKernel,
+        FbgemmNvFp4LinearKernel,
+        EmulationNvFp4LinearKernel,
+    ],
+    PlatformEnum.ROCM: [
+        EmulationNvFp4LinearKernel,
+    ],
+}
+
+_POSSIBLE_MXFP4_KERNELS: dict[PlatformEnum, list[type[MxFp4LinearKernel]]] = {
+    PlatformEnum.CUDA: [
+        FlashInferMxFp4LinearKernel,
+        MarlinMxFp4LinearKernel,
+    ],
+    PlatformEnum.XPU: [
+        XPUMxFp4LinearKernel,
+    ],
+}
+
+# TODO make all kernels inherit from MMLinearKernel
+# then bound _KernelT only to MMLinearKernel
+_KernelT = TypeVar("_KernelT", bound=ScaledMMLinearKernel | MMLinearKernel)
+_KernelConfigT = TypeVar("_KernelConfigT", bound=MMLinearLayerConfig)
 
 
 def is_supported_and_can_implement_kernel(
@@ -226,7 +472,20 @@ def choose_scaled_mm_linear_kernel(
             scope="global",
         )
 
-    for kernel in possible_kernels[current_platform._enum]:
+    platform_kernels = possible_kernels[current_platform._enum]
+
+    # Apply --linear-backend filtering when set.
+    linear_backend = _get_linear_backend()
+    if linear_backend != "auto":
+        filtered = _filter_kernels_by_backend(linear_backend, platform_kernels)
+        if not filtered:
+            raise ValueError(
+                f"--linear-backend={linear_backend} was requested but no "
+                f"'{linear_backend}' kernel exists for this layer type."
+            )
+        platform_kernels = filtered
+
+    for kernel in platform_kernels:
         is_supported_and_can_implement, failure_reason = (
             is_supported_and_can_implement_kernel(kernel, config, compute_capability)
         )
@@ -243,32 +502,74 @@ def choose_scaled_mm_linear_kernel(
 def init_fp8_linear_kernel(
     activation_quant_key: QuantKey,
     weight_quant_key: QuantKey,
+    input_dtype: torch.dtype,
     out_dtype: torch.dtype,
+    weight_shape: tuple[int, int],
     force_kernel: type[FP8ScaledMMLinearKernel] | None = None,
     module_name: str | None = None,
-) -> FP8ScaledMMLinearKernel:
+) -> FP8ScaledMMLinearKernel | Fp8BlockScaledMMLinearKernel:
     scaled_mm_linear_kernel_config = FP8ScaledMMLinearLayerConfig(
         weight_quant_key=weight_quant_key,
         activation_quant_key=activation_quant_key,
+        input_dtype=input_dtype,
         out_dtype=out_dtype,
+        weight_shape=weight_shape,
     )
 
-    kernel_type = choose_scaled_mm_linear_kernel(
-        scaled_mm_linear_kernel_config, _POSSIBLE_FP8_KERNELS, force_kernel=force_kernel
-    )
+    if activation_quant_key.scale.group_shape.is_per_group():
+        kernel_type = choose_scaled_mm_linear_kernel(
+            config=scaled_mm_linear_kernel_config,
+            possible_kernels=_POSSIBLE_FP8_BLOCK_KERNELS,  # type: ignore[misc]
+            force_kernel=force_kernel,
+        )
+        if module_name:
+            logger.info_once(
+                "Selected %s for %s",
+                kernel_type.__name__,
+                module_name,
+                scope="global",
+            )
 
-    if module_name:
-        logger.info_once(
-            "Selected %s for %s",
-            kernel_type.__name__,
-            module_name,
-            scope="global",
+        # TODO make scaled_mm kernels inherit from MMLinearKernel
+        # only MarlinFP8ScaledMMLinearKernel is a type of FP8ScaledMMLinearKernel.
+        if issubclass(kernel_type, FP8ScaledMMLinearKernel):
+            return kernel_type(
+                scaled_mm_linear_kernel_config,
+                layer_param_names=[
+                    "weight",
+                    "weight_scale",
+                    "input_scale",
+                    "input_scale_ub",
+                ],
+            )
+
+        return kernel_type(
+            scaled_mm_linear_kernel_config,
         )
 
-    return kernel_type(
-        scaled_mm_linear_kernel_config,
-        layer_param_names=["weight", "weight_scale", "input_scale", "input_scale_ub"],
-    )
+    else:
+        kernel_type = choose_scaled_mm_linear_kernel(
+            config=scaled_mm_linear_kernel_config,
+            possible_kernels=_POSSIBLE_FP8_KERNELS,  # type: ignore[arg-type]
+            force_kernel=force_kernel,
+        )
+        if module_name:
+            logger.info_once(
+                "Selected %s for %s",
+                kernel_type.__name__,
+                module_name,
+                scope="global",
+            )
+
+        return kernel_type(
+            scaled_mm_linear_kernel_config,
+            layer_param_names=[
+                "weight",
+                "weight_scale",
+                "input_scale",
+                "input_scale_ub",
+            ],
+        )
 
 
 def init_int8_linear_kernel(
@@ -335,8 +636,21 @@ def choose_mp_linear_kernel(
         if _cc is not None:
             compute_capability = _cc[0] * 10 + _cc[1]
 
+    platform_kernels = _POSSIBLE_KERNELS[current_platform._enum]
+
+    # Apply --linear-backend filtering when set.
+    linear_backend = _get_linear_backend()
+    if linear_backend != "auto":
+        filtered = _filter_kernels_by_backend(linear_backend, platform_kernels)
+        if not filtered:
+            raise ValueError(
+                f"--linear-backend={linear_backend} was requested but no "
+                f"'{linear_backend}' kernel exists for mixed-precision layers."
+            )
+        platform_kernels = filtered
+
     failure_reasons = []
-    for kernel in _POSSIBLE_KERNELS[current_platform._enum]:
+    for kernel in platform_kernels:
         if kernel.__name__ in envs.VLLM_DISABLED_KERNELS:
             failure_reasons.append(
                 f" {kernel.__name__} disabled by environment variable"
@@ -367,16 +681,333 @@ def choose_mp_linear_kernel(
     )
 
 
+def init_mxfp8_linear_kernel() -> Mxfp8LinearKernel:
+    """Select and instantiate the best MXFP8 linear kernel for the
+    current platform."""
+    config = Mxfp8LinearLayerConfig()
+
+    platform = current_platform._enum
+    possible = list(_POSSIBLE_MXFP8_KERNELS.get(platform, []))
+
+    # Apply --linear-backend filtering when set.
+    linear_backend = _get_linear_backend()
+    if linear_backend != "auto":
+        filtered = _filter_kernels_by_backend(linear_backend, possible)
+        if not filtered:
+            raise ValueError(
+                f"--linear-backend={linear_backend} was requested but no "
+                f"'{linear_backend}' kernel exists for MXFP8 layers."
+            )
+        possible = filtered
+
+    failure_reasons = []
+    for kernel_cls in possible:
+        if kernel_cls.__name__ in envs.VLLM_DISABLED_KERNELS:
+            failure_reasons.append(
+                f" {kernel_cls.__name__} disabled by environment variable"
+            )
+            continue
+
+        is_supported, reason = kernel_cls.is_supported()
+        if not is_supported:
+            failure_reasons.append(f"{kernel_cls.__name__}: {reason}")
+            continue
+
+        can_implement, reason = kernel_cls.can_implement(config)
+        if not can_implement:
+            failure_reasons.append(f"{kernel_cls.__name__}: {reason}")
+            continue
+
+        logger.info_once("Using %s for MXFP8 GEMM", kernel_cls.__name__)
+        return kernel_cls(config)
+
+    raise ValueError(
+        "Failed to find a kernel that can implement the "
+        "MXFP8 linear layer. Reasons: \n" + "\n".join(failure_reasons)
+    )
+
+
+def init_mxfp4_linear_kernel() -> MxFp4LinearKernel:
+    """Select and instantiate the best MXFP4 linear kernel for the
+    current platform."""
+    linear_backend = _get_linear_backend()
+
+    force_kernel: type[MxFp4LinearKernel] | None = None
+    if linear_backend == "auto" and envs.VLLM_MXFP4_USE_MARLIN:
+        force_kernel = MarlinMxFp4LinearKernel
+
+    if force_kernel is not None:
+        is_supported, reason = force_kernel.is_supported()
+        if not is_supported:
+            raise ValueError(
+                f"Forced MXFP4 kernel {force_kernel.__name__} is not "
+                f"supported: {reason}"
+            )
+        logger.info_once("Using %s for MXFP4 GEMM", force_kernel.__name__)
+        return force_kernel(MxFp4LinearLayerConfig())
+
+    platform = current_platform._enum
+    possible = list(_POSSIBLE_MXFP4_KERNELS.get(platform, []))
+
+    # Apply --linear-backend filtering when set.
+    if linear_backend != "auto":
+        filtered = _filter_kernels_by_backend(linear_backend, possible)
+        if not filtered:
+            raise ValueError(
+                f"--linear-backend={linear_backend} was requested but no "
+                f"'{linear_backend}' kernel exists for MXFP4 layers."
+            )
+        possible = filtered
+
+    failure_reasons = []
+    for kernel_cls in possible:
+        if kernel_cls.__name__ in envs.VLLM_DISABLED_KERNELS:
+            failure_reasons.append(
+                f" {kernel_cls.__name__} disabled by environment variable"
+            )
+            continue
+
+        is_supported, reason = kernel_cls.is_supported()
+        if not is_supported:
+            failure_reasons.append(f"{kernel_cls.__name__}: {reason}")
+            continue
+
+        logger.info_once("Using %s for MXFP4 GEMM", kernel_cls.__name__)
+        return kernel_cls(MxFp4LinearLayerConfig())
+
+    raise ValueError(
+        "Failed to find a kernel that can implement the "
+        "MXFP4 linear layer. Reasons: \n" + "\n".join(failure_reasons)
+    )
+
+
+def init_wfp8_a16_linear_kernel(
+    weight_quant_key: QuantKey,
+    activation_quant_key: QuantKey,
+    weight_shape: tuple[int, int],
+    input_dtype: torch.dtype,
+    out_dtype: torch.dtype,
+    force_kernel: type[FP8ScaledMMLinearKernel] | None = None,
+    module_name: str | None = None,
+) -> FP8ScaledMMLinearKernel:
+    config = FP8ScaledMMLinearLayerConfig(
+        weight_quant_key=weight_quant_key,
+        activation_quant_key=activation_quant_key,
+        weight_shape=weight_shape,
+        input_dtype=input_dtype,
+        out_dtype=out_dtype,
+    )
+
+    kernel_type = choose_scaled_mm_linear_kernel(
+        config, _POSSIBLE_WFP8A16_KERNELS, force_kernel=force_kernel
+    )
+
+    if module_name:
+        logger.info_once(
+            "Selected %s for %s",
+            kernel_type.__name__,
+            module_name,
+            scope="global",
+        )
+
+    return kernel_type(
+        config,
+        layer_param_names=["weight", "weight_scale", "input_scale", "input_scale_ub"],
+    )
+
+
+# Maps VLLM_NVFP4_GEMM_BACKEND env var values to kernel classes.
+_NVFP4_BACKEND_TO_KERNEL: dict[str, type[NvFp4LinearKernel]] = {
+    "flashinfer-b12x": FlashInferB12xNvFp4LinearKernel,
+    "flashinfer-cutlass": FlashInferCutlassNvFp4LinearKernel,
+    "cutlass": CutlassNvFp4LinearKernel,
+    "marlin": MarlinNvFp4LinearKernel,
+    "flashinfer-trtllm": FlashInferTrtllmNvFp4LinearKernel,
+    "flashinfer-cudnn": FlashInferCudnnNvFp4LinearKernel,
+    "emulation": EmulationNvFp4LinearKernel,
+}
+
+
+def init_nvfp4_linear_kernel() -> NvFp4LinearKernel:
+    """Select and instantiate the best NVFP4 linear kernel for the
+    current platform."""
+    config = NvFp4LinearLayerConfig()
+
+    # VLLM_BATCH_INVARIANT forces deterministic execution. Prefer the
+    # batch-invariant CUTLASS implementation when available, otherwise fall
+    # back to emulation. It overrides both --linear-backend and the deprecated
+    # env vars below.
+    force_kernel: type[NvFp4LinearKernel] | None = None
+    linear_backend = _get_linear_backend()
+    if envs.VLLM_BATCH_INVARIANT:
+        bi_supported, reason = CutlassNvFp4LinearKernel.is_supported()
+        if bi_supported:
+            if linear_backend not in ("auto", "cutlass"):
+                logger.warning_once(
+                    "VLLM_BATCH_INVARIANT overrides --linear-backend=%s; "
+                    "using the CUTLASS backend for deterministic execution.",
+                    linear_backend,
+                )
+            else:
+                logger.info_once(
+                    "VLLM_BATCH_INVARIANT forces NVFP4 linear to use the "
+                    "CUTLASS backend for deterministic execution."
+                )
+            force_kernel = CutlassNvFp4LinearKernel
+        else:
+            if linear_backend not in ("auto", "emulation"):
+                logger.warning_once(
+                    "VLLM_BATCH_INVARIANT overrides --linear-backend=%s; "
+                    "using the emulation backend for deterministic execution.",
+                    linear_backend,
+                )
+            logger.info_once(
+                "VLLM_BATCH_INVARIANT is set but the batch-invariant NVFP4 "
+                "kernel is not supported on this platform; falling back to "
+                "emulation for deterministic execution. Reason: %s",
+                reason,
+            )
+            force_kernel = EmulationNvFp4LinearKernel
+    elif linear_backend == "auto":
+        # Deprecated env-var overrides — only honoured when --linear-backend
+        # is "auto". Deprecation warnings are emitted from vllm/envs.py.
+        if envs.VLLM_USE_FBGEMM:
+            force_kernel = FbgemmNvFp4LinearKernel
+        elif envs.VLLM_USE_NVFP4_CT_EMULATIONS:
+            force_kernel = EmulationNvFp4LinearKernel
+        elif envs.VLLM_NVFP4_GEMM_BACKEND is not None:
+            backend_name = envs.VLLM_NVFP4_GEMM_BACKEND
+            force_kernel = _NVFP4_BACKEND_TO_KERNEL.get(backend_name)
+            if force_kernel is None:
+                raise ValueError(
+                    f"Unknown VLLM_NVFP4_GEMM_BACKEND={backend_name!r}. "
+                    f"Valid choices: "
+                    f"{list(_NVFP4_BACKEND_TO_KERNEL.keys())}"
+                )
+
+    if force_kernel is not None:
+        is_supported, reason = force_kernel.is_supported()
+        if not is_supported:
+            raise ValueError(
+                f"Forced NVFP4 kernel {force_kernel.__name__} is not "
+                f"supported: {reason}"
+            )
+        logger.info_once("Using %s for NVFP4 GEMM", force_kernel.__name__)
+        return force_kernel(config)
+
+    # Auto-select from registry (or --linear-backend filtered).
+    platform = current_platform._enum
+    possible = list(_POSSIBLE_NVFP4_KERNELS.get(platform, []))
+
+    # Apply --linear-backend filtering when set.
+    if linear_backend != "auto":
+        filtered = _filter_kernels_by_backend(linear_backend, possible)
+        if not filtered:
+            raise ValueError(
+                f"--linear-backend={linear_backend} was requested but no "
+                f"'{linear_backend}' kernel exists for NVFP4 layers."
+            )
+        possible = filtered
+
+    failure_reasons = []
+    for kernel_cls in possible:
+        if kernel_cls.__name__ in envs.VLLM_DISABLED_KERNELS:
+            failure_reasons.append(
+                f" {kernel_cls.__name__} disabled by environment variable"
+            )
+            continue
+
+        is_supported, reason = kernel_cls.is_supported()
+        if not is_supported:
+            failure_reasons.append(f"{kernel_cls.__name__}: {reason}")
+            continue
+
+        can_implement, reason = kernel_cls.can_implement(config)
+        if not can_implement:
+            failure_reasons.append(f"{kernel_cls.__name__}: {reason}")
+            continue
+
+        if kernel_cls is EmulationNvFp4LinearKernel and failure_reasons:
+            logger.warning_once(
+                "NVFP4 linear falling back to the slow and unoptimized "
+                "emulation backend as no optimized backend is available "
+                "(unavailable reasons:\n - %s\n). "
+                "In case you expect one of these backends to be used, "
+                "please verify your environment.",
+                "\n - ".join(failure_reasons),
+            )
+
+        logger.info_once("Using %s for NVFP4 GEMM", kernel_cls.__name__)
+        return kernel_cls(config)
+
+    raise ValueError(
+        "Failed to find a kernel that can implement the "
+        "NVFP4 linear layer. Reasons: \n" + "\n".join(failure_reasons)
+    )
+
+
+def register_linear_kernel(
+    kernel_class: type,
+    platform: PlatformEnum,
+    kernel_type: str = "mp",
+) -> None:
+    """
+    Register a new linear kernel class to be considered in kernel selection.
+
+    Args:
+        kernel_class (type): The kernel class to register.
+        platform (PlatformEnum): The platform for which this kernel is applicable.
+        kernel_type (str): The type of the kernel, either "mp", "int8", or "fp8".
+            Defaults to "mp".
+
+    Raises:
+        ValueError: If the kernel_type is not recognized.
+    """
+    if kernel_type == "mp":
+        if platform not in _POSSIBLE_KERNELS:
+            _POSSIBLE_KERNELS[platform] = []
+        _POSSIBLE_KERNELS[platform].append(kernel_class)
+    elif kernel_type == "int8":
+        if platform not in _POSSIBLE_INT8_KERNELS:
+            _POSSIBLE_INT8_KERNELS[platform] = []
+        _POSSIBLE_INT8_KERNELS[platform].append(kernel_class)
+    elif kernel_type == "fp8":
+        if platform not in _POSSIBLE_FP8_KERNELS:
+            _POSSIBLE_FP8_KERNELS[platform] = []
+        _POSSIBLE_FP8_KERNELS[platform].append(kernel_class)
+    elif kernel_type == "mxfp8":
+        if platform not in _POSSIBLE_MXFP8_KERNELS:
+            _POSSIBLE_MXFP8_KERNELS[platform] = []
+        _POSSIBLE_MXFP8_KERNELS[platform].append(kernel_class)
+    elif kernel_type == "nvfp4":
+        if platform not in _POSSIBLE_NVFP4_KERNELS:
+            _POSSIBLE_NVFP4_KERNELS[platform] = []
+        _POSSIBLE_NVFP4_KERNELS[platform].append(kernel_class)
+    elif kernel_type == "mxfp4":
+        if platform not in _POSSIBLE_MXFP4_KERNELS:
+            _POSSIBLE_MXFP4_KERNELS[platform] = []
+        _POSSIBLE_MXFP4_KERNELS[platform].append(kernel_class)
+    else:
+        raise ValueError(f"Unrecognized kernel type: {kernel_type}")
+
+
 __all__ = [
     "init_fp8_linear_kernel",
     "init_int8_linear_kernel",
+    "init_nvfp4_linear_kernel",
     "choose_mp_linear_kernel",
+    "register_linear_kernel",
+    "init_wfp8_a16_linear_kernel",
     "FP8ScaledMMLinearKernel",
     "Int8ScaledMMLinearKernel",
     "ScaledMMLinearKernel",
     "FP8ScaledMMLinearLayerConfig",
     "Int8ScaledMMLinearLayerConfig",
     "ScaledMMLinearLayerConfig",
+    "AiterPreshuffledPerTokenFp8ScaledMMLinearKernel",
+    "AiterPerTokenFp8ScaledMMLinearKernel",
+    "NvFp4LinearKernel",
+    "NvFp4LinearLayerConfig",
     "AiterInt8ScaledMMLinearKernel",
     "CPUInt8ScaledMMLinearKernel",
     "CutlassFP8ScaledMMLinearKernel",
@@ -397,6 +1028,30 @@ def choose_mp_linear_kernel(
     "ExllamaLinearKernel",
     "MacheteLinearKernel",
     "MarlinLinearKernel",
+    "TritonW4A16LinearKernel",
     "XPUW4A8IntLinearKernel",
     "XPUwNa16LinearKernel",
+    "init_mxfp8_linear_kernel",
+    "Mxfp8LinearKernel",
+    "Mxfp8LinearLayerConfig",
+    "init_mxfp4_linear_kernel",
+    "MxFp4LinearKernel",
+    "MxFp4LinearLayerConfig",
+    "FlashInferMxFp4LinearKernel",
+    "MarlinMxFp4LinearKernel",
+    "FlashInferCutlassMxfp8LinearKernel",
+    "MarlinMxfp8LinearKernel",
+    "XPUMxFp8LinearKernel",
+    "EmulationMxfp8LinearKernel",
+    "CutlassNvFp4LinearKernel",
+    "EmulationNvFp4LinearKernel",
+    "FbgemmNvFp4LinearKernel",
+    "FlashInferB12xNvFp4LinearKernel",
+    "FlashInferCutlassNvFp4LinearKernel",
+    "FlashInferTrtllmNvFp4LinearKernel",
+    "FlashInferCudnnNvFp4LinearKernel",
+    "MarlinNvFp4LinearKernel",
+    "_KernelT",
+    "DeepGemmFp8BlockScaledMMKernel",
+    "FlashInferFp8DeepGEMMDynamicBlockScaledKernel",
 ]
diff --git a/vllm/model_executor/kernels/linear/base.py b/vllm/model_executor/kernels/linear/base.py
new file mode 100644
index 000000000000..4e9b89bb3ff1
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/base.py
@@ -0,0 +1,324 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any, ClassVar, Generic, TypeVar
+
+import torch
+from typing_extensions import Self
+
+
+@dataclass
+class MMLinearLayerConfig: ...
+
+
+@dataclass
+class Params:
+    """Base class for quantized layer parameters.
+
+    This class provides a typed interface for accessing quantized weights and scales
+    from layer modules. It serves as a parameter container that can be extracted from
+    layers and passed to kernel implementations.
+
+    Attributes:
+        weight: The quantized weight tensor
+        weight_scale: weight scaling factors
+        input_scale: Optional input scaling factors
+
+    Class Variables:
+        WEIGHT: Attribute name for weight tensor on the layer module
+        WEIGHT_SCALE: Attribute name for weight scale tensor on the layer module
+        INPUT_SCALE: Attribute name for input scale tensor on the layer module
+
+    Important:
+        The string values of WEIGHT, WEIGHT_SCALE, and INPUT_SCALE class variables
+        MUST match the attribute names used in the corresponding quantization method's
+        create_weights() implementation.
+        For example, if FP8LinearMethod.create_weights()
+        sets layer.weight and layer.weight_scale,
+        then WEIGHT="weight" and
+        WEIGHT_SCALE="weight_scale" must be used here.
+
+    Usage:
+        ```python
+        # Extract parameters from a quantized layer
+        params = Params.from_layer(layer)
+
+        # Access typed parameters
+        output = func(input, params.weight, params.weight_scale)
+        ```
+    """
+
+    weight: torch.Tensor
+    weight_scale: torch.Tensor
+    input_scale: torch.Tensor | None
+
+    # Attribute names on the layer
+    WEIGHT: ClassVar[str] = "weight"
+    WEIGHT_SCALE: ClassVar[str] = "weight_scale"
+    INPUT_SCALE: ClassVar[str] = "input_scale"
+
+    @classmethod
+    def from_layer(cls, layer: torch.nn.Module) -> Self:
+        return cls(
+            weight=getattr(layer, cls.WEIGHT),
+            weight_scale=getattr(layer, cls.WEIGHT_SCALE),
+            input_scale=getattr(layer, cls.INPUT_SCALE, None),
+        )
+
+
+@dataclass
+class FP8Params(Params):
+    """FP8 layer parameters with typed fields"""
+
+    input_scale_ub: torch.Tensor | None
+
+    INPUT_SCALE_UB: ClassVar[str] = "input_scale_ub"
+
+    @classmethod
+    def from_layer(cls, layer: torch.nn.Module) -> "FP8Params":
+        """Extract parameters from layer"""
+        return cls(
+            weight=getattr(layer, cls.WEIGHT),
+            weight_scale=getattr(layer, cls.WEIGHT_SCALE),
+            input_scale=getattr(layer, cls.INPUT_SCALE, None),
+            input_scale_ub=getattr(layer, cls.INPUT_SCALE_UB, None),
+        )
+
+
+@dataclass
+class Int8Params(Params):
+    """Int8 layer parameters with typed fields"""
+
+    input_zero_point: torch.Tensor | None
+    azp_adj: torch.Tensor | None
+
+    INPUT_ZERO_POINT: ClassVar[str] = "input_zero_point"
+    AZP_ADJ: ClassVar[str] = "azp_adj"
+
+    @classmethod
+    def from_layer(cls, layer: torch.nn.Module) -> "Int8Params":
+        """Extract parameters from layer"""
+        return cls(
+            weight=getattr(layer, cls.WEIGHT),
+            weight_scale=getattr(layer, cls.WEIGHT_SCALE),
+            input_scale=getattr(layer, cls.INPUT_SCALE, None),
+            input_zero_point=getattr(layer, cls.INPUT_ZERO_POINT, None),
+            azp_adj=getattr(layer, cls.AZP_ADJ, None),
+        )
+
+
+_ParamsT = TypeVar("_ParamsT", bound=Params)
+_ConfigT = TypeVar("_ConfigT", bound=MMLinearLayerConfig)
+
+
+class MMLinearKernel(ABC, Generic[_ConfigT, _ParamsT]):
+    """Abstract base class for quantized matrix multiplication kernels.
+
+    This class provides the interface for implementing custom quantized linear layer
+    kernels in vLLM. Subclasses should implement specific quantization strategies
+    (e.g., FP8, INT8) and their corresponding compute kernels.
+
+    Generic Type Parameters:
+        _ConfigT: Configuration type for the kernel (subclass of MMLinearLayerConfig).
+                  Contains kernel-specific settings like quantization keys, dtypes, etc.
+        _ParamsT: Parameter type for the kernel (subclass of Params).
+                  Defines the quantized weights and scales needed by the kernel.
+
+    Typical Usage:
+        1. Define a config dataclass inheriting from MMLinearLayerConfig
+        2. Define a params dataclass inheriting from Params (or FP8Params/Int8Params)
+        3. Subclass MMLinearKernel with your config and params types
+        4. Implement all abstract methods
+        5. Register the kernel with the quantization method
+
+    Example:
+        ```python
+        @dataclass
+        class MyKernelConfig(MMLinearLayerConfig):
+            static: bool
+            output_dtype: torch.dtype
+
+
+        @dataclass
+        class MyKernelParams(FP8Params):
+            custom_scale: torch.Tensor
+            CUSTOM_SCALE: ClassVar[str] = "custom_scale"
+
+
+        class MyKernel(MMLinearKernel[MyKernelConfig, MyKernelParams]):
+            @classmethod
+            def is_supported(cls, compute_capability=None):
+                if compute_capability and compute_capability < 90:
+                    return False, "Requires compute capability >= 9.0"
+                return True, None
+
+            @classmethod
+            def can_implement(cls, config):
+                if not config.static:
+                    return False, "Only static quantization supported"
+                return True, None
+
+            def process_weights_after_loading(self, layer):
+                # Preprocess weights for the kernel
+                params = self._get_layer_params(layer)
+                processed = preprocess_weights(params.weight)
+                replace_parameter(layer, params.WEIGHT, processed)
+
+            def _get_layer_params(self, layer, **kwargs):
+                return MyKernelParams.from_layer(layer)
+
+            def apply_weights(self, layer, x, bias=None, **kwargs):
+                params = self._get_layer_params(layer)
+                # Call your custom kernel
+                output = my_custom_kernel(x, params.weight, params.weight_scale)
+                if bias is not None:
+                    output += bias
+                return output
+        ```
+
+    Lifecycle:
+        1. Kernel selection: is_supported() and can_implement() check compatibility
+        2. Initialization: __init__() creates kernel instance with config
+        3. Weight loading: process_weights_after_loading() preprocesses weights
+        4. Inference: apply_weights() executes the quantized matmul
+    """
+
+    @classmethod
+    @abstractmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        """Check if this kernel is supported on the current hardware.
+
+        This method checks hardware-level compatibility (e.g., GPU architecture,
+        compute capability, available instructions). It's called during kernel
+        selection to filter out kernels that cannot run on the current device.
+
+        Args:
+            compute_capability: GPU compute capability (e.g., 80 for A100, 90 for H100).
+                               If None, should check the current device.
+
+        Returns:
+            A tuple of (is_supported, reason):
+                - is_supported: True if the kernel can run on this hardware
+                - reason: If not supported, a string explaining why; otherwise None
+        """
+        raise NotImplementedError
+
+    @classmethod
+    @abstractmethod
+    def can_implement(cls, config: _ConfigT) -> tuple[bool, str | None]:
+        """Check if this kernel can implement the given configuration.
+
+        This method checks configuration-level compatibility (e.g., quantization
+        scheme, group sizes, static vs dynamic quantization). It's called after
+        is_supported() to determine if this kernel can handle the specific
+        quantization configuration.
+
+        Args:
+            config: The kernel configuration to check
+
+        Returns:
+            A tuple of (can_implement, reason):
+                - can_implement: True if this kernel supports the config
+                - reason: If not supported, a string explaining why; otherwise None
+            ```
+        """
+        raise NotImplementedError
+
+    def __init__(self, config: _ConfigT) -> None:
+        """Initialize the kernel with the given configuration.
+
+        Args:
+            config: Kernel-specific configuration containing settings like
+                   quantization keys, output dtypes, etc.
+        """
+        self.config = config
+
+    @abstractmethod
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        """Process and transform weights after loading from checkpoint.
+
+        This method is called once after weights are loaded but before inference.
+        Use it to preprocess weights into the format required by your kernel
+        (e.g., reordering, padding, format conversion).
+
+        Modifications should be done in-place using replace_parameter() to ensure
+        the layer's parameters are properly updated.
+
+        Args:
+            layer: The layer module containing the weights to process
+
+        Example:
+            ```python
+            def process_weights_after_loading(self, layer):
+                params = self._get_layer_params(layer)
+                # Reorder weights for better memory access
+                weight_reordered = reorder_weights(params.weight)
+                replace_parameter(layer, params.WEIGHT, weight_reordered)
+            ```
+        """
+        raise NotImplementedError
+
+    # return a covariant type in the subclass
+    @abstractmethod
+    def _get_layer_params(self, layer: torch.nn.Module, **kwargs: Any) -> _ParamsT:
+        """Extract typed parameters from the layer module.
+
+        This internal method retrieves the quantized weights and scales from
+        the layer as a typed parameter object. Subclasses should typically
+        delegate to ParamsClass.from_layer().
+
+        Args:
+            layer: The layer module containing the parameters
+            **kwargs: Additional arguments
+
+        Returns:
+            A typed parameter object containing weights, scales, and other
+            quantization parameters
+
+        Example:
+            ```python
+            def _get_layer_params(self, layer, **kwargs):
+                return MyKernelParams.from_layer(layer)
+            ```
+        """
+        raise NotImplementedError
+
+    def get_output_padding(self) -> int | None:
+        """Get the number of output tokens to pad for this kernel.
+
+        Some kernels require input padding for optimal performance.
+        Override this method to specify padding requirements.
+
+        Returns:
+            Number of tokens to pad, or None for no padding (default)
+        """
+        return None
+
+    @abstractmethod
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+        **kwargs: Any,
+    ) -> torch.Tensor:
+        """Apply the quantized weights to the input tensor.
+
+        This is the main inference method that performs the quantized matrix
+        multiplication. It should handle input quantization (if needed), call
+        the underlying kernel, and apply bias.
+
+        Args:
+            layer: The layer module containing the quantized weights
+            x: Input tensor of shape [..., in_features]
+            bias: Optional bias tensor of shape [out_features]
+            **kwargs: Additional kernel-specific arguments
+
+        Returns:
+            Output tensor of shape [..., out_features]
+        """
+        raise NotImplementedError
diff --git a/vllm/model_executor/kernels/linear/mixed_precision/MPLinearKernel.py b/vllm/model_executor/kernels/linear/mixed_precision/MPLinearKernel.py
index 7aeb1f86c279..b6fc82e87ae7 100644
--- a/vllm/model_executor/kernels/linear/mixed_precision/MPLinearKernel.py
+++ b/vllm/model_executor/kernels/linear/mixed_precision/MPLinearKernel.py
@@ -50,8 +50,8 @@ def __init__(
             assert w_zp_param_name is not None
         if c.has_g_idx:
             assert w_gidx_param_name is not None
-        self.w_zp_name = w_zp_param_name
-        self.w_gidx_name = w_gidx_param_name
+        self.w_zp_name: str | None = w_zp_param_name
+        self.w_gidx_name: str | None = w_gidx_param_name
 
     @abstractmethod
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
diff --git a/vllm/model_executor/kernels/linear/mixed_precision/__init__.py b/vllm/model_executor/kernels/linear/mixed_precision/__init__.py
index 6c144a5ec8a8..4d659b360425 100644
--- a/vllm/model_executor/kernels/linear/mixed_precision/__init__.py
+++ b/vllm/model_executor/kernels/linear/mixed_precision/__init__.py
@@ -29,6 +29,9 @@
     MPLinearKernel,
     MPLinearLayerConfig,
 )
+from vllm.model_executor.kernels.linear.mixed_precision.triton_w4a16 import (
+    TritonW4A16LinearKernel,
+)
 from vllm.model_executor.kernels.linear.mixed_precision.xpu import (
     XPUW4A8IntLinearKernel,
     XPUwNa16LinearKernel,
@@ -45,6 +48,7 @@
     "ExllamaLinearKernel",
     "MacheteLinearKernel",
     "MarlinLinearKernel",
+    "TritonW4A16LinearKernel",
     "XPUW4A8IntLinearKernel",
     "XPUwNa16LinearKernel",
 ]
diff --git a/vllm/model_executor/kernels/linear/mixed_precision/triton_w4a16.py b/vllm/model_executor/kernels/linear/mixed_precision/triton_w4a16.py
new file mode 100644
index 000000000000..5cc100b3e1e3
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/mixed_precision/triton_w4a16.py
@@ -0,0 +1,438 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Triton-based W4A16 GEMM kernel for ROCm MI300.
+
+Implements fused int4-weight dequantization + fp16 GEMM in a single kernel,
+using GPTQ sequential packing (8 int4 values per int32, shifts [0,4,...,28]).
+Plugs into the MPLinearKernel selection system and is preferred over
+MarlinLinearKernel/ExllamaLinearKernel on ROCm.
+
+Weight layout expected by this kernel (post-process_weights_after_loading):
+  qweight: [K, N//8]  int32  — rows=K (input), cols=N//8 (N is packed)
+  scales:  [K//G, N]  fp16/bf16
+  qzeros:  [K//G, N//8]  int32  (optional; None for symmetric uint4b8)
+
+Checkpoint layout from compressed_tensors_wNa16 create_weights:
+  weight_packed:     [N, K//8]  int32  (output_dim=0, input_dim=1, packed_dim=1)
+  weight_scale:      [N, K//G]  fp16   (output_dim=0, input_dim=1)
+  weight_zero_point: [N//8, K//G]  int32 (output_dim=0, packed_dim=0)
+"""
+
+import torch
+
+from vllm.model_executor.layers.quantization.utils import replace_parameter
+from vllm.model_executor.parameter import BasevLLMParameter, permute_param_layout_
+from vllm.platforms import current_platform
+from vllm.scalar_type import scalar_types
+from vllm.triton_utils import tl, triton
+
+from .MPLinearKernel import MPLinearKernel, MPLinearLayerConfig
+
+TRITON_W4A16_SUPPORTED_GROUP_SIZES = [-1, 32, 64, 128, 256]
+TRITON_W4A16_SUPPORTED_QUANT_TYPES = [
+    scalar_types.uint4b8,  # symmetric GPTQ (bias=8)
+    scalar_types.uint4,  # asymmetric with explicit zeros
+]
+
+
+@triton.jit
+def triton_w4a16_gemm_kernel(
+    # Pointers
+    a_ptr,  # [M, K]  fp16/bf16 activations
+    b_ptr,  # [K, N//8]  int32 packed 4-bit weights (N is the packed dim)
+    scales_ptr,  # [K//G, N]  fp16/bf16 scales
+    zeros_ptr,  # [K//G, N//8]  int32 packed zeros (unused when HAS_ZP=False)
+    c_ptr,  # [M, N]  fp16/bf16 output
+    # Dimensions
+    M,
+    N,
+    K,
+    # Strides
+    stride_am,
+    stride_ak,
+    stride_bk,
+    stride_bn,  # stride in b along the packed N//8 dim
+    stride_cm,
+    stride_cn,
+    # Quantization parameters
+    group_size,
+    # Whether explicit zero points are provided
+    HAS_ZP: tl.constexpr,
+    # Zero bias used when HAS_ZP is False (e.g. 8 for uint4b8)
+    ZP_BIAS: tl.constexpr,
+    # Block sizes (tuned for MI300 wavefront=64)
+    BLOCK_M: tl.constexpr,
+    BLOCK_N: tl.constexpr,
+    BLOCK_K: tl.constexpr,
+):
+    """
+    Fused W4A16 GEMM: C[M,N] = A[M,K] @ dequant(B)[K,N]
+
+    B is stored as [K, N//8] int32 using GPTQ sequential packing:
+      each int32 packs 8 consecutive N-values at bit offsets [0,4,8,12,16,20,24,28].
+
+    Dequant: w_fp = (w_int4 - zero) * scale
+      HAS_ZP=True:  zero is loaded from zeros_ptr and unpacked
+      HAS_ZP=False: zero = ZP_BIAS constant (e.g. 8 for uint4b8 symmetric)
+    """
+    pid_m = tl.program_id(0)
+    pid_n = tl.program_id(1)
+
+    # Row/col offsets for this tile
+    offs_m = pid_m * BLOCK_M + tl.arange(0, BLOCK_M)
+    offs_n = pid_n * BLOCK_N + tl.arange(0, BLOCK_N)
+
+    # b/zeros are stored with N packed: N//8 int32 columns per K row
+    offs_bn = pid_n * (BLOCK_N // 8) + tl.arange(0, BLOCK_N // 8)
+
+    # GPTQ sequential shifts tiled across BLOCK_N:
+    #   [0,4,8,...,28] repeating for every group of 8 N-values.
+    # Build 1D shifts_1d of length BLOCK_N: column j gets shift (j % 8) * 4.
+    shifts_row = tl.arange(0, 8) * 4  # [8]
+    shifts_1d_2d = tl.broadcast_to(shifts_row[None, :], (BLOCK_N // 8, 8))
+    shifts_1d = tl.reshape(shifts_1d_2d, (BLOCK_N,))  # [BLOCK_N]
+    # Broadcast to [BLOCK_K, BLOCK_N] for weight unpacking
+    shifts = tl.broadcast_to(shifts_1d[None, :], (BLOCK_K, BLOCK_N))
+
+    # Scales column offsets: full N-width (one scale per output neuron)
+    offs_sn = pid_n * BLOCK_N + tl.arange(0, BLOCK_N)
+
+    accumulator = tl.zeros((BLOCK_M, BLOCK_N), dtype=tl.float32)
+
+    for k_start in range(0, tl.cdiv(K, BLOCK_K)):
+        offs_k = k_start * BLOCK_K + tl.arange(0, BLOCK_K)
+        mask_k = offs_k < K
+
+        # ---- Load activations A: [BLOCK_M, BLOCK_K] ----
+        a_ptrs = a_ptr + offs_m[:, None] * stride_am + offs_k[None, :] * stride_ak
+        mask_a = (offs_m[:, None] < M) & mask_k[None, :]
+        a = tl.load(a_ptrs, mask=mask_a, other=0.0)
+
+        # ---- Load packed weights B: [BLOCK_K, BLOCK_N//8] int32 ----
+        b_ptrs = b_ptr + offs_k[:, None] * stride_bk + offs_bn[None, :] * stride_bn
+        mask_b = mask_k[:, None] & (offs_bn[None, :] < N // 8)
+        b_packed = tl.load(b_ptrs, mask=mask_b, other=0)
+
+        # ---- Unpack int4 weights → [BLOCK_K, BLOCK_N] ----
+        # tl.interleave(x, x) doubles the last dim by interleaving.
+        # Starting from [BLOCK_K, BLOCK_N//8], three interleaves give
+        # [BLOCK_K, BLOCK_N], where each int32 is replicated 8 times.
+        b = tl.interleave(b_packed, b_packed)
+        b = tl.interleave(b, b)
+        b = tl.interleave(b, b)
+        # Extract the correct 4-bit nibble for each output column
+        b = (b >> shifts) & 0xF
+
+        # ---- Compute scale/zero group row index ----
+        g_idx = (k_start * BLOCK_K) // group_size
+
+        # ---- Load scales: [BLOCK_N] → broadcast to [BLOCK_K, BLOCK_N] ----
+        scale_offset = g_idx * N + offs_sn
+        scale_mask = offs_sn < N
+        scales = tl.load(scales_ptr + scale_offset, mask=scale_mask, other=1.0)
+        scales = tl.broadcast_to(scales[None, :], (BLOCK_K, BLOCK_N))
+
+        # ---- Load / compute zeros ----
+        if HAS_ZP:
+            # Load packed zeros row: [BLOCK_N//8] int32
+            zero_offset = g_idx * (N // 8) + offs_bn
+            zero_mask = offs_bn < N // 8
+            z_packed = tl.load(zeros_ptr + zero_offset, mask=zero_mask, other=0)
+            # Unpack to [BLOCK_N] using same interleave+shift pattern
+            z = tl.interleave(z_packed, z_packed)
+            z = tl.interleave(z, z)
+            z = tl.interleave(z, z)
+            z = (z >> shifts_1d) & 0xF
+            z = tl.broadcast_to(z[None, :], (BLOCK_K, BLOCK_N))
+        else:
+            z = tl.full((BLOCK_K, BLOCK_N), ZP_BIAS, dtype=tl.int32)
+
+        # ---- Dequantize: (w - zero) * scale ----
+        b_fp = (b - z).to(a.dtype) * scales
+
+        # ---- Accumulate ----
+        accumulator += tl.dot(a, b_fp, out_dtype=tl.float32)
+
+    # ---- Store output C: [BLOCK_M, BLOCK_N] ----
+    c = accumulator.to(c_ptr.type.element_ty)
+    c_ptrs = c_ptr + offs_m[:, None] * stride_cm + offs_n[None, :] * stride_cn
+    mask_c = (offs_m[:, None] < M) & (offs_n[None, :] < N)
+    tl.store(c_ptrs, c, mask=mask_c)
+
+
+def triton_w4a16_gemm(
+    a: torch.Tensor,  # [M, K] fp16/bf16
+    b_q: torch.Tensor,  # [K, N//8] int32
+    scales: torch.Tensor,  # [K//G, N] fp16/bf16
+    qzeros: torch.Tensor | None,  # [K//G, N//8] int32, or None
+    group_size: int,
+    zp_bias: int = 8,  # bias for uint4b8 when qzeros is None
+) -> torch.Tensor:
+    """
+    Fused W4A16 GEMM using GPTQ-packed int4 weights.
+
+    Args:
+        a:          Activation matrix [M, K], float16 or bfloat16.
+        b_q:        Packed weight matrix [K, N//8], int32 (GPTQ sequential).
+        scales:     Per-group scales [K//G, N], same dtype as a.
+        qzeros:     Per-group packed zero points [K//G, N//8] int32, or None
+                    for symmetric quantization (uses zp_bias instead).
+        group_size: Quantization group size (resolved from -1 to K by caller).
+        zp_bias:    Constant zero used when qzeros is None (default 8 for uint4b8).
+
+    Returns:
+        Output matrix [M, N], same dtype as a.
+    """
+    assert a.is_contiguous(), "Activation matrix must be contiguous"
+    assert b_q.is_contiguous(), "Weight matrix must be contiguous"
+    assert scales.is_contiguous(), "Scales must be contiguous"
+
+    M, K = a.shape
+    N = b_q.shape[1] * 8
+
+    assert b_q.shape == (K, N // 8), (
+        f"b_q shape mismatch: {b_q.shape} vs ({K}, {N // 8})"
+    )
+    assert scales.shape == (K // group_size, N), (
+        f"scales shape mismatch: {scales.shape} vs ({K // group_size}, {N})"
+    )
+    if qzeros is not None:
+        assert qzeros.shape == (K // group_size, N // 8), (
+            f"qzeros shape mismatch: {qzeros.shape}"
+        )
+
+    c = torch.empty((M, N), dtype=a.dtype, device=a.device)
+
+    has_zp = qzeros is not None
+    # Provide a dummy pointer when HAS_ZP=False (Triton requires a valid ptr)
+    zeros_ptr = qzeros if has_zp else b_q
+
+    if current_platform.is_rocm():
+        from vllm.platforms.rocm import on_gfx1x
+
+        if on_gfx1x():
+            # Tuned for RDNA 3.5 (gfx1151, 40 CUs, 32-wide wavefronts).
+            if M <= 32:
+                BLOCK_M, BLOCK_N, BLOCK_K = 32, 32, 64
+            elif M <= 64:
+                BLOCK_M, BLOCK_N, BLOCK_K = 64, 64, 32
+            else:
+                BLOCK_M, BLOCK_N, BLOCK_K = 128, 32, 64
+        else:
+            # Tuned for MI300 (gfx942, 304 CUs, 64-wide wavefronts).
+            if M <= 32:
+                BLOCK_M, BLOCK_N, BLOCK_K = 32, 64, 32
+            elif M <= 64:
+                BLOCK_M, BLOCK_N, BLOCK_K = 64, 64, 32
+            else:
+                BLOCK_M, BLOCK_N, BLOCK_K = 128, 128, 32
+    else:
+        if M <= 32:
+            BLOCK_M, BLOCK_N, BLOCK_K = 32, 64, 32
+        elif M <= 64:
+            BLOCK_M, BLOCK_N, BLOCK_K = 64, 64, 32
+        else:
+            BLOCK_M, BLOCK_N, BLOCK_K = 128, 128, 32
+
+    # The kernel loads scales/zeros for a single group per BLOCK_K tile
+    # (one g_idx per iteration). If BLOCK_K > group_size, rows at the tail
+    # of the tile dequantize with the wrong group's scales, silently
+    # corrupting the output. Clamp BLOCK_K to group_size to keep one
+    # scale group per tile.
+    if group_size < BLOCK_K:
+        BLOCK_K = group_size
+
+    grid = (triton.cdiv(M, BLOCK_M), triton.cdiv(N, BLOCK_N))
+
+    triton_w4a16_gemm_kernel[grid](
+        a,
+        b_q,
+        scales,
+        zeros_ptr,
+        c,
+        M,
+        N,
+        K,
+        a.stride(0),
+        a.stride(1),
+        b_q.stride(0),
+        b_q.stride(1),
+        c.stride(0),
+        c.stride(1),
+        group_size=group_size,
+        HAS_ZP=has_zp,
+        ZP_BIAS=zp_bias,
+        BLOCK_M=BLOCK_M,
+        BLOCK_N=BLOCK_N,
+        BLOCK_K=BLOCK_K,
+    )
+    return c
+
+
+class TritonW4A16LinearKernel(MPLinearKernel):
+    """
+    Triton-based W4A16 GEMM kernel for ROCm (MI300 and newer).
+
+    Supports GPTQ-format int4 weights (uint4b8 symmetric, uint4 asymmetric)
+    with grouped quantization. Weight tensors are transposed from the
+    compressed-tensors checkpoint layout to the kernel's [K, N//8] layout.
+    """
+
+    SUPPORTED_QUANT_TYPES = TRITON_W4A16_SUPPORTED_QUANT_TYPES
+
+    @classmethod
+    def get_min_capability(cls) -> int:
+        # Triton handles capability checks itself
+        return 0
+
+    @classmethod
+    def can_implement(cls, c: MPLinearLayerConfig) -> tuple[bool, str | None]:
+        if not current_platform.is_rocm():
+            return False, "TritonW4A16LinearKernel only targets ROCm"
+
+        if c.weight_type not in cls.SUPPORTED_QUANT_TYPES:
+            return (
+                False,
+                f"Quant type {c.weight_type} not supported; "
+                f"supported: {cls.SUPPORTED_QUANT_TYPES}",
+            )
+
+        if c.act_type not in (torch.float16, torch.bfloat16):
+            return False, "Only float16/bfloat16 activations are supported"
+
+        N = c.partition_weight_shape[1]
+        if N % 8 != 0:
+            return (
+                False,
+                f"Output features ({N}) must be divisible by 8 "
+                "(8 int4 values packed per int32)",
+            )
+
+        if c.has_g_idx:
+            return (
+                False,
+                "Activation reordering (g_idx) is not supported by "
+                "TritonW4A16LinearKernel",
+            )
+
+        gs = c.group_size
+        if (
+            gs not in TRITON_W4A16_SUPPORTED_GROUP_SIZES
+            and gs != c.full_weight_shape[0]
+        ):
+            return (
+                False,
+                f"Group size {gs} not supported; "
+                f"supported: {TRITON_W4A16_SUPPORTED_GROUP_SIZES} "
+                f"or full K ({c.full_weight_shape[0]})",
+            )
+
+        K = c.partition_weight_shape[0]
+        eff_gs = gs if gs != -1 else K
+        if K % eff_gs != 0:
+            return (False, f"Input features {K} not divisible by group size {eff_gs}")
+
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        """
+        Convert compressed-tensors checkpoint layout to kernel layout.
+
+        Checkpoint (from compressed_tensors_wNa16.create_weights):
+          weight_packed:     [N, K//8]  int32   input_dim=1, output_dim=0, packed_dim=1
+          weight_scale:      [N, K//G]  fp16    input_dim=1, output_dim=0
+          weight_zero_point: [N//8, K//G] int32  output_dim=0, packed_dim=0
+
+        Kernel needs:
+          qweight: [K, N//8]  int32   (transpose weight_packed)
+          scales:  [K//G, N]  fp16    (transpose weight_scale)
+          qzeros:  [K//G, N//8] int32 (transpose weight_zero_point)
+        """
+
+        # ---- Transform qweight: [N, K//8] → [K//8, N] → back to [K, N//8] ----
+        # permute_param_layout_(x, input_dim=0, output_dim=1) rearranges so that
+        # the input(K) dimension is at physical dim 0 and output(N) at dim 1.
+        # Checkpoint has input_dim=1, output_dim=0, packed_dim=1 (K is packed).
+        # After permute we get [K//8, N] (K packed at dim 0, N at dim 1).
+        # The kernel wants [K, N//8] (K at dim 0, N packed at dim 1), so we
+        # then transpose: [K//8, N].T = [N, K//8] — that's not right.
+        #
+        # Actually we need to change WHAT is packed:
+        #   Original packing: K packed into K//8 (8 K-values per int32)
+        #   Kernel packing:   N packed into N//8 (8 N-values per int32)
+        # These require a full repack, not just a transpose.
+        #
+        # Simple approach: unpack → transpose the full [N, K] → repack as [K, N//8].
+        # This is done CPU-side at load time (one-time cost).
+        def repack_w_q(x: BasevLLMParameter) -> BasevLLMParameter:
+            # x.data is [N, K//8] int32, K packed (GPTQ checkpoint format)
+            # Step 1: bring to [N, K//8] with output(N) at dim 0
+            permute_param_layout_(x, input_dim=1, output_dim=0, packed_dim=1)
+            w = x.data  # [N, K//8] int32
+
+            N_dim, K8 = w.shape
+            K_dim = K8 * 8
+            # Step 2: unpack to [N, K] int32 (vectorized)
+            shifts = torch.arange(8, device=w.device, dtype=torch.int32) * 4
+            w_unpacked = ((w.unsqueeze(-1) >> shifts) & 0xF).reshape(N_dim, K_dim)
+            # Step 3: transpose to [K, N] int32
+            w_KN = w_unpacked.t().contiguous()
+            # Step 4: repack N into N//8 int32 values → [K, N//8] (vectorized)
+            N8 = N_dim // 8
+            w_repacked = torch.sum(
+                (w_KN.view(K_dim, N8, 8) & 0xF) << shifts,
+                dim=2,
+                dtype=torch.int32,
+            )
+            x.data = w_repacked.contiguous()
+            return x
+
+        def repack_w_s(x: BasevLLMParameter) -> BasevLLMParameter:
+            # x.data is [N, K//G] fp16, bring to [K//G, N]
+            permute_param_layout_(x, input_dim=1, output_dim=0)
+            x.data = x.data.t().contiguous()
+            return x
+
+        self._transform_param(layer, self.w_q_name, repack_w_q)
+        self._transform_param(layer, self.w_s_name, repack_w_s)
+
+        if self.w_zp_name is not None:
+            zp = getattr(layer, self.w_zp_name, None)
+            if zp is not None:
+                # Checkpoint: [N//8, K//G] int32 (N packed at dim 0, K//G at dim 1)
+                # Kernel needs: [K//G, N//8] — just transpose
+                replace_parameter(
+                    layer,
+                    self.w_zp_name,
+                    torch.nn.Parameter(zp.data.t().contiguous(), requires_grad=False),
+                )
+
+    def apply_weights(
+        self, layer: torch.nn.Module, x: torch.Tensor, bias: torch.Tensor | None = None
+    ) -> torch.Tensor:
+        c = self.config
+        w_q, w_s, w_zp, _ = self._get_weight_params(layer)
+
+        x_2d = x.reshape(-1, x.shape[-1]).contiguous()
+        out_shape = x.shape[:-1] + (c.partition_weight_shape[1],)
+
+        K = c.partition_weight_shape[0]
+        group_size = c.group_size if c.group_size != -1 else K
+
+        # For symmetric types (uint4b8), use the scalar bias; no zeros tensor
+        zp_bias = c.weight_type.bias if c.weight_type.has_bias() else 0
+
+        output = triton_w4a16_gemm(
+            a=x_2d,
+            b_q=w_q,
+            scales=w_s,
+            qzeros=w_zp,
+            group_size=group_size,
+            zp_bias=zp_bias,
+        )
+
+        if bias is not None:
+            output.add_(bias)
+
+        return output.reshape(out_shape)
diff --git a/vllm/model_executor/kernels/linear/mixed_precision/xpu.py b/vllm/model_executor/kernels/linear/mixed_precision/xpu.py
index 78fa7e83c194..68528bbd488d 100644
--- a/vllm/model_executor/kernels/linear/mixed_precision/xpu.py
+++ b/vllm/model_executor/kernels/linear/mixed_precision/xpu.py
@@ -61,17 +61,53 @@ def can_implement(cls, c: MPLinearLayerConfig) -> tuple[bool, str | None]:
         return True, None
 
     def process_weights_after_loading(self, layer: torch.nn.Module):
-        layer.weight_scale.data = layer.weight_scale.t().contiguous()
+        # Default names since marlin requires empty parameters for these,
+        # TODO: remove this requirement from marlin (allow optional tensors)
+        if self.w_gidx_name is None:
+            self.w_gidx_name = "g_idx"
+        if self.w_zp_name is None:
+            self.w_zp_name = "w_zp"
+
+        need_transpose = False
+        qweight_shape = getattr(layer, self.w_q_name).shape
+        scale_shape = getattr(layer, self.w_s_name).shape
+        # gptq marlin and compressed tensors wna16 expect different default
+        # layouts for weight and scale, so we check the shapes to determine
+        # if we need to transpose
+        if qweight_shape[0] != scale_shape[0]:
+            need_transpose = True
+
+        if need_transpose:
+            getattr(layer, self.w_q_name).data = (
+                getattr(layer, self.w_q_name).data.t().contiguous()
+            )
+            getattr(layer, self.w_s_name).data = getattr(layer, self.w_s_name).data
+        else:
+            getattr(layer, self.w_s_name).data = (
+                getattr(layer, self.w_s_name).data.t().contiguous()
+            )
 
         if self.config.zero_points:
-            layer.weight_zero_point.data = layer.weight_zero_point.t().contiguous()
+            # (FIXME): maybe zero points should also be transposed.
+            getattr(layer, self.w_zp_name).data = (
+                getattr(layer, self.w_zp_name).data.t().contiguous()
+            )
         else:
             weight_zero_point = torch.Tensor([8]).to(torch.int8).to("xpu")
-            layer.weight_zero_point = Parameter(weight_zero_point, requires_grad=False)
+            setattr(
+                layer, self.w_zp_name, Parameter(weight_zero_point, requires_grad=False)
+            )
         if self.config.has_g_idx:
-            layer.g_idx.data = layer.g_idx.t().contiguous()
+            setattr(
+                layer,
+                self.w_gidx_name,
+                Parameter(
+                    getattr(layer, self.w_gidx_name).data.t().contiguous(),
+                    requires_grad=False,
+                ),
+            )
         else:
-            layer.g_idx = None
+            setattr(layer, self.w_gidx_name, None)
 
     def apply_weights(
         self,
@@ -80,14 +116,15 @@ def apply_weights(
         bias: torch.Tensor | None = None,
     ) -> torch.Tensor:
         reshaped_x = x.reshape(-1, x.shape[-1])
+        w_q, w_s, w_zp, w_gidx = self._get_weight_params(layer)
         out = torch.ops._xpu_C.int4_gemm_w4a16(
             reshaped_x,
-            layer.weight_packed.t(),
-            bias,
-            layer.weight_scale,
-            layer.weight_zero_point,
+            w_q.t(),
+            bias if bias is not None else None,
+            w_s,
+            w_zp,
             self.config.group_size,
-            layer.g_idx,
+            w_gidx,
         )
         return out
 
diff --git a/vllm/model_executor/kernels/linear/mxfp4/__init__.py b/vllm/model_executor/kernels/linear/mxfp4/__init__.py
new file mode 100644
index 000000000000..0927cd945f6f
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/mxfp4/__init__.py
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm.model_executor.kernels.linear.mxfp4.base import (
+    MxFp4LinearKernel,
+    MxFp4LinearLayerConfig,
+)
+
+__all__ = [
+    "MxFp4LinearKernel",
+    "MxFp4LinearLayerConfig",
+]
diff --git a/vllm/model_executor/kernels/linear/mxfp4/base.py b/vllm/model_executor/kernels/linear/mxfp4/base.py
new file mode 100644
index 000000000000..868faa4731d5
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/mxfp4/base.py
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+
+import torch
+
+
+@dataclass
+class MxFp4LinearLayerConfig:
+    """Configuration for an MXFP4 linear layer.
+
+    All MXFP4 layers share the same structure: packed uint8 weights (2 FP4 values per
+    byte) and per-block weight scales (group size 32).
+    """
+
+    pass
+
+
+class MxFp4LinearKernel(ABC):
+    """Base class for MXFP4 quantized linear kernels.
+
+    Each subclass implements a specific GEMM backend (CUTLASS, Marlin, etc).
+    The kernel selection mechanism iterates over registered subclasses in
+    priority order,calling ``is_supported`` and ``can_implement`` to find the best
+    match for the current hardware.
+    """
+
+    def __init__(self, config: MxFp4LinearLayerConfig) -> None:
+        assert self.can_implement(config)[0]
+        assert self.is_supported()[0]
+        self.config = config
+
+    @classmethod
+    @abstractmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        """Return whether this kernel can run on the current platform."""
+        raise NotImplementedError
+
+    @classmethod
+    @abstractmethod
+    def can_implement(cls, config: MxFp4LinearLayerConfig) -> tuple[bool, str | None]:
+        """Return whether this kernel can handle *config*."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        """Transform weights into the format required by this kernel.
+
+        Called once after checkpoint weights have been loaded onto the
+        device.  Implementations should repack / swizzle / pad weights
+        and scales in-place on *layer*.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        """Run the quantized GEMM."""
+        raise NotImplementedError
diff --git a/vllm/model_executor/kernels/linear/mxfp4/flashinfer.py b/vllm/model_executor/kernels/linear/mxfp4/flashinfer.py
new file mode 100644
index 000000000000..8889986f05b9
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/mxfp4/flashinfer.py
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+from torch.nn.parameter import Parameter
+
+from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import (
+    swizzle_mxfp4_scales,
+)
+from vllm.platforms import current_platform
+from vllm.utils.flashinfer import has_flashinfer_cutedsl
+
+from .base import MxFp4LinearKernel, MxFp4LinearLayerConfig
+
+_MXFP4_GROUP_SIZE = 32
+
+
+class FlashInferMxFp4LinearKernel(MxFp4LinearKernel):
+    """MXFP4 W4A4 GEMM via FlashInfer CUTLASS (SM100+)."""
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        if current_platform.has_device_capability(100) and has_flashinfer_cutedsl():
+            return True, None
+        return False, "FlashInfer + >=sm_100 (Blackwell) required"
+
+    @classmethod
+    def can_implement(cls, config: MxFp4LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        N, scale_K = layer.weight_scale.shape
+        K = scale_K * _MXFP4_GROUP_SIZE
+
+        # swizzle pads N to the next multiple of 128 for CUTLASS tiling
+        padded_N = ((N + 127) // 128) * 128
+        layer.weight_scale = Parameter(
+            swizzle_mxfp4_scales(layer.weight_scale.data, N, K).reshape(padded_N, -1),
+            requires_grad=False,
+        )
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        from vllm.utils.flashinfer import (
+            flashinfer_mxfp4_quantize,
+            flashinfer_scaled_fp4_mm,
+        )
+
+        weight = layer.weight
+        out_shape = x.shape[:-1] + (layer.output_size_per_partition,)
+        x_2d = x.reshape(-1, x.shape[-1])
+
+        x_fp4, x_scale = flashinfer_mxfp4_quantize(x_2d)
+        out = flashinfer_scaled_fp4_mm(
+            x_fp4,
+            weight,
+            x_scale,
+            layer.weight_scale,
+            alpha=None,
+            out_dtype=x.dtype,
+            backend="cute-dsl",
+            block_size=_MXFP4_GROUP_SIZE,
+            use_nvfp4=False,
+        )
+
+        if bias is not None:
+            out = out + bias
+        return out.view(out_shape)
diff --git a/vllm/model_executor/kernels/linear/mxfp4/marlin.py b/vllm/model_executor/kernels/linear/mxfp4/marlin.py
new file mode 100644
index 000000000000..38440752072e
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/mxfp4/marlin.py
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+from .base import MxFp4LinearKernel, MxFp4LinearLayerConfig
+
+
+class MarlinMxFp4LinearKernel(MxFp4LinearKernel):
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
+            is_fp4_marlin_supported,
+        )
+
+        if is_fp4_marlin_supported():
+            return True, None
+        return False, "Marlin FP4 not available"
+
+    @classmethod
+    def can_implement(cls, c: MxFp4LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
+            prepare_fp4_layer_for_marlin,
+        )
+
+        prepare_fp4_layer_for_marlin(layer)
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
+            apply_fp4_marlin_linear,
+        )
+
+        return apply_fp4_marlin_linear(
+            input=x,
+            weight=layer.weight,
+            weight_scale=layer.weight_scale,
+            weight_global_scale=None,
+            workspace=layer.workspace,
+            size_n=layer.output_size_per_partition,
+            size_k=layer.input_size_per_partition,
+            bias=bias,
+        )
diff --git a/vllm/model_executor/kernels/linear/mxfp4/xpu.py b/vllm/model_executor/kernels/linear/mxfp4/xpu.py
new file mode 100644
index 000000000000..8d33939d2ed9
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/mxfp4/xpu.py
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+from vllm.model_executor.layers.quantization.utils.mxfp4_utils import (
+    xpu_mxfp4_quantize as quant_mxfp4,
+)
+from vllm.model_executor.utils import replace_parameter
+from vllm.platforms import current_platform
+
+from .base import MxFp4LinearKernel, MxFp4LinearLayerConfig
+
+
+class XPUMxFp4LinearKernel(MxFp4LinearKernel):
+    """MXFP4 W4A4 GEMM on XPU."""
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        if not current_platform.is_xpu():
+            return False, "XPUMxFp4 only support on XPU"
+        return True, None
+
+    @classmethod
+    def can_implement(cls, c: MxFp4LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        weight = layer.weight.view(torch.float4_e2m1fn_x2)
+        replace_parameter(layer, "weight", weight.data.t())
+
+        weight_scale = layer.weight_scale.view(torch.float8_e8m0fnu)
+        weight_scale = weight_scale.t().contiguous()
+        replace_parameter(layer, "weight_scale", weight_scale.data)
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        out_dtype = x.dtype
+        x_fp4, x_blockscale = quant_mxfp4(x)
+        return torch.ops._xpu_C.fp4_gemm(
+            x_fp4,
+            layer.weight,
+            x_blockscale,
+            layer.weight_scale,
+            out_dtype,
+            bias,
+        )
diff --git a/vllm/model_executor/kernels/linear/mxfp8/Mxfp8LinearKernel.py b/vllm/model_executor/kernels/linear/mxfp8/Mxfp8LinearKernel.py
new file mode 100644
index 000000000000..28c958d4fd42
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/mxfp8/Mxfp8LinearKernel.py
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+
+import torch
+
+
+@dataclass
+class Mxfp8LinearLayerConfig:
+    """Configuration for an MXFP8 linear layer.
+
+    All MXFP8 layers share the same structure: FP8-E4M3 weights with
+    uint8 (E8M0) per-block scales at block size 32.
+    """
+
+    pass
+
+
+class Mxfp8LinearKernel(ABC):
+    """Base class for MXFP8 quantized linear kernels.
+
+    Each subclass implements a specific GEMM backend (FlashInfer CUTLASS,
+    Marlin, emulation).
+    """
+
+    def __init__(self, c: Mxfp8LinearLayerConfig) -> None:
+        assert self.can_implement(c)[0]
+        assert self.is_supported()[0]
+        self.config = c
+
+    @classmethod
+    @abstractmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        raise NotImplementedError
+
+    @classmethod
+    @abstractmethod
+    def can_implement(cls, c: Mxfp8LinearLayerConfig) -> tuple[bool, str | None]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        raise NotImplementedError
+
+    @abstractmethod
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        raise NotImplementedError
diff --git a/vllm/model_executor/kernels/linear/mxfp8/__init__.py b/vllm/model_executor/kernels/linear/mxfp8/__init__.py
new file mode 100644
index 000000000000..507aedee14cc
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/mxfp8/__init__.py
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm.model_executor.kernels.linear.mxfp8.Mxfp8LinearKernel import (
+    Mxfp8LinearKernel,
+    Mxfp8LinearLayerConfig,
+)
+
+__all__ = [
+    "Mxfp8LinearKernel",
+    "Mxfp8LinearLayerConfig",
+]
diff --git a/vllm/model_executor/kernels/linear/mxfp8/emulation.py b/vllm/model_executor/kernels/linear/mxfp8/emulation.py
new file mode 100644
index 000000000000..a7cc29be758b
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/mxfp8/emulation.py
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+from torch.nn.parameter import Parameter
+
+from vllm.model_executor.layers.quantization.utils.mxfp8_utils import (
+    MXFP8_BLOCK_SIZE,
+    MXFP8_SCALE_DTYPE,
+    dequant_mxfp8_to_bf16,
+)
+
+from .Mxfp8LinearKernel import Mxfp8LinearKernel, Mxfp8LinearLayerConfig
+
+
+class EmulationMxfp8LinearKernel(Mxfp8LinearKernel):
+    """Software emulation fallback for MXFP8 (dequant to BF16)."""
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        return True, None
+
+    @classmethod
+    def can_implement(cls, c: Mxfp8LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        weight = layer.weight.data  # [N, K]
+        N, K = weight.shape
+        scale_k = K // MXFP8_BLOCK_SIZE
+
+        weight_scale = layer.weight_scale.data[:N, :scale_k].contiguous()
+
+        layer.weight = Parameter(weight.contiguous(), requires_grad=False)
+        layer.weight_scale = Parameter(weight_scale, requires_grad=False)
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        weight_scale = layer.weight_scale
+        if weight_scale.dtype != MXFP8_SCALE_DTYPE:
+            raise ValueError(
+                f"Emulation backend requires {MXFP8_SCALE_DTYPE} "
+                f"weight_scale dtype, got {weight_scale.dtype}."
+            )
+        if weight_scale.ndim != 2:
+            raise ValueError(
+                f"Emulation backend requires 2D weight_scale, "
+                f"got {weight_scale.ndim}D. "
+                f"Ensure process_weights_after_loading was called."
+            )
+
+        weight_bf16 = dequant_mxfp8_to_bf16(layer.weight, weight_scale)
+        output = torch.nn.functional.linear(x, weight_bf16, bias)
+        return output.to(x.dtype)
diff --git a/vllm/model_executor/kernels/linear/mxfp8/flashinfer.py b/vllm/model_executor/kernels/linear/mxfp8/flashinfer.py
new file mode 100644
index 000000000000..336da511ad8b
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/mxfp8/flashinfer.py
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+from torch.nn.parameter import Parameter
+
+from vllm.model_executor.layers.quantization.utils.mxfp8_utils import (
+    MXFP8_BLOCK_SIZE,
+    mxfp8_e4m3_quantize,
+    swizzle_mxfp8_scale,
+)
+from vllm.platforms import current_platform
+from vllm.utils import flashinfer as vllm_flashinfer
+
+from .Mxfp8LinearKernel import Mxfp8LinearKernel, Mxfp8LinearLayerConfig
+
+
+class FlashInferCutlassMxfp8LinearKernel(Mxfp8LinearKernel):
+    """MXFP8 W8A8 GEMM via FlashInfer CUTLASS (SM100+)."""
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        if current_platform.has_device_capability(100):
+            return True, None
+        return False, "requires >=sm_100 (Blackwell)"
+
+    @classmethod
+    def can_implement(cls, c: Mxfp8LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        weight = layer.weight.data  # [N, K]
+        N, K = weight.shape
+
+        scale_k = K // MXFP8_BLOCK_SIZE
+        weight_scale_2d = layer.weight_scale.data[:N, :scale_k].contiguous()
+        weight_scale_swizzled = swizzle_mxfp8_scale(weight_scale_2d, M=N, K=K)
+
+        layer.weight = Parameter(weight.contiguous(), requires_grad=False)
+        layer.weight_scale = Parameter(
+            weight_scale_swizzled.contiguous(), requires_grad=False
+        )
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        weight = layer.weight
+        weight_scale = layer.weight_scale
+        out_dtype = x.dtype
+        N, K = weight.shape
+
+        input_shape = x.shape
+        input_2d = x.view(-1, K)
+        M_orig = input_2d.shape[0]
+
+        min_dim = 128
+
+        assert min_dim <= K, (
+            f"mm_mxfp8 requires K >= {min_dim}, got K={K}. "
+            f"in_features is too small for mm_mxfp8."
+        )
+        assert K % MXFP8_BLOCK_SIZE == 0, (
+            f"mm_mxfp8 requires K to be divisible by {MXFP8_BLOCK_SIZE}, got K={K}."
+        )
+        assert min_dim <= N, (
+            f"mm_mxfp8 requires N >= {min_dim}, got N={N}. "
+            f"out_features is too small for mm_mxfp8."
+        )
+
+        M_padded = ((M_orig + min_dim - 1) // min_dim) * min_dim
+        if M_padded != M_orig:
+            pad_rows = M_padded - M_orig
+            input_2d = torch.nn.functional.pad(input_2d, (0, 0, 0, pad_rows))
+
+        input_mxfp8, input_scale = mxfp8_e4m3_quantize(
+            input_2d, is_sf_swizzled_layout=True
+        )
+
+        if not weight.is_contiguous():
+            weight = weight.contiguous()
+
+        output = vllm_flashinfer.mm_mxfp8(
+            input_mxfp8,
+            weight.t(),
+            input_scale,
+            weight_scale,
+            out_dtype=out_dtype,
+            backend="cutlass",
+        )
+
+        if M_padded != M_orig:
+            output = output[:M_orig, :]
+
+        if bias is not None:
+            output = output + bias
+
+        output_shape = (*input_shape[:-1], N)
+        return output.view(output_shape)
diff --git a/vllm/model_executor/kernels/linear/mxfp8/marlin.py b/vllm/model_executor/kernels/linear/mxfp8/marlin.py
new file mode 100644
index 000000000000..bec54cd942ed
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/mxfp8/marlin.py
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+from .Mxfp8LinearKernel import Mxfp8LinearKernel, Mxfp8LinearLayerConfig
+
+
+class MarlinMxfp8LinearKernel(Mxfp8LinearKernel):
+    """MXFP8 W8A16 GEMM via Marlin (SM80+)."""
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import (
+            is_fp8_marlin_supported,
+        )
+
+        if is_fp8_marlin_supported():
+            return True, None
+        return False, "Marlin FP8 not available"
+
+    @classmethod
+    def can_implement(cls, c: Mxfp8LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import (
+            prepare_mxfp8_layer_for_marlin,
+        )
+
+        prepare_mxfp8_layer_for_marlin(layer)
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import (
+            apply_mxfp8_marlin_linear,
+        )
+
+        return apply_mxfp8_marlin_linear(
+            input=x,
+            weight=layer.weight,
+            weight_scale=layer.weight_scale,
+            workspace=layer.workspace,
+            size_n=layer.output_size_per_partition,
+            size_k=layer.input_size_per_partition,
+            bias=bias,
+        )
diff --git a/vllm/model_executor/kernels/linear/mxfp8/xpu.py b/vllm/model_executor/kernels/linear/mxfp8/xpu.py
new file mode 100644
index 000000000000..d64e175c2954
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/mxfp8/xpu.py
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+from vllm.model_executor.layers.quantization.utils.mxfp8_utils import (
+    xpu_mxfp8_quantize as quant_mxfp8,
+)
+from vllm.model_executor.utils import replace_parameter
+from vllm.platforms import current_platform
+
+from .Mxfp8LinearKernel import Mxfp8LinearKernel, Mxfp8LinearLayerConfig
+
+
+class XPUMxFp8LinearKernel(Mxfp8LinearKernel):
+    """MXFP8 W8A8 GEMM on XPU."""
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        if not current_platform.is_xpu():
+            return False, "XPUMxFp8 only support on XPU"
+        return True, None
+
+    @classmethod
+    def can_implement(cls, c: Mxfp8LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        weight_scale = layer.weight_scale.view(torch.float8_e8m0fnu)
+        weight_scale = weight_scale.t().contiguous()
+        replace_parameter(layer, "weight", layer.weight.t())
+        replace_parameter(layer, "weight_scale", weight_scale.data)
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        out_dtype = x.dtype
+        x_fp8, x_scale = quant_mxfp8(x)
+        return torch.ops._xpu_C.fp8_gemm(
+            x_fp8,
+            layer.weight,
+            out_dtype,
+            x_scale,
+            layer.weight_scale,
+            bias,
+        )
diff --git a/vllm/model_executor/kernels/linear/nvfp4/__init__.py b/vllm/model_executor/kernels/linear/nvfp4/__init__.py
new file mode 100644
index 000000000000..de72584057d1
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/nvfp4/__init__.py
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm.model_executor.kernels.linear.nvfp4.base import (
+    NvFp4LinearKernel,
+    NvFp4LinearLayerConfig,
+)
+
+__all__ = [
+    "NvFp4LinearKernel",
+    "NvFp4LinearLayerConfig",
+]
diff --git a/vllm/model_executor/kernels/linear/nvfp4/base.py b/vllm/model_executor/kernels/linear/nvfp4/base.py
new file mode 100644
index 000000000000..24e0aa308928
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/nvfp4/base.py
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+
+import torch
+
+
+@dataclass
+class NvFp4LinearLayerConfig:
+    """Configuration for an NVFP4 linear layer.
+
+    All NVFP4 layers share the same structure: packed uint8 weights (2 FP4 values per
+    byte), FP8-E4M3 per-block weight scales (group size 16), and scalar global
+    scales for both weights and activations.
+    """
+
+    pass
+
+
+class NvFp4LinearKernel(ABC):
+    """Base class for NVFP4 quantized linear kernels.
+
+    Each subclass implements a specific GEMM backend (CUTLASS, Marlin, etc).
+    The kernel selection mechanism iterates over registered subclasses in
+    priority order,calling ``is_supported`` and ``can_implement`` to find the best
+    match for the current hardware.
+    """
+
+    def __init__(self, config: NvFp4LinearLayerConfig) -> None:
+        assert self.can_implement(config)[0]
+        assert self.is_supported()[0]
+        self.config = config
+
+    @classmethod
+    @abstractmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        """Return whether this kernel can run on the current platform."""
+        raise NotImplementedError
+
+    @classmethod
+    @abstractmethod
+    def can_implement(cls, config: NvFp4LinearLayerConfig) -> tuple[bool, str | None]:
+        """Return whether this kernel can handle *config*."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        """Transform weights into the format required by this kernel.
+
+        Called once after checkpoint weights have been loaded onto the
+        device.  Implementations should repack / swizzle / pad weights
+        and scales in-place on *layer*.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        """Run the quantized GEMM."""
+        raise NotImplementedError
diff --git a/vllm/model_executor/kernels/linear/nvfp4/cutlass.py b/vllm/model_executor/kernels/linear/nvfp4/cutlass.py
new file mode 100644
index 000000000000..106e92d912dc
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/nvfp4/cutlass.py
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+from vllm._custom_ops import (
+    cutlass_scaled_fp4_mm,
+    scaled_fp4_quant,
+)
+from vllm.model_executor.layers.quantization.utils.nvfp4_utils import (
+    cutlass_fp4_supported,
+    pad_nvfp4_weight_for_cutlass,
+    slice_nvfp4_output,
+    swizzle_blockscale,
+)
+
+from .base import NvFp4LinearKernel, NvFp4LinearLayerConfig
+
+
+class CutlassNvFp4LinearKernel(NvFp4LinearKernel):
+    """NVFP4 GEMM via the vLLM CUTLASS kernel."""
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        if cutlass_fp4_supported():
+            return True, None
+        return False, "CUTLASS FP4 kernels not available"
+
+    @classmethod
+    def can_implement(cls, config: NvFp4LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        layer.weight_scale = torch.nn.Parameter(
+            swizzle_blockscale(layer.weight_scale.data), requires_grad=False
+        )
+        padded_weight, weights_padding_cols = pad_nvfp4_weight_for_cutlass(
+            layer.weight.data
+        )
+        layer.weight = torch.nn.Parameter(padded_weight, requires_grad=False)
+        layer.weights_padding_cols = weights_padding_cols
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        output_size = layer.output_size_per_partition
+        output_dtype = x.dtype
+        output_shape = [*x.shape[:-1], output_size]
+        weights_padding_bytes = getattr(layer, "weights_padding_cols", 0)
+
+        x_fp4, x_blockscale = scaled_fp4_quant(
+            x,
+            layer.input_global_scale_inv,
+            is_sf_swizzled_layout=True,
+            backend="cutlass",
+            padded_n=x.shape[-1] + weights_padding_bytes * 2,
+        )
+
+        out = cutlass_scaled_fp4_mm(
+            x_fp4,
+            layer.weight,
+            x_blockscale,
+            layer.weight_scale,
+            layer.alpha,
+            output_dtype,
+        )
+
+        out = slice_nvfp4_output(out, output_size)
+
+        if bias is not None:
+            out = out + bias
+        return out.view(*output_shape)
diff --git a/vllm/model_executor/kernels/linear/nvfp4/emulation.py b/vllm/model_executor/kernels/linear/nvfp4/emulation.py
new file mode 100644
index 000000000000..2a55b3177676
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/nvfp4/emulation.py
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+from vllm.model_executor.layers.quantization.utils.nvfp4_emulation_utils import (
+    kE2M1ToFloat_handle,
+    run_nvfp4_emulations,
+)
+
+from .base import NvFp4LinearKernel, NvFp4LinearLayerConfig
+
+
+class EmulationNvFp4LinearKernel(NvFp4LinearKernel):
+    """Software emulation fallback for NVFP4 (dequant → BF16 matmul)."""
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        # Always available as a last-resort fallback.
+        return True, None
+
+    @classmethod
+    def can_implement(cls, config: NvFp4LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        # Move the E2M1 lookup table to the device now, because
+        # `.to(device)` is not allowed during CUDA graph capture.
+        kE2M1ToFloat_handle.val = kE2M1ToFloat_handle.val.to(layer.weight.device)
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        out = run_nvfp4_emulations(
+            x=x,
+            input_global_scale=layer.input_global_scale_inv,
+            weight=layer.weight,
+            weight_scale_swizzled=layer.weight_scale,
+            weight_global_scale=layer.weight_global_scale,
+            swizzle=False,
+        )
+        if bias is not None:
+            out = out + bias
+        return out
diff --git a/vllm/model_executor/kernels/linear/nvfp4/fbgemm.py b/vllm/model_executor/kernels/linear/nvfp4/fbgemm.py
new file mode 100644
index 000000000000..fa30a75c47de
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/nvfp4/fbgemm.py
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+from vllm._custom_ops import scaled_fp4_quant
+from vllm.model_executor.layers.quantization.utils.nvfp4_utils import (
+    slice_nvfp4_output,
+    swizzle_blockscale,
+)
+from vllm.utils.import_utils import has_fbgemm_gpu
+
+from .base import NvFp4LinearKernel, NvFp4LinearLayerConfig
+
+
+class FbgemmNvFp4LinearKernel(NvFp4LinearKernel):
+    """NVFP4 GEMM via FBGEMM."""
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        if has_fbgemm_gpu():
+            return True, None
+        return False, "fbgemm_gpu required"
+
+    @classmethod
+    def can_implement(cls, config: NvFp4LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        swizzled = swizzle_blockscale(layer.weight_scale.data)
+        layer.weight_scale = torch.nn.Parameter(
+            swizzled.view(-1).view(torch.uint8), requires_grad=False
+        )
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        import fbgemm_gpu  # noqa: F401 - registers torch.ops.fbgemm.*
+
+        output_size = layer.output_size_per_partition
+        output_dtype = x.dtype
+        output_shape = [*x.shape[:-1], output_size]
+
+        x_fp4, x_blockscale = scaled_fp4_quant(
+            x,
+            layer.input_global_scale_inv,
+            is_sf_swizzled_layout=True,
+            backend="fbgemm",
+        )
+
+        out = torch.ops.fbgemm.f4f4bf16(
+            x_fp4,
+            layer.weight,
+            x_blockscale.view(-1).view(torch.uint8),
+            layer.weight_scale,
+            layer.alpha,
+            use_mx=False,
+        ).to(output_dtype)
+
+        out = slice_nvfp4_output(out, output_size)
+
+        if bias is not None:
+            out = out + bias
+        return out.view(*output_shape)
diff --git a/vllm/model_executor/kernels/linear/nvfp4/flashinfer.py b/vllm/model_executor/kernels/linear/nvfp4/flashinfer.py
new file mode 100644
index 000000000000..bcd47fda96ec
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/nvfp4/flashinfer.py
@@ -0,0 +1,285 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+from vllm._custom_ops import scaled_fp4_quant
+from vllm.model_executor.layers.quantization.utils.nvfp4_utils import (
+    pad_nvfp4_activation_for_cutlass,
+    pad_nvfp4_weight_for_cutlass,
+    slice_nvfp4_output,
+    swizzle_blockscale,
+)
+from vllm.platforms import current_platform
+from vllm.utils.flashinfer import (
+    flashinfer_scaled_fp4_mm,
+    has_flashinfer,
+    has_flashinfer_b12x_gemm,
+)
+
+from .base import NvFp4LinearKernel, NvFp4LinearLayerConfig
+
+
+class FlashInferCutlassNvFp4LinearKernel(NvFp4LinearKernel):
+    """NVFP4 GEMM via FlashInfer's CUTLASS wrapper."""
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        from vllm.model_executor.layers.quantization.utils.nvfp4_utils import (
+            cutlass_fp4_supported,
+        )
+
+        if (
+            cutlass_fp4_supported()
+            and current_platform.has_device_capability(100)
+            and has_flashinfer()
+        ):
+            return True, None
+        return False, "FlashInfer + >=sm_100 required"
+
+    @classmethod
+    def can_implement(cls, config: NvFp4LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        layer.weight_scale = torch.nn.Parameter(
+            swizzle_blockscale(layer.weight_scale.data), requires_grad=False
+        )
+        padded_weight, weights_padding_cols = pad_nvfp4_weight_for_cutlass(
+            layer.weight.data
+        )
+        layer.weight = torch.nn.Parameter(padded_weight, requires_grad=False)
+        layer.weights_padding_cols = weights_padding_cols
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        output_size = layer.output_size_per_partition
+        output_dtype = x.dtype
+        output_shape = [*x.shape[:-1], output_size]
+        weights_padding_bytes = getattr(layer, "weights_padding_cols", 0)
+
+        x_fp4, x_blockscale = scaled_fp4_quant(
+            x,
+            layer.input_global_scale_inv,
+            is_sf_swizzled_layout=True,
+            backend="flashinfer-cutlass",
+            padded_n=x.shape[-1] + weights_padding_bytes * 2,
+        )
+
+        out = flashinfer_scaled_fp4_mm(
+            x_fp4,
+            layer.weight,
+            x_blockscale,
+            layer.weight_scale,
+            layer.alpha,
+            output_dtype,
+            backend="cutlass",
+        )
+
+        out = slice_nvfp4_output(out, output_size)
+
+        if bias is not None:
+            out = out + bias
+        return out.view(*output_shape)
+
+
+class FlashInferTrtllmNvFp4LinearKernel(NvFp4LinearKernel):
+    """NVFP4 GEMM via FlashInfer's TensorRT-LLM wrapper."""
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        if has_flashinfer():
+            return True, None
+        return False, "FlashInfer required"
+
+    @classmethod
+    def can_implement(cls, config: NvFp4LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        from flashinfer import shuffle_matrix_a, shuffle_matrix_sf_a
+
+        weight = layer.weight.data
+        weight_scale = layer.weight_scale.data
+        epilogue_tile_m = 128
+
+        layer.weight = torch.nn.Parameter(
+            shuffle_matrix_a(weight.view(torch.uint8), epilogue_tile_m),
+            requires_grad=False,
+        )
+        layer.weight_scale = torch.nn.Parameter(
+            shuffle_matrix_sf_a(weight_scale.view(torch.uint8), epilogue_tile_m)
+            .reshape(weight_scale.shape)
+            .view(torch.float8_e4m3fn),
+            requires_grad=False,
+        )
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        output_size = layer.output_size_per_partition
+        output_dtype = x.dtype
+        output_shape = [*x.shape[:-1], output_size]
+
+        x_fp4, x_blockscale = scaled_fp4_quant(
+            x,
+            layer.input_global_scale_inv,
+            is_sf_swizzled_layout=True,
+            backend="flashinfer-trtllm",
+        )
+
+        out = flashinfer_scaled_fp4_mm(
+            x_fp4,
+            layer.weight,
+            x_blockscale,
+            layer.weight_scale,
+            layer.alpha,
+            output_dtype,
+            backend="trtllm",
+        )
+
+        out = slice_nvfp4_output(out, output_size)
+
+        if bias is not None:
+            out = out + bias
+        return out.view(*output_shape)
+
+
+class FlashInferCudnnNvFp4LinearKernel(NvFp4LinearKernel):
+    """NVFP4 GEMM via FlashInfer's cuDNN wrapper."""
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        if has_flashinfer():
+            return True, None
+        return False, "FlashInfer required"
+
+    @classmethod
+    def can_implement(cls, config: NvFp4LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        # cuDNN uses the same swizzled + padded layout as CUTLASS
+        layer.weight_scale = torch.nn.Parameter(
+            swizzle_blockscale(layer.weight_scale.data), requires_grad=False
+        )
+        padded_weight, weights_padding_cols = pad_nvfp4_weight_for_cutlass(
+            layer.weight.data
+        )
+        layer.weight = torch.nn.Parameter(padded_weight, requires_grad=False)
+        layer.weights_padding_cols = weights_padding_cols
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        output_size = layer.output_size_per_partition
+        output_dtype = x.dtype
+        output_shape = [*x.shape[:-1], output_size]
+        weights_padding_bytes = getattr(layer, "weights_padding_cols", 0)
+
+        x_fp4, x_blockscale = scaled_fp4_quant(
+            x,
+            layer.input_global_scale_inv,
+            is_sf_swizzled_layout=True,
+            backend="flashinfer-cudnn",
+            padded_n=x.shape[-1] + weights_padding_bytes * 2,
+        )
+
+        out = flashinfer_scaled_fp4_mm(
+            x_fp4,
+            layer.weight,
+            x_blockscale,
+            layer.weight_scale,
+            layer.alpha,
+            output_dtype,
+            backend="cudnn",
+        )
+
+        out = slice_nvfp4_output(out, output_size)
+
+        if bias is not None:
+            out = out + bias
+        return out.view(*output_shape)
+
+
+class FlashInferB12xNvFp4LinearKernel(NvFp4LinearKernel):
+    """NVFP4 GEMM via FlashInfer's b12x CuTe DSL warp-level MMA kernel (SM120+)."""
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        if current_platform.has_device_capability(120) and has_flashinfer_b12x_gemm():
+            return True, None
+        return (
+            False,
+            "FlashInfer b12x requires SM120+ and FlashInfer "
+            "with Sm120BlockScaledDenseGemmKernel",
+        )
+
+    @classmethod
+    def can_implement(cls, config: NvFp4LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        layer.weight_scale = torch.nn.Parameter(
+            swizzle_blockscale(layer.weight_scale.data), requires_grad=False
+        )
+        padded_weight, weights_padding_cols = pad_nvfp4_weight_for_cutlass(
+            layer.weight.data
+        )
+        layer.weight = torch.nn.Parameter(padded_weight, requires_grad=False)
+        layer.weights_padding_cols = weights_padding_cols
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        output_size = layer.output_size_per_partition
+        output_dtype = x.dtype
+        output_shape = [*x.shape[:-1], output_size]
+
+        x_fp4, x_blockscale = scaled_fp4_quant(
+            x,
+            layer.input_global_scale_inv,
+            is_sf_swizzled_layout=True,
+            backend="b12x",
+        )
+
+        x_fp4 = pad_nvfp4_activation_for_cutlass(
+            x_fp4, getattr(layer, "weights_padding_cols", 0)
+        )
+
+        out = flashinfer_scaled_fp4_mm(
+            x_fp4,
+            layer.weight,
+            x_blockscale,
+            layer.weight_scale,
+            layer.alpha,
+            output_dtype,
+            backend="b12x",
+        )
+
+        out = slice_nvfp4_output(out, output_size)
+
+        if bias is not None:
+            out = out + bias
+        return out.view(*output_shape)
diff --git a/vllm/model_executor/kernels/linear/nvfp4/marlin.py b/vllm/model_executor/kernels/linear/nvfp4/marlin.py
new file mode 100644
index 000000000000..a05d6823c881
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/nvfp4/marlin.py
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+from vllm.logger import init_logger
+from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
+    apply_fp4_marlin_linear,
+    is_fp4_marlin_supported,
+    prepare_fp4_layer_for_marlin,
+)
+
+from .base import NvFp4LinearKernel, NvFp4LinearLayerConfig
+
+logger = init_logger(__name__)
+
+
+class MarlinNvFp4LinearKernel(NvFp4LinearKernel):
+    """NVFP4 weight-only GEMM via Marlin (W4A16)."""
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        if is_fp4_marlin_supported():
+            return True, None
+        return False, "Marlin FP4 not available"
+
+    @classmethod
+    def can_implement(cls, config: NvFp4LinearLayerConfig) -> tuple[bool, str | None]:
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        logger.warning_once(
+            "Your GPU does not have native support for FP4 computation but "
+            "FP4 quantization is being used. Weight-only FP4 compression "
+            "will be used leveraging the Marlin kernel. This may degrade "
+            "performance for compute-heavy workloads."
+        )
+        prepare_fp4_layer_for_marlin(layer)
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        return apply_fp4_marlin_linear(
+            input=x,
+            weight=layer.weight,
+            weight_scale=layer.weight_scale,
+            weight_global_scale=layer.weight_global_scale,
+            workspace=layer.workspace,
+            size_n=layer.output_size_per_partition,
+            size_k=layer.input_size_per_partition,
+            bias=bias,
+        )
diff --git a/vllm/model_executor/kernels/linear/scaled_mm/BlockScaledMMLinearKernel.py b/vllm/model_executor/kernels/linear/scaled_mm/BlockScaledMMLinearKernel.py
new file mode 100644
index 000000000000..d738796ce6bf
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/scaled_mm/BlockScaledMMLinearKernel.py
@@ -0,0 +1,209 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import ClassVar
+
+import torch
+from typing_extensions import Self
+
+from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
+from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+    process_fp8_weight_block_strategy,
+)
+from vllm.model_executor.utils import replace_parameter
+
+from ..base import (
+    FP8Params,
+    MMLinearKernel,
+)
+from .ScaledMMLinearKernel import FP8ScaledMMLinearLayerConfig
+
+
+@dataclass
+class FP8BlockParams(FP8Params):
+    weight_scale_inv: torch.Tensor | None
+    weight_scale: torch.Tensor | None
+
+    WEIGHT_SCALE_INV: ClassVar[str] = "weight_scale_inv"
+
+    @classmethod
+    def from_layer(cls, layer: torch.nn.Module) -> Self:
+        return cls(
+            weight=getattr(layer, cls.WEIGHT),
+            weight_scale_inv=getattr(layer, cls.WEIGHT_SCALE_INV, None),
+            weight_scale=getattr(layer, cls.WEIGHT_SCALE, None),
+            input_scale=getattr(layer, cls.INPUT_SCALE, None),
+            input_scale_ub=getattr(layer, cls.INPUT_SCALE_UB, None),
+        )
+
+
+class Fp8BlockScaledMMLinearKernel(
+    MMLinearKernel[FP8ScaledMMLinearLayerConfig, FP8BlockParams], ABC
+):
+    # Set to False in subclasses that accept BF16 input directly (e.g. FlashInfer)
+    # and therefore do not need the input quantization step in apply_weights.
+    apply_input_quant: ClassVar[bool] = True
+
+    def __init__(self, config: FP8ScaledMMLinearLayerConfig) -> None:
+        super().__init__(config)
+        act_scale_descriptor = config.activation_quant_key.scale
+        self.weight_group_shape = config.weight_quant_key.scale.group_shape
+        self.quant_fp8 = QuantFP8(
+            static=act_scale_descriptor.static,
+            group_shape=act_scale_descriptor.group_shape,
+            num_token_padding=self.get_output_padding(),
+            use_ue8m0=False,
+        )
+        self.use_triton = False
+
+    @classmethod
+    def can_implement(cls, config: FP8ScaledMMLinearLayerConfig):
+        act_quant_key = config.activation_quant_key
+        if act_quant_key.scale.static:
+            return (
+                False,
+                "Only dynamic per token group activation quantization is supported.",
+            )
+
+        return True, None
+
+    def _get_layer_params(self, layer: torch.nn.Module, **kwargs) -> FP8BlockParams:
+        return FP8BlockParams.from_layer(layer)
+
+    def process_weights_after_loading(self, layer: torch.nn.Module):
+        params = self._get_layer_params(layer)
+        # Fp8LinearMethod registered weight scale
+        # buffer as weight_scale_inv unlike compressed tensors.
+        weight_scale = (
+            params.weight_scale
+            if params.weight_scale_inv is None
+            else params.weight_scale_inv
+        )
+        scale_attr_name = (
+            params.WEIGHT_SCALE
+            if params.weight_scale_inv is None
+            else params.WEIGHT_SCALE_INV
+        )
+        new_weight, new_weight_scale = process_fp8_weight_block_strategy(
+            params.weight,
+            weight_scale,
+        )
+
+        replace_parameter(layer, params.WEIGHT, new_weight.data)
+        replace_parameter(layer, scale_attr_name, new_weight_scale.data)
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+        **kwargs,
+    ) -> torch.Tensor:
+        out_dtype = self.config.out_dtype
+        params = self._get_layer_params(layer)
+        weight = params.weight
+        weight_scale = (
+            params.weight_scale
+            if params.weight_scale_inv is None
+            else params.weight_scale_inv
+        )
+        input_scale = params.input_scale
+        scale_up = params.input_scale_ub
+
+        # View input as 2D matrix for fp8 methods
+        input_2d = x.view(-1, x.shape[-1])
+        output_shape = [*x.shape[:-1], weight.shape[0]]
+
+        if self.apply_input_quant:
+            q_input, input_scale = self.quant_fp8(
+                input_2d, input_scale, scale_up, use_triton=self.use_triton
+            )
+        else:
+            q_input = input_2d
+            # Provide a concrete placeholder so apply_block_scaled_mm args are
+            # always Tensors. Subclasses with apply_input_quant=False must not
+            # use As in apply_block_scaled_mm.
+            input_scale = (
+                input_scale if input_scale is not None else input_2d.new_ones(1)
+            )
+
+        output = self.apply_block_scaled_mm(
+            A=q_input,
+            B=weight,
+            As=input_scale,
+            Bs=weight_scale,
+        )
+
+        if bias is not None:
+            output = output + bias
+        return output.to(dtype=out_dtype).view(*output_shape)
+
+    @abstractmethod
+    def apply_block_scaled_mm(
+        self,
+        A: torch.Tensor,
+        B: torch.Tensor,
+        As: torch.Tensor,
+        Bs: torch.Tensor,
+    ) -> torch.Tensor:
+        raise NotImplementedError
+
+
+class Fp8BlockScaledDynamicMMLinearKernel(Fp8BlockScaledMMLinearKernel, ABC):
+    """Dynamic FP8 block-scaled kernel that dispatches at runtime.
+
+    Extends Fp8BlockScaledMMLinearKernel to inherit apply_weights and overrides
+    apply_block_scaled_mm to dispatch between two sub-kernels using torch.cond.
+
+    Subclasses must define:
+        base_type:     The primary kernel class.
+        fallback_type: The fallback kernel class.
+    """
+
+    base_type: ClassVar[type[Fp8BlockScaledMMLinearKernel]]
+    fallback_type: ClassVar[type[Fp8BlockScaledMMLinearKernel]]
+
+    def __init__(self, config: "FP8ScaledMMLinearLayerConfig") -> None:
+        super().__init__(config)
+        self.base = self.base_type(config)
+        self.fallback = self.fallback_type(config)
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        is_base_supported, reason_1 = cls.base_type.is_supported(compute_capability)
+        is_fallback_supported, reason_2 = cls.fallback_type.is_supported(
+            compute_capability
+        )
+        if is_base_supported and is_fallback_supported:
+            return True, None
+        if not is_base_supported and not is_fallback_supported:
+            return (
+                False,
+                f"base is not supported due to {reason_1}; "
+                f"fallback is not supported due to {reason_2}",
+            )
+        if not is_base_supported:
+            return False, f"base is not supported due to {reason_1}"
+        return False, f"fallback is not supported due to {reason_2}"
+
+    @classmethod
+    def can_implement(
+        cls, config: "FP8ScaledMMLinearLayerConfig"
+    ) -> tuple[bool, str | None]:
+        can_implement_base, reason_1 = cls.base_type.can_implement(config)
+        can_implement_fallback, reason_2 = cls.fallback_type.can_implement(config)
+        if can_implement_base and can_implement_fallback:
+            return True, None
+        if not can_implement_base and not can_implement_fallback:
+            return (
+                False,
+                f"base cannot implement due to {reason_1}; "
+                f"fallback cannot implement due to {reason_2}",
+            )
+        if not can_implement_base:
+            return False, f"base cannot implement due to {reason_1}"
+        return False, f"fallback cannot implement due to {reason_2}"
diff --git a/vllm/model_executor/kernels/linear/scaled_mm/ScaledMMLinearKernel.py b/vllm/model_executor/kernels/linear/scaled_mm/ScaledMMLinearKernel.py
index cdb69b06f5cd..b9f6f0c8f873 100644
--- a/vllm/model_executor/kernels/linear/scaled_mm/ScaledMMLinearKernel.py
+++ b/vllm/model_executor/kernels/linear/scaled_mm/ScaledMMLinearKernel.py
@@ -14,14 +14,11 @@
 )
 from vllm.platforms import current_platform
 
-
-@dataclass
-class ScaledMMLinearLayerConfig:
-    pass
+from ..base import MMLinearLayerConfig
 
 
 @dataclass
-class Int8ScaledMMLinearLayerConfig(ScaledMMLinearLayerConfig):
+class Int8ScaledMMLinearLayerConfig(MMLinearLayerConfig):
     # TODO: Change to QuantKey like FP8ScaledMMLinearLayerConfig
     is_static_input_scheme: bool
     is_channelwise: bool
@@ -29,10 +26,12 @@ class Int8ScaledMMLinearLayerConfig(ScaledMMLinearLayerConfig):
 
 
 @dataclass
-class FP8ScaledMMLinearLayerConfig(ScaledMMLinearLayerConfig):
+class FP8ScaledMMLinearLayerConfig(MMLinearLayerConfig):
     weight_quant_key: QuantKey
     activation_quant_key: QuantKey
-    out_dtype: torch.dtype | None
+    weight_shape: tuple[int, int]
+    input_dtype: torch.dtype
+    out_dtype: torch.dtype
 
 
 _FP8ParamsT = tuple[
@@ -50,7 +49,7 @@ class FP8ScaledMMLinearLayerConfig(ScaledMMLinearLayerConfig):
 ]
 
 _ParamsT = TypeVar("_ParamsT", _Int8ParamsT, _FP8ParamsT)
-_ConfigT = TypeVar("_ConfigT", bound=ScaledMMLinearLayerConfig)
+_ConfigT = TypeVar("_ConfigT", bound=MMLinearLayerConfig)
 
 
 class ScaledMMLinearKernel(Generic[_ConfigT, _ParamsT], ABC):
diff --git a/vllm/model_executor/kernels/linear/scaled_mm/__init__.py b/vllm/model_executor/kernels/linear/scaled_mm/__init__.py
index 2323a02ba593..f8f12f7b0cba 100644
--- a/vllm/model_executor/kernels/linear/scaled_mm/__init__.py
+++ b/vllm/model_executor/kernels/linear/scaled_mm/__init__.py
@@ -4,7 +4,11 @@
 from vllm.model_executor.kernels.linear.scaled_mm.aiter import (
     AiterInt8ScaledMMLinearKernel,
 )
+from vllm.model_executor.kernels.linear.scaled_mm.BlockScaledMMLinearKernel import (
+    Fp8BlockScaledMMLinearKernel,
+)
 from vllm.model_executor.kernels.linear.scaled_mm.cpu import (
+    CPUFp8BlockScaledMMKernel,
     CPUInt8ScaledMMLinearKernel,
 )
 from vllm.model_executor.kernels.linear.scaled_mm.cutlass import (
@@ -31,7 +35,6 @@
     Int8ScaledMMLinearKernel,
     Int8ScaledMMLinearLayerConfig,
     ScaledMMLinearKernel,
-    ScaledMMLinearLayerConfig,
 )
 from vllm.model_executor.kernels.linear.scaled_mm.triton import (
     TritonInt8ScaledMMLinearKernel,
@@ -55,4 +58,6 @@
     "RowWiseTorchFP8ScaledMMLinearKernel",
     "ROCmFP8ScaledMMLinearKernel",
     "TritonInt8ScaledMMLinearKernel",
+    "Fp8BlockScaledMMLinearKernel",
+    "CPUFp8BlockScaledMMKernel",
 ]
diff --git a/vllm/model_executor/kernels/linear/scaled_mm/aiter.py b/vllm/model_executor/kernels/linear/scaled_mm/aiter.py
index 1945a1e4354d..5ded5ca798ad 100644
--- a/vllm/model_executor/kernels/linear/scaled_mm/aiter.py
+++ b/vllm/model_executor/kernels/linear/scaled_mm/aiter.py
@@ -5,11 +5,27 @@
 import torch
 
 from vllm import _custom_ops as ops
-from vllm._aiter_ops import rocm_aiter_ops
+from vllm._aiter_ops import (
+    rocm_aiter_ops,
+)
+from vllm.logger import init_logger
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    GroupShape,
+)
+from vllm.model_executor.utils import replace_parameter
 from vllm.platforms import current_platform
 
+from .BlockScaledMMLinearKernel import (
+    Fp8BlockScaledMMLinearKernel,
+)
 from .cutlass import CutlassInt8ScaledMMLinearKernel
-from .ScaledMMLinearKernel import Int8ScaledMMLinearLayerConfig
+from .ScaledMMLinearKernel import (
+    FP8ScaledMMLinearKernel,
+    FP8ScaledMMLinearLayerConfig,
+    Int8ScaledMMLinearLayerConfig,
+)
+
+logger = init_logger(__name__)
 
 
 class AiterInt8ScaledMMLinearKernel(CutlassInt8ScaledMMLinearKernel):
@@ -106,4 +122,217 @@ def apply_weights(
         # a to be [M, K]
         # b to be [N, K]
         # CutlassInt8ScaledMMLinearKernel prepare weight `w_q` in [K, N] format
-        return rocm_aiter_ops.gemm_a8w8(x_q, w_q.t(), x_s, w_s, bias, out_dtype)
+        return rocm_aiter_ops.w8a8_gemm(x_q, w_q.t(), x_s, w_s, bias, out_dtype)
+
+
+class AiterPreshuffledPerTokenFp8ScaledMMLinearKernel(FP8ScaledMMLinearKernel):
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        if not current_platform.is_rocm():
+            return False, "requires ROCm."
+        if not rocm_aiter_ops.is_linear_fp8_enabled():
+            return (
+                False,
+                "requires setting `VLLM_ROCM_USE_AITER=1` "
+                "and `VLLM_ROCM_USE_AITER_LINEAR=1`. "
+                "`VLLM_ROCM_USE_AITER_LINEAR` default is True.",
+            )
+        try:
+            import aiter  # noqa: F401
+        except Exception:
+            return False, "requires aiter library to be installed."
+        return True, None
+
+    @classmethod
+    def can_implement(cls, c: FP8ScaledMMLinearLayerConfig) -> tuple[bool, str | None]:
+        is_ptpc = (
+            c.activation_quant_key.scale.group_shape.is_per_token()
+            and c.weight_quant_key.scale.group_shape.is_per_channel()
+        )
+        if c.weight_shape is None:
+            return False, "weight_shape is required for Aiter kernels"
+        N, K = c.weight_shape
+        fp8_dtype = current_platform.fp8_dtype()
+
+        if c.out_dtype is not torch.bfloat16:
+            return False, "requires bfloat16 output dtype."
+
+        if not is_ptpc:
+            return (
+                False,
+                "requires per token activation scales and per channel weight scales.",
+            )
+
+        if not (N % 16 == 0 and K % 16 == 0):
+            return (
+                False,
+                f"requires N and K dimensions divisible by 16, received "
+                f"N={N} and K={K}.",
+            )
+
+        # Aiter's shuffled per-token Gemm performs better than torch only when its
+        # tuned.
+        if not rocm_aiter_ops.is_shuffled_per_token_w8a8_gemm_tuned(N, K, fp8_dtype):
+            return (
+                False,
+                f"requires a tuned configuration for N: {N} and K: {K} "
+                f"and fp8 dtype {fp8_dtype}.",
+            )
+
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        w_name, *_ = self.layer_param_names
+        w, *_ = self._get_layer_params(layer)
+
+        replace_parameter(
+            layer,
+            w_name,
+            torch.nn.Parameter(
+                rocm_aiter_ops.shuffle_weight(w.t().contiguous()).data,
+                requires_grad=False,
+            ),
+        )
+
+    def apply_scaled_mm(
+        self,
+        *,
+        A: torch.Tensor,
+        B: torch.Tensor,
+        out_dtype: torch.dtype,
+        As: torch.Tensor,
+        Bs: torch.Tensor,
+        bias: torch.Tensor | None,
+        output_shape: list,
+    ) -> torch.Tensor:
+        return rocm_aiter_ops.preshuffled_per_token_w8a8_gemm(
+            A, B, As, Bs, bias, out_dtype
+        )
+
+
+class AiterPerTokenFp8ScaledMMLinearKernel(FP8ScaledMMLinearKernel):
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        return AiterPreshuffledPerTokenFp8ScaledMMLinearKernel.is_supported(
+            compute_capability
+        )
+
+    @classmethod
+    def can_implement(cls, c: FP8ScaledMMLinearLayerConfig) -> tuple[bool, str | None]:
+        is_ptpc = (
+            c.activation_quant_key.scale.group_shape.is_per_token()
+            and c.weight_quant_key.scale.group_shape.is_per_channel()
+        )
+        if c.weight_shape is None:
+            return False, "weight_shape is required for Aiter kernels"
+        N, K = c.weight_shape
+        fp8_dtype = current_platform.fp8_dtype()
+
+        if not is_ptpc:
+            return (
+                False,
+                "requires per token activation scales and per channel weight scales.",
+            )
+
+        # Aiter's per-token Gemm performs better than torch oonly when its
+        # tuned.
+        if not rocm_aiter_ops.is_per_token_w8a8_gemm_tuned(N, K, fp8_dtype):
+            return (
+                False,
+                f"requires a tuned configuration for N: {N} and K: {K} "
+                f"and fp8 dtype {fp8_dtype}.",
+            )
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        w_name, *_ = self.layer_param_names
+        w, *_ = self._get_layer_params(layer)
+
+        replace_parameter(
+            layer,
+            w_name,
+            torch.nn.Parameter(w.t(), requires_grad=False),
+        )
+
+    def apply_scaled_mm(
+        self,
+        *,
+        A: torch.Tensor,
+        B: torch.Tensor,
+        out_dtype: torch.dtype,
+        As: torch.Tensor,
+        Bs: torch.Tensor,
+        bias: torch.Tensor | None,
+        output_shape: list,
+    ) -> torch.Tensor:
+        return rocm_aiter_ops.w8a8_gemm(A, B, As, Bs, bias, out_dtype)
+
+
+class AiterFp8BlockScaledMMKernel(Fp8BlockScaledMMLinearKernel):
+    def __init__(self, config: FP8ScaledMMLinearLayerConfig):
+        super().__init__(config)
+        n, k = config.weight_shape
+
+        self.use_triton = (
+            not current_platform.is_fp8_fnuz()
+            and rocm_aiter_ops.is_triton_gemm_w8a8_tuned(n, k)
+        )
+
+    @classmethod
+    def is_supported(cls, compute_capability=None):
+        return (
+            rocm_aiter_ops.is_linear_enabled(),
+            "Only supported on ROCm platform \
+                with aiter package installed.",
+        )
+
+    @classmethod
+    def can_implement(cls, config: FP8ScaledMMLinearLayerConfig):
+        can_implement_base, reason = super().can_implement(config)
+        if not can_implement_base:
+            return can_implement_base, reason
+
+        act_quant_desc = config.activation_quant_key.scale
+        if act_quant_desc.group_shape != GroupShape(1, 128):
+            return (
+                False,
+                "Supports only dynamic per token group activation "
+                "quantization with group_shape=(1,128).",
+            )
+        return True, None
+
+    def apply_block_scaled_mm(
+        self,
+        A: torch.Tensor,
+        B: torch.Tensor,
+        As: torch.Tensor,
+        Bs: torch.Tensor,
+    ) -> torch.Tensor:
+        if As.dtype != Bs.dtype:
+            from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+                _upcast_e8m0_to_fp32,
+            )
+
+            if As.dtype == torch.float8_e8m0fnu:
+                As = _upcast_e8m0_to_fp32(As).contiguous()
+            else:
+                As = As.to(torch.float32)
+
+            if Bs.dtype == torch.float8_e8m0fnu:
+                Bs = _upcast_e8m0_to_fp32(Bs).contiguous()
+            else:
+                Bs = Bs.to(torch.float32)
+
+        out_dtype = self.config.out_dtype
+        if self.use_triton:
+            gemm_a8w8_blockscale_op = rocm_aiter_ops.triton_gemm_a8w8_blockscale
+        else:
+            gemm_a8w8_blockscale_op = rocm_aiter_ops.gemm_a8w8_blockscale
+
+        return gemm_a8w8_blockscale_op(
+            A, B, As, Bs, list(self.weight_group_shape), output_dtype=out_dtype
+        )
diff --git a/vllm/model_executor/kernels/linear/scaled_mm/cpu.py b/vllm/model_executor/kernels/linear/scaled_mm/cpu.py
index 3d67a73af433..083cb473aaca 100644
--- a/vllm/model_executor/kernels/linear/scaled_mm/cpu.py
+++ b/vllm/model_executor/kernels/linear/scaled_mm/cpu.py
@@ -14,6 +14,10 @@
 from vllm.platforms import current_platform
 from vllm.platforms.interface import CpuArchEnum
 
+from .BlockScaledMMLinearKernel import (
+    Fp8BlockScaledMMLinearKernel,
+    FP8ScaledMMLinearLayerConfig,
+)
 from .ScaledMMLinearKernel import (
     Int8ScaledMMLinearKernel,
     Int8ScaledMMLinearLayerConfig,
@@ -215,3 +219,109 @@ def _apply_weights_sgl(
             x.dtype,
             True,
         )
+
+
+class CPUFp8BlockScaledMMKernel(Fp8BlockScaledMMLinearKernel):
+    """FP8 W8A16 block-quantized GEMM via AMX BRGEMM on CPU."""
+
+    # Input stays BF16 — no FP8 activation quantization.
+    apply_input_quant = False
+
+    @classmethod
+    def is_supported(
+        cls, compute_capability: int | None = None
+    ) -> tuple[bool, str | None]:
+        if not current_platform.is_cpu():
+            return False, "requires CPU platform."
+        if not torch.cpu._is_amx_tile_supported():
+            return False, "requires AMX tile support (Sapphire Rapids or newer)."
+        if not ops._supports_cpu_fp8_w8a16:
+            return False, "fp8_scaled_mm_cpu op not available."
+        return True, None
+
+    @classmethod
+    def can_implement(
+        cls, config: FP8ScaledMMLinearLayerConfig
+    ) -> tuple[bool, str | None]:
+        # Validate weight block shape
+        weight_gs = config.weight_quant_key.scale.group_shape
+        if weight_gs.col <= 0 or weight_gs.col != 128:
+            return False, (
+                "CPU FP8 kernel requires K-dimension block size of 128, "
+                f"got {weight_gs.col}."
+            )
+        if weight_gs.row <= 0 or weight_gs.row % 32 != 0:
+            return False, (
+                "CPU FP8 kernel requires N-dimension block size to be "
+                f"a positive multiple of 32, got {weight_gs.row}."
+            )
+        if config.out_dtype not in (torch.bfloat16, torch.float32):
+            return False, "Only bfloat16/float32 output dtype supported."
+        return True, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        # Skip the base class process (FP8 padding / fnuz normalization)
+        # which is GPU-oriented.  Instead, VNNI-prepack weights for AMX.
+        params = self._get_layer_params(layer)
+        packed_weight = torch.ops._C.convert_weight_packed(params.weight)
+        replace_parameter(
+            layer,
+            params.WEIGHT,
+            torch.nn.Parameter(packed_weight, requires_grad=False),
+        )
+
+        # Re-wrap scale as a plain Parameter so the kernel can read it
+        # without weight-loader metadata interfering.
+        scale_attr = (
+            params.WEIGHT_SCALE_INV
+            if params.weight_scale_inv is not None
+            else params.WEIGHT_SCALE
+        )
+        weight_scale = (
+            params.weight_scale_inv
+            if params.weight_scale_inv is not None
+            else params.weight_scale
+        )
+        assert weight_scale is not None
+        replace_parameter(
+            layer,
+            scale_attr,
+            torch.nn.Parameter(weight_scale.data, requires_grad=False),
+        )
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+        **kwargs,
+    ) -> torch.Tensor:
+        params = self._get_layer_params(layer)
+        weight_scale = (
+            params.weight_scale_inv
+            if params.weight_scale_inv is not None
+            else params.weight_scale
+        )
+
+        x_2d = x.reshape(-1, x.shape[-1]) if x.dim() > 2 else x
+        out = torch.ops._C.fp8_scaled_mm_cpu(
+            x_2d,
+            params.weight,
+            weight_scale,
+            list(self.weight_group_shape),
+            bias,
+            x.dtype,
+            True,  # is_vnni (weight already prepacked)
+        )
+        return out.reshape(x.shape[:-1] + (out.size(-1),)) if x.dim() > 2 else out
+
+    def apply_block_scaled_mm(
+        self,
+        A: torch.Tensor,
+        B: torch.Tensor,
+        As: torch.Tensor,
+        Bs: torch.Tensor,
+    ) -> torch.Tensor:
+        raise NotImplementedError(
+            "CPUFp8BlockScaledMMKernel overrides apply_weights directly."
+        )
diff --git a/vllm/model_executor/kernels/linear/scaled_mm/cutlass.py b/vllm/model_executor/kernels/linear/scaled_mm/cutlass.py
index bcaf57bcbb26..9e65edb851e3 100644
--- a/vllm/model_executor/kernels/linear/scaled_mm/cutlass.py
+++ b/vllm/model_executor/kernels/linear/scaled_mm/cutlass.py
@@ -2,15 +2,25 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 
+from collections.abc import Sequence
+
 import torch
 
 from vllm import _custom_ops as ops
+from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
 from vllm.model_executor.layers.quantization.utils import replace_parameter
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    GroupShape,
+)
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
+    CUTLASS_BLOCK_FP8_SUPPORTED,
     convert_to_channelwise,
 )
+from vllm.model_executor.utils import set_weight_attrs
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import direct_register_custom_op
 
+from .BlockScaledMMLinearKernel import Fp8BlockScaledMMLinearKernel
 from .ScaledMMLinearKernel import (
     FP8ScaledMMLinearKernel,
     FP8ScaledMMLinearLayerConfig,
@@ -143,6 +153,12 @@ def apply_weights(
 
 
 class CutlassFP8ScaledMMLinearKernel(FP8ScaledMMLinearKernel):
+    def __init__(
+        self, c: FP8ScaledMMLinearLayerConfig, layer_param_names: Sequence[str]
+    ) -> None:
+        self.logical_output_size: int | None = None
+        super().__init__(c, layer_param_names)
+
     @classmethod
     def is_supported(
         cls, compute_capability: int | None = None
@@ -155,6 +171,67 @@ def is_supported(
     def can_implement(cls, c: FP8ScaledMMLinearLayerConfig) -> tuple[bool, str | None]:
         return True, None
 
+    @staticmethod
+    def _pad_to_alignment(
+        x: torch.Tensor, dim: int, alignment: int, value: float = 0.0
+    ) -> torch.Tensor:
+        """Pad tensor ``x`` along ``dim`` to the next multiple of
+        ``alignment``."""
+        remainder = x.shape[dim] % alignment
+        if remainder == 0:
+            return x
+        pad_size = alignment - remainder
+        pad_spec = [0] * (2 * x.dim())
+        pad_spec[-(2 * dim + 1)] = pad_size
+        return torch.nn.functional.pad(x, pad_spec, value=value)
+
+    @staticmethod
+    def padded_weight_loader(param: torch.Tensor, loaded_weight: torch.Tensor) -> None:
+        if loaded_weight.shape != param.shape:
+            slices = tuple(slice(0, s) for s in loaded_weight.shape)
+            param.data[slices].copy_(loaded_weight)
+        else:
+            param.data.copy_(loaded_weight.view(param.shape))
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        weight_name, weight_scale_name, _, _ = self.layer_param_names
+        weight = getattr(layer, weight_name)
+
+        # keep the logical output width so runtime can slice away static padding.
+        self.logical_output_size = weight.shape[1]
+
+        pad_k = (16 - weight.shape[0] % 16) % 16
+        pad_n = (16 - weight.shape[1] % 16) % 16
+        if pad_k == 0 and pad_n == 0:
+            return
+
+        # B is column-major [K, N]
+        padded_weight = torch.nn.functional.pad(
+            weight.t().contiguous(),
+            (0, pad_k, 0, pad_n),
+        ).t()
+        replace_parameter(layer, weight_name, padded_weight.data)
+        set_weight_attrs(
+            getattr(layer, weight_name),
+            {
+                "weight_loader": self.padded_weight_loader,
+            },
+        )
+
+        weight_scale = getattr(layer, weight_scale_name, None)
+        if weight_scale is not None and pad_n > 0 and weight_scale.numel() > 1:
+            flat_scale = weight_scale.reshape(-1)
+            padded_scale = self._pad_to_alignment(
+                flat_scale, dim=0, alignment=16, value=1.0
+            ).view(-1, *weight_scale.shape[1:])
+            replace_parameter(layer, weight_scale_name, padded_scale.data)
+            set_weight_attrs(
+                getattr(layer, weight_name),
+                {
+                    "weight_loader": self.padded_weight_loader,
+                },
+            )
+
     def apply_scaled_mm(
         self,
         *,
@@ -166,8 +243,162 @@ def apply_scaled_mm(
         bias: torch.Tensor | None,
         output_shape: list,
     ) -> torch.Tensor:
-        # Fused GEMM_DQ
+        padded_k, padded_n = B.shape
+        output_size = self.logical_output_size
+        assert output_size is not None
+        pad_k = padded_k - A.shape[1]
+        pad_n = padded_n - output_size
+
+        if pad_k > 0:
+            A = self._pad_to_alignment(A, dim=1, alignment=16)
+        if pad_n > 0 and bias is not None:
+            bias = self._pad_to_alignment(bias, dim=0, alignment=16)
+
         output = ops.cutlass_scaled_mm(
             A, B, out_dtype=out_dtype, scale_a=As, scale_b=Bs, bias=bias
         )
-        return output.view(*output_shape)
+
+        if pad_n > 0:
+            output = output[..., :output_size].contiguous()
+
+        return output.view(*output_shape[:-1], output_size)
+
+
+class CutlassFp8BlockScaledMMKernel(Fp8BlockScaledMMLinearKernel):
+    def __init__(self, config: FP8ScaledMMLinearLayerConfig) -> None:
+        super().__init__(config)
+        act_scale_descriptor = config.activation_quant_key.scale
+        self.weight_group_shape = config.weight_quant_key.scale.group_shape
+        self.quant_fp8 = QuantFP8(
+            static=act_scale_descriptor.static,
+            group_shape=act_scale_descriptor.group_shape,
+            num_token_padding=self.get_output_padding(),
+            use_ue8m0=False,
+            column_major_scales=True,
+        )
+        self.is_hopper = current_platform.is_device_capability(90)
+
+    @classmethod
+    def is_supported(cls, compute_capability=None):
+        if not CUTLASS_BLOCK_FP8_SUPPORTED:
+            return (
+                False,
+                "The device compute capability of"
+                f"{compute_capability} is not supported.",
+            )
+        return True, None
+
+    @classmethod
+    def can_implement(cls, config: FP8ScaledMMLinearLayerConfig):
+        can_implement_base, reason = super().can_implement(config)
+        if not can_implement_base:
+            return can_implement_base, reason
+
+        act_quant_desc = config.activation_quant_key.scale
+        if act_quant_desc.group_shape != GroupShape(1, 128):
+            return (
+                False,
+                "Supports only dynamic per token group activation "
+                "quantization with group_shape=(1,128).",
+            )
+        return True, None
+
+    def apply_block_scaled_mm(
+        self,
+        A: torch.Tensor,
+        B: torch.Tensor,
+        As: torch.Tensor,
+        Bs: torch.Tensor,
+    ) -> torch.Tensor:
+        out_dtype = self.config.out_dtype
+        if self.is_hopper:
+            return torch.ops.vllm.padded_cutlass(
+                A,
+                B,
+                As,
+                Bs,
+                list(self.weight_group_shape),
+                out_dtype,
+            )
+        else:
+            return ops.cutlass_scaled_mm(
+                A,
+                B.T,
+                out_dtype=out_dtype,
+                scale_a=As,
+                scale_b=Bs.T,
+            )
+
+
+def cutlass_scaled_mm(
+    A: torch.Tensor,
+    B: torch.Tensor,
+    As: torch.Tensor,
+    Bs: torch.Tensor,
+    block_size: list[int],
+    output_dtype: torch.dtype = torch.float16,
+) -> torch.Tensor:
+    return ops.cutlass_scaled_mm(
+        A,
+        B.T,
+        out_dtype=output_dtype,
+        scale_a=As,
+        scale_b=Bs.T,
+    )
+
+
+def _padded_cutlass(
+    qx: torch.Tensor,
+    weight: torch.Tensor,
+    x_scale: torch.Tensor,
+    weight_scale: torch.Tensor,
+    block_size: list[int],
+    output_dtype: torch.dtype,
+) -> torch.Tensor:
+    pad_multiple = 4
+    dim = qx.shape[0]
+    padded = (
+        dim if dim % pad_multiple == 0 else dim + pad_multiple - (dim % pad_multiple)
+    )
+
+    has_pad = padded > dim
+
+    if has_pad:
+        padded_shape = [padded, *qx.shape[1:]]
+        padded_qx = torch.zeros(padded_shape, device=qx.device, dtype=qx.dtype)
+        padded_qx[0 : qx.shape[0], ...].copy_(qx)
+
+        padded_x_scale_shape = [*x_scale.shape[1:], padded]
+        padded_x_scale = torch.ones(
+            padded_x_scale_shape, device=x_scale.device, dtype=x_scale.dtype
+        ).permute(-1, -2)
+        padded_x_scale[0 : x_scale.shape[0], ...].copy_(x_scale)
+
+        output = cutlass_scaled_mm(
+            padded_qx, weight, padded_x_scale, weight_scale, block_size, output_dtype
+        )
+        return output[0 : qx.shape[0], ...]
+    else:
+        return cutlass_scaled_mm(
+            qx, weight, x_scale, weight_scale, block_size, output_dtype
+        )
+
+
+def _padded_cutlass_fake(
+    qx: torch.Tensor,
+    weight: torch.Tensor,
+    x_scale: torch.Tensor,
+    weight_scale: torch.Tensor,
+    block_size: list[int],
+    output_dtype: torch.dtype,
+) -> torch.Tensor:
+    return torch.empty(
+        (qx.size(0), weight.size(0)), dtype=output_dtype, device=qx.device
+    )
+
+
+direct_register_custom_op(
+    "padded_cutlass",
+    _padded_cutlass,
+    fake_impl=_padded_cutlass_fake,
+)
diff --git a/vllm/model_executor/kernels/linear/scaled_mm/deep_gemm.py b/vllm/model_executor/kernels/linear/scaled_mm/deep_gemm.py
new file mode 100644
index 000000000000..70122f7b4ac6
--- /dev/null
+++ b/vllm/model_executor/kernels/linear/scaled_mm/deep_gemm.py
@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+import vllm.envs as envs
+from vllm.config import get_current_vllm_config
+from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
+from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+    deepgemm_post_process_fp8_weight_block,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    GroupShape,
+)
+from vllm.model_executor.utils import replace_parameter
+from vllm.platforms import current_platform
+from vllm.utils.deep_gemm import (
+    fp8_gemm_nt,
+    is_deep_gemm_e8m0_used,
+    is_deep_gemm_supported,
+    should_auto_disable_deep_gemm,
+    should_use_deepgemm_for_fp8_linear,
+)
+from vllm.utils.torch_utils import direct_register_custom_op
+
+from .BlockScaledMMLinearKernel import (
+    Fp8BlockScaledMMLinearKernel,
+    FP8ScaledMMLinearLayerConfig,
+)
+
+
+class DeepGemmFp8BlockScaledMMKernel(Fp8BlockScaledMMLinearKernel):
+    def __init__(self, config: FP8ScaledMMLinearLayerConfig):
+        super().__init__(config)
+        self.use_deep_gemm_e8m0 = is_deep_gemm_e8m0_used()
+        act_scale_descriptor = config.activation_quant_key.scale
+        self.is_deep_gemm_supported = is_deep_gemm_supported()
+        self.quant_fp8 = QuantFP8(
+            static=False,
+            group_shape=act_scale_descriptor.group_shape,
+            use_ue8m0=self.use_deep_gemm_e8m0,
+            tma_aligned_scales=envs.VLLM_USE_DEEP_GEMM_TMA_ALIGNED_SCALES,
+            column_major_scales=True,
+        )
+
+    @classmethod
+    def is_supported(cls, compute_capability=None):
+        if not current_platform.is_cuda():
+            return False, "DeepGEMM is only supported on cuda platform"
+        if not is_deep_gemm_supported():
+            return False, "Currently, only Hopper and Blackwell GPUs are supported."
+        return True, None
+
+    @classmethod
+    def can_implement(cls, config):
+        can_implement_base, reason = super().can_implement(config)
+        if not can_implement_base:
+            return can_implement_base, reason
+        if config.out_dtype != torch.bfloat16:
+            return (False, "Supports only output dtype of bfloat16")
+
+        act_quant_desc = config.activation_quant_key.scale
+        if act_quant_desc.group_shape != GroupShape(1, 128):
+            return (
+                False,
+                "Supports only dynamic per token group activation "
+                "quantization with group_shape=(1,128).",
+            )
+        model_config = get_current_vllm_config().model_config
+
+        if model_config is None:
+            return False, "Model configuration is required."
+
+        model_type = getattr(model_config.hf_text_config, "model_type", None)
+        if should_auto_disable_deep_gemm(model_type):
+            return False, f"Should not use deepgemm for model {model_type}"
+
+        if not should_use_deepgemm_for_fp8_linear(
+            config.out_dtype, config.weight_shape
+        ):
+            return False, "The provided metadata is not supported."
+        return True, None
+
+    def process_weights_after_loading(self, layer):
+        super().process_weights_after_loading(layer)
+        params = self._get_layer_params(layer)
+        assert layer.weight_block_size is not None
+
+        if self.is_deep_gemm_supported:
+            weight_scale_invs = params.weight_scale_inv
+            scale_attr = (
+                params.WEIGHT_SCALE_INV
+                if weight_scale_invs is not None
+                else params.WEIGHT_SCALE
+            )
+            dg_weight, dg_weight_scale = deepgemm_post_process_fp8_weight_block(
+                wq=params.weight,
+                ws=weight_scale_invs
+                if weight_scale_invs is not None
+                else params.weight_scale,
+                quant_block_shape=tuple(layer.weight_block_size),
+                use_e8m0=self.use_deep_gemm_e8m0,
+                is_bmm=getattr(layer, "is_bmm", False),
+                bmm_batch_size=getattr(layer, "bmm_batch_size", 0),
+            )
+            replace_parameter(layer, params.WEIGHT, dg_weight)
+            replace_parameter(layer, scale_attr, dg_weight_scale)
+
+    def apply_block_scaled_mm(
+        self,
+        A: torch.Tensor,
+        B: torch.Tensor,
+        As: torch.Tensor,
+        Bs: torch.Tensor,
+    ) -> torch.Tensor:
+        out_dtype = self.config.out_dtype
+        output = torch.empty(
+            (A.shape[0], B.shape[0]),
+            dtype=out_dtype,
+            device=A.device,
+        )
+        torch.ops.vllm.fp8_gemm_nt_op(A, As, B, Bs, output, self.use_deep_gemm_e8m0)
+        return output
+
+
+def _fp8_gemm_nt_op(
+    q_input: torch.Tensor,
+    input_scale: torch.Tensor,
+    weight: torch.Tensor,
+    weight_scale: torch.Tensor,
+    output: torch.Tensor,
+    use_deep_gemm_e8m0: bool,
+) -> None:
+    fp8_gemm_nt(
+        (q_input, input_scale),
+        (weight, weight_scale),
+        output,
+        is_deep_gemm_e8m0_used=use_deep_gemm_e8m0,
+    )
+
+
+def _fp8_gemm_nt_op_fake(
+    q_input: torch.Tensor,
+    input_scale: torch.Tensor,
+    weight: torch.Tensor,
+    weight_scale: torch.Tensor,
+    output: torch.Tensor,
+    use_deep_gemm_e8m0: bool,
+) -> None:
+    return None
+
+
+direct_register_custom_op(
+    "fp8_gemm_nt_op",
+    _fp8_gemm_nt_op,
+    mutates_args=["output"],
+    fake_impl=_fp8_gemm_nt_op_fake,
+)
diff --git a/vllm/model_executor/kernels/linear/scaled_mm/flashinfer.py b/vllm/model_executor/kernels/linear/scaled_mm/flashinfer.py
index 991cda862acf..c84fd5dda84e 100644
--- a/vllm/model_executor/kernels/linear/scaled_mm/flashinfer.py
+++ b/vllm/model_executor/kernels/linear/scaled_mm/flashinfer.py
@@ -2,11 +2,32 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 
+from typing import ClassVar
+
 import torch
 
+import vllm.envs as envs
+from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+    per_token_group_quant_fp8,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    GroupShape,
+)
 from vllm.platforms import current_platform
-from vllm.utils.flashinfer import flashinfer_scaled_fp8_mm, has_flashinfer
+from vllm.utils.flashinfer import (
+    flashinfer_fp8_blockscale_gemm,
+    flashinfer_scaled_fp8_mm,
+    has_flashinfer,
+    is_flashinfer_fp8_blockscale_gemm_supported,
+    should_use_flashinfer_for_blockscale_fp8_gemm,
+)
+from vllm.utils.torch_utils import direct_register_custom_op
 
+from .BlockScaledMMLinearKernel import (
+    Fp8BlockScaledDynamicMMLinearKernel,
+    Fp8BlockScaledMMLinearKernel,
+)
+from .deep_gemm import DeepGemmFp8BlockScaledMMKernel, fp8_gemm_nt
 from .ScaledMMLinearKernel import (
     FP8ScaledMMLinearKernel,
     FP8ScaledMMLinearLayerConfig,
@@ -55,3 +76,256 @@ def apply_scaled_mm(
         return flashinfer_scaled_fp8_mm(
             A, B, out_dtype=out_dtype, scale_a=As, scale_b=Bs, bias=bias
         )
+
+
+class FlashInferFp8BlockScaledMMKernel(Fp8BlockScaledMMLinearKernel):
+    # FlashInfer accepts BF16 input and handles FP8 conversion internally.
+    apply_input_quant: ClassVar[bool] = False
+
+    def __init__(self, config: FP8ScaledMMLinearLayerConfig) -> None:
+        super().__init__(config)
+
+    @classmethod
+    def can_implement(cls, config: FP8ScaledMMLinearLayerConfig):
+        can_implement_base, reason = super().can_implement(config)
+        if not can_implement_base:
+            return can_implement_base, reason
+
+        act_quant_desc = config.activation_quant_key.scale
+        if act_quant_desc.group_shape != GroupShape(1, 128):
+            return (
+                False,
+                "Supports only dynamic per token group activation "
+                "quantization with group_shape=(1,128).",
+            )
+
+        if not should_use_flashinfer_for_blockscale_fp8_gemm(
+            is_flashinfer_fp8_blockscale_gemm_supported(),
+            config.out_dtype,
+            config.input_dtype,
+            config.weight_quant_key.dtype,
+            config.weight_shape,
+        ):
+            return (
+                False,
+                "The provided metadata is not supported.",
+            )
+
+        return True, None
+
+    @classmethod
+    def is_supported(cls, compute_capability=None):
+        if not current_platform.is_cuda():
+            return False, "only cuda devices are supported."
+
+        if not is_flashinfer_fp8_blockscale_gemm_supported():
+            return False, "FlashInfer block-scale FP8 GEMM is not available."
+
+        return True, None
+
+    def apply_block_scaled_mm(
+        self,
+        A: torch.Tensor,
+        B: torch.Tensor,
+        As: torch.Tensor,
+        Bs: torch.Tensor,
+    ) -> torch.Tensor:
+        # A is BF16 — FlashInfer handles FP8 conversion internally.
+        # As is a placeholder (apply_input_quant=False) and is not used here.
+        return torch.ops.vllm.flashinfer_fp8_blockscale_gemm(
+            A,  # BF16 input
+            B,  # FP8 weight
+            Bs,  # Weight scales
+        )
+
+
+class FlashInferFp8DeepGEMMDynamicBlockScaledKernel(
+    Fp8BlockScaledDynamicMMLinearKernel
+):
+    """
+    Conditional FlashInfer / DeepGEMM FP8 block-scaled GEMM.
+
+    Dispatches between two kernels based on input batch size:
+    - Small batches (M < 32): FlashInfer's swapAB trick for better utilisation.
+    - Large batches (M >= 32): DeepGEMM for peak throughput.
+
+    apply_input_quant is False because FlashInfer accepts BF16 input and
+    handles FP8 conversion internally.  The DeepGEMM branch therefore
+    quantises BF16→FP8 inside apply_mm via a closure before dispatching to
+    the DeepGEMM kernel — keeping both branches compatible with the single
+    BF16 tensor operand list passed by torch.cond.
+    """
+
+    base_type: ClassVar[type[FlashInferFp8BlockScaledMMKernel]] = (
+        FlashInferFp8BlockScaledMMKernel
+    )
+    fallback_type: ClassVar[type[DeepGemmFp8BlockScaledMMKernel]] = (
+        DeepGemmFp8BlockScaledMMKernel
+    )
+    apply_input_quant: ClassVar[bool] = False
+
+    def __init__(self, config: FP8ScaledMMLinearLayerConfig):
+        super().__init__(config)
+        self.base: FlashInferFp8BlockScaledMMKernel
+        self.fallback: DeepGemmFp8BlockScaledMMKernel
+
+    def process_weights_after_loading(self, layer: torch.nn.Module):
+        # DeepGEMM need post-processing; both kernels share the same
+        # parameter tensor layout so processing once is sufficient.
+        self.fallback.process_weights_after_loading(layer)
+
+    def apply_block_scaled_mm(
+        self,
+        A: torch.Tensor,
+        B: torch.Tensor,
+        As: torch.Tensor,
+        Bs: torch.Tensor,
+    ) -> torch.Tensor:
+        group_size = self.weight_group_shape.col
+        use_deep_gemm_e8m0 = self.fallback.use_deep_gemm_e8m0
+
+        return torch.ops.vllm.dynamic_flashinfer_deepgemm_blockscale_gemm(
+            A, B, Bs, group_size, use_deep_gemm_e8m0
+        )
+
+
+def _flashinfer_fp8_blockscale_gemm_impl(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    weight_scale: torch.Tensor,
+) -> torch.Tensor:
+    return flashinfer_fp8_blockscale_gemm(
+        input=input,
+        weight=weight,
+        weight_scale=weight_scale,
+        out_dtype=torch.bfloat16,
+    )
+
+
+def _flashinfer_fp8_blockscale_gemm_fake(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    weight_scale: torch.Tensor,
+) -> torch.Tensor:
+    """
+    Required fake/meta implementation for torch.compile graph tracing.
+    """
+    return torch.empty(
+        input.shape[0], weight.shape[0], dtype=torch.bfloat16, device=input.device
+    )
+
+
+direct_register_custom_op(
+    "flashinfer_fp8_blockscale_gemm",
+    _flashinfer_fp8_blockscale_gemm_impl,
+    fake_impl=_flashinfer_fp8_blockscale_gemm_fake,
+)
+
+
+def _dynamic_flashinfer_deepgemm_blockscale_gemm_impl(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    weight_scale: torch.Tensor,
+    group_size: int,
+    use_deep_gemm_e8m0: bool,
+) -> torch.Tensor:
+    """
+    Conditional FlashInfer FP8 blockscale GEMM with batch-size-dependent selection.
+
+    This function switches between two optimized kernels based on the input batch size:
+    - For small batches (M < 32): Uses FlashInfer's DeepGEMM swapAB optimization.
+    - For larger batches (M >= 32): Uses the official DeepGEMM kernel.
+
+    The conditional logic must use torch.cond() instead of a simple if-else statement
+    to maintain compatibility with torch.compile graph compilation.
+
+    This batch-size-dependent selection is essential for maintaining model accuracy.
+    Benchmarks on GSM8K show a significant accuracy gap (88% vs 95%) for DeepSeek-V3.1
+    when using FlashInfer's DeepGEMM on M>=32. The M < 32 strategy fixes the accuracy
+    drop.
+
+    Args:
+        input: Input tensor of shape (batch_size, input_dim) in FP8 format
+        weight: Weight tensor of shape (output_dim, input_dim) in FP8 format
+        weight_scale: Scale factors for weight quantization (per-group)
+        group_size: Quantization group size for the weight tensor
+        use_deep_gemm_e8m0: Whether to use the E8M0 format in DeepGEMM quantization
+
+    Returns:
+        Output tensor of shape (batch_size, output_dim) in bfloat16 format
+    """
+
+    def run_flashinfer_deepgemm_swapAB(
+        input: torch.Tensor,
+        weight: torch.Tensor,
+        weight_scale: torch.Tensor,
+    ) -> torch.Tensor:
+        return flashinfer_fp8_blockscale_gemm(
+            input=input,
+            weight=weight,
+            weight_scale=weight_scale,
+            out_dtype=torch.bfloat16,
+        )
+
+    def run_deepgemm(
+        input: torch.Tensor,
+        weight: torch.Tensor,
+        weight_scale: torch.Tensor,
+    ) -> torch.Tensor:
+        q_input, input_scale = per_token_group_quant_fp8(
+            input,
+            group_size=group_size,
+            column_major_scales=True,
+            use_ue8m0=use_deep_gemm_e8m0,
+        )
+        output = torch.empty(
+            (q_input.shape[0], weight.shape[0]),
+            dtype=torch.bfloat16,
+            device=q_input.device,
+        )
+        fp8_gemm_nt(
+            (q_input, input_scale),
+            (weight, weight_scale),
+            output,
+            is_deep_gemm_e8m0_used=use_deep_gemm_e8m0,
+        )
+        return output
+
+    if envs.VLLM_BATCH_INVARIANT:
+        return run_deepgemm(input, weight, weight_scale)
+
+    condition = input.shape[0] < 32
+
+    # PyTorch's torch.compile cannot handle input-dependent control flow in standard
+    # Python conditionals. torch.cond() explicitly registers both code paths in the
+    # computation graph, allowing torch.compile to capture both branches.
+    # without torch.cond, the M < 32 condition won't be able to be captured by torch
+    # compile
+    return torch.cond(
+        condition,
+        run_flashinfer_deepgemm_swapAB,
+        run_deepgemm,
+        (input, weight, weight_scale),
+    )
+
+
+def _dynamic_flashinfer_deepgemm_blockscale_gemm_fake(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    weight_scale: torch.Tensor,
+    group_size: int,
+    use_deep_gemm_e8m0: bool,
+) -> torch.Tensor:
+    """
+    Required fake/meta implementation for torch.compile graph tracing.
+    """
+    return torch.empty(
+        input.shape[0], weight.shape[0], dtype=torch.bfloat16, device=input.device
+    )
+
+
+direct_register_custom_op(
+    "dynamic_flashinfer_deepgemm_blockscale_gemm",
+    _dynamic_flashinfer_deepgemm_blockscale_gemm_impl,
+    fake_impl=_dynamic_flashinfer_deepgemm_blockscale_gemm_fake,
+)
diff --git a/vllm/model_executor/kernels/linear/scaled_mm/pytorch.py b/vllm/model_executor/kernels/linear/scaled_mm/pytorch.py
index 2fb6e87413aa..9182977e9577 100644
--- a/vllm/model_executor/kernels/linear/scaled_mm/pytorch.py
+++ b/vllm/model_executor/kernels/linear/scaled_mm/pytorch.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import math
 
 import torch
 
@@ -13,6 +14,13 @@
 )
 
 
+def _get_num_tokens(output_shape: list) -> int:
+    # torch._scaled_mm works with 2D tensors, so input tensors are
+    # flattened if they are 3D. If output_shape is 3D, num_tokens is
+    # the product of all dims except the last (hidden dim).
+    return math.prod(output_shape[:-1])
+
+
 class TorchFP8ScaledMMLinearKernel(FP8ScaledMMLinearKernel):
     """
     Base class for FP8 linear kernels using Torch.
@@ -78,7 +86,8 @@ def apply_scaled_mm(
         if type(output) is tuple and len(output) == 2:
             output = output[0]
 
-        return torch.narrow(output, 0, 0, output_shape[0]).view(*output_shape)
+        num_tokens = _get_num_tokens(output_shape)
+        return torch.narrow(output, 0, 0, num_tokens).view(*output_shape)
 
 
 class RowWiseTorchFP8ScaledMMLinearKernel(TorchFP8ScaledMMLinearKernel):
@@ -135,17 +144,26 @@ def apply_scaled_mm(
         #  For CUDA platform please validate if the torch._scaled_mm supports
         #  rowwise scaled GEMM before using it
 
+        # torch._scaled_mm rowwise requires scale_a = (m, 1), scale_b = (1, n).
+        # CompressedTensors stores weight_scale as (n, 1), so `.t()` yields (1, n).
+        # ModelOpt FP8_PER_CHANNEL_PER_TOKEN stores it as 1-D (n,); reshape to
+        # (1, n) so both paths satisfy the rowwise contract.
+        scale_b = Bs.view(1, -1) if Bs.dim() == 1 else Bs.t()
+        if As.dim() == 1:
+            As = As.view(-1, 1)
+
         # Fused GEMM_DQ Rowwise GEMM
         output = torch._scaled_mm(
             A,
             B,
             out_dtype=out_dtype,
             scale_a=As,
-            scale_b=Bs.t(),
+            scale_b=scale_b,
             bias=bias,
         )
 
-        return torch.narrow(output, 0, 0, output_shape[0]).view(*output_shape)
+        num_tokens = _get_num_tokens(output_shape)
+        return torch.narrow(output, 0, 0, num_tokens).view(*output_shape)
 
 
 class ChannelWiseTorchFP8ScaledMMLinearKernel(TorchFP8ScaledMMLinearKernel):
@@ -206,8 +224,9 @@ def apply_scaled_mm(
             output = output[0]
 
         # Unpad (undo num_token_padding)
-        output = torch.narrow(output, 0, 0, output_shape[0])
-        x_scale = torch.narrow(As, 0, 0, output_shape[0])
+        num_tokens = _get_num_tokens(output_shape)
+        output = torch.narrow(output, 0, 0, num_tokens)
+        x_scale = torch.narrow(As, 0, 0, num_tokens)
 
         # DQ
         # C = sw * sx * (X * W) + bias
diff --git a/vllm/model_executor/kernels/linear/scaled_mm/rocm.py b/vllm/model_executor/kernels/linear/scaled_mm/rocm.py
index c8370dff512c..64bc5b6c8bbe 100644
--- a/vllm/model_executor/kernels/linear/scaled_mm/rocm.py
+++ b/vllm/model_executor/kernels/linear/scaled_mm/rocm.py
@@ -79,10 +79,10 @@ def is_supported(
         if not current_platform.is_rocm():
             return False, "requires ROCm."
 
-        from vllm.platforms.rocm import on_mi3xx
+        from vllm.platforms.rocm import on_gfx12x, on_mi3xx
 
-        if not on_mi3xx():
-            return False, "requires MI3xx."
+        if not (on_mi3xx() or on_gfx12x()):
+            return False, "requires MI3xx or gfx12x"
 
         if not envs.VLLM_ROCM_USE_SKINNY_GEMM:
             return False, "requires VLLM_ROCM_USE_SKINNY_GEMM to be enabled."
diff --git a/vllm/model_executor/kernels/linear/scaled_mm/triton.py b/vllm/model_executor/kernels/linear/scaled_mm/triton.py
index d2d90ed06a7a..7003e727bfac 100644
--- a/vllm/model_executor/kernels/linear/scaled_mm/triton.py
+++ b/vllm/model_executor/kernels/linear/scaled_mm/triton.py
@@ -13,7 +13,11 @@
     convert_to_channelwise,
 )
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import direct_register_custom_op
 
+from .BlockScaledMMLinearKernel import (
+    Fp8BlockScaledMMLinearKernel,
+)
 from .cutlass import CutlassInt8ScaledMMLinearKernel
 from .ScaledMMLinearKernel import (
     Int8ScaledMMLinearLayerConfig,
@@ -31,8 +35,6 @@ def is_supported(
 
     @classmethod
     def can_implement(cls, c: Int8ScaledMMLinearLayerConfig) -> tuple[bool, str | None]:
-        if not c.input_symmetric:
-            return False, "supports symmetric input only."
         return True, None
 
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
@@ -62,17 +64,59 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         # INPUT SCALE
         if self.config.is_static_input_scheme:
             assert i_s is not None
-            replace_parameter(
-                layer,
-                i_s_name,
-                torch.nn.Parameter(i_s.max(), requires_grad=False),
-            )
-            setattr(layer, i_zp_name, None)
+
+            if self.config.input_symmetric:
+                replace_parameter(
+                    layer,
+                    i_s_name,
+                    torch.nn.Parameter(i_s.max(), requires_grad=False),
+                )
+                setattr(layer, i_zp_name, None)
+            else:
+                input_zero_point = getattr(layer, i_zp_name)
+
+                # Reconstruct the ranges to find a single scale and azp
+                int8_traits = torch.iinfo(torch.int8)
+                azps = input_zero_point.to(dtype=torch.int32)
+                range_max = (i_s * (int8_traits.max - azps)).max()
+                range_min = (i_s * (int8_traits.min - azps)).min()
+
+                scale = (range_max - range_min) / (int8_traits.max - int8_traits.min)
+                replace_parameter(
+                    layer,
+                    i_s_name,
+                    torch.nn.Parameter(scale, requires_grad=False),
+                )
+
+                # AZP loaded as int8 but used as int32
+                azp = (int8_traits.min - range_min / scale).to(dtype=torch.int32)
+                replace_parameter(
+                    layer,
+                    i_zp_name,
+                    torch.nn.Parameter(azp, requires_grad=False),
+                )
         else:
             setattr(layer, i_s_name, None)
             setattr(layer, i_zp_name, None)
 
-        setattr(layer, azp_adj_name, None)
+        # azp_adj is the AZP adjustment term, used to account for weights.
+        # It does not depend on scales or azp, so it is the same for
+        # static and dynamic quantization.
+        # See csrc/quantization/w8a8/cutlass/Epilogues.md for the math.
+        if not self.config.input_symmetric:
+            weight = getattr(layer, w_q_name)
+            # weight is already transposed to [K, N], sum over K (dim=0)
+            azp_adj = weight.sum(dim=0, keepdim=True, dtype=torch.int32)
+            if self.config.is_static_input_scheme:
+                # Fold azp into azp_adj for the per-tensor case
+                azp_adj = getattr(layer, i_zp_name) * azp_adj
+            setattr(
+                layer,
+                azp_adj_name,
+                torch.nn.Parameter(azp_adj, requires_grad=False),
+            )
+        else:
+            setattr(layer, azp_adj_name, None)
 
     def apply_weights(
         self,
@@ -80,14 +124,97 @@ def apply_weights(
         x: torch.Tensor,
         bias: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        w_q, w_s, i_s, i_zp, _ = self._get_layer_params(layer)
+        w_q, w_s, i_s, i_zp, azp_adj = self._get_layer_params(layer)
 
+        symmetric = azp_adj is None
         x_q, x_s, x_zp = ops.scaled_int8_quant(
-            x.contiguous(), i_s, i_zp, symmetric=True
+            x.contiguous(), i_s, i_zp, symmetric=symmetric
         )
 
-        assert x_zp is None, "Triton kernel only supports symmetric quantization"
-
-        return triton_scaled_mm(
+        out = triton_scaled_mm(
             x_q, w_q, scale_a=x_s, scale_b=w_s, out_dtype=x.dtype, bias=bias
         )
+
+        if azp_adj is not None:
+            # Asymmetric quantization: subtract the zero-point correction.
+            # D = scale_a * scale_b * (A_q @ B_q - azp * azp_adj) + bias
+            # triton_scaled_mm already computed scale_a * scale_b * (A_q @ B_q) + bias
+            # so we subtract scale_a * scale_b * azp * azp_adj
+            #
+            # x_s: [M, 1] or scalar, w_s: [N, 1] or scalar, azp_adj: [1, N]
+            # Reshape w_s from [N, 1] to [1, N] for proper broadcasting.
+            w_s_row = w_s.view(1, -1) if w_s.dim() > 0 else w_s
+            static = i_zp is not None
+            if not static and x_zp is not None:
+                # Dynamic per-token: azp is per-token, azp_adj is per-channel
+                # x_zp: [M, 1], azp_adj: [1, N]
+                out -= x_s * w_s_row * (x_zp * azp_adj).to(x.dtype)
+            else:
+                # Static per-tensor: azp already folded into azp_adj
+                out -= (x_s * w_s_row * azp_adj).to(x.dtype)
+
+        return out
+
+
+class TritonFp8BlockScaledMMKernel(Fp8BlockScaledMMLinearKernel):
+    @classmethod
+    def is_supported(cls, compute_capability=None):
+        if not (current_platform.is_cuda_alike() or current_platform.is_xpu()):
+            return False, "only cuda-like and xpu devices are supported."
+        return True, None
+
+    def apply_block_scaled_mm(
+        self,
+        A: torch.Tensor,
+        B: torch.Tensor,
+        As: torch.Tensor,
+        Bs: torch.Tensor,
+    ) -> torch.Tensor:
+        return torch.ops.vllm.w8a8_triton_block_scaled_mm_func(
+            A,
+            B,
+            As,
+            Bs,
+            list(self.weight_group_shape),
+            self.config.out_dtype,
+        )
+
+
+# TODO we should be able to change the type of block_size to GroupShape
+# after we resolve GroupShape compilation issue
+# https://github.com/vllm-project/vllm/issues/25270
+def _w8a8_triton_block_scaled_mm_func(
+    qx: torch.Tensor,
+    weight: torch.Tensor,
+    x_scale: torch.Tensor,
+    weight_scale: torch.Tensor,
+    block_size: list[int],
+    output_dtype: torch.dtype,
+) -> torch.Tensor:
+    from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+        w8a8_triton_block_scaled_mm,
+    )
+
+    return w8a8_triton_block_scaled_mm(
+        qx, weight, x_scale, weight_scale, block_size, output_dtype
+    )
+
+
+def _w8a8_triton_block_scaled_mm_fake(
+    qx: torch.Tensor,
+    weight: torch.Tensor,
+    x_scale: torch.Tensor,
+    weight_scale: torch.Tensor,
+    block_size: list[int],
+    output_dtype: torch.dtype,
+) -> torch.Tensor:
+    return torch.empty(
+        (qx.size(0), weight.size(0)), dtype=output_dtype, device=qx.device
+    )
+
+
+direct_register_custom_op(
+    "w8a8_triton_block_scaled_mm_func",
+    _w8a8_triton_block_scaled_mm_func,
+    fake_impl=_w8a8_triton_block_scaled_mm_fake,
+)
diff --git a/vllm/model_executor/kernels/linear/scaled_mm/xpu.py b/vllm/model_executor/kernels/linear/scaled_mm/xpu.py
index b16ee169972b..0e4ead392195 100644
--- a/vllm/model_executor/kernels/linear/scaled_mm/xpu.py
+++ b/vllm/model_executor/kernels/linear/scaled_mm/xpu.py
@@ -9,6 +9,11 @@
     FP8ScaledMMLinearKernel,
     FP8ScaledMMLinearLayerConfig,
 )
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    kFp8StaticChannelSym,
+    kFp8StaticTensorSym,
+)
+from vllm.model_executor.utils import replace_parameter
 from vllm.platforms import current_platform
 
 
@@ -23,6 +28,11 @@ def is_supported(
 
     @classmethod
     def can_implement(cls, c: FP8ScaledMMLinearLayerConfig) -> tuple[bool, str | None]:
+        if c.weight_quant_key not in {kFp8StaticChannelSym, kFp8StaticTensorSym}:
+            return (
+                False,
+                "XPUFP8ScaledMM only support per-channel and per-tensor quantization",
+            )
         if c.weight_quant_key.dtype not in {torch.float8_e5m2, torch.float8_e4m3fn}:
             return False, "XPUFP8ScaledMM only support FP8 weight dtype"
         return True, None
@@ -35,6 +45,23 @@ def __init__(
         self.config = c
         self.layer_param_names = layer_param_names
 
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        # fp8_gemm_w8a16 expects weight in [in, out] layout.
+        # Transpose if weight is still in [out, in] layout.
+        # For square matrices, use contiguity as tie-breaker:
+        # checkpoint weights are contiguous, .t() views are not.
+        weight = layer.weight
+        out_features, in_features = self.config.weight_shape
+
+        if weight.shape == (out_features, in_features) and (
+            in_features != out_features or weight.is_contiguous()
+        ):
+            replace_parameter(layer, "weight", weight.data.t())
+        # else: already in [in, out] layout — no-op
+
+        weight_scale = layer.weight_scale.t().contiguous()
+        replace_parameter(layer, "weight_scale", weight_scale.data)
+
     def apply_weights(
         self,
         layer: torch.nn.Module,
diff --git a/vllm/model_executor/kernels/mhc/__init__.py b/vllm/model_executor/kernels/mhc/__init__.py
new file mode 100644
index 000000000000..3bde02e28a31
--- /dev/null
+++ b/vllm/model_executor/kernels/mhc/__init__.py
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from .aiter import *
+from .tilelang import *
+from .torch import *
+from .triton import *
+
+__all__ = [
+    "mhc_pre_cuda",
+    "mhc_post_cuda",
+    "mhc_fused_post_pre_cuda",
+    "hc_head_fused_kernel_cuda",
+    "mhc_pre_aiter",
+    "mhc_post_aiter",
+    "mhc_fused_post_pre_aiter",
+    "hc_head_fused_aiter",
+    "mhc_pre_tilelang",
+    "mhc_post_tilelang",
+    "mhc_fused_post_pre_tilelang",
+    "hc_head_fused_tilelang",
+    "mhc_pre_torch",
+    "mhc_post_torch",
+    "mhc_fused_post_pre_torch",
+    "hc_head_fused_torch",
+    "mhc_pre_triton",
+    "mhc_post_triton",
+    "mhc_fused_post_pre_triton",
+    "hc_head_fused_triton",
+]
diff --git a/vllm/model_executor/kernels/mhc/aiter.py b/vllm/model_executor/kernels/mhc/aiter.py
new file mode 100644
index 000000000000..e844b4191e30
--- /dev/null
+++ b/vllm/model_executor/kernels/mhc/aiter.py
@@ -0,0 +1,138 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+
+from vllm.utils.torch_utils import direct_register_custom_op
+
+
+def mhc_pre_aiter(
+    residual: torch.Tensor,
+    fn: torch.Tensor,
+    hc_scale: torch.Tensor,
+    hc_base: torch.Tensor,
+    rms_eps: float,
+    hc_pre_eps: float,
+    hc_sinkhorn_eps: float,
+    hc_post_mult_value: float,
+    sinkhorn_repeat: int,
+    n_splits: int = 1,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Forward pass for mHC pre block.
+
+    Args:
+        residual: shape (..., hc_mult, hidden_size), dtype torch.bfloat16
+        fn: shape (hc_mult3, hc_mult * hidden_size), dtype torch.float32
+        hc_scale: shape (3,), dtype torch.float32
+        hc_base: shape (hc_mult3,), dtype torch.float32
+        rms_eps: RMS normalization epsilon
+        hc_pre_eps: pre-mix epsilon
+        hc_sinkhorn_eps: sinkhorn epsilon
+        hc_post_mult_value: post-mix multiplier value
+        sinkhorn_repeat: number of sinkhorn iterations
+        n_splits: split-k factor;
+
+    Returns:
+        post_mix: shape (..., hc_mult), dtype torch.float32
+        comb_mix: shape (..., hc_mult, hc_mult), dtype torch.float32
+        layer_input: shape (..., hidden_size), dtype torch.bfloat16
+    """
+
+    hidden_size = residual.shape[-1]
+    assert hidden_size % 256 == 0
+    from vllm._aiter_ops import rocm_aiter_ops
+
+    return rocm_aiter_ops.mhc_pre(
+        residual,
+        fn,
+        hc_scale,
+        hc_base,
+        rms_eps,
+        hc_pre_eps,
+        hc_sinkhorn_eps,
+        hc_post_mult_value,
+        sinkhorn_repeat,
+    )
+
+
+def _mhc_pre_aiter_fake(
+    residual: torch.Tensor,
+    fn: torch.Tensor,
+    hc_scale: torch.Tensor,
+    hc_base: torch.Tensor,
+    rms_eps: float,
+    hc_pre_eps: float,
+    hc_sinkhorn_eps: float,
+    hc_post_mult_value: float,
+    sinkhorn_repeat: int,
+    n_splits: int = 1,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    hc_mult = residual.shape[-2]
+    hidden_size = residual.shape[-1]
+    outer_shape = residual.shape[:-2]
+
+    # Create empty tensors with correct shapes for meta device / shape inference
+    post_mix = torch.empty(
+        *outer_shape,
+        hc_mult,
+        1,
+        dtype=torch.float32,
+        device=residual.device,
+    )
+    comb_mix = torch.empty(
+        *outer_shape,
+        hc_mult,
+        hc_mult,
+        dtype=torch.float32,
+        device=residual.device,
+    )
+    layer_input = torch.empty(
+        *outer_shape,
+        hidden_size,
+        dtype=torch.bfloat16,
+        device=residual.device,
+    )
+
+    return post_mix, comb_mix, layer_input
+
+
+def mhc_post_aiter(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    post_layer_mix: torch.Tensor,
+    comb_res_mix: torch.Tensor,
+) -> torch.Tensor:
+    hidden_size = residual.shape[-1]
+
+    assert hidden_size % 256 == 0
+    from vllm._aiter_ops import rocm_aiter_ops
+
+    return rocm_aiter_ops.mhc_post(
+        x,
+        residual,
+        post_layer_mix,
+        comb_res_mix,
+    )
+
+
+def _mhc_post_aiter_fake(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    post_layer_mix: torch.Tensor,
+    comb_res_mix: torch.Tensor,
+) -> torch.Tensor:
+    return torch.empty_like(residual)
+
+
+direct_register_custom_op(
+    op_name="mhc_pre_aiter",
+    op_func=mhc_pre_aiter,
+    mutates_args=[],
+    fake_impl=_mhc_pre_aiter_fake,
+)
+direct_register_custom_op(
+    op_name="mhc_post_aiter",
+    op_func=mhc_post_aiter,
+    mutates_args=[],
+    fake_impl=_mhc_post_aiter_fake,
+)
diff --git a/vllm/model_executor/kernels/mhc/tilelang.py b/vllm/model_executor/kernels/mhc/tilelang.py
new file mode 100644
index 000000000000..a4d05ef245c7
--- /dev/null
+++ b/vllm/model_executor/kernels/mhc/tilelang.py
@@ -0,0 +1,546 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+
+from vllm.utils.torch_utils import direct_register_custom_op
+
+
+def mhc_pre_tilelang(
+    residual: torch.Tensor,
+    fn: torch.Tensor,
+    hc_scale: torch.Tensor,
+    hc_base: torch.Tensor,
+    rms_eps: float,
+    hc_pre_eps: float,
+    hc_sinkhorn_eps: float,
+    hc_post_mult_value: float,
+    sinkhorn_repeat: int,
+    n_splits: int = 1,
+    norm_weight: torch.Tensor | None = None,
+    norm_eps: float = 1e-6,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Forward pass for mHC pre block.
+
+    Args:
+        residual: shape (..., hc_mult, hidden_size), dtype torch.bfloat16
+        fn: shape (hc_mult3, hc_mult * hidden_size), dtype torch.float32
+        hc_scale: shape (3,), dtype torch.float32
+        hc_base: shape (hc_mult3,), dtype torch.float32
+        rms_eps: RMS normalization epsilon
+        hc_pre_eps: pre-mix epsilon
+        hc_sinkhorn_eps: sinkhorn epsilon
+        hc_post_mult_value: post-mix multiplier value
+        sinkhorn_repeat: number of sinkhorn iterations
+        n_splits: split-k factor;
+        norm_weight: optional RMSNorm weight, shape (hidden_size,), dtype
+            torch.bfloat16. When provided, RMSNorm is fused into the
+            layer_input write path of the big_fuse kernel.
+        norm_eps: epsilon for the fused RMSNorm; only consulted when
+            norm_weight is given.
+
+    Returns:
+        post_mix: shape (..., hc_mult), dtype torch.float32
+        comb_mix: shape (..., hc_mult, hc_mult), dtype torch.float32
+        layer_input: shape (..., hidden_size), dtype torch.bfloat16
+    """
+    from vllm._tilelang_ops import (
+        compute_num_split,
+        mhc_pre_big_fuse_tilelang,
+        mhc_pre_big_fuse_with_norm_tilelang,
+    )
+    from vllm.utils.deep_gemm import tf32_hc_prenorm_gemm
+    from vllm.utils.math_utils import cdiv
+
+    assert residual.dtype == torch.bfloat16
+    assert fn.dtype == torch.float32
+    assert hc_scale.dtype == torch.float32
+    assert hc_base.dtype == torch.float32
+
+    hc_mult = residual.shape[-2]
+    hidden_size = residual.shape[-1]
+    hc_mult2 = hc_mult * hc_mult
+    hc_mult3 = hc_mult * 2 + hc_mult2
+
+    hc_hidden_size = hc_mult * hidden_size
+    assert fn.shape[0] == hc_mult3
+    assert fn.shape[1] == hc_hidden_size
+    assert hc_scale.shape == (3,)
+    assert hc_base.shape == (hc_mult3,)
+
+    if norm_weight is not None:
+        assert norm_weight.shape == (hidden_size,)
+        if norm_weight.dtype != torch.bfloat16:
+            norm_weight = norm_weight.to(torch.bfloat16)
+        if not norm_weight.is_contiguous():
+            norm_weight = norm_weight.contiguous()
+
+    outer_shape = residual.shape[:-2]
+
+    residual_flat = residual.view(-1, hc_mult, hidden_size)
+    num_tokens = residual_flat.shape[0]
+
+    # these numbers are from deepgemm kernel impl
+    block_k = 64
+    block_m = 64
+    n_splits = compute_num_split(block_k, hc_hidden_size, cdiv(num_tokens, block_m))
+
+    post_mix = torch.empty(
+        num_tokens, hc_mult, dtype=torch.float32, device=residual.device
+    )
+    comb_mix = torch.empty(
+        num_tokens, hc_mult2, dtype=torch.float32, device=residual.device
+    )
+    layer_input = torch.empty(
+        num_tokens, hidden_size, dtype=torch.bfloat16, device=residual.device
+    )
+
+    gemm_out_mul = torch.empty(
+        n_splits, num_tokens, hc_mult3, dtype=torch.float32, device=residual.device
+    )
+    gemm_out_sqrsum = torch.empty(
+        n_splits, num_tokens, dtype=torch.float32, device=residual.device
+    )
+
+    tf32_hc_prenorm_gemm(
+        residual_flat.view(num_tokens, hc_mult * hidden_size),
+        fn,
+        gemm_out_mul,
+        gemm_out_sqrsum,
+        n_splits,
+    )
+
+    if norm_weight is None:
+        mhc_pre_big_fuse_tilelang(
+            gemm_out_mul,
+            gemm_out_sqrsum,
+            hc_scale,
+            hc_base,
+            residual_flat,
+            post_mix,
+            comb_mix,
+            layer_input,
+            hidden_size,
+            rms_eps,
+            hc_pre_eps,
+            hc_sinkhorn_eps,
+            hc_post_mult_value,
+            sinkhorn_repeat,
+            n_splits,
+            hc_mult,
+        )
+    else:
+        mhc_pre_big_fuse_with_norm_tilelang(
+            gemm_out_mul,
+            gemm_out_sqrsum,
+            hc_scale,
+            hc_base,
+            residual_flat,
+            post_mix,
+            comb_mix,
+            layer_input,
+            norm_weight,
+            hidden_size,
+            rms_eps,
+            hc_pre_eps,
+            hc_sinkhorn_eps,
+            hc_post_mult_value,
+            sinkhorn_repeat,
+            norm_eps,
+            n_splits,
+            hc_mult,
+        )
+
+    return (
+        post_mix.view(*outer_shape, hc_mult, 1),
+        comb_mix.view(*outer_shape, hc_mult, hc_mult),
+        layer_input.view(*outer_shape, hidden_size),
+    )
+
+
+def _mhc_pre_tilelang_fake(
+    residual: torch.Tensor,
+    fn: torch.Tensor,
+    hc_scale: torch.Tensor,
+    hc_base: torch.Tensor,
+    rms_eps: float,
+    hc_pre_eps: float,
+    hc_sinkhorn_eps: float,
+    hc_post_mult_value: float,
+    sinkhorn_repeat: int,
+    n_splits: int = 1,
+    norm_weight: torch.Tensor | None = None,
+    norm_eps: float = 0.0,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    hc_mult = residual.shape[-2]
+    hidden_size = residual.shape[-1]
+    outer_shape = residual.shape[:-2]
+
+    # Create empty tensors with correct shapes for meta device / shape inference
+    post_mix = torch.empty(
+        *outer_shape,
+        hc_mult,
+        1,
+        dtype=torch.float32,
+        device=residual.device,
+    )
+    comb_mix = torch.empty(
+        *outer_shape,
+        hc_mult,
+        hc_mult,
+        dtype=torch.float32,
+        device=residual.device,
+    )
+    layer_input = torch.empty(
+        *outer_shape,
+        hidden_size,
+        dtype=torch.bfloat16,
+        device=residual.device,
+    )
+
+    return post_mix, comb_mix, layer_input
+
+
+def mhc_post_tilelang(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    post_layer_mix: torch.Tensor,
+    comb_res_mix: torch.Tensor,
+) -> torch.Tensor:
+    from vllm._tilelang_ops import mhc_post_tilelang as _mhc_post_kernel
+
+    out = torch.empty_like(residual)
+    _mhc_post_kernel(
+        comb_res_mix,
+        residual,
+        post_layer_mix.squeeze(-1),
+        x,
+        out,
+        residual.shape[-2],
+        residual.shape[-1],
+    )
+    return out
+
+
+def mhc_fused_post_pre_tilelang(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    post_layer_mix: torch.Tensor,
+    comb_res_mix: torch.Tensor,
+    fn: torch.Tensor,
+    hc_scale: torch.Tensor,
+    hc_base: torch.Tensor,
+    rms_eps: float,
+    hc_pre_eps: float,
+    hc_sinkhorn_eps: float,
+    hc_post_mult_value: float,
+    sinkhorn_repeat: int,
+    n_splits: int = 1,
+    tile_n: int = 1,
+    norm_weight: torch.Tensor | None = None,
+    norm_eps: float = 0.0,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Run one MHC post block followed by the next MHC pre block.
+
+    When ``norm_weight`` is provided, the layer_input_cur output is the
+    RMSNorm'd activation (fused into the kernel); otherwise it is the
+    raw pre-norm activation as before.
+
+    Returns:
+        residual_cur: post-mapped residual, shape (..., hc_mult, hidden_size)
+        post_mix_cur: shape (..., hc_mult, 1)
+        comb_mix_cur: shape (..., hc_mult, hc_mult)
+        layer_input_cur: shape (..., hidden_size)
+    """
+
+    from vllm._tilelang_ops import (
+        compute_num_split,
+        mhc_fused_tilelang,
+        mhc_post_tilelang,
+        mhc_pre_big_fuse_tilelang,
+        mhc_pre_big_fuse_with_norm_tilelang,
+    )
+    from vllm.utils.math_utils import cdiv
+
+    assert residual.dtype == torch.bfloat16
+    assert x.dtype == torch.bfloat16
+    assert post_layer_mix.dtype == torch.float32
+    assert comb_res_mix.dtype == torch.float32
+    assert fn.dtype == torch.float32
+    assert hc_scale.dtype == torch.float32
+    assert hc_base.dtype == torch.float32
+
+    hc_mult = residual.shape[-2]
+    hidden_size = residual.shape[-1]
+    hc_mult2 = hc_mult * hc_mult
+    hc_mult3 = hc_mult * 2 + hc_mult2
+    hc_hidden_size = hc_mult * hidden_size
+    outer_shape = residual.shape[:-2]
+
+    assert x.shape == (*outer_shape, hidden_size)
+    assert post_layer_mix.shape in (
+        (*outer_shape, hc_mult, 1),
+        (*outer_shape, hc_mult),
+    )
+    assert comb_res_mix.shape == (*outer_shape, hc_mult, hc_mult)
+    assert fn.shape == (hc_mult3, hc_hidden_size)
+    assert hc_scale.shape == (3,)
+    assert hc_base.shape == (hc_mult3,)
+
+    if norm_weight is not None:
+        assert norm_weight.shape == (hidden_size,)
+        if norm_weight.dtype != torch.bfloat16:
+            norm_weight = norm_weight.to(torch.bfloat16)
+        if not norm_weight.is_contiguous():
+            norm_weight = norm_weight.contiguous()
+
+    assert n_splits in (1, 2, 4, 8)
+    assert hidden_size % n_splits == 0
+
+    residual_flat = residual.view(-1, hc_mult, hidden_size)
+    num_tokens = residual_flat.shape[0]
+    x_flat = x.view(num_tokens, hidden_size)
+    post_layer_mix_flat = post_layer_mix.view(num_tokens, hc_mult)
+    comb_res_mix_flat = comb_res_mix.view(num_tokens, hc_mult, hc_mult)
+
+    fma_token_threshold = 16
+    if num_tokens <= fma_token_threshold:
+        # TODO(gnovack): investigate autotuning these heuristics
+        tile_n = 2 if num_tokens < 8 else 3
+        n_splits = 8 if (num_tokens < 8 and hidden_size <= 4096) else 4
+    else:
+        # these number are from deepgemm kernel impl
+        block_k = 64
+        block_m = 64
+        n_splits = compute_num_split(block_k, hc_hidden_size, cdiv(num_tokens, block_m))
+
+    gemm_out_mul = torch.empty(
+        n_splits,
+        num_tokens,
+        hc_mult3,
+        dtype=torch.float32,
+        device=residual.device,
+    )
+    gemm_out_sqrsum = torch.empty(
+        n_splits,
+        num_tokens,
+        dtype=torch.float32,
+        device=residual.device,
+    )
+    residual_cur = torch.empty_like(residual_flat)
+    post_mix_cur = torch.empty(
+        num_tokens,
+        hc_mult,
+        dtype=torch.float32,
+        device=residual.device,
+    )
+    comb_mix_cur = torch.empty(
+        num_tokens,
+        hc_mult2,
+        dtype=torch.float32,
+        device=residual.device,
+    )
+    layer_input_cur = torch.empty(
+        num_tokens,
+        hidden_size,
+        dtype=torch.bfloat16,
+        device=residual.device,
+    )
+
+    if num_tokens <= fma_token_threshold:
+        mhc_fused_tilelang(
+            comb_res_mix_flat,
+            residual_flat,
+            post_layer_mix_flat,
+            x_flat,
+            fn.view(hc_mult3, hc_mult, hidden_size),
+            gemm_out_mul,
+            gemm_out_sqrsum,
+            residual_cur,
+            hc_mult,
+            hidden_size,
+            hc_mult3,
+            tile_n=tile_n,
+            n_splits=n_splits,
+        )
+    else:
+        mhc_post_tilelang(
+            comb_res_mix_flat,
+            residual_flat,
+            post_layer_mix_flat,
+            x_flat,
+            residual_cur,
+            residual.shape[-2],
+            residual.shape[-1],
+        )
+
+        from vllm.utils.deep_gemm import tf32_hc_prenorm_gemm
+
+        tf32_hc_prenorm_gemm(
+            residual_cur.view(num_tokens, hc_mult * hidden_size),
+            fn,
+            gemm_out_mul,
+            gemm_out_sqrsum,
+            n_splits,
+        )
+
+    if norm_weight is None:
+        mhc_pre_big_fuse_tilelang(
+            gemm_out_mul,
+            gemm_out_sqrsum,
+            hc_scale,
+            hc_base,
+            residual_cur,
+            post_mix_cur,
+            comb_mix_cur,
+            layer_input_cur,
+            hidden_size,
+            rms_eps,
+            hc_pre_eps,
+            hc_sinkhorn_eps,
+            hc_post_mult_value,
+            sinkhorn_repeat,
+            n_splits,
+            hc_mult,
+        )
+    else:
+        mhc_pre_big_fuse_with_norm_tilelang(
+            gemm_out_mul,
+            gemm_out_sqrsum,
+            hc_scale,
+            hc_base,
+            residual_cur,
+            post_mix_cur,
+            comb_mix_cur,
+            layer_input_cur,
+            norm_weight,
+            hidden_size,
+            rms_eps,
+            hc_pre_eps,
+            hc_sinkhorn_eps,
+            hc_post_mult_value,
+            sinkhorn_repeat,
+            norm_eps,
+            n_splits,
+            hc_mult,
+        )
+
+    return (
+        residual_cur.view(*outer_shape, hc_mult, hidden_size),
+        post_mix_cur.view(*outer_shape, hc_mult, 1),
+        comb_mix_cur.view(*outer_shape, hc_mult, hc_mult),
+        layer_input_cur.view(*outer_shape, hidden_size),
+    )
+
+
+def _mhc_fused_post_pre_tilelang_fake(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    post_layer_mix: torch.Tensor,
+    comb_res_mix: torch.Tensor,
+    fn: torch.Tensor,
+    hc_scale: torch.Tensor,
+    hc_base: torch.Tensor,
+    rms_eps: float,
+    hc_pre_eps: float,
+    hc_sinkhorn_eps: float,
+    hc_post_mult_value: float,
+    sinkhorn_repeat: int,
+    n_splits: int = 1,
+    tile_n: int = 1,
+    norm_weight: torch.Tensor | None = None,
+    norm_eps: float = 0.0,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    hc_mult = residual.shape[-2]
+    hidden_size = residual.shape[-1]
+    outer_shape = residual.shape[:-2]
+
+    residual_cur = torch.empty_like(residual)
+    post_mix_cur = torch.empty(
+        *outer_shape,
+        hc_mult,
+        1,
+        dtype=torch.float32,
+        device=residual.device,
+    )
+    comb_mix_cur = torch.empty(
+        *outer_shape,
+        hc_mult,
+        hc_mult,
+        dtype=torch.float32,
+        device=residual.device,
+    )
+    layer_input_cur = torch.empty(
+        *outer_shape,
+        hidden_size,
+        dtype=torch.bfloat16,
+        device=residual.device,
+    )
+
+    return residual_cur, post_mix_cur, comb_mix_cur, layer_input_cur
+
+
+def _mhc_post_tilelang_fake(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    post_layer_mix: torch.Tensor,
+    comb_res_mix: torch.Tensor,
+) -> torch.Tensor:
+    return torch.empty_like(residual)
+
+
+def _hc_head_fused_kernel_tilelang(
+    hs_flat: torch.Tensor,
+    fn: torch.Tensor,
+    hc_scale: torch.Tensor,
+    hc_base: torch.Tensor,
+    out: torch.Tensor,
+    hidden_size: int,
+    rms_eps: float,
+    hc_eps: float,
+    hc_mult: int,
+) -> None:
+    """Fill pre-allocated `out` (T, H) in-place with the hc_head result."""
+    if hs_flat.shape[0] == 0:
+        return
+    from vllm._tilelang_ops import hc_head_fuse_tilelang
+
+    hc_head_fuse_tilelang(
+        hs_flat,
+        fn,
+        hc_scale,
+        hc_base,
+        out,
+        hidden_size,
+        rms_eps,
+        hc_eps,
+        hc_mult,
+    )
+
+
+direct_register_custom_op(
+    op_name="mhc_pre_tilelang",
+    op_func=mhc_pre_tilelang,
+    mutates_args=[],
+    fake_impl=_mhc_pre_tilelang_fake,
+)
+direct_register_custom_op(
+    op_name="mhc_post_tilelang",
+    op_func=mhc_post_tilelang,
+    mutates_args=[],
+    fake_impl=_mhc_post_tilelang_fake,
+)
+
+direct_register_custom_op(
+    op_name="mhc_fused_post_pre_tilelang",
+    op_func=mhc_fused_post_pre_tilelang,
+    mutates_args=[],
+    fake_impl=_mhc_fused_post_pre_tilelang_fake,
+)
+
+direct_register_custom_op(
+    op_name="hc_head_fused_kernel_tilelang",
+    op_func=_hc_head_fused_kernel_tilelang,
+    mutates_args=["out"],
+)
diff --git a/vllm/model_executor/kernels/mhc/torch.py b/vllm/model_executor/kernels/mhc/torch.py
new file mode 100644
index 000000000000..1a54ce4ad202
--- /dev/null
+++ b/vllm/model_executor/kernels/mhc/torch.py
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+
+
+def mhc_pre_torch(
+    residual: torch.Tensor,
+    fn: torch.Tensor,
+    hc_scale: torch.Tensor,
+    hc_base: torch.Tensor,
+    rms_eps: float,
+    hc_pre_eps: float,
+    hc_sinkhorn_eps: float,
+    hc_post_mult_value: float,
+    sinkhorn_repeat: int,
+    n_splits: int = 1,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Forward pass for mHC pre block.
+
+    Args:
+        residual: shape (..., hc_mult, hidden_size), dtype torch.bfloat16
+        fn: shape (hc_mult3, hc_mult * hidden_size), dtype torch.float32
+        hc_scale: shape (3,), dtype torch.float32
+        hc_base: shape (hc_mult3,), dtype torch.float32
+        rms_eps: RMS normalization epsilon
+        hc_pre_eps: pre-mix epsilon
+        hc_sinkhorn_eps: sinkhorn epsilon
+        hc_post_mult_value: post-mix multiplier value
+        sinkhorn_repeat: number of sinkhorn iterations
+        n_splits: split-k factor;
+
+    Returns:
+        post_mix: shape (..., hc_mult), dtype torch.float32
+        comb_mix: shape (..., hc_mult, hc_mult), dtype torch.float32
+        layer_input: shape (..., hidden_size), dtype torch.bfloat16
+    """
+
+    # Validate shapes
+    assert residual.dtype == torch.bfloat16
+    assert fn.dtype == torch.float32
+    assert hc_scale.dtype == torch.float32
+    assert hc_base.dtype == torch.float32
+
+    hc_mult = residual.shape[-2]
+    hidden_size = residual.shape[-1]
+    hc_mult2 = hc_mult * hc_mult
+    hc_mult3 = hc_mult * 2 + hc_mult2
+
+    hc_hidden_size = hc_mult * hidden_size
+    assert fn.shape[0] == hc_mult3
+    assert fn.shape[1] == hc_hidden_size
+    assert hc_scale.shape == (3,)
+    assert hc_base.shape == (hc_mult3,)
+
+    outer_shape = residual.shape[:-2]
+
+    residual_flat = residual.view(-1, hc_mult, hidden_size)
+    num_tokens = residual_flat.shape[0]
+    fn_flat = fn
+
+    x = residual_flat.view(num_tokens, hc_mult * hidden_size).to(torch.float32)
+    mixes = torch.matmul(x, fn_flat.t())
+    sqrsum = x.square().sum(dim=-1, keepdim=True)
+    mixes = mixes * torch.rsqrt(sqrsum / (hc_mult * hidden_size) + rms_eps)
+
+    pre_logits = mixes[:, :hc_mult] * hc_scale[0] + hc_base[:hc_mult]
+    pre_mix = torch.sigmoid(pre_logits) + hc_pre_eps
+
+    post_logits = (
+        mixes[:, hc_mult : 2 * hc_mult] * hc_scale[1] + hc_base[hc_mult : 2 * hc_mult]
+    )
+    post_mix = torch.sigmoid(post_logits) * hc_post_mult_value
+
+    comb_logits = mixes[:, 2 * hc_mult :].view(num_tokens, hc_mult, hc_mult) * hc_scale[
+        2
+    ] + hc_base[2 * hc_mult :].view(1, hc_mult, hc_mult)
+    comb_mix = torch.softmax(comb_logits, dim=-1) + hc_sinkhorn_eps
+    comb_mix = comb_mix / (comb_mix.sum(dim=-2, keepdim=True) + hc_sinkhorn_eps)
+    for _ in range(sinkhorn_repeat - 1):
+        comb_mix = comb_mix / (comb_mix.sum(dim=-1, keepdim=True) + hc_sinkhorn_eps)
+        comb_mix = comb_mix / (comb_mix.sum(dim=-2, keepdim=True) + hc_sinkhorn_eps)
+
+    layer_input = torch.sum(
+        pre_mix.unsqueeze(-1) * residual_flat.to(torch.float32), dim=1
+    ).to(torch.bfloat16)
+    return (
+        post_mix.view(*outer_shape, hc_mult, 1),
+        comb_mix.view(*outer_shape, hc_mult, hc_mult),
+        layer_input.view(*outer_shape, hidden_size),
+    )
+
+
+def mhc_post_torch(
+    x: torch.Tensor,
+    residual: torch.Tensor,
+    post_layer_mix: torch.Tensor,
+    comb_res_mix: torch.Tensor,
+) -> torch.Tensor:
+    mixed_residual = torch.einsum(
+        "...ij,...ih->...jh",
+        comb_res_mix.to(torch.float32),
+        residual.to(torch.float32),
+    )
+    post_term = post_layer_mix.to(torch.float32) * x.unsqueeze(-2).to(torch.float32)
+    return (mixed_residual + post_term).to(residual.dtype)
diff --git a/vllm/model_executor/kernels/mhc/triton.py b/vllm/model_executor/kernels/mhc/triton.py
new file mode 100644
index 000000000000..facb6813caa2
--- /dev/null
+++ b/vllm/model_executor/kernels/mhc/triton.py
@@ -0,0 +1,174 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+import torch.nn.functional as F
+from torch import Tensor
+
+from vllm.triton_utils import tl, triton
+from vllm.utils.torch_utils import direct_register_custom_op
+
+
+@triton.jit
+def _rmsnorm_nw_kernel(
+    x_ptr,
+    out_ptr,
+    stride_row,
+    D,
+    eps,
+    RBLOCK: tl.constexpr,
+):
+    """Weight-free RMSNorm Triton kernel: out = x * rsqrt(mean(x², -1) + eps)."""
+    row = tl.program_id(0)
+    cols = tl.arange(0, RBLOCK)
+    mask = cols < D
+
+    x = tl.load(
+        x_ptr + row * stride_row + cols,
+        mask=mask,
+        other=0.0,
+        eviction_policy="evict_first",
+    ).to(tl.float32)
+
+    var = tl.sum(x * x, 0) / D
+    rstd = tl.rsqrt(var + eps)
+
+    out = (x * rstd).to(out_ptr.dtype.element_ty)
+    tl.store(out_ptr + row * D + cols, out, mask=mask, eviction_policy="evict_first")
+
+
+def rmsnorm_nw(x: Tensor, eps: float) -> Tensor:
+    """Weight-free RMSNorm over the last dimension.
+
+    Treats *x* as ``[num_rows, D]`` where ``num_rows = product(shape[:-1])``.
+    Returns a contiguous tensor with the same shape and dtype as *x*.
+    """
+    orig_shape = x.shape
+    D = orig_shape[-1]
+    x_2d = x.reshape(-1, D)
+    num_rows = x_2d.shape[0]
+
+    out = torch.empty_like(x_2d)
+    RBLOCK = triton.next_power_of_2(D)
+
+    _rmsnorm_nw_kernel[(num_rows,)](
+        x_2d,
+        out,
+        x_2d.stride(0),
+        D,
+        eps,
+        RBLOCK=RBLOCK,
+        num_warps=1 if RBLOCK <= 512 else (4 if RBLOCK <= 4096 else 8),
+    )
+    return out.view(orig_shape)
+
+
+@triton.jit
+def _hc_head_reduce_store_kernel(
+    pre_ptr,
+    x_ptr,
+    out_ptr,
+    hidden_size: tl.constexpr,
+    hc_mult: tl.constexpr,
+    pre_stride_t: tl.constexpr,
+    pre_stride_m: tl.constexpr,
+    x_stride_t: tl.constexpr,
+    x_stride_m: tl.constexpr,
+    x_stride_h: tl.constexpr,
+    out_stride_t: tl.constexpr,
+    out_stride_h: tl.constexpr,
+    BLOCK_H: tl.constexpr,
+):
+    token_idx = tl.program_id(0)
+    block_idx = tl.program_id(1)
+    offsets = block_idx * BLOCK_H + tl.arange(0, BLOCK_H)
+    mask = offsets < hidden_size
+
+    acc = tl.zeros((BLOCK_H,), dtype=tl.float32)
+    for mix_idx in tl.static_range(0, hc_mult):
+        pre = tl.load(pre_ptr + token_idx * pre_stride_t + mix_idx * pre_stride_m).to(
+            tl.float32
+        )
+        x = tl.load(
+            x_ptr
+            + token_idx * x_stride_t
+            + mix_idx * x_stride_m
+            + offsets * x_stride_h,
+            mask=mask,
+            other=0.0,
+        ).to(tl.float32)
+        acc += pre * x
+
+    tl.store(
+        out_ptr + token_idx * out_stride_t + offsets * out_stride_h,
+        acc,
+        mask=mask,
+    )
+
+
+def hc_head_reduce_triton_kernel(
+    x: torch.Tensor,
+    hc_fn: torch.Tensor,
+    hc_scale: torch.Tensor,
+    hc_base: torch.Tensor,
+    out: torch.Tensor,
+    norm_eps: float,
+    hc_eps: float,
+) -> None:
+    x_flat = x.flatten(-2)
+    x_normed = rmsnorm_nw(x_flat, norm_eps)
+    mixes = F.linear(x_normed.float(), hc_fn)
+    pre = torch.sigmoid(mixes * hc_scale + hc_base) + hc_eps
+
+    hidden_size = x.shape[-1]
+    hc_mult = x.shape[-2]
+    block_h = 1024
+    _hc_head_reduce_store_kernel[(x.shape[0], (hidden_size + block_h - 1) // block_h)](
+        pre,
+        x,
+        out,
+        hidden_size,
+        hc_mult,
+        pre.stride(0),
+        pre.stride(1),
+        x.stride(0),
+        x.stride(1),
+        x.stride(2),
+        out.stride(0),
+        out.stride(1),
+        BLOCK_H=block_h,
+        num_warps=4,
+    )
+
+
+def _hc_head_triton(
+    hs_flat: torch.Tensor,
+    fn: torch.Tensor,
+    hc_scale: torch.Tensor,
+    hc_base: torch.Tensor,
+    out: torch.Tensor,
+    hidden_size: int,
+    rms_eps: float,
+    hc_eps: float,
+    hc_mult: int,
+) -> None:
+    """Fill pre-allocated `out` (T, H) in-place with the hc_head result."""
+    if hs_flat.shape[0] == 0:
+        return
+
+    hc_head_reduce_triton_kernel(
+        hs_flat,
+        fn,
+        hc_scale,
+        hc_base,
+        out,
+        rms_eps,
+        hc_eps,
+    )
+    return
+
+
+direct_register_custom_op(
+    op_name="hc_head_triton",
+    op_func=_hc_head_triton,
+    mutates_args=["out"],
+)
diff --git a/vllm/model_executor/layers/activation.py b/vllm/model_executor/layers/activation.py
index 3e00d21d5a1c..7f57e80a42cc 100644
--- a/vllm/model_executor/layers/activation.py
+++ b/vllm/model_executor/layers/activation.py
@@ -16,7 +16,7 @@
 from vllm.logger import init_logger
 from vllm.model_executor.custom_op import CustomOp
 from vllm.model_executor.utils import set_weight_attrs
-from vllm.platforms import current_platform
+from vllm.platforms import CpuArchEnum, current_platform
 from vllm.triton_utils import tl, triton
 from vllm.utils.collection_utils import LazyDict
 
@@ -151,6 +151,48 @@ def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
         return self.forward_cuda(x)
 
 
+@CustomOp.register("silu_and_mul_with_clamp")
+class SiluAndMulWithClamp(CustomOp):
+    """SwiGLU activation with input clamping (used by some MoE shared experts).
+
+    Computes:
+        gate = clamp(x[..., :d], max=swiglu_limit)
+        up   = clamp(x[..., d:], min=-swiglu_limit, max=swiglu_limit)
+        out  = silu(gate) * up
+    where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    def __init__(self, swiglu_limit: float, *, compile_native: bool = True):
+        super().__init__(compile_native=compile_native)
+        self.swiglu_limit = float(swiglu_limit)
+        if current_platform.is_rocm() or current_platform.is_xpu():
+            self._forward_method = self.forward_native
+        elif current_platform.is_cuda_alike():
+            self.op = torch.ops._C.silu_and_mul_with_clamp
+        elif current_platform.is_cpu():
+            self._forward_method = self.forward_native
+
+    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        gate = torch.clamp(x[..., :d], max=self.swiglu_limit)
+        up = torch.clamp(x[..., d:], min=-self.swiglu_limit, max=self.swiglu_limit)
+        return F.silu(gate) * up
+
+    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        self.op(out, x, self.swiglu_limit)
+        return out
+
+    def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
+        return self.forward_native(x)
+
+
 # --8<-- [start:mul_and_silu]
 @CustomOp.register("mul_and_silu")
 class MulAndSilu(CustomOp):
@@ -247,6 +289,34 @@ def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
         return self.forward_native(x)
 
 
+# --8<-- [start:gelu]
+@CustomOp.register("gelu")
+class GELU(CustomOp):
+    # --8<-- [end:gelu]
+
+    def __init__(self):
+        super().__init__()
+        if current_platform.get_cpu_architecture() == CpuArchEnum.ARM and hasattr(
+            torch.ops._C, "activation_lut_bf16"
+        ):
+            self.op = torch.ops._C.activation_lut_bf16
+        else:
+            self.op = None
+
+    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
+        return F.gelu(x, approximate="none")
+
+    def forward_cpu(self, x: torch.Tensor) -> torch.Tensor:
+        if self.op and x.dtype == torch.bfloat16 and x.is_contiguous():
+            out = torch.empty_like(x)
+            self.op(out, x, "gelu")
+            return out
+        return self.forward_native(x)
+
+    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
+        return self.forward_native(x)
+
+
 # --8<-- [start:gelu_and_mul]
 @CustomOp.register("gelu_and_mul")
 class GeluAndMul(CustomOp):
@@ -635,22 +705,14 @@ def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tensor):
 
 _ACTIVATION_REGISTRY = LazyDict(
     {
-        "gelu": lambda: nn.GELU(),
+        "gelu": lambda: GELU(),
         "gelu_fast": lambda: FastGELU(),
         "gelu_new": lambda: NewGELU(),
-        "gelu_pytorch_tanh": lambda: (
-            # TODO:[ROCm] PyTorch native GELU with tanh is unstable with torch.compile
-            logger.warning_once(
-                "[ROCm] PyTorch's native GELU with tanh approximation is unstable. "
-                "Falling back to GELU(approximate='none')."
-            ),
-            nn.GELU(approximate="none"),
-        )[1]
-        if current_platform.is_rocm()
-        else nn.GELU(approximate="tanh"),
+        "gelu_pytorch_tanh": lambda: _get_gelu_pytorch_tanh(),
         "relu": lambda: nn.ReLU(),
         "relu2": lambda: ReLUSquaredActivation(),
         "silu": lambda: nn.SiLU(),
+        "swish": lambda: nn.SiLU(),
         "quick_gelu": lambda: QuickGELU(),
         "tanh": lambda: nn.Tanh(),
         "sigmoid": lambda: nn.Sigmoid(),
@@ -659,6 +721,18 @@ def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tensor):
 )
 
 
+def _get_gelu_pytorch_tanh() -> nn.Module:
+    """Get PyTorch GELU with tanh approximation, with ROCm fallback."""
+    if current_platform.is_rocm():
+        # TODO:[ROCm] PyTorch native GELU with tanh is unstable with torch.compile
+        logger.warning_once(
+            "[ROCm] PyTorch's native GELU with tanh approximation is unstable. "
+            "Falling back to GELU(approximate='none')."
+        )
+        return nn.GELU(approximate="none")
+    return nn.GELU(approximate="tanh")
+
+
 def get_act_fn(act_fn_name: str) -> nn.Module:
     """Get an activation function by name."""
     act_fn_name = act_fn_name.lower()
@@ -675,12 +749,14 @@ def get_act_fn(act_fn_name: str) -> nn.Module:
     return _ACTIVATION_REGISTRY[act_fn_name]
 
 
-_ACTIVATION_AND_MUL_REGISTRY = LazyDict(
+_ACTIVATION_AND_MUL_REGISTRY: LazyDict[nn.Module] = LazyDict(
     {
         "gelu": lambda: GeluAndMul(),
+        "gelu_pytorch_tanh": lambda: GeluAndMul(approximate="tanh"),
         "silu": lambda: SiluAndMul(),
+        "swish": lambda: SiluAndMul(),
         "geglu": lambda: GeluAndMul(),
-        "swigluoai": lambda *args, **kwargs: SwigluOAIAndMul(*args, **kwargs),
+        "swigluoai": lambda: SwigluOAIAndMul(),
     }
 )
 
diff --git a/vllm/model_executor/layers/attention/attention.py b/vllm/model_executor/layers/attention/attention.py
index 7610030f3ed0..2e17a55ce7c4 100644
--- a/vllm/model_executor/layers/attention/attention.py
+++ b/vllm/model_executor/layers/attention/attention.py
@@ -25,11 +25,15 @@
 from vllm.model_executor.layers.quantization.utils.quant_utils import GroupShape
 from vllm.platforms import current_platform
 from vllm.utils.torch_utils import (
+    LayerNameType,
+    _encode_layer_name,
+    _resolve_layer_name,
     direct_register_custom_op,
     kv_cache_dtype_str_to_dtype,
 )
 from vllm.v1.attention.backend import (
     AttentionBackend,
+    AttentionMetadata,
     AttentionType,
 )
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
@@ -38,6 +42,7 @@
     FullAttentionSpec,
     KVCacheSpec,
     SlidingWindowSpec,
+    get_kv_quant_mode,
 )
 
 if TYPE_CHECKING:
@@ -130,9 +135,6 @@ def _init_kv_cache_quant(
         quant_config: Optional quantization configuration.
         prefix: Layer name prefix for quantization method lookup.
     """
-    quant_method = (
-        quant_config.get_quant_method(layer, prefix=prefix) if quant_config else None
-    )
 
     # Note [Register q/k/v/prob scales in state dict]
     # When calling model.to(device), only parameters/buffers in state dict are
@@ -208,6 +210,7 @@ def __init__(
         `self.kv_cache`.
         """
         super().__init__()
+        sliding_window: int | None
         if per_layer_sliding_window is not None:
             # per-layer sliding window
             sliding_window = per_layer_sliding_window
@@ -225,9 +228,14 @@ def __init__(
             kv_cache_dtype = "auto"
             calculate_kv_scales = False
 
-        # llm-compressor mdls need to set cache_dtype to "fp8" manually.
+        # llm-compressor models declare an FP8 KV-cache scheme in their
+        # checkpoint config. Honor it only when the user did not explicitly
+        # pick a kv_cache_dtype; an explicit choice (e.g. bfloat16) must win.
+        # The "auto" case is normally resolved upstream in
+        # resolve_kv_cache_dtype_string, but we re-apply here defensively in
+        # case anything bypassed that path.
         kv_cache_scheme = getattr(quant_config, "kv_cache_scheme", None)
-        if kv_cache_scheme is not None:
+        if kv_cache_scheme is not None and kv_cache_dtype == "auto":
             kv_cache_dtype = "fp8"
             calculate_kv_scales = False
             if cache_config is not None:
@@ -258,7 +266,7 @@ def __init__(
             if skip:
                 kv_cache_dtype = "auto"
                 calculate_kv_scales = False
-            logger.info(
+            logger.debug(
                 "Layer %s: kv_cache_dtype=%s, sliding_window=%s",
                 prefix,
                 kv_cache_dtype,
@@ -329,12 +337,40 @@ def __init__(
             logger.warning_once(
                 "Disabling prefix caching for FLASHINFER/TRITON_MLA "
                 "with batch invariance, as it is not yet supported.",
-                scope="local",
             )
             cache_config.enable_prefix_caching = False
 
+        if extra_impl_args.get("chunk_lookback", -1) > -1:
+            assert self.attn_backend.get_name() == "TRITON_ATTN", (
+                f"Chunked attention with lookback requires the Triton backend, "
+                f"but got {self.attn_backend.get_name()}."
+            )
+
+        if self.attn_backend.get_name() == "FLEX_ATTENTION":
+            block_m = vllm_config.attention_config.flex_attn_block_m
+            block_n = vllm_config.attention_config.flex_attn_block_n
+
+            if envs.VLLM_BATCH_INVARIANT and cache_config is not None:
+                if block_m is not None and block_m > cache_config.block_size:
+                    raise ValueError(
+                        f"flex_attn_block_m ({block_m}) must be "
+                        f"<= cache block size ({cache_config.block_size}) for "
+                        f"batch invariance"
+                    )
+                if block_n is not None and block_n > cache_config.block_size:
+                    raise ValueError(
+                        f"flex_attn_block_n ({block_n}) must be "
+                        f"<= cache block size ({cache_config.block_size}) for "
+                        f"batch invariance"
+                    )
+
+            if block_m is not None:
+                extra_impl_args.setdefault("block_m", block_m)
+            if block_n is not None:
+                extra_impl_args.setdefault("block_n", block_n)
+
         impl_cls = self.attn_backend.get_impl_cls()
-        self.impl = impl_cls(
+        self.impl = impl_cls(  # type: ignore[assignment]  # impl_cls always returns an AttentionImpl subclass
             num_heads,
             head_size,
             scale,
@@ -356,7 +392,6 @@ def __init__(
         # and let torch.compile handle them.
         self.use_direct_call = not current_platform.opaque_attention_op()
 
-        self.use_output = self.attn_backend.accept_output_buffer
         compilation_config = vllm_config.compilation_config
         if prefix in compilation_config.static_forward_context:
             raise ValueError(f"Duplicate layer name: {prefix}")
@@ -381,8 +416,12 @@ def __init__(
 
         # for attn backends supporting query quantization
         self.query_quant = None
-        if self.impl.supports_quant_query_input and self.kv_cache_dtype.startswith(
-            "fp8"
+        if (
+            self.impl.supports_quant_query_input
+            and (
+                self.kv_cache_dtype.startswith("fp8") or self.kv_cache_dtype == "nvfp4"
+            )
+            and not self.kv_cache_dtype.endswith("per_token_head")
         ):
             is_per_head = (
                 hasattr(self, "q_scale") and self.q_scale.numel() == self.num_kv_heads
@@ -415,7 +454,9 @@ def forward(
         `vllm.forward_context.get_forward_context().attn_metadata`.
         """
         if self.calculate_kv_scales:
-            torch.ops.vllm.maybe_calc_kv_scales(query, key, value, self.layer_name)
+            torch.ops.vllm.maybe_calc_kv_scales(
+                query, key, value, _encode_layer_name(self.layer_name)
+            )
         output_dtype = query.dtype
         if self.query_quant is not None:
             # quantizing with a simple torch operation enables
@@ -423,81 +464,69 @@ def forward(
             # which reduces overheads during decoding.
             # Otherwise queries are quantized using custom ops
             # which causes decoding overheads
-            assert self.kv_cache_dtype in {"fp8", "fp8_e4m3"}
+            assert self.kv_cache_dtype in {"fp8", "fp8_e4m3", "nvfp4"}
 
             # check if query quantization is supported
             if self.impl.supports_quant_query_input:
                 query, _ = self.query_quant(query, self._q_scale)
 
-        if self.use_output:
-            if output_shape is None:
-                # Handle both 2D [num_tokens, hidden] and
-                # 3D [num_tokens, heads, head_dim] query
-                num_tokens = query.shape[0]
-                output_shape = torch.Size(
-                    (num_tokens, self.num_heads * self.head_size_v)
-                )
-            output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
-            hidden_size = output_shape[-1]
-            # Reshape the query, key, and value tensors.
-            # NOTE(woosuk): We do this outside the custom op to minimize the
-            # CPU overheads from the non-CUDA-graph regions.
-            query = query.view(-1, self.num_heads, self.head_size)
-            output = output.view(-1, self.num_heads, self.head_size_v)
-            if key is not None:
-                key = key.view(-1, self.num_kv_heads, self.head_size)
-            if value is not None:
-                value = value.view(-1, self.num_kv_heads, self.head_size_v)
-            kv_cache_dummy_dep = None
-            if self.use_direct_call:
-                # Skip this if sharing KV cache with an earlier attention layer.
-                if (
-                    not self.attn_backend.forward_includes_kv_cache_update
-                    and self.kv_sharing_target_layer_name is None
-                    and key is not None
-                    and value is not None
-                ):
-                    kv_cache_dummy_dep = unified_kv_cache_update(
-                        key, value, self.layer_name
-                    )
-                unified_attention_with_output(
-                    query,
-                    key,
-                    value,
-                    output,
-                    self.layer_name,
-                    kv_cache_dummy_dep=kv_cache_dummy_dep,
-                )
-            else:
-                # Skip this if sharing KV cache with an earlier attention layer.
-                if (
-                    not self.attn_backend.forward_includes_kv_cache_update
-                    and self.kv_sharing_target_layer_name is None
-                    and key is not None
-                    and value is not None
-                ):
-                    kv_cache_dummy_dep = torch.ops.vllm.unified_kv_cache_update(
-                        key, value, self.layer_name
-                    )
-                torch.ops.vllm.unified_attention_with_output(
-                    query,
-                    key,
-                    value,
-                    output,
-                    self.layer_name,
-                    kv_cache_dummy_dep=kv_cache_dummy_dep,
+        if output_shape is None:
+            # Handle both 2D [num_tokens, hidden] and
+            # 3D [num_tokens, heads, head_dim] query
+            num_tokens = query.shape[0]
+            output_shape = torch.Size((num_tokens, self.num_heads * self.head_size_v))
+        output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
+        hidden_size = output_shape[-1]
+        # Reshape the query, key, and value tensors.
+        # NOTE(woosuk): We do this outside the custom op to minimize the
+        # CPU overheads from the non-CUDA-graph regions.
+        query = query.view(-1, self.num_heads, self.head_size)
+        output = output.view(-1, self.num_heads, self.head_size_v)
+        if key is not None:
+            key = key.view(-1, self.num_kv_heads, self.head_size)
+        if value is not None:
+            value = value.view(-1, self.num_kv_heads, self.head_size_v)
+        kv_cache_dummy_dep = None
+        if self.use_direct_call:
+            # Skip this if sharing KV cache with an earlier attention layer.
+            if (
+                not self.attn_backend.forward_includes_kv_cache_update
+                and self.kv_sharing_target_layer_name is None
+                and key is not None
+                and value is not None
+            ):
+                kv_cache_dummy_dep = unified_kv_cache_update(
+                    key, value, self.layer_name
                 )
-            return output.view(-1, hidden_size)
-        else:
-            assert self.attn_backend.forward_includes_kv_cache_update, (
-                "Split KV cache update not supported when output tensor not provided."
+            unified_attention_with_output(
+                query,
+                key,
+                value,
+                output,
+                self.layer_name,
+                kv_cache_dummy_dep=kv_cache_dummy_dep,
             )
-            if self.use_direct_call:
-                return unified_attention(query, key, value, self.layer_name)
-            else:
-                return torch.ops.vllm.unified_attention(
-                    query, key, value, self.layer_name
+        else:
+            # Skip this if sharing KV cache with an earlier attention layer.
+            encoded = _encode_layer_name(self.layer_name)
+            if (
+                not self.attn_backend.forward_includes_kv_cache_update
+                and self.kv_sharing_target_layer_name is None
+                and key is not None
+                and value is not None
+            ):
+                kv_cache_dummy_dep = torch.ops.vllm.unified_kv_cache_update(
+                    key, value, encoded
                 )
+            torch.ops.vllm.unified_attention_with_output(
+                query,
+                key,
+                value,
+                output,
+                encoded,
+                kv_cache_dummy_dep=kv_cache_dummy_dep,
+            )
+        return output.view(-1, hidden_size)
 
     def calc_kv_scales(self, query, key, value):
         self._q_scale.copy_(torch.abs(query).max() / self.q_range)
@@ -534,11 +563,12 @@ def process_weights_after_loading(self, act_dtype: torch.dtype):
     def get_attn_backend(self) -> type[AttentionBackend]:
         return self.attn_backend
 
-    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec:
+    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:
         # Block size may get updated after model loading, refresh it
         block_size = vllm_config.cache_config.block_size
         # Should not be called for enc-dec or encoder-only attention.
         assert self.attn_type == AttentionType.DECODER
+        quant_mode = get_kv_quant_mode(self.kv_cache_dtype)
         if self.sliding_window is not None:
             assert not vllm_config.model_config.use_mla, (
                 "MLA is not supported for slidingwindow"
@@ -547,9 +577,28 @@ def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec:
                 block_size=block_size,
                 num_kv_heads=self.num_kv_heads,
                 head_size=self.head_size,
+                head_size_v=self.head_size_v,
                 dtype=self.kv_cache_torch_dtype,
+                kv_quant_mode=quant_mode,
                 sliding_window=self.sliding_window,
             )
+        elif self.kv_cache_dtype.startswith("turboquant_"):
+            from vllm.model_executor.layers.quantization.turboquant.config import (
+                TurboQuantConfig,
+            )
+            from vllm.v1.kv_cache_interface import TQFullAttentionSpec
+
+            tq_config = TurboQuantConfig.from_cache_dtype(
+                self.kv_cache_dtype, self.head_size
+            )
+            return TQFullAttentionSpec(
+                block_size=block_size,
+                num_kv_heads=self.num_kv_heads,
+                head_size=self.head_size,
+                head_size_v=self.head_size,
+                dtype=self.kv_cache_torch_dtype,
+                tq_slot_size=tq_config.slot_size_aligned,
+            )
         else:
             return FullAttentionSpec(
                 block_size=block_size,
@@ -557,6 +606,7 @@ def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec:
                 head_size=self.head_size,
                 head_size_v=self.head_size_v,
                 dtype=self.kv_cache_torch_dtype,
+                kv_quant_mode=quant_mode,
             )
 
 
@@ -564,8 +614,9 @@ def maybe_calc_kv_scales(
     query: torch.Tensor,
     key: torch.Tensor,
     value: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
 ) -> None:
+    layer_name = _resolve_layer_name(layer_name)
     forward_context: ForwardContext = get_forward_context()
     self = forward_context.no_compile_layers[layer_name]
 
@@ -581,7 +632,7 @@ def maybe_calc_kv_scales_fake(
     query: torch.Tensor,
     key: torch.Tensor,
     value: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
 ) -> None:
     return
 
@@ -617,9 +668,16 @@ def get_attention_context(
         extracted from the forward context.
     """
     forward_context: ForwardContext = get_forward_context()
-    attn_metadata = forward_context.attn_metadata
-    if isinstance(attn_metadata, dict):
-        attn_metadata = attn_metadata[layer_name]
+    attn_metadata_raw = forward_context.attn_metadata
+    attn_metadata: AttentionMetadata
+    if isinstance(attn_metadata_raw, dict):
+        attn_metadata = attn_metadata_raw[layer_name]
+    elif isinstance(attn_metadata_raw, list):
+        # list[dict[str, AttentionMetadata]]: used in speculative decoding
+        # where [0] is the base-model (non-speculative) metadata dict.
+        attn_metadata = attn_metadata_raw[0][layer_name]
+    else:
+        attn_metadata = attn_metadata_raw
     attn_layer: Attention | MLAAttention = forward_context.no_compile_layers[layer_name]
     kv_cache = attn_layer.kv_cache
     slot_mapping = forward_context.slot_mapping
@@ -630,50 +688,22 @@ def get_attention_context(
     return attn_metadata, attn_layer, kv_cache, layer_slot_mapping
 
 
-@maybe_transfer_kv_layer
-def unified_attention(
-    query: torch.Tensor,
-    key: torch.Tensor,
-    value: torch.Tensor,
-    layer_name: str,
-) -> torch.Tensor:
-    attn_metadata, self, kv_cache, _ = get_attention_context(layer_name)
-    output = self.impl.forward(self, query, key, value, kv_cache, attn_metadata)
-
-    return output
-
-
-def unified_attention_fake(
-    query: torch.Tensor,
-    key: torch.Tensor,
-    value: torch.Tensor,
-    layer_name: str,
-) -> torch.Tensor:
-    return torch.empty_like(query).contiguous()
-
-
-direct_register_custom_op(
-    op_name="unified_attention",
-    op_func=unified_attention,
-    fake_impl=unified_attention_fake,
-)
-
-
 def unified_kv_cache_update(
     key: torch.Tensor,
     value: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
 ) -> torch.Tensor:
     """
     Returns a dummy that is passed to unified_attention to signal a side effect and
     the data dependency between them to ensure torch.compile preserves ordering.
     """
+    layer_name = _resolve_layer_name(layer_name)
     _, attn_layer, kv_cache, layer_slot_mapping = get_attention_context(layer_name)
     if layer_slot_mapping is not None:
         assert hasattr(attn_layer.impl, "do_kv_cache_update"), (
             f"{attn_layer.impl.__class__.__name__} does not support kv cache update"
         )
-        attn_layer.impl.do_kv_cache_update(
+        attn_layer.impl.do_kv_cache_update(  # type: ignore[attr-defined]
             attn_layer,
             key,
             value,
@@ -687,7 +717,7 @@ def unified_kv_cache_update(
 def unified_kv_cache_update_fake(
     key: torch.Tensor,
     value: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
 ) -> torch.Tensor:
     return torch.empty(0, device=key.device, dtype=key.dtype)
 
@@ -706,7 +736,7 @@ def unified_attention_with_output(
     key: torch.Tensor,
     value: torch.Tensor,
     output: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
     output_scale: torch.Tensor | None = None,
     output_block_scale: torch.Tensor | None = None,
     kv_cache_dummy_dep: torch.Tensor | None = None,
@@ -715,6 +745,7 @@ def unified_attention_with_output(
     # that ensures torch.compile preserves ordering between KV cache update and
     # attention forward.
     del kv_cache_dummy_dep
+    layer_name = _resolve_layer_name(layer_name)
     attn_metadata, self, kv_cache, _ = get_attention_context(layer_name)
 
     self.impl.forward(
@@ -735,7 +766,7 @@ def unified_attention_with_output_fake(
     key: torch.Tensor,
     value: torch.Tensor,
     output: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
     output_scale: torch.Tensor | None = None,
     output_block_scale: torch.Tensor | None = None,
     kv_cache_dummy_dep: torch.Tensor | None = None,
diff --git a/vllm/model_executor/layers/attention/chunked_local_attention.py b/vllm/model_executor/layers/attention/chunked_local_attention.py
index b747304acd0b..cb595438adef 100644
--- a/vllm/model_executor/layers/attention/chunked_local_attention.py
+++ b/vllm/model_executor/layers/attention/chunked_local_attention.py
@@ -23,12 +23,13 @@
     AttentionSpec,
     ChunkedLocalAttentionSpec,
     KVCacheSpec,
+    get_kv_quant_mode,
 )
 
 
 @functools.lru_cache
 def create_chunked_local_attention_backend(
-    underlying_attn_backend: AttentionBackend,
+    underlying_attn_backend: type[AttentionBackend],
     attention_chunk_size: int,
 ) -> type[AttentionBackend]:
     prefix = f"ChunkedLocalAttention_{attention_chunk_size}_"
@@ -123,5 +124,6 @@ def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec:
             num_kv_heads=self.num_kv_heads,
             head_size=self.head_size,
             dtype=self.kv_cache_torch_dtype,
+            kv_quant_mode=get_kv_quant_mode(self.kv_cache_dtype),
             attention_chunk_size=self.attention_chunk_size,
         )
diff --git a/vllm/model_executor/layers/attention/cross_attention.py b/vllm/model_executor/layers/attention/cross_attention.py
index 5bd8e163f4aa..091f0a1856d4 100644
--- a/vllm/model_executor/layers/attention/cross_attention.py
+++ b/vllm/model_executor/layers/attention/cross_attention.py
@@ -18,7 +18,11 @@
     subclass_attention_backend_with_overrides,
 )
 from vllm.v1.attention.selector import get_attn_backend
-from vllm.v1.kv_cache_interface import CrossAttentionSpec, KVCacheSpec
+from vllm.v1.kv_cache_interface import (
+    CrossAttentionSpec,
+    KVCacheSpec,
+    get_kv_quant_mode,
+)
 
 logger = init_logger(__name__)
 
@@ -68,7 +72,7 @@ def _get_cross_slot_mapping(
 
 @functools.lru_cache
 def create_cross_attention_backend(
-    underlying_attn_backend: AttentionBackend,
+    underlying_attn_backend: type[AttentionBackend],
 ) -> type[AttentionBackend]:
     prefix = "CrossAttention_"
     underlying_builder = underlying_attn_backend.get_builder_cls()
@@ -83,17 +87,26 @@ def build(
         ) -> AttentionMetadata:
             new_metadata = copy(common_attn_metadata)
             new_metadata.causal = False
+            assert new_metadata.encoder_seq_lens_cpu is not None
             max_encoder_len = int(new_metadata.encoder_seq_lens_cpu.max())
             new_metadata.max_seq_len = max_encoder_len
-            # Any computed tokens indicated decode step>1 (no chunked prefill)
-            num_cache_decodes = (
-                (common_attn_metadata.num_computed_tokens_cpu > 0).sum().item()
+            # Any computed tokens indicates decode step>1 (no chunked prefill).
+            # The upper bound is exact for this `> 0` test - prefill rows have
+            # num_computed == 0 and decode rows have num_computed > 0.
+            query_lens_cpu = (
+                common_attn_metadata.query_start_loc_cpu[1:]
+                - common_attn_metadata.query_start_loc_cpu[:-1]
+            )
+            assert common_attn_metadata.seq_lens_cpu_upper_bound is not None
+            num_computed_tokens_cpu = (
+                common_attn_metadata.seq_lens_cpu_upper_bound - query_lens_cpu
             )
+            num_cache_decodes = (num_computed_tokens_cpu > 0).sum().item()
             if num_cache_decodes > 0:
                 # CrossAttn KV cache has already been populated on first decoder step,
                 # skip slot_mapping calculation for requests that do not need
                 # reshape_and_cache.
-                num_tokens = common_attn_metadata.num_computed_tokens_cpu.numpy()
+                num_tokens = num_computed_tokens_cpu.numpy()
                 new_metadata.encoder_seq_lens_cpu = np.where(
                     num_tokens > 0, 0, new_metadata.encoder_seq_lens_cpu
                 )
@@ -114,7 +127,7 @@ def build(
                 self.device,
             )
             attn_metadata = super().build(common_prefix_len, new_metadata, fast_build)
-            attn_metadata.slot_mapping = slot_mapping
+            attn_metadata.slot_mapping = slot_mapping  # type: ignore[attr-defined]
             return attn_metadata
 
     # NOTE(Lucas): we need a custom impl so we can use the slot-mapping computed by
@@ -129,7 +142,7 @@ def forward(
             value: torch.Tensor,
             kv_cache: torch.Tensor,
             attn_metadata: AttentionMetadata,
-            output: torch.Tensor | None = None,
+            output: torch.Tensor,
             output_scale: torch.Tensor | None = None,
             output_block_scale: torch.Tensor | None = None,
         ) -> torch.Tensor:
@@ -140,8 +153,12 @@ def forward(
                 and key is not None
                 and value is not None
             ):
-                self.do_kv_cache_update(
-                    layer, key, value, kv_cache, attn_metadata.slot_mapping
+                self.do_kv_cache_update(  # type: ignore[attr-defined]
+                    layer,
+                    key,
+                    value,
+                    kv_cache,
+                    attn_metadata.slot_mapping,  # type: ignore[attr-defined]
                 )
 
             return super().forward(
@@ -220,4 +237,5 @@ def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec:
             num_kv_heads=self.num_kv_heads,
             head_size=self.head_size,
             dtype=self.kv_cache_torch_dtype,
+            kv_quant_mode=get_kv_quant_mode(self.kv_cache_dtype),
         )
diff --git a/vllm/model_executor/layers/attention/encoder_only_attention.py b/vllm/model_executor/layers/attention/encoder_only_attention.py
index 0897ee45b84d..5805fe2ae1ca 100644
--- a/vllm/model_executor/layers/attention/encoder_only_attention.py
+++ b/vllm/model_executor/layers/attention/encoder_only_attention.py
@@ -21,7 +21,7 @@
 
 @functools.lru_cache
 def create_encoder_only_attention_backend(
-    underlying_attn_backend: AttentionBackend,
+    underlying_attn_backend: type[AttentionBackend],
 ) -> type[AttentionBackend]:
     prefix = "EncoderOnlyAttention_"
     underlying_builder = underlying_attn_backend.get_builder_cls()
@@ -93,6 +93,6 @@ def __init__(
             **kwargs,
         )
 
-    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec:
+    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:
         # Does not need KV cache
         return None
diff --git a/vllm/model_executor/layers/attention/kv_transfer_utils.py b/vllm/model_executor/layers/attention/kv_transfer_utils.py
index 4afc5ccb1658..1dcd445b5eee 100644
--- a/vllm/model_executor/layers/attention/kv_transfer_utils.py
+++ b/vllm/model_executor/layers/attention/kv_transfer_utils.py
@@ -9,6 +9,7 @@
     has_kv_transfer_group,
     is_v1_kv_transfer_group,
 )
+from vllm.utils.torch_utils import _resolve_layer_name
 
 
 def maybe_transfer_kv_layer(func: Callable) -> Callable:
@@ -38,7 +39,7 @@ def wrapper(*args, **kwargs):
         if not has_kv_transfer_group() or not is_v1_kv_transfer_group():
             return func(*args, **kwargs)
 
-        layer_name: str = args[layer_name_index]
+        layer_name = _resolve_layer_name(args[layer_name_index])
 
         # Extract attention context (metadata, layer, kv_cache, layer_slot_mapping)
         attn_metadata, _, kv_cache, _ = get_attention_context(layer_name)
diff --git a/vllm/model_executor/layers/attention/mla_attention.py b/vllm/model_executor/layers/attention/mla_attention.py
index 0215ec1a0735..71fd297a7edd 100644
--- a/vllm/model_executor/layers/attention/mla_attention.py
+++ b/vllm/model_executor/layers/attention/mla_attention.py
@@ -189,12 +189,9 @@
 
 import functools
 from abc import abstractmethod
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from enum import Enum
-from typing import TYPE_CHECKING, ClassVar, Generic, TypeVar, cast
-
-if TYPE_CHECKING:
-    from flashinfer import BatchPrefillWithRaggedKVCacheWrapper
+from typing import ClassVar, Generic, TypeVar, cast
 
 import torch
 import torch.nn as nn
@@ -203,6 +200,7 @@
 import vllm.envs as envs
 from vllm import _custom_ops as ops
 from vllm._aiter_ops import rocm_aiter_ops
+from vllm.compilation.breakable_cudagraph import eager_break_during_capture
 from vllm.config import (
     CacheConfig,
     ModelConfig,
@@ -234,13 +232,22 @@
 from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     GroupShape,
+    QuantKey,
     get_and_maybe_dequant_weights,
+    kFp8Dynamic64Sym,
+    kFp8Dynamic128Sym,
+    kFp8StaticTensorSym,
+    kNvfp4Dynamic,
 )
 from vllm.platforms import current_platform
-from vllm.utils.flashinfer import has_flashinfer, has_nvidia_artifactory
+from vllm.utils.flashinfer import has_flashinfer
 from vllm.utils.math_utils import cdiv, round_down
 from vllm.utils.torch_utils import (
+    LayerNameType,
+    _encode_layer_name,
+    _resolve_layer_name,
     direct_register_custom_op,
+    is_quantized_kv_cache,
     kv_cache_dtype_str_to_dtype,
 )
 from vllm.v1.attention.backend import (
@@ -253,11 +260,12 @@
     MLAAttentionImpl,
     SparseMLAAttentionImpl,
 )
-from vllm.v1.attention.backends.fa_utils import get_flash_attn_version
+from vllm.v1.attention.backends.mla.prefill import (
+    MLAPrefillBackend,
+    get_mla_prefill_backend,
+)
 from vllm.v1.attention.backends.utils import (
     get_dcp_local_seq_lens,
-    get_per_layer_parameters,
-    infer_global_hyperparameters,
     split_decodes_and_prefills,
 )
 from vllm.v1.attention.ops.common import cp_lse_ag_out_rs
@@ -272,6 +280,44 @@
 
 logger = init_logger(__name__)
 
+_FP8_DTYPE = current_platform.fp8_dtype()
+
+
+def _detect_output_quant_key(
+    output: torch.Tensor,
+    output_scale: torch.Tensor | None,
+    output_block_scale: torch.Tensor | None,
+    output_dim: int,
+) -> QuantKey | None:
+    """Detect the output quantization key from fusion pass parameters.
+
+    Returns the appropriate QuantKey, or None if no quantization is needed.
+    Detection is based on output dtype and which scale tensors are present.
+    """
+    if output_scale is None and output_block_scale is None:
+        return None
+    if output_block_scale is not None:
+        if output.dtype == _FP8_DTYPE:
+            # Per-group FP8 uses block scales only, not a separate output_scale
+            assert output_scale is None
+            # Infer group size from scale shape
+            num_groups = output_block_scale.shape[-1]
+            group_size = output_dim // num_groups
+            if group_size == 128:
+                return kFp8Dynamic128Sym
+            elif group_size == 64:
+                return kFp8Dynamic64Sym
+            else:
+                raise ValueError(
+                    f"Unsupported group FP8 group_size={group_size} "
+                    f"(output_dim={output_dim}, num_groups={num_groups}). "
+                    f"Only group_size 128 and 64 are supported."
+                )
+        # output_scale None implies MXFP4, not supported
+        assert output_scale is not None
+        return kNvfp4Dynamic
+    return kFp8StaticTensorSym
+
 
 class MLAAttention(nn.Module, AttentionLayerBase):
     """Multi-Head Latent Attention layer.
@@ -300,6 +346,7 @@ def __init__(
         cache_config: CacheConfig | None = None,
         quant_config: QuantizationConfig | None = None,
         prefix: str = "",
+        attn_backend: type[AttentionBackend] | None = None,
         use_sparse: bool = False,
         indexer: object | None = None,
         **extra_impl_args,
@@ -329,20 +376,27 @@ def __init__(
         self.quant_config = quant_config
 
         dtype = torch.get_default_dtype()
-        self.attn_backend = get_attn_backend(
-            self.head_size,
-            dtype,
-            kv_cache_dtype,
-            use_mla=True,
-            use_sparse=use_sparse,
-            num_heads=self.num_heads,
-        )
+        if attn_backend is not None:
+            assert attn_backend.is_mla(), (
+                f"MLAAttention: attn_backend must be an MLA backend, "
+                f"got {attn_backend.get_name()} instead"
+            )
+            self.attn_backend = attn_backend
+        else:
+            self.attn_backend = get_attn_backend(
+                self.head_size,
+                dtype,
+                kv_cache_dtype,
+                use_mla=True,
+                use_sparse=use_sparse,
+                num_heads=self.num_heads,
+            )
 
         # FlashMLA Sparse Attention fp8 backend uses "fp8_ds_mla" kv-cache format
         # Automatically convert fp8 kv-cache format to "fp8_ds_mla"
         if (
             self.attn_backend.get_name() == "FLASHMLA_SPARSE"
-            and kv_cache_dtype.startswith("fp8")
+            and is_quantized_kv_cache(kv_cache_dtype)
             and kv_cache_dtype != "fp8_ds_mla"
         ):
             assert cache_config is not None
@@ -356,7 +410,7 @@ def __init__(
 
         if (
             self.attn_backend.get_name() == "FLASHINFER_MLA_SPARSE"
-            and kv_cache_dtype.startswith("fp8")
+            and is_quantized_kv_cache(kv_cache_dtype)
         ):
             logger.info_once(
                 "Using standard fp8 KV cache format. To use DeepSeek's fp8_ds_mla "
@@ -380,12 +434,11 @@ def __init__(
             logger.warning_once(
                 "Disabling prefix caching for TRITON_MLA / FLASHINFER "
                 "with batch invariance, as it is not yet supported.",
-                scope="local",
             )
             cache_config.enable_prefix_caching = False
 
         impl_cls = cast(type[MLAAttentionImpl], self.attn_backend.get_impl_cls())
-        self.impl = impl_cls(
+        self.impl = impl_cls(  # type: ignore[assignment]  # impl_cls always returns an MLAAttentionImpl subclass
             num_heads=self.num_heads,
             head_size=self.head_size,
             scale=self.scale,
@@ -410,20 +463,32 @@ def __init__(
         self.q_pad_num_heads = getattr(self.impl, "q_pad_num_heads", None)
         self.use_direct_call = not current_platform.opaque_attention_op()
 
-        compilation_config = get_current_vllm_config().compilation_config
+        vllm_config = get_current_vllm_config()
+        compilation_config = vllm_config.compilation_config
         if prefix in compilation_config.static_forward_context:
             raise ValueError(f"Duplicate layer name: {prefix}")
         compilation_config.static_forward_context[prefix] = self
 
+        prefill_backend_cls = get_mla_prefill_backend(vllm_config)
+        self.prefill_backend = prefill_backend_cls(
+            num_heads=self.num_heads,
+            scale=self.scale,
+            kv_lora_rank=self.kv_lora_rank,
+            qk_nope_head_dim=self.qk_nope_head_dim,
+            qk_rope_head_dim=self.qk_rope_head_dim,
+            v_head_dim=self.v_head_dim,
+            vllm_config=vllm_config,
+        )
+
         self.kv_cache = torch.tensor([])
 
         self.use_sparse = use_sparse
 
-        vllm_config = get_current_vllm_config_or_none()
+        _vllm_config = get_current_vllm_config_or_none()
         self.dcp_a2a = (
-            vllm_config is not None
-            and vllm_config.parallel_config.decode_context_parallel_size > 1
-            and vllm_config.parallel_config.dcp_comm_backend == "a2a"
+            _vllm_config is not None
+            and _vllm_config.parallel_config.decode_context_parallel_size > 1
+            and _vllm_config.parallel_config.dcp_comm_backend == "a2a"
         )
 
         # Initialize q/k/v range constants.
@@ -448,6 +513,11 @@ def __init__(
             group_shape=GroupShape.PER_TENSOR,
             compile_native=True,
         )
+        self._quant_fp8_op = QuantFP8(
+            static=True,
+            group_shape=GroupShape.PER_TENSOR,
+            compile_native=True,
+        )
 
     @property
     def chunked_prefill_workspace_size(self) -> int:
@@ -467,20 +537,32 @@ def forward(
         output_shape: torch.Size | None = None,
     ) -> torch.Tensor:
         if self.calculate_kv_scales:
-            torch.ops.vllm.maybe_calc_kv_scales(q, kv_c_normed, k_pe, self.layer_name)
+            torch.ops.vllm.maybe_calc_kv_scales(
+                q,
+                kv_c_normed,
+                k_pe,
+                _encode_layer_name(self.layer_name),
+            )
 
         if self.use_direct_call:
             forward_context: ForwardContext = get_forward_context()
-            attn_metadata = forward_context.attn_metadata
-            if isinstance(attn_metadata, dict):
-                attn_metadata = attn_metadata[self.layer_name]
+            attn_metadata_raw = forward_context.attn_metadata
+            attn_metadata: MLACommonMetadata
+            if isinstance(attn_metadata_raw, dict):
+                attn_metadata = attn_metadata_raw[self.layer_name]  # type: ignore[assignment]
+            elif isinstance(attn_metadata_raw, list):
+                # list[dict[str, AttentionMetadata]]: used in speculative decoding
+                # where [0] is the base-model (non-speculative) metadata dict.
+                attn_metadata = attn_metadata_raw[0][self.layer_name]  # type: ignore[assignment]
+            else:
+                attn_metadata = attn_metadata_raw
             self_kv_cache = self.kv_cache
             slot_mapping = forward_context.slot_mapping
 
             assert isinstance(slot_mapping, dict), (
                 f"Expected slot_mapping to be a dict, got {type(slot_mapping)}. "
             )
-            self.impl.do_kv_cache_update(
+            self.impl.do_kv_cache_update(  # type: ignore[attr-defined]
                 kv_c_normed,
                 k_pe,
                 self_kv_cache,
@@ -488,48 +570,35 @@ def forward(
                 self.kv_cache_dtype,
                 self._k_scale,
             )
-            if self.attn_backend.accept_output_buffer:
-                output = torch.empty(output_shape, dtype=q.dtype, device=q.device)
-                self.forward_impl(
-                    q,
-                    kv_c_normed,
-                    k_pe,
-                    self_kv_cache,
-                    attn_metadata,
-                    output=output,
-                )
-                return output
-            else:
-                return self.forward_impl(
-                    q, kv_c_normed, k_pe, self_kv_cache, attn_metadata
-                )
+            output = torch.empty(output_shape, dtype=q.dtype, device=q.device)
+            self.forward_impl(
+                q,
+                kv_c_normed,
+                k_pe,
+                self_kv_cache,
+                attn_metadata,
+                output=output,
+            )
+            return output
         else:
+            encoded = _encode_layer_name(self.layer_name)
             kv_cache_dummy_dep = torch.ops.vllm.unified_mla_kv_cache_update(
                 kv_c_normed,
                 k_pe,
-                self.layer_name,
+                encoded,
                 self.kv_cache_dtype,
                 self._k_scale,
             )
-            if self.attn_backend.accept_output_buffer:
-                output = torch.empty(output_shape, dtype=q.dtype, device=q.device)
-                torch.ops.vllm.unified_mla_attention_with_output(
-                    q,
-                    kv_c_normed,
-                    k_pe,
-                    output,
-                    self.layer_name,
-                    kv_cache_dummy_dep=kv_cache_dummy_dep,
-                )
-                return output
-            else:
-                return torch.ops.vllm.unified_mla_attention(
-                    q,
-                    kv_c_normed,
-                    k_pe,
-                    self.layer_name,
-                    kv_cache_dummy_dep=kv_cache_dummy_dep,
-                )
+            output = torch.empty(output_shape, dtype=q.dtype, device=q.device)
+            torch.ops.vllm.unified_mla_attention_with_output(
+                q,
+                kv_c_normed,
+                k_pe,
+                output,
+                encoded,
+                kv_cache_dummy_dep=kv_cache_dummy_dep,
+            )
+            return output
 
     def forward_impl(
         self,
@@ -538,15 +607,31 @@ def forward_impl(
         k_pe: torch.Tensor,  # value in unified attn
         kv_cache: torch.Tensor,
         attn_metadata: "MLACommonMetadata",
-        output: torch.Tensor | None = None,
+        output: torch.Tensor,
         output_scale: torch.Tensor | None = None,
         output_block_scale: torch.Tensor | None = None,
+        quant_group_size: int | None = None,
+        quant_scale_ue8m0: bool | None = None,
+        quant_col_major: bool | None = None,
+        quant_tma_aligned: bool | None = None,
     ) -> torch.Tensor:
         assert output is not None, "Output tensor must be provided."
 
-        if output_scale is not None or output_block_scale is not None:
-            raise NotImplementedError(
-                "fused output quantization is not yet supported for MLA"
+        quant_key = _detect_output_quant_key(
+            output, output_scale, output_block_scale, self.num_heads * self.v_head_dim
+        )
+        if quant_key is not None:
+            # The fusion pass has allocated output with quantized dtype
+            # (FP8 or uint8 for FP4). We can't write into it directly,
+            # so we swap in a temp buffer for computation, then quantize
+            # into the real output at the end.
+            # NOTE(carlyou): this is temporary until kernels support fp8 output
+            quant_output = output
+            output = torch.empty(
+                output.shape[0],
+                self.num_heads * self.v_head_dim,
+                dtype=q.dtype,
+                device=output.device,
             )
 
         if attn_metadata is None:
@@ -566,12 +651,14 @@ def forward_impl(
             # The zero fill is required when used with DP + EP
             # to ensure all ranks within a DP group compute the
             # same expert outputs.
+            if quant_key is not None:
+                return quant_output.fill_(0)
             return output.fill_(0)
 
         if self.impl.dcp_world_size == -1:
             self.impl.dcp_world_size = get_dcp_group().world_size
 
-        fp8_attention = self.kv_cache_dtype.startswith("fp8")
+        fp8_attention = is_quantized_kv_cache(self.kv_cache_dtype)
 
         num_actual_toks = attn_metadata.num_actual_tokens
 
@@ -601,7 +688,7 @@ def forward_impl(
             num_mha_tokens = q.size(0) - num_mqa_tokens
 
         if num_mha_tokens > 0:
-            self.impl.forward_mha(
+            self.impl.forward_mha(  # type: ignore[attr-defined]
                 q[num_mqa_tokens:],
                 k_c_normed[num_mqa_tokens:],
                 k_pe[num_mqa_tokens:],
@@ -684,7 +771,7 @@ def forward_impl(
             # call decode attn
             if not is_sparse_impl:
                 assert attn_metadata.decode is not None
-            attn_out, lse = self.impl.forward_mqa(mqa_q, kv_cache, attn_metadata, self)
+            attn_out, lse = self.impl.forward_mqa(mqa_q, kv_cache, attn_metadata, self)  # type: ignore[attr-defined]
 
             # correct dcp attn_out with lse.
             if self.impl.dcp_world_size > 1:
@@ -693,18 +780,56 @@ def forward_impl(
                         attn_out,
                         lse,
                         get_dcp_group(),
-                        is_lse_base_on_e=not getattr(self, "_use_fi_prefill", False),
+                        is_lse_base_on_e=True,
                     )
                 else:
                     attn_out = cp_lse_ag_out_rs(
                         attn_out,
                         lse,
                         get_dcp_group(),
-                        is_lse_base_on_e=not getattr(self, "_use_fi_prefill", False),
+                        is_lse_base_on_e=True,
                     )
 
             # v_up projection
             self._v_up_proj(attn_out, out=mqa_output_slice)
+
+        if quant_key is not None:
+            # Quantize the BF16 computation result into the quantized output
+            actual = output[:num_actual_toks]
+            if quant_key == kNvfp4Dynamic:
+                # NVFP4: two FP4 values packed into one uint8
+                assert output_block_scale is not None
+                fp4_data, fp4_scales = ops.scaled_fp4_quant(actual, output_scale)
+                quant_output[:num_actual_toks].copy_(fp4_data)
+                output_block_scale[: fp4_scales.shape[0]].copy_(fp4_scales)
+            elif quant_key in (kFp8Dynamic128Sym, kFp8Dynamic64Sym):
+                # Per-group FP8
+                assert output_block_scale is not None
+                assert quant_group_size is not None, (
+                    "Group FP8 output quant requested but "
+                    "quant_group_size not passed through custom op"
+                )
+                finfo = torch.finfo(_FP8_DTYPE)
+                torch.ops._C.per_token_group_fp8_quant(
+                    actual,
+                    quant_output[:num_actual_toks],
+                    output_block_scale[:num_actual_toks],
+                    quant_group_size,
+                    1e-10,  # eps
+                    finfo.min,
+                    finfo.max,
+                    quant_scale_ue8m0,
+                    quant_col_major,
+                    quant_tma_aligned,
+                )
+            elif quant_key == kFp8StaticTensorSym:
+                # Static FP8 quantization
+                fp8_data, _ = self._quant_fp8_op(actual, output_scale)
+                quant_output[:num_actual_toks].copy_(fp8_data)
+            else:
+                raise ValueError(f"Unsupported quant_key: {quant_key}")
+            return quant_output
+
         return output_padded
 
     def process_weights_after_loading(self, act_dtype: torch.dtype):
@@ -865,62 +990,14 @@ def _v_up_proj(self, x: torch.Tensor, out: torch.Tensor):
                 x, self.W_V, self.W_V_scale, group_size=128, transpose_bm=True, YQ=out
             )
         else:
-            # Convert from (B, N * V) to (N, B, V)
-            out = out.transpose(0, 1)
-
-            # Multiply (N, B, L) x (N, L, V) -> (N, B, V)
-            torch.bmm(x, self.W_UV, out=out)  # Reuse "out" to make it "hot"
-
-            # Convert from (N, B, V) to (B, N * V)
-            out_new = out.transpose(0, 1).reshape(-1, self.num_heads * self.v_head_dim)
-
-            # Adjust output buffer shape back to the original (B, N * V)
-            N, B, V = out.shape
-            out.resize_((B, N * V))
-            out.copy_(out_new)  # Copy result
-
-
-@maybe_transfer_kv_layer
-def unified_mla_attention(
-    q: torch.Tensor,
-    kv_c_normed: torch.Tensor,
-    k_pe: torch.Tensor,
-    layer_name: str,
-    kv_cache_dummy_dep: torch.Tensor | None = None,
-) -> torch.Tensor:
-    # kv_cache_dummy_dep is not used but accepting it creates a data dependency
-    # that ensures torch.compile preserves ordering between KV cache update and
-    # attention forward.
-    del kv_cache_dummy_dep
-    attn_metadata, layer, kv_cache, _ = get_attention_context(layer_name)
-    output = layer.forward_impl(q, kv_c_normed, k_pe, kv_cache, attn_metadata)
-
-    return output
-
-
-def unified_mla_attention_fake(
-    q: torch.Tensor,
-    kv_c_normed: torch.Tensor,
-    k_pe: torch.Tensor,
-    layer_name: str,
-    kv_cache_dummy_dep: torch.Tensor | None = None,
-) -> torch.Tensor:
-    return torch.empty_like(q).contiguous()
-
-
-direct_register_custom_op(
-    op_name="unified_mla_attention",
-    op_func=unified_mla_attention,
-    mutates_args=[],
-    fake_impl=unified_mla_attention_fake,
-    dispatch_key=current_platform.dispatch_key,
-)
+            # Multiply + Transpose (N, B, L) x (N, L, V)->(N, B, V)->(B, N, V)
+            torch.bmm(x, self.W_UV, out=out.transpose(0, 1))
 
 
 def unified_mla_kv_cache_update(
     kv_c_normed: torch.Tensor,
     k_pe: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
     kv_cache_dtype: str,
     k_scale: torch.Tensor,
 ) -> torch.Tensor:
@@ -928,21 +1005,10 @@ def unified_mla_kv_cache_update(
     Returns a dummy that is passed to unified_attention to signal a side effect and
     the data dependency between them to ensure torch.compile preserves ordering.
     """
-    forward_context = get_forward_context()
-    if forward_context.attn_metadata is None:
-        # Dummy/profile forwards should not update live KV cache pages.
-        return torch.empty(0, device=kv_c_normed.device, dtype=kv_c_normed.dtype)
-
-    attn_layer = forward_context.no_compile_layers[layer_name]
-    kv_cache = attn_layer.kv_cache
-
-    slot_mapping = forward_context.slot_mapping
-    assert isinstance(slot_mapping, dict), (
-        f"Expected slot_mapping to be a dict, got {type(slot_mapping)}. "
-    )
-    layer_slot_mapping = slot_mapping.get(layer_name)
+    layer_name = _resolve_layer_name(layer_name)
+    _, attn_layer, kv_cache, layer_slot_mapping = get_attention_context(layer_name)
     if layer_slot_mapping is not None:
-        attn_layer.impl.do_kv_cache_update(
+        attn_layer.impl.do_kv_cache_update(  # type: ignore[attr-defined]
             kv_c_normed,
             k_pe,
             kv_cache,
@@ -957,7 +1023,7 @@ def unified_mla_kv_cache_update(
 def unified_mla_kv_cache_update_fake(
     kv_c_normed: torch.Tensor,
     k_pe: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
     kv_cache_dtype: str,
     k_scale: torch.Tensor,
 ) -> torch.Tensor:
@@ -971,21 +1037,27 @@ def unified_mla_kv_cache_update_fake(
 )
 
 
+@eager_break_during_capture
 @maybe_transfer_kv_layer
 def unified_mla_attention_with_output(
     q: torch.Tensor,
     kv_c_normed: torch.Tensor,
     k_pe: torch.Tensor,
     output: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
     output_scale: torch.Tensor | None = None,
     output_block_scale: torch.Tensor | None = None,
     kv_cache_dummy_dep: torch.Tensor | None = None,
+    quant_group_size: int | None = None,
+    quant_scale_ue8m0: bool | None = None,
+    quant_col_major: bool | None = None,
+    quant_tma_aligned: bool | None = None,
 ) -> None:
     # kv_cache_dummy_dep is not used but accepting it creates a data dependency
     # that ensures torch.compile preserves ordering between KV cache update and
     # attention forward.
     del kv_cache_dummy_dep
+    layer_name = _resolve_layer_name(layer_name)
     attn_metadata, layer, kv_cache, _ = get_attention_context(layer_name)
     layer.forward_impl(
         q,
@@ -996,6 +1068,10 @@ def unified_mla_attention_with_output(
         output=output,
         output_scale=output_scale,
         output_block_scale=output_block_scale,
+        quant_group_size=quant_group_size,
+        quant_scale_ue8m0=quant_scale_ue8m0,
+        quant_col_major=quant_col_major,
+        quant_tma_aligned=quant_tma_aligned,
     )
 
 
@@ -1004,10 +1080,14 @@ def unified_mla_attention_with_output_fake(
     kv_c_normed: torch.Tensor,
     k_pe: torch.Tensor,
     output: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
     output_scale: torch.Tensor | None = None,
     output_block_scale: torch.Tensor | None = None,
     kv_cache_dummy_dep: torch.Tensor | None = None,
+    quant_group_size: int | None = None,
+    quant_scale_ue8m0: bool | None = None,
+    quant_col_major: bool | None = None,
+    quant_tma_aligned: bool | None = None,
 ) -> None:
     return
 
@@ -1018,6 +1098,7 @@ def unified_mla_attention_with_output_fake(
     mutates_args=["output", "output_block_scale"],
     fake_impl=unified_mla_attention_with_output_fake,
     dispatch_key=current_platform.dispatch_key,
+    tags=(torch.Tag.flexible_layout,),
 )
 
 
@@ -1038,33 +1119,6 @@ class QueryLenSupport(Enum):
     VARLEN = "varlen"
 
 
-try:
-    from vllm.vllm_flash_attn import (  # type: ignore[attr-defined]
-        flash_attn_varlen_func,
-    )
-
-    is_vllm_fa = True
-except ImportError:
-    is_vllm_fa = False
-    flash_attn_varlen_func = None  # type: ignore[assignment]
-    # On ROCm, vllm_flash_attn is not available, try upstream flash_attn instead.
-    # On CUDA, vllm_flash_attn should always be available (built with vLLM),
-    # so we don't attempt the fallback there.
-    if current_platform.is_rocm():
-        try:
-            from flash_attn import flash_attn_varlen_func  # type: ignore[no-redef]
-        except ImportError:
-            logger.debug(
-                "flash_attn not available on ROCm; "
-                "MLA models using TRITON_MLA will require flash_attn. "
-                "AITER_MLA backends use aiter kernels instead."
-            )
-    elif current_platform.is_xpu():
-        from vllm._xpu_ops import xpu_ops as ops
-
-        flash_attn_varlen_func = ops.flash_attn_varlen_func  # type: ignore[no-redef]
-
-
 def dynamic_per_batched_tensor_quant(
     x: torch.Tensor, dtype: torch.dtype = torch.float8_e4m3fn
 ):
@@ -1076,9 +1130,6 @@ def dynamic_per_batched_tensor_quant(
     return x_scl_sat.to(dtype).contiguous(), scale.float().reciprocal()
 
 
-logger = init_logger(__name__)
-
-
 @CustomOp.register(
     "mla_decode_concat_quant_fp8",
     dynamic_arg_dims={"decode_ql_nope": 0, "decode_q_pe": 0},
@@ -1112,12 +1163,7 @@ def forward(
     forward_hip = _make_forward(QuantFP8.forward_hip)  # type: ignore[arg-type]
 
 
-CUDNN_WORKSPACE_SIZE = 12800
-
-
 class MLACommonBackend(AttentionBackend):
-    accept_output_buffer: bool = True
-
     @staticmethod
     def get_name() -> str:
         return "TRITON_MLA"
@@ -1179,31 +1225,15 @@ class ChunkedContextMetadata:
         padded_local_cu_seq_lens: torch.Tensor | None = None
         cu_seq_lens_lst: list[list[int]] | None = None
         chunk_size: int | None = None
+        prefill_tokens_with_context: int | None = None
 
     block_table: torch.Tensor
     query_start_loc: torch.Tensor
     max_query_len: int
     chunked_context: ChunkedContextMetadata | None = None
-    query_seq_lens: torch.Tensor | None = None
-    workspace_buffer: torch.Tensor | None = None
     q_data_type: torch.dtype | None = None
     output_dtype: torch.dtype | None = None
-
-
-@dataclass
-class FlashInferPrefillMetadata(MLACommonPrefillMetadata):
-    prefill_main: "BatchPrefillWithRaggedKVCacheWrapper | None" = None
-    prefill_chunks: "list[BatchPrefillWithRaggedKVCacheWrapper]" = field(
-        default_factory=list
-    )
-
-
-@dataclass
-class CudnnPrefillMetadata(MLACommonPrefillMetadata):
-    class ChunkedContextMetadata(MLACommonPrefillMetadata.ChunkedContextMetadata):
-        seq_lens: torch.Tensor
-
-    cudnn_workspace: torch.Tensor | None = None
+    prefill_backend: MLAPrefillBackend | None = None
 
 
 @dataclass
@@ -1249,13 +1279,8 @@ class MLACommonMetadata(AttentionMetadata, Generic[D]):
     # The dimension of the attention heads
     head_dim: int | None = None
 
+    prefill: MLACommonPrefillMetadata | None = None
     decode: D | None = None
-    prefill: (
-        MLACommonPrefillMetadata
-        | FlashInferPrefillMetadata
-        | CudnnPrefillMetadata
-        | None
-    ) = None
 
     def __post_init__(self):
         if self.head_dim is not None and not MLACommonBackend.supports_head_size(
@@ -1268,64 +1293,6 @@ def __post_init__(self):
 A = TypeVar("A", bound=AttentionMetadata)
 
 
-def is_deepseek_r1_mla_compatible(vllm_config: VllmConfig) -> bool:
-    # Check if model has DeepSeek R1 compatible MLA dimensions:
-    # qk_nope_head_dim = 128, qk_rope_head_dim = 64, v_head_dim = 128
-    # which results in query/key head dim = 192.
-    if vllm_config.model_config is None:
-        return False
-    hf_text_config = vllm_config.model_config.hf_text_config
-    qk_nope_head_dim = getattr(hf_text_config, "qk_nope_head_dim", 1)
-    qk_rope_head_dim = getattr(hf_text_config, "qk_rope_head_dim", 1)
-    v_head_dim = getattr(hf_text_config, "v_head_dim", 1)
-    return qk_nope_head_dim == 128 and qk_rope_head_dim == 64 and v_head_dim == 128
-
-
-@functools.cache
-def use_flashinfer_prefill() -> bool:
-    from vllm.config import get_current_vllm_config
-
-    vllm_config = get_current_vllm_config()
-    if not (
-        not vllm_config.attention_config.disable_flashinfer_prefill
-        and has_flashinfer()
-        and not vllm_config.attention_config.use_cudnn_prefill
-        and current_platform.is_device_capability_family(100)
-    ):
-        return False
-
-    return is_deepseek_r1_mla_compatible(vllm_config)
-
-
-@functools.cache
-def use_cudnn_prefill() -> bool:
-    from vllm.config import get_current_vllm_config
-
-    vllm_config = get_current_vllm_config()
-    return (
-        has_flashinfer()
-        and vllm_config.attention_config.use_cudnn_prefill
-        and current_platform.is_device_capability_family(100)
-        and has_nvidia_artifactory()
-    )
-
-
-@functools.cache
-def use_trtllm_ragged_deepseek_prefill() -> bool:
-    """Check if TRT-LLM ragged DeepSeek prefill should be used."""
-    from vllm.config import get_current_vllm_config
-
-    vllm_config = get_current_vllm_config()
-    if not (
-        has_flashinfer()
-        and vllm_config.attention_config.use_trtllm_ragged_deepseek_prefill
-        and current_platform.is_device_capability_family(100)
-    ):
-        return False
-
-    return is_deepseek_r1_mla_compatible(vllm_config)
-
-
 @dataclass
 class MLADims:
     q_lora_rank: int | None
@@ -1338,6 +1305,20 @@ class MLADims:
 def get_mla_dims(model_config: ModelConfig) -> MLADims:
     hf_text_config = model_config.hf_text_config
 
+    # Check if this is a DeepseekV4 config (uses unified head_dim + rope_head_dim)
+    if hasattr(hf_text_config, "compress_ratios"):
+        # DeepseekV4 style config: unified head_dim with rope_head_dim
+        head_dim = hf_text_config.head_dim
+        rope_head_dim = hf_text_config.qk_rope_head_dim
+        return MLADims(
+            q_lora_rank=hf_text_config.q_lora_rank,
+            kv_lora_rank=head_dim,
+            qk_nope_head_dim=head_dim - rope_head_dim,
+            qk_rope_head_dim=rope_head_dim,
+            v_head_dim=head_dim,
+        )
+
+    # DeepseekV2/V3 style config
     return MLADims(
         q_lora_rank=getattr(hf_text_config, "q_lora_rank", None),
         kv_lora_rank=hf_text_config.kv_lora_rank,
@@ -1349,15 +1330,14 @@ def get_mla_dims(model_config: ModelConfig) -> MLADims:
 
 @functools.cache
 def backend_supports_prefill_query_quantization() -> bool:
-    """Check if the selected MLA backend supports prefill query quantization.
+    """Check if the selected MLA prefill backend supports query quantization.
 
     Currently supported backends:
-    - FlashInfer prefill
-    - TRT-LLM ragged DeepSeek prefill
+    - FlashInfer
+    - TRT-LLM Ragged
 
     Not supported:
-    - cuDNN Prefill
-    - FlashAttention
+    - FlashAttention (FA3/FA4)
     - Non-GB200 devices (FP8 prefill requires device capability 100)
     """
     # FP8 prefill query quantization requires GB200 (device capability 100)
@@ -1365,7 +1345,16 @@ def backend_supports_prefill_query_quantization() -> bool:
     if not current_platform.is_device_capability_family(100):
         return False
 
-    return use_flashinfer_prefill() or use_trtllm_ragged_deepseek_prefill()
+    from vllm.config import get_current_vllm_config
+    from vllm.v1.attention.backends.mla.prefill import get_mla_prefill_backend
+
+    vllm_config = get_current_vllm_config()
+    backend_cls = get_mla_prefill_backend(vllm_config)
+    return backend_cls.get_name() in (
+        "FLASHINFER",
+        "TRTLLM_RAGGED",
+        "TOKENSPEED_MLA",
+    )
 
 
 class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
@@ -1431,16 +1420,14 @@ def determine_prefill_query_data_type(
         is enabled, else model dtype.
         """
         use_fp8 = (
-            vllm_config.cache_config.cache_dtype.startswith("fp8")
+            is_quantized_kv_cache(vllm_config.cache_config.cache_dtype)
             and vllm_config.attention_config.use_prefill_query_quantization
             and backend_supports_prefill_query_quantization()
         )
 
         if use_fp8:
             fp8_dtype = current_platform.fp8_dtype()
-            logger.info_once(
-                "FP8 prefill attention enabled: query data type is FP8", scope="local"
-            )
+            logger.info_once("FP8 prefill attention enabled: query data type is FP8")
             return fp8_dtype
         elif vllm_config.attention_config.use_prefill_query_quantization:
             logger.info_once(
@@ -1448,9 +1435,20 @@ def determine_prefill_query_data_type(
                 " use_prefill_query_quantization is enabled. Please"
                 " ensure that --kv-cache-dtype is set to fp8 and your prefill"
                 " backend is compatible with FP8 attention.",
-                scope="local",
             )
             return model_dtype
+        elif (
+            is_quantized_kv_cache(vllm_config.cache_config.cache_dtype)
+            and backend_supports_prefill_query_quantization()
+        ):
+            logger.warning_once(
+                "FP8 KV cache is enabled but prefill queries are not "
+                "quantized to FP8. For long-context workloads (ISL >= 4K), "
+                "enabling FP8 prefill attention can significantly optimize "
+                "prefill latency. To enable, add: "
+                '--attention-config \'{"use_prefill_query_quantization"'
+                ": true}'",
+            )
 
         return model_dtype
 
@@ -1467,7 +1465,6 @@ def __init__(
             metadata_cls if metadata_cls is not None else MLACommonMetadata
         )
         self.kv_cache_spec = kv_cache_spec
-        scheduler_config = vllm_config.scheduler_config
         self.model_config = vllm_config.model_config
         parallel_config = vllm_config.parallel_config
         self.compilation_config = vllm_config.compilation_config
@@ -1527,139 +1524,21 @@ def __init__(
                 device=device,
             )
 
-        self._use_cudnn_prefill = use_cudnn_prefill()
-        self._use_fi_prefill = use_flashinfer_prefill()
-        self._use_trtllm_ragged_prefill = use_trtllm_ragged_deepseek_prefill()
-        self.prefill_metadata_cls = (
-            FlashInferPrefillMetadata
-            if self._use_fi_prefill
-            else CudnnPrefillMetadata
-            if self._use_cudnn_prefill
-            else MLACommonPrefillMetadata
-        )
-
-        if self._use_fi_prefill:
-            self._workspace_buffer = torch.empty(
-                envs.VLLM_FLASHINFER_WORKSPACE_BUFFER_SIZE,
-                dtype=torch.uint8,
-                device=device,
-            )
-
-            self._fi_prefill_main: BatchPrefillWithRaggedKVCacheWrapper | None = None
-            self._fi_prefill_chunks: list[BatchPrefillWithRaggedKVCacheWrapper] = []
-
-            self._global_hyperparameters = infer_global_hyperparameters(
-                get_per_layer_parameters(vllm_config, layer_names, MLACommonImpl)  # type: ignore[type-abstract]
-            )
-
-        if self._use_trtllm_ragged_prefill:
-            self._workspace_buffer = torch.empty(
-                envs.VLLM_FLASHINFER_WORKSPACE_BUFFER_SIZE,
-                dtype=torch.uint8,
-                device=device,
-            )
-
-        if self._use_cudnn_prefill:
-            self.cudnn_workspace = torch.empty(
-                CUDNN_WORKSPACE_SIZE * scheduler_config.max_num_seqs,
-                dtype=torch.int8,
-                device=device,
-            )
+        self._prefill_backend = self.compilation_config.static_forward_context[
+            layer_names[0]
+        ].prefill_backend
 
         supports_spec_decode = self.query_len_support != QueryLenSupport.SINGLE_ONLY
         self._init_reorder_batch_threshold(
             self.reorder_batch_threshold, supports_spec_decode, supports_dcp_with_varlen
         )
 
-        # Validate consistency between query_len_support and reorder_batch_threshold
         if self.query_len_support == QueryLenSupport.SINGLE_ONLY:
             assert self.reorder_batch_threshold == 1, (
                 f"reorder_batch_threshold must be 1 when query_len_support is "
                 f"SINGLE_ONLY, got {self.reorder_batch_threshold}"
             )
 
-    def _build_fi_prefill_wrappers(self, prefill: FlashInferPrefillMetadata):
-        qo_indptr = prefill.query_start_loc
-
-        has_context = False
-        if prefill.chunked_context is not None:
-            chunked_context = prefill.chunked_context
-            has_context = True
-
-        if self._fi_prefill_main is None:
-            from flashinfer import BatchPrefillWithRaggedKVCacheWrapper
-
-            self._fi_prefill_main = BatchPrefillWithRaggedKVCacheWrapper(
-                self._workspace_buffer, "NHD", backend="cutlass"
-            )
-
-        if has_context:
-            num_chunks = chunked_context.cu_seq_lens.shape[0]
-            # Allocate more prefill chunk wrappers if needed
-            if len(self._fi_prefill_chunks) < num_chunks:
-                from flashinfer import BatchPrefillWithRaggedKVCacheWrapper
-
-                for _ in range(len(self._fi_prefill_chunks), num_chunks):
-                    self._fi_prefill_chunks.append(
-                        BatchPrefillWithRaggedKVCacheWrapper(
-                            self._workspace_buffer, "NHD", backend="cutlass"
-                        )
-                    )
-            assert num_chunks <= len(self._fi_prefill_chunks)
-
-        # In MLA, the non-latent num_qo_heads == num_kv_heads
-        num_qo_heads = self.num_heads
-        num_kv_heads = num_qo_heads
-
-        # Sanity: Verify that num_kv_heads == 1 since it is latent space
-        assert self.kv_cache_spec.num_kv_heads == 1
-
-        # Get non-latent head_dim_qk and head_dim_vo
-        head_dim_qk = self.mla_dims.qk_nope_head_dim + self.mla_dims.qk_rope_head_dim
-        head_dim_vo = self.mla_dims.v_head_dim
-
-        # For main run, qo_indptr == kv_indptr
-        kv_indptr = qo_indptr.clone()
-
-        # Prepare main prefill
-        self._fi_prefill_main.plan(
-            qo_indptr=qo_indptr,
-            kv_indptr=kv_indptr,
-            num_qo_heads=num_qo_heads,
-            num_kv_heads=num_kv_heads,
-            head_dim_qk=head_dim_qk,
-            head_dim_vo=head_dim_vo,
-            causal=True,  # This is main run
-            sm_scale=self._global_hyperparameters.sm_scale,
-            window_left=self._global_hyperparameters.window_left,
-            logits_soft_cap=self._global_hyperparameters.logits_soft_cap,
-            q_data_type=self.q_data_type,
-            o_data_type=prefill.output_dtype,
-        )
-
-        # Prepare context prefills
-        if has_context:
-            for i in range(num_chunks):
-                kv_indptr_chunk = chunked_context.cu_seq_lens[i]
-
-                self._fi_prefill_chunks[i].plan(
-                    qo_indptr=qo_indptr,
-                    kv_indptr=kv_indptr_chunk,
-                    num_qo_heads=num_qo_heads,
-                    num_kv_heads=num_kv_heads,
-                    head_dim_qk=head_dim_qk,
-                    head_dim_vo=head_dim_vo,
-                    causal=False,  # This is context run
-                    sm_scale=self._global_hyperparameters.sm_scale,
-                    window_left=self._global_hyperparameters.window_left,
-                    logits_soft_cap=self._global_hyperparameters.logits_soft_cap,
-                    q_data_type=self.q_data_type,
-                    o_data_type=prefill.output_dtype,
-                )
-
-        prefill.prefill_main = self._fi_prefill_main
-        prefill.prefill_chunks = self._fi_prefill_chunks
-
     def _build_decode(
         self,
         block_table_tensor: torch.Tensor,
@@ -1729,18 +1608,26 @@ def build(
 
         prefill_metadata = None
         if num_prefills > 0:
-            num_computed_tokens_cpu = (
-                common_attn_metadata.compute_num_computed_tokens().cpu()
-            )
-
             reqs_start = num_decodes  # prefill_start
 
-            context_lens_cpu = num_computed_tokens_cpu[reqs_start:num_reqs]
+            # Upper bound is exact for prefill rows (no D2H sync).
+            seq_lens_cpu = common_attn_metadata.seq_lens_cpu_upper_bound
+            assert seq_lens_cpu is not None
+            prefill_query_lens_cpu = (
+                query_start_loc_cpu[reqs_start + 1 : num_reqs + 1]
+                - query_start_loc_cpu[reqs_start:num_reqs]
+            )
+            context_lens_cpu = (
+                seq_lens_cpu[reqs_start:num_reqs] - prefill_query_lens_cpu
+            )
             max_context_len_cpu = context_lens_cpu.max().item()
             num_prefills_with_context_cpu = (context_lens_cpu > 0).sum().item()
             prefill_query_start_loc = (
                 query_start_loc[reqs_start:] - query_start_loc[reqs_start]
             )
+            prefill_query_start_loc_cpu = (
+                query_start_loc_cpu[reqs_start:] - query_start_loc_cpu[reqs_start]
+            )
 
             chunked_context_metadata = None
             if max_context_len_cpu > 0:
@@ -1857,13 +1744,14 @@ def build(
                         dtype=torch.int32,
                     )
 
-                chunked_context_metadata_cls = (
-                    CudnnPrefillMetadata.ChunkedContextMetadata
-                    if self._use_cudnn_prefill
-                    else MLACommonPrefillMetadata.ChunkedContextMetadata
-                )
+                prefill_tokens_with_context = None
+                if num_prefills_with_context_cpu > 0:
+                    prefill_tokens_with_context = prefill_query_start_loc_cpu[
+                        num_prefills_with_context_cpu
+                    ].item()
+                _ChunkedMetadata = MLACommonPrefillMetadata.ChunkedContextMetadata
                 if self.dcp_world_size > 1:
-                    chunked_context_metadata = chunked_context_metadata_cls(
+                    chunked_context_metadata = _ChunkedMetadata(
                         cu_seq_lens=cu_seq_lens_cpu.to(device, non_blocking=True),
                         starts=local_chunk_starts.to(device, non_blocking=True),
                         seq_tot=padded_local_chunk_seq_lens.sum(dim=1).tolist(),
@@ -1881,9 +1769,10 @@ def build(
                         ),
                         cu_seq_lens_lst=cu_seq_lens_cpu.tolist(),
                         chunk_size=padded_local_max_context_chunk_across_ranks,
+                        prefill_tokens_with_context=prefill_tokens_with_context,
                     )
                 else:
-                    chunked_context_metadata = chunked_context_metadata_cls(
+                    chunked_context_metadata = _ChunkedMetadata(
                         cu_seq_lens=cu_seq_lens_cpu.to(device, non_blocking=True),
                         starts=chunk_starts.to(device, non_blocking=True),
                         seq_tot=chunk_seq_lens.sum(dim=1).tolist(),
@@ -1894,37 +1783,25 @@ def build(
                         ),
                         chunk_total_token=chunk_total_token,
                         workspace=self.chunked_prefill_workspace,
+                        prefill_tokens_with_context=prefill_tokens_with_context,
                     )
 
-                if self._use_cudnn_prefill:
-                    chunked_context_metadata.seq_lens = chunk_seq_lens
-
                 assert (
                     max(chunked_context_metadata.max_seq_lens)
                     <= self.chunked_prefill_workspace_size
                 )
 
-            prefill_metadata = self.prefill_metadata_cls(
+            prefill_metadata = MLACommonPrefillMetadata(
                 block_table=block_table_tensor[reqs_start:, ...],
                 query_start_loc=prefill_query_start_loc,
                 max_query_len=max_query_len,
                 chunked_context=chunked_context_metadata,
                 output_dtype=self.model_config.dtype,
                 q_data_type=self.q_data_type,
+                prefill_backend=self._prefill_backend,
             )
 
-            if self._use_cudnn_prefill:
-                assert isinstance(prefill_metadata, CudnnPrefillMetadata)
-                prefill_metadata.query_seq_lens = (
-                    prefill_query_start_loc[1:] - prefill_query_start_loc[:-1]
-                )
-                prefill_metadata.cudnn_workspace = self.cudnn_workspace
-
-            if self._use_trtllm_ragged_prefill:
-                prefill_metadata.query_seq_lens = (
-                    prefill_query_start_loc[1:] - prefill_query_start_loc[:-1]
-                )
-                prefill_metadata.workspace_buffer = self._workspace_buffer
+            self._prefill_backend.prepare_metadata(prefill_metadata)
 
         decode_metadata = None
         if num_decodes > 0:
@@ -1969,11 +1846,7 @@ def build(
             decode=decode_metadata,
         )
 
-        if self._use_fi_prefill and num_prefills > 0:
-            assert isinstance(attn_metadata.prefill, FlashInferPrefillMetadata)
-            self._build_fi_prefill_wrappers(attn_metadata.prefill)
-
-        return attn_metadata
+        return attn_metadata  # type: ignore[return-value]
 
 
 def reorg_kvcache(
@@ -2055,6 +1928,14 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
     understand this class
     """
 
+    def fused_output_quant_supported(self, quant_key):
+        return quant_key in (
+            kFp8StaticTensorSym,
+            kNvfp4Dynamic,
+            kFp8Dynamic128Sym,
+            kFp8Dynamic64Sym,
+        )
+
     def __init__(
         self,
         num_heads: int,
@@ -2075,6 +1956,7 @@ def __init__(
         qk_head_dim: int,
         v_head_dim: int,
         kv_b_proj: ColumnParallelLinear,
+        # DSV3.2 MLA Specific Arguments
         indexer: object | None = None,
         q_pad_num_heads: int | None = None,
     ) -> None:
@@ -2097,6 +1979,7 @@ def __init__(
         self.indexer = indexer
         self.q_pad_num_heads = q_pad_num_heads
         self.supports_quant_query_input = True
+        self.is_aiter_triton_fp8_bmm_enabled = rocm_aiter_ops.is_fp8bmm_enabled()
 
         # Use flashinfer's optimized concat_mla_k kernel when available.
         # The kernel is optimized for DeepSeek V3 dimensions:
@@ -2108,311 +1991,12 @@ def __init__(
             and (self.qk_rope_head_dim == 64)
         )
 
-        if use_trtllm_ragged_deepseek_prefill():
-            logger.info_once(
-                "Using TRT-LLM ragged DeepSeek prefill for MLA", scope="local"
-            )
-            self._run_prefill_context_chunk = (
-                self._run_prefill_context_chunk_trtllm_ragged
-            )
-            self._run_prefill_new_tokens = self._run_prefill_new_tokens_trtllm_ragged
-            self._pad_v = False
-        elif use_flashinfer_prefill():
-            logger.info_once("Using FlashInfer prefill for MLA", scope="local")
-            self._run_prefill_context_chunk = self._run_prefill_context_chunk_fi
-            self._run_prefill_new_tokens = self._run_prefill_new_tokens_fi
-            self._pad_v = False
-        elif use_cudnn_prefill():
-            logger.info_once("Using CUDNN prefill for MLA", scope="local")
-            self._run_prefill_context_chunk = self._run_prefill_context_chunk_cudnn
-            self._run_prefill_new_tokens = self._run_prefill_new_tokens_cudnn
-            self._pad_v = False
-        else:  # Use FlashAttention
-            if flash_attn_varlen_func is None:
-                raise RuntimeError(
-                    "MLA attention requires FlashAttention but it is not "
-                    "available. Please install flash_attn or use "
-                    "--attention-backend ROCM_AITER_MLA."
-                )
-            logger.info_once("Using FlashAttention prefill for MLA", scope="local")
-            self._run_prefill_context_chunk = self._run_prefill_context_chunk_fa
-            self._run_prefill_new_tokens = self._run_prefill_new_tokens_fa
-
-            # Handle the differences between the flash_attn_varlen from
-            # flash_attn and the one from vllm_flash_attn. The former is used on
-            # RoCM and the latter has an additional parameter to control
-            # FA2 vs FA3
-            self.flash_attn_varlen_func = flash_attn_varlen_func
-            self.vllm_flash_attn_version = get_flash_attn_version(
-                head_size=self.qk_head_dim
-            )
-            if self.vllm_flash_attn_version is not None:
-                self.flash_attn_varlen_func = functools.partial(
-                    flash_attn_varlen_func, fa_version=self.vllm_flash_attn_version
-                )
-
-            # For MLA the v head dim is smaller than qk head dim so we pad out
-            # v with 0s to match the qk head dim for attention backends that do
-            # not support different headdims.
-            # FA3 on Hopper (SM90) and FA4 natively handle diff headdims.
-            device_capability = current_platform.get_device_capability()
-            self._pad_v = self.vllm_flash_attn_version is None or not (
-                (
-                    self.vllm_flash_attn_version == 3
-                    and device_capability is not None
-                    and device_capability[0] == 9
-                )
-                or self.vllm_flash_attn_version == 4
-            )
-
         self.dcp_world_size: int = -1
 
         self.cp_kv_cache_interleave_size: int = (
             get_current_vllm_config().parallel_config.cp_kv_cache_interleave_size
         )
 
-    def _flash_attn_varlen_diff_headdims(
-        self, q, k, v, return_softmax_lse=False, softmax_scale=None, **kwargs
-    ):
-        maybe_padded_v = v
-        if self._pad_v:
-            maybe_padded_v = torch.nn.functional.pad(
-                v, [0, q.shape[-1] - v.shape[-1]], value=0
-            )
-
-        if is_vllm_fa:
-            kwargs["return_softmax_lse"] = return_softmax_lse
-        else:
-            # ROCm leverages the upstream flash_attn, which takes a parameter
-            # called "return_attn_probs" instead of return_softmax_lse
-            kwargs["return_attn_probs"] = return_softmax_lse
-        if envs.VLLM_BATCH_INVARIANT:
-            kwargs["num_splits"] = 1
-
-        attn_out = self.flash_attn_varlen_func(
-            q=q,
-            k=k,
-            v=maybe_padded_v,
-            softmax_scale=softmax_scale,
-            **kwargs,
-        )
-
-        # Unpack the output if there is multiple results
-        lse = None
-        if isinstance(attn_out, tuple):
-            attn_out, lse = attn_out[0], attn_out[1]
-
-        # Remain consistent with old `flash_attn_varlen_func` where there
-        # is only one output tensor if `return_softmax_lse` is False.
-        if return_softmax_lse:
-            return attn_out, lse
-        return attn_out
-
-    def _run_prefill_new_tokens_fa(
-        self, prefill: MLACommonPrefillMetadata, q, k, v, return_softmax_lse
-    ):
-        return self._flash_attn_varlen_diff_headdims(
-            q=q,
-            k=k,
-            v=v,
-            cu_seqlens_q=prefill.query_start_loc,
-            cu_seqlens_k=prefill.query_start_loc,
-            max_seqlen_q=prefill.max_query_len,
-            max_seqlen_k=prefill.max_query_len,
-            softmax_scale=self.scale,
-            causal=True,
-            return_softmax_lse=return_softmax_lse,
-        )
-
-    def _run_prefill_new_tokens_fi(
-        self, prefill: MLACommonPrefillMetadata, q, k, v, return_softmax_lse
-    ):
-        assert isinstance(prefill, FlashInferPrefillMetadata)
-        assert prefill.prefill_main is not None
-
-        ret = prefill.prefill_main.run(
-            q=q,
-            k=k,
-            v=v,
-            return_lse=return_softmax_lse,
-        )
-
-        if isinstance(ret, tuple):
-            return ret[0], ret[1].transpose(0, 1).contiguous()
-        return ret
-
-    def _run_prefill_new_tokens_cudnn(
-        self, prefill: MLACommonPrefillMetadata, q, k, v, return_softmax_lse
-    ):
-        assert isinstance(prefill, CudnnPrefillMetadata)
-        assert prefill.query_seq_lens is not None
-        from flashinfer.prefill import cudnn_batch_prefill_with_kv_cache
-
-        output, lse = cudnn_batch_prefill_with_kv_cache(
-            q=q,
-            k_cache=k,
-            v_cache=v,
-            scale=self.scale,
-            workspace_buffer=prefill.cudnn_workspace,
-            max_token_per_sequence=prefill.max_query_len,
-            max_sequence_kv=prefill.max_query_len,
-            actual_seq_lens_q=prefill.query_seq_lens.view(-1, 1, 1, 1),
-            actual_seq_lens_kv=prefill.query_seq_lens.view(-1, 1, 1, 1),
-            causal=True,
-            # Do not support False for now
-            return_lse=True,
-            # Indicates actual_seq_lens are on GPU or CPU.
-            is_cuda_graph_compatible=True,
-        )
-        if return_softmax_lse:
-            return output, lse
-        return output
-
-    def _run_prefill_context_chunk_fa(
-        self, prefill: MLACommonPrefillMetadata, chunk_idx: int, q, k, v
-    ):
-        assert prefill.chunked_context is not None
-        return self._flash_attn_varlen_diff_headdims(
-            q=q,
-            k=k,
-            v=v,
-            cu_seqlens_q=prefill.query_start_loc,
-            cu_seqlens_k=prefill.chunked_context.cu_seq_lens[chunk_idx],
-            max_seqlen_q=prefill.max_query_len,
-            max_seqlen_k=prefill.chunked_context.max_seq_lens[chunk_idx],
-            softmax_scale=self.scale,
-            causal=False,  # Context is unmasked
-            return_softmax_lse=True,
-        )
-
-    def _run_prefill_context_chunk_fi(
-        self, prefill: MLACommonPrefillMetadata, chunk_idx: int, q, k, v
-    ):
-        assert isinstance(prefill, FlashInferPrefillMetadata)
-
-        attn_out, lse = prefill.prefill_chunks[chunk_idx].run(
-            q=q,
-            k=k,
-            v=v,
-            return_lse=True,
-        )
-
-        # Convert from (q_len, num_heads) to (num_heads, q_len)
-        return attn_out, lse.transpose(0, 1).contiguous()
-
-    def _run_prefill_context_chunk_cudnn(
-        self, prefill: MLACommonPrefillMetadata, chunk_idx: int, q, k, v
-    ):
-        assert isinstance(prefill, CudnnPrefillMetadata)
-        assert prefill.chunked_context is not None
-        assert prefill.chunked_context.seq_lens[chunk_idx] is not None
-        assert prefill.query_seq_lens is not None
-        from flashinfer.prefill import cudnn_batch_prefill_with_kv_cache
-
-        return cudnn_batch_prefill_with_kv_cache(
-            q=q,
-            k_cache=k,
-            v_cache=v,
-            scale=self.scale,
-            workspace_buffer=prefill.cudnn_workspace,
-            max_token_per_sequence=prefill.max_query_len,
-            max_sequence_kv=prefill.chunked_context.max_seq_lens[chunk_idx],
-            actual_seq_lens_q=prefill.query_seq_lens.view(-1, 1, 1, 1),
-            actual_seq_lens_kv=prefill.chunked_context.seq_lens[chunk_idx].view(
-                -1, 1, 1, 1
-            ),
-            causal=False,
-            return_lse=True,
-            # Indicates actual_seq_lens are on GPU or CPU.
-            is_cuda_graph_compatible=True,
-        )
-
-    def _run_prefill_new_tokens_trtllm_ragged(
-        self, prefill: MLACommonPrefillMetadata, q, k, v, return_softmax_lse
-    ):
-        """TRT-LLM ragged attention for new tokens (causal)."""
-        from flashinfer.prefill import trtllm_ragged_attention_deepseek
-
-        assert prefill.query_seq_lens is not None
-        assert prefill.workspace_buffer is not None
-        # allocate BF16 / FP16 output tensor for TRT-LLM ragged attention
-        out = torch.empty(
-            q.shape[0],
-            q.shape[1],
-            v.shape[2],
-            device=q.device,
-            dtype=prefill.output_dtype,
-        )
-
-        ret = trtllm_ragged_attention_deepseek(
-            query=q,
-            key=k,
-            value=v,
-            workspace_buffer=prefill.workspace_buffer,
-            seq_lens=prefill.query_seq_lens,
-            max_q_len=prefill.max_query_len,
-            max_kv_len=prefill.max_query_len,
-            bmm1_scale=self.scale,
-            bmm2_scale=1.0,
-            o_sf_scale=1.0,
-            batch_size=prefill.query_seq_lens.shape[0],
-            window_left=-1,
-            cum_seq_lens_q=prefill.query_start_loc,
-            cum_seq_lens_kv=prefill.query_start_loc,
-            enable_pdl=False,
-            is_causal=True,
-            return_lse=return_softmax_lse,
-            out=out,
-        )
-
-        if isinstance(ret, tuple):
-            # Convert from (q_len, num_heads) to (num_heads, q_len)
-            return ret[0], ret[1].transpose(0, 1).contiguous()
-        return ret
-
-    def _run_prefill_context_chunk_trtllm_ragged(
-        self, prefill: MLACommonPrefillMetadata, chunk_idx: int, q, k, v
-    ):
-        """TRT-LLM ragged attention for context chunks (non-causal)."""
-        from flashinfer.prefill import trtllm_ragged_attention_deepseek
-
-        assert prefill.chunked_context is not None
-        assert prefill.chunked_context.seq_lens[chunk_idx] is not None
-        assert prefill.workspace_buffer is not None
-
-        out = torch.zeros(
-            q.shape[0],
-            q.shape[1],
-            v.shape[2],
-            device=q.device,
-            dtype=prefill.output_dtype,
-        )
-        prefill.workspace_buffer.fill_(0)
-
-        attn_out, lse = trtllm_ragged_attention_deepseek(
-            query=q,
-            key=k,
-            value=v,
-            workspace_buffer=prefill.workspace_buffer,
-            seq_lens=prefill.chunked_context.seq_lens[chunk_idx],
-            max_q_len=prefill.max_query_len,
-            max_kv_len=prefill.chunked_context.max_seq_lens[chunk_idx],
-            bmm1_scale=self.scale,
-            bmm2_scale=1.0,
-            o_sf_scale=1.0,
-            batch_size=prefill.chunked_context.seq_lens[chunk_idx].shape[0],
-            window_left=-1,
-            cum_seq_lens_q=prefill.query_start_loc,
-            cum_seq_lens_kv=prefill.chunked_context.cu_seq_lens[chunk_idx],
-            enable_pdl=False,
-            is_causal=False,
-            return_lse=True,
-            out=out,
-        )
-
-        # Convert from (q_len, num_heads) to (num_heads, q_len)
-        return attn_out, lse.transpose(0, 1).contiguous()
-
     def _concat_k_nope_k_pe(
         self, k_nope: torch.Tensor, k_pe: torch.Tensor
     ) -> torch.Tensor:
@@ -2453,11 +2037,13 @@ def _compute_prefill_context(
     ):
         assert attn_metadata.prefill is not None
         prefill_metadata = attn_metadata.prefill
+        assert prefill_metadata.prefill_backend is not None
         assert prefill_metadata.chunked_context is not None
 
         use_fp8_prefill = prefill_metadata.q_data_type == current_platform.fp8_dtype()
 
         output = None
+        merge_output = None
         iters = len(prefill_metadata.chunked_context.seq_tot)
         workspace = prefill_metadata.chunked_context.workspace
 
@@ -2500,8 +2086,12 @@ def _compute_prefill_context(
                 if hasattr(self.kv_b_proj, "weight")
                 else self.kv_b_proj.params_dtype
             )
-            if use_fp8_prefill or _kv_b_proj_w_dtype != current_platform.fp8_dtype():
-                kv_c_normed = kv_c_normed.to(_kv_b_proj_w_dtype)
+            # For NVFP4, weights are packed uint8 — keep input in model dtype
+            # since the NVFP4 linear layer quantizes internally.
+            if (
+                use_fp8_prefill or _kv_b_proj_w_dtype != current_platform.fp8_dtype()
+            ) and _kv_b_proj_w_dtype != torch.uint8:
+                kv_c_normed = kv_c_normed.to(self.kv_b_proj.weight.dtype)
 
             k_pe = workspace[:toks][..., self.kv_lora_rank :].unsqueeze(1)
             kv_nope = self.kv_b_proj(kv_c_normed)[0].view(
@@ -2516,30 +2106,32 @@ def _compute_prefill_context(
 
             k = self._concat_k_nope_k_pe(k_nope, k_pe)
 
-            attn_output, attn_softmax_lse = self._run_prefill_context_chunk(
-                prefill=prefill_metadata,
-                chunk_idx=i,
-                q=q,
-                k=k,
-                v=v,
+            attn_output, attn_softmax_lse = (
+                prefill_metadata.prefill_backend.run_prefill_context_chunk(
+                    chunk_idx=i,
+                    q=q,
+                    k=k,
+                    v=v,
+                )
             )
 
             if output is None:
                 output = attn_output
                 output_lse = attn_softmax_lse
             else:
-                output_tmp = torch.empty_like(output)
-                output_lse_tmp = torch.empty_like(output_lse)
+                if merge_output is None:
+                    merge_output = torch.empty_like(output)
+                    merge_output_lse = torch.empty_like(output_lse)
                 merge_attn_states(
-                    output=output_tmp,
-                    output_lse=output_lse_tmp,
+                    output=merge_output,
+                    output_lse=merge_output_lse,
                     prefix_output=output,
                     prefix_lse=output_lse,
                     suffix_output=attn_output,
                     suffix_lse=attn_softmax_lse,
                 )
-                output = output_tmp
-                output_lse = output_lse_tmp
+                output, merge_output = merge_output, output
+                output_lse, merge_output_lse = merge_output_lse, output_lse
 
         return output, output_lse
 
@@ -2554,6 +2146,7 @@ def _context_parallel_compute_prefill_context(
         assert k_scale is None, "DCP not support scaled kvcache now."
         assert attn_metadata.prefill is not None
         prefill_metadata = attn_metadata.prefill
+        assert prefill_metadata.prefill_backend is not None
         assert prefill_metadata.chunked_context is not None
         assert prefill_metadata.chunked_context.padded_local_chunk_seq_lens is not None
         assert prefill_metadata.chunked_context.local_context_lens_allranks is not None
@@ -2562,6 +2155,7 @@ def _context_parallel_compute_prefill_context(
         assert prefill_metadata.chunked_context.chunk_size is not None
 
         output = None
+        merge_output = None
         iters = len(prefill_metadata.chunked_context.seq_tot)
         workspace = prefill_metadata.chunked_context.workspace
 
@@ -2620,30 +2214,32 @@ def _context_parallel_compute_prefill_context(
             k_nope, v = kv_nope.split([self.qk_nope_head_dim, self.v_head_dim], dim=-1)
             k = self._concat_k_nope_k_pe(k_nope, k_pe)
 
-            attn_output, attn_softmax_lse = self._run_prefill_context_chunk(
-                prefill=prefill_metadata,
-                chunk_idx=i,
-                q=q,
-                k=k,
-                v=v,
+            attn_output, attn_softmax_lse = (
+                prefill_metadata.prefill_backend.run_prefill_context_chunk(
+                    chunk_idx=i,
+                    q=q,
+                    k=k,
+                    v=v,
+                )
             )
 
             if output is None:
                 output = attn_output
                 output_lse = attn_softmax_lse
             else:
-                output_tmp = torch.empty_like(output)
-                output_lse_tmp = torch.empty_like(output_lse)
+                if merge_output is None:
+                    merge_output = torch.empty_like(output)
+                    merge_output_lse = torch.empty_like(output_lse)
                 merge_attn_states(
-                    output=output_tmp,
-                    output_lse=output_lse_tmp,
+                    output=merge_output,
+                    output_lse=merge_output_lse,
                     prefix_output=output,
                     prefix_lse=output_lse,
                     suffix_output=attn_output,
                     suffix_lse=attn_softmax_lse,
                 )
-                output = output_tmp
-                output_lse = output_lse_tmp
+                output, merge_output = merge_output, output
+                output_lse, merge_output_lse = merge_output_lse, output_lse
 
         return output, output_lse
 
@@ -2657,11 +2253,11 @@ def forward_mha(
         k_scale: torch.Tensor,
         output: torch.Tensor,
     ) -> None:
-        # TODO (zyongye): Prefill function here
         assert attn_metadata.prefill is not None
         assert self.dcp_world_size != -1
 
         prefill_metadata = attn_metadata.prefill
+        assert prefill_metadata.prefill_backend is not None
         use_fp8_prefill = prefill_metadata.q_data_type == current_platform.fp8_dtype()
 
         # Convert q to FP8 if FP8 prefill attention is enabled
@@ -2680,8 +2276,7 @@ def forward_mha(
             k = k.to(prefill_metadata.q_data_type)
             v = v.to(prefill_metadata.q_data_type)
 
-        output_prefill = self._run_prefill_new_tokens(
-            prefill=prefill_metadata,
+        output_prefill = prefill_metadata.prefill_backend.run_prefill_new_tokens(
             q=q,
             k=k,
             v=v,
@@ -2689,6 +2284,7 @@ def forward_mha(
         )
 
         if has_context:
+            assert prefill_metadata.chunked_context is not None
             suffix_output, suffix_lse = output_prefill
             if self.dcp_world_size > 1:
                 context_output, context_lse = (
@@ -2705,11 +2301,6 @@ def forward_mha(
                     q, kv_c_and_k_pe_cache, attn_metadata, k_scale
                 )
 
-            # unpad if necessary
-            if self._pad_v:
-                context_output = context_output[..., : v.shape[-1]]
-                suffix_output = suffix_output[..., : v.shape[-1]]
-
             output = output.view(-1, self.num_heads, self.v_head_dim)
             merge_attn_states(
                 output=output,
@@ -2717,9 +2308,11 @@ def forward_mha(
                 prefix_lse=context_lse,
                 suffix_output=suffix_output,
                 suffix_lse=suffix_lse,
+                prefill_tokens_with_context=prefill_metadata.chunked_context.prefill_tokens_with_context,
             )
         else:
-            output_prefill = output_prefill[..., : v.shape[-1]].flatten(start_dim=-2)
+            assert isinstance(output_prefill, torch.Tensor)
+            output_prefill = output_prefill.flatten(start_dim=-2)
             output.copy_(output_prefill)
 
     @abstractmethod
diff --git a/vllm/model_executor/layers/attention/mm_encoder_attention.py b/vllm/model_executor/layers/attention/mm_encoder_attention.py
index 6755e9af9e65..1731cc26bc36 100644
--- a/vllm/model_executor/layers/attention/mm_encoder_attention.py
+++ b/vllm/model_executor/layers/attention/mm_encoder_attention.py
@@ -1,13 +1,32 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import functools
+import json
 
 import numpy as np
 import torch
 
+from vllm.config import MultiModalConfig
+from vllm.kernels.triton.qkv_padded_fp8_quant import (
+    quantize_fp8_maybe_pad_head_dim,
+)
 from vllm.logger import init_logger
 from vllm.model_executor.custom_op import CustomOp, maybe_get_oot_by_class
-from vllm.model_executor.models.vision import get_vit_attn_backend
+from vllm.model_executor.layers.quantization.input_quant_fp8 import (
+    QuantFP8,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    GroupShape,
+    get_fp8_min_max,
+)
+from vllm.model_executor.models.vision import (
+    get_multimodal_config,
+    get_vit_attn_backend,
+)
+from vllm.utils.flashinfer import (
+    is_flashinfer_cudnn_fp8_prefill_attn_supported,
+)
 from vllm.utils.math_utils import round_up
 from vllm.v1.attention.backends.fa_utils import get_flash_attn_version
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
@@ -20,6 +39,108 @@
 
 logger = init_logger(__name__)
 
+_, _FP8_MAX = get_fp8_min_max()
+_FP8_AMAX_HISTORY_LEN = 16
+
+# Module-level state for auto-saving dynamic scales. The save is a one-shot
+# triggered by the first layer whose amax buffer wraps. Path and margin are
+# captured during layer init (set_current_vllm_config context only lives
+# across model init, not forward passes).
+_fp8_scale_save_path: str | None = None
+_fp8_scale_save_margin: float = MultiModalConfig.mm_encoder_fp8_scale_save_margin
+_fp8_saved_scale_refs: dict[str, tuple[torch.Tensor, torch.Tensor, torch.Tensor]] = {}
+
+
+@functools.cache
+def _load_fp8_scales_file(path: str | None) -> dict[str, dict[str, float]]:
+    """Load per-layer FP8 Q/K/V scales from a JSON file. Results are cached.
+
+    Expected format (keys ``q_scale`` / ``k_scale`` / ``v_scale`` also accepted)::
+
+        {
+            "visual.blocks.0.attn.attn": {"q": 224.0, "k": 198.0, "v": 210.0},
+            "visual.blocks.1.attn.attn": {"q": 218.0, "k": 195.0, "v": 207.0},
+        }
+
+    To produce such a file, run with ``mm_encoder_fp8_scale_save_path`` set.
+    """
+    if path is None:
+        return {}
+
+    with open(path, encoding="utf-8") as f:
+        data = json.load(f)
+
+    # Handle nested "layers" format
+    if "layers" in data and isinstance(data["layers"], dict):
+        data = data["layers"]
+
+    scales: dict[str, dict[str, float]] = {}
+    for layer_name, layer_scales in data.items():
+        if not isinstance(layer_scales, dict):
+            continue
+        q = layer_scales.get("q", layer_scales.get("q_scale"))
+        k = layer_scales.get("k", layer_scales.get("k_scale"))
+        v = layer_scales.get("v", layer_scales.get("v_scale"))
+        if q is not None and k is not None and v is not None:
+            q_f, k_f, v_f = float(q), float(k), float(v)
+            if q_f <= 0 or k_f <= 0 or v_f <= 0:
+                raise ValueError(
+                    f"FP8 scales must be positive, got q={q_f}, "
+                    f"k={k_f}, v={v_f} for layer '{layer_name}'"
+                )
+            scales[layer_name] = {"q": q_f, "k": k_f, "v": v_f}
+
+    logger.info_once(
+        "Loaded FP8 attention scales from %s (%d layers)", path, len(scales)
+    )
+    return scales
+
+
+def _maybe_save_fp8_scales(
+    layer_name: str,
+    q_scale: torch.Tensor,
+    k_scale: torch.Tensor,
+    v_scale: torch.Tensor,
+    buffer_wrapped: bool,
+) -> None:
+    """Accumulate a layer's scale tensors; on the first amax buffer wrap,
+    dump all accumulated scales to ``mm_encoder_fp8_scale_save_path``.
+
+    No-op unless auto-save is configured. Tensor references are stored on
+    every call (no GPU->CPU sync); ``.item()`` is only called at the single
+    save point to avoid stalling the forward path.
+    """
+    global _fp8_scale_save_path
+    # Fast path: auto-save either disabled or already finished. Path is
+    # captured at layer init and cleared once the save fires.
+    if _fp8_scale_save_path is None:
+        return
+
+    # Stash scale tensor refs (no GPU->CPU sync yet); wait until the amax
+    # history has seen a full cycle before committing scales to disk.
+    _fp8_saved_scale_refs[layer_name] = (q_scale, k_scale, v_scale)
+    if not buffer_wrapped:
+        return
+
+    # Buffer just wrapped for the first time: materialize scales (with
+    # safety margin) and dump to disk. Clearing _fp8_scale_save_path
+    # makes this a one-shot across all layers.
+    path, margin = _fp8_scale_save_path, _fp8_scale_save_margin
+    scales = {
+        name: {
+            "q": q.item() * margin,
+            "k": k.item() * margin,
+            "v": v.item() * margin,
+        }
+        for name, (q, k, v) in _fp8_saved_scale_refs.items()
+    }
+    _fp8_scale_save_path = None
+    _fp8_saved_scale_refs.clear()
+    with open(path, "w", encoding="utf-8") as f:
+        json.dump(scales, f, indent=2)
+    logger.info("Saved FP8 scales (%d layers) to %s", len(scales), path)
+
+
 # Batch buckets for cuDNN graph caching.
 # Graphs use batch size and max sequence length as cache key.
 # This avoids creating a new graph for each unique set of
@@ -148,27 +269,47 @@ def maybe_recompute_cu_seqlens(
         hidden_size: int,
         tp_size: int,
         device: torch.device,
+        fp8_padded_hidden_size: int | None = None,
     ) -> torch.Tensor:
         if (oot_class := maybe_get_oot_by_class(cls)) is not cls:
             return oot_class.maybe_recompute_cu_seqlens(  # type: ignore[attr-defined]
-                attn_backend, cu_seqlens, hidden_size, tp_size, device
+                attn_backend,
+                cu_seqlens,
+                hidden_size,
+                tp_size,
+                device,
+                fp8_padded_hidden_size=fp8_padded_hidden_size,
             )
 
         if attn_backend == AttentionBackendEnum.FLASHINFER:
             batch_size = len(cu_seqlens) - 1
-            scale = hidden_size // tp_size
-            cu_seqlens = cu_seqlens * scale
 
-            cu_seqlens_qko = cu_seqlens
-            cu_seqlens_v = cu_seqlens * 3
-
-            cu_seqlens_qko = add_padding_to_seqlens(
-                cu_seqlens_qko, batch_size, cu_seqlens_qko[-1]
-            )
-            cu_seqlens_v = add_padding_to_seqlens(
-                cu_seqlens_v, batch_size, cu_seqlens_v[-1]
-            )
-            cu_seqlens = np.concatenate([cu_seqlens_qko, cu_seqlens_v])
+            if fp8_padded_hidden_size is not None:
+                # FP8 path: after quantization Q/K/V are each independent
+                # contiguous tensors with stride H * padded_D per token.
+                # All sections use the same element stride.
+                scale = fp8_padded_hidden_size // tp_size
+                cu_seqlens = cu_seqlens * scale
+                cu_seqlens_padded = add_padding_to_seqlens(
+                    cu_seqlens, batch_size, cu_seqlens[-1]
+                )
+                cu_seqlens = np.concatenate([cu_seqlens_padded, cu_seqlens_padded])
+            else:
+                # BF16 path: Q/K/V are non-contiguous views into shared
+                # buffers. V section has 3x stride from interleaved QKV.
+                scale = hidden_size // tp_size
+                cu_seqlens = cu_seqlens * scale
+
+                cu_seqlens_qko = cu_seqlens
+                cu_seqlens_v = cu_seqlens * 3
+
+                cu_seqlens_qko = add_padding_to_seqlens(
+                    cu_seqlens_qko, batch_size, cu_seqlens_qko[-1]
+                )
+                cu_seqlens_v = add_padding_to_seqlens(
+                    cu_seqlens_v, batch_size, cu_seqlens_v[-1]
+                )
+                cu_seqlens = np.concatenate([cu_seqlens_qko, cu_seqlens_v])
 
         cu_seqlens = torch.from_numpy(cu_seqlens).to(device, non_blocking=True)
         return cu_seqlens
@@ -206,6 +347,7 @@ def __init__(
         # During model initialization, the default dtype is set as the model
         # weight and activation dtype.
         dtype = torch.get_default_dtype()
+        self.dtype = dtype
 
         # Get device-specific vision attention backend.
         self.attn_backend = get_vit_attn_backend(
@@ -227,8 +369,113 @@ def __init__(
         if self.attn_backend == AttentionBackendEnum.FLASHINFER:
             _get_flashinfer_workspace_buffer()
 
-        logger.info_once(
-            f"Using {self.attn_backend} for MMEncoderAttention.", scope="local"
+        logger.info_once(f"Using {self.attn_backend} for MMEncoderAttention.")
+
+        self._init_fp8_state()
+
+    def _init_fp8_state(self) -> None:
+        """Initialize FP8 attention state from multimodal config.
+
+        No-op if FP8 is not requested. Raises ``ValueError`` if FP8 is
+        requested but the platform does not support it.
+        """
+        # Populate defaults so ``_forward_flashinfer`` can
+        # check ``self.fp8_enabled`` and others without AttributeError.
+        self.fp8_enabled = False
+        self._fp8_dynamic_scale = False
+        self.fp8_quant: QuantFP8 | None = None
+        self.skip_scale_q = False
+        self.skip_scale_k = False
+        self.skip_scale_v = False
+
+        mm_cfg = get_multimodal_config()
+        if mm_cfg is None or mm_cfg.mm_encoder_attn_dtype != "fp8":
+            return
+
+        # FP8 path
+        if not is_flashinfer_cudnn_fp8_prefill_attn_supported():
+            raise ValueError(
+                "mm_encoder_attn_dtype='fp8' requires the FlashInfer "
+                "cuDNN backend with cuDNN >= 9.17.1 on a GPU with native "
+                "FP8 support."
+            )
+
+        self.fp8_enabled = True
+        self._fp8_dynamic_scale = mm_cfg.mm_encoder_fp8_scale_path is None
+        self.fp8_quant = QuantFP8(static=True, group_shape=GroupShape.PER_TENSOR)
+
+        # Register buffers pre-device-move; values populated in
+        # process_weights_after_loading. Shape (1, 1, 1, 1) is required by cuDNN.
+        for attr in ("_fp8_q_scale", "_fp8_k_scale", "_fp8_v_scale"):
+            self.register_buffer(
+                attr, torch.ones(1, dtype=torch.float32).view(1, 1, 1, 1)
+            )
+        if self._fp8_dynamic_scale:
+            for attr in ("_fp8_q_amax", "_fp8_k_amax", "_fp8_v_amax"):
+                self.register_buffer(
+                    attr,
+                    torch.zeros(_FP8_AMAX_HISTORY_LEN, dtype=torch.float32),
+                    persistent=False,
+                )
+            self._fp8_amax_pos = 0
+
+        # Capture auto-save config now: the VllmConfig context only lives
+        # across model init, not forward passes, so ``_maybe_save_fp8_scales``
+        # reads these globals instead of re-querying ``get_multimodal_config``.
+        if (
+            mm_cfg.mm_encoder_fp8_scale_save_path is not None
+            and self._fp8_dynamic_scale
+        ):
+            global _fp8_scale_save_path, _fp8_scale_save_margin
+            _fp8_scale_save_path = mm_cfg.mm_encoder_fp8_scale_save_path
+            _fp8_scale_save_margin = mm_cfg.mm_encoder_fp8_scale_save_margin
+
+    def process_weights_after_loading(self, act_dtype: torch.dtype) -> None:
+        """Populate FP8 scale buffers after weights are loaded.
+
+        ``act_dtype`` matches the signature used by :class:`Attention` and
+        :class:`MLAAttention` for the loader auto-scan but is unused:
+        FP8 scales are always float32.
+        """
+        if not self.fp8_enabled:
+            return
+
+        mm_cfg = get_multimodal_config()
+        scale_path = mm_cfg.mm_encoder_fp8_scale_path if mm_cfg is not None else None
+        if scale_path is None:
+            logger.info_once(
+                "FP8 attention enabled with dynamic scaling "
+                "(no scale file provided). Scales will adapt from "
+                "observed Q/K/V amax values (history_len=%d).",
+                _FP8_AMAX_HISTORY_LEN,
+            )
+            return
+
+        all_scales = _load_fp8_scales_file(scale_path)
+        layer_scales = all_scales.get(self.layer_name)
+        if layer_scales is None:
+            raise ValueError(
+                "FP8 attention enabled but scales not found for layer "
+                f"'{self.layer_name}' in {scale_path}. "
+                f"Available layers: {list(all_scales.keys())}"
+            )
+
+        for attr, key in (
+            ("_fp8_q_scale", "q"),
+            ("_fp8_k_scale", "k"),
+            ("_fp8_v_scale", "v"),
+        ):
+            getattr(self, attr).fill_(layer_scales[key])
+        self.skip_scale_q = layer_scales["q"] == 1.0
+        self.skip_scale_k = layer_scales["k"] == 1.0
+        self.skip_scale_v = layer_scales["v"] == 1.0
+
+        logger.debug(
+            "FP8 attention enabled for %s: q=%.4f, k=%.4f, v=%.4f",
+            self.layer_name if self.layer_name else "MMEncoderAttention",
+            layer_scales["q"],
+            layer_scales["k"],
+            layer_scales["v"],
         )
 
     @classmethod
@@ -355,6 +602,44 @@ def _forward_triton(
             output = output.reshape(bsz, q_len, -1)
         return output
 
+    @torch.no_grad()
+    def _record_amax_and_update_scales(
+        self,
+        query: torch.Tensor,
+        key: torch.Tensor,
+        value: torch.Tensor,
+    ) -> None:
+        """Record Q/K/V amax into circular history and recompute scales.
+
+        All work stays on GPU with no device-to-host sync. The Python-side
+        history position counter is mutated, so this method must NOT be
+        called inside CUDA graph capture/replay. When CUDA graphs are
+        used for the encoder, dynamic scaling should be disabled by
+        providing a static scale file via --mm-encoder-fp8-scale-path.
+        """
+        pos = self._fp8_amax_pos
+        self._fp8_amax_pos = (pos + 1) % _FP8_AMAX_HISTORY_LEN
+
+        for tensor, amax_buf, scale_buf in (
+            (query, self._fp8_q_amax, self._fp8_q_scale),
+            (key, self._fp8_k_amax, self._fp8_k_scale),
+            (value, self._fp8_v_amax, self._fp8_v_scale),
+        ):
+            amax_buf[pos] = tensor.amax()
+            max_amax = amax_buf.max()
+            scale_buf.fill_(
+                torch.clamp(max_amax, min=torch.finfo(torch.float32).tiny) / _FP8_MAX
+            )
+
+        buffer_wrapped = self._fp8_amax_pos == 0 and pos == _FP8_AMAX_HISTORY_LEN - 1
+        _maybe_save_fp8_scales(
+            self.layer_name,
+            self._fp8_q_scale,
+            self._fp8_k_scale,
+            self._fp8_v_scale,
+            buffer_wrapped,
+        )
+
     def _forward_flashinfer(
         self,
         query: torch.Tensor,
@@ -365,7 +650,32 @@ def _forward_flashinfer(
         sequence_lengths: torch.Tensor
         | None = None,  # Only used for FlashInfer CuDNN backend
     ) -> torch.Tensor:
-        return vit_flashinfer_wrapper(
+        if self.fp8_enabled:
+            assert self.fp8_quant is not None
+
+            if self._fp8_dynamic_scale:
+                self._record_amax_and_update_scales(query, key, value)
+
+            query = quantize_fp8_maybe_pad_head_dim(
+                query,
+                self._fp8_q_scale,
+                skip_scale=self.skip_scale_q,
+                fp8_quant=self.fp8_quant,
+            )
+            key = quantize_fp8_maybe_pad_head_dim(
+                key,
+                self._fp8_k_scale,
+                skip_scale=self.skip_scale_k,
+                fp8_quant=self.fp8_quant,
+            )
+            value = quantize_fp8_maybe_pad_head_dim(
+                value,
+                self._fp8_v_scale,
+                skip_scale=self.skip_scale_v,
+                fp8_quant=self.fp8_quant,
+            )
+
+        output = vit_flashinfer_wrapper(
             q=query,
             k=key,
             v=value,
@@ -374,8 +684,17 @@ def _forward_flashinfer(
             cu_seqlens=cu_seqlens,
             max_seqlen=max_seqlen,
             sequence_lengths=sequence_lengths,
+            q_scale=self._fp8_q_scale if self.fp8_enabled else None,
+            k_scale=self._fp8_k_scale if self.fp8_enabled else None,
+            v_scale=self._fp8_v_scale if self.fp8_enabled else None,
+            o_data_type=self.dtype if self.fp8_enabled else None,
         )
 
+        if self.fp8_enabled and output.shape[-1] != self.head_size:
+            output = output[..., : self.head_size].contiguous()
+
+        return output
+
     def forward_native(
         self,
         query: torch.Tensor,
diff --git a/vllm/model_executor/layers/attention/static_sink_attention.py b/vllm/model_executor/layers/attention/static_sink_attention.py
index 913d73a16c2c..8d199be0a577 100644
--- a/vllm/model_executor/layers/attention/static_sink_attention.py
+++ b/vllm/model_executor/layers/attention/static_sink_attention.py
@@ -10,7 +10,12 @@
 from vllm.model_executor.custom_op import CustomOp
 from vllm.model_executor.layers.attention import Attention
 from vllm.utils.math_utils import cdiv
-from vllm.utils.torch_utils import direct_register_custom_op
+from vllm.utils.torch_utils import (
+    LayerNameType,
+    _encode_layer_name,
+    _resolve_layer_name,
+    direct_register_custom_op,
+)
 from vllm.v1.attention.backend import (
     AttentionBackend,
     AttentionMetadata,
@@ -26,6 +31,7 @@
     AttentionSpec,
     KVCacheSpec,
     SinkFullAttentionSpec,
+    get_kv_quant_mode,
 )
 
 logger = init_logger(__name__)
@@ -169,7 +175,9 @@ def forward_native(
         )
         if not self.sink_populated:
             self_kv_cache = self.kv_cache
-            torch.ops.vllm.maybe_populate_sink(self_kv_cache, self.layer_name)
+            torch.ops.vllm.maybe_populate_sink(
+                self_kv_cache, _encode_layer_name(self.layer_name)
+            )
 
         return super().forward(query, key, value, output_shape)
 
@@ -217,13 +225,15 @@ def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec:
             head_size_v=self.head_size_v,
             sink_len=self.sink_len,
             dtype=self.kv_cache_torch_dtype,
+            kv_quant_mode=get_kv_quant_mode(self.kv_cache_dtype),
         )
 
 
 def maybe_populate_sink(
     self_kv_cache: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
 ) -> None:
+    layer_name = _resolve_layer_name(layer_name)
     forward_context: ForwardContext = get_forward_context()
     self = forward_context.no_compile_layers[layer_name]
     if self.sink_populated or self_kv_cache.numel() == 0:
@@ -233,7 +243,7 @@ def maybe_populate_sink(
 
 def maybe_populate_sink_fake(
     self_kv_cache: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
 ) -> None:
     return
 
diff --git a/vllm/model_executor/layers/batch_invariant.py b/vllm/model_executor/layers/batch_invariant.py
index 2f945024400e..2e1beeec1b71 100644
--- a/vllm/model_executor/layers/batch_invariant.py
+++ b/vllm/model_executor/layers/batch_invariant.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import math
 import os
 from collections.abc import Callable
 from typing import Any
@@ -7,14 +8,11 @@
 import torch
 
 import vllm.envs as envs
-from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.triton_utils import tl, triton
+from vllm.utils.mem_utils import get_max_shared_memory_bytes
 from vllm.utils.platform_utils import num_compute_units
 from vllm.utils.torch_utils import is_torch_equal_or_newer
-from vllm.v1.attention.backends.registry import AttentionBackendEnum
-
-logger = init_logger(__name__)
 
 
 def _matmul_launch_metadata(
@@ -23,22 +21,15 @@ def _matmul_launch_metadata(
     ret = {}
     m, n, k = args["M"], args["N"], args["K"]
     ret["name"] = f"{kernel.name} [M={m}, N={n}, K={k}]"
-    if "tiles_per_update" in args:
-        ret["name"] = (
-            f"{kernel.name} [M={m}, N={n}, K={k}, "
-            f"tiles_per_update={args['tiles_per_update']:02}]"
-        )
-    if "c_ptr" in args:
-        bytes_per_elem = args["c_ptr"].element_size()
-    else:
-        bytes_per_elem = 1 if args["FP8_OUTPUT"] else 2
+
+    bytes_per_elem = args["c_ptr"].element_size()
     ret[f"flops{bytes_per_elem * 8}"] = 2.0 * m * n * k
     ret["bytes"] = bytes_per_elem * (m * k + n * k + m * n)
     return ret
 
 
 @triton.jit
-def _compute_pid(tile_id, num_pid_in_group, num_pid_m, GROUP_SIZE_M, NUM_SMS):
+def _compute_pid(tile_id, num_pid_in_group, num_pid_m, GROUP_SIZE_M):
     group_id = tile_id // num_pid_in_group
     first_pid_m = group_id * GROUP_SIZE_M
     group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M)
@@ -84,9 +75,7 @@ def matmul_kernel_persistent(
     num_pid_in_group = GROUP_SIZE_M * num_pid_n
 
     for tile_id in tl.range(start_pid, num_tiles, NUM_SMS, flatten=True):
-        pid_m, pid_n = _compute_pid(
-            tile_id, num_pid_in_group, num_pid_m, GROUP_SIZE_M, NUM_SMS
-        )
+        pid_m, pid_n = _compute_pid(tile_id, num_pid_in_group, num_pid_m, GROUP_SIZE_M)
         start_m = pid_m * BLOCK_SIZE_M
         start_n = pid_n * BLOCK_SIZE_N
         offs_am = start_m + tl.arange(0, BLOCK_SIZE_M)
@@ -123,7 +112,7 @@ def matmul_kernel_persistent(
 
         tile_id_c += NUM_SMS
         pid_m, pid_n = _compute_pid(
-            tile_id_c, num_pid_in_group, num_pid_m, GROUP_SIZE_M, NUM_SMS
+            tile_id_c, num_pid_in_group, num_pid_m, GROUP_SIZE_M
         )
         offs_cm = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
         offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
@@ -177,7 +166,7 @@ def grid(META):
         },
         torch.float16: {
             "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 256,
+            "BLOCK_SIZE_N": _fp16_block_size_n,
             "BLOCK_SIZE_K": 64,
             "GROUP_SIZE_M": 8,
             "num_stages": 3,
@@ -192,7 +181,6 @@ def grid(META):
             "num_warps": 8,
         },
     }
-    # print(a.device, b.device, c.device)
     matmul_kernel_persistent[grid](
         a,
         b,
@@ -421,7 +409,7 @@ def log_softmax(input: torch.Tensor, dim: int = -1) -> torch.Tensor:
         input: Input tensor
         dim: Dimension along which to compute log_softmax
              (only -1 or last dim supported)
-    >> Stashed changes
+
     Returns:
         Tensor with log_softmax applied along the specified dimension
     """
@@ -610,51 +598,43 @@ def matmul_batch_invariant(a, b, *, out=None):
             out.copy_(result)
             return out
         return result
-    elif a.ndim == 3 and b.ndim == 3:
-        # Handle batched case like bmm
-        return bmm_batch_invariant(a, b, out=out)
-    elif a.ndim == 3 and b.ndim == 2:
-        # Handle 3D x 2D: common for linear layers
-        # (batch, seq, hidden) @ (hidden, out) -> (batch, seq, out)
-        # Reshape to 2D, do mm, reshape back
-        batch, seq, hidden = a.shape
+    elif b.ndim == 2:
+        # Handle ND x 2D: Common for linear layers
+        # (..., batch, seq, hidden) @ (hidden, out) -> (..., batch, seq, out)
+        batch_dims = a.shape[:-1]
+        hidden = a.shape[-1]
+        out_dim = b.shape[-1]
         a_2d = a.reshape(-1, hidden)
         result_2d = matmul_persistent(a_2d, b)
-        result = result_2d.reshape(batch, seq, -1)
+        result = result_2d.reshape(batch_dims + (out_dim,))
         if out is not None:
             out.copy_(result)
             return out
         return result
-    elif a.ndim == 2 and b.ndim == 3:
-        # Handle 2D x 3D: (M, K) @ (B, K, N) -> (B, M, N)
-        # By broadcasting `a` to 3D, we can reuse the batched matrix
-        # multiplication logic.
-        a_expanded = a.unsqueeze(0).expand(b.shape[0], -1, -1)
-        return bmm_batch_invariant(a_expanded, b, out=out)
-    elif a.ndim == 4 and b.ndim == 4:
-        # Handle 4D attention tensors: [batch, heads, seq, dim]
-        # Reshape to 3D, process, reshape back
-        batch, heads, seq_a, dim_a = a.shape
-        _, _, dim_b, seq_b = b.shape
-
-        # Reshape to [batch*heads, seq_a, dim_a]
-        a_3d = a.reshape(batch * heads, seq_a, dim_a)
-        b_3d = b.reshape(batch * heads, dim_b, seq_b)
-
+    elif a.ndim >= 2 and b.ndim >= 3:
+        # Generic handler for 2D x ND and ND x ND (except 1D)
+        # Broadcast dims to ensure both matrices have the same shape
+        # If 2D x ND, then unsqueeze to add a dim to a
+        if a.ndim == 2:
+            a = a.unsqueeze(0)
+        broadcast_shape = torch.broadcast_shapes(a.shape[:-2], b.shape[:-2])
+        a = a.expand(broadcast_shape + a.shape[-2:])
+        b = b.expand(broadcast_shape + b.shape[-2:])
+        batch_dim = math.prod(broadcast_shape)
+        # Reuse broadcast shape to get all dims except mm dims
+        a_3d = a.reshape(batch_dim, a.shape[-2], a.shape[-1])
+        b_3d = b.reshape(batch_dim, b.shape[-2], b.shape[-1])
         # Do batched matmul
         result_3d = bmm_batch_invariant(a_3d, b_3d)
-
-        # Reshape back to [batch, heads, seq_a, seq_b]
-        result = result_3d.reshape(batch, heads, seq_a, seq_b)
-
+        # Reshape back to [broadcast_shape, seq_a, seq_b]
+        result = result_3d.reshape(broadcast_shape + (a.shape[-2], b.shape[-1]))
         if out is not None:
             out.copy_(result)
             return out
         return result
     else:
         raise ValueError(
-            f"matmul_batch_invariant currently only supports 2D x 2D, 3D x 3D, "
-            f"3D x 2D, 2D x 3D, and 4D x 4D, "
+            f"matmul_batch_invariant requires both inputs be at least 2D "
             f"got shapes {a.shape} and {b.shape}"
         )
 
@@ -700,7 +680,7 @@ def bmm_batch_invariant(a, b, *, out=None):
         },
         torch.float16: {
             "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 256,
+            "BLOCK_SIZE_N": _fp16_block_size_n,
             "BLOCK_SIZE_K": 64,
             "num_stages": 3,
             "num_warps": 8,
@@ -752,7 +732,8 @@ def addmm_batch_invariant(bias, a, b):
 
 
 def _log_softmax_batch_invariant(input, dim, _half_to_float):
-    assert not _half_to_float, "not implemented"
+    if _half_to_float:
+        return log_softmax(input.float(), dim=dim)
     return log_softmax(input, dim=dim)
 
 
@@ -918,44 +899,38 @@ def linear_batch_invariant(input, weight, bias=None):
 
 _batch_invariant_MODE = False
 _batch_invariant_LIB = None
-_original_torch_bmm = None
-_original_fp16_reduction_precision = None
-_original_bf16_reduction_precision = None
-_original_cublas_workspace_cfg = None
-_original_cublaslt_workspace_size = None
+_fp16_block_size_n = 256
 
 
 def enable_batch_invariant_mode():
-    global _batch_invariant_MODE, _batch_invariant_LIB, _original_torch_bmm
-    global _original_fp16_reduction_precision, _original_bf16_reduction_precision
-    global _original_cublas_workspace_cfg, _original_cublaslt_workspace_size
+    global _batch_invariant_MODE, _batch_invariant_LIB
+    global _fp16_block_size_n
+
     if _batch_invariant_MODE:
         return
 
     _batch_invariant_MODE = True
     _batch_invariant_LIB = torch.library.Library("aten", "IMPL")
 
-    if (
-        current_platform.is_device_capability_family(100)
-        or current_platform.is_device_capability(80)
-        or current_platform.is_device_capability(89)
-    ):
-        # For PyTorch 2.9, B200 uses GEMV for bs=1
-        # Requires https://github.com/pytorch/pytorch/pull/166735
+    if current_platform.is_device_capability_family(80):
+        # SM80 (Ampere) cannot rely on cuBLASLt-only determinism; install the
+        # triton persistent matmul overrides for mm/addmm/matmul/linear.
         _batch_invariant_LIB.impl("aten::mm", mm_batch_invariant, "CUDA")
         _batch_invariant_LIB.impl("aten::addmm", addmm_batch_invariant, "CUDA")
         _batch_invariant_LIB.impl("aten::matmul", matmul_batch_invariant, "CUDA")
         _batch_invariant_LIB.impl("aten::linear", linear_batch_invariant, "CUDA")
     else:
-        # Only source of batch invariance for Hopper is split-k, can disable through
-        # cuBLAS workspace config
-        _original_cublas_workspace_cfg = os.environ.get("CUBLAS_WORKSPACE_CONFIG", None)
-        _original_cublaslt_workspace_size = os.environ.get(
-            "CUBLASLT_WORKSPACE_SIZE", None
-        )
+        # Hopper (SM90) and Blackwell (SM100): the only source of batch
+        # variance is split-k, which we disable via the cuBLAS workspace
+        # config.
         os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
         os.environ["CUBLASLT_WORKSPACE_SIZE"] = "1"
 
+    # Triton bmm/persistent-matmul kernels read this for the FP16 N-tile size;
+    # set unconditionally because bmm is overridden on all CUDA platforms.
+    if current_platform.is_cuda():
+        _fp16_block_size_n = 256 if get_max_shared_memory_bytes() > 106496 else 128
+
     _batch_invariant_LIB.impl(
         "aten::_log_softmax", _log_softmax_batch_invariant, "CUDA"
     )
@@ -963,17 +938,13 @@ def enable_batch_invariant_mode():
     _batch_invariant_LIB.impl("aten::_softmax", softmax_batch_invariant, "CUDA")
     _batch_invariant_LIB.impl("aten::mean.dim", mean_batch_invariant, "CUDA")
 
-    # Also monkeypatch torch.bmm directly as a fallback
-    _batch_invariant_LIB.impl("aten::bmm", bmm_batch_invariant, "CUDA")
-    _original_torch_bmm = torch.bmm
-    torch.bmm = bmm_batch_invariant
-
-    _original_bf16_reduction_precision = (
-        torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction
-    )
-    _original_fp16_reduction_precision = (
-        torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction
+    # torch 2.12+ registers a built-in Triton bmm kernel for CUDA
+    # (torch._native.ops.bmm_outer_product), so we need allow_override
+    # to replace it at the dispatcher level.
+    _batch_invariant_LIB.impl(
+        "aten::bmm", bmm_batch_invariant, "CUDA", allow_override=True
     )
+    torch.bmm = bmm_batch_invariant
 
     reduced_precision_val = (
         (False, False) if is_torch_equal_or_newer("2.10.0") else False
@@ -987,40 +958,7 @@ def enable_batch_invariant_mode():
     torch.backends.cuda.preferred_blas_library(backend="cublaslt")
 
 
-def override_envs_for_invariance(
-    attention_backend: AttentionBackendEnum | None,
-):
-    decode_invariant_backends = [
-        AttentionBackendEnum.FLASH_ATTN,  # best supported backend
-        AttentionBackendEnum.TRITON_ATTN,
-    ]
-    supported_backends = decode_invariant_backends + [
-        # FlashInfer temporarily disabled due to invariant CTA sizes.
-        # See FlashInfer issue #2424
-        # AttentionBackendEnum.FLASHINFER,
-        AttentionBackendEnum.FLASH_ATTN_MLA,
-        AttentionBackendEnum.TRITON_MLA,
-        # Not yet supported MLA backends
-        # AttentionBackendEnum.FLASHMLA,
-        # AttentionBackendEnum.FLEX_ATTENTION,  # IMA issue
-        # AttentionBackendEnum.FLASHINFER_MLA,  # PR #28967
-    ]
-    if attention_backend not in supported_backends:
-        supported_names = [b.name for b in supported_backends]
-        backend_name = attention_backend.name if attention_backend else None
-        error = (
-            "VLLM batch_invariant mode requires an attention backend in "
-            f"{supported_names}, but got '{backend_name}'. "
-            "Please use --attention-backend or attention_config to set "
-            "one of the supported backends before enabling batch_invariant."
-        )
-        raise RuntimeError(error)
-    if attention_backend not in decode_invariant_backends:
-        warning = (
-            "You are using a non-decode-invariant form of batch invariance. "
-            "This will not be invariant between prefill and decode."
-        )
-        logger.warning_once(warning, scope="local")
+def override_envs_for_invariance():
     os.environ["VLLM_ALLREDUCE_USE_SYMM_MEM"] = "0"
 
     os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
@@ -1041,12 +979,10 @@ def override_envs_for_invariance(
     os.environ["VLLM_USE_AOT_COMPILE"] = "0"
 
 
-def init_batch_invariance(
-    attention_backend: AttentionBackendEnum | None,
-):
+def init_batch_invariance():
     # this will hit all the csrc overrides as well
     if envs.VLLM_BATCH_INVARIANT:
-        override_envs_for_invariance(attention_backend)
+        override_envs_for_invariance()
         enable_batch_invariant_mode()
 
         # Disable TF32 for batch invariance - it causes non-deterministic rounding
diff --git a/vllm/model_executor/layers/conv.py b/vllm/model_executor/layers/conv.py
index f4709f2f4d80..51314263b735 100644
--- a/vllm/model_executor/layers/conv.py
+++ b/vllm/model_executor/layers/conv.py
@@ -10,7 +10,7 @@
 import torch.nn.functional as F
 
 from vllm.model_executor.custom_op import CustomOp
-from vllm.utils.torch_utils import is_torch_equal
+from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 
 class ConvLayerBase(CustomOp):
@@ -252,11 +252,12 @@ def forward_native(self, x: torch.Tensor) -> torch.Tensor:
             return self._forward_conv(x)
 
     def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
-        # PyTorch2.9.0 disabled CUDNN's Conv3D, which caused a
+        # PyTorch 2.9.0+ disabled CUDNN's Conv3D, which caused a
         # significant performance regression.
         # See: https://github.com/vllm-project/vllm/issues/27406
         # and https://github.com/pytorch/pytorch/issues/166122
+        # and https://github.com/huggingface/transformers/pull/45041
         # By default, we use CUDNN's convolution ops with optimization.
-        if self.enable_linear and (is_torch_equal("2.9.0") or is_torch_equal("2.9.1")):
+        if self.enable_linear and is_torch_equal_or_newer("2.9.0"):
             return self._forward_mulmat(x)
         return self._forward_conv(x)
diff --git a/vllm/model_executor/layers/fla/ops/__init__.py b/vllm/model_executor/layers/fla/ops/__init__.py
index e52387a20b41..1942d8980bc5 100644
--- a/vllm/model_executor/layers/fla/ops/__init__.py
+++ b/vllm/model_executor/layers/fla/ops/__init__.py
@@ -7,6 +7,7 @@
 # the following copyright notice:
 # Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
 from .chunk import chunk_gated_delta_rule
+from .fused_gdn_prefill_post_conv import fused_post_conv_prep
 from .fused_recurrent import (
     fused_recurrent_gated_delta_rule,
     fused_recurrent_gated_delta_rule_packed_decode,
@@ -19,5 +20,6 @@
     "chunk_gated_delta_rule",
     "fused_recurrent_gated_delta_rule",
     "fused_recurrent_gated_delta_rule_packed_decode",
+    "fused_post_conv_prep",
     "fused_sigmoid_gating_delta_rule_update",
 ]
diff --git a/vllm/model_executor/layers/fla/ops/chunk.py b/vllm/model_executor/layers/fla/ops/chunk.py
index 9261885956e5..caf8b0c97654 100644
--- a/vllm/model_executor/layers/fla/ops/chunk.py
+++ b/vllm/model_executor/layers/fla/ops/chunk.py
@@ -7,7 +7,6 @@
 # the following copyright notice:
 # Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
 # ruff: noqa: E501
-import warnings
 
 import torch
 
@@ -17,7 +16,7 @@
 from .cumsum import chunk_local_cumsum
 from .l2norm import l2norm_fwd
 from .solve_tril import solve_tril
-from .utils import SUPPRESS_LEVEL, input_guard
+from .utils import FLA_CHUNK_SIZE, SUPPRESS_LEVEL, input_guard
 from .wy_fast import recompute_w_u_fwd
 
 
@@ -31,13 +30,25 @@ def chunk_gated_delta_rule_fwd(
     initial_state: torch.Tensor,
     output_final_state: bool,
     cu_seqlens: torch.Tensor | None = None,
+    chunk_indices: torch.Tensor | None = None,
+    chunk_offsets: torch.Tensor | None = None,
+    core_attn_out: torch.Tensor | None = None,
 ):
-    g = chunk_local_cumsum(g, chunk_size=64, cu_seqlens=cu_seqlens)
+    g = chunk_local_cumsum(
+        g, chunk_size=FLA_CHUNK_SIZE, cu_seqlens=cu_seqlens, chunk_indices=chunk_indices
+    )
     # obtain WY representation. u is actually the new v.
     A = chunk_scaled_dot_kkt_fwd(
-        k=k, beta=beta, g=g, cu_seqlens=cu_seqlens, output_dtype=torch.float32
+        k=k,
+        beta=beta,
+        g=g,
+        cu_seqlens=cu_seqlens,
+        chunk_indices=chunk_indices,
+        output_dtype=torch.float32,
+    )
+    A = solve_tril(
+        A=A, cu_seqlens=cu_seqlens, chunk_indices=chunk_indices, output_dtype=k.dtype
     )
-    A = solve_tril(A=A, cu_seqlens=cu_seqlens, output_dtype=k.dtype)
     w, u = recompute_w_u_fwd(
         k=k,
         v=v,
@@ -45,6 +56,7 @@ def chunk_gated_delta_rule_fwd(
         A=A,
         g_cumsum=g,
         cu_seqlens=cu_seqlens,
+        chunk_indices=chunk_indices,
     )
     h, v_new, final_state = chunk_gated_delta_rule_fwd_h(
         k=k,
@@ -54,6 +66,8 @@ def chunk_gated_delta_rule_fwd(
         initial_state=initial_state,
         output_final_state=output_final_state,
         cu_seqlens=cu_seqlens,
+        chunk_indices=chunk_indices,
+        chunk_offsets=chunk_offsets,
     )
     o = chunk_fwd_o(
         q=q,
@@ -63,6 +77,8 @@ def chunk_gated_delta_rule_fwd(
         g=g,
         scale=scale,
         cu_seqlens=cu_seqlens,
+        chunk_indices=chunk_indices,
+        core_attn_out=core_attn_out,
     )
     if SUPPRESS_LEVEL < 3:
         return g, o, A, final_state, None, None, None
@@ -85,7 +101,10 @@ def forward(
         initial_state: torch.Tensor,
         output_final_state: bool,
         cu_seqlens: torch.Tensor | None = None,
+        chunk_indices: torch.Tensor | None = None,
+        chunk_offsets: torch.Tensor | None = None,
         use_qk_l2norm_in_kernel: bool = False,
+        core_attn_out: torch.Tensor | None = None,
     ):
         if use_qk_l2norm_in_kernel:
             q = l2norm_fwd(q)
@@ -101,9 +120,17 @@ def forward(
             initial_state=initial_state,
             output_final_state=output_final_state,
             cu_seqlens=cu_seqlens,
+            chunk_indices=chunk_indices,
+            chunk_offsets=chunk_offsets,
+            core_attn_out=core_attn_out,
         )
         ctx.scale = scale
         ctx.use_qk_l2norm_in_kernel = use_qk_l2norm_in_kernel
+        if core_attn_out is not None:
+            assert not torch.is_grad_enabled(), (
+                "core_attn_out buffer reuse is only supported for inference"
+            )
+            assert q.dtype == o.dtype, "Incompatible dtype for inplace computation"
         return o.to(q.dtype), final_state
 
 
@@ -118,7 +145,10 @@ def chunk_gated_delta_rule(
     initial_state: torch.Tensor = None,
     output_final_state: bool = False,
     cu_seqlens: torch.Tensor | None = None,
+    chunk_indices: torch.Tensor | None = None,
+    chunk_offsets: torch.Tensor | None = None,
     use_qk_l2norm_in_kernel: bool = False,
+    core_attn_out: torch.Tensor | None = None,
 ):
     r"""
     Args:
@@ -184,13 +214,6 @@ def chunk_gated_delta_rule(
         "ChunkGatedDeltaRuleFunction does not support float32. Please use bfloat16."
     )
     assert len(beta.shape) == 3, "beta must be of shape [B, T, H]."
-    if q.shape[1] < q.shape[2]:
-        warnings.warn(
-            f"Input tensor shape suggests potential format mismatch: seq_len ({q.shape[1]}) < num_heads ({q.shape[2]}). "
-            "This may indicate the inputs were passed in head-first format [B, H, T, ...] "
-            "Please verify your input tensor format matches the expected shape [B, T, H, ...].",
-            stacklevel=2,
-        )
     if cu_seqlens is not None:
         if q.shape[0] != 1:
             raise ValueError(
@@ -214,6 +237,9 @@ def chunk_gated_delta_rule(
         initial_state,
         output_final_state,
         cu_seqlens,
+        chunk_indices,
+        chunk_offsets,
         use_qk_l2norm_in_kernel,
+        core_attn_out,
     )
     return o, final_state
diff --git a/vllm/model_executor/layers/fla/ops/chunk_delta_h.py b/vllm/model_executor/layers/fla/ops/chunk_delta_h.py
index ce60ca46f6c9..89a8c4d9093c 100644
--- a/vllm/model_executor/layers/fla/ops/chunk_delta_h.py
+++ b/vllm/model_executor/layers/fla/ops/chunk_delta_h.py
@@ -13,8 +13,8 @@
 from vllm.triton_utils import tl, triton
 
 from .index import prepare_chunk_indices, prepare_chunk_offsets
-from .op import exp
-from .utils import use_cuda_graph
+from .op import exp, exp2
+from .utils import FLA_CHUNK_SIZE, use_cuda_graph
 
 NUM_WARPS = [2, 4, 8, 16]
 
@@ -65,6 +65,7 @@ def chunk_gated_delta_rule_fwd_kernel_h_blockdim64(
     STORE_FINAL_STATE: tl.constexpr,
     SAVE_NEW_VALUE: tl.constexpr,
     IS_VARLEN: tl.constexpr,
+    USE_EXP2: tl.constexpr,
 ):
     i_v, i_nh = tl.program_id(0), tl.program_id(1)
     i_n, i_h = i_nh // H, i_nh % H
@@ -129,22 +130,42 @@ def chunk_gated_delta_rule_fwd_kernel_h_blockdim64(
     # main recurrence
     for i_t in range(NT):
         p_h1 = tl.make_block_ptr(
-            h + i_t * stride_h, (V, K), (K, 1), (i_v * BV, 0), (BV, 64), (1, 0)
+            h + i_t.to(tl.int64) * stride_h,
+            (V, K),
+            (K, 1),
+            (i_v * BV, 0),
+            (BV, 64),
+            (1, 0),
         )
         tl.store(p_h1, b_h1.to(p_h1.dtype.element_ty), boundary_check=(0, 1))
         if K > 64:
             p_h2 = tl.make_block_ptr(
-                h + i_t * stride_h, (V, K), (K, 1), (i_v * BV, 64), (BV, 64), (1, 0)
+                h + i_t.to(tl.int64) * stride_h,
+                (V, K),
+                (K, 1),
+                (i_v * BV, 64),
+                (BV, 64),
+                (1, 0),
             )
             tl.store(p_h2, b_h2.to(p_h2.dtype.element_ty), boundary_check=(0, 1))
         if K > 128:
             p_h3 = tl.make_block_ptr(
-                h + i_t * stride_h, (V, K), (K, 1), (i_v * BV, 128), (BV, 64), (1, 0)
+                h + i_t.to(tl.int64) * stride_h,
+                (V, K),
+                (K, 1),
+                (i_v * BV, 128),
+                (BV, 64),
+                (1, 0),
             )
             tl.store(p_h3, b_h3.to(p_h3.dtype.element_ty), boundary_check=(0, 1))
         if K > 192:
             p_h4 = tl.make_block_ptr(
-                h + i_t * stride_h, (V, K), (K, 1), (i_v * BV, 192), (BV, 64), (1, 0)
+                h + i_t.to(tl.int64) * stride_h,
+                (V, K),
+                (K, 1),
+                (i_v * BV, 192),
+                (BV, 64),
+                (1, 0),
             )
             tl.store(p_h4, b_h4.to(p_h4.dtype.element_ty), boundary_check=(0, 1))
 
@@ -182,16 +203,20 @@ def chunk_gated_delta_rule_fwd_kernel_h_blockdim64(
             )
             tl.store(p_v, b_v.to(p_v.dtype.element_ty), boundary_check=(0, 1))
 
-        last_idx = min((i_t + 1) * BT, T) - 1
+        last_idx = min((i_t.to(tl.int64) + 1) * BT, T) - 1
         if USE_G:
-            m_t = (i_t * BT + tl.arange(0, BT)) < T
+            m_t = (i_t.to(tl.int64) * BT + tl.arange(0, BT)) < T
             b_g_last = tl.load(g + bos * H + last_idx * H + i_h)
             p_g = tl.make_block_ptr(
                 g + bos * H + i_h, (T,), (H,), (i_t * BT,), (BT,), (0,)
             )
             b_g = tl.load(p_g, boundary_check=(0,))
-            b_v = b_v * tl.where(m_t, exp(b_g_last - b_g), 0)[:, None]
-            b_g_last = exp(b_g_last)
+            if USE_EXP2:
+                b_v = b_v * tl.where(m_t, exp2(b_g_last - b_g), 0)[:, None]
+                b_g_last = exp2(b_g_last)
+            else:
+                b_v = b_v * tl.where(m_t, exp(b_g_last - b_g), 0)[:, None]
+                b_g_last = exp(b_g_last)
             b_h1 *= b_g_last
             if K > 64:
                 b_h2 *= b_g_last
@@ -207,7 +232,10 @@ def chunk_gated_delta_rule_fwd_kernel_h_blockdim64(
                 mask=(o_k1 < K),
                 other=0.0,
             )
-            b_h1 *= exp(b_gk_last1)[None, :]
+            if USE_EXP2:
+                b_h1 *= exp2(b_gk_last1)[None, :]
+            else:
+                b_h1 *= exp(b_gk_last1)[None, :]
             if K > 64:
                 o_k2 = 64 + o_k1
                 b_gk_last2 = tl.load(
@@ -215,7 +243,10 @@ def chunk_gated_delta_rule_fwd_kernel_h_blockdim64(
                     mask=(o_k2 < K),
                     other=0.0,
                 )
-                b_h2 *= exp(b_gk_last2)[None, :]
+                if USE_EXP2:
+                    b_h2 *= exp2(b_gk_last2)[None, :]
+                else:
+                    b_h2 *= exp(b_gk_last2)[None, :]
             if K > 128:
                 o_k3 = 128 + o_k1
                 b_gk_last3 = tl.load(
@@ -223,7 +254,10 @@ def chunk_gated_delta_rule_fwd_kernel_h_blockdim64(
                     mask=(o_k3 < K),
                     other=0.0,
                 )
-                b_h3 *= exp(b_gk_last3)[None, :]
+                if USE_EXP2:
+                    b_h3 *= exp2(b_gk_last3)[None, :]
+                else:
+                    b_h3 *= exp(b_gk_last3)[None, :]
             if K > 192:
                 o_k4 = 192 + o_k1
                 b_gk_last4 = tl.load(
@@ -231,7 +265,10 @@ def chunk_gated_delta_rule_fwd_kernel_h_blockdim64(
                     mask=(o_k4 < K),
                     other=0.0,
                 )
-                b_h4 *= exp(b_gk_last4)[None, :]
+                if USE_EXP2:
+                    b_h4 *= exp2(b_gk_last4)[None, :]
+                else:
+                    b_h4 *= exp(b_gk_last4)[None, :]
         b_v = b_v.to(k.dtype.element_ty)
 
         p_k = tl.make_block_ptr(
@@ -286,9 +323,12 @@ def chunk_gated_delta_rule_fwd_h(
     gk: torch.Tensor | None = None,
     initial_state: torch.Tensor | None = None,
     output_final_state: bool = False,
-    chunk_size: int = 64,  # SY: remove this argument and force chunk size 64?
+    chunk_size: int = FLA_CHUNK_SIZE,
     save_new_value: bool = True,
     cu_seqlens: torch.Tensor | None = None,
+    chunk_indices: torch.Tensor | None = None,
+    chunk_offsets: torch.Tensor | None = None,
+    use_exp2: bool = False,
 ) -> tuple[torch.Tensor, torch.Tensor]:
     # This kernel is slightly different from fla to support Q/K with different head numbers.
     # In fla, Q/K always have the same head number, so Hg is always equal to H.
@@ -296,20 +336,15 @@ def chunk_gated_delta_rule_fwd_h(
     H = u.shape[-2]
     BT = chunk_size
 
-    chunk_indices = (
-        prepare_chunk_indices(cu_seqlens, chunk_size)
-        if cu_seqlens is not None
-        else None
-    )
+    if chunk_indices is None and cu_seqlens is not None:
+        chunk_indices = prepare_chunk_indices(cu_seqlens, chunk_size)
     # N: the actual number of sequences in the batch with either equal or variable lengths
     if cu_seqlens is None:
         N, NT, chunk_offsets = B, triton.cdiv(T, BT), None
     else:
-        N, NT, chunk_offsets = (
-            len(cu_seqlens) - 1,
-            len(chunk_indices),
-            prepare_chunk_offsets(cu_seqlens, BT),
-        )
+        N, NT = len(cu_seqlens) - 1, len(chunk_indices)
+        if chunk_offsets is None:
+            chunk_offsets = prepare_chunk_offsets(cu_seqlens, BT)
     assert K <= 256, "current kernel does not support head dimension larger than 256."
 
     h = k.new_empty(B, NT, H, V, K)
@@ -340,5 +375,6 @@ def grid(meta):
         K=K,
         V=V,
         BT=BT,
+        USE_EXP2=use_exp2,
     )
     return h, v_new, final_state
diff --git a/vllm/model_executor/layers/fla/ops/chunk_o.py b/vllm/model_executor/layers/fla/ops/chunk_o.py
index 130781276259..0c323b8ce215 100644
--- a/vllm/model_executor/layers/fla/ops/chunk_o.py
+++ b/vllm/model_executor/layers/fla/ops/chunk_o.py
@@ -16,7 +16,7 @@
 
 from .index import prepare_chunk_indices
 from .op import exp
-from .utils import FLA_GDN_FIX_BT, check_shared_mem, is_nvidia_hopper
+from .utils import FLA_CHUNK_SIZE, check_shared_mem, is_nvidia_hopper
 
 BKV_LIST = [64, 128] if check_shared_mem() else [32, 64]
 NUM_WARPS = [2, 4] if is_nvidia_hopper else [2, 4, 8]
@@ -146,19 +146,26 @@ def chunk_fwd_o(
     g: torch.Tensor | None = None,  # cumsum of log decay
     scale: float | None = None,
     cu_seqlens: torch.Tensor | None = None,
-    chunk_size: int = 64,
+    chunk_indices: torch.Tensor | None = None,
+    chunk_size: int = FLA_CHUNK_SIZE,
+    core_attn_out: torch.Tensor | None = None,
 ) -> torch.Tensor:
     B, T, Hg, K, V = *q.shape, v.shape[-1]
     H = v.shape[-2]
-    BT = 64 if FLA_GDN_FIX_BT else min(chunk_size, max(16, triton.next_power_of_2(T)))
-    chunk_indices = (
-        prepare_chunk_indices(cu_seqlens, BT) if cu_seqlens is not None else None
-    )
+    BT = chunk_size
+    if chunk_indices is None and cu_seqlens is not None:
+        chunk_indices = prepare_chunk_indices(cu_seqlens, BT)
     NT = triton.cdiv(T, BT) if cu_seqlens is None else len(chunk_indices)
     if scale is None:
         scale = k.shape[-1] ** -0.5
 
-    o = torch.empty_like(v)
+    if core_attn_out is not None:
+        assert core_attn_out.numel() >= v.numel(), (
+            f"core_attn_out too small: {core_attn_out.numel()} < {v.numel()}"
+        )
+        o = core_attn_out[: v.numel()].view(*v.shape)
+    else:
+        o = torch.empty_like(v)
 
     def grid(meta):
         return (triton.cdiv(V, meta["BV"]), NT, B * H)
diff --git a/vllm/model_executor/layers/fla/ops/chunk_scaled_dot_kkt.py b/vllm/model_executor/layers/fla/ops/chunk_scaled_dot_kkt.py
index 31bd489ebd87..a715504da6b7 100644
--- a/vllm/model_executor/layers/fla/ops/chunk_scaled_dot_kkt.py
+++ b/vllm/model_executor/layers/fla/ops/chunk_scaled_dot_kkt.py
@@ -14,6 +14,7 @@
 
 from .index import prepare_chunk_indices
 from .op import exp
+from .utils import FLA_CHUNK_SIZE
 
 
 @triton.heuristics(
@@ -82,7 +83,7 @@ def chunk_scaled_dot_kkt_fwd_kernel(
         )
         b_k = tl.load(p_k, boundary_check=(0, 1))
         b_kb = b_k * b_beta[:, None]
-        b_A += tl.dot(b_kb.to(b_k.dtype), tl.trans(b_k))
+        b_A += tl.dot(b_kb, tl.trans(b_k).to(b_kb.dtype))
 
     if USE_G:
         p_g = tl.make_block_ptr(g + bos * H + i_h, (T,), (H,), (i_t * BT,), (BT,), (0,))
@@ -103,7 +104,8 @@ def chunk_scaled_dot_kkt_fwd(
     g: torch.Tensor | None = None,
     beta: torch.Tensor | None = None,
     cu_seqlens: torch.Tensor | None = None,
-    chunk_size: int = 64,
+    chunk_indices: torch.Tensor | None = None,
+    chunk_size: int = FLA_CHUNK_SIZE,
     output_dtype: torch.dtype = torch.float32,
 ) -> torch.Tensor:
     r"""
@@ -119,6 +121,9 @@ def chunk_scaled_dot_kkt_fwd(
         cu_seqlens (torch.Tensor):
             The cumulative sequence lengths of the input tensor.
             Default: None
+        chunk_indices (torch.Tensor):
+            Pre-computed chunk indices. If None and cu_seqlens is provided,
+            computed internally. Default: None
         chunk_size (int):
             The chunk size. Default: 64.
         output_dtype (torch.dtype):
@@ -132,9 +137,8 @@ def chunk_scaled_dot_kkt_fwd(
     B, T, Hg, K = k.shape
     H = beta.shape[-1]
     BT = chunk_size
-    chunk_indices = (
-        prepare_chunk_indices(cu_seqlens, BT) if cu_seqlens is not None else None
-    )
+    if chunk_indices is None and cu_seqlens is not None:
+        chunk_indices = prepare_chunk_indices(cu_seqlens, BT)
     NT = triton.cdiv(T, BT) if cu_seqlens is None else len(chunk_indices)
 
     A = torch.empty(B, T, H, BT, device=k.device, dtype=output_dtype)
diff --git a/vllm/model_executor/layers/fla/ops/cumsum.py b/vllm/model_executor/layers/fla/ops/cumsum.py
index 99b41794796d..b0820104b1a1 100644
--- a/vllm/model_executor/layers/fla/ops/cumsum.py
+++ b/vllm/model_executor/layers/fla/ops/cumsum.py
@@ -7,7 +7,6 @@
 # the following copyright notice:
 # Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
 # ruff: noqa: E501
-import warnings
 
 import torch
 
@@ -163,6 +162,7 @@ def chunk_local_cumsum_scalar(
     chunk_size: int,
     reverse: bool = False,
     cu_seqlens: torch.Tensor | None = None,
+    chunk_indices: torch.Tensor | None = None,
     head_first: bool = False,
     output_dtype: torch.dtype | None = torch.float,
 ) -> torch.Tensor:
@@ -173,10 +173,9 @@ def chunk_local_cumsum_scalar(
     assert chunk_size == 2 ** (chunk_size.bit_length() - 1), (
         "chunk_size must be a power of 2"
     )
+    if chunk_indices is None and cu_seqlens is not None:
+        chunk_indices = prepare_chunk_indices(cu_seqlens, chunk_size)
     BT = chunk_size
-    chunk_indices = (
-        prepare_chunk_indices(cu_seqlens, BT) if cu_seqlens is not None else None
-    )
     NT = triton.cdiv(T, BT) if cu_seqlens is None else len(chunk_indices)
     g_org, g = g, torch.empty_like(g, dtype=output_dtype or g.dtype)
     grid = (NT, B * H)
@@ -200,6 +199,7 @@ def chunk_local_cumsum_vector(
     chunk_size: int,
     reverse: bool = False,
     cu_seqlens: torch.Tensor | None = None,
+    chunk_indices: torch.Tensor | None = None,
     head_first: bool = False,
     output_dtype: torch.dtype | None = torch.float,
 ) -> torch.Tensor:
@@ -207,16 +207,13 @@ def chunk_local_cumsum_vector(
         B, H, T, S = g.shape
     else:
         B, T, H, S = g.shape
-    BT = chunk_size
-    chunk_indices = (
-        prepare_chunk_indices(cu_seqlens, chunk_size)
-        if cu_seqlens is not None
-        else None
-    )
-    NT = triton.cdiv(T, BT) if cu_seqlens is None else len(chunk_indices)
     assert chunk_size == 2 ** (chunk_size.bit_length() - 1), (
         "chunk_size must be a power of 2"
     )
+    if chunk_indices is None and cu_seqlens is not None:
+        chunk_indices = prepare_chunk_indices(cu_seqlens, chunk_size)
+    BT = chunk_size
+    NT = triton.cdiv(T, BT) if cu_seqlens is None else len(chunk_indices)
 
     g_org, g = g, torch.empty_like(g, dtype=output_dtype or g.dtype)
 
@@ -248,29 +245,34 @@ def chunk_local_cumsum(
     chunk_size: int,
     reverse: bool = False,
     cu_seqlens: torch.Tensor | None = None,
+    chunk_indices: torch.Tensor | None = None,
     head_first: bool = False,
     output_dtype: torch.dtype | None = torch.float,
     **kwargs,
 ) -> torch.Tensor:
-    if not head_first and g.shape[1] < g.shape[2]:
-        warnings.warn(
-            f"Input tensor shape suggests potential format mismatch: seq_len ({g.shape[1]}) < num_heads ({g.shape[2]}). "
-            "This may indicate the inputs were passed in head-first format [B, H, T, ...] "
-            "when head_first=False was specified. "
-            "Please verify your input tensor format matches the expected shape [B, T, H, ...].",
-            stacklevel=2,
-        )
     if cu_seqlens is not None:
         assert g.shape[0] == 1, (
             "Only batch size 1 is supported when cu_seqlens are provided"
         )
     if len(g.shape) == 3:
         return chunk_local_cumsum_scalar(
-            g, chunk_size, reverse, cu_seqlens, head_first, output_dtype
+            g,
+            chunk_size,
+            reverse,
+            cu_seqlens,
+            chunk_indices,
+            head_first,
+            output_dtype,
         )
     elif len(g.shape) == 4:
         return chunk_local_cumsum_vector(
-            g, chunk_size, reverse, cu_seqlens, head_first, output_dtype
+            g,
+            chunk_size,
+            reverse,
+            cu_seqlens,
+            chunk_indices,
+            head_first,
+            output_dtype,
         )
     else:
         raise ValueError(
diff --git a/vllm/model_executor/layers/fla/ops/fused_gdn_prefill_post_conv.py b/vllm/model_executor/layers/fla/ops/fused_gdn_prefill_post_conv.py
new file mode 100644
index 000000000000..4807c78e7b1a
--- /dev/null
+++ b/vllm/model_executor/layers/fla/ops/fused_gdn_prefill_post_conv.py
@@ -0,0 +1,248 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Fused post-conv1d preparation for GDN prefill.
+
+Replaces the chain:
+    split → rearrange → contiguous * 3 → l2norm * 2 → gating
+with a **single Triton kernel** that reads the conv'd mixed_qkv output
+and writes directly to q/k/v/g/beta in the target contiguous layout.
+
+"""
+
+from __future__ import annotations
+
+import torch
+
+from vllm.triton_utils import tl, triton
+
+
+@triton.jit
+def _fused_post_conv_kernel(
+    # ---- inputs ----
+    mixed_qkv_ptr,  # [L, qkv_dim] conv'd output (contiguous)
+    a_ptr,  # [L, HV]
+    b_ptr,  # [L, HV]
+    # ---- params ----
+    A_log_ptr,  # [HV]
+    dt_bias_ptr,  # [HV]
+    # ---- outputs ----
+    q_ptr,  # [L, H, K] contiguous
+    k_ptr,  # [L, H, K] contiguous
+    v_ptr,  # [L, HV, V] contiguous
+    g_ptr,  # [L, HV] float32
+    beta_ptr,  # [L, HV] float32
+    # ---- strides ----
+    stride_x_tok,  # qkv_dim
+    stride_a_tok,  # HV
+    stride_b_tok,  # HV
+    stride_q_tok,  # H * K
+    stride_k_tok,  # H * K
+    stride_v_tok,  # HV * V
+    # ---- dims ----
+    L,
+    H: tl.constexpr,
+    HV: tl.constexpr,
+    K: tl.constexpr,
+    V: tl.constexpr,
+    APPLY_L2NORM: tl.constexpr,
+    L2NORM_EPS: tl.constexpr,
+    OUTPUT_G_EXP: tl.constexpr,
+    SOFTPLUS_THRESHOLD: tl.constexpr,
+    BLOCK_T: tl.constexpr,
+    BK: tl.constexpr,
+    BV: tl.constexpr,
+):
+    """Single fused kernel for post-conv1d preparation.
+
+    Grid: (ceil(L, BLOCK_T), H + HV)
+      - program_id(1) in [0, H):    Q/K head processing + l2norm
+      - program_id(1) in [H, H+HV): V head processing + gating
+    """
+    i_tb = tl.program_id(0)
+    i_head = tl.program_id(1)
+
+    HK: tl.constexpr = H * K
+
+    offs_t = i_tb * BLOCK_T + tl.arange(0, BLOCK_T)  # [BLOCK_T]
+    mask_t = offs_t < L
+
+    if i_head < H:
+        # ============ Q/K head processing ============
+        i_h = i_head
+        offs_k = tl.arange(0, BK)  # [BK]
+        mask_k = offs_k < K
+        mask_2d = mask_t[:, None] & mask_k[None, :]  # [BLOCK_T, BK]
+
+        # Load Q features: mixed_qkv[t, i_h*K + k]
+        q_offsets = offs_t[:, None] * stride_x_tok + i_h * K + offs_k[None, :]
+        q_f32 = tl.load(mixed_qkv_ptr + q_offsets, mask=mask_2d, other=0).to(tl.float32)
+
+        # Load K features: mixed_qkv[t, HK + i_h*K + k]
+        k_offsets = offs_t[:, None] * stride_x_tok + HK + i_h * K + offs_k[None, :]
+        k_f32 = tl.load(mixed_qkv_ptr + k_offsets, mask=mask_2d, other=0).to(tl.float32)
+
+        if APPLY_L2NORM:
+            q_sq_sum = tl.sum(q_f32 * q_f32, axis=1)  # [BLOCK_T]
+            q_inv = 1.0 / tl.sqrt(q_sq_sum + L2NORM_EPS)
+            q_f32 = q_f32 * q_inv[:, None]
+
+            k_sq_sum = tl.sum(k_f32 * k_f32, axis=1)
+            k_inv = 1.0 / tl.sqrt(k_sq_sum + L2NORM_EPS)
+            k_f32 = k_f32 * k_inv[:, None]
+
+        # Store Q
+        q_out = offs_t[:, None] * stride_q_tok + i_h * K + offs_k[None, :]
+        tl.store(
+            q_ptr + q_out,
+            q_f32.to(q_ptr.dtype.element_ty),
+            mask=mask_2d,
+        )
+
+        # Store K
+        k_out = offs_t[:, None] * stride_k_tok + i_h * K + offs_k[None, :]
+        tl.store(
+            k_ptr + k_out,
+            k_f32.to(k_ptr.dtype.element_ty),
+            mask=mask_2d,
+        )
+    else:
+        # ============ V head + gating processing ============
+        i_hv = i_head - H
+        offs_v = tl.arange(0, BV)  # [BV]
+        mask_v = offs_v < V
+        mask_2d = mask_t[:, None] & mask_v[None, :]  # [BLOCK_T, BV]
+
+        V_OFFSET: tl.constexpr = 2 * H * K
+
+        # Load V features: mixed_qkv[t, 2*H*K + i_hv*V + v]
+        v_offsets = (
+            offs_t[:, None] * stride_x_tok + V_OFFSET + i_hv * V + offs_v[None, :]
+        )
+        v_vals = tl.load(mixed_qkv_ptr + v_offsets, mask=mask_2d, other=0)
+
+        # Store V
+        v_out = offs_t[:, None] * stride_v_tok + i_hv * V + offs_v[None, :]
+        tl.store(v_ptr + v_out, v_vals, mask=mask_2d)
+
+        # Gating: one scalar per (token, v-head)
+        A_log_val = tl.load(A_log_ptr + i_hv).to(tl.float32)
+        dt_bias_val = tl.load(dt_bias_ptr + i_hv).to(tl.float32)
+
+        a_offsets = offs_t * stride_a_tok + i_hv
+        b_offsets = offs_t * stride_b_tok + i_hv
+        a_vals = tl.load(a_ptr + a_offsets, mask=mask_t, other=0).to(tl.float32)
+        b_vals = tl.load(b_ptr + b_offsets, mask=mask_t, other=0).to(tl.float32)
+
+        # g = -exp(A_log) * softplus(a + dt_bias)
+        x = a_vals + dt_bias_val
+        sp = tl.where(x > 0, x + tl.log(1.0 + tl.exp(-x)), tl.log(1.0 + tl.exp(x)))
+        sp = tl.where(x <= SOFTPLUS_THRESHOLD, sp, x)
+        g_vals = -tl.exp(A_log_val) * sp
+
+        if OUTPUT_G_EXP:
+            g_vals = tl.exp(g_vals)
+
+        beta_vals = tl.sigmoid(b_vals)
+
+        gb_offsets = offs_t * HV + i_hv
+        tl.store(g_ptr + gb_offsets, g_vals, mask=mask_t)
+        tl.store(beta_ptr + gb_offsets, beta_vals, mask=mask_t)
+
+
+def fused_post_conv_prep(
+    conv_output: torch.Tensor,  # [L, qkv_dim] conv'd mixed_qkv
+    a: torch.Tensor,  # [L, HV]
+    b: torch.Tensor,  # [L, HV]
+    A_log: torch.Tensor,  # [HV]
+    dt_bias: torch.Tensor,  # [HV]
+    num_k_heads: int,
+    head_k_dim: int,
+    head_v_dim: int,
+    apply_l2norm: bool = True,
+    output_g_exp: bool = False,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """Fused post-conv1d prep: split + l2norm + gating in one kernel.
+
+    Args:
+        conv_output: [L, qkv_dim] contiguous conv'd mixed_qkv
+        a: [L, HV] gating input
+        b: [L, HV] gating input
+        A_log: [HV] log decay parameter
+        dt_bias: [HV] dt bias parameter
+        num_k_heads: number of K heads (H)
+        head_k_dim: dimension per K head (K)
+        head_v_dim: dimension per V head (V)
+        apply_l2norm: whether to L2-normalize q and k
+        output_g_exp: if True, output exp(g) instead of g (for FlashInfer)
+
+    Returns:
+        q: [L, H, K] contiguous, optionally l2-normalized
+        k: [L, H, K] contiguous, optionally l2-normalized
+        v: [L, HV, V] contiguous
+        g: [L, HV] float32
+        beta: [L, HV] float32
+    """
+    L = conv_output.shape[0]
+    qkv_dim = conv_output.shape[1]
+    H = num_k_heads
+    K = head_k_dim
+    V = head_v_dim
+    HV = A_log.shape[0]
+    dtype = conv_output.dtype
+    device = conv_output.device
+
+    assert qkv_dim == 2 * H * K + HV * V, (
+        f"qkv_dim={qkv_dim} != 2*H*K + HV*V = {2 * H * K + HV * V}"
+    )
+
+    # Allocate outputs in target contiguous layout
+    q = torch.empty(L, H, K, dtype=dtype, device=device)
+    k = torch.empty(L, H, K, dtype=dtype, device=device)
+    v = torch.empty(L, HV, V, dtype=dtype, device=device)
+    g = torch.empty(L, HV, dtype=torch.float32, device=device)
+    beta = torch.empty(L, HV, dtype=torch.float32, device=device)
+
+    if L == 0:
+        return q, k, v, g, beta
+
+    # ---- Kernel config ----
+    BK = triton.next_power_of_2(K)
+    BV = triton.next_power_of_2(V)
+    BLOCK_T = 16  # tokens per block
+
+    # Single kernel: blocks [0,H) do Q/K, blocks [H, H+HV) do V+gating
+    grid = (triton.cdiv(L, BLOCK_T), H + HV)
+    _fused_post_conv_kernel[grid](
+        mixed_qkv_ptr=conv_output,
+        a_ptr=a,
+        b_ptr=b,
+        A_log_ptr=A_log,
+        dt_bias_ptr=dt_bias,
+        q_ptr=q,
+        k_ptr=k,
+        v_ptr=v,
+        g_ptr=g,
+        beta_ptr=beta,
+        stride_x_tok=conv_output.stride(0),
+        stride_a_tok=a.stride(0),
+        stride_b_tok=b.stride(0),
+        stride_q_tok=q.stride(0),
+        stride_k_tok=k.stride(0),
+        stride_v_tok=v.stride(0),
+        L=L,
+        H=H,
+        HV=HV,
+        K=K,
+        V=V,
+        APPLY_L2NORM=apply_l2norm,
+        L2NORM_EPS=1e-6,
+        OUTPUT_G_EXP=output_g_exp,
+        SOFTPLUS_THRESHOLD=20.0,
+        BLOCK_T=BLOCK_T,
+        BK=BK,
+        BV=BV,
+        num_warps=4,
+        num_stages=2,
+    )
+
+    return q, k, v, g, beta
diff --git a/vllm/model_executor/layers/fla/ops/fused_recurrent.py b/vllm/model_executor/layers/fla/ops/fused_recurrent.py
index 17b59b5bce71..920efa444178 100644
--- a/vllm/model_executor/layers/fla/ops/fused_recurrent.py
+++ b/vllm/model_executor/layers/fla/ops/fused_recurrent.py
@@ -106,12 +106,12 @@ def fused_recurrent_gated_delta_rule_fwd_kernel(
                 i_t = tl.load(num_accepted_tokens + i_n).to(tl.int64) - 1
             else:
                 i_t = 0
-            # Load state index and check for PAD_SLOT_ID (-1)
+            # Load state index and check for invalid entries
             state_idx = tl.load(ssm_state_indices + i_n * stride_indices_seq + i_t).to(
                 tl.int64
             )
-            # Skip if state index is invalid (PAD_SLOT_ID = -1)
-            if state_idx < 0:
+            # Skip if state index is invalid (NULL_BLOCK_ID=0)
+            if state_idx <= 0:
                 return
             p_h0 = h0 + state_idx * stride_init_state_token
         else:
@@ -150,12 +150,12 @@ def fused_recurrent_gated_delta_rule_fwd_kernel(
 
         # keep the states for multi-query tokens
         if INPLACE_FINAL_STATE:
-            # Load state index and check for PAD_SLOT_ID (-1)
+            # Load state index and check for invalid entries
             final_state_idx = tl.load(
                 ssm_state_indices + i_n * stride_indices_seq + i_t
             ).to(tl.int64)
-            # Only store if state index is valid (not PAD_SLOT_ID)
-            if final_state_idx >= 0:
+            # Only store if state index is valid (not NULL_BLOCK_ID=0)
+            if final_state_idx > 0:
                 p_ht = ht + final_state_idx * stride_final_state_token
                 p_ht = p_ht + i_hv * V * K + o_v[:, None] * K + o_k[None, :]
                 tl.store(p_ht, b_h.to(p_ht.dtype.element_ty), mask=mask_h)
@@ -292,7 +292,8 @@ def fused_recurrent_gated_delta_rule_packed_decode_kernel(
     state_idx = tl.load(ssm_state_indices + i_n * stride_indices_seq).to(tl.int64)
     p_o = o + (i_n * HV + i_hv) * V + o_v
 
-    if state_idx < 0:
+    # Skip if state index is invalid (NULL_BLOCK_ID=0)
+    if state_idx <= 0:
         zero = tl.zeros([BV], dtype=tl.float32).to(p_o.dtype.element_ty)
         tl.store(p_o, zero, mask=mask_v)
         return
diff --git a/vllm/model_executor/layers/fla/ops/fused_sigmoid_gating.py b/vllm/model_executor/layers/fla/ops/fused_sigmoid_gating.py
index 07ed185413f6..7e0c7e05cab1 100644
--- a/vllm/model_executor/layers/fla/ops/fused_sigmoid_gating.py
+++ b/vllm/model_executor/layers/fla/ops/fused_sigmoid_gating.py
@@ -106,12 +106,12 @@ def fused_sigmoid_gating_delta_rule_update_kernel(
                 i_t = tl.load(num_accepted_tokens + i_n).to(tl.int64) - 1
             else:
                 i_t = 0
-            # Load state index and check for PAD_SLOT_ID (-1)
+            # Load state index and check for invalid entries
             state_idx = tl.load(ssm_state_indices + i_n * stride_indices_seq + i_t).to(
                 tl.int64
             )
-            # Skip if state index is invalid (PAD_SLOT_ID = -1)
-            if state_idx < 0:
+            # Skip if state index is invalid (NULL_BLOCK_ID=0)
+            if state_idx <= 0:
                 return
             p_h0 = h0 + state_idx * stride_init_state_token
         else:
@@ -155,12 +155,12 @@ def fused_sigmoid_gating_delta_rule_update_kernel(
 
         # keep the states for multi-query tokens
         if INPLACE_FINAL_STATE:
-            # Load state index and check for PAD_SLOT_ID (-1)
+            # Load state index and check for invalid entries
             final_state_idx = tl.load(
                 ssm_state_indices + i_n * stride_indices_seq + i_t
             ).to(tl.int64)
-            # Only store if state index is valid (not PAD_SLOT_ID)
-            if final_state_idx >= 0:
+            # Only store if state index is valid (not NULL_BLOCK_ID=0)
+            if final_state_idx > 0:
                 p_ht = ht + final_state_idx * stride_final_state_token
                 p_ht = p_ht + i_hv * V * K + o_v[:, None] * K + o_k[None, :]
                 tl.store(p_ht, b_h.to(p_ht.dtype.element_ty), mask=mask_h)
diff --git a/vllm/model_executor/layers/fla/ops/kda.py b/vllm/model_executor/layers/fla/ops/kda.py
index b8c07d1dc896..5f1418fd24ec 100644
--- a/vllm/model_executor/layers/fla/ops/kda.py
+++ b/vllm/model_executor/layers/fla/ops/kda.py
@@ -14,16 +14,16 @@
 
 from vllm.model_executor.custom_op import CustomOp
 from vllm.triton_utils import tl, triton
-from vllm.utils.math_utils import cdiv, next_power_of_2
+from vllm.utils.math_utils import RCP_LN2, cdiv, next_power_of_2
 
 from .chunk_delta_h import chunk_gated_delta_rule_fwd_h
 from .cumsum import chunk_local_cumsum
 from .fused_recurrent import fused_recurrent_gated_delta_rule_fwd_kernel
 from .index import prepare_chunk_indices
 from .l2norm import l2norm_fwd
-from .op import exp, log
+from .op import exp2, log
 from .solve_tril import solve_tril
-from .utils import is_amd
+from .utils import FLA_CHUNK_SIZE, is_amd
 
 BT_LIST_AUTOTUNE = [32, 64, 128]
 NUM_WARPS_AUTOTUNE = [2, 4, 8, 16] if is_amd else [4, 8, 16, 32]
@@ -594,16 +594,16 @@ def chunk_kda_scaled_dot_kkt_fwd_kernel_intra_sub_inter(
         b_gn = tl.load(g + (i_t * BT + i_i * BC) * H * K + o_k, mask=m_k, other=0)
         # [BC, BK]
         b_g = tl.load(p_g, boundary_check=(0, 1))
-        b_k = tl.load(p_k, boundary_check=(0, 1)) * exp(b_g - b_gn[None, :])
+        b_k = tl.load(p_k, boundary_check=(0, 1)) * exp2(b_g - b_gn[None, :])
         # [BK, BC]
         b_gk = tl.load(p_gk, boundary_check=(0, 1))
         b_kt = tl.load(b_kt, boundary_check=(0, 1))
         # [BC, BC]
-        b_ktg = b_kt * exp(b_gn[:, None] - b_gk)
+        b_ktg = b_kt * exp2(b_gn[:, None] - b_gk)
         b_A += tl.dot(b_k, b_ktg)
 
         b_q = tl.load(p_q, boundary_check=(0, 1))
-        b_qg = b_q * exp(b_g - b_gn[None, :]) * scale
+        b_qg = b_q * exp2(b_g - b_gn[None, :]) * scale
         b_Aqk += tl.dot(b_qg, b_ktg)
 
     b_A *= b_b[:, None]
@@ -703,7 +703,7 @@ def chunk_kda_scaled_dot_kkt_fwd_kernel_intra_sub_intra(
     for j in range(0, min(BC, T - i_t * BT - i_i * BC)):
         b_kt = tl.load(p_kt, mask=m_k, other=0).to(tl.float32)
         b_gk = tl.load(p_gk, mask=m_k, other=0).to(tl.float32)
-        b_ktg = b_kt[None, :] * exp(b_g - b_gk[None, :])
+        b_ktg = b_kt[None, :] * exp2(b_g - b_gk[None, :])
         b_A = tl.sum(b_k * b_ktg, 1)
         b_A = tl.where(o_i > j, b_A, 0.0)
         b_Aqk = tl.sum(b_q * b_ktg, 1)
@@ -721,7 +721,7 @@ def chunk_kda_scaled_dot_kkt_fwd(
     beta: torch.Tensor | None = None,
     scale: float | None = None,
     cu_seqlens: torch.Tensor | None = None,
-    chunk_size: int = 64,
+    chunk_size: int = FLA_CHUNK_SIZE,
     output_dtype: torch.dtype = torch.float32,
 ) -> tuple[torch.Tensor, torch.Tensor]:
     r"""
@@ -912,7 +912,7 @@ def recompute_w_u_fwd_kernel(
             (1, 0),
         )
         b_gk = tl.load(p_gk, boundary_check=(0, 1))
-        b_kb *= exp(b_gk)
+        b_kb *= exp2(b_gk)
         if STORE_QG:
             p_q = tl.make_block_ptr(
                 q + (bos * H + i_h) * K,
@@ -931,7 +931,7 @@ def recompute_w_u_fwd_kernel(
                 (1, 0),
             )
             b_q = tl.load(p_q, boundary_check=(0, 1))
-            b_qg = b_q * exp(b_gk)
+            b_qg = b_q * exp2(b_gk)
             tl.store(p_qg, b_qg.to(p_qg.dtype.element_ty), boundary_check=(0, 1))
         if STORE_KG:
             last_idx = min(i_t * BT + BT, T) - 1
@@ -941,7 +941,7 @@ def recompute_w_u_fwd_kernel(
             b_gn = tl.load(
                 gk + ((bos + last_idx) * H + i_h) * K + o_k, mask=m_k, other=0.0
             )
-            b_kg = b_k * exp(b_gn - b_gk)
+            b_kg = b_k * exp2(b_gn - b_gk)
 
             p_kg = tl.make_block_ptr(
                 kg + (bos * H + i_h) * K,
@@ -1076,10 +1076,10 @@ def chunk_gla_fwd_kernel_o(
         )
         p_h = tl.make_block_ptr(
             h + (i_tg * H + i_h) * K * V,
-            (K, V),
-            (V, 1),
-            (i_k * BK, i_v * BV),
-            (BK, BV),
+            (V, K),
+            (K, 1),
+            (i_v * BV, i_k * BK),
+            (BV, BK),
             (1, 0),
         )
 
@@ -1089,13 +1089,12 @@ def chunk_gla_fwd_kernel_o(
         # [BT, BK]
         b_g = tl.load(p_g, boundary_check=(0, 1))
         # [BT, BK]
-        b_qg = (b_q * exp(b_g)).to(b_q.dtype)
-        # [BK, BV]
+        b_qg = (b_q * exp2(b_g)).to(b_q.dtype)
+        # [BV, BK]
         b_h = tl.load(p_h, boundary_check=(0, 1))
-        # works but dkw, owing to divine benevolence
         # [BT, BV]
         if i_k >= 0:
-            b_o += tl.dot(b_qg, b_h.to(b_qg.dtype))
+            b_o += tl.dot(b_qg, tl.trans(b_h).to(b_qg.dtype))
     p_v = tl.make_block_ptr(
         v + (bos * H + i_h) * V,
         (T, V),
@@ -1178,8 +1177,11 @@ def chunk_kda_fwd(
     output_final_state: bool,
     cu_seqlens: torch.Tensor | None = None,
 ):
-    chunk_size = 64
+    chunk_size = FLA_CHUNK_SIZE
     g = chunk_local_cumsum(g, chunk_size=chunk_size, cu_seqlens=cu_seqlens)
+    # KDA evaluates cumulative gate decays with exp2. Convert from natural-log
+    # space so exp(x) is preserved as exp2(x / ln(2)).
+    g = g * RCP_LN2
     # the intra Aqk is kept in fp32
     # the computation has very marginal effect on the entire throughput
     A, Aqk = chunk_kda_scaled_dot_kkt_fwd(
@@ -1189,6 +1191,7 @@ def chunk_kda_fwd(
         beta=beta,
         scale=scale,
         cu_seqlens=cu_seqlens,
+        chunk_size=chunk_size,
         output_dtype=torch.float32,
     )
     A = solve_tril(A=A, cu_seqlens=cu_seqlens, output_dtype=k.dtype)
@@ -1209,6 +1212,7 @@ def chunk_kda_fwd(
         initial_state=initial_state,
         output_final_state=output_final_state,
         cu_seqlens=cu_seqlens,
+        use_exp2=True,
     )
     del w, u, kg
     o = chunk_gla_fwd_o_gk(
diff --git a/vllm/model_executor/layers/fla/ops/op.py b/vllm/model_executor/layers/fla/ops/op.py
index a91975c8e567..777ca585f35d 100644
--- a/vllm/model_executor/layers/fla/ops/op.py
+++ b/vllm/model_executor/layers/fla/ops/op.py
@@ -15,10 +15,12 @@
 
 if os.environ.get("FLA_USE_FAST_OPS", "0") == "1":
     exp = tldevice.fast_expf
+    exp2 = tl.exp2
     log = tldevice.fast_logf
     log2 = tldevice.fast_log2f
 else:
     exp = tl.exp
+    exp2 = tl.exp2
     log = tl.log
     log2 = tl.log2
 
diff --git a/vllm/model_executor/layers/fla/ops/solve_tril.py b/vllm/model_executor/layers/fla/ops/solve_tril.py
index da85aab19207..8d3811ca4c17 100644
--- a/vllm/model_executor/layers/fla/ops/solve_tril.py
+++ b/vllm/model_executor/layers/fla/ops/solve_tril.py
@@ -507,6 +507,7 @@ def merge_16x16_to_64x64_inverse_kernel(
 def solve_tril(
     A: torch.Tensor,
     cu_seqlens: torch.Tensor | None = None,
+    chunk_indices: torch.Tensor | None = None,
     output_dtype: torch.dtype = torch.float,
 ) -> torch.Tensor:
     """
@@ -518,6 +519,8 @@ def solve_tril(
             [B, T, H, BT], where BT should only be 16, 32, or 64.
         cu_seqlens (torch.Tensor):
             The cumulative sequence lengths of the input tensor. Default: `None`.
+        chunk_indices (torch.Tensor):
+            Pre-computed chunk indices. Default: `None`.
         output_dtype (torch.dtype):
             The dtype of the output tensor. Default: `torch.float`.
             If `None`, the output dtype will be the same as the input dtype.
@@ -529,9 +532,8 @@ def solve_tril(
     output_dtype = A.dtype if output_dtype is None else output_dtype
 
     B, T, H, BT = A.shape
-    chunk_indices = (
-        prepare_chunk_indices(cu_seqlens, BT) if cu_seqlens is not None else None
-    )
+    if chunk_indices is None and cu_seqlens is not None:
+        chunk_indices = prepare_chunk_indices(cu_seqlens, BT)
     NT = len(chunk_indices) if cu_seqlens is not None else triton.cdiv(T, BT)
 
     Ai = torch.zeros_like(A, dtype=output_dtype)
diff --git a/vllm/model_executor/layers/fla/ops/utils.py b/vllm/model_executor/layers/fla/ops/utils.py
index f0ec1f7a6c78..d8e7db934984 100644
--- a/vllm/model_executor/layers/fla/ops/utils.py
+++ b/vllm/model_executor/layers/fla/ops/utils.py
@@ -24,10 +24,12 @@
 
 COMPILER_MODE = os.getenv("FLA_COMPILER_MODE") == "1"
 FLA_CI_ENV = os.getenv("FLA_CI_ENV") == "1"
-FLA_GDN_FIX_BT = os.getenv("FLA_GDN_FIX_BT", "0") == "1"
 
 SUPPRESS_LEVEL = int(os.getenv("GDN_RECOMPUTE_SUPPRESS_LEVEL", "0"))
 
+# Default chunk size used across FLA triton kernels (kda, chunk, chunk_o, etc.)
+FLA_CHUNK_SIZE = 64
+
 
 def tensor_cache(fn: Callable[..., torch.Tensor]) -> Callable[..., torch.Tensor]:
     """
@@ -152,9 +154,13 @@ def _check_platform() -> Literal["nvidia", "amd", "intel", "musa"]:
 )
 use_cuda_graph = is_nvidia and os.environ.get("FLA_USE_CUDA_GRAPH", "0") == "1"
 is_gather_supported = hasattr(triton.language, "gather")
-is_tma_supported = (is_nvidia and torch.cuda.get_device_capability(0)[0] >= 9) and (
-    hasattr(triton.language, "_experimental_make_tensor_descriptor")
-    or hasattr(triton.language, "make_tensor_descriptor")
+is_tma_supported = (
+    is_nvidia_hopper
+    and os.getenv("FLA_USE_TMA", "0") == "1"
+    and (
+        hasattr(triton.language, "_experimental_make_tensor_descriptor")
+        or hasattr(triton.language, "make_tensor_descriptor")
+    )
 )
 
 
diff --git a/vllm/model_executor/layers/fla/ops/wy_fast.py b/vllm/model_executor/layers/fla/ops/wy_fast.py
index 6baa08ab4996..52d2b28195a8 100644
--- a/vllm/model_executor/layers/fla/ops/wy_fast.py
+++ b/vllm/model_executor/layers/fla/ops/wy_fast.py
@@ -123,14 +123,14 @@ def recompute_w_u_fwd(
     g_cumsum: torch.Tensor,
     A: torch.Tensor,
     cu_seqlens: torch.Tensor | None,
+    chunk_indices: torch.Tensor | None = None,
 ) -> tuple[torch.Tensor, torch.Tensor]:
     B, T, Hg, K, V = *k.shape, v.shape[-1]
     H = v.shape[-2]
     BT = A.shape[-1]
 
-    chunk_indices = (
-        prepare_chunk_indices(cu_seqlens, BT) if cu_seqlens is not None else None
-    )
+    if chunk_indices is None and cu_seqlens is not None:
+        chunk_indices = prepare_chunk_indices(cu_seqlens, BT)
     NT = triton.cdiv(T, BT) if cu_seqlens is None else len(chunk_indices)
     BK = 64
     BV = 64
diff --git a/vllm/model_executor/layers/fused_moe/__init__.py b/vllm/model_executor/layers/fused_moe/__init__.py
index f56a2e63bf40..4f8627a97c6a 100644
--- a/vllm/model_executor/layers/fused_moe/__init__.py
+++ b/vllm/model_executor/layers/fused_moe/__init__.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from contextlib import contextmanager
-from typing import Any
+from typing import Any, TypeAlias
 
 from vllm.model_executor.layers.fused_moe.activation import (
     MoEActivation,
@@ -11,6 +11,8 @@
 )
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEConfig,
+    FusedMoEParallelConfig,
+    FusedMoEQuantConfig,
     RoutingMethodType,
 )
 from vllm.model_executor.layers.fused_moe.fused_moe_method_base import (
@@ -19,6 +21,7 @@
 from vllm.model_executor.layers.fused_moe.layer import (
     FusedMoE,
     FusedMoeWeightScaleSupported,
+    fused_moe_make_expert_params_mapping,
 )
 from vllm.model_executor.layers.fused_moe.modular_kernel import (
     FusedMoEActivationFormat,
@@ -29,18 +32,21 @@
     FusedMoERouter,
 )
 from vllm.model_executor.layers.fused_moe.router.gate_linear import GateLinear
-from vllm.model_executor.layers.fused_moe.shared_fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe.runner.shared_experts import (
+    SharedExperts,
+)
 from vllm.model_executor.layers.fused_moe.unquantized_fused_moe_method import (
     UnquantizedFusedMoEMethod,
 )
-from vllm.model_executor.layers.fused_moe.zero_expert_fused_moe import (
-    ZeroExpertFusedMoE,
-)
 from vllm.triton_utils import HAS_TRITON
 
 _config: dict[str, Any] | None = None
 
 
+# Temporary alias for FusedMoE, eventually we be its own class.
+RoutedExperts: TypeAlias = FusedMoE
+
+
 @contextmanager
 def override_config(config):
     global _config
@@ -58,6 +64,8 @@ def get_config() -> dict[str, Any] | None:
     "FusedMoE",
     "FusedMoERouter",
     "FusedMoEConfig",
+    "FusedMoEQuantConfig",
+    "FusedMoEParallelConfig",
     "FusedMoEMethodBase",
     "MoEActivation",
     "UnquantizedFusedMoEMethod",
@@ -67,51 +75,56 @@ def get_config() -> dict[str, Any] | None:
     "FusedMoEPrepareAndFinalizeModular",
     "GateLinear",
     "RoutingMethodType",
-    "SharedFusedMoE",
-    "ZeroExpertFusedMoE",
+    "RoutedExperts",
+    "SharedExperts",
     "activation_without_mul",
     "apply_moe_activation",
+    "fused_moe_make_expert_params_mapping",
     "override_config",
     "get_config",
 ]
 
 if HAS_TRITON:
     # import to register the custom ops
-    from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
+    from vllm.model_executor.layers.fused_moe.experts.batched_deep_gemm_moe import (
         BatchedDeepGemmExperts,
     )
-    from vllm.model_executor.layers.fused_moe.cutlass_moe import (
+    from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import (
         CutlassBatchedExpertsFp8,
         CutlassExpertsFp8,
         CutlassExpertsW4A8Fp8,
-        cutlass_moe_w4a8_fp8,
     )
-    from vllm.model_executor.layers.fused_moe.deep_gemm_moe import DeepGemmExperts
-    from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
+    from vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe import (
+        DeepGemmExperts,
+    )
+    from vllm.model_executor.layers.fused_moe.experts.fused_batched_moe import (
         BatchedTritonExperts,
     )
-    from vllm.model_executor.layers.fused_moe.fused_moe import (
+    from vllm.model_executor.layers.fused_moe.experts.rocm_aiter_moe import (
+        AiterExperts,
+    )
+    from vllm.model_executor.layers.fused_moe.experts.triton_deep_gemm_moe import (
+        TritonOrDeepGemmExperts,
+    )
+    from vllm.model_executor.layers.fused_moe.experts.triton_moe import (
         TritonExperts,
         TritonWNA16Experts,
+    )
+    from vllm.model_executor.layers.fused_moe.experts.xpu_moe import (
+        XPUExperts,
+        XPUExpertsFp8,
+        XPUExpertsMXFp4,
+    )
+    from vllm.model_executor.layers.fused_moe.fused_moe import (
         fused_experts,
         get_config_file_name,
     )
-    from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (
-        AiterExperts,
-    )
     from vllm.model_executor.layers.fused_moe.router.fused_topk_router import (
         fused_topk,
     )
     from vllm.model_executor.layers.fused_moe.router.grouped_topk_router import (
         GroupedTopk,
     )
-    from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import (
-        TritonOrDeepGemmExperts,
-    )
-    from vllm.model_executor.layers.fused_moe.xpu_fused_moe import (
-        XPUExperts,
-        XPUExpertsFp8,
-    )
 
     __all__ += [
         "AiterExperts",
@@ -119,7 +132,6 @@ def get_config() -> dict[str, Any] | None:
         "fused_experts",
         "get_config_file_name",
         "GroupedTopk",
-        "cutlass_moe_w4a8_fp8",
         "CutlassExpertsFp8",
         "CutlassBatchedExpertsFp8",
         "CutlassExpertsW4A8Fp8",
@@ -131,6 +143,7 @@ def get_config() -> dict[str, Any] | None:
         "TritonOrDeepGemmExperts",
         "XPUExperts",
         "XPUExpertsFp8",
+        "XPUExpertsMXFp4",
     ]
 else:
     # Some model classes directly use the custom ops. Add placeholders
diff --git a/vllm/model_executor/layers/fused_moe/activation.py b/vllm/model_executor/layers/fused_moe/activation.py
index 3112b3054fcd..b2e67e6220a9 100644
--- a/vllm/model_executor/layers/fused_moe/activation.py
+++ b/vllm/model_executor/layers/fused_moe/activation.py
@@ -15,6 +15,7 @@ class MoEActivation(Enum):
     # and produce output of shape [..., d]
     SILU = "silu"
     GELU = "gelu"
+    GELU_TANH = "gelu_tanh"
     RELU2 = "relu2"
     SWIGLUOAI = "swigluoai"
     SWIGLUSTEP = "swiglustep"
@@ -24,6 +25,7 @@ class MoEActivation(Enum):
     # NOTE: Non-gated activations require the "_no_mul" suffix to be present.
     SILU_NO_MUL = "silu_no_mul"
     GELU_NO_MUL = "gelu_no_mul"
+    GELU_TANH_NO_MUL = "gelu_tanh_no_mul"
     RELU2_NO_MUL = "relu2_no_mul"
 
     @property
@@ -53,6 +55,7 @@ def without_mul(self) -> "MoEActivation":
     @classmethod
     def from_str(cls, s: str) -> "MoEActivation":
         """Parse from string for backward compatibility."""
+        s = _STR_ALIASES.get(s, s)
         for member in cls:
             if member.value == s:
                 return member
@@ -61,20 +64,27 @@ def from_str(cls, s: str) -> "MoEActivation":
 
 
 # Module-level lookup tables used by MoEActivation functions.
+_STR_ALIASES: dict[str, str] = {
+    "gelu_pytorch_tanh": "gelu_tanh",
+}
+
 _CUSTOM_OP_NAMES: dict[MoEActivation, str] = {
     MoEActivation.SILU: "silu_and_mul",
     MoEActivation.GELU: "gelu_and_mul",
+    MoEActivation.GELU_TANH: "gelu_tanh_and_mul",
     MoEActivation.SWIGLUOAI: "swigluoai_and_mul",
     MoEActivation.SWIGLUSTEP: "swiglustep_and_mul",
     MoEActivation.RELU2: "relu2",
     MoEActivation.SILU_NO_MUL: "silu_and_mul",
     MoEActivation.GELU_NO_MUL: "gelu_and_mul",
+    MoEActivation.GELU_TANH_NO_MUL: "gelu_tanh_and_mul",
     MoEActivation.RELU2_NO_MUL: "relu2",
 }
 
 _WITHOUT_MUL: dict[MoEActivation, MoEActivation] = {
     MoEActivation.SILU: MoEActivation.SILU_NO_MUL,
     MoEActivation.GELU: MoEActivation.GELU_NO_MUL,
+    MoEActivation.GELU_TANH: MoEActivation.GELU_TANH_NO_MUL,
     MoEActivation.RELU2: MoEActivation.RELU2_NO_MUL,
 }
 
@@ -115,6 +125,8 @@ def apply_moe_activation(
         torch.ops._C.silu_and_mul(output, input)
     elif activation == MoEActivation.GELU:
         torch.ops._C.gelu_and_mul(output, input)
+    elif activation == MoEActivation.GELU_TANH:
+        torch.ops._C.gelu_tanh_and_mul(output, input)
     elif activation == MoEActivation.SWIGLUOAI:
         torch.ops._C.swigluoai_and_mul(output, input)
     elif activation == MoEActivation.SWIGLUSTEP:
@@ -127,6 +139,8 @@ def apply_moe_activation(
         output.copy_(F.silu(input))
     elif activation == MoEActivation.GELU_NO_MUL:
         output.copy_(F.gelu(input))
+    elif activation == MoEActivation.GELU_TANH_NO_MUL:
+        output.copy_(F.gelu(input, approximate="tanh"))
     elif activation == MoEActivation.RELU2_NO_MUL:
         F.relu(input, inplace=True)
         torch.square(input, out=output)
diff --git a/vllm/model_executor/layers/fused_moe/all2all_utils.py b/vllm/model_executor/layers/fused_moe/all2all_utils.py
index 44c9bb79e154..6d4822146437 100644
--- a/vllm/model_executor/layers/fused_moe/all2all_utils.py
+++ b/vllm/model_executor/layers/fused_moe/all2all_utils.py
@@ -41,14 +41,30 @@
             DeepEPLLPrepareAndFinalize,
         )
     if has_mori():
-        from .mori_prepare_finalize import MoriPrepareAndFinalize
+        from .prepare_finalize.mori import MoriPrepareAndFinalize
     if has_nixl_ep():
-        from .nixl_ep_prepare_finalize import (
+        from .prepare_finalize.nixl_ep import (
             NIXL_EP_QUANT_BLOCK_SHAPE,
             NixlEPPrepareAndFinalize,
         )
 
 
+def _get_ep_all2all_manager(eep_stage: bool = False) -> Any:
+    if eep_stage:
+        from vllm.distributed.elastic_ep.standby_state import get_standby_ep_group
+
+        ep_group = get_standby_ep_group()
+        assert ep_group is not None
+        device_communicator = ep_group.device_communicator
+    else:
+        device_communicator = get_ep_group().device_communicator
+
+    assert device_communicator is not None
+    all2all_manager = device_communicator.all2all_manager
+    assert all2all_manager is not None
+    return all2all_manager
+
+
 def maybe_roundup_layer_hidden_size(
     hidden_size: int,
     act_dtype: torch.dtype,
@@ -92,6 +108,7 @@ def maybe_make_prepare_finalize(
     routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
     allow_new_interface: bool = False,
     use_monolithic: bool = False,
+    eep_stage: bool = False,
 ) -> FusedMoEPrepareAndFinalize | None:
     # NOTE(rob): we are migrating each quant_method to hold the MK
     # in all cases. The allow_new_interface=False flag allow us to fall
@@ -117,18 +134,16 @@ def maybe_make_prepare_finalize(
                 "Detected DP deployment with no --enable-expert-parallel. "
                 "Falling back to AllGather+ReduceScatter dispatch/combine."
             )
+            all2all_manager = _get_ep_all2all_manager(eep_stage)
             return make_moe_prepare_and_finalize_naive_dp_ep(
                 is_sequence_parallel=moe.moe_parallel_config.is_sequence_parallel,
-                num_dispatchers=(
-                    get_ep_group().device_communicator.all2all_manager.world_size
-                ),
+                num_dispatchers=all2all_manager.world_size,
                 use_monolithic=use_monolithic,
             )
         else:
             return make_moe_prepare_and_finalize_no_dp_ep(use_monolithic)
 
-    all2all_manager = get_ep_group().device_communicator.all2all_manager
-    assert all2all_manager is not None
+    all2all_manager = _get_ep_all2all_manager(eep_stage)
 
     prepare_finalize: FusedMoEPrepareAndFinalize | None = None
 
@@ -186,16 +201,23 @@ def maybe_make_prepare_finalize(
         use_fp8_dispatch = (
             quant_config.is_per_act_token or quant_config.is_block_quantized
         )
-        # For PTPC (per token per channel) quant, the scale dim for each token is 1
-        # For 1x128 quant, the scale dim for each token is hidden_dim // 128
-        scale_dim = 1 if quant_config.is_per_act_token else moe.hidden_dim // 128
+        if use_fp8_dispatch:
+            # For PTPC (per token per channel) quant, scale dim is 1
+            # For 1x128 quant, scale dim is hidden_dim // 128
+            quant_dtype = quant_config.quant_dtype
+            scale_dim = 1 if quant_config.is_per_act_token else moe.hidden_dim // 128
+        else:
+            # Unquantized dispatch (e.g. AITER with defer_input_quant):
+            # dispatch raw BF16/FP16 data, no scales needed.
+            quant_dtype = moe.in_dtype
+            scale_dim = 0
         all_to_all_args = dict(
             rank=all2all_manager.rank,
             num_ep_ranks=all2all_manager.world_size,
-            quant_dtype=quant_config.quant_dtype,
+            quant_dtype=quant_dtype,
             token_hidden_size=moe.hidden_dim,
             scale_dim=scale_dim,
-            scale_type_size=torch.float32.itemsize,
+            scale_type_size=0 if scale_dim == 0 else torch.float32.itemsize,
             max_num_tokens_per_dp_rank=moe.max_num_tokens,
             input_dtype=moe.in_dtype,
             num_local_experts=moe.num_experts // all2all_manager.world_size,
@@ -221,12 +243,34 @@ def maybe_make_prepare_finalize(
         max_num_tokens = (
             get_current_vllm_config().scheduler_config.max_num_batched_tokens
         )
+        if quant_config.quant_dtype is None:
+            dispatch_dtype_bytes_per_elem = 2
+            dispatch_scale_bytes_per_token = 0
+        elif quant_config.quant_dtype == "nvfp4":
+            dispatch_dtype_bytes_per_elem = 0
+            dispatch_scale_bytes_per_token = moe.hidden_dim // 16
+        elif quant_config.quant_dtype == "mxfp8":
+            dispatch_dtype_bytes_per_elem = 1
+            align = quant_config.mx_alignment
+            if align > 0:
+                padded_k = ((moe.hidden_dim + align - 1) // align) * align
+            else:
+                padded_k = moe.hidden_dim
+            dispatch_scale_bytes_per_token = padded_k // 32
+        else:
+            raise NotImplementedError(
+                "flashinfer_nvlink_one_sided dispatch supports nvfp4, mxfp8, "
+                "and bf16 (quant_dtype=None) today; got "
+                f"quant_dtype={quant_config.quant_dtype!r}"
+            )
         prepare_finalize = FlashInferNVLinkOneSidedPrepareAndFinalize(
             max_num_tokens=max_num_tokens,
             top_k=moe.experts_per_token,
             num_experts=moe.num_experts,
             hidden_size=moe.hidden_dim,
             num_dispatchers=all2all_manager.world_size,
+            dispatch_dtype_bytes_per_elem=dispatch_dtype_bytes_per_elem,
+            dispatch_scale_bytes_per_token=dispatch_scale_bytes_per_token,
         )
 
     elif moe.use_ag_rs_all2all_kernels and allow_new_interface:
@@ -251,6 +295,7 @@ def maybe_make_prepare_finalize(
             num_ep_ranks=all2all_manager.world_size,
             num_global_experts=moe.num_experts,
             num_local_experts=moe.num_experts // all2all_manager.world_size,
+            stage=eep_stage,
         )
         handle = all2all_manager.get_handle(all_to_all_args)
 
diff --git a/vllm/model_executor/layers/fused_moe/config.py b/vllm/model_executor/layers/fused_moe/config.py
index 5b58353927a4..eec7d4b87f5b 100644
--- a/vllm/model_executor/layers/fused_moe/config.py
+++ b/vllm/model_executor/layers/fused_moe/config.py
@@ -6,8 +6,8 @@
 
 import torch
 
-import vllm.envs as envs
-from vllm.config import ParallelConfig
+from vllm.config import ParallelConfig, SchedulerConfig
+from vllm.config.kernel import MoEBackend
 from vllm.distributed import get_dp_group, get_pcp_group, get_tensor_model_parallel_rank
 from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.activation import MoEActivation
@@ -113,12 +113,17 @@ class RoutingMethodType(IntEnum):
     RenormalizeNaive = (4,)
     # TopK: TopK (no softmax)
     TopK = (5,)
-    # Custom
-    Custom = (6,)
-    # Simulated
-    Simulated = (7,)
+    # SigmoidRenorm: Sigmoid -> TopK -> Renormalize (divide by sum of top-K)
+    SigmoidRenorm = (6,)
+    # MiniMax2: Sigmoid + Bias -> TopK -> ScaledSumNormalize
+    MiniMax2 = (7,)
     # Unspecified
-    Unspecified = 8.0
+    Unspecified = (8,)
+    # other routing types (not passed to FlashInfer kernels)
+    # Deepseek V4 -> sqrtsoftplus + Bias + Normalize
+    DeepseekV4 = (100,)
+    Custom = (101,)
+    Simulated = (102,)
 
 
 def get_routing_method_type(
@@ -128,21 +133,33 @@ def get_routing_method_type(
     num_expert_group: int | None,
     has_e_score_bias: bool,
 ) -> RoutingMethodType:
+    if scoring_func == "sqrtsoftplus":
+        # DeepSeek V4 uses sqrtsoftplus routing with optional routing bias
+        # and top-k renormalization.
+        if renormalize:
+            return RoutingMethodType.DeepseekV4
+        else:
+            return RoutingMethodType.Unspecified
+
     if has_e_score_bias:
         if (num_expert_group or 0) > 0 and scoring_func == "sigmoid":
             return RoutingMethodType.DeepSeekV3
+        elif scoring_func == "sigmoid":
+            return RoutingMethodType.MiniMax2
         else:
             return RoutingMethodType.Unspecified
 
     if scoring_func == "sigmoid":
         if top_k == 1:
             return RoutingMethodType.Llama4
+        elif renormalize:
+            return RoutingMethodType.SigmoidRenorm
         else:
             return RoutingMethodType.Unspecified
 
     if scoring_func == "softmax":
         if renormalize:
-            return RoutingMethodType.Renormalize
+            return RoutingMethodType.RenormalizeNaive
         else:
             return RoutingMethodType.Default
 
@@ -228,7 +245,16 @@ class FusedMoEQuantConfig:
     _a2: FusedMoEQuantDesc
     _w1: FusedMoEQuantDesc
     _w2: FusedMoEQuantDesc
-    is_nvfp4_scale_swizzled: bool = True
+    is_scale_swizzled: bool = True
+
+    # MXFP4-specific TRTLLM parameters for SwiGLU activation clamping.
+    # These correspond to gemm1_alpha, gemm1_beta, gemm1_clamp_limit
+    # in TrtLlmMxfp4ExpertsBase.
+    gemm1_alpha: float | None = None
+    gemm1_beta: float | None = None
+    gemm1_clamp_limit: float | None = None
+
+    mx_alignment: int = 0
 
     def __post_init__(self):
         assert not self.per_act_token_quant or self.block_shape is None, (
@@ -476,7 +502,10 @@ def make(
         w1_zp: torch.Tensor | None = None,
         w2_zp: torch.Tensor | None = None,
         weight_dtype: torch.dtype | str | None = None,
-        is_nvfp4_scale_swizzled: bool = True,
+        is_scale_swizzled: bool = True,
+        gemm1_alpha: float | None = None,
+        gemm1_beta: float | None = None,
+        gemm1_clamp_limit: float | None = None,
     ) -> "FusedMoEQuantConfig":
         """
         General builder function for a FusedMoEQuantConfig.
@@ -506,7 +535,12 @@ def make(
         - w2_bias: Optional biases for w1 (GPT OSS Triton).
         - w1_zp: Optional w1 zero points for int4/int8 quantization.
         - w2_zp: Optional w2 zero points for int4/int8 quantization.
-        - is_nvfp4_scale_swizzled: Whether to swizzle the nvfp4 scale swizzling.
+        - is_scale_swizzled: Whether the activation scale-factor layout is
+          swizzled. Pass through to the underlying quantization kernel for
+          dtypes that distinguish layouts (nvfp4, mxfp8). Defaults to True.
+        - gemm1_alpha: Optional MXFP4 TRTLLM SwiGLU alpha parameter.
+        - gemm1_beta: Optional MXFP4 TRTLLM SwiGLU beta parameter.
+        - gemm1_clamp_limit: Optional MXFP4 TRTLLM SwiGLU clamp limit.
         """
         assert not isinstance(quant_dtype, str) or quant_dtype in {
             "nvfp4",
@@ -539,7 +573,10 @@ def make(
             _w2=FusedMoEQuantDesc(
                 weight_dtype, w_shape, w2_scale, g2_alphas, w2_zp, w2_bias
             ),
-            is_nvfp4_scale_swizzled=is_nvfp4_scale_swizzled,
+            is_scale_swizzled=is_scale_swizzled,
+            gemm1_alpha=gemm1_alpha,
+            gemm1_beta=gemm1_beta,
+            gemm1_clamp_limit=gemm1_clamp_limit,
         )
         assert quant_config.per_act_token_quant == per_act_token_quant
         assert quant_config.per_out_ch_quant == per_out_ch_quant
@@ -561,6 +598,7 @@ def fp8_w8a8_moe_quant_config(
     a2_gscale: torch.Tensor | None = None,
     g1_alphas: torch.Tensor | None = None,
     g2_alphas: torch.Tensor | None = None,
+    gemm1_clamp_limit: float | None = None,
 ) -> FusedMoEQuantConfig:
     """
     Construct a quant config for fp8 activations and fp8 weights.
@@ -580,6 +618,7 @@ def fp8_w8a8_moe_quant_config(
         per_act_token_quant=per_act_token_quant,
         per_out_ch_quant=per_out_ch_quant,
         block_shape=block_shape,
+        gemm1_clamp_limit=gemm1_clamp_limit,
     )
 
 
@@ -650,6 +689,9 @@ def mxfp4_w4a16_moe_quant_config(
     w2_scale: Union[torch.Tensor, "PrecisionConfig"],
     w1_bias: torch.Tensor | None = None,
     w2_bias: torch.Tensor | None = None,
+    gemm1_alpha: float | None = None,
+    gemm1_beta: float | None = None,
+    gemm1_clamp_limit: float | None = None,
 ) -> FusedMoEQuantConfig:
     """
     Construct a quant config for unquantized activations and mxfp4 weights.
@@ -659,6 +701,9 @@ def mxfp4_w4a16_moe_quant_config(
         _a2=FusedMoEQuantDesc(),
         _w1=FusedMoEQuantDesc("mxfp4", None, w1_scale, None, None, w1_bias),
         _w2=FusedMoEQuantDesc("mxfp4", None, w2_scale, None, None, w2_bias),
+        gemm1_alpha=gemm1_alpha,
+        gemm1_beta=gemm1_beta,
+        gemm1_clamp_limit=gemm1_clamp_limit,
     )
 
 
@@ -670,6 +715,11 @@ def mxfp4_mxfp8_moe_quant_config(
     w1_bias: torch.Tensor | None = None,
     w2_bias: torch.Tensor | None = None,
     block_shape: list[int] | None = None,
+    gemm1_alpha: float | None = None,
+    gemm1_beta: float | None = None,
+    gemm1_clamp_limit: float | None = None,
+    mx_alignment: int = 0,
+    is_scale_swizzled: bool = True,
 ) -> FusedMoEQuantConfig:
     """
     Construct a quant config for mxfp4 activations and mxfp4 weights.
@@ -679,6 +729,11 @@ def mxfp4_mxfp8_moe_quant_config(
         _a2=FusedMoEQuantDesc("mxfp8"),
         _w1=FusedMoEQuantDesc("mxfp4", None, w1_scale, None, None, w1_bias),
         _w2=FusedMoEQuantDesc("mxfp4", None, w2_scale, None, None, w2_bias),
+        gemm1_alpha=gemm1_alpha,
+        gemm1_beta=gemm1_beta,
+        gemm1_clamp_limit=gemm1_clamp_limit,
+        mx_alignment=mx_alignment,
+        is_scale_swizzled=is_scale_swizzled,
     )
 
 
@@ -690,6 +745,7 @@ def mxfp4_w4a8_moe_quant_config(
     w1_bias: torch.Tensor | None = None,
     w2_bias: torch.Tensor | None = None,
     block_shape: list[int] | None = None,
+    gemm1_clamp_limit: float | None = None,
 ) -> FusedMoEQuantConfig:
     """
     Construct a quant config for fp8 activations and mxfp4 weights.
@@ -699,6 +755,7 @@ def mxfp4_w4a8_moe_quant_config(
         _a2=FusedMoEQuantDesc("fp8", None, a2_scale, None, None, None),
         _w1=FusedMoEQuantDesc("mxfp4", None, w1_scale, None, None, w1_bias),
         _w2=FusedMoEQuantDesc("mxfp4", None, w2_scale, None, None, w2_bias),
+        gemm1_clamp_limit=gemm1_clamp_limit,
     )
 
 
@@ -712,6 +769,9 @@ def ocp_mx_moe_quant_config(
     w1_bias: torch.Tensor | None = None,
     w2_bias: torch.Tensor | None = None,
     block_shape: list[int] | None = None,
+    gemm1_alpha: float | None = None,
+    gemm1_beta: float | None = None,
+    gemm1_clamp_limit: float | None = None,
 ) -> FusedMoEQuantConfig:
     """
     Construct a quant config for mxfp4 activations and mxfp4 weights.
@@ -729,6 +789,9 @@ def ocp_mx_moe_quant_config(
         per_act_token_quant=False,
         per_out_ch_quant=False,
         block_shape=block_shape,
+        gemm1_alpha=gemm1_alpha,
+        gemm1_beta=gemm1_beta,
+        gemm1_clamp_limit=gemm1_clamp_limit,
     )
 
 
@@ -741,7 +804,8 @@ def nvfp4_moe_quant_config(
     w2_scale: torch.Tensor,
     w1_bias: torch.Tensor | None = None,
     w2_bias: torch.Tensor | None = None,
-    is_nvfp4_scale_swizzled: bool = True,
+    is_scale_swizzled: bool = True,
+    gemm1_clamp_limit: float | None = None,
 ) -> FusedMoEQuantConfig:
     """
     Construct a quant config for mxfp4 activations and nvp4 weights.
@@ -759,7 +823,27 @@ def nvfp4_moe_quant_config(
         per_act_token_quant=False,
         per_out_ch_quant=False,
         block_shape=None,
-        is_nvfp4_scale_swizzled=is_nvfp4_scale_swizzled,
+        is_scale_swizzled=is_scale_swizzled,
+        gemm1_clamp_limit=gemm1_clamp_limit,
+    )
+
+
+def mxfp4_moe_quant_config(
+    w1_scale: torch.Tensor,
+    w2_scale: torch.Tensor,
+) -> FusedMoEQuantConfig:
+    """
+    Construct a quant config for MXFP4 x MXFP4 MoE.
+    MXFP4 uses block scaling only (E8M0 scales, 32-element groups), with no
+    separate alphas / global activation scales in this config.
+    """
+    return FusedMoEQuantConfig.make(
+        "mxfp4",
+        w1_scale=w1_scale,
+        w2_scale=w2_scale,
+        per_act_token_quant=False,
+        per_out_ch_quant=False,
+        block_shape=None,
     )
 
 
@@ -875,9 +959,14 @@ def awq_marlin_moe_quant_config(
     group_size: int,
     w1_bias: torch.Tensor | None = None,
     w2_bias: torch.Tensor | None = None,
+    a1_gscale: torch.Tensor | None = None,
+    a2_gscale: torch.Tensor | None = None,
 ) -> FusedMoEQuantConfig:
     """
     Construct a quant config for awq marlin quantization.
+
+    a1_gscale / a2_gscale are optional global scales applied to activation
+    quantization scales when Marlin runs with 8-bit activations.
     """
     from vllm.model_executor.layers.quantization.utils.quant_utils import GroupShape
 
@@ -895,8 +984,8 @@ def awq_marlin_moe_quant_config(
         raise ValueError(f"Unsupported weight_bits: {weight_bits}")
 
     return FusedMoEQuantConfig(
-        _a1=FusedMoEQuantDesc(dtype=None, shape=a_shape),
-        _a2=FusedMoEQuantDesc(dtype=None, shape=a_shape),
+        _a1=FusedMoEQuantDesc(dtype=None, shape=a_shape, alpha_or_gscale=a1_gscale),
+        _a2=FusedMoEQuantDesc(dtype=None, shape=a_shape, alpha_or_gscale=a2_gscale),
         _w1=FusedMoEQuantDesc(weight_dtype, w_shape, w1_scale, None, w1_zp, w1_bias),
         _w2=FusedMoEQuantDesc(weight_dtype, w_shape, w2_scale, None, w2_zp, w2_bias),
     )
@@ -972,7 +1061,11 @@ def use_fi_nvl_one_sided_kernels(self):
 
     @property
     def use_batched_activation_format(self):
-        return self.use_deepep_ll_kernels
+        return self.use_deepep_ll_kernels or self.use_nixl_ep_kernels
+
+    @property
+    def needs_round_robin_routing_tables(self):
+        return self.use_deepep_ll_kernels or self.use_nixl_ep_kernels
 
     @property
     def use_ag_rs_all2all_kernels(self):
@@ -1174,12 +1267,17 @@ class FusedMoEConfig:
     # Defaults to intermediate_size_per_partition if not specified.
     intermediate_size_per_partition_unpadded: int | None = None
 
-    moe_backend: str = "auto"
-    max_num_tokens: int = envs.VLLM_MOE_DP_CHUNK_SIZE
+    moe_backend: MoEBackend = "auto"
+    max_num_tokens: int = SchedulerConfig.DEFAULT_MAX_NUM_BATCHED_TOKENS_FOR_BATCHED_DP
     has_bias: bool = False
     is_act_and_mul: bool = True
     is_lora_enabled: bool = False
 
+    # SwiGLU clamp limit. When set, backends that do not implement the clamp
+    # are filtered out by `FusedMoEExperts.is_supported_config` so the oracle
+    # cannot silently select one and drop the clamp.
+    swiglu_limit: float | None = None
+
     # This flag is used to disable the inplace optimization
     # in MoE kernels. If this flag is True then the kernel
     # should not be using inplace. If the flag is false, the
@@ -1275,3 +1373,7 @@ def use_ag_rs_all2all_kernels(self):
     @property
     def use_nixl_ep_kernels(self):
         return self.moe_parallel_config.use_nixl_ep_kernels
+
+    @property
+    def needs_round_robin_routing_tables(self):
+        return self.moe_parallel_config.needs_round_robin_routing_tables
diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=2880,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/configs/E=128,N=2880,device_name=NVIDIA_H100_80GB_HBM3.json
new file mode 100644
index 000000000000..2d53aedbed48
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/configs/E=128,N=2880,device_name=NVIDIA_H100_80GB_HBM3.json
@@ -0,0 +1,147 @@
+{
+    "triton_version": "3.6.0",
+    "1": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 4
+    },
+    "2": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "4": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 4
+    },
+    "8": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 4
+    },
+    "16": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 5
+    },
+    "24": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 5
+    },
+    "32": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 4
+    },
+    "48": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 5
+    },
+    "64": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 4
+    },
+    "96": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 5
+    },
+    "128": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 5
+    },
+    "256": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 5
+    },
+    "512": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 5
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 5
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 5
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 4
+    }
+}
diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=512,device_name=AMD_Radeon_R9700,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/configs/E=128,N=512,device_name=AMD_Radeon_R9700,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..d7e503b36150
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/configs/E=128,N=512,device_name=AMD_Radeon_R9700,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,165 @@
+{
+    "triton_version": "3.6.0",
+    "1": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 1
+    },
+    "2": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 2
+    },
+    "4": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 1
+    },
+    "8": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "16": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "24": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 4
+    },
+    "32": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 4
+    },
+    "48": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 1
+    },
+    "64": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 2
+    },
+    "96": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 2
+    },
+    "128": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 4
+    },
+    "256": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 2
+    },
+    "512": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 2
+    },
+    "1536": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 2
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    }
+}
diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=384,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Server_Edition,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/configs/E=256,N=384,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Server_Edition,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..bcec61632e3e
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/configs/E=256,N=384,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Server_Edition,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,147 @@
+{
+    "triton_version": "3.6.0",
+    "1": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "2": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 3
+    },
+    "4": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 8,
+        "num_stages": 3
+    },
+    "8": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4,
+        "num_stages": 5
+    },
+    "16": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "24": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "32": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 8,
+        "num_stages": 3
+    },
+    "48": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 8,
+        "num_stages": 3
+    },
+    "64": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 8,
+        "num_stages": 3
+    },
+    "96": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "128": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "256": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 8,
+        "num_stages": 2
+    },
+    "512": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 8,
+        "num_stages": 3
+    },
+    "1536": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    }
+}
diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=512,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Server_Edition,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/configs/E=256,N=512,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Server_Edition,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..705ca33d594b
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/configs/E=256,N=512,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Server_Edition,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,147 @@
+{
+    "triton_version": "3.6.0",
+    "1": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 4
+    },
+    "2": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 3
+    },
+    "4": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 8,
+        "num_stages": 2
+    },
+    "8": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 4
+    },
+    "16": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 8,
+        "num_stages": 4
+    },
+    "24": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 8,
+        "num_stages": 2
+    },
+    "32": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "48": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 8,
+        "num_stages": 3
+    },
+    "64": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 4
+    },
+    "96": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 8,
+        "num_stages": 2
+    },
+    "128": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 2
+    },
+    "256": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2
+    },
+    "512": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 2
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 8,
+        "num_stages": 3
+    },
+    "1536": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    }
+}
diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=1536,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Server_Edition,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/configs/E=64,N=1536,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Server_Edition,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..9c2ebaddd83f
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/configs/E=64,N=1536,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Server_Edition,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,147 @@
+{
+    "triton_version": "3.6.0",
+    "1": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "2": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 8,
+        "num_stages": 2
+    },
+    "4": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 8,
+        "num_stages": 5
+    },
+    "8": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2
+    },
+    "16": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 8,
+        "num_stages": 4
+    },
+    "24": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 8,
+        "num_stages": 3
+    },
+    "32": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 4
+    },
+    "48": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 5
+    },
+    "64": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 4
+    },
+    "96": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "128": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "256": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 3
+    },
+    "512": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "1536": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 64,
+        "num_warps": 4,
+        "num_stages": 3
+    }
+}
diff --git a/vllm/model_executor/layers/fused_moe/configs/E=64,N=768,device_name=AMD_Radeon_R9700,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/configs/E=64,N=768,device_name=AMD_Radeon_R9700,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..a5541722d4cf
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/configs/E=64,N=768,device_name=AMD_Radeon_R9700,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,165 @@
+{
+    "triton_version": "3.6.0",
+    "1": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 4
+    },
+    "2": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 4
+    },
+    "4": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 1
+    },
+    "8": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "16": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "24": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 4
+    },
+    "32": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 4
+    },
+    "48": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 1
+    },
+    "64": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 2
+    },
+    "96": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 4
+    },
+    "128": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 2
+    },
+    "256": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 2
+    },
+    "512": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 2
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 1
+    },
+    "1536": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 1
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    }
+}
diff --git a/vllm/model_executor/layers/fused_moe/cpu_fused_moe.py b/vllm/model_executor/layers/fused_moe/cpu_fused_moe.py
index 72e9db514a8f..9192b6a9b7e4 100644
--- a/vllm/model_executor/layers/fused_moe/cpu_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/cpu_fused_moe.py
@@ -7,7 +7,12 @@
 from torch.nn import functional as F
 
 from vllm import _custom_ops as ops
-from vllm._custom_ops import cpu_fused_moe, cpu_prepack_moe_weight
+from vllm._custom_ops import (
+    CPUQuantMethod,
+    cpu_fused_moe,
+    cpu_prepack_moe_weight,
+    fused_experts_cpu,
+)
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.fused_moe.activation import MoEActivation
 from vllm.model_executor.layers.quantization.utils.layer_utils import replace_parameter
@@ -34,12 +39,20 @@ def _swigluoai_forward_native(
     return gated_output
 
 
+def _gelu_and_mul(
+    x: torch.Tensor,
+) -> torch.Tensor:
+    d = x.shape[-1] // 2
+    return F.gelu(x[..., :d], approximate="none") * x[..., d:]
+
+
 # Map activation names to their native forward functions.
 # Uses static methods or standalone functions to avoid instantiating CustomOp
 # classes, which would call get_current_vllm_config() before config is set.
 _CPU_MOE_ACT_FN: dict[MoEActivation, Callable[[torch.Tensor], torch.Tensor]] = {
-    MoEActivation.SILU: SiluAndMul.forward_native,
+    MoEActivation.SILU: lambda x: SiluAndMul(compile_native=False).forward_native(x),
     MoEActivation.SWIGLUOAI: _swigluoai_forward_native,
+    MoEActivation.GELU: _gelu_and_mul,
 }
 
 
@@ -187,23 +200,25 @@ def __call__(
             e_score_correction_bias=e_score_correction_bias,
         )
 
-        torch.ops._C.fused_experts_cpu(
+        return fused_experts_cpu(
             x,
             layer.w13_weight,
             layer.w2_weight,
             topk_weights,
             topk_ids,
-            True,
-            False,
-            False,
-            None,
-            None,
-            None,
-            None,
-            None,
-            True,
+            False,  # inplace
+            CPUQuantMethod.UNQUANT,  # moe_comp_method
+            None,  # w1_scale
+            None,  # w2_scale
+            None,  # w1_zero
+            None,  # w2_zero
+            None,  # block_size
+            None,  # w1_bias
+            None,  # w2_bias
+            None,  # alpha
+            None,  # limit
+            True,  # is_vnni
         )
-        return x
 
 
 class CPUFusedMOE:
diff --git a/vllm/model_executor/layers/fused_moe/deep_gemm_utils.py b/vllm/model_executor/layers/fused_moe/deep_gemm_utils.py
index a2d267bd7490..df69fa328ca7 100644
--- a/vllm/model_executor/layers/fused_moe/deep_gemm_utils.py
+++ b/vllm/model_executor/layers/fused_moe/deep_gemm_utils.py
@@ -140,6 +140,8 @@ def _fwd_kernel_ep_scatter_2(
     offset_in_s = tl.arange(0, SCALE_HIDDEN_SIZE_PAD)
     mask_s = offset_in_s < SCALE_HIDDEN_SIZE
 
+    output_tensor_stride0 = output_tensor_stride0.to(tl.int64)
+
     for token_id in range(start_token_id, total_token_num, grid_num):
         to_copy = tl.load(recv_x + token_id * recv_x_stride0 + offset_in, mask=mask)
         to_copy_s = tl.load(
@@ -154,12 +156,13 @@ def _fwd_kernel_ep_scatter_2(
 
             if expert_id >= 0:
                 dest_token_index = tl.atomic_add(expert_start_loc + expert_id, 1)
+                dest_token_index_i64 = dest_token_index.to(tl.int64)
                 tl.store(
                     output_index + token_id * output_index_stride0 + topk_index,
                     dest_token_index,
                 )
                 output_tensor_ptr = (
-                    output_tensor + dest_token_index * output_tensor_stride0
+                    output_tensor + dest_token_index_i64 * output_tensor_stride0
                 )
                 output_tensor_scale_ptr = (
                     output_tensor_scale + dest_token_index * output_tensor_scale_stride0
diff --git a/vllm/model_executor/layers/fused_moe/eep_reconfigure.py b/vllm/model_executor/layers/fused_moe/eep_reconfigure.py
new file mode 100644
index 000000000000..6d40c6749f41
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/eep_reconfigure.py
@@ -0,0 +1,123 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from inspect import signature
+from typing import TYPE_CHECKING, Any
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.model_executor.layers.fused_moe.all2all_utils import (
+    maybe_make_prepare_finalize,
+)
+from vllm.model_executor.layers.fused_moe.config import FusedMoEConfig
+from vllm.model_executor.layers.fused_moe.fused_moe_method_base import (
+    FusedMoEMethodBase,
+)
+from vllm.model_executor.layers.fused_moe.fused_moe_modular_method import (
+    FusedMoEModularMethod,
+)
+from vllm.model_executor.layers.fused_moe.modular_kernel import (
+    FusedMoEExpertsModular,
+    FusedMoEPrepareAndFinalizeModular,
+)
+
+if TYPE_CHECKING:
+    from vllm.model_executor.layers.fused_moe.layer import FusedMoE
+
+
+def _make_eep_experts(
+    quant_method: FusedMoEMethodBase,
+    source_experts: FusedMoEExpertsModular,
+    prepare_finalize: FusedMoEPrepareAndFinalizeModular,
+    moe_config: FusedMoEConfig,
+) -> FusedMoEExpertsModular:
+    experts_cls = source_experts.__class__
+    assert quant_method.moe_quant_config is not None
+    experts_kwargs: dict[str, Any] = {
+        "moe_config": moe_config,
+        "quant_config": quant_method.moe_quant_config,
+    }
+    if prepare_finalize.activation_format == mk.FusedMoEActivationFormat.BatchedExperts:
+        max_num_tokens = prepare_finalize.max_num_tokens_per_rank()
+        assert max_num_tokens is not None
+        experts_kwargs.update(
+            max_num_tokens=max_num_tokens,
+            num_dispatchers=prepare_finalize.num_dispatchers(),
+        )
+
+    # Expert kernels with extra init params need explicit EEP support.
+    generic_arg_names = set(signature(mk.FusedMoEExperts.__init__).parameters)
+    ctor_arg_names = set(signature(experts_cls.__init__).parameters)
+    unsupported_args = ctor_arg_names - generic_arg_names
+    missing_args = set(experts_kwargs) - ctor_arg_names
+    if unsupported_args or missing_args:
+        raise NotImplementedError(
+            f"{experts_cls.__name__} experts do not support Elastic EP."
+        )
+
+    return experts_cls(**experts_kwargs)
+
+
+def make_eep_staged_quant_method(
+    module: "FusedMoE",
+    moe_config: FusedMoEConfig,
+) -> FusedMoEMethodBase | None:
+    quant_method = module.quant_method
+    if not quant_method.supports_internal_mk:
+        return None
+    if getattr(quant_method, "wraps_legacy_quant_method", False):
+        return None
+
+    old_batched_format = (
+        module.moe_config.moe_parallel_config.use_batched_activation_format
+    )
+    new_batched_format = moe_config.moe_parallel_config.use_batched_activation_format
+    assert old_batched_format == new_batched_format
+
+    moe_kernel = quant_method.moe_kernel
+    if moe_kernel is None:
+        return None
+    if moe_kernel.is_monolithic:
+        raise NotImplementedError(
+            "Elastic EP full modular-kernel staging is not supported for "
+            "monolithic fused MoE kernels."
+        )
+    if quant_method.moe_quant_config is None:
+        raise ValueError(
+            "Elastic EP full modular-kernel staging requires initialized "
+            "MoE quant config."
+        )
+
+    prepare_finalize = maybe_make_prepare_finalize(
+        moe_config,
+        quant_method.moe_quant_config,
+        routing_tables=None,
+        allow_new_interface=True,
+        use_monolithic=quant_method.is_monolithic,
+        eep_stage=True,
+    )
+    assert prepare_finalize is not None
+    assert isinstance(prepare_finalize, FusedMoEPrepareAndFinalizeModular)
+
+    source_experts = moe_kernel.fused_experts
+    assert isinstance(source_experts, FusedMoEExpertsModular)
+
+    experts = _make_eep_experts(
+        quant_method,
+        source_experts,
+        prepare_finalize,
+        moe_config,
+    )
+
+    if isinstance(quant_method, FusedMoEModularMethod):
+        base_quant_method = quant_method.old_quant_method
+    else:
+        base_quant_method = quant_method
+
+    return FusedMoEModularMethod(
+        base_quant_method,
+        mk.FusedMoEKernel(
+            prepare_finalize,
+            experts,
+            inplace=moe_kernel.inplace,
+        ),
+    )
diff --git a/vllm/model_executor/layers/fused_moe/expert_map_manager.py b/vllm/model_executor/layers/fused_moe/expert_map_manager.py
new file mode 100644
index 000000000000..71f2186ea4dd
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/expert_map_manager.py
@@ -0,0 +1,516 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""Expert Map Manager for MoE layers.
+
+This module contains the ExpertMapManager class which manages expert ID
+mappings and placement strategies for Expert Parallelism in MoE models.
+"""
+
+import torch
+
+from vllm.config.parallel import ExpertPlacementStrategy
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe.config import FusedMoEParallelConfig
+from vllm.model_executor.layers.fused_moe.experts.rocm_aiter_moe import (
+    init_aiter_topK_meta_data,
+)
+
+logger = init_logger(__name__)
+
+
+def determine_expert_map(
+    ep_size: int,
+    ep_rank: int,
+    global_num_experts: int,
+    expert_placement_strategy: ExpertPlacementStrategy = "linear",
+    num_fused_shared_experts: int = 0,
+    return_expert_mask: bool = False,
+) -> tuple[int, torch.Tensor | None, torch.Tensor | None]:
+    """
+    Calculates how many experts should be assigned to each rank for EP and
+    creates a mapping from global to local expert index. Experts are
+    distributed evenly across ranks. Any remaining are assigned to the
+    last rank.
+
+    Args:
+        ep_size: The size of the expert parallel group
+        ep_rank: The rank of the current process in the expert parallel
+            group
+        global_num_experts: The total number of experts in the model.
+        expert_placement_strategy: The expert placement strategy.
+        num_fused_shared_experts: Number of fused shared experts (for AITER)
+        return_expert_mask: Whether to return expert mask for AITER
+
+    Returns:
+        tuple[int, Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple containing:
+            - local_num_experts (int): The number of experts assigned
+                to the current rank.
+            - expert_map (Optional[torch.Tensor]): A tensor of shape
+                (global_num_experts,) mapping from global to local index.
+                Contains -1 for experts not assigned to the current rank.
+                Returns None if ep_size is 1.
+            - expert_mask (Optional[torch.Tensor]): A tensor of shape
+                (global_num_experts + num_fused_shared_experts + 1,)
+                containing 1 for experts assigned to the current rank
+                and 0 for sentinel.
+                Returns None if ep_size is 1.
+                Used only when AITER MOE is enabled.
+    """
+    from typing import get_args
+
+    assert ep_size > 0
+    if ep_size == 1:
+        return (global_num_experts, None, None)
+
+    # Distribute experts as evenly as possible to each rank.
+    base_experts = global_num_experts // ep_size
+    remainder = global_num_experts % ep_size
+    local_num_experts = base_experts + 1 if ep_rank < remainder else base_experts
+
+    # Create a tensor of size num_experts filled with -1
+    expert_map = torch.full((global_num_experts,), -1, dtype=torch.int32)
+
+    # Create an expert map for the local experts
+    if expert_placement_strategy == "linear":
+        start_idx = ep_rank * base_experts + min(ep_rank, remainder)
+        expert_map[start_idx : start_idx + local_num_experts] = torch.arange(
+            0, local_num_experts, dtype=torch.int32
+        )
+    elif expert_placement_strategy == "round_robin":
+        local_log_experts = torch.arange(
+            ep_rank, global_num_experts, ep_size, dtype=torch.int32
+        )
+
+        expert_map[local_log_experts] = torch.arange(
+            0, local_num_experts, dtype=torch.int32
+        )
+    else:
+        raise ValueError(
+            "Unsupported expert placement strategy "
+            f"'{expert_placement_strategy}', expected one of "
+            f"{get_args(ExpertPlacementStrategy)}"
+        )
+
+    expert_mask = None
+    if return_expert_mask:
+        expert_mask = torch.ones(
+            (global_num_experts + num_fused_shared_experts + 1,), dtype=torch.int32
+        )
+        expert_mask[-1] = 0
+        expert_mask[:global_num_experts] = expert_map > -1
+        expert_map = torch.cat(
+            (
+                expert_map,
+                torch.tensor(
+                    [local_num_experts + i for i in range(num_fused_shared_experts)],
+                    dtype=torch.int32,
+                ),
+            ),
+            dim=0,
+        )
+
+    return (local_num_experts, expert_map, expert_mask)
+
+
+def determine_expert_placement_strategy(
+    expert_placement_strategy: ExpertPlacementStrategy,
+    moe_parallel_config: FusedMoEParallelConfig,
+    num_expert_group: int | None,
+    num_redundant_experts: int,
+    enable_eplb: bool,
+) -> ExpertPlacementStrategy:
+    if expert_placement_strategy == "round_robin":
+        round_robin_supported = (
+            (num_expert_group is not None and num_expert_group > 1)
+            and num_redundant_experts == 0
+            and not enable_eplb
+        )
+
+        if not round_robin_supported:
+            logger.warning(
+                "Round-robin expert placement is only supported for "
+                "models with multiple expert groups and no redundant "
+                "experts. Falling back to linear expert placement."
+            )
+            return "linear"
+        if (
+            moe_parallel_config.use_all2all_kernels
+            and not moe_parallel_config.needs_round_robin_routing_tables
+        ):
+            logger.warning(
+                "Round-robin expert placement currently only supports "
+                "the DeepEP low-latency or NIXL EP backend, but '%s' was configured. "
+                "Falling back to linear expert placement.",
+                moe_parallel_config.all2all_backend,
+            )
+            return "linear"
+
+    return expert_placement_strategy
+
+
+class ExpertMapManager:
+    """
+    Manages expert ID mappings and placement for Expert Parallelism.
+
+    Responsibilities:
+    - Calculate local vs global expert counts
+    - Map between global, local, and physical expert IDs
+    - Manage placement strategies (linear, round_robin)
+    - Maintain routing tables for round-robin placement
+    - Support dynamic reconfiguration of EP topology
+
+    When expert_map is required:
+    - Expert Parallelism (EP) is enabled, i.e., when ep_size > 1
+    - EP disabled (ep_size == 1): expert_map is None
+      * All experts are local to the current rank
+      * No mapping is needed
+    - EP enabled (ep_size > 1): expert_map is created
+      * Maps global expert IDs to local expert IDs
+      * Shape: (global_num_experts,)
+      * Contains the local expert index for experts on this rank, -1 for experts
+         on other ranks
+      * Used by kernels to handle distributed expert execution
+    - Kernel support varies:
+      * Supports expert_map: fused_moe, fused_marlin_moe, fused_humming_moe,
+        rocm_aiter_fused_moe, deep_gemm_moe, xpu_moe, gpt_oss_triton_kernels_moe
+      * Does not support: flashinfer_cutlass_moe, fused_batched_moe, most cutlass_moe
+        variants, trtllm_* kernels
+      * When kernel doesn't support expert_map: The modular kernel method sets
+        expert_map=None even if EP is enabled
+    """
+
+    def __init__(
+        self,
+        max_num_batched_tokens: int,
+        top_k: int,
+        global_num_experts: int,
+        num_redundant_experts: int,
+        num_expert_group: int | None,
+        moe_parallel_config: FusedMoEParallelConfig,
+        placement_strategy: ExpertPlacementStrategy,
+        enable_eplb: bool,
+        num_fused_shared_experts: int = 0,
+        rocm_aiter_enabled: bool = False,
+    ):
+        """
+        Initialize expert map manager.
+
+        Args:
+            global_num_experts: Total number of experts across all ranks
+            moe_parallel_config: MoE parallel configuration (contains ep_size,
+                                 ep_rank, backend flags)
+            placement_strategy: Strategy for placing experts ('linear' or 'round_robin')
+            num_fused_shared_experts: Number of fused shared experts (for AITER)
+            rocm_aiter_enabled: Whether ROCm AITER fusion is enabled
+        """
+        self.global_num_experts = global_num_experts
+        self.moe_parallel_config = moe_parallel_config
+        self.num_fused_shared_experts = num_fused_shared_experts
+        self.rocm_aiter_enabled = rocm_aiter_enabled
+        self.top_k = top_k
+        self.max_num_batched_tokens = max_num_batched_tokens
+
+        if moe_parallel_config.use_ep:
+            # Determine expert placement strategy before creating manager
+            placement_strategy = determine_expert_placement_strategy(
+                expert_placement_strategy=placement_strategy,
+                moe_parallel_config=moe_parallel_config,
+                num_expert_group=num_expert_group,
+                num_redundant_experts=num_redundant_experts,
+                enable_eplb=enable_eplb,
+            )
+
+        # Determine effective placement strategy
+        self._placement_strategy = self._determine_placement_strategy(
+            placement_strategy
+        )
+
+        # Calculate expert mappings
+        self._calculate_expert_maps()
+
+        # Initialize routing tables if needed
+        self._routing_tables = self._init_routing_tables()
+
+        self._init_aiter_shared_experts_topK_buffer()
+
+        if self.use_ep and self.rocm_aiter_enabled:
+            expert_mask = self.expert_mask
+            assert expert_mask is None or torch.all(
+                (expert_mask == 0) | (expert_mask == 1)
+            ), "Aiter Fused MoE kernel only supports expert_map with 0 and 1s."
+
+        # Log EP configuration
+        if self.use_ep:
+            logger.info_once(
+                "[EP Rank %s/%s] Expert parallelism is enabled. Expert "
+                "placement strategy: %s. Local/global"
+                " number of experts: %s/%s. Experts local to global index map:"
+                " %s.",
+                self.ep_rank,
+                self.ep_size,
+                self.placement_strategy,
+                self.local_num_experts,
+                self.global_num_experts,
+                self.get_compressed_map_string(),
+            )
+
+    def _init_aiter_shared_experts_topK_buffer(self):
+        if self.num_fused_shared_experts > 0:
+            dp_size = self.moe_parallel_config.dp_size
+            init_aiter_topK_meta_data(
+                n_routed_experts=self.global_num_experts,
+                n_shared_experts=self.num_fused_shared_experts,
+                top_k=self.top_k,
+                tp_rank=self.ep_rank if self.use_ep else self.tp_rank,
+                tp_size=self.ep_size if self.use_ep else self.tp_size,
+                shared_experts_score=1.0,
+                max_num_tokens=self.max_num_batched_tokens * dp_size,
+                is_EP=self.use_ep,
+            )
+
+    @property
+    def use_ep(self) -> int:
+        return self.moe_parallel_config.use_ep
+
+    @property
+    def ep_size(self) -> int:
+        return self.moe_parallel_config.ep_size
+
+    @property
+    def ep_rank(self) -> int:
+        return self.moe_parallel_config.ep_rank
+
+    @property
+    def tp_size(self) -> int:
+        return self.moe_parallel_config.tp_size
+
+    @property
+    def tp_rank(self) -> int:
+        return self.moe_parallel_config.tp_rank
+
+    @property
+    def local_num_experts(self) -> int:
+        return self._local_num_experts
+
+    @property
+    def expert_map(self) -> torch.Tensor | None:
+        """
+        Mapping from global expert ID to local expert ID.
+
+        Returns tensor of shape (global_num_experts,) where:
+        - expert_map[global_id] = local_id if expert is on this rank
+        - expert_map[global_id] = -1 if expert is not on this rank
+
+        Returns None if EP is not enabled (ep_size == 1).
+        """
+        return self._expert_map
+
+    @property
+    def expert_mask(self) -> torch.Tensor | None:
+        """
+        Expert mask for AITER fusion (ROCm-specific).
+
+        Returns tensor of shape (global_num_experts + num_fused_shared + 1,)
+        where 1 indicates expert is on this rank, 0 otherwise.
+        """
+        return self._expert_mask
+
+    @property
+    def placement_strategy(self) -> ExpertPlacementStrategy:
+        """Expert placement strategy ('linear' or 'round_robin')."""
+        return self._placement_strategy
+
+    @property
+    def routing_tables(
+        self,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None:
+        """
+        Routing tables for round-robin placement.
+
+        Returns (global_to_physical, physical_to_global, local_to_global)
+        or None if not using round-robin or tables not needed.
+        """
+        return self._routing_tables
+
+    def map_global_to_local(self, global_id: int) -> int:
+        """
+        Map global expert ID to local expert ID.
+
+        Args:
+            global_id: Global expert ID (0 to global_num_experts - 1)
+
+        Returns:
+            Local expert ID (0 to local_num_experts - 1)
+
+        Raises:
+            ValueError: If expert is not on this rank
+        """
+        if self._expert_map is None:
+            return global_id
+
+        return self._expert_map[global_id].item()
+
+    def is_local_expert(self, global_id: int) -> bool:
+        """Check if expert is assigned to this rank."""
+        if self._expert_map is None:
+            return True
+        return self._expert_map[global_id] != -1
+
+    def get_local_expert_ids(self) -> list[int]:
+        """Get list of global IDs for experts on this rank."""
+        if self._expert_map is None:
+            return list(range(self.global_num_experts))
+
+        return torch.where(self._expert_map != -1)[0].tolist()
+
+    def update(
+        self,
+        moe_parallel_config: FusedMoEParallelConfig,
+        global_num_experts: int,
+    ) -> None:
+        """
+        Update expert mappings for new EP configuration.
+
+        Used during dynamic reconfiguration (e.g., elastic scaling).
+
+        Args:
+            global_num_experts: New total number of experts across all ranks
+            moe_parallel_config: New MoE parallel configuration (contains ep_size,
+                                 ep_rank, backend flags)
+        """
+        self.moe_parallel_config = moe_parallel_config
+        self.global_num_experts = global_num_experts
+
+        if self._expert_map is not None:
+            device = self._expert_map.device
+        elif self._expert_mask is not None:
+            device = self._expert_mask.device
+        else:
+            raise AssertionError("_expert_map or _expert_mask must be present.")
+
+        with device:
+            self._calculate_expert_maps()
+            self._routing_tables = self._init_routing_tables()
+
+            # Reinitialize AITER buffer if needed and parameters provided
+            self._init_aiter_shared_experts_topK_buffer()
+
+    def get_compressed_map_string(self) -> str:
+        """
+        Get compressed string representation of expert map for logging.
+
+        Returns string mapping local to global expert IDs.
+        """
+        if self._expert_map is None:
+            return f"[0..{self.global_num_experts - 1}]"
+
+        global_indices = torch.where(self._expert_map != -1)[0]
+        local_indices = self._expert_map[global_indices]
+        return ", ".join(
+            f"{local_index.item()}->{global_index.item()}"
+            for local_index, global_index in zip(local_indices, global_indices)
+        )
+
+    # Private methods
+
+    def _determine_placement_strategy(
+        self, requested_strategy: ExpertPlacementStrategy
+    ) -> ExpertPlacementStrategy:
+        """Determine effective placement strategy based on config."""
+        if requested_strategy != "round_robin":
+            return requested_strategy
+
+        # Round-robin requires specific conditions
+        if self.ep_size == 1:
+            return "linear"
+
+        if (
+            self.moe_parallel_config.use_all2all_kernels
+            and not self.moe_parallel_config.needs_round_robin_routing_tables
+        ):
+            logger.warning(
+                "Round-robin placement requires DeepEP-ll or NIXL backend. "
+                "Falling back to linear."
+            )
+            return "linear"
+
+        return "round_robin"
+
+    def _calculate_expert_maps(self) -> None:
+        """Calculate expert mappings based on placement strategy."""
+        (
+            self._local_num_experts,
+            self._expert_map,
+            self._expert_mask,
+        ) = determine_expert_map(
+            ep_size=self.ep_size,
+            ep_rank=self.ep_rank,
+            global_num_experts=self.global_num_experts,
+            expert_placement_strategy=self._placement_strategy,
+            num_fused_shared_experts=self.num_fused_shared_experts,
+            return_expert_mask=self.rocm_aiter_enabled,
+        )
+
+        self._local_num_experts += self.num_fused_shared_experts
+
+    def _init_routing_tables(
+        self,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None:
+        """
+        Ensure routing tables are initialized if needed for round-robin.
+
+        This is a public method that can be called to explicitly initialize
+        routing tables. It's safe to call multiple times (idempotent).
+        """
+        if self._placement_strategy != "round_robin":
+            return None
+
+        if not self.moe_parallel_config.needs_round_robin_routing_tables:
+            return None
+
+        if self._expert_map is None:
+            return None
+
+        return self._init_round_robin_expert_routing_tables()
+
+    def _init_round_robin_expert_routing_tables(
+        self,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """Build routing tables for round-robin placement."""
+        assert self.num_fused_shared_experts == 0, (
+            "Round robin not supported for AITER."
+        )
+
+        global_indices = torch.arange(
+            self.global_num_experts,
+            dtype=torch.long,
+        )
+        owner = torch.remainder(global_indices, self.ep_size)
+        local_index = torch.div(global_indices, self.ep_size, rounding_mode="floor")
+
+        base = self.global_num_experts // self.ep_size
+        remainder = self.global_num_experts % self.ep_size
+        physical_offset = owner * base
+
+        if remainder > 0:
+            remainder_tensor = torch.tensor(
+                remainder,
+                dtype=torch.long,
+            )
+            physical_offset = physical_offset + torch.minimum(owner, remainder_tensor)
+
+        global_to_physical = physical_offset + local_index
+        physical_to_global = torch.empty_like(global_to_physical)
+        physical_to_global[global_to_physical] = global_indices
+
+        local_global = torch.arange(
+            self.ep_rank,
+            self.global_num_experts,
+            self.ep_size,
+            dtype=torch.long,
+        )
+        if local_global.numel() != self._local_num_experts:
+            local_global = local_global[: self._local_num_experts]
+
+        return (global_to_physical, physical_to_global, local_global)
diff --git a/vllm/model_executor/layers/fused_moe/experts/aiter_mxfp4_w4a8_moe.py b/vllm/model_executor/layers/fused_moe/experts/aiter_mxfp4_w4a8_moe.py
new file mode 100644
index 000000000000..3906a7e057ca
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/experts/aiter_mxfp4_w4a8_moe.py
@@ -0,0 +1,292 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm._aiter_ops import rocm_aiter_ops
+from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEParallelConfig,
+    FusedMoEQuantConfig,
+    RoutingMethodType,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    QuantKey,
+    kFp8StaticTensorSym,
+    kMxfp4Static,
+)
+
+__all__ = [
+    "AiterW4A8ExpertsMonolithic",
+    "aiter_triton_kernel_w4a8_moe_forward",
+]
+
+
+def aiter_triton_kernel_w4a8_moe_forward(
+    hidden_states: torch.Tensor,
+    w1,  # Tensor or triton_kernels.Tensor
+    w2,  # Tensor or triton_kernels.Tensor
+    gating_output: torch.Tensor,
+    topk: int,
+    renormalize: bool,
+    activation: MoEActivation = MoEActivation.SWIGLUOAI,
+    quant_config: FusedMoEQuantConfig | None = None,
+    apply_router_weight_on_input: bool = False,
+    global_num_experts: int = -1,
+    expert_map: torch.Tensor | None = None,
+    unpadded_N_w1=None,
+    unpadded_K_w1=None,
+    unpadded_N_w2=None,
+    unpadded_K_w2=None,
+):
+    assert (
+        quant_config is not None
+        and quant_config.use_mxfp4_w4a8
+        and rocm_aiter_ops.is_enabled()
+    )
+    from aiter.ops.triton.moe_routing.routing import routing as aiter_routing
+
+    routing_data, gather_idx, scatter_idx = aiter_routing(
+        gating_output, topk, sm_first=not renormalize
+    )
+    return triton_kernel_fused_mxfp4_w4a8_experts(
+        None,
+        hidden_states,
+        w1,
+        w2,
+        routing_data,
+        gather_idx,
+        scatter_idx,
+        activation=activation.value,
+        quant_config=quant_config,
+        apply_router_weight_on_input=apply_router_weight_on_input,
+        global_num_experts=global_num_experts,
+        expert_map=expert_map,
+        unpadded_N_w1=unpadded_N_w1,
+        unpadded_K_w1=unpadded_K_w1,
+        unpadded_N_w2=unpadded_N_w2,
+        unpadded_K_w2=unpadded_K_w2,
+    )
+
+
+def triton_kernel_fused_mxfp4_w4a8_experts(
+    output_tensor: torch.Tensor,
+    hidden_states: torch.Tensor,
+    w1,  # Tensor or triton_kernels.Tensor
+    w2,  # Tensor or triton_kernels.Tensor
+    routing_data,  # RoutingData
+    gather_indx,  # GatherIndx
+    scatter_indx,  # ScatterIndx
+    activation: str = "silu",
+    quant_config: FusedMoEQuantConfig | None = None,
+    swiglu_alpha: float = 1.702,
+    swiglu_limit: float = 7.0,
+    apply_router_weight_on_input: bool = False,
+    global_num_experts: int = -1,
+    expert_map: torch.Tensor | None = None,
+    a1q_scale: torch.Tensor | None = None,
+    unpadded_N_w1=None,
+    unpadded_K_w1=None,
+    unpadded_N_w2=None,
+    unpadded_K_w2=None,
+) -> torch.Tensor:
+    assert quant_config is not None
+    # type check, uint8 means mxfp4
+    assert hidden_states.dtype == torch.bfloat16
+    assert quant_config.w1_bias is None or quant_config.w1_bias.dtype == torch.float32
+    assert quant_config.w2_bias is None or quant_config.w2_bias.dtype == torch.float32
+
+    # Shape check: weights are padded (e.g. hidden_size padded for
+    # GFX950 swizzle).
+    assert hidden_states.shape[-1] == w1.shape[-2]
+    assert w2.shape[-1] == w1.shape[1]
+
+    E, _, N = w1.shape
+
+    if global_num_experts == -1:
+        global_num_experts = E
+
+    gammas = routing_data.gate_scal if routing_data else None
+
+    from aiter.ops.triton.moe_op_gemm_a8w4 import moe_gemm_a8w4
+    from aiter.ops.triton.quant_moe import downcast_to_static_fp8
+
+    assert quant_config.w1_precision is not None, (
+        "w1_precision in quant config can't be None"
+    )
+    assert quant_config.w2_precision is not None, (
+        "w2_precision in quant config can't be None"
+    )
+
+    hidden_states = downcast_to_static_fp8(
+        hidden_states, quant_config.w1_precision.flex_ctx.lhs_data.scale
+    )
+
+    intermediate_cache1 = moe_gemm_a8w4(
+        hidden_states,
+        w1.storage.data,
+        None,
+        quant_config.w1_precision.weight_scale.storage.data,
+        quant_config.w1_precision.flex_ctx.lhs_data.scale,
+        quant_config.w2_precision.flex_ctx.lhs_data.scale,
+        quant_config.w1_bias,
+        routing_data,
+        gather_indx=gather_indx,
+        gammas=gammas if apply_router_weight_on_input else None,
+        swizzle_mx_scale="CDNA4_SCALE",
+        out_dtype=torch.float8_e4m3fn,
+        apply_swiglu=True,
+        alpha=swiglu_alpha,
+        limit=swiglu_limit,
+        unpadded_N=unpadded_N_w1,
+        unpadded_K=unpadded_K_w1,
+    )
+
+    intermediate_cache3 = moe_gemm_a8w4(
+        intermediate_cache1,
+        w2.storage.data,
+        None,
+        quant_config.w2_precision.weight_scale.storage.data,
+        quant_config.w2_precision.flex_ctx.lhs_data.scale,
+        None,
+        quant_config.w2_bias,
+        routing_data,
+        scatter_indx=scatter_indx,
+        gammas=None if apply_router_weight_on_input else gammas,
+        swizzle_mx_scale="CDNA4_SCALE",
+        unpadded_N=unpadded_N_w2,
+        unpadded_K=unpadded_K_w2,
+    )
+
+    return intermediate_cache3
+
+
+class AiterW4A8ExpertsMonolithic(mk.FusedMoEExpertsMonolithic):
+    """
+    Monolithic MXFP4 W4A8 expert using AITER triton kernels.
+
+    This backend uses:
+    - aiter.ops.triton.moe_routing.routing for routing
+    - aiter.ops.triton.moe_op_gemm_a8w4.moe_gemm_a8w4 for computation
+
+    Weight format: MXFP4 weights with GFX950 swizzle
+    Activation: Static FP8 quantization
+    """
+
+    def __init__(
+        self,
+        moe_config: FusedMoEConfig,
+        quant_config: FusedMoEQuantConfig,
+    ):
+        super().__init__(moe_config, quant_config)
+        self.topk = moe_config.experts_per_token
+        self.renormalize = moe_config.routing_method in (
+            RoutingMethodType.Renormalize,
+            RoutingMethodType.RenormalizeNaive,
+        )
+
+    @staticmethod
+    def activation_format() -> mk.FusedMoEActivationFormat:
+        return mk.FusedMoEActivationFormat.Standard
+
+    @staticmethod
+    def _supports_current_device() -> bool:
+        # Requires AITER and GFX950
+        if not rocm_aiter_ops.is_enabled():
+            return False
+        from vllm.platforms.rocm import on_gfx950
+
+        return on_gfx950()
+
+    @staticmethod
+    def _supports_no_act_and_mul() -> bool:
+        return False
+
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        # W4A8: MXFP4 weights with static FP8 activations
+        SUPPORTED_W_A = [
+            (kMxfp4Static, kFp8StaticTensorSym),
+        ]
+        return (weight_key, activation_key) in SUPPORTED_W_A
+
+    @staticmethod
+    def _supports_activation(activation: MoEActivation) -> bool:
+        # Only SILU activation (swiglu) is supported
+        return activation == MoEActivation.SWIGLUOAI
+
+    @staticmethod
+    def _supports_parallel_config(
+        moe_parallel_config: FusedMoEParallelConfig,
+    ) -> bool:
+        return (
+            not moe_parallel_config.use_all2all_kernels
+            and not moe_parallel_config.enable_eplb
+            and moe_parallel_config.dp_size <= 1
+        )
+
+    @staticmethod
+    def _supports_routing_method(
+        routing_method: RoutingMethodType,
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        return routing_method in [
+            RoutingMethodType.Renormalize,
+            RoutingMethodType.RenormalizeNaive,
+        ]
+
+    @staticmethod
+    def _supports_router_logits_dtype(
+        router_logits_dtype: torch.dtype | None,
+        routing_method: RoutingMethodType,
+    ) -> bool:
+        return True
+
+    def supports_expert_map(self) -> bool:
+        return False  # Expert parallelism not yet supported
+
+    @property
+    def expects_unquantized_inputs(self) -> bool:
+        return True
+
+    def apply(
+        self,
+        hidden_states: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        router_logits: torch.Tensor,
+        activation: MoEActivation,
+        global_num_experts: int,
+        expert_map: torch.Tensor | None,
+        a1q_scale: torch.Tensor | None,
+        apply_router_weight_on_input: bool,
+        # grouped topk + fused topk bias parameters
+        num_expert_group: int | None = None,
+        e_score_correction_bias: torch.Tensor | None = None,
+        routed_scaling_factor: float | None = None,
+        topk_group: int | None = None,
+    ) -> torch.Tensor:
+        assert self.moe_config.intermediate_size_per_partition_unpadded is not None
+        assert self.moe_config.hidden_dim_unpadded is not None
+        return aiter_triton_kernel_w4a8_moe_forward(
+            hidden_states=hidden_states,
+            w1=w1,
+            w2=w2,
+            gating_output=router_logits,
+            topk=self.topk,
+            renormalize=self.renormalize,
+            global_num_experts=global_num_experts,
+            expert_map=expert_map,
+            quant_config=self.quant_config,
+            apply_router_weight_on_input=apply_router_weight_on_input,
+            unpadded_N_w1=self.moe_config.intermediate_size_per_partition_unpadded * 2,
+            unpadded_K_w1=self.moe_config.hidden_dim_unpadded,
+            unpadded_N_w2=self.moe_config.hidden_dim_unpadded,
+            unpadded_K_w2=self.moe_config.intermediate_size_per_partition_unpadded,
+        )
diff --git a/vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/experts/batched_deep_gemm_moe.py
similarity index 98%
rename from vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py
rename to vllm/model_executor/layers/fused_moe/experts/batched_deep_gemm_moe.py
index 2cb0bd7649f5..7bd383b9cdac 100644
--- a/vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/batched_deep_gemm_moe.py
@@ -210,9 +210,9 @@ def persistent_masked_m_silu_mul_quant(
         DeepGemmQuantScaleFMT.UE8M0,
     ]
 
-    cuda_arch = current_platform.get_device_capability(
-        device_id=y.device.index
-    ).to_int()
+    device_capability = current_platform.get_device_capability(device_id=y.device.index)
+    assert device_capability is not None
+    cuda_arch = device_capability.to_int()
 
     if current_platform.is_cuda() and cuda_arch >= 80:
         torch.ops._C.persistent_masked_m_silu_mul_quant(
@@ -369,7 +369,6 @@ def estimate_expected_m(
             logger.warning_once(
                 "DPMetadata unavailable. Defaulting expected_m to "
                 f"{max_tokens_per_expert}.",
-                scope="local",
             )
             return max_tokens_per_expert
 
diff --git a/vllm/model_executor/layers/fused_moe/experts/cpu_moe.py b/vllm/model_executor/layers/fused_moe/experts/cpu_moe.py
new file mode 100644
index 000000000000..54b264ef772e
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/experts/cpu_moe.py
@@ -0,0 +1,326 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""CPU FP8 W8A16 and MXFP4 W4A16 fused MoE experts."""
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm._custom_ops import CPUQuantMethod, fused_experts_cpu
+from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEParallelConfig,
+    FusedMoEQuantConfig,
+    RoutingMethodType,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    QuantKey,
+    kFp8Dynamic128Sym,
+    kFp8Static128BlockSym,
+    kMxfp4Static,
+)
+from vllm.platforms import current_platform
+
+
+def prepare_fp8_moe_layer_for_cpu(
+    w13: torch.Tensor,
+    w2: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """VNNI-prepack FP8 MoE weights for CPU kernel."""
+    packed_w13 = torch.ops._C.convert_weight_packed(w13)
+    packed_w2 = torch.ops._C.convert_weight_packed(w2)
+    return packed_w13, packed_w2
+
+
+class CPUExpertsFp8(mk.FusedMoEExpertsMonolithic):
+    """CPU FP8 W8A16 block-quantized monolithic MoE experts."""
+
+    def __init__(
+        self,
+        moe_config: FusedMoEConfig,
+        quant_config: FusedMoEQuantConfig,
+    ):
+        super().__init__(
+            moe_config,
+            quant_config,
+        )
+
+    @property
+    def expects_unquantized_inputs(self) -> bool:
+        return True
+
+    @staticmethod
+    def activation_format() -> mk.FusedMoEActivationFormat:
+        return mk.FusedMoEActivationFormat.Standard
+
+    @staticmethod
+    def _supports_current_device() -> bool:
+        return current_platform.is_cpu()
+
+    @staticmethod
+    def _supports_no_act_and_mul() -> bool:
+        return False
+
+    @staticmethod
+    def _supports_activation(activation: MoEActivation) -> bool:
+        return activation == MoEActivation.SILU
+
+    @staticmethod
+    def _supports_parallel_config(
+        moe_parallel_config: FusedMoEParallelConfig,
+    ) -> bool:
+        return True
+
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        SUPPORTED_W_A = [
+            (kFp8Static128BlockSym, kFp8Dynamic128Sym),
+        ]
+        return (weight_key, activation_key) in SUPPORTED_W_A
+
+    @staticmethod
+    def _supports_routing_method(
+        routing_method: RoutingMethodType,
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        return routing_method in [
+            RoutingMethodType.Default,
+            RoutingMethodType.Renormalize,
+            RoutingMethodType.RenormalizeNaive,
+        ]
+
+    @staticmethod
+    def _supports_router_logits_dtype(
+        router_logits_dtype: torch.dtype | None,
+        routing_method: RoutingMethodType,
+    ) -> bool:
+        return True
+
+    def supports_expert_map(self) -> bool:
+        return False
+
+    def apply(
+        self,
+        hidden_states: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        router_logits: torch.Tensor,
+        activation: MoEActivation,
+        global_num_experts: int,
+        expert_map: torch.Tensor | None,
+        a1q_scale: torch.Tensor | None,
+        apply_router_weight_on_input: bool,
+        # grouped topk + fused topk bias parameters
+        num_expert_group: int | None = None,
+        e_score_correction_bias: torch.Tensor | None = None,
+        routed_scaling_factor: float | None = None,
+        topk_group: int | None = None,
+    ) -> torch.Tensor:
+        from vllm.model_executor.layers.fused_moe.cpu_fused_moe import (
+            select_experts,
+        )
+
+        topk_weights, topk_ids = select_experts(
+            hidden_states=hidden_states,
+            router_logits=router_logits,
+            use_grouped_topk=num_expert_group is not None,
+            top_k=self.moe_config.experts_per_token,
+            renormalize=self.moe_config.routing_method
+            in (
+                RoutingMethodType.Renormalize,
+                RoutingMethodType.RenormalizeNaive,
+            ),
+            topk_group=topk_group,
+            num_expert_group=num_expert_group,
+            scoring_func="softmax",
+            routed_scaling_factor=(
+                routed_scaling_factor if routed_scaling_factor is not None else 1.0
+            ),
+            e_score_correction_bias=e_score_correction_bias,
+        )
+
+        block_shape = (
+            list(self.quant_config.block_shape)
+            if self.quant_config.block_shape
+            else (
+                [self.quant_config._w1.shape.row, self.quant_config._w1.shape.col]
+                if self.quant_config._w1.shape is not None
+                else None
+            )
+        )
+
+        return fused_experts_cpu(
+            hidden_states,
+            w1,
+            w2,
+            topk_weights,
+            topk_ids,
+            False,  # inplace
+            CPUQuantMethod.FP8_W8A16,  # moe_comp_method
+            self.w1_scale,  # w1_scale
+            self.w2_scale,  # w2_scale
+            None,  # w1_zero
+            None,  # w2_zero
+            block_shape,  # block_size
+            None,  # w1_bias
+            None,  # w2_bias
+            None,  # alpha
+            None,  # limit
+            True,  # is_vnni
+        )
+
+
+def prepare_mxfp4_moe_layer_for_cpu(
+    w13: torch.Tensor,
+    w2: torch.Tensor,
+    w13_scale: torch.Tensor,
+    w2_scale: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """VNNI-prepack MXFP4 MoE weights and repack scales for CPU AMX kernel."""
+    packed_w13 = torch.ops._C.convert_weight_packed(w13)
+    packed_w2 = torch.ops._C.convert_weight_packed(w2)
+    packed_w13_scale = torch.ops._C.convert_scale_packed(w13_scale)
+    packed_w2_scale = torch.ops._C.convert_scale_packed(w2_scale)
+    return packed_w13, packed_w2, packed_w13_scale, packed_w2_scale
+
+
+class CPUExpertsMxfp4(mk.FusedMoEExpertsMonolithic):
+    """CPU MXFP4 W4A16 monolithic MoE experts."""
+
+    def __init__(
+        self,
+        moe_config: FusedMoEConfig,
+        quant_config: FusedMoEQuantConfig,
+    ):
+        super().__init__(
+            moe_config,
+            quant_config,
+        )
+
+    @property
+    def expects_unquantized_inputs(self) -> bool:
+        return True
+
+    @staticmethod
+    def activation_format() -> mk.FusedMoEActivationFormat:
+        return mk.FusedMoEActivationFormat.Standard
+
+    @staticmethod
+    def _supports_current_device() -> bool:
+        return current_platform.is_cpu()
+
+    @staticmethod
+    def _supports_no_act_and_mul() -> bool:
+        return False
+
+    @staticmethod
+    def _supports_activation(activation: MoEActivation) -> bool:
+        return activation in (MoEActivation.SILU, MoEActivation.SWIGLUOAI)
+
+    @staticmethod
+    def _supports_parallel_config(
+        moe_parallel_config: FusedMoEParallelConfig,
+    ) -> bool:
+        return True
+
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        SUPPORTED_W_A = [
+            (kMxfp4Static, None),
+        ]
+        return (weight_key, activation_key) in SUPPORTED_W_A
+
+    @staticmethod
+    def _supports_routing_method(
+        routing_method: RoutingMethodType,
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        return routing_method in [
+            RoutingMethodType.Default,
+            RoutingMethodType.Renormalize,
+            RoutingMethodType.RenormalizeNaive,
+        ]
+
+    @staticmethod
+    def _supports_router_logits_dtype(
+        router_logits_dtype: torch.dtype | None,
+        routing_method: RoutingMethodType,
+    ) -> bool:
+        return True
+
+    def supports_expert_map(self) -> bool:
+        return False
+
+    def apply(
+        self,
+        hidden_states: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        router_logits: torch.Tensor,
+        activation: MoEActivation,
+        global_num_experts: int,
+        expert_map: torch.Tensor | None,
+        a1q_scale: torch.Tensor | None,
+        apply_router_weight_on_input: bool,
+        # grouped topk + fused topk bias parameters
+        num_expert_group: int | None = None,
+        e_score_correction_bias: torch.Tensor | None = None,
+        routed_scaling_factor: float | None = None,
+        topk_group: int | None = None,
+    ) -> torch.Tensor:
+        from vllm.model_executor.layers.fused_moe.cpu_fused_moe import (
+            select_experts,
+        )
+
+        topk_weights, topk_ids = select_experts(
+            hidden_states=hidden_states,
+            router_logits=router_logits,
+            use_grouped_topk=num_expert_group is not None,
+            top_k=self.moe_config.experts_per_token,
+            renormalize=self.moe_config.routing_method
+            in (
+                RoutingMethodType.Renormalize,
+                RoutingMethodType.RenormalizeNaive,
+            ),
+            topk_group=topk_group,
+            num_expert_group=num_expert_group,
+            scoring_func="softmax",
+            routed_scaling_factor=(
+                routed_scaling_factor if routed_scaling_factor is not None else 1.0
+            ),
+            e_score_correction_bias=e_score_correction_bias,
+        )
+
+        # Get bias and swiglu params from quant config
+        w1_bias = self.quant_config.w1_bias
+        w2_bias = self.quant_config.w2_bias
+        alpha = getattr(self.quant_config, "gemm1_alpha", None)
+        limit = getattr(self.quant_config, "gemm1_clamp_limit", None)
+
+        return fused_experts_cpu(
+            hidden_states,
+            w1,
+            w2,
+            topk_weights,
+            topk_ids,
+            False,  # inplace
+            CPUQuantMethod.MXFP4,  # moe_comp_method
+            self.w1_scale,  # w1_scale
+            self.w2_scale,  # w2_scale
+            None,  # w1_zero
+            None,  # w2_zero
+            None,  # block_size
+            w1_bias,
+            w2_bias,
+            alpha,
+            limit,
+            True,  # is_vnni
+        )
diff --git a/vllm/model_executor/layers/fused_moe/cutlass_moe.py b/vllm/model_executor/layers/fused_moe/experts/cutlass_moe.py
similarity index 77%
rename from vllm/model_executor/layers/fused_moe/cutlass_moe.py
rename to vllm/model_executor/layers/fused_moe/experts/cutlass_moe.py
index 75ee776646ba..53a876721a13 100644
--- a/vllm/model_executor/layers/fused_moe/cutlass_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/cutlass_moe.py
@@ -20,9 +20,6 @@
     moe_permute,
     moe_unpermute,
 )
-from vllm.model_executor.layers.fused_moe.prepare_finalize import (
-    MoEPrepareAndFinalizeNoDPEPModular,
-)
 from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
     TopKWeightAndReduceDelegate,
     TopKWeightAndReduceNoOP,
@@ -36,6 +33,9 @@
     kFp8DynamicTokenSym,
     kFp8StaticChannelSym,
     kFp8StaticTensorSym,
+    kInt4Static,
+    kMxfp4Dynamic,
+    kMxfp4Static,
     kNvfp4Dynamic,
     kNvfp4Static,
 )
@@ -795,6 +795,299 @@ def apply(
         )
 
 
+def run_cutlass_moe_mxfp4(
+    output: torch.Tensor,
+    a: torch.Tensor,
+    w1_fp4: torch.Tensor,
+    w1_blockscale: torch.Tensor,
+    w2_fp4: torch.Tensor,
+    w2_blockscale: torch.Tensor,
+    topk_weights: torch.Tensor,
+    topk_ids: torch.Tensor,
+    activation: MoEActivation,
+    workspace13: torch.Tensor,
+    workspace2: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+    e: int,
+    device: torch.device,
+    apply_router_weight_on_input: bool = False,
+) -> None:
+    """MXFP4 x MXFP4 MoE implementation using CUTLASS grouped GEMM."""
+    is_gated = activation.is_gated
+    w1_n = n * 2 if is_gated else n
+
+    assert topk_weights.shape == topk_ids.shape, "topk shape mismatch"
+    assert w1_fp4.dtype == torch.uint8, "weight 1 must be uint8"
+    assert w2_fp4.dtype == torch.uint8, "weight 2 must be uint8"
+    assert (
+        w1_fp4.ndim == 3
+        and w2_fp4.ndim == 3
+        and w1_blockscale.ndim == 3
+        and w2_blockscale.ndim == 3
+    ), "All Weights must be of rank 3 for cutlass_moe_mxfp4"
+    m_a, k_a = a.shape
+    e_w1, w1_n_actual, half_k_w1 = w1_fp4.shape
+    e_w2, k_w2, half_n_w2 = w2_fp4.shape
+
+    assert e_w1 == e_w2 and e_w1 == e
+    assert k_a == half_k_w1 * 2 and k == k_w2
+    assert w1_n_actual == w1_n and half_n_w2 * 2 == n
+    assert m == m_a
+    assert 2 * half_k_w1 == k_w2
+    assert a.dtype in [torch.half, torch.bfloat16], "Invalid input dtype"
+    assert topk_weights.size(0) == m and topk_ids.size(0) == m
+
+    topk = topk_ids.size(1)
+    out_dtype = a.dtype
+    num_topk = topk_ids.size(1)
+
+    expert_offsets = torch.empty((e + 1), dtype=torch.int32, device=device)
+    blockscale_offsets = torch.empty((e + 1), dtype=torch.int32, device=device)
+    problem_sizes1 = torch.empty((e, 3), dtype=torch.int32, device=device)
+    problem_sizes2 = torch.empty((e, 3), dtype=torch.int32, device=device)
+
+    a_map = torch.empty((topk_ids.numel()), dtype=torch.int32, device=device)
+    c_map = torch.empty((topk_ids.numel()), dtype=torch.int32, device=device)
+
+    if apply_router_weight_on_input:
+        assert num_topk == 1, (
+            "apply_router_weight_on_input is only implemented for topk=1"
+        )
+        a.mul_(topk_weights.to(out_dtype))
+
+    ops.get_cutlass_moe_mm_data(
+        topk_ids,
+        expert_offsets,
+        problem_sizes1,
+        problem_sizes2,
+        a_map,
+        c_map,
+        e,
+        n,
+        k,
+        blockscale_offsets,
+        is_gated=is_gated,
+    )
+
+    a = ops.shuffle_rows(a, a_map)
+    rep_a_fp4, rep_a_blockscale = ops.mxfp4_experts_quant(
+        a,
+        expert_offsets,
+        blockscale_offsets,
+        e,
+        num_topk,
+    )
+    c1 = _resize_cache(workspace13, (m * topk, w1_n))
+    c2 = _resize_cache(workspace2, (m * topk, n))
+    c3 = _resize_cache(workspace13, (m * topk, k))
+
+    ops.cutlass_mxfp4_moe_mm(
+        c1,
+        rep_a_fp4,
+        w1_fp4,
+        rep_a_blockscale,
+        w1_blockscale,
+        problem_sizes1,
+        expert_offsets[:-1],
+        blockscale_offsets[:-1],
+    )
+    del rep_a_fp4, rep_a_blockscale
+    if activation == MoEActivation.SILU:
+        int_fp4, int_blockscale = ops.silu_and_mul_mxfp4_experts_quant(
+            c1, expert_offsets, blockscale_offsets, e, num_topk
+        )
+    else:
+        apply_moe_activation(activation, c2, c1)
+        int_fp4, int_blockscale = ops.mxfp4_experts_quant(
+            c2, expert_offsets, blockscale_offsets, e, num_topk
+        )
+
+    ops.cutlass_mxfp4_moe_mm(
+        c3,
+        int_fp4,
+        w2_fp4,
+        int_blockscale,
+        w2_blockscale,
+        problem_sizes2,
+        expert_offsets[:-1],
+        blockscale_offsets[:-1],
+    )
+    del int_fp4, int_blockscale
+
+    c3 = ops.shuffle_rows(c3, c_map)
+
+    assert output.dtype == out_dtype
+    if not apply_router_weight_on_input:
+        output.copy_(
+            (
+                c3.view(m, num_topk, k)
+                * topk_weights.view(m, num_topk, 1).to(out_dtype)
+            ).sum(dim=1),
+            non_blocking=True,
+        )
+    else:
+        output.copy_(c3.view(m, num_topk, k).sum(dim=1), non_blocking=True)
+    return
+
+
+def swizzle_mxfp4_scales(
+    scales: torch.Tensor,
+    N: int,
+    K: int,
+) -> torch.Tensor:
+    """Swizzle flat [N, K//32] E8M0 scales to CUTLASS tiled layout.
+
+    CUTLASS expects MX scale factors in a tiled layout:
+        [numMTiles, numKTiles, 32, 4, 4]
+    where numMTiles = ceil(N/128), numKTiles = ceil(K/128),
+    and the inner dimensions correspond to the swizzle pattern:
+        mTileIdx = mIdx / 128
+        outerMIdx = mIdx % 32
+        innerMIdx = (mIdx / 32) % 4
+        kTileIdx = kIdx / 4
+        innerKIdx = kIdx % 4
+    with kIdx = col_in_scale_space (i.e., index into K//32).
+    """
+    assert scales.dtype == torch.uint8
+    num_scale_cols = K // 32  # number of E8M0 scale values per row
+
+    num_m_tiles = (N + 127) // 128
+    num_k_tiles = (num_scale_cols + 3) // 4
+
+    # Pad N to multiple of 128 and scale_cols to multiple of 4
+    padded_N = num_m_tiles * 128
+    padded_scale_cols = num_k_tiles * 4
+
+    # Start with flat scales, pad if needed
+    padded = torch.zeros(
+        padded_N, padded_scale_cols, dtype=torch.uint8, device=scales.device
+    )
+    padded[:N, :num_scale_cols] = scales
+
+    # Reshape to tile structure:
+    # [numMTiles, 4, 32, numKTiles, 4]
+    #  mTileIdx, innerMIdx, outerMIdx, kTileIdx, innerKIdx
+    tiled = padded.reshape(num_m_tiles, 4, 32, num_k_tiles, 4)
+    # Permute to [numMTiles, numKTiles, 32, 4, 4]
+    #            (outerMIdx, innerMIdx, innerKIdx)
+    tiled = tiled.permute(0, 3, 2, 1, 4).contiguous()
+    return tiled.reshape(-1)
+
+
+class CutlassExpertsMxfp4(mk.FusedMoEExpertsModular):
+    """CUTLASS MXFP4 x MXFP4 fused MoE expert implementation."""
+
+    @property
+    def expects_unquantized_inputs(self) -> bool:
+        return True
+
+    @staticmethod
+    def _supports_current_device() -> bool:
+        p = current_platform
+        return p.is_cuda() and p.is_device_capability_family(100)
+
+    @staticmethod
+    def _supports_no_act_and_mul() -> bool:
+        return True
+
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        return (weight_key, activation_key) == (kMxfp4Static, kMxfp4Dynamic)
+
+    @staticmethod
+    def _supports_activation(activation: MoEActivation) -> bool:
+        return activation in [
+            MoEActivation.SILU,
+            MoEActivation.GELU,
+            MoEActivation.SWIGLUOAI,
+            MoEActivation.SWIGLUSTEP,
+            MoEActivation.SILU_NO_MUL,
+            MoEActivation.GELU_NO_MUL,
+            MoEActivation.RELU2_NO_MUL,
+        ]
+
+    @staticmethod
+    def _supports_parallel_config(
+        moe_parallel_config: FusedMoEParallelConfig,
+    ) -> bool:
+        return moe_parallel_config.ep_size == 1
+
+    @staticmethod
+    def activation_format() -> mk.FusedMoEActivationFormat:
+        return mk.FusedMoEActivationFormat.Standard
+
+    def supports_expert_map(self) -> bool:
+        return False
+
+    def finalize_weight_and_reduce_impl(self) -> mk.TopKWeightAndReduce:
+        return TopKWeightAndReduceNoOP()
+
+    def workspace_dtype(self, act_dtype: torch.dtype) -> torch.dtype:
+        return act_dtype
+
+    def workspace_shapes(
+        self,
+        M: int,
+        N: int,
+        K: int,
+        topk: int,
+        global_num_experts: int,
+        local_num_experts: int,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        activation: MoEActivation,
+    ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...]]:
+        workspace1 = (M * topk, max(2 * N, K))
+        workspace2 = (M * topk, N)
+        output = (M, K)
+        return (workspace1, workspace2, output)
+
+    def apply(
+        self,
+        output: torch.Tensor,
+        hidden_states: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        activation: MoEActivation,
+        global_num_experts: int,
+        expert_map: torch.Tensor | None,
+        a1q_scale: torch.Tensor | None,
+        a2_scale: torch.Tensor | None,
+        workspace13: torch.Tensor | None,
+        workspace2: torch.Tensor | None,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        apply_router_weight_on_input: bool,
+    ):
+        e, m, n, k, _ = self.moe_problem_size(hidden_states, w1, w2, topk_ids)
+        n = w2.shape[2] * 2
+
+        run_cutlass_moe_mxfp4(
+            output=output,
+            a=hidden_states,
+            w1_fp4=w1,
+            w1_blockscale=self.w1_scale,
+            w2_fp4=w2,
+            w2_blockscale=self.w2_scale,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            activation=activation,
+            workspace13=workspace13,
+            workspace2=workspace2,
+            m=m,
+            n=n,
+            k=k,
+            e=e,
+            device=hidden_states.device,
+            apply_router_weight_on_input=apply_router_weight_on_input,
+        )
+
+
 # W4A8
 def run_cutlass_moe_w4a8_fp8(
     output: torch.Tensor,
@@ -942,29 +1235,36 @@ def run_cutlass_moe_w4a8_fp8(
 class CutlassExpertsW4A8Fp8(mk.FusedMoEExpertsModular):
     def __init__(
         self,
-        out_dtype: torch.dtype | None,
-        a_strides1: torch.Tensor,
-        a_strides2: torch.Tensor,
-        b_strides1: torch.Tensor,
-        b_strides2: torch.Tensor,
-        c_strides1: torch.Tensor,
-        c_strides2: torch.Tensor,
-        s_strides1: torch.Tensor,
-        s_strides2: torch.Tensor,
         moe_config: FusedMoEConfig,
         quant_config: FusedMoEQuantConfig,
+        b_strides1: torch.Tensor,
+        b_strides2: torch.Tensor,
         group_size: int,
     ):
         super().__init__(moe_config=moe_config, quant_config=quant_config)
-        self.out_dtype = out_dtype
-        self.a_strides1 = a_strides1
-        self.a_strides2 = a_strides2
+
+        e = moe_config.num_local_experts
+        n = moe_config.intermediate_size_per_partition
+        k = moe_config.hidden_dim
+        device = moe_config.device
+
+        self.out_dtype = moe_config.in_dtype
+
+        a_strides1_c_strides2 = torch.full((e,), k, device=device, dtype=torch.int64)
+        self.a_strides1 = a_strides1_c_strides2
+        self.a_strides2 = torch.full((e,), n, device=device, dtype=torch.int64)
+        self.c_strides1 = torch.full((e,), 2 * n, device=device, dtype=torch.int64)
+        self.c_strides2 = a_strides1_c_strides2
+
         self.b_strides1 = b_strides1
         self.b_strides2 = b_strides2
-        self.c_strides1 = c_strides1
-        self.c_strides2 = c_strides2
-        self.s_strides1 = s_strides1
-        self.s_strides2 = s_strides2
+
+        # sizeof(StrideS) = 16 bytes, encoded as 2xint64.
+        self.s_strides1 = torch.zeros((e, 2), device=device, dtype=torch.int64)
+        self.s_strides1[:, 0] = 2 * n
+        self.s_strides2 = torch.zeros((e, 2), device=device, dtype=torch.int64)
+        self.s_strides2[:, 0] = k
+
         self.group_size = group_size
 
     @staticmethod
@@ -972,42 +1272,53 @@ def activation_format() -> mk.FusedMoEActivationFormat:
         return mk.FusedMoEActivationFormat.Standard
 
     @staticmethod
-    def _supports_current_device() -> bool:
-        raise NotImplementedError(
-            "CutlassExpertsW4A8Fp8 is not yet used by an Oracle. "
-            "This method should not be called."
+    def is_supported_config(
+        cls: type[mk.FusedMoEExperts],
+        moe_config: FusedMoEConfig,
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+        activation_format: mk.FusedMoEActivationFormat,
+    ) -> tuple[bool, str | None]:
+        if moe_config.in_dtype != torch.bfloat16:
+            return (
+                False,
+                f"kernel does not support {moe_config.in_dtype} input/output dtype",
+            )
+
+        return mk.FusedMoEExperts.is_supported_config(
+            cls,
+            moe_config,
+            weight_key,
+            activation_key,
+            activation_format,
         )
 
+    @staticmethod
+    def _supports_current_device() -> bool:
+        return cutlass_group_gemm_supported()
+
     @staticmethod
     def _supports_no_act_and_mul() -> bool:
-        raise NotImplementedError(
-            "CutlassExpertsW4A8Fp8 is not yet used by an Oracle. "
-            "This method should not be called."
-        )
+        return False
 
     @staticmethod
     def _supports_quant_scheme(
         weight_key: QuantKey | None,
         activation_key: QuantKey | None,
     ) -> bool:
-        raise NotImplementedError(
-            "CutlassExpertsW4A8Fp8 is not yet used by an Oracle. "
-            "This method should not be called."
-        )
+        return (weight_key, activation_key) == (kInt4Static, kFp8DynamicTokenSym)
 
     @staticmethod
     def _supports_activation(activation: MoEActivation) -> bool:
-        raise NotImplementedError(
-            "CutlassExpertsW4A8Fp8 is not yet used by an Oracle. "
-            "This method should not be called."
+        return activation in (
+            MoEActivation.SILU,
+            MoEActivation.GELU,
+            MoEActivation.SWIGLUOAI,
         )
 
     @staticmethod
     def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
-        raise NotImplementedError(
-            "CutlassExpertsW4A8Fp8 is not yet used by an Oracle. "
-            "This method should not be called."
-        )
+        return True
 
     def supports_expert_map(self) -> bool:
         return True
@@ -1099,111 +1410,3 @@ def apply(
             topk_weights,
             self.group_size,
         )
-
-
-def cutlass_moe_w4a8_fp8(
-    a: torch.Tensor,
-    w1_q: torch.Tensor,
-    w2_q: torch.Tensor,
-    topk_weights: torch.Tensor,
-    topk_ids: torch.Tensor,
-    a_strides1: torch.Tensor,
-    a_strides2: torch.Tensor,
-    b_strides1: torch.Tensor,
-    b_strides2: torch.Tensor,
-    c_strides1: torch.Tensor,
-    c_strides2: torch.Tensor,
-    s_strides1: torch.Tensor,
-    s_strides2: torch.Tensor,
-    quant_config: FusedMoEQuantConfig,
-    moe_config: FusedMoEConfig,
-    activation: MoEActivation = MoEActivation.SILU,
-    expert_map: torch.Tensor | None = None,
-    apply_router_weight_on_input: bool = False,
-    global_num_experts: int = -1,
-    group_size: int = 128,
-) -> torch.Tensor:
-    """
-    This function computes a w4a8-quantized Mixture of Experts (MoE) layer
-    using two sets of quantized weights, w1_q and w2_q, and top-k gating
-    mechanism. The matrix multiplications are implemented with CUTLASS
-    mixed-dtype grouped gemm.
-
-    Parameters:
-    - a (torch.Tensor): The input tensor to the MoE layer.
-        Shape: [M, K]
-    - w1_q (torch.Tensor): The first set of fp8-quantized expert weights.
-        Shape: [num_experts, 2*N, K // packed_factor]
-    - w2_q (torch.Tensor): The second set of fp8-quantized expert weights.
-        Shape: [num_experts, K, N // packed_factor]
-    - topk_weights (torch.Tensor): The weights of each token->expert mapping.
-    - topk_ids (torch.Tensor): The token->expert mappings.
-    - a_strides1 (torch.Tensor): The input strides for the first gemm.
-        Shape: [num_experts]
-    - a_strides2 (torch.Tensor): The input strides for the second gemm.
-        Shape: [num_experts]
-    - b_strides1 (torch.Tensor): The packed layout for the first gemm weights.
-        Shape: [num_experts, 3]
-        dtype: torch.int32
-    - b_strides2 (torch.Tensor): The packed layout for the second gemm weights.
-        Shape: [num_experts, 3]
-        dtype: torch.int32
-    - c_strides1 (torch.Tensor): The output strides for the first gemm.
-        Shape: [num_experts]
-    - c_strides2 (torch.Tensor): The output strides for the second gemm.
-        Shape: [num_experts]
-    - s_strides1 (torch.Tensor): strides for the group-wise scales for the first gemm.
-        Shape: [num_experts, 2]
-        dtype: torch.int64
-    - s_strides2 (torch.Tensor): strides for the group-wise scales for the second gemm.
-        Shape: [num_experts, 2]
-        dtype: torch.int64
-    - per_act_token (Optional[bool]): Whether the scale is per-token or
-                                      per-tensor.
-    - activation (MoEActivation): The activation function to use.
-    - expert_map (Optional[torch.Tensor]): In the case of Expert parallel,
-        every Rank is responsible for a subset of experts. expert_map is a
-        mapping from global expert-id to local expert-id. When expert_map[i]
-        is -1, it means that this Rank is not responsible for global
-        expert-id i.
-    - apply_router_weight_on_input (bool): When true, the topk weights are
-        applied directly on the inputs. This is only applicable when topk is 1.
-    - global_num_experts (int): The total number of experts.
-    - group_size (int): The number of weights per scale factor
-
-    Returns:
-    - torch.Tensor: The bf16 output tensor after applying the MoE layer.
-    """
-    assert quant_config is not None
-
-    num_experts = global_num_experts if global_num_experts != -1 else w1_q.size(0)
-
-    fn = mk.FusedMoEKernel(
-        MoEPrepareAndFinalizeNoDPEPModular(),
-        CutlassExpertsW4A8Fp8(
-            out_dtype=a.dtype,
-            a_strides1=a_strides1,
-            a_strides2=a_strides2,
-            b_strides1=b_strides1,
-            b_strides2=b_strides2,
-            c_strides1=c_strides1,
-            c_strides2=c_strides2,
-            s_strides1=s_strides1,
-            s_strides2=s_strides2,
-            moe_config=moe_config,
-            quant_config=quant_config,
-            group_size=group_size,
-        ),
-    )
-
-    return fn.apply(
-        a,
-        w1_q,
-        w2_q,
-        topk_weights,
-        topk_ids,
-        activation=activation,
-        global_num_experts=num_experts,
-        expert_map=expert_map,
-        apply_router_weight_on_input=apply_router_weight_on_input,
-    )
diff --git a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/experts/deep_gemm_moe.py
similarity index 55%
rename from vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
rename to vllm/model_executor/layers/fused_moe/experts/deep_gemm_moe.py
index 03341378a13c..e3e15e316182 100644
--- a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/deep_gemm_moe.py
@@ -25,15 +25,20 @@
     per_token_group_quant_fp8_packed_for_deepgemm,
     silu_mul_per_token_group_quant_fp8_colmajor,
 )
+from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+    silu_mul_quant_fp8_packed_triton as fused_silu_mul_fp8_quant_packed,
+)
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     QuantKey,
     kFp8Dynamic128Sym,
     kFp8Static128BlockSym,
+    kMxfp4Static,
 )
 from vllm.utils.deep_gemm import (
     DeepGemmQuantScaleFMT,
     get_mk_alignment_for_contiguous_layout,
     is_deep_gemm_supported,
+    m_grouped_fp8_fp4_gemm_nt_contiguous,
     m_grouped_fp8_gemm_nt_contiguous,
 )
 from vllm.utils.import_utils import has_deep_gemm
@@ -123,6 +128,8 @@ def __init__(self, moe_config: FusedMoEConfig, quant_config: FusedMoEQuantConfig
         assert not quant_config.per_act_token_quant
         assert not quant_config.per_out_ch_quant
 
+        self.gemm1_clamp_limit = quant_config.gemm1_clamp_limit
+
     @staticmethod
     def activation_format() -> mk.FusedMoEActivationFormat:
         return mk.FusedMoEActivationFormat.Standard
@@ -197,8 +204,15 @@ def _act_mul_quant(
         M_sum, N = input.size()
         activation_out_dim = self.adjust_N_for_activation(N, activation)
 
-        # 1. DeepGemm UE8M0: use packed per-token-group quant
+        # 1. DeepGemm UE8M0: fused SiLU+mul+clamp+quant+pack
         if scale_fmt == DeepGemmQuantScaleFMT.UE8M0:
+            if activation == MoEActivation.SILU:
+                return fused_silu_mul_fp8_quant_packed(
+                    input=input,
+                    output_q=output,
+                    group_size=block_k,
+                    clamp_limit=self.gemm1_clamp_limit,
+                )
             act_out = torch.empty(
                 (M_sum, activation_out_dim), dtype=input.dtype, device=input.device
             )
@@ -217,6 +231,7 @@ def _act_mul_quant(
                 input=input,
                 output=output,
                 use_ue8m0=use_ue8m0,
+                clamp_limit=self.gemm1_clamp_limit,
             )
 
         # 3. fallback path for non-SiLU activations in non‑UE8M0 cases.
@@ -312,3 +327,226 @@ def apply(
             expert_map=expert_map,
             output=output,
         )
+
+
+class DeepGemmFP4Experts(mk.FusedMoEExpertsModular):
+    """DeepGemm-based fused MoE expert implementation for FP4 weights.
+
+    Uses m_grouped_fp8_fp4_gemm_nt_contiguous with FP8 activations and
+    MXFP4 (FP4 E2M1 packed as uint8) weights. Requires SM100+ (Blackwell).
+    """
+
+    # FP8 activation block size (hardcoded since mxfp4_w4a8 quant config
+    # does not set a block_shape on the activation descriptor).
+    _ACT_BLOCK_K = 128
+    # FP4 weight block size
+    _WEIGHT_BLOCK_K = 32
+
+    def __init__(self, moe_config: FusedMoEConfig, quant_config: FusedMoEQuantConfig):
+        super().__init__(moe_config=moe_config, quant_config=quant_config)
+        assert quant_config.weight_quant_dtype == "mxfp4"
+        assert not quant_config.per_act_token_quant
+        assert not quant_config.per_out_ch_quant
+
+        self.gemm1_clamp_limit = quant_config.gemm1_clamp_limit
+
+    @staticmethod
+    def activation_format() -> mk.FusedMoEActivationFormat:
+        return mk.FusedMoEActivationFormat.Standard
+
+    @staticmethod
+    def _supports_current_device() -> bool:
+        from vllm.platforms import current_platform
+
+        return (
+            is_deep_gemm_supported()
+            and current_platform.is_device_capability_family(100)
+        )
+
+    @staticmethod
+    def _supports_no_act_and_mul() -> bool:
+        return False
+
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        SUPPORTED_W_A = [
+            (kMxfp4Static, kFp8Dynamic128Sym),
+        ]
+        return (weight_key, activation_key) in SUPPORTED_W_A
+
+    @staticmethod
+    def _supports_activation(activation: MoEActivation) -> bool:
+        return activation in [MoEActivation.SILU, MoEActivation.SWIGLUSTEP]
+
+    @staticmethod
+    def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
+        return not (
+            moe_parallel_config.use_fi_nvl_two_sided_kernels
+            or moe_parallel_config.use_fi_nvl_one_sided_kernels
+        )
+
+    def supports_expert_map(self) -> bool:
+        return True
+
+    def finalize_weight_and_reduce_impl(self) -> mk.TopKWeightAndReduce:
+        return TopKWeightAndReduceNoOP()
+
+    def workspace_shapes(
+        self,
+        M: int,
+        N: int,
+        K: int,
+        topk: int,
+        global_num_experts: int,
+        local_num_experts: int,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        activation: MoEActivation,
+    ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...]]:
+        block_m = get_mk_alignment_for_contiguous_layout()[0]
+        M_sum = compute_aligned_M(
+            M, topk, local_num_experts, block_m, expert_tokens_meta
+        )
+        assert M_sum % block_m == 0
+
+        activation_out_dim = self.adjust_N_for_activation(N, activation)
+        workspace1 = (M_sum, max(activation_out_dim, K))
+        workspace2 = (M_sum, max(N, K))
+        output = (M, K)
+        return (workspace1, workspace2, output)
+
+    def _act_mul_quant(
+        self, input: torch.Tensor, output: torch.Tensor, activation: MoEActivation
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        block_k = self._ACT_BLOCK_K
+        scale_fmt = DeepGemmQuantScaleFMT.from_oracle()
+
+        M_sum, N = input.size()
+        activation_out_dim = self.adjust_N_for_activation(N, activation)
+
+        if scale_fmt == DeepGemmQuantScaleFMT.UE8M0:
+            assert activation == MoEActivation.SILU
+            return fused_silu_mul_fp8_quant_packed(
+                input=input,
+                output_q=output,
+                group_size=block_k,
+                clamp_limit=self.gemm1_clamp_limit,
+            )
+
+        if activation == MoEActivation.SILU:
+            use_ue8m0 = scale_fmt == DeepGemmQuantScaleFMT.FLOAT32_CEIL_UE8M0
+            return silu_mul_per_token_group_quant_fp8_colmajor(
+                input=input,
+                output=output,
+                use_ue8m0=use_ue8m0,
+                clamp_limit=self.gemm1_clamp_limit,
+            )
+
+        act_out = torch.empty(
+            (M_sum, activation_out_dim), dtype=input.dtype, device=input.device
+        )
+        self.activation(activation, act_out, input)
+        return per_token_group_quant_fp8(
+            act_out, block_k, column_major_scales=True, out_q=output
+        )
+
+    def apply(
+        self,
+        output: torch.Tensor,
+        hidden_states: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        activation: MoEActivation,
+        global_num_experts: int,
+        expert_map: torch.Tensor | None,
+        a1q_scale: torch.Tensor | None,
+        a2_scale: torch.Tensor | None,
+        workspace13: torch.Tensor,
+        workspace2: torch.Tensor,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        apply_router_weight_on_input: bool,
+    ):
+        assert a1q_scale is not None
+        assert a2_scale is None
+        assert self.w1_scale is not None
+        assert self.w2_scale is not None
+
+        a1q = hidden_states
+        _, N, _ = w1.size()
+        # K comes from activations (full hidden dim), not from w1 which is
+        # packed FP4 (E, N, K//2).
+        K = a1q.size(1)
+
+        local_num_experts = w1.size(0)
+        if global_num_experts == -1:
+            global_num_experts = local_num_experts
+
+        M_sum = compute_aligned_M(
+            M=topk_ids.size(0),
+            num_topk=topk_ids.size(1),
+            local_num_experts=local_num_experts,
+            alignment=get_mk_alignment_for_contiguous_layout()[0],
+            expert_tokens_meta=expert_tokens_meta,
+        )
+
+        a1q_perm = _resize_cache(
+            workspace13.view(dtype=torch.float8_e4m3fn), (M_sum, K)
+        )
+        a1q, a1q_scale, expert_ids, inv_perm = deepgemm_moe_permute(
+            aq=a1q,
+            aq_scale=a1q_scale,
+            topk_ids=topk_ids,
+            local_num_experts=local_num_experts,
+            expert_map=expert_map,
+            expert_tokens_meta=expert_tokens_meta,
+            aq_out=a1q_perm,
+        )
+        assert a1q.size(0) == M_sum
+
+        # FC1: FP8 activations x FP4 weights
+        # DeepGEMM 2.4.2 requires FP4-packed weights as int8 (kPackedFP4).
+        mm1_out = _resize_cache(workspace2, (M_sum, N))
+        m_grouped_fp8_fp4_gemm_nt_contiguous(
+            (a1q, a1q_scale),
+            (w1.view(torch.int8), self.w1_scale),
+            mm1_out,
+            expert_ids,
+            recipe_a=(1, self._ACT_BLOCK_K),
+            recipe_b=(1, self._WEIGHT_BLOCK_K),
+        )
+
+        # SwiGLU activation + FP8 requant
+        activation_out_dim = self.adjust_N_for_activation(N, activation)
+        quant_out = _resize_cache(
+            workspace13.view(dtype=torch.float8_e4m3fn), (M_sum, activation_out_dim)
+        )
+        a2q, a2q_scale = self._act_mul_quant(
+            input=mm1_out.view(-1, N), output=quant_out, activation=activation
+        )
+
+        # FC2: FP8 activations x FP4 weights
+        mm2_out = _resize_cache(workspace2, (M_sum, K))
+        m_grouped_fp8_fp4_gemm_nt_contiguous(
+            (a2q, a2q_scale),
+            (w2.view(torch.int8), self.w2_scale),
+            mm2_out,
+            expert_ids,
+            recipe_a=(1, self._ACT_BLOCK_K),
+            recipe_b=(1, self._WEIGHT_BLOCK_K),
+        )
+
+        if apply_router_weight_on_input:
+            topk_weights = torch.ones_like(topk_weights)
+
+        deepgemm_unpermute_and_reduce(
+            a=mm2_out,
+            topk_ids=topk_ids,
+            topk_weights=topk_weights,
+            inv_perm=inv_perm,
+            expert_map=expert_map,
+            output=output,
+        )
diff --git a/vllm/model_executor/layers/fused_moe/fallback.py b/vllm/model_executor/layers/fused_moe/experts/fallback.py
similarity index 100%
rename from vllm/model_executor/layers/fused_moe/fallback.py
rename to vllm/model_executor/layers/fused_moe/experts/fallback.py
diff --git a/vllm/model_executor/layers/fused_moe/experts/flashinfer_b12x_moe.py b/vllm/model_executor/layers/fused_moe/experts/flashinfer_b12x_moe.py
new file mode 100644
index 000000000000..6481434f2e78
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/experts/flashinfer_b12x_moe.py
@@ -0,0 +1,223 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEParallelConfig,
+    FusedMoEQuantConfig,
+)
+from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
+    TopKWeightAndReduceNoOP,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    QuantKey,
+    kNvfp4Dynamic,
+    kNvfp4Static,
+)
+from vllm.platforms import current_platform
+from vllm.utils.flashinfer import (
+    flashinfer_b12x_fused_moe,
+    flashinfer_convert_sf_to_mma_layout,
+    has_flashinfer_b12x_moe,
+)
+
+
+class FlashInferB12xExperts(mk.FusedMoEExpertsModular):
+    """FlashInfer CuteDSL fused MoE expert for SM12x (SM120/SM121,
+    RTX Pro 6000 / DGX Spark).
+
+    Uses ``b12x_fused_moe`` from FlashInfer PR #3080 which fuses token
+    dispatch, two GEMMs, SwiGLU activation, and topk-weight reduction into a
+    single kernel call.  Input quantization (BF16→FP4) is performed inside the
+    kernel so BF16 hidden states are passed directly.
+
+    Weight scale factors are converted to the MMA layout produced by
+    ``convert_sf_to_mma_layout`` once during ``process_weights_after_loading``
+    and cached as ``w1_sf_mma`` / ``w2_sf_mma``.
+
+    Only NVFP4 (kNvfp4Static/kNvfp4Dynamic) quantization is supported.
+    """
+
+    def __init__(
+        self,
+        moe_config: FusedMoEConfig,
+        quant_config: FusedMoEQuantConfig,
+    ):
+        super().__init__(moe_config=moe_config, quant_config=quant_config)
+        assert quant_config.quant_dtype == "nvfp4", (
+            "FlashInferB12xExperts only supports nvfp4 quantization."
+        )
+        self.out_dtype = moe_config.in_dtype
+        self.num_local_experts = moe_config.num_local_experts
+        self.ep_rank = moe_config.moe_parallel_config.ep_rank
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        # Normalise block scales to absorb the per-expert weight global scale
+        # (w_gs).  vLLM's NVFP4 convention stores:
+        #   block_scale = max_abs * w_gs / fp4_max,  g1_alphas = 1/w_gs
+        # The SM12x kernel treats w1_alpha (= g1_alphas) as a per-expert weight
+        # dequant multiplier separate from input_gs (activation scale).  We bake
+        # w_gs into the block scales so that w1_alpha = 1.0 and the kernel sees
+        # the simpler form:
+        #   block_scale = max_abs / fp4_max,  w1_alpha = 1.0
+        # The FP4-packed values and dequantised results are identical in both
+        # representations.  We set scale_2 = 1.0 to signal that the bake-in is
+        # already done.
+        layer.w13_weight_scale.data = (
+            layer.w13_weight_scale.float() * layer.w13_weight_scale_2.view(-1, 1, 1)
+        ).to(layer.w13_weight_scale.dtype)
+        layer.w13_weight_scale_2.data.fill_(1.0)
+
+        layer.w2_weight_scale.data = (
+            layer.w2_weight_scale.float() * layer.w2_weight_scale_2.view(-1, 1, 1)
+        ).to(layer.w2_weight_scale.dtype)
+        layer.w2_weight_scale_2.data.fill_(1.0)
+
+        # The SM12x kernel uses dynamic per-block quantization for FC2 input
+        # activations (the SwiGLU output before the down projection).  The
+        # calibrated a2_gscale from the modelopt checkpoint (~tens to hundreds)
+        # is intended for static-quantisation backends (TRTLLM/CUTLASS) and
+        # causes every intermediate activation to saturate at max FP4 when
+        # multiplied by values that large.  Force to 1.0 so the kernel uses
+        # its own per-block dynamic scale.
+        if self.a2_gscale is not None:
+            self.a2_gscale.fill_(1.0)
+
+        # Precompute MMA-layout views of the weight scale factors once here
+        # rather than recomputing on every forward pass.
+        assert self.w1_scale is not None
+        num_experts_w1, m1, k1_sf = self.w1_scale.shape
+        k1 = k1_sf * 16
+        self.w1_sf_mma = flashinfer_convert_sf_to_mma_layout(
+            self.w1_scale.reshape(num_experts_w1 * m1, k1_sf),
+            m=m1,
+            k=k1,
+            num_groups=num_experts_w1,
+        )
+
+        assert self.w2_scale is not None
+        num_experts_w2, m2, k2_sf = self.w2_scale.shape
+        k2 = k2_sf * 16
+        self.w2_sf_mma = flashinfer_convert_sf_to_mma_layout(
+            self.w2_scale.reshape(num_experts_w2 * m2, k2_sf),
+            m=m2,
+            k=k2,
+            num_groups=num_experts_w2,
+        )
+
+    @staticmethod
+    def activation_format() -> mk.FusedMoEActivationFormat:
+        return mk.FusedMoEActivationFormat.Standard
+
+    @staticmethod
+    def _supports_current_device() -> bool:
+        p = current_platform
+        return (
+            p.is_cuda()
+            and p.is_device_capability_family(120)
+            and has_flashinfer_b12x_moe()
+        )
+
+    @staticmethod
+    def _supports_no_act_and_mul() -> bool:
+        return False
+
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        return (weight_key, activation_key) == (kNvfp4Static, kNvfp4Dynamic)
+
+    @staticmethod
+    def _supports_activation(activation: MoEActivation) -> bool:
+        return activation == MoEActivation.SILU
+
+    @staticmethod
+    def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
+        return True
+
+    def supports_expert_map(self) -> bool:
+        return False
+
+    def finalize_weight_and_reduce_impl(self) -> mk.TopKWeightAndReduce:
+        # b12x_fused_moe applies topk weights internally.
+        return TopKWeightAndReduceNoOP()
+
+    def workspace_shapes(
+        self,
+        M: int,
+        N: int,
+        K: int,
+        topk: int,
+        global_num_experts: int,
+        local_num_experts: int,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        activation: MoEActivation,
+    ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...]]:
+        # b12x_fused_moe manages its own internal workspace.
+        workspace1 = (1,)
+        workspace2 = (0,)
+        output_shape = (M, K)
+        return (workspace1, workspace2, output_shape)
+
+    @property
+    def expects_unquantized_inputs(self) -> bool:
+        # b12x_fused_moe expects BF16 hidden states and performs its own FP4
+        # quantization internally.  Returning True prevents the modular kernel
+        # from pre-quantizing activations, which would produce an FP4-packed
+        # tensor with size(-1)=k//2 and break the scale-factor conversion that
+        # expects size(-1)=k.
+        return True
+
+    def apply(
+        self,
+        output: torch.Tensor,
+        hidden_states: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        activation: MoEActivation,
+        global_num_experts: int,
+        expert_map: torch.Tensor | None,
+        a1q_scale: torch.Tensor | None,
+        a2_scale: torch.Tensor | None,
+        workspace13: torch.Tensor | None,
+        workspace2: torch.Tensor | None,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        apply_router_weight_on_input: bool | None,
+    ):
+        assert self.w1_scale is not None and self.w2_scale is not None, (
+            "w1_scale and w2_scale must not be None for FlashInferB12xExperts"
+        )
+        assert self.g1_alphas is not None and self.g2_alphas is not None, (
+            "g1_alphas and g2_alphas must not be None for FlashInferB12xExperts"
+        )
+        assert self.a2_gscale is not None, (
+            "a2_gscale must not be None for FlashInferB12xExperts"
+        )
+
+        top_k = topk_ids.shape[1]
+
+        flashinfer_b12x_fused_moe(
+            x=hidden_states,
+            token_selected_experts=topk_ids.to(torch.int32),
+            token_final_scales=topk_weights,
+            w1_weight=w1,
+            w1_weight_sf=self.w1_sf_mma,
+            w1_alpha=self.g1_alphas,
+            fc2_input_scale=self.a2_gscale,
+            w2_weight=w2,
+            w2_weight_sf=self.w2_sf_mma,
+            w2_alpha=self.g2_alphas,
+            num_experts=global_num_experts,
+            top_k=top_k,
+            num_local_experts=self.num_local_experts,
+            output_dtype=self.out_dtype,
+            output=output,
+        )
diff --git a/vllm/model_executor/layers/fused_moe/experts/flashinfer_cutedsl_batched_moe.py b/vllm/model_executor/layers/fused_moe/experts/flashinfer_cutedsl_batched_moe.py
new file mode 100644
index 000000000000..5eaaf46739fc
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/experts/flashinfer_cutedsl_batched_moe.py
@@ -0,0 +1,353 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm import envs
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEParallelConfig,
+    FusedMoEQuantConfig,
+)
+from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
+    TopKWeightAndReduceDelegate,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    QuantKey,
+    kNvfp4Dynamic,
+    kNvfp4Static,
+)
+from vllm.platforms import current_platform
+from vllm.utils.flashinfer import (
+    flashinfer_cutedsl_grouped_gemm_nt_masked,
+    has_flashinfer_cutedsl_grouped_gemm_nt_masked,
+    scaled_fp4_grouped_quantize,
+    silu_and_mul_scaled_nvfp4_experts_quantize,
+)
+
+logger = init_logger(__name__)
+
+
+class FlashInferCuteDSLBatchedExperts(mk.FusedMoEExpertsModular):
+    def __init__(
+        self,
+        moe_config: FusedMoEConfig,
+        quant_config: FusedMoEQuantConfig,
+        max_num_tokens: int,
+        num_dispatchers: int,
+    ):
+        super().__init__(
+            moe_config=moe_config,
+            quant_config=quant_config,
+            max_num_tokens=max_num_tokens,
+            num_dispatchers=num_dispatchers,
+        )
+        assert quant_config.quant_dtype == "nvfp4", (
+            "Only nvfp4 quantization are currently supported."
+        )
+        self.out_dtype = moe_config.in_dtype
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        layer.w13_weight_scale_2.data.mul_(layer.w13_input_scale)
+        layer.w2_weight_scale_2.data.mul_(layer.w2_input_scale)
+
+    @staticmethod
+    def activation_format() -> mk.FusedMoEActivationFormat:
+        return mk.FusedMoEActivationFormat.BatchedExperts
+
+    @staticmethod
+    def _supports_current_device() -> bool:
+        p = current_platform
+        return (
+            p.is_cuda()
+            and p.is_device_capability_family(100)
+            and has_flashinfer_cutedsl_grouped_gemm_nt_masked()
+        )
+
+    @staticmethod
+    def _supports_no_act_and_mul() -> bool:
+        return False
+
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        SUPPORTED_W_A = [
+            (kNvfp4Static, kNvfp4Dynamic),
+        ]
+        return (weight_key, activation_key) in SUPPORTED_W_A
+
+    @staticmethod
+    def _supports_activation(activation: MoEActivation) -> bool:
+        return activation == MoEActivation.SILU
+
+    @staticmethod
+    def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
+        return True
+
+    def supports_expert_map(self) -> bool:
+        return False
+
+    def finalize_weight_and_reduce_impl(self) -> mk.TopKWeightAndReduce:
+        # Let PrepareAndFinalize::finalize() decide the impl.
+        return TopKWeightAndReduceDelegate()
+
+    def workspace_shapes(
+        self,
+        M: int,
+        N: int,
+        K: int,
+        topk: int,
+        global_num_experts: int,
+        local_num_experts: int,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        activation: MoEActivation,
+    ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...]]:
+        """
+        Compute the shapes for the temporary and final outputs of the two gemms
+        and activation in the fused expert function.  Since the gemms are
+        independent, the workspace for the first gemm can be shared with the
+        workspace for the last gemm.
+
+        Returns a tuple of:
+        - workspace13 shape tuple: must be large enough to hold the
+          result of either expert gemm.
+        - workspace2 shape tuple: must be large enough to hold the
+          result of the activation function.
+        - output shape tuple: must be exact size of the final gemm output.
+        - Workspace type: The dtype to use for the workspace tensors.
+        - Note: in order for activation chunking to work, the first dimension
+          of each tuple must be the number of tokens.
+        """
+
+        # We use global_num_experts due to how moe_align_block_size handles
+        # expert_maps.
+        K_dim = K * 2 if envs.VLLM_DEEPEPLL_NVFP4_DISPATCH else K
+        output_shape = (local_num_experts, M, K_dim)
+        workspace2 = (local_num_experts, M, N)
+        workspace1 = output_shape
+        return (workspace1, workspace2, output_shape)
+
+    def apply(
+        self,
+        output: torch.Tensor,
+        hidden_states: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        activation: MoEActivation,
+        global_num_experts: int,
+        expert_map: torch.Tensor | None,
+        a1q_scale: torch.Tensor | None,
+        a2_scale: torch.Tensor | None,  # Not used
+        workspace13: torch.Tensor | None,
+        workspace2: torch.Tensor | None,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        apply_router_weight_on_input: bool | None,
+    ):
+        assert self.quant_dtype == "nvfp4", (
+            "Only nvfp4 quantization are currently supported."
+        )
+        # Ensure w1_scale and w2_scale are not None before calling view
+        assert self.w1_scale is not None and self.w2_scale is not None, (
+            "w1_scale and w2_scale must not be None for FlashInferExperts"
+        )
+        assert expert_tokens_meta is not None
+        expert_num_tokens = expert_tokens_meta.expert_num_tokens
+        assert hidden_states.ndim == 3
+        assert self.w1_scale.ndim == 3
+        assert self.w2_scale.ndim == 3
+
+        input_global_scale = (
+            None if envs.VLLM_DEEPEPLL_NVFP4_DISPATCH else self.a1_gscale
+        )
+        flashinfer_hidden_states = (
+            (hidden_states, a1q_scale)
+            if envs.VLLM_DEEPEPLL_NVFP4_DISPATCH
+            else hidden_states
+        )
+        flashinfer_cutedsl_moe_masked(
+            hidden_states=flashinfer_hidden_states,
+            input_global_scale=input_global_scale,
+            w1=w1,
+            w1_blockscale=self.w1_scale,
+            w1_alpha=self.g1_alphas,
+            w2=w2,
+            a2_global_scale=self.a2_gscale,
+            w2_blockscale=self.w2_scale,
+            w2_alpha=self.g2_alphas,
+            masked_m=expert_num_tokens,
+            workspace=workspace2,
+            out=output,
+        )
+
+
+def get_cute_dtype(input: torch.Tensor) -> str:
+    if input.dtype == torch.bfloat16:
+        return "bfloat16"
+    elif input.dtype == torch.float16:
+        return "float16"
+    elif input.dtype == torch.float32:
+        return "float32"
+    else:
+        raise ValueError(f"Unsupported cute dtype {input.dtype}")
+
+
+def flashinfer_cutedsl_moe_masked(
+    hidden_states: torch.Tensor | tuple[torch.Tensor, torch.Tensor],
+    input_global_scale: torch.Tensor,
+    w1: torch.Tensor,
+    w1_blockscale: torch.Tensor,
+    w1_alpha,
+    w2: torch.Tensor,
+    a2_global_scale: torch.Tensor,
+    w2_blockscale: torch.Tensor,
+    w2_alpha,
+    masked_m: torch.Tensor,
+    workspace: torch.Tensor,
+    out: torch.Tensor,
+):
+    """
+    Perform masked Mixture-of-Experts computation with FlashInfer's CuteDSL
+    kernels.
+
+    Args:
+        hidden_states: Either of the following case
+            * torch.Tensor: [num_experts, m, k], bf16
+            * tuple[torch.Tensor, torch.Tensor]: [num_experts, m, k // 2],
+                  uint8, [num_experts, m, k // 16], float8_e4m3fn
+        input_global_scale (torch.Tensor): (l,)
+        w1 (torch.Tensor): fp4 weights, [l, 2 * n, k // 2], uint8
+        w1_blockscale (torch.Tensor): blockscale factors, e4m3,
+        w1_alpha (torch.Tensor): (l,)
+        w2 (torch.Tensor): fp4 weights, [l, k, n // 2], uint8
+        a2_global_scale (torch.Tensor): (l,)
+        w2_blockscale (torch.Tensor): blockscale factors, e4m3,
+        w2_alpha (torch.Tensor): (l,)
+        masked_m (torch.Tensor): Masked dimension indices
+        workspace (torch.Tensor): For gateup_output
+
+    Notes:
+        - Assumes max(masked_m) <= m.
+    """
+
+    # === Assertions on dtypes ===
+    assert w1.dtype == torch.uint8, f"w1 must be uint8, got {w1.dtype}"
+    assert w1_blockscale.dtype == torch.float8_e4m3fn, (
+        f"w1_blockscale must be float8_e4m3fn, got {w1_blockscale.dtype}"
+    )
+    assert w1_alpha.dtype == torch.float32, (
+        f"w1_alpha must be float32, got {w1_alpha.dtype}"
+    )
+    assert w2.dtype == torch.uint8, f"w2 must be uint8, got {w2.dtype}"
+    assert a2_global_scale.dtype == torch.float32, (
+        f"a2_global_scale must be float32, got {a2_global_scale.dtype}"
+    )
+    assert w2_blockscale.dtype == torch.float8_e4m3fn, (
+        f"w2_blockscale must be float8_e4m3fn, got {w2_blockscale.dtype}"
+    )
+    assert w2_alpha.dtype == torch.float32, (
+        f"w2_alpha must be float32, got {w2_alpha.dtype}"
+    )
+
+    # === Assertions on shapes ===
+    n = w2.shape[-1] * 2  # intermediate dimension
+    if isinstance(hidden_states, tuple):
+        assert input_global_scale is None, (
+            "input_global_scale is needed when input needs quant"
+        )
+
+        aq = hidden_states[0].view(torch.uint8)
+        aq_sf = hidden_states[1].view(torch.float8_e4m3fn)
+        # m, k_by_2, num_experts = aq.shape
+        num_experts, m, k_by_2 = aq.shape
+        k = k_by_2 * 2
+        aq = aq.permute(1, 2, 0)
+    else:
+        num_experts, m, k = hidden_states.shape
+
+        assert input_global_scale.dtype == torch.float32, (
+            f"input_global_scale must be float32, got {input_global_scale.dtype}"
+        )
+        assert input_global_scale.shape == (num_experts,), (
+            f"input_global_scale must be (l,), got {input_global_scale.shape}"
+        )
+
+        aq, aq_sf = scaled_fp4_grouped_quantize(
+            hidden_states,
+            masked_m,
+            input_global_scale,
+        )
+
+    assert w1.shape[-2] == 2 * n, f"w1 last-2 dim must be 2*n, got {w1.shape}"
+    assert w1.shape[-1] * 2 == k, (
+        f"w1 last dim * 2 must equal k, got {w1.shape[-1]} vs k={k}"
+    )
+    assert w2.shape[-2:] == (
+        k,
+        n // 2,
+    ), f"w2 shape mismatch, got {w2.shape[-2:]}, expected {(k, n // 2)}"
+
+    assert w1_alpha.shape == (num_experts,), (
+        f"w1_alpha must be (l,), got {w1_alpha.shape}"
+    )
+    assert a2_global_scale.shape == (num_experts,), (
+        f"a2_global_scale must be (l,), got {a2_global_scale.shape}"
+    )
+    assert w2_alpha.shape == (num_experts,), (
+        f"w2_alpha must be (l,), got {w2_alpha.shape}"
+    )
+
+    workspace = workspace.permute(1, 2, 0)  # requirement of kernel
+    sf_vec_size = 16
+    assert aq_sf.dtype == torch.float8_e4m3fn
+    assert aq.dtype == torch.uint8
+    ab_dtype = "float4_e2m1fn"
+    sf_dtype = "float8_e4m3fn"
+
+    if isinstance(hidden_states, tuple):
+        c_dtype = "bfloat16"
+    else:
+        c_dtype = get_cute_dtype(hidden_states)
+
+    # Gemm1
+    flashinfer_cutedsl_grouped_gemm_nt_masked(
+        (aq, aq_sf),
+        (w1.permute(1, 2, 0), w1_blockscale),
+        workspace,
+        masked_m,
+        ab_dtype=ab_dtype,
+        sf_dtype=sf_dtype,
+        c_dtype=c_dtype,
+        sf_vec_size=sf_vec_size,
+        alpha=w1_alpha.view(1, 1, num_experts),
+        alpha_dtype=get_cute_dtype(w1_alpha),
+    )  # in logical [m, n, l]
+
+    # SILU and quantization
+    diq, diq_sf = silu_and_mul_scaled_nvfp4_experts_quantize(
+        workspace.permute(2, 0, 1),
+        masked_m,
+        a2_global_scale,
+    )
+
+    # Gemm2
+    out = out.permute(1, 2, 0)  # requirement of kernel
+    flashinfer_cutedsl_grouped_gemm_nt_masked(
+        (diq, diq_sf),
+        (w2.permute(1, 2, 0), w2_blockscale),
+        out,
+        masked_m,
+        ab_dtype=ab_dtype,
+        sf_dtype=sf_dtype,
+        c_dtype=c_dtype,
+        sf_vec_size=sf_vec_size,
+        alpha=w2_alpha.view(1, 1, num_experts),
+        alpha_dtype=get_cute_dtype(w2_alpha),
+    )  # in logical [m, k, l]
+    out = out.permute(2, 0, 1)
diff --git a/vllm/model_executor/layers/fused_moe/experts/flashinfer_cutedsl_moe.py b/vllm/model_executor/layers/fused_moe/experts/flashinfer_cutedsl_moe.py
index a1db26619389..2310982792fe 100644
--- a/vllm/model_executor/layers/fused_moe/experts/flashinfer_cutedsl_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/flashinfer_cutedsl_moe.py
@@ -4,8 +4,6 @@
 import torch
 
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
-from vllm import envs
-from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.activation import MoEActivation
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEConfig,
@@ -13,7 +11,7 @@
     FusedMoEQuantConfig,
 )
 from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
-    TopKWeightAndReduceDelegate,
+    TopKWeightAndReduceNoOP,
 )
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     QuantKey,
@@ -22,33 +20,42 @@
 )
 from vllm.platforms import current_platform
 from vllm.utils.flashinfer import (
-    flashinfer_cutedsl_grouped_gemm_nt_masked,
-    has_flashinfer_cutedsl_grouped_gemm_nt_masked,
-    scaled_fp4_grouped_quantize,
-    silu_and_mul_scaled_nvfp4_experts_quantize,
+    flashinfer_cute_dsl_fused_moe_nvfp4,
+    has_flashinfer_cutedsl_moe_nvfp4,
 )
 
-logger = init_logger(__name__)
-
 
 class FlashInferCuteDSLExperts(mk.FusedMoEExpertsModular):
+    """
+    CuteDSL NvFP4 MoE experts using the FlashInfer functional API.
+
+    Uses Standard activation format (non-batched). The kernel handles
+    routing, expert computation, and reduction internally.
+    Supports expert parallelism natively.
+    """
+
     def __init__(
         self,
         moe_config: FusedMoEConfig,
         quant_config: FusedMoEQuantConfig,
-        max_num_tokens: int,
-        num_dispatchers: int,
     ):
         super().__init__(
             moe_config=moe_config,
             quant_config=quant_config,
-            max_num_tokens=max_num_tokens,
-            num_dispatchers=num_dispatchers,
         )
         assert quant_config.quant_dtype == "nvfp4", (
-            "Only nvfp4 quantization are currently supported."
+            "Only nvfp4 quantization is currently supported."
         )
         self.out_dtype = moe_config.in_dtype
+        self.hidden_dim = moe_config.hidden_dim
+        self.intermediate_size_per_partition = (
+            moe_config.intermediate_size_per_partition
+        )
+        self.topk = moe_config.experts_per_token
+        self.local_num_experts = moe_config.num_local_experts
+        self.global_num_experts = moe_config.num_experts
+        self.ep_rank = moe_config.moe_parallel_config.ep_rank
+        self.local_expert_offset = self.ep_rank * self.local_num_experts
 
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         layer.w13_weight_scale_2.data.mul_(layer.w13_input_scale)
@@ -56,7 +63,7 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
 
     @staticmethod
     def activation_format() -> mk.FusedMoEActivationFormat:
-        return mk.FusedMoEActivationFormat.BatchedExperts
+        return mk.FusedMoEActivationFormat.Standard
 
     @staticmethod
     def _supports_current_device() -> bool:
@@ -64,7 +71,7 @@ def _supports_current_device() -> bool:
         return (
             p.is_cuda()
             and p.is_device_capability_family(100)
-            and has_flashinfer_cutedsl_grouped_gemm_nt_masked()
+            and has_flashinfer_cutedsl_moe_nvfp4()
         )
 
     @staticmethod
@@ -86,15 +93,16 @@ def _supports_activation(activation: MoEActivation) -> bool:
         return activation == MoEActivation.SILU
 
     @staticmethod
-    def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
+    def _supports_parallel_config(
+        moe_parallel_config: FusedMoEParallelConfig,
+    ) -> bool:
         return True
 
     def supports_expert_map(self) -> bool:
         return False
 
     def finalize_weight_and_reduce_impl(self) -> mk.TopKWeightAndReduce:
-        # Let PrepareAndFinalize::finalize() decide the impl.
-        return TopKWeightAndReduceDelegate()
+        return TopKWeightAndReduceNoOP()
 
     def workspace_shapes(
         self,
@@ -107,29 +115,12 @@ def workspace_shapes(
         expert_tokens_meta: mk.ExpertTokensMetadata | None,
         activation: MoEActivation,
     ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...]]:
-        # We use global_num_experts due to how moe_align_block_size handles
-        # expert_maps.
-        """
-        Compute the shapes for the temporary and final outputs of the two gemms
-        and activation in the fused expert function.  Since the gemms are
-        independent, the workspace for the first gemm can be shared with the
-        workspace for the last gemm.
-
-        Returns a tuple of:
-        - workspace13 shape tuple: must be large enough to hold the
-          result of either expert gemm.
-        - workspace2 shape tuple: must be large enough to hold the
-          result of the activation function.
-        - output shape tuple: must be exact size of the final gemm output.
-        - Workspace type: The dtype to use for the workspace tensors.
-        - Note: in order for activation chunking to work, the first dimension
-          of each tuple must be the number of tokens.
-        """
-        K_dim = K * 2 if envs.VLLM_DEEPEPLL_NVFP4_DISPATCH else K
-        output_shape = (local_num_experts, M, K_dim)
-        workspace2 = (local_num_experts, M, N)
-        workspace1 = output_shape
-        return (workspace1, workspace2, output_shape)
+        workspace1 = (0,)
+        workspace2 = (0,)
+        # K is packed (K//2 for uint8), so output uses hidden_dim.
+        assert self.hidden_dim == K * 2
+        output = (M, self.hidden_dim)
+        return (workspace1, workspace2, output)
 
     def apply(
         self,
@@ -143,210 +134,36 @@ def apply(
         global_num_experts: int,
         expert_map: torch.Tensor | None,
         a1q_scale: torch.Tensor | None,
-        a2_scale: torch.Tensor | None,  # Not used
+        a2_scale: torch.Tensor | None,
         workspace13: torch.Tensor | None,
         workspace2: torch.Tensor | None,
         expert_tokens_meta: mk.ExpertTokensMetadata | None,
         apply_router_weight_on_input: bool | None,
     ):
-        assert self.quant_dtype == "nvfp4", (
-            "Only nvfp4 quantization are currently supported."
-        )
-        # Ensure w1_scale and w2_scale are not None before calling view
-        assert self.w1_scale is not None and self.w2_scale is not None, (
-            "w1_scale and w2_scale must not be None for FlashInferExperts"
-        )
-        assert expert_tokens_meta is not None
-        expert_num_tokens = expert_tokens_meta.expert_num_tokens
-        assert hidden_states.ndim == 3
-        assert self.w1_scale.ndim == 3
-        assert self.w2_scale.ndim == 3
-
-        input_global_scale = (
-            None if envs.VLLM_DEEPEPLL_NVFP4_DISPATCH else self.a1_gscale
-        )
-        flashinfer_hidden_states = (
-            (hidden_states, a1q_scale)
-            if envs.VLLM_DEEPEPLL_NVFP4_DISPATCH
-            else hidden_states
-        )
-        flashinfer_cutedsl_moe_masked(
-            hidden_states=flashinfer_hidden_states,
-            input_global_scale=input_global_scale,
-            w1=w1,
-            w1_blockscale=self.w1_scale,
+        assert self.quant_dtype == "nvfp4"
+        assert a1q_scale is not None
+        assert self.w1_scale is not None
+        assert self.w2_scale is not None
+
+        # a1q_scale is (M, K//16) float8_e4m3fn from fp4_quantize.
+        # The functional API expects x_sf with trailing dim: (M, K//16, 1).
+        x_sf = a1q_scale.unsqueeze(-1)
+
+        flashinfer_cute_dsl_fused_moe_nvfp4(
+            x=hidden_states,
+            x_sf=x_sf,
+            token_selected_experts=topk_ids.to(torch.int32),
+            token_final_scales=topk_weights.float(),
+            w1_weight=w1,
+            w1_weight_sf=self.w1_scale,
             w1_alpha=self.g1_alphas,
-            w2=w2,
-            a2_global_scale=self.a2_gscale,
-            w2_blockscale=self.w2_scale,
+            fc2_input_scale=self.a2_gscale,
+            w2_weight=w2,
+            w2_weight_sf=self.w2_scale,
             w2_alpha=self.g2_alphas,
-            masked_m=expert_num_tokens,
-            workspace=workspace2,
-            out=output,
+            num_experts=self.global_num_experts,
+            top_k=self.topk,
+            num_local_experts=self.local_num_experts,
+            local_expert_offset=self.local_expert_offset,
+            moe_output=output,
         )
-
-
-def get_cute_dtype(input: torch.Tensor) -> str:
-    if input.dtype == torch.bfloat16:
-        return "bfloat16"
-    elif input.dtype == torch.float16:
-        return "float16"
-    elif input.dtype == torch.float32:
-        return "float32"
-    else:
-        raise ValueError(f"Unsupported cute dtype {input.dtype}")
-
-
-def flashinfer_cutedsl_moe_masked(
-    hidden_states: torch.Tensor | tuple[torch.Tensor, torch.Tensor],
-    input_global_scale: torch.Tensor,
-    w1: torch.Tensor,
-    w1_blockscale: torch.Tensor,
-    w1_alpha,
-    w2: torch.Tensor,
-    a2_global_scale: torch.Tensor,
-    w2_blockscale: torch.Tensor,
-    w2_alpha,
-    masked_m: torch.Tensor,
-    workspace: torch.Tensor,
-    out: torch.Tensor,
-):
-    """
-    Perform masked Mixture-of-Experts computation with FlashInfer's CuteDSL
-    kernels.
-
-    Args:
-        hidden_states: Either of the following case
-            * torch.Tensor: [num_experts, m, k], bf16
-            * tuple[torch.Tensor, torch.Tensor]: [num_experts, m, k // 2],
-                  uint8, [num_experts, m, k // 16], float8_e4m3fn
-        input_global_scale (torch.Tensor): (l,)
-        w1 (torch.Tensor): fp4 weights, [l, 2 * n, k // 2], uint8
-        w1_blockscale (torch.Tensor): blockscale factors, e4m3,
-        w1_alpha (torch.Tensor): (l,)
-        w2 (torch.Tensor): fp4 weights, [l, k, n // 2], uint8
-        a2_global_scale (torch.Tensor): (l,)
-        w2_blockscale (torch.Tensor): blockscale factors, e4m3,
-        w2_alpha (torch.Tensor): (l,)
-        masked_m (torch.Tensor): Masked dimension indices
-        workspace (torch.Tensor): For gateup_output
-
-    Notes:
-        - Assumes max(masked_m) <= m.
-    """
-
-    # === Assertions on dtypes ===
-    assert w1.dtype == torch.uint8, f"w1 must be uint8, got {w1.dtype}"
-    assert w1_blockscale.dtype == torch.float8_e4m3fn, (
-        f"w1_blockscale must be float8_e4m3fn, got {w1_blockscale.dtype}"
-    )
-    assert w1_alpha.dtype == torch.float32, (
-        f"w1_alpha must be float32, got {w1_alpha.dtype}"
-    )
-    assert w2.dtype == torch.uint8, f"w2 must be uint8, got {w2.dtype}"
-    assert a2_global_scale.dtype == torch.float32, (
-        f"a2_global_scale must be float32, got {a2_global_scale.dtype}"
-    )
-    assert w2_blockscale.dtype == torch.float8_e4m3fn, (
-        f"w2_blockscale must be float8_e4m3fn, got {w2_blockscale.dtype}"
-    )
-    assert w2_alpha.dtype == torch.float32, (
-        f"w2_alpha must be float32, got {w2_alpha.dtype}"
-    )
-
-    # === Assertions on shapes ===
-    n = w2.shape[-1] * 2  # intermediate dimension
-    if isinstance(hidden_states, tuple):
-        assert input_global_scale is None, (
-            "input_global_scale is needed when input needs quant"
-        )
-
-        aq = hidden_states[0].view(torch.uint8)
-        aq_sf = hidden_states[1].view(torch.float8_e4m3fn)
-        # m, k_by_2, num_experts = aq.shape
-        num_experts, m, k_by_2 = aq.shape
-        k = k_by_2 * 2
-        aq = aq.permute(1, 2, 0)
-    else:
-        num_experts, m, k = hidden_states.shape
-
-        assert input_global_scale.dtype == torch.float32, (
-            f"input_global_scale must be float32, got {input_global_scale.dtype}"
-        )
-        assert input_global_scale.shape == (num_experts,), (
-            f"input_global_scale must be (l,), got {input_global_scale.shape}"
-        )
-
-        aq, aq_sf = scaled_fp4_grouped_quantize(
-            hidden_states,
-            masked_m,
-            input_global_scale,
-        )
-
-    assert w1.shape[-2] == 2 * n, f"w1 last-2 dim must be 2*n, got {w1.shape}"
-    assert w1.shape[-1] * 2 == k, (
-        f"w1 last dim * 2 must equal k, got {w1.shape[-1]} vs k={k}"
-    )
-    assert w2.shape[-2:] == (
-        k,
-        n // 2,
-    ), f"w2 shape mismatch, got {w2.shape[-2:]}, expected {(k, n // 2)}"
-
-    assert w1_alpha.shape == (num_experts,), (
-        f"w1_alpha must be (l,), got {w1_alpha.shape}"
-    )
-    assert a2_global_scale.shape == (num_experts,), (
-        f"a2_global_scale must be (l,), got {a2_global_scale.shape}"
-    )
-    assert w2_alpha.shape == (num_experts,), (
-        f"w2_alpha must be (l,), got {w2_alpha.shape}"
-    )
-
-    workspace = workspace.permute(1, 2, 0)  # requirement of kernel
-    sf_vec_size = 16
-    assert aq_sf.dtype == torch.float8_e4m3fn
-    assert aq.dtype == torch.uint8
-    ab_dtype = "float4_e2m1fn"
-    sf_dtype = "float8_e4m3fn"
-
-    if isinstance(hidden_states, tuple):
-        c_dtype = "bfloat16"
-    else:
-        c_dtype = get_cute_dtype(hidden_states)
-
-    # Gemm1
-    flashinfer_cutedsl_grouped_gemm_nt_masked(
-        (aq, aq_sf),
-        (w1.permute(1, 2, 0), w1_blockscale),
-        workspace,
-        masked_m,
-        ab_dtype=ab_dtype,
-        sf_dtype=sf_dtype,
-        c_dtype=c_dtype,
-        sf_vec_size=sf_vec_size,
-        alpha=w1_alpha.view(1, 1, num_experts),
-        alpha_dtype=get_cute_dtype(w1_alpha),
-    )  # in logical [m, n, l]
-
-    # SILU and quantization
-    diq, diq_sf = silu_and_mul_scaled_nvfp4_experts_quantize(
-        workspace.permute(2, 0, 1),
-        masked_m,
-        a2_global_scale,
-    )
-
-    # Gemm2
-    out = out.permute(1, 2, 0)  # requirement of kernel
-    flashinfer_cutedsl_grouped_gemm_nt_masked(
-        (diq, diq_sf),
-        (w2.permute(1, 2, 0), w2_blockscale),
-        out,
-        masked_m,
-        ab_dtype=ab_dtype,
-        sf_dtype=sf_dtype,
-        c_dtype=c_dtype,
-        sf_vec_size=sf_vec_size,
-        alpha=w2_alpha.view(1, 1, num_experts),
-        alpha_dtype=get_cute_dtype(w2_alpha),
-    )  # in logical [m, k, l]
-    out = out.permute(2, 0, 1)
diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py b/vllm/model_executor/layers/fused_moe/experts/flashinfer_cutlass_moe.py
similarity index 95%
rename from vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py
rename to vllm/model_executor/layers/fused_moe/experts/flashinfer_cutlass_moe.py
index 91f7a83f6fce..b891583e3ef4 100644
--- a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/flashinfer_cutlass_moe.py
@@ -97,6 +97,13 @@ def __init__(
         self.max_capture_size = (
             get_current_vllm_config().compilation_config.max_cudagraph_capture_size
         )
+        self.gemm1_clamp_limit: torch.Tensor | None = None
+        if quant_config.gemm1_clamp_limit is not None:
+            self.gemm1_clamp_limit = torch.tensor(
+                [quant_config.gemm1_clamp_limit] * self.num_experts,
+                dtype=torch.float32,
+                device=self.device,
+            )
 
         if quant_config.weight_quant_dtype == "mxfp4":
             # This value is used specifically for gpt-oss,
@@ -107,9 +114,12 @@ def __init__(
             self.gemm1_beta = torch.tensor(
                 [1.0] * self.num_experts, dtype=torch.float32, device=self.device
             )
-            self.gemm1_clamp_limit = torch.tensor(
-                [7.0] * self.num_experts, dtype=torch.float32, device=self.device
-            )
+            if self.gemm1_clamp_limit is None:
+                self.gemm1_clamp_limit = torch.tensor(
+                    [7.0] * self.num_experts,
+                    dtype=torch.float32,
+                    device=self.device,
+                )
             if quant_config.quant_dtype == "mxfp8":
                 self.fake_input_scale = torch.ones(
                     self.num_experts,
@@ -129,7 +139,7 @@ def _supports_current_device() -> bool:
             and (
                 p.is_device_capability(90)
                 or p.is_device_capability_family(100)
-                or p.is_device_capability_family(110)
+                # SM110 excluded: flashinfer-ai/flashinfer#3134
                 or p.is_device_capability_family(120)
             )
             and has_flashinfer_cutlass_fused_moe()
@@ -277,7 +287,9 @@ def apply(
         fc2_expert_biases = None
         swiglu_alpha = None
         swiglu_beta = None
-        swiglu_limit = None
+        swiglu_limit = (
+            self.gemm1_clamp_limit if activation == MoEActivation.SILU else None
+        )
         use_mxfp8_act_scaling = False
         use_w4_group_scaling = False
         # Select quantization metadata based on FP8 format/path
@@ -361,7 +373,7 @@ def apply(
             fc1_expert_weights = w1
             fc2_expert_weights = w2
         else:
-            quant_scales = None
+            quant_scales = []
             a1q_scale = None
             fc1_expert_weights = w1
             fc2_expert_weights = w2
diff --git a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py b/vllm/model_executor/layers/fused_moe/experts/fused_batched_moe.py
similarity index 83%
rename from vllm/model_executor/layers/fused_moe/fused_batched_moe.py
rename to vllm/model_executor/layers/fused_moe/experts/fused_batched_moe.py
index e2b5a8f6764e..0e31331e7262 100644
--- a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/fused_batched_moe.py
@@ -14,13 +14,12 @@
 from vllm.model_executor.layers.fused_moe.fused_moe import try_get_optimal_moe_config
 from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
     TopKWeightAndReduceDelegate,
-    TopKWeightAndReduceNaiveBatched,
 )
 from vllm.model_executor.layers.fused_moe.utils import (
     _resize_cache,
     moe_kernel_quantize_input,
     normalize_batched_scales_shape,
-    normalize_scales_shape,
+    swiglu_limit_func,
 )
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     QuantKey,
@@ -489,162 +488,6 @@ def invoke_moe_batched_triton_kernel(
     )
 
 
-class BatchedPrepareAndFinalize(mk.FusedMoEPrepareAndFinalizeModular):
-    """
-    A reference prepare/finalize class that reorganizes the tokens into
-    expert batched format, i.e. E x max_num_tokens x K.  This is the format
-    that the batched dispatch/combine kernels use.
-    """
-
-    def __init__(
-        self,
-        max_num_tokens: int,
-        num_local_experts: int,
-        num_dispatchers: int,
-        rank: int,
-    ):
-        super().__init__()
-        self.max_num_tokens = max_num_tokens
-        self.num_local_experts = num_local_experts
-        self.rank = rank
-        self.num_dispatchers_ = num_dispatchers
-
-    @property
-    def activation_format(self) -> mk.FusedMoEActivationFormat:
-        return mk.FusedMoEActivationFormat.BatchedExperts
-
-    def max_num_tokens_per_rank(self) -> int | None:
-        return self.max_num_tokens
-
-    def topk_indices_dtype(self) -> torch.dtype | None:
-        return None
-
-    def num_dispatchers(self) -> int:
-        return self.num_dispatchers_
-
-    def output_is_reduced(self) -> bool:
-        return False
-
-    def prepare(
-        self,
-        a1: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        num_experts: int,
-        expert_map: torch.Tensor | None,
-        apply_router_weight_on_input: bool,
-        quant_config: FusedMoEQuantConfig,
-        defer_input_quant: bool = False,
-    ) -> mk.PrepareResultType:
-        if defer_input_quant:
-            raise NotImplementedError(
-                f"{self.__class__.__name__} does not support defer_input_quant=True. "
-                "Please select an MoE kernel that accepts quantized inputs."
-            )
-        assert a1.dim() == 2
-        assert topk_ids.dim() == 2
-        assert topk_ids.size(0) == a1.size(0)
-
-        if apply_router_weight_on_input:
-            topk = topk_ids.size(1)
-            # TODO: this only works for topK=1, will need to update for topK>1
-            assert topk == 1, (
-                "apply_router_weight_on_input is only implemented for topk=1"
-            )
-            a1.mul_(topk_weights.to(a1.dtype))
-
-        num_tokens, hidden_dim = a1.size()
-        topk = topk_ids.size(1)
-
-        tokens_per_expert = torch.zeros(num_experts, dtype=torch.int, device=a1.device)
-
-        num_local_experts = self.num_local_experts
-
-        if quant_config.quant_dtype is None:
-            b_type = a1.dtype
-        else:
-            b_type = quant_config.quant_dtype
-
-        b_a1 = torch.zeros(
-            (num_local_experts, self.max_num_tokens, hidden_dim),
-            dtype=b_type,
-            device=a1.device,
-        )
-
-        if quant_config.is_quantized:
-            scale_shape = quant_config.batched_scale_shape(
-                num_local_experts, self.max_num_tokens, hidden_dim
-            )
-
-            b_a1_scale = torch.empty(scale_shape, dtype=torch.float32, device=a1.device)
-        else:
-            assert quant_config.a1_scale is None
-            b_a1_scale = None
-
-        first_expert = num_local_experts * self.rank
-        last_expert = first_expert + num_local_experts
-
-        a1_scale = normalize_scales_shape(quant_config.a1_scale)
-
-        for expert_id in range(first_expert, last_expert):
-            topks = torch.any(topk_ids == expert_id, dim=1).flatten()
-            rows = torch.count_nonzero(topks.flatten())
-            if rows == 0:
-                continue
-            idx = expert_id - first_expert
-            tokens_per_expert[idx] = rows
-            rhs = a1[: topks.numel()][topks]
-            if quant_config.quant_dtype is not None:
-                if a1_scale is not None:
-                    if quant_config.is_per_act_token:
-                        rhs_a1_scale = a1_scale[: topks.numel()][topks]
-                    else:
-                        rhs_a1_scale = a1_scale
-                else:
-                    rhs_a1_scale = None
-                b_a1[idx, :rows, :], b_s = moe_kernel_quantize_input(
-                    rhs,
-                    rhs_a1_scale,
-                    quant_config.quant_dtype,
-                    quant_config.per_act_token_quant,
-                    quant_config.block_shape,
-                )
-                assert b_s is not None
-                if quant_config.is_per_act_token:
-                    b_a1_scale[idx, :rows] = b_s[:rows]
-                else:
-                    b_a1_scale[idx, : b_s.shape[0]] = b_s
-            else:
-                b_a1[idx, :rows, :] = rhs
-
-        assert b_a1_scale is None or b_a1_scale.ndim == 3
-
-        expert_tokens_meta = mk.ExpertTokensMetadata(
-            expert_num_tokens=tokens_per_expert, expert_num_tokens_cpu=None
-        )
-
-        return b_a1, b_a1_scale, expert_tokens_meta, None, None
-
-    def finalize(
-        self,
-        output: torch.Tensor,
-        fused_expert_output: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        apply_router_weight_on_input: bool,
-        weight_and_reduce_impl: mk.TopKWeightAndReduce,
-    ) -> None:
-        if isinstance(weight_and_reduce_impl, TopKWeightAndReduceDelegate):
-            weight_and_reduce_impl = TopKWeightAndReduceNaiveBatched(self.rank)
-        weight_and_reduce_impl.apply(
-            output=output,
-            fused_expert_output=fused_expert_output,
-            topk_weights=topk_weights,
-            topk_ids=topk_ids,
-            apply_router_weight_on_input=apply_router_weight_on_input,
-        )
-
-
 class NaiveBatchedExperts(mk.FusedMoEExpertsModular):
     """
     A reference MoE expert class that operates on expert batched format,
@@ -928,25 +771,27 @@ def _supports_quant_scheme(
             p.is_cuda() and p.has_device_capability((8, 9))
         )
 
-        SUPPORTED_W_A_FP8 = [
-            (kFp8Static128BlockSym, kFp8Dynamic128Sym),
-            (kFp8StaticChannelSym, kFp8DynamicTokenSym),
-            (kFp8StaticTensorSym, kFp8DynamicTokenSym),
-            (kFp8StaticTensorSym, kFp8StaticTensorSym),
-            (kFp8StaticTensorSym, kFp8DynamicTensorSym),
-        ]
-        return (weight_key, activation_key) == (None, None) or (
-            device_supports_fp8 and (weight_key, activation_key) in SUPPORTED_W_A_FP8
-        )
+        supported: list[tuple[QuantKey | None, QuantKey | None]] = [(None, None)]
+        if device_supports_fp8:
+            supported += [
+                (kFp8Static128BlockSym, kFp8Dynamic128Sym),
+                (kFp8StaticChannelSym, kFp8DynamicTokenSym),
+                (kFp8StaticTensorSym, kFp8DynamicTokenSym),
+                (kFp8StaticTensorSym, kFp8StaticTensorSym),
+                (kFp8StaticTensorSym, kFp8DynamicTensorSym),
+            ]
+        return (weight_key, activation_key) in supported
 
     @staticmethod
     def _supports_activation(activation: MoEActivation) -> bool:
         return activation in [
             MoEActivation.SILU,
             MoEActivation.GELU,
+            MoEActivation.GELU_TANH,
             MoEActivation.SWIGLUOAI,
             MoEActivation.SILU_NO_MUL,
             MoEActivation.GELU_NO_MUL,
+            MoEActivation.GELU_TANH_NO_MUL,
             MoEActivation.RELU2_NO_MUL,
         ]
 
@@ -961,6 +806,16 @@ def finalize_weight_and_reduce_impl(self) -> mk.TopKWeightAndReduce:
         # Let PrepareAndFinalize::finalize() decide the impl.
         return TopKWeightAndReduceDelegate()
 
+    def activation(
+        self, activation: MoEActivation, output: torch.Tensor, input: torch.Tensor
+    ) -> None:
+        gemm1_clamp_limit = self.quant_config.gemm1_clamp_limit
+        if activation == MoEActivation.SILU and gemm1_clamp_limit is not None:
+            swiglu_limit_func(output, input, float(gemm1_clamp_limit))
+            return
+
+        super().activation(activation, output, input)
+
     def workspace_shapes(
         self,
         M: int,
diff --git a/vllm/model_executor/layers/fused_moe/experts/fused_humming_moe.py b/vllm/model_executor/layers/fused_moe/experts/fused_humming_moe.py
new file mode 100644
index 000000000000..203c5782829c
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/experts/fused_humming_moe.py
@@ -0,0 +1,732 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Fused MoE utilities for Humming."""
+
+import json
+import math
+from typing import TYPE_CHECKING, Any
+
+import torch
+from humming import dtypes
+from humming.config import GemmType as HummingGemmType
+from humming.layer import HummingLayerMeta, HummingMethod
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm import envs
+from vllm.forward_context import get_forward_context
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEParallelConfig,
+    FusedMoEQuantConfig,
+)
+from vllm.model_executor.layers.fused_moe.moe_align_block_size import (
+    moe_align_block_size,
+)
+from vllm.model_executor.layers.fused_moe.moe_fused_mul_sum import moe_fused_mul_sum
+from vllm.model_executor.layers.fused_moe.moe_permute_unpermute import (
+    moe_permute,
+    moe_unpermute,
+)
+from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
+    TopKWeightAndReduceDelegate,
+    TopKWeightAndReduceNoOP,
+)
+from vllm.model_executor.layers.fused_moe.utils import (
+    _resize_cache,
+    swiglu_limit_func,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import QuantKey
+from vllm.platforms import current_platform
+from vllm.v1.worker.workspace import current_workspace_manager
+
+if TYPE_CHECKING:
+    from vllm.model_executor.layers.fused_moe import RoutedExperts
+
+
+logger = init_logger(__name__)
+
+
+def get_humming_moe_gemm_type() -> str:
+    env_gemm_type: str = envs.VLLM_HUMMING_MOE_GEMM_TYPE or ""
+    env_gemm_type = env_gemm_type.lower()
+    if env_gemm_type == "indexed":
+        gemm_type = env_gemm_type
+    elif env_gemm_type in ["grouped_contiguous", "grouped"]:
+        gemm_type = "grouped_contiguous"
+    else:
+        gemm_type = "indexed"
+
+    logger.info_once(f"Using {gemm_type} gemm for humming moe")  # noqa
+    return gemm_type
+
+
+class HummingExpertsBase(mk.FusedMoEExpertsModular):
+    def __init__(
+        self,
+        layer: "RoutedExperts",
+        moe_config: FusedMoEConfig,
+        quant_config: FusedMoEQuantConfig,
+        max_num_tokens: int | None = None,
+        num_dispatchers: int | None = None,
+    ):
+        self.layer = layer
+        self.num_experts = self.layer.num_experts
+        self.global_num_experts = self.layer.global_num_experts
+        self.init_humming_moe()
+
+        if self.is_batched():
+            assert max_num_tokens is not None and num_dispatchers is not None
+
+        super().__init__(
+            moe_config=moe_config,
+            quant_config=quant_config,
+            max_num_tokens=max_num_tokens,
+            num_dispatchers=num_dispatchers,
+        )
+
+    def init_humming_moe(self):
+        self.compute_config = {
+            "use_batch_invariant": envs.VLLM_BATCH_INVARIANT,
+            "use_f16_accum": envs.VLLM_HUMMING_USE_F16_ACCUM,
+            "gemm_type": self.humming_gemm_type().value,
+        }
+        self.w13_tuning_config = HummingMethod.get_default_tuning_configs(
+            layer=self.layer,
+            use_f16_accum=envs.VLLM_HUMMING_USE_F16_ACCUM,
+            use_batch_invariant=envs.VLLM_BATCH_INVARIANT,
+            gemm_type=self.humming_gemm_type(),
+            sublayer_name="w13",
+        )
+        self.w2_tuning_config = HummingMethod.get_default_tuning_configs(
+            layer=self.layer,
+            use_f16_accum=envs.VLLM_HUMMING_USE_F16_ACCUM,
+            use_batch_invariant=envs.VLLM_BATCH_INVARIANT,
+            gemm_type=self.humming_gemm_type(),
+            sublayer_name="w2",
+        )
+        self.compute_config_str = json.dumps(self.compute_config)
+        self.w13_tuning_config_str = json.dumps(self.w13_tuning_config)
+        self.w2_tuning_config_str = json.dumps(self.w2_tuning_config)
+
+    def get_global_valid_shape_m(self, topk_ids: torch.Tensor):
+        num_tokens = topk_ids.size(0)
+        ctx = get_forward_context()
+        if ctx.dp_metadata is not None:
+            num_tokens = ctx.dp_metadata.num_tokens_across_dp_cpu.sum().item()
+
+        return num_tokens * topk_ids.size(1)
+
+    def estimate_local_valid_shape_m(self, topk_ids: torch.Tensor):
+        # estimate shape_m for kernel tuning
+        global_valid_shape_m = self.get_global_valid_shape_m(topk_ids)
+        num_experts = self.num_experts
+        global_num_experts = self.global_num_experts
+        return math.ceil(global_valid_shape_m * num_experts / global_num_experts)
+
+    @staticmethod
+    def humming_gemm_type() -> HummingGemmType:
+        raise NotImplementedError
+
+    @classmethod
+    def is_batched(cls) -> bool:
+        return cls.activation_format() == mk.FusedMoEActivationFormat.BatchedExperts
+
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        return True
+
+    def supports_expert_map(self) -> bool:
+        return True
+
+    @staticmethod
+    def _supports_current_device() -> bool:
+        platform = current_platform
+        return platform.is_cuda() and platform.has_device_capability((7, 5))
+
+    @staticmethod
+    def _supports_no_act_and_mul() -> bool:
+        return True
+
+    @staticmethod
+    def _supports_activation(activation: MoEActivation) -> bool:
+        # Humming uses apply_moe_activation() callback for activation,
+        # so any activation supported there can be used here.
+        return activation in [
+            MoEActivation.SILU,
+            MoEActivation.GELU,
+            MoEActivation.GELU_TANH,
+            MoEActivation.SWIGLUOAI,
+            MoEActivation.SWIGLUSTEP,
+            MoEActivation.SILU_NO_MUL,
+            MoEActivation.GELU_NO_MUL,
+            MoEActivation.GELU_TANH_NO_MUL,
+            MoEActivation.RELU2_NO_MUL,
+        ]
+
+    @staticmethod
+    def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
+        return not (
+            moe_parallel_config.use_fi_nvl_two_sided_kernels
+            or moe_parallel_config.use_fi_nvl_one_sided_kernels
+        )
+
+    def moe_problem_size(
+        self,
+        a1: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        topk_ids: torch.Tensor,
+    ) -> tuple[int, int, int, int, int]:
+        meta1: HummingLayerMeta = self.layer.humming_metas["w13"]
+        meta2: HummingLayerMeta = self.layer.humming_metas["w2"]
+
+        assert meta1.num_experts == meta2.num_experts
+
+        num_experts = meta1.num_experts
+        top_k = topk_ids.size(1)
+        assert w1.size(0) == num_experts
+        assert w2.size(0) == num_experts
+
+        if not self.is_batched():
+            num_tokens = a1.size(0)
+            assert topk_ids.size(0) == num_tokens
+        else:
+            assert a1.dim() == 3
+            assert a1.size(0) == num_experts
+            num_tokens = a1.size(1)
+
+        return meta1.num_experts, num_tokens, meta1.shape_n // 2, meta1.shape_k, top_k
+
+    def get_buffer_metas(self, M: int, topk: int, activation: MoEActivation):
+        num_experts = self.num_experts
+        N = self.layer.intermediate_size_per_partition
+        K = self.layer.hidden_size
+        assert isinstance(num_experts, int)
+        assert isinstance(N, int)
+        assert isinstance(K, int)
+
+        # hidden_states
+        # (-> quanted_gate_up_input) (if not BF16/FP16 activation)
+        # -> gate_up_output
+        # -> activation_output
+        # (-> quanted_down_input) (if not BF16/FP16 activation)
+        # -> down_output
+        # (-> output) (if not is_batched)
+        # Neighboring nodes are required to utilize distinct workspaces.
+        # The output must be derived from workspace1.
+
+        output_shape: tuple[int, ...]
+        if self.is_batched():
+            max_num_tokens = self.max_num_tokens
+            num_dispatchers = self.num_dispatchers
+            assert max_num_tokens is not None and num_dispatchers is not None
+            input_shape_m = num_experts * max_num_tokens
+            real_shape_m = num_experts * max_num_tokens * num_dispatchers
+            output_shape = (num_experts, max_num_tokens * num_dispatchers, K)
+        else:
+            input_shape_m = M
+            if self.humming_gemm_type() != HummingGemmType.INDEXED:
+                input_shape_m = M * topk
+            real_shape_m = M * topk
+            output_shape = (M, K)
+
+        down_input_size = N if activation.is_gated else (N * 2)
+        a_dtype = self.layer.humming_metas["w13"].a_dtype
+        c_dtype = self.layer.humming_metas["w13"].c_dtype
+        num_bits = a_dtype.num_bits
+        torch_dtype_map = {
+            dtypes.float16: torch.float16,
+            dtypes.bfloat16: torch.bfloat16,
+            dtypes.float8e4m3: torch.float8_e4m3fn,
+            dtypes.int8: torch.int8,
+            dtypes.int4: torch.uint8,
+        }
+
+        buffer_metas = {
+            "quanted_gate_up_input": {
+                "shape": (input_shape_m, K),
+                "dtype": torch_dtype_map[a_dtype],
+            },
+            "gate_up_output": {
+                "shape": (real_shape_m, N * 2),
+                "dtype": torch_dtype_map[c_dtype],
+            },
+            "activation_output": {
+                "shape": (real_shape_m, down_input_size),
+                "dtype": torch_dtype_map[c_dtype],
+            },
+            "quanted_down_input": {
+                "shape": (real_shape_m, down_input_size),
+                "dtype": torch_dtype_map[a_dtype],
+            },
+            "down_output": {
+                "shape": output_shape if self.is_batched() else (real_shape_m, K),
+                "dtype": torch_dtype_map[c_dtype],
+            },
+            "output": {
+                "shape": output_shape,
+                "dtype": torch_dtype_map[c_dtype],
+            },
+        }
+
+        for key in buffer_metas:
+            meta = buffer_metas[key]
+            if "quanted" in key and a_dtype.num_bits == 4:
+                meta["shape"] = meta["shape"][:-1] + (meta["shape"][-1] // 2,)
+
+        if num_bits == 16:
+            required_buffers = ["gate_up_output", "activation_output", "down_output"]
+        else:
+            required_buffers = [
+                "quanted_gate_up_input",
+                "gate_up_output",
+                "activation_output",
+                "quanted_down_input",
+                "down_output",
+            ]
+
+        # batched moe use down_output as output
+        if not self.is_batched():
+            required_buffers.append("output")
+
+        return buffer_metas, required_buffers
+
+    def _workspace_shapes(self, M: int, topk: int, activation: MoEActivation):
+        buffer_metas, required_buffers = self.get_buffer_metas(M, topk, activation)
+
+        workspace1_nbytes = 0
+        workspace2_nbytes = 0
+
+        for index, name in enumerate(required_buffers[::-1]):
+            buffer_meta = buffer_metas[name]
+            nelement = math.prod(buffer_meta["shape"])
+            nbytes = nelement * buffer_meta["dtype"].itemsize
+            if index % 2 == 0:
+                workspace1_nbytes = max(workspace1_nbytes, nbytes)
+            else:
+                workspace2_nbytes = max(workspace2_nbytes, nbytes)
+
+        output_key = "down_output" if self.is_batched() else "output"
+        output_shape = buffer_metas[output_key]["shape"]
+
+        return (workspace1_nbytes // 2,), (workspace2_nbytes // 2,), output_shape
+
+    def workspace_shapes(
+        self,
+        M: int,
+        N: int,
+        K: int,
+        topk: int,
+        global_num_experts: int,
+        local_num_experts: int,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        activation: MoEActivation,
+    ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...]]:
+        return self._workspace_shapes(M, topk, activation)
+
+    def make_workspaces(self, M: int, topk: int, activation: MoEActivation):
+        shapes = self._workspace_shapes(M, topk, activation)
+        workspace1_shape, workspace2_shape, output_shape = shapes
+        torch_dtype = self.layer.param_dtype
+        workspace1, workspace2 = current_workspace_manager().get_simultaneous(
+            (workspace1_shape, torch_dtype),
+            (workspace2_shape, torch_dtype),
+        )
+        output = _resize_cache(workspace1, output_shape)
+        return workspace1, workspace2, output
+
+    def prepare_buffers(
+        self,
+        workspace1: torch.Tensor,
+        workspace2: torch.Tensor,
+        M: int,
+        topk: int,
+        activation: MoEActivation,
+    ) -> dict[str, torch.Tensor]:
+        buffer_metas, required_buffers = self.get_buffer_metas(M, topk, activation)
+        buffers = {}
+        for index, name in enumerate(required_buffers[::-1]):
+            buffer_meta = buffer_metas[name]
+            workspace = workspace1 if index % 2 == 0 else workspace2
+            workspace = workspace.view(buffer_meta["dtype"])
+            buffers[name] = _resize_cache(workspace, buffer_meta["shape"])
+
+        return buffers
+
+    def apply(
+        self,
+        output: torch.Tensor,
+        hidden_states: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        activation: MoEActivation,
+        global_num_experts: int,
+        expert_map: torch.Tensor | None,
+        a1q_scale: torch.Tensor | None,
+        a2_scale: torch.Tensor | None,
+        workspace13: torch.Tensor,
+        workspace2: torch.Tensor,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        apply_router_weight_on_input: bool,
+    ):
+        assert not apply_router_weight_on_input
+
+        self.main_apply(
+            hidden_states=hidden_states,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            workspace1=workspace13,
+            workspace2=workspace2,
+            expert_tokens_meta=expert_tokens_meta,
+        )
+
+    def main_apply(
+        self,
+        hidden_states: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        workspace1: torch.Tensor,
+        workspace2: torch.Tensor,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+    ):
+        raise NotImplementedError
+
+    @staticmethod
+    def is_supported_config(
+        cls: type[mk.FusedMoEExperts],
+        moe_config: FusedMoEConfig,
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+        activation_format: mk.FusedMoEActivationFormat,
+    ) -> tuple[bool, str | None]:
+        if activation_format == mk.FusedMoEActivationFormat.BatchedExperts:
+            supported = cls.activation_format() == activation_format
+            reason = "activation_format mismatched"
+        elif activation_format == mk.FusedMoEActivationFormat.Standard:
+            if cls.activation_format() != mk.FusedMoEActivationFormat.Standard:
+                supported = False
+                reason = "activation_format mismatched"
+            else:
+                assert hasattr(cls, "humming_gemm_type")
+                gemm_type = cls.humming_gemm_type().value.lower()
+                preferred_gemm_type = get_humming_moe_gemm_type().lower()
+                supported = preferred_gemm_type == gemm_type
+                reason = "preferred gemm type mismatched"
+        else:
+            supported = False
+            reason = "unsupported activation_format"
+
+        return supported, None if supported else reason
+
+    def apply_activation(
+        self,
+        activation: MoEActivation,
+        output: torch.Tensor,
+        input: torch.Tensor,
+    ) -> None:
+        swiglu_limit = self.quant_config.gemm1_clamp_limit
+        if activation == MoEActivation.SILU and swiglu_limit is not None:
+            swiglu_limit_func(output=output, input=input, swiglu_limit=swiglu_limit)
+        else:
+            self.activation(activation=activation, input=input, output=output)
+
+
+class HummingIndexedExperts(HummingExpertsBase):
+    def finalize_weight_and_reduce_impl(self) -> mk.TopKWeightAndReduce:
+        return TopKWeightAndReduceNoOP()
+
+    @staticmethod
+    def activation_format() -> mk.FusedMoEActivationFormat:
+        return mk.FusedMoEActivationFormat.Standard
+
+    @staticmethod
+    def humming_gemm_type() -> HummingGemmType:
+        return HummingGemmType.INDEXED
+
+    def prepare_humming_moe_kwargs(
+        self,
+        topk_ids: torch.Tensor,
+        expert_map: torch.Tensor | None,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        valid_shape_m = self.estimate_local_valid_shape_m(topk_ids)
+
+        for min_shape_m, max_shape_m, config in self.w13_tuning_config:
+            if valid_shape_m > min_shape_m and valid_shape_m <= max_shape_m:
+                moe_block_size = config["block_shape"][0]
+                break
+        else:
+            raise ValueError(f"cannot found moe_block_size for shape {valid_shape_m}")
+
+        sorted_ids, expert_ids, num_tokens_padded = moe_align_block_size(
+            topk_ids=topk_ids,
+            block_size=moe_block_size,
+            num_experts=self.global_num_experts,
+            expert_map=expert_map,
+            ignore_invalid_experts=True,
+        )
+
+        moe_common_kwargs = {
+            "sorted_ids": sorted_ids,
+            "expert_ids": expert_ids,
+            "num_tokens_padded": num_tokens_padded,
+            "compute_config": self.compute_config_str,
+            "valid_shape_m": valid_shape_m,
+        }
+
+        top_k = topk_ids.size(1)
+        moe_kwargs1 = {"top_k": top_k, "tuning_config": self.w13_tuning_config_str}
+        moe_kwargs2 = {"top_k": 1, "tuning_config": self.w2_tuning_config_str}
+        moe_kwargs1.update(moe_common_kwargs)
+        moe_kwargs2.update(moe_common_kwargs)
+
+        return moe_kwargs1, moe_kwargs2
+
+    def main_apply(
+        self,
+        hidden_states: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        workspace1: torch.Tensor,
+        workspace2: torch.Tensor,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+    ):
+        hidden_states = hidden_states.view(-1, hidden_states.size(-1))
+        buffers = self.prepare_buffers(
+            workspace1,
+            workspace2,
+            topk_ids.size(0),
+            topk_ids.size(1),
+            self.layer.activation,
+        )
+
+        moe_kwargs1, moe_kwargs2 = self.prepare_humming_moe_kwargs(
+            topk_ids=topk_ids,
+            expert_map=self.layer.expert_map,
+            expert_tokens_meta=expert_tokens_meta,
+        )
+
+        inputs, input_scale = HummingMethod.may_quant_input(
+            layer=self.layer,
+            inputs=hidden_states,
+            quanted_input=buffers.get("quanted_gate_up_input", None),
+            sublayer_name="w13",
+        )
+
+        HummingMethod.forward_layer(
+            layer=self.layer,
+            inputs=inputs,
+            input_scale=input_scale,
+            outputs=buffers["gate_up_output"],
+            sublayer_name="w13",
+            **moe_kwargs1,
+        )
+
+        self.apply_activation(
+            activation=self.layer.activation,
+            input=buffers["gate_up_output"],
+            output=buffers["activation_output"],
+        )
+
+        inputs, input_scale = HummingMethod.may_quant_input(
+            layer=self.layer,
+            inputs=buffers["activation_output"],
+            quanted_input=buffers.get("quanted_down_input", None),
+            sublayer_name="w2",
+        )
+
+        HummingMethod.forward_layer(
+            layer=self.layer,
+            inputs=inputs,
+            input_scale=input_scale,
+            outputs=buffers["down_output"].view(-1, hidden_states.size(-1)),
+            sublayer_name="w2",
+            **moe_kwargs2,
+        )
+
+        moe_fused_mul_sum(
+            inputs=buffers["down_output"].view(*topk_ids.shape, -1),
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            expert_map=self.layer.expert_map,
+            outputs=buffers["output"],
+        )
+
+
+class HummingGroupedExperts(HummingExpertsBase):
+    def finalize_weight_and_reduce_impl(self) -> mk.TopKWeightAndReduce:
+        return TopKWeightAndReduceNoOP()
+
+    @staticmethod
+    def activation_format() -> mk.FusedMoEActivationFormat:
+        return mk.FusedMoEActivationFormat.Standard
+
+    @staticmethod
+    def humming_gemm_type() -> HummingGemmType:
+        return HummingGemmType.GROUPED_CONTIGUOUS
+
+    def main_apply(
+        self,
+        hidden_states: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        workspace1: torch.Tensor,
+        workspace2: torch.Tensor,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+    ):
+        valid_shape_m = self.estimate_local_valid_shape_m(topk_ids)
+
+        buffers = self.prepare_buffers(
+            workspace1,
+            workspace2,
+            topk_ids.size(0),
+            topk_ids.size(1),
+            self.layer.activation,
+        )
+
+        hidden_states, _, expert_first_token_offset, inv_perm, _ = moe_permute(
+            hidden_states=hidden_states,
+            a1q_scale=None,
+            topk_ids=topk_ids,
+            n_expert=self.global_num_experts,
+            n_local_expert=self.num_experts,
+            expert_map=self.layer.expert_map,
+        )
+
+        inputs, input_scale = HummingMethod.may_quant_input(
+            layer=self.layer,
+            inputs=hidden_states,
+            quanted_input=buffers.get("quanted_gate_up_input", None),
+            sublayer_name="w13",
+        )
+
+        HummingMethod.forward_layer(
+            layer=self.layer,
+            inputs=inputs,
+            input_scale=input_scale,
+            outputs=buffers["gate_up_output"],
+            valid_shape_m=valid_shape_m,
+            expert_layout=expert_first_token_offset,
+            compute_config=self.compute_config_str,
+            tuning_config=self.w13_tuning_config_str,
+            sublayer_name="w13",
+        )
+
+        self.apply_activation(
+            activation=self.layer.activation,
+            input=buffers["gate_up_output"],
+            output=buffers["activation_output"],
+        )
+
+        inputs, input_scale = HummingMethod.may_quant_input(
+            layer=self.layer,
+            inputs=buffers["activation_output"],
+            quanted_input=buffers.get("quanted_down_input", None),
+            sublayer_name="w2",
+        )
+
+        HummingMethod.forward_layer(
+            layer=self.layer,
+            inputs=inputs,
+            input_scale=input_scale,
+            outputs=buffers["down_output"],
+            valid_shape_m=valid_shape_m,
+            expert_layout=expert_first_token_offset,
+            compute_config=self.compute_config_str,
+            tuning_config=self.w2_tuning_config_str,
+            sublayer_name="w2",
+        )
+
+        moe_unpermute(
+            out=buffers["output"],
+            permuted_hidden_states=buffers["down_output"].view(*topk_ids.shape, -1),
+            topk_weights=topk_weights,
+            inv_permuted_idx=inv_perm,
+            expert_first_token_offset=expert_first_token_offset,
+        )
+
+
+class BatchedHummingGroupedExperts(HummingExpertsBase):
+    def finalize_weight_and_reduce_impl(self) -> mk.TopKWeightAndReduce:
+        return TopKWeightAndReduceDelegate()
+
+    @staticmethod
+    def activation_format() -> mk.FusedMoEActivationFormat:
+        return mk.FusedMoEActivationFormat.BatchedExperts
+
+    @staticmethod
+    def humming_gemm_type() -> HummingGemmType:
+        return HummingGemmType.GROUPED_MASKED
+
+    def main_apply(
+        self,
+        hidden_states: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        workspace1: torch.Tensor,
+        workspace2: torch.Tensor,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+    ):
+        assert expert_tokens_meta is not None
+        hidden_states = hidden_states.view(-1, hidden_states.size(-1))
+        valid_shape_m = self.estimate_local_valid_shape_m(topk_ids)
+        expert_num_tokens = expert_tokens_meta.expert_num_tokens
+
+        buffers = self.prepare_buffers(
+            workspace1,
+            workspace2,
+            topk_ids.size(0),
+            topk_ids.size(1),
+            self.layer.activation,
+        )
+
+        inputs, input_scale = HummingMethod.may_quant_input(
+            layer=self.layer,
+            inputs=hidden_states,
+            quanted_input=buffers.get("quanted_gate_up_input", None),
+            sublayer_name="w13",
+        )
+
+        HummingMethod.forward_layer(
+            layer=self.layer,
+            inputs=inputs,
+            input_scale=input_scale,
+            outputs=buffers["gate_up_output"],
+            valid_shape_m=valid_shape_m,
+            expert_layout=expert_num_tokens,
+            compute_config=self.compute_config_str,
+            tuning_config=self.w13_tuning_config_str,
+            sublayer_name="w13",
+        )
+
+        self.apply_activation(
+            activation=self.layer.activation,
+            input=buffers["gate_up_output"],
+            output=buffers["activation_output"],
+        )
+
+        inputs, input_scale = HummingMethod.may_quant_input(
+            layer=self.layer,
+            inputs=buffers["activation_output"],
+            quanted_input=buffers.get("quanted_down_input", None),
+            sublayer_name="w2",
+        )
+
+        HummingMethod.forward_layer(
+            layer=self.layer,
+            inputs=inputs,
+            input_scale=input_scale,
+            outputs=buffers["down_output"].view(-1, hidden_states.size(-1)),
+            valid_shape_m=valid_shape_m,
+            expert_layout=expert_num_tokens,
+            compute_config=self.compute_config_str,
+            tuning_config=self.w2_tuning_config_str,
+            sublayer_name="w2",
+        )
diff --git a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py b/vllm/model_executor/layers/fused_moe/experts/gpt_oss_triton_kernels_moe.py
similarity index 62%
rename from vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py
rename to vllm/model_executor/layers/fused_moe/experts/gpt_oss_triton_kernels_moe.py
index e03ecd01ae79..98265abf7c8f 100644
--- a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/gpt_oss_triton_kernels_moe.py
@@ -1,12 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-
 import torch
 
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm import _custom_ops as ops
-from vllm._aiter_ops import rocm_aiter_ops
 from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.activation import MoEActivation
 from vllm.model_executor.layers.fused_moe.config import (
@@ -16,6 +14,9 @@
     FusedMoEQuantConfig,
     RoutingMethodType,
 )
+from vllm.model_executor.layers.fused_moe.experts.lora_experts_mixin import (
+    LoRAExpertsMixin,
+)
 from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
     TopKWeightAndReduceNoOP,
 )
@@ -28,8 +29,207 @@
 from vllm.triton_utils import tl, triton
 from vllm.utils.import_utils import has_triton_kernels
 
+from ..utils import swiglu_limit_func
+
 logger = init_logger(__name__)
 
+
+def _triton_kernel_moe_supports_current_device() -> bool:
+    # Shared device gate for the OAI Triton MoE expert classes.
+    # Platform-aware to avoid ROCm capability aliasing — cap (9, 0)
+    # matches both gfx90a (verified) and gfx906 (unverified), so we
+    # dispatch on gfx-string helpers instead of the cap tuple on ROCm.
+    p = current_platform
+    if p.is_cuda():
+        cap = p.get_device_capability()
+        # Keep the original `(9, 0) <= cap < (11, 0)` window on
+        # CUDA (covers Hopper SM90 and Blackwell SM100, excludes
+        # SM120) — this PR is ROCm-scoped and the broader CUDA
+        # range was not validated.
+        return cap is not None and (9, 0) <= (cap.major, cap.minor) < (11, 0)
+    if p.is_rocm():
+        from vllm.platforms.rocm import on_gfx1x, on_gfx9
+
+        # gfx9 family: gfx90a (MI200), gfx942/gfx950 (MI3xx);
+        # on_gfx9() already excludes gfx906/gfx908.
+        # gfx1x family: gfx11xx (RDNA3/3.5) and gfx12xx (RDNA4);
+        # on_gfx1x() excludes gfx10xx (RDNA1/RDNA2).
+        return on_gfx9() or on_gfx1x()
+    return False
+
+
+def _patch_make_bitmatrix_metadata() -> None:
+    """Monkey-patch make_bitmatrix_metadata to support non-power-of-2 top_k.
+
+    triton's tl.arange requires a power-of-2 range.  The original kernel
+    computes BLOCK_SIZE = BLOCK_PER_TOK * TOKS_PER_ROW (= 32 * top_k).  For
+    DeepSeek-V4 with top_k=6 this gives 192, which is not a power of 2 and
+    causes a compile error at the first forward pass.
+
+    Fix: define a drop-in replacement kernel that accepts an extra constexpr
+    BLOCK_SIZE_PADDED (next power of 2 >= BLOCK_SIZE) and uses it for the
+    tl.arange call while keeping the actual BLOCK_SIZE as the stride between
+    thread-blocks so that all flat indices into NonzeroIndx stay correct.
+    Elements beyond BLOCK_SIZE are masked out (col_indx = 0xffff) and ignored.
+
+    This function is called once at module load time and patches the function
+    inside the triton_kernels tensor module so that SparseMatrix.__post_init__
+    picks up the fixed version transparently.
+    """
+    import torch
+    import triton
+    import triton.language as tl
+
+    try:
+        if current_platform.is_rocm():
+            from triton_kernels.tensor_details import bitmatrix as _bm
+            from triton_kernels.tensor_details.bitmatrix import (
+                BitmatrixMetadata,
+                _keyed_add,
+                cdiv,
+            )
+            from triton_kernels.tensor_details.bitmatrix_details.sum_bitmatrix_rows import (  # noqa: E501
+                sum_bitmatrix_rows,
+            )
+        else:
+            from vllm.third_party.triton_kernels.tensor_details import (
+                bitmatrix as _bm,
+            )
+            from vllm.third_party.triton_kernels.tensor_details.bitmatrix import (
+                BitmatrixMetadata,
+                _keyed_add,
+                cdiv,
+            )
+            from vllm.third_party.triton_kernels.tensor_details.bitmatrix_details.sum_bitmatrix_rows import (  # noqa: E501
+                sum_bitmatrix_rows,
+            )
+    except ImportError:
+        return
+
+    @triton.jit
+    def _stage2_pow2(
+        ColSortedIndx,
+        RowSortedIndx,
+        NonzeroIndx,
+        n_tokens,
+        ColPartialSum,
+        stride_pm,
+        stride_pn,
+        ColOffs,
+        TOKS_PER_ROW: tl.constexpr,
+        BLOCK_PER_TOK: tl.constexpr,
+        BLOCK_SIZE_PADDED: tl.constexpr,
+    ):
+        # Actual number of elements per block (may not be a power of 2).
+        BLOCK_SIZE: tl.constexpr = BLOCK_PER_TOK * TOKS_PER_ROW
+        tl.static_assert(BLOCK_SIZE_PADDED <= 32768)
+        if isinstance(n_tokens, tl.tensor) and n_tokens.dtype.is_ptr():
+            n_tokens = tl.load(n_tokens)
+        nonzero_indx_size = n_tokens * TOKS_PER_ROW
+        pid_m = tl.program_id(0)
+        # Use BLOCK_SIZE_PADDED (a power of 2) for tl.arange, but stride by
+        # the actual BLOCK_SIZE so flat positions in NonzeroIndx are correct.
+        # Elements with offs_local >= BLOCK_SIZE have offs_global beyond the
+        # valid range, get col_indx = 0xffff, and are filtered by the mask
+        # below without producing any output.
+        offs_local = tl.arange(0, BLOCK_SIZE_PADDED)
+        offs_global = pid_m * BLOCK_SIZE + offs_local
+        mask = offs_global < nonzero_indx_size
+        col_indx = tl.load(NonzeroIndx + offs_global, mask=mask, other=-1).to(tl.uint32)
+        kv_pairs = ((col_indx << 16) | offs_local).to(tl.uint32)
+        kv_pairs = tl.sort(kv_pairs, 0)
+        col_indx = kv_pairs >> 16
+        offs_global = pid_m * BLOCK_SIZE + (kv_pairs & 0xFFFF)
+        mask = col_indx != 0xFFFF
+        x = kv_pairs & 0xFFFF0000 | 0x00000001
+        cols_and_inclusive_run_lengths = tl.associative_scan(x, 0, _keyed_add)
+        exclusive_run_lengths = (cols_and_inclusive_run_lengths - 1) & 0xFFFF
+        row_sorted_indx = tl.load(
+            ColPartialSum + pid_m * stride_pm + col_indx * stride_pn, mask=mask
+        )
+        row_sorted_indx += tl.load(ColOffs + col_indx, mask=mask)
+        row_sorted_indx += exclusive_run_lengths
+        tl.store(RowSortedIndx + offs_global, row_sorted_indx, mask=mask)
+        tl.store(ColSortedIndx + row_sorted_indx, offs_global, mask=mask)
+
+    def _make_bitmatrix_metadata_pow2_safe(nonzero_indx, bitmatrix):
+        assert nonzero_indx.ndim == 2
+        PARTIAL_BLOCK_M = 32
+        col_sum, col_partial_sum = sum_bitmatrix_rows(
+            bitmatrix, partials_block_size=PARTIAL_BLOCK_M
+        )
+        device = bitmatrix.device
+        n_indx = nonzero_indx.numel()
+        n_cols = bitmatrix.shape[1]
+        col_offs = torch.empty(n_cols, dtype=torch.int32, device=device)
+        combined_indx = torch.empty(n_indx * 2, dtype=torch.int32, device=device)
+        col_sorted_indx = combined_indx[:n_indx]
+        row_sorted_indx = combined_indx[n_indx:]
+        MEMSET_BLOCK = 1024
+        memset_grid = (cdiv(n_indx * 2, MEMSET_BLOCK) + n_cols + 1,)
+        _bm._bitmatrix_metadata_compute_stage1[memset_grid](
+            combined_indx,
+            n_indx * 2,
+            -1,
+            MEMSET_BLOCK,
+            col_sum,
+            col_offs,
+            col_sum.shape[0],
+            col_partial_sum,
+            col_partial_sum.shape[0],
+            col_partial_sum.stride(0),
+            col_partial_sum.stride(1),
+            BLOCK_M=512,
+            BLOCK_N=512,
+        )
+        toks_per_row = nonzero_indx.shape[-1]
+        block_size = PARTIAL_BLOCK_M * toks_per_row
+        # Next power of 2 >= block_size (required by tl.arange).
+        block_size_padded = 1 << (max(block_size, 1) - 1).bit_length()
+        compute_grid = (cdiv(bitmatrix.shape_max[0], PARTIAL_BLOCK_M),)
+        _stage2_pow2[compute_grid](
+            col_sorted_indx,
+            row_sorted_indx,
+            nonzero_indx,
+            bitmatrix.shape[0],
+            col_partial_sum,
+            col_partial_sum.stride(0),
+            col_partial_sum.stride(1),
+            col_offs,
+            TOKS_PER_ROW=toks_per_row,
+            BLOCK_PER_TOK=PARTIAL_BLOCK_M,
+            BLOCK_SIZE_PADDED=block_size_padded,
+        )
+        return BitmatrixMetadata(
+            col_sum=col_sum,
+            col_sorted_indx=col_sorted_indx,
+            row_sorted_indx=row_sorted_indx,
+        )
+
+    # The most reliable patch point: SparseMatrix.__post_init__ looks up
+    # make_bitmatrix_metadata via its own __globals__ dict (the tensor.py
+    # module dict).  Patching through __globals__ works regardless of how
+    # sys.modules maps "triton_kernels.tensor" vs
+    # "vllm.third_party.triton_kernels.tensor".
+    from triton_kernels.tensor import SparseMatrix as _SparseMatrix
+
+    _SparseMatrix.__post_init__.__globals__["make_bitmatrix_metadata"] = (
+        _make_bitmatrix_metadata_pow2_safe
+    )
+    # Also patch the bitmatrix module itself in case it is imported directly.
+    _bm.make_bitmatrix_metadata = _make_bitmatrix_metadata_pow2_safe
+
+
+# Two API generations of triton_kernels are supported:
+#   - v3.5.1 (the version bundled with vLLM): exposes `routing()` and
+#     `routing_from_bitmatrix()` in triton_kernels.routing; the `Bitmatrix`
+#     constructor takes a `scratchpad` argument.
+#   - v3.6.0+: removes the `routing` module in favor of a `SparseMatrix`
+#     based path, and adds a `dtype=BIT` kwarg to `Bitmatrix`. Used only
+#     when the user has triton_kernels installed system-wide at v3.6.0+.
+#
+# `use_legacy_triton_kernels` selects between them at import time based on
+# whether `SparseMatrix` is importable.
 use_legacy_triton_kernels = False
 
 if has_triton_kernels():
@@ -47,7 +247,6 @@
             BIT,
             Bitmatrix,
         )
-        from triton_kernels.topk import topk
 
         try:
             from triton_kernels.tensor import (
@@ -55,11 +254,12 @@
                 make_ragged_tensor_metadata,
             )
         except ImportError:
-            if current_platform.is_rocm():
-                logger.warning_once("Using legacy triton_kernels on ROCm")
-                use_legacy_triton_kernels = True
-            else:
-                raise
+            # TODO(mgoin): drop the v3.5.1 pin and remove this fallback once
+            # the gpt-oss perf regression in v3.6.0+ is resolved upstream.
+            # Tracking: https://github.com/triton-lang/triton/issues/9969
+            use_legacy_triton_kernels = True
+        if not use_legacy_triton_kernels:
+            _patch_make_bitmatrix_metadata()
     except (AttributeError, ImportError) as e:
         logger.error(
             "Failed to import Triton kernels. Please make sure your triton "
@@ -89,6 +289,7 @@ def pack_bitmatrix(
     offsets = offsets_m[:, None] * n_expts_act + offsets_k[None, :]
     mask = (offsets_m < n_rows)[:, None] & (offsets_k < n_expts_act)[None, :]
     indices = tl.load(topk_ids + offsets, mask=mask, other=-1)
+    valid = indices >= 0
     div = indices // 32
     rem = indices % 32
     one = tl.cast(1, tl.uint32)
@@ -99,8 +300,13 @@ def pack_bitmatrix(
         offs = tl.arange(0, BLOCK_SIZE_K // 32) + i * (BLOCK_SIZE_K // 32)
         # All topks that need to go into this column has the correct bit set.
         # Other bits are 0. x is a 2D tensor.
+        # Guard with `valid` to prevent negative indices from producing
+        # spurious bits (on HIP, -1 // 32 == 0 and 1 << (-1 % 32) sets
+        # bit 31).
         x = tl.where(
-            div[:, :, None] == offs[None, None, :], (one << rem)[:, :, None], 0
+            valid[:, :, None] & (div[:, :, None] == offs[None, None, :]),
+            (one << rem)[:, :, None],
+            0,
         )
         # Reduce x to get a single int32_t bitpack.
         y = tl.reduce_or(x, axis=1)
@@ -108,93 +314,6 @@ def pack_bitmatrix(
         tl.store(bitmatrix_ptrs, y, mask=offsets_m[:, None] < n_rows)
 
 
-def legacy_routing_from_bitmatrix(
-    bitmatrix: "Bitmatrix",
-    expt_scal: torch.Tensor,
-    expt_indx: torch.Tensor,
-    n_expts_tot: int,
-    n_expts_act: int,
-) -> tuple["RoutingData", "GatherIndx", "ScatterIndx"]:
-    """
-    Replacement for the removed triton_kernels.routing.routing_from_bitmatrix.
-    Creates routing data from a bitmatrix representation.
-    """
-    if use_legacy_triton_kernels:
-        from triton_kernels.routing import routing_from_bitmatrix
-
-        return routing_from_bitmatrix(
-            bitmatrix, expt_scal, expt_indx, n_expts_tot, n_expts_act
-        )
-    sparse_logits = SparseMatrix(indx=expt_indx, vals=expt_scal, mask=bitmatrix)
-    dispatch_indx = sparse_logits.mask_metadata.row_sorted_indx
-    combine_indx = sparse_logits.mask_metadata.col_sorted_indx
-    ragged_batch_metadata = make_ragged_tensor_metadata(
-        sparse_logits.mask_metadata.col_sum,
-        dispatch_indx.shape[0],
-    )
-    gate_scal = sparse_logits.vals.flatten()[combine_indx]
-    routing_data = RoutingData(
-        gate_scal,
-        ragged_batch_metadata.block_sizes,
-        n_expts_tot,
-        n_expts_act,
-        ragged_batch_metadata,
-    )
-    gather_idx = GatherIndx(combine_indx, dispatch_indx)
-    scatter_idx = ScatterIndx(dispatch_indx, combine_indx)
-    return routing_data, gather_idx, scatter_idx
-
-
-def legacy_routing_from_sparsematrix(
-    sparse_logits: "SparseMatrix",
-    n_expts_tot: int,
-    n_expts_act: int,
-) -> tuple["RoutingData", "GatherIndx", "ScatterIndx"]:
-    """
-    Creates routing data from a SparseMatrix representation.
-    """
-    dispatch_indx = sparse_logits.mask_metadata.row_sorted_indx
-    combine_indx = sparse_logits.mask_metadata.col_sorted_indx
-    ragged_batch_metadata = make_ragged_tensor_metadata(
-        sparse_logits.mask_metadata.col_sum,
-        dispatch_indx.shape[0],
-    )
-    gate_scal = sparse_logits.vals.flatten()[combine_indx]
-    routing_data = RoutingData(
-        gate_scal,
-        ragged_batch_metadata.block_sizes,
-        n_expts_tot,
-        n_expts_act,
-        ragged_batch_metadata,
-    )
-    gather_idx = GatherIndx(combine_indx, dispatch_indx)
-    scatter_idx = ScatterIndx(dispatch_indx, combine_indx)
-    return routing_data, gather_idx, scatter_idx
-
-
-def legacy_routing(
-    logits: torch.Tensor,
-    n_expts_act: int,
-    sm_first: bool = False,
-) -> tuple["RoutingData", "GatherIndx", "ScatterIndx"]:
-    """
-    Replacement for the removed triton_kernels.routing.routing function.
-    Computes routing data from gating logits.
-    """
-    if use_legacy_triton_kernels:
-        from triton_kernels.routing import routing
-
-        return routing(logits, n_expts_act, sm_first=sm_first)
-    if sm_first:
-        logits = torch.softmax(logits, dim=-1)
-    sparse_logits = topk(logits, n_expts_act, apply_softmax=not sm_first)
-    return legacy_routing_from_sparsematrix(
-        sparse_logits,
-        logits.shape[-1],
-        n_expts_act,
-    )
-
-
 def triton_kernel_moe_forward(
     hidden_states: torch.Tensor,
     w1,  # Tensor or triton_kernels.Tensor
@@ -212,44 +331,25 @@ def triton_kernel_moe_forward(
     unpadded_N_w2=None,
     unpadded_K_w2=None,
 ) -> torch.Tensor:
-    if (
-        quant_config is not None
-        and quant_config.use_mxfp4_w4a8
-        and rocm_aiter_ops.is_enabled()
-    ):
-        from aiter.ops.triton.moe_routing.routing import routing as aiter_routing
-
-        routing_data, gather_idx, scatter_idx = aiter_routing(
-            gating_output, topk, sm_first=not renormalize
-        )
-        return triton_kernel_fused_mxfp4_w4a8_experts(
-            None,
-            hidden_states,
-            w1,
-            w2,
-            routing_data,
-            gather_idx,
-            scatter_idx,
-            activation=activation.value,
-            quant_config=quant_config,
-            apply_router_weight_on_input=apply_router_weight_on_input,
-            global_num_experts=global_num_experts,
-            expert_map=expert_map,
-            unpadded_N_w1=unpadded_N_w1,
-            unpadded_K_w1=unpadded_K_w1,
-            unpadded_N_w2=unpadded_N_w2,
-            unpadded_K_w2=unpadded_K_w2,
+    sm_first = not renormalize
+
+    # When no expert map is provided (no EP), call the fused `routing()`
+    # kernel directly. It combines softmax, topk, bitmatrix packing, and
+    # routing-metadata construction in a single launch, instead of the
+    # three separate kernels used by the generic path below.
+    # Only available in the legacy (v3.5.1) API; the v3.6.0+ path inlines
+    # equivalent logic via SparseMatrix in `make_routing_data`.
+    if use_legacy_triton_kernels and expert_map is None:
+        from triton_kernels.routing import routing as fused_routing
+
+        routing_data, gather_idx, scatter_idx = fused_routing(
+            gating_output, topk, sm_first=sm_first
         )
-
-    if expert_map is not None:
-        # With expert parallelism, legacy_routing produces routing data
-        # using global expert IDs which don't correspond to local weight
-        # indices.  Split the routing into topk selection + expert_map
-        # remapping + local routing data construction (matching the
-        # approach used by OAITritonExperts.apply).
+        effective_expert_map = None
+        effective_global_num_experts = global_num_experts
+    else:
         from triton_kernels.topk import topk as topk_fn
 
-        sm_first = not renormalize
         logits = gating_output
         if sm_first:
             logits = torch.softmax(logits, dim=-1)
@@ -261,21 +361,24 @@ def triton_kernel_moe_forward(
         else:
             topk_weights = topk_result.vals
             topk_ids_raw = topk_result.indx
-        # topk_ids_raw contains global expert IDs - remap to local.
-        topk_ids = expert_map[topk_ids_raw.to(torch.long)]
-        local_num_experts = w1.shape[0]
-        routing_data, gather_idx, scatter_idx = make_routing_data(
-            topk_ids, topk_weights, local_num_experts
-        )
-        # expert_map already applied; pass None downstream.
-        effective_expert_map = None
-        effective_global_num_experts = local_num_experts
-    else:
-        routing_data, gather_idx, scatter_idx = legacy_routing(
-            gating_output, topk, sm_first=not renormalize
-        )
-        effective_expert_map = expert_map
-        effective_global_num_experts = global_num_experts
+
+        if expert_map is not None:
+            # topk_ids_raw contains global expert IDs - remap to local.
+            topk_ids = expert_map[topk_ids_raw.to(torch.long)]
+            local_num_experts = w1.shape[0]
+            routing_data, gather_idx, scatter_idx = make_routing_data(
+                topk_ids, topk_weights, local_num_experts
+            )
+            # expert_map already applied; pass None downstream.
+            effective_expert_map = None
+            effective_global_num_experts = local_num_experts
+        else:
+            topk_ids = topk_ids_raw.to(torch.long)
+            routing_data, gather_idx, scatter_idx = make_routing_data(
+                topk_ids, topk_weights, gating_output.shape[-1]
+            )
+            effective_expert_map = expert_map
+            effective_global_num_experts = global_num_experts
 
     output = torch.empty_like(hidden_states)
     effective_quant_config = (
@@ -400,99 +503,6 @@ def triton_kernel_fused_experts(
     return output_tensor
 
 
-# This is a triton implementation of the fused_experts function
-def triton_kernel_fused_mxfp4_w4a8_experts(
-    output_tensor: torch.Tensor,
-    hidden_states: torch.Tensor,
-    w1,  # Tensor or triton_kernels.Tensor
-    w2,  # Tensor or triton_kernels.Tensor
-    routing_data,  # RoutingData
-    gather_indx,  # GatherIndx
-    scatter_indx,  # ScatterIndx
-    activation: str = "silu",
-    quant_config: FusedMoEQuantConfig | None = None,
-    swiglu_alpha: float = 1.702,
-    swiglu_limit: float = 7.0,
-    apply_router_weight_on_input: bool = False,
-    global_num_experts: int = -1,
-    expert_map: torch.Tensor | None = None,
-    a1q_scale: torch.Tensor | None = None,
-    unpadded_N_w1=None,
-    unpadded_K_w1=None,
-    unpadded_N_w2=None,
-    unpadded_K_w2=None,
-) -> torch.Tensor:
-    assert quant_config is not None
-    # type check, uint8 means mxfp4
-    assert hidden_states.dtype == torch.bfloat16
-    assert quant_config.w1_bias is None or quant_config.w1_bias.dtype == torch.float32
-    assert quant_config.w2_bias is None or quant_config.w2_bias.dtype == torch.float32
-
-    # Shape check: weights are padded (e.g. hidden_size padded for
-    # GFX950 swizzle).
-    assert hidden_states.shape[-1] == w1.shape[-2]
-    assert w2.shape[-1] == w1.shape[1]
-
-    E, _, N = w1.shape
-
-    if global_num_experts == -1:
-        global_num_experts = E
-
-    gammas = routing_data.gate_scal if routing_data else None
-
-    from aiter.ops.triton.moe_op_gemm_a8w4 import moe_gemm_a8w4
-    from aiter.ops.triton.quant_moe import downcast_to_static_fp8
-
-    assert quant_config.w1_precision is not None, (
-        "w1_precision in quant config can't be None"
-    )
-    assert quant_config.w2_precision is not None, (
-        "w2_precision in quant config can't be None"
-    )
-
-    hidden_states = downcast_to_static_fp8(
-        hidden_states, quant_config.w1_precision.flex_ctx.lhs_data.scale
-    )
-
-    intermediate_cache1 = moe_gemm_a8w4(
-        hidden_states,
-        w1.storage.data,
-        None,
-        quant_config.w1_precision.weight_scale.storage.data,
-        quant_config.w1_precision.flex_ctx.lhs_data.scale,
-        quant_config.w2_precision.flex_ctx.lhs_data.scale,
-        quant_config.w1_bias,
-        routing_data,
-        gather_indx=gather_indx,
-        gammas=gammas if apply_router_weight_on_input else None,
-        swizzle_mx_scale="CDNA4_SCALE",
-        out_dtype=torch.float8_e4m3fn,
-        apply_swiglu=True,
-        alpha=swiglu_alpha,
-        limit=swiglu_limit,
-        unpadded_N=unpadded_N_w1,
-        unpadded_K=unpadded_K_w1,
-    )
-
-    intermediate_cache3 = moe_gemm_a8w4(
-        intermediate_cache1,
-        w2.storage.data,
-        None,
-        quant_config.w2_precision.weight_scale.storage.data,
-        quant_config.w2_precision.flex_ctx.lhs_data.scale,
-        None,
-        quant_config.w2_bias,
-        routing_data,
-        scatter_indx=scatter_indx,
-        gammas=None if apply_router_weight_on_input else gammas,
-        swizzle_mx_scale="CDNA4_SCALE",
-        unpadded_N=unpadded_N_w2,
-        unpadded_K=unpadded_K_w2,
-    )
-
-    return intermediate_cache3
-
-
 def make_routing_data(
     topk_ids: torch.Tensor,
     topk_weights: torch.Tensor,
@@ -539,10 +549,31 @@ def make_routing_data(
 
     # matmul_ogs expects invalid topk_weights to be -1s
     topk_weights = torch.where(topk_ids == -1, -1.0, topk_weights)
-    routing_data, gather_indx, scatter_indx = legacy_routing_from_bitmatrix(
-        bitmatrix, topk_weights, topk_ids, num_local_experts, num_topk
-    )
 
+    if use_legacy_triton_kernels:
+        from triton_kernels.routing import routing_from_bitmatrix
+
+        return routing_from_bitmatrix(
+            bitmatrix, topk_weights, topk_ids, num_local_experts, num_topk
+        )
+
+    sparse_logits = SparseMatrix(indx=topk_ids, vals=topk_weights, mask=bitmatrix)
+    dispatch_indx = sparse_logits.mask_metadata.row_sorted_indx
+    combine_indx = sparse_logits.mask_metadata.col_sorted_indx
+    ragged_batch_metadata = make_ragged_tensor_metadata(
+        sparse_logits.mask_metadata.col_sum,
+        dispatch_indx.shape[0],
+    )
+    gate_scal = sparse_logits.vals.flatten()[combine_indx]
+    routing_data = RoutingData(
+        gate_scal,
+        ragged_batch_metadata.block_sizes,
+        num_local_experts,
+        num_topk,
+        ragged_batch_metadata,
+    )
+    gather_indx = GatherIndx(combine_indx, dispatch_indx)
+    scatter_indx = ScatterIndx(dispatch_indx, combine_indx)
     return routing_data, gather_indx, scatter_indx
 
 
@@ -553,15 +584,7 @@ def expects_unquantized_inputs(self) -> bool:
 
     @staticmethod
     def _supports_current_device() -> bool:
-        p = current_platform
-        if not p.is_cuda_alike():
-            return False
-        cap = p.get_device_capability()
-        if cap is None:
-            return False
-        # (9,0) <= cap < (11,0) covers CUDA SM90 (Hopper), SM100+ (Blackwell)
-        # and ROCm gfx942/gfx950 (which map to 9.4/9.5).
-        return (9, 0) <= (cap.major, cap.minor) < (11, 0)
+        return _triton_kernel_moe_supports_current_device() and has_triton_kernels()
 
     @staticmethod
     def _supports_no_act_and_mul() -> bool:
@@ -718,7 +741,7 @@ def apply(
         )
 
 
-class UnfusedOAITritonExperts(BaseOAITritonExperts):
+class UnfusedOAITritonExperts(LoRAExpertsMixin, BaseOAITritonExperts):
     """
     A Triton based MoE expert class that operates on expert standard
     format and explicitly keeps the activation and reduction (moe_sum) steps
@@ -762,6 +785,37 @@ def workspace_shapes(
     def moe_sum(self, input: torch.Tensor, output: torch.Tensor):
         ops.moe_sum(input, output)
 
+    def activation(
+        self,
+        activation: MoEActivation,
+        output: torch.Tensor,
+        input: torch.Tensor,
+    ) -> None:
+        quant_config = self.quant_config or FUSED_MOE_UNQUANTIZED_CONFIG
+        if activation == MoEActivation.SWIGLUOAI:
+            alpha = (
+                quant_config.gemm1_alpha
+                if quant_config.gemm1_alpha is not None
+                else 1.702
+            )
+            limit = (
+                quant_config.gemm1_clamp_limit
+                if quant_config.gemm1_clamp_limit is not None
+                else 7.0
+            )
+            torch.ops._C.swigluoai_and_mul(output, input, alpha, limit)
+        elif (
+            activation == MoEActivation.SILU
+            and quant_config.gemm1_clamp_limit is not None
+        ):
+            swiglu_limit_func(
+                output,
+                input,
+                quant_config.gemm1_clamp_limit,
+            )
+        else:
+            super().activation(activation, output, input)
+
     def apply(
         self,
         output: torch.Tensor,
@@ -785,6 +839,7 @@ def apply(
         if quant_config is None:
             quant_config = FUSED_MOE_UNQUANTIZED_CONFIG
 
+        global_topk_ids = topk_ids
         if expert_map is not None:
             topk_ids = expert_map[topk_ids]
 
@@ -839,15 +894,45 @@ def apply(
             y=intermediate_cache1,
         )
 
+        # w13 LoRA: gather the activation input from expert-sorted
+        # intermediate_cache1, then add the LoRA delta in-place on that copy
+        # before passing it to activation — exactly mirroring the old
+        # decorator approach which modified the gathered tensor in-place.
+        act_input = intermediate_cache1.view(-1, N)[gather_indx.dst_indx]
+
+        sorted_token_ids_lora = None
+        expert_ids_lora = None
+        num_tokens_post_padded_lora = None
+        token_lora_mapping = None
+        lora_context = self._lora_context
+        if lora_context is not None:
+            (
+                sorted_token_ids_lora,
+                expert_ids_lora,
+                num_tokens_post_padded_lora,
+                token_lora_mapping,
+            ) = self.apply_w13_lora(
+                lora_context,
+                y=act_input,
+                x=hidden_states,
+                topk_ids=global_topk_ids,
+                topk_weights=topk_weights,
+                expert_map=expert_map,
+                w1=w1,
+                w2=w2,
+                num_tokens=M,
+                top_k_num=topk,
+            )
+
         self.activation(
             activation,
             intermediate_cache2,
-            intermediate_cache1.view(-1, N)[gather_indx.dst_indx],
+            act_input,
         )
 
-        # matmul_ogs grouped reduction fuse sum across multiple experts:
+        # matmul_ogs grouped reduction fuses sum across multiple experts:
         # y[dst_indx // n_expts_act, :] += x
-        # Need to set n_expts_act to 1 to unfuse moe_sum
+        # Set n_expts_act to 1 to unfuse the sum so we can do it manually via moe_sum.
         routing_data.n_expts_act = 1
 
         matmul_ogs(
@@ -861,6 +946,24 @@ def apply(
             y=intermediate_cache3,
         )
 
+        # w2 LoRA: after matmul_ogs with scatter_indx, intermediate_cache3 is
+        # in token-topk order, matching the (M, topk, K) layout add_lora_w2 expects.
+        if lora_context is not None:
+            self.apply_w2_lora(
+                lora_context,
+                y=intermediate_cache3.view(-1, topk, K),
+                x=intermediate_cache2,
+                topk_weights=topk_weights,
+                sorted_token_ids_lora=sorted_token_ids_lora,
+                expert_ids_lora=expert_ids_lora,
+                num_tokens_post_padded_lora=num_tokens_post_padded_lora,
+                token_lora_mapping=token_lora_mapping,
+                num_tokens=M,
+                w1=w1,
+                w2=w2,
+                top_k_num=topk,
+            )
+
         self.moe_sum(intermediate_cache3.view(-1, topk, K), output)
 
 
@@ -885,15 +988,7 @@ def activation_format() -> mk.FusedMoEActivationFormat:
 
     @staticmethod
     def _supports_current_device() -> bool:
-        p = current_platform
-        if not p.is_cuda_alike():
-            return False
-        cap = p.get_device_capability()
-        if cap is None:
-            return False
-        # (9,0) <= cap < (11,0) covers CUDA SM90 (Hopper), SM100+ (Blackwell)
-        # and ROCm gfx942/gfx950 (which map to 9.4/9.5).
-        return (9, 0) <= (cap.major, cap.minor) < (11, 0)
+        return _triton_kernel_moe_supports_current_device() and has_triton_kernels()
 
     @staticmethod
     def _supports_no_act_and_mul() -> bool:
diff --git a/vllm/model_executor/layers/fused_moe/experts/lora_context.py b/vllm/model_executor/layers/fused_moe/experts/lora_context.py
new file mode 100644
index 000000000000..ab1f0bfc1476
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/experts/lora_context.py
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from dataclasses import dataclass
+
+import torch
+
+from vllm.lora.punica_wrapper.punica_base import PunicaWrapperBase
+
+
+@dataclass
+class MoELoRAContext:
+    """
+    Carries all LoRA state for one MoE forward pass.
+
+    Built by FusedMoEWithLoRA.forward() and propagated explicitly through the
+    modular kernel path (FusedMoEKernel -> FusedMoEExpertsModular.apply) so
+    that TritonExperts.apply() can compute the LoRA contribution inline,
+    replacing the decorator-based monkey-patch approach.
+    """
+
+    # LoRA weight tensors (same shapes as FusedMoEWithLoRA attributes)
+    w13_lora_a_stacked: tuple[torch.Tensor, ...]
+    w13_lora_b_stacked: tuple[torch.Tensor, ...]
+    w2_lora_a_stacked: tuple[torch.Tensor, ...]
+    w2_lora_b_stacked: tuple[torch.Tensor, ...]
+
+    # (max_loras + 1,) int32; slot 0 is the "no-adapter" sentinel
+    adapter_enabled: torch.Tensor
+
+    # Metadata
+    max_loras: int
+    top_k: int
+    w13_num_slices: int  # 2 = gated (gate + up), 1 = non-gated or 3D-fused
+    fully_sharded: bool
+    tp_rank: int
+    tp_size: int
+    local_num_experts: int
+
+    punica_wrapper: PunicaWrapperBase
+
+    # Whether VLLM_TUNED_CONFIG_FOLDER is set; selects get_lora_op_configs vs
+    # try_get_optimal_moe_lora_config for Triton kernel tile configs.
+    use_tuned_config: bool
+
+    # Per-rank token→LoRA mapping after EP dispatch. Set by
+    # FusedMoEPrepareAndFinalizeModular.prepare() when EP+LoRA is active, read
+    # by LoRAExpertsMixin helpers in place of punica_wrapper's global mapping.
+    # None means no dispatch happened (non-EP path), in which case callers
+    # fall back to punica_wrapper.token_mapping_meta.
+    local_token_lora_mapping: torch.Tensor | None = None
diff --git a/vllm/model_executor/layers/fused_moe/experts/lora_experts_mixin.py b/vllm/model_executor/layers/fused_moe/experts/lora_experts_mixin.py
new file mode 100644
index 000000000000..2a680909d5f6
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/experts/lora_experts_mixin.py
@@ -0,0 +1,112 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+from vllm.model_executor.layers.fused_moe.experts.lora_context import MoELoRAContext
+
+
+class LoRAExpertsMixin:
+    """
+    Mixin for FusedMoEExpertsModular subclasses that natively handle
+    MoELoRAContext inside their apply() implementation.
+
+    Mixing this class in:
+    - Flips supports_lora() to True so _can_fused_experts_support lets
+      LoRA through the gate check.
+    - Stashes a MoELoRAContext on the experts instance via
+      set_lora_context(), which apply() consumes from self._lora_context.
+    - Provides apply_w13_lora / apply_w2_lora helpers that dispatch to
+      the PunicaWrapper kernels.
+
+    The helper methods are pure functions of their inputs; all required
+    state is on lora_context or passed as arguments.
+    """
+
+    _lora_context: MoELoRAContext | None = None
+
+    def set_lora_context(self, ctx: MoELoRAContext) -> None:
+        self._lora_context = ctx
+
+    @staticmethod
+    def supports_lora() -> bool:
+        return True
+
+    def apply_w13_lora(
+        self,
+        lora_context: MoELoRAContext,
+        *,
+        y: torch.Tensor,
+        x: torch.Tensor,
+        topk_ids: torch.Tensor,
+        topk_weights: torch.Tensor,
+        expert_map: torch.Tensor | None,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        num_tokens: int,
+        top_k_num: int,
+    ) -> tuple[
+        torch.Tensor | None,
+        torch.Tensor | None,
+        torch.Tensor | None,
+        torch.Tensor | None,
+    ]:
+        return lora_context.punica_wrapper.add_lora_w13(
+            y,
+            x,
+            lora_context.w13_lora_a_stacked,
+            lora_context.w13_lora_b_stacked,
+            topk_ids,
+            topk_weights,
+            expert_map,
+            w1,
+            w2,
+            num_tokens,
+            top_k_num,
+            lora_context.max_loras,
+            lora_context.adapter_enabled,
+            lora_context.local_num_experts,
+            lora_context.top_k,
+            lora_context.w13_num_slices,
+            lora_context.fully_sharded,
+            lora_context.use_tuned_config,
+            token_lora_mapping=lora_context.local_token_lora_mapping,
+        )
+
+    def apply_w2_lora(
+        self,
+        lora_context: MoELoRAContext,
+        *,
+        y: torch.Tensor,
+        x: torch.Tensor,
+        topk_weights: torch.Tensor,
+        sorted_token_ids_lora: torch.Tensor | None,
+        expert_ids_lora: torch.Tensor | None,
+        num_tokens_post_padded_lora: torch.Tensor | None,
+        token_lora_mapping: torch.Tensor | None,
+        num_tokens: int,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        top_k_num: int,
+    ) -> None:
+        lora_context.punica_wrapper.add_lora_w2(
+            y,
+            x,
+            lora_context.w2_lora_a_stacked,
+            lora_context.w2_lora_b_stacked,
+            topk_weights,
+            sorted_token_ids_lora,
+            expert_ids_lora,
+            num_tokens_post_padded_lora,
+            token_lora_mapping,
+            num_tokens,
+            w1,
+            w2,
+            top_k_num,
+            lora_context.max_loras,
+            lora_context.adapter_enabled,
+            lora_context.top_k,
+            lora_context.fully_sharded,
+            lora_context.tp_rank,
+            lora_context.use_tuned_config,
+        )
diff --git a/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py b/vllm/model_executor/layers/fused_moe/experts/marlin_moe.py
similarity index 82%
rename from vllm/model_executor/layers/fused_moe/fused_marlin_moe.py
rename to vllm/model_executor/layers/fused_moe/experts/marlin_moe.py
index 136a8188d6a0..d2d4444d8cae 100644
--- a/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/marlin_moe.py
@@ -17,6 +17,9 @@
     FusedMoEParallelConfig,
     FusedMoEQuantConfig,
 )
+from vllm.model_executor.layers.fused_moe.experts.lora_experts_mixin import (
+    LoRAExpertsMixin,
+)
 from vllm.model_executor.layers.fused_moe.moe_align_block_size import (
     batched_moe_align_block_size,
     moe_align_block_size,
@@ -28,6 +31,7 @@
 from vllm.model_executor.layers.fused_moe.utils import (
     _resize_cache,
     disable_inplace,
+    swiglu_limit_func,
 )
 from vllm.model_executor.layers.quantization.utils.marlin_utils import (
     get_marlin_input_dtype,
@@ -40,7 +44,10 @@
     kFp8Static128BlockSym,
     kFp8StaticChannelSym,
     kFp8StaticTensorSym,
+    kInt4Static,
+    kInt8Static,
     kMxfp4Static,
+    kMxfp8Static,
     kNvfp4Static,
 )
 from vllm.platforms import current_platform
@@ -84,6 +91,7 @@ def _fused_marlin_moe(
     output: torch.Tensor | None = None,
     input_dtype: torch.dtype | None = None,
     is_k_full: bool = True,
+    clamp_limit: float | None = None,
 ) -> torch.Tensor:
     assert hidden_states.ndim == 2
     M, K = hidden_states.size()
@@ -151,11 +159,18 @@ def _fused_marlin_moe(
         use_fp32_reduce=True,
         is_zp_float=False,
     )
-    activation_func(
-        activation,
-        intermediate_cache2,
-        intermediate_cache1.view(-1, w13_num_shards * N),
-    )
+    if clamp_limit is not None and activation == MoEActivation.SILU:
+        swiglu_limit_func(
+            intermediate_cache2,
+            intermediate_cache1.view(-1, w13_num_shards * N),
+            clamp_limit,
+        )
+    else:
+        activation_func(
+            activation,
+            intermediate_cache2,
+            intermediate_cache1.view(-1, w13_num_shards * N),
+        )
 
     if output is None:
         output = intermediate_cache3
@@ -243,6 +258,7 @@ def fused_marlin_moe(
     output: torch.Tensor | None = None,
     input_dtype: torch.dtype | None = None,
     inplace: bool = False,
+    clamp_limit: float | None = None,
 ) -> torch.Tensor:
     """
     This function computes a Mixture of Experts (MoE) layer using two sets of
@@ -359,6 +375,7 @@ def fused_marlin_moe(
         output=None,
         input_dtype=input_dtype,
         is_k_full=is_k_full,
+        clamp_limit=clamp_limit,
     ).view(-1, topk, K)
 
     if output is None:
@@ -384,6 +401,8 @@ def batched_fused_marlin_moe(
     global_num_experts: int = -1,
     activation: MoEActivation = MoEActivation.SILU,
     expert_map: torch.Tensor | None = None,
+    input_global_scale1: torch.Tensor | None = None,
+    input_global_scale2: torch.Tensor | None = None,
     global_scale1: torch.Tensor | None = None,
     global_scale2: torch.Tensor | None = None,
     g_idx1: torch.Tensor | None = None,
@@ -397,7 +416,9 @@ def batched_fused_marlin_moe(
     intermediate_cache2: torch.Tensor | None = None,
     is_k_full: bool = True,
     output: torch.Tensor | None = None,
+    input_dtype: torch.dtype | None = None,
     inplace: bool = False,
+    clamp_limit: float | None = None,
 ) -> torch.Tensor:
     """
     This function massages the inputs so the batched hidden_states can be
@@ -471,7 +492,15 @@ def batched_fused_marlin_moe(
     topk = 1
 
     # TODO(varun) : Choose a decent block size like in fused_marlin_moe
+    # Tune block_size_m based on expert capacity to reduce padding overhead.
     block_size_m = 64
+    for b_m in [8, 16, 32, 48, 64]:
+        if BATCH_TOKENS_MAX / b_m < 0.9:
+            block_size_m = b_m
+            break
+
+    if input_dtype is not None and input_dtype.itemsize == 1:
+        block_size_m = max(block_size_m, 16)
 
     sorted_token_ids, expert_ids, num_tokens_post_padded = batched_moe_align_block_size(
         max_tokens_per_batch=BATCH_TOKENS_MAX,
@@ -507,6 +536,8 @@ def batched_fused_marlin_moe(
         sorted_token_ids=sorted_token_ids,
         expert_ids=expert_ids,
         num_tokens_post_padded=num_tokens_post_padded,
+        input_global_scale1=input_global_scale1,
+        input_global_scale2=input_global_scale2,
         global_scale1=global_scale1,
         global_scale2=global_scale2,
         g_idx1=g_idx1,
@@ -519,7 +550,9 @@ def batched_fused_marlin_moe(
         intermediate_cache13=intermediate_cache13,
         intermediate_cache2=intermediate_cache2,
         output=output.view(-1, K) if output is not None else output,
+        input_dtype=input_dtype,
         is_k_full=is_k_full,
+        clamp_limit=clamp_limit,
     )
 
     output = output.view(B, BATCH_TOKENS_MAX, K)
@@ -553,6 +586,7 @@ def __init__(
         self.w2_g_idx_sort_indices = w2_g_idx_sort_indices
         self.is_k_full = is_k_full
         self.input_dtype = get_marlin_input_dtype()
+        self.gemm1_clamp_limit = quant_config.gemm1_clamp_limit
 
         super().__init__(
             moe_config=moe_config,
@@ -582,7 +616,10 @@ def _supports_quant_scheme(
             kFp8StaticChannelSym,
             kFp8StaticTensorSym,
             kMxfp4Static,
+            kMxfp8Static,
             kNvfp4Static,
+            kInt4Static,
+            kInt8Static,
         ]
         return weight_key in SUPPORTED_W
 
@@ -593,10 +630,12 @@ def _supports_activation(activation: MoEActivation) -> bool:
         return activation in [
             MoEActivation.SILU,
             MoEActivation.GELU,
+            MoEActivation.GELU_TANH,
             MoEActivation.SWIGLUOAI,
             MoEActivation.SWIGLUSTEP,
             MoEActivation.SILU_NO_MUL,
             MoEActivation.GELU_NO_MUL,
+            MoEActivation.GELU_TANH_NO_MUL,
             MoEActivation.RELU2_NO_MUL,
         ]
 
@@ -609,8 +648,9 @@ def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bo
 
     @property
     def quant_type_id(self) -> int:
-        # uint4b8 will be set for int4 weight and float4_e2m1f will be used for mxfp4
         if self.quant_config.use_int4_w4a16:
+            if self.w1_zp is not None or self.w2_zp is not None:
+                return scalar_types.uint4.id
             return scalar_types.uint4b8.id
         elif self.quant_config.use_mxfp4_w4a16 or self.quant_config.use_nvfp4_w4a16:
             return scalar_types.float4_e2m1f.id
@@ -650,7 +690,7 @@ def moe_problem_size(
         return E, M, N, K, topk
 
 
-class MarlinExperts(MarlinExpertsBase):
+class MarlinExperts(LoRAExpertsMixin, MarlinExpertsBase):
     """Marlin-based fused MoE expert implementation."""
 
     def supports_expert_map(self) -> bool:
@@ -715,6 +755,112 @@ def apply(
     ):
         assert self.w1_scale is not None
         assert self.w2_scale is not None
+
+        ctx = self._lora_context
+        if ctx is None:
+            fused_marlin_moe(
+                hidden_states=hidden_states,
+                w1=w1,
+                w2=w2,
+                bias1=self.w1_bias,
+                bias2=self.w2_bias,
+                w1_scale=self.w1_scale,
+                w2_scale=self.w2_scale,
+                topk_weights=topk_weights,
+                topk_ids=topk_ids,
+                global_scale1=self.g1_alphas,
+                global_scale2=self.g2_alphas,
+                input_global_scale1=self.a1_gscale,
+                input_global_scale2=self.a2_gscale,
+                w1_zeros=self.w1_zp,
+                w2_zeros=self.w2_zp,
+                quant_type_id=self.quant_type_id,
+                apply_router_weight_on_input=apply_router_weight_on_input,
+                global_num_experts=global_num_experts,
+                activation=activation,
+                activation_func=self.activation,
+                moe_sum=self.moe_sum,
+                expert_map=expert_map,
+                output=output,
+                # Workspaces are swapped in workspace_shapes() to account for proper
+                # output buffer allocation. Please refer to workspace_shapes().
+                intermediate_cache13=workspace2,
+                intermediate_cache2=workspace13,
+                g_idx1=self.w13_g_idx,
+                g_idx2=self.w2_g_idx,
+                sort_indices1=self.w13_g_idx_sort_indices,
+                sort_indices2=self.w2_g_idx_sort_indices,
+                is_k_full=self.is_k_full,
+                input_dtype=self.input_dtype,
+                clamp_limit=self.gemm1_clamp_limit,
+            )
+            return
+
+        # LoRA path: wrap activation_func and moe_sum to inject LoRA at the
+        # two natural injection points.
+        #
+        # Marlin uses moe_align_block_size (same as TritonExperts) so
+        # intermediate_cache1 is indexed by flat (token, expert) pair index,
+        # which is compatible with add_lora_fused_moe's scatter mechanism.
+
+        M = hidden_states.size(0)
+        top_k_num = topk_ids.size(1)
+        lora_state: dict = {}
+
+        def activation_with_lora(
+            act_enum: MoEActivation,
+            act_output: torch.Tensor,
+            act_input: torch.Tensor,
+        ) -> None:
+            # act_input  = intermediate_cache1 (M*topk, 2N for gated)
+            # act_output = intermediate_cache2 (M*topk, N)
+
+            (
+                sorted_token_ids_lora,
+                expert_ids_lora,
+                num_tokens_post_padded_lora,
+                token_lora_mapping,
+            ) = self.apply_w13_lora(
+                ctx,
+                y=act_input,
+                x=hidden_states,
+                topk_ids=topk_ids,
+                topk_weights=topk_weights,
+                expert_map=expert_map,
+                w1=w1,
+                w2=w2,
+                num_tokens=M,
+                top_k_num=top_k_num,
+            )
+            lora_state.update(
+                {
+                    "sorted": sorted_token_ids_lora,
+                    "eids": expert_ids_lora,
+                    "npad": num_tokens_post_padded_lora,
+                    "tlm": token_lora_mapping,
+                }
+            )
+            self.activation(act_enum, act_output, act_input)
+            lora_state["cache2"] = act_output
+
+        def moe_sum_with_lora(moe_out: torch.Tensor, out: torch.Tensor) -> None:
+            # moe_out shape: (M, topk, K)
+            self.apply_w2_lora(
+                ctx,
+                y=moe_out,
+                x=lora_state["cache2"],
+                topk_weights=topk_weights,
+                sorted_token_ids_lora=lora_state["sorted"],
+                expert_ids_lora=lora_state["eids"],
+                num_tokens_post_padded_lora=lora_state["npad"],
+                token_lora_mapping=lora_state["tlm"],
+                num_tokens=M,
+                w1=w1,
+                w2=w2,
+                top_k_num=top_k_num,
+            )
+            self.moe_sum(moe_out, out)
+
         return fused_marlin_moe(
             hidden_states=hidden_states,
             w1=w1,
@@ -727,16 +873,18 @@ def apply(
             topk_ids=topk_ids,
             global_scale1=self.g1_alphas,
             global_scale2=self.g2_alphas,
+            input_global_scale1=self.a1_gscale,
+            input_global_scale2=self.a2_gscale,
+            w1_zeros=self.w1_zp,
+            w2_zeros=self.w2_zp,
             quant_type_id=self.quant_type_id,
             apply_router_weight_on_input=apply_router_weight_on_input,
             global_num_experts=global_num_experts,
             activation=activation,
-            activation_func=self.activation,
-            moe_sum=self.moe_sum,
+            activation_func=activation_with_lora,
+            moe_sum=moe_sum_with_lora,
             expert_map=expert_map,
             output=output,
-            # Workspaces are swapped in workspace_shapes() to account for proper
-            # output buffer allocation. Please refer to workspace_shapes().
             intermediate_cache13=workspace2,
             intermediate_cache2=workspace13,
             g_idx1=self.w13_g_idx,
@@ -745,6 +893,7 @@ def apply(
             sort_indices2=self.w2_g_idx_sort_indices,
             is_k_full=self.is_k_full,
             input_dtype=self.input_dtype,
+            clamp_limit=self.gemm1_clamp_limit,
         )
 
     def moe_sum(self, input: torch.Tensor, output: torch.Tensor) -> None:
@@ -842,6 +991,8 @@ def apply(
             global_num_experts=global_num_experts,
             activation=activation,
             expert_map=expert_map,
+            input_global_scale1=self.a1_gscale,
+            input_global_scale2=self.a2_gscale,
             output=output,
             intermediate_cache13=workspace13,
             intermediate_cache2=workspace2,
@@ -849,5 +1000,9 @@ def apply(
             g_idx2=self.w2_g_idx,
             sort_indices1=self.w13_g_idx_sort_indices,
             sort_indices2=self.w2_g_idx_sort_indices,
+            w1_zeros=self.w1_zp,
+            w2_zeros=self.w2_zp,
+            input_dtype=self.input_dtype,
             is_k_full=self.is_k_full,
+            clamp_limit=self.gemm1_clamp_limit,
         )
diff --git a/vllm/model_executor/layers/fused_moe/experts/nvfp4_emulation_moe.py b/vllm/model_executor/layers/fused_moe/experts/nvfp4_emulation_moe.py
new file mode 100644
index 000000000000..de5b45ccb87b
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/experts/nvfp4_emulation_moe.py
@@ -0,0 +1,164 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+NVFP4 quantization emulation for MoE.
+
+This file implements NVFP4 emulation for NVFP4 MOE in case the hardware used does not
+natively support NVFP4 MOE.
+
+Weights are dequantized on the fly during each forward, we fall back to calling
+`TritonExperts` using BF16, and fake NVFP4 quantize-dequantize
+is applied on `a13`, `a2`.
+"""
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEQuantConfig,
+)
+from vllm.model_executor.layers.fused_moe.experts.triton_moe import TritonExperts
+from vllm.model_executor.layers.fused_moe.utils import moe_kernel_quantize_input
+from vllm.model_executor.layers.quantization.utils.nvfp4_emulation_utils import (
+    dequantize_to_dtype,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    QuantKey,
+    kNvfp4Dynamic,
+    kNvfp4Static,
+)
+
+logger = init_logger(__name__)
+
+
+class Nvfp4QuantizationEmulationTritonExperts(TritonExperts):
+    """
+    Extension of TritonExperts to support emulated NVFP4 MoE experts.
+
+    It may be used for NVFP4 models when the device does not have
+    native support for this dtype.
+    """
+
+    def __init__(
+        self,
+        moe_config: FusedMoEConfig,
+        quant_config: FusedMoEQuantConfig,
+    ):
+        super().__init__(moe_config, quant_config)
+        logger.warning_once(
+            "Using Nvfp4QuantizationEmulationTritonExperts MOE backend. This will"
+            " dequantize weights on the fly and may be slower than native"
+            " quantized MOE. Consider using a device with native quantization"
+            " support (e.g. Nvidia Blackwell) for better performance."
+        )
+
+        # `TritonExperts.apply` expects pre-dequantized weights,
+        # which we handle in `apply` below.
+        self.w1_scale_val = self.quant_config.w1_scale
+        self.w2_scale_val = self.quant_config.w2_scale
+
+        self.quant_config._w1.scale = None
+        self.quant_config._w2.scale = None
+
+        self.quantization_emulation = True
+
+    @property
+    def quant_dtype(self) -> torch.dtype | str | None:
+        return "nvfp4"
+
+    @property
+    def expects_unquantized_inputs(self) -> bool:
+        return True
+
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        return (weight_key, activation_key) == (kNvfp4Static, kNvfp4Dynamic)
+
+    def apply(
+        self,
+        output: torch.Tensor,
+        hidden_states: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        activation: MoEActivation,
+        global_num_experts: int,
+        expert_map: torch.Tensor | None,
+        a1q_scale: torch.Tensor | None,
+        a2_scale: torch.Tensor | None,
+        workspace13: torch.Tensor,
+        workspace2: torch.Tensor,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        apply_router_weight_on_input: bool,
+    ):
+        """
+        Apply emulated quantized MoE computation.
+
+        This dequantizes the weights on the fly and calls fused_experts_impl
+        with activation quantization support.
+        """
+        # Dequantize weights if they are quantized
+        # For NVFP4, weights are packed in uint8 format
+        # w1 shape: [num_experts, 2*intermediate_size, hidden_size//2]
+        # w2 shape: [num_experts, hidden_size, intermediate_size//2]
+        assert w1.dtype == torch.uint8
+        assert w2.dtype == torch.uint8
+
+        # Dequantize w1 from packed NVFP4 to fp16/bf16
+        w13_global_scale = self.quant_config.g1_alphas
+
+        w1_dequant = dequantize_to_dtype(
+            tensor_fp4=w1,
+            tensor_sf=self.w1_scale_val,
+            global_scale=w13_global_scale,
+            dtype=hidden_states.dtype,
+            block_size=16,
+            swizzle=False,
+        )
+
+        # Dequantize w2 from packed NVFP4 to fp16/bf16
+        w2_global_scale = self.quant_config.g2_alphas
+
+        w2_dequant = dequantize_to_dtype(
+            tensor_fp4=w2,
+            tensor_sf=self.w2_scale_val,
+            global_scale=w2_global_scale,
+            dtype=hidden_states.dtype,
+            block_size=16,
+            swizzle=False,
+        )
+
+        hidden_states, _ = moe_kernel_quantize_input(
+            A=hidden_states,
+            A_scale=self.quant_config.a1_gscale,
+            quant_dtype="nvfp4",
+            per_act_token_quant=False,
+            quantization_emulation=True,
+        )
+
+        # Activation quantization/dequantization is deferred to
+        # `moe_kernel_quantize_input` in TritonExperts.apply.
+        super().apply(
+            output=output,
+            hidden_states=hidden_states,
+            w1=w1_dequant,
+            w2=w2_dequant,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            activation=activation,
+            global_num_experts=global_num_experts,
+            expert_map=expert_map,
+            a1q_scale=None,
+            a2_scale=self.quant_config.a2_gscale,
+            workspace13=workspace13,
+            workspace2=workspace2,
+            expert_tokens_meta=expert_tokens_meta,
+            apply_router_weight_on_input=apply_router_weight_on_input,
+        )
diff --git a/vllm/model_executor/layers/fused_moe/experts/ocp_mx_emulation_moe.py b/vllm/model_executor/layers/fused_moe/experts/ocp_mx_emulation_moe.py
new file mode 100644
index 000000000000..feb8c2ea769b
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/experts/ocp_mx_emulation_moe.py
@@ -0,0 +1,186 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+OCP MX quantization emulation for MoE.
+
+This file implements OCP MX (MXFP4/MXFP6) emulation for MoE in case the
+hardware used does not natively support OCP MX MoE.
+
+Weights are dequantized on the fly during each forward, we fall back to calling
+`TritonExperts` using BF16, and fake OCP MX quantize-dequantize
+is applied on activations via `moe_kernel_quantize_input`.
+"""
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEQuantConfig,
+)
+from vllm.model_executor.layers.fused_moe.experts.triton_moe import TritonExperts
+from vllm.model_executor.layers.fused_moe.utils import moe_kernel_quantize_input
+from vllm.model_executor.layers.quantization.utils.mxfp4_utils import dequant_mxfp4
+from vllm.model_executor.layers.quantization.utils.mxfp6_utils import dequant_mxfp6
+from vllm.model_executor.layers.quantization.utils.ocp_mx_utils import (
+    OCP_MX_Scheme,
+)
+
+logger = init_logger(__name__)
+
+
+class OCP_MXQuantizationEmulationTritonExperts(TritonExperts):
+    """
+    Extension of TritonExperts to support emulated OCP MX MoE experts.
+
+    It may be used for OCP MX (MXFP4/MXFP6) models when the device does not
+    have native support for these dtypes.
+    """
+
+    def __init__(
+        self,
+        moe_config: FusedMoEConfig,
+        quant_config: FusedMoEQuantConfig,
+    ):
+        super().__init__(moe_config, quant_config)
+        logger.warning_once(
+            "Using OCP_MXQuantizationEmulationTritonExperts MOE backend. This"
+            " will dequantize weights on the fly and may be slower than native"
+            " quantized MOE. Consider using a device with native OCP MX"
+            " quantization support for better performance."
+        )
+
+        self.ocp_mx_scheme = quant_config.ocp_mx_scheme
+        assert self.ocp_mx_scheme is not None, (
+            "ocp_mx_scheme must be set in quant_config for"
+            " OCP_MXQuantizationEmulationTritonExperts"
+        )
+
+        # `TritonExperts.apply` expects pre-dequantized weights,
+        # which we handle in `apply` below.
+        self.w1_scale_val = self.quant_config.w1_scale
+        self.w2_scale_val = self.quant_config.w2_scale
+
+        self.quant_config._w1.scale = None
+        self.quant_config._w2.scale = None
+
+        self.quantization_emulation = True
+
+        if self.ocp_mx_scheme in {
+            OCP_MX_Scheme.w_mxfp4_a_mxfp4,
+        }:
+            # Weight has to be dequantized for mxfp4 emulation.
+            self._quant_dtype = "mxfp4"
+        elif self.ocp_mx_scheme in [
+            OCP_MX_Scheme.w_mxfp4_a_mxfp6_e3m2,
+            OCP_MX_Scheme.w_mxfp4_a_mxfp6_e2m3,
+            OCP_MX_Scheme.w_mxfp6_e3m2_a_mxfp6_e3m2,
+            OCP_MX_Scheme.w_mxfp6_e2m3_a_mxfp6_e2m3,
+        ]:
+            self._quant_dtype = "mxfp6"
+        elif self.ocp_mx_scheme in [
+            OCP_MX_Scheme.w_mxfp4_a_fp8,
+            OCP_MX_Scheme.w_mxfp6_e3m2_a_fp8,
+        ]:
+            # TODO: double check this one
+            self._quant_dtype = "mxfp8"
+
+    @property
+    def quant_dtype(self) -> torch.dtype | str | None:
+        return self._quant_dtype
+
+    @property
+    def expects_unquantized_inputs(self) -> bool:
+        return True
+
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key,
+        activation_key,
+    ) -> bool:
+        # This class is used for emulation only - the oracle selects it
+        # directly rather than via quant scheme matching.
+        return True
+
+    def _dequantize_weights(
+        self,
+        w: torch.Tensor,
+        w_scale: torch.Tensor,
+        dtype: torch.dtype,
+    ) -> torch.Tensor:
+        """Dequantize weights based on the OCP MX scheme."""
+        if self.ocp_mx_scheme.startswith("w_mxfp4"):  # type: ignore[union-attr]
+            return dequant_mxfp4(w, w_scale, dtype)
+        elif self.ocp_mx_scheme.startswith("w_mxfp6_e3m2"):  # type: ignore[union-attr]
+            return dequant_mxfp6(w, w_scale, quant_dtype="fp6_e3m2", float_dtype=dtype)
+        elif self.ocp_mx_scheme.startswith("w_mxfp6_e2m3"):  # type: ignore[union-attr]
+            return dequant_mxfp6(w, w_scale, quant_dtype="fp6_e2m3", float_dtype=dtype)
+        else:
+            raise NotImplementedError(f"Unsupported ocp_mx_scheme={self.ocp_mx_scheme}")
+
+    def apply(
+        self,
+        output: torch.Tensor,
+        hidden_states: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        activation: MoEActivation,
+        global_num_experts: int,
+        expert_map: torch.Tensor | None,
+        a1q_scale: torch.Tensor | None,
+        a2_scale: torch.Tensor | None,
+        workspace13: torch.Tensor,
+        workspace2: torch.Tensor,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        apply_router_weight_on_input: bool,
+    ):
+        """
+        Apply emulated quantized MoE computation.
+
+        This dequantizes the weights on the fly and calls TritonExperts.apply
+        with activation quantization support.
+        """
+        assert w1.dtype == torch.uint8
+        assert w2.dtype == torch.uint8
+
+        # Dequantize w1 and w2 from packed OCP MX format to bf16/fp16
+        w1_dequant = self._dequantize_weights(
+            w1, self.w1_scale_val, hidden_states.dtype
+        )
+        w2_dequant = self._dequantize_weights(
+            w2, self.w2_scale_val, hidden_states.dtype
+        )
+
+        # Apply activation QDQ if needed by the OCP MX scheme
+        hidden_states, _ = moe_kernel_quantize_input(
+            A=hidden_states,
+            A_scale=None,
+            quant_dtype=self.quant_config.quant_dtype,
+            per_act_token_quant=False,
+            ocp_mx_scheme=self.ocp_mx_scheme,
+            quantization_emulation=True,
+        )
+
+        # Activation quantization/dequantization is deferred to
+        # `moe_kernel_quantize_input` in TritonExperts.apply.
+        super().apply(
+            output=output,
+            hidden_states=hidden_states,
+            w1=w1_dequant,
+            w2=w2_dequant,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            activation=activation,
+            global_num_experts=global_num_experts,
+            expert_map=expert_map,
+            a1q_scale=None,
+            a2_scale=None,
+            workspace13=workspace13,
+            workspace2=workspace2,
+            expert_tokens_meta=expert_tokens_meta,
+            apply_router_weight_on_input=apply_router_weight_on_input,
+        )
diff --git a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py b/vllm/model_executor/layers/fused_moe/experts/rocm_aiter_moe.py
similarity index 82%
rename from vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py
rename to vllm/model_executor/layers/fused_moe/experts/rocm_aiter_moe.py
index d24bda101ffa..2b8abbfc50d6 100644
--- a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/rocm_aiter_moe.py
@@ -17,6 +17,7 @@
 from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
     TopKWeightAndReduceNoOP,
 )
+from vllm.model_executor.layers.fused_moe.utils import disable_inplace
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     QuantKey,
     kFp8Dynamic128Sym,
@@ -25,6 +26,7 @@
     kFp8Static128BlockSym,
     kFp8StaticChannelSym,
     kFp8StaticTensorSym,
+    kMxfp4Dynamic,
     kMxfp4Static,
 )
 
@@ -108,6 +110,55 @@ def init_aiter_topK_meta_data(
     aiter_topK_meta_data = (total_topk_weights, total_topk_ids)
 
 
+def inject_shared_expert_weights(
+    topk_weights: torch.Tensor,
+    topk_ids: torch.Tensor,
+    topk: int,
+    num_fused_shared_experts: int,
+    shared_expert_weights: torch.Tensor | None = None,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Merge routed topk results with the shared expert buffer and inject
+    dynamic per-token shared expert gate values for AITER fusion.
+
+    For routers that already return the combined buffer (e.g. GroupedTopKRouter
+    via rocm_aiter_grouped_topk), only the dynamic weight injection is needed.
+    For routers that return only routed slots (e.g. FusedTopKRouter), this also
+    copies the routed results into the pre-allocated combined buffer.
+    """
+    if num_fused_shared_experts == 0:
+        return topk_weights, topk_ids
+
+    assert aiter_topK_meta_data is not None, (
+        "aiter_topK_meta_data is not initialized but "
+        "num_fused_shared_experts > 0. Ensure init_aiter_topK_meta_data "
+        "is called before routing."
+    )
+
+    total_topk_weights, total_topk_ids = aiter_topK_meta_data
+    token = topk_weights.shape[0]
+
+    assert total_topk_weights.shape[0] >= token, (
+        f"AITER topK meta data supports {total_topk_weights.shape[0]} "
+        f"tokens, but got {token} tokens."
+    )
+
+    total_topk_weights_slice = total_topk_weights[:token]
+    total_topk_ids_slice = total_topk_ids[:token]
+
+    if topk_weights.shape[1] == topk:
+        total_topk_weights_slice[:, :topk] = topk_weights
+        total_topk_ids_slice[:, :topk] = topk_ids
+        topk_weights = total_topk_weights_slice
+        topk_ids = total_topk_ids_slice
+
+    if shared_expert_weights is not None:
+        topk_weights[:, topk : topk + num_fused_shared_experts] = shared_expert_weights[
+            :token
+        ]
+
+    return topk_weights, topk_ids
+
+
 def rocm_aiter_grouped_topk(
     hidden_states: torch.Tensor,
     gating_output: torch.Tensor,
@@ -152,7 +203,7 @@ def rocm_aiter_grouped_topk(
     if e_score_correction_bias is not None:
         rocm_aiter_ops.biased_grouped_topk(
             gating_output,
-            e_score_correction_bias.to(gating_output.dtype),
+            e_score_correction_bias,
             topk_weights,
             topk_ids,
             num_expert_group,
@@ -252,7 +303,8 @@ def rocm_aiter_fused_experts(
 
     else:
         quant_method = QuantMethod.NO.value
-        # mxfp4: both w4a4 (quark) and w4a16 (oracle CK) use BLOCK_1X32
+        # mxfp4 i.e. w4a4, w4a16 uses BLOCK_1X32
+        # mxfp6 and mxfp8 are unsupported in AITER currently and use emulation instead
         if quant_config.use_mxfp4_w4a4 or quant_config.use_mxfp4_w4a16:
             quant_method = QuantMethod.BLOCK_1X32.value
         # w8a8 block-scaled
@@ -305,8 +357,8 @@ def rocm_aiter_fused_experts(
             doweight_stage1=apply_router_weight_on_input,
             num_local_tokens=num_local_tokens,
             output_dtype=output_dtype,
-            hidden_pad=hidden_pad,
-            intermediate_pad=intermediate_pad,
+            hidden_pad=hidden_pad // 128 * 128,
+            intermediate_pad=intermediate_pad // 64 * 64 * 2,
             bias1=quant_config.w1_bias if quant_config.use_mxfp4_w4a16 else None,
             bias2=quant_config.w2_bias if quant_config.use_mxfp4_w4a16 else None,
         )
@@ -326,6 +378,21 @@ def expects_unquantized_inputs(self) -> bool:
     def activation_format() -> mk.FusedMoEActivationFormat:
         return mk.FusedMoEActivationFormat.Standard
 
+    @staticmethod
+    def is_supported_config(
+        cls, moe_config, weight_key, activation_key, activation_format
+    ):
+        is_supported, reason = super().is_supported_config(
+            cls, moe_config, weight_key, activation_key, activation_format
+        )
+        if not is_supported and not rocm_aiter_ops.is_fused_moe_enabled():
+            reason = (
+                f"{reason}. AITER MoE is not enabled — "
+                "set VLLM_ROCM_USE_AITER=1 and VLLM_ROCM_USE_AITER_MOE=1 "
+                "to enable it"
+            )
+        return is_supported, reason
+
     @staticmethod
     def _supports_current_device() -> bool:
         return rocm_aiter_ops.is_fused_moe_enabled()
@@ -346,6 +413,7 @@ def _supports_quant_scheme(
             (kFp8StaticTensorSym, kFp8DynamicTensorSym),
             (kFp8StaticChannelSym, kFp8DynamicTokenSym),
             (kMxfp4Static, None),
+            (kMxfp4Static, kMxfp4Dynamic),
         ]
         if (weight_key, activation_key) not in SUPPORTED_W_A:
             return False
@@ -437,4 +505,16 @@ def apply(
             num_local_tokens=num_local_tokens,
             output_dtype=output.dtype,
         )
-        output.copy_(result)
+        # avoid redundant copy when output is a view of the result
+        if (
+            output.shape == result.shape
+            and output.dtype == result.dtype
+            and output.device == result.device
+            and output.is_contiguous()
+            and result.is_contiguous()
+            and output._base is None
+            and disable_inplace()
+        ):
+            output.set_(result)
+        else:
+            output.copy_(result)
diff --git a/vllm/model_executor/layers/fused_moe/triton_cutlass_moe.py b/vllm/model_executor/layers/fused_moe/experts/triton_cutlass_moe.py
similarity index 90%
rename from vllm/model_executor/layers/fused_moe/triton_cutlass_moe.py
rename to vllm/model_executor/layers/fused_moe/experts/triton_cutlass_moe.py
index 4aa396d24b0c..1753d7354cba 100644
--- a/vllm/model_executor/layers/fused_moe/triton_cutlass_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/triton_cutlass_moe.py
@@ -10,9 +10,9 @@
     FusedMoEConfig,
     FusedMoEQuantConfig,
 )
-from vllm.model_executor.layers.fused_moe.cutlass_moe import CutlassExpertsFp8
-from vllm.model_executor.layers.fused_moe.fallback import FallbackExperts
-from vllm.model_executor.layers.fused_moe.fused_moe import TritonExperts
+from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import CutlassExpertsFp8
+from vllm.model_executor.layers.fused_moe.experts.fallback import FallbackExperts
+from vllm.model_executor.layers.fused_moe.experts.triton_moe import TritonExperts
 from vllm.platforms import current_platform
 
 
diff --git a/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/experts/triton_deep_gemm_moe.py
similarity index 91%
rename from vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py
rename to vllm/model_executor/layers/fused_moe/experts/triton_deep_gemm_moe.py
index b601806b067a..58700d714857 100644
--- a/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/triton_deep_gemm_moe.py
@@ -9,13 +9,13 @@
     FusedMoEConfig,
     FusedMoEQuantConfig,
 )
-from vllm.model_executor.layers.fused_moe.deep_gemm_moe import (
+from vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe import (
     DeepGemmExperts,
     _valid_deep_gemm,
     _valid_deep_gemm_shape,
 )
-from vllm.model_executor.layers.fused_moe.fallback import FallbackExperts
-from vllm.model_executor.layers.fused_moe.fused_moe import TritonExperts
+from vllm.model_executor.layers.fused_moe.experts.fallback import FallbackExperts
+from vllm.model_executor.layers.fused_moe.experts.triton_moe import TritonExperts
 from vllm.utils.deep_gemm import (
     is_deep_gemm_e8m0_used,
 )
diff --git a/vllm/model_executor/layers/fused_moe/experts/triton_moe.py b/vllm/model_executor/layers/fused_moe/experts/triton_moe.py
new file mode 100644
index 000000000000..1ba39b35fd4e
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/experts/triton_moe.py
@@ -0,0 +1,552 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Triton-based MoE expert implementations."""
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm import _custom_ops as ops
+from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEParallelConfig,
+    FusedMoEQuantConfig,
+)
+from vllm.model_executor.layers.fused_moe.experts.lora_experts_mixin import (
+    LoRAExpertsMixin,
+)
+from vllm.model_executor.layers.fused_moe.fused_moe import (
+    _prepare_expert_assignment,
+    invoke_fused_moe_triton_kernel,
+    invoke_fused_moe_wna16_triton_kernel,
+    try_get_optimal_moe_config,
+)
+from vllm.model_executor.layers.fused_moe.moe_align_block_size import (
+    moe_align_block_size,
+)
+from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
+    TopKWeightAndReduceNoOP,
+)
+from vllm.model_executor.layers.fused_moe.utils import (
+    _resize_cache,
+    moe_kernel_quantize_input,
+    swiglu_limit_func,
+)
+from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+    is_deep_gemm_e8m0_used,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    QuantKey,
+    kFp8Dynamic128Sym,
+    kFp8DynamicTensorSym,
+    kFp8DynamicTokenSym,
+    kFp8Static128BlockSym,
+    kFp8StaticChannelSym,
+    kFp8StaticTensorSym,
+    kInt8DynamicTokenSym,
+    kInt8StaticChannelSym,
+)
+from vllm.platforms import current_platform
+from vllm.triton_utils import tl
+
+
+class TritonExperts(LoRAExpertsMixin, mk.FusedMoEExpertsModular):
+    """Triton-based fused MoE expert implementation."""
+
+    def __init__(
+        self,
+        moe_config: FusedMoEConfig,
+        quant_config: FusedMoEQuantConfig,
+    ):
+        # Whether quantized MOE runs natively, or through
+        # higher-precision + activation QDQ.
+        self.quantization_emulation = False
+        super().__init__(moe_config, quant_config)
+
+    @staticmethod
+    def activation_format() -> mk.FusedMoEActivationFormat:
+        return mk.FusedMoEActivationFormat.Standard
+
+    @staticmethod
+    def _supports_current_device() -> bool:
+        return current_platform.is_cuda_alike() or current_platform.is_xpu()
+
+    @staticmethod
+    def _supports_no_act_and_mul() -> bool:
+        return True
+
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        # INT8 requires at least 7.5 (Turing).
+        device_supports_int8 = (
+            current_platform.is_cuda()
+            and current_platform.has_device_capability((7, 5))
+        )
+
+        supported: list[tuple[QuantKey | None, QuantKey | None]] = [(None, None)]
+        if device_supports_int8:
+            supported.append((kInt8StaticChannelSym, kInt8DynamicTokenSym))
+        if current_platform.supports_fp8():
+            supported += [
+                (kFp8Static128BlockSym, kFp8Dynamic128Sym),
+                (kFp8StaticChannelSym, kFp8DynamicTokenSym),
+                (kFp8StaticTensorSym, kFp8DynamicTokenSym),
+                (kFp8StaticTensorSym, kFp8StaticTensorSym),
+                (kFp8StaticTensorSym, kFp8DynamicTensorSym),
+            ]
+        return (weight_key, activation_key) in supported
+
+    @staticmethod
+    def _supports_activation(activation: MoEActivation) -> bool:
+        return activation in [
+            MoEActivation.SILU,
+            MoEActivation.GELU,
+            MoEActivation.GELU_TANH,
+            MoEActivation.SWIGLUOAI,
+            MoEActivation.SWIGLUSTEP,
+            MoEActivation.SILU_NO_MUL,
+            MoEActivation.GELU_NO_MUL,
+            MoEActivation.GELU_TANH_NO_MUL,
+            MoEActivation.RELU2_NO_MUL,
+        ]
+
+    @staticmethod
+    def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
+        return not (
+            moe_parallel_config.use_fi_nvl_two_sided_kernels
+            or moe_parallel_config.use_fi_nvl_one_sided_kernels
+        )
+
+    @staticmethod
+    def _supports_batch_invariance():
+        return True
+
+    def supports_expert_map(self) -> bool:
+        return True
+
+    def finalize_weight_and_reduce_impl(self) -> mk.TopKWeightAndReduce:
+        return TopKWeightAndReduceNoOP()
+
+    def activation(
+        self, activation: MoEActivation, output: torch.Tensor, input: torch.Tensor
+    ) -> None:
+        gemm1_clamp_limit = self.quant_config.gemm1_clamp_limit
+        if activation == MoEActivation.SILU and gemm1_clamp_limit is not None:
+            swiglu_limit_func(output, input, float(gemm1_clamp_limit))
+            return
+
+        super().activation(activation, output, input)
+
+    def workspace_shapes(
+        self,
+        M: int,
+        N: int,
+        K: int,
+        topk: int,
+        global_num_experts: int,
+        local_num_experts: int,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        activation: MoEActivation,
+    ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...]]:
+        activation_out_dim = self.adjust_N_for_activation(N, activation)
+        workspace1 = (M, topk, max(activation_out_dim, K))
+        workspace2 = (M, topk, max(N, K))
+        output = (M, K)
+        return (workspace1, workspace2, output)
+
+    def apply(
+        self,
+        output: torch.Tensor,
+        hidden_states: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        activation: MoEActivation,
+        global_num_experts: int,
+        expert_map: torch.Tensor | None,
+        a1q_scale: torch.Tensor | None,
+        a2_scale: torch.Tensor | None,
+        workspace13: torch.Tensor,
+        workspace2: torch.Tensor,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        apply_router_weight_on_input: bool,
+    ):
+        # Check constraints.
+        if self.quant_config.use_int4_w4a16:
+            assert hidden_states.size(-1) // 2 == w1.size(2), "Hidden size mismatch"
+        else:
+            assert hidden_states.size(-1) == w1.size(2), (
+                f"Hidden size mismatch {hidden_states.size(-1)} != {w1.size(2)}"
+            )
+
+        assert hidden_states.is_contiguous(), "Hidden_states must be contiguous"
+        assert hidden_states.dim() == 2
+        assert w1.stride(-1) == 1, "Stride of last dimension must be 1"
+        assert w2.stride(-1) == 1, "Stride of last dimension must be 1"
+        assert hidden_states.dtype in [
+            torch.float32,
+            torch.float16,
+            torch.bfloat16,
+            torch.float8_e4m3fn,
+            torch.float8_e4m3fnuz,
+        ]
+
+        E, num_tokens, N, K, top_k_num = self.moe_problem_size(
+            hidden_states, w1, w2, topk_ids
+        )
+
+        if global_num_experts == -1:
+            global_num_experts = E
+
+        config = try_get_optimal_moe_config(
+            w1.size(),
+            w2.size(),
+            top_k_num,
+            self.quant_config.config_name(hidden_states.dtype),
+            num_tokens,
+            block_shape=self.block_shape,
+        )
+
+        if hidden_states.dtype == torch.bfloat16:
+            compute_type = tl.bfloat16
+        elif hidden_states.dtype == torch.float16:
+            compute_type = tl.float16
+        elif hidden_states.dtype == torch.float32:
+            compute_type = tl.float32
+        elif (
+            hidden_states.dtype == torch.float8_e4m3fn
+            or hidden_states.dtype == torch.float8_e4m3fnuz
+        ):
+            compute_type = tl.bfloat16
+        else:
+            raise ValueError(f"Unsupported compute_type: {hidden_states.dtype}")
+
+        # Note that the output tensor might be in workspace1
+        intermediate_cache1 = _resize_cache(workspace2, (num_tokens, top_k_num, N))
+        cache2_dim = self.adjust_N_for_activation(N, activation)
+        intermediate_cache2 = _resize_cache(
+            workspace13, (num_tokens * top_k_num, cache2_dim)
+        )
+        intermediate_cache3 = _resize_cache(workspace2, (num_tokens, top_k_num, K))
+
+        sorted_token_ids, expert_ids, num_tokens_post_padded = (
+            _prepare_expert_assignment(
+                topk_ids,
+                config,
+                num_tokens,
+                top_k_num,
+                global_num_experts,
+                expert_map,
+                use_int8_w8a16=self.quant_config.use_int8_w8a16,
+                use_int4_w4a16=self.quant_config.use_int4_w4a16,
+                block_shape=self.block_shape,
+            )
+        )
+
+        invoke_fused_moe_triton_kernel(
+            hidden_states,
+            w1,
+            intermediate_cache1,
+            a1q_scale if a1q_scale is not None else self.a1_scale,
+            self.w1_scale,
+            None,  # topk_weights
+            sorted_token_ids,
+            expert_ids,
+            num_tokens_post_padded,
+            False,  # mul_routed_weights
+            top_k_num,
+            config,
+            compute_type=compute_type,
+            use_fp8_w8a8=self.quant_config.use_fp8_w8a8,
+            use_int8_w8a8=self.quant_config.use_int8_w8a8,
+            use_int8_w8a16=self.quant_config.use_int8_w8a16,
+            use_int4_w4a16=self.quant_config.use_int4_w4a16,
+            per_channel_quant=self.per_act_token_quant,
+            block_shape=self.block_shape,
+            B_bias=self.w1_bias,
+        )
+
+        # LoRA w13: applied to intermediate_cache1 before activation, using
+        # hidden_states as the lora_a input.  moe_lora_align_block_size is
+        # called once here and results reused for the w2 LoRA below.
+        sorted_token_ids_lora = None
+        expert_ids_lora = None
+        num_tokens_post_padded_lora = None
+        token_lora_mapping = None
+        lora_context = self._lora_context
+        if lora_context is not None:
+            (
+                sorted_token_ids_lora,
+                expert_ids_lora,
+                num_tokens_post_padded_lora,
+                token_lora_mapping,
+            ) = self.apply_w13_lora(
+                lora_context,
+                y=intermediate_cache1,
+                x=hidden_states,
+                topk_ids=topk_ids,
+                topk_weights=topk_weights,
+                expert_map=expert_map,
+                w1=w1,
+                w2=w2,
+                num_tokens=num_tokens,
+                top_k_num=top_k_num,
+            )
+
+        a2q_scale: torch.Tensor | None = None
+
+        # Fuse SiLU+Mul + FP8 block quantize into a single kernel
+        # when conditions permit (gated SiLU, fp8 block quant with
+        # group_size=128, no LoRA requiring the BF16 intermediate).
+        if (
+            activation == MoEActivation.SILU
+            and self.quant_config.use_fp8_w8a8
+            and self.block_shape == [128, 128]
+            and lora_context is None
+            and not is_deep_gemm_e8m0_used()
+        ):
+            qintermediate_cache2, a2q_scale = ops.silu_and_mul_per_block_quant(
+                intermediate_cache1.view(-1, N),
+                group_size=128,
+                quant_dtype=current_platform.fp8_dtype(),
+            )
+        else:
+            self.activation(
+                activation, intermediate_cache2, intermediate_cache1.view(-1, N)
+            )
+
+            qintermediate_cache2, a2q_scale = moe_kernel_quantize_input(
+                intermediate_cache2,
+                a2_scale,
+                self.quant_dtype,
+                self.per_act_token_quant,
+                self.block_shape,
+                quantization_emulation=self.quantization_emulation,
+            )
+
+        invoke_fused_moe_triton_kernel(
+            qintermediate_cache2,
+            w2,
+            intermediate_cache3,
+            a2q_scale,
+            self.w2_scale,
+            topk_weights,
+            sorted_token_ids,
+            expert_ids,
+            num_tokens_post_padded,
+            not apply_router_weight_on_input,
+            1,
+            config,
+            compute_type=compute_type,
+            use_fp8_w8a8=self.quant_config.use_fp8_w8a8,
+            use_int8_w8a8=self.quant_config.use_int8_w8a8,
+            use_int8_w8a16=self.quant_config.use_int8_w8a16,
+            use_int4_w4a16=self.quant_config.use_int4_w4a16,
+            per_channel_quant=self.per_act_token_quant,
+            block_shape=self.block_shape,
+            B_bias=self.w2_bias,
+        )
+
+        # LoRA w2: applied to intermediate_cache3 before moe_sum, using the
+        # unquantized intermediate_cache2 as the lora_a input.  Reuses the
+        # sorted_token_ids_lora computed above.
+        if lora_context is not None:
+            self.apply_w2_lora(
+                lora_context,
+                y=intermediate_cache3,
+                x=intermediate_cache2,
+                topk_weights=topk_weights,
+                sorted_token_ids_lora=sorted_token_ids_lora,
+                expert_ids_lora=expert_ids_lora,
+                num_tokens_post_padded_lora=num_tokens_post_padded_lora,
+                token_lora_mapping=token_lora_mapping,
+                num_tokens=num_tokens,
+                w1=w1,
+                w2=w2,
+                top_k_num=top_k_num,
+            )
+
+        # separate function is required for MoE + LoRA
+        self.moe_sum(intermediate_cache3, output)
+
+    def moe_sum(self, input: torch.Tensor, output: torch.Tensor) -> None:
+        ops.moe_sum(input, output)
+
+
+class TritonWNA16Experts(TritonExperts):
+    @staticmethod
+    def _supports_current_device() -> bool:
+        raise NotImplementedError(
+            "TritonWNA16Experts is not yet used by an Oracle. "
+            "This method should not be called."
+        )
+
+    @staticmethod
+    def _supports_no_act_and_mul() -> bool:
+        raise NotImplementedError(
+            "TritonWNA16Experts is not yet used by an Oracle. "
+            "This method should not be called."
+        )
+
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        raise NotImplementedError(
+            "TritonWNA16Experts is not yet used by an Oracle. "
+            "This method should not be called."
+        )
+
+    @staticmethod
+    def _supports_activation(activation: MoEActivation) -> bool:
+        raise NotImplementedError(
+            "TritonWNA16Experts is not yet used by an Oracle. "
+            "This method should not be called."
+        )
+
+    @staticmethod
+    def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
+        raise NotImplementedError(
+            "TritonWNA16Experts is not yet used by an Oracle. "
+            "This method should not be called."
+        )
+
+    def apply(
+        self,
+        output: torch.Tensor,
+        hidden_states: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        activation: MoEActivation,
+        global_num_experts: int,
+        expert_map: torch.Tensor | None,
+        a1q_scale: torch.Tensor | None,
+        a2_scale: torch.Tensor | None,
+        workspace13: torch.Tensor,
+        workspace2: torch.Tensor,
+        expert_tokens_meta: mk.ExpertTokensMetadata | None,
+        apply_router_weight_on_input: bool,
+    ):
+        # Check constraints.
+        if self.quant_config.use_int4_w4a16:
+            assert hidden_states.size(-1) // 2 == w1.size(2), "Hidden size mismatch"
+        else:
+            assert hidden_states.size(-1) == w1.size(2), (
+                f"Hidden size mismatch {hidden_states.size(-1)} != {w1.size(2)}"
+            )
+
+        assert hidden_states.is_contiguous(), "Hidden_states must be contiguous"
+        assert hidden_states.dim() == 2
+        assert w1.stride(-1) == 1, "Stride of last dimension must be 1"
+        assert w2.stride(-1) == 1, "Stride of last dimension must be 1"
+        assert hidden_states.dtype in [
+            torch.float32,
+            torch.float16,
+            torch.bfloat16,
+            torch.float8_e4m3fn,
+            torch.float8_e4m3fnuz,
+        ]
+
+        E, num_tokens, N, K, top_k_num = self.moe_problem_size(
+            hidden_states, w1, w2, topk_ids
+        )
+
+        if global_num_experts == -1:
+            global_num_experts = E
+
+        config = try_get_optimal_moe_config(
+            w1.size(),
+            w2.size(),
+            top_k_num,
+            self.quant_config.config_name(hidden_states.dtype),
+            num_tokens,
+            block_shape=self.block_shape,
+        )
+
+        if hidden_states.dtype == torch.bfloat16:
+            compute_type = tl.bfloat16
+        elif hidden_states.dtype == torch.float16:
+            compute_type = tl.float16
+        elif hidden_states.dtype == torch.float32:
+            compute_type = tl.float32
+        elif (
+            hidden_states.dtype == torch.float8_e4m3fn
+            or hidden_states.dtype == torch.float8_e4m3fnuz
+        ):
+            compute_type = tl.bfloat16
+        else:
+            raise ValueError(f"Unsupported compute_type: {hidden_states.dtype}")
+
+        # Note that the output tensor might be in workspace1
+        intermediate_cache1 = _resize_cache(workspace2, (num_tokens, top_k_num, N))
+        activation_out_dim = self.adjust_N_for_activation(N, activation)
+        intermediate_cache2 = _resize_cache(
+            workspace13, (num_tokens * top_k_num, activation_out_dim)
+        )
+        intermediate_cache3 = _resize_cache(workspace2, (num_tokens, top_k_num, K))
+
+        sorted_token_ids, expert_ids, num_tokens_post_padded = moe_align_block_size(
+            topk_ids, config["BLOCK_SIZE_M"], global_num_experts, expert_map
+        )
+
+        invoke_fused_moe_wna16_triton_kernel(
+            hidden_states,
+            w1,
+            intermediate_cache1,
+            self.w1_scale,
+            self.quant_config.w1_zp,
+            None,  # topk_weights
+            sorted_token_ids,
+            expert_ids,
+            num_tokens_post_padded,
+            False,  # mul_routed_weights
+            top_k_num,
+            config,
+            compute_type=compute_type,
+            use_int8_w8a16=self.quant_config.use_int8_w8a16,
+            use_int4_w4a16=self.quant_config.use_int4_w4a16,
+            block_shape=self.block_shape,
+        )
+
+        self.activation(
+            activation, intermediate_cache2, intermediate_cache1.view(-1, N)
+        )
+
+        a2q_scale: torch.Tensor | None = None
+
+        qintermediate_cache2, a2q_scale = moe_kernel_quantize_input(
+            intermediate_cache2,
+            a2_scale,
+            self.quant_dtype,
+            self.per_act_token_quant,
+            self.block_shape,
+        )
+
+        invoke_fused_moe_wna16_triton_kernel(
+            qintermediate_cache2,
+            w2,
+            intermediate_cache3,
+            self.w2_scale,
+            self.quant_config.w2_zp,
+            topk_weights,
+            sorted_token_ids,
+            expert_ids,
+            num_tokens_post_padded,
+            not apply_router_weight_on_input,
+            1,
+            config,
+            compute_type=compute_type,
+            use_int8_w8a16=self.quant_config.use_int8_w8a16,
+            use_int4_w4a16=self.quant_config.use_int4_w4a16,
+            block_shape=self.block_shape,
+        )
+
+        # separate function is required for MoE + LoRA
+        self.moe_sum(intermediate_cache3, output)
diff --git a/vllm/model_executor/layers/fused_moe/experts/trtllm_bf16_moe.py b/vllm/model_executor/layers/fused_moe/experts/trtllm_bf16_moe.py
new file mode 100644
index 000000000000..0b679b78c929
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/experts/trtllm_bf16_moe.py
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEParallelConfig,
+    FusedMoEQuantConfig,
+    RoutingMethodType,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    QuantKey,
+)
+from vllm.platforms import current_platform
+from vllm.utils.flashinfer import has_flashinfer_trtllm_fused_moe
+
+
+class TrtLlmBf16Experts(mk.FusedMoEExpertsMonolithic):
+    """
+    BF16 unquantized TRTLLM-Gen MoE kernels. Supports monolithic interface.
+    """
+
+    def __init__(
+        self,
+        moe_config: FusedMoEConfig,
+        quant_config: FusedMoEQuantConfig,
+    ):
+        super().__init__(moe_config, quant_config)
+        self.routing_method_type = moe_config.routing_method
+        self.topk = moe_config.experts_per_token
+        self.intermediate_size_per_partition = (
+            moe_config.intermediate_size_per_partition
+        )
+        self.hidden_dim = moe_config.hidden_dim
+        self.local_num_experts = moe_config.num_local_experts
+        self.ep_rank = moe_config.moe_parallel_config.ep_rank
+
+    @staticmethod
+    def activation_format() -> mk.FusedMoEActivationFormat:
+        return mk.FusedMoEActivationFormat.Standard
+
+    @staticmethod
+    def _supports_current_device() -> bool:
+        """Supports only Blackwell-family GPUs."""
+        p = current_platform
+        return (
+            p.is_cuda()
+            and p.is_device_capability_family(100)
+            and has_flashinfer_trtllm_fused_moe()
+        )
+
+    @staticmethod
+    def _supports_no_act_and_mul() -> bool:
+        """BF16 kernels do not support non-gated MoE"""
+        return False
+
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        """Supports only unquantized inputs."""
+        return weight_key is None and activation_key is None
+
+    @staticmethod
+    def _supports_activation(activation: MoEActivation) -> bool:
+        return activation in [MoEActivation.SILU]
+
+    @staticmethod
+    def _supports_routing_method(
+        routing_method: RoutingMethodType,
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        return routing_method in [
+            RoutingMethodType.DeepSeekV3,
+            RoutingMethodType.Llama4,
+            RoutingMethodType.Renormalize,
+            RoutingMethodType.RenormalizeNaive,
+        ]
+
+    @staticmethod
+    def _supports_parallel_config(
+        moe_parallel_config: FusedMoEParallelConfig,
+    ) -> bool:
+        """Monolithic kernel so only use with naive DP/EP and TP."""
+        return (
+            not moe_parallel_config.use_all2all_kernels
+            or moe_parallel_config.use_ag_rs_all2all_kernels
+        ) and not moe_parallel_config.enable_eplb
+
+    @staticmethod
+    def _supports_router_logits_dtype(
+        router_logits_dtype: torch.dtype | None,
+        routing_method: RoutingMethodType,
+    ) -> bool:
+        return True
+
+    def supports_chunking(self) -> bool:
+        return False
+
+    def supports_expert_map(self) -> bool:
+        return False
+
+    @property
+    def expects_unquantized_inputs(self) -> bool:
+        return True
+
+    def apply(
+        self,
+        hidden_states: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        router_logits: torch.Tensor,
+        activation: MoEActivation,
+        global_num_experts: int,
+        expert_map: torch.Tensor | None,
+        a1q_scale: torch.Tensor | None,
+        apply_router_weight_on_input: bool,
+        num_expert_group: int | None = None,
+        e_score_correction_bias: torch.Tensor | None = None,
+        routed_scaling_factor: float | None = None,
+        topk_group: int | None = None,
+    ) -> torch.Tensor:
+        import flashinfer
+
+        return flashinfer.fused_moe.trtllm_bf16_moe(
+            routing_logits=router_logits,
+            routing_bias=e_score_correction_bias,
+            hidden_states=hidden_states,
+            gemm1_weights=w1,
+            gemm2_weights=w2,
+            num_experts=global_num_experts,
+            top_k=self.topk,
+            n_group=num_expert_group,
+            topk_group=topk_group,
+            intermediate_size=self.intermediate_size_per_partition,
+            local_expert_offset=self.ep_rank * self.local_num_experts,
+            local_num_experts=self.local_num_experts,
+            routing_method_type=self.routing_method_type,
+        )
diff --git a/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py b/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
index 9a6f67b421f9..31af4a32bae2 100644
--- a/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
@@ -53,6 +53,7 @@ def __init__(
         self.local_num_experts = moe_config.num_local_experts
         self.ep_rank = moe_config.moe_parallel_config.ep_rank
 
+        self.moe_config = moe_config
         self.quant_config = quant_config
 
     @staticmethod
@@ -111,6 +112,24 @@ def _supports_quant_scheme(
         ]
         return (weight_key, activation_key) in SUPPORTED_W_A
 
+    def moe_problem_size(
+        self,
+        a1: torch.Tensor,
+        w1: torch.Tensor,
+        w2: torch.Tensor,
+        topk_ids: torch.Tensor,
+    ) -> tuple[int, int, int, int, int]:
+        """Override to handle 4D BlockMajorK weights (E, K/bk, Mn, bk)."""
+        if w1.dim() == 4:
+            # BlockMajorK: (E, K/bk, Mn, bk)
+            E = w1.shape[0]
+            N = w1.shape[2]
+            K = a1.size(-1)
+            M = a1.size(0) if a1.dim() == 2 else a1.size(1)
+            topk = topk_ids.size(1)
+            return E, M, N, K, topk
+        return super().moe_problem_size(a1, w1, w2, topk_ids)
+
     def workspace_shapes(
         self,
         M: int,
@@ -151,35 +170,26 @@ def apply(
         apply_router_weight_on_input: bool,
     ):
         import flashinfer
-        from flashinfer.fused_moe import Fp8QuantizationType
+        from flashinfer.fused_moe import Fp8QuantizationType, WeightLayout
 
         # Pack topk ids and weights into format expected by the kernel.
         packed_topk_ids = trtllm_moe_pack_topk_ids_weights(topk_ids, topk_weights)
 
-        # trtllm_fp8_block_scale_routed_moe does not support autotuning
-        # so skip this kernel during dummy run for autotuning.
-        import vllm.utils.flashinfer as fi_utils
-
-        if fi_utils._is_fi_autotuning:
-            return
-
         assert a1q_scale is not None
 
         is_mxfp8 = self.quant_config.block_shape == [1, 32]
         if is_mxfp8:
             fp8_quant_type = Fp8QuantizationType.MxFp8
             use_shuffled_weight = True
+            weight_layout = WeightLayout.MajorK
             hidden_states_scale = a1q_scale
         else:
             fp8_quant_type = Fp8QuantizationType.DeepSeekFp8
-            use_shuffled_weight = False
+            use_shuffled_weight = True
+            weight_layout = WeightLayout.BlockMajorK
             hidden_states_scale = a1q_scale.t().contiguous()
 
-        # `trtllm_fp8_block_scale_routed_moe` has a bug and does not write to the
-        # output tensor in-place so we need to manually copy the result to the
-        # output tensor
-        # https://github.com/flashinfer-ai/flashinfer/issues/2703
-        result = flashinfer.fused_moe.trtllm_fp8_block_scale_routed_moe(
+        flashinfer.fused_moe.trtllm_fp8_block_scale_routed_moe(
             topk_ids=packed_topk_ids,
             routing_bias=None,
             hidden_states=hidden_states,
@@ -196,13 +206,12 @@ def apply(
             local_expert_offset=self.ep_rank * self.local_num_experts,
             local_num_experts=self.local_num_experts,
             routed_scaling_factor=None,
-            routing_method_type=1,
+            routing_method_type=1,  # not used
             use_shuffled_weight=use_shuffled_weight,
-            weight_layout=0,
+            weight_layout=weight_layout,
             fp8_quantization_type=fp8_quant_type,
-            # output=output,
+            output=output,
         )
-        output.copy_(result)
 
 
 class TrtLlmFp8ExpertsMonolithic(TrtLlmFp8ExpertsBase, mk.FusedMoEExpertsMonolithic):
@@ -254,15 +263,6 @@ def _supports_router_logits_dtype(
         router_logits_dtype: torch.dtype | None,
         routing_method: RoutingMethodType,
     ) -> bool:
-        """
-        The FlashInfer TRTLLM FP8 kernel expects bfloat16 router_logits by default.
-        Only DeepSeekV3 routing supports float32 router_logits (which is converted
-        internally in the kernel).
-        """
-        if router_logits_dtype == torch.float32:
-            # Only DeepSeekV3 routing handles float32 logits
-            # https://github.com/flashinfer-ai/flashinfer/issues/2469
-            return routing_method == RoutingMethodType.DeepSeekV3
         return True
 
     @staticmethod
@@ -271,13 +271,7 @@ def _supports_routing_method(
         weight_key: QuantKey | None,
         activation_key: QuantKey | None,
     ) -> bool:
-        """Monolithic kernels need to express router support.
-        Renormalize/RenormalizeNaive are excluded: the monolithic kernel's
-        internal routing for these methods produces output uncorrelated
-        with the modular kernel's output and with Triton kernel's output
-        for Qwen3.5-35B-A3B-FP8.
-        See: https://github.com/vllm-project/vllm/issues/37591
-        """
+        """Monolithic kernels need to express router support."""
         # NOTE(dbari): TopK routing could also be enabled, but need to validate models
         # NOTE(dbari): Default is not implemented and should not be enabled until it is
 
@@ -288,12 +282,22 @@ def _supports_routing_method(
             # NOTE(rob): potentially allow others here. This is a conservative list.
             return routing_method in [
                 RoutingMethodType.DeepSeekV3,
+                RoutingMethodType.Renormalize,
+                RoutingMethodType.RenormalizeNaive,
+                RoutingMethodType.SigmoidRenorm,
+                RoutingMethodType.MiniMax2,
+                RoutingMethodType.Simulated,
             ]
         elif (weight_key, activation_key) == (kFp8StaticTensorSym, kFp8StaticTensorSym):
             # NOTE(dbari): as above, potentially allow others here.
             return routing_method in [
                 RoutingMethodType.DeepSeekV3,
                 RoutingMethodType.Llama4,
+                RoutingMethodType.Renormalize,
+                RoutingMethodType.RenormalizeNaive,
+                RoutingMethodType.SigmoidRenorm,
+                RoutingMethodType.MiniMax2,
+                RoutingMethodType.Simulated,
             ]
         else:
             raise ValueError("Unsupported quantization scheme.")
@@ -316,7 +320,7 @@ def _apply_block_scale(
         topk_group: int | None = None,
     ) -> torch.Tensor:
         import flashinfer
-        from flashinfer.fused_moe import Fp8QuantizationType
+        from flashinfer.fused_moe import Fp8QuantizationType, WeightLayout
 
         assert not apply_router_weight_on_input
         assert activation == MoEActivation.SILU
@@ -329,17 +333,16 @@ def _apply_block_scale(
         # TODO: fuse into the quant kernel.
         assert a1q_scale is not None
 
-        if self.routing_method_type == RoutingMethodType.DeepSeekV3:
-            router_logits = router_logits.to(torch.float32)
-
         is_mxfp8 = self.quant_config.block_shape == [1, 32]
         if is_mxfp8:
             fp8_quant_type = Fp8QuantizationType.MxFp8
             use_shuffled_weight = True
+            weight_layout = WeightLayout.MajorK
             hidden_states_scale = a1q_scale
         else:
             fp8_quant_type = Fp8QuantizationType.DeepSeekFp8
-            use_shuffled_weight = False
+            use_shuffled_weight = True
+            weight_layout = WeightLayout.BlockMajorK
             hidden_states_scale = a1q_scale.t().contiguous()
 
         return flashinfer.fused_moe.trtllm_fp8_block_scale_moe(
@@ -361,6 +364,7 @@ def _apply_block_scale(
             routed_scaling_factor=routed_scaling_factor,
             routing_method_type=self.routing_method_type,
             use_shuffled_weight=use_shuffled_weight,
+            weight_layout=weight_layout,
             fp8_quantization_type=fp8_quant_type,
         )
 
@@ -395,9 +399,10 @@ def _apply_per_tensor(
         else:
             assert not apply_router_weight_on_input
 
-        # The DeepSeekV3 routing method requires float32 router logits.
-        if self.routing_method_type == RoutingMethodType.DeepSeekV3:
-            router_logits = router_logits.to(torch.float32)
+        # Currently FI requires bfloat16 routing bias.
+        # https://github.com/flashinfer-ai/flashinfer/issues/2909
+        if e_score_correction_bias is not None:
+            e_score_correction_bias = e_score_correction_bias.to(torch.bfloat16)
 
         out = flashinfer.fused_moe.trtllm_fp8_per_tensor_scale_moe(
             routing_logits=router_logits,
diff --git a/vllm/model_executor/layers/fused_moe/experts/trtllm_mxfp4_moe.py b/vllm/model_executor/layers/fused_moe/experts/trtllm_mxfp4_moe.py
index d084283360c4..64c163d73eb1 100644
--- a/vllm/model_executor/layers/fused_moe/experts/trtllm_mxfp4_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/trtllm_mxfp4_moe.py
@@ -14,6 +14,7 @@
 from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
     TopKWeightAndReduceNoOP,
 )
+from vllm.model_executor.layers.fused_moe.utils import trtllm_moe_pack_topk_ids_weights
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     QuantKey,
     kMxfp4Static,
@@ -32,10 +33,8 @@ def __init__(
         self,
         moe_config: FusedMoEConfig,
         quant_config: FusedMoEQuantConfig,
+        **kwargs,
     ):
-        # NOTE: FusedMoEExperts.__init__ is called by the concrete subclass
-        # (Monolithic/Modular) via MRO, not here, to avoid mypy issues with
-        # multiple inheritance. This matches the NvFP4 expert pattern.
         self.moe_config = moe_config
         self.quant_config = quant_config
 
@@ -45,26 +44,40 @@ def __init__(
             moe_config.intermediate_size_per_partition
         )
         self.hidden_dim = moe_config.hidden_dim
+        self.hidden_dim_unpadded = (
+            moe_config.hidden_dim_unpadded or moe_config.hidden_dim
+        )
         self.local_num_experts = moe_config.num_local_experts
         self.ep_rank = moe_config.moe_parallel_config.ep_rank
 
-        # MXFP4-specific TRTLLM parameters
+        # MXFP4-specific TRTLLM parameters from quant_config
         device = torch.accelerator.current_device_index()
-        self.gemm1_alpha = torch.tensor(
-            [1.702] * self.local_num_experts,
-            dtype=torch.float32,
-            device=device,
-        )
-        self.gemm1_beta = torch.tensor(
-            [1.0] * self.local_num_experts,
-            dtype=torch.float32,
-            device=device,
-        )
-        self.gemm1_clamp_limit = torch.tensor(
-            [7.0] * self.local_num_experts,
-            dtype=torch.float32,
-            device=device,
-        )
+        if quant_config.gemm1_alpha is not None:
+            self.gemm1_alpha = torch.tensor(
+                [quant_config.gemm1_alpha] * self.local_num_experts,
+                dtype=torch.float32,
+                device=device,
+            )
+        else:
+            self.gemm1_alpha = None
+
+        if quant_config.gemm1_beta is not None:
+            self.gemm1_beta = torch.tensor(
+                [quant_config.gemm1_beta] * self.local_num_experts,
+                dtype=torch.float32,
+                device=device,
+            )
+        else:
+            self.gemm1_beta = None
+
+        if quant_config.gemm1_clamp_limit is not None:
+            self.gemm1_clamp_limit = torch.tensor(
+                [quant_config.gemm1_clamp_limit] * self.local_num_experts,
+                dtype=torch.float32,
+                device=device,
+            )
+        else:
+            self.gemm1_clamp_limit = None
 
         from vllm.config import get_current_vllm_config
 
@@ -72,9 +85,6 @@ def __init__(
             get_current_vllm_config().compilation_config.max_cudagraph_capture_size
         )
 
-        # P1-5 fix: use public quant_dtype property instead of private _a1
-        self.use_mxfp8_input = quant_config.quant_dtype == "mxfp8"
-
     @staticmethod
     def _supports_current_device() -> bool:
         p = current_platform
@@ -97,7 +107,7 @@ def _supports_quant_scheme(
 
     @staticmethod
     def _supports_activation(activation: MoEActivation) -> bool:
-        return activation == MoEActivation.SWIGLUOAI
+        return activation in (MoEActivation.SWIGLUOAI, MoEActivation.SILU)
 
     @staticmethod
     def activation_format() -> mk.FusedMoEActivationFormat:
@@ -111,8 +121,7 @@ def supports_expert_map(self) -> bool:
 
     @property
     def expects_unquantized_inputs(self) -> bool:
-        # Expert handles MXFP8 quantization internally if needed
-        return True
+        return False
 
 
 class TrtLlmMxfp4ExpertsMonolithic(
@@ -171,26 +180,21 @@ def apply(
     ) -> torch.Tensor:
         from flashinfer import trtllm_fp4_block_scale_moe
 
-        # Handle input quantization
-        if self.use_mxfp8_input:
-            from flashinfer import mxfp8_quantize
-
-            x_quant, x_scale = mxfp8_quantize(
-                hidden_states,
-                is_sf_swizzled_layout=False,
-                alignment=256,
-            )
-            x_scale = x_scale.view(torch.float8_e4m3fn).reshape(
-                *hidden_states.shape[:-1], -1
-            )
+        if a1q_scale is not None:
+            x_quant = hidden_states
+            x_scale = a1q_scale.view(torch.float8_e4m3fn)
         else:
             assert hidden_states.dtype == torch.bfloat16
             x_quant = hidden_states
             x_scale = None
+        output = torch.empty(
+            *hidden_states.shape[:-1],
+            self.hidden_dim_unpadded,
+            dtype=torch.bfloat16,
+            device=hidden_states.device,
+        )
 
-        output = torch.empty_like(hidden_states)
-
-        return trtllm_fp4_block_scale_moe(
+        trtllm_fp4_block_scale_moe(
             routing_logits=router_logits.to(torch.bfloat16),
             routing_bias=None,
             hidden_states=x_quant,
@@ -219,7 +223,9 @@ def apply(
             do_finalize=True,
             tune_max_num_tokens=max(self.max_capture_size, 1),
             output=output,
-        )[0]
+        )
+
+        return output
 
 
 class TrtLlmMxfp4ExpertsModular(TrtLlmMxfp4ExpertsBase, mk.FusedMoEExpertsModular):
@@ -229,16 +235,22 @@ class TrtLlmMxfp4ExpertsModular(TrtLlmMxfp4ExpertsBase, mk.FusedMoEExpertsModula
     Moved from trtllm_moe.py.
     """
 
-    @property
-    def expects_unquantized_inputs(self) -> bool:
-        return True
-
     @staticmethod
     def _supports_parallel_config(
         moe_parallel_config: FusedMoEParallelConfig,
     ) -> bool:
         return True
 
+    @staticmethod
+    def _supports_routing_method(
+        routing_method: RoutingMethodType,
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        # Modular kernel handles only the expert computation;
+        # routing is done externally, so accept any routing method.
+        return True
+
     def supports_expert_map(self) -> bool:
         return True
 
@@ -259,7 +271,7 @@ def workspace_shapes(
         # The workspaces for this implementation are managed by flashinfer.
         workspace1 = (0,)
         workspace2 = (0,)
-        output = (M, K)
+        output = (M, self.hidden_dim_unpadded)
         return (workspace1, workspace2, output)
 
     def apply(
@@ -282,29 +294,19 @@ def apply(
     ):
         topk = topk_ids.size(-1)
         local_num_experts = w1.size(0)
-        intermediate_size = w2.size(1)
+        intermediate_size = self.intermediate_size_per_partition
         local_expert_offset = self.moe_config.ep_rank * local_num_experts
 
-        # Handle input quantization
-        if self.use_mxfp8_input:
-            from flashinfer import mxfp8_quantize
-
-            x_quant, x_scale = mxfp8_quantize(
-                hidden_states,
-                is_sf_swizzled_layout=False,
-                alignment=256,
-            )
-            x_scale = x_scale.view(torch.float8_e4m3fn).reshape(
-                *hidden_states.shape[:-1], -1
-            )
+        if a1q_scale is not None:
+            x_quant = hidden_states
+            x_scale = a1q_scale.view(torch.float8_e4m3fn)
         else:
             assert hidden_states.dtype == torch.bfloat16
             x_quant = hidden_states
             x_scale = None
 
-        packed_tensor = (topk_ids.to(torch.int32) << 16) | topk_weights.to(
-            torch.bfloat16
-        ).view(torch.int16)
+        # Pack topk ids and weights into format expected by the kernel.
+        packed_tensor = trtllm_moe_pack_topk_ids_weights(topk_ids, topk_weights)
 
         assert self.w1_scale is not None
         assert self.w2_scale is not None
@@ -333,20 +335,18 @@ def apply(
             "local_expert_offset": local_expert_offset,
             "local_num_experts": local_num_experts,
             "routed_scaling_factor": None,
-            "routing_method_type": self.routing_method_type,
+            # Modular kernel receives pre-routed tokens, so routing
+            # is already done. Use Renormalize as a safe default that
+            # the TRTLLM C++ kernel supports.
+            "routing_method_type": RoutingMethodType.Renormalize,
             "do_finalize": True,
+            "enable_pdl": True,
             "output": output,
             "tune_max_num_tokens": max(self.max_capture_size, 1),
         }
 
         from flashinfer import trtllm_fp4_block_scale_routed_moe
 
-        from vllm.utils.flashinfer import autotune
-
-        with autotune(False):
-            # Enable autotune when,
-            # https://github.com/flashinfer-ai/flashinfer/issues/2023 is
-            # resolved.
-            trtllm_fp4_block_scale_routed_moe(**kwargs)
+        trtllm_fp4_block_scale_routed_moe(**kwargs)
 
         return output
diff --git a/vllm/model_executor/layers/fused_moe/experts/trtllm_nvfp4_moe.py b/vllm/model_executor/layers/fused_moe/experts/trtllm_nvfp4_moe.py
index 84beb6abb553..3ddcdef77575 100644
--- a/vllm/model_executor/layers/fused_moe/experts/trtllm_nvfp4_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/trtllm_nvfp4_moe.py
@@ -1,10 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import flashinfer
+
 import torch
 
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.activation import MoEActivation
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEConfig,
@@ -27,6 +28,8 @@
 from vllm.platforms import current_platform
 from vllm.utils.flashinfer import has_flashinfer_trtllm_fused_moe
 
+logger = init_logger(__name__)
+
 
 class TrtLlmNvFp4ExpertsBase:
     """
@@ -47,6 +50,9 @@ def __init__(
             moe_config.intermediate_size_per_partition
         )
         self.hidden_dim = moe_config.hidden_dim
+        self.hidden_dim_unpadded = (
+            moe_config.hidden_dim_unpadded or moe_config.hidden_dim
+        )
         self.local_num_experts = moe_config.num_local_experts
         self.ep_rank = moe_config.moe_parallel_config.ep_rank
 
@@ -60,6 +66,17 @@ def __init__(
         else:
             self.g1_scale_c = self.quant_config.a2_gscale.clone()
 
+        if moe_config.is_act_and_mul and quant_config.gemm1_clamp_limit is not None:
+            device = torch.accelerator.current_device_index()
+            self.gemm1_clamp_limit = torch.full(
+                (self.local_num_experts,),
+                quant_config.gemm1_clamp_limit,
+                dtype=torch.float32,
+                device=device,
+            )
+        else:
+            self.gemm1_clamp_limit = None
+
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         layer.w13_weight_scale_2.data.mul_(layer.w13_input_scale)
         layer.w2_weight_scale_2.data.mul_(layer.w2_input_scale)
@@ -78,6 +95,20 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         )
         self.g1_scale_c = layer.g1_scale_c
 
+        # Pre-fold the per-expert g1_alphas (= output1_scale_gate_scalar)
+        # division so the TRTLLM kernel receives the raw-GEMM-space clamp
+        # directly. g1_alphas is set once here in process_weights_after_loading
+        # (via the in-place mul above) and never changes again, so this is a
+        # static, per-expert constant. Register on the layer so EPLB
+        # rearranges it alongside the other expert tensors.
+        if self.gemm1_clamp_limit is not None:
+            gemm1_clamp_limit = self.gemm1_clamp_limit / self.quant_config.g1_alphas
+            layer.register_parameter(
+                "gemm1_clamp_limit",
+                torch.nn.Parameter(gemm1_clamp_limit, requires_grad=False),
+            )
+            self.gemm1_clamp_limit = layer.gemm1_clamp_limit
+
     @staticmethod
     def _supports_current_device() -> bool:
         """Supports only Blackwell-family GPUs."""
@@ -106,13 +137,21 @@ def _supports_quant_scheme(
 
     @staticmethod
     def _supports_activation(activation: MoEActivation) -> bool:
-        """Supports only SiLU and RELU^2 non-gated activation."""
-        return activation in [MoEActivation.SILU, MoEActivation.RELU2_NO_MUL]
+        """Supports only SiLU, RELU^2 non-gated and GELU activation."""
+        return activation in [
+            MoEActivation.SILU,
+            MoEActivation.RELU2_NO_MUL,
+            MoEActivation.GELU,
+        ]
 
     @staticmethod
     def _supports_shape(hidden_dim: int) -> bool:
-        """Requires hidden dim to be multiple of 512."""
-        return hidden_dim % 512 == 0
+        # Weights are zero-padded to 256-alignment at load time and the MoE
+        # runner pads activations via _maybe_pad_hidden_states, so any
+        # hidden_dim is accepted.
+        # NOTE: non-256-aligned dims will trigger a warning log and may
+        # cause performance degradation due to activation slicing.
+        return True
 
     @staticmethod
     def activation_format() -> mk.FusedMoEActivationFormat:
@@ -178,20 +217,16 @@ def apply(
         expert_tokens_meta: mk.ExpertTokensMetadata | None,
         apply_router_weight_on_input: bool,
     ):
-        assert activation in [MoEActivation.SILU, MoEActivation.RELU2_NO_MUL]
+        import flashinfer
+
+        assert self._supports_activation(activation)
         assert a1q_scale is not None
         assert self.quant_config.w1_scale is not None
         assert self.quant_config.w2_scale is not None
 
         # Pack topk ids and weights into format expected by the kernel.
         packed_tensor = trtllm_moe_pack_topk_ids_weights(topk_ids, topk_weights)
-
-        # trtllm_fp4_block_scale_routed_moe does not support autotuning
-        # so skip this kernel during dummy run for autotuning.
-        import vllm.utils.flashinfer as fi_utils
-
-        if fi_utils._is_fi_autotuning:
-            return hidden_states
+        output1_scale_gate_scalar = self.quant_config.g1_alphas
 
         # Invoke kernel.
         flashinfer.fused_moe.trtllm_fp4_block_scale_routed_moe(
@@ -206,12 +241,12 @@ def apply(
             gemm1_bias=None,
             gemm1_alpha=None,
             gemm1_beta=None,
-            gemm1_clamp_limit=None,
+            gemm1_clamp_limit=self.gemm1_clamp_limit,
             gemm2_weights=w2,
             gemm2_weights_scale=self.quant_config.w2_scale.view(torch.float8_e4m3fn),
             gemm2_bias=None,
             output1_scale_scalar=self.g1_scale_c,
-            output1_scale_gate_scalar=self.quant_config.g1_alphas,
+            output1_scale_gate_scalar=output1_scale_gate_scalar,
             output2_scale_scalar=self.quant_config.g2_alphas,
             num_experts=global_num_experts,
             top_k=self.topk,
@@ -221,7 +256,7 @@ def apply(
             local_expert_offset=self.ep_rank * self.local_num_experts,
             local_num_experts=self.local_num_experts,
             routed_scaling_factor=None,
-            routing_method_type=1,
+            routing_method_type=1,  # not used
             do_finalize=True,
             activation_type=activation_to_flashinfer_int(activation),
             output=output,
@@ -255,6 +290,10 @@ def _supports_routing_method(
             RoutingMethodType.Renormalize,
             RoutingMethodType.RenormalizeNaive,
             RoutingMethodType.Llama4,
+            RoutingMethodType.SigmoidRenorm,
+            RoutingMethodType.MiniMax2,
+            RoutingMethodType.Simulated,
+            RoutingMethodType.SigmoidRenorm,
         ]
 
     @staticmethod
@@ -262,15 +301,6 @@ def _supports_router_logits_dtype(
         router_logits_dtype: torch.dtype | None,
         routing_method: RoutingMethodType,
     ) -> bool:
-        """
-        The FlashInfer TRTLLM NvFp4 kernel expects bfloat16 router_logits by default.
-        Only DeepSeekV3 routing supports float32 router_logits (which is converted
-        internally in the kernel).
-        """
-        if router_logits_dtype == torch.float32:
-            # Only DeepSeekV3 routing handles float32 logits
-            # https://github.com/flashinfer-ai/flashinfer/issues/2469
-            return routing_method == RoutingMethodType.DeepSeekV3
         return True
 
     def apply(
@@ -290,7 +320,9 @@ def apply(
         routed_scaling_factor: float | None = None,
         topk_group: int | None = None,
     ) -> torch.Tensor:
-        assert activation in [MoEActivation.SILU, MoEActivation.RELU2_NO_MUL]
+        import flashinfer
+
+        assert self._supports_activation(activation)
         assert a1q_scale is not None
         assert self.quant_config.w1_scale is not None
         assert self.quant_config.w2_scale is not None
@@ -302,14 +334,16 @@ def apply(
             and self.routing_method_type != RoutingMethodType.Llama4
         )
 
-        # Prepare router logits for kernel format.
-        router_logits = (
-            router_logits.to(torch.float32)
-            if self.routing_method_type == RoutingMethodType.DeepSeekV3
-            else router_logits
-        )
+        # Currently FI requires bfloat16 routing bias.
+        # https://github.com/flashinfer-ai/flashinfer/issues/2909
+        if e_score_correction_bias is not None:
+            e_score_correction_bias = e_score_correction_bias.to(torch.bfloat16)
+
+        output1_scale_gate_scalar = self.quant_config.g1_alphas
 
         # Invoke kernel.
+        # NOTE: Activation padding and output
+        # truncation are handled by the MoE runner's
         return flashinfer.fused_moe.trtllm_fp4_block_scale_moe(
             routing_logits=router_logits,
             routing_bias=e_score_correction_bias,
@@ -322,12 +356,12 @@ def apply(
             gemm1_bias=None,
             gemm1_alpha=None,
             gemm1_beta=None,
-            gemm1_clamp_limit=None,
+            gemm1_clamp_limit=self.gemm1_clamp_limit,
             gemm2_weights=w2,
             gemm2_weights_scale=self.quant_config.w2_scale.view(torch.float8_e4m3fn),
             gemm2_bias=None,
             output1_scale_scalar=self.g1_scale_c,
-            output1_scale_gate_scalar=self.quant_config.g1_alphas,
+            output1_scale_gate_scalar=output1_scale_gate_scalar,
             output2_scale_scalar=self.quant_config.g2_alphas,
             num_experts=global_num_experts,
             top_k=self.topk,
diff --git a/vllm/model_executor/layers/fused_moe/xpu_fused_moe.py b/vllm/model_executor/layers/fused_moe/experts/xpu_moe.py
similarity index 66%
rename from vllm/model_executor/layers/fused_moe/xpu_fused_moe.py
rename to vllm/model_executor/layers/fused_moe/experts/xpu_moe.py
index 9cc0ade288c7..fa6ff2cd9491 100644
--- a/vllm/model_executor/layers/fused_moe/xpu_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/xpu_moe.py
@@ -17,11 +17,20 @@
     kFp8DynamicTensorSym,
     kFp8StaticTensorSym,
     kMxfp4Static,
+    kMxfp8Dynamic,
+    kMxfp8Static,
 )
 from vllm.platforms import current_platform
 
 if current_platform.is_xpu():
-    from vllm_xpu_kernels.fused_moe_interface import xpu_fused_moe
+    from vllm_xpu_kernels.fused_moe_interface import XpuFusedMoe
+
+
+def prepare_fp8_moe_layer_for_xpu(
+    w13: torch.Tensor,
+    w2: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    return w13.transpose(-1, -2).contiguous(), w2.transpose(-1, -2).contiguous()
 
 
 class XPUExperts(mk.FusedMoEExpertsModular):
@@ -40,6 +49,8 @@ def __init__(
         )
         self.is_fp8 = False
         self.is_mxfp4 = False
+        self.is_mxfp8 = False
+        self.fused_moe_impl: XpuFusedMoe | None = None
 
     @property
     def expects_unquantized_inputs(self) -> bool:
@@ -55,7 +66,7 @@ def _supports_current_device() -> bool:
 
     @staticmethod
     def _supports_no_act_and_mul() -> bool:
-        return False
+        return True
 
     @staticmethod
     def _supports_activation(activation: MoEActivation) -> bool:
@@ -63,6 +74,7 @@ def _supports_activation(activation: MoEActivation) -> bool:
             MoEActivation.SILU,
             MoEActivation.GELU,
             MoEActivation.SWIGLUOAI,
+            MoEActivation.RELU2_NO_MUL,
         ]
 
     @staticmethod
@@ -121,25 +133,30 @@ def apply(
         expert_tokens_meta: mk.ExpertTokensMetadata | None,
         apply_router_weight_on_input: bool,
     ):
-        topk = topk_ids.size(-1)
-        xpu_fused_moe(
+        if self.fused_moe_impl is None:
+            topk = topk_ids.size(-1)
+            self.fused_moe_impl = XpuFusedMoe(
+                w13=w1,
+                w13_scales=self.w1_scale,
+                w13_bias=self.w1_bias,
+                w2=w2,
+                w2_scales=self.w2_scale,
+                w2_bias=self.w2_bias,
+                n_experts_per_token=topk,
+                activation=activation.value,
+                num_experts=self.moe_config.num_local_experts,
+                ep_rank=self.moe_config.ep_rank,
+                ep_size=self.moe_config.ep_size,
+                is_fp8=self.is_fp8,
+                is_mxfp4=self.is_mxfp4,
+                is_mxfp8=self.is_mxfp8,
+            )
+        assert self.fused_moe_impl is not None
+        self.fused_moe_impl.apply(
+            output=output,
             hidden_states=hidden_states,
-            w13=w1,
-            w13_scales=self.w1_scale,
-            w13_bias=self.w1_bias,
-            w2=w2,
-            w2_scales=self.w2_scale,
-            w2_bias=self.w2_bias,
             topk_weights=topk_weights,
             topk_ids=topk_ids,
-            n_experts_per_token=topk,
-            activation=activation.value,
-            num_experts=self.moe_config.num_local_experts,
-            ep_rank=self.moe_config.ep_rank,
-            ep_size=self.moe_config.ep_size,
-            output=output,
-            is_fp8=self.is_fp8,
-            is_mxfp4=self.is_mxfp4,
         )
 
 
@@ -159,6 +176,46 @@ def __init__(
         )
         self.is_fp8 = True
 
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        SUPPORTED_W_A = [
+            (kFp8StaticTensorSym, None),
+            (kFp8StaticTensorSym, kFp8DynamicTensorSym),
+        ]
+        return (weight_key, activation_key) in SUPPORTED_W_A
+
+
+class XPUExpertsMxfp8(XPUExpertsFp8):
+    def __init__(
+        self,
+        moe_config: FusedMoEConfig,
+        quant_config: FusedMoEQuantConfig,
+        max_num_tokens: int | None = None,
+        num_dispatchers: int | None = None,
+    ):
+        super().__init__(
+            moe_config,
+            quant_config,
+            max_num_tokens,
+            num_dispatchers,
+        )
+        assert quant_config.quant_dtype == "mxfp8"
+        self.is_mxfp8 = True
+
+    @staticmethod
+    def _supports_quant_scheme(
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+    ) -> bool:
+        SUPPORTED_W_A = [
+            (kMxfp8Static, None),
+            (kMxfp8Static, kMxfp8Dynamic),
+        ]
+        return (weight_key, activation_key) in SUPPORTED_W_A
+
 
 class XPUExpertsMXFp4(XPUExperts):
     def __init__(
diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
deleted file mode 100644
index d04e040c8959..000000000000
--- a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import torch
-
-import vllm.model_executor.layers.fused_moe.modular_kernel as mk
-from vllm.model_executor.layers.fused_moe.activation import MoEActivation
-from vllm.model_executor.layers.fused_moe.config import (
-    FusedMoEConfig,
-    FusedMoEParallelConfig,
-    RoutingMethodType,
-)
-from vllm.platforms import current_platform
-from vllm.utils.torch_utils import direct_register_custom_op
-
-#
-# Methods used by the oracle for kernel selection.
-#
-
-
-def _supports_current_device() -> bool:
-    """Supports only Blackwell-family GPUs."""
-    p = current_platform
-    return p.is_cuda() and p.is_device_capability_family(100)
-
-
-def _supports_no_act_and_mul() -> bool:
-    """BF16 kernels do not support non-gated MoE"""
-    return False
-
-
-def _supports_activation(activation: MoEActivation) -> bool:
-    return activation in [MoEActivation.SILU]
-
-
-def _supports_routing_method_bf16(
-    routing_method: RoutingMethodType,
-) -> bool:
-    return routing_method in [
-        RoutingMethodType.Default,
-        RoutingMethodType.Renormalize,
-        RoutingMethodType.DeepSeekV3,
-        RoutingMethodType.Llama4,
-        RoutingMethodType.RenormalizeNaive,
-    ]
-
-
-def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
-    """Supports TRTLLM Kernel does not support EPLB."""
-    return not moe_parallel_config.enable_eplb
-
-
-def is_supported_config_trtllm_bf16(
-    moe_config: FusedMoEConfig,
-    activation_format: mk.FusedMoEActivationFormat,
-) -> tuple[bool, str | None]:
-    """
-    This method mirrors mk.FusedMoEPermuteExpertsUnpermute.is_supported_config
-    for BF16 unquantized kernels.
-    """
-
-    def _make_reason(reason: str) -> str:
-        return f"kernel does not support {reason}"
-
-    if not _supports_current_device():
-        return False, _make_reason(f"current device {current_platform.device_name}")
-    elif not (moe_config.is_act_and_mul or _supports_no_act_and_mul()):
-        return False, _make_reason("no act_and_mul MLP layer")
-    elif not _supports_activation(moe_config.activation):
-        return False, _make_reason(f"{moe_config.activation} activation")
-    elif not _supports_parallel_config(moe_config.moe_parallel_config):
-        return False, _make_reason(f"parallel config {moe_config.moe_parallel_config}")
-    elif not _supports_routing_method_bf16(moe_config.routing_method):
-        return False, _make_reason(f"routing method {moe_config.routing_method}")
-    elif activation_format != mk.FusedMoEActivationFormat.Standard:
-        return False, _make_reason(f"activation format {activation_format}")
-
-    return True, None
-
-
-def flashinfer_fused_moe_bf16(
-    routing_logits: torch.Tensor,
-    routing_bias: torch.Tensor | None,
-    hidden_states: torch.Tensor,
-    gemm1_weights: torch.Tensor,
-    gemm2_weights: torch.Tensor,
-    num_experts: int,
-    top_k: int,
-    n_group: int | None,
-    topk_group: int | None,
-    intermediate_size: int,
-    local_expert_offset: int,
-    local_num_experts: int,
-    routing_method_type: int,
-    tune_max_num_tokens: int = 8192,
-) -> torch.Tensor:
-    from vllm.utils.flashinfer import flashinfer_trtllm_bf16_moe
-
-    return flashinfer_trtllm_bf16_moe(
-        routing_logits=routing_logits,
-        routing_bias=routing_bias,
-        hidden_states=hidden_states,
-        gemm1_weights=gemm1_weights,
-        gemm2_weights=gemm2_weights,
-        num_experts=num_experts,
-        top_k=top_k,
-        n_group=n_group,
-        topk_group=topk_group,
-        intermediate_size=intermediate_size,
-        local_expert_offset=local_expert_offset,
-        local_num_experts=local_num_experts,
-        routing_method_type=routing_method_type,
-        tune_max_num_tokens=tune_max_num_tokens,
-    )
-
-
-def flashinfer_fused_moe_bf16_fake(
-    routing_logits: torch.Tensor,
-    routing_bias: torch.Tensor | None,
-    hidden_states: torch.Tensor,
-    gemm1_weights: torch.Tensor,
-    gemm2_weights: torch.Tensor,
-    num_experts: int,
-    top_k: int,
-    n_group: int | None,
-    topk_group: int | None,
-    intermediate_size: int,
-    local_expert_offset: int,
-    local_num_experts: int,
-    routing_method_type: int = RoutingMethodType.Renormalize,
-    tune_max_num_tokens: int = 8192,
-) -> torch.Tensor:
-    return torch.empty_like(hidden_states)
-
-
-direct_register_custom_op(
-    op_name="flashinfer_fused_moe_bf16",
-    op_func=flashinfer_fused_moe_bf16,
-    fake_impl=flashinfer_fused_moe_bf16_fake,
-    tags=(torch.Tag.needs_fixed_stride_order,),
-)
diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index dccdc52bc4a9..8d974ea5671d 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -20,33 +20,16 @@
 )
 from vllm.model_executor.layers.fused_moe.config import (
     FUSED_MOE_UNQUANTIZED_CONFIG,
-    FusedMoEConfig,
-    FusedMoEParallelConfig,
     FusedMoEQuantConfig,
     _get_config_dtype_str,
 )
 from vllm.model_executor.layers.fused_moe.moe_align_block_size import (
     moe_align_block_size,
 )
-from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
-    TopKWeightAndReduceNoOP,
-)
 from vllm.model_executor.layers.fused_moe.utils import (
-    _resize_cache,
     disable_inplace,
     moe_kernel_quantize_input,
 )
-from vllm.model_executor.layers.quantization.utils.mxfp4_utils import dequant_mxfp4
-from vllm.model_executor.layers.quantization.utils.mxfp6_utils import dequant_mxfp6
-from vllm.model_executor.layers.quantization.utils.quant_utils import (
-    QuantKey,
-    kFp8Dynamic128Sym,
-    kFp8DynamicTensorSym,
-    kFp8DynamicTokenSym,
-    kFp8Static128BlockSym,
-    kFp8StaticChannelSym,
-    kFp8StaticTensorSym,
-)
 from vllm.platforms import current_platform
 from vllm.triton_utils import tl, triton
 from vllm.utils.torch_utils import direct_register_custom_op
@@ -1091,7 +1074,6 @@ def get_moe_configs(
         "Using default MoE config. Performance might be sub-optimal! "
         "Config file not found at %s",
         ", ".join(config_file_paths),
-        scope="local",
     )
     return None
 
@@ -1551,6 +1533,55 @@ def dispatch_fused_experts_func(inplace: bool) -> Callable[..., torch.Tensor]:
     return torch_vllm_outplace_fused_experts
 
 
+def _prepare_expert_assignment(
+    topk_ids: torch.Tensor,
+    config: dict[str, Any],
+    num_tokens: int,
+    top_k_num: int,
+    global_num_experts: int,
+    expert_map: torch.Tensor | None,
+    *,
+    use_int8_w8a16: bool = False,
+    use_int4_w4a16: bool = False,
+    block_shape: list[int] | None = None,
+    ignore_invalid_experts: bool = False,
+) -> tuple[torch.Tensor | None, torch.Tensor, torch.Tensor]:
+    """Prepare expert assignments for the aligned and low-latency Triton paths."""
+    # SPARSITY_FACTOR is a heuristic margin ensuring tokens_in_chunk * top_k
+    # activates only a small fraction of total experts
+    # Skips moe_align_block_size and activates the `sorted_token_ids is None`
+    # path of the fused_moe_kernel kernel
+    naive_block_assignment = (
+        expert_map is None
+        and num_tokens * top_k_num * 4 <= global_num_experts
+        and not (
+            (use_int8_w8a16 or use_int4_w4a16)
+            and block_shape is not None
+            and block_shape[1] > 0
+        )
+    )
+
+    if naive_block_assignment:
+        return (
+            None,
+            topk_ids.view(-1),
+            torch.full(
+                (1,),
+                topk_ids.numel() * config["BLOCK_SIZE_M"],
+                dtype=torch.int32,
+                device=topk_ids.device,
+            ),
+        )
+
+    return moe_align_block_size(
+        topk_ids,
+        config["BLOCK_SIZE_M"],
+        global_num_experts,
+        expert_map,
+        ignore_invalid_experts=ignore_invalid_experts,
+    )
+
+
 # TODO (bnell): replace this with modular op.  Can get rid of inplace/outplace
 # torch ops.
 def fused_experts(
@@ -1657,22 +1688,18 @@ def fused_experts_impl(
     w1_bias: torch.Tensor | None = None,
     w2_bias: torch.Tensor | None = None,
 ) -> torch.Tensor:
+    if ocp_mx_scheme is not None:
+        raise NotImplementedError(
+            f"Using ocp_mx_scheme={ocp_mx_scheme} in functional fused_experts call is "
+            "deprecated. Please use OCP_MXQuantizationEmulationTritonExperts."
+        )
+
     # Convert string activation to enum for internal use
     activation_enum = MoEActivation.from_str(activation)
 
     # Check constraints.
     if use_int4_w4a16:
         assert hidden_states.size(1) // 2 == w1.size(2), "Hidden size mismatch"
-    elif ocp_mx_scheme is not None:
-        if ocp_mx_scheme.startswith("w_mxfp4"):
-            # 16bit activation and fp4x2 packed weight
-            assert hidden_states.size(1) == w1.size(2) * 2, "hidden size mismatch"
-        elif ocp_mx_scheme.startswith("w_mxfp6"):
-            assert hidden_states.size(1) == (w1.size(2) * 4) // 3, (
-                "hidden size mismatch"
-            )
-        else:
-            raise NotImplementedError(f"Unsupported ocp_mx_scheme={ocp_mx_scheme}")
     else:
         assert hidden_states.size(1) == w1.size(2), (
             f"Hidden size mismatch {hidden_states.size(1)} != {w1.size(2)}"
@@ -1697,7 +1724,6 @@ def fused_experts_impl(
         use_fp8_w8a8=use_fp8_w8a8,
         use_int8_w8a16=use_int8_w8a16,
         use_int4_w4a16=use_int4_w4a16,
-        ocp_mx_scheme=ocp_mx_scheme,
         dtype=hidden_states.dtype,
     )
 
@@ -1706,7 +1732,7 @@ def fused_experts_impl(
     quant_dtype = _get_config_quant_dtype(
         use_fp8_w8a8=use_fp8_w8a8,
         use_int8_w8a8=use_int8_w8a8,
-        ocp_mx_scheme=ocp_mx_scheme,
+        ocp_mx_scheme=None,
     )
 
     get_config_func = functools.partial(
@@ -1751,77 +1777,27 @@ def fused_experts_impl(
 
     out_hidden_states = hidden_states if inplace else torch.empty_like(hidden_states)
 
-    if ocp_mx_scheme is not None:
-        # TODO: On platforms for which `current_platform.supports_mx()` is True
-        # and for which we have a native OCP mx fused MOE kernel,
-        # this dequantization step should not be done.
-        if ocp_mx_scheme.startswith("w_mxfp4"):
-            # Weight has to be dequantized for mxfp4 emulation.
-            w1 = dequant_mxfp4(w1, w1_scale, hidden_states.dtype)
-            w1_scale = None
-            w2 = dequant_mxfp4(w2, w2_scale, hidden_states.dtype)
-            w2_scale = None
-        elif ocp_mx_scheme.startswith("w_mxfp6_e3m2"):
-            w1 = dequant_mxfp6(
-                w1, w1_scale, quant_dtype="fp6_e3m2", float_dtype=hidden_states.dtype
-            )
-            w1_scale = None
-            w2 = dequant_mxfp6(
-                w2, w2_scale, quant_dtype="fp6_e3m2", float_dtype=hidden_states.dtype
-            )
-            w2_scale = None
-        elif ocp_mx_scheme.startswith("w_mxfp6_e2m3"):
-            w1 = dequant_mxfp6(
-                w1, w1_scale, quant_dtype="fp6_e2m3", float_dtype=hidden_states.dtype
-            )
-            w1_scale = None
-            w2 = dequant_mxfp6(
-                w2, w2_scale, quant_dtype="fp6_e2m3", float_dtype=hidden_states.dtype
-            )
-            w2_scale = None
-        else:
-            raise NotImplementedError(f"Unsupported ocp_mx_scheme={ocp_mx_scheme}")
-
     qhidden_states, a1q_scale = moe_kernel_quantize_input(
         A=hidden_states,
         A_scale=a1_scale,
         quant_dtype=quant_dtype,
         per_act_token_quant=per_channel_quant,
         block_shape=block_shape,
-        ocp_mx_scheme=ocp_mx_scheme,
     )
 
-    # SPARSITY_FACTOR is a heuristic margin ensuring num_tokens * top_k
-    # activates only a small fraction of total experts
-    SPARSITY_FACTOR = 4
-    # block quantized code path is not implemented yet.
-    naive_block_assignment = (
-        expert_map is None
-        and num_tokens * top_k_num * SPARSITY_FACTOR <= global_num_experts
-        and not (
-            (use_int8_w8a16 or use_int4_w4a16)
-            and block_shape is not None
-            and block_shape[1] > 0
-        )
+    sorted_token_ids, expert_ids, num_tokens_post_padded = _prepare_expert_assignment(
+        topk_ids,
+        config,
+        num_tokens,
+        top_k_num,
+        global_num_experts,
+        expert_map,
+        use_int8_w8a16=use_int8_w8a16,
+        use_int4_w4a16=use_int4_w4a16,
+        block_shape=block_shape,
+        ignore_invalid_experts=True,
     )
 
-    if not naive_block_assignment:
-        sorted_token_ids, expert_ids, num_tokens_post_padded = moe_align_block_size(
-            topk_ids,
-            config["BLOCK_SIZE_M"],
-            global_num_experts,
-            expert_map,
-            ignore_invalid_experts=True,
-        )
-    else:
-        max_num_tokens_padded = topk_ids.numel() * config["BLOCK_SIZE_M"]
-        expert_ids = topk_ids.view(-1)
-        num_tokens_post_padded = torch.empty(
-            (1), dtype=torch.int32, device=topk_ids.device
-        )
-        num_tokens_post_padded.fill_(max_num_tokens_padded)
-        sorted_token_ids = None
-
     dispatch_fused_moe_kernel(
         qhidden_states,
         w1,
@@ -1856,7 +1832,6 @@ def fused_experts_impl(
         quant_dtype=quant_dtype,
         per_act_token_quant=per_channel_quant,
         block_shape=block_shape,
-        ocp_mx_scheme=ocp_mx_scheme,
     )
 
     if expert_map is not None:
@@ -1892,421 +1867,3 @@ def fused_experts_impl(
     )
 
     return out_hidden_states
-
-
-class TritonExperts(mk.FusedMoEExpertsModular):
-    """Triton-based fused MoE expert implementation."""
-
-    def __init__(
-        self,
-        moe_config: FusedMoEConfig,
-        quant_config: FusedMoEQuantConfig,
-    ):
-        super().__init__(moe_config, quant_config)
-
-    @staticmethod
-    def activation_format() -> mk.FusedMoEActivationFormat:
-        return mk.FusedMoEActivationFormat.Standard
-
-    @staticmethod
-    def _supports_current_device() -> bool:
-        return current_platform.is_cuda_alike() or current_platform.is_xpu()
-
-    @staticmethod
-    def _supports_no_act_and_mul() -> bool:
-        return True
-
-    @staticmethod
-    def _supports_quant_scheme(
-        weight_key: QuantKey | None,
-        activation_key: QuantKey | None,
-    ) -> bool:
-        p = current_platform
-        if p.is_rocm():
-            from vllm.platforms.rocm import on_gfx9
-
-            is_rocm_on_gfx9 = on_gfx9()
-        else:
-            is_rocm_on_gfx9 = False
-
-        device_supports_fp8 = (
-            is_rocm_on_gfx9
-            or (p.is_cuda() and p.has_device_capability((8, 9)))
-            or p.is_xpu()
-        )
-
-        if not device_supports_fp8:
-            return (weight_key, activation_key) == (None, None)
-
-        SUPPORTED_W_A = [
-            (None, None),
-            (kFp8Static128BlockSym, kFp8Dynamic128Sym),
-            (kFp8StaticChannelSym, kFp8DynamicTokenSym),
-            (kFp8StaticTensorSym, kFp8DynamicTokenSym),
-            (kFp8StaticTensorSym, kFp8StaticTensorSym),
-            (kFp8StaticTensorSym, kFp8DynamicTensorSym),
-        ]
-        return (weight_key, activation_key) in SUPPORTED_W_A
-
-    @staticmethod
-    def _supports_activation(activation: MoEActivation) -> bool:
-        return activation in [
-            MoEActivation.SILU,
-            MoEActivation.GELU,
-            MoEActivation.SWIGLUOAI,
-            MoEActivation.SWIGLUSTEP,
-            MoEActivation.SILU_NO_MUL,
-            MoEActivation.GELU_NO_MUL,
-            MoEActivation.RELU2_NO_MUL,
-        ]
-
-    @staticmethod
-    def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
-        return not (
-            moe_parallel_config.use_fi_nvl_two_sided_kernels
-            or moe_parallel_config.use_fi_nvl_one_sided_kernels
-        )
-
-    def supports_expert_map(self) -> bool:
-        return True
-
-    def finalize_weight_and_reduce_impl(self) -> mk.TopKWeightAndReduce:
-        return TopKWeightAndReduceNoOP()
-
-    def workspace_shapes(
-        self,
-        M: int,
-        N: int,
-        K: int,
-        topk: int,
-        global_num_experts: int,
-        local_num_experts: int,
-        expert_tokens_meta: mk.ExpertTokensMetadata | None,
-        activation: MoEActivation,
-    ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...]]:
-        activation_out_dim = self.adjust_N_for_activation(N, activation)
-        workspace1 = (M, topk, max(activation_out_dim, K))
-        workspace2 = (M, topk, max(N, K))
-        output = (M, K)
-        return (workspace1, workspace2, output)
-
-    def apply(
-        self,
-        output: torch.Tensor,
-        hidden_states: torch.Tensor,
-        w1: torch.Tensor,
-        w2: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        activation: MoEActivation,
-        global_num_experts: int,
-        expert_map: torch.Tensor | None,
-        a1q_scale: torch.Tensor | None,
-        a2_scale: torch.Tensor | None,
-        workspace13: torch.Tensor,
-        workspace2: torch.Tensor,
-        expert_tokens_meta: mk.ExpertTokensMetadata | None,
-        apply_router_weight_on_input: bool,
-    ):
-        # Check constraints.
-        if self.quant_config.use_int4_w4a16:
-            assert hidden_states.size(-1) // 2 == w1.size(2), "Hidden size mismatch"
-        else:
-            assert hidden_states.size(-1) == w1.size(2), (
-                f"Hidden size mismatch {hidden_states.size(-1)} != {w1.size(2)}"
-            )
-
-        assert hidden_states.is_contiguous(), "Hidden_states must be contiguous"
-        assert hidden_states.dim() == 2
-        assert w1.stride(-1) == 1, "Stride of last dimension must be 1"
-        assert w2.stride(-1) == 1, "Stride of last dimension must be 1"
-        assert hidden_states.dtype in [
-            torch.float32,
-            torch.float16,
-            torch.bfloat16,
-            torch.float8_e4m3fn,
-            torch.float8_e4m3fnuz,
-        ]
-
-        E, num_tokens, N, K, top_k_num = self.moe_problem_size(
-            hidden_states, w1, w2, topk_ids
-        )
-
-        if global_num_experts == -1:
-            global_num_experts = E
-
-        config = try_get_optimal_moe_config(
-            w1.size(),
-            w2.size(),
-            top_k_num,
-            self.quant_config.config_name(hidden_states.dtype),
-            num_tokens,
-            block_shape=self.block_shape,
-        )
-
-        if hidden_states.dtype == torch.bfloat16:
-            compute_type = tl.bfloat16
-        elif hidden_states.dtype == torch.float16:
-            compute_type = tl.float16
-        elif hidden_states.dtype == torch.float32:
-            compute_type = tl.float32
-        elif (
-            hidden_states.dtype == torch.float8_e4m3fn
-            or hidden_states.dtype == torch.float8_e4m3fnuz
-        ):
-            compute_type = tl.bfloat16
-        else:
-            raise ValueError(f"Unsupported compute_type: {hidden_states.dtype}")
-
-        # Note that the output tensor might be in workspace1
-        intermediate_cache1 = _resize_cache(workspace2, (num_tokens, top_k_num, N))
-        cache2_dim = self.adjust_N_for_activation(N, activation)
-        intermediate_cache2 = _resize_cache(
-            workspace13, (num_tokens * top_k_num, cache2_dim)
-        )
-        intermediate_cache3 = _resize_cache(workspace2, (num_tokens, top_k_num, K))
-
-        sorted_token_ids, expert_ids, num_tokens_post_padded = moe_align_block_size(
-            topk_ids, config["BLOCK_SIZE_M"], global_num_experts, expert_map
-        )
-
-        invoke_fused_moe_triton_kernel(
-            hidden_states,
-            w1,
-            intermediate_cache1,
-            a1q_scale,
-            self.w1_scale,
-            None,  # topk_weights
-            sorted_token_ids,
-            expert_ids,
-            num_tokens_post_padded,
-            False,  # mul_routed_weights
-            top_k_num,
-            config,
-            compute_type=compute_type,
-            use_fp8_w8a8=self.quant_config.use_fp8_w8a8,
-            use_int8_w8a8=self.quant_config.use_int8_w8a8,
-            use_int8_w8a16=self.quant_config.use_int8_w8a16,
-            use_int4_w4a16=self.quant_config.use_int4_w4a16,
-            per_channel_quant=self.per_act_token_quant,
-            block_shape=self.block_shape,
-            B_bias=self.w1_bias,
-        )
-
-        self.activation(
-            activation, intermediate_cache2, intermediate_cache1.view(-1, N)
-        )
-
-        a2q_scale: torch.Tensor | None = None
-
-        qintermediate_cache2, a2q_scale = moe_kernel_quantize_input(
-            intermediate_cache2,
-            a2_scale,
-            self.quant_dtype,
-            self.per_act_token_quant,
-            self.block_shape,
-        )
-
-        invoke_fused_moe_triton_kernel(
-            qintermediate_cache2,
-            w2,
-            intermediate_cache3,
-            a2q_scale,
-            self.w2_scale,
-            topk_weights,
-            sorted_token_ids,
-            expert_ids,
-            num_tokens_post_padded,
-            not apply_router_weight_on_input,
-            1,
-            config,
-            compute_type=compute_type,
-            use_fp8_w8a8=self.quant_config.use_fp8_w8a8,
-            use_int8_w8a8=self.quant_config.use_int8_w8a8,
-            use_int8_w8a16=self.quant_config.use_int8_w8a16,
-            use_int4_w4a16=self.quant_config.use_int4_w4a16,
-            per_channel_quant=self.per_act_token_quant,
-            block_shape=self.block_shape,
-            B_bias=self.w2_bias,
-        )
-
-        # separate function is required for MoE + LoRA
-        self.moe_sum(intermediate_cache3, output)
-
-    def moe_sum(self, input: torch.Tensor, output: torch.Tensor) -> None:
-        ops.moe_sum(input, output)
-
-
-class TritonWNA16Experts(TritonExperts):
-    @staticmethod
-    def _supports_current_device() -> bool:
-        raise NotImplementedError(
-            "TritonWNA16Experts is not yet used by an Oracle. "
-            "This method should not be called."
-        )
-
-    @staticmethod
-    def _supports_no_act_and_mul() -> bool:
-        raise NotImplementedError(
-            "TritonWNA16Experts is not yet used by an Oracle. "
-            "This method should not be called."
-        )
-
-    @staticmethod
-    def _supports_quant_scheme(
-        weight_key: QuantKey | None,
-        activation_key: QuantKey | None,
-    ) -> bool:
-        raise NotImplementedError(
-            "TritonWNA16Experts is not yet used by an Oracle. "
-            "This method should not be called."
-        )
-
-    @staticmethod
-    def _supports_activation(activation: MoEActivation) -> bool:
-        raise NotImplementedError(
-            "TritonWNA16Experts is not yet used by an Oracle. "
-            "This method should not be called."
-        )
-
-    @staticmethod
-    def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
-        raise NotImplementedError(
-            "TritonWNA16Experts is not yet used by an Oracle. "
-            "This method should not be called."
-        )
-
-    def apply(
-        self,
-        output: torch.Tensor,
-        hidden_states: torch.Tensor,
-        w1: torch.Tensor,
-        w2: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        activation: MoEActivation,
-        global_num_experts: int,
-        expert_map: torch.Tensor | None,
-        a1q_scale: torch.Tensor | None,
-        a2_scale: torch.Tensor | None,
-        workspace13: torch.Tensor,
-        workspace2: torch.Tensor,
-        expert_tokens_meta: mk.ExpertTokensMetadata | None,
-        apply_router_weight_on_input: bool,
-    ):
-        # Check constraints.
-        if self.quant_config.use_int4_w4a16:
-            assert hidden_states.size(-1) // 2 == w1.size(2), "Hidden size mismatch"
-        else:
-            assert hidden_states.size(-1) == w1.size(2), (
-                f"Hidden size mismatch {hidden_states.size(-1)} != {w1.size(2)}"
-            )
-
-        assert hidden_states.is_contiguous(), "Hidden_states must be contiguous"
-        assert hidden_states.dim() == 2
-        assert w1.stride(-1) == 1, "Stride of last dimension must be 1"
-        assert w2.stride(-1) == 1, "Stride of last dimension must be 1"
-        assert hidden_states.dtype in [
-            torch.float32,
-            torch.float16,
-            torch.bfloat16,
-            torch.float8_e4m3fn,
-            torch.float8_e4m3fnuz,
-        ]
-
-        E, num_tokens, N, K, top_k_num = self.moe_problem_size(
-            hidden_states, w1, w2, topk_ids
-        )
-
-        if global_num_experts == -1:
-            global_num_experts = E
-
-        config = try_get_optimal_moe_config(
-            w1.size(),
-            w2.size(),
-            top_k_num,
-            self.quant_config.config_name(hidden_states.dtype),
-            num_tokens,
-            block_shape=self.block_shape,
-        )
-
-        if hidden_states.dtype == torch.bfloat16:
-            compute_type = tl.bfloat16
-        elif hidden_states.dtype == torch.float16:
-            compute_type = tl.float16
-        elif hidden_states.dtype == torch.float32:
-            compute_type = tl.float32
-        elif (
-            hidden_states.dtype == torch.float8_e4m3fn
-            or hidden_states.dtype == torch.float8_e4m3fnuz
-        ):
-            compute_type = tl.bfloat16
-        else:
-            raise ValueError(f"Unsupported compute_type: {hidden_states.dtype}")
-
-        # Note that the output tensor might be in workspace1
-        intermediate_cache1 = _resize_cache(workspace2, (num_tokens, top_k_num, N))
-        activation_out_dim = self.adjust_N_for_activation(N, activation)
-        intermediate_cache2 = _resize_cache(
-            workspace13, (num_tokens * top_k_num, activation_out_dim)
-        )
-        intermediate_cache3 = _resize_cache(workspace2, (num_tokens, top_k_num, K))
-
-        sorted_token_ids, expert_ids, num_tokens_post_padded = moe_align_block_size(
-            topk_ids, config["BLOCK_SIZE_M"], global_num_experts, expert_map
-        )
-
-        invoke_fused_moe_wna16_triton_kernel(
-            hidden_states,
-            w1,
-            intermediate_cache1,
-            self.w1_scale,
-            self.quant_config.w1_zp,
-            None,  # topk_weights
-            sorted_token_ids,
-            expert_ids,
-            num_tokens_post_padded,
-            False,  # mul_routed_weights
-            top_k_num,
-            config,
-            compute_type=compute_type,
-            use_int8_w8a16=self.quant_config.use_int8_w8a16,
-            use_int4_w4a16=self.quant_config.use_int4_w4a16,
-            block_shape=self.block_shape,
-        )
-
-        self.activation(
-            activation, intermediate_cache2, intermediate_cache1.view(-1, N)
-        )
-
-        a2q_scale: torch.Tensor | None = None
-
-        qintermediate_cache2, a2q_scale = moe_kernel_quantize_input(
-            intermediate_cache2,
-            a2_scale,
-            self.quant_dtype,
-            self.per_act_token_quant,
-            self.block_shape,
-        )
-
-        invoke_fused_moe_wna16_triton_kernel(
-            qintermediate_cache2,
-            w2,
-            intermediate_cache3,
-            self.w2_scale,
-            self.quant_config.w2_zp,
-            topk_weights,
-            sorted_token_ids,
-            expert_ids,
-            num_tokens_post_padded,
-            not apply_router_weight_on_input,
-            1,
-            config,
-            compute_type=compute_type,
-            use_int8_w8a16=self.quant_config.use_int8_w8a16,
-            use_int4_w4a16=self.quant_config.use_int4_w4a16,
-            block_shape=self.block_shape,
-        )
-
-        # separate function is required for MoE + LoRA
-        self.moe_sum(intermediate_cache3, output)
diff --git a/vllm/model_executor/layers/fused_moe/fused_moe_method_base.py b/vllm/model_executor/layers/fused_moe/fused_moe_method_base.py
index d951439d34a0..601d64b792e6 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe_method_base.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe_method_base.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from abc import abstractmethod
+from typing import TYPE_CHECKING
 
 import torch
 
@@ -20,6 +21,9 @@
     QuantizeMethodBase,
 )
 
+if TYPE_CHECKING:
+    from vllm.model_executor.layers.fused_moe.runner.shared_experts import SharedExperts
+
 logger = init_logger(__name__)
 
 
@@ -37,11 +41,11 @@ def supports_internal_mk(self) -> bool:
         return self.moe_kernel is not None
 
     @property
-    def mk_owns_shared_expert(self) -> bool:
+    def mk_can_overlap_shared_experts(self) -> bool:
         # NOTE(rob): temporary attribute to indicate support for
         # completed migration to the new internal MK interface.
         return (
-            self.moe_kernel is not None and self.moe_kernel.shared_experts is not None
+            self.moe_kernel is not None and self.moe_kernel.can_overlap_shared_experts
         )
 
     @abstractmethod
@@ -158,18 +162,20 @@ def is_monolithic(self) -> bool:
 
     def apply(
         self,
-        layer: "FusedMoE",  # type: ignore[name-defined] # noqa: F821
+        layer: "RoutedExperts",  # type: ignore[name-defined] # noqa: F821
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: "SharedExperts | None",
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> torch.Tensor:
         raise NotImplementedError
 
     def apply_monolithic(
         self,
-        layer: "FusedMoE",  # type: ignore[name-defined] # noqa: F821
+        layer: "RoutedExperts",  # type: ignore[name-defined] # noqa: F821
         x: torch.Tensor,
         router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
         raise NotImplementedError
diff --git a/vllm/model_executor/layers/fused_moe/fused_moe_modular_method.py b/vllm/model_executor/layers/fused_moe/fused_moe_modular_method.py
index 0065c11f3163..bbf06df8e472 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe_modular_method.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe_modular_method.py
@@ -16,6 +16,9 @@
     FusedMoEKernel,
     FusedMoEPrepareAndFinalizeModular,
 )
+from vllm.model_executor.layers.fused_moe.runner.shared_experts import (
+    SharedExperts,
+)
 
 logger = init_logger(__name__)
 
@@ -28,7 +31,7 @@ class FusedMoEModularMethod(FusedMoEMethodBase, CustomOp):
     def __init__(
         self, old_quant_method: FusedMoEMethodBase, moe_kernel: FusedMoEKernel
     ):
-        super().__init__(old_quant_method.moe)
+        super().__init__(moe_kernel.moe_config)
         self.moe_quant_config = old_quant_method.moe_quant_config
         self.moe_kernel = moe_kernel
         self.disable_expert_map = getattr(
@@ -39,12 +42,15 @@ def __init__(
         self.old_quant_method = old_quant_method
         logger.debug("Swapping out %s", self.old_quant_method.__class__.__name__)
 
+    @property
+    def wraps_legacy_quant_method(self) -> bool:
+        return not self.old_quant_method.supports_internal_mk
+
     @staticmethod
     def make(
         moe_layer: torch.nn.Module,
         old_quant_method: FusedMoEMethodBase,
         prepare_finalize: FusedMoEPrepareAndFinalizeModular,
-        shared_experts: torch.nn.Module | None,
         inplace: bool = False,
     ) -> "FusedMoEModularMethod":
         return FusedMoEModularMethod(
@@ -52,8 +58,6 @@ def make(
             FusedMoEKernel(
                 prepare_finalize,
                 old_quant_method.select_gemm_impl(prepare_finalize, moe_layer),
-                shared_experts,
-                moe_parallel_config=moe_layer.moe_parallel_config,
                 inplace=inplace,
             ),
         )
@@ -84,12 +88,13 @@ def get_fused_moe_quant_config(
 
     def apply(
         self,
-        layer: "FusedMoE",  # type: ignore[name-defined] # noqa: F821
+        layer: "RoutedExperts",  # type: ignore[name-defined] # noqa: F821
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> torch.Tensor:
         assert self.moe_kernel is not None
         return self.moe_kernel.apply(
             hidden_states=x,
@@ -101,5 +106,6 @@ def apply(
             global_num_experts=layer.global_num_experts,
             apply_router_weight_on_input=layer.apply_router_weight_on_input,
             expert_map=None if self.disable_expert_map else layer.expert_map,
+            shared_experts=shared_experts,
             shared_experts_input=shared_experts_input,
         )
diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index a95481a7e6a0..727cbd34010f 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -3,14 +3,13 @@
 
 from collections.abc import Callable, Iterable
 from enum import Enum
-from typing import Literal, cast, get_args, overload
+from typing import Literal, cast, overload
 
 import torch
 from torch.nn.parameter import UninitializedParameter
 
-import vllm.envs as envs
 from vllm._aiter_ops import rocm_aiter_ops
-from vllm.config import VllmConfig, get_current_vllm_config
+from vllm.config import get_current_vllm_config
 from vllm.config.parallel import ExpertPlacementStrategy
 from vllm.distributed import (
     get_dp_group,
@@ -19,7 +18,7 @@
 )
 from vllm.distributed.eplb.eplb_state import EplbLayerState, EplbState
 from vllm.logger import init_logger
-from vllm.model_executor.custom_op import CustomOp
+from vllm.model_executor.custom_op import PluggableLayer
 from vllm.model_executor.layers.fused_moe.activation import MoEActivation
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEConfig,
@@ -27,20 +26,26 @@
     FusedMoEQuantConfig,
     RoutingMethodType,
 )
+from vllm.model_executor.layers.fused_moe.expert_map_manager import (
+    ExpertMapManager,
+)
 from vllm.model_executor.layers.fused_moe.fused_moe_method_base import (
     FusedMoEMethodBase,
 )
 from vllm.model_executor.layers.fused_moe.fused_moe_modular_method import (
     FusedMoEModularMethod,
 )
-from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (
-    init_aiter_topK_meta_data,
-)
 from vllm.model_executor.layers.fused_moe.router.router_factory import (
     create_fused_moe_router,
 )
-from vllm.model_executor.layers.fused_moe.runner.default_moe_runner import (
-    DefaultMoERunner,
+from vllm.model_executor.layers.fused_moe.runner.moe_runner import (
+    MoERunner,
+)
+from vllm.model_executor.layers.fused_moe.runner.moe_runner_interface import (
+    MoERunnerInterface,
+)
+from vllm.model_executor.layers.fused_moe.runner.shared_experts import (
+    SharedExperts,
 )
 from vllm.model_executor.layers.fused_moe.unquantized_fused_moe_method import (
     UnquantizedFusedMoEMethod,
@@ -63,156 +68,9 @@ class FusedMoeWeightScaleSupported(Enum):
     BLOCK = "block"
 
 
-def determine_expert_map(
-    ep_size: int,
-    ep_rank: int,
-    global_num_experts: int,
-    expert_placement_strategy: ExpertPlacementStrategy = "linear",
-    num_fused_shared_experts: int = 0,
-    return_expert_mask: bool = False,
-) -> tuple[int, torch.Tensor | None, torch.Tensor | None]:
-    """
-    Calculates how many experts should be assigned to each rank for EP and
-    creates a mapping from global to local expert index. Experts are
-    distributed evenly across ranks. Any remaining are assigned to the
-    last rank.
-
-    Args:
-        ep_size: The size of the expert parallel group
-        ep_rank: The rank of the current process in the expert parallel
-            group
-        global_num_experts: The total number of experts in the model.
-        expert_placement_strategy: The expert placement strategy.
-
-    Returns:
-        tuple[int, Optional[torch.Tensor]]: A tuple containing:
-            - local_num_experts (int): The number of experts assigned
-                to the current rank.
-            - expert_map (Optional[torch.Tensor]): A tensor of shape
-                (global_num_experts,) mapping from global to local index.
-                Contains -1 for experts not assigned to the current rank.
-                Returns None if ep_size is 1.
-            - expert_mask (Optional[torch.Tensor]): A tensor of shape
-                (global_num_experts + num_fused_shared_experts + 1,)
-                containing 1 for experts assigned to the current rank
-                and 0 for sentinel.
-                Returns None if ep_size is 1.
-                Used only when AITER MOE is enabled.
-    """
-    assert ep_size > 0
-    if ep_size == 1:
-        return (global_num_experts, None, None)
-
-    # Distribute experts as evenly as possible to each rank.
-    base_experts = global_num_experts // ep_size
-    remainder = global_num_experts % ep_size
-    local_num_experts = base_experts + 1 if ep_rank < remainder else base_experts
-
-    # Create a tensor of size num_experts filled with -1
-    expert_map = torch.full((global_num_experts,), -1, dtype=torch.int32)
-    # Create an expert map for the local experts
-    if expert_placement_strategy == "linear":
-        start_idx = ep_rank * base_experts + min(ep_rank, remainder)
-        expert_map[start_idx : start_idx + local_num_experts] = torch.arange(
-            0, local_num_experts, dtype=torch.int32
-        )
-    elif expert_placement_strategy == "round_robin":
-        local_log_experts = torch.arange(
-            ep_rank, global_num_experts, ep_size, dtype=torch.int32
-        )
-
-        expert_map[local_log_experts] = torch.arange(
-            0, local_num_experts, dtype=torch.int32
-        )
-    else:
-        raise ValueError(
-            "Unsupported expert placement strategy "
-            f"'{expert_placement_strategy}', expected one of "
-            f"{get_args(ExpertPlacementStrategy)}"
-        )
-
-    expert_mask = None
-    if return_expert_mask:
-        expert_mask = torch.ones(
-            (global_num_experts + num_fused_shared_experts + 1,), dtype=torch.int32
-        )
-        expert_mask[-1] = 0
-        expert_mask[:global_num_experts] = expert_map > -1
-        expert_map = torch.cat(
-            (
-                expert_map,
-                torch.tensor(
-                    [local_num_experts + i for i in range(num_fused_shared_experts)],
-                    dtype=torch.int32,
-                ),
-            ),
-            dim=0,
-        )
-
-    return (local_num_experts, expert_map, expert_mask)
-
-
-def determine_expert_placement_strategy(
-    expert_placement_strategy: ExpertPlacementStrategy,
-    moe_parallel_config: FusedMoEParallelConfig,
-    num_expert_group: int | None,
-    num_redundant_experts: int,
-    enable_eplb: bool,
-) -> ExpertPlacementStrategy:
-    if expert_placement_strategy == "round_robin":
-        round_robin_supported = (
-            (num_expert_group is not None and num_expert_group > 1)
-            and num_redundant_experts == 0
-            and not enable_eplb
-        )
-
-        if not round_robin_supported:
-            logger.warning(
-                "Round-robin expert placement is only supported for "
-                "models with multiple expert groups and no redundant "
-                "experts. Falling back to linear expert placement."
-            )
-            return "linear"
-        if (
-            moe_parallel_config.use_all2all_kernels
-            and not moe_parallel_config.use_deepep_ll_kernels
-            and not moe_parallel_config.use_nixl_ep_kernels
-        ):
-            logger.warning(
-                "Round-robin expert placement currently only supports "
-                "the DeepEP low-latency or NIXL EP backend, but '%s' was configured. "
-                "Falling back to linear expert placement.",
-                moe_parallel_config.all2all_backend,
-            )
-            return "linear"
-
-    return expert_placement_strategy
-
-
-def get_compressed_expert_map(expert_map: torch.Tensor) -> str:
-    """
-    Compresses the expert map by removing any -1 entries.
-
-    Args:
-        expert_map (torch.Tensor): A tensor of shape (global_num_experts,)
-            mapping from global to local index. Contains -1 for experts not
-            assigned to the current rank.
-
-    Returns:
-        str: A string mapping from local to global index.
-            Using str to support hashing for logging once only.
-    """
-    global_indices = torch.where(expert_map != -1)[0]
-    local_indices = expert_map[global_indices]
-    return ", ".join(
-        f"{local_index.item()}->{global_index.item()}"
-        for local_index, global_index in zip(local_indices, global_indices)
-    )
-
-
 # --8<-- [start:fused_moe]
-@CustomOp.register("fused_moe")
-class FusedMoE(CustomOp):
+@PluggableLayer.register("fused_moe")
+class FusedMoE(PluggableLayer):
     """FusedMoE layer for MoE models.
 
     This layer contains both MergedColumnParallel weights (gate_up_proj /
@@ -228,11 +86,20 @@ class FusedMoE(CustomOp):
         hidden_size: Input hidden state size of the transformer
         intermediate_size: Intermediate size of the experts
         params_dtype: Data type for the parameters.
-        reduce_results: Whether to all_reduce on the output of the layer
         renormalize: Whether to renormalize the logits in the fused_moe kernel
         quant_config: Quantization configure.
         enable_eplb: Whether to enable expert parallelism load balancer.
         router_logits_dtype: Data type for router logits buffers.
+        routed_scaling_factor: A scaling factor that is applied to the topk_weights
+                               by the router or the output of the layer depending
+                               on the value of `apply_routed_scale_to_output`
+        apply_routed_scale_to_output: Determine whether or not `routed_scaling_factor`
+                                      is applied to the topk_weights or to the experts
+                                      output. It is applied to the experts output
+                                      instead of the topk_weights when this feature is
+                                      not supported by the router (or the experts).
+        enable_router_pdl: Whether fused top-k routing kernels should join a
+                           Programmatic Dependent Launch chain.
     """
 
     # --8<-- [end:fused_moe]
@@ -244,7 +111,6 @@ def __init__(
         hidden_size: int,
         intermediate_size: int,
         params_dtype: torch.dtype | None = None,
-        reduce_results: bool = False,
         renormalize: bool = True,
         use_grouped_topk: bool = False,
         num_expert_group: int | None = None,
@@ -258,6 +124,7 @@ def __init__(
         custom_routing_function: Callable | None = None,
         scoring_func: str = "softmax",
         routed_scaling_factor: float = 1.0,
+        swiglu_limit: float | None = None,
         e_score_correction_bias: torch.Tensor | None = None,
         apply_router_weight_on_input: bool = False,
         activation: str = "silu",
@@ -271,20 +138,23 @@ def __init__(
         router_logits_dtype: torch.dtype | None = None,
         gate: torch.nn.Module | None = None,
         shared_experts: torch.nn.Module | None = None,
+        shared_expert_gate: torch.nn.Module | None = None,
         routed_input_transform: torch.nn.Module | None = None,
+        routed_output_transform: torch.nn.Module | None = None,
+        apply_routed_scale_to_output: bool = False,
+        zero_expert_type: str | None = None,
+        hash_indices_table: torch.Tensor | None = None,
+        enable_router_pdl: bool = False,
     ):
         super().__init__()
 
-        self._gate = gate
-        self._shared_experts = shared_experts
-        self._routed_input_transform = routed_input_transform
-
         if params_dtype is None:
             params_dtype = torch.get_default_dtype()
         self.params_dtype = params_dtype
 
         vllm_config = get_current_vllm_config()
         self.vllm_config = vllm_config
+        self.swiglu_limit = swiglu_limit
 
         # FIXME (varun): We should have a better way of inferring the activation
         # datatype. This works for now as the tensor datatype entering the MoE
@@ -329,13 +199,24 @@ def __init__(
         compilation_config.static_all_moe_layers.append(prefix)
         self.layer_name = prefix
 
-        self.enable_eplb = enable_eplb
-        # TODO(bnell): should this be owned by router?
-        self.eplb_state = EplbLayerState()
         self.expert_placement_strategy: ExpertPlacementStrategy = (
             vllm_config.parallel_config.expert_placement_strategy
         )
 
+        self.eplb_state: EplbLayerState | None = None
+        if enable_eplb:
+            if self.use_ep and self.global_num_experts % self.ep_size != 0:
+                raise ValueError(
+                    f"EPLB currently only supports even distribution of "
+                    f"experts across ranks. Got {self.global_num_experts} experts "
+                    f"and {self.ep_size} EP ranks."
+                )
+            self.eplb_state = EplbLayerState()
+        else:
+            assert not self.use_ep or num_redundant_experts == 0, (
+                "Redundant experts are only supported with EPLB."
+            )
+
         # ROCm aiter shared experts fusion
         # AITER only supports gated activations (silu/gelu), so disable it
         # for non-gated MoE (is_act_and_mul=False)
@@ -351,6 +232,8 @@ def __init__(
             if n_shared_experts is not None and self.aiter_fmoe_shared_expert_enabled
             else 0
         )
+        self.shared_expert_gate = shared_expert_gate
+
         if (
             not self.aiter_fmoe_shared_expert_enabled
             and self.num_fused_shared_experts != 0
@@ -361,70 +244,30 @@ def __init__(
             )
 
         # Determine expert maps
-        if self.use_ep:
-            if self.enable_eplb:
-                assert self.global_num_experts % self.ep_size == 0, (
-                    "EPLB currently only supports even distribution of "
-                    "experts across ranks."
-                )
-            else:
-                assert num_redundant_experts == 0, (
-                    "Redundant experts are only supported with EPLB."
-                )
+        max_num_batched_tokens = vllm_config.scheduler_config.max_num_batched_tokens
 
-            self.expert_placement_strategy = determine_expert_placement_strategy(
-                expert_placement_strategy=self.expert_placement_strategy,
-                moe_parallel_config=self.moe_parallel_config,
-                num_expert_group=num_expert_group,
-                num_redundant_experts=num_redundant_experts,
-                enable_eplb=self.enable_eplb,
-            )
+        # Create ExpertMapManager to handle expert mapping and placement for EP.
+        # See ExpertMapManager for a detailed description of what it does and when
+        # it is required.
+        self.expert_map_manager = ExpertMapManager(
+            max_num_batched_tokens=max_num_batched_tokens,
+            top_k=top_k,
+            global_num_experts=self.global_num_experts,
+            num_redundant_experts=num_redundant_experts,
+            num_expert_group=num_expert_group,
+            moe_parallel_config=self.moe_parallel_config,
+            placement_strategy=self.expert_placement_strategy,
+            enable_eplb=enable_eplb,
+            num_fused_shared_experts=self.num_fused_shared_experts,
+            rocm_aiter_enabled=self.rocm_aiter_fmoe_enabled,
+        )
 
-            self._expert_map: torch.Tensor | None
-            local_num_experts, expert_map, expert_mask = determine_expert_map(
-                ep_size=self.ep_size,
-                ep_rank=self.ep_rank,
-                global_num_experts=self.global_num_experts,
-                expert_placement_strategy=self.expert_placement_strategy,
-                num_fused_shared_experts=self.num_fused_shared_experts,
-                return_expert_mask=self.rocm_aiter_fmoe_enabled,
-            )
-            self.local_num_experts = local_num_experts
-            self.register_buffer("_expert_map", expert_map)
-            self.register_buffer("expert_mask", expert_mask)
-            self._maybe_init_expert_routing_tables()
-            logger.info_once(
-                "[EP Rank %s/%s] Expert parallelism is enabled. Expert "
-                "placement strategy: %s. Local/global"
-                " number of experts: %s/%s. Experts local to global index map:"
-                " %s.",
-                self.ep_rank,
-                self.ep_size,
-                self.expert_placement_strategy,
-                self.local_num_experts,
-                self.global_num_experts,
-                get_compressed_expert_map(self._expert_map),
-            )
-        else:
-            self.local_num_experts, self._expert_map, self.expert_mask = (
-                self.global_num_experts,
-                None,
-                None,
-            )
+        self.update_expert_map_info()
 
         self.top_k = top_k
 
-        self._init_aiter_shared_experts_topK_buffer(
-            vllm_config=vllm_config, dp_size=dp_size_
-        )
-        if self.use_ep and self.rocm_aiter_fmoe_enabled:
-            assert self.expert_mask is None or torch.all(
-                (expert_mask == 0) | (expert_mask == 1)
-            ), "Aiter Fused MoE kernel only supports expert_map with 0 and 1s."
-
         assert intermediate_size % self.tp_size == 0
         intermediate_size_per_partition = intermediate_size // self.tp_size
-        self.reduce_results = reduce_results
         self.renormalize = renormalize
 
         # TODO(bnell): these attributes are only used by monolithic kernels.
@@ -436,10 +279,18 @@ def __init__(
         self.topk_group = topk_group
         self.custom_routing_function = custom_routing_function
         self.scoring_func = scoring_func
-        self.routed_scaling_factor = routed_scaling_factor
+        # When apply_routed_scale_to_output is True, we set the scaling factor
+        # to 1.0 so it ends up being a nop. Applying the scale will be handled
+        # by the runner in this case.
+        # The member variable must be set in the same way as the router since
+        # some quantization methods can access it.
+        self.routed_scaling_factor = (
+            routed_scaling_factor if not apply_routed_scale_to_output else 1.0
+        )
         self.e_score_correction_bias = e_score_correction_bias
         # TODO(bnell): end attributes
 
+        self.hash_indices_table = hash_indices_table
         self.apply_router_weight_on_input = apply_router_weight_on_input
         self.activation = MoEActivation.from_str(activation)
 
@@ -455,13 +306,16 @@ def __init__(
             topk_group=topk_group,
             custom_routing_function=custom_routing_function,
             scoring_func=scoring_func,
-            routed_scaling_factor=routed_scaling_factor,
+            routed_scaling_factor=self.routed_scaling_factor,
             e_score_correction_bias=e_score_correction_bias,
             num_fused_shared_experts=self.num_fused_shared_experts,
-            enable_eplb=enable_eplb,
             # TODO(bnell): once we can construct the MK at init time, we
             # can make this a value.
             indices_type_getter=lambda: self.quant_method.topk_indices_dtype,
+            zero_expert_type=zero_expert_type,
+            num_logical_experts=self.logical_num_experts,
+            hash_indices_table=self.hash_indices_table,
+            enable_pdl=enable_router_pdl,
         )
         self.routing_method_type: RoutingMethodType = self.router.routing_method_type
 
@@ -478,15 +332,16 @@ def __init__(
             in_dtype=moe_in_dtype,
             moe_backend=vllm_config.kernel_config.moe_backend,
             router_logits_dtype=router_logits_dtype,
-            max_num_tokens=envs.VLLM_MOE_DP_CHUNK_SIZE,
+            max_num_tokens=max_num_batched_tokens,
             has_bias=has_bias,
             is_act_and_mul=is_act_and_mul,
             is_lora_enabled=vllm_config.lora_config is not None,
             activation=self.activation,
             device=vllm_config.device_config.device,
             routing_method=self.routing_method_type,
+            swiglu_limit=swiglu_limit,
             # TODO: in_dtype == out_dtype?
-            disable_inplace=disable_inplace() or self._shared_experts is not None,
+            disable_inplace=disable_inplace() or shared_experts is not None,
         )
         if self.moe_config.use_mori_kernels:
             assert self.rocm_aiter_fmoe_enabled, (
@@ -516,12 +371,14 @@ def _get_quant_method() -> FusedMoEMethodBase:
         # for heuristic purposes, so it must be initialized first.
         self.quant_method: FusedMoEMethodBase = _get_quant_method()
 
-        if not self.moe_config.is_act_and_mul and not current_platform.is_cuda_alike():
+        if not self.moe_config.is_act_and_mul and not (
+            current_platform.is_cuda_alike() or current_platform.is_xpu()
+        ):
             raise NotImplementedError(
-                "is_act_and_mul=False is supported only for CUDA and ROCm for now"
+                "is_act_and_mul=False is supported only for CUDA and XPU for now"
             )
 
-        if self.enable_eplb and not self.quant_method.supports_eplb:
+        if enable_eplb and not self.quant_method.supports_eplb:
             # TODO: Add support for additional quantization methods.
             # The implementation for other quantization methods does not
             # contain essential differences, but the current quant API
@@ -557,44 +414,37 @@ def _get_quant_method() -> FusedMoEMethodBase:
         }
         # need full intermediate size pre-sharding for WNA16 act order
         if self.quant_method.__class__.__name__ in (
-            "GPTQMarlinMoEMethod",
+            "AutoGPTQMoEMethod",
             "CompressedTensorsWNA16MarlinMoEMethod",
             "CompressedTensorsWNA16MoEMethod",
         ):
             moe_quant_params["intermediate_size_full"] = intermediate_size
 
         self.quant_method.create_weights(layer=self, **moe_quant_params)
-        self.base_quant_method = self.quant_method
-
-        # Disable shared expert overlap if:
-        #   - we are using eplb with non-default backend, because of correctness issues
-        #   - we are using flashinfer with DP, since there nothing to gain
-        #   - we are using marlin kernels
-        backend = self.moe_parallel_config.all2all_backend
-        self.use_overlapped = (
-            not (
-                (self.enable_eplb and backend != "allgather_reducescatter")
-                or self.moe_parallel_config.use_fi_nvl_two_sided_kernels
-            )
-            and self._shared_experts is not None
-        )
 
-        self.runner = self._init_runner()
+        # TODO(bnell): this is un-needed and removed in a follow up PR.
+        self.base_quant_method = self.quant_method
 
-    def _init_runner(self):
         # Storing the runner in the FusedMoE is an intermediate state, eventually
         # the runner will own the FusedMoE layer and provide the execution interface
         # for MoE ops.
-        return DefaultMoERunner(
-            layer=self,
+        self.runner: MoERunnerInterface = MoERunner(
+            layer_name=self.layer_name,
             moe_config=self.moe_config,
             router=self.router,
-            routed_input_transform=self._routed_input_transform,
-            gate=self.gate,
-            shared_experts=self.shared_experts,
+            gate=gate,
+            shared_experts=shared_experts,
+            shared_expert_gate=self.shared_expert_gate,
             quant_method=self.quant_method,
-            reduce_results=self.reduce_results,
             enable_dbo=self.vllm_config.parallel_config.enable_dbo,
+            routed_input_transform=routed_input_transform,
+            routed_output_transform=routed_output_transform,
+            # When apply_routed_scale_to_output is True, we allow
+            # the scaling factor to be passed to the runner, otherwise
+            # we pass 1.0 so it ends up being a nop.
+            routed_scaling_factor=routed_scaling_factor
+            if apply_routed_scale_to_output
+            else 1.0,
         )
 
     # TODO(bnell): This method is provided as a hook so vllm/lora/layers/fused_moe.py
@@ -602,10 +452,7 @@ def _init_runner(self):
     # intrusive way to do this.
     def _replace_quant_method(self, mk: FusedMoEMethodBase):
         self.quant_method = mk
-        # We need to force reconstruction of runner because we're swapping out
-        # the quant_method with a FusedMoEModularMethod. This logic can go
-        # away once the FusedMoEModularMethod is eliminated.
-        self.runner = self._init_runner()
+        self.runner._replace_quant_method(mk)
 
     # Note: maybe_init_modular_kernel should only be called by
     # prepare_communication_buffer_for_model.
@@ -618,11 +465,8 @@ def maybe_init_modular_kernel(self) -> None:
             return None
 
         self.ensure_moe_quant_config_init()
-        # routing_tables only needed for round-robin expert placement with
-        # DeepEP all2all backend.
-        routing_tables = self._maybe_init_expert_routing_tables()
         prepare_finalize = self.base_quant_method.maybe_make_prepare_finalize(
-            routing_tables=routing_tables
+            routing_tables=self._expert_routing_tables()
         )
         if prepare_finalize is not None:
             logger.debug(
@@ -633,14 +477,13 @@ def maybe_init_modular_kernel(self) -> None:
                     self,
                     self.base_quant_method,
                     prepare_finalize,
-                    self.shared_experts,
                     inplace=not self.moe_config.disable_inplace,
                 )
             )
 
     @property
-    def shared_experts(self) -> torch.nn.Module | None:
-        return self._shared_experts if self.use_overlapped else None
+    def shared_experts(self) -> SharedExperts | None:
+        return self.runner.shared_experts
 
     @property
     def layer_id(self):
@@ -649,10 +492,6 @@ def layer_id(self):
 
         return extract_layer_index(self.layer_name)
 
-    @property
-    def gate(self) -> torch.nn.Module | None:
-        return self._gate if self.use_overlapped else None
-
     @property
     def tp_size(self):
         return self.moe_parallel_config.tp_size
@@ -676,19 +515,28 @@ def use_ep(self):
     @property
     def is_internal_router(self) -> bool:
         # By default, router/gate is called before FusedMoE forward pass
-        return self.gate is not None
-
-    def _maybe_init_expert_routing_tables(
+        return self.runner.is_internal_router()
+
+    def update_expert_map_info(self):
+        # Update local attributes from ExpertMapManager
+        self.local_num_experts = self.expert_map_manager.local_num_experts
+        self.expert_placement_strategy = self.expert_map_manager.placement_strategy
+        self.register_buffer("_expert_map", self.expert_map_manager.expert_map)
+        self.register_buffer("expert_mask", self.expert_map_manager.expert_mask)
+
+        # Get routing tables from ExpertMapManager
+        routing_tables = self.expert_map_manager.routing_tables
+        if routing_tables is not None:
+            # Register routing tables as buffers for this layer
+            global_to_physical, physical_to_global, local_global = routing_tables
+            self.register_buffer("expert_global_to_physical", global_to_physical)
+            self.register_buffer("expert_physical_to_global", physical_to_global)
+            self.register_buffer("expert_local_to_global", local_global)
+
+    def _expert_routing_tables(
         self,
     ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None:
-        # Currently routing_tables only needed for round-robin expert placement
-        # with DeepEP-ll or NIXL EP all2all backends.
-        if self.expert_placement_strategy != "round_robin" or (
-            not self.moe_parallel_config.use_deepep_ll_kernels
-            and not self.moe_parallel_config.use_nixl_ep_kernels
-        ):
-            return None
-
+        # Return cached routing tables if already registered as buffers
         if hasattr(self, "expert_global_to_physical"):
             return cast(
                 tuple[torch.Tensor, torch.Tensor, torch.Tensor],
@@ -698,85 +546,21 @@ def _maybe_init_expert_routing_tables(
                     self.expert_local_to_global,
                 ),
             )
+        return None
 
-        if self._expert_map is None:
-            return None
-
-        routing_tables = self.ensure_round_robin_expert_routing_tables(
+    def update_expert_map(self):
+        # Update ExpertMapManager with new EP configuration
+        # The moe_parallel_config (including ep_size and ep_rank)
+        # should already be updated.
+        # Note: ExpertMapManager.update() recalculates expert maps and
+        # reinitializes routing tables internally.
+        self.expert_map_manager.update(
+            self.moe_parallel_config,
             global_num_experts=self.global_num_experts,
-            ep_size=self.ep_size,
-            ep_rank=self.ep_rank,
-            local_num_experts=self.local_num_experts,
-            device=self._expert_map.device,
-        )
-
-        global_to_physical, physical_to_global, local_global = routing_tables
-        self.register_buffer("expert_global_to_physical", global_to_physical)
-        self.register_buffer("expert_physical_to_global", physical_to_global)
-        self.register_buffer("expert_local_to_global", local_global)
-
-        return routing_tables
-
-    @staticmethod
-    def ensure_round_robin_expert_routing_tables(
-        global_num_experts: int,
-        ep_size: int,
-        ep_rank: int,
-        local_num_experts: int,
-        device: torch.device | None = None,
-    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-        device_kwargs = {"device": device} if device is not None else {}
-        global_indices = torch.arange(
-            global_num_experts, dtype=torch.long, **device_kwargs
         )
-        owner = torch.remainder(global_indices, ep_size)
-        local_index = torch.div(global_indices, ep_size, rounding_mode="floor")
-        base = global_num_experts // ep_size
-        remainder = global_num_experts % ep_size
-        physical_offset = owner * base
-        if remainder > 0:
-            remainder_tensor = torch.tensor(
-                remainder, dtype=torch.long, **device_kwargs
-            )
-            physical_offset = physical_offset + torch.minimum(owner, remainder_tensor)
-
-        global_to_physical = physical_offset + local_index
-        physical_to_global = torch.empty_like(global_to_physical)
-        physical_to_global[global_to_physical] = global_indices
-
-        local_global = torch.arange(
-            ep_rank,
-            global_num_experts,
-            ep_size,
-            dtype=torch.long,
-            **device_kwargs,
-        )
-        if local_global.numel() != local_num_experts:
-            local_global = local_global[:local_num_experts]
 
-        return (global_to_physical, physical_to_global, local_global)
-
-    def update_expert_map(self):
-        # ep_size and ep_rank should already be updated
-        assert self._expert_map is not None
-        with self._expert_map.device:
-            local_num_experts, expert_map, expert_mask = determine_expert_map(
-                ep_size=self.ep_size,
-                ep_rank=self.ep_rank,
-                global_num_experts=self.global_num_experts,
-                expert_placement_strategy=self.expert_placement_strategy,
-                num_fused_shared_experts=self.num_fused_shared_experts,
-                return_expert_mask=self.rocm_aiter_fmoe_enabled,
-            )
-            self.local_num_experts = local_num_experts
-            self.register_buffer("_expert_map", expert_map)
-            self.register_buffer("expert_mask", expert_mask)
-            self._maybe_init_expert_routing_tables()
-            if self.aiter_fmoe_shared_expert_enabled:
-                self._init_aiter_shared_experts_topK_buffer(
-                    vllm_config=get_current_vllm_config(),
-                    dp_size=get_dp_group().world_size,
-                )
+        # Update local attributes from ExpertMapManager
+        self.update_expert_map_info()
 
     def _load_per_tensor_weight_scale(
         self,
@@ -860,6 +644,13 @@ def _load_per_channel_weight_scale(
     ):
         # for per channel weight quantization
         if shard_id == "w2":
+            hidden_dim = self._get_hidden_dim(shard_dim, expert_data.ndim)
+            expert_data = self._narrow_expert_data_for_padding(
+                expert_data,
+                loaded_weight,
+                hidden_dim=hidden_dim,
+                shard_dim=shard_dim,
+            )
             expert_data.copy_(loaded_weight)
         elif shard_id in ("w1", "w3"):
             self._load_w13(
@@ -870,6 +661,63 @@ def _load_per_channel_weight_scale(
                 tp_rank=tp_rank,
             )
 
+    @staticmethod
+    def _get_hidden_dim(shard_dim: int, ndim: int) -> int:
+        """Compute the hidden dimension index from the shard (intermediate)
+        dimension and tensor rank.
+
+        For 2D weight tensors the two data dims are (0, 1). For 3D tensors
+        with an expert dimension at dim 0, they are (1, 2). ``shard_dim``
+        occupies one of these; the hidden dimension is the other.
+        For 1D tensors (e.g. per-channel scales) returns 0.
+        """
+        if ndim < 2:
+            return 0
+        dim_a = ndim - 2
+        dim_b = ndim - 1
+        if shard_dim == dim_a:
+            return dim_b
+        if shard_dim == dim_b:
+            return dim_a
+        raise ValueError(
+            f"shard_dim={shard_dim} is not a valid data dimension "
+            f"for a {ndim}D tensor (expected {dim_a} or {dim_b})"
+        )
+
+    @staticmethod
+    def _narrow_expert_data_for_padding(
+        expert_data: torch.Tensor,
+        loaded_weight: torch.Tensor,
+        hidden_dim: int,
+        shard_dim: int | None = None,
+    ) -> torch.Tensor:
+        """Narrow expert_data to match loaded_weight for padded dimensions.
+
+        When backends (e.g., DeepEP) round up hidden_size, weight parameters
+        are larger than checkpoint weights. Narrow the padded hidden dimension
+        before copying. Similarly, when padding occurs on the shard
+        (intermediate) dimension (e.g. for MXFP4 GEMM), narrow that dimension
+        as well.
+
+        Args:
+            expert_data: The (possibly padded) parameter tensor to narrow.
+            loaded_weight: The checkpoint weight tensor with original size.
+            hidden_dim: The dimension index corresponding to hidden_size.
+                Must be non-negative.
+            shard_dim: The dimension index corresponding to the shard
+                (intermediate) dimension. Defaults to `None`.
+        """
+        dims = (hidden_dim,) if shard_dim is None else (hidden_dim, shard_dim)
+        if loaded_weight.ndim > 0:
+            for dim in dims:
+                if (
+                    0 <= dim < expert_data.ndim
+                    and dim < loaded_weight.ndim
+                    and expert_data.shape[dim] > loaded_weight.shape[dim]
+                ):
+                    expert_data = expert_data.narrow(dim, 0, loaded_weight.shape[dim])
+        return expert_data
+
     def _load_w13(
         self,
         expert_data: torch.Tensor,
@@ -907,13 +755,13 @@ def _load_w13(
         else:
             assert shard_id == "w3"
             expert_data = expert_data.narrow(shard_dim, shard_size, shard_size)
-
-        # Handle padding: if loaded_weight is smaller than expert_data (can happen
-        # on last TP shard with padding), copy to top-left corner
-        if expert_data.shape != loaded_weight.shape:
-            expert_data = expert_data[
-                : loaded_weight.shape[0], : loaded_weight.shape[1]
-            ]
+        hidden_dim = self._get_hidden_dim(shard_dim, expert_data.ndim)
+        expert_data = self._narrow_expert_data_for_padding(
+            expert_data,
+            loaded_weight,
+            hidden_dim=hidden_dim,
+            shard_dim=shard_dim,
+        )
         expert_data.copy_(loaded_weight)
 
     def _load_w2(
@@ -943,12 +791,13 @@ def _load_w2(
             narrow_size = min(shard_size, available)
             loaded_weight = loaded_weight.narrow(shard_dim, start_offset, narrow_size)
         # w2, down_proj: Load into only logical weight of w2.
-        # Handle padding: if loaded_weight is smaller than expert_data (can happen
-        # on last TP shard with padding), copy to top-left corner
-        if expert_data.shape != loaded_weight.shape:
-            expert_data = expert_data[
-                : loaded_weight.shape[0], : loaded_weight.shape[1]
-            ]
+        hidden_dim = self._get_hidden_dim(shard_dim, expert_data.ndim)
+        expert_data = self._narrow_expert_data_for_padding(
+            expert_data,
+            loaded_weight,
+            hidden_dim=hidden_dim,
+            shard_dim=shard_dim,
+        )
         expert_data.copy_(loaded_weight)
 
     def _load_single_value(
@@ -979,26 +828,7 @@ def _load_g_idx(
             expert_data.copy_(loaded_weight)
 
     def _map_global_expert_id_to_local_expert_id(self, expert_id: int) -> int:
-        if self._expert_map is None:
-            return expert_id
-        return self._expert_map[expert_id].item()
-
-    def _init_aiter_shared_experts_topK_buffer(
-        self, vllm_config: VllmConfig, dp_size: int
-    ):
-        if self.num_fused_shared_experts > 0:
-            init_aiter_topK_meta_data(
-                n_routed_experts=self.global_num_experts,
-                n_shared_experts=self.num_fused_shared_experts,
-                top_k=self.top_k,
-                tp_rank=self.ep_rank if self.use_ep else self.tp_rank,
-                tp_size=self.ep_size if self.use_ep else self.tp_size,
-                shared_experts_score=1.0,
-                max_num_tokens=vllm_config.scheduler_config.max_num_batched_tokens
-                * dp_size,
-                is_EP=self.use_ep,
-            )
-        self.local_num_experts += self.num_fused_shared_experts
+        return self.expert_map_manager.map_global_to_local(expert_id)
 
     @overload
     def weight_loader(
@@ -1031,7 +861,8 @@ def weight_loader(
         expert_id: int,
         return_success: bool = False,
     ) -> bool | None:
-        if self.quant_config and self.quant_config.get_name() == "mxfp4":
+        quant_config_name = self.quant_config and self.quant_config.get_name()
+        if quant_config_name == "gpt_oss_mxfp4":
             # (FIXME) for gpt-oss all experts are combined
             if "bias" in weight_name:
                 dim1 = loaded_weight.shape[1]
@@ -1095,9 +926,25 @@ def weight_loader(
 
             expert_data = param.data[expert_id]
             if shard_id == "w2":
+                # BnB params are stored as flat packed tensors (e.g.
+                # (packed_size, 1)), not in the logical weight layout.
+                # Narrowing packed data for hidden-dim padding is not
+                # meaningful, so require an exact shape match.
+                if expert_data.shape != loaded_weight.shape:
+                    raise ValueError(
+                        "BitsAndBytes quantization with padded hidden_size "
+                        "(e.g., from DeepEP) is not supported. "
+                        f"Parameter shape {tuple(expert_data.shape)} != "
+                        f"checkpoint shape {tuple(loaded_weight.shape)}"
+                    )
                 expert_data.copy_(loaded_weight)
             elif shard_id in ("w1", "w3"):
-                # BNB inflight quantization has already sharded the weights
+                # BnB stores weights as flat packed tensors.  _load_w13 is
+                # still used to split the w1/w3 portions along shard_dim.
+                # _narrow_expert_data_for_padding will be a no-op since
+                # packed sizes should already match; if DeepEP padding
+                # causes a mismatch the copy_() will fail with a clear
+                # shape error.
                 full_load = True
                 self._load_w13(
                     shard_id=shard_id,
@@ -1396,10 +1243,19 @@ def _maybe_make_contiguous(
             "w2_input_scale",
         }
 
+        # Parameters of non-expert submodules that live inside runner (MoERunner).
+        # These must be excluded from EPLB weight rearrangement.
+        NON_EXPERT_PREFIXES = (
+            "runner._shared_experts.",
+            "runner.gate.",
+            "runner.routed_input_transform.",
+            "runner.routed_output_transform.",
+        )
+
         assert all(
             weight.is_contiguous()
             for name, weight in weights
-            if not (name.startswith("_shared_experts.") or name.startswith("_gate."))
+            if not name.startswith(NON_EXPERT_PREFIXES)
             and name not in NON_EXPERT_WEIGHTS
         )
 
@@ -1408,9 +1264,7 @@ def _maybe_make_contiguous(
             for name, weight in weights
             if name not in NON_EXPERT_WEIGHTS
             and weight.shape != torch.Size([])
-            and not name.startswith("_shared_experts.")
-            # exclude parameters from non-expert submodules (e.g. gate/shared)
-            and not name.startswith("_gate.")
+            and not name.startswith(NON_EXPERT_PREFIXES)
         ]
 
     def set_eplb_state(
@@ -1425,10 +1279,20 @@ def set_eplb_state(
 
         This is used later in forward pass, where we get the expert mapping
         and record the load metrics in `expert_load_view`.
+
+        Args:
+            moe_layer_idx: Index of this MoE layer
+            expert_load_view: View into global expert load tracking tensor
+            logical_to_physical_map: Mapping from logical to physical expert IDs
+            logical_replica_count: Number of replicas for each logical expert
         """
-        self.eplb_state.expert_load_view = expert_load_view[moe_layer_idx]
-        self.eplb_state.logical_to_physical_map = logical_to_physical_map[moe_layer_idx]
-        self.eplb_state.logical_replica_count = logical_replica_count[moe_layer_idx]
+        if self.eplb_state is not None:
+            self.eplb_state.set_layer_state(
+                moe_layer_idx,
+                expert_load_view,
+                logical_to_physical_map,
+                logical_replica_count,
+            )
 
     def ensure_moe_quant_config_init(self):
         if self.quant_method.moe_quant_config is None:
@@ -1443,35 +1307,16 @@ def moe_quant_config(self) -> FusedMoEQuantConfig | None:
         self.ensure_moe_quant_config_init()
         return self.quant_method.moe_quant_config
 
-    def must_reduce_shared_expert_outputs(self) -> bool:
-        """
-        The shared_experts are typically computed using the RowParallelLinear
-        layer. The result of this function is typically used as
-        the reduce_results argument to the module.
-        When just tensor-parallel is used, it is not required to reduce
-        the shared_experts results immediately. Instead we reduce at the
-        once at the end of the MoE op. (Refer to DeepSeekV2MoE module)
-        With EP and all2all kernels - this is no longer viable as all
-        GPU ranks in DP, produce the complete set of hidden_states.
-        Therefore it is required that we reduce the shared_experts output
-        early.
-        """
-        return self.runner.must_reduce_shared_expert_outputs()
-
-    def maybe_all_reduce_tensor_model_parallel(self, final_hidden_states: torch.Tensor):
-        """
-        Some combine kernels reduce across GPU ranks by default.
-        """
-        return self.runner.maybe_all_reduce_tensor_model_parallel(final_hidden_states)
-
-    def forward_native(
+    def forward(
         self,
         hidden_states: torch.Tensor,
         router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
         return self.runner.forward(
             hidden_states,
             router_logits,
+            input_ids,
         )
 
     @property
@@ -1480,13 +1325,6 @@ def expert_map(self) -> torch.Tensor | None:
             self._expert_map if not self.rocm_aiter_fmoe_enabled else self.expert_mask
         )
 
-    def forward_cuda(
-        self,
-        hidden_states: torch.Tensor,
-        router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        return self.forward_native(hidden_states, router_logits)
-
     @classmethod
     def make_expert_params_mapping(
         cls,
@@ -1549,12 +1387,30 @@ def extra_repr(self) -> str:
             f"intermediate_size_per_partition={self.intermediate_size_per_partition}, "  # noqa: E501
             f"tp_size={self.tp_size},\n"
             f"ep_size={self.ep_size}, "
-            f"reduce_results={self.reduce_results}, "
         )
 
         return s
 
 
+# This is a temporary forwarding method which will be removed/modified layer.
+def fused_moe_make_expert_params_mapping(
+    model: torch.nn.Module,
+    ckpt_gate_proj_name: str,
+    ckpt_down_proj_name: str,
+    ckpt_up_proj_name: str,
+    num_experts: int,
+    num_redundant_experts: int = 0,
+) -> list[tuple[str, str, int, str]]:
+    return FusedMoE.make_expert_params_mapping(
+        model,
+        ckpt_gate_proj_name,
+        ckpt_down_proj_name,
+        ckpt_up_proj_name,
+        num_experts,
+        num_redundant_experts,
+    )
+
+
 # Mark the FusedMoE weight_loader as supporting MoE-specific parameters
 # to avoid expensive runtime reflection in model loading code
 FusedMoE.weight_loader.supports_moe_loading = True  # type: ignore[attr-defined]
diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py
index ca3536bd9285..6d9e63278abd 100644
--- a/vllm/model_executor/layers/fused_moe/modular_kernel.py
+++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py
@@ -9,6 +9,7 @@
 
 import torch
 
+import vllm.envs as envs
 from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.activation import (
     MoEActivation,
@@ -20,6 +21,10 @@
     FusedMoEQuantConfig,
     RoutingMethodType,
 )
+from vllm.model_executor.layers.fused_moe.runner.shared_experts import (
+    SharedExperts,
+    SharedExpertsOrder,
+)
 from vllm.model_executor.layers.fused_moe.utils import (
     _resize_cache,
     disable_inplace,
@@ -234,6 +239,20 @@ def output_is_reduced(self) -> bool:
         """
         raise NotImplementedError
 
+    def supports_async(self) -> bool:
+        """
+        Indicates whether or not this class implements prepare_async and
+        finalize_async.
+        """
+        return False
+
+    def on_commit(self) -> None:
+        """
+        Runs after this prepare/finalize has been committed to the active
+        MoE kernel.
+        """
+        return
+
 
 # TODO: pass FusedMoEParallelConfig in as ctor parameter?
 class FusedMoEPrepareAndFinalizeModular(FusedMoEPrepareAndFinalize):
@@ -280,13 +299,6 @@ def prepare(
         """
         raise NotImplementedError
 
-    def supports_async(self) -> bool:
-        """
-        Indicates whether or not this class implements prepare_async and
-        finalize_async.
-        """
-        return False
-
     def prepare_async(
         self,
         a1: torch.Tensor,
@@ -563,6 +575,10 @@ def _make_reason(reason: str) -> str:
             )
         elif activation_format != cls.activation_format():
             return False, _make_reason(f"{activation_format.value} activation format")
+        elif envs.VLLM_BATCH_INVARIANT and not cls._supports_batch_invariance():
+            return False, _make_reason("batch invariance")
+        elif moe_config.is_lora_enabled and not cls.supports_lora():
+            return False, _make_reason("LoRA")
         return True, None
 
     @staticmethod
@@ -645,6 +661,15 @@ def _supports_shape(hidden_dim: int) -> bool:
         """
         return True
 
+    @staticmethod
+    def _supports_batch_invariance() -> bool:
+        """
+        Whether the kernel supports batch invariance, i.e. the output does not
+        depend on the order of the tokens in the input batch. This is useful
+        for determining if the kernel can used with VLLM_BATCH_INVARIANT=1.
+        """
+        return False
+
     #
     # Various helpers for accessing quantization parameters from the
     # quant_config.
@@ -718,6 +743,15 @@ def g1_alphas(self) -> torch.Tensor | None:
     def g2_alphas(self) -> torch.Tensor | None:
         return self.quant_config.g2_alphas
 
+    @staticmethod
+    def supports_lora() -> bool:
+        """Return True if this expert impl natively handles LoRA.
+
+        LoRA-aware experts should mix in LoRAExpertsMixin, which flips this
+        to True and provides the per-forward LoRA state plumbing.
+        """
+        return False
+
     @abstractmethod
     def supports_expert_map(self) -> bool:
         """
@@ -991,15 +1025,13 @@ def __init__(
         self,
         prepare_finalize: FusedMoEPrepareAndFinalizeModular,
         fused_experts: FusedMoEExpertsModular,
-        shared_experts: torch.nn.Module | None = None,
-        moe_parallel_config: FusedMoEParallelConfig | None = None,
         inplace: bool = False,
     ):
         self.prepare_finalize = prepare_finalize
         self.fused_experts = fused_experts
-        self.shared_experts = shared_experts
-        self.moe_parallel_config = moe_parallel_config
         self.inplace = inplace
+        moe_parallel_config = fused_experts.moe_config.moe_parallel_config
+        self.moe_parallel_config = moe_parallel_config
         self.is_dp_ep = (
             moe_parallel_config is not None
             and moe_parallel_config.dp_size > 1
@@ -1069,6 +1101,18 @@ def _allocate_buffers(
 
         return workspace13, workspace2, fused_out
 
+    def _maybe_apply_shared_experts(
+        self,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ):
+        if shared_experts is not None:
+            assert shared_experts_input is not None
+            shared_experts.apply(
+                shared_experts_input,
+                SharedExpertsOrder.MK_INTERNAL_OVERLAPPED,
+            )
+
     def _prepare(
         self,
         hidden_states: torch.Tensor,
@@ -1172,6 +1216,7 @@ def _fused_experts(
         expert_map: torch.Tensor | None,
         apply_router_weight_on_input: bool,
         expert_tokens_meta: ExpertTokensMetadata | None,
+        output_alias: torch.Tensor | None = None,
     ) -> torch.Tensor:
         _, M_full, N, K, top_k = self.fused_experts.moe_problem_size(
             a1q, w1, w2, topk_ids
@@ -1200,6 +1245,23 @@ def _fused_experts(
             activation,
         )
 
+        # If caller's output buffer already matches fused_out shape/dtype, alias
+        # to skip the redundant copy in TopKWeightAndReduceNoOP.apply downstream.
+        # This eliminates ~94% of __amd_rocclr_copyBuffer events (Copy 2 of the
+        # double-copy MoE write-back path).
+        if current_platform.is_rocm():
+            from vllm._aiter_ops import rocm_aiter_ops
+
+            if (
+                rocm_aiter_ops.is_fused_moe_enabled()
+                and output_alias is not None
+                and output_alias.shape == fused_out.shape
+                and output_alias.dtype == fused_out.dtype
+                and output_alias.device == fused_out.device
+                and output_alias.is_contiguous()
+            ):
+                fused_out = output_alias
+
         self.fused_experts.apply(
             output=fused_out,
             hidden_states=a1q,
@@ -1228,28 +1290,21 @@ def _finalize(
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
         apply_router_weight_on_input: bool,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> torch.Tensor:
         """
         The _finalize method is a wrapper around self.prepare_finalize.finalize
         that handles DBO, async and shared expert overlap.
 
         Args:
+            shared_experts: SharedExperts | None. The shared experts if any.
             shared_experts_input: Optional separate input for shared experts.
                 When latent MoE is used, hidden_states is the latent-projected
                 tensor (smaller dimension) used by routed experts, while
                 shared_experts_input is the original hidden_states (full
                 dimension) needed by the shared expert MLP.
         """
-        shared_output: torch.Tensor | None = None
-
-        # For latent MoE: shared experts need the original hidden_states
-        # (full hidden_size), not the latent-projected version used by
-        # routed experts.
-        se_hidden_states = (
-            shared_experts_input if shared_experts_input is not None else hidden_states
-        )
-
         if not self.prepare_finalize.supports_async():
             assert not dbo_enabled()
 
@@ -1261,8 +1316,6 @@ def _finalize(
                 apply_router_weight_on_input,
                 self.fused_experts.finalize_weight_and_reduce_impl(),
             )
-            if self.shared_experts is not None:
-                shared_output = self.shared_experts(se_hidden_states)
         else:
             finalize_ret = self.prepare_finalize.finalize_async(
                 output,
@@ -1272,8 +1325,7 @@ def _finalize(
                 apply_router_weight_on_input,
                 self.fused_experts.finalize_weight_and_reduce_impl(),
             )
-            if self.shared_experts is not None:
-                shared_output = self.shared_experts(se_hidden_states)
+            self._maybe_apply_shared_experts(shared_experts, shared_experts_input)
 
             # TODO(lucas): refactor this in the alternative schedules followup
             # currently unpack if we have hook + receiver pair or just
@@ -1296,11 +1348,7 @@ def _finalize(
 
             receiver()
 
-        if self.shared_experts is None:
-            return output
-        else:
-            assert shared_output is not None
-            return shared_output, output
+        return output
 
     def apply(
         self,
@@ -1313,8 +1361,9 @@ def apply(
         global_num_experts: int = -1,
         expert_map: torch.Tensor | None = None,
         apply_router_weight_on_input: bool = False,
+        shared_experts: SharedExperts | None = None,
         shared_experts_input: torch.Tensor | None = None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> torch.Tensor:
         """
         This function computes a Mixture of Experts (MoE) layer using two sets
         of weights, w1 and w2, and top-k gating mechanism.
@@ -1335,6 +1384,7 @@ def apply(
         - apply_router_weight_on_input (bool): When true, the topk weights are
           applied directly on the inputs. This is only applicable when topk is
           1.
+        - shared_experts: SharedExperts | None. The shared experts if any.
         - shared_experts_input (Optional[torch.Tensor]): Optional separate
           input for shared experts. For latent MoE, this is the original
           hidden_states before latent projection.
@@ -1343,7 +1393,7 @@ def apply(
         - torch.Tensor: The output tensor after applying the MoE layer.
         """
         if self.inplace:
-            assert self.shared_experts is None
+            assert shared_experts is None
             assert not disable_inplace()
             output = hidden_states
         else:
@@ -1376,6 +1426,7 @@ def apply(
             expert_map=expert_map,
             apply_router_weight_on_input=apply_router_weight_on_input,
             expert_tokens_meta=expert_tokens_meta,
+            output_alias=output,
         )
 
         return self._finalize(
@@ -1385,6 +1436,7 @@ def apply(
             topk_weights,
             topk_ids,
             apply_router_weight_on_input,
+            shared_experts=shared_experts,
             shared_experts_input=shared_experts_input,
         )
 
@@ -1457,12 +1509,9 @@ def __init__(
         self,
         prepare_finalize: FusedMoEPrepareAndFinalize,
         fused_experts: FusedMoEExperts,
-        shared_experts: torch.nn.Module | None = None,
-        moe_parallel_config: FusedMoEParallelConfig | None = None,
         inplace: bool = False,
     ):
         super().__init__()
-        self.shared_experts = shared_experts  # NOTE: check if we can remove
 
         # Initialize the implementation (monolithic or modular).
         self.impl: FusedMoEKernelModularImpl | FusedMoEKernelMonolithicImpl
@@ -1472,15 +1521,12 @@ def __init__(
             self.impl = FusedMoEKernelModularImpl(
                 prepare_finalize,
                 fused_experts,
-                shared_experts,
-                moe_parallel_config,
                 inplace,
             )
 
         elif isinstance(
             prepare_finalize, FusedMoEPrepareAndFinalizeMonolithic
         ) and isinstance(fused_experts, FusedMoEExpertsMonolithic):
-            assert shared_experts is None
             assert not inplace
             self.impl = FusedMoEKernelMonolithicImpl(
                 prepare_finalize,
@@ -1496,6 +1542,19 @@ def __init__(
 
         self._post_init_setup()
 
+    @property
+    def can_overlap_shared_experts(self) -> bool:
+        if isinstance(self.impl, FusedMoEKernelModularImpl):
+            return self.impl.prepare_finalize.supports_async()
+        else:
+            return False
+
+    @property
+    def inplace(self) -> bool:
+        if isinstance(self.impl, FusedMoEKernelModularImpl):
+            return self.impl.inplace
+        return False
+
     @property
     def is_monolithic(self) -> bool:
         return isinstance(self.impl, FusedMoEKernelMonolithicImpl)
@@ -1508,6 +1567,13 @@ def prepare_finalize(self) -> FusedMoEPrepareAndFinalize:
     def fused_experts(self) -> FusedMoEExperts:
         return self.impl.fused_experts
 
+    @property
+    def moe_config(self) -> FusedMoEConfig:
+        return self.fused_experts.moe_config
+
+    def supports_lora(self) -> bool:
+        return self.fused_experts.supports_lora()
+
     def _post_init_setup(self):
         """
         Resolve any leftover setup dependencies between self.prepare_finalize
@@ -1575,6 +1641,7 @@ def apply(
         global_num_experts: int,
         expert_map: torch.Tensor | None,
         apply_router_weight_on_input: bool,
+        shared_experts: SharedExperts | None = None,
         shared_experts_input: torch.Tensor | None = None,
     ) -> torch.Tensor:
         assert isinstance(self.impl, FusedMoEKernelModularImpl)
@@ -1588,5 +1655,6 @@ def apply(
             global_num_experts=global_num_experts,
             expert_map=expert_map,
             apply_router_weight_on_input=apply_router_weight_on_input,
+            shared_experts=shared_experts,
             shared_experts_input=shared_experts_input,
         )
diff --git a/vllm/model_executor/layers/fused_moe/moe_fused_mul_sum.py b/vllm/model_executor/layers/fused_moe/moe_fused_mul_sum.py
new file mode 100644
index 000000000000..768f41db854e
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/moe_fused_mul_sum.py
@@ -0,0 +1,202 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+from torch._subclasses.fake_tensor import FakeTensor
+
+from vllm.platforms import current_platform
+from vllm.triton_utils import tl, triton
+
+
+@triton.jit
+def moe_fused_mul_sum_kernel(
+    inputs_ptr,
+    topk_weights_ptr,
+    outputs_ptr,
+    top_ids_ptr,
+    expert_map_ptr,
+    num_tokens,
+    stride_m,
+    has_expert_map: tl.constexpr,
+    top_k: tl.constexpr,
+    size: tl.constexpr,
+    BLOCK_M: tl.constexpr,
+    BLOCK_K: tl.constexpr,
+):
+    pid_k = tl.program_id(0)
+    pid_m = tl.program_id(1)
+
+    offs_m = pid_m * BLOCK_M + tl.arange(0, BLOCK_M)
+    offs_k = pid_k * BLOCK_K + tl.arange(0, BLOCK_K)
+
+    m_mask = offs_m < num_tokens
+    k_mask = offs_k < size
+    mask = m_mask[:, None] & k_mask[None, :]
+
+    a_base = inputs_ptr + (offs_m * stride_m)[:, None] + offs_k[None, :]
+    b_base = topk_weights_ptr + offs_m * top_k
+
+    acc = tl.zeros((BLOCK_M, BLOCK_K), dtype=tl.float32)
+
+    for n in tl.static_range(top_k):
+        b_val = tl.load(b_base + n, mask=m_mask, other=0.0).to(tl.float32)
+        if has_expert_map:
+            id_val = tl.load(top_ids_ptr + offs_m * top_k + n, mask=m_mask, other=0)
+            expert_mask = tl.load(expert_map_ptr + id_val) >= 0
+            a_vec = tl.load(
+                a_base + n * size,
+                mask=mask & expert_mask[:, None],
+                other=0.0,
+            ).to(tl.float32)
+        else:
+            a_vec = tl.load(
+                a_base + n * size,
+                mask=mask,
+                other=0.0,
+            ).to(tl.float32)
+        acc += a_vec * b_val[:, None]
+
+    out_ptrs = outputs_ptr + (offs_m * size)[:, None] + offs_k[None, :]
+    tl.store(
+        out_ptrs,
+        acc.to(outputs_ptr.dtype.element_ty),
+        mask=mask,
+    )
+
+
+def _heuristic_config(
+    num_tokens: int,
+    top_k: int,
+    size: int,
+    element_size: int,
+):
+    is_fp32 = element_size > 2
+    is_sm90_plus = current_platform.has_device_capability(90)
+    is_sm80_before = not current_platform.has_device_capability(80)
+
+    if current_platform.has_device_capability(90):
+        # SM90/SM100+: prefer small tiles + many CTAs.
+        if is_fp32:
+            BLOCK_M = 1 if num_tokens <= 4 else 2
+        else:
+            if num_tokens <= 4:
+                BLOCK_M = 1
+            elif num_tokens <= 128:
+                BLOCK_M = 2
+            else:
+                BLOCK_M = 4
+    elif is_fp32:
+        if num_tokens <= 4:
+            BLOCK_M = 1
+        elif num_tokens <= 32:
+            BLOCK_M = 2
+        elif num_tokens <= 128:
+            BLOCK_M = 4
+        else:
+            BLOCK_M = 4
+    else:
+        if num_tokens <= 4:
+            BLOCK_M = 1
+        elif num_tokens <= 32:
+            BLOCK_M = 2
+        elif num_tokens <= 128:
+            BLOCK_M = 4
+        elif num_tokens <= 1024:
+            BLOCK_M = 16
+        else:
+            BLOCK_M = 8
+
+    if is_fp32:
+        max_block_k = 256
+    elif is_sm80_before or is_sm90_plus:
+        max_block_k = 512
+    else:
+        max_block_k = 1024
+    BLOCK_K = min(triton.next_power_of_2(size), max_block_k)
+    BLOCK_K = max(BLOCK_K, 256)
+
+    total = BLOCK_M * BLOCK_K
+    if is_fp32:
+        num_warps = max(8, min(16, total // 64))
+    else:
+        num_warps = max(4, min(16, total // 256))
+
+    if is_sm80_before:
+        num_warps = min(num_warps, 8)
+        num_stages = 2
+    elif is_sm90_plus:
+        num_warps = min(num_warps, 8)
+        num_stages = 4 if total <= 2048 else 2
+    else:
+        num_stages = 4 if total <= 2048 else 2
+
+    return BLOCK_M, BLOCK_K, num_warps, num_stages
+
+
+def moe_fused_mul_sum(
+    inputs: torch.Tensor,
+    topk_weights: torch.Tensor,
+    outputs: torch.Tensor | None = None,
+    topk_ids: torch.Tensor | None = None,
+    expert_map: torch.Tensor | None = None,
+) -> torch.Tensor:
+    """
+    Fused kernel for MoE (Mixture of Experts) to perform weighted summation
+    of expert outputs.
+
+    Args:
+        inputs: The output from experts.
+            Shape: (num_tokens, top_k, hidden_size).
+        topk_weights: The weights assigned to each expert for each token.
+            Shape: (num_tokens, top_k).
+        outputs: Optional pre-allocated output tensor.
+            Shape: (num_tokens, hidden_size).
+        topk_ids: Optional indices of the top-k experts. Used when
+            `expert_map` is provided. Shape: (num_tokens, top_k).
+        expert_map: Optional mapping for Expert Parallelism. A value < 0
+            indicates an invalid token/expert pair that will be skipped.
+
+    Returns:
+        The fused weighted sum of expert outputs.
+        Shape: (num_tokens, hidden_size).
+    """
+    assert inputs.ndim == 3
+    assert topk_weights.ndim == 2
+    assert inputs.is_contiguous()
+    assert topk_weights.is_contiguous()
+    assert inputs.dtype in (torch.float32, torch.float16, torch.bfloat16)
+    assert topk_weights.dtype in (torch.float32, torch.float16, torch.bfloat16)
+
+    num_tokens, top_k, size = inputs.shape
+    output_shape = (num_tokens, size)
+    if outputs is None:
+        outputs = torch.empty(output_shape, dtype=inputs.dtype, device=inputs.device)
+
+    assert outputs.shape == output_shape
+    assert topk_weights.shape == (num_tokens, top_k)
+
+    if not isinstance(inputs, FakeTensor):
+        BLOCK_M, BLOCK_K, num_warps, num_stages = _heuristic_config(
+            num_tokens,
+            top_k,
+            size,
+            inputs.element_size(),
+        )
+        grid = (triton.cdiv(size, BLOCK_K), triton.cdiv(num_tokens, BLOCK_M))
+        moe_fused_mul_sum_kernel[grid](
+            inputs,
+            topk_weights,
+            outputs,
+            topk_ids,
+            expert_map,
+            num_tokens,
+            top_k * size,
+            expert_map is not None,
+            top_k,
+            size,
+            BLOCK_M,
+            BLOCK_K,
+            num_warps=num_warps,
+            num_stages=num_stages,
+        )
+
+    return outputs
diff --git a/vllm/model_executor/layers/fused_moe/oracle/fp8.py b/vllm/model_executor/layers/fused_moe/oracle/fp8.py
index a63c02663886..a64c99bdb9f5 100644
--- a/vllm/model_executor/layers/fused_moe/oracle/fp8.py
+++ b/vllm/model_executor/layers/fused_moe/oracle/fp8.py
@@ -52,6 +52,7 @@ class Fp8MoeBackend(Enum):
     VLLM_CUTLASS = "VLLM_CUTLASS"
     BATCHED_VLLM_CUTLASS = "BATCHED_VLLM_CUTLASS"
     XPU = "XPU"
+    CPU = "CPU"
 
 
 def _get_priority_backends(
@@ -77,6 +78,7 @@ def _get_priority_backends(
         Fp8MoeBackend.BATCHED_VLLM_CUTLASS,
         Fp8MoeBackend.BATCHED_TRITON,
         Fp8MoeBackend.XPU,
+        Fp8MoeBackend.CPU,
     ]
 
     def _move_to_front(backends: list[Fp8MoeBackend], backend: Fp8MoeBackend) -> None:
@@ -99,6 +101,10 @@ def _move_to_front(backends: list[Fp8MoeBackend], backend: Fp8MoeBackend) -> Non
         # move XPU backend to the front.
         _move_to_front(_AVAILABLE_BACKENDS, Fp8MoeBackend.XPU)
 
+    if current_platform.is_cpu():
+        # CPU platform uses FP8 W8A16 fused MoE kernel.
+        _move_to_front(_AVAILABLE_BACKENDS, Fp8MoeBackend.CPU)
+
     return _AVAILABLE_BACKENDS
 
 
@@ -114,74 +120,82 @@ def backend_to_kernel_cls(
         return [TrtLlmFp8ExpertsMonolithic, TrtLlmFp8ExpertsModular]
 
     elif backend == Fp8MoeBackend.FLASHINFER_CUTLASS:
-        from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.flashinfer_cutlass_moe import (  # noqa: E501
             FlashInferExperts,
         )
 
         return [FlashInferExperts]
 
     elif backend == Fp8MoeBackend.DEEPGEMM:
-        from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.triton_deep_gemm_moe import (
             TritonOrDeepGemmExperts,
         )
 
         return [TritonOrDeepGemmExperts]
 
     elif backend == Fp8MoeBackend.BATCHED_DEEPGEMM:
-        from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.batched_deep_gemm_moe import (
             BatchedDeepGemmExperts,
         )
 
         return [BatchedDeepGemmExperts]
 
     elif backend == Fp8MoeBackend.MARLIN:
-        from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.marlin_moe import (
             MarlinExperts,
         )
 
         return [MarlinExperts]
 
     elif backend == Fp8MoeBackend.TRITON:
-        from vllm.model_executor.layers.fused_moe.fused_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.triton_moe import (
             TritonExperts,
         )
 
         return [TritonExperts]
 
     elif backend == Fp8MoeBackend.BATCHED_TRITON:
-        from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.fused_batched_moe import (
             BatchedTritonExperts,
         )
 
         return [BatchedTritonExperts]
 
     elif backend == Fp8MoeBackend.AITER:
-        from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.rocm_aiter_moe import (
             AiterExperts,
         )
 
         return [AiterExperts]
 
     elif backend == Fp8MoeBackend.VLLM_CUTLASS:
-        from vllm.model_executor.layers.fused_moe.triton_cutlass_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.triton_cutlass_moe import (
             TritonOrCutlassExperts,
         )
 
         return [TritonOrCutlassExperts]
 
     elif backend == Fp8MoeBackend.BATCHED_VLLM_CUTLASS:
-        from vllm.model_executor.layers.fused_moe.cutlass_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import (
             CutlassBatchedExpertsFp8,
         )
 
         return [CutlassBatchedExpertsFp8]
 
     elif backend == Fp8MoeBackend.XPU:
-        from vllm.model_executor.layers.fused_moe.xpu_fused_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.xpu_moe import (
             XPUExpertsFp8,
+            XPUExpertsMxfp8,
+        )
+
+        return [XPUExpertsFp8, XPUExpertsMxfp8]
+
+    elif backend == Fp8MoeBackend.CPU:
+        from vllm.model_executor.layers.fused_moe.experts.cpu_moe import (
+            CPUExpertsFp8,
         )
 
-        return [XPUExpertsFp8]
+        return [CPUExpertsFp8]
 
     else:
         raise ValueError(f"Unknown FP8 MoE backend: {backend.value}")
@@ -217,9 +231,6 @@ def select_fp8_moe_backend(
     Note: Shape-specific fallbacks may still occur at runtime.
     """
 
-    if config.is_lora_enabled:
-        return Fp8MoeBackend.TRITON, backend_to_kernel_cls(Fp8MoeBackend.TRITON)[0]
-
     # NOTE: the kernels are selected in the following order.
     AVAILABLE_BACKENDS = _get_priority_backends(config, weight_key, activation_key)
 
@@ -263,7 +274,7 @@ def _return_or_raise(
                 k_cls, config, weight_key, activation_key, activation_format
             )
             if supported:
-                logger.info_once(_make_log_backend(backend), scope="local")
+                logger.info_once(_make_log_backend(backend))
                 return backend, k_cls
         raise ValueError(_make_log_unsupported(backend, reason))
 
@@ -334,12 +345,10 @@ def _return_or_raise(
                     )
 
                     if supported:
-                        logger.info_once(_make_log_backend(backend), scope="local")
+                        logger.info_once(_make_log_backend(backend))
                         return backend, k_cls
                     else:
-                        logger.debug_once(
-                            _make_log_unsupported(backend, reason), scope="local"
-                        )
+                        logger.debug_once(_make_log_unsupported(backend, reason))
 
             raise NotImplementedError(
                 "Found VLLM_USE_FLASHINFER_MOE_FP8=1, but no "
@@ -393,10 +402,10 @@ def _return_or_raise(
                 activation_format,
             )
             if supported:
-                logger.info_once(_make_log_backend(backend), scope="local")
+                logger.info_once(_make_log_backend(backend))
                 return backend, k_cls
             else:
-                logger.debug_once(_make_log_unsupported(backend, reason), scope="local")
+                logger.debug_once(_make_log_unsupported(backend, reason))
 
     # TODO(rob): per discussion with TPU team, we need a way to register
     # MoE backends by OOT plugins, rather than having an explicit list
@@ -433,13 +442,27 @@ def convert_to_fp8_moe_kernel_format(
     elif fp8_backend == Fp8MoeBackend.AITER:
         w13, w2 = rocm_aiter_ops.shuffle_weights(w13, w2)
     elif fp8_backend == Fp8MoeBackend.MARLIN:
-        w13, w2, w13_scale, w2_scale = prepare_fp8_moe_layer_for_marlin(
-            layer,
-            w13,
-            w2,
-            w13_scale,
-            w2_scale,
-        )
+        weight_block_size = getattr(layer, "weight_block_size", None)
+        if weight_block_size == [1, 32]:
+            from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import (
+                prepare_mxfp8_moe_layer_for_marlin,
+            )
+
+            w13, w2, w13_scale, w2_scale = prepare_mxfp8_moe_layer_for_marlin(
+                layer,
+                w13,
+                w2,
+                w13_scale,
+                w2_scale,
+            )
+        else:
+            w13, w2, w13_scale, w2_scale = prepare_fp8_moe_layer_for_marlin(
+                layer,
+                w13,
+                w2,
+                w13_scale,
+                w2_scale,
+            )
     elif fp8_backend in [
         Fp8MoeBackend.FLASHINFER_CUTLASS,
         Fp8MoeBackend.FLASHINFER_TRTLLM,
@@ -454,6 +477,18 @@ def convert_to_fp8_moe_kernel_format(
             w2_input_scale=w2_input_scale,
             is_trtllm=(fp8_backend == Fp8MoeBackend.FLASHINFER_TRTLLM),
         )
+    elif fp8_backend == Fp8MoeBackend.XPU:
+        from vllm.model_executor.layers.fused_moe.experts.xpu_moe import (
+            prepare_fp8_moe_layer_for_xpu,
+        )
+
+        w13, w2 = prepare_fp8_moe_layer_for_xpu(w13, w2)
+    elif fp8_backend == Fp8MoeBackend.CPU:
+        from vllm.model_executor.layers.fused_moe.experts.cpu_moe import (
+            prepare_fp8_moe_layer_for_cpu,
+        )
+
+        w13, w2 = prepare_fp8_moe_layer_for_cpu(w13, w2)
     else:
         if fp8_backend not in [
             Fp8MoeBackend.TRITON,
@@ -476,6 +511,7 @@ def make_fp8_moe_quant_config(
     block_shape: list[int] | None = None,
     per_act_token_quant: bool = False,
     per_out_ch_quant: bool = False,
+    swiglu_limit: float | None = None,
 ) -> FusedMoEQuantConfig:
     """
     Create FusedMoEQuantConfig for the specified FP8 Backend.
@@ -498,6 +534,14 @@ def make_fp8_moe_quant_config(
             block_shape=block_shape,
         )
 
+    # CPU is mixed precision W8A16 config.
+    if fp8_backend == Fp8MoeBackend.CPU:
+        return fp8_w8a16_moe_quant_config(
+            w1_scale=w1_scale,
+            w2_scale=w2_scale,
+            block_shape=block_shape,
+        )
+
     # Flashinfer CUTLASS per-tensor uses single dq scale
     # (alpha = w_scale * a_scale) and inverse a2 scale.
     if fp8_backend == Fp8MoeBackend.FLASHINFER_CUTLASS and block_shape is None:
@@ -511,6 +555,7 @@ def make_fp8_moe_quant_config(
             a2_gscale=(1.0 / a2_scale),
             g1_alphas=(w1_scale * a1_scale).squeeze(),
             g2_alphas=(w2_scale * a2_scale).squeeze(),
+            gemm1_clamp_limit=swiglu_limit,
         )
     # MXFP8 uses "mxfp8" quant_dtype so the prepare step dispatches to
     # _mxfp8_e4m3_quantize rather than standard FP8 block quantization.
@@ -524,7 +569,8 @@ def make_fp8_moe_quant_config(
             a1_scale=a1_scale,
             a2_scale=a2_scale,
             block_shape=block_shape,
-            is_nvfp4_scale_swizzled=False,
+            is_scale_swizzled=False,
+            gemm1_clamp_limit=swiglu_limit,
         )
 
     # All other backends use normal config.
@@ -536,6 +582,7 @@ def make_fp8_moe_quant_config(
         block_shape=block_shape,
         per_act_token_quant=per_act_token_quant,
         per_out_ch_quant=per_out_ch_quant,
+        gemm1_clamp_limit=swiglu_limit,
     )
 
 
@@ -545,7 +592,6 @@ def make_fp8_moe_kernel(
     experts_cls: type[mk.FusedMoEExperts],
     fp8_backend: Fp8MoeBackend,
     routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
-    shared_experts: torch.nn.Module | None = None,
 ) -> mk.FusedMoEKernel:
     # Create Prepare/Finalize.
     prepare_finalize = maybe_make_prepare_finalize(
@@ -557,7 +603,7 @@ def make_fp8_moe_kernel(
     )
     assert prepare_finalize is not None
 
-    logger.info_once("Using %s", prepare_finalize.__class__.__name__, scope="local")
+    logger.info_once("Using %s", prepare_finalize.__class__.__name__)
 
     # Create Experts.
     if prepare_finalize.activation_format == mk.FusedMoEActivationFormat.BatchedExperts:
@@ -575,18 +621,9 @@ def make_fp8_moe_kernel(
             quant_config=moe_quant_config,
         )
 
-    # NOTE(rob): we only want the mk to control the shared_expert
-    # if using all2all (for SBO). bnell is making this explicit in
-    # the new MoE runner class.
     kernel = mk.FusedMoEKernel(
         prepare_finalize,
         experts,
-        shared_experts=(
-            shared_experts
-            if moe_config.moe_parallel_config.use_deepep_ll_kernels
-            else None
-        ),
-        moe_parallel_config=moe_config.moe_parallel_config,
         inplace=(
             not moe_config.disable_inplace
             and fp8_backend != Fp8MoeBackend.FLASHINFER_CUTLASS
diff --git a/vllm/model_executor/layers/fused_moe/oracle/int8.py b/vllm/model_executor/layers/fused_moe/oracle/int8.py
new file mode 100644
index 000000000000..f5fceef727d1
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/oracle/int8.py
@@ -0,0 +1,213 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from enum import Enum
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.config.kernel import MoEBackend
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe.all2all_utils import (
+    maybe_make_prepare_finalize,
+)
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEQuantConfig,
+    int8_w8a8_moe_quant_config,
+    int8_w8a16_moe_quant_config,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    QuantKey,
+    kInt8DynamicTokenSym,
+    kInt8StaticChannelSym,
+)
+
+logger = init_logger(__name__)
+
+
+class Int8MoeBackend(Enum):
+    TRITON = "TRITON"
+
+
+def _get_priority_backends(
+    moe_config: FusedMoEConfig,
+) -> list[Int8MoeBackend]:
+    """
+    Get available backends in priority order based on platform and config.
+    """
+    return [Int8MoeBackend.TRITON]
+
+
+def backend_to_kernel_cls(
+    backend: Int8MoeBackend,
+) -> list[type[mk.FusedMoEExperts]]:
+    if backend == Int8MoeBackend.TRITON:
+        from vllm.model_executor.layers.fused_moe.experts.triton_moe import (
+            TritonExperts,
+        )
+
+        return [TritonExperts]
+
+    else:
+        raise ValueError(f"Unknown Int8 MoE backend: {backend.value}")
+
+
+def map_int8_backend(runner_backend: MoEBackend) -> Int8MoeBackend:
+    """Map user's MoEBackend to Int8MoeBackend."""
+    mapping = {
+        "triton": Int8MoeBackend.TRITON,
+    }
+    if backend := mapping.get(runner_backend):
+        return backend
+    raise ValueError(
+        f"moe_backend='{runner_backend}' is not supported for Int8 MoE. "
+        f"Expected one of {list(mapping.keys())}."
+    )
+
+
+def select_int8_moe_backend(
+    config: FusedMoEConfig,
+    weight_key: QuantKey | None = kInt8StaticChannelSym,
+    activation_key: QuantKey | None = kInt8DynamicTokenSym,
+) -> tuple[Int8MoeBackend, type[mk.FusedMoEExperts]]:
+    """
+    Select the primary Int8 MoE backend.
+    Note: Shape-specific fallbacks may still occur at runtime.
+    """
+
+    AVAILABLE_BACKENDS = _get_priority_backends(config)
+
+    activation_format = (
+        mk.FusedMoEActivationFormat.BatchedExperts
+        if config.moe_parallel_config.use_batched_activation_format
+        else mk.FusedMoEActivationFormat.Standard
+    )
+
+    def _make_log_backend(backend: Int8MoeBackend) -> str:
+        available_backend_strs = [b.value for b in AVAILABLE_BACKENDS]
+        return (
+            f"Using {backend.value} Int8 MoE backend out "
+            f"of potential backends: {available_backend_strs}."
+        )
+
+    def _make_log_unsupported(backend: Int8MoeBackend, reason: str | None) -> str:
+        if reason:
+            return (
+                f"Int8 MoE backend {backend.value} does not support the "
+                f"deployment configuration since {reason}."
+            )
+        else:
+            return (
+                f"Int8 MoE backend '{backend.value}' does not support the "
+                "deployment configuration."
+            )
+
+    def _return_or_raise(
+        backend: Int8MoeBackend,
+    ) -> tuple[Int8MoeBackend, type[mk.FusedMoEExperts]]:
+        for k_cls in backend_to_kernel_cls(backend):
+            supported, reason = k_cls.is_supported_config(
+                k_cls, config, weight_key, activation_key, activation_format
+            )
+            if supported:
+                logger.info_once(_make_log_backend(backend))
+                return backend, k_cls
+        raise ValueError(_make_log_unsupported(backend, reason))
+
+    # Handle explicit moe_backend from user.
+    runner_backend = config.moe_backend
+    if runner_backend != "auto":
+        requested_backend = map_int8_backend(runner_backend)
+        return _return_or_raise(requested_backend)
+
+    # Select kernels in order of backend.
+    for backend in AVAILABLE_BACKENDS:
+        for k_cls in backend_to_kernel_cls(backend):
+            supported, reason = k_cls.is_supported_config(
+                k_cls,
+                config,
+                weight_key,
+                activation_key,
+                activation_format,
+            )
+            if supported:
+                logger.info_once(_make_log_backend(backend))
+                return backend, k_cls
+            else:
+                logger.debug_once(_make_log_unsupported(backend, reason))
+
+    raise NotImplementedError(
+        "No Int8 MoE backend supports the deployment configuration."
+    )
+
+
+def make_int8_moe_quant_config(
+    w1_scale: torch.Tensor,
+    w2_scale: torch.Tensor,
+    a1_scale: torch.Tensor | None = None,
+    a2_scale: torch.Tensor | None = None,
+    per_act_token_quant: bool = False,
+) -> FusedMoEQuantConfig:
+    assert (a1_scale is None and a2_scale is None) or (
+        a1_scale is not None and a2_scale is not None
+    ), "a1_scale and a2_scale must both be provided or both be None"
+
+    if a1_scale is None or a2_scale is None:
+        return int8_w8a16_moe_quant_config(
+            w1_scale=w1_scale,
+            w2_scale=w2_scale,
+            w1_zp=None,
+            w2_zp=None,
+        )
+
+    return int8_w8a8_moe_quant_config(
+        w1_scale=w1_scale,
+        w2_scale=w2_scale,
+        a1_scale=a1_scale,
+        a2_scale=a2_scale,
+        per_act_token_quant=per_act_token_quant,
+    )
+
+
+def make_int8_moe_kernel(
+    moe_quant_config: FusedMoEQuantConfig,
+    moe_config: FusedMoEConfig,
+    experts_cls: type[mk.FusedMoEExperts],
+    routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
+) -> mk.FusedMoEKernel:
+    # Create Prepare/Finalize.
+    prepare_finalize = maybe_make_prepare_finalize(
+        moe=moe_config,
+        quant_config=moe_quant_config,
+        routing_tables=routing_tables,
+        allow_new_interface=True,
+        use_monolithic=issubclass(experts_cls, mk.FusedMoEExpertsMonolithic),
+    )
+    assert prepare_finalize is not None
+
+    logger.info_once("Using %s", prepare_finalize.__class__.__name__)
+
+    # Create Experts.
+    if prepare_finalize.activation_format == mk.FusedMoEActivationFormat.BatchedExperts:
+        max_num_tokens = prepare_finalize.max_num_tokens_per_rank()
+        assert max_num_tokens is not None
+        experts = experts_cls(
+            moe_config=moe_config,
+            quant_config=moe_quant_config,
+            max_num_tokens=max_num_tokens,
+            num_dispatchers=prepare_finalize.num_dispatchers(),
+        )
+    else:
+        experts = experts_cls(
+            moe_config=moe_config,
+            quant_config=moe_quant_config,
+        )
+
+    kernel = mk.FusedMoEKernel(
+        prepare_finalize,
+        experts,
+        inplace=not moe_config.disable_inplace,
+    )
+
+    return kernel
diff --git a/vllm/model_executor/layers/fused_moe/oracle/int_wna16.py b/vllm/model_executor/layers/fused_moe/oracle/int_wna16.py
new file mode 100644
index 000000000000..c7edd9a500da
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/oracle/int_wna16.py
@@ -0,0 +1,622 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from enum import Enum
+from typing import TYPE_CHECKING
+
+import torch
+
+import vllm._custom_ops as ops
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEQuantConfig,
+)
+from vllm.model_executor.layers.fused_moe.experts.marlin_moe import (
+    BatchedMarlinExperts,
+    MarlinExperts,
+)
+from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
+from vllm.model_executor.layers.quantization.utils.marlin_utils import (
+    marlin_act_int8_process_scales,
+    marlin_moe_permute_scales,
+    marlin_permute_bias,
+    moe_awq_to_marlin_zero_points,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    QuantKey,
+)
+
+if TYPE_CHECKING:
+    from vllm.model_executor.layers.quantization.auto_gptq import AutoGPTQConfig
+    from vllm.model_executor.layers.quantization.awq_marlin import AWQMarlinConfig
+
+logger = init_logger(__name__)
+
+
+class WNA16MoEBackend(Enum):
+    MARLIN = "MARLIN"
+    BATCHED_MARLIN = "BATCHED_MARLIN"
+
+
+def backend_to_kernel_cls(
+    backend: WNA16MoEBackend,
+) -> list[type[mk.FusedMoEExperts]]:
+    """Return the experts class for the given backend, or None for NONE."""
+    if backend == WNA16MoEBackend.MARLIN:
+        from vllm.model_executor.layers.fused_moe.experts.marlin_moe import (
+            MarlinExperts,
+        )
+
+        return [MarlinExperts]
+
+    elif backend == WNA16MoEBackend.BATCHED_MARLIN:
+        from vllm.model_executor.layers.fused_moe.experts.marlin_moe import (
+            BatchedMarlinExperts,
+        )
+
+        return [BatchedMarlinExperts]
+
+    else:
+        raise ValueError(f"Unknown WNA16 MoE backend: {backend.value}")
+
+
+def _get_priority_backends() -> list[WNA16MoEBackend]:
+    """
+    Get available backends in priority order based on platform and config.
+    """
+    _AVAILABLE_BACKENDS = [
+        WNA16MoEBackend.MARLIN,
+        WNA16MoEBackend.BATCHED_MARLIN,
+    ]
+    return _AVAILABLE_BACKENDS
+
+
+def select_wna16_moe_backend(
+    config: FusedMoEConfig,
+    weight_key: QuantKey,
+    weight_bits: int,
+) -> tuple[WNA16MoEBackend, type[mk.FusedMoEExperts]]:
+    """Select the WNA16 MoE backend.
+
+    Args:
+        config: the shared ``FusedMoEConfig`` for this layer.
+        weight_bits: quantization bit-width (4 or 8). 8-bit weights are not
+            supported by the modular Marlin kernel, so ``NONE`` is returned.
+
+    Returns:
+        A tuple of (``WNA16MoEBackend``, experts class or ``None``).
+    """
+
+    activation_format = (
+        mk.FusedMoEActivationFormat.BatchedExperts
+        if config.moe_parallel_config.use_batched_activation_format
+        else mk.FusedMoEActivationFormat.Standard
+    )
+
+    def _make_log_backend(backend: WNA16MoEBackend):
+        return f"Using '{backend.value}' WNA16 MoE backend."
+
+    def _make_log_unsupported(backend: WNA16MoEBackend, reason: str | None) -> str:
+        if reason:
+            return (
+                f"WNA16 MoE backend '{backend.value}' does not support the "
+                f"deployment configuration since {reason}."
+            )
+        return (
+            f"WNA16 MoE backend '{backend.value}' does not support the "
+            "deployment configuration."
+        )
+
+    def _return_or_raise(
+        backend: WNA16MoEBackend,
+        config: FusedMoEConfig,
+        weight_key: QuantKey | None,
+        activation_key: QuantKey | None,
+        activation_format: mk.FusedMoEActivationFormat,
+    ) -> tuple[WNA16MoEBackend, type[mk.FusedMoEExperts]]:
+        reason: str | None = None
+        for k_cls in backend_to_kernel_cls(backend):
+            supported, reason = k_cls.is_supported_config(
+                k_cls, config, weight_key, activation_key, activation_format
+            )
+            if supported:
+                logger.info_once(_make_log_backend(backend), scope="local")
+                return backend, k_cls
+        raise ValueError(_make_log_unsupported(backend, reason))
+
+    # Select kernels in order of backend.
+    AVAILABLE_BACKENDS = _get_priority_backends()
+
+    for backend in AVAILABLE_BACKENDS:
+        activation_key = None  # always BF16 activation for WNA16 MoE
+        for k_cls in backend_to_kernel_cls(backend):
+            supported, reason = k_cls.is_supported_config(
+                k_cls, config, weight_key, activation_key, activation_format
+            )
+            if supported:
+                logger.info_once(_make_log_backend(backend), scope="local")
+                return backend, k_cls
+            else:
+                logger.debug_once(_make_log_unsupported(backend, reason), scope="local")
+
+    raise NotImplementedError(
+        "No WNA16 MoE backend supports the deployment configuration."
+    )
+
+
+def make_wna16_moe_kernel(
+    moe_quant_config: FusedMoEQuantConfig,
+    moe_config: FusedMoEConfig,
+    experts_cls: type[mk.FusedMoEExperts] | None,
+    is_k_full: bool,
+    w13_g_idx: torch.Tensor | None,
+    w2_g_idx: torch.Tensor | None,
+    w13_g_idx_sort_indices: torch.Tensor | None,
+    w2_g_idx_sort_indices: torch.Tensor | None,
+    routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
+) -> mk.FusedMoEKernel:
+    # Currently, we only support MarlinExperts and BatchedMarlinExperts
+    assert experts_cls in (MarlinExperts, BatchedMarlinExperts)
+
+    from vllm.model_executor.layers.fused_moe.all2all_utils import (
+        maybe_make_prepare_finalize,
+    )
+
+    prepare_finalize = maybe_make_prepare_finalize(
+        moe=moe_config,
+        quant_config=moe_quant_config,
+        routing_tables=routing_tables,
+        allow_new_interface=True,
+    )
+    assert prepare_finalize is not None
+    assert isinstance(prepare_finalize, mk.FusedMoEPrepareAndFinalizeModular)
+
+    if prepare_finalize.activation_format == mk.FusedMoEActivationFormat.BatchedExperts:
+        assert experts_cls == BatchedMarlinExperts
+        max_num_tokens = prepare_finalize.max_num_tokens_per_rank()
+        assert max_num_tokens is not None
+        experts: mk.FusedMoEExperts = BatchedMarlinExperts(
+            max_num_tokens=max_num_tokens,
+            num_dispatchers=prepare_finalize.num_dispatchers(),
+            moe_config=moe_config,
+            quant_config=moe_quant_config,
+            w13_g_idx=w13_g_idx,
+            w2_g_idx=w2_g_idx,
+            w13_g_idx_sort_indices=w13_g_idx_sort_indices,
+            w2_g_idx_sort_indices=w2_g_idx_sort_indices,
+            is_k_full=is_k_full,
+        )
+    else:
+        assert experts_cls == MarlinExperts
+        experts = MarlinExperts(
+            moe_config=moe_config,
+            quant_config=moe_quant_config,
+            w13_g_idx=w13_g_idx,
+            w2_g_idx=w2_g_idx,
+            w13_g_idx_sort_indices=w13_g_idx_sort_indices,
+            w2_g_idx_sort_indices=w2_g_idx_sort_indices,
+            is_k_full=is_k_full,
+        )
+
+    return mk.FusedMoEKernel(
+        prepare_finalize,
+        experts,
+        inplace=not moe_config.disable_inplace,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Per-backend weight post-processing
+# ---------------------------------------------------------------------------
+
+
+def _process_weights_marlin(
+    layer: torch.nn.Module,
+    quant_config: "AutoGPTQConfig",
+    input_dtype: torch.dtype | None,
+    w13_qweight: torch.Tensor,
+    w2_qweight: torch.Tensor,
+    w13_scales: torch.Tensor,
+    w2_scales: torch.Tensor,
+    w13_g_idx: torch.Tensor,
+    w2_g_idx: torch.Tensor,
+    w13_qzeros: torch.Tensor | None = None,
+    w2_qzeros: torch.Tensor | None = None,
+    w13_bias: torch.Tensor | None = None,
+    w2_bias: torch.Tensor | None = None,
+) -> tuple[
+    torch.Tensor,  # w13_qweight
+    torch.Tensor,  # w2_qweight
+    torch.Tensor,  # w13_scales
+    torch.Tensor,  # w2_scales
+    torch.Tensor,  # w13_g_idx
+    torch.Tensor,  # w2_g_idx
+    torch.Tensor,  # w13_g_idx_sort_indices
+    torch.Tensor,  # w2_g_idx_sort_indices
+    torch.Tensor | None,  # w13_qzeros
+    torch.Tensor | None,  # w2_qzeros
+    torch.Tensor | None,  # w13_input_global_scale
+    torch.Tensor | None,  # w2_input_global_scale
+    torch.Tensor | None,  # w13_bias
+    torch.Tensor | None,  # w2_bias
+]:
+    """Standard Marlin weight post-processing shared by MARLIN and
+    BATCHED_MARLIN backends.
+
+    Steps
+    -----
+    1. Optional FP8 preprocessing of packed weights / scales.
+    2. Sort / reset g_idx tensors for act-order handling.
+    3. Repack weights via ``gptq_marlin_moe_repack``.
+    4. Permute scales (and optionally extract INT8 global scales).
+    5. Permute bias tensors.
+    """
+    is_a_8bit = input_dtype is not None and input_dtype.itemsize == 1
+
+    marlin_w13_qweight: torch.Tensor
+    marlin_w2_qweight: torch.Tensor
+    marlin_w13_scales: torch.Tensor
+    marlin_w2_scales: torch.Tensor
+    w13_g_idx_sort_indices: torch.Tensor | None = None
+    w2_g_idx_sort_indices: torch.Tensor | None = None
+    w13_input_global_scale: torch.Tensor | None = None
+    w2_input_global_scale: torch.Tensor | None = None
+    w13_bias_out: torch.Tensor | None = None
+    w2_bias_out: torch.Tensor | None = None
+
+    # --- FP8 weight / scale adjustment ---
+    if input_dtype == torch.float8_e4m3fn:
+        marlin_w13_qweight = ops.marlin_int4_fp8_preprocess(w13_qweight, inplace=False)
+        marlin_w2_qweight = ops.marlin_int4_fp8_preprocess(w2_qweight, inplace=False)
+        marlin_w13_scales = w13_scales.data * 512
+        marlin_w2_scales = w2_scales.data * 512
+    else:
+        marlin_w13_qweight = w13_qweight
+        marlin_w2_qweight = w2_qweight
+        marlin_w13_scales = w13_scales
+        marlin_w2_scales = w2_scales
+
+    # --- Process act_order (g_idx) ---
+    if quant_config.desc_act:
+        num_experts = w13_g_idx.shape[0]
+        w13_g_idx_sort_indices = torch.empty_like(w13_g_idx)
+        w2_g_idx_sort_indices = torch.empty_like(w2_g_idx)
+        w13_sorted_g_idx = torch.empty_like(w13_g_idx)
+        w2_sorted_g_idx = torch.empty_like(w2_g_idx)
+        for e in range(num_experts):
+            w13_g_idx_sort_indices[e] = torch.argsort(w13_g_idx[e]).to(torch.int32)
+            w2_g_idx_sort_indices[e] = torch.argsort(w2_g_idx[e]).to(torch.int32)
+            w13_sorted_g_idx[e] = w13_g_idx[e][w13_g_idx_sort_indices[e]]
+            w2_sorted_g_idx[e] = w2_g_idx[e][w2_g_idx_sort_indices[e]]
+    else:
+        num_experts = w13_g_idx.shape[0]
+        device = w13_g_idx.device
+        w13_g_idx = torch.nn.Parameter(
+            torch.empty((num_experts, 0), dtype=torch.int32, device=device),
+            requires_grad=False,
+        )
+        w2_g_idx = torch.nn.Parameter(
+            torch.empty((num_experts, 0), dtype=torch.int32, device=device),
+            requires_grad=False,
+        )
+        w13_g_idx_sort_indices = torch.nn.Parameter(
+            torch.empty((num_experts, 0), dtype=torch.int32, device=device),
+            requires_grad=False,
+        )
+        w2_g_idx_sort_indices = torch.nn.Parameter(
+            torch.empty((num_experts, 0), dtype=torch.int32, device=device),
+            requires_grad=False,
+        )
+
+    # --- Repack weights ---
+    marlin_w13_qweight = ops.gptq_marlin_moe_repack(
+        marlin_w13_qweight,
+        w13_g_idx_sort_indices,
+        marlin_w13_qweight.shape[1] * quant_config.pack_factor,
+        marlin_w13_qweight.shape[2],
+        quant_config.quant_type.size_bits,
+        is_a_8bit=is_a_8bit,
+    )
+    marlin_w2_qweight = ops.gptq_marlin_moe_repack(
+        marlin_w2_qweight,
+        w2_g_idx_sort_indices,
+        marlin_w2_qweight.shape[1] * quant_config.pack_factor,
+        marlin_w2_qweight.shape[2],
+        quant_config.quant_type.size_bits,
+        is_a_8bit=is_a_8bit,
+    )
+
+    # --- Permute scales ---
+    marlin_w13_scales = marlin_moe_permute_scales(
+        s=marlin_w13_scales,
+        size_k=layer.intermediate_size_per_partition,
+        size_n=marlin_w13_scales.shape[2],
+        group_size=quant_config.group_size,
+        is_a_8bit=is_a_8bit,
+    )
+    marlin_w2_scales = marlin_moe_permute_scales(
+        s=marlin_w2_scales,
+        size_k=marlin_w2_scales.shape[1]
+        * (
+            quant_config.group_size
+            if quant_config.group_size != -1
+            else quant_config.pack_factor
+        ),
+        size_n=marlin_w2_scales.shape[2],
+        group_size=quant_config.group_size,
+        is_a_8bit=is_a_8bit,
+    )
+
+    if input_dtype == torch.int8:
+        if layer.num_groups_w13 > 1:
+            marlin_w13_scales, w13_input_global_scale = marlin_act_int8_process_scales(
+                marlin_w13_scales
+            )
+        if layer.num_groups_w2 > 1:
+            marlin_w2_scales, w2_input_global_scale = marlin_act_int8_process_scales(
+                marlin_w2_scales
+            )
+
+    # --- Permute bias ---
+    if w13_bias is not None:
+        w13_bias_out = marlin_permute_bias(w13_bias)
+    if w2_bias is not None:
+        w2_bias_out = marlin_permute_bias(w2_bias)
+
+    return (
+        marlin_w13_qweight,
+        marlin_w2_qweight,
+        marlin_w13_scales,
+        marlin_w2_scales,
+        w13_g_idx,
+        w2_g_idx,
+        w13_g_idx_sort_indices,
+        w2_g_idx_sort_indices,
+        w13_qzeros,
+        w2_qzeros,
+        w13_input_global_scale,
+        w2_input_global_scale,
+        w13_bias_out,
+        w2_bias_out,
+    )
+
+
+def _process_awq_weights_marlin(
+    layer: torch.nn.Module,
+    quant_config: "AWQMarlinConfig",
+    input_dtype: torch.dtype | None,
+    w13_qweight: torch.Tensor,
+    w2_qweight: torch.Tensor,
+    w13_scales: torch.Tensor,
+    w2_scales: torch.Tensor,
+    w13_qzeros: torch.Tensor,
+    w2_qzeros: torch.Tensor,
+    w13_bias: torch.Tensor | None = None,
+    w2_bias: torch.Tensor | None = None,
+) -> tuple[
+    torch.Tensor,  # w13_qweight
+    torch.Tensor,  # w2_qweight
+    torch.Tensor,  # w13_scales
+    torch.Tensor,  # w2_scales
+    torch.Tensor | None,  # w13_g_idx
+    torch.Tensor | None,  # w2_g_idx
+    torch.Tensor | None,  # w13_g_idx_sort_indices
+    torch.Tensor | None,  # w2_g_idx_sort_indices
+    torch.Tensor | None,  # w13_qzeros
+    torch.Tensor | None,  # w2_qzeros
+    torch.Tensor | None,  # w13_input_global_scale
+    torch.Tensor | None,  # w2_input_global_scale
+    torch.Tensor | None,  # w13_bias
+    torch.Tensor | None,  # w2_bias
+]:
+    """AWQ-specific Marlin weight post-processing.
+
+    AWQ checkpoints use a different packing order than GPTQ, so they need
+    AWQ-specific weight repacking and zero-point conversion before Marlin runs.
+    """
+    num_experts = w13_qweight.shape[0]
+    device = w13_qweight.device
+    is_a_8bit = input_dtype is not None and input_dtype.itemsize == 1
+    w13_input_global_scale: torch.Tensor | None = None
+    w2_input_global_scale: torch.Tensor | None = None
+    w13_bias_out: torch.Tensor | None = None
+    w2_bias_out: torch.Tensor | None = None
+
+    if input_dtype == torch.float8_e4m3fn:
+        ops.marlin_int4_fp8_preprocess(
+            w13_qweight.view(-1, w13_qweight.size(2)),
+            w13_qzeros.view(-1, w13_qzeros.size(2)),
+            inplace=True,
+        )
+        ops.marlin_int4_fp8_preprocess(
+            w2_qweight.view(-1, w2_qweight.size(2)),
+            w2_qzeros.view(-1, w2_qzeros.size(2)),
+            inplace=True,
+        )
+        w13_scales = w13_scales.data * 512
+        w2_scales = w2_scales.data * 512
+
+    w13_g_idx_sort_indices = torch.nn.Parameter(
+        torch.empty((num_experts, 0), dtype=torch.int32, device=device),
+        requires_grad=False,
+    )
+    w2_g_idx_sort_indices = torch.nn.Parameter(
+        torch.empty((num_experts, 0), dtype=torch.int32, device=device),
+        requires_grad=False,
+    )
+
+    marlin_w13_qweight = ops.awq_marlin_moe_repack(
+        w13_qweight,
+        w13_g_idx_sort_indices,
+        size_k=w13_qweight.shape[1],
+        size_n=w13_qweight.shape[2] * quant_config.pack_factor,
+        num_bits=quant_config.weight_bits,
+        is_a_8bit=is_a_8bit,
+    )
+    marlin_w2_qweight = ops.awq_marlin_moe_repack(
+        w2_qweight,
+        w2_g_idx_sort_indices,
+        size_k=w2_qweight.shape[1],
+        size_n=w2_qweight.shape[2] * quant_config.pack_factor,
+        num_bits=quant_config.weight_bits,
+        is_a_8bit=is_a_8bit,
+    )
+
+    marlin_w13_scales = marlin_moe_permute_scales(
+        s=w13_scales,
+        size_k=layer.intermediate_size_per_partition,
+        size_n=w13_scales.shape[2],
+        group_size=quant_config.group_size,
+        is_a_8bit=is_a_8bit,
+    )
+    if input_dtype == torch.int8 and layer.num_groups_w13 > 1:
+        marlin_w13_scales, w13_input_global_scale = marlin_act_int8_process_scales(
+            marlin_w13_scales
+        )
+
+    marlin_w2_scales = marlin_moe_permute_scales(
+        s=w2_scales,
+        size_k=layer.intermediate_size_per_partition,
+        size_n=w2_scales.shape[2],
+        group_size=quant_config.group_size,
+        is_a_8bit=is_a_8bit,
+    )
+    if input_dtype == torch.int8 and layer.num_groups_w2 > 1:
+        marlin_w2_scales, w2_input_global_scale = marlin_act_int8_process_scales(
+            marlin_w2_scales
+        )
+
+    marlin_w13_qzeros = moe_awq_to_marlin_zero_points(
+        w13_qzeros,
+        size_k=w13_qzeros.shape[1],
+        size_n=w13_qzeros.shape[2] * quant_config.pack_factor,
+        num_bits=quant_config.weight_bits,
+        is_a_8bit=is_a_8bit,
+    )
+    marlin_w2_qzeros = moe_awq_to_marlin_zero_points(
+        w2_qzeros,
+        size_k=w2_qzeros.shape[1],
+        size_n=w2_qzeros.shape[2] * quant_config.pack_factor,
+        num_bits=quant_config.weight_bits,
+        is_a_8bit=is_a_8bit,
+    )
+
+    if w13_bias is not None:
+        w13_bias_out = marlin_permute_bias(w13_bias)
+    if w2_bias is not None:
+        w2_bias_out = marlin_permute_bias(w2_bias)
+
+    return (
+        marlin_w13_qweight,
+        marlin_w2_qweight,
+        marlin_w13_scales,
+        marlin_w2_scales,
+        None,
+        None,
+        w13_g_idx_sort_indices,
+        w2_g_idx_sort_indices,
+        marlin_w13_qzeros,
+        marlin_w2_qzeros,
+        w13_input_global_scale,
+        w2_input_global_scale,
+        w13_bias_out,
+        w2_bias_out,
+    )
+
+
+def convert_to_wna16_moe_kernel_format(
+    backend: WNA16MoEBackend,
+    layer: torch.nn.Module,
+    quant_config: QuantizationConfig,
+    input_dtype: torch.dtype | None,
+    w13: torch.Tensor,
+    w2: torch.Tensor,
+    w13_scale: torch.Tensor,
+    w2_scale: torch.Tensor,
+    w13_g_idx: torch.Tensor | None = None,
+    w2_g_idx: torch.Tensor | None = None,
+    w13_qzeros: torch.Tensor | None = None,
+    w2_qzeros: torch.Tensor | None = None,
+    w13_bias: torch.Tensor | None = None,
+    w2_bias: torch.Tensor | None = None,
+) -> tuple[
+    torch.Tensor,  # w13_qweight
+    torch.Tensor,  # w2_qweight
+    torch.Tensor,  # w13_scales
+    torch.Tensor,  # w2_scales
+    torch.Tensor | None,  # w13_g_idx
+    torch.Tensor | None,  # w2_g_idx
+    torch.Tensor | None,  # w13_g_idx_sort_indices
+    torch.Tensor | None,  # w2_g_idx_sort_indices
+    torch.Tensor | None,  # w13_qzeros
+    torch.Tensor | None,  # w2_qzeros
+    torch.Tensor | None,  # w13_input_global_scale
+    torch.Tensor | None,  # w2_input_global_scale
+    torch.Tensor | None,  # w13_bias
+    torch.Tensor | None,  # w2_bias
+]:
+    """Dispatch weight post-processing to the appropriate per-backend handler.
+
+    To add a new backend, implement a ``_process_weights_<name>`` helper and
+    add a branch here.
+
+    Args:
+        backend: the selected ``WNA16MoEBackend``.
+        layer: the ``FusedMoE`` layer whose parameters are being prepared.
+        quant_config: the ``QuantizationConfig`` for this layer.
+        input_dtype: optional activation dtype, usually should be 16 bit.
+    """
+    if backend in (
+        WNA16MoEBackend.MARLIN,
+        WNA16MoEBackend.BATCHED_MARLIN,
+    ):
+        from vllm.model_executor.layers.quantization.auto_gptq import (
+            AutoGPTQConfig,
+        )
+        from vllm.model_executor.layers.quantization.awq_marlin import (
+            AWQMarlinConfig,
+        )
+
+        if isinstance(quant_config, AWQMarlinConfig):
+            if w13_qzeros is None or w2_qzeros is None:
+                raise ValueError("AWQ Marlin MoE requires zero-point tensors.")
+            return _process_awq_weights_marlin(
+                layer,
+                quant_config,
+                input_dtype,
+                w13,
+                w2,
+                w13_scale,
+                w2_scale,
+                w13_qzeros,
+                w2_qzeros,
+                w13_bias,
+                w2_bias,
+            )
+
+        if not isinstance(quant_config, AutoGPTQConfig):
+            raise TypeError(
+                "Marlin WNA16 MoE backend requires AutoGPTQConfig or "
+                "AWQMarlinConfig, got "
+                f"{type(quant_config).__name__}."
+            )
+        if w13_g_idx is None or w2_g_idx is None:
+            raise ValueError("GPTQ Marlin MoE requires g_idx tensors.")
+        return _process_weights_marlin(
+            layer,
+            quant_config,
+            input_dtype,
+            w13,
+            w2,
+            w13_scale,
+            w2_scale,
+            w13_g_idx,
+            w2_g_idx,
+            w13_qzeros,
+            w2_qzeros,
+            w13_bias,
+            w2_bias,
+        )
+    else:
+        raise ValueError(f"Unsupported wna16 MoE backend: {backend.value}")
diff --git a/vllm/model_executor/layers/fused_moe/oracle/mxfp4.py b/vllm/model_executor/layers/fused_moe/oracle/mxfp4.py
index 9008bdeeca7e..3ecc4f018f19 100644
--- a/vllm/model_executor/layers/fused_moe/oracle/mxfp4.py
+++ b/vllm/model_executor/layers/fused_moe/oracle/mxfp4.py
@@ -1,12 +1,15 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from enum import Enum
-from typing import Union
+from typing import TYPE_CHECKING, Literal, Union
 
 import torch
 
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm import envs
+from vllm.config import get_current_vllm_config
+from vllm.config.kernel import MoEBackend
+from vllm.config.quantization import QuantizationConfigArgs
 from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe import (
     FusedMoEConfig,
@@ -16,20 +19,31 @@
 )
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEQuantConfig,
+    FusedMoEQuantDesc,
+    RoutingMethodType,
     mxfp4_mxfp8_moe_quant_config,
+    mxfp4_w4a8_moe_quant_config,
     mxfp4_w4a16_moe_quant_config,
     ocp_mx_moe_quant_config,
 )
 from vllm.model_executor.layers.quantization.utils.mxfp4_utils import _swizzle_mxfp4
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     QuantKey,
+    kFp8Dynamic128Sym,
+    kFp8StaticTensorSym,
+    kMxfp4Dynamic,
     kMxfp4Static,
     kMxfp8Dynamic,
 )
+from vllm.model_executor.layers.quantization.utils.w8a8_utils import all_close_1d
 from vllm.platforms import current_platform
 from vllm.utils.import_utils import has_triton_kernels
 from vllm.utils.math_utils import round_up
 
+if TYPE_CHECKING:
+    from vllm.model_executor.layers.fused_moe import RoutedExperts
+
+
 logger = init_logger(__name__)
 
 if has_triton_kernels():
@@ -45,6 +59,8 @@
 
 class Mxfp4MoeBackend(Enum):
     NONE = "None"
+    # DeepGEMM FP8xFP4 backend (SM100+)
+    DEEPGEMM_MXFP4 = "DEEPGEMM_MXFP4"
     # FlashInfer TRTLLM backends
     FLASHINFER_TRTLLM_MXFP4_MXFP8 = "FLASHINFER_TRTLLM_MXFP4_MXFP8"
     FLASHINFER_TRTLLM_MXFP4_BF16 = "FLASHINFER_TRTLLM_MXFP4_BF16"
@@ -54,13 +70,31 @@ class Mxfp4MoeBackend(Enum):
     # Marlin
     BATCHED_MARLIN = "BATCHED_MARLIN"
     MARLIN = "MARLIN"
-    # ROCm AITER (CK)
-    CK = "CK"
+    # ROCm AITER backends
+    AITER_MXFP4_BF16 = "AITER_MXFP4_BF16"  # W4A16: CK kernel
+    # Keep the legacy name as an alias while the ROCm split backend rename settles.
+    AITER = "AITER_MXFP4_BF16"
+    AITER_MXFP4_FP8 = "AITER_MXFP4_FP8"  # W4A8: triton kernel
+    AITER_MXFP4_MXFP4 = "AITER_MXFP4_MXFP4"  # W4A4: CK kernel
     # Triton
     TRITON = "TRITON"
     TRITON_UNFUSED = "TRITON_UNFUSED"
     # XPU
     XPU = "XPU"
+    # CPU
+    CPU = "CPU"
+    # Emulation
+    EMULATION = "EMULATION"
+    # Humming
+    HUMMING = "HUMMING"
+
+
+# AITER backends group
+AITER_BACKENDS = (
+    Mxfp4MoeBackend.AITER_MXFP4_BF16,
+    Mxfp4MoeBackend.AITER_MXFP4_FP8,
+    Mxfp4MoeBackend.AITER_MXFP4_MXFP4,
+)
 
 
 # Backends that share the same TRTLLM weight format
@@ -78,7 +112,14 @@ class Mxfp4MoeBackend(Enum):
 def backend_to_kernel_cls(
     backend: Mxfp4MoeBackend,
 ) -> list[type[mk.FusedMoEExperts]]:
-    if backend in (
+    if backend == Mxfp4MoeBackend.DEEPGEMM_MXFP4:
+        from vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe import (
+            DeepGemmFP4Experts,
+        )
+
+        return [DeepGemmFP4Experts]
+
+    elif backend in (
         Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_BF16,
         Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_MXFP8,
     ):
@@ -94,14 +135,14 @@ def backend_to_kernel_cls(
         Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_BF16,
         Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_MXFP8,
     ):
-        from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.flashinfer_cutlass_moe import (  # noqa: E501
             FlashInferExperts,
         )
 
         return [FlashInferExperts]
 
     elif backend == Mxfp4MoeBackend.TRITON:
-        from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.gpt_oss_triton_kernels_moe import (  # noqa: E501
             OAITritonExperts,
             OAITritonMxfp4ExpertsMonolithic,
         )
@@ -110,73 +151,137 @@ def backend_to_kernel_cls(
         return [OAITritonMxfp4ExpertsMonolithic, OAITritonExperts]
 
     elif backend == Mxfp4MoeBackend.TRITON_UNFUSED:
-        from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.gpt_oss_triton_kernels_moe import (  # noqa: E501
             UnfusedOAITritonExperts,
         )
 
         return [UnfusedOAITritonExperts]
 
+    elif backend == Mxfp4MoeBackend.HUMMING:
+        from vllm.model_executor.layers.fused_moe.experts.fused_humming_moe import (
+            BatchedHummingGroupedExperts,
+            HummingGroupedExperts,
+            HummingIndexedExperts,
+        )
+
+        return [
+            BatchedHummingGroupedExperts,
+            HummingGroupedExperts,
+            HummingIndexedExperts,
+        ]
+
     elif backend == Mxfp4MoeBackend.MARLIN:
-        from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.marlin_moe import (
             MarlinExperts,
         )
 
         return [MarlinExperts]
 
     elif backend == Mxfp4MoeBackend.BATCHED_MARLIN:
-        from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.marlin_moe import (
             BatchedMarlinExperts,
         )
 
         return [BatchedMarlinExperts]
 
-    elif backend == Mxfp4MoeBackend.CK:
-        from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (
+    elif backend == Mxfp4MoeBackend.AITER_MXFP4_BF16:
+        from vllm.model_executor.layers.fused_moe.experts.rocm_aiter_moe import (
+            AiterExperts,
+        )
+
+        return [AiterExperts]
+
+    elif backend == Mxfp4MoeBackend.AITER_MXFP4_FP8:
+        from vllm.model_executor.layers.fused_moe.experts.aiter_mxfp4_w4a8_moe import (
+            AiterW4A8ExpertsMonolithic,
+        )
+
+        return [AiterW4A8ExpertsMonolithic]
+
+    elif backend == Mxfp4MoeBackend.AITER_MXFP4_MXFP4:
+        from vllm.model_executor.layers.fused_moe.experts.rocm_aiter_moe import (
             AiterExperts,
         )
 
         return [AiterExperts]
 
     elif backend == Mxfp4MoeBackend.XPU:
-        from vllm.model_executor.layers.fused_moe.xpu_fused_moe import XPUExpertsMXFp4
+        from vllm.model_executor.layers.fused_moe.experts.xpu_moe import XPUExpertsMXFp4
 
         return [XPUExpertsMXFp4]
 
+    elif backend == Mxfp4MoeBackend.CPU:
+        from vllm.model_executor.layers.fused_moe.experts.cpu_moe import CPUExpertsMxfp4
+
+        return [CPUExpertsMxfp4]
+
+    elif backend == Mxfp4MoeBackend.EMULATION:
+        from vllm.model_executor.layers.fused_moe.experts.ocp_mx_emulation_moe import (
+            OCP_MXQuantizationEmulationTritonExperts,
+        )
+
+        return [OCP_MXQuantizationEmulationTritonExperts]
+
     else:
         raise ValueError(f"Unknown MXFP4 MoE backend: {backend.value}")
 
 
-def map_mxfp4_backend(runner_backend: str) -> Mxfp4MoeBackend:
-    """Map user's moe_backend string to Mxfp4MoeBackend."""
-    mapping: dict[str, Mxfp4MoeBackend] = {
-        "flashinfer_trtllm": Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_BF16,
-        "flashinfer_trtllm_afp8": Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_MXFP8,
-        "flashinfer_cutlass": Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_BF16,
-        "flashinfer_cutlass_afp8": Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_MXFP8,
-        "triton": Mxfp4MoeBackend.TRITON,
-        "marlin": Mxfp4MoeBackend.MARLIN,
-        "ck": Mxfp4MoeBackend.CK,
-        "xpu": Mxfp4MoeBackend.XPU,
+def map_mxfp4_backend(runner_backend: MoEBackend) -> list[Mxfp4MoeBackend]:
+    """Map a moe_backend string to its candidate Mxfp4MoeBackends.
+
+    Vendor families return all activation variants; the caller picks one
+    via ``activation_key`` and ``is_supported_config``.
+    """
+    mapping: dict[str, list[Mxfp4MoeBackend]] = {
+        "deep_gemm": [Mxfp4MoeBackend.DEEPGEMM_MXFP4],
+        "flashinfer_trtllm": [
+            Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_BF16,
+            Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_MXFP8,
+        ],
+        "flashinfer_trtllm_afp8": [Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_MXFP8],
+        "flashinfer_cutlass": [
+            Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_BF16,
+            Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_MXFP8,
+        ],
+        "flashinfer_cutlass_afp8": [Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_MXFP8],
+        "triton": [Mxfp4MoeBackend.TRITON],
+        "triton_unfused": [Mxfp4MoeBackend.TRITON_UNFUSED],
+        "humming": [Mxfp4MoeBackend.HUMMING],
+        "marlin": [Mxfp4MoeBackend.MARLIN],
+        "aiter": [
+            Mxfp4MoeBackend.AITER_MXFP4_BF16,
+            Mxfp4MoeBackend.AITER_MXFP4_FP8,
+            Mxfp4MoeBackend.AITER_MXFP4_MXFP4,
+        ],
+        "aiter_mxfp4_fp8": [Mxfp4MoeBackend.AITER_MXFP4_FP8],
+        "aiter_mxfp4_mxfp4": [Mxfp4MoeBackend.AITER_MXFP4_MXFP4],
+        "xpu": [Mxfp4MoeBackend.XPU],
+        "cpu": [Mxfp4MoeBackend.CPU],
+        "emulation": [Mxfp4MoeBackend.EMULATION],
     }
-    if backend := mapping.get(runner_backend):
-        return backend
+    if backends := mapping.get(runner_backend):
+        return backends
     raise ValueError(
         f"moe_backend='{runner_backend}' is not supported for MXFP4 MoE. "
         f"Expected one of {list(mapping.keys())}."
     )
 
 
-def _get_priority_backends() -> list[Mxfp4MoeBackend]:
-    """
-    Get available backends in priority order based on platform and config.
-    Only includes BF16 backends. MXFP8 backends are selected via env vars.
-    """
+def _get_priority_backends_for_gpt_oss() -> list[Mxfp4MoeBackend]:
+    """Available backends in priority order, BF16-act variant before
+    activation-quantized variant within each vendor family."""
     _AVAILABLE_BACKENDS = [
         Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_BF16,
-        Mxfp4MoeBackend.CK,
+        Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_MXFP8,
+        Mxfp4MoeBackend.AITER_MXFP4_BF16,
+        Mxfp4MoeBackend.AITER_MXFP4_FP8,
+        Mxfp4MoeBackend.AITER_MXFP4_MXFP4,
         Mxfp4MoeBackend.TRITON,
         Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_BF16,
-        Mxfp4MoeBackend.TRITON_UNFUSED,
+        Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_MXFP8,
+        # TRITON_UNFUSED has bug with MTP support
+        # TODO re-enable after kernel is fixed
+        # TRITON_UNFUSED
         Mxfp4MoeBackend.MARLIN,
         Mxfp4MoeBackend.BATCHED_MARLIN,
         Mxfp4MoeBackend.XPU,
@@ -184,43 +289,134 @@ def _get_priority_backends() -> list[Mxfp4MoeBackend]:
     return _AVAILABLE_BACKENDS
 
 
+def _get_priority_backends() -> list[Mxfp4MoeBackend]:
+    """
+    Get available backends in priority order. SM100+ prefers DeepGEMM FP4 /
+    TRTLLM MXFP8; SM90 falls through to Triton_unfused or Marlin (the
+    backend-level ``is_supported_config`` check filters by device capability).
+    """
+    if current_platform.is_rocm():
+        return [Mxfp4MoeBackend.AITER_MXFP4_BF16]
+    if current_platform.is_xpu():
+        return [Mxfp4MoeBackend.XPU]
+    _AVAILABLE_BACKENDS = [
+        Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_MXFP8,
+        Mxfp4MoeBackend.DEEPGEMM_MXFP4,
+        # TRITON_UNFUSED has bug with MTP support
+        # TODO re-enable after kernel is fixed
+        # TRITON_UNFUSED
+        Mxfp4MoeBackend.MARLIN,
+        Mxfp4MoeBackend.BATCHED_MARLIN,
+    ]
+    return _AVAILABLE_BACKENDS
+
+
 def _backend_activation_key(backend: Mxfp4MoeBackend) -> QuantKey | None:
-    """Map backend to its activation key (MXFP8 or None for BF16)."""
+    """Map backend to its activation key (FP8, MXFP8, or None for BF16)."""
+    if backend == Mxfp4MoeBackend.DEEPGEMM_MXFP4:
+        return kFp8Dynamic128Sym
     if backend in (
         Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_MXFP8,
         Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_MXFP8,
     ):
         return kMxfp8Dynamic
-    return None
+    if backend == Mxfp4MoeBackend.AITER_MXFP4_FP8:
+        return kFp8StaticTensorSym
+    if backend == Mxfp4MoeBackend.AITER_MXFP4_MXFP4:
+        return kMxfp4Dynamic
+    return None  # BF16 activation
+
+
+def _user_moe_activation_override() -> QuantKey | None:
+    """User's MoE activation override from quantization_config, or None."""
+    args = get_current_vllm_config().model_config.quantization_config
+    if not isinstance(args, QuantizationConfigArgs) or args.moe is None:
+        return None
+    return args.moe.activation
+
+
+def _resolve_activation_key(
+    model_activation_key: QuantKey | None,
+) -> QuantKey | None:
+    """Combine the model-supplied activation key with the user override.
+    Raises on conflict (both set and disagreeing)."""
+    user_override = _user_moe_activation_override()
+    if user_override is None:
+        return model_activation_key
+    if model_activation_key is None or model_activation_key == user_override:
+        return user_override
+    raise ValueError(
+        f"checkpoint declares MoE activation={model_activation_key} but "
+        f"quantization_config.moe.activation={user_override}; remove the "
+        f"override or align it with the checkpoint."
+    )
+
+
+def _make_log_backend(backend: Mxfp4MoeBackend) -> str:
+    return f"Using '{backend.value}' Mxfp4 MoE backend."
+
+
+def _make_log_unsupported(backend: Mxfp4MoeBackend, reason: str | None) -> str:
+    base = (
+        f"Mxfp4 MoE backend '{backend.value}' does not support the "
+        f"deployment configuration"
+    )
+    return f"{base} since {reason}." if reason else f"{base}."
+
+
+def _return_or_raise(
+    backend: Mxfp4MoeBackend,
+    config: FusedMoEConfig,
+    weight_key: QuantKey | None,
+    activation_key: QuantKey | None,
+    activation_format: mk.FusedMoEActivationFormat,
+    scope: Literal["process", "global", "local"] = "local",
+) -> tuple[Mxfp4MoeBackend, type[mk.FusedMoEExperts]]:
+    reason: str | None = None
+    for k_cls in backend_to_kernel_cls(backend):
+        supported, reason = k_cls.is_supported_config(
+            k_cls, config, weight_key, activation_key, activation_format
+        )
+        if supported:
+            logger.info_once(_make_log_backend(backend), scope=scope)
+            return backend, k_cls
+    raise ValueError(_make_log_unsupported(backend, reason))
+
+
+def _filter_by_activation(
+    backends: list[Mxfp4MoeBackend],
+    requested_activation_key: QuantKey | None,
+) -> list[Mxfp4MoeBackend]:
+    """Pick variants matching ``requested_activation_key``; without one,
+    prefer BF16 if the list has any, else keep the list as-is so explicit
+    non-BF16 picks (e.g. the ``_afp8`` aliases) still land."""
+    if requested_activation_key is not None:
+        return [
+            b
+            for b in backends
+            if _backend_activation_key(b) == requested_activation_key
+            or b == Mxfp4MoeBackend.EMULATION
+        ]
+    bf16 = [b for b in backends if _backend_activation_key(b) is None]
+    return bf16 if bf16 else backends
 
 
 def select_mxfp4_moe_backend(
     config: FusedMoEConfig,
+    activation_key: QuantKey | None = None,
 ) -> tuple[Mxfp4MoeBackend, type[mk.FusedMoEExperts] | None]:
     """
     Select the primary MXFP4 MoE backend.
+
+    Args:
+        config: MoE configuration
+        activation_key: Optional activation quantization key. If provided,
+            overrides the default activation key for backend selection.
+            Use kFp8StaticTensorSym for W4A8 scheme.
+
     Note: Shape-specific fallbacks may still occur at runtime.
     """
-    triton_kernels_supported = has_triton_kernels() and (
-        9,
-        0,
-    ) <= current_platform.get_device_capability() < (11, 0)
-
-    # LoRA: separate experts backend path
-    if config.is_lora_enabled:
-        if not current_platform.is_cuda():
-            # ROCm: Triton mxfp4 LoRA hits GPU memory faults due to
-            # triton_kernels.tensor.Tensor / HIP read-only page issues
-            # during weight swizzle and LoRA forward. Needs work from
-            # the triton_kernels/aiter side.
-            raise NotImplementedError("Mxfp4 LoRA is currently only supported on CUDA.")
-        if envs.VLLM_MXFP4_USE_MARLIN is False and triton_kernels_supported:
-            logger.info_once("Using Triton backend for mxfp4 lora")
-            return Mxfp4MoeBackend.TRITON_UNFUSED, backend_to_kernel_cls(
-                Mxfp4MoeBackend.TRITON_UNFUSED
-            )[0]
-        logger.info_once("Using Marlin backend for mxfp4 lora")
-        return Mxfp4MoeBackend.MARLIN, backend_to_kernel_cls(Mxfp4MoeBackend.MARLIN)[0]
+    requested_activation_key = _resolve_activation_key(activation_key)
 
     activation_format = (
         mk.FusedMoEActivationFormat.BatchedExperts
@@ -228,61 +424,55 @@ def select_mxfp4_moe_backend(
         else mk.FusedMoEActivationFormat.Standard
     )
 
-    def _make_log_backend(backend: Mxfp4MoeBackend):
-        return f"Using '{backend.value}' Mxfp4 MoE backend."
-
-    def _make_log_unsupported(backend: Mxfp4MoeBackend, reason: str | None) -> str:
-        if reason:
-            return (
-                f"Mxfp4 MoE backend '{backend.value}' does not support the "
-                f"deployment configuration since {reason}."
-            )
-        return (
-            f"Mxfp4 MoE backend '{backend.value}' does not support the "
-            "deployment configuration."
-        )
-
-    def _return_or_raise(
-        backend: Mxfp4MoeBackend,
-        config: FusedMoEConfig,
-        weight_key: QuantKey | None,
-        activation_key: QuantKey | None,
-        activation_format: mk.FusedMoEActivationFormat,
-    ) -> tuple[Mxfp4MoeBackend, type[mk.FusedMoEExperts]]:
-        reason: str | None = None
-        for k_cls in backend_to_kernel_cls(backend):
-            supported, reason = k_cls.is_supported_config(
-                k_cls, config, weight_key, activation_key, activation_format
-            )
-            if supported:
-                logger.info_once(_make_log_backend(backend), scope="local")
-                return backend, k_cls
-        raise ValueError(_make_log_unsupported(backend, reason))
-
     runner_backend = config.moe_backend
     if runner_backend != "auto":
-        requested_backend = map_mxfp4_backend(runner_backend)
-        if (
-            activation_format == mk.FusedMoEActivationFormat.BatchedExperts
-            and requested_backend == Mxfp4MoeBackend.MARLIN
-        ):
-            requested_backend = Mxfp4MoeBackend.BATCHED_MARLIN
-        return _return_or_raise(
-            requested_backend,
-            config,
-            kMxfp4Static,
-            _backend_activation_key(requested_backend),
-            activation_format,
-        )
+        requested_backends = map_mxfp4_backend(runner_backend)
+        if activation_format == mk.FusedMoEActivationFormat.BatchedExperts:
+            requested_backends = [
+                Mxfp4MoeBackend.BATCHED_MARLIN if b == Mxfp4MoeBackend.MARLIN else b
+                for b in requested_backends
+            ]
+        candidates = _filter_by_activation(requested_backends, requested_activation_key)
+        if not candidates:
+            raise ValueError(
+                f"moe_backend={runner_backend!r} does not support "
+                f"activation={requested_activation_key}; supported variants: "
+                f"{[b.name for b in requested_backends]}"
+            )
+        last_error: Exception | None = None
+        for requested_backend in candidates:
+            act_key = (
+                requested_activation_key
+                if requested_activation_key is not None
+                else _backend_activation_key(requested_backend)
+            )
+            try:
+                return _return_or_raise(
+                    requested_backend,
+                    config,
+                    kMxfp4Static,
+                    act_key,
+                    activation_format,
+                )
+            except ValueError as e:
+                last_error = e
+        assert last_error is not None
+        raise last_error
 
     # Select kernels in order of backend.
-    AVAILABLE_BACKENDS = _get_priority_backends()
+    AVAILABLE_BACKENDS = _filter_by_activation(
+        _get_priority_backends_for_gpt_oss(), requested_activation_key
+    )
 
     # Handle explicit FlashInfer MXFP4 BF16 configuration.
     if envs.is_set("VLLM_USE_FLASHINFER_MOE_MXFP4_BF16"):
         if not envs.VLLM_USE_FLASHINFER_MOE_MXFP4_BF16:
-            AVAILABLE_BACKENDS.remove(Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_BF16)
-            AVAILABLE_BACKENDS.remove(Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_BF16)
+            for _b in (
+                Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_BF16,
+                Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_BF16,
+            ):
+                if _b in AVAILABLE_BACKENDS:
+                    AVAILABLE_BACKENDS.remove(_b)
         else:
             if current_platform.is_device_capability(90):
                 return _return_or_raise(
@@ -343,16 +533,21 @@ def _return_or_raise(
         )
 
     for backend in AVAILABLE_BACKENDS:
-        activation_key = _backend_activation_key(backend)
+        # Use requested_activation_key if provided, otherwise use backend default
+        act_key = (
+            requested_activation_key
+            if requested_activation_key is not None
+            else _backend_activation_key(backend)
+        )
         for k_cls in backend_to_kernel_cls(backend):
             supported, reason = k_cls.is_supported_config(
-                k_cls, config, kMxfp4Static, activation_key, activation_format
+                k_cls, config, kMxfp4Static, act_key, activation_format
             )
             if supported:
-                logger.info_once(_make_log_backend(backend), scope="local")
+                logger.info_once(_make_log_backend(backend))
                 return backend, k_cls
             else:
-                logger.debug_once(_make_log_unsupported(backend, reason), scope="local")
+                logger.debug_once(_make_log_unsupported(backend, reason))
 
     if current_platform.is_xpu():
         backend = Mxfp4MoeBackend.XPU
@@ -365,19 +560,109 @@ def _return_or_raise(
             activation_format,
         )
 
+    if current_platform.is_cpu():
+        backend = Mxfp4MoeBackend.CPU
+        logger.info_once(_make_log_backend(backend))
+        return _return_or_raise(
+            Mxfp4MoeBackend.CPU,
+            config,
+            kMxfp4Static,
+            None,
+            activation_format,
+        )
+
     if current_platform.is_cuda() or current_platform.is_rocm():
         raise NotImplementedError(
-            "No MXFP4 MoE backend supports the deployment configuration."
+            "No MXFP4 MoE backend supports the deployment configuration. "
+            f"weight_key=kMxfp4Static, activation_key={activation_key}. "
+            "Native backends require specific hardware. "
+            "Set `VLLM_LOGGING_LEVEL=DEBUG` to see detailed unsupported reasons. "
+            "To use the emulation backend for research/debugging, pass "
+            "--moe-backend emulation."
         )
 
     return Mxfp4MoeBackend.NONE, None
 
 
+def select_deepseek_v4_mxfp4_moe_backend(
+    config: FusedMoEConfig,
+) -> tuple[Mxfp4MoeBackend, type[mk.FusedMoEExperts] | None]:
+    """
+    Select the MXFP4 MoE backend with MXFP8 activation as top priority.
+    Falls back through BF16 and other backends.
+    """
+    activation_format = (
+        mk.FusedMoEActivationFormat.BatchedExperts
+        if config.moe_parallel_config.use_batched_activation_format
+        else mk.FusedMoEActivationFormat.Standard
+    )
+
+    # Honor explicit moe_backend (e.g. "marlin", "triton_unfused") before
+    # falling back to the auto priority list.
+    runner_backend = config.moe_backend
+    if runner_backend != "auto":
+        requested_backends = map_mxfp4_backend(runner_backend)
+        if activation_format == mk.FusedMoEActivationFormat.BatchedExperts:
+            requested_backends = [
+                Mxfp4MoeBackend.BATCHED_MARLIN if b == Mxfp4MoeBackend.MARLIN else b
+                for b in requested_backends
+            ]
+        last_error: Exception | None = None
+        for requested_backend in requested_backends:
+            try:
+                return _return_or_raise(
+                    requested_backend,
+                    config,
+                    kMxfp4Static,
+                    _backend_activation_key(requested_backend),
+                    activation_format,
+                )
+            except ValueError as e:
+                last_error = e
+        assert last_error is not None
+        raise last_error
+
+    # DeepSeek-V4 on ROCm is more accurate with the unfused Triton MXFP4 path
+    # than the default AITER path. Prefer Triton-unfused for this routing mode,
+    # while keeping AITER as a fallback if Triton-unfused rejects the config.
+    if (
+        current_platform.is_rocm()
+        and config.routing_method == RoutingMethodType.DeepseekV4
+    ):
+        priority_backends = [
+            Mxfp4MoeBackend.TRITON_UNFUSED,
+            Mxfp4MoeBackend.AITER_MXFP4_BF16,
+        ]
+    else:
+        priority_backends = _get_priority_backends()
+
+    # Iterate priority backends: TRTLLM MXFP8, then Triton.
+    for backend in priority_backends:
+        activation_key = _backend_activation_key(backend)
+        for k_cls in backend_to_kernel_cls(backend):
+            supported, reason = k_cls.is_supported_config(
+                k_cls, config, kMxfp4Static, activation_key, activation_format
+            )
+            if supported:
+                logger.info_once(_make_log_backend(backend), scope="local")
+                return backend, k_cls
+            else:
+                logger.debug_once(_make_log_unsupported(backend, reason), scope="local")
+
+    raise NotImplementedError(
+        "No MXFP4 MoE backend supports the deployment configuration."
+    )
+
+
 def mxfp4_round_up_hidden_size_and_intermediate_size(
     backend: Mxfp4MoeBackend, hidden_size: int, intermediate_size: int
 ) -> tuple[int, int]:
     """Round up hidden_size and intermediate_size based on backend requirements."""
-    if backend in (Mxfp4MoeBackend.MARLIN, Mxfp4MoeBackend.BATCHED_MARLIN):
+    if backend == Mxfp4MoeBackend.DEEPGEMM_MXFP4:
+        # DeepGEMM requires M/N/K alignment
+        intermediate_size = round_up(intermediate_size, 128)
+        hidden_size = round_up(hidden_size, 128)
+    elif backend in (Mxfp4MoeBackend.MARLIN, Mxfp4MoeBackend.BATCHED_MARLIN):
         intermediate_size = round_up(intermediate_size, 128)
         if current_platform.is_xpu():
             hidden_size = round_up(hidden_size, 128)
@@ -395,12 +680,16 @@ def mxfp4_round_up_hidden_size_and_intermediate_size(
     elif current_platform.is_rocm():
         intermediate_size = round_up(intermediate_size, 256)
         hidden_size = round_up(hidden_size, 256)
+    elif backend == Mxfp4MoeBackend.CPU:
+        # CPU AMX kernel uses BLOCK_N=32, align to 32
+        intermediate_size = round_up(intermediate_size, 32)
+        hidden_size = round_up(hidden_size, 32)
     else:
         intermediate_size = round_up(intermediate_size, 64)
     return hidden_size, intermediate_size
 
 
-def convert_to_mxfp4_moe_kernel_format(
+def convert_gpt_oss_weight_to_mxfp4_moe_kernel_format(
     mxfp4_backend: Mxfp4MoeBackend,
     layer: torch.nn.Module,
     w13_weight: torch.Tensor,
@@ -420,13 +709,44 @@ def convert_to_mxfp4_moe_kernel_format(
 ]:
     """Convert loaded weights into backend-specific kernel format."""
 
+    if mxfp4_backend == Mxfp4MoeBackend.DEEPGEMM_MXFP4:
+        from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+            _upcast_e8m0_to_fp32,
+        )
+
+        return (
+            w13_weight.data,
+            w2_weight.data,
+            _upcast_e8m0_to_fp32(w13_weight_scale.data),
+            _upcast_e8m0_to_fp32(w2_weight_scale.data),
+            w13_bias,
+            w2_bias,
+        )
+
     num_experts = w13_weight.shape[0]
     intermediate_size = w13_weight.shape[1] // 2
     hidden_size = w13_weight.shape[2] * 2
 
     sf_block_size = 32  # mxfp4 block size
 
-    if mxfp4_backend in (Mxfp4MoeBackend.MARLIN, Mxfp4MoeBackend.BATCHED_MARLIN):
+    if mxfp4_backend == Mxfp4MoeBackend.HUMMING:
+        from vllm.model_executor.layers.quantization.utils.humming_utils import (
+            prepare_humming_moe_layer,
+        )
+
+        prepare_humming_moe_layer(layer, {"quant_method": "gpt_oss_mxfp4"})
+        return (
+            layer.w13_weight,
+            layer.w2_weight,
+            layer.w13_weight_scale,
+            layer.w2_weight_scale,
+            getattr(layer, "w13_bias", None),
+            getattr(layer, "w2_bias", None),
+        )
+    elif mxfp4_backend in (
+        Mxfp4MoeBackend.MARLIN,
+        Mxfp4MoeBackend.BATCHED_MARLIN,
+    ):
         from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
             prepare_moe_mxfp4_layer_for_marlin,
         )
@@ -632,31 +952,77 @@ def swap_every_two_rows(x, axis=-1):
         else:
             assert mxfp4_backend == Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_BF16
 
-            def _interleave_mxfp4_cutlass_sm90(w):
-                w_shape = w.shape
-                w_interleaved = w.reshape(w_shape[0], w_shape[1], (w_shape[2] // 4), 4)
-                w_interleaved = w_interleaved.permute(0, 2, 1, 3)
-                w_interleaved = w_interleaved.reshape(
-                    w_shape[0], w_shape[2] // 4, w_shape[1] * 4
-                )
-                return w_interleaved
-
-            w31_scales = w13_scale_swapped.to(torch.uint8)
-            w31_scales_interleaved = _interleave_mxfp4_cutlass_sm90(w31_scales)
+            from flashinfer.fused_moe import (
+                interleave_moe_scales_for_sm90_mixed_gemm,
+                interleave_moe_weights_for_sm90_mixed_gemm,
+            )
 
-            w2_scale = w2_weight_scale.data.to(torch.uint8)
-            w2_scale_interleaved = _interleave_mxfp4_cutlass_sm90(w2_scale)
+            w13_weight_interleaved = interleave_moe_weights_for_sm90_mixed_gemm(
+                w13_weight_swapped.contiguous(), "fp4"
+            )
+            w2_weight_interleaved = interleave_moe_weights_for_sm90_mixed_gemm(
+                w2_weight.contiguous(), "fp4"
+            )
+            w31_scales_interleaved = interleave_moe_scales_for_sm90_mixed_gemm(
+                w13_scale_swapped.to(torch.uint8)
+            )
+            w2_scale_interleaved = interleave_moe_scales_for_sm90_mixed_gemm(
+                w2_weight_scale.data.to(torch.uint8)
+            )
 
             return (
-                w13_weight_swapped,
-                w2_weight,
+                w13_weight_interleaved,
+                w2_weight_interleaved,
                 w31_scales_interleaved,
                 w2_scale_interleaved,
                 w13_bias_swapped,
                 w2_bias,
             )
 
-    elif mxfp4_backend == Mxfp4MoeBackend.CK:
+    elif mxfp4_backend == Mxfp4MoeBackend.AITER_MXFP4_MXFP4:
+        from vllm._aiter_ops import rocm_aiter_ops
+
+        if w13_bias is not None:
+            w13_bias = w13_bias.data.to(torch.float32)
+        if w2_bias is not None:
+            w2_bias = w2_bias.data.to(torch.float32)
+
+        # e8m0_shuffle on weight scales (GFX950 swizzle layout)
+        from aiter.utility.fp4_utils import e8m0_shuffle
+
+        s0, s1, _ = w13_weight_scale.shape
+        w13_weight_scale.data = e8m0_shuffle(w13_weight_scale.view(s0 * s1, -1)).view(
+            s0, s1, -1
+        )
+
+        s0, s1, _ = w2_weight_scale.shape
+        w2_weight_scale.data = e8m0_shuffle(w2_weight_scale.view(s0 * s1, -1)).view(
+            s0, s1, -1
+        )
+
+        # View as native FP4 dtype
+        fp4_dtype = getattr(torch, "float4_e2m1fn_x2", None)
+        if fp4_dtype is not None:
+            w13_weight.data = w13_weight.data.view(fp4_dtype)
+            w2_weight.data = w2_weight.data.view(fp4_dtype)
+
+        # Shuffle weights for AITER CK kernel
+        shuffled_w13, shuffled_w2 = rocm_aiter_ops.shuffle_weights(
+            w13_weight, w2_weight
+        )
+        shuffled_w13.is_shuffled = True
+        shuffled_w2.is_shuffled = True
+
+        return (
+            shuffled_w13,
+            shuffled_w2,
+            w13_weight_scale,
+            w2_weight_scale,
+            w13_bias,
+            w2_bias,
+        )
+
+    elif mxfp4_backend == Mxfp4MoeBackend.AITER_MXFP4_BF16:
         from vllm._aiter_ops import rocm_aiter_ops
 
         if w13_bias is not None:
@@ -718,12 +1084,70 @@ def _interleave_mxfp4_cutlass_sm90(w):
             w2_bias,
         )
 
+    elif mxfp4_backend == Mxfp4MoeBackend.AITER_MXFP4_FP8:
+        # W4A8: MXFP4 weights + static FP8 activations (triton kernel)
+        from triton_kernels.matmul_ogs import FlexCtx, PrecisionConfig
+        from triton_kernels.numerics import InFlexData
+
+        if w13_bias is not None:
+            w13_bias = w13_bias.to(torch.float32)
+        if w2_bias is not None:
+            w2_bias = w2_bias.to(torch.float32)
+
+        # Process static FP8 input scales (reduce to scalar, warn if not uniform)
+        w13_input_scale = layer.w13_input_scale
+        w2_input_scale = layer.w2_input_scale
+        if w13_input_scale is None or w2_input_scale is None:
+            raise ValueError(
+                "W4A8 (AITER_MXFP4_FP8) requires static input scales, but found "
+                "w13_input_scale or w2_input_scale is None."
+            )
+        if not all_close_1d(w13_input_scale) or not all_close_1d(w2_input_scale):
+            logger.warning_once(
+                "Found input_scales that are not equal for "
+                "fp8 MoE layer. Using the maximum across experts "
+                "for each layer."
+            )
+        w13_input_scale = w13_input_scale.max().to(torch.float32)
+        w2_input_scale = w2_input_scale.max().to(torch.float32)
+
+        # Swizzle weights for GFX950
+        w13_weight, w13_flex, w13_scale = _swizzle_mxfp4(w13_weight, w13_weight_scale)
+        w2_weight, w2_flex, w2_scale = _swizzle_mxfp4(w2_weight, w2_weight_scale)
+
+        # Create InFlexData for activation scales
+        lhs_data13 = InFlexData(scale=w13_input_scale)
+        lhs_data2 = InFlexData(scale=w2_input_scale)
+
+        # Create PrecisionConfig with both weight and activation info
+        w13_precision_config = PrecisionConfig(
+            weight_scale=w13_scale,
+            flex_ctx=FlexCtx(rhs_data=w13_flex, lhs_data=lhs_data13),
+        )
+        w2_precision_config = PrecisionConfig(
+            weight_scale=w2_scale,
+            flex_ctx=FlexCtx(rhs_data=w2_flex, lhs_data=lhs_data2),
+        )
+
+        del layer.w13_weight
+        del layer.w2_weight
+
+        return (
+            w13_weight,
+            w2_weight,
+            w13_precision_config,
+            w2_precision_config,
+            w13_bias,
+            w2_bias,
+        )
+
     elif mxfp4_backend in TRITON_BACKENDS:
         from triton_kernels.matmul_ogs import FlexCtx, PrecisionConfig
 
-        assert w13_bias is not None and w2_bias is not None
-        w13_bias = w13_bias.to(torch.float32)
-        w2_bias = w2_bias.to(torch.float32)
+        if w13_bias is not None:
+            w13_bias = w13_bias.to(torch.float32)
+        if w2_bias is not None:
+            w2_bias = w2_bias.to(torch.float32)
 
         w13_weight, w13_flex, w13_scale = _swizzle_mxfp4(
             w13_weight,
@@ -762,6 +1186,42 @@ def _interleave_mxfp4_cutlass_sm90(w):
             w13_bias,
             w2_bias,
         )
+    elif mxfp4_backend == Mxfp4MoeBackend.CPU:
+        from vllm.model_executor.layers.fused_moe.experts.cpu_moe import (
+            prepare_mxfp4_moe_layer_for_cpu,
+        )
+
+        packed_w13, packed_w2, packed_w13_scale, packed_w2_scale = (
+            prepare_mxfp4_moe_layer_for_cpu(
+                w13_weight.data,
+                w2_weight.data,
+                w13_weight_scale.data,
+                w2_weight_scale.data,
+            )
+        )
+        if w13_bias is not None:
+            w13_bias = w13_bias.data.to(torch.float32)
+        if w2_bias is not None:
+            w2_bias = w2_bias.data.to(torch.float32)
+        return (
+            packed_w13,
+            packed_w2,
+            packed_w13_scale,
+            packed_w2_scale,
+            w13_bias,
+            w2_bias,
+        )
+    elif mxfp4_backend == Mxfp4MoeBackend.EMULATION:
+        # No additional transformation needed for emulation backend,
+        # weights are dequantized on the fly in the experts class.
+        return (
+            w13_weight,
+            w2_weight,
+            w13_weight_scale,
+            w2_weight_scale,
+            w13_bias,
+            w2_bias,
+        )
     else:
         raise ValueError(
             f"Unsupported mxfp4_backend: {mxfp4_backend}: "
@@ -769,23 +1229,403 @@ def _interleave_mxfp4_cutlass_sm90(w):
         )
 
 
+def convert_weight_to_mxfp4_moe_kernel_format(
+    mxfp4_backend: Mxfp4MoeBackend,
+    layer: torch.nn.Module,
+    w13_weight: torch.Tensor,
+    w2_weight: torch.Tensor,
+    w13_weight_scale: torch.Tensor,
+    w2_weight_scale: torch.Tensor,
+    w13_bias: torch.Tensor | None = None,
+    w2_bias: torch.Tensor | None = None,
+    _cache_permute_indices: dict[torch.Size, torch.Tensor] | None = None,
+) -> tuple[
+    torch.Tensor,
+    torch.Tensor,
+    Union[torch.Tensor, "PrecisionConfig"],
+    Union[torch.Tensor, "PrecisionConfig"],
+    torch.Tensor | None,
+    torch.Tensor | None,
+]:
+    """Convert loaded weights into backend-specific kernel format.
+
+    Supports DeepGEMM, TRTLLM MXFP8, Triton and Marlin backends.
+    """
+
+    if mxfp4_backend == Mxfp4MoeBackend.DEEPGEMM_MXFP4:
+        from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+            _upcast_e8m0_to_fp32,
+        )
+
+        # Weights stay as uint8 packed FP4 — no layout change needed.
+        # Convert E8M0 uint8 scales to float32.
+        return (
+            w13_weight.data,
+            w2_weight.data,
+            _upcast_e8m0_to_fp32(w13_weight_scale.data),
+            _upcast_e8m0_to_fp32(w2_weight_scale.data),
+            w13_bias,
+            w2_bias,
+        )
+
+    if mxfp4_backend == Mxfp4MoeBackend.HUMMING:
+        from vllm.model_executor.layers.quantization.utils.humming_utils import (
+            prepare_humming_moe_layer,
+        )
+
+        prepare_humming_moe_layer(layer, {"quant_method": "mxfp4"})
+        return (
+            layer.w13_weight,
+            layer.w2_weight,
+            layer.w13_weight_scale,
+            layer.w2_weight_scale,
+            getattr(layer, "w13_bias", None),
+            getattr(layer, "w2_bias", None),
+        )
+
+    if mxfp4_backend in (Mxfp4MoeBackend.MARLIN, Mxfp4MoeBackend.BATCHED_MARLIN):
+        from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
+            prepare_moe_mxfp4_layer_for_marlin,
+        )
+
+        return prepare_moe_mxfp4_layer_for_marlin(
+            layer,
+            w13_weight,
+            w2_weight,
+            w13_weight_scale,
+            w2_weight_scale,
+            w13_bias,
+            w2_bias,
+        )
+
+    num_experts = w13_weight.shape[0]
+    intermediate_size = w13_weight.shape[1] // 2
+    hidden_size = w13_weight.shape[2] * 2
+
+    sf_block_size = 32  # mxfp4 block size
+
+    if mxfp4_backend in TRTLLM_BACKENDS:
+        assert _cache_permute_indices is not None
+        from flashinfer.fp4_quantization import nvfp4_block_scale_interleave
+        from flashinfer.fused_moe.core import get_w2_permute_indices_with_cache
+
+        w13_weight = w13_weight.data
+        w2_weight = w2_weight.data
+        w13_weight_scale = w13_weight_scale.data
+        w2_weight_scale = w2_weight_scale.data
+        if w13_bias is not None:
+            w13_bias = w13_bias.data.to(torch.float32)
+        if w2_bias is not None:
+            w2_bias = w2_bias.data.to(torch.float32)
+
+        # Swap w1/w3 and interleave to match TRTLLM SwiGLU convention.
+        # Standard loading gives contiguous [w1/gate, w3/up].
+        # TRTLLM kernel expects interleaved [w3_0, w1_0, w3_1, w1_1, ...].
+        w1_weight = w13_weight[:, :intermediate_size, :]
+        w3_weight = w13_weight[:, intermediate_size:, :]
+        w13_weight = torch.stack([w3_weight, w1_weight], dim=2).reshape(
+            w13_weight.shape
+        )
+
+        w1_scale = w13_weight_scale[:, :intermediate_size, :]
+        w3_scale = w13_weight_scale[:, intermediate_size:, :]
+        w13_weight_scale = torch.stack([w3_scale, w1_scale], dim=2).reshape(
+            w13_weight_scale.shape
+        )
+
+        if w13_bias is not None:
+            b1 = w13_bias[:, :intermediate_size]
+            b3 = w13_bias[:, intermediate_size:]
+            w13_bias = torch.stack([b3, b1], dim=2).reshape(w13_bias.shape)
+
+        # Shuffle weights and scaling factors for transposed mma output.
+        # Permute indices depend only on shape (cached by torch.Size),
+        # so compute once and apply to all experts via batched indexing.
+        epilogue_tile_m = 128
+
+        # w13 weight permute
+        w13_perm = get_w2_permute_indices_with_cache(
+            _cache_permute_indices,
+            w13_weight[0].view(torch.uint8),
+            epilogue_tile_m,
+        ).to(w13_weight.device)
+        w13_weight = w13_weight.view(torch.uint8)[:, w13_perm].contiguous()
+
+        # w13 scale permute + interleave
+        w13_sf_perm = get_w2_permute_indices_with_cache(
+            _cache_permute_indices,
+            w13_weight_scale[0].view(torch.uint8),
+            epilogue_tile_m,
+            num_elts_per_sf=16,
+        ).to(w13_weight_scale.device)
+        w13_s = w13_weight_scale.view(torch.uint8)[:, w13_sf_perm].contiguous()
+        E, N_s, K_s = w13_s.shape
+        w13_weight_scale = (
+            nvfp4_block_scale_interleave(w13_s.reshape(E * N_s, K_s))
+            .reshape(num_experts, 2 * intermediate_size, hidden_size // sf_block_size)
+            .view(torch.float8_e4m3fn)
+        )
+
+        # w2 weight permute
+        w2_perm = get_w2_permute_indices_with_cache(
+            _cache_permute_indices,
+            w2_weight[0].view(torch.uint8),
+            epilogue_tile_m,
+        ).to(w2_weight.device)
+        w2_weight = w2_weight.view(torch.uint8)[:, w2_perm].contiguous()
+
+        # w2 scale permute + interleave
+        w2_sf_perm = get_w2_permute_indices_with_cache(
+            _cache_permute_indices,
+            w2_weight_scale[0].view(torch.uint8),
+            epilogue_tile_m,
+            num_elts_per_sf=16,
+        ).to(w2_weight_scale.device)
+        w2_s = w2_weight_scale.view(torch.uint8)[:, w2_sf_perm].contiguous()
+        E2, N2_s, K2_s = w2_s.shape
+        w2_weight_scale = (
+            nvfp4_block_scale_interleave(w2_s.reshape(E2 * N2_s, K2_s))
+            .reshape(num_experts, hidden_size, intermediate_size // sf_block_size)
+            .view(torch.float8_e4m3fn)
+        )
+
+        # w13 bias permute
+        if w13_bias is not None:
+            w13_b_perm = get_w2_permute_indices_with_cache(
+                _cache_permute_indices,
+                w13_bias[0].reshape(-1, 1),
+                epilogue_tile_m,
+            ).to(w13_bias.device)
+            w13_bias = w13_bias.reshape(num_experts, -1, 1)[:, w13_b_perm].reshape(
+                num_experts, -1
+            )
+
+        # w2 bias permute
+        if w2_bias is not None:
+            w2_b_perm = get_w2_permute_indices_with_cache(
+                _cache_permute_indices,
+                w2_bias[0].reshape(-1, 1),
+                epilogue_tile_m,
+            ).to(w2_bias.device)
+            w2_bias = w2_bias.reshape(num_experts, -1, 1)[:, w2_b_perm].reshape(
+                num_experts, -1
+            )
+
+        return (
+            w13_weight,
+            w2_weight,
+            w13_weight_scale,
+            w2_weight_scale,
+            w13_bias,
+            w2_bias,
+        )
+
+    elif mxfp4_backend == Mxfp4MoeBackend.AITER_MXFP4_BF16:
+        from vllm._aiter_ops import rocm_aiter_ops
+
+        if w13_bias is not None:
+            w13_bias = w13_bias.data.to(torch.float32)
+        if w2_bias is not None:
+            w2_bias = w2_bias.data.to(torch.float32)
+
+        e, n, k = w13_weight.shape
+
+        w13_weight.view(torch.uint8).copy_(
+            w13_weight.data.view(torch.uint8)
+            .view(e, n // 2, 2, k)
+            .permute(0, 2, 1, 3)
+            .contiguous()
+            .view(e, n, k)
+        )
+        w13_weight_scale.data = (
+            w13_weight_scale.data.view(e, n // 2, 2, -1)
+            .permute(0, 2, 1, 3)
+            .contiguous()
+            .view(e, n, -1)
+        )
+
+        w13_weight.data = w13_weight.data.view(torch.float4_e2m1fn_x2)
+        w2_weight.data = w2_weight.data.view(torch.float4_e2m1fn_x2)
+
+        w13_weight.data = rocm_aiter_ops.shuffle_weight_a16w4(w13_weight, 16, True)
+        shuffled_w13_scale = rocm_aiter_ops.shuffle_scale_a16w4(
+            w13_weight_scale.view(-1, w13_weight_scale.shape[-1]),
+            num_experts,
+            True,
+        )
+
+        w2_weight.data = rocm_aiter_ops.shuffle_weight_a16w4(w2_weight, 16, False)
+        shuffled_w2_scale = rocm_aiter_ops.shuffle_scale_a16w4(
+            w2_weight_scale.view(-1, w2_weight_scale.shape[-1]),
+            num_experts,
+            False,
+        )
+
+        if w13_bias is not None:
+            w13_bias = (
+                w13_bias.data.view(-1, n // 2, 2)
+                .permute(0, 2, 1)
+                .contiguous()
+                .view(-1, n)
+            )
+
+        return (
+            w13_weight,
+            w2_weight,
+            shuffled_w13_scale,
+            shuffled_w2_scale,
+            w13_bias,
+            w2_bias,
+        )
+
+    elif mxfp4_backend in TRITON_BACKENDS:
+        from triton_kernels.matmul_ogs import FlexCtx, PrecisionConfig
+
+        if mxfp4_backend == Mxfp4MoeBackend.TRITON:
+
+            def shuffle_weight(w: torch.Tensor) -> torch.Tensor:
+                shape = w.shape
+                n = shape[-1]
+                first = w[..., : n // 2]
+                second = w[..., n // 2 :]
+                stacked = torch.stack((first, second), dim=-1)
+                return stacked.reshape(shape)
+
+            w13_weight = shuffle_weight(w13_weight)
+            w13_weight_scale = shuffle_weight(w13_weight_scale)
+
+            if w13_bias is not None:
+                w13_bias = shuffle_weight(w13_bias.to(torch.float32))
+        else:
+            if w13_bias is not None:
+                w13_bias = w13_bias.to(torch.float32)
+
+        if w2_bias is not None:
+            w2_bias = w2_bias.to(torch.float32)
+
+        w13_weight, w13_flex, w13_scale = _swizzle_mxfp4(
+            w13_weight,
+            w13_weight_scale,
+        )
+        w2_weight, w2_flex, w2_scale = _swizzle_mxfp4(
+            w2_weight,
+            w2_weight_scale,
+        )
+
+        w13_precision_config = PrecisionConfig(
+            weight_scale=w13_scale, flex_ctx=FlexCtx(rhs_data=w13_flex)
+        )
+        w2_precision_config = PrecisionConfig(
+            weight_scale=w2_scale, flex_ctx=FlexCtx(rhs_data=w2_flex)
+        )
+
+        del layer.w13_weight
+        del layer.w2_weight
+
+        return (
+            w13_weight,
+            w2_weight,
+            w13_precision_config,
+            w2_precision_config,
+            w13_bias,
+            w2_bias,
+        )
+    elif mxfp4_backend == Mxfp4MoeBackend.XPU:
+        # No additional transformation needed for XPU backend
+        return (
+            w13_weight,
+            w2_weight,
+            w13_weight_scale,
+            w2_weight_scale,
+            w13_bias,
+            w2_bias,
+        )
+    else:
+        raise ValueError(
+            f"Unsupported mxfp4_backend for Mxfp4MoEMethod: {mxfp4_backend}. "
+            f"Expected TRTLLM, Triton, AITER, or XPU backend."
+        )
+
+
 def make_mxfp4_moe_quant_config(
     mxfp4_backend: Mxfp4MoeBackend,
     w1_scale: Union[torch.Tensor, "PrecisionConfig"],
     w2_scale: Union[torch.Tensor, "PrecisionConfig"],
+    gemm1_alpha: float | None = None,
+    gemm1_beta: float | None = None,
+    swiglu_limit: float | None = None,
     w1_bias: torch.Tensor | None = None,
     w2_bias: torch.Tensor | None = None,
+    a1_scale: torch.Tensor | None = None,
+    a2_scale: torch.Tensor | None = None,
+    layer: torch.nn.Module | None = None,
 ) -> FusedMoEQuantConfig | None:
     """Create a FusedMoEQuantConfig for the given MXFP4 backend."""
-    if mxfp4_backend in (
-        Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_MXFP8,
-        Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_MXFP8,
-    ):
+    if mxfp4_backend == Mxfp4MoeBackend.DEEPGEMM_MXFP4:
+        from vllm.model_executor.layers.quantization.utils.quant_utils import (
+            GroupShape,
+        )
+
+        # DeepGEMM FP4 uses FP8 per-token-group activation quantization
+        # with block 128, matching the FP8 DeepGEMM path.
+        _fp8_dtype = current_platform.fp8_dtype()
+        _block_shape = GroupShape(128, 128)
+        return FusedMoEQuantConfig(
+            _a1=FusedMoEQuantDesc(_fp8_dtype, _block_shape, None, None, None, None),
+            _a2=FusedMoEQuantDesc(_fp8_dtype, _block_shape, None, None, None, None),
+            _w1=FusedMoEQuantDesc("mxfp4", None, w1_scale, None, None, w1_bias),
+            _w2=FusedMoEQuantDesc("mxfp4", None, w2_scale, None, None, w2_bias),
+            gemm1_alpha=gemm1_alpha,
+            gemm1_beta=gemm1_beta,
+            gemm1_clamp_limit=swiglu_limit,
+        )
+    elif mxfp4_backend == Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_MXFP8:
+        # TRTLLM kernel expects non-swizzled mxfp8 activation scales.
         return mxfp4_mxfp8_moe_quant_config(
             w1_bias=w1_bias,
             w2_bias=w2_bias,
             w1_scale=w1_scale,
             w2_scale=w2_scale,
+            gemm1_alpha=gemm1_alpha,
+            gemm1_beta=gemm1_beta,
+            gemm1_clamp_limit=swiglu_limit,
+            mx_alignment=256,
+            is_scale_swizzled=False,
+        )
+    elif mxfp4_backend == Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_MXFP8:
+        # CUTLASS kernel expects swizzled mxfp8 activation scales.
+        return mxfp4_mxfp8_moe_quant_config(
+            w1_bias=w1_bias,
+            w2_bias=w2_bias,
+            w1_scale=w1_scale,
+            w2_scale=w2_scale,
+            gemm1_alpha=gemm1_alpha,
+            gemm1_beta=gemm1_beta,
+            gemm1_clamp_limit=swiglu_limit,
+            is_scale_swizzled=True,
+        )
+    elif mxfp4_backend == Mxfp4MoeBackend.AITER_MXFP4_FP8:
+        # W4A8: MXFP4 weights + static FP8 activations
+        return mxfp4_w4a8_moe_quant_config(
+            w1_scale=w1_scale,
+            w2_scale=w2_scale,
+            a1_scale=a1_scale,
+            a2_scale=a2_scale,
+            w1_bias=w1_bias,
+            w2_bias=w2_bias,
+            block_shape=None,
+            gemm1_clamp_limit=swiglu_limit,
+        )
+    elif mxfp4_backend == Mxfp4MoeBackend.AITER_MXFP4_MXFP4:
+        return ocp_mx_moe_quant_config(
+            quant_dtype="mxfp4",
+            w1_bias=w1_bias,
+            w2_bias=w2_bias,
+            w1_scale=w1_scale,
+            w2_scale=w2_scale,
+            gemm1_alpha=gemm1_alpha,
+            gemm1_beta=gemm1_beta,
+            gemm1_clamp_limit=swiglu_limit,
         )
     elif mxfp4_backend in (
         Mxfp4MoeBackend.MARLIN,
@@ -794,13 +1634,30 @@ def make_mxfp4_moe_quant_config(
         Mxfp4MoeBackend.TRITON_UNFUSED,
         Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_BF16,
         Mxfp4MoeBackend.FLASHINFER_CUTLASS_MXFP4_BF16,
-        Mxfp4MoeBackend.CK,
+        Mxfp4MoeBackend.AITER_MXFP4_BF16,
+        Mxfp4MoeBackend.CPU,
     ):
         return mxfp4_w4a16_moe_quant_config(
             w1_bias=w1_bias,
             w2_bias=w2_bias,
             w1_scale=w1_scale,
             w2_scale=w2_scale,
+            gemm1_alpha=gemm1_alpha,
+            gemm1_beta=gemm1_beta,
+            gemm1_clamp_limit=swiglu_limit,
+        )
+    elif mxfp4_backend == Mxfp4MoeBackend.HUMMING:
+        from vllm.model_executor.layers.fused_moe.layer import FusedMoE
+        from vllm.model_executor.layers.quantization.utils.humming_utils import (
+            get_humming_moe_quant_config,
+        )
+
+        assert isinstance(layer, FusedMoE)
+        return get_humming_moe_quant_config(
+            layer,
+            gemm1_alpha=gemm1_alpha,
+            gemm1_beta=gemm1_beta,
+            gemm1_clamp_limit=swiglu_limit,
         )
     else:
         return ocp_mx_moe_quant_config(
@@ -809,6 +1666,9 @@ def make_mxfp4_moe_quant_config(
             w2_bias=w2_bias,
             w1_scale=w1_scale,
             w2_scale=w2_scale,
+            gemm1_alpha=gemm1_alpha,
+            gemm1_beta=gemm1_beta,
+            gemm1_clamp_limit=swiglu_limit,
         )
 
 
@@ -818,12 +1678,11 @@ def make_mxfp4_moe_kernel(
     experts_cls: type[mk.FusedMoEExperts],
     mxfp4_backend: Mxfp4MoeBackend,
     routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
-    shared_experts: torch.nn.Module | None = None,
+    layer: "RoutedExperts | None" = None,
 ) -> mk.FusedMoEKernel:
     """Create a FusedMoEKernel for the given MXFP4 backend."""
     is_monolithic = issubclass(experts_cls, mk.FusedMoEExpertsMonolithic)
 
-    # Create Prepare/Finalize.
     prepare_finalize = maybe_make_prepare_finalize(
         moe=moe_config,
         quant_config=moe_quant_config,
@@ -833,7 +1692,12 @@ def make_mxfp4_moe_kernel(
     )
     assert prepare_finalize is not None
 
-    logger.info_once("Using %s", prepare_finalize.__class__.__name__, scope="local")
+    logger.info_once("Using %s", prepare_finalize.__class__.__name__)
+
+    extra_kwargs = {}
+    if mxfp4_backend == Mxfp4MoeBackend.HUMMING:
+        assert layer is not None
+        extra_kwargs["layer"] = layer
 
     # Create Experts.
     if prepare_finalize.activation_format == mk.FusedMoEActivationFormat.BatchedExperts:
@@ -844,22 +1708,18 @@ def make_mxfp4_moe_kernel(
             quant_config=moe_quant_config,
             max_num_tokens=max_num_tokens,
             num_dispatchers=prepare_finalize.num_dispatchers(),
+            **extra_kwargs,
         )
     else:
         experts = experts_cls(
             moe_config=moe_config,
             quant_config=moe_quant_config,
+            **extra_kwargs,
         )
 
     kernel = mk.FusedMoEKernel(
         prepare_finalize,
         experts,
-        shared_experts=(
-            shared_experts
-            if moe_config.moe_parallel_config.use_deepep_ll_kernels
-            else None
-        ),
-        moe_parallel_config=moe_config.moe_parallel_config,
         inplace=(
             not moe_config.disable_inplace and mxfp4_backend not in TRTLLM_BACKENDS
         ),
diff --git a/vllm/model_executor/layers/fused_moe/oracle/mxfp8.py b/vllm/model_executor/layers/fused_moe/oracle/mxfp8.py
index ed3af4b5a474..64e6cb93fa80 100644
--- a/vllm/model_executor/layers/fused_moe/oracle/mxfp8.py
+++ b/vllm/model_executor/layers/fused_moe/oracle/mxfp8.py
@@ -15,14 +15,16 @@
 
 logger = init_logger(__name__)
 
-_SUPPORTED_BACKENDS: frozenset[Fp8MoeBackend] = frozenset(
-    {
-        Fp8MoeBackend.FLASHINFER_TRTLLM,
-    }
+_SUPPORTED_BACKENDS = (
+    Fp8MoeBackend.FLASHINFER_TRTLLM,
+    Fp8MoeBackend.MARLIN,
+    Fp8MoeBackend.XPU,
 )
 
 _BACKEND_NAME_MAP: dict[str, Fp8MoeBackend] = {
     "flashinfer_trtllm": Fp8MoeBackend.FLASHINFER_TRTLLM,
+    "marlin": Fp8MoeBackend.MARLIN,
+    "xpu": Fp8MoeBackend.XPU,
 }
 
 
@@ -61,8 +63,6 @@ def select_mxfp8_moe_backend(
     Returns:
         A tuple of (fp8_backend, experts_cls).
     """
-    if config.is_lora_enabled:
-        raise NotImplementedError("LoRA is not supported for MXFP8 MoE.")
 
     runner_backend = config.moe_backend
     if runner_backend != "auto":
@@ -81,7 +81,11 @@ def select_mxfp8_moe_backend(
 
     # Auto-select: pick the first supported backend.
     for backend in _SUPPORTED_BACKENDS:
+        try:
+            experts_cls = _select_kernel_cls(backend, config)
+        except ValueError:
+            continue
         logger.info_once("Using '%s' MxFp8 MoE backend.", backend.value)
-        return backend, _select_kernel_cls(backend, config)
+        return backend, experts_cls
 
     raise ValueError("No MXFP8 MoE backends available.")
diff --git a/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py b/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py
index 35451e87dd7d..5a2c64cecc00 100644
--- a/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py
+++ b/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py
@@ -19,6 +19,7 @@
 )
 from vllm.model_executor.layers.quantization.utils.flashinfer_fp4_moe import (
     prepare_nvfp4_moe_layer_for_fi_or_cutlass,
+    prepare_nvfp4_moe_layer_for_flashinfer_cutedsl,
 )
 from vllm.model_executor.layers.quantization.utils.flashinfer_utils import (
     FlashinferMoeBackend,
@@ -27,6 +28,9 @@
 from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
     prepare_nvfp4_moe_layer_for_marlin,
 )
+from vllm.model_executor.layers.quantization.utils.nvfp4_emulation_utils import (
+    kE2M1ToFloat_handle,
+)
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     QuantKey,
 )
@@ -38,14 +42,19 @@ class NvFp4MoeBackend(Enum):
     FLASHINFER_TRTLLM = "FLASHINFER_TRTLLM"
     FLASHINFER_CUTLASS = "FLASHINFER_CUTLASS"
     FLASHINFER_CUTEDSL = "FLASHINFER_CUTEDSL"
+    FLASHINFER_CUTEDSL_BATCHED = "FLASHINFER_CUTEDSL_BATCHED"
+    FLASHINFER_B12X = "FLASHINFER_B12X"
     VLLM_CUTLASS = "VLLM_CUTLASS"
     MARLIN = "MARLIN"
+    EMULATION = "EMULATION"
 
 
 FLASHINFER_NVFP4_MOE_BACKENDS = [
     NvFp4MoeBackend.FLASHINFER_TRTLLM,
     NvFp4MoeBackend.FLASHINFER_CUTLASS,
     NvFp4MoeBackend.FLASHINFER_CUTEDSL,
+    NvFp4MoeBackend.FLASHINFER_CUTEDSL_BATCHED,
+    NvFp4MoeBackend.FLASHINFER_B12X,
 ]
 
 fi_2_vllm_backend_map: dict[FlashinferMoeBackend, NvFp4MoeBackend] = {
@@ -79,7 +88,7 @@ def backend_to_kernel_cls(
         ]
 
     elif backend == NvFp4MoeBackend.FLASHINFER_CUTLASS:
-        from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.flashinfer_cutlass_moe import (  # noqa: E501
             FlashInferExperts,
         )
 
@@ -92,19 +101,39 @@ def backend_to_kernel_cls(
 
         return [FlashInferCuteDSLExperts]
 
+    elif backend == NvFp4MoeBackend.FLASHINFER_CUTEDSL_BATCHED:
+        from vllm.model_executor.layers.fused_moe.experts.flashinfer_cutedsl_batched_moe import (  # noqa: E501
+            FlashInferCuteDSLBatchedExperts,
+        )
+
+        return [FlashInferCuteDSLBatchedExperts]
+
+    elif backend == NvFp4MoeBackend.FLASHINFER_B12X:
+        from vllm.model_executor.layers.fused_moe.experts.flashinfer_b12x_moe import (  # noqa: E501
+            FlashInferB12xExperts,
+        )
+
+        return [FlashInferB12xExperts]
+
     elif backend == NvFp4MoeBackend.VLLM_CUTLASS:
-        from vllm.model_executor.layers.fused_moe.cutlass_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import (
             CutlassExpertsFp4,
         )
 
         return [CutlassExpertsFp4]
 
     elif backend == NvFp4MoeBackend.MARLIN:
-        from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
+        from vllm.model_executor.layers.fused_moe.experts.marlin_moe import (
             MarlinExperts,
         )
 
         return [MarlinExperts]
+    elif backend == NvFp4MoeBackend.EMULATION:
+        from vllm.model_executor.layers.fused_moe.experts.nvfp4_emulation_moe import (
+            Nvfp4QuantizationEmulationTritonExperts,
+        )
+
+        return [Nvfp4QuantizationEmulationTritonExperts]
     else:
         raise ValueError(f"Unknown NvFP4 MoE backend: {backend.value}")
 
@@ -116,7 +145,9 @@ def map_nvfp4_backend(runner_backend: MoEBackend) -> NvFp4MoeBackend:
         "flashinfer_trtllm": NvFp4MoeBackend.FLASHINFER_TRTLLM,
         "flashinfer_cutlass": NvFp4MoeBackend.FLASHINFER_CUTLASS,
         "flashinfer_cutedsl": NvFp4MoeBackend.FLASHINFER_CUTEDSL,
+        "flashinfer_b12x": NvFp4MoeBackend.FLASHINFER_B12X,
         "marlin": NvFp4MoeBackend.MARLIN,
+        "emulation": NvFp4MoeBackend.EMULATION,
     }
     if backend := mapping.get(runner_backend):
         return backend
@@ -137,18 +168,29 @@ def select_nvfp4_moe_backend(
     """
 
     # NOTE: the kernels are selected in the following order.
+    # FLASHINFER_B12X is intentionally excluded from auto-selection until
+    # the upstream CUTLASS SM121 MMA op guard is resolved; use
+    # moe_backend="flashinfer_b12x" to opt in explicitly.
     AVAILABLE_BACKENDS = [
         NvFp4MoeBackend.FLASHINFER_TRTLLM,
         NvFp4MoeBackend.FLASHINFER_CUTEDSL,
+        NvFp4MoeBackend.FLASHINFER_CUTEDSL_BATCHED,
         NvFp4MoeBackend.FLASHINFER_CUTLASS,
         NvFp4MoeBackend.VLLM_CUTLASS,
         NvFp4MoeBackend.MARLIN,
+        NvFp4MoeBackend.EMULATION,
     ]
 
-    # NOTE(rob): this is kind of a hack. We need to peak into
-    # the prepare-finalize selection to determine if we are using
-    # the batched or standard expert format.
-    use_batched = config.moe_parallel_config.use_deepep_ll_kernels
+    NVFP4_BACKENDS_WITH_CLAMP = {
+        NvFp4MoeBackend.FLASHINFER_TRTLLM,
+    }
+
+    if config.swiglu_limit is not None:
+        AVAILABLE_BACKENDS = [
+            b for b in AVAILABLE_BACKENDS if b in NVFP4_BACKENDS_WITH_CLAMP
+        ]
+
+    use_batched = config.moe_parallel_config.use_batched_activation_format
     activation_format = (
         mk.FusedMoEActivationFormat.BatchedExperts
         if use_batched
@@ -195,6 +237,22 @@ def _return_or_raise(
     runner_backend = config.moe_backend
     if runner_backend != "auto":
         requested_backend = map_nvfp4_backend(runner_backend)
+        # For batched activation format, use batched variant if available.
+        if (
+            activation_format == mk.FusedMoEActivationFormat.BatchedExperts
+            and requested_backend == NvFp4MoeBackend.FLASHINFER_CUTEDSL
+        ):
+            requested_backend = NvFp4MoeBackend.FLASHINFER_CUTEDSL_BATCHED
+        if (
+            config.swiglu_limit is not None
+            and requested_backend not in NVFP4_BACKENDS_WITH_CLAMP
+        ):
+            raise ValueError(
+                f"Model sets swiglu_limit={config.swiglu_limit}, but the "
+                f"explicitly requested moe_backend={runner_backend!r} does "
+                f"not apply the SwiGLU clamp. Use 'flashinfer_trtllm' or "
+                f"'flashinfer_cutlass' instead."
+            )
         return _return_or_raise(
             requested_backend, config, weight_key, activation_key, activation_format
         )
@@ -203,17 +261,32 @@ def _return_or_raise(
         if not envs.VLLM_USE_FLASHINFER_MOE_FP4:
             # If the user rejects FlashInfer remove those backends.
             for b in FLASHINFER_NVFP4_MOE_BACKENDS:
-                AVAILABLE_BACKENDS.remove(b)
+                if b in AVAILABLE_BACKENDS:
+                    AVAILABLE_BACKENDS.remove(b)
 
         elif envs.is_set("VLLM_FLASHINFER_MOE_BACKEND"):
             # If user is explicit about backend, validate it.
             backend = fi_2_vllm_backend_map[get_flashinfer_moe_backend()]
+            if (
+                config.swiglu_limit is not None
+                and backend not in NVFP4_BACKENDS_WITH_CLAMP
+            ):
+                raise ValueError(
+                    f"Model sets swiglu_limit={config.swiglu_limit}, but the "
+                    f"FlashInfer backend selected via VLLM_FLASHINFER_MOE_BACKEND "
+                    f"({backend.value}) does not apply the SwiGLU clamp."
+                )
             return _return_or_raise(
                 backend, config, weight_key, activation_key, activation_format
             )
         else:
             # If the user is not explicit about the backend, try each.
-            for backend in FLASHINFER_NVFP4_MOE_BACKENDS:
+            fi_backends = [
+                b
+                for b in FLASHINFER_NVFP4_MOE_BACKENDS
+                if config.swiglu_limit is None or b in NVFP4_BACKENDS_WITH_CLAMP
+            ]
+            for backend in fi_backends:
                 for k_cls in backend_to_kernel_cls(backend):
                     supported, reason = k_cls.is_supported_config(
                         k_cls,
@@ -223,12 +296,10 @@ def _return_or_raise(
                         activation_format,
                     )
                     if supported:
-                        logger.info_once(_make_log_backend(backend), scope="local")
+                        logger.info_once(_make_log_backend(backend))
                         return backend, k_cls
                     else:
-                        logger.debug_once(
-                            _make_log_unsupported(backend, reason), scope="local"
-                        )
+                        logger.debug_once(_make_log_unsupported(backend, reason))
 
             raise NotImplementedError(
                 "Found VLLM_USE_FLASHINFER_MOE_FP4=1, but no "
@@ -251,12 +322,11 @@ def _return_or_raise(
                 activation_key,
                 activation_format,
             )
-
             if supported:
-                logger.info_once(_make_log_backend(backend), scope="local")
+                logger.info_once(_make_log_backend(backend))
                 return backend, k_cls
             else:
-                logger.debug_once(_make_log_unsupported(backend, reason), scope="local")
+                logger.debug_once(_make_log_unsupported(backend, reason))
 
     raise NotImplementedError(
         "No NvFp4 MoE backend supports the deployment configuration."
@@ -285,7 +355,28 @@ def convert_to_nvfp4_moe_kernel_format(
     torch.Tensor,
     torch.Tensor,
 ]:
-    if (
+    if nvfp4_backend == NvFp4MoeBackend.FLASHINFER_CUTEDSL:
+        (
+            w13,
+            w13_scale,
+            w13_scale_2,
+            a13_scale,
+            w2,
+            w2_scale,
+            w2_scale_2,
+            a2_scale,
+        ) = prepare_nvfp4_moe_layer_for_flashinfer_cutedsl(
+            layer=layer,
+            w13=w13,
+            w13_scale=w13_scale,
+            w13_scale_2=w13_scale_2,
+            a13_scale=a13_scale,
+            w2=w2,
+            w2_scale=w2_scale,
+            w2_scale_2=w2_scale_2,
+            a2_scale=a2_scale,
+        )
+    elif (
         nvfp4_backend in FLASHINFER_NVFP4_MOE_BACKENDS
         or nvfp4_backend == NvFp4MoeBackend.VLLM_CUTLASS
     ):
@@ -331,6 +422,34 @@ def convert_to_nvfp4_moe_kernel_format(
             w2_scale_2=w2_scale_2,
             is_act_and_mul=is_act_and_mul,
         )
+    elif nvfp4_backend == NvFp4MoeBackend.EMULATION:
+        # Move the E2M1 lookup table to the device now, because
+        # `.to(device)` is not allowed during CUDA graph capture.
+        kE2M1ToFloat_handle.val = kE2M1ToFloat_handle.val.to(w13.device)
+
+        if a13_scale is None or a2_scale is None:
+            raise ValueError(
+                "Activation global scales should not be None, got"
+                f" a13_scale={a13_scale}, a2_scale={a2_scale}"
+            )
+
+        if torch.unique(a13_scale).numel() != 1 or torch.unique(a2_scale).numel() != 1:
+            logger.warning_once(
+                "In NVFP4 linear, the activation global scale for inputs are different"
+                " for MOE w13 (gate_up_proj) layer or MOE w2 (down_proj). Using"
+                " a13_scale = a13_scale.max() and a2_scale = a2_scale.max()."
+            )
+
+        # 1. We take the max following e.g. quantization/utils/flashinfer_fp4_moe.py.
+        # 2. moe_kernel_quantize_input -> ref_nvfp4_quant_dequant
+        # use the inverse scale directly (large global scale).
+        # NOTE: Before this point, `a13_scale` and `a2_scale` are such that:
+        # `FP8_MAX = activation[expert_id].abs().max() * global_scale[expert_id]`,
+        # and `global_scale[expert_id]` are small (~1e-4).
+        # Taking the largest global scale likely results in overflowing the FP8 range
+        # for other experts - other selection strategies may be used.
+        a13_scale = 1.0 / a13_scale.max().to(torch.float32)
+        a2_scale = 1.0 / a2_scale.max().to(torch.float32)
     else:
         raise ValueError(f"Unknown NvFp4 backend for MoE: {nvfp4_backend}")
 
@@ -354,6 +473,7 @@ def make_nvfp4_moe_quant_config(
     w2_scale_2: torch.Tensor,
     a13_scale: torch.Tensor,
     a2_scale: torch.Tensor,
+    swiglu_limit: float | None = None,
 ) -> FusedMoEQuantConfig:
     if backend == NvFp4MoeBackend.MARLIN:
         return nvfp4_w4a16_moe_quant_config(
@@ -362,6 +482,16 @@ def make_nvfp4_moe_quant_config(
             w1_scale=w13_scale,
             w2_scale=w2_scale,
         )
+    elif backend == NvFp4MoeBackend.EMULATION:
+        return nvfp4_moe_quant_config(
+            g1_alphas=w13_scale_2,
+            g2_alphas=w2_scale_2,
+            a1_gscale=a13_scale,
+            a2_gscale=a2_scale,
+            w1_scale=w13_scale,
+            w2_scale=w2_scale,
+            gemm1_clamp_limit=swiglu_limit,
+        )
 
     # Pass w13_scale_2 / w2_scale_2 directly as g1/g2_alphas.
     # The expert's process_weights_after_loading will fuse activation
@@ -377,7 +507,14 @@ def make_nvfp4_moe_quant_config(
         # NOTE(rob): this is a hack until the MoE kernels
         # create their own quant configs. TRTLLM kernel
         # does not accept swizzled input quant scales.
-        is_nvfp4_scale_swizzled=(backend != NvFp4MoeBackend.FLASHINFER_TRTLLM),
+        is_scale_swizzled=(
+            backend
+            not in (
+                NvFp4MoeBackend.FLASHINFER_TRTLLM,
+                NvFp4MoeBackend.FLASHINFER_CUTEDSL,
+            )
+        ),
+        gemm1_clamp_limit=swiglu_limit,
     )
 
 
@@ -386,7 +523,6 @@ def make_nvfp4_moe_kernel(
     moe_config: FusedMoEConfig,
     experts_cls: type[mk.FusedMoEExperts],
     routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
-    shared_experts: torch.nn.Module | None = None,
 ) -> mk.FusedMoEKernel:
     # Create Prepare/Finalize.
     prepare_finalize = maybe_make_prepare_finalize(
@@ -416,18 +552,9 @@ def make_nvfp4_moe_kernel(
             quant_config=moe_quant_config,
         )
 
-    # NOTE(rob): we only want the mk to control the shared_expert
-    # if using all2all (for SBO). bnell is making this explicit in
-    # the new MoE runner class.
     kernel = mk.FusedMoEKernel(
         prepare_finalize,
         experts,
-        shared_experts=(
-            shared_experts
-            if moe_config.moe_parallel_config.use_deepep_ll_kernels
-            else None
-        ),
-        moe_parallel_config=moe_config.moe_parallel_config,
         inplace=False,
     )
 
diff --git a/vllm/model_executor/layers/fused_moe/oracle/unquantized.py b/vllm/model_executor/layers/fused_moe/oracle/unquantized.py
index 9c31da10dd94..9c0df9d153bf 100644
--- a/vllm/model_executor/layers/fused_moe/oracle/unquantized.py
+++ b/vllm/model_executor/layers/fused_moe/oracle/unquantized.py
@@ -11,21 +11,20 @@
 from vllm._aiter_ops import rocm_aiter_ops
 from vllm.config.kernel import MoEBackend
 from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe.all2all_utils import (
+    maybe_make_prepare_finalize,
+)
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEConfig,
     FusedMoEQuantConfig,
 )
-from vllm.model_executor.layers.fused_moe.flashinfer_trtllm_moe import (
-    is_supported_config_trtllm_bf16,
-)
-from vllm.model_executor.layers.fused_moe.prepare_finalize import (
-    MoEPrepareAndFinalizeNoDPEPModular,
-)
 from vllm.model_executor.layers.quantization.utils.flashinfer_utils import (
+    FlashinferMoeBackend,
+    convert_moe_weights_to_flashinfer_trtllm_block_layout,
+    get_flashinfer_moe_backend,
     swap_w13_to_w31,
 )
 from vllm.platforms import current_platform
-from vllm.utils.flashinfer import has_flashinfer, has_flashinfer_cutlass_fused_moe
 
 logger = init_logger(__name__)
 
@@ -35,21 +34,98 @@ class UnquantizedMoeBackend(Enum):
     FLASHINFER_CUTLASS = "FlashInfer CUTLASS"
     AITER = "ROCm AITER"
     TRITON = "TRITON"
+    BATCHED_TRITON = "BATCHED_TRITON"
     CPU = "CPU"
     XPU = "XPU"
     TPU = "TPU"
     OOT = "OOT"
 
 
-# NOTE(zyongye): Unsupported backend means backend
-# that is not conform with Modular kernel format.
-# We will directly call the kernel for those backend
-UNSUPPORTED_BACKEND = [
-    UnquantizedMoeBackend.FLASHINFER_TRTLLM,
-    UnquantizedMoeBackend.CPU,
-    UnquantizedMoeBackend.TPU,
-    UnquantizedMoeBackend.OOT,
-]
+def _get_priority_backends(moe_config: FusedMoEConfig) -> list[UnquantizedMoeBackend]:
+    """
+    Get available backends in priority order based on platform and config.
+
+    This function can be extended to become more complex as needed.
+    """
+
+    def _move_to_back(
+        backends: list[UnquantizedMoeBackend],
+        backend: UnquantizedMoeBackend,
+    ) -> None:
+        backends.append(backends.pop(backends.index(backend)))
+
+    if current_platform.is_rocm():
+        _AVAILABLE_BACKENDS = [
+            UnquantizedMoeBackend.AITER,
+            UnquantizedMoeBackend.TRITON,
+            UnquantizedMoeBackend.BATCHED_TRITON,
+        ]
+    elif current_platform.is_cuda():
+        _AVAILABLE_BACKENDS = [
+            UnquantizedMoeBackend.FLASHINFER_TRTLLM,
+            UnquantizedMoeBackend.FLASHINFER_CUTLASS,
+            UnquantizedMoeBackend.TRITON,
+            UnquantizedMoeBackend.BATCHED_TRITON,
+        ]
+
+        # HACK: Qwen3.5 has crash with FLASHINFER_CUTLASS BF16 if DEP.
+        # Updating the oracle querying logic is out of the scope of this
+        # PR. Need to fix the kernel or update structure in follow up.
+        if moe_config.moe_parallel_config.dp_size > 1:
+            _move_to_back(_AVAILABLE_BACKENDS, UnquantizedMoeBackend.FLASHINFER_CUTLASS)
+
+    elif current_platform.is_xpu():
+        _AVAILABLE_BACKENDS = [UnquantizedMoeBackend.XPU]
+    elif current_platform.is_cpu():
+        _AVAILABLE_BACKENDS = [UnquantizedMoeBackend.CPU]
+    return _AVAILABLE_BACKENDS
+
+
+def backend_to_kernel_cls(
+    backend: UnquantizedMoeBackend,
+) -> type[mk.FusedMoEExperts]:
+    if backend == UnquantizedMoeBackend.FLASHINFER_TRTLLM:
+        from vllm.model_executor.layers.fused_moe.experts.trtllm_bf16_moe import (
+            TrtLlmBf16Experts,
+        )
+
+        return TrtLlmBf16Experts
+
+    elif backend == UnquantizedMoeBackend.FLASHINFER_CUTLASS:
+        from vllm.model_executor.layers.fused_moe.experts.flashinfer_cutlass_moe import (  # noqa: E501
+            FlashInferExperts,
+        )
+
+        return FlashInferExperts
+
+    elif backend == UnquantizedMoeBackend.AITER:
+        from vllm.model_executor.layers.fused_moe.experts.rocm_aiter_moe import (
+            AiterExperts,
+        )
+
+        return AiterExperts
+
+    elif backend == UnquantizedMoeBackend.TRITON:
+        from vllm.model_executor.layers.fused_moe.experts.triton_moe import (
+            TritonExperts,
+        )
+
+        return TritonExperts
+
+    elif backend == UnquantizedMoeBackend.BATCHED_TRITON:
+        from vllm.model_executor.layers.fused_moe.experts.fused_batched_moe import (
+            BatchedTritonExperts,
+        )
+
+        return BatchedTritonExperts
+
+    elif backend == UnquantizedMoeBackend.XPU:
+        from vllm.model_executor.layers.fused_moe.experts.xpu_moe import XPUExperts
+
+        return XPUExperts
+
+    else:
+        raise ValueError(f"Unknown unquantized MoE backend: {backend.value}")
 
 
 def map_unquantized_backend(runner_backend: MoEBackend) -> UnquantizedMoeBackend:
@@ -70,196 +146,220 @@ def map_unquantized_backend(runner_backend: MoEBackend) -> UnquantizedMoeBackend
 
 def select_unquantized_moe_backend(
     moe_config: FusedMoEConfig,
-    use_ep: bool,
-    use_dp: bool,
-) -> UnquantizedMoeBackend:
+) -> tuple[UnquantizedMoeBackend, type[mk.FusedMoEExperts] | None]:
     """
-    Select the primary Unquantized MoE backend
+    Select the primary Unquantized MoE backend.
     Note: Shape-specific fallbacks may still occur at runtime.
     """
 
-    def _make_log_backend(backend: UnquantizedMoeBackend):
-        return f"Using {backend.value} backend for Unquantized MoE"
+    if current_platform.is_cpu():
+        # TODO: migrate to MK structure.
+        return UnquantizedMoeBackend.CPU, None
+
+    if current_platform.is_tpu():
+        return UnquantizedMoeBackend.TPU, None
+
+    if current_platform.is_out_of_tree():
+        return UnquantizedMoeBackend.OOT, None
+
+    if moe_config.is_lora_enabled:
+        return UnquantizedMoeBackend.TRITON, backend_to_kernel_cls(
+            UnquantizedMoeBackend.TRITON
+        )
+
+    # NOTE: the kernels are selected in the following order.
+    AVAILABLE_BACKENDS = _get_priority_backends(moe_config)
 
+    # NOTE(rob): We need to peak into the P/F selection to determine
+    # if we are using the batched or standard expert format, which
+    # if not ideal. Once we unify TP + DP/EP, we can select P/F first.
     activation_format = (
         mk.FusedMoEActivationFormat.BatchedExperts
         if moe_config.moe_parallel_config.use_batched_activation_format
         else mk.FusedMoEActivationFormat.Standard
     )
 
-    # Check if FlashInfer TRTLLM BF16 MoE is supported
-    trtllm_supported, _ = is_supported_config_trtllm_bf16(
-        moe_config=moe_config,
-        activation_format=activation_format,
-    )
-    flashinfer_trtllm_available = has_flashinfer() and trtllm_supported
-    # FlashInfer CUTLASS MoE is only supported on Hopper and later GPUS
-    flashinfer_cutlass_available = (
-        has_flashinfer_cutlass_fused_moe()
-        and use_ep
-        and (not use_dp)
-        and current_platform.has_device_capability(90)
-    )
-    flashinfer_trtllm_moe_enabled = (
-        flashinfer_trtllm_available
-        and envs.VLLM_USE_FLASHINFER_MOE_FP16
-        and envs.VLLM_FLASHINFER_MOE_BACKEND == "latency"
-    )
-    flashinfer_cutlass_moe_enabled = (
-        flashinfer_cutlass_available and envs.VLLM_USE_FLASHINFER_MOE_FP16
-    )
-    rocm_aiter_moe_enabled = rocm_aiter_ops.is_fused_moe_enabled()
+    def _make_log_backend(backend: UnquantizedMoeBackend) -> str:
+        available_strs = [b.value for b in AVAILABLE_BACKENDS]
+        return (
+            f"Using {backend.value} Unquantized MoE backend out "
+            f"of potential backends: {available_strs}."
+        )
+
+    def _make_log_unsupported(
+        backend: UnquantizedMoeBackend, reason: str | None
+    ) -> str:
+        if reason:
+            return (
+                f"Unquantized MoE backend {backend.value} does not support the "
+                f"deployment configuration since {reason}."
+            )
+        return (
+            f"Unquantized MoE backend '{backend.value}' does not support the "
+            "deployment configuration."
+        )
+
+    def _return_or_raise(
+        backend: UnquantizedMoeBackend,
+        config: FusedMoEConfig,
+        activation_format: mk.FusedMoEActivationFormat,
+    ) -> tuple[UnquantizedMoeBackend, type[mk.FusedMoEExperts] | None]:
+        k_cls = backend_to_kernel_cls(backend)
+        supported, reason = k_cls.is_supported_config(
+            k_cls, config, None, None, activation_format
+        )
+        if supported:
+            logger.info_once(_make_log_backend(backend))
+            return backend, k_cls
+        raise ValueError(_make_log_unsupported(backend, reason))
 
-    # Handle explicit moe_backend from user.
     runner_backend = moe_config.moe_backend
     if runner_backend != "auto":
         requested_backend = map_unquantized_backend(runner_backend)
-        if requested_backend == UnquantizedMoeBackend.FLASHINFER_TRTLLM:
-            if not flashinfer_trtllm_available:
+        if (
+            activation_format == mk.FusedMoEActivationFormat.BatchedExperts
+            and requested_backend == UnquantizedMoeBackend.TRITON
+        ):
+            requested_backend = UnquantizedMoeBackend.BATCHED_TRITON
+
+        return _return_or_raise(requested_backend, moe_config, activation_format)
+
+    # Handle explicit FlashInfer FP16 configuration.
+    if envs.is_set("VLLM_USE_FLASHINFER_MOE_FP16"):
+        if not envs.VLLM_USE_FLASHINFER_MOE_FP16:
+            if UnquantizedMoeBackend.FLASHINFER_TRTLLM in AVAILABLE_BACKENDS:
+                AVAILABLE_BACKENDS.remove(UnquantizedMoeBackend.FLASHINFER_TRTLLM)
+            if UnquantizedMoeBackend.FLASHINFER_CUTLASS in AVAILABLE_BACKENDS:
+                AVAILABLE_BACKENDS.remove(UnquantizedMoeBackend.FLASHINFER_CUTLASS)
+
+        elif envs.is_set("VLLM_FLASHINFER_MOE_BACKEND"):
+            # If user is explicit about backend, validate it.
+            fi_backend = get_flashinfer_moe_backend()
+            if fi_backend == FlashinferMoeBackend.CUTLASS:
+                backend = UnquantizedMoeBackend.FLASHINFER_CUTLASS
+            elif fi_backend == FlashinferMoeBackend.TENSORRT_LLM:
+                backend = UnquantizedMoeBackend.FLASHINFER_TRTLLM
+            else:
                 raise ValueError(
-                    "FlashInfer TRTLLM MoE backend is not available for this "
-                    "configuration."
+                    f"FlashInfer MOE backend {fi_backend} "
+                    "does not support unquantized MoE."
                 )
-        elif requested_backend == UnquantizedMoeBackend.FLASHINFER_CUTLASS:
-            if not flashinfer_cutlass_available:
-                raise ValueError(
-                    "FlashInfer CUTLASS MoE backend is not available for this "
-                    "configuration."
+            k_cls = backend_to_kernel_cls(backend)
+            return _return_or_raise(backend, moe_config, activation_format)
+        else:
+            # If the user is not explicit about the backend, try both.
+            for backend in [
+                UnquantizedMoeBackend.FLASHINFER_TRTLLM,
+                UnquantizedMoeBackend.FLASHINFER_CUTLASS,
+            ]:
+                k_cls = backend_to_kernel_cls(backend)
+                supported, reason = k_cls.is_supported_config(
+                    k_cls, moe_config, None, None, activation_format
                 )
-        elif requested_backend == UnquantizedMoeBackend.AITER and not (
-            current_platform.is_rocm() and rocm_aiter_moe_enabled
-        ):
-            raise ValueError(
-                "ROCm AITer MoE backend is not available for this configuration."
+                if supported:
+                    logger.info_once(_make_log_backend(backend))
+                    return backend, k_cls
+                else:
+                    logger.debug_once(_make_log_unsupported(backend, reason))
+
+            raise NotImplementedError(
+                "Found VLLM_USE_FLASHINFER_MOE_FP16=1, but no "
+                "FlashInfer unquantized MoE backend supports the configuration."
             )
-        logger.info_once(_make_log_backend(requested_backend), scope="local")
-        return requested_backend
 
-    if current_platform.is_rocm():
-        if rocm_aiter_moe_enabled:
-            backend = UnquantizedMoeBackend.AITER
-        else:
-            backend = UnquantizedMoeBackend.TRITON
-    if current_platform.is_cuda():
-        if flashinfer_trtllm_moe_enabled:
-            backend = UnquantizedMoeBackend.FLASHINFER_TRTLLM
-        elif flashinfer_cutlass_moe_enabled:
-            backend = UnquantizedMoeBackend.FLASHINFER_CUTLASS
-            if trtllm_supported:
-                logger.info_once(
-                    "FlashInfer TRTLLM MoE is available but not enabled, "
-                    "consider setting VLLM_FLASHINFER_MOE_BACKEND=latency "
-                    "to enable it for better performance.",
-                    scope="local",
-                )
+    # Handle explicit AITER FP8 configuration.
+    if envs.is_set("VLLM_ROCM_USE_AITER") or envs.is_set("VLLM_ROCM_USE_AITER_MOE"):
+        if not envs.VLLM_ROCM_USE_AITER or not envs.VLLM_ROCM_USE_AITER_MOE:
+            if UnquantizedMoeBackend.AITER in AVAILABLE_BACKENDS:
+                AVAILABLE_BACKENDS.remove(UnquantizedMoeBackend.AITER)
         else:
-            if not envs.VLLM_USE_FLASHINFER_MOE_FP16 and trtllm_supported:
-                logger.info_once(
-                    "FlashInfer TRTLLM MoE is available but not enabled, "
-                    "consider setting VLLM_USE_FLASHINFER_MOE_FP16=1 "
-                    "and VLLM_FLASHINFER_MOE_BACKEND=latency "
-                    "to enable it for better performance.",
-                    scope="local",
-                )
-            elif use_ep and (not use_dp):
-                logger.info_once(
-                    "FlashInfer MoE is available for EP"
-                    " but not enabled, consider setting"
-                    " VLLM_USE_FLASHINFER_MOE_FP16=1 to enable it.",
-                    scope="local",
-                )
-            elif use_dp:
-                logger.info_once(
-                    "FlashInfer CUTLASS MoE is currently not available for DP.",
-                    scope="local",
-                )
-            backend = UnquantizedMoeBackend.TRITON
-    if current_platform.is_xpu():
-        backend = UnquantizedMoeBackend.XPU
-    if current_platform.is_cpu():
-        backend = UnquantizedMoeBackend.CPU
-    if current_platform.is_tpu():
-        backend = UnquantizedMoeBackend.TPU
-    if current_platform.is_out_of_tree():
-        backend = UnquantizedMoeBackend.OOT
+            backend = UnquantizedMoeBackend.AITER
+            return _return_or_raise(backend, moe_config, activation_format)
+
+    for backend in AVAILABLE_BACKENDS:
+        k_cls = backend_to_kernel_cls(backend)
+        supported, reason = k_cls.is_supported_config(
+            k_cls, moe_config, None, None, activation_format
+        )
+        if supported:
+            logger.info_once(_make_log_backend(backend))
+            return backend, k_cls
 
-    logger.info_once(_make_log_backend(backend), scope="local")
-    return backend
+        logger.debug_once(_make_log_unsupported(backend, reason))
+
+    raise NotImplementedError(
+        "No Unquantized MoE backend supports the deployment configuration."
+    )
 
 
 def convert_to_unquantized_kernel_format(
     unquantized_backend: UnquantizedMoeBackend,
     layer: Module,
-    w13_weight: torch.Tensor | None = None,
-    w2_weight: torch.Tensor | None = None,
+    w13_weight: torch.Tensor,
+    w2_weight: torch.Tensor,
 ) -> tuple[torch.Tensor, torch.Tensor]:
     if unquantized_backend == UnquantizedMoeBackend.AITER:
-        w13_weight, w2_weight = rocm_aiter_ops.shuffle_weights(
-            layer.w13_weight.data, layer.w2_weight.data
-        )
+        w13_weight, w2_weight = rocm_aiter_ops.shuffle_weights(w13_weight, w2_weight)
 
     elif unquantized_backend == UnquantizedMoeBackend.FLASHINFER_CUTLASS:
+        if layer.moe_config.is_act_and_mul:
+            # Swap halves to arrange as [w3; w1] (kernel expectation)
+            # Non-gated MoE: w13 is a single projection, no need to swap.
+            w13_weight = swap_w13_to_w31(w13_weight)
+
+    elif unquantized_backend == UnquantizedMoeBackend.FLASHINFER_TRTLLM:
         # Swap halves to arrange as [w3; w1] (kernel expectation)
-        w13_weight = swap_w13_to_w31(layer.w13_weight.data)
+        w13_weight = swap_w13_to_w31(w13_weight)
+        _cache_permute_indices: dict[torch.Size, torch.Tensor] = {}
+        w13_weight, w2_weight = convert_moe_weights_to_flashinfer_trtllm_block_layout(
+            _cache_permute_indices,
+            w13_weight,
+            w2_weight,
+        )
 
-    return w13_weight, w2_weight
+    return w13_weight.contiguous(), w2_weight.contiguous()
 
 
 def make_unquantized_moe_kernel(
-    backend: UnquantizedMoeBackend,
     quant_config: FusedMoEQuantConfig,
     moe_config: FusedMoEConfig,
-) -> mk.FusedMoEKernel | None:
-    if backend in UNSUPPORTED_BACKEND:
-        return None
+    backend: UnquantizedMoeBackend,
+    experts_cls: type[mk.FusedMoEExperts],
+    routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
+) -> mk.FusedMoEKernel:
+    # Create Prepare/Finalize
+    is_monolithic = issubclass(experts_cls, mk.FusedMoEExpertsMonolithic)
+    prepare_finalize = maybe_make_prepare_finalize(
+        moe=moe_config,
+        quant_config=quant_config,
+        routing_tables=routing_tables,
+        allow_new_interface=True,
+        use_monolithic=is_monolithic,
+    )
+    assert prepare_finalize is not None
 
-    if backend == UnquantizedMoeBackend.FLASHINFER_CUTLASS:
-        from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (
-            FlashInferExperts,
-        )
+    logger.info_once("Using %s", prepare_finalize.__class__.__name__)
 
-        kernel = mk.FusedMoEKernel(
-            MoEPrepareAndFinalizeNoDPEPModular(),
-            FlashInferExperts(
-                moe_config=moe_config,
-                quant_config=quant_config,
-            ),
-            inplace=False,
+    # Create Experts
+    if prepare_finalize.activation_format == mk.FusedMoEActivationFormat.BatchedExperts:
+        max_num_tokens = prepare_finalize.max_num_tokens_per_rank()
+        assert max_num_tokens is not None
+        experts = experts_cls(
+            moe_config=moe_config,
+            quant_config=quant_config,
+            max_num_tokens=max_num_tokens,
+            num_dispatchers=prepare_finalize.num_dispatchers(),
         )
-
-    elif backend == UnquantizedMoeBackend.AITER:
-        from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (
-            AiterExperts,
+    else:
+        experts = experts_cls(
+            moe_config=moe_config,
+            quant_config=quant_config,
         )
 
-        kernel = mk.FusedMoEKernel(
-            MoEPrepareAndFinalizeNoDPEPModular(),
-            AiterExperts(
-                moe_config=moe_config,
-                quant_config=quant_config,
-            ),
-            inplace=not moe_config.disable_inplace,
-        )
-    elif backend == UnquantizedMoeBackend.TRITON:
-        from vllm.model_executor.layers.fused_moe import TritonExperts
-
-        kernel = mk.FusedMoEKernel(
-            MoEPrepareAndFinalizeNoDPEPModular(),
-            TritonExperts(
-                moe_config=moe_config,
-                quant_config=quant_config,
-            ),
-            inplace=not moe_config.disable_inplace,
-        )
-    elif backend == UnquantizedMoeBackend.XPU:
-        from vllm.model_executor.layers.fused_moe import XPUExperts
-
-        kernel = mk.FusedMoEKernel(
-            MoEPrepareAndFinalizeNoDPEPModular(),
-            XPUExperts(
-                moe_config=moe_config,
-                quant_config=quant_config,
-            ),
-            inplace=not moe_config.disable_inplace,
-        )
+    kernel = mk.FusedMoEKernel(
+        prepare_finalize,
+        experts,
+        inplace=(not moe_config.disable_inplace and not is_monolithic),
+    )
+
     return kernel
diff --git a/vllm/model_executor/layers/fused_moe/oracle/w4a8.py b/vllm/model_executor/layers/fused_moe/oracle/w4a8.py
new file mode 100644
index 000000000000..e886d9d5da5b
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/oracle/w4a8.py
@@ -0,0 +1,196 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from enum import Enum
+from typing import TYPE_CHECKING
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe.all2all_utils import (
+    maybe_make_prepare_finalize,
+)
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEQuantConfig,
+    int4_w4afp8_moe_quant_config,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    QuantKey,
+    kFp8DynamicTokenSym,
+    kInt4Static,
+)
+
+if TYPE_CHECKING:
+    from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import (
+        CutlassExpertsW4A8Fp8,
+    )
+
+logger = init_logger(__name__)
+
+
+class W4A8MoeBackend(Enum):
+    CUTLASS = "CUTLASS"
+
+
+def backend_to_kernel_cls(
+    backend: W4A8MoeBackend,
+) -> list[type["CutlassExpertsW4A8Fp8"]]:
+    if backend == W4A8MoeBackend.CUTLASS:
+        from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import (
+            CutlassExpertsW4A8Fp8,
+        )
+
+        return [CutlassExpertsW4A8Fp8]
+    else:
+        raise ValueError(f"Unknown W4A8 MoE backend: {backend.value}")
+
+
+def select_w4a8_moe_backend(
+    config: FusedMoEConfig,
+    weight_key: QuantKey | None = kInt4Static,
+    activation_key: QuantKey | None = kFp8DynamicTokenSym,
+) -> tuple[W4A8MoeBackend, type["CutlassExpertsW4A8Fp8"]]:
+    backend = W4A8MoeBackend.CUTLASS
+
+    activation_format = (
+        mk.FusedMoEActivationFormat.BatchedExperts
+        if config.moe_parallel_config.use_batched_activation_format
+        else mk.FusedMoEActivationFormat.Standard
+    )
+
+    last_reason: str | None = None
+    for kernel_cls in backend_to_kernel_cls(backend):
+        supported, reason = kernel_cls.is_supported_config(
+            kernel_cls,
+            config,
+            weight_key,
+            activation_key,
+            activation_format,
+        )
+        if supported:
+            logger.info_once("Using %s W4A8 MoE backend.", backend.value)
+            return backend, kernel_cls
+        last_reason = reason
+
+    raise NotImplementedError(
+        f"W4A8 MoE backend {backend.value} does not support the "
+        f"deployment configuration: {last_reason}."
+    )
+
+
+def convert_to_w4a8_moe_kernel_format(
+    w13_weight_packed: torch.Tensor,
+    w2_weight_packed: torch.Tensor,
+    w13_weight_scale: torch.Tensor,
+    w2_weight_scale: torch.Tensor,
+) -> tuple[
+    torch.Tensor,
+    torch.Tensor,
+    torch.Tensor,
+    torch.Tensor,
+    torch.Tensor,
+    torch.Tensor,
+    torch.Tensor,
+    torch.Tensor,
+]:
+    from vllm import _custom_ops as ops
+    from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
+    from vllm.model_executor.layers.quantization.utils.quant_utils import (
+        GroupShape,
+        convert_bf16_scales_to_fp8,
+        convert_packed_uint4b8_to_signed_int4_inplace,
+    )
+
+    quant_fp8 = QuantFP8(static=False, group_shape=GroupShape.PER_TOKEN)
+
+    convert_packed_uint4b8_to_signed_int4_inplace(w13_weight_packed)
+    # Mirror the sync in CutlassW4A8LinearKernel; required for TP>1 correctness.
+    torch.accelerator.synchronize()
+    w13_weight_shuffled, b_strides1 = ops.cutlass_encode_and_reorder_int4b_grouped(
+        w13_weight_packed
+    )
+
+    convert_packed_uint4b8_to_signed_int4_inplace(w2_weight_packed)
+    # Mirror the sync in CutlassW4A8LinearKernel; required for TP>1 correctness.
+    torch.accelerator.synchronize()
+    w2_weight_shuffled, b_strides2 = ops.cutlass_encode_and_reorder_int4b_grouped(
+        w2_weight_packed
+    )
+
+    w13_weight_scale, w13_weight_chan_scale = convert_bf16_scales_to_fp8(
+        quant_fp8, w13_weight_scale
+    )
+    w2_weight_scale, w2_weight_chan_scale = convert_bf16_scales_to_fp8(
+        quant_fp8, w2_weight_scale
+    )
+
+    # Scales are stored as (E, N, K // 128), but the kernel expects
+    # (E, K // 128, N) in row-major format.
+    w13_weight_scale_packed = ops.cutlass_pack_scale_fp8(
+        w13_weight_scale.permute(0, 2, 1).contiguous()
+    )
+    w2_weight_scale_packed = ops.cutlass_pack_scale_fp8(
+        w2_weight_scale.permute(0, 2, 1).contiguous()
+    )
+
+    return (
+        w13_weight_shuffled,
+        w2_weight_shuffled,
+        w13_weight_scale_packed,
+        w2_weight_scale_packed,
+        w13_weight_chan_scale,
+        w2_weight_chan_scale,
+        b_strides1,
+        b_strides2,
+    )
+
+
+def make_w4a8_moe_quant_config(
+    w1_scale: torch.Tensor,
+    w2_scale: torch.Tensor,
+    g1_alphas: torch.Tensor,
+    g2_alphas: torch.Tensor,
+) -> FusedMoEQuantConfig:
+    return int4_w4afp8_moe_quant_config(
+        w1_scale=w1_scale,
+        w2_scale=w2_scale,
+        g1_alphas=g1_alphas,
+        g2_alphas=g2_alphas,
+        per_act_token_quant=True,
+        per_out_ch_quant=True,
+    )
+
+
+def make_w4a8_moe_kernel(
+    moe_quant_config: FusedMoEQuantConfig,
+    moe_config: FusedMoEConfig,
+    experts_cls: type["CutlassExpertsW4A8Fp8"],
+    b_strides1: torch.Tensor,
+    b_strides2: torch.Tensor,
+    group_size: int,
+    routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
+) -> mk.FusedMoEKernel:
+    prepare_finalize = maybe_make_prepare_finalize(
+        moe=moe_config,
+        quant_config=moe_quant_config,
+        routing_tables=routing_tables,
+        allow_new_interface=True,
+    )
+    assert prepare_finalize is not None
+
+    logger.info_once("Using %s", prepare_finalize.__class__.__name__)
+
+    experts = experts_cls(
+        moe_config=moe_config,
+        quant_config=moe_quant_config,
+        b_strides1=b_strides1,
+        b_strides2=b_strides2,
+        group_size=group_size,
+    )
+
+    return mk.FusedMoEKernel(
+        prepare_finalize,
+        experts,
+        inplace=not moe_config.disable_inplace,
+    )
diff --git a/vllm/model_executor/layers/fused_moe/prepare_finalize/__init__.py b/vllm/model_executor/layers/fused_moe/prepare_finalize/__init__.py
index d388ee411407..b3529c99565f 100644
--- a/vllm/model_executor/layers/fused_moe/prepare_finalize/__init__.py
+++ b/vllm/model_executor/layers/fused_moe/prepare_finalize/__init__.py
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from vllm.model_executor.layers.fused_moe.prepare_finalize.batched import (
+    BatchedPrepareAndFinalize,
+)
 from vllm.model_executor.layers.fused_moe.prepare_finalize.naive_dp_ep import (
     MoEPrepareAndFinalizeNaiveDPEPModular,
     MoEPrepareAndFinalizeNaiveDPEPMonolithic,
@@ -13,6 +16,7 @@
 )
 
 __all__ = [
+    "BatchedPrepareAndFinalize",
     "MoEPrepareAndFinalizeNaiveDPEPMonolithic",
     "MoEPrepareAndFinalizeNaiveDPEPModular",
     "make_moe_prepare_and_finalize_naive_dp_ep",
diff --git a/vllm/model_executor/layers/fused_moe/prepare_finalize/batched.py b/vllm/model_executor/layers/fused_moe/prepare_finalize/batched.py
new file mode 100644
index 000000000000..943027717bbe
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/prepare_finalize/batched.py
@@ -0,0 +1,171 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
+from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
+    TopKWeightAndReduceDelegate,
+    TopKWeightAndReduceNaiveBatched,
+)
+from vllm.model_executor.layers.fused_moe.utils import (
+    moe_kernel_quantize_input,
+    normalize_scales_shape,
+)
+
+
+class BatchedPrepareAndFinalize(mk.FusedMoEPrepareAndFinalizeModular):
+    """
+    A reference prepare/finalize class that reorganizes the tokens into
+    expert batched format, i.e. E x max_num_tokens x K.  This is the format
+    that the batched dispatch/combine kernels use.
+    """
+
+    def __init__(
+        self,
+        max_num_tokens: int,
+        num_local_experts: int,
+        num_dispatchers: int,
+        rank: int,
+    ):
+        super().__init__()
+        self.max_num_tokens = max_num_tokens
+        self.num_local_experts = num_local_experts
+        self.rank = rank
+        self.num_dispatchers_ = num_dispatchers
+
+    @property
+    def activation_format(self) -> mk.FusedMoEActivationFormat:
+        return mk.FusedMoEActivationFormat.BatchedExperts
+
+    def max_num_tokens_per_rank(self) -> int | None:
+        return self.max_num_tokens
+
+    def topk_indices_dtype(self) -> torch.dtype | None:
+        return None
+
+    def num_dispatchers(self) -> int:
+        return self.num_dispatchers_
+
+    def output_is_reduced(self) -> bool:
+        return False
+
+    def prepare(
+        self,
+        a1: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        num_experts: int,
+        expert_map: torch.Tensor | None,
+        apply_router_weight_on_input: bool,
+        quant_config: FusedMoEQuantConfig,
+        defer_input_quant: bool = False,
+    ) -> mk.PrepareResultType:
+        if defer_input_quant:
+            raise NotImplementedError(
+                f"{self.__class__.__name__} does not support defer_input_quant=True. "
+                "Please select an MoE kernel that accepts quantized inputs."
+            )
+        assert a1.dim() == 2
+        assert topk_ids.dim() == 2
+        assert topk_ids.size(0) == a1.size(0)
+
+        if apply_router_weight_on_input:
+            topk = topk_ids.size(1)
+            # TODO: this only works for topK=1, will need to update for topK>1
+            assert topk == 1, (
+                "apply_router_weight_on_input is only implemented for topk=1"
+            )
+            a1.mul_(topk_weights.to(a1.dtype))
+
+        num_tokens, hidden_dim = a1.size()
+        topk = topk_ids.size(1)
+
+        tokens_per_expert = torch.zeros(num_experts, dtype=torch.int, device=a1.device)
+
+        num_local_experts = self.num_local_experts
+
+        if quant_config.quant_dtype is None:
+            b_type = a1.dtype
+        else:
+            b_type = quant_config.quant_dtype
+
+        b_a1 = torch.zeros(
+            (num_local_experts, self.max_num_tokens, hidden_dim),
+            dtype=b_type,
+            device=a1.device,
+        )
+
+        if quant_config.is_quantized:
+            scale_shape = quant_config.batched_scale_shape(
+                num_local_experts, self.max_num_tokens, hidden_dim
+            )
+
+            b_a1_scale = torch.empty(scale_shape, dtype=torch.float32, device=a1.device)
+        else:
+            assert quant_config.a1_scale is None
+            b_a1_scale = None
+
+        first_expert = num_local_experts * self.rank
+        last_expert = first_expert + num_local_experts
+
+        a1_scale = normalize_scales_shape(quant_config.a1_scale)
+
+        for expert_id in range(first_expert, last_expert):
+            topks = torch.any(topk_ids == expert_id, dim=1).flatten()
+            rows = torch.count_nonzero(topks.flatten())
+            if rows == 0:
+                continue
+            idx = expert_id - first_expert
+            tokens_per_expert[idx] = rows
+            rhs = a1[: topks.numel()][topks]
+            if quant_config.quant_dtype is not None:
+                if a1_scale is not None:
+                    if quant_config.is_per_act_token:
+                        rhs_a1_scale = a1_scale[: topks.numel()][topks]
+                    else:
+                        rhs_a1_scale = a1_scale
+                else:
+                    rhs_a1_scale = None
+                b_a1[idx, :rows, :], b_s = moe_kernel_quantize_input(
+                    rhs,
+                    rhs_a1_scale,
+                    quant_config.quant_dtype,
+                    quant_config.per_act_token_quant,
+                    quant_config.block_shape,
+                )
+                assert b_s is not None
+                if quant_config.is_per_act_token:
+                    b_a1_scale[idx, :rows] = b_s[:rows]
+                else:
+                    b_a1_scale[idx, : b_s.shape[0]] = b_s
+            else:
+                b_a1[idx, :rows, :] = rhs
+
+        assert b_a1_scale is None or b_a1_scale.ndim == 3
+
+        expert_tokens_meta = mk.ExpertTokensMetadata(
+            expert_num_tokens=tokens_per_expert, expert_num_tokens_cpu=None
+        )
+
+        return b_a1, b_a1_scale, expert_tokens_meta, None, None
+
+    def finalize(
+        self,
+        output: torch.Tensor,
+        fused_expert_output: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        apply_router_weight_on_input: bool,
+        weight_and_reduce_impl: mk.TopKWeightAndReduce,
+    ) -> None:
+        if isinstance(weight_and_reduce_impl, TopKWeightAndReduceDelegate):
+            weight_and_reduce_impl = TopKWeightAndReduceNaiveBatched(self.rank)
+        weight_and_reduce_impl.apply(
+            output=output,
+            fused_expert_output=fused_expert_output,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            apply_router_weight_on_input=apply_router_weight_on_input,
+        )
diff --git a/vllm/model_executor/layers/fused_moe/prepare_finalize/deepep_ht.py b/vllm/model_executor/layers/fused_moe/prepare_finalize/deepep_ht.py
index 63312557d85d..9ca94521363e 100644
--- a/vllm/model_executor/layers/fused_moe/prepare_finalize/deepep_ht.py
+++ b/vllm/model_executor/layers/fused_moe/prepare_finalize/deepep_ht.py
@@ -107,15 +107,17 @@ def _do_dispatch(
     ) -> Callable:
         has_scales = token_scales is not None
 
+        # Capture a DeepEP event on the compute stream before yielding.
+        # This must happen before the yield so the event only covers this
+        # ubatch's compute work. If captured after, the compute stream tail
+        # may include the other ubatch's work, preventing overlap.
+        previous_event = dbo_get_previous_event(self.buffer.capture)
+
         # We yield before launching the dispatch kernel since the dispatch
         # kernel will block the CPU so we want to queue up all the compute
         # for the other ubatch before the dispatch kernel starts.
         dbo_yield_and_switch_from_compute_to_comm()
 
-        # capture a DeepEP event and pass it as previous_event so
-        # DeepEP honors the dependency internally.
-        previous_event = dbo_get_previous_event(self.buffer.capture)
-
         (
             num_tokens_per_rank,
             num_tokens_per_rdma_rank,
@@ -239,7 +241,7 @@ def _receiver(
                     quant_dtype=quant_config.quant_dtype,
                     per_act_token_quant=False,
                     block_shape=quant_config.block_shape,
-                    is_fp4_scale_swizzled=quant_config.is_nvfp4_scale_swizzled,
+                    is_scale_swizzled=quant_config.is_scale_swizzled,
                 )
 
         return (
@@ -357,11 +359,11 @@ def _finalize(
                 topk_ids=topk_ids,
                 apply_router_weight_on_input=apply_router_weight_on_input,
             )
+        previous_event = dbo_get_previous_event(self.buffer.capture)
         dbo_yield_and_switch_from_compute_to_comm()
         assert fused_expert_output.dtype == torch.bfloat16, (
             f"Expected fused_expert_output bfloat16, got {fused_expert_output.dtype}"
         )
-        previous_event = dbo_get_previous_event(self.buffer.capture)
         combined_x, _, event = self.buffer.combine(
             # HT combine only supports BF16
             x=fused_expert_output,
diff --git a/vllm/model_executor/layers/fused_moe/prepare_finalize/deepep_ll.py b/vllm/model_executor/layers/fused_moe/prepare_finalize/deepep_ll.py
index a3266f5e847b..e5d2b601a768 100644
--- a/vllm/model_executor/layers/fused_moe/prepare_finalize/deepep_ll.py
+++ b/vllm/model_executor/layers/fused_moe/prepare_finalize/deepep_ll.py
@@ -16,7 +16,6 @@
     moe_kernel_quantize_input,
     normalize_batched_scales_shape,
 )
-from vllm.platforms import current_platform
 from vllm.v1.worker.ubatching import (
     dbo_current_ubatch_id,
     dbo_enabled,
@@ -135,7 +134,6 @@ def post_init_setup(self, fused_experts: mk.FusedMoEExperts):
                 "DeepEPLLPrepareAndFinalize is setup to dispatch raw/unquantized "
                 f"activations despite ({fused_experts.__class__.__name__}) being able "
                 "to support quantized activations.",
-                scope="local",
             )
 
     def num_dispatchers(self) -> int:
@@ -291,46 +289,29 @@ def prepare_async(
 
         # Dispatch
         dispatch_topk_ids = self._map_global_to_physical_ids(topk_ids)
-        if current_platform.is_rocm():
-            (
-                expert_x,
-                expert_num_tokens,
-                handle,
-                _,
-                hook,
-            ) = self.buffer.low_latency_dispatch(
-                a1,
-                dispatch_topk_ids,
-                self.max_tokens_per_rank,
-                num_experts,
-                use_fp8=self.use_fp8_dispatch,
-                async_finish=False,
-                return_recv_hook=True,
-            )
-        else:
-            (
-                expert_x,
-                expert_num_tokens,
-                handle,
-                _,
-                hook,
-            ) = self.buffer.low_latency_dispatch(
-                a1,
-                dispatch_topk_ids,
-                self.max_tokens_per_rank,
-                num_experts,
-                use_fp8=self.use_fp8_dispatch,
-                round_scale=self.use_ue8m0_dispatch,
-                use_ue8m0=self.use_ue8m0_dispatch,
-                **(dict(use_nvfp4=True) if use_nvfp4 else dict()),
-                **(
-                    dict(x_global_scale=qc_a1_gscale_or_scale)
-                    if qc_a1_gscale_or_scale is not None
-                    else dict()
-                ),
-                async_finish=False,
-                return_recv_hook=True,
-            )
+        (
+            expert_x,
+            expert_num_tokens,
+            handle,
+            _,
+            hook,
+        ) = self.buffer.low_latency_dispatch(
+            a1,
+            dispatch_topk_ids,
+            self.max_tokens_per_rank,
+            num_experts,
+            use_fp8=self.use_fp8_dispatch,
+            round_scale=self.use_ue8m0_dispatch,
+            use_ue8m0=self.use_ue8m0_dispatch,
+            **(dict(use_nvfp4=True) if use_nvfp4 else dict()),
+            **(
+                dict(x_global_scale=qc_a1_gscale_or_scale)
+                if qc_a1_gscale_or_scale is not None and nvfp4_dispatch
+                else dict()
+            ),
+            async_finish=False,
+            return_recv_hook=True,
+        )
         self.handles[a2a_idx] = handle
 
         return (
diff --git a/vllm/model_executor/layers/fused_moe/prepare_finalize/flashinfer_nvlink_one_sided.py b/vllm/model_executor/layers/fused_moe/prepare_finalize/flashinfer_nvlink_one_sided.py
index bdde3da6b3a3..e49d8b2624ab 100644
--- a/vllm/model_executor/layers/fused_moe/prepare_finalize/flashinfer_nvlink_one_sided.py
+++ b/vllm/model_executor/layers/fused_moe/prepare_finalize/flashinfer_nvlink_one_sided.py
@@ -4,6 +4,9 @@
 
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm.distributed import get_ep_group
+from vllm.distributed.device_communicators.base_device_communicator import (
+    All2AllManagerBase,
+)
 from vllm.forward_context import get_forward_context
 from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
 from vllm.model_executor.layers.fused_moe.utils import moe_kernel_quantize_input
@@ -11,12 +14,16 @@
 
 
 def get_local_sizes():
-    return get_forward_context().dp_metadata.get_chunk_sizes_across_dp_rank()
+    dp_metadata = get_forward_context().dp_metadata
+    assert dp_metadata is not None
+    return dp_metadata.get_chunk_sizes_across_dp_rank()
 
 
 class FlashInferNVLinkOneSidedPrepareAndFinalize(mk.FusedMoEPrepareAndFinalizeModular):
     """FlashInfer implementation using the Moe AlltoAll kernel."""
 
+    all2all_manager: All2AllManagerBase
+
     def __init__(
         self,
         max_num_tokens: int,
@@ -24,6 +31,8 @@ def __init__(
         num_experts: int,
         hidden_size: int,
         num_dispatchers: int = 1,
+        dispatch_dtype_bytes_per_elem: int = 0,
+        dispatch_scale_bytes_per_token: int = 0,
     ):
         super().__init__()
         self.max_num_tokens = max_num_tokens
@@ -31,13 +40,20 @@ def __init__(
         self.num_experts = num_experts
         self.hidden_size = hidden_size
         self.num_dispatchers_ = num_dispatchers
-
-        self.all2all_manager = get_ep_group().device_communicator.all2all_manager
-        self.all2all_manager.initialize(
+        self.scale_elems_per_token = dispatch_scale_bytes_per_token
+
+        device_communicator = get_ep_group().device_communicator
+        assert device_communicator is not None
+        all2all_manager = device_communicator.all2all_manager
+        assert all2all_manager is not None
+        self.all2all_manager = all2all_manager
+        self.all2all_manager.initialize(  # type: ignore[attr-defined]
             max_num_tokens=self.max_num_tokens,
             top_k=self.top_k,
             num_experts=self.num_experts,
             hidden_size=self.hidden_size,
+            dispatch_dtype_bytes_per_elem=dispatch_dtype_bytes_per_elem,
+            dispatch_scale_bytes_per_token=dispatch_scale_bytes_per_token,
         )
 
     @property
@@ -51,7 +67,7 @@ def num_dispatchers(self) -> int:
         return self.num_dispatchers_
 
     def output_is_reduced(self) -> bool:
-        return False
+        return True
 
     def topk_indices_dtype(self) -> torch.dtype | None:
         return torch.int32
@@ -81,38 +97,44 @@ def prepare(
             else a1.shape[0]
         )
 
-        a1q, a1q_scale = moe_kernel_quantize_input(
-            a1,
-            quant_config.a1_gscale,
-            quant_config.quant_dtype,
-            quant_config.per_act_token_quant,
-            quant_config.block_shape,
-            is_fp4_scale_swizzled=False,  # delay swizzle to after comm
-        )
+        if defer_input_quant:
+            a1q, a1q_scale = a1, None
+        else:
+            a1q, a1q_scale = moe_kernel_quantize_input(
+                a1,
+                quant_config.a1_gscale,
+                quant_config.quant_dtype,
+                quant_config.per_act_token_quant,
+                quant_config.block_shape,
+                is_scale_swizzled=False,  # delay swizzle to after comm
+                mx_alignment=quant_config.mx_alignment,
+            )
 
         payloads = []
         payloads.append(a1q)
         if a1q_scale is not None:
             payloads.append(a1q_scale)
+        topk_ids_payload_index = len(payloads)
         payloads.append(topk_ids)
         payloads.append(topk_weights)
 
-        recv_payloads = self.all2all_manager.moe_alltoall.dispatch(
+        assert self.all2all_manager.moe_alltoall is not None  # type: ignore[attr-defined]
+        recv_payloads = self.all2all_manager.moe_alltoall.dispatch(  # type: ignore[attr-defined]
             token_selected_experts=topk_ids,
             input_payloads=payloads,
             runtime_max_tokens_per_rank=self.runtime_max_tokens_per_rank,
+            invalid_token_expert_id=-1,  # Follow TRTLLM Pattern
+            expert_id_payload_index=topk_ids_payload_index,
         )
         if a1q_scale is not None:
             a1q_recv, a1q_scale_recv, topk_ids_recv, topk_weights_recv = recv_payloads
             # Apply scale interleaving only for CUTLASS (not TRT-LLM)
-            if (
-                quant_config.quant_dtype == "nvfp4"
-                and quant_config.is_nvfp4_scale_swizzled
-            ):
+            if quant_config.quant_dtype == "nvfp4" and quant_config.is_scale_swizzled:
                 a1q_scale_recv = a1q_scale_recv.view(-1, a1q_scale_recv.shape[-1])
                 a1q_scale_recv = a1q_scale_recv.view(torch.uint8)
                 a1q_scale_recv = nvfp4_block_scale_interleave(a1q_scale_recv)
-            a1q_scale_recv = a1q_scale_recv.view(-1, self.hidden_size // 16)
+            assert self.scale_elems_per_token > 0
+            a1q_scale_recv = a1q_scale_recv.view(-1, self.scale_elems_per_token)
         else:
             a1q_recv, topk_ids_recv, topk_weights_recv = recv_payloads
             a1q_scale_recv = None
@@ -131,7 +153,7 @@ def finalize(
         apply_router_weight_on_input: bool,
         weight_and_reduce_impl: mk.TopKWeightAndReduce,
     ) -> None:
-        assert self.all2all_manager.moe_alltoall is not None
+        assert self.all2all_manager.moe_alltoall is not None  # type: ignore[attr-defined]
 
         ep_size = self.all2all_manager.world_size
         hidden_size = fused_expert_output.shape[-1]
@@ -139,7 +161,7 @@ def finalize(
             ep_size, self.runtime_max_tokens_per_rank, hidden_size
         )
 
-        combined_output = self.all2all_manager.moe_alltoall.combine(
+        combined_output = self.all2all_manager.moe_alltoall.combine(  # type: ignore[attr-defined]
             payload=fused_expert_output,
             runtime_max_tokens_per_rank=self.runtime_max_tokens_per_rank,
         )
diff --git a/vllm/model_executor/layers/fused_moe/prepare_finalize/flashinfer_nvlink_two_sided.py b/vllm/model_executor/layers/fused_moe/prepare_finalize/flashinfer_nvlink_two_sided.py
index be63bd4e3f61..73083db139fa 100644
--- a/vllm/model_executor/layers/fused_moe/prepare_finalize/flashinfer_nvlink_two_sided.py
+++ b/vllm/model_executor/layers/fused_moe/prepare_finalize/flashinfer_nvlink_two_sided.py
@@ -15,19 +15,26 @@
 
 
 def get_local_sizes():
-    return get_forward_context().dp_metadata.get_chunk_sizes_across_dp_rank()
+    dp_metadata = get_forward_context().dp_metadata
+    assert dp_metadata is not None
+    return dp_metadata.get_chunk_sizes_across_dp_rank()
 
 
 class FlashInferNVLinkTwoSidedPrepareAndFinalize(mk.FusedMoEPrepareAndFinalizeModular):
     """Base class for FlashInfer MoE prepare and finalize operations."""
 
+    all2all_manager: All2AllManagerBase
+
     def __init__(
         self,
         num_dispatchers: int = 1,
     ):
         super().__init__()
         self.num_dispatchers_ = num_dispatchers
-        self.all2all_manager = get_ep_group().device_communicator.all2all_manager
+        device_communicator = get_ep_group().device_communicator
+        assert device_communicator is not None
+        assert device_communicator.all2all_manager is not None
+        self.all2all_manager = device_communicator.all2all_manager
 
     @property
     def activation_format(self) -> mk.FusedMoEActivationFormat:
@@ -43,7 +50,7 @@ def num_dispatchers(self) -> int:
         return self.num_dispatchers_
 
     def output_is_reduced(self) -> bool:
-        return False
+        return True
 
     def _apply_router_weight_on_input(
         self,
@@ -129,7 +136,7 @@ def flashinfer_alltoall_dispatch(
 ):
     from flashinfer.comm.trtllm_alltoall import MnnvlMoe
 
-    assert all2all_manager.ensure_alltoall_workspace_initialized(), (
+    assert all2all_manager.ensure_alltoall_workspace_initialized(), (  # type: ignore[attr-defined]
         "FlashInfer AllToAll workspace not available"
     )
 
@@ -144,7 +151,7 @@ def flashinfer_alltoall_dispatch(
             topk_ids,
             topk_weights,
             None,
-            all2all_manager.prepare_workspace_tensor,
+            all2all_manager.prepare_workspace_tensor,  # type: ignore[attr-defined]
             max_num_token,
             ep_rank,
             ep_size,
@@ -166,27 +173,33 @@ def flashinfer_alltoall_dispatch(
             # which makes the scales tensor different shape than
             # the hidden states, breaking the A2A kernel. So, we
             # delay the swizzling until after the A2A.
-            is_fp4_scale_swizzled=False,
+            is_scale_swizzled=False,
+            mx_alignment=quant_config.mx_alignment,
         )
 
         x = MnnvlMoe.mnnvl_moe_alltoallv(
             x,
             alltoall_info,
-            all2all_manager.workspace_tensor,
+            all2all_manager.workspace_tensor,  # type: ignore[attr-defined]
             ep_rank,
             ep_size,
         )
 
-        x_sf = MnnvlMoe.mnnvl_moe_alltoallv(
-            x_sf,
-            alltoall_info,
-            all2all_manager.workspace_tensor,
-            ep_rank,
-            ep_size,
-        )
+        if x_sf is not None:
+            x_sf = MnnvlMoe.mnnvl_moe_alltoallv(
+                x_sf,
+                alltoall_info,
+                all2all_manager.workspace_tensor,  # type: ignore[attr-defined]
+                ep_rank,
+                ep_size,
+            )
 
         # Swizzle after the A2A if MoE kernel expects swizzled scales.
-        if quant_config.quant_dtype == "nvfp4" and quant_config.is_nvfp4_scale_swizzled:
+        if (
+            x_sf is not None
+            and quant_config.quant_dtype == "nvfp4"
+            and quant_config.is_scale_swizzled
+        ):
             if x_sf.element_size() == 1:
                 x_sf = x_sf.view(torch.uint8)
             x_sf = nvfp4_block_scale_interleave(x_sf)
@@ -196,7 +209,7 @@ def flashinfer_alltoall_dispatch(
         x = MnnvlMoe.mnnvl_moe_alltoallv(
             x,
             alltoall_info,
-            all2all_manager.workspace_tensor,
+            all2all_manager.workspace_tensor,  # type: ignore[attr-defined]
             ep_rank,
             ep_size,
         )
@@ -212,13 +225,13 @@ def flashinfer_alltoall_combine(
 ):
     from flashinfer.comm.trtllm_alltoall import MnnvlMoe
 
-    assert all2all_manager.ensure_alltoall_workspace_initialized(), (
+    assert all2all_manager.ensure_alltoall_workspace_initialized(), (  # type: ignore[attr-defined]
         "FlashInfer AllToAll workspace not available"
     )
     return MnnvlMoe.mnnvl_moe_alltoallv_combine(
         output,
         alltoall_info,
-        all2all_manager.workspace_tensor,
+        all2all_manager.workspace_tensor,  # type: ignore[attr-defined]
         ep_rank=all2all_manager.rank,
         ep_size=all2all_manager.world_size,
         top_k=top_k,
diff --git a/vllm/model_executor/layers/fused_moe/mori_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/prepare_finalize/mori.py
similarity index 100%
rename from vllm/model_executor/layers/fused_moe/mori_prepare_finalize.py
rename to vllm/model_executor/layers/fused_moe/prepare_finalize/mori.py
diff --git a/vllm/model_executor/layers/fused_moe/prepare_finalize/naive_dp_ep.py b/vllm/model_executor/layers/fused_moe/prepare_finalize/naive_dp_ep.py
index 6dc9f6958048..b8633726c72b 100644
--- a/vllm/model_executor/layers/fused_moe/prepare_finalize/naive_dp_ep.py
+++ b/vllm/model_executor/layers/fused_moe/prepare_finalize/naive_dp_ep.py
@@ -39,7 +39,8 @@ def _quantize_and_setup_dispatch(
             quant_dtype=quant_config.quant_dtype,
             per_act_token_quant=quant_config.per_act_token_quant,
             block_shape=quant_config.block_shape,
-            is_fp4_scale_swizzled=False,
+            is_scale_swizzled=False,
+            mx_alignment=quant_config.mx_alignment,
         )
 
     # Skip gathering scales if we have static quantization
@@ -58,7 +59,7 @@ def _unwrap_scale_and_prepare_for_moe(
     assert scales is not None and len(scales) == 1
     a1q_scale = scales[0]
     # Apply swizzling after a2a if the MoE kernel needs it.
-    if quant_config.quant_dtype == "nvfp4" and quant_config.is_nvfp4_scale_swizzled:
+    if quant_config.quant_dtype == "nvfp4" and quant_config.is_scale_swizzled:
         assert a1q_scale is not None
         if a1q_scale.element_size() == 1:
             a1q_scale = a1q_scale.view(torch.uint8)
@@ -83,6 +84,14 @@ def __init__(
         super().__init__()
         self.is_sequence_parallel = is_sequence_parallel
         self._num_dispatchers = num_dispatchers
+        # Set by FusedMoEWithLoRA.set_mapping() when LoRA is active. When
+        # present, prepare() dispatches the per-token LoRA mapping alongside
+        # hidden_states and writes the gathered result back to the context so
+        # experts can use the per-rank-local mapping.
+        self._lora_context = None
+
+    def set_lora_context(self, ctx) -> None:
+        self._lora_context = ctx
 
     @property
     def activation_format(self) -> mk.FusedMoEActivationFormat:
@@ -123,20 +132,54 @@ def prepare(
 
         a1q, scales = _quantize_and_setup_dispatch(a1, quant_config, defer_input_quant)
 
+        # When LoRA is active, dispatch the per-token LoRA id along with
+        # hidden_states so every rank receives the correct mapping for the
+        # tokens it ends up processing. The punica_wrapper stores indices as
+        # int64 but the moe_lora_align_block_size kernel expects int32, so
+        # pull the pre-cast view from token_mapping_meta.
+        lora_ctx = self._lora_context
+        local_token_lora_mapping = None
+        if lora_ctx is not None:
+            local_token_lora_mapping = (
+                lora_ctx.punica_wrapper.token_mapping_meta.token_lora_mapping[
+                    : a1.shape[0]
+                ]
+            )
+
+        extra_tensors: list[torch.Tensor] | None = None
+        if scales is not None:
+            extra_tensors = list(scales)
+        if local_token_lora_mapping is not None:
+            if extra_tensors is None:
+                extra_tensors = []
+            extra_tensors.append(local_token_lora_mapping)
+
         res = get_ep_group().dispatch(
             a1q,
             topk_weights,
             topk_ids,
             is_sequence_parallel=self.is_sequence_parallel,
-            extra_tensors=scales,
+            extra_tensors=extra_tensors,
         )
 
-        if scales is None:
+        if extra_tensors is None:
+            assert len(res) == 3
             a1q, topk_weights, topk_ids = res
             a1q_scale = None
         else:
-            a1q, topk_weights, topk_ids, scales = res
-            a1q_scale = _unwrap_scale_and_prepare_for_moe(scales, quant_config)
+            assert len(res) == 4
+            a1q, topk_weights, topk_ids, gathered_extras = res
+            gathered_extras = list(gathered_extras)
+            if local_token_lora_mapping is not None:
+                dispatched_lora_mapping = gathered_extras.pop()
+                assert lora_ctx is not None
+                lora_ctx.local_token_lora_mapping = dispatched_lora_mapping
+            if scales is not None:
+                a1q_scale = _unwrap_scale_and_prepare_for_moe(
+                    gathered_extras, quant_config
+                )
+            else:
+                a1q_scale = None
 
         return a1q, a1q_scale, None, topk_ids, topk_weights
 
@@ -217,9 +260,11 @@ def prepare(
         )
 
         if scales is None:
+            assert len(res) == 2
             a1q, router_logits = res
             a1q_scale = None
         else:
+            assert len(res) == 3
             a1q, router_logits, scales = res
             a1q_scale = _unwrap_scale_and_prepare_for_moe(scales, quant_config)
 
diff --git a/vllm/model_executor/layers/fused_moe/nixl_ep_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/prepare_finalize/nixl_ep.py
similarity index 94%
rename from vllm/model_executor/layers/fused_moe/nixl_ep_prepare_finalize.py
rename to vllm/model_executor/layers/fused_moe/prepare_finalize/nixl_ep.py
index dbc54e2c9def..977d4556f132 100644
--- a/vllm/model_executor/layers/fused_moe/nixl_ep_prepare_finalize.py
+++ b/vllm/model_executor/layers/fused_moe/prepare_finalize/nixl_ep.py
@@ -7,6 +7,8 @@
 
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm import envs
+from vllm.distributed import get_ep_group
+from vllm.distributed.device_communicators.all2all import NixlEPAll2AllManager
 from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
 from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
@@ -123,7 +125,6 @@ def post_init_setup(self, fused_experts: mk.FusedMoEExperts):
                 "NixlEPPrepareAndFinalize is setup to dispatch raw/unquantized "
                 f"activations despite ({fused_experts.__class__.__name__}) being able "
                 "to support quantized activations.",
-                scope="local",
             )
 
     def num_dispatchers(self) -> int:
@@ -139,6 +140,17 @@ def activation_format(self) -> mk.FusedMoEActivationFormat:
     def max_num_tokens_per_rank(self) -> int | None:
         return self.max_tokens_per_rank
 
+    def on_commit(self) -> None:
+        device_communicator = get_ep_group().device_communicator
+        assert device_communicator is not None
+        all2all_manager = device_communicator.all2all_manager
+        assert isinstance(all2all_manager, NixlEPAll2AllManager)
+        # maybe_make_prepare_finalize(..., eep_stage=True) initializes self.buffer
+        # with get_handle(..., stage=True), which stages global NIXL state for the
+        # new config but leaves it inactive while the old config remains active.
+        # When EEP commit switches to this P/F, this P/F needs to commit that state.
+        all2all_manager.commit_staged_state()
+
     def topk_indices_dtype(self) -> torch.dtype | None:
         return torch.int64
 
diff --git a/vllm/model_executor/layers/fused_moe/prepare_finalize/no_dp_ep.py b/vllm/model_executor/layers/fused_moe/prepare_finalize/no_dp_ep.py
index b9d57da08326..82079127251c 100644
--- a/vllm/model_executor/layers/fused_moe/prepare_finalize/no_dp_ep.py
+++ b/vllm/model_executor/layers/fused_moe/prepare_finalize/no_dp_ep.py
@@ -30,7 +30,8 @@ def _quantize_input(
         quant_dtype=quant_config.quant_dtype,
         per_act_token_quant=quant_config.per_act_token_quant,
         block_shape=quant_config.block_shape,
-        is_fp4_scale_swizzled=quant_config.is_nvfp4_scale_swizzled,
+        is_scale_swizzled=quant_config.is_scale_swizzled,
+        mx_alignment=quant_config.mx_alignment,
     )
 
     return a1q, a1q_scale
diff --git a/vllm/model_executor/layers/fused_moe/routed_experts_capturer.py b/vllm/model_executor/layers/fused_moe/routed_experts_capturer.py
index b061b3d38b8d..115f43a58bfd 100644
--- a/vllm/model_executor/layers/fused_moe/routed_experts_capturer.py
+++ b/vllm/model_executor/layers/fused_moe/routed_experts_capturer.py
@@ -5,170 +5,135 @@
 
 from __future__ import annotations
 
-import fcntl
 import logging
-import os
-import tempfile
-from collections.abc import Generator
-from contextlib import contextmanager
-from multiprocessing import shared_memory
-from unittest.mock import patch
 
 import numpy as np
 import torch
 
 from vllm.config import VllmConfig
-from vllm.distributed import get_tensor_model_parallel_rank
+from vllm.distributed.parallel_state import get_tp_group
 from vllm.forward_context import get_forward_context
 from vllm.platforms import current_platform
+from vllm.v1.kv_cache_interface import FullAttentionSpec, KVCacheConfig
 
 logger = logging.getLogger(__name__)
 
-# Constants
-_TMP_DIR = tempfile.gettempdir()
-_LOCK_FILE_PREFIX = os.path.join(_TMP_DIR, "vllm_routed_experts")
-_BUFFER_PREFIX = "vllm_routed_experts_buffer"
-
-# Global singleton instances
-_global_experts_capturer: RoutedExpertsCapturer | None = None
-_global_experts_reader: RoutedExpertsReader | None = None
-
-
-@contextmanager
-def _file_lock(lock_file: str, mode: str = "wb+") -> Generator[None, None, None]:
-    """Context manager for file-based locking."""
-    with open(lock_file, mode) as fp:
-        fcntl.flock(fp, fcntl.LOCK_EX)
-        try:
-            yield
-        finally:
-            fcntl.flock(fp, fcntl.LOCK_UN)
-
-
-def _create_or_attach_shared_memory(
-    name: str, size: int, lock_file: str
-) -> shared_memory.SharedMemory:
-    """Create or attach to shared memory with proper locking."""
-    # Ensure lock file exists before acquiring lock
-    with open(lock_file, "wb"):
-        pass
-
-    with _file_lock(lock_file):
-        try:
-            shm = shared_memory.SharedMemory(name=name, create=True, size=size)
-        except FileExistsError:
-            shm = shared_memory.SharedMemory(name=name, create=False, size=size)
-
-        if shm.size != size:
-            logger.warning(
-                "Shared memory %s size mismatch; recreating",
-                name,
-            )
-            shm.close()
-            shm.unlink()
-            try:
-                shm = shared_memory.SharedMemory(name=name, create=True, size=size)
-                logger.info("Created shared memory %s", name)
-            except FileExistsError:
-                shm = shared_memory.SharedMemory(name=name, create=False, size=size)
-                logger.info("Linked to existing shared memory %s", name)
-
-    return shm
 
+def _get_num_experts_per_tok(hf_config) -> int:
+    """Resolve the per-token expert count from the HF config.
 
-class RoutedExpertsCapturer:
-    """
-    Capturer for routed experts with device and optional shared memory buffer.
-
-    This class captures expert routing decisions during model forward passes
-    and optionally stores them in shared memory for cross-process access.
+    Different model families store this under different attribute names
+    (e.g. ``num_experts_per_tok`` for DeepSeek, ``top_k_experts`` for Gemma 4).
     """
+    val = getattr(hf_config, "num_experts_per_tok", None)
+    if val is None:
+        val = getattr(hf_config, "top_k_experts", None)
+    if val is None:
+        raise ValueError(
+            "Cannot determine num_experts_per_tok: HF config has neither "
+            "'num_experts_per_tok' nor 'top_k_experts'"
+        )
+    return val
 
-    _instance: RoutedExpertsCapturer | None = None
 
-    def __init__(self) -> None:
-        self._device_buffer: torch.Tensor | None = None
-        self._shm: shared_memory.SharedMemory | None = None
-        self._host_buffer_view: np.ndarray | None = None
-        self._lock_file: str | None = None
+def get_num_experts(hf_config) -> int:
+    """Resolve ``num_experts`` across HuggingFace config naming conventions.
 
-    @classmethod
-    def create(cls) -> RoutedExpertsCapturer:
-        """Create a global singleton instance."""
-        global _global_experts_capturer
-        if _global_experts_capturer is not None:
-            raise RuntimeError("Experts capturer already created.")
+    Different MoE model families expose this under different keys:
+      - ``num_experts``: Mixtral, Qwen2-MoE, Qwen3-MoE
+      - ``n_routed_experts``: DeepSeek-V2/V3
+      - ``num_local_experts``: Mixtral (older exports)
+    """
+    for key in ("num_experts", "n_routed_experts", "num_local_experts"):
+        val = getattr(hf_config, key, None)
+        if val is not None:
+            return val
+    raise ValueError(
+        "Could not resolve num_experts from model config. "
+        "Expected one of 'num_experts', 'n_routed_experts', "
+        "or 'num_local_experts'."
+    )
 
-        _global_experts_capturer = cls()
-        return _global_experts_capturer
 
-    @staticmethod
-    def get_instance() -> RoutedExpertsCapturer | None:
-        """Get the global singleton instance."""
-        return _global_experts_capturer
+class RoutedExpertsCapturer:
+    """Worker-side capturer for routed experts, lives on GPU.
+
+    Layer-level hooks call :meth:`capture` from inside the forward pass
+    with the per-layer ``topk_ids`` tensor. The tensor is sliced to the
+    tokens owned by this DP rank and written into a preallocated device
+    buffer. At the end of the step, :class:`GPUModelRunner` reads the
+    device buffer, issues a D2H copy into a pinned CPU buffer, and hands
+    the result to the scheduler via :class:`RoutedExpertsLists`.
+
+    The device / pinned-CPU transit buffers use ``torch.int32`` (not a
+    narrow ``uint8``/``uint16`` sized by ``num_experts``). This keeps the
+    SP all-gather path free of dtype casts, matches the router's native
+    ``topk_ids`` indices dtype more closely, and costs only a few MB per
+    worker (``max_num_batched_tokens * num_layers * top_k * 4`` bytes).
+    The scheduler-side slot buffer
+    (``RoutedExpertsManager.routed_experts_by_slot``) still uses the
+    narrow dtype -- numpy fancy-index assignment in ``store_batch``
+    narrows the data on the way in.
+
+    Invariants:
+        - One instance per worker; shape is fixed at init and covers the
+          worst-case step (``max_num_batched_tokens`` tokens).
+        - :meth:`clear_buffer` is called at the start of every step, so
+          unused slots stay zero.
+        - ``device_buffer.dtype`` is ``torch.int32``.
+    """
 
-    def init_buffer(
+    def __init__(
         self,
         max_num_batched_tokens: int,
-        max_num_kv_tokens: int,
         vllm_config: VllmConfig,
     ) -> None:
-        """
-        Initialize the device buffer and optionally shared memory buffer.
-
-        Args:
-            max_num_batched_tokens: Maximum number of tokens in a batch.
-            max_num_kv_tokens: Maximum number of KV tokens for shared memory.
-            vllm_config: vllm configuration containing layer and expert info.
-        """
-
-        if self._device_buffer is not None:
-            raise RuntimeError("Device buffer has already been initialized")
-
         hf_config = vllm_config.model_config.hf_text_config
-        num_layers = hf_config.num_hidden_layers
-        num_experts_per_tok = hf_config.num_experts_per_tok
-
-        # Initialize device buffer
-        self._device_buffer = torch.zeros(
-            (max_num_batched_tokens, num_layers, num_experts_per_tok),
+        num_experts_per_tok = _get_num_experts_per_tok(hf_config)
+        self.device_buffer = torch.zeros(
+            (
+                max_num_batched_tokens,
+                hf_config.num_hidden_layers,
+                num_experts_per_tok,
+            ),
+            # Use int32 for the device / host transit buffers: it
+            # matches the router's native topk_ids dtype, is universally
+            # supported by NCCL (uint8/uint16 are version-dependent),
+            # and the extra bytes are small (few MB per worker). The
+            # big scheduler-side slot buffer stays narrow.
             dtype=torch.int32,
             device=current_platform.device_type,
         )
         self.dp_rank = vllm_config.parallel_config.data_parallel_rank
-
-        if get_tensor_model_parallel_rank() != 0:
-            return
-
-        # Initialize shared memory
-        shape = (max_num_kv_tokens, num_layers, num_experts_per_tok)
-        buffer_size = int(np.prod(shape)) * np.dtype(np.int32).itemsize
-        instance_id = vllm_config.instance_id
-        self._lock_file = f"{_LOCK_FILE_PREFIX}_{instance_id}_{self.dp_rank}.lock"
-        shm_name = f"{_BUFFER_PREFIX}_{instance_id}_{self.dp_rank}"
-
-        self._shm = _create_or_attach_shared_memory(
-            shm_name, buffer_size, self._lock_file
-        )
-        self._host_buffer_view = np.ndarray(shape, dtype=np.int32, buffer=self._shm.buf)
-        self._host_buffer_view.fill(0)
-
-        logger.debug(
-            "Created shared memory buffer '%s' with shape %s",
-            shm_name,
-            shape,
-        )
+        self.tp_size = vllm_config.parallel_config.tensor_parallel_size
 
     def capture(self, layer_id: int, topk_ids: torch.Tensor) -> None:
-        """
-        Capture expert routing decisions for a specific layer.
+        """Capture expert routing decisions for a specific layer.
+
+        Under data parallelism, ``topk_ids`` may have three different batch
+        layouts depending on where the DP combine happens and whether
+        Sequence Parallelism (SP) is active for the MoE layer:
+          - ``n == total`` (naive dispatch): all DP ranks' tokens are
+            concatenated before routing; we slice out this rank's span
+            using the cumulative per-rank counts.
+          - ``n == token_num_per_dp`` (modular-kernel path): DP combine
+            happens inside ``quant_method.apply``; ``select_experts`` only
+            ever sees this rank's tokens, so we take the whole tensor.
+          - ``n == ceil(token_num_per_dp / tp_size)`` (SP + modular-kernel
+            path): tokens were split along dim=0 across the TP group by
+            ``_sequence_parallel_context``
+            (``moe_runner_base.py:_sequence_parallel_context``), so each
+            TP rank only sees its shard. We all-gather along dim=0 to
+            reconstruct this DP rank's full routing tensor. SP pads with
+            ceil-div (see ``_compute_sp_num_tokens`` in
+            ``forward_context.py``), so the gathered tensor may contain a
+            few trailing padding rows which are trimmed by the downstream
+            ``[:token_num_per_dp]`` slice.
 
         Args:
             layer_id: The layer index.
             topk_ids: Tensor of shape (batch_size, num_routed_experts).
         """
-        if self._device_buffer is None:
-            raise RuntimeError("Buffer not initialized. Call init_buffer() first.")
 
         ctx = get_forward_context()
         if ctx.dp_metadata is None:  # single dp
@@ -176,162 +141,209 @@ def capture(self, layer_id: int, topk_ids: torch.Tensor) -> None:
             end_loc = topk_ids.shape[0]
             token_num_per_dp = topk_ids.shape[0]
         else:  # multi dp
-            token_num_per_dp = ctx.dp_metadata.num_tokens_across_dp_cpu[self.dp_rank]
-            cumsum = torch.cumsum(ctx.dp_metadata.num_tokens_across_dp_cpu, dim=0)
-            assert cumsum[-1] == topk_ids.shape[0]
-            end_loc = cumsum[self.dp_rank]
-            start_loc = end_loc - token_num_per_dp
-
-        if layer_id >= self._device_buffer.shape[1]:
+            num_tokens_dp = ctx.dp_metadata.num_tokens_across_dp_cpu
+            token_num_per_dp = int(num_tokens_dp[self.dp_rank].item())
+            total = int(num_tokens_dp.sum().item())
+            n = topk_ids.shape[0]
+
+            if n == total:
+                # Naive dispatch: all DP ranks' tokens concatenated
+                # before routing. This rank owns tokens
+                # [end_loc - token_num_per_dp, end_loc).
+                cumsum = torch.cumsum(num_tokens_dp, dim=0)
+                end_loc = int(cumsum[self.dp_rank].item())
+                start_loc = end_loc - token_num_per_dp
+            elif n == token_num_per_dp:
+                # Modular-kernel path: DP combine happens inside
+                # quant_method.apply; select_experts only sees this
+                # rank's tokens, take the whole tensor.
+                start_loc = 0
+                end_loc = token_num_per_dp
+            elif (
+                self.tp_size > 1
+                and n != token_num_per_dp
+                and n == (token_num_per_dp + self.tp_size - 1) // self.tp_size
+            ):
+                # SP + modular-kernel path. All-gather across the TP
+                # group along dim=0 to reconstruct the full per-DP-rank
+                # tensor; keep only the first ``token_num_per_dp`` rows
+                # (trailing rows are SP ceil-div padding). The TP group
+                # is always initialized on real rollout workers, and
+                # every rank in the group reaches this branch in
+                # lockstep (bind is per-FusedMoE layer, SP is a global
+                # condition), so a bare all_gather here will not
+                # deadlock -- let it raise if the precondition is
+                # violated rather than skip silently.
+                #
+                # ``topk_ids`` is already whatever the router produced
+                # (typically int32/int64, both supported by NCCL); the
+                # downstream ``device_buffer[...] = topk_ids[...]``
+                # setitem narrows into int32 automatically.
+                topk_ids = get_tp_group().all_gather(topk_ids, dim=0)
+                start_loc = 0
+                end_loc = token_num_per_dp
+            else:
+                sp_expected = (
+                    (token_num_per_dp + self.tp_size - 1) // self.tp_size
+                    if self.tp_size > 0
+                    else -1
+                )
+                raise AssertionError(
+                    "RoutedExpertsCapturer: unexpected topk_ids batch "
+                    f"dim {n} (expected {total}, {token_num_per_dp}, "
+                    f"or {sp_expected} for dp_rank={self.dp_rank}, "
+                    f"tp_size={self.tp_size})"
+                )
+
+        # Defensive: model may expose more layers than the capture buffer
+        # was sized for (unusual, but guards against miss-config).
+        if layer_id >= self.device_buffer.shape[1]:
             return
 
-        self._device_buffer[:token_num_per_dp, layer_id, :] = topk_ids[
+        self.device_buffer[:token_num_per_dp, layer_id, :] = topk_ids[
             start_loc:end_loc, :
         ]
 
     def clear_buffer(self) -> None:
-        """Clear the device buffer."""
-        if self._device_buffer is not None:
-            self._device_buffer.zero_()
-
-    def save_captured_experts(self, indices: np.ndarray) -> None:
+        """Zero the device buffer. Called at the start of every step so
+        slots belonging to finished / preempted tokens don't leak into
+        the next step.
         """
-        Save captured experts from device buffer to shared memory.
+        self.device_buffer.zero_()
 
-        Args:
-            indices: Array of indices indicating where to store the data.
+    def get_device_buffer(self) -> torch.Tensor:
+        """Return the underlying device buffer so the model runner can
+        issue the D2H copy. The tensor is shared; callers must either
+        clone or fully drain it before the next forward pass runs
+        :meth:`clear_buffer`.
         """
-        if get_tensor_model_parallel_rank() != 0:
-            return
-        if self._lock_file is None:
-            raise RuntimeError("Shared memory not initialized.")
-        if self._host_buffer_view is None:
-            return
-        if self._device_buffer is None:
-            raise RuntimeError("Device buffer not initialized.")
-
-        num_tokens = len(indices)
-        data = self._device_buffer[:num_tokens, :, :].cpu().numpy()
-
-        with _file_lock(self._lock_file):
-            self._host_buffer_view[indices, :, :] = data
-
-    def cleanup(self) -> None:
-        """Explicitly clean up shared memory resources."""
-        if self._shm is not None:
-            try:
-                self._shm.close()
-                self._shm.unlink()
-            except Exception:
-                logger.debug("Exception during cleanup for capturer", exc_info=True)
-            finally:
-                self._shm = None
-
-    def __del__(self) -> None:
-        """Clean up shared memory on destruction."""
-        self.cleanup()
-
-
-class RoutedExpertsReader:
-    """
-    Reader for routed experts from shared memory.
-
-    This class attaches to shared memory created by RoutedExpertsCapturer
-    and reads expert routing decisions.
+        return self.device_buffer
+
+
+class RoutedExpertsManager:
+    """Scheduler-side slot-indexed buffer for routed experts.
+
+    Lives on CPU in the scheduler process. Each slot corresponds to
+    ``block_id * block_size + offset_in_block`` where ``block_id`` is
+    drawn from the physical KV-cache block pool, so routing data is
+    tied to physical blocks and naturally survives preemption for
+    prefix-cached blocks (prefix hits re-expose the same slots).
+
+    Data flow per step:
+      1. Worker D2Hs its device capture buffer into
+         :class:`RoutedExpertsLists` and returns it via
+         :class:`ModelRunnerOutput`.
+      2. Scheduler calls :meth:`store_batch` with that step's
+         ``(routing_data, slot_mapping)`` — a single CPU->CPU
+         fancy-index assign, ~few MB per step.
+      3. On request completion / abort / preemption, the scheduler
+         calls :meth:`get` with the request's block IDs to recover
+         the full per-token routing.
+
+    Memory: ``routed_experts_by_slot`` is sized for the whole block
+    pool (``num_blocks * block_size`` slots). For large block pools
+    this can reach multiple GB; see the init log for the exact size.
     """
 
-    _instance: RoutedExpertsReader | None = None
-
-    def __init__(self) -> None:
-        self._shm: shared_memory.SharedMemory | None = None
-        self._host_buffer_view: np.ndarray | None = None
-        self._lock_file: str | None = None
-
-    @classmethod
-    def create(cls) -> RoutedExpertsReader:
-        """Create a global singleton instance."""
-        global _global_experts_reader
-        if _global_experts_reader is not None:
-            raise RuntimeError("Experts reader already created.")
-
-        _global_experts_reader = cls()
-        return _global_experts_reader
-
-    @staticmethod
-    def get_instance() -> RoutedExpertsReader | None:
-        """Get the global singleton instance."""
-        if _global_experts_reader is None:
-            logger.info("Experts reader not initialized.")
-        return _global_experts_reader
-
-    def attach_buffer(
+    def __init__(
         self,
-        max_num_kv_tokens: int,
         vllm_config: VllmConfig,
+        kv_cache_config: KVCacheConfig,
     ) -> None:
-        """
-        Attach to an existing shared memory buffer.
-
-        Args:
-            max_num_kv_tokens: Maximum number of KV tokens.
-            vllm_config: vllm configuration.
-        """
-        if self._shm is not None:
-            logger.warning("Already attached to shared memory buffer.")
-            return  # Already attached
+        # Pick the attention group for block/slot mapping. We require
+        # a FullAttentionSpec group rather than any AttentionSpec to
+        # stay consistent with the worker-side lookup in
+        # ``GPUModelRunner._get_attention_kv_cache_gid``; hybrid models
+        # (Mamba / linear attention) also have other AttentionSpec
+        # groups whose slot layout differs.
+        self.attn_gid = next(
+            gid
+            for gid, g in enumerate(kv_cache_config.kv_cache_groups)
+            if isinstance(g.kv_cache_spec, FullAttentionSpec)
+        )
+        attn_group = kv_cache_config.kv_cache_groups[self.attn_gid]
+        self.block_size = attn_group.kv_cache_spec.block_size
 
+        # All kv_cache_groups share the same physical block pool, so
+        # block IDs span [0, num_blocks) regardless of how many groups
+        # exist. Sizing to the full pool avoids index-out-of-range
+        # when different groups happen to land on the same block.
         hf_config = vllm_config.model_config.hf_text_config
-        shape = (
-            max_num_kv_tokens,
+        num_experts = get_num_experts(hf_config)
+        num_experts_per_tok = _get_num_experts_per_tok(hf_config)
+        max_num_slots = kv_cache_config.num_blocks * self.block_size
+        # Expert IDs are 0..num_experts-1; uint8 fits 256 distinct
+        # values so the boundary is ``<= 256`` (NOT ``< 256``). Keeping
+        # this narrow matters because the slot buffer is sized for the
+        # whole block pool and can reach multiple GB.
+        expert_id_dtype = np.uint8 if num_experts <= 256 else np.uint16
+        self.routed_experts_by_slot = np.zeros(
+            (
+                max_num_slots,
+                hf_config.num_hidden_layers,
+                num_experts_per_tok,
+            ),
+            dtype=expert_id_dtype,
+        )
+        logger.info(
+            "RoutedExpertsManager CPU buffer: %.2f GB "
+            "(slots=%d, layers=%d, top_k=%d, dtype=%s)",
+            self.routed_experts_by_slot.nbytes / 1e9,
+            max_num_slots,
             hf_config.num_hidden_layers,
             hf_config.num_experts_per_tok,
+            self.routed_experts_by_slot.dtype.name,
         )
 
-        self.dp_rank = vllm_config.parallel_config.data_parallel_rank
-        instance_id = vllm_config.instance_id
-        self._lock_file = f"{_LOCK_FILE_PREFIX}_{instance_id}_{self.dp_rank}.lock"
-        shm_name = f"{_BUFFER_PREFIX}_{instance_id}_{self.dp_rank}"
-
-        with _file_lock(self._lock_file, mode="rb+"):
-            # Avoid resource_tracker registering the shared memory
-            with patch(
-                "multiprocessing.resource_tracker.register",
-                lambda *args, **kwargs: None,
-            ):
-                self._shm = shared_memory.SharedMemory(name=shm_name)
-
-            self._host_buffer_view = np.ndarray(
-                shape, dtype=np.int32, buffer=self._shm.buf
-            )
+    def store_batch(self, data: np.ndarray, slot_mapping: np.ndarray) -> None:
+        """Persist one step's routed experts into the slot buffer.
 
-    def get_routed_experts(self, indices: np.ndarray) -> np.ndarray:
+        Equivalent to ``slot_buffer[slot_mapping] = data``; numpy fancy
+        indexing handles repeated / out-of-order indices. Called once
+        per scheduler step in ``update_from_output``.
         """
-        Read routed expert data from shared memory.
+        self.routed_experts_by_slot[slot_mapping] = data
+
+    def get(
+        self,
+        block_ids: list[int],
+        num_tokens: int,
+        token_start: int = 0,
+    ) -> np.ndarray:
+        """Read routed experts data for a completed / preempted request.
+
+        Reconstructs a per-token slot_mapping from the request's block
+        IDs and returns the routing slice. Because numpy fancy indexing
+        returns a **copy** (not a view), the returned ndarray is safe
+        to hold across subsequent :meth:`store_batch` calls — do not
+        replace the fancy index with a slice without re-verifying.
 
         Args:
-            indices: Array of indices to read.
+            block_ids: Block IDs from the attention KV-cache group.
+            num_tokens: Number of tokens that have gone through a forward
+                pass and therefore have routing data written to their
+                slots (typically ``request.num_tokens - 1``; the last
+                sampled token has not been forwarded yet). Slots beyond
+                ``request.num_computed_tokens`` are zero-initialized.
+            token_start: Skip the first ``token_start`` tokens from the
+                result. The slot_mapping is sliced before the fancy-index
+                read, so only the requested slots are fetched — no large
+                intermediate array is allocated. Clamped to
+                ``[0, num_tokens]`` automatically.
 
         Returns:
-            Copy of the expert routing data for the given indices.
+            Array of shape (num_tokens - token_start, num_layers,
+            num_experts_per_tok).
         """
-        if self._host_buffer_view is None:
-            raise RuntimeError("Buffer not attached. Call attach_buffer() first.")
-        if self._lock_file is None:
-            raise RuntimeError("Lock file not initialized.")
-
-        with _file_lock(self._lock_file, mode="rb+"):
-            return self._host_buffer_view[indices, :, :].copy()
-
-    def cleanup(self) -> None:
-        """Explicitly clean up resources (close without unlink)."""
-        if self._shm is not None:
-            try:
-                self._shm.close()
-            except Exception:
-                logger.debug("Exception during cleanup for reader", exc_info=True)
-            finally:
-                self._shm = None
-
-    def __del__(self) -> None:
-        """Close shared memory on destruction (do not unlink)."""
-        self.cleanup()
+        bs = self.block_size
+        block_ids_array = np.array(block_ids, dtype=np.int32)
+        block_offsets = np.arange(bs)
+        # slot = block_id * block_size + offset_in_block; flatten the
+        # (num_blocks, block_size) grid and trim to num_tokens, then
+        # skip the first token_start entries so only the requested
+        # range is fetched in a single fancy-index read.
+        slot_mapping = (
+            block_ids_array.reshape(-1, 1) * bs + block_offsets.reshape(1, -1)
+        ).flatten()[:num_tokens]
+        slot_mapping = slot_mapping[token_start:]
+        return self.routed_experts_by_slot[slot_mapping]
diff --git a/vllm/model_executor/layers/fused_moe/router/aiter_shared_routed_fused_moe_router.py b/vllm/model_executor/layers/fused_moe/router/aiter_shared_routed_fused_moe_router.py
new file mode 100644
index 000000000000..8c17ac4d011b
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/router/aiter_shared_routed_fused_moe_router.py
@@ -0,0 +1,141 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections.abc import Callable
+
+import torch
+
+from vllm._aiter_ops import rocm_aiter_ops
+from vllm.distributed.eplb.eplb_state import EplbLayerState
+from vllm.model_executor.layers.fused_moe.config import (
+    RoutingMethodType,
+    get_routing_method_type,
+)
+from vllm.model_executor.layers.fused_moe.router.base_router import BaseRouter
+from vllm.model_executor.layers.fused_moe.router.fused_topk_router import (
+    dispatch_topk_softmax_func,
+)
+
+
+class AiterSharedRoutedFusedMoERouter(BaseRouter):
+    """
+    ROCm AITER router for models with fused shared experts (e.g. Qwen3-MoE).
+
+    When the AITER topk_softmax kernel supports sigmoid fusion, the routing
+    softmax and shared-expert sigmoid are computed in a single kernel launch.
+    Otherwise the shared-expert weights are injected into the pre-allocated
+    AITER buffer via a fallback path.
+
+    Only instantiated when rocm_aiter fused-MoE is active and
+    num_fused_shared_experts > 0.
+    """
+
+    def __init__(
+        self,
+        top_k: int,
+        global_num_experts: int,
+        num_fused_shared_experts: int,
+        eplb_state: EplbLayerState | None = None,
+        scoring_func: str = "softmax",
+        renormalize: bool = True,
+        indices_type_getter: Callable[[], torch.dtype | None] | None = None,
+    ):
+        super().__init__(
+            top_k=top_k,
+            global_num_experts=global_num_experts,
+            eplb_state=eplb_state,
+            indices_type_getter=indices_type_getter,
+        )
+        self.renormalize = renormalize
+        self.scoring_func = scoring_func
+        self.num_fused_shared_experts = num_fused_shared_experts
+
+    @property
+    def routing_method_type(self) -> RoutingMethodType:
+        return get_routing_method_type(
+            scoring_func=self.scoring_func,
+            top_k=self.top_k,
+            renormalize=self.renormalize,
+            num_expert_group=None,
+            has_e_score_bias=False,
+        )
+
+    def _compute_routing(
+        self,
+        hidden_states: torch.Tensor,
+        router_logits: torch.Tensor,
+        indices_type: torch.dtype | None,
+        *,
+        input_ids: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        assert hidden_states.size(0) == router_logits.size(0), (
+            "Number of tokens mismatch"
+        )
+
+        from vllm.model_executor.layers.fused_moe.experts.rocm_aiter_moe import (
+            aiter_topK_meta_data,
+        )
+
+        M = hidden_states.size(0)
+        topk = self.top_k
+        num_fse = self.num_fused_shared_experts
+
+        token_expert_indices = torch.empty(
+            M, topk, dtype=torch.int32, device=hidden_states.device
+        )
+
+        if rocm_aiter_ops.fuse_sigmoid_in_kernel(aiter_topK_meta_data):
+            total_topk_weights, total_topk_ids = aiter_topK_meta_data  # type: ignore[misc]
+            total_topk_weights_slice = total_topk_weights[:M]
+            topk_ids_slice = total_topk_ids[:M, :topk]
+
+            topk_func = dispatch_topk_softmax_func(use_rocm_aiter=True)
+            topk_func(
+                total_topk_weights_slice,
+                topk_ids_slice,
+                token_expert_indices,
+                router_logits,
+                self.renormalize,
+                num_fse,
+                "sigmoid",
+            )
+            return total_topk_weights_slice, total_topk_ids[:M]
+
+        routing_logits = router_logits[:, :-num_fse]
+        shared_logits = router_logits[:, -num_fse:]
+
+        topk_weights = torch.empty(
+            M, topk, dtype=torch.float32, device=hidden_states.device
+        )
+        topk_ids = torch.empty(
+            M,
+            topk,
+            dtype=torch.int32 if indices_type is None else indices_type,
+            device=hidden_states.device,
+        )
+
+        topk_func = dispatch_topk_softmax_func(
+            use_rocm_aiter=rocm_aiter_ops.is_fused_moe_enabled()
+        )
+        topk_weights, topk_ids = topk_func(
+            topk_weights,
+            topk_ids,
+            token_expert_indices,
+            routing_logits,
+            self.renormalize,
+        )
+
+        if aiter_topK_meta_data is not None:
+            from vllm.model_executor.layers.fused_moe.experts.rocm_aiter_moe import (
+                inject_shared_expert_weights,
+            )
+
+            shared_weights = torch.sigmoid(shared_logits)
+            topk_weights, topk_ids = inject_shared_expert_weights(
+                topk_weights,
+                topk_ids,
+                topk=topk,
+                num_fused_shared_experts=num_fse,
+                shared_expert_weights=shared_weights,
+            )
+
+        return topk_weights, topk_ids
diff --git a/vllm/model_executor/layers/fused_moe/router/base_router.py b/vllm/model_executor/layers/fused_moe/router/base_router.py
index 6332827d1d09..3bc83e0648e8 100644
--- a/vllm/model_executor/layers/fused_moe/router/base_router.py
+++ b/vllm/model_executor/layers/fused_moe/router/base_router.py
@@ -10,61 +10,52 @@
     FusedMoERouter,
 )
 from vllm.platforms import current_platform
+from vllm.triton_utils import tl, triton
 
 if current_platform.is_cuda_alike():
 
-    @torch.compile(dynamic=True, backend=current_platform.simple_compile_backend)
-    def eplb_map_to_physical_and_record(
-        topk_ids: torch.Tensor,
-        expert_load_view: torch.Tensor,
-        logical_to_physical_map: torch.Tensor,
-        logical_replica_count: torch.Tensor,
-    ) -> torch.Tensor:
-        """
-        Map the logical expert ids to physical expert ids
-        and record the expert load metrics.
-
-        This will select a pseudo-random replica for each logical expert.
-        Only used for EPLB.
-
-        Args:
-            topk_ids: The logical expert ids.
-            expert_load_view: The expert load view.
-            logical_to_physical_map: The logical to physical map.
-            logical_replica_count: The logical replica count.
+    @triton.jit
+    def _eplb_map_and_record_i32_kernel(
+        topk_ids_ptr,
+        logical_replica_count_ptr,
+        logical_to_physical_ptr,
+        out_ids_ptr,
+        out_ptr,
+        record_enabled_ptr,
+        num_logical_experts,
+        map_slots,
+        out_size,
+        numel,
+        num_active_experts,
+        BLOCK_SIZE: tl.constexpr,
+    ):
+        pid = tl.program_id(0)
+        offs = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+        mask = offs < numel
 
-        Returns:
-            The physical expert ids.
-        """
+        expert_id = tl.load(topk_ids_ptr + offs, mask=mask, other=0).to(tl.int64)
+        valid_expert = (expert_id >= 0) & (expert_id < num_logical_experts)
+        safe_expert_id = tl.where(valid_expert, expert_id, 0)
 
         # 1. Convert the logical expert ids to physical expert ids
-        # Directly select a random replica for each logical expert
-
-        # In case `indices_type` is not `torch.long` or `torch.int`,
-        # e.g. `torch.uint32` as required by dispatch/combine kernels
-        topk_ids_long = topk_ids.long()
-        # Use (token position) modulo (replica count)
-        # to deterministically choose a replica
-        replica_count = logical_replica_count[topk_ids_long]
-        # Flatten-position based index, reshaped back to `topk_ids` shape
-        pos_indices = torch.arange(
-            topk_ids.numel(), device=topk_ids.device, dtype=torch.long
-        ).reshape_as(topk_ids)
-        # Compute pseudo-random indices by modulo
-        replica_indices = (pos_indices % replica_count).unsqueeze(-1)
-        physical_ids = (
-            logical_to_physical_map[topk_ids_long]
-            .gather(-1, replica_indices)
-            .squeeze(-1)
+        replica_count = tl.load(
+            logical_replica_count_ptr + safe_expert_id,
+            mask=mask & valid_expert,
+            other=1,
         )
-
-        topk_ids = physical_ids
+        # Avoid invalid modulo/div by forcing at least 1.
+        replica_count = tl.maximum(replica_count, 1)
+        # floor(2^32 / phi), classic Knuth multiplicative hash multiplier.
+        KNUTH_MULTIPLIER = 2654435769
+        token_idx = (offs // num_active_experts).to(tl.int64)
+        hashed = (token_idx * KNUTH_MULTIPLIER) & 0xFFFFFFFF
+        replica_idx = hashed % replica_count
 
         # 2. Record expert load metrics.
 
         # TODO(bowen): When using `FusedMoEModularKernel`, this
         # can be done in a more unified way, since
-        # `FusedMoEPrepareAndFinalizeModular` will return the expert
+        # `FusedMoEPrepareAndFinalize` will return the expert
         # token count, in some cases directly from the kernel.
         # However, now there are many code paths not using
         # the modular kernel, e.g. calling `fused_experts`,
@@ -73,17 +64,65 @@ def eplb_map_to_physical_and_record(
         # If later refactor moved all the MoE kernel calls
         # to the modular kernel, we can move this logic there
         # to achieve better efficiency.
+        map_index = safe_expert_id * map_slots + replica_idx
+        physical_id = tl.load(
+            logical_to_physical_ptr + map_index,
+            mask=mask & valid_expert,
+            other=-1,
+        )
+        tl.store(out_ids_ptr + offs, physical_id, mask=mask)
 
-        # `expert_load_view`: (num_physical_experts,)
+        record_enabled = tl.load(record_enabled_ptr) != 0
+        valid = mask & record_enabled & (physical_id >= 0) & (physical_id < out_size)
+        safe_physical_id = tl.where(physical_id >= 0, physical_id, 0)
+        tl.atomic_add(out_ptr + safe_physical_id, 1, mask=valid)
 
-        # `torch.bincount` is not compilable, so use `scatter_add_` instead.
-        topk_ids_flatten = topk_ids.flatten()
-        expert_load_view.scatter_add_(
-            dim=0,
-            index=topk_ids_flatten.long(),
-            src=torch.ones_like(topk_ids_flatten).to(expert_load_view),
+    def _eplb_map_and_record_triton(
+        topk_ids: torch.Tensor,
+        logical_to_physical_map: torch.Tensor,
+        logical_replica_count: torch.Tensor,
+        expert_load_view: torch.Tensor,
+        record_enabled: torch.Tensor,
+    ) -> torch.Tensor:
+        topk_ids_in = topk_ids.contiguous().to(dtype=torch.int32)
+        numel = topk_ids_in.numel()
+        if numel == 0:
+            return topk_ids
+        num_active_experts = topk_ids_in.shape[-1]
+        out_flat = torch.empty((numel,), device=topk_ids.device, dtype=topk_ids.dtype)
+        grid = lambda meta: (triton.cdiv(numel, meta["BLOCK_SIZE"]),)
+        assert expert_load_view.is_contiguous()
+        _eplb_map_and_record_i32_kernel[grid](
+            topk_ids_in,
+            logical_replica_count.contiguous(),
+            logical_to_physical_map.contiguous(),
+            out_flat,
+            expert_load_view,
+            record_enabled,
+            logical_replica_count.shape[0],
+            logical_to_physical_map.shape[1],
+            expert_load_view.shape[0],
+            numel,
+            num_active_experts,
+            BLOCK_SIZE=256,
+        )
+        return out_flat.reshape(topk_ids.shape)
+
+    def eplb_map_to_physical_and_record(
+        topk_ids: torch.Tensor,
+        expert_load_view: torch.Tensor,
+        logical_to_physical_map: torch.Tensor,
+        logical_replica_count: torch.Tensor,
+        record_enabled: torch.Tensor,
+    ) -> torch.Tensor:
+        # Fused triton implementation: mapping + optional recording in one kernel.
+        return _eplb_map_and_record_triton(
+            topk_ids=topk_ids,
+            logical_to_physical_map=logical_to_physical_map,
+            logical_replica_count=logical_replica_count,
+            expert_load_view=expert_load_view,
+            record_enabled=record_enabled,
         )
-        return topk_ids
 else:
 
     def eplb_map_to_physical_and_record(
@@ -91,8 +130,8 @@ def eplb_map_to_physical_and_record(
         expert_load_view: torch.Tensor,
         logical_to_physical_map: torch.Tensor,
         logical_replica_count: torch.Tensor,
+        record_enabled: torch.Tensor,
     ) -> torch.Tensor:
-        # CPU fallback: no EPLB so just return as is
         return topk_ids
 
 
@@ -109,8 +148,7 @@ def __init__(
         self,
         top_k: int,
         global_num_experts: int,
-        eplb_state: EplbLayerState,
-        enable_eplb: bool = False,
+        eplb_state: EplbLayerState | None = None,
         # TODO(bnell): Once the MK is constructed at layer init time, we
         # can make this a plain value instead of a callback.
         indices_type_getter: Callable[[], torch.dtype | None] | None = None,
@@ -120,12 +158,17 @@ def __init__(
         time, so we need to supply a callback to get it at runtime.  This is
         because the indices type is supplied by modular kernels which are
         created after MoE layer/router construction.
+
+        Args:
+            top_k: Number of experts to select per token
+            global_num_experts: Total number of experts
+            eplb_state: Optional EPLBLayerState for load balancing
+            indices_type_getter: Optional callback to get indices dtype
         """
         super().__init__()
         self.top_k = top_k
         self.global_num_experts = global_num_experts
         self.eplb_state = eplb_state
-        self.enable_eplb = enable_eplb
         self.indices_type_getter = indices_type_getter
         self.capture_fn: Callable[[torch.Tensor], None] | None = None
 
@@ -135,17 +178,16 @@ def set_capture_fn(self, capture_fn: Callable[[torch.Tensor], None] | None) -> N
 
     def _validate_eplb_state(self) -> None:
         """Validate that EPLB state is properly initialized if EPLB is enabled."""
-        if self.enable_eplb:
-            if self.eplb_state.expert_load_view is None:
-                raise ValueError("enable_eplb=True requires expert_load_view != None")
-            if self.eplb_state.logical_to_physical_map is None:
-                raise ValueError(
-                    "enable_eplb=True requires logical_to_physical_map != None"
-                )
-            if self.eplb_state.logical_replica_count is None:
-                raise ValueError(
-                    "enable_eplb=True requires logical_replica_count != None"
-                )
+        if self.eplb_state is not None:
+            eplb_state = self.eplb_state
+            if eplb_state.expert_load_view is None:
+                raise ValueError("EPLB requires expert_load_view != None")
+            if eplb_state.logical_to_physical_map is None:
+                raise ValueError("EPLB requires logical_to_physical_map != None")
+            if eplb_state.logical_replica_count is None:
+                raise ValueError("EPLB requires logical_replica_count != None")
+            if eplb_state.should_record_tensor is None:
+                raise ValueError("EPLB requires should_record_tensor != None")
 
     def _get_indices_type(self) -> torch.dtype | None:
         """Get the desired indices dtype from the getter function."""
@@ -155,15 +197,18 @@ def _get_indices_type(self) -> torch.dtype | None:
 
     def _apply_eplb_mapping(self, topk_ids: torch.Tensor) -> torch.Tensor:
         """Apply EPLB mapping to convert logical expert IDs to physical expert IDs."""
-        if self.enable_eplb:
-            assert self.eplb_state.expert_load_view is not None
-            assert self.eplb_state.logical_to_physical_map is not None
-            assert self.eplb_state.logical_replica_count is not None
+        if self.eplb_state is not None:
+            eplb_state = self.eplb_state
+            assert eplb_state.expert_load_view is not None
+            assert eplb_state.logical_to_physical_map is not None
+            assert eplb_state.logical_replica_count is not None
+            assert eplb_state.should_record_tensor is not None
             return eplb_map_to_physical_and_record(
                 topk_ids=topk_ids,
-                expert_load_view=self.eplb_state.expert_load_view,
-                logical_to_physical_map=self.eplb_state.logical_to_physical_map,
-                logical_replica_count=self.eplb_state.logical_replica_count,
+                logical_to_physical_map=eplb_state.logical_to_physical_map,
+                logical_replica_count=eplb_state.logical_replica_count,
+                expert_load_view=eplb_state.expert_load_view,
+                record_enabled=eplb_state.should_record_tensor,
             )
         return topk_ids
 
@@ -183,6 +228,8 @@ def _compute_routing(
         hidden_states: torch.Tensor,
         router_logits: torch.Tensor,
         indices_type: torch.dtype | None,
+        *,
+        input_ids: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """
         Compute the actual routing logic.
@@ -204,6 +251,8 @@ def select_experts(
         self,
         hidden_states: torch.Tensor,
         router_logits: torch.Tensor,
+        *,
+        input_ids: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """
         Route the input hidden states to the top-k experts based on the
@@ -233,7 +282,7 @@ def select_experts(
 
         # Step 3: Compute routing (delegated to subclass)
         topk_weights, topk_ids = self._compute_routing(
-            hidden_states, router_logits, indices_type
+            hidden_states, router_logits, indices_type, input_ids=input_ids
         )
 
         # Capture logical ids before EPLB mapping.
diff --git a/vllm/model_executor/layers/fused_moe/router/custom_routing_router.py b/vllm/model_executor/layers/fused_moe/router/custom_routing_router.py
index 0367189ca1ab..731afffd15f8 100644
--- a/vllm/model_executor/layers/fused_moe/router/custom_routing_router.py
+++ b/vllm/model_executor/layers/fused_moe/router/custom_routing_router.py
@@ -16,17 +16,15 @@ def __init__(
         self,
         top_k: int,
         global_num_experts: int,
-        eplb_state: EplbLayerState,
         custom_routing_function: Callable,
+        eplb_state: EplbLayerState | None = None,
         renormalize: bool = True,
-        enable_eplb: bool = False,
         indices_type_getter: Callable[[], torch.dtype | None] | None = None,
     ):
         super().__init__(
             top_k=top_k,
             global_num_experts=global_num_experts,
             eplb_state=eplb_state,
-            enable_eplb=enable_eplb,
             indices_type_getter=indices_type_getter,
         )
         self.custom_routing_function = custom_routing_function
@@ -34,11 +32,15 @@ def __init__(
 
     @property
     def routing_method_type(self) -> RoutingMethodType:
+        from vllm.model_executor.models.cohere2_moe import token_choice_with_bias
         from vllm.model_executor.models.llama4 import Llama4MoE
 
         # NOTE: FLASHINFER_TRTLLM support the Llama4 router.
         if self.custom_routing_function == Llama4MoE.custom_routing_function:
             return RoutingMethodType.Llama4
+        # Cohere MoE uses a sigmoid -> top-k -> renormalize routing function.
+        if self.custom_routing_function == token_choice_with_bias:
+            return RoutingMethodType.SigmoidRenorm
         return RoutingMethodType.Custom
 
     def _compute_routing(
@@ -46,6 +48,8 @@ def _compute_routing(
         hidden_states: torch.Tensor,
         router_logits: torch.Tensor,
         indices_type: torch.dtype | None,
+        *,
+        input_ids: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Compute routing using the custom routing function."""
         topk_weights, topk_ids = self.custom_routing_function(
diff --git a/vllm/model_executor/layers/fused_moe/router/fused_moe_router.py b/vllm/model_executor/layers/fused_moe/router/fused_moe_router.py
index c322a8cd4cd6..d82085254f9b 100644
--- a/vllm/model_executor/layers/fused_moe/router/fused_moe_router.py
+++ b/vllm/model_executor/layers/fused_moe/router/fused_moe_router.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from abc import ABC, abstractmethod
+from collections.abc import Callable
 
 import torch
 
@@ -13,6 +14,13 @@ class FusedMoERouter(ABC):
     method that is used for routing hidden states based on router logits.
     """
 
+    @abstractmethod
+    def set_capture_fn(
+        self,
+        capture_fn: Callable[[torch.Tensor], None] | None,
+    ) -> None:
+        raise NotImplementedError
+
     @property
     @abstractmethod
     def routing_method_type(self) -> RoutingMethodType:
@@ -23,6 +31,8 @@ def select_experts(
         self,
         hidden_states: torch.Tensor,
         router_logits: torch.Tensor,
+        *,
+        input_ids: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """
         Route the input hidden states to the top-k experts based on the
diff --git a/vllm/model_executor/layers/fused_moe/router/fused_topk_bias_router.py b/vllm/model_executor/layers/fused_moe/router/fused_topk_bias_router.py
index bcabb1f3672b..b9cb132ce84d 100644
--- a/vllm/model_executor/layers/fused_moe/router/fused_topk_bias_router.py
+++ b/vllm/model_executor/layers/fused_moe/router/fused_topk_bias_router.py
@@ -4,6 +4,7 @@
 from collections.abc import Callable
 
 import torch
+import torch.nn.functional as F
 
 import vllm._custom_ops as ops
 import vllm.envs as envs
@@ -23,6 +24,7 @@ def vllm_topk_softmax(
     gating_output: torch.Tensor,
     renormalize: bool = False,
     e_score_correction_bias: torch.Tensor | None = None,
+    enable_pdl: bool = False,
 ) -> tuple[torch.Tensor, ...]:
     ops.topk_softmax(
         topk_weights,
@@ -31,6 +33,7 @@ def vllm_topk_softmax(
         gating_output,
         renormalize,
         e_score_correction_bias,
+        enable_pdl,
     )
 
     return topk_weights, topk_indices
@@ -43,6 +46,7 @@ def vllm_topk_sigmoid(
     gating_output: torch.Tensor,
     renormalize: bool = False,
     e_score_correction_bias: torch.Tensor | None = None,
+    enable_pdl: bool = False,
 ) -> tuple[torch.Tensor, ...]:
     ops.topk_sigmoid(
         topk_weights,
@@ -51,6 +55,94 @@ def vllm_topk_sigmoid(
         gating_output,
         renormalize,
         e_score_correction_bias,
+        enable_pdl,
+    )
+
+    return topk_weights, topk_indices
+
+
+def _topk_softplus_sqrt_torch(
+    topk_weights: torch.Tensor,
+    topk_indices: torch.Tensor,
+    token_expert_indices: torch.Tensor,
+    gating_output: torch.Tensor,
+    renormalize: bool = False,
+    e_score_correction_bias: torch.Tensor | None = None,
+    input_tokens: torch.Tensor | None = None,
+    hash_indices_table: torch.Tensor | None = None,
+    routed_scaling_factor: float = 1.0,
+) -> tuple[torch.Tensor, ...]:
+    """Pure PyTorch fallback for topk_softplus_sqrt (XPU/CPU)."""
+    # scores = sqrt(softplus(gating_output))
+    scores = torch.sqrt(F.softplus(gating_output.float()))
+
+    # Bias is used for expert SELECTION only, not for weight computation.
+    # Using biased scores as weights flattens the distribution when the bias
+    # is near-uniform (e.g., DSv4-Flash where all biases ≈ 8.08).
+    if e_score_correction_bias is not None:
+        scores_for_choice = scores + e_score_correction_bias.float()
+    else:
+        scores_for_choice = scores
+
+    topk = topk_weights.shape[-1]
+
+    if hash_indices_table is not None and input_tokens is not None:
+        # Hash MoE: expert indices predetermined by lookup table
+        # hash_indices_table: [vocab_size, topk] mapping token_id -> expert_ids
+        expert_ids = hash_indices_table[input_tokens.long()]  # [M, topk]
+        topk_indices.copy_(expert_ids)
+        # Gather weights from unbiased scores
+        weights = scores.gather(1, expert_ids.long())
+    else:
+        # Standard topk selection using biased scores
+        _, indices = torch.topk(scores_for_choice, k=topk, dim=-1)
+        topk_indices.copy_(indices)
+        # Gather weights from unbiased scores
+        weights = scores.gather(1, indices)
+
+    if renormalize:
+        weights = weights / (weights.sum(dim=-1, keepdim=True).clamp(min=1e-20))
+
+    topk_weights.copy_(weights * routed_scaling_factor)
+    return topk_weights, topk_indices
+
+
+def vllm_topk_softplus_sqrt(
+    topk_weights: torch.Tensor,
+    topk_indices: torch.Tensor,
+    token_expert_indices: torch.Tensor,
+    gating_output: torch.Tensor,
+    renormalize: bool = False,
+    e_score_correction_bias: torch.Tensor | None = None,
+    input_tokens: torch.Tensor | None = None,
+    hash_indices_table: torch.Tensor | None = None,
+    routed_scaling_factor: float = 1.0,
+) -> tuple[torch.Tensor, ...]:
+    from vllm.platforms import current_platform
+
+    if current_platform.is_xpu():
+        return _topk_softplus_sqrt_torch(
+            topk_weights,
+            topk_indices,
+            token_expert_indices,
+            gating_output,
+            renormalize,
+            e_score_correction_bias,
+            input_tokens,
+            hash_indices_table,
+            routed_scaling_factor,
+        )
+
+    ops.topk_hash_softplus_sqrt(
+        topk_weights,
+        topk_indices,
+        token_expert_indices,
+        gating_output,
+        renormalize,
+        routed_scaling_factor,
+        e_score_correction_bias,
+        input_tokens,
+        hash_indices_table,
     )
 
     return topk_weights, topk_indices
@@ -72,11 +164,15 @@ def _aiter_get_num_expert_group(num_experts: int) -> int:
 def fused_topk_bias(
     hidden_states: torch.Tensor,
     gating_output: torch.Tensor,
+    scoring_func: str,
     e_score_correction_bias: torch.Tensor,
     topk: int,
     renormalize: bool,
-    scoring_func: str = "softmax",
     indices_type: torch.dtype | None = None,
+    input_tokens: torch.Tensor | None = None,
+    hash_indices_table: torch.Tensor | None = None,
+    routed_scaling_factor: float = 1.0,
+    enable_pdl: bool = False,
 ):
     if not rocm_aiter_ops.is_fused_moe_enabled():
         assert hidden_states.size(0) == gating_output.size(0), (
@@ -106,7 +202,10 @@ def fused_topk_bias(
                 gating_output,
                 renormalize,
                 e_score_correction_bias,
+                enable_pdl,
             )
+            if routed_scaling_factor != 1.0:
+                topk_weights *= routed_scaling_factor
             return topk_weights, topk_ids
         elif scoring_func == "sigmoid":
             topk_weights, topk_ids = vllm_topk_sigmoid(
@@ -116,10 +215,26 @@ def fused_topk_bias(
                 gating_output,
                 renormalize,
                 e_score_correction_bias,
+                enable_pdl,
             )
+            if routed_scaling_factor != 1.0:
+                topk_weights *= routed_scaling_factor
             return topk_weights, topk_ids
+        elif scoring_func == "sqrtsoftplus":
+            return vllm_topk_softplus_sqrt(
+                topk_weights,
+                topk_ids,
+                token_expert_indices,
+                gating_output,
+                renormalize,
+                e_score_correction_bias,
+                input_tokens,
+                hash_indices_table,
+                routed_scaling_factor,
+            )
         else:
             raise ValueError(f"Unsupported scoring function: {scoring_func}")
+
     elif rocm_aiter_ops.is_fused_moe_enabled() and scoring_func == "sigmoid":
         M = hidden_states.size(0)
         num_experts = gating_output.shape[-1]
@@ -136,13 +251,15 @@ def fused_topk_bias(
             )
             rocm_aiter_ops.biased_grouped_topk(
                 gating_output,
-                e_score_correction_bias.to(gating_output.dtype),
+                e_score_correction_bias,
                 topk_weights,
                 topk_ids,
                 num_expert_group=num_expert_group,
                 topk_group=num_expert_group,
                 need_renorm=renormalize,
             )
+            if routed_scaling_factor != 1.0:
+                topk_weights *= routed_scaling_factor
             return topk_weights, topk_ids
 
     n_routed_experts = gating_output.shape[-1]
@@ -150,20 +267,31 @@ def fused_topk_bias(
         scores = gating_output.softmax(dim=-1)
     elif scoring_func == "sigmoid":
         scores = gating_output.sigmoid()
+    elif scoring_func == "sqrtsoftplus":
+        scores = F.softplus(gating_output).sqrt()
     else:
         raise ValueError(f"Unsupported scoring function: {scoring_func}")
-
-    scores_for_choice = scores.view(
-        -1, n_routed_experts
-    ) + e_score_correction_bias.unsqueeze(0)
-
+    if e_score_correction_bias is not None:
+        scores_for_choice = scores.view(
+            -1, n_routed_experts
+        ) + e_score_correction_bias.unsqueeze(0)
+    else:
+        scores_for_choice = scores.view(-1, n_routed_experts)
     # For batch invariance, use sorted=True to ensure deterministic expert selection
-    use_sorted = envs.VLLM_BATCH_INVARIANT
-    topk_indices = torch.topk(scores_for_choice, k=topk, dim=-1, sorted=use_sorted)[1]
+    if hash_indices_table is not None:
+        topk_indices = hash_indices_table[input_tokens]
+    else:
+        use_sorted = envs.VLLM_BATCH_INVARIANT
+        topk_indices = torch.topk(scores_for_choice, k=topk, dim=-1, sorted=use_sorted)[
+            1
+        ]
     topk_weights = scores.gather(1, topk_indices)
     if renormalize:
         topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True)
-    return topk_weights.to(torch.float32), topk_indices.to(
+    topk_weights = topk_weights.to(torch.float32)
+    if routed_scaling_factor != 1.0:
+        topk_weights *= routed_scaling_factor
+    return topk_weights, topk_indices.to(
         torch.int32 if indices_type is None else indices_type
     )
 
@@ -175,25 +303,28 @@ def __init__(
         self,
         top_k: int,
         global_num_experts: int,
-        eplb_state: EplbLayerState,
-        e_score_correction_bias: torch.Tensor,
-        scoring_func: str,
+        e_score_correction_bias: torch.Tensor | None = None,
         renormalize: bool = True,
         routed_scaling_factor: float = 1.0,
-        enable_eplb: bool = False,
+        eplb_state: EplbLayerState | None = None,
         indices_type_getter: Callable[[], torch.dtype | None] | None = None,
+        *,
+        scoring_func: str = "sigmoid",
+        hash_indices_table: torch.Tensor | None = None,
+        enable_pdl: bool = False,
     ):
         super().__init__(
             top_k=top_k,
             global_num_experts=global_num_experts,
             eplb_state=eplb_state,
-            enable_eplb=enable_eplb,
             indices_type_getter=indices_type_getter,
         )
         self.e_score_correction_bias = e_score_correction_bias
         self.renormalize = renormalize
         self.scoring_func = scoring_func
         self.routed_scaling_factor = routed_scaling_factor
+        self._hash_indices_table = hash_indices_table
+        self.enable_pdl = enable_pdl
 
     @property
     def routing_method_type(self) -> RoutingMethodType:
@@ -210,19 +341,24 @@ def _compute_routing(
         hidden_states: torch.Tensor,
         router_logits: torch.Tensor,
         indices_type: torch.dtype | None,
+        *,
+        input_ids: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Compute routing using fused top-k with bias."""
         topk_weights, topk_ids = fused_topk_bias(
             hidden_states=hidden_states,
             gating_output=router_logits,
-            e_score_correction_bias=self.e_score_correction_bias.data,
+            scoring_func=self.scoring_func,
+            e_score_correction_bias=self.e_score_correction_bias.data
+            if self.e_score_correction_bias is not None
+            else None,
             topk=self.top_k,
             renormalize=self.renormalize,
-            scoring_func=self.scoring_func,
             indices_type=indices_type,
+            input_tokens=input_ids,
+            hash_indices_table=self._hash_indices_table,
+            routed_scaling_factor=self.routed_scaling_factor,
+            enable_pdl=self.enable_pdl,
         )
 
-        if self.routed_scaling_factor != 1.0:
-            topk_weights *= self.routed_scaling_factor
-
         return topk_weights, topk_ids
diff --git a/vllm/model_executor/layers/fused_moe/router/fused_topk_router.py b/vllm/model_executor/layers/fused_moe/router/fused_topk_router.py
index 01376e6b16b5..b61ca8016161 100644
--- a/vllm/model_executor/layers/fused_moe/router/fused_topk_router.py
+++ b/vllm/model_executor/layers/fused_moe/router/fused_topk_router.py
@@ -20,6 +20,7 @@ def vllm_topk_softmax(
     token_expert_indices: torch.Tensor,
     gating_output: torch.Tensor,
     renormalize: bool = False,
+    enable_pdl: bool = False,
 ) -> tuple[torch.Tensor, ...]:
     ops.topk_softmax(
         topk_weights,
@@ -27,6 +28,7 @@ def vllm_topk_softmax(
         token_expert_indices,
         gating_output,
         renormalize,
+        enable_pdl=enable_pdl,
     )
 
     return topk_weights, topk_indices
@@ -38,6 +40,7 @@ def vllm_topk_sigmoid(
     token_expert_indices: torch.Tensor,
     gating_output: torch.Tensor,
     renormalize: bool = False,
+    enable_pdl: bool = False,
 ) -> tuple[torch.Tensor, ...]:
     ops.topk_sigmoid(
         topk_weights,
@@ -45,6 +48,7 @@ def vllm_topk_sigmoid(
         token_expert_indices,
         gating_output,
         renormalize,
+        enable_pdl=enable_pdl,
     )
 
     return topk_weights, topk_indices
@@ -73,6 +77,7 @@ def fused_topk(
     renormalize: bool,
     indices_type: torch.dtype | None = None,
     scoring_func: str = "softmax",
+    enable_pdl: bool = False,
 ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
     assert hidden_states.size(0) == gating_output.size(0), "Number of tokens mismatch"
 
@@ -92,20 +97,30 @@ def fused_topk(
     )
 
     if scoring_func == "softmax":
-        topk_func = dispatch_topk_softmax_func(
-            use_rocm_aiter=rocm_aiter_ops.is_fused_moe_enabled()
-        )
+        use_rocm_aiter = rocm_aiter_ops.is_fused_moe_enabled()
+        topk_func = dispatch_topk_softmax_func(use_rocm_aiter=use_rocm_aiter)
+        pdl_kwargs = {} if use_rocm_aiter else {"enable_pdl": enable_pdl}
         topk_weights, topk_ids = topk_func(
-            topk_weights, topk_ids, token_expert_indices, gating_output, renormalize
+            topk_weights,
+            topk_ids,
+            token_expert_indices,
+            gating_output,
+            renormalize,
+            **pdl_kwargs,
         )
 
         return topk_weights, topk_ids, token_expert_indices
     elif scoring_func == "sigmoid":
-        topk_func = dispatch_topk_sigmoid_func(
-            use_rocm_aiter=rocm_aiter_ops.is_fused_moe_enabled()
-        )
+        use_rocm_aiter = rocm_aiter_ops.is_fused_moe_enabled()
+        topk_func = dispatch_topk_sigmoid_func(use_rocm_aiter=use_rocm_aiter)
+        pdl_kwargs = {} if use_rocm_aiter else {"enable_pdl": enable_pdl}
         topk_weights, topk_ids = topk_func(
-            topk_weights, topk_ids, token_expert_indices, gating_output, renormalize
+            topk_weights,
+            topk_ids,
+            token_expert_indices,
+            gating_output,
+            renormalize,
+            **pdl_kwargs,
         )
 
         return topk_weights, topk_ids, token_expert_indices
@@ -120,21 +135,21 @@ def __init__(
         self,
         top_k: int,
         global_num_experts: int,
-        eplb_state: EplbLayerState,
         scoring_func: str = "softmax",
         renormalize: bool = True,
-        enable_eplb: bool = False,
+        eplb_state: EplbLayerState | None = None,
         indices_type_getter: Callable[[], torch.dtype | None] | None = None,
+        enable_pdl: bool = False,
     ):
         super().__init__(
             top_k=top_k,
             global_num_experts=global_num_experts,
             eplb_state=eplb_state,
-            enable_eplb=enable_eplb,
             indices_type_getter=indices_type_getter,
         )
         self.renormalize = renormalize
         self.scoring_func = scoring_func
+        self.enable_pdl = enable_pdl
 
     @property
     def routing_method_type(self) -> RoutingMethodType:
@@ -151,6 +166,8 @@ def _compute_routing(
         hidden_states: torch.Tensor,
         router_logits: torch.Tensor,
         indices_type: torch.dtype | None,
+        *,
+        input_ids: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Compute routing using standard fused top-k."""
         topk_weights, topk_ids, token_expert_indices = fused_topk(
@@ -160,6 +177,7 @@ def _compute_routing(
             renormalize=self.renormalize,
             indices_type=indices_type,
             scoring_func=self.scoring_func,
+            enable_pdl=self.enable_pdl,
         )
 
         return topk_weights, topk_ids
diff --git a/vllm/model_executor/layers/fused_moe/router/gate_linear.py b/vllm/model_executor/layers/fused_moe/router/gate_linear.py
index e8ed8a5249d1..0a57a6f4dfeb 100644
--- a/vllm/model_executor/layers/fused_moe/router/gate_linear.py
+++ b/vllm/model_executor/layers/fused_moe/router/gate_linear.py
@@ -12,11 +12,12 @@
 
 @PluggableLayer.register("gate_linear")
 class GateLinear(ReplicatedLinear):
-    """MoE gate linear layer with three-tier GEMM dispatch:
+    """MoE gate linear layer with multi-tier GEMM dispatch:
 
-    1. DSV3 specialized kernel (SM90+, batch<=16, supported dims)
-    2. gpt-oss specialized kernel (SM90+, batch<=128, supported dims)
-    3. cuBLAS bf16×bf16→fp32 (SM90+ + bf16 + fp32 out_dtype)
+    1. DSV3 specialized kernel (SM90+, fp32 out, M<=16, H=7168, E=256/384)
+    2. fp32 specialized kernel  (SM90+, bf16/fp32 in, fp32 out,
+       M<=32, H=3072, E=256)
+    3. cuBLAS bf16×bf16→fp32 (SM90+ + bf16 weight + fp32 out_dtype)
     4. F.linear via ReplicatedLinear (ultimate fallback)
 
     The ``out_dtype`` attribute is mutable and can be set after init
@@ -28,9 +29,10 @@ class GateLinear(ReplicatedLinear):
     DSV3_SUPPORTED_NUM_EXPERTS = [256, 384]
     DSV3_SUPPORTED_HIDDEN_SIZES = [7168]
 
-    # Dimensions supported by the gpt-oss specialized kernel
-    GPT_OSS_SUPPORTED_NUM_EXPERTS = [32, 128]
-    GPT_OSS_SUPPORTED_HIDDEN_SIZES = [2880]
+    # Dimensions supported by the fp32 specialized kernel
+    FP32_SUPPORTED_NUM_EXPERTS = [256]
+    FP32_SUPPORTED_HIDDEN_SIZES = [3072]
+    FP32_MAX_TOKENS = 32
 
     def __init__(
         self,
@@ -50,7 +52,7 @@ def __init__(
         )
 
         # If fp32 compute is required and no specialized kernel is available,
-        # store weights in fp32 so Tier 3 computes in fp32 natively.
+        # store weights in fp32 so the fallback linear path computes in fp32.
         if force_fp32_compute and not can_use_specialized_kernels:
             params_dtype = torch.float32
 
@@ -72,13 +74,14 @@ def __init__(
             and input_size in self.DSV3_SUPPORTED_HIDDEN_SIZES
         )
 
-        # gpt-oss specialized kernel eligibility (SM90+, exact dims)
-        self.allow_gpt_oss_router_gemm = (
-            self.weight.dtype == torch.bfloat16
+        # fp32 specialized kernel eligibility (SM90+, exact dims, fp32 weight)
+        self.allow_fp32_router_gemm = (
+            not bias
+            and self.weight.dtype == torch.float32
             and current_platform.is_cuda()
             and is_hopper_or_blackwell
-            and output_size in self.GPT_OSS_SUPPORTED_NUM_EXPERTS
-            and input_size in self.GPT_OSS_SUPPORTED_HIDDEN_SIZES
+            and output_size in self.FP32_SUPPORTED_NUM_EXPERTS
+            and input_size in self.FP32_SUPPORTED_HIDDEN_SIZES
         )
 
         # cuBLAS bf16→fp32 eligibility
@@ -117,14 +120,19 @@ def forward(
             )
             return output, None
 
-        # Tier 2: gpt-oss specialized kernel
-        if self.allow_gpt_oss_router_gemm:
-            output = torch.ops.vllm.gpt_oss_router_gemm(x, self.weight, self.bias)
+        # Tier 2: fp32 specialized kernel (H=3072, E=256, M<=32)
+        # Dispatch is wrapped in a custom op so that torch.compile/CUDA-graph
+        # capture does not freeze the runtime num_tokens branch.
+        if self.allow_fp32_router_gemm and x.dtype in (
+            torch.float32,
+            torch.bfloat16,
+        ):
+            output = torch.ops.vllm.fp32_router_gemm_dispatch(x, self.weight)
             return output, None
 
         # Tier 3: cuBLAS bf16→fp32
         if self.allow_cublas_router_gemm and x.dtype == torch.bfloat16:
-            output = ops.router_gemm_bf16_fp32(x, self.weight)
+            output = torch.mm(x, self.weight.T, out_dtype=torch.float32)
             return output, None
 
         # Tier 4: F.linear (ReplicatedLinear)
@@ -136,28 +144,32 @@ def forward(
         return output, output_bias
 
 
-def gpt_oss_router_gemm_impl(
-    x: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor
+_FP32_ROUTER_GEMM_MAX_TOKENS = GateLinear.FP32_MAX_TOKENS
+
+
+def fp32_router_gemm_dispatch_impl(
+    x: torch.Tensor, weight: torch.Tensor
 ) -> torch.Tensor:
     """
-    Dynamically run min-latency gemm if num_tokens <= 128.
+    Dynamically run fp32 specialized gemm if num_tokens <= FP32_MAX_TOKENS,
+    otherwise fall back to F.linear.
     This must be wrapped in a custom op because our torch.compile integration
     does not support runtime dispatching on num_tokens.
     """
-    if x.shape[0] <= 128:
-        return ops.gpt_oss_router_gemm(x, weight, bias)
+    if x.shape[0] <= _FP32_ROUTER_GEMM_MAX_TOKENS:
+        return ops.fp32_router_gemm(x, weight)
     else:
-        return torch.nn.functional.linear(x, weight, bias)
+        return torch.nn.functional.linear(x.float(), weight)
 
 
-def gpt_oss_router_gemm_fake(
-    x: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor
+def fp32_router_gemm_dispatch_fake(
+    x: torch.Tensor, weight: torch.Tensor
 ) -> torch.Tensor:
-    return x.new_empty((x.shape[0], weight.shape[0]))
+    return x.new_empty((x.shape[0], weight.shape[0]), dtype=torch.float32)
 
 
 direct_register_custom_op(
-    op_name="gpt_oss_router_gemm",
-    op_func=gpt_oss_router_gemm_impl,
-    fake_impl=gpt_oss_router_gemm_fake,
+    op_name="fp32_router_gemm_dispatch",
+    op_func=fp32_router_gemm_dispatch_impl,
+    fake_impl=fp32_router_gemm_dispatch_fake,
 )
diff --git a/vllm/model_executor/layers/fused_moe/router/grouped_topk_router.py b/vllm/model_executor/layers/fused_moe/router/grouped_topk_router.py
index 1bf141d81e4b..6f792b46a0aa 100644
--- a/vllm/model_executor/layers/fused_moe/router/grouped_topk_router.py
+++ b/vllm/model_executor/layers/fused_moe/router/grouped_topk_router.py
@@ -14,7 +14,7 @@
     RoutingMethodType,
     get_routing_method_type,
 )
-from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (
+from vllm.model_executor.layers.fused_moe.experts.rocm_aiter_moe import (
     rocm_aiter_grouped_topk,
 )
 from vllm.model_executor.layers.fused_moe.router.base_router import BaseRouter
@@ -251,7 +251,6 @@ def __init__(
         self,
         top_k: int,
         global_num_experts: int,
-        eplb_state: EplbLayerState,
         num_expert_group: int,
         topk_group: int,
         renormalize: bool = True,
@@ -259,14 +258,13 @@ def __init__(
         routed_scaling_factor: float = 1.0,
         e_score_correction_bias: torch.Tensor | None = None,
         num_fused_shared_experts: int = 0,
-        enable_eplb: bool = False,
+        eplb_state: EplbLayerState | None = None,
         indices_type_getter: Callable[[], torch.dtype | None] | None = None,
     ):
         super().__init__(
             top_k=top_k,
             global_num_experts=global_num_experts,
             eplb_state=eplb_state,
-            enable_eplb=enable_eplb,
             indices_type_getter=indices_type_getter,
         )
         self.num_expert_group = num_expert_group
@@ -292,6 +290,8 @@ def _compute_routing(
         hidden_states: torch.Tensor,
         router_logits: torch.Tensor,
         indices_type: torch.dtype | None,
+        *,
+        input_ids: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Compute routing using grouped top-k."""
 
@@ -308,6 +308,7 @@ def valid_grouping() -> bool:
                 topk_weights, topk_ids = fused_topk_bias(
                     hidden_states=hidden_states,
                     gating_output=router_logits,
+                    scoring_func=self.scoring_func,
                     e_score_correction_bias=self.e_score_correction_bias.data,
                     topk=self.top_k,
                     renormalize=self.renormalize,
diff --git a/vllm/model_executor/layers/fused_moe/router/norm_gate_linear.py b/vllm/model_executor/layers/fused_moe/router/norm_gate_linear.py
new file mode 100644
index 000000000000..50f1ef7a0efe
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/router/norm_gate_linear.py
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Fused RMSNorm + GateLinear for DeepSeek V4 MoE routing."""
+
+import torch
+from torch import nn
+
+import vllm._custom_ops as ops
+from vllm.model_executor.custom_op import PluggableLayer
+from vllm.model_executor.layers.fused_moe.router.gate_linear import GateLinear
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.utils.torch_utils import direct_register_custom_op
+
+DSV4_PRO_NUM_EXPERTS = 384
+DSV4_PRO_HIDDEN_SIZE = 7168
+DSV4_PRO_MAX_NUM_TOKENS = 16
+
+
+def _dsv4_pro_norm_gate(
+    x: torch.Tensor,
+    norm_weight: torch.Tensor,
+    gate_weight: torch.Tensor,
+    rms_eps: float,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Runtime dispatcher: fused ``dsv4_norm_router_gemm`` (M<=16) vs the
+    unfused ``rms_norm + dsv3_router_gemm`` fallback (M>16).
+
+    """
+    if x.shape[0] <= DSV4_PRO_MAX_NUM_TOKENS:
+        return ops.dsv4_norm_router_gemm(x, norm_weight, gate_weight, rms_eps)
+
+    normed = torch.empty_like(x)
+    # Call `_C::rms_norm` here to avoid select the path of native rms
+    torch.ops._C.rms_norm(normed, x, norm_weight, rms_eps)
+    logits = torch.mm(normed, gate_weight.t(), out_dtype=torch.float32)
+    return normed, logits
+
+
+def _dsv4_pro_norm_gate_fake(
+    x: torch.Tensor,
+    norm_weight: torch.Tensor,
+    gate_weight: torch.Tensor,
+    rms_eps: float,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    num_tokens = x.shape[0]
+    num_experts = gate_weight.shape[0]
+    return (
+        torch.empty_like(x),
+        torch.empty(num_tokens, num_experts, dtype=torch.float32, device=x.device),
+    )
+
+
+direct_register_custom_op(
+    op_name="dsv4_pro_norm_gate",
+    op_func=_dsv4_pro_norm_gate,
+    mutates_args=[],
+    fake_impl=_dsv4_pro_norm_gate_fake,
+)
+
+
+@PluggableLayer.register("norm_gated_linear")
+class NormGateLinear(nn.Module):
+    """RMSNorm + GateLinear, fused on DSV4-Pro only."""
+
+    def __init__(
+        self,
+        hidden_size: int,
+        num_experts: int,
+        rms_eps: float = 1e-6,
+        params_dtype: torch.dtype | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.num_experts = num_experts
+        self.rms_eps = rms_eps
+
+        self.norm = RMSNorm(hidden_size, eps=rms_eps, dtype=params_dtype)
+        self.gate = GateLinear(
+            hidden_size,
+            num_experts,
+            bias=False,
+            out_dtype=torch.float32,  # DSV4 router output is fp32
+            params_dtype=params_dtype,
+            prefix=f"{prefix}.gate" if prefix else "gate",
+        )
+
+        self.e_score_correction_bias = None
+        self.tid2eid = None
+
+        self._fused_kernel_supported = (
+            hidden_size == DSV4_PRO_HIDDEN_SIZE
+            and num_experts == DSV4_PRO_NUM_EXPERTS
+            and self.gate.allow_dsv3_router_gemm  # cuda platform
+        )
+
+    def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+        if self._fused_kernel_supported:
+            assert x.shape[1] == DSV4_PRO_HIDDEN_SIZE
+            assert self.gate.weight.shape == (
+                DSV4_PRO_NUM_EXPERTS,
+                DSV4_PRO_HIDDEN_SIZE,
+            )
+            # This must be wrapped in a custom op because our torch.compile integration
+            # does not support runtime dispatching on num_tokens.
+            return torch.ops.vllm.dsv4_pro_norm_gate(
+                x, self.norm.weight, self.gate.weight, self.rms_eps
+            )
+
+        # Non-Pro fallback (e.g. DSV4-Flash with hidden_size=4096):
+
+        normed_x = self.norm(x)
+        logits, _ = self.gate(normed_x)
+        return normed_x, logits
diff --git a/vllm/model_executor/layers/fused_moe/router/router_factory.py b/vllm/model_executor/layers/fused_moe/router/router_factory.py
index 11027e894bee..a78c5eef26e6 100644
--- a/vllm/model_executor/layers/fused_moe/router/router_factory.py
+++ b/vllm/model_executor/layers/fused_moe/router/router_factory.py
@@ -5,8 +5,14 @@
 import torch
 
 import vllm.envs as envs
+from vllm._aiter_ops import rocm_aiter_ops
 from vllm.distributed.eplb.eplb_state import EplbLayerState
-from vllm.model_executor.layers.fused_moe.config import RoutingMethodType
+from vllm.model_executor.layers.fused_moe.config import (
+    RoutingMethodType,
+)
+from vllm.model_executor.layers.fused_moe.router.aiter_shared_routed_fused_moe_router import (  # noqa: E501
+    AiterSharedRoutedFusedMoERouter,
+)
 from vllm.model_executor.layers.fused_moe.router.custom_routing_router import (
     CustomRoutingRouter,
 )
@@ -25,8 +31,9 @@
 from vllm.model_executor.layers.fused_moe.router.routing_simulator_router import (
     RoutingSimulatorRouter,
 )
-
-EMPTY_EPLB_STATE: EplbLayerState = EplbLayerState()
+from vllm.model_executor.layers.fused_moe.router.zero_expert_router import (
+    ZeroExpertRouter,
+)
 
 
 def create_fused_moe_router(
@@ -47,8 +54,13 @@ def create_fused_moe_router(
     # custom routing parameters
     custom_routing_function: Callable | None = None,
     # eplb parameters
-    enable_eplb: bool = False,
-    eplb_state: EplbLayerState = EMPTY_EPLB_STATE,
+    eplb_state: EplbLayerState | None = None,
+    # zero expert parameters
+    zero_expert_type: str | None = None,
+    num_logical_experts: int | None = None,
+    hash_indices_table: torch.Tensor | None = None,
+    # routing kernel parameters
+    enable_pdl: bool = False,
 ) -> FusedMoERouter:
     """
     Factory function to create the appropriate FusedMoERouter subclass based on
@@ -56,10 +68,12 @@ def create_fused_moe_router(
 
     The selection logic follows this priority order:
     1. RoutingSimulatorRouter - if VLLM_MOE_ROUTING_SIMULATION_STRATEGY env var is set
-    2. GroupedTopKRouter - if use_grouped_topk is True
-    3. CustomRoutingRouter - if custom_routing_function is not None
-    4. FusedTopKBiasRouter - if e_score_correction_bias is not None
-    5. FusedTopKRouter - default fallback
+    2. ZeroExpertRouter - if zero_expert_type is not None
+    3. GroupedTopKRouter - if use_grouped_topk is True
+    4. CustomRoutingRouter - if custom_routing_function is not None
+    5. FusedTopKBiasRouter - if e_score_correction_bias is not None
+    6. AiterSharedRoutedFusedMoERouter - if num_fused_shared_experts > 0
+    7. FusedTopKRouter - default fallback
 
     Common arguments:
         top_k: Number of experts to select per token
@@ -83,8 +97,21 @@ def create_fused_moe_router(
         custom_routing_function: Optional custom routing function
 
     EPLB arguments:
-        enable_eplb: Whether EPLB is enabled
-        eplb_state: EPLB (Expert Parallelism Load Balancing) state
+        eplb_state: Optional EplbLayerState, None when EPLB is disabled.
+
+    Zero expert arguments:
+        zero_expert_type: Type of zero expert (e.g. identity). If not None,
+            creates a ZeroExpertRouter.
+        num_logical_experts: Number of real (non-zero) experts. Required when
+            zero_expert_type is not None.
+
+    Hash Indices Table:
+        Used to map input_ids to experts, need for Deepseek V4
+
+    Routing kernel arguments:
+        enable_pdl: Whether CUDA fused top-k routing kernels should participate
+            in a Programmatic Dependent Launch chain. This is only used on
+            supported NVIDIA GPUs with CUDA >= 12.0 and SM90+.
 
     Returns:
         An instance of the appropriate FusedMoERouter subclass
@@ -96,7 +123,26 @@ def create_fused_moe_router(
             top_k=top_k,
             global_num_experts=global_num_experts,
             eplb_state=eplb_state,
-            enable_eplb=enable_eplb,
+            indices_type_getter=indices_type_getter,
+        )
+
+    if zero_expert_type is not None:
+        assert num_logical_experts is not None, (
+            "num_logical_experts is required when zero_expert_type is set"
+        )
+        assert e_score_correction_bias is not None, (
+            "e_score_correction_bias is required when zero_expert_type is set"
+        )
+        return ZeroExpertRouter(
+            top_k=top_k,
+            global_num_experts=global_num_experts,
+            eplb_state=eplb_state,
+            e_score_correction_bias=e_score_correction_bias,
+            num_logical_experts=num_logical_experts,
+            zero_expert_type=zero_expert_type,
+            scoring_func=scoring_func,
+            renormalize=renormalize,
+            routed_scaling_factor=routed_scaling_factor,
             indices_type_getter=indices_type_getter,
         )
 
@@ -118,7 +164,6 @@ def create_fused_moe_router(
             routed_scaling_factor=routed_scaling_factor,
             e_score_correction_bias=e_score_correction_bias,
             num_fused_shared_experts=num_fused_shared_experts,
-            enable_eplb=enable_eplb,
             indices_type_getter=indices_type_getter,
         )
         if (
@@ -141,20 +186,37 @@ def create_fused_moe_router(
             eplb_state=eplb_state,
             custom_routing_function=custom_routing_function,
             renormalize=renormalize,
-            enable_eplb=enable_eplb,
             indices_type_getter=indices_type_getter,
         )
 
-    if e_score_correction_bias is not None:
+    assert scoring_func in ["sigmoid", "softmax", "sqrtsoftplus"]
+
+    if e_score_correction_bias is not None or hash_indices_table is not None:
         return FusedTopKBiasRouter(
             top_k=top_k,
             global_num_experts=global_num_experts,
             eplb_state=eplb_state,
             e_score_correction_bias=e_score_correction_bias,
-            scoring_func=scoring_func,
             renormalize=renormalize,
             routed_scaling_factor=routed_scaling_factor,
-            enable_eplb=enable_eplb,
+            indices_type_getter=indices_type_getter,
+            scoring_func=scoring_func,
+            hash_indices_table=hash_indices_table,
+            enable_pdl=enable_pdl,
+        )
+
+    if (
+        num_fused_shared_experts > 0
+        and scoring_func == "softmax"
+        and rocm_aiter_ops.is_fusion_moe_shared_experts_enabled()
+    ):
+        return AiterSharedRoutedFusedMoERouter(
+            top_k=top_k,
+            global_num_experts=global_num_experts,
+            eplb_state=eplb_state,
+            num_fused_shared_experts=num_fused_shared_experts,
+            renormalize=renormalize,
+            scoring_func=scoring_func,
             indices_type_getter=indices_type_getter,
         )
 
@@ -164,6 +226,6 @@ def create_fused_moe_router(
         eplb_state=eplb_state,
         renormalize=renormalize,
         scoring_func=scoring_func,
-        enable_eplb=enable_eplb,
         indices_type_getter=indices_type_getter,
+        enable_pdl=enable_pdl,
     )
diff --git a/vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py b/vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
index f8e46371841a..233dc82667c8 100644
--- a/vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
+++ b/vllm/model_executor/layers/fused_moe/router/routing_simulator_router.py
@@ -313,15 +313,13 @@ def __init__(
         self,
         top_k: int,
         global_num_experts: int,
-        eplb_state: EplbLayerState,
-        enable_eplb: bool = False,
+        eplb_state: EplbLayerState | None = None,
         indices_type_getter: Callable[[], torch.dtype | None] | None = None,
     ):
         super().__init__(
             top_k=top_k,
             global_num_experts=global_num_experts,
             eplb_state=eplb_state,
-            enable_eplb=enable_eplb,
             indices_type_getter=indices_type_getter,
         )
 
@@ -334,6 +332,8 @@ def _compute_routing(
         hidden_states: torch.Tensor,
         router_logits: torch.Tensor,
         indices_type: torch.dtype | None,
+        *,
+        input_ids: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Use routing simulator to compute routing."""
         routing_strategy = envs.VLLM_MOE_ROUTING_SIMULATION_STRATEGY
diff --git a/vllm/model_executor/layers/fused_moe/router/zero_expert_router.py b/vllm/model_executor/layers/fused_moe/router/zero_expert_router.py
new file mode 100644
index 000000000000..54f0fa4fb0ac
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/router/zero_expert_router.py
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Callable
+
+import torch
+
+from vllm.distributed.eplb.eplb_state import EplbLayerState
+from vllm.model_executor.layers.fused_moe.config import (
+    RoutingMethodType,
+    get_routing_method_type,
+)
+from vllm.model_executor.layers.fused_moe.fused_moe import (
+    zero_experts_compute_triton,
+)
+from vllm.model_executor.layers.fused_moe.router.base_router import BaseRouter
+from vllm.model_executor.layers.fused_moe.router.fused_topk_bias_router import (
+    fused_topk_bias,
+)
+
+
+class ZeroExpertRouter(BaseRouter):
+    """Router that handles zero expert computation as part of routing.
+
+    Routes over all experts (real + zero) using full e_score_correction_bias.
+    Computes zero expert identity contributions as a side effect during routing.
+    Remaps zero expert IDs to real expert ID 0 (with weight 0) so downstream
+    MoE computation can ignore them.
+    """
+
+    def __init__(
+        self,
+        top_k: int,
+        global_num_experts: int,
+        e_score_correction_bias: torch.Tensor,
+        num_logical_experts: int,
+        zero_expert_type: str,
+        scoring_func: str = "softmax",
+        renormalize: bool = False,
+        routed_scaling_factor: float = 1.0,
+        eplb_state: EplbLayerState | None = None,
+        indices_type_getter: Callable[[], torch.dtype | None] | None = None,
+    ):
+        super().__init__(
+            top_k=top_k,
+            global_num_experts=global_num_experts,
+            eplb_state=eplb_state,
+            indices_type_getter=indices_type_getter,
+        )
+        self.e_score_correction_bias = e_score_correction_bias
+        self.num_logical_experts = num_logical_experts
+        self.zero_expert_type = zero_expert_type
+        self.scoring_func = scoring_func
+        self.renormalize = renormalize
+        self.routed_scaling_factor = routed_scaling_factor
+        self._zero_expert_output: torch.Tensor | None = None
+
+    @property
+    def routing_method_type(self) -> RoutingMethodType:
+        return get_routing_method_type(
+            scoring_func=self.scoring_func,
+            top_k=self.top_k,
+            renormalize=self.renormalize,
+            num_expert_group=None,
+            has_e_score_bias=True,
+        )
+
+    def _compute_routing(
+        self,
+        hidden_states: torch.Tensor,
+        router_logits: torch.Tensor,
+        indices_type: torch.dtype | None,
+        *,
+        input_ids: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Compute routing with full bias, compute zero expert output,
+        mask zero expert IDs."""
+        topk_weights, topk_ids = fused_topk_bias(
+            hidden_states=hidden_states,
+            gating_output=router_logits,
+            e_score_correction_bias=self.e_score_correction_bias.data,
+            topk=self.top_k,
+            renormalize=self.renormalize,
+            scoring_func=self.scoring_func,
+            indices_type=indices_type,
+        )
+
+        if self.routed_scaling_factor != 1.0:
+            topk_weights *= self.routed_scaling_factor
+
+        # Compute zero expert output using pre-EPLB topk_ids/weights.
+        # zero_experts_compute_triton modifies its inputs in-place, so
+        # pass clones.
+        self._zero_expert_output = zero_experts_compute_triton(
+            expert_indices=topk_ids.clone(),
+            expert_scales=topk_weights.clone(),
+            num_experts=self.num_logical_experts,
+            zero_expert_type=self.zero_expert_type,
+            hidden_states=hidden_states,
+        )
+
+        # Mask zero expert entries: remap zero expert IDs to 0 with weight 0
+        # so downstream MoE computation ignores them.
+        zero_mask = topk_ids >= self.num_logical_experts
+        topk_ids[zero_mask] = 0
+        topk_weights[zero_mask] = 0.0
+
+        return topk_weights, topk_ids
+
+    @property
+    def zero_expert_output(self) -> torch.Tensor | None:
+        """Retrieve and clear the zero expert output."""
+        output = self._zero_expert_output
+        self._zero_expert_output = None
+        return output
diff --git a/vllm/model_executor/layers/fused_moe/runner/default_moe_runner.py b/vllm/model_executor/layers/fused_moe/runner/default_moe_runner.py
deleted file mode 100644
index a09273fc8049..000000000000
--- a/vllm/model_executor/layers/fused_moe/runner/default_moe_runner.py
+++ /dev/null
@@ -1,826 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from collections.abc import Callable
-from contextlib import nullcontext
-from typing import TYPE_CHECKING
-
-import torch
-import torch.nn.functional as F
-
-import vllm.envs as envs
-from vllm.distributed import (
-    get_ep_group,
-    get_pcp_group,
-    tensor_model_parallel_all_reduce,
-)
-from vllm.forward_context import (
-    ForwardContext,
-    get_forward_context,
-    is_forward_context_available,
-)
-from vllm.logger import init_logger
-from vllm.model_executor.layers.fused_moe.config import (
-    FusedMoEConfig,
-)
-from vllm.model_executor.layers.fused_moe.fused_moe_method_base import (
-    FusedMoEMethodBase,
-)
-from vllm.model_executor.layers.fused_moe.router.fused_moe_router import (
-    FusedMoERouter,
-)
-from vllm.model_executor.layers.fused_moe.runner.moe_runner import MoERunner
-from vllm.platforms import current_platform
-from vllm.utils.math_utils import cdiv
-from vllm.utils.torch_utils import (
-    HAS_OPAQUE_TYPE,
-    ModuleName,
-    aux_stream,
-    current_stream,
-    direct_register_custom_op,
-)
-from vllm.v1.worker.ubatching import dbo_current_ubatch_id
-
-logger = init_logger(__name__)
-
-
-def get_layer_from_name(layer_name: str) -> torch.nn.Module:
-    forward_context: ForwardContext = get_forward_context()
-    if layer_name == "from_forward_context":
-        all_moe_layers = forward_context.all_moe_layers
-        assert all_moe_layers is not None
-        moe_layer_index = forward_context.moe_layer_index
-        if moe_layer_index >= len(all_moe_layers):
-            raise AssertionError(
-                "We expected the number of MOE layers in `all_moe_layers` "
-                "to be equal to the number of "
-                "{vllm.moe_forward, vllm.moe_forward_shared} calls."
-            )
-        layer_name = all_moe_layers[moe_layer_index]
-        forward_context.moe_layer_index += 1
-    return forward_context.no_compile_layers[layer_name]
-
-
-# On torch >= 2.11, layer_name is a hoisted ModuleName opaque object;
-# on older versions it remains a plain str.
-if TYPE_CHECKING:
-    from typing import TypeAlias
-
-    _layer_name_type: TypeAlias = str | ModuleName
-else:
-    _layer_name_type = ModuleName if HAS_OPAQUE_TYPE else str
-
-
-def _resolve_layer_name(layer_name: str | ModuleName) -> str:
-    return layer_name.value if isinstance(layer_name, ModuleName) else layer_name
-
-
-def _moe_forward(
-    hidden_states: torch.Tensor,
-    router_logits: torch.Tensor,
-    shared_experts_input: torch.Tensor | None,
-    layer_name: _layer_name_type,
-) -> torch.Tensor:
-    layer = get_layer_from_name(_resolve_layer_name(layer_name))
-    # TODO(bnell): this can be removed after MK migration is complete.
-    layer.ensure_moe_quant_config_init()
-    runner = layer.runner
-    with runner._sequence_parallel_context():
-        if runner.use_dp_chunking:
-            return runner.forward_impl_chunked(
-                layer,
-                hidden_states,
-                router_logits,
-                shared_experts_input,
-            )
-        else:
-            return runner.forward_impl(
-                layer,
-                hidden_states,
-                router_logits,
-                shared_experts_input,
-            )
-
-
-def _moe_forward_fake(
-    hidden_states: torch.Tensor,
-    router_logits: torch.Tensor,
-    shared_experts_input: torch.Tensor | None,
-    layer_name: _layer_name_type,
-) -> torch.Tensor:
-    return torch.empty_like(hidden_states)
-
-
-def _moe_forward_shared(
-    hidden_states: torch.Tensor,
-    router_logits: torch.Tensor,
-    shared_experts_input: torch.Tensor | None,
-    layer_name: _layer_name_type,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    layer = get_layer_from_name(_resolve_layer_name(layer_name))
-    # TODO(bnell): this can be removed after MK migration is complete.
-    layer.ensure_moe_quant_config_init()
-    runner = layer.runner
-    with runner._sequence_parallel_context():
-        if runner.use_dp_chunking:
-            return runner.forward_impl_chunked(
-                layer,
-                hidden_states,
-                router_logits,
-                shared_experts_input,
-            )
-        else:
-            return runner.forward_impl(
-                layer,
-                hidden_states,
-                router_logits,
-                shared_experts_input,
-            )
-
-
-def _moe_forward_shared_fake(
-    hidden_states: torch.Tensor,
-    router_logits: torch.Tensor,
-    shared_experts_input: torch.Tensor | None,
-    layer_name: _layer_name_type,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    # Output shapes:
-    # - fused_out: same as hidden_states (routed experts use transformed size)
-    # - shared_out: same as shared_experts_input if provided, else same as
-    #               hidden_states
-    # (For latent MoE: shared experts use original hidden_size, not latent size)
-    fused_out = torch.empty_like(hidden_states)
-    if shared_experts_input is not None:
-        shared_out = torch.empty_like(shared_experts_input)
-    else:
-        shared_out = torch.empty_like(hidden_states)
-    return shared_out, fused_out
-
-
-direct_register_custom_op(
-    op_name="moe_forward",
-    op_func=_moe_forward,
-    mutates_args=["hidden_states"],
-    fake_impl=_moe_forward_fake,
-    tags=(torch.Tag.needs_fixed_stride_order,),
-)
-
-
-direct_register_custom_op(
-    op_name="moe_forward_shared",
-    op_func=_moe_forward_shared,
-    mutates_args=["hidden_states"],
-    fake_impl=_moe_forward_shared_fake,
-    tags=(torch.Tag.needs_fixed_stride_order,),
-)
-
-
-class DefaultMoERunner(MoERunner):
-    """
-    Default implementation of the MoE runner for executing Mixture of Experts layers.
-
-    This class provides a comprehensive implementation for running MoE computations
-    with support for:
-    - Expert routing and token dispatching
-    - Shared experts computation with optional parallel execution using CUDA streams
-    - Data parallel (DP) chunking for large batch processing
-    - Tensor model parallel and expert parallel operations
-    - Various quantization methods and custom operators
-    - Both monolithic and decomposed expert execution paths
-
-    The runner handles the complete MoE forward pass including routing tokens to
-    experts, executing expert computations, and combining results. It supports
-    advanced features like overlapped execution of shared experts and optimized
-    kernels for different parallel execution modes.
-
-    Eventually, this class will be split up and specialized for different
-    configurations, e.g. the presence or absence of shared experts, a gate, etc.
-    """
-
-    def __init__(
-        self,
-        layer: torch.nn.Module,
-        moe_config: FusedMoEConfig,
-        router: FusedMoERouter,
-        routed_input_transform: torch.nn.Module | None,
-        gate: torch.nn.Module | None,
-        shared_experts: torch.nn.Module | None,
-        quant_method: FusedMoEMethodBase,
-        reduce_results: bool,
-        enable_dbo: bool,
-    ):
-        super().__init__()
-        self.moe_config = moe_config
-        self.router = router
-        self.routed_input_transform = routed_input_transform
-        self.gate = gate
-        self.shared_experts = shared_experts
-        self.quant_method = quant_method
-        self.reduce_results = reduce_results
-        self.enable_dbo = enable_dbo
-
-        # Chunked all2all staging tensor
-        # TODO(bnell) rename these?
-        self.batched_hidden_states: torch.Tensor | None = None
-        self.batched_router_logits: torch.Tensor | None = None
-        self._maybe_init_dp_chunking()
-
-        # Allow disabling of the separate shared experts stream for
-        # debug purposes.
-        # TODO: Remove this after more extensive testings with TP/DP
-        # and other execution modes
-        self.use_shared_experts_stream = False
-        if envs.VLLM_DISABLE_SHARED_EXPERTS_STREAM:
-            logger.debug_once("Disabling MoE shared_experts cuda stream", scope="local")
-            self.shared_experts_stream = None
-        else:
-            # TODO(rob): enable shared expert overlap with non-cuda-alike.
-            # aux_stream() returns None on non-cuda-alike platforms.
-            self.shared_experts_stream = aux_stream()
-            if self.shared_experts_stream is not None:
-                logger.debug_once(
-                    "Enabled separate cuda stream for MoE shared_experts", scope="local"
-                )
-
-        # Needed for string -> FusedMoE layer lookup in custom ops.
-        self.layer_name = layer.layer_name
-
-        self.moe_forward = self._select_forward(layer)
-
-    def _select_forward(self, layer: torch.nn.Module) -> Callable:
-        if current_platform.is_tpu() or current_platform.is_cpu():
-            # TODO: Once the OOM issue for the TPU backend is resolved, we
-            # will switch to using the moe_forward custom op.
-            # Note: CPU doesn't require wrapped forward_impl.
-            return _moe_forward if self.shared_experts is None else _moe_forward_shared
-
-        return (
-            torch.ops.vllm.moe_forward
-            if self.shared_experts is None
-            else torch.ops.vllm.moe_forward_shared
-        )
-
-    @property
-    def use_dp_chunking(self) -> bool:
-        return (
-            self.moe_config.moe_parallel_config.use_deepep_ll_kernels
-            or self.moe_config.moe_parallel_config.use_mori_kernels
-            or self.moe_config.moe_parallel_config.use_fi_nvl_two_sided_kernels
-            or self.moe_config.moe_parallel_config.use_nixl_ep_kernels
-        ) and envs.VLLM_ENABLE_MOE_DP_CHUNK
-
-    def _maybe_setup_shared_experts_stream(
-        self,
-        hidden_states: torch.Tensor,
-        shared_input: torch.Tensor | None,
-    ):
-        if self.use_shared_experts_stream:
-            assert self.shared_experts_stream is not None
-            assert self.moe_config.disable_inplace
-
-            shared_experts_input = (
-                shared_input if shared_input is not None else hidden_states
-            )
-
-            # Record that the shared_experts_input will be used in the
-            # shared_experts_stream to avoid gc issue from
-            # deallocation. For more details:
-            # https://docs.pytorch.org/docs/stable/generated/torch.Tensor.record_stream.html # noqa: E501
-            # NOTE: We don't need shared_output.record_stream(current_stream())
-            # because we synch the streams before using shared_output.
-            shared_experts_input.record_stream(self.shared_experts_stream)
-
-            # Mark sync start point for the separate shared experts
-            # stream here since we want to run in parallel with the
-            # router/gate (next op below)
-            assert self.shared_experts_stream is not None
-            self.shared_experts_stream.wait_stream(current_stream())
-
-    def _maybe_init_dp_chunking(self):
-        if not self.use_dp_chunking:
-            return
-
-        assert self.batched_hidden_states is None
-        states_shape: tuple[int, ...]
-        logits_shape: tuple[int, ...]
-
-        moe = self.moe_config
-
-        if self.enable_dbo:
-            states_shape = (2, moe.max_num_tokens, self.moe_config.hidden_dim)
-            logits_shape = (2, moe.max_num_tokens, self.moe_config.num_logical_experts)
-        else:
-            states_shape = (moe.max_num_tokens, self.moe_config.hidden_dim)
-            logits_shape = (moe.max_num_tokens, self.moe_config.num_logical_experts)
-
-        device = torch.accelerator.current_device_index()
-        self.batched_hidden_states = torch.zeros(
-            states_shape,
-            dtype=moe.in_dtype,
-            device=device,
-        )
-
-        self.batched_router_logits = torch.zeros(
-            logits_shape,
-            dtype=moe.router_logits_dtype,
-            device=device,
-        )
-
-    @property
-    def has_separate_shared_experts(self) -> bool:
-        return (
-            not self.quant_method.mk_owns_shared_expert
-            and self.shared_experts is not None
-        )
-
-    def _apply_shared_experts(
-        self,
-        hidden_states: torch.Tensor,
-        allow_streaming: bool = False,
-    ) -> torch.Tensor | None:
-        shared_output: torch.Tensor | None = None
-        if self.has_separate_shared_experts:
-            assert self.shared_experts is not None
-
-            if self.use_shared_experts_stream and allow_streaming:
-                # Run shared experts in parallel on a separate stream
-                # NOTE: We start the separate stream here and mark the
-                # sync end point immediately after it is done. This is
-                # important to avoid excessive stream allocations by the cuda
-                # graph replay later.
-                with torch.cuda.stream(self.shared_experts_stream):
-                    # Note that hidden_states clone() is necessary here to avoid
-                    # conflict with the main stream
-                    shared_output = self.shared_experts(hidden_states)
-                current_stream().wait_stream(self.shared_experts_stream)
-            else:
-                shared_output = self.shared_experts(hidden_states)
-
-        return shared_output
-
-    def must_reduce_shared_expert_outputs(self) -> bool:
-        """
-        The shared_experts are typically computed using the RowParallelLinear
-        layer. The result of this function is typically used as
-        the reduce_results argument to the module.
-        When just tensor-parallel is used, it is not required to reduce
-        the shared_experts results immediately. Instead we reduce at the
-        once at the end of the MoE op. (Refer to DeepSeekV2MoE module)
-        With EP and all2all kernels - this is no longer viable as all
-        GPU ranks in DP, produce the complete set of hidden_states.
-        Therefore it is required that we reduce the shared_experts output
-        early.
-        """
-        return (
-            self.quant_method.moe_kernel is not None
-            and self.quant_method.moe_kernel.output_is_reduced()
-        )
-
-    def maybe_all_reduce_tensor_model_parallel(self, final_hidden_states: torch.Tensor):
-        """
-        Some combine kernels reduce across GPU ranks by default.
-        """
-        if self.must_reduce_shared_expert_outputs():
-            return final_hidden_states
-        else:
-            return tensor_model_parallel_all_reduce(final_hidden_states)
-
-    def apply_routed_input_transform(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        """Apply transform for routed experts (e.g., latent projection).
-
-        This is called by FusedMoE.forward_native. The original hidden_states
-        is saved separately so shared experts get [S, hidden_size] while
-        routed experts get the transformed [S, moe_latent_size].
-
-        TODO: For latent MoE bandwidth optimization, fc2_latent_proj could be
-        moved inside SharedFusedMoE to all-reduce on the smaller latent
-        dimension.
-        """
-        if self.routed_input_transform is not None:
-            result = self.routed_input_transform(hidden_states)
-            # ReplicatedLinear returns (output, extra_bias) tuple.
-            # We only need the output tensor; extra_bias is not used here.
-            if isinstance(result, tuple):
-                return result[0]
-            return result
-        return hidden_states
-
-    def _maybe_reduce_output(
-        self,
-        states: torch.Tensor | tuple[torch.Tensor, torch.Tensor],
-        trunc_sizes: list[int],
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        def trunc(x: torch.Tensor, trunc_size: int) -> torch.Tensor:
-            return x[..., :trunc_size]
-
-        def reduce_and_trunc(x: torch.Tensor, trunc_size: int) -> torch.Tensor:
-            return trunc(self.maybe_all_reduce_tensor_model_parallel(x), trunc_size)
-
-        if (
-            not self.moe_config.is_sequence_parallel
-            and not self.use_dp_chunking
-            and self.reduce_results
-            and (self.moe_config.tp_size > 1 or self.moe_config.ep_size > 1)
-        ):
-            func = reduce_and_trunc
-        else:
-            func = trunc
-
-        if isinstance(states, tuple):
-            return tuple(
-                [func(s, trunc_size) for s, trunc_size in zip(states, trunc_sizes)]
-            )
-        else:
-            assert len(trunc_sizes) == 1
-            return func(states, trunc_sizes[0])
-
-    def _encode_layer_name(self) -> str | ModuleName:
-        if HAS_OPAQUE_TYPE:
-            return ModuleName(self.layer_name)
-        # Can be unavailable or None in unittests
-        if (
-            is_forward_context_available()
-            and get_forward_context().all_moe_layers is not None
-        ):
-            return "from_forward_context"
-        return self.layer_name
-
-    def _maybe_pad_hidden_states(
-        self,
-        original_hidden_states: torch.Tensor | None,
-        hidden_states: torch.Tensor,
-    ) -> tuple[torch.Tensor, list[int]]:
-        original_hidden_dim = (
-            original_hidden_states.shape[-1]
-            if original_hidden_states is not None
-            else 0
-        )
-        transformed_hidden_dim = hidden_states.shape[-1]
-        if (
-            not self.quant_method.skip_forward_padding
-            and self.moe_config.hidden_dim != transformed_hidden_dim
-        ):
-            hidden_states = F.pad(
-                hidden_states,
-                (0, self.moe_config.hidden_dim - transformed_hidden_dim),
-                mode="constant",
-                value=0.0,
-            )
-
-        if self.shared_experts is not None:
-            orig_hidden_dims = [original_hidden_dim, transformed_hidden_dim]
-        else:
-            orig_hidden_dims = [transformed_hidden_dim]
-
-        return hidden_states, orig_hidden_dims
-
-    def _apply_quant_method(
-        self,
-        layer: torch.nn.Module,
-        hidden_states: torch.Tensor,
-        router_logits: torch.Tensor,
-        shared_input: torch.Tensor | None,
-        run_shared_experts_before: bool = True,
-    ) -> tuple[torch.Tensor | None, torch.Tensor]:
-        shared_input = shared_input if shared_input is not None else hidden_states
-        shared_output: torch.Tensor | None = None
-
-        # Run this before quant_method to avoid inplace issues.
-        if run_shared_experts_before:
-            shared_output = self._apply_shared_experts(shared_input, False)
-
-        if self.quant_method.is_monolithic:
-            result = self.quant_method.apply_monolithic(
-                layer=layer,
-                x=hidden_states,
-                router_logits=router_logits,
-            )
-        else:
-            topk_weights, topk_ids = self.router.select_experts(
-                hidden_states=hidden_states,
-                router_logits=router_logits,
-            )
-
-            result = self.quant_method.apply(
-                layer=layer,
-                x=hidden_states,
-                topk_weights=topk_weights,
-                topk_ids=topk_ids,
-                shared_experts_input=shared_input,
-            )
-
-        if isinstance(result, tuple):
-            assert shared_output is None
-            shared_output, hidden_states = result
-        else:
-            hidden_states = result
-
-        if not run_shared_experts_before and self.has_separate_shared_experts:
-            assert shared_output is None
-            shared_output = self._apply_shared_experts(shared_input, True)
-
-        return shared_output, hidden_states
-
-    def _sequence_parallel_context(self):
-        ctx = get_forward_context()
-        return (
-            ctx.dp_metadata.sp_local_sizes(self.moe_config.sp_size)
-            if ctx.dp_metadata
-            else nullcontext()
-        )
-
-    def _allocate_dp_chunking_outputs(
-        self,
-        hidden_states: torch.Tensor,
-        router_logits: torch.Tensor,
-    ) -> tuple[torch.Tensor | None, torch.Tensor]:
-        assert self.use_dp_chunking
-
-        # Assert the inputs are of the proper type and shape.
-        assert self.batched_hidden_states is not None
-        assert self.batched_router_logits is not None
-
-        assert self.batched_hidden_states.dtype == hidden_states.dtype, (
-            f"{self.batched_hidden_states.dtype} == {hidden_states.dtype}"
-        )
-        assert self.batched_router_logits.dtype == router_logits.dtype, (
-            f"{self.batched_router_logits.dtype} == {router_logits.dtype}"
-        )
-
-        # Check size compatibility.
-        assert self.batched_hidden_states.size(-1) == hidden_states.size(-1)
-        assert self.batched_router_logits.size(-1) == router_logits.size(-1)
-
-        final_fused_hidden_states = torch.empty_like(hidden_states)
-        if self.shared_experts is not None:
-            final_shared_hidden_states = torch.empty_like(hidden_states)
-        else:
-            final_shared_hidden_states = None
-
-        return final_shared_hidden_states, final_fused_hidden_states
-
-    def _maybe_gate(
-        self,
-        hidden_states: torch.Tensor,
-        router_logits: torch.Tensor,
-    ) -> torch.Tensor:
-        # If router/gate provided, then apply it here.
-        # (Note: This code runs only when "overlapped mode" is on to allow
-        #        parallel execution of shared experts with the FusedMoE via
-        #        separate cuda stream)
-        if self.gate is not None:
-            router_logits, _ = self.gate(hidden_states)
-        return router_logits
-
-    @property
-    def do_naive_dispatch_combine(self) -> bool:
-        return (
-            self.moe_config.dp_size > 1 and not self.quant_method.supports_internal_mk
-        )
-
-    def _maybe_dispatch(
-        self,
-        layer: torch.nn.Module,
-        hidden_states: torch.Tensor,
-        router_logits: torch.Tensor,
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        # For naive dispatch/combine Dp/Ep, dispatch the hidden states and
-        # router logits to all experts.
-        # NOTE: this will be removed once all kernels are migrated into the
-        # MoEKernel framework.
-        if self.do_naive_dispatch_combine:
-            hidden_states, router_logits = get_ep_group().dispatch_router_logits(
-                hidden_states,
-                router_logits,
-                self.moe_config.is_sequence_parallel,
-            )
-
-        # NOTE: Similar with DP, PCP also needs dispatch and combine. For
-        # simplicity, AgRsAll2All was added separately for PCP here. Maybe
-        # we should modify All2AllManager abstraction to better support PCP.
-        if self.moe_config.pcp_size > 1:
-            hidden_states = get_pcp_group().all_gather(
-                hidden_states,
-                dim=0,
-            )
-            router_logits = get_pcp_group().all_gather(
-                router_logits,
-                dim=0,
-            )
-
-        return hidden_states, router_logits
-
-    def _maybe_combine(
-        self,
-        shared_output: torch.Tensor | None,
-        hidden_states: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor | None]:
-        if self.do_naive_dispatch_combine:
-            hidden_states = get_ep_group().combine(
-                hidden_states, self.moe_config.is_sequence_parallel
-            )
-
-        if self.moe_config.pcp_size > 1:
-            hidden_states = get_pcp_group().reduce_scatter(
-                hidden_states,
-                dim=0,
-            )
-            # need RS for shared_output?
-
-        if self.shared_experts is not None:
-            assert shared_output is not None
-            return shared_output, hidden_states
-        else:
-            return hidden_states
-
-    def forward(
-        self,
-        hidden_states: torch.Tensor,
-        router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        # For latent MoE: save ORIGINAL hidden_states before transform
-        # (shared_experts need original dimension, routed experts use transformed)
-        if self.shared_experts is not None:
-            original_hidden_states = hidden_states
-        else:
-            original_hidden_states = None
-
-        # Apply transform for routed experts (e.g., latent projection for latent MoE)
-        hidden_states = self.apply_routed_input_transform(hidden_states)
-
-        hidden_states, og_hidden_dims = self._maybe_pad_hidden_states(
-            original_hidden_states,
-            hidden_states,
-        )
-
-        fused_output = self.moe_forward(
-            hidden_states,
-            router_logits,
-            original_hidden_states,
-            self._encode_layer_name(),
-        )
-
-        return self._maybe_reduce_output(fused_output, og_hidden_dims)
-
-    def _slice_and_copy_input(
-        self,
-        out_slice: torch.Tensor,
-        orig: torch.Tensor | None,
-        start: int,
-        end: int,
-    ) -> torch.Tensor:
-        assert orig is not None
-        slice_size = end - start
-        orig_slice = orig[start:end, :]
-        if self.enable_dbo:
-            assert out_slice.dim() == 3
-            batch_buffer_idx = dbo_current_ubatch_id()
-            out_slice = out_slice[batch_buffer_idx, :]
-
-        assert out_slice.size(0) >= slice_size
-        out_slice = out_slice[:slice_size, :]
-        out_slice.copy_(orig_slice, non_blocking=True)
-        return out_slice
-
-    def forward_impl_chunked(
-        self,
-        layer: torch.nn.Module,
-        hidden_states: torch.Tensor,
-        router_logits: torch.Tensor,
-        shared_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        # Gate overlap not supported when chunking is enabled. Run the
-        # gate first.
-        router_logits = self._maybe_gate(hidden_states, router_logits)
-
-        final_shared_hidden_states, final_fused_hidden_states = (
-            self._allocate_dp_chunking_outputs(hidden_states, router_logits)
-        )
-
-        ctx = get_forward_context()
-        # flashinfer_cutlass_kernels can handle: optional DP + TP/EP
-        max_tokens_across_dispatchers = ctx.dp_metadata.max_tokens_across_dp_cpu
-        moe_dp_chunk_size_per_rank = self.moe_config.max_num_tokens
-
-        # If the input to the MoE is sequence parallel then divide by sp_size
-        # to find the maximum number of tokens for any individual dispatcher.
-        if self.moe_config.is_sequence_parallel:
-            max_tokens_across_dispatchers = cdiv(
-                max_tokens_across_dispatchers, self.moe_config.sp_size
-            )
-
-        num_tokens = hidden_states.size(0)
-        for chunk_idx, chunk_start_ in enumerate(
-            range(0, max_tokens_across_dispatchers, moe_dp_chunk_size_per_rank)
-        ):
-            chunk_start = chunk_start_
-            chunk_end = min(
-                chunk_start + moe_dp_chunk_size_per_rank, max_tokens_across_dispatchers
-            )
-            # clamp start and end
-            chunk_start = min(chunk_start, num_tokens - 1)
-            chunk_end = min(chunk_end, num_tokens)
-            chunk_sizes = ctx.dp_metadata.chunked_sizes(
-                self.moe_config.sp_size, moe_dp_chunk_size_per_rank, chunk_idx
-            )
-            with chunk_sizes:
-                hidden_states_chunk = self._slice_and_copy_input(
-                    self.batched_hidden_states,
-                    hidden_states,
-                    chunk_start,
-                    chunk_end,
-                )
-
-                router_logits_chunk = self._slice_and_copy_input(
-                    self.batched_router_logits,
-                    router_logits,
-                    chunk_start,
-                    chunk_end,
-                )
-
-                shared_input_chunk = (
-                    shared_input[chunk_start:chunk_end, :]
-                    if shared_input is not None
-                    else None
-                )
-
-                shared_output_chunk, hidden_states_chunk = self._apply_quant_method(
-                    layer=layer,
-                    hidden_states=hidden_states_chunk,
-                    router_logits=router_logits_chunk,
-                    shared_input=shared_input_chunk,
-                )
-
-                # Store outputs
-                # TODO(bnell): document when chunk_start >= num_tokens
-                if chunk_start < num_tokens:
-                    final_fused_hidden_states[chunk_start:chunk_end, :].copy_(
-                        hidden_states_chunk, non_blocking=True
-                    )
-                    if self.shared_experts is not None:
-                        assert shared_output_chunk is not None
-                        assert final_shared_hidden_states is not None
-                        final_shared_hidden_states[chunk_start:chunk_end, :].copy_(
-                            shared_output_chunk, non_blocking=True
-                        )
-
-        if self.shared_experts is None:
-            return final_fused_hidden_states
-        else:
-            assert final_shared_hidden_states is not None
-            return (final_shared_hidden_states, final_fused_hidden_states)
-
-    def forward_impl(
-        self,
-        layer: torch.nn.Module,
-        hidden_states: torch.Tensor,
-        router_logits: torch.Tensor,
-        shared_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        self.use_shared_experts_stream = (
-            current_platform.is_cuda()
-            and self.has_separate_shared_experts
-            and not self.use_dp_chunking
-            and self.shared_experts_stream is not None
-            and (
-                hidden_states.shape[0]
-                <= envs.VLLM_SHARED_EXPERTS_STREAM_TOKEN_THRESHOLD
-            )
-        )
-
-        # Check if we need to run shared experts before matrix multiply because
-        # matrix multiply may modify the hidden_states.
-        run_shared_experts_before = (
-            self.has_separate_shared_experts and not self.use_shared_experts_stream
-        )
-
-        # The shared experts stream must be set up before calling the gate so they
-        # can be overlapped.
-        if not run_shared_experts_before:
-            self._maybe_setup_shared_experts_stream(
-                hidden_states,
-                shared_input,
-            )
-
-        router_logits = self._maybe_gate(hidden_states, router_logits)
-
-        # TODO(bnell): parts of the dispatch/combine steps will go away once
-        # #32567 lands and the remaining kernels are made MKs.  The PCP
-        # code will probably remain
-        hidden_states, router_logits = self._maybe_dispatch(
-            layer,
-            hidden_states,
-            router_logits,
-        )
-
-        shared_output, hidden_states = self._apply_quant_method(
-            layer=layer,
-            hidden_states=hidden_states,
-            router_logits=router_logits,
-            shared_input=shared_input,
-            run_shared_experts_before=run_shared_experts_before,
-        )
-
-        return self._maybe_combine(
-            shared_output,
-            hidden_states,
-        )
diff --git a/vllm/model_executor/layers/fused_moe/runner/moe_runner.py b/vllm/model_executor/layers/fused_moe/runner/moe_runner.py
index b298cc2d0c4c..39b40d1abe6a 100644
--- a/vllm/model_executor/layers/fused_moe/runner/moe_runner.py
+++ b/vllm/model_executor/layers/fused_moe/runner/moe_runner.py
@@ -1,34 +1,799 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from abc import ABC, abstractmethod
+from collections.abc import Callable
+from contextlib import nullcontext
+from typing import TYPE_CHECKING
 
 import torch
+import torch.nn.functional as F
 
+from vllm.distributed import (
+    get_ep_group,
+    get_pcp_group,
+    tensor_model_parallel_all_reduce,
+)
+from vllm.forward_context import (
+    ForwardContext,
+    get_forward_context,
+    is_forward_context_available,
+)
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+)
+from vllm.model_executor.layers.fused_moe.fused_moe_method_base import (
+    FusedMoEMethodBase,
+)
+from vllm.model_executor.layers.fused_moe.router.fused_moe_router import (
+    FusedMoERouter,
+)
+from vllm.model_executor.layers.fused_moe.router.zero_expert_router import (
+    ZeroExpertRouter,
+)
+from vllm.model_executor.layers.fused_moe.runner.moe_runner_interface import (
+    MoERunnerInterface,
+)
+from vllm.model_executor.layers.fused_moe.runner.shared_experts import (
+    SharedExperts,
+    SharedExpertsOrder,
+)
+from vllm.platforms import current_platform
+from vllm.utils.torch_utils import (
+    _USE_LAYERNAME,
+    LayerName,
+    direct_register_custom_op,
+)
 
-class MoERunner(ABC):
+
+def get_layer_from_name(layer_name: str) -> torch.nn.Module:
+    forward_context: ForwardContext = get_forward_context()
+    if not _USE_LAYERNAME and layer_name == "from_forward_context":
+        all_moe_layers = forward_context.all_moe_layers
+        assert all_moe_layers is not None
+        moe_layer_index = forward_context.moe_layer_index
+        if moe_layer_index >= len(all_moe_layers):
+            raise AssertionError(
+                "We expected the number of MOE layers in `all_moe_layers` "
+                "to be equal to the number of "
+                "{vllm.moe_forward, vllm.moe_forward_shared} calls."
+            )
+        layer_name = all_moe_layers[moe_layer_index]
+        forward_context.moe_layer_index += 1
+    return forward_context.no_compile_layers[layer_name]
+
+
+# On torch >= 2.11, layer_name is a hoisted LayerName opaque object;
+# on older versions it remains a plain str.
+if TYPE_CHECKING:
+    from typing import TypeAlias
+
+    _layer_name_type: TypeAlias = str | LayerName
+else:
+    _layer_name_type = LayerName if _USE_LAYERNAME else str
+
+
+@torch.compiler.assume_constant_result
+def _resolve_layer_name(layer_name: str | LayerName) -> str:
+    from torch._library.fake_class_registry import FakeScriptObject
+
+    if isinstance(layer_name, LayerName):
+        return layer_name.value
+    elif isinstance(layer_name, FakeScriptObject):
+        return layer_name.real_obj.value
+    return layer_name
+
+
+# Note: _moe_forward and _moe_forward_shared should not contain any
+# implementation details, They should merely pass along control to
+# the runner's '_forward_impl' method.
+# These functions should never be called directly since they do not
+# include all the functionality of the MoE layer.
+def _moe_forward(
+    hidden_states: torch.Tensor,
+    router_logits: torch.Tensor,
+    shared_experts_input: torch.Tensor | None,
+    input_ids: torch.Tensor | None,
+    layer_name: _layer_name_type,
+    hidden_dim_unpadded: int,
+) -> torch.Tensor:
+    layer = get_layer_from_name(_resolve_layer_name(layer_name))
+    return layer.runner._forward_impl(
+        layer,
+        hidden_states,
+        router_logits,
+        shared_experts_input,
+        input_ids,
+    )
+
+
+def _moe_forward_fake(
+    hidden_states: torch.Tensor,
+    router_logits: torch.Tensor,
+    shared_experts_input: torch.Tensor | None,
+    input_ids: torch.Tensor | None,
+    layer_name: _layer_name_type,
+    hidden_dim_unpadded: int,
+) -> torch.Tensor:
+    # `hidden_dim_unpadded > 0` only on the TRT-LLM MXFP4 path, where the
+    # real kernel writes narrower than `hidden_states.shape[-1]`. Plumbed
+    # as an op arg (not peeked from the layer registry) to keep the fake
+    # a pure shape function of its inputs and preserve subgraph dedup.
+    if hidden_dim_unpadded > 0:
+        return hidden_states.new_empty((*hidden_states.shape[:-1], hidden_dim_unpadded))
+    return torch.empty_like(hidden_states)
+
+
+def _moe_forward_shared(
+    hidden_states: torch.Tensor,
+    router_logits: torch.Tensor,
+    shared_experts_input: torch.Tensor | None,
+    input_ids: torch.Tensor | None,
+    layer_name: _layer_name_type,
+    hidden_dim_unpadded: int,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    layer = get_layer_from_name(_resolve_layer_name(layer_name))
+    return layer.runner._forward_impl(
+        layer,
+        hidden_states,
+        router_logits,
+        shared_experts_input,
+        input_ids,
+    )
+
+
+def _moe_forward_shared_fake(
+    hidden_states: torch.Tensor,
+    router_logits: torch.Tensor,
+    shared_experts_input: torch.Tensor | None,
+    input_ids: torch.Tensor | None,
+    layer_name: _layer_name_type,
+    hidden_dim_unpadded: int,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    # `fused_out`: see `_moe_forward_fake` for hidden_dim_unpadded semantics.
+    # `shared_out`: matches `shared_experts_input` if provided (latent MoE),
+    # else `hidden_states`.
+    if hidden_dim_unpadded > 0:
+        fused_out = hidden_states.new_empty(
+            (*hidden_states.shape[:-1], hidden_dim_unpadded)
+        )
+    else:
+        fused_out = torch.empty_like(hidden_states)
+    if shared_experts_input is not None:
+        shared_out = torch.empty_like(shared_experts_input)
+    else:
+        shared_out = torch.empty_like(hidden_states)
+    return shared_out, fused_out
+
+
+direct_register_custom_op(
+    op_name="moe_forward",
+    op_func=_moe_forward,
+    mutates_args=["hidden_states"],
+    fake_impl=_moe_forward_fake,
+    tags=(torch.Tag.needs_fixed_stride_order,),
+)
+
+
+direct_register_custom_op(
+    op_name="moe_forward_shared",
+    op_func=_moe_forward_shared,
+    fake_impl=_moe_forward_shared_fake,
+    tags=(torch.Tag.needs_fixed_stride_order,),
+)
+
+
+def _unpack(
+    result: torch.Tensor | tuple[torch.Tensor, torch.Tensor],
+) -> tuple[torch.Tensor | None, torch.Tensor]:
+    if isinstance(result, tuple):
+        return result
+    else:
+        return (None, result)
+
+
+class MoERunner(MoERunnerInterface):
     """
-    Abstract base class for Mixture of Experts (MoE) runners.
+    Standard MoE runner implementation for executing Mixture of Experts layers.
+
+    This is the primary concrete implementation of MoE execution logic, providing
+    comprehensive support for standard MoE operations. It handles:
+    - Expert routing and token dispatching using various routing strategies
+    - Shared experts computation with optional parallel execution using CUDA streams
+    - Tensor model parallel and expert parallel operations
+    - Multiple quantization methods and optimized kernel selection
+    - Both monolithic and decomposed expert execution paths
+    - Integration with various parallel execution modes (TP, EP, DP)
 
-    This class defines the interface that all MoE runner implementations must follow.
-    MoE runners are responsible for executing the forward pass of MoE layers, handling
-    expert routing, and managing tensor parallel operations.
+    The runner orchestrates the complete MoE forward pass including routing tokens
+    to experts, executing expert computations in parallel, and combining results.
+    It supports advanced features like overlapped execution of shared experts,
+    optimized kernels for different parallel configurations, and seamless
+    integration with vLLM's distributed execution framework.
+
+    Eventually, this class may be split into more specialized implementations
+    for different configurations (e.g., with/without shared experts, gates, etc.).
     """
 
-    @abstractmethod
-    def forward(
+    def __init__(
         self,
+        layer_name: str,
+        moe_config: FusedMoEConfig,
+        router: FusedMoERouter,
+        routed_input_transform: torch.nn.Module | None,
+        gate: torch.nn.Module | None,
+        shared_experts: torch.nn.Module | None,
+        quant_method: FusedMoEMethodBase,
+        enable_dbo: bool,
+        shared_expert_gate: torch.nn.Module | None = None,
+        routed_output_transform: torch.nn.Module | None = None,
+        routed_scaling_factor: float = 1.0,
+    ):
+        super().__init__()
+        self.moe_config = moe_config
+        self.router = router
+        self.routed_input_transform = routed_input_transform
+        self.routed_output_transform = routed_output_transform
+        self.routed_scaling_factor = routed_scaling_factor
+        self.gate = gate
+        self.shared_expert_gate = shared_expert_gate
+        self._quant_method = quant_method
+        self.enable_dbo = enable_dbo
+
+        # When both gates are present and FSE is enabled, fuse their
+        # weight matrices into [num_experts + num_shared, hidden] so one
+        # F.linear produces combined logits. The topk kernel can then
+        # apply routing softmax and shared expert activation (sigmoid)
+        # in a single launch.
+        self._fse_fuse_gate = gate is not None and shared_expert_gate is not None
+        self._combined_gate_weight: torch.Tensor | None = None
+
+        self._shared_experts: SharedExperts | None = None
+        if shared_experts is not None:
+            self._shared_experts = SharedExperts(
+                shared_experts,
+                moe_config=moe_config,
+                # Note: For now we must pass quant_method along to SharedExperts so it
+                # can property determine where the shared experts are supposed to be
+                # called, i.e. by a MK or by the MoERunner.
+                # Once the MK can be created upfront, we can just pass in the proper
+                # flags derived from the quant_method's MK.
+                quant_method=quant_method,
+                enable_dbo=enable_dbo,
+            )
+
+        # Needed for string -> FusedMoE layer lookup in custom ops.
+        self.layer_name = layer_name
+
+        self._forward_entry = self._select_forward()
+
+    def _select_forward(self) -> Callable:
+        if current_platform.is_tpu() or current_platform.is_cpu():
+            # TODO: Once the OOM issue for the TPU backend is resolved, we
+            # will switch to using the moe_forward custom op.
+            # Note: CPU doesn't require wrapped _forward_impl.
+            return _moe_forward if self._shared_experts is None else _moe_forward_shared
+
+        return (
+            torch.ops.vllm.moe_forward
+            if self._shared_experts is None
+            else torch.ops.vllm.moe_forward_shared
+        )
+
+    @property
+    def shared_experts(self) -> SharedExperts | None:
+        return self._shared_experts
+
+    # TODO(bnell): temporary hack, do not call this method.
+    def _replace_quant_method(self, quant_method: FusedMoEMethodBase):
+        if self._shared_experts is not None:
+            self._shared_experts._quant_method = quant_method
+        self._quant_method = quant_method
+
+    def _maybe_fuse_gate_weights(self):
+        """Fuse router and shared expert gate weights on first call.
+
+        Cannot be done at __init__ because gate weights are loaded after
+        module construction (via weight_loader). Called once from
+        _forward_impl before the first forward pass.
+        """
+        if self._combined_gate_weight is None:
+            assert self.gate is not None and self.shared_expert_gate is not None
+            self._combined_gate_weight = torch.cat(
+                [self.gate.weight, self.shared_expert_gate.weight],
+                dim=0,
+            )
+
+    def is_internal_router(self) -> bool:
+        return self.gate is not None
+
+    def apply_routed_input_transform(
+        self, hidden_states: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
+        """Apply transform for routed experts (e.g., latent projection).
+
+        This is called by FusedMoE.forward_native. The original hidden_states
+        is saved separately so shared experts get [S, hidden_size] while
+        routed experts get the transformed [S, moe_latent_size].
+
+        Returns (possibly transformed) hidden states and the input for shared
+        experts (or None if there are no shared experts).
+        """
+        if self.routed_input_transform is not None:
+            result = self.routed_input_transform(hidden_states)
+            # ReplicatedLinear returns (output, extra_bias) tuple.
+            # We only need the output tensor; extra_bias is not used here.
+            if isinstance(result, tuple):
+                return result[0], hidden_states
+            return result, hidden_states
+
+        return (
+            hidden_states,
+            hidden_states if self._shared_experts is not None else None,
+        )
+
+    def apply_routed_output_transform(
+        self,
+        fused_output: torch.Tensor,
+    ) -> torch.Tensor:
+        """Apply transform to routed expert output (e.g., latent to full dim).
+
+        Used by latent MoE models (e.g., NemotronH) where routed experts
+        operate in a compressed latent space and need projection back to
+        the full hidden dimension before combining with shared expert output.
+        """
+        if self.routed_output_transform is not None:
+            r = self.routed_output_transform(fused_output)
+            fused_output = r[0] if isinstance(r, tuple) else r
+        return fused_output
+
+    def _maybe_apply_routed_scale_to_output(
+        self,
+        shared_output: torch.Tensor | None,
+        fused_output: torch.Tensor,
+    ) -> tuple[torch.Tensor | None, torch.Tensor]:
+        """Apply routed_scaling_factor to the output with FP16 overflow
+        protection.
+
+        Scale the fused expert output by routed_scaling_factor. For FP16,
+        avoid overflow by dividing shared_output by the scale instead
+        (the decoder layer compensates with matching divisions).
+        """
+        if self.routed_scaling_factor != 1.0:
+            if fused_output.dtype != torch.float16 or shared_output is None:
+                fused_output *= self.routed_scaling_factor
+            elif shared_output is not None:
+                shared_output *= 1.0 / self.routed_scaling_factor
+        return shared_output, fused_output
+
+    @property
+    def _fused_output_is_reduced(self) -> bool:
+        return (
+            self._quant_method.moe_kernel is not None
+            and self._quant_method.moe_kernel.output_is_reduced()
+        )
+
+    def _maybe_reduce_shared_expert_output(
+        self,
+        shared_output: torch.Tensor | None,
+    ) -> torch.Tensor | None:
+        """All-reduce shared expert output when the combine kernel already
+        reduced fused output.
+
+        * If the combine kernel does the reduction for fused_output, reduce
+          shared_output separately. O.w, reduce fused_output+shared_output later.
+        * If we have SP (TP=N, DP=M, EP), there is a separate AG step handled
+          in the model.
+        """
+        if (
+            shared_output is not None
+            and not self.moe_config.is_sequence_parallel
+            and self._fused_output_is_reduced
+        ):
+            shared_output = tensor_model_parallel_all_reduce(shared_output)
+        return shared_output
+
+    def _maybe_reduce_final_output(
+        self,
+        states: torch.Tensor,
+        trunc_size: int,
+    ) -> torch.Tensor:
+        """Truncate padded dimensions and all-reduce the combined output.
+
+        This is the "late" all-reduce path. When neither fused nor shared
+        output was individually reduced, the combined sum is all-reduced
+        here. Skipped when sequence-parallel is active (SP handles its
+        own reduction) or when the early path already reduced both outputs.
+        """
+        # We don't need to reduce the final output if:
+        # - We are not running with TP or DP
+        # - The MK already reduced the fused output itself.
+        if (
+            not self.moe_config.is_sequence_parallel
+            and (self.moe_config.tp_size > 1 or self.moe_config.ep_size > 1)
+            and not self._fused_output_is_reduced
+        ):
+            states = tensor_model_parallel_all_reduce(states)
+
+        return states[..., :trunc_size]
+
+    def _encode_layer_name(self) -> str | LayerName:
+        if _USE_LAYERNAME:
+            return LayerName(self.layer_name)
+        # Can be unavailable or None in unittests
+        if (
+            is_forward_context_available()
+            and get_forward_context().all_moe_layers is not None
+        ):
+            return "from_forward_context"
+        return self.layer_name
+
+    def _trtllm_mxfp4_unpadded_dim(self) -> int:
+        """Return ``hidden_dim_unpadded`` when the active backend is TRT-LLM
+        MXFP4 (whose kernel writes narrower than the padded
+        ``hidden_states.shape[-1]``), else 0. Other MXFP4 backends (notably
+        Cutlass MXFP4 MXFP8) write the full padded width, so
+        ``moe_config.hidden_dim_unpadded`` alone is insufficient: it encodes
+        the model's logical hidden, not whether the kernel narrows. Computed
+        caller-side and passed as an op arg; doing the isinstance check
+        inside the fake would specialize per ``layer_name`` and break
+        subgraph dedup for identical-architecture models (e.g. Phi-MoE).
+        """
+        from vllm.model_executor.layers.fused_moe.experts.trtllm_mxfp4_moe import (
+            TrtLlmMxfp4ExpertsBase,
+        )
+
+        moe_kernel = getattr(self._quant_method, "moe_kernel", None)
+        fused_experts = getattr(
+            getattr(moe_kernel, "impl", None), "fused_experts", None
+        )
+        if isinstance(fused_experts, TrtLlmMxfp4ExpertsBase):
+            return self.moe_config.hidden_dim_unpadded or self.moe_config.hidden_dim
+        return 0
+
+    def _maybe_pad_hidden_states(
+        self,
+        shared_experts_input: torch.Tensor | None,
+        hidden_states: torch.Tensor,
+    ) -> tuple[torch.Tensor, int]:
+        """Pad hidden_states to moe_config.hidden_dim and compute the
+        original dimension for later truncation.
+
+        For latent MoE, the routed hidden_states may be smaller than
+        hidden_dim. Padding ensures uniform tensor sizes through the
+        fused MoE kernel. The returned trunc_size is used by
+        _maybe_reduce_final_output to strip the padding from the result.
+        """
+        shared_experts_hidden_dim = (
+            shared_experts_input.shape[-1] if shared_experts_input is not None else 0
+        )
+        transformed_hidden_dim = hidden_states.shape[-1]
+        if (
+            not self._quant_method.skip_forward_padding
+            and self.moe_config.hidden_dim != transformed_hidden_dim
+        ):
+            hidden_states = F.pad(
+                hidden_states,
+                (0, self.moe_config.hidden_dim - transformed_hidden_dim),
+                mode="constant",
+                value=0.0,
+            )
+
+        if self.routed_output_transform is not None and shared_experts_hidden_dim > 0:
+            orig_hidden_dims = shared_experts_hidden_dim
+        else:
+            orig_hidden_dims = transformed_hidden_dim
+
+        return hidden_states, orig_hidden_dims
+
+    def _maybe_apply_shared_experts(
+        self,
+        shared_experts_input: torch.Tensor | None,
+        order: SharedExpertsOrder,
+    ):
+        if self._shared_experts is not None:
+            assert shared_experts_input is not None
+            self._shared_experts.apply(shared_experts_input, order)
+
+    def _apply_quant_method(
+        self,
+        layer: torch.nn.Module,
         hidden_states: torch.Tensor,
         router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        raise NotImplementedError
+        shared_experts_input: torch.Tensor | None,
+        input_ids: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor | None, torch.Tensor]:
+        """Run expert routing and the fused MoE kernel via the quant method.
 
-    @abstractmethod
-    def must_reduce_shared_expert_outputs(self) -> bool:
-        raise NotImplementedError
+        Orchestrates shared expert execution (before/after), expert selection
+        via the router, and the actual fused MoE computation. Returns
+        (shared_expert_output, fused_expert_output).
+        """
+        self._maybe_apply_shared_experts(
+            shared_experts_input, SharedExpertsOrder.NO_OVERLAP
+        )
 
-    @abstractmethod
-    def maybe_all_reduce_tensor_model_parallel(
+        if self._quant_method.is_monolithic:
+            fused_out = self._quant_method.apply_monolithic(
+                layer=layer,
+                x=hidden_states,
+                router_logits=router_logits,
+                input_ids=input_ids,
+            )
+        else:
+            topk_weights, topk_ids = self.router.select_experts(
+                hidden_states=hidden_states,
+                router_logits=router_logits,
+                input_ids=input_ids,
+            )
+
+            # Passing shared_experts_input in case SharedExpertsOrder is
+            # MK_INTERNAL_OVERLAPPED.
+            fused_out = self._quant_method.apply(
+                layer=layer,
+                x=hidden_states,
+                topk_weights=topk_weights,
+                topk_ids=topk_ids,
+                shared_experts=self._shared_experts,
+                shared_experts_input=shared_experts_input,
+            )
+
+        self._maybe_apply_shared_experts(
+            shared_experts_input,
+            SharedExpertsOrder.MULTI_STREAM_OVERLAPPED,
+        )
+
+        return (
+            self._shared_experts.output if self._shared_experts is not None else None,
+            fused_out,
+        )
+
+    def _sequence_parallel_context(self):
+        """Return a context manager for sequence-parallel token
+        redistribution.
+
+        When sequence parallelism is active, returns a context that handles
+        local size tracking for proper token scatter/gather. Otherwise
+        returns a no-op context.
+        """
+        ctx = get_forward_context()
+        return (
+            ctx.dp_metadata.sp_local_sizes(self.moe_config.sp_size)
+            if ctx.dp_metadata
+            else nullcontext()
+        )
+
+    def _maybe_sync_shared_experts_stream(
         self,
-        final_hidden_states: torch.Tensor,
+        shared_experts_input: torch.Tensor | None,
     ):
-        raise NotImplementedError
+        # If router/gate provided, then apply it here.
+        # (Note: This code runs only when "overlapped mode" is on to allow
+        #        parallel execution of shared experts with the FusedMoE via
+        #        separate cuda stream)
+        if self._shared_experts is not None:
+            assert shared_experts_input is not None
+            self._shared_experts.maybe_sync_shared_experts_stream(shared_experts_input)
+
+    def _maybe_add_zero_expert_output(
+        self,
+        result: torch.Tensor,
+    ) -> torch.Tensor:
+        """Add the zero expert's contribution to the final result.
+
+        When a ZeroExpertRouter is used, it computes a bias-like output
+        from the "zero expert" that is added to the combined routed+shared
+        expert output.
+        """
+        if isinstance(self.router, ZeroExpertRouter):
+            zero_expert_output = self.router.zero_expert_output
+            assert zero_expert_output is not None
+            result = result + zero_expert_output
+        return result
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        router_logits: torch.Tensor,
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        """Invoke the fused moe layer.
+
+        Input:
+        - hidden_states
+        - router_logits
+
+        Output:
+        - The new hidden_states.
+
+        Calling sequence
+        - forward
+          - self._forward_entry (_moe_forward or _moe_forward_shared custom op)
+            - _forward_impl
+
+        Note: The existence of _moe_forward and _moe_forward_shared custom ops are due
+        to the following reason:
+        1. pytorch cannot handle union types in custom op signatures so
+           _moe_forward and _moe_forward_shared must be split.
+        """
+
+        # Apply transform for routed experts (e.g., latent projection
+        # for latent MoE)
+        hidden_states, shared_experts_input = self.apply_routed_input_transform(
+            hidden_states
+        )
+
+        # Record before `_maybe_pad_hidden_states` pads activations to match
+        # `moe_config.hidden_dim`, e.g. after `align_trtllm_fp4_moe_hidden_dim_for_fi`
+        # so routed output can be trimmed before
+        # shared+routed add / latent up proj if needed.
+        routed_hidden_dim = hidden_states.shape[-1]
+        hidden_states, og_hidden_dim = self._maybe_pad_hidden_states(
+            shared_experts_input,
+            hidden_states,
+        )
+        hidden_dim_was_padded = hidden_states.shape[-1] > routed_hidden_dim
+
+        result = self._forward_entry(
+            hidden_states,
+            router_logits,
+            shared_experts_input,
+            input_ids,
+            self._encode_layer_name(),
+            self._trtllm_mxfp4_unpadded_dim(),
+        )
+
+        #
+        # Note: there are two all-reduce points below. They are mutually
+        # exclusive, controlled by _fused_output_is_reduced
+        #  - When True: the combine kernel already reduced fused_output,
+        #    so we reduce shared_output here to match, then skip the
+        #    all-reduce in _maybe_reduce_final_output.
+        #  - When False: neither output is reduced yet, so we combine
+        #    them first and all-reduce the sum in _maybe_reduce_final_output.
+
+        # Extract outputs from result
+        shared_output, fused_output = _unpack(result)
+        if (
+            shared_output is not None or self.routed_output_transform is not None
+        ) and hidden_dim_was_padded:
+            fused_output = fused_output[..., :routed_hidden_dim]
+
+        # If combine kernel already reduced fused, reduce shared to match.
+        # See note above re: the two all-reduce points.
+        shared_output = self._maybe_reduce_shared_expert_output(shared_output)
+
+        shared_output, fused_output = self._maybe_apply_routed_scale_to_output(
+            shared_output, fused_output
+        )
+
+        # Apply output transform (e.g. latent -> full dim)
+        fused_output = self.apply_routed_output_transform(fused_output)
+
+        if shared_output is not None:
+            result = shared_output + fused_output
+        else:
+            result = fused_output
+
+        result = self._maybe_reduce_final_output(result, og_hidden_dim)
+
+        return self._maybe_add_zero_expert_output(result)
+
+    @property
+    def do_naive_dispatch_combine(self) -> bool:
+        return (
+            self.moe_config.dp_size > 1 and not self._quant_method.supports_internal_mk
+        )
+
+    def _maybe_dispatch(
+        self,
+        layer: torch.nn.Module,
+        hidden_states: torch.Tensor,
+        router_logits: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        # For naive dispatch/combine Dp/Ep, dispatch the hidden states and
+        # router logits to all experts.
+        # NOTE: this will be removed once all kernels are migrated into the
+        # MoEKernel framework.
+        if self.do_naive_dispatch_combine:
+            result = get_ep_group().dispatch_router_logits(
+                hidden_states,
+                router_logits,
+                self.moe_config.is_sequence_parallel,
+            )
+            assert len(result) == 2
+            hidden_states, router_logits = result
+
+        # NOTE: Similar with DP, PCP also needs dispatch and combine. For
+        # simplicity, AgRsAll2All was added separately for PCP here. Maybe
+        # we should modify All2AllManager abstraction to better support PCP.
+        if self.moe_config.pcp_size > 1:
+            hidden_states = get_pcp_group().all_gather(
+                hidden_states,
+                dim=0,
+            )
+            router_logits = get_pcp_group().all_gather(
+                router_logits,
+                dim=0,
+            )
+
+        return hidden_states, router_logits
+
+    def _maybe_combine(
+        self,
+        shared_output: torch.Tensor | None,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor | None]:
+        if self.do_naive_dispatch_combine:
+            hidden_states = get_ep_group().combine(
+                hidden_states, self.moe_config.is_sequence_parallel
+            )
+
+        if self.moe_config.pcp_size > 1:
+            hidden_states = get_pcp_group().reduce_scatter(
+                hidden_states,
+                dim=0,
+            )
+
+        if self.shared_experts is not None:
+            assert shared_output is not None
+            return shared_output, hidden_states
+        else:
+            return hidden_states
+
+    def _forward_impl(
+        self,
+        layer: torch.nn.Module,
+        hidden_states: torch.Tensor,
+        router_logits: torch.Tensor,
+        shared_experts_input: torch.Tensor | None,
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        """Entry point called by the custom op to run the MoE computation.
+
+        Handles pre-dispatch setup (gate application, external shared expert
+        triggering, quant config init) then performs the following steps
+        within the sequence-parallel context.
+
+        - Performs expert routing
+        - fused MoE kernel execution
+        - shared expert computation.
+
+        Returns a single tensor of combined fused and shared output (if present).
+        """
+        # TODO(bnell): this can be removed after MK migration is complete.
+        layer.ensure_moe_quant_config_init()
+
+        # Sync aux and main stream for shared expert multi-stream overlap.
+        self._maybe_sync_shared_experts_stream(shared_experts_input)
+
+        # If the Runner holds the gate, apply it after the stream sync,
+        # so it can run overlapped with the
+        # NOTE: in future PR, MoE runner will always hold the gate.
+        if self.gate is not None:
+            if self._fse_fuse_gate:
+                self._maybe_fuse_gate_weights()
+                router_logits = F.linear(hidden_states, self._combined_gate_weight)
+            else:
+                router_logits, _ = self.gate(hidden_states)
+
+        with self._sequence_parallel_context():
+            # TODO(bnell): parts of the dispatch/combine steps will go away once
+            # #32567 lands and the remaining kernels are made MKs.  The PCP
+            # code will probably remain
+            hidden_states, router_logits = self._maybe_dispatch(
+                layer,
+                hidden_states,
+                router_logits,
+            )
+
+            shared_output, hidden_states = self._apply_quant_method(
+                layer=layer,
+                hidden_states=hidden_states,
+                router_logits=router_logits,
+                shared_experts_input=shared_experts_input,
+                input_ids=input_ids,
+            )
+
+            return self._maybe_combine(
+                shared_output,
+                hidden_states,
+            )
diff --git a/vllm/model_executor/layers/fused_moe/runner/moe_runner_interface.py b/vllm/model_executor/layers/fused_moe/runner/moe_runner_interface.py
new file mode 100644
index 000000000000..e3b239ca60fa
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/runner/moe_runner_interface.py
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from abc import ABC, abstractmethod
+
+import torch
+
+from vllm.model_executor.custom_op import PluggableLayer
+from vllm.model_executor.layers.fused_moe.fused_moe_method_base import (
+    FusedMoEMethodBase,
+)
+from vllm.model_executor.layers.fused_moe.runner.shared_experts import (
+    SharedExperts,
+)
+
+
+class MoERunnerInterface(PluggableLayer, ABC):
+    """
+    Abstract base class for Mixture of Experts (MoE) runners.
+
+    This class defines the interface that all MoE runner implementations must follow.
+    MoE runners are responsible for executing the forward pass of MoE layers, handling
+    expert routing, and managing tensor parallel operations.
+    """
+
+    @abstractmethod
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        router_logits: torch.Tensor,
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        raise NotImplementedError
+
+    @abstractmethod
+    def is_internal_router(self) -> bool:
+        raise NotImplementedError
+
+    @property
+    @abstractmethod
+    def shared_experts(self) -> SharedExperts | None:
+        raise NotImplementedError
+
+    # TODO(bnell): temporary hack, do not call this method.
+    @abstractmethod
+    def _replace_quant_method(self, quant_method: FusedMoEMethodBase):
+        raise NotImplementedError
diff --git a/vllm/model_executor/layers/fused_moe/runner/shared_experts.py b/vllm/model_executor/layers/fused_moe/runner/shared_experts.py
new file mode 100644
index 000000000000..e6492cef61c6
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/runner/shared_experts.py
@@ -0,0 +1,178 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from enum import IntEnum
+
+import torch
+
+import vllm.envs as envs
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+)
+from vllm.model_executor.layers.quantization.base_config import (
+    QuantizeMethodBase,
+)
+from vllm.platforms import current_platform
+from vllm.utils.torch_utils import (
+    aux_stream,
+    current_stream,
+)
+from vllm.v1.worker.ubatching import (
+    dbo_current_ubatch_id,
+)
+
+logger = init_logger(__name__)
+
+
+class SharedExpertsOrder(IntEnum):
+    # No shared experts.
+    NONE = (0,)
+
+    # No overlap - defensively called before MK.
+    NO_OVERLAP = (1,)
+
+    # Overlapped with dispatch/combine in DP/EP - called by the MK.
+    MK_INTERNAL_OVERLAPPED = (2,)
+
+    # Overlapped with the gate, router, experts in aux stream.
+    MULTI_STREAM_OVERLAPPED = (3,)
+
+
+class SharedExperts:
+    def __init__(
+        self,
+        layer: torch.nn.Module,
+        moe_config: FusedMoEConfig,
+        quant_method: QuantizeMethodBase,
+        enable_dbo: bool,
+    ):
+        from vllm.model_executor.layers.fused_moe.fused_moe_method_base import (
+            FusedMoEMethodBase,
+        )
+
+        # quant_method must be a FusedMoEMethodBase but we can't use the type
+        # due to circular imports.
+        assert isinstance(quant_method, FusedMoEMethodBase)
+
+        # The SharedExperts need to handle DBO since they can be called from
+        # an MK's finalize method.  We keep a list of outputs indexed by current
+        # DBO ubatch id to handle this case.  If DBO is not enabled, the
+        # index is always 0 and the second output list element is ignored.
+        self.enable_dbo = enable_dbo
+        self._output: list[torch.Tensor | None] = [None, None]
+        self._layer = layer
+        self._moe_config = moe_config
+        self._quant_method = quant_method
+
+        # Allow disabling of the separate shared experts stream for
+        # debug purposes.
+        # TODO: Remove this after more extensive testings with TP/DP
+        # and other execution modes
+        if envs.VLLM_DISABLE_SHARED_EXPERTS_STREAM:
+            logger.debug_once("Disabling MoE shared_experts cuda stream")
+            self._stream = None
+        else:
+            # TODO(rob): enable shared expert overlap with non-cuda-alike.
+            # aux_stream() returns None on non-cuda-alike platforms.
+            self._stream = aux_stream()
+            if self._stream is not None:
+                logger.debug_once("Enabled separate cuda stream for MoE shared_experts")
+
+    @property
+    def _disable_shared_experts_overlap(self) -> bool:
+        # Disable shared expert overlap if:
+        #   - we are using eplb with non-default backend, because of correctness issues
+        #   - we are using flashinfer with DP, since there nothing to gain
+        parallel_config = self._moe_config.moe_parallel_config
+        return (
+            parallel_config.enable_eplb
+            and parallel_config.all2all_backend != "allgather_reducescatter"
+        ) or parallel_config.use_fi_nvl_two_sided_kernels
+
+    def _determine_shared_experts_order(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> SharedExpertsOrder:
+        if self._disable_shared_experts_overlap:
+            return SharedExpertsOrder.NO_OVERLAP
+
+        if self._quant_method.mk_can_overlap_shared_experts:
+            return SharedExpertsOrder.MK_INTERNAL_OVERLAPPED
+
+        should_run_shared_in_aux_stream = (
+            current_platform.is_cuda()
+            and self._stream is not None
+            and hidden_states.shape[0]
+            <= envs.VLLM_SHARED_EXPERTS_STREAM_TOKEN_THRESHOLD
+        )
+
+        if should_run_shared_in_aux_stream:
+            return SharedExpertsOrder.MULTI_STREAM_OVERLAPPED
+        else:
+            return SharedExpertsOrder.NO_OVERLAP
+
+    def maybe_sync_shared_experts_stream(
+        self,
+        shared_experts_input: torch.Tensor,
+    ):
+        experts_order = self._determine_shared_experts_order(shared_experts_input)
+
+        if experts_order == SharedExpertsOrder.MULTI_STREAM_OVERLAPPED:
+            assert self._stream is not None
+            assert self._moe_config.disable_inplace
+
+            # Record that the clone will be used by shared_experts_stream
+            # to avoid gc issue from deallocation of hidden_states_clone
+            # For more details: https://docs.pytorch.org/docs/stable/generated/torch.Tensor.record_stream.html # noqa: E501
+            # NOTE: We don't need shared_output.record_stream(current_stream())
+            # because we synch the streams before using shared_output.
+            shared_experts_input.record_stream(self._stream)
+
+            # Mark sync start point for the aux stream since we will
+            # run in parallel with router/gate.
+            self._stream.wait_stream(current_stream())
+
+    def _run_in_aux_stream(
+        self,
+        shared_experts_input: torch.Tensor,
+    ) -> torch.Tensor:
+        # TODO: assert that maybe_sync_shared_experts_stream has been called.
+
+        # Run shared experts in parallel on a separate stream.
+        with torch.cuda.stream(self._stream):
+            output = self._layer(shared_experts_input)
+        current_stream().wait_stream(self._stream)
+
+        return output
+
+    @property
+    def _output_idx(self) -> int:
+        return dbo_current_ubatch_id() if self.enable_dbo else 0
+
+    @property
+    def output(self) -> torch.Tensor:
+        assert self._output[self._output_idx] is not None
+        output = self._output[self._output_idx]
+        self._output[self._output_idx] = None
+        return output
+
+    def apply(
+        self,
+        shared_experts_input: torch.Tensor,
+        order: SharedExpertsOrder,
+    ):
+        experts_order = self._determine_shared_experts_order(shared_experts_input)
+
+        if order != experts_order:
+            return None
+
+        assert self._output[self._output_idx] is None
+
+        if order == SharedExpertsOrder.MULTI_STREAM_OVERLAPPED:
+            self._output[self._output_idx] = self._run_in_aux_stream(
+                shared_experts_input
+            )
+        else:
+            self._output[self._output_idx] = self._layer(shared_experts_input)
+
+        assert self._output[self._output_idx] is not None
diff --git a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
deleted file mode 100644
index 37336df17561..000000000000
--- a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import torch
-
-from vllm.distributed import (
-    get_tensor_model_parallel_world_size,
-    tensor_model_parallel_all_reduce,
-)
-from vllm.model_executor.layers.fused_moe.layer import FusedMoE
-
-
-# TODO(bnell): Add shared + fused combo function? e.g. +
-class SharedFusedMoE(FusedMoE):
-    """
-    A FusedMoE operation that also computes the results of shared experts.
-    If an all2all communicator is being used the shared expert computation
-    can be interleaved with the fused all2all dispatch communication step.
-    """
-
-    def forward(
-        self,
-        hidden_states: torch.Tensor,
-        router_logits: torch.Tensor,
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        if not self.use_overlapped:
-            if self._shared_experts is not None:
-                shared_out = self._shared_experts(hidden_states)
-
-                # Reduce shared expert outputs if necessary, since the MLP
-                # should have been created with reduce_results=False.
-                if (
-                    self.reduce_results
-                    and get_tensor_model_parallel_world_size() > 1
-                    and self.must_reduce_shared_expert_outputs()
-                ):
-                    shared_out = tensor_model_parallel_all_reduce(shared_out)
-            else:
-                shared_out = None
-
-            fused_out = super().forward(
-                hidden_states=hidden_states,
-                router_logits=router_logits,
-            )
-        else:
-            shared_out, fused_out = super().forward(
-                hidden_states=hidden_states,
-                router_logits=router_logits,
-            )
-            # ensure early TP reduction of shared expert outputs when required
-            if (
-                shared_out is not None
-                and self.reduce_results
-                and get_tensor_model_parallel_world_size() > 1
-                and self.must_reduce_shared_expert_outputs()
-            ):
-                shared_out = tensor_model_parallel_all_reduce(shared_out)
-        return shared_out, fused_out
diff --git a/vllm/model_executor/layers/fused_moe/topk_weight_and_reduce.py b/vllm/model_executor/layers/fused_moe/topk_weight_and_reduce.py
index 4cebe608a6b4..837c1498622f 100644
--- a/vllm/model_executor/layers/fused_moe/topk_weight_and_reduce.py
+++ b/vllm/model_executor/layers/fused_moe/topk_weight_and_reduce.py
@@ -62,6 +62,10 @@ def apply(
         if output is None:
             return fused_expert_output
 
+        # Skip self-copy when caller aliased fused_out to output upstream.
+        if output is fused_expert_output:
+            return output
+
         # MoEPrepareAndFinalizeNoDPEPModular needs the output to be in the `output`
         # tensor.
         assert output.size() == fused_expert_output.size(), (
diff --git a/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py b/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py
index a29d8a7d8dda..0261b8f603a3 100644
--- a/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py
+++ b/vllm/model_executor/layers/fused_moe/unquantized_fused_moe_method.py
@@ -2,15 +2,13 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from collections.abc import Callable
+from typing import TYPE_CHECKING
 
 import torch
 import torch.nn.functional as F
 from torch.nn import Module
-from torch.nn.parameter import Parameter
 
 import vllm.envs as envs
-import vllm.model_executor.layers.fused_moe.modular_kernel as mk
-from vllm._aiter_ops import rocm_aiter_ops
 from vllm.logger import init_logger
 from vllm.model_executor.custom_op import CustomOp
 from vllm.model_executor.layers.fused_moe.config import (
@@ -23,7 +21,6 @@
     FusedMoEMethodBase,
 )
 from vllm.model_executor.layers.fused_moe.modular_kernel import (
-    FusedMoEActivationFormat,
     FusedMoEExpertsModular,
     FusedMoEPrepareAndFinalizeModular,
 )
@@ -33,19 +30,15 @@
     make_unquantized_moe_kernel,
     select_unquantized_moe_backend,
 )
-from vllm.model_executor.layers.quantization.utils.flashinfer_utils import (
-    convert_moe_weights_to_flashinfer_trtllm_block_layout,
+from vllm.model_executor.layers.fused_moe.runner.shared_experts import (
+    SharedExperts,
 )
 from vllm.model_executor.utils import replace_parameter, set_weight_attrs
 from vllm.platforms import current_platform
 from vllm.platforms.interface import CpuArchEnum
 
-if current_platform.is_cuda_alike() or current_platform.is_xpu():
-    from .fused_batched_moe import BatchedTritonExperts
-    from .fused_moe import TritonExperts
-else:
-    TritonExperts = None  # type: ignore
-
+if TYPE_CHECKING:
+    from vllm.model_executor.layers.fused_moe import RoutedExperts
 
 logger = init_logger(__name__)
 
@@ -59,46 +52,16 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
 
     def __init__(self, moe: FusedMoEConfig):
         super().__init__(moe)
-        self.unquantized_backend = select_unquantized_moe_backend(
+        self.unquantized_backend, self.experts_cls = select_unquantized_moe_backend(
             moe_config=self.moe,
-            use_ep=self.moe.moe_parallel_config.use_ep,
-            use_dp=self.moe.moe_parallel_config.dp_size > 1,
-        )
-
-        # AITER only supports gated activations (silu/gelu), so disable it
-        # for non-gated MoE (is_act_and_mul=False)
-        self.rocm_aiter_moe_enabled = (
-            rocm_aiter_ops.is_fused_moe_enabled() and moe.is_act_and_mul
         )
-        self.kernel: mk.FusedMoEKernel | None = None
-        self._is_monolithic = (
-            current_platform.is_cpu()
-            or self.unquantized_backend == UnquantizedMoeBackend.FLASHINFER_TRTLLM
-        )
-
-        if self.is_monolithic:
-            self.apply_monolithic: Callable = self._select_monolithic()
-
-    def _select_monolithic(self) -> Callable:
-        """Select the monolithic implementation based on platform."""
-        if current_platform.is_cpu():
-            return self.forward_monolithic_cpu
-        else:
-            return self.forward_monolithic_cuda
-
-    def forward_native(
-        self,
-        layer: "FusedMoE",  # type: ignore[name-defined] # noqa: F821
-        x: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        return self.forward_cuda(layer, x, topk_weights, topk_ids, shared_experts_input)
 
     @property
     def is_monolithic(self) -> bool:
-        return self._is_monolithic
+        # Escape hatch for CPU, which stays on the old monolithic path.
+        if self.unquantized_backend == UnquantizedMoeBackend.CPU:
+            return True
+        return super().is_monolithic
 
     @property
     def supports_eplb(self) -> bool:
@@ -107,35 +70,22 @@ def supports_eplb(self) -> bool:
     def maybe_make_prepare_finalize(
         self,
         routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
-    ) -> FusedMoEPrepareAndFinalizeModular | None:
-        if self.unquantized_backend == UnquantizedMoeBackend.AITER:
-            return None
-        else:
-            return super().maybe_make_prepare_finalize(routing_tables)
+    ):
+        raise ValueError(
+            f"{self.__class__.__name__} uses the new modular kernel initialization "
+            "logic for all but the CPU backend. CPU backend is monolithic. "
+            "So this function should not be called."
+        )
 
     def select_gemm_impl(
         self,
         prepare_finalize: FusedMoEPrepareAndFinalizeModular,
         layer: torch.nn.Module,
     ) -> FusedMoEExpertsModular:
-        assert self.moe_quant_config is not None
-        if (
-            prepare_finalize.activation_format
-            == FusedMoEActivationFormat.BatchedExperts
-        ):
-            logger.debug("BatchedTritonExperts %s", self.moe)
-            return BatchedTritonExperts(
-                moe_config=self.moe,
-                quant_config=self.moe_quant_config,
-                max_num_tokens=self.moe.max_num_tokens,
-                num_dispatchers=prepare_finalize.num_dispatchers(),
-            )
-        else:
-            logger.debug("TritonExperts %s", self.moe)
-            return TritonExperts(
-                moe_config=self.moe,
-                quant_config=self.moe_quant_config,
-            )
+        raise ValueError(
+            f"{self.__class__.__name__} uses the new modular kernel initialization "
+            "logic. This function should not be called."
+        )
 
     def create_weights(
         self,
@@ -211,48 +161,68 @@ def _setup_kernel(
         w2: torch.Tensor,
     ) -> None:
         # Shuffle weights to runtime format.
-        w13, w2 = convert_to_unquantized_kernel_format(
+        w13_new, w2_new = convert_to_unquantized_kernel_format(
             self.unquantized_backend,
             layer=layer,
             w13_weight=w13,
             w2_weight=w2,
         )
-        replace_parameter(layer, "w13_weight", w13)
-        replace_parameter(layer, "w2_weight", w2)
-
-        # Setup Modular Kernel for TP Case
-        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
-        assert self.moe_quant_config is not None
-
-        self.kernel = make_unquantized_moe_kernel(
-            backend=self.unquantized_backend,
-            quant_config=self.moe_quant_config,
-            moe_config=self.moe,
-        )
+        # `moe_kernel` is initialized to None in FusedMoEMethodBase.__init__;
+        # On the first call we replace the parameter normally. On subsequent
+        # calls (e.g. RL weight updates that re-trigger
+        # process_weights_after_loading) the moe kernel has already been set
+        # up and CUDA graphs may have captured the parameter addresses, so
+        # we copy the shuffled data into the existing storage instead of
+        # re-registering a new Parameter.
+        is_weight_update = self.moe_kernel is not None  # type: ignore[has-type]
+        replace_parameter(layer, "w13_weight", w13_new, prefer_copy=is_weight_update)
+        replace_parameter(layer, "w2_weight", w2_new, prefer_copy=is_weight_update)
+
+        # AITER backend requires weights to be marked as shuffled.
+        if self.unquantized_backend == UnquantizedMoeBackend.AITER:
+            layer.w13_weight.is_shuffled = True
+            layer.w2_weight.is_shuffled = True
+
+        if not is_weight_update:
+            # Setup moe kernel only on the first call. For the unquantized
+            # method, moe_quant_config is either the constant
+            # FUSED_MOE_UNQUANTIZED_CONFIG or biased_moe_quant_config(...)
+            # which references layer.w{13,2}_bias; since weight updates
+            # mutate those bias tensors in place, the kernel does not need
+            # to be re-built.
+            self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+            assert self.moe_quant_config is not None
+            assert self.experts_cls is not None
+            self.moe_kernel = make_unquantized_moe_kernel(
+                quant_config=self.moe_quant_config,
+                moe_config=self.moe,
+                backend=self.unquantized_backend,
+                experts_cls=self.experts_cls,
+                routing_tables=layer._expert_routing_tables(),
+            )
 
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         super().process_weights_after_loading(layer)
 
-        # Padding the weight for better performance on ROCm
+        # Padding the weight for better performance on ROCm.
+        # _maybe_pad_weight is idempotent: on the first call it allocates a
+        # padded storage and returns a strided view; on subsequent calls
+        # (weight updates) the stride condition no longer matches so it
+        # returns the input unchanged. The reassignment to .data is therefore
+        # a no-op on updates and preserves the storage address (data_ptr)
+        # used by captured CUDA graphs.
         layer.w13_weight.data = self._maybe_pad_weight(layer.w13_weight.data)
         layer.w2_weight.data = self._maybe_pad_weight(layer.w2_weight.data)
 
-        if self.unquantized_backend == UnquantizedMoeBackend.FLASHINFER_TRTLLM:
-            _cache_permute_indices: dict[torch.Size, torch.Tensor] = {}
-            # Swap halves to arrange as [w3; w1] (kernel expectation)
-            w1_w, w3_w = torch.chunk(layer.w13_weight.data, 2, dim=1)
-            w13_weight_swapped = torch.cat([w3_w, w1_w], dim=1)
-            layer.w13_weight.data = w13_weight_swapped.contiguous()
-            w13_weights_shuffled, w2_weights_shuffled = (
-                convert_moe_weights_to_flashinfer_trtllm_block_layout(
-                    _cache_permute_indices,
-                    layer.w13_weight.data,
-                    layer.w2_weight.data,
-                )
-            )
-            layer.w13_weight = Parameter(w13_weights_shuffled, requires_grad=False)
-            layer.w2_weight = Parameter(w2_weights_shuffled, requires_grad=False)
+        if self.unquantized_backend in [
+            UnquantizedMoeBackend.TPU,
+            UnquantizedMoeBackend.OOT,
+        ]:
+            # OOT handles internally.
+            return
+
         elif self.unquantized_backend == UnquantizedMoeBackend.CPU:
+            # CPU stays on the old path — no oracle, no moe_kernel.
             from vllm.model_executor.layers.fused_moe import cpu_fused_moe
 
             if current_platform.get_cpu_architecture() == CpuArchEnum.X86:
@@ -283,49 +253,63 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
                     self.cpu_fused_moe = cpu_fused_moe.CPUFusedMOE(layer)
             else:
                 self.cpu_fused_moe = cpu_fused_moe.CPUFusedMOE(layer)
-        elif current_platform.is_cuda_alike() or current_platform.is_xpu():
+        elif self.unquantized_backend == UnquantizedMoeBackend.XPU:
+            w13 = layer.w13_weight
+            w2 = layer.w2_weight
+
+            w13.data = w13.transpose(-1, -2).contiguous()
+            w2.data = w2.transpose(-1, -2).contiguous()
+
+            self._setup_kernel(
+                layer=layer,
+                w13=w13,
+                w2=w2,
+            )
+        else:
             self._setup_kernel(
                 layer=layer,
                 w13=layer.w13_weight,
                 w2=layer.w2_weight,
             )
 
+    def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantConfig:
+        if self.moe.has_bias:
+            return biased_moe_quant_config(
+                layer.w13_bias,
+                layer.w2_bias,
+            )
+        else:
+            return FUSED_MOE_UNQUANTIZED_CONFIG
+
     def apply(
         self,
-        layer: "FusedMoE",  # type: ignore[name-defined] # noqa: F821
+        layer: "RoutedExperts",
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> torch.Tensor:
         return self.forward(
             layer=layer,
             x=x,
             topk_weights=topk_weights,
             topk_ids=topk_ids,
+            shared_experts=shared_experts,
             shared_experts_input=shared_experts_input,
         )
 
-    def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantConfig:
-        if self.moe.has_bias:
-            return biased_moe_quant_config(
-                layer.w13_bias,
-                layer.w2_bias,
-            )
-        else:
-            return FUSED_MOE_UNQUANTIZED_CONFIG
-
-    def forward_cuda(
+    def forward_native(
         self,
-        layer: "FusedMoE",  # type: ignore[name-defined] # noqa: F821
+        layer: "RoutedExperts",
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        assert self.kernel is not None
-
-        return self.kernel.apply(
+    ) -> torch.Tensor:
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply(
             hidden_states=x,
             w1=layer.w13_weight,
             w2=layer.w2_weight,
@@ -335,56 +319,69 @@ def forward_cuda(
             apply_router_weight_on_input=layer.apply_router_weight_on_input,
             global_num_experts=layer.global_num_experts,
             expert_map=layer.expert_map,
+            shared_experts=shared_experts,
             shared_experts_input=shared_experts_input,
         )
 
-    def forward_monolithic_cuda(
+    def forward_cuda(
         self,
-        layer: "FusedMoE",  # type: ignore[name-defined] # noqa: F821
+        layer: "RoutedExperts",
         x: torch.Tensor,
-        router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        import vllm.model_executor.layers.fused_moe.flashinfer_trtllm_moe  # noqa: F401
-
-        assert self.unquantized_backend == UnquantizedMoeBackend.FLASHINFER_TRTLLM
-
-        return torch.ops.vllm.flashinfer_fused_moe_bf16(
-            routing_logits=router_logits,
-            routing_bias=layer.e_score_correction_bias,
-            hidden_states=x,
-            gemm1_weights=layer.w13_weight,
-            gemm2_weights=layer.w2_weight,
-            num_experts=layer.global_num_experts,
-            top_k=layer.top_k,
-            n_group=layer.num_expert_group,
-            topk_group=layer.topk_group,
-            intermediate_size=layer.intermediate_size_per_partition,
-            local_expert_offset=layer.ep_rank * layer.local_num_experts,
-            local_num_experts=layer.local_num_experts,
-            routing_method_type=layer.routing_method_type,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ) -> torch.Tensor:
+        return self.forward_native(
+            layer,
+            x,
+            topk_weights,
+            topk_ids,
+            shared_experts,
+            shared_experts_input,
         )
 
-    def forward_monolithic_cpu(
+    def apply_monolithic(
         self,
-        layer: "FusedMoE",  # type: ignore[name-defined] # noqa: F821
+        layer: "RoutedExperts",
         x: torch.Tensor,
         router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        return self.cpu_fused_moe(
-            layer,
-            x,
-            layer.use_grouped_topk,
-            layer.top_k,
-            router_logits,
-            layer.renormalize,
-            layer.topk_group,
-            layer.num_expert_group,
-            layer.global_num_experts,
-            layer.expert_map,
-            layer.custom_routing_function,
-            layer.scoring_func,
-            layer.routed_scaling_factor,
-            layer.e_score_correction_bias,
-            layer.apply_router_weight_on_input,
-            layer.activation,
-        )
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        assert self.is_monolithic
+        if self.unquantized_backend == UnquantizedMoeBackend.CPU:
+            assert self.moe_kernel is None
+            return self.cpu_fused_moe(
+                layer,
+                x,
+                layer.use_grouped_topk,
+                layer.top_k,
+                router_logits,
+                layer.renormalize,
+                layer.topk_group,
+                layer.num_expert_group,
+                layer.global_num_experts,
+                layer.expert_map,
+                layer.custom_routing_function,
+                layer.scoring_func,
+                layer.routed_scaling_factor,
+                layer.e_score_correction_bias,
+                layer.apply_router_weight_on_input,
+                layer.activation,
+            )
+        else:
+            assert self.moe_kernel is not None
+            return self.moe_kernel.apply_monolithic(
+                x,
+                layer.w13_weight,
+                layer.w2_weight,
+                router_logits,
+                activation=layer.activation,
+                global_num_experts=layer.global_num_experts,
+                expert_map=layer.expert_map,
+                apply_router_weight_on_input=layer.apply_router_weight_on_input,
+                num_expert_group=layer.num_expert_group,
+                topk_group=layer.topk_group,
+                e_score_correction_bias=layer.e_score_correction_bias,
+                routed_scaling_factor=layer.routed_scaling_factor,
+            )
diff --git a/vllm/model_executor/layers/fused_moe/utils.py b/vllm/model_executor/layers/fused_moe/utils.py
index c576b0a25c28..b254dc7b7a72 100644
--- a/vllm/model_executor/layers/fused_moe/utils.py
+++ b/vllm/model_executor/layers/fused_moe/utils.py
@@ -4,6 +4,7 @@
 from math import prod
 
 import torch
+import torch.nn.functional as F
 
 from vllm import _custom_ops as ops
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
@@ -22,6 +23,9 @@
 from vllm.model_executor.layers.quantization.utils.mxfp8_utils import (
     mxfp8_e4m3_quantize,
 )
+from vllm.model_executor.layers.quantization.utils.nvfp4_emulation_utils import (
+    ref_nvfp4_quant_dequant,
+)
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
     per_tensor_dequantize,
 )
@@ -163,8 +167,15 @@ def _int8_quantize(
     # activations apply per-token quantization. Otherwise, assume
     # activation tensor-wise fp8/int8 quantization, dynamic or static
     if block_shape is None:
-        assert per_act_token, "int8 quantization only supports block or channel-wise"
-        A, A_scale = per_token_quant_int8(A)
+        if per_act_token:
+            A, A_scale = per_token_quant_int8(A)
+        elif A_scale is not None:
+            # Static per-tensor: use the optimized CUDA kernel
+            A, A_scale, _ = ops.scaled_int8_quant(A, scale=A_scale)
+        elif A_scale is None:
+            # Dynamic per-tensor: compute scale then quantize via kernel
+            A_scale = torch.clamp(A.abs().max() / 127.0, min=1e-10)
+            A, A_scale, _ = ops.scaled_int8_quant(A, scale=A_scale)
     else:
         assert not per_act_token
         assert len(block_shape) == 2
@@ -197,11 +208,12 @@ def _mxfp8_e4m3_quantize(
     per_act_token_quant: bool,
     block_shape: list[int] | None = None,
     is_sf_swizzled_layout: bool = False,
+    mx_alignment: int = 0,
 ) -> tuple[torch.Tensor, torch.Tensor]:
     assert A_scale is None
     assert not per_act_token_quant
     assert block_shape is None or block_shape == [1, 32]
-    return mxfp8_e4m3_quantize(A, is_sf_swizzled_layout)
+    return mxfp8_e4m3_quantize(A, is_sf_swizzled_layout, mx_alignment)
 
 
 def _mxfp6_e3m2_quantize(
@@ -244,8 +256,10 @@ def moe_kernel_quantize_input(
     quant_dtype: None | torch.dtype | str,
     per_act_token_quant: bool,
     block_shape: list[int] | None = None,
-    is_fp4_scale_swizzled: bool = True,
+    is_scale_swizzled: bool = True,
     ocp_mx_scheme: str | None = None,
+    quantization_emulation: bool = False,
+    mx_alignment: int = 0,
 ) -> tuple[torch.Tensor, torch.Tensor | None]:
     # Handle OCP MX scheme that requires QDQ (quantize-dequantize) for emulation
     if ocp_mx_scheme is not None:
@@ -267,26 +281,63 @@ def moe_kernel_quantize_input(
         # activation quantization below.
 
     if quant_dtype == current_platform.fp8_dtype():
+        if quantization_emulation:
+            raise NotImplementedError(
+                f"moe_kernel_quantize_input does not support quant_dtype={quant_dtype}"
+                " MOE quantization emulation. Please open an issue."
+            )
         return _fp8_quantize(A, A_scale, per_act_token_quant, block_shape)
     elif quant_dtype == torch.int8:
+        if quantization_emulation:
+            raise NotImplementedError(
+                "moe_kernel_quantize_input does not support quant_dtype=torch.int8"
+                " MOE quantization emulation. Please open an issue."
+            )
         return _int8_quantize(A, A_scale, per_act_token_quant, block_shape)
     elif quant_dtype == "nvfp4":
-        return _nvfp4_quantize(A, A_scale, is_sf_swizzled_layout=is_fp4_scale_swizzled)
+        if not quantization_emulation:
+            return _nvfp4_quantize(A, A_scale, is_sf_swizzled_layout=is_scale_swizzled)
+        else:
+            A = ref_nvfp4_quant_dequant(A, A_scale, block_size=16)
+            return A, None
     elif quant_dtype == "mxfp4":
+        if not quantization_emulation:
+            raise NotImplementedError(
+                "moe_kernel_quantize_input should not be used for native"
+                " quant_dtype='mxfp4' MOE. Please open an issue."
+            )
         return _mxfp4_quantize(A, A_scale, per_act_token_quant, block_shape)
     elif quant_dtype == "mxfp8":
         # TODO: `quant_dtype == "mxfp8"` is ambiguous,
         # should be fp8_e4m3. OCP MX also defines `fp8_e5m2`.
+        if quantization_emulation:
+            raise NotImplementedError(
+                "moe_kernel_quantize_input does not support quant_dtype='mxfp8' MOE "
+                "quantization emulation. Please open an issue."
+            )
         return _mxfp8_e4m3_quantize(
             A,
             A_scale,
             per_act_token_quant,
             block_shape,
-            is_sf_swizzled_layout=is_fp4_scale_swizzled,
+            is_sf_swizzled_layout=is_scale_swizzled,
+            mx_alignment=mx_alignment,
         )
     elif quant_dtype == "mxfp6_e3m2":
+        if not quantization_emulation:
+            raise NotImplementedError(
+                "moe_kernel_quantize_input should not be used for native "
+                " quant_dtype='mxfp6_e3m2'MOE. Please open an issue."
+            )
+
         return _mxfp6_e3m2_quantize(A, A_scale, per_act_token_quant, block_shape)
     elif quant_dtype == "mxfp6_e2m3":
+        if not quantization_emulation:
+            raise NotImplementedError(
+                "moe_kernel_quantize_input should not be used for native"
+                " quant_dtype='mxfp6_e2m3' MOE. Please open an issue."
+            )
+
         return _mxfp6_e2m3_quantize(A, A_scale, per_act_token_quant, block_shape)
     else:
         return A, A_scale
@@ -325,14 +376,76 @@ def disable_inplace() -> bool:
     return is_torch_equal_or_newer("2.9")
 
 
-@torch.compile(dynamic=True, backend=current_platform.simple_compile_backend)
+@triton.jit
+def _pack_topk_ids_weights_kernel(
+    topk_ids_ptr,
+    topk_weights_ptr,
+    output_ptr,
+    n_elements,
+    BLOCK_SIZE: tl.constexpr,
+    USE_GDC: tl.constexpr,
+    launch_pdl: tl.constexpr,  # triton metadata
+):
+    pid = tl.program_id(axis=0)
+    offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+    mask = offsets < n_elements
+    if USE_GDC:
+        tl.extra.cuda.gdc_launch_dependents()
+        tl.extra.cuda.gdc_wait()
+    expert_id = tl.load(topk_ids_ptr + offsets, mask=mask, other=0).to(tl.int32)
+    expert_id_shifted = expert_id << 16
+
+    weight = tl.load(topk_weights_ptr + offsets, mask=mask, other=0.0)
+    weight_bf16 = weight.to(tl.bfloat16)
+    weight_int16 = weight_bf16.to(tl.int16, bitcast=True)
+
+    weight_int32 = weight_int16.to(tl.int32) & 0xFFFF
+
+    packed = expert_id_shifted | weight_int32
+    tl.store(output_ptr + offsets, packed, mask=mask)
+
+
 def trtllm_moe_pack_topk_ids_weights(
-    topk_ids: torch.Tensor, topk_weights: torch.Tensor
+    topk_ids: torch.Tensor,
+    topk_weights: torch.Tensor,
+    block_size: int = 1024,
 ) -> torch.Tensor:
-    """
-    Pack topk_ids and topk_weights into a single int32 tensor.
-    Format: (expert_id << 16) | weight_bf16.view(int16)
-    """
-    return (topk_ids.to(torch.int32) << 16) | topk_weights.to(torch.bfloat16).view(
-        torch.int16
+    assert topk_ids.shape == topk_weights.shape
+    assert topk_ids.is_contiguous() and topk_weights.is_contiguous()
+
+    original_shape = topk_ids.shape
+    ids_flat = topk_ids.reshape(-1)
+    weights_flat = topk_weights.reshape(-1)
+
+    n_elements = ids_flat.numel()
+    output = torch.empty(n_elements, dtype=torch.int32, device=topk_ids.device)
+
+    use_gdc = current_platform.is_cuda() and current_platform.has_device_capability(90)
+    grid = (triton.cdiv(n_elements, block_size),)
+    _pack_topk_ids_weights_kernel[grid](
+        ids_flat,
+        weights_flat,
+        output,
+        n_elements,
+        BLOCK_SIZE=block_size,
+        USE_GDC=use_gdc,
+        launch_pdl=use_gdc,
     )
+    return output.reshape(original_shape)
+
+
+@torch.compile(dynamic=True, backend=current_platform.simple_compile_backend)
+def swiglu_limit_func(
+    output: torch.Tensor,
+    input: torch.Tensor,  # first half is gate, second half is up
+    swiglu_limit: float = 0.0,
+) -> None:
+    d = input.shape[1] // 2
+    gate = input[:, :d]
+    up = input[:, d:]
+
+    if swiglu_limit > 0:
+        gate = torch.clamp(gate, max=swiglu_limit)
+        up = torch.clamp(up, min=-swiglu_limit, max=swiglu_limit)
+
+    output.copy_(F.silu(gate) * up)
diff --git a/vllm/model_executor/layers/fused_moe/zero_expert_fused_moe.py b/vllm/model_executor/layers/fused_moe/zero_expert_fused_moe.py
deleted file mode 100644
index 97d21767f4fc..000000000000
--- a/vllm/model_executor/layers/fused_moe/zero_expert_fused_moe.py
+++ /dev/null
@@ -1,189 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-from contextlib import contextmanager
-
-import torch
-from torch import nn
-
-from vllm.model_executor.layers.fused_moe.fused_moe import zero_experts_compute_triton
-from vllm.model_executor.layers.fused_moe.layer import FusedMoE
-
-
-class ZeroExpertFusedMoE(FusedMoE):
-    """
-    A FusedMoE operation that also computes the results of zero experts.
-    Zero experts perform identity operations (scaled pass-through) instead
-    of full MLP computations.
-
-    This class uses memoization to avoid redundant routing computation:
-    routing is computed once and reused for both zero expert computation
-    and the main FusedMoE forward pass.
-    """
-
-    def __init__(
-        self,
-        zero_expert_num: int,
-        zero_expert_type: str,
-        router: nn.Module,
-        **kwargs,
-    ):
-        # ZeroExpertFusedMoE manages its own custom_routing_function for memoization
-        assert (
-            "custom_routing_function" not in kwargs
-            or kwargs.get("custom_routing_function") is None
-        ), (
-            "ZeroExpertFusedMoE does not support external custom_routing_function. "
-            "It manages its own for routing memoization."
-        )
-
-        # Automatically slice router's e_score_correction_bias to only include
-        # real experts (not zero_experts) for the base FusedMoE.
-        # The full bias will be used temporarily in forward() for routing.
-        if hasattr(router, "e_score_correction_bias") and "num_experts" in kwargs:
-            num_real_experts = kwargs["num_experts"]
-            router_bias = router.e_score_correction_bias
-            user_bias = kwargs.get("e_score_correction_bias")
-
-            # Use router's bias if:
-            # 1. User didn't provide bias, or
-            # 2. User provided full bias (same size as router)
-            if user_bias is None or user_bias.shape[0] == router_bias.shape[0]:
-                kwargs["e_score_correction_bias"] = router_bias[:num_real_experts]
-
-        # FusedMoE no longer accepts zero_expert_num/zero_expert_type.
-        # We handle zero experts ourselves in forward().
-        super().__init__(**kwargs)
-        # Store the actual zero_expert_num and zero_expert_type for our own use
-        self._actual_zero_expert_num = zero_expert_num
-        self._actual_zero_expert_type = zero_expert_type
-        self._router = router  # Full router (includes zero experts)
-
-        # Expose zero_expert_num and zero_expert_type as attributes for
-        # compatibility with quantization methods that check these attributes
-        self.zero_expert_num = 0
-        self.zero_expert_type = None
-
-        # Memoization state for routing results
-        self._memoized_topk_weights: torch.Tensor | None = None
-        self._memoized_topk_ids: torch.Tensor | None = None
-
-        # Create custom_routing_function to reuse memoized routing results
-        def custom_routing_function(hidden_states, gating_output, topk, renormalize):
-            """Return memoized `topk_weights` and `topk_ids`."""
-            if self._memoized_topk_weights is None or self._memoized_topk_ids is None:
-                raise RuntimeError(
-                    "ZeroExpertFusedMoE: routing results not memoized. "
-                    "Call select_experts first to compute routing."
-                )
-            return self._memoized_topk_weights, self._memoized_topk_ids
-
-        self.custom_routing_function = custom_routing_function
-
-    @contextmanager
-    def _temporarily_set_attrs(self, **attrs):
-        """
-        Temporarily set attributes using object.__setattr__ and restore them.
-
-        This bypasses nn.Module.__setattr__ to avoid Dynamo tracing issues.
-        When PyTorch Dynamo traces the forward pass, it cannot handle
-        nn.Module.__setattr__ calls (which include parameter registration logic),
-        resulting in "Unsupported" errors. Using object.__setattr__ directly
-        sets the attribute without triggering nn.Module's custom __setattr__,
-        allowing Dynamo to trace the code successfully.
-        """
-        originals = {key: getattr(self, key) for key in attrs}
-        try:
-            for key, value in attrs.items():
-                object.__setattr__(self, key, value)
-            yield
-        finally:
-            for key, value in originals.items():
-                object.__setattr__(self, key, value)
-
-    def _compute_zero_expert_result(
-        self,
-        hidden_states: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-    ) -> torch.Tensor | None:
-        """Compute zero expert results using pre-computed routing."""
-        if (
-            self._actual_zero_expert_num is None
-            or self._actual_zero_expert_num <= 0
-            or self._actual_zero_expert_type is None
-        ):
-            return None
-
-        return zero_experts_compute_triton(
-            expert_indices=topk_ids.clone(),
-            expert_scales=topk_weights.clone(),
-            num_experts=self.logical_num_experts,
-            zero_expert_type=self._actual_zero_expert_type,
-            hidden_states=hidden_states,
-        )
-
-    def forward(
-        self,
-        hidden_states: torch.Tensor,
-        router_logits: torch.Tensor,  # Full logits including zero experts
-    ) -> torch.Tensor:
-        """
-        Forward pass with zero expert support and routing memoization.
-
-        Args:
-            hidden_states: Input hidden states
-            router_logits: Full router logits (including zero experts)
-
-        Returns:
-            Combined output from real experts and zero experts
-        """
-        # Prepare temporary attribute overrides for routing computation
-        temp_attrs = {
-            "custom_routing_function": None,  # Disable for first routing
-        }
-        if self._router is not None:
-            temp_attrs["e_score_correction_bias"] = self._router.e_score_correction_bias
-
-        # Compute routing with temporary attributes
-        # Pass full router_logits (including zero experts) so that zero experts
-        # can be properly identified in topk_ids
-        with self._temporarily_set_attrs(**temp_attrs):
-            topk_weights, topk_ids = self.select_experts(
-                hidden_states=hidden_states,
-                router_logits=router_logits,  # Full logits (includes zero experts)
-            )
-
-        # Compute zero expert result if needed
-        zero_expert_result = self._compute_zero_expert_result(
-            hidden_states=hidden_states,
-            topk_weights=topk_weights,
-            topk_ids=topk_ids,
-        )
-
-        # Memoize routing results for reuse in super().forward()
-        self._memoized_topk_weights = topk_weights
-        self._memoized_topk_ids = topk_ids
-
-        # Slice router_logits for real experts only
-        router_logits_sliced = router_logits[..., : self.logical_num_experts]
-
-        # Compute real expert results (will reuse memoized routing via
-        # custom_routing_function)
-        # zero_expert_num is already 0, so FusedMoE won't handle zero experts
-        fused_out = super().forward(
-            hidden_states=hidden_states,
-            router_logits=router_logits_sliced,
-        )
-
-        # Combine results
-        # Both zero_expert_result and fused_out are computed from the same
-        # hidden_states, so they should be on the same device.
-        if zero_expert_result is not None:
-            fused_out = fused_out + zero_expert_result
-
-        # Clear memoization after use
-        self._memoized_topk_weights = None
-        self._memoized_topk_ids = None
-
-        return fused_out
diff --git a/vllm/model_executor/layers/layernorm.py b/vllm/model_executor/layers/layernorm.py
index 7fa804587067..d5671eb9c1e6 100644
--- a/vllm/model_executor/layers/layernorm.py
+++ b/vllm/model_executor/layers/layernorm.py
@@ -6,96 +6,24 @@
 import torch.nn as nn
 import torch.nn.functional as F
 
-from vllm import _oink_ops, envs
-from vllm._aiter_ops import rocm_aiter_ops
+# Import kernels
+import vllm.kernels  # noqa: F401
+from vllm import envs, ir
+from vllm.config import get_current_vllm_config
 from vllm.logger import init_logger
 from vllm.model_executor.custom_op import CustomOp
-from vllm.model_executor.layers.batch_invariant import (
-    rms_norm_batch_invariant,
-)
-from vllm.platforms import current_platform
+from vllm.model_executor.layers.batch_invariant import rms_norm_batch_invariant
 
 logger = init_logger(__name__)
 
 
-def _can_view_as_2d(x: torch.Tensor) -> bool:
-    """Return True if x.view(-1, x.shape[-1]) is viewable (no copy)."""
-    if x.dim() < 2:
-        return False
-    if x.dim() == 2:
-        return True
-    # For a view(-1, N) to be valid, all leading dims must be contiguous with
-    # respect to each other (size-1 dims are ignored).
-    for dim in range(x.dim() - 1):
-        # Strides for size-1 dims are irrelevant and can be arbitrary.
-        if x.size(dim + 1) != 1 and x.stride(dim) != x.stride(dim + 1) * x.size(
-            dim + 1
-        ):
-            return False
-    return True
-
-
-def _is_oink_stride_compatible_2d(x_2d: torch.Tensor) -> bool:
-    """Return True if x_2d meets Oink's pointer-path stride constraints."""
-    if x_2d.dim() != 2:
-        return False
-    if x_2d.stride(1) != 1:
-        return False
-    # Match Oink's vectorization constraint: stride(0) divisible by 256b.
-    if x_2d.dtype in (torch.float16, torch.bfloat16):
-        divby = 16
-    elif x_2d.dtype == torch.float32:
-        divby = 8
-    else:
-        return False
-    return (x_2d.stride(0) % divby) == 0
-
-
-def rms_norm(
-    x: torch.Tensor, weight: torch.Tensor, variance_epsilon: float
-) -> torch.Tensor:
-    from vllm import _custom_ops as ops
-
-    if envs.VLLM_BATCH_INVARIANT:
-        return rms_norm_batch_invariant(x, weight, variance_epsilon)
-    out = torch.empty_like(x)
-    ops.rms_norm(
-        out,
-        x,
-        weight,
-        variance_epsilon,
-    )
-    return out
-
-
-def fused_add_rms_norm(
-    x: torch.Tensor,
-    residual: torch.Tensor,
-    weight: torch.Tensor,
-    variance_epsilon: float,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    from vllm import _custom_ops as ops
-
-    if envs.VLLM_BATCH_INVARIANT:
-        return rms_norm_batch_invariant(
-            x + residual, weight, variance_epsilon
-        ), x + residual
-    ops.fused_add_rms_norm(
-        x,
-        residual,
-        weight,
-        variance_epsilon,
-    )
-    return x, residual
-
-
 def poly_norm(
     x: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, variance_epsilon: float
 ) -> torch.Tensor:
     from vllm import _custom_ops as ops
 
     out = torch.empty_like(x)
-    ops.poly_norm(
+    ops.poly_norm(  # type: ignore[attr-defined]
         out,
         x,
         weight,
@@ -105,25 +33,6 @@ def poly_norm(
     return out
 
 
-def dispatch_rocm_rmsnorm_func(
-    with_fused_add: bool, dtype: torch.dtype, use_aiter: bool = False
-):
-    use_aiter = use_aiter and dtype in [
-        torch.float16,
-        torch.bfloat16,
-    ]
-
-    if use_aiter and with_fused_add:
-        return rocm_aiter_ops.rms_norm2d_with_add
-    if use_aiter:
-        return rocm_aiter_ops.rms_norm
-
-    # fall back to CUDA implementation
-    if with_fused_add:
-        return fused_add_rms_norm
-    return rms_norm
-
-
 # --8<-- [start:rms_norm]
 @CustomOp.register("rms_norm")
 class RMSNorm(CustomOp):
@@ -156,113 +65,19 @@ def __init__(
         if self.has_weight:
             self.weight = nn.Parameter(self.weight)
 
-        if current_platform.is_rocm():
-            aiter_rmsnorm_enabled = rocm_aiter_ops.is_rmsnorm_enabled()
-            self.rocm_norm_func = dispatch_rocm_rmsnorm_func(
-                with_fused_add=False,
-                dtype=weight_dtype,
-                use_aiter=aiter_rmsnorm_enabled,
-            )
-            self.rocm_norm_func_with_add = dispatch_rocm_rmsnorm_func(
-                with_fused_add=True, dtype=weight_dtype, use_aiter=aiter_rmsnorm_enabled
-            )
-
-        # Optional: enable Oink Blackwell RMSNorm custom-op fast path on
-        # compatible CUDA devices (e.g., SM100) when the external Oink
-        # package is available. This is detected once at construction time
-        # to avoid per-call device queries in the hot path.
-        self._use_oink_rmsnorm = False
-        self._use_oink_fused_add_rmsnorm = False
-        if (
-            not current_platform.is_rocm()
-            and torch.cuda.is_available()
-            and bool(getattr(envs, "VLLM_USE_OINK_OPS", False))
-        ):
-            # NOTE: vLLM disables custom ops by default when using Inductor.
-            # If this op is disabled, CustomOp will dispatch to forward_native,
-            # and the Oink path in forward_cuda will never run.
-            if getattr(self._forward_method, "__func__", None) is getattr(
-                self.forward_native, "__func__", None
-            ):
-                try:
-                    from vllm.config import get_cached_compilation_config
-
-                    custom_ops = get_cached_compilation_config().custom_ops
-                except Exception:
-                    custom_ops = ["<unknown>"]
-                logger.warning_once(
-                    "VLLM_USE_OINK_OPS=1 but the `rms_norm` custom op is "
-                    "disabled (CompilationConfig.custom_ops=%s). Enable it via "
-                    "`compilation_config={'custom_ops': ['none', '+rms_norm']}` "
-                    "(or `['all']`) to let vLLM call into torch.ops.oink.*.",
-                    custom_ops,
-                )
-                # Custom op disabled => forward_cuda won't run. Avoid doing any
-                # external Oink initialization work in this case.
-            else:
-                try:
-                    device_index = torch.accelerator.current_device_index()
-                    if _oink_ops.is_oink_available_for_device(device_index):
-                        self._use_oink_rmsnorm = True
-                        self._use_oink_fused_add_rmsnorm = (
-                            _oink_ops.has_fused_add_rms_norm()
-                        )
-                except Exception as e:
-                    # If anything goes wrong (no Oink install, CPU-only env, etc.),
-                    # silently fall back to the built-in RMSNorm path.
-                    logger.warning_once(
-                        "VLLM_USE_OINK_OPS=1 but failed to initialize Oink "
-                        "RMSNorm; falling back to vLLM RMSNorm. Error: %s",
-                        e,
-                    )
-                    self._use_oink_rmsnorm = False
-                    self._use_oink_fused_add_rmsnorm = False
-
-    @staticmethod
-    def forward_static(
-        x: torch.Tensor,
-        variance_epsilon: float,
-        hidden_size: int,
-        orig_dtype: torch.dtype,
-        weight: torch.Tensor | None = None,
-        residual: torch.Tensor | None = None,
-        variance_size_override: int | None = None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        """PyTorch-native implementation equivalent to forward()."""
-        x = x.to(torch.float32)
-        if residual is not None:
-            # residual promoted f16->f32 automatically,
-            # otherwise Inductor eliminates the casts to and from f16,
-            # increasing memory usage (and complicating pattern matching)
-            x = x + residual
-            residual = x.to(orig_dtype)
-
-        if x.shape[-1] != hidden_size:
-            raise ValueError(
-                f"Expected hidden_size to be {hidden_size}, but found: {x.shape[-1]}"
-            )
-
-        if variance_size_override is None:
-            x_var = x
-        else:
-            if hidden_size < variance_size_override:
-                raise ValueError(
-                    "Expected hidden_size to be at least "
-                    f"{variance_size_override}, but found: {hidden_size}"
-                )
-
-            x_var = x[:, :, :variance_size_override]
-
-        variance = x_var.pow(2).mean(dim=-1, keepdim=True)
-
-        x = x * torch.rsqrt(variance + variance_epsilon)
-        x = x.to(orig_dtype)
-        if weight is not None:
-            x = x * weight
-        if residual is None:
-            return x
-        else:
-            return x, residual
+        # Do not pass identity weight to native implementation (causes issue on TPU).
+        # Other implementations require weight to be passed even if all ones.
+        # Cheat and predict if native will be dispatched to:
+        #  1) if native is first in priority list
+        #  2) if variance_size_override is given (only supported by native impl)
+        # TODO(luka): address weight passing inconsistency:
+        # https://github.com/vllm-project/vllm/issues/39370
+        priority = get_current_vllm_config().kernel_config.ir_op_priority
+        var_override = self.variance_size_override is not None
+        native_rms_norm = priority.rms_norm[0] == "native" or var_override
+        native_add_rms_norm = priority.fused_add_rms_norm[0] == "native" or var_override
+        self.pass_weight = self.has_weight or not native_rms_norm
+        self.pass_weight_add = self.has_weight or not native_add_rms_norm
 
     def forward_native(
         self,
@@ -270,115 +85,35 @@ def forward_native(
         residual: torch.Tensor | None = None,
     ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
         """PyTorch-native implementation equivalent to forward()."""
-
-        return self.forward_static(
-            x,
-            self.variance_epsilon,
-            self.hidden_size,
-            x.dtype,
-            self.weight.data if self.has_weight else None,
-            residual,
-            self.variance_size_override,
-        )
+        if residual is None:
+            return ir.ops.rms_norm(
+                x,
+                self.weight.data if self.pass_weight else None,
+                self.variance_epsilon,
+                self.variance_size_override,
+            )
+        else:
+            return ir.ops.fused_add_rms_norm.maybe_inplace(
+                x,
+                residual,
+                self.weight.data if self.pass_weight_add else None,
+                self.variance_epsilon,
+                self.variance_size_override,
+            )
 
     def forward_cuda(
         self,
         x: torch.Tensor,
         residual: torch.Tensor | None = None,
     ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        if self.variance_size_override is not None:
-            return self.forward_native(x, residual)
-
-        # Optional Oink SM100 fast path (no residual). This path is
-        # torch.compile-friendly via torch.ops.oink.rmsnorm and preserves
-        # 2D layouts (including padded rows) when using the Oink
-        # pointer-based kernel.
-        if (
-            residual is None
-            and getattr(self, "_use_oink_rmsnorm", False)
-            and x.is_cuda
-            and x.dim() >= 2
-            and self.has_weight
-            and not envs.VLLM_BATCH_INVARIANT
-            and self.weight.data.dtype == x.dtype
-            and self.weight.data.is_contiguous()
-        ):
-            orig_shape = x.shape
-            hidden_size = orig_shape[-1]
-            if _can_view_as_2d(x):
-                x_2d = x.view(-1, hidden_size)
-                if _is_oink_stride_compatible_2d(x_2d):
-                    y_2d = _oink_ops.rmsnorm(
-                        x_2d,
-                        self.weight.data,
-                        self.variance_epsilon,
-                    )
-                    return y_2d.view(orig_shape)
-
-        # Optional Oink SM100 fast path (fused residual-add + RMSNorm, in-place).
-        # This mirrors vLLM's fused_add_rms_norm semantics by mutating both
-        # `x` (normalized output) and `residual` (residual-out buffer).
         if (
-            residual is not None
-            and getattr(self, "_use_oink_fused_add_rmsnorm", False)
-            and x.is_cuda
-            and residual.is_cuda
-            and x.shape == residual.shape
-            and x.dtype == residual.dtype
-            and x.dim() >= 2
-            and self.has_weight
-            and not envs.VLLM_BATCH_INVARIANT
-            and self.weight.data.dtype == x.dtype
-            and self.weight.data.is_contiguous()
+            envs.VLLM_BATCH_INVARIANT
+            and residual is None
+            and self.variance_size_override is None
         ):
-            orig_shape = x.shape
-            hidden_size = orig_shape[-1]
-            if _can_view_as_2d(x) and _can_view_as_2d(residual):
-                x_2d = x.view(-1, hidden_size)
-                res_2d = residual.view(-1, hidden_size)
-
-                # The Oink in-place pointer path supports the common vLLM
-                # layout where:
-                # - `x` may be strided/padded row-major (stride(1) == 1), and
-                # - `residual` is contiguous row-major ([M, N] with stride(0) == N).
-                # If these conditions are not met, fall back to vLLM's built-in
-                # fused kernel.
-                if (
-                    _is_oink_stride_compatible_2d(x_2d)
-                    and _is_oink_stride_compatible_2d(res_2d)
-                    and res_2d.is_contiguous()
-                ):
-                    _oink_ops.fused_add_rms_norm_(
-                        x_2d,
-                        res_2d,
-                        self.weight.data,
-                        self.variance_epsilon,
-                    )
-                    return x, residual
-
-        add_residual = residual is not None
-        if add_residual:
-            return fused_add_rms_norm(
-                x, residual, self.weight.data, self.variance_epsilon
-            )
-        else:
-            return rms_norm(x, self.weight.data, self.variance_epsilon)
+            return rms_norm_batch_invariant(x, self.weight.data, self.variance_epsilon)
 
-    def forward_hip(
-        self,
-        x: torch.Tensor,
-        residual: torch.Tensor | None = None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        if self.variance_size_override is not None:
-            return self.forward_native(x, residual)
-
-        add_residual = residual is not None
-        if add_residual:
-            return self.rocm_norm_func_with_add(
-                x, residual, self.weight.data, self.variance_epsilon
-            )
-        else:
-            return self.rocm_norm_func(x, self.weight.data, self.variance_epsilon)
+        return self.forward_native(x, residual)
 
     def forward_xpu(
         self,
@@ -414,77 +149,32 @@ def __init__(
         self.weight = nn.Parameter(torch.zeros(hidden_size))
         self.variance_epsilon = eps
 
-    @staticmethod
-    def _forward_static_no_residual(
-        weight: torch.Tensor,
-        variance_epsilon: float,
-        x: torch.Tensor,
-    ) -> torch.Tensor:
-        """PyTorch-native implementation equivalent to forward() without residual."""
-        orig_dtype = x.dtype
-        x = x.float()
-        variance = x.pow(2).mean(dim=-1, keepdim=True)
-        x = x * torch.rsqrt(variance + variance_epsilon)
-        x = x * (1.0 + weight.float())
-        x = x.to(orig_dtype)
-        return x
-
-    @staticmethod
-    def _forward_static_with_residual(
-        weight: torch.Tensor,
-        variance_epsilon: float,
-        x: torch.Tensor,
-        residual: torch.Tensor,
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """PyTorch-native implementation equivalent to forward() with residual."""
-        orig_dtype = x.dtype
-        x = (
-            x.float() + residual.float()
-            if orig_dtype == torch.float16
-            else x + residual
-        )
-        residual = x
-
-        x = x.float()
-        variance = x.pow(2).mean(dim=-1, keepdim=True)
-        x = x * torch.rsqrt(variance + variance_epsilon)
-        # Llama does x.to(float16) * w whilst Gemma is (x * w).to(float16)
-        # See https://github.com/huggingface/transformers/pull/29402
-        x = x * (1.0 + weight.float())
-        x = x.to(orig_dtype)
-        return x, residual
-
     def forward_native(
         self,
         x: torch.Tensor,
         residual: torch.Tensor | None = None,
     ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
         """PyTorch-native implementation equivalent to forward()."""
-        if residual is None:
-            return self._forward_static_no_residual(
-                self.weight.data, self.variance_epsilon, x
-            )
-        else:
-            return self._forward_static_with_residual(
-                self.weight.data, self.variance_epsilon, x, residual
+        orig_dtype = x.dtype
+        weight = self.weight.data.float() + 1.0
+        if residual is not None:
+            x = (
+                x.float() + residual.float()
+                if orig_dtype == torch.float16
+                else x + residual
             )
+            residual = x
+        # ir.ops.rms_norm handles fp32 upcast internally
+        out = ir.ops.rms_norm(x, weight, self.variance_epsilon)
+        return (
+            out.to(orig_dtype) if residual is None else (out.to(orig_dtype), residual)
+        )
 
     def forward_cuda(
         self,
         x: torch.Tensor,
         residual: torch.Tensor | None = None,
     ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        if torch.compiler.is_compiling():
-            return self.forward_native(x, residual)
-
-        if not getattr(self, "_is_compiled", False):
-            self._forward_static_no_residual = torch.compile(  # type: ignore
-                self._forward_static_no_residual
-            )
-            self._forward_static_with_residual = torch.compile(  # type: ignore
-                self._forward_static_with_residual
-            )
-            self._is_compiled = True
         return self.forward_native(x, residual)
 
 
@@ -539,53 +229,71 @@ def __init__(
     def reset_parameters(self):
         torch.nn.init.ones_(self.weight)
 
-    def forward_native(
-        self, x: torch.Tensor, z: torch.Tensor | None = None
+    @staticmethod
+    def forward_static(
+        x: torch.Tensor,
+        z: torch.Tensor | None,
+        weight: torch.Tensor,
+        epsilon: float,
+        orig_dtype: torch.dtype,
+        group_size: int | None = None,
+        norm_before_gate: bool = True,
+        activation: str = "swish",
     ) -> torch.Tensor:
-        """
-        Native PyTorch implementation of RMS normalization with gating.
-
-        Args:
-            x: Input tensor
-            z: Optional gating tensor
+        """Pure-PyTorch RMS normalization with optional gating.
 
-        Returns:
-            Normalized (and optionally gated) tensor
+        This static method contains the full native logic so that both
+        ``forward_native`` and ``MatcherRMSNormGated`` (used by the
+        compilation pattern matcher) can share the same implementation.
 
-        If z is not None:
-            - norm_before_gate=True: out = norm(x) * silu(z)
-            - norm_before_gate=False: out = norm(x * silu(z))
+        If *z* is not None and *norm_before_gate* is True:
+            ``out = rms_norm(x) * act(z)``
+        If *z* is not None and *norm_before_gate* is False:
+            ``out = rms_norm(x * act(z))``
         """
-        orig_dtype = x.dtype
         x = x.float()
-        weight = self.weight.float()
-        z = z.float() if z is not None else None
+        weight = weight.float()
+        if z is not None:
+            z = z.float()
+
+        assert activation in ["silu", "sigmoid", "swish"]
+        act_fn = F.sigmoid if activation == "sigmoid" else F.silu
 
-        # Apply gating before normalization if needed
-        if z is not None and not self.norm_before_gate:
-            x = x * F.silu(z)
+        if z is not None and not norm_before_gate:
+            x = x * act_fn(z)
 
-        # RMS Normalization
-        if self.group_size is None:
-            # Standard RMS norm across the last dimension
+        if group_size is None:
             variance = x.pow(2).mean(dim=-1, keepdim=True)
-            x_normed = x * torch.rsqrt(variance + self.eps)
+            x_normed = x * torch.rsqrt(variance + epsilon)
             out = x_normed * weight
         else:
-            # Group RMS norm
             from einops import rearrange
 
-            x_group = rearrange(x, "... (g d) -> ... g d", d=self.group_size)
+            x_group = rearrange(x, "... (g d) -> ... g d", d=group_size)
             variance = x_group.pow(2).mean(dim=-1, keepdim=True)
-            x_normed = x_group * torch.rsqrt(variance + self.eps)
+            x_normed = x_group * torch.rsqrt(variance + epsilon)
             out = rearrange(x_normed, "... g d -> ... (g d)") * weight
 
-        # Apply gating after normalization if needed
-        if z is not None and self.norm_before_gate:
-            out = out * F.silu(z)
+        if z is not None and norm_before_gate:
+            out = out * act_fn(z)
 
         return out.to(orig_dtype)
 
+    def forward_native(
+        self, x: torch.Tensor, z: torch.Tensor | None = None
+    ) -> torch.Tensor:
+        """PyTorch-native implementation equivalent to forward()."""
+        return self.forward_static(
+            x,
+            z,
+            self.weight,
+            self.eps,
+            x.dtype,
+            group_size=self.group_size,
+            norm_before_gate=self.norm_before_gate,
+            activation=self.activation,
+        )
+
     def forward_cuda(
         self, x: torch.Tensor, z: torch.Tensor | None = None
     ) -> torch.Tensor:
@@ -602,6 +310,11 @@ def forward_cuda(
             activation=self.activation,
         )
 
+    def forward_xpu(
+        self, x: torch.Tensor, z: torch.Tensor | None = None
+    ) -> torch.Tensor:
+        return self.forward_cuda(x, z)
+
 
 class LayerNorm(nn.Module):
     """
diff --git a/vllm/model_executor/layers/lightning_attn.py b/vllm/model_executor/layers/lightning_attn.py
index ffccdc12241c..ef7a2745a068 100644
--- a/vllm/model_executor/layers/lightning_attn.py
+++ b/vllm/model_executor/layers/lightning_attn.py
@@ -5,6 +5,7 @@
 from einops import rearrange
 
 from vllm.triton_utils import tl, triton
+from vllm.v1.attention.backends.utils import PAD_SLOT_ID
 
 
 @triton.jit
@@ -602,6 +603,7 @@ def _linear_attn_decode_kernel(
     cache_h_stride,
     cache_d0_stride,
     cache_d1_stride,
+    pad_slot_id: tl.constexpr,
     BLOCK_SIZE: tl.constexpr,
 ):
     """
@@ -616,8 +618,8 @@ def _linear_attn_decode_kernel(
     # Load slot index for the current batch
     slot_id = tl.load(slot_idx + pid_b).to(tl.int64)
 
-    # Skip if slot_id is -1 (padding)
-    if slot_id == -1:
+    # Skip if slot_id is PAD_SLOT_ID (padding)
+    if slot_id == pad_slot_id:
         return
 
     batch_id = pid_b
@@ -727,6 +729,7 @@ def linear_decode_forward_triton(
         cache_h_stride,
         cache_d0_stride,
         cache_d1_stride,
+        pad_slot_id=PAD_SLOT_ID,
         BLOCK_SIZE=BLOCK_SIZE,
     )
 
diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py
index 44fd516f5e5c..dbd5577ee03c 100644
--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -48,19 +48,19 @@
     "CompressedTensorsLinearTransformMethod",
     "AWQMarlinLinearMethod",
     "AWQLinearMethod",
-    "GPTQMarlinLinearMethod",
+    "AutoGPTQLinearMethod",
     "Fp8LinearMethod",
     "MarlinLinearMethod",
     "GPTQMarlin24LinearMethod",
     "TPUInt8LinearMethod",
-    "GPTQLinearMethod",
     "FBGEMMFp8LinearMethod",
     "ModelOptFp8LinearMethod",
     "ModelOptFp8PcPtLinearMethod",
     "ModelOptFp8PbWoLinearMethod",
     "QuarkLinearMethod",
     "ModelOptNvFp4LinearMethod",
-    "PetitNvFp4LinearMethod",
+    "ModelOptNvFp4W4A16LinearMethod",
+    "HummingLinearMethod",
 ]
 
 
@@ -246,6 +246,7 @@ def __init__(
         self,
         input_size: int,
         output_size: int,
+        bias: bool = False,
         skip_bias_add: bool = False,
         params_dtype: torch.dtype | None = None,
         quant_config: QuantizationConfig | None = None,
@@ -259,6 +260,7 @@ def __init__(
         # Keep input parameters
         self.input_size = input_size
         self.output_size = output_size
+        self.has_bias = bias
         self.skip_bias_add = skip_bias_add
         if params_dtype is None:
             params_dtype = torch.get_default_dtype()
@@ -266,10 +268,13 @@ def __init__(
         self.quant_config = quant_config
         self.prefix = prefix
         self.allow_fp8_block_shape_mismatch = False
+        self.quant_method: QuantizeMethodBase
         if quant_config is None:
-            self.quant_method: QuantizeMethodBase | None = UnquantizedLinearMethod()
+            self.quant_method = UnquantizedLinearMethod()
+        elif quant_method := quant_config.get_quant_method(self, prefix=prefix):
+            self.quant_method = quant_method
         else:
-            self.quant_method = quant_config.get_quant_method(self, prefix=prefix)
+            raise ValueError("All linear layers should support quant method.")
         self.return_bias = return_bias
         self.disable_tp = disable_tp
         self.tp_rank = get_tensor_model_parallel_rank() if not disable_tp else 0
@@ -324,6 +329,7 @@ def __init__(
         super().__init__(
             input_size,
             output_size,
+            bias,
             skip_bias_add,
             params_dtype,
             quant_config,
@@ -332,8 +338,6 @@ def __init__(
             disable_tp=disable_tp,
         )
 
-        # All the linear layer supports quant method.
-        assert self.quant_method is not None
         self.quant_method.create_weights(
             self,
             self.input_size,
@@ -386,7 +390,6 @@ def forward(
         x: torch.Tensor,
     ) -> torch.Tensor | tuple[torch.Tensor, Parameter | None]:
         bias = self.bias if not self.skip_bias_add else None
-        assert self.quant_method is not None
 
         output = self.quant_method.apply(self, x, bias)
 
@@ -459,6 +462,7 @@ def __init__(
         super().__init__(
             input_size,
             output_size,
+            bias,
             skip_bias_add,
             params_dtype,
             quant_config,
@@ -470,7 +474,6 @@ def __init__(
         self._maybe_allow_fp8_block_shape_mismatch()
         self.gather_output = gather_output
 
-        assert self.quant_method is not None
         self.quant_method.create_weights(
             layer=self,
             input_size_per_partition=self.input_size_per_partition,
@@ -484,6 +487,7 @@ def __init__(
                 else self.weight_loader
             ),
         )
+
         if bias:
             self.bias = Parameter(
                 torch.empty(self.output_size_per_partition, dtype=params_dtype)
@@ -578,7 +582,6 @@ def forward(
         bias = self.bias if not self.skip_bias_add else None
 
         # Matrix multiply.
-        assert self.quant_method is not None
         output_parallel = self.quant_method.apply(self, input_, bias)
 
         if self.gather_output and self.tp_size > 1:
@@ -818,8 +821,8 @@ def weight_loader(
             # for the packing.
             packed_dim = getattr(param, "packed_dim", None)
             if packed_dim == output_dim:
-                shard_size = shard_size // param.packed_factor
-                shard_offset = shard_offset // param.packed_factor
+                shard_size = round(shard_size // param.packed_factor)
+                shard_offset = round(shard_offset // param.packed_factor)
                 # Special case for Marlin.
                 shard_size, shard_offset = adjust_marlin_shard(
                     param, shard_size, shard_offset
@@ -911,7 +914,21 @@ def weight_loader_v2(
         self.validate_shard_id(loaded_shard_id)
         if loaded_shard_id is None or isinstance(loaded_shard_id, tuple):
             if isinstance(param, PerTensorScaleParameter):
-                param.load_merged_column_weight(loaded_weight=loaded_weight, shard_id=0)
+                if isinstance(loaded_shard_id, tuple):
+                    for idx in loaded_shard_id:
+                        param.load_merged_column_weight(
+                            loaded_weight=loaded_weight, shard_id=idx
+                        )
+                else:
+                    # When weights are already fused on disk (e.g. Phi-3's
+                    # gate_up_proj), there is only a single scale for the
+                    # entire fused matrix. Fill all slots with this scale
+                    # to ensure that any subsequent reduction (like .max())
+                    # works correctly while preserving the parameter shape.
+                    for idx in range(param.data.shape[0]):
+                        param.load_merged_column_weight(
+                            loaded_weight=loaded_weight, shard_id=idx
+                        )
                 return
             elif type(param) in (RowvLLMParameter, BasevLLMParameter):
                 param.load_merged_column_weight(loaded_weight=loaded_weight)
@@ -1101,7 +1118,12 @@ def _load_fused_module_from_checkpoint(
             # Special case for Quantization.
             # If quantized, we need to adjust the offset and size to account
             # for the packing.
-            if (
+            if isinstance(param, BlockQuantScaleParameter):
+                weight_block_size = getattr(self, "weight_block_size", None)
+                shard_size, shard_offset = adjust_block_scale_shard(
+                    weight_block_size, shard_size, shard_offset
+                )
+            elif (
                 isinstance(param, (PackedColumnParameter, PackedvLLMParameter))
                 and param.packed_dim == param.output_dim
             ):
@@ -1123,9 +1145,15 @@ def weight_loader_v2(
         self.validate_shard_id(loaded_shard_id)
         if loaded_shard_id is None:  # special case for certain models
             if isinstance(param, PerTensorScaleParameter):
-                param.load_qkv_weight(
-                    loaded_weight=loaded_weight, shard_id=0, tp_rank=self.tp_rank
-                )
+                # When weights are already fused on disk (e.g. Phi-3's
+                # qkv_proj), there is only a single scale for the entire
+                # fused matrix. Fill all slots (q, k, v) with this scale
+                # to ensure that any subsequent reduction (like .max())
+                # works correctly while preserving the parameter shape.
+                for idx in range(param.data.shape[0]):
+                    param.load_qkv_weight(
+                        loaded_weight=loaded_weight, shard_id=idx, tp_rank=self.tp_rank
+                    )
                 return
             elif type(param) in (RowvLLMParameter, BasevLLMParameter):
                 param.load_qkv_weight(loaded_weight=loaded_weight, tp_rank=self.tp_rank)
@@ -1233,8 +1261,8 @@ def weight_loader(
                     )
 
                 if packed_dim == output_dim:
-                    shard_size = shard_size // param.packed_factor
-                    shard_offset = shard_offset // param.packed_factor
+                    shard_size = round(shard_size // param.packed_factor)
+                    shard_offset = round(shard_offset // param.packed_factor)
 
                     # Special case for Marlin.
                     shard_size, shard_offset = adjust_marlin_shard(
@@ -1296,8 +1324,8 @@ def weight_loader(
             # for the packing.
             packed_dim = getattr(param, "packed_dim", None)
             if packed_dim == output_dim:
-                shard_size = shard_size // param.packed_factor
-                shard_offset = shard_offset // param.packed_factor
+                shard_size = round(shard_size // param.packed_factor)
+                shard_offset = round(shard_offset // param.packed_factor)
 
                 # Special case for Marlin.
                 shard_size, shard_offset = adjust_marlin_shard(
@@ -1421,6 +1449,7 @@ def __init__(
         super().__init__(
             input_size,
             output_size,
+            bias,
             skip_bias_add,
             params_dtype,
             quant_config,
@@ -1432,7 +1461,6 @@ def __init__(
         self.input_is_parallel = input_is_parallel
         self.reduce_results = reduce_results
 
-        assert self.quant_method is not None
         self.quant_method.create_weights(
             layer=self,
             input_size_per_partition=self.input_size_per_partition,
@@ -1522,7 +1550,6 @@ def forward(
             input_parallel = split_input[self.tp_rank].contiguous()
 
         # Matrix multiply.
-        assert self.quant_method is not None
         # Only fuse bias add into GEMM for rank 0 (this ensures that
         # bias will not get added more than once in TP>1 case)
         bias_ = None if (self.tp_rank > 0 or self.skip_bias_add) else self.bias
diff --git a/vllm/model_executor/layers/logits_processor.py b/vllm/model_executor/layers/logits_processor.py
index dd2a61bc6a2c..3541b9706686 100644
--- a/vllm/model_executor/layers/logits_processor.py
+++ b/vllm/model_executor/layers/logits_processor.py
@@ -9,14 +9,14 @@
     tensor_model_parallel_all_gather,
     tensor_model_parallel_gather,
 )
-from vllm.model_executor.custom_op import CustomOp
+from vllm.model_executor.custom_op import PluggableLayer
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
 from vllm.platforms import current_platform
 
 
 # --8<-- [start:logits_processor]
-@CustomOp.register("logits_processor")
-class LogitsProcessor(CustomOp):
+@PluggableLayer.register("logits_processor")
+class LogitsProcessor(PluggableLayer):
     """Process logits and apply logits processors from sampling metadata.
 
     This layer does the following:
diff --git a/vllm/model_executor/layers/mamba/abstract.py b/vllm/model_executor/layers/mamba/abstract.py
index 3c6b0139424d..8bbb21d7bc90 100644
--- a/vllm/model_executor/layers/mamba/abstract.py
+++ b/vllm/model_executor/layers/mamba/abstract.py
@@ -8,6 +8,7 @@
 from vllm.config import VllmConfig
 from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
 from vllm.v1.attention.backend import AttentionBackend
+from vllm.v1.attention.backends.registry import MambaAttentionBackendEnum
 from vllm.v1.attention.selector import get_mamba_attn_backend
 from vllm.v1.kv_cache_interface import KVCacheSpec, MambaSpec
 
@@ -33,7 +34,7 @@ def get_state_shape(self) -> Iterable[tuple[int, ...]]:
 
     @property
     @abstractmethod
-    def mamba_type(self) -> str:
+    def mamba_type(self) -> MambaAttentionBackendEnum:
         pass
 
     @abstractmethod
@@ -42,9 +43,10 @@ def get_state_dtype(self) -> tuple[torch.dtype, ...]:
 
     def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:
         mamba_block_size = vllm_config.cache_config.mamba_block_size
+        assert mamba_block_size is not None
         page_size_padded = vllm_config.cache_config.mamba_page_size_padded
         return MambaSpec(
-            shapes=self.get_state_shape(),
+            shapes=tuple(self.get_state_shape()),
             dtypes=self.get_state_dtype(),
             block_size=mamba_block_size,
             page_size_padded=page_size_padded,
diff --git a/vllm/model_executor/layers/mamba/gdn/__init__.py b/vllm/model_executor/layers/mamba/gdn/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/model_executor/layers/mamba/gdn/base.py b/vllm/model_executor/layers/mamba/gdn/base.py
new file mode 100644
index 000000000000..95ced2e0a4d3
--- /dev/null
+++ b/vllm/model_executor/layers/mamba/gdn/base.py
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+from transformers import PretrainedConfig
+
+from vllm.config import (
+    VllmConfig,
+)
+from vllm.distributed import (
+    get_tensor_model_parallel_rank,
+    get_tensor_model_parallel_world_size,
+)
+from vllm.model_executor.custom_op import PluggableLayer
+from vllm.model_executor.layers.mamba.abstract import MambaBase
+from vllm.model_executor.layers.mamba.mamba_utils import (
+    MambaStateDtypeCalculator,
+)
+from vllm.model_executor.models.utils import extract_layer_index
+from vllm.v1.attention.backends.registry import MambaAttentionBackendEnum
+
+
+class GatedDeltaNetAttention(PluggableLayer, MambaBase):
+    """Base class for GatedDeltaNet attention layer."""
+
+    def __init__(
+        self,
+        config: PretrainedConfig,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.prefix = prefix
+        self.tp_size = get_tensor_model_parallel_world_size()
+        self.tp_rank = get_tensor_model_parallel_rank()
+        self.layer_idx = extract_layer_index(prefix)
+        self.hidden_size = config.hidden_size
+        self.activation = config.hidden_act
+        self.layer_norm_epsilon = config.rms_norm_eps
+        self.model_config = vllm_config.model_config
+        self.cache_config = vllm_config.cache_config
+        self.quant_config = vllm_config.quant_config
+        self.speculative_config = vllm_config.speculative_config
+        self.num_spec = (
+            self.speculative_config.num_speculative_tokens
+            if self.speculative_config
+            else 0
+        )
+
+    @property
+    def mamba_type(self) -> MambaAttentionBackendEnum:
+        return MambaAttentionBackendEnum.GDN_ATTN
+
+    def get_state_dtype(self) -> tuple[torch.dtype, ...]:
+        return MambaStateDtypeCalculator.gated_delta_net_state_dtype(
+            self.model_config.dtype,
+            self.cache_config.mamba_cache_dtype,
+            self.cache_config.mamba_ssm_cache_dtype,
+        )
diff --git a/vllm/model_executor/layers/kda.py b/vllm/model_executor/layers/mamba/gdn/kimi_gdn_linear_attn.py
similarity index 80%
rename from vllm/model_executor/layers/kda.py
rename to vllm/model_executor/layers/mamba/gdn/kimi_gdn_linear_attn.py
index 46db5dc321d8..a7f0707f50ab 100644
--- a/vllm/model_executor/layers/kda.py
+++ b/vllm/model_executor/layers/mamba/gdn/kimi_gdn_linear_attn.py
@@ -5,35 +5,37 @@
 from einops import rearrange
 from torch import nn
 
-from vllm.config import CacheConfig, ModelConfig, get_current_vllm_config
+from vllm.config import VllmConfig, get_current_vllm_config
 from vllm.distributed import (
     divide,
-    get_tensor_model_parallel_rank,
-    get_tensor_model_parallel_world_size,
 )
 from vllm.forward_context import ForwardContext, get_forward_context
 from vllm.logger import init_logger
+from vllm.model_executor.custom_op import PluggableLayer
+from vllm.model_executor.layers.mamba.gdn.base import GatedDeltaNetAttention
 from vllm.model_executor.model_loader.weight_utils import sharded_weight_loader
 from vllm.model_executor.utils import set_weight_attrs
+from vllm.transformers_utils.configs.kimi_linear import KimiLinearConfig
 from vllm.utils.torch_utils import direct_register_custom_op
-from vllm.v1.attention.backend import AttentionMetadata
 from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadata
 
-from .fla.ops.kda import (
+from ...fla.ops.kda import (
     FusedRMSNormGated,
     chunk_kda,
     fused_kda_gate,
     fused_recurrent_kda,
 )
-from .linear import (
+from ...linear import (
     ColumnParallelLinear,
     ReplicatedLinear,
     RowParallelLinear,
 )
-from .mamba.abstract import MambaBase
-from .mamba.mamba_utils import MambaStateDtypeCalculator, MambaStateShapeCalculator
-from .mamba.ops.causal_conv1d import causal_conv1d_fn, causal_conv1d_update
-from .quantization.base_config import QuantizationConfig
+from ..mamba_utils import (
+    MambaStateDtypeCalculator,
+    MambaStateShapeCalculator,
+    is_conv_state_dim_first,
+)
+from ..ops.causal_conv1d import causal_conv1d_fn, causal_conv1d_update
 
 logger = init_logger(__name__)
 
@@ -79,11 +81,8 @@ def kda_attention_fake(
 )
 
 
-class KimiDeltaAttention(nn.Module, MambaBase):
-    @property
-    def mamba_type(self) -> str:
-        return "gdn_attention"
-
+@PluggableLayer.register("kimi_gated_delta_net_attention")
+class KimiGatedDeltaNetAttention(GatedDeltaNetAttention):
     def get_state_dtype(
         self,
     ) -> tuple[torch.dtype, torch.dtype, torch.dtype, torch.dtype]:
@@ -102,28 +101,16 @@ def get_state_shape(
 
     def __init__(
         self,
-        layer_idx: int,
-        hidden_size: int,
-        quant_config: QuantizationConfig | None = None,
-        cache_config: CacheConfig | None = None,
-        model_config: ModelConfig | None = None,
-        rms_norm_eps: float = 1e-5,
+        config: KimiLinearConfig,
+        vllm_config: VllmConfig,
         prefix: str = "",
-        **kwargs,
     ) -> None:
-        super().__init__()
-        self.tp_size = get_tensor_model_parallel_world_size()
-        self.tp_rank = get_tensor_model_parallel_rank()
-        self.hidden_size = hidden_size
-        self.model_config = model_config
-        self.cache_config = cache_config
-        if model_config is None:
-            raise ValueError("model_config must be provided")
-        kda_config = model_config.linear_attn_config
+        super().__init__(config, vllm_config, prefix)
+
+        kda_config = config.linear_attn_config  # type: ignore[attr-defined]
+        assert kda_config is not None, "linear_attn_config must be set"
         self.head_dim = kda_config["head_dim"]
         self.num_heads = kda_config["num_heads"]
-        self.layer_idx = layer_idx
-        self.prefix = prefix
         assert self.num_heads % self.tp_size == 0
         self.local_num_heads = divide(self.num_heads, self.tp_size)
 
@@ -134,21 +121,21 @@ def __init__(
             self.hidden_size,
             projection_size,
             bias=False,
-            quant_config=quant_config,
+            quant_config=self.quant_config,
             prefix=f"{prefix}.q_proj",
         )
         self.k_proj = ColumnParallelLinear(
             self.hidden_size,
             projection_size,
             bias=False,
-            quant_config=quant_config,
+            quant_config=self.quant_config,
             prefix=f"{prefix}.k_proj",
         )
         self.v_proj = ColumnParallelLinear(
             self.hidden_size,
             projection_size,
             bias=False,
-            quant_config=quant_config,
+            quant_config=self.quant_config,
             prefix=f"{prefix}.v_proj",
         )
 
@@ -156,7 +143,7 @@ def __init__(
             self.hidden_size,
             self.head_dim,
             bias=False,
-            quant_config=quant_config,
+            quant_config=self.quant_config,
             prefix=f"{prefix}.f_a_proj",
         )
 
@@ -164,7 +151,7 @@ def __init__(
             self.head_dim,
             projection_size,
             bias=False,
-            quant_config=quant_config,
+            quant_config=self.quant_config,
             prefix=f"{prefix}.f_b_proj",
         )
         self.dt_bias = nn.Parameter(
@@ -177,7 +164,7 @@ def __init__(
             self.hidden_size,
             self.num_heads,
             bias=False,
-            quant_config=quant_config,
+            quant_config=self.quant_config,
             prefix=f"{prefix}.b_proj",
         )
 
@@ -219,24 +206,22 @@ def __init__(
             self.hidden_size,
             self.head_dim,
             bias=False,
-            quant_config=quant_config,
+            quant_config=self.quant_config,
             prefix=f"{prefix}.g_a_proj",
         )
         self.g_b_proj = ColumnParallelLinear(
             self.head_dim,
             projection_size,
             bias=False,
-            quant_config=quant_config,
+            quant_config=self.quant_config,
             prefix=f"{prefix}.g_b_proj",
         )
-        self.o_norm = FusedRMSNormGated(
-            self.head_dim, eps=rms_norm_eps, activation="sigmoid"
-        )
+        self.o_norm = FusedRMSNormGated(self.head_dim, activation="sigmoid")
         self.o_proj = RowParallelLinear(
             projection_size,
             self.hidden_size,
             bias=False,
-            quant_config=quant_config,
+            quant_config=self.quant_config,
             prefix=f"{prefix}.o_proj",
         )
 
@@ -293,19 +278,21 @@ def _forward(
         core_attn_out: torch.Tensor,
     ) -> None:
         forward_context = get_forward_context()
-        attn_metadata: AttentionMetadata = forward_context.attn_metadata
+        attn_metadata_raw = forward_context.attn_metadata
 
-        if attn_metadata is None:
+        if attn_metadata_raw is None:
             #     # V1 profile run
             return
 
-        assert isinstance(attn_metadata, dict)
-        attn_metadata = attn_metadata[self.prefix]
-        assert isinstance(attn_metadata, GDNAttentionMetadata)
-        has_initial_state = attn_metadata.has_initial_state
-        non_spec_query_start_loc = attn_metadata.non_spec_query_start_loc
-        non_spec_state_indices_tensor = attn_metadata.non_spec_state_indices_tensor  # noqa: E501
-        num_actual_tokens = attn_metadata.num_actual_tokens
+        assert isinstance(attn_metadata_raw, dict)
+        attn_metadata_narrowed = attn_metadata_raw[self.prefix]
+        assert isinstance(attn_metadata_narrowed, GDNAttentionMetadata)
+        has_initial_state = attn_metadata_narrowed.has_initial_state
+        non_spec_query_start_loc = attn_metadata_narrowed.non_spec_query_start_loc
+        non_spec_state_indices_tensor = (
+            attn_metadata_narrowed.non_spec_state_indices_tensor
+        )  # noqa: E501
+        num_actual_tokens = attn_metadata_narrowed.num_actual_tokens
         constant_caches = self.kv_cache
 
         q_proj_states = q_proj_states[:num_actual_tokens]
@@ -315,10 +302,12 @@ def _forward(
         beta = beta[:num_actual_tokens]
 
         (conv_state_q, conv_state_k, conv_state_v, recurrent_state) = constant_caches
-        # deal with strides
-        conv_state_q = conv_state_q.transpose(-1, -2)
-        conv_state_k = conv_state_k.transpose(-1, -2)
-        conv_state_v = conv_state_v.transpose(-1, -2)
+        # conv_state must be (..., dim, width-1) for the conv kernels.
+        # DS layout stores it that way directly; SD layout needs a transpose.
+        if not is_conv_state_dim_first():
+            conv_state_q = conv_state_q.transpose(-1, -2)
+            conv_state_k = conv_state_k.transpose(-1, -2)
+            conv_state_v = conv_state_v.transpose(-1, -2)
 
         q_conv_weights = self.q_conv1d.weight.view(
             self.q_conv1d.weight.size(0), self.q_conv1d.weight.size(2)
@@ -329,7 +318,7 @@ def _forward(
         v_conv_weights = self.v_conv1d.weight.view(
             self.v_conv1d.weight.size(0), self.v_conv1d.weight.size(2)
         )
-        if attn_metadata.num_prefills > 0:
+        if attn_metadata_narrowed.num_prefills > 0:
             q_proj_states = q_proj_states.transpose(0, 1)
             k_proj_states = k_proj_states.transpose(0, 1)
             v_proj_states = v_proj_states.transpose(0, 1)
@@ -342,7 +331,7 @@ def _forward(
                 has_initial_state=has_initial_state,
                 cache_indices=non_spec_state_indices_tensor,
                 query_start_loc=non_spec_query_start_loc,
-                metadata=attn_metadata,
+                metadata=attn_metadata_narrowed,
             ).transpose(0, 1)
             k = causal_conv1d_fn(
                 k_proj_states,
@@ -353,7 +342,7 @@ def _forward(
                 has_initial_state=has_initial_state,
                 cache_indices=non_spec_state_indices_tensor,
                 query_start_loc=non_spec_query_start_loc,
-                metadata=attn_metadata,
+                metadata=attn_metadata_narrowed,
             ).transpose(0, 1)
             v = causal_conv1d_fn(
                 v_proj_states,
@@ -364,11 +353,12 @@ def _forward(
                 has_initial_state=has_initial_state,
                 cache_indices=non_spec_state_indices_tensor,
                 query_start_loc=non_spec_query_start_loc,
-                metadata=attn_metadata,
+                metadata=attn_metadata_narrowed,
             ).transpose(0, 1)
         else:
+            assert non_spec_state_indices_tensor is not None
             decode_conv_indices = non_spec_state_indices_tensor[
-                : attn_metadata.num_actual_tokens
+                : attn_metadata_narrowed.num_actual_tokens
             ]
             q = causal_conv1d_update(
                 q_proj_states,
@@ -402,7 +392,9 @@ def _forward(
             lambda x: rearrange(x, "n (h d) -> 1 n h d", d=self.head_dim), (q, k, v)
         )
 
-        if attn_metadata.num_prefills > 0:
+        if attn_metadata_narrowed.num_prefills > 0:
+            assert non_spec_state_indices_tensor is not None
+            assert has_initial_state is not None
             zero_idx = non_spec_state_indices_tensor[~has_initial_state]
             recurrent_state[zero_idx] = 0
             initial_state = recurrent_state[non_spec_state_indices_tensor].contiguous()
@@ -423,6 +415,7 @@ def _forward(
             # Init cache
             recurrent_state[non_spec_state_indices_tensor] = last_recurrent_state
         else:
+            assert non_spec_query_start_loc is not None
             (
                 core_attn_out_non_spec,
                 last_recurrent_state,
@@ -434,7 +427,9 @@ def _forward(
                 beta=beta,
                 initial_state=recurrent_state,
                 use_qk_l2norm_in_kernel=True,
-                cu_seqlens=non_spec_query_start_loc[: attn_metadata.num_decodes + 1],
+                cu_seqlens=non_spec_query_start_loc[
+                    : attn_metadata_narrowed.num_decodes + 1
+                ],
                 ssm_state_indices=non_spec_state_indices_tensor,
             )
         core_attn_out[0, :num_actual_tokens] = core_attn_out_non_spec[
diff --git a/vllm/model_executor/layers/mamba/gdn/olmo_gdn_linear_attn.py b/vllm/model_executor/layers/mamba/gdn/olmo_gdn_linear_attn.py
new file mode 100644
index 000000000000..65da90eccb79
--- /dev/null
+++ b/vllm/model_executor/layers/mamba/gdn/olmo_gdn_linear_attn.py
@@ -0,0 +1,634 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+from einops import rearrange
+from torch import nn
+
+from vllm.config import (
+    VllmConfig,
+    get_current_vllm_config,
+)
+from vllm.distributed import (
+    divide,
+)
+from vllm.forward_context import ForwardContext, get_forward_context
+from vllm.model_executor.custom_op import PluggableLayer
+from vllm.model_executor.layers.fla.ops import (
+    chunk_gated_delta_rule,
+    fused_recurrent_gated_delta_rule,
+)
+from vllm.model_executor.layers.layernorm import RMSNormGated
+from vllm.model_executor.layers.linear import (
+    ColumnParallelLinear,
+    MergedColumnParallelLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.mamba.gdn.base import GatedDeltaNetAttention
+from vllm.model_executor.layers.mamba.mamba_utils import (
+    MambaStateShapeCalculator,
+    is_conv_state_dim_first,
+)
+from vllm.model_executor.layers.mamba.ops.causal_conv1d import (
+    causal_conv1d_fn,
+    causal_conv1d_update,
+)
+from vllm.model_executor.model_loader.weight_utils import (
+    sharded_weight_loader,
+)
+from vllm.model_executor.utils import set_weight_attrs
+from vllm.platforms import current_platform
+from vllm.triton_utils import tl, triton
+from vllm.triton_utils.allocation import set_triton_allocator
+from vllm.utils.torch_utils import direct_register_custom_op
+from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadata
+
+
+@PluggableLayer.register("olmo_hybrid_gated_delta_net_attention")
+class OlmoHybridGatedDeltaNetAttention(GatedDeltaNetAttention):
+    """
+    Gated DeltaNet linear attention layer for OLMo Hybrid.
+
+    This implements the linear attention mechanism that replaces sliding window
+    attention in the hybrid architecture.
+    """
+
+    def get_state_shape(
+        self,
+    ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...], tuple[int, ...]]:
+        return MambaStateShapeCalculator.gated_delta_net_state_shape(
+            self.tp_size,
+            self.num_k_heads,
+            self.num_v_heads,
+            self.head_k_dim,
+            self.head_v_dim,
+            self.conv_kernel_size,
+            self.num_spec,
+        )
+
+    def __init__(
+        self,
+        config,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+    ) -> None:
+        super().__init__(config, vllm_config, prefix=prefix)
+
+        assert getattr(config, "linear_use_gate", True), (
+            "OlmoHybridGatedDeltaNet requires linear_use_gate=True"
+        )
+        self.num_k_heads = config.linear_num_key_heads
+        self.num_v_heads = config.linear_num_value_heads
+        self.head_k_dim = config.linear_key_head_dim
+        self.head_v_dim = config.linear_value_head_dim
+        self.conv_kernel_size = config.linear_conv_kernel_dim
+        self.key_dim = self.head_k_dim * self.num_k_heads
+        self.value_dim = self.head_v_dim * self.num_v_heads
+        self.allow_neg_eigval = getattr(config, "linear_allow_neg_eigval", False)
+
+        # Fused QKVG projection: 1 matmul instead of 4
+        self.in_proj_qkvg = MergedColumnParallelLinear(
+            input_size=self.hidden_size,
+            output_sizes=[self.key_dim, self.key_dim, self.value_dim, self.value_dim],
+            bias=False,
+            quant_config=self.quant_config,
+            prefix=f"{prefix}.in_proj_qkvg",
+        )
+
+        # Separate B and A projections to preserve numerical precision.
+        # Fusing these into one matmul changes FP accumulation order for the
+        # gating scalars, which compounds through the GDN recurrent state.
+        self.b_proj = ColumnParallelLinear(
+            input_size=self.hidden_size,
+            output_size=self.num_v_heads,
+            bias=False,
+            quant_config=self.quant_config,
+            prefix=f"{prefix}.b_proj",
+        )
+        self.a_proj = ColumnParallelLinear(
+            input_size=self.hidden_size,
+            output_size=self.num_v_heads,
+            bias=False,
+            quant_config=self.quant_config,
+            prefix=f"{prefix}.a_proj",
+        )
+
+        # Fused conv1d: single parameter instead of 3
+        self.conv_dim = self.key_dim * 2 + self.value_dim
+        self.conv1d = ColumnParallelLinear(
+            input_size=self.conv_kernel_size,
+            output_size=self.conv_dim,
+            bias=False,
+            prefix=f"{prefix}.conv1d",
+        )
+        self.conv1d.weight.data = self.conv1d.weight.data.unsqueeze(1)
+        delattr(self.conv1d.weight, "weight_loader")
+        set_weight_attrs(
+            self.conv1d.weight,
+            {
+                "weight_loader": _make_fused_conv1d_weight_loader(
+                    [self.key_dim, self.key_dim, self.value_dim],
+                    self.tp_size,
+                    self.tp_rank,
+                )
+            },
+        )
+
+        self.dt_bias = nn.Parameter(
+            torch.ones(self.num_v_heads // self.tp_size),
+        )
+        self.A_log = nn.Parameter(
+            torch.empty(
+                divide(self.num_v_heads, self.tp_size),
+            )
+        )
+
+        set_weight_attrs(self.A_log, {"weight_loader": sharded_weight_loader(0)})
+        set_weight_attrs(self.dt_bias, {"weight_loader": sharded_weight_loader(0)})
+
+        # use eps=1e-5 to match FLA's FusedRMSNormGated
+        self.o_norm = RMSNormGated(
+            self.head_v_dim,
+            eps=1e-5,
+            group_size=None,
+            norm_before_gate=True,
+            device=current_platform.current_device(),
+            dtype=config.torch_dtype if hasattr(config, "torch_dtype") else None,
+        )
+
+        self.o_proj = RowParallelLinear(
+            self.value_dim,
+            self.hidden_size,
+            bias=False,
+            input_is_parallel=True,
+            quant_config=self.quant_config,
+            prefix=f"{prefix}.o_proj",
+        )
+
+        # FLA triton kernels need a PyTorch-backed allocator for scratch
+        # memory (required by triton >= 3.x autotuner). Set once at init.
+        set_triton_allocator(current_platform.current_device())
+
+        compilation_config = get_current_vllm_config().compilation_config
+        if prefix in compilation_config.static_forward_context:
+            raise ValueError(f"Duplicate layer name: {prefix}")
+        compilation_config.static_forward_context[prefix] = self
+
+    def rearrange_mixed_qkv(self, mixed_qkv):
+        if mixed_qkv is None:
+            return None, None, None
+        query, key, value = torch.split(
+            mixed_qkv,
+            [
+                self.key_dim // self.tp_size,
+                self.key_dim // self.tp_size,
+                self.value_dim // self.tp_size,
+            ],
+            dim=-1,
+        )
+
+        num_k_heads = self.num_k_heads // self.tp_size
+        num_v_heads = self.num_v_heads // self.tp_size
+
+        query = rearrange(query, "l (h d) -> 1 l h d", h=num_k_heads, d=self.head_k_dim)
+        key = rearrange(key, "l (h d) -> 1 l h d", h=num_k_heads, d=self.head_k_dim)
+        value = rearrange(value, "l (h d) -> 1 l h d", h=num_v_heads, d=self.head_v_dim)
+
+        # GQA expansion if needed
+        if num_v_heads > num_k_heads:
+            expand_ratio = num_v_heads // num_k_heads
+            query = query.unsqueeze(3).expand(-1, -1, -1, expand_ratio, -1)
+            query = query.reshape(1, query.shape[1], num_v_heads, self.head_k_dim)
+            key = key.unsqueeze(3).expand(-1, -1, -1, expand_ratio, -1)
+            key = key.reshape(1, key.shape[1], num_v_heads, self.head_k_dim)
+
+        return query.contiguous(), key.contiguous(), value.contiguous()
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        output: torch.Tensor,
+    ):
+        # NOTE: We wrap the ENTIRE linear attention forward (projections +
+        # core recurrence + output norm + output projection) in a single
+        # custom op, rather than just wrapping the recurrent core like
+        # other GDN models (e.g. Qwen3Next) do.
+        #
+        # Why: torch.compile with inductor generates fused kernels for
+        # matmuls and pointwise ops. These fused kernels can differ in
+        # floating-point accumulation order from eager-mode cuBLAS,
+        # introducing small numerical differences (~1e-7 per op). For
+        # standard transformer attention this is harmless because each
+        # position is computed independently. But for the GDN recurrent
+        # state, these tiny input differences compound at every timestep
+        # across the full sequence length, causing severe logprob
+        # divergence (e.g. ~15% top-1 agreement with eager baseline).
+        #
+        # By making the full forward opaque to inductor, the projections
+        # and output norm run with eager-mode kernels (cuBLAS, triton),
+        # preserving numerical consistency. The tradeoff is reduced
+        # compilation speedup (~1.5x vs ~3x), but logprob agreement
+        # improves from ~15% to ~83% top-1 vs eager.
+        #
+        # The remaining ~17% divergence comes from inductor compiling
+        # the MLP and transformer attention layers that are NOT wrapped
+        # in custom ops -- their small precision differences propagate
+        # as inputs to the GDN layers from outside.
+        torch.ops.vllm.olmo_hybrid_gdn_full_forward(
+            hidden_states,
+            output,
+            self.prefix,
+        )
+
+    def _full_forward(
+        self,
+        hidden_states: torch.Tensor,
+        output: torch.Tensor,
+    ):
+        num_tokens = hidden_states.size(0)
+
+        # ============================================================
+        # Part 1: Input Projection (2 fused matmuls instead of 6)
+        # ============================================================
+        projected_qkvg, _ = self.in_proj_qkvg(hidden_states)
+        conv_dim_sharded = (self.key_dim * 2 + self.value_dim) // self.tp_size
+        mixed_qkv = projected_qkvg[..., :conv_dim_sharded]
+        gate = projected_qkvg[..., conv_dim_sharded:]
+
+        b, _ = self.b_proj(hidden_states)
+        a, _ = self.a_proj(hidden_states)
+
+        # ============================================================
+        # Part 2: Core Attention
+        # ============================================================
+        core_attn_out = torch.zeros(
+            (num_tokens, self.num_v_heads // self.tp_size, self.head_v_dim),
+            dtype=hidden_states.dtype,
+            device=hidden_states.device,
+        )
+
+        self._forward_core(
+            mixed_qkv=mixed_qkv,
+            b=b,
+            a=a,
+            core_attn_out=core_attn_out,
+        )
+
+        # ============================================================
+        # Part 3: Output Projection
+        # ============================================================
+        gate = gate.view(num_tokens, self.num_v_heads // self.tp_size, self.head_v_dim)
+        core_attn_out_flat = core_attn_out.reshape(-1, core_attn_out.shape[-1])
+        gate_flat = gate.reshape(-1, gate.shape[-1])
+        core_attn_out_normed = self.o_norm(core_attn_out_flat, gate_flat)
+        core_attn_out = core_attn_out_normed.view(
+            num_tokens, self.num_v_heads // self.tp_size, self.head_v_dim
+        )
+
+        core_attn_out = rearrange(core_attn_out, "l h d -> l (h d)")
+        output[:num_tokens], _ = self.o_proj(core_attn_out)
+
+    def _forward_core(
+        self,
+        mixed_qkv: torch.Tensor,
+        b: torch.Tensor,
+        a: torch.Tensor,
+        core_attn_out: torch.Tensor,
+    ):
+        """
+        Core attention computation (called by custom op).
+        """
+        forward_context = get_forward_context()
+        attn_metadata = forward_context.attn_metadata
+
+        if attn_metadata is None:
+            # V1 profile run
+            return
+
+        assert isinstance(attn_metadata, dict)
+        attn_metadata = attn_metadata[self.prefix]  # type: ignore[assignment]
+        assert isinstance(attn_metadata, GDNAttentionMetadata)
+        has_initial_state = attn_metadata.has_initial_state
+        spec_query_start_loc = attn_metadata.spec_query_start_loc
+        non_spec_query_start_loc = attn_metadata.non_spec_query_start_loc
+        spec_sequence_masks = attn_metadata.spec_sequence_masks
+        spec_token_indx = attn_metadata.spec_token_indx
+        non_spec_token_indx = attn_metadata.non_spec_token_indx
+        spec_state_indices_tensor = attn_metadata.spec_state_indices_tensor
+        non_spec_state_indices_tensor = attn_metadata.non_spec_state_indices_tensor
+        self_kv_cache = self.kv_cache
+        # conv_state must be (..., dim, width-1) for the conv kernels.
+        # DS layout stores it that way directly; SD layout needs a transpose.
+        conv_state = (
+            self_kv_cache[0]
+            if is_conv_state_dim_first()
+            else self_kv_cache[0].transpose(-1, -2)
+        )
+        ssm_state = self_kv_cache[1]
+        num_actual_tokens = attn_metadata.num_actual_tokens
+        num_accepted_tokens = attn_metadata.num_accepted_tokens
+
+        mixed_qkv = mixed_qkv[:num_actual_tokens]
+        b = b[:num_actual_tokens]
+        a = a[:num_actual_tokens]
+
+        conv_weights = self.conv1d.weight.view(
+            self.conv1d.weight.size(0), self.conv1d.weight.size(2)
+        )
+
+        if spec_sequence_masks is not None:
+            if attn_metadata.num_prefills == 0 and attn_metadata.num_decodes == 0:
+                mixed_qkv_spec = mixed_qkv
+                mixed_qkv_non_spec = None
+            else:
+                mixed_qkv_spec = mixed_qkv.index_select(0, spec_token_indx)
+                mixed_qkv_non_spec = mixed_qkv.index_select(0, non_spec_token_indx)
+        else:
+            mixed_qkv_spec = None
+            mixed_qkv_non_spec = mixed_qkv
+
+        if spec_sequence_masks is not None:
+            assert spec_query_start_loc is not None
+            assert spec_state_indices_tensor is not None
+            assert num_accepted_tokens is not None
+            mixed_qkv_spec = causal_conv1d_update(
+                mixed_qkv_spec,
+                conv_state,
+                conv_weights,
+                None,  # no bias
+                self.activation,
+                conv_state_indices=spec_state_indices_tensor[:, 0][
+                    : attn_metadata.num_spec_decodes
+                ],
+                num_accepted_tokens=num_accepted_tokens,
+                query_start_loc=spec_query_start_loc,
+                max_query_len=spec_state_indices_tensor.size(-1),
+                validate_data=False,
+            )
+
+        if attn_metadata.num_prefills > 0:
+            assert mixed_qkv_non_spec is not None
+            mixed_qkv_non_spec_T = mixed_qkv_non_spec.transpose(0, 1)
+            mixed_qkv_non_spec = causal_conv1d_fn(
+                mixed_qkv_non_spec_T,
+                conv_weights,
+                None,
+                activation=self.activation,
+                conv_states=conv_state,
+                has_initial_state=has_initial_state,
+                cache_indices=non_spec_state_indices_tensor,
+                query_start_loc=non_spec_query_start_loc,
+                metadata=attn_metadata,
+            ).transpose(0, 1)
+        elif attn_metadata.num_decodes > 0:
+            assert non_spec_state_indices_tensor is not None
+            mixed_qkv_non_spec = causal_conv1d_update(
+                mixed_qkv_non_spec,
+                conv_state,
+                conv_weights,
+                None,
+                self.activation,
+                conv_state_indices=non_spec_state_indices_tensor[
+                    : attn_metadata.num_decodes
+                ],
+                validate_data=True,
+            )
+        else:
+            mixed_qkv_non_spec = None
+
+        query_spec, key_spec, value_spec = self.rearrange_mixed_qkv(mixed_qkv_spec)
+        query_non_spec, key_non_spec, value_non_spec = self.rearrange_mixed_qkv(
+            mixed_qkv_non_spec
+        )
+
+        g, beta = fused_olmo_hybrid_gdn_gating(
+            self.A_log, a, b, self.dt_bias, self.allow_neg_eigval
+        )
+
+        if spec_sequence_masks is not None:
+            assert spec_token_indx is not None
+            assert non_spec_token_indx is not None
+            if attn_metadata.num_prefills == 0 and attn_metadata.num_decodes == 0:
+                g_spec = g
+                beta_spec = beta
+                g_non_spec = None
+                beta_non_spec = None
+            else:
+                g_spec = g.index_select(1, spec_token_indx)
+                beta_spec = beta.index_select(1, spec_token_indx)
+                g_non_spec = g.index_select(1, non_spec_token_indx)
+                beta_non_spec = beta.index_select(1, non_spec_token_indx)
+        else:
+            g_spec = None
+            beta_spec = None
+            g_non_spec = g
+            beta_non_spec = beta
+
+        if spec_sequence_masks is not None:
+            assert spec_query_start_loc is not None
+            assert spec_state_indices_tensor is not None
+            assert num_accepted_tokens is not None
+            core_attn_out_spec, last_recurrent_state = fused_recurrent_gated_delta_rule(
+                q=query_spec,
+                k=key_spec,
+                v=value_spec,
+                g=g_spec,
+                beta=beta_spec,
+                initial_state=ssm_state,
+                inplace_final_state=True,
+                cu_seqlens=spec_query_start_loc[: attn_metadata.num_spec_decodes + 1],
+                ssm_state_indices=spec_state_indices_tensor,
+                num_accepted_tokens=num_accepted_tokens,
+                use_qk_l2norm_in_kernel=True,
+            )
+        else:
+            core_attn_out_spec, last_recurrent_state = None, None
+
+        if attn_metadata.num_prefills > 0:
+            assert non_spec_state_indices_tensor is not None
+            assert has_initial_state is not None
+            assert non_spec_query_start_loc is not None
+            initial_state = ssm_state[non_spec_state_indices_tensor].contiguous()
+            initial_state[~has_initial_state, ...] = 0
+            (
+                core_attn_out_non_spec,
+                last_recurrent_state,
+            ) = chunk_gated_delta_rule(
+                q=query_non_spec,
+                k=key_non_spec,
+                v=value_non_spec,
+                g=g_non_spec,
+                beta=beta_non_spec,
+                initial_state=initial_state,
+                output_final_state=True,
+                cu_seqlens=non_spec_query_start_loc,
+                use_qk_l2norm_in_kernel=True,
+            )
+            ssm_state[non_spec_state_indices_tensor] = last_recurrent_state.to(
+                ssm_state.dtype
+            )
+        elif attn_metadata.num_decodes > 0:
+            assert non_spec_query_start_loc is not None
+            assert non_spec_state_indices_tensor is not None
+            core_attn_out_non_spec, last_recurrent_state = (
+                fused_recurrent_gated_delta_rule(
+                    q=query_non_spec,
+                    k=key_non_spec,
+                    v=value_non_spec,
+                    g=g_non_spec,
+                    beta=beta_non_spec,
+                    initial_state=ssm_state,
+                    inplace_final_state=True,
+                    cu_seqlens=non_spec_query_start_loc[
+                        : attn_metadata.num_decodes + 1
+                    ],
+                    ssm_state_indices=non_spec_state_indices_tensor,
+                    use_qk_l2norm_in_kernel=True,
+                )
+            )
+        else:
+            core_attn_out_non_spec, last_recurrent_state = None, None
+
+        if spec_sequence_masks is not None and core_attn_out_non_spec is not None:
+            merged_out = torch.empty(
+                (1, num_actual_tokens, *core_attn_out_spec.shape[2:]),
+                dtype=core_attn_out_non_spec.dtype,
+                device=core_attn_out_non_spec.device,
+            )
+            merged_out.index_copy_(1, spec_token_indx, core_attn_out_spec)
+            merged_out.index_copy_(1, non_spec_token_indx, core_attn_out_non_spec)
+            core_attn_out[:num_actual_tokens] = merged_out.squeeze(0)
+        elif spec_sequence_masks is not None:
+            core_attn_out[:num_actual_tokens] = core_attn_out_spec.squeeze(0)
+        else:
+            core_attn_out[:num_actual_tokens] = core_attn_out_non_spec.squeeze(0)
+
+
+def _make_fused_conv1d_weight_loader(dims, tp_size, tp_rank):
+    """Weight loader for loading separate HF conv weights into a fused conv1d.
+
+    dims: list of original (un-sharded) dims per section,
+          e.g. [key_dim, key_dim, value_dim]
+    """
+    sharded_dims = [d // tp_size for d in dims]
+
+    def weight_loader(param, loaded_weight, loaded_shard_id=None):
+        if loaded_weight.dim() == 2:
+            loaded_weight = loaded_weight.unsqueeze(1)
+        dim = dims[loaded_shard_id]
+        shard_size = dim // tp_size
+        tp_start = tp_rank * shard_size
+        sharded_weight = loaded_weight[tp_start : tp_start + shard_size]
+        offset = sum(sharded_dims[:loaded_shard_id])
+        param.data[offset : offset + shard_size].copy_(sharded_weight)
+
+    return weight_loader
+
+
+def olmo_hybrid_gdn_full_forward(
+    hidden_states: torch.Tensor,
+    output: torch.Tensor,
+    layer_name: str,
+) -> None:
+    """Full linear attention forward wrapped as a custom op.
+
+    Prevents inductor from compiling the projections around the GDN core,
+    which would introduce numerical divergence that compounds through
+    the recurrent state.
+    """
+    forward_context: ForwardContext = get_forward_context()
+    self = forward_context.no_compile_layers[layer_name]
+    self._full_forward(
+        hidden_states=hidden_states,
+        output=output,
+    )
+
+
+def olmo_hybrid_gdn_full_forward_fake(
+    hidden_states: torch.Tensor,
+    output: torch.Tensor,
+    layer_name: str,
+) -> None:
+    """Fake implementation for torch.compile."""
+    return
+
+
+direct_register_custom_op(
+    op_name="olmo_hybrid_gdn_full_forward",
+    op_func=olmo_hybrid_gdn_full_forward,
+    mutates_args=["output"],
+    fake_impl=olmo_hybrid_gdn_full_forward_fake,
+)
+
+
+@triton.jit
+def fused_olmo_hybrid_gdn_gating_kernel(
+    g,
+    beta_output,
+    A_log,
+    a,
+    b,
+    dt_bias,
+    seq_len,
+    allow_neg_eigval: tl.constexpr,
+    NUM_HEADS: tl.constexpr,
+    beta: tl.constexpr,
+    threshold: tl.constexpr,
+    BLK_HEADS: tl.constexpr,
+):
+    i_b, i_s, i_d = tl.program_id(0), tl.program_id(1), tl.program_id(2)
+    head_off = i_d * BLK_HEADS + tl.arange(0, BLK_HEADS)
+    off = i_b * seq_len * NUM_HEADS + i_s * NUM_HEADS + head_off
+    mask = head_off < NUM_HEADS
+    blk_A_log = tl.load(A_log + head_off, mask=mask)
+    blk_a = tl.load(a + off, mask=mask)
+    blk_b = tl.load(b + off, mask=mask)
+    blk_bias = tl.load(dt_bias + head_off, mask=mask)
+
+    # g = -self.A_log.float().exp() * F.softplus(a.float() + self.dt_bias)
+    x = blk_a.to(tl.float32) + blk_bias.to(tl.float32)
+    softplus_x = tl.where(
+        beta * x <= threshold, (1 / beta) * tl.log(1 + tl.exp(beta * x)), x
+    )
+    blk_g = -tl.exp(blk_A_log.to(tl.float32)) * softplus_x
+    tl.store(g + off, blk_g.to(g.dtype.element_ty), mask=mask)
+
+    # beta = self.b_proj(hidden_states).sigmoid()
+    # if self.allow_neg_eigval: beta = beta * 2.0
+    blk_beta_output = tl.sigmoid(blk_b.to(tl.float32))
+    if allow_neg_eigval:
+        blk_beta_output = blk_beta_output * 2.0
+    tl.store(
+        beta_output + off, blk_beta_output.to(beta_output.dtype.element_ty), mask=mask
+    )
+
+
+def fused_olmo_hybrid_gdn_gating(
+    A_log: torch.Tensor,
+    a: torch.Tensor,
+    b: torch.Tensor,
+    dt_bias: torch.Tensor,
+    allow_neg_eigval: bool = False,
+    beta: float = 1.0,
+    threshold: float = 20.0,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    batch, num_heads = a.shape
+    seq_len = 1
+    grid = (batch, seq_len, triton.cdiv(num_heads, 8))
+    g = torch.empty(1, batch, num_heads, dtype=torch.float32, device=a.device)
+    beta_output = torch.empty(1, batch, num_heads, dtype=torch.float32, device=b.device)
+    fused_olmo_hybrid_gdn_gating_kernel[grid](
+        g,
+        beta_output,
+        A_log,
+        a,
+        b,
+        dt_bias,
+        seq_len,
+        allow_neg_eigval,
+        num_heads,
+        beta,
+        threshold,
+        8,
+        num_warps=1,
+    )
+    return g, beta_output
diff --git a/vllm/model_executor/layers/mamba/gdn/qwen_gdn_linear_attn.py b/vllm/model_executor/layers/mamba/gdn/qwen_gdn_linear_attn.py
new file mode 100644
index 000000000000..0b15dd86f452
--- /dev/null
+++ b/vllm/model_executor/layers/mamba/gdn/qwen_gdn_linear_attn.py
@@ -0,0 +1,1673 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Inference-only Qwen3-Next/Qwen3.5 model."""
+
+import functools
+
+import torch
+from einops import rearrange
+from torch import nn
+
+from vllm import envs
+from vllm._aiter_ops import rocm_aiter_ops
+from vllm.config import (
+    VllmConfig,
+    get_current_vllm_config,
+)
+from vllm.distributed import (
+    divide,
+)
+from vllm.forward_context import ForwardContext, get_forward_context
+from vllm.logger import init_logger
+from vllm.model_executor.custom_op import CustomOp, PluggableLayer
+from vllm.model_executor.layers.fla.ops import (
+    chunk_gated_delta_rule as fla_chunk_gated_delta_rule,
+)
+from vllm.model_executor.layers.fla.ops import (
+    fused_post_conv_prep,
+    fused_recurrent_gated_delta_rule_packed_decode,
+    fused_sigmoid_gating_delta_rule_update,
+)
+from vllm.model_executor.layers.fla.ops.chunk import l2norm_fwd
+from vllm.model_executor.layers.fla.ops.utils import FLA_CHUNK_SIZE
+from vllm.model_executor.layers.layernorm import RMSNormGated
+from vllm.model_executor.layers.linear import (
+    ColumnParallelLinear,
+    MergedColumnParallelLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.mamba.gdn.base import GatedDeltaNetAttention
+from vllm.model_executor.layers.mamba.mamba_mixer2 import mamba_v2_sharded_weight_loader
+from vllm.model_executor.layers.mamba.mamba_utils import (
+    MambaStateShapeCalculator,
+    is_conv_state_dim_first,
+)
+from vllm.model_executor.layers.mamba.ops.causal_conv1d import (
+    causal_conv1d_fn,
+    causal_conv1d_update,
+)
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.quantization.auto_gptq import AutoGPTQConfig
+from vllm.model_executor.layers.quantization.awq_marlin import AWQMarlinConfig
+from vllm.model_executor.layers.quantization.inc import INCConfig
+from vllm.model_executor.model_loader.weight_utils import (
+    sharded_weight_loader,
+)
+from vllm.model_executor.utils import set_weight_attrs
+from vllm.platforms import current_platform
+from vllm.transformers_utils.configs.qwen3_next import Qwen3NextConfig
+from vllm.triton_utils import tl, triton
+from vllm.utils.torch_utils import (
+    LayerNameType,
+    _encode_layer_name,
+    _resolve_layer_name,
+    direct_register_custom_op,
+)
+from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadata
+
+# Optional ROCm AITER Triton kernels for the GDN decode fast-path.
+# Availability is checked centrally via rocm_aiter_ops; the actual function
+# references are imported here so that they can be called without per-call
+# import overhead.
+GDN_AITER_TRITON_AVAILABLE = rocm_aiter_ops.are_gdn_triton_kernels_available()
+
+if GDN_AITER_TRITON_AVAILABLE:
+    from aiter.ops.triton.causal_conv1d_update_single_token import (
+        fused_reshape_causal_conv1d_update_single_token as gdn_aiter_fused_reshape_causal_conv1d_update_single_token,  # noqa: E501
+    )
+    from aiter.ops.triton.gated_delta_net.fused_rearrange_sigmoid_gdr import (
+        fused_rearrange_sigmoid_gated_delta_rule as gdn_aiter_fused_rearrange_sigmoid_gated_delta_rule,  # noqa: E501
+    )
+
+logger = init_logger(__name__)
+
+
+# TODO(arpera): remove ``_is_libs_cu13_install_intact`` and its caller in
+# ``_should_use_flashinfer_gdn_prefill`` once the upstream packaging bug is
+# fixed and the broken wheels are yanked / superseded on PyPI:
+#   https://github.com/NVIDIA/cutlass/issues/3170
+#   https://github.com/NVIDIA/cutlass/issues/3259
+@functools.cache
+def _is_libs_cu13_install_intact() -> bool:
+    """Return True if every file installed by ``nvidia-cutlass-dsl-libs-cu13``
+    matches the SHA-256 declared in its wheel ``RECORD``.
+
+    ``nvidia-cutlass-dsl-libs-base`` and ``nvidia-cutlass-dsl-libs-cu13``
+    both ship into the shared ``nvidia_cutlass_dsl/`` namespace and
+    write many of the same on-disk paths (the runtime ``.so``, the MLIR
+    Python bindings, cuTe-DSL Python sources, ...) with different
+    content. Whichever wheel extracts last wins; with a parallel
+    installer (e.g. ``uv``) the order is racy and the resulting venv
+    can end up with a mix of files from both variants. The
+    ``-libs-base`` variant fails MLIR legalization when JIT-compiling
+    the FlashInfer Blackwell GDN prefill kernel, and any other
+    cuTe-DSL-based kernel can break too if on-disk files diverge from
+    what ``-libs-cu13``'s wheel expects. Tracked upstream at:
+
+      * https://github.com/NVIDIA/cutlass/issues/3170
+      * https://github.com/NVIDIA/cutlass/issues/3259
+
+    This helper re-hashes every file the ``-libs-cu13`` wheel claims to
+    own and compares against its declared SHA-256. Returns False on any
+    error (uninstalled, missing RECORD, missing file, hash mismatch).
+    Result is cached per-process.
+    """
+    import hashlib
+    import importlib.metadata
+
+    import pybase64 as base64
+
+    try:
+        dist = importlib.metadata.distribution("nvidia-cutlass-dsl-libs-cu13")
+    except importlib.metadata.PackageNotFoundError:
+        return False
+
+    files = dist.files
+    if not files:
+        return False
+
+    for pkg_path in files:
+        file_hash = pkg_path.hash
+        # Skip RECORD rows without a hash (RECORD itself, generated
+        # ``.pyc`` files, ...) and any non-SHA-256 hash modes.
+        if file_hash is None or not file_hash.value:
+            continue
+        if file_hash.mode != "sha256":
+            continue
+        try:
+            with open(pkg_path.locate(), "rb") as f:
+                digest = hashlib.sha256(f.read()).digest()
+        except OSError:
+            return False
+        actual = base64.urlsafe_b64encode(digest).decode().rstrip("=")
+        if actual != file_hash.value:
+            return False
+
+    return True
+
+
+def _should_use_flashinfer_gdn_prefill(backend: str, head_k_dim: int | None) -> bool:
+    """Whether to use FlashInfer's GDN prefill kernel instead of the
+    Triton/FLA fallback.
+
+    Requirements:
+    * ``requested in ["flashinfer", "auto"]``;
+    * ``platform == cuda``;
+    * one of the following:
+      - Hopper (SM90) — no further constraints;
+      - Blackwell (SM10.x) with ``head_k_dim == 128``, ``cuda_runtime >= 13``,
+        and an intact ``nvidia-cutlass-dsl-libs-cu13`` install on disk
+        (see :func:`_is_libs_cu13_install_intact`).
+    """
+    if backend not in ["flashinfer", "auto"]:
+        return False
+    if not current_platform.is_cuda():
+        return False
+    if current_platform.is_device_capability(90):
+        return True  # Hopper — no further constraints.
+    if not current_platform.is_device_capability_family(100):
+        return False  # Neither Hopper nor Blackwell.
+    if head_k_dim != 128:
+        return False
+    if current_platform.get_cuda_runtime_major() < 13:
+        return False
+    if not _is_libs_cu13_install_intact():
+        logger.warning_once(
+            "FlashInfer Blackwell GDN requires an intact nvidia-cutlass-dsl"
+            "-libs-cu13 install, but some on-disk files do not match the "
+            "SHA-256 declared in its RECORD (install-order race in "
+            "nvidia-cutlass-dsl packaging — see "
+            "https://github.com/NVIDIA/cutlass/issues/3170 and "
+            "https://github.com/NVIDIA/cutlass/issues/3259). Falling back "
+            "to Triton/FLA. Repair with: pip install --force-reinstall "
+            "--no-deps nvidia-cutlass-dsl-libs-cu13"
+        )
+        return False
+    return True
+
+
+def _log_gdn_backend_decision(
+    backend: str, head_k_dim: int | None, use_flashinfer: bool
+) -> None:
+    """Log the GDN prefill backend choice in the attention-selector style."""
+    chosen = "FlashInfer" if use_flashinfer else "Triton/FLA"
+    logger.info_once(
+        "Using %s GDN prefill kernel (requested=%s, head_k_dim=%s).",
+        chosen,
+        backend,
+        head_k_dim,
+    )
+    # JIT-compiled cutlass path is only used on SM90 (Hopper).
+    if use_flashinfer and current_platform.is_device_capability(90):
+        logger.warning_once(
+            "FlashInfer GDN prefill is JIT-compiled; first run may take a "
+            "while. Set --gdn-prefill-backend triton to skip JIT.",
+        )
+
+
+def fi_chunk_gated_delta_rule(
+    q: torch.Tensor,
+    k: torch.Tensor,
+    v: torch.Tensor,
+    g: torch.Tensor,
+    beta: torch.Tensor,
+    initial_state: torch.Tensor,
+    output_final_state: bool,
+    cu_seqlens: torch.Tensor | None = None,
+    use_qk_l2norm_in_kernel: bool = True,
+):
+    from flashinfer.gdn_prefill import (
+        chunk_gated_delta_rule as chunk_gated_delta_rule_fi,
+    )
+
+    if use_qk_l2norm_in_kernel:
+        q = l2norm_fwd(q)
+        k = l2norm_fwd(k)
+
+    # use flashinfer implementation
+    q = q.squeeze(0).contiguous()
+    k = k.squeeze(0).contiguous()
+    v = v.squeeze(0).contiguous()
+
+    g = g.squeeze(0).contiguous()
+    beta = beta.squeeze(0).contiguous()
+    fi_state = initial_state.to(torch.float32)
+    fi_g = g.to(torch.float32)
+    fi_beta = beta.to(torch.float32)
+    result = chunk_gated_delta_rule_fi(
+        q=q,
+        k=k,
+        v=v,
+        g=torch.exp(fi_g),
+        beta=fi_beta,
+        initial_state=fi_state,
+        output_final_state=output_final_state,
+        cu_seqlens=cu_seqlens,
+    )
+    # FlashInfer returns (output, state) when output_final_state=True,
+    # or just output when output_final_state=False.
+    # Unsqueeze back to 4D (1, L, H, D) to match fla output format
+    if output_final_state:
+        output, final_state = result
+        return output.unsqueeze(0), final_state
+    else:
+        return result.unsqueeze(0), None
+
+
+@CustomOp.register("chunk_gated_delta_rule")
+class ChunkGatedDeltaRule(CustomOp):
+    def __init__(self, head_k_dim: int | None = None) -> None:
+        super().__init__()
+        additional_config = get_current_vllm_config().additional_config
+        assert isinstance(additional_config, dict)
+        backend_cfg = additional_config.get("gdn_prefill_backend", "auto")
+        backend = str(backend_cfg).strip().lower()
+
+        use_flashinfer = _should_use_flashinfer_gdn_prefill(backend, head_k_dim)
+        if backend == "flashinfer" and not use_flashinfer:
+            logger.warning_once(
+                "GDN prefill backend 'flashinfer' is selected but "
+                "cannot use this kernel on the current platform. "
+                "Falling back to Triton/FLA."
+            )
+        _log_gdn_backend_decision(backend, head_k_dim, use_flashinfer)
+
+        self._forward_method = (
+            self.forward_cuda if use_flashinfer else self.forward_native
+        )
+
+    def forward_cuda(
+        self,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        g: torch.Tensor,
+        beta: torch.Tensor,
+        initial_state: torch.Tensor,
+        output_final_state: bool,
+        cu_seqlens: torch.Tensor | None = None,
+        chunk_indices: torch.Tensor | None = None,
+        chunk_offsets: torch.Tensor | None = None,
+        use_qk_l2norm_in_kernel: bool = True,
+        core_attn_out: torch.Tensor | None = None,
+    ):
+        o, final_state = fi_chunk_gated_delta_rule(
+            q=q,
+            k=k,
+            v=v,
+            g=g,
+            beta=beta,
+            initial_state=initial_state,
+            output_final_state=output_final_state,
+            cu_seqlens=cu_seqlens,
+            use_qk_l2norm_in_kernel=use_qk_l2norm_in_kernel,
+        )
+        if core_attn_out is not None:
+            o_flat = o.squeeze(0).reshape(-1)
+            co_flat = core_attn_out.reshape(-1)
+            co_flat[: o_flat.numel()].copy_(o_flat)
+        return o, final_state
+
+    def forward_native(
+        self,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        g: torch.Tensor,
+        beta: torch.Tensor,
+        initial_state: torch.Tensor,
+        output_final_state: bool,
+        cu_seqlens: torch.Tensor | None = None,
+        chunk_indices: torch.Tensor | None = None,
+        chunk_offsets: torch.Tensor | None = None,
+        use_qk_l2norm_in_kernel: bool = True,
+        core_attn_out: torch.Tensor | None = None,
+    ):
+        return fla_chunk_gated_delta_rule(
+            q=q,
+            k=k,
+            v=v,
+            g=g,
+            beta=beta,
+            initial_state=initial_state,
+            output_final_state=output_final_state,
+            cu_seqlens=cu_seqlens,
+            chunk_indices=chunk_indices,
+            chunk_offsets=chunk_offsets,
+            use_qk_l2norm_in_kernel=use_qk_l2norm_in_kernel,
+            core_attn_out=core_attn_out,
+        )
+
+
+@PluggableLayer.register("qwen_gated_delta_net_attention")
+class QwenGatedDeltaNetAttention(GatedDeltaNetAttention):
+    def get_state_shape(
+        self,
+    ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...], tuple[int, ...]]:
+        return MambaStateShapeCalculator.gated_delta_net_state_shape(
+            self.tp_size,
+            self.num_k_heads,
+            self.num_v_heads,
+            self.head_k_dim,
+            self.head_v_dim,
+            self.conv_kernel_size,
+            self.num_spec,
+        )
+
+    def __init__(
+        self,
+        config: Qwen3NextConfig,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+        gqa_interleaved_layout=False,
+    ) -> None:
+        super().__init__(config, vllm_config, prefix)
+
+        self.num_k_heads = config.linear_num_key_heads
+        self.num_v_heads = config.linear_num_value_heads
+        self.head_k_dim = config.linear_key_head_dim
+        self.head_v_dim = config.linear_value_head_dim
+        self.conv_kernel_size = config.linear_conv_kernel_dim
+        self.key_dim = self.head_k_dim * self.num_k_heads
+        self.value_dim = self.head_v_dim * self.num_v_heads
+        self.gqa_interleaved_layout = gqa_interleaved_layout
+        if current_platform.is_xpu():
+            self._forward_method = self.forward_xpu
+        elif current_platform.is_cpu():
+            from vllm.model_executor.layers.mamba.ops.cpu.gdn_attention import (
+                register_cpu_gdn_attention_ops,
+            )
+
+            register_cpu_gdn_attention_ops()
+            self._forward_method = self.forward_cpu
+        elif current_platform.is_rocm():
+            self._forward_method = self.forward_hip
+        else:
+            self._forward_method = self.forward_cuda
+
+        # QKV
+        self.conv_dim = self.key_dim * 2 + self.value_dim
+        self.conv1d = ColumnParallelLinear(
+            input_size=self.conv_kernel_size,
+            output_size=self.conv_dim,
+            bias=False,
+            prefix=f"{prefix}.conv1d",
+        )
+        self.conv1d.weight.data = self.conv1d.weight.data.unsqueeze(1)
+
+        # projection of the input hidden states
+        # Qwen3-Next and Qwen3.5 has a different qkv_proj layout,
+        # we need to create qkvz_proj adaptively here.
+        # When create_in_proj_qkvz is False (e.g. LoRA enabled in Qwen3.5),
+        # in_proj_qkv and in_proj_z are created separately instead.
+        self.in_proj_qkvz = self.create_qkvz_proj(
+            hidden_size=self.hidden_size,
+            key_dim=self.key_dim,
+            value_dim=self.value_dim,
+            quant_config=self.quant_config,
+            prefix=f"{prefix}.in_proj_qkvz",
+        )
+
+        # ba_proj doesn't support blockwise fp8 quantization.
+        # Qwen3-Next and Qwen3.5 have different in_proj_ba checkpoint
+        # layouts, so we use a factory method to create the projection.
+        self.in_proj_ba = self.create_ba_proj(
+            hidden_size=self.hidden_size,
+            num_v_heads=self.num_v_heads,
+            quant_config=self.quant_config,
+            prefix=f"{prefix}.in_proj_ba",
+        )
+        self.disable_tp_for_ba_proj = self.maybe_disable_tp(self.quant_config)
+
+        query_key_settings = (self.key_dim, 0, False)
+        value_settings = (self.value_dim, 0, False)
+
+        self.conv1d.weight.weight_loader = mamba_v2_sharded_weight_loader(
+            [
+                query_key_settings,
+                query_key_settings,
+                value_settings,
+            ],
+            self.tp_size,
+            self.tp_rank,
+        )
+
+        # selective projection used to make dt, B and C input dependent
+
+        # time step projection (discretization)
+        # instantiate once and copy inv_dt in init_weights of PretrainedModel
+        self.dt_bias = nn.Parameter(
+            torch.ones(self.num_v_heads // self.tp_size),
+        )
+        self.A_log = nn.Parameter(
+            torch.empty(
+                divide(self.num_v_heads, self.tp_size),
+                dtype=torch.float32,
+            )
+        )
+
+        set_weight_attrs(self.A_log, {"weight_loader": sharded_weight_loader(0)})
+        set_weight_attrs(self.dt_bias, {"weight_loader": sharded_weight_loader(0)})
+
+        output_gate_type = getattr(config, "output_gate_type", "silu")
+        if output_gate_type == "swish":
+            output_gate_type = "silu"
+        assert output_gate_type in ["silu", "swish", "sigmoid"], (
+            f"unsupported {output_gate_type=}"
+        )
+
+        self.norm = RMSNormGated(
+            self.head_v_dim,
+            eps=self.layer_norm_epsilon,
+            group_size=None,
+            norm_before_gate=True,
+            activation=output_gate_type,
+            device=current_platform.current_device(),
+        )
+
+        self.out_proj = RowParallelLinear(
+            self.value_dim,
+            self.hidden_size,
+            bias=False,
+            input_is_parallel=True,
+            quant_config=self.quant_config,
+            prefix=f"{prefix}.out_proj",
+        )
+
+        self.chunk_gated_delta_rule = ChunkGatedDeltaRule(head_k_dim=self.head_k_dim)
+        self._prefill_kernels_warmed_up = False
+        self.enable_packed_recurrent_decode = (
+            envs.VLLM_ENABLE_FLA_PACKED_RECURRENT_DECODE
+        )
+
+        compilation_config = get_current_vllm_config().compilation_config
+        if prefix in compilation_config.static_forward_context:
+            raise ValueError(f"Duplicate layer name: {prefix}")
+        compilation_config.static_forward_context[prefix] = self
+
+    def create_qkvz_proj(
+        self,
+        hidden_size: int,
+        key_dim: int,
+        value_dim: int,
+        quant_config: QuantizationConfig | None,
+        prefix: str,
+    ) -> MergedColumnParallelLinear:
+        # When gqa_interleaved_layout=True (Qwen3-Next), qkvz weights are
+        # stored as a single fused tensor with interleaved GQA layout, so we
+        # use one output shard to preserve the interleaving across TP ranks.
+        # When gqa_interleaved_layout=False (Qwen3.5), the checkpoint has
+        # separate q, k, v, z weights, so we use 4 independent output sizes.
+        output_sizes = (
+            [sum((key_dim, key_dim, value_dim, value_dim))]
+            if self.gqa_interleaved_layout
+            else [key_dim, key_dim, value_dim, value_dim]
+        )
+        return MergedColumnParallelLinear(
+            input_size=hidden_size,
+            output_sizes=output_sizes,
+            bias=False,
+            quant_config=quant_config,
+            prefix=prefix,
+        )
+
+    def create_ba_proj(
+        self,
+        hidden_size: int,
+        num_v_heads: int,
+        quant_config: QuantizationConfig | None,
+        prefix: str,
+    ) -> MergedColumnParallelLinear:
+        # When gqa_interleaved_layout=True (Qwen3-Next), in_proj_ba is stored
+        # as a single fused weight [b_g0, a_g0, b_g1, a_g1, ...] interleaved
+        # by key-head group; a single output shard preserves this across TP.
+        # When gqa_interleaved_layout=False (Qwen3.5), in_proj_b and in_proj_a
+        # are separate checkpoint weights, so we use 2 independent output sizes.
+        output_sizes = (
+            [num_v_heads * 2] if self.gqa_interleaved_layout else [num_v_heads] * 2
+        )
+        return MergedColumnParallelLinear(
+            input_size=hidden_size,
+            output_sizes=output_sizes,
+            bias=False,
+            quant_config=quant_config,
+            prefix=prefix,
+            disable_tp=self.maybe_disable_tp(quant_config),
+        )
+
+    def maybe_disable_tp(self, quant_config: QuantizationConfig | None) -> bool:
+        """Whether to replicate ba_proj instead of TP-sharding it.
+
+        Marlin requires output_size_per_partition >= MIN_THREAD_N=64, which
+        the Qwen3.5 non-interleaved [num_v_heads]*2 layout violates at TP>=2
+        (e.g. num_v_heads=64, TP=4 -> 16). Replicating the projection keeps
+        each rank above the Marlin threshold; forward() then slices b/a to
+        the local TP partition. Qwen3-Next's interleaved [num_v_heads*2]
+        layout is unaffected and stays TP-sharded.
+
+        See https://github.com/vllm-project/vllm/issues/35924
+        """
+        return (
+            current_platform.is_cuda()
+            and not self.gqa_interleaved_layout
+            and isinstance(quant_config, (AWQMarlinConfig, AutoGPTQConfig, INCConfig))
+        )
+
+    def split_ba(self, ba: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+        b, a = ba.chunk(2, dim=-1)
+        if self.disable_tp_for_ba_proj and self.tp_size > 1:
+            # ba_proj is replicated for Marlin; slice b/a to local TP rank.
+            ba_chunk = self.num_v_heads // self.tp_size
+            ba_start = self.tp_rank * ba_chunk
+            b = b[:, ba_start : ba_start + ba_chunk]
+            a = a[:, ba_start : ba_start + ba_chunk]
+        return b, a
+
+    def fix_query_key_value_ordering(
+        self,
+        mixed_qkvz: torch.Tensor,
+        mixed_ba: torch.Tensor,
+    ):
+        """
+        Derives `query`, `key` and `value` tensors from `mixed_qkvzba`.
+        """
+        new_tensor_shape_qkvz = mixed_qkvz.size()[:-1] + (
+            self.num_k_heads // self.tp_size,
+            (
+                self.head_k_dim
+                + self.head_k_dim
+                + (self.head_v_dim + self.head_v_dim)
+                * self.num_v_heads
+                // self.num_k_heads
+            ),
+        )
+        new_tensor_shape_ba = mixed_ba.size()[:-1] + (
+            self.num_k_heads // self.tp_size,
+            2 * self.num_v_heads // self.num_k_heads,
+        )
+
+        mixed_qkvz = mixed_qkvz.view(*new_tensor_shape_qkvz)
+        mixed_ba = mixed_ba.view(*new_tensor_shape_ba)
+
+        split_arg_list_qkvz = [
+            self.head_k_dim,
+            self.head_k_dim,
+            (self.num_v_heads // self.num_k_heads * self.head_v_dim),
+            (self.num_v_heads // self.num_k_heads * self.head_v_dim),
+        ]
+        split_arg_list_ba = [
+            self.num_v_heads // self.num_k_heads,
+            self.num_v_heads // self.num_k_heads,
+        ]
+
+        # [b, sq, ng, (hn + hn + np/ng * hn + np/ng + np/ng)]
+        # --> [b, sq, ng, hn], [b, sq, ng, hn], [b, sq, ng, np/ng * hn],
+        #  [b, sq, ng, np/ng * hn], [b, sq, ng, np/ng], [b, sq, ng, np/ng]
+        (query, key, value, z) = torch.split(mixed_qkvz, split_arg_list_qkvz, dim=2)
+        (b, a) = torch.split(mixed_ba, split_arg_list_ba, dim=2)
+
+        # [b, sq, ng, np/ng * hn] -> [b, sq, np, hn]
+        value = value.reshape(value.size(0), -1, self.head_v_dim)
+        z = z.reshape(z.size(0), -1, self.head_v_dim)
+        b = b.reshape(b.size(0), self.num_v_heads // self.tp_size)
+        a = a.reshape(a.size(0), self.num_v_heads // self.tp_size)
+
+        return query, key, value, z, b, a
+
+    @torch.compile(fullgraph=True)
+    def prepare_gdn_attention_core_inputs(
+        self,
+        mixed_qkvz: torch.Tensor,
+        mixed_ba: torch.Tensor,
+        num_tokens: int,
+    ):
+        """
+        Derives mixed_qkv, z, b, a from projected qkvz/ba for the GDN custom op.
+
+        For gqa_interleaved_layout (Qwen3-Next): unpack the interleaved
+        [ng, (hk + hk + np/ng*hv + np/ng*hv)] layout into contiguous qkv.
+        For non-interleaved layout (Qwen3.5): simple split along last dim.
+        """
+        if not self.gqa_interleaved_layout:
+            # Qwen3.5: weights are in [q, k, v, z] order
+            assert num_tokens == mixed_qkvz.shape[0]
+            qkv_size = (self.key_dim * 2 + self.value_dim) // self.tp_size
+            z_size = self.value_dim // self.tp_size
+            mixed_qkv, z_flat = mixed_qkvz.split([qkv_size, z_size], dim=-1)
+            n = mixed_qkvz.shape[0]
+            z_out = z_flat.reshape(n, -1, self.head_v_dim)
+            b, a = mixed_ba.chunk(2, dim=-1)
+            return mixed_qkv, z_out, b, a
+
+        # Qwen3-Next: interleaved GQA layout
+        base_shape_qkvz = mixed_qkvz.size()[:-1]
+        base_shape_ba = mixed_ba.size()[:-1]
+        ng = self.num_k_heads // self.tp_size
+
+        new_tensor_shape_qkvz = base_shape_qkvz + (
+            ng,
+            (
+                self.head_k_dim
+                + self.head_k_dim
+                + (self.head_v_dim + self.head_v_dim)
+                * self.num_v_heads
+                // self.num_k_heads
+            ),
+        )
+        new_tensor_shape_ba = base_shape_ba + (
+            ng,
+            2 * self.num_v_heads // self.num_k_heads,
+        )
+
+        mixed_qkvz = mixed_qkvz.view(*new_tensor_shape_qkvz)
+        mixed_ba = mixed_ba.view(*new_tensor_shape_ba)
+
+        split_arg_list_qkvz = [
+            self.head_k_dim,
+            self.head_k_dim,
+            (self.num_v_heads // self.num_k_heads * self.head_v_dim),
+            (self.num_v_heads // self.num_k_heads * self.head_v_dim),
+        ]
+        split_arg_list_ba = [
+            self.num_v_heads // self.num_k_heads,
+            self.num_v_heads // self.num_k_heads,
+        ]
+
+        (query, key, value, z) = torch.split(mixed_qkvz, split_arg_list_qkvz, dim=-1)
+        (b, a) = torch.split(mixed_ba, split_arg_list_ba, dim=-1)
+
+        mixed_qkv_logical = torch.cat(
+            [
+                query.reshape(num_tokens, -1),
+                key.reshape(num_tokens, -1),
+                value.reshape(num_tokens, -1),
+            ],
+            dim=-1,
+        )
+
+        # The split above produces non-contiguous views into the interleaved
+        # buffer.  Concatenating everything into a single flat tensor forces a
+        # contiguous copy, then slicing back out gives contiguous q/k/v/z/b/a
+        # tensors that downstream kernels require.  Doing this in one cat+slice
+        # keeps torch.compile in a single Triton graph instead of emitting
+        # separate copy kernels per tensor.  The original code used
+        # rearrange(...).contiguous() on each tensor individually.
+        fused = torch.cat(
+            [
+                mixed_qkv_logical.reshape(-1),
+                z.reshape(-1),
+                b.reshape(-1),
+                a.reshape(-1),
+            ],
+            dim=0,
+        )
+
+        curr = 0
+        qkv_numel = mixed_qkv_logical.numel()
+        z_numel = z.numel()
+        b_numel = b.numel()
+        a_numel = a.numel()
+
+        mixed_qkv_out = fused[curr : curr + qkv_numel].view(num_tokens, -1)
+        curr += qkv_numel
+
+        z_out = fused[curr : curr + z_numel].view(
+            num_tokens, self.num_v_heads // self.tp_size, self.head_v_dim
+        )
+        curr += z_numel
+
+        b_out = fused[curr : curr + b_numel].view(
+            num_tokens, self.num_v_heads // self.tp_size
+        )
+        curr += b_numel
+
+        a_out = fused[curr : curr + a_numel].view(
+            num_tokens, self.num_v_heads // self.tp_size
+        )
+
+        return mixed_qkv_out, z_out, b_out, a_out
+
+    def rearrange_mixed_qkv(self, mixed_qkv):
+        """Split packed qkv into contiguous (1, seq, heads, dim) tensors.
+
+        The original code used ``rearrange(x, "l (h d) -> 1 l h d", d=...)``
+        followed by ``.contiguous()`` on each tensor.  This version flattens
+        all three splits into a single buffer via ``torch.cat`` so that
+        torch.compile emits one Triton copy kernel instead of three separate
+        contiguous() calls.
+        """
+        if mixed_qkv is None:
+            return None, None, None
+
+        seq_len = mixed_qkv.shape[0]
+        q_dim = self.key_dim // self.tp_size
+        k_dim = self.key_dim // self.tp_size
+        v_dim = self.value_dim // self.tp_size
+
+        query, key, value = torch.split(mixed_qkv, [q_dim, k_dim, v_dim], dim=-1)
+
+        fused = torch.cat(
+            [query.reshape(-1), key.reshape(-1), value.reshape(-1)], dim=0
+        )
+
+        q_size = seq_len * q_dim
+        k_size = seq_len * k_dim
+
+        q_contig = fused[0:q_size]
+        k_contig = fused[q_size : q_size + k_size]
+        v_contig = fused[q_size + k_size :]
+
+        query = q_contig.view(1, seq_len, -1, self.head_k_dim)
+        key = k_contig.view(1, seq_len, -1, self.head_k_dim)
+        value = v_contig.view(1, seq_len, -1, self.head_v_dim)
+
+        return query, key, value
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        output: torch.Tensor,
+    ):
+        self._forward_method(hidden_states, output)
+
+    def _output_projection(
+        self,
+        core_attn_out: torch.Tensor,
+        z: torch.Tensor,
+        output: torch.Tensor,
+        num_tokens: int,
+    ):
+        """Part 3: RMSNormGated + output linear projection.
+
+        The RMSNormGated + quant sequence is eligible for fusion
+        by the compilation pass when fuse_norm_quant is enabled.
+        """
+        z_shape_og = z.shape
+        core_attn_out = core_attn_out.reshape(-1, core_attn_out.shape[-1])
+        z = z.reshape(-1, z.shape[-1])
+        core_attn_out = self.norm(core_attn_out, z)
+        core_attn_out = core_attn_out.reshape(z_shape_og)
+        core_attn_out = core_attn_out.flatten(-2)  # ... h d -> ... (h d)
+        output[:num_tokens], _ = self.out_proj(core_attn_out)
+
+    def forward_hip(
+        self,
+        hidden_states: torch.Tensor,
+        output: torch.Tensor,
+    ):
+        """ROCm forward using AITER Triton fused projection+attention when
+        available, otherwise falling back to the generic CUDA path."""
+        if GDN_AITER_TRITON_AVAILABLE:
+            num_tokens = hidden_states.size(0)
+            projected_states_qkvz, _ = self.in_proj_qkvz(hidden_states)
+            projected_states_ba, _ = self.in_proj_ba(hidden_states)
+            projected_states_qkvz = projected_states_qkvz.view(num_tokens, -1)
+            projected_states_ba = projected_states_ba.view(num_tokens, -1)
+            core_attn_out = torch.empty(
+                (num_tokens, self.num_v_heads // self.tp_size, self.head_v_dim),
+                dtype=hidden_states.dtype,
+                device=hidden_states.device,
+            )
+            z = torch.empty(
+                (num_tokens, self.num_v_heads // self.tp_size, self.head_v_dim),
+                dtype=projected_states_qkvz.dtype,
+                device=projected_states_qkvz.device,
+            )
+
+            torch.ops.vllm.qwen_gdn_attention_core(
+                projected_states_qkvz,
+                projected_states_ba,
+                z,
+                core_attn_out,
+                fast_kernel=True,
+                layer_name=_encode_layer_name(self.prefix),
+            )
+
+            self._output_projection(core_attn_out, z, output, num_tokens)
+        else:
+            self.forward_cuda(hidden_states, output)
+
+    def forward_cuda(
+        self,
+        hidden_states: torch.Tensor,
+        output: torch.Tensor,
+    ):
+        """
+        Forward pass with three parts:
+        1. Input projection
+        2. Core attention (custom op)
+        3. Output projection
+        """
+        num_tokens = hidden_states.size(0)
+        # ============================================================
+        # Part 1: Input Projection
+        # ============================================================
+        mixed_qkvz, _ = self.in_proj_qkvz(hidden_states)
+        ba, _ = self.in_proj_ba(hidden_states)
+
+        if self.gqa_interleaved_layout:
+            # Qwen3-Next: unpack the interleaved GQA layout
+            query, key, value, z, b, a = self.fix_query_key_value_ordering(
+                mixed_qkvz, ba
+            )
+            query, key, value = map(
+                lambda x: rearrange(x, "l p d -> l (p d)"), (query, key, value)
+            )
+            mixed_qkv = torch.cat((query, key, value), dim=-1)
+        else:
+            # Qwen3.5: weights are already in [q, k, v, z] and [b, a] order
+            qkv_size = (self.key_dim * 2 + self.value_dim) // self.tp_size
+            z_size = self.value_dim // self.tp_size
+            mixed_qkv, z = mixed_qkvz.split([qkv_size, z_size], dim=-1)
+            z = z.reshape(z.size(0), -1, self.head_v_dim)
+            b, a = self.split_ba(ba)
+            b = b.contiguous()
+            a = a.contiguous()
+
+        # ============================================================
+        # Part 2: Core Attention (Custom Op)
+        # ============================================================
+        # Note: we should not use torch.empty here like other attention backends,
+        # see discussions in https://github.com/vllm-project/vllm/pull/28182
+        core_attn_out = torch.zeros(
+            (num_tokens, self.num_v_heads // self.tp_size, self.head_v_dim),
+            dtype=hidden_states.dtype,
+            device=hidden_states.device,
+        )
+
+        torch.ops.vllm.qwen_gdn_attention_core(
+            mixed_qkv,
+            b,
+            a,
+            core_attn_out,
+            fast_kernel=False,
+            layer_name=_encode_layer_name(self.prefix),
+        )
+
+        # ============================================================
+        # Part 3: Output Projection
+        # ============================================================
+        self._output_projection(core_attn_out, z, output, num_tokens)
+
+    def forward_xpu(
+        self,
+        hidden_states: torch.Tensor,
+        output: torch.Tensor,
+    ):
+        """
+        Forward pass with three parts:
+        1. Input projection
+        2. Core attention (custom op)
+        3. Output projection
+        """
+        num_tokens = hidden_states.size(0)
+
+        # ============================================================
+        # Part 1: Input Projection
+        # ============================================================
+        projected_states_qkvz, _ = self.in_proj_qkvz(hidden_states)
+        projected_states_ba, _ = self.in_proj_ba(hidden_states)
+
+        # ============================================================
+        # Part 2: Core Attention
+        # ============================================================
+        core_attn_out = torch.zeros(
+            (num_tokens, self.num_v_heads // self.tp_size, self.head_v_dim),
+            dtype=hidden_states.dtype,
+            device=hidden_states.device,
+        )
+        z = torch.empty_like(core_attn_out)
+
+        torch.ops.vllm.gdn_attention_core_xpu(
+            core_attn_out,
+            z,
+            projected_states_qkvz,
+            projected_states_ba,
+            self.prefix,
+        )
+
+        # ============================================================
+        # Part 3: Output Projection
+        # ============================================================
+        z_shape_og = z.shape
+        # Reshape input data into 2D tensor
+        core_attn_out = core_attn_out.reshape(-1, core_attn_out.shape[-1])
+        z = z.reshape(-1, z.shape[-1])
+        core_attn_out = self.norm(core_attn_out, z)
+        core_attn_out = core_attn_out.reshape(z_shape_og)
+        core_attn_out = core_attn_out.flatten(-2)  # ... h d -> ... (h d)
+        output[:num_tokens], _ = self.out_proj(core_attn_out)
+
+    def forward_cpu(
+        self,
+        hidden_states: torch.Tensor,
+        output: torch.Tensor,
+    ):
+        assert not hasattr(self, "in_proj_qkv"), "lora isn't supported on CPU."
+
+        mixed_qkvz, _ = self.in_proj_qkvz(hidden_states)
+        ba, _ = self.in_proj_ba(hidden_states)
+
+        if self.gqa_interleaved_layout:
+            # Qwen3-Next: unpack the interleaved GQA layout
+            query, key, value, z, b, a = self.fix_query_key_value_ordering(
+                mixed_qkvz, ba
+            )
+            query, key, value = map(
+                lambda x: rearrange(x, "l p d -> l (p d)"), (query, key, value)
+            )
+            mixed_qkv = torch.cat((query, key, value), dim=-1)
+        else:
+            # Qwen3.5: weights are already in [q, k, v, z] and [b, a] order
+            qkv_size = (self.key_dim * 2 + self.value_dim) // self.tp_size
+            z_size = self.value_dim // self.tp_size
+            mixed_qkv, z = mixed_qkvz.split([qkv_size, z_size], dim=-1)
+            z = z.reshape(z.size(0), -1, self.head_v_dim)
+            b, a = ba.chunk(2, dim=-1)
+
+        num_tokens = hidden_states.size(0)
+        core_attn_out = torch.zeros(
+            (num_tokens, self.num_v_heads // self.tp_size, self.head_v_dim),
+            dtype=hidden_states.dtype,
+            device=hidden_states.device,
+        )
+
+        torch.ops.vllm.cpu_gdn_attention_core(
+            mixed_qkv,
+            b,
+            a,
+            core_attn_out,
+            _encode_layer_name(self.prefix),
+        )
+
+        z_shape_og = z.shape
+        core_attn_out = core_attn_out.reshape(-1, core_attn_out.shape[-1])
+        z = z.reshape(-1, z.shape[-1])
+        core_attn_out = self.norm(core_attn_out, z)
+        core_attn_out = core_attn_out.reshape(z_shape_og)
+        core_attn_out = core_attn_out.flatten(-2)  # ... h d -> ... (h d)
+        output[:num_tokens], _ = self.out_proj(core_attn_out)
+
+    def _warmup_prefill_kernels(self, qkv_or_qkvz: torch.Tensor, v_dim: int) -> None:
+        """Warm up GDN prefill kernels during V1 profiling.
+
+        During V1 profile runs, ``_forward_core`` returns early because
+        ``attn_metadata`` is ``None``, so the autotuned kernels used by
+        ``chunk_gated_delta_rule`` (e.g. ``solve_tril``,
+        ``chunk_scaled_dot_kkt``) are never invoked.  After profiling,
+        vLLM allocates KV cache using most of the remaining GPU memory.
+        When the first real inference triggers the autotuner it OOMs
+        because there is not enough memory left for benchmarking.
+
+        This method runs minimal forward passes through
+        ``chunk_gated_delta_rule`` with small dummy tensors to force
+        autotuning while GPU memory is still plentiful.  The autotuner
+        results are cached globally, so only the first layer incurs
+        actual benchmarking cost.
+
+        All kernels including ``chunk_fwd_kernel_o`` now use a fixed
+        ``BT = chunk_size`` (64).  A single warmup pass with T = 64
+        is sufficient to populate the autotuner cache.
+
+        The decode path uses ``gdn_aiter_fused_rearrange_sigmoid_gated_delta_rule``
+        which has fixed kernel parameters (no autotuning), so only the
+        prefill (chunked) path needs warming up.
+        """
+        if self._prefill_kernels_warmed_up:
+            return
+        self._prefill_kernels_warmed_up = True
+
+        device = qkv_or_qkvz.device
+        dtype = qkv_or_qkvz.dtype
+        num_k_heads = self.num_k_heads // self.tp_size
+        num_v_heads = self.num_v_heads // self.tp_size
+        _, state_dtype = self.get_state_dtype()
+
+        # All kernels use BT = chunk_size, so a single pass with T = chunk_size
+        # is sufficient to populate every autotuner cache. Mirror the real
+        # prefill path here: build q/k/v/g/beta via fused_post_conv_prep and
+        # then run chunk_gated_delta_rule with in-kernel L2 norm disabled.
+        T = FLA_CHUNK_SIZE
+        dummy_mixed_qkv = torch.randn(
+            T, qkv_or_qkvz.shape[-1] - v_dim, device=device, dtype=dtype
+        )
+        dummy_a = torch.randn(T, num_v_heads, device=device, dtype=dtype)
+        dummy_b = torch.randn(T, num_v_heads, device=device, dtype=dtype)
+        q, k, v, g, beta = fused_post_conv_prep(
+            conv_output=dummy_mixed_qkv,
+            a=dummy_a,
+            b=dummy_b,
+            A_log=self.A_log,
+            dt_bias=self.dt_bias,
+            num_k_heads=num_k_heads,
+            head_k_dim=self.head_k_dim,
+            head_v_dim=self.head_v_dim,
+            apply_l2norm=True,
+            output_g_exp=False,
+        )
+        q = q.unsqueeze(0)
+        k = k.unsqueeze(0)
+        v = v.unsqueeze(0)
+        g = g.unsqueeze(0)
+        beta = beta.unsqueeze(0)
+        state = torch.zeros(
+            1,
+            num_v_heads,
+            self.head_v_dim,
+            self.head_k_dim,
+            device=device,
+            dtype=state_dtype,
+        )
+        cu_seqlens = torch.tensor([0, T], device=device, dtype=torch.int32)
+
+        try:
+            self.chunk_gated_delta_rule(
+                q=q,
+                k=k,
+                v=v,
+                g=g,
+                beta=beta,
+                initial_state=state,
+                output_final_state=True,
+                cu_seqlens=cu_seqlens,
+                use_qk_l2norm_in_kernel=False,
+            )
+        except Exception:
+            logger.warning(
+                "GDN prefill kernel warmup (T=%d) failed for "
+                "layer %s. First inference may OOM due to "
+                "autotuner.",
+                T,
+                self.prefix,
+                exc_info=True,
+            )
+        else:
+            logger.debug(
+                "GDN prefill kernel warmup (T=%d) completed for layer %s",
+                T,
+                self.prefix,
+            )
+        finally:
+            del dummy_mixed_qkv, q, k, v, dummy_a, dummy_b, g, beta, state, cu_seqlens
+
+        torch.accelerator.empty_cache()
+
+    def _forward_core_rocm(
+        self,
+        qkvz: torch.Tensor,
+        ba: torch.Tensor,
+        z_out: torch.Tensor,
+        core_attn_out: torch.Tensor,
+    ):
+        """ROCm AITER fast path: conv1d + recurrent attention from packed
+        qkvz/ba layout.
+
+        For decode-only (no spec, no prefill) interleaved-GQA layouts,
+        dispatches directly to ``_forward_core_decode_fast``. Otherwise unpacks
+        the packed layout and falls through to ``_forward_core``.
+
+        Args:
+            qkvz: packed [q, k, v, z] projection (num_tokens, qkvz_dim)
+            ba:   packed [b, a] gating vectors    (num_tokens, 2*num_heads)
+            z_out: **output** buffer for z        (num_tokens, num_heads,
+                   head_dim); mutated in-place.
+            core_attn_out: Pre-allocated output buffer for attention results.
+        """
+        forward_context = get_forward_context()
+        attn_metadata_raw = forward_context.attn_metadata
+
+        if attn_metadata_raw is None:
+            v_dim = core_attn_out.shape[-1] * core_attn_out.shape[-2]
+            self._warmup_prefill_kernels(qkvz, v_dim)
+            return
+
+        assert isinstance(attn_metadata_raw, dict)
+        attn_metadata = attn_metadata_raw[self.prefix]  # type: ignore[index]
+        assert isinstance(attn_metadata, GDNAttentionMetadata)
+
+        # The AITER fused reshape/conv kernel expects Qwen3-Next's interleaved
+        # GQA layout. Qwen3.5 uses a non-interleaved q/k/v/z layout and must use
+        # the generic path below to split/rearrange inputs correctly.
+        if (
+            self.gqa_interleaved_layout
+            and attn_metadata.spec_sequence_masks is None
+            and attn_metadata.num_prefills == 0
+            and attn_metadata.num_decodes > 0
+        ):
+            return self._forward_core_decode_fast(
+                qkvz=qkvz,
+                ba=ba,
+                z_out=z_out,
+                core_attn_out=core_attn_out,
+                attn_metadata=attn_metadata,
+            )
+
+        core_attn_out.zero_()
+        num_tokens_all = qkvz.shape[0]
+        mixed_qkv, z, b, a = self.prepare_gdn_attention_core_inputs(
+            qkvz, ba, num_tokens_all
+        )
+        z_out[:] = z
+        self._forward_core(
+            mixed_qkv=mixed_qkv,
+            b=b,
+            a=a,
+            core_attn_out=core_attn_out,
+        )
+
+    def _forward_core(
+        self,
+        mixed_qkv: torch.Tensor,
+        b: torch.Tensor,
+        a: torch.Tensor,
+        core_attn_out: torch.Tensor,
+    ):
+        """Core conv1d + recurrent attention (standard path).
+
+        Args:
+            mixed_qkv: packed [q, k, v] projection (num_tokens, qkv_dim)
+            b: beta gating vector                   (num_tokens, num_heads)
+            a: alpha gating vector                  (num_tokens, num_heads)
+            core_attn_out: Pre-allocated output buffer for attention results.
+        """
+        forward_context = get_forward_context()
+        attn_metadata_raw = forward_context.attn_metadata
+
+        if attn_metadata_raw is None:
+            self._warmup_prefill_kernels(mixed_qkv, 0)
+            return
+
+        assert isinstance(attn_metadata_raw, dict)
+        attn_metadata = attn_metadata_raw[self.prefix]  # type: ignore[index]
+        assert isinstance(attn_metadata, GDNAttentionMetadata)
+
+        if (
+            self.enable_packed_recurrent_decode
+            and attn_metadata.spec_sequence_masks is None
+            and attn_metadata.num_prefills == 0
+            and attn_metadata.num_decodes > 0
+        ):
+            return self._forward_core_decode_non_spec(
+                mixed_qkv=mixed_qkv,
+                b=b,
+                a=a,
+                core_attn_out=core_attn_out,
+                attn_metadata=attn_metadata,
+            )
+
+        has_initial_state = attn_metadata.has_initial_state
+        spec_query_start_loc = attn_metadata.spec_query_start_loc
+        non_spec_query_start_loc = attn_metadata.non_spec_query_start_loc
+        spec_sequence_masks = attn_metadata.spec_sequence_masks
+        spec_token_indx = attn_metadata.spec_token_indx
+        non_spec_token_indx = attn_metadata.non_spec_token_indx
+        spec_state_indices_tensor = attn_metadata.spec_state_indices_tensor  # noqa: E501
+        non_spec_state_indices_tensor = attn_metadata.non_spec_state_indices_tensor  # noqa: E501
+        self_kv_cache = self.kv_cache
+        # conv_state must be (..., dim, width-1) for the conv kernels.
+        # DS layout stores it that way directly; SD layout needs a transpose.
+        conv_state = (
+            self_kv_cache[0]
+            if is_conv_state_dim_first()
+            else self_kv_cache[0].transpose(-1, -2)
+        )
+        ssm_state = self_kv_cache[1]
+        num_actual_tokens = attn_metadata.num_actual_tokens
+        num_accepted_tokens = attn_metadata.num_accepted_tokens
+
+        mixed_qkv = mixed_qkv[:num_actual_tokens]
+        b = b[:num_actual_tokens]
+        a = a[:num_actual_tokens]
+
+        # 1. Convolution sequence transformation
+        conv_weights = self.conv1d.weight.view(
+            self.conv1d.weight.size(0), self.conv1d.weight.size(2)
+        )
+
+        if spec_sequence_masks is not None:
+            if attn_metadata.num_prefills == 0 and attn_metadata.num_decodes == 0:
+                mixed_qkv_spec = mixed_qkv
+                mixed_qkv_non_spec = None
+            else:
+                mixed_qkv_spec = mixed_qkv.index_select(0, spec_token_indx)
+                mixed_qkv_non_spec = mixed_qkv.index_select(0, non_spec_token_indx)
+        else:
+            mixed_qkv_spec = None
+            mixed_qkv_non_spec = mixed_qkv
+
+        # 1.1: Process the multi-query part
+        if spec_sequence_masks is not None:
+            # spec_state_indices_tensor is always set when spec_sequence_masks is set
+            assert spec_state_indices_tensor is not None
+            mixed_qkv_spec = causal_conv1d_update(
+                mixed_qkv_spec,
+                conv_state,
+                conv_weights,
+                self.conv1d.bias,
+                self.activation,
+                conv_state_indices=spec_state_indices_tensor[:, 0][  # type: ignore[index]
+                    : attn_metadata.num_spec_decodes  # type: ignore[attr-defined]
+                ],
+                num_accepted_tokens=num_accepted_tokens,
+                query_start_loc=spec_query_start_loc,
+                max_query_len=spec_state_indices_tensor.size(-1),
+                validate_data=False,
+            )
+
+        # 1.2: Process the remaining part
+        if attn_metadata.num_prefills > 0:
+            assert mixed_qkv_non_spec is not None
+            mixed_qkv_non_spec_T = mixed_qkv_non_spec.transpose(0, 1)
+            # - "cache_indices" updates the conv_state cache in positions
+            #   pointed to by "state_indices_tensor"
+            mixed_qkv_non_spec = causal_conv1d_fn(
+                mixed_qkv_non_spec_T,
+                conv_weights,
+                self.conv1d.bias,
+                activation=self.activation,
+                conv_states=conv_state,
+                has_initial_state=has_initial_state,
+                cache_indices=non_spec_state_indices_tensor,
+                query_start_loc=non_spec_query_start_loc,
+                metadata=attn_metadata,
+            ).transpose(0, 1)
+        elif attn_metadata.num_decodes > 0:
+            assert mixed_qkv_non_spec is not None
+            mixed_qkv_non_spec = causal_conv1d_update(
+                mixed_qkv_non_spec,
+                conv_state,
+                conv_weights,
+                self.conv1d.bias,
+                self.activation,
+                conv_state_indices=non_spec_state_indices_tensor[  # type: ignore[index]
+                    : attn_metadata.num_actual_tokens  # type: ignore[attr-defined]
+                ],
+                validate_data=True,
+            )
+        else:
+            mixed_qkv_non_spec = None
+
+        query_spec, key_spec, value_spec = self.rearrange_mixed_qkv(mixed_qkv_spec)
+        if attn_metadata.num_prefills > 0:
+            assert mixed_qkv_non_spec is not None, (
+                "mixed_qkv_non_spec must be provided for prefill path"
+            )
+            if spec_sequence_masks is not None:
+                a_non_spec = a.index_select(0, non_spec_token_indx)
+                b_non_spec = b.index_select(0, non_spec_token_indx)
+            else:
+                a_non_spec = a
+                b_non_spec = b
+
+            (
+                query_non_spec,
+                key_non_spec,
+                value_non_spec,
+                g_non_spec,
+                beta_non_spec,
+            ) = fused_post_conv_prep(
+                conv_output=mixed_qkv_non_spec,
+                a=a_non_spec,
+                b=b_non_spec,
+                A_log=self.A_log,
+                dt_bias=self.dt_bias,
+                num_k_heads=self.num_k_heads // self.tp_size,
+                head_k_dim=self.head_k_dim,
+                head_v_dim=self.head_v_dim,
+                apply_l2norm=True,
+                output_g_exp=False,
+            )
+            query_non_spec = query_non_spec.unsqueeze(0)
+            key_non_spec = key_non_spec.unsqueeze(0)
+            value_non_spec = value_non_spec.unsqueeze(0)
+            g_non_spec = g_non_spec.unsqueeze(0)
+            beta_non_spec = beta_non_spec.unsqueeze(0)
+        else:
+            query_non_spec, key_non_spec, value_non_spec = self.rearrange_mixed_qkv(
+                mixed_qkv_non_spec
+            )
+            g_non_spec = None
+            beta_non_spec = None
+
+        # 2. Recurrent attention
+
+        # 2.1: Process the multi-query part
+        if spec_sequence_masks is not None:
+            core_attn_out_spec, last_recurrent_state = (
+                fused_sigmoid_gating_delta_rule_update(
+                    A_log=self.A_log,
+                    a=a,
+                    b=b,
+                    dt_bias=self.dt_bias,
+                    q=query_spec,
+                    k=key_spec,
+                    v=value_spec,
+                    initial_state=ssm_state,
+                    inplace_final_state=True,
+                    cu_seqlens=spec_query_start_loc[  # type: ignore[index]
+                        : attn_metadata.num_spec_decodes
+                        + 1  # type: ignore[attr-defined]
+                    ],
+                    ssm_state_indices=spec_state_indices_tensor,
+                    num_accepted_tokens=num_accepted_tokens,
+                    use_qk_l2norm_in_kernel=True,
+                )
+            )
+        else:
+            core_attn_out_spec, last_recurrent_state = None, None
+
+        # 2.2: Process the remaining part
+        if attn_metadata.num_prefills > 0:
+            assert non_spec_state_indices_tensor is not None
+            initial_state = ssm_state[non_spec_state_indices_tensor].contiguous()  # type: ignore[index]
+            assert has_initial_state is not None
+            initial_state[~has_initial_state, ...] = 0  # type: ignore[operator]
+            (
+                core_attn_out_non_spec,
+                last_recurrent_state,
+            ) = self.chunk_gated_delta_rule(
+                q=query_non_spec,
+                k=key_non_spec,
+                v=value_non_spec,
+                g=g_non_spec,
+                beta=beta_non_spec,
+                initial_state=initial_state,
+                output_final_state=True,
+                cu_seqlens=non_spec_query_start_loc,
+                chunk_indices=attn_metadata.chunk_indices,
+                chunk_offsets=attn_metadata.chunk_offsets,
+                use_qk_l2norm_in_kernel=False,
+            )
+            # Init cache
+            ssm_state[non_spec_state_indices_tensor] = last_recurrent_state.to(
+                ssm_state.dtype
+            )
+        elif attn_metadata.num_decodes > 0:
+            core_attn_out_non_spec, last_recurrent_state = (
+                fused_sigmoid_gating_delta_rule_update(
+                    A_log=self.A_log,
+                    a=a,
+                    b=b,
+                    dt_bias=self.dt_bias,
+                    q=query_non_spec,
+                    k=key_non_spec,
+                    v=value_non_spec,
+                    initial_state=ssm_state,
+                    inplace_final_state=True,
+                    cu_seqlens=non_spec_query_start_loc[  # type: ignore[index]
+                        : attn_metadata.num_decodes
+                        + 1  # type: ignore[attr-defined]
+                    ],
+                    ssm_state_indices=non_spec_state_indices_tensor,
+                    use_qk_l2norm_in_kernel=True,
+                )
+            )
+        else:
+            core_attn_out_non_spec, last_recurrent_state = None, None
+
+        # 3. Merge core attention output
+        if spec_sequence_masks is not None and core_attn_out_non_spec is not None:
+            merged_out = torch.empty(
+                (1, num_actual_tokens, *core_attn_out_spec.shape[2:]),
+                dtype=core_attn_out_non_spec.dtype,
+                device=core_attn_out_non_spec.device,
+            )
+            merged_out.index_copy_(1, spec_token_indx, core_attn_out_spec)
+            merged_out.index_copy_(1, non_spec_token_indx, core_attn_out_non_spec)
+            core_attn_out[:num_actual_tokens] = merged_out.squeeze(0)
+        elif spec_sequence_masks is not None:
+            core_attn_out[:num_actual_tokens] = core_attn_out_spec.squeeze(0)
+        else:
+            core_attn_out[:num_actual_tokens] = core_attn_out_non_spec.squeeze(0)
+
+    def _forward_core_decode_fast(
+        self,
+        qkvz: torch.Tensor,
+        ba: torch.Tensor,
+        z_out: torch.Tensor,
+        core_attn_out: torch.Tensor,
+        attn_metadata: GDNAttentionMetadata,
+    ):
+        non_spec_query_start_loc = attn_metadata.non_spec_query_start_loc
+        non_spec_state_indices_tensor = attn_metadata.non_spec_state_indices_tensor  # noqa: E501
+        self_kv_cache = self.kv_cache
+        # conv_state must be (..., dim, width-1) for the conv kernels.
+        # DS layout stores it that way directly; SD layout needs a transpose.
+        conv_state = (
+            self_kv_cache[0]
+            if is_conv_state_dim_first()
+            else self_kv_cache[0].transpose(-1, -2)
+        )
+        ssm_state = self_kv_cache[1]
+
+        # 1. Convolution sequence transformation
+        conv_weights = self.conv1d.weight.view(
+            self.conv1d.weight.size(0), self.conv1d.weight.size(2)
+        )
+
+        mixed_qkv_non_spec, b, a = (
+            gdn_aiter_fused_reshape_causal_conv1d_update_single_token(
+                qkvz,
+                attn_metadata.num_actual_tokens,
+                self.num_k_heads // self.tp_size,
+                self.num_v_heads // self.tp_size,
+                self.head_k_dim,
+                self.head_v_dim,
+                ba,
+                z_out,
+                core_attn_out,
+                conv_state,
+                conv_weights,
+                self.conv1d.bias,
+                self.activation,
+                conv_state_indices=non_spec_state_indices_tensor[  # type: ignore[index]
+                    : attn_metadata.num_actual_tokens
+                ],
+                validate_data=True,
+            )
+        )
+
+        # 2. Recurrent attention
+        gdn_aiter_fused_rearrange_sigmoid_gated_delta_rule(
+            A_log=self.A_log,
+            a=a,
+            b=b,
+            dt_bias=self.dt_bias,
+            qkv=mixed_qkv_non_spec,
+            key_dim=self.key_dim // self.tp_size,
+            value_dim=self.value_dim // self.tp_size,
+            head_k_dim=self.head_k_dim,
+            head_v_dim=self.head_v_dim,
+            initial_state=ssm_state,
+            inplace_final_state=True,
+            cu_seqlens=non_spec_query_start_loc[: attn_metadata.num_decodes + 1],  # type: ignore[index]
+            ssm_state_indices=non_spec_state_indices_tensor,
+            use_qk_l2norm_in_kernel=True,
+            core_attn_out=core_attn_out.reshape(-1),
+        )
+
+    def _forward_core_decode_non_spec(
+        self,
+        mixed_qkv: torch.Tensor,
+        b: torch.Tensor,
+        a: torch.Tensor,
+        core_attn_out: torch.Tensor,
+        attn_metadata: GDNAttentionMetadata,
+    ):
+        """
+        Core attention computation with a packed non-spec decode fast path.
+        """
+        non_spec_state_indices_tensor = attn_metadata.non_spec_state_indices_tensor  # noqa: E501
+        self_kv_cache = self.kv_cache
+        # conv_state must be (..., dim, width-1) for the conv kernels.
+        # DS layout stores it that way directly; SD layout needs a transpose.
+        conv_state = (
+            self_kv_cache[0]
+            if is_conv_state_dim_first()
+            else self_kv_cache[0].transpose(-1, -2)
+        )
+        ssm_state = self_kv_cache[1]
+        num_actual_tokens = attn_metadata.num_actual_tokens
+
+        mixed_qkv = mixed_qkv[:num_actual_tokens]
+        b = b[:num_actual_tokens]
+        a = a[:num_actual_tokens]
+
+        conv_weights = self.conv1d.weight.view(
+            self.conv1d.weight.size(0), self.conv1d.weight.size(2)
+        )
+        mixed_qkv_non_spec = causal_conv1d_update(
+            mixed_qkv,
+            conv_state,
+            conv_weights,
+            self.conv1d.bias,
+            self.activation,
+            conv_state_indices=non_spec_state_indices_tensor[:num_actual_tokens],  # type: ignore[index]
+            validate_data=False,
+        )
+        out_buf = core_attn_out[:num_actual_tokens].unsqueeze(1)
+        fused_recurrent_gated_delta_rule_packed_decode(
+            mixed_qkv=mixed_qkv_non_spec,
+            a=a,
+            b=b,
+            A_log=self.A_log,
+            dt_bias=self.dt_bias,
+            scale=self.head_k_dim**-0.5,
+            initial_state=ssm_state,
+            out=out_buf,
+            ssm_state_indices=non_spec_state_indices_tensor[:num_actual_tokens],  # type: ignore[index]
+            use_qk_l2norm_in_kernel=True,
+        )
+        return
+
+
+def qwen_gdn_attention_core(
+    qkv_or_qkvz: torch.Tensor,
+    b_or_ba: torch.Tensor,
+    a_or_z_out: torch.Tensor,
+    core_attn_out: torch.Tensor,
+    fast_kernel: bool,
+    layer_name: LayerNameType,
+) -> None:
+    """Custom op dispatching to _forward_core or _forward_core_rocm.
+
+    Handles conv1d + recurrent attention only; input/output projections
+    are performed by the caller.
+
+    When ``fast_kernel=False`` (standard path):
+        qkv_or_qkvz is [q, k, v], b_or_ba is b, a_or_z_out is a (read-only).
+    When ``fast_kernel=True`` (AITER Triton fast path, ROCm only):
+        qkv_or_qkvz is [q, k, v, z], b_or_ba is [b, a], a_or_z_out is the
+        z output buffer (mutated in-place).
+
+    ``core_attn_out`` is always mutated in-place.
+    """
+    layer_name = _resolve_layer_name(layer_name)
+    forward_context: ForwardContext = get_forward_context()
+    self = forward_context.no_compile_layers[layer_name]
+    if fast_kernel:
+        self._forward_core_rocm(
+            qkvz=qkv_or_qkvz,
+            ba=b_or_ba,
+            z_out=a_or_z_out,
+            core_attn_out=core_attn_out,
+        )
+    else:
+        self._forward_core(
+            mixed_qkv=qkv_or_qkvz,
+            b=b_or_ba,
+            a=a_or_z_out,
+            core_attn_out=core_attn_out,
+        )
+
+
+def gdn_attention_core_fake(
+    qkv_or_qkvz: torch.Tensor,
+    b_or_ba: torch.Tensor,
+    a_or_z_out: torch.Tensor,
+    core_attn_out: torch.Tensor,
+    fast_kernel: bool,
+    layer_name: LayerNameType,
+) -> None:
+    """Fake implementation for torch.compile."""
+    return
+
+
+direct_register_custom_op(
+    op_name="qwen_gdn_attention_core",
+    op_func=qwen_gdn_attention_core,
+    mutates_args=["a_or_z_out", "core_attn_out"],
+    fake_impl=gdn_attention_core_fake,
+)
+
+
+@triton.jit
+def fused_gdn_gating_kernel(
+    g,
+    beta_output,
+    A_log,
+    a,
+    b,
+    dt_bias,
+    seq_len,
+    NUM_HEADS: tl.constexpr,
+    beta: tl.constexpr,
+    threshold: tl.constexpr,
+    BLK_HEADS: tl.constexpr,
+):
+    i_b, i_s, i_d = tl.program_id(0), tl.program_id(1), tl.program_id(2)
+    head_off = i_d * BLK_HEADS + tl.arange(0, BLK_HEADS)
+    off = i_b * seq_len * NUM_HEADS + i_s * NUM_HEADS + head_off
+    mask = head_off < NUM_HEADS
+    blk_A_log = tl.load(A_log + head_off, mask=mask)
+    blk_a = tl.load(a + off, mask=mask)
+    blk_b = tl.load(b + off, mask=mask)
+    blk_bias = tl.load(dt_bias + head_off, mask=mask)
+    # If the model is loaded in fp16, without the .float() here, A might be -inf
+    x = blk_a.to(tl.float32) + blk_bias.to(tl.float32)
+    softplus_x = tl.where(
+        beta * x <= threshold, (1 / beta) * tl.log(1 + tl.exp(beta * x)), x
+    )
+    blk_g = -tl.exp(blk_A_log.to(tl.float32)) * softplus_x
+    tl.store(g + off, blk_g.to(g.dtype.element_ty), mask=mask)
+    # compute beta_output = sigmoid(b)
+    blk_beta_output = tl.sigmoid(blk_b.to(tl.float32))
+    tl.store(
+        beta_output + off, blk_beta_output.to(beta_output.dtype.element_ty), mask=mask
+    )
+
+
+def fused_gdn_gating(
+    A_log: torch.Tensor,
+    a: torch.Tensor,
+    b: torch.Tensor,
+    dt_bias: torch.Tensor,
+    beta: float = 1.0,
+    threshold: float = 20.0,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """
+    Fused computation of g and beta for Gated Delta Net.
+    g = -self.A_log.float().exp() * F.softplus(a.float() + self.dt_bias)
+    beta_output = b.sigmoid()
+    TODO maybe use torch.compile to replace this triton kernel
+    """
+    batch, num_heads = a.shape
+    seq_len = 1
+    grid = (batch, seq_len, triton.cdiv(num_heads, 8))
+    g = torch.empty(1, batch, num_heads, dtype=torch.float32, device=a.device)
+    beta_output = torch.empty(1, batch, num_heads, dtype=b.dtype, device=b.device)
+    fused_gdn_gating_kernel[grid](
+        g,
+        beta_output,
+        A_log,
+        a,
+        b,
+        dt_bias,
+        seq_len,
+        num_heads,
+        beta,
+        threshold,
+        8,
+        num_warps=1,
+    )
+    return g, beta_output
diff --git a/vllm/model_executor/layers/mamba/gdn_linear_attn.py b/vllm/model_executor/layers/mamba/gdn_linear_attn.py
deleted file mode 100644
index 55cd17fe5763..000000000000
--- a/vllm/model_executor/layers/mamba/gdn_linear_attn.py
+++ /dev/null
@@ -1,1046 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""Inference-only Qwen3-Next/Qwen3.5 model."""
-
-import torch
-from einops import rearrange
-from torch import nn
-from transformers.activations import ACT2FN
-
-from vllm import envs
-from vllm.config import (
-    VllmConfig,
-    get_current_vllm_config,
-)
-from vllm.distributed import (
-    divide,
-    get_tensor_model_parallel_rank,
-    get_tensor_model_parallel_world_size,
-)
-from vllm.forward_context import ForwardContext, get_forward_context
-from vllm.logger import init_logger
-from vllm.model_executor.custom_op import CustomOp, PluggableLayer
-from vllm.model_executor.layers.fla.ops import (
-    chunk_gated_delta_rule as fla_chunk_gated_delta_rule,
-)
-from vllm.model_executor.layers.fla.ops import (
-    fused_recurrent_gated_delta_rule_packed_decode,
-    fused_sigmoid_gating_delta_rule_update,
-)
-from vllm.model_executor.layers.fla.ops.chunk import l2norm_fwd
-from vllm.model_executor.layers.layernorm import RMSNormGated
-from vllm.model_executor.layers.linear import (
-    ColumnParallelLinear,
-    MergedColumnParallelLinear,
-    RowParallelLinear,
-)
-from vllm.model_executor.layers.mamba.abstract import MambaBase
-from vllm.model_executor.layers.mamba.mamba_mixer2 import mamba_v2_sharded_weight_loader
-from vllm.model_executor.layers.mamba.mamba_utils import (
-    MambaStateDtypeCalculator,
-    MambaStateShapeCalculator,
-)
-from vllm.model_executor.layers.mamba.ops.causal_conv1d import (
-    causal_conv1d_fn,
-    causal_conv1d_update,
-)
-from vllm.model_executor.layers.quantization import QuantizationConfig
-from vllm.model_executor.model_loader.weight_utils import (
-    sharded_weight_loader,
-)
-from vllm.model_executor.models.utils import extract_layer_index
-from vllm.model_executor.utils import set_weight_attrs
-from vllm.platforms import current_platform
-from vllm.transformers_utils.configs.qwen3_next import Qwen3NextConfig
-from vllm.triton_utils import tl, triton
-from vllm.utils.torch_utils import direct_register_custom_op
-from vllm.v1.attention.backend import AttentionMetadata
-from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadata
-
-logger = init_logger(__name__)
-
-
-def fi_chunk_gated_delta_rule(
-    q: torch.Tensor,
-    k: torch.Tensor,
-    v: torch.Tensor,
-    g: torch.Tensor,
-    beta: torch.Tensor,
-    initial_state: torch.Tensor,
-    output_final_state: bool,
-    cu_seqlens: torch.Tensor | None = None,
-    use_qk_l2norm_in_kernel: bool = True,
-):
-    from flashinfer.gdn_prefill import (
-        chunk_gated_delta_rule as chunk_gated_delta_rule_fi,
-    )
-
-    if use_qk_l2norm_in_kernel:
-        q = l2norm_fwd(q)
-        k = l2norm_fwd(k)
-
-    # use flashinfer implementation
-    q = q.squeeze(0).contiguous()
-    k = k.squeeze(0).contiguous()
-    v = v.squeeze(0).contiguous()
-
-    g = g.squeeze(0).contiguous()
-    beta = beta.squeeze(0).contiguous()
-    fi_state = initial_state.to(torch.float32)
-    fi_g = g.to(torch.float32)
-    fi_beta = beta.to(torch.float32)
-    result = chunk_gated_delta_rule_fi(
-        q=q,
-        k=k,
-        v=v,
-        g=torch.exp(fi_g),
-        beta=fi_beta,
-        initial_state=fi_state,
-        output_final_state=output_final_state,
-        cu_seqlens=cu_seqlens,
-    )
-    # FlashInfer returns (output, state) when output_final_state=True,
-    # or just output when output_final_state=False.
-    # Unsqueeze back to 4D (1, L, H, D) to match fla output format
-    if output_final_state:
-        output, final_state = result
-        return output.unsqueeze(0), final_state
-    else:
-        return result.unsqueeze(0), None
-
-
-@CustomOp.register("chunk_gated_delta_rule")
-class ChunkGatedDeltaRule(CustomOp):
-    def __init__(self) -> None:
-        super().__init__()
-        backend_cfg = get_current_vllm_config().additional_config.get(
-            "gdn_prefill_backend", "auto"
-        )
-        backend = str(backend_cfg).strip().lower()
-
-        supports_flashinfer = (
-            current_platform.is_cuda() and current_platform.is_device_capability(90)
-        )
-
-        if backend == "flashinfer":
-            use_flashinfer = supports_flashinfer
-            if not use_flashinfer:
-                logger.warning_once(
-                    "GDN prefill backend 'flashinfer' is selected but "
-                    "cannot use this kernel on the current platform. "
-                    "Falling back to Triton/FLA."
-                )
-        elif backend == "triton":
-            use_flashinfer = False
-        else:
-            use_flashinfer = supports_flashinfer
-
-        if use_flashinfer:
-            logger.info_once("Using FlashInfer GDN prefill kernel", scope="local")
-            logger.info_once(
-                "FlashInfer GDN prefill kernel is JIT-compiled; first run may "
-                "take a while to compile. Set `--gdn-prefill-backend triton` to "
-                "avoid JIT compile time.",
-                scope="local",
-            )
-        else:
-            logger.info_once("Using Triton/FLA GDN prefill kernel", scope="local")
-
-        self._forward_method = (
-            self.forward_cuda if use_flashinfer else self.forward_native
-        )
-
-    def forward_cuda(
-        self,
-        q: torch.Tensor,
-        k: torch.Tensor,
-        v: torch.Tensor,
-        g: torch.Tensor,
-        beta: torch.Tensor,
-        initial_state: torch.Tensor,
-        output_final_state: bool,
-        cu_seqlens: torch.Tensor | None = None,
-        use_qk_l2norm_in_kernel: bool = True,
-    ):
-        return fi_chunk_gated_delta_rule(
-            q=q,
-            k=k,
-            v=v,
-            g=g,
-            beta=beta,
-            initial_state=initial_state,
-            output_final_state=output_final_state,
-            cu_seqlens=cu_seqlens,
-            use_qk_l2norm_in_kernel=use_qk_l2norm_in_kernel,
-        )
-
-    def forward_native(
-        self,
-        q: torch.Tensor,
-        k: torch.Tensor,
-        v: torch.Tensor,
-        g: torch.Tensor,
-        beta: torch.Tensor,
-        initial_state: torch.Tensor,
-        output_final_state: bool,
-        cu_seqlens: torch.Tensor | None = None,
-        use_qk_l2norm_in_kernel: bool = True,
-    ):
-        return fla_chunk_gated_delta_rule(
-            q=q,
-            k=k,
-            v=v,
-            g=g,
-            beta=beta,
-            initial_state=initial_state,
-            output_final_state=output_final_state,
-            cu_seqlens=cu_seqlens,
-            use_qk_l2norm_in_kernel=use_qk_l2norm_in_kernel,
-        )
-
-
-@PluggableLayer.register("gated_delta_net_attention")
-class GatedDeltaNetAttention(PluggableLayer, MambaBase):
-    @property
-    def mamba_type(self) -> str:
-        return "gdn_attention"
-
-    def get_state_dtype(self) -> tuple[torch.dtype, torch.dtype]:
-        return MambaStateDtypeCalculator.gated_delta_net_state_dtype(
-            self.model_config.dtype,
-            self.cache_config.mamba_cache_dtype,
-            self.cache_config.mamba_ssm_cache_dtype,
-        )
-
-    def get_state_shape(self) -> tuple[tuple[int, ...], tuple[int, ...]]:
-        return MambaStateShapeCalculator.gated_delta_net_state_shape(
-            self.tp_size,
-            self.num_k_heads,
-            self.num_v_heads,
-            self.head_k_dim,
-            self.head_v_dim,
-            self.conv_kernel_size,
-            self.num_spec,
-        )
-
-    def __init__(
-        self,
-        config: Qwen3NextConfig,
-        vllm_config: VllmConfig,
-        prefix: str = "",
-        create_in_proj_qkvz: bool = True,
-        gqa_interleaved_layout=False,
-    ) -> None:
-        super().__init__()
-        self.tp_size = get_tensor_model_parallel_world_size()
-        self.tp_rank = get_tensor_model_parallel_rank()
-        self.hidden_size = config.hidden_size
-        self.num_v_heads = config.linear_num_value_heads
-        self.num_k_heads = config.linear_num_key_heads
-        self.head_k_dim = config.linear_key_head_dim
-        self.head_v_dim = config.linear_value_head_dim
-        self.key_dim = self.head_k_dim * self.num_k_heads
-        self.value_dim = self.head_v_dim * self.num_v_heads
-
-        self.conv_kernel_size = config.linear_conv_kernel_dim
-        self.layer_idx = extract_layer_index(prefix)
-        self.activation = config.hidden_act
-        self.act = ACT2FN[config.hidden_act]
-        self.layer_norm_epsilon = config.rms_norm_eps
-        self.prefix = prefix
-        self.config = config
-        self.model_config = vllm_config.model_config
-        self.cache_config = vllm_config.cache_config
-        quant_config = vllm_config.quant_config
-        self.speculative_config = vllm_config.speculative_config
-        self.num_spec = (
-            self.speculative_config.num_speculative_tokens
-            if self.speculative_config
-            else 0
-        )
-        self.gqa_interleaved_layout = gqa_interleaved_layout
-
-        # QKV
-        self.conv_dim = self.key_dim * 2 + self.value_dim
-        self.conv1d = ColumnParallelLinear(
-            input_size=self.conv_kernel_size,
-            output_size=self.conv_dim,
-            bias=False,
-            prefix=f"{prefix}.conv1d",
-        )
-        self.conv1d.weight.data = self.conv1d.weight.data.unsqueeze(1)
-
-        # projection of the input hidden states
-        # Qwen3-Next and Qwen3.5 has a different qkv_proj layout,
-        # we need to create qkvz_proj adaptively here.
-        # When create_in_proj_qkvz is False (e.g. LoRA enabled in Qwen3.5),
-        # in_proj_qkv and in_proj_z are created separately instead.
-        if create_in_proj_qkvz:
-            self.in_proj_qkvz = self.create_qkvz_proj(
-                hidden_size=self.hidden_size,
-                key_dim=self.key_dim,
-                value_dim=self.value_dim,
-                quant_config=quant_config,
-                prefix=f"{prefix}.in_proj_qkvz",
-            )
-        else:
-            # LoRA case (Qwen3.5 only): keep q/k/v and z as separate modules
-            # so that LoRA adapters can be applied independently.
-            self.in_proj_qkv = MergedColumnParallelLinear(
-                input_size=self.hidden_size,
-                output_sizes=[self.key_dim, self.key_dim, self.value_dim],
-                bias=False,
-                quant_config=quant_config,
-                prefix=f"{prefix}.in_proj_qkv",
-            )
-            self.in_proj_z = ColumnParallelLinear(
-                input_size=self.hidden_size,
-                output_size=self.value_dim,
-                bias=False,
-                quant_config=quant_config,
-                prefix=f"{prefix}.in_proj_z",
-            )
-        # ba_proj doesn't support blockwise fp8 quantization.
-        # Qwen3-Next and Qwen3.5 have different in_proj_ba checkpoint
-        # layouts, so we use a factory method to create the projection.
-        self.in_proj_ba = self.create_ba_proj(
-            hidden_size=self.hidden_size,
-            num_v_heads=self.num_v_heads,
-            quant_config=quant_config,
-            prefix=f"{prefix}.in_proj_ba",
-        )
-
-        query_key_settings = (self.key_dim, 0, False)
-        value_settings = (self.value_dim, 0, False)
-
-        self.conv1d.weight.weight_loader = mamba_v2_sharded_weight_loader(
-            [
-                query_key_settings,
-                query_key_settings,
-                value_settings,
-            ],
-            self.tp_size,
-            self.tp_rank,
-        )
-
-        # selective projection used to make dt, B and C input dependent
-
-        # time step projection (discretization)
-        # instantiate once and copy inv_dt in init_weights of PretrainedModel
-        self.dt_bias = nn.Parameter(
-            torch.ones(self.num_v_heads // self.tp_size),
-        )
-        self.A_log = nn.Parameter(
-            torch.empty(
-                divide(self.num_v_heads, self.tp_size),
-                dtype=torch.float32,
-            )
-        )
-
-        set_weight_attrs(self.A_log, {"weight_loader": sharded_weight_loader(0)})
-        set_weight_attrs(self.dt_bias, {"weight_loader": sharded_weight_loader(0)})
-
-        self.norm = RMSNormGated(
-            self.head_v_dim,
-            eps=self.layer_norm_epsilon,
-            group_size=None,
-            norm_before_gate=True,
-            device=current_platform.current_device(),
-        )
-
-        self.out_proj = RowParallelLinear(
-            self.value_dim,
-            self.hidden_size,
-            bias=False,
-            input_is_parallel=True,
-            quant_config=quant_config,
-            prefix=f"{prefix}.out_proj",
-        )
-
-        self.chunk_gated_delta_rule = ChunkGatedDeltaRule()
-        self.enable_packed_recurrent_decode = (
-            envs.VLLM_ENABLE_FLA_PACKED_RECURRENT_DECODE
-        )
-
-        compilation_config = get_current_vllm_config().compilation_config
-        if prefix in compilation_config.static_forward_context:
-            raise ValueError(f"Duplicate layer name: {prefix}")
-        compilation_config.static_forward_context[prefix] = self
-
-    def create_qkvz_proj(
-        self,
-        hidden_size: int,
-        key_dim: int,
-        value_dim: int,
-        quant_config: QuantizationConfig | None,
-        prefix: str,
-    ) -> MergedColumnParallelLinear:
-        # When gqa_interleaved_layout=True (Qwen3-Next), qkvz weights are
-        # stored as a single fused tensor with interleaved GQA layout, so we
-        # use one output shard to preserve the interleaving across TP ranks.
-        # When gqa_interleaved_layout=False (Qwen3.5), the checkpoint has
-        # separate q, k, v, z weights, so we use 4 independent output sizes.
-        output_sizes = (
-            [sum((key_dim, key_dim, value_dim, value_dim))]
-            if self.gqa_interleaved_layout
-            else [key_dim, key_dim, value_dim, value_dim]
-        )
-        return MergedColumnParallelLinear(
-            input_size=hidden_size,
-            output_sizes=output_sizes,
-            bias=False,
-            quant_config=quant_config,
-            prefix=prefix,
-        )
-
-    def create_ba_proj(
-        self,
-        hidden_size: int,
-        num_v_heads: int,
-        quant_config: QuantizationConfig | None,
-        prefix: str,
-    ) -> MergedColumnParallelLinear:
-        # When gqa_interleaved_layout=True (Qwen3-Next), in_proj_ba is stored
-        # as a single fused weight [b_g0, a_g0, b_g1, a_g1, ...] interleaved
-        # by key-head group; a single output shard preserves this across TP.
-        # When gqa_interleaved_layout=False (Qwen3.5), in_proj_b and in_proj_a
-        # are separate checkpoint weights, so we use 2 independent output sizes.
-        output_sizes = (
-            [num_v_heads * 2] if self.gqa_interleaved_layout else [num_v_heads] * 2
-        )
-        return MergedColumnParallelLinear(
-            input_size=hidden_size,
-            output_sizes=output_sizes,
-            bias=False,
-            quant_config=quant_config,
-            prefix=prefix,
-        )
-
-    def fix_query_key_value_ordering(
-        self,
-        mixed_qkvz: torch.Tensor,
-        mixed_ba: torch.Tensor,
-    ):
-        """
-        Derives `query`, `key` and `value` tensors from `mixed_qkvzba`.
-        """
-        new_tensor_shape_qkvz = mixed_qkvz.size()[:-1] + (
-            self.num_k_heads // self.tp_size,
-            (
-                self.head_k_dim
-                + self.head_k_dim
-                + (self.head_v_dim + self.head_v_dim)
-                * self.num_v_heads
-                // self.num_k_heads
-            ),
-        )
-        new_tensor_shape_ba = mixed_ba.size()[:-1] + (
-            self.num_k_heads // self.tp_size,
-            2 * self.num_v_heads // self.num_k_heads,
-        )
-
-        mixed_qkvz = mixed_qkvz.view(*new_tensor_shape_qkvz)
-        mixed_ba = mixed_ba.view(*new_tensor_shape_ba)
-
-        split_arg_list_qkvz = [
-            self.head_k_dim,
-            self.head_k_dim,
-            (self.num_v_heads // self.num_k_heads * self.head_v_dim),
-            (self.num_v_heads // self.num_k_heads * self.head_v_dim),
-        ]
-        split_arg_list_ba = [
-            self.num_v_heads // self.num_k_heads,
-            self.num_v_heads // self.num_k_heads,
-        ]
-
-        # [b, sq, ng, (hn + hn + np/ng * hn + np/ng + np/ng)]
-        # --> [b, sq, ng, hn], [b, sq, ng, hn], [b, sq, ng, np/ng * hn],
-        #  [b, sq, ng, np/ng * hn], [b, sq, ng, np/ng], [b, sq, ng, np/ng]
-        (query, key, value, z) = torch.split(mixed_qkvz, split_arg_list_qkvz, dim=2)
-        (b, a) = torch.split(mixed_ba, split_arg_list_ba, dim=2)
-
-        # [b, sq, ng, np/ng * hn] -> [b, sq, np, hn]
-        value = value.reshape(value.size(0), -1, self.head_v_dim)
-        z = z.reshape(z.size(0), -1, self.head_v_dim)
-        b = b.reshape(b.size(0), self.num_v_heads // self.tp_size)
-        a = a.reshape(a.size(0), self.num_v_heads // self.tp_size)
-
-        return query, key, value, z, b, a
-
-    def rearrange_mixed_qkv(self, mixed_qkv):
-        if mixed_qkv is None:
-            return None, None, None
-        query, key, value = torch.split(
-            mixed_qkv,
-            [
-                self.key_dim // self.tp_size,
-                self.key_dim // self.tp_size,
-                self.value_dim // self.tp_size,
-            ],
-            dim=-1,
-        )
-        query, key = map(
-            lambda x: rearrange(x, "l (h d) -> 1 l h d", d=self.head_k_dim),
-            (query, key),
-        )
-        value = rearrange(value, "l (h d) -> 1 l h d", d=self.head_v_dim)
-        return query.contiguous(), key.contiguous(), value.contiguous()
-
-    def forward(
-        self,
-        hidden_states: torch.Tensor,
-        output: torch.Tensor,
-    ):
-        """
-        Forward pass with three parts:
-        1. Input projection
-        2. Core attention (custom op)
-        3. Output projection
-        """
-        num_tokens = hidden_states.size(0)
-        # ============================================================
-        # Part 1: Input Projection
-        # ============================================================
-        if hasattr(self, "in_proj_qkv"):
-            # LoRA path (Qwen3.5 only): separate in_proj_qkv and in_proj_z
-            mixed_qkv, _ = self.in_proj_qkv(hidden_states)
-            ba, _ = self.in_proj_ba(hidden_states)
-            z, _ = self.in_proj_z(hidden_states)
-            z = z.reshape(z.size(0), -1, self.head_v_dim)
-            b, a = ba.chunk(2, dim=-1)
-            b = b.contiguous()
-            a = a.contiguous()
-        else:
-            mixed_qkvz, _ = self.in_proj_qkvz(hidden_states)
-            ba, _ = self.in_proj_ba(hidden_states)
-
-            if self.gqa_interleaved_layout:
-                # Qwen3-Next: unpack the interleaved GQA layout
-                query, key, value, z, b, a = self.fix_query_key_value_ordering(
-                    mixed_qkvz, ba
-                )
-                query, key, value = map(
-                    lambda x: rearrange(x, "l p d -> l (p d)"), (query, key, value)
-                )
-                mixed_qkv = torch.cat((query, key, value), dim=-1)
-            else:
-                # Qwen3.5: weights are already in [q, k, v, z] and [b, a] order
-                qkv_size = (self.key_dim * 2 + self.value_dim) // self.tp_size
-                z_size = self.value_dim // self.tp_size
-                mixed_qkv, z = mixed_qkvz.split([qkv_size, z_size], dim=-1)
-                z = z.reshape(z.size(0), -1, self.head_v_dim)
-                b, a = ba.chunk(2, dim=-1)
-                b = b.contiguous()
-                a = a.contiguous()
-
-        # ============================================================
-        # Part 2: Core Attention (Custom Op)
-        # ============================================================
-        # Note: we should not use torch.empty here like other attention backends,
-        # see discussions in https://github.com/vllm-project/vllm/pull/28182
-        core_attn_out = torch.zeros(
-            (num_tokens, self.num_v_heads // self.tp_size, self.head_v_dim),
-            dtype=hidden_states.dtype,
-            device=hidden_states.device,
-        )
-
-        torch.ops.vllm.gdn_attention_core(
-            mixed_qkv,
-            b,
-            a,
-            core_attn_out,
-            self.prefix,
-        )
-
-        # ============================================================
-        # Part 3: Output Projection
-        # ============================================================
-        z_shape_og = z.shape
-        # Reshape input data into 2D tensor
-        core_attn_out = core_attn_out.reshape(-1, core_attn_out.shape[-1])
-        z = z.reshape(-1, z.shape[-1])
-        core_attn_out = self.norm(core_attn_out, z)
-        core_attn_out = core_attn_out.reshape(z_shape_og)
-        core_attn_out = rearrange(core_attn_out, "... h d -> ... (h d)")
-        output[:num_tokens], _ = self.out_proj(core_attn_out)
-
-    def _warmup_prefill_kernels(self, mixed_qkv: torch.Tensor) -> None:
-        """Warm up GDN prefill kernels during V1 profiling.
-
-        During V1 profile runs, ``_forward_core`` returns early because
-        ``attn_metadata`` is ``None``, so the autotuned kernels used by
-        ``chunk_gated_delta_rule`` (e.g. ``solve_tril``,
-        ``chunk_scaled_dot_kkt``) are never invoked.  After profiling,
-        vLLM allocates KV cache using most of the remaining GPU memory.
-        When the first real inference triggers the autotuner it OOMs
-        because there is not enough memory left for benchmarking.
-
-        This method runs minimal forward passes through
-        ``chunk_gated_delta_rule`` with small dummy tensors to force
-        autotuning while GPU memory is still plentiful.  The autotuner
-        results are cached globally, so only the first layer incurs
-        actual benchmarking cost.
-
-        Most kernels use a fixed ``BT = chunk_size`` (64), but
-        ``chunk_fwd_kernel_o`` recomputes ``BT`` from the sequence
-        length: ``min(64, max(16, next_power_of_2(T)))``.  Since ``BT``
-        is part of its autotune key, we run warmup passes with T = 16,
-        32, and 64 to cover all possible ``BT`` values.
-
-        The decode path uses ``fused_sigmoid_gating_delta_rule_update``
-        which has fixed kernel parameters (no autotuning), so only the
-        prefill (chunked) path needs warming up.
-        """
-        if hasattr(self, "_prefill_kernels_warmed_up"):
-            return
-        self._prefill_kernels_warmed_up = True
-
-        device = mixed_qkv.device
-        dtype = mixed_qkv.dtype
-        num_k_heads = self.num_k_heads // self.tp_size
-        num_v_heads = self.num_v_heads // self.tp_size
-        _, state_dtype = self.get_state_dtype()
-
-        # Run warmup for each possible BT value of chunk_fwd_kernel_o:
-        #   T=16 → BT=16, T=32 → BT=32, T=64 → BT=64.
-        # Other kernels always use BT=chunk_size(64), so their autotune
-        # cache is populated on the first pass and reused thereafter.
-        for T in (16, 32, 64):
-            q = torch.randn(
-                1, T, num_k_heads, self.head_k_dim, device=device, dtype=dtype
-            )
-            k = torch.randn(
-                1, T, num_k_heads, self.head_k_dim, device=device, dtype=dtype
-            )
-            v = torch.randn(
-                1, T, num_v_heads, self.head_v_dim, device=device, dtype=dtype
-            )
-            # NOTE: g and beta must have the same dtypes as during
-            # inference, so we construct them with the same function
-            # (fused_gdn_gating). dummy_a and dummy_b are throwaway
-            # inputs required by that function.
-            dummy_a = torch.randn(T, num_v_heads, device=device, dtype=dtype)
-            dummy_b = torch.randn(T, num_v_heads, device=device, dtype=dtype)
-            g, beta = fused_gdn_gating(self.A_log, dummy_a, dummy_b, self.dt_bias)
-            state = torch.zeros(
-                1,
-                num_v_heads,
-                self.head_v_dim,
-                self.head_k_dim,
-                device=device,
-                dtype=state_dtype,
-            )
-            cu_seqlens = torch.tensor([0, T], device=device, dtype=torch.int32)
-
-            try:
-                self.chunk_gated_delta_rule(
-                    q=q,
-                    k=k,
-                    v=v,
-                    g=g,
-                    beta=beta,
-                    initial_state=state,
-                    output_final_state=True,
-                    cu_seqlens=cu_seqlens,
-                    use_qk_l2norm_in_kernel=True,
-                )
-            except Exception:
-                logger.warning(
-                    "GDN prefill kernel warmup (T=%d) failed for "
-                    "layer %s. First inference may OOM due to "
-                    "autotuner.",
-                    T,
-                    self.prefix,
-                    exc_info=True,
-                )
-            else:
-                logger.debug(
-                    "GDN prefill kernel warmup (T=%d) completed for layer %s",
-                    T,
-                    self.prefix,
-                )
-            finally:
-                del q, k, v, dummy_a, dummy_b, g, beta, state, cu_seqlens
-
-        torch.accelerator.empty_cache()
-
-    def _forward_core(
-        self,
-        mixed_qkv: torch.Tensor,
-        b: torch.Tensor,
-        a: torch.Tensor,
-        core_attn_out: torch.Tensor,
-    ):
-        forward_context = get_forward_context()
-        attn_metadata: AttentionMetadata = forward_context.attn_metadata
-
-        if attn_metadata is None:
-            # V1 profile run — warm up prefill kernels so that
-            # autotuning completes before KV cache allocation.
-            self._warmup_prefill_kernels(mixed_qkv)
-            return
-
-        assert isinstance(attn_metadata, dict)
-        attn_metadata = attn_metadata[self.prefix]
-        assert isinstance(attn_metadata, GDNAttentionMetadata)
-
-        if (
-            self.enable_packed_recurrent_decode
-            and attn_metadata.spec_sequence_masks is None
-            and attn_metadata.num_prefills == 0
-            and attn_metadata.num_decodes > 0
-        ):
-            return self._forward_core_decode_non_spec(
-                mixed_qkv=mixed_qkv,
-                b=b,
-                a=a,
-                core_attn_out=core_attn_out,
-                attn_metadata=attn_metadata,
-            )
-
-        has_initial_state = attn_metadata.has_initial_state
-        spec_query_start_loc = attn_metadata.spec_query_start_loc
-        non_spec_query_start_loc = attn_metadata.non_spec_query_start_loc
-        spec_sequence_masks = attn_metadata.spec_sequence_masks
-        spec_token_indx = attn_metadata.spec_token_indx
-        non_spec_token_indx = attn_metadata.non_spec_token_indx
-        spec_state_indices_tensor = attn_metadata.spec_state_indices_tensor  # noqa: E501
-        non_spec_state_indices_tensor = attn_metadata.non_spec_state_indices_tensor  # noqa: E501
-        self_kv_cache = self.kv_cache
-        conv_state = self_kv_cache[0].transpose(-1, -2)
-        ssm_state = self_kv_cache[1]
-        num_actual_tokens = attn_metadata.num_actual_tokens
-        num_accepted_tokens = attn_metadata.num_accepted_tokens
-
-        mixed_qkv = mixed_qkv[:num_actual_tokens]
-        b = b[:num_actual_tokens]
-        a = a[:num_actual_tokens]
-
-        # 1. Convolution sequence transformation
-        conv_weights = self.conv1d.weight.view(
-            self.conv1d.weight.size(0), self.conv1d.weight.size(2)
-        )
-
-        if spec_sequence_masks is not None:
-            if attn_metadata.num_prefills == 0 and attn_metadata.num_decodes == 0:
-                mixed_qkv_spec = mixed_qkv
-                mixed_qkv_non_spec = None
-            else:
-                mixed_qkv_spec = mixed_qkv.index_select(0, spec_token_indx)
-                mixed_qkv_non_spec = mixed_qkv.index_select(0, non_spec_token_indx)
-        else:
-            mixed_qkv_spec = None
-            mixed_qkv_non_spec = mixed_qkv
-
-        # 1.1: Process the multi-query part
-        if spec_sequence_masks is not None:
-            mixed_qkv_spec = causal_conv1d_update(
-                mixed_qkv_spec,
-                conv_state,
-                conv_weights,
-                self.conv1d.bias,
-                self.activation,
-                conv_state_indices=spec_state_indices_tensor[:, 0][
-                    : attn_metadata.num_spec_decodes
-                ],
-                num_accepted_tokens=num_accepted_tokens,
-                query_start_loc=spec_query_start_loc,
-                max_query_len=spec_state_indices_tensor.size(-1),
-                validate_data=False,
-            )
-
-        # 1.2: Process the remaining part
-        if attn_metadata.num_prefills > 0:
-            assert mixed_qkv_non_spec is not None
-            mixed_qkv_non_spec_T = mixed_qkv_non_spec.transpose(0, 1)
-            # - "cache_indices" updates the conv_state cache in positions
-            #   pointed to by "state_indices_tensor"
-            mixed_qkv_non_spec = causal_conv1d_fn(
-                mixed_qkv_non_spec_T,
-                conv_weights,
-                self.conv1d.bias,
-                activation=self.activation,
-                conv_states=conv_state,
-                has_initial_state=has_initial_state,
-                cache_indices=non_spec_state_indices_tensor,
-                query_start_loc=non_spec_query_start_loc,
-                metadata=attn_metadata,
-            ).transpose(0, 1)
-        elif attn_metadata.num_decodes > 0:
-            assert mixed_qkv_non_spec is not None
-            mixed_qkv_non_spec = causal_conv1d_update(
-                mixed_qkv_non_spec,
-                conv_state,
-                conv_weights,
-                self.conv1d.bias,
-                self.activation,
-                conv_state_indices=non_spec_state_indices_tensor[
-                    : attn_metadata.num_actual_tokens
-                ],
-                validate_data=True,
-            )
-        else:
-            mixed_qkv_non_spec = None
-
-        query_spec, key_spec, value_spec = self.rearrange_mixed_qkv(mixed_qkv_spec)
-        query_non_spec, key_non_spec, value_non_spec = self.rearrange_mixed_qkv(
-            mixed_qkv_non_spec
-        )
-
-        if attn_metadata.num_prefills > 0:
-            g, beta = fused_gdn_gating(self.A_log, a, b, self.dt_bias)
-            if spec_sequence_masks is not None:
-                g_non_spec = g.index_select(1, non_spec_token_indx)
-                beta_non_spec = beta.index_select(1, non_spec_token_indx)
-            else:
-                g_non_spec = g
-                beta_non_spec = beta
-        else:
-            g_non_spec = None
-            beta_non_spec = None
-
-        # 2. Recurrent attention
-
-        # 2.1: Process the multi-query part
-        if spec_sequence_masks is not None:
-            core_attn_out_spec, last_recurrent_state = (
-                fused_sigmoid_gating_delta_rule_update(
-                    A_log=self.A_log,
-                    a=a,
-                    b=b,
-                    dt_bias=self.dt_bias,
-                    q=query_spec,
-                    k=key_spec,
-                    v=value_spec,
-                    initial_state=ssm_state,
-                    inplace_final_state=True,
-                    cu_seqlens=spec_query_start_loc[
-                        : attn_metadata.num_spec_decodes + 1
-                    ],
-                    ssm_state_indices=spec_state_indices_tensor,
-                    num_accepted_tokens=num_accepted_tokens,
-                    use_qk_l2norm_in_kernel=True,
-                )
-            )
-        else:
-            core_attn_out_spec, last_recurrent_state = None, None
-
-        # 2.2: Process the remaining part
-        if attn_metadata.num_prefills > 0:
-            initial_state = ssm_state[non_spec_state_indices_tensor].contiguous()
-            initial_state[~has_initial_state, ...] = 0
-            (
-                core_attn_out_non_spec,
-                last_recurrent_state,
-            ) = self.chunk_gated_delta_rule(
-                q=query_non_spec,
-                k=key_non_spec,
-                v=value_non_spec,
-                g=g_non_spec,
-                beta=beta_non_spec,
-                initial_state=initial_state,
-                output_final_state=True,
-                cu_seqlens=non_spec_query_start_loc,
-                use_qk_l2norm_in_kernel=True,
-            )
-            # Init cache
-            ssm_state[non_spec_state_indices_tensor] = last_recurrent_state.to(
-                ssm_state.dtype
-            )
-        elif attn_metadata.num_decodes > 0:
-            core_attn_out_non_spec, last_recurrent_state = (
-                fused_sigmoid_gating_delta_rule_update(
-                    A_log=self.A_log,
-                    a=a,
-                    b=b,
-                    dt_bias=self.dt_bias,
-                    q=query_non_spec,
-                    k=key_non_spec,
-                    v=value_non_spec,
-                    initial_state=ssm_state,
-                    inplace_final_state=True,
-                    cu_seqlens=non_spec_query_start_loc[
-                        : attn_metadata.num_decodes + 1
-                    ],
-                    ssm_state_indices=non_spec_state_indices_tensor,
-                    use_qk_l2norm_in_kernel=True,
-                )
-            )
-        else:
-            core_attn_out_non_spec, last_recurrent_state = None, None
-
-        # 3. Merge core attention output
-        if spec_sequence_masks is not None and core_attn_out_non_spec is not None:
-            merged_out = torch.empty(
-                (1, num_actual_tokens, *core_attn_out_spec.shape[2:]),
-                dtype=core_attn_out_non_spec.dtype,
-                device=core_attn_out_non_spec.device,
-            )
-            merged_out.index_copy_(1, spec_token_indx, core_attn_out_spec)
-            merged_out.index_copy_(1, non_spec_token_indx, core_attn_out_non_spec)
-            core_attn_out[:num_actual_tokens] = merged_out.squeeze(0)
-        elif spec_sequence_masks is not None:
-            core_attn_out[:num_actual_tokens] = core_attn_out_spec.squeeze(0)
-        else:
-            core_attn_out[:num_actual_tokens] = core_attn_out_non_spec.squeeze(0)
-
-    def _forward_core_decode_non_spec(
-        self,
-        mixed_qkv: torch.Tensor,
-        b: torch.Tensor,
-        a: torch.Tensor,
-        core_attn_out: torch.Tensor,
-        attn_metadata: GDNAttentionMetadata,
-    ):
-        """
-        Core attention computation with a packed non-spec decode fast path.
-        """
-        non_spec_state_indices_tensor = attn_metadata.non_spec_state_indices_tensor  # noqa: E501
-        self_kv_cache = self.kv_cache
-        conv_state = self_kv_cache[0].transpose(-1, -2)
-        ssm_state = self_kv_cache[1]
-        num_actual_tokens = attn_metadata.num_actual_tokens
-
-        mixed_qkv = mixed_qkv[:num_actual_tokens]
-        b = b[:num_actual_tokens]
-        a = a[:num_actual_tokens]
-
-        conv_weights = self.conv1d.weight.view(
-            self.conv1d.weight.size(0), self.conv1d.weight.size(2)
-        )
-        mixed_qkv_non_spec = causal_conv1d_update(
-            mixed_qkv,
-            conv_state,
-            conv_weights,
-            self.conv1d.bias,
-            self.activation,
-            conv_state_indices=non_spec_state_indices_tensor[:num_actual_tokens],
-            validate_data=False,
-        )
-        out_buf = core_attn_out[:num_actual_tokens].unsqueeze(1)
-        fused_recurrent_gated_delta_rule_packed_decode(
-            mixed_qkv=mixed_qkv_non_spec,
-            a=a,
-            b=b,
-            A_log=self.A_log,
-            dt_bias=self.dt_bias,
-            scale=self.head_k_dim**-0.5,
-            initial_state=ssm_state,
-            out=out_buf,
-            ssm_state_indices=non_spec_state_indices_tensor[:num_actual_tokens],
-            use_qk_l2norm_in_kernel=True,
-        )
-        return
-
-
-def gdn_attention_core(
-    mixed_qkv: torch.Tensor,
-    b: torch.Tensor,
-    a: torch.Tensor,
-    core_attn_out: torch.Tensor,
-    layer_name: str,
-) -> None:
-    """
-    Custom op for the core attention computation.
-    Only handles the convolution + recurrent attention part.
-    Input/output projections are handled outside this op.
-    """
-    forward_context: ForwardContext = get_forward_context()
-    self = forward_context.no_compile_layers[layer_name]
-    self._forward_core(
-        mixed_qkv=mixed_qkv,
-        b=b,
-        a=a,
-        core_attn_out=core_attn_out,
-    )
-
-
-def gdn_attention_core_fake(
-    mixed_qkv: torch.Tensor,
-    b: torch.Tensor,
-    a: torch.Tensor,
-    core_attn_out: torch.Tensor,
-    layer_name: str,
-) -> None:
-    """Fake implementation for torch.compile."""
-    return
-
-
-direct_register_custom_op(
-    op_name="gdn_attention_core",
-    op_func=gdn_attention_core,
-    mutates_args=["core_attn_out"],
-    fake_impl=gdn_attention_core_fake,
-)
-
-
-@triton.jit
-def fused_gdn_gating_kernel(
-    g,
-    beta_output,
-    A_log,
-    a,
-    b,
-    dt_bias,
-    seq_len,
-    NUM_HEADS: tl.constexpr,
-    beta: tl.constexpr,
-    threshold: tl.constexpr,
-    BLK_HEADS: tl.constexpr,
-):
-    i_b, i_s, i_d = tl.program_id(0), tl.program_id(1), tl.program_id(2)
-    head_off = i_d * BLK_HEADS + tl.arange(0, BLK_HEADS)
-    off = i_b * seq_len * NUM_HEADS + i_s * NUM_HEADS + head_off
-    mask = head_off < NUM_HEADS
-    blk_A_log = tl.load(A_log + head_off, mask=mask)
-    blk_a = tl.load(a + off, mask=mask)
-    blk_b = tl.load(b + off, mask=mask)
-    blk_bias = tl.load(dt_bias + head_off, mask=mask)
-    # If the model is loaded in fp16, without the .float() here, A might be -inf
-    x = blk_a.to(tl.float32) + blk_bias.to(tl.float32)
-    softplus_x = tl.where(
-        beta * x <= threshold, (1 / beta) * tl.log(1 + tl.exp(beta * x)), x
-    )
-    blk_g = -tl.exp(blk_A_log.to(tl.float32)) * softplus_x
-    tl.store(g + off, blk_g.to(g.dtype.element_ty), mask=mask)
-    # compute beta_output = sigmoid(b)
-    blk_beta_output = tl.sigmoid(blk_b.to(tl.float32))
-    tl.store(
-        beta_output + off, blk_beta_output.to(beta_output.dtype.element_ty), mask=mask
-    )
-
-
-def fused_gdn_gating(
-    A_log: torch.Tensor,
-    a: torch.Tensor,
-    b: torch.Tensor,
-    dt_bias: torch.Tensor,
-    beta: float = 1.0,
-    threshold: float = 20.0,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    """
-    Fused computation of g and beta for Gated Delta Net.
-    g = -self.A_log.float().exp() * F.softplus(a.float() + self.dt_bias)
-    beta_output = b.sigmoid()
-    TODO maybe use torch.compile to replace this triton kernel
-    """
-    batch, num_heads = a.shape
-    seq_len = 1
-    grid = (batch, seq_len, triton.cdiv(num_heads, 8))
-    g = torch.empty(1, batch, num_heads, dtype=torch.float32, device=a.device)
-    beta_output = torch.empty(1, batch, num_heads, dtype=b.dtype, device=b.device)
-    fused_gdn_gating_kernel[grid](
-        g,
-        beta_output,
-        A_log,
-        a,
-        b,
-        dt_bias,
-        seq_len,
-        num_heads,
-        beta,
-        threshold,
-        8,
-        num_warps=1,
-    )
-    return g, beta_output
diff --git a/vllm/model_executor/layers/mamba/lamport_workspace.py b/vllm/model_executor/layers/mamba/lamport_workspace.py
new file mode 100644
index 000000000000..afae19c75feb
--- /dev/null
+++ b/vllm/model_executor/layers/mamba/lamport_workspace.py
@@ -0,0 +1,302 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+import array
+import contextlib
+import struct
+import sys
+import threading
+
+import torch
+
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
+
+_ALIGN = 1 << 21  # 2 MiB — CUDA IPC allocation alignment
+
+
+# ---------------------------------------------------------------------------
+# CUDA helpers
+# ---------------------------------------------------------------------------
+
+
+def _check(error):
+    """Raise on CUDA runtime error."""
+    success = getattr(cudart.cudaError_t, "cudaSuccess", None) or cudart.cudaError_t(0)
+    if error != success:
+        raise RuntimeError(f"CUDA runtime error: {error}")
+
+
+def _cuda_malloc(size: int):
+    aligned = ((size + _ALIGN - 1) >> 21) << 21
+    err, ptr = cudart.cudaMalloc(aligned)
+    _check(err)
+    return ptr, aligned
+
+
+def _cuda_free(ptr: int):
+    if ptr:
+        _check(cudart.cudaFree(ptr)[0])
+
+
+def _cuda_memset_zero(ptr: int, size: int):
+    _check(cudart.cudaMemset(ptr, 0, size)[0])
+
+
+def _cuda_memcpy_d2d(dst: int, src: int, size: int):
+    _check(
+        cudart.cudaMemcpy(
+            dst, src, size, cudart.cudaMemcpyKind.cudaMemcpyDeviceToDevice
+        )[0]
+    )
+
+
+# ---------------------------------------------------------------------------
+# IPC buffer
+# ---------------------------------------------------------------------------
+
+
+class IpcBuffer:
+    """
+    Allocates CUDA device memory and exchanges IPC handles with all ranks
+    so that every rank holds a valid device pointer to every other rank's buffer.
+    """
+
+    def __init__(self, rank: int, world_size: int, size: int, process_group=None):
+        self.rank = rank
+        self.world_size = world_size
+        self.peer_ptrs: list[int] = [0] * world_size
+        self.local_ptr: int = 0
+        self._alive = False
+
+        if size <= 0:
+            return
+
+        self.local_ptr, _ = _cuda_malloc(size)
+        _cuda_memset_zero(self.local_ptr, size)
+        self._alive = True
+
+        # --- exchange IPC handles via torch.distributed ---
+        err, local_handle = cudart.cudaIpcGetMemHandle(self.local_ptr)
+        _check(err)
+
+        all_handles: list[bytes | None] = [None] * world_size
+        torch.distributed.all_gather_object(
+            all_handles, bytes(local_handle.reserved), group=process_group
+        )
+
+        for r in range(world_size):
+            if r == rank:
+                self.peer_ptrs[r] = self.local_ptr
+            else:
+                handle = cudart.cudaIpcMemHandle_t()
+                handle.reserved = all_handles[r]
+                err, ptr = cudart.cudaIpcOpenMemHandle(
+                    handle, cudart.cudaIpcMemLazyEnablePeerAccess
+                )
+                _check(err)
+                self.peer_ptrs[r] = ptr
+
+    def serialize(self) -> list[int]:
+        """Return peer pointers as a list of int64 values (one per rank)."""
+        raw = b""
+        for ptr in self.peer_ptrs:
+            raw += struct.pack("P", ptr)
+        return array.array("Q", raw).tolist()
+
+    def cleanup(self):
+        if not self._alive:
+            return
+        self._alive = False
+        for r in range(self.world_size):
+            if self.peer_ptrs[r] == 0:
+                continue
+            if r == self.rank:
+                _cuda_free(self.peer_ptrs[r])
+            else:
+                with contextlib.suppress(RuntimeError):
+                    _check(cudart.cudaIpcCloseMemHandle(self.peer_ptrs[r])[0])
+            self.peer_ptrs[r] = 0
+        self.local_ptr = 0
+
+    def __del__(self):
+        if not sys.is_finalizing():
+            self.cleanup()
+
+
+# ---------------------------------------------------------------------------
+# Lamport negative-zero initialization
+# ---------------------------------------------------------------------------
+
+
+def _lamport_fill_neg_zero(device_ptr: int, size_bytes: int):
+    """
+    Fill device memory with IEEE-754 negative zero (-0.0f = 0x80000000).
+    This is the "slot empty" sentinel for the Lamport protocol: the kernel
+    spin-waits until a value is *not* negative zero.
+    """
+    if size_bytes == 0 or device_ptr == 0:
+        return
+    n_floats = size_bytes // 4
+    # torch preserves -0.0 in IEEE-754
+    fill = torch.full((n_floats,), -0.0, dtype=torch.float32, device="cuda")
+    _cuda_memcpy_d2d(device_ptr, fill.data_ptr(), size_bytes)
+    del fill
+
+
+# ---------------------------------------------------------------------------
+# LamportWorkspace — the main class
+# ---------------------------------------------------------------------------
+
+
+class LamportWorkspace:
+    """
+    Self-contained workspace for Lamport-based cross-GPU AllReduce.
+
+    Parameters
+    ----------
+    rank : int
+        Local rank (0-based).
+    world_size : int
+        Total number of ranks in the TP group.
+    comm_size : int
+        Size in bytes of *one* Lamport buffer slot. The total IPC allocation
+        per rank is ``3 * comm_size`` (triple-buffering). Must be large enough
+        to hold the per-slot data written by the kernel.  Use
+        ``compute_comm_size_for_minimax()`` for a safe default.
+    process_group : optional
+        ``torch.distributed`` process group for IPC handle exchange.
+        ``None`` uses the default group.
+    """
+
+    def __init__(self, rank: int, world_size: int, comm_size: int, process_group=None):
+        assert world_size >= 2, "Lamport workspace requires at least 2 ranks"
+        assert comm_size > 0, "comm_size must be positive"
+
+        self.rank = rank
+        self.world_size = world_size
+        self.comm_size = comm_size
+
+        # 1) Lamport triple-buffer (the only IPC memory the kernel reads/writes)
+        lamport_total = 3 * comm_size
+        self._lamport = IpcBuffer(rank, world_size, lamport_total, process_group)
+        _lamport_fill_neg_zero(self._lamport.local_ptr, lamport_total)
+
+        # 2) flag_buffer on device: int32[3] = {counter, unused, lamport_flag}
+        #    counter  — used for block-level sync inside the kernel
+        #    unused   — reserved (index 1)
+        #    lamport_flag — triple-buffer rotation index (0 → 1 → 2 → 0 …)
+        self._flag_buf = torch.zeros(3, dtype=torch.int32, device="cuda")
+
+        # 3) layout_buffer on device: int64[2] = {clear_size, comm_size}
+        #    clear_size — bytes to clear from *previous* slot (set by kernel)
+        #    comm_size  — size of one triple-buffer slot
+        self._layout_buf = torch.tensor(
+            [0, comm_size], dtype=torch.int64, device="cuda"
+        )
+
+        # 4) Assemble device-side void* pointer array
+        N = world_size
+        ptrs: list[int] = []
+        ptrs += [0] * N  # [0   .. N-1]   ipc_buffers  (placeholder)
+        ptrs += [0] * N  # [N   .. 2N-1]  ipc_barriers (placeholder)
+        ptrs += self._lamport.serialize()  # [2N  .. 3N-1]  lamport peer ptrs
+        ptrs.append(self._flag_buf.data_ptr())  # [3N]           flag_buffer
+        ptrs.append(self._layout_buf.data_ptr())  # [3N+1]       layout_buffer
+
+        self._workspace = torch.tensor(ptrs, dtype=torch.int64, device="cuda")
+
+    @property
+    def workspace(self) -> torch.Tensor:
+        """Device tensor (int64) that can be passed to the kernel
+        as ``void** workspace``."""
+        return self._workspace
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def compute_comm_size_for_minimax(
+        max_tokens: int,
+        world_size: int,
+        fused_qk: bool = True,
+    ) -> int:
+        """
+        Return a safe ``comm_size`` (in bytes) for MiniMaxReduceRMSKernel.
+
+        The kernel stores per-token variance scalars in the Lamport buffer:
+          - single-matrix path: ``world_size × max_tokens × 4`` bytes per slot
+          - fused Q+K path: ``world_size × 2 × ceil(max_tokens/4) × 16`` bytes per slot
+
+        The returned value is rounded up to 2 MiB alignment.
+        """
+        if fused_qk:
+            groups = (max_tokens + 3) // 4
+            slot_bytes = world_size * 2 * groups * 16  # 16 = sizeof(float4)
+        else:
+            slot_bytes = world_size * max_tokens * 4  # 4  = sizeof(float)
+        return ((slot_bytes + _ALIGN - 1) >> 21) << 21
+
+    def cleanup(self):
+        if hasattr(self, "_lamport"):
+            self._lamport.cleanup()
+
+    def __del__(self):
+        if not sys.is_finalizing():
+            self.cleanup()
+
+    def __repr__(self):
+        return (
+            f"LamportWorkspace(rank={self.rank}, world_size={self.world_size}, "
+            f"comm_size={self.comm_size})"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Cached convenience function (mirrors TRT-LLM's get_allreduce_workspace)
+# ---------------------------------------------------------------------------
+
+_cache_lock = threading.Lock()
+_workspace_cache: dict = {}
+
+
+def get_allreduce_workspace(
+    rank: int,
+    world_size: int,
+    comm_size: int | None = None,
+    max_tokens: int = 16384,
+    process_group=None,
+) -> torch.Tensor:
+    """
+    Return a cached workspace tensor for the given (rank, world_size) pair.
+
+    On first call the workspace is allocated and IPC handles are exchanged;
+    subsequent calls with the same arguments return the cached tensor.
+
+    Parameters
+    ----------
+    rank, world_size : int
+        TP rank and TP size.
+    comm_size : int, optional
+        Explicit slot size in bytes.  If ``None``, computed automatically
+        from ``max_tokens`` and ``world_size`` (fused Q+K path).
+    max_tokens : int
+        Maximum number of tokens per batch (used when ``comm_size is None``).
+    process_group : optional
+        ``torch.distributed`` process group.
+    """
+    if comm_size is None:
+        comm_size = LamportWorkspace.compute_comm_size_for_minimax(
+            max_tokens, world_size, fused_qk=True
+        )
+    pg_id = id(process_group) if process_group is not None else 0
+    key = (rank, world_size, comm_size, pg_id)
+    with _cache_lock:
+        if key not in _workspace_cache:
+            ws = LamportWorkspace(rank, world_size, comm_size, process_group)
+            _workspace_cache[key] = ws
+        return _workspace_cache[key].workspace
diff --git a/vllm/model_executor/layers/mamba/linear_attn.py b/vllm/model_executor/layers/mamba/linear_attn.py
index 18fcc1426cc5..47508e3a8d85 100644
--- a/vllm/model_executor/layers/mamba/linear_attn.py
+++ b/vllm/model_executor/layers/mamba/linear_attn.py
@@ -3,6 +3,7 @@
 
 import math
 from collections.abc import Callable
+from functools import partial
 
 import torch
 import torch.nn.functional as F
@@ -31,30 +32,49 @@
 from vllm.utils.torch_utils import direct_register_custom_op
 from vllm.v1.attention.backend import AttentionMetadata
 from vllm.v1.attention.backends.linear_attn import LinearAttentionMetadata
+from vllm.v1.attention.backends.registry import MambaAttentionBackendEnum
 
 
+@CustomOp.register("minimax_text01_rmsnorm_tp")
 class MiniMaxText01RMSNormTP(CustomOp):
-    name = "MiniMaxText01RMSNormTP"
-
-    def __init__(self, hidden_size: int, eps: float = 1e-6) -> None:
+    def __init__(
+        self,
+        hidden_size: int,
+        eps: float = 1e-6,
+        *,
+        weight_shard_world_size: int | None = None,
+        weight_shard_rank: int | None = None,
+    ) -> None:
         super().__init__()
         self.tp_world = get_tensor_model_parallel_world_size()
         self.tp_rank = get_tensor_model_parallel_rank()
-        self.weight = nn.Parameter(torch.ones(int(hidden_size / self.tp_world)))
+        self.weight_shard_world = weight_shard_world_size or self.tp_world
+        self.weight_shard_rank = (
+            self.tp_rank if weight_shard_rank is None else weight_shard_rank
+        )
 
-        self.weight.weight_loader = self.weight_loader
+        self.weight = nn.Parameter(torch.ones(hidden_size // self.weight_shard_world))
+        self.weight.weight_loader = partial(
+            self.weight_loader,
+            shard_world_size=self.weight_shard_world,
+            shard_rank=self.weight_shard_rank,
+        )
         self.variance_epsilon = eps
 
     @staticmethod
     def weight_loader(
         param: nn.Parameter,
         loaded_weight: torch.Tensor,
+        shard_world_size: int | None = None,
+        shard_rank: int | None = None,
     ) -> None:
-        tp_world = get_tensor_model_parallel_world_size()
-        tp_rank = get_tensor_model_parallel_rank()
+        if shard_world_size is None:
+            shard_world_size = get_tensor_model_parallel_world_size()
+        if shard_rank is None:
+            shard_rank = get_tensor_model_parallel_rank()
 
-        shard_size = loaded_weight.shape[0] // tp_world
-        shard = slice(tp_rank * shard_size, (tp_rank + 1) * shard_size)
+        shard_size = loaded_weight.shape[0] // shard_world_size
+        shard = slice(shard_rank * shard_size, (shard_rank + 1) * shard_size)
         param.data.copy_(loaded_weight[shard])
 
     def _forward(
@@ -227,8 +247,8 @@ def jit_linear_forward_prefix(
 
 class MiniMaxText01LinearAttention(nn.Module, MambaBase):
     @property
-    def mamba_type(self) -> str:
-        return "linear_attention"
+    def mamba_type(self) -> MambaAttentionBackendEnum:
+        return MambaAttentionBackendEnum.LINEAR
 
     def get_state_dtype(self) -> tuple[torch.dtype]:
         assert self.model_config is not None
@@ -396,10 +416,11 @@ def _forward(
         self, hidden_states: torch.Tensor, output: torch.Tensor, positions: torch.Tensor
     ) -> None:
         forward_context = get_forward_context()
-        attn_metadata: AttentionMetadata = forward_context.attn_metadata
-        if attn_metadata is not None:
-            assert isinstance(attn_metadata, dict)
-            attn_metadata = attn_metadata[self.prefix]
+        attn_metadata_raw = forward_context.attn_metadata
+        attn_metadata: AttentionMetadata | None = None
+        if attn_metadata_raw is not None:
+            assert isinstance(attn_metadata_raw, dict)
+            attn_metadata = attn_metadata_raw[self.prefix]
             assert isinstance(attn_metadata, LinearAttentionMetadata)
             num_actual_tokens = (
                 attn_metadata.num_prefill_tokens + attn_metadata.num_decode_tokens
diff --git a/vllm/model_executor/layers/mamba/mamba_mixer.py b/vllm/model_executor/layers/mamba/mamba_mixer.py
index 82ca367fb26c..1d3159d1e7b3 100644
--- a/vllm/model_executor/layers/mamba/mamba_mixer.py
+++ b/vllm/model_executor/layers/mamba/mamba_mixer.py
@@ -24,19 +24,25 @@
 from vllm.model_executor.layers.mamba.mamba_utils import (
     MambaStateDtypeCalculator,
     MambaStateShapeCalculator,
+    is_conv_state_dim_first,
 )
 from vllm.model_executor.layers.mamba.ops.causal_conv1d import (
     causal_conv1d_fn,
     causal_conv1d_update,
 )
-from vllm.model_executor.layers.mamba.ops.mamba_ssm import (
-    selective_scan_fn,
-    selective_state_update,
-)
+from vllm.model_executor.layers.mamba.ops.mamba_ssm import selective_scan_fn
+from vllm.model_executor.layers.mamba.ops.ssu_dispatch import selective_state_update
 from vllm.model_executor.utils import set_weight_attrs
 from vllm.platforms import current_platform
-from vllm.utils.torch_utils import direct_register_custom_op
+from vllm.utils.torch_utils import (
+    LayerNameType,
+    _encode_layer_name,
+    _resolve_layer_name,
+    direct_register_custom_op,
+)
+from vllm.v1.attention.backend import AttentionMetadata
 from vllm.v1.attention.backends.mamba1_attn import Mamba1AttentionMetadata
+from vllm.v1.attention.backends.registry import MambaAttentionBackendEnum
 
 
 # Adapted from transformers.models.mamba.modeling_mamba.MambaMixer
@@ -227,7 +233,7 @@ def forward(self, hidden_states: torch.Tensor, output: torch.Tensor):
         torch.ops.vllm.mamba_mixer(
             hidden_states,
             output,
-            self.prefix,
+            _encode_layer_name(self.prefix),
         )
 
     def forward_impl(self, hidden_states: torch.Tensor, output: torch.Tensor):
@@ -254,22 +260,26 @@ def forward_impl(self, hidden_states: torch.Tensor, output: torch.Tensor):
         """
 
         forward_context: ForwardContext = get_forward_context()
-        attn_metadata = forward_context.attn_metadata
+        attn_metadata_raw = forward_context.attn_metadata
 
         assert self.cache_config is not None
         mamba_block_size = self.cache_config.mamba_block_size
         is_mamba_cache_all = self.cache_config.mamba_cache_mode == "all"
 
-        if attn_metadata is not None:
-            assert isinstance(attn_metadata, dict)
-            attn_metadata = attn_metadata[self.prefix]
+        attn_metadata: AttentionMetadata | None = None
+        if attn_metadata_raw is not None:
+            assert isinstance(attn_metadata_raw, dict)
+            attn_metadata = attn_metadata_raw[self.prefix]
             assert isinstance(attn_metadata, Mamba1AttentionMetadata)
             query_start_loc_p = attn_metadata.query_start_loc_p
             state_indices_tensor_p = attn_metadata.state_indices_tensor_p
             state_indices_tensor_d = attn_metadata.state_indices_tensor_d
-            self_kv_cache = self.kv_cache
-            conv_state = self_kv_cache[0].transpose(-1, -2)
-            ssm_state = self_kv_cache[1]
+            conv_state = (
+                self.kv_cache[0]
+                if is_conv_state_dim_first()
+                else self.kv_cache[0].transpose(-1, -2)
+            )
+            ssm_state = self.kv_cache[1]
             has_initial_states_p = attn_metadata.has_initial_states_p
             cu_chunk_seqlen_p = attn_metadata.cu_chunk_seqlen_p
             last_chunk_indices_p = attn_metadata.last_chunk_indices_p
@@ -352,6 +362,7 @@ def forward_impl(self, hidden_states: torch.Tensor, output: torch.Tensor):
                 initial_state_idx=block_idx_last_computed_token_p,
                 num_computed_tokens=num_computed_tokens_p,
                 block_size_to_align=mamba_block_size,
+                metadata=attn_metadata,
             )
             # 3. State Space Model sequence transformations.
             discrete_time_step_p, B_p, C_p = self._ssm_transform(
@@ -384,6 +395,9 @@ def forward_impl(self, hidden_states: torch.Tensor, output: torch.Tensor):
             ssm_outputs.append(scan_out_p)
 
         if has_decode:
+            # state_indices_tensor_d is assigned when attn_metadata is not None,
+            # and has_decode is only True when attn_metadata is not None
+            assert state_indices_tensor_d is not None
             if is_mamba_cache_all:
                 state_indices_tensor_d_input = state_indices_tensor_d.gather(
                     1, block_idx_last_computed_token_d.unsqueeze(1)
@@ -422,8 +436,8 @@ def forward_impl(self, hidden_states: torch.Tensor, output: torch.Tensor):
                 B_d,
                 C_d,
                 self.D,
-                gate_d.transpose(0, 1),
                 time_proj_bias,
+                z=gate_d.transpose(0, 1),
                 dt_softplus=True,
                 state_batch_indices=state_indices_tensor_d_input,
                 dst_state_batch_indices=state_indices_tensor_d_output,
@@ -464,8 +478,8 @@ def get_state_shape(self) -> tuple[tuple[int, ...], tuple[int, ...]]:
         )
 
     @property
-    def mamba_type(self) -> str:
-        return "mamba1"
+    def mamba_type(self) -> MambaAttentionBackendEnum:
+        return MambaAttentionBackendEnum.MAMBA1
 
     def _time_proj_bias(self) -> torch.Tensor | None:
         if hasattr(self.dt_proj, "bias") and self.dt_proj.bias is not None:
@@ -509,8 +523,9 @@ def split_batch_to_prefill_and_decode(
 def mamba_mixer(
     hidden_states: torch.Tensor,
     output: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
 ) -> None:
+    layer_name = _resolve_layer_name(layer_name)
     forward_context: ForwardContext = get_forward_context()
     self = forward_context.no_compile_layers[layer_name]
     self.forward_impl(hidden_states=hidden_states, output=output)
@@ -519,7 +534,7 @@ def mamba_mixer(
 def mamba_mixer_fake(
     hidden_states: torch.Tensor,
     output: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
 ) -> None:
     return
 
diff --git a/vllm/model_executor/layers/mamba/mamba_mixer2.py b/vllm/model_executor/layers/mamba/mamba_mixer2.py
index 9486e182ec46..a6524961ea92 100644
--- a/vllm/model_executor/layers/mamba/mamba_mixer2.py
+++ b/vllm/model_executor/layers/mamba/mamba_mixer2.py
@@ -14,6 +14,7 @@
     tensor_model_parallel_all_reduce,
 )
 from vllm.forward_context import ForwardContext, get_forward_context
+from vllm.logger import init_logger
 from vllm.model_executor.custom_op import CustomOp, PluggableLayer
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
@@ -24,16 +25,17 @@
 from vllm.model_executor.layers.mamba.mamba_utils import (
     MambaStateDtypeCalculator,
     MambaStateShapeCalculator,
+    is_conv_state_dim_first,
 )
 from vllm.model_executor.layers.mamba.ops.causal_conv1d import (
     causal_conv1d_fn,
     causal_conv1d_update,
 )
 from vllm.model_executor.layers.mamba.ops.layernorm_gated import rms_norm_gated
-from vllm.model_executor.layers.mamba.ops.mamba_ssm import selective_state_update
 from vllm.model_executor.layers.mamba.ops.ssd_combined import (
     mamba_chunk_scan_combined_varlen,
 )
+from vllm.model_executor.layers.mamba.ops.ssu_dispatch import selective_state_update
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.model_loader.weight_utils import (
     LoaderFunction,
@@ -43,9 +45,17 @@
 from vllm.model_executor.parameter import BasevLLMParameter
 from vllm.model_executor.utils import set_weight_attrs
 from vllm.platforms import current_platform
-from vllm.utils.torch_utils import direct_register_custom_op
+from vllm.utils.torch_utils import (
+    LayerNameType,
+    _encode_layer_name,
+    _resolve_layer_name,
+    direct_register_custom_op,
+)
 from vllm.v1.attention.backend import AttentionMetadata
 from vllm.v1.attention.backends.mamba2_attn import Mamba2AttentionMetadata
+from vllm.v1.attention.backends.registry import MambaAttentionBackendEnum
+
+logger = init_logger(__name__)
 
 # Added by the IBM Team, 2024
 
@@ -466,6 +476,8 @@ def __init__(
             intermediate_size, n_groups, self.use_rms_norm, eps=rms_norm_eps
         )
 
+        self._ssd_kernels_warmed_up = False
+
         # - get hidden_states, B and C after depthwise convolution.
         self.split_hidden_states_B_C_fn = lambda hidden_states_B_C: torch.split(
             hidden_states_B_C,
@@ -490,6 +502,12 @@ def __init__(
         self.prefix = prefix
 
         self.num_spec = vllm_config.num_speculative_tokens
+        if self.num_spec > 0:
+            self.register_buffer(
+                "_decode_state_offsets",
+                torch.arange(1 + self.num_spec, dtype=torch.int32).unsqueeze(0),
+                persistent=False,
+            )
 
         # Pre-compute sizes for forward pass
         self.tped_intermediate_size = self.intermediate_size // self.tp_size
@@ -535,7 +553,7 @@ def forward(
         torch.ops.vllm.mamba_mixer2(
             projected_states,
             ssm_output,
-            self.prefix,
+            _encode_layer_name(self.prefix),
         )
 
         # 4. gated MLP
@@ -550,6 +568,104 @@ def forward(
 
         return output
 
+    def _warmup_ssd_kernels(self, projected_states: torch.Tensor) -> None:
+        """Run a minimal SSD forward pass to trigger Triton autotuning
+        while GPU memory is still plentiful (before SSM cache allocation).
+        """
+        if self._ssd_kernels_warmed_up:
+            return
+        self._ssd_kernels_warmed_up = True
+        logger.info_once("Warming up Mamba2 SSD Triton kernels...")
+
+        device = projected_states.device
+        dtype = projected_states.dtype
+
+        nheads = self.num_heads // self.tp_size
+        ngroups = self.n_groups // self.tp_size
+        headdim = self.head_dim
+        dstate = self.ssm_state_size
+
+        if self.model_config is None:
+            return
+        chunk_size = self.model_config.get_mamba_chunk_size()
+
+        # Triton's autotuner includes tensor dtypes in its cache key,
+        # so state_dtype must match what real inference uses.
+        _, ssm_state_dtype = self.get_state_dtype()
+
+        # SSD kernel autotune keys depend on dtype and head dimensions,
+        # not on sequence length or batch size, so a single shape suffices.
+        seqlen = chunk_size
+        batch = 1
+        nchunks = seqlen // chunk_size  # = 1
+
+        x = torch.randn(seqlen, nheads, headdim, device=device, dtype=dtype)
+        dt = torch.randn(seqlen, nheads, device=device, dtype=dtype)
+        B = torch.randn(seqlen, ngroups, dstate, device=device, dtype=dtype)
+        C = torch.randn(seqlen, ngroups, dstate, device=device, dtype=dtype)
+        cu_seqlens = torch.tensor([0, seqlen], device=device, dtype=torch.int32)
+        cu_chunk_seqlens = torch.tensor(
+            [i * chunk_size for i in range(nchunks + 1)],
+            device=device,
+            dtype=torch.int32,
+        )
+        last_chunk_indices = torch.tensor(
+            [nchunks - 1], device=device, dtype=torch.int32
+        )
+        seq_idx = torch.zeros(nchunks, device=device, dtype=torch.int32)
+        out = torch.empty(seqlen, nheads, headdim, device=device, dtype=dtype)
+
+        # Two kernels (_state_passing_fwd, _chunk_scan_fwd) use
+        # HAS_INITSTATES as a constexpr, producing separate compiled
+        # binaries. Warm up both code paths so neither triggers
+        # JIT compilation during inference.
+        for use_initial_states in (False, True):
+            initial_states = (
+                torch.randn(
+                    batch,
+                    nheads,
+                    headdim,
+                    dstate,
+                    device=device,
+                    dtype=ssm_state_dtype,
+                )
+                if use_initial_states
+                else None
+            )
+            try:
+                mamba_chunk_scan_combined_varlen(
+                    x=x,
+                    dt=dt,
+                    A=self.A,
+                    B=B,
+                    C=C,
+                    chunk_size=chunk_size,
+                    cu_seqlens=cu_seqlens,
+                    cu_chunk_seqlens=cu_chunk_seqlens,
+                    last_chunk_indices=last_chunk_indices,
+                    seq_idx=seq_idx,
+                    out=out,
+                    D=self.D,
+                    z=None,
+                    dt_bias=self.dt_bias,
+                    initial_states=initial_states,
+                    dt_softplus=True,
+                    dt_limit=(0.0, float("inf")),
+                    state_dtype=ssm_state_dtype,
+                )
+            except Exception:
+                logger.warning(
+                    "Mamba2 SSD kernel warmup failed for layer %s "
+                    "(initial_states=%s). First inference may experience "
+                    "latency spike or OOM due to autotuner.",
+                    self.prefix,
+                    use_initial_states,
+                    exc_info=True,
+                )
+
+        logger.debug("Mamba2 SSD kernel warmup completed for layer %s", self.prefix)
+        torch.accelerator.empty_cache()
+
     def conv_ssm_forward(
         self,
         projected_states: torch.Tensor,
@@ -566,19 +682,26 @@ def conv_ssm_forward(
         # kernels to operate in continuous batching and in chunked prefill
         # modes; they are computed at top-level model forward since they
         # stay the same and reused for all mamba layers in the same iteration
-        attn_metadata: AttentionMetadata = forward_context.attn_metadata
+        attn_metadata_raw = forward_context.attn_metadata
 
         assert self.cache_config is not None
         mamba_block_size = self.cache_config.mamba_block_size
         is_mamba_cache_all = self.cache_config.mamba_cache_mode == "all"
-        if attn_metadata is not None:
-            assert isinstance(attn_metadata, dict)
-            attn_metadata = attn_metadata[self.prefix]
+
+        attn_metadata: AttentionMetadata | None = None
+        if attn_metadata_raw is not None:
+            assert isinstance(attn_metadata_raw, dict)
+            attn_metadata = attn_metadata_raw[self.prefix]
             assert isinstance(attn_metadata, Mamba2AttentionMetadata)
-            self_kv_cache = self.kv_cache
-            # conv_state = (..., dim, width-1) yet contiguous along 'dim'
-            conv_state = self_kv_cache[0].transpose(-1, -2)
-            ssm_state = self_kv_cache[1]
+            # conv_state must be (..., dim, width-1) for the conv kernels.
+            # DS layout stores it that way directly; SD layout needs a
+            # transpose (which keeps dim contiguous via stride tricks).
+            conv_state = (
+                self.kv_cache[0]
+                if is_conv_state_dim_first()
+                else self.kv_cache[0].transpose(-1, -2)
+            )
+            ssm_state = self.kv_cache[1]
             has_initial_states_p = attn_metadata.has_initial_states_p
             prep_initial_states = attn_metadata.prep_initial_states
             chunk_size = attn_metadata.chunk_size
@@ -594,7 +717,9 @@ def conv_ssm_forward(
             num_decode_tokens = attn_metadata.num_decode_tokens
 
         if attn_metadata is None:
-            # profile run
+            # V1 profile run -- warm up SSD kernels so that autotuning
+            # completes before SSM cache allocation.
+            self._warmup_ssd_kernels(projected_states)
             hidden_states_B_C = (
                 hidden_states_B_C.transpose(0, 1).clone().transpose(0, 1)
             ).contiguous()
@@ -636,6 +761,14 @@ def conv_ssm_forward(
                     dim=0,
                 )
             )
+            if attn_metadata.block_idx_last_scheduled_token_prev_step is not None:
+                block_idx_last_scheduled_token_prev_step_d, _ = torch.split(
+                    attn_metadata.block_idx_last_scheduled_token_prev_step,
+                    [num_decodes, num_prefills],
+                    dim=0,
+                )
+            else:
+                block_idx_last_scheduled_token_prev_step_d = None
             # Prefill-only variables:
             block_idx_first_scheduled_token_p = (
                 attn_metadata.block_idx_first_scheduled_token_p
@@ -647,6 +780,7 @@ def conv_ssm_forward(
             block_idx_first_scheduled_token_p = None
             block_idx_last_scheduled_token_d = None
             block_idx_last_computed_token_d = None
+            block_idx_last_scheduled_token_prev_step_d = None
             num_computed_tokens_p = None
 
         preallocated_ssm_out_d, preallocated_ssm_out_p = torch.split(
@@ -697,6 +831,7 @@ def conv_ssm_forward(
             # 3. State Space Model sequence transformation
             initial_states = None
             if has_initial_states_p is not None and prep_initial_states:
+                assert state_indices_tensor_p is not None
                 kernel_ssm_indices = state_indices_tensor_p
                 if is_mamba_cache_all:
                     kernel_ssm_indices = state_indices_tensor_p.gather(
@@ -735,6 +870,13 @@ def conv_ssm_forward(
             )
 
             if is_mamba_cache_all:
+                assert mamba_block_size is not None
+                assert state_indices_tensor_p is not None
+                assert block_idx_first_scheduled_token_p is not None
+                assert block_idx_last_scheduled_token_p is not None
+                assert last_chunk_indices_p is not None
+                assert num_computed_tokens_p is not None
+
                 # The chunk_stride is the number of chunks per mamba block
                 # e.g., if mamba_block_size = 512 and chunk_size = 256,
                 # then chunk_stride = 2
@@ -799,6 +941,7 @@ def conv_ssm_forward(
                     ssm_state[cache_blocks_to_fill] = from_where
 
                 # For all seqs, store the last state (note: might be partial):
+                assert state_indices_tensor_p is not None
                 ssm_state[
                     state_indices_tensor_p.gather(
                         1, block_idx_last_scheduled_token_p.unsqueeze(1)
@@ -809,23 +952,36 @@ def conv_ssm_forward(
                 # update ssm states
                 # - varlen state is a (num_prefills, nheads, headdim, dstate)
                 #   tensor
+                assert state_indices_tensor_p is not None
                 ssm_state[state_indices_tensor_p] = varlen_states
 
         # Process decode requests
         if has_decode:
+            assert state_indices_tensor_d is not None
             if is_mamba_cache_all:
-                state_indices_tensor_d_input = state_indices_tensor_d.gather(
-                    1, block_idx_last_computed_token_d.unsqueeze(1)
-                ).squeeze(1)
-                state_indices_tensor_d_output = state_indices_tensor_d.gather(
-                    1, block_idx_last_scheduled_token_d.unsqueeze(1)
-                ).squeeze(1)
-                # for decode:
-                #   block_idx_first_scheduled_token_d ==
-                #       block_idx_last_scheduled_token_d
-                # at block boundaries:
-                #   block_idx_first_scheduled_token_d >
-                #       block_idx_last_computed_token_d
+                if self.num_spec > 0:
+                    assert block_idx_last_scheduled_token_prev_step_d is not None
+                    input_indices = (
+                        block_idx_last_scheduled_token_prev_step_d.unsqueeze(1)
+                        + self._decode_state_offsets
+                    )
+                    output_indices = (
+                        block_idx_last_scheduled_token_d.unsqueeze(1)
+                        + self._decode_state_offsets
+                    )
+                    state_indices_tensor_d_input = state_indices_tensor_d.gather(
+                        1, input_indices
+                    )
+                    state_indices_tensor_d_output = state_indices_tensor_d.gather(
+                        1, output_indices
+                    )
+                else:
+                    state_indices_tensor_d_input = state_indices_tensor_d.gather(
+                        1, block_idx_last_computed_token_d.unsqueeze(1)
+                    ).squeeze(1)
+                    state_indices_tensor_d_output = state_indices_tensor_d.gather(
+                        1, block_idx_last_scheduled_token_d.unsqueeze(1)
+                    ).squeeze(1)
             else:
                 # Without caching, read and write in-place to the same blocks:
                 state_indices_tensor_d_input = state_indices_tensor_d
@@ -879,8 +1035,7 @@ def conv_ssm_forward(
                 B_d,
                 C_d,
                 D_d,
-                z=None,
-                dt_bias=dt_bias,
+                dt_bias,
                 dt_softplus=True,
                 state_batch_indices=state_indices_tensor_d_input,
                 dst_state_batch_indices=state_indices_tensor_d_output,
@@ -912,15 +1067,16 @@ def get_state_shape(self) -> tuple[tuple[int, ...], tuple[int, ...]]:
         )
 
     @property
-    def mamba_type(self) -> str:
-        return "mamba2"
+    def mamba_type(self) -> MambaAttentionBackendEnum:
+        return MambaAttentionBackendEnum.MAMBA2
 
 
 def mamba_mixer2(
     projected_states: torch.Tensor,
     output: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
 ) -> None:
+    layer_name = _resolve_layer_name(layer_name)
     forward_context: ForwardContext = get_forward_context()
     self = forward_context.no_compile_layers[layer_name]
     self.conv_ssm_forward(projected_states=projected_states, output=output)
@@ -929,7 +1085,7 @@ def mamba_mixer2(
 def mamba_mixer2_fake(
     projected_states: torch.Tensor,
     output: torch.Tensor,
-    layer_name: str,
+    layer_name: LayerNameType,
 ) -> None:
     return
 
diff --git a/vllm/model_executor/layers/mamba/mamba_utils.py b/vllm/model_executor/layers/mamba/mamba_utils.py
index 1f6751f6c8b1..c1fd81e40e34 100644
--- a/vllm/model_executor/layers/mamba/mamba_utils.py
+++ b/vllm/model_executor/layers/mamba/mamba_utils.py
@@ -1,20 +1,52 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import functools
 from collections.abc import Callable
 from dataclasses import dataclass
-from typing import TypeAlias
+from typing import Literal, TypeAlias
 
 import torch
 
+import vllm.envs as envs
 from vllm.config.cache import MambaDType
 from vllm.config.model import ModelDType
 from vllm.distributed import divide
+from vllm.logger import init_logger
 from vllm.utils.torch_utils import (
     STR_DTYPE_TO_TORCH_DTYPE,
     get_kv_cache_torch_dtype,
 )
 
+logger = init_logger(__name__)
+
+ConvStateLayoutType = Literal["SD", "DS"]
+
+
+@functools.lru_cache
+def get_conv_state_layout() -> ConvStateLayoutType:
+    """Return the SSM conv state layout.
+
+    SD = (state_len, dim) — dim is the innermost contiguous dimension.
+    DS = (dim, state_len) — TP-sharded dim is on dim-1 (like HND for KV
+         cache), consistent with SSM temporal state layout.
+    """
+    layout: ConvStateLayoutType | None = envs.VLLM_SSM_CONV_STATE_LAYOUT
+    if layout is not None:
+        logger.info_once(
+            "VLLM_SSM_CONV_STATE_LAYOUT env detected. "
+            "Setting SSM conv state layout to %s.",
+            layout,
+        )
+        return layout
+
+    return "SD"
+
+
+def is_conv_state_dim_first() -> bool:
+    """True when the conv state is stored as (dim, state_len) per block."""
+    return get_conv_state_layout() == "DS"
+
 
 class MambaStateDtypeCalculator:
     @classmethod
@@ -23,9 +55,6 @@ def linear_attention_state_dtype(
         model_dtype: ModelDType | torch.dtype,
         mamba_cache_dtype: MambaDType,
     ) -> tuple[torch.dtype, ...]:
-        # TODO (tdoublep) requires testing
-        if mamba_cache_dtype == "float32":
-            raise ValueError("fp32 state for minimax is not yet supported")
         state_dtype = get_kv_cache_torch_dtype(mamba_cache_dtype, model_dtype)
         return (state_dtype,)
 
@@ -107,6 +136,13 @@ def linear_attention_state_shape(
         state_shape = (num_heads // tp_size, head_dim, head_dim)
         return (state_shape,)
 
+    @staticmethod
+    def _orient_conv_shape(dim: int, state_len: int) -> tuple[int, int]:
+        """Return (dim, state_len) for DS layout, (state_len, dim) for SD."""
+        if is_conv_state_dim_first():
+            return (dim, state_len)
+        return (state_len, dim)
+
     @classmethod
     def mamba1_state_shape(
         cls,
@@ -115,12 +151,11 @@ def mamba1_state_shape(
         state_size: int,
         conv_kernel: int,
     ) -> tuple[tuple[int, int], tuple[int, int]]:
-        conv_state_shape = (divide(intermediate_size, tp_world_size), conv_kernel - 1)
+        conv_dim = divide(intermediate_size, tp_world_size)
+        conv_state_shape = cls._orient_conv_shape(conv_dim, conv_kernel - 1)
 
         temporal_state_shape = (divide(intermediate_size, tp_world_size), state_size)
 
-        conv_state_shape = conv_state_shape[1], conv_state_shape[0]
-
         return conv_state_shape, temporal_state_shape
 
     @classmethod
@@ -141,8 +176,9 @@ def mamba2_state_shape(
         # heads and n_groups are TP-ed
         conv_dim = intermediate_size + 2 * n_groups * state_size
 
-        # contiguous along 'dim' axis
-        conv_state_shape = (conv_kernel - 1 + num_spec, divide(conv_dim, tp_world_size))
+        conv_state_shape = cls._orient_conv_shape(
+            divide(conv_dim, tp_world_size), conv_kernel - 1 + num_spec
+        )
 
         # These are not TP-ed as they depend on A, dt_bias, D
         # - they are typically small
@@ -158,7 +194,7 @@ def short_conv_state_shape(
         conv_kernel: int,
     ) -> tuple[tuple[int, int]]:
         conv_dim = divide(intermediate_size, tp_world_size)
-        conv_state_shape = (conv_kernel - 1, conv_dim)
+        conv_state_shape = cls._orient_conv_shape(conv_dim, conv_kernel - 1)
         return (conv_state_shape,)
 
     @classmethod
@@ -185,13 +221,11 @@ def gated_delta_net_state_shape(
         num_spec: int = 0,
     ):
         conv_dim = head_k_dim * num_k_heads * 2 + head_v_dim * num_v_heads
-        conv_state_shape = (
+        conv_state_shape = cls._orient_conv_shape(
             divide(conv_dim, tp_world_size),
             conv_kernel_size - 1 + num_spec,
         )
 
-        conv_state_shape = conv_state_shape[1], conv_state_shape[0]
-
         temporal_state_shape = (
             divide(num_v_heads, tp_world_size),
             head_v_dim,
@@ -218,12 +252,13 @@ def kda_state_shape(
         proj_size = num_heads * head_dim
         proj_k_size = num_k_heads * head_k_dim
 
-        conv_state_shape = (divide(proj_size, tp_world_size), conv_kernel_size - 1)
-        conv_state_k_shape = (divide(proj_k_size, tp_world_size), conv_kernel_size - 1)
+        conv_state_shape = cls._orient_conv_shape(
+            divide(proj_size, tp_world_size), conv_kernel_size - 1
+        )
+        conv_state_k_shape = cls._orient_conv_shape(
+            divide(proj_k_size, tp_world_size), conv_kernel_size - 1
+        )
         recurrent_state_shape = (divide(num_heads, tp_world_size), head_dim, head_dim)
-
-        conv_state_shape = conv_state_shape[1], conv_state_shape[0]
-        conv_state_k_shape = conv_state_k_shape[1], conv_state_k_shape[0]
         return (
             conv_state_shape,
             conv_state_k_shape,
@@ -267,9 +302,27 @@ def get_conv_copy_spec(
     cur_block_idx: int,
     num_accepted_tokens: int,
 ) -> MambaCopySpec:
-    """Return a MambaCopySpec for copying a convolutional state slice."""
+    """Return a MambaCopySpec for copying a convolutional state slice.
+
+    Works for both SD layout ``(num_blocks, state_len, dim)`` and
+    DS layout ``(num_blocks, dim, state_len)``.
+    """
     src_block_id = block_ids[cur_block_idx]
-    src_state = state[src_block_id, num_accepted_tokens - 1 :]
+    offset = num_accepted_tokens - 1
+    if is_conv_state_dim_first():
+        # DS layout: (num_blocks, dim, state_len) — state_len is last.
+        if offset > 0:
+            # Slicing along the last dim yields a non-contiguous view
+            # because features (dim) are strided by state_len.
+            raise NotImplementedError(
+                "DS conv state layout does not yet support speculative "
+                "decoding with mamba_cache_mode='align' "
+                "(num_accepted_tokens > 1)."
+            )
+        src_state = state[src_block_id]
+    else:
+        # SD layout: (num_blocks, state_len, dim) — dim contiguous.
+        src_state = state[src_block_id, offset:]
     return MambaCopySpec(
         start_addr=src_state.data_ptr(), num_elements=src_state.numel()
     )
diff --git a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py
index b0c1ffb0dc28..d87a7638533e 100644
--- a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py
+++ b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py
@@ -9,7 +9,7 @@
 import torch
 
 from vllm.triton_utils import tl, triton
-from vllm.v1.attention.backends.utils import PAD_SLOT_ID
+from vllm.v1.attention.backends.utils import NULL_BLOCK_ID, PAD_SLOT_ID
 
 
 @triton.jit()
@@ -37,7 +37,7 @@ def _causal_conv1d_fwd_kernel(  # continuous batching
     num_cache_lines: tl.constexpr,  # added to support vLLM larger cache lines
     # Strides
     stride_x_dim: tl.constexpr,  # stride to get to next feature-value,
-    stride_x_token: tl.constexpr,  # stride to get to next token (same feature-index, same sequence-index)
+    stride_x_token: tl.int64,  # stride to get to next token (same feature-index, same sequence-index)
     stride_w_dim: tl.constexpr,  # stride to get to next dim-axis value
     stride_w_width: tl.constexpr,  # stride to get to next width-axis value
     stride_istate_seq: tl.constexpr,
@@ -45,16 +45,17 @@ def _causal_conv1d_fwd_kernel(  # continuous batching
     stride_istate_token: tl.constexpr,
     stride_cache_indices: tl.constexpr,
     stride_o_dim: tl.constexpr,
-    stride_o_token: tl.constexpr,
+    stride_o_token: tl.int64,
     stride_block_m: tl.constexpr,  # Stride block to align divided by BLOCK_M
     # others
     pad_slot_id: tl.constexpr,
+    null_block_id: tl.constexpr,
     # Meta-parameters
     HAS_BIAS: tl.constexpr,
     KERNEL_WIDTH: tl.constexpr,
     SILU_ACTIVATION: tl.constexpr,
     IS_APC_ENABLED: tl.constexpr,
-    USE_PAD_SLOT: tl.constexpr,
+    HAS_NULL_BLOCK: tl.constexpr,
     NP2_STATELEN: tl.constexpr,
     BLOCK_M: tl.constexpr,
     BLOCK_N: tl.constexpr,
@@ -133,9 +134,9 @@ def _causal_conv1d_fwd_kernel(  # continuous batching
         conv_state_indices_ptr + idx_seq * stride_cache_indices + conv_state_init_index
     ).to(tl.int64)
 
-    if USE_PAD_SLOT:  # noqa
-        if conv_states_input_coord == pad_slot_id:
-            # not processing as this is not the actual sequence
+    if HAS_NULL_BLOCK:  # noqa
+        if conv_states_input_coord == null_block_id:
+            # not processing as this is a null block (padding)
             return
     conv_states_base = (
         conv_states_ptr
@@ -475,6 +476,7 @@ def causal_conv1d_fn(
     has_initial_state: torch.Tensor | None = None,
     activation: str | None = "silu",
     pad_slot_id: int = PAD_SLOT_ID,
+    null_block_id: int = NULL_BLOCK_ID,
     block_idx_first_scheduled_token: torch.Tensor | None = None,
     block_idx_last_scheduled_token: torch.Tensor | None = None,
     initial_state_idx: torch.Tensor | None = None,
@@ -590,7 +592,6 @@ def causal_conv1d_fn(
         stride_istate_seq = conv_states.stride(0)
         stride_istate_dim = conv_states.stride(1)
         stride_istate_token = conv_states.stride(2)
-        assert stride_istate_dim == 1
     if out.dim() == 2:
         stride_o_dim = out.stride(0)
         stride_o_token = out.stride(1)
@@ -730,12 +731,13 @@ def grid(META):
         block_size_to_align // BLOCK_M,
         # others
         pad_slot_id,
+        null_block_id,
         # META
         HAS_BIAS=bias is not None,
         KERNEL_WIDTH=width,
         SILU_ACTIVATION=activation in ["silu", "swish"],
         IS_APC_ENABLED=block_idx_last_scheduled_token is not None,
-        USE_PAD_SLOT=pad_slot_id is not None,
+        HAS_NULL_BLOCK=null_block_id is not None,
         NP2_STATELEN=np2_statelen,
         # launch_cooperative_grid=True
         BLOCK_M=BLOCK_M,
@@ -767,7 +769,7 @@ def _causal_conv1d_update_kernel(
     # Strides
     stride_x_seq: tl.constexpr,
     stride_x_dim: tl.constexpr,
-    stride_x_token: tl.constexpr,
+    stride_x_token: tl.int64,
     stride_w_dim: tl.constexpr,
     stride_w_width: tl.constexpr,
     stride_conv_state_seq: tl.constexpr,
@@ -776,9 +778,9 @@ def _causal_conv1d_update_kernel(
     stride_state_indices: tl.constexpr,
     stride_o_seq: tl.constexpr,
     stride_o_dim: tl.constexpr,
-    stride_o_token: tl.constexpr,
+    stride_o_token: tl.int64,
     # others
-    pad_slot_id: tl.constexpr,
+    null_block_id: tl.constexpr,
     # Meta-parameters
     HAS_BIAS: tl.constexpr,
     KERNEL_WIDTH: tl.constexpr,
@@ -787,7 +789,7 @@ def _causal_conv1d_update_kernel(
     IS_APC_ENABLED: tl.constexpr,
     IS_SPEC_DECODING: tl.constexpr,
     NP2_STATELEN: tl.constexpr,
-    USE_PAD_SLOT: tl.constexpr,
+    HAS_NULL_BLOCK: tl.constexpr,
     BLOCK_N: tl.constexpr,
 ):
     # ruff: noqa: E501
@@ -811,8 +813,8 @@ def _causal_conv1d_update_kernel(
         conv_state_indices_ptr + idx_seq * stride_state_indices + conv_state_init
     ).to(tl.int64)
 
-    if USE_PAD_SLOT:  # noqa
-        if conv_states_input_coord == pad_slot_id:
+    if HAS_NULL_BLOCK:  # noqa
+        if conv_states_input_coord == null_block_id:
             # not processing as this is not the actual sequence
             return
 
@@ -1076,7 +1078,7 @@ def causal_conv1d_update(
     num_accepted_tokens: torch.Tensor | None = None,
     query_start_loc: torch.Tensor | None = None,
     max_query_len: int = -1,
-    pad_slot_id: int = PAD_SLOT_ID,
+    null_block_id: int = NULL_BLOCK_ID,
     block_idx_last_scheduled_token: torch.Tensor | None = None,
     initial_state_idx: torch.Tensor | None = None,
     validate_data=False,
@@ -1111,16 +1113,16 @@ def causal_conv1d_update(
     max_query_len: int
         If query_start_loc is not None, this indicates the maximum query
         length in the batch.
-    pad_slot_id: int
-            if conv_state_indices is passed, lets the kernel identify padded
-            entries that will not be processed,
-            for example: conv_state_indices = [pad_slot_id, 1 ,20 ,pad_slot_id]
+    null_block_id: int
+            Block ID used to identify padded entries in
+            conv_state_indices. Block 0 is the null block.
+            for example: conv_state_indices = [null_block_id, 1, 20, null_block_id]
             in this case, the kernel will not process entries at
             indices 0 and 3
     out: (batch, dim) or (batch, dim, seqlen) or (num_tokens, dim), same shape as `x`
     """
     if validate_data:
-        assert pad_slot_id is not None
+        assert null_block_id is not None
         assert x.stride(1) == 1
     if isinstance(activation, bool):
         activation = "silu" if activation is True else None
@@ -1146,9 +1148,6 @@ def causal_conv1d_update(
 
     if validate_data:
         assert dim == weight.size(0)
-        assert conv_state.stride(-2) == 1, (
-            f"ERROR: expect contiguous along feat-dim of conv_state (currently stride={conv_state.stride()})"
-        )
         assert state_len >= width - 1
         # when above happens, we don't shift-left to keep any records in conv_state
         assert dim == conv_state.size(1)
@@ -1225,7 +1224,7 @@ def grid(META):
         stride_o_dim,
         stride_o_token,
         # others
-        pad_slot_id,
+        null_block_id,
         # META
         HAS_BIAS=bias is not None,
         KERNEL_WIDTH=width,
@@ -1234,7 +1233,7 @@ def grid(META):
         IS_APC_ENABLED=block_idx_last_scheduled_token is not None,
         IS_SPEC_DECODING=num_accepted_tokens is not None,
         NP2_STATELEN=np2_statelen,
-        USE_PAD_SLOT=pad_slot_id is not None,
+        HAS_NULL_BLOCK=null_block_id is not None,
         BLOCK_N=256,
     )
     if unsqueeze:
diff --git a/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_B200,cache_dtype=float16.json b/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_B200,cache_dtype=float16.json
new file mode 100644
index 000000000000..bbd24af8e330
--- /dev/null
+++ b/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_B200,cache_dtype=float16.json
@@ -0,0 +1,87 @@
+{
+    "triton_version": "3.6.0",
+    "8": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_M": 8,
+        "num_warps": 8
+    },
+    "32": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "64": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "512": {
+        "BLOCK_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 32,
+        "num_warps": 2
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "8192": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "12288": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "16384": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "24576": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "32768": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "49152": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "65536": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "98304": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "131072": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "196608": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "262144": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    }
+}
diff --git a/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_B200,cache_dtype=float32.json b/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_B200,cache_dtype=float32.json
new file mode 100644
index 000000000000..0fe496162805
--- /dev/null
+++ b/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_B200,cache_dtype=float32.json
@@ -0,0 +1,87 @@
+{
+    "triton_version": "3.6.0",
+    "8": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "32": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "64": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "128": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "256": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "512": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "8192": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "12288": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "16384": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "24576": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "32768": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "49152": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "65536": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "98304": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "131072": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "196608": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "262144": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    }
+}
diff --git a/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_GB200,cache_dtype=float16.json b/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_GB200,cache_dtype=float16.json
new file mode 100644
index 000000000000..b498fa3745d0
--- /dev/null
+++ b/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_GB200,cache_dtype=float16.json
@@ -0,0 +1,87 @@
+{
+    "triton_version": "3.6.0",
+    "8": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_M": 8,
+        "num_warps": 8
+    },
+    "32": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "64": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "512": {
+        "BLOCK_SIZE_M": 32,
+        "num_warps": 1
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "8192": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "12288": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "16384": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "24576": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "32768": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "49152": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "65536": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "98304": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "131072": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "196608": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "262144": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    }
+}
diff --git a/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_GB200,cache_dtype=float32.json b/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_GB200,cache_dtype=float32.json
new file mode 100644
index 000000000000..63fdcf5be246
--- /dev/null
+++ b/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_GB200,cache_dtype=float32.json
@@ -0,0 +1,87 @@
+{
+    "triton_version": "3.6.0",
+    "8": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "16": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "32": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "64": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "128": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "256": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "512": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "8192": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "12288": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "16384": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "24576": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "32768": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "49152": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "65536": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "98304": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "131072": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "196608": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "262144": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    }
+}
diff --git a/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_H100_80GB_HBM3,cache_dtype=float16.json b/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_H100_80GB_HBM3,cache_dtype=float16.json
new file mode 100644
index 000000000000..b479d7f69a43
--- /dev/null
+++ b/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_H100_80GB_HBM3,cache_dtype=float16.json
@@ -0,0 +1,87 @@
+{
+    "triton_version": "3.6.0",
+    "8": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "16": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "64": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_M": 8,
+        "num_warps": 2
+    },
+    "256": {
+        "BLOCK_SIZE_M": 8,
+        "num_warps": 2
+    },
+    "512": {
+        "BLOCK_SIZE_M": 8,
+        "num_warps": 2
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 8,
+        "num_warps": 1
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "8192": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "12288": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16384": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "24576": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "32768": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "49152": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "65536": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "98304": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "131072": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "196608": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "262144": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    }
+}
diff --git a/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_H100_80GB_HBM3,cache_dtype=float32.json b/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_H100_80GB_HBM3,cache_dtype=float32.json
new file mode 100644
index 000000000000..57fe2996582e
--- /dev/null
+++ b/vllm/model_executor/layers/mamba/ops/configs/selective_state_update/headdim=64,dstate=128,device_name=NVIDIA_H100_80GB_HBM3,cache_dtype=float32.json
@@ -0,0 +1,87 @@
+{
+    "triton_version": "3.6.0",
+    "8": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "16": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "32": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "64": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "128": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "256": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "512": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 4,
+        "num_warps": 1
+    },
+    "8192": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "12288": {
+        "BLOCK_SIZE_M": 64,
+        "num_warps": 8
+    },
+    "16384": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 1
+    },
+    "24576": {
+        "BLOCK_SIZE_M": 64,
+        "num_warps": 8
+    },
+    "32768": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 1
+    },
+    "49152": {
+        "BLOCK_SIZE_M": 64,
+        "num_warps": 8
+    },
+    "65536": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "98304": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 1
+    },
+    "131072": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    },
+    "196608": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 1
+    },
+    "262144": {
+        "BLOCK_SIZE_M": 16,
+        "num_warps": 2
+    }
+}
diff --git a/vllm/model_executor/layers/mamba/ops/cpu/__init__.py b/vllm/model_executor/layers/mamba/ops/cpu/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/model_executor/layers/mamba/ops/cpu/causal_conv1d.py b/vllm/model_executor/layers/mamba/ops/cpu/causal_conv1d.py
new file mode 100644
index 000000000000..b047ca6d6169
--- /dev/null
+++ b/vllm/model_executor/layers/mamba/ops/cpu/causal_conv1d.py
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from __future__ import annotations
+
+import torch
+import torch.nn.functional as F
+
+
+# for prefill
+def causal_conv1d_torch(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor | None,
+    conv_states: torch.Tensor,
+    query_start_loc: torch.Tensor,
+    cache_indices: torch.Tensor,
+    has_initial_state: torch.Tensor,
+    activation: str | None = "silu",
+) -> torch.Tensor:
+    out = torch.empty_like(x)
+    state_len = weight.shape[1] - 1
+    assert activation in {None, "silu", "swish"}
+
+    seq_begin_end_idx = [
+        (int(query_start_loc[idx].item()), int(query_start_loc[idx + 1].item()))
+        for idx in range(query_start_loc.shape[0] - 1)
+    ]
+    weight = weight.unsqueeze(1)
+    for seq_idx, (bos, eos) in enumerate(seq_begin_end_idx):
+        slot = int(cache_indices[seq_idx].item())
+
+        seq_x = x[:, bos:eos].unsqueeze(0)
+        if bool(has_initial_state[seq_idx].item()):
+            initial_state = conv_states[slot, :, :state_len].unsqueeze(0)
+        else:
+            initial_state = torch.zeros(
+                1,
+                weight.shape[0],
+                state_len,
+                device=seq_x.device,
+                dtype=seq_x.dtype,
+            )
+
+        conv_input = torch.cat([initial_state, seq_x], dim=-1).to(weight.dtype)
+        seq_out = F.conv1d(
+            conv_input,
+            weight,
+            bias,
+            padding=0,
+            groups=weight.shape[0],
+        )
+        seq_out = seq_out[..., -seq_x.shape[-1] :].to(dtype=x.dtype)
+        if activation in ("silu", "swish"):
+            seq_out = F.silu(seq_out)
+
+        out[:, bos:eos] = seq_out.squeeze(0)
+        conv_states[slot, :, :state_len].copy_(conv_input[..., -state_len:].squeeze(0))
+
+    return out
+
+
+# for decode
+def causal_conv1d_update_torch(
+    x: torch.Tensor,
+    conv_state: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor | None = None,
+    activation: str | None = None,
+) -> torch.Tensor:
+    assert activation in {None, "silu", "swish"}
+
+    _, dim, seq_len = x.shape
+    state_len = conv_state.shape[-1]
+
+    x_new = torch.cat([conv_state, x], dim=-1).to(weight.dtype)
+    conv_state.copy_(x_new[:, :, -state_len:])
+
+    out = F.conv1d(
+        x_new,
+        weight.unsqueeze(1),
+        bias,
+        padding=0,
+        groups=dim,
+    )[:, :, -seq_len:]
+    if activation in ("silu", "swish"):
+        out = F.silu(out)
+    return out
diff --git a/vllm/model_executor/layers/mamba/ops/cpu/gdn_attention.py b/vllm/model_executor/layers/mamba/ops/cpu/gdn_attention.py
new file mode 100644
index 000000000000..166bd43bbddf
--- /dev/null
+++ b/vllm/model_executor/layers/mamba/ops/cpu/gdn_attention.py
@@ -0,0 +1,348 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from __future__ import annotations
+
+import torch
+
+import vllm._custom_ops as ops
+from vllm.forward_context import ForwardContext, get_forward_context
+from vllm.model_executor.layers.mamba.mamba_utils import is_conv_state_dim_first
+from vllm.model_executor.layers.mamba.ops.cpu.causal_conv1d import (
+    causal_conv1d_torch,
+    causal_conv1d_update_torch,
+)
+from vllm.model_executor.layers.mamba.ops.cpu.recurrent_gated_delta_rule import (
+    chunk_gated_delta_rule,
+    gdn_gating,
+    recurrent_gated_delta_rule,
+)
+from vllm.utils.torch_utils import (
+    LayerNameType,
+    _resolve_layer_name,
+    direct_register_custom_op,
+)
+from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadata
+
+_CPU_GDN_ATTENTION_OPS_REGISTERED = False
+
+
+def cpu_gdn_attention_core(
+    mixed_qkv: torch.Tensor,
+    b: torch.Tensor,
+    a: torch.Tensor,
+    core_attn_out: torch.Tensor,
+    layer_name: LayerNameType,
+) -> None:
+    """CPU custom op for the core GDN attention computation."""
+    layer_name = _resolve_layer_name(layer_name)
+    forward_context: ForwardContext = get_forward_context()
+    layer = forward_context.no_compile_layers[layer_name]
+
+    attn_metadata = forward_context.attn_metadata
+
+    if attn_metadata is None:
+        return
+
+    assert isinstance(attn_metadata, dict)
+    attn_metadata_i = attn_metadata[layer.prefix]
+    assert isinstance(attn_metadata_i, GDNAttentionMetadata)
+
+    if attn_metadata_i.num_actual_tokens == 0:
+        return
+
+    assert (
+        attn_metadata_i.spec_sequence_masks is None
+        and attn_metadata_i.num_accepted_tokens is None
+    ), "speculative decode not supported in CPU GDN attention."
+
+    if torch.cpu._is_amx_tile_supported():
+        return cpu_gdn_attention_core_amx(
+            mixed_qkv,
+            b,
+            a,
+            core_attn_out,
+            attn_metadata_i,
+            layer,
+        )
+
+    state_indices_tensor = attn_metadata_i.non_spec_state_indices_tensor
+    query_start_loc = attn_metadata_i.non_spec_query_start_loc
+    assert state_indices_tensor is not None
+    assert query_start_loc is not None
+
+    # [num_allocated_slots, conv_dim, kernel - 1]
+    conv_state = layer.kv_cache[0]
+    if not is_conv_state_dim_first():
+        conv_state = conv_state.transpose(-1, -2)
+
+    # [num_allocated_slots, num_v_heads / tp_size, v_dim, k_dim]
+    ssm_state = layer.kv_cache[1]
+
+    num_decodes = attn_metadata_i.num_decodes
+    num_decode_tokens = attn_metadata_i.num_decode_tokens
+    num_prefills = attn_metadata_i.num_prefills
+    num_prefill_tokens = attn_metadata_i.num_prefill_tokens
+
+    conv_weights = layer.conv1d.weight.view(
+        layer.conv1d.weight.size(0), layer.conv1d.weight.size(2)
+    )
+
+    # all decode requests (batched)
+    if num_decodes > 0:
+        decode_mixed_qkv = mixed_qkv[:num_decode_tokens]
+        decode_b = b[:num_decode_tokens]
+        decode_a = a[:num_decode_tokens]
+        decode_state_indices = state_indices_tensor[:num_decodes]
+        decode_conv_state = conv_state[decode_state_indices].contiguous()
+
+        decode_mixed_qkv = causal_conv1d_update_torch(
+            # [B, dim] -> [B, dim, 1]
+            x=decode_mixed_qkv.unsqueeze(-1),
+            conv_state=decode_conv_state,
+            weight=conv_weights,
+            bias=layer.conv1d.bias,
+            activation=layer.activation,
+        ).squeeze(-1)
+        conv_state[decode_state_indices] = decode_conv_state
+
+        query, key, value = layer.rearrange_mixed_qkv(decode_mixed_qkv)
+
+        # [1, L, H, D] -> [B, 1, H, D] for batched decode
+        query = query.transpose(0, 1).contiguous()
+        key = key.transpose(0, 1).contiguous()
+        value = value.transpose(0, 1).contiguous()
+
+        g, beta_output = gdn_gating(
+            A_log=layer.A_log,
+            a=decode_a,
+            b=decode_b,
+            dt_bias=layer.dt_bias,
+        )
+        if g.ndim == 2:
+            g = g.unsqueeze(1)
+            beta_output = beta_output.unsqueeze(1)
+
+        initial_state = ssm_state[decode_state_indices].contiguous()
+        attn_out, last_recurrent_state = recurrent_gated_delta_rule(
+            query=query,
+            key=key,
+            value=value,
+            g=g,
+            beta=beta_output,
+            initial_state=initial_state,
+            scale=None,
+            use_qk_l2norm_in_kernel=True,
+        )
+        ssm_state[decode_state_indices] = last_recurrent_state.to(
+            ssm_state.dtype
+        ).contiguous()
+        core_attn_out[:num_decode_tokens] = attn_out.squeeze(1)
+
+    # all prefill requests: (varlen) currently naively loops over sequences
+    if num_prefills > 0:
+        has_initial_state = attn_metadata_i.has_initial_state
+        assert has_initial_state is not None
+
+        prefill_token_start = num_decode_tokens
+        prefill_token_end = prefill_token_start + num_prefill_tokens
+        prefill_mixed_qkv = mixed_qkv[prefill_token_start:prefill_token_end]
+        prefill_b = b[prefill_token_start:prefill_token_end]
+        prefill_a = a[prefill_token_start:prefill_token_end]
+        prefill_state_indices = state_indices_tensor[
+            num_decodes : num_decodes + num_prefills
+        ]
+        prefill_query_start_loc = (
+            query_start_loc[num_decodes : num_decodes + num_prefills + 1]
+            - num_decode_tokens
+        )
+        prefill_has_initial_state = has_initial_state[
+            num_decodes : num_decodes + num_prefills
+        ]
+
+        prefill_mixed_qkv = causal_conv1d_torch(
+            x=prefill_mixed_qkv.transpose(0, 1),
+            weight=conv_weights,
+            bias=layer.conv1d.bias,
+            conv_states=conv_state,
+            query_start_loc=prefill_query_start_loc,
+            cache_indices=prefill_state_indices,
+            has_initial_state=prefill_has_initial_state,
+            activation=layer.activation,
+        ).transpose(0, 1)
+
+        query, key, value = layer.rearrange_mixed_qkv(prefill_mixed_qkv)
+        g, beta = gdn_gating(layer.A_log, prefill_a, prefill_b, layer.dt_bias)
+        if g.ndim == 2:
+            g = g.unsqueeze(0)
+            beta = beta.unsqueeze(0)
+
+        initial_state = ssm_state[prefill_state_indices].contiguous()
+        initial_state[~prefill_has_initial_state, ...] = 0
+        attn_out, last_recurrent_state = chunk_gated_delta_rule(
+            q=query,
+            k=key,
+            v=value,
+            g=g,
+            beta=beta,
+            scale=None,
+            initial_state=initial_state,
+            cu_seqlens=prefill_query_start_loc,
+            use_qk_l2norm_in_kernel=True,
+        )
+        ssm_state[prefill_state_indices] = last_recurrent_state.to(ssm_state.dtype)
+        core_attn_out[prefill_token_start:prefill_token_end] = attn_out.squeeze(0)
+
+
+def cpu_gdn_attention_core_fake(
+    mixed_qkv: torch.Tensor,
+    b: torch.Tensor,
+    a: torch.Tensor,
+    core_attn_out: torch.Tensor,
+    layer_name: LayerNameType,
+) -> None:
+    """Fake implementation for torch.compile."""
+    return
+
+
+def cpu_gdn_attention_core_amx(
+    mixed_qkv: torch.Tensor,
+    b: torch.Tensor,
+    a: torch.Tensor,
+    core_attn_out: torch.Tensor,
+    attn_metadata_i: GDNAttentionMetadata,
+    layer: torch.nn.Module,
+):
+    state_indices_tensor = attn_metadata_i.non_spec_state_indices_tensor
+    query_start_loc = attn_metadata_i.non_spec_query_start_loc
+    assert state_indices_tensor is not None
+    assert query_start_loc is not None
+
+    # [num_allocated_slots, kernel - 1, conv_dim]
+    conv_state = layer.kv_cache[0]
+    if is_conv_state_dim_first():
+        raise RuntimeError("AMX GDN attention requires `SD` conv_state layout.")
+    # reshape to [num_allocated_slots, conv_dim, kernel - 1]
+    conv_state_t = conv_state.transpose(1, 2)
+
+    # [num_allocated_slots, num_v_heads / tp_size, v_dim, k_dim]
+    ssm_state = layer.kv_cache[1]
+    # rehape to [num_allocated_slots, num_v_heads / tp_size, k_dim, v_dim]
+    num_allocated_slots, head_num, v_dim, k_dim = ssm_state.size()
+    ssm_state = ssm_state.view(
+        num_allocated_slots,
+        head_num,
+        k_dim,
+        v_dim,
+    )
+
+    mixed_qkv = mixed_qkv.contiguous()
+    a = a.contiguous()
+    b = b.contiguous()
+
+    num_decodes = attn_metadata_i.num_decodes
+    num_decode_tokens = attn_metadata_i.num_decode_tokens
+    num_prefills = attn_metadata_i.num_prefills
+    num_prefill_tokens = attn_metadata_i.num_prefill_tokens
+
+    if num_decodes > 0:
+        decode_mixed_qkv = mixed_qkv[:num_decode_tokens]
+        decode_b = b[:num_decode_tokens]
+        decode_a = a[:num_decode_tokens]
+        decode_state_indices = state_indices_tensor[:num_decodes]
+
+        decode_mixed_qkv = ops.causal_conv1d_update_cpu(
+            x=decode_mixed_qkv,
+            conv_states=conv_state_t,
+            weight=layer.conv1d.weight,
+            bias=layer.conv1d.bias,
+            silu_activation=layer.activation == "silu",
+            conv_state_indices=decode_state_indices,
+            is_vnni=True,
+        )
+
+        query, key, value = layer.rearrange_mixed_qkv(decode_mixed_qkv)
+        attn_out = ops.fused_sigmoid_gating_delta_rule_update_cpu(
+            A_log=layer.A_log,
+            dt_bias=layer.dt_bias,
+            q=query,
+            k=key,
+            v=value,
+            a=decode_a,
+            b=decode_b,
+            initial_state_source=ssm_state,
+            initial_state_indices=decode_state_indices,
+            cu_seqlens=query_start_loc[: num_decodes + 1],
+            use_qk_l2norm_in_kernel=True,
+        )
+        core_attn_out[:num_decode_tokens] = attn_out.squeeze(1)
+
+    if num_prefills > 0:
+        has_initial_state = attn_metadata_i.has_initial_state
+        assert has_initial_state is not None
+
+        prefill_token_start = num_decode_tokens
+        prefill_token_end = prefill_token_start + num_prefill_tokens
+        prefill_mixed_qkv = mixed_qkv[prefill_token_start:prefill_token_end]
+        prefill_b = b[prefill_token_start:prefill_token_end]
+        prefill_a = a[prefill_token_start:prefill_token_end]
+        prefill_state_indices = state_indices_tensor[
+            num_decodes : num_decodes + num_prefills
+        ]
+        prefill_query_start_loc = (
+            query_start_loc[num_decodes : num_decodes + num_prefills + 1]
+            - num_decode_tokens
+        )
+        prefill_has_initial_state = has_initial_state[
+            num_decodes : num_decodes + num_prefills
+        ]
+
+        prefill_mixed_qkv = ops.causal_conv1d_fwd_cpu(
+            x=prefill_mixed_qkv.transpose(0, 1),
+            weight=layer.conv1d.weight,
+            bias=layer.conv1d.bias,
+            conv_states=conv_state_t,
+            query_start_loc=prefill_query_start_loc,
+            cache_indices=prefill_state_indices,
+            has_initial_state=prefill_has_initial_state,
+            silu_activation=layer.activation == "silu",
+            is_vnni=True,
+        ).transpose(0, 1)
+
+        query, key, value = layer.rearrange_mixed_qkv(prefill_mixed_qkv)
+        g, beta = ops.fused_gdn_gating_cpu(
+            A_log=layer.A_log, a=prefill_a, b=prefill_b, dt_bias=layer.dt_bias
+        )
+
+        initial_state = ssm_state[prefill_state_indices]
+        initial_state[~prefill_has_initial_state, ...] = 0
+        attn_out, last_recurrent_state = ops.chunk_gated_delta_rule_cpu(
+            query=query,
+            key=key,
+            value=value,
+            g=g,
+            beta=beta,
+            initial_state=initial_state,
+            output_final_state=True,
+            cu_seqlens=prefill_query_start_loc,
+            head_first=False,
+            use_qk_l2norm_in_kernel=True,
+        )
+        ssm_state[prefill_state_indices] = last_recurrent_state.to(
+            ssm_state.dtype, copy=False
+        )
+        core_attn_out[prefill_token_start:prefill_token_end] = attn_out.squeeze(0)
+
+
+def register_cpu_gdn_attention_ops() -> None:
+    global _CPU_GDN_ATTENTION_OPS_REGISTERED
+    if _CPU_GDN_ATTENTION_OPS_REGISTERED:
+        return
+
+    direct_register_custom_op(
+        op_name="cpu_gdn_attention_core",
+        op_func=cpu_gdn_attention_core,
+        mutates_args=["core_attn_out"],
+        fake_impl=cpu_gdn_attention_core_fake,
+    )
+    _CPU_GDN_ATTENTION_OPS_REGISTERED = True
diff --git a/vllm/model_executor/layers/mamba/ops/cpu/recurrent_gated_delta_rule.py b/vllm/model_executor/layers/mamba/ops/cpu/recurrent_gated_delta_rule.py
new file mode 100644
index 000000000000..30fca3423a38
--- /dev/null
+++ b/vllm/model_executor/layers/mamba/ops/cpu/recurrent_gated_delta_rule.py
@@ -0,0 +1,223 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+import torch
+import torch.nn.functional as F
+
+
+def l2norm(
+    x: torch.Tensor,
+    dim: int = -1,
+    eps: float = 1e-6,
+) -> torch.Tensor:
+    inv_norm = torch.rsqrt((x * x).sum(dim=dim, keepdim=True) + eps)
+    return x * inv_norm
+
+
+def recurrent_gated_delta_rule(
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    g: torch.Tensor,
+    beta: torch.Tensor,
+    initial_state: torch.Tensor,
+    scale: float | None = None,
+    use_qk_l2norm_in_kernel: bool = False,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    initial_dtype = query.dtype
+    if use_qk_l2norm_in_kernel:
+        query = l2norm(query, dim=-1, eps=1e-6)
+        key = l2norm(key, dim=-1, eps=1e-6)
+
+    if query.shape[2] != value.shape[2]:
+        repeat_factor = value.shape[2] // query.shape[2]
+        query = query.repeat_interleave(repeat_factor, dim=2)
+        key = key.repeat_interleave(repeat_factor, dim=2)
+
+    query, key, value, beta, g = [
+        x.transpose(1, 2).contiguous().to(torch.float32)
+        for x in (query, key, value, beta, g)
+    ]
+
+    batch_size, num_heads, sequence_length, _ = key.shape
+    v_head_dim = value.shape[-1]
+    if scale is None:
+        scale = 1 / (query.shape[-1] ** 0.5)
+    query = query * scale
+
+    core_attn_out = torch.empty(
+        batch_size,
+        num_heads,
+        sequence_length,
+        v_head_dim,
+        dtype=value.dtype,
+    )
+    last_recurrent_state = initial_state.to(value)
+
+    for token_idx in range(sequence_length):
+        q_t = query[:, :, token_idx]
+        k_t = key[:, :, token_idx]
+        v_t = value[:, :, token_idx]
+        g_t = g[:, :, token_idx].exp().unsqueeze(-1).unsqueeze(-1)
+        beta_t = beta[:, :, token_idx].unsqueeze(-1)
+
+        last_recurrent_state = last_recurrent_state * g_t
+        kv_mem = (last_recurrent_state * k_t.unsqueeze(-2)).sum(dim=-1)
+        delta = (v_t - kv_mem) * beta_t
+        last_recurrent_state = last_recurrent_state + delta.unsqueeze(
+            -1
+        ) * k_t.unsqueeze(-2)
+        core_attn_out[:, :, token_idx] = (last_recurrent_state * q_t.unsqueeze(-2)).sum(
+            dim=-1
+        )
+
+    core_attn_out = core_attn_out.transpose(1, 2).contiguous().to(initial_dtype)
+    return core_attn_out, last_recurrent_state
+
+
+def gdn_gating(
+    A_log: torch.Tensor,
+    a: torch.Tensor,
+    b: torch.Tensor,
+    dt_bias: torch.Tensor,
+    beta: float = 1.0,
+    threshold: float = 20.0,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    softplus_x = F.softplus(a.float() + dt_bias.float(), beta=beta, threshold=threshold)
+    g = -torch.exp(A_log.float()) * softplus_x
+    beta_output = torch.sigmoid(b.float()).to(dtype=b.dtype)
+    return g, beta_output
+
+
+def chunk_gated_delta_rule(
+    q: torch.Tensor,
+    k: torch.Tensor,
+    v: torch.Tensor,
+    g: torch.Tensor,
+    beta: torch.Tensor,
+    *,
+    initial_state: torch.Tensor,
+    scale: float | None = None,
+    cu_seqlens: torch.Tensor,
+    use_qk_l2norm_in_kernel: bool = False,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    output = torch.empty_like(v)
+    state_dtype = initial_state.dtype
+    chunk_size = 128
+    sequence_bounds = [
+        (
+            seq_idx,
+            int(cu_seqlens[seq_idx].item()),
+            int(cu_seqlens[seq_idx + 1].item()),
+        )
+        for seq_idx in range(len(cu_seqlens) - 1)
+    ]
+    chunk_eye = torch.eye(chunk_size, dtype=torch.float32)
+    num_sequences = len(sequence_bounds)
+    num_value_heads = v.shape[2]
+    value_head_dim = v.shape[3]
+    key_head_dim = k.shape[3]
+    final_state = torch.empty(
+        (num_sequences, num_value_heads, value_head_dim, key_head_dim),
+        dtype=state_dtype,
+    )
+
+    for seq_idx, begin, end in sequence_bounds:
+        q_seq = q[:, begin:end]
+        k_seq = k[:, begin:end]
+        v_seq = v[:, begin:end]
+        g_seq = g[:, begin:end]
+        beta_seq = beta[:, begin:end]
+
+        initial_dtype = q_seq.dtype
+        if use_qk_l2norm_in_kernel:
+            q_seq = l2norm(q_seq, dim=-1, eps=1e-6)
+            k_seq = l2norm(k_seq, dim=-1, eps=1e-6)
+
+        num_qk_heads = q_seq.shape[2]
+        num_value_heads = v_seq.shape[2]
+        if num_qk_heads != num_value_heads:
+            repeat_factor = num_value_heads // num_qk_heads
+            q_seq = q_seq.repeat_interleave(repeat_factor, dim=2)
+            k_seq = k_seq.repeat_interleave(repeat_factor, dim=2)
+
+        q_seq, k_seq, v_seq, beta_seq, g_seq = [
+            x.transpose(1, 2).contiguous().to(torch.float32)
+            for x in (q_seq, k_seq, v_seq, beta_seq, g_seq)
+        ]
+        seq_batch_size, num_heads, seq_len, qk_head_dim = q_seq.shape
+        value_head_dim = v_seq.shape[-1]
+
+        if scale is None:
+            scale = 1 / (qk_head_dim**0.5)
+
+        q_seq = q_seq * scale
+
+        seq_state = initial_state[seq_idx : seq_idx + 1].to(v_seq)
+        seq_output = torch.empty(
+            seq_batch_size,
+            num_heads,
+            seq_len,
+            value_head_dim,
+            dtype=v_seq.dtype,
+        )
+
+        for chunk_start in range(0, seq_len, chunk_size):
+            chunk_end = min(chunk_start + chunk_size, seq_len)
+            q_chunk = q_seq[:, :, chunk_start:chunk_end]
+            k_chunk = k_seq[:, :, chunk_start:chunk_end]
+            v_chunk = v_seq[:, :, chunk_start:chunk_end]
+            beta_chunk = beta_seq[:, :, chunk_start:chunk_end]
+            g_chunk = g_seq[:, :, chunk_start:chunk_end]
+            chunk_len = chunk_end - chunk_start
+
+            cum_g = g_chunk.cumsum(dim=-1)
+            exp_cum_g = cum_g.exp()
+            decay = (cum_g.unsqueeze(-1) - cum_g.unsqueeze(-2)).exp()
+
+            interaction = (k_chunk * beta_chunk.unsqueeze(-1)) @ k_chunk.transpose(
+                -1, -2
+            )
+            interaction = torch.tril(interaction * decay, diagonal=-1)
+            system = interaction + chunk_eye[:chunk_len, :chunk_len]
+
+            solved_values = torch.linalg.solve_triangular(
+                system,
+                v_chunk * beta_chunk.unsqueeze(-1),
+                upper=False,
+            )
+            solved_keys = torch.linalg.solve_triangular(
+                system,
+                (k_chunk * beta_chunk.unsqueeze(-1)) * exp_cum_g.unsqueeze(-1),
+                upper=False,
+            )
+
+            incoming_memory = torch.einsum("bhvk,bhck->bhcv", seq_state, solved_keys)
+            transformed_values = solved_values - incoming_memory
+
+            # Each chunk contributes both from the incoming recurrent state and
+            # from its own in-chunk interactions.
+            inter_chunk = torch.einsum(
+                "bhvk,bhck->bhcv",
+                seq_state,
+                q_chunk * exp_cum_g.unsqueeze(-1),
+            )
+            intra_chunk = torch.tril((q_chunk @ k_chunk.transpose(-1, -2)) * decay)
+            seq_output[:, :, chunk_start:chunk_end] = (
+                inter_chunk + intra_chunk @ transformed_values
+            )
+
+            # Carry the recurrent state forward to the next chunk boundary.
+            end_decay = (cum_g[:, :, -1:] - cum_g).exp().unsqueeze(-1)
+            decayed_keys = k_chunk * end_decay
+            seq_state = seq_state * exp_cum_g[:, :, -1, None, None] + torch.einsum(
+                "bhcv,bhck->bhvk", transformed_values, decayed_keys
+            )
+
+        output[0, begin:end].copy_(
+            seq_output.transpose(1, 2).contiguous().to(initial_dtype).squeeze(0)
+        )
+        final_state[seq_idx].copy_(seq_state.squeeze(0).to(state_dtype).contiguous())
+
+    return output, final_state
diff --git a/vllm/model_executor/layers/mamba/ops/mamba_ssm.py b/vllm/model_executor/layers/mamba/ops/mamba_ssm.py
index 1cd077758326..2aef33375771 100644
--- a/vllm/model_executor/layers/mamba/ops/mamba_ssm.py
+++ b/vllm/model_executor/layers/mamba/ops/mamba_ssm.py
@@ -4,16 +4,190 @@
 # Copyright (c) 2024, Tri Dao, Albert Gu.
 # Adapted from https://github.com/state-spaces/mamba/blob/v2.2.4/mamba_ssm/ops/triton/selective_state_update.py
 
+import functools
+import json
+import os
+from contextlib import contextmanager
+from typing import Any
+
 import torch
 from packaging import version
 
+import vllm.envs as envs
 from vllm import _custom_ops as ops
+from vllm.logger import init_logger
 from vllm.model_executor.layers.mamba.ops.triton_helpers import fast_exp
+from vllm.platforms import current_platform
 from vllm.triton_utils import HAS_TRITON, tl, triton
-from vllm.v1.attention.backends.utils import PAD_SLOT_ID
+from vllm.v1.attention.backends.utils import NULL_BLOCK_ID
+
+logger = init_logger(__name__)
 
 TRITON3 = HAS_TRITON and (version.parse(triton.__version__) >= version.parse("3.0.0"))
 
+
+# ---------------------------------------------------------------------------
+# JSON config loading
+# ---------------------------------------------------------------------------
+
+_CONFIGS_DIR = os.path.join(
+    os.path.dirname(os.path.realpath(__file__)), "configs", "selective_state_update"
+)
+
+
+def get_ssm_config_file_name(
+    headdim: int, dstate: int, cache_dtype: str, device_name: str
+) -> str:
+    """Return the JSON filename for the given kernel shape.
+
+    Layout: ``configs/selective_state_update/
+    headdim=<H>,dstate=<D>,device_name=<dev>,cache_dtype=<dt>.json``.
+    """
+    return (
+        f"headdim={headdim},dstate={dstate},"
+        f"device_name={device_name},cache_dtype={cache_dtype}.json"
+    )
+
+
+def get_ssm_device_name() -> str:
+    return current_platform.get_device_name().replace(" ", "_")
+
+
+def _canonical_cache_dtype(cache_dtype: str) -> str:
+    """Canonical key for config lookup. bf16 and fp16 share the same tuned
+    configs because the kernel only sees bit width when accessing state."""
+    return "float16" if cache_dtype == "bfloat16" else cache_dtype
+
+
+@functools.cache
+def get_ssm_configs(
+    headdim: int, dstate: int, cache_dtype: str
+) -> dict[int, Any] | None:
+    """
+    Return tuned (BLOCK_SIZE_M, num_warps) configs for *selective_state_update*
+    keyed by ``effective_batch = batch * nheads``, or ``None`` if no config
+    file is found for the (headdim, dstate, cache_dtype, device) combination.
+
+    They can be generated with:
+        benchmarks/kernels/benchmark_selective_state_update.py --save-configs
+    """
+    cache_dtype = _canonical_cache_dtype(cache_dtype)
+    device_name = get_ssm_device_name()
+    json_file_name = get_ssm_config_file_name(headdim, dstate, cache_dtype, device_name)
+
+    config_file_paths: list[str] = []
+
+    # User-supplied override
+    user_defined_config_folder = envs.VLLM_TUNED_CONFIG_FOLDER
+    if user_defined_config_folder is not None:
+        config_file_paths.append(
+            os.path.join(user_defined_config_folder, json_file_name)
+        )
+
+    # Bundled default
+    config_file_paths.append(os.path.join(_CONFIGS_DIR, json_file_name))
+
+    for path in config_file_paths:
+        if os.path.exists(path):
+            with open(path) as f:
+                logger.info_once(
+                    "Using SSM config from %s for selective_state_update.",
+                    path,
+                    scope="global",
+                )
+                raw = json.load(f)
+                if isinstance(raw, dict):
+                    # triton_version included in the config file only for reference
+                    raw.pop("triton_version", None)
+                    return {int(k): v for k, v in raw.items() if k.isdigit()}
+
+    logger.warning_once(
+        "Using default Mamba SSU config. Performance might be sub-optimal! "
+        "Config file not found at %s",
+        ", ".join(config_file_paths),
+    )
+    return None
+
+
+def _get_default_ssm_launch_config(
+    dstate: int,
+    is_blackwell: bool,
+) -> tuple[int, int]:
+    """Hard-coded fallback heuristic used when no tuned config is available."""
+    BLOCK_SIZE_M, num_warps = 4, 8
+    if dstate <= 16:
+        BLOCK_SIZE_M, num_warps = 32, 4
+    elif dstate <= 32:
+        BLOCK_SIZE_M, num_warps = 16, 4
+    elif dstate <= 64:
+        BLOCK_SIZE_M, num_warps = 8, 4
+    else:
+        if is_blackwell:
+            BLOCK_SIZE_M, num_warps = 32, 8
+        elif dstate <= 128:
+            BLOCK_SIZE_M, num_warps = 4, 4
+    return BLOCK_SIZE_M, num_warps
+
+
+@functools.cache
+def _try_get_optimal_ssm_config_cached(
+    headdim: int,
+    dstate: int,
+    batch: int,
+    nheads: int,
+    cache_dtype: str,
+    is_blackwell: bool,
+) -> tuple[int, int]:
+    """Cached resolution. See :func:`try_get_optimal_ssm_config`."""
+    effective_batch = batch * nheads
+    configs = get_ssm_configs(headdim, dstate, cache_dtype)
+    if configs:
+        # Pick the closest effective_batch in the tuned grid (MoE strategy).
+        closest = min(configs.keys(), key=lambda x: abs(x - effective_batch))
+        cfg = configs[closest]
+        return cfg["BLOCK_SIZE_M"], cfg["num_warps"]
+
+    return _get_default_ssm_launch_config(dstate, is_blackwell)
+
+
+# Override hook for benchmarks/tests, see `override_ssm_config`.
+_ssm_config_override: tuple[int, int] | None = None
+
+
+@contextmanager
+def override_ssm_config(config: tuple[int, int]):
+    """Pin ``try_get_optimal_ssm_config`` to ``config`` for the duration of
+    the context. Used by the tuning benchmark to time specific configs."""
+    global _ssm_config_override
+    prev = _ssm_config_override
+    _ssm_config_override = config
+    try:
+        yield
+    finally:
+        _ssm_config_override = prev
+
+
+def try_get_optimal_ssm_config(
+    headdim: int,
+    dstate: int,
+    batch: int,
+    nheads: int,
+    cache_dtype: str,
+    is_blackwell: bool,
+) -> tuple[int, int]:
+    """Return (BLOCK_SIZE_M, num_warps) for the given kernel shape.
+
+    Tuning is keyed on ``effective_batch = batch * nheads`` (the kernel grid
+    scales with the product), so configs transfer across (model, TP) combos
+    sharing ``(headdim, dstate, cache_dtype)``.
+    """
+    if _ssm_config_override is not None:
+        return _ssm_config_override
+    return _try_get_optimal_ssm_config_cached(
+        headdim, dstate, batch, nheads, cache_dtype, is_blackwell
+    )
+
+
 if TRITON3:
 
     @triton.jit
@@ -28,6 +202,21 @@ def softplus(dt):
         return dt
 
 
+@triton.jit
+def convert_rs_fp16x2(x: tl.tensor, rand: tl.tensor) -> tl.tensor:
+    y = tl.inline_asm_elementwise(
+        asm="""{
+cvt.rs.f16x2.f32 $0, $2, $1, $3;
+}""",
+        constraints="=r,r,r,r,r",
+        args=(x, rand),
+        dtype=tl.float16,
+        is_pure=True,
+        pack=2,
+    )
+    return y
+
+
 @triton.heuristics({"HAS_DT_BIAS": lambda args: args["dt_bias_ptr"] is not None})
 @triton.heuristics({"HAS_D": lambda args: args["D_ptr"] is not None})
 @triton.heuristics({"HAS_Z": lambda args: args["z_ptr"] is not None})
@@ -48,6 +237,7 @@ def softplus(dt):
 def _selective_scan_update_kernel(
     # Pointers to matrices
     state_ptr,
+    rand_seed_ptr,
     x_ptr,
     dt_ptr,
     dt_bias_ptr,
@@ -59,7 +249,7 @@ def _selective_scan_update_kernel(
     out_ptr,
     state_batch_indices_ptr,
     dst_state_batch_indices_ptr,
-    pad_slot_id,
+    null_block_id,
     num_accepted_tokens_ptr,
     cu_seqlens_ptr,
     # Matrix dimensions
@@ -113,6 +303,8 @@ def _selective_scan_update_kernel(
     IS_SPEC_DECODING: tl.constexpr,
     IS_VARLEN: tl.constexpr,
     BLOCK_SIZE_DSTATE: tl.constexpr,
+    USE_RS_ROUNDING: tl.constexpr,
+    PHILOX_ROUNDS: tl.constexpr,
 ):
     pid_m = tl.program_id(axis=0)
     pid_b = tl.program_id(axis=1)
@@ -185,7 +377,7 @@ def _selective_scan_update_kernel(
 
     mask = (offs_m[:, None] < dim) & (offs_n[None, :] < dstate)
     if HAS_STATE_BATCH_INDICES:
-        mask &= state_batch_idx != pad_slot_id
+        mask &= state_batch_idx != null_block_id
     state = tl.load(state_ptrs, mask=mask, other=0.0).to(tl.float32)
 
     if HAS_DT_BIAS:
@@ -239,7 +431,7 @@ def _selective_scan_update_kernel(
         if IS_SPEC_DECODING:
             dst_idx_ptr = dst_state_batch_indices_ptr + i_t * stride_dst_state_indices_T
             token_dst_idx = tl.load(dst_idx_ptr).to(tl.int64)
-            if token_dst_idx != pad_slot_id:
+            if token_dst_idx != null_block_id:
                 token_dst_ptrs = (
                     state_ptr_base
                     + token_dst_idx * stride_state_batch
@@ -267,7 +459,35 @@ def _selective_scan_update_kernel(
             z_ptr += stride_z_batch
 
     if not IS_SPEC_DECODING:
-        tl.store(dst_state_ptrs, state.to(dst_state_ptrs.dtype.element_ty), mask=mask)
+        if USE_RS_ROUNDING:
+            # Load random seed
+            rand_seed = tl.load(rand_seed_ptr)
+            # Generate random offsets for each element in state
+            if HAS_STATE_BATCH_INDICES:
+                rand_offsets = (
+                    state_batch_idx * stride_state_batch + pid_h * stride_state_head
+                )
+            else:
+                rand_offsets = pid_b * stride_state_batch + pid_h * stride_state_head
+            rand_offsets += (
+                offs_m[:, None] * stride_state_dim
+                + offs_n[None, :] * stride_state_dstate
+            )
+            # Generate random 32-bits for each element in state
+            if PHILOX_ROUNDS > 0:
+                rand = tl.randint(rand_seed, rand_offsets, PHILOX_ROUNDS)
+            else:
+                rand = tl.randint(rand_seed, rand_offsets)
+            # Convert state to fp16 with RS rounding
+            state = convert_rs_fp16x2(state, rand)
+            tl.static_assert(state.dtype == tl.float16, "state must be fp16")
+            tl.static_assert(
+                dst_state_ptrs.dtype.element_ty == tl.float16,
+                "dst_state_ptrs must be fp16",
+            )
+        else:
+            state = state.to(dst_state_ptrs.dtype.element_ty)
+        tl.store(dst_state_ptrs, state, mask=mask)
 
 
 def selective_state_update(
@@ -277,17 +497,19 @@ def selective_state_update(
     A,
     B,
     C,
-    D=None,
+    D,
+    dt_bias,
     z=None,
-    dt_bias=None,
     dt_softplus=False,
     state_batch_indices=None,
     dst_state_batch_indices=None,
-    pad_slot_id=PAD_SLOT_ID,
+    null_block_id=NULL_BLOCK_ID,
     out=None,
     num_accepted_tokens=None,
     cu_seqlens=None,
     is_blackwell=False,
+    enable_stochastic_rounding=False,
+    cache_philox_rounds=0,
 ):
     """
     Argument:
@@ -300,12 +522,12 @@ def selective_state_update(
         D: (dim,) or (nheads, dim)
         z: (batch, dim) or (batch, nheads, dim)
         dt_bias: (dim,) or (nheads, dim)
-        pad_slot_id: int
-            if cache_indices is passed, lets the kernel identify padded
-            entries that will not be processed,
-            for example: cache_indices = [pad_slot_id, 1, 20, pad_slot_id]
-            in this case, the kernel will not process entries at
-            indices 0 and 3
+        null_block_id: int
+            if state_batch_indices is passed, lets the kernel identify
+            padded entries that will not be processed,
+            for example: state_batch_indices = [null_block_id, 1, 20,
+            null_block_id] in this case, the kernel will not process
+            entries at indices 0 and 3
         out: Preallocated ssm output tensor. Assume same shape as x.
              In-place updated.
         num_accepted_tokens: (batch,)
@@ -326,11 +548,11 @@ def selective_state_update(
         B = B.unsqueeze(1)
     if C.dim() == 2:
         C = C.unsqueeze(1)
-    if D is not None and D.dim() == 1:
+    if D.dim() == 1:
         D = D.unsqueeze(0)
     if z is not None and z.dim() == 2:
         z = z.unsqueeze(1)
-    if dt_bias is not None and dt_bias.dim() == 1:
+    if dt_bias.dim() == 1:
         dt_bias = dt_bias.unsqueeze(0)
     if out.dim() == 2:
         out = out.unsqueeze(1)
@@ -362,12 +584,10 @@ def selective_state_update(
     assert nheads % ngroups == 0, "nheads must be divisible by ngroups"
     assert B.shape == (batch, ngroups, dstate)
     assert C.shape == B.shape
-    if D is not None:
-        assert D.shape == (nheads, dim)
+    assert D.shape == (nheads, dim)
     if z is not None:
         assert z.shape == x.shape
-    if dt_bias is not None:
-        assert dt_bias.shape == (nheads, dim)
+    assert dt_bias.shape == (nheads, dim)
     if state_batch_indices is not None:
         assert state_batch_indices.shape[0] >= N
         assert state_batch_indices.shape[1] >= max_seqlen
@@ -394,24 +614,11 @@ def selective_state_update(
         else (0, 0)
     )
     # We don't want autotune since it will overwrite the state.
-    # We instead tune by hand based on dstate.
-
-    # Default
-    BLOCK_SIZE_M, num_warps = 4, 8
-
-    if dstate <= 16:
-        BLOCK_SIZE_M, num_warps = 32, 4
-    elif dstate <= 32:
-        BLOCK_SIZE_M, num_warps = 16, 4
-    elif dstate <= 64:
-        BLOCK_SIZE_M, num_warps = 8, 4
-    else:
-        # dstate > 64
-        if is_blackwell:
-            # Optimized for B200 with dstate>64
-            BLOCK_SIZE_M, num_warps = 32, 8
-        elif dstate <= 128:
-            BLOCK_SIZE_M, num_warps = 4, 4
+    # Load from JSON config if available, otherwise fall back to heuristic.
+    cache_dtype = str(state.dtype).removeprefix("torch.")
+    BLOCK_SIZE_M, num_warps = try_get_optimal_ssm_config(
+        dim, dstate, N, nheads, cache_dtype, is_blackwell
+    )
 
     tie_hdim = (
         A.stride(-1) == 0
@@ -419,9 +626,16 @@ def selective_state_update(
         and dt.stride(-1) == 0
         and dt_bias.stride(-1) == 0
     )
+    rand_seed = (
+        torch.randint(0, 2**32, (1,), device=state.device)
+        if enable_stochastic_rounding
+        else None
+    )
+
     with torch.accelerator.device_index(x.device.index):
         _selective_scan_update_kernel[grid](
             state,
+            rand_seed,
             x,
             dt,
             dt_bias,
@@ -433,7 +647,7 @@ def selective_state_update(
             out,
             state_batch_indices,
             dst_state_batch_indices,
-            pad_slot_id,
+            null_block_id,
             num_accepted_tokens,
             cu_seqlens,
             N,
@@ -451,7 +665,8 @@ def selective_state_update(
             dt.stride(0),
             dt.stride(1),
             dt.stride(2),
-            *(dt_bias.stride(0), dt_bias.stride(1)) if dt_bias is not None else 0,
+            dt_bias.stride(0),
+            dt_bias.stride(1),
             A.stride(0),
             A.stride(1),
             A.stride(2),
@@ -461,7 +676,8 @@ def selective_state_update(
             C.stride(0),
             C.stride(1),
             C.stride(2),
-            *(D.stride(0), D.stride(1)) if D is not None else 0,
+            D.stride(0),
+            D.stride(1),
             z_strides[0],
             z_strides[1],
             z_strides[2],
@@ -476,6 +692,8 @@ def selective_state_update(
             tie_hdim,
             BLOCK_SIZE_M,
             num_warps=num_warps,
+            USE_RS_ROUNDING=enable_stochastic_rounding,
+            PHILOX_ROUNDS=cache_philox_rounds,
         )
 
 
@@ -493,7 +711,7 @@ def selective_scan_fn(
     query_start_loc=None,
     cache_indices=None,
     has_initial_state=None,
-    pad_slot_id=PAD_SLOT_ID,
+    null_block_id=NULL_BLOCK_ID,
     block_size=1024,
     block_idx_first_scheduled_token=None,
     block_idx_last_scheduled_token=None,
@@ -531,10 +749,10 @@ def selective_scan_fn(
         indicate if the ssm_state at the corresponding index should be
         used as initial state. Not providing argument assumes
         there's no initial state
-    pad_slot_id: int
+    null_block_id: int
         if cache_indices is passed, lets the kernel identify padding entries
         that will not be processed,
-        for example: cache_indices = [pad_slot_id, 1 ,20 ,pad_slot_id]
+        for example: cache_indices = [null_block_id, 1 ,20 ,null_block_id]
         in this case, the kernel will not process entries at indices 0 and 3
     block_size: int
         The block size to align the cached states to
@@ -586,7 +804,7 @@ def selective_scan_fn(
         cache_indices,
         has_initial_state,
         ssm_states,
-        pad_slot_id,
+        null_block_id,
         block_size,
         block_idx_first_scheduled_token,
         block_idx_last_scheduled_token,
diff --git a/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py b/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py
index 8057a8d32580..e5e73625f861 100644
--- a/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py
+++ b/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py
@@ -356,7 +356,7 @@ def _chunk_scan_fwd_kernel(
         )
         # If there's seq_idx, we already set cb[i, j] = 0 for seq_idx[i] != seq_idx[j].
         # So we don't need masking wrt seq_idx here.
-        cb *= fast_exp(dA_cs_m[:, None] - dA_cs_k[None, :])
+        cb *= fast_exp(tl.minimum(dA_cs_m[:, None] - dA_cs_k[None, :], 0.0))
         dt_k = tl.load(dt_ptrs, mask=offs_k < chunk_size - k, other=0.0).to(tl.float32)
         cb *= dt_k
         if IS_CAUSAL:
diff --git a/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py b/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py
index 37532e6db95b..8402d5291e6b 100644
--- a/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py
+++ b/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py
@@ -280,7 +280,7 @@ def _chunk_state_fwd_kernel(
         dt_k = tl.load(dt_ptrs, mask=offs_k < chunk_size_limit - k, other=0.0).to(
             tl.float32
         )
-        scale = fast_exp(dA_cs_last - dA_cs_k) * dt_k
+        scale = fast_exp(tl.minimum(dA_cs_last - dA_cs_k, 0.0)) * dt_k
         b *= scale[:, None]
         b = b.to(x_ptr.dtype.element_ty)
         acc += tl.dot(x, b)
diff --git a/vllm/model_executor/layers/mamba/ops/ssu_dispatch.py b/vllm/model_executor/layers/mamba/ops/ssu_dispatch.py
new file mode 100644
index 000000000000..92258ef204bd
--- /dev/null
+++ b/vllm/model_executor/layers/mamba/ops/ssu_dispatch.py
@@ -0,0 +1,278 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Dispatch module for Mamba selective state update (SSU) backends.
+
+Provides a unified `selective_state_update` function that dispatches to
+either the Triton or FlashInfer backend based on the configured
+`MambaBackendEnum`. Follows SGLang's dispatch pattern adapted for vLLM.
+"""
+
+from abc import ABC, abstractmethod
+
+import torch
+
+from vllm.config.mamba import MambaBackendEnum, MambaConfig
+from vllm.logger import init_logger
+from vllm.v1.attention.backends.registry import MambaAttentionBackendEnum
+from vllm.v1.attention.backends.utils import NULL_BLOCK_ID
+from vllm.v1.kv_cache_interface import KVCacheConfig, MambaSpec
+
+logger = init_logger(__name__)
+
+
+class MambaSSUBackend(ABC):
+    """Abstract base class for Mamba SSU backends."""
+
+    def __init__(self, mamba_config: MambaConfig):
+        self._mamba_config = mamba_config
+
+    @property
+    @abstractmethod
+    def name(self) -> str: ...
+
+    @abstractmethod
+    def __call__(
+        self,
+        state: torch.Tensor,
+        x: torch.Tensor,
+        dt: torch.Tensor,
+        A: torch.Tensor,
+        B: torch.Tensor,
+        C: torch.Tensor,
+        D: torch.Tensor,
+        dt_bias: torch.Tensor,
+        z: torch.Tensor | None = None,
+        dt_softplus: bool = False,
+        state_batch_indices: torch.Tensor | None = None,
+        dst_state_batch_indices: torch.Tensor | None = None,
+        null_block_id: int = NULL_BLOCK_ID,
+        out: torch.Tensor | None = None,
+        num_accepted_tokens: torch.Tensor | None = None,
+        cu_seqlens: torch.Tensor | None = None,
+        is_blackwell: bool = False,
+    ) -> None: ...
+
+
+class TritonSSUBackend(MambaSSUBackend):
+    """Triton-based SSU backend (vLLM's default)."""
+
+    def __init__(self, mamba_config: MambaConfig):
+        super().__init__(mamba_config)
+        from vllm.model_executor.layers.mamba.ops.mamba_ssm import (
+            selective_state_update as _triton_selective_state_update,
+        )
+
+        self._kernel = _triton_selective_state_update
+
+    @property
+    def name(self) -> str:
+        return "triton"
+
+    def __call__(
+        self,
+        state: torch.Tensor,
+        x: torch.Tensor,
+        dt: torch.Tensor,
+        A: torch.Tensor,
+        B: torch.Tensor,
+        C: torch.Tensor,
+        D: torch.Tensor,
+        dt_bias: torch.Tensor,
+        z: torch.Tensor | None = None,
+        dt_softplus: bool = False,
+        state_batch_indices: torch.Tensor | None = None,
+        dst_state_batch_indices: torch.Tensor | None = None,
+        null_block_id: int = NULL_BLOCK_ID,
+        out: torch.Tensor | None = None,
+        num_accepted_tokens: torch.Tensor | None = None,
+        cu_seqlens: torch.Tensor | None = None,
+        is_blackwell: bool = False,
+    ) -> None:
+        self._kernel(
+            state,
+            x,
+            dt,
+            A,
+            B,
+            C,
+            D=D,
+            z=z,
+            dt_bias=dt_bias,
+            dt_softplus=dt_softplus,
+            state_batch_indices=state_batch_indices,
+            dst_state_batch_indices=dst_state_batch_indices,
+            null_block_id=null_block_id,
+            out=out,
+            num_accepted_tokens=num_accepted_tokens,
+            cu_seqlens=cu_seqlens,
+            is_blackwell=is_blackwell,
+            enable_stochastic_rounding=self._mamba_config.enable_stochastic_rounding,
+            cache_philox_rounds=self._mamba_config.stochastic_rounding_philox_rounds,
+        )
+
+
+class FlashInferSSUBackend(MambaSSUBackend):
+    """FlashInfer-based SSU backend."""
+
+    def __init__(self, mamba_config: MambaConfig):
+        super().__init__(mamba_config)
+        try:
+            from flashinfer.mamba import selective_state_update as _fi_ssu
+        except ImportError as e:
+            raise ImportError(
+                "FlashInfer is required for the flashinfer Mamba SSU backend. "
+                "Please install flashinfer (>= 0.6.4): "
+                "pip install flashinfer-python"
+            ) from e
+        self._kernel = _fi_ssu
+
+    @property
+    def name(self) -> str:
+        return "flashinfer"
+
+    def __call__(
+        self,
+        state: torch.Tensor,
+        x: torch.Tensor,
+        dt: torch.Tensor,
+        A: torch.Tensor,
+        B: torch.Tensor,
+        C: torch.Tensor,
+        D: torch.Tensor,
+        dt_bias: torch.Tensor,
+        z: torch.Tensor | None = None,
+        dt_softplus: bool = False,
+        state_batch_indices: torch.Tensor | None = None,
+        dst_state_batch_indices: torch.Tensor | None = None,
+        null_block_id: int = NULL_BLOCK_ID,
+        out: torch.Tensor | None = None,
+        num_accepted_tokens: torch.Tensor | None = None,
+        cu_seqlens: torch.Tensor | None = None,
+        is_blackwell: bool = False,
+    ) -> None:
+        rand_seed = (
+            torch.randint(0, 2**32, (1,), device=state.device)
+            if self._mamba_config.enable_stochastic_rounding
+            else None
+        )
+
+        self._kernel(
+            state,
+            x,
+            dt,
+            A,
+            B,
+            C,
+            D=D,
+            z=z,
+            dt_bias=dt_bias,
+            dt_softplus=dt_softplus,
+            state_batch_indices=state_batch_indices,
+            dst_state_batch_indices=dst_state_batch_indices,
+            cu_seqlens=cu_seqlens,
+            num_accepted_tokens=num_accepted_tokens,
+            cache_steps=state_batch_indices.size(-1)
+            if cu_seqlens is not None and state_batch_indices is not None
+            else 0,
+            pad_slot_id=null_block_id,
+            out=out,
+            rand_seed=rand_seed,
+            philox_rounds=self._mamba_config.stochastic_rounding_philox_rounds or 10,
+        )
+
+
+_BACKEND_REGISTRY: dict[MambaBackendEnum, type[MambaSSUBackend]] = {
+    MambaBackendEnum.TRITON: TritonSSUBackend,
+    MambaBackendEnum.FLASHINFER: FlashInferSSUBackend,
+}
+
+_mamba_ssu_backend: MambaSSUBackend | None = None
+
+
+def initialize_mamba_ssu_backend(
+    mamba_config: MambaConfig,
+    kv_cache_config: KVCacheConfig,
+) -> None:
+    """Initialize the global Mamba SSU backend.
+
+    No-op if `kv_cache_config` contains no specs that call
+    selective_state_update.
+    """
+    if not any(
+        isinstance(g.kv_cache_spec, MambaSpec)
+        and g.kv_cache_spec.mamba_type
+        in (MambaAttentionBackendEnum.MAMBA1, MambaAttentionBackendEnum.MAMBA2)
+        for g in kv_cache_config.kv_cache_groups
+    ):
+        return
+
+    global _mamba_ssu_backend
+
+    backend = mamba_config.backend
+    if backend not in _BACKEND_REGISTRY:
+        raise ValueError(
+            f"Unknown Mamba SSU backend: {backend}. "
+            f"Valid options: {list(_BACKEND_REGISTRY.keys())}"
+        )
+
+    backend_cls = _BACKEND_REGISTRY[backend]
+    if isinstance(_mamba_ssu_backend, backend_cls):
+        return
+
+    _mamba_ssu_backend = backend_cls(mamba_config)
+    logger.info("Using %s Mamba SSU backend.", _mamba_ssu_backend.name)
+
+
+def get_mamba_ssu_backend() -> MambaSSUBackend:
+    """Get the current Mamba SSU backend. Raises if not initialized."""
+    if _mamba_ssu_backend is None:
+        raise RuntimeError(
+            "Mamba SSU backend has not been initialized. "
+            "Call initialize_mamba_ssu_backend() first."
+        )
+    return _mamba_ssu_backend
+
+
+def selective_state_update(
+    state: torch.Tensor,
+    x: torch.Tensor,
+    dt: torch.Tensor,
+    A: torch.Tensor,
+    B: torch.Tensor,
+    C: torch.Tensor,
+    D: torch.Tensor,
+    dt_bias: torch.Tensor,
+    z: torch.Tensor | None = None,
+    dt_softplus: bool = False,
+    state_batch_indices: torch.Tensor | None = None,
+    dst_state_batch_indices: torch.Tensor | None = None,
+    null_block_id: int = NULL_BLOCK_ID,
+    out: torch.Tensor | None = None,
+    num_accepted_tokens: torch.Tensor | None = None,
+    cu_seqlens: torch.Tensor | None = None,
+    is_blackwell: bool = False,
+) -> None:
+    """Unified dispatch for Mamba selective state update.
+
+    Delegates to the initialized backend (Triton or FlashInfer).
+    """
+    get_mamba_ssu_backend()(
+        state,
+        x,
+        dt,
+        A,
+        B,
+        C,
+        D,
+        dt_bias,
+        z=z,
+        dt_softplus=dt_softplus,
+        state_batch_indices=state_batch_indices,
+        dst_state_batch_indices=dst_state_batch_indices,
+        null_block_id=null_block_id,
+        out=out,
+        num_accepted_tokens=num_accepted_tokens,
+        cu_seqlens=cu_seqlens,
+        is_blackwell=is_blackwell,
+    )
diff --git a/vllm/model_executor/layers/mamba/short_conv.py b/vllm/model_executor/layers/mamba/short_conv.py
index d36dc00964af..79976dfff14e 100644
--- a/vllm/model_executor/layers/mamba/short_conv.py
+++ b/vllm/model_executor/layers/mamba/short_conv.py
@@ -17,6 +17,7 @@
 from vllm.model_executor.layers.mamba.mamba_utils import (
     MambaStateDtypeCalculator,
     MambaStateShapeCalculator,
+    is_conv_state_dim_first,
 )
 from vllm.model_executor.layers.mamba.ops.causal_conv1d import (
     causal_conv1d_fn,
@@ -24,6 +25,7 @@
 )
 from vllm.utils.torch_utils import direct_register_custom_op
 from vllm.v1.attention.backend import AttentionMetadata
+from vllm.v1.attention.backends.registry import MambaAttentionBackendEnum
 from vllm.v1.attention.backends.short_conv_attn import ShortConvAttentionMetadata
 
 
@@ -112,13 +114,17 @@ def forward_cuda(
         # chunked prefill modes; they are computed at top-level model forward
         # since they stay the same and reused for all mamba layers in the same
         # iteration.
-        attn_metadata: AttentionMetadata = forward_context.attn_metadata
-        if attn_metadata is not None:
-            assert isinstance(attn_metadata, dict)
-            attn_metadata = attn_metadata[self.prefix]
+        attn_metadata_raw = forward_context.attn_metadata
+        attn_metadata: AttentionMetadata | None = None
+        if attn_metadata_raw is not None:
+            assert isinstance(attn_metadata_raw, dict)
+            attn_metadata = attn_metadata_raw[self.prefix]
             assert isinstance(attn_metadata, ShortConvAttentionMetadata)
-            self_kv_cache = self.kv_cache
-            conv_state = self_kv_cache[0].transpose(-1, -2)
+            conv_state = (
+                self.kv_cache[0]
+                if is_conv_state_dim_first()
+                else self.kv_cache[0].transpose(-1, -2)
+            )
             state_indices_tensor_p = attn_metadata.state_indices_tensor_p
             state_indices_tensor_d = attn_metadata.state_indices_tensor_d
             has_initial_states_p = attn_metadata.has_initial_states_p
@@ -218,8 +224,8 @@ def get_state_shape(self) -> tuple[tuple[int, ...]]:
         )
 
     @property
-    def mamba_type(self) -> str:
-        return "short_conv"
+    def mamba_type(self) -> MambaAttentionBackendEnum:
+        return MambaAttentionBackendEnum.SHORT_CONV
 
 
 def short_conv(
diff --git a/vllm/model_executor/layers/mhc.py b/vllm/model_executor/layers/mhc.py
new file mode 100644
index 000000000000..cb3965f764ba
--- /dev/null
+++ b/vllm/model_executor/layers/mhc.py
@@ -0,0 +1,301 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+
+# this import will also register the custom ops
+import vllm.model_executor.kernels.mhc as mhc_kernels
+from vllm.model_executor.custom_op import CustomOp
+
+
+# --8<-- [start:mhc_pre]
+@CustomOp.register("mhc_pre")
+class MHCPreOp(CustomOp):
+    """MHC pre block.
+
+    Computes mix logits from RMS-normalized HC residual streams, then
+    returns post_mix, comb_mix, and
+    layer_input = sum_i pre_mix_i * residual_i.
+    """
+
+    # --8<-- [end:mhc_pre]
+    @classmethod
+    def enabled(cls) -> bool:
+        return True
+
+    def forward_cuda(
+        self,
+        residual: torch.Tensor,
+        fn: torch.Tensor,
+        hc_scale: torch.Tensor,
+        hc_base: torch.Tensor,
+        rms_eps: float,
+        hc_pre_eps: float,
+        hc_sinkhorn_eps: float,
+        hc_post_mult_value: float,
+        sinkhorn_repeat: int,
+        n_splits: int = 1,
+        norm_weight: torch.Tensor | None = None,
+        norm_eps: float = 0.0,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        return torch.ops.vllm.mhc_pre_tilelang(
+            residual,
+            fn,
+            hc_scale,
+            hc_base,
+            rms_eps,
+            hc_pre_eps,
+            hc_sinkhorn_eps,
+            hc_post_mult_value,
+            sinkhorn_repeat,
+            n_splits,
+            norm_weight,
+            norm_eps,
+        )
+
+    def forward_hip(
+        self,
+        residual: torch.Tensor,
+        fn: torch.Tensor,
+        hc_scale: torch.Tensor,
+        hc_base: torch.Tensor,
+        rms_eps: float,
+        hc_pre_eps: float,
+        hc_sinkhorn_eps: float,
+        hc_post_mult_value: float,
+        sinkhorn_repeat: int,
+        n_splits: int = 1,
+        norm_weight: torch.Tensor | None = None,
+        norm_eps: float = 0.0,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        # TODO: Reenable aiter after we are at the aiter
+        # version that has this bugfix
+        # https://github.com/ROCm/aiter/commit/b639cb63bcac4672dce33a731fad042a65cb3649
+        # It has accuracy problem at large number of tokens.
+        # hidden_size = residual.shape[-1]
+        # if hidden_size % 256 == 0:
+        #     return torch.ops.vllm.mhc_pre_aiter(
+        #         residual,
+        #         fn,
+        #         hc_scale,
+        #         hc_base,
+        #         rms_eps,
+        #         hc_pre_eps,
+        #         hc_sinkhorn_eps,
+        #         hc_post_mult_value,
+        #         sinkhorn_repeat,
+        #     )
+        # else:
+        return mhc_kernels.mhc_pre_torch(
+            residual,
+            fn,
+            hc_scale,
+            hc_base,
+            rms_eps,
+            hc_pre_eps,
+            hc_sinkhorn_eps,
+            hc_post_mult_value,
+            sinkhorn_repeat,
+        )
+
+    def forward_native(self, *args, **kwargs):
+        raise NotImplementedError("Native implementation of mhc_pre is not available")
+
+
+# --8<-- [start:mhc_post]
+@CustomOp.register("mhc_post")
+class MHCPostOp(CustomOp):
+    """MHC post block.
+
+    Combines the layer output with the HC residual streams:
+    out_j = post_layer_mix_j * x + sum_i comb_res_mix_ij * residual_i.
+    """
+
+    # --8<-- [end:mhc_post]
+
+    @classmethod
+    def enabled(cls) -> bool:
+        return True
+
+    def forward_cuda(
+        self,
+        x: torch.Tensor,
+        residual: torch.Tensor,
+        post_layer_mix: torch.Tensor,
+        comb_res_mix: torch.Tensor,
+    ) -> torch.Tensor:
+        return torch.ops.vllm.mhc_post_tilelang(
+            x, residual, post_layer_mix, comb_res_mix
+        )
+
+    def forward_hip(
+        self,
+        x: torch.Tensor,
+        residual: torch.Tensor,
+        post_layer_mix: torch.Tensor,
+        comb_res_mix: torch.Tensor,
+    ) -> torch.Tensor:
+        # TODO: Reenable aiter after we are at the aiter
+        # version that has this bugfix
+        # https://github.com/ROCm/aiter/commit/b639cb63bcac4672dce33a731fad042a65cb3649
+        # It has accuracy problem at large number of tokens.
+        # hidden_size = residual.shape[-1]
+        # if hidden_size % 256 == 0:
+        #     return torch.ops.vllm.mhc_post_aiter(
+        #         x,
+        #         residual,
+        #         post_layer_mix,
+        #         comb_res_mix,
+        #     )
+        # else:
+        return mhc_kernels.mhc_post_torch(
+            x,
+            residual,
+            post_layer_mix,
+            comb_res_mix,
+        )
+
+    def forward_native(self, *args, **kwargs):
+        raise NotImplementedError("Native implementation of mhc_post is not available")
+
+
+# --8<-- [start:hc_head]
+@CustomOp.register("hc_head")
+class HCHeadOp(CustomOp):
+    """HC head reduction for DeepSeek V4.
+
+    Computes gates from the RMS-normalized flattened HC residual and
+    returns out = sum_i gate_i * residual_i, collapsing hc_mult streams
+    to one.
+    """
+
+    # --8<-- [end:hc_head]
+    @classmethod
+    def enabled(cls) -> bool:
+        return True
+
+    def forward_cuda(
+        self,
+        hidden_states: torch.Tensor,
+        hc_fn: torch.Tensor,
+        hc_scale: torch.Tensor,
+        hc_base: torch.Tensor,
+        rms_norm_eps: float,
+        hc_eps: float,
+    ) -> torch.Tensor:
+        hc_mult, hidden_size = hidden_states.shape[-2:]
+        outer_shape = hidden_states.shape[:-2]
+        hs_flat = hidden_states.view(-1, hc_mult, hidden_size)
+        num_tokens = hs_flat.shape[0]
+
+        out = torch.empty(
+            num_tokens, hidden_size, dtype=torch.bfloat16, device=hidden_states.device
+        )
+        torch.ops.vllm.hc_head_fused_kernel_tilelang(
+            hs_flat,
+            hc_fn,
+            hc_scale,
+            hc_base,
+            out,
+            hidden_size,
+            rms_norm_eps,
+            hc_eps,
+            hc_mult,
+        )
+        return out.view(*outer_shape, hidden_size)
+
+    def forward_hip(
+        self,
+        hidden_states: torch.Tensor,
+        hc_fn: torch.Tensor,
+        hc_scale: torch.Tensor,
+        hc_base: torch.Tensor,
+        rms_norm_eps: float,
+        hc_eps: float,
+    ) -> torch.Tensor:
+        hc_mult, hidden_size = hidden_states.shape[-2:]
+        outer_shape = hidden_states.shape[:-2]
+        hs_flat = hidden_states.view(-1, hc_mult, hidden_size)
+        num_tokens = hs_flat.shape[0]
+
+        out = torch.empty(
+            num_tokens, hidden_size, dtype=torch.bfloat16, device=hidden_states.device
+        )
+        torch.ops.vllm.hc_head_triton(
+            hs_flat,
+            hc_fn,
+            hc_scale,
+            hc_base,
+            out,
+            hidden_size,
+            rms_norm_eps,
+            hc_eps,
+            hc_mult,
+        )
+        return out.view(*outer_shape, hidden_size)
+
+    def forward_native(self, *args, **kwargs):
+        raise NotImplementedError("Native implementation of hc_head is not available")
+
+
+# --8<-- [start:mhc_fused_post_pre]
+@CustomOp.register("mhc_fused_post_pre")
+class MHCFusedPostPreOp(CustomOp):
+    """Fused MHC post block followed by the next MHC pre block.
+
+    Equivalent to applying MHCPostOp and then MHCPreOp to the updated
+    residual streams, returning residual_cur, post_mix_cur, comb_mix_cur,
+    and layer_input_cur.
+    """
+
+    # --8<-- [end:mhc_fused_post_pre]
+    @classmethod
+    def enabled(cls) -> bool:
+        return True
+
+    def forward_cuda(
+        self,
+        x: torch.Tensor,
+        residual: torch.Tensor,
+        post_layer_mix: torch.Tensor,
+        comb_res_mix: torch.Tensor,
+        fn: torch.Tensor,
+        hc_scale: torch.Tensor,
+        hc_base: torch.Tensor,
+        rms_eps: float,
+        hc_pre_eps: float,
+        hc_sinkhorn_eps: float,
+        hc_post_mult_value: float,
+        sinkhorn_repeat: int,
+        n_splits: int = 1,
+        tile_n: int = 1,
+        norm_weight: torch.Tensor | None = None,
+        norm_eps: float = 0.0,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        return torch.ops.vllm.mhc_fused_post_pre_tilelang(
+            x,
+            residual,
+            post_layer_mix,
+            comb_res_mix,
+            fn,
+            hc_scale,
+            hc_base,
+            rms_eps,
+            hc_pre_eps,
+            hc_sinkhorn_eps,
+            hc_post_mult_value,
+            sinkhorn_repeat,
+            n_splits,
+            tile_n,
+            norm_weight,
+            norm_eps,
+        )
+
+    def forward_hip(self, *args, **kwargs):
+        raise NotImplementedError(
+            "Hip implementation of mhc_fused_post_pre is not available"
+        )
+
+    def forward_native(self, *args, **kwargs):
+        raise NotImplementedError(
+            "Native implementation of mhc_fused_post_pre is not available"
+        )
diff --git a/vllm/model_executor/layers/mla.py b/vllm/model_executor/layers/mla.py
index 1d3e987b7e17..856f6bb8a3cf 100644
--- a/vllm/model_executor/layers/mla.py
+++ b/vllm/model_executor/layers/mla.py
@@ -64,6 +64,7 @@ def __init__(
         cache_config: CacheConfig | None = None,
         quant_config: QuantizationConfig | None = None,
         prefix: str = "",
+        skip_topk: bool = False,
     ) -> None:
         super().__init__()
         self.hidden_size = hidden_size
@@ -87,6 +88,11 @@ def __init__(
         self.indexer_rope_emb = mla_modules.indexer_rotary_emb
         self.is_sparse = mla_modules.is_sparse
 
+        # Whether to skip top-k token selection computation in this layer.
+        # When True, the indexer will not be called, and the layer will reuse
+        # the topk_tokens buffer written by a previous layer in the same pass.
+        # Refer: https://arxiv.org/abs/2603.12201 for more details.
+        self.skip_topk = skip_topk
         if self.indexer is not None:
             assert hasattr(self.indexer, "topk_tokens")
             self.topk_tokens = self.indexer.topk_tokens
@@ -159,10 +165,8 @@ def forward(
                 positions, q[..., self.qk_nope_head_dim :], k_pe
             )
 
-        if self.indexer and self.is_sparse:
-            _topk_indices = self.indexer(
-                hidden_states, q_c, positions, self.indexer_rope_emb
-            )
+        if self.indexer and self.is_sparse and not self.skip_topk:
+            self.indexer(hidden_states, q_c, positions, self.indexer_rope_emb)
 
         if llama_4_scaling is not None:
             q *= llama_4_scaling
diff --git a/vllm/model_executor/layers/pooler/activations.py b/vllm/model_executor/layers/pooler/activations.py
index 4213ee7b85cb..b997d11b627c 100644
--- a/vllm/model_executor/layers/pooler/activations.py
+++ b/vllm/model_executor/layers/pooler/activations.py
@@ -9,7 +9,7 @@
 import torch.nn.functional as F
 from transformers import PretrainedConfig
 
-from vllm.config import ModelConfig, get_current_vllm_config
+from vllm.config import ModelConfig
 from vllm.logger import init_logger
 from vllm.utils.import_utils import resolve_obj_by_qualname
 
@@ -23,11 +23,15 @@ def get_act_fn(
     # get classification act_fn
     # Implement alignment with transformers ForSequenceClassificationLoss
     # https://github.com/huggingface/transformers/blob/57bb6db6ee4cfaccc45b8d474dfad5a17811ca60/src/transformers/loss/loss_utils.py#L92
+    num_labels: int | None = None
+    if static_num_labels:
+        num_labels = getattr(config, "num_labels", 0)
+
     problem_type = getattr(config, "problem_type", "")
     if problem_type == "regression":
         return PoolerIdentity()
     if problem_type == "single_label_classification":
-        return PoolerClassify(static_num_labels=static_num_labels)
+        return PoolerClassify(num_labels=num_labels)
     if problem_type == "multi_label_classification":
         return PoolerMultiLabelClassify()
 
@@ -45,25 +49,27 @@ def get_act_fn(
         function_name = config.sbert_ce_default_activation_function
 
     if function_name is not None:
-        assert function_name.startswith("torch.nn.modules."), (
-            "Loading of activation functions is restricted to "
-            "torch.nn.modules for security reasons"
-        )
+        if not function_name.startswith("torch.nn.modules."):
+            raise ValueError(
+                "Loading of activation functions is restricted to "
+                "torch.nn.modules for security reasons"
+            )
         fn = resolve_obj_by_qualname(function_name)()
         return PoolerActivation.wraps(fn)
 
-    return PoolerClassify(static_num_labels=static_num_labels)
+    return PoolerClassify(num_labels=num_labels)
 
 
 def resolve_classifier_act_fn(
     model_config: ModelConfig,
     static_num_labels: bool = True,
     act_fn: "PoolerActivation | None" = None,
-):
+) -> "PoolerActivation":
     if act_fn is None:
         return get_act_fn(model_config.hf_config, static_num_labels)
 
-    assert callable(act_fn)
+    if not callable(act_fn):
+        raise TypeError(f"Expected a callable activation function, got {type(act_fn)}")
     return act_fn
 
 
@@ -72,7 +78,7 @@ def resolve_classifier_act_fn(
 
 class PoolerActivation(nn.Module, ABC):
     @staticmethod
-    def wraps(module: nn.Module):
+    def wraps(module: nn.Module) -> "PoolerActivation":
         if isinstance(module, nn.Identity):
             return PoolerIdentity()
         if isinstance(module, (nn.Sigmoid, nn.Softmax)):
@@ -110,20 +116,13 @@ def forward_chunk(self, pooled_data: torch.Tensor) -> torch.Tensor:
 
 
 class PoolerClassify(PoolerActivation):
-    def __init__(self, *, static_num_labels: bool = True) -> None:
+    def __init__(self, *, num_labels: int | None = None) -> None:
         super().__init__()
 
-        if static_num_labels:
-            vllm_config = get_current_vllm_config()
-            model_config = vllm_config.model_config
-            num_labels = getattr(model_config.hf_config, "num_labels", 0)
-        else:
-            num_labels = None
-
         if num_labels == 0:
             logger.warning(
                 "num_labels should be > 0 for classification "
-                "models, falling back to softmax. "
+                "models, falling back to sigmoid. "
                 "Please check if the configuration is correct."
             )
 
@@ -141,7 +140,7 @@ def forward_chunk(self, pooled_data: torch.Tensor) -> torch.Tensor:
 
 
 class LambdaPoolerActivation(PoolerActivation):
-    def __init__(self, fn: Callable[[torch.Tensor], torch.Tensor]):
+    def __init__(self, fn: Callable[[torch.Tensor], torch.Tensor]) -> None:
         super().__init__()
 
         self.fn = fn
diff --git a/vllm/model_executor/layers/pooler/seqwise/heads.py b/vllm/model_executor/layers/pooler/seqwise/heads.py
index 31a961223927..2424d841075a 100644
--- a/vllm/model_executor/layers/pooler/seqwise/heads.py
+++ b/vllm/model_executor/layers/pooler/seqwise/heads.py
@@ -103,14 +103,16 @@ class ClassifierPoolerHead(SequencePoolerHead):
     def __init__(
         self,
         classifier: ClassifierFn | None = None,
-        logit_bias: float | None = None,
+        logit_mean: float | None = None,
+        logit_sigma: float | None = None,
         head_dtype: torch.dtype | str | None = None,
         activation: ActivationFn | None = None,
     ) -> None:
         super().__init__()
 
         self.classifier = classifier
-        self.logit_bias = logit_bias
+        self.logit_mean = logit_mean
+        self.logit_sigma = logit_sigma
         self.head_dtype = head_dtype
         self.activation = activation
 
@@ -138,8 +140,11 @@ def forward(
             logits = pooled_data
 
         # logits shape: [batchsize, num_labels]
-        if self.logit_bias is not None:
-            logits -= self.logit_bias
+        # Affine score calibration: activation((logit - mean) / sigma)
+        if self.logit_mean is not None:
+            logits = logits - self.logit_mean
+        if self.logit_sigma is not None:
+            logits = logits / self.logit_sigma
 
         if self.activation is not None:
             flags = [p.use_activation for p in pooling_params]
diff --git a/vllm/model_executor/layers/pooler/seqwise/methods.py b/vllm/model_executor/layers/pooler/seqwise/methods.py
index f3c7f29d6092..d99216fc1032 100644
--- a/vllm/model_executor/layers/pooler/seqwise/methods.py
+++ b/vllm/model_executor/layers/pooler/seqwise/methods.py
@@ -14,6 +14,8 @@
 
 SequencePoolingMethodOutput: TypeAlias = torch.Tensor | list[torch.Tensor]
 
+_MEAN_POOL_ACCUMULATION_CHUNK_BYTES = 16 * 1024 * 1024  # 16MB
+
 
 class SequencePoolingMethod(nn.Module, ABC):
     def get_supported_tasks(self) -> Set[PoolingTask]:
@@ -66,23 +68,49 @@ def forward(
             "partial prefill not supported with MEAN pooling"
         )
 
-        prompt_lens = pooling_cursor.prompt_lens_cpu.to(
-            hidden_states.device, non_blocking=True
+        prompt_lens_cpu = pooling_cursor.prompt_lens_cpu
+        num_seqs = prompt_lens_cpu.numel()
+        hidden_size = hidden_states.shape[-1]
+
+        if num_seqs == 0:
+            # early return for empty batch
+            return hidden_states.new_empty((0, hidden_size), dtype=torch.float32)
+
+        # Build segment_ids on CPU so repeat_interleave doesn't need to sync
+        # GPU->CPU to learn its data-dependent output length, then upload
+        # non-blocking. eg. [2, 1, 3] -> [0, 0, 1, 2, 2, 2]
+        segment_ids = torch.repeat_interleave(
+            torch.arange(num_seqs, dtype=torch.long),
+            prompt_lens_cpu,
+        ).to(hidden_states.device, non_blocking=True)
+        prompt_lens = prompt_lens_cpu.to(
+            hidden_states.device, dtype=torch.int64, non_blocking=True
+        )
+        segment_sums = torch.zeros(
+            (num_seqs, hidden_size),
+            dtype=torch.float32,
+            device=hidden_states.device,
         )
 
-        # Use float32 for torch.cumsum in MeanPool,
-        # otherwise precision will be lost significantly.
-        cumsum = torch.cumsum(hidden_states, dim=0, dtype=torch.float32)
+        bytes_per_token = hidden_size * torch.finfo(torch.float32).bits // 8
+        chunk_size = max(1, _MEAN_POOL_ACCUMULATION_CHUNK_BYTES // bytes_per_token)
 
-        start_indices = pooling_cursor.first_token_indices_gpu
-        end_indices = pooling_cursor.last_token_indices_gpu
+        # iterate over the batch in chunks
+        for start in range(0, hidden_states.shape[0], chunk_size):
+            end = min(start + chunk_size, hidden_states.shape[0])
+            # using index_add_ to accumulate for each segment
+            segment_sums.index_add_(
+                0,
+                segment_ids[start:end],
+                hidden_states[start:end].to(dtype=torch.float32),
+            )
 
-        return (
-            cumsum[end_indices] - cumsum[start_indices] + hidden_states[start_indices]
-        ) / prompt_lens.unsqueeze(1)
+        return segment_sums / prompt_lens.unsqueeze(1)
 
 
-def get_seq_pooling_method(pooling_type: SequencePoolingType | str):
+def get_seq_pooling_method(
+    pooling_type: SequencePoolingType | str,
+) -> SequencePoolingMethod:
     if pooling_type == "CLS":
         return CLSPool()
     if pooling_type == "LAST":
diff --git a/vllm/model_executor/layers/pooler/seqwise/poolers.py b/vllm/model_executor/layers/pooler/seqwise/poolers.py
index f46834a7c3f2..81e19001b76a 100644
--- a/vllm/model_executor/layers/pooler/seqwise/poolers.py
+++ b/vllm/model_executor/layers/pooler/seqwise/poolers.py
@@ -89,7 +89,7 @@ def forward(
         return pooled_data
 
 
-def pooler_for_embed(pooler_config: PoolerConfig):
+def pooler_for_embed(pooler_config: PoolerConfig) -> SequencePooler:
     pooling = get_seq_pooling_method(pooler_config.get_seq_pooling_type())
 
     vllm_config = get_current_vllm_config()
@@ -109,16 +109,18 @@ def pooler_for_classify(
     pooling: SequencePoolingMethod | SequencePoolingFn | None = None,
     classifier: ClassifierFn | None = None,
     act_fn: PoolerActivation | None = None,
-):
+) -> SequencePooler:
     if pooling is None:
         pooling = get_seq_pooling_method(pooler_config.get_seq_pooling_type())
 
     vllm_config = get_current_vllm_config()
     model_config = vllm_config.model_config
+    assert model_config.pooler_config is not None
     head = ClassifierPoolerHead(
         head_dtype=model_config.head_dtype,
         classifier=classifier,
-        logit_bias=model_config.pooler_config.logit_bias,
+        logit_mean=model_config.pooler_config.logit_mean,
+        logit_sigma=model_config.pooler_config.logit_sigma,
         activation=resolve_classifier_act_fn(
             model_config, static_num_labels=True, act_fn=act_fn
         ),
diff --git a/vllm/model_executor/layers/pooler/special.py b/vllm/model_executor/layers/pooler/special.py
index d06663b5b947..16437ac2de72 100644
--- a/vllm/model_executor/layers/pooler/special.py
+++ b/vllm/model_executor/layers/pooler/special.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import dataclasses
 from collections.abc import Mapping, Set
 from itertools import groupby
 
@@ -80,9 +81,11 @@ def forward(
         pooling_metadata: PoolingMetadata,
     ) -> PoolerOutput:
         poolers_by_task = self.poolers_by_task
+        cursor = pooling_metadata.pooling_cursor
 
         outputs = list[torch.Tensor | None]()
         offset = 0
+        token_offset = 0
         for task, group in groupby(pooling_metadata.tasks):
             if not (pooler := poolers_by_task.get(task)):
                 raise ValueError(
@@ -91,10 +94,38 @@ def forward(
                 )
 
             num_items = len(list(group))
-            group_output: PoolerOutput = pooler(
-                hidden_states,
-                pooling_metadata[offset : offset + num_items],
-            )
+            group_metadata = pooling_metadata[offset : offset + num_items]
+            if cursor is None:
+                group_hidden_states = hidden_states
+            else:
+                # Slice out this group's tokens so sub-poolers see only their
+                # portion of the batch. Token offset is computed from the CPU
+                # `num_scheduled_tokens_cpu` to avoid a GPU->CPU sync.
+                group_cursor = group_metadata.pooling_cursor
+                assert group_cursor is not None
+                num_group_tokens = int(group_cursor.num_scheduled_tokens_cpu.sum())
+                group_hidden_states = hidden_states[
+                    token_offset : token_offset + num_group_tokens
+                ]
+                if token_offset:
+                    # Shift first/last indices to be relative to the slice
+                    # so seqwise poolers (which index `hidden_states` directly)
+                    # remain correct.
+                    pooling_cursor = dataclasses.replace(
+                        group_cursor,
+                        first_token_indices_gpu=(
+                            group_cursor.first_token_indices_gpu - token_offset
+                        ),
+                        last_token_indices_gpu=(
+                            group_cursor.last_token_indices_gpu - token_offset
+                        ),
+                    )
+                    group_metadata = dataclasses.replace(
+                        group_metadata, pooling_cursor=pooling_cursor
+                    )
+                token_offset += num_group_tokens
+
+            group_output: PoolerOutput = pooler(group_hidden_states, group_metadata)
 
             outputs.extend(group_output)
             offset += num_items
diff --git a/vllm/model_executor/layers/pooler/tokwise/__init__.py b/vllm/model_executor/layers/pooler/tokwise/__init__.py
index fbc610c85564..924c21fcf386 100644
--- a/vllm/model_executor/layers/pooler/tokwise/__init__.py
+++ b/vllm/model_executor/layers/pooler/tokwise/__init__.py
@@ -18,6 +18,8 @@
 from .poolers import (
     TokenPooler,
     TokenPoolerOutput,
+    TokenPoolingFn,
+    TokenPoolingHeadFn,
     pooler_for_token_classify,
     pooler_for_token_embed,
 )
@@ -34,6 +36,8 @@
     "get_tok_pooling_method",
     "TokenPooler",
     "TokenPoolerOutput",
+    "TokenPoolingFn",
+    "TokenPoolingHeadFn",
     "pooler_for_token_classify",
     "pooler_for_token_embed",
 ]
diff --git a/vllm/model_executor/layers/pooler/tokwise/heads.py b/vllm/model_executor/layers/pooler/tokwise/heads.py
index 80c5c831fa08..d9f41132c066 100644
--- a/vllm/model_executor/layers/pooler/tokwise/heads.py
+++ b/vllm/model_executor/layers/pooler/tokwise/heads.py
@@ -78,7 +78,8 @@ def forward_chunk(
         # embeddings shape: [n_tokens, embedding_size]
 
         # for matryoshka representation
-        embeddings = embeddings[..., : pooling_param.dimensions]
+        if pooling_param.dimensions is not None:
+            embeddings = embeddings[..., : pooling_param.dimensions]
 
         # for normalize
         if self.activation is not None and pooling_param.use_activation:
@@ -92,14 +93,16 @@ class TokenClassifierPoolerHead(TokenPoolerHead):
     def __init__(
         self,
         classifier: ClassifierFn | None = None,
-        logit_bias: float | None = None,
+        logit_mean: float | None = None,
+        logit_sigma: float | None = None,
         head_dtype: torch.dtype | str | None = None,
         activation: ActivationFn | None = None,
     ) -> None:
         super().__init__()
 
         self.classifier = classifier
-        self.logit_bias = logit_bias
+        self.logit_mean = logit_mean
+        self.logit_sigma = logit_sigma
         self.head_dtype = head_dtype
         self.activation = activation
 
@@ -125,8 +128,11 @@ def forward_chunk(
             logits = pooled_data
         # logits shape: [n_token, num_labels]
 
-        if self.logit_bias is not None:
-            logits -= self.logit_bias
+        # Affine score calibration: activation((logit - mean) / sigma)
+        if self.logit_mean is not None:
+            logits = logits - self.logit_mean
+        if self.logit_sigma is not None:
+            logits = logits / self.logit_sigma
 
         if self.activation is not None and pooling_param.use_activation:
             logits = self.activation(logits)
diff --git a/vllm/model_executor/layers/pooler/tokwise/methods.py b/vllm/model_executor/layers/pooler/tokwise/methods.py
index f242d215d7b2..59b7234661b5 100644
--- a/vllm/model_executor/layers/pooler/tokwise/methods.py
+++ b/vllm/model_executor/layers/pooler/tokwise/methods.py
@@ -47,13 +47,12 @@ def forward(
         pooling_metadata: PoolingMetadata,
     ) -> list[TokenPoolingMethodOutputItem]:
         pooling_cursor = pooling_metadata.get_pooling_cursor()
-        hidden_states_lst = [
-            hidden_states[first : last + 1]
-            for first, last in zip(
-                pooling_cursor.first_token_indices_gpu.tolist(),
-                pooling_cursor.last_token_indices_gpu.tolist(),
-            )
-        ]
+        # Use the already-CPU num_scheduled_tokens tensor so `.tolist()`
+        # doesn't trigger a GPU->CPU sync. torch.split produces the same
+        # consecutive slices as indexing with first/last per-sequence indices.
+        hidden_states_lst = list(
+            torch.split(hidden_states, pooling_cursor.num_scheduled_tokens_cpu.tolist())
+        )
 
         if not self.enable_chunked_prefill:
             return hidden_states_lst
@@ -91,16 +90,18 @@ def forward(
         pooling_metadata: PoolingMetadata,
     ) -> list[TokenPoolingMethodOutputItem]:
         pooled_data_lst = super().forward(hidden_states, pooling_metadata)
-        prompt_token_ids = pooling_metadata.get_prompt_token_ids()
+        # Use the CPU copy of prompt_token_ids so the step_tag_id mask can be
+        # resolved to indices without a d2h sync from boolean indexing.
+        prompt_token_ids_cpu = pooling_metadata.get_prompt_token_ids_cpu()
         pooling_params = pooling_metadata.pooling_params
 
         pooled_data = list[torch.Tensor | None]()
-        for data, token_id, pooling_param in zip(
-            pooled_data_lst, prompt_token_ids, pooling_params
+        for data, token_id_cpu, pooling_param in zip(
+            pooled_data_lst, prompt_token_ids_cpu, pooling_params
         ):
             # for unfinished chunked prefill
             if data is None:
-                pass
+                pooled_data.append(None)
             else:
                 step_tag_id = pooling_param.step_tag_id
                 returned_token_ids = pooling_param.returned_token_ids
@@ -109,9 +110,11 @@ def forward(
                     data = data[:, returned_token_ids]
 
                 if step_tag_id is not None:
-                    data = data[token_id == step_tag_id]
+                    idx_cpu = (token_id_cpu == step_tag_id).nonzero(as_tuple=True)[0]
+                    idx = idx_cpu.to(data.device, non_blocking=True)
+                    data = data[idx]
 
-            pooled_data.append(data)
+                pooled_data.append(data)
 
         return pooled_data
 
diff --git a/vllm/model_executor/layers/pooler/tokwise/poolers.py b/vllm/model_executor/layers/pooler/tokwise/poolers.py
index c56970fcabaa..0b66097e3813 100644
--- a/vllm/model_executor/layers/pooler/tokwise/poolers.py
+++ b/vllm/model_executor/layers/pooler/tokwise/poolers.py
@@ -58,7 +58,7 @@ class TokenPooler(Pooler):
     def __init__(
         self,
         pooling: TokenPoolingMethod | TokenPoolingFn,
-        head: TokenPoolerHead | TokenPoolingHeadFn,
+        head: TokenPoolerHead | TokenPoolingHeadFn | None = None,
     ) -> None:
         super().__init__()
 
@@ -89,7 +89,8 @@ def forward(
         pooling_metadata: PoolingMetadata,
     ) -> TokenPoolerOutput:
         pooled_data = self.pooling(hidden_states, pooling_metadata)
-        pooled_data = self.head(pooled_data, pooling_metadata)
+        if self.head is not None:
+            pooled_data = self.head(pooled_data, pooling_metadata)
         return pooled_data
 
 
@@ -117,16 +118,18 @@ def pooler_for_token_classify(
     pooling: TokenPoolingMethod | TokenPoolingFn | None = None,
     classifier: ClassifierFn | None = None,
     act_fn: PoolerActivation | None = None,
-):
+) -> TokenPooler:
     if pooling is None:
         pooling = get_tok_pooling_method(pooler_config.get_tok_pooling_type())
 
     vllm_config = get_current_vllm_config()
     model_config = vllm_config.model_config
+    assert model_config.pooler_config is not None
     head = TokenClassifierPoolerHead(
         head_dtype=model_config.head_dtype,
         classifier=classifier,
-        logit_bias=model_config.pooler_config.logit_bias,
+        logit_mean=model_config.pooler_config.logit_mean,
+        logit_sigma=model_config.pooler_config.logit_sigma,
         activation=resolve_classifier_act_fn(
             model_config, static_num_labels=False, act_fn=act_fn
         ),
diff --git a/vllm/model_executor/layers/quantization/__init__.py b/vllm/model_executor/layers/quantization/__init__.py
index 9aceb3be054d..11489feb9d74 100644
--- a/vllm/model_executor/layers/quantization/__init__.py
+++ b/vllm/model_executor/layers/quantization/__init__.py
@@ -19,9 +19,11 @@
     "modelopt_mxfp8",
     "modelopt_mixed",
     "gguf",
+    "auto_gptq",
+    "gptq",
     "gptq_marlin",
     "awq_marlin",
-    "gptq",
+    "humming",
     "compressed-tensors",
     "bitsandbytes",
     "experts_int8",
@@ -30,9 +32,17 @@
     "torchao",
     "inc",
     "mxfp4",
-    "mxfp8",
-    "petit_nvfp4",
+    "gpt_oss_mxfp4",
+    "deepseek_v4_fp8",
     "cpu_awq",
+    "online",
+    # Below are online quant shorthand names (see vllm.config.quantization).
+    # Listed here as strings to avoid a circular import; kept in sync with
+    # _ONLINE_SHORTHANDS by the assertion in get_quantization_config().
+    "fp8_per_tensor",
+    "fp8_per_block",
+    "int8_per_channel_weight_only",
+    "mxfp8",
 ]
 QUANTIZATION_METHODS: list[str] = list(get_args(QuantizationMethods))
 
@@ -40,8 +50,6 @@
     "tpu_int8",
     "fbgemm_fp8",
     "fp_quant",
-    "experts_int8",
-    "petit_nvfp4",
 ]
 
 # The customized quantization methods which will be added to this dict.
@@ -103,8 +111,11 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
         raise ValueError(f"Invalid quantization method: {quantization}")
 
     # lazy import to avoid triggering `torch.compile` too early
+    from vllm.config.quantization import _ONLINE_SHORTHANDS
     from vllm.model_executor.layers.quantization.quark.quark import QuarkConfig
+    from vllm.models.deepseek_v4 import DeepseekV4FP8Config
 
+    from .auto_gptq import AutoGPTQConfig
     from .awq import AWQConfig
     from .awq_marlin import AWQMarlinConfig
     from .bitsandbytes import BitsAndBytesConfig
@@ -117,8 +128,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
     from .fp8 import Fp8Config
     from .fp_quant import FPQuantConfig
     from .gguf import GGUFConfig
-    from .gptq import GPTQConfig
-    from .gptq_marlin import GPTQMarlinConfig
+    from .humming import HummingConfig
     from .inc import INCConfig
     from .modelopt import (
         ModelOptFp8Config,
@@ -127,9 +137,8 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
         ModelOptNvFp4Config,
     )
     from .moe_wna16 import MoeWNA16Config
-    from .mxfp4 import Mxfp4Config
-    from .mxfp8 import Mxfp8Config
-    from .petit import PetitNvFp4Config
+    from .mxfp4 import GptOssMxfp4Config, Mxfp4Config
+    from .online.base import OnlineQuantizationConfig
     from .torchao import TorchAOConfig
 
     method_to_config: dict[str, type[QuantizationConfig]] = {
@@ -142,9 +151,10 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
         "modelopt_mxfp8": ModelOptMxFp8Config,
         "modelopt_mixed": ModelOptMixedPrecisionConfig,
         "gguf": GGUFConfig,
-        "gptq_marlin": GPTQMarlinConfig,
+        "auto_gptq": AutoGPTQConfig,
+        "gptq": AutoGPTQConfig,
+        "gptq_marlin": AutoGPTQConfig,
         "awq_marlin": AWQMarlinConfig,
-        "gptq": GPTQConfig,
         "compressed-tensors": CompressedTensorsConfig,
         "bitsandbytes": BitsAndBytesConfig,
         "experts_int8": ExpertsInt8Config,
@@ -154,10 +164,23 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
         "auto-round": INCConfig,
         "inc": INCConfig,
         "mxfp4": Mxfp4Config,
-        "mxfp8": Mxfp8Config,
-        "petit_nvfp4": PetitNvFp4Config,
+        "gpt_oss_mxfp4": GptOssMxfp4Config,
+        "deepseek_v4_fp8": DeepseekV4FP8Config,
         "cpu_awq": CPUAWQConfig,
+        "humming": HummingConfig,
+        "online": OnlineQuantizationConfig,
     }
+
+    # Register online shorthands as quantization methods so the user can
+    # specify "LLM(..., quantization='fp8_per_tensor')" as shorthand for
+    # creating a more complicated online quant config object.
+    for shorthand in _ONLINE_SHORTHANDS:
+        assert shorthand not in method_to_config, (
+            f"Online quant shorthand {shorthand!r} conflicts with an "
+            f"existing quantization method"
+        )
+        method_to_config[shorthand] = OnlineQuantizationConfig
+
     # Update the `method_to_config` with customized quantization methods.
     method_to_config.update(_CUSTOMIZED_METHOD_TO_QUANT_CONFIG)
 
diff --git a/vllm/model_executor/layers/quantization/gptq_marlin.py b/vllm/model_executor/layers/quantization/auto_gptq.py
similarity index 61%
rename from vllm/model_executor/layers/quantization/gptq_marlin.py
rename to vllm/model_executor/layers/quantization/auto_gptq.py
index d7b2a366e1f0..85ee4061adab 100644
--- a/vllm/model_executor/layers/quantization/gptq_marlin.py
+++ b/vllm/model_executor/layers/quantization/auto_gptq.py
@@ -6,25 +6,28 @@
 
 import torch
 from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE
+from transformers import PretrainedConfig
 
 import vllm.model_executor.layers.fused_moe  # noqa
-from vllm import _custom_ops as ops
 from vllm.logger import init_logger
 from vllm.model_executor.kernels.linear import (
     MPLinearLayerConfig,
     choose_mp_linear_kernel,
 )
-from vllm.model_executor.layers.fused_moe.config import (
+from vllm.model_executor.layers.fused_moe import (
     FusedMoEConfig,
-    FusedMoEQuantConfig,
-)
-from vllm.model_executor.layers.fused_moe.fused_marlin_moe import fused_marlin_moe
-from vllm.model_executor.layers.fused_moe.layer import (
-    FusedMoE,
     FusedMoEMethodBase,
+    FusedMoEQuantConfig,
     FusedMoeWeightScaleSupported,
+    RoutedExperts,
+    SharedExperts,
     UnquantizedFusedMoEMethod,
 )
+from vllm.model_executor.layers.fused_moe.oracle.int_wna16 import (
+    convert_to_wna16_moe_kernel_format,
+    make_wna16_moe_kernel,
+    select_wna16_moe_backend,
+)
 from vllm.model_executor.layers.linear import LinearMethodBase, set_weight_attrs
 from vllm.model_executor.layers.quantization import QuantizationMethods
 from vllm.model_executor.layers.quantization.base_config import (
@@ -38,16 +41,17 @@
     override_config,
 )
 from vllm.model_executor.layers.quantization.utils.marlin_utils import (
-    check_marlin_supported,
     check_moe_marlin_supports_layer,
     get_marlin_input_dtype,
-    marlin_act_int8_process_scales,
     marlin_make_workspace_new,
-    marlin_moe_permute_scales,
-    marlin_permute_bias,
     marlin_repeat_scales_on_all_ranks,
     verify_marlin_supported,
 )
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    QuantKey,
+    kInt4StaticGroupScale,
+    kInt8StaticGroupScale,
+)
 from vllm.model_executor.parameter import (
     ChannelQuantScaleParameter,
     GroupQuantScaleParameter,
@@ -55,7 +59,6 @@
     PackedvLLMParameter,
     RowvLLMParameter,
 )
-from vllm.platforms import current_platform
 from vllm.scalar_type import scalar_types
 from vllm.transformers_utils.config import get_safetensors_params_metadata
 from vllm.utils.collection_utils import is_list_of
@@ -64,34 +67,33 @@
 
 
 def get_moe_quant_method(
-    config: "GPTQMarlinConfig",
-    layer: torch.nn.Module,
+    config: "AutoGPTQConfig",
+    layer: RoutedExperts,
     prefix: str,
     moe_method_cls: type,
 ):
     cloned_config = deepcopy(config)
 
-    if isinstance(layer, FusedMoE):
-        # False = skip module, None = no override, else = Positive match
-        if (
-            get_dynamic_override(  # noqa: E712
-                cloned_config,  # noqa: E712
-                layer_name=prefix,
-            )
-            == False
-        ):  # noqa: E712
-            return UnquantizedFusedMoEMethod(layer.moe_config)
+    assert isinstance(layer, RoutedExperts)
+    # False = skip module, None = no override, else = Positive match
+    if (
+        get_dynamic_override(  # noqa: E712
+            cloned_config,  # noqa: E712
+            layer_name=prefix,
+        )
+        == False
+    ):  # noqa: E712
+        return UnquantizedFusedMoEMethod(layer.moe_config)
 
-        if prefix:
-            # Dynamic per module/layer rules may override base config
-            override_config(cloned_config, prefix=prefix)
+    if prefix:
+        # Dynamic per module/layer rules may override base config
+        override_config(cloned_config, prefix=prefix)
 
-        return moe_method_cls(cloned_config, layer.moe_config)
-    return None
+    return moe_method_cls(cloned_config, layer.moe_config)
 
 
-class GPTQMarlinConfig(QuantizationConfig):
-    """Config class for GPTQ Marlin"""
+class AutoGPTQConfig(QuantizationConfig):
+    """Config class for AutoGPTQ quantization using Marlin kernels."""
 
     # (num_bits, is_sym) -> quant_type
     TYPE_MAP = {
@@ -163,7 +165,7 @@ def __init__(
 
     def __repr__(self) -> str:
         return (
-            f"GPTQMarlinConfig(quant_type={self.quant_type}, "
+            f"AutoGPTQConfig(quant_type={self.quant_type}, "
             f"group_size={self.group_size}, "
             f"desc_act={self.desc_act}, "
             f"lm_head_quantized={self.lm_head_quantized}, "
@@ -173,7 +175,7 @@ def __repr__(self) -> str:
 
     @classmethod
     def get_name(cls) -> QuantizationMethods:
-        return "gptq_marlin"
+        return "auto_gptq"
 
     @classmethod
     def get_supported_act_dtypes(cls) -> list[torch.dtype]:
@@ -181,14 +183,14 @@ def get_supported_act_dtypes(cls) -> list[torch.dtype]:
 
     @classmethod
     def get_min_capability(cls) -> int:
-        return 75
+        return 60
 
     @classmethod
     def get_config_filenames(cls) -> list[str]:
         return ["quantize_config.json"]
 
     @classmethod
-    def from_config(cls, config: dict[str, Any]) -> "GPTQMarlinConfig":
+    def from_config(cls, config: dict[str, Any]) -> "AutoGPTQConfig":
         dynamic = cls.get_from_keys_or(config, ["dynamic"], default={})
         dynamic = {} if dynamic is None else dynamic
 
@@ -213,35 +215,30 @@ def from_config(cls, config: dict[str, Any]) -> "GPTQMarlinConfig":
 
     @classmethod
     def override_quantization_method(
-        cls, hf_quant_cfg, user_quant
+        cls, hf_quant_cfg, user_quant, hf_config=None
     ) -> QuantizationMethods | None:
-        can_convert = cls.is_gptq_marlin_compatible(hf_quant_cfg)
+        """Override to use AutoGPTQ for compatible GPTQ models."""
+        quant_method = hf_quant_cfg.get("quant_method", "").lower()
+
+        if quant_method != "gptq":
+            return None
 
-        is_valid_user_quant = (
-            user_quant is None or user_quant == "marlin" or user_quant == "gptq_marlin"
+        is_valid_user_quant = user_quant is None or user_quant in (
+            "gptq",
+            "gptq_marlin",
+            "auto_gptq",
+            "marlin",
         )
 
-        if can_convert and is_valid_user_quant:
-            msg = (
-                "The model is convertible to {} during runtime."
-                " Using {} kernel.".format(cls.get_name(), cls.get_name())
-            )
-            logger.info(msg)
+        if is_valid_user_quant:
             return cls.get_name()
 
-        if can_convert and user_quant == "gptq":
-            logger.info(
-                "Detected that the model can run with gptq_marlin"
-                ", however you specified quantization=gptq explicitly,"
-                " so forcing gptq. Use quantization=gptq_marlin for"
-                " faster inference"
-            )
         return None
 
     def get_quant_method(
         self, layer: torch.nn.Module, prefix: str
     ) -> "QuantizeMethodBase | None":
-        if isinstance(layer, FusedMoE):
+        if isinstance(layer, RoutedExperts):
             from vllm.model_executor.layers.quantization.moe_wna16 import MoeWNA16Config
 
             if not check_moe_marlin_supports_layer(layer, self.group_size):
@@ -253,7 +250,7 @@ def get_quant_method(
                     layer, prefix
                 )
             moe_quant_method = get_moe_quant_method(
-                self, layer, prefix, GPTQMarlinMoEMethod
+                self, layer, prefix, AutoGPTQMoEMethod
             )
             if moe_quant_method is None:
                 return None
@@ -261,45 +258,25 @@ def get_quant_method(
             return moe_quant_method
 
         quant_method = get_linear_quant_method(
-            self, layer, prefix, GPTQMarlinLinearMethod
+            self, layer, prefix, AutoGPTQLinearMethod
         )
         if quant_method is None:
             return None
         quant_method.input_dtype = get_marlin_input_dtype(prefix)
         return quant_method
 
-    @classmethod
-    def is_gptq_marlin_compatible(cls, quant_config: dict[str, Any]):
-        quant_method = quant_config.get("quant_method", "").lower()
-        num_bits = quant_config.get("bits")
-        group_size = quant_config.get("group_size")
-        sym = quant_config.get("sym")
-        desc_act = quant_config.get("desc_act")
-
-        if not (current_platform.is_cuda() or current_platform.is_cpu()):
-            return False
-
-        if quant_method != "gptq":
-            return False
-
-        # Marlin conversion is only valid if required properties are found
-        if num_bits is None or group_size is None or sym is None or desc_act is None:
-            return False
-
-        if (num_bits, sym) not in cls.TYPE_MAP:
-            return False
-
-        return check_marlin_supported(
-            quant_type=cls.TYPE_MAP[(num_bits, sym)], group_size=group_size
-        )
-
     def apply_vllm_mapper(self, hf_to_vllm_mapper):
         if self.modules_in_block_to_quantize is not None:
             self.modules_in_block_to_quantize = hf_to_vllm_mapper.apply_list(
                 self.modules_in_block_to_quantize
             )
 
-    def maybe_update_config(self, model_name: str, revision: str | None = None):
+    def maybe_update_config(
+        self,
+        model_name: str,
+        hf_config: PretrainedConfig | None = None,
+        revision: str | None = None,
+    ):
         if self.modules_in_block_to_quantize:
             if is_list_of(self.modules_in_block_to_quantize, list):
                 # original modules_in_block_to_quantize: list[list[str]]
@@ -322,16 +299,16 @@ def maybe_update_config(self, model_name: str, revision: str | None = None):
         self.modules_in_block_to_quantize = list(quant_layers)
 
 
-class GPTQMarlinLinearMethod(LinearMethodBase):
-    """Linear method for GPTQ Marlin.
+class AutoGPTQLinearMethod(LinearMethodBase):
+    """Linear method for AutoGPTQ using Marlin kernels.
 
     Args:
-        quant_config: The GPTQ Marlin quantization config.
+        quant_config: The AutoGPTQ quantization config.
     """
 
     _kernel_backends_being_used: set[str] = set()
 
-    def __init__(self, quant_config: GPTQMarlinConfig) -> None:
+    def __init__(self, quant_config: AutoGPTQConfig) -> None:
         self.quant_config = quant_config
         self.input_dtype = None
         self.quant_type = self.quant_config.quant_type
@@ -373,7 +350,7 @@ def create_weights(
         kernel_type = choose_mp_linear_kernel(mp_linear_kernel_config)
 
         if kernel_type.__name__ not in self._kernel_backends_being_used:
-            logger.info("Using %s for GPTQMarlinLinearMethod", kernel_type.__name__)
+            logger.info("Using %s for AutoGPTQLinearMethod", kernel_type.__name__)
             self._kernel_backends_being_used.add(kernel_type.__name__)
 
         # Normalize group_size
@@ -483,28 +460,35 @@ def apply(
         return self.kernel.apply_weights(layer, x, bias)
 
 
-class GPTQMarlinMoEMethod(FusedMoEMethodBase):
+class AutoGPTQMoEMethod(FusedMoEMethodBase):
     """MoE Marlin method with quantization."""
 
     def __init__(
         self,
-        quant_config: GPTQMarlinConfig,
+        quant_config: AutoGPTQConfig,
         moe: FusedMoEConfig,
     ) -> None:
         super().__init__(moe)
         self.quant_config = quant_config
         if self.quant_config.quant_type.size_bits == 4:
-            self.quant_type = scalar_types.uint4b8
+            quant_type = scalar_types.uint4b8
+            scale = kInt4StaticGroupScale
         elif self.quant_config.quant_type.size_bits == 8:
-            self.quant_type = scalar_types.uint8b128
+            quant_type = scalar_types.uint8b128
+            scale = kInt8StaticGroupScale
         else:
-            raise ValueError("GPTQMarlinMoEMethod only supports int4 and int8 now.")
+            raise ValueError("AutoGPTQMoEMethod only supports int4 and int8 now.")
         self.input_dtype = None
         self.use_marlin = True
+        weight_key = QuantKey(quant_type, scale)
+
+        self.wna16_moe_backend, self.experts_cls = select_wna16_moe_backend(
+            moe, weight_key, quant_config.weight_bits
+        )
 
     def create_weights(
         self,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
         num_experts: int,
         hidden_size: int,
         intermediate_size_per_partition: int,
@@ -515,7 +499,7 @@ def create_weights(
         is_a_8bit = self.input_dtype is not None and self.input_dtype.itemsize == 1
 
         if is_a_8bit:
-            assert self.quant_type == scalar_types.uint4b8, (
+            assert self.quant_config.quant_type.size_bits == 8, (
                 "W8A8-INT8 is not supported by marlin kernel."
             )
 
@@ -658,138 +642,104 @@ def create_weights(
         device = layer.w13_qweight.device
         layer.workspace = marlin_make_workspace_new(device, 4)
 
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
         is_a_8bit = self.input_dtype is not None and self.input_dtype.itemsize == 1
 
         if is_a_8bit:
-            assert self.quant_type == scalar_types.uint4b8, (
+            assert self.quant_config.quant_type.size_bits == 8, (
                 "W8A8-INT8 is not supported by marlin kernel."
             )
 
-        if self.input_dtype == torch.float8_e4m3fn:
-            ops.marlin_int4_fp8_preprocess(layer.w13_qweight, inplace=True)
-            ops.marlin_int4_fp8_preprocess(layer.w2_qweight, inplace=True)
-            layer.w13_scales.data = layer.w13_scales.data * 512
-            layer.w2_scales.data = layer.w2_scales.data * 512
-
-        # Process act_order
-        if self.quant_config.desc_act:
-            # Get sorting based on g_idx
-            num_experts = layer.w13_g_idx.shape[0]
-            w13_g_idx_sort_indices = torch.empty_like(layer.w13_g_idx)
-            w2_g_idx_sort_indices = torch.empty_like(layer.w2_g_idx)
-            w13_sorted_g_idx = torch.empty_like(layer.w13_g_idx)
-            w2_sorted_g_idx = torch.empty_like(layer.w2_g_idx)
-            for e in range(num_experts):
-                w13_g_idx_sort_indices[e] = torch.argsort(layer.w13_g_idx[e]).to(
-                    torch.int32
-                )
-                w2_g_idx_sort_indices[e] = torch.argsort(layer.w2_g_idx[e]).to(
-                    torch.int32
-                )
-                w13_sorted_g_idx[e] = layer.w13_g_idx[e][w13_g_idx_sort_indices[e]]
-                w2_sorted_g_idx[e] = layer.w2_g_idx[e][w2_g_idx_sort_indices[e]]
-            replace_parameter(layer, "w13_g_idx", w13_sorted_g_idx)
-            replace_parameter(layer, "w2_g_idx", w2_sorted_g_idx)
-            replace_parameter(layer, "w13_g_idx_sort_indices", w13_g_idx_sort_indices)
-            replace_parameter(layer, "w2_g_idx_sort_indices", w2_g_idx_sort_indices)
-        else:
-            # Reset g_idx related tensors
-            num_experts = layer.w13_g_idx.shape[0]
-            device = layer.w13_g_idx.device
-            layer.w13_g_idx = torch.nn.Parameter(
-                torch.empty((num_experts, 0), dtype=torch.int32, device=device),
-                requires_grad=False,
-            )
-            layer.w2_g_idx = torch.nn.Parameter(
-                torch.empty((num_experts, 0), dtype=torch.int32, device=device),
-                requires_grad=False,
-            )
-            layer.w13_g_idx_sort_indices = torch.nn.Parameter(
-                torch.empty((num_experts, 0), dtype=torch.int32, device=device),
-                requires_grad=False,
-            )
-            layer.w2_g_idx_sort_indices = torch.nn.Parameter(
-                torch.empty((num_experts, 0), dtype=torch.int32, device=device),
-                requires_grad=False,
-            )
-        # Repack weights
-        marlin_w13_qweight = ops.gptq_marlin_moe_repack(
-            layer.w13_qweight,
-            layer.w13_g_idx_sort_indices,
-            layer.w13_qweight.shape[1] * self.quant_config.pack_factor,
-            layer.w13_qweight.shape[2],
-            self.quant_config.quant_type.size_bits,
-            is_a_8bit=is_a_8bit,
-        )
-        replace_parameter(layer, "w13_qweight", marlin_w13_qweight)
-        marlin_w2_qweight = ops.gptq_marlin_moe_repack(
-            layer.w2_qweight,
-            layer.w2_g_idx_sort_indices,
-            layer.w2_qweight.shape[1] * self.quant_config.pack_factor,
-            layer.w2_qweight.shape[2],
-            self.quant_config.quant_type.size_bits,
-            is_a_8bit=is_a_8bit,
-        )
-        replace_parameter(layer, "w2_qweight", marlin_w2_qweight)
-
-        # The modular kernel expects w13_weight and w2_weight,
-        # but GPTQ uses w13_qweight and w2_qweight
-        # Alias for modular kernel
-        layer.w13_weight = layer.w13_qweight
-        # Alias for modular kernel
-        layer.w2_weight = layer.w2_qweight
-
-        # Repack scales
-        marlin_w13_scales = marlin_moe_permute_scales(
-            s=layer.w13_scales,
-            size_k=layer.intermediate_size_per_partition,
-            size_n=layer.w13_scales.shape[2],
-            group_size=self.quant_config.group_size,
-            is_a_8bit=is_a_8bit,
+        (
+            w13,
+            w2,
+            w13_scale,
+            w2_scale,
+            w13_g_idx,
+            w2_g_idx,
+            w13_g_idx_sort_indices,
+            w2_g_idx_sort_indices,
+            _w13_qzeros,
+            _w2_qzeros,
+            w13_input_global_scale,
+            w2_input_global_scale,
+            w13_bias,
+            w2_bias,
+        ) = convert_to_wna16_moe_kernel_format(
+            backend=self.wna16_moe_backend,
+            layer=layer,
+            quant_config=self.quant_config,
+            input_dtype=self.input_dtype,
+            w13=layer.w13_qweight,
+            w2=layer.w2_qweight,
+            w13_scale=layer.w13_scales,
+            w2_scale=layer.w2_scales,
+            w13_g_idx=layer.w13_g_idx,
+            w2_g_idx=layer.w2_g_idx,
+            w13_bias=getattr(layer, "w13_bias", None),
+            w2_bias=getattr(layer, "w2_bias", None),
         )
-        if self.input_dtype == torch.int8 and layer.num_groups_w13 > 1:
-            marlin_w13_scales, w13_input_global_scale = marlin_act_int8_process_scales(
-                marlin_w13_scales
-            )
-            layer.register_parameter(
-                "w13_input_global_scale",
-                torch.nn.Parameter(w13_input_global_scale, requires_grad=False),
-            )
 
-        replace_parameter(layer, "w13_scales", marlin_w13_scales)
-        marlin_w2_scales = marlin_moe_permute_scales(
-            s=layer.w2_scales,
-            size_k=layer.w2_scales.shape[1]
-            * (
-                self.quant_config.group_size
-                if self.quant_config.group_size != -1
-                else self.quant_config.pack_factor
-            ),
-            size_n=layer.w2_scales.shape[2],
-            group_size=self.quant_config.group_size,
-            is_a_8bit=is_a_8bit,
-        )
-        if self.input_dtype == torch.int8 and layer.num_groups_w2 > 1:
-            marlin_w2_scales, w2_input_global_scale = marlin_act_int8_process_scales(
-                marlin_w2_scales
-            )
-            layer.register_parameter(
-                "w2_input_global_scale",
-                torch.nn.Parameter(w2_input_global_scale, requires_grad=False),
-            )
+        replace_parameter(layer, "w13_qweight", w13)
+        replace_parameter(layer, "w2_qweight", w2)
+        replace_parameter(layer, "w13_scales", w13_scale)
+        replace_parameter(layer, "w2_scales", w2_scale)
+        replace_parameter(layer, "w13_g_idx", w13_g_idx)
+        replace_parameter(layer, "w2_g_idx", w2_g_idx)
+        replace_parameter(layer, "w13_g_idx_sort_indices", w13_g_idx_sort_indices)
+        replace_parameter(layer, "w2_g_idx_sort_indices", w2_g_idx_sort_indices)
+        if w13_input_global_scale is not None:
+            if hasattr(layer, "w13_input_global_scale"):
+                replace_parameter(
+                    layer, "w13_input_global_scale", w13_input_global_scale
+                )
+            else:
+                layer.register_parameter(
+                    "w13_input_global_scale",
+                    torch.nn.Parameter(w13_input_global_scale, requires_grad=False),
+                )
+        if w2_input_global_scale is not None:
+            if hasattr(layer, "w2_input_global_scale"):
+                replace_parameter(layer, "w2_input_global_scale", w2_input_global_scale)
+            else:
+                layer.register_parameter(
+                    "w2_input_global_scale",
+                    torch.nn.Parameter(w2_input_global_scale, requires_grad=False),
+                )
+        if w13_bias is not None:
+            if hasattr(layer, "w13_bias"):
+                replace_parameter(layer, "w13_bias", w13_bias)
+            else:
+                layer.register_parameter(
+                    "w13_bias", torch.nn.Parameter(w13_bias, requires_grad=False)
+                )
+        if w2_bias is not None:
+            if hasattr(layer, "w2_bias"):
+                replace_parameter(layer, "w2_bias", w2_bias)
+            else:
+                layer.register_parameter(
+                    "w2_bias", torch.nn.Parameter(w2_bias, requires_grad=False)
+                )
 
-        replace_parameter(layer, "w2_scales", marlin_w2_scales)
+        self._setup_kernel(layer)
 
-        if hasattr(layer, "w13_bias") and layer.w13_bias is not None:
-            layer.w13_bias.data = marlin_permute_bias(layer.w13_bias)
+    def _setup_kernel(self, layer: RoutedExperts) -> None:
+        """Build the FusedMoEKernel for this layer."""
 
-        if hasattr(layer, "w2_bias") and layer.w2_bias is not None:
-            layer.w2_bias.data = marlin_permute_bias(layer.w2_bias)
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+        self.moe_kernel = make_wna16_moe_kernel(
+            moe_quant_config=self.moe_quant_config,
+            moe_config=self.moe,
+            experts_cls=self.experts_cls,
+            is_k_full=self.is_k_full,
+            w13_g_idx=layer.w13_g_idx,
+            w2_g_idx=layer.w2_g_idx,
+            w13_g_idx_sort_indices=layer.w13_g_idx_sort_indices,
+            w2_g_idx_sort_indices=layer.w2_g_idx_sort_indices,
+            routing_tables=layer._expert_routing_tables(),
+        )
 
-    def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
-    ) -> FusedMoEQuantConfig | None:
+    def get_fused_moe_quant_config(self, layer: RoutedExperts) -> FusedMoEQuantConfig:
         from vllm.model_executor.layers.fused_moe.config import (
             gptq_marlin_moe_quant_config,
         )
@@ -812,118 +762,34 @@ def get_fused_moe_quant_config(
     def select_gemm_impl(
         self,
         prepare_finalize,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
     ):
-        """
-        Select the GEMM implementation for GPTQ-Marlin MoE.
-
-        Returns MarlinExperts configured for GPTQ quantization.
-        This is ONLY used when LoRA is enabled.
-        Without LoRA, GPTQ uses its own apply() method.
-        """
-        # Only use modular kernels when LoRA is enabled
-        # Without LoRA, GPTQ's own apply() method works fine and is more efficient
-        if not self.moe.is_lora_enabled:
-            raise NotImplementedError(
-                "GPTQ-Marlin uses its own apply() method when LoRA is not enabled. "
-                "Modular kernels are only used for LoRA support."
-            )
-
-        # The modular marlin kernels do not support 8-bit weights.
-        if self.quant_config.weight_bits == 8:
-            raise NotImplementedError(
-                "GPTQ-Marlin kernel does not support 8-bit weights."
-            )
-
-        from vllm.model_executor.layers.fused_moe import modular_kernel as mk
-        from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
-            BatchedMarlinExperts,
-            MarlinExperts,
-        )
-
-        # Ensure quant config is initialized
-        assert self.moe_quant_config is not None, (
-            "moe_quant_config must be initialized before select_gemm_impl"
-        )
-
-        w13_g_idx = (
-            getattr(layer, "w13_g_idx", None) if self.quant_config.desc_act else None
-        )
-        w2_g_idx = (
-            getattr(layer, "w2_g_idx", None) if self.quant_config.desc_act else None
-        )
-        w13_g_idx_sort_indices = (
-            getattr(layer, "w13_g_idx_sort_indices", None)
-            if self.quant_config.desc_act
-            else None
-        )
-        w2_g_idx_sort_indices = (
-            getattr(layer, "w2_g_idx_sort_indices", None)
-            if self.quant_config.desc_act
-            else None
+        raise ValueError(
+            f"{self.__class__.__name__} uses the new modular kernel "
+            "initialization logic. This function should not be called."
         )
 
-        # Check if using batched expert format (for Expert Parallelism)
-        if (
-            prepare_finalize.activation_format
-            == mk.FusedMoEActivationFormat.BatchedExperts
-        ):
-            # For batched format, use BatchedMarlinExperts
-            max_num_tokens_per_rank = prepare_finalize.max_num_tokens_per_rank()
-            assert max_num_tokens_per_rank is not None
-            return BatchedMarlinExperts(
-                max_num_tokens=max_num_tokens_per_rank,
-                num_dispatchers=prepare_finalize.num_dispatchers(),
-                moe_config=self.moe,
-                quant_config=self.moe_quant_config,
-                w13_g_idx=w13_g_idx,
-                w2_g_idx=w2_g_idx,
-                w13_g_idx_sort_indices=w13_g_idx_sort_indices,
-                w2_g_idx_sort_indices=w2_g_idx_sort_indices,
-                is_k_full=self.is_k_full,
-            )
-        else:
-            # Standard Marlin experts for GPTQ
-            return MarlinExperts(
-                moe_config=self.moe,
-                quant_config=self.moe_quant_config,
-                w13_g_idx=w13_g_idx,
-                w2_g_idx=w2_g_idx,
-                w13_g_idx_sort_indices=w13_g_idx_sort_indices,
-                w2_g_idx_sort_indices=w2_g_idx_sort_indices,
-                is_k_full=self.is_k_full,
-            )
-
     def apply(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        return fused_marlin_moe(
-            x,
-            layer.w13_qweight,
-            layer.w2_qweight,
-            getattr(layer, "w13_bias", None),
-            getattr(layer, "w2_bias", None),
-            layer.w13_scales,
-            layer.w2_scales,
-            topk_weights,
-            topk_ids,
-            input_global_scale1=getattr(layer, "w13_input_global_scale", None),
-            input_global_scale2=getattr(layer, "w2_input_global_scale", None),
-            quant_type_id=self.quant_type.id,
-            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+    ) -> torch.Tensor:
+        assert not self.is_monolithic
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply(
+            hidden_states=x,
+            w1=layer.w13_qweight,
+            w2=layer.w2_qweight,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            activation=layer.activation,
             global_num_experts=layer.global_num_experts,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
             expert_map=layer.expert_map,
-            g_idx1=layer.w13_g_idx,
-            g_idx2=layer.w2_g_idx,
-            sort_indices1=layer.w13_g_idx_sort_indices,
-            sort_indices2=layer.w2_g_idx_sort_indices,
-            workspace=layer.workspace,
-            is_k_full=self.is_k_full,
-            input_dtype=self.input_dtype,
-            inplace=not self.moe.disable_inplace,
+            shared_experts=shared_experts,
+            shared_experts_input=shared_experts_input,
         )
diff --git a/vllm/model_executor/layers/quantization/awq.py b/vllm/model_executor/layers/quantization/awq.py
index 3cf3116f0670..edacfc76334b 100644
--- a/vllm/model_executor/layers/quantization/awq.py
+++ b/vllm/model_executor/layers/quantization/awq.py
@@ -5,10 +5,12 @@
 
 import torch
 from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE
+from transformers import PretrainedConfig
 
 from vllm import _custom_ops as ops
+from vllm import envs
 from vllm.logger import init_logger
-from vllm.model_executor.layers.fused_moe.layer import FusedMoE
+from vllm.model_executor.layers.fused_moe import RoutedExperts
 from vllm.model_executor.layers.linear import (
     LinearBase,
     LinearMethodBase,
@@ -104,7 +106,7 @@ def get_quant_method(
             ):
                 return UnquantizedLinearMethod()
             return AWQLinearMethod(self)
-        elif isinstance(layer, FusedMoE):
+        elif isinstance(layer, RoutedExperts):
             # Lazy import to avoid circular import.
             from .awq_marlin import AWQMarlinConfig
             from .moe_wna16 import MoeWNA16Config
@@ -146,7 +148,12 @@ def apply_vllm_mapper(self, hf_to_vllm_mapper: "WeightsMapper"):
                 self.modules_to_not_convert
             )
 
-    def maybe_update_config(self, model_name: str, revision: str | None = None):
+    def maybe_update_config(
+        self,
+        model_name: str,
+        hf_config: PretrainedConfig | None = None,
+        revision: str | None = None,
+    ):
         if self.modules_to_not_convert:
             return
 
@@ -267,8 +274,9 @@ def apply(
 
         # num_tokens >= threshold
         FP16_MATMUL_HEURISTIC_CONDITION = x.shape[:-1].numel() >= 256
-
-        if FP16_MATMUL_HEURISTIC_CONDITION:
+        # Batch invariant mode requires torch.matmul path
+        # for Triton override
+        if FP16_MATMUL_HEURISTIC_CONDITION or envs.VLLM_BATCH_INVARIANT:
             out = ops.awq_dequantize(qweight, scales, qzeros, 0, 0, 0)
             out = torch.matmul(reshaped_x, out)
         else:
diff --git a/vllm/model_executor/layers/quantization/awq_marlin.py b/vllm/model_executor/layers/quantization/awq_marlin.py
index 426b9aa71562..0692d3b84cbe 100644
--- a/vllm/model_executor/layers/quantization/awq_marlin.py
+++ b/vllm/model_executor/layers/quantization/awq_marlin.py
@@ -6,24 +6,30 @@
 import torch
 from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE
 from torch.nn import Parameter
+from transformers import PretrainedConfig
 
 import vllm.model_executor.layers.fused_moe  # noqa
-from vllm import _custom_ops as ops
+from vllm import envs
 from vllm.logger import init_logger
 from vllm.model_executor.kernels.linear import (
     MPLinearLayerConfig,
     choose_mp_linear_kernel,
 )
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoEMethodBase,
+    FusedMoeWeightScaleSupported,
+    RoutedExperts,
+    SharedExperts,
+    UnquantizedFusedMoEMethod,
+)
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEConfig,
     FusedMoEQuantConfig,
 )
-from vllm.model_executor.layers.fused_moe.fused_marlin_moe import fused_marlin_moe
-from vllm.model_executor.layers.fused_moe.layer import (
-    FusedMoE,
-    FusedMoEMethodBase,
-    FusedMoeWeightScaleSupported,
-    UnquantizedFusedMoEMethod,
+from vllm.model_executor.layers.fused_moe.oracle.int_wna16 import (
+    convert_to_wna16_moe_kernel_format,
+    make_wna16_moe_kernel,
+    select_wna16_moe_backend,
 )
 from vllm.model_executor.layers.linear import (
     LinearBase,
@@ -42,14 +48,13 @@
     check_marlin_supports_layer,
     check_moe_marlin_supports_layer,
     get_marlin_input_dtype,
-    marlin_act_int8_process_scales,
     marlin_make_workspace_new,
-    marlin_moe_permute_scales,
-    marlin_permute_bias,
-    moe_awq_to_marlin_zero_points,
     verify_marlin_supported,
 )
-from vllm.model_executor.layers.quantization.utils.quant_utils import is_layer_skipped
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    is_layer_skipped,
+    kInt4Static,
+)
 from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
 from vllm.model_executor.parameter import GroupQuantScaleParameter, PackedvLLMParameter
 from vllm.platforms import current_platform
@@ -69,6 +74,19 @@
 _REVERSE_AWQ_PACK_ORDER = [0, 4, 1, 5, 2, 6, 3, 7]
 
 
+def _replace_or_register_parameter(
+    layer: torch.nn.Module,
+    name: str,
+    value: torch.Tensor | None,
+) -> None:
+    if value is None:
+        return
+    if hasattr(layer, name):
+        replace_parameter(layer, name, value)
+    else:
+        layer.register_parameter(name, Parameter(value, requires_grad=False))
+
+
 def _convert_awq_to_standard_format(
     layer: torch.nn.Module,
     w_q_name: str,
@@ -230,8 +248,13 @@ def from_config(cls, config: dict[str, Any]) -> "AWQMarlinConfig":
 
     @classmethod
     def override_quantization_method(
-        cls, hf_quant_cfg, user_quant
+        cls, hf_quant_cfg, user_quant, hf_config=None
     ) -> "QuantizationMethods | None":
+        # Skip override to marlin kernels, as they are not
+        # batch invariant
+        if envs.VLLM_BATCH_INVARIANT:
+            return None
+
         can_convert = cls.is_awq_marlin_compatible(hf_quant_cfg)
         is_valid_user_quant = (
             user_quant is None or user_quant == "marlin" or user_quant == "awq_marlin"
@@ -279,7 +302,7 @@ def get_quant_method(
             quant_method = AWQMarlinLinearMethod(self)
             quant_method.input_dtype = get_marlin_input_dtype(prefix)
             return quant_method
-        elif isinstance(layer, FusedMoE):
+        elif isinstance(layer, RoutedExperts):
             from vllm.model_executor.layers.quantization.moe_wna16 import MoeWNA16Config
 
             if is_layer_skipped(
@@ -332,7 +355,12 @@ def apply_vllm_mapper(self, hf_to_vllm_mapper: "WeightsMapper"):
                 self.modules_to_not_convert
             )
 
-    def maybe_update_config(self, model_name: str, revision: str | None = None):
+    def maybe_update_config(
+        self,
+        model_name: str,
+        hf_config: PretrainedConfig | None = None,
+        revision: str | None = None,
+    ):
         if self.modules_to_not_convert:
             return
 
@@ -492,10 +520,13 @@ def __init__(
         self.quant_type = scalar_types.uint4
         self.input_dtype = None
         self.use_marlin = True
+        self.wna16_moe_backend, self.experts_cls = select_wna16_moe_backend(
+            moe, kInt4Static, quant_config.weight_bits
+        )
 
     def create_weights(
         self,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
         num_experts: int,
         hidden_size: int,
         intermediate_size_per_partition: int,
@@ -594,53 +625,39 @@ def create_weights(
         device = layer.w13_qweight.device
         layer.workspace = marlin_make_workspace_new(device, 4)
 
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
-        num_experts = layer.w13_qweight.shape[0]
-        device = layer.w13_qweight.device
-        is_a_8bit = self.input_dtype is not None and self.input_dtype.itemsize == 1
-
-        if self.input_dtype == torch.float8_e4m3fn:
-            ops.marlin_int4_fp8_preprocess(
-                layer.w13_qweight.view(-1, layer.w13_qweight.size(2)),
-                layer.w13_qzeros.view(-1, layer.w13_qzeros.size(2)),
-                inplace=True,
-            )
-            ops.marlin_int4_fp8_preprocess(
-                layer.w2_qweight.view(-1, layer.w2_qweight.size(2)),
-                layer.w2_qzeros.view(-1, layer.w2_qzeros.size(2)),
-                inplace=True,
-            )
-            layer.w13_scales.data = layer.w13_scales.data * 512
-            layer.w2_scales.data = layer.w2_scales.data * 512
-
-        layer.w13_g_idx_sort_indices = torch.nn.Parameter(
-            torch.empty((num_experts, 0), dtype=torch.int32, device=device),
-            requires_grad=False,
-        )
-        layer.w2_g_idx_sort_indices = torch.nn.Parameter(
-            torch.empty((num_experts, 0), dtype=torch.int32, device=device),
-            requires_grad=False,
-        )
-
-        marlin_w13_qweight = ops.awq_marlin_moe_repack(
-            layer.w13_qweight,
-            layer.w13_g_idx_sort_indices,
-            size_k=layer.w13_qweight.shape[1],
-            size_n=layer.w13_qweight.shape[2] * self.quant_config.pack_factor,
-            num_bits=self.quant_config.weight_bits,
-            is_a_8bit=is_a_8bit,
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
+        (
+            w13,
+            w2,
+            w13_scale,
+            w2_scale,
+            w13_g_idx,
+            w2_g_idx,
+            w13_g_idx_sort_indices,
+            w2_g_idx_sort_indices,
+            w13_qzeros,
+            w2_qzeros,
+            w13_input_global_scale,
+            w2_input_global_scale,
+            w13_bias,
+            w2_bias,
+        ) = convert_to_wna16_moe_kernel_format(
+            backend=self.wna16_moe_backend,
+            layer=layer,
+            quant_config=self.quant_config,
+            input_dtype=self.input_dtype,
+            w13=layer.w13_qweight,
+            w2=layer.w2_qweight,
+            w13_scale=layer.w13_scales,
+            w2_scale=layer.w2_scales,
+            w13_qzeros=layer.w13_qzeros,
+            w2_qzeros=layer.w2_qzeros,
+            w13_bias=getattr(layer, "w13_bias", None),
+            w2_bias=getattr(layer, "w2_bias", None),
         )
-        replace_parameter(layer, "w13_qweight", marlin_w13_qweight)
 
-        marlin_w2_qweight = ops.awq_marlin_moe_repack(
-            layer.w2_qweight,
-            layer.w2_g_idx_sort_indices,
-            size_k=layer.w2_qweight.shape[1],
-            size_n=layer.w2_qweight.shape[2] * self.quant_config.pack_factor,
-            num_bits=self.quant_config.weight_bits,
-            is_a_8bit=is_a_8bit,
-        )
-        replace_parameter(layer, "w2_qweight", marlin_w2_qweight)
+        replace_parameter(layer, "w13_qweight", w13)
+        replace_parameter(layer, "w2_qweight", w2)
 
         # The modular kernel expects w13_weight and w2_weight,
         # but AWQ uses w13_qweight and w2_qweight
@@ -649,70 +666,46 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         # Alias for modular kernel
         layer.w2_weight = layer.w2_qweight
 
-        # Why does this take the intermediate size for size_k?
-        marlin_w13_scales = marlin_moe_permute_scales(
-            s=layer.w13_scales,
-            size_k=layer.intermediate_size_per_partition,
-            size_n=layer.w13_scales.shape[2],
-            group_size=self.quant_config.group_size,
-            is_a_8bit=is_a_8bit,
+        replace_parameter(layer, "w13_scales", w13_scale)
+        replace_parameter(layer, "w2_scales", w2_scale)
+        _replace_or_register_parameter(
+            layer, "w13_g_idx_sort_indices", w13_g_idx_sort_indices
         )
-        if self.input_dtype == torch.int8 and layer.num_groups_w13 > 1:
-            marlin_w13_scales, w13_input_global_scale = marlin_act_int8_process_scales(
-                marlin_w13_scales
-            )
-            layer.register_parameter(
-                "w13_input_global_scale",
-                Parameter(w13_input_global_scale, requires_grad=False),
-            )
-
-        replace_parameter(layer, "w13_scales", marlin_w13_scales)
-
-        marlin_w2_scales = marlin_moe_permute_scales(
-            s=layer.w2_scales,
-            size_k=layer.intermediate_size_per_partition,
-            size_n=layer.w2_scales.shape[2],
-            group_size=self.quant_config.group_size,
-            is_a_8bit=is_a_8bit,
+        _replace_or_register_parameter(
+            layer, "w2_g_idx_sort_indices", w2_g_idx_sort_indices
         )
-        if self.input_dtype == torch.int8 and layer.num_groups_w2 > 1:
-            marlin_w2_scales, w2_input_global_scale = marlin_act_int8_process_scales(
-                marlin_w2_scales
-            )
-            layer.register_parameter(
-                "w2_input_global_scale",
-                Parameter(w2_input_global_scale, requires_grad=False),
-            )
-
-        replace_parameter(layer, "w2_scales", marlin_w2_scales)
-
-        marlin_w13_zp = moe_awq_to_marlin_zero_points(
-            layer.w13_qzeros,
-            size_k=layer.w13_qzeros.shape[1],
-            size_n=layer.w13_qzeros.shape[2] * self.quant_config.pack_factor,
-            num_bits=self.quant_config.weight_bits,
-            is_a_8bit=is_a_8bit,
+        _replace_or_register_parameter(layer, "w13_g_idx", w13_g_idx)
+        _replace_or_register_parameter(layer, "w2_g_idx", w2_g_idx)
+        _replace_or_register_parameter(layer, "w13_qzeros", w13_qzeros)
+        _replace_or_register_parameter(layer, "w2_qzeros", w2_qzeros)
+        _replace_or_register_parameter(
+            layer, "w13_input_global_scale", w13_input_global_scale
         )
-        replace_parameter(layer, "w13_qzeros", marlin_w13_zp)
-
-        marlin_w2_zp = moe_awq_to_marlin_zero_points(
-            layer.w2_qzeros,
-            size_k=layer.w2_qzeros.shape[1],
-            size_n=layer.w2_qzeros.shape[2] * self.quant_config.pack_factor,
-            num_bits=self.quant_config.weight_bits,
-            is_a_8bit=is_a_8bit,
+        _replace_or_register_parameter(
+            layer, "w2_input_global_scale", w2_input_global_scale
         )
-        replace_parameter(layer, "w2_qzeros", marlin_w2_zp)
+        _replace_or_register_parameter(layer, "w13_bias", w13_bias)
+        _replace_or_register_parameter(layer, "w2_bias", w2_bias)
+
+        self._setup_kernel(layer)
 
-        if hasattr(layer, "w13_bias") and layer.w13_bias is not None:
-            layer.w13_bias.data = marlin_permute_bias(layer.w13_bias)
+    def _setup_kernel(self, layer: RoutedExperts) -> None:
+        """Build the FusedMoEKernel for this layer."""
 
-        if hasattr(layer, "w2_bias") and layer.w2_bias is not None:
-            layer.w2_bias.data = marlin_permute_bias(layer.w2_bias)
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+        self.moe_kernel = make_wna16_moe_kernel(
+            moe_quant_config=self.moe_quant_config,
+            moe_config=self.moe,
+            experts_cls=self.experts_cls,
+            is_k_full=self.is_k_full,
+            w13_g_idx=getattr(layer, "w13_g_idx", None),
+            w2_g_idx=getattr(layer, "w2_g_idx", None),
+            w13_g_idx_sort_indices=layer.w13_g_idx_sort_indices,
+            w2_g_idx_sort_indices=layer.w2_g_idx_sort_indices,
+            routing_tables=layer._expert_routing_tables(),
+        )
 
-    def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
-    ) -> FusedMoEQuantConfig | None:
+    def get_fused_moe_quant_config(self, layer: RoutedExperts) -> FusedMoEQuantConfig:
         from vllm.model_executor.layers.fused_moe.config import (
             awq_marlin_moe_quant_config,
         )
@@ -730,101 +723,41 @@ def get_fused_moe_quant_config(
             else None,
             w1_bias=getattr(layer, "w13_bias", None),
             w2_bias=getattr(layer, "w2_bias", None),
+            a1_gscale=getattr(layer, "w13_input_global_scale", None),
+            a2_gscale=getattr(layer, "w2_input_global_scale", None),
         )
 
     def select_gemm_impl(
         self,
         prepare_finalize,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
     ):
-        """
-        Select the GEMM implementation for AWQ-Marlin MoE.
-        Returns MarlinExperts configured for AWQ quantization.
-        This is ONLY used when LoRA is enabled.
-        Without LoRA, AWQ uses its own apply() method.
-        """
-        # Only use modular kernels when LoRA is enabled
-        # Without LoRA, AWQ's own apply() method works fine and is more efficient
-        if not self.moe.is_lora_enabled:
-            raise NotImplementedError(
-                "AWQ-Marlin uses its own apply() method when LoRA is not enabled. "
-                "Modular kernels are only used for LoRA support."
-            )
-
-        from vllm.model_executor.layers.fused_moe import modular_kernel as mk
-        from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
-            BatchedMarlinExperts,
-            MarlinExperts,
-        )
-
-        # Ensure quant config is initialized
-        assert self.moe_quant_config is not None, (
-            "moe_quant_config must be initialized before select_gemm_impl"
+        raise ValueError(
+            f"{self.__class__.__name__} uses the new modular kernel "
+            "initialization logic. This function should not be called."
         )
 
-        w13_g_idx = getattr(layer, "w13_g_idx", None)
-        w2_g_idx = getattr(layer, "w2_g_idx", None)
-        w13_g_idx_sort_indices = getattr(layer, "w13_g_idx_sort_indices", None)
-        w2_g_idx_sort_indices = getattr(layer, "w2_g_idx_sort_indices", None)
-
-        # Check if using batched expert format (for Expert Parallelism)
-        if (
-            prepare_finalize.activation_format
-            == mk.FusedMoEActivationFormat.BatchedExperts
-        ):
-            # For batched format, use BatchedMarlinExperts
-            max_num_tokens_per_rank = prepare_finalize.max_num_tokens_per_rank()
-            assert max_num_tokens_per_rank is not None
-            return BatchedMarlinExperts(
-                max_num_tokens=max_num_tokens_per_rank,
-                num_dispatchers=prepare_finalize.num_dispatchers(),
-                moe_config=self.moe,
-                quant_config=self.moe_quant_config,
-                w13_g_idx=w13_g_idx,
-                w2_g_idx=w2_g_idx,
-                w13_g_idx_sort_indices=w13_g_idx_sort_indices,
-                w2_g_idx_sort_indices=w2_g_idx_sort_indices,
-                is_k_full=self.is_k_full,
-            )
-        else:
-            # Standard Marlin experts for AWQ
-            return MarlinExperts(
-                moe_config=self.moe,
-                quant_config=self.moe_quant_config,
-                w13_g_idx=w13_g_idx,
-                w2_g_idx=w2_g_idx,
-                w13_g_idx_sort_indices=w13_g_idx_sort_indices,
-                w2_g_idx_sort_indices=w2_g_idx_sort_indices,
-                is_k_full=self.is_k_full,
-            )
-
     def apply(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        return fused_marlin_moe(
-            x,
-            layer.w13_qweight,
-            layer.w2_qweight,
-            getattr(layer, "w13_bias", None),
-            getattr(layer, "w2_bias", None),
-            layer.w13_scales,
-            layer.w2_scales,
-            topk_weights,
-            topk_ids,
-            input_global_scale1=getattr(layer, "w13_input_global_scale", None),
-            input_global_scale2=getattr(layer, "w2_input_global_scale", None),
-            quant_type_id=self.quant_type.id,
-            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+    ) -> torch.Tensor:
+        assert not self.is_monolithic
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply(
+            hidden_states=x,
+            w1=layer.w13_qweight,
+            w2=layer.w2_qweight,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            activation=layer.activation,
             global_num_experts=layer.global_num_experts,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
             expert_map=layer.expert_map,
-            w1_zeros=layer.w13_qzeros,
-            w2_zeros=layer.w2_qzeros,
-            workspace=layer.workspace,
-            input_dtype=self.input_dtype,
-            inplace=not self.moe.disable_inplace,
+            shared_experts=shared_experts,
+            shared_experts_input=shared_experts_input,
         )
diff --git a/vllm/model_executor/layers/quantization/base_config.py b/vllm/model_executor/layers/quantization/base_config.py
index 06fe4270c713..344ddd8abd25 100644
--- a/vllm/model_executor/layers/quantization/base_config.py
+++ b/vllm/model_executor/layers/quantization/base_config.py
@@ -7,6 +7,7 @@
 
 import torch
 from torch import nn
+from transformers import PretrainedConfig
 
 if TYPE_CHECKING:
     from vllm.model_executor.layers.quantization import QuantizationMethods
@@ -109,13 +110,22 @@ def from_config(cls, config: dict[str, Any]) -> "QuantizationConfig":
 
     @classmethod
     def override_quantization_method(
-        cls, hf_quant_cfg, user_quant
+        cls,
+        hf_quant_cfg: dict[str, Any],
+        user_quant: str | None,
+        hf_config: Any = None,
     ) -> QuantizationMethods | None:
         """
         Detects if this quantization method can support a given checkpoint
         format by overriding the user specified quantization method --
         this method should only be overwritten by subclasses in exceptional
-        circumstances
+        circumstances.
+
+        Args:
+            hf_quant_cfg: The checkpoint's quantization config dict.
+            user_quant: The user-specified quantization method string.
+            hf_config: The HuggingFace model config object (e.g. for
+                model_type checks). May be None if not available.
         """
         return None
 
@@ -168,10 +178,23 @@ def apply_vllm_mapper(  # noqa: B027
         # TODO (@kylesayrs): add implementations for all subclasses
         pass
 
-    def maybe_update_config(self, model_name: str):  # noqa: B027
+    def maybe_update_config(  # noqa: B027
+        self,
+        model_name: str,
+        hf_config: PretrainedConfig | None = None,
+        revision: str | None = None,
+    ):
         """
         Interface to update values after config initialization.
+
+        Args:
+            model_name: The name of the model
+            hf_config: The Hugging Face config of the model
+            revision: The revision of the model
+        Returns:
         """
+        # TODO: revision is never passed currently in vllm.py,
+        # but is used in subclasses, should we remove this parameter?
         pass
 
     def is_mxfp4_quant(self, prefix: str, layer: torch.nn.Module) -> bool:
diff --git a/vllm/model_executor/layers/quantization/bitsandbytes.py b/vllm/model_executor/layers/quantization/bitsandbytes.py
index 716a20090f69..a32ed3196c1a 100644
--- a/vllm/model_executor/layers/quantization/bitsandbytes.py
+++ b/vllm/model_executor/layers/quantization/bitsandbytes.py
@@ -6,13 +6,12 @@
 import torch
 from packaging import version
 
-from vllm.model_executor.layers.fused_moe.config import (
+from vllm.model_executor.layers.fused_moe import (
     FusedMoEConfig,
-    FusedMoEQuantConfig,
-)
-from vllm.model_executor.layers.fused_moe.layer import (
-    FusedMoE,
     FusedMoEMethodBase,
+    FusedMoEQuantConfig,
+    RoutedExperts,
+    SharedExperts,
 )
 from vllm.model_executor.layers.linear import (
     LinearBase,
@@ -164,7 +163,7 @@ def get_quant_method(
             if is_layer_skipped_bnb(prefix, self.llm_int8_skip_modules):
                 return UnquantizedLinearMethod()
             return BitsAndBytesLinearMethod(self)
-        elif isinstance(layer, FusedMoE):
+        elif isinstance(layer, RoutedExperts):
             return BitsAndBytesMoEMethod(self, layer.moe_config)
         return None
 
@@ -451,7 +450,7 @@ def __init__(
 
     def create_weights(
         self,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
         num_experts: int,
         hidden_size: int,
         intermediate_size_per_partition: int,
@@ -472,18 +471,19 @@ def create_weights(
         )
 
     def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
+        self, layer: RoutedExperts
     ) -> FusedMoEQuantConfig | None:
         return None
 
     def apply(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> torch.Tensor:
         from vllm.model_executor.layers.fused_moe import fused_experts
 
         # TODO(bnell): Do these need to be called on the hot path?
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
index 4fcc468c6cfb..f48a3f01d216 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
@@ -1,16 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from contextlib import suppress
 from functools import partial
 from typing import TYPE_CHECKING, Any, Literal, cast
 
 import torch
-from compressed_tensors.config import (
-    CompressionFormat,
-    SparsityCompressionConfig,
-    SparsityStructure,
-)
+from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
 from compressed_tensors.quantization import (
     QuantizationArgs,
     QuantizationStrategy,
@@ -24,7 +19,7 @@
 )
 from vllm.logger import init_logger
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import RoutedExperts
 from vllm.model_executor.layers.linear import (
     LinearBase,
     LinearMethodBase,
@@ -40,15 +35,15 @@
 )
 from vllm.model_executor.layers.quantization.compressed_tensors.schemes import (
     WNA16_SUPPORTED_BITS,
-    CompressedTensors24,
     CompressedTensorsScheme,
     CompressedTensorsW4A4Fp4,
+    CompressedTensorsW4A4Mxfp4,
     CompressedTensorsW4A8Fp8,
     CompressedTensorsW4A8Int,
     CompressedTensorsW4A16Fp4,
-    CompressedTensorsW4A16Mxfp4,
     CompressedTensorsW8A8Fp8,
     CompressedTensorsW8A8Int8,
+    CompressedTensorsW8A8Mxfp8,
     CompressedTensorsW8A16Fp8,
     CompressedTensorsWNA16,
 )
@@ -62,6 +57,7 @@
     should_ignore_layer,
 )
 from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
+from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
 from vllm.platforms import current_platform
 
 if TYPE_CHECKING:
@@ -81,8 +77,6 @@ def __init__(
         target_scheme_map: dict[str, Any],
         ignore: list[str],
         quant_format: str,
-        sparsity_scheme_map: dict[str, SparsityCompressionConfig],
-        sparsity_ignore_list: list[str],
         kv_cache_scheme: dict[str, Any] | None = None,
         config: dict[str, Any] | None = None,
         transform_config: dict[str, Any] | None = None,
@@ -95,8 +89,6 @@ def __init__(
         # Map from [target -> scheme]
         self.target_scheme_map = target_scheme_map
         self.kv_cache_scheme = kv_cache_scheme
-        self.sparsity_scheme_map = sparsity_scheme_map
-        self.sparsity_ignore_list = sparsity_ignore_list
         self.config = config
         self.total_num_heads = total_num_heads
         self.total_num_kv_heads = total_num_kv_heads
@@ -147,8 +139,6 @@ def _apply_list(lst: list) -> list:
 
         self.target_scheme_map = _apply_dict(self.target_scheme_map)
         self.ignore = _apply_list(self.ignore)
-        self.sparsity_scheme_map = _apply_dict(self.sparsity_scheme_map)
-        self.sparsity_ignore_list = _apply_list(self.sparsity_ignore_list)
         if self.kv_cache_scheme is not None:
             self.kv_cache_scheme = _apply_dict(self.kv_cache_scheme)
 
@@ -179,9 +169,18 @@ def get_quant_method(
             else:
                 return quant_method
 
+        if isinstance(layer, ParallelLMHead):
+            try:
+                quant_scheme = self.get_scheme(layer=layer, layer_name=prefix)
+            except ValueError:
+                quant_scheme = None
+            if quant_scheme is not None:
+                layer.scheme = quant_scheme
+                return CompressedTensorsLinearMethod(self)
+
         if isinstance(layer, Attention):
             return CompressedTensorsKVCacheMethod(self)
-        if isinstance(layer, FusedMoE):
+        if isinstance(layer, RoutedExperts):
             return CompressedTensorsMoEMethod.get_moe_method(
                 self, layer, layer_name=prefix
             )
@@ -192,7 +191,7 @@ def _add_fused_moe_to_target_scheme_map(self):
         Helper function to update target_scheme_map
         since linear layers get fused into FusedMoE
         targeting 'Linear' needs to also match
-        FusedMoE modules.
+        RoutedExperts modules.
         """
         if (
             "Linear" not in self.target_scheme_map
@@ -224,16 +223,14 @@ def from_config(cls, config: dict[str, Any]) -> "CompressedTensorsConfig":
         ignore: list[str] = cast(list[str], config.get("ignore", []))
         quant_format = cast(str, config.get("format"))
         target_scheme_map = cls._quantization_scheme_map_from_config(config=config)
-        sparsity_scheme_map, sparsity_ignore_list = cls._parse_sparsity_config(
-            config=config
-        )
+
+        # Check for deprecated sparsity config
+        cls._parse_sparsity_config(config=config)
 
         return cls(
             target_scheme_map=target_scheme_map,
             ignore=ignore,
             quant_format=quant_format,
-            sparsity_scheme_map=sparsity_scheme_map,
-            sparsity_ignore_list=sparsity_ignore_list,
             config=config,
             transform_config=config.get("transform_config"),
             kv_cache_scheme=config.get("kv_cache_scheme"),
@@ -260,6 +257,14 @@ def _parse_sparsity_config(
             target: sparsity_config for target in sparsity_config.targets or list()
         }
         sparsity_ignore_list = sparsity_config.ignore or list()
+
+        # Raise DeprecationError if non-empty sparse_scheme_map is detected
+        if sparse_scheme_map:
+            raise DeprecationWarning(
+                "Sparsity support has been removed from compressed-tensors. "
+                "Please use a model without sparsity configuration."
+            )
+
         return sparse_scheme_map, sparsity_ignore_list
 
     @classmethod
@@ -393,6 +398,27 @@ def _is_mxfp4(quant_args: QuantizationArgs) -> bool:
             and is_symmetric
         )
 
+    @staticmethod
+    def _is_mxfp8(quant_args: QuantizationArgs) -> bool:
+        if quant_args is None:
+            return False
+
+        is_group_quant = quant_args.strategy == QuantizationStrategy.GROUP.value
+        is_symmetric = quant_args.symmetric
+        is_group_size_32 = quant_args.group_size == 32
+        is_float_type = quant_args.type == QuantizationType.FLOAT
+        is_8_bits = quant_args.num_bits == 8
+        is_mxfp8_scale_dtype = quant_args.scale_dtype == torch.uint8
+
+        return (
+            is_group_quant
+            and is_float_type
+            and is_8_bits
+            and is_group_size_32
+            and is_symmetric
+            and is_mxfp8_scale_dtype
+        )
+
     @staticmethod
     def _is_static_tensor_w8a8(
         weight_quant: QuantizationArgs, input_quant: QuantizationArgs
@@ -594,7 +620,10 @@ def _get_scheme_from_parts(
             return CompressedTensorsW4A16Fp4()
 
         if self._is_mxfp4(weight_quant):
-            return CompressedTensorsW4A16Mxfp4()
+            return CompressedTensorsW4A4Mxfp4()
+
+        if self._is_mxfp8(weight_quant):
+            return CompressedTensorsW8A8Mxfp8()
 
         if self._is_fp8_w4a8_sm90(weight_quant, input_quant):
             return CompressedTensorsW4A8Fp8(
@@ -706,41 +735,7 @@ def get_scheme(
             input_quant = scheme_dict.get("input_activations")
             format = scheme_dict.get("format")
 
-        # Find the sparsity scheme of the layer
-        # assume that fused layers inherit first component's sparsity scheme
-        sparsity_targets = self.sparsity_scheme_map.keys() - set(
-            self.sparsity_ignore_list
-        )
-        sparsity_scheme: SparsityCompressionConfig | None = None
-        with suppress(ValueError):
-            matched_target = find_matched_target(
-                layer_name=layer_name,
-                module=layer,
-                targets=sparsity_targets,
-                fused_mapping=self.packed_modules_mapping,
-            )
-            sparsity_scheme = self.sparsity_scheme_map[matched_target]
-
-        if self.supports_cutlass_24(
-            weight_quant=weight_quant,
-            input_quant=input_quant,
-            sparsity_scheme=sparsity_scheme,
-        ):
-            # Have a valid sparsity scheme
-            # Validate layer is supported by Cutlass 2:4 Kernel
-            model_compression_config = (
-                None
-                if sparsity_scheme is None or sparsity_scheme.format == "dense"
-                else self.config
-            )
-
-            scheme = CompressedTensors24(
-                quantized=weight_quant is not None or input_quant is not None,
-                weight_quant=weight_quant,
-                input_quant=input_quant,
-                model_compression_config=model_compression_config,
-            )
-        elif weight_quant is None:
+        if weight_quant is None:
             # Falling back to UnquantizedLinearMethod
             return None
 
@@ -786,10 +781,11 @@ def get_scheme_dict(
                 targets=self.target_scheme_map.keys(),
                 fused_mapping=self.packed_modules_mapping,
             )
-            scheme_dict = self.target_scheme_map[matched_target]
-            if scheme_dict.get("format") is None:
-                scheme_dict["format"] = self.quant_format
-            return scheme_dict
+            if matched_target is not None:
+                scheme_dict = self.target_scheme_map[matched_target]
+                if scheme_dict.get("format") is None:
+                    scheme_dict["format"] = self.quant_format
+                return scheme_dict
 
         return None
 
@@ -803,72 +799,6 @@ def has_blocked_weights(self) -> bool:
                 return True
         return False
 
-    @staticmethod
-    def supports_cutlass_24(
-        weight_quant: QuantizationArgs | None,
-        input_quant: QuantizationArgs | None,
-        sparsity_scheme: SparsityCompressionConfig | None = None,
-    ) -> bool:
-        """
-        Check if the layer is supported by the Cutlass 2:4 Kernel
-        Conditions:
-            - Overarching condition: Sparsity Structure is 2:4
-            - Unquantized cases are supported
-            - Weight only quantization is not-supported
-            - Supported weight quantization strategies are TENSOR and CHANNEL
-            - Supported input quantization strategies are TENSOR and TOKEN
-            - Only 8 bit quantization is supported
-
-        :return: True if the layer is supported by the Cutlass 2:4 Kernel
-            False otherwise
-        """
-        if sparsity_scheme is None:
-            return False
-
-        is_valid_sparsity_structure: bool = (
-            sparsity_scheme.sparsity_structure == SparsityStructure.TWO_FOUR.value
-        )
-
-        valid_compressors = {
-            CompressionFormat.dense.value,
-            CompressionFormat.sparse_24_bitmask.value,
-        }
-
-        is_valid_sparsity = (
-            is_valid_sparsity_structure and sparsity_scheme.format in valid_compressors
-        )
-
-        if not is_valid_sparsity:
-            return False
-
-        # Unquantized cases are supported
-        if weight_quant is None and input_quant is None:
-            return True
-
-        # Weight only quantization is not-supported
-        if weight_quant is not None and input_quant is None:
-            return False
-
-        supported_weight_quant_strategies = [
-            QuantizationStrategy.TENSOR.value,
-            QuantizationStrategy.CHANNEL.value,
-        ]
-
-        assert weight_quant is not None
-        assert input_quant is not None
-        if weight_quant.strategy not in supported_weight_quant_strategies:
-            return False
-
-        supported_input_quant_strategies = [
-            QuantizationStrategy.TENSOR.value,
-            QuantizationStrategy.TOKEN.value,
-        ]
-
-        if input_quant.strategy not in supported_input_quant_strategies:
-            return False
-
-        return weight_quant.num_bits == input_quant.num_bits == 8
-
 
 class CompressedTensorsLinearMethod(LinearMethodBase):
     def __init__(self, quantization_config: CompressedTensorsConfig):
@@ -1088,6 +1018,17 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         layer._v_scale = layer.v_scale
         layer._q_scale = layer.q_scale
 
+        # Set the _float variants that the attention backend uses.
+        def _to_scalar(tensor: torch.Tensor) -> float:
+            # For n_scales > 1 (e.g., ATTN_HEAD strategy), take max
+            if tensor.numel() > 1:
+                return tensor.max().item()
+            return tensor.item()
+
+        layer._k_scale_float = _to_scalar(layer.k_scale)
+        layer._v_scale_float = _to_scalar(layer.v_scale)
+        layer._q_scale_float = _to_scalar(layer.q_scale)
+
         # Discard all placeholders.
         del layer.k_scale
         del layer.v_scale
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
deleted file mode 100644
index 1b8b726d9714..000000000000
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ /dev/null
@@ -1,2541 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import enum
-from enum import Enum
-
-import torch
-from compressed_tensors import CompressionFormat
-from compressed_tensors.quantization import (
-    ActivationOrdering,
-    QuantizationArgs,
-    QuantizationStrategy,
-)
-
-import vllm.model_executor.layers.fused_moe.modular_kernel as mk
-from vllm import _custom_ops as ops
-from vllm.distributed import get_tensor_model_parallel_world_size
-from vllm.logger import init_logger
-from vllm.model_executor.layers.fused_moe import (
-    FusedMoE,
-    FusedMoEActivationFormat,
-    FusedMoEExpertsModular,
-    FusedMoEMethodBase,
-    FusedMoeWeightScaleSupported,
-    UnquantizedFusedMoEMethod,
-)
-from vllm.model_executor.layers.fused_moe.activation import MoEActivation
-from vllm.model_executor.layers.fused_moe.config import (
-    FusedMoEConfig,
-    FusedMoEQuantConfig,
-    int4_w4a16_moe_quant_config,
-    int4_w4afp8_moe_quant_config,
-    int8_w8a8_moe_quant_config,
-    int8_w8a16_moe_quant_config,
-)
-from vllm.model_executor.layers.fused_moe.cpu_fused_moe import select_experts
-from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
-    BatchedMarlinExperts,
-    MarlinExperts,
-    fused_marlin_moe,
-)
-from vllm.model_executor.layers.fused_moe.oracle.fp8 import (
-    convert_to_fp8_moe_kernel_format,
-    make_fp8_moe_kernel,
-    make_fp8_moe_quant_config,
-    select_fp8_moe_backend,
-)
-from vllm.model_executor.layers.fused_moe.oracle.mxfp4 import (
-    Mxfp4MoeBackend,
-    make_mxfp4_moe_kernel,
-    make_mxfp4_moe_quant_config,
-)
-from vllm.model_executor.layers.fused_moe.oracle.nvfp4 import (
-    convert_to_nvfp4_moe_kernel_format,
-    is_global_sf_supported_for_nvfp4_backend,
-    make_nvfp4_moe_kernel,
-    make_nvfp4_moe_quant_config,
-    select_nvfp4_moe_backend,
-)
-from vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16 import (  # noqa
-    WNA16_SUPPORTED_BITS,
-    WNA16_SUPPORTED_TYPES_MAP,
-)
-from vllm.model_executor.layers.quantization.utils.flashinfer_mxint4_moe import (
-    flashinfer_trtllm_mxint4_moe,
-    is_flashinfer_mxint4_moe_available,
-    prepare_static_weights_for_trtllm_mxint4_moe,
-)
-from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-    process_fp8_input_tensor_strategy_moe,
-    process_fp8_weight_tensor_strategy_moe,
-)
-from vllm.model_executor.layers.quantization.utils.marlin_utils import (
-    check_moe_marlin_supports_layer,
-    get_marlin_input_dtype,
-    marlin_act_int8_process_scales,
-    marlin_make_workspace_new,
-    marlin_moe_permute_scales,
-)
-from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
-    prepare_moe_fp4_layer_for_marlin,
-)
-from vllm.model_executor.layers.quantization.utils.quant_utils import (
-    convert_bf16_scales_to_fp8,
-    convert_packed_uint4b8_to_signed_int4_inplace,
-    kFp8Dynamic128Sym,
-    kFp8DynamicTokenSym,
-    kFp8Static128BlockSym,
-    kFp8StaticChannelSym,
-    kFp8StaticTensorSym,
-    kNvfp4Dynamic,
-    kNvfp4Static,
-)
-from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
-    normalize_e4m3fn_to_e4m3fnuz,
-)
-from vllm.model_executor.utils import replace_parameter, set_weight_attrs
-from vllm.platforms import CpuArchEnum, current_platform
-
-logger = init_logger(__name__)
-
-
-class GPTQMarlinState(Enum):
-    REPACK = enum.auto()
-    READY = enum.auto()
-
-
-__all__ = [
-    "CompressedTensorsMoEMethod",
-    "CompressedTensorsW8A8Fp8MoEMethod",
-    "CompressedTensorsW8A8Int8MoEMethod",
-    "CompressedTensorsWNA16MarlinMoEMethod",
-    "CompressedTensorsWNA16MoEMethod",
-    "CompressedTensorsW4A4Nvfp4MoEMethod",
-    "CompressedTensorsW4A8Int8MoEMethod",
-]
-
-
-class CompressedTensorsMoEMethod(FusedMoEMethodBase):
-    @staticmethod
-    def get_moe_method(
-        quant_config: "CompressedTensorsConfig",  # type: ignore # noqa E501
-        layer: torch.nn.Module,
-        layer_name: str,
-    ) -> FusedMoEMethodBase:
-        # FusedMoE was made by combining multiple Linears so need to
-        # make sure quantization config for Linear can target it
-        quant_config._add_fused_moe_to_target_scheme_map()
-        unfused_names = [
-            layer_name + proj_name
-            for proj_name in [".0.gate_proj", ".0.up_proj", ".0.down_proj"]
-        ]
-        # TODO: refactor this to use expert_mapping and check all layer numbers
-        all_scheme_dicts = [
-            quant_config.get_scheme_dict(layer, name) for name in unfused_names
-        ]
-        scheme_dict = all_scheme_dicts.pop()
-
-        # multiple schemes found
-        if not all([cur_dict == scheme_dict for cur_dict in all_scheme_dicts]):
-            raise ValueError(
-                "All MoE projections need to have same "
-                "quantization scheme but found multiple"
-            )
-
-        if scheme_dict is None:  # ignored layer
-            return UnquantizedFusedMoEMethod(layer.moe_config)
-
-        # TODO: @dsikka: refactor this to use schemes as other kernels
-        # are supported + check if the layer is being ignored.
-        weight_quant = scheme_dict.get("weights")
-        input_quant = scheme_dict.get("input_activations")
-        format = scheme_dict.get("format")
-
-        if quant_config._is_mxfp4(weight_quant):
-            return CompressedTensorsW4A4Mxfp4MoEMethod(layer.moe_config)
-
-        if quant_config._is_wNa16_group_channel(weight_quant, input_quant):
-            # group_size=None means channelwise
-            group_size = weight_quant.group_size or -1
-
-            valid_format_and_bits = (
-                weight_quant.num_bits in WNA16_SUPPORTED_BITS
-                and format == CompressionFormat.pack_quantized.value
-            )
-
-            if not valid_format_and_bits:
-                raise ValueError(
-                    "For Fused MoE layers, only format: ",
-                    f"{CompressionFormat.pack_quantized.value} ",
-                    f" and bits: {WNA16_SUPPORTED_BITS} is supported ",
-                    f"but got format: {CompressionFormat.pack_quantized.value} "
-                    f" and bits: {weight_quant.num_bits}",
-                )
-
-            # Prefer to use the MarlinMoE kernel when it is supported.
-            if (
-                not check_moe_marlin_supports_layer(layer, group_size)
-                or current_platform.is_rocm()
-            ):
-                if (
-                    weight_quant.strategy == QuantizationStrategy.GROUP
-                    and weight_quant.actorder
-                    in (ActivationOrdering.GROUP, ActivationOrdering.DYNAMIC)
-                ):
-                    raise ValueError(
-                        "WNA16MoE is not supported with actorder=group/dynamic."
-                    )
-                logger.info_once("Using CompressedTensorsWNA16MoEMethod")
-                return CompressedTensorsWNA16MoEMethod(
-                    weight_quant, input_quant, layer.moe_config
-                )
-            else:
-                logger.info_once("Using CompressedTensorsWNA16MarlinMoEMethod")
-                return CompressedTensorsWNA16MarlinMoEMethod(
-                    weight_quant, input_quant, layer.moe_config
-                )
-        elif quant_config._is_nvfp4_format(weight_quant):
-            _is_valid_nvfp4_activations = (
-                quant_config._is_nvfp4_format(input_quant) or input_quant is None
-            )
-            if not _is_valid_nvfp4_activations:
-                raise ValueError(
-                    "For NVFP4 weights, input quantization must also be NVFP4 format ",
-                    f"or None for NVFP4A16, found {input_quant}",
-                )
-            return CompressedTensorsW4A4Nvfp4MoEMethod(
-                layer.moe_config, layer_name, use_a16=(input_quant is None)
-            )
-        elif (
-            quant_config._is_fp8_w8a8_sm90(weight_quant, input_quant)
-            or quant_config._is_fp8_w8a8_sm100(weight_quant, input_quant)
-            or quant_config._is_fp8_w8a8(weight_quant, input_quant)
-        ):
-            return CompressedTensorsW8A8Fp8MoEMethod(
-                weight_quant, input_quant, layer.moe_config
-            )
-        elif quant_config._is_dynamic_token_w8a8(weight_quant, input_quant):
-            return CompressedTensorsW8A8Int8MoEMethod(
-                weight_quant, input_quant, layer.moe_config
-            )
-        elif quant_config._is_fp8_w4a8_sm90(weight_quant, input_quant):
-            logger.info_once("Using CompressedTensorsW4A8Fp8MoEMethod")
-            return CompressedTensorsW4A8Fp8MoEMethod(
-                weight_quant, input_quant, layer.moe_config
-            )
-        elif quant_config._is_dynamic_token_w4a8_int(weight_quant, input_quant):
-            return CompressedTensorsW4A8Int8MoEMethod(
-                weight_quant, input_quant, layer.moe_config
-            )
-        else:
-            raise RuntimeError(
-                f"Unsupported FusedMoe scheme: {weight_quant}, {input_quant}"
-            )
-
-
-class CompressedTensorsW4A4Mxfp4MoEMethod(CompressedTensorsMoEMethod):
-    def __init__(self, moe):
-        super().__init__(moe)
-        self.group_size = 32
-        self.mxfp4_backend = Mxfp4MoeBackend.MARLIN
-        self.experts_cls = MarlinExperts
-
-    def create_weights(
-        self,
-        layer: torch.nn.Module,
-        num_experts: int,
-        hidden_size: int,
-        intermediate_size_per_partition: int,
-        params_dtype: torch.dtype,
-        **extra_weight_attrs,
-    ):
-        layer.num_experts = num_experts
-        layer.params_dtype = params_dtype
-
-        w13_weight = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                2 * intermediate_size_per_partition,
-                # 2 fp4 items are packed in the input dimension
-                hidden_size // 2,
-                requires_grad=False,
-                dtype=torch.uint8,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_packed", w13_weight)
-        set_weight_attrs(w13_weight, extra_weight_attrs)
-
-        w2_weight = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                hidden_size,
-                # 2 fp4 items are packed in the input dimension
-                intermediate_size_per_partition // 2,
-                dtype=torch.uint8,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight_packed", w2_weight)
-        set_weight_attrs(w2_weight, extra_weight_attrs)
-
-        w13_weight_scale = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                2 * intermediate_size_per_partition,
-                # 2 fp4 items are packed in the input dimension
-                hidden_size // self.group_size,
-                dtype=torch.uint8,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_scale", w13_weight_scale)
-        extra_weight_attrs.update(
-            {"quant_method": FusedMoeWeightScaleSupported.GROUP.value}
-        )
-        set_weight_attrs(w13_weight_scale, extra_weight_attrs)
-
-        w2_weight_scale = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                hidden_size,
-                # 2 fp4 items are packed in the input dimension
-                intermediate_size_per_partition // self.group_size,
-                dtype=torch.uint8,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight_scale", w2_weight_scale)
-        set_weight_attrs(w2_weight_scale, extra_weight_attrs)
-
-    def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
-    ) -> FusedMoEQuantConfig | None:
-        return make_mxfp4_moe_quant_config(
-            mxfp4_backend=self.mxfp4_backend,
-            w1_scale=layer.w13_weight_scale,
-            w2_scale=layer.w2_weight_scale,
-        )
-
-    def process_weights_after_loading(self, layer: FusedMoE) -> None:
-        layer.w13_weight = torch.nn.Parameter(
-            layer.w13_weight_packed.data, requires_grad=False
-        )
-        delattr(layer, "w13_weight_packed")
-
-        layer.w2_weight = torch.nn.Parameter(
-            layer.w2_weight_packed.data, requires_grad=False
-        )
-        delattr(layer, "w2_weight_packed")
-
-        logger.warning_once(
-            "Your GPU does not have native support for FP4 computation but "
-            "FP4 quantization is being used. Weight-only FP4 compression "
-            "will be used leveraging the Marlin kernel. This may degrade "
-            "performance for compute-heavy workloads."
-        )
-        prepare_moe_fp4_layer_for_marlin(layer)
-
-        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
-        if self.moe_quant_config is not None:
-            self.moe_kernel = make_mxfp4_moe_kernel(
-                moe_quant_config=self.moe_quant_config,
-                moe_config=self.moe,
-                experts_cls=self.experts_cls,
-                mxfp4_backend=self.mxfp4_backend,
-                shared_experts=layer.shared_experts,
-                routing_tables=layer._maybe_init_expert_routing_tables(),
-            )
-
-    def apply(
-        self,
-        layer: FusedMoE,
-        x: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        assert self.moe_kernel is not None
-        return self.moe_kernel.apply(
-            x,
-            layer.w13_weight,
-            layer.w2_weight,
-            topk_weights,
-            topk_ids,
-            activation=layer.activation,
-            global_num_experts=layer.global_num_experts,
-            expert_map=layer.expert_map,
-            apply_router_weight_on_input=layer.apply_router_weight_on_input,
-            shared_experts_input=shared_experts_input,
-        )
-
-
-class CompressedTensorsW4A4Nvfp4MoEMethod(CompressedTensorsMoEMethod):
-    def __init__(
-        self,
-        moe: FusedMoEConfig,
-        layer_name: str | None = None,
-        use_a16: bool = False,
-    ):
-        super().__init__(moe)
-        self.group_size = 16
-
-        # Select experts implementation.
-        self.nvfp4_backend, self.experts_cls = select_nvfp4_moe_backend(
-            config=self.moe,
-            weight_key=kNvfp4Static,
-            activation_key=None if use_a16 else kNvfp4Dynamic,
-        )
-
-        self.use_global_sf = is_global_sf_supported_for_nvfp4_backend(
-            self.nvfp4_backend
-        )
-
-    def create_weights(
-        self,
-        layer: torch.nn.Module,
-        num_experts: int,
-        hidden_size: int,
-        intermediate_size_per_partition: int,
-        params_dtype: torch.dtype,
-        **extra_weight_attrs,
-    ):
-        layer.num_experts = num_experts
-        layer.params_dtype = params_dtype
-        w13_num_shards = 2 if self.moe.is_act_and_mul else 1
-
-        w13_weight = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                w13_num_shards * intermediate_size_per_partition,
-                # 2 fp4 items are packed in the input dimension
-                hidden_size // 2,
-                requires_grad=False,
-                dtype=torch.uint8,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_packed", w13_weight)
-        set_weight_attrs(w13_weight, extra_weight_attrs)
-
-        w2_weight = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                hidden_size,
-                # 2 fp4 items are packed in the input dimension
-                intermediate_size_per_partition // 2,
-                dtype=torch.uint8,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight_packed", w2_weight)
-        set_weight_attrs(w2_weight, extra_weight_attrs)
-
-        # Weight Scales
-        w13_weight_scale = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                w13_num_shards * intermediate_size_per_partition,
-                # 2 fp4 items are packed in the input dimension
-                hidden_size // self.group_size,
-                dtype=torch.float8_e4m3fn,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_scale", w13_weight_scale)
-        extra_weight_attrs.update(
-            {"quant_method": FusedMoeWeightScaleSupported.GROUP.value}
-        )
-        set_weight_attrs(w13_weight_scale, extra_weight_attrs)
-
-        w2_weight_scale = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                hidden_size,
-                # 2 fp4 items are packed in the input dimension
-                intermediate_size_per_partition // self.group_size,
-                dtype=torch.float8_e4m3fn,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight_scale", w2_weight_scale)
-        extra_weight_attrs.update(
-            {"quant_method": FusedMoeWeightScaleSupported.GROUP.value}
-        )
-        set_weight_attrs(w2_weight_scale, extra_weight_attrs)
-
-        # Weight Global Scales
-        w13_weight_scale_2 = torch.nn.Parameter(
-            torch.empty(num_experts, w13_num_shards, dtype=torch.float32),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_global_scale", w13_weight_scale_2)
-        extra_weight_attrs.update(
-            {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}
-        )
-        set_weight_attrs(w13_weight_scale_2, extra_weight_attrs)
-
-        w2_weight_scale_2 = torch.nn.Parameter(
-            torch.empty(num_experts, dtype=torch.float32), requires_grad=False
-        )
-        layer.register_parameter("w2_weight_global_scale", w2_weight_scale_2)
-        extra_weight_attrs.update(
-            {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}
-        )
-        set_weight_attrs(w2_weight_scale_2, extra_weight_attrs)
-
-        # Input Global Scales
-        w13_input_scale = torch.nn.Parameter(
-            torch.empty(num_experts, w13_num_shards, dtype=torch.float32),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_input_global_scale", w13_input_scale)
-        extra_weight_attrs.update(
-            {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}
-        )
-        set_weight_attrs(w13_input_scale, extra_weight_attrs)
-
-        w2_input_scale = torch.nn.Parameter(
-            torch.empty(num_experts, dtype=torch.float32), requires_grad=False
-        )
-        layer.register_parameter("w2_input_global_scale", w2_input_scale)
-        extra_weight_attrs.update(
-            {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}
-        )
-        set_weight_attrs(w2_input_scale, extra_weight_attrs)
-
-    def process_weights_after_loading(self, layer: FusedMoE) -> None:
-        """
-        Convert NVFP4 MoE weights into kernel format and setup the kernel.
-        """
-        # NOTE(rob): wN_weight_packed -> wN_weight is because ModularKernelMethod
-        # requires this naming convention. However, the name change breaks
-        # reloading because the state dict no longer matches disk. Once we
-        # remove MKM, we should revert this change to ensure compatibility.
-        layer.w13_weight = torch.nn.Parameter(
-            layer.w13_weight_packed.data, requires_grad=False
-        )
-        delattr(layer, "w13_weight_packed")
-
-        layer.w2_weight = torch.nn.Parameter(
-            layer.w2_weight_packed.data, requires_grad=False
-        )
-        delattr(layer, "w2_weight_packed")
-
-        # Use a single gscale for w13.
-        if self.moe.is_act_and_mul and not torch.allclose(
-            layer.w13_weight_global_scale[:, 0], layer.w13_weight_global_scale[:, 1]
-        ):
-            logger.warning_once(
-                "w1_weight_global_scale must match w3_weight_global_scale. "
-                "Accuracy may be affected.",
-            )
-        w13_weight_global_scale = layer.w13_weight_global_scale[:, 0].contiguous()
-
-        # Shuffle weights into the NvFp4 kernel format.
-        (
-            w13,
-            w13_scale,
-            w13_scale_2,
-            a13_scale,
-            w2,
-            w2_scale,
-            w2_scale_2,
-            a2_scale,
-        ) = convert_to_nvfp4_moe_kernel_format(
-            nvfp4_backend=self.nvfp4_backend,
-            layer=layer,
-            w13=layer.w13_weight,
-            w13_scale=layer.w13_weight_scale,
-            w13_scale_2=(1.0 / w13_weight_global_scale),
-            a13_scale=(1.0 / layer.w13_input_global_scale),
-            w2=layer.w2_weight,
-            w2_scale=layer.w2_weight_scale,
-            w2_scale_2=(1.0 / layer.w2_weight_global_scale),
-            a2_scale=(1.0 / layer.w2_input_global_scale),
-            is_act_and_mul=self.moe.is_act_and_mul,
-        )
-
-        replace_parameter(layer, "w13_weight", w13)
-        replace_parameter(layer, "w13_weight_scale", w13_scale)
-        replace_parameter(layer, "w2_weight", w2)
-        replace_parameter(layer, "w2_weight_scale", w2_scale)
-        layer.w13_weight_scale_2 = w13_scale_2
-        layer.w2_weight_scale_2 = w2_scale_2
-        layer.w13_input_scale = a13_scale
-        layer.w2_input_scale = a2_scale
-
-        # Setup modular kernel.
-        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
-        assert self.experts_cls is not None
-        self.moe_kernel = make_nvfp4_moe_kernel(
-            moe_quant_config=self.moe_quant_config,
-            moe_config=self.moe,
-            experts_cls=self.experts_cls,
-            shared_experts=layer.shared_experts,
-            routing_tables=layer._maybe_init_expert_routing_tables(),
-        )
-        self.moe_kernel.fused_experts.process_weights_after_loading(layer)
-
-    def maybe_make_prepare_finalize(
-        self,
-        routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
-    ) -> mk.FusedMoEPrepareAndFinalizeModular | None:
-        raise ValueError(
-            f"{self.__class__.__name__} uses the new modular kernel initialization "
-            "logic. This function should not be called."
-        )
-
-    def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantConfig:
-        return make_nvfp4_moe_quant_config(
-            backend=self.nvfp4_backend,
-            w13_scale=layer.w13_weight_scale,
-            w2_scale=layer.w2_weight_scale,
-            w13_scale_2=layer.w13_weight_scale_2,
-            w2_scale_2=layer.w2_weight_scale_2,
-            a13_scale=layer.w13_input_scale,
-            a2_scale=layer.w2_input_scale,
-        )
-
-    def apply_monolithic(
-        self,
-        layer: FusedMoE,
-        x: torch.Tensor,
-        router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        assert self.is_monolithic
-        assert self.moe_kernel is not None
-        return self.moe_kernel.apply_monolithic(
-            x,
-            layer.w13_weight,
-            layer.w2_weight,
-            router_logits,
-            activation=layer.activation,
-            global_num_experts=layer.global_num_experts,
-            expert_map=layer.expert_map,
-            apply_router_weight_on_input=layer.apply_router_weight_on_input,
-            num_expert_group=layer.num_expert_group,
-            topk_group=layer.topk_group,
-            e_score_correction_bias=layer.e_score_correction_bias,
-            routed_scaling_factor=layer.routed_scaling_factor,
-        )
-
-    def apply(
-        self,
-        layer: FusedMoE,
-        x: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        assert self.moe_kernel is not None
-        return self.moe_kernel.apply(
-            x,
-            layer.w13_weight,
-            layer.w2_weight,
-            topk_weights,
-            topk_ids,
-            activation=layer.activation,
-            global_num_experts=layer.global_num_experts,
-            expert_map=layer.expert_map,
-            apply_router_weight_on_input=layer.apply_router_weight_on_input,
-            shared_experts_input=shared_experts_input,
-        )
-
-
-class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
-    """W8A8 FP8 MoE quantization using compressed tensors."""
-
-    def __init__(
-        self,
-        weight_quant: QuantizationArgs,
-        input_quant: QuantizationArgs,
-        moe: FusedMoEConfig,
-        layer_name: str | None = None,
-    ):
-        super().__init__(moe)
-        self.weight_quant = weight_quant
-        self.input_quant = input_quant
-
-        per_tensor = (
-            self.weight_quant.strategy == QuantizationStrategy.TENSOR
-            and self.input_quant.strategy == QuantizationStrategy.TENSOR
-        )
-        per_channel = (
-            self.weight_quant.strategy == QuantizationStrategy.CHANNEL
-            and self.input_quant.strategy == QuantizationStrategy.TOKEN
-        )
-        if not (per_tensor or per_channel):
-            assert self.weight_quant.strategy == QuantizationStrategy.BLOCK
-            self.weight_block_size = self.weight_quant.block_structure
-            assert self.weight_quant.dynamic is not None
-        else:
-            self.weight_block_size = None
-        self.block_quant = self.weight_block_size is not None
-
-        self.static_input_scales = not self.input_quant.dynamic
-        if self.static_input_scales and per_channel:
-            raise ValueError(
-                "For FP8 Fused MoE layer, we require either per tensor or "
-                "channelwise, dynamic per token quantization."
-            )
-
-        ct2vllm_weight = {
-            QuantizationStrategy.CHANNEL: kFp8StaticChannelSym,
-            QuantizationStrategy.TENSOR: kFp8StaticTensorSym,
-            QuantizationStrategy.BLOCK: kFp8Static128BlockSym,
-        }
-        ct2vllm_act = {
-            QuantizationStrategy.TOKEN: kFp8DynamicTokenSym,
-            QuantizationStrategy.TENSOR: (
-                kFp8StaticTensorSym if self.static_input_scales else kFp8Dynamic128Sym
-            ),
-        }
-        weight_key = ct2vllm_weight[self.weight_quant.strategy]
-        if weight_key == kFp8Static128BlockSym:
-            activation_key = kFp8Dynamic128Sym
-        else:
-            activation_key = ct2vllm_act[self.input_quant.strategy]
-
-        # Select Fp8 MoE backend
-        self.fp8_backend, self.experts_cls = select_fp8_moe_backend(
-            config=self.moe,
-            weight_key=weight_key,
-            activation_key=activation_key,
-            allow_vllm_cutlass=True,
-        )
-
-    def create_weights(
-        self,
-        layer: torch.nn.Module,
-        num_experts: int,
-        hidden_size: int,
-        intermediate_size_per_partition: int,
-        params_dtype: torch.dtype,
-        **extra_weight_attrs,
-    ):
-        layer.num_experts = num_experts
-        layer.orig_dtype = params_dtype
-        layer.weight_block_size = None
-
-        params_dtype = torch.float8_e4m3fn
-        w13_num_shards = 2 if self.moe.is_act_and_mul else 1
-
-        if self.block_quant:
-            assert self.weight_block_size is not None
-            layer.weight_block_size = self.weight_block_size
-            tp_size = get_tensor_model_parallel_world_size()
-            block_n, block_k = (
-                self.weight_block_size[0],
-                self.weight_block_size[1],
-            )
-            # NOTE: To ensure proper alignment of the block-wise quantization
-            # scales, the output_size of the weights for both the gate and up
-            # layers must be divisible by block_n.
-            # Required by column parallel or enabling merged weights
-            if intermediate_size_per_partition % block_n != 0:
-                raise ValueError(
-                    f"The output_size of gate's and up's weight = "
-                    f"{intermediate_size_per_partition} is not divisible by "
-                    f"weight quantization block_n = {block_n}."
-                )
-            if tp_size > 1 and intermediate_size_per_partition % block_k != 0:
-                # Required by row parallel
-                raise ValueError(
-                    f"The input_size of down's weight = "
-                    f"{intermediate_size_per_partition} is not divisible by "
-                    f"weight quantization block_k = {block_k}."
-                )
-
-        # WEIGHTS
-        w13_weight = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                w13_num_shards * intermediate_size_per_partition,
-                hidden_size,
-                dtype=params_dtype,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight", w13_weight)
-        set_weight_attrs(w13_weight, extra_weight_attrs)
-
-        w2_weight = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                hidden_size,
-                intermediate_size_per_partition,
-                dtype=params_dtype,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight", w2_weight)
-        set_weight_attrs(w2_weight, extra_weight_attrs)
-
-        # WEIGHT_SCALES
-        if self.weight_quant.strategy == QuantizationStrategy.TENSOR:
-            # For gated MoE, allocate 2 scales for w1 and w3 respectively.
-            # They will be combined to a single scale after weight loading.
-            # For non-gated MoE, allocate 1 scale for w13.
-            w13_weight_scale = torch.nn.Parameter(
-                torch.ones(num_experts, w13_num_shards, dtype=torch.float32),
-                requires_grad=False,
-            )
-            layer.register_parameter("w13_weight_scale", w13_weight_scale)
-            w2_weight_scale = torch.nn.Parameter(
-                torch.ones(num_experts, dtype=torch.float32), requires_grad=False
-            )
-            layer.register_parameter("w2_weight_scale", w2_weight_scale)
-            # Add PER-TENSOR quantization for FusedMoE.weight_loader.
-            extra_weight_attrs.update(
-                {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}
-            )
-            set_weight_attrs(w13_weight_scale, extra_weight_attrs)
-            set_weight_attrs(w2_weight_scale, extra_weight_attrs)
-
-        elif self.weight_quant.strategy == QuantizationStrategy.CHANNEL:
-            w13_weight_scale = torch.nn.Parameter(
-                torch.ones(
-                    num_experts,
-                    w13_num_shards * intermediate_size_per_partition,
-                    1,
-                    dtype=torch.float32,
-                ),
-                requires_grad=False,
-            )
-            layer.register_parameter("w13_weight_scale", w13_weight_scale)
-            w2_weight_scale = torch.nn.Parameter(
-                torch.ones(num_experts, hidden_size, 1, dtype=torch.float32),
-                requires_grad=False,
-            )
-            layer.register_parameter("w2_weight_scale", w2_weight_scale)
-            # Add PER-CHANNEL quantization for FusedMoE.weight_loader.
-            extra_weight_attrs.update(
-                {"quant_method": FusedMoeWeightScaleSupported.CHANNEL.value}
-            )
-            set_weight_attrs(w13_weight_scale, extra_weight_attrs)
-            set_weight_attrs(w2_weight_scale, extra_weight_attrs)
-
-        elif self.weight_quant.strategy == QuantizationStrategy.BLOCK:
-            w13_weight_scale = torch.nn.Parameter(
-                torch.ones(
-                    num_experts,
-                    w13_num_shards
-                    * ((intermediate_size_per_partition + block_n - 1) // block_n),
-                    (hidden_size + block_k - 1) // block_k,
-                    dtype=torch.float32,
-                ),
-                requires_grad=False,
-            )
-            layer.register_parameter("w13_weight_scale", w13_weight_scale)
-            w2_weight_scale = torch.nn.Parameter(
-                torch.ones(
-                    num_experts,
-                    (hidden_size + block_n - 1) // block_n,
-                    (intermediate_size_per_partition + block_k - 1) // block_k,
-                    dtype=torch.float32,
-                ),
-                requires_grad=False,
-            )
-            layer.register_parameter("w2_weight_scale", w2_weight_scale)
-            # Add PER-CHANNEL quantization for FusedMoE.weight_loader.
-            extra_weight_attrs.update(
-                {"quant_method": FusedMoeWeightScaleSupported.BLOCK.value}
-            )
-            set_weight_attrs(w13_weight_scale, extra_weight_attrs)
-            set_weight_attrs(w2_weight_scale, extra_weight_attrs)
-
-        # INPUT_SCALES
-        if self.static_input_scales:
-            w13_input_scale = torch.nn.Parameter(
-                torch.ones(num_experts, dtype=torch.float32), requires_grad=False
-            )
-            layer.register_parameter("w13_input_scale", w13_input_scale)
-            set_weight_attrs(w13_input_scale, extra_weight_attrs)
-
-            w2_input_scale = torch.nn.Parameter(
-                torch.ones(num_experts, dtype=torch.float32), requires_grad=False
-            )
-            layer.register_parameter("w2_input_scale", w2_input_scale)
-            set_weight_attrs(w2_input_scale, extra_weight_attrs)
-        else:
-            layer.w13_input_scale = None
-            layer.w2_input_scale = None
-
-    def process_weights_after_loading(self, layer: FusedMoE) -> None:
-        # Allow for accessing weights and scales in standard way.
-        w13 = layer.w13_weight
-        w2 = layer.w2_weight
-        w13_scale = layer.w13_weight_scale
-        w2_scale = layer.w2_weight_scale
-        w13_input_scale = layer.w13_input_scale
-        w2_input_scale = layer.w2_input_scale
-
-        # MI300x and MI325x use FNUZ format for FP8. Convert if needed.
-        if current_platform.is_fp8_fnuz():
-            w13, w13_scale, w13_input_scale = normalize_e4m3fn_to_e4m3fnuz(
-                w13, w13_scale, w13_input_scale
-            )
-            w2, w2_scale, w2_input_scale = normalize_e4m3fn_to_e4m3fnuz(
-                w2, w2_scale, w2_input_scale
-            )
-
-        # Per tensor kernels require single activation scale. Use the max.
-        if self.static_input_scales:
-            assert self.input_quant.strategy == QuantizationStrategy.TENSOR
-            assert w13_input_scale is not None and w2_input_scale is not None
-            w13_input_scale, w2_input_scale = process_fp8_input_tensor_strategy_moe(
-                w13_input_scale, w2_input_scale
-            )
-            replace_parameter(layer, "w13_input_scale", w13_input_scale)
-            replace_parameter(layer, "w2_input_scale", w2_input_scale)
-
-        # Per-tensor kernels use a single scale, for W13, but on disk there
-        # is a separate scale for W1 and W3. Requantize with the max scale.
-        if self.weight_quant.strategy == QuantizationStrategy.TENSOR:
-            w13, w13_scale = process_fp8_weight_tensor_strategy_moe(
-                w13,
-                w13_scale,
-                shard_size=layer.intermediate_size_per_partition,
-                num_experts=layer.local_num_experts,
-                is_act_and_mul=self.moe.is_act_and_mul,
-            )
-
-        w13, w2, w13_scale, w2_scale = convert_to_fp8_moe_kernel_format(
-            fp8_backend=self.fp8_backend,
-            layer=layer,
-            w13=w13,
-            w2=w2,
-            w13_scale=w13_scale,
-            w2_scale=w2_scale,
-            w13_input_scale=w13_input_scale,
-            w2_input_scale=w2_input_scale,
-        )
-
-        # Replace parameters with updated versions. Note that this helper
-        # function ensures the replacement is compatible with RL weight reloads.
-        replace_parameter(layer, "w13_weight", w13)
-        replace_parameter(layer, "w2_weight", w2)
-        replace_parameter(layer, "w13_weight_scale", w13_scale)
-        replace_parameter(layer, "w2_weight_scale", w2_scale)
-
-        # Setup modular kernel for TP case and naive DP/EP case.
-        # In non-naive DP/EP case, we will create a ModularKernelMethod.
-        # TODO(rob): unify these so FP8MoEMethod owns the ModularKernel
-        # in both cases.
-        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
-        if self.moe_quant_config:
-            assert self.experts_cls is not None
-            self.moe_kernel = make_fp8_moe_kernel(
-                moe_quant_config=self.moe_quant_config,
-                moe_config=self.moe,
-                fp8_backend=self.fp8_backend,
-                experts_cls=self.experts_cls,
-                routing_tables=layer._maybe_init_expert_routing_tables(),
-                shared_experts=layer.shared_experts,
-            )
-
-    def maybe_make_prepare_finalize(
-        self,
-        routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
-    ) -> mk.FusedMoEPrepareAndFinalizeModular | None:
-        raise ValueError(
-            f"{self.__class__.__name__} uses the new modular kernel initialization "
-            "logic. This function should not be called."
-        )
-
-    def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantConfig:
-        is_per_token = self.input_quant.strategy == QuantizationStrategy.TOKEN
-        return make_fp8_moe_quant_config(
-            fp8_backend=self.fp8_backend,
-            w1_scale=layer.w13_weight_scale,
-            w2_scale=layer.w2_weight_scale,
-            a1_scale=layer.w13_input_scale,
-            a2_scale=layer.w2_input_scale,
-            per_act_token_quant=is_per_token,
-            per_out_ch_quant=is_per_token,
-            block_shape=self.weight_block_size,
-        )
-
-    def apply_monolithic(
-        self,
-        layer: FusedMoE,
-        x: torch.Tensor,
-        router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        assert self.moe_kernel is not None
-        return self.moe_kernel.apply_monolithic(
-            x,
-            layer.w13_weight,
-            layer.w2_weight,
-            router_logits,
-            activation=layer.activation,
-            global_num_experts=layer.global_num_experts,
-            expert_map=layer.expert_map,
-            apply_router_weight_on_input=layer.apply_router_weight_on_input,
-            num_expert_group=layer.num_expert_group,
-            topk_group=layer.topk_group,
-            e_score_correction_bias=layer.e_score_correction_bias,
-            routed_scaling_factor=layer.routed_scaling_factor,
-        )
-
-    def apply(
-        self,
-        layer: FusedMoE,
-        x: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        assert not self.is_monolithic
-        assert self.moe_kernel is not None
-        return self.moe_kernel.apply(
-            x,
-            layer.w13_weight,
-            layer.w2_weight,
-            topk_weights,
-            topk_ids,
-            activation=layer.activation,
-            global_num_experts=layer.global_num_experts,
-            # TODO(rob): investigate the disable_expert_map introduced by:
-            # https://github.com/vllm-project/vllm/commit/84166fee9770e6fba71a96978b3e7d149392fb28 # noqa: E501
-            expert_map=layer.expert_map,
-            apply_router_weight_on_input=layer.apply_router_weight_on_input,
-            shared_experts_input=shared_experts_input,
-        )
-
-    @property
-    def supports_eplb(self) -> bool:
-        return True
-
-
-class CompressedTensorsW8A8Int8MoEMethod(CompressedTensorsMoEMethod):
-    def __init__(
-        self,
-        weight_quant: QuantizationArgs,
-        input_quant: QuantizationArgs,
-        moe: FusedMoEConfig,
-        layer_name: str | None = None,
-    ):
-        super().__init__(moe)
-        self.weight_quant = weight_quant
-        self.input_quant = input_quant
-
-        per_channel = (
-            self.weight_quant.strategy == QuantizationStrategy.CHANNEL
-            and self.input_quant.strategy == QuantizationStrategy.TOKEN
-        )
-        if not per_channel:
-            raise ValueError(
-                "For INT8 Fused MoE layers, we require channelwise, "
-                "dynamic per token quantization. Found "
-                f"{self.weight_quant}, {self.input_quant}"
-            )
-
-        self.static_input_scales = not self.input_quant.dynamic
-        if self.static_input_scales:
-            raise ValueError(
-                "For INT8 Fused MoE layers, we require channelwise, "
-                "dynamic per token quantization. Found static input scales."
-            )
-
-    def create_weights(
-        self,
-        layer: torch.nn.Module,
-        num_experts: int,
-        hidden_size: int,
-        intermediate_size_per_partition: int,
-        params_dtype: torch.dtype,
-        **extra_weight_attrs,
-    ):
-        params_dtype = torch.int8
-        w13_num_shards = 2 if self.moe.is_act_and_mul else 1
-
-        # WEIGHTS
-        w13_weight = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                w13_num_shards * intermediate_size_per_partition,
-                hidden_size,
-                dtype=params_dtype,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight", w13_weight)
-        set_weight_attrs(w13_weight, extra_weight_attrs)
-
-        w2_weight = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                hidden_size,
-                intermediate_size_per_partition,
-                dtype=params_dtype,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight", w2_weight)
-        set_weight_attrs(w2_weight, extra_weight_attrs)
-
-        # WEIGHT_SCALES
-        assert self.weight_quant.strategy == QuantizationStrategy.CHANNEL
-        w13_weight_scale = torch.nn.Parameter(
-            torch.ones(
-                num_experts,
-                w13_num_shards * intermediate_size_per_partition,
-                1,
-                dtype=torch.float32,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_scale", w13_weight_scale)
-        w2_weight_scale = torch.nn.Parameter(
-            torch.ones(num_experts, hidden_size, 1, dtype=torch.float32),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight_scale", w2_weight_scale)
-        # Add PER-CHANNEL quantization for FusedMoE.weight_loader.
-        extra_weight_attrs.update(
-            {"quant_method": FusedMoeWeightScaleSupported.CHANNEL.value}
-        )
-        set_weight_attrs(w13_weight_scale, extra_weight_attrs)
-        set_weight_attrs(w2_weight_scale, extra_weight_attrs)
-
-        # INPUT_SCALES
-        assert not self.static_input_scales
-        layer.w13_input_scale = None
-        layer.w2_input_scale = None
-
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
-        pass
-
-    def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
-    ) -> FusedMoEQuantConfig | None:
-        return int8_w8a8_moe_quant_config(
-            w1_scale=layer.w13_weight_scale,
-            w2_scale=layer.w2_weight_scale,
-            a1_scale=layer.w13_input_scale,
-            a2_scale=layer.w2_input_scale,
-            per_act_token_quant=True,
-        )
-
-    def apply(
-        self,
-        layer: FusedMoE,
-        x: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        from vllm.model_executor.layers.fused_moe import fused_experts
-
-        return fused_experts(
-            hidden_states=x,
-            w1=layer.w13_weight,
-            w2=layer.w2_weight,
-            topk_weights=topk_weights,
-            topk_ids=topk_ids,
-            inplace=not self.moe.disable_inplace,
-            activation=layer.activation,
-            apply_router_weight_on_input=layer.apply_router_weight_on_input,
-            global_num_experts=layer.global_num_experts,
-            expert_map=layer.expert_map,
-            quant_config=self.moe_quant_config,
-        )
-
-
-class CompressedTensorsWNA16MarlinMoEMethod(CompressedTensorsMoEMethod):
-    def __init__(
-        self,
-        weight_quant: QuantizationArgs,
-        input_quant: QuantizationArgs | None,
-        moe: FusedMoEConfig,
-        layer_name: str | None = None,
-    ):
-        super().__init__(moe)
-        self.weight_quant = weight_quant
-        self.input_quant = input_quant
-        assert weight_quant.symmetric, (
-            "Only symmetric quantization is supported for MoE"
-        )
-        # Extract properties from weight_quant
-        self.num_bits = weight_quant.num_bits
-        self.packed_factor = 32 // weight_quant.num_bits
-        self.strategy = weight_quant.strategy
-        self.group_size = weight_quant.group_size
-        self.actorder = weight_quant.actorder
-
-        self.quant_type = WNA16_SUPPORTED_TYPES_MAP[self.num_bits]
-
-        self.marlin_input_dtype = get_marlin_input_dtype(layer_name)
-        self.use_flashinfer_mxint4_moe = (
-            is_flashinfer_mxint4_moe_available()
-            and self.group_size == 32
-            and weight_quant.num_bits == 4
-        )
-        self.kernel_backend = (
-            "Flashinfer" if self.use_flashinfer_mxint4_moe else "Marlin"
-        )
-        logger.info_once(
-            f"Using {self.kernel_backend} backend for WNA16 MoE "
-            f"(group_size={self.group_size}, num_bits={self.num_bits})",
-            scope="local",
-        )
-
-    def get_weight_shape(
-        self,
-        weight_name: str,
-        num_experts: int,
-        hidden_size: int,
-        intermediate_size_per_partition: int,
-        num_groups_w2: int | None = None,
-        num_groups_w13: int | None = None,
-    ) -> tuple[int, int, int]:
-        """
-        Get the shape of the weight based on the weight name, number of experts
-        hidden size, intermediate size per partition, number of groups for w2,
-        and number of groups for w13. Pass in num_groups_w2 and num_groups_w13
-        for weight scales.
-        """
-        if weight_name == "w13_scale":
-            assert num_groups_w13 is not None, (
-                "num_groups_w13 must be provided for weight scales"
-            )
-        if weight_name == "w2_scale":
-            assert num_groups_w2 is not None, (
-                "num_groups_w2 must be provided for weight scales"
-            )
-        w13_num_shards = 2 if self.moe.is_act_and_mul else 1
-        shape_map = {
-            "w13_weight": {
-                "Flashinfer": (
-                    num_experts,
-                    w13_num_shards * intermediate_size_per_partition,
-                    hidden_size // self.packed_factor,
-                ),
-                "Marlin": (
-                    num_experts,
-                    hidden_size // self.packed_factor,
-                    w13_num_shards * intermediate_size_per_partition,
-                ),
-            },
-            "w13_scale": {
-                "Flashinfer": (
-                    num_experts,
-                    w13_num_shards * intermediate_size_per_partition,
-                    num_groups_w13,
-                ),
-                "Marlin": (
-                    num_experts,
-                    num_groups_w13,
-                    w13_num_shards * intermediate_size_per_partition,
-                ),
-            },
-            "w2_weight": {
-                "Flashinfer": (
-                    num_experts,
-                    hidden_size,
-                    intermediate_size_per_partition // self.packed_factor,
-                ),
-                "Marlin": (
-                    num_experts,
-                    intermediate_size_per_partition // self.packed_factor,
-                    hidden_size,
-                ),
-            },
-            "w2_scale": {
-                "Flashinfer": (num_experts, hidden_size, num_groups_w2),
-                "Marlin": (num_experts, num_groups_w2, hidden_size),
-            },
-        }
-        return shape_map[weight_name][self.kernel_backend]
-
-    def create_weights(
-        self,
-        layer: torch.nn.Module,
-        num_experts: int,
-        hidden_size: int,
-        intermediate_size_per_partition: int,
-        params_dtype: torch.dtype,
-        **extra_weight_attrs,
-    ):
-        intermediate_size_full = extra_weight_attrs.pop("intermediate_size_full")
-
-        # Will transpose the loaded weight along the
-        # intermediate and hidden dim sizes. Will
-        # shard for TP along the transposed dims
-        is_transposed = self.kernel_backend != "Flashinfer"
-        extra_weight_attrs.update(
-            {"is_transposed": is_transposed, "quant_method": self.strategy}
-        )
-
-        w13_weight = torch.nn.Parameter(
-            torch.empty(
-                *self.get_weight_shape(
-                    "w13_weight",
-                    num_experts,
-                    hidden_size,
-                    intermediate_size_per_partition,
-                ),
-                dtype=torch.int32,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_packed", w13_weight)
-        set_weight_attrs(w13_weight, extra_weight_attrs)
-
-        w2_weight = torch.nn.Parameter(
-            torch.empty(
-                *self.get_weight_shape(
-                    "w2_weight",
-                    num_experts,
-                    hidden_size,
-                    intermediate_size_per_partition,
-                ),
-                dtype=torch.int32,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight_packed", w2_weight)
-        set_weight_attrs(w2_weight, extra_weight_attrs)
-
-        # In the case where we have actorder/g_idx,
-        # we do not partition the w2 scales
-        load_full_w2 = self.actorder and self.group_size != -1
-        w2_scales_size = (
-            intermediate_size_full if load_full_w2 else intermediate_size_per_partition
-        )
-
-        self.is_k_full = (not self.actorder) or (
-            intermediate_size_per_partition == intermediate_size_full
-        )
-
-        if self.strategy == "channel":
-            num_groups_w2 = num_groups_w13 = 1
-            self.group_size = -1
-        else:
-            num_groups_w2 = w2_scales_size // self.group_size
-            num_groups_w13 = hidden_size // self.group_size
-
-        layer.num_groups_w13 = num_groups_w13
-        layer.num_groups_w2 = num_groups_w2
-
-        w13_scale = torch.nn.Parameter(
-            torch.ones(
-                *self.get_weight_shape(
-                    "w13_scale",
-                    num_experts,
-                    hidden_size,
-                    intermediate_size_per_partition,
-                    num_groups_w13=num_groups_w13,
-                ),
-                dtype=params_dtype,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_scale", w13_scale)
-        set_weight_attrs(w13_scale, extra_weight_attrs)
-
-        w2_scale = torch.nn.Parameter(
-            torch.ones(
-                *self.get_weight_shape(
-                    "w2_scale",
-                    num_experts,
-                    hidden_size,
-                    intermediate_size_per_partition,
-                    num_groups_w2=num_groups_w2,
-                ),
-                dtype=params_dtype,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight_scale", w2_scale)
-        set_weight_attrs(w2_scale, extra_weight_attrs)
-        set_weight_attrs(w2_scale, {"load_full_w2": load_full_w2})
-
-        w2_weight_shape = torch.nn.Parameter(
-            torch.empty(num_experts, 2), requires_grad=False
-        )
-        layer.register_parameter("w2_weight_shape", w2_weight_shape)
-        set_weight_attrs(w2_weight_shape, extra_weight_attrs)
-        w13_weight_shape = torch.nn.Parameter(
-            torch.empty(num_experts, 2), requires_grad=False
-        )
-
-        layer.register_parameter("w13_weight_shape", w13_weight_shape)
-        set_weight_attrs(w13_weight_shape, extra_weight_attrs)
-
-        w13_g_idx = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                hidden_size,
-                dtype=torch.int32,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_g_idx", w13_g_idx)
-        set_weight_attrs(w13_g_idx, extra_weight_attrs)
-
-        w2_g_idx = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                intermediate_size_per_partition,
-                dtype=torch.int32,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight_g_idx", w2_g_idx)
-        set_weight_attrs(w2_g_idx, extra_weight_attrs)
-
-        w13_g_idx_sort_indices = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                hidden_size,
-                dtype=torch.int32,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_g_idx_sort_indices", w13_g_idx_sort_indices)
-        set_weight_attrs(w13_g_idx_sort_indices, extra_weight_attrs)
-
-        w2_g_idx_sort_indices = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                intermediate_size_per_partition,
-                dtype=torch.int32,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_g_idx_sort_indices", w2_g_idx_sort_indices)
-        set_weight_attrs(w2_g_idx_sort_indices, extra_weight_attrs)
-
-        layer.a13_scale = None
-        layer.a2_scale = None
-        layer.marlin_state = GPTQMarlinState.REPACK
-
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
-        num_experts = layer.w13_weight_g_idx.shape[0]
-        device = layer.w13_weight_g_idx.device
-        if self.kernel_backend == "Flashinfer":
-            dict_weights_mxint4 = prepare_static_weights_for_trtllm_mxint4_moe(
-                layer.w13_weight_packed,
-                layer.w13_weight_scale,
-                layer.w2_weight_packed,
-                layer.w2_weight_scale,
-            )
-            replace_parameter(
-                layer, "w13_weight_packed", dict_weights_mxint4["gemm1_weights"]
-            )
-            replace_parameter(
-                layer, "w13_weight_scale", dict_weights_mxint4["gemm1_scales"]
-            )
-            replace_parameter(
-                layer, "w2_weight_packed", dict_weights_mxint4["gemm2_weights"]
-            )
-            replace_parameter(
-                layer, "w2_weight_scale", dict_weights_mxint4["gemm2_scales"]
-            )
-            return None
-
-        is_a_8bit = (
-            self.marlin_input_dtype is not None
-            and self.marlin_input_dtype.itemsize == 1
-        )
-
-        if self.marlin_input_dtype == torch.float8_e4m3fn:
-            # NOTE: for non-zp quantization format only
-            ops.marlin_int4_fp8_preprocess(layer.w13_weight_packed, inplace=True)
-            ops.marlin_int4_fp8_preprocess(layer.w2_weight_packed, inplace=True)
-            layer.w13_weight_scale.data = layer.w13_weight_scale.data * 512
-            layer.w2_weight_scale.data = layer.w2_weight_scale.data * 512
-
-        # when running models with grouped act order,
-        # resort to g_idx values provided in checkpoint
-        if self.actorder == "group":
-            w13_g_idx_sort_indices = torch.empty_like(layer.w13_weight_g_idx)
-            w2_g_idx_sort_indices = torch.empty_like(layer.w2_weight_g_idx)
-            w13_sorted_g_idx = torch.empty_like(layer.w13_weight_g_idx)
-            w2_sorted_g_idx = torch.empty_like(layer.w2_weight_g_idx)
-
-            for e in range(num_experts):
-                w13_g_idx_sort_indices[e] = torch.argsort(layer.w13_weight_g_idx[e]).to(
-                    torch.int32
-                )
-                w2_g_idx_sort_indices[e] = torch.argsort(layer.w2_weight_g_idx[e]).to(
-                    torch.int32
-                )
-                w13_sorted_g_idx[e] = layer.w13_weight_g_idx[e][
-                    w13_g_idx_sort_indices[e]
-                ]
-                w2_sorted_g_idx[e] = layer.w2_weight_g_idx[e][w2_g_idx_sort_indices[e]]
-
-            replace_parameter(layer, "w13_weight_g_idx", w13_sorted_g_idx)
-            replace_parameter(layer, "w2_weight_g_idx", w2_sorted_g_idx)
-            replace_parameter(layer, "w13_g_idx_sort_indices", w13_g_idx_sort_indices)
-            replace_parameter(layer, "w2_g_idx_sort_indices", w2_g_idx_sort_indices)
-
-        else:
-            layer.w13_weight_g_idx = torch.nn.Parameter(
-                torch.empty((num_experts, 0), dtype=torch.int32, device=device),
-                requires_grad=False,
-            )
-            layer.w2_weight_g_idx = torch.nn.Parameter(
-                torch.empty((num_experts, 0), dtype=torch.int32, device=device),
-                requires_grad=False,
-            )
-            layer.w13_g_idx_sort_indices = torch.nn.Parameter(
-                torch.empty((num_experts, 0), dtype=torch.int32, device=device),
-                requires_grad=False,
-            )
-            layer.w2_g_idx_sort_indices = torch.nn.Parameter(
-                torch.empty((num_experts, 0), dtype=torch.int32, device=device),
-                requires_grad=False,
-            )
-
-        marlin_w13_qweight = ops.gptq_marlin_moe_repack(
-            layer.w13_weight_packed,
-            layer.w13_g_idx_sort_indices,
-            layer.w13_weight_packed.shape[1] * self.packed_factor,
-            layer.w13_weight_packed.shape[2],
-            self.num_bits,
-            is_a_8bit=is_a_8bit,
-        )
-        replace_parameter(layer, "w13_weight_packed", marlin_w13_qweight)
-
-        marlin_w2_qweight = ops.gptq_marlin_moe_repack(
-            layer.w2_weight_packed,
-            layer.w2_g_idx_sort_indices,
-            layer.w2_weight_packed.shape[1] * self.packed_factor,
-            layer.w2_weight_packed.shape[2],
-            self.num_bits,
-            is_a_8bit=is_a_8bit,
-        )
-        replace_parameter(layer, "w2_weight_packed", marlin_w2_qweight)
-
-        # Repack scales
-        marlin_w13_scales = marlin_moe_permute_scales(
-            s=layer.w13_weight_scale,
-            size_k=layer.w13_weight_packed.shape[2],
-            size_n=layer.w13_weight_scale.shape[2],
-            group_size=self.group_size,
-            is_a_8bit=is_a_8bit,
-        )
-        if self.marlin_input_dtype == torch.int8 and layer.num_groups_w13 > 1:
-            marlin_w13_scales, w13_input_global_scale = marlin_act_int8_process_scales(
-                marlin_w13_scales
-            )
-            layer.register_parameter(
-                "w13_input_global_scale",
-                torch.nn.Parameter(w13_input_global_scale, requires_grad=False),
-            )
-        replace_parameter(layer, "w13_weight_scale", marlin_w13_scales)
-
-        marlin_w2_scales = marlin_moe_permute_scales(
-            s=layer.w2_weight_scale,
-            size_k=layer.w2_weight_scale.shape[1]
-            * (self.group_size if self.group_size != -1 else self.packed_factor),
-            size_n=layer.w2_weight_scale.shape[2],
-            group_size=self.group_size,
-            is_a_8bit=is_a_8bit,
-        )
-        if self.marlin_input_dtype == torch.int8 and layer.num_groups_w2 > 1:
-            marlin_w2_scales, w2_input_global_scale = marlin_act_int8_process_scales(
-                marlin_w2_scales
-            )
-            layer.register_parameter(
-                "w2_input_global_scale",
-                torch.nn.Parameter(w2_input_global_scale, requires_grad=False),
-            )
-        replace_parameter(layer, "w2_weight_scale", marlin_w2_scales)
-
-        layer.workspace = marlin_make_workspace_new(device, 4)
-
-    def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
-    ) -> FusedMoEQuantConfig | None:
-        if self.num_bits != 4:
-            return None
-        return int4_w4a16_moe_quant_config(
-            w1_scale=layer.w13_weight_scale,
-            w2_scale=layer.w2_weight_scale,
-            w1_zp=None,
-            w2_zp=None,
-            block_shape=[0, self.group_size],
-        )
-
-    def select_gemm_impl(
-        self,
-        prepare_finalize: mk.FusedMoEPrepareAndFinalizeModular,
-        layer: torch.nn.Module,
-    ) -> mk.FusedMoEExpertsModular:
-        assert self.num_bits == 4, "only supporting w4"
-        layer.w13_weight = layer.w13_weight_packed
-        layer.w2_weight = layer.w2_weight_packed
-        assert all([w is not None for w in [layer.w13_weight, layer.w2_weight]])
-        assert self.moe_quant_config is not None
-        if (
-            prepare_finalize.activation_format
-            == mk.FusedMoEActivationFormat.BatchedExperts
-        ):
-            max_num_tokens_per_rank = prepare_finalize.max_num_tokens_per_rank()
-            assert max_num_tokens_per_rank is not None
-            return BatchedMarlinExperts(
-                max_num_tokens=max_num_tokens_per_rank,
-                num_dispatchers=prepare_finalize.num_dispatchers(),
-                moe_config=self.moe,
-                quant_config=self.moe_quant_config,
-                w13_g_idx=layer.w13_weight_g_idx,
-                w2_g_idx=layer.w2_weight_g_idx,
-                w13_g_idx_sort_indices=layer.w13_g_idx_sort_indices,
-                w2_g_idx_sort_indices=layer.w2_g_idx_sort_indices,
-                is_k_full=self.is_k_full,
-            )
-        else:
-            return MarlinExperts(
-                moe_config=self.moe,
-                quant_config=self.moe_quant_config,
-                w13_g_idx=layer.w13_weight_g_idx,
-                w2_g_idx=layer.w2_weight_g_idx,
-                w13_g_idx_sort_indices=layer.w13_g_idx_sort_indices,
-                w2_g_idx_sort_indices=layer.w2_g_idx_sort_indices,
-                is_k_full=self.is_k_full,
-            )
-
-    @property
-    def is_monolithic(self) -> bool:
-        return self.kernel_backend == "Flashinfer"
-
-    def apply_monolithic(
-        self,
-        layer: FusedMoE,
-        x: torch.Tensor,
-        router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        assert self.kernel_backend == "Flashinfer"
-        return flashinfer_trtllm_mxint4_moe(
-            x=x,
-            router_logits=router_logits,
-            w13_weight_packed=layer.w13_weight_packed,
-            w13_weight_scale=layer.w13_weight_scale,
-            w2_weight_packed=layer.w2_weight_packed,
-            w2_weight_scale=layer.w2_weight_scale,
-            global_num_experts=layer.global_num_experts,
-            top_k=layer.top_k,
-            intermediate_size_per_partition=layer.intermediate_size_per_partition,
-            local_num_experts=layer.local_num_experts,
-            ep_rank=layer.ep_rank,
-            num_expert_group=layer.num_expert_group,
-            topk_group=layer.topk_group,
-            e_score_correction_bias=layer.e_score_correction_bias,
-            routing_method_type=layer.routing_method_type,
-        )
-
-    def apply(
-        self,
-        layer: FusedMoE,
-        x: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        assert self.kernel_backend == "Marlin"
-        return fused_marlin_moe(
-            x,
-            layer.w13_weight_packed,
-            layer.w2_weight_packed,
-            None,
-            None,
-            layer.w13_weight_scale,
-            layer.w2_weight_scale,
-            topk_weights,
-            topk_ids,
-            input_global_scale1=getattr(layer, "w13_input_global_scale", None),
-            input_global_scale2=getattr(layer, "w2_input_global_scale", None),
-            quant_type_id=self.quant_type.id,
-            apply_router_weight_on_input=layer.apply_router_weight_on_input,
-            global_num_experts=layer.global_num_experts,
-            activation=layer.activation,
-            expert_map=layer.expert_map,
-            g_idx1=layer.w13_weight_g_idx,
-            g_idx2=layer.w2_weight_g_idx,
-            sort_indices1=layer.w13_g_idx_sort_indices,
-            sort_indices2=layer.w2_g_idx_sort_indices,
-            workspace=layer.workspace,
-            input_dtype=self.marlin_input_dtype,
-            is_k_full=self.is_k_full,
-            inplace=not self.moe.disable_inplace,
-        )
-
-
-class CompressedTensorsWNA16MoEMethod(CompressedTensorsMoEMethod):
-    def __init__(
-        self,
-        weight_quant: QuantizationArgs,
-        input_quant: QuantizationArgs | None,
-        moe: FusedMoEConfig,
-        layer_name: str | None = None,
-    ):
-        super().__init__(moe)
-        self.weight_quant = weight_quant
-        self.input_quant = input_quant
-        # Extract properties from weight_quant
-        self.num_bits = weight_quant.num_bits
-        self.packed_factor = 32 // weight_quant.num_bits
-        self.strategy = weight_quant.strategy
-        # channelwise is not supported by this kernel
-        assert weight_quant.strategy == "group"
-        self.group_size = weight_quant.group_size
-        # grouped actorder isn't supported by this kernel
-        assert weight_quant.actorder != "group"
-        assert weight_quant.symmetric, (
-            "Only symmetric quantization is supported for MoE"
-        )
-
-    def create_weights(
-        self,
-        layer: torch.nn.Module,
-        num_experts: int,
-        hidden_size: int,
-        intermediate_size_per_partition: int,
-        params_dtype: torch.dtype,
-        **extra_weight_attrs,
-    ):
-        # Will transpose the loaded weight along the
-        # intermediate and hidden dim sizes. Will
-        # shard for TP along the transposed dims
-        extra_weight_attrs.update(
-            {"is_transposed": True, "quant_method": self.strategy}
-        )
-        w13_num_shards = 2 if self.moe.is_act_and_mul else 1
-        w13_weight = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                hidden_size // self.packed_factor,
-                w13_num_shards * intermediate_size_per_partition,
-                dtype=torch.int32,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_packed", w13_weight)
-        set_weight_attrs(w13_weight, extra_weight_attrs)
-
-        w2_weight = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                intermediate_size_per_partition // self.packed_factor,
-                hidden_size,
-                dtype=torch.int32,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight_packed", w2_weight)
-        set_weight_attrs(w2_weight, extra_weight_attrs)
-
-        w2_scales_size = intermediate_size_per_partition
-
-        if self.strategy == "channel":
-            num_groups_w2 = num_groups_w13 = 1
-            self.group_size = -1
-        else:
-            num_groups_w2 = w2_scales_size // self.group_size
-            num_groups_w13 = hidden_size // self.group_size
-
-        w13_scale = torch.nn.Parameter(
-            torch.ones(
-                num_experts,
-                num_groups_w13,
-                w13_num_shards * intermediate_size_per_partition,
-                dtype=params_dtype,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_scale", w13_scale)
-        set_weight_attrs(w13_scale, extra_weight_attrs)
-
-        w2_scale = torch.nn.Parameter(
-            torch.ones(num_experts, num_groups_w2, hidden_size, dtype=params_dtype),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight_scale", w2_scale)
-        set_weight_attrs(w2_scale, extra_weight_attrs)
-        set_weight_attrs(w2_scale, {"load_full_w2": False})
-
-        w2_weight_shape = torch.nn.Parameter(
-            torch.empty(num_experts, 2), requires_grad=False
-        )
-        layer.register_parameter("w2_weight_shape", w2_weight_shape)
-        set_weight_attrs(w2_weight_shape, extra_weight_attrs)
-        w13_weight_shape = torch.nn.Parameter(
-            torch.empty(num_experts, 2), requires_grad=False
-        )
-
-        layer.register_parameter("w13_weight_shape", w13_weight_shape)
-        set_weight_attrs(w13_weight_shape, extra_weight_attrs)
-
-        w13_g_idx = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                hidden_size,
-                dtype=torch.int32,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_g_idx", w13_g_idx)
-        set_weight_attrs(w13_g_idx, extra_weight_attrs)
-
-        w2_g_idx = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                intermediate_size_per_partition,
-                dtype=torch.int32,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight_g_idx", w2_g_idx)
-        set_weight_attrs(w2_g_idx, extra_weight_attrs)
-
-        w13_g_idx_sort_indices = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                hidden_size,
-                dtype=torch.int32,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_g_idx_sort_indices", w13_g_idx_sort_indices)
-        set_weight_attrs(w13_g_idx_sort_indices, extra_weight_attrs)
-
-        w2_g_idx_sort_indices = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                intermediate_size_per_partition,
-                dtype=torch.int32,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_g_idx_sort_indices", w2_g_idx_sort_indices)
-        set_weight_attrs(w2_g_idx_sort_indices, extra_weight_attrs)
-
-        layer.a13_scale = None
-        layer.a2_scale = None
-
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
-        # Reconfigure packed weights and scales to match moe_wna16 format
-        layer.w13_weight_packed = torch.nn.Parameter(
-            layer.w13_weight_packed.transpose(1, 2).contiguous().view(torch.uint8),
-            requires_grad=False,
-        )
-        layer.w2_weight_packed = torch.nn.Parameter(
-            layer.w2_weight_packed.transpose(1, 2).contiguous().view(torch.uint8),
-            requires_grad=False,
-        )
-        layer.w13_weight_scale = torch.nn.Parameter(
-            layer.w13_weight_scale.transpose(1, 2).contiguous(), requires_grad=False
-        )
-        layer.w2_weight_scale = torch.nn.Parameter(
-            layer.w2_weight_scale.transpose(1, 2).contiguous(), requires_grad=False
-        )
-
-    def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
-    ) -> FusedMoEQuantConfig | None:
-        assert self.num_bits == 4 or self.num_bits == 8
-        config_builder = (
-            int4_w4a16_moe_quant_config
-            if self.num_bits == 4
-            else int8_w8a16_moe_quant_config
-        )
-
-        return config_builder(
-            w1_scale=layer.w13_weight_scale,
-            w2_scale=layer.w2_weight_scale,
-            w1_zp=None,
-            w2_zp=None,
-            block_shape=[0, self.group_size],
-        )
-
-    def select_gemm_impl(
-        self,
-        prepare_finalize: mk.FusedMoEPrepareAndFinalizeModular,
-        layer: torch.nn.Module,
-    ) -> mk.FusedMoEExpertsModular:
-        if self.moe.is_lora_enabled:
-            assert self.moe_quant_config is not None
-            from vllm.triton_utils import HAS_TRITON
-
-            if HAS_TRITON:
-                from vllm.model_executor.layers.fused_moe import TritonWNA16Experts
-
-                layer.w13_weight = layer.w13_weight_packed
-                layer.w2_weight = layer.w2_weight_packed
-                return TritonWNA16Experts(
-                    moe_config=self.moe, quant_config=self.moe_quant_config
-                )
-            else:
-                raise NotImplementedError(
-                    "TritonExperts requires Triton. "
-                    "Install triton or disable LoRA for MoE."
-                )
-
-        raise NotImplementedError
-
-    def apply(
-        self,
-        layer: FusedMoE,
-        x: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        from vllm.model_executor.layers.fused_moe import fused_experts
-
-        return fused_experts(
-            x,
-            layer.w13_weight_packed,
-            layer.w2_weight_packed,
-            topk_weights=topk_weights,
-            topk_ids=topk_ids,
-            inplace=not self.moe.disable_inplace,
-            activation=layer.activation,
-            apply_router_weight_on_input=layer.apply_router_weight_on_input,
-            global_num_experts=layer.global_num_experts,
-            expert_map=layer.expert_map,
-            quant_config=self.moe_quant_config,
-        )
-
-    @property
-    def supports_eplb(self) -> bool:
-        return True
-
-
-class CompressedTensorsW4A8Int8MoEMethod(CompressedTensorsMoEMethod):
-    """
-    CPU-only MoE method using dynamic 4-bit matmul kernels on Arm Platform
-    - Weights: int4 (stored as int8 values in [-8,7], packed to uint8 nibbles)
-    - Scales: Fp32 for Channelwise , bf16 for groupwise quantization
-    - Bias: Same data type as original weights
-    - Activations: FP32/Bf16 dynamic per-token (A8 Int),
-      quantized inside the kernel
-    """
-
-    def __init__(
-        self,
-        weight_quant: QuantizationArgs,
-        input_quant: QuantizationArgs,
-        moe: FusedMoEConfig,
-        layer_name: str | None = None,
-    ):
-        super().__init__(moe)
-        self.has_bias = self.moe.has_bias
-        self.weight_quant = weight_quant
-        self.input_quant = input_quant
-
-        # Validate scheme: weights=W4 (channel or group),
-        # activations=dynamic TOKEN (A8)
-
-        # Must be dynamic per-token activations
-        if (
-            input_quant.strategy != QuantizationStrategy.TOKEN
-            or not input_quant.dynamic
-        ):
-            raise ValueError(
-                "W4A8-int MoE needs dynamic per-token activation quantization."
-            )
-
-        # Weight can be channel-wise (group_size=None) or group-wise
-        self.group_size = (
-            weight_quant.group_size if (weight_quant.group_size is not None) else -1
-        )
-        if weight_quant.num_bits != 4:
-            raise ValueError("This method only supports 4-bit weights (num_bits=4).")
-
-        # CPU only
-        if not current_platform.is_cpu():
-            raise ValueError("CompressedTensorsW4A8Int8MoEMethod is CPU-only.")
-
-        # Arm: check _dyn ops availability
-        if current_platform.get_cpu_architecture() == CpuArchEnum.ARM:
-            try:
-                _ = torch.ops.aten._dyn_quant_matmul_4bit
-                _ = torch.ops.aten._dyn_quant_pack_4bit_weight
-            except AttributeError as err:
-                raise RuntimeError(
-                    f"""PyTorch {torch.__version__} lacks _dyn_quant_* 4bit ops;
-                    install a newer build."""
-                ) from err
-        self.static_input_scales = False  # always dynamic per token
-
-    # ---- parameter creation ----
-    def create_weights(
-        self,
-        layer: torch.nn.Module,
-        num_experts: int,
-        hidden_size: int,
-        intermediate_size_per_partition: int,
-        params_dtype: torch.dtype,
-        **extra_weight_attrs,
-    ):
-        # Shapes per local rank (TP/EP):
-        #   w13: [E, 2*I_local, H]  int8  (int4 values in [-8,7])
-        #   w2 : [E, H, I_local]    int8
-        # Scales:
-        #   channel-wise: group_size=-1 -> per-output-row, single scale per row
-        #   group-wise  : group_size=g   ->
-        #   per-output-row, (in_features/g) scales
-
-        E = num_experts
-        H = hidden_size
-        IN = intermediate_size_per_partition
-        g = self.group_size
-
-        # Per-row scale columns
-        def _n_scale_cols(in_features: int) -> int:
-            return 1 if g == -1 else (in_features // g)
-
-        # Register unpacked int4-as-int8 weights the loader will fill.
-        w13 = torch.nn.Parameter(
-            torch.empty(E, 2 * IN, H, dtype=torch.int8), requires_grad=False
-        )
-        set_weight_attrs(w13, extra_weight_attrs)
-        layer.register_parameter("w13_weight", w13)
-
-        w2 = torch.nn.Parameter(
-            torch.empty(E, H, IN, dtype=torch.int8), requires_grad=False
-        )
-        set_weight_attrs(w2, extra_weight_attrs)
-        layer.register_parameter("w2_weight", w2)
-
-        # Register scales
-        # KleidiAI groupwise kernels accepts float32 scales
-        # KleidiAI groupwise kernels accepts bfloat16 scales
-        scale_dtype = torch.float32 if g == -1 else torch.bfloat16
-
-        w13_s = torch.nn.Parameter(
-            torch.ones(E, 2 * IN, _n_scale_cols(H), dtype=scale_dtype),
-            requires_grad=False,
-        )
-        set_weight_attrs(
-            w13_s,
-            {"quant_method": "channel" if g == -1 else "group", **extra_weight_attrs},
-        )
-        layer.register_parameter("w13_weight_scale", w13_s)
-
-        w2_s = torch.nn.Parameter(
-            torch.ones(E, H, _n_scale_cols(IN), dtype=scale_dtype), requires_grad=False
-        )
-        set_weight_attrs(
-            w2_s,
-            {"quant_method": "channel" if g == -1 else "group", **extra_weight_attrs},
-        )
-        layer.register_parameter("w2_weight_scale", w2_s)
-
-        if self.has_bias:
-            w13_bias = torch.nn.Parameter(
-                torch.zeros(E, 2 * IN, dtype=params_dtype), requires_grad=False
-            )
-            layer.register_parameter("w13_bias", w13_bias)
-            set_weight_attrs(w13_bias, extra_weight_attrs)
-
-            w2_bias = torch.nn.Parameter(
-                torch.zeros(num_experts, hidden_size, dtype=params_dtype),
-                requires_grad=False,
-            )
-            layer.register_parameter("w2_bias", w2_bias)
-            set_weight_attrs(w2_bias, extra_weight_attrs)
-
-        # Placeholders for packed weights (will be replaced after packing)
-        layer.register_parameter(
-            "w13_weight_packed", torch.nn.Parameter(torch.empty(0), requires_grad=False)
-        )
-        set_weight_attrs(layer.w13_weight_packed, extra_weight_attrs)
-
-        layer.register_parameter(
-            "w2_weight_packed", torch.nn.Parameter(torch.empty(0), requires_grad=False)
-        )
-        set_weight_attrs(layer.w2_weight_packed, extra_weight_attrs)
-
-        # dims for 4 bit fused matmuls
-        layer.w13_in_features = H
-        layer.w13_out_features = 2 * IN
-        layer.w2_in_features = IN
-        layer.w2_out_features = H
-        layer.group_size = g
-
-    # post-load packing to dyn-4bit KleidiAI kernel's format
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
-        E = layer.w13_weight.shape[0]
-        H = layer.w13_in_features
-        I2 = layer.w13_out_features
-        IN = layer.w2_in_features
-        g = layer.group_size
-
-        def _pack_matrix(
-            int4_as_int8_2d: torch.Tensor,
-            scales_2d: torch.Tensor,
-            bias_1d: torch.Tensor | None,
-            in_features: int,
-            out_features: int,
-        ) -> torch.Tensor:
-            # int4 values are stored as int8 in [-8,7].
-            # Shift to unsigned nibble and pack pairs along input-dim.
-            tmp = int4_as_int8_2d.add(8)  # [out, in]
-            uint8_nibbles = ((tmp[:, 1::2] << 4) | tmp[:, ::2]).to(
-                torch.uint8
-            )  # [out, in//2]
-
-            # KleidiAI groupwise kernels accepts float32 scales
-            # KleidiAI groupwise kernels accepts bfloat16 scales
-            scale_dtype = torch.float32 if g == -1 else torch.bfloat16
-            scales = scales_2d.to(scale_dtype)
-            bias = None if bias_1d is None else bias_1d.to(torch.float32)
-            return torch.ops.aten._dyn_quant_pack_4bit_weight(
-                uint8_nibbles,
-                scales,
-                bias,
-                g if g != -1 else in_features,
-                in_features,
-                out_features,
-            )
-
-        # Pack per expert
-        w13_packed_list = []
-        w2_packed_list = []
-
-        has_w13_bias = hasattr(layer, "w13_bias") and layer.w13_bias is not None
-        has_w2_bias = hasattr(layer, "w2_bias") and layer.w2_bias is not None
-
-        for e in range(E):
-            w13_packed_list.append(
-                _pack_matrix(
-                    layer.w13_weight[e],  # [2I, H]
-                    layer.w13_weight_scale[e],  # [2I, H/g or 1]
-                    layer.w13_bias[e] if has_w13_bias else None,  # [2I]
-                    H,
-                    I2,
-                )
-            )
-            w2_packed_list.append(
-                _pack_matrix(
-                    # w2 shape is [H, IN]; we need [out, in] == [H, IN].
-                    layer.w2_weight[e],  # [H, IN]
-                    layer.w2_weight_scale[e],  # [H, IN/g or 1]
-                    layer.w2_bias[e] if has_w2_bias else None,  # [H]
-                    IN,
-                    layer.w2_out_features,  # in_features=IN, out_features=H
-                )
-            )
-
-        # each packed tensor has identical shape per expert; stack on dim 0
-        w13_packed = torch.stack(w13_packed_list, dim=0)
-        w2_packed = torch.stack(w2_packed_list, dim=0)
-
-        replace_parameter(
-            layer,
-            "w13_weight_packed",
-            torch.nn.Parameter(w13_packed, requires_grad=False),
-        )
-        replace_parameter(
-            layer,
-            "w2_weight_packed",
-            torch.nn.Parameter(w2_packed, requires_grad=False),
-        )
-
-        # free raw tensors/scales/bias now that they're packed into the payload.
-        replace_parameter(
-            layer, "w13_weight", torch.nn.Parameter(torch.empty(0), requires_grad=False)
-        )
-        replace_parameter(
-            layer, "w2_weight", torch.nn.Parameter(torch.empty(0), requires_grad=False)
-        )
-        replace_parameter(
-            layer,
-            "w13_weight_scale",
-            torch.nn.Parameter(torch.empty(0), requires_grad=False),
-        )
-        replace_parameter(
-            layer,
-            "w2_weight_scale",
-            torch.nn.Parameter(torch.empty(0), requires_grad=False),
-        )
-        if has_w13_bias:
-            replace_parameter(
-                layer,
-                "w13_bias",
-                torch.nn.Parameter(torch.empty(0), requires_grad=False),
-            )
-        if has_w2_bias:
-            replace_parameter(
-                layer,
-                "w2_bias",
-                torch.nn.Parameter(torch.empty(0), requires_grad=False),
-            )
-
-    def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
-    ) -> FusedMoEQuantConfig | None:
-        # CPU dynamic 4-bit MoE path does not use modular kernels or
-        # fused_experts; quant config is not needed.
-        return None
-
-    @property
-    def is_monolithic(self) -> bool:
-        return True
-
-    def apply_monolithic(
-        self,
-        layer: FusedMoE,
-        x: torch.Tensor,
-        router_logits: torch.Tensor,
-    ) -> torch.Tensor:
-        assert not layer.enable_eplb, "EPLB not supported for W4A8-int MoE yet."
-        assert layer.activation in (
-            MoEActivation.SILU,
-            MoEActivation.SWIGLUOAI,
-            MoEActivation.SWIGLUSTEP,
-        ), "Only SiLU/SwiGLUGU/SwiGLUUG are supported."
-        assert layer.expert_map is None, """expert_map/EP not implemented
-        for CPU dyn-4bit MoE."""
-
-        def _act_kind(s: MoEActivation) -> int:
-            # 0 = SwiGLU_Gu (SiLU(g)*u), 1 = SwiGLU_Ug (SiLU(u)*g), 2 = SiLU
-            if s == MoEActivation.SWIGLUSTEP:
-                return 0
-            if s == MoEActivation.SWIGLUOAI:
-                return 1
-            if s == MoEActivation.SILU:
-                return 2
-            raise ValueError(f"Unknown activation '{s}'")
-
-        # Apply topk softmax on router output
-        topk_weights, topk_ids = select_experts(
-            hidden_states=x,
-            router_logits=router_logits,
-            top_k=layer.top_k,
-            use_grouped_topk=layer.use_grouped_topk,
-            renormalize=layer.renormalize,
-        )
-
-        return torch.ops._C.dynamic_4bit_int_moe(
-            x,
-            topk_ids.to(torch.long),
-            topk_weights,
-            layer.w13_weight_packed,
-            layer.w2_weight_packed,
-            layer.w2_out_features,
-            layer.w2_in_features,
-            layer.w13_out_features,
-            layer.group_size,
-            layer.apply_router_weight_on_input,
-            int(_act_kind(layer.activation)),
-        )
-
-
-class CompressedTensorsW4A8Fp8MoEMethod(CompressedTensorsMoEMethod):
-    def __init__(
-        self,
-        weight_quant: QuantizationArgs,
-        input_quant: QuantizationArgs,
-        moe: FusedMoEConfig,
-        layer_name: str | None = None,
-    ):
-        super().__init__(moe)
-        self.weight_quant = weight_quant
-        self.input_quant = input_quant
-
-        self.group_size = self.weight_quant.group_size
-        self.num_bits = self.weight_quant.num_bits
-        self.packed_factor = 32 // self.num_bits
-
-        assert self.weight_quant.symmetric, (
-            "Only symmetric quantization is supported for W4A8 MoE"
-        )
-        assert self.weight_quant.actorder != "group"
-        assert self.group_size == 128, "Only group size 128 supported for W4A8 MoE"
-
-        self.disable_expert_map = False
-        self.layer_name = layer_name
-
-        from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
-        from vllm.model_executor.layers.quantization.utils.quant_utils import (
-            GroupShape,
-        )
-
-        self.quant_fp8 = QuantFP8(static=False, group_shape=GroupShape.PER_TOKEN)
-
-    def create_weights(
-        self,
-        layer: torch.nn.Module,
-        num_experts: int,
-        hidden_size: int,
-        intermediate_size_per_partition: int,
-        params_dtype: torch.dtype,
-        **extra_weight_attrs,
-    ):
-        layer.num_experts = num_experts
-        layer.orig_dtype = params_dtype
-        layer.weight_block_size = None
-
-        # requirement for CUTLASS reorder_tensor
-        assert hidden_size % 256 == 0, f"{hidden_size=} must be divisible by 256"
-        assert intermediate_size_per_partition % 256 == 0, (
-            f"{intermediate_size_per_partition=} must be divisible by 256"
-        )
-        # storage type, pack 8xint4 into int32
-        params_dtype = torch.int32
-
-        # WEIGHTS
-        w13_weight_packed = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                2 * intermediate_size_per_partition,
-                hidden_size // self.packed_factor,
-                dtype=params_dtype,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_packed", w13_weight_packed)
-        set_weight_attrs(w13_weight_packed, extra_weight_attrs)
-
-        w2_weight_packed = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                hidden_size,
-                intermediate_size_per_partition // self.packed_factor,
-                dtype=params_dtype,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight_packed", w2_weight_packed)
-        set_weight_attrs(w2_weight_packed, extra_weight_attrs)
-
-        # SCALES
-        # weight_scale refers to the group-wise scales
-        # they are initially loaded as bf16, we will convert to fp8
-        # after loading
-        w13_weight_scale = torch.nn.Parameter(
-            torch.ones(
-                num_experts,
-                2 * intermediate_size_per_partition,
-                hidden_size // self.group_size,
-                dtype=layer.orig_dtype,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_scale", w13_weight_scale)
-
-        w2_weight_scale = torch.nn.Parameter(
-            torch.ones(
-                num_experts,
-                hidden_size,
-                intermediate_size_per_partition // self.group_size,
-                dtype=layer.orig_dtype,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight_scale", w2_weight_scale)
-        # Add PER-GROUP quantization for FusedMoE.weight_loader.
-        extra_weight_attrs.update(
-            {"quant_method": FusedMoeWeightScaleSupported.GROUP.value}
-        )
-        set_weight_attrs(w13_weight_scale, extra_weight_attrs)
-        set_weight_attrs(w2_weight_scale, extra_weight_attrs)
-
-        # weight shapes
-        w2_weight_shape = torch.nn.Parameter(
-            torch.empty(num_experts, 2), requires_grad=False
-        )
-        layer.register_parameter("w2_weight_shape", w2_weight_shape)
-        set_weight_attrs(w2_weight_shape, extra_weight_attrs)
-        w13_weight_shape = torch.nn.Parameter(
-            torch.empty(num_experts, 2), requires_grad=False
-        )
-        layer.register_parameter("w13_weight_shape", w13_weight_shape)
-        set_weight_attrs(w13_weight_shape, extra_weight_attrs)
-
-        # don't use input scales
-        layer.w13_input_scale = None
-        layer.w2_input_scale = None
-
-    def process_weights_after_loading(self, layer):
-        device = layer.w13_weight_packed.device
-
-        # STRIDES
-        # A, C
-        self.a_strides1_c_strides2 = torch.full(
-            (layer.local_num_experts,),
-            layer.hidden_size,
-            device=device,
-            dtype=torch.int64,
-        )
-        self.a_strides2 = torch.full(
-            (layer.local_num_experts,),
-            layer.intermediate_size_per_partition,
-            device=device,
-            dtype=torch.int64,
-        )
-        self.c_strides1 = torch.full(
-            (layer.local_num_experts,),
-            2 * layer.intermediate_size_per_partition,
-            device=device,
-            dtype=torch.int64,
-        )
-
-        # S (group-wise scales)
-        # sizeof(StrideS) = 16 bytes, so we need to use 2xint64 to encode it
-        self.s_strides1 = torch.zeros(
-            (layer.local_num_experts, 2), device=device, dtype=torch.int64
-        )
-        self.s_strides1[:, 0] = 2 * layer.intermediate_size_per_partition
-
-        self.s_strides2 = torch.zeros(
-            (layer.local_num_experts, 2), device=device, dtype=torch.int64
-        )
-        self.s_strides2[:, 0] = layer.hidden_size
-
-        # encode and reorder weight tensors, and get the layout to pass to
-        # the grouped gemm kernel. `b_strides1/2` specifies the entire layout
-        convert_packed_uint4b8_to_signed_int4_inplace(layer.w13_weight_packed)
-        w13_weight_shuffled, self.b_strides1 = (
-            ops.cutlass_encode_and_reorder_int4b_grouped(layer.w13_weight_packed)
-        )
-        replace_parameter(layer, "w13_weight_packed", w13_weight_shuffled)
-        convert_packed_uint4b8_to_signed_int4_inplace(layer.w2_weight_packed)
-        w2_weight_shuffled, self.b_strides2 = (
-            ops.cutlass_encode_and_reorder_int4b_grouped(layer.w2_weight_packed)
-        )
-        replace_parameter(layer, "w2_weight_packed", w2_weight_shuffled)
-
-        # convert bf16 scales to (fp8_scales, channel_scales)
-        w13_weight_scale, w13_weight_chan_scale = convert_bf16_scales_to_fp8(
-            self.quant_fp8, layer.w13_weight_scale
-        )
-        w2_weight_scale, w2_weight_chan_scale = convert_bf16_scales_to_fp8(
-            self.quant_fp8, layer.w2_weight_scale
-        )
-
-        # register channel scales
-        layer.register_parameter(
-            "w13_weight_chan_scale",
-            torch.nn.Parameter(w13_weight_chan_scale, requires_grad=False),
-        )
-        layer.register_parameter(
-            "w2_weight_chan_scale",
-            torch.nn.Parameter(w2_weight_chan_scale, requires_grad=False),
-        )
-
-        # The scales are stored as (E, N, K // 128) but the kernel expects
-        # (E, K // 128, N) in row-major format, so we need to permute the last 2 dims
-        # and make it contiguous
-        w13_weight_scale_packed = ops.cutlass_pack_scale_fp8(
-            w13_weight_scale.permute(0, 2, 1).contiguous()
-        )
-        replace_parameter(layer, "w13_weight_scale", w13_weight_scale_packed)
-        w2_weight_scale_packed = ops.cutlass_pack_scale_fp8(
-            w2_weight_scale.permute(0, 2, 1).contiguous()
-        )
-        replace_parameter(layer, "w2_weight_scale", w2_weight_scale_packed)
-
-    def maybe_make_prepare_finalize(
-        self,
-        routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
-    ) -> mk.FusedMoEPrepareAndFinalizeModular | None:
-        return super().maybe_make_prepare_finalize(routing_tables)
-
-    def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
-    ) -> FusedMoEQuantConfig | None:
-        # Store quantization scales; both per-group and per-channel
-        # Note we haven't specified the group size here because
-        # the quant config logic assumes group-wise scaling
-        # and channel-wise scaling are exclusive.
-        return int4_w4afp8_moe_quant_config(
-            w1_scale=layer.w13_weight_scale,  # group scale
-            w2_scale=layer.w2_weight_scale,  # group scale
-            g1_alphas=layer.w13_weight_chan_scale,
-            g2_alphas=layer.w2_weight_chan_scale,
-            per_act_token_quant=True,  # always use dynamic per-token
-            per_out_ch_quant=True,  # always use per-channel
-        )
-
-    def select_gemm_impl(
-        self,
-        prepare_finalize: mk.FusedMoEPrepareAndFinalizeModular,
-        layer: torch.nn.Module,
-    ) -> mk.FusedMoEExpertsModular:
-        assert self.moe_quant_config is not None
-        assert (
-            prepare_finalize.activation_format == FusedMoEActivationFormat.Standard
-        ), "BatchedExperts not supported"
-
-        from vllm.model_executor.layers.fused_moe import CutlassExpertsW4A8Fp8
-
-        experts: FusedMoEExpertsModular
-
-        logger.debug("CutlassExpertsW4A8Fp8(%s)", self.__class__.__name__)
-        experts = CutlassExpertsW4A8Fp8(
-            out_dtype=self.moe.in_dtype,
-            a_strides1=self.a_strides1_c_strides2,
-            a_strides2=self.a_strides2,
-            b_strides1=self.b_strides1,
-            b_strides2=self.b_strides2,
-            c_strides1=self.c_strides1,
-            c_strides2=self.a_strides1_c_strides2,
-            s_strides1=self.s_strides1,
-            s_strides2=self.s_strides2,
-            moe_config=self.moe,
-            quant_config=self.moe_quant_config,
-            group_size=self.group_size,
-        )
-
-        num_dispatchers = prepare_finalize.num_dispatchers()
-        self.disable_expert_map = (
-            num_dispatchers > 1 or not experts.supports_expert_map()
-        )
-
-        return experts
-
-    def apply(
-        self,
-        layer: FusedMoE,
-        x: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        if layer.enable_eplb:
-            raise NotImplementedError(
-                "EPLB not supported for `CompressedTensorsW4A8Fp8MoEMethod` yet."
-            )
-        assert self.moe_quant_config is not None
-
-        from vllm.model_executor.layers.fused_moe.cutlass_moe import (
-            cutlass_moe_w4a8_fp8,
-        )
-
-        return cutlass_moe_w4a8_fp8(
-            x,
-            layer.w13_weight_packed,
-            layer.w2_weight_packed,
-            topk_weights,
-            topk_ids,
-            moe_config=self.moe,
-            quant_config=self.moe_quant_config,
-            activation=layer.activation,
-            global_num_experts=layer.global_num_experts,
-            expert_map=None if self.disable_expert_map else layer.expert_map,
-            a_strides1=self.a_strides1_c_strides2,
-            a_strides2=self.a_strides2,
-            b_strides1=self.b_strides1,
-            b_strides2=self.b_strides2,
-            c_strides1=self.c_strides1,
-            c_strides2=self.a_strides1_c_strides2,
-            s_strides1=self.s_strides1,
-            s_strides2=self.s_strides2,
-            group_size=self.group_size,
-            apply_router_weight_on_input=layer.apply_router_weight_on_input,
-        )
-
-    @property
-    def supports_eplb(self) -> bool:
-        return False
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/__init__.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/__init__.py
new file mode 100644
index 000000000000..39c9113f65f6
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/__init__.py
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.compressed_tensors_moe import (  # noqa: E501
+    CompressedTensorsMoEMethod,
+)
+
+__all__ = [
+    "CompressedTensorsMoEMethod",
+]
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe.py
new file mode 100644
index 000000000000..a2e941621927
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe.py
@@ -0,0 +1,182 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+import torch
+from compressed_tensors import CompressionFormat
+from compressed_tensors.quantization import (
+    ActivationOrdering,
+    QuantizationStrategy,
+)
+
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoEMethodBase,
+    UnquantizedFusedMoEMethod,
+)
+from vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16 import (  # noqa
+    WNA16_SUPPORTED_BITS,
+)
+from vllm.model_executor.layers.quantization.utils.marlin_utils import (
+    check_moe_marlin_supports_layer,
+)
+from vllm.platforms import current_platform
+
+logger = init_logger(__name__)
+
+
+class CompressedTensorsMoEMethod(FusedMoEMethodBase):
+    @staticmethod
+    def get_moe_method(
+        quant_config: "CompressedTensorsConfig",  # type: ignore # noqa E501
+        layer: torch.nn.Module,
+        layer_name: str,
+    ) -> FusedMoEMethodBase:
+        # RoutedExperts was made by combining multiple Linears so need to
+        # make sure quantization config for Linear can target it
+        quant_config._add_fused_moe_to_target_scheme_map()
+        unfused_names = [
+            layer_name + proj_name
+            for proj_name in [".0.gate_proj", ".0.up_proj", ".0.down_proj"]
+        ]
+        # TODO: refactor this to use expert_mapping and check all layer numbers
+        all_scheme_dicts = [
+            quant_config.get_scheme_dict(layer, name) for name in unfused_names
+        ]
+        scheme_dict = all_scheme_dicts.pop()
+
+        # multiple schemes found
+        if not all([cur_dict == scheme_dict for cur_dict in all_scheme_dicts]):
+            raise ValueError(
+                "All MoE projections need to have same "
+                "quantization scheme but found multiple"
+            )
+
+        if scheme_dict is None:  # ignored layer
+            return UnquantizedFusedMoEMethod(layer.moe_config)
+
+        # TODO: @dsikka: refactor this to use schemes as other kernels
+        # are supported + check if the layer is being ignored.
+        weight_quant = scheme_dict.get("weights")
+        input_quant = scheme_dict.get("input_activations")
+        format = scheme_dict.get("format")
+
+        if quant_config._is_mxfp4(weight_quant):
+            from .compressed_tensors_moe_w4a4_mxfp4 import (
+                CompressedTensorsW4A4Mxfp4MoEMethod,
+            )
+
+            return CompressedTensorsW4A4Mxfp4MoEMethod(layer.moe_config)
+
+        if quant_config._is_mxfp8(weight_quant):
+            from .compressed_tensors_moe_w8a8_mxfp8 import (
+                CompressedTensorsW8A8Mxfp8MoEMethod,
+            )
+
+            return CompressedTensorsW8A8Mxfp8MoEMethod(layer.moe_config)
+
+        if quant_config._is_wNa16_group_channel(weight_quant, input_quant):
+            # group_size=None means channelwise
+            group_size = weight_quant.group_size or -1
+
+            valid_format_and_bits = (
+                weight_quant.num_bits in WNA16_SUPPORTED_BITS
+                and format == CompressionFormat.pack_quantized.value
+            )
+
+            if not valid_format_and_bits:
+                raise ValueError(
+                    "For Fused MoE layers, only format: ",
+                    f"{CompressionFormat.pack_quantized.value} ",
+                    f" and bits: {WNA16_SUPPORTED_BITS} is supported ",
+                    f"but got format: {CompressionFormat.pack_quantized.value} "
+                    f" and bits: {weight_quant.num_bits}",
+                )
+
+            # Prefer to use the MarlinMoE kernel when it is supported.
+            if (
+                not check_moe_marlin_supports_layer(layer, group_size)
+                or current_platform.is_rocm()
+            ):
+                from .compressed_tensors_moe_wna16 import (
+                    CompressedTensorsWNA16MoEMethod,
+                )
+
+                if (
+                    weight_quant.strategy == QuantizationStrategy.GROUP
+                    and weight_quant.actorder
+                    in (ActivationOrdering.GROUP, ActivationOrdering.DYNAMIC)
+                ):
+                    raise ValueError(
+                        "WNA16MoE is not supported with actorder=group/dynamic."
+                    )
+                logger.info_once("Using CompressedTensorsWNA16MoEMethod")
+                return CompressedTensorsWNA16MoEMethod(
+                    weight_quant, input_quant, layer.moe_config
+                )
+            else:
+                from .compressed_tensors_moe_wna16_marlin import (
+                    CompressedTensorsWNA16MarlinMoEMethod,
+                )
+
+                logger.info_once("Using CompressedTensorsWNA16MarlinMoEMethod")
+                return CompressedTensorsWNA16MarlinMoEMethod(
+                    weight_quant, input_quant, layer.moe_config
+                )
+        elif quant_config._is_nvfp4_format(weight_quant):
+            from .compressed_tensors_moe_w4a4_nvfp4 import (
+                CompressedTensorsW4A4Nvfp4MoEMethod,
+            )
+
+            _is_valid_nvfp4_activations = (
+                quant_config._is_nvfp4_format(input_quant) or input_quant is None
+            )
+            if not _is_valid_nvfp4_activations:
+                raise ValueError(
+                    "For NVFP4 weights, input quantization must also be NVFP4 format ",
+                    f"or None for NVFP4A16, found {input_quant}",
+                )
+            return CompressedTensorsW4A4Nvfp4MoEMethod(
+                layer.moe_config, layer_name, use_a16=(input_quant is None)
+            )
+        elif (
+            quant_config._is_fp8_w8a8_sm90(weight_quant, input_quant)
+            or quant_config._is_fp8_w8a8_sm100(weight_quant, input_quant)
+            or quant_config._is_fp8_w8a8(weight_quant, input_quant)
+        ):
+            from .compressed_tensors_moe_w8a8_fp8 import (
+                CompressedTensorsW8A8Fp8MoEMethod,
+            )
+
+            return CompressedTensorsW8A8Fp8MoEMethod(
+                weight_quant, input_quant, layer.moe_config
+            )
+        elif quant_config._is_dynamic_token_w8a8(weight_quant, input_quant):
+            from .compressed_tensors_moe_w8a8_int8 import (
+                CompressedTensorsW8A8Int8MoEMethod,
+            )
+
+            return CompressedTensorsW8A8Int8MoEMethod(
+                weight_quant, input_quant, layer.moe_config
+            )
+        elif quant_config._is_fp8_w4a8_sm90(weight_quant, input_quant):
+            from .compressed_tensors_moe_w4a8_fp8 import (
+                CompressedTensorsW4A8Fp8MoEMethod,
+            )
+
+            logger.info_once("Using CompressedTensorsW4A8Fp8MoEMethod")
+            return CompressedTensorsW4A8Fp8MoEMethod(
+                weight_quant, input_quant, layer.moe_config
+            )
+        elif quant_config._is_dynamic_token_w4a8_int(weight_quant, input_quant):
+            from .compressed_tensors_moe_w4a8_int8 import (
+                CompressedTensorsW4A8Int8MoEMethod,
+            )
+
+            return CompressedTensorsW4A8Int8MoEMethod(
+                weight_quant, input_quant, layer.moe_config
+            )
+        else:
+            raise RuntimeError(
+                f"Unsupported FusedMoe scheme: {weight_quant}, {input_quant}"
+            )
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a4_mxfp4.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a4_mxfp4.py
new file mode 100644
index 000000000000..af42222e0c53
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a4_mxfp4.py
@@ -0,0 +1,223 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoeWeightScaleSupported,
+    RoutedExperts,
+    SharedExperts,
+)
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEQuantConfig,
+    mxfp4_moe_quant_config,
+)
+from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import (
+    CutlassExpertsMxfp4,
+)
+from vllm.model_executor.layers.fused_moe.experts.marlin_moe import (
+    MarlinExperts,
+)
+from vllm.model_executor.layers.fused_moe.oracle.mxfp4 import (
+    Mxfp4MoeBackend,
+    make_mxfp4_moe_kernel,
+    make_mxfp4_moe_quant_config,
+)
+from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe import (  # noqa E501
+    CompressedTensorsMoEMethod,
+)
+from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
+    prepare_moe_fp4_layer_for_marlin,
+)
+from vllm.model_executor.utils import set_weight_attrs
+
+logger = init_logger(__name__)
+
+
+class CompressedTensorsW4A4Mxfp4MoEMethod(CompressedTensorsMoEMethod):
+    def __init__(self, moe):
+        super().__init__(moe)
+        self.group_size = 32
+        self.mxfp4_backend = Mxfp4MoeBackend.MARLIN
+        # use cutlass if supported, otherwise fallback to marlin for weight-only FP4
+        self.use_cutlass_mxfp4 = CutlassExpertsMxfp4._supports_current_device()
+        self.experts_cls: type[mk.FusedMoEExperts]
+        if self.use_cutlass_mxfp4:
+            logger.info_once("Using CutlassExpertsMxfp4 for MXFP4 MoE")
+            self.experts_cls = CutlassExpertsMxfp4
+        else:
+            logger.info_once("Using MarlinExperts for MXFP4 MoE")
+            self.experts_cls = MarlinExperts
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        layer.num_experts = num_experts
+        layer.params_dtype = params_dtype
+
+        w13_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                2 * intermediate_size_per_partition,
+                # 2 fp4 items are packed in the input dimension
+                hidden_size // 2,
+                requires_grad=False,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_packed", w13_weight)
+        set_weight_attrs(w13_weight, extra_weight_attrs)
+
+        w2_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                # 2 fp4 items are packed in the input dimension
+                intermediate_size_per_partition // 2,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_packed", w2_weight)
+        set_weight_attrs(w2_weight, extra_weight_attrs)
+
+        w13_weight_scale = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                2 * intermediate_size_per_partition,
+                # 2 fp4 items are packed in the input dimension
+                hidden_size // self.group_size,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_scale", w13_weight_scale)
+        extra_weight_attrs.update(
+            {"quant_method": FusedMoeWeightScaleSupported.GROUP.value}
+        )
+        set_weight_attrs(w13_weight_scale, extra_weight_attrs)
+
+        w2_weight_scale = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                # 2 fp4 items are packed in the input dimension
+                intermediate_size_per_partition // self.group_size,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_scale", w2_weight_scale)
+        set_weight_attrs(w2_weight_scale, extra_weight_attrs)
+
+    def get_fused_moe_quant_config(
+        self, layer: torch.nn.Module
+    ) -> FusedMoEQuantConfig | None:
+        if self.use_cutlass_mxfp4:
+            # W4A4: both weights and activations quantized to MXFP4
+            return mxfp4_moe_quant_config(
+                w1_scale=layer.w13_weight_scale,
+                w2_scale=layer.w2_weight_scale,
+            )
+        else:
+            # W4A16: weight-only via Marlin
+            return make_mxfp4_moe_quant_config(
+                mxfp4_backend=self.mxfp4_backend,
+                w1_scale=layer.w13_weight_scale,
+                w2_scale=layer.w2_weight_scale,
+            )
+
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
+        layer.w13_weight = torch.nn.Parameter(
+            layer.w13_weight_packed.data, requires_grad=False
+        )
+        delattr(layer, "w13_weight_packed")
+
+        layer.w2_weight = torch.nn.Parameter(
+            layer.w2_weight_packed.data, requires_grad=False
+        )
+        delattr(layer, "w2_weight_packed")
+
+        if self.use_cutlass_mxfp4:
+            # Swizzle weight scales from flat checkpoint layout [E, N, K//32]
+            # to CUTLASS tiled layout [E, numMTiles*numKTiles*512].
+            from vllm.model_executor.layers.fused_moe.experts.cutlass_moe import (
+                swizzle_mxfp4_scales,
+            )
+
+            E = layer.w13_weight_scale.shape[0]
+            w13_N = layer.w13_weight_scale.shape[1]
+            w13_scale_K = layer.w13_weight_scale.shape[2]
+            w13_K = w13_scale_K * 32
+
+            w2_M = layer.w2_weight_scale.shape[1]
+            w2_scale_N = layer.w2_weight_scale.shape[2]
+            w2_N = w2_scale_N * 32
+
+            swizzled_w13 = []
+            swizzled_w2 = []
+            for e_idx in range(E):
+                s13 = layer.w13_weight_scale[e_idx]
+                sw13 = swizzle_mxfp4_scales(s13, w13_N, w13_K)
+                swizzled_w13.append(sw13.reshape(w13_N, w13_scale_K))
+                s2 = layer.w2_weight_scale[e_idx]
+                sw2 = swizzle_mxfp4_scales(s2, w2_M, w2_N)
+                swizzled_w2.append(sw2.reshape(w2_M, w2_scale_N))
+            layer.w13_weight_scale = torch.nn.Parameter(
+                torch.stack(swizzled_w13), requires_grad=False
+            )
+            layer.w2_weight_scale = torch.nn.Parameter(
+                torch.stack(swizzled_w2), requires_grad=False
+            )
+        else:
+            logger.warning_once(
+                "Your GPU does not have native support for FP4 computation "
+                "but FP4 quantization is being used. Weight-only FP4 "
+                "compression will be used leveraging the Marlin kernel. "
+                "This may degrade performance for compute-heavy workloads."
+            )
+            prepare_moe_fp4_layer_for_marlin(layer)
+
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+        if self.moe_quant_config is not None:
+            self.moe_kernel = make_mxfp4_moe_kernel(
+                moe_quant_config=self.moe_quant_config,
+                moe_config=self.moe,
+                experts_cls=self.experts_cls,
+                mxfp4_backend=self.mxfp4_backend,
+                routing_tables=layer._expert_routing_tables(),
+            )
+
+    def apply(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ) -> torch.Tensor:
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            topk_weights,
+            topk_ids,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            shared_experts=shared_experts,
+            shared_experts_input=shared_experts_input,
+        )
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a4_nvfp4.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a4_nvfp4.py
new file mode 100644
index 000000000000..9a051c038f9c
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a4_nvfp4.py
@@ -0,0 +1,310 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoeWeightScaleSupported,
+    RoutedExperts,
+    SharedExperts,
+)
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEQuantConfig,
+)
+from vllm.model_executor.layers.fused_moe.oracle.nvfp4 import (
+    convert_to_nvfp4_moe_kernel_format,
+    is_global_sf_supported_for_nvfp4_backend,
+    make_nvfp4_moe_kernel,
+    make_nvfp4_moe_quant_config,
+    select_nvfp4_moe_backend,
+)
+from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe import (  # noqa E501
+    CompressedTensorsMoEMethod,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    kNvfp4Dynamic,
+    kNvfp4Static,
+)
+from vllm.model_executor.utils import replace_parameter, set_weight_attrs
+
+logger = init_logger(__name__)
+
+
+class CompressedTensorsW4A4Nvfp4MoEMethod(CompressedTensorsMoEMethod):
+    def __init__(
+        self,
+        moe: FusedMoEConfig,
+        layer_name: str | None = None,
+        use_a16: bool = False,
+    ):
+        super().__init__(moe)
+        self.group_size = 16
+
+        # Select experts implementation.
+        self.nvfp4_backend, self.experts_cls = select_nvfp4_moe_backend(
+            config=self.moe,
+            weight_key=kNvfp4Static,
+            activation_key=None if use_a16 else kNvfp4Dynamic,
+        )
+
+        self.use_global_sf = is_global_sf_supported_for_nvfp4_backend(
+            self.nvfp4_backend
+        )
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        layer.num_experts = num_experts
+        layer.params_dtype = params_dtype
+        w13_num_shards = 2 if self.moe.is_act_and_mul else 1
+
+        w13_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                w13_num_shards * intermediate_size_per_partition,
+                # 2 fp4 items are packed in the input dimension
+                hidden_size // 2,
+                requires_grad=False,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_packed", w13_weight)
+        set_weight_attrs(w13_weight, extra_weight_attrs)
+
+        w2_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                # 2 fp4 items are packed in the input dimension
+                intermediate_size_per_partition // 2,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_packed", w2_weight)
+        set_weight_attrs(w2_weight, extra_weight_attrs)
+
+        # Weight Scales
+        w13_weight_scale = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                w13_num_shards * intermediate_size_per_partition,
+                # 2 fp4 items are packed in the input dimension
+                hidden_size // self.group_size,
+                dtype=torch.float8_e4m3fn,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_scale", w13_weight_scale)
+        extra_weight_attrs.update(
+            {"quant_method": FusedMoeWeightScaleSupported.GROUP.value}
+        )
+        set_weight_attrs(w13_weight_scale, extra_weight_attrs)
+
+        w2_weight_scale = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                # 2 fp4 items are packed in the input dimension
+                intermediate_size_per_partition // self.group_size,
+                dtype=torch.float8_e4m3fn,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_scale", w2_weight_scale)
+        extra_weight_attrs.update(
+            {"quant_method": FusedMoeWeightScaleSupported.GROUP.value}
+        )
+        set_weight_attrs(w2_weight_scale, extra_weight_attrs)
+
+        # Weight Global Scales
+        w13_weight_scale_2 = torch.nn.Parameter(
+            torch.empty(num_experts, w13_num_shards, dtype=torch.float32),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_global_scale", w13_weight_scale_2)
+        extra_weight_attrs.update(
+            {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}
+        )
+        set_weight_attrs(w13_weight_scale_2, extra_weight_attrs)
+
+        w2_weight_scale_2 = torch.nn.Parameter(
+            torch.empty(num_experts, dtype=torch.float32), requires_grad=False
+        )
+        layer.register_parameter("w2_weight_global_scale", w2_weight_scale_2)
+        extra_weight_attrs.update(
+            {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}
+        )
+        set_weight_attrs(w2_weight_scale_2, extra_weight_attrs)
+
+        # Input Global Scales
+        w13_input_scale = torch.nn.Parameter(
+            torch.empty(num_experts, w13_num_shards, dtype=torch.float32),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_input_global_scale", w13_input_scale)
+        extra_weight_attrs.update(
+            {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}
+        )
+        set_weight_attrs(w13_input_scale, extra_weight_attrs)
+
+        w2_input_scale = torch.nn.Parameter(
+            torch.empty(num_experts, dtype=torch.float32), requires_grad=False
+        )
+        layer.register_parameter("w2_input_global_scale", w2_input_scale)
+        extra_weight_attrs.update(
+            {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}
+        )
+        set_weight_attrs(w2_input_scale, extra_weight_attrs)
+
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
+        """
+        Convert NVFP4 MoE weights into kernel format and setup the kernel.
+        """
+        # NOTE(rob): wN_weight_packed -> wN_weight is because ModularKernelMethod
+        # requires this naming convention. However, the name change breaks
+        # reloading because the state dict no longer matches disk. Once we
+        # remove MKM, we should revert this change to ensure compatibility.
+        layer.w13_weight = torch.nn.Parameter(
+            layer.w13_weight_packed.data, requires_grad=False
+        )
+        delattr(layer, "w13_weight_packed")
+
+        layer.w2_weight = torch.nn.Parameter(
+            layer.w2_weight_packed.data, requires_grad=False
+        )
+        delattr(layer, "w2_weight_packed")
+
+        # Use a single gscale for w13.
+        if self.moe.is_act_and_mul and not torch.allclose(
+            layer.w13_weight_global_scale[:, 0], layer.w13_weight_global_scale[:, 1]
+        ):
+            logger.warning_once(
+                "w1_weight_global_scale must match w3_weight_global_scale. "
+                "Accuracy may be affected.",
+            )
+        w13_weight_global_scale = layer.w13_weight_global_scale[:, 0].contiguous()
+
+        # Shuffle weights into the NvFp4 kernel format.
+        (
+            w13,
+            w13_scale,
+            w13_scale_2,
+            a13_scale,
+            w2,
+            w2_scale,
+            w2_scale_2,
+            a2_scale,
+        ) = convert_to_nvfp4_moe_kernel_format(
+            nvfp4_backend=self.nvfp4_backend,
+            layer=layer,
+            w13=layer.w13_weight,
+            w13_scale=layer.w13_weight_scale,
+            w13_scale_2=(1.0 / w13_weight_global_scale),
+            a13_scale=(1.0 / layer.w13_input_global_scale),
+            w2=layer.w2_weight,
+            w2_scale=layer.w2_weight_scale,
+            w2_scale_2=(1.0 / layer.w2_weight_global_scale),
+            a2_scale=(1.0 / layer.w2_input_global_scale),
+            is_act_and_mul=self.moe.is_act_and_mul,
+        )
+
+        replace_parameter(layer, "w13_weight", w13)
+        replace_parameter(layer, "w13_weight_scale", w13_scale)
+        replace_parameter(layer, "w2_weight", w2)
+        replace_parameter(layer, "w2_weight_scale", w2_scale)
+        layer.w13_weight_scale_2 = w13_scale_2
+        layer.w2_weight_scale_2 = w2_scale_2
+        layer.w13_input_scale = a13_scale
+        layer.w2_input_scale = a2_scale
+
+        # Setup modular kernel.
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+        assert self.experts_cls is not None
+        self.moe_kernel = make_nvfp4_moe_kernel(
+            moe_quant_config=self.moe_quant_config,
+            moe_config=self.moe,
+            experts_cls=self.experts_cls,
+            routing_tables=layer._expert_routing_tables(),
+        )
+        self.moe_kernel.fused_experts.process_weights_after_loading(layer)
+
+    def maybe_make_prepare_finalize(
+        self,
+        routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
+    ) -> mk.FusedMoEPrepareAndFinalizeModular | None:
+        raise ValueError(
+            f"{self.__class__.__name__} uses the new modular kernel initialization "
+            "logic. This function should not be called."
+        )
+
+    def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantConfig:
+        return make_nvfp4_moe_quant_config(
+            backend=self.nvfp4_backend,
+            w13_scale=layer.w13_weight_scale,
+            w2_scale=layer.w2_weight_scale,
+            w13_scale_2=layer.w13_weight_scale_2,
+            w2_scale_2=layer.w2_weight_scale_2,
+            a13_scale=layer.w13_input_scale,
+            a2_scale=layer.w2_input_scale,
+            swiglu_limit=getattr(layer, "swiglu_limit", None),
+        )
+
+    def apply_monolithic(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        router_logits: torch.Tensor,
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        assert self.is_monolithic
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply_monolithic(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            router_logits,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            num_expert_group=layer.num_expert_group,
+            topk_group=layer.topk_group,
+            e_score_correction_bias=layer.e_score_correction_bias,
+            routed_scaling_factor=layer.routed_scaling_factor,
+        )
+
+    def apply(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ) -> torch.Tensor:
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            topk_weights,
+            topk_ids,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            shared_experts=shared_experts,
+            shared_experts_input=shared_experts_input,
+        )
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a8_fp8.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a8_fp8.py
new file mode 100644
index 000000000000..224b1a62bf53
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a8_fp8.py
@@ -0,0 +1,242 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+import torch
+from compressed_tensors.quantization import (
+    QuantizationArgs,
+)
+
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoeWeightScaleSupported,
+    RoutedExperts,
+    SharedExperts,
+)
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEQuantConfig,
+)
+from vllm.model_executor.layers.fused_moe.oracle.w4a8 import (
+    convert_to_w4a8_moe_kernel_format,
+    make_w4a8_moe_kernel,
+    make_w4a8_moe_quant_config,
+    select_w4a8_moe_backend,
+)
+from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe import (  # noqa E501
+    CompressedTensorsMoEMethod,
+)
+from vllm.model_executor.utils import replace_parameter, set_weight_attrs
+
+
+class CompressedTensorsW4A8Fp8MoEMethod(CompressedTensorsMoEMethod):
+    def __init__(
+        self,
+        weight_quant: QuantizationArgs,
+        input_quant: QuantizationArgs,
+        moe: FusedMoEConfig,
+        layer_name: str | None = None,
+    ):
+        super().__init__(moe)
+        self.weight_quant = weight_quant
+        self.input_quant = input_quant
+
+        self.group_size = self.weight_quant.group_size
+        self.num_bits = self.weight_quant.num_bits
+        self.packed_factor = 32 // self.num_bits
+
+        assert self.weight_quant.symmetric, (
+            "Only symmetric quantization is supported for W4A8 MoE"
+        )
+        assert self.weight_quant.actorder != "group"
+        assert self.group_size == 128, "Only group size 128 supported for W4A8 MoE"
+
+        self.w4a8_backend, self.experts_cls = select_w4a8_moe_backend(
+            config=self.moe,
+        )
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        layer.num_experts = num_experts
+        layer.orig_dtype = params_dtype
+        layer.weight_block_size = None
+
+        # requirement for CUTLASS reorder_tensor
+        assert hidden_size % 256 == 0, f"{hidden_size=} must be divisible by 256"
+        assert intermediate_size_per_partition % 256 == 0, (
+            f"{intermediate_size_per_partition=} must be divisible by 256"
+        )
+        # storage type, pack 8xint4 into int32
+        params_dtype = torch.int32
+
+        # WEIGHTS
+        w13_weight_packed = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                2 * intermediate_size_per_partition,
+                hidden_size // self.packed_factor,
+                dtype=params_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_packed", w13_weight_packed)
+        set_weight_attrs(w13_weight_packed, extra_weight_attrs)
+
+        w2_weight_packed = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                intermediate_size_per_partition // self.packed_factor,
+                dtype=params_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_packed", w2_weight_packed)
+        set_weight_attrs(w2_weight_packed, extra_weight_attrs)
+
+        # SCALES
+        # weight_scale refers to the group-wise scales
+        # they are initially loaded as bf16, we will convert to fp8
+        # after loading
+        w13_weight_scale = torch.nn.Parameter(
+            torch.ones(
+                num_experts,
+                2 * intermediate_size_per_partition,
+                hidden_size // self.group_size,
+                dtype=layer.orig_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_scale", w13_weight_scale)
+
+        w2_weight_scale = torch.nn.Parameter(
+            torch.ones(
+                num_experts,
+                hidden_size,
+                intermediate_size_per_partition // self.group_size,
+                dtype=layer.orig_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_scale", w2_weight_scale)
+        # Add PER-GROUP quantization for RoutedExperts.weight_loader.
+        extra_weight_attrs.update(
+            {"quant_method": FusedMoeWeightScaleSupported.GROUP.value}
+        )
+        set_weight_attrs(w13_weight_scale, extra_weight_attrs)
+        set_weight_attrs(w2_weight_scale, extra_weight_attrs)
+
+        # weight shapes
+        w2_weight_shape = torch.nn.Parameter(
+            torch.empty(num_experts, 2), requires_grad=False
+        )
+        layer.register_parameter("w2_weight_shape", w2_weight_shape)
+        set_weight_attrs(w2_weight_shape, extra_weight_attrs)
+        w13_weight_shape = torch.nn.Parameter(
+            torch.empty(num_experts, 2), requires_grad=False
+        )
+        layer.register_parameter("w13_weight_shape", w13_weight_shape)
+        set_weight_attrs(w13_weight_shape, extra_weight_attrs)
+
+        w13_weight_chan_scale = torch.nn.Parameter(
+            torch.ones(
+                num_experts,
+                2 * intermediate_size_per_partition,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_chan_scale", w13_weight_chan_scale)
+
+        w2_weight_chan_scale = torch.nn.Parameter(
+            torch.ones(num_experts, hidden_size, dtype=torch.float32),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_chan_scale", w2_weight_chan_scale)
+
+        # don't use input scales
+        layer.w13_input_scale = None
+        layer.w2_input_scale = None
+
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
+        (
+            w13_weight_packed,
+            w2_weight_packed,
+            w13_weight_scale,
+            w2_weight_scale,
+            w13_weight_chan_scale,
+            w2_weight_chan_scale,
+            b_strides1,
+            b_strides2,
+        ) = convert_to_w4a8_moe_kernel_format(
+            w13_weight_packed=layer.w13_weight_packed,
+            w2_weight_packed=layer.w2_weight_packed,
+            w13_weight_scale=layer.w13_weight_scale,
+            w2_weight_scale=layer.w2_weight_scale,
+        )
+
+        replace_parameter(layer, "w13_weight_packed", w13_weight_packed)
+        replace_parameter(layer, "w2_weight_packed", w2_weight_packed)
+        replace_parameter(layer, "w13_weight_scale", w13_weight_scale)
+        replace_parameter(layer, "w2_weight_scale", w2_weight_scale)
+        replace_parameter(layer, "w13_weight_chan_scale", w13_weight_chan_scale)
+        replace_parameter(layer, "w2_weight_chan_scale", w2_weight_chan_scale)
+
+        self.b_strides1 = b_strides1
+        self.b_strides2 = b_strides2
+
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+        if self.moe_quant_config is not None:
+            assert self.experts_cls is not None
+            self.moe_kernel = make_w4a8_moe_kernel(
+                moe_quant_config=self.moe_quant_config,
+                moe_config=self.moe,
+                experts_cls=self.experts_cls,
+                b_strides1=self.b_strides1,
+                b_strides2=self.b_strides2,
+                group_size=self.group_size,
+                routing_tables=layer._expert_routing_tables(),
+            )
+
+    def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantConfig:
+        return make_w4a8_moe_quant_config(
+            w1_scale=layer.w13_weight_scale,
+            w2_scale=layer.w2_weight_scale,
+            g1_alphas=layer.w13_weight_chan_scale,
+            g2_alphas=layer.w2_weight_chan_scale,
+        )
+
+    def apply(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ) -> torch.Tensor:
+        assert not self.is_monolithic
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply(
+            hidden_states=x,
+            w1=layer.w13_weight_packed,
+            w2=layer.w2_weight_packed,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            shared_experts=shared_experts,
+            shared_experts_input=shared_experts_input,
+        )
+
+    @property
+    def supports_eplb(self) -> bool:
+        return False
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a8_int8.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a8_int8.py
new file mode 100644
index 000000000000..5fdda451b3b6
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a8_int8.py
@@ -0,0 +1,349 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+import torch
+from compressed_tensors.quantization import (
+    QuantizationArgs,
+    QuantizationStrategy,
+)
+
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe import (
+    RoutedExperts,
+)
+from vllm.model_executor.layers.fused_moe.activation import MoEActivation
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEQuantConfig,
+)
+from vllm.model_executor.layers.fused_moe.cpu_fused_moe import select_experts
+from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe import (  # noqa E501
+    CompressedTensorsMoEMethod,
+)
+from vllm.model_executor.utils import replace_parameter, set_weight_attrs
+from vllm.platforms import CpuArchEnum, current_platform
+
+logger = init_logger(__name__)
+
+
+class CompressedTensorsW4A8Int8MoEMethod(CompressedTensorsMoEMethod):
+    """
+    CPU-only MoE method using dynamic 4-bit matmul kernels on Arm Platform
+    - Weights: int4 (stored as int8 values in [-8,7], packed to uint8 nibbles)
+    - Scales: Fp32 for Channelwise , bf16 for groupwise quantization
+    - Bias: Same data type as original weights
+    - Activations: FP32/Bf16 dynamic per-token (A8 Int),
+      quantized inside the kernel
+    """
+
+    def __init__(
+        self,
+        weight_quant: QuantizationArgs,
+        input_quant: QuantizationArgs,
+        moe: FusedMoEConfig,
+        layer_name: str | None = None,
+    ):
+        super().__init__(moe)
+        self.has_bias = self.moe.has_bias
+        self.weight_quant = weight_quant
+        self.input_quant = input_quant
+
+        # Validate scheme: weights=W4 (channel or group),
+        # activations=dynamic TOKEN (A8)
+
+        # Must be dynamic per-token activations
+        if (
+            input_quant.strategy != QuantizationStrategy.TOKEN
+            or not input_quant.dynamic
+        ):
+            raise ValueError(
+                "W4A8-int MoE needs dynamic per-token activation quantization."
+            )
+
+        # Weight can be channel-wise (group_size=None) or group-wise
+        self.group_size = (
+            weight_quant.group_size if (weight_quant.group_size is not None) else -1
+        )
+        if weight_quant.num_bits != 4:
+            raise ValueError("This method only supports 4-bit weights (num_bits=4).")
+
+        # CPU only
+        if not current_platform.is_cpu():
+            raise ValueError("CompressedTensorsW4A8Int8MoEMethod is CPU-only.")
+
+        # Arm: check _dyn ops availability
+        if current_platform.get_cpu_architecture() == CpuArchEnum.ARM:
+            try:
+                _ = torch.ops.aten._dyn_quant_matmul_4bit
+                _ = torch.ops.aten._dyn_quant_pack_4bit_weight
+            except AttributeError as err:
+                raise RuntimeError(
+                    f"""PyTorch {torch.__version__} lacks _dyn_quant_* 4bit ops;
+                    install a newer build."""
+                ) from err
+        self.static_input_scales = False  # always dynamic per token
+
+    # ---- parameter creation ----
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        # Shapes per local rank (TP/EP):
+        #   w13: [E, 2*I_local, H]  int8  (int4 values in [-8,7])
+        #   w2 : [E, H, I_local]    int8
+        # Scales:
+        #   channel-wise: group_size=-1 -> per-output-row, single scale per row
+        #   group-wise  : group_size=g   ->
+        #   per-output-row, (in_features/g) scales
+
+        E = num_experts
+        H = hidden_size
+        IN = intermediate_size_per_partition
+        g = self.group_size
+
+        # Per-row scale columns
+        def _n_scale_cols(in_features: int) -> int:
+            return 1 if g == -1 else (in_features // g)
+
+        # Register unpacked int4-as-int8 weights the loader will fill.
+        w13 = torch.nn.Parameter(
+            torch.empty(E, 2 * IN, H, dtype=torch.int8), requires_grad=False
+        )
+        set_weight_attrs(w13, extra_weight_attrs)
+        layer.register_parameter("w13_weight", w13)
+
+        w2 = torch.nn.Parameter(
+            torch.empty(E, H, IN, dtype=torch.int8), requires_grad=False
+        )
+        set_weight_attrs(w2, extra_weight_attrs)
+        layer.register_parameter("w2_weight", w2)
+
+        # Register scales
+        # KleidiAI groupwise kernels accepts float32 scales
+        # KleidiAI groupwise kernels accepts bfloat16 scales
+        scale_dtype = torch.float32 if g == -1 else torch.bfloat16
+
+        w13_s = torch.nn.Parameter(
+            torch.ones(E, 2 * IN, _n_scale_cols(H), dtype=scale_dtype),
+            requires_grad=False,
+        )
+        set_weight_attrs(
+            w13_s,
+            {"quant_method": "channel" if g == -1 else "group", **extra_weight_attrs},
+        )
+        layer.register_parameter("w13_weight_scale", w13_s)
+
+        w2_s = torch.nn.Parameter(
+            torch.ones(E, H, _n_scale_cols(IN), dtype=scale_dtype), requires_grad=False
+        )
+        set_weight_attrs(
+            w2_s,
+            {"quant_method": "channel" if g == -1 else "group", **extra_weight_attrs},
+        )
+        layer.register_parameter("w2_weight_scale", w2_s)
+
+        if self.has_bias:
+            w13_bias = torch.nn.Parameter(
+                torch.zeros(E, 2 * IN, dtype=params_dtype), requires_grad=False
+            )
+            layer.register_parameter("w13_bias", w13_bias)
+            set_weight_attrs(w13_bias, extra_weight_attrs)
+
+            w2_bias = torch.nn.Parameter(
+                torch.zeros(num_experts, hidden_size, dtype=params_dtype),
+                requires_grad=False,
+            )
+            layer.register_parameter("w2_bias", w2_bias)
+            set_weight_attrs(w2_bias, extra_weight_attrs)
+
+        # Placeholders for packed weights (will be replaced after packing)
+        layer.register_parameter(
+            "w13_weight_packed", torch.nn.Parameter(torch.empty(0), requires_grad=False)
+        )
+        set_weight_attrs(layer.w13_weight_packed, extra_weight_attrs)
+
+        layer.register_parameter(
+            "w2_weight_packed", torch.nn.Parameter(torch.empty(0), requires_grad=False)
+        )
+        set_weight_attrs(layer.w2_weight_packed, extra_weight_attrs)
+
+        # dims for 4 bit fused matmuls
+        layer.w13_in_features = H
+        layer.w13_out_features = 2 * IN
+        layer.w2_in_features = IN
+        layer.w2_out_features = H
+        layer.group_size = g
+
+    # post-load packing to dyn-4bit KleidiAI kernel's format
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        E = layer.w13_weight.shape[0]
+        H = layer.w13_in_features
+        I2 = layer.w13_out_features
+        IN = layer.w2_in_features
+        g = layer.group_size
+
+        def _pack_matrix(
+            int4_as_int8_2d: torch.Tensor,
+            scales_2d: torch.Tensor,
+            bias_1d: torch.Tensor | None,
+            in_features: int,
+            out_features: int,
+        ) -> torch.Tensor:
+            # int4 values are stored as int8 in [-8,7].
+            # Shift to unsigned nibble and pack pairs along input-dim.
+            tmp = int4_as_int8_2d.add(8)  # [out, in]
+            uint8_nibbles = ((tmp[:, 1::2] << 4) | tmp[:, ::2]).to(
+                torch.uint8
+            )  # [out, in//2]
+
+            # KleidiAI groupwise kernels accepts float32 scales
+            # KleidiAI groupwise kernels accepts bfloat16 scales
+            scale_dtype = torch.float32 if g == -1 else torch.bfloat16
+            scales = scales_2d.to(scale_dtype)
+            bias = None if bias_1d is None else bias_1d.to(torch.float32)
+            return torch.ops.aten._dyn_quant_pack_4bit_weight(
+                uint8_nibbles,
+                scales,
+                bias,
+                g if g != -1 else in_features,
+                in_features,
+                out_features,
+            )
+
+        # Pack per expert
+        w13_packed_list = []
+        w2_packed_list = []
+
+        has_w13_bias = hasattr(layer, "w13_bias") and layer.w13_bias is not None
+        has_w2_bias = hasattr(layer, "w2_bias") and layer.w2_bias is not None
+
+        for e in range(E):
+            w13_packed_list.append(
+                _pack_matrix(
+                    layer.w13_weight[e],  # [2I, H]
+                    layer.w13_weight_scale[e],  # [2I, H/g or 1]
+                    layer.w13_bias[e] if has_w13_bias else None,  # [2I]
+                    H,
+                    I2,
+                )
+            )
+            w2_packed_list.append(
+                _pack_matrix(
+                    # w2 shape is [H, IN]; we need [out, in] == [H, IN].
+                    layer.w2_weight[e],  # [H, IN]
+                    layer.w2_weight_scale[e],  # [H, IN/g or 1]
+                    layer.w2_bias[e] if has_w2_bias else None,  # [H]
+                    IN,
+                    layer.w2_out_features,  # in_features=IN, out_features=H
+                )
+            )
+
+        # each packed tensor has identical shape per expert; stack on dim 0
+        w13_packed = torch.stack(w13_packed_list, dim=0)
+        w2_packed = torch.stack(w2_packed_list, dim=0)
+
+        replace_parameter(
+            layer,
+            "w13_weight_packed",
+            torch.nn.Parameter(w13_packed, requires_grad=False),
+        )
+        replace_parameter(
+            layer,
+            "w2_weight_packed",
+            torch.nn.Parameter(w2_packed, requires_grad=False),
+        )
+
+        # free raw tensors/scales/bias now that they're packed into the payload.
+        replace_parameter(
+            layer, "w13_weight", torch.nn.Parameter(torch.empty(0), requires_grad=False)
+        )
+        replace_parameter(
+            layer, "w2_weight", torch.nn.Parameter(torch.empty(0), requires_grad=False)
+        )
+        replace_parameter(
+            layer,
+            "w13_weight_scale",
+            torch.nn.Parameter(torch.empty(0), requires_grad=False),
+        )
+        replace_parameter(
+            layer,
+            "w2_weight_scale",
+            torch.nn.Parameter(torch.empty(0), requires_grad=False),
+        )
+        if has_w13_bias:
+            replace_parameter(
+                layer,
+                "w13_bias",
+                torch.nn.Parameter(torch.empty(0), requires_grad=False),
+            )
+        if has_w2_bias:
+            replace_parameter(
+                layer,
+                "w2_bias",
+                torch.nn.Parameter(torch.empty(0), requires_grad=False),
+            )
+
+    def get_fused_moe_quant_config(
+        self, layer: torch.nn.Module
+    ) -> FusedMoEQuantConfig | None:
+        # CPU dynamic 4-bit MoE path does not use modular kernels or
+        # fused_experts; quant config is not needed.
+        return None
+
+    @property
+    def is_monolithic(self) -> bool:
+        return True
+
+    def apply_monolithic(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        router_logits: torch.Tensor,
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        assert layer.activation in (
+            MoEActivation.SILU,
+            MoEActivation.SWIGLUOAI,
+            MoEActivation.SWIGLUSTEP,
+        ), "Only SiLU/SwiGLUGU/SwiGLUUG are supported."
+        assert layer.expert_map is None, """expert_map/EP not implemented
+        for CPU dyn-4bit MoE."""
+
+        def _act_kind(s: MoEActivation) -> int:
+            # 0 = SwiGLU_Gu (SiLU(g)*u), 1 = SwiGLU_Ug (SiLU(u)*g), 2 = SiLU
+            if s == MoEActivation.SWIGLUSTEP:
+                return 0
+            if s == MoEActivation.SWIGLUOAI:
+                return 1
+            if s == MoEActivation.SILU:
+                return 2
+            raise ValueError(f"Unknown activation '{s}'")
+
+        # Apply topk softmax on router output
+        topk_weights, topk_ids = select_experts(
+            hidden_states=x,
+            router_logits=router_logits,
+            top_k=layer.top_k,
+            use_grouped_topk=layer.use_grouped_topk,
+            renormalize=layer.renormalize,
+        )
+
+        return torch.ops._C.dynamic_4bit_int_moe(
+            x,
+            topk_ids.to(torch.long),
+            topk_weights,
+            layer.w13_weight_packed,
+            layer.w2_weight_packed,
+            layer.w2_out_features,
+            layer.w2_in_features,
+            layer.w13_out_features,
+            layer.group_size,
+            layer.apply_router_weight_on_input,
+            int(_act_kind(layer.activation)),
+        )
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w8a8_fp8.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w8a8_fp8.py
new file mode 100644
index 000000000000..da5d85e4abc6
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w8a8_fp8.py
@@ -0,0 +1,418 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+import torch
+from compressed_tensors.quantization import (
+    QuantizationArgs,
+    QuantizationStrategy,
+)
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.distributed import get_tensor_model_parallel_world_size
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoeWeightScaleSupported,
+    RoutedExperts,
+    SharedExperts,
+)
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEQuantConfig,
+)
+from vllm.model_executor.layers.fused_moe.oracle.fp8 import (
+    convert_to_fp8_moe_kernel_format,
+    make_fp8_moe_kernel,
+    make_fp8_moe_quant_config,
+    select_fp8_moe_backend,
+)
+from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe import (  # noqa E501
+    CompressedTensorsMoEMethod,
+)
+from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+    process_fp8_input_tensor_strategy_moe,
+    process_fp8_weight_tensor_strategy_moe,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    kFp8Dynamic128Sym,
+    kFp8DynamicTokenSym,
+    kFp8Static128BlockSym,
+    kFp8StaticChannelSym,
+    kFp8StaticTensorSym,
+)
+from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
+    normalize_e4m3fn_to_e4m3fnuz,
+)
+from vllm.model_executor.utils import replace_parameter, set_weight_attrs
+from vllm.platforms import current_platform
+
+logger = init_logger(__name__)
+
+
+class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
+    """W8A8 FP8 MoE quantization using compressed tensors."""
+
+    def __init__(
+        self,
+        weight_quant: QuantizationArgs,
+        input_quant: QuantizationArgs,
+        moe: FusedMoEConfig,
+        layer_name: str | None = None,
+    ):
+        super().__init__(moe)
+        self.weight_quant = weight_quant
+        self.input_quant = input_quant
+
+        per_tensor = (
+            self.weight_quant.strategy == QuantizationStrategy.TENSOR
+            and self.input_quant.strategy == QuantizationStrategy.TENSOR
+        )
+        per_channel = (
+            self.weight_quant.strategy == QuantizationStrategy.CHANNEL
+            and self.input_quant.strategy == QuantizationStrategy.TOKEN
+        )
+        if not (per_tensor or per_channel):
+            assert self.weight_quant.strategy == QuantizationStrategy.BLOCK
+            self.weight_block_size = self.weight_quant.block_structure
+            assert self.weight_quant.dynamic is not None
+        else:
+            self.weight_block_size = None
+        self.block_quant = self.weight_block_size is not None
+
+        self.static_input_scales = not self.input_quant.dynamic
+        if self.static_input_scales and per_channel:
+            raise ValueError(
+                "For FP8 Fused MoE layer, we require either per tensor or "
+                "channelwise, dynamic per token quantization."
+            )
+
+        ct2vllm_weight = {
+            QuantizationStrategy.CHANNEL: kFp8StaticChannelSym,
+            QuantizationStrategy.TENSOR: kFp8StaticTensorSym,
+            QuantizationStrategy.BLOCK: kFp8Static128BlockSym,
+        }
+        ct2vllm_act = {
+            QuantizationStrategy.TOKEN: kFp8DynamicTokenSym,
+            QuantizationStrategy.TENSOR: (
+                kFp8StaticTensorSym if self.static_input_scales else kFp8Dynamic128Sym
+            ),
+        }
+        weight_key = ct2vllm_weight[self.weight_quant.strategy]
+        if weight_key == kFp8Static128BlockSym:
+            activation_key = kFp8Dynamic128Sym
+        else:
+            activation_key = ct2vllm_act[self.input_quant.strategy]
+
+        # Select Fp8 MoE backend
+        self.fp8_backend, self.experts_cls = select_fp8_moe_backend(
+            config=self.moe,
+            weight_key=weight_key,
+            activation_key=activation_key,
+            allow_vllm_cutlass=True,
+        )
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        layer.num_experts = num_experts
+        layer.orig_dtype = params_dtype
+        layer.weight_block_size = None
+
+        params_dtype = torch.float8_e4m3fn
+        w13_num_shards = 2 if self.moe.is_act_and_mul else 1
+
+        if self.block_quant:
+            assert self.weight_block_size is not None
+            layer.weight_block_size = self.weight_block_size
+            tp_size = get_tensor_model_parallel_world_size()
+            block_n, block_k = (
+                self.weight_block_size[0],
+                self.weight_block_size[1],
+            )
+            # NOTE: To ensure proper alignment of the block-wise quantization
+            # scales, the output_size of the weights for both the gate and up
+            # layers must be divisible by block_n.
+            # Required by column parallel or enabling merged weights
+            if intermediate_size_per_partition % block_n != 0:
+                raise ValueError(
+                    f"The output_size of gate's and up's weight = "
+                    f"{intermediate_size_per_partition} is not divisible by "
+                    f"weight quantization block_n = {block_n}."
+                )
+            if tp_size > 1 and intermediate_size_per_partition % block_k != 0:
+                # Required by row parallel
+                raise ValueError(
+                    f"The input_size of down's weight = "
+                    f"{intermediate_size_per_partition} is not divisible by "
+                    f"weight quantization block_k = {block_k}."
+                )
+
+        # WEIGHTS
+        w13_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                w13_num_shards * intermediate_size_per_partition,
+                hidden_size,
+                dtype=params_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight", w13_weight)
+        set_weight_attrs(w13_weight, extra_weight_attrs)
+
+        w2_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                intermediate_size_per_partition,
+                dtype=params_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight", w2_weight)
+        set_weight_attrs(w2_weight, extra_weight_attrs)
+
+        # WEIGHT_SCALES
+        if self.weight_quant.strategy == QuantizationStrategy.TENSOR:
+            # For gated MoE, allocate 2 scales for w1 and w3 respectively.
+            # They will be combined to a single scale after weight loading.
+            # For non-gated MoE, allocate 1 scale for w13.
+            w13_weight_scale = torch.nn.Parameter(
+                torch.ones(num_experts, w13_num_shards, dtype=torch.float32),
+                requires_grad=False,
+            )
+            layer.register_parameter("w13_weight_scale", w13_weight_scale)
+            w2_weight_scale = torch.nn.Parameter(
+                torch.ones(num_experts, dtype=torch.float32), requires_grad=False
+            )
+            layer.register_parameter("w2_weight_scale", w2_weight_scale)
+            # Add PER-TENSOR quantization for RoutedExperts.weight_loader.
+            extra_weight_attrs.update(
+                {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}
+            )
+            set_weight_attrs(w13_weight_scale, extra_weight_attrs)
+            set_weight_attrs(w2_weight_scale, extra_weight_attrs)
+
+        elif self.weight_quant.strategy == QuantizationStrategy.CHANNEL:
+            w13_weight_scale = torch.nn.Parameter(
+                torch.ones(
+                    num_experts,
+                    w13_num_shards * intermediate_size_per_partition,
+                    1,
+                    dtype=torch.float32,
+                ),
+                requires_grad=False,
+            )
+            layer.register_parameter("w13_weight_scale", w13_weight_scale)
+            w2_weight_scale = torch.nn.Parameter(
+                torch.ones(num_experts, hidden_size, 1, dtype=torch.float32),
+                requires_grad=False,
+            )
+            layer.register_parameter("w2_weight_scale", w2_weight_scale)
+            # Add PER-CHANNEL quantization for RoutedExperts.weight_loader.
+            extra_weight_attrs.update(
+                {"quant_method": FusedMoeWeightScaleSupported.CHANNEL.value}
+            )
+            set_weight_attrs(w13_weight_scale, extra_weight_attrs)
+            set_weight_attrs(w2_weight_scale, extra_weight_attrs)
+
+        elif self.weight_quant.strategy == QuantizationStrategy.BLOCK:
+            w13_weight_scale = torch.nn.Parameter(
+                torch.ones(
+                    num_experts,
+                    w13_num_shards
+                    * ((intermediate_size_per_partition + block_n - 1) // block_n),
+                    (hidden_size + block_k - 1) // block_k,
+                    dtype=torch.float32,
+                ),
+                requires_grad=False,
+            )
+            layer.register_parameter("w13_weight_scale", w13_weight_scale)
+            w2_weight_scale = torch.nn.Parameter(
+                torch.ones(
+                    num_experts,
+                    (hidden_size + block_n - 1) // block_n,
+                    (intermediate_size_per_partition + block_k - 1) // block_k,
+                    dtype=torch.float32,
+                ),
+                requires_grad=False,
+            )
+            layer.register_parameter("w2_weight_scale", w2_weight_scale)
+            # Add PER-CHANNEL quantization for RoutedExperts.weight_loader.
+            extra_weight_attrs.update(
+                {"quant_method": FusedMoeWeightScaleSupported.BLOCK.value}
+            )
+            set_weight_attrs(w13_weight_scale, extra_weight_attrs)
+            set_weight_attrs(w2_weight_scale, extra_weight_attrs)
+
+        # INPUT_SCALES
+        if self.static_input_scales:
+            w13_input_scale = torch.nn.Parameter(
+                torch.ones(num_experts, dtype=torch.float32), requires_grad=False
+            )
+            layer.register_parameter("w13_input_scale", w13_input_scale)
+            set_weight_attrs(w13_input_scale, extra_weight_attrs)
+
+            w2_input_scale = torch.nn.Parameter(
+                torch.ones(num_experts, dtype=torch.float32), requires_grad=False
+            )
+            layer.register_parameter("w2_input_scale", w2_input_scale)
+            set_weight_attrs(w2_input_scale, extra_weight_attrs)
+        else:
+            layer.w13_input_scale = None
+            layer.w2_input_scale = None
+
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
+        # Allow for accessing weights and scales in standard way.
+        w13 = layer.w13_weight
+        w2 = layer.w2_weight
+        w13_scale = layer.w13_weight_scale
+        w2_scale = layer.w2_weight_scale
+        w13_input_scale = layer.w13_input_scale
+        w2_input_scale = layer.w2_input_scale
+
+        # MI300x and MI325x use FNUZ format for FP8. Convert if needed.
+        if current_platform.is_fp8_fnuz():
+            w13, w13_scale, w13_input_scale = normalize_e4m3fn_to_e4m3fnuz(
+                w13, w13_scale, w13_input_scale
+            )
+            w2, w2_scale, w2_input_scale = normalize_e4m3fn_to_e4m3fnuz(
+                w2, w2_scale, w2_input_scale
+            )
+
+        # Per tensor kernels require single activation scale. Use the max.
+        if self.static_input_scales:
+            assert self.input_quant.strategy == QuantizationStrategy.TENSOR
+            assert w13_input_scale is not None and w2_input_scale is not None
+            w13_input_scale, w2_input_scale = process_fp8_input_tensor_strategy_moe(
+                w13_input_scale, w2_input_scale
+            )
+            replace_parameter(layer, "w13_input_scale", w13_input_scale)
+            replace_parameter(layer, "w2_input_scale", w2_input_scale)
+
+        # Per-tensor kernels use a single scale, for W13, but on disk there
+        # is a separate scale for W1 and W3. Requantize with the max scale.
+        if self.weight_quant.strategy == QuantizationStrategy.TENSOR:
+            w13, w13_scale = process_fp8_weight_tensor_strategy_moe(
+                w13,
+                w13_scale,
+                shard_size=layer.intermediate_size_per_partition,
+                num_experts=layer.local_num_experts,
+                is_act_and_mul=self.moe.is_act_and_mul,
+            )
+
+        w13, w2, w13_scale, w2_scale = convert_to_fp8_moe_kernel_format(
+            fp8_backend=self.fp8_backend,
+            layer=layer,
+            w13=w13,
+            w2=w2,
+            w13_scale=w13_scale,
+            w2_scale=w2_scale,
+            w13_input_scale=w13_input_scale,
+            w2_input_scale=w2_input_scale,
+        )
+
+        # Replace parameters with updated versions. Note that this helper
+        # function ensures the replacement is compatible with RL weight reloads.
+        replace_parameter(layer, "w13_weight", w13)
+        replace_parameter(layer, "w2_weight", w2)
+        replace_parameter(layer, "w13_weight_scale", w13_scale)
+        replace_parameter(layer, "w2_weight_scale", w2_scale)
+
+        # Setup modular kernel for TP case and naive DP/EP case.
+        # In non-naive DP/EP case, we will create a ModularKernelMethod.
+        # TODO(rob): unify these so FP8MoEMethod owns the ModularKernel
+        # in both cases.
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+        if self.moe_quant_config:
+            assert self.experts_cls is not None
+            self.moe_kernel = make_fp8_moe_kernel(
+                moe_quant_config=self.moe_quant_config,
+                moe_config=self.moe,
+                fp8_backend=self.fp8_backend,
+                experts_cls=self.experts_cls,
+                routing_tables=layer._expert_routing_tables(),
+            )
+
+    def maybe_make_prepare_finalize(
+        self,
+        routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
+    ) -> mk.FusedMoEPrepareAndFinalizeModular | None:
+        raise ValueError(
+            f"{self.__class__.__name__} uses the new modular kernel initialization "
+            "logic. This function should not be called."
+        )
+
+    def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantConfig:
+        is_per_token = self.input_quant.strategy == QuantizationStrategy.TOKEN
+        return make_fp8_moe_quant_config(
+            fp8_backend=self.fp8_backend,
+            w1_scale=layer.w13_weight_scale,
+            w2_scale=layer.w2_weight_scale,
+            a1_scale=layer.w13_input_scale,
+            a2_scale=layer.w2_input_scale,
+            per_act_token_quant=is_per_token,
+            per_out_ch_quant=is_per_token,
+            block_shape=self.weight_block_size,
+            swiglu_limit=getattr(layer, "swiglu_limit", None),
+        )
+
+    def apply_monolithic(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        router_logits: torch.Tensor,
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply_monolithic(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            router_logits,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            num_expert_group=layer.num_expert_group,
+            topk_group=layer.topk_group,
+            e_score_correction_bias=layer.e_score_correction_bias,
+            routed_scaling_factor=layer.routed_scaling_factor,
+        )
+
+    def apply(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ) -> torch.Tensor:
+        assert not self.is_monolithic
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            topk_weights,
+            topk_ids,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            # TODO(rob): investigate the disable_expert_map introduced by:
+            # https://github.com/vllm-project/vllm/commit/84166fee9770e6fba71a96978b3e7d149392fb28 # noqa: E501
+            expert_map=layer.expert_map,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            shared_experts=shared_experts,
+            shared_experts_input=shared_experts_input,
+        )
+
+    @property
+    def supports_eplb(self) -> bool:
+        return True
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w8a8_int8.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w8a8_int8.py
new file mode 100644
index 000000000000..74bf8a3546e0
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w8a8_int8.py
@@ -0,0 +1,195 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+import torch
+from compressed_tensors.quantization import (
+    QuantizationArgs,
+    QuantizationStrategy,
+)
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoeWeightScaleSupported,
+    RoutedExperts,
+    SharedExperts,
+)
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEQuantConfig,
+)
+from vllm.model_executor.layers.fused_moe.oracle.int8 import (
+    make_int8_moe_kernel,
+    make_int8_moe_quant_config,
+    select_int8_moe_backend,
+)
+from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe import (  # noqa E501
+    CompressedTensorsMoEMethod,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    kInt8DynamicTokenSym,
+    kInt8StaticChannelSym,
+)
+from vllm.model_executor.utils import set_weight_attrs
+
+logger = init_logger(__name__)
+
+
+class CompressedTensorsW8A8Int8MoEMethod(CompressedTensorsMoEMethod):
+    """W8A8 Int8 MoE quantization using compressed tensors."""
+
+    def __init__(
+        self,
+        weight_quant: QuantizationArgs,
+        input_quant: QuantizationArgs,
+        moe: FusedMoEConfig,
+        layer_name: str | None = None,
+    ):
+        super().__init__(moe)
+        self.weight_quant = weight_quant
+        self.input_quant = input_quant
+
+        per_channel = (
+            self.weight_quant.strategy == QuantizationStrategy.CHANNEL
+            and self.input_quant.strategy == QuantizationStrategy.TOKEN
+        )
+        if not per_channel:
+            raise ValueError(
+                "For INT8 Fused MoE layers, we require channelwise, "
+                "dynamic per token quantization. Found "
+                f"{self.weight_quant}, {self.input_quant}"
+            )
+
+        self.static_input_scales = not self.input_quant.dynamic
+        if self.static_input_scales:
+            raise ValueError(
+                "For INT8 Fused MoE layers, we require channelwise, "
+                "dynamic per token quantization. Found static input scales."
+            )
+
+        # Select Int8 MoE backend.
+        self.int8_backend, self.experts_cls = select_int8_moe_backend(
+            config=self.moe,
+            weight_key=kInt8StaticChannelSym,
+            activation_key=kInt8DynamicTokenSym,
+        )
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        params_dtype = torch.int8
+        w13_num_shards = 2 if self.moe.is_act_and_mul else 1
+
+        # WEIGHTS
+        w13_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                w13_num_shards * intermediate_size_per_partition,
+                hidden_size,
+                dtype=params_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight", w13_weight)
+        set_weight_attrs(w13_weight, extra_weight_attrs)
+
+        w2_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                intermediate_size_per_partition,
+                dtype=params_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight", w2_weight)
+        set_weight_attrs(w2_weight, extra_weight_attrs)
+
+        # WEIGHT_SCALES
+        assert self.weight_quant.strategy == QuantizationStrategy.CHANNEL
+        w13_weight_scale = torch.nn.Parameter(
+            torch.ones(
+                num_experts,
+                w13_num_shards * intermediate_size_per_partition,
+                1,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_scale", w13_weight_scale)
+        w2_weight_scale = torch.nn.Parameter(
+            torch.ones(num_experts, hidden_size, 1, dtype=torch.float32),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_scale", w2_weight_scale)
+        # Add PER-CHANNEL quantization for RoutedExperts.weight_loader.
+        extra_weight_attrs.update(
+            {"quant_method": FusedMoeWeightScaleSupported.CHANNEL.value}
+        )
+        set_weight_attrs(w13_weight_scale, extra_weight_attrs)
+        set_weight_attrs(w2_weight_scale, extra_weight_attrs)
+
+        # INPUT_SCALES
+        assert not self.static_input_scales
+        layer.w13_input_scale = None
+        layer.w2_input_scale = None
+
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+        assert self.experts_cls is not None
+        self.moe_kernel = make_int8_moe_kernel(
+            moe_quant_config=self.moe_quant_config,
+            moe_config=self.moe,
+            experts_cls=self.experts_cls,
+            routing_tables=layer._expert_routing_tables(),
+        )
+
+    def maybe_make_prepare_finalize(
+        self,
+        routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
+    ) -> mk.FusedMoEPrepareAndFinalizeModular | None:
+        raise ValueError(
+            f"{self.__class__.__name__} uses the new modular kernel initialization "
+            "logic. This function should not be called."
+        )
+
+    def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantConfig:
+        return make_int8_moe_quant_config(
+            w1_scale=layer.w13_weight_scale,
+            w2_scale=layer.w2_weight_scale,
+            a1_scale=layer.w13_input_scale,
+            a2_scale=layer.w2_input_scale,
+            per_act_token_quant=True,
+        )
+
+    def apply(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ) -> torch.Tensor:
+        assert not self.is_monolithic
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            shared_experts=shared_experts,
+            shared_experts_input=shared_experts_input,
+        )
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w8a8_mxfp8.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w8a8_mxfp8.py
new file mode 100644
index 000000000000..dc851cc13138
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w8a8_mxfp8.py
@@ -0,0 +1,213 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoeWeightScaleSupported,
+    RoutedExperts,
+    SharedExperts,
+)
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEQuantConfig,
+)
+from vllm.model_executor.layers.fused_moe.oracle.fp8 import (
+    convert_to_fp8_moe_kernel_format,
+    make_fp8_moe_kernel,
+    make_fp8_moe_quant_config,
+)
+from vllm.model_executor.layers.fused_moe.oracle.mxfp8 import (
+    select_mxfp8_moe_backend,
+)
+from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.compressed_tensors_moe import (  # noqa: E501
+    CompressedTensorsMoEMethod,
+)
+from vllm.model_executor.layers.quantization.utils.mxfp8_utils import (
+    MXFP8_BLOCK_SIZE,
+    MXFP8_SCALE_DTYPE,
+    MXFP8_VALUE_DTYPE,
+)
+from vllm.model_executor.utils import replace_parameter, set_weight_attrs
+
+
+class CompressedTensorsW8A8Mxfp8MoEMethod(CompressedTensorsMoEMethod):
+    """Compressed-tensors MoE method for pre-quantized MXFP8 (W8A8) checkpoints.
+
+    Loads FP8 (E4M3) weights with E8M0 uint8 per-group scales (group_size=32)
+    from checkpoint. Activations are dynamically quantized to MXFP8 at runtime.
+    Supports FlashInfer TRT-LLM and Marlin backends (auto-selected).
+    """
+
+    def __init__(self, moe: FusedMoEConfig):
+        super().__init__(moe)
+        self.weight_block_size = [1, MXFP8_BLOCK_SIZE]
+        self.fp8_backend, self.experts_cls = select_mxfp8_moe_backend(config=self.moe)
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        layer.num_experts = num_experts
+        layer.params_dtype = params_dtype
+        w13_num_shards = 2 if self.moe.is_act_and_mul else 1
+
+        w13_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                w13_num_shards * intermediate_size_per_partition,
+                hidden_size,
+                dtype=MXFP8_VALUE_DTYPE,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight", w13_weight)
+        set_weight_attrs(w13_weight, extra_weight_attrs)
+
+        w2_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                intermediate_size_per_partition,
+                dtype=MXFP8_VALUE_DTYPE,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight", w2_weight)
+        set_weight_attrs(w2_weight, extra_weight_attrs)
+
+        w13_weight_scale = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                w13_num_shards * intermediate_size_per_partition,
+                hidden_size // MXFP8_BLOCK_SIZE,
+                dtype=MXFP8_SCALE_DTYPE,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_scale", w13_weight_scale)
+        extra_weight_attrs.update(
+            {"quant_method": FusedMoeWeightScaleSupported.GROUP.value}
+        )
+        set_weight_attrs(w13_weight_scale, extra_weight_attrs)
+
+        w2_weight_scale = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                intermediate_size_per_partition // MXFP8_BLOCK_SIZE,
+                dtype=MXFP8_SCALE_DTYPE,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_scale", w2_weight_scale)
+        set_weight_attrs(w2_weight_scale, extra_weight_attrs)
+
+        layer.w13_input_scale = None
+        layer.w2_input_scale = None
+
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
+        layer.weight_block_size = self.weight_block_size
+
+        w13, w2, w13_scale, w2_scale = convert_to_fp8_moe_kernel_format(
+            fp8_backend=self.fp8_backend,
+            layer=layer,
+            w13=layer.w13_weight,
+            w2=layer.w2_weight,
+            w13_scale=layer.w13_weight_scale,
+            w2_scale=layer.w2_weight_scale,
+            w13_input_scale=layer.w13_input_scale,
+            w2_input_scale=layer.w2_input_scale,
+        )
+
+        replace_parameter(layer, "w13_weight", w13)
+        replace_parameter(layer, "w2_weight", w2)
+        replace_parameter(layer, "w13_weight_scale", w13_scale)
+        replace_parameter(layer, "w2_weight_scale", w2_scale)
+
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+        if self.moe_quant_config is not None:
+            assert self.experts_cls is not None
+            self.moe_kernel = make_fp8_moe_kernel(
+                moe_quant_config=self.moe_quant_config,
+                moe_config=self.moe,
+                fp8_backend=self.fp8_backend,
+                experts_cls=self.experts_cls,
+                routing_tables=layer._expert_routing_tables(),
+            )
+
+    def get_fused_moe_quant_config(
+        self, layer: torch.nn.Module
+    ) -> FusedMoEQuantConfig | None:
+        return make_fp8_moe_quant_config(
+            fp8_backend=self.fp8_backend,
+            w1_scale=layer.w13_weight_scale,
+            w2_scale=layer.w2_weight_scale,
+            a1_scale=layer.w13_input_scale,
+            a2_scale=layer.w2_input_scale,
+            block_shape=self.weight_block_size,
+            swiglu_limit=getattr(layer, "swiglu_limit", None),
+        )
+
+    def maybe_make_prepare_finalize(
+        self,
+        routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
+    ) -> mk.FusedMoEPrepareAndFinalizeModular | None:
+        raise ValueError(
+            f"{self.__class__.__name__} uses the new modular kernel "
+            "initialization logic. This function should not be called."
+        )
+
+    def apply_monolithic(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        router_logits: torch.Tensor,
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply_monolithic(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            router_logits,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            num_expert_group=layer.num_expert_group,
+            topk_group=layer.topk_group,
+            e_score_correction_bias=layer.e_score_correction_bias,
+            routed_scaling_factor=layer.routed_scaling_factor,
+        )
+
+    def apply(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ) -> torch.Tensor:
+        assert not self.is_monolithic
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            topk_weights,
+            topk_ids,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            shared_experts=shared_experts,
+            shared_experts_input=shared_experts_input,
+        )
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_wna16.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_wna16.py
new file mode 100644
index 000000000000..f6418194fd70
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_wna16.py
@@ -0,0 +1,269 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+import torch
+from compressed_tensors.quantization import (
+    QuantizationArgs,
+)
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe import (
+    RoutedExperts,
+    SharedExperts,
+)
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEQuantConfig,
+    int4_w4a16_moe_quant_config,
+    int8_w8a16_moe_quant_config,
+)
+from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe import (  # noqa E501
+    CompressedTensorsMoEMethod,
+)
+from vllm.model_executor.utils import set_weight_attrs
+
+logger = init_logger(__name__)
+
+
+class CompressedTensorsWNA16MoEMethod(CompressedTensorsMoEMethod):
+    def __init__(
+        self,
+        weight_quant: QuantizationArgs,
+        input_quant: QuantizationArgs | None,
+        moe: FusedMoEConfig,
+        layer_name: str | None = None,
+    ):
+        super().__init__(moe)
+        self.weight_quant = weight_quant
+        self.input_quant = input_quant
+        # Extract properties from weight_quant
+        self.num_bits = weight_quant.num_bits
+        self.packed_factor = 32 // weight_quant.num_bits
+        self.strategy = weight_quant.strategy
+        # channelwise is not supported by this kernel
+        assert weight_quant.strategy == "group"
+        self.group_size = weight_quant.group_size
+        # grouped actorder isn't supported by this kernel
+        assert weight_quant.actorder != "group"
+        assert weight_quant.symmetric, (
+            "Only symmetric quantization is supported for MoE"
+        )
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        # Will transpose the loaded weight along the
+        # intermediate and hidden dim sizes. Will
+        # shard for TP along the transposed dims
+        extra_weight_attrs.update(
+            {"is_transposed": True, "quant_method": self.strategy}
+        )
+        w13_num_shards = 2 if self.moe.is_act_and_mul else 1
+        w13_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size // self.packed_factor,
+                w13_num_shards * intermediate_size_per_partition,
+                dtype=torch.int32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_packed", w13_weight)
+        set_weight_attrs(w13_weight, extra_weight_attrs)
+
+        w2_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                intermediate_size_per_partition // self.packed_factor,
+                hidden_size,
+                dtype=torch.int32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_packed", w2_weight)
+        set_weight_attrs(w2_weight, extra_weight_attrs)
+
+        w2_scales_size = intermediate_size_per_partition
+
+        if self.strategy == "channel":
+            num_groups_w2 = num_groups_w13 = 1
+            self.group_size = -1
+        else:
+            num_groups_w2 = w2_scales_size // self.group_size
+            num_groups_w13 = hidden_size // self.group_size
+
+        w13_scale = torch.nn.Parameter(
+            torch.ones(
+                num_experts,
+                num_groups_w13,
+                w13_num_shards * intermediate_size_per_partition,
+                dtype=params_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_scale", w13_scale)
+        set_weight_attrs(w13_scale, extra_weight_attrs)
+
+        w2_scale = torch.nn.Parameter(
+            torch.ones(num_experts, num_groups_w2, hidden_size, dtype=params_dtype),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_scale", w2_scale)
+        set_weight_attrs(w2_scale, extra_weight_attrs)
+        set_weight_attrs(w2_scale, {"load_full_w2": False})
+
+        w2_weight_shape = torch.nn.Parameter(
+            torch.empty(num_experts, 2), requires_grad=False
+        )
+        layer.register_parameter("w2_weight_shape", w2_weight_shape)
+        set_weight_attrs(w2_weight_shape, extra_weight_attrs)
+        w13_weight_shape = torch.nn.Parameter(
+            torch.empty(num_experts, 2), requires_grad=False
+        )
+
+        layer.register_parameter("w13_weight_shape", w13_weight_shape)
+        set_weight_attrs(w13_weight_shape, extra_weight_attrs)
+
+        w13_g_idx = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                dtype=torch.int32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_g_idx", w13_g_idx)
+        set_weight_attrs(w13_g_idx, extra_weight_attrs)
+
+        w2_g_idx = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                intermediate_size_per_partition,
+                dtype=torch.int32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_g_idx", w2_g_idx)
+        set_weight_attrs(w2_g_idx, extra_weight_attrs)
+
+        w13_g_idx_sort_indices = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                dtype=torch.int32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_g_idx_sort_indices", w13_g_idx_sort_indices)
+        set_weight_attrs(w13_g_idx_sort_indices, extra_weight_attrs)
+
+        w2_g_idx_sort_indices = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                intermediate_size_per_partition,
+                dtype=torch.int32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_g_idx_sort_indices", w2_g_idx_sort_indices)
+        set_weight_attrs(w2_g_idx_sort_indices, extra_weight_attrs)
+
+        layer.a13_scale = None
+        layer.a2_scale = None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        # Reconfigure packed weights and scales to match moe_wna16 format
+        layer.w13_weight_packed = torch.nn.Parameter(
+            layer.w13_weight_packed.transpose(1, 2).contiguous().view(torch.uint8),
+            requires_grad=False,
+        )
+        layer.w2_weight_packed = torch.nn.Parameter(
+            layer.w2_weight_packed.transpose(1, 2).contiguous().view(torch.uint8),
+            requires_grad=False,
+        )
+        layer.w13_weight_scale = torch.nn.Parameter(
+            layer.w13_weight_scale.transpose(1, 2).contiguous(), requires_grad=False
+        )
+        layer.w2_weight_scale = torch.nn.Parameter(
+            layer.w2_weight_scale.transpose(1, 2).contiguous(), requires_grad=False
+        )
+
+    def get_fused_moe_quant_config(
+        self, layer: torch.nn.Module
+    ) -> FusedMoEQuantConfig | None:
+        assert self.num_bits == 4 or self.num_bits == 8
+        config_builder = (
+            int4_w4a16_moe_quant_config
+            if self.num_bits == 4
+            else int8_w8a16_moe_quant_config
+        )
+
+        return config_builder(
+            w1_scale=layer.w13_weight_scale,
+            w2_scale=layer.w2_weight_scale,
+            w1_zp=None,
+            w2_zp=None,
+            block_shape=[0, self.group_size],
+        )
+
+    def select_gemm_impl(
+        self,
+        prepare_finalize: mk.FusedMoEPrepareAndFinalizeModular,
+        layer: torch.nn.Module,
+    ) -> mk.FusedMoEExpertsModular:
+        if self.moe.is_lora_enabled:
+            assert self.moe_quant_config is not None
+            from vllm.triton_utils import HAS_TRITON
+
+            if HAS_TRITON:
+                from vllm.model_executor.layers.fused_moe import TritonWNA16Experts
+
+                layer.w13_weight = layer.w13_weight_packed
+                layer.w2_weight = layer.w2_weight_packed
+                return TritonWNA16Experts(
+                    moe_config=self.moe, quant_config=self.moe_quant_config
+                )
+            else:
+                raise NotImplementedError(
+                    "TritonExperts requires Triton. "
+                    "Install triton or disable LoRA for MoE."
+                )
+
+        raise NotImplementedError
+
+    def apply(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ) -> torch.Tensor:
+        from vllm.model_executor.layers.fused_moe import fused_experts
+
+        return fused_experts(
+            x,
+            layer.w13_weight_packed,
+            layer.w2_weight_packed,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            inplace=not self.moe.disable_inplace,
+            activation=layer.activation,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            quant_config=self.moe_quant_config,
+        )
+
+    @property
+    def supports_eplb(self) -> bool:
+        return True
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_wna16_marlin.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_wna16_marlin.py
new file mode 100644
index 000000000000..c60691df5835
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_wna16_marlin.py
@@ -0,0 +1,577 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import enum
+from enum import Enum
+
+import torch
+from compressed_tensors.quantization import (
+    QuantizationArgs,
+)
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm import _custom_ops as ops
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe import (
+    RoutedExperts,
+    SharedExperts,
+)
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEQuantConfig,
+    int4_w4a16_moe_quant_config,
+)
+from vllm.model_executor.layers.fused_moe.experts.marlin_moe import (
+    BatchedMarlinExperts,
+    MarlinExperts,
+    fused_marlin_moe,
+)
+from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe import (  # noqa E501
+    CompressedTensorsMoEMethod,
+)
+from vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16 import (  # noqa
+    WNA16_SUPPORTED_TYPES_MAP,
+)
+from vllm.model_executor.layers.quantization.utils.flashinfer_mxint4_moe import (
+    flashinfer_trtllm_mxint4_moe,
+    is_flashinfer_mxint4_moe_available,
+    prepare_static_weights_for_trtllm_mxint4_moe,
+)
+from vllm.model_executor.layers.quantization.utils.marlin_utils import (
+    get_marlin_input_dtype,
+    marlin_act_int8_process_scales,
+    marlin_make_workspace_new,
+    marlin_moe_permute_scales,
+)
+from vllm.model_executor.utils import replace_parameter, set_weight_attrs
+
+logger = init_logger(__name__)
+
+
+class GPTQMarlinState(Enum):
+    REPACK = enum.auto()
+    READY = enum.auto()
+
+
+class CompressedTensorsWNA16MarlinMoEMethod(CompressedTensorsMoEMethod):
+    def __init__(
+        self,
+        weight_quant: QuantizationArgs,
+        input_quant: QuantizationArgs | None,
+        moe: FusedMoEConfig,
+        layer_name: str | None = None,
+    ):
+        super().__init__(moe)
+        self.weight_quant = weight_quant
+        self.input_quant = input_quant
+        assert weight_quant.symmetric, (
+            "Only symmetric quantization is supported for MoE"
+        )
+        # Extract properties from weight_quant
+        self.num_bits = weight_quant.num_bits
+        self.packed_factor = 32 // weight_quant.num_bits
+        self.strategy = weight_quant.strategy
+        self.group_size = weight_quant.group_size
+        self.actorder = weight_quant.actorder
+
+        self.quant_type = WNA16_SUPPORTED_TYPES_MAP[self.num_bits]
+
+        self.marlin_input_dtype = get_marlin_input_dtype(layer_name)
+        self.use_flashinfer_mxint4_moe = (
+            is_flashinfer_mxint4_moe_available()
+            and self.group_size == 32
+            and weight_quant.num_bits == 4
+        )
+        self.kernel_backend = (
+            "Flashinfer" if self.use_flashinfer_mxint4_moe else "Marlin"
+        )
+        logger.info_once(
+            f"Using {self.kernel_backend} backend for WNA16 MoE "
+            f"(group_size={self.group_size}, num_bits={self.num_bits})",
+        )
+
+    def get_weight_shape(
+        self,
+        weight_name: str,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        num_groups_w2: int | None = None,
+        num_groups_w13: int | None = None,
+    ) -> tuple[int, int, int]:
+        """
+        Get the shape of the weight based on the weight name, number of experts
+        hidden size, intermediate size per partition, number of groups for w2,
+        and number of groups for w13. Pass in num_groups_w2 and num_groups_w13
+        for weight scales.
+        """
+        if weight_name == "w13_scale":
+            assert num_groups_w13 is not None, (
+                "num_groups_w13 must be provided for weight scales"
+            )
+        if weight_name == "w2_scale":
+            assert num_groups_w2 is not None, (
+                "num_groups_w2 must be provided for weight scales"
+            )
+        w13_num_shards = 2 if self.moe.is_act_and_mul else 1
+        shape_map = {
+            "w13_weight": {
+                "Flashinfer": (
+                    num_experts,
+                    w13_num_shards * intermediate_size_per_partition,
+                    hidden_size // self.packed_factor,
+                ),
+                "Marlin": (
+                    num_experts,
+                    hidden_size // self.packed_factor,
+                    w13_num_shards * intermediate_size_per_partition,
+                ),
+            },
+            "w13_scale": {
+                "Flashinfer": (
+                    num_experts,
+                    w13_num_shards * intermediate_size_per_partition,
+                    num_groups_w13,
+                ),
+                "Marlin": (
+                    num_experts,
+                    num_groups_w13,
+                    w13_num_shards * intermediate_size_per_partition,
+                ),
+            },
+            "w2_weight": {
+                "Flashinfer": (
+                    num_experts,
+                    hidden_size,
+                    intermediate_size_per_partition // self.packed_factor,
+                ),
+                "Marlin": (
+                    num_experts,
+                    intermediate_size_per_partition // self.packed_factor,
+                    hidden_size,
+                ),
+            },
+            "w2_scale": {
+                "Flashinfer": (num_experts, hidden_size, num_groups_w2),
+                "Marlin": (num_experts, num_groups_w2, hidden_size),
+            },
+        }
+        return shape_map[weight_name][self.kernel_backend]
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        intermediate_size_full = extra_weight_attrs.pop("intermediate_size_full")
+
+        # Will transpose the loaded weight along the
+        # intermediate and hidden dim sizes. Will
+        # shard for TP along the transposed dims
+        is_transposed = self.kernel_backend != "Flashinfer"
+        extra_weight_attrs.update(
+            {"is_transposed": is_transposed, "quant_method": self.strategy}
+        )
+
+        w13_weight = torch.nn.Parameter(
+            torch.empty(
+                *self.get_weight_shape(
+                    "w13_weight",
+                    num_experts,
+                    hidden_size,
+                    intermediate_size_per_partition,
+                ),
+                dtype=torch.int32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_packed", w13_weight)
+        set_weight_attrs(w13_weight, extra_weight_attrs)
+
+        w2_weight = torch.nn.Parameter(
+            torch.empty(
+                *self.get_weight_shape(
+                    "w2_weight",
+                    num_experts,
+                    hidden_size,
+                    intermediate_size_per_partition,
+                ),
+                dtype=torch.int32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_packed", w2_weight)
+        set_weight_attrs(w2_weight, extra_weight_attrs)
+
+        # In the case where we have actorder/g_idx,
+        # we do not partition the w2 scales
+        load_full_w2 = self.actorder and self.group_size != -1
+        w2_scales_size = (
+            intermediate_size_full if load_full_w2 else intermediate_size_per_partition
+        )
+
+        self.is_k_full = (not self.actorder) or (
+            intermediate_size_per_partition == intermediate_size_full
+        )
+
+        if self.strategy == "channel":
+            num_groups_w2 = num_groups_w13 = 1
+            self.group_size = -1
+        else:
+            num_groups_w2 = w2_scales_size // self.group_size
+            num_groups_w13 = hidden_size // self.group_size
+
+        layer.num_groups_w13 = num_groups_w13
+        layer.num_groups_w2 = num_groups_w2
+
+        w13_scale = torch.nn.Parameter(
+            torch.ones(
+                *self.get_weight_shape(
+                    "w13_scale",
+                    num_experts,
+                    hidden_size,
+                    intermediate_size_per_partition,
+                    num_groups_w13=num_groups_w13,
+                ),
+                dtype=params_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_scale", w13_scale)
+        set_weight_attrs(w13_scale, extra_weight_attrs)
+
+        w2_scale = torch.nn.Parameter(
+            torch.ones(
+                *self.get_weight_shape(
+                    "w2_scale",
+                    num_experts,
+                    hidden_size,
+                    intermediate_size_per_partition,
+                    num_groups_w2=num_groups_w2,
+                ),
+                dtype=params_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_scale", w2_scale)
+        set_weight_attrs(w2_scale, extra_weight_attrs)
+        set_weight_attrs(w2_scale, {"load_full_w2": load_full_w2})
+
+        w2_weight_shape = torch.nn.Parameter(
+            torch.empty(num_experts, 2), requires_grad=False
+        )
+        layer.register_parameter("w2_weight_shape", w2_weight_shape)
+        set_weight_attrs(w2_weight_shape, extra_weight_attrs)
+        w13_weight_shape = torch.nn.Parameter(
+            torch.empty(num_experts, 2), requires_grad=False
+        )
+
+        layer.register_parameter("w13_weight_shape", w13_weight_shape)
+        set_weight_attrs(w13_weight_shape, extra_weight_attrs)
+
+        w13_g_idx = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                dtype=torch.int32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_g_idx", w13_g_idx)
+        set_weight_attrs(w13_g_idx, extra_weight_attrs)
+
+        w2_g_idx = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                intermediate_size_per_partition,
+                dtype=torch.int32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_g_idx", w2_g_idx)
+        set_weight_attrs(w2_g_idx, extra_weight_attrs)
+
+        w13_g_idx_sort_indices = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                dtype=torch.int32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_g_idx_sort_indices", w13_g_idx_sort_indices)
+        set_weight_attrs(w13_g_idx_sort_indices, extra_weight_attrs)
+
+        w2_g_idx_sort_indices = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                intermediate_size_per_partition,
+                dtype=torch.int32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_g_idx_sort_indices", w2_g_idx_sort_indices)
+        set_weight_attrs(w2_g_idx_sort_indices, extra_weight_attrs)
+
+        layer.a13_scale = None
+        layer.a2_scale = None
+        layer.marlin_state = GPTQMarlinState.REPACK
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        num_experts = layer.w13_weight_g_idx.shape[0]
+        device = layer.w13_weight_g_idx.device
+        if self.kernel_backend == "Flashinfer":
+            dict_weights_mxint4 = prepare_static_weights_for_trtllm_mxint4_moe(
+                layer.w13_weight_packed,
+                layer.w13_weight_scale,
+                layer.w2_weight_packed,
+                layer.w2_weight_scale,
+            )
+            replace_parameter(
+                layer, "w13_weight_packed", dict_weights_mxint4["gemm1_weights"]
+            )
+            replace_parameter(
+                layer, "w13_weight_scale", dict_weights_mxint4["gemm1_scales"]
+            )
+            replace_parameter(
+                layer, "w2_weight_packed", dict_weights_mxint4["gemm2_weights"]
+            )
+            replace_parameter(
+                layer, "w2_weight_scale", dict_weights_mxint4["gemm2_scales"]
+            )
+            return None
+
+        is_a_8bit = (
+            self.marlin_input_dtype is not None
+            and self.marlin_input_dtype.itemsize == 1
+        )
+
+        if self.marlin_input_dtype == torch.float8_e4m3fn:
+            # NOTE: for non-zp quantization format only
+            ops.marlin_int4_fp8_preprocess(layer.w13_weight_packed, inplace=True)
+            ops.marlin_int4_fp8_preprocess(layer.w2_weight_packed, inplace=True)
+            layer.w13_weight_scale.data = layer.w13_weight_scale.data * 512
+            layer.w2_weight_scale.data = layer.w2_weight_scale.data * 512
+
+        # when running models with grouped act order,
+        # resort to g_idx values provided in checkpoint
+        if self.actorder == "group":
+            w13_g_idx_sort_indices = torch.empty_like(layer.w13_weight_g_idx)
+            w2_g_idx_sort_indices = torch.empty_like(layer.w2_weight_g_idx)
+            w13_sorted_g_idx = torch.empty_like(layer.w13_weight_g_idx)
+            w2_sorted_g_idx = torch.empty_like(layer.w2_weight_g_idx)
+
+            for e in range(num_experts):
+                w13_g_idx_sort_indices[e] = torch.argsort(layer.w13_weight_g_idx[e]).to(
+                    torch.int32
+                )
+                w2_g_idx_sort_indices[e] = torch.argsort(layer.w2_weight_g_idx[e]).to(
+                    torch.int32
+                )
+                w13_sorted_g_idx[e] = layer.w13_weight_g_idx[e][
+                    w13_g_idx_sort_indices[e]
+                ]
+                w2_sorted_g_idx[e] = layer.w2_weight_g_idx[e][w2_g_idx_sort_indices[e]]
+
+            replace_parameter(layer, "w13_weight_g_idx", w13_sorted_g_idx)
+            replace_parameter(layer, "w2_weight_g_idx", w2_sorted_g_idx)
+            replace_parameter(layer, "w13_g_idx_sort_indices", w13_g_idx_sort_indices)
+            replace_parameter(layer, "w2_g_idx_sort_indices", w2_g_idx_sort_indices)
+
+        else:
+            layer.w13_weight_g_idx = torch.nn.Parameter(
+                torch.empty((num_experts, 0), dtype=torch.int32, device=device),
+                requires_grad=False,
+            )
+            layer.w2_weight_g_idx = torch.nn.Parameter(
+                torch.empty((num_experts, 0), dtype=torch.int32, device=device),
+                requires_grad=False,
+            )
+            layer.w13_g_idx_sort_indices = torch.nn.Parameter(
+                torch.empty((num_experts, 0), dtype=torch.int32, device=device),
+                requires_grad=False,
+            )
+            layer.w2_g_idx_sort_indices = torch.nn.Parameter(
+                torch.empty((num_experts, 0), dtype=torch.int32, device=device),
+                requires_grad=False,
+            )
+
+        marlin_w13_qweight = ops.gptq_marlin_moe_repack(
+            layer.w13_weight_packed,
+            layer.w13_g_idx_sort_indices,
+            layer.w13_weight_packed.shape[1] * self.packed_factor,
+            layer.w13_weight_packed.shape[2],
+            self.num_bits,
+            is_a_8bit=is_a_8bit,
+        )
+        replace_parameter(layer, "w13_weight_packed", marlin_w13_qweight)
+
+        marlin_w2_qweight = ops.gptq_marlin_moe_repack(
+            layer.w2_weight_packed,
+            layer.w2_g_idx_sort_indices,
+            layer.w2_weight_packed.shape[1] * self.packed_factor,
+            layer.w2_weight_packed.shape[2],
+            self.num_bits,
+            is_a_8bit=is_a_8bit,
+        )
+        replace_parameter(layer, "w2_weight_packed", marlin_w2_qweight)
+
+        # Repack scales
+        marlin_w13_scales = marlin_moe_permute_scales(
+            s=layer.w13_weight_scale,
+            size_k=layer.w13_weight_packed.shape[2],
+            size_n=layer.w13_weight_scale.shape[2],
+            group_size=self.group_size,
+            is_a_8bit=is_a_8bit,
+        )
+        if self.marlin_input_dtype == torch.int8 and layer.num_groups_w13 > 1:
+            marlin_w13_scales, w13_input_global_scale = marlin_act_int8_process_scales(
+                marlin_w13_scales
+            )
+            layer.register_parameter(
+                "w13_input_global_scale",
+                torch.nn.Parameter(w13_input_global_scale, requires_grad=False),
+            )
+        replace_parameter(layer, "w13_weight_scale", marlin_w13_scales)
+
+        marlin_w2_scales = marlin_moe_permute_scales(
+            s=layer.w2_weight_scale,
+            size_k=layer.w2_weight_scale.shape[1]
+            * (self.group_size if self.group_size != -1 else self.packed_factor),
+            size_n=layer.w2_weight_scale.shape[2],
+            group_size=self.group_size,
+            is_a_8bit=is_a_8bit,
+        )
+        if self.marlin_input_dtype == torch.int8 and layer.num_groups_w2 > 1:
+            marlin_w2_scales, w2_input_global_scale = marlin_act_int8_process_scales(
+                marlin_w2_scales
+            )
+            layer.register_parameter(
+                "w2_input_global_scale",
+                torch.nn.Parameter(w2_input_global_scale, requires_grad=False),
+            )
+        replace_parameter(layer, "w2_weight_scale", marlin_w2_scales)
+
+        layer.workspace = marlin_make_workspace_new(device, 4)
+
+    def get_fused_moe_quant_config(
+        self, layer: torch.nn.Module
+    ) -> FusedMoEQuantConfig | None:
+        if self.num_bits != 4:
+            return None
+        return int4_w4a16_moe_quant_config(
+            w1_scale=layer.w13_weight_scale,
+            w2_scale=layer.w2_weight_scale,
+            w1_zp=None,
+            w2_zp=None,
+            block_shape=[0, self.group_size],
+        )
+
+    def select_gemm_impl(
+        self,
+        prepare_finalize: mk.FusedMoEPrepareAndFinalizeModular,
+        layer: torch.nn.Module,
+    ) -> mk.FusedMoEExpertsModular:
+        assert self.num_bits == 4, "only supporting w4"
+        layer.w13_weight = layer.w13_weight_packed
+        layer.w2_weight = layer.w2_weight_packed
+        assert all([w is not None for w in [layer.w13_weight, layer.w2_weight]])
+        assert self.moe_quant_config is not None
+        if (
+            prepare_finalize.activation_format
+            == mk.FusedMoEActivationFormat.BatchedExperts
+        ):
+            max_num_tokens_per_rank = prepare_finalize.max_num_tokens_per_rank()
+            assert max_num_tokens_per_rank is not None
+            return BatchedMarlinExperts(
+                max_num_tokens=max_num_tokens_per_rank,
+                num_dispatchers=prepare_finalize.num_dispatchers(),
+                moe_config=self.moe,
+                quant_config=self.moe_quant_config,
+                w13_g_idx=layer.w13_weight_g_idx,
+                w2_g_idx=layer.w2_weight_g_idx,
+                w13_g_idx_sort_indices=layer.w13_g_idx_sort_indices,
+                w2_g_idx_sort_indices=layer.w2_g_idx_sort_indices,
+                is_k_full=self.is_k_full,
+            )
+        else:
+            return MarlinExperts(
+                moe_config=self.moe,
+                quant_config=self.moe_quant_config,
+                w13_g_idx=layer.w13_weight_g_idx,
+                w2_g_idx=layer.w2_weight_g_idx,
+                w13_g_idx_sort_indices=layer.w13_g_idx_sort_indices,
+                w2_g_idx_sort_indices=layer.w2_g_idx_sort_indices,
+                is_k_full=self.is_k_full,
+            )
+
+    @property
+    def is_monolithic(self) -> bool:
+        return self.kernel_backend == "Flashinfer"
+
+    def apply_monolithic(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        router_logits: torch.Tensor,
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        assert self.kernel_backend == "Flashinfer"
+        return flashinfer_trtllm_mxint4_moe(
+            x=x,
+            router_logits=router_logits,
+            w13_weight_packed=layer.w13_weight_packed,
+            w13_weight_scale=layer.w13_weight_scale,
+            w2_weight_packed=layer.w2_weight_packed,
+            w2_weight_scale=layer.w2_weight_scale,
+            global_num_experts=layer.global_num_experts,
+            top_k=layer.top_k,
+            intermediate_size_per_partition=layer.intermediate_size_per_partition,
+            local_num_experts=layer.local_num_experts,
+            ep_rank=layer.ep_rank,
+            num_expert_group=layer.num_expert_group,
+            topk_group=layer.topk_group,
+            e_score_correction_bias=layer.e_score_correction_bias,
+            routing_method_type=layer.routing_method_type,
+        )
+
+    def apply(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ) -> torch.Tensor:
+        assert self.kernel_backend == "Marlin"
+        return fused_marlin_moe(
+            x,
+            layer.w13_weight_packed,
+            layer.w2_weight_packed,
+            None,
+            None,
+            layer.w13_weight_scale,
+            layer.w2_weight_scale,
+            topk_weights,
+            topk_ids,
+            input_global_scale1=getattr(layer, "w13_input_global_scale", None),
+            input_global_scale2=getattr(layer, "w2_input_global_scale", None),
+            quant_type_id=self.quant_type.id,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            global_num_experts=layer.global_num_experts,
+            activation=layer.activation,
+            expert_map=layer.expert_map,
+            g_idx1=layer.w13_weight_g_idx,
+            g_idx2=layer.w2_weight_g_idx,
+            sort_indices1=layer.w13_g_idx_sort_indices,
+            sort_indices2=layer.w2_g_idx_sort_indices,
+            workspace=layer.workspace,
+            input_dtype=self.marlin_input_dtype,
+            is_k_full=self.is_k_full,
+            inplace=not self.moe.disable_inplace,
+        )
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py
index c9dd98dfd4e0..6aacd9e7ae57 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py
@@ -2,19 +2,17 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from .compressed_tensors_scheme import CompressedTensorsScheme
+from .compressed_tensors_w4a4_mxfp4 import CompressedTensorsW4A4Mxfp4
 from .compressed_tensors_w4a4_nvfp4 import CompressedTensorsW4A4Fp4
 from .compressed_tensors_w4a8_fp8 import CompressedTensorsW4A8Fp8
 from .compressed_tensors_w4a8_int import CompressedTensorsW4A8Int
-from .compressed_tensors_w4a16_mxfp4 import CompressedTensorsW4A16Mxfp4
 from .compressed_tensors_w4a16_nvfp4 import CompressedTensorsW4A16Fp4
 from .compressed_tensors_w8a8_fp8 import CompressedTensorsW8A8Fp8
 from .compressed_tensors_w8a8_int8 import CompressedTensorsW8A8Int8
+from .compressed_tensors_w8a8_mxfp8 import CompressedTensorsW8A8Mxfp8
 from .compressed_tensors_w8a16_fp8 import CompressedTensorsW8A16Fp8
 from .compressed_tensors_wNa16 import WNA16_SUPPORTED_BITS, CompressedTensorsWNA16
 
-# This avoids circular import error
-from .compressed_tensors_24 import CompressedTensors24  # isort: skip
-
 __all__ = [
     "CompressedTensorsScheme",
     "CompressedTensorsWNA16",
@@ -22,10 +20,10 @@
     "CompressedTensorsW8A8Int8",
     "CompressedTensorsW8A8Fp8",
     "WNA16_SUPPORTED_BITS",
-    "CompressedTensors24",
     "CompressedTensorsW4A16Fp4",
-    "CompressedTensorsW4A16Mxfp4",
+    "CompressedTensorsW4A4Mxfp4",
     "CompressedTensorsW4A4Fp4",
     "CompressedTensorsW4A8Int",
     "CompressedTensorsW4A8Fp8",
+    "CompressedTensorsW8A8Mxfp8",
 ]
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py
deleted file mode 100644
index e28bc36368be..000000000000
--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-from collections.abc import Callable
-from typing import Any
-
-import torch
-from compressed_tensors.quantization import (
-    QuantizationArgs,
-)
-
-from vllm.model_executor.layers.quantization.compressed_tensors.schemes import (
-    CompressedTensorsScheme,
-)
-
-__all__ = ["CompressedTensors24"]
-
-
-class CompressedTensors24(CompressedTensorsScheme):
-    def __init__(
-        self,
-        quantized: bool = False,
-        weight_quant: QuantizationArgs | None = None,
-        input_quant: QuantizationArgs | None = None,
-        model_compression_config: dict[str, Any] | None = None,
-    ):
-        raise NotImplementedError("Sparse24 models are no longer supported by vLLM")
-
-    @classmethod
-    def get_min_capability(cls) -> int:
-        raise NotImplementedError("Sparse24 models are no longer supported by vLLM")
-
-    def create_weights(
-        self,
-        layer: torch.nn.Module,
-        input_size: int,
-        output_partition_sizes: list[int],
-        input_size_per_partition: int,
-        params_dtype: torch.dtype,
-        weight_loader: Callable,
-        **kwargs,
-    ):
-        raise NotImplementedError("Sparse24 models are no longer supported by vLLM")
-
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
-        raise NotImplementedError("Sparse24 models are no longer supported by vLLM")
-
-    def apply_weights(
-        self,
-        layer: torch.nn.Module,
-        x: torch.Tensor,
-        bias: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        raise NotImplementedError("Sparse24 models are no longer supported by vLLM")
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_mxfp4.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a4_mxfp4.py
similarity index 76%
rename from vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_mxfp4.py
rename to vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a4_mxfp4.py
index 77cea0f83e1c..7b994e10e448 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_mxfp4.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a4_mxfp4.py
@@ -5,24 +5,21 @@
 import torch
 from torch.nn.parameter import Parameter
 
+from vllm.model_executor.kernels.linear import init_mxfp4_linear_kernel
 from vllm.model_executor.layers.quantization.compressed_tensors.schemes import (
     CompressedTensorsScheme,
 )
-from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
-    apply_fp4_marlin_linear,
-    prepare_fp4_layer_for_marlin,
-)
 from vllm.model_executor.parameter import (
     GroupQuantScaleParameter,
     ModelWeightParameter,
 )
 
-__all__ = ["CompressedTensorsW4A16Mxfp4"]
+__all__ = ["CompressedTensorsW4A4Mxfp4"]
 
 
-class CompressedTensorsW4A16Mxfp4(CompressedTensorsScheme):
+class CompressedTensorsW4A4Mxfp4(CompressedTensorsScheme):
     """
-    Compressed tensors scheme for MXFP4 weight-only quantization.
+    Compressed tensors scheme for MXFP4.
 
     Supports models quantized with the compressed-tensors mxfp4-pack-quantized
     format.
@@ -31,10 +28,14 @@ class CompressedTensorsW4A16Mxfp4(CompressedTensorsScheme):
     - 4-bit float weights (E2M1) packed into uint8
     - Per-group E8M0 scales with group_size=32
     - No global scale (unlike NVFP4)
+
+    On SM100+ with FlashInfer: true W4A4 (activations dynamically quantized).
+    Otherwise: W4A16 weight-only via Marlin.
     """
 
     def __init__(self):
         self.group_size = 32
+        self.kernel = init_mxfp4_linear_kernel()
 
     @classmethod
     def get_min_capability(cls) -> int:
@@ -82,11 +83,9 @@ def create_weights(
         layer.register_parameter("weight_scale", weight_scale)
 
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
-        # Rename weight_packed to weight that marlin expects
         layer.weight = Parameter(layer.weight_packed.data, requires_grad=False)
         del layer.weight_packed
-
-        prepare_fp4_layer_for_marlin(layer)
+        self.kernel.process_weights_after_loading(layer)
 
     def apply_weights(
         self,
@@ -94,13 +93,4 @@ def apply_weights(
         x: torch.Tensor,
         bias: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        return apply_fp4_marlin_linear(
-            input=x,
-            weight=layer.weight,
-            weight_scale=layer.weight_scale,
-            weight_global_scale=None,
-            workspace=layer.workspace,
-            size_n=layer.output_size_per_partition,
-            size_k=layer.input_size_per_partition,
-            bias=bias,
-        )
+        return self.kernel.apply_weights(layer, x, bias)
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a4_nvfp4.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a4_nvfp4.py
index a3b53626bf6c..c818f334589b 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a4_nvfp4.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a4_nvfp4.py
@@ -5,26 +5,26 @@
 import torch
 from torch.nn.parameter import Parameter
 
+from vllm.logger import init_logger
+from vllm.model_executor.kernels.linear import init_nvfp4_linear_kernel
 from vllm.model_executor.layers.quantization.compressed_tensors.schemes import (
     CompressedTensorsScheme,
 )
-from vllm.model_executor.layers.quantization.utils.nvfp4_utils import (
-    apply_nvfp4_linear,
-    convert_to_nvfp4_linear_kernel_format,
-    select_nvfp4_linear_backend,
-)
 from vllm.model_executor.parameter import (
     GroupQuantScaleParameter,
     ModelWeightParameter,
     PerTensorScaleParameter,
 )
 
+logger = init_logger(__name__)
+
+
 __all__ = ["CompressedTensorsW4A4Fp4"]
 
 
 class CompressedTensorsW4A4Fp4(CompressedTensorsScheme):
     def __init__(self):
-        self.backend = select_nvfp4_linear_backend()
+        self.kernel = init_nvfp4_linear_kernel()
         self.group_size = 16
 
     @classmethod
@@ -89,6 +89,19 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         # Rename CT checkpoint names to standardized names
         layer.weight = layer.weight_packed
         del layer.weight_packed
+
+        if (
+            torch.unique(layer.input_global_scale).numel() != 1
+            or torch.unique(layer.weight_global_scale).numel() != 1
+        ):
+            logger.warning_once(
+                "In NVFP4 linear, the global scale for input or weight are different"
+                " for parallel layers (e.g. q_proj, k_proj, v_proj). This "
+                " will likely result in reduced accuracy. Please verify the model"
+                " accuracy. Consider using a checkpoint with a shared global NVFP4"
+                " scale for fused layers."
+            )
+
         # Process global scales (CT stores as divisors, i.e. 1/scale)
         input_global_scale_inv = layer.input_global_scale.max().to(torch.float32)
         layer.input_global_scale = Parameter(
@@ -108,7 +121,7 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         )
 
         # Convert layer to NVFP4 linear kernel format
-        convert_to_nvfp4_linear_kernel_format(self.backend, layer)
+        self.kernel.process_weights_after_loading(layer)
 
     def apply_weights(
         self,
@@ -116,9 +129,4 @@ def apply_weights(
         x: torch.Tensor,
         bias: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        return apply_nvfp4_linear(
-            backend=self.backend,
-            layer=layer,
-            x=x,
-            bias=bias,
-        )
+        return self.kernel.apply_weights(layer=layer, x=x, bias=bias)
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py
index 7bffc3218b42..42b35a420cab 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py
@@ -6,45 +6,49 @@
 import torch
 from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
 
+from vllm.config import get_current_vllm_config
+from vllm.model_executor.kernels.linear import (
+    init_wfp8_a16_linear_kernel,
+)
 from vllm.model_executor.layers.quantization.compressed_tensors.schemes import (
     CompressedTensorsScheme,
 )
+from vllm.model_executor.layers.quantization.compressed_tensors.utils import (
+    STRATEGY_TO_PARAMETER_TYPE,
+    STRATEGY_TO_WEIGHT_QUANT_KEY,
+)
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
     create_fp8_scale_parameter,
     create_fp8_weight_parameter,
-    process_fp8_weight_block_strategy,
     validate_fp8_block_shape,
 )
-from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import (
-    apply_fp8_marlin_linear,
-    prepare_fp8_layer_for_marlin,
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    kFp8DynamicTensorSym,
+    kFp8StaticTensorSym,
 )
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
     convert_to_channelwise,
 )
-from vllm.model_executor.parameter import (
-    BlockQuantScaleParameter,
-    ChannelQuantScaleParameter,
-    PerTensorScaleParameter,
-)
+from vllm.model_executor.parameter import PerTensorScaleParameter
 from vllm.model_executor.utils import replace_parameter
 
 __all__ = ["CompressedTensorsW8A16Fp8"]
 
-strategy_to_parameter_type = {
-    QuantizationStrategy.BLOCK: BlockQuantScaleParameter,
-    QuantizationStrategy.CHANNEL: ChannelQuantScaleParameter,
-    QuantizationStrategy.TENSOR: PerTensorScaleParameter,
-}
-
 
 class CompressedTensorsW8A16Fp8(CompressedTensorsScheme):
     def __init__(self, weight_quant: QuantizationArgs, is_static_input_scheme: bool):
         self.weight_quant = weight_quant
         self.strategy = weight_quant.strategy
+        self.out_dtype = torch.get_default_dtype()
+        self.input_dtype = get_current_vllm_config().model_config.dtype
         self.is_static_input_scheme = is_static_input_scheme
         self.weight_block_size = self.weight_quant.block_structure
 
+        self.weight_quant_key = STRATEGY_TO_WEIGHT_QUANT_KEY[self.strategy]
+        self.activation_quant_key = (
+            kFp8StaticTensorSym if is_static_input_scheme else kFp8DynamicTensorSym
+        )
+
     @classmethod
     def get_min_capability(cls) -> int:
         # turing and up
@@ -89,7 +93,7 @@ def create_weights(
 
         # WEIGHT SCALE
         weight_scale = create_fp8_scale_parameter(
-            strategy_to_parameter_type[self.strategy],
+            STRATEGY_TO_PARAMETER_TYPE[self.strategy],
             output_partition_sizes,
             input_size_per_partition,
             layer.weight_block_size,
@@ -105,32 +109,36 @@ def create_weights(
             )
             layer.register_parameter("input_scale", input_scale)
 
+        self.linear_kernel = init_wfp8_a16_linear_kernel(
+            weight_quant_key=self.weight_quant_key,
+            activation_quant_key=self.activation_quant_key,
+            weight_shape=layer.weight.shape,
+            input_dtype=self.input_dtype,
+            out_dtype=self.out_dtype,
+        )
+
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
-        weight = layer.weight
-        weight_scale = layer.weight_scale
-        size_k_first = True
-        # TODO(rob): refactor block quant into separate class.
         if self.strategy == QuantizationStrategy.BLOCK:
             assert self.is_static_input_scheme is False
-            size_k_first = False
-            weight, weight_scale = process_fp8_weight_block_strategy(
-                weight, weight_scale
-            )
+            # MarlinFP8ScaledMMLinearKernel uses "weight_scale_inv" for block
+            # quant, while CT registers the scale as "weight_scale".
+            # Rename by deleting the old parameter and adding the new one so
+            # that prepare_fp8_layer_for_marlin (which prefers "weight_scale"
+            # over "weight_scale_inv") picks up "weight_scale_inv" correctly.
+            weight_scale_data = layer.weight_scale.data
+            del layer._parameters["weight_scale"]
+            replace_parameter(layer, "weight_scale_inv", weight_scale_data)
         else:
-            # Weights must be transposed for marlin
-            weight = weight.t()
             if self.strategy == QuantizationStrategy.TENSOR:
-                # If we have a fused module (QKV, MLP) with per tensor scales,
-                # we expand each scale to its shard's channels.
-                weight_scale = convert_to_channelwise(
-                    weight_scale, layer.logical_widths
+                # For fused modules with per-tensor scales, expand each scale
+                # to its shard's channels.
+                replace_parameter(
+                    layer,
+                    "weight_scale",
+                    convert_to_channelwise(layer.weight_scale, layer.logical_widths),
                 )
 
-        # Update layer with new values
-        replace_parameter(layer, "weight", weight.data)
-        replace_parameter(layer, "weight_scale", weight_scale.data)
-
-        prepare_fp8_layer_for_marlin(layer, size_k_first=size_k_first)
+        self.linear_kernel.process_weights_after_loading(layer)
 
     def apply_weights(
         self,
@@ -138,12 +146,4 @@ def apply_weights(
         x: torch.Tensor,
         bias: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        return apply_fp8_marlin_linear(
-            input=x,
-            weight=layer.weight,
-            weight_scale=layer.weight_scale,
-            workspace=layer.workspace,
-            size_n=layer.output_size_per_partition,
-            size_k=layer.input_size_per_partition,
-            bias=bias,
-        )
+        return self.linear_kernel.apply_weights(layer, x, bias)
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
index 9c4914e68778..7445634a8253 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
@@ -8,6 +8,7 @@
 from torch.nn import Parameter
 
 from vllm._aiter_ops import rocm_aiter_ops
+from vllm.config import get_current_vllm_config
 from vllm.logger import init_logger
 from vllm.model_executor.kernels.linear import (
     init_fp8_linear_kernel,
@@ -15,40 +16,30 @@
 from vllm.model_executor.layers.quantization.compressed_tensors.schemes import (
     CompressedTensorsScheme,
 )
+from vllm.model_executor.layers.quantization.compressed_tensors.utils import (
+    STRATEGY_TO_PARAMETER_TYPE,
+)
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-    W8A8BlockFp8LinearOp,
     create_fp8_input_scale,
     create_fp8_scale_parameter,
     create_fp8_weight_parameter,
-    maybe_post_process_fp8_weight_block,
-    process_fp8_weight_block_strategy,
     process_fp8_weight_channel_strategy,
     process_fp8_weight_tensor_strategy,
     validate_fp8_block_shape,
 )
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     GroupShape,
+    create_fp8_quant_key,
     kFp8DynamicTokenSym,
+    kFp8StaticChannelSym,
     kFp8StaticTensorSym,
-    kFp8StaticTokenSym,
 )
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
     cutlass_block_fp8_supported,
 )
-from vllm.model_executor.parameter import (
-    BlockQuantScaleParameter,
-    ChannelQuantScaleParameter,
-    PerTensorScaleParameter,
-)
 
 __all__ = ["CompressedTensorsW8A8Fp8"]
 
-strategy_to_parameter_type = {
-    QuantizationStrategy.BLOCK: BlockQuantScaleParameter,
-    QuantizationStrategy.CHANNEL: ChannelQuantScaleParameter,
-    QuantizationStrategy.TENSOR: PerTensorScaleParameter,
-}
-
 STATIC_QUANT = True
 DYNAMIC_QUANT = False
 activation_quant_key_mapping = {
@@ -56,7 +47,7 @@
     DYNAMIC_QUANT: kFp8DynamicTokenSym,
 }
 weight_quant_key_mapping = {
-    QuantizationStrategy.CHANNEL: kFp8StaticTokenSym,
+    QuantizationStrategy.CHANNEL: kFp8StaticChannelSym,
     QuantizationStrategy.TENSOR: kFp8StaticTensorSym,
 }
 logger = init_logger(__name__)
@@ -67,6 +58,7 @@ def __init__(self, weight_quant: QuantizationArgs, is_static_input_scheme: bool)
         self.weight_quant = weight_quant
         self.strategy = weight_quant.strategy
         self.out_dtype = torch.get_default_dtype()
+        self.input_dtype = get_current_vllm_config().model_config.dtype
         self.is_static_input_scheme = is_static_input_scheme
         self.weight_block_size = self.weight_quant.block_structure
 
@@ -75,21 +67,17 @@ def __init__(self, weight_quant: QuantizationArgs, is_static_input_scheme: bool)
             self.use_aiter_and_is_supported = rocm_aiter_ops.is_linear_fp8_enabled()
             assert not self.is_static_input_scheme
             self.act_q_group_shape = GroupShape(1, self.weight_block_size[0])
-            self.w8a8_block_fp8_linear = W8A8BlockFp8LinearOp(
-                weight_group_shape=GroupShape(*self.weight_block_size),
-                act_quant_group_shape=self.act_q_group_shape,
-                cutlass_block_fp8_supported=self.cutlass_block_fp8_supported,
-                use_aiter_and_is_supported=self.use_aiter_and_is_supported,
+            self.weight_quant_key = create_fp8_quant_key(
+                static=True, group_shape=GroupShape(*self.weight_block_size)
             )
-        else:
-            activation_quant_key = activation_quant_key_mapping[is_static_input_scheme]
-            weight_quant_key = weight_quant_key_mapping[self.strategy]
-            self.fp8_linear = init_fp8_linear_kernel(
-                activation_quant_key=activation_quant_key,
-                weight_quant_key=weight_quant_key,
-                out_dtype=self.out_dtype,
-                module_name=self.__class__.__name__,
+            self.activation_quant_key = create_fp8_quant_key(
+                static=False, group_shape=self.act_q_group_shape
             )
+        else:
+            self.activation_quant_key = activation_quant_key_mapping[
+                self.is_static_input_scheme
+            ]
+            self.weight_quant_key = weight_quant_key_mapping[self.strategy]
 
     @classmethod
     def get_min_capability(cls) -> int:
@@ -133,7 +121,7 @@ def create_weights(
 
         # WEIGHT SCALE
         weight_scale = create_fp8_scale_parameter(
-            strategy_to_parameter_type[self.strategy],
+            STRATEGY_TO_PARAMETER_TYPE[self.strategy],
             output_partition_sizes,
             input_size_per_partition,
             layer.weight_block_size,
@@ -146,6 +134,15 @@ def create_weights(
             input_scale = create_fp8_input_scale(output_partition_sizes, weight_loader)
             layer.register_parameter("input_scale", input_scale)
 
+        self.fp8_linear = init_fp8_linear_kernel(
+            activation_quant_key=self.activation_quant_key,
+            weight_quant_key=self.weight_quant_key,
+            input_dtype=self.input_dtype,
+            out_dtype=self.out_dtype,
+            weight_shape=(output_size_per_partition, input_size_per_partition),
+            module_name=self.__class__.__name__,
+        )
+
     def process_weights_after_loading(self, layer) -> None:
         if self.strategy == QuantizationStrategy.TENSOR:
             weight, weight_scale, input_scale = process_fp8_weight_tensor_strategy(
@@ -163,10 +160,12 @@ def process_weights_after_loading(self, layer) -> None:
 
         elif self.strategy == QuantizationStrategy.BLOCK:
             assert self.is_static_input_scheme is False
-            weight, weight_scale = process_fp8_weight_block_strategy(
-                layer.weight, layer.weight_scale
-            )
-            input_scale = None
+            self.fp8_linear.process_weights_after_loading(layer)
+
+            layer.input_scale = None
+            # fp8_linear.process_weights_after_loading applies the post process
+            # and reassigns the weight and weight_scale buffers to layer attributes.
+            return
 
         else:
             raise ValueError(
@@ -185,8 +184,6 @@ def process_weights_after_loading(self, layer) -> None:
             layer.input_scale = Parameter(layer.input_scale.max(), requires_grad=False)
         else:
             layer.input_scale = None
-        if self.strategy == QuantizationStrategy.BLOCK:
-            maybe_post_process_fp8_weight_block(layer)
 
         if hasattr(self, "fp8_linear"):
             self.fp8_linear.process_weights_after_loading(layer)
@@ -197,13 +194,4 @@ def apply_weights(
         x: torch.Tensor,
         bias: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        if self.weight_block_size is not None:
-            return self.w8a8_block_fp8_linear.apply(
-                input=x,
-                weight=layer.weight,
-                weight_scale=layer.weight_scale,
-                input_scale=layer.input_scale,
-                bias=bias,
-            )
-
         return self.fp8_linear.apply_weights(layer, x, bias)
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_mxfp8.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_mxfp8.py
new file mode 100644
index 000000000000..5c511fc98d99
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_mxfp8.py
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections.abc import Callable
+
+import torch
+
+from vllm.model_executor.kernels.linear import init_mxfp8_linear_kernel
+from vllm.model_executor.layers.quantization.compressed_tensors.schemes import (
+    CompressedTensorsScheme,
+)
+from vllm.model_executor.layers.quantization.utils.mxfp8_utils import (
+    MXFP8_BLOCK_SIZE,
+    MXFP8_SCALE_DTYPE,
+    MXFP8_VALUE_DTYPE,
+)
+from vllm.model_executor.parameter import (
+    GroupQuantScaleParameter,
+    ModelWeightParameter,
+)
+
+__all__ = ["CompressedTensorsW8A8Mxfp8"]
+
+
+class CompressedTensorsW8A8Mxfp8(CompressedTensorsScheme):
+    """
+    Compressed tensors scheme for MXFP8 quantization (W8A8).
+
+    Loads pre-quantized MXFP8 weights from compressed-tensors checkpoints.
+    Activations are dynamically quantized to MXFP8 at runtime.
+
+    MXFP8 format:
+    - 8-bit float weights (E4M3) stored as float8_e4m3fn
+    - Per-group E8M0 scales (uint8) with group_size=32
+    - Activations dynamically quantized to MXFP8 during inference
+    """
+
+    def __init__(self):
+        self.kernel = init_mxfp8_linear_kernel()
+
+    @classmethod
+    def get_min_capability(cls) -> int:
+        return 75
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        output_partition_sizes: list[int],
+        input_size_per_partition: int,
+        params_dtype: torch.dtype,
+        weight_loader: Callable,
+        **kwargs,
+    ):
+        output_size_per_partition = sum(output_partition_sizes)
+        layer.logical_widths = output_partition_sizes
+        layer.input_size_per_partition = input_size_per_partition
+        layer.output_size_per_partition = output_size_per_partition
+        layer.params_dtype = params_dtype
+
+        weight = ModelWeightParameter(
+            data=torch.empty(
+                output_size_per_partition,
+                input_size_per_partition,
+                dtype=MXFP8_VALUE_DTYPE,
+            ),
+            input_dim=1,
+            output_dim=0,
+            weight_loader=weight_loader,
+        )
+        layer.register_parameter("weight", weight)
+
+        weight_scale = GroupQuantScaleParameter(
+            data=torch.empty(
+                output_size_per_partition,
+                input_size_per_partition // MXFP8_BLOCK_SIZE,
+                dtype=MXFP8_SCALE_DTYPE,
+            ),
+            input_dim=1,
+            output_dim=0,
+            weight_loader=weight_loader,
+        )
+        layer.register_parameter("weight_scale", weight_scale)
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        self.kernel.process_weights_after_loading(layer)
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        return self.kernel.apply_weights(layer, x, bias)
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
index f88092169110..def4797b1396 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
@@ -6,8 +6,36 @@
 
 import regex as re
 from compressed_tensors import CompressionFormat
+from compressed_tensors.quantization import QuantizationStrategy
 from torch.nn import Module
 
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    kFp8Static128BlockSym,
+    kFp8StaticChannelSym,
+    kFp8StaticTensorSym,
+)
+from vllm.model_executor.parameter import (
+    BlockQuantScaleParameter,
+    ChannelQuantScaleParameter,
+    PerTensorScaleParameter,
+)
+
+# Maps quantization strategy to the corresponding scale parameter type.
+# Shared across compressed-tensor scheme classes (w8a16_fp8, w8a8_fp8, …).
+STRATEGY_TO_PARAMETER_TYPE = {
+    QuantizationStrategy.BLOCK: BlockQuantScaleParameter,
+    QuantizationStrategy.CHANNEL: ChannelQuantScaleParameter,
+    QuantizationStrategy.TENSOR: PerTensorScaleParameter,
+}
+
+# Maps quantization strategy to the vLLM weight-quant key used for
+# kernel selection.  Shared across compressed-tensor scheme classes.
+STRATEGY_TO_WEIGHT_QUANT_KEY = {
+    QuantizationStrategy.BLOCK: kFp8Static128BlockSym,
+    QuantizationStrategy.CHANNEL: kFp8StaticChannelSym,
+    QuantizationStrategy.TENSOR: kFp8StaticTensorSym,
+}
+
 
 def is_activation_quantization_format(format: str) -> bool:
     _ACTIVATION_QUANTIZATION_FORMATS = [
@@ -87,7 +115,7 @@ def find_matched_target(
     module: Module,
     targets: Iterable[str],
     fused_mapping: Mapping[str, list[str]] = MappingProxyType({}),
-) -> str:
+) -> str | None:
     """
     Helper function to look up which "target" in the compressed-tensors
     config that a layer corresponds to.
@@ -122,12 +150,6 @@ def find_matched_target(
         or _match_fused_layer(layer_name, targets, fused_mapping)
     )
 
-    if matched_target is None:
-        raise ValueError(
-            f"Unable to find matching target for {layer_name} in the "
-            "compressed-tensors config."
-        )
-
     return matched_target
 
 
diff --git a/vllm/model_executor/layers/quantization/cpu_wna16.py b/vllm/model_executor/layers/quantization/cpu_wna16.py
index ea7afef27ebd..6853013af88d 100644
--- a/vllm/model_executor/layers/quantization/cpu_wna16.py
+++ b/vllm/model_executor/layers/quantization/cpu_wna16.py
@@ -5,10 +5,10 @@
 
 import torch
 from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE
+from transformers import PretrainedConfig
 
-from vllm._custom_ops import (
-    cpu_gemm_wna16,
-)
+import vllm.envs as envs
+from vllm import _custom_ops as ops
 from vllm.logger import init_logger
 from vllm.model_executor.layers.linear import (
     LinearBase,
@@ -104,7 +104,7 @@ def from_config(cls, config: dict[str, Any]) -> "CPUAWQConfig":
 
     @classmethod
     def override_quantization_method(
-        cls, hf_quant_cfg, user_quant
+        cls, hf_quant_cfg, user_quant, hf_config=None
     ) -> "QuantizationMethods | None":
         quant_method = hf_quant_cfg.get("quant_method", "").lower()
         if current_platform.is_cpu() and (quant_method == "awq"):
@@ -133,7 +133,12 @@ def apply_vllm_mapper(self, hf_to_vllm_mapper: "WeightsMapper"):
                 self.modules_to_not_convert
             )
 
-    def maybe_update_config(self, model_name: str, revision: str | None = None):
+    def maybe_update_config(
+        self,
+        model_name: str,
+        hf_config: PretrainedConfig | None = None,
+        revision: str | None = None,
+    ):
         if self.modules_to_not_convert:
             return
 
@@ -224,7 +229,14 @@ def create_weights(
         layer.register_parameter("scales", scales)
 
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
-        torch.set_printoptions(profile="full", linewidth=5000, sci_mode=False)
+        layer.use_w4a8 = envs.VLLM_CPU_INT4_W4A8 and torch.cpu._is_amx_tile_supported()
+        if layer.use_w4a8:
+            self._process_weights_sglang_int4(layer)
+        else:
+            self._process_weights_woq(layer)
+
+    def _process_weights_woq(self, layer: torch.nn.Module) -> None:
+        """Original WOQ int4 repack path."""
         packed_weight = layer.qweight.data
         packed_zeros = layer.qzeros.data
         group_num = packed_zeros.size(0)
@@ -260,8 +272,6 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         )
 
         zeros = pack_cols(zeros, bits, group_num, output_size).contiguous()
-        # make 16 output channel as a block and transpose to
-        # the make the block contiguous
         weight = pack_cols(weight, bits, input_size, output_size)
         weight = (
             weight.view(input_size, -1, 16 // pack_factor)
@@ -272,13 +282,43 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         layer.qweight.data = weight
         layer.qzeros.data = zeros
 
+    def _process_weights_sglang_int4(self, layer: torch.nn.Module) -> None:
+        """SGLang INT4 W4A8 path: pack int4 weights with VNNI reordering."""
+        packed_weight = layer.qweight.data
+        packed_zeros = layer.qzeros.data
+        scales = layer.scales.data
+        blocked_w, blocked_zp, blocked_s = ops.convert_weight_packed_scale_zp(
+            packed_weight,
+            packed_zeros,
+            scales,
+            ops.CPUQuantAlgo.AWQ,
+        )
+
+        layer.packed_weight = blocked_w
+        layer.packed_qzeros = blocked_zp
+        layer.packed_scales = blocked_s
+        layer.qweight = None
+        layer.qzeros = None
+        layer.scales = None
+
     def apply(
         self,
         layer: torch.nn.Module,
         x: torch.Tensor,
         bias: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        x = cpu_gemm_wna16(
+        if layer.use_w4a8:
+            return self._apply_sglang_int4(layer, x, bias)
+        return self._apply_woq(layer, x, bias)
+
+    def _apply_woq(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        """Original WOQ int4 GEMM path."""
+        x = ops.cpu_gemm_wna16(
             input=x,
             q_weight=layer.qweight,
             scales=layer.scales,
@@ -290,6 +330,21 @@ def apply(
         )
         return x
 
+    def _apply_sglang_int4(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        """SGLang INT4 W4A8 GEMM path."""
+        return ops.int4_scaled_mm_cpu(
+            x,
+            layer.packed_weight,
+            layer.packed_qzeros,
+            layer.packed_scales,
+            bias,
+        )
+
 
 def _get_isa_hint(dtype: torch.dtype) -> str:
     supports_amx = torch.cpu._is_amx_tile_supported()
diff --git a/vllm/model_executor/layers/quantization/experts_int8.py b/vllm/model_executor/layers/quantization/experts_int8.py
index d971f3b5b0d2..3db2916055ca 100644
--- a/vllm/model_executor/layers/quantization/experts_int8.py
+++ b/vllm/model_executor/layers/quantization/experts_int8.py
@@ -5,15 +5,8 @@
 
 import torch
 
-from vllm.distributed import get_tensor_model_parallel_rank, get_tp_group
 from vllm.model_executor.layers.fused_moe import (
-    FusedMoE,
-    FusedMoEConfig,
-    FusedMoEMethodBase,
-)
-from vllm.model_executor.layers.fused_moe.config import (
-    FusedMoEQuantConfig,
-    int8_w8a16_moe_quant_config,
+    RoutedExperts,
 )
 from vllm.model_executor.layers.linear import LinearBase, UnquantizedLinearMethod
 from vllm.model_executor.layers.quantization import QuantizationMethods
@@ -21,11 +14,18 @@
     QuantizationConfig,
     QuantizeMethodBase,
 )
-from vllm.model_executor.utils import set_weight_attrs
+from vllm.model_executor.layers.quantization.online.int8 import (
+    Int8OnlineMoEMethod,
+)
 
 
 class ExpertsInt8Config(QuantizationConfig):
-    """Config class for Int8 experts quantization."""
+    """Online int8 quantization for MoE expert weights.
+    Linear layers are left unquantized.
+
+    Backward-compatible config for ``--quantization experts_int8``.
+    Prefer ``--quantization int8_per_channel``
+    """
 
     def __init__(self) -> None:
         super().__init__()
@@ -55,150 +55,6 @@ def get_quant_method(
     ) -> "QuantizeMethodBase | None":
         if isinstance(layer, LinearBase):
             return UnquantizedLinearMethod()
-        elif isinstance(layer, FusedMoE):
-            return ExpertsInt8MoEMethod(self, layer.moe_config)
+        elif isinstance(layer, RoutedExperts):
+            return Int8OnlineMoEMethod(layer=layer)
         return None
-
-
-class ExpertsInt8MoEMethod(FusedMoEMethodBase):
-    def __init__(
-        self,
-        quant_config: ExpertsInt8Config,
-        moe: FusedMoEConfig,
-    ):
-        super().__init__(moe)
-        self.quant_config = quant_config
-
-    def create_weights(
-        self,
-        layer: torch.nn.Module,
-        num_experts: int,
-        hidden_size: int,
-        intermediate_size_per_partition: int,
-        params_dtype: torch.dtype,
-        **extra_weight_attrs,
-    ):
-        int8_dtype = torch.int8
-
-        assert "weight_loader" in extra_weight_attrs
-        weight_loader = extra_weight_attrs["weight_loader"]
-        wrapped_weight_loader = ExpertsInt8MoEMethod.quantizing_weight_loader(
-            layer, weight_loader
-        )
-        extra_weight_attrs["weight_loader"] = wrapped_weight_loader
-
-        # Fused gate_up_proj (column parallel)
-        w13_weight = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                2 * intermediate_size_per_partition,
-                hidden_size,
-                dtype=int8_dtype,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight", w13_weight)
-        set_weight_attrs(w13_weight, extra_weight_attrs)
-
-        # down_proj (row parallel)
-        w2_weight = torch.nn.Parameter(
-            torch.empty(
-                num_experts,
-                hidden_size,
-                intermediate_size_per_partition,
-                dtype=int8_dtype,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_weight", w2_weight)
-        set_weight_attrs(w2_weight, extra_weight_attrs)
-
-        w13_scale = torch.nn.Parameter(
-            torch.zeros(
-                num_experts, 2 * intermediate_size_per_partition, dtype=torch.float32
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_scale", w13_scale)
-
-        w2_scale = torch.nn.Parameter(
-            torch.zeros(num_experts, hidden_size, dtype=torch.float32),
-            requires_grad=False,
-        )
-        layer.register_parameter("w2_scale", w2_scale)
-
-    def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
-    ) -> FusedMoEQuantConfig | None:
-        return int8_w8a16_moe_quant_config(
-            w1_scale=layer.w13_scale, w2_scale=layer.w2_scale, w1_zp=None, w2_zp=None
-        )
-
-    def apply(
-        self,
-        layer: FusedMoE,
-        x: torch.Tensor,
-        topk_weights: torch.Tensor,
-        topk_ids: torch.Tensor,
-        shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        from vllm.model_executor.layers.fused_moe import fused_experts
-
-        return fused_experts(
-            x,
-            layer.w13_weight,
-            layer.w2_weight,
-            topk_weights=topk_weights,
-            topk_ids=topk_ids,
-            inplace=not self.moe.disable_inplace,
-            activation=layer.activation,
-            apply_router_weight_on_input=layer.apply_router_weight_on_input,
-            global_num_experts=layer.global_num_experts,
-            expert_map=layer.expert_map,
-            quant_config=self.moe_quant_config,
-        )
-
-    @staticmethod
-    def quantizing_weight_loader(layer, weight_loader):
-        def quantize_and_call_weight_loader(
-            param: torch.nn.Parameter,
-            loaded_weight: torch.Tensor,
-            weight_name: str,
-            shard_id: int,
-            expert_id: int,
-        ):
-            tp_rank = get_tensor_model_parallel_rank()
-            shard_size = layer.intermediate_size_per_partition
-            shard = slice(tp_rank * shard_size, (tp_rank + 1) * shard_size)
-            device = get_tp_group().device
-            loaded_weight = loaded_weight.to(device)
-            # w1, gate_proj case: Load into first shard of w13.
-            if shard_id == "w1":
-                scales = quantize_in_place_and_get_scales(loaded_weight[shard, :])
-                layer.w13_scale.data[expert_id, 0:shard_size].copy_(scales[:, 0])
-            # w3, up_proj case: Load into second shard of w13.
-            elif shard_id == "w3":
-                scales = quantize_in_place_and_get_scales(loaded_weight[shard, :])
-                layer.w13_scale.data[expert_id, shard_size : 2 * shard_size].copy_(
-                    scales[:, 0]
-                )
-            # w2, down_proj case: Load into only shard of w2.
-            elif shard_id == "w2":
-                scales = quantize_in_place_and_get_scales(loaded_weight[:, shard])
-                layer.w2_scale.data[expert_id, :].copy_(scales[:, 0])
-            else:
-                raise ValueError(f"Shard id must be in [0,1,2] but got {shard_id}")
-            weight_loader(param, loaded_weight, weight_name, shard_id, expert_id)
-
-        return quantize_and_call_weight_loader
-
-
-def quantize_in_place_and_get_scales(weight: torch.Tensor) -> torch.Tensor:
-    vmax = torch.iinfo(torch.int8).max
-    scales = torch.max(torch.abs(weight), dim=1, keepdim=True)[0] / vmax
-
-    weight.div_(scales)
-    weight.round_()
-    weight.clamp_(-vmax, vmax)
-
-    return scales
diff --git a/vllm/model_executor/layers/quantization/fbgemm_fp8.py b/vllm/model_executor/layers/quantization/fbgemm_fp8.py
index c952b7690846..d95c51be0102 100644
--- a/vllm/model_executor/layers/quantization/fbgemm_fp8.py
+++ b/vllm/model_executor/layers/quantization/fbgemm_fp8.py
@@ -7,6 +7,7 @@
 from torch.nn import Module
 from torch.nn.parameter import Parameter
 
+from vllm.config import get_current_vllm_config
 from vllm.logger import init_logger
 from vllm.model_executor.kernels.linear import (
     init_fp8_linear_kernel,
@@ -93,12 +94,7 @@ class FBGEMMFp8LinearMethod(LinearMethodBase):
     def __init__(self, quant_config: FBGEMMFp8Config):
         self.quant_config = quant_config
         self.out_dtype = torch.get_default_dtype()
-        self.fp8_linear = init_fp8_linear_kernel(
-            activation_quant_key=kFp8DynamicTokenSym,
-            weight_quant_key=kFp8StaticTokenSym,
-            out_dtype=torch.get_default_dtype(),
-            module_name=self.__class__.__name__,
-        )
+        self.input_dtype = get_current_vllm_config().model_config.dtype
 
     def create_weights(
         self,
@@ -149,6 +145,15 @@ def create_weights(
         )
         layer.input_scale_ub = input_scale_ub
 
+        self.fp8_linear = init_fp8_linear_kernel(
+            activation_quant_key=kFp8DynamicTokenSym,
+            weight_quant_key=kFp8StaticTokenSym,
+            weight_shape=layer.weight.shape,
+            input_dtype=self.input_dtype,
+            out_dtype=self.out_dtype,
+            module_name=self.__class__.__name__,
+        )
+
     def process_weights_after_loading(self, layer: Module) -> None:
         # required by torch.compile
         layer.weight_scale = Parameter(layer.weight_scale.data, requires_grad=False)
@@ -170,6 +175,8 @@ def process_weights_after_loading(self, layer: Module) -> None:
             # Activations not quantized for marlin.
             del layer.input_scale_ub
 
+        self.fp8_linear.process_weights_after_loading(layer)
+
     def apply(
         self,
         layer: torch.nn.Module,
diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index fffcfa5e6329..41e2e19785cd 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -4,30 +4,34 @@
 from typing import TYPE_CHECKING, Any
 
 import torch
-from torch.nn import Module
 from torch.utils._python_dispatch import TorchDispatchMode
 
 import vllm.envs as envs
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm import _custom_ops as ops
-from vllm._aiter_ops import rocm_aiter_ops
+from vllm.config import get_current_vllm_config
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.logger import init_logger
 from vllm.model_executor.kernels.linear import (
     init_fp8_linear_kernel,
 )
-from vllm.model_executor.kernels.linear.scaled_mm import MarlinFP8ScaledMMLinearKernel
+from vllm.model_executor.kernels.linear.scaled_mm import (
+    CutlassFP8ScaledMMLinearKernel,
+    MarlinFP8ScaledMMLinearKernel,
+)
 from vllm.model_executor.layers.attention import Attention
 from vllm.model_executor.layers.fused_moe import (
-    FusedMoE,
     FusedMoEMethodBase,
     FusedMoeWeightScaleSupported,
+    RoutedExperts,
+    SharedExperts,
+    UnquantizedFusedMoEMethod,
 )
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEQuantConfig,
 )
-from vllm.model_executor.layers.fused_moe.layer import UnquantizedFusedMoEMethod
 from vllm.model_executor.layers.fused_moe.oracle.fp8 import (
+    Fp8MoeBackend,
     convert_to_fp8_moe_kernel_format,
     make_fp8_moe_kernel,
     make_fp8_moe_quant_config,
@@ -45,13 +49,10 @@
 )
 from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-    W8A8BlockFp8LinearOp,
     create_fp8_input_scale,
     create_fp8_scale_parameter,
     create_fp8_weight_parameter,
-    maybe_post_process_fp8_weight_block,
     process_fp8_input_tensor_strategy_moe,
-    process_fp8_weight_block_strategy,
     process_fp8_weight_tensor_strategy,
     process_fp8_weight_tensor_strategy_moe,
     validate_fp8_block_shape,
@@ -61,6 +62,7 @@
 )
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     GroupShape,
+    create_fp8_quant_key,
     is_layer_skipped,
     kFp8Dynamic128Sym,
     kFp8DynamicTensorSym,
@@ -189,7 +191,7 @@ def get_quant_method(
                 offline_method = Fp8LinearMethod(self)
                 offline_method.marlin_input_dtype = get_marlin_input_dtype(prefix)
                 return offline_method
-        elif isinstance(layer, FusedMoE):
+        elif isinstance(layer, RoutedExperts):
             if is_layer_skipped(
                 prefix=prefix,
                 ignored_layers=self.ignored_layers,
@@ -271,14 +273,16 @@ class Fp8LinearMethod(LinearMethodBase):
 
     def __init__(self, quant_config: Fp8Config):
         self.quant_config = quant_config
+        self.is_scale_e8m0 = getattr(quant_config, "is_scale_e8m0", False)
         self.cutlass_block_fp8_supported = cutlass_block_fp8_supported()
         self.out_dtype = torch.get_default_dtype()
+        self.input_dtype = get_current_vllm_config().model_config.dtype
 
         # For GPUs that lack FP8 hardware support, we can leverage the Marlin
         # kernel for fast weight-only FP8 quantization
         self.marlin_input_dtype = None
+        self.use_marlin = False
 
-        self.use_aiter_and_is_supported = rocm_aiter_ops.is_linear_fp8_enabled()
         if self.quant_config.use_deep_gemm is not None:
             self.use_deep_gemm = self.quant_config.use_deep_gemm
         else:
@@ -288,41 +292,30 @@ def __init__(self, quant_config: Fp8Config):
         self.block_quant = self.weight_block_size is not None
         self.act_q_static = self.quant_config.activation_scheme == "static"
 
-        # Use per-token quantization for better perf if dynamic and cutlass
-        if self.act_q_static:
-            activation_quant_key = kFp8StaticTensorSym
-        elif cutlass_fp8_supported():
-            activation_quant_key = kFp8DynamicTokenSym
-        else:
-            activation_quant_key = kFp8DynamicTensorSym
-
         if self.block_quant:
-            weight_quant_key = kFp8Static128BlockSym
-        else:
-            weight_quant_key = kFp8StaticTensorSym
-
-        self.fp8_linear = init_fp8_linear_kernel(
-            activation_quant_key=activation_quant_key,
-            weight_quant_key=weight_quant_key,
-            out_dtype=torch.get_default_dtype(),
-            module_name=self.__class__.__name__,
-        )
-        self.use_marlin = isinstance(self.fp8_linear, MarlinFP8ScaledMMLinearKernel)
-
-        if self.block_quant and not self.use_marlin:
             assert not self.act_q_static
             assert self.weight_block_size is not None
-            self.w8a8_block_fp8_linear = W8A8BlockFp8LinearOp(
-                weight_group_shape=GroupShape(*self.weight_block_size),
-                act_quant_group_shape=GroupShape(1, self.weight_block_size[0]),
-                cutlass_block_fp8_supported=self.cutlass_block_fp8_supported,
-                use_aiter_and_is_supported=self.use_aiter_and_is_supported,
-                use_deep_gemm=self.use_deep_gemm,
+
+            self.activation_quant_key = create_fp8_quant_key(
+                static=self.act_q_static,
+                group_shape=GroupShape(1, self.weight_block_size[0]),
+            )
+            self.weight_quant_key = create_fp8_quant_key(
+                static=True, group_shape=GroupShape(*self.weight_block_size)
             )
+        else:
+            self.weight_quant_key = kFp8StaticTensorSym
+            # Use per-token quantization for better perf if dynamic and cutlass
+            if self.act_q_static:
+                self.activation_quant_key = kFp8StaticTensorSym
+            elif cutlass_fp8_supported():
+                self.activation_quant_key = kFp8DynamicTokenSym
+            else:
+                self.activation_quant_key = kFp8DynamicTensorSym
 
     def create_weights(
         self,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
         input_size_per_partition: int,
         output_partition_sizes: list[int],
         input_size: int,
@@ -374,6 +367,7 @@ def create_weights(
                 input_size_per_partition,
                 self.weight_block_size,
                 weight_loader,
+                scale_dtype=(torch.float8_e8m0fnu if self.is_scale_e8m0 else None),
             )
             # The weight_scale_inv name is intentional for deepseekv3
             layer.register_parameter("weight_scale_inv", scale)
@@ -384,7 +378,18 @@ def create_weights(
             set_weight_attrs(scale, {"scale_type": "input_scale"})
             layer.register_parameter("input_scale", scale)
 
-    def process_weights_after_loading(self, layer: Module) -> None:
+        self.fp8_linear = init_fp8_linear_kernel(
+            activation_quant_key=self.activation_quant_key,
+            weight_quant_key=self.weight_quant_key,
+            weight_shape=layer.weight.shape,
+            input_dtype=self.input_dtype,
+            out_dtype=self.out_dtype,
+            module_name=self.__class__.__name__,
+        )
+
+        self.use_marlin = isinstance(self.fp8_linear, MarlinFP8ScaledMMLinearKernel)
+
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
         if self.use_marlin:
             # Only Marlin kernels support `marlin_input_dtype`; guard to avoid
             # AttributeError if backend selection changes.
@@ -398,14 +403,6 @@ def process_weights_after_loading(self, layer: Module) -> None:
         if self.block_quant:
             assert not self.act_q_static
 
-            weight, weight_scale_inv = process_fp8_weight_block_strategy(
-                layer.weight, layer.weight_scale_inv
-            )
-
-            # Update layer with new values
-            replace_parameter(layer, "weight", weight.data)
-            replace_parameter(layer, "weight_scale_inv", weight_scale_inv.data)
-
         # If checkpoint not serialized fp8, quantize the weights.
         else:
             # If checkpoint is fp8 per-tensor, handle that there are N scales for N
@@ -435,8 +432,7 @@ def process_weights_after_loading(self, layer: Module) -> None:
         else:
             layer.input_scale = None
 
-        if self.block_quant and self.use_deep_gemm:
-            maybe_post_process_fp8_weight_block(layer)
+        self.fp8_linear.process_weights_after_loading(layer)
 
     def apply(
         self,
@@ -444,19 +440,20 @@ def apply(
         x: torch.Tensor,
         bias: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        # if batch invariant mode is enabled, prefer DeepGEMM FP8 path
-        # we will use BF16 dequant when DeepGEMM is not supported.
+        # if batch invariant mode is enabled, prefer direct FP8 path
+        # we will use BF16 dequant when direct FP8 is not supported.
         if envs.VLLM_BATCH_INVARIANT:
             if self.block_quant:
                 assert self.weight_block_size is not None
-                return self.w8a8_block_fp8_linear.apply(
-                    input=x,
-                    weight=layer.weight,
-                    weight_scale=layer.weight_scale_inv,
-                    input_scale=layer.input_scale,
-                    bias=bias,
+                return self.fp8_linear.apply_weights(
+                    layer,
+                    x,
+                    bias,
                 )
             else:
+                if isinstance(self.fp8_linear, CutlassFP8ScaledMMLinearKernel):
+                    return self.fp8_linear.apply_weights(layer, x, bias)
+
                 # per-tensor/channel: dequant to BF16 and run GEMM
                 weight_fp8 = layer.weight.to(torch.bfloat16)
                 weight_scale = layer.weight_scale.to(torch.bfloat16)
@@ -483,20 +480,11 @@ def apply(
         if self.use_marlin:
             return self.fp8_linear.apply_weights(layer, x, bias)
 
-        if self.block_quant:
-            assert self.weight_block_size is not None
-
-            return self.w8a8_block_fp8_linear.apply(
-                input=x,
-                weight=layer.weight,
-                weight_scale=layer.weight_scale_inv,
-                input_scale=layer.input_scale,
-                bias=bias,
-            )
-
         return self.fp8_linear.apply_weights(layer, x, bias)
 
 
+# TODO(future PR): remove this class in favor of
+# online/fp8.py::Fp8PerTensorOnlineLinearMethod
 class Fp8OnlineLinearMethod(Fp8LinearMethod):
     """Online version of Fp8LinearMethod which loads a full precision checkpoint
     and quantizes weights during loading."""
@@ -536,7 +524,17 @@ def create_weights(
 
         initialize_online_processing(layer)
 
-    def process_weights_after_loading(self, layer: Module) -> None:
+        self.fp8_linear = init_fp8_linear_kernel(
+            activation_quant_key=self.activation_quant_key,
+            weight_quant_key=self.weight_quant_key,
+            weight_shape=layer.weight.shape,
+            input_dtype=self.input_dtype,
+            out_dtype=self.out_dtype,
+            module_name=self.__class__.__name__,
+        )
+        self.use_marlin = isinstance(self.fp8_linear, MarlinFP8ScaledMMLinearKernel)
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         if getattr(layer, "_already_called_process_weights_after_loading", False):
             return
 
@@ -559,6 +557,7 @@ def process_weights_after_loading(self, layer: Module) -> None:
         else:
             weight = qweight.t()
             replace_parameter(layer, "weight", weight.data)
+            self.fp8_linear.process_weights_after_loading(layer)
 
         # Prevent duplicate processing (e.g., during weight reload)
         layer._already_called_process_weights_after_loading = True
@@ -577,7 +576,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
         quant_config: The quantization config.
     """
 
-    def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module):
+    def __init__(self, quant_config: Fp8Config, layer: RoutedExperts):
         super().__init__(layer.moe_config)
         self.quant_config = quant_config
         self.weight_block_size = self.quant_config.weight_block_size
@@ -608,7 +607,7 @@ def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module):
 
     def create_weights(
         self,
-        layer: Module,
+        layer: RoutedExperts,
         num_experts: int,
         hidden_size: int,
         intermediate_size_per_partition: int,
@@ -748,7 +747,7 @@ def create_weights(
 
     def _setup_kernel(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         w13: torch.Tensor,
         w2: torch.Tensor,
         w13_scale: torch.Tensor,
@@ -775,6 +774,11 @@ def _setup_kernel(
         replace_parameter(layer, f"w13_{self.weight_scale_name}", w13_scale)
         replace_parameter(layer, f"w2_{self.weight_scale_name}", w2_scale)
 
+        # AITER backend requires weights to be marked as shuffled.
+        if self.fp8_backend == Fp8MoeBackend.AITER:
+            layer.w13_weight.is_shuffled = True
+            layer.w2_weight.is_shuffled = True
+
         self.moe_quant_config = self.get_fused_moe_quant_config(layer)
         if self.moe_quant_config:
             assert self.experts_cls is not None
@@ -783,11 +787,10 @@ def _setup_kernel(
                 moe_config=self.moe,
                 fp8_backend=self.fp8_backend,
                 experts_cls=self.experts_cls,
-                routing_tables=layer._maybe_init_expert_routing_tables(),
-                shared_experts=layer.shared_experts,
+                routing_tables=layer._expert_routing_tables(),
             )
 
-    def process_weights_after_loading(self, layer: Module) -> None:
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
         # Allow for accessing weights and scales in standard way.
         w13 = layer.w13_weight
         w2 = layer.w2_weight
@@ -841,7 +844,7 @@ def maybe_make_prepare_finalize(
             "logic. This function should not be called."
         )
 
-    def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantConfig:
+    def get_fused_moe_quant_config(self, layer: RoutedExperts) -> FusedMoEQuantConfig:
         w1_scale = getattr(layer, f"w13_{self.weight_scale_name}")
         w2_scale = getattr(layer, f"w2_{self.weight_scale_name}")
         a1_scale = layer.w13_input_scale
@@ -854,6 +857,7 @@ def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantCon
             a1_scale=a1_scale,
             a2_scale=a2_scale,
             block_shape=self.weight_block_size,
+            swiglu_limit=getattr(layer, "swiglu_limit", None),
         )
 
         # Inject biases into the quant config if the model has them
@@ -874,10 +878,11 @@ def supports_eplb(self) -> bool:
 
     def apply_monolithic(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
         assert self.is_monolithic
         assert self.moe_kernel is not None
         return self.moe_kernel.apply_monolithic(
@@ -897,12 +902,13 @@ def apply_monolithic(
 
     def apply(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> torch.Tensor:
         assert not self.is_monolithic
         assert self.moe_kernel is not None
         return self.moe_kernel.apply(
@@ -915,10 +921,13 @@ def apply(
             global_num_experts=layer.global_num_experts,
             expert_map=layer.expert_map,
             apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            shared_experts=shared_experts,
             shared_experts_input=shared_experts_input,
         )
 
 
+# TODO(future PR): remove this class in favor of
+# online/fp8.py::Fp8PerTensorOnlineMoEMethod
 class Fp8OnlineMoEMethod(Fp8MoEMethod):
     """MoE method for online FP8 quantization.
     Supports loading quantized FP16/BF16 model checkpoints with dynamic
@@ -931,7 +940,7 @@ class Fp8OnlineMoEMethod(Fp8MoEMethod):
 
     uses_meta_device: bool = True
 
-    def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module):
+    def __init__(self, quant_config: Fp8Config, layer: RoutedExperts):
         super().__init__(quant_config, layer)
         assert not quant_config.is_checkpoint_fp8_serialized
         assert quant_config.activation_scheme == "dynamic"
@@ -939,7 +948,7 @@ def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module):
 
     def create_weights(
         self,
-        layer: Module,
+        layer: RoutedExperts,
         num_experts: int,
         hidden_size: int,
         intermediate_size_per_partition: int,
@@ -1005,7 +1014,7 @@ def create_weights(
 
         initialize_online_processing(layer)
 
-    def process_weights_after_loading(self, layer: Module) -> None:
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
         # TODO(@ksayers): inplace fp8 quant kernel, initialize scales with ones
         if getattr(layer, "_already_called_process_weights_after_loading", False):
             return
diff --git a/vllm/model_executor/layers/quantization/fp_quant.py b/vllm/model_executor/layers/quantization/fp_quant.py
index 4ed8d57dd430..7d0b6a974d7c 100644
--- a/vllm/model_executor/layers/quantization/fp_quant.py
+++ b/vllm/model_executor/layers/quantization/fp_quant.py
@@ -3,7 +3,7 @@
 
 # Supports FP-Quant compression, see https://arxiv.org/abs/2509.23202
 
-from typing import Any
+from typing import Any, Literal, cast
 
 import torch
 from torch.nn.parameter import Parameter
@@ -251,7 +251,11 @@ def apply(
 def fused_quantize_mx(
     x_flat: torch.Tensor, hadamard_matrix: torch.Tensor, forward_method: str
 ) -> tuple[torch.Tensor, torch.Tensor]:
-    return fusedQuantizeMx(x_flat, hadamard_matrix, method=forward_method)
+    return fusedQuantizeMx(
+        x_flat,
+        hadamard_matrix,
+        method=cast(Literal["quest", "abs_max"], forward_method),
+    )
 
 
 def fused_quantize_mx_fake(x_flat, hadamard_matrix, forward_method):
diff --git a/vllm/model_executor/layers/quantization/gguf.py b/vllm/model_executor/layers/quantization/gguf.py
index 88023349e779..dca49d7ed976 100644
--- a/vllm/model_executor/layers/quantization/gguf.py
+++ b/vllm/model_executor/layers/quantization/gguf.py
@@ -3,7 +3,10 @@
 
 from collections.abc import Mapping
 from types import MappingProxyType
-from typing import Any
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from vllm.model_executor.layers.quantization import QuantizationMethods
 
 import gguf
 import torch
@@ -12,17 +15,14 @@
 
 from vllm import _custom_ops as ops
 from vllm.logger import init_logger
-from vllm.model_executor.layers.fused_moe.activation import (
-    MoEActivation,
-    apply_moe_activation,
-)
-from vllm.model_executor.layers.fused_moe.config import (
+from vllm.model_executor.layers.fused_moe import (
     FusedMoEConfig,
-    FusedMoEQuantConfig,
-)
-from vllm.model_executor.layers.fused_moe.layer import (
-    FusedMoE,
     FusedMoEMethodBase,
+    FusedMoEQuantConfig,
+    MoEActivation,
+    RoutedExperts,
+    SharedExperts,
+    apply_moe_activation,
 )
 from vllm.model_executor.layers.linear import (
     LinearBase,
@@ -79,6 +79,16 @@ def get_config_filenames(cls) -> list[str]:
     def from_config(cls, config: dict[str, Any]) -> "GGUFConfig":
         return cls()
 
+    @classmethod
+    def override_quantization_method(
+        cls, hf_quant_cfg: dict[str, Any], user_quant: str | None, hf_config=None
+    ) -> "QuantizationMethods | None":
+        # When user explicitly specifies --quantization gguf, override
+        # whatever quantization method is in the HF model config (e.g. fp8).
+        if user_quant == "gguf":
+            return "gguf"
+        return None
+
     def get_quant_method(
         self, layer: torch.nn.Module, prefix: str
     ) -> "QuantizeMethodBase | None":
@@ -94,7 +104,7 @@ def get_quant_method(
             ):
                 return UnquantizedEmbeddingMethod()
             return GGUFEmbeddingMethod(self)
-        elif isinstance(layer, FusedMoE):
+        elif isinstance(layer, RoutedExperts):
             # TODO: Select UnquantizedFusedMoEMethod on unquantized layers.
             return GGUFMoEMethod(self, layer.moe_config)
         return None
@@ -565,7 +575,7 @@ def __init__(
 
     def create_weights(
         self,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
         num_experts: int,
         hidden_size: int,
         intermediate_size_per_partition: int,
@@ -626,18 +636,19 @@ def create_weights(
         layer.register_parameter("w2_qweight_type", w2_qweight_type)
 
     def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
+        self, layer: RoutedExperts
     ) -> FusedMoEQuantConfig | None:
         return None
 
     def apply(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> torch.Tensor:
         if layer.apply_router_weight_on_input:
             raise NotImplementedError(
                 "Apply router weight on input is not supported for"
diff --git a/vllm/model_executor/layers/quantization/gptq.py b/vllm/model_executor/layers/quantization/gptq.py
deleted file mode 100644
index 154347a930a9..000000000000
--- a/vllm/model_executor/layers/quantization/gptq.py
+++ /dev/null
@@ -1,393 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import enum
-from enum import Enum
-from fractions import Fraction
-from typing import TYPE_CHECKING, Any, Union
-
-import torch
-from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE
-from torch.nn.parameter import Parameter
-
-from vllm import _custom_ops as ops
-from vllm.logger import init_logger
-from vllm.model_executor.layers.fused_moe.layer import FusedMoE
-from vllm.model_executor.layers.linear import LinearMethodBase
-from vllm.model_executor.layers.quantization.base_config import (
-    QuantizationConfig,
-    QuantizeMethodBase,
-)
-from vllm.model_executor.layers.quantization.utils.gptq_utils import (
-    get_linear_quant_method,
-)
-from vllm.model_executor.parameter import (
-    ChannelQuantScaleParameter,
-    GroupQuantScaleParameter,
-    PackedColumnParameter,
-    PackedvLLMParameter,
-    RowvLLMParameter,
-)
-from vllm.transformers_utils.config import get_safetensors_params_metadata
-from vllm.utils.collection_utils import is_list_of
-
-if TYPE_CHECKING:
-    from vllm.model_executor.layers.quantization import QuantizationMethods
-    from vllm.model_executor.models.utils import WeightsMapper
-else:
-    QuantizationMethods = str
-
-logger = init_logger(__name__)
-
-
-class GPTQConfig(QuantizationConfig):
-    """Config class for GPTQ.
-
-    Reference: https://arxiv.org/abs/2210.17323
-    """
-
-    def __init__(
-        self,
-        weight_bits: int,
-        group_size: int,
-        desc_act: bool,
-        lm_head_quantized: bool,
-        dynamic: dict[str, dict[str, int | bool]],
-        autoround_version: str = "",
-        modules_in_block_to_quantize: list[str] | None = None,
-        checkpoint_format: str = "",
-    ) -> None:
-        # GPTQModel use `dynamic` config property to allow per module
-        # quantization config so each module can be individually optimized.
-        # Format is dict[str, dict] where key is a regex string that can
-        # perform both positive ("+:" prefixed) or negative ("-:" prefixed)
-        # matching of a module.
-        # Default to positive match, override base quant config mode, if no
-        # prefix is used. Value is in dict format of field key and override
-        # value.
-        # Negative matching will skip quantization init for this module
-        # entirely:
-        # non-quantized inference. More details and quantization examples can be
-        # found at: https://github.com/ModelCloud/GPTQModel
-        # Example:
-        #  # last 1/2 of the layers 10-21 has 8bit vs 4bit for 0-9
-        #  # last 1/4 of the layers 16-21 has 8bit and group_size 64
-        # dynamic = {
-        #  #`.*\.` matches the layers_node prefix
-        #  # positive match layer 10-15
-        #  r"+:.*\.(?:1[0-5])\..*": {"bits": 8,},
-        #  # positive match layer 16-21
-        #  r"+:.*\.(?:1[6-9]|20|21)\..*": {"bits": 8, "group_size": 64,},
-        #  r"-:.*\.moe\..*": {}, # negative match (skip) all `moe` layers
-        # }
-        super().__init__()
-        self.dynamic = dynamic
-
-        self.weight_bits = weight_bits
-        self.group_size = group_size
-        self.desc_act = desc_act
-        self.lm_head_quantized = lm_head_quantized
-        self.pack_factor = Fraction(32, self.weight_bits)
-        if self.weight_bits not in [2, 3, 4, 8]:
-            raise ValueError(
-                "Currently, only 2/3/4/8-bit weight quantization is "
-                f"supported for GPTQ, but got {self.weight_bits} bits."
-            )
-        # Somehow gptq_gemm 4-bit is buggy, maybe fix it in the future.
-        # For now, show a warning, since gptq_marlin will be used by default.
-        if self.weight_bits == 4:
-            logger.warning_once(
-                "Currently, the 4-bit gptq_gemm kernel for GPTQ is buggy. "
-                "Please switch to gptq_marlin."
-            )
-
-        self.modules_in_block_to_quantize = modules_in_block_to_quantize or []
-
-        # used to identify GPTQ model quantized by autoround
-        self.autoround_version = autoround_version
-
-        # GPTQ v1 and v2 format deals with zero points differently.
-        # Currently GPTQModel stores v1 format checkpoints by default,
-        # but provides the option to set `format="gptq_v2"` in `QuantizeConfig`.
-        self.checkpoint_format = checkpoint_format
-
-    def __repr__(self) -> str:
-        return (
-            f"GPTQConfig(weight_bits={self.weight_bits}, "
-            f"group_size={self.group_size}, "
-            f"desc_act={self.desc_act}), "
-            f"lm_head_quantized={self.lm_head_quantized}, "
-            f"dynamic={self.dynamic}, "
-            f"modules_in_block_to_quantize={self.modules_in_block_to_quantize}), "
-            f"checkpoint_format={self.checkpoint_format})"
-        )
-
-    @classmethod
-    def get_name(cls) -> QuantizationMethods:
-        return "gptq"
-
-    @classmethod
-    def get_supported_act_dtypes(cls) -> list[torch.dtype]:
-        return [torch.half]
-
-    @classmethod
-    # Need to figure it out
-    def get_min_capability(cls) -> int:
-        return 60
-
-    @classmethod
-    def get_config_filenames(cls) -> list[str]:
-        return ["quantize_config.json"]
-
-    @classmethod
-    def from_config(cls, config: dict[str, Any]) -> "GPTQConfig":
-        dynamic = cls.get_from_keys_or(config, ["dynamic"], default={})
-        dynamic = {} if dynamic is None else dynamic
-
-        weight_bits = cls.get_from_keys(config, ["bits"])
-        group_size = cls.get_from_keys(config, ["group_size"])
-        desc_act = cls.get_from_keys(config, ["desc_act"])
-        lm_head_quantized = cls.get_from_keys_or(config, ["lm_head"], default=False)
-        autoround_version = cls.get_from_keys_or(
-            config, ["autoround_version"], default=""
-        )
-        modules_in_block_to_quantize = cls.get_from_keys_or(
-            config, ["modules_in_block_to_quantize"], default=None
-        )
-        checkpoint_format = cls.get_from_keys_or(
-            config, ["checkpoint_format"], default=""
-        )
-        return cls(
-            weight_bits,
-            group_size,
-            desc_act,
-            lm_head_quantized,
-            dynamic,
-            autoround_version,
-            modules_in_block_to_quantize,
-            checkpoint_format,
-        )
-
-    def get_quant_method(
-        self, layer: torch.nn.Module, prefix: str
-    ) -> Union["GPTQLinearMethod", "QuantizeMethodBase"] | None:
-        if isinstance(layer, FusedMoE):
-            # GPTQ MoE support: fall back to MoeWNA16 for broad compatibility
-            from .moe_wna16 import MoeWNA16Config
-
-            # TODO: maybe update this for GPTQv2 format checkpoints
-            config = {
-                "quant_method": "gptq",
-                "bits": self.weight_bits,
-                "group_size": self.group_size,
-                "sym": True,  # GPTQ typically uses symmetric quantization
-                "lm_head": False,
-            }
-            return MoeWNA16Config.from_config(config).get_quant_method(layer, prefix)
-
-        return get_linear_quant_method(self, layer, prefix, GPTQLinearMethod)
-
-    def apply_vllm_mapper(self, hf_to_vllm_mapper: "WeightsMapper"):
-        if self.modules_in_block_to_quantize is not None:
-            self.modules_in_block_to_quantize = hf_to_vllm_mapper.apply_list(
-                self.modules_in_block_to_quantize
-            )
-
-    def maybe_update_config(self, model_name: str, revision: str | None = None):
-        if self.modules_in_block_to_quantize:
-            if is_list_of(self.modules_in_block_to_quantize, list):
-                # original modules_in_block_to_quantize: list[list[str]]
-                # flatten original modules_in_block_to_quantize
-                self.modules_in_block_to_quantize = [
-                    item
-                    for sublist in self.modules_in_block_to_quantize
-                    for item in sublist
-                ]
-            return
-
-        unquant_dtypes = [torch.float16, torch.bfloat16, torch.float32]
-        metadata = get_safetensors_params_metadata(model_name, revision=revision)
-        quant_layers: set[str] = {
-            param_name.rsplit(".", 1)[0]
-            for param_name, info in metadata.items()
-            if (dtype := info.get("dtype", None))
-            and _SAFETENSORS_TO_TORCH_DTYPE[dtype] not in unquant_dtypes
-        }
-        self.modules_in_block_to_quantize = list(quant_layers)
-
-
-class ExllamaState(Enum):
-    UNUSED = enum.auto()
-    UNINITIALIZED = enum.auto()
-    READY = enum.auto()
-
-
-class GPTQLinearMethod(LinearMethodBase):
-    """Linear method for GPTQ.
-
-    Args:
-        quant_config: The GPTQ quantization config.
-    """
-
-    def __init__(self, quant_config: GPTQConfig):
-        self.quant_config = quant_config
-
-        # GPTQ v1 and v2 format deals with zero points differently
-        self.use_v2_format = quant_config.checkpoint_format == "gptq_v2"
-
-    def create_weights(
-        self,
-        layer: torch.nn.Module,
-        input_size_per_partition: int,
-        output_partition_sizes: list[int],
-        input_size: int,
-        output_size: int,
-        params_dtype: torch.dtype,
-        **extra_weight_attrs,
-    ):
-        del output_size  # Unused.
-        weight_loader = extra_weight_attrs.get("weight_loader")
-        if input_size_per_partition % self.quant_config.group_size != 0:
-            raise ValueError(
-                "The input size is not aligned with the quantized "
-                "weight shape. This can be caused by too large "
-                "tensor parallel size."
-            )
-        output_size_per_partition = sum(output_partition_sizes)
-        if output_size_per_partition % self.quant_config.pack_factor.numerator != 0:
-            raise ValueError(
-                "The output size is not aligned with the quantized "
-                "weight shape. This can be caused by too large "
-                "tensor parallel size."
-            )
-
-        if self.quant_config.group_size != -1:
-            group_size = self.quant_config.group_size
-        else:
-            group_size = input_size
-        exllama_state = ExllamaState.UNINITIALIZED
-        scale_and_zero_size = input_size // group_size
-        scale_and_zero_input_dim = None
-        if (
-            input_size != input_size_per_partition
-            and self.quant_config.group_size != -1
-        ):
-            # For act-order models, we cannot use Exllama for row parallel layer
-            if self.quant_config.desc_act:
-                exllama_state = ExllamaState.UNUSED
-            else:
-                # we need to partition qzeros and scales for exllama kernel
-                scale_and_zero_size = input_size_per_partition // group_size
-                scale_and_zero_input_dim = 0
-
-        qweight = PackedvLLMParameter(
-            data=torch.empty(
-                input_size_per_partition // self.quant_config.pack_factor,
-                output_size_per_partition,
-                dtype=torch.int32,
-            ),
-            input_dim=0,
-            output_dim=1,
-            packed_dim=0,
-            packed_factor=self.quant_config.pack_factor,
-            weight_loader=weight_loader,
-        )
-
-        g_idx = RowvLLMParameter(
-            data=torch.tensor(
-                [
-                    i // self.quant_config.group_size
-                    for i in range(input_size_per_partition)
-                ],
-                dtype=torch.int32,
-            ),
-            input_dim=0,
-            weight_loader=weight_loader,
-        )
-        qzeros_args = {
-            "data": torch.empty(
-                scale_and_zero_size,
-                output_size_per_partition // self.quant_config.pack_factor,
-                dtype=torch.int32,
-            ),
-            "weight_loader": weight_loader,
-        }
-        weight_scale_args = {
-            "data": torch.empty(
-                scale_and_zero_size,
-                output_size_per_partition,
-                dtype=params_dtype,
-            ),
-            "weight_loader": weight_loader,
-        }
-        if scale_and_zero_input_dim is None:
-            scales = ChannelQuantScaleParameter(output_dim=1, **weight_scale_args)
-            qzeros = PackedColumnParameter(
-                output_dim=1,
-                packed_dim=1,
-                packed_factor=self.quant_config.pack_factor,
-                **qzeros_args,
-            )
-
-        else:
-            scales = GroupQuantScaleParameter(
-                output_dim=1, input_dim=0, **weight_scale_args
-            )
-            qzeros = PackedvLLMParameter(
-                input_dim=0,
-                output_dim=1,
-                packed_dim=1,
-                packed_factor=self.quant_config.pack_factor,
-                **qzeros_args,
-            )
-
-        layer.register_parameter("qweight", qweight)
-        layer.register_parameter("g_idx", g_idx)
-        layer.register_parameter("qzeros", qzeros)
-        layer.register_parameter("scales", scales)
-
-        layer.exllama_state = exllama_state
-
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
-        # for torch.compile
-        layer.qzeros = Parameter(layer.qzeros.data, requires_grad=False)
-        layer.qweight = Parameter(layer.qweight.data, requires_grad=False)
-        layer.g_idx = Parameter(layer.g_idx.data, requires_grad=False)
-        layer.scales = Parameter(layer.scales.data, requires_grad=False)
-
-        # exllama needs to shuffle the weight after the weight is loaded
-        # here we do the shuffle on first forward pass
-        if layer.exllama_state == ExllamaState.UNINITIALIZED:
-            if self.quant_config.desc_act:
-                layer.g_idx.data = torch.argsort(layer.g_idx).to(torch.int)
-            else:
-                layer.g_idx.data = torch.empty(
-                    (0,), dtype=torch.int, device=layer.g_idx.device
-                )
-            layer.exllama_state = ExllamaState.READY
-            ops.gptq_shuffle(layer.qweight, layer.g_idx, self.quant_config.weight_bits)
-
-    def apply(
-        self,
-        layer: torch.nn.Module,
-        x: torch.Tensor,
-        bias: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        out_shape = x.shape[:-1] + (layer.qweight.shape[-1],)
-        reshaped_x = x.reshape(-1, x.shape[-1])
-
-        # GPTQ v1 and v2 format checkpoints deals with zero points differently,
-        # and require different gemm kernels.
-        output = ops.gptq_gemm(
-            reshaped_x,
-            layer.qweight,
-            layer.qzeros,
-            layer.scales,
-            layer.g_idx,
-            layer.exllama_state == ExllamaState.READY,
-            self.use_v2_format,
-            self.quant_config.weight_bits,
-        )
-        if bias is not None:
-            output.add_(bias)
-        return output.reshape(out_shape)
diff --git a/vllm/model_executor/layers/quantization/humming.py b/vllm/model_executor/layers/quantization/humming.py
new file mode 100644
index 000000000000..8139b2441b70
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/humming.py
@@ -0,0 +1,926 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import json
+import math
+from collections.abc import Callable
+from typing import TYPE_CHECKING, Any
+
+import regex as re
+import torch
+
+from vllm import envs
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoEMethodBase,
+    RoutedExperts,
+    SharedExperts,
+)
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEConfig,
+    FusedMoEQuantConfig,
+)
+from vllm.model_executor.layers.fused_moe.unquantized_fused_moe_method import (
+    UnquantizedFusedMoEMethod,
+)
+from vllm.model_executor.layers.linear import (
+    LinearBase,
+    LinearMethodBase,
+    UnquantizedLinearMethod,
+)
+from vllm.model_executor.layers.quantization import QuantizationMethods
+from vllm.model_executor.layers.quantization.base_config import (
+    QuantizationConfig,
+    QuantizeMethodBase,
+)
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.model_executor.parameter import (
+    BasevLLMParameter,
+    BlockQuantScaleParameter,
+    ChannelQuantScaleParameter,
+    GroupQuantScaleParameter,
+    ModelWeightParameter,
+    PackedvLLMParameter,
+    PerTensorScaleParameter,
+    RowvLLMParameter,
+)
+from vllm.model_executor.utils import set_weight_attrs
+
+if TYPE_CHECKING:
+    from vllm.model_executor.models.utils import WeightsMapper
+
+
+try:
+    from humming.dtypes import DataType
+    from humming.layer import HummingMethod
+    from humming.schema import (
+        BaseInputSchema,
+        BaseWeightSchema,
+        HummingInputSchema,
+        HummingWeightSchema,
+    )
+    from humming.utils.weight import quantize_weight
+
+    from vllm.model_executor.layers.fused_moe.experts.fused_humming_moe import (
+        BatchedHummingGroupedExperts,
+        HummingGroupedExperts,
+        HummingIndexedExperts,
+        get_humming_moe_gemm_type,
+    )
+except ModuleNotFoundError:
+    HummingMethod = None
+
+
+def assert_humming_available():
+    assert HummingMethod is not None, (
+        "humming is not available, please run "
+        "'pip install git+https://github.com/inclusionAI/humming' to install it."
+    )
+
+
+def prepare_padded_shape(shape, x):
+    padded_shape = math.ceil(shape / x) * x
+    return padded_shape, padded_shape - shape
+
+
+def prepare_param(tensor, name, extra_attrs):
+    extra_attrs = extra_attrs.copy()
+    scale_type = extra_attrs.pop("scale_type", None)
+    param_cls_name_map = {
+        "block": BlockQuantScaleParameter,
+        "tensor": PerTensorScaleParameter,
+        "group": GroupQuantScaleParameter,
+        "channel": ChannelQuantScaleParameter,
+        "input_scale": PerTensorScaleParameter,
+    }
+
+    param_cls: type[BasevLLMParameter]
+    if "packed_dim" in extra_attrs:
+        param_cls = PackedvLLMParameter
+    elif scale_type in param_cls_name_map:
+        param_cls = param_cls_name_map[scale_type]
+    elif "output_dim" in extra_attrs and "input_dim" in extra_attrs:
+        param_cls = ModelWeightParameter
+    elif "input_dim" in extra_attrs:
+        param_cls = RowvLLMParameter
+    elif "output_dim" in extra_attrs:
+        param_cls = ChannelQuantScaleParameter
+    else:
+        param_cls = BasevLLMParameter
+
+    kwargs_keys = [
+        "input_dim",
+        "output_dim",
+        "packed_dim",
+        "packed_factor",
+        "weight_loader",
+    ]
+    cls_kwargs = {}
+    for key in extra_attrs.copy():
+        if key in kwargs_keys:
+            cls_kwargs[key] = extra_attrs.pop(key)
+
+    param = param_cls(data=tensor, **cls_kwargs)
+    set_weight_attrs(param, extra_attrs)
+
+    param.param_name = name
+    param.ignore_warning = True
+    if scale_type in ["tensor", "input_scale"]:
+        param.needs_scalar_to_array = True
+
+    return param
+
+
+def prepare_moe_param(tensor, name, extra_attrs):
+    param = torch.nn.Parameter(tensor, requires_grad=False)
+    if "scale_type" in extra_attrs:
+        extra_attrs["quant_method"] = extra_attrs["scale_type"]
+
+    if "input_dim" in extra_attrs and "output_dim" in extra_attrs:
+        input_dim = extra_attrs["input_dim"]
+        output_dim = extra_attrs["output_dim"]
+        extra_attrs["is_transposed"] = input_dim < output_dim
+
+    set_weight_attrs(param, extra_attrs)
+    param.param_name = name
+    return param
+
+
+def may_pad_loaded_weight(param, loaded_weight):
+    pad_shape = getattr(param, "pad_shape", None)
+    if pad_shape is None:
+        return loaded_weight
+    value = 1 if loaded_weight.dtype == torch.float8_e8m0fnu else 0
+    padding = []
+    for x in pad_shape[::-1][: loaded_weight.ndim]:
+        padding += [0, x]
+    loaded_weight = torch.nn.functional.pad(
+        input=loaded_weight,
+        pad=padding,
+        value=value,
+    )
+    return loaded_weight
+
+
+def compressed_tensors_get_config(config: dict[str, Any], key: str):
+    assert key in ["weights", "input_activations"]
+    target_group_config = None
+    for group_config in config["config_groups"].values():
+        if "Linear" in group_config["targets"]:
+            if "weights" not in group_config:
+                return None
+            if key not in group_config or group_config[key] is None:
+                return None
+            target_group_config = group_config[key].copy()
+            break
+
+    if target_group_config is None:
+        return None
+    target_group_config["quant_method"] = config["quant_method"]
+    if config["quant_method"] == "compressed-tensors":
+        target_group_config["format"] = config["format"]
+    elif config["quant_method"] == "modelopt":
+        target_group_config["quant_algo"] = config["quant_algo"]
+    return target_group_config
+
+
+class HummingConfig(QuantizationConfig):
+    packed_modules_mapping: dict[str, list[str]] = {}
+
+    def __init__(self, full_config: dict[str, Any] | None = None):
+        assert_humming_available()
+        self.full_config: dict[str, Any] = full_config or {}
+
+    @classmethod
+    def get_name(cls) -> QuantizationMethods:
+        return "humming"
+
+    @classmethod
+    def get_supported_act_dtypes(cls) -> list[torch.dtype]:
+        return [torch.bfloat16, torch.half]
+
+    @classmethod
+    def get_min_capability(cls) -> int:
+        return 75
+
+    @classmethod
+    def get_config_filenames(cls) -> list[str]:
+        return []
+
+    @classmethod
+    def from_config(cls, config: dict[str, Any]) -> "HummingConfig":
+        return cls(full_config=config)
+
+    @classmethod
+    def override_quantization_method(
+        cls, hf_quant_cfg, user_quant, hf_config=None
+    ) -> QuantizationMethods | None:
+        if user_quant == "humming" and hf_config is not None:
+            model_type = hf_config.model_type
+            quant_method = hf_quant_cfg.get("quant_method", None)
+            if model_type == "gpt_oss" and quant_method == "mxfp4":
+                msg = (
+                    "For gpt-oss model, use '--moe-backend humming' "
+                    "instead of '--quantization humming'."
+                )
+                raise ValueError(msg)
+        return "humming" if user_quant == "humming" else None
+
+    def apply_vllm_mapper(self, hf_to_vllm_mapper: "WeightsMapper"):
+        self.hf_to_vllm_mapper = hf_to_vllm_mapper
+
+    def is_layer_skipped(self, config: dict[str, Any], prefix: str):
+        keys = ["ignored_layers", "ignore", "modules_to_not_convert"]
+        ignored_layers = self.get_from_keys_or(config, keys, []) or []
+        if hasattr(self, "hf_to_vllm_mapper"):
+            ignored_layers = self.hf_to_vllm_mapper.apply_list(ignored_layers)
+
+        if any(module_name in prefix for module_name in ignored_layers):
+            return True
+        if "lm_head" in prefix:
+            return True
+
+        for regex in config.get("dynamic", {}):
+            if regex[:1] != "-":
+                continue
+            if re.match(regex[2:], prefix):
+                return True
+
+        return False
+
+    def get_layer_weight_schema(self, config: dict[str, Any], prefix: str):
+        if self.is_layer_skipped(config, prefix):
+            return None
+
+        if config["quant_method"] in ["compressed-tensors", "modelopt"]:
+            group_config = compressed_tensors_get_config(config, "weights")
+            if group_config is None:
+                return None
+            config = group_config
+
+        layer_config = config
+        layer_dynamic = config.get("dynamic", {})
+        if not isinstance(layer_dynamic, dict):
+            layer_dynamic = {}
+        for regex, override_config in layer_dynamic.items():
+            if regex[:1] != "+":
+                continue
+            if re.match(regex[2:], prefix):
+                layer_config = config.copy()
+                layer_config.update(override_config)
+                break
+
+        if "quant_method" in layer_config:
+            return BaseWeightSchema.from_config(layer_config)
+        return None
+
+    def get_layer_input_schema(self, config: dict[str, Any], prefix: str):
+        if self.is_layer_skipped(config, prefix):
+            return None
+        if config["quant_method"] in ["compressed-tensors", "modelopt"]:
+            group_config = compressed_tensors_get_config(config, "input_activations")
+            if group_config is None:
+                return None
+            config = group_config
+
+        if config.get("quant_method", None) in BaseInputSchema.INPUT_SCHEMA_MAP:
+            return BaseInputSchema.from_config(config)
+        return None
+
+    def get_quant_config_for_layer(
+        self, prefix: str, layer_type: str
+    ) -> "HummingLayerQuantizationConfig | None":
+        weight_schema: BaseWeightSchema | None = None
+        force_weight_schema: HummingWeightSchema | None = None
+
+        if self.full_config:
+            weight_schema = self.get_layer_weight_schema(self.full_config, prefix)
+
+        is_online_quant = False
+        online_quant_config = envs.VLLM_HUMMING_ONLINE_QUANT_CONFIG or {}
+        if not self.full_config or online_quant_config.get("force_requant", False):
+            online_quant_config["quant_method"] = "humming"
+            schema = self.get_layer_weight_schema(online_quant_config, prefix)
+            if not self.full_config:
+                weight_schema = schema
+                is_online_quant = True
+            else:
+                force_weight_schema = schema
+
+        if weight_schema is not None:
+            input_schema = None
+            force_input_schema = None
+
+            if self.full_config:
+                input_schema = self.get_layer_input_schema(self.full_config, prefix)
+
+            if envs.VLLM_HUMMING_INPUT_QUANT_CONFIG:
+                quant_config = envs.VLLM_HUMMING_INPUT_QUANT_CONFIG.copy()
+                quant_config["quant_method"] = "humming"
+                force_input_schema = self.get_layer_input_schema(quant_config, prefix)
+                if input_schema is None:
+                    input_schema = force_input_schema
+
+            if force_weight_schema is not None and force_input_schema is None:
+                force_input_schema = HummingInputSchema()
+
+            return HummingLayerQuantizationConfig(
+                weight_schema=weight_schema,
+                input_schema=input_schema,
+                force_weight_schema=force_weight_schema,
+                force_input_schema=force_input_schema,
+                is_online_quant=is_online_quant,
+            )
+        return None
+
+    def get_quant_method(
+        self, layer: torch.nn.Module, prefix: str
+    ) -> "QuantizeMethodBase | None":
+        layer_type = "other"
+        if isinstance(layer, RoutedExperts):
+            layer_type = "moe"
+        elif isinstance(layer, LinearBase):
+            layer_type = "linear"
+
+        quant_config = self.get_quant_config_for_layer(prefix, layer_type)
+        if quant_config is None:
+            if isinstance(layer, RoutedExperts):
+                return UnquantizedFusedMoEMethod(layer.moe_config)
+            elif isinstance(layer, LinearBase):
+                return UnquantizedLinearMethod()
+        elif isinstance(layer, LinearBase):
+            return HummingLinearMethod(quant_config)
+        elif isinstance(layer, RoutedExperts):
+            return HummingMoEMethod(quant_config, layer.moe_config)
+        return None
+
+
+class HummingLayerQuantizationConfig(HummingConfig):
+    def __init__(
+        self,
+        weight_schema: "BaseWeightSchema",
+        input_schema: "BaseInputSchema | None" = None,
+        force_weight_schema: "HummingWeightSchema | None" = None,
+        force_input_schema: "HummingInputSchema | None" = None,
+        is_online_quant: bool = False,
+    ):
+        self.weight_schema = weight_schema
+        if input_schema is None:
+            input_schema = HummingInputSchema()
+        self.input_schema = input_schema
+        self.force_weight_schema = force_weight_schema
+        self.force_input_schema = force_input_schema
+        self.is_online_quant = is_online_quant
+
+    @classmethod
+    def from_config(cls, config):
+        weight_schema = BaseWeightSchema.from_config(config)
+        return cls(weight_schema)
+
+    def get_quant_method(
+        self, layer: torch.nn.Module, prefix: str
+    ) -> QuantizeMethodBase | None:
+        raise NotImplementedError
+
+
+class HummingLinearMethod(LinearMethodBase):
+    def __init__(self, quant_config: HummingLayerQuantizationConfig):
+        self.quant_config = quant_config
+        self.weight_schema = quant_config.weight_schema
+        self.input_schema = quant_config.input_schema
+        self.force_weight_schema = quant_config.force_weight_schema
+        self.force_input_schema = quant_config.force_input_schema
+        self.is_online_quant = self.quant_config.is_online_quant
+
+    def prepare_weight_loader(self, layer: torch.nn.Module, weight_loader: Callable):
+        def new_weight_loader(
+            param: torch.nn.Parameter,
+            loaded_weight: torch.Tensor,
+            shard_id: str | int | None = None,
+        ):
+            name = param.param_name
+            float_dtypes = [torch.float16, torch.bfloat16, torch.float32]
+            is_unquantized = name == "weight" and loaded_weight.dtype in float_dtypes
+            if is_unquantized and self.is_online_quant:
+                # online quant (fp16/bf16 -> quant_type)
+                assert isinstance(self.weight_schema, HummingWeightSchema)
+                f16_dtype = DataType.from_torch_dtype(layer.param_dtype)
+                has_global_scale = "TENSOR" in str(self.weight_schema.weight_scale_type)
+                tensor_list = quantize_weight(
+                    weight=loaded_weight,
+                    dtype=self.weight_schema.b_dtype,
+                    scale_dtype=self.weight_schema.bs_dtype or f16_dtype,
+                    group_size=self.weight_schema.weight_scale_group_size,
+                    has_zero_point=self.weight_schema.has_zero_point,
+                    has_global_scale=has_global_scale,
+                    is_fp_zero_point=self.weight_schema.is_fp_zero_point,
+                    pack=True,
+                )
+
+                key_list = ["weight", "weight_scale", "zero_point", "global_scale"]
+                for key, tensor in zip(key_list, tensor_list):
+                    if tensor is None or tensor.nelement() == 0:
+                        continue
+                    param = getattr(layer, key)
+                    param.weight_loader(param, tensor, shard_id)
+
+                return None
+            elif is_unquantized and not self.is_online_quant:
+                # fallback to unquantized linear
+                # some model skip some layer when quantizing model, but
+                # don't mark the layer as unquantized.
+                if not layer.is_fallback:
+                    layer.is_fallback = True
+                    for name, _ in list(layer.named_parameters()):
+                        if name != "bias":
+                            delattr(layer, name)
+                    delattr(layer, "locks")
+                    self.__class__ = UnquantizedLinearMethod  # type: ignore
+                    tensor = torch.empty(
+                        (
+                            layer.output_partition_sizes_sum,
+                            layer.input_size_per_partition,
+                        ),
+                        dtype=layer.param_dtype,
+                        device=param.device,
+                    )
+                    extra_weight_attrs = layer.extra_weight_attrs.copy()
+                    orig_weight_loader = extra_weight_attrs.pop("weight_loader")
+                    layer.weight = ModelWeightParameter(
+                        data=tensor,
+                        input_dim=1,
+                        output_dim=0,
+                        weight_loader=orig_weight_loader,
+                    )
+                    layer.weight.tp_size = layer.tp_size
+                    layer.weight.tp_rank = layer.tp_rank
+                    set_weight_attrs(layer.weight, extra_weight_attrs)
+
+                param = layer.weight
+                if shard_id is not None:
+                    return layer.weight.weight_loader(param, loaded_weight, shard_id)
+                return layer.weight.weight_loader(param, loaded_weight)
+
+            # weight processing logic for specific quantization schema
+            loaded_weight = self.weight_schema.process_loaded_weight(
+                tensor=loaded_weight,
+                name=name,
+            )
+            if shard_id is not None:
+                return weight_loader(param, loaded_weight, shard_id)
+            return weight_loader(param, loaded_weight)
+
+        return new_weight_loader
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        input_size_per_partition: int,
+        output_partition_sizes: list[int],
+        input_size: int,
+        output_size: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        layer.is_fallback = False
+        layer.param_dtype = params_dtype
+        layer.input_size = input_size
+        layer.output_size = output_size
+        layer.input_size_per_partition = input_size_per_partition
+        layer.output_partition_sizes_sum = sum(output_partition_sizes)
+        layer.output_partition_sizes = output_partition_sizes
+        layer.extra_weight_attrs = extra_weight_attrs.copy()
+
+        weight_loader = extra_weight_attrs.get("weight_loader", default_weight_loader)
+        new_weight_loader = self.prepare_weight_loader(layer, weight_loader)
+        extra_weight_attrs["weight_loader"] = new_weight_loader
+
+        for key in ["weight_block_size", "block_structure"]:
+            block_size = getattr(self.weight_schema, key, None)
+            if block_size is not None:
+                layer.weight_block_size = block_size
+
+        weight_tensor_attrs = self.weight_schema.get_tensors_attrs(
+            shape_n=layer.output_partition_sizes_sum,
+            shape_k=layer.input_size_per_partition,
+            param_dtype=params_dtype,
+            stack_size=len(layer.output_partition_sizes),
+        )
+
+        input_tensor_attrs = self.input_schema.get_tensors_attrs(
+            shape_k=layer.input_size_per_partition,
+            param_dtype=params_dtype,
+            stack_size=len(layer.output_partition_sizes),
+        )
+
+        tensors_attrs = weight_tensor_attrs | input_tensor_attrs
+
+        for name, attrs in tensors_attrs.items():
+            tensor = torch.empty(attrs["shape"], dtype=attrs["dtype"])
+            extra_attrs = attrs.get("extra_attrs", {}).copy()
+            extra_attrs.update(extra_weight_attrs)
+            param = prepare_param(tensor, name, extra_attrs)
+            setattr(layer, name, param)
+
+        locks = torch.zeros(1024, dtype=torch.int32)
+        layer.register_buffer("locks", locks)
+
+        if self.force_input_schema is not None:
+            self.input_schema = self.force_input_schema
+
+        if not hasattr(layer, "weight"):
+            param = prepare_param(torch.tensor(0), "weight", extra_weight_attrs)
+            layer.weight = param
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        if layer.is_fallback:
+            return None
+
+        # convert from checkpoint format to humming format
+        if not isinstance(self.weight_schema, HummingWeightSchema):
+            self.weight_schema, tensors = self.weight_schema.convert_humming(
+                tensors=layer.state_dict(),
+                shape_n_stacks=layer.output_partition_sizes,
+                shape_k_stacks=[layer.input_size_per_partition],
+                param_dtype=layer.param_dtype,
+            )
+
+            self.input_schema, _ = self.input_schema.convert_humming(
+                tensors=layer.state_dict(),
+                shape_n_stacks=layer.output_partition_sizes,
+                shape_k_stacks=[layer.input_size_per_partition],
+                param_dtype=layer.param_dtype,
+            )
+
+            for name, _ in list(layer.named_parameters()):
+                delattr(layer, name)
+
+            for name, tensor in tensors.items():
+                param = torch.nn.Parameter(tensor, requires_grad=False)
+                setattr(layer, name, param)
+
+            del tensors
+
+        # force requant (origin quant setting -> fp16/bf16 -> new_quant setting)
+        assert isinstance(self.weight_schema, HummingWeightSchema)
+        force_requant = self.force_weight_schema is not None
+        if force_requant and self.weight_schema != self.force_weight_schema:
+            tensors = self.weight_schema.requant_tensors(
+                tensors=layer.state_dict(),
+                target_weight_schema=self.force_weight_schema,
+                param_dtype=layer.param_dtype,
+            )
+
+            self.weight_schema = self.force_weight_schema
+
+            for name, _ in list(layer.named_parameters()):
+                if name != "bias":
+                    delattr(layer, name)
+
+            for name, tensor in tensors.items():
+                param = torch.nn.Parameter(tensor, requires_grad=False)
+                setattr(layer, name, param)
+
+            del tensors
+
+        # prepare layer config from humming kernel
+        HummingMethod.prepare_layer_meta(
+            layer=layer,
+            shape_n=layer.output_partition_sizes_sum,
+            shape_k=layer.input_size_per_partition,
+            weight_schema=self.weight_schema,
+            input_schema=self.input_schema,
+            pad_n_to_multiple=256,
+            pad_k_to_multiple=128,
+            has_bias=layer.has_bias,
+            torch_dtype=layer.param_dtype,
+        )
+
+        # preprocess weight for inference
+        HummingMethod.transform_humming_layer(layer)
+
+        # compute_config: kernel configs that do not directly affect weights
+        # but significantly impact kernel behavior or computation precision.
+        # see https://github.com/inclusionAI/humming/blob/main/docs/config.md
+        compute_config = {
+            "use_batch_invariant": envs.VLLM_BATCH_INVARIANT,
+            "use_f16_accum": envs.VLLM_HUMMING_USE_F16_ACCUM,
+            "gemm_type": "dense",
+        }
+        self.compute_config = json.dumps(compute_config)
+
+    def apply(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        flatten_inputs = x.view(-1, x.size(-1))
+        output = HummingMethod.forward_layer(
+            layer=layer,
+            inputs=flatten_inputs,
+            compute_config=self.compute_config,
+        )
+        output = output.view(*x.shape[:-1], output.size(-1))
+        return output
+
+
+class HummingMoEMethod(FusedMoEMethodBase):
+    def __init__(
+        self, quant_config: HummingLayerQuantizationConfig, moe: "FusedMoEConfig"
+    ) -> None:
+        super().__init__(moe)
+        self.quant_config = quant_config
+        self.moe = moe
+        self.weight_schema = quant_config.weight_schema
+        self.input_schema = quant_config.input_schema
+        self.force_weight_schema = quant_config.force_weight_schema
+        self.force_input_schema = quant_config.force_input_schema
+
+    def prepare_weight_loader(self, layer, weight_loader):
+        def new_weight_loader(
+            param: torch.nn.Parameter,
+            loaded_weight: torch.Tensor,
+            weight_name: str,
+            shard_id: str,
+            expert_id: int | None = None,
+            return_success: bool = False,
+        ):
+            name = param.param_name
+            float_dtypes = [torch.float16, torch.bfloat16, torch.float32]
+            is_unquantized = name == "weight" and loaded_weight.dtype in float_dtypes
+            # online quant (fp16/bf16 -> quant_type)
+            if is_unquantized:
+                assert isinstance(self.weight_schema, HummingWeightSchema)
+                f16_dtype = DataType.from_torch_dtype(layer.param_dtype)
+                has_global_scale = "TENSOR" in str(self.weight_schema.weight_scale_type)
+                tensor_list = quantize_weight(
+                    weight=loaded_weight,
+                    dtype=self.weight_schema.b_dtype,
+                    scale_dtype=self.weight_schema.bs_dtype or f16_dtype,
+                    group_size=self.weight_schema.weight_scale_group_size,
+                    has_zero_point=self.weight_schema.has_zero_point,
+                    has_global_scale=has_global_scale,
+                    is_fp_zero_point=self.weight_schema.is_fp_zero_point,
+                    pack=True,
+                )
+
+                key_list = ["weight", "weight_scale", "zero_point", "global_scale"]
+                success = True
+                for key, tensor in zip(key_list, tensor_list):
+                    if tensor is None or tensor.nelement() == 0:
+                        continue
+                    sublayer_name = "w2" if shard_id == "w2" else "w13"
+
+                    param = getattr(layer, sublayer_name + "_" + key)
+                    part_subccess = param.weight_loader(
+                        param=param,
+                        loaded_weight=tensor.cpu(),
+                        weight_name=shard_id + "_" + key,
+                        shard_id=shard_id,
+                        expert_id=expert_id,
+                        return_success=return_success,
+                    )
+                    success = success and part_subccess
+
+                return success if return_success else None
+
+            # weight processing logic for specific quantization schema
+            loaded_weight = self.weight_schema.process_loaded_weight(
+                tensor=loaded_weight,
+                name=name,
+            )
+            return weight_loader(
+                param,
+                loaded_weight,
+                weight_name,
+                shard_id=shard_id,
+                expert_id=expert_id,
+                return_success=return_success,
+            )
+
+        return new_weight_loader
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        layer.num_experts = num_experts
+        layer.param_dtype = params_dtype
+        layer.intermediate_size = intermediate_size_per_partition
+        weight_loader = extra_weight_attrs.get("weight_loader", default_weight_loader)
+        weight_loader = self.prepare_weight_loader(layer, weight_loader)
+        extra_weight_attrs["weight_loader"] = weight_loader
+
+        # sublayer: a layer contains multiple sets of weights for quantized GEMM
+        # (e.g., weight, weight_scale, etc.).
+        # The weight names of sublayer start with the prefix "{sublayer_name}_"
+        layer.sublayer_configs = {
+            "w13": {
+                "shape_n": intermediate_size_per_partition * 2,
+                "shape_k": hidden_size,
+                "tensors_attrs": self.weight_schema.get_padded_tensors_attrs(
+                    shape_n=intermediate_size_per_partition * 2,
+                    shape_k=hidden_size,
+                    num_experts=num_experts,
+                    param_dtype=params_dtype,
+                    has_bias=self.moe.has_bias,
+                ),
+            },
+            "w2": {
+                "shape_n": hidden_size,
+                "shape_k": intermediate_size_per_partition,
+                "tensors_attrs": self.weight_schema.get_padded_tensors_attrs(
+                    shape_n=hidden_size,
+                    shape_k=intermediate_size_per_partition,
+                    num_experts=num_experts,
+                    param_dtype=params_dtype,
+                    has_bias=self.moe.has_bias,
+                ),
+            },
+        }
+
+        for sublayer_name, configs in layer.sublayer_configs.items():
+            for name, attrs in configs["tensors_attrs"].items():
+                tensor = torch.empty(attrs["shape"], dtype=attrs["dtype"])
+                param = torch.nn.Parameter(tensor, requires_grad=False)
+                extra_attrs = attrs.get("extra_attrs", {}).copy()
+                extra_attrs.update(extra_weight_attrs)
+                param = prepare_moe_param(tensor, name, extra_attrs)
+                setattr(layer, f"{sublayer_name}_{name}", param)
+
+        if self.force_input_schema is not None:
+            self.input_schema = self.force_input_schema
+
+        locks = torch.zeros(1024, dtype=torch.int32)
+        layer.register_buffer("locks", locks)
+
+    def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantConfig:
+        from vllm.model_executor.layers.quantization.utils.humming_utils import (
+            get_humming_moe_quant_config,
+        )
+
+        return get_humming_moe_quant_config(layer)
+
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
+        if getattr(self, "processed", False):
+            return
+        self.processed = True
+        layer.weight_schemas = {}
+        layer.input_schemas = {}
+        for sublayer_name, configs in layer.sublayer_configs.items():
+            input_schema = self.input_schema
+            weight_schema = self.weight_schema
+            # convert from checkpoint format to humming format
+            if not isinstance(weight_schema, HummingWeightSchema):
+                tensors: dict[str, torch.Tensor] = dict(
+                    (key.removeprefix(sublayer_name + "_"), value)
+                    for key, value in layer.state_dict().items()
+                    if key.startswith(sublayer_name + "_")
+                )
+
+                shape_k_stacks = [configs["shape_k"]]
+                shape_n_stacks = [configs["shape_n"]]
+                if sublayer_name == "w13":
+                    shape_n_stacks = [configs["shape_n"] // 2] * 2
+
+                weight_schema, tensors = weight_schema.convert_humming(
+                    tensors=tensors,
+                    shape_n_stacks=shape_n_stacks,
+                    shape_k_stacks=shape_k_stacks,
+                    param_dtype=layer.param_dtype,
+                    num_experts=layer.num_experts,
+                )
+
+                input_schema, _ = input_schema.convert_humming(
+                    tensors=tensors,
+                    shape_n_stacks=shape_n_stacks,
+                    shape_k_stacks=shape_k_stacks,
+                    param_dtype=layer.param_dtype,
+                    num_experts=layer.num_experts,
+                )
+
+                for name, _ in list(layer.named_parameters()):
+                    if not name.startswith(sublayer_name + "_"):
+                        continue
+                    delattr(layer, name)
+
+                for name, tensor in tensors.items():
+                    name = f"{sublayer_name}_{name}"
+                    param = torch.nn.Parameter(tensor, requires_grad=False)
+                    setattr(layer, name, param)
+
+                layer.weight_schemas[sublayer_name] = weight_schema
+                layer.input_schemas[sublayer_name] = input_schema
+
+            # force requant (origin quant setting -> fp16/bf16 -> new_quant setting)
+            assert isinstance(weight_schema, HummingWeightSchema)
+            force_requant = self.force_weight_schema is not None
+            if force_requant and weight_schema != self.force_weight_schema:
+                tensors = dict(
+                    (key.removeprefix(sublayer_name + "_"), value)
+                    for key, value in layer.state_dict().items()
+                    if key.startswith(sublayer_name + "_")
+                )
+
+                tensors = weight_schema.requant_tensors(
+                    tensors=tensors,
+                    target_weight_schema=self.force_weight_schema,
+                    param_dtype=layer.param_dtype,
+                )
+
+                weight_schema = self.force_weight_schema
+
+                for name, _ in list(layer.named_parameters()):
+                    if not name.startswith(sublayer_name + "_"):
+                        continue
+                    if name == sublayer_name + "_bias":
+                        continue
+                    delattr(layer, name)
+
+                for name, tensor in tensors.items():
+                    name = f"{sublayer_name}_{name}"
+                    param = torch.nn.Parameter(tensor, requires_grad=False)
+                    setattr(layer, name, param)
+
+                del tensors
+
+            # prepare layer config from humming kernel
+            HummingMethod.prepare_layer_meta(
+                layer=layer,
+                shape_n=configs["shape_n"],
+                shape_k=configs["shape_k"],
+                pad_n_to_multiple=256,
+                pad_k_to_multiple=128,
+                input_schema=input_schema,
+                weight_schema=weight_schema,
+                has_bias=self.moe.has_bias,
+                num_experts=layer.num_experts,
+                torch_dtype=layer.param_dtype,
+                sublayer_name=sublayer_name,
+            )
+
+            # preprocess weight for inference
+            HummingMethod.transform_humming_layer(layer, sublayer_name=sublayer_name)
+
+        # use moe modular
+        experts: HummingIndexedExperts | HummingGroupedExperts
+        assert self.moe_quant_config is not None
+        if get_humming_moe_gemm_type() == "indexed":
+            experts = HummingIndexedExperts(layer, self.moe, self.moe_quant_config)
+        else:
+            experts = HummingGroupedExperts(layer, self.moe, self.moe_quant_config)
+        self.experts = experts
+
+    def select_gemm_impl(
+        self,
+        prepare_finalize,
+        layer: torch.nn.Module,
+    ):
+        from vllm.model_executor.layers.fused_moe import modular_kernel as mk
+
+        activation_format = prepare_finalize.activation_format
+        assert self.moe_quant_config is not None
+        if activation_format == mk.FusedMoEActivationFormat.BatchedExperts:
+            return BatchedHummingGroupedExperts(
+                layer=layer,
+                moe_config=self.moe,
+                quant_config=self.moe_quant_config,
+                max_num_tokens=prepare_finalize.max_num_tokens_per_rank(),
+                num_dispatchers=prepare_finalize.num_dispatchers(),
+            )
+        elif get_humming_moe_gemm_type() == "indexed":
+            return HummingIndexedExperts(layer, self.moe, self.moe_quant_config)
+        else:
+            return HummingGroupedExperts(layer, self.moe, self.moe_quant_config)
+
+    def apply(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        workspace1, workspace2, output = self.experts.make_workspaces(
+            M=topk_ids.size(0),
+            topk=topk_ids.size(1),
+            activation=layer.activation,
+        )
+
+        assert workspace1.data_ptr() == output.data_ptr()
+
+        self.experts.main_apply(
+            hidden_states=x,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            workspace1=workspace1,
+            workspace2=workspace2,
+            expert_tokens_meta=None,
+        )
+
+        return output
diff --git a/vllm/model_executor/layers/quantization/inc.py b/vllm/model_executor/layers/quantization/inc.py
index 359f24688ce9..3a4d7d4039ff 100644
--- a/vllm/model_executor/layers/quantization/inc.py
+++ b/vllm/model_executor/layers/quantization/inc.py
@@ -2,18 +2,30 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from fractions import Fraction
+from functools import lru_cache
 from typing import TYPE_CHECKING, Any
 
 import regex as re
 import torch
+from torch.nn.parameter import Parameter
 
 from vllm.logger import init_logger
-from vllm.model_executor.layers.linear import LinearBase, UnquantizedLinearMethod
+from vllm.model_executor.layers.fused_moe import RoutedExperts
+from vllm.model_executor.layers.linear import (
+    LinearBase,
+    LinearMethodBase,
+    UnquantizedLinearMethod,
+)
 from vllm.model_executor.layers.quantization import (
     QuantizationConfig,
     QuantizationMethods,
 )
 from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
+from vllm.model_executor.parameter import (
+    GroupQuantScaleParameter,
+    PackedvLLMParameter,
+    RowvLLMParameter,
+)
 from vllm.platforms import current_platform
 from vllm.scalar_type import scalar_types
 
@@ -224,7 +236,6 @@ def apply_vllm_mapper(self, hf_to_vllm_mapper: "WeightsMapper"):
             self.extra_config = hf_to_vllm_mapper.apply_dict(self.extra_config)
 
     def apply_awq_quant_layer(self, layer, prefix: str, backend: str = "auto"):
-        from vllm.model_executor.layers.fused_moe import FusedMoE
         from vllm.model_executor.layers.quantization.utils.marlin_utils import (
             check_marlin_supported,
             check_moe_marlin_supports_layer,
@@ -254,7 +265,7 @@ def apply_awq_quant_layer(self, layer, prefix: str, backend: str = "auto"):
                 AWQ_TYPE_MAP[weight_bits], group_size, not sym
             )
 
-            if isinstance(layer, FusedMoE):
+            if isinstance(layer, RoutedExperts):
                 use_marlin = use_marlin and check_moe_marlin_supports_layer(
                     layer, group_size
                 )
@@ -288,7 +299,7 @@ def apply_awq_quant_layer(self, layer, prefix: str, backend: str = "auto"):
                 zero_point=not sym,
             )
 
-        if isinstance(layer, FusedMoE):
+        if isinstance(layer, RoutedExperts):
             if use_marlin:
                 return AWQMarlinMoEMethod(quant_args_marlin, layer.moe_config)
             from vllm.model_executor.layers.quantization.moe_wna16 import MoeWNA16Config
@@ -310,7 +321,6 @@ def apply_awq_quant_layer(self, layer, prefix: str, backend: str = "auto"):
         return None
 
     def apply_gptq_quant_layer(self, layer, prefix: str, backend: str = "auto"):
-        from vllm.model_executor.layers.fused_moe import FusedMoE
         from vllm.model_executor.layers.quantization.utils.marlin_utils import (
             check_marlin_supported,
             check_moe_marlin_supports_layer,
@@ -339,20 +349,20 @@ def apply_gptq_quant_layer(self, layer, prefix: str, backend: str = "auto"):
             use_marlin = (weight_bits, sym) in GPTQ_TYPE_MAP and check_marlin_supported(
                 GPTQ_TYPE_MAP[(weight_bits, sym)], group_size, has_zp=not sym
             )
-            if isinstance(layer, FusedMoE):
+            if isinstance(layer, RoutedExperts):
                 use_marlin = use_marlin and check_moe_marlin_supports_layer(
                     layer, group_size
                 )
         else:
             use_marlin = False
         if use_marlin:
-            from vllm.model_executor.layers.quantization.gptq_marlin import (
-                GPTQMarlinConfig,
-                GPTQMarlinLinearMethod,
-                GPTQMarlinMoEMethod,
+            from vllm.model_executor.layers.quantization.auto_gptq import (
+                AutoGPTQConfig,
+                AutoGPTQLinearMethod,
+                AutoGPTQMoEMethod,
             )
 
-            quant_args_marlin = GPTQMarlinConfig(
+            quant_args_marlin = AutoGPTQConfig(
                 weight_bits=weight_bits,
                 group_size=group_size,
                 is_sym=sym,
@@ -361,23 +371,10 @@ def apply_gptq_quant_layer(self, layer, prefix: str, backend: str = "auto"):
                 dynamic={},
                 full_config={},
             )
-        else:
-            from vllm.model_executor.layers.quantization.gptq import (
-                GPTQConfig,
-                GPTQLinearMethod,
-            )
 
-            quant_args = GPTQConfig(
-                weight_bits=weight_bits,
-                group_size=group_size,
-                lm_head_quantized=False,
-                desc_act=False,
-                dynamic={},
-            )
-
-        if isinstance(layer, FusedMoE):
+        if isinstance(layer, RoutedExperts):
             if use_marlin:
-                return GPTQMarlinMoEMethod(quant_args_marlin, layer.moe_config)
+                return AutoGPTQMoEMethod(quant_args_marlin, layer.moe_config)
             else:
                 from vllm.model_executor.layers.quantization.moe_wna16 import (
                     MoeWNA16Config,
@@ -396,22 +393,93 @@ def apply_gptq_quant_layer(self, layer, prefix: str, backend: str = "auto"):
 
         if isinstance(layer, (LinearBase, ParallelLMHead)):
             if use_marlin:
-                return GPTQMarlinLinearMethod(quant_args_marlin)
+                return AutoGPTQLinearMethod(quant_args_marlin)
             else:
-                return GPTQLinearMethod(quant_args)
+                raise NotImplementedError(
+                    f"INC quantization with bits={weight_bits}, sym={sym} "
+                    "is not supported. Only 4-bit and 8-bit symmetric "
+                    "quantization is supported with Marlin kernels."
+                )
 
         return None
 
-    def apply_ipex_quant_layer(self, layer, prefix: str):
+    def apply_xpu_w4a16_quant_layer(self, layer, prefix: str):
         weight_bits, group_size, sym = self.get_layer_config(layer, prefix)
+
         if not self.check_quantized(weight_bits):
             if isinstance(layer, (LinearBase, ParallelLMHead)):
                 return UnquantizedLinearMethod()
             else:
                 return None
-        raise NotImplementedError(
-            "INC quantization is not supported during xpu kernel migration."
-        )
+
+        if weight_bits != 4:
+            raise NotImplementedError(
+                f"INC on XPU only supports 4-bit quantization, "
+                f"got weight_bits={weight_bits}."
+            )
+        if not sym:
+            raise NotImplementedError(
+                "INC W4A16 on XPU only supports symmetric quantization for now."
+            )
+
+        if isinstance(layer, (LinearBase, ParallelLMHead)):
+            is_ark_available, ark_error, _, _ = get_ark_state()
+            if is_ark_available:
+                return INCARKLinearMethod(
+                    weight_bits=weight_bits,
+                    group_size=group_size,
+                    sym=sym,
+                )
+
+            logger.debug(
+                "ARK backend is unavailable for layer %s; "
+                "falling back to the default XPU INC path. Error: %s",
+                prefix,
+                ark_error or "unknown error",
+            )
+
+            return INCXPULinearMethod(
+                weight_bits=weight_bits,
+                group_size=group_size,
+                sym=sym,
+            )
+        return None
+
+    def apply_cpu_w4a16_quant_layer(self, layer, prefix: str):
+        weight_bits, group_size, sym = self.get_layer_config(layer, prefix)
+        if not self.check_quantized(weight_bits):
+            if isinstance(layer, (LinearBase, ParallelLMHead)):
+                return UnquantizedLinearMethod()
+            else:
+                return None
+
+        if weight_bits != 4:
+            raise NotImplementedError(
+                f"INC on CPU only supports 4-bit quantization, "
+                f"got weight_bits={weight_bits}."
+            )
+        if not sym:
+            raise NotImplementedError(
+                "INC W4A16 on CPU only supports symmetric quantization for now."
+            )
+        if isinstance(layer, (LinearBase, ParallelLMHead)):
+            is_ark_available, ark_error, _, _ = get_ark_state()
+            if is_ark_available:
+                return INCARKLinearMethod(
+                    weight_bits=weight_bits,
+                    group_size=group_size,
+                    sym=sym,
+                )
+
+            logger.debug(
+                "ARK backend is unavailable for layer %s; "
+                "falling back to the default CPU INC path. Error: %s",
+                prefix,
+                ark_error or "unknown error",
+            )
+
+            return self.apply_gptq_quant_layer(layer, prefix)
+        return None
 
     def get_quant_method(self, layer: torch.nn.Module, prefix: str):
         if prefix and self.extra_config:
@@ -420,23 +488,307 @@ def get_quant_method(self, layer: torch.nn.Module, prefix: str):
                     layer_name == prefix or layer_name == f"model.{prefix}"
                 ) and self.extra_config[layer_name].get("bits", 16) >= 16:
                     return UnquantizedLinearMethod()
-        if (
-            current_platform.is_cpu()
-            or current_platform.is_xpu()
-            or self.backend == "ipex"
-        ):
-            return self.apply_ipex_quant_layer(layer, prefix)
-        if "gptq" in self.packing_format or "gptq" in self.backend:
+
+        if current_platform.is_xpu():
+            return self.apply_xpu_w4a16_quant_layer(layer, prefix)
+        is_gptq = "gptq" in self.packing_format or "gptq" in self.backend
+        if current_platform.is_cpu() and is_gptq:
+            return self.apply_cpu_w4a16_quant_layer(layer, prefix)
+        if is_gptq:
             return self.apply_gptq_quant_layer(layer, prefix)
         if "awq" in self.packing_format or "awq" in self.backend:
             return self.apply_awq_quant_layer(layer, prefix)
 
+        raise NotImplementedError(
+            f"Unsupported quantization configuration for layer '{prefix}'. "
+            f"Platform: CPU={current_platform.is_cpu()}. "
+            f"Platform: XPU={current_platform.is_xpu()}. "
+            f"Format: {self.packing_format}, Backend: {self.backend}."
+        )
+
     @classmethod
     def override_quantization_method(
-        cls, hf_quant_cfg, user_quant
+        cls, hf_quant_cfg, user_quant, hf_config=None
     ) -> "QuantizationMethods | None":
         """Override the `auto-round` method to `inc`."""
         is_auto_round_format = hf_quant_cfg.get("quant_method", None) == "auto-round"
         if is_auto_round_format:
             return cls.get_name()
         return None
+
+
+class INCXPULinearBase(LinearMethodBase):
+    def __init__(self, weight_bits: int, group_size: int, sym: bool):
+        self.weight_bits = weight_bits
+        self.group_size = group_size
+        self.sym = sym
+        self.pack_factor = 32 // weight_bits
+
+    def _create_inc_weights(
+        self,
+        layer: torch.nn.Module,
+        input_size_per_partition: int,
+        output_partition_sizes: list[int],
+        params_dtype: torch.dtype,
+        weight_loader: Any,
+        group_size: int,
+        pack_factor: int,
+    ) -> None:
+        output_size_per_partition = sum(output_partition_sizes)
+        scales_and_zp_size = input_size_per_partition // group_size
+
+        qweight = PackedvLLMParameter(
+            data=torch.empty(
+                input_size_per_partition // pack_factor,
+                output_size_per_partition,
+                dtype=torch.int32,
+            ),
+            input_dim=0,
+            output_dim=1,
+            packed_dim=0,
+            packed_factor=pack_factor,
+            weight_loader=weight_loader,
+        )
+
+        scales = GroupQuantScaleParameter(
+            data=torch.empty(
+                scales_and_zp_size,
+                output_size_per_partition,
+                dtype=params_dtype,
+            ),
+            input_dim=0,
+            output_dim=1,
+            weight_loader=weight_loader,
+        )
+
+        qzeros = PackedvLLMParameter(
+            data=torch.empty(
+                scales_and_zp_size,
+                output_size_per_partition // pack_factor,
+                dtype=torch.int32,
+            ),
+            input_dim=0,
+            output_dim=1,
+            packed_dim=1,
+            packed_factor=pack_factor,
+            weight_loader=weight_loader,
+        )
+
+        layer.register_parameter("qweight", qweight)
+        layer.register_parameter("scales", scales)
+        layer.register_parameter("qzeros", qzeros)
+
+        g_idx = RowvLLMParameter(
+            data=torch.tensor(
+                [i // group_size for i in range(input_size_per_partition)],
+                dtype=torch.int32,
+            ),
+            input_dim=0,
+            weight_loader=weight_loader,
+        )
+        layer.register_parameter("g_idx", g_idx)
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        input_size_per_partition: int,
+        output_partition_sizes: list[int],
+        input_size: int,
+        output_size: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        self._create_inc_weights(
+            layer=layer,
+            input_size_per_partition=input_size_per_partition,
+            output_partition_sizes=output_partition_sizes,
+            params_dtype=params_dtype,
+            weight_loader=extra_weight_attrs.get("weight_loader"),
+            group_size=self.group_size,
+            pack_factor=self.pack_factor,
+        )
+
+
+@lru_cache(maxsize=1)
+def get_ark_state() -> tuple[bool, str | None, Any | None, Any | None]:
+    """Return ARK availability, error details, cached instance, and QuantLinear."""
+    try:
+        import auto_round_kernel
+        from auto_round_kernel.qlinear import QuantLinear
+
+        logger.info("Successfully imported auto_round_kernel.")
+    except ImportError as error:
+        return False, str(error), None, None
+
+    ark_loader = getattr(auto_round_kernel, "_ark_instance", None)
+    if not callable(ark_loader):
+        return False, "auto_round_kernel does not expose _ark_instance().", None, None
+
+    try:
+        ark_instance = ark_loader()
+    except Exception as error:
+        return False, str(error), None, None
+
+    if ark_instance is None:
+        return False, "auto_round_kernel._ark_instance() returned None.", None, None
+
+    return True, None, ark_instance, QuantLinear
+
+
+class INCXPULinearMethod(INCXPULinearBase):
+    """XPU linear method for INC w4a16 GPTQ quantization (symmetric only).
+
+    Repacks GPTQ weights from [in_packed, out] to oneDNN [out, in_packed]
+    layout and calls torch.ops._xpu_C.int4_gemm_w4a16.
+
+    GPTQ format: qweight [in_packed, out] with sequential nibble order.
+
+    Note: Asymmetric quantization (sym=false) is not for now.
+
+    FIXME(yiliu30): Refine the implementation to reuse XPUwNa16LinearKernel.
+    """
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        """Repack GPTQ weights into kernel-ready NT layout."""
+        device = layer.qweight.data.device
+
+        # oneDNN int4 kernel requires strides[0]==1 ("NT format"), but GPTQ
+        # checkpoint is [K_packed, N] contiguous with strides (N, 1).
+        # Two transposes are needed — neither alone can achieve this:
+        #   1. .t().contiguous() → [N, K_packed] contiguous in memory
+        #   2. .t()              → [K_packed, N] view with strides (1, K_packed)
+        # The result has the same logical shape but strides[0]==1 as required.
+        qweight_ct = layer.qweight.data.t().contiguous()
+        layer.qweight = Parameter(qweight_ct.t(), requires_grad=False)
+
+        # Scales: [num_groups, out] — no change needed
+        layer.scales = Parameter(layer.scales.data, requires_grad=False)
+
+        # Symmetric: GPTQ v1 stores qzeros=7, effective zp = 7+1 = 8
+        # Kernel expects int8 scalar = 8
+        layer.qzeros = Parameter(
+            torch.tensor([8], dtype=torch.int8, device=device),
+            requires_grad=False,
+        )
+
+    def apply(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        # qweight is already in NT layout [K_packed, N] (strides (1, K_packed))
+        # from process_weights_after_loading — pass directly to kernel.
+        out_shape = x.shape[:-1] + (layer.qweight.shape[1],)
+        reshaped_x = x.reshape(-1, x.shape[-1])
+        out = torch.ops._xpu_C.int4_gemm_w4a16(
+            reshaped_x,
+            layer.qweight,
+            bias,
+            layer.scales,
+            layer.qzeros,
+            self.group_size,
+            None,  # g_idx not needed: desc_act is always False for INC models
+        )
+        return out.reshape(out_shape)
+
+
+class INCARKLinearMethod(INCXPULinearBase):
+    """XPU & CPU w4a16 linear method for INC quantization utilizing the ARK backend.
+
+    See: https://github.com/intel/auto-round/blob/main/auto_round_extension/ark/README.md
+
+    Repacks GPTQ/INC weights into ARK's layout.
+    """
+
+    def __init__(self, weight_bits: int, group_size: int, sym: bool):
+        super().__init__(weight_bits=weight_bits, group_size=group_size, sym=sym)
+
+        is_available, error_str, _, quant_linear_cls = get_ark_state()
+        if not is_available or quant_linear_cls is None:
+            reason = error_str or "unknown error"
+            raise ImportError(f"Failed to import auto_round_kernel. {reason}")
+
+        self.QuantLinear = quant_linear_cls
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        input_size_per_partition: int,
+        output_partition_sizes: list[int],
+        input_size: int,
+        output_size: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        super().create_weights(
+            layer=layer,
+            input_size_per_partition=input_size_per_partition,
+            output_partition_sizes=output_partition_sizes,
+            input_size=input_size,
+            output_size=output_size,
+            params_dtype=params_dtype,
+            **extra_weight_attrs,
+        )
+        layer.in_features = input_size_per_partition
+        layer.out_features = sum(output_partition_sizes)
+        layer.params_dtype = params_dtype
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        if hasattr(layer, "input_size_per_partition"):
+            in_features = layer.input_size_per_partition
+        elif hasattr(layer, "input_size"):
+            in_features = layer.input_size
+        else:
+            raise AttributeError("Cannot determine in_features for layer.")
+
+        if hasattr(layer, "output_partition_sizes"):
+            out_features = sum(layer.output_partition_sizes)
+        elif hasattr(layer, "output_size_per_partition"):
+            out_features = layer.output_size_per_partition
+        elif hasattr(layer, "output_size"):
+            out_features = layer.output_size
+        else:
+            out_features = layer.scales.shape[-1]
+
+        ark_linear = self.QuantLinear(
+            bits=self.weight_bits,
+            group_size=self.group_size,
+            sym=self.sym,
+            in_features=in_features,
+            out_features=out_features,
+            bias=layer.bias is not None,
+            weight_dtype=layer.params_dtype,
+        )
+
+        ark_linear.to(layer.qweight.device)
+
+        with torch.no_grad():
+            ark_linear.qweight.copy_(layer.qweight.detach())
+
+            if hasattr(layer, "qzeros") and layer.qzeros is not None:
+                ark_linear.qzeros.copy_(layer.qzeros.detach())
+            else:
+                ark_linear.qzeros = None
+
+            ark_linear.scales.copy_(layer.scales.detach())
+
+            if hasattr(layer, "bias") and layer.bias is not None:
+                ark_linear.bias.copy_(layer.bias.detach())
+
+        ark_linear.post_init()
+
+        layer.ark_linear = ark_linear
+
+        del layer.qweight
+        if hasattr(layer, "qzeros"):
+            del layer.qzeros
+        del layer.scales
+
+    def apply(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        return layer.ark_linear.forward(x)
diff --git a/vllm/model_executor/layers/quantization/input_quant_fp8.py b/vllm/model_executor/layers/quantization/input_quant_fp8.py
index 0ae4ed33375d..35e0b4533f46 100644
--- a/vllm/model_executor/layers/quantization/input_quant_fp8.py
+++ b/vllm/model_executor/layers/quantization/input_quant_fp8.py
@@ -173,7 +173,16 @@ def forward_xpu(
         scale_ub: torch.Tensor | None = None,
         use_triton: bool = False,
     ) -> tuple[torch.Tensor, torch.Tensor]:
-        # XPU can use same code path as CUDA.
+        if self.is_group_quant and not self.static:
+            from vllm.model_executor.layers.quantization.utils import fp8_utils
+
+            return fp8_utils.per_token_group_quant_fp8(
+                x,
+                group_size=self.group_size,
+                column_major_scales=self.column_major_scales,
+                dtype=_FP8_DTYPE,
+                use_ue8m0=self.use_ue8m0,
+            )
         return self.forward_cuda(x, scale, scale_ub, use_triton)
 
     def forward_native(
diff --git a/vllm/model_executor/layers/quantization/kv_cache.py b/vllm/model_executor/layers/quantization/kv_cache.py
index fe2e31252250..726ac2232af9 100644
--- a/vllm/model_executor/layers/quantization/kv_cache.py
+++ b/vllm/model_executor/layers/quantization/kv_cache.py
@@ -9,7 +9,8 @@
     QuantizeMethodBase,
 )
 from vllm.platforms import current_platform
-from vllm.v1.attention.backend import is_quantized_kv_cache
+from vllm.utils.torch_utils import is_quantized_kv_cache
+from vllm.v1.kv_cache_interface import kv_cache_uses_per_token_head_scales
 
 logger = init_logger(__name__)
 
@@ -53,6 +54,20 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
             assert not hasattr(layer, "prob_scale")
             return
 
+        # Per-token-head quantized KV cache: scales are computed dynamically
+        # per (token, head) in the kernel at cache-write time.  Checkpoint
+        # scales are never used regardless of calculate_kv_scales.
+        if kv_cache_uses_per_token_head_scales(layer.kv_cache_dtype):
+            layer._k_scale.copy_(1.0)
+            layer._v_scale.copy_(1.0)
+            layer._k_scale_float = 1.0
+            layer._v_scale_float = 1.0
+            del layer.k_scale
+            del layer.v_scale
+            del layer.q_scale
+            del layer.prob_scale
+            return
+
         # If the kv-cache is not quantized, we enforce the k/v_scale to be 1.0
         # regardless whether the kv-scale is available in the checkpoint.
         # No need to process kv scales after loading if we are going to
diff --git a/vllm/model_executor/layers/quantization/modelopt.py b/vllm/model_executor/layers/quantization/modelopt.py
index eb9591936438..c4036e2f18b0 100644
--- a/vllm/model_executor/layers/quantization/modelopt.py
+++ b/vllm/model_executor/layers/quantization/modelopt.py
@@ -8,21 +8,25 @@
 from torch.nn.parameter import Parameter
 
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.config import get_current_vllm_config
 from vllm.logger import init_logger
-from vllm.model_executor.kernels.linear import init_fp8_linear_kernel
+from vllm.model_executor.kernels.linear import (
+    MarlinNvFp4LinearKernel,
+    NvFp4LinearLayerConfig,
+    init_fp8_linear_kernel,
+    init_mxfp8_linear_kernel,
+    init_nvfp4_linear_kernel,
+)
 from vllm.model_executor.layers.attention import Attention, MLAAttention
-from vllm.model_executor.layers.fused_moe.activation import MoEActivation
-from vllm.model_executor.layers.fused_moe.config import (
+from vllm.model_executor.layers.fused_moe import (
     FusedMoEConfig,
-    FusedMoEQuantConfig,
-    RoutingMethodType,
-)
-from vllm.model_executor.layers.fused_moe.fused_moe_method_base import (
     FusedMoEMethodBase,
-)
-from vllm.model_executor.layers.fused_moe.layer import (
-    FusedMoE,
+    FusedMoEQuantConfig,
     FusedMoeWeightScaleSupported,
+    MoEActivation,
+    RoutedExperts,
+    RoutingMethodType,
+    SharedExperts,
 )
 from vllm.model_executor.layers.fused_moe.oracle.fp8 import (
     Fp8MoeBackend,
@@ -56,7 +60,6 @@
     swap_w13_to_w31,
 )
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-    W8A8BlockFp8LinearOp,
     process_fp8_input_tensor_strategy_moe,
     process_fp8_weight_tensor_strategy_moe,
 )
@@ -67,18 +70,11 @@
     MXFP8_BLOCK_SIZE,
     MXFP8_SCALE_DTYPE,
     MXFP8_VALUE_DTYPE,
-    Mxfp8LinearBackend,
-    Mxfp8LinearOp,
     mxfp8_e4m3_quantize,
-    swizzle_mxfp8_scale,
-)
-from vllm.model_executor.layers.quantization.utils.nvfp4_utils import (
-    apply_nvfp4_linear,
-    convert_to_nvfp4_linear_kernel_format,
-    select_nvfp4_linear_backend,
 )
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     GroupShape,
+    create_fp8_quant_key,
     is_layer_skipped,
     kFp8DynamicTokenSym,
     kFp8StaticTensorSym,
@@ -87,12 +83,12 @@
     kNvfp4Static,
 )
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
-    cutlass_block_fp8_supported,
     requantize_with_max_scale,
 )
 from vllm.model_executor.parameter import (
     BlockQuantScaleParameter,
     ChannelQuantScaleParameter,
+    GroupQuantScaleParameter,
     ModelWeightParameter,
     PerTensorScaleParameter,
 )
@@ -111,19 +107,21 @@
     "FP8_PER_CHANNEL_PER_TOKEN",
     # FP8 per-block weight-only (ModelOpt may emit this as lowercase).
     "FP8_PB_WO",
-    # FP4
+    # NVFP4 W4A4 (4-bit float weights AND 4-bit float activations).
     "NVFP4",
+    # W4A16 NVFP4 (4-bit float weights, fp16/bf16 activations).
+    "W4A16_NVFP4",
     # MXFP8
     "MXFP8",
     # MIXED_PRECISION,
     "MIXED_PRECISION",
 ]
-KV_CACHE_QUANT_ALGOS = ["FP8"]
+KV_CACHE_QUANT_ALGOS = ["FP8", "NVFP4"]
 
 
-class ModelOptFp8KVCacheMethod(BaseKVCacheMethod):
+class ModelOptKVCacheMethod(BaseKVCacheMethod):
     """
-    Supports loading kv-cache scaling factors from FP8 checkpoints.
+    Supports loading kv-cache scaling factors from FP8 or NVFP4 checkpoints.
     """
 
     def __init__(self, quant_config: "ModelOptQuantConfigBase"):
@@ -207,7 +205,7 @@ def get_quant_method(
             if getattr(quant_method, "backend", "") == "marlin":
                 quant_method.marlin_input_dtype = get_marlin_input_dtype(prefix)
             return quant_method
-        elif isinstance(layer, FusedMoE):
+        elif isinstance(layer, RoutedExperts):
             quant_method = self.FusedMoEMethodCls(
                 quant_config=self, moe_config=layer.moe_config
             )
@@ -249,7 +247,7 @@ def _extract_modelopt_quant_algo(
         """
         if hf_quant_cfg is None:
             return None
-        if hf_quant_cfg.get("quant_method", "").lower() != "modelopt":
+        if not hf_quant_cfg.get("quant_method", "").lower().startswith("modelopt"):
             return None
         if "quantization" in hf_quant_cfg:
             quant_config = hf_quant_cfg["quantization"]
@@ -410,7 +408,7 @@ def get_min_capability(cls) -> int:
 
     @classmethod
     def override_quantization_method(
-        cls, hf_quant_cfg, user_quant
+        cls, hf_quant_cfg, user_quant, hf_config=None
     ) -> QuantizationMethods | None:
         algo = cls._extract_modelopt_quant_algo(hf_quant_cfg)
         if algo is not None and algo == "FP8":
@@ -451,12 +449,8 @@ class ModelOptFp8LinearMethod(LinearMethodBase):
 
     def __init__(self, quant_config: ModelOptFp8Config) -> None:
         self.quant_config = quant_config
-        self.fp8_linear = init_fp8_linear_kernel(
-            activation_quant_key=kFp8StaticTensorSym,
-            weight_quant_key=kFp8StaticTensorSym,
-            out_dtype=torch.get_default_dtype(),
-            module_name=self.__class__.__name__,
-        )
+        self.out_dtype = torch.get_default_dtype()
+        self.input_dtype = get_current_vllm_config().model_config.dtype
 
     def create_weights(
         self,
@@ -506,6 +500,15 @@ def create_weights(
             scale[:] = torch.finfo(torch.float32).min
             layer.register_parameter("input_scale", scale)
 
+        self.fp8_linear = init_fp8_linear_kernel(
+            activation_quant_key=kFp8StaticTensorSym,
+            weight_quant_key=kFp8StaticTensorSym,
+            weight_shape=layer.weight.shape,
+            input_dtype=self.input_dtype,
+            out_dtype=self.out_dtype,
+            module_name=self.__class__.__name__,
+        )
+
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         weight = layer.weight
         max_w_scale = layer.weight_scale.max()
@@ -516,6 +519,7 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         layer.weight = Parameter(weight.t(), requires_grad=False)
         layer.weight_scale = Parameter(max_w_scale, requires_grad=False)
         layer.input_scale = Parameter(layer.input_scale.max(), requires_grad=False)
+        self.fp8_linear.process_weights_after_loading(layer)
 
     def apply(
         self,
@@ -537,12 +541,8 @@ class ModelOptFp8PcPtLinearMethod(LinearMethodBase):
 
     def __init__(self, quant_config: ModelOptFp8Config) -> None:
         self.quant_config = quant_config
-        self.fp8_linear = init_fp8_linear_kernel(
-            activation_quant_key=kFp8DynamicTokenSym,
-            weight_quant_key=kFp8StaticTokenSym,
-            out_dtype=torch.get_default_dtype(),
-            module_name=self.__class__.__name__,
-        )
+        self.out_dtype = torch.get_default_dtype()
+        self.input_dtype = get_current_vllm_config().model_config.dtype
 
     def create_weights(
         self,
@@ -588,9 +588,19 @@ def create_weights(
         weight_scale[:] = torch.finfo(torch.float32).min
         layer.register_parameter("weight_scale", weight_scale)
 
+        self.fp8_linear = init_fp8_linear_kernel(
+            activation_quant_key=kFp8DynamicTokenSym,
+            weight_quant_key=kFp8StaticTokenSym,
+            weight_shape=layer.weight.shape,
+            input_dtype=self.input_dtype,
+            out_dtype=self.out_dtype,
+            module_name=self.__class__.__name__,
+        )
+
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         layer.weight = Parameter(layer.weight.t(), requires_grad=False)
         layer.weight_scale = Parameter(layer.weight_scale.data, requires_grad=False)
+        self.fp8_linear.process_weights_after_loading(layer)
 
     def apply(
         self,
@@ -617,13 +627,17 @@ def __init__(self, quant_config: ModelOptFp8Config) -> None:
         self.quant_config = quant_config
         block_n, block_k = self._WEIGHT_BLOCK_SIZE
         self.weight_block_size = list(self._WEIGHT_BLOCK_SIZE)
-        self.w8a8_block_fp8_linear = W8A8BlockFp8LinearOp(
-            weight_group_shape=GroupShape(block_n, block_k),
-            act_quant_group_shape=GroupShape(1, block_k),
-            cutlass_block_fp8_supported=cutlass_block_fp8_supported(),
-            use_aiter_and_is_supported=False,
+
+        self.activation_quant_key = create_fp8_quant_key(
+            static=False, group_shape=GroupShape(1, block_k)
+        )
+        self.weight_quant_key = create_fp8_quant_key(
+            static=True, group_shape=GroupShape(block_n, block_k)
         )
 
+        self.out_dtype = torch.get_default_dtype()
+        self.input_dtype = get_current_vllm_config().model_config.dtype
+
     def create_weights(
         self,
         layer: torch.nn.Module,
@@ -689,8 +703,17 @@ def create_weights(
         weight_scale[:] = torch.finfo(torch.float32).min
         layer.register_parameter("weight_scale", weight_scale)
 
+        self.w8a8_block_fp8_linear = init_fp8_linear_kernel(
+            activation_quant_key=self.activation_quant_key,
+            weight_quant_key=self.weight_quant_key,
+            weight_shape=layer.weight.shape,
+            input_dtype=self.input_dtype,
+            out_dtype=self.out_dtype,
+            module_name=self.__class__.__name__,
+        )
+
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
-        # Keep weight in [out, in] layout for W8A8BlockFp8LinearOp.
+        # Keep weight in [out, in] layout for Fp8BlockScaledMMLinearKernel.
         layer.weight = Parameter(layer.weight.data, requires_grad=False)
 
         scale = layer.weight_scale
@@ -714,13 +737,7 @@ def apply(
         x: torch.Tensor,
         bias: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        return self.w8a8_block_fp8_linear.apply(
-            input=x,
-            weight=layer.weight,
-            weight_scale=layer.weight_scale,
-            input_scale=None,
-            bias=bias,
-        )
+        return self.w8a8_block_fp8_linear.apply_weights(layer, x, bias)
 
 
 class ModelOptFp8MoEMethod(FusedMoEMethodBase):
@@ -759,7 +776,7 @@ def maybe_make_prepare_finalize(
     def select_gemm_impl(
         self,
         prepare_finalize: mk.FusedMoEPrepareAndFinalizeModular,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
     ) -> mk.FusedMoEExpertsModular:
         raise ValueError(
             f"{self.__class__.__name__} uses the new modular kernel initialization "
@@ -768,7 +785,7 @@ def select_gemm_impl(
 
     def create_weights(
         self,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
         num_experts: int,
         hidden_size: int,
         intermediate_size_per_partition: int,
@@ -847,7 +864,7 @@ def create_weights(
 
     def _setup_kernel(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         w13: torch.Tensor,
         w2: torch.Tensor,
         w13_scale: torch.Tensor,
@@ -881,11 +898,10 @@ def _setup_kernel(
             moe_config=self.moe,
             fp8_backend=self.fp8_backend,
             experts_cls=self.experts_cls,
-            routing_tables=layer._maybe_init_expert_routing_tables(),
-            shared_experts=layer.shared_experts,
+            routing_tables=layer._expert_routing_tables(),
         )
 
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
         w13 = layer.w13_weight
         w2 = layer.w2_weight
         w13_scale = layer.w13_weight_scale
@@ -916,7 +932,7 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
             layer, w13, w2, w13_scale, w2_scale, w13_input_scale, w2_input_scale
         )
 
-    def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantConfig:
+    def get_fused_moe_quant_config(self, layer: RoutedExperts) -> FusedMoEQuantConfig:
         w1_scale = layer.w13_weight_scale
         w2_scale = layer.w2_weight_scale
         a1_scale = layer.w13_input_scale
@@ -928,14 +944,16 @@ def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantCon
             w2_scale=w2_scale,
             a1_scale=a1_scale,
             a2_scale=a2_scale,
+            swiglu_limit=getattr(layer, "swiglu_limit", None),
         )
 
     def apply_monolithic(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
         assert self.is_monolithic
         assert self.moe_kernel is not None
         return self.moe_kernel.apply_monolithic(
@@ -955,12 +973,13 @@ def apply_monolithic(
 
     def apply(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> torch.Tensor:
         assert not self.is_monolithic
         assert self.moe_kernel is not None
         return self.moe_kernel.apply(
@@ -973,13 +992,14 @@ def apply(
             global_num_experts=layer.global_num_experts,
             expert_map=layer.expert_map,
             apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            shared_experts=shared_experts,
             shared_experts_input=shared_experts_input,
         )
 
 
 ModelOptFp8Config.LinearMethodCls = ModelOptFp8LinearMethod
 ModelOptFp8Config.FusedMoEMethodCls = ModelOptFp8MoEMethod
-ModelOptFp8Config.KVCacheMethodCls = ModelOptFp8KVCacheMethod
+ModelOptFp8Config.KVCacheMethodCls = ModelOptKVCacheMethod
 
 
 class ModelOptNvFp4Config(ModelOptQuantConfigBase):
@@ -987,22 +1007,41 @@ class ModelOptNvFp4Config(ModelOptQuantConfigBase):
 
     def __init__(
         self,
-        is_checkpoint_nvfp4_serialized: bool,
-        kv_cache_quant_algo: str | None,
-        exclude_modules: list[str],
+        quant_method: str = "NVFP4",
+        is_checkpoint_nvfp4_serialized: bool = False,
+        kv_cache_quant_algo: str | None = None,
+        exclude_modules: list[str] | None = None,
         group_size: int = 16,
     ) -> None:
+        if exclude_modules is None:
+            exclude_modules = []
         super().__init__(exclude_modules)
+        self.quant_method = quant_method
         self.is_checkpoint_nvfp4_serialized = is_checkpoint_nvfp4_serialized
         if is_checkpoint_nvfp4_serialized:
             logger.warning(
-                "Detected ModelOpt NVFP4 checkpoint. Please note that"
-                " the format is experimental and could change in future."
+                "Detected ModelOpt NVFP4 checkpoint (quant_algo=%s). Please "
+                "note that the format is experimental and could change in "
+                "future.",
+                quant_method,
             )
 
             self.group_size = group_size
             self.kv_cache_quant_algo = kv_cache_quant_algo
 
+        # Select LinearMethod implementation based on quant_algo (FP8 pattern).
+        # NVFP4         -> W4A4: cutlass NVFP4 GEMM with input quantization
+        # W4A16_NVFP4   -> W4A16: FP4 Marlin GEMM with bf16/fp16 activations
+        if quant_method == "NVFP4":
+            self.LinearMethodCls = ModelOptNvFp4LinearMethod
+        elif quant_method == "W4A16_NVFP4":
+            self.LinearMethodCls = ModelOptNvFp4W4A16LinearMethod
+        else:
+            raise ValueError(
+                f"Unsupported ModelOpt NVFP4 quant_algo: {quant_method}. "
+                "Supported: NVFP4 / W4A16_NVFP4."
+            )
+
     def get_name(self) -> QuantizationMethods:
         return "modelopt_fp4"
 
@@ -1015,7 +1054,7 @@ def get_min_capability(cls) -> int:
 
     @classmethod
     def override_quantization_method(
-        cls, hf_quant_cfg, user_quant
+        cls, hf_quant_cfg, user_quant, hf_config=None
     ) -> QuantizationMethods | None:
         algo = cls._extract_modelopt_quant_algo(hf_quant_cfg)
         if algo is not None and ("NVFP4" in algo or "FP4" in algo):
@@ -1053,6 +1092,7 @@ def _from_config(
                 )
 
         return cls(
+            quant_method,
             is_checkpoint_nvfp4_serialized,
             kv_cache_quant_method,
             exclude_modules,
@@ -1074,7 +1114,7 @@ class ModelOptNvFp4LinearMethod(LinearMethodBase):
     def __init__(self, quant_config: ModelOptNvFp4Config) -> None:
         self.quant_config = quant_config
         self.marlin_input_dtype = None
-        self.backend = select_nvfp4_linear_backend()
+        self.kernel = init_nvfp4_linear_kernel()
 
     def create_weights(
         self,
@@ -1151,10 +1191,23 @@ def create_weights(
         layer.register_parameter("weight_scale", weight_scale)
 
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        if (
+            torch.unique(layer.input_scale).numel() != 1
+            or torch.unique(layer.weight_scale_2).numel() != 1
+        ):
+            logger.warning_once(
+                "In NVFP4 linear, the global scale for input or weight are different"
+                " for parallel layers (e.g. q_proj, k_proj, v_proj). This "
+                " will likely results in reduce accuracy. Please verify the model"
+                " accuracy. Consider using a checkpoint with a shared global NVFP4"
+                " scale for parallel layers."
+            )
+
         # Rename ModelOpt checkpoint names to standardized names
         input_global_scale = layer.input_scale.max().to(torch.float32)
         layer.input_global_scale = Parameter(input_global_scale, requires_grad=False)
         del layer.input_scale
+
         weight_global_scale = layer.weight_scale_2.max().to(torch.float32)
         layer.weight_global_scale = Parameter(weight_global_scale, requires_grad=False)
         del layer.weight_scale_2
@@ -1168,7 +1221,7 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         )
 
         # Convert layer to NVFP4 linear kernel format
-        convert_to_nvfp4_linear_kernel_format(self.backend, layer)
+        self.kernel.process_weights_after_loading(layer)
 
     def apply(
         self,
@@ -1176,12 +1229,153 @@ def apply(
         x: torch.Tensor,
         bias: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        return apply_nvfp4_linear(
-            backend=self.backend,
-            layer=layer,
-            x=x,
-            bias=bias,
+        return self.kernel.apply_weights(layer=layer, x=x, bias=bias)
+
+
+class ModelOptNvFp4W4A16LinearMethod(LinearMethodBase):
+    """Linear method for ModelOpt NVFP4 W4A16.
+
+    4-bit NVFP4 weights, fp16/bf16 activations. Loads ModelOpt-style names
+    directly (no on-disk conversion) and dispatches to the FP4 Marlin GEMM:
+
+        weight          uint8     packed NVFP4 (2 nibbles/byte along input dim)
+        weight_scale    fp8-e4m3  per 16-elem group along input dim
+        weight_scale_2  fp32      per-tensor global scale = amax / (6.0 * 448.0)
+
+    No activation quantization. Marlin expects the global scale in the same
+    form ModelOpt stores (amax/2688), so we rename weight_scale_2 ->
+    weight_global_scale **without reciprocation** -- the CT W4A16 path
+    reciprocates only because CT stores the inverse on disk.
+
+    We also register a placeholder input_scale parameter so that W4A4-shaped
+    checkpoints (which contain *_proj.input_scale tensors) can be loaded
+    under this method without the per-shard loader hitting a KeyError on
+    the merged-name lookup. The placeholder is discarded in
+    process_weights_after_loading -- its value is never used.
+    """
+
+    def __init__(self, quant_config: ModelOptNvFp4Config) -> None:
+        self.quant_config = quant_config
+        # Vestigial slot mirrored from ModelOptNvFp4LinearMethod: the parent
+        # config's get_quant_method only fills marlin_input_dtype when
+        # backend == "marlin"; we don't set that since we pin the kernel
+        # below, but we keep the attribute for shape parity.
+        self.marlin_input_dtype = None
+        # Direct-instantiate the Marlin NVFP4 adapter rather than going through
+        # init_nvfp4_linear_kernel(): the latter's priority list returns a
+        # cutlass W4A4 kernel as first-pick on this hardware, which would
+        # silently try to quantize activations (we have no input_scale). For
+        # W4A16 there is exactly one valid kernel, so we pin it.
+        self.kernel = MarlinNvFp4LinearKernel(NvFp4LinearLayerConfig())
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        input_size_per_partition: int,
+        output_partition_sizes: list[int],
+        input_size: int,
+        output_size: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        del input_size, output_size
+        if not self.quant_config.is_checkpoint_nvfp4_serialized:
+            raise ValueError(
+                "W4A16_NVFP4 quantization was selected; "
+                "dynamic quantization is not supported."
+            )
+        output_size_per_partition = sum(output_partition_sizes)
+        weight_loader = extra_weight_attrs.get("weight_loader")
+        layer.logical_widths = output_partition_sizes
+        layer.input_size_per_partition = input_size_per_partition
+        layer.output_size_per_partition = output_size_per_partition
+
+        if input_size_per_partition % 16 != 0:
+            raise ValueError(
+                "Unsupported model: input feature size is not a multiple of 16."
+            )
+
+        # Packed NVFP4 weights: uint8, 2 nibbles per byte along the input dim.
+        weight = ModelWeightParameter(
+            data=torch.empty(
+                output_size_per_partition,
+                input_size_per_partition // 2,
+                dtype=torch.uint8,
+            ),
+            input_dim=1,
+            output_dim=0,
+            weight_loader=weight_loader,
+        )
+        layer.register_parameter("weight", weight)
+
+        # Per-tensor global weight scale (fp32). ModelOpt stores
+        # amax / (NVFP4_max * fp8_e4m3_max) = amax / 2688. PerTensorScaleParameter
+        # holds one entry per fused output partition (e.g. q/k/v in a fused QKV).
+        weight_scale_2 = PerTensorScaleParameter(
+            data=torch.empty(len(output_partition_sizes), dtype=torch.float32),
+            weight_loader=weight_loader,
+        )
+        layer.register_parameter("weight_scale_2", weight_scale_2)
+
+        # Per-group fp8 weight scale.
+        weight_scale = GroupQuantScaleParameter(
+            data=torch.empty(
+                output_size_per_partition,
+                input_size_per_partition // self.quant_config.group_size,
+                dtype=torch.float8_e4m3fn,
+            ),
+            input_dim=1,
+            output_dim=0,
+            weight_loader=weight_loader,
+        )
+        layer.register_parameter("weight_scale", weight_scale)
+
+        # Placeholder input_scale param so W4A4-shaped checkpoints can be
+        # loaded under this method without KeyError on the merged-name
+        # lookup (qwen2-style stacked-loader path renames *_proj.input_scale
+        # to e.g. qkv_proj.input_scale and looks it up unconditionally).
+        # Discarded in process_weights_after_loading; never read by the kernel.
+        # For native W4A16 checkpoints (no input_scale on disk) the param
+        # stays uninitialized and is simply deleted.
+        input_scale = PerTensorScaleParameter(
+            data=torch.empty(len(output_partition_sizes), dtype=torch.float32),
+            weight_loader=weight_loader,
+        )
+        layer.register_parameter("input_scale", input_scale)
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        # Discard the input_scale placeholder. Whether it carries values
+        # (W4A4 ckpt loaded as W4A16) or is uninitialized (native W4A16
+        # ckpt), W4A16 mode does not quantize activations, so this is unused.
+        if hasattr(layer, "input_scale"):
+            del layer.input_scale
+
+        if torch.unique(layer.weight_scale_2).numel() != 1:
+            logger.warning_once(
+                "In W4A16_NVFP4 linear, the global weight scale "
+                "(weight_scale_2) differs across fused parallel layers "
+                "(e.g. q/k/v_proj). This will likely reduce accuracy. "
+                "Consider a checkpoint with a shared global scale."
+            )
+
+        # Rename weight_scale_2 -> weight_global_scale. NO reciprocation:
+        # ModelOpt already stores amax/2688, which is exactly what Marlin
+        # consumes via nvfp4_marlin_process_global_scale (called inside the
+        # Marlin adapter's process_weights_after_loading).
+        layer.weight_global_scale = Parameter(
+            layer.weight_scale_2.max().to(torch.float32), requires_grad=False
         )
+        del layer.weight_scale_2
+
+        self.kernel.process_weights_after_loading(layer)
+
+    def apply(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        return self.kernel.apply_weights(layer=layer, x=x, bias=bias)
 
 
 class ModelOptNvFp4FusedMoE(FusedMoEMethodBase):
@@ -1198,11 +1392,17 @@ def __init__(
     ) -> None:
         super().__init__(moe_config)
         self.quant_config = quant_config
-        # Select experts implementation.
+        # W4A16 mode fires for W4A16_NVFP4 on-disk checkpoints. With
+        # activation_key=None every W4A4 backend's _supports_quant_scheme
+        # rejects itself (they all require (kNvfp4Static, kNvfp4Dynamic)
+        # exactly); only Marlin survives. Marlin's MoE path drops
+        # activation scales in convert_to_nvfp4_moe_kernel_format, so no
+        # other change is needed.
+        self.use_a16 = quant_config.quant_method == "W4A16_NVFP4"
         self.nvfp4_backend, self.experts_cls = select_nvfp4_moe_backend(
             config=self.moe,
             weight_key=kNvfp4Static,
-            activation_key=kNvfp4Dynamic,
+            activation_key=None if self.use_a16 else kNvfp4Dynamic,
         )
 
         self.use_global_sf = is_global_sf_supported_for_nvfp4_backend(
@@ -1226,7 +1426,7 @@ def uses_weight_scale_2_pattern(self) -> bool:
 
     def create_weights(
         self,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
         num_experts: int,
         hidden_size: int,
         intermediate_size_per_partition: int,
@@ -1340,7 +1540,7 @@ def create_weights(
         )
         layer.register_parameter("w2_input_scale", w2_input_scale)
 
-    def process_weights_after_loading(self, layer: FusedMoE) -> None:
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
         """
         Convert NVFP4 MoE weights into kernel format and setup the kernel.
         """
@@ -1394,12 +1594,11 @@ def process_weights_after_loading(self, layer: FusedMoE) -> None:
             moe_quant_config=self.moe_quant_config,
             moe_config=self.moe,
             experts_cls=self.experts_cls,
-            shared_experts=layer.shared_experts,
-            routing_tables=layer._maybe_init_expert_routing_tables(),
+            routing_tables=layer._expert_routing_tables(),
         )
         self.moe_kernel.fused_experts.process_weights_after_loading(layer)
 
-    def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantConfig:
+    def get_fused_moe_quant_config(self, layer: RoutedExperts) -> FusedMoEQuantConfig:
         return make_nvfp4_moe_quant_config(
             backend=self.nvfp4_backend,
             w13_scale=layer.w13_weight_scale,
@@ -1408,6 +1607,7 @@ def get_fused_moe_quant_config(self, layer: torch.nn.Module) -> FusedMoEQuantCon
             w2_scale_2=layer.w2_weight_scale_2,
             a13_scale=layer.w13_input_scale,
             a2_scale=layer.w2_input_scale,
+            swiglu_limit=getattr(layer, "swiglu_limit", None),
         )
 
     @property
@@ -1416,10 +1616,11 @@ def supports_eplb(self) -> bool:
 
     def apply_monolithic(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
         assert self.is_monolithic
         assert self.moe_kernel is not None
         return self.moe_kernel.apply_monolithic(
@@ -1439,12 +1640,13 @@ def apply_monolithic(
 
     def apply(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> torch.Tensor:
         assert not self.is_monolithic
         assert self.moe_kernel is not None
         return self.moe_kernel.apply(
@@ -1457,13 +1659,14 @@ def apply(
             global_num_experts=layer.global_num_experts,
             expert_map=layer.expert_map,
             apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            shared_experts=shared_experts,
             shared_experts_input=shared_experts_input,
         )
 
 
 ModelOptNvFp4Config.LinearMethodCls = ModelOptNvFp4LinearMethod
 ModelOptNvFp4Config.FusedMoEMethodCls = ModelOptNvFp4FusedMoE
-ModelOptNvFp4Config.KVCacheMethodCls = ModelOptFp8KVCacheMethod
+ModelOptNvFp4Config.KVCacheMethodCls = ModelOptKVCacheMethod
 
 
 class ModelOptMxFp8Config(ModelOptQuantConfigBase):
@@ -1499,12 +1702,12 @@ def get_supported_act_dtypes(self) -> list[torch.dtype]:
 
     @classmethod
     def get_min_capability(cls) -> int:
-        # MXFP8 hardware acceleration requires Blackwell (SM100) or newer
-        return 100
+        # Marlin kernel supports MXFP8 on SM80+
+        return 80
 
     @classmethod
     def override_quantization_method(
-        cls, hf_quant_cfg, user_quant
+        cls, hf_quant_cfg, user_quant, hf_config=None
     ) -> QuantizationMethods | None:
         algo = cls._extract_modelopt_quant_algo(hf_quant_cfg)
         if algo is not None and "MXFP8" in algo:
@@ -1555,9 +1758,7 @@ def __init__(self, quant_config: ModelOptMxFp8Config) -> None:
                 "Dynamic quantization is not supported."
             )
 
-        self.backend: Mxfp8LinearBackend = Mxfp8LinearBackend.FLASHINFER_CUTLASS
-        self.mxfp8_linear_op = Mxfp8LinearOp(backend=self.backend)
-        logger.info_once("Using %s backend for MXFP8 GEMM", self.backend.value)
+        self.kernel = init_mxfp8_linear_kernel()
 
     def create_weights(
         self,
@@ -1615,36 +1816,6 @@ def create_weights(
         )
         layer.register_parameter("weight_scale", weight_scale)
 
-    def _process_weights_after_loading_scale_2d(self, layer: torch.nn.Module) -> None:
-        """Not swizzled - MXFP8 GEMM emulation"""
-        weight = layer.weight.data  # [N, K]
-        N, K = weight.shape
-        scale_k = K // MXFP8_BLOCK_SIZE
-
-        # Slice weight_scale to match weight dimensions (handles padding)
-        weight_scale = layer.weight_scale.data[:N, :scale_k].contiguous()
-
-        layer.weight = Parameter(weight.contiguous(), requires_grad=False)
-        layer.weight_scale = Parameter(weight_scale, requires_grad=False)
-
-    def _process_weights_after_loading_scale_1d(self, layer: torch.nn.Module) -> None:
-        """Swizzled - MXFP8 GEMM Flashinfer CUTLASS"""
-        weight = layer.weight.data  # [N, K]
-        N, K = weight.shape
-
-        # 2D weight scale
-        weight_scale = layer.weight_scale.data
-
-        # Swizzle the weight scales
-        scale_k = K // MXFP8_BLOCK_SIZE
-        weight_scale_2d = weight_scale[:N, :scale_k].contiguous()
-        weight_scale_swizzled = swizzle_mxfp8_scale(weight_scale_2d, M=N, K=K)
-
-        layer.weight = Parameter(weight.contiguous(), requires_grad=False)
-        layer.weight_scale = Parameter(
-            weight_scale_swizzled.contiguous(), requires_grad=False
-        )
-
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         # Validate weight tensor
         if layer.weight.ndim != 2:
@@ -1669,14 +1840,7 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
             f" got {layer.weight_scale.dtype}"
         )
 
-        if self.backend == Mxfp8LinearBackend.EMULATION:
-            # Swizzled layout is not used
-            self._process_weights_after_loading_scale_2d(layer)
-            return
-
-        assert self.backend == Mxfp8LinearBackend.FLASHINFER_CUTLASS
-        # Swizzled layout is required for Flashinfer CUTLASS
-        self._process_weights_after_loading_scale_1d(layer)
+        self.kernel.process_weights_after_loading(layer)
 
     def apply(
         self,
@@ -1684,23 +1848,7 @@ def apply(
         x: torch.Tensor,
         bias: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        if layer.weight.dtype != MXFP8_VALUE_DTYPE:
-            raise ValueError(
-                f"Weight dtype {layer.weight.dtype} != expected {MXFP8_VALUE_DTYPE}"
-            )
-        if layer.weight_scale.dtype != MXFP8_SCALE_DTYPE:
-            raise ValueError(
-                f"Weight scale dtype {layer.weight_scale.dtype} != "
-                f"expected {MXFP8_SCALE_DTYPE}"
-            )
-
-        return self.mxfp8_linear_op.apply(
-            input=x,
-            weight=layer.weight,
-            weight_scale=layer.weight_scale,
-            out_dtype=x.dtype,
-            bias=bias,
-        )
+        return self.kernel.apply_weights(layer, x, bias)
 
 
 class ModelOptMxFp8FusedMoE(FusedMoEMethodBase):
@@ -1719,15 +1867,15 @@ def __init__(
 
     def create_weights(
         self,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
         num_experts: int,
         hidden_size: int,
         intermediate_size_per_partition: int,
         params_dtype: torch.dtype,
         **extra_weight_attrs,
     ):
-        layer.intermediate_size_per_partition = intermediate_size_per_partition
-        layer.hidden_size = hidden_size
+        assert layer.intermediate_size_per_partition == intermediate_size_per_partition
+        assert layer.hidden_size == hidden_size
         layer.orig_dtype = params_dtype
 
         if hidden_size % MXFP8_BLOCK_SIZE != 0:
@@ -1910,7 +2058,7 @@ def _shuffle_weights_for_trtllm(self, layer: torch.nn.Module) -> None:
             torch.stack(w2_scale_shuffled).contiguous(),
         )
 
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
         if getattr(layer, "_already_called_process_weights_after_loading", False):
             return
 
@@ -1930,7 +2078,7 @@ def maybe_make_prepare_finalize(
     def select_gemm_impl(
         self,
         prepare_finalize: mk.FusedMoEPrepareAndFinalizeModular,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
     ) -> mk.FusedMoEExpertsModular:
         raise ValueError(
             f"{self.__class__.__name__} uses the new modular kernel initialization "
@@ -1938,7 +2086,7 @@ def select_gemm_impl(
         )
 
     def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
+        self, layer: RoutedExperts
     ) -> FusedMoEQuantConfig | None:
         # TRTLLM MXFP8 path is monolithic and does not use modular kernel config.
         return None
@@ -1949,10 +2097,11 @@ def is_monolithic(self) -> bool:
 
     def apply_monolithic(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
         from flashinfer.fused_moe.core import (
             ActivationType,
             Fp8QuantizationType,
@@ -1960,7 +2109,7 @@ def apply_monolithic(
 
         assert self.mxfp8_backend == Fp8MoeBackend.FLASHINFER_TRTLLM
 
-        if layer.enable_eplb:
+        if layer.eplb_state is not None:
             raise NotImplementedError(
                 "EPLB is not supported for FlashInfer TRTLLM MXFP8 MoE backend."
             )
@@ -2032,12 +2181,13 @@ def apply_monolithic(
 
     def apply(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> torch.Tensor:
         assert not self.is_monolithic
         raise NotImplementedError(
             "Non-monolithic MXFP8 MoE path is not yet implemented."
@@ -2047,7 +2197,7 @@ def apply(
 # Register the method classes for ModelOptMxFp8Config
 ModelOptMxFp8Config.LinearMethodCls = ModelOptMxFp8LinearMethod
 ModelOptMxFp8Config.FusedMoEMethodCls = ModelOptMxFp8FusedMoE
-ModelOptMxFp8Config.KVCacheMethodCls = ModelOptFp8KVCacheMethod
+ModelOptMxFp8Config.KVCacheMethodCls = ModelOptKVCacheMethod
 
 
 class ModelOptMixedPrecisionConfig(ModelOptQuantConfigBase):
@@ -2067,12 +2217,14 @@ def __init__(
         quantized_layers: dict[str, dict[str, Any]],
         fp8_config: ModelOptFp8Config,
         nvfp4_config: ModelOptNvFp4Config,
+        w4a16_nvfp4_config: ModelOptNvFp4Config,
     ) -> None:
         super().__init__(exclude_modules)
         self.kv_cache_quant_method = kv_cache_quant_method
         self.quantized_layers = quantized_layers
         self.fp8_config = fp8_config
         self.nvfp4_config = nvfp4_config
+        self.w4a16_nvfp4_config = w4a16_nvfp4_config
 
     def get_name(self) -> QuantizationMethods:
         return "modelopt_mixed"
@@ -2086,7 +2238,7 @@ def get_min_capability(cls) -> int:
 
     @classmethod
     def override_quantization_method(
-        cls, hf_quant_cfg, user_quant
+        cls, hf_quant_cfg, user_quant, hf_config=None
     ) -> QuantizationMethods | None:
         algo = cls._extract_modelopt_quant_algo(hf_quant_cfg)
         if algo is not None and algo == "MIXED_PRECISION":
@@ -2117,10 +2269,15 @@ def _from_config(
                 "'quantized_layers' mapping in the quantization config."
             )
 
-        # Determine group_size from the first NVFP4 entry if not provided.
+        # Determine group_size from the first NVFP4-family entry if not
+        # provided. Both NVFP4 (W4A4) and W4A16_NVFP4 share the same packing
+        # + group-size convention; either entry resolves the value.
         if group_size is None:
             for layer_info in quantized_layers.values():
-                if layer_info.get("quant_algo", "").upper() == "NVFP4":
+                if layer_info.get("quant_algo", "").upper() in (
+                    "NVFP4",
+                    "W4A16_NVFP4",
+                ):
                     group_size = layer_info.get("group_size", 16)
                     break
         if group_size is None:
@@ -2138,6 +2295,18 @@ def _from_config(
             exclude_modules=[],
             group_size=group_size,
         )
+        # Sibling config for layers that declare quant_algo: "W4A16_NVFP4".
+        # ModelOptNvFp4Config.__init__ keys LinearMethodCls off quant_method,
+        # so this instance auto-selects ModelOptNvFp4W4A16LinearMethod. The
+        # MoE side reads quant_config.quant_method == "W4A16_NVFP4" to set
+        # use_a16 → Marlin backend in ModelOptNvFp4FusedMoE.__init__.
+        w4a16_nvfp4_config = ModelOptNvFp4Config(
+            quant_method="W4A16_NVFP4",
+            is_checkpoint_nvfp4_serialized=True,
+            kv_cache_quant_algo=kv_cache_quant_method,
+            exclude_modules=[],
+            group_size=group_size,
+        )
 
         return cls(
             kv_cache_quant_method=kv_cache_quant_method,
@@ -2145,6 +2314,7 @@ def _from_config(
             quantized_layers=quantized_layers,
             fp8_config=fp8_config,
             nvfp4_config=nvfp4_config,
+            w4a16_nvfp4_config=w4a16_nvfp4_config,
         )
 
     def _resolve_quant_algo(self, prefix: str) -> str | None:
@@ -2153,25 +2323,29 @@ def _resolve_quant_algo(self, prefix: str) -> str | None:
         Tries three strategies in order:
         1. Direct lookup in ``quantized_layers``.
         2. Packed/fused-layer lookup (unfuse via ``packed_modules_mapping``).
-        3. Prefix-based lookup for FusedMoE (any child key starts with
+        3. Prefix-based lookup for RoutedExperts (any child key starts with
            ``prefix + "."``).
 
         Returns the upper-cased quant_algo string, or *None* if the prefix
         is not found.
         """
         # 1. Direct lookup
-        if prefix in self.quantized_layers:
-            return self.quantized_layers[prefix]["quant_algo"].upper()
+        for candidate in self._quantized_layer_prefix_candidates(prefix):
+            if candidate in self.quantized_layers:
+                return self.quantized_layers[candidate]["quant_algo"].upper()
 
         # 2. Packed / fused layer lookup
         proj_name = prefix.rsplit(".", 1)[-1]
         if self.packed_modules_mapping and proj_name in self.packed_modules_mapping:
             algos: set[str] = set()
             base = prefix.rsplit(".", 1)[0]
-            for shard_name in self.packed_modules_mapping[proj_name]:
-                shard_prefix = f"{base}.{shard_name}"
-                if shard_prefix in self.quantized_layers:
-                    algos.add(self.quantized_layers[shard_prefix]["quant_algo"].upper())
+            for base_candidate in self._quantized_layer_prefix_candidates(base):
+                for shard_name in self.packed_modules_mapping[proj_name]:
+                    shard_prefix = f"{base_candidate}.{shard_name}"
+                    if shard_prefix in self.quantized_layers:
+                        algos.add(
+                            self.quantized_layers[shard_prefix]["quant_algo"].upper()
+                        )
             if len(algos) == 1:
                 return algos.pop()
             if len(algos) > 1:
@@ -2180,14 +2354,33 @@ def _resolve_quant_algo(self, prefix: str) -> str | None:
                     f"{algos}. All shards must use the same quantization."
                 )
 
-        # 3. Prefix-based lookup (for FusedMoE / parent modules)
-        prefix_dot = prefix + "."
-        for key, info in self.quantized_layers.items():
-            if key.startswith(prefix_dot):
-                return info["quant_algo"].upper()
+        # 3. Prefix-based lookup (for RoutedExperts / parent modules)
+        for candidate in self._quantized_layer_prefix_candidates(prefix):
+            prefix_dot = candidate + "."
+            for key, info in self.quantized_layers.items():
+                if key.startswith(prefix_dot):
+                    return info["quant_algo"].upper()
 
         return None
 
+    @staticmethod
+    def _quantized_layer_prefix_candidates(prefix: str) -> tuple[str, ...]:
+        candidates = [prefix]
+
+        if prefix.endswith(".lm_head"):
+            candidates.append("lm_head")
+
+        if prefix.startswith("language_model.model."):
+            candidates.append(
+                "model.language_model." + prefix[len("language_model.model.") :]
+            )
+        elif prefix.startswith("model.language_model."):
+            candidates.append(
+                "language_model.model." + prefix[len("model.language_model.") :]
+            )
+
+        return tuple(dict.fromkeys(candidates))
+
     def get_quant_method(
         self, layer: torch.nn.Module, prefix: str
     ) -> "QuantizeMethodBase | None":
@@ -2195,7 +2388,7 @@ def get_quant_method(
         # KV-cache quantization
         if isinstance(layer, Attention):
             if self.kv_cache_quant_method:
-                return ModelOptFp8KVCacheMethod(self)
+                return ModelOptKVCacheMethod(self)
             return None
 
         # Excluded layers
@@ -2211,10 +2404,12 @@ def get_quant_method(
                 return ModelOptFp8LinearMethod(self.fp8_config)
             if quant_algo == "NVFP4":
                 return ModelOptNvFp4LinearMethod(self.nvfp4_config)
+            if quant_algo == "W4A16_NVFP4":
+                return ModelOptNvFp4W4A16LinearMethod(self.w4a16_nvfp4_config)
             # Layer not in quantized_layers — leave unquantized
             return UnquantizedLinearMethod()
 
-        if isinstance(layer, FusedMoE):
+        if isinstance(layer, RoutedExperts):
             if quant_algo == "FP8":
                 return ModelOptFp8MoEMethod(
                     quant_config=self.fp8_config,
@@ -2225,6 +2420,11 @@ def get_quant_method(
                     quant_config=self.nvfp4_config,
                     moe_config=layer.moe_config,
                 )
+            if quant_algo == "W4A16_NVFP4":
+                return ModelOptNvFp4FusedMoE(
+                    quant_config=self.w4a16_nvfp4_config,
+                    moe_config=layer.moe_config,
+                )
             return None
 
         return None
diff --git a/vllm/model_executor/layers/quantization/moe_wna16.py b/vllm/model_executor/layers/quantization/moe_wna16.py
index f5c679840432..d6729bbbcb7e 100644
--- a/vllm/model_executor/layers/quantization/moe_wna16.py
+++ b/vllm/model_executor/layers/quantization/moe_wna16.py
@@ -6,18 +6,19 @@
 import torch
 
 from vllm.distributed import get_tensor_model_parallel_rank, get_tp_group
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoEConfig,
+    FusedMoEMethodBase,
+    FusedMoeWeightScaleSupported,
+    RoutedExperts,
+    SharedExperts,
+)
 from vllm.model_executor.layers.fused_moe.activation import MoEActivation
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEQuantConfig,
     int4_w4a16_moe_quant_config,
     int8_w8a16_moe_quant_config,
 )
-from vllm.model_executor.layers.fused_moe.layer import (
-    FusedMoE,
-    FusedMoEConfig,
-    FusedMoEMethodBase,
-    FusedMoeWeightScaleSupported,
-)
 from vllm.model_executor.layers.fused_moe.unquantized_fused_moe_method import (
     UnquantizedFusedMoEMethod,
 )
@@ -59,10 +60,9 @@ def __init__(
         # Avoid circular import
         from vllm.model_executor.layers.quantization.awq import AWQConfig
         from vllm.model_executor.layers.quantization.awq_marlin import AWQMarlinConfig
-        from vllm.model_executor.layers.quantization.gptq_marlin import GPTQMarlinConfig
 
         if self.linear_quant_method == "gptq":
-            self.use_marlin = GPTQMarlinConfig.is_gptq_marlin_compatible(full_config)
+            pass
         elif self.linear_quant_method in ("awq", "awq_marlin"):
             capability_tuple = current_platform.get_device_capability()
             device_capability = (
@@ -130,7 +130,7 @@ def from_config(cls, config: dict[str, Any]) -> "MoeWNA16Config":
 
     @classmethod
     def override_quantization_method(
-        cls, hf_quant_cfg, user_quant
+        cls, hf_quant_cfg, user_quant, hf_config=None
     ) -> QuantizationMethods | None:
         can_convert = cls.is_moe_wna16_compatible(hf_quant_cfg)
         if can_convert and user_quant == "moe_wna16":
@@ -166,29 +166,23 @@ def get_quant_method(
         self, layer: torch.nn.Module, prefix: str
     ) -> "QuantizeMethodBase | None":
         if is_layer_skipped_quant(prefix, self.modules_to_not_convert):
-            if isinstance(layer, FusedMoE):
+            if isinstance(layer, RoutedExperts):
                 return UnquantizedFusedMoEMethod(layer.moe_config)
             return UnquantizedLinearMethod()
         elif isinstance(layer, LinearBase):
             # Avoid circular import
+            from vllm.model_executor.layers.quantization.auto_gptq import (
+                AutoGPTQConfig,
+            )
             from vllm.model_executor.layers.quantization.awq import AWQConfig
             from vllm.model_executor.layers.quantization.awq_marlin import (
                 AWQMarlinConfig,
             )
-            from vllm.model_executor.layers.quantization.gptq import GPTQConfig
-            from vllm.model_executor.layers.quantization.gptq_marlin import (
-                GPTQMarlinConfig,
-            )
 
             if self.linear_quant_method == "gptq":
-                if self.use_marlin:
-                    return GPTQMarlinConfig.from_config(
-                        self.full_config
-                    ).get_quant_method(layer, prefix)
-                else:
-                    return GPTQConfig.from_config(self.full_config).get_quant_method(
-                        layer, prefix
-                    )
+                return AutoGPTQConfig.from_config(self.full_config).get_quant_method(
+                    layer, prefix
+                )
             elif self.linear_quant_method in ("awq", "awq_marlin"):
                 if self.use_marlin and check_marlin_supports_layer(
                     layer, self.group_size
@@ -202,7 +196,7 @@ def get_quant_method(
                     )
             else:
                 raise ValueError("moe_wna16 only support gptq and awq.")
-        elif isinstance(layer, FusedMoE):
+        elif isinstance(layer, RoutedExperts):
             return MoeWNA16Method(self, layer.moe_config)
         return None
 
@@ -224,7 +218,7 @@ def __init__(self, quant_config: MoeWNA16Config, moe: "FusedMoEConfig") -> None:
 
     def create_weights(
         self,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
         num_experts: int,
         hidden_size: int,
         intermediate_size_per_partition: int,
@@ -343,7 +337,7 @@ def create_weights(
                 set_weight_attrs(param, extra_weight_attrs)
 
     def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
+        self, layer: RoutedExperts
     ) -> FusedMoEQuantConfig | None:
         weight_bits = self.quant_config.weight_bits
         has_zp = self.quant_config.has_zp
@@ -364,12 +358,13 @@ def get_fused_moe_quant_config(
 
     def apply(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> torch.Tensor:
         from vllm.model_executor.layers.fused_moe import fused_experts
 
         assert layer.activation == MoEActivation.SILU, (
diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py
index c69e99a68126..2c69fc74530d 100644
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -7,22 +7,23 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.attention import Attention
 from vllm.model_executor.layers.fused_moe import (
-    FusedMoE,
     FusedMoEConfig,
     FusedMoEMethodBase,
-)
-from vllm.model_executor.layers.fused_moe import modular_kernel as mk
-from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEParallelConfig,
     FusedMoEQuantConfig,
+    RoutedExperts,
+    SharedExperts,
 )
+from vllm.model_executor.layers.fused_moe import modular_kernel as mk
 from vllm.model_executor.layers.fused_moe.oracle.mxfp4 import (
     TRITON_BACKENDS,
     Mxfp4MoeBackend,
-    convert_to_mxfp4_moe_kernel_format,
+    convert_gpt_oss_weight_to_mxfp4_moe_kernel_format,
+    convert_weight_to_mxfp4_moe_kernel_format,
     make_mxfp4_moe_kernel,
     make_mxfp4_moe_quant_config,
     mxfp4_round_up_hidden_size_and_intermediate_size,
+    select_deepseek_v4_mxfp4_moe_backend,
     select_mxfp4_moe_backend,
 )
 from vllm.model_executor.layers.linear import LinearBase, UnquantizedLinearMethod
@@ -38,6 +39,12 @@
 
 
 class Mxfp4Config(QuantizationConfig):
+    """Canonical base config for MXFP4 quantization.
+
+    Subclasses override get_name() and override_quantization_method() to
+    register themselves as the handler for a specific checkpoint format.
+    """
+
     def __init__(self, ignored_layers: list[str] | None = None):
         super().__init__()
         self.ignored_layers = ignored_layers
@@ -62,6 +69,8 @@ def get_supported_act_dtypes(cls) -> list[torch.dtype]:
     def get_config_filenames(cls) -> list[str]:
         return []
 
+    # TODO (zyongye) This is only temporaty fallback.
+    # We should have `Mxfp4MoEMethod` after this migration is complete.
     def get_quant_method(
         self, layer: torch.nn.Module, prefix: str
     ) -> "QuantizeMethodBase | None":
@@ -75,16 +84,14 @@ def get_quant_method(
             logger.debug_once(
                 "MXFP4 linear layer is not implemented - falling back to "
                 "UnquantizedLinearMethod.",
-                scope="local",
             )
             return UnquantizedLinearMethod()
-        elif isinstance(layer, FusedMoE):
-            return Mxfp4MoEMethod(layer.moe_config)
+        elif isinstance(layer, RoutedExperts):
+            return GptOssMxfp4MoEMethod(layer.moe_config)
         elif isinstance(layer, Attention):
             logger.debug_once(
                 "MXFP4 attention layer is not implemented. "
                 "Skipping quantization for this layer.",
-                scope="local",
             )
         return None
 
@@ -93,13 +100,380 @@ def is_mxfp4_quant(self, prefix: str, layer: torch.nn.Module) -> bool:
         return True
 
 
+class GptOssMxfp4Config(Mxfp4Config):
+    """MXFP4 config for GPT-OSS checkpoints.
+
+    Checkpoints carry ``"quant_method": "mxfp4"`` in their JSON config.
+    override_quantization_method() maps that to the canonical internal name
+    so that the rest of the loading path uses "gpt_oss_mxfp4" consistently.
+    """
+
+    @classmethod
+    def get_name(cls) -> QuantizationMethods:
+        return "gpt_oss_mxfp4"
+
+    @classmethod
+    def override_quantization_method(
+        cls, hf_quant_cfg, user_quant, hf_config=None
+    ) -> QuantizationMethods | None:
+        # Match both "mxfp4" (original checkpoint value) and "gpt_oss_mxfp4"
+        # (already normalized by verify_and_update_model_config) so that
+        # explicit --quantization mxfp4 from the user doesn't cause a mismatch.
+        if not (
+            isinstance(hf_quant_cfg, dict)
+            and hf_quant_cfg.get("quant_method") in ("mxfp4", "gpt_oss_mxfp4")
+        ):
+            return None
+        # Require explicit confirmation that this is a GPT-OSS model.
+        # Do NOT fall back to returning the override when hf_config is None,
+        # as that would silently claim all mxfp4 checkpoints.
+        model_type = getattr(hf_config, "model_type", None)
+        if model_type != "gpt_oss":
+            return None
+        return "gpt_oss_mxfp4"
+
+
+class GptOssMxfp4MoEMethod(FusedMoEMethodBase):
+    """MXFP4 MoE quantization method."""
+
+    def __init__(self, moe: FusedMoEConfig):
+        super().__init__(moe)
+        self.weight_dtype = "gpt_oss_mxfp4"
+        self.mxfp4_backend, self.experts_cls = select_mxfp4_moe_backend(moe)
+
+        self.max_capture_size = (
+            get_current_vllm_config().compilation_config.max_cudagraph_capture_size
+        )
+
+        self._cache_permute_indices: dict[torch.Size, torch.Tensor] = {}
+        self.moe_kernel: mk.FusedMoEKernel | None = None
+
+        # Used for triton kernel precision configs
+        self.w13_precision_config = None
+        self.w2_precision_config = None
+
+    @property
+    def skip_forward_padding(self) -> bool:
+        # SM100_FI_MXFP4_MXFP8_TRTLLM supports padding with mxfp8 quant
+        # so can skip the padding in the forward before applying the moe method
+        return self.mxfp4_backend == Mxfp4MoeBackend.FLASHINFER_TRTLLM_MXFP4_MXFP8
+
+    def maybe_roundup_sizes(
+        self,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        act_dtype: torch.dtype,
+        moe_parallel_config: FusedMoEParallelConfig,
+    ) -> tuple[int, int]:
+        hidden_size, intermediate_size_per_partition = super().maybe_roundup_sizes(
+            hidden_size=hidden_size,
+            intermediate_size_per_partition=intermediate_size_per_partition,
+            act_dtype=act_dtype,
+            moe_parallel_config=moe_parallel_config,
+        )
+        return mxfp4_round_up_hidden_size_and_intermediate_size(
+            self.mxfp4_backend, hidden_size, intermediate_size_per_partition
+        )
+
+    def create_weights(
+        self,
+        layer: RoutedExperts,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        self.num_experts = num_experts
+        weight_dtype = torch.uint8
+        scale_dtype = torch.uint8
+        mxfp4_block = 32
+
+        layer.params_dtype = params_dtype
+        layer.num_experts = num_experts
+        self.intermediate_size = intermediate_size_per_partition
+        self.hidden_size = hidden_size
+
+        # Fused gate_up_proj (column parallel)
+        w13_weight = torch.nn.Parameter(
+            torch.zeros(
+                num_experts,
+                2 * intermediate_size_per_partition,
+                hidden_size // 2,
+                dtype=weight_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight", w13_weight)
+        set_weight_attrs(w13_weight, extra_weight_attrs)
+
+        w13_weight_scale = torch.nn.Parameter(
+            torch.zeros(
+                num_experts,
+                2 * intermediate_size_per_partition,
+                hidden_size // mxfp4_block,
+                dtype=scale_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_scale", w13_weight_scale)
+        set_weight_attrs(w13_weight_scale, extra_weight_attrs)
+        w13_weight_scale.quant_method = "block"
+
+        # down_proj (row parallel)
+        w2_weight = torch.nn.Parameter(
+            torch.zeros(
+                num_experts,
+                hidden_size,
+                intermediate_size_per_partition // 2,
+                dtype=weight_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight", w2_weight)
+        set_weight_attrs(w2_weight, extra_weight_attrs)
+
+        w2_weight_scale = torch.nn.Parameter(
+            torch.zeros(
+                num_experts,
+                hidden_size,
+                intermediate_size_per_partition // mxfp4_block,
+                dtype=scale_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_scale", w2_weight_scale)
+        set_weight_attrs(w2_weight_scale, extra_weight_attrs)
+        w2_weight_scale.quant_method = "block"
+
+        if self.moe.has_bias:
+            w13_bias = torch.nn.Parameter(
+                torch.zeros(
+                    num_experts,
+                    2 * intermediate_size_per_partition,
+                    dtype=torch.bfloat16,
+                ),
+                requires_grad=False,
+            )
+            layer.register_parameter("w13_bias", w13_bias)
+            set_weight_attrs(w13_bias, extra_weight_attrs)
+
+            w2_bias = torch.nn.Parameter(
+                torch.zeros(
+                    num_experts,
+                    hidden_size,
+                    dtype=torch.bfloat16,
+                ),
+                requires_grad=False,
+            )
+            layer.register_parameter("w2_bias", w2_bias)
+            set_weight_attrs(w2_bias, extra_weight_attrs)
+
+    def _setup_kernel(
+        self,
+        layer: RoutedExperts,
+        w13: torch.Tensor,
+        w2: torch.Tensor,
+        w13_scale: torch.Tensor,
+        w2_scale: torch.Tensor,
+        w13_bias: torch.Tensor | None = None,
+        w2_bias: torch.Tensor | None = None,
+    ) -> None:
+        num_experts = self.num_experts
+        intermediate_size = self.intermediate_size
+        hidden_size = self.hidden_size
+        sf_block_size = 32
+
+        # Shape assertions
+        assert (
+            w13.dim() == 3
+            and w13.shape[0] == num_experts
+            and w13.shape[1] == intermediate_size * 2
+            and w13.shape[2] == hidden_size // 2
+        )
+        assert (
+            w13_scale.dim() == 3
+            and w13_scale.shape[0] == num_experts
+            and w13_scale.shape[1] == intermediate_size * 2
+            and w13_scale.shape[2] == hidden_size // sf_block_size
+        )
+        assert (
+            w2.dim() == 3
+            and w2.shape[0] == num_experts
+            and w2.shape[1] == hidden_size
+            and w2.shape[2] == intermediate_size // 2
+        )
+        assert (
+            w2_scale.dim() == 3
+            and w2_scale.shape[1] == hidden_size
+            and w2_scale.shape[2] == intermediate_size // sf_block_size
+        )
+        if w13_bias is not None:
+            assert (
+                w13_bias.dim() == 2
+                and w13_bias.shape[0] == num_experts
+                and w13_bias.shape[1] == intermediate_size * 2
+            )
+        if w2_bias is not None:
+            assert (
+                w2_bias.dim() == 2
+                and w2_bias.shape[0] == num_experts
+                and w2_bias.shape[1] == hidden_size
+            )
+
+        # Convert weights to kernel format
+        w13, w2, w13_scale, w2_scale, w13_bias, w2_bias = (
+            convert_gpt_oss_weight_to_mxfp4_moe_kernel_format(
+                mxfp4_backend=self.mxfp4_backend,
+                layer=layer,
+                w13_weight=w13,
+                w2_weight=w2,
+                w13_weight_scale=w13_scale,
+                w2_weight_scale=w2_scale,
+                w13_bias=w13_bias,
+                w2_bias=w2_bias,
+                _cache_permute_indices=self._cache_permute_indices,
+            )
+        )
+
+        # For TRITON backends, weights are wrapped tensors from triton_kernels
+        # that don't support .detach(). Manually assign parameters.
+        if self.mxfp4_backend not in TRITON_BACKENDS:
+            replace_parameter(layer, "w13_weight", w13)
+            replace_parameter(layer, "w2_weight", w2)
+            replace_parameter(layer, "w13_weight_scale", w13_scale)
+            replace_parameter(layer, "w2_weight_scale", w2_scale)
+        else:
+            layer.w13_weight = w13
+            layer.w2_weight = w2
+            self.w13_precision_config = w13_scale
+            self.w2_precision_config = w2_scale
+
+        # AITER backend requires weights to be marked as shuffled.
+        if self.mxfp4_backend == Mxfp4MoeBackend.AITER_MXFP4_BF16:
+            layer.w13_weight.is_shuffled = True
+            layer.w2_weight.is_shuffled = True
+
+        if w13_bias is not None and w2_bias is not None:
+            replace_parameter(layer, "w13_bias", w13_bias)
+            replace_parameter(layer, "w2_bias", w2_bias)
+
+        # Build quant config
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+
+        # Build kernel (modular or monolithic)
+        if self.moe_quant_config is not None and self.experts_cls is not None:
+            self.moe_kernel = make_mxfp4_moe_kernel(
+                moe_quant_config=self.moe_quant_config,
+                moe_config=self.moe,
+                mxfp4_backend=self.mxfp4_backend,
+                experts_cls=self.experts_cls,
+                routing_tables=layer._expert_routing_tables(),
+                layer=layer,
+            )
+
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
+        w13 = layer.w13_weight
+        w2 = layer.w2_weight
+        w13_scale = layer.w13_weight_scale
+        w2_scale = layer.w2_weight_scale
+        w13_bias = getattr(layer, "w13_bias", None)
+        w2_bias = getattr(layer, "w2_bias", None)
+
+        if self.mxfp4_backend == Mxfp4MoeBackend.NONE:
+            return
+
+        self._setup_kernel(layer, w13, w2, w13_scale, w2_scale, w13_bias, w2_bias)
+
+    def get_fused_moe_quant_config(
+        self, layer: RoutedExperts
+    ) -> FusedMoEQuantConfig | None:
+        w1_scale = layer.w13_weight_scale
+        w2_scale = layer.w2_weight_scale
+        w1_bias = getattr(layer, "w13_bias", None)
+        w2_bias = getattr(layer, "w2_bias", None)
+
+        if self.mxfp4_backend in TRITON_BACKENDS:
+            assert self.w13_precision_config is not None
+            assert self.w2_precision_config is not None
+            w1_scale = self.w13_precision_config
+            w2_scale = self.w2_precision_config
+
+        return make_mxfp4_moe_quant_config(
+            mxfp4_backend=self.mxfp4_backend,
+            w1_scale=w1_scale,
+            w2_scale=w2_scale,
+            w1_bias=w1_bias,
+            w2_bias=w2_bias,
+            gemm1_alpha=1.702,
+            gemm1_beta=1.0,
+            swiglu_limit=7.0,
+            layer=layer,
+        )
+
+    def select_gemm_impl(
+        self,
+        prepare_finalize: mk.FusedMoEPrepareAndFinalize,
+        layer: RoutedExperts,
+    ) -> mk.FusedMoEExpertsModular:
+        raise ValueError(
+            f"{self.__class__.__name__} uses the new modular kernel "
+            "initialization logic. This function should not be called."
+        )
+
+    def apply(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ) -> torch.Tensor:
+        assert not self.is_monolithic
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply(
+            hidden_states=x,
+            w1=layer.w13_weight,
+            w2=layer.w2_weight,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            expert_map=layer.expert_map,
+            shared_experts=shared_experts,
+            shared_experts_input=shared_experts_input,
+        )
+
+    def apply_monolithic(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        router_logits: torch.Tensor,
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        assert self.is_monolithic
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply_monolithic(
+            hidden_states=x,
+            w1=layer.w13_weight,
+            w2=layer.w2_weight,
+            router_logits=router_logits,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+        )
+
+
 class Mxfp4MoEMethod(FusedMoEMethodBase):
     """MXFP4 MoE quantization method."""
 
     def __init__(self, moe: FusedMoEConfig):
         super().__init__(moe)
         self.weight_dtype = "mxfp4"
-        self.mxfp4_backend, self.experts_cls = select_mxfp4_moe_backend(moe)
+        self.mxfp4_backend, self.experts_cls = select_deepseek_v4_mxfp4_moe_backend(moe)
 
         self.max_capture_size = (
             get_current_vllm_config().compilation_config.max_cudagraph_capture_size
@@ -137,7 +511,7 @@ def maybe_roundup_sizes(
 
     def create_weights(
         self,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
         num_experts: int,
         hidden_size: int,
         intermediate_size_per_partition: int,
@@ -178,6 +552,7 @@ def create_weights(
         )
         layer.register_parameter("w13_weight_scale", w13_weight_scale)
         set_weight_attrs(w13_weight_scale, extra_weight_attrs)
+        w13_weight_scale.quant_method = "block"
 
         # down_proj (row parallel)
         w2_weight = torch.nn.Parameter(
@@ -203,6 +578,7 @@ def create_weights(
         )
         layer.register_parameter("w2_weight_scale", w2_weight_scale)
         set_weight_attrs(w2_weight_scale, extra_weight_attrs)
+        w2_weight_scale.quant_method = "block"
 
         if self.moe.has_bias:
             w13_bias = torch.nn.Parameter(
@@ -229,7 +605,7 @@ def create_weights(
 
     def _setup_kernel(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         w13: torch.Tensor,
         w2: torch.Tensor,
         w13_scale: torch.Tensor,
@@ -281,7 +657,7 @@ def _setup_kernel(
 
         # Convert weights to kernel format
         w13, w2, w13_scale, w2_scale, w13_bias, w2_bias = (
-            convert_to_mxfp4_moe_kernel_format(
+            convert_weight_to_mxfp4_moe_kernel_format(
                 mxfp4_backend=self.mxfp4_backend,
                 layer=layer,
                 w13_weight=w13,
@@ -307,6 +683,11 @@ def _setup_kernel(
             self.w13_precision_config = w13_scale
             self.w2_precision_config = w2_scale
 
+        # AITER backend requires weights to be marked as shuffled.
+        if self.mxfp4_backend == Mxfp4MoeBackend.AITER_MXFP4_BF16:
+            layer.w13_weight.is_shuffled = True
+            layer.w2_weight.is_shuffled = True
+
         if w13_bias is not None and w2_bias is not None:
             replace_parameter(layer, "w13_bias", w13_bias)
             replace_parameter(layer, "w2_bias", w2_bias)
@@ -321,8 +702,8 @@ def _setup_kernel(
                 moe_config=self.moe,
                 mxfp4_backend=self.mxfp4_backend,
                 experts_cls=self.experts_cls,
-                routing_tables=layer._maybe_init_expert_routing_tables(),
-                shared_experts=layer.shared_experts,
+                routing_tables=layer._expert_routing_tables(),
+                layer=layer,
             )
 
     def process_weights_after_loading(self, layer):
@@ -339,12 +720,14 @@ def process_weights_after_loading(self, layer):
         self._setup_kernel(layer, w13, w2, w13_scale, w2_scale, w13_bias, w2_bias)
 
     def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
+        self,
+        layer: RoutedExperts,
     ) -> FusedMoEQuantConfig | None:
         w1_scale = layer.w13_weight_scale
         w2_scale = layer.w2_weight_scale
         w1_bias = getattr(layer, "w13_bias", None)
         w2_bias = getattr(layer, "w2_bias", None)
+        swiglu_limit = getattr(layer, "swiglu_limit", None)
 
         if self.mxfp4_backend in TRITON_BACKENDS:
             assert self.w13_precision_config is not None
@@ -358,12 +741,14 @@ def get_fused_moe_quant_config(
             w2_scale=w2_scale,
             w1_bias=w1_bias,
             w2_bias=w2_bias,
+            swiglu_limit=swiglu_limit,
+            layer=layer,
         )
 
     def select_gemm_impl(
         self,
         prepare_finalize: mk.FusedMoEPrepareAndFinalize,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
     ) -> mk.FusedMoEExpertsModular:
         raise ValueError(
             f"{self.__class__.__name__} uses the new modular kernel "
@@ -372,12 +757,13 @@ def select_gemm_impl(
 
     def apply(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> torch.Tensor:
         assert not self.is_monolithic
         assert self.moe_kernel is not None
         return self.moe_kernel.apply(
@@ -390,15 +776,17 @@ def apply(
             global_num_experts=layer.global_num_experts,
             apply_router_weight_on_input=layer.apply_router_weight_on_input,
             expert_map=layer.expert_map,
+            shared_experts=shared_experts,
             shared_experts_input=shared_experts_input,
         )
 
     def apply_monolithic(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         router_logits: torch.Tensor,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
         assert self.is_monolithic
         assert self.moe_kernel is not None
         return self.moe_kernel.apply_monolithic(
diff --git a/vllm/model_executor/layers/quantization/mxfp8.py b/vllm/model_executor/layers/quantization/mxfp8.py
deleted file mode 100644
index bd29f272bd10..000000000000
--- a/vllm/model_executor/layers/quantization/mxfp8.py
+++ /dev/null
@@ -1,356 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-"""Online MXFP8 (microscaling FP8, block-32) quantization config and methods."""
-
-from typing import Any
-
-import torch
-from torch.nn import Module
-
-from vllm.logger import init_logger
-from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import (
-    FusedMoE,
-    FusedMoEMethodBase,
-)
-from vllm.model_executor.layers.fused_moe.layer import UnquantizedFusedMoEMethod
-from vllm.model_executor.layers.fused_moe.oracle.mxfp8 import (
-    select_mxfp8_moe_backend,
-)
-from vllm.model_executor.layers.linear import (
-    LinearBase,
-    UnquantizedLinearMethod,
-)
-from vllm.model_executor.layers.quantization import QuantizationMethods
-from vllm.model_executor.layers.quantization.base_config import (
-    QuantizeMethodBase,
-)
-from vllm.model_executor.layers.quantization.fp8 import (
-    Fp8Config,
-    Fp8KVCacheMethod,
-    Fp8OnlineLinearMethod,
-    Fp8OnlineMoEMethod,
-    _copy_missing_attrs,
-)
-from vllm.model_executor.layers.quantization.utils.mxfp8_utils import (
-    MXFP8_BLOCK_SIZE,
-    Mxfp8LinearBackend,
-    Mxfp8LinearOp,
-    mxfp8_e4m3_quantize,
-    swizzle_mxfp8_scale,
-)
-from vllm.model_executor.layers.quantization.utils.quant_utils import (
-    is_layer_skipped,
-)
-from vllm.model_executor.model_loader.weight_utils import (
-    initialize_single_dummy_weight,
-)
-from vllm.model_executor.parameter import ModelWeightParameter
-from vllm.model_executor.utils import replace_parameter, set_weight_attrs
-from vllm.platforms import current_platform
-
-logger = init_logger(__name__)
-
-
-class Mxfp8Config(Fp8Config):
-    """Config class for online MXFP8 MoE quantization."""
-
-    def __init__(
-        self,
-        activation_scheme: str = "dynamic",
-        ignored_layers: list[str] | None = None,
-    ) -> None:
-        if activation_scheme != "dynamic":
-            raise ValueError("mxfp8 only supports dynamic activation scheme.")
-        super().__init__(
-            is_checkpoint_fp8_serialized=False,
-            activation_scheme=activation_scheme,
-            ignored_layers=ignored_layers,
-            weight_block_size=None,
-        )
-
-    @classmethod
-    def get_name(cls) -> QuantizationMethods:
-        return "mxfp8"
-
-    @classmethod
-    def get_min_capability(cls) -> int:
-        return 100
-
-    @classmethod
-    def from_config(cls, config: dict[str, Any]) -> "Mxfp8Config":
-        activation_scheme = cls.get_from_keys_or(
-            config, ["activation_scheme"], "dynamic"
-        )
-        ignored_layers = cls.get_from_keys_or(config, ["ignored_layers"], None)
-        if not ignored_layers:
-            ignored_layers = cls.get_from_keys_or(
-                config, ["modules_to_not_convert"], None
-            )
-        return cls(
-            activation_scheme=activation_scheme,
-            ignored_layers=ignored_layers,
-        )
-
-    def get_quant_method(
-        self, layer: torch.nn.Module, prefix: str
-    ) -> "QuantizeMethodBase | None":
-        if isinstance(layer, LinearBase):
-            if is_layer_skipped(
-                prefix=prefix,
-                ignored_layers=self.ignored_layers,
-                fused_mapping=self.packed_modules_mapping,
-                skip_with_substr=True,
-            ):
-                return UnquantizedLinearMethod()
-            return Mxfp8OnlineLinearMethod(self)
-        elif isinstance(layer, FusedMoE):
-            if is_layer_skipped(
-                prefix=prefix,
-                ignored_layers=self.ignored_layers,
-                fused_mapping=self.packed_modules_mapping,
-                skip_with_substr=True,
-            ):
-                return UnquantizedFusedMoEMethod(layer.moe_config)
-            return Mxfp8OnlineMoEMethod(self, layer)
-        elif isinstance(layer, Attention):
-            return Fp8KVCacheMethod(self)
-        return None
-
-
-class Mxfp8OnlineLinearMethod(Fp8OnlineLinearMethod):
-    """Online MXFP8 linear method.
-    Loads bf16/fp16 checkpoints and quantizes weights to MXFP8 (microscaling
-    FP8 with block-32 scales) during weight loading.
-
-    Args:
-        quant_config: The MXFP8 quantization config.
-    """
-
-    uses_meta_device: bool = True
-
-    def __init__(self, quant_config: "Mxfp8Config"):
-        self.quant_config = quant_config
-        self.out_dtype = torch.get_default_dtype()
-        self.mxfp8_linear = Mxfp8LinearOp(self._select_backend())
-        logger.info_once(
-            "Using %s backend for MXFP8 GEMM", self.mxfp8_linear.backend.value
-        )
-
-    @staticmethod
-    def _select_backend() -> Mxfp8LinearBackend:
-        try:
-            from vllm.utils import flashinfer as fi
-
-            _ = fi.mm_mxfp8
-            return Mxfp8LinearBackend.FLASHINFER_CUTLASS
-        except Exception:
-            logger.warning(
-                "FlashInfer mm_mxfp8 not available, "
-                "falling back to MXFP8 emulation backend."
-            )
-            return Mxfp8LinearBackend.EMULATION
-
-    def create_weights(
-        self,
-        layer: torch.nn.Module,
-        input_size_per_partition: int,
-        output_partition_sizes: list[int],
-        input_size: int,
-        output_size: int,
-        params_dtype: torch.dtype,
-        **extra_weight_attrs,
-    ):
-        if input_size_per_partition % MXFP8_BLOCK_SIZE != 0:
-            raise ValueError(
-                f"MXFP8 requires input_size_per_partition "
-                f"({input_size_per_partition}) to be divisible by "
-                f"{MXFP8_BLOCK_SIZE}."
-            )
-
-        super().create_weights(
-            layer,
-            input_size_per_partition,
-            output_partition_sizes,
-            input_size,
-            output_size,
-            params_dtype,
-            **extra_weight_attrs,
-        )
-
-    def process_weights_after_loading(self, layer: Module) -> None:
-        if getattr(layer, "_already_called_process_weights_after_loading", False):
-            return
-
-        if layer.weight.device == torch.device("meta"):
-            weight = ModelWeightParameter(
-                data=torch.empty_like(layer.weight, device=layer._load_device),
-                input_dim=1,
-                output_dim=0,
-                weight_loader=layer.weight.weight_loader,
-            )
-            _copy_missing_attrs(layer.weight, weight)
-            layer.register_parameter("weight", weight)
-            initialize_single_dummy_weight(layer.weight)
-
-        weight_fp8, weight_scale = mxfp8_e4m3_quantize(layer.weight.contiguous())
-
-        if self.mxfp8_linear.backend == Mxfp8LinearBackend.FLASHINFER_CUTLASS:
-            N, K = layer.weight.shape[0], layer.weight.shape[1]
-            weight_scale = swizzle_mxfp8_scale(weight_scale, N, K)
-
-        layer.input_scale = None
-        replace_parameter(layer, "weight", weight_fp8.data)
-        replace_parameter(layer, "weight_scale", weight_scale.data)
-
-        layer._already_called_process_weights_after_loading = True
-
-    def apply(
-        self,
-        layer: torch.nn.Module,
-        x: torch.Tensor,
-        bias: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        return self.mxfp8_linear.apply(
-            input=x,
-            weight=layer.weight,
-            weight_scale=layer.weight_scale,
-            out_dtype=self.out_dtype,
-            bias=bias,
-        )
-
-
-class Mxfp8OnlineMoEMethod(Fp8OnlineMoEMethod):
-    """MoE method for online MXFP8 (block) quantization."""
-
-    uses_meta_device: bool = True
-
-    def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module):
-        FusedMoEMethodBase.__init__(self, layer.moe_config)
-        self.quant_config = quant_config
-        assert not quant_config.is_checkpoint_fp8_serialized
-        assert quant_config.activation_scheme == "dynamic"
-
-        self.weight_block_size = [1, MXFP8_BLOCK_SIZE]
-        self.block_quant = True
-        self.weight_scale_name = "weight_scale"
-
-        self.fp8_backend, self.experts_cls = select_mxfp8_moe_backend(config=self.moe)
-
-    def create_weights(
-        self,
-        layer: Module,
-        num_experts: int,
-        hidden_size: int,
-        intermediate_size_per_partition: int,
-        params_dtype: torch.dtype,
-        **extra_weight_attrs,
-    ):
-        if (
-            hidden_size % MXFP8_BLOCK_SIZE != 0
-            or intermediate_size_per_partition % MXFP8_BLOCK_SIZE != 0
-        ):
-            raise ValueError(
-                "Online MXFP8 MoE requires hidden/intermediate sizes divisible "
-                f"by {MXFP8_BLOCK_SIZE}."
-            )
-
-        super().create_weights(
-            layer=layer,
-            num_experts=num_experts,
-            hidden_size=hidden_size,
-            intermediate_size_per_partition=intermediate_size_per_partition,
-            params_dtype=params_dtype,
-            **extra_weight_attrs,
-        )
-
-        w13_weight_scale = torch.nn.Parameter(
-            torch.zeros(
-                num_experts,
-                2 * intermediate_size_per_partition,
-                hidden_size // MXFP8_BLOCK_SIZE,
-                dtype=torch.uint8,
-            ),
-            requires_grad=False,
-        )
-        w2_weight_scale = torch.nn.Parameter(
-            torch.zeros(
-                num_experts,
-                hidden_size,
-                intermediate_size_per_partition // MXFP8_BLOCK_SIZE,
-                dtype=torch.uint8,
-            ),
-            requires_grad=False,
-        )
-        layer.register_parameter("w13_weight_scale", w13_weight_scale)
-        layer.register_parameter("w2_weight_scale", w2_weight_scale)
-        set_weight_attrs(w13_weight_scale, extra_weight_attrs)
-        set_weight_attrs(w2_weight_scale, extra_weight_attrs)
-        layer.weight_block_size = [1, MXFP8_BLOCK_SIZE]
-
-    def _quantize_mxfp8_moe_weight(
-        self, weight: torch.Tensor
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """Batch quantization: bf16/fp16 weights -> MXFP8 (fp8 + uint8 scales)."""
-        num_batches = weight.size(0)
-        w_quant = []
-        w_scales = []
-        for i in range(num_batches):
-            mx_fp8_quant, mx_fp8_scale = mxfp8_e4m3_quantize(
-                weight[i], is_sf_swizzled_layout=False
-            )
-            w_quant.append(mx_fp8_quant)
-            w_scales.append(mx_fp8_scale)
-
-        return torch.stack(w_quant), torch.stack(w_scales)
-
-    def process_weights_after_loading(self, layer: Module) -> None:
-        if getattr(layer, "_already_called_process_weights_after_loading", False):
-            return
-
-        if layer.w13_weight.device == torch.device("meta"):
-            w13_weight = torch.nn.Parameter(
-                torch.empty_like(layer.w13_weight, device=layer._load_device),
-                requires_grad=False,
-            )
-            set_weight_attrs(
-                w13_weight, {"weight_loader": layer.w13_weight.weight_loader}
-            )
-            _copy_missing_attrs(layer.w13_weight, w13_weight)
-            layer.register_parameter("w13_weight", w13_weight)
-            initialize_single_dummy_weight(layer.w13_weight)
-        if layer.w2_weight.device == torch.device("meta"):
-            w2_weight = torch.nn.Parameter(
-                torch.empty_like(layer.w2_weight, device=layer._load_device),
-                requires_grad=False,
-            )
-            set_weight_attrs(
-                w2_weight, {"weight_loader": layer.w2_weight.weight_loader}
-            )
-            _copy_missing_attrs(layer.w2_weight, w2_weight)
-            layer.register_parameter("w2_weight", w2_weight)
-            initialize_single_dummy_weight(layer.w2_weight)
-
-        fp8_dtype = current_platform.fp8_dtype()
-        w13 = torch.empty_like(layer.w13_weight, dtype=fp8_dtype)
-        w2 = torch.empty_like(layer.w2_weight, dtype=fp8_dtype)
-        w13_scale = layer.w13_weight_scale
-        w2_scale = layer.w2_weight_scale
-        layer.w13_input_scale = None
-        layer.w2_input_scale = None
-
-        w13, w13_scale = self._quantize_mxfp8_moe_weight(layer.w13_weight)
-        w2, w2_scale = self._quantize_mxfp8_moe_weight(layer.w2_weight)
-
-        self._setup_kernel(
-            layer,
-            w13,
-            w2,
-            w13_scale,
-            w2_scale,
-            layer.w13_input_scale,
-            layer.w2_input_scale,
-        )
-
-        layer._already_called_process_weights_after_loading = True
diff --git a/vllm/model_executor/layers/quantization/online/__init__.py b/vllm/model_executor/layers/quantization/online/__init__.py
new file mode 100644
index 000000000000..208f01a7cb5e
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/online/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
diff --git a/vllm/model_executor/layers/quantization/online/base.py b/vllm/model_executor/layers/quantization/online/base.py
new file mode 100644
index 000000000000..bf166b181827
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/online/base.py
@@ -0,0 +1,165 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import Any
+
+import torch
+
+from vllm.config.quantization import QuantizationConfigArgs, QuantSpec
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe import (
+    RoutedExperts,
+)
+from vllm.model_executor.layers.fused_moe.unquantized_fused_moe_method import (
+    UnquantizedFusedMoEMethod,
+)
+from vllm.model_executor.layers.linear import (
+    LinearBase,
+    UnquantizedLinearMethod,
+)
+from vllm.model_executor.layers.quantization import QuantizationMethods
+from vllm.model_executor.layers.quantization.base_config import (
+    QuantizationConfig,
+    QuantizeMethodBase,
+)
+from vllm.model_executor.layers.quantization.compressed_tensors.utils import (
+    should_ignore_layer,
+)
+from vllm.model_executor.layers.quantization.online.fp8 import (
+    Fp8PerBlockOnlineLinearMethod,
+    Fp8PerBlockOnlineMoEMethod,
+    Fp8PerTensorOnlineLinearMethod,
+    Fp8PerTensorOnlineMoEMethod,
+)
+from vllm.model_executor.layers.quantization.online.int8 import (
+    Int8OnlineMoEMethod,
+)
+from vllm.model_executor.layers.quantization.online.mxfp8 import (
+    Mxfp8OnlineLinearMethod,
+    Mxfp8OnlineMoEMethod,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    QuantKey,
+    kFp8Static128BlockSym,
+    kFp8StaticTensorSym,
+    kInt8StaticChannelSym,
+    kMxfp8Dynamic,
+)
+
+logger = init_logger(__name__)
+
+
+# Online dispatch tables, keyed by the QuantSpec.weight QuantKey. The
+# corresponding method class handles the activation choice via its
+# `supported_activation_quant` set.
+_ONLINE_LINEAR_METHODS: dict[QuantKey, type] = {
+    kFp8StaticTensorSym: Fp8PerTensorOnlineLinearMethod,
+    kFp8Static128BlockSym: Fp8PerBlockOnlineLinearMethod,
+    kMxfp8Dynamic: Mxfp8OnlineLinearMethod,
+}
+
+_ONLINE_MOE_METHODS: dict[QuantKey, type] = {
+    kFp8StaticTensorSym: Fp8PerTensorOnlineMoEMethod,
+    kFp8Static128BlockSym: Fp8PerBlockOnlineMoEMethod,
+    kMxfp8Dynamic: Mxfp8OnlineMoEMethod,
+    kInt8StaticChannelSym: Int8OnlineMoEMethod,
+}
+
+
+class OnlineQuantizationConfig(QuantizationConfig):
+    """Model-level config for online quantization (quantize fp16/bf16 weights
+    during model loading, without requiring a pre-quantized checkpoint)."""
+
+    def __init__(
+        self,
+        args: QuantizationConfigArgs,
+    ) -> None:
+        super().__init__()
+        if args.linear is None and args.moe is None:
+            raise ValueError(
+                "OnlineQuantizationConfig requires at least one of "
+                "quantization_config.linear or quantization_config.moe "
+                "to be set."
+            )
+        self.args = args
+        self.ignored_layers: list[str] = args.ignore
+
+    @classmethod
+    def get_name(cls) -> QuantizationMethods:
+        return "online"
+
+    @classmethod
+    def get_supported_act_dtypes(cls) -> list[torch.dtype]:
+        return [torch.bfloat16, torch.half]
+
+    @classmethod
+    def get_min_capability(cls) -> int:
+        # Note: as more online quant schemes will be added, this
+        # value will become the minimum across all supported schemes.
+        return 75
+
+    @classmethod
+    def get_config_filenames(cls) -> list[str]:
+        return []
+
+    @classmethod
+    def from_config(cls, config: dict[str, Any]) -> "OnlineQuantizationConfig":
+        raise NotImplementedError(
+            "OnlineQuantizationConfig does not support loading from a "
+            "checkpoint config. Use quantization_config or "
+            "quantization='fp8_per_tensor'/'fp8_per_block' instead."
+        )
+
+    def _dispatch(
+        self,
+        spec: QuantSpec | None,
+        table: dict[QuantKey, type],
+        layer: torch.nn.Module,
+    ) -> "QuantizeMethodBase | None":
+        if spec is None or spec.weight is None:
+            return None
+        cls = table.get(spec.weight)
+        if cls is None:
+            raise ValueError(
+                f"online quantization for {type(layer).__name__} with "
+                f"weight={spec.weight} is not supported; supported weight "
+                f"keys: {sorted(str(k) for k in table)}"
+            )
+        # Online method classes pick their own activation format internally.
+        # Per-class activation overrides are not yet wired through; reject
+        # explicit overrides until the relevant method class opts in.
+        if spec.activation is not None:
+            raise ValueError(
+                f"activation override (activation={spec.activation}) is not "
+                f"yet supported for online {cls.__name__}"
+            )
+        if isinstance(layer, RoutedExperts):
+            return cls(layer=layer)
+        return cls()
+
+    def get_quant_method(
+        self, layer: torch.nn.Module, prefix: str
+    ) -> "QuantizeMethodBase | None":
+        if isinstance(layer, LinearBase):
+            if should_ignore_layer(
+                prefix,
+                ignore=self.ignored_layers,
+                fused_mapping=self.packed_modules_mapping,
+            ):
+                return UnquantizedLinearMethod()
+            method = self._dispatch(self.args.linear, _ONLINE_LINEAR_METHODS, layer)
+            return method if method is not None else UnquantizedLinearMethod()
+        elif isinstance(layer, RoutedExperts):
+            if should_ignore_layer(
+                prefix,
+                ignore=self.ignored_layers,
+                fused_mapping=self.packed_modules_mapping,
+            ):
+                return UnquantizedFusedMoEMethod(layer.moe_config)
+            method = self._dispatch(self.args.moe, _ONLINE_MOE_METHODS, layer)
+            return (
+                method
+                if method is not None
+                else UnquantizedFusedMoEMethod(layer.moe_config)
+            )
+        return None
diff --git a/vllm/model_executor/layers/quantization/online/fp8.py b/vllm/model_executor/layers/quantization/online/fp8.py
new file mode 100644
index 000000000000..5e14eade264d
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/online/fp8.py
@@ -0,0 +1,514 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import TYPE_CHECKING
+
+import torch
+from torch.nn import Module
+
+if TYPE_CHECKING:
+    import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+    from vllm.model_executor.layers.fused_moe.config import (
+        FusedMoEQuantConfig,
+    )
+    from vllm.model_executor.layers.fused_moe.oracle.fp8 import Fp8MoeBackend
+
+import vllm.envs as envs
+from vllm import _custom_ops as ops
+from vllm.config import get_current_vllm_config
+from vllm.model_executor.kernels.linear import init_fp8_linear_kernel
+from vllm.model_executor.kernels.linear.scaled_mm import (
+    CutlassFP8ScaledMMLinearKernel,
+)
+from vllm.model_executor.layers.fused_moe import RoutedExperts
+from vllm.model_executor.layers.fused_moe.oracle.fp8 import (
+    select_fp8_moe_backend,
+)
+from vllm.model_executor.layers.linear import (
+    LinearMethodBase,
+)
+from vllm.model_executor.layers.quantization.online.moe_base import (
+    OnlineMoEMethodBase,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    GroupShape,
+    create_fp8_quant_key,
+    kFp8Dynamic128Sym,
+    kFp8DynamicTensorSym,
+    kFp8DynamicTokenSym,
+    kFp8Static128BlockSym,
+    kFp8StaticTensorSym,
+)
+from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
+    cutlass_fp8_supported,
+)
+from vllm.model_executor.model_loader.reload.layerwise import (
+    initialize_online_processing,
+)
+from vllm.model_executor.parameter import ModelWeightParameter
+from vllm.model_executor.utils import replace_parameter
+from vllm.platforms import current_platform
+from vllm.utils.deep_gemm import per_block_cast_to_fp8
+
+# ---------------------------------------------------------------------------
+# Online FP8 Linear Methods
+# ---------------------------------------------------------------------------
+
+
+class _Fp8OnlineLinearBase(LinearMethodBase):
+    """Shared base for online FP8 linear methods. Loads fp16/bf16 checkpoint
+    weights onto meta device and materializes them just-in-time."""
+
+    uses_meta_device: bool = True
+
+    def __init__(self):
+        self.out_dtype = torch.get_default_dtype()
+        self.input_dtype = get_current_vllm_config().model_config.dtype
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        input_size_per_partition: int,
+        output_partition_sizes: list[int],
+        input_size: int,
+        output_size: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        output_size_per_partition = sum(output_partition_sizes)
+        weight_loader = extra_weight_attrs.get("weight_loader")
+        layer.logical_widths = output_partition_sizes
+        layer.input_size_per_partition = input_size_per_partition
+        layer.output_size_per_partition = output_size_per_partition
+        layer.orig_dtype = params_dtype
+        layer.weight_block_size = None
+
+        weight = ModelWeightParameter(
+            data=torch.empty(
+                output_size_per_partition,
+                input_size_per_partition,
+                device="meta",  # materialized and processed during loading
+                dtype=params_dtype,
+            ),
+            input_dim=1,
+            output_dim=0,
+            weight_loader=weight_loader,
+        )
+        layer.register_parameter("weight", weight)
+
+        initialize_online_processing(layer)
+
+
+class Fp8PerTensorOnlineLinearMethod(_Fp8OnlineLinearBase):
+    """Online tensorwise FP8 linear quantization.
+    Loads fp16/bf16 weights and quantizes them per-tensor during loading."""
+
+    def __init__(self):
+        super().__init__()
+
+        self.weight_quant_key = kFp8StaticTensorSym
+        # Use per-token quantization for better perf if dynamic and cutlass
+        if cutlass_fp8_supported():
+            self.activation_quant_key = kFp8DynamicTokenSym
+        else:
+            self.activation_quant_key = kFp8DynamicTensorSym
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        input_size_per_partition: int,
+        output_partition_sizes: list[int],
+        input_size: int,
+        output_size: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        super().create_weights(
+            layer,
+            input_size_per_partition,
+            output_partition_sizes,
+            input_size,
+            output_size,
+            params_dtype,
+            **extra_weight_attrs,
+        )
+
+        self.fp8_linear = init_fp8_linear_kernel(
+            activation_quant_key=self.activation_quant_key,
+            weight_quant_key=self.weight_quant_key,
+            weight_shape=layer.weight.shape,
+            input_dtype=self.input_dtype,
+            out_dtype=self.out_dtype,
+            module_name=self.__class__.__name__,
+        )
+
+    def process_weights_after_loading(self, layer: Module) -> None:
+        if getattr(layer, "_already_called_process_weights_after_loading", False):
+            return
+
+        layer.input_scale = None
+        qweight, weight_scale = ops.scaled_fp8_quant(layer.weight, scale=None)
+
+        # Update layer with new values.
+        replace_parameter(layer, "weight", qweight.t().data)
+        replace_parameter(layer, "weight_scale", weight_scale.data)
+
+        self.fp8_linear.process_weights_after_loading(layer)
+
+        # Prevent duplicate processing (e.g., during weight reload)
+        layer._already_called_process_weights_after_loading = True
+
+    def apply(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        # if batch invariant mode is enabled, use BF16 dequant
+        if envs.VLLM_BATCH_INVARIANT:
+            if isinstance(self.fp8_linear, CutlassFP8ScaledMMLinearKernel):
+                return self.fp8_linear.apply_weights(layer, x, bias)
+
+            weight_fp8 = layer.weight.to(torch.bfloat16)
+            weight_scale = layer.weight_scale.to(torch.bfloat16)
+            if weight_scale.numel() == 1:
+                # Per-tensor: simple scalar multiplication
+                weight_bf16 = weight_fp8 * weight_scale
+            else:
+                # Multiple scales (fused modules like QKV)
+                if (
+                    weight_scale.dim() == 1
+                    and weight_scale.shape[0] == weight_fp8.shape[0]
+                ):
+                    # Per-row scaling
+                    weight_bf16 = weight_fp8 * weight_scale.unsqueeze(1)
+                else:
+                    # Fallback
+                    weight_bf16 = weight_fp8 * weight_scale
+            return torch.nn.functional.linear(x, weight_bf16.t(), bias)
+
+        return self.fp8_linear.apply_weights(layer, x, bias)
+
+
+class Fp8PerBlockOnlineLinearMethod(_Fp8OnlineLinearBase):
+    """Online blockwise FP8 linear quantization.
+    Loads fp16/bf16 weights and quantizes them per-block during loading."""
+
+    def __init__(self):
+        super().__init__()
+        self.weight_block_size = [128, 128]
+        self.activation_quant_key = create_fp8_quant_key(
+            static=False,
+            group_shape=GroupShape(1, self.weight_block_size[0]),
+        )
+        self.weight_quant_key = create_fp8_quant_key(
+            static=True, group_shape=GroupShape(*self.weight_block_size)
+        )
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        input_size_per_partition: int,
+        output_partition_sizes: list[int],
+        input_size: int,
+        output_size: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        super().create_weights(
+            layer,
+            input_size_per_partition,
+            output_partition_sizes,
+            input_size,
+            output_size,
+            params_dtype,
+            **extra_weight_attrs,
+        )
+        layer.weight_block_size = self.weight_block_size
+
+        self.fp8_linear = init_fp8_linear_kernel(
+            activation_quant_key=self.activation_quant_key,
+            weight_quant_key=self.weight_quant_key,
+            weight_shape=layer.weight.shape,
+            input_dtype=self.input_dtype,
+            out_dtype=self.out_dtype,
+            module_name=self.__class__.__name__,
+        )
+
+    def process_weights_after_loading(self, layer: Module) -> None:
+        if getattr(layer, "_already_called_process_weights_after_loading", False):
+            return
+
+        layer.input_scale = None
+        block_size = self.weight_block_size
+
+        qweight, weight_scale_inv = per_block_cast_to_fp8(
+            layer.weight, block_size=block_size, use_ue8m0=False
+        )
+
+        replace_parameter(layer, "weight", qweight.data)
+        replace_parameter(layer, "weight_scale_inv", weight_scale_inv.data)
+
+        self.fp8_linear.process_weights_after_loading(layer)
+
+        # Prevent duplicate processing (e.g., during weight reload)
+        layer._already_called_process_weights_after_loading = True
+
+    def apply(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        assert self.weight_block_size is not None
+
+        # Note: batch invariance already handled in the function below
+        return self.fp8_linear.apply_weights(
+            layer,
+            x,
+            bias,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Online FP8 MoE Methods
+# ---------------------------------------------------------------------------
+
+
+class _Fp8OnlineMoEBase(OnlineMoEMethodBase):
+    """Shared base for online FP8 MoE methods. Loads fp16/bf16 checkpoint
+    weights onto meta device and materializes them just-in-time."""
+
+    # Declared here for mypy; actual values are set in __init__.
+    fp8_backend: "Fp8MoeBackend"
+    experts_cls: "type[mk.FusedMoEExperts] | None"
+    weight_scale_name: str
+    weight_block_size: list[int] | None
+
+    def __init__(
+        self,
+        *,
+        weight_block_size: list[int] | None,
+        layer: torch.nn.Module,
+    ):
+        super().__init__(layer.moe_config)
+        self.weight_block_size = weight_block_size
+        self.block_quant: bool = self.weight_block_size is not None
+        self.weight_scale_name = (
+            "weight_scale_inv" if self.block_quant else "weight_scale"
+        )
+
+        # Set weight key and activation key for kernel compatibility
+        if self.block_quant:
+            weight_key = kFp8Static128BlockSym
+            activation_key = kFp8Dynamic128Sym
+        else:
+            weight_key = kFp8StaticTensorSym
+            activation_key = kFp8DynamicTensorSym
+
+        # Select Fp8 MoE backend
+        self.fp8_backend, self.experts_cls = select_fp8_moe_backend(
+            config=self.moe,
+            weight_key=weight_key,
+            activation_key=activation_key,
+            allow_vllm_cutlass=False,
+        )
+
+    def _setup_kernel(
+        self,
+        layer: RoutedExperts,
+        w13: torch.Tensor,
+        w2: torch.Tensor,
+        w13_scale: torch.Tensor,
+        w2_scale: torch.Tensor,
+        w13_input_scale: torch.Tensor | None,
+        w2_input_scale: torch.Tensor | None,
+    ) -> None:
+        from vllm.model_executor.layers.fused_moe.oracle.fp8 import (
+            convert_to_fp8_moe_kernel_format,
+            make_fp8_moe_kernel,
+        )
+
+        # Shuffle weights to runtime format.
+        w13, w2, w13_scale, w2_scale = convert_to_fp8_moe_kernel_format(
+            fp8_backend=self.fp8_backend,
+            layer=layer,
+            w13=w13,
+            w2=w2,
+            w13_scale=w13_scale,
+            w2_scale=w2_scale,
+            w13_input_scale=w13_input_scale,
+            w2_input_scale=w2_input_scale,
+        )
+
+        # Replace parameters with updated versions. Note that this helper
+        # function ensures the replacement is compatible with RL weight reloads.
+        replace_parameter(layer, "w13_weight", w13)
+        replace_parameter(layer, "w2_weight", w2)
+        replace_parameter(layer, f"w13_{self.weight_scale_name}", w13_scale)
+        replace_parameter(layer, f"w2_{self.weight_scale_name}", w2_scale)
+
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+        if self.moe_quant_config:
+            assert self.experts_cls is not None
+            self.moe_kernel = make_fp8_moe_kernel(
+                moe_quant_config=self.moe_quant_config,
+                moe_config=self.moe,
+                fp8_backend=self.fp8_backend,
+                experts_cls=self.experts_cls,
+                routing_tables=layer._expert_routing_tables(),
+            )
+
+    def get_fused_moe_quant_config(
+        self, layer: torch.nn.Module
+    ) -> "FusedMoEQuantConfig":
+        from vllm.model_executor.layers.fused_moe.oracle.fp8 import (
+            make_fp8_moe_quant_config,
+        )
+
+        w1_scale = getattr(layer, f"w13_{self.weight_scale_name}")
+        w2_scale = getattr(layer, f"w2_{self.weight_scale_name}")
+        a1_scale = layer.w13_input_scale
+        a2_scale = layer.w2_input_scale
+
+        quant_config = make_fp8_moe_quant_config(
+            fp8_backend=self.fp8_backend,
+            w1_scale=w1_scale,
+            w2_scale=w2_scale,
+            a1_scale=a1_scale,
+            a2_scale=a2_scale,
+            block_shape=self.weight_block_size,
+            swiglu_limit=getattr(layer, "swiglu_limit", None),
+        )
+
+        self._maybe_inject_biases(quant_config, layer)
+        return quant_config
+
+
+class Fp8PerTensorOnlineMoEMethod(_Fp8OnlineMoEBase):
+    """Online tensorwise FP8 MoE quantization.
+    Loads fp16/bf16 weights and quantizes them per-tensor during loading."""
+
+    def __init__(
+        self,
+        *,
+        layer: torch.nn.Module,
+    ):
+        super().__init__(
+            weight_block_size=None,
+            layer=layer,
+        )
+
+    def process_weights_after_loading(self, layer: Module) -> None:
+        # TODO(@ksayers): inplace fp8 quant kernel, initialize scales with ones
+        if getattr(layer, "_already_called_process_weights_after_loading", False):
+            return
+
+        # If checkpoint is fp16, quantize in place.
+        fp8_dtype = current_platform.fp8_dtype()
+        w13 = torch.empty_like(layer.w13_weight, dtype=fp8_dtype)
+        w2 = torch.empty_like(layer.w2_weight, dtype=fp8_dtype)
+        w13_scale = torch.ones(
+            layer.num_experts, device=w13.device, dtype=torch.float32
+        )
+        w2_scale = torch.ones(layer.num_experts, device=w2.device, dtype=torch.float32)
+        layer.w13_input_scale = None
+        layer.w2_input_scale = None
+
+        for expert in range(layer.local_num_experts):
+            w13[expert, :, :], w13_scale[expert] = ops.scaled_fp8_quant(
+                layer.w13_weight[expert, :, :]
+            )
+            w2[expert, :, :], w2_scale[expert] = ops.scaled_fp8_quant(
+                layer.w2_weight[expert, :, :]
+            )
+
+        # Shuffle weights to runtime format and setup kernel.
+        self._setup_kernel(
+            layer,
+            w13,
+            w2,
+            w13_scale,
+            w2_scale,
+            w13_input_scale=layer.w13_input_scale,
+            w2_input_scale=layer.w2_input_scale,
+        )
+
+        # Prevent duplicate processing (e.g., during weight reload)
+        layer._already_called_process_weights_after_loading = True
+
+
+class Fp8PerBlockOnlineMoEMethod(_Fp8OnlineMoEBase):
+    """Online blockwise FP8 MoE quantization.
+    Loads fp16/bf16 weights and quantizes them per-block during loading."""
+
+    def __init__(
+        self,
+        *,
+        layer: torch.nn.Module,
+    ):
+        super().__init__(
+            weight_block_size=[128, 128],
+            layer=layer,
+        )
+
+    def process_weights_after_loading(self, layer: Module) -> None:
+        if getattr(layer, "_already_called_process_weights_after_loading", False):
+            return
+
+        fp8_dtype = current_platform.fp8_dtype()
+        w13 = torch.empty_like(layer.w13_weight, dtype=fp8_dtype)
+        w2 = torch.empty_like(layer.w2_weight, dtype=fp8_dtype)
+
+        block_size = self.weight_block_size
+        assert block_size is not None
+        block_n, block_k = block_size
+
+        # Create block-shaped scales (computed here rather than in
+        # create_weights because online quant doesn't need them until now).
+        num_experts = layer.local_num_experts
+        _, w13_out, w13_in = layer.w13_weight.shape
+        _, w2_out, w2_in = layer.w2_weight.shape
+
+        w13_scale = torch.ones(
+            num_experts,
+            (w13_out + block_n - 1) // block_n,
+            (w13_in + block_k - 1) // block_k,
+            dtype=torch.float32,
+            device=w13.device,
+        )
+        w2_scale = torch.ones(
+            num_experts,
+            (w2_out + block_n - 1) // block_n,
+            (w2_in + block_k - 1) // block_k,
+            dtype=torch.float32,
+            device=w2.device,
+        )
+
+        for expert in range(num_experts):
+            w13[expert], w13_scale[expert] = per_block_cast_to_fp8(
+                layer.w13_weight[expert],
+                block_size=block_size,
+                use_ue8m0=False,
+            )
+            w2[expert], w2_scale[expert] = per_block_cast_to_fp8(
+                layer.w2_weight[expert],
+                block_size=block_size,
+                use_ue8m0=False,
+            )
+
+        layer.weight_block_size = block_size
+
+        # Shuffle weights to runtime format and setup kernel.
+        self._setup_kernel(
+            layer,
+            w13,
+            w2,
+            w13_scale,
+            w2_scale,
+            layer.w13_input_scale,
+            layer.w2_input_scale,
+        )
+
+        # Prevent duplicate processing (e.g., during weight reload)
+        layer._already_called_process_weights_after_loading = True
diff --git a/vllm/model_executor/layers/quantization/online/int8.py b/vllm/model_executor/layers/quantization/online/int8.py
new file mode 100644
index 000000000000..fc9c18e66c75
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/online/int8.py
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import TYPE_CHECKING
+
+import torch
+from torch.nn import Module
+
+if TYPE_CHECKING:
+    from vllm.model_executor.layers.fused_moe.config import (
+        FusedMoEQuantConfig,
+    )
+
+from vllm.model_executor.layers.fused_moe import RoutedExperts
+from vllm.model_executor.layers.fused_moe.oracle.int8 import (
+    make_int8_moe_kernel,
+    make_int8_moe_quant_config,
+    select_int8_moe_backend,
+)
+from vllm.model_executor.layers.quantization.online.moe_base import (
+    OnlineMoEMethodBase,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    kInt8DynamicTokenSym,
+    kInt8StaticChannelSym,
+)
+from vllm.model_executor.utils import replace_parameter
+
+
+class Int8OnlineMoEMethod(OnlineMoEMethodBase):
+    """Online per-channel INT8 MoE quantization.
+    Loads fp16/bf16 weights and quantizes them per-row to int8 during loading.
+    """
+
+    def __init__(
+        self,
+        *,
+        layer: torch.nn.Module,
+    ):
+        super().__init__(layer.moe_config)
+        self.int8_backend, self.experts_cls = select_int8_moe_backend(
+            config=self.moe,
+            weight_key=kInt8StaticChannelSym,
+            activation_key=kInt8DynamicTokenSym,
+        )
+
+    def process_weights_after_loading(self, layer: Module) -> None:
+        if getattr(layer, "_already_called_process_weights_after_loading", False):
+            return
+
+        self._quantize_weights(layer)
+        self._setup_kernel(layer)
+
+        layer._already_called_process_weights_after_loading = True
+
+    def _quantize_weights(self, layer: Module) -> None:
+        vmax = torch.iinfo(torch.int8).max
+
+        w13 = torch.empty_like(layer.w13_weight, dtype=torch.int8)
+        w2 = torch.empty_like(layer.w2_weight, dtype=torch.int8)
+        w13_scale = torch.zeros(
+            layer.num_experts,
+            layer.w13_weight.shape[1],
+            device=w13.device,
+            dtype=torch.float32,
+        )
+        w2_scale = torch.zeros(
+            layer.num_experts,
+            layer.w2_weight.shape[1],
+            device=w2.device,
+            dtype=torch.float32,
+        )
+
+        for expert in range(layer.local_num_experts):
+            # w13: per-row quantization over hidden_size dim
+            w = layer.w13_weight[expert, :, :]
+            scales = w.abs().amax(dim=1) / vmax
+            q = w.div(scales.unsqueeze(1)).round().clamp(-vmax, vmax)
+            w13[expert, :, :] = q.to(torch.int8)
+            w13_scale[expert, :] = scales
+
+            # w2: per-row quantization over intermediate_size dim
+            w = layer.w2_weight[expert, :, :]
+            scales = w.abs().amax(dim=1) / vmax
+            q = w.div(scales.unsqueeze(1)).round().clamp(-vmax, vmax)
+            w2[expert, :, :] = q.to(torch.int8)
+            w2_scale[expert, :] = scales
+
+        replace_parameter(layer, "w13_weight", w13)
+        replace_parameter(layer, "w2_weight", w2)
+        replace_parameter(layer, "w13_scale", w13_scale)
+        replace_parameter(layer, "w2_scale", w2_scale)
+
+    def _setup_kernel(self, layer: RoutedExperts) -> None:
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+        assert self.moe_quant_config is not None
+        assert self.experts_cls is not None
+        self.moe_kernel = make_int8_moe_kernel(
+            moe_quant_config=self.moe_quant_config,
+            moe_config=self.moe,
+            experts_cls=self.experts_cls,
+            routing_tables=layer._expert_routing_tables(),
+        )
+
+    def get_fused_moe_quant_config(
+        self, layer: torch.nn.Module
+    ) -> "FusedMoEQuantConfig | None":
+        quant_config = make_int8_moe_quant_config(
+            w1_scale=layer.w13_scale,
+            w2_scale=layer.w2_scale,
+        )
+        self._maybe_inject_biases(quant_config, layer)
+        return quant_config
diff --git a/vllm/model_executor/layers/quantization/online/moe_base.py b/vllm/model_executor/layers/quantization/online/moe_base.py
new file mode 100644
index 000000000000..32eb81601bea
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/online/moe_base.py
@@ -0,0 +1,179 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from abc import abstractmethod
+
+import torch
+
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoEMethodBase,
+    FusedMoEQuantConfig,
+    RoutedExperts,
+    SharedExperts,
+)
+from vllm.model_executor.model_loader.reload.layerwise import (
+    initialize_online_processing,
+)
+from vllm.model_executor.utils import set_weight_attrs
+
+
+class OnlineMoEMethodBase(FusedMoEMethodBase):
+    """Base for MoE methods that load full-precision weights on meta device
+    and quantize them after loading via the QeRL layerwise processing system.
+    """
+
+    uses_meta_device: bool = True
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        layer.num_experts = num_experts
+        layer.orig_dtype = params_dtype
+        layer.weight_block_size = None
+
+        # Fused gate_up_proj (column parallel) — full precision on meta device
+        w13_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                2 * intermediate_size_per_partition,
+                hidden_size,
+                device="meta",
+                dtype=params_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight", w13_weight)
+        set_weight_attrs(w13_weight, extra_weight_attrs)
+
+        # down_proj (row parallel) — full precision on meta device
+        w2_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                intermediate_size_per_partition,
+                device="meta",
+                dtype=params_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight", w2_weight)
+        set_weight_attrs(w2_weight, extra_weight_attrs)
+
+        # BIASES (for models like GPT-OSS that have biased MoE)
+        if self.moe.has_bias:
+            w13_bias = torch.nn.Parameter(
+                torch.zeros(
+                    num_experts,
+                    2 * intermediate_size_per_partition,
+                    device="meta",
+                    dtype=layer.orig_dtype,
+                ),
+                requires_grad=False,
+            )
+            layer.register_parameter("w13_bias", w13_bias)
+            set_weight_attrs(w13_bias, extra_weight_attrs)
+
+            w2_bias = torch.nn.Parameter(
+                torch.zeros(
+                    num_experts,
+                    hidden_size,
+                    device="meta",
+                    dtype=layer.orig_dtype,
+                ),
+                requires_grad=False,
+            )
+            layer.register_parameter("w2_bias", w2_bias)
+            set_weight_attrs(w2_bias, extra_weight_attrs)
+
+        layer.w13_input_scale = None
+        layer.w2_input_scale = None
+
+        initialize_online_processing(layer)
+
+    @abstractmethod
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        pass
+
+    def _maybe_inject_biases(
+        self,
+        quant_config: FusedMoEQuantConfig,
+        layer: torch.nn.Module,
+    ) -> None:
+        """Inject biases into the quant config if the model has them
+        (e.g. GPT-OSS biased MoE)."""
+        if self.moe.has_bias:
+            w13_bias = getattr(layer, "w13_bias", None)
+            w2_bias = getattr(layer, "w2_bias", None)
+            if w13_bias is not None:
+                quant_config._w1.bias = w13_bias
+            if w2_bias is not None:
+                quant_config._w2.bias = w2_bias
+
+    def maybe_make_prepare_finalize(
+        self,
+        routing_tables: tuple[torch.Tensor, torch.Tensor, torch.Tensor] | None = None,
+    ) -> mk.FusedMoEPrepareAndFinalizeModular | None:
+        raise ValueError(
+            f"{self.__class__.__name__} uses the new modular kernel "
+            "initialization logic. This function should not be called."
+        )
+
+    @property
+    def supports_eplb(self) -> bool:
+        return True
+
+    def apply_monolithic(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        router_logits: torch.Tensor,
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        assert self.is_monolithic
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply_monolithic(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            router_logits,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            num_expert_group=layer.num_expert_group,
+            topk_group=layer.topk_group,
+            e_score_correction_bias=layer.e_score_correction_bias,
+            routed_scaling_factor=layer.routed_scaling_factor,
+        )
+
+    def apply(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ) -> torch.Tensor:
+        assert not self.is_monolithic
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            topk_weights,
+            topk_ids,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            shared_experts=shared_experts,
+            shared_experts_input=shared_experts_input,
+        )
diff --git a/vllm/model_executor/layers/quantization/online/mxfp8.py b/vllm/model_executor/layers/quantization/online/mxfp8.py
new file mode 100644
index 000000000000..28d0706b11e6
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/online/mxfp8.py
@@ -0,0 +1,253 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""Online MXFP8 (microscaling FP8, block-32) quantization methods."""
+
+from typing import TYPE_CHECKING
+
+import torch
+from torch.nn import Module
+
+if TYPE_CHECKING:
+    import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+    from vllm.model_executor.layers.fused_moe import (
+        FusedMoEQuantConfig,
+        RoutedExperts,
+    )
+    from vllm.model_executor.layers.fused_moe.oracle.fp8 import Fp8MoeBackend
+
+from vllm.model_executor.kernels.linear import init_mxfp8_linear_kernel
+from vllm.model_executor.layers.fused_moe.oracle.mxfp8 import (
+    select_mxfp8_moe_backend,
+)
+from vllm.model_executor.layers.quantization.online.fp8 import (
+    _Fp8OnlineLinearBase,
+)
+from vllm.model_executor.layers.quantization.online.moe_base import (
+    OnlineMoEMethodBase,
+)
+from vllm.model_executor.layers.quantization.utils.mxfp8_utils import (
+    MXFP8_BLOCK_SIZE,
+    mxfp8_e4m3_quantize,
+)
+from vllm.model_executor.utils import replace_parameter
+from vllm.platforms import current_platform
+
+
+class Mxfp8OnlineLinearMethod(_Fp8OnlineLinearBase):
+    """Online MXFP8 linear method.
+    Loads bf16/fp16 checkpoints and quantizes weights to MXFP8 (microscaling
+    FP8 with block-32 scales) during weight loading.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.kernel = init_mxfp8_linear_kernel()
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        input_size_per_partition: int,
+        output_partition_sizes: list[int],
+        input_size: int,
+        output_size: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        if input_size_per_partition % MXFP8_BLOCK_SIZE != 0:
+            raise ValueError(
+                f"MXFP8 requires input_size_per_partition "
+                f"({input_size_per_partition}) to be divisible by "
+                f"{MXFP8_BLOCK_SIZE}."
+            )
+
+        super().create_weights(
+            layer,
+            input_size_per_partition,
+            output_partition_sizes,
+            input_size,
+            output_size,
+            params_dtype,
+            **extra_weight_attrs,
+        )
+
+    def process_weights_after_loading(self, layer: Module) -> None:
+        if getattr(layer, "_already_called_process_weights_after_loading", False):
+            return
+
+        weight_fp8, weight_scale = mxfp8_e4m3_quantize(layer.weight.contiguous())
+
+        layer.input_scale = None
+        replace_parameter(layer, "weight", weight_fp8.data)
+        replace_parameter(layer, "weight_scale", weight_scale.data)
+
+        self.kernel.process_weights_after_loading(layer)
+
+        layer._already_called_process_weights_after_loading = True
+
+    def apply(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        return self.kernel.apply_weights(layer, x, bias)
+
+
+class Mxfp8OnlineMoEMethod(OnlineMoEMethodBase):
+    """MoE method for online MXFP8 (block) quantization."""
+
+    fp8_backend: "Fp8MoeBackend"
+    experts_cls: "type[mk.FusedMoEExperts] | None"
+
+    def __init__(self, *, layer: torch.nn.Module):
+        super().__init__(layer.moe_config)
+        self.weight_block_size: list[int] = [1, MXFP8_BLOCK_SIZE]
+        self.weight_scale_name = "weight_scale"
+
+        self.fp8_backend, self.experts_cls = select_mxfp8_moe_backend(config=self.moe)
+
+    def create_weights(
+        self,
+        layer: Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        if (
+            hidden_size % MXFP8_BLOCK_SIZE != 0
+            or intermediate_size_per_partition % MXFP8_BLOCK_SIZE != 0
+        ):
+            raise ValueError(
+                "Online MXFP8 MoE requires hidden/intermediate sizes divisible "
+                f"by {MXFP8_BLOCK_SIZE}."
+            )
+
+        super().create_weights(
+            layer=layer,
+            num_experts=num_experts,
+            hidden_size=hidden_size,
+            intermediate_size_per_partition=intermediate_size_per_partition,
+            params_dtype=params_dtype,
+            **extra_weight_attrs,
+        )
+
+        layer.weight_block_size = [1, MXFP8_BLOCK_SIZE]
+
+    def _quantize_mxfp8_moe_weight(
+        self, weight: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Batch quantization: bf16/fp16 weights -> MXFP8 (fp8 + uint8 scales)."""
+        E = weight.size(0)
+        first_q, first_s = mxfp8_e4m3_quantize(weight[0], is_sf_swizzled_layout=False)
+        # Pre-allocate the output tensors rather than stacking.
+        # This is important for consistent memory layout.
+        w_quant = torch.empty(
+            (E, *first_q.shape), dtype=first_q.dtype, device=weight.device
+        )
+        w_scales = torch.empty(
+            (E, *first_s.shape), dtype=first_s.dtype, device=weight.device
+        )
+        w_quant[0] = first_q
+        w_scales[0] = first_s
+        for i in range(1, E):
+            w_quant[i], w_scales[i] = mxfp8_e4m3_quantize(
+                weight[i], is_sf_swizzled_layout=False
+            )
+
+        return w_quant, w_scales
+
+    def _setup_kernel(
+        self,
+        layer: "RoutedExperts",
+        w13: torch.Tensor,
+        w2: torch.Tensor,
+        w13_scale: torch.Tensor,
+        w2_scale: torch.Tensor,
+        w13_input_scale: torch.Tensor | None,
+        w2_input_scale: torch.Tensor | None,
+    ) -> None:
+        from vllm.model_executor.layers.fused_moe.oracle.fp8 import (
+            convert_to_fp8_moe_kernel_format,
+            make_fp8_moe_kernel,
+        )
+
+        # Shuffle weights to runtime format.
+        w13, w2, w13_scale, w2_scale = convert_to_fp8_moe_kernel_format(
+            fp8_backend=self.fp8_backend,
+            layer=layer,
+            w13=w13,
+            w2=w2,
+            w13_scale=w13_scale,
+            w2_scale=w2_scale,
+            w13_input_scale=w13_input_scale,
+            w2_input_scale=w2_input_scale,
+        )
+
+        replace_parameter(layer, "w13_weight", w13)
+        replace_parameter(layer, "w2_weight", w2)
+        replace_parameter(layer, f"w13_{self.weight_scale_name}", w13_scale)
+        replace_parameter(layer, f"w2_{self.weight_scale_name}", w2_scale)
+
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+        if self.moe_quant_config:
+            assert self.experts_cls is not None
+            self.moe_kernel = make_fp8_moe_kernel(
+                moe_quant_config=self.moe_quant_config,
+                moe_config=self.moe,
+                fp8_backend=self.fp8_backend,
+                experts_cls=self.experts_cls,
+                routing_tables=layer._expert_routing_tables(),
+            )
+
+    def get_fused_moe_quant_config(
+        self, layer: torch.nn.Module
+    ) -> "FusedMoEQuantConfig":
+        from vllm.model_executor.layers.fused_moe.oracle.fp8 import (
+            make_fp8_moe_quant_config,
+        )
+
+        w1_scale = getattr(layer, f"w13_{self.weight_scale_name}")
+        w2_scale = getattr(layer, f"w2_{self.weight_scale_name}")
+        a1_scale = layer.w13_input_scale
+        a2_scale = layer.w2_input_scale
+
+        quant_config = make_fp8_moe_quant_config(
+            fp8_backend=self.fp8_backend,
+            w1_scale=w1_scale,
+            w2_scale=w2_scale,
+            a1_scale=a1_scale,
+            a2_scale=a2_scale,
+            block_shape=self.weight_block_size,
+            swiglu_limit=getattr(layer, "swiglu_limit", None),
+        )
+
+        self._maybe_inject_biases(quant_config, layer)
+        return quant_config
+
+    def process_weights_after_loading(self, layer: Module) -> None:
+        if getattr(layer, "_already_called_process_weights_after_loading", False):
+            return
+
+        fp8_dtype = current_platform.fp8_dtype()
+        w13 = torch.empty_like(layer.w13_weight, dtype=fp8_dtype)
+        w2 = torch.empty_like(layer.w2_weight, dtype=fp8_dtype)
+        layer.w13_input_scale = None
+        layer.w2_input_scale = None
+
+        w13, w13_scale = self._quantize_mxfp8_moe_weight(layer.w13_weight)
+        w2, w2_scale = self._quantize_mxfp8_moe_weight(layer.w2_weight)
+
+        self._setup_kernel(
+            layer,
+            w13,
+            w2,
+            w13_scale,
+            w2_scale,
+            layer.w13_input_scale,
+            layer.w2_input_scale,
+        )
+
+        layer._already_called_process_weights_after_loading = True
diff --git a/vllm/model_executor/layers/quantization/petit.py b/vllm/model_executor/layers/quantization/petit.py
deleted file mode 100644
index 71bd9c80c3ba..000000000000
--- a/vllm/model_executor/layers/quantization/petit.py
+++ /dev/null
@@ -1,319 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-# Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/quantization/modelopt.py
-
-from typing import Any
-
-import regex as re
-import torch
-from torch.nn.parameter import Parameter
-
-from vllm.logger import init_logger
-from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.linear import (
-    LinearBase,
-    LinearMethodBase,
-    UnquantizedLinearMethod,
-)
-from vllm.model_executor.layers.quantization import QuantizationMethods
-from vllm.model_executor.layers.quantization.base_config import (
-    QuantizationConfig,
-    QuantizeMethodBase,
-)
-from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
-from vllm.model_executor.layers.quantization.utils.petit_utils import (
-    apply_petit_nvfp4_linear,
-    prepare_nvfp4_layer_for_petit,
-    verify_petit_nvfp4_supported,
-)
-from vllm.model_executor.layers.quantization.utils.quant_utils import is_layer_skipped
-from vllm.model_executor.parameter import ModelWeightParameter, PerTensorScaleParameter
-from vllm.platforms import current_platform
-
-# Initialize logger for the module
-logger = init_logger(__name__)
-
-
-# Configuration class to support the NVFP4 quantized model
-# generated by the ModelOpt quantization tool
-class PetitNvFp4Config(QuantizationConfig):
-    """Config class for Petit FP4."""
-
-    def __init__(
-        self,
-        is_checkpoint_nvfp4_serialized: bool = False,
-        kv_cache_quant_algo: str | None = None,
-        group_size: int | None = None,
-        exclude_modules: list[str] | None = None,
-    ) -> None:
-        self._check_hardware_support()
-        self.is_checkpoint_nvfp4_serialized = is_checkpoint_nvfp4_serialized
-        if is_checkpoint_nvfp4_serialized:
-            logger.warning(
-                "Detected nvfp4 checkpoint. Please note that the "
-                "format is experimental and subject to change."
-            )
-        self.group_size = group_size
-        self.kv_cache_quant_algo = kv_cache_quant_algo
-        self.exclude_modules = exclude_modules
-
-    def _check_hardware_support(self) -> None:
-        """
-        Verifies that the current hardware is supported by the Petit backend.
-        This backend is specifically designed for AMD GPUs and is not
-        supported on the CUDA platform.
-        """
-        # This check ensures the code is NOT running on an NVIDIA GPU.
-        if current_platform.is_cuda():
-            raise ValueError(
-                "The 'petit' quantization backend is designed for AMD GPUs "
-                "and is not supported on the CUDA platform. For NVIDIA GPUs, "
-                "please use a different quantization method such as FP8, AWQ, "
-                "or GPTQ."
-            )
-
-    @classmethod
-    def get_name(cls) -> QuantizationMethods:
-        return "petit_nvfp4"
-
-    @classmethod
-    def get_supported_act_dtypes(cls) -> list[torch.dtype]:
-        return [torch.bfloat16, torch.half]
-
-    @classmethod
-    def get_min_capability(cls) -> int:
-        # Petit supports the gfx90a and gfx942 GPUs
-        return 90
-
-    @classmethod
-    def get_config_filenames(cls) -> list[str]:
-        return ["hf_quant_config.json"]
-
-    @classmethod
-    def from_config(cls, config: dict[str, Any]) -> "PetitNvFp4Config":
-        qc = cls.get_from_keys(config, ["quantization"])
-
-        quant_method_raw = qc.get("quant_algo")
-        if not isinstance(quant_method_raw, str) or not quant_method_raw:
-            raise ValueError("Missing or invalid 'quant_algo' in quantization config.")
-        quant_method = quant_method_raw.upper()
-
-        group_size_raw = qc.get("group_size")
-        if not isinstance(group_size_raw, int):
-            raise ValueError(
-                "Missing or invalid 'group_size' (int) in hf_quant_config.json."
-            )
-        group_size = group_size_raw
-
-        verify_petit_nvfp4_supported(quant_method, group_size)
-
-        kv_cache_quant_algo_raw = qc.get("kv_cache_quant_algo") or "auto"
-        if not isinstance(kv_cache_quant_algo_raw, str):
-            raise ValueError("'kv_cache_quant_algo' must be a string if provided.")
-        kv_cache_quant_algo = kv_cache_quant_algo_raw
-
-        exclude_raw = qc.get("exclude_modules", [])
-        if exclude_raw is None:
-            exclude_modules: list[str] = []
-        elif isinstance(exclude_raw, list) and all(
-            isinstance(x, str) for x in exclude_raw
-        ):
-            exclude_modules = exclude_raw
-        else:
-            raise ValueError("'exclude_modules' must be a list[str] (or omitted).")
-
-        is_checkpoint_nvfp4_serialized = "NVFP4" in quant_method
-
-        return cls(
-            is_checkpoint_nvfp4_serialized=is_checkpoint_nvfp4_serialized,
-            kv_cache_quant_algo=kv_cache_quant_algo,
-            group_size=group_size,
-            exclude_modules=exclude_modules,
-        )
-
-    @classmethod
-    def override_quantization_method(
-        cls, hf_quant_cfg, user_quant
-    ) -> QuantizationMethods | None:
-        if not current_platform.is_rocm():
-            return None
-
-        qc = hf_quant_cfg.get("quantization", hf_quant_cfg)
-        algo = (qc.get("quant_algo") or qc.get("quant_method") or "").upper()
-        if algo in ("NVFP4", "MODELOPT_FP4", "MODELOPT"):
-            return cls.get_name()  # "petit_nvfp4"
-        return None
-
-    @classmethod
-    def is_petit_nvfp4_compatible(cls, quant_config: dict[str, Any]) -> bool:
-        qc = quant_config.get("quantization", quant_config)
-        algo = (qc.get("quant_algo") or qc.get("quant_method") or "").upper()
-        return algo == "NVFP4"
-
-    def is_layer_excluded(self, prefix: str, exclude_modules: list[str]) -> bool:
-        for pattern in exclude_modules:
-            regex_str = pattern.replace(".", r"\.").replace("*", r".*")
-            if re.fullmatch(regex_str, prefix):
-                return True
-        return False
-
-    def get_quant_method(
-        self, layer: torch.nn.Module, prefix: str
-    ) -> "QuantizeMethodBase | None":
-        exclude = self.require_exclude_modules()
-
-        if isinstance(layer, LinearBase):
-            if is_layer_skipped(prefix, exclude) or self.is_layer_excluded(
-                prefix, exclude
-            ):
-                return UnquantizedLinearMethod()
-            return PetitNvFp4LinearMethod(self)
-        elif isinstance(layer, Attention):
-            return PetitFp8KVCacheMethod(self)
-        return None
-
-    def get_scaled_act_names(self) -> list[str]:
-        return []
-
-    def require_group_size(self) -> int:
-        if self.group_size is None:
-            logger.warning("group_size not set; defaulting to 16 for NVFP4.")
-            return 16
-        return self.group_size
-
-    def require_kv_cache_quant_algo(self) -> str:
-        return self.kv_cache_quant_algo or "auto"
-
-    def require_exclude_modules(self) -> list[str]:
-        return list(self.exclude_modules or [])
-
-
-class PetitFp8KVCacheMethod(BaseKVCacheMethod):
-    """
-    Supports loading kv-cache scaling factors from FP8 checkpoints.
-    """
-
-    def __init__(self, quant_config: PetitNvFp4Config):
-        super().__init__(quant_config)
-
-
-class PetitNvFp4LinearMethod(LinearMethodBase):
-    """Linear method for NVFP4.
-    Supports loading NVFP4 checkpoints with the following structure:
-
-    |Tensor Name           | datatype      |  shape      |
-    |----------------------------------------------------|
-    |input_scale           | torch.float32 | scalar      |
-    |weight                | NVFP4(SE2M1)  | [1, X, y/2] |
-    |weight_scale          | FP8-E4M3      | [X, Y]      |
-    |weight_scale_2        | torch.float32 | scalar      |
-
-    The weights are quantized per block of 16 elements.
-    Args: quant_config: The ModelOpt quantization config.
-    """
-
-    def __init__(self, quant_config: PetitNvFp4Config):
-        self.quant_config = quant_config
-
-    def create_weights(
-        self,
-        layer: torch.nn.Module,
-        input_size_per_partition: int,
-        output_partition_sizes: list[int],
-        input_size: int,
-        output_size: int,
-        params_dtype: torch.dtype,
-        **extra_weight_attrs,
-    ):
-        del input_size, output_size
-        if not self.quant_config.is_checkpoint_nvfp4_serialized:
-            raise ValueError(
-                "NVFP4 quantization was selected, "
-                " dynamic quantization is not supported."
-            )
-
-        output_size_per_partition = sum(output_partition_sizes)
-        weight_loader = extra_weight_attrs.get("weight_loader")
-
-        layer.logical_widths = output_partition_sizes
-
-        layer.input_size_per_partition = input_size_per_partition
-        layer.output_size_per_partition = output_size_per_partition
-        if input_size_per_partition % 16 != 0:
-            raise ValueError(
-                "Unsupported model when in features size is not multiple of 16"
-            )
-
-        weight_dtype = (
-            torch.float8_e4m3fn
-            if self.quant_config.is_checkpoint_nvfp4_serialized
-            else params_dtype
-        )
-
-        weight = ModelWeightParameter(
-            data=torch.empty(
-                # 2 fp4 data is packed in one uint8 in the input dimension
-                output_size_per_partition,
-                input_size_per_partition // 2,
-                dtype=torch.uint8,
-            ),
-            input_dim=1,
-            output_dim=0,
-            weight_loader=weight_loader,
-        )
-        layer.register_parameter("weight", weight)
-
-        input_scale = PerTensorScaleParameter(
-            data=torch.empty(len(output_partition_sizes), dtype=torch.float32),
-            weight_loader=weight_loader,
-        )
-
-        layer.register_parameter("input_scale", input_scale)
-
-        weight_scale_2 = PerTensorScaleParameter(
-            data=torch.empty(len(output_partition_sizes), dtype=torch.float32),
-            weight_loader=weight_loader,
-        )
-        layer.register_parameter("weight_scale_2", weight_scale_2)
-
-        group_size = self.quant_config.require_group_size()
-        weight_scale = ModelWeightParameter(
-            data=torch.empty(
-                output_size_per_partition,
-                input_size_per_partition // group_size,
-                dtype=weight_dtype,
-            ),
-            input_dim=1,
-            output_dim=0,
-            weight_loader=weight_loader,
-        )
-
-        layer.register_parameter("weight_scale", weight_scale)
-
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
-        input_scale_2 = layer.input_scale.max().to(torch.float32)
-        weight_scale_2 = layer.weight_scale_2.max().to(torch.float32)
-        layer.input_scale = Parameter(input_scale_2, requires_grad=False)
-        layer.weight_scale_2 = Parameter(weight_scale_2, requires_grad=False)
-        layer.alpha = Parameter(
-            layer.input_scale * layer.weight_scale_2, requires_grad=False
-        )
-
-        prepare_nvfp4_layer_for_petit(layer)
-        del layer.input_scale
-
-    def apply(
-        self,
-        layer: torch.nn.Module,
-        x: torch.Tensor,
-        bias: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        return apply_petit_nvfp4_linear(
-            input=x,
-            weight=layer.weight,
-            weight_scale=layer.weight_scale,
-            weight_scale_2=layer.weight_scale_2,
-            size_n=layer.output_size_per_partition,
-            size_k=layer.input_size_per_partition,
-            bias=bias,
-        )
diff --git a/vllm/model_executor/layers/quantization/quark/quark.py b/vllm/model_executor/layers/quantization/quark/quark.py
index 78c64bac6187..d1f7a169ee74 100644
--- a/vllm/model_executor/layers/quantization/quark/quark.py
+++ b/vllm/model_executor/layers/quantization/quark/quark.py
@@ -8,7 +8,7 @@
 
 from vllm.logger import init_logger
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import RoutedExperts
 from vllm.model_executor.layers.linear import (
     LinearBase,
     LinearMethodBase,
@@ -24,6 +24,7 @@
     QuarkMoEMethod,
 )
 from vllm.model_executor.layers.quantization.quark.schemes import (
+    QuarkNVFP4,
     QuarkOCP_MX,
     QuarkScheme,
     QuarkW4A8_MXFP4_FP8,
@@ -36,7 +37,6 @@
 )
 from vllm.model_executor.models.utils import WeightsMapper
 from vllm.platforms import current_platform
-from vllm.transformers_utils.config import get_config
 
 if TYPE_CHECKING:
     from vllm.model_executor.models.utils import WeightsMapper
@@ -61,23 +61,12 @@ def __init__(
         self.kv_cache_group = kv_cache_group
         self.kv_cache_config = kv_cache_config
         self.pack_method = pack_method
+        # Note : this flag is kept disabled because the overhead of
+        # dynamic mxfp4 quantization negates the performance gains
+        # that come from shifting to mxfp4. It is left here in case
+        # we want to re-enable it in the future.
         self.dynamic_mxfp4_quant = False
 
-    def maybe_update_config(self, model_name: str, revision: str | None = None):
-        self.hf_config = get_config(
-            model=model_name,
-            trust_remote_code=False,  # or get from model_config if available
-            revision=revision,
-            config_format="auto",
-        )
-
-        quant_config = getattr(self.hf_config, "quantization_config", None)
-        if quant_config is not None:
-            quant_dtype = quant_config["global_quant_config"]["weight"]["dtype"]
-            model_type = self.hf_config.model_type
-            if quant_dtype == "fp4" and model_type == "deepseek_v3":
-                self.dynamic_mxfp4_quant = True
-
     def get_linear_method(self) -> "QuarkLinearMethod":
         return QuarkLinearMethod(self)
 
@@ -101,7 +90,7 @@ def apply_vllm_mapper(  # noqa: B027
         :param hf_to_vllm_mapper: maps from hf model structure (the assumed
             structure of the qconfig) to vllm model structure
         """
-        quant_config_with_hf_to_vllm_mapper = {}
+        quant_config_with_hf_to_vllm_mapper: dict[str, Any] = {}
 
         for k, v in self.quant_config.items():
             if isinstance(v, list):
@@ -147,7 +136,7 @@ def get_quant_method(
         if isinstance(layer, Attention):
             return QuarkKVCacheMethod(self)
 
-        if isinstance(layer, FusedMoE):
+        if isinstance(layer, RoutedExperts):
             return QuarkMoEMethod.get_moe_method(self, module=layer, layer_name=prefix)
         return None
 
@@ -376,6 +365,85 @@ def _is_w4a8_mxfp4_fp8(
 
         return is_weight_mxfp4 and is_input_fp8
 
+    def _is_dynamic_per_token_w8a8(
+        self,
+        weight_quant: dict[str, Any] | None,
+        input_quant: dict[str, Any] | None,
+    ) -> bool:
+        """Detect W8A8 INT8 with per-tensor or per-channel
+        weights and dynamic per-token input."""
+        if weight_quant is None or input_quant is None:
+            return False
+
+        is_int8_dtype = (
+            weight_quant.get("dtype") == "int8" and input_quant.get("dtype") == "int8"
+        )
+
+        is_valid_weight_scheme = weight_quant.get("qscheme") in [
+            "per_tensor",
+            "per_channel",
+        ]
+        is_per_token_input = input_quant.get("qscheme") == "per_channel"
+
+        is_dynamic_input = input_quant.get("is_dynamic") is True
+        is_weight_symmetric = weight_quant.get("symmetric") is True
+
+        return (
+            is_int8_dtype
+            and is_valid_weight_scheme
+            and is_per_token_input
+            and is_dynamic_input
+            and is_weight_symmetric
+        )
+
+    def _is_nvfp4(
+        self,
+        weight_quant: dict[str, Any] | list[dict[str, Any]] | None,
+        input_quant: dict[str, Any] | list[dict[str, Any]] | None,
+    ) -> bool:
+        # Confirm weights and input quantized.
+        if weight_quant is None or input_quant is None:
+            return False
+
+        # Confirm both weight_quant and input_quant are lists with 2 elements
+        if not isinstance(weight_quant, list) or len(weight_quant) != 2:
+            return False
+        if not isinstance(input_quant, list) or len(input_quant) != 2:
+            return False
+
+        # First element should be fp4 with per_group quantization
+        is_fp4_per_group_weight = (
+            weight_quant[0].get("dtype") == "fp4"
+            and weight_quant[0].get("qscheme") == "per_group"
+            and weight_quant[0].get("group_size") == 16
+            and not weight_quant[0].get("is_dynamic")
+        )
+        is_fp4_per_group_input = (
+            input_quant[0].get("dtype") == "fp4"
+            and input_quant[0].get("qscheme") == "per_group"
+            and input_quant[0].get("group_size") == 16
+            and input_quant[0].get("is_dynamic")
+        )
+
+        # Second element should be fp8_e4m3 with per_tensor quantization
+        is_fp8_per_tensor_weight = (
+            weight_quant[1].get("dtype") == "fp8_e4m3"
+            and weight_quant[1].get("qscheme") == "per_tensor"
+            and not weight_quant[1].get("is_dynamic")
+        )
+        is_fp8_per_tensor_input = (
+            input_quant[1].get("dtype") == "fp8_e4m3"
+            and input_quant[1].get("qscheme") == "per_tensor"
+            and not input_quant[1].get("is_dynamic")
+        )
+
+        return (
+            is_fp4_per_group_weight  # type: ignore[return-value]
+            and is_fp4_per_group_input
+            and is_fp8_per_tensor_weight
+            and is_fp8_per_tensor_input
+        )
+
     def _is_w_ocp_mx_a_x(
         self, weight_quant: dict[str, Any] | None, input_quant: dict[str, Any] | None
     ) -> bool:
@@ -524,7 +592,9 @@ def _get_scheme_from_config(
         weight_config = cast(dict[str, Any], config.get("weight"))
         input_config = cast(dict[str, Any], config.get("input_tensors"))
 
-        if self._is_fp8_w8a8(weight_config, input_config):
+        if self._is_nvfp4(weight_config, input_config):
+            return QuarkNVFP4()
+        elif self._is_fp8_w8a8(weight_config, input_config):
             is_fp8_w8a8_supported = self._check_scheme_supported(
                 QuarkW8A8Fp8.get_min_capability(), error=False
             )
@@ -543,6 +613,13 @@ def _get_scheme_from_config(
             )
             if is_w4a8_supported:
                 return QuarkW4A8_MXFP4_FP8(weight_config, input_config)
+        elif self._is_dynamic_per_token_w8a8(weight_config, input_config):
+            weight_qscheme = cast(str, weight_config.get("qscheme"))
+            return QuarkW8A8Int8(
+                qscheme=weight_qscheme,
+                is_static_input_scheme=False,
+                input_symmetric=input_config.get("symmetric"),
+            )
         elif self._is_w_ocp_mx_a_x(weight_config, input_config):
             return QuarkOCP_MX(
                 weight_config, input_config, dynamic_mxfp4_quant=dynamic_mxfp4_quant
diff --git a/vllm/model_executor/layers/quantization/quark/quark_moe.py b/vllm/model_executor/layers/quantization/quark/quark_moe.py
index a58ee5c44e00..627a44a77b53 100644
--- a/vllm/model_executor/layers/quantization/quark/quark_moe.py
+++ b/vllm/model_executor/layers/quantization/quark/quark_moe.py
@@ -5,49 +5,66 @@
 
 import torch
 
+import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm import _custom_ops as ops
 from vllm import envs
 from vllm._aiter_ops import rocm_aiter_ops
 from vllm.config import get_current_vllm_config
 from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe import (
-    FusedMoE,
     FusedMoEConfig,
     FusedMoEMethodBase,
     FusedMoeWeightScaleSupported,
     MoEActivation,
+    RoutedExperts,
+    SharedExperts,
 )
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEParallelConfig,
     FusedMoEQuantConfig,
     fp8_w8a8_moe_quant_config,
+    int8_w8a8_moe_quant_config,
     mxfp4_w4a8_moe_quant_config,
     mxfp4_w4a16_moe_quant_config,
     ocp_mx_moe_quant_config,
 )
-from vllm.model_executor.layers.fused_moe.fused_marlin_moe import fused_marlin_moe
+from vllm.model_executor.layers.fused_moe.experts.marlin_moe import fused_marlin_moe
 from vllm.model_executor.layers.fused_moe.oracle.mxfp4 import (
+    TRITON_BACKENDS,
     Mxfp4MoeBackend,
+    backend_to_kernel_cls,
+    convert_gpt_oss_weight_to_mxfp4_moe_kernel_format,
+    make_mxfp4_moe_kernel,
+    make_mxfp4_moe_quant_config,
     mxfp4_round_up_hidden_size_and_intermediate_size,
     select_mxfp4_moe_backend,
 )
+from vllm.model_executor.layers.fused_moe.oracle.nvfp4 import (
+    convert_to_nvfp4_moe_kernel_format,
+    make_nvfp4_moe_kernel,
+    make_nvfp4_moe_quant_config,
+    select_nvfp4_moe_backend,
+)
 from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import (
     prepare_fp8_moe_layer_for_marlin,
 )
-from vllm.model_executor.layers.quantization.utils.mxfp4_utils import (
-    _swizzle_mxfp4,
-)
 from vllm.model_executor.layers.quantization.utils.ocp_mx_utils import (
     OCP_MX_BLOCK_SIZE,
     OCP_MX_Scheme,
 )
-from vllm.model_executor.layers.quantization.utils.quant_utils import GroupShape
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    GroupShape,
+    kFp8StaticTensorSym,
+    kMxfp4Dynamic,
+    kNvfp4Dynamic,
+    kNvfp4Static,
+)
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
     all_close_1d,
     normalize_e4m3fn_to_e4m3fnuz,
     per_tensor_dequantize,
 )
-from vllm.model_executor.utils import set_weight_attrs
+from vllm.model_executor.utils import replace_parameter, set_weight_attrs
 from vllm.platforms import current_platform
 from vllm.scalar_type import scalar_types
 
@@ -55,8 +72,9 @@
 
 __all__ = [
     "QuarkMoEMethod",
+    "QuarkW8A8Fp8MoEMethod",
     "QuarkOCP_MX_MoEMethod",
-    "QuarkOCP_MX_MoEMethod_OSS",
+    "QuarkNvfp4MoEMethod",
 ]
 
 
@@ -68,7 +86,7 @@ def __init__(self, moe: FusedMoEConfig):
     @staticmethod
     def get_moe_method(
         quant_config: "QuarkConfig",  # type: ignore # noqa E501 # noqa F821
-        module: torch.nn.Module,
+        module: RoutedExperts,
         layer_name: str,
     ) -> "QuarkMoEMethod":
         layer_quant_config = quant_config._find_matched_config(layer_name, module)
@@ -85,25 +103,22 @@ def get_moe_method(
 
         if quant_config._is_fp8_w4a8(weight_config, input_config):
             return QuarkW4A8Fp8MoEMethod(weight_config, input_config, module.moe_config)
+        elif quant_config._is_nvfp4(weight_config, input_config):
+            return QuarkNvfp4MoEMethod(
+                weight_config, input_config, module.moe_config, quant_config
+            )
         elif quant_config._is_fp8_w8a8(weight_config, input_config):
             return QuarkW8A8Fp8MoEMethod(weight_config, input_config, module.moe_config)
         elif quant_config._is_w_ocp_mx_a_x(weight_config, input_config):
-            emulate = not current_platform.supports_mx() or not (
-                rocm_aiter_ops.is_fused_moe_enabled()
-            )
-            if (
-                input_config is not None
-                and input_config.get("dtype") == "fp8_e4m3"
-                and not input_config.get("is_dynamic")
-                and not emulate
-            ):
-                return QuarkOCP_MX_MoEMethod_OSS(
-                    weight_config, input_config, module.moe_config
-                )
-            else:
-                return QuarkOCP_MX_MoEMethod(
-                    weight_config, input_config, module.moe_config
-                )
+            # All OCP MX schemes (W4A16, W4A8, etc.) handled by QuarkOCP_MX_MoEMethod
+            # Backend selection happens inside via oracle
+            return QuarkOCP_MX_MoEMethod(weight_config, input_config, module.moe_config)
+        elif quant_config._is_static_tensor_w8a8(
+            weight_config, input_config
+        ) or quant_config._is_dynamic_per_token_w8a8(weight_config, input_config):
+            return QuarkW8A8Int8MoEMethod(
+                weight_config, input_config, module.moe_config
+            )
         else:
             raise RuntimeError("Unsupported FusedMoe scheme")
 
@@ -166,7 +181,7 @@ def __init__(
 
     def create_weights(
         self,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
         num_experts: int,
         hidden_size: int,
         intermediate_size_per_partition: int,
@@ -222,7 +237,7 @@ def create_weights(
                 torch.ones(num_experts, dtype=torch.float32), requires_grad=False
             )
             layer.register_parameter("w2_weight_scale", w2_weight_scale)
-            # Add PER-TENSOR quantization for FusedMoE.weight_loader.
+            # Add PER-TENSOR quantization for RoutedExperts.weight_loader.
             extra_weight_attrs.update(
                 {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}
             )
@@ -244,7 +259,7 @@ def create_weights(
                 requires_grad=False,
             )
             layer.register_parameter("w2_weight_scale", w2_weight_scale)
-            # Add PER-CHANNEL quantization for FusedMoE.weight_loader.
+            # Add PER-CHANNEL quantization for RoutedExperts.weight_loader.
             extra_weight_attrs.update(
                 {"quant_method": FusedMoeWeightScaleSupported.CHANNEL.value}
             )
@@ -289,7 +304,7 @@ def create_weights(
         else:
             layer.w13_bias, layer.w2_bias = None, None
 
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
         # Fp8 moe kernels require a single activation scale.
         # We take the max of all the scales in case they differ.
         if self.static_input_scales:
@@ -424,7 +439,7 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
             )
 
     def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
+        self, layer: RoutedExperts
     ) -> FusedMoEQuantConfig | None:
         return fp8_w8a8_moe_quant_config(
             w1_scale=layer.w13_weight_scale,
@@ -435,18 +450,20 @@ def get_fused_moe_quant_config(
             w2_bias=layer.w2_bias,
             per_act_token_quant=self.input_qscheme == "per_channel",
             per_out_ch_quant=self.weight_qscheme == "per_channel",
+            gemm1_clamp_limit=getattr(layer, "swiglu_limit", None),
         )
 
     def apply(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    ) -> torch.Tensor:
         if self.rocm_aiter_moe_enabled:
-            from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (
+            from vllm.model_executor.layers.fused_moe.experts.rocm_aiter_moe import (
                 rocm_aiter_fused_experts,
             )
 
@@ -500,6 +517,307 @@ def apply(
             )
 
 
+class QuarkW8A8Int8MoEMethod(QuarkMoEMethod):
+    """Quark W8A8 INT8 MoE method."""
+
+    def __init__(
+        self,
+        weight_config: dict[str, Any],
+        input_config: dict[str, Any],
+        moe: FusedMoEConfig,
+    ):
+        super().__init__(moe)
+        self.weight_quant = weight_config
+        self.input_quant = input_config
+        self.weight_qscheme = self.weight_quant.get("qscheme", "per_tensor")
+        self.static_input_scales = not self.input_quant.get("is_dynamic", False)
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        layer.num_experts = num_experts
+        layer.orig_dtype = params_dtype
+        layer.weight_block_size = None
+        params_dtype = torch.int8
+
+        # WEIGHTS
+        w13_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                2 * intermediate_size_per_partition,
+                hidden_size,
+                dtype=params_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight", w13_weight)
+        set_weight_attrs(w13_weight, extra_weight_attrs)
+
+        w2_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                intermediate_size_per_partition,
+                dtype=params_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight", w2_weight)
+        set_weight_attrs(w2_weight, extra_weight_attrs)
+
+        # WEIGHT_SCALES
+        if self.weight_qscheme == "per_channel":
+            w13_weight_scale = torch.nn.Parameter(
+                torch.ones(
+                    num_experts,
+                    2 * intermediate_size_per_partition,
+                    dtype=torch.float32,
+                ),
+                requires_grad=False,
+            )
+            layer.register_parameter("w13_weight_scale", w13_weight_scale)
+            w2_weight_scale = torch.nn.Parameter(
+                torch.ones(num_experts, hidden_size, dtype=torch.float32),
+                requires_grad=False,
+            )
+            layer.register_parameter("w2_weight_scale", w2_weight_scale)
+            extra_weight_attrs.update(
+                {"quant_method": FusedMoeWeightScaleSupported.CHANNEL.value}
+            )
+            set_weight_attrs(w13_weight_scale, extra_weight_attrs)
+            set_weight_attrs(w2_weight_scale, extra_weight_attrs)
+        else:
+            # per-tensor: one scalar per expert
+            w13_weight_scale = torch.nn.Parameter(
+                torch.ones(num_experts, 2, dtype=torch.float32),
+                requires_grad=False,
+            )
+            layer.register_parameter("w13_weight_scale", w13_weight_scale)
+            w2_weight_scale = torch.nn.Parameter(
+                torch.ones(num_experts, dtype=torch.float32),
+                requires_grad=False,
+            )
+            layer.register_parameter("w2_weight_scale", w2_weight_scale)
+            extra_weight_attrs.update(
+                {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}
+            )
+            set_weight_attrs(w13_weight_scale, extra_weight_attrs)
+            set_weight_attrs(w2_weight_scale, extra_weight_attrs)
+
+        # INPUT_SCALES
+        if self.static_input_scales:
+            w13_input_scale = torch.nn.Parameter(
+                torch.ones(num_experts, dtype=torch.float32),
+                requires_grad=False,
+            )
+            layer.register_parameter("w13_input_scale", w13_input_scale)
+            set_weight_attrs(w13_input_scale, extra_weight_attrs)
+
+            w2_input_scale = torch.nn.Parameter(
+                torch.ones(num_experts, dtype=torch.float32),
+                requires_grad=False,
+            )
+            layer.register_parameter("w2_input_scale", w2_input_scale)
+            set_weight_attrs(w2_input_scale, extra_weight_attrs)
+        else:
+            layer.w13_input_scale = None
+            layer.w2_input_scale = None
+
+        # ZERO POINTS (loaded but discarded after loading; kernel uses symmetric)
+        w13_input_zero_point = torch.nn.Parameter(
+            torch.zeros(num_experts, 2, dtype=torch.int8),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_input_zero_point", w13_input_zero_point)
+        set_weight_attrs(w13_input_zero_point, extra_weight_attrs)
+
+        w2_input_zero_point = torch.nn.Parameter(
+            torch.zeros(num_experts, dtype=torch.int8),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_input_zero_point", w2_input_zero_point)
+        set_weight_attrs(w2_input_zero_point, extra_weight_attrs)
+
+        if self.weight_qscheme == "per_channel":
+            w13_weight_zero_point = torch.nn.Parameter(
+                torch.zeros(
+                    num_experts,
+                    2 * intermediate_size_per_partition,
+                    dtype=torch.int8,
+                ),
+                requires_grad=False,
+            )
+            w2_weight_zero_point = torch.nn.Parameter(
+                torch.zeros(num_experts, hidden_size, dtype=torch.int8),
+                requires_grad=False,
+            )
+        else:
+            w13_weight_zero_point = torch.nn.Parameter(
+                torch.zeros(num_experts, 2, dtype=torch.int8),
+                requires_grad=False,
+            )
+            w2_weight_zero_point = torch.nn.Parameter(
+                torch.zeros(num_experts, dtype=torch.int8),
+                requires_grad=False,
+            )
+        layer.register_parameter("w13_weight_zero_point", w13_weight_zero_point)
+        set_weight_attrs(w13_weight_zero_point, extra_weight_attrs)
+        layer.register_parameter("w2_weight_zero_point", w2_weight_zero_point)
+        set_weight_attrs(w2_weight_zero_point, extra_weight_attrs)
+
+        # BIAS
+        if self.has_bias:
+            w13_bias = torch.nn.Parameter(
+                torch.zeros(
+                    num_experts,
+                    2 * intermediate_size_per_partition,
+                    dtype=torch.float32,
+                ),
+                requires_grad=False,
+            )
+            layer.register_parameter("w13_bias", w13_bias)
+            set_weight_attrs(w13_bias, extra_weight_attrs)
+            w2_bias = torch.nn.Parameter(
+                torch.zeros(num_experts, hidden_size, dtype=torch.float32),
+                requires_grad=False,
+            )
+            layer.register_parameter("w2_bias", w2_bias)
+            set_weight_attrs(w2_bias, extra_weight_attrs)
+        else:
+            layer.w13_bias, layer.w2_bias = None, None
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        # Discard zero points (INT8 fused MoE kernel uses symmetric quant)
+        for attr in (
+            "w13_input_zero_point",
+            "w2_input_zero_point",
+            "w13_weight_zero_point",
+            "w2_weight_zero_point",
+        ):
+            if hasattr(layer, attr):
+                delattr(layer, attr)
+
+        # For static input scales, collapse per-expert scales to single max
+        if self.static_input_scales:
+            if layer.w13_input_scale is None or layer.w2_input_scale is None:
+                raise ValueError(
+                    "QuantConfig has static quantization, but found "
+                    "activation scales are None."
+                )
+            if not all_close_1d(layer.w13_input_scale) or not all_close_1d(
+                layer.w2_input_scale
+            ):
+                logger.warning_once(
+                    "Found input_scales that are not equal for "
+                    "INT8 MoE layer. Using the maximum across experts "
+                    "for each layer."
+                )
+            layer.w13_input_scale = torch.nn.Parameter(
+                layer.w13_input_scale.max(), requires_grad=False
+            )
+            layer.w2_input_scale = torch.nn.Parameter(
+                layer.w2_input_scale.max(), requires_grad=False
+            )
+
+        # Per-channel scales: 2D [E, N] -> 3D [E, N, 1] for the int8 MoE kernel.
+        if self.weight_qscheme == "per_channel":
+            for attr in ("w13_weight_scale", "w2_weight_scale"):
+                param = getattr(layer, attr, None)
+                if param is not None and param.dim() == 2:
+                    replace_parameter(
+                        layer,
+                        attr,
+                        torch.nn.Parameter(
+                            param.data.unsqueeze(-1).contiguous(),
+                            requires_grad=False,
+                        ),
+                    )
+
+        # For per-tensor weights, merge w1/w3 scales into single per-expert
+        if self.weight_qscheme == "per_tensor":
+            assert layer.w13_weight_scale is not None
+            shard_size = layer.intermediate_size_per_partition
+            max_w13_scales = layer.w13_weight_scale.max(dim=1).values
+
+            for expert_id in range(layer.local_num_experts):
+                start = 0
+                for shard_id in range(2):
+                    dq_weight = per_tensor_dequantize(
+                        layer.w13_weight[expert_id][start : start + shard_size, :],
+                        layer.w13_weight_scale[expert_id][shard_id],
+                    )
+                    layer.w13_weight[expert_id][start : start + shard_size, :], _, _ = (
+                        ops.scaled_int8_quant(
+                            dq_weight,
+                            scale=max_w13_scales[expert_id],
+                        )
+                    )
+                    start += shard_size
+
+            layer.w13_weight_scale = torch.nn.Parameter(
+                max_w13_scales, requires_grad=False
+            )
+
+    def get_fused_moe_quant_config(
+        self, layer: torch.nn.Module
+    ) -> FusedMoEQuantConfig | None:
+        if self.weight_qscheme == "per_channel" and not self.static_input_scales:
+            return int8_w8a8_moe_quant_config(
+                w1_scale=layer.w13_weight_scale,
+                w2_scale=layer.w2_weight_scale,
+                a1_scale=layer.w13_input_scale,
+                a2_scale=layer.w2_input_scale,
+                w1_bias=getattr(layer, "w13_bias", None),
+                w2_bias=getattr(layer, "w2_bias", None),
+                per_act_token_quant=True,
+            )
+        is_dynamic = not self.static_input_scales
+        is_per_channel = self.weight_qscheme == "per_channel"
+        return FusedMoEQuantConfig.make(
+            torch.int8,
+            w1_scale=layer.w13_weight_scale,
+            w2_scale=layer.w2_weight_scale,
+            a1_scale=layer.w13_input_scale,
+            a2_scale=layer.w2_input_scale,
+            w1_bias=getattr(layer, "w13_bias", None),
+            w2_bias=getattr(layer, "w2_bias", None),
+            per_act_token_quant=is_dynamic,
+            per_out_ch_quant=is_per_channel,
+            block_shape=None,
+        )
+
+    def apply(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ) -> torch.Tensor:
+        from vllm.model_executor.layers.fused_moe import fused_experts
+
+        return fused_experts(
+            hidden_states=x,
+            w1=layer.w13_weight,
+            w2=layer.w2_weight,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            inplace=not self.moe.disable_inplace,
+            activation=layer.activation,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            quant_config=self.moe_quant_config,
+        )
+
+
 class QuarkW4A8Fp8MoEMethod(QuarkMoEMethod):
     def __init__(
         self,
@@ -517,7 +835,7 @@ def __init__(
 
     def create_weights(
         self,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
         num_experts: int,
         hidden_size: int,
         intermediate_size_per_partition: int,
@@ -584,7 +902,7 @@ def create_weights(
         set_weight_attrs(w13_weight_scale_2, extra_weight_attrs)
         set_weight_attrs(w2_weight_scale_2, extra_weight_attrs)
 
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
         shuffled_w13, shuffled_w2 = rocm_aiter_ops.shuffle_weights(
             layer.w13_weight.data, layer.w2_weight.data
         )
@@ -625,17 +943,19 @@ def get_fused_moe_quant_config(self, layer):
             w1_scale=layer.w13_weight_scale_2,
             w2_scale=layer.w2_weight_scale_2,
             per_out_ch_quant=True,
+            gemm1_clamp_limit=getattr(layer, "swiglu_limit", None),
         )
 
     def apply(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (
+    ) -> torch.Tensor:
+        from vllm.model_executor.layers.fused_moe.experts.rocm_aiter_moe import (
             rocm_aiter_fused_experts,
         )
 
@@ -699,18 +1019,37 @@ def __init__(
                 f"Please check that the combination is supported in OCP_MX_Scheme."
             )
 
-        self.mxfp4_backend: Mxfp4MoeBackend | None = None
-        if self.ocp_mx_scheme == "w_mxfp4":
-            self.mxfp4_backend, _ = select_mxfp4_moe_backend(moe)
-        elif self.ocp_mx_scheme.startswith("w_mxfp4"):
-            # TODO(bowenbao): refactor and introduce backends for other OCP MX schemes.
-            self.mxfp4_backend = Mxfp4MoeBackend.NONE
+        # TODO(bowenbao): refactor and introduce backends for other OCP MX schemes,
+        # use kernel abstraction for all OCP MX MOE implementations.
+        self.mxfp4_backend: Mxfp4MoeBackend = Mxfp4MoeBackend.NONE
+        self.experts_cls: type[mk.FusedMoEExperts] | None = None
+        self.moe_kernel: mk.FusedMoEKernel | None = None
+
+        # Used for triton kernel precision configs (W4A8, TRITON backends)
+        self.w13_precision_config = None
+        self.w2_precision_config = None
 
         if self.input_quant is not None:
             self.static_input_scales = not self.input_quant.get("is_dynamic")
         else:
             self.static_input_scales = False
 
+        # Select backend based on OCP MX scheme
+        if self.ocp_mx_scheme == "w_mxfp4":
+            # W4A16: weight-only MXFP4
+            self.mxfp4_backend, self.experts_cls = select_mxfp4_moe_backend(moe)
+        elif self.ocp_mx_scheme == "w_mxfp4_a_fp8" and self.static_input_scales:
+            # W4A8: MXFP4 weights + static FP8 activations
+            self.mxfp4_backend, self.experts_cls = select_mxfp4_moe_backend(
+                moe, activation_key=kFp8StaticTensorSym
+            )
+        elif self.ocp_mx_scheme == "w_mxfp4_a_mxfp4":
+            # W4A4: MXFP4 weights + MXFP4 activations
+            self.mxfp4_backend, self.experts_cls = select_mxfp4_moe_backend(
+                moe, activation_key=kMxfp4Dynamic
+            )
+
+        # Validation for unsupported schemes
         if any(
             self.ocp_mx_scheme.endswith(a_scheme)
             for a_scheme in ["a_mxfp4", "a_mxfp6_e3m2", "a_mxfp6_e2m3"]
@@ -728,35 +1067,19 @@ def __init__(
                 "Please open an issue."
             )
 
-        self.use_rocm_aiter_moe = rocm_aiter_ops.is_fused_moe_enabled()
-
         self.model_type = getattr(
             get_current_vllm_config().model_config.hf_config, "model_type", None
         )
 
-        self.emulate = (
-            not current_platform.supports_mx()
-            or not self.ocp_mx_scheme.startswith("w_mxfp4")
-        ) and (
-            self.mxfp4_backend is None
-            or self.mxfp4_backend is Mxfp4MoeBackend.NONE
-            or not self.use_rocm_aiter_moe
-        )
+        # If no native backend available, use emulation.
+        if self.mxfp4_backend is Mxfp4MoeBackend.NONE:
+            self.mxfp4_backend = Mxfp4MoeBackend.EMULATION
 
-        if self.emulate:
-            logger.warning_once(
-                f"The current mode (supports_mx={current_platform.supports_mx()}, "
-                f"use_rocm_aiter_moe={self.use_rocm_aiter_moe}, "
-                f"ocp_mx_scheme={self.ocp_mx_scheme}) "
-                "does not support native MXFP4/MXFP6 "
-                "computation. Simulated weight dequantization and activation "
-                "QDQ (quantize and dequantize) will be used, with the linear "
-                "layers computed in high precision."
-            )
-        else:
-            logger.warning_once(
-                "The current mode supports native MoE MXFP4 computation"
-            )
+        self.experts_cls = backend_to_kernel_cls(self.mxfp4_backend)[0]
+
+        logger.info_once(
+            f"Using {self.mxfp4_backend.value} backend for {self.ocp_mx_scheme}"
+        )
 
     def maybe_roundup_sizes(
         self,
@@ -771,7 +1094,12 @@ def maybe_roundup_sizes(
             act_dtype=act_dtype,
             moe_parallel_config=moe_parallel_config,
         )
-        if self.mxfp4_backend is not None:
+        # In case quantization emulation backend is used, there is no need to apply
+        # MXFP4-specific padding logic as the compute happens in higher precision.
+        if (
+            self.mxfp4_backend is not None
+            and self.mxfp4_backend != Mxfp4MoeBackend.EMULATION
+        ):
             hidden_size, intermediate_size_per_partition = (
                 mxfp4_round_up_hidden_size_and_intermediate_size(
                     self.mxfp4_backend, hidden_size, intermediate_size_per_partition
@@ -790,7 +1118,7 @@ def get_packed_dim(self, dim: int, quant_dtype: str):
 
     def create_weights(
         self,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
         num_experts: int,
         hidden_size: int,
         intermediate_size_per_partition: int,
@@ -896,95 +1224,91 @@ def create_weights(
             layer.w2_input_scale = None
 
     def process_weights_after_loading(self, layer):
-        if self.static_input_scales and self.input_dtype == "fp8":
-            # firstly, process activations if fp8 static input
-            if layer.w13_input_scale is None or layer.w2_input_scale is None:
-                raise ValueError(
-                    "QuantConfig has static quantization, but found "
-                    "activation scales are None."
-                )
-            if not all_close_1d(layer.w13_input_scale) or not all_close_1d(
-                layer.w2_input_scale
-            ):
-                logger.warning_once(
-                    "Found input_scales that are not equal for "
-                    "fp8 MoE layer. Using the maximum across experts "
-                    "for each layer. "
-                )
-            layer.w13_input_scale = torch.nn.Parameter(
-                layer.w13_input_scale.max(), requires_grad=False
+        self._setup_kernel(layer)
+
+    def _setup_kernel(self, layer: RoutedExperts):
+        """Setup kernel using oracle functions for MXFP4 schemes (W4A16, W4A8)."""
+        w13_bias = getattr(layer, "w13_bias", None)
+        w2_bias = getattr(layer, "w2_bias", None)
+
+        # Convert weights to kernel format (handles all backend-specific logic)
+        w13, w2, w13_scale, w2_scale, w13_bias, w2_bias = (
+            convert_gpt_oss_weight_to_mxfp4_moe_kernel_format(
+                mxfp4_backend=self.mxfp4_backend,
+                layer=layer,
+                w13_weight=layer.w13_weight,
+                w2_weight=layer.w2_weight,
+                w13_weight_scale=layer.w13_weight_scale,
+                w2_weight_scale=layer.w2_weight_scale,
+                w13_bias=w13_bias,
+                w2_bias=w2_bias,
             )
-            layer.w2_input_scale = torch.nn.Parameter(
-                layer.w2_input_scale.max(), requires_grad=False
-            )
-
-            if current_platform.is_fp8_fnuz():
-                # Normalize the weights and scales
-                _, _, w13_input_scale = normalize_e4m3fn_to_e4m3fnuz(
-                    torch.empty_like(layer.w13_weight, dtype=torch.float8_e4m3fn),
-                    torch.empty_like(
-                        layer.w13_weight_scale, dtype=layer.w13_weight_scale.dtype
-                    ),
-                    layer.w13_input_scale,
-                )
-                _, _, w2_input_scale = normalize_e4m3fn_to_e4m3fnuz(
-                    torch.empty_like(layer.w2_weight, dtype=torch.float8_e4m3fn),
-                    torch.empty_like(
-                        layer.w2_weight_scale, dtype=layer.w13_weight_scale.dtype
-                    ),
-                    layer.w2_input_scale,
-                )
-                # Reset the parameter
-                if w13_input_scale is not None:
-                    layer.w13_input_scale = torch.nn.Parameter(
-                        w13_input_scale, requires_grad=False
-                    )
-                if w2_input_scale is not None:
-                    layer.w2_input_scale = torch.nn.Parameter(
-                        w2_input_scale, requires_grad=False
-                    )
+        )
 
-        # secondly, process mxfp weights
-        if self.emulate:
-            torch.accelerator.empty_cache()
-            return
+        # Handle weight/scale assignment based on backend type
+        if self.mxfp4_backend in TRITON_BACKENDS or self.mxfp4_backend in (
+            Mxfp4MoeBackend.AITER_MXFP4_FP8,
+        ):
+            # Triton-based backends: w13/w2 are triton_kernels.tensor.Tensor
+            # Store on layer for apply(), scales are PrecisionConfig
+            layer.w13_weight = w13
+            layer.w2_weight = w2
+            self.w13_precision_config = w13_scale
+            self.w2_precision_config = w2_scale
+        else:
+            # Standard backends: replace parameters
+            replace_parameter(layer, "w13_weight", w13)
+            replace_parameter(layer, "w2_weight", w2)
+            replace_parameter(layer, "w13_weight_scale", w13_scale)
+            replace_parameter(layer, "w2_weight_scale", w2_scale)
 
-        from aiter.utility.fp4_utils import e8m0_shuffle
+        if w13_bias is not None and w2_bias is not None:
+            replace_parameter(layer, "w13_bias", w13_bias)
+            replace_parameter(layer, "w2_bias", w2_bias)
 
-        # Pre-shuffle weight scales
-        s0, s1, _ = layer.w13_weight_scale.shape
-        w13_weight_scale = layer.w13_weight_scale.view(s0 * s1, -1)
-        w13_weight_scale = e8m0_shuffle(w13_weight_scale)
-        layer.w13_weight_scale.data = w13_weight_scale.view(s0, s1, -1)
+        if self.mxfp4_backend == Mxfp4MoeBackend.AITER_MXFP4_MXFP4:
+            layer.w13_weight.is_shuffled = True
+            layer.w2_weight.is_shuffled = True
 
-        s0, s1, _ = layer.w2_weight_scale.shape
-        w2_weight_scale = layer.w2_weight_scale.view(s0 * s1, -1)
-        w2_weight_scale = e8m0_shuffle(w2_weight_scale)
-        layer.w2_weight_scale.data = w2_weight_scale.view(s0, s1, -1)
+        torch.accelerator.empty_cache()
 
-        if self.fp4_dtype is not None:
-            layer.w13_weight = torch.nn.Parameter(
-                layer.w13_weight.view(self.fp4_dtype),
-                requires_grad=layer.w13_weight.requires_grad,
+        # Build quant config and kernel
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+        if self.moe_quant_config is not None and self.experts_cls is not None:
+            self.moe_kernel = make_mxfp4_moe_kernel(
+                moe_quant_config=self.moe_quant_config,
+                moe_config=self.moe,
+                mxfp4_backend=self.mxfp4_backend,
+                experts_cls=self.experts_cls,
+                routing_tables=layer._expert_routing_tables(),
             )
-            layer.w2_weight = torch.nn.Parameter(
-                layer.w2_weight.view(self.fp4_dtype),
-                requires_grad=layer.w2_weight.requires_grad,
-            )
-        # Pre-shuffle weight
-        shuffled_w13, shuffled_w2 = rocm_aiter_ops.shuffle_weights(
-            layer.w13_weight.data, layer.w2_weight.data
-        )
-
-        layer.w13_weight = torch.nn.Parameter(shuffled_w13, requires_grad=False)
-        layer.w2_weight = torch.nn.Parameter(shuffled_w2, requires_grad=False)
-        layer.w13_weight.is_shuffled = True
-        layer.w2_weight.is_shuffled = True
-        torch.accelerator.empty_cache()
 
     def get_fused_moe_quant_config(
-        self, layer: torch.nn.Module
+        self, layer: RoutedExperts
     ) -> FusedMoEQuantConfig | None:
+        # For oracle-based backends (W4A16, W4A8), use make_mxfp4_moe_quant_config
+        if self.mxfp4_backend not in (Mxfp4MoeBackend.NONE, Mxfp4MoeBackend.EMULATION):
+            # Determine scale source based on backend type
+            if self.mxfp4_backend in TRITON_BACKENDS or self.mxfp4_backend in (
+                Mxfp4MoeBackend.AITER_MXFP4_FP8,
+            ):
+                w1_scale = self.w13_precision_config
+                w2_scale = self.w2_precision_config
+            else:
+                w1_scale = layer.w13_weight_scale
+                w2_scale = layer.w2_weight_scale
+
+            return make_mxfp4_moe_quant_config(
+                mxfp4_backend=self.mxfp4_backend,
+                w1_scale=w1_scale,
+                w2_scale=w2_scale,
+                w1_bias=getattr(layer, "w13_bias", None),
+                w2_bias=getattr(layer, "w2_bias", None),
+                a1_scale=getattr(layer, "w13_input_scale", None),
+                a2_scale=getattr(layer, "w2_input_scale", None),
+            )
+
+        # Emulation and other schemes
         if self.ocp_mx_scheme == "w_mxfp4":
             return mxfp4_w4a16_moe_quant_config(
                 w1_scale=layer.w13_weight_scale,
@@ -1020,175 +1344,277 @@ def get_fused_moe_quant_config(
                 block_shape=None,
             )
 
+    @property
+    def is_monolithic(self) -> bool:
+        if self.moe_kernel is not None:
+            return self.moe_kernel.is_monolithic
+        return False
+
     def apply(
         self,
-        layer: FusedMoE,
+        layer: RoutedExperts,
         x: torch.Tensor,
         topk_weights: torch.Tensor,
         topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
         shared_experts_input: torch.Tensor | None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        if not self.emulate:
-            from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (
-                rocm_aiter_fused_experts,
-            )
-
-            return rocm_aiter_fused_experts(
-                x,
-                layer.w13_weight,
-                layer.w2_weight,
-                topk_weights=topk_weights,
-                topk_ids=topk_ids,
-                activation=layer.activation,
-                quant_config=self.moe_quant_config,
-                moe_config=layer.moe_config,
-                expert_map=layer.expert_map,
-            )
-        else:
-            from vllm.model_executor.layers.fused_moe import fused_experts
+    ) -> torch.Tensor:
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply(
+            hidden_states=x,
+            w1=layer.w13_weight,
+            w2=layer.w2_weight,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+            expert_map=layer.expert_map,
+            shared_experts_input=shared_experts_input,
+        )
 
-            return fused_experts(
-                x,
-                layer.w13_weight,
-                layer.w2_weight,
-                topk_weights=topk_weights,
-                topk_ids=topk_ids,
-                inplace=not self.moe.disable_inplace,
-                activation=layer.activation,
-                global_num_experts=layer.global_num_experts,
-                apply_router_weight_on_input=layer.apply_router_weight_on_input,
-                expert_map=layer.expert_map,
-                quant_config=self.moe_quant_config,
-            )
+    def apply_monolithic(
+        self,
+        layer: RoutedExperts,
+        x: torch.Tensor,
+        router_logits: torch.Tensor,
+        input_ids: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        assert self.is_monolithic
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply_monolithic(
+            hidden_states=x,
+            w1=layer.w13_weight,
+            w2=layer.w2_weight,
+            router_logits=router_logits,
+            activation=layer.activation,
+            global_num_experts=layer.global_num_experts,
+            expert_map=layer.expert_map,
+            apply_router_weight_on_input=layer.apply_router_weight_on_input,
+        )
 
 
-class QuarkOCP_MX_MoEMethod_OSS(QuarkOCP_MX_MoEMethod):
+class QuarkNvfp4MoEMethod(QuarkMoEMethod):
     def __init__(
         self,
         weight_config: dict[str, Any],
         input_config: dict[str, Any],
         moe: FusedMoEConfig,
+        quant_config: "QuarkConfig",  # type: ignore # noqa E501 # noqa F821
     ):
-        super().__init__(weight_config, input_config, moe)
-
-    def process_weights_after_loading(self, layer):
-        from triton_kernels.matmul_ogs import FlexCtx, PrecisionConfig
+        super().__init__(moe)
+        self.weight_quant = weight_config
+        self.input_quant = input_config
+        self.quant_config = quant_config
+        self.group_size = 16
+
+        # Select experts implementation.
+        self.nvfp4_backend, self.experts_cls = select_nvfp4_moe_backend(
+            config=self.moe,
+            weight_key=kNvfp4Static,
+            activation_key=kNvfp4Dynamic,
+        )
 
-        w13_bias = layer.w13_bias.to(torch.float32)
-        w2_bias = layer.w2_bias.to(torch.float32)
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size_per_partition: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        layer.num_experts = num_experts
+        layer.params_dtype = params_dtype
+        layer.quant_config = self.quant_config
+        weight_dtype = torch.uint8
+        weight_scale_dtype = torch.float8_e4m3fn
+        w13_num_shards = 2 if self.moe.is_act_and_mul else 1
 
-        layer.w13_bias = torch.nn.Parameter(w13_bias, requires_grad=False)
-        layer.w2_bias = torch.nn.Parameter(w2_bias, requires_grad=False)
+        # GEMM 1 - w13 weight
+        w13_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                w13_num_shards * intermediate_size_per_partition,
+                # 2 fp4 items are packed in the input dimension
+                hidden_size // 2,
+                dtype=weight_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight", w13_weight)
+        set_weight_attrs(w13_weight, extra_weight_attrs)
 
-        # FIXME warp need to be adjusted based on batch size
-        # only apply to  batched mode
-        if self.moe.use_ep:
-            num_warps = 4 if envs.VLLM_MOE_DP_CHUNK_SIZE <= 512 else 8
-        else:
-            num_warps = 8
+        # GEMM 2 - w2 weight
+        w2_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                # 2 fp4 items are packed in the input dimension
+                intermediate_size_per_partition // 2,
+                dtype=weight_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight", w2_weight)
+        set_weight_attrs(w2_weight, extra_weight_attrs)
 
-        w13_weight, w13_flex, w13_scale = _swizzle_mxfp4(
-            layer.w13_weight, layer.w13_weight_scale, num_warps
+        # Weight scales (per-group FP8 scales)
+        w13_weight_scale = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                w13_num_shards * intermediate_size_per_partition,
+                hidden_size // self.group_size,
+                dtype=weight_scale_dtype,
+            ),
+            requires_grad=False,
         )
-        w2_weight, w2_flex, w2_scale = _swizzle_mxfp4(
-            layer.w2_weight, layer.w2_weight_scale, num_warps
+        layer.register_parameter("w13_weight_scale", w13_weight_scale)
+        extra_weight_attrs.update(
+            {"quant_method": FusedMoeWeightScaleSupported.GROUP.value}
         )
+        set_weight_attrs(w13_weight_scale, extra_weight_attrs)
 
-        self.w13_weight_triton_tensor = w13_weight
-        self.w2_weight_triton_tensor = w2_weight
+        w2_weight_scale = torch.nn.Parameter(
+            torch.empty(
+                num_experts,
+                hidden_size,
+                intermediate_size_per_partition // self.group_size,
+                dtype=weight_scale_dtype,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_scale", w2_weight_scale)
+        set_weight_attrs(w2_weight_scale, extra_weight_attrs)
 
-        # need to delete the original weights to save memory on single GPU
-        del layer.w13_weight
-        del layer.w2_weight
-        layer.w13_weight = None
-        layer.w2_weight = None
-        torch.accelerator.empty_cache()
+        # Global weight scales (per-tensor FP32 scales)
+        extra_weight_attrs.update(
+            {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}
+        )
 
-        if self.static_input_scales:
-            if layer.w13_input_scale is None or layer.w2_input_scale is None:
-                raise ValueError(
-                    "QuantConfig has static quantization, but found "
-                    "activation scales are None."
-                )
-            if not all_close_1d(layer.w13_input_scale) or not all_close_1d(
-                layer.w2_input_scale
-            ):
-                logger.warning_once(
-                    "Found input_scales that are not equal for "
-                    "fp8 MoE layer. Using the maximum across experts "
-                    "for each layer."
-                )
+        w13_weight_scale_2 = torch.nn.Parameter(
+            torch.empty(num_experts, w13_num_shards, dtype=torch.float32),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_scale_2", w13_weight_scale_2)
+        set_weight_attrs(w13_weight_scale_2, extra_weight_attrs)
 
-            layer.w13_input_scale = torch.nn.Parameter(
-                layer.w13_input_scale.max().to(torch.float32), requires_grad=False
-            )
-            layer.w2_input_scale = torch.nn.Parameter(
-                layer.w2_input_scale.max().to(torch.float32), requires_grad=False
-            )
+        w2_weight_scale_2 = torch.nn.Parameter(
+            torch.empty(num_experts, dtype=torch.float32),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight_scale_2", w2_weight_scale_2)
+        set_weight_attrs(w2_weight_scale_2, extra_weight_attrs)
 
-            from triton_kernels.numerics import InFlexData
+        # Input global scales (per-tensor FP32 scales)
+        w13_input_scale_2 = torch.nn.Parameter(
+            torch.empty(num_experts, w13_num_shards, dtype=torch.float32),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_input_scale_2", w13_input_scale_2)
+        set_weight_attrs(w13_input_scale_2, extra_weight_attrs)
 
-            lhs_data13 = InFlexData(scale=layer.w13_input_scale)
-            lhs_data2 = InFlexData(scale=layer.w2_input_scale)
+        w2_input_scale_2 = torch.nn.Parameter(
+            torch.empty(num_experts, dtype=torch.float32),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_input_scale_2", w2_input_scale_2)
+        set_weight_attrs(w2_input_scale_2, extra_weight_attrs)
 
-            self.w13_precision_config = PrecisionConfig(
-                weight_scale=w13_scale,
-                flex_ctx=FlexCtx(rhs_data=w13_flex, lhs_data=lhs_data13),
-            )
+    def process_weights_after_loading(self, layer: RoutedExperts) -> None:
+        """
+        Convert NVFP4 MoE weights into kernel format and setup the kernel.
+        """
+
+        if not torch.allclose(
+            layer.w13_weight_scale_2[:, 0], layer.w13_weight_scale_2[:, 1]
+        ):
+            raise ValueError("Different global scales for w1 and w3 is not supported.")
+
+        # Use a single gscale for w13
+        w13_weight_scale_2 = torch.maximum(
+            layer.w13_weight_scale_2[:, 0], layer.w13_weight_scale_2[:, 1]
+        ).contiguous()
+
+        w2_weight_scale_2 = layer.w2_weight_scale_2
+
+        (
+            w13,
+            w13_scale,
+            w13_scale_2,
+            a13_scale,
+            w2,
+            w2_scale,
+            w2_scale_2,
+            a2_scale,
+        ) = convert_to_nvfp4_moe_kernel_format(
+            nvfp4_backend=self.nvfp4_backend,
+            layer=layer,
+            w13=layer.w13_weight,
+            w13_scale=layer.w13_weight_scale,
+            w13_scale_2=w13_weight_scale_2,
+            a13_scale=layer.w13_input_scale_2,
+            w2=layer.w2_weight,
+            w2_scale=layer.w2_weight_scale,
+            w2_scale_2=w2_weight_scale_2,
+            a2_scale=layer.w2_input_scale_2,
+            is_act_and_mul=self.moe.is_act_and_mul,
+        )
 
-            self.w2_precision_config = PrecisionConfig(
-                weight_scale=w2_scale,
-                flex_ctx=FlexCtx(rhs_data=w2_flex, lhs_data=lhs_data2),
+        replace_parameter(layer, "w13_weight", w13)
+        replace_parameter(layer, "w13_weight_scale", w13_scale)
+        replace_parameter(layer, "w13_weight_scale_2", w13_scale_2)
+        replace_parameter(layer, "w13_input_scale_2", a13_scale)
+
+        replace_parameter(layer, "w2_weight", w2)
+        replace_parameter(layer, "w2_weight_scale", w2_scale)
+        replace_parameter(layer, "w2_weight_scale_2", w2_scale_2)
+        replace_parameter(layer, "w2_input_scale_2", a2_scale)
+
+        # Setup modular kernel.
+        self.moe_quant_config = self.get_fused_moe_quant_config(layer)
+        if self.moe_quant_config:
+            assert self.experts_cls is not None
+            self.moe_kernel = make_nvfp4_moe_kernel(
+                moe_quant_config=self.moe_quant_config,
+                moe_config=self.moe,
+                experts_cls=self.experts_cls,
+                routing_tables=layer._expert_routing_tables(),
             )
 
     def get_fused_moe_quant_config(
         self, layer: torch.nn.Module
     ) -> FusedMoEQuantConfig | None:
-        return mxfp4_w4a8_moe_quant_config(
-            w1_scale=self.w13_precision_config,
-            w2_scale=self.w2_precision_config,
-            a1_scale=layer.w13_input_scale,
-            a2_scale=layer.w2_input_scale,
-            w1_bias=layer.w13_bias,
-            w2_bias=layer.w2_bias,
-            block_shape=None,
+        return make_nvfp4_moe_quant_config(
+            backend=self.nvfp4_backend,
+            w13_scale=layer.w13_weight_scale,
+            w2_scale=layer.w2_weight_scale,
+            w13_scale_2=layer.w13_weight_scale_2,
+            w2_scale_2=layer.w2_weight_scale_2,
+            a13_scale=layer.w13_input_scale_2,
+            a2_scale=layer.w2_input_scale_2,
         )
 
-    @property
-    def is_monolithic(self) -> bool:
-        return True
-
-    def apply_monolithic(
+    def apply(
         self,
-        layer: torch.nn.Module,
+        layer: RoutedExperts,
         x: torch.Tensor,
-        router_logits: torch.Tensor,
-        expert_map: torch.Tensor | None = None,
-    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        if layer.enable_eplb:
-            raise NotImplementedError(
-                "EPLB not supported for `QuarkW4MXFp4MoEMethod_OSS` yet."
-            )
-
-        from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (  # noqa: E501
-            triton_kernel_moe_forward,
-        )
-
-        assert self.moe.hidden_dim_unpadded is not None
-        assert self.moe.intermediate_size_per_partition_unpadded is not None
-        return triton_kernel_moe_forward(
-            hidden_states=x,
-            w1=self.w13_weight_triton_tensor,
-            w2=self.w2_weight_triton_tensor,
-            gating_output=router_logits,
-            topk=layer.top_k,
-            renormalize=layer.renormalize,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        shared_experts: SharedExperts | None,
+        shared_experts_input: torch.Tensor | None,
+    ) -> torch.Tensor:
+        assert self.moe_kernel is not None
+        return self.moe_kernel.apply(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            topk_weights,
+            topk_ids,
+            activation=layer.activation,
             global_num_experts=layer.global_num_experts,
-            expert_map=expert_map,
-            quant_config=self.moe_quant_config,
+            expert_map=layer.expert_map,
             apply_router_weight_on_input=layer.apply_router_weight_on_input,
-            unpadded_N_w1=self.moe.intermediate_size_per_partition_unpadded * 2,
-            unpadded_K_w1=self.moe.hidden_dim_unpadded,
-            unpadded_N_w2=self.moe.hidden_dim_unpadded,
-            unpadded_K_w2=self.moe.intermediate_size_per_partition_unpadded,
+            shared_experts=shared_experts,
+            shared_experts_input=shared_experts_input,
         )
diff --git a/vllm/model_executor/layers/quantization/quark/schemes/__init__.py b/vllm/model_executor/layers/quantization/quark/schemes/__init__.py
index a5e33a0442b1..1ef5824fec53 100644
--- a/vllm/model_executor/layers/quantization/quark/schemes/__init__.py
+++ b/vllm/model_executor/layers/quantization/quark/schemes/__init__.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from .quark_nvfp4 import QuarkNVFP4
 from .quark_ocp_mx import QuarkOCP_MX
 from .quark_scheme import QuarkScheme
 from .quark_w4a8_mxfp4_fp8 import QuarkW4A8_MXFP4_FP8
@@ -13,4 +14,5 @@
     "QuarkW8A8Int8",
     "QuarkOCP_MX",
     "QuarkW4A8_MXFP4_FP8",
+    "QuarkNVFP4",
 ]
diff --git a/vllm/model_executor/layers/quantization/quark/schemes/quark_nvfp4.py b/vllm/model_executor/layers/quantization/quark/schemes/quark_nvfp4.py
new file mode 100644
index 000000000000..d8a339770fac
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/quark/schemes/quark_nvfp4.py
@@ -0,0 +1,154 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Callable
+
+import torch
+from torch.nn.parameter import Parameter
+
+from vllm.logger import init_logger
+from vllm.model_executor.kernels.linear import init_nvfp4_linear_kernel
+from vllm.model_executor.kernels.linear.nvfp4.emulation import (
+    EmulationNvFp4LinearKernel,
+)
+from vllm.model_executor.layers.quantization.quark.schemes.quark_scheme import (
+    QuarkScheme,
+)
+from vllm.model_executor.parameter import (
+    GroupQuantScaleParameter,
+    ModelWeightParameter,
+    PerTensorScaleParameter,
+)
+
+__all__ = ["QuarkNVFP4"]
+
+logger = init_logger(__name__)
+
+
+class QuarkNVFP4(QuarkScheme):
+    """
+    Quark NVFP4 quantization scheme.
+
+    Supports loading NVFP4 checkpoints with the following structure:
+    - weight: uint8, shape [out_features, in_features // 2] (packed FP4)
+    - weight_scale: float8_e4m3fn, shape [out_features, in_features // group_size]
+    - weight_scale_2: bfloat16/float32, scalar (global weight scale)
+    - input_scale_2: bfloat16/float32, scalar (global input scale)
+    """
+
+    def __init__(
+        self,
+    ):
+        self.kernel = init_nvfp4_linear_kernel()
+        self.group_size = 16
+
+        if not isinstance(self.kernel, EmulationNvFp4LinearKernel):
+            logger.warning_once(
+                "Only EmulationNvFp4LinearKernel NVFP4 dense implementation is "
+                "tested with QuarkNVFP4, got kernel=%s. Correctness is not validated.",
+                type(self.kernel).__name__,
+            )
+
+    @classmethod
+    def get_min_capability(cls) -> int:
+        # FP4 requires Turing (75) or newer
+        return 75
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        output_partition_sizes: list[int],
+        input_size_per_partition: int,
+        params_dtype: torch.dtype,
+        weight_loader: Callable,
+        **kwargs,
+    ):
+        output_size_per_partition = sum(output_partition_sizes)
+        layer.logical_widths = output_partition_sizes
+        layer.input_size_per_partition = input_size_per_partition
+        layer.output_size_per_partition = output_size_per_partition
+
+        if input_size_per_partition % self.group_size != 0:
+            raise ValueError(
+                f"Input size per partition ({input_size_per_partition}) must be "
+                f"divisible by group size ({self.group_size})"
+            )
+
+        # Weight: FP4 packed as uint8 (2 FP4 values per uint8)
+        weight = ModelWeightParameter(
+            data=torch.empty(
+                output_size_per_partition,
+                input_size_per_partition // 2,
+                dtype=torch.uint8,
+            ),
+            input_dim=1,
+            output_dim=0,
+            weight_loader=weight_loader,
+        )
+        layer.register_parameter("weight", weight)
+
+        # Per-group weight scale (FP8 E4M3)
+        weight_scale = GroupQuantScaleParameter(
+            data=torch.empty(
+                output_size_per_partition,
+                input_size_per_partition // self.group_size,
+                dtype=torch.float8_e4m3fn,
+            ),
+            input_dim=1,
+            output_dim=0,
+            weight_loader=weight_loader,
+        )
+        layer.register_parameter("weight_scale", weight_scale)
+
+        # Global weight scale (scalar, per partition)
+        weight_scale_2 = PerTensorScaleParameter(
+            data=torch.empty(len(output_partition_sizes), dtype=torch.float32),
+            weight_loader=weight_loader,
+        )
+        layer.register_parameter("weight_scale_2", weight_scale_2)
+
+        # Global input scale (scalar, per partition)
+        input_scale_2 = PerTensorScaleParameter(
+            data=torch.empty(len(output_partition_sizes), dtype=torch.float32),
+            weight_loader=weight_loader,
+        )
+        layer.register_parameter("input_scale_2", input_scale_2)
+
+    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
+        input_global_scale = layer.input_scale_2.max().to(torch.float32)
+        layer.input_global_scale = Parameter(input_global_scale, requires_grad=False)
+        del layer.input_scale_2
+
+        weight_global_scale = layer.weight_scale_2.to(torch.float32)
+
+        if torch.unique(weight_global_scale).numel() != 1:
+            logger.warning_once(
+                "In NVFP4 linear, the global scale for weight are different"
+                " for parallel layers (e.g. q_proj, k_proj, v_proj). This"
+                " will likely result in reduced accuracy. Please verify the"
+                " model accuracy. Consider using a checkpoint with a shared"
+                " global NVFP4 scale for fused layers."
+            )
+
+        weight_global_scale = weight_global_scale.max()
+
+        layer.weight_global_scale = Parameter(weight_global_scale, requires_grad=False)
+        del layer.weight_scale_2
+
+        layer.alpha = Parameter(
+            layer.input_global_scale * layer.weight_global_scale, requires_grad=False
+        )
+        layer.input_global_scale_inv = Parameter(
+            (1.0 / layer.input_global_scale).to(torch.float32), requires_grad=False
+        )
+
+        # Convert layer to NVFP4 linear kernel format
+        self.kernel.process_weights_after_loading(layer)
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        return self.kernel.apply_weights(layer=layer, x=x, bias=bias)
diff --git a/vllm/model_executor/layers/quantization/quark/schemes/quark_ocp_mx.py b/vllm/model_executor/layers/quantization/quark/schemes/quark_ocp_mx.py
index 0b0a224f3891..70a7e81cc455 100644
--- a/vllm/model_executor/layers/quantization/quark/schemes/quark_ocp_mx.py
+++ b/vllm/model_executor/layers/quantization/quark/schemes/quark_ocp_mx.py
@@ -96,21 +96,12 @@ def gemm_with_dynamic_quant(
                     x_q = x
                     x_s = x_scales
 
-                # 32 alignment is enough for dim0 padding of output for
-                # gemm_a4w4 kernel
-                y = torch.empty(
-                    (M + 31) // 32 * 32,
-                    weight.shape[0],
-                    device=x_q.device,
-                    dtype=out_dtype,
-                )
-
-                gemm_a4w4(
+                y = gemm_a4w4(
                     x_q,
                     weight.view(x_q.dtype),
                     x_s,
                     weight_scale.view(x_s.dtype),
-                    y,
+                    dtype=out_dtype,
                     bpreshuffle=True,
                 )
             return y[:M]
@@ -267,20 +258,26 @@ def get_packed_dim(self, dim: int, quant_dtype: str):
     def get_min_capability(cls) -> int:
         return 70
 
+    def process_dynamic_mxfp4_weights_after_loading(
+        self, layer: torch.nn.Module
+    ) -> None:
+        w_q, w_s = dynamic_mxfp4_quant(layer.weight)
+        layer.weight_scale = torch.nn.Parameter(w_s.T.contiguous(), requires_grad=False)
+        layer.weight = torch.nn.Parameter(w_q, requires_grad=False)
+
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         layer.weight = torch.nn.Parameter(layer.weight.data, requires_grad=False)
 
         if self.emulate:
-            layer.weight_scale = torch.nn.Parameter(
-                layer.weight_scale.data, requires_grad=False
-            )
-        else:
             if self.dynamic_mxfp4_quant:
-                w_q, w_s = dynamic_mxfp4_quant(layer.weight)
+                self.process_dynamic_mxfp4_weights_after_loading(layer)
+            else:
                 layer.weight_scale = torch.nn.Parameter(
-                    w_s.T.contiguous(), requires_grad=False
+                    layer.weight_scale.data, requires_grad=False
                 )
-                layer.weight = torch.nn.Parameter(w_q, requires_grad=False)
+        else:
+            if self.dynamic_mxfp4_quant:
+                self.process_dynamic_mxfp4_weights_after_loading(layer)
             elif self.rocm_use_aiter_fp4_asm_gemm:
                 # shuffle weight scale
                 weight_scale_shuffle = layer.weight_scale.data
@@ -370,11 +367,15 @@ def apply_weights(
             dq_w = self.dequant_func(layer.weight, layer.weight_scale, x.dtype)
             qdq_x = self.quant_dequant_func(x)
             return F.linear(qdq_x, dq_w, bias)
-        else:
-            return torch.ops.vllm.gemm_with_dynamic_quant(
-                x,
-                layer.weight,
-                layer.weight_scale,
-                self.rocm_use_aiter_fp4_asm_gemm,
-                self.out_dtype,
-            )
+        y = torch.ops.vllm.gemm_with_dynamic_quant(
+            x,
+            layer.weight,
+            layer.weight_scale,
+            self.rocm_use_aiter_fp4_asm_gemm,
+            self.out_dtype,
+        )
+        # gemm_with_dynamic_quant has no bias argument; add it here so the
+        # native path matches F.linear (e.g. qkv_proj with qkv_bias=True).
+        if bias is not None:
+            y = y + bias
+        return y
diff --git a/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py b/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py
index 72f050a1245b..6d94e26f960c 100644
--- a/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py
+++ b/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py
@@ -7,6 +7,7 @@
 import torch
 from torch.nn import Parameter
 
+from vllm.config import get_current_vllm_config
 from vllm.logger import init_logger
 from vllm.model_executor.kernels.linear import (
     init_fp8_linear_kernel,
@@ -57,6 +58,7 @@ def __init__(
             kFp8StaticTokenSym if per_token_weight else kFp8StaticTensorSym
         )
         self.out_dtype = torch.get_default_dtype()
+        self.input_dtype = get_current_vllm_config().model_config.dtype
 
     @classmethod
     def get_min_capability(cls) -> int:
@@ -118,6 +120,8 @@ def process_weights_after_loading(self, layer) -> None:
         if self.is_static_input_scheme:
             layer.input_scale = Parameter(layer.input_scale.max(), requires_grad=False)
 
+        self.fp8_linear.process_weights_after_loading(layer)
+
     def create_weights(
         self,
         layer: torch.nn.Module,
@@ -175,7 +179,9 @@ def create_weights(
         self.fp8_linear = init_fp8_linear_kernel(
             activation_quant_key=self.activation_quant_key,
             weight_quant_key=self.weight_quant_key,
-            out_dtype=torch.get_default_dtype(),
+            weight_shape=layer.weight.shape,
+            input_dtype=self.input_dtype,
+            out_dtype=self.out_dtype,
             module_name=self.__class__.__name__,
         )
 
diff --git a/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_int8.py b/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_int8.py
index 2afbe521c4b5..1f0c5cbffd88 100644
--- a/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_int8.py
+++ b/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_int8.py
@@ -47,6 +47,17 @@ def create_weights(
     ):
         layer.logical_widths = output_partition_sizes
 
+        # Quark stores per-channel weight_scale as 1D [N]; reshape to [N, 1].
+        def _scale_weight_loader(
+            param: torch.nn.Parameter,
+            loaded_weight: torch.Tensor,
+            *args,
+            **kwargs,
+        ):
+            if loaded_weight.dim() == 1:
+                loaded_weight = loaded_weight.unsqueeze(-1)
+            return weight_loader(param, loaded_weight, *args, **kwargs)
+
         self.kernel = init_int8_linear_kernel(
             is_channelwise=(self.qscheme == "per_channel"),
             is_static_input_scheme=(self.is_static_input_scheme is True),
@@ -69,15 +80,15 @@ def create_weights(
         # WEIGHT SCALE
         if self.qscheme == "per_channel":
             weight_scale = ChannelQuantScaleParameter(
-                data=torch.empty((sum(output_partition_sizes)), dtype=torch.float32),
+                data=torch.empty((sum(output_partition_sizes), 1), dtype=torch.float32),
                 output_dim=0,
-                weight_loader=weight_loader,
+                weight_loader=_scale_weight_loader,
             )
             ChannelQuantZPParameter = ChannelQuantScaleParameter
             weight_zero_point = ChannelQuantZPParameter(
-                data=torch.empty((sum(output_partition_sizes)), dtype=torch.int8),
+                data=torch.empty((sum(output_partition_sizes), 1), dtype=torch.int8),
                 output_dim=0,
-                weight_loader=weight_loader,
+                weight_loader=_scale_weight_loader,
             )
         else:
             assert self.qscheme == "per_tensor"
diff --git a/vllm/model_executor/layers/quantization/quark/utils.py b/vllm/model_executor/layers/quantization/quark/utils.py
index 98ac1a4f355e..ee55e5d39e70 100644
--- a/vllm/model_executor/layers/quantization/quark/utils.py
+++ b/vllm/model_executor/layers/quantization/quark/utils.py
@@ -17,7 +17,8 @@ def deep_compare(dict1: Any, dict2: Any) -> bool:
             return False
         return all(deep_compare(dict1[k], dict2[k]) for k in dict1)
     elif isinstance(dict1, list):
-        return set(dict1) == set(dict2)
+        # `dict1` may be a list of dict.
+        return all(deep_compare(dict1[i], dict2[i]) for i in range(len(dict1)))
     else:
         return dict1 == dict2
 
diff --git a/vllm/model_executor/layers/quantization/schema.py b/vllm/model_executor/layers/quantization/schema.py
deleted file mode 100644
index 669bd9d6ed83..000000000000
--- a/vllm/model_executor/layers/quantization/schema.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""
-This file contains the Pydantic schemas for various quantization-related
-parameters. When a relevant quantization technique is specified, these
-parameters are loaded in the form of a JSON alongside the model weights
-and augment the model with additional information needed for use of that
-technique. The format of this JSON should be specified by one or more
-schemas contained here.
-
-For example, when the KV cache is quantized to FP8-E4M3 (currently only
-possible on ROCm), the model can be optionally augmented with KV cache
-scaling factors.
-"""
-
-from pydantic import BaseModel, ConfigDict, ValidationInfo, model_validator
-
-
-class KVCacheQuantSchema(BaseModel):
-    dtype: str
-    # Each key is a TP rank. Each value is a dictionary mapping a TP rank's
-    # layer indices to their per-tensor KV cache scaling factor.
-    # TODO: Consider pulling this and its validation methods out into its
-    # own schema class (tricky as its members are variable)
-    scaling_factor: dict[int, dict[int, float]]
-
-    @model_validator(mode="after")
-    def check_is_fp8(self) -> "KVCacheQuantSchema":
-        assert self.dtype == "float8_e4m3fn", (
-            "Loaded scaling factors intended for KV cache dtype = "
-            f"{self.dtype} rather than float8_e4m3fn!"
-        )
-        return self
-
-    @model_validator(mode="after")
-    def check_tp_ranks(self, info: ValidationInfo) -> "KVCacheQuantSchema":
-        context = info.context
-        if context:
-            tp_size = context["tp_size"]
-            num_hidden_layers = context["num_hidden_layers"]
-            assert len(self.scaling_factor) == tp_size, (
-                f"Loaded dictionary has TP size {len(self.scaling_factor)} "
-                f"but LLM engine is currently running with TP size {tp_size}."
-            )
-            for tp_rank, layer_maps in self.scaling_factor.items():
-                assert len(layer_maps) == num_hidden_layers, (
-                    f"KV cache scales map for TP rank {tp_rank} is malformed. "
-                    f"Expected {num_hidden_layers} layers, got "
-                    f"{len(layer_maps)}."
-                )
-            for i in range(tp_size):
-                assert i in self.scaling_factor, (
-                    f"KV cache scales map for TP rank {i} not found."
-                )
-        return self
-
-    @model_validator(mode="after")
-    def check_current_rank(self, info: ValidationInfo) -> "KVCacheQuantSchema":
-        context = info.context
-        if context:
-            tp_rank = context["tp_rank"]
-            num_hidden_layers = context["num_hidden_layers"]
-            layer_scales_map = self.scaling_factor[tp_rank]
-            for i in range(num_hidden_layers):
-                assert i in layer_scales_map, (
-                    f"Could not find KV cache scales for layer {i} in "
-                    f"TP rank {tp_rank}."
-                )
-        return self
-
-
-class QuantParamSchema(BaseModel):
-    # TODO: Generalize and extend with more fields
-    # (e.g. weights/activations params) once functionality is enabled
-    model_config = ConfigDict(protected_namespaces=())
-    model_type: str | None
-    kv_cache: KVCacheQuantSchema
-
-    @model_validator(mode="after")
-    def check_model_type(self, info: ValidationInfo) -> "QuantParamSchema":
-        context = info.context
-        if context:
-            model_type = context.get("model_type", None)
-            if model_type is not None:
-                assert model_type == self.model_type, (
-                    f"Model type is {model_type} but loaded "
-                    f"scaling factors belonging to different "
-                    f"model type {self.model_type}!"
-                )
-        return self
diff --git a/vllm/model_executor/layers/quantization/torchao.py b/vllm/model_executor/layers/quantization/torchao.py
index 3c6fdf043f34..15399cfd39b4 100644
--- a/vllm/model_executor/layers/quantization/torchao.py
+++ b/vllm/model_executor/layers/quantization/torchao.py
@@ -99,6 +99,38 @@ def should_skip(prefix: str, skip_modules: list[str]) -> bool:
     convert_to_packed_tensor_based_on_current_hardware = lambda t: t
 
 
+def _check_torchao_fp8_activation_capability(torchao_config) -> None:
+    """Check if the current GPU supports FP8 activation quantization.
+
+    FP8 activation configs (e.g., Float8DynamicActivationFloat8WeightConfig)
+    require GPU compute capability >= 8.9 (Ada Lovelace / Hopper) on NVIDIA,
+    or MI300+ on AMD. This check provides a clear error message before
+    torchao's internal assertion fires with a confusing message.
+    """
+    config_name = type(torchao_config).__name__
+    if "Float8" not in config_name or "Activation" not in config_name:
+        return
+
+    from vllm.platforms import current_platform
+
+    if current_platform.supports_fp8():
+        return
+
+    capability = current_platform.get_device_capability()
+    capability_str = (
+        f" (current GPU compute capability: {capability.major}.{capability.minor})"
+        if capability is not None
+        else ""
+    )
+    raise ValueError(
+        f"torchao FP8 activation quantization config '{config_name}' "
+        f"requires GPU compute capability >= 8.9 (e.g., NVIDIA Ada Lovelace "
+        f"/ Hopper or AMD MI300+){capability_str}. "
+        f"For older GPUs, consider using a non-FP8 config such as "
+        f"Int8WeightOnlyConfig or Int4WeightOnlyConfig."
+    )
+
+
 class TorchAOConfig(QuantizationConfig):
     """Config class for torchao."""
 
@@ -269,6 +301,7 @@ def torchao_quantize_param_data(
     from torchao.quantization import quantize_
 
     assert isinstance(torchao_config, AOBaseConfig), f"{torchao_config}"
+    _check_torchao_fp8_activation_capability(torchao_config)
     """
     Avoid real weight allocation for faster load, since we will
     end up setting it to param.
diff --git a/vllm/model_executor/layers/quantization/turboquant/__init__.py b/vllm/model_executor/layers/quantization/turboquant/__init__.py
new file mode 100644
index 000000000000..f9f4384e6e2b
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/turboquant/__init__.py
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""TurboQuant: KV-cache quantization for vLLM.
+
+Hadamard rotation + per-coordinate Lloyd-Max scalar quantization for
+keys, uniform quantization for values.
+
+The core algorithmic pattern implemented for key quantization (Hadamard
+rotation followed by deterministic scalar quantization and
+re-normalization) was originally established in DRIVE (Vargaftik et al.,
+NeurIPS 2021) and EDEN (Vargaftik et al., ICML 2022). This formulation is
+also mathematically equivalent to the scalar case of the HIGGS
+quantization method (Malinovskii et al., "Pushing the Limits of Large
+Language Model Quantization via the Linearity Theorem", NAACL 2025;
+preprint arXiv:2411.17525), which subsequently generalized these concepts.
+
+A first application of this approach to KV-cache compression is in "Cache
+Me If You Must: Adaptive Key-Value Quantization for Large Language Models"
+(Shutova et al., ICML 2025; preprint arXiv:2501.19392). All of these
+foundational and application references pre-date the TurboQuant paper
+(Zandieh et al., ICLR 2026).
+"""
+
+from vllm.model_executor.layers.quantization.turboquant.config import TurboQuantConfig
+
+__all__ = ["TurboQuantConfig"]
diff --git a/vllm/model_executor/layers/quantization/turboquant/centroids.py b/vllm/model_executor/layers/quantization/turboquant/centroids.py
new file mode 100644
index 000000000000..490265747c5b
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/turboquant/centroids.py
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Lloyd-Max optimal scalar quantizer for TurboQuant.
+
+After rotating a d-dimensional unit vector by a random orthogonal matrix,
+each coordinate approximately follows N(0, 1/d) for d >= 64.
+We solve the Lloyd-Max conditions to find optimal centroids.
+
+Based on: turboquant-pytorch/lloyd_max.py (Zandieh et al.)
+"""
+
+import math
+from functools import lru_cache
+
+import torch
+
+
+def _gaussian_pdf(x: float, sigma2: float) -> float:
+    return (1.0 / math.sqrt(2 * math.pi * sigma2)) * math.exp(-x * x / (2 * sigma2))
+
+
+def _trapz(f, a: float, b: float, n: int = 200) -> float:
+    """Trapezoidal numerical integration (replaces scipy.integrate.quad)."""
+    h = (b - a) / n
+    result = 0.5 * (f(a) + f(b))
+    for i in range(1, n):
+        result += f(a + i * h)
+    return result * h
+
+
+def solve_lloyd_max(
+    d: int,
+    bits: int,
+    max_iter: int = 200,
+    tol: float = 1e-10,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Solve Lloyd-Max optimal quantizer for N(0, 1/d) distribution.
+
+    Args:
+        d: Vector dimension (determines variance = 1/d).
+        bits: Number of quantization bits.
+        max_iter: Maximum Lloyd-Max iterations.
+        tol: Convergence tolerance.
+
+    Returns:
+        centroids: Sorted tensor of 2^bits optimal centroids.
+        boundaries: Sorted tensor of 2^bits - 1 decision boundaries.
+    """
+    n_levels = 2**bits
+    sigma2 = 1.0 / d
+    sigma = math.sqrt(sigma2)
+
+    def pdf(x):
+        return _gaussian_pdf(x, sigma2)
+
+    lo, hi = -3.5 * sigma, 3.5 * sigma
+    centroids = [lo + (hi - lo) * (i + 0.5) / n_levels for i in range(n_levels)]
+
+    for _ in range(max_iter):
+        boundaries = [
+            (centroids[i] + centroids[i + 1]) / 2.0 for i in range(n_levels - 1)
+        ]
+        edges = [lo * 3] + boundaries + [hi * 3]
+        new_centroids = []
+        for i in range(n_levels):
+            a, b = edges[i], edges[i + 1]
+            num = _trapz(lambda x: x * pdf(x), a, b)
+            den = _trapz(pdf, a, b)
+            new_centroids.append(num / den if den > 1e-15 else centroids[i])
+
+        if max(abs(new_centroids[i] - centroids[i]) for i in range(n_levels)) < tol:
+            break
+        centroids = new_centroids
+
+    boundaries = [(centroids[i] + centroids[i + 1]) / 2.0 for i in range(n_levels - 1)]
+    return (
+        torch.tensor(centroids, dtype=torch.float32),
+        torch.tensor(boundaries, dtype=torch.float32),
+    )
+
+
+@lru_cache(maxsize=32)
+def get_centroids(d: int, bits: int) -> torch.Tensor:
+    """Get precomputed Lloyd-Max centroids (cached)."""
+    centroids, _ = solve_lloyd_max(d, bits)
+    return centroids
diff --git a/vllm/model_executor/layers/quantization/turboquant/config.py b/vllm/model_executor/layers/quantization/turboquant/config.py
new file mode 100644
index 000000000000..84e3940d264d
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/turboquant/config.py
@@ -0,0 +1,260 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""TurboQuant configuration."""
+
+from __future__ import annotations
+
+import logging
+import math
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from vllm.config import ModelConfig
+
+logger = logging.getLogger(__name__)
+
+# Named TQ presets: each maps to frozen config parameters.
+# key_quant_bits: 8 = FP8 keys, 3-4 = MSE (Lloyd-Max) quantized keys.
+# value_quant_bits: 3-4 = uniform quantized values.
+TQ_PRESETS: dict[str, dict] = {
+    "turboquant_k8v4": {
+        "key_quant_bits": 8,
+        "value_quant_bits": 4,
+        "norm_correction": False,
+    },
+    "turboquant_4bit_nc": {
+        "key_quant_bits": 4,
+        "value_quant_bits": 4,
+        "norm_correction": True,
+    },
+    "turboquant_k3v4_nc": {
+        "key_quant_bits": 3,
+        "value_quant_bits": 4,
+        "norm_correction": True,
+    },
+    "turboquant_3bit_nc": {
+        "key_quant_bits": 3,
+        "value_quant_bits": 3,
+        "norm_correction": True,
+    },
+}
+
+
+@dataclass
+class TurboQuantConfig:
+    """Configuration for TurboQuant KV-cache quantization.
+
+    Applies Hadamard rotation followed by per-coordinate Lloyd-Max scalar
+    quantization for keys, and uniform quantization for values.
+
+    Historical note: the core algorithmic pattern implemented for key
+    quantization (Hadamard rotation followed by deterministic scalar
+    quantization and re-normalization) was originally established in DRIVE
+    (Vargaftik et al., NeurIPS 2021) and EDEN (Vargaftik et al., ICML
+    2022). This formulation is also mathematically equivalent to the
+    scalar case of the HIGGS quantization method (Malinovskii et al.,
+    "Pushing the Limits of Large Language Model Quantization via the
+    Linearity Theorem", NAACL 2025; preprint arXiv:2411.17525), which
+    subsequently generalized these concepts.
+
+    A first application of this approach to KV-cache compression is in
+    "Cache Me If You Must: Adaptive Key-Value Quantization for Large
+    Language Models" (Shutova et al., ICML 2025; preprint
+    arXiv:2501.19392). All of these foundational and application
+    references pre-date the TurboQuant paper (Zandieh et al., ICLR 2026).
+
+    QJL is intentionally omitted: community consensus (5+ independent
+    groups) found it hurts attention quality by amplifying variance
+    through softmax.
+
+    Named presets (use via --kv-cache-dtype):
+        turboquant_k8v4:   FP8 keys + 4-bit values, 2.6x, +1.17% PPL
+        turboquant_4bit_nc: 4-bit MSE keys + 4-bit values + NC, 3.8x, +2.71%
+        turboquant_k3v4_nc: 3-bit MSE keys + 4-bit values + NC, ~3.5x, +10.63%
+        turboquant_3bit_nc: 3-bit MSE keys + 3-bit values + NC, 4.9x, +20.59%
+
+    Args:
+        head_dim: Attention head dimension (e.g. 64, 96, 128).
+        key_quant_bits: Bits for key quantization. 8 = FP8 keys (no
+            rotation/MSE). 3-4 = Lloyd-Max MSE quantized keys.
+        value_quant_bits: Bits per value dimension for uniform quantization.
+            3 = 8 levels, 4 = 16 levels (default).
+        norm_correction: Re-normalize centroid vectors to unit norm before
+            inverse rotation during dequant. Fixes quantization-induced norm
+            distortion, improving PPL by ~0.8% at 4-bit.
+    """
+
+    head_dim: int = 128
+    key_quant_bits: int = 3  # 3-4 = MSE keys, 8 = FP8 keys
+    value_quant_bits: int = 4  # 3-4 = uniform quantized values
+    seed: int = 42  # kept for backward compatibility; no longer used internally
+    norm_correction: bool = False
+
+    @property
+    def key_fp8(self) -> bool:
+        """Whether keys are stored as FP8 — no rotation/quantization needed."""
+        return self.key_quant_bits == 8
+
+    @property
+    def mse_bits(self) -> int:
+        """MSE quantizer bit-width (determines centroid count: 2^mse_bits).
+
+        For MSE key modes, equals key_quant_bits.
+        For FP8 key mode, falls back to value_quant_bits (centroids are still
+        needed for continuation-prefill dequant and decode kernel params).
+        """
+        if self.key_fp8:
+            return self.value_quant_bits
+        return self.key_quant_bits
+
+    @property
+    def key_mse_bits(self) -> int:
+        """MSE bits actually used for key quantization (0 if FP8 keys)."""
+        if self.key_fp8:
+            return 0
+        return self.key_quant_bits
+
+    @property
+    def centroid_bits(self) -> int:
+        """Bits for centroid generation — always non-zero."""
+        return self.mse_bits
+
+    @property
+    def n_centroids(self) -> int:
+        return 2**self.mse_bits
+
+    @property
+    def key_packed_size(self) -> int:
+        """Packed bytes for a single KEY vector.
+
+        FP8 mode (key_quant_bits=8):
+          head_dim bytes (1 byte per element, no overhead).
+
+        TQ mode:
+          - MSE indices: ceil(head_dim * key_mse_bits / 8) bytes
+          - vec_norm:     2 bytes (float16)
+        """
+        if self.key_fp8:
+            return self.head_dim  # 1 byte per element
+        mse_bytes = math.ceil(self.head_dim * self.key_mse_bits / 8)
+        norm_bytes = 2  # vec_norm fp16
+        return mse_bytes + norm_bytes
+
+    @property
+    def effective_value_quant_bits(self) -> int:
+        """Actual bits used for value storage."""
+        return self.value_quant_bits
+
+    @property
+    def value_packed_size(self) -> int:
+        """Packed bytes for a single VALUE vector.
+
+        Uniform quantization: ceil(head_dim * bits / 8) + 4 bytes (scale + zero fp16).
+        """
+        data_bytes = math.ceil(self.head_dim * self.value_quant_bits / 8)
+        return data_bytes + 4  # +2 scale(fp16) +2 zero(fp16)
+
+    @property
+    def slot_size(self) -> int:
+        """Total packed bytes per head per position (key + value combined).
+
+        Layout: [key_packed | value_packed]
+        """
+        return self.key_packed_size + self.value_packed_size
+
+    @property
+    def slot_size_aligned(self) -> int:
+        """Slot size rounded up to next even number.
+
+        Even-number is required so effective_head_size = slot_size_aligned // 2
+        is integral.
+        """
+        s = self.slot_size
+        return s + (s % 2)  # round up to even
+
+    @staticmethod
+    def get_boundary_skip_layers(
+        model_config: ModelConfig,
+        n: int = 2,
+    ) -> list[str]:
+        """Layer indices to skip TQ compression (boundary protection).
+
+        For hybrid models (attention + Mamba/linear-attention), boundary
+        protection is disabled — hybrids typically have only 8-12
+        full-attention layers and a hard n=2 on each side would cover
+        ~40 % of them.  The dense GSM8K baselines that motivate n=2
+        don't apply to hybrids.
+
+        For dense models, skips first N and last N attention layers.
+        Empirically required for aggressive presets (k3v4_nc, 3bit_nc)
+        — without it GSM8K drops ~30 points on Qwen3-4B.
+        """
+        if model_config.is_hybrid:
+            attn_indices = _get_full_attention_layer_indices(model_config)
+            if not attn_indices:
+                raise NotImplementedError(
+                    "TurboQuant KV cache requires identifiable "
+                    "full-attention layers, but none were found in "
+                    "the hybrid model config."
+                )
+            logger.info("TQ hybrid: full-attention layers %s", attn_indices)
+            return []
+
+        num_layers = model_config.hf_text_config.num_hidden_layers
+        if n <= 0 or num_layers <= 0:
+            return []
+        n = min(n, num_layers // 2)  # don't skip more than half
+        first = list(range(n))
+        last = list(range(num_layers - n, num_layers))
+        # Deduplicate (if num_layers <= 2*n)
+        indices = sorted(set(first + last))
+        return [str(i) for i in indices]
+
+    @staticmethod
+    def from_cache_dtype(cache_dtype: str, head_dim: int) -> TurboQuantConfig:
+        """Create config from a named preset.
+
+        Valid presets: turboquant_k8v4, turboquant_4bit_nc, etc.
+        """
+        if cache_dtype not in TQ_PRESETS:
+            valid = ", ".join(TQ_PRESETS.keys())
+            raise ValueError(
+                f"Unknown TurboQuant cache dtype: {cache_dtype!r}. "
+                f"Valid presets: {valid}"
+            )
+        preset = TQ_PRESETS[cache_dtype]
+        return TurboQuantConfig(
+            head_dim=head_dim,
+            key_quant_bits=preset["key_quant_bits"],
+            value_quant_bits=preset["value_quant_bits"],
+            norm_correction=preset["norm_correction"],
+        )
+
+
+def _get_full_attention_layer_indices(model_config: ModelConfig) -> list[int]:
+    """Global indices of full-attention layers in a hybrid model.
+
+    Covers the conventions used across vLLM: ``layer_types`` (Qwen3.5/Next),
+    ``layers_block_type`` (Jamba/Zamba2), ``attn_type_list`` (Minimax).
+    """
+    text_cfg = model_config.hf_text_config
+    hf_cfg = model_config.hf_config
+
+    layer_types = getattr(text_cfg, "layer_types", None)
+    if layer_types is not None:
+        return [
+            i for i, t in enumerate(layer_types) if t in ("full_attention", "attention")
+        ]
+
+    layers_block_type = getattr(text_cfg, "layers_block_type", None)
+    if layers_block_type is not None:
+        return [
+            i for i, t in enumerate(layers_block_type) if t in ("attention", "hybrid")
+        ]
+
+    attn_type_list = getattr(hf_cfg, "attn_type_list", None)
+    if attn_type_list is not None:
+        return [i for i, t in enumerate(attn_type_list) if t == 1]
+
+    return []
diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
index d16d4a3d2619..082e42f964f4 100644
--- a/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
+++ b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
@@ -10,6 +10,7 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.utils.flashinfer_utils import (
     align_fp4_moe_weights_for_fi,
+    align_trtllm_fp4_moe_hidden_dim_for_fi,
 )
 from vllm.model_executor.layers.quantization.utils.nvfp4_utils import (
     swizzle_blockscale,
@@ -20,7 +21,7 @@
 )
 
 if TYPE_CHECKING:
-    from vllm.model_executor.layers.fused_moe.layer import FusedMoE
+    from vllm.model_executor.layers.fused_moe import RoutedExperts
     from vllm.model_executor.layers.fused_moe.oracle.nvfp4 import (
         NvFp4MoeBackend,
     )
@@ -60,6 +61,100 @@ def reorder_w1w3_to_w3w1(
     )
 
 
+def interleave_linear_and_gate(
+    x: torch.Tensor,
+    group_size: int = 64,
+    dim: int = -1,
+) -> torch.Tensor:
+    """Interleave gate and linear weight rows for CuteDSL wrapper."""
+    sizes = x.size()
+    dim = dim % x.dim()
+    assert sizes[dim] % (group_size * 2) == 0, (
+        f"dim {dim} size {sizes[dim]} must be divisible by {group_size * 2}"
+    )
+    prev_sizes = sizes[:dim]
+    post_sizes = sizes[dim + 1 :]
+    x = x.view(*prev_sizes, 2, sizes[dim] // (group_size * 2), group_size, *post_sizes)
+    x = x.transpose(dim, dim + 1).contiguous().view(*sizes)
+    return x
+
+
+def prepare_nvfp4_moe_layer_for_flashinfer_cutedsl(
+    layer: "RoutedExperts",
+    w13: torch.Tensor,
+    w13_scale: torch.Tensor,
+    w13_scale_2: torch.Tensor,
+    a13_scale: torch.Tensor,
+    w2: torch.Tensor,
+    w2_scale: torch.Tensor,
+    w2_scale_2: torch.Tensor,
+    a2_scale: torch.Tensor,
+) -> tuple[
+    torch.Tensor,
+    torch.Tensor,
+    torch.Tensor,
+    torch.Tensor,
+    torch.Tensor,
+    torch.Tensor,
+    torch.Tensor,
+    torch.Tensor,
+]:
+    """Prepare weights for the CuteDSL wrapper-based NvFP4 MoE backend.
+
+    Converts weight scale factors to MMA layout expected by CuteDslMoEWrapper,
+    and interleaves w13 gate/linear rows.
+    """
+    from flashinfer.cute_dsl.utils import convert_sf_to_mma_layout
+
+    # Global scaling factors (same as other FlashInfer backends).
+    num_experts = w13.shape[0]
+    a13_scale = a13_scale.max().to(torch.float32).expand(num_experts)
+    a2_scale = a2_scale.max().to(torch.float32).expand(num_experts)
+
+    half = w13.shape[1] // 2
+    w13 = torch.cat([w13[:, half:], w13[:, :half]], dim=1)
+    w13_scale = torch.cat([w13_scale[:, half:], w13_scale[:, :half]], dim=1)
+
+    # Interleave up/gate rows for w13 weights and scales.
+    w13 = interleave_linear_and_gate(w13, group_size=64, dim=1)
+    w13_scale = interleave_linear_and_gate(w13_scale, group_size=64, dim=1)
+
+    # Convert w13 scale factors: linear → swizzled → MMA layout.
+    w13_scale = swizzle_blockscale(w13_scale)
+    E, M_padded, K_sf_padded = w13_scale.shape
+    w13_scale_flat = w13_scale.reshape(E * M_padded, K_sf_padded)
+    w13_scale = convert_sf_to_mma_layout(
+        w13_scale_flat,
+        m=M_padded,
+        k=K_sf_padded * 16,
+        num_groups=E,
+        sf_vec_size=16,
+    )
+
+    # Convert w2 scale factors: linear → swizzled → MMA layout.
+    w2_scale = swizzle_blockscale(w2_scale)
+    E, M_padded, K_sf_padded = w2_scale.shape
+    w2_scale_flat = w2_scale.reshape(E * M_padded, K_sf_padded)
+    w2_scale = convert_sf_to_mma_layout(
+        w2_scale_flat,
+        m=M_padded,
+        k=K_sf_padded * 16,
+        num_groups=E,
+        sf_vec_size=16,
+    )
+
+    return (
+        w13,
+        w13_scale,
+        w13_scale_2,
+        a13_scale,
+        w2,
+        w2_scale,
+        w2_scale_2,
+        a2_scale,
+    )
+
+
 def prepare_static_weights_for_trtllm_fp4_moe(
     # args_dequant,
     # args,
@@ -191,7 +286,7 @@ def prepare_static_weights_for_trtllm_fp4_moe(
 
 def prepare_nvfp4_moe_layer_for_fi_or_cutlass(
     backend: "NvFp4MoeBackend",
-    layer: "FusedMoE",
+    layer: "RoutedExperts",
     w13: torch.Tensor,
     w13_scale: torch.Tensor,
     w13_scale_2: torch.Tensor,
@@ -221,7 +316,8 @@ def prepare_nvfp4_moe_layer_for_fi_or_cutlass(
         NvFp4MoeBackend.VLLM_CUTLASS,
         NvFp4MoeBackend.FLASHINFER_CUTLASS,
         NvFp4MoeBackend.FLASHINFER_TRTLLM,
-        NvFp4MoeBackend.FLASHINFER_CUTEDSL,
+        NvFp4MoeBackend.FLASHINFER_CUTEDSL_BATCHED,
+        NvFp4MoeBackend.FLASHINFER_B12X,
     ]
 
     # Reorder [w1, w3] to [w3, w1] for FI NVFP4 MoE kernels.
@@ -233,6 +329,7 @@ def prepare_nvfp4_moe_layer_for_fi_or_cutlass(
         in [
             NvFp4MoeBackend.FLASHINFER_CUTLASS,
             NvFp4MoeBackend.FLASHINFER_TRTLLM,
+            NvFp4MoeBackend.FLASHINFER_B12X,
         ]
     ):
         w13, w13_scale = reorder_w1w3_to_w3w1(w13, w13_scale)
@@ -247,6 +344,13 @@ def prepare_nvfp4_moe_layer_for_fi_or_cutlass(
 
     # Shuffle weights and scales for FI TRTLLM NVFP4 MoE kernels.
     if backend == NvFp4MoeBackend.FLASHINFER_TRTLLM:
+        w13, w13_scale, w2, w2_scale, padded_hidden = (
+            align_trtllm_fp4_moe_hidden_dim_for_fi(w13, w13_scale, w2, w2_scale)
+        )
+        if layer.moe_config.hidden_dim_unpadded is None:
+            layer.moe_config.hidden_dim_unpadded = layer.moe_config.hidden_dim
+        layer.moe_config.hidden_dim = padded_hidden
+
         # Align weights for FI NVFP4 MoE kernels.
         min_alignment = 16 if is_gated else 128
         w13, w13_scale, w2, w2_scale, padded_intermediate = (
diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_mxint4_moe.py b/vllm/model_executor/layers/quantization/utils/flashinfer_mxint4_moe.py
index 98a3d1e12bdc..4e08a73a69dc 100644
--- a/vllm/model_executor/layers/quantization/utils/flashinfer_mxint4_moe.py
+++ b/vllm/model_executor/layers/quantization/utils/flashinfer_mxint4_moe.py
@@ -259,8 +259,12 @@ def flashinfer_trtllm_mxint4_moe(
         routed_scaling_factor=None,
         routing_method_type=routing_method_type,
         enable_pdl=None,
+        do_finalize=True,
         output=None,
         tune_max_num_tokens=8192,
-    ).to(x.dtype)
+    )
+    if isinstance(out, (tuple, list)):
+        out = out[0]
+    out = out.to(x.dtype)
 
     return out
diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
index 66827488ffed..973f759698f0 100644
--- a/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
@@ -111,7 +111,6 @@ def get_flashinfer_moe_backend() -> FlashinferMoeBackend:
             logger.info_once(
                 "Flashinfer TRTLLM MOE backend is only supported on "
                 "SM100 and later, using CUTLASS backend instead",
-                scope="local",
             )
             return FlashinferMoeBackend.CUTLASS
         return backend_map[flashinfer_moe_backend]
@@ -239,7 +238,6 @@ def align_fp4_moe_weights_for_fi(
         "Padding intermediate size from %d to %d for up/down projection weights.",
         intermediate,
         padded_intermediate,
-        scope="local",
     )
 
     up_mult = 2 if is_act_and_mul else 1
@@ -265,6 +263,47 @@ def align_fp4_moe_weights_for_fi(
     return padded_w13, padded_w13_scale, padded_w2, padded_w2_scale, padded_intermediate
 
 
+def align_trtllm_fp4_moe_hidden_dim_for_fi(
+    w13: torch.Tensor,
+    w13_scale: torch.Tensor,
+    w2: torch.Tensor,
+    w2_scale: torch.Tensor,
+    min_alignment: int = 256,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, int]:
+    num_experts, gate_up_dim, packed_hidden_size = w13.shape
+    hidden_size = packed_hidden_size * 2
+    padded_hidden_size = round_up(hidden_size, min_alignment)
+
+    if padded_hidden_size == hidden_size:
+        return w13, w13_scale, w2, w2_scale, hidden_size
+
+    logger.warning_once(
+        "Padding hidden size from %d to %d for TRTLLM NVFP4 MoE weights. "
+        "This requires activation slicing at runtime and may cause "
+        "performance degradation.",
+        hidden_size,
+        padded_hidden_size,
+    )
+
+    padded_w13 = w13.new_zeros((num_experts, gate_up_dim, padded_hidden_size // 2))
+    padded_w13[:, :, :packed_hidden_size] = w13
+
+    padded_w13_scale = w13_scale.new_zeros(
+        (num_experts, gate_up_dim, padded_hidden_size // 16)
+    )
+    padded_w13_scale[:, :, : w13_scale.shape[2]] = w13_scale
+
+    padded_w2 = w2.new_zeros((num_experts, padded_hidden_size, w2.shape[2]))
+    padded_w2[:, : w2.shape[1], :] = w2
+
+    padded_w2_scale = w2_scale.new_zeros(
+        (num_experts, padded_hidden_size, w2_scale.shape[2])
+    )
+    padded_w2_scale[:, : w2_scale.shape[1], :] = w2_scale
+
+    return padded_w13, padded_w13_scale, padded_w2, padded_w2_scale, padded_hidden_size
+
+
 def align_fp8_moe_weights_for_fi(
     w13: torch.Tensor, w2: torch.Tensor, is_act_and_mul: bool, min_alignment: int = 16
 ) -> tuple[torch.Tensor, torch.Tensor, int]:
@@ -289,7 +328,6 @@ def align_fp8_moe_weights_for_fi(
         "Padding intermediate size from %d to %d for up/down projection weights.",
         intermediate,
         padded_intermediate,
-        scope="local",
     )
 
     up_mult = 2 if is_act_and_mul else 1
@@ -305,6 +343,42 @@ def align_fp8_moe_weights_for_fi(
     return padded_w13, padded_w2, padded_intermediate
 
 
+def _shuffle_deepseek_fp8_moe_weights(
+    w13: torch.Tensor,
+    w2: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Preprocess DeepSeek FP8 block-scale weights for the FlashInfer TRT-LLM
+    kernel using the shuffle + BlockMajorK layout variant.
+
+    Returns 4D weight tensors in BlockMajorK layout
+    (E, K/block_k, Mn, block_k)
+    """
+    from flashinfer import shuffle_matrix_a
+    from flashinfer.fused_moe import convert_to_block_layout
+
+    epilogue_tile_m = 64
+    block_k = 128
+    num_experts = w13.shape[0]
+
+    M13, K13 = w13.shape[1], w13.shape[2]
+    M2, K2 = w2.shape[1], w2.shape[2]
+    w13_out = torch.empty(
+        num_experts, K13 // block_k, M13, block_k, dtype=torch.uint8, device=w13.device
+    )
+    w2_out = torch.empty(
+        num_experts, K2 // block_k, M2, block_k, dtype=torch.uint8, device=w2.device
+    )
+
+    for i in range(num_experts):
+        t13 = shuffle_matrix_a(w13[i].view(torch.uint8), epilogue_tile_m)
+        w13_out[i] = convert_to_block_layout(t13, block_k)
+
+        t2 = shuffle_matrix_a(w2[i].view(torch.uint8), epilogue_tile_m)
+        w2_out[i] = convert_to_block_layout(t2, block_k)
+
+    return w13_out.view(torch.float8_e4m3fn), w2_out.view(torch.float8_e4m3fn)
+
+
 def _shuffle_mxfp8_moe_weights(
     w13: torch.Tensor,
     w2: torch.Tensor,
@@ -405,6 +479,7 @@ def prepare_fp8_moe_layer_for_fi(
         hasattr(layer, "weight_block_size") and layer.weight_block_size is not None
     )
     is_mxfp8 = block_quant and w13_scale.dtype == torch.uint8
+    is_deepseek_fp8 = block_quant and not is_mxfp8
     is_gated = layer.activation.is_gated
 
     # MXFP8 TRT-LLM requires W31 swap + reorder + shuffle.
@@ -447,6 +522,10 @@ def prepare_fp8_moe_layer_for_fi(
         if block_quant:
             w13_scale = swap_w13_to_w31(w13_scale)
 
+    # DeepSeekFp8 TRT-LLM: shuffle weights into BlockMajorK layout.
+    if is_deepseek_fp8 and is_trtllm:
+        w13, w2 = _shuffle_deepseek_fp8_moe_weights(w13, w2)
+
     # FI TRT-LLM FP8 per-tensor MoE kernel requires weight shuffle
     # and registration of alpha scales.
     if is_trtllm and not block_quant:
diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
index 9568d1320bc6..8b20c13a97f9 100644
--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -12,15 +12,11 @@
 
 import vllm.envs as envs
 from vllm import _custom_ops as ops
-from vllm._aiter_ops import rocm_aiter_ops
 from vllm.logger import init_logger
-from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
-    GroupShape,
     get_fp8_min_max,
 )
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
-    CUTLASS_BLOCK_FP8_SUPPORTED,
     all_close_1d,
     per_tensor_dequantize,
 )
@@ -29,22 +25,14 @@
     ChannelQuantScaleParameter,
     PerTensorScaleParameter,
 )
-from vllm.model_executor.utils import replace_parameter, set_weight_attrs
+from vllm.model_executor.utils import set_weight_attrs
 from vllm.platforms import current_platform
 from vllm.triton_utils import tl, triton
 from vllm.utils.deep_gemm import (
-    fp8_gemm_nt,
     get_tma_aligned_size,
     is_deep_gemm_e8m0_used,
-    is_deep_gemm_supported,
-    should_use_deepgemm_for_fp8_linear,
     transform_sf_into_required_layout,
 )
-from vllm.utils.flashinfer import (
-    flashinfer_fp8_blockscale_gemm,
-    is_flashinfer_fp8_blockscale_gemm_supported,
-    should_use_flashinfer_for_blockscale_fp8_gemm,
-)
 from vllm.utils.torch_utils import direct_register_custom_op
 
 logger = init_logger(__name__)
@@ -56,153 +44,6 @@ def is_fp8(x: torch.dtype | torch.Tensor) -> bool:
     return x == torch.float8_e4m3fn or x == torch.float8_e4m3fnuz
 
 
-# We need to pass in the is_hopper flag as argument because the function
-# current_platform.is_device_capability() is not supported by Torch compiler.
-def cutlass_scaled_mm(
-    A: torch.Tensor,
-    B: torch.Tensor,
-    As: torch.Tensor,
-    Bs: torch.Tensor,
-    block_size: list[int],
-    output_dtype: torch.dtype = torch.float16,
-) -> torch.Tensor:
-    return ops.cutlass_scaled_mm(
-        A,
-        B.T,
-        out_dtype=output_dtype,
-        scale_a=As,
-        scale_b=Bs.T,
-    )
-
-
-# TODO we should be able to change the type of block_size to GroupShape
-# after we resolve GroupShape compilation issue
-# https://github.com/vllm-project/vllm/issues/25270
-def _w8a8_triton_block_scaled_mm_func(
-    qx: torch.Tensor,
-    weight: torch.Tensor,
-    x_scale: torch.Tensor,
-    weight_scale: torch.Tensor,
-    block_size: list[int],
-    output_dtype: torch.dtype,
-) -> torch.Tensor:
-    return w8a8_triton_block_scaled_mm(
-        qx, weight, x_scale, weight_scale, block_size, output_dtype
-    )
-
-
-def _w8a8_triton_block_scaled_mm_fake(
-    qx: torch.Tensor,
-    weight: torch.Tensor,
-    x_scale: torch.Tensor,
-    weight_scale: torch.Tensor,
-    block_size: list[int],
-    output_dtype: torch.dtype,
-) -> torch.Tensor:
-    return torch.empty(
-        (qx.size(0), weight.size(0)), dtype=output_dtype, device=qx.device
-    )
-
-
-direct_register_custom_op(
-    "w8a8_triton_block_scaled_mm_func",
-    _w8a8_triton_block_scaled_mm_func,
-    fake_impl=_w8a8_triton_block_scaled_mm_fake,
-)
-
-
-def _padded_cutlass(
-    qx: torch.Tensor,
-    weight: torch.Tensor,
-    x_scale: torch.Tensor,
-    weight_scale: torch.Tensor,
-    block_size: list[int],
-    output_dtype: torch.dtype,
-) -> torch.Tensor:
-    pad_multiple = 4
-    dim = qx.shape[0]
-    padded = (
-        dim if dim % pad_multiple == 0 else dim + pad_multiple - (dim % pad_multiple)
-    )
-
-    has_pad = padded > dim
-
-    if has_pad:
-        padded_shape = [padded, *qx.shape[1:]]
-        padded_qx = torch.zeros(padded_shape, device=qx.device, dtype=qx.dtype)
-        padded_qx[0 : qx.shape[0], ...].copy_(qx)
-
-        padded_x_scale_shape = [*x_scale.shape[1:], padded]
-        padded_x_scale = torch.ones(
-            padded_x_scale_shape, device=x_scale.device, dtype=x_scale.dtype
-        ).permute(-1, -2)
-        padded_x_scale[0 : x_scale.shape[0], ...].copy_(x_scale)
-
-        output = cutlass_scaled_mm(
-            padded_qx, weight, padded_x_scale, weight_scale, block_size, output_dtype
-        )
-        return output[0 : qx.shape[0], ...]
-    else:
-        return cutlass_scaled_mm(
-            qx, weight, x_scale, weight_scale, block_size, output_dtype
-        )
-
-
-def _padded_cutlass_fake(
-    qx: torch.Tensor,
-    weight: torch.Tensor,
-    x_scale: torch.Tensor,
-    weight_scale: torch.Tensor,
-    block_size: list[int],
-    output_dtype: torch.dtype,
-) -> torch.Tensor:
-    return torch.empty(
-        (qx.size(0), weight.size(0)), dtype=output_dtype, device=qx.device
-    )
-
-
-direct_register_custom_op(
-    "padded_cutlass",
-    _padded_cutlass,
-    fake_impl=_padded_cutlass_fake,
-)
-
-
-def _fp8_gemm_nt_op(
-    q_input: torch.Tensor,
-    input_scale: torch.Tensor,
-    weight: torch.Tensor,
-    weight_scale: torch.Tensor,
-    output: torch.Tensor,
-    use_deep_gemm_e8m0: bool,
-) -> None:
-    fp8_gemm_nt(
-        (q_input, input_scale),
-        (weight, weight_scale),
-        output,
-        is_deep_gemm_e8m0_used=use_deep_gemm_e8m0,
-    )
-
-
-def _fp8_gemm_nt_op_fake(
-    q_input: torch.Tensor,
-    input_scale: torch.Tensor,
-    weight: torch.Tensor,
-    weight_scale: torch.Tensor,
-    output: torch.Tensor,
-    use_deep_gemm_e8m0: bool,
-) -> None:
-    return None
-
-
-direct_register_custom_op(
-    "fp8_gemm_nt_op",
-    _fp8_gemm_nt_op,
-    mutates_args=["output"],
-    fake_impl=_fp8_gemm_nt_op_fake,
-)
-
-
 def _triton_per_token_group_quant_fp8_impl(
     x: torch.Tensor,
     group_size: int,
@@ -236,362 +77,6 @@ def _triton_per_token_group_quant_fp8_fake(
 )
 
 
-def _flashinfer_fp8_blockscale_gemm_impl(
-    input: torch.Tensor,
-    weight: torch.Tensor,
-    weight_scale: torch.Tensor,
-    group_size: int,
-    use_deep_gemm_e8m0: bool,
-) -> torch.Tensor:
-    """
-    Conditional FlashInfer FP8 blockscale GEMM with batch-size-dependent selection.
-
-    This function switches between two optimized kernels based on the input batch size:
-    - For small batches (M < 32): Uses FlashInfer's DeepGEMM swapAB optimization.
-    - For larger batches (M >= 32): Uses the official DeepGEMM kernel.
-
-    The conditional logic must use torch.cond() instead of a simple if-else statement
-    to maintain compatibility with torch.compile graph compilation.
-
-    This batch-size-dependent selection is essential for maintaining model accuracy.
-    Benchmarks on GSM8K show a significant accuracy gap (88% vs 95%) for DeepSeek-V3.1
-    when using FlashInfer's DeepGEMM on M>=32. The M < 32 strategy fixes the accuracy
-    drop.
-
-    Args:
-        input: Input tensor of shape (batch_size, input_dim) in FP8 format
-        weight: Weight tensor of shape (output_dim, input_dim) in FP8 format
-        weight_scale: Scale factors for weight quantization (per-group)
-        group_size: Quantization group size for the weight tensor
-        use_deep_gemm_e8m0: Whether to use the E8M0 format in DeepGEMM quantization
-
-    Returns:
-        Output tensor of shape (batch_size, output_dim) in bfloat16 format
-    """
-
-    def run_flashinfer_deepgemm_swapAB(
-        input: torch.Tensor,
-        weight: torch.Tensor,
-        weight_scale: torch.Tensor,
-    ) -> torch.Tensor:
-        return flashinfer_fp8_blockscale_gemm(
-            input=input,
-            weight=weight,
-            weight_scale=weight_scale,
-            out_dtype=torch.bfloat16,
-        )
-
-    def run_deepgemm(
-        input: torch.Tensor,
-        weight: torch.Tensor,
-        weight_scale: torch.Tensor,
-    ) -> torch.Tensor:
-        q_input, input_scale = per_token_group_quant_fp8(
-            input,
-            group_size=group_size,
-            column_major_scales=True,
-            use_ue8m0=use_deep_gemm_e8m0,
-        )
-        output = torch.empty(
-            (q_input.shape[0], weight.shape[0]),
-            dtype=torch.bfloat16,
-            device=q_input.device,
-        )
-        fp8_gemm_nt(
-            (q_input, input_scale),
-            (weight, weight_scale),
-            output,
-            is_deep_gemm_e8m0_used=use_deep_gemm_e8m0,
-        )
-        return output
-
-    if envs.VLLM_BATCH_INVARIANT:
-        return run_deepgemm(input, weight, weight_scale)
-
-    condition = input.shape[0] < 32
-
-    # PyTorch's torch.compile cannot handle input-dependent control flow in standard
-    # Python conditionals. torch.cond() explicitly registers both code paths in the
-    # computation graph, allowing torch.compile to capture both branches.
-    # without torch.cond, the M < 32 condition won't be able to be captured by torch
-    # compile
-    return torch.cond(
-        condition,
-        run_flashinfer_deepgemm_swapAB,
-        run_deepgemm,
-        (input, weight, weight_scale),
-    )
-
-
-def _flashinfer_fp8_blockscale_gemm_fake(
-    input: torch.Tensor,
-    weight: torch.Tensor,
-    weight_scale: torch.Tensor,
-    group_size: int,
-    use_deep_gemm_e8m0: bool,
-) -> torch.Tensor:
-    """
-    Required fake/meta implementation for torch.compile graph tracing.
-    """
-    return torch.empty(
-        input.shape[0], weight.shape[0], dtype=torch.bfloat16, device=input.device
-    )
-
-
-direct_register_custom_op(
-    "flashinfer_fp8_blockscale_gemm",
-    _flashinfer_fp8_blockscale_gemm_impl,
-    fake_impl=_flashinfer_fp8_blockscale_gemm_fake,
-)
-
-
-# TODO fix ROCm->Triton custom path:
-#  https://github.com/vllm-project/vllm/issues/14397
-class W8A8BlockFp8LinearOp:
-    """
-    This class executes a Blocked FP8 linear layer using cutlass if supported
-    and torch.scaled_mm otherwise.
-    """
-
-    def __init__(
-        self,
-        weight_group_shape: GroupShape,
-        act_quant_group_shape: GroupShape,
-        cutlass_block_fp8_supported: bool = CUTLASS_BLOCK_FP8_SUPPORTED,
-        use_aiter_and_is_supported: bool = False,
-        use_deep_gemm: bool | None = None,
-    ):
-        self.weight_group_shape = weight_group_shape
-        self.act_quant_group_shape = act_quant_group_shape
-        if use_deep_gemm is not None:
-            self.is_deep_gemm_supported = use_deep_gemm
-        else:
-            self.is_deep_gemm_supported = is_deep_gemm_supported()
-        self.is_hopper = current_platform.is_device_capability(90)
-        self.use_deep_gemm_e8m0 = is_deep_gemm_e8m0_used()
-        self.is_flashinfer_supported = is_flashinfer_fp8_blockscale_gemm_supported()
-
-        # Get the correct blockscale mul and input quant operations.
-        # We can't use _dispatch_w8a8_blockscale_op to figure out if we want
-        # to use deepgemm because we don't know the shape of weights (and
-        # whether deepgemm supports it) at the init time.
-        self.w8a8_blockscale_op, self.input_quant_op = (
-            self._dispatch_w8a8_blockscale_op(
-                cutlass_block_fp8_supported, use_aiter_and_is_supported
-            )
-        )
-        self.deepgemm_input_quant_op = (
-            QuantFP8(
-                False,
-                self.act_quant_group_shape,
-                column_major_scales=True,
-                tma_aligned_scales=envs.VLLM_USE_DEEP_GEMM_TMA_ALIGNED_SCALES,
-                use_ue8m0=self.use_deep_gemm_e8m0,
-            )
-            if self.is_deep_gemm_supported
-            else None
-        )
-
-    def apply(
-        self,
-        input: torch.Tensor,
-        weight: torch.Tensor,
-        weight_scale: torch.Tensor,
-        input_scale: torch.Tensor | None = None,
-        bias: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        assert input_scale is None
-        # View input as 2D matrix for fp8 methods
-        input_2d = input.view(-1, input.shape[-1])
-        output_shape = [*input.shape[:-1], weight.shape[0]]
-        output_dtype = input.dtype
-
-        if should_use_flashinfer_for_blockscale_fp8_gemm(
-            self.is_flashinfer_supported, output_dtype, input_2d, weight
-        ) and should_use_deepgemm_for_fp8_linear(
-            output_dtype, weight, self.is_deep_gemm_supported
-        ):
-            output = self._run_flashinfer(input_2d, weight, weight_scale)
-
-        elif should_use_deepgemm_for_fp8_linear(
-            output_dtype, weight, self.is_deep_gemm_supported
-        ):
-            output = self._run_deepgemm(input_2d, weight, weight_scale)
-        else:
-            output = self.w8a8_blockscale_op(
-                input_2d, weight, weight_scale, input_scale
-            )
-
-        if bias is not None:
-            output = output + bias
-        return output.to(dtype=input.dtype).view(*output_shape)
-
-    def _run_deepgemm(
-        self,
-        input_2d: torch.Tensor,
-        weight: torch.Tensor,
-        weight_scale: torch.Tensor,
-    ) -> torch.Tensor:
-        assert self.deepgemm_input_quant_op is not None
-        q_input, input_scale = self.deepgemm_input_quant_op(input_2d)
-        output = torch.empty(
-            (q_input.shape[0], weight.shape[0]),
-            dtype=torch.bfloat16,
-            device=q_input.device,
-        )
-        torch.ops.vllm.fp8_gemm_nt_op(
-            q_input, input_scale, weight, weight_scale, output, self.use_deep_gemm_e8m0
-        )
-        return output
-
-    def _run_cutlass(
-        self,
-        input_2d: torch.Tensor,
-        weight: torch.Tensor,
-        weight_scale: torch.Tensor,
-        input_scale: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        assert input_scale is None
-        assert self.input_quant_op is not None
-        q_input, input_scale = self.input_quant_op(input_2d)
-        if self.is_hopper:
-            return torch.ops.vllm.padded_cutlass(
-                q_input,
-                weight,
-                input_scale,
-                weight_scale,
-                list(self.weight_group_shape),
-                input_2d.dtype,
-            )
-        else:
-            return cutlass_scaled_mm(
-                q_input,
-                weight,
-                input_scale,
-                weight_scale,
-                list(self.weight_group_shape),
-                input_2d.dtype,
-            )
-
-    def _run_aiter(
-        self,
-        input_2d: torch.Tensor,
-        weight: torch.Tensor,
-        weight_scale: torch.Tensor,
-        input_scale: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        assert self.act_quant_group_shape == GroupShape(1, 128)
-
-        n, k = weight.shape
-
-        use_triton = (
-            not current_platform.is_fp8_fnuz()
-            and rocm_aiter_ops.is_triton_gemm_w8a8_tuned(n, k)
-        )
-
-        if use_triton:
-            gemm_a8w8_blockscale_op = rocm_aiter_ops.triton_gemm_a8w8_blockscale
-        else:
-            gemm_a8w8_blockscale_op = rocm_aiter_ops.gemm_a8w8_blockscale
-
-        if input_scale is not None:
-            q_input = input_2d
-        else:
-            q_input, input_scale = self.input_quant_op(input_2d, use_triton=use_triton)
-
-        return gemm_a8w8_blockscale_op(
-            q_input,
-            weight,
-            input_scale,
-            weight_scale,
-            list(self.weight_group_shape),
-            output_dtype=input_2d.dtype,
-        )
-
-    def _run_triton(
-        self,
-        input_2d: torch.Tensor,
-        weight: torch.Tensor,
-        weight_scale: torch.Tensor,
-        input_scale: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        assert input_scale is None
-        assert self.input_quant_op is not None
-        q_input, input_scale = self.input_quant_op(input_2d)
-        return torch.ops.vllm.w8a8_triton_block_scaled_mm_func(
-            q_input,
-            weight,
-            input_scale,
-            weight_scale,
-            list(self.weight_group_shape),
-            input_2d.dtype,
-        )
-
-    def _run_flashinfer(
-        self,
-        input_2d: torch.Tensor,
-        weight: torch.Tensor,
-        weight_scale: torch.Tensor,
-    ) -> torch.Tensor:
-        """
-        Run FlashInfer FP8 block-scale GEMM.
-
-        This backend uses TensorRT-LLM's FP8 block-scale GEMM kernels
-        and supports FP8+FP8 (W8A8 full quantization) on SM90+ (Hopper).
-        """
-        # Now call FlashInfer with BF16 input + FP8 weight, input will be
-        # quantized with FlashInfer kernel (W8A8)
-        output = torch.ops.vllm.flashinfer_fp8_blockscale_gemm(
-            input=input_2d,  # BF16 input
-            weight=weight,  # FP8 weight
-            weight_scale=weight_scale,  # Weight scales
-            group_size=self.act_quant_group_shape.col,
-            use_deep_gemm_e8m0=self.use_deep_gemm_e8m0,
-        )
-        return output
-
-    def _dispatch_w8a8_blockscale_op(
-        self,
-        use_cutlass: bool,
-        use_aiter_and_is_supported: bool,
-    ) -> tuple[
-        Callable[
-            [
-                torch.Tensor,
-                torch.Tensor,
-                torch.Tensor,
-                torch.Tensor | None,
-            ],
-            torch.Tensor,
-        ],
-        QuantFP8,
-    ]:
-        if use_cutlass:
-            return self._run_cutlass, (
-                QuantFP8(
-                    False,
-                    self.act_quant_group_shape,
-                    column_major_scales=True,
-                    use_ue8m0=False,
-                )
-            )
-        if use_aiter_and_is_supported:
-            return self._run_aiter, QuantFP8(
-                False,
-                self.act_quant_group_shape,
-                column_major_scales=False,
-                use_ue8m0=False,
-            )
-        return self._run_triton, (
-            QuantFP8(
-                False,
-                self.act_quant_group_shape,
-                column_major_scales=False,
-                use_ue8m0=False,
-            )
-        )
-
-
 def input_to_float8(
     x: torch.Tensor, dtype: torch.dtype | None = None
 ) -> tuple[torch.Tensor, torch.Tensor]:
@@ -664,6 +149,148 @@ def _per_token_group_quant_fp8(
     tl.store(y_s_ptr, y_s)
 
 
+@triton.jit
+def _silu_mul_quant_fp8_packed_kernel(
+    input_ptr,
+    output_q_ptr,
+    output_scale_ptr,
+    M,
+    input_stride_m,
+    output_q_stride_m,
+    output_scale_stride_k,
+    clamp_limit,
+    N: tl.constexpr,
+    NUM_GROUPS: tl.constexpr,
+    fp8_min: tl.constexpr,
+    fp8_max: tl.constexpr,
+    GROUP_SIZE: tl.constexpr,
+    BLOCK_M: tl.constexpr,
+    HAS_CLAMP: tl.constexpr,
+):
+    N_2: tl.constexpr = N // 2
+
+    pid_pack = tl.program_id(0)
+    pid_m = tl.program_id(1)
+    m_offset = pid_m.to(tl.int64) * BLOCK_M
+
+    if m_offset >= M:
+        return
+
+    offs_m = tl.arange(0, BLOCK_M)
+    offs_n = tl.arange(0, GROUP_SIZE)
+    row_mask = (m_offset + offs_m) < M
+
+    base_row_offset = (m_offset + offs_m[:, None]) * input_stride_m
+    base_out_offset = (m_offset + offs_m[:, None]) * output_q_stride_m
+
+    packed_scale = tl.zeros((BLOCK_M,), dtype=tl.int32)
+
+    for pack_idx in tl.static_range(4):
+        group_id = pid_pack * 4 + pack_idx
+
+        if group_id < NUM_GROUPS:
+            n_offset = group_id * GROUP_SIZE
+
+            act_ptrs = input_ptr + base_row_offset + n_offset + offs_n[None, :]
+            act_in = tl.load(act_ptrs, mask=row_mask[:, None], other=0.0)
+
+            mul_ptrs = act_ptrs + N_2
+            mul_in = tl.load(mul_ptrs, mask=row_mask[:, None], other=0.0)
+
+            act_f32 = act_in.to(tl.float32)
+            mul_f32 = mul_in.to(tl.float32)
+
+            if HAS_CLAMP:
+                act_f32 = tl.minimum(act_f32, clamp_limit)
+                mul_f32 = tl.clamp(mul_f32, -clamp_limit, clamp_limit)
+
+            y = (act_f32 / (1.0 + tl.exp(-act_f32))) * mul_f32
+            # Round through bf16 to match unfused precision path
+            y = y.to(tl.bfloat16).to(tl.float32)
+
+            absmax = tl.max(tl.abs(y), axis=1)
+
+            scale_raw = tl.maximum(absmax / fp8_max, 1e-10)
+            exponent = tl.ceil(tl.log2(scale_raw))
+            scale = tl.math.exp2(exponent)
+
+            y_q = tl.clamp(y / scale[:, None], fp8_min, fp8_max)
+
+            out_q_ptrs = output_q_ptr + base_out_offset + n_offset + offs_n[None, :]
+            tl.store(
+                out_q_ptrs,
+                y_q.to(output_q_ptr.dtype.element_ty),
+                mask=row_mask[:, None],
+            )
+
+            exponent_biased = tl.clamp(exponent + 127.0, 0.0, 255.0).to(tl.int32)
+            packed_scale = packed_scale | (exponent_biased << (pack_idx * 8))
+
+    scale_ptrs = output_scale_ptr + pid_pack * output_scale_stride_k + m_offset + offs_m
+    tl.store(scale_ptrs, packed_scale, mask=row_mask)
+
+
+def silu_mul_quant_fp8_packed_triton(
+    input: torch.Tensor,
+    group_size: int = 128,
+    output_q: torch.Tensor | None = None,
+    clamp_limit: float | None = None,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    assert input.dim() == 2
+    assert input.is_contiguous()
+
+    M, N = input.shape
+    N_2 = N // 2
+
+    assert N_2 % group_size == 0
+
+    fp8_dtype = torch.float8_e4m3fn
+    finfo = torch.finfo(fp8_dtype)
+    fp8_min, fp8_max = finfo.min, finfo.max
+
+    num_groups_per_row = N_2 // group_size
+    num_packed_groups = (num_groups_per_row + 3) // 4
+    tma_aligned_M = ((M + 3) // 4) * 4
+
+    if output_q is None:
+        output_q = torch.empty((M, N_2), dtype=fp8_dtype, device=input.device)
+
+    output_scale_packed = torch.empty(
+        (num_packed_groups, tma_aligned_M),
+        dtype=torch.int32,
+        device=input.device,
+    ).T[:M, :]
+
+    BLOCK_M = 8
+    grid = (num_packed_groups, (M + BLOCK_M - 1) // BLOCK_M)
+
+    num_warps = max(4, group_size // 32)
+    num_stages = 2
+
+    has_clamp = clamp_limit is not None
+    _silu_mul_quant_fp8_packed_kernel[grid](
+        input,
+        output_q,
+        output_scale_packed,
+        M,
+        input.stride(0),
+        output_q.stride(0),
+        output_scale_packed.stride(1),
+        clamp_limit if has_clamp else 0.0,
+        N=N,
+        NUM_GROUPS=num_groups_per_row,
+        fp8_min=fp8_min,
+        fp8_max=fp8_max,
+        GROUP_SIZE=group_size,
+        BLOCK_M=BLOCK_M,
+        HAS_CLAMP=has_clamp,
+        num_warps=num_warps,
+        num_stages=num_stages,
+    )
+
+    return output_q, output_scale_packed
+
+
 @triton.jit
 def _silu_mul_per_token_group_quant_fp8_colmajor(
     y_ptr,  # [M, N]
@@ -675,9 +302,11 @@ def _silu_mul_per_token_group_quant_fp8_colmajor(
     y_s_col_stride: tl.int64,
     # Information for float8
     eps,
+    clamp_limit,
     fp8_min: tl.constexpr,
     fp8_max: tl.constexpr,
     use_ue8m0: tl.constexpr,
+    HAS_CLAMP: tl.constexpr,
     # Meta-parameters
     GROUP_SIZE: tl.constexpr,
     BLOCK_M: tl.constexpr,
@@ -694,8 +323,8 @@ def _silu_mul_per_token_group_quant_fp8_colmajor(
     pid_n = tl.program_id(1)
     N_2 = N // 2
 
-    m_offset = pid_m * BLOCK_M
-    n_offset = pid_n * BLOCK_N
+    m_offset = pid_m.to(tl.int64) * BLOCK_M
+    n_offset = pid_n.to(tl.int64) * BLOCK_N
     if m_offset >= M:
         return
 
@@ -709,7 +338,16 @@ def _silu_mul_per_token_group_quant_fp8_colmajor(
     act_in = tl.load(act_in_ptrs)
     mul_in = tl.load(act_in_ptrs + N_2)
 
-    # silu & mul
+    # silu & mul — match C++ silu_and_mul: clamp in fp32 then store back to the
+    # input dtype, run silu in fp32 then narrow, and do the mul at input
+    # precision so HAS_CLAMP True/False share the same multiplication path.
+    if HAS_CLAMP:
+        act_in = tl.minimum(act_in.to(tl.float32), clamp_limit).to(
+            y_ptr.dtype.element_ty
+        )
+        mul_in = tl.clamp(mul_in.to(tl.float32), -clamp_limit, clamp_limit).to(
+            y_ptr.dtype.element_ty
+        )
     act_in = act_in.to(tl.float32)
     one_f32 = tl.cast(1, tl.float32)
     silu_out = (act_in / (one_f32 + tl.exp(-act_in))).to(y_ptr.dtype.element_ty)
@@ -740,6 +378,7 @@ def silu_mul_per_token_group_quant_fp8_colmajor(
     output: torch.Tensor | None = None,  # [M, N // 2]
     use_ue8m0: bool | None = None,
     eps: float = 1e-10,
+    clamp_limit: float | None = None,
 ):
     """
     silu+mul + block-fp8 quant with group size 128.
@@ -782,6 +421,7 @@ def silu_mul_per_token_group_quant_fp8_colmajor(
     assert N_2 % BLOCK_N == 0
     grid = (M // BLOCK_M, N_2 // BLOCK_N)
 
+    has_clamp = clamp_limit is not None
     _silu_mul_per_token_group_quant_fp8_colmajor[grid](
         input,
         output,
@@ -790,9 +430,11 @@ def silu_mul_per_token_group_quant_fp8_colmajor(
         N,
         output_scales.stride(-1),
         eps,
+        clamp_limit if has_clamp else 0.0,
         fp8_min,
         fp8_max,
         use_ue8m0,
+        has_clamp,
         GROUP_SIZE,
         BLOCK_M,
         BLOCK_N,
@@ -931,7 +573,7 @@ def per_token_group_quant_fp8(
         shape = x.shape[:-1] + (x.shape[-1] // group_size,)
         x_s = torch.empty(shape, device=x.device, dtype=torch.float32)
 
-    # prefer CUDA kernel if available
+    # prefer CUDA/XPU kernel if available
     # TODO(bnell): this causes some fp8 moe test to fail.
     if current_platform.is_cuda() and x.is_contiguous():
         torch.ops._C.per_token_group_fp8_quant(
@@ -948,6 +590,12 @@ def per_token_group_quant_fp8(
         )
         return x_q, x_s
 
+    if current_platform.is_xpu() and x.is_contiguous():
+        torch.ops._C.per_token_group_fp8_quant(
+            x, x_q, x_s, group_size, eps, fp8_min, fp8_max, use_ue8m0
+        )
+        return x_q, x_s
+
     # TRITON FALLBACK
     M = x.numel() // group_size
     N = group_size
@@ -1216,6 +864,15 @@ def w8a8_triton_block_scaled_mm(
     assert len(block_size) == 2
     block_n, block_k = block_size[0], block_size[1]
 
+    # Triton cannot currently bind E8M0 scale tensors directly. On ROCm,
+    # DeepSeek-V4 checkpoints store block scales in exponent-only E8M0 format,
+    # so decode them to fp32 before launching the kernel.
+    if current_platform.is_rocm() or current_platform.is_xpu():
+        if As.dtype == torch.float8_e8m0fnu:
+            As = _upcast_e8m0_to_fp32(As).contiguous()
+        if Bs.dtype == torch.float8_e8m0fnu:
+            Bs = _upcast_e8m0_to_fp32(Bs).contiguous()
+
     assert A.shape[-1] == B.shape[-1]
     assert A.shape[:-1] == As.shape[:-1] and A.is_contiguous()
     assert triton.cdiv(A.shape[-1], block_k) == As.shape[-1]
@@ -1338,19 +995,65 @@ def requant_weight_ue8m0_inplace(
         s_old.copy_(s_requant)
 
 
+def _upcast_e8m0_to_fp32(scale: torch.Tensor) -> torch.Tensor:
+    """Upcast E8M0 (exponent-only) scale to float32.
+
+    E8M0 stores only the 8-bit biased exponent (bias=127). To convert
+    to float32 we place those 8 bits into the exponent field of an
+    IEEE-754 float32 (bits 23-30) with sign=0 and mantissa=0.
+    """
+    exp_bits = scale.view(torch.uint8).to(torch.int32)
+    fp32_bits = exp_bits << 23
+    return fp32_bits.view(torch.float32)
+
+
 def deepgemm_post_process_fp8_weight_block(
-    wq: torch.Tensor, ws: torch.Tensor, quant_block_shape: tuple[int], use_e8m0: bool
+    wq: torch.Tensor,
+    ws: torch.Tensor,
+    quant_block_shape: tuple[int, ...],
+    use_e8m0: bool,
+    is_bmm: bool = False,
+    bmm_batch_size: int = 0,
 ) -> tuple[torch.Tensor, torch.Tensor]:
     assert wq.dtype == torch.float8_e4m3fn, (
         "Expected quantized tensor dtype "
         f"to be torch.float8_e4m3fn, got {wq.dtype} instead."
     )
-    assert ws.dtype == torch.float32, (
-        f"Expected tensor scales dtype to be torch.float32, got {ws.dtype} instead"
-    )
 
-    if use_e8m0:
-        requant_weight_ue8m0_inplace(wq, ws, block_size=quant_block_shape)
+    if ws.dtype == torch.float8_e8m0fnu:
+        # Scales already in E8M0 from checkpoint — upcast to fp32
+        # and skip requantization (weights already have power-of-two scales).
+        ws = _upcast_e8m0_to_fp32(ws)
+    else:
+        assert ws.dtype == torch.float32, (
+            f"Expected tensor scales dtype to be torch.float32 or "
+            f"torch.float8_e8m0fnu, got {ws.dtype} instead"
+        )
+        if use_e8m0:
+            requant_weight_ue8m0_inplace(wq, ws, block_size=quant_block_shape)
+
+    if is_bmm:
+        # Reshape 2D weight/scale to 3D for grouped BMM (einsum):
+        # wq: (g*r, d) -> (g, r, d)
+        # ws: (g*r/128, d/128) -> (g, r/128, d/128)
+        g = bmm_batch_size
+        assert wq.ndim == 2 and ws.ndim == 2
+        d = wq.size(1)
+        r = wq.size(0) // g
+        wq = wq.view(g, r, d)
+        ws = ws.view(g, r // quant_block_shape[0], d // quant_block_shape[1])
+        # Pre-transform scale with recipe=(1, 128, 128) to broadcast + pack
+        # into TMA-aligned UE8M0 (INT32) layout. At runtime fp8_einsum uses
+        # recipe=(1, 1, 128) which sees INT dtype and skips re-transform.
+        dg_ws = transform_sf_into_required_layout(
+            sf=ws,
+            mn=r,
+            k=d,
+            recipe=(1, quant_block_shape[0], quant_block_shape[1]),
+            num_groups=g,
+            is_sfa=False,
+        )
+        return wq, dg_ws
 
     original_ndim = wq.ndim
     if wq.ndim == 2:
@@ -1499,11 +1202,13 @@ def create_fp8_scale_parameter(
     input_size_per_partition: int,
     block_size: list[int] | None,
     weight_loader: Callable | None,
+    scale_dtype: torch.dtype | None = None,
 ) -> torch.nn.Parameter:
     """Create scale parameter based on quantization strategy."""
+    dtype = scale_dtype if scale_dtype is not None else torch.float32
     if parameter_type == ChannelQuantScaleParameter:
         scale = parameter_type(
-            data=torch.empty((sum(output_partition_sizes), 1), dtype=torch.float32),
+            data=torch.empty((sum(output_partition_sizes), 1), dtype=dtype),
             output_dim=0,
             weight_loader=weight_loader,
         )
@@ -1515,7 +1220,7 @@ def create_fp8_scale_parameter(
             data=torch.empty(
                 (output_size_per_partition + block_n - 1) // block_n,
                 (input_size_per_partition + block_k - 1) // block_k,
-                dtype=torch.float32,
+                dtype=dtype,
             ),
             input_dim=1,
             output_dim=0,
@@ -1523,13 +1228,14 @@ def create_fp8_scale_parameter(
         )
     elif parameter_type == PerTensorScaleParameter:
         scale = parameter_type(
-            data=torch.empty(len(output_partition_sizes), dtype=torch.float32),
+            data=torch.empty(len(output_partition_sizes), dtype=dtype),
             weight_loader=weight_loader,
         )
     else:
         raise ValueError(f"Unknown parameter type: {parameter_type}")
 
-    scale[:] = torch.finfo(torch.float32).min
+    if dtype == torch.float32:
+        scale[:] = torch.finfo(torch.float32).min
     set_weight_attrs(scale, {"scale_type": "weight_scale"})
     return scale
 
@@ -1560,7 +1266,7 @@ def process_fp8_weight_tensor_strategy(
         requantize_with_max_scale,
     )
 
-    if current_platform.is_fp8_fnuz():
+    if current_platform.is_fp8_fnuz() and weight.dtype == torch.float8_e4m3fn:
         weight, weight_scale, input_scale = normalize_e4m3fn_to_e4m3fnuz(
             weight=weight, weight_scale=weight_scale, input_scale=input_scale
         )
@@ -1586,7 +1292,7 @@ def process_fp8_weight_channel_strategy(
         normalize_e4m3fn_to_e4m3fnuz,
     )
 
-    if current_platform.is_fp8_fnuz():
+    if current_platform.is_fp8_fnuz() and weight.dtype == torch.float8_e4m3fn:
         weight, weight_scale, input_scale = normalize_e4m3fn_to_e4m3fnuz(
             weight=weight, weight_scale=weight_scale, input_scale=input_scale
         )
@@ -1603,7 +1309,7 @@ def process_fp8_weight_block_strategy(
         normalize_e4m3fn_to_e4m3fnuz,
     )
 
-    if current_platform.is_fp8_fnuz():
+    if current_platform.is_fp8_fnuz() and weight.dtype == torch.float8_e4m3fn:
         weight, weight_scale, _ = normalize_e4m3fn_to_e4m3fnuz(
             weight=weight, weight_scale=weight_scale
         )
@@ -1612,34 +1318,6 @@ def process_fp8_weight_block_strategy(
     return weight, weight_scale
 
 
-def maybe_post_process_fp8_weight_block(layer: torch.nn.Module):
-    assert layer.weight_block_size is not None
-
-    from vllm.utils.deep_gemm import (
-        is_deep_gemm_e8m0_used,
-        should_use_deepgemm_for_fp8_linear,
-    )
-
-    # On Blackwell or Hopper, if E8M0 for DeepGemm is used, we need to
-    # requantize the weight and input to the specific scale
-    # at the same time.
-    should_use_deepgemm = should_use_deepgemm_for_fp8_linear(
-        layer.orig_dtype, layer.weight
-    )
-    if should_use_deepgemm:
-        scale_attr = (
-            "weight_scale_inv" if hasattr(layer, "weight_scale_inv") else "weight_scale"
-        )
-        dg_weight, dg_weight_scale = deepgemm_post_process_fp8_weight_block(
-            wq=layer.weight.data,
-            ws=getattr(layer, scale_attr).data,
-            quant_block_shape=tuple(layer.weight_block_size),
-            use_e8m0=is_deep_gemm_e8m0_used(),
-        )
-        replace_parameter(layer, "weight", dg_weight)
-        replace_parameter(layer, scale_attr, dg_weight_scale)
-
-
 def process_fp8_weight_tensor_strategy_moe(
     weight: torch.Tensor,
     weight_scales: torch.Tensor,
diff --git a/vllm/model_executor/layers/quantization/utils/gptq_utils.py b/vllm/model_executor/layers/quantization/utils/gptq_utils.py
index dfebeca93392..691d80b0b747 100644
--- a/vllm/model_executor/layers/quantization/utils/gptq_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/gptq_utils.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from collections.abc import Mapping
 from copy import deepcopy
-from fractions import Fraction
 from types import MappingProxyType
 from typing import TYPE_CHECKING
 
@@ -16,16 +15,14 @@
 )
 
 if TYPE_CHECKING:
-    from ..gptq import GPTQConfig
-    from ..gptq_marlin import GPTQMarlinConfig
+    from ..auto_gptq import AutoGPTQConfig
 else:
-    GPTQConfig = object
-    GPTQMarlinConfig = object
+    AutoGPTQConfig = object
 
 
 # Match dynamic rules with module name (prefix) and override quantize
 # config if module (prefix) matches a rule
-def override_config(config: GPTQConfig | GPTQMarlinConfig, prefix: str):
+def override_config(config: AutoGPTQConfig, prefix: str):
     weight_bits = get_dynamic_override(config, prefix, "bits", config.weight_bits)
     if isinstance(weight_bits, int):
         config.weight_bits = weight_bits
@@ -36,31 +33,23 @@ def override_config(config: GPTQConfig | GPTQMarlinConfig, prefix: str):
     if isinstance(desc_act, bool):
         config.desc_act = desc_act
 
-    config.pack_factor = Fraction(32, config.weight_bits)  # packed into int32
-    if config.get_name() == "gptq_marlin":
-        assert isinstance(config, GPTQMarlinConfig)
-        is_sym = get_dynamic_override(config, prefix, "sym", config.is_sym)
-        if isinstance(is_sym, bool):
-            config.is_sym = is_sym
-
-        if (config.weight_bits, config.is_sym) not in config.TYPE_MAP:
-            raise ValueError(
-                "Unsupported quantization config: "
-                f"bits={config.weight_bits}, sym={config.is_sym}"
-            )
+    config.pack_factor = 32 // config.weight_bits  # packed into int32
+    assert isinstance(config, AutoGPTQConfig)
+    is_sym = get_dynamic_override(config, prefix, "sym", config.is_sym)
+    if isinstance(is_sym, bool):
+        config.is_sym = is_sym
 
-        config.quant_type = config.TYPE_MAP[(config.weight_bits, config.is_sym)]
-    elif config.get_name() == "gptq":
-        assert isinstance(config, GPTQConfig)
-        if config.weight_bits not in [2, 3, 4, 8]:
-            raise ValueError(
-                "Currently, only 2/3/4/8-bit weight quantization is "
-                f"supported for GPTQ, but got {config.weight_bits} bits."
-            )
+    if (config.weight_bits, config.is_sym) not in config.TYPE_MAP:
+        raise ValueError(
+            "Unsupported quantization config: "
+            f"bits={config.weight_bits}, sym={config.is_sym}"
+        )
+
+    config.quant_type = config.TYPE_MAP[(config.weight_bits, config.is_sym)]
 
 
 def get_dynamic_override(
-    config: GPTQConfig | GPTQMarlinConfig,
+    config: AutoGPTQConfig,
     layer_name: str,
     key: str | None = None,
     default_value: int | bool | None = None,
@@ -126,7 +115,7 @@ def is_layer_gptq_quantized(
 
 
 def get_linear_quant_method(
-    config: GPTQConfig | GPTQMarlinConfig,
+    config: AutoGPTQConfig,
     layer: torch.nn.Module,
     prefix: str,
     linear_method_cls: type,
diff --git a/vllm/model_executor/layers/quantization/utils/humming_utils.py b/vllm/model_executor/layers/quantization/utils/humming_utils.py
new file mode 100644
index 000000000000..63735708b6c6
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/humming_utils.py
@@ -0,0 +1,222 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import Any
+
+import regex as re
+import torch
+from humming.layer import HummingInputSchema, HummingMethod
+from humming.schema import BaseWeightSchema
+
+from vllm import envs
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEQuantConfig,
+    FusedMoEQuantDesc,
+)
+from vllm.model_executor.layers.fused_moe.routed_experts import RoutedExperts
+from vllm.model_executor.layers.linear import LinearBase
+from vllm.model_executor.layers.quantization.utils.quant_utils import GroupShape
+
+
+def humming_is_layer_skipped(config: dict[str, Any], prefix: str):
+    if not config:
+        return True
+
+    keys = ["ignored_layers", "ignore", "modules_to_not_convert"]
+    ignored_layers: list[str] = []
+    for key in keys:
+        ignored_layers = config.get(key, []) or []
+        if not ignored_layers:
+            break
+
+    if any(module_name in prefix for module_name in ignored_layers):
+        return True
+    if "lm_head" in prefix:
+        return True
+
+    for regex in config.get("dynamic", {}):
+        if regex[:1] != "-":
+            continue
+        if re.match(regex[2:], prefix):
+            return True
+
+    return False
+
+
+def prepare_humming_layer(layer: LinearBase, quant_config: dict):
+    weight_schema = BaseWeightSchema.from_config(quant_config)
+    input_schema = HummingInputSchema()
+
+    shape_k_stacks = [layer.input_size_per_partition]
+    shape_n_stacks = layer.output_partition_sizes
+
+    # Step 1: convert weight to humming standard format
+    weight_schema, tensors = weight_schema.convert_humming(
+        tensors=layer.named_parameters(),
+        shape_n_stacks=shape_n_stacks,
+        shape_k_stacks=shape_k_stacks,
+        param_dtype=layer.params_dtype,
+    )
+
+    layer.weight_schema = weight_schema
+
+    for name, _ in list(layer.named_parameters()):
+        delattr(layer, name)
+
+    for name, tensor in tensors.items():
+        param = torch.nn.Parameter(tensor, requires_grad=False)
+        setattr(layer, name, param)
+
+    # Step 2: transform weight (humming standard format) for forwarding
+    HummingMethod.prepare_layer_meta(
+        layer=layer,
+        shape_n=layer.output_partition_sizes_sum,
+        shape_k=layer.input_size_per_partition,
+        weight_schema=weight_schema,
+        input_schema=input_schema,
+        pad_n_to_multiple=256,
+        pad_k_to_multiple=128,
+        has_bias=layer.has_bias,
+        torch_dtype=layer.param_dtype,
+    )
+
+    HummingMethod.transform_humming_layer(layer)
+
+
+def prepare_humming_moe_layer(layer: RoutedExperts, quant_config: dict):
+    weight_schema = BaseWeightSchema.from_config(quant_config)
+    input_quant_config = envs.VLLM_HUMMING_INPUT_QUANT_CONFIG or {}
+    if humming_is_layer_skipped(input_quant_config, layer.layer_name):
+        input_schema = HummingInputSchema()
+    else:
+        # TODO: read input_quant_config from quant_config
+        input_schema = HummingInputSchema.from_config(input_quant_config)
+
+    is_gated = layer.activation.is_gated
+    shape_config = {
+        "w13": (
+            layer.moe_config.intermediate_size_per_partition * 2,
+            layer.moe_config.hidden_dim,
+        ),
+        "w2": (
+            layer.moe_config.hidden_dim,
+            layer.moe_config.intermediate_size_per_partition * (1 if is_gated else 2),
+        ),
+    }
+
+    layer.weight_schemas = {}
+    layer.input_schemas = {}
+
+    for sublayer_name in shape_config:
+        # Step 1: convert weight to humming standard format
+        tensors: dict[str, torch.Tensor] = dict(
+            (key.removeprefix(sublayer_name + "_"), value)
+            for key, value in layer.state_dict().items()
+            if key.startswith(sublayer_name + "_")
+        )
+
+        shape_n, shape_k = shape_config[sublayer_name]
+        shape_n_stacks = [shape_n]
+        shape_k_stacks = [shape_k]
+        if sublayer_name == "w13":
+            shape_n_stacks = [shape_n // 2] * 2
+
+        weight_schema_new, tensors = weight_schema.convert_humming(
+            tensors=tensors,
+            shape_n_stacks=shape_n_stacks,
+            shape_k_stacks=shape_k_stacks,
+            num_experts=layer.local_num_experts,
+            param_dtype=layer.params_dtype,
+        )
+
+        layer.weight_schemas[sublayer_name] = weight_schema_new
+        layer.input_schemas[sublayer_name] = input_schema
+
+        for name, _ in list(layer.named_parameters()):
+            if not name.startswith(sublayer_name + "_"):
+                continue
+            delattr(layer, name)
+
+        for name, tensor in tensors.items():
+            name = f"{sublayer_name}_{name}"
+            param = torch.nn.Parameter(tensor, requires_grad=False)
+            setattr(layer, name, param)
+
+        # Step 2: transform weight (humming standard format) for forwarding
+        HummingMethod.prepare_layer_meta(
+            layer=layer,
+            shape_n=shape_n,
+            shape_k=shape_k,
+            pad_n_to_multiple=256,
+            pad_k_to_multiple=128,
+            input_schema=input_schema,
+            weight_schema=weight_schema_new,
+            has_bias=layer.moe_config.has_bias,
+            num_experts=layer.num_experts,
+            torch_dtype=layer.params_dtype,
+            sublayer_name=sublayer_name,
+        )
+
+        HummingMethod.transform_humming_layer(layer, sublayer_name=sublayer_name)
+
+    if not hasattr(layer, "locks"):
+        device = layer.w13_weight.device
+        locks = torch.zeros(1024, dtype=torch.int32, device=device)
+        layer.register_buffer("locks", locks)
+
+
+def get_humming_moe_quant_config(
+    layer: RoutedExperts,
+    gemm1_alpha: float | None = None,
+    gemm1_beta: float | None = None,
+    gemm1_clamp_limit: float | None = None,
+):
+    input_schema = layer.input_schemas["w13"]
+    weight_schema = layer.weight_schemas["w13"]
+
+    a_dtype = input_schema.a_dtype
+    if a_dtype is None or a_dtype.num_bits == 16:
+        a_quant_desc = FusedMoEQuantDesc(dtype=None)
+    else:
+        shape = GroupShape(row=1, col=-1)
+        a_quant_desc = FusedMoEQuantDesc(dtype=str(a_dtype), shape=shape)
+
+    weight_scale_group_size = weight_schema.weight_scale_group_size
+    weight_scale_group_size_n = weight_schema.weight_scale_group_size_n
+    weight_group_shape: tuple[int, ...] = ()
+    if weight_scale_group_size_n > 1:
+        weight_group_shape = GroupShape(
+            row=weight_scale_group_size,
+            col=weight_scale_group_size_n,
+        )
+    elif weight_scale_group_size == 0:
+        weight_group_shape = GroupShape(row=-1, col=1)
+    else:
+        weight_group_shape = GroupShape(row=weight_scale_group_size, col=1)
+
+    w1_quant_desc = FusedMoEQuantDesc(
+        dtype=str(weight_schema.b_dtype),
+        shape=weight_group_shape,
+        scale=getattr(layer, "w13_weight_scale", None),
+        alpha_or_gscale=getattr(layer, "w13_global_scale", None),
+        zp=getattr(layer, "w13_zero_point", None),
+        bias=getattr(layer, "w13_bias", None),
+    )
+
+    w2_quant_desc = FusedMoEQuantDesc(
+        dtype=str(weight_schema.b_dtype),
+        shape=weight_group_shape,
+        scale=getattr(layer, "w2_weight_scale", None),
+        alpha_or_gscale=getattr(layer, "w2_global_scale", None),
+        zp=getattr(layer, "w2_zero_point", None),
+        bias=getattr(layer, "w2_bias", None),
+    )
+
+    return FusedMoEQuantConfig(
+        _a1=a_quant_desc,
+        _a2=a_quant_desc,
+        _w1=w1_quant_desc,
+        _w2=w2_quant_desc,
+        gemm1_alpha=gemm1_alpha,
+        gemm1_beta=gemm1_beta,
+        gemm1_clamp_limit=gemm1_clamp_limit,
+    )
diff --git a/vllm/model_executor/layers/quantization/utils/int8_utils.py b/vllm/model_executor/layers/quantization/utils/int8_utils.py
index 020098dffc39..a98e29ffd570 100644
--- a/vllm/model_executor/layers/quantization/utils/int8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/int8_utils.py
@@ -88,6 +88,13 @@ def block_dequant(
     def round_int8(x):
         return tl.extra.hip.libdevice.round(x).to(tl.int8)
 
+
+elif current_platform.is_xpu():
+
+    @triton.jit
+    def round_int8(x):
+        return tl.extra.intel.libdevice.round(x).to(tl.int8)
+
 else:
 
     @triton.jit
diff --git a/vllm/model_executor/layers/quantization/utils/marlin_utils.py b/vllm/model_executor/layers/quantization/utils/marlin_utils.py
index d659effd70ff..eca04eed74b6 100644
--- a/vllm/model_executor/layers/quantization/utils/marlin_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/marlin_utils.py
@@ -8,6 +8,7 @@
 import vllm.envs as envs
 from vllm import _custom_ops as ops
 from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe import RoutedExperts
 from vllm.model_executor.layers.linear import LinearBase
 from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
 from vllm.model_executor.layers.quantization.utils.int8_utils import (
@@ -46,6 +47,9 @@ def query_marlin_supported_quant_types(
     if current_platform.is_cpu():
         return _query_cpu_marlin_supported_quant_types(has_zp, include_fp_type)
 
+    if current_platform.is_xpu():
+        return [scalar_types.uint4, scalar_types.uint4b8]
+
     if not current_platform.is_rocm():
         if device_capability is None:
             capability_tuple = current_platform.get_device_capability()
@@ -226,7 +230,7 @@ def check_marlin_supports_layer(layer: LinearBase, group_size: int) -> bool:
     )[0]
 
 
-def check_moe_marlin_supports_layer(layer: LinearBase, group_size: int) -> bool:
+def check_moe_marlin_supports_layer(layer: RoutedExperts, group_size: int) -> bool:
     if current_platform.is_rocm():
         return False
     hidden_size = layer.hidden_size
@@ -479,9 +483,9 @@ def get_marlin_input_dtype(prefix: str | None = None):
     elif envs.VLLM_MARLIN_INPUT_DTYPE.lower() == "fp8":
         if not current_platform.is_device_capability(
             89
-        ) and not current_platform.is_device_capability(120):
+        ) and not current_platform.is_device_capability_family(120):
             raise ValueError(
-                "Marlin W4A8-FP8 only support SM89 or SM120 device "
+                "Marlin W4A8-FP8 only support SM89 or SM12x device "
                 "(It is slower than Marlin W4A16 on other devices). "
                 "You can consider using W4A8-INT8 instead"
                 "(set VLLM_MARLIN_INPUT_DTYPE=int8)."
diff --git a/vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py b/vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py
index 4fd484edeb30..c02d39c17a02 100644
--- a/vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py
+++ b/vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py
@@ -24,7 +24,7 @@
 
 
 def is_fp4_marlin_supported():
-    return current_platform.has_device_capability(75)
+    return current_platform.is_cuda() and current_platform.has_device_capability(75)
 
 
 def _nvfp4_compute_scale_factor(
@@ -43,9 +43,9 @@ def _nvfp4_compute_scale_factor(
     ws_float = marlin_scales.float() * (2**7)
     nonzero_mask = ws_float > 0
     if nonzero_mask.any():
-        min_val = ws_float[nonzero_mask].min()
-        if min_val < 2:
-            sf = (2 / min_val).log2().ceil().exp2()
+        max_val = ws_float[nonzero_mask].max()
+        if max_val < 448 * (2**7):
+            sf = (448 * (2**7) / max_val).log2().floor().exp2()
             return sf.item()
     return 1.0
 
@@ -105,7 +105,9 @@ def nvfp4_marlin_process_scales(
     if scale_factor > 1.0:
         marlin_scales = (marlin_scales.float() * scale_factor).to(torch.half)
 
-    marlin_scales = (marlin_scales * (2**7)).view(torch.int16) << 1
+    marlin_scales = marlin_scales * (2**7)
+    marlin_scales[marlin_scales < 2] = 0
+    marlin_scales = marlin_scales.view(torch.int16) << 1
     marlin_scales = marlin_scales.view(torch.float8_e4m3fn)
     marlin_scales = marlin_scales[:, 1::2].contiguous()
 
diff --git a/vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py b/vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py
index b5a557ce999d..6e2ae5c91a36 100644
--- a/vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py
+++ b/vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py
@@ -336,6 +336,202 @@ def pack_fp8_to_int32(
     return int32_tensor.T.contiguous() if size_k_first else int32_tensor
 
 
+def mxfp8_marlin_process_scales(marlin_scales: torch.Tensor) -> torch.Tensor:
+    """Reorder scales for e8m0 kernel layout and convert to float8_e8m0fnu."""
+    # fit the layout of fp8 dequantization
+    marlin_scales = marlin_scales.view(-1, 4)[:, [0, 2, 1, 3]].view(
+        marlin_scales.size(0), -1
+    )
+    marlin_scales = marlin_scales.to(torch.float8_e8m0fnu)
+    return marlin_scales
+
+
+def apply_mxfp8_marlin_linear(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    weight_scale: torch.Tensor,
+    workspace: torch.Tensor,
+    size_n: int,
+    size_k: int,
+    bias: torch.Tensor | None = None,
+    use_fp32_reduce: bool = USE_FP32_REDUCE_DEFAULT,
+) -> torch.Tensor:
+    reshaped_x = input.reshape(-1, input.shape[-1])
+    out_shape = input.shape[:-1] + (size_n,)
+
+    use_atomic_add = should_use_atomic_add_reduce(
+        m=reshaped_x.size(0),
+        n=size_n,
+        k=size_k,
+        device=input.device,
+        dtype=input.dtype,
+    )
+
+    output = ops.marlin_gemm(
+        a=reshaped_x,
+        c=None,
+        b_q_weight=weight,
+        b_bias=bias,
+        b_scales=weight_scale,
+        a_scales=None,
+        global_scale=None,
+        b_zeros=None,
+        g_idx=None,
+        perm=None,
+        workspace=workspace,
+        b_q_type=scalar_types.float8_e4m3fn,
+        size_m=reshaped_x.size(0),
+        size_n=size_n,
+        size_k=size_k,
+        use_atomic_add=use_atomic_add,
+        use_fp32_reduce=use_fp32_reduce,
+    )
+
+    return output.reshape(out_shape)
+
+
+def prepare_mxfp8_layer_for_marlin(layer: torch.nn.Module) -> None:
+    """Repack MXFP8 weights and scales into Marlin kernel format.
+
+    Expects the layer to have:
+      - weight: [N, K] float8_e4m3fn
+      - weight_scale: [N, K//32] uint8 (e8m0 encoded)
+      - input_size_per_partition / output_size_per_partition
+    """
+    part_size_n = layer.output_size_per_partition
+    part_size_k = layer.input_size_per_partition
+    group_size = 32  # MX standard block size
+
+    device = layer.weight.device
+
+    # WORKSPACE
+    layer.workspace = marlin_make_workspace_new(device)
+
+    # WEIGHT - repack FP8 weights to Marlin format
+    perm = torch.empty(0, dtype=torch.int, device=device)
+    qweight = pack_fp8_to_int32(layer.weight, size_k_first=False)
+    qweight = qweight.T.contiguous()
+
+    marlin_qweight = ops.gptq_marlin_repack(
+        b_q_weight=qweight,
+        perm=perm,
+        size_k=part_size_k,
+        size_n=part_size_n,
+        num_bits=8,
+    )
+    replace_parameter(layer, "weight", marlin_qweight)
+
+    # WEIGHT SCALES
+    # Convert uint8 scales -> e8m0fnu -> param_dtype for permutation
+    # Scales are [N, K//32], need [K//32, N] for marlin_permute_scales
+    param_dtype = torch.get_default_dtype()
+    scales = layer.weight_scale.data[:part_size_n, : part_size_k // group_size]
+    scales = scales.contiguous()
+    scales = scales.view(torch.float8_e8m0fnu).to(param_dtype)
+    scales = scales.T.contiguous()
+
+    # Permute scales to Marlin layout
+    marlin_scales = marlin_permute_scales(
+        s=scales,
+        size_k=part_size_k,
+        size_n=part_size_n,
+        group_size=group_size,
+    )
+
+    # Reorder for e8m0 kernel layout and convert back to e8m0fnu
+    marlin_scales = mxfp8_marlin_process_scales(marlin_scales)
+    replace_parameter(layer, "weight_scale", marlin_scales)
+
+    # BIAS
+    if hasattr(layer, "bias") and layer.bias is not None:
+        assert layer.bias.shape == (part_size_n,)
+        bias = marlin_permute_bias(layer.bias)
+        replace_parameter(layer, "bias", bias)
+
+
+def prepare_mxfp8_moe_layer_for_marlin(
+    layer: torch.nn.Module,
+    w13: torch.Tensor,
+    w2: torch.Tensor,
+    w13_scale: torch.Tensor,
+    w2_scale: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """Repack MXFP8 MoE weights and scales into Marlin kernel format.
+
+    Args:
+        layer: MoE layer (used to read params_dtype and attach workspace).
+        w13: [E, 2*N, K] float8_e4m3fn weights.
+        w2:  [E, K, N] float8_e4m3fn weights.
+        w13_scale: [E, 2*N, K//32] uint8 e8m0 scales.
+        w2_scale:  [E, K, N//32] uint8 e8m0 scales.
+
+    Returns:
+        (w13, w2, w13_scale, w2_scale) in Marlin format.
+    """
+    group_size = 32
+    e = w13.shape[0]
+    w13_n = w13.shape[1]
+    k = w13.shape[2]
+    n = w2.shape[2]
+
+    device = w13.device
+    param_dtype = torch.get_default_dtype()
+    perm = torch.empty(0, dtype=torch.int, device=device)
+
+    layer.workspace = marlin_make_workspace_new(device, 4)
+
+    def repack_weight(weight: torch.Tensor, name: str) -> torch.Tensor:
+        if "w13" in name:
+            size_n, size_k = w13_n, k
+        else:
+            size_n, size_k = k, n
+
+        assert weight.shape == (e, size_n, size_k)
+
+        tensor_list = []
+        for i in range(e):
+            qweight = pack_fp8_to_int32(weight[i], size_k_first=False)
+            qweight = qweight.T.contiguous()
+            marlin_qweight = ops.gptq_marlin_repack(
+                b_q_weight=qweight,
+                perm=perm,
+                size_k=size_k,
+                size_n=size_n,
+                num_bits=8,
+            )
+            tensor_list.append(marlin_qweight)
+        return torch.cat([x.unsqueeze(0) for x in tensor_list], 0)
+
+    w13 = repack_weight(w13, "w13")
+    w2 = repack_weight(w2, "w2")
+
+    def permute_scales(scales: torch.Tensor, name: str) -> torch.Tensor:
+        if "w13" in name:
+            size_n, size_k = w13_n, k
+        else:
+            size_n, size_k = k, n
+
+        tensor_list = []
+        for i in range(e):
+            s = scales[i][:size_n, : size_k // group_size].contiguous()
+            s = s.view(torch.float8_e8m0fnu).to(param_dtype)
+            s = s.T.contiguous()
+            marlin_s = marlin_permute_scales(
+                s=s,
+                size_k=size_k,
+                size_n=size_n,
+                group_size=group_size,
+            )
+            marlin_s = mxfp8_marlin_process_scales(marlin_s)
+            tensor_list.append(marlin_s)
+        return torch.cat([x.unsqueeze(0) for x in tensor_list], 0)
+
+    w13_scale = permute_scales(w13_scale, "w13")
+    w2_scale = permute_scales(w2_scale, "w2")
+
+    return w13, w2, w13_scale, w2_scale
+
+
 def marlin_quant_fp8_torch(weight, group_size, input_dtype=None):
     is_a_8bit = input_dtype is not None and input_dtype.itemsize == 1
     if is_a_8bit:
diff --git a/vllm/model_executor/layers/quantization/utils/mxfp4_utils.py b/vllm/model_executor/layers/quantization/utils/mxfp4_utils.py
index 21c8aba1d56c..51b7b29551d0 100644
--- a/vllm/model_executor/layers/quantization/utils/mxfp4_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/mxfp4_utils.py
@@ -162,3 +162,7 @@ def _quant_dequant_mxfp4_fake(
     quant_dequant_mxfp4 = torch.ops.vllm.quant_dequant_mxfp4
 except AttributeError as error:
     raise error
+
+
+def xpu_mxfp4_quantize(x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+    return torch.ops.vllm.xpu_mxfp4_quantize(x)
diff --git a/vllm/model_executor/layers/quantization/utils/mxfp8_utils.py b/vllm/model_executor/layers/quantization/utils/mxfp8_utils.py
index ee849b167aba..a12918225348 100644
--- a/vllm/model_executor/layers/quantization/utils/mxfp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/mxfp8_utils.py
@@ -1,22 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from enum import Enum
-
 import torch
 
-from vllm.logger import init_logger
-from vllm.utils import flashinfer as vllm_flashinfer
 from vllm.utils.torch_utils import direct_register_custom_op
 
-logger = init_logger(__name__)
-
-
-class Mxfp8LinearBackend(Enum):
-    EMULATION = "emulation"
-    FLASHINFER_CUTLASS = "flashinfer-cutlass"
-
-
 # MXFP8 constants
 MXFP8_VALUE_DTYPE = torch.float8_e4m3fn
 MXFP8_SCALE_DTYPE = torch.uint8
@@ -47,23 +35,83 @@ def swizzle_mxfp8_scale(sf: torch.Tensor, M: int, K: int) -> torch.Tensor:
     return sf_swizzled.contiguous().view(-1)
 
 
+def _mxfp8_e4m3_quantize_torch(
+    x: torch.Tensor,
+    is_sf_swizzled_layout: bool = False,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Naive MXFP8 quantization.
+    For each block of 32 elements along the last dimension, compute a
+    shared e8m0 scale (the biased exponent of the block-wise amax)
+    and quantize each element to float8_e4m3fn.
+
+    Returns (quantized_values [same shape, fp8], scales uint8).
+    Scale shape depends on is_sf_swizzled_layout:
+      False -> [..., K//32]  (row-major 2D)
+      True  -> [flat swizzled 1D]
+    """
+    assert x.shape[-1] % MXFP8_BLOCK_SIZE == 0
+    orig_shape = x.shape
+    num_blocks = x.shape[-1] // MXFP8_BLOCK_SIZE
+
+    x_fp32 = x.to(torch.float32)
+    x_blocked = x_fp32.view(*orig_shape[:-1], num_blocks, MXFP8_BLOCK_SIZE)
+
+    amax = x_blocked.abs().amax(dim=-1)
+    amax = amax.clamp(min=torch.finfo(torch.float32).tiny)
+    scale_biased = torch.floor(torch.log2(amax)) + 127.0
+    scale_biased = scale_biased.clamp(0, 254)
+    scales_uint8 = scale_biased.to(torch.uint8)
+
+    descale = torch.exp2(scale_biased - 127.0)
+    x_scaled = x_blocked / descale.unsqueeze(-1)
+
+    x_fp8 = x_scaled.view(orig_shape).to(MXFP8_VALUE_DTYPE)
+
+    if x.ndim == 2:
+        M, K = x.shape
+        scales_uint8 = scales_uint8.view(M, -1)
+        if is_sf_swizzled_layout:
+            scales_uint8 = swizzle_mxfp8_scale(scales_uint8, M=M, K=K)
+    elif x.ndim == 3:
+        B, M, K = x.shape
+        scales_uint8 = scales_uint8.view(B, M, -1)
+        if is_sf_swizzled_layout:
+            swizzled = []
+            for i in range(B):
+                swizzled.append(swizzle_mxfp8_scale(scales_uint8[i], M=M, K=K))
+            scales_uint8 = torch.cat(swizzled)
+
+    return x_fp8, scales_uint8
+
+
 def _mxfp8_e4m3_quantize_impl(
-    x: torch.Tensor, is_sf_swizzled_layout: bool = False
+    x: torch.Tensor,
+    is_sf_swizzled_layout: bool = False,
+    alignment: int = 0,
 ) -> tuple[torch.Tensor, torch.Tensor]:
-    from flashinfer import mxfp8_quantize as flashinfer_mxfp8_quantize
+    from vllm.platforms import current_platform
 
-    x_q, x_scales = flashinfer_mxfp8_quantize(
-        x, is_sf_swizzled_layout=is_sf_swizzled_layout
-    )
-    if x_scales.ndim == 1 and x.ndim == 2 and not is_sf_swizzled_layout:
-        x_scales = x_scales.view(x.size(0), -1)
-    return x_q, x_scales
+    if current_platform.has_device_capability(100):
+        from flashinfer import mxfp8_quantize as flashinfer_mxfp8_quantize
+
+        x_q, x_scales = flashinfer_mxfp8_quantize(
+            x,
+            is_sf_swizzled_layout=is_sf_swizzled_layout,
+            alignment=alignment if alignment > 0 else 32,
+        )
+        if x_scales.ndim == 1 and x.ndim == 2 and not is_sf_swizzled_layout:
+            x_scales = x_scales.view(x.size(0), -1)
+        return x_q, x_scales
+
+    return _mxfp8_e4m3_quantize_torch(x, is_sf_swizzled_layout)
 
 
 def mxfp8_e4m3_quantize(
-    x: torch.Tensor, is_sf_swizzled_layout: bool = False
+    x: torch.Tensor,
+    is_sf_swizzled_layout: bool = False,
+    alignment: int = 0,
 ) -> tuple[torch.Tensor, torch.Tensor]:
-    return torch.ops.vllm.mxfp8_quantize(x, is_sf_swizzled_layout)
+    return torch.ops.vllm.mxfp8_quantize(x, is_sf_swizzled_layout, alignment)
 
 
 def dequant_mxfp8_to_bf16(x: torch.Tensor, scales: torch.Tensor) -> torch.Tensor:
@@ -83,7 +131,9 @@ def dequant_mxfp8_to_bf16(x: torch.Tensor, scales: torch.Tensor) -> torch.Tensor
 
 
 def mxfp8_e4m3_quantize_fake(
-    x: torch.Tensor, is_sf_swizzled_layout: bool = False
+    x: torch.Tensor,
+    is_sf_swizzled_layout: bool = False,
+    alignment: int = 0,
 ) -> tuple[torch.Tensor, torch.Tensor]:
     """Fake implementation for torch.compile tracing."""
     fp_data = torch.empty_like(x, dtype=MXFP8_VALUE_DTYPE)
@@ -127,110 +177,7 @@ def mxfp8_e4m3_quantize_fake(
 )
 
 
-class Mxfp8LinearOp:
-    def __init__(self, backend: Mxfp8LinearBackend):
-        if backend not in Mxfp8LinearBackend:
-            raise ValueError(f"Unsupported backend: {backend}")
-
-        self.backend = backend
-
-    def _apply_emulation(
-        self,
-        input: torch.Tensor,
-        weight: torch.Tensor,
-        weight_scale: torch.Tensor,
-        out_dtype: torch.dtype,
-        bias: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        # Validate weight_scale dtype and shape (must be 2D for TORCH backend)
-        if weight_scale.dtype != MXFP8_SCALE_DTYPE:
-            raise ValueError(
-                f"TORCH backend requires {MXFP8_SCALE_DTYPE} weight_scale dtype, "
-                f"got {weight_scale.dtype}."
-            )
-        if weight_scale.ndim != 2:
-            raise ValueError(
-                f"TORCH backend requires 2D weight_scale, got {weight_scale.ndim}D. "
-                f"Ensure process_weights_after_loading was called."
-            )
-
-        weight_bf16 = dequant_mxfp8_to_bf16(weight, weight_scale)
-
-        output = torch.nn.functional.linear(input, weight_bf16, bias)
-        return output.to(out_dtype)
-
-    def _apply_flashinfer_cutlass(
-        self,
-        input: torch.Tensor,
-        weight: torch.Tensor,
-        weight_scale: torch.Tensor,
-        out_dtype: torch.dtype,
-        bias: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        N, K = weight.shape
-
-        input_shape = input.shape
-        input_2d = input.view(-1, K)
-        M_orig = input_2d.shape[0]
-
-        # Minimum dimension size for F8_128x4 block scaling layout
-        min_dim = 128
-
-        assert min_dim <= K, (
-            f"mm_mxfp8 requires K >= {min_dim}, got K={K}. "
-            f"in_features is too small for mm_mxfp8."
-        )
-        assert K % MXFP8_BLOCK_SIZE == 0, (
-            f"mm_mxfp8 requires K to be divisible by {MXFP8_BLOCK_SIZE}, got K={K}."
-        )
-        assert min_dim <= N, (
-            f"mm_mxfp8 requires N >= {min_dim}, got N={N}. "
-            f"out_features is too small for mm_mxfp8."
-        )
-
-        M_padded = ((M_orig + min_dim - 1) // min_dim) * min_dim
-        if M_padded != M_orig:
-            pad_rows = M_padded - M_orig
-            input_2d = torch.nn.functional.pad(input_2d, (0, 0, 0, pad_rows))
-
-        input_mxfp8, input_scale = mxfp8_e4m3_quantize(
-            input_2d,
-            is_sf_swizzled_layout=True,  # Swizzled for best accuracy
-        )
-
-        if not weight.is_contiguous():
-            weight = weight.contiguous()
-
-        output = vllm_flashinfer.mm_mxfp8(
-            input_mxfp8,
-            weight.t(),
-            input_scale,
-            weight_scale,
-            out_dtype=out_dtype,
-            backend="cutlass",
-        )
-
-        if M_padded != M_orig:
-            output = output[:M_orig, :]
-
-        if bias is not None:
-            output = output + bias
-
-        output_shape = (*input_shape[:-1], N)
-        return output.view(output_shape)
-
-    def apply(
-        self,
-        input: torch.Tensor,
-        weight: torch.Tensor,
-        weight_scale: torch.Tensor,
-        out_dtype: torch.dtype,
-        bias: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        if self.backend == Mxfp8LinearBackend.EMULATION:
-            return self._apply_emulation(input, weight, weight_scale, out_dtype, bias)
-
-        assert self.backend == Mxfp8LinearBackend.FLASHINFER_CUTLASS
-        return self._apply_flashinfer_cutlass(
-            input, weight, weight_scale, out_dtype, bias
-        )
+def xpu_mxfp8_quantize(
+    x: torch.Tensor, dtype: torch.dtype | None = None
+) -> tuple[torch.Tensor, torch.Tensor]:
+    return torch.ops.vllm.xpu_mxfp8_quantize(x, dtype)
diff --git a/vllm/model_executor/layers/quantization/utils/nvfp4_emulation_utils.py b/vllm/model_executor/layers/quantization/utils/nvfp4_emulation_utils.py
index 62b480210fc0..39c78a9062be 100644
--- a/vllm/model_executor/layers/quantization/utils/nvfp4_emulation_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/nvfp4_emulation_utils.py
@@ -1,8 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from types import SimpleNamespace
+
 import torch
 
+from vllm.platforms import current_platform
 from vllm.scalar_type import scalar_types
+from vllm.triton_utils import tl, triton
 
 __all__ = [
     "break_fp4_bytes",
@@ -11,12 +15,316 @@
 ]
 
 FLOAT4_E2M1_MAX = scalar_types.float4_e2m1f.max()
+FLOAT4_E2M1_MAX_RECIPROCAL = 1 / FLOAT4_E2M1_MAX
 
-kE2M1ToFloat = torch.tensor(
-    [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0], dtype=torch.float32
+kE2M1ToFloat_handle = SimpleNamespace(
+    val=torch.tensor([0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0], dtype=torch.float32)
 )
 
 
+@triton.jit
+def _e2m1_inline(magnitude):
+    """Inline E2M1 lookup using binary tree - 3 levels instead of 7 sequential.
+
+    Maps 3-bit magnitude to float: [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0]
+    Uses bit decomposition for fewer comparisons.
+    """
+    # Bit 2 (MSB): separates 0-3 from 4-7
+    # Bit 1: separates within groups
+    # Bit 0 (LSB): separates within pairs
+    b2 = (magnitude >> 2) & 1  # 0 for mag 0-3, 1 for mag 4-7
+    b1 = (magnitude >> 1) & 1  # middle bit
+    b0 = magnitude & 1  # LSB
+
+    # For mag 0-3: [0.0, 0.5, 1.0, 1.5]
+    low_group = tl.where(
+        b1 == 1, tl.where(b0 == 1, 1.5, 1.0), tl.where(b0 == 1, 0.5, 0.0)
+    )
+    # For mag 4-7: [2.0, 3.0, 4.0, 6.0]
+    high_group = tl.where(
+        b1 == 1, tl.where(b0 == 1, 6.0, 4.0), tl.where(b0 == 1, 3.0, 2.0)
+    )
+    return tl.where(b2 == 1, high_group, low_group)
+
+
+@triton.jit
+def _dequantize_nvfp4_kernel(
+    fp4_ptr,
+    scale_ptr,
+    global_scale_ptr,
+    output_ptr,
+    rows_per_batch: tl.constexpr,
+    num_blocks: tl.constexpr,
+    BLOCK_SIZE: tl.constexpr,
+    has_batch_global_scale: tl.constexpr,
+    TILE_BLOCKS: tl.constexpr,
+):
+    """Triton kernel for NVFP4 dequantization (swizzle=False).
+
+    Optimized with 2D tile processing + interleave for coalesced stores.
+    """
+    BLOCK_PACKED: tl.constexpr = BLOCK_SIZE // 2
+
+    row_idx = tl.program_id(0)
+    tile_idx = tl.program_id(1)
+
+    if has_batch_global_scale:
+        batch_idx = row_idx // rows_per_batch
+        global_scale = tl.load(global_scale_ptr + batch_idx).to(tl.float32)
+    else:
+        global_scale = tl.load(global_scale_ptr).to(tl.float32)
+
+    fp4_row_offset = row_idx * num_blocks * BLOCK_PACKED
+    scale_row_offset = row_idx * num_blocks
+    output_row_offset = row_idx * num_blocks * BLOCK_SIZE
+
+    start_block = tile_idx * TILE_BLOCKS
+
+    # Load scales for this tile: [TILE_BLOCKS]
+    block_offsets = tl.arange(0, TILE_BLOCKS)
+    block_mask = (start_block + block_offsets) < num_blocks
+
+    raw_scales = tl.load(
+        scale_ptr + scale_row_offset + start_block + block_offsets,
+        mask=block_mask,
+        other=0,
+    )
+    scale_f32 = tl.cast(raw_scales, tl.float8e4nv, bitcast=True).to(tl.float32)
+    scale_values = (scale_f32 * global_scale)[:, None]
+
+    # Load [TILE_BLOCKS, BLOCK_PACKED] packed bytes
+    packed_offsets = tl.arange(0, BLOCK_PACKED)[None, :]
+    byte_indices = (
+        fp4_row_offset
+        + (start_block + block_offsets[:, None]) * BLOCK_PACKED
+        + packed_offsets
+    )
+    elem_mask = block_mask[:, None]
+    raw_bytes = tl.load(fp4_ptr + byte_indices, mask=elem_mask, other=0)
+
+    low_nibble = raw_bytes & 0x0F
+    high_nibble = (raw_bytes >> 4) & 0x0F
+
+    # Binary tree E2M1 decode
+    low_mag = low_nibble & 0x07
+    low_val = _e2m1_inline(low_mag)
+    low_sign = (low_nibble >> 3) & 1
+    low_result = tl.where(low_sign == 1, -low_val, low_val) * scale_values
+
+    high_mag = high_nibble & 0x07
+    high_val = _e2m1_inline(high_mag)
+    high_sign = (high_nibble >> 3) & 1
+    high_result = tl.where(high_sign == 1, -high_val, high_val) * scale_values
+
+    # Interleave for coalesced contiguous store
+    result = tl.interleave(low_result, high_result)
+
+    elem_offsets = tl.arange(0, BLOCK_SIZE)[None, :]
+    out_indices = (
+        output_row_offset
+        + (start_block + block_offsets[:, None]) * BLOCK_SIZE
+        + elem_offsets
+    )
+    tl.store(output_ptr + out_indices, result, mask=block_mask[:, None])
+
+
+@triton.jit
+def _e2m1_lookup(magnitude):
+    """Lookup E2M1 float value from 3-bit magnitude."""
+    result = tl.where(magnitude == 1, 0.5, 0.0)
+    result = tl.where(magnitude == 2, 1.0, result)
+    result = tl.where(magnitude == 3, 1.5, result)
+    result = tl.where(magnitude == 4, 2.0, result)
+    result = tl.where(magnitude == 5, 3.0, result)
+    result = tl.where(magnitude == 6, 4.0, result)
+    result = tl.where(magnitude == 7, 6.0, result)
+    return result
+
+
+@triton.jit
+def _round_to_fp4(x):
+    """Round float values to the nearest E2M1 representable value.
+
+    Matches the thresholds in the Python ``cast_to_fp4`` exactly.
+    """
+    sign = tl.where(x < 0.0, -1.0, 1.0)
+    abs_x = tl.abs(x)
+    result = tl.where(abs_x > 5.0, 6.0, 0.0)
+    result = tl.where((abs_x >= 3.5) & (abs_x <= 5.0), 4.0, result)
+    result = tl.where((abs_x > 2.5) & (abs_x < 3.5), 3.0, result)
+    result = tl.where((abs_x >= 1.75) & (abs_x <= 2.5), 2.0, result)
+    result = tl.where((abs_x > 1.25) & (abs_x < 1.75), 1.5, result)
+    result = tl.where((abs_x >= 0.75) & (abs_x <= 1.25), 1.0, result)
+    result = tl.where((abs_x > 0.25) & (abs_x < 0.75), 0.5, result)
+    return result * sign
+
+
+@triton.jit
+def _nvfp4_quant_dequant_kernel(
+    input_ptr,
+    output_ptr,
+    global_scale_ptr,
+    k: tl.constexpr,
+    num_blocks: tl.constexpr,
+    BLOCK_SIZE: tl.constexpr,
+    FP4_MAX_RECIPROCAL: tl.constexpr,
+    TILE_BLOCKS: tl.constexpr,
+):
+    """Fused NVFP4 quantize-dequantize kernel.
+
+    Uses a 2D grid (rows x tiles) to parallelize across both rows
+    and quantization groups within a row. Each program handles
+    TILE_BLOCKS groups at once using vectorized 2D operations.
+    """
+    row_idx = tl.program_id(0)
+    tile_idx = tl.program_id(1)
+    global_scale = tl.load(global_scale_ptr).to(tl.float32)
+    row_offset = row_idx * k
+
+    start_block = tile_idx * TILE_BLOCKS
+    block_offsets = tl.arange(0, TILE_BLOCKS)
+    block_mask = (start_block + block_offsets) < num_blocks
+
+    # Load [TILE_BLOCKS, BLOCK_SIZE] elements
+    indices = (
+        row_offset
+        + (start_block + block_offsets[:, None]) * BLOCK_SIZE
+        + tl.arange(0, BLOCK_SIZE)[None, :]
+    )
+    mask_2d = block_mask[:, None]
+    x = tl.load(input_ptr + indices, mask=mask_2d, other=0.0).to(tl.float32)
+
+    # Per-group scale: [TILE_BLOCKS]
+    vec_max = tl.max(tl.abs(x), axis=1)
+    scale = global_scale * (vec_max * FP4_MAX_RECIPROCAL)
+    scale = tl.clamp(scale, -448.0, 448.0)
+    scale = scale.to(tl.float8e4nv).to(tl.float32)
+
+    # Safe reciprocal, broadcast to [TILE_BLOCKS, 1]
+    output_scale = tl.where(scale == 0.0, 0.0, global_scale / scale)[:, None]
+
+    # Quantize: scale, clamp, round to FP4
+    scaled_x = tl.clamp(x * output_scale, -6.0, 6.0)
+    fp4_val = _round_to_fp4(scaled_x)
+
+    # Dequantize: fp4_val * (scale / global_scale)
+    dequant_scale = (scale / global_scale)[:, None]
+    result = fp4_val * dequant_scale
+
+    tl.store(output_ptr + indices, result, mask=mask_2d)
+
+
+def _triton_nvfp4_quant_dequant(
+    x: torch.Tensor,
+    global_scale: torch.Tensor,
+    block_size: int,
+) -> torch.Tensor:
+    """Triton-accelerated NVFP4 quantize-dequantize."""
+    x_m, x_k = x.shape
+
+    if not torch.compiler.is_compiling():
+        assert x_k % block_size == 0, (
+            f"Weight shape K={x_k} is not divisible by block_size={block_size}"
+        )
+
+    output_dtype = x.dtype
+    num_blocks = x_k // block_size
+
+    output = torch.empty(x_m, x_k, dtype=output_dtype, device=x.device)
+
+    tile_blocks = min(64, triton.next_power_of_2(num_blocks))
+    num_tiles = (num_blocks + tile_blocks - 1) // tile_blocks
+    grid = (x_m, num_tiles)
+    _nvfp4_quant_dequant_kernel[grid](
+        x,
+        output,
+        global_scale,
+        x_k,
+        num_blocks,
+        block_size,
+        FLOAT4_E2M1_MAX_RECIPROCAL,
+        tile_blocks,
+    )
+
+    return output
+
+
+def _triton_dequantize_nvfp4(
+    tensor_fp4: torch.Tensor,
+    tensor_sf: torch.Tensor,
+    global_scale: torch.Tensor,
+    dtype: torch.dtype,
+    block_size: int = 16,
+) -> torch.Tensor:
+    """Dequantize NVFP4 using Triton (swizzle=False only).
+
+    Supports both 2D and 3D inputs:
+    - 2D: [m, packed_k] -> [m, k]
+    - 3D: [dim0, m, packed_k] -> [dim0, m, k]
+    """
+    assert tensor_fp4.dtype == torch.uint8
+
+    is_3d = tensor_fp4.ndim == 3
+    if is_3d:
+        dim0, m_per_batch, packed_k = tensor_fp4.shape
+        tensor_fp4_2d = tensor_fp4.reshape(-1, packed_k)
+        tensor_sf_2d = tensor_sf.reshape(-1, tensor_sf.shape[-1])
+        total_rows_flat = dim0 * m_per_batch
+    else:
+        m_per_batch, packed_k = tensor_fp4.shape
+        tensor_fp4_2d = tensor_fp4
+        tensor_sf_2d = tensor_sf
+        total_rows_flat = m_per_batch
+
+    k = packed_k * 2
+    num_blocks = k // block_size
+
+    output = torch.empty(total_rows_flat, k, dtype=dtype, device=tensor_fp4.device)
+
+    # View as uint8 so Triton can load raw bytes and bitcast to float8_e4m3fn
+    scale_raw = tensor_sf_2d.contiguous().view(torch.uint8)
+
+    # Shape-adaptive tile sizing: for large row counts (3D), process
+    # entire row in one tile. For small row counts (2D), use smaller
+    # tiles to increase parallelism across CUs.
+    np2 = triton.next_power_of_2(num_blocks)
+    if total_rows_flat >= 4096:
+        # Many rows: maximize work per CTA, one tile per row
+        tile_blocks = np2
+        nw = 1
+        ns = 2
+    elif total_rows_flat >= 2048:
+        # Medium-many rows: full row, 2 warps
+        tile_blocks = np2
+        nw = 2
+        ns = 2
+    else:
+        # Few rows: use moderate tiles for CU utilization
+        tile_blocks = min(64, np2)
+        nw = 4
+        ns = 2
+    num_tiles = (num_blocks + tile_blocks - 1) // tile_blocks
+    grid = (total_rows_flat, num_tiles)
+    _dequantize_nvfp4_kernel[grid](
+        tensor_fp4_2d,
+        scale_raw,
+        global_scale,
+        output,
+        m_per_batch,
+        num_blocks,
+        block_size,
+        is_3d,
+        tile_blocks,
+        num_warps=nw,
+        num_stages=ns,
+    )
+
+    if is_3d:
+        output = output.reshape(dim0, m_per_batch, k)
+
+    return output
+
+
 def break_fp4_bytes(a, dtype):
     assert a.dtype == torch.uint8
     m, n = a.shape
@@ -29,8 +337,9 @@ def break_fp4_bytes(a, dtype):
     # Vectorized sign and magnitude extraction
     signs = (combined & 0x08).to(torch.bool)  # Sign bits
     abs_vals = (combined & 0x07).to(torch.long)
+
+    kE2M1 = kE2M1ToFloat_handle.val
     # Device-aware lookup and sign application
-    kE2M1 = kE2M1ToFloat.to(device=a.device)
     values = kE2M1[abs_vals] * torch.where(signs, -1.0, 1.0)
     # Reshape to final form
     return values.reshape(m, n * 2).to(dtype=dtype)
@@ -47,27 +356,66 @@ def convert_swizzled_to_linear(a_sf_swizzled: torch.Tensor, m, k, block_size):
 
 
 def dequantize_to_dtype(
-    tensor_fp4, tensor_sf, global_scale, dtype, device, block_size=16
+    tensor_fp4: torch.Tensor,
+    tensor_sf: torch.Tensor,
+    global_scale: torch.Tensor,
+    dtype: torch.dtype,
+    block_size: int = 16,
+    swizzle: bool | None = True,
 ):
-    """Dequantize the fp4 tensor back to high precision."""
+    """Dequantize the fp4 tensor back to high precision.
+
+    Supports both 2D and 3D inputs:
+    - 2D: [m, packed_k] -> [m, k]
+    - 3D: [dim0, m, packed_k] -> [dim0, m, k]
+    """
     # Two fp4 values are packed into one uint8.
     assert tensor_fp4.dtype == torch.uint8
-    m, packed_k = tensor_fp4.shape
+
+    if not swizzle and current_platform.is_cuda_alike():
+        return _triton_dequantize_nvfp4(
+            tensor_fp4, tensor_sf, global_scale, dtype, block_size
+        )
+
+    # We handle 3D tensors reshaping them to 2D.
+    is_3d = tensor_fp4.ndim == 3
+
+    if is_3d:
+        dim0, m, packed_k = tensor_fp4.shape
+        tensor_fp4 = tensor_fp4.reshape(-1, packed_k)
+        tensor_sf = tensor_sf.reshape(-1, tensor_sf.shape[-1])
+        global_scale = global_scale[:, None, None]
+    else:
+        m, packed_k = tensor_fp4.shape
+
     k = packed_k * 2
     tensor_f32 = break_fp4_bytes(tensor_fp4, torch.float32)
-    tensor_f32 = tensor_f32.reshape(m, k // block_size, block_size)
+    tensor_f32 = tensor_f32.reshape(-1, k // block_size, block_size)
     tensor_sf = tensor_sf.view(torch.float8_e4m3fn)
-    tensor_sf = convert_swizzled_to_linear(tensor_sf, m, k, block_size)
-    tensor_sf_dtype = tensor_sf.to(torch.float32) / global_scale
+
+    if swizzle:
+        tensor_sf = convert_swizzled_to_linear(  # noqa: E501
+            tensor_sf, tensor_f32.size(0), k, block_size
+        )
+
+    if is_3d:
+        tensor_sf = tensor_sf.reshape(dim0, m, k // block_size)
+    tensor_sf_dtype = tensor_sf.to(torch.float32) * global_scale
+
+    if is_3d:
+        tensor_f32 = tensor_f32.reshape(dim0, m, -1, block_size)
 
     # scale the tensor
-    out = (tensor_f32 * tensor_sf_dtype.unsqueeze(-1)).reshape(m, k)
+    out = tensor_f32 * tensor_sf_dtype.unsqueeze(-1)
+    out = out.reshape(*out.shape[:-2], -1)
+
     return out.to(dtype)
 
 
 def get_reciprocal(x):
     if isinstance(x, torch.Tensor):
-        return torch.where(x == 0, torch.tensor(0.0, dtype=x.dtype), 1.0 / x)
+        # torch.where yields operation not permitted when stream is capturing.
+        return 1.0 / (x + (x == 0) * 1e8)
     elif isinstance(x, (float, int)):
         return 0.0 if x == 0 else 1.0 / x
     else:
@@ -94,7 +442,7 @@ def ref_nvfp4_quant(x, global_scale, block_size):
     m, n = x.shape
     x = torch.reshape(x, (m, n // block_size, block_size))
     vec_max = torch.max(torch.abs(x), dim=-1, keepdim=True)[0].to(torch.float32)
-    scale = global_scale * (vec_max * get_reciprocal(FLOAT4_E2M1_MAX))
+    scale = global_scale * (vec_max * FLOAT4_E2M1_MAX_RECIPROCAL)
     scale = torch.clamp(scale, max=448, min=-448)
     scale = scale.to(torch.float8_e4m3fn).to(torch.float32)
     output_scale = get_reciprocal(scale * get_reciprocal(global_scale))
@@ -105,25 +453,43 @@ def ref_nvfp4_quant(x, global_scale, block_size):
     return cast_to_fp4(clipped_x), scale.squeeze(-1)
 
 
+def ref_nvfp4_quant_dequant(
+    x: torch.Tensor, global_scale: torch.Tensor, block_size: int
+) -> torch.Tensor:
+    """
+    NVFP4 quantize-dequantize operation.
+
+    `global_scale` is expected to have a single element.
+    """
+    if current_platform.is_cuda_alike():
+        return _triton_nvfp4_quant_dequant(x, global_scale, block_size)
+
+    x_m, x_k = x.shape
+    output_dtype = x.dtype
+
+    # quantize input to (FP4 and interleaved block scale)
+    x_fp4, x_blockscale = ref_nvfp4_quant(x, global_scale, block_size)
+
+    # dequantize input
+    x_fp4 = x_fp4.reshape(x_m, x_k // block_size, block_size)
+    x_blockscale = x_blockscale.unsqueeze(-1) / global_scale
+    x_dq = (x_fp4 * x_blockscale).reshape(x_m, x_k).to(output_dtype)
+
+    return x_dq
+
+
 def run_nvfp4_emulations(
     x: torch.Tensor,
     input_global_scale: torch.Tensor,
     weight: torch.Tensor,
     weight_scale_swizzled: torch.Tensor,
     weight_global_scale: torch.Tensor,
+    swizzle: bool | None = True,
 ):
-    group_size = 16
-    x_m, x_k = x.shape
     output_dtype = x.dtype
+    group_size = 16
 
-    # quantize input to (FP4 and interleaved block scale)
-    x_fp4, x_blockscale = ref_nvfp4_quant(x, input_global_scale, group_size)
-
-    # dequantize input
-    x_fp4 = x_fp4.reshape(x_m, x_k // group_size, group_size)
-    x_blockscale = x_blockscale.unsqueeze(-1) / input_global_scale
-    x_dq = (x_fp4 * x_blockscale).reshape(x_m, x_k).to(output_dtype)
-    del x_fp4, x_blockscale
+    x_dq = ref_nvfp4_quant_dequant(x, input_global_scale, block_size=group_size)
 
     # dequantize weight
     w_fp4 = weight.data.view(torch.uint8)
@@ -132,11 +498,10 @@ def run_nvfp4_emulations(
         weight_scale_swizzled.data,
         weight_global_scale,
         output_dtype,
-        x.device,
         group_size,
+        swizzle=swizzle,
     )
 
     # matmul
     out = torch.matmul(x_dq, w_dq.t())
-    del w_dq, x_dq
     return out
diff --git a/vllm/model_executor/layers/quantization/utils/nvfp4_utils.py b/vllm/model_executor/layers/quantization/utils/nvfp4_utils.py
index bcb4769e4c9b..539a28d4cb21 100644
--- a/vllm/model_executor/layers/quantization/utils/nvfp4_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/nvfp4_utils.py
@@ -1,273 +1,14 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from enum import Enum
 
 import torch
 
-import vllm.envs as envs
 from vllm._custom_ops import (
-    cutlass_scaled_fp4_mm,
     cutlass_scaled_mm_supports_fp4,
-    scaled_fp4_quant,
-)
-from vllm.logger import init_logger
-from vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 import (
-    apply_fp4_marlin_linear,
-    is_fp4_marlin_supported,
-    prepare_fp4_layer_for_marlin,
-)
-from vllm.model_executor.layers.quantization.utils.nvfp4_emulation_utils import (
-    run_nvfp4_emulations,
 )
 from vllm.platforms import current_platform
-from vllm.utils.flashinfer import flashinfer_scaled_fp4_mm, has_flashinfer
 from vllm.utils.math_utils import round_up
 
-logger = init_logger(__name__)
-
-
-class NvFp4LinearBackend(Enum):
-    VLLM_CUTLASS = "cutlass"
-    FLASHINFER_CUTLASS = "flashinfer-cutlass"
-    FLASHINFER_TRTLLM = "flashinfer-trtllm"
-    FLASHINFER_CUDNN = "flashinfer-cudnn"
-    FBGEMM = "fbgemm"
-    MARLIN = "marlin"
-    EMULATION = "emulation"
-
-
-def select_nvfp4_linear_backend() -> NvFp4LinearBackend:
-    """
-    Select the best available NVFP4 GEMM backend based on environment
-    configuration and platform capabilities.
-    """
-    backend: NvFp4LinearBackend | None = None
-
-    if envs.VLLM_USE_FBGEMM:
-        try:
-            import fbgemm_gpu  # noqa: F401
-        except ImportError as exc:
-            raise ImportError(
-                "Backend fbgemm requires fbgemm.f4f4bf16 operator, "
-                "Please install with: pip install fbgemm-gpu-genai"
-            ) from exc
-        backend = NvFp4LinearBackend.FBGEMM
-    elif envs.VLLM_USE_NVFP4_CT_EMULATIONS:
-        backend = NvFp4LinearBackend.EMULATION
-    elif envs.VLLM_NVFP4_GEMM_BACKEND is None:
-        # Auto-select best available backend
-        if current_platform.has_device_capability(100) and has_flashinfer():
-            backend = NvFp4LinearBackend.FLASHINFER_CUTLASS
-        elif cutlass_fp4_supported():
-            backend = NvFp4LinearBackend.VLLM_CUTLASS
-        elif is_fp4_marlin_supported():
-            backend = NvFp4LinearBackend.MARLIN
-    else:
-        backend = NvFp4LinearBackend(envs.VLLM_NVFP4_GEMM_BACKEND)
-
-    # Validate that the backend is supported
-    if backend in (
-        NvFp4LinearBackend.FLASHINFER_CUTLASS,
-        NvFp4LinearBackend.FLASHINFER_TRTLLM,
-        NvFp4LinearBackend.FLASHINFER_CUDNN,
-    ):
-        assert has_flashinfer(), f"FlashInfer is required for {backend}"
-    elif backend == NvFp4LinearBackend.VLLM_CUTLASS:
-        assert cutlass_fp4_supported(), f"Cutlass is required for {backend}"
-    elif backend == NvFp4LinearBackend.MARLIN:
-        assert is_fp4_marlin_supported(), f"Marlin is required for {backend}"
-    elif backend is None:
-        raise ValueError(
-            f"No NVFP4 GEMM backend selected, "
-            f"available backends: {list(NvFp4LinearBackend)}"
-        )
-
-    logger.info_once(f"Using {backend} for NVFP4 GEMM")
-    return backend
-
-
-def prepare_weights_for_nvfp4_flashinfer_trtllm(
-    weight: torch.Tensor,
-    weight_scale: torch.Tensor,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    """Prepare weights and scales for FlashInfer TRTLLM FP4 GEMM."""
-    from flashinfer import shuffle_matrix_a, shuffle_matrix_sf_a
-
-    epilogue_tile_m = 128
-    shuffled_weight = shuffle_matrix_a(weight.view(torch.uint8), epilogue_tile_m)
-    shuffled_weight_scale = (
-        shuffle_matrix_sf_a(weight_scale.view(torch.uint8), epilogue_tile_m)
-        .reshape(weight_scale.shape)
-        .view(torch.float8_e4m3fn)
-    )
-
-    return shuffled_weight, shuffled_weight_scale
-
-
-def prepare_weights_for_nvfp4_cutlass(
-    weight: torch.Tensor,
-    weight_scale: torch.Tensor,
-) -> tuple[torch.Tensor, torch.Tensor, int]:
-    """
-    Prepare weights and scales for CUTLASS/FlashInfer-CUTLASS FP4 GEMM.
-    This involves padding weights for alignment (K and N divisible by 32)
-    """
-    swizzled_weight_scale = swizzle_blockscale(weight_scale)
-    padded_weight, weights_padding_cols = pad_nvfp4_weight_for_cutlass(weight)
-    return padded_weight, swizzled_weight_scale, weights_padding_cols
-
-
-def prepare_weights_for_nvfp4_fbgemm(
-    weight: torch.Tensor,
-    weight_scale: torch.Tensor,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    """Prepare weights and scales for FBGEMM FP4 GEMM."""
-    swizzled_weight_scale = swizzle_blockscale(weight_scale)
-    swizzled_weight_scale = swizzled_weight_scale.view(-1).view(torch.uint8)
-    return weight, swizzled_weight_scale
-
-
-def convert_to_nvfp4_linear_kernel_format(
-    backend: NvFp4LinearBackend,
-    layer: torch.nn.Module,
-) -> None:
-    """Convert layer to NVFP4 linear kernel format."""
-
-    assert layer.weight_scale.dtype == torch.float8_e4m3fn, (
-        "Weight Block scale must be represented as FP8-E4M3"
-    )
-
-    # Default to no padding
-    layer.weights_padding_cols = 0
-
-    if backend == NvFp4LinearBackend.MARLIN:
-        logger.warning_once(
-            "Your GPU does not have native support for FP4 computation but "
-            "FP4 quantization is being used. Weight-only FP4 compression "
-            "will be used leveraging the Marlin kernel. This may degrade "
-            "performance for compute-heavy workloads."
-        )
-        prepare_fp4_layer_for_marlin(layer)
-    elif backend == NvFp4LinearBackend.FLASHINFER_TRTLLM:
-        weight, weight_scale = prepare_weights_for_nvfp4_flashinfer_trtllm(
-            layer.weight.data, layer.weight_scale.data
-        )
-        layer.weight = torch.nn.Parameter(weight, requires_grad=False)
-        layer.weight_scale = torch.nn.Parameter(weight_scale, requires_grad=False)
-    elif backend == NvFp4LinearBackend.FBGEMM:
-        weight, weight_scale = prepare_weights_for_nvfp4_fbgemm(
-            layer.weight.data, layer.weight_scale.data
-        )
-        layer.weight = torch.nn.Parameter(weight, requires_grad=False)
-        layer.weight_scale = torch.nn.Parameter(weight_scale, requires_grad=False)
-    elif backend in (
-        NvFp4LinearBackend.VLLM_CUTLASS,
-        NvFp4LinearBackend.FLASHINFER_CUTLASS,
-        NvFp4LinearBackend.FLASHINFER_CUDNN,
-    ):
-        weight, weight_scale, weights_padding_cols = prepare_weights_for_nvfp4_cutlass(
-            layer.weight.data, layer.weight_scale.data
-        )
-        layer.weight = torch.nn.Parameter(weight, requires_grad=False)
-        layer.weight_scale = torch.nn.Parameter(weight_scale, requires_grad=False)
-        layer.weights_padding_cols = weights_padding_cols
-
-
-def apply_nvfp4_linear(
-    backend: NvFp4LinearBackend,
-    layer: torch.nn.Module,
-    x: torch.Tensor,
-    bias: torch.Tensor | None = None,
-) -> torch.Tensor:
-    """
-    Apply NVFP4 linear transformation using the specified backend.
-    """
-    weight = layer.weight
-    weight_scale = layer.weight_scale
-    weight_global_scale = layer.weight_global_scale
-    input_global_scale_inv = layer.input_global_scale_inv
-    alpha = layer.alpha
-    output_size = layer.output_size_per_partition
-    input_size = layer.input_size_per_partition
-
-    if backend == NvFp4LinearBackend.MARLIN:
-        return apply_fp4_marlin_linear(
-            input=x,
-            weight=weight,
-            weight_scale=weight_scale,
-            weight_global_scale=weight_global_scale,
-            workspace=layer.workspace,
-            size_n=output_size,
-            size_k=input_size,
-            bias=bias,
-        )
-    elif backend == NvFp4LinearBackend.EMULATION:
-        out = run_nvfp4_emulations(
-            x=x,
-            input_global_scale=input_global_scale_inv,
-            weight=weight,
-            weight_scale_swizzled=weight_scale,
-            weight_global_scale=weight_global_scale,
-        )
-        if bias is not None:
-            out = out + bias
-        return out
-
-    output_dtype = x.dtype
-    output_shape = [*x.shape[:-1], output_size]
-
-    # Quantize BF16 or FP16 to (FP4 and interleaved block scale)
-    x_fp4, x_blockscale = scaled_fp4_quant(
-        x, input_global_scale_inv, is_sf_swizzled_layout=True, backend=backend.value
-    )
-
-    # Validate dtypes
-    assert x_fp4.dtype == torch.uint8
-    assert weight.dtype == torch.uint8
-    assert x_blockscale.dtype == torch.float8_e4m3fn
-    # weight_scale is fp8 for most backends, but uint8 for fbgemm
-    assert weight_scale.dtype in (torch.float8_e4m3fn, torch.uint8)
-    assert alpha.dtype == torch.float32
-
-    # Pad activations to match weight K-dimension padding
-    weights_padding_cols = getattr(layer, "weights_padding_cols", 0)
-    x_fp4 = pad_nvfp4_activation_for_cutlass(x_fp4, weights_padding_cols)
-
-    # Prepare args for the matmul
-    mm_args = (
-        x_fp4,
-        weight,
-        x_blockscale,
-        weight_scale,
-        alpha,
-        output_dtype,
-    )
-
-    # Call the appropriate backend
-    if backend.value.startswith("flashinfer-"):
-        backend_name = backend.value[len("flashinfer-") :]
-        out = flashinfer_scaled_fp4_mm(*mm_args, backend=backend_name)
-    elif backend == NvFp4LinearBackend.FBGEMM:
-        out = torch.ops.fbgemm.f4f4bf16(
-            x_fp4,
-            weight,
-            x_blockscale.view(-1).view(torch.uint8),
-            weight_scale,
-            alpha,
-            use_mx=False,
-        ).to(output_dtype)
-    else:
-        assert backend == NvFp4LinearBackend.VLLM_CUTLASS
-        out = cutlass_scaled_fp4_mm(*mm_args)
-
-    # Slice output to remove N-dimension padding
-    out = slice_nvfp4_output(out, output_size)
-
-    if bias is not None:
-        out = out + bias
-
-    return out.view(*output_shape)
-
 
 def swizzle_blockscale(scale: torch.Tensor) -> torch.Tensor:
     """
diff --git a/vllm/model_executor/layers/quantization/utils/petit_utils.py b/vllm/model_executor/layers/quantization/utils/petit_utils.py
deleted file mode 100644
index 2bed68c1bfa8..000000000000
--- a/vllm/model_executor/layers/quantization/utils/petit_utils.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import TYPE_CHECKING
-
-import torch
-
-# TYPE_CHECKING is used for static type analysis to prevent circular imports.
-if TYPE_CHECKING:
-    from types import ModuleType
-
-# 1. Create a global variable as a placeholder for the module
-_petit_kernel: "ModuleType | None" = None
-
-_PETIT_INSTALL_MSG = (
-    "Petit is not installed. Please install it with `pip install petit-kernel`."
-)
-
-
-def _import_petit_kernel() -> "ModuleType":
-    """
-    A helper function to handle the lazy import.
-    The first time this function is called, it will import the petit_kernel
-    library and store it in the global _petit_kernel variable.
-    Subsequent calls will return the already-loaded module directly.
-    """
-    global _petit_kernel
-    if _petit_kernel is not None:
-        return _petit_kernel
-
-    try:
-        import petit_kernel
-
-        _petit_kernel = petit_kernel
-        return _petit_kernel
-    except ImportError:
-        # The 'from None' syntax prevents chaining the original ImportError,
-        # making the traceback cleaner.
-        raise ImportError(_PETIT_INSTALL_MSG) from None
-
-
-def _check_petit_nvfp4_supported(
-    quant_method: str, group_size: int | None
-) -> tuple[bool, str | None]:
-    if quant_method != "NVFP4":
-        return (
-            False,
-            (
-                "Petit currently only supports: NVFP4 quantizations in sglang. "
-                "Please check the `hf_quant_config.json` file for your model's "
-                "quant configuration."
-            ),
-        )
-    if group_size is not None and group_size != 16:
-        return (
-            False,
-            "Petit currently only supports: group_size=16 quantizations.",
-        )
-    return (True, None)
-
-
-def verify_petit_nvfp4_supported(quant_method: str, group_size: int | None) -> None:
-    supported, error_msg = _check_petit_nvfp4_supported(quant_method, group_size)
-    if not supported:
-        assert error_msg is not None
-        raise ValueError(error_msg)
-
-
-def prepare_nvfp4_layer_for_petit(layer: torch.nn.Module) -> None:
-    # 2. Call _import_petit_kernel() to trigger (or get) the import.
-    petit_kernel = _import_petit_kernel()
-
-    # Repack weights to petit format
-    part_size_n = layer.output_size_per_partition
-    part_size_k = layer.input_size_per_partition
-    qweight = layer.weight.view(torch.int32).contiguous()
-
-    # 3. Call functions through the imported module variable.
-    petit_qweight = petit_kernel.repack_nvfp4(
-        qweight, size_n=part_size_n, size_k=part_size_k
-    )
-    layer.weight = torch.nn.Parameter(petit_qweight, requires_grad=False)
-
-    # Permute scales
-    weight_scale = petit_kernel.process_nvfp4_scales(
-        scales=layer.weight_scale, size_k=part_size_k, size_n=part_size_n
-    )
-    layer.weight_scale = torch.nn.Parameter(weight_scale, requires_grad=False)
-
-
-def apply_petit_nvfp4_linear(
-    input: torch.Tensor,
-    weight: torch.Tensor,
-    weight_scale: torch.Tensor,
-    weight_scale_2: torch.Tensor,
-    size_n: int,
-    size_k: int,
-    bias: torch.Tensor | None = None,
-) -> torch.Tensor:
-    # Trigger (or get) the import here as well.
-    petit_kernel = _import_petit_kernel()
-
-    reshaped_x = input.reshape(-1, input.shape[-1])
-    out_shape = input.shape[:-1] + (size_n,)
-
-    # TODO: Use auto-tuning to find the performant solution_id
-    # Call the function via the module variable.
-    output = petit_kernel.mul_nvfp4_a16(
-        a=reshaped_x,
-        b=weight,
-        s=weight_scale,
-        global_scale=weight_scale_2,
-        size_m=reshaped_x.size(0),
-        size_n=size_n,
-        size_k=size_k,
-        solution_id=-1,
-    )
-    if bias is not None:
-        output.add_(bias)  # In-place add
-
-    return output.reshape(out_shape)
diff --git a/vllm/model_executor/layers/quantization/utils/quant_utils.py b/vllm/model_executor/layers/quantization/utils/quant_utils.py
index 1170a2d3a77c..0b1802522415 100644
--- a/vllm/model_executor/layers/quantization/utils/quant_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/quant_utils.py
@@ -20,6 +20,8 @@
 FP8_DTYPE = current_platform.fp8_dtype()
 FP4_DTYPE = torch.uint8
 MXFP_SCALE_DTYPE = torch.uint8
+INT4_DTYPE = scalar_types.uint4b8
+INT8_DTYPE = scalar_types.uint8b128
 
 
 def get_fp8_min_max() -> tuple[float, float]:
@@ -170,6 +172,25 @@ def __str__(self):
 kMxfp4StaticGroupScale = ScaleDesc(MXFP_SCALE_DTYPE, True, GroupShape(1, 32))
 kMxfp4Static = QuantKey(FP4_DTYPE, scale=kMxfp4StaticGroupScale, symmetric=True)
 
+# TODO: convert this to use SCALAR_TYPE. This is not right.
+kInt4StaticGroupScale = ScaleDesc(torch.float16, True, GroupShape(1, -1))
+kInt4Static = QuantKey(INT4_DTYPE, scale=kInt4StaticGroupScale, symmetric=True)
+kInt8StaticGroupScale = ScaleDesc(torch.float16, True, GroupShape(1, -1))
+kInt8Static = QuantKey(INT8_DTYPE, scale=kInt8StaticGroupScale, symmetric=True)
+
+kInt8StaticChannelSym = QuantKey(torch.int8, kStaticChannelScale, symmetric=True)
+kInt8DynamicTokenSym = QuantKey(torch.int8, kDynamicTokenScale, symmetric=True)
+
+
+def create_fp8_quant_key(
+    static: bool,
+    group_shape: GroupShape,
+    symmetric: bool = True,
+    scale_dtype: torch.dtype = torch.float32,
+) -> QuantKey:
+    scale_desc = ScaleDesc(scale_dtype, static, group_shape)
+    return QuantKey(FP8_DTYPE, scale_desc, symmetric=symmetric)
+
 
 # Normalize the group_shape to the full extent for any dims that are -1
 def _normalize_quant_group_shape(x: torch.Tensor, group_shape: GroupShape):
@@ -346,6 +367,12 @@ def get_and_maybe_dequant_weights(
     from vllm.model_executor.layers.linear import UnquantizedLinearMethod
     from vllm.model_executor.layers.quantization.fp8 import Fp8LinearMethod
 
+    # LoRA linear wrappers store quantization metadata on `base_layer`.
+    # Unwrap here so callers can pass either a raw linear layer or its LoRA
+    # wrapper without special-casing.
+    while hasattr(layer, "base_layer") and hasattr(layer.base_layer, "quant_method"):
+        layer = layer.base_layer
+
     weight = get_attribute_fallback(layer, ["weight", "qweight", "weight_packed"])
 
     # Unquantized layer: just return base weights
@@ -808,7 +835,7 @@ def convert_bf16_scales_to_fp8(
 
     # restore original shape
     fp8_scales = fp8_scales.view(orig_shape)
-    chan_scales = chan_scales.view(orig_shape[:-1], -1)
+    chan_scales = chan_scales.view(*orig_shape[:-1], -1)
 
     return fp8_scales, chan_scales
 
diff --git a/vllm/model_executor/layers/rotary_embedding/__init__.py b/vllm/model_executor/layers/rotary_embedding/__init__.py
index 9ad7c9cdafd3..0a69b3b3f1d3 100644
--- a/vllm/model_executor/layers/rotary_embedding/__init__.py
+++ b/vllm/model_executor/layers/rotary_embedding/__init__.py
@@ -7,11 +7,15 @@
 import torch
 
 from .base import RotaryEmbedding
-from .deepseek_scaling_rope import DeepseekScalingRotaryEmbedding
+from .deepseek_scaling_rope import (
+    DeepseekScalingRotaryEmbedding,
+    DeepseekV4ScalingRotaryEmbedding,
+)
 from .dual_chunk_rope import DualChunkRotaryEmbedding
 from .dynamic_ntk_alpha_rope import DynamicNTKAlphaRotaryEmbedding
 from .dynamic_ntk_scaling_rope import DynamicNTKScalingRotaryEmbedding
 from .fope import FourierRotaryEmbedding
+from .gemma4_rope import Gemma4RotaryEmbedding
 from .linear_scaling_rope import LinearScalingRotaryEmbedding
 from .llama3_rope import Llama3RotaryEmbedding
 from .llama4_vision_rope import Llama4VisionRotaryEmbedding
@@ -19,6 +23,7 @@
 from .mrope_interleaved import MRotaryEmbeddingInterleaved
 from .ntk_scaling_rope import NTKScalingRotaryEmbedding
 from .phi3_long_rope_scaled_rope import Phi3LongRoPEScaledRotaryEmbedding
+from .telechat3_scaling_rope import TeleChat3RoPEScaledRotaryEmbedding
 from .xdrope import XDRotaryEmbedding
 from .yarn_scaling_rope import YaRNScalingRotaryEmbedding
 
@@ -58,11 +63,13 @@ def get_rope(
     rope_parameters = rope_parameters or {}
     base = rope_parameters.get("rope_theta", 10000)
     scaling_type = rope_parameters.get("rope_type", "default")
-    partial_rotary_factor = rope_parameters.get("partial_rotary_factor", 1.0)
-
-    if partial_rotary_factor <= 0.0 or partial_rotary_factor > 1.0:
-        raise ValueError(f"{partial_rotary_factor=} must be between 0.0 and 1.0")
-    rotary_dim = int(head_size * partial_rotary_factor)
+    if rotary_dim := rope_parameters.get("rope_dim", None):
+        pass
+    else:
+        partial_rotary_factor = rope_parameters.get("partial_rotary_factor", 1.0)
+        if partial_rotary_factor <= 0.0 or partial_rotary_factor > 1.0:
+            raise ValueError(f"{partial_rotary_factor=} must be between 0.0 and 1.0")
+        rotary_dim = int(head_size * partial_rotary_factor)
 
     key = (
         head_size,
@@ -134,6 +141,17 @@ def get_rope(
                 is_neox_style,
                 dtype,
             )
+    elif scaling_type == "proportional":
+        # Proportional RoPE is used by Gemma4 for global (full) attention.
+        # Gemma4 uses a sparse/fractional RoPE with cross-mixing between halves.
+        rotary_emb = Gemma4RotaryEmbedding(
+            head_size,
+            rotary_dim,
+            max_position,
+            base,
+            is_neox_style,
+            dtype,
+        )
     elif scaling_type == "llama3":
         scaling_factor = rope_parameters["factor"]
         low_freq_factor = rope_parameters["low_freq_factor"]
@@ -193,10 +211,14 @@ def get_rope(
             )
         elif "factor" in rope_parameters:
             scaling_factor = rope_parameters["factor"]
+            max_trained_positions = rope_parameters.get(
+                "max_trained_positions", max_position
+            )
             rotary_emb = DynamicNTKScalingRotaryEmbedding(
                 head_size,
                 rotary_dim,
                 max_position,
+                max_trained_positions,
                 base,
                 is_neox_style,
                 scaling_factor,
@@ -276,7 +298,11 @@ def get_rope(
                 "mscale_all_dim",
             )
         }
-        rotary_emb = DeepseekScalingRotaryEmbedding(
+        if rope_parameters.get("is_deepseek_v4", False):
+            cls = DeepseekV4ScalingRotaryEmbedding
+        else:
+            cls = DeepseekScalingRotaryEmbedding
+        rotary_emb = cls(
             head_size,
             rotary_dim,
             original_max_position,
@@ -322,6 +348,36 @@ def get_rope(
             )
         else:
             raise ValueError("Pangu mrope lacks necessary parameters.")
+    elif scaling_type == "telechat3-yarn":
+        scaling_factor = rope_parameters["factor"]
+        if "original_max_position_embeddings" in rope_parameters:
+            original_max_position = rope_parameters["original_max_position_embeddings"]
+            scaling_factor = max_position / original_max_position
+        else:
+            original_max_position = max_position
+        extra_kwargs = {
+            k: v
+            for k, v in rope_parameters.items()
+            if k
+            in (
+                "extrapolation_factor",
+                "attn_factor",
+                "beta_fast",
+                "beta_slow",
+                "mscale",
+                "mscale_all_dim",
+            )
+        }
+        rotary_emb = TeleChat3RoPEScaledRotaryEmbedding(
+            head_size,
+            rotary_dim,
+            original_max_position,
+            base,
+            is_neox_style,
+            scaling_factor,
+            dtype,
+            **extra_kwargs,
+        )
     else:
         raise ValueError(f"Unknown RoPE scaling type {scaling_type}")
     _ROPE_DICT[key] = rotary_emb
diff --git a/vllm/model_executor/layers/rotary_embedding/common.py b/vllm/model_executor/layers/rotary_embedding/common.py
index e0576ee8e4f7..2e407ae7159e 100644
--- a/vllm/model_executor/layers/rotary_embedding/common.py
+++ b/vllm/model_executor/layers/rotary_embedding/common.py
@@ -8,6 +8,7 @@
 
 from vllm.logger import init_logger
 from vllm.model_executor.custom_op import CustomOp
+from vllm.platforms import current_platform
 from vllm.utils.torch_utils import direct_register_custom_op
 
 logger = init_logger(__name__)
@@ -134,7 +135,7 @@ def __init__(
         self.enable_fp32_compute = enable_fp32_compute
 
         self.apply_rotary_emb_flash_attn = None
-        if find_spec("flash_attn") is not None:
+        if not current_platform.is_cpu() and find_spec("flash_attn") is not None:
             from flash_attn.ops.triton.rotary import apply_rotary
 
             self.apply_rotary_emb_flash_attn = apply_rotary
diff --git a/vllm/model_executor/layers/rotary_embedding/deepseek_scaling_rope.py b/vllm/model_executor/layers/rotary_embedding/deepseek_scaling_rope.py
index 69c1101664d0..7362abcc8fbc 100644
--- a/vllm/model_executor/layers/rotary_embedding/deepseek_scaling_rope.py
+++ b/vllm/model_executor/layers/rotary_embedding/deepseek_scaling_rope.py
@@ -45,6 +45,7 @@ def __init__(
         beta_slow: int = 1,
         mscale: float = 1,
         mscale_all_dim: float = 0,
+        init_cache: bool = True,
     ) -> None:
         self.scaling_factor = scaling_factor
         self.extrapolation_factor = extrapolation_factor
@@ -65,7 +66,13 @@ def __init__(
             and head_size in [64, 128, 256, 512]
         )
         super().__init__(
-            head_size, rotary_dim, max_position_embeddings, base, is_neox_style, dtype
+            head_size,
+            rotary_dim,
+            max_position_embeddings,
+            base,
+            is_neox_style,
+            dtype,
+            init_cache=init_cache,
         )
 
     def _compute_inv_freq(self, scaling_factor: float) -> torch.Tensor:
@@ -120,31 +127,52 @@ def forward_native(
     ) -> tuple[torch.Tensor, torch.Tensor | None]:
         """PyTorch-native implementation equivalent to forward()."""
         assert key is not None
-        cos_sin_cache = self._match_cos_sin_cache_dtype(query)
-        query_rot = query[..., : self.rotary_dim]
-        key_rot = key[..., : self.rotary_dim]
-        if self.rotary_dim < self.head_size:
-            query_pass = query[..., self.rotary_dim :]
-            key_pass = key[..., self.rotary_dim :]
+        return self.forward_static(
+            positions,
+            query,
+            key,
+            self.head_size,
+            self.rotary_dim,
+            self.cos_sin_cache,
+            self.is_neox_style,
+            offsets,
+        )
+
+    @staticmethod
+    def forward_static(
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: torch.Tensor | None,
+        head_size: int,
+        rotary_dim: int,
+        cos_sin_cache: torch.Tensor,
+        is_neox_style: bool,
+        offsets: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
+        """A static implementation of forward()."""
+        assert key is not None
+        query_rot = query[..., :rotary_dim]
+        key_rot = key[..., :rotary_dim]
+        if rotary_dim < head_size:
+            query_pass = query[..., rotary_dim:]
+            key_pass = key[..., rotary_dim:]
 
         cos_sin = cos_sin_cache[
             torch.add(positions, offsets) if offsets is not None else positions
         ]
         cos, sin = cos_sin.chunk(2, dim=-1)
-        if self.is_neox_style:
-            # NOTE(woosuk): Here we assume that the positions tensor has the
-            # shape [batch_size, seq_len].
-            cos = cos.repeat(1, 1, 2).unsqueeze(-2)
-            sin = sin.repeat(1, 1, 2).unsqueeze(-2)
+        if is_neox_style:
+            cos = torch.cat((cos, cos), dim=-1).unsqueeze(-2)
+            sin = torch.cat((sin, sin), dim=-1).unsqueeze(-2)
         else:
             cos = cos.repeat_interleave(2, dim=-1).unsqueeze(-2)
             sin = sin.repeat_interleave(2, dim=-1).unsqueeze(-2)
 
-        rotate_fn = rotate_neox if self.is_neox_style else rotate_gptj
+        rotate_fn = rotate_neox if is_neox_style else rotate_gptj
         query_rot = query_rot * cos + rotate_fn(query_rot) * sin
         key_rot = key_rot * cos + rotate_fn(key_rot) * sin
 
-        if self.rotary_dim < self.head_size:
+        if rotary_dim < head_size:
             query = torch.cat((query_rot, query_pass), dim=-1)
             key = torch.cat((key_rot, key_pass), dim=-1)
         else:
@@ -197,3 +225,120 @@ def forward_cuda(
             return query, key
         else:
             return self.forward_native(positions, query, key, offsets)
+
+
+class DeepseekV4ScalingRotaryEmbedding(DeepseekScalingRotaryEmbedding):
+    """RotaryEmbedding extended with YaRN method.
+
+    Credits to Peng et al. github.com/jquesnelle/yarn
+
+    Compared to DeepseekScalingRotaryEmbedding:
+    - Applies RoPE to the last rotary_dim
+    - The forward method requires an inverse parameter to indicate
+      whether to negate the sin
+    - Supports applying RoPE to query only (without key)
+    - cos_sin_cache stored as fp32 for higher precision RoPE
+    """
+
+    def __init__(self, *args, **kwargs):
+        # Avoid compute cache repeatedly
+        kwargs.pop("init_cache", None)
+        super().__init__(*args, **kwargs, init_cache=False)
+        cache_fp32 = self._compute_cos_sin_cache()
+        self.register_buffer("cos_sin_cache", cache_fp32, persistent=False)
+
+    def _compute_cos_sin_cache(self) -> torch.Tensor:
+        inv_freq = self._compute_inv_freq(self.scaling_factor)
+        t = torch.arange(
+            self.max_position_embeddings * self.scaling_factor,
+            device=current_platform.device_type,
+            dtype=torch.float32,
+        )
+        freqs = torch.einsum("i,j -> ij", t, inv_freq)
+        cos = freqs.cos() * self.mscale
+        sin = freqs.sin() * self.mscale
+        cache = torch.cat((cos, sin), dim=-1)
+        return cache
+
+    def forward_native(
+        self,
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: torch.Tensor | None = None,
+        offsets: torch.Tensor | None = None,
+        inverse: bool = False,
+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
+        """PyTorch-native implementation equivalent to forward()."""
+
+        head_size = query.size(-1)
+        query_rot = query[..., -self.rotary_dim :]
+        key_rot = key[..., -self.rotary_dim :] if key is not None else None
+
+        if self.rotary_dim < head_size:
+            query_pass = query[..., : -self.rotary_dim]
+            key_pass = key[..., : -self.rotary_dim] if key is not None else None
+
+        cos_sin = self.cos_sin_cache[
+            torch.add(positions, offsets) if offsets is not None else positions
+        ]
+        cos, sin = cos_sin.chunk(2, dim=-1)
+        if self.is_neox_style:
+            cos = torch.cat((cos, cos), dim=-1).unsqueeze(-2)
+            sin = torch.cat((sin, sin), dim=-1).unsqueeze(-2)
+        else:
+            cos = cos.repeat_interleave(2, dim=-1).unsqueeze(-2)
+            sin = sin.repeat_interleave(2, dim=-1).unsqueeze(-2)
+        if inverse:
+            sin = -sin
+        rotate_fn = rotate_neox if self.is_neox_style else rotate_gptj
+        orig_dtype = query.dtype
+        query_rot = (query_rot * cos + rotate_fn(query_rot) * sin).to(orig_dtype)
+        if key_rot is not None:
+            key_rot = (key_rot * cos + rotate_fn(key_rot) * sin).to(orig_dtype)
+
+        if self.rotary_dim < head_size:
+            query = torch.cat((query_pass, query_rot), dim=-1)
+            key = torch.cat((key_pass, key_rot), dim=-1) if key is not None else None
+        else:
+            query = query_rot
+            key = key_rot
+
+        return query, key
+
+    def forward_hip(
+        self,
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: torch.Tensor | None = None,
+        offsets: torch.Tensor | None = None,
+        inverse: bool = False,
+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
+        return self.forward_native(positions, query, key, offsets)
+
+    def forward_cuda(
+        self,
+        positions: torch.Tensor,
+        query: torch.Tensor,
+        key: torch.Tensor | None = None,
+        offsets: torch.Tensor | None = None,
+        inverse: bool = False,
+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
+        from vllm import _custom_ops as ops
+
+        # The indexer and attention have different head_dim,
+        # we obtain the corresponding head_dim via the query.
+        head_size = query.size(-1)
+        rope_dim_offset = head_size - self.rotary_dim
+        # ops.rotary_embedding() is an in-place operation
+        # that updates the query and key tensors.
+        ops.rotary_embedding(
+            torch.add(positions, offsets) if offsets is not None else positions,
+            query,
+            key,
+            head_size,
+            self.cos_sin_cache,
+            self.is_neox_style,
+            rope_dim_offset=rope_dim_offset,
+            inverse=inverse,
+        )
+        return query, key
diff --git a/vllm/model_executor/layers/rotary_embedding/dual_chunk_rope.py b/vllm/model_executor/layers/rotary_embedding/dual_chunk_rope.py
index ec03fc6533f9..59ad6359aa1c 100644
--- a/vllm/model_executor/layers/rotary_embedding/dual_chunk_rope.py
+++ b/vllm/model_executor/layers/rotary_embedding/dual_chunk_rope.py
@@ -195,10 +195,8 @@ def forward_cuda(
     def _apply_rotary_embedding(self, cos_sin, hidden_rot, hidden_pass):
         cos, sin = cos_sin.chunk(2, dim=-1)
         if self.is_neox_style:
-            # NOTE(woosuk): Here we assume that the positions tensor has the
-            # shape [batch_size, seq_len].
-            cos = cos.repeat(1, 1, 2).unsqueeze(-2)
-            sin = sin.repeat(1, 1, 2).unsqueeze(-2)
+            cos = torch.cat((cos, cos), dim=-1).unsqueeze(-2)
+            sin = torch.cat((sin, sin), dim=-1).unsqueeze(-2)
         else:
             cos = cos.repeat_interleave(2, dim=-1).unsqueeze(-2)
             sin = sin.repeat_interleave(2, dim=-1).unsqueeze(-2)
diff --git a/vllm/model_executor/layers/rotary_embedding/dynamic_ntk_scaling_rope.py b/vllm/model_executor/layers/rotary_embedding/dynamic_ntk_scaling_rope.py
index 28fd87ecc21f..8a48be490b6f 100644
--- a/vllm/model_executor/layers/rotary_embedding/dynamic_ntk_scaling_rope.py
+++ b/vllm/model_executor/layers/rotary_embedding/dynamic_ntk_scaling_rope.py
@@ -38,12 +38,14 @@ def __init__(
         head_size: int,
         rotary_dim: int,
         max_position_embeddings: int,
+        max_trained_positions: int,
         base: float,
         is_neox_style: bool,
         scaling_factor: float,
         dtype: torch.dtype,
     ) -> None:
         self.scaling_factor = scaling_factor
+        self.max_trained_positions = max_trained_positions
         super().__init__(
             head_size, rotary_dim, max_position_embeddings, base, is_neox_style, dtype
         )
@@ -53,13 +55,16 @@ def _compute_cos_sin_cache(self) -> torch.Tensor:
         # maximum length before applying the rope scaling.
         # Thus, the maximum length after applying the rope scaling is
         # self.max_position_embeddings * self.scaling_factor.
-        max_len = self.max_position_embeddings * self.scaling_factor
         base = self.base * (
-            (self.scaling_factor * max_len / self.max_position_embeddings)
+            (
+                self.scaling_factor
+                * self.max_position_embeddings
+                / self.max_trained_positions
+            )
             - (self.scaling_factor - 1)
         ) ** (self.rotary_dim / (self.rotary_dim - 2))
         inv_freq = self._compute_inv_freq(base)
-        t = torch.arange(max_len, dtype=torch.float)
+        t = torch.arange(self.max_position_embeddings, dtype=torch.float)
 
         freqs = torch.einsum("i,j -> ij", t, inv_freq)
         cos = freqs.cos()
diff --git a/vllm/model_executor/layers/rotary_embedding/gemma4_rope.py b/vllm/model_executor/layers/rotary_embedding/gemma4_rope.py
new file mode 100644
index 000000000000..48253f469cc8
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/gemma4_rope.py
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Gemma4-specific Rotary Positional Embeddings (proportional scaling).
+
+Gemma4 uses "proportional" RoPE which computes inv_freq frequencies scaled
+by head_dim (not rotary_dim), and zero-pads for non-rotated dimensions when
+partial_rotary_factor < 1. The actual rotation uses standard neox-style
+rotate_half, matching HF transformers' apply_rotary_pos_emb.
+"""
+
+import torch
+
+from .base import RotaryEmbedding
+
+
+class Gemma4RotaryEmbedding(RotaryEmbedding):
+    """Gemma4 proportional RoPE.
+
+    Extends RotaryEmbedding (which provides standard neox-style rotation
+    via ops.rotary_embedding CUDA kernel) but overrides the inv_freq
+    computation to match HF's _compute_proportional_rope_parameters:
+    - Frequency exponents use head_dim (not rotary_dim) as denominator
+    - Non-rotated dims are zero-padded (cos=1, sin=0 = identity rotation)
+
+    When partial_rotary_factor=1.0 (the default for some variants), ALL dims are
+    rotated and this is equivalent to standard RotaryEmbedding with
+    head_dim-scaled frequencies.
+    """
+
+    def __init__(
+        self,
+        head_size: int,
+        rotary_dim: int,
+        max_position_embeddings: int,
+        base: float,
+        is_neox_style: bool,
+        dtype: torch.dtype,
+    ) -> None:
+        # Number of rotation angle pairs (from partial_rotary_factor)
+        self.rope_angles = rotary_dim // 2
+        # Non-rotated angle pairs per half
+        self.nope_angles = (head_size // 2) - self.rope_angles
+
+        # Important: set rotary_dim = head_size so the base class's
+        # forward_static applies rotation to ALL dims of the cos/sin cache.
+        # The non-rotated dims will have cos=1, sin=0 (identity) thanks
+        # to our _compute_inv_freq zero-padding.
+        super().__init__(
+            head_size,
+            head_size,  # rotary_dim = head_size (full application)
+            max_position_embeddings,
+            base,
+            is_neox_style,
+            dtype,
+        )
+
+    def _compute_inv_freq(self, base: float) -> torch.Tensor:
+        """Compute frequencies matching HF proportional RoPE.
+
+        Key difference from base: exponent denominator is head_size (not
+        rotary_dim), and non-rotated dims are zero-padded.
+        """
+        # HF formula: base ** (arange(0, 2*rope_angles, 2) / head_dim)
+        freq_exponents = (
+            torch.arange(0, 2 * self.rope_angles, 2, dtype=torch.float) / self.head_size
+        )
+        inv_freq = 1.0 / (base**freq_exponents)
+
+        # Zero-pad for non-rotated dims (identity rotation: cos=1, sin=0)
+        if self.nope_angles > 0:
+            inv_freq = torch.cat(
+                [
+                    inv_freq,
+                    torch.zeros(self.nope_angles, dtype=torch.float),
+                ]
+            )
+        return inv_freq
+
+    def extra_repr(self) -> str:
+        s = f"head_size={self.head_size}, rotary_dim={self.rotary_dim}"
+        s += f", rope_angles={self.rope_angles}, nope_angles={self.nope_angles}"
+        s += f", max_position_embeddings={self.max_position_embeddings}"
+        s += f", base={self.base}, is_neox_style={self.is_neox_style}"
+        return s
diff --git a/vllm/model_executor/layers/rotary_embedding/telechat3_scaling_rope.py b/vllm/model_executor/layers/rotary_embedding/telechat3_scaling_rope.py
new file mode 100644
index 000000000000..dd2fb9c320b6
--- /dev/null
+++ b/vllm/model_executor/layers/rotary_embedding/telechat3_scaling_rope.py
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import math
+
+import torch
+
+from .base import RotaryEmbedding
+from .yarn_scaling_rope import YaRNScalingRotaryEmbedding
+
+
+class TeleChat3RoPEScaledRotaryEmbedding(YaRNScalingRotaryEmbedding):
+    """TeleChat3 uses a variant of YaRN method.
+
+    To achieve code reuse as much as possible, we have rewritten the
+    `get_mscale` method in the initialization function
+    """
+
+    def __init__(
+        self,
+        head_size: int,
+        rotary_dim: int,
+        max_position_embeddings: int,
+        base: int,
+        is_neox_style: bool,
+        scaling_factor: float,
+        dtype: torch.dtype,
+        *,
+        extrapolation_factor: float = 1,
+        attn_factor: float = 1,
+        beta_fast: int = 32,
+        beta_slow: int = 1,
+        truncate: bool = True,
+    ) -> None:
+        self.scaling_factor = scaling_factor
+        self.extrapolation_factor = extrapolation_factor
+        self.attn_factor = attn_factor
+        self.beta_fast = beta_fast
+        self.beta_slow = beta_slow
+        self.truncate = truncate
+
+        def get_mscale(scale, mscale=1):
+            if scale <= 1:
+                return 1.0
+            return 0.07 * mscale * math.log(scale) + 1.0
+
+        self.mscale = float(get_mscale(self.scaling_factor) * attn_factor)
+        # Initialization must be performed after mscale, otherwise mscale is useless
+        RotaryEmbedding.__init__(
+            self,
+            head_size,
+            rotary_dim,
+            max_position_embeddings,
+            base,
+            is_neox_style,
+            dtype,
+        )
diff --git a/vllm/model_executor/layers/sparse_attn_indexer.py b/vllm/model_executor/layers/sparse_attn_indexer.py
index 496b457a15e2..9597708b62e7 100644
--- a/vllm/model_executor/layers/sparse_attn_indexer.py
+++ b/vllm/model_executor/layers/sparse_attn_indexer.py
@@ -4,32 +4,87 @@
 
 import torch
 
+import vllm.envs as envs
+from vllm import _custom_ops as ops
 from vllm._aiter_ops import rocm_aiter_ops
+from vllm.compilation.breakable_cudagraph import eager_break_during_capture
 from vllm.forward_context import get_forward_context
 from vllm.logger import init_logger
 from vllm.model_executor.custom_op import CustomOp
 from vllm.platforms import current_platform
-from vllm.utils.deep_gemm import fp8_mqa_logits, fp8_paged_mqa_logits, has_deep_gemm
-from vllm.utils.torch_utils import direct_register_custom_op
+from vllm.utils.deep_gemm import (
+    fp8_fp4_mqa_logits,
+    fp8_fp4_paged_mqa_logits,
+    has_deep_gemm,
+)
+from vllm.utils.torch_utils import (
+    LayerNameType,
+    _encode_layer_name,
+    _resolve_layer_name,
+    direct_register_custom_op,
+)
 from vllm.v1.attention.backends.mla.indexer import (
     DeepseekV32IndexerMetadata,
 )
 from vllm.v1.attention.ops.common import pack_seq_triton, unpack_seq_triton
 from vllm.v1.worker.workspace import current_workspace_manager
 
-if current_platform.is_cuda_alike():
-    from vllm import _custom_ops as ops
-elif current_platform.is_xpu():
-    from vllm._xpu_ops import xpu_ops as ops
-
 logger = init_logger(__name__)
 
+RADIX_TOPK_WORKSPACE_SIZE = 1024 * 1024
+
+# MXFP4 layout: 2 values packed per byte, ue8m0 (1-byte) scale per block of 32.
+MXFP4_BLOCK_SIZE = 32
+
+
+def _gather_workspace_shapes(
+    total_seq_lens: int,
+    head_dim: int,
+    fp8_dtype: torch.dtype,
+    use_fp4_cache: bool,
+) -> tuple[tuple[tuple[int, int], torch.dtype], tuple[tuple[int, int], torch.dtype]]:
+    """Return ((values_shape, values_dtype), (scales_shape, scales_dtype)) for
+    the K-gather workspace. FP8 path: (T, head_dim) fp8 + (T, 4) uint8 fp32
+    scales. MXFP4 path: (T, head_dim // 2) uint8 packed mxfp4 +
+    (T, head_dim // MXFP4_BLOCK_SIZE) uint8 ue8m0 scales."""
+    if use_fp4_cache:
+        return (
+            ((total_seq_lens, head_dim // 2), torch.uint8),
+            ((total_seq_lens, head_dim // MXFP4_BLOCK_SIZE), torch.uint8),
+        )
+    return (
+        ((total_seq_lens, head_dim), fp8_dtype),
+        ((total_seq_lens, 4), torch.uint8),
+    )
+
+
+def kv_cache_as_quant_view(
+    kv_cache: torch.Tensor,
+    head_dim: int,
+    use_fp4_cache: bool,
+) -> torch.Tensor:
+    """4D ``[num_blocks, block_size, 1, head_width]`` view expected by
+    DeepGEMM, from the 3D indexer kv-cache allocation."""
+    if use_fp4_cache:
+        assert kv_cache.ndim == 3 and kv_cache.dtype == torch.uint8
+        num_blocks, block_size, _ = kv_cache.shape
+        page_bytes = int(kv_cache.stride(0))
+        fp4_bytes = head_dim // 2 + head_dim // MXFP4_BLOCK_SIZE
+        return torch.as_strided(
+            kv_cache,
+            size=(num_blocks, block_size, 1, fp4_bytes),
+            stride=(page_bytes, fp4_bytes, fp4_bytes, 1),
+        )
+    return kv_cache.unsqueeze(-2)
+
 
+@eager_break_during_capture
 def sparse_attn_indexer(
     hidden_states: torch.Tensor,
-    k_cache_prefix: str,
+    k_cache_prefix: LayerNameType,
     kv_cache: torch.Tensor,
-    q_fp8: torch.Tensor,
+    q_quant: torch.Tensor,
+    q_scale: torch.Tensor | None,
     k: torch.Tensor,
     weights: torch.Tensor,
     quant_block_size: int,
@@ -39,23 +94,39 @@ def sparse_attn_indexer(
     max_model_len: int,
     total_seq_lens: int,
     topk_indices_buffer: torch.Tensor,
+    skip_k_cache_insert: bool,
+    use_fp4_cache: bool = False,
 ) -> torch.Tensor:
     # careful! this will be None in dummy run
     attn_metadata = get_forward_context().attn_metadata
     fp8_dtype = current_platform.fp8_dtype()
+    k_cache_prefix = _resolve_layer_name(k_cache_prefix)
 
     # assert isinstance(attn_metadata, dict)
     if not isinstance(attn_metadata, dict):
         # Reserve workspace for indexer during profiling run
+        values_spec, scales_spec = _gather_workspace_shapes(
+            total_seq_lens, head_dim, fp8_dtype, use_fp4_cache
+        )
         current_workspace_manager().get_simultaneous(
-            ((total_seq_lens, head_dim), torch.float8_e4m3fn),
-            ((total_seq_lens, 4), torch.uint8),
+            values_spec,
+            scales_spec,
+            ((RADIX_TOPK_WORKSPACE_SIZE,), torch.uint8),
+        )
+
+        # Dummy allocation to simulate for peak logits tensor memory during inference.
+        # FP8 elements so elements == bytes
+        max_logits_elems = envs.VLLM_SPARSE_INDEXER_MAX_LOGITS_MB * 1024 * 1024
+        _ = torch.empty(
+            max_logits_elems, dtype=torch.uint8, device=hidden_states.device
         )
+
         return sparse_attn_indexer_fake(
             hidden_states,
             k_cache_prefix,
             kv_cache,
-            q_fp8,
+            q_quant,
+            q_scale,
             k,
             weights,
             quant_block_size,
@@ -65,174 +136,228 @@ def sparse_attn_indexer(
             max_model_len,
             total_seq_lens,
             topk_indices_buffer,
+            skip_k_cache_insert,
+            use_fp4_cache,
         )
-    attn_metadata = attn_metadata[k_cache_prefix]
-    assert isinstance(attn_metadata, DeepseekV32IndexerMetadata)
-    slot_mapping = attn_metadata.slot_mapping
-    has_decode = attn_metadata.num_decodes > 0
-    has_prefill = attn_metadata.num_prefills > 0
-    num_decode_tokens = attn_metadata.num_decode_tokens
+    attn_metadata_narrowed = attn_metadata[k_cache_prefix]
+    assert isinstance(attn_metadata_narrowed, DeepseekV32IndexerMetadata)
+    slot_mapping = attn_metadata_narrowed.slot_mapping
+    has_decode = attn_metadata_narrowed.num_decodes > 0
+    has_prefill = attn_metadata_narrowed.num_prefills > 0
+    num_decode_tokens = attn_metadata_narrowed.num_decode_tokens
+
+    # q_scale is required iff the FP4 cache path is enabled; the FP8 path
+    # folds the Q scale into `weights` inside fused_indexer_q_rope_quant.
+    if use_fp4_cache:
+        assert q_scale is not None, "use_fp4_cache=True requires q_scale"
+    else:
+        assert q_scale is None, "q_scale must be None when use_fp4_cache=False"
 
     # During speculative decoding, k may be padded to the CUDA graph batch
     # size while slot_mapping only covers actual tokens. Truncate k to avoid
     # out-of-bounds reads in the kernel.
     num_tokens = slot_mapping.shape[0]
-    k = k[:num_tokens]
-
-    ops.indexer_k_quant_and_cache(
-        k,
-        kv_cache,
-        slot_mapping,
-        quant_block_size,
-        scale_fmt,
-    )
+    if k is not None:
+        k = k[:num_tokens]
+
+    if not skip_k_cache_insert:
+        # scale_fmt can be None, but the function expects str
+        assert scale_fmt is not None
+        assert not use_fp4_cache, "Unfused FP4 Insert is not supported yet"
+        ops.indexer_k_quant_and_cache(
+            k,
+            kv_cache,
+            slot_mapping,
+            quant_block_size,
+            scale_fmt,
+        )
 
     topk_indices_buffer[: hidden_states.shape[0]] = -1
     if has_prefill:
-        prefill_metadata = attn_metadata.prefill
+        prefill_metadata = attn_metadata_narrowed.prefill
         assert prefill_metadata is not None
 
-        # Get the full shared workspace buffers once (will allocate on first use)
+        # Get the full shared workspace buffers once (will allocate on first use).
+        # Layout switches between FP8 (head_dim bytes + 4-byte fp32 scale) and
+        # MXFP4 (head_dim/2 bytes packed + head_dim/MXFP4_BLOCK_SIZE ue8m0
+        # scales) based on use_fp4_cache.
         workspace_manager = current_workspace_manager()
-        k_fp8_full, k_scale_full = workspace_manager.get_simultaneous(
-            ((total_seq_lens, head_dim), fp8_dtype),
-            ((total_seq_lens, 4), torch.uint8),
+        values_spec, scales_spec = _gather_workspace_shapes(
+            total_seq_lens, head_dim, fp8_dtype, use_fp4_cache
+        )
+        k_quant_full, k_scale_full = workspace_manager.get_simultaneous(
+            values_spec,
+            scales_spec,
         )
         for chunk in prefill_metadata.chunks:
-            k_fp8 = k_fp8_full[: chunk.total_seq_lens]
+            k_quant = k_quant_full[: chunk.total_seq_lens]
             k_scale = k_scale_full[: chunk.total_seq_lens]
-            ops.cp_gather_indexer_k_quant_cache(
-                kv_cache,
-                k_fp8,
-                k_scale,
-                chunk.block_table,
-                chunk.cu_seq_lens,
-            )
-            logits = fp8_mqa_logits(
-                q_fp8[chunk.token_start : chunk.token_end],
-                (k_fp8, k_scale.view(torch.float32).flatten()),
-                weights[chunk.token_start : chunk.token_end],
-                chunk.cu_seqlen_ks,
-                chunk.cu_seqlen_ke,
-                clean_logits=False,
-            )
-            num_rows = logits.shape[0]
 
-            topk_indices = topk_indices_buffer[
-                chunk.token_start : chunk.token_end, :topk_tokens
-            ]
+            if not chunk.skip_kv_gather:
+                ops.cp_gather_indexer_k_quant_cache(
+                    kv_cache,
+                    k_quant,
+                    k_scale,
+                    chunk.block_table,
+                    chunk.cu_seq_lens,
+                )
 
+            q_slice = q_quant[chunk.token_start : chunk.token_end]
+            q_scale_slice = (
+                q_scale[chunk.token_start : chunk.token_end]
+                if q_scale is not None
+                else None
+            )
+            # DeepGEMM scalar-type tags (zero-copy): MXFP4 values → int8
+            # (kPackedFP4), scales → int32 squeezed to 1-D kv_sf / 2-D q_sf.
+            if use_fp4_cache:
+                q_slice_cast = q_slice.view(torch.int8)
+                k_quant_cast = k_quant.view(torch.int8)
+                k_scale_cast = k_scale.view(torch.int32).squeeze(-1)
+            else:
+                q_slice_cast = q_slice
+                k_quant_cast = k_quant
+                k_scale_cast = k_scale.view(torch.float32).squeeze(-1)
             if current_platform.is_xpu():
-                ops.top_k_per_row_prefill(
-                    logits,
+                if q_scale_slice is not None:
+                    raise RuntimeError("XPU fp8_mqa_logits does not support FP4 Q")
+                logits = torch.ops.vllm.xpu_fp8_mqa_logits(
+                    q_slice_cast,
+                    k_quant_cast,
+                    k_scale_cast,
+                    weights[chunk.token_start : chunk.token_end],
                     chunk.cu_seqlen_ks,
                     chunk.cu_seqlen_ke,
-                    topk_indices,
-                    num_rows,
-                    logits.stride(0),
-                    logits.stride(1),
-                    topk_tokens,
                 )
             else:
-                torch.ops._C.top_k_per_row_prefill(
-                    logits,
+                logits = fp8_fp4_mqa_logits(
+                    (q_slice_cast, q_scale_slice),
+                    (k_quant_cast, k_scale_cast),
+                    weights[chunk.token_start : chunk.token_end],
                     chunk.cu_seqlen_ks,
                     chunk.cu_seqlen_ke,
-                    topk_indices,
-                    num_rows,
-                    logits.stride(0),
-                    logits.stride(1),
-                    topk_tokens,
+                    clean_logits=False,
                 )
+            num_rows = logits.shape[0]
+
+            topk_indices = topk_indices_buffer[
+                chunk.token_start : chunk.token_end, :topk_tokens
+            ]
 
-            # Compute lengths from row spans
-            # lengths = (chunk.cu_seqlen_ke - chunk.cu_seqlen_ks).to(torch.int32)
-            # torch.ops._C.large_context_topk(
-            #    logits,
-            #    topk_indices,
-            #    lengths,
-            #    chunk.cu_seqlen_ks,  # row_starts
-            # )
+            ops.top_k_per_row_prefill(
+                logits,
+                chunk.cu_seqlen_ks,
+                chunk.cu_seqlen_ke,
+                topk_indices,
+                num_rows,
+                logits.stride(0),
+                logits.stride(1),
+                topk_tokens,
+            )
 
     if has_decode:
-        decode_metadata = attn_metadata.decode
+        decode_metadata = attn_metadata_narrowed.decode
         assert decode_metadata is not None
-        # kv_cache shape [
-        # kv_cache size requirement [num_block, block_size, n_head, head_dim],
-        # we only have [num_block, block_size, head_dim],
-        kv_cache = kv_cache.unsqueeze(-2)
+        kv_cache = kv_cache_as_quant_view(kv_cache, head_dim, use_fp4_cache)
         decode_lens = decode_metadata.decode_lens
         if decode_metadata.requires_padding:
             # pad in edge case where we have short chunked prefill length <
             # decode_threshold since we unstrictly split
             # prefill and decode by decode_threshold
-            # (currently set to 1 + speculative tokens)
-            padded_q_fp8_decode_tokens = pack_seq_triton(
-                q_fp8[:num_decode_tokens], decode_lens
-            )
+            # (currently set to 1 + speculative tokens).
+            # FP8 Q is float8_e4m3fn (pack_seq_triton's fp32 pad path is OK —
+            # downstream context_lens masks stale slots). MXFP4 Q is two
+            # uint8 tensors (values + ue8m0 scales) — use the dedicated uint8
+            # packer with pad_byte=0 so padded slots dequantize to 0 and
+            # can't produce NaN/Inf in the logits kernel.
+            if q_scale is not None:
+                padded_q_quant_decode_tokens = pack_seq_triton(
+                    q_quant[:num_decode_tokens], decode_lens, pad_value=0
+                )
+                padded_q_scale = pack_seq_triton(
+                    q_scale[:num_decode_tokens], decode_lens, pad_value=0
+                )
+            else:
+                padded_q_quant_decode_tokens = pack_seq_triton(
+                    q_quant[:num_decode_tokens], decode_lens
+                )
+                padded_q_scale = None
         else:
-            padded_q_fp8_decode_tokens = q_fp8[:num_decode_tokens].reshape(
-                decode_lens.shape[0], -1, *q_fp8.shape[1:]
+            padded_q_quant_decode_tokens = q_quant[:num_decode_tokens].reshape(
+                decode_lens.shape[0], -1, *q_quant.shape[1:]
             )
+            if q_scale is not None:
+                padded_q_scale = q_scale[:num_decode_tokens].reshape(
+                    decode_lens.shape[0], -1, *q_scale.shape[1:]
+                )
+            else:
+                padded_q_scale = None
         # TODO: move and optimize below logic with triton kernels
-        batch_size = padded_q_fp8_decode_tokens.shape[0]
-        next_n = padded_q_fp8_decode_tokens.shape[1]
-        assert batch_size == decode_metadata.seq_lens.shape[0]
+        batch_size = padded_q_quant_decode_tokens.shape[0]
+        next_n = padded_q_quant_decode_tokens.shape[1]
         num_padded_tokens = batch_size * next_n
-        logits = fp8_paged_mqa_logits(
-            padded_q_fp8_decode_tokens,
-            kv_cache,
-            weights[:num_padded_tokens],
-            decode_metadata.seq_lens,
-            decode_metadata.block_table,
-            decode_metadata.schedule_metadata,
-            max_model_len=max_model_len,
-            clean_logits=False,
+        seq_lens = decode_metadata.seq_lens[:batch_size]
+        # seq_lens is always 2D: (B, next_n) for native spec decode, (B, 1)
+        # otherwise. deep_gemm fp8_fp4_paged_mqa_logits requires 2D context_lens;
+        # the downstream topk kernels accept both 1D and 2D.
+        padded_q_quant_cast = (
+            padded_q_quant_decode_tokens.view(torch.int8)
+            if use_fp4_cache
+            else padded_q_quant_decode_tokens
         )
+        if current_platform.is_xpu():
+            if padded_q_scale is not None:
+                raise RuntimeError("XPU fp8_paged_mqa_logits does not support FP4 Q")
+            seq_lens_xpu = (
+                seq_lens[:, -1].contiguous() if seq_lens.ndim == 2 else seq_lens
+            )
+            logits = torch.ops.vllm.xpu_fp8_paged_mqa_logits(
+                padded_q_quant_cast,
+                kv_cache,
+                weights[:num_padded_tokens],
+                seq_lens_xpu,
+                decode_metadata.block_table,
+                decode_metadata.schedule_metadata,
+                max_model_len,
+            )
+        else:
+            logits = fp8_fp4_paged_mqa_logits(
+                (padded_q_quant_cast, padded_q_scale),
+                kv_cache,
+                weights[:num_padded_tokens],
+                seq_lens,
+                decode_metadata.block_table,
+                decode_metadata.schedule_metadata,
+                max_model_len=max_model_len,
+                clean_logits=False,
+            )
         num_rows = logits.shape[0]
         topk_indices = topk_indices_buffer[:num_padded_tokens, :topk_tokens]
 
-        if decode_metadata.use_large_context_topk:
-            if next_n == 1:
-                lengths = decode_metadata.seq_lens
-            else:
-                # (bs,) -> (bs, 1) + (next_n,) -> (bs, next_n) -> (bs * next_n,)
-                lengths = (
-                    decode_metadata.seq_lens.unsqueeze(1)
-                    - next_n
-                    + 1
-                    + decode_metadata.offsets
-                ).flatten()
-
-            torch.ops._C.large_context_topk(
+        if current_platform.is_cuda() and topk_tokens in (512, 1024, 2048):
+            workspace_manager = current_workspace_manager()
+            (topk_workspace,) = workspace_manager.get_simultaneous(
+                ((RADIX_TOPK_WORKSPACE_SIZE,), torch.uint8),
+            )
+            torch.ops._C.persistent_topk(
                 logits,
+                seq_lens,
                 topk_indices,
-                lengths,
-                None,
+                topk_workspace,
+                topk_tokens,
+                attn_metadata_narrowed.max_seq_len,
             )
         else:
-            if current_platform.is_xpu():
-                ops.top_k_per_row_decode(
-                    logits,
-                    next_n,
-                    decode_metadata.seq_lens,
-                    topk_indices,
-                    num_rows,
-                    logits.stride(0),
-                    logits.stride(1),
-                    topk_tokens,
-                )
-            else:
-                torch.ops._C.top_k_per_row_decode(
-                    logits,
-                    next_n,
-                    decode_metadata.seq_lens,
-                    topk_indices,
-                    num_rows,
-                    logits.stride(0),
-                    logits.stride(1),
-                    topk_tokens,
-                )
+            ops.top_k_per_row_decode(
+                logits,
+                next_n,
+                seq_lens,
+                topk_indices,
+                num_rows,
+                logits.stride(0),
+                logits.stride(1),
+                topk_tokens,
+            )
 
         if decode_metadata.requires_padding:
             # if padded, we need to unpack
@@ -241,7 +366,7 @@ def sparse_attn_indexer(
                 topk_indices.reshape(batch_size, -1, topk_indices.shape[-1]),
                 decode_lens,
             )
-            topk_indices_buffer[:num_decode_tokens, : topk_indices.shape[-1]] = (
+            topk_indices_buffer[: topk_indices.shape[0], : topk_indices.shape[-1]] = (
                 topk_indices
             )
 
@@ -250,9 +375,10 @@ def sparse_attn_indexer(
 
 def sparse_attn_indexer_fake(
     hidden_states: torch.Tensor,
-    k_cache_prefix: str,
+    k_cache_prefix: LayerNameType,
     kv_cache: torch.Tensor,
-    q_fp8: torch.Tensor,
+    q_quant: torch.Tensor,
+    q_scale: torch.Tensor | None,
     k: torch.Tensor,
     weights: torch.Tensor,
     quant_block_size: int,
@@ -262,6 +388,8 @@ def sparse_attn_indexer_fake(
     max_model_len: int,
     total_seq_lens: int,
     topk_indices_buffer: torch.Tensor | None,
+    skip_k_cache_insert: bool,
+    use_fp4_cache: bool = False,
 ) -> torch.Tensor:
     return topk_indices_buffer
 
@@ -298,6 +426,8 @@ def __init__(
         max_model_len: int,
         max_total_seq_len: int,
         topk_indices_buffer: torch.Tensor,
+        skip_k_cache_insert: bool = False,
+        use_fp4_cache: bool = False,
     ):
         super().__init__()
         self.k_cache = k_cache
@@ -308,6 +438,8 @@ def __init__(
         self.max_model_len = max_model_len
         self.max_total_seq_len = max_total_seq_len
         self.topk_indices_buffer = topk_indices_buffer
+        self.skip_k_cache_insert = skip_k_cache_insert
+        self.use_fp4_cache = use_fp4_cache
         if current_platform.is_cuda() and not has_deep_gemm():
             raise RuntimeError(
                 "Sparse Attention Indexer CUDA op requires DeepGEMM to be installed."
@@ -316,14 +448,14 @@ def __init__(
     def forward_native(
         self,
         hidden_states: torch.Tensor,
-        q_fp8: torch.Tensor,
+        q_quant: torch.Tensor | tuple[torch.Tensor, torch.Tensor],
         k: torch.Tensor,
         weights: torch.Tensor,
     ):
         if current_platform.is_cuda() or current_platform.is_xpu():
-            return self.forward_cuda(hidden_states, q_fp8, k, weights)
+            return self.forward_cuda(hidden_states, q_quant, k, weights)
         elif current_platform.is_rocm():
-            return self.forward_hip(hidden_states, q_fp8, k, weights)
+            return self.forward_hip(hidden_states, q_quant, k, weights)
         else:
             raise NotImplementedError(
                 "SparseAttnIndexer native forward is only implemented for "
@@ -333,15 +465,22 @@ def forward_native(
     def forward_cuda(
         self,
         hidden_states: torch.Tensor,
-        q_fp8: torch.Tensor,
+        q_quant: torch.Tensor | tuple[torch.Tensor, torch.Tensor],
         k: torch.Tensor,
         weights: torch.Tensor,
     ):
+        # FP8 path: single tensor (per-token scale is folded into `weights`).
+        # FP4 path: (values, scales) tuple with scales required by the kernel.
+        if isinstance(q_quant, tuple):
+            q_values, q_scale = q_quant
+        else:
+            q_values, q_scale = q_quant, None
         return torch.ops.vllm.sparse_attn_indexer(
             hidden_states,
-            self.k_cache.prefix,
+            _encode_layer_name(self.k_cache.prefix),
             self.k_cache.kv_cache,
-            q_fp8,
+            q_values,
+            q_scale,
             k,
             weights,
             self.quant_block_size,
@@ -351,21 +490,36 @@ def forward_cuda(
             self.max_model_len,
             self.max_total_seq_len,
             self.topk_indices_buffer,
+            self.skip_k_cache_insert,
+            self.use_fp4_cache,
         )
 
-    def forward_hip(
+    def forward_xpu(
         self,
         hidden_states: torch.Tensor,
         q_fp8: torch.Tensor,
         k: torch.Tensor,
         weights: torch.Tensor,
     ):
+        return self.forward_cuda(hidden_states, q_fp8, k, weights)
+
+    def forward_hip(
+        self,
+        hidden_states: torch.Tensor,
+        q_quant: torch.Tensor | tuple[torch.Tensor, torch.Tensor],
+        k: torch.Tensor,
+        weights: torch.Tensor,
+    ):
+        assert not self.use_fp4_cache, "AMD platform doesn't support fp4 cache yet"
+        assert isinstance(q_quant, torch.Tensor), (
+            "AMD sparse_attn_indexer expects a single FP8 q_quant tensor"
+        )
         if rocm_aiter_ops.is_enabled():
             return torch.ops.vllm.rocm_aiter_sparse_attn_indexer(
                 hidden_states,
-                self.k_cache.prefix,
+                _encode_layer_name(self.k_cache.prefix),
                 self.k_cache.kv_cache,
-                q_fp8,
+                q_quant,
                 k,
                 weights,
                 self.quant_block_size,
@@ -375,9 +529,9 @@ def forward_hip(
                 self.max_model_len,
                 self.max_total_seq_len,
                 self.topk_indices_buffer,
+                skip_k_cache_insert=self.skip_k_cache_insert,
             )
-        else:
-            raise RuntimeError(
-                "Sparse attention indexer ROCm custom op requires ROCm "
-                "Aiter ops to be enabled."
-            )
+        raise RuntimeError(
+            "Sparse attention indexer ROCm path is only supported on AITER. "
+            "Please enable aiter with VLLM_ROCM_USE_AITER=1"
+        )
diff --git a/vllm/model_executor/layers/utils.py b/vllm/model_executor/layers/utils.py
index 4918c83bdc39..dbc05273f305 100644
--- a/vllm/model_executor/layers/utils.py
+++ b/vllm/model_executor/layers/utils.py
@@ -150,6 +150,7 @@ def rocm_unquantized_gemm_impl(
         envs.VLLM_ROCM_USE_SKINNY_GEMM
         and on_gfx950()
         and x.dtype in [torch.float16, torch.bfloat16]
+        and x.dim() == 2
         and (
             10 <= n <= 128
             and k % 8 == 0
@@ -159,6 +160,7 @@ def rocm_unquantized_gemm_impl(
             and weight.is_contiguous()
         )
     )
+
     if use_skinny_reduce_counting:
         return ops.wvSplitKrc(x, weight, cu_count, bias)
 
@@ -174,17 +176,21 @@ def rocm_unquantized_gemm_impl(
         and k % 8 == 0
     )
 
-    if not use_skinny:
-        return torch.nn.functional.linear(x, weight, bias)
-
-    x_view = x.reshape(-1, x.size(-1))
-    if m > 8 and 0 < n <= 4:
-        cu_count = num_compute_units()
-        out = ops.wvSplitK(weight, x_view, cu_count, bias)
-        return out.reshape(*x.shape[:-1], weight.shape[0])
-    elif m % 4 == 0 and n == 1 and k <= 8192 and bias is None:
-        out = ops.LLMM1(weight, x_view, 4)
-        return out.reshape(*x.shape[:-1], weight.shape[0])
+    if use_skinny:
+        x_view = x.reshape(-1, x.size(-1))
+        if m > 8 and 0 < n <= 4:
+            cu_count = num_compute_units()
+            out = ops.wvSplitK(weight, x_view, cu_count, bias)
+            return out.reshape(*x.shape[:-1], weight.shape[0])
+        elif m % 4 == 0 and n == 1 and k <= 8192 and bias is None:
+            out = ops.LLMM1(weight, x_view, 4)
+            return out.reshape(*x.shape[:-1], weight.shape[0])
+
+    if rocm_aiter_ops.is_tgemm_enabled():
+        from aiter.tuned_gemm import tgemm
+
+        return tgemm.mm(x, weight, bias)
+
     return torch.nn.functional.linear(x, weight, bias)
 
 
@@ -228,6 +234,19 @@ def dispatch_cpu_unquantized_gemm(
         layer.cpu_linear = torch.nn.functional.linear
         return
 
+    if layer.weight.ndim != 2:
+        # this is not a linear layer
+        # For now it should be a causal_conv1d op
+        if torch.cpu._is_amx_tile_supported():
+            # prepack conv weight
+            layer.weight.data = ops.causal_conv1d_weight_pack(
+                layer.weight.view(
+                    layer.weight.size(0),
+                    layer.weight.size(2),
+                )
+            )
+        return
+
     N, K = layer.weight.size()
     dtype = layer.weight.dtype
 
diff --git a/vllm/model_executor/layers/vocab_parallel_embedding.py b/vllm/model_executor/layers/vocab_parallel_embedding.py
index daaa86bed478..ddae01856da0 100644
--- a/vllm/model_executor/layers/vocab_parallel_embedding.py
+++ b/vllm/model_executor/layers/vocab_parallel_embedding.py
@@ -8,13 +8,17 @@
 import torch.nn.functional as F
 from torch.nn.parameter import Parameter, UninitializedParameter
 
+import vllm.envs as envs
 from vllm.distributed import (
     divide,
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,
 )
-from vllm.model_executor.custom_op import CustomOp
+from vllm.model_executor.custom_op import PluggableLayer
+from vllm.model_executor.layers.batch_invariant import (
+    linear_batch_invariant,
+)
 from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig,
     QuantizeMethodBase,
@@ -66,6 +70,8 @@ def apply(
         x: torch.Tensor,
         bias: torch.Tensor | None = None,
     ) -> torch.Tensor:
+        if envs.VLLM_BATCH_INVARIANT and current_platform.is_cuda_alike():
+            return linear_batch_invariant(x, layer.weight, bias)
         return dispatch_unquantized_gemm()(layer, x, layer.weight, bias)
 
     def embedding(self, layer: torch.nn.Module, input_: torch.Tensor) -> torch.Tensor:
@@ -182,8 +188,8 @@ def get_masked_input_and_mask(
 
 
 # --8<-- [start:vocab_parallel_embedding]
-@CustomOp.register("vocab_parallel_embedding")
-class VocabParallelEmbedding(CustomOp):
+@PluggableLayer.register("vocab_parallel_embedding")
+class VocabParallelEmbedding(PluggableLayer):
     """Embedding parallelized in the vocabulary dimension.
 
     Adapted from torch.nn.Embedding, note that we pad the vocabulary size to
@@ -461,7 +467,7 @@ def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor):
         param[: loaded_weight.shape[0]].data.copy_(loaded_weight)
         param[loaded_weight.shape[0] :].data.fill_(0)
 
-    def forward_native(self, input_):
+    def forward(self, input_):
         if self.tp_size > 1:
             # Build the mask.
             masked_input, input_mask = get_masked_input_and_mask(
@@ -483,9 +489,6 @@ def forward_native(self, input_):
         output = tensor_model_parallel_all_reduce(output_parallel)
         return output
 
-    def forward_cuda(self, input_):
-        return self.forward_native(input_)
-
     def extra_repr(self) -> str:
         s = f"num_embeddings={self.num_embeddings_per_partition}"
         s += f", embedding_dim={self.embedding_dim}"
@@ -496,7 +499,7 @@ def extra_repr(self) -> str:
 
 
 # --8<-- [start:parallel_lm_head]
-@CustomOp.register("parallel_lm_head")
+@PluggableLayer.register("parallel_lm_head")
 class ParallelLMHead(VocabParallelEmbedding):
     """Parallelized LM head.
 
diff --git a/vllm/model_executor/model_loader/__init__.py b/vllm/model_executor/model_loader/__init__.py
index 53b6b3221b54..3b5064ea7c75 100644
--- a/vllm/model_executor/model_loader/__init__.py
+++ b/vllm/model_executor/model_loader/__init__.py
@@ -13,6 +13,9 @@
 from vllm.model_executor.model_loader.default_loader import DefaultModelLoader
 from vllm.model_executor.model_loader.dummy_loader import DummyModelLoader
 from vllm.model_executor.model_loader.gguf_loader import GGUFModelLoader
+from vllm.model_executor.model_loader.modelexpress_loader import (
+    ModelExpressModelLoader,
+)
 from vllm.model_executor.model_loader.runai_streamer_loader import (
     RunaiModelStreamerLoader,
 )
@@ -37,6 +40,7 @@
     "gguf",
     "instanttensor",
     "mistral",
+    "modelexpress",
     "npcache",
     "pt",
     "runai_streamer",
@@ -54,6 +58,7 @@
     "gguf": GGUFModelLoader,
     "instanttensor": DefaultModelLoader,
     "mistral": DefaultModelLoader,
+    "modelexpress": ModelExpressModelLoader,
     "npcache": DefaultModelLoader,
     "pt": DefaultModelLoader,
     "runai_streamer": RunaiModelStreamerLoader,
@@ -150,6 +155,7 @@ def get_model(
     "BaseModelLoader",
     "BitsAndBytesModelLoader",
     "GGUFModelLoader",
+    "ModelExpressModelLoader",
     "DefaultModelLoader",
     "DummyModelLoader",
     "RunaiModelStreamerLoader",
diff --git a/vllm/model_executor/model_loader/base_loader.py b/vllm/model_executor/model_loader/base_loader.py
index d6c38664fde6..55a4dd4c28fc 100644
--- a/vllm/model_executor/model_loader/base_loader.py
+++ b/vllm/model_executor/model_loader/base_loader.py
@@ -65,12 +65,11 @@ def load_model(
 
             # Log peak GPU memory after loading weights. This is needed
             # to have test coverage on peak memory for online quantization.
-            if current_platform.is_cuda():
+            if current_platform.is_cuda_alike():
                 peak_memory = torch.accelerator.max_memory_allocated()
                 logger.debug_once(
                     "Peak GPU memory after loading weights: %s GiB",
                     format_gib(peak_memory),
-                    scope="local",
                 )
 
             # Process weights into kernel format. Note that when using online
diff --git a/vllm/model_executor/model_loader/default_loader.py b/vllm/model_executor/model_loader/default_loader.py
index 5c9c97f4b64a..5b5632e319c9 100644
--- a/vllm/model_executor/model_loader/default_loader.py
+++ b/vllm/model_executor/model_loader/default_loader.py
@@ -76,7 +76,11 @@ def __init__(self, load_config: LoadConfig):
         self.local_expert_ids: set[int] | None = None
 
         extra_config = load_config.model_loader_extra_config
-        allowed_keys = {"enable_multithread_load", "num_threads"}
+        allowed_keys = {
+            "enable_multithread_load",
+            "num_threads",
+            "enable_weights_track",
+        }
         unexpected_keys = set(extra_config.keys()) - allowed_keys
 
         if unexpected_keys:
@@ -86,6 +90,10 @@ def __init__(self, load_config: LoadConfig):
                 f"{unexpected_keys}"
             )
 
+        self.enable_weights_track: bool | None = extra_config.get(
+            "enable_weights_track", None
+        )
+
     def _prepare_weights(
         self,
         model_name_or_path: str,
@@ -248,6 +256,12 @@ def _get_weights_iterator(
                         self.load_config.use_tqdm_on_load,
                         self.load_config.safetensors_load_strategy,
                         local_expert_ids=self.local_expert_ids,
+                        safetensors_prefetch_num_threads=(
+                            self.load_config.safetensors_prefetch_num_threads
+                        ),
+                        safetensors_prefetch_block_size=(
+                            self.load_config.safetensors_prefetch_block_size
+                        ),
                     )
         else:
             if extra_config.get("enable_multithread_load"):
@@ -377,18 +391,44 @@ def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None:
 
         self._init_ep_weight_filter(model_config)
 
-        weights_to_load = {name for name, _ in model.named_parameters()}
         loaded_weights = model.load_weights(self.get_all_weights(model_config, model))
 
         self.counter_after_loading_weights = time.perf_counter()
         logger.info_once(
             "Loading weights took %.2f seconds",
             self.counter_after_loading_weights - self.counter_before_loading_weights,
-            scope="local",
         )
         # We only enable strict check for non-quantized models
-        # that have loaded weights tracking currently.
-        if model_config.quantization is None and loaded_weights is not None:
+        # that have loaded weights tracking by default.
+        default_enable_weights_track = (
+            model_config.quantization is None and loaded_weights is not None
+        )
+        enable_weights_track = (
+            self.enable_weights_track
+            if self.enable_weights_track is not None
+            else default_enable_weights_track
+        )
+        if enable_weights_track:
+            self.track_weights_loading(model, loaded_weights)
+
+    def track_weights_loading(
+        self, model: nn.Module, loaded_weights: set[str] | None
+    ) -> None:
+        weights_to_load = {name for name, _ in model.named_parameters()}
+        if loaded_weights is not None:
+            # ignore online quantization scales
+            for name, module in model.named_modules():
+                quant_method = getattr(module, "quant_method", None)
+                has_online_quant = getattr(quant_method, "uses_meta_device", False)
+                has_postprocess_quant = getattr(
+                    quant_method, "process_weights_after_loading", None
+                )
+                # ignore kv_cache scale and online quant scale,
+                # which can be missing in checkpoints
+                if has_online_quant or has_postprocess_quant:
+                    for param_name, _ in module.named_parameters():
+                        full_name = f"{name}.{param_name}" if name else param_name
+                        loaded_weights.add(full_name)
             weights_not_loaded = weights_to_load - loaded_weights
             if weights_not_loaded:
                 raise ValueError(
diff --git a/vllm/model_executor/model_loader/dummy_loader.py b/vllm/model_executor/model_loader/dummy_loader.py
index 5a8b5de6f553..98f82a5b7197 100644
--- a/vllm/model_executor/model_loader/dummy_loader.py
+++ b/vllm/model_executor/model_loader/dummy_loader.py
@@ -1,14 +1,22 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import torch
 import torch.nn as nn
 
 from vllm.config import ModelConfig
 from vllm.config.load import LoadConfig
+from vllm.model_executor.layers.quantization.base_config import QuantizeMethodBase
 from vllm.model_executor.model_loader.base_loader import BaseModelLoader
-from vllm.model_executor.model_loader.reload.meta import materialize_meta_tensor
+from vllm.model_executor.model_loader.reload.layerwise import (
+    _get_original_loader,
+    get_layerwise_info,
+)
+from vllm.model_executor.model_loader.reload.meta import materialize_layer
+from vllm.model_executor.model_loader.reload.types import LayerReloadingInfo
 from vllm.model_executor.model_loader.reload.utils import get_layer_tensors
-from vllm.model_executor.model_loader.weight_utils import initialize_dummy_weights
+from vllm.model_executor.model_loader.weight_utils import (
+    initialize_dummy_weights,
+    initialize_single_dummy_weight,
+)
 
 
 class DummyModelLoader(BaseModelLoader):
@@ -26,12 +34,31 @@ def download_model(self, model_config: ModelConfig) -> None:
         pass  # Nothing to download
 
     def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None:
-        # materialize meta tensors as part of online quantization lifecycle
         for layer in model.modules():
-            for name, param in get_layer_tensors(layer).items():
-                if param.device == torch.device("meta"):
-                    setattr(layer, name, materialize_meta_tensor(param))
+            info = get_layerwise_info(layer)
+            if info.can_load():
+                self._process_online_quant_layer(layer, info)
+            else:
+                # NOTE(woosuk): For accurate performance evaluation, we assign
+                # random values to the weights.
+                initialize_dummy_weights(layer, model_config)
 
-        # NOTE(woosuk): For accurate performance evaluation, we assign
-        # random values to the weights.
-        initialize_dummy_weights(model, model_config)
+    def _process_online_quant_layer(
+        self,
+        layer: nn.Module,
+        info: LayerReloadingInfo,
+    ) -> None:
+        """Materialize, apply dummy weights, and run quantization processing."""
+        materialize_layer(layer, info)
+
+        for tensor in get_layer_tensors(layer).values():
+            initialize_single_dummy_weight(tensor)
+
+        for param in get_layer_tensors(layer).values():
+            param.weight_loader = _get_original_loader(param)
+
+        quant_method = getattr(layer, "quant_method", None)
+        if isinstance(quant_method, QuantizeMethodBase):
+            quant_method.process_weights_after_loading(layer)
+
+        info.reset()
diff --git a/vllm/model_executor/model_loader/gguf_loader.py b/vllm/model_executor/model_loader/gguf_loader.py
index 75d0b3425460..6148caa98746 100644
--- a/vllm/model_executor/model_loader/gguf_loader.py
+++ b/vllm/model_executor/model_loader/gguf_loader.py
@@ -24,6 +24,7 @@
     get_gguf_extra_tensor_names,
     get_gguf_weight_type_map,
     gguf_quant_weights_iterator,
+    gguf_quant_weights_iterator_multi,
 )
 from vllm.transformers_utils.gguf_utils import detect_gguf_multimodal
 from vllm.utils.torch_utils import set_default_torch_dtype
@@ -56,7 +57,12 @@ def _prepare_weights(self, model_config: ModelConfig):
         # repo id/filename.gguf
         if "/" in model_name_or_path and model_name_or_path.endswith(".gguf"):
             repo_id, filename = model_name_or_path.rsplit("/", 1)
-            return hf_hub_download(repo_id=repo_id, filename=filename)
+            return hf_hub_download(
+                repo_id=repo_id,
+                filename=filename,
+                revision=model_config.revision,
+                cache_dir=self.load_config.download_dir,
+            )
         # repo_id:quant_type
         elif "/" in model_name_or_path and ":" in model_name_or_path:
             repo_id, quant_type = model_name_or_path.rsplit(":", 1)
@@ -74,6 +80,31 @@ def _prepare_weights(self, model_config: ModelConfig):
             "or <repo_id>:<quant_type>)"
         )
 
+    @staticmethod
+    def _get_all_gguf_files(model_path: str) -> list[str]:
+        """Discover all GGUF shard files from a single shard path.
+
+        Supports variable-width shard indices by dynamically detecting
+        the padding from the original filename.
+        E.g. ``*-00001-of-00005.gguf`` → all 5 shards,
+             ``*-01-of-15.gguf`` → all 15 shards.
+        """
+        match = re.search(r"-(\d+)-of-(\d+)\.gguf$", model_path)
+        if not match:
+            return [model_path]
+        total = int(match.group(2))
+        num_digits = len(match.group(1))
+        prefix = model_path[: match.start(1)]
+        suffix = model_path[match.end(2) :]
+        files = []
+        for i in range(1, total + 1):
+            shard_path = f"{prefix}{i:0{num_digits}d}-of-{total:0{num_digits}d}{suffix}"
+            if os.path.isfile(shard_path):
+                files.append(shard_path)
+        if files:
+            logger.info("Discovered %d GGUF shard files", len(files))
+        return files if files else [model_path]
+
     def _get_gguf_weights_map(self, model_config: ModelConfig):
         """
         GGUF uses this naming convention for their tensors from HF checkpoint:
@@ -145,6 +176,29 @@ def _get_gguf_weights_map(self, model_config: ModelConfig):
                         r"\.mlp\.experts\.[0-9]+\.(gate|up|down)_proj\.weight"
                     )
                 )
+        if model_type == "minimax_m2":
+            model_type = "minimax-m2"
+            # GGUF layer map assumes merged expert weights
+            # map them manually like deepseek2
+            for idx in range(config.num_hidden_layers):
+                gguf_to_hf_name_map[f"blk.{idx}.exp_probs_b.bias"] = (
+                    f"model.layers.{idx}.block_sparse_moe.e_score_correction_bias"
+                )
+                gguf_to_hf_name_map[f"blk.{idx}.ffn_down_exps.weight"] = (
+                    f"model.layers.{idx}.block_sparse_moe.experts.0.w2.weight"
+                )
+                gguf_to_hf_name_map[f"blk.{idx}.ffn_gate_exps.weight"] = (
+                    f"model.layers.{idx}.block_sparse_moe.experts.0.w1.weight"
+                )
+                gguf_to_hf_name_map[f"blk.{idx}.ffn_up_exps.weight"] = (
+                    f"model.layers.{idx}.block_sparse_moe.experts.0.w3.weight"
+                )
+                sideload_params.append(
+                    re.compile(
+                        f"model\\.layers\\.{idx}"
+                        r"\.block_sparse_moe\.experts\.(gate_up_proj|down_proj)"
+                    )
+                )
 
         arch = None
         for key, value in gguf.MODEL_ARCH_NAMES.items():
@@ -190,6 +244,13 @@ def revert_hf_rename(name: str) -> str:
                 revert_hf_rename(name): tensor for name, tensor in state_dict.items()
             }
 
+        if model_type == "minimax-m2" and not hf_checkpoint_map:
+            # Reverse HF convention: mlp -> block_sparse_moe
+            state_dict = {
+                name.replace(".mlp.", ".block_sparse_moe."): tensor
+                for name, tensor in state_dict.items()
+            }
+
         def find_hf_name_in_tensor_map(hf_name: str) -> str | None:
             """
             Map HuggingFace parameter name to GGUF tensor name.
@@ -209,12 +270,24 @@ def find_hf_name_in_tensor_map(hf_name: str) -> str | None:
                 GGUF tensor name with suffix (e.g., 'mm.soft_emb_norm.weight')
                 or None if no mapping found
             """
+            # In transformers v5, multimodal models (e.g. Gemma3) wrap
+            # all sub-models under an outer 'model.' attribute, producing
+            # state_dict keys like 'model.language_model.layers.0...' and
+            # 'model.vision_tower.vision_model...'.  Strip this outer
+            # prefix so the keys match what gguf-py expects.
+            if is_multimodal and hf_name.startswith("model."):
+                hf_name = hf_name[6:]  # Remove outer 'model.'
+
             # Strip 'language_model.' prefix for multimodal models - gguf-py
             # tensor mappings expect parameter names without this prefix.
             # Note: 'model.' prefix should be KEPT for text-only models as
             # gguf-py expects it.
             if hf_name.startswith("language_model."):
                 hf_name = hf_name[15:]  # Remove 'language_model.'
+                # Re-add 'model.' prefix because gguf-py text tensor maps
+                # expect 'model.layers...' format.
+                if is_multimodal:
+                    hf_name = "model." + hf_name
 
             # Parse parameter name and suffix
             if hf_name.endswith((".weight", ".bias")):
@@ -277,9 +350,10 @@ def _get_gguf_weight_type(
         model_name_or_path: str,
         gguf_to_hf_name_map: dict[str, str],
     ) -> dict[str, str]:
-        weight_type_map = get_gguf_weight_type_map(
-            model_name_or_path, gguf_to_hf_name_map
-        )
+        gguf_files = self._get_all_gguf_files(model_name_or_path)
+        weight_type_map = {}
+        for f in gguf_files:
+            weight_type_map.update(get_gguf_weight_type_map(f, gguf_to_hf_name_map))
         is_multimodal = hasattr(model_config.hf_config, "vision_config")
         if is_multimodal:
             mmproj_file = detect_gguf_multimodal(model_name_or_path)
@@ -321,7 +395,15 @@ def _get_weights_iterator(
             )
             yield from gguf_quant_weights_iterator(mmproj_file, gguf_to_hf_name_map)
 
-        yield from gguf_quant_weights_iterator(model_name_or_path, gguf_to_hf_name_map)
+        gguf_files = self._get_all_gguf_files(model_name_or_path)
+        if len(gguf_files) > 1:
+            yield from gguf_quant_weights_iterator_multi(
+                gguf_files, gguf_to_hf_name_map
+            )
+        else:
+            yield from gguf_quant_weights_iterator(
+                model_name_or_path, gguf_to_hf_name_map
+            )
 
     def download_model(self, model_config: ModelConfig) -> None:
         self._prepare_weights(model_config)
@@ -340,9 +422,11 @@ def load_model(
         local_model_path = self._prepare_weights(model_config)
         gguf_weights_map = self._get_gguf_weights_map(model_config)
         # we can only know if tie word embeddings after mapping weights
-        if "lm_head.weight" in get_gguf_extra_tensor_names(
-            local_model_path, gguf_weights_map
-        ):
+        gguf_files = self._get_all_gguf_files(local_model_path)
+        all_extra_names = []
+        for f in gguf_files:
+            all_extra_names.extend(get_gguf_extra_tensor_names(f, gguf_weights_map))
+        if "lm_head.weight" in all_extra_names:
             model_config.hf_config.update({"tie_word_embeddings": True})
 
         weight_type_map = self._get_gguf_weight_type(
diff --git a/vllm/model_executor/model_loader/modelexpress_loader.py b/vllm/model_executor/model_loader/modelexpress_loader.py
new file mode 100644
index 000000000000..959506925b48
--- /dev/null
+++ b/vllm/model_executor/model_loader/modelexpress_loader.py
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from __future__ import annotations
+
+import importlib
+
+from torch import nn
+
+from vllm.config import ModelConfig, VllmConfig
+from vllm.config.load import LoadConfig
+from vllm.model_executor.model_loader.base_loader import BaseModelLoader
+from vllm.tracing import instrument
+
+_MODELEXPRESS_LOADER_MODULE = "modelexpress.engines.vllm.loader"
+_MISSING_MODELEXPRESS_MODULES = frozenset(
+    {
+        "modelexpress",
+        "modelexpress.engines",
+        "modelexpress.engines.vllm",
+        _MODELEXPRESS_LOADER_MODULE,
+    }
+)
+
+
+def _missing_modelexpress_error() -> ImportError:
+    return ImportError(
+        "The 'modelexpress' load format requires the ModelExpress Python package. "
+        "Install it with `pip install modelexpress`."
+    )
+
+
+class ModelExpressModelLoader(BaseModelLoader):
+    """Thin vLLM loader wrapper for ModelExpress."""
+
+    def __init__(self, load_config: LoadConfig):
+        super().__init__(load_config)
+        self._loader = self._load_modelexpress_loader(load_config)
+
+    @staticmethod
+    def _load_modelexpress_loader(load_config: LoadConfig) -> BaseModelLoader:
+        try:
+            module = importlib.import_module(_MODELEXPRESS_LOADER_MODULE)
+        except ModuleNotFoundError as exc:
+            if exc.name not in _MISSING_MODELEXPRESS_MODULES:
+                raise
+            raise _missing_modelexpress_error() from exc
+
+        ModelExpressVllmLoader = module.MxModelLoader
+        return ModelExpressVllmLoader(load_config)
+
+    def download_model(self, model_config: ModelConfig) -> None:
+        self._loader.download_model(model_config)
+
+    def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None:
+        self._loader.load_weights(model, model_config)
+
+    @instrument(span_name="Load model")
+    def load_model(
+        self,
+        vllm_config: VllmConfig,
+        model_config: ModelConfig,
+        prefix: str = "",
+    ) -> nn.Module:
+        model = self._loader.load_model(
+            vllm_config=vllm_config,
+            model_config=model_config,
+            prefix=prefix,
+        )
+        return model.eval()
diff --git a/vllm/model_executor/model_loader/reload/__init__.py b/vllm/model_executor/model_loader/reload/__init__.py
index 56a9d88ac4e4..61dc1b66e6f3 100644
--- a/vllm/model_executor/model_loader/reload/__init__.py
+++ b/vllm/model_executor/model_loader/reload/__init__.py
@@ -8,10 +8,9 @@
 
 Limitations:
 1. Composition with CPU offloading has not been implemented
-2. Reloading Attention/MLA weights (q_scale, k_scale, v_scale) has not been implemented
-3. Tied parameters will only reflect processing from one of the parent layers (for
+2. Tied parameters will only reflect processing from one of the parent layers (for
    example, only processing from embed_tokens will have an effect)
-4. This design assumes that the number of weights loaded from disk is the same as the
+3. This design assumes that the number of weights loaded from disk is the same as the
    number of weights created at model init time. This is not true for quant methods
    which (1) pad weights or (2) load qkv weights into the same parameter. Both of these
    cases are non-issues for today's quant methods, but future quantizations may cause
diff --git a/vllm/model_executor/model_loader/reload/layerwise.py b/vllm/model_executor/model_loader/reload/layerwise.py
index 03dd81255645..40dd6dc9f39b 100644
--- a/vllm/model_executor/model_loader/reload/layerwise.py
+++ b/vllm/model_executor/model_loader/reload/layerwise.py
@@ -3,7 +3,7 @@
 import inspect
 from collections.abc import Callable
 from functools import wraps
-from weakref import WeakKeyDictionary
+from weakref import WeakKeyDictionary, WeakSet
 
 import torch
 
@@ -14,13 +14,20 @@
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 
 from .meta import (
+    SKIP_TENSORS,
     capture_layer_to_meta,
     get_numel_loaded,
     materialize_layer,
     restore_layer_on_meta,
 )
 from .types import LayerReloadingInfo
-from .utils import get_layer_params_buffers, get_layer_size, get_layer_tensors
+from .utils import (
+    get_info_size,
+    get_layer_params_buffers,
+    get_layer_size,
+    get_layer_tensors,
+    has_device_tensors,
+)
 
 logger = init_logger(__name__)
 
@@ -42,6 +49,9 @@
     WeakKeyDictionary()
 )
 
+# Global set used to track loading for logging purposes only
+LOADING_LAYERS: WeakSet[torch.nn.Module] = WeakSet()
+
 
 def get_layerwise_info(layer: torch.nn.Module) -> LayerReloadingInfo:
     """
@@ -124,6 +134,8 @@ def initialize_online_processing(layer: torch.nn.Module):
     # Wrap each parameter's weight loader
     # Note that nested wrapping will occur for shared tensors
     for name, tensor in get_layer_tensors(layer).items():
+        if name in SKIP_TENSORS:
+            continue
         if _get_weight_loader(tensor).__name__ != "online_process_loader":
             tensor.weight_loader = make_online_process_loader(layer, name)
 
@@ -171,11 +183,30 @@ def online_process_loader(*args, **kwargs):
             info.load_numel_total,
         )
 
+        # Do not online process attention layers, must wait until finalize
+        if isinstance(layer, (Attention, MLAAttention)):
+            return ret
+
+        # Log warnings allocating excessive buffers on device
+        if has_device_tensors(bound_args):
+            LOADING_LAYERS.add(layer)
+            if len(LOADING_LAYERS) >= 2:
+                names = sorted([layer.__class__.__name__ for layer in LOADING_LAYERS])
+                mem_used = sum(
+                    get_info_size(LAYERWISE_INFO[layer]) for layer in LOADING_LAYERS
+                )
+                logger.warning_once(
+                    "Allocating %.1f MB of device memory to buffers to load %s layers. "
+                    "This extra memory usage can be avoided by ordering weights "
+                    "by their parent layer when reloading.",
+                    mem_used / 1e6,
+                    str(list(names)),
+                )
+
         # Process and copy when all weights are loaded
-        if info.load_numel >= info.load_numel_total and not isinstance(  # type: ignore[operator]
-            layer, (Attention, MLAAttention)
-        ):
+        if info.load_numel >= info.load_numel_total:  # type: ignore[operator]
             _layerwise_process(layer, info)
+            LOADING_LAYERS.discard(layer)
 
         return ret
 
@@ -197,6 +228,8 @@ def finalize_layerwise_processing(model: torch.nn.Module, model_config: ModelCon
     if hasattr(model, "_original_do_torchao_reload"):
         model._do_torchao_reload = model._original_do_torchao_reload
 
+    deferred_attn: list[tuple[torch.nn.Module, LayerReloadingInfo]] = []
+
     for layer in model.modules():
         info = get_layerwise_info(layer)
         if not info.can_load():
@@ -205,28 +238,18 @@ def finalize_layerwise_processing(model: torch.nn.Module, model_config: ModelCon
 
         # Attention/MLA layers are processed after all other layers
         if isinstance(layer, (Attention, MLAAttention)):
-            if info.load_numel > 0:
-                raise NotImplementedError(
-                    "Layerwise reloading of Q/K/V scale weights is not implemented yet"
-                )
-
-            elif info.kernel_tensors is None:
-                raise NotImplementedError(
-                    "Layerwise loading of Q/K/V scale weights is not implemented yet"
-                )
-
-            else:
-                _place_kernel_tensors(layer, info)
-                layer.process_weights_after_loading(model_config.dtype)
+            deferred_attn.append((layer, info))
+            continue
 
         # No weights were loaded
-        elif info.load_numel <= 0:
-            # first load but received no weights. This happens on dummy load
+        if info.load_numel <= 0:
+            # first load: checkpoint did not contain weights for this layer
             if info.kernel_tensors is None:
-                materialize_layer(layer, info)
+                _layerwise_process(layer, info)
+                continue
 
             # reloading: place kernel tensors back as a fallback
-            else:
+            elif info.load_numel_total > 0:  # type: ignore[operator]
                 logger.warning("%s: Failed to load weights", layer.__class__.__name__)
                 _place_kernel_tensors(layer, info)
 
@@ -240,11 +263,60 @@ def finalize_layerwise_processing(model: torch.nn.Module, model_config: ModelCon
 
         info.reset()
 
+    # Process attention layers after all other layers are done
+    for layer, info in deferred_attn:
+        _finalize_attention_layer(layer, info, model_config)
+        info.reset()
+
+    LOADING_LAYERS.clear()
+
 
 def finalize_layerwise_reload(*args, **kwargs):
     finalize_layerwise_processing(*args, **kwargs)
 
 
+def _finalize_attention_layer(
+    layer: torch.nn.Module, info: LayerReloadingInfo, model_config: ModelConfig
+) -> None:
+    if info.load_numel > 0 and info.kernel_tensors is not None:
+        # Reload with new scale weights from checkpoint
+        _place_kernel_tensors(layer, info)
+        _reload_attention_scales(layer, info)
+    elif info.load_numel > 0 or info.kernel_tensors is None:
+        raise ValueError(
+            "Layerwise loading of attention layers is not supported. "
+            "Attention must always process after linears."
+        )
+    else:
+        _place_kernel_tensors(layer, info)
+    layer.process_weights_after_loading(model_config.dtype)
+
+
+def _reload_attention_scales(layer: torch.nn.Module, info: LayerReloadingInfo) -> None:
+    """Load and process attention scale weights (k_scale, v_scale, etc.)
+    during reload.
+
+    Assumes dtype/shapes of attention tensors do not change during
+    processing, since we use .data.copy_() to preserve kernel tensor
+    references."""
+    quant_method = getattr(layer, "quant_method", None)
+    if quant_method is None:
+        return
+
+    # Re-create scale Parameters with sentinel values so unloaded scales
+    # are correctly detected by process_weights_after_loading
+    quant_method.create_weights(layer)
+
+    for name, args in info.loaded_weights:
+        param = getattr(layer, name)
+        args.arguments["param"] = param
+        _get_weight_loader(param)(*args.args, **args.kwargs)
+
+    quant_method.process_weights_after_loading(layer)
+
+    _copy_and_restore_kernel_tensors(layer, info)
+
+
 def _layerwise_process(layer: torch.nn.Module, info: LayerReloadingInfo):
     """
     Finalize layer loading after all weights have been buffered.
@@ -274,7 +346,6 @@ def _layerwise_process(layer: torch.nn.Module, info: LayerReloadingInfo):
         param.weight_loader(*args.args, **args.kwargs)
 
     # Process weights (quantization, repacking, etc.)
-    # Attention/MLA are processed in `finalize_layerwise_reload`
     quant_method = getattr(layer, "quant_method", None)
     if isinstance(quant_method, QuantizeMethodBase):
         quant_method.process_weights_after_loading(layer)
@@ -282,13 +353,7 @@ def _layerwise_process(layer: torch.nn.Module, info: LayerReloadingInfo):
     # Copy processed values into original tensor storage (preserves cudagraph refs)
     # this code is a no-op if not reloading (because kernel tensors is empty)
     if info.kernel_tensors is not None:
-        parameters, buffers = info.kernel_tensors
-        for name, param in parameters.items():
-            param.data.copy_(getattr(layer, name))
-        for name, buffer in buffers.items():
-            buffer.data.copy_(getattr(layer, name))
-
-        _place_kernel_tensors(layer, info)
+        _copy_and_restore_kernel_tensors(layer, info)
 
     info.reset()
     logger.debug("%s: Processed", layer.__class__.__name__)
@@ -307,6 +372,21 @@ def _get_weight_loader(tensor: torch.Tensor):
     return getattr(tensor, "weight_loader", default_weight_loader)
 
 
+def _copy_and_restore_kernel_tensors(layer: torch.nn.Module, info: LayerReloadingInfo):
+    """Copy processed values into original kernel tensor storage and restore
+    kernel tensor references on the layer. Preserves cudagraph references."""
+    assert info.kernel_tensors is not None
+    parameters, buffers = info.kernel_tensors
+    for name, param in parameters.items():
+        param.data.copy_(getattr(layer, name))
+    for name, buffer in buffers.items():
+        if name not in layer._buffers:
+            continue
+        buffer.data.copy_(getattr(layer, name))
+
+    _place_kernel_tensors(layer, info)
+
+
 def _place_kernel_tensors(layer: torch.nn.Module, info: LayerReloadingInfo):
     for name in get_layer_tensors(layer):
         delattr(layer, name)
diff --git a/vllm/model_executor/model_loader/reload/meta.py b/vllm/model_executor/model_loader/reload/meta.py
index 82bf9ce3d2af..397a458cbdd2 100644
--- a/vllm/model_executor/model_loader/reload/meta.py
+++ b/vllm/model_executor/model_loader/reload/meta.py
@@ -4,6 +4,7 @@
 from collections.abc import Callable
 
 import torch
+from torch.nn.parameter import UninitializedParameter
 from torch.utils._python_dispatch import TorchDispatchMode
 
 from .sanitize import restore_layer_refs, sanitize_layer_refs
@@ -27,6 +28,7 @@
     "expert_global_to_physical",
     "expert_physical_to_global",
     "expert_local_to_global",
+    "e_score_correction_bias",
 }
 
 
@@ -54,11 +56,45 @@ def materialize_meta_tensor(meta_tensor: torch.Tensor) -> torch.Tensor:
     return tensor
 
 
+def _is_non_persistent_parameter_alias_buffer(
+    layer: torch.nn.Module,
+    name: str,
+    buffer: torch.Tensor,
+    parameter_storage_ptrs: set[int],
+) -> bool:
+    if name not in layer._non_persistent_buffers_set:
+        return False
+
+    buffer_storage_ptr = _tensor_storage_ptr(buffer)
+    return (
+        buffer_storage_ptr is not None and buffer_storage_ptr in parameter_storage_ptrs
+    )
+
+
+def _tensor_storage_ptr(tensor: torch.Tensor) -> int | None:
+    if isinstance(tensor, UninitializedParameter):
+        return None
+
+    try:
+        return tensor.untyped_storage().data_ptr()
+    except (RuntimeError, ValueError):
+        return None
+
+
+def _parameter_storage_ptrs(layer: torch.nn.Module) -> set[int]:
+    return {
+        storage_ptr
+        for param in layer.parameters(recurse=True)
+        if (storage_ptr := _tensor_storage_ptr(param)) is not None
+    }
+
+
 def capture_layer_to_meta(layer: torch.nn.Module) -> LayerTensors:
     if layer.__class__.__name__ in SKIP_MODULES:
         return ({}, {})
 
     params, buffers = get_layer_params_buffers(layer)
+    parameter_storage_ptrs = _parameter_storage_ptrs(layer)
     return (
         {
             name: sanitize_layer_refs(to_meta_tensor(param), layer)
@@ -69,6 +105,9 @@ def capture_layer_to_meta(layer: torch.nn.Module) -> LayerTensors:
             name: sanitize_layer_refs(to_meta_tensor(buffer), layer)
             for name, buffer in buffers.items()
             if name not in SKIP_TENSORS
+            and not _is_non_persistent_parameter_alias_buffer(
+                layer, name, buffer, parameter_storage_ptrs
+            )
         },
     )
 
@@ -101,7 +140,7 @@ def materialize_layer(layer: torch.nn.Module, info: LayerReloadingInfo):
 
     with info.restore_device:
         for name, tensor in get_layer_tensors(layer).items():
-            if name not in SKIP_TENSORS:
+            if name not in SKIP_TENSORS and tensor.is_meta:
                 setattr(layer, name, materialize_meta_tensor(tensor))
 
 
diff --git a/vllm/model_executor/model_loader/reload/utils.py b/vllm/model_executor/model_loader/reload/utils.py
index 463ff6422213..7a3d6873e101 100644
--- a/vllm/model_executor/model_loader/reload/utils.py
+++ b/vllm/model_executor/model_loader/reload/utils.py
@@ -1,14 +1,18 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from inspect import BoundArguments
+
 import torch
 
-from .types import LayerTensors
+from .types import LayerReloadingInfo, LayerTensors
 
 __all__ = [
     "get_layer_tensors",
     "get_layer_params_buffers",
     "get_layer_size",
+    "has_device_tensors",
+    "get_info_size",
 ]
 
 
@@ -39,3 +43,31 @@ def get_layer_size(layer: torch.nn.Module) -> int:
         for name, tensor in get_layer_tensors(layer).items()
         if name not in SKIP_TENSORS
     )
+
+
+def has_device_tensors(bound_args: BoundArguments) -> bool:
+    """
+    Return True if the loaded weights exist on an accelerator device
+
+    :param bound_args: args to load weights
+    :return: True if weights are on accelerator device
+    """
+    return any(
+        isinstance(value, torch.Tensor) and value.device.type not in ("meta", "cpu")
+        for value in bound_args.arguments.values()
+    )
+
+
+def get_info_size(info: LayerReloadingInfo) -> int:
+    """
+    Calculate the number of bytes used by loaded weights for a given layer
+
+    :param info: layerwise info to get size of
+    :return: number of bytes used by loaded weights
+    """
+    return sum(
+        value.nbytes
+        for _, args in info.loaded_weights
+        for value in args.arguments.values()
+        if isinstance(value, torch.Tensor) and value.device.type not in ("meta", "cpu")
+    )
diff --git a/vllm/model_executor/model_loader/sharded_state_loader.py b/vllm/model_executor/model_loader/sharded_state_loader.py
index a87731e8bc0b..3f57fe7e0265 100644
--- a/vllm/model_executor/model_loader/sharded_state_loader.py
+++ b/vllm/model_executor/model_loader/sharded_state_loader.py
@@ -31,8 +31,8 @@ class ShardedStateLoader(BaseModelLoader):
     Model loader that directly loads each worker's model state dict, which
     enables a fast load path for large tensor-parallel models where each worker
     only needs to read its own shard rather than the entire checkpoint. See
-    `examples/offline_inference/save_sharded_state.py` for creating a sharded
-    checkpoint.
+    `examples/features/sharded_state/save_sharded_state_offline.py` for creating
+    a sharded checkpoint.
     """
 
     DEFAULT_PATTERN = "model-rank-{rank}-part-{part}.safetensors"
@@ -157,7 +157,6 @@ def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None:
         logger.info_once(
             "Loading weights took %.2f seconds",
             counter_after_loading_weights - counter_before_loading_weights,
-            scope="local",
         )
         if state_dict:
             raise ValueError(f"Missing keys {tuple(state_dict)} in loaded state!")
diff --git a/vllm/model_executor/model_loader/tensorizer.py b/vllm/model_executor/model_loader/tensorizer.py
index 3e6ed248ff3a..37d37d55f543 100644
--- a/vllm/model_executor/model_loader/tensorizer.py
+++ b/vllm/model_executor/model_loader/tensorizer.py
@@ -211,7 +211,7 @@ class TensorizerConfig(MutableMapping):
         encryption_keyfile: File path to a binary file containing a  
             binary key to use for decryption. `None` (the default) means 
             no decryption. See the example script in 
-            examples/others/tensorize_vllm_model.py. 
+            examples/features/tensorize_vllm_model.py. 
         s3_access_key_id: The access key for the S3 bucket. Can also be set via
             the S3_ACCESS_KEY_ID environment variable.
         s3_secret_access_key: The secret access key for the S3 bucket. Can also
@@ -579,7 +579,7 @@ def tensorizer_weights_iterator(
         "loading on vLLM, as tensorizer is forced to load to CPU. "
         "Consider deserializing a vLLM model instead for faster "
         "load times. See the "
-        "examples/others/tensorize_vllm_model.py example script "
+        "examples/features/tensorize_vllm_model.py example script "
         "for serializing vLLM models."
     )
 
diff --git a/vllm/model_executor/model_loader/tensorizer_loader.py b/vllm/model_executor/model_loader/tensorizer_loader.py
index c5bff1312932..338f9eac072a 100644
--- a/vllm/model_executor/model_loader/tensorizer_loader.py
+++ b/vllm/model_executor/model_loader/tensorizer_loader.py
@@ -73,7 +73,7 @@ def _load_model_serialized_cpu(
         """Load a serialized model with tensorizer to the CPU.
 
         This is only necessary when the model isn't vLLM-tensorized (see
-        examples/others/tensorize_vllm_model.py) This should still
+        examples/features/tensorize_vllm_model.py) This should still
         be faster than default HuggingFace loading, but will be slower than
         loading a vLLM-tensorized model.
         """
@@ -104,7 +104,7 @@ def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None:
         """Load serialized model weights with tensorizer.
 
         Expects a vLLM-tensorized model. See the
-        examples/others/tensorize_vllm_model.py example script
+        examples/features/tensorize_vllm_model.py example script
         for serializing vLLM models."""
         if is_vllm_tensorized(self.tensorizer_config):
             tensorizer_config = self._patch_tensorizer_config(model_config)
diff --git a/vllm/model_executor/model_loader/utils.py b/vllm/model_executor/model_loader/utils.py
index 8f370717d818..2a5f746d783e 100644
--- a/vllm/model_executor/model_loader/utils.py
+++ b/vllm/model_executor/model_loader/utils.py
@@ -15,7 +15,11 @@
 import vllm.envs as envs
 from vllm.config import ModelConfig, VllmConfig, set_current_vllm_config
 from vllm.logger import init_logger
-from vllm.model_executor.layers.attention import Attention, MLAAttention
+from vllm.model_executor.layers.attention import (
+    Attention,
+    MLAAttention,
+    MMEncoderAttention,
+)
 from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig,
     QuantizeMethodBase,
@@ -106,12 +110,12 @@ def process_weights_after_loading(
             with device_loading_context(module, target_device):
                 quant_method.process_weights_after_loading(module)
 
-    # Initialize post-load attention weights for both Attention and MLA.
+    # Initialize post-load attention weights for Attention, MLA, and MM encoder.
     # NOTE: Happens after other modules so we can easily decompress weights.
     for _, module in model.named_modules():
-        if isinstance(module, (Attention, MLAAttention)) and hasattr(
-            module, "process_weights_after_loading"
-        ):
+        if isinstance(
+            module, (Attention, MLAAttention, MMEncoderAttention)
+        ) and hasattr(module, "process_weights_after_loading"):
             # TODO(lucas): see if there is a way to unify the signatures
             # of process_weights_after_loading
             with device_loading_context(module, target_device):
@@ -175,7 +179,7 @@ def device_loading_context(module: torch.nn.Module, target_device: torch.device)
 def _get_model_architecture(model_config: ModelConfig) -> tuple[type[nn.Module], str]:
     from vllm.model_executor.models.adapters import as_embedding_model, as_seq_cls_model
 
-    architectures = getattr(model_config.hf_config, "architectures", [])
+    architectures = getattr(model_config.hf_config, "architectures", None) or []
 
     model_cls, arch = model_config.registry.resolve_model_cls(
         architectures,
@@ -215,7 +219,7 @@ def get_model_architecture(model_config: ModelConfig) -> tuple[type[nn.Module],
             model_config.runner_type,
             model_config.trust_remote_code,
             model_config.model_impl,
-            tuple(getattr(model_config.hf_config, "architectures", [])),
+            tuple(getattr(model_config.hf_config, "architectures", None) or []),
         )
     )
     if key in _MODEL_ARCH_BY_HASH:
diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py
index 37023d3f1f5c..de15dc3d53cd 100644
--- a/vllm/model_executor/model_loader/weight_utils.py
+++ b/vllm/model_executor/model_loader/weight_utils.py
@@ -30,7 +30,11 @@
 
 from vllm import envs
 from vllm.config import ModelConfig
-from vllm.config.load import LoadConfig
+from vllm.config.load import (
+    DEFAULT_SAFETENSORS_PREFETCH_BLOCK_SIZE,
+    DEFAULT_SAFETENSORS_PREFETCH_NUM_THREADS,
+    LoadConfig,
+)
 from vllm.distributed import get_tensor_model_parallel_rank, get_world_group
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import (
@@ -239,10 +243,12 @@ def convert_bin_to_safetensor_file(
     sf_size = os.stat(sf_filename).st_size
     pt_size = os.stat(pt_filename).st_size
     if (sf_size - pt_size) / pt_size > 0.01:
-        raise RuntimeError(f"""The file size different is more than 1%:
+        raise RuntimeError(
+            f"""The file size different is more than 1%:
          - {sf_filename}: {sf_size}
          - {pt_filename}: {pt_size}
-         """)
+         """
+        )
 
     # check if the tensors are the same
     reloaded = load_file(sf_filename)
@@ -294,6 +300,11 @@ def get_quant_config(
         )
 
     if hf_quant_config is not None:
+        # `model_config.quantization_config` may be set alongside a checkpoint
+        # quant config: the checkpoint determines `quant_cls`, and the user's
+        # QuantizationConfigArgs is consulted by individual quant methods
+        # (e.g. for activation overrides via the MXFP4 oracle).
+
         # For modelopt_mixed, config.json's quantization_config may or may
         # not contain the per-layer quantized_layers map.  Newer checkpoints
         # embed it directly; older ones keep it only in hf_quant_config.json.
@@ -335,6 +346,17 @@ def get_quant_config(
                 f"{quant_cls}"
             )
 
+    # Online quantization doesn't read from checkpoint configs - it quantizes
+    # fp16/bf16 weights on the fly during loading.
+    if model_config.quantization_config is not None:
+        from vllm.config.quantization import QuantizationConfigArgs
+        from vllm.model_executor.layers.quantization.online.base import (
+            OnlineQuantizationConfig,
+        )
+
+        assert isinstance(model_config.quantization_config, QuantizationConfigArgs)
+        return OnlineQuantizationConfig(args=model_config.quantization_config)
+
     # Inflight BNB quantization
     if model_config.quantization == "bitsandbytes":
         return quant_cls.from_config({})
@@ -729,31 +751,24 @@ def np_cache_weights_iterator(
         yield name, torch.from_numpy(param)
 
 
-def _checkpoints_fit_in_ram(files: list[str], threshold: float = 0.9) -> bool:
-    """Return True if total size of *files* fits within *threshold* of available RAM."""
+def _get_checkpoints_size_bytes(files: list[str]) -> int:
+    """Return the total size of the checkpoint files in bytes."""
     if not files:
-        return True
+        return 0
+    return sum(os.path.getsize(f) for f in files)
+
+
+def _get_available_ram_bytes() -> int:
+    """Return the available RAM in bytes."""
     import psutil
 
-    total_size = sum(os.path.getsize(f) for f in files)
-    available_ram = psutil.virtual_memory().available
-    fits = total_size <= threshold * available_ram
-    if not fits:
-        logger.warning(
-            "NFS detected but checkpoint total size (%.2f GiB) exceeds "
-            "%.0f%% of available RAM (%.2f GiB). Skipping prefetching checkpoints.",
-            total_size / (1024**3),
-            threshold * 100,
-            available_ram / (1024**3),
-        )
-    return fits
+    return psutil.virtual_memory().available
 
 
-def _is_nfs_path(files: list[str]) -> bool:
-    """Check whether the first file in *files* resides on an NFS
-    filesystem (Linux only)."""
+def _get_fs_type(files: list[str]) -> str:
+    """Get the filesystem type of the first file in *files* (Linux only)."""
     if not files:
-        return False
+        return ""
     try:
         # Only the first file is checked — all checkpoint shards reside
         # in the same directory and therefore on the same filesystem.
@@ -776,48 +791,64 @@ def _is_nfs_path(files: list[str]) -> bool:
                 ) and len(mount_point) > len(best_mount):
                     best_mount = mount_point
                     best_fstype = fstype
-        return best_fstype in ("nfs", "nfs4")
+        return best_fstype
     except Exception:
         # /proc/mounts is Linux-specific; on other OSes (or if the read
-        # fails for any reason) we fall back to "not NFS" rather than
-        # crashing model loading.
-        return False
+        # fails for any reason) we fall back to an empty string.
+        return ""
 
 
-def _prefetch_checkpoint(file_path: str) -> None:
+def _prefetch_checkpoint(
+    file_path: str,
+    block_size: int = DEFAULT_SAFETENSORS_PREFETCH_BLOCK_SIZE,
+) -> None:
     """Prefetch a checkpoint file into the OS page cache.
 
-    Reads the file in 16MB blocks so the kernel caches its pages before
-    workers load the same file.
+    Reads the file in blocks so the kernel caches its pages before workers load
+    the same file.
     """
-    block_size = 16 * 1024 * 1024  # 16MB
+    if block_size < 1:
+        raise ValueError("safetensors prefetch block size must be >= 1")
+
     with open(file_path, "rb") as f:
         while f.read(block_size):
             pass
 
 
-def _prefetch_all_checkpoints(sorted_files: list[str]) -> None:
+def _prefetch_all_checkpoints(
+    sorted_files: list[str],
+    num_prefetch_threads: int = DEFAULT_SAFETENSORS_PREFETCH_NUM_THREADS,
+    block_size: int = DEFAULT_SAFETENSORS_PREFETCH_BLOCK_SIZE,
+) -> None:
     """Start prefetching checkpoint files into page cache in a background thread."""
+    if num_prefetch_threads < 1:
+        raise ValueError("safetensors prefetch num threads must be >= 1")
+    if block_size < 1:
+        raise ValueError("safetensors prefetch block size must be >= 1")
+
     if torch.distributed.is_initialized():
         rank = torch.distributed.get_rank()
         world_size = torch.distributed.get_world_size()
     else:
         rank = 0
         world_size = 1
-    num_prefetch_threads = 8
     paths_to_prefetch = sorted_files[rank::world_size]
     total_for_rank = len(paths_to_prefetch)
 
     async def _prefetch_all() -> None:
-        semaphore = asyncio.Semaphore(num_prefetch_threads)
+        loop = asyncio.get_running_loop()
         completed = 0
         next_log_pct = 10
 
-        async def prefetch_one(path: str) -> None:
+        async def prefetch_one(
+            path: str,
+            executor: concurrent.futures.ThreadPoolExecutor,
+        ) -> None:
             nonlocal completed, next_log_pct
             try:
-                async with semaphore:
-                    await asyncio.to_thread(_prefetch_checkpoint, path)
+                await loop.run_in_executor(
+                    executor, _prefetch_checkpoint, path, block_size
+                )
                 completed += 1
                 if total_for_rank > 0 and next_log_pct <= 100:
                     pct = 100 * completed / total_for_rank
@@ -834,7 +865,12 @@ async def prefetch_one(path: str) -> None:
                     "Failed to prefetch checkpoint file %r.", path, exc_info=True
                 )
 
-        await asyncio.gather(*(prefetch_one(p) for p in paths_to_prefetch))
+        with concurrent.futures.ThreadPoolExecutor(
+            max_workers=num_prefetch_threads
+        ) as executor:
+            await asyncio.gather(
+                *(prefetch_one(p, executor) for p in paths_to_prefetch)
+            )
 
     def _run_prefetch() -> None:
         start = time.perf_counter()
@@ -845,7 +881,12 @@ def _run_prefetch() -> None:
             elapsed,
         )
 
-    logger.info("Prefetching checkpoint files into page cache started (in background)")
+    logger.info(
+        "Prefetching checkpoint files into page cache started "
+        "(in background, num_threads=%d, block_size=%d bytes)",
+        num_prefetch_threads,
+        block_size,
+    )
     threading.Thread(target=_run_prefetch, daemon=True).start()
 
 
@@ -854,6 +895,9 @@ def safetensors_weights_iterator(
     use_tqdm_on_load: bool,
     safetensors_load_strategy: str | None = None,
     local_expert_ids: set[int] | None = None,
+    *,
+    safetensors_prefetch_num_threads: int = DEFAULT_SAFETENSORS_PREFETCH_NUM_THREADS,
+    safetensors_prefetch_block_size: int = DEFAULT_SAFETENSORS_PREFETCH_BLOCK_SIZE,
 ) -> Generator[tuple[str, torch.Tensor], None, None]:
     """Iterate over the weights in the model safetensor files.
 
@@ -867,13 +911,69 @@ def safetensors_weights_iterator(
 
     sorted_files = sorted(hf_weights_files, key=_natural_sort_key)
 
-    should_prefetch = safetensors_load_strategy == "prefetch" or (
-        safetensors_load_strategy is None
-        and _is_nfs_path(sorted_files)
-        and _checkpoints_fit_in_ram(sorted_files)
+    fs_type = _get_fs_type(sorted_files)
+    is_net_fs = fs_type in ("nfs", "nfs4", "lustre")
+    total_bytes = _get_checkpoints_size_bytes(sorted_files)
+    avail_bytes = _get_available_ram_bytes()
+    ram_threshold_pct = 90
+    fits_in_ram = total_bytes <= (ram_threshold_pct / 100.0) * avail_bytes
+    fs_name = fs_type.upper() if fs_type else "unknown"
+
+    logger.info_once(
+        "Filesystem type for checkpoints: %s. Checkpoint size: %.2f GiB. "
+        "Available RAM: %.2f GiB.",
+        fs_name,
+        total_bytes / 1024**3,
+        avail_bytes / 1024**3,
     )
+
+    should_prefetch = safetensors_load_strategy == "prefetch"
+    if safetensors_load_strategy is None:
+        if is_net_fs and fits_in_ram:
+            should_prefetch = True
+        elif is_net_fs and not fits_in_ram:
+            logger.warning_once(
+                "Network filesystem (%s) detected but checkpoint total size "
+                "(%.2f GiB) exceeds %d%% of available RAM (%.2f GiB). "
+                "Skipping auto-prefetch.",
+                fs_name,
+                total_bytes / 1024**3,
+                ram_threshold_pct,
+                avail_bytes / 1024**3,
+            )
+        elif not is_net_fs and fits_in_ram:
+            logger.info_once(
+                "Auto-prefetch is disabled because the filesystem (%s) is not a "
+                "recognized network FS (NFS/Lustre). If you want to force "
+                "prefetching, start vLLM with --safetensors-load-strategy=prefetch.",
+                fs_name,
+            )
+        elif not is_net_fs and not fits_in_ram:
+            logger.info_once(
+                "Auto-prefetch is disabled because the filesystem (%s) is not a "
+                "recognized network FS (NFS/Lustre) and the checkpoint size "
+                "(%.2f GiB) exceeds %d%% of available RAM (%.2f GiB).",
+                fs_name,
+                total_bytes / 1024**3,
+                ram_threshold_pct,
+                avail_bytes / 1024**3,
+            )
+    elif should_prefetch and not fits_in_ram:
+        logger.warning_once(
+            "safetensors_load_strategy='prefetch' was explicitly specified, but "
+            "checkpoint total size (%.2f GiB) exceeds %d%% of available RAM "
+            "(%.2f GiB). This may cause out-of-memory errors.",
+            total_bytes / 1024**3,
+            ram_threshold_pct,
+            avail_bytes / 1024**3,
+        )
+
     if should_prefetch:
-        _prefetch_all_checkpoints(sorted_files)
+        _prefetch_all_checkpoints(
+            sorted_files,
+            num_prefetch_threads=safetensors_prefetch_num_threads,
+            block_size=safetensors_prefetch_block_size,
+        )
 
     leftover_state_dict: dict[str, torch.Tensor] = {}
     for st_file in tqdm(
@@ -1222,6 +1322,49 @@ def gguf_quant_weights_iterator(
             yield name, param
 
 
+def gguf_quant_weights_iterator_multi(
+    gguf_files: list[str], gguf_to_hf_name_map: dict[str, str]
+) -> Generator[tuple[str, torch.Tensor], None, None]:
+    """
+    Iterate over the quant weights across multiple GGUF shard files
+    and convert them to torch tensors.
+
+    Like gguf_quant_weights_iterator, we yield all weight types first
+    before yielding any weights data to avoid issues with packed layers
+    that have different quant types.
+    """
+    readers = [gguf.GGUFReader(f) for f in gguf_files]
+
+    # First pass: yield all weight types across all shards
+    for reader in readers:
+        for tensor in reader.tensors:
+            if tensor.name in gguf_to_hf_name_map:
+                weight_type = tensor.tensor_type
+                name = gguf_to_hf_name_map[tensor.name]
+                if weight_type.name not in ("F32", "BF16", "F16"):
+                    weight_type_name = name.replace("weight", "qweight_type")
+                    weight_type = torch.tensor(weight_type)
+                    yield weight_type_name, weight_type
+
+    # Second pass: yield all weight data across all shards
+    for reader in readers:
+        for tensor in reader.tensors:
+            if tensor.name in gguf_to_hf_name_map:
+                weight = tensor.data
+                weight_type = tensor.tensor_type
+                name = gguf_to_hf_name_map[tensor.name]
+                if weight_type.name not in ("F32", "BF16", "F16"):
+                    name = name.replace("weight", "qweight")
+                if weight_type.name == "BF16" and tensor.data.dtype == np.uint8:
+                    weight = weight.view(np.uint16)
+                    if reader.byte_order == "S":
+                        weight = weight.byteswap()
+                    param = torch.tensor(weight).view(torch.bfloat16)
+                else:
+                    param = torch.tensor(weight)
+                yield name, param
+
+
 def convert_pyslice_to_tensor(x: Any) -> torch.Tensor:
     """convert PySafeSlice object from safetensors to torch.Tensor
 
@@ -1243,8 +1386,8 @@ def default_weight_loader(param: torch.Tensor, loaded_weight: torch.Tensor) -> N
         if param.numel() == 1 and loaded_weight.numel() == 1:
             # Sometimes scalar values aren't considered tensors with shapes
             # so if both param and loaded_weight are a scalar,
-            # "broadcast" instead of copy
-            param.data.fill_(loaded_weight.item())
+            # reshape to match before copying
+            param.data.copy_(loaded_weight.view(param.shape))
         else:
             assert param.size() == loaded_weight.size(), (
                 f"Attempted to load weight ({loaded_weight.size()}) "
@@ -1334,6 +1477,9 @@ def initialize_single_dummy_weight(
     high: float = 1e-3,
     seed: int = 1234,
 ) -> None:
+    if param.device.type == "meta":
+        return  # deferred to finalize_layerwise_processing (e.g. online quant)
+
     if not torch.is_floating_point(param):
         if current_platform.is_rocm():
             # On ROCm, integer params (e.g. GPTQ qweight/qzeros) are left
@@ -1438,6 +1584,11 @@ def maybe_remap_kv_scale_name(name: str, params_dict: dict) -> str | None:
         # NemotronH format: .mixer.{k,v}_proj.{k,v}_scale ->
         # .mixer.attn.{k,v}_scale
         (r"\.mixer\.[kv]_proj\.([kv])_scale$", r".mixer.attn.\1_scale"),
+        # HYV3 format: .self_attn.q.scale -> .self_attn.attn.q_scale
+        (r"\.self_attn\.q\.scale$", r".self_attn.attn.q_scale"),
+        # HYV3 format: .self_attn.{k,v}_cache.scale ->
+        # .self_attn.attn.{k,v}_scale
+        (r"\.self_attn\.([kv])_cache\.scale$", r".self_attn.attn.\1_scale"),
         # Default format: .{k,v}_scale -> .attn.{k,v}_scale
         (r"\.([qkv])_scale$", r".attn.\1_scale"),
         (r"\.([qkv])_zero_point$", r".attn.\1_zero_point"),
@@ -1452,6 +1603,9 @@ def maybe_remap_kv_scale_name(name: str, params_dict: dict) -> str | None:
             ".k_zero_point",
             ".v_zero_point",
             ".q_zero_point",
+            ".q.scale",
+            ".k_cache.scale",
+            ".v_cache.scale",
         )
     ):
         import regex as re
diff --git a/vllm/model_executor/models/AXK1.py b/vllm/model_executor/models/AXK1.py
index f5ed4400fb65..701ec67c855c 100644
--- a/vllm/model_executor/models/AXK1.py
+++ b/vllm/model_executor/models/AXK1.py
@@ -42,7 +42,10 @@
 )
 from vllm.logger import init_logger
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
@@ -74,6 +77,7 @@
 
 from .interfaces import MixtureOfExperts, SupportsEagle, SupportsLoRA, SupportsPP
 from .utils import (
+    AutoWeightsLoader,
     PPMissingLayer,
     is_pp_missing_parameter,
     make_empty_intermediate_tensors_factory,
@@ -100,7 +104,7 @@ def __init__(
         self.tp_size = get_tensor_model_parallel_world_size()
         self.tp_rank = get_tensor_model_parallel_rank()
 
-        self.routed_scaling_factor = config.routed_scaling_factor
+        self.routed_scaling_factor = getattr(config, "routed_scaling_factor", 1.0)
 
         self.ep_group = get_ep_group().device_group
         self.ep_rank = get_ep_group().rank_in_group
@@ -163,14 +167,13 @@ def __init__(
                 prefix=f"{prefix}.shared_experts",
             )
 
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.shared_experts,
             gate=self.gate,
             num_experts=config.n_routed_experts,
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=config.norm_topk_prob,
             quant_config=quant_config,
             use_grouped_topk=True,
@@ -180,9 +183,8 @@ def __init__(
             scoring_func=config.scoring_func,
             # we do scaling outside, set factor to 1.0 to avoid double mul
             # aiter applies routed_scaling_factor internally
-            routed_scaling_factor=1.0
-            if not self.is_rocm_aiter_moe_enabled
-            else self.routed_scaling_factor,
+            routed_scaling_factor=self.routed_scaling_factor,
+            apply_routed_scale_to_output=not self.is_rocm_aiter_moe_enabled,
             e_score_correction_bias=self.gate.e_score_correction_bias,
             enable_eplb=self.enable_eplb,
             num_redundant_experts=self.n_redundant_experts,
@@ -204,43 +206,20 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
             hidden_states = sequence_parallel_chunk(hidden_states)
 
         if self.experts.is_internal_router:
-            # In this case, the gate/router runs inside the FusedMoE class
-            fused_moe_out = self.experts(
+            final_hidden_states = self.experts(
                 hidden_states=hidden_states, router_logits=hidden_states
             )
         else:
-            # router_logits: (num_tokens, n_experts)
             router_logits, _ = self.gate(hidden_states)
-            fused_moe_out = self.experts(
+            final_hidden_states = self.experts(
                 hidden_states=hidden_states, router_logits=router_logits
             )
 
-        shared_output, final_hidden_states = fused_moe_out
-        if self.shared_experts is None:
-            assert shared_output is None
-
-        # Fix FP16 overflow
-        # See AXK1DecoderLayer for more details.
-        if hidden_states.dtype != torch.float16:
-            if not self.is_rocm_aiter_moe_enabled:
-                final_hidden_states *= self.routed_scaling_factor
-        elif self.shared_experts is not None:
-            assert shared_output is not None
-            shared_output *= 1.0 / self.routed_scaling_factor
-
-        if self.shared_experts is not None:
-            assert shared_output is not None
-            final_hidden_states += shared_output
-
         if self.is_sequence_parallel:
             final_hidden_states = tensor_model_parallel_all_gather(
                 final_hidden_states, 0
             )
             final_hidden_states = final_hidden_states[:num_tokens]
-        elif self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
-                final_hidden_states
-            )
 
         return final_hidden_states.view(num_tokens, hidden_dim)
 
@@ -749,6 +728,13 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.make_empty_intermediate_tensors = make_empty_intermediate_tensors_factory(
             ["hidden_states", "residual"], config.hidden_size
         )
+        self.use_mha = all(
+            dim == 0 for dim in (config.qk_nope_head_dim, config.qk_rope_head_dim)
+        )
+        self.fuse_qkv_a_proj = config.q_lora_rank is not None
+        self.num_redundant_experts = (
+            vllm_config.parallel_config.eplb_config.num_redundant_experts
+        )
 
     def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
         return self.embed_tokens(input_ids)
@@ -798,158 +784,6 @@ def forward(
         hidden_states, _ = self.norm(hidden_states, residual)
         return hidden_states
 
-
-class AXK1MixtureOfExperts(MixtureOfExperts):
-    moe_mlp_layers: list[AXK1MoE]
-    """
-    List of MoE MLP layers in the model.
-    """
-
-    def extract_moe_parameters(self, example_moe: AXK1MoE | None):
-        if example_moe is None:
-            self.num_moe_layers = 0
-            self.num_expert_groups = 0
-            self.num_logical_experts = 0
-            self.num_physical_experts = 0
-            self.num_local_physical_experts = 0
-            self.num_routed_experts = 0
-            self.num_shared_experts = 0
-            self.num_redundant_experts = 0
-            logger.warning("AXK1: No AXK1MoE layer found in model.layers.")
-        else:
-            self.num_logical_experts = example_moe.n_logical_experts
-            self.num_physical_experts = example_moe.n_physical_experts
-            self.num_local_physical_experts = example_moe.n_local_physical_experts
-            self.num_routed_experts = example_moe.n_routed_experts
-            self.num_shared_experts = example_moe.n_shared_experts
-            self.num_redundant_experts = example_moe.n_redundant_experts
-
-    def update_physical_experts_metadata(
-        self,
-        num_physical_experts: int,
-        num_local_physical_experts: int,
-    ) -> None:
-        assert self.num_local_physical_experts == num_local_physical_experts
-        self.num_physical_experts = num_physical_experts
-        self.num_local_physical_experts = num_local_physical_experts
-        self.num_redundant_experts = num_physical_experts - self.num_logical_experts
-        for moe in self.moe_mlp_layers:
-            moe.n_local_physical_experts = num_local_physical_experts
-            moe.n_physical_experts = num_physical_experts
-            moe.n_redundant_experts = self.num_redundant_experts
-            moe.experts.update_expert_map()
-
-
-class AXK1ForCausalLM(
-    nn.Module, SupportsPP, AXK1MixtureOfExperts, SupportsLoRA, SupportsEagle
-):
-    packed_modules_mapping = {
-        "gate_up_proj": ["gate_proj", "up_proj"],
-    }
-    model_cls = AXK1Model
-
-    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
-        super().__init__()
-        config: AXK1Config = vllm_config.model_config.hf_config
-        quant_config = vllm_config.quant_config
-        self.config = config
-        self.quant_config = quant_config
-
-        qk_nope_head_dim = config.qk_nope_head_dim
-        qk_rope_head_dim = config.qk_rope_head_dim
-        self.use_mha = all(dim == 0 for dim in (qk_nope_head_dim, qk_rope_head_dim))
-
-        if self.use_mha:
-            self.packed_modules_mapping["qkv_proj"] = ["q_proj", "k_proj", "v_proj"]
-
-        # `packed_modules_mapping` needs to be modified before
-        # initializing AXK1Model, as it is passed inplace to
-        # quantization config init and may be used to select the
-        # quant_method for relevant layers during initialization.
-        self.fuse_qkv_a_proj = config.q_lora_rank is not None
-        if self.fuse_qkv_a_proj:
-            self.packed_modules_mapping["fused_qkv_a_proj"] = [
-                "q_a_proj",
-                "kv_a_proj_with_mqa",
-            ]
-
-        self.model = self.model_cls(
-            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
-        )
-        if get_pp_group().is_last_rank:
-            self.lm_head = ParallelLMHead(
-                config.vocab_size,
-                config.hidden_size,
-                quant_config=quant_config,
-                prefix=maybe_prefix(prefix, "lm_head"),
-            )
-        else:
-            self.lm_head = PPMissingLayer()
-        self.logits_processor = LogitsProcessor(config.vocab_size)
-        self.make_empty_intermediate_tensors = (
-            self.model.make_empty_intermediate_tensors
-        )
-        # Set MoE hyperparameters
-        self.num_moe_layers = (
-            self.config.num_hidden_layers - self.config.first_k_dense_replace
-        )
-        self.set_moe_parameters()
-
-    def set_moe_parameters(self):
-        self.expert_weights = []
-
-        self.num_expert_groups = getattr(self.config, "n_group", 1)
-
-        self.moe_layers = []
-        self.moe_mlp_layers = []
-        example_moe = None
-        for layer in self.model.layers:
-            if isinstance(layer, PPMissingLayer):
-                continue
-
-            assert isinstance(layer, AXK1DecoderLayer)
-            if isinstance(layer.mlp, AXK1MoE):
-                # Pick last one layer since the first ones may be dense layers.
-                example_moe = layer.mlp
-                self.moe_mlp_layers.append(layer.mlp)
-                self.moe_layers.append(layer.mlp.experts)
-
-        self.extract_moe_parameters(example_moe)
-
-    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
-        return self.model.embed_input_ids(input_ids)
-
-    def forward(
-        self,
-        input_ids: torch.Tensor | None,
-        positions: torch.Tensor,
-        intermediate_tensors: IntermediateTensors | None = None,
-        inputs_embeds: torch.Tensor | None = None,
-    ) -> torch.Tensor | IntermediateTensors:
-        hidden_states = self.model(
-            input_ids, positions, intermediate_tensors, inputs_embeds
-        )
-        return hidden_states
-
-    def compute_logits(
-        self,
-        hidden_states: torch.Tensor,
-    ) -> torch.Tensor | None:
-        logits = self.logits_processor(self.lm_head, hidden_states)
-        return logits
-
-    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
-        # Params for weights, fp8 weight scales, fp8 activation scales
-        # (param_name, weight_name, expert_id, shard_id)
-        return SharedFusedMoE.make_expert_params_mapping(
-            self,
-            ckpt_gate_proj_name="gate_proj",
-            ckpt_down_proj_name="down_proj",
-            ckpt_up_proj_name="up_proj",
-            num_experts=self.config.n_routed_experts,
-            num_redundant_experts=0,
-        )
-
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         rocm_aiter_moe_shared_expert_enabled = (
             rocm_aiter_ops.is_fusion_moe_shared_experts_enabled()
@@ -975,7 +809,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        expert_params_mapping = SharedFusedMoE.make_expert_params_mapping(
+        expert_params_mapping = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -1157,12 +991,170 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         return loaded_params
 
 
+class AXK1MixtureOfExperts(MixtureOfExperts):
+    moe_mlp_layers: list[AXK1MoE]
+    """
+    List of MoE MLP layers in the model.
+    """
+
+    def extract_moe_parameters(self, example_moe: AXK1MoE | None):
+        if example_moe is None:
+            self.num_moe_layers = 0
+            self.num_expert_groups = 0
+            self.num_logical_experts = 0
+            self.num_physical_experts = 0
+            self.num_local_physical_experts = 0
+            self.num_routed_experts = 0
+            self.num_shared_experts = 0
+            self.num_redundant_experts = 0
+            logger.warning("AXK1: No AXK1MoE layer found in model.layers.")
+        else:
+            self.num_logical_experts = example_moe.n_logical_experts
+            self.num_physical_experts = example_moe.n_physical_experts
+            self.num_local_physical_experts = example_moe.n_local_physical_experts
+            self.num_routed_experts = example_moe.n_routed_experts
+            self.num_shared_experts = example_moe.n_shared_experts
+            self.num_redundant_experts = example_moe.n_redundant_experts
+
+    def update_physical_experts_metadata(
+        self,
+        num_physical_experts: int,
+        num_local_physical_experts: int,
+    ) -> None:
+        assert self.num_local_physical_experts == num_local_physical_experts
+        self.num_physical_experts = num_physical_experts
+        self.num_local_physical_experts = num_local_physical_experts
+        self.num_redundant_experts = num_physical_experts - self.num_logical_experts
+        for moe in self.moe_mlp_layers:
+            moe.n_local_physical_experts = num_local_physical_experts
+            moe.n_physical_experts = num_physical_experts
+            moe.n_redundant_experts = self.num_redundant_experts
+            moe.experts.update_expert_map()
+
+
+class AXK1ForCausalLM(
+    nn.Module, SupportsPP, AXK1MixtureOfExperts, SupportsLoRA, SupportsEagle
+):
+    packed_modules_mapping = {
+        "gate_up_proj": ["gate_proj", "up_proj"],
+    }
+    model_cls = AXK1Model
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config: AXK1Config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        self.config = config
+        self.quant_config = quant_config
+
+        qk_nope_head_dim = config.qk_nope_head_dim
+        qk_rope_head_dim = config.qk_rope_head_dim
+        self.use_mha = all(dim == 0 for dim in (qk_nope_head_dim, qk_rope_head_dim))
+
+        if self.use_mha:
+            self.packed_modules_mapping["qkv_proj"] = ["q_proj", "k_proj", "v_proj"]
+
+        # `packed_modules_mapping` needs to be modified before
+        # initializing AXK1Model, as it is passed inplace to
+        # quantization config init and may be used to select the
+        # quant_method for relevant layers during initialization.
+        self.fuse_qkv_a_proj = config.q_lora_rank is not None
+        if self.fuse_qkv_a_proj:
+            self.packed_modules_mapping["fused_qkv_a_proj"] = [
+                "q_a_proj",
+                "kv_a_proj_with_mqa",
+            ]
+
+        self.model = self.model_cls(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+        if get_pp_group().is_last_rank:
+            self.lm_head = ParallelLMHead(
+                config.vocab_size,
+                config.hidden_size,
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "lm_head"),
+            )
+        else:
+            self.lm_head = PPMissingLayer()
+        self.logits_processor = LogitsProcessor(config.vocab_size)
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors
+        )
+        # Set MoE hyperparameters
+        self.num_moe_layers = (
+            self.config.num_hidden_layers - self.config.first_k_dense_replace
+        )
+        self.set_moe_parameters()
+
+    def set_moe_parameters(self):
+        self.expert_weights = []
+
+        self.num_expert_groups = getattr(self.config, "n_group", 1)
+
+        self.moe_layers = []
+        self.moe_mlp_layers = []
+        example_moe = None
+        for layer in self.model.layers:
+            if isinstance(layer, PPMissingLayer):
+                continue
+
+            assert isinstance(layer, AXK1DecoderLayer)
+            if isinstance(layer.mlp, AXK1MoE):
+                # Pick last one layer since the first ones may be dense layers.
+                example_moe = layer.mlp
+                self.moe_mlp_layers.append(layer.mlp)
+                self.moe_layers.append(layer.mlp.experts)
+
+        self.extract_moe_parameters(example_moe)
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        hidden_states = self.model(
+            input_ids, positions, intermediate_tensors, inputs_embeds
+        )
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        logits = self.logits_processor(self.lm_head, hidden_states)
+        return logits
+
+    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
+        # Params for weights, fp8 weight scales, fp8 activation scales
+        # (param_name, weight_name, expert_id, shard_id)
+        return fused_moe_make_expert_params_mapping(
+            self,
+            ckpt_gate_proj_name="gate_proj",
+            ckpt_down_proj_name="down_proj",
+            ckpt_up_proj_name="up_proj",
+            num_experts=self.config.n_routed_experts,
+            num_redundant_experts=0,
+        )
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(weights)
+
+
 def get_spec_layer_idx_from_weight_name(
     config: AXK1Config, weight_name: str
 ) -> int | None:
     if config.num_nextn_predict_layers and config.num_nextn_predict_layers > 0:
         layer_idx = config.num_hidden_layers
         for i in range(config.num_nextn_predict_layers):
-            if weight_name.startswith(f"model.layers.{layer_idx + i}."):
+            if weight_name.startswith(
+                f"model.layers.{layer_idx + i}."
+            ) or weight_name.startswith(f"layers.{layer_idx + i}."):
                 return layer_idx + i
     return None
diff --git a/vllm/model_executor/models/adapters.py b/vllm/model_executor/models/adapters.py
index 467e8ab67bf5..cccd849681f2 100644
--- a/vllm/model_executor/models/adapters.py
+++ b/vllm/model_executor/models/adapters.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import itertools
 from collections.abc import Iterable
 from contextlib import contextmanager
 from typing import TYPE_CHECKING, Any, TypeVar, cast
@@ -181,7 +182,8 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
 
             seen_weights = list[tuple[str, torch.Tensor]]()
             for name, loaded_weight in weights:
-                seen_weights.append((name, loaded_weight))
+                # Clone because the iterator may reuse the tensor buffer
+                seen_weights.append((name, loaded_weight.clone()))
 
                 try:
                     target_prefix = next(
@@ -208,9 +210,11 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
                     self._get_name(),
                 )
 
+            # Lazy chain so buffer-reusing weight iterators (e.g.
+            # runai_streamer) are consumed one tensor at a time.
             mapped_weights = (
                 (target_prefix + name, weight)
-                for name, weight in (*seen_weights, *weights)
+                for name, weight in itertools.chain(seen_weights, weights)
             )
 
             def default_load_weights(weights):
diff --git a/vllm/model_executor/models/afmoe.py b/vllm/model_executor/models/afmoe.py
index 22037336411a..2216e4948bd9 100644
--- a/vllm/model_executor/models/afmoe.py
+++ b/vllm/model_executor/models/afmoe.py
@@ -18,7 +18,10 @@
 )
 from vllm.logger import init_logger
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe.shared_fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
@@ -124,14 +127,13 @@ def __init__(
                 prefix=f"{prefix}.shared_experts",
             )
 
-        # Routed experts using SharedFusedMoE
-        self.experts = SharedFusedMoE(
+        # Routed experts using FusedMoE
+        self.experts = FusedMoE(
             shared_experts=self.shared_experts,
             num_experts=config.num_experts,
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=self.route_norm if self.score_func == "sigmoid" else False,
             quant_config=quant_config,
             use_grouped_topk=True,
@@ -152,20 +154,10 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
 
         router_logits = self.gate(hidden_states.to(dtype=torch.float32))
 
-        fused_moe_out = self.experts(
+        final_hidden_states = self.experts(
             hidden_states=hidden_states, router_logits=router_logits
         )
 
-        if self.shared_experts is not None:
-            shared_output, final_hidden_states = fused_moe_out
-            final_hidden_states = final_hidden_states + shared_output
-        else:
-            final_hidden_states = fused_moe_out
-        if self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
-                final_hidden_states
-            )
-
         return final_hidden_states.view(num_tokens, hidden_dim)
 
 
@@ -490,7 +482,7 @@ def make_empty_intermediate_tensors(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return SharedFusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -648,7 +640,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.num_moe_layers = config.num_hidden_layers - config.num_dense_layers
         self.num_expert_groups = config.n_group
 
-        self.moe_layers: list[SharedFusedMoE] = []
+        self.moe_layers: list[FusedMoE] = []
         example_moe = None
         for layer in self.model.layers:
             if isinstance(layer, PPMissingLayer):
diff --git a/vllm/model_executor/models/arcee.py b/vllm/model_executor/models/arcee.py
index bc4f85bf7ddb..eb8c3e3f65e1 100644
--- a/vllm/model_executor/models/arcee.py
+++ b/vllm/model_executor/models/arcee.py
@@ -45,6 +45,7 @@
     is_pp_missing_parameter,
     make_empty_intermediate_tensors_factory,
     make_layers,
+    maybe_prefix,
 )
 
 
@@ -367,7 +368,10 @@ def __init__(self, *, vllm_config, prefix: str = "") -> None:
         self.config = config
 
         # Initialize the inner Transformer model (ArceeModel)
-        self.model = ArceeModel(vllm_config=vllm_config, prefix=f"{prefix}.model")
+        self.model = ArceeModel(
+            vllm_config=vllm_config,
+            prefix=maybe_prefix(prefix, "model"),
+        )
         # On the last pipeline stage, set up the LM head and logits processor
         if get_pp_group().is_last_rank:
             # Determine vocabulary size (including any LoRA extra tokens
@@ -378,7 +382,7 @@ def __init__(self, *, vllm_config, prefix: str = "") -> None:
                 config.hidden_size,
                 quant_config=vllm_config.quant_config,
                 bias=getattr(config, "lm_head_bias", False),
-                prefix=f"{prefix}.lm_head",
+                prefix=maybe_prefix(prefix, "lm_head"),
             )
             if config.tie_word_embeddings:
                 # Tie output weights with input embedding matrix
diff --git a/vllm/model_executor/models/arctic.py b/vllm/model_executor/models/arctic.py
index 031b6534fb69..6ab55a4b1bf3 100644
--- a/vllm/model_executor/models/arctic.py
+++ b/vllm/model_executor/models/arctic.py
@@ -16,10 +16,12 @@
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,
 )
-from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import fused_experts, fused_topk
+from vllm.model_executor.layers.fused_moe import (
+    fused_experts,
+    fused_topk,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
@@ -42,6 +44,7 @@
 
 from .interfaces import SupportsPP, SupportsQuant
 from .utils import (
+    AutoWeightsLoader,
     extract_layer_index,
     is_pp_missing_parameter,
     make_empty_intermediate_tensors_factory,
@@ -49,8 +52,6 @@
     maybe_prefix,
 )
 
-logger = init_logger(__name__)
-
 
 class ArcticMLP(nn.Module):
     def __init__(
@@ -384,6 +385,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         cache_config = vllm_config.cache_config
         quant_config = vllm_config.quant_config
 
+        self.config = config
         self.vocab_size = config.vocab_size
         self.embed_tokens = VocabParallelEmbedding(
             self.vocab_size, config.hidden_size, org_num_embeddings=self.vocab_size
@@ -426,57 +428,6 @@ def forward(
         hidden_states = self.norm(hidden_states)
         return hidden_states
 
-
-class ArcticForCausalLM(nn.Module, SupportsPP, SupportsQuant):
-    packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
-
-    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
-        super().__init__()
-        config = vllm_config.model_config.hf_config
-        quant_config = vllm_config.quant_config
-        self.config = config
-        self.model = ArcticModel(
-            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
-        )
-        self.vocab_size = config.vocab_size
-        self.lm_head = ParallelLMHead(
-            self.vocab_size,
-            config.hidden_size,
-            quant_config=quant_config,
-            prefix=maybe_prefix(prefix, "lm_head"),
-        )
-        if self.config.tie_word_embeddings:
-            self.lm_head.weight = self.model.embed_tokens.weight
-        self.num_experts = config.num_local_experts
-        self.num_experts_per_tok = config.num_experts_per_tok
-
-        self.logits_processor = LogitsProcessor(config.vocab_size)
-        self.make_empty_intermediate_tensors = (
-            self.model.make_empty_intermediate_tensors
-        )
-
-    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
-        return self.model.embed_input_ids(input_ids)
-
-    def forward(
-        self,
-        input_ids: torch.Tensor | None,
-        positions: torch.Tensor,
-        intermediate_tensors: IntermediateTensors | None = None,
-        inputs_embeds: torch.Tensor | None = None,
-    ) -> torch.Tensor | IntermediateTensors:
-        hidden_states = self.model(
-            input_ids, positions, intermediate_tensors, inputs_embeds
-        )
-        return hidden_states
-
-    def compute_logits(
-        self,
-        hidden_states: torch.Tensor,
-    ) -> torch.Tensor | None:
-        logits = self.logits_processor(self.lm_head, hidden_states)
-        return logits
-
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         stacked_params_mapping = [
             # (param_name, shard_name, shard_id)
@@ -487,41 +438,26 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
         mlp_params_mapping: list[tuple[str, str, int]] = []
         expert_params_mapping: list[tuple[str, str, int]] = []
-        num_layers = self.config.num_hidden_layers
-
-        for layer in range(num_layers):
-            mlp_params_mapping.append(
-                (
-                    f"layers.{layer}.residual_mlp.w13.weight",
-                    f"layers.{layer}.residual_mlp.w1.weight",
-                    0,
-                )
-            )
-            mlp_params_mapping.append(
-                (
-                    f"layers.{layer}.residual_mlp.w13.weight",
-                    f"layers.{layer}.residual_mlp.w3.weight",
-                    1,
-                )
-            )
-            if layer % 2 == 0:
-                # MLP layers
+
+        for layer in range(self.config.num_hidden_layers):
+            is_moe_layer = (layer + 1) % self.config.moe_layer_frequency == 0
+            if is_moe_layer and self.config.use_residual:
                 mlp_params_mapping.append(
                     (
-                        f"layers.{layer}.block_sparse_moe.mlp.w13.weight",
-                        f"layers.{layer}.block_sparse_moe.mlp.w1.weight",
+                        f"layers.{layer}.residual_mlp.w13.weight",
+                        f"layers.{layer}.residual_mlp.w1.weight",
                         0,
                     )
                 )
                 mlp_params_mapping.append(
                     (
-                        f"layers.{layer}.block_sparse_moe.mlp.w13.weight",
-                        f"layers.{layer}.block_sparse_moe.mlp.w3.weight",
+                        f"layers.{layer}.residual_mlp.w13.weight",
+                        f"layers.{layer}.residual_mlp.w3.weight",
                         1,
                     )
                 )
-            else:
-                # MoE layers
+
+            if is_moe_layer:
                 for expert_id in range(self.config.num_local_experts):
                     expert_params_mapping.append(
                         ("ws", f"experts.{expert_id}.w1.weight", expert_id)
@@ -532,15 +468,25 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                     expert_params_mapping.append(
                         ("ws", f"experts.{expert_id}.w3.weight", expert_id)
                     )
+            else:
+                mlp_params_mapping.append(
+                    (
+                        f"layers.{layer}.block_sparse_moe.mlp.w13.weight",
+                        f"layers.{layer}.block_sparse_moe.mlp.w1.weight",
+                        0,
+                    )
+                )
+                mlp_params_mapping.append(
+                    (
+                        f"layers.{layer}.block_sparse_moe.mlp.w13.weight",
+                        f"layers.{layer}.block_sparse_moe.mlp.w3.weight",
+                        1,
+                    )
+                )
 
         params_dict = dict(self.named_parameters())
         loaded_params: set[str] = set()
 
-        logger.info(
-            "It will take ~10 minutes loading from the 16-bit weights. "
-            "Alternatively, use the prequantized 8-bit weights of arctic "
-            "and set load-format to `sharded_state` will accelerate loading."
-        )
         for name, loaded_weight in weights:
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 if weight_name not in name:
@@ -585,10 +531,67 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                         if is_pp_missing_parameter(name, self):
                             continue
                         param = params_dict[name]
-
                         weight_loader = getattr(
                             param, "weight_loader", default_weight_loader
                         )
                         weight_loader(param, loaded_weight)
             loaded_params.add(name)
         return loaded_params
+
+
+class ArcticForCausalLM(nn.Module, SupportsPP, SupportsQuant):
+    packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        self.config = config
+        self.model = ArcticModel(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+        self.vocab_size = config.vocab_size
+        self.lm_head = ParallelLMHead(
+            self.vocab_size,
+            config.hidden_size,
+            quant_config=quant_config,
+            prefix=maybe_prefix(prefix, "lm_head"),
+        )
+        if self.config.tie_word_embeddings:
+            self.lm_head.weight = self.model.embed_tokens.weight
+        self.num_experts = config.num_local_experts
+        self.num_experts_per_tok = config.num_experts_per_tok
+
+        self.logits_processor = LogitsProcessor(config.vocab_size)
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        hidden_states = self.model(
+            input_ids, positions, intermediate_tensors, inputs_embeds
+        )
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        logits = self.logits_processor(self.lm_head, hidden_states)
+        return logits
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(
+            self,
+            skip_prefixes=(["lm_head."] if self.config.tie_word_embeddings else None),
+        )
+        return loader.load_weights(weights)
diff --git a/vllm/model_executor/models/aria.py b/vllm/model_executor/models/aria.py
index 7b891f8ee429..55bc64cd94a9 100644
--- a/vllm/model_executor/models/aria.py
+++ b/vllm/model_executor/models/aria.py
@@ -14,7 +14,9 @@
 from vllm.distributed import get_tensor_model_parallel_rank
 from vllm.inputs import MultiModalDataDict
 from vllm.model_executor.layers.activation import get_act_fn
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+)
 from vllm.model_executor.layers.linear import ColumnParallelLinear, RowParallelLinear
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.quantization import QuantizationConfig
@@ -214,7 +216,7 @@ def forward(
         return out
 
 
-class AriaFusedMoE(SharedFusedMoE):
+class AriaFusedMoE(FusedMoE):
     def weight_loader(
         self, param: nn.Parameter, loaded_weight: torch.Tensor, shard_id: str
     ) -> None:
@@ -283,7 +285,6 @@ def __init__(
             hidden_size=config.hidden_size,
             intermediate_size=config.intermediate_size,
             quant_config=quant_config,
-            reduce_results=True,
             prefix=f"{prefix}.experts",
         )
 
@@ -301,12 +302,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
 
         router_output = torch.nn.functional.linear(hidden_states, self.router_weight)
 
-        sparse_expert_output = self.experts(hidden_states, router_output)
-
-        if self.shared_experts is not None:
-            return sparse_expert_output[0] + sparse_expert_output[1]
-        else:
-            return sparse_expert_output
+        return self.experts(hidden_states, router_output)
 
 
 class AriaTextDecoderLayer(LlamaDecoderLayer):
@@ -544,7 +540,7 @@ def __init__(
             self.vision_tower = AriaVisionTransformer(
                 config.vision_config,
                 quant_config=quant_config,
-                prefix=f"{prefix}.vision_tower",
+                prefix=maybe_prefix(prefix, "vision_tower"),
             )
             self.multi_modal_projector = AriaProjector(
                 config, prefix=maybe_prefix(prefix, "multi_modal_projector")
diff --git a/vllm/model_executor/models/aya_vision.py b/vllm/model_executor/models/aya_vision.py
index 65f306393608..f4c9bbee9e95 100644
--- a/vllm/model_executor/models/aya_vision.py
+++ b/vllm/model_executor/models/aya_vision.py
@@ -358,6 +358,10 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
                 architectures=["Cohere2ForCausalLM"],
             )
 
+        self.make_empty_intermediate_tensors = (
+            self.language_model.make_empty_intermediate_tensors
+        )
+
     @property
     def dtype(self):
         return next(self.parameters()).dtype
diff --git a/vllm/model_executor/models/bailing_moe.py b/vllm/model_executor/models/bailing_moe.py
index 7725dfa2a887..56e119207dae 100644
--- a/vllm/model_executor/models/bailing_moe.py
+++ b/vllm/model_executor/models/bailing_moe.py
@@ -41,7 +41,10 @@
 )
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
@@ -285,13 +288,12 @@ def __init__(
         else:
             self.shared_experts = None
 
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.shared_experts,
             num_experts=self.num_experts,
             top_k=self.top_k,
             hidden_size=self.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=self.norm_expert_prob,
             quant_config=quant_config,
             prefix=f"{prefix}.experts",
@@ -301,6 +303,7 @@ def __init__(
             topk_group=self.topk_group,
             use_grouped_topk=self.use_grouped_topk,
             router_logits_dtype=self.router_dtype,
+            routed_scaling_factor=self.routed_scaling_factor,
         )
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
@@ -314,21 +317,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         final_hidden_states = self.experts(
             hidden_states=hidden_states, router_logits=router_logits
         )
-
-        if self.shared_experts is not None:
-            shared_output, final_hidden_states = final_hidden_states
-        else:
-            shared_output = None
-
-        final_hidden_states *= self.routed_scaling_factor
-
-        if shared_output is not None:
-            final_hidden_states = final_hidden_states + shared_output
-
-        if self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
-                final_hidden_states
-            )
         return final_hidden_states.view(num_tokens, hidden_size)
 
 
@@ -476,7 +464,7 @@ def forward(
         return hidden_states
 
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
-        return SharedFusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/bailing_moe_linear.py b/vllm/model_executor/models/bailing_moe_linear.py
index ecc5d63ced75..a1fd1646a222 100644
--- a/vllm/model_executor/models/bailing_moe_linear.py
+++ b/vllm/model_executor/models/bailing_moe_linear.py
@@ -17,11 +17,15 @@
 )
 from vllm.forward_context import get_forward_context
 from vllm.logger import init_logger
+from vllm.model_executor.custom_op import PluggableLayer
 from vllm.model_executor.layers.fla.ops.layernorm_guard import (
     RMSNormGated,
     layernorm_fn,
 )
-from vllm.model_executor.layers.fused_moe import FusedMoE, SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
@@ -60,6 +64,7 @@
 from vllm.sequence import IntermediateTensors
 from vllm.v1.attention.backend import AttentionMetadata
 from vllm.v1.attention.backends.linear_attn import LinearAttentionMetadata
+from vllm.v1.attention.backends.registry import MambaAttentionBackendEnum
 
 from .interfaces import HasInnerState, IsHybrid, SupportsPP
 from .utils import (
@@ -201,14 +206,19 @@ def __init__(
             self.q_a_layernorm = None
             self.q_b_proj = None
 
-        rope_parameters = _build_rope_parameters(config)
+        rope_parameters = _build_rope_parameters(config) or {}
+        # MLA rotates the full qk_rope_head_dim,
+        # partial_rotary_factor is for the linear-attn head only.
+        rope_parameters = {
+            k: v for k, v in rope_parameters.items() if k != "partial_rotary_factor"
+        }
+        rope_parameters["rope_dim"] = self.qk_rope_head_dim
         max_position = getattr(config, "max_position_embeddings", 8192)
         self.rotary_emb = get_rope(
             head_size=self.qk_rope_head_dim,
             max_position=max_position,
             is_neox_style=False,
-            rope_parameters=rope_parameters or None,
-            dtype=torch.float32,
+            rope_parameters=rope_parameters,
         )
 
         # Build MLAModules for MultiHeadLatentAttentionWrapper
@@ -351,14 +361,13 @@ def __init__(
         else:
             self.shared_experts = None
 
-        # Routed experts using SharedFusedMoE
-        self.experts = SharedFusedMoE(
+        # Routed experts using FusedMoE
+        self.experts = FusedMoE(
             shared_experts=self.shared_experts,
             num_experts=self.num_experts,
             top_k=self.top_k,
             hidden_size=self.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=self.norm_expert_prob,
             quant_config=quant_config,
             prefix=f"{prefix}.experts",
@@ -368,6 +377,8 @@ def __init__(
             topk_group=self.topk_group,
             use_grouped_topk=self.use_grouped_topk,
             router_logits_dtype=self.router_dtype,
+            routed_scaling_factor=self.routed_scaling_factor,
+            apply_routed_scale_to_output=True,
         )
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
@@ -383,22 +394,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
             hidden_states=hidden_states, router_logits=router_logits
         )
 
-        # Handle tuple return from SharedFusedMoE
-        if self.shared_experts is not None:
-            shared_output, final_hidden_states = final_hidden_states
-        else:
-            shared_output = None
-
-        final_hidden_states *= self.routed_scaling_factor
-
-        if shared_output is not None:
-            final_hidden_states = final_hidden_states + shared_output
-
-        if self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
-                final_hidden_states
-            )
-
         return final_hidden_states.view(num_tokens, hidden_size)
 
 
@@ -437,17 +432,21 @@ def _weight_loader(param: torch.nn.Parameter, loaded_weight: torch.Tensor) -> No
         param.data.copy_(loaded_weight[shard].contiguous())
 
 
-class BailingMoELinearAttention(nn.Module, MambaBase):
-    """
-    Bailing MoE Linear Attention implementation using minimax backend.
+# --8<-- [start:bailing_moe_linear_attention]
+@PluggableLayer.register("bailing_moe_linear_attention")
+class BailingMoELinearAttention(PluggableLayer, MambaBase):
+    """Pluggable Bailing MoE Linear Attention layer which allows OOT backends
+    to add custom implementations.
 
-    This implements the linear attention mechanism from sglang, adapted for vLLM's
-    v1 engine with MambaBase interface support.
+    This implements the linear attention mechanism from sglang, adapted for
+    vLLM's v1 engine with MambaBase interface support.
     """
 
+    # --8<-- [end:bailing_moe_linear_attention]
+
     @property
-    def mamba_type(self) -> str:
-        return "linear_attention"
+    def mamba_type(self) -> MambaAttentionBackendEnum:
+        return MambaAttentionBackendEnum.LINEAR
 
     def get_state_shape(self) -> tuple[tuple[int, ...], ...]:
         """Return state shape for linear attention cache.
@@ -581,7 +580,6 @@ def __init__(
             self.head_dim,
             max_position=self.max_position_embeddings,
             is_neox_style=True,
-            dtype=torch.float32,
             rope_parameters=rope_parameters or None,
         )
 
@@ -766,8 +764,6 @@ def _prefill_and_mix_infer(
 
     def _decode_infer(self, q, k, v, kv_cache, state_indices_tensor, attn_metadata):
         """Handle decode (single token per sequence)."""
-        num_prefill_tokens = attn_metadata.num_prefill_tokens
-        num_prefills = attn_metadata.num_prefills
         hidden = linear_attention_decode(
             q,
             k,
@@ -775,10 +771,10 @@ def _decode_infer(self, q, k, v, kv_cache, state_indices_tensor, attn_metadata):
             kv_cache,
             self.tp_slope,
             state_indices_tensor,
-            q_start=num_prefill_tokens,
-            q_end=None,
-            slot_start=num_prefills,
-            slot_end=None,
+            q_start=0,
+            q_end=attn_metadata.num_decode_tokens,
+            slot_start=0,
+            slot_end=attn_metadata.num_decodes,
             block_size=32,
         )
         return hidden
@@ -1005,7 +1001,7 @@ def forward(
 
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         """Get expert parameter mapping for MoE layers."""
-        return FusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -1161,6 +1157,7 @@ def __init__(
                 config.vocab_size,
                 config.hidden_size,
                 quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "lm_head"),
             )
             self.logits_processor = LogitsProcessor(config.vocab_size)
         else:
diff --git a/vllm/model_executor/models/bert.py b/vllm/model_executor/models/bert.py
index 01854b96d56f..c9ea7363292d 100644
--- a/vllm/model_executor/models/bert.py
+++ b/vllm/model_executor/models/bert.py
@@ -559,13 +559,10 @@ def _encode_token_type_ids(
 
 
 def _decode_token_type_ids(input_ids: torch.Tensor) -> torch.Tensor:
-    ids_mask = (
-        torch.ones_like(input_ids, dtype=torch.int32, device=input_ids.device)
-        << TOKEN_TYPE_SHIFT
-    )
-    tokens_mask = ids_mask.bitwise_not()
+    ids_mask = 1 << TOKEN_TYPE_SHIFT
+    tokens_mask = ~ids_mask
 
-    token_type_ids = input_ids.bitwise_and(ids_mask) >> TOKEN_TYPE_SHIFT
+    token_type_ids = (input_ids & ids_mask) >> TOKEN_TYPE_SHIFT
 
     input_ids.bitwise_and_(tokens_mask)
 
diff --git a/vllm/model_executor/models/blip2.py b/vllm/model_executor/models/blip2.py
index 8b5fd452e8ff..49d2a5a89f5f 100644
--- a/vllm/model_executor/models/blip2.py
+++ b/vllm/model_executor/models/blip2.py
@@ -561,7 +561,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
                 config.qformer_config,
                 cache_config=cache_config,
                 quant_config=quant_config,
-                prefix=f"{prefix}.qformer",
+                prefix=maybe_prefix(prefix, "qformer"),
             )
             self.language_projection = nn.Linear(
                 config.qformer_config.hidden_size,
diff --git a/vllm/model_executor/models/cheers.py b/vllm/model_executor/models/cheers.py
new file mode 100644
index 000000000000..5f74c6771e4e
--- /dev/null
+++ b/vllm/model_executor/models/cheers.py
@@ -0,0 +1,753 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Inference-only Cheers (UMM) model compatible with HuggingFace weights.
+
+Cheers is a unified multimodal model for image understanding and generation.
+For vLLM, we focus on the image understanding (vision-to-text) capabilities.
+The image generation part (gen_projector, hi_gate, etc.) is not supported,
+but the VAE encoder + decoder projector are required for image understanding.
+"""
+
+import math
+from collections.abc import Iterable, Mapping, Sequence
+from typing import Any, Literal, TypeAlias
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+from transformers import BatchFeature
+
+from vllm.config import VllmConfig
+from vllm.config.multimodal import BaseDummyOptions
+from vllm.inputs import MultiModalDataDict
+from vllm.logger import init_logger
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.multimodal.inputs import (
+    MultiModalFieldConfig,
+    MultiModalKwargsItems,
+)
+from vllm.multimodal.parse import MultiModalDataItems
+from vllm.multimodal.processing import (
+    BaseDummyInputsBuilder,
+    BaseMultiModalProcessor,
+    BaseProcessingInfo,
+    PromptReplacement,
+)
+from vllm.sequence import IntermediateTensors
+from vllm.transformers_utils.processors.cheers import CheersProcessor
+from vllm.utils.tensor_schema import TensorSchema
+
+from .interfaces import (
+    MultiModalEmbeddings,
+    SupportsLoRA,
+    SupportsMultiModal,
+    SupportsPP,
+)
+from .siglip import SiglipVisionModel
+from .utils import (
+    AutoWeightsLoader,
+    WeightsMapper,
+    init_vllm_registered_model,
+    maybe_prefix,
+)
+
+logger = init_logger(__name__)
+
+
+# ── VAE components (needed for image understanding pipeline) ────────
+
+
+def _swish(x: torch.Tensor) -> torch.Tensor:
+    return x * torch.sigmoid(x)
+
+
+class _AttnBlock(nn.Module):
+    def __init__(self, in_channels: int):
+        super().__init__()
+        self.norm = nn.GroupNorm(32, in_channels, eps=1e-6, affine=True)
+        self.q = nn.Conv2d(in_channels, in_channels, 1)
+        self.k = nn.Conv2d(in_channels, in_channels, 1)
+        self.v = nn.Conv2d(in_channels, in_channels, 1)
+        self.proj_out = nn.Conv2d(in_channels, in_channels, 1)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        h_ = self.norm(x)
+        q = self.q(h_)
+        k = self.k(h_)
+        v = self.v(h_)
+        b, c, h, w = q.shape
+        q = rearrange(q, "b c h w -> b 1 (h w) c").contiguous()
+        k = rearrange(k, "b c h w -> b 1 (h w) c").contiguous()
+        v = rearrange(v, "b c h w -> b 1 (h w) c").contiguous()
+        h_ = F.scaled_dot_product_attention(q, k, v)
+        h_ = rearrange(h_, "b 1 (h w) c -> b c h w", h=h, w=w, c=c, b=b)
+        return x + self.proj_out(h_)
+
+
+class _ResnetBlock(nn.Module):
+    def __init__(self, in_channels: int, out_channels: int):
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.norm1 = nn.GroupNorm(32, in_channels, eps=1e-6, affine=True)
+        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, 1, 1)
+        self.norm2 = nn.GroupNorm(32, out_channels, eps=1e-6, affine=True)
+        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, 1, 1)
+        if in_channels != out_channels:
+            self.nin_shortcut = nn.Conv2d(in_channels, out_channels, 1)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        h = _swish(self.norm1(x))
+        h = self.conv1(h)
+        h = _swish(self.norm2(h))
+        h = self.conv2(h)
+        if self.in_channels != self.out_channels:
+            x = self.nin_shortcut(x)
+        return x + h
+
+
+class _Downsample(nn.Module):
+    def __init__(self, in_channels: int):
+        super().__init__()
+        self.conv = nn.Conv2d(in_channels, in_channels, 3, stride=2, padding=0)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = F.pad(x, (0, 1, 0, 1), mode="constant", value=0)
+        return self.conv(x)
+
+
+class _Upsample(nn.Module):
+    def __init__(self, in_channels: int):
+        super().__init__()
+        self.conv = nn.Conv2d(in_channels, in_channels, 3, 1, 1)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = F.interpolate(x, scale_factor=2.0, mode="nearest")
+        return self.conv(x)
+
+
+_VAE_ENCODER_DEFAULTS = {
+    "in_channels": 3,
+    "ch": 128,
+    "ch_mult": [1, 2, 4, 4],
+    "num_res_blocks": 2,
+    "z_channels": 32,
+}
+_VAE_DECODER_DEFAULTS = {
+    "in_channels": 3,
+    "out_ch": 3,
+    "ch": 128,
+    "ch_mult": [1, 2, 4, 4],
+    "num_res_blocks": 2,
+    "z_channels": 32,
+}
+
+
+def _cfg(config, key, defaults=None):
+    """Access config attribute whether it's a dict or namespace object."""
+    if isinstance(config, dict):
+        if key in config:
+            return config[key]
+        if defaults and key in defaults:
+            return defaults[key]
+        raise KeyError(f"Key '{key}' not found in config dict: {list(config.keys())}")
+    return getattr(config, key)
+
+
+class CheersVAEEncoder(nn.Module):
+    """VAE encoder from the Cheers/UMM model."""
+
+    def __init__(self, config):
+        super().__init__()
+        d = _VAE_ENCODER_DEFAULTS
+        ch = _cfg(config, "ch", d)
+        ch_mult = _cfg(config, "ch_mult", d)
+        num_res_blocks = _cfg(config, "num_res_blocks", d)
+        z_channels = _cfg(config, "z_channels", d)
+        in_channels = _cfg(config, "in_channels", d)
+        num_resolutions = len(ch_mult)
+
+        self.quant_conv = nn.Conv2d(2 * z_channels, 2 * z_channels, 1)
+        self.conv_in = nn.Conv2d(in_channels, ch, 3, 1, 1)
+
+        in_ch_mult = (1,) + tuple(ch_mult)
+        self.down = nn.ModuleList()
+        block_in = ch
+        for i_level in range(num_resolutions):
+            block = nn.ModuleList()
+            attn = nn.ModuleList()
+            block_in = ch * in_ch_mult[i_level]
+            block_out = ch * ch_mult[i_level]
+            for _ in range(num_res_blocks):
+                block.append(_ResnetBlock(block_in, block_out))
+                block_in = block_out
+            down = nn.Module()
+            down.block = block
+            down.attn = attn
+            if i_level != num_resolutions - 1:
+                down.downsample = _Downsample(block_in)
+            self.down.append(down)
+
+        self.mid = nn.Module()
+        self.mid.block_1 = _ResnetBlock(block_in, block_in)
+        self.mid.attn_1 = _AttnBlock(block_in)
+        self.mid.block_2 = _ResnetBlock(block_in, block_in)
+
+        self.norm_out = nn.GroupNorm(32, block_in, eps=1e-6, affine=True)
+        self.conv_out = nn.Conv2d(block_in, 2 * z_channels, 3, 1, 1)
+        self._num_resolutions = num_resolutions
+        self._num_res_blocks = num_res_blocks
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        hs = [self.conv_in(x)]
+        for i_level in range(self._num_resolutions):
+            for i_block in range(self._num_res_blocks):
+                h = self.down[i_level].block[i_block](hs[-1])
+                if len(self.down[i_level].attn) > 0:
+                    h = self.down[i_level].attn[i_block](h)
+                hs.append(h)
+            if hasattr(self.down[i_level], "downsample"):
+                hs.append(self.down[i_level].downsample(hs[-1]))
+        h = hs[-1]
+        h = self.mid.block_1(h)
+        h = self.mid.attn_1(h)
+        h = self.mid.block_2(h)
+        h = _swish(self.norm_out(h))
+        h = self.conv_out(h)
+        h = self.quant_conv(h)
+        return h
+
+
+class CheersVAEDecoder(nn.Module):
+    """VAE decoder (used inside VAEDecoderProjector)."""
+
+    def __init__(self, config):
+        super().__init__()
+        d = _VAE_DECODER_DEFAULTS
+        ch = _cfg(config, "ch", d)
+        ch_mult = _cfg(config, "ch_mult", d)
+        num_res_blocks = _cfg(config, "num_res_blocks", d)
+        z_channels = _cfg(config, "z_channels", d)
+        out_ch = _cfg(config, "out_ch", d)
+        num_resolutions = len(ch_mult)
+
+        self.post_quant_conv = nn.Conv2d(z_channels, z_channels, 1)
+        block_in = ch * ch_mult[num_resolutions - 1]
+        self.conv_in = nn.Conv2d(z_channels, block_in, 3, 1, 1)
+
+        self.mid = nn.Module()
+        self.mid.block_1 = _ResnetBlock(block_in, block_in)
+        self.mid.attn_1 = _AttnBlock(block_in)
+        self.mid.block_2 = _ResnetBlock(block_in, block_in)
+
+        self.up = nn.ModuleList()
+        for i_level in reversed(range(num_resolutions)):
+            block = nn.ModuleList()
+            attn = nn.ModuleList()
+            block_out = ch * ch_mult[i_level]
+            for _ in range(num_res_blocks + 1):
+                block.append(_ResnetBlock(block_in, block_out))
+                block_in = block_out
+            up = nn.Module()
+            up.block = block
+            up.attn = attn
+            if i_level != 0:
+                up.upsample = _Upsample(block_in)
+            self.up.insert(0, up)
+
+        self.norm_out = nn.GroupNorm(32, block_in, eps=1e-6, affine=True)
+        self.conv_out = nn.Conv2d(block_in, out_ch, 3, 1, 1)
+        self._num_resolutions = num_resolutions
+        self._num_res_blocks = num_res_blocks
+
+    def forward(self, z: torch.Tensor) -> torch.Tensor:
+        z = self.post_quant_conv(z)
+        upscale_dtype = next(self.up.parameters()).dtype
+        h = self.conv_in(z)
+        h = self.mid.block_1(h)
+        h = self.mid.attn_1(h)
+        h = self.mid.block_2(h)
+        h = h.to(upscale_dtype)
+        for i_level in reversed(range(self._num_resolutions)):
+            for i_block in range(self._num_res_blocks + 1):
+                h = self.up[i_level].block[i_block](h)
+                if len(self.up[i_level].attn) > 0:
+                    h = self.up[i_level].attn[i_block](h)
+            if i_level != 0:
+                h = self.up[i_level].upsample(h)
+        h = _swish(self.norm_out(h))
+        return self.conv_out(h)
+
+
+class CheersVAEModel(nn.Module):
+    """VAE model with encoder only (for image understanding)."""
+
+    def __init__(self, config):
+        super().__init__()
+        enc_cfg = _cfg(config, "vae_encoder_config")
+        self.encoder = CheersVAEEncoder(enc_cfg)
+        self.ps = [2, 2]
+        z_ch = _cfg(enc_cfg, "z_channels", _VAE_ENCODER_DEFAULTS)
+        self.bn = nn.BatchNorm2d(
+            math.prod(self.ps) * z_ch,
+            eps=1e-4,
+            momentum=0.1,
+            affine=False,
+            track_running_stats=True,
+        )
+
+    def encode(self, x: torch.Tensor) -> torch.Tensor:
+        self.bn.eval()
+        moments = self.encoder(x)
+        mean = torch.chunk(moments, 2, dim=1)[0]
+        z = rearrange(
+            mean,
+            "... c (i pi) (j pj) -> ... (c pi pj) i j",
+            pi=self.ps[0],
+            pj=self.ps[1],
+        )
+        return self.bn(z)
+
+
+class CheersVAEDecoderProjector(nn.Module):
+    """VAE decoder projector that converts latent back to pixel-like space."""
+
+    def __init__(self, config):
+        super().__init__()
+        dec_cfg = _cfg(config, "vae_decoder_config")
+        enc_cfg = _cfg(config, "vae_encoder_config")
+        self.decoder = CheersVAEDecoder(dec_cfg)
+        self.ps = [2, 2]
+        z_ch = _cfg(enc_cfg, "z_channels", _VAE_ENCODER_DEFAULTS)
+        self.bn = nn.BatchNorm2d(
+            math.prod(self.ps) * z_ch,
+            eps=1e-4,
+            momentum=0.1,
+            affine=False,
+            track_running_stats=True,
+        )
+
+    def forward(self, z: torch.Tensor) -> torch.Tensor:
+        self.bn.eval()
+        s = torch.sqrt(self.bn.running_var.view(1, -1, 1, 1) + 1e-4)
+        m = self.bn.running_mean.view(1, -1, 1, 1)
+        z = z * s + m
+        z = rearrange(
+            z,
+            "... (c pi pj) i j -> ... c (i pi) (j pj)",
+            pi=self.ps[0],
+            pj=self.ps[1],
+        )
+        return self.decoder(z)
+
+
+class CheersImagePixelInputs(TensorSchema):
+    """
+    Dimensions:
+        - bn: Batch size * number of images
+        - c: Number of channels (3)
+        - h: Height of each image
+        - w: Width of each image
+    """
+
+    type: Literal["pixel_values"]
+    pixel_values: torch.Tensor  # Shape: (bn, 3, h, w)
+
+
+CheersImageInputs: TypeAlias = CheersImagePixelInputs
+
+
+class CheersUndProjector(nn.Module):
+    """Understanding projector that maps vision features to LLM dimension
+    with 2x2 spatial compression (4x token reduction)."""
+
+    def __init__(
+        self,
+        image_embed_dim: int,
+        text_embed_dim: int,
+        compression_factor: tuple[int, int] = (2, 2),
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.image_embed_dim = image_embed_dim
+        self.text_embed_dim = text_embed_dim
+        self.compression_factor = compression_factor
+        self.layernorm = nn.LayerNorm(image_embed_dim)
+        hidden_size = image_embed_dim * (compression_factor[0] * compression_factor[1])
+        self.mlp = nn.Sequential(
+            nn.Linear(hidden_size, hidden_size),
+            nn.GELU(),
+            nn.Linear(hidden_size, text_embed_dim),
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.layernorm(x)
+        height = width = int(x.size(1) ** 0.5)
+        x = x.permute(0, 2, 1).unflatten(-1, (height, width))
+        batch_size, dim, height, width = x.shape
+        unfolded = x.unfold(
+            2, self.compression_factor[0], self.compression_factor[0]
+        ).unfold(3, self.compression_factor[1], self.compression_factor[1])
+        unfolded = unfolded.contiguous().view(
+            batch_size,
+            dim,
+            -1,
+            self.compression_factor[0] * self.compression_factor[1],
+        )
+        unfolded = (
+            unfolded.permute(0, 2, 3, 1)
+            .contiguous()
+            .view(
+                batch_size,
+                -1,
+                dim * self.compression_factor[0] * self.compression_factor[1],
+            )
+        )
+        return self.mlp(unfolded)
+
+
+class CheersProcessingInfo(BaseProcessingInfo):
+    """Processing information for Cheers model."""
+
+    def get_hf_processor(self, **kwargs: object) -> CheersProcessor:
+        from vllm.transformers_utils.processor import cached_get_image_processor
+
+        image_processor = cached_get_image_processor(
+            self.ctx.model_config.model,
+            revision=self.ctx.model_config.revision,
+            trust_remote_code=self.ctx.model_config.trust_remote_code,
+        )
+
+        tokenizer = self.get_tokenizer()
+
+        return CheersProcessor(
+            image_processor=image_processor,
+            tokenizer=tokenizer,
+            **kwargs,
+        )
+
+    def get_supported_mm_limits(self) -> Mapping[str, int | None]:
+        return {"image": None}
+
+    def get_mm_max_tokens_per_item(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+    ) -> Mapping[str, int]:
+        hf_config = self.get_hf_config()
+        vit_config = hf_config.vision_representation_config
+        patch_size = vit_config.patch_size
+        image_size = vit_config.image_size
+        num_patches = (image_size // patch_size) ** 2
+        # After 2x2 compression, tokens reduce by 4x
+        num_tokens = num_patches // 4
+        return {"image": num_tokens}
+
+    def get_num_image_tokens(
+        self,
+        *,
+        image_width: int,
+        image_height: int,
+    ) -> int:
+        hf_config = self.get_hf_config()
+        vit_config = hf_config.vision_representation_config
+        patch_size = vit_config.patch_size
+        image_size = vit_config.image_size
+        num_patches = (image_size // patch_size) ** 2
+        return num_patches // 4
+
+
+class CheersDummyInputsBuilder(BaseDummyInputsBuilder[CheersProcessingInfo]):
+    """Build dummy inputs for Cheers model profiling."""
+
+    def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
+        num_images = mm_counts.get("image", 0)
+        return "<|image_pad|>" * num_images
+
+    def get_dummy_mm_data(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+        mm_options: Mapping[str, BaseDummyOptions] | None = None,
+    ) -> MultiModalDataDict:
+        num_images = mm_counts.get("image", 0)
+        hf_config = self.info.get_hf_config()
+        vit_config = hf_config.vision_representation_config
+        image_size = vit_config.image_size
+        image_overrides = mm_options.get("image") if mm_options else None
+
+        return {
+            "image": self._get_dummy_images(
+                width=image_size,
+                height=image_size,
+                num_images=num_images,
+                overrides=image_overrides,
+            ),
+        }
+
+
+class CheersMultiModalProcessor(BaseMultiModalProcessor[CheersProcessingInfo]):
+    """Multimodal processor for Cheers model."""
+
+    def _call_hf_processor(
+        self,
+        prompt: str,
+        mm_data: Mapping[str, object],
+        mm_kwargs: Mapping[str, object],
+        tok_kwargs: Mapping[str, object],
+    ) -> BatchFeature:
+        return super()._call_hf_processor(prompt, mm_data, mm_kwargs, tok_kwargs)
+
+    def _hf_processor_applies_updates(
+        self,
+        prompt_text: str,
+        mm_items: MultiModalDataItems,
+        hf_processor_mm_kwargs: Mapping[str, object],
+        tokenization_kwargs: Mapping[str, object],
+    ) -> bool:
+        return False
+
+    def _get_prompt_updates(
+        self,
+        mm_items: MultiModalDataItems,
+        hf_processor_mm_kwargs: Mapping[str, Any],
+        out_mm_kwargs: MultiModalKwargsItems,
+    ) -> Sequence[PromptReplacement]:
+        hf_config = self.info.get_hf_config()
+        vit_config = hf_config.vision_representation_config
+        patch_size = vit_config.patch_size
+        image_size = vit_config.image_size
+
+        tokenizer = self.info.get_tokenizer()
+        image_token_id = tokenizer.get_vocab().get("<|image_pad|>")
+        if image_token_id is None:
+            raise ValueError(
+                "Image token '<|image_pad|>' not found in tokenizer vocabulary"
+            )
+
+        def get_replacement_cheers(item_idx: int):
+            num_patches = (image_size // patch_size) ** 2
+            num_tokens = num_patches // 4
+            return [image_token_id] * num_tokens
+
+        return [
+            PromptReplacement(
+                modality="image",
+                target=[image_token_id],
+                replacement=get_replacement_cheers,
+            )
+        ]
+
+    def _get_mm_fields_config(
+        self,
+        hf_inputs: Any,
+        hf_processor_mm_kwargs: Mapping[str, object],
+    ) -> Mapping[str, MultiModalFieldConfig]:
+        return {
+            "pixel_values": MultiModalFieldConfig.batched("image"),
+        }
+
+
+@MULTIMODAL_REGISTRY.register_processor(
+    CheersMultiModalProcessor,
+    info=CheersProcessingInfo,
+    dummy_inputs=CheersDummyInputsBuilder,
+)
+class CheersForConditionalGeneration(
+    nn.Module, SupportsMultiModal, SupportsLoRA, SupportsPP
+):
+    """
+    Cheers: A unified multimodal model for image understanding and generation.
+
+    For vLLM, we focus on the image understanding (vision-to-text) capabilities.
+    The image generation part is not supported in vLLM.
+    """
+
+    requires_raw_input_tokens = True
+
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_prefix={
+            "model.language_model.": "language_model.model.",
+            "model.vision_representation.": "vision_representation.vision_model.",
+            "model.und_projector.": "und_projector.",
+            "model.vae_model.": "vae_model.",
+            "model.vae_decoder_projector.": "vae_decoder_projector.",
+            "lm_head.": "language_model.lm_head.",
+        }
+    )
+
+    @classmethod
+    def get_placeholder_str(cls, modality: str, i: int) -> str | None:
+        if modality.startswith("image"):
+            return "<|image_pad|>"
+        raise ValueError("Only image modality is supported")
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        multimodal_config = vllm_config.model_config.multimodal_config
+
+        if type(config).__name__ not in ("CheersConfig", "UMMConfig"):
+            raise ValueError(
+                f"Expected CheersConfig or UMMConfig, got {type(config).__name__}."
+            )
+
+        self.config = config
+        self.multimodal_config = multimodal_config
+
+        # The Cheers model's custom Qwen2Config defaults rope_theta to
+        # 1_000_000, but this isn't stored in the JSON.  vLLM's standard
+        # Qwen2Config defaults to 10_000, causing a 100× mismatch.
+        # We must patch BOTH the attribute AND rope_parameters (which
+        # patch_rope_parameters may have already populated from the wrong
+        # default before __init__ runs).
+        _CHEERS_ROPE_THETA = 1_000_000.0
+        tc = config.text_config
+        old_theta = getattr(tc, "rope_theta", None)
+        if old_theta != _CHEERS_ROPE_THETA:
+            logger.info(
+                "Overriding text_config.rope_theta from %s to %s",
+                old_theta,
+                _CHEERS_ROPE_THETA,
+            )
+            tc.rope_theta = _CHEERS_ROPE_THETA
+        rp = getattr(tc, "rope_parameters", None)
+        if rp is not None and rp.get("rope_theta") != _CHEERS_ROPE_THETA:
+            logger.info(
+                "Overriding rope_parameters.rope_theta from %s to %s",
+                rp.get("rope_theta"),
+                _CHEERS_ROPE_THETA,
+            )
+            rp["rope_theta"] = _CHEERS_ROPE_THETA
+
+        with self._mark_language_model(vllm_config):
+            self.language_model = init_vllm_registered_model(
+                vllm_config=vllm_config,
+                hf_config=config.text_config,
+                prefix=maybe_prefix(prefix, "language_model"),
+                architectures=["Qwen2ForCausalLM"],
+            )
+
+        vit_config = config.vision_representation_config
+
+        with self._mark_tower_model(vllm_config, "image"):
+            self.vae_model = CheersVAEModel(config)
+            self.vae_decoder_projector = CheersVAEDecoderProjector(config)
+
+            self.vision_representation = SiglipVisionModel(
+                config=vit_config,
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "vision_representation"),
+            )
+
+            vit_hidden_size = vit_config.hidden_size
+            llm_hidden_size = config.text_config.hidden_size
+
+            self.und_projector = CheersUndProjector(
+                image_embed_dim=vit_hidden_size,
+                text_embed_dim=llm_hidden_size,
+                compression_factor=(2, 2),
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "und_projector"),
+            )
+
+        self.make_empty_intermediate_tensors = (
+            self.language_model.make_empty_intermediate_tensors
+        )
+
+    def _parse_and_validate_image_input(
+        self, **kwargs: object
+    ) -> CheersImageInputs | None:
+        pixel_values = kwargs.pop("pixel_values", None)
+        if pixel_values is None:
+            return None
+        return CheersImagePixelInputs(
+            type="pixel_values",
+            pixel_values=pixel_values,
+        )
+
+    def _process_image_input(
+        self, image_input: CheersImageInputs
+    ) -> tuple[torch.Tensor, ...]:
+        """Process image inputs through VAE → SigLIP → projector pipeline.
+
+        HF native path: pixel_values → VAE.encode(t=1.0) → vae_decoder_projector
+                         → SigLIP → und_projector → text-space embeddings
+        """
+        pixel_values = image_input["pixel_values"]
+
+        if pixel_values.ndim == 5:
+            batch_size, num_images, channels, height, width = pixel_values.shape
+            pixel_values = pixel_values.reshape(
+                batch_size * num_images, channels, height, width
+            )
+
+        with torch.no_grad():
+            vae_dtype = next(self.vae_model.parameters()).dtype
+            image_latent = self.vae_model.encode(pixel_values.to(dtype=vae_dtype))
+            image_pixel_hat = self.vae_decoder_projector(image_latent)
+
+        vision_features = self.vision_representation(image_pixel_hat)
+        vision_embeds = self.und_projector(vision_features)
+
+        return tuple(vision_embeds)
+
+    def embed_multimodal(self, **kwargs: object) -> MultiModalEmbeddings:
+        image_input = self._parse_and_validate_image_input(**kwargs)
+        if image_input is None:
+            return []
+        return self._process_image_input(image_input)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        **kwargs: object,
+    ) -> torch.Tensor | IntermediateTensors:
+        if intermediate_tensors is not None:
+            inputs_embeds = None
+
+        hidden_states = self.language_model.model(
+            input_ids=input_ids,
+            positions=positions,
+            intermediate_tensors=intermediate_tensors,
+            inputs_embeds=inputs_embeds,
+        )
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        return self.language_model.compute_logits(hidden_states)
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        """Load weights, keeping VAE encoder/decoder projector for understanding."""
+        skip_prefixes = [
+            "model.time_embed.",
+            "model.gen_projector.",
+            "model.hi_gate.",
+            "model.hi_projector.",
+            "model.vae_model.decoder.",
+        ]
+        skip_keywords = [
+            "text_loss_fc",
+        ]
+
+        filtered_weights = []
+        for name, tensor in weights:
+            if any(name.startswith(p) for p in skip_prefixes):
+                continue
+            if any(kw in name for kw in skip_keywords):
+                continue
+            filtered_weights.append((name, tensor))
+
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(filtered_weights, mapper=self.hf_to_vllm_mapper)
diff --git a/vllm/model_executor/models/clip.py b/vllm/model_executor/models/clip.py
index 05a494683a85..d16bc1d32d7a 100644
--- a/vllm/model_executor/models/clip.py
+++ b/vllm/model_executor/models/clip.py
@@ -397,20 +397,12 @@ def __init__(
         )
         self.num_heads_per_partition = divide(self.num_heads, self.tp_size)
 
-        if attn_cls == MMEncoderAttention:
-            self.attn = attn_cls(
-                self.num_heads_per_partition,
-                self.head_dim,
-                self.scale,
-                prefix=f"{prefix}.attn",
-            )
-        else:
-            self.attn = attn_cls(
-                self.num_heads_per_partition,
-                self.head_dim,
-                self.scale,
-                prefix=f"{prefix}.attn",
-            )
+        self.attn = attn_cls(
+            self.num_heads_per_partition,
+            self.head_dim,
+            self.scale,
+            prefix=f"{prefix}.attn",
+        )
 
     def forward(
         self,
@@ -777,7 +769,7 @@ def __init__(
             quant_config=quant_config,
             num_hidden_layers_override=num_hidden_layers_override,
             require_post_norm=require_post_norm,
-            prefix=f"{prefix}.vision_model",
+            prefix=maybe_prefix(prefix, "vision_model"),
         )
 
     def forward(
diff --git a/vllm/model_executor/models/cohere2_moe.py b/vllm/model_executor/models/cohere2_moe.py
new file mode 100644
index 000000000000..aa8adff188f7
--- /dev/null
+++ b/vllm/model_executor/models/cohere2_moe.py
@@ -0,0 +1,593 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Iterable
+from itertools import islice
+
+import torch
+from torch import nn
+from transformers import CohereConfig
+
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import CacheConfig, VllmConfig
+from vllm.distributed import (
+    get_pp_group,
+    get_tensor_model_parallel_world_size,
+)
+from vllm.model_executor.layers.activation import SiluAndMul
+from vllm.model_executor.layers.attention import Attention
+from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.linear import (
+    MergedColumnParallelLinear,
+    QKVParallelLinear,
+    ReplicatedLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
+from vllm.model_executor.model_loader.weight_utils import (
+    default_weight_loader,
+    maybe_remap_kv_scale_name,
+    row_parallel_weight_loader,
+)
+from vllm.model_executor.utils import set_weight_attrs
+from vllm.platforms import current_platform
+from vllm.sequence import IntermediateTensors
+
+from .commandr import LayerNorm
+from .interfaces import SupportsPP, SupportsQuant
+from .utils import (
+    AutoWeightsLoader,
+    extract_layer_index,
+    is_pp_missing_parameter,
+    make_empty_intermediate_tensors_factory,
+    make_layers,
+    maybe_prefix,
+)
+
+
+@torch.compile(backend=current_platform.simple_compile_backend)
+def token_choice_with_bias(
+    hidden_states: torch.Tensor,
+    gating_output: torch.Tensor,
+    topk: int,
+    renormalize: bool,
+):
+    """Sigmoid -> top-k (-> renormalize) custom routing for Cohere2Moe."""
+    assert hidden_states.shape[0] == gating_output.shape[0], "Number of tokens mismatch"
+
+    scores = gating_output.float().sigmoid()
+    topk_weights, topk_ids = torch.topk(scores, k=topk, dim=-1, sorted=False)
+
+    if renormalize:
+        topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True)
+
+    return topk_weights.to(torch.float32), topk_ids.to(torch.int32)
+
+
+@torch.compile(backend=current_platform.simple_compile_backend)
+def rms_norm_func(hidden_states, weight, variance_epsilon):
+    input_dtype = hidden_states.dtype
+    hidden_states = hidden_states.to(torch.float32)
+    variance = hidden_states.pow(2).mean(-1, keepdim=True)
+    hidden_states = hidden_states * torch.rsqrt(variance + variance_epsilon)
+    hidden_states = weight.to(torch.float32) * hidden_states
+    return hidden_states.to(input_dtype)
+
+
+class RMSNorm(nn.Module):
+    def __init__(self, param_shape=None, eps=1e-6):
+        super().__init__()
+        self.weight = nn.Parameter(torch.ones(param_shape))
+        self.variance_epsilon = eps
+        set_weight_attrs(self.weight, {"weight_loader": row_parallel_weight_loader})
+
+    def forward(self, hidden_states, residuals=None):
+        hidden_states = rms_norm_func(hidden_states, self.weight, self.variance_epsilon)
+        return hidden_states, residuals
+
+
+def select_norm_impl(config: CohereConfig) -> tuple[type[nn.Module], float]:
+    """Returns (norm_class, eps). Uses RMSNorm when config.rms_norm_eps is set,
+    otherwise falls back to LayerNorm with config.layer_norm_eps."""
+    rms_eps = getattr(config, "rms_norm_eps", None)
+    if rms_eps is not None:
+        return RMSNorm, rms_eps
+    return LayerNorm, config.layer_norm_eps
+
+
+class Cohere2MoeMLP(nn.Module):
+    """Cohere MLP used as shared experts in the MoE block."""
+
+    def __init__(
+        self,
+        config: CohereConfig,
+        intermediate_size: int | None = None,
+        quant_config: QuantizationConfig | None = None,
+        reduce_results: bool = False,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.config = config
+        self.hidden_size = config.hidden_size
+        self.intermediate_size = (
+            intermediate_size
+            if intermediate_size is not None
+            else config.intermediate_size
+        )
+        self.gate_up_proj = MergedColumnParallelLinear(
+            self.hidden_size,
+            [self.intermediate_size] * 2,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.gate_up_proj",
+        )
+        self.down_proj = RowParallelLinear(
+            self.intermediate_size,
+            self.hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            reduce_results=reduce_results,
+            prefix=f"{prefix}.down_proj",
+        )
+        self.act_fn = SiluAndMul()
+
+    def forward(self, x):
+        gate_up, _ = self.gate_up_proj(x)
+        x = self.act_fn(gate_up)
+        x, _ = self.down_proj(x)
+        return x
+
+
+class Cohere2MoeAttention(nn.Module):
+    """Cohere MoE attention with sliding-window interleave."""
+
+    def __init__(
+        self,
+        config: CohereConfig,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        tp_size = get_tensor_model_parallel_world_size()
+        self.config = config
+        self.layer_idx = extract_layer_index(prefix)
+        self.hidden_size = config.hidden_size
+        self.total_num_heads = config.num_attention_heads
+        self.num_heads = self.total_num_heads // tp_size
+        self.head_dim = getattr(
+            config, "head_dim", self.hidden_size // self.total_num_heads
+        )
+        self.total_num_kv_heads = config.num_key_value_heads
+        if self.total_num_kv_heads >= tp_size:
+            assert self.total_num_kv_heads % tp_size == 0
+        else:
+            assert tp_size % self.total_num_kv_heads == 0
+        self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
+        self.q_size = self.num_heads * self.head_dim
+        self.kv_size = self.num_kv_heads * self.head_dim
+        self.scaling = self.head_dim**-0.5
+        self.max_position_embeddings = getattr(
+            config, "model_max_length", None
+        ) or getattr(config, "max_position_embeddings", 8192)
+        self.qkv_proj = QKVParallelLinear(
+            self.hidden_size,
+            self.head_dim,
+            self.total_num_heads,
+            self.total_num_kv_heads,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.qkv_proj",
+        )
+        self.o_proj = RowParallelLinear(
+            self.total_num_heads * self.head_dim,
+            self.hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.o_proj",
+        )
+        self.rotary_emb = get_rope(
+            self.head_dim,
+            max_position=self.max_position_embeddings,
+            rope_parameters=config.rope_parameters,
+            is_neox_style=False,
+        )
+
+        self.sliding_window = None
+        layer_types = getattr(config, "layer_types", None)
+        if (
+            layer_types is not None
+            and layer_types[self.layer_idx] == "sliding_attention"
+        ):
+            self.sliding_window = config.sliding_window
+
+        # Prefix-dense layers (layer_idx < first_k_dense_replace) have full
+        # attention (no sliding window). When prefix_dense_sliding_window_pattern
+        # == 1, they keep RoPE even though they are not sliding-window layers.
+        first_k_dense_replace = getattr(config, "first_k_dense_replace", 0)
+        prefix_dense_sliding_window_pattern = getattr(
+            config, "prefix_dense_sliding_window_pattern", 1
+        )
+        self.force_rope = bool(
+            first_k_dense_replace
+            and prefix_dense_sliding_window_pattern == 1
+            and self.layer_idx < first_k_dense_replace
+        )
+
+        self.attn = Attention(
+            self.num_heads,
+            self.head_dim,
+            self.scaling,
+            num_kv_heads=self.num_kv_heads,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            per_layer_sliding_window=self.sliding_window,
+            prefix=f"{prefix}.attn",
+        )
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor:
+        qkv, _ = self.qkv_proj(hidden_states)
+        q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
+        if self.sliding_window or self.force_rope:
+            q, k = self.rotary_emb(positions, q, k)
+        attn_output = self.attn(q, k, v)
+        output, _ = self.o_proj(attn_output)
+        return output
+
+
+class Cohere2Moe(nn.Module):
+    """Tensor-parallel MoE block for Cohere2Moe with shared experts."""
+
+    def __init__(
+        self,
+        config: CohereConfig,
+        params_dtype: torch.dtype | None = None,
+        quant_config: QuantizationConfig | None = None,
+        tp_size: int | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.hidden_size = config.hidden_size
+        self.tp_size = get_tensor_model_parallel_world_size()
+
+        if self.tp_size > config.num_experts:
+            raise ValueError(
+                f"Tensor parallel size {self.tp_size} is greater than "
+                f"the number of experts {config.num_experts}."
+            )
+
+        if (
+            hasattr(config, "expert_selection_fn")
+            and config.expert_selection_fn == "sigmoid"
+        ):
+            self.custom_routing_function = token_choice_with_bias
+        else:
+            self.custom_routing_function = None
+
+        self.gate = ReplicatedLinear(
+            config.hidden_size,
+            config.num_experts,
+            bias=False,
+            params_dtype=params_dtype,
+            quant_config=None,
+            prefix=f"{prefix}.gate",
+        )
+
+        if hasattr(config, "num_shared_experts") and config.num_shared_experts > 0:
+            self.shared_experts = Cohere2MoeMLP(
+                config=config,
+                intermediate_size=config.intermediate_size * config.num_shared_experts,
+                quant_config=quant_config,
+                prefix=f"{prefix}.shared_experts",
+            )
+            self.shared_expert_combination_strategy = getattr(
+                config, "shared_expert_combination_strategy", "sum"
+            )
+            assert self.shared_expert_combination_strategy in ("average", "sum"), (
+                "shared_expert_combination_strategy must be one of ['average', 'sum']"
+            )
+        else:
+            self.shared_experts = None
+            self.shared_expert_combination_strategy = None
+
+        self.experts = FusedMoE(
+            num_experts=config.num_experts,
+            top_k=config.num_experts_per_tok,
+            hidden_size=config.hidden_size,
+            intermediate_size=config.intermediate_size,
+            params_dtype=params_dtype,
+            renormalize=getattr(config, "norm_topk_prob", True),
+            quant_config=quant_config,
+            tp_size=tp_size,
+            prefix=f"{prefix}.experts",
+            custom_routing_function=self.custom_routing_function,
+            shared_experts=self.shared_experts,
+        )
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        orig_shape = hidden_states.shape
+        hidden_states = hidden_states.view(-1, self.hidden_size)
+        router_logits, _ = self.gate(hidden_states)
+        # FusedMoE handles shared expert overlap internally and returns
+        # shared_output + routed_output when shared_experts is set.
+        final_hidden_states = self.experts(hidden_states, router_logits)
+        if self.shared_expert_combination_strategy == "average":
+            final_hidden_states = final_hidden_states / 2
+        return final_hidden_states.view(orig_shape)
+
+
+class Cohere2MoeDecoderLayer(nn.Module):
+    def __init__(
+        self,
+        config: CohereConfig,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.config = config
+        self.hidden_size = config.hidden_size
+        self.layer_idx = extract_layer_index(prefix)
+
+        self.self_attn = Cohere2MoeAttention(
+            config,
+            cache_config,
+            quant_config=quant_config,
+            prefix=f"{prefix}.self_attn",
+        )
+
+        # Layers before first_k_dense_replace use a dense MLP instead of MoE.
+        first_k_dense_replace = getattr(config, "first_k_dense_replace", 0)
+        if self.layer_idx < first_k_dense_replace:
+            self.mlp = Cohere2MoeMLP(
+                config=config,
+                intermediate_size=getattr(
+                    config, "prefix_dense_intermediate_size", config.intermediate_size
+                ),
+                quant_config=quant_config,
+                reduce_results=True,
+                prefix=f"{prefix}.mlp",
+            )
+        else:
+            self.mlp = Cohere2Moe(
+                config=config, quant_config=quant_config, prefix=f"{prefix}.mlp"
+            )
+
+        norm_cls, norm_eps = select_norm_impl(config)
+        self.input_layernorm = norm_cls(param_shape=(config.hidden_size,), eps=norm_eps)
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        residual: torch.Tensor | None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        residual = hidden_states
+        hidden_states, residual = self.input_layernorm(hidden_states, residual)
+        hidden_states_attention = self.self_attn(
+            positions=positions,
+            hidden_states=hidden_states,
+        )
+        hidden_states_mlp = self.mlp(hidden_states)
+
+        hidden_states = residual + hidden_states_attention + hidden_states_mlp
+        return hidden_states, residual
+
+
+@support_torch_compile
+class Cohere2MoeModel(nn.Module):
+    """Transformer decoder for Cohere2Moe."""
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+
+        config = vllm_config.model_config.hf_config
+        cache_config = vllm_config.cache_config
+        quant_config = vllm_config.quant_config
+
+        self.config = config
+        self.quant_config = quant_config
+        self.vocab_size = config.vocab_size
+        self.org_vocab_size = config.vocab_size
+        self.embed_tokens = VocabParallelEmbedding(
+            config.vocab_size, config.hidden_size
+        )
+        self.start_layer, self.end_layer, self.layers = make_layers(
+            config.num_hidden_layers,
+            lambda prefix: Cohere2MoeDecoderLayer(
+                config, cache_config, quant_config, prefix=prefix
+            ),
+            prefix=f"{prefix}.layers",
+        )
+        norm_cls, norm_eps = select_norm_impl(config)
+        self.norm = norm_cls(param_shape=(config.hidden_size,), eps=norm_eps)
+        self.make_empty_intermediate_tensors = make_empty_intermediate_tensors_factory(
+            ["hidden_states", "residual"], config.hidden_size
+        )
+
+    def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        if get_pp_group().is_first_rank:
+            if inputs_embeds is not None:
+                hidden_states = inputs_embeds
+            else:
+                hidden_states = self.get_input_embeddings(input_ids)
+            residual = None
+        else:
+            assert intermediate_tensors is not None
+            hidden_states = intermediate_tensors["hidden_states"]
+            residual = intermediate_tensors["residual"]
+        for layer in islice(self.layers, self.start_layer, self.end_layer):
+            hidden_states, residual = layer(positions, hidden_states, residual)
+        if not get_pp_group().is_last_rank:
+            return IntermediateTensors(
+                {"hidden_states": hidden_states, "residual": residual}
+            )
+        hidden_states, _ = self.norm(hidden_states, residual)
+        return hidden_states
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        stacked_params_mapping = [
+            ("qkv_proj", "q_proj", "q"),
+            ("qkv_proj", "k_proj", "k"),
+            ("qkv_proj", "v_proj", "v"),
+            ("gate_up_proj", "gate_proj", 0),
+            ("gate_up_proj", "up_proj", 1),
+        ]
+
+        expert_params_mapping = FusedMoE.make_expert_params_mapping(
+            self,
+            ckpt_gate_proj_name="gate_proj",
+            ckpt_down_proj_name="down_proj",
+            ckpt_up_proj_name="up_proj",
+            num_experts=self.config.num_experts,
+        )
+
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+        for name, loaded_weight in weights:
+            if "rotary_emb.inv_freq" in name:
+                continue
+
+            if self.quant_config is not None and (
+                scale_name := self.quant_config.get_cache_scale(name)
+            ):
+                param = params_dict[scale_name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                loaded_weight = (
+                    loaded_weight if loaded_weight.dim() == 0 else loaded_weight[0]
+                )
+                weight_loader(param, loaded_weight)
+                loaded_params.add(scale_name)
+                continue
+
+            for param_name, shard_name, shard_id in stacked_params_mapping:
+                if shard_name not in name:
+                    continue
+                if "mlp.experts" in name:
+                    continue
+                name = name.replace(shard_name, param_name)
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
+                if is_pp_missing_parameter(name, self):
+                    continue
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                break
+            else:
+                for mapping in expert_params_mapping:
+                    param_name, weight_name, expert_id, shard_id = mapping
+                    if weight_name not in name:
+                        continue
+                    name = name.replace(weight_name, param_name)
+                    if is_pp_missing_parameter(name, self):
+                        continue
+                    if (
+                        name.endswith(".bias") or name.endswith("_bias")
+                    ) and name not in params_dict:
+                        continue
+                    param = params_dict[name]
+                    weight_loader = param.weight_loader
+                    weight_loader(
+                        param,
+                        loaded_weight,
+                        name,
+                        shard_id=shard_id,
+                        expert_id=expert_id,
+                    )
+                    break
+                else:
+                    if (
+                        name.endswith(".bias") or name.endswith("_bias")
+                    ) and name not in params_dict:
+                        continue
+                    if is_pp_missing_parameter(name, self):
+                        continue
+                    name = maybe_remap_kv_scale_name(name, params_dict)
+                    if name is None:
+                        continue
+                    param = params_dict[name]
+                    weight_loader = getattr(
+                        param, "weight_loader", default_weight_loader
+                    )
+                    weight_loader(param, loaded_weight)
+            loaded_params.add(name)
+
+        return loaded_params
+
+
+class Cohere2MoeForCausalLM(nn.Module, SupportsPP, SupportsQuant):
+    is_text_generation_model = True
+
+    packed_modules_mapping = {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        self.config = config
+        assert getattr(config, "tie_word_embeddings", True)
+        self.unpadded_vocab_size = config.vocab_size
+        self.quant_config = quant_config
+        self.logits_scale = config.logit_scale
+        self.logits_processor = LogitsProcessor(
+            self.unpadded_vocab_size, config.vocab_size, scale=self.logits_scale
+        )
+        self.model = Cohere2MoeModel(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.get_input_embeddings(input_ids)
+
+    def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.get_input_embeddings(input_ids)
+
+    @torch.no_grad()
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        return self.model(input_ids, positions, intermediate_tensors, inputs_embeds)
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        return self.logits_processor(self.model.embed_tokens, hidden_states)
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(self, skip_prefixes=["lm_head."])
+        return loader.load_weights(weights)
diff --git a/vllm/model_executor/models/cohere2_vision.py b/vllm/model_executor/models/cohere2_vision.py
index c3118ee7778d..c800c2149252 100644
--- a/vllm/model_executor/models/cohere2_vision.py
+++ b/vllm/model_executor/models/cohere2_vision.py
@@ -44,7 +44,12 @@
 from vllm.sequence import IntermediateTensors
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
-from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
+from .interfaces import (
+    MultiModalEmbeddings,
+    SupportsMultiModal,
+    SupportsPP,
+    SupportsQuant,
+)
 from .siglip import SiglipVisionModel
 from .utils import (
     AutoWeightsLoader,
@@ -309,16 +314,22 @@ def get_replacement(item_idx: int):
     info=Cohere2VisionProcessingInfo,
     dummy_inputs=Cohere2VisionDummyInputsBuilder,
 )
-class Cohere2VisionForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
+class Cohere2VisionForConditionalGeneration(
+    nn.Module, SupportsMultiModal, SupportsPP, SupportsQuant
+):
     hf_to_vllm_mapper = WeightsMapper(
         orig_to_new_prefix={
             "model.vision_tower.": "vision_tower.",
             "model.multi_modal_projector.": "multi_modal_projector.",
             "model.language_model.": "language_model.model.",
-            "lm_head.": "language_model.lm_head.",
         }
     )
 
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+        "gate_up_proj": ["gate_proj", "up_proj"],
+    }
+
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
         config: Cohere2VisionConfig = vllm_config.model_config.hf_config
@@ -347,6 +358,10 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
                 architectures=config.text_config.architectures,
             )
 
+        self.make_empty_intermediate_tensors = (
+            self.language_model.make_empty_intermediate_tensors
+        )
+
     @property
     def dtype(self):
         return next(self.parameters()).dtype
diff --git a/vllm/model_executor/models/cohere_asr.py b/vllm/model_executor/models/cohere_asr.py
index 1cebea56a138..da74404139aa 100644
--- a/vllm/model_executor/models/cohere_asr.py
+++ b/vllm/model_executor/models/cohere_asr.py
@@ -3,9 +3,8 @@
 
 import math
 from collections.abc import Iterable, Mapping, Sequence
-from typing import Literal
+from typing import Any, ClassVar
 
-import numpy as np
 import torch
 import torch.nn.functional as F
 from torch import nn
@@ -13,8 +12,10 @@
 
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, ModelConfig, SpeechToTextConfig, VllmConfig
+from vllm.config.multimodal import BaseDummyOptions
+from vllm.config.speech_to_text import SpeechToTextParams
 from vllm.distributed import get_tensor_model_parallel_world_size
-from vllm.inputs import MultiModalDataDict, PromptType, TextPrompt
+from vllm.inputs import MultiModalDataDict, PromptType, TokensPrompt
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import get_act_fn
 from vllm.model_executor.layers.attention import (
@@ -48,6 +49,7 @@
     PromptUpdate,
 )
 from vllm.renderers import TokenizeParams
+from vllm.tokenizers import cached_tokenizer_from_config
 from vllm.transformers_utils.processors.cohere_asr import (
     INF_VAL,
     CohereASRFeatureExtractor,
@@ -62,7 +64,7 @@
     SupportsMultiModal,
     SupportsTranscription,
 )
-from .utils import AutoWeightsLoader, WeightsMapper, make_layers
+from .utils import AutoWeightsLoader, WeightsMapper, make_layers, maybe_prefix
 
 logger = init_logger(__name__)
 
@@ -1715,7 +1717,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.encoder = ConformerEncoder(vllm_config=vllm_config)
 
         self.decoder = CohereASRDecoder(
-            vllm_config=vllm_config, prefix=f"{prefix}.decoder"
+            vllm_config=vllm_config,
+            prefix=maybe_prefix(prefix, "decoder"),
         )
 
         if self.encoder.d_model != self.decoder.hidden_size:
@@ -1900,7 +1903,7 @@ def get_dummy_mm_data(
         self,
         seq_len: int,
         mm_counts: Mapping[str, int],
-        mm_options=None,
+        mm_options: Mapping[str, BaseDummyOptions],
         mm_processor_kwargs=None,
     ) -> MultiModalDataDict:
         feature_extractor = self.info.get_feature_extractor()
@@ -2007,6 +2010,10 @@ class CohereAsrForConditionalGeneration(
     supports_transcription_only = True
     supported_languages = ISO639_1_SUPPORTED_LANGS
     skip_warmup_audio_preprocessing = True
+    no_space_languages = {"ja", "zh"}
+    _default_prompt_token_ids_cache: ClassVar[
+        dict[tuple[str | None, str | None, str], tuple[int, ...]]
+    ] = {}
 
     @classmethod
     def validate_language(cls, language: str | None) -> str | None:
@@ -2020,38 +2027,85 @@ def validate_language(cls, language: str | None) -> str | None:
         return super().validate_language(language)
 
     @classmethod
-    def get_generation_prompt(
-        cls,
-        audio: np.ndarray,
-        model_config: ModelConfig,  # not needed here
-        stt_config: SpeechToTextConfig,
-        language: str | None,
-        task_type: Literal["transcribe", "translate"],
-        request_prompt: str,
-        to_language: str | None,
-    ) -> PromptType:
+    def get_generation_prompt(cls, stt_params: SpeechToTextParams) -> PromptType:
+        audio = stt_params.audio
+        stt_config = stt_params.stt_config
+        language = stt_params.language
+        model_config = stt_params.model_config
+
         if language is None:
             raise ValueError(
                 "Language must be specified when creating the CohereASR prompt"
             )
 
-        # NOTE: this function is used only by online inference and not offline inference
-        # CohereASR doesnt have encoder prompt
-        language_tag = f"<|{language}|><|{language}|>"
-        pnc = True  # TODO(ekagra): make this configurable later
-        pnc_tag = "<|pnc|>" if pnc else "<|nopnc|>"
-        default_prompt = (
-            f"<|startofcontext|><|startoftranscript|>"
-            f"<|emo:undefined|>{language_tag}{pnc_tag}"
-            f"<|noitn|><|notimestamp|><|nodiarize|>"
+        tokenizer = cached_tokenizer_from_config(model_config)
+
+        # prompt_text is None because CoherASR uses fast implementation of
+        # sentencepiece tokenizer which needs "▁" as the first token
+        # (which is different from "_") and encode("▁ABC") ignores the first token
+        # so the prompt_text is unreliable. However, prompt_token_ids can be used
+        # to get prompt_text but it wont have the first token "▁".
+        prompt_text = None
+        prompt_token_ids = cls._get_default_prompt_token_ids(
+            tokenizer,
+            model_config,
+            language,
         )
-        prompt_text = request_prompt if request_prompt else default_prompt
 
-        return TextPrompt(
+        return TokensPrompt(
             prompt=prompt_text,
+            prompt_token_ids=prompt_token_ids,
             multi_modal_data={"audio": (audio, stt_config.sample_rate)},
         )
 
+    @classmethod
+    def _get_default_prompt_tokens(cls, language: str) -> tuple[str, ...]:
+        # Use token-level control tags so fast tokenizers do not have to parse
+        # the raw string form of the decoder prefix.
+        return (
+            "▁",
+            "<|startofcontext|>",
+            "<|startoftranscript|>",
+            "<|emo:undefined|>",
+            f"<|{language}|>",
+            f"<|{language}|>",
+            "<|pnc|>",
+            "<|noitn|>",
+            "<|notimestamp|>",
+            "<|nodiarize|>",
+        )
+
+    @classmethod
+    def _get_default_prompt_token_ids(
+        cls,
+        tokenizer: Any,
+        model_config: ModelConfig,
+        language: str,
+    ) -> list[int]:
+        cache_key = (
+            getattr(model_config, "tokenizer", None),
+            getattr(model_config, "tokenizer_revision", None),
+            language,
+        )
+        prompt_token_ids = cls._default_prompt_token_ids_cache.get(cache_key)
+        if prompt_token_ids is None:
+            prompt_tokens = list(cls._get_default_prompt_tokens(language))
+            token_ids = tokenizer.convert_tokens_to_ids(prompt_tokens)
+            if not isinstance(token_ids, list):
+                token_ids = [token_ids]
+            unk_token_id = getattr(tokenizer, "unk_token_id", None)
+            if unk_token_id is not None and any(
+                token_id == unk_token_id for token_id in token_ids
+            ):
+                raise ValueError(
+                    "Failed to resolve the CohereASR decoder control tokens "
+                    "with the configured tokenizer."
+                )
+            prompt_token_ids = tuple(int(token_id) for token_id in token_ids)
+            cls._default_prompt_token_ids_cache[cache_key] = prompt_token_ids
+
+        return list(prompt_token_ids)
+
     @classmethod
     def get_placeholder_str(cls, modality: str, i: int) -> str | None:
         # Required as part of SupportsMultiModal interface.
diff --git a/vllm/model_executor/models/cohere_eagle.py b/vllm/model_executor/models/cohere_eagle.py
new file mode 100644
index 000000000000..5c22d6e34dd5
--- /dev/null
+++ b/vllm/model_executor/models/cohere_eagle.py
@@ -0,0 +1,247 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Iterable
+
+import torch
+import torch.nn as nn
+from transformers import CohereConfig
+
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import VllmConfig
+from vllm.logger import init_logger
+from vllm.model_executor.layers.linear import ReplicatedLinear
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
+from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.model_executor.models.commandr import (
+    CohereDecoderLayer,
+    CohereForCausalLM,
+    LayerNorm,
+)
+
+from .utils import (
+    AutoWeightsLoader,
+    get_draft_quant_config,
+    maybe_prefix,
+    process_eagle_weight,
+)
+
+logger = init_logger(__name__)
+
+
+class CohereEagleDecoderLayer(CohereDecoderLayer):
+    """Eagle draft variant of CohereDecoderLayer."""
+
+    def __init__(
+        self,
+        config: CohereConfig,
+        cache_config=None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__(
+            config,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            prefix=prefix,
+        )
+
+
+@support_torch_compile
+class CohereEagleModel(nn.Module):
+    def __init__(
+        self,
+        *,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+        start_layer_id: int = 0,
+    ) -> None:
+        super().__init__()
+        self.config = vllm_config.speculative_config.draft_model_config.hf_config
+        self.quant_config = get_draft_quant_config(vllm_config)
+
+        # Cohere2-targeted EAGLE drafts inherit the target's sliding-window
+        # attention pattern. ``CohereAttention`` resolves per-layer behavior
+        # via ``config.layer_types[layer_idx]`` and the eagle layers use
+        # absolute indices (target_layer_num + i), so prepend the target's
+        # ``layer_types`` to the draft's so the lookup succeeds.
+        target_text_config = vllm_config.model_config.hf_text_config
+        if hasattr(target_text_config, "layer_types") and hasattr(
+            self.config, "layer_types"
+        ):
+            self.config.layer_types = list(target_text_config.layer_types) + list(
+                self.config.layer_types
+            )
+
+        self.vocab_size = self.config.vocab_size
+        self.embed_tokens = VocabParallelEmbedding(
+            self.config.vocab_size,
+            self.config.hidden_size,
+            prefix=maybe_prefix(prefix, "embed_tokens"),
+        )
+
+        self.layers = nn.ModuleList(
+            [
+                CohereEagleDecoderLayer(
+                    self.config,
+                    cache_config=vllm_config.cache_config,
+                    quant_config=self.quant_config,
+                    prefix=maybe_prefix(prefix, f"layers.{i + start_layer_id}"),
+                )
+                for i in range(self.config.num_hidden_layers)
+            ]
+        )
+
+        # Cohere EAGLE checkpoints include a bias term on the input fusion
+        # projection (unlike LLaMA EAGLE which uses bias=False).
+        self.fc = ReplicatedLinear(
+            input_size=self.config.hidden_size * 2,
+            output_size=self.config.hidden_size,
+            bias=True,
+            params_dtype=vllm_config.model_config.dtype,
+            quant_config=self.quant_config,
+            prefix=maybe_prefix(prefix, "fc"),
+            return_bias=False,
+        )
+
+        # Cohere EAGLE applies an explicit final LayerNorm to the draft
+        # hidden states before they are consumed by the logits processor.
+        self.norm = LayerNorm(
+            param_shape=(self.config.hidden_size),
+            eps=self.config.layer_norm_eps,
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        input_embeds = self.embed_tokens(input_ids)
+        hidden_states = self.fc(torch.cat((input_embeds, hidden_states), dim=-1))
+        residual = None
+        for layer in self.layers:
+            hidden_states, residual = layer(
+                positions,
+                hidden_states,
+                residual,
+            )
+        hidden_states, _ = self.norm(hidden_states, residual)
+        return hidden_states, hidden_states
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        stacked_params_mapping = [
+            # (param_name, shard_name, shard_id)
+            (".qkv_proj", ".q_proj", "q"),
+            (".qkv_proj", ".k_proj", "k"),
+            (".qkv_proj", ".v_proj", "v"),
+            (".gate_up_proj", ".gate_proj", 0),
+            (".gate_up_proj", ".up_proj", 1),
+        ]
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+
+        for name, loaded_weight in weights:
+            if "rotary_emb.inv_freq" in name:
+                continue
+
+            if self.quant_config is not None and (
+                scale_name := self.quant_config.get_cache_scale(name)
+            ):
+                param = params_dict[scale_name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                loaded_weight = (
+                    loaded_weight if loaded_weight.dim() == 0 else loaded_weight[0]
+                )
+                weight_loader(param, loaded_weight)
+                loaded_params.add(scale_name)
+                continue
+
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                if weight_name not in name:
+                    continue
+                name = name.replace(weight_name, param_name)
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                break
+            else:
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
+
+                param = params_dict[name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                weight_loader(param, loaded_weight)
+            loaded_params.add(name)
+        return loaded_params
+
+
+class EagleCohereForCausalLM(CohereForCausalLM):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        nn.Module.__init__(self)
+        self.config = vllm_config.speculative_config.draft_model_config.hf_config
+        # Flags checked by the speculative proposer to decide whether to share
+        # embed_tokens / lm_head with the target model. Cohere EAGLE checkpoints
+        # use tied embeddings so these weights are absent from the draft file.
+        self.has_own_embed_tokens = False
+        self.has_own_lm_head = False
+        target_layer_num = vllm_config.model_config.get_num_layers(
+            vllm_config.parallel_config
+        )
+        self.model = CohereEagleModel(
+            vllm_config=vllm_config,
+            prefix=maybe_prefix(prefix, "model"),
+            start_layer_id=target_layer_num,
+        )
+
+        logit_scale = getattr(self.config, "logit_scale", 1.0)
+        self.logits_processor = LogitsProcessor(
+            self.config.vocab_size, scale=logit_scale
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        if inputs_embeds is not None:
+            raise NotImplementedError(
+                f"{type(self).__name__} does not support multimodal inputs yet."
+            )
+        return self.model(input_ids, positions, hidden_states)
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
+        def _track_and_forward(inputs):
+            name, weight = inputs
+            process_eagle_weight(self, name)
+            return name, weight
+
+        loader = AutoWeightsLoader(
+            self,
+            skip_prefixes=(
+                ["lm_head.", "model.embed_tokens."]
+                if self.config.tie_word_embeddings
+                else None
+            ),
+        )
+
+        loaded_weight_names = loader.load_weights(map(_track_and_forward, weights))
+
+        # Embed tokens are tied with the target model and therefore not
+        # present in the EAGLE checkpoint; mark them as loaded explicitly to
+        # avoid a spurious "weight not found" warning from the default
+        # weight loader.
+        loaded_weight_names.add("model.embed_tokens.weight")
+        return loaded_weight_names
diff --git a/vllm/model_executor/models/colmodernvbert.py b/vllm/model_executor/models/colmodernvbert.py
index 1e8477e120ee..b16bb4e221d7 100644
--- a/vllm/model_executor/models/colmodernvbert.py
+++ b/vllm/model_executor/models/colmodernvbert.py
@@ -18,7 +18,6 @@
 from vllm.config.multimodal import BaseDummyOptions
 from vllm.inputs import MultiModalDataDict
 from vllm.model_executor.layers.pooler.tokwise import pooler_for_token_embed
-from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import (
     MultiModalFieldConfig,
@@ -358,70 +357,23 @@ def forward(
             "model.text_model.layers.": "text_layers.",
             "model.text_model.embeddings.": "text_embeddings.",
             "model.text_model.final_norm.": "text_final_norm.",
-            "model.connector.modality_projection.": "connector.",
+            "model.connector.modality_projection.": "connector.proj.",
             "model.custom_text_proj.": "custom_text_proj.",
-            "model.vision_model.": "vision_model.vision_model.",
+            "model.vision_model.vision_model.": "vision_model.vision_model.",
             "model.": "",
         },
     )
 
-    # Checkpoint names for DecoupledEmbedding parts
-    _BASE_EMB = "model.text_model.embeddings.tok_embeddings.weight"
-    _EXTRA_EMB = (
-        "model.text_model.embeddings.tok_embeddings.additional_embedding.weight"
-    )
-
     def load_weights(
         self,
         weights: Iterable[tuple[str, torch.Tensor]],
     ) -> set[str]:
-        # DecoupledEmbedding requires concatenating base + additional
-        # embedding tensors before loading, so we extract them first.
-        base_embedding_weight: torch.Tensor | None = None
-        additional_embedding_weight: torch.Tensor | None = None
-        remaining: list[tuple[str, torch.Tensor]] = []
-
-        for name, tensor in weights:
-            if name == self._BASE_EMB:
-                base_embedding_weight = tensor
-            elif name == self._EXTRA_EMB:
-                additional_embedding_weight = tensor
-            else:
-                remaining.append((name, tensor))
-
-        # Load all non-embedding weights via AutoWeightsLoader
         loader = AutoWeightsLoader(self)
         loaded_params = loader.load_weights(
-            remaining,
+            weights,
             mapper=self.hf_to_vllm_mapper,
         )
 
-        # Concatenate and load DecoupledEmbedding weights
-        if base_embedding_weight is not None:
-            combined = base_embedding_weight
-            if additional_embedding_weight is not None:
-                combined = torch.cat(
-                    [base_embedding_weight, additional_embedding_weight],
-                    dim=0,
-                )
-            param_name = "text_embeddings.tok_embeddings.weight"
-            params_dict = dict(self.named_parameters())
-            if param_name in params_dict:
-                param = params_dict[param_name]
-                weight_loader = getattr(
-                    param,
-                    "weight_loader",
-                    default_weight_loader,
-                )
-                weight_loader(param, combined)
-                loaded_params.add(param_name)
-        elif additional_embedding_weight is not None:
-            raise ValueError(
-                "Found 'text_model.embeddings.tok_embeddings"
-                ".additional_embedding.weight' but not "
-                "'text_model.embeddings.tok_embeddings.weight'"
-            )
-
         # The pooler wraps ``custom_text_proj`` as its head projector.
         # Mark those params as loaded under the pooler path too.
         if hasattr(self, "pooler") and hasattr(self.pooler, "head"):
diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py
index a5644a414aee..133e1c19209b 100644
--- a/vllm/model_executor/models/config.py
+++ b/vllm/model_executor/models/config.py
@@ -1,18 +1,15 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from copy import deepcopy
-from math import lcm
 from typing import TYPE_CHECKING
 
 from vllm.logger import init_logger
-from vllm.model_executor.models import ModelRegistry
-from vllm.utils.math_utils import cdiv, round_up
-from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
-from vllm.v1.attention.backends.registry import AttentionBackendEnum
-from vllm.v1.kv_cache_interface import FullAttentionSpec, MambaSpec, MLAAttentionSpec
+from vllm.utils.math_utils import round_up
 
 if TYPE_CHECKING:
-    from vllm.config import ModelConfig, VllmConfig
+    from transformers import PretrainedConfig
+
+    from vllm.config import CacheConfig, ModelConfig, VllmConfig
+
 
 logger = init_logger(__name__)
 
@@ -57,7 +54,101 @@ def verify_and_update_model_config(model_config: "ModelConfig") -> None:
         hf_config.is_causal = not hf_config.use_bidirectional_attention
 
 
+class Gemma4Config(VerifyAndUpdateConfig):
+    @staticmethod
+    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
+        """Force unified attention backend for models with heterogeneous
+        head dimensions.
+
+        Some Gemma4 variants use different head dimensions for
+        sliding window (head_dim) vs full attention (global_head_dim) layers.
+        When global_head_dim > 256, FlashAttention rejects those layers
+        (head_size <= 256 kernel limit), causing vLLM to select a different
+        backend for each layer type. This mixed-backend execution produces
+        numerical divergence and output corruption.
+
+        The fix detects heterogeneous head dimensions from the model config
+        and forces TRITON_ATTN (which has no head_size ceiling) for all
+        layers when the user hasn't explicitly chosen a backend.
+
+        TODO: Heterogeneous head_sizes (head_dim != global_head_dim)
+        require NixlConnector changes to support per-layer KV transfer
+        with different head dimensions for prefill-decode disaggregation.
+        """
+        hf_text_config = vllm_config.model_config.hf_text_config
+        head_dim = getattr(hf_text_config, "head_dim", None)
+        global_head_dim = getattr(hf_text_config, "global_head_dim", None)
+
+        # Only force Triton when head dimensions actually differ AND the
+        # larger one exceeds FlashAttention's kernel limit (head_size <= 256).
+        # This avoids unnecessary backend forcing on smaller models where
+        # the config carries global_head_dim but all layers can still use
+        # the same FA backend.
+        max_head_dim = max(head_dim or 0, global_head_dim or 0)
+        if (
+            head_dim is not None
+            and global_head_dim is not None
+            and head_dim != global_head_dim
+            and max_head_dim > 256
+            and vllm_config.attention_config.backend is None
+        ):
+            from vllm.v1.attention.backends.registry import (
+                AttentionBackendEnum,
+            )
+
+            vllm_config.attention_config.backend = AttentionBackendEnum.TRITON_ATTN
+            logger.info(
+                "Gemma4 model has heterogeneous head dimensions "
+                "(head_dim=%d, global_head_dim=%d). Forcing TRITON_ATTN "
+                "backend to prevent mixed-backend numerical divergence.",
+                head_dim,
+                global_head_dim,
+            )
+
+
+class DeepseekV4ForCausalLMConfig(VerifyAndUpdateConfig):
+    @staticmethod
+    def verify_and_update_model_config(model_config: "ModelConfig") -> None:
+        quant_config = getattr(model_config.hf_config, "quantization_config", None)
+        if quant_config is not None and quant_config.get("quant_method") == "fp8":
+            model_type = getattr(model_config.hf_config, "model_type", None)
+            if model_type == "deepseek_v4":
+                model_config.hf_config.quantization_config["quant_method"] = (
+                    "deepseek_v4_fp8"
+                )
+
+        hf_text_quant_config = getattr(
+            model_config.hf_text_config, "quantization_config", None
+        )
+        if (
+            hf_text_quant_config is not None
+            and hf_text_quant_config.get("quant_method") == "fp8"
+        ):
+            model_type = getattr(model_config.hf_text_config, "model_type", None)
+            if model_type == "deepseek_v4":
+                model_config.hf_text_config.quantization_config["quant_method"] = (
+                    "deepseek_v4_fp8"
+                )
+
+
 class GptOssForCausalLMConfig(VerifyAndUpdateConfig):
+    @staticmethod
+    def verify_and_update_model_config(model_config: "ModelConfig") -> None:
+        quant_config = getattr(model_config.hf_config, "quantization_config", None)
+        if quant_config is not None and quant_config.get("quant_method") == "mxfp4":
+            model_config.hf_config.quantization_config["quant_method"] = "gpt_oss_mxfp4"
+
+        hf_text_quant_config = getattr(
+            model_config.hf_text_config, "quantization_config", None
+        )
+        if (
+            hf_text_quant_config is not None
+            and hf_text_quant_config.get("quant_method") == "mxfp4"
+        ):
+            model_config.hf_text_config.quantization_config["quant_method"] = (
+                "gpt_oss_mxfp4"
+            )
+
     @staticmethod
     def verify_and_update_config(vllm_config: "VllmConfig") -> None:
         structured_outputs_config = vllm_config.structured_outputs_config
@@ -104,11 +195,11 @@ class HybridAttentionMambaModelConfig(VerifyAndUpdateConfig):
     @classmethod
     def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
         """
-        Ensure that page size of attention layers is greater than or
-        equal to the mamba layers. If not, automatically set the attention
-        block size to ensure that it is. If the attention page size is
-        strictly greater than the mamba page size, we pad the mamba page size
-        to make them equal.
+        Perform early validation and setup for hybrid attention/mamba models.
+
+        Block size alignment with mamba page sizes is handled later by
+        Platform.update_block_size_for_backend(), which runs after model
+        layers are constructed and the attention backend is known.
 
         Args:
             vllm_config: vLLM Config
@@ -118,6 +209,7 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
         # Disable calculate_kv_scales for hybrid models: uninitialized
         # recurrent state corrupts scales during the calibration pass.
         # See issue: https://github.com/vllm-project/vllm/issues/37554
+
         if cache_config.calculate_kv_scales:
             logger.warning(
                 "Disabling calculate_kv_scales for hybrid model '%s'. "
@@ -129,140 +221,9 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
             )
             cache_config.calculate_kv_scales = False
 
-        # Save the user input before it gets modified by MambaModelConfig
-        mamba_block_size = cache_config.mamba_block_size
         # Enable FULL_AND_PIECEWISE by default
         MambaModelConfig.verify_and_update_config(vllm_config)
 
-        attention_config = vllm_config.attention_config
-        cache_config = vllm_config.cache_config
-        model_config = vllm_config.model_config
-        parallel_config = vllm_config.parallel_config
-
-        if cache_config.cache_dtype == "auto":
-            kv_cache_dtype = model_config.dtype
-        else:
-            kv_cache_dtype = STR_DTYPE_TO_TORCH_DTYPE[cache_config.cache_dtype]
-
-        # get attention page size (for 1 token)
-        # Attention backend constraints:
-        # - FlashAttention (FA) requires block size to be multiple of 16
-        # - MLA (Multi-head Latent Attention) requires larger alignment:
-        #   * CUTLASS_MLA backend: kernel_block_size 128 alignment
-        #   * Other MLA backends: kernel_block_size 64 alignment
-        if model_config.use_mla:
-            use_cutlass_mla = (
-                attention_config.backend == AttentionBackendEnum.CUTLASS_MLA
-            )
-            kernel_block_alignment_size = 128 if use_cutlass_mla else 64
-            attn_page_size_1_token = MLAAttentionSpec(
-                block_size=1,
-                num_kv_heads=model_config.get_num_kv_heads(parallel_config),
-                head_size=model_config.get_head_size(),
-                dtype=kv_cache_dtype,
-            ).page_size_bytes
-        else:
-            kernel_block_alignment_size = 16
-            attn_page_size_1_token = FullAttentionSpec(
-                block_size=1,
-                num_kv_heads=model_config.get_num_kv_heads(parallel_config),
-                head_size=model_config.get_head_size(),
-                dtype=kv_cache_dtype,
-            ).page_size_bytes
-
-        model_cls, _ = ModelRegistry.resolve_model_cls(
-            model_config.architecture,
-            model_config=model_config,
-        )
-
-        # get mamba page size
-        mamba_page_size = MambaSpec(
-            shapes=model_cls.get_mamba_state_shape_from_config(vllm_config),
-            dtypes=model_cls.get_mamba_state_dtype_from_config(vllm_config),
-            block_size=-1,  # block_size doesn't matter for mamba page size
-        ).page_size_bytes
-
-        # Model may be marked as is_hybrid
-        #  but mamba is skipped via config,
-        #  return directly
-        if mamba_page_size == 0:
-            return
-
-        if cache_config.mamba_cache_mode == "all":
-            # With prefix caching, select attention block size to
-            # optimize for mamba kernel performance
-
-            # Mamba2 SSD kernel uses a chunk_size, e.g. 256
-            # Align the block to the kernel: use lowest multiple of chunk_size
-            # of attention tokens that would fit mamba_page_size:
-            # e.g. for mamba page size = 788kB
-            #          attn_1_token = 2kB -> fits ~394 tokens
-            #      then round up to a multiple of 256 -> 512 tokens
-            # End result:
-            #  attn_block_size = 512
-            #  mamba_block_size = 512 (aligned to a multiple of chunk_size)
-            # TODO(tdoublep): this constraint can be relaxed fairly
-            # easily by changing the way we layout chunks in the
-            # mamba2 kernels.
-
-            base_chunk_size = mamba_block_size or model_config.get_mamba_chunk_size()
-            attn_tokens_per_mamba_state = cdiv(mamba_page_size, attn_page_size_1_token)
-            chunk_size = lcm(base_chunk_size, kernel_block_alignment_size)
-            attn_block_size = chunk_size * cdiv(attn_tokens_per_mamba_state, chunk_size)
-            cache_config.mamba_block_size = attn_block_size
-        else:
-            # Without prefix caching, select minimum valid attention block size
-            # to minimize mamba state padding
-
-            # Calculate minimum attention block size that satisfies both:
-            # 1. Backend alignment requirements (kernel_block_alignment_size)
-            # 2. Mamba page size compatibility (attn_page_size >= mamba_page_size)
-            attn_block_size = kernel_block_alignment_size * cdiv(
-                mamba_page_size, kernel_block_alignment_size * attn_page_size_1_token
-            )
-
-        # override attention block size if it is too small,
-        # even if the user has explicitly set it
-        if cache_config.block_size < attn_block_size:
-            cache_config.block_size = attn_block_size
-            logger.info(
-                "Setting attention block size to %d tokens "
-                "to ensure that attention page size is >= mamba page size.",
-                attn_block_size,
-            )
-
-        # By default, mamba block size will be set to max_model_len.
-        # When enabling prefix caching and using align mamba cache
-        # mode, we align mamba block size to the block size as the
-        # basic granularity for prefix caching.
-        if cache_config.mamba_cache_mode == "align":
-            cache_config.mamba_block_size = cache_config.block_size
-
-        # compute new attention page size
-        attn_page_size = cache_config.block_size * attn_page_size_1_token
-
-        assert attn_page_size >= mamba_page_size
-
-        if attn_page_size == mamba_page_size:
-            # don't need to pad mamba page size
-            return
-
-        # pad mamba page size to exactly match attention
-        if (
-            cache_config.mamba_page_size_padded is None
-            or cache_config.mamba_page_size_padded != attn_page_size
-        ):
-            cache_config.mamba_page_size_padded = attn_page_size
-            mamba_padding_pct = (
-                100 * (attn_page_size - mamba_page_size) / mamba_page_size
-            )
-            logger.info(
-                "Padding mamba page size by %.2f%% to ensure "
-                "that mamba page size and attention page size are "
-                "exactly equal.",
-                mamba_padding_pct,
-            )
-
 
 class JambaForSequenceClassificationConfig(VerifyAndUpdateConfig):
     @staticmethod
@@ -272,6 +233,12 @@ def verify_and_update_model_config(model_config: "ModelConfig") -> None:
             pooler_config.use_activation = False
 
 
+class JinaForRankingConfig(VerifyAndUpdateConfig):
+    @staticmethod
+    def verify_and_update_model_config(model_config: "ModelConfig") -> None:
+        model_config.hf_config.embedding_size = 512
+
+
 class JinaRobertaModelConfig(VerifyAndUpdateConfig):
     @staticmethod
     def verify_and_update_model_config(model_config: "ModelConfig") -> None:
@@ -306,8 +273,8 @@ def verify_and_update_model_config(model_config: "ModelConfig") -> None:
         config = model_config.hf_config
         config.num_labels = 1
         pooler_config = model_config.pooler_config
-        if pooler_config.logit_bias is None:
-            pooler_config.logit_bias = 2.65
+        if pooler_config.logit_mean is None:
+            pooler_config.logit_mean = 2.65
 
 
 class LlamaBidirectionalConfig(VerifyAndUpdateConfig):
@@ -428,17 +395,20 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
 
 
 class NemotronHForCausalLMConfig(VerifyAndUpdateConfig):
-    @staticmethod
-    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
+    DEFAULT_MAMBA_SSM_CACHE_DTYPE = "float32"
+    """Only `float32` is known to have no accuracy issues by default."""
+
+    @classmethod
+    def update_mamba_ssm_cache_dtype(
+        cls, *, cache_config: "CacheConfig", hf_config: "PretrainedConfig"
+    ) -> None:
         """Update mamba_ssm_cache_dtype for NemotronH models when set to 'auto'
         (or not explicitly set), to the value specified in the HF config, or to
-        float16 if not specified.
+        `float32` if not specified.
         """
-        cache_config = vllm_config.cache_config
         if cache_config.mamba_ssm_cache_dtype == "auto":
-            hf_config = vllm_config.model_config.hf_config
             mamba_ssm_cache_dtype = getattr(
-                hf_config, "mamba_ssm_cache_dtype", "float16"
+                hf_config, "mamba_ssm_cache_dtype", cls.DEFAULT_MAMBA_SSM_CACHE_DTYPE
             )
             logger.info(
                 "Updating mamba_ssm_cache_dtype to '%s' for NemotronH model",
@@ -446,8 +416,22 @@ def verify_and_update_config(vllm_config: "VllmConfig") -> None:
             )
             cache_config.mamba_ssm_cache_dtype = mamba_ssm_cache_dtype
 
+    @classmethod
+    def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
+        cls.update_mamba_ssm_cache_dtype(
+            cache_config=vllm_config.cache_config,
+            hf_config=vllm_config.model_config.hf_config,
+        )
+
 
 class NemotronHNanoVLV2Config(VerifyAndUpdateConfig):
+    @classmethod
+    def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
+        NemotronHForCausalLMConfig.update_mamba_ssm_cache_dtype(
+            cache_config=vllm_config.cache_config,
+            hf_config=vllm_config.model_config.hf_config.text_config,
+        )
+
     @staticmethod
     def verify_and_update_model_config(model_config: "ModelConfig") -> None:
         mm_config = model_config.multimodal_config
@@ -488,80 +472,22 @@ def verify_and_update_model_config(model_config: "ModelConfig") -> None:
         )
 
         head_dim = config.hidden_size // config.num_attention_heads
-        max_trained_positions = getattr(config, "max_trained_positions", 2048)
+        max_position_embeddings = getattr(config, "max_position_embeddings", 2048)
+        max_trained_positions = getattr(
+            config, "max_trained_positions", max_position_embeddings
+        )
+
+        rope_parameters = {
+            "max_trained_positions": max_trained_positions,
+            **(config.rope_parameters or {}),
+        }
 
         config.rotary_kwargs = {
             "head_size": head_dim,
-            "max_position": max_trained_positions,
-            "rope_parameters": config.rope_parameters,
+            "max_position": model_config.max_model_len,
+            "rope_parameters": rope_parameters,
         }
 
-        # we ignore config.rotary_scaling_factor so that for datasets shorter
-        # than max_trained_positions 2048, the results are consistent
-        # with SentenceTransformer.
-        # The context extension uses vllm style rope_theta and rope_parameters.
-        # See #17785 #18755
-        if (
-            not model_config.hf_overrides
-            and model_config.original_max_model_len is None
-        ):
-            # Default
-            # Reset max_model_len to max_trained_positions.
-            # nomic-embed-text-v2-moe the length is set to 512
-            # by sentence_bert_config.json.
-            max_model_len_before = model_config.max_model_len
-            max_model_len = min(model_config.max_model_len, max_trained_positions)
-
-            model_config.max_model_len = model_config.get_and_verify_max_len(
-                max_model_len
-            )
-
-            if model_config.max_model_len != max_model_len_before:
-                logger.warning(
-                    "Nomic context extension is disabled. "
-                    "Changing max_model_len from %s to %s. "
-                    "To enable context extension, see: "
-                    "https://github.com/vllm-project/vllm/tree/main/examples/offline_inference/context_extension.py",
-                    max_model_len_before,
-                    model_config.max_model_len,
-                )
-        else:
-            # We need to re-verify max_model_len to avoid lengths
-            # greater than position_embedding.
-            hf_text_config = model_config.hf_text_config
-
-            if isinstance(model_config.hf_overrides, dict):
-                # hf_overrides_kw
-                max_model_len = model_config.hf_overrides.get(
-                    "max_model_len", model_config.max_model_len
-                )
-            else:
-                # hf_overrides_fn
-                # This might be overridden by sentence_bert_config.json.
-                max_model_len = model_config.max_model_len
-
-            # reset hf_text_config for recalculate_max_model_len.
-            if hasattr(hf_text_config, "max_model_len"):
-                delattr(hf_text_config, "max_model_len")
-            hf_text_config.max_position_embeddings = max_trained_positions
-            hf_text_config.rope_parameters = config.rotary_kwargs["rope_parameters"]
-
-            # Update the cached derived_max_model_len to enforce the limit
-            model_config.model_arch_config.derived_max_model_len_and_key = (
-                float(max_trained_positions),
-                "max_position_embeddings",
-            )
-
-            # The priority of sentence_bert_config.json is higher
-            # than max_position_embeddings
-            encoder_config = deepcopy(model_config.encoder_config)
-            encoder_config.pop("max_seq_length", None)
-            model_config.encoder_config = encoder_config
-
-            model_config.max_model_len = model_config.get_and_verify_max_len(
-                max_model_len
-            )
-
 
 class Qwen2ForProcessRewardModelConfig(VerifyAndUpdateConfig):
     @staticmethod
@@ -664,15 +590,19 @@ def verify_and_update_model_config(model_config: "ModelConfig") -> None:
 MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
     "ColBERTJinaRobertaModel": JinaRobertaModelConfig,
     "ColQwen3_5": Qwen3_5ForConditionalGenerationConfig,
+    "DeepseekV4ForCausalLM": DeepseekV4ForCausalLMConfig,
     "DeepseekV32ForCausalLM": DeepseekV32ForCausalLM,
     "Ernie4_5_VLMoeForConditionalGeneration": Ernie4_5_VLMoeForConditionalGenerationConfig,  # noqa: E501
     "FalconMambaForCausalLM": MambaModelConfig,
     "Gemma3TextModel": Gemma3TextModelConfig,
+    "Gemma4ForCausalLM": Gemma4Config,
+    "Gemma4ForConditionalGeneration": Gemma4Config,
     "GptOssForCausalLM": GptOssForCausalLMConfig,
     "GteModel": SnowflakeGteNewModelConfig,
     "GteNewForSequenceClassification": GteNewModelConfig,
     "GteNewModel": GteNewModelConfig,
     "JambaForSequenceClassification": JambaForSequenceClassificationConfig,
+    "JinaForRanking": JinaForRankingConfig,
     "JinaVLForRanking": JinaVLForSequenceClassificationConfig,
     "LlamaBidirectionalForSequenceClassification": LlamaBidirectionalConfig,
     "LlamaBidirectionalModel": LlamaBidirectionalConfig,
diff --git a/vllm/model_executor/models/conformer_encoder.py b/vllm/model_executor/models/conformer_encoder.py
new file mode 100644
index 000000000000..0d2e31270199
--- /dev/null
+++ b/vllm/model_executor/models/conformer_encoder.py
@@ -0,0 +1,350 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Shared Conformer encoder components for FireRedASR2 and FireRedLID.
+
+Both models use the same Conformer-based audio encoder architecture
+(Conv2dSubsampling → RelPositionalEncoding → N × RelPosEmbConformerBlock).
+This module factors out the common building blocks to avoid duplication.
+"""
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+from vllm.model_executor.layers.linear import ReplicatedLinear
+
+
+class Conv2dSubsampling(nn.Module):
+    def __init__(self, idim: int, d_model: int, out_channels: int = 32):
+        super().__init__()
+        self.conv = nn.Sequential(
+            nn.Conv2d(1, out_channels, 3, 2),
+            nn.ReLU(),
+            nn.Conv2d(out_channels, out_channels, 3, 2),
+            nn.ReLU(),
+        )
+        subsample_idim = ((idim - 1) // 2 - 1) // 2
+        self.out = ReplicatedLinear(
+            input_size=out_channels * subsample_idim,
+            output_size=d_model,
+            bias=True,
+        )
+
+        self.subsampling = 4
+        left_context = right_context = 3  # both exclude current frame
+        self.context = left_context + 1 + right_context  # 7
+
+    def forward(
+        self, x: torch.Tensor, x_mask: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        x = x.unsqueeze(1)
+        x = self.conv(x)
+        N, C, T, D = x.size()
+        x, _ = self.out(x.transpose(1, 2).contiguous().view(N, T, C * D))
+        mask = x_mask[:, :, :-2:2][:, :, :-2:2]
+        input_lengths = mask[:, -1, :].sum(dim=-1)
+        return x, input_lengths, mask
+
+
+class Swish(nn.Module):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return x * torch.sigmoid(x)
+
+
+class RelPositionalEncoding(nn.Module):
+    def __init__(self, d_model: int, max_len: int = 5000):
+        super().__init__()
+        pe_positive = torch.zeros(max_len, d_model, requires_grad=False)
+        pe_negative = torch.zeros(max_len, d_model, requires_grad=False)
+        position = torch.arange(0, max_len).unsqueeze(1).float()
+        div_term = torch.exp(
+            torch.arange(0, d_model, 2).float()
+            * -(torch.log(torch.tensor(10000.0)).item() / d_model)
+        )
+        pe_positive[:, 0::2] = torch.sin(position * div_term)
+        pe_positive[:, 1::2] = torch.cos(position * div_term)
+        pe_negative[:, 0::2] = torch.sin(-1 * position * div_term)
+        pe_negative[:, 1::2] = torch.cos(-1 * position * div_term)
+
+        pe_positive = torch.flip(pe_positive, [0]).unsqueeze(0)
+        pe_negative = pe_negative[1:].unsqueeze(0)
+        self.pe = torch.cat([pe_positive, pe_negative], dim=1)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # Tmax = 2 * max_len - 1
+        Tmax, T = self.pe.size(1), x.size(1)
+        pos_emb = self.pe[:, Tmax // 2 - T + 1 : Tmax // 2 + T].clone().detach()
+        return pos_emb
+
+
+class ConformerFeedForward(nn.Module):
+    def __init__(self, d_model: int):
+        super().__init__()
+        self.pre_layer_norm = nn.LayerNorm(d_model)
+        self.linear_expand = ReplicatedLinear(
+            input_size=d_model,
+            output_size=d_model * 4,
+            bias=True,
+        )
+        self.nonlinear = Swish()
+        self.linear_project = ReplicatedLinear(
+            input_size=d_model * 4,
+            output_size=d_model,
+            bias=True,
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        residual = x
+        x = self.pre_layer_norm(x)
+        x, _ = self.linear_expand(x)
+        x = self.nonlinear(x)
+        x, _ = self.linear_project(x)
+        return x + residual
+
+
+class EncoderMultiHeadAttention(nn.Module):
+    def __init__(self, n_head: int, d_model: int):
+        super().__init__()
+        assert d_model % n_head == 0
+        self.n_head = n_head
+        self.d_k = d_model // n_head
+        self.d_v = self.d_k
+
+        self.w_qs = ReplicatedLinear(d_model, n_head * self.d_k, bias=False)
+        self.w_ks = ReplicatedLinear(d_model, n_head * self.d_k, bias=False)
+        self.w_vs = ReplicatedLinear(d_model, n_head * self.d_v, bias=False)
+
+        self.layer_norm_q = nn.LayerNorm(d_model)
+        self.layer_norm_k = nn.LayerNorm(d_model)
+        self.layer_norm_v = nn.LayerNorm(d_model)
+
+        self.fc = ReplicatedLinear(n_head * self.d_v, d_model, bias=False)
+
+    def forward_qkv(
+        self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        d_k, d_v, n_head = self.d_k, self.d_v, self.n_head
+        sz_b, len_q, len_k, len_v = q.size(0), q.size(1), k.size(1), v.size(1)
+
+        q = self.layer_norm_q(q)
+        k = self.layer_norm_k(k)
+        v = self.layer_norm_v(v)
+
+        q = self.w_qs(q)[0].view(sz_b, len_q, n_head, d_k)
+        k = self.w_ks(k)[0].view(sz_b, len_k, n_head, d_k)
+        v = self.w_vs(v)[0].view(sz_b, len_v, n_head, d_v)
+        q = q.transpose(1, 2)
+        k = k.transpose(1, 2)
+        v = v.transpose(1, 2)
+        return q, k, v
+
+    def forward_output(
+        self,
+        output: torch.Tensor,
+        residual: torch.Tensor,
+        sz_b: int,
+        len_q: int,
+    ) -> torch.Tensor:
+        output = output.transpose(1, 2).contiguous().view(sz_b, len_q, -1)
+        fc_out, _ = self.fc(output)
+        return fc_out + residual
+
+    def forward_attention(
+        self,
+        attn: torch.Tensor,
+        v: torch.Tensor,
+        mask: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        if mask is not None:
+            mask = mask.unsqueeze(1)
+            mask = mask.eq(0)
+            attn = attn.masked_fill(mask, -float("inf"))
+            attn = torch.softmax(attn, dim=-1).masked_fill(mask, 0.0)
+        else:
+            attn = torch.softmax(attn, dim=-1)
+        output = torch.matmul(attn, v)
+        return output, attn
+
+
+class RelPosMultiHeadAttention(EncoderMultiHeadAttention):
+    def __init__(self, n_head: int, d_model: int):
+        super().__init__(n_head, d_model)
+        d_k = d_model // n_head
+        self.scale = 1.0 / (d_k**0.5)
+        self.linear_pos = ReplicatedLinear(d_model, n_head * d_k, bias=False)
+        self.pos_bias_u = nn.Parameter(torch.empty([n_head, d_k]))
+        self.pos_bias_v = nn.Parameter(torch.empty([n_head, d_k]))
+
+    def _rel_shift(self, x):
+        N, H, T1, T2 = x.size()
+        zero_pad = torch.zeros((N, H, T1, 1), device=x.device, dtype=x.dtype)
+        x_padded = torch.cat([zero_pad, x], dim=-1)
+        x_padded = x_padded.view(N, H, T2 + 1, T1)
+        x = x_padded[:, :, 1:].view_as(x)
+        x = x[:, :, :, : x.size(-1) // 2 + 1]
+        return x
+
+    def forward(
+        self,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        pos_emb: torch.Tensor,
+        mask: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        sz_b, len_q = q.size(0), q.size(1)
+        residual = q
+        q, k, v = self.forward_qkv(q, k, v)
+
+        q = q.transpose(1, 2)
+        n_batch_pos = pos_emb.size(0)
+        p = self.linear_pos(pos_emb)[0].view(n_batch_pos, -1, self.n_head, self.d_k)
+        p = p.transpose(1, 2)
+
+        q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2)
+        q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2)
+
+        matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1))
+        matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1))
+        matrix_bd = self._rel_shift(matrix_bd)
+
+        attn_scores = matrix_ac + matrix_bd
+        attn_scores.mul_(self.scale)
+
+        output, attn = self.forward_attention(attn_scores, v, mask=mask)
+        output = self.forward_output(output, residual, sz_b, len_q)
+        return output, attn
+
+
+class ConformerConvolution(nn.Module):
+    def __init__(self, d_model: int, kernel_size: int = 33):
+        super().__init__()
+        assert kernel_size % 2 == 1
+        self.pre_layer_norm = nn.LayerNorm(d_model)
+        self.pointwise_conv1 = nn.Conv1d(
+            d_model, d_model * 4, kernel_size=1, bias=False
+        )
+        self.padding = (kernel_size - 1) // 2
+        self.depthwise_conv = nn.Conv1d(
+            d_model * 2,
+            d_model * 2,
+            kernel_size,
+            stride=1,
+            padding=self.padding,
+            groups=d_model * 2,
+            bias=False,
+        )
+        self.batch_norm = nn.LayerNorm(d_model * 2)
+        self.swish = Swish()
+        self.pointwise_conv2 = nn.Conv1d(
+            d_model * 2, d_model, kernel_size=1, bias=False
+        )
+
+    def forward(
+        self, x: torch.Tensor, mask: torch.Tensor | None = None
+    ) -> torch.Tensor:
+        residual = x
+        out = self.pre_layer_norm(x)
+        out = out.transpose(1, 2)
+        if mask is not None:
+            out.masked_fill_(mask.ne(1), 0.0)
+        out = self.pointwise_conv1(out)
+        out = F.glu(out, dim=1)
+        out = self.depthwise_conv(out)
+        out = out.transpose(1, 2)
+        out = self.swish(self.batch_norm(out))
+        out = out.transpose(1, 2)
+        out = self.pointwise_conv2(out)
+        if mask is not None:
+            out.masked_fill_(mask.ne(1), 0.0)
+        out = out.transpose(1, 2)
+        return out + residual
+
+
+class RelPosEmbConformerBlock(nn.Module):
+    def __init__(self, d_model: int, n_head: int, kernel_size: int = 33):
+        super().__init__()
+        self.ffn1 = ConformerFeedForward(d_model)
+        self.mhsa = RelPosMultiHeadAttention(n_head, d_model)
+        self.conv = ConformerConvolution(d_model, kernel_size)
+        self.ffn2 = ConformerFeedForward(d_model)
+        self.layer_norm = nn.LayerNorm(d_model)
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        pos_emb: torch.Tensor,
+        slf_attn_mask: torch.Tensor | None = None,
+        pad_mask: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        out = 0.5 * x + 0.5 * self.ffn1(x)
+        out = self.mhsa(out, out, out, pos_emb, mask=slf_attn_mask)[0]
+        out = self.conv(out, pad_mask)
+        out = 0.5 * out + 0.5 * self.ffn2(out)
+        out = self.layer_norm(out)
+        return out
+
+
+class ConformerEncoder(nn.Module):
+    """
+    Conformer encoder shared by FireRedASR2 and FireRedLID.
+    """
+
+    def __init__(
+        self,
+        idim: int,
+        n_layers_enc: int,
+        n_head: int,
+        d_model: int,
+        kernel_size: int = 33,
+        pe_maxlen: int = 5000,
+    ):
+        super().__init__()
+        self.odim = d_model
+
+        self.input_preprocessor = Conv2dSubsampling(idim, d_model)
+        self.positional_encoding = RelPositionalEncoding(d_model, max_len=pe_maxlen)
+
+        self.layer_stack = nn.ModuleList()
+        for _ in range(n_layers_enc):
+            block = RelPosEmbConformerBlock(d_model, n_head, kernel_size)
+            self.layer_stack.append(block)
+
+    def forward(
+        self,
+        padded_input: torch.Tensor,
+        input_lengths: torch.Tensor,
+        pad: bool = True,
+    ):
+        if pad:
+            padded_input = F.pad(
+                padded_input,
+                (0, 0, 0, self.input_preprocessor.context - 1),
+                "constant",
+                0.0,
+            )
+        src_mask = self.padding_position_is_0(padded_input, input_lengths)
+
+        embed_output, input_lengths, src_mask = self.input_preprocessor(
+            padded_input, src_mask
+        )
+        enc_output = embed_output
+
+        pos_emb = self.positional_encoding(embed_output)
+
+        for enc_layer in self.layer_stack:
+            enc_output = enc_layer(
+                enc_output, pos_emb, slf_attn_mask=src_mask, pad_mask=src_mask
+            )
+
+        return enc_output, input_lengths, src_mask
+
+    def padding_position_is_0(
+        self, padded_input: torch.Tensor, input_lengths: torch.Tensor
+    ) -> torch.Tensor:
+        N, T = padded_input.size()[:2]
+        # Use broadcasting instead of a Python loop for efficiency.
+        positions = torch.arange(T, device=padded_input.device).unsqueeze(0)
+        mask = (positions < input_lengths.unsqueeze(1)).to(torch.uint8)
+        return mask.unsqueeze(1)
diff --git a/vllm/model_executor/models/dbrx.py b/vllm/model_executor/models/dbrx.py
index ca6e6a49a98a..6c798bf2f36b 100644
--- a/vllm/model_executor/models/dbrx.py
+++ b/vllm/model_executor/models/dbrx.py
@@ -15,7 +15,9 @@
     get_tensor_model_parallel_world_size,
 )
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+)
 from vllm.model_executor.layers.linear import (
     QKVParallelLinear,
     ReplicatedLinear,
@@ -85,7 +87,6 @@ def __init__(
             hidden_size=config.d_model,
             intermediate_size=config.ffn_config.ffn_hidden_size,
             params_dtype=params_dtype,
-            reduce_results=True,
             renormalize=True,
             quant_config=quant_config,
             tp_size=get_tensor_model_parallel_world_size(),
diff --git a/vllm/model_executor/models/deepseek_eagle.py b/vllm/model_executor/models/deepseek_eagle.py
index 5c439cdf486d..76e90e327655 100644
--- a/vllm/model_executor/models/deepseek_eagle.py
+++ b/vllm/model_executor/models/deepseek_eagle.py
@@ -8,7 +8,9 @@
 
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import VllmConfig
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -105,7 +107,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        expert_params_mapping = FusedMoE.make_expert_params_mapping(
+        expert_params_mapping = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -196,7 +198,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             vllm_config.parallel_config
         )
         self.model = DeepseekV2Model(
-            vllm_config=vllm_config, prefix="model", start_layer_id=target_layer_num
+            vllm_config=vllm_config,
+            prefix=maybe_prefix(prefix, "model"),
+            start_layer_id=target_layer_num,
         )
 
         self.lm_head = ParallelLMHead(
diff --git a/vllm/model_executor/models/deepseek_eagle3.py b/vllm/model_executor/models/deepseek_eagle3.py
index 640ba89914b2..4d26a9e00904 100644
--- a/vllm/model_executor/models/deepseek_eagle3.py
+++ b/vllm/model_executor/models/deepseek_eagle3.py
@@ -199,11 +199,18 @@ def __init__(
             ]
         )
 
-        # fc layer for combining auxiliary hidden states (3x hidden size input)
-        if hasattr(self.config, "target_hidden_size"):
-            fc_input_size = self.config.target_hidden_size * 3
-        else:
-            fc_input_size = self.config.hidden_size * 3
+        # fc layer for combining auxiliary hidden states
+        num_aux_hidden_states = getattr(self.config, "num_aux_hidden_states", None)
+        if num_aux_hidden_states is None:
+            eagle_config = getattr(self.config, "eagle_config", None) or {}
+            layer_ids = eagle_config.get("eagle_aux_hidden_state_layer_ids")
+            num_aux_hidden_states = len(layer_ids) if layer_ids else 3
+        self.num_aux_hidden_states = num_aux_hidden_states
+
+        target_hidden_size = getattr(
+            self.config, "target_hidden_size", self.config.hidden_size
+        )
+        fc_input_size = target_hidden_size * num_aux_hidden_states
 
         self.fc = ReplicatedLinear(
             input_size=fc_input_size,
@@ -215,6 +222,18 @@ def __init__(
             return_bias=False,
         )
 
+        use_fc_norm = getattr(self.config, "fc_norm", False)
+        if use_fc_norm:
+            self.fc_norm = nn.ModuleList(
+                [
+                    RMSNorm(target_hidden_size, eps=self.config.rms_norm_eps)
+                    for _ in range(self.num_aux_hidden_states)
+                ]
+            )
+        else:
+            self.fc_norm = None
+
+        self.norm_output = getattr(self.config, "norm_output", False)
         self.norm = RMSNorm(
             self.config.hidden_size,
             eps=self.config.rms_norm_eps,
@@ -242,8 +261,13 @@ def forward(
                 hidden_states=hidden_states,
                 residual=residual,
             )
+
         hidden_states, hidden_prenorm = self.norm(hidden_states, residual)
-        return hidden_states, hidden_prenorm
+
+        # norm_output variant uses the post-norm hidden states.
+        aux_output = hidden_states if self.norm_output else hidden_prenorm
+
+        return hidden_states, aux_output
 
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         stacked_params_mapping = [
@@ -318,7 +342,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.config.target_layer_count = target_layer_num
 
         self.model = DeepseekV2Eagle3Model(
-            vllm_config=vllm_config, prefix="model", start_layer_id=target_layer_num
+            vllm_config=vllm_config,
+            prefix=maybe_prefix(prefix, "model"),
+            start_layer_id=target_layer_num,
         )
 
         logit_scale = getattr(self.config, "logit_scale", 1.0)
diff --git a/vllm/model_executor/models/deepseek_mtp.py b/vllm/model_executor/models/deepseek_mtp.py
index c75ee1a1bbfe..37f94c687a21 100644
--- a/vllm/model_executor/models/deepseek_mtp.py
+++ b/vllm/model_executor/models/deepseek_mtp.py
@@ -11,7 +11,9 @@
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.quantization import QuantizationConfig
@@ -30,6 +32,7 @@
     DeepseekV2DecoderLayer,
     DeepseekV2MixtureOfExperts,
     DeepseekV2MoE,
+    _try_load_fp8_indexer_wk,
     get_spec_layer_idx_from_weight_name,
 )
 from .utils import maybe_prefix
@@ -184,6 +187,7 @@ class DeepSeekMTP(nn.Module, DeepseekV2MixtureOfExperts):
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
         self.config = vllm_config.model_config.hf_config
+        self.quant_config = vllm_config.quant_config
         self.model = DeepSeekMultiTokenPredictor(
             vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
         )
@@ -243,7 +247,14 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
             ("fused_qkv_a_proj", "kv_a_proj_with_mqa", 1),
         ]
 
-        expert_params_mapping = SharedFusedMoE.make_expert_params_mapping(
+        # Fused indexer wk + weights_proj (shard 0 = wk, shard 1 = weights_proj)
+        indexer_fused_mapping = [
+            ("wk_weights_proj", "wk", 0),
+            ("wk_weights_proj", "weights_proj", 1),
+        ]
+        stacked_params_mapping.extend(indexer_fused_mapping)
+
+        expert_params_mapping = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -258,6 +269,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
         params_dict = dict(self.named_parameters())
         loaded_params: set[str] = set()
+        _pending_wk_fp8: dict = {}  # FP8 indexer wk dequant buffer
         for name, loaded_weight in weights:
             if "rotary_emb.inv_freq" in name:
                 continue
@@ -268,6 +280,12 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                 rocm_aiter_moe_shared_expert_enabled and ("mlp.shared_experts" in name)
             )
             name = self._rewrite_spec_layer_name(spec_layer, name)
+
+            if _try_load_fp8_indexer_wk(
+                name, loaded_weight, _pending_wk_fp8, params_dict, loaded_params
+            ):
+                continue
+
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 # Skip non-stacked layers and experts (experts handled below).
                 if weight_name not in name:
diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py
index f1c4a7b21993..44797874a4c5 100644
--- a/vllm/model_executor/models/deepseek_v2.py
+++ b/vllm/model_executor/models/deepseek_v2.py
@@ -48,9 +48,9 @@
 from vllm.model_executor.layers.attention import Attention
 from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
 from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
     GateLinear,
-    RoutingMethodType,
-    SharedFusedMoE,
+    fused_moe_make_expert_params_mapping,
 )
 from vllm.model_executor.layers.layernorm import LayerNorm, RMSNorm
 from vllm.model_executor.layers.linear import (
@@ -66,8 +66,14 @@
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
     per_token_group_quant_fp8,
 )
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    GroupShape,
+    scaled_dequantize,
+)
 from vllm.model_executor.layers.rotary_embedding import get_rope
-from vllm.model_executor.layers.sparse_attn_indexer import SparseAttnIndexer
+from vllm.model_executor.layers.sparse_attn_indexer import (
+    SparseAttnIndexer,
+)
 from vllm.model_executor.layers.vocab_parallel_embedding import (
     ParallelLMHead,
     VocabParallelEmbedding,
@@ -76,7 +82,11 @@
     default_weight_loader,
     maybe_remap_kv_scale_name,
 )
-from vllm.model_executor.models.utils import sequence_parallel_chunk
+from vllm.model_executor.models.utils import (
+    AutoWeightsLoader,
+    extract_layer_index,
+    sequence_parallel_chunk,
+)
 from vllm.platforms import current_platform
 from vllm.sequence import IntermediateTensors
 from vllm.utils.torch_utils import direct_register_custom_op
@@ -290,6 +300,15 @@ def __init__(
         self.is_fusion_moe_shared_experts_enabled = (
             rocm_aiter_ops.is_fusion_moe_shared_experts_enabled()
         )
+        if (
+            self.is_rocm_aiter_moe_enabled
+            and self.gate.e_score_correction_bias is not None
+        ):
+            # AITER biased_grouped_topk requires the correction bias dtype to
+            # match the router logits. Keep DeepSeek's correction bias in fp32
+            # by requesting fp32 router logits for this routing path.
+            self.gate.set_out_dtype(torch.float32)
+
         if config.n_shared_experts is None or self.is_fusion_moe_shared_experts_enabled:
             self.shared_experts = None
         else:
@@ -305,14 +324,13 @@ def __init__(
                 prefix=f"{prefix}.shared_experts",
             )
 
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.shared_experts,
             gate=self.gate,
             num_experts=config.n_routed_experts,
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=config.norm_topk_prob,
             quant_config=quant_config,
             use_grouped_topk=True,
@@ -320,11 +338,9 @@ def __init__(
             topk_group=getattr(config, "topk_group", 1),
             prefix=f"{prefix}.experts",
             scoring_func=getattr(config, "scoring_func", "softmax"),
-            # we do scaling outside, set factor to 1.0 to avoid double mul
             # aiter applies routed_scaling_factor internally
-            routed_scaling_factor=1.0
-            if not self.is_rocm_aiter_moe_enabled
-            else self.routed_scaling_factor,
+            routed_scaling_factor=self.routed_scaling_factor,
+            apply_routed_scale_to_output=not self.is_rocm_aiter_moe_enabled,
             e_score_correction_bias=self.gate.e_score_correction_bias,
             enable_eplb=self.enable_eplb,
             num_redundant_experts=self.n_redundant_experts,
@@ -332,18 +348,16 @@ def __init__(
             n_shared_experts=config.n_shared_experts
             if self.is_fusion_moe_shared_experts_enabled
             else None,
+            router_logits_dtype=self.gate.out_dtype,
         )
 
-        # NOTE(rob): this is a hack until we finish off the PR for
-        # merging TRTLLM kernels into the MK framework. Then we can
-        # query the MonolithicMK for the expected router logits.
-        # NOTE(dbari): Use BF16 if routing is not Deepseek, e.g. Mistral Large 3
-        self.gate.set_out_dtype(
-            torch.float32
-            if self.experts.quant_method.is_monolithic
-            and self.experts.routing_method_type == RoutingMethodType.DeepSeekV3
-            else torch.bfloat16
-        )
+        if (
+            self.is_rocm_aiter_moe_enabled
+            and self.gate.e_score_correction_bias is not None
+        ):
+            self.gate.e_score_correction_bias.data = (
+                self.gate.e_score_correction_bias.data.to(self.gate.out_dtype)
+            )
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         num_tokens, hidden_dim = hidden_states.shape
@@ -357,43 +371,20 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
             hidden_states = sequence_parallel_chunk(hidden_states)
 
         if self.experts.is_internal_router:
-            # In this case, the gate/router runs inside the FusedMoE class
-            fused_moe_out = self.experts(
+            final_hidden_states = self.experts(
                 hidden_states=hidden_states, router_logits=hidden_states
             )
         else:
-            # router_logits: (num_tokens, n_experts)
             router_logits, _ = self.gate(hidden_states)
-            fused_moe_out = self.experts(
+            final_hidden_states = self.experts(
                 hidden_states=hidden_states, router_logits=router_logits
             )
 
-        shared_output, final_hidden_states = fused_moe_out
-        if self.shared_experts is None:
-            assert shared_output is None
-
-        # Fix FP16 overflow
-        # See DeepseekV2DecoderLayer for more details.
-        if hidden_states.dtype != torch.float16:
-            if not self.is_rocm_aiter_moe_enabled:
-                final_hidden_states *= self.routed_scaling_factor
-        elif self.shared_experts is not None:
-            assert shared_output is not None
-            shared_output *= 1.0 / self.routed_scaling_factor
-
-        if self.shared_experts is not None:
-            assert shared_output is not None
-            final_hidden_states += shared_output
-
         if self.is_sequence_parallel:
             final_hidden_states = tensor_model_parallel_all_gather(
                 final_hidden_states, 0
             )
             final_hidden_states = final_hidden_states[:num_tokens]
-        elif self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
-                final_hidden_states
-            )
 
         return final_hidden_states.view(num_tokens, hidden_dim)
 
@@ -625,6 +616,7 @@ def __init__(
         super().__init__()
         self.vllm_config = vllm_config
         self.config = config
+        self.quant_config = quant_config
         # self.indexer_cfg = config.attn_module_list_cfg[0]["attn_index"]
         self.topk_tokens = config.index_topk
         self.n_head = config.index_n_heads  # 64
@@ -639,21 +631,17 @@ def __init__(
             quant_config=quant_config,
             prefix=f"{prefix}.wq_b",
         )
-        self.wk = ReplicatedLinear(
-            hidden_size,
-            self.head_dim,
-            bias=False,
-            quant_config=quant_config,
-            prefix=f"{prefix}.wk",
-        )
-        self.k_norm = LayerNorm(self.head_dim, eps=1e-6)
-        self.weights_proj = ReplicatedLinear(
+        # Fused wk + weights_proj: single GEMM producing [head_dim + n_head].
+        # FP8 wk weights are upcasted to BF16 during loading to maintain fusion.
+        self.wk_weights_proj = MergedColumnParallelLinear(
             hidden_size,
-            self.n_head,
+            [self.head_dim, self.n_head],
             bias=False,
             quant_config=None,
-            prefix=f"{prefix}.weights_proj",
+            disable_tp=True,
+            prefix=f"{prefix}.wk_weights_proj",
         )
+        self.k_norm = LayerNorm(self.head_dim, eps=1e-6)
         self.softmax_scale = self.head_dim**-0.5
 
         self.scale_fmt = "ue8m0"
@@ -690,27 +678,45 @@ def forward(
     ) -> torch.Tensor:
         q, _ = self.wq_b(qr)
         q = q.view(-1, self.n_head, self.head_dim)
-        q_pe, q_nope = torch.split(
-            q, [self.rope_dim, self.head_dim - self.rope_dim], dim=-1
-        )
 
-        k, _ = self.wk(hidden_states)
-        k = self.k_norm(k)
-        k_pe, k_nope = torch.split(
-            k, [self.rope_dim, self.head_dim - self.rope_dim], dim=-1
-        )
+        if current_platform.is_rocm():
+            # This path should works on all platform, will remove extra
+            # branches in the future
+            # Fused wk + weights_proj: one GEMM, then split
+            kw, _ = self.wk_weights_proj(hidden_states)
+            k = kw[:, : self.head_dim]
+            weights = kw[:, self.head_dim :]
 
-        q_pe, k_pe = rotary_emb(positions, q_pe, k_pe.unsqueeze(1))
-        # Note: RoPE (NeoX) can introduce extra leading dimensions during compilation
-        # so we need to reshape back to token-flattened shapes
-        q_pe = q_pe.reshape(-1, self.n_head, self.rope_dim)
-        k_pe = k_pe.reshape(-1, 1, self.rope_dim)
+            k = self.k_norm(k)
 
-        # `rotary_emb` is shape-preserving; `q_pe` is already
-        # [num_tokens, n_head, rope_dim].
-        q = torch.cat([q_pe, q_nope], dim=-1)
-        # `k_pe` is [num_tokens, 1, rope_dim] (MQA).
-        k = torch.cat([k_pe.squeeze(-2), k_nope], dim=-1)
+            rotary_emb(
+                positions, q[..., : self.rope_dim], k[..., : self.rope_dim].unsqueeze(1)
+            )
+        else:
+            q_pe, q_nope = torch.split(
+                q, [self.rope_dim, self.head_dim - self.rope_dim], dim=-1
+            )
+            # Fused wk + weights_proj: one GEMM, then split
+            kw, _ = self.wk_weights_proj(hidden_states)
+            k = kw[:, : self.head_dim]
+            weights = kw[:, self.head_dim :]
+
+            k = self.k_norm(k)
+            k_pe, k_nope = torch.split(
+                k, [self.rope_dim, self.head_dim - self.rope_dim], dim=-1
+            )
+
+            q_pe, k_pe = rotary_emb(positions, q_pe, k_pe.unsqueeze(1))
+            # Note: RoPE (NeoX) can introduce extra leading dimensions during
+            # compilation so we need to reshape back to token-flattened shapes
+            q_pe = q_pe.reshape(-1, self.n_head, self.rope_dim)
+            k_pe = k_pe.reshape(-1, 1, self.rope_dim)
+
+            # `rotary_emb` is shape-preserving; `q_pe` is already
+            # [num_tokens, n_head, rope_dim].
+            q = torch.cat([q_pe, q_nope], dim=-1)
+            # `k_pe` is [num_tokens, 1, rope_dim] (MQA).
+            k = torch.cat([k_pe.squeeze(-2), k_nope], dim=-1)
 
         # we only quant q here since k quant is fused with cache insertion
         q = q.view(-1, self.head_dim)
@@ -723,7 +729,6 @@ def forward(
         q_fp8 = q_fp8.view(-1, self.n_head, self.head_dim)
         q_scale = q_scale.view(-1, self.n_head, 1)
 
-        weights, _ = self.weights_proj(hidden_states)
         weights = (
             weights.unsqueeze(-1) * q_scale * self.softmax_scale * self.n_head**-0.5
         )
@@ -732,6 +737,46 @@ def forward(
         return self.indexer_op(hidden_states, q_fp8, k, weights)
 
 
+def _try_load_fp8_indexer_wk(name, tensor, buf, params_dict, loaded_params):
+    """
+    We fuse the WK and weights_proj projections, but in some checkpoints WK is stored
+    in FP8 with a separate weight_scale_inv, while weights_proj is stored in BF16.
+    Upcasting to BF16 during loading enables the fusion. This function loads the FP8 WK
+    weights and scale, and when both are available, dequantizes to BF16 and stores into
+    the fused wk_weights_proj.weight parameter.
+    """
+    if "indexer.wk." not in name or "wk_weights" in name:
+        return False  # Weight is not an isolated WK weight for the indexer, ignore.
+    is_weight = name.endswith(".weight") and tensor.dtype == torch.float8_e4m3fn
+    is_scale = "weight_scale_inv" in name
+    if not is_weight and not is_scale:
+        return False  # WK is not in FP8 format, ignore.
+    # Buffer this tensor (weight or scale) until both have arrived.
+    layer_prefix = name.rsplit(".wk.", 1)[0]  # e.g. "model.layers.0.self_attn.indexer"
+    entry = buf.setdefault(layer_prefix, {})
+    entry["weight" if is_weight else "scale"] = tensor
+    if "weight" not in entry or "scale" not in entry:
+        return True  # still waiting for the other param
+
+    # We have both weight and scale: dequantize FP8 to BF16.
+    weight_fp8, scale_inv = entry["weight"], entry["scale"]
+    del buf[layer_prefix]
+    block_size = weight_fp8.shape[1] // scale_inv.shape[1]
+    weight_bf16 = scaled_dequantize(
+        weight_fp8,
+        scale_inv,
+        group_shape=GroupShape(block_size, block_size),
+        out_dtype=torch.bfloat16,
+    )
+
+    # Load the dequantized weight into shard 0 of the fused buffer.
+    fused_name = f"{layer_prefix}.wk_weights_proj.weight"
+    param = params_dict[fused_name]
+    param.weight_loader(param, weight_bf16, 0)
+    loaded_params.add(fused_name)
+    return True
+
+
 def _min_latency_fused_qkv_a_proj_impl(
     input_: torch.Tensor,
     weight: torch.Tensor,
@@ -940,6 +985,7 @@ def __init__(
 
         self.is_v32 = hasattr(config, "index_topk")
 
+        _skip_topk = False
         if self.is_v32:
             self.indexer_rope_emb = get_rope(
                 qk_rope_head_dim,
@@ -957,6 +1003,21 @@ def __init__(
                 topk_indices_buffer,
                 f"{prefix}.indexer",
             )
+
+            # Enable IndexCache for DeepSeek models to reduce redundant top-k
+            # token selection computations in sparse attention.
+            use_index_cache = getattr(config, "use_index_cache", False)
+            if use_index_cache:
+                # IndexCache config
+                # Refer: https://arxiv.org/abs/2603.12201 for more details.
+                _index_topk_freq = getattr(config, "index_topk_freq", 1)
+                _index_topk_pattern = getattr(config, "index_topk_pattern", None)
+                layer_id = extract_layer_index(prefix)
+                if _index_topk_pattern is None:
+                    _skip_topk = max(layer_id - 1, 0) % _index_topk_freq != 0
+                elif 0 <= layer_id < len(_index_topk_pattern):
+                    _skip_topk = _index_topk_pattern[layer_id] == "S"
+
         else:
             self.indexer_rope_emb = None
             self.indexer = None
@@ -994,6 +1055,7 @@ def __init__(
             cache_config,
             quant_config,
             prefix,
+            skip_topk=_skip_topk,
         )
 
     def forward(
@@ -1176,7 +1238,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.start_layer, self.end_layer, self.layers = make_layers(
             config.num_hidden_layers,
             lambda prefix: DeepseekV2DecoderLayer(
-                vllm_config, prefix, topk_indices_buffer=topk_indices_buffer
+                vllm_config,
+                prefix,
+                topk_indices_buffer=topk_indices_buffer,
             ),
             prefix=f"{prefix}.layers",
         )
@@ -1191,6 +1255,16 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
 
         self.aux_hidden_state_layers = tuple[int, ...]()
 
+        # Needed by load_weights
+        qk_nope_head_dim = getattr(config, "qk_nope_head_dim", 0)
+        qk_rope_head_dim = getattr(config, "qk_rope_head_dim", 0)
+        self.use_mha = config.model_type == "deepseek" or all(
+            dim == 0 for dim in (qk_nope_head_dim, qk_rope_head_dim)
+        )
+        self.num_redundant_experts = (
+            vllm_config.parallel_config.eplb_config.num_redundant_experts
+        )
+
     def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
         return self.embed_tokens(input_ids)
 
@@ -1252,174 +1326,6 @@ def forward(
             return hidden_states, aux_hidden_states
         return hidden_states
 
-
-class DeepseekV2MixtureOfExperts(MixtureOfExperts):
-    moe_mlp_layers: list[DeepseekV2MoE]
-    """
-    List of MoE MLP layers in the model.
-    """
-
-    def extract_moe_parameters(self, example_moe: DeepseekV2MoE | None):
-        if example_moe is None:
-            self.num_moe_layers = 0
-            self.num_expert_groups = 0
-            self.num_logical_experts = 0
-            self.num_physical_experts = 0
-            self.num_local_physical_experts = 0
-            self.num_routed_experts = 0
-            self.num_shared_experts = 0
-            self.num_redundant_experts = 0
-            logger.warning("DeepSeekV2: No DeepseekV2MoE layer found in model.layers.")
-        else:
-            self.num_logical_experts = example_moe.n_logical_experts
-            self.num_physical_experts = example_moe.n_physical_experts
-            self.num_local_physical_experts = example_moe.n_local_physical_experts
-            self.num_routed_experts = example_moe.n_routed_experts
-            self.num_shared_experts = example_moe.n_shared_experts
-            self.num_redundant_experts = example_moe.n_redundant_experts
-
-    def update_physical_experts_metadata(
-        self,
-        num_physical_experts: int,
-        num_local_physical_experts: int,
-    ) -> None:
-        assert self.num_local_physical_experts == num_local_physical_experts
-        self.num_physical_experts = num_physical_experts
-        self.num_local_physical_experts = num_local_physical_experts
-        self.num_redundant_experts = num_physical_experts - self.num_logical_experts
-        for moe in self.moe_mlp_layers:
-            moe.n_local_physical_experts = num_local_physical_experts
-            moe.n_physical_experts = num_physical_experts
-            moe.n_redundant_experts = self.num_redundant_experts
-            moe.experts.update_expert_map()
-
-
-class DeepseekV2ForCausalLM(
-    nn.Module,
-    SupportsPP,
-    DeepseekV2MixtureOfExperts,
-    SupportsLoRA,
-    SupportsEagle,
-    SupportsEagle3,
-):
-    packed_modules_mapping = {
-        "gate_up_proj": ["gate_proj", "up_proj"],
-    }
-    model_cls = DeepseekV2Model
-
-    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
-        super().__init__()
-        config = vllm_config.model_config.hf_config
-        quant_config = vllm_config.quant_config
-        self.config = config
-        self.quant_config = quant_config
-
-        qk_nope_head_dim = getattr(config, "qk_nope_head_dim", 0)
-        qk_rope_head_dim = getattr(config, "qk_rope_head_dim", 0)
-        self.use_mha = config.model_type == "deepseek" or all(
-            dim == 0 for dim in (qk_nope_head_dim, qk_rope_head_dim)
-        )
-
-        if self.use_mha:
-            self.packed_modules_mapping["qkv_proj"] = ["q_proj", "k_proj", "v_proj"]
-
-        # `packed_modules_mapping` needs to be modified before
-        # initializing DeepseekV2Model, as it is passed inplace to
-        # quantization config init and may be used to select the
-        # quant_method for relevant layers during initialization.
-        self.fuse_qkv_a_proj = (
-            hasattr(config, "q_lora_rank") and config.q_lora_rank is not None
-        )
-        if self.fuse_qkv_a_proj:
-            self.packed_modules_mapping["fused_qkv_a_proj"] = [
-                "q_a_proj",
-                "kv_a_proj_with_mqa",
-            ]
-
-        self.model = self.model_cls(
-            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
-        )
-        if get_pp_group().is_last_rank:
-            self.lm_head = ParallelLMHead(
-                config.vocab_size,
-                config.hidden_size,
-                quant_config=quant_config,
-                prefix=maybe_prefix(prefix, "lm_head"),
-            )
-        else:
-            self.lm_head = PPMissingLayer()
-        self.logits_processor = LogitsProcessor(config.vocab_size)
-        self.make_empty_intermediate_tensors = (
-            self.model.make_empty_intermediate_tensors
-        )
-        # Set MoE hyperparameters
-        self.num_moe_layers = (
-            self.config.num_hidden_layers - self.config.first_k_dense_replace
-        )
-        self.set_moe_parameters()
-
-    def set_moe_parameters(self):
-        self.expert_weights = []
-
-        self.num_expert_groups = getattr(self.config, "n_group", 1)
-
-        self.moe_layers = []
-        self.moe_mlp_layers = []
-        example_moe = None
-        for layer in self.model.layers:
-            if isinstance(layer, PPMissingLayer):
-                continue
-
-            assert isinstance(layer, DeepseekV2DecoderLayer)
-            if isinstance(layer.mlp, DeepseekV2MoE):
-                # Pick last one layer since the first ones may be dense layers.
-                example_moe = layer.mlp
-                self.moe_mlp_layers.append(layer.mlp)
-                self.moe_layers.append(layer.mlp.experts)
-
-        self.extract_moe_parameters(example_moe)
-
-    def set_aux_hidden_state_layers(self, layers: tuple[int, ...]) -> None:
-        self.model.aux_hidden_state_layers = layers
-
-    def get_eagle3_aux_hidden_state_layers(self) -> tuple[int, ...]:
-        num_layers = len(self.model.layers)
-        return (2, num_layers // 2, num_layers - 3)
-
-    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
-        return self.model.embed_input_ids(input_ids)
-
-    def forward(
-        self,
-        input_ids: torch.Tensor | None,
-        positions: torch.Tensor,
-        intermediate_tensors: IntermediateTensors | None = None,
-        inputs_embeds: torch.Tensor | None = None,
-    ) -> torch.Tensor | IntermediateTensors:
-        hidden_states = self.model(
-            input_ids, positions, intermediate_tensors, inputs_embeds
-        )
-        return hidden_states
-
-    def compute_logits(
-        self,
-        hidden_states: torch.Tensor,
-    ) -> torch.Tensor | None:
-        logits = self.logits_processor(self.lm_head, hidden_states)
-        return logits
-
-    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
-        # Params for weights, fp8 weight scales, fp8 activation scales
-        # (param_name, weight_name, expert_id, shard_id)
-        return SharedFusedMoE.make_expert_params_mapping(
-            self,
-            ckpt_gate_proj_name="gate_proj",
-            ckpt_down_proj_name="down_proj",
-            ckpt_up_proj_name="up_proj",
-            num_experts=self.config.n_routed_experts,
-            num_redundant_experts=0,
-        )
-
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         rocm_aiter_moe_shared_expert_enabled = (
             rocm_aiter_ops.is_fusion_moe_shared_experts_enabled()
@@ -1438,6 +1344,14 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
             ("qkv_proj", "k_proj", "k"),
             ("qkv_proj", "v_proj", "v"),
         ]
+        # Fused indexer wk + weights_proj (shard 0 = wk, shard 1 = weights_proj)
+        _pending_wk_fp8: dict = {}  # When WK is in FP8, we dequant to BF16 for fusion
+        indexer_fused_mapping = [
+            ("wk_weights_proj", "wk", 0),
+            ("wk_weights_proj", "weights_proj", 1),
+        ]
+        stacked_params_mapping.extend(indexer_fused_mapping)
+
         if self.use_mha:
             stacked_params_mapping.extend(mha_params_mapping)
         else:
@@ -1445,7 +1359,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        expert_params_mapping = SharedFusedMoE.make_expert_params_mapping(
+        expert_params_mapping = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -1473,6 +1387,11 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                 rocm_aiter_moe_shared_expert_enabled and ("mlp.shared_experts" in name)
             )
 
+            if _try_load_fp8_indexer_wk(
+                name, loaded_weight, _pending_wk_fp8, params_dict, loaded_params
+            ):
+                continue
+
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 # Skip non-stacked layers and experts (experts handled below).
                 if weight_name not in name:
@@ -1627,6 +1546,178 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         return loaded_params
 
 
+class DeepseekV2MixtureOfExperts(MixtureOfExperts):
+    moe_mlp_layers: list[DeepseekV2MoE]
+    """
+    List of MoE MLP layers in the model.
+    """
+
+    def extract_moe_parameters(self, example_moe: DeepseekV2MoE | None):
+        if example_moe is None:
+            self.num_moe_layers = 0
+            self.num_expert_groups = 0
+            self.num_logical_experts = 0
+            self.num_physical_experts = 0
+            self.num_local_physical_experts = 0
+            self.num_routed_experts = 0
+            self.num_shared_experts = 0
+            self.num_redundant_experts = 0
+            logger.warning("DeepSeekV2: No DeepseekV2MoE layer found in model.layers.")
+        else:
+            self.num_logical_experts = example_moe.n_logical_experts
+            self.num_physical_experts = example_moe.n_physical_experts
+            self.num_local_physical_experts = example_moe.n_local_physical_experts
+            self.num_routed_experts = example_moe.n_routed_experts
+            self.num_shared_experts = example_moe.n_shared_experts
+            self.num_redundant_experts = example_moe.n_redundant_experts
+
+    def update_physical_experts_metadata(
+        self,
+        num_physical_experts: int,
+        num_local_physical_experts: int,
+    ) -> None:
+        assert self.num_local_physical_experts == num_local_physical_experts
+        self.num_physical_experts = num_physical_experts
+        self.num_local_physical_experts = num_local_physical_experts
+        self.num_redundant_experts = num_physical_experts - self.num_logical_experts
+        for moe in self.moe_mlp_layers:
+            moe.n_local_physical_experts = num_local_physical_experts
+            moe.n_physical_experts = num_physical_experts
+            moe.n_redundant_experts = self.num_redundant_experts
+            moe.experts.update_expert_map()
+
+
+class DeepseekV2ForCausalLM(
+    nn.Module,
+    SupportsPP,
+    DeepseekV2MixtureOfExperts,
+    SupportsLoRA,
+    SupportsEagle,
+    SupportsEagle3,
+):
+    packed_modules_mapping = {
+        "gate_up_proj": ["gate_proj", "up_proj"],
+    }
+    model_cls = DeepseekV2Model
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        self.config = config
+        self.quant_config = quant_config
+
+        qk_nope_head_dim = getattr(config, "qk_nope_head_dim", 0)
+        qk_rope_head_dim = getattr(config, "qk_rope_head_dim", 0)
+        self.use_mha = config.model_type == "deepseek" or all(
+            dim == 0 for dim in (qk_nope_head_dim, qk_rope_head_dim)
+        )
+
+        if self.use_mha:
+            self.packed_modules_mapping["qkv_proj"] = ["q_proj", "k_proj", "v_proj"]
+
+        # `packed_modules_mapping` needs to be modified before
+        # initializing DeepseekV2Model, as it is passed inplace to
+        # quantization config init and may be used to select the
+        # quant_method for relevant layers during initialization.
+        self.fuse_qkv_a_proj = (
+            hasattr(config, "q_lora_rank") and config.q_lora_rank is not None
+        )
+        if self.fuse_qkv_a_proj:
+            self.packed_modules_mapping["fused_qkv_a_proj"] = [
+                "q_a_proj",
+                "kv_a_proj_with_mqa",
+            ]
+
+        self.model = self.model_cls(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+        if get_pp_group().is_last_rank:
+            self.lm_head = ParallelLMHead(
+                config.vocab_size,
+                config.hidden_size,
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "lm_head"),
+            )
+        else:
+            self.lm_head = PPMissingLayer()
+        self.logits_processor = LogitsProcessor(config.vocab_size)
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors
+        )
+        # Set MoE hyperparameters
+        self.num_moe_layers = (
+            self.config.num_hidden_layers - self.config.first_k_dense_replace
+        )
+        self.set_moe_parameters()
+
+    def set_moe_parameters(self):
+        self.expert_weights = []
+
+        self.num_expert_groups = getattr(self.config, "n_group", 1)
+
+        self.moe_layers = []
+        self.moe_mlp_layers = []
+        example_moe = None
+        for layer in self.model.layers:
+            if isinstance(layer, PPMissingLayer):
+                continue
+
+            assert isinstance(layer, DeepseekV2DecoderLayer)
+            if isinstance(layer.mlp, DeepseekV2MoE):
+                # Pick last one layer since the first ones may be dense layers.
+                example_moe = layer.mlp
+                self.moe_mlp_layers.append(layer.mlp)
+                self.moe_layers.append(layer.mlp.experts)
+
+        self.extract_moe_parameters(example_moe)
+
+    def set_aux_hidden_state_layers(self, layers: tuple[int, ...]) -> None:
+        self.model.aux_hidden_state_layers = layers
+
+    def get_eagle3_aux_hidden_state_layers(self) -> tuple[int, ...]:
+        num_layers = len(self.model.layers)
+        return (2, num_layers // 2, num_layers - 3)
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        hidden_states = self.model(
+            input_ids, positions, intermediate_tensors, inputs_embeds
+        )
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        logits = self.logits_processor(self.lm_head, hidden_states)
+        return logits
+
+    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
+        # Params for weights, fp8 weight scales, fp8 activation scales
+        # (param_name, weight_name, expert_id, shard_id)
+        return fused_moe_make_expert_params_mapping(
+            self,
+            ckpt_gate_proj_name="gate_proj",
+            ckpt_down_proj_name="down_proj",
+            ckpt_up_proj_name="up_proj",
+            num_experts=self.config.n_routed_experts,
+            num_redundant_experts=0,
+        )
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(weights)
+
+
 class DeepseekForCausalLM(DeepseekV2ForCausalLM):
     pass
 
@@ -1650,6 +1741,8 @@ def get_spec_layer_idx_from_weight_name(
     ):
         layer_idx = config.num_hidden_layers
         for i in range(config.num_nextn_predict_layers):
-            if weight_name.startswith(f"model.layers.{layer_idx + i}."):
+            if weight_name.startswith(
+                f"model.layers.{layer_idx + i}."
+            ) or weight_name.startswith(f"layers.{layer_idx + i}."):
                 return layer_idx + i
     return None
diff --git a/vllm/model_executor/models/dots1.py b/vllm/model_executor/models/dots1.py
index 4e393145462a..f58fc4da92b2 100644
--- a/vllm/model_executor/models/dots1.py
+++ b/vllm/model_executor/models/dots1.py
@@ -37,11 +37,13 @@
 from vllm.distributed import (
     get_pp_group,
     get_tensor_model_parallel_world_size,
-    tensor_model_parallel_all_reduce,
 )
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
@@ -120,7 +122,6 @@ def __init__(
         prefix: str = "",
     ):
         super().__init__()
-        self.tp_size = get_tensor_model_parallel_world_size()
         self.routed_scaling_factor = config.routed_scaling_factor
         self.n_shared_experts = config.n_shared_experts
 
@@ -157,13 +158,12 @@ def __init__(
         else:
             self.shared_experts = None
 
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.shared_experts,
             num_experts=config.n_routed_experts,
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=config.norm_topk_prob,
             quant_config=quant_config,
             use_grouped_topk=True,
@@ -171,9 +171,9 @@ def __init__(
             topk_group=config.topk_group,
             prefix=f"{prefix}.experts",
             scoring_func=config.scoring_func,
-            # we do scaling outside, set factor to 1.0 to avoid double mul
-            routed_scaling_factor=1.0,
             e_score_correction_bias=self.gate.e_score_correction_bias,
+            routed_scaling_factor=self.routed_scaling_factor,
+            apply_routed_scale_to_output=True,
         )
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
@@ -182,16 +182,9 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
 
         router_logits, _ = self.gate(hidden_states)
 
-        shared_out, routed_out = self.experts(
+        final_hidden_states = self.experts(
             hidden_states=hidden_states, router_logits=router_logits
         )
-        if self.shared_experts is not None:
-            final_hidden_states = (routed_out + shared_out) * self.routed_scaling_factor
-        else:
-            final_hidden_states = routed_out * self.routed_scaling_factor
-
-        if self.tp_size > 1:
-            final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
         return final_hidden_states.view(num_tokens, hidden_dim)
 
 
@@ -423,7 +416,7 @@ def forward(
         return hidden_states
 
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
-        return SharedFusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/ernie45_moe.py b/vllm/model_executor/models/ernie45_moe.py
index f038cfb21f28..a2b0eccde65f 100644
--- a/vllm/model_executor/models/ernie45_moe.py
+++ b/vllm/model_executor/models/ernie45_moe.py
@@ -42,7 +42,10 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
@@ -188,13 +191,12 @@ def __init__(
         else:
             self.shared_experts = None
 
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.shared_experts,
             num_experts=config.moe_num_experts,
             top_k=config.moe_k,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=True,
             quant_config=quant_config,
             prefix=f"{prefix}.experts",
@@ -215,16 +217,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
             hidden_states=hidden_states, router_logits=router_logits
         )
 
-        if self.has_shared_experts:
-            final_hidden_states = final_hidden_states[0] + final_hidden_states[1]
-        else:
-            final_hidden_states = final_hidden_states[1]
-
-        if self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
-                final_hidden_states
-            )
-
         return final_hidden_states.view(orig_shape)
 
 
@@ -496,7 +488,7 @@ def forward(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return SharedFusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -678,7 +670,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.num_moe_layers = len(moe_layers_indices)
         self.num_expert_groups = 1
 
-        self.moe_layers: list[SharedFusedMoE] = []
+        self.moe_layers: list[FusedMoE] = []
         example_moe = None
         for layer in self.model.layers:
             if isinstance(layer, PPMissingLayer):
diff --git a/vllm/model_executor/models/ernie45_vl.py b/vllm/model_executor/models/ernie45_vl.py
index 08a4c4862ed6..e7e71037cee9 100644
--- a/vllm/model_executor/models/ernie45_vl.py
+++ b/vllm/model_executor/models/ernie45_vl.py
@@ -23,9 +23,8 @@
 # limitations under the License.
 """Inference-only Ernie VL model compatible with HuggingFace weights."""
 
-import itertools
 import math
-from collections.abc import Callable, Iterable, Mapping, Sequence
+from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
 from functools import partial
 from typing import Annotated, Any, Literal
 
@@ -1401,131 +1400,62 @@ def get_mrope_input_positions(
         input_tokens: list[int],
         mm_features: list[MultiModalFeatureSpec],
     ) -> tuple[torch.Tensor, int]:
-        kwargs = MultiModalFeatureSpec.gather_kwargs(
-            mm_features,
-            {"image_grid_thw", "video_grid_thw"},
-        )
-        image_grid_thw = [item.tolist() for item in kwargs.get("image_grid_thw", [])]
-        video_grid_thw = [item.tolist() for item in kwargs.get("video_grid_thw", [])]
-
-        hf_config = self.config
-        image_token_id = hf_config.im_patch_id
-        video_start_token_id = hf_config.video_start_token_id
-        video_end_token_id = hf_config.video_end_token_id
-        spatial_conv_size = hf_config.spatial_conv_size
-        temporal_conv_size = hf_config.temporal_conv_size
         llm_pos_ids_list: list = []
+        st = 0
+
+        for (
+            offset,
+            llm_grid_t,
+            llm_grid_h,
+            llm_grid_w,
+        ) in self.iter_mm_grid_thw(mm_features):
+            text_len = offset - st
+            st_idx = llm_pos_ids_list[-1].max() + 1 if len(llm_pos_ids_list) > 0 else 0
+            llm_pos_ids_list.append(
+                np.broadcast_to(np.arange(text_len), (3, text_len)) + st_idx
+            )
 
-        if image_grid_thw or video_grid_thw:
-            input_token_type: list[str] = []
-            video_check_flg = False
-            for token in input_tokens:
-                if token == video_start_token_id:
-                    video_check_flg = True
-                elif token == video_end_token_id:
-                    video_check_flg = False
-
-                if (token == image_token_id) and (video_check_flg is False):
-                    input_token_type.append("image")
-                elif (token == image_token_id) and (video_check_flg is True):
-                    input_token_type.append("video")
-                else:
-                    input_token_type.append("text")
-
-            input_type_group: list[tuple[str, int, int]] = []
-            for key, group_iter in itertools.groupby(
-                enumerate(input_token_type), lambda x: x[1]
-            ):
-                group_list = list(group_iter)
-                start_index = group_list[0][0]
-                end_index = group_list[-1][0] + 1
-                input_type_group.append((key, start_index, end_index))
-
-            video_frame_num = 1
-            mm_data_idx = 0
-            for modality_type, start_idx, end_idx in input_type_group:
-                st_idx = (
-                    llm_pos_ids_list[-1].max() + 1 if len(llm_pos_ids_list) > 0 else 0
-                )
-                if modality_type == "image":
-                    t, h, w = image_grid_thw[mm_data_idx]
-                    llm_grid_t, llm_grid_h, llm_grid_w = (
-                        t,
-                        h // spatial_conv_size,
-                        w // spatial_conv_size,
-                    )
-
-                    t_index = (
-                        torch.arange(llm_grid_t)
-                        .view(-1, 1)
-                        .expand(-1, llm_grid_h * llm_grid_w)
-                        .flatten()
-                    )
-                    h_index = (
-                        torch.arange(llm_grid_h)
-                        .view(1, -1, 1)
-                        .expand(llm_grid_t, -1, llm_grid_w)
-                        .flatten()
-                    )
-                    w_index = (
-                        torch.arange(llm_grid_w)
-                        .view(1, 1, -1)
-                        .expand(llm_grid_t, llm_grid_h, -1)
-                        .flatten()
-                    )
-                    llm_pos_ids_list.append(
-                        torch.stack([t_index, h_index, w_index]) + st_idx
-                    )
-                    mm_data_idx += 1
-
-                elif modality_type == "video":
-                    t, h, w = video_grid_thw[mm_data_idx]
-                    llm_grid_t, llm_grid_h, llm_grid_w = (
-                        t // temporal_conv_size,
-                        h // spatial_conv_size,
-                        w // spatial_conv_size,
-                    )
-
-                    for t_idx in range(llm_grid_t):
-                        t_index = (
-                            torch.tensor(t_idx)
-                            .view(-1, 1)
-                            .expand(-1, llm_grid_h * llm_grid_w)
-                            .flatten()
-                        )
-                        h_index = (
-                            torch.arange(llm_grid_h)
-                            .view(1, -1, 1)
-                            .expand(1, -1, llm_grid_w)
-                            .flatten()
-                        )
-                        w_index = (
-                            torch.arange(llm_grid_w)
-                            .view(1, 1, -1)
-                            .expand(1, llm_grid_h, -1)
-                            .flatten()
-                        )
-                        llm_pos_ids_list.append(
-                            torch.stack([t_index, h_index, w_index]) + st_idx
-                        )
-
-                    mm_data_idx += 1
-                    video_frame_num += 1
-
-                else:
-                    text_len = end_idx - start_idx
-                    llm_pos_ids_list.append(
-                        torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx
-                    )
-                    video_frame_num = 1
-
-        else:
-            text_len = len(input_tokens)
-            llm_pos_ids_list.append(torch.arange(text_len).view(1, -1).expand(3, -1))
+            grid_indices = np.indices((llm_grid_t, llm_grid_h, llm_grid_w)).reshape(
+                3, -1
+            )
+            llm_pos_ids_list.append(grid_indices + text_len + st_idx)
+            st = offset + llm_grid_t * llm_grid_h * llm_grid_w
+
+        if st < len(input_tokens):
+            text_len = len(input_tokens) - st
+            st_idx = llm_pos_ids_list[-1].max() + 1 if len(llm_pos_ids_list) > 0 else 0
+            llm_pos_ids_list.append(
+                np.broadcast_to(np.arange(text_len), (3, text_len)) + st_idx
+            )
 
-        llm_positions = torch.cat(llm_pos_ids_list, dim=1).reshape(3, -1)
+        llm_positions = np.concatenate(llm_pos_ids_list, axis=1).reshape(3, -1)
         mrope_position_delta = (llm_positions.max() + 1 - len(input_tokens)).item()
-        return llm_positions, mrope_position_delta
+        return torch.from_numpy(llm_positions), mrope_position_delta
+
+    def iter_mm_grid_thw(
+        self, mm_features: list[MultiModalFeatureSpec]
+    ) -> Iterator[tuple[int, int, int, int]]:
+        spatial_conv_size = self.config.spatial_conv_size
+        temporal_conv_size = self.config.temporal_conv_size
+
+        for mm_feature in sorted(mm_features, key=lambda f: f.mm_position.offset):
+            if mm_feature.data is None:
+                raise ValueError("M-RoPE calculation requires multimodal feature data")
+
+            offset = mm_feature.mm_position.offset
+            if mm_feature.modality == "image":
+                t, h, w = mm_feature.data["image_grid_thw"].data.tolist()
+                yield offset, t, h // spatial_conv_size, w // spatial_conv_size
+            elif mm_feature.modality == "video":
+                t, h, w = mm_feature.data["video_grid_thw"].data.tolist()
+                yield (
+                    offset,
+                    t // temporal_conv_size,
+                    h // spatial_conv_size,
+                    w // spatial_conv_size,
+                )
+            else:
+                raise ValueError(f"Unsupported modality: {mm_feature.modality}")
 
     def _parse_and_validate_image_input(
         self, **kwargs: object
diff --git a/vllm/model_executor/models/ernie45_vl_moe.py b/vllm/model_executor/models/ernie45_vl_moe.py
index 418fdcfa072b..38ed756ba415 100644
--- a/vllm/model_executor/models/ernie45_vl_moe.py
+++ b/vllm/model_executor/models/ernie45_vl_moe.py
@@ -36,7 +36,10 @@
 from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
 from vllm.logger import init_logger
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     QKVParallelLinear,
@@ -257,13 +260,12 @@ def __init__(
                 prefix=f"{prefix}.text_experts_gate",
             )
 
-            self.text_experts = SharedFusedMoE(
+            self.text_experts = FusedMoE(
                 shared_experts=self.shared_experts,
                 num_experts=config.moe_num_experts[0],
                 top_k=config.moe_k,
                 hidden_size=config.hidden_size,
                 intermediate_size=config.moe_intermediate_size[0],
-                reduce_results=False,
                 renormalize=True,
                 quant_config=quant_config,
                 e_score_correction_bias=self.e_score_correction_bias[0],
@@ -295,13 +297,12 @@ def __init__(
                 prefix=f"{prefix}.vision_experts_gate",
             )
 
-            self.vision_experts = SharedFusedMoE(
+            self.vision_experts = FusedMoE(
                 shared_experts=self.shared_experts,
                 num_experts=config.moe_num_experts[1],
                 top_k=config.moe_k,
                 hidden_size=config.hidden_size,
                 intermediate_size=config.moe_intermediate_size[1],
-                reduce_results=False,
                 renormalize=True,
                 quant_config=quant_config,
                 e_score_correction_bias=self.e_score_correction_bias[1],
@@ -342,9 +343,6 @@ def forward(
             visual_token_mask = visual_token_mask.repeat(1, self.hidden_size).bool()
             text_token_mask = ~visual_token_mask
             final_experts_hidden_states = torch.zeros_like(hidden_states)
-            final_shared_output = (
-                torch.zeros_like(hidden_states) if self.has_shared_experts else None
-            )
 
             text_hidden_states = hidden_states[text_token_mask].reshape(
                 -1, self.hidden_size
@@ -356,26 +354,20 @@ def forward(
             text_router_logits, _ = self.text_experts_gate(
                 text_hidden_states.to(dtype=torch.float32)
             )
-            text_shared_output, text_experts_output = self.text_experts(
+            text_output = self.text_experts(
                 hidden_states=text_hidden_states, router_logits=text_router_logits
             )
-            final_experts_hidden_states[text_token_mask] = text_experts_output.flatten()
-            if self.has_shared_experts:
-                final_shared_output[text_token_mask] = text_shared_output.flatten()
+            final_experts_hidden_states[text_token_mask] = text_output.flatten()
 
             vision_router_logits, _ = self.vision_experts_gate(
                 vision_hidden_states.to(dtype=torch.float32)
             )
-            vision_shared_output, vision_experts_output = self.vision_experts(
+            vision_output = self.vision_experts(
                 hidden_states=vision_hidden_states, router_logits=vision_router_logits
             )
-            final_experts_hidden_states[visual_token_mask] = (
-                vision_experts_output.flatten()
-            )
-            if self.has_shared_experts:
-                final_shared_output[visual_token_mask] = vision_shared_output.flatten()
+            final_experts_hidden_states[visual_token_mask] = vision_output.flatten()
 
-            final_hidden_states = (final_shared_output, final_experts_hidden_states)
+            final_hidden_states = final_experts_hidden_states
         else:
             # only text modal input
             text_router_logits, _ = self.text_experts_gate(
@@ -386,20 +378,6 @@ def forward(
                 hidden_states=hidden_states, router_logits=text_router_logits
             )
 
-        if self.has_shared_experts:
-            # for shared_experts model
-            final_hidden_states = final_hidden_states[0] + final_hidden_states[1]
-        else:
-            # for not shared_experts model
-            final_hidden_states = final_hidden_states[1]
-
-        if self.tp_size > 1:
-            final_hidden_states = (
-                self.text_experts.maybe_all_reduce_tensor_model_parallel(
-                    final_hidden_states
-                )
-            )
-
         return final_hidden_states.view(orig_shape)
 
 
@@ -674,7 +652,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        expert_params_mapping = SharedFusedMoE.make_expert_params_mapping(
+        expert_params_mapping = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/exaone4.py b/vllm/model_executor/models/exaone4.py
index 485b145b9cdf..04708de93d39 100644
--- a/vllm/model_executor/models/exaone4.py
+++ b/vllm/model_executor/models/exaone4.py
@@ -75,6 +75,7 @@ def __init__(
         reduce_results: bool = True,
         bias: bool = False,
         prefix: str = "",
+        use_data_parallel: bool = False,
     ) -> None:
         super().__init__()
         self.gate_up_proj = MergedColumnParallelLinear(
@@ -83,6 +84,7 @@ def __init__(
             bias=bias,
             quant_config=quant_config,
             prefix=f"{prefix}.gate_up_proj",
+            disable_tp=use_data_parallel,
         )
         self.down_proj = RowParallelLinear(
             input_size=intermediate_size,
@@ -91,6 +93,7 @@ def __init__(
             quant_config=quant_config,
             reduce_results=reduce_results,
             prefix=f"{prefix}.down_proj",
+            disable_tp=use_data_parallel,
         )
         if hidden_act != "silu":
             raise ValueError(
diff --git a/vllm/model_executor/models/exaone4_5.py b/vllm/model_executor/models/exaone4_5.py
new file mode 100644
index 000000000000..b44708466cf5
--- /dev/null
+++ b/vllm/model_executor/models/exaone4_5.py
@@ -0,0 +1,362 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# ruff: noqa: E501
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Inference-only EXAONE-4.5 model compatible with HuggingFace weights."""
+
+from collections.abc import Callable, Iterable
+from functools import partial
+
+import einops
+import torch
+import torch.nn as nn
+from transformers.models.exaone4_5 import (
+    Exaone4_5_Config,
+    Exaone4_5_Processor,
+)
+from transformers.models.exaone4_5.configuration_exaone4_5 import Exaone4_5_VisionConfig
+
+from vllm.compilation.decorators import (
+    should_torch_compile_mm_encoder,
+    support_torch_compile,
+)
+from vllm.config import VllmConfig
+from vllm.distributed import parallel_state
+from vllm.distributed import utils as dist_utils
+from vllm.logger import init_logger
+from vllm.model_executor.layers.attention.mm_encoder_attention import MMEncoderAttention
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import QKVParallelLinear, RowParallelLinear
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.rotary_embedding.common import (
+    ApplyRotaryEmb,
+)
+from vllm.model_executor.models.exaone4 import Exaone4GatedMLP as Exaone4_5_VisionMLP
+from vllm.model_executor.models.qwen2_5_vl import (
+    Qwen2_5_VisionTransformer,
+    Qwen2_5_VLForConditionalGeneration,
+    Qwen2VLProcessingInfo,
+)
+from vllm.multimodal import MULTIMODAL_REGISTRY
+
+from .qwen2_vl import Qwen2VLDummyInputsBuilder as Exaone4_5_DummyInputsBuilder
+from .qwen2_vl import Qwen2VLMultiModalProcessor as Exaone4_5_MultiModalProcessor
+from .utils import AutoWeightsLoader, init_vllm_registered_model, maybe_prefix
+
+logger = init_logger(__name__)
+
+
+# === Vision Encoder === #
+
+
+class EXAONE4_5_VisionAttention(nn.Module):
+    def __init__(
+        self,
+        embed_dim: int,
+        num_heads: int,
+        num_kv_heads: int,
+        projection_size: int,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+        use_data_parallel: bool = False,
+    ) -> None:
+        super().__init__()
+        # Per attention head and per partition values.
+        self.tp_size = (
+            1
+            if use_data_parallel
+            else parallel_state.get_tensor_model_parallel_world_size()
+        )
+        self.tp_rank = parallel_state.get_tensor_model_parallel_rank()
+        self.hidden_size_per_attention_head = dist_utils.divide(
+            projection_size, num_heads
+        )
+        self.num_attention_heads_per_partition = dist_utils.divide(
+            num_heads, self.tp_size
+        )
+
+        self.total_num_heads = num_heads
+        self.total_num_kv_heads = num_kv_heads
+        self.num_heads = num_heads // self.tp_size
+        self.num_kv_heads = max(1, num_kv_heads // self.tp_size)
+
+        self.head_dim = embed_dim // num_heads
+        self.q_size = self.num_heads * self.head_dim
+        self.kv_size = self.num_kv_heads * self.head_dim
+
+        self.qkv = QKVParallelLinear(
+            hidden_size=embed_dim,
+            head_size=self.hidden_size_per_attention_head,
+            total_num_heads=self.total_num_heads,
+            total_num_kv_heads=self.total_num_kv_heads,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.qkv",
+            disable_tp=use_data_parallel,
+        )
+
+        self.proj = RowParallelLinear(
+            input_size=projection_size,
+            output_size=embed_dim,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.proj",
+            disable_tp=use_data_parallel,
+        )
+
+        self.attn = MMEncoderAttention(
+            num_heads=self.num_attention_heads_per_partition,
+            head_size=self.hidden_size_per_attention_head,
+            num_kv_heads=self.num_kv_heads,
+            scale=self.hidden_size_per_attention_head**-0.5,
+            prefix=f"{prefix}.attn",
+        )
+
+        self.apply_rotary_emb = ApplyRotaryEmb(enforce_enable=True)
+
+    def split_qkv(self, qkv: torch.Tensor) -> tuple[torch.Tensor, ...]:
+        # qkv: [s, b, (h + 2*hk) * d]
+        s, b, _ = qkv.shape
+        h = self.num_heads
+        hk = self.num_kv_heads
+        d = self.head_dim
+
+        qkv = qkv.view(s, b, h + 2 * hk, d)
+
+        q = qkv[:, :, :h, :]
+        k = qkv[:, :, h : h + hk, :]
+        v = qkv[:, :, h + hk :, :]
+
+        # [s, b, h, d] -> [b, s, h, d]
+        return (
+            q.permute(1, 0, 2, 3).contiguous(),
+            k.permute(1, 0, 2, 3).contiguous(),
+            v.permute(1, 0, 2, 3).contiguous(),
+        )
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        cu_seqlens: torch.Tensor,
+        rotary_pos_emb_cos: torch.Tensor,
+        rotary_pos_emb_sin: torch.Tensor,
+        max_seqlen: int | None = None,
+    ) -> torch.Tensor:
+        # [s, b, c] --> [s, b, head * 3 * head_dim]
+        x, _ = self.qkv(x)
+        seq_len, batch_size, _ = x.shape
+
+        q, k, v = self.split_qkv(x)
+        q = self.apply_rotary_emb(
+            q,
+            rotary_pos_emb_cos,
+            rotary_pos_emb_sin,
+        )
+
+        k = self.apply_rotary_emb(
+            k,
+            rotary_pos_emb_cos,
+            rotary_pos_emb_sin,
+        )
+
+        context_layer = self.attn(
+            query=q,
+            key=k,
+            value=v,
+            cu_seqlens=cu_seqlens,
+            max_seqlen=max_seqlen,
+        )
+
+        context_layer = einops.rearrange(
+            context_layer, "b s h d -> s b (h d)", b=batch_size
+        ).contiguous()
+
+        output, _ = self.proj(context_layer)
+        return output
+
+
+@support_torch_compile(
+    dynamic_arg_dims={
+        "x": 0,
+        "cu_seqlens": 0,
+        "rotary_pos_emb_cos": 0,
+        "rotary_pos_emb_sin": 0,
+    },
+    enable_if=should_torch_compile_mm_encoder,
+    is_encoder=True,
+)
+class Exaone4_5_VisionBlock(nn.Module):
+    def __init__(
+        self,
+        dim: int,
+        num_heads: int,
+        num_kv_heads: int,
+        mlp_hidden_dim: int,
+        hidden_act: str = "silu",
+        norm_layer: Callable[[int], nn.Module] | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+        use_data_parallel: bool = False,
+    ) -> None:
+        super().__init__()
+        if norm_layer is None:
+            norm_layer = partial(nn.LayerNorm, eps=1e-6)
+        self.norm1 = norm_layer(dim)
+        self.norm2 = norm_layer(dim)
+        self.attn = EXAONE4_5_VisionAttention(
+            embed_dim=dim,
+            num_heads=num_heads,
+            num_kv_heads=num_kv_heads,
+            projection_size=dim,
+            quant_config=quant_config,
+            prefix=f"{prefix}.attn",
+            use_data_parallel=use_data_parallel,
+        )
+        self.mlp = Exaone4_5_VisionMLP(
+            dim,
+            mlp_hidden_dim,
+            hidden_act=hidden_act,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.mlp",
+            use_data_parallel=use_data_parallel,
+        )
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        cu_seqlens: torch.Tensor,
+        rotary_pos_emb_cos: torch.Tensor,
+        rotary_pos_emb_sin: torch.Tensor,
+        max_seqlen: int | None = None,  # Only used for Flash Attention
+        seqlens: list[int] | None = None,  # Only used for xFormers
+    ) -> torch.Tensor:
+        x_attn = self.attn(
+            self.norm1(x),
+            cu_seqlens=cu_seqlens,
+            rotary_pos_emb_cos=rotary_pos_emb_cos,
+            rotary_pos_emb_sin=rotary_pos_emb_sin,
+            max_seqlen=max_seqlen,
+        )
+        x_fused_norm, residual = self.norm2(x, residual=x_attn)
+        x = residual + self.mlp(x_fused_norm)
+        return x
+
+
+class EXAONE4_5_VisionTransformer(Qwen2_5_VisionTransformer):
+    def __init__(
+        self,
+        vision_config: Exaone4_5_VisionConfig,
+        norm_eps: float = 1e-6,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+        use_data_parallel: bool = False,
+    ) -> None:
+        super().__init__(
+            vision_config=vision_config,
+            norm_eps=norm_eps,
+            quant_config=quant_config,
+            prefix=prefix,
+        )
+        depth = vision_config.depth
+        self.num_kv_heads = vision_config.num_key_value_heads
+
+        norm_layer = partial(RMSNorm, eps=norm_eps)
+
+        self.blocks = nn.ModuleList(
+            [
+                Exaone4_5_VisionBlock(
+                    dim=self.hidden_size,
+                    num_heads=self.num_heads,
+                    num_kv_heads=self.num_kv_heads,
+                    mlp_hidden_dim=vision_config.intermediate_size,
+                    hidden_act=vision_config.hidden_act,
+                    norm_layer=norm_layer,
+                    quant_config=quant_config,
+                    prefix=f"{prefix}.blocks.{layer_idx}",
+                    use_data_parallel=use_data_parallel,
+                )
+                for layer_idx in range(depth)
+            ]
+        )
+
+
+class Exaone4_5_ProcessingInfo(Qwen2VLProcessingInfo):
+    def get_hf_config(self):
+        return self.ctx.get_hf_config(Exaone4_5_Config)
+
+    def get_hf_processor(self, **kwargs: object) -> Exaone4_5_Processor:
+        return self.ctx.get_hf_processor(
+            Exaone4_5_Processor,
+            use_fast=kwargs.pop("use_fast", True),
+            **kwargs,
+        )
+
+
+@MULTIMODAL_REGISTRY.register_processor(
+    Exaone4_5_MultiModalProcessor,
+    info=Exaone4_5_ProcessingInfo,
+    dummy_inputs=Exaone4_5_DummyInputsBuilder,
+)
+class Exaone4_5_ForConditionalGeneration(Qwen2_5_VLForConditionalGeneration):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        nn.Module.__init__(self)
+
+        config: Exaone4_5_Config = vllm_config.model_config.hf_config
+        self.vllm_config = vllm_config
+        multimodal_config = vllm_config.model_config.multimodal_config
+
+        self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
+        self.config = config
+        self.multimodal_config = multimodal_config
+        self.is_multimodal_pruning_enabled = (
+            multimodal_config.is_multimodal_pruning_enabled()
+        )
+
+        with self._mark_tower_model(vllm_config, {"image", "video"}):
+            self.visual = EXAONE4_5_VisionTransformer(
+                config.vision_config,
+                norm_eps=getattr(config, "rms_norm_eps", 1e-6),
+                quant_config=self.quant_config,
+                prefix=maybe_prefix(prefix, "visual"),
+                use_data_parallel=self.use_data_parallel,
+            )
+
+        with self._mark_language_model(vllm_config):
+            self.language_model = init_vllm_registered_model(
+                vllm_config=vllm_config,
+                prefix=maybe_prefix(prefix, "language_model"),
+                hf_config=config.get_text_config(),
+                architectures=["Exaone4ForCausalLM"],
+            )
+
+        self.make_empty_intermediate_tensors = (
+            self.language_model.make_empty_intermediate_tensors
+        )
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(
+            self,
+            skip_prefixes=(["mtp."]),
+        )
+        return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
+
+    @classmethod
+    def get_placeholder_str(cls, modality: str, i: int) -> str | None:
+        if modality.startswith("image"):
+            return "<vision><|image_pad|></vision>"
+        if modality.startswith("video"):
+            return "<vision><|video_pad|></vision>"
+
+        raise ValueError("Only image or video modality is supported")
diff --git a/vllm/model_executor/models/exaone4_5_mtp.py b/vllm/model_executor/models/exaone4_5_mtp.py
new file mode 100644
index 000000000000..7711f72e42ca
--- /dev/null
+++ b/vllm/model_executor/models/exaone4_5_mtp.py
@@ -0,0 +1,208 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Inference-only EXAONE-4_5 MTP model."""
+
+from collections.abc import Iterable
+
+import torch
+from torch import nn
+
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import VllmConfig
+from vllm.distributed.parallel_state import get_pp_group
+from vllm.logger import init_logger
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import ColumnParallelLinear
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    ParallelLMHead,
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.models.exaone4 import Exaone4DecoderLayer
+from vllm.model_executor.models.exaone_moe_mtp import (
+    ExaoneMoeMTP,
+    ExaoneMoeMultiTokenPredictor,
+)
+from vllm.sequence import IntermediateTensors
+
+from .interfaces import (
+    MultiModalEmbeddings,
+    SupportsMultiModal,
+    _require_is_multimodal,
+)
+from .utils import (
+    AutoWeightsLoader,
+    _merge_multimodal_embeddings,
+    maybe_prefix,
+)
+
+logger = init_logger(__name__)
+
+KVCache = tuple[torch.Tensor, torch.Tensor]
+
+
+@support_torch_compile
+class Exaone4_5MultiTokenPredictor(ExaoneMoeMultiTokenPredictor):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        nn.Module.__init__(self)
+
+        model_config = vllm_config.model_config
+        quant_config = vllm_config.quant_config
+        lora_config = vllm_config.lora_config
+        config = model_config.hf_config
+        text_config = config.text_config
+
+        self.config = config
+        lora_vocab = (
+            (lora_config.lora_extra_vocab_size * (lora_config.max_loras or 1))
+            if lora_config
+            else 0
+        )
+        self.vocab_size = config.vocab_size + lora_vocab
+        self.org_vocab_size = config.vocab_size
+
+        self.mtp_start_layer_idx = text_config.num_hidden_layers
+        self.num_mtp_layers = getattr(config, "num_nextn_predict_layers", 1)
+
+        self.embed_tokens = VocabParallelEmbedding(
+            self.vocab_size,
+            text_config.hidden_size,
+            org_num_embeddings=config.vocab_size,
+        )
+
+        self.fc = ColumnParallelLinear(
+            text_config.hidden_size * 2,
+            text_config.hidden_size,
+            gather_output=True,
+            bias=False,
+            return_bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.fc",
+        )
+        self.layers = nn.ModuleList(
+            Exaone4DecoderLayer(
+                text_config,
+                quant_config=quant_config,
+                prefix=f"{prefix}.layers.{idx}",
+            )
+            for idx in range(self.num_mtp_layers)
+        )
+
+        self.norm = RMSNorm(text_config.hidden_size, eps=text_config.rms_norm_eps)
+        self.pre_fc_norm_hidden = RMSNorm(
+            text_config.hidden_size, eps=text_config.rms_norm_eps
+        )
+        self.pre_fc_norm_embedding = RMSNorm(
+            text_config.hidden_size, eps=text_config.rms_norm_eps
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor:
+        if get_pp_group().is_first_rank:
+            if inputs_embeds is None:
+                inputs_embeds = self.get_input_embeddings(input_ids)
+            assert hidden_states.shape[-1] == inputs_embeds.shape[-1]
+            inputs_embeds = self.pre_fc_norm_embedding(inputs_embeds)
+            hidden_states = self.pre_fc_norm_hidden(hidden_states)
+            hidden_states = torch.cat([inputs_embeds, hidden_states], dim=-1)
+            hidden_states = self.fc(hidden_states)
+            residual = None
+        else:
+            assert intermediate_tensors is not None
+            hidden_states = intermediate_tensors["hidden_states"]
+            residual = intermediate_tensors["residual"]
+
+        current_step_idx = spec_step_idx % self.num_mtp_layers
+        hidden_states, residual = self.layers[current_step_idx](
+            positions=positions,
+            hidden_states=hidden_states,
+            residual=residual,
+        )
+
+        if not get_pp_group().is_last_rank:
+            return IntermediateTensors(
+                {"hidden_states": hidden_states, "residual": residual}
+            )
+
+        hidden_states = self.norm(hidden_states)
+        return hidden_states
+
+
+@support_torch_compile
+class Exaone4_5_MTP(ExaoneMoeMTP, SupportsMultiModal):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        config = vllm_config.model_config.hf_config
+        text_config = config.text_config
+        self.vllm_config = vllm_config
+        self.quant_config = vllm_config.quant_config
+
+        nn.Module.__init__(self)
+        self.config = config
+        self.model = Exaone4_5MultiTokenPredictor(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "mtp")
+        )
+        self.unpadded_vocab_size = config.vocab_size
+        self.lm_head = ParallelLMHead(
+            self.unpadded_vocab_size,
+            text_config.hidden_size,
+            org_num_embeddings=config.vocab_size,
+            prefix=maybe_prefix(prefix, "lm_head"),
+        )
+        if config.tie_word_embeddings:
+            self.lm_head.weight = self.model.embed_tokens.weight
+        self.logits_processor = LogitsProcessor(
+            self.unpadded_vocab_size, config.vocab_size
+        )
+
+    def embed_input_ids(
+        self,
+        input_ids: torch.Tensor,
+        multimodal_embeddings: MultiModalEmbeddings | None = None,
+        *,
+        is_multimodal: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        inputs_embeds = self._embed_text_input_ids(
+            input_ids,
+            self.model.embed_input_ids,
+            is_multimodal=is_multimodal,
+        )
+
+        if multimodal_embeddings is None or len(multimodal_embeddings) == 0:
+            return inputs_embeds
+
+        is_multimodal = _require_is_multimodal(is_multimodal)
+
+        inputs_embeds = _merge_multimodal_embeddings(
+            inputs_embeds=inputs_embeds,
+            multimodal_embeddings=multimodal_embeddings,
+            is_multimodal=is_multimodal,
+        )
+
+        return inputs_embeds
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        shared_weight_names = ["embed_tokens", "lm_head"]
+
+        def remap_weight_names(weights):
+            for name, weight in weights:
+                if name.startswith("mtp."):
+                    name = name.replace("mtp.", "model.")
+                elif any(key in name for key in shared_weight_names):
+                    if "embed_tokens" in name:
+                        name = name.replace("language_model.", "")
+                else:
+                    continue
+                yield name, weight
+
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(remap_weight_names(weights))
diff --git a/vllm/model_executor/models/exaone_moe.py b/vllm/model_executor/models/exaone_moe.py
index d7282edcf4f6..80b7e0957e82 100644
--- a/vllm/model_executor/models/exaone_moe.py
+++ b/vllm/model_executor/models/exaone_moe.py
@@ -30,7 +30,10 @@
     get_pp_group,
     get_tensor_model_parallel_world_size,
 )
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import ReplicatedLinear
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
@@ -116,12 +119,26 @@ def __init__(
             self.physical_expert_start + self.n_local_physical_experts
         )
 
+        if getattr(config, "num_shared_experts", 0) > 0:
+            intermediate_size = config.moe_intermediate_size * config.num_shared_experts
+            self.shared_experts = ExaoneMoeGatedMLP(
+                hidden_size=config.hidden_size,
+                intermediate_size=intermediate_size,
+                hidden_act=config.hidden_act,
+                quant_config=quant_config,
+                reduce_results=False,
+                prefix=f"{prefix}.shared_experts",
+            )
+        else:
+            self.shared_experts = None
+
         self.experts = FusedMoE(
+            shared_experts=self.shared_experts,
+            gate=self.gate,
             num_experts=self.n_routed_experts,
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=config.norm_topk_prob,
             quant_config=quant_config,
             use_grouped_topk=True,
@@ -135,41 +152,16 @@ def __init__(
             num_redundant_experts=self.n_redundant_experts,
         )
 
-        if getattr(config, "num_shared_experts", 0) > 0:
-            intermediate_size = config.moe_intermediate_size * config.num_shared_experts
-            self.shared_experts = ExaoneMoeGatedMLP(
-                hidden_size=config.hidden_size,
-                intermediate_size=intermediate_size,
-                hidden_act=config.hidden_act,
-                quant_config=quant_config,
-                reduce_results=self.experts.must_reduce_shared_expert_outputs(),
-                prefix=f"{prefix}.shared_experts",
-            )
-        else:
-            self.shared_experts = None
-
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         # NOTE: hidden_states can have either 1D or 2D shape.
         orig_shape = hidden_states.shape
         hidden_dim = hidden_states.shape[-1]
         hidden_states = hidden_states.view(-1, hidden_dim)
 
-        # router_logits: (num_tokens, n_experts)
-        router_logits, _ = self.gate(hidden_states)
-
         final_hidden_states = self.experts(
-            hidden_states=hidden_states, router_logits=router_logits
+            hidden_states=hidden_states, router_logits=hidden_states
         )
 
-        if self.shared_experts is not None:
-            shared_output = self.shared_experts(hidden_states)
-            final_hidden_states = final_hidden_states + shared_output
-
-        if self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(  # noqa E501
-                final_hidden_states
-            )
-
         return final_hidden_states.view(orig_shape)
 
 
@@ -337,7 +329,7 @@ def forward(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return FusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/exaone_moe_mtp.py b/vllm/model_executor/models/exaone_moe_mtp.py
index b3c71e6aef6e..b3f8552aac58 100644
--- a/vllm/model_executor/models/exaone_moe_mtp.py
+++ b/vllm/model_executor/models/exaone_moe_mtp.py
@@ -184,11 +184,6 @@ class ExaoneMoeMTP(nn.Module):
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         config = vllm_config.model_config.hf_config
         self.vllm_config = vllm_config
-        cache_config = vllm_config.cache_config
-        assert not cache_config.enable_prefix_caching, (
-            "ExaoneMoeMTP currently does not support prefix caching"
-        )
-
         self.quant_config = vllm_config.quant_config
 
         super().__init__()
diff --git a/vllm/model_executor/models/extract_hidden_states.py b/vllm/model_executor/models/extract_hidden_states.py
index d969441ac241..8df4823b6973 100644
--- a/vllm/model_executor/models/extract_hidden_states.py
+++ b/vllm/model_executor/models/extract_hidden_states.py
@@ -9,6 +9,7 @@
 """
 
 from collections.abc import Iterable
+from dataclasses import replace
 from typing import ClassVar
 
 import torch
@@ -23,19 +24,18 @@
 )
 from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
 from vllm.model_executor.models.utils import maybe_prefix
-from vllm.utils.torch_utils import kv_cache_dtype_str_to_dtype
+from vllm.utils.torch_utils import is_quantized_kv_cache, kv_cache_dtype_str_to_dtype
 from vllm.v1.attention.backend import (
     AttentionBackend,
     AttentionImpl,
     AttentionMetadataBuilder,
     AttentionType,
     CommonAttentionMetadata,
-    is_quantized_kv_cache,
 )
 from vllm.v1.kv_cache_interface import (
     AttentionSpec,
+    HiddenStateCacheSpec,
     KVCacheSpec,
-    MLAAttentionSpec,
 )
 
 ########## Custom Ops ########
@@ -79,13 +79,12 @@ def dummy_attention(layer_name, _placeholder):
 
 
 def basic_cache(
-    to_cache: torch.Tensor,  # shape: [num_blocks, block_size, num_heads, head_size]
-    kv_cache: torch.Tensor,  # shape: [seq_len, num_heads, head_size]
+    to_cache: torch.Tensor,  # shape: [seq_len, num_heads, head_size]
+    kv_cache: torch.Tensor,  # shape: [num_blocks, block_size, num_heads, head_size]
     slot_mapping: torch.Tensor,  # shape: [seq_len]
 ):
-    num_blocks, block_size, num_heads, head_size = kv_cache.shape
-    token_kv_cache = kv_cache.view(num_blocks * block_size, num_heads, head_size)
-    token_kv_cache[slot_mapping] = to_cache
+    block_size = kv_cache.shape[1]
+    kv_cache[slot_mapping // block_size, slot_mapping % block_size] = to_cache
 
 
 ######### CacheOnlyAttentionBackend ########
@@ -94,7 +93,6 @@ def basic_cache(
 class CacheOnlyAttentionBackend(AttentionBackend):
     """Attention backend that only caches KV without computing attention."""
 
-    accept_output_buffer: bool = False
     supported_dtypes: ClassVar[list[torch.dtype]] = [
         torch.float16,
         torch.bfloat16,
@@ -323,11 +321,9 @@ def get_attn_backend(self) -> type[AttentionBackend]:
         return self.attn_backend
 
     def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec:
-        # Note: we use MLAAttentionSpec here to because it will
-        # produce page sizes of (block_size * num_kv_heads * head_size * dtype_size)
-        # whereas FullAttentionSpec will add an additional factor of 2
-        return MLAAttentionSpec(
-            block_size=self.block_size,
+        # Re-read block_size: hybrid models may bump it after __init__.
+        return HiddenStateCacheSpec(
+            block_size=vllm_config.cache_config.block_size,
             num_kv_heads=self.num_heads,
             head_size=self.head_size,
             dtype=self.kv_cache_torch_dtype,
@@ -353,6 +349,10 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
 
         cache_config = vllm_config.cache_config
 
+        # Hidden states dtype should be independent of KV cache dtype.
+        if cache_config is not None and is_quantized_kv_cache(cache_config.cache_dtype):
+            cache_config = replace(cache_config, cache_dtype="auto")
+
         # Create a single cache-only attention layer
         # Note: We set num_heads <- self.num_hidden_states
         # and head_size <- hidden_size so that we can insert
diff --git a/vllm/model_executor/models/falcon_h1.py b/vllm/model_executor/models/falcon_h1.py
index fba2e216e3fa..b837dc010dac 100644
--- a/vllm/model_executor/models/falcon_h1.py
+++ b/vllm/model_executor/models/falcon_h1.py
@@ -50,6 +50,7 @@
     SupportsPP,
 )
 from .utils import (
+    AutoWeightsLoader,
     PPMissingLayer,
     is_pp_missing_parameter,
     make_empty_intermediate_tensors_factory,
@@ -495,6 +496,63 @@ def forward(
         hidden_states = self.final_layernorm(hidden_states)
         return hidden_states
 
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        stacked_params_mapping = [
+            # (param_name, shard_name, shard_id)
+            ("qkv_proj", "q_proj", "q"),
+            ("qkv_proj", "k_proj", "k"),
+            ("qkv_proj", "v_proj", "v"),
+            ("gate_up_proj", "gate_proj", 0),
+            ("gate_up_proj", "up_proj", 1),
+        ]
+
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+        for name, loaded_weight in weights:
+            if "rotary_emb.inv_freq" in name:
+                continue
+
+            if "A_log" in name:
+                name = name.replace("A_log", "A")
+
+            if "mamba" in name:
+                name = name.replace("mamba", "mamba.mamba")
+
+            if "scale" in name:
+                # Remapping the name of kv-scale.
+                name = maybe_remap_kv_scale_name(name, params_dict)
+                if name is None:
+                    continue
+
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                if weight_name not in name:
+                    continue
+
+                name = name.replace(weight_name, param_name)
+                # Skip loading extra bias for GPTQ models.
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
+                # Skip layers on other devices.
+                if is_pp_missing_parameter(name, self):
+                    continue
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                break
+            else:
+                # Skip loading extra bias for GPTQ models.
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
+                if is_pp_missing_parameter(name, self):
+                    continue
+
+                param = params_dict[name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                weight_loader(param, loaded_weight)
+            loaded_params.add(name)
+
+        return loaded_params
+
 
 class FalconH1ForCausalLM(
     nn.Module,
@@ -632,62 +690,8 @@ def compute_logits(
         return logits
 
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
-        stacked_params_mapping = [
-            # (param_name, shard_name, shard_id)
-            ("qkv_proj", "q_proj", "q"),
-            ("qkv_proj", "k_proj", "k"),
-            ("qkv_proj", "v_proj", "v"),
-            ("gate_up_proj", "gate_proj", 0),
-            ("gate_up_proj", "up_proj", 1),
-        ]
-
-        params_dict = dict(self.named_parameters())
-        loaded_params: set[str] = set()
-        for name, loaded_weight in weights:
-            if "rotary_emb.inv_freq" in name:
-                continue
-
-            if "A_log" in name:
-                name = name.replace("A_log", "A")
-
-            if "mamba" in name:
-                name = name.replace("mamba", "mamba.mamba")
-
-            if "scale" in name:
-                # Remapping the name of kv-scale.
-                name = maybe_remap_kv_scale_name(name, params_dict)
-                if name is None:
-                    continue
-
-            for param_name, weight_name, shard_id in stacked_params_mapping:
-                if weight_name not in name:
-                    continue
-
-                name = name.replace(weight_name, param_name)
-                # Skip loading extra bias for GPTQ models.
-                if name.endswith(".bias") and name not in params_dict:
-                    continue
-                # Skip layers on other devices.
-                if is_pp_missing_parameter(name, self):
-                    continue
-                param = params_dict[name]
-                weight_loader = param.weight_loader
-                weight_loader(param, loaded_weight, shard_id)
-                break
-            else:
-                # Skip loading extra bias for GPTQ models.
-                if name.endswith(".bias") and name not in params_dict:
-                    continue
-                if is_pp_missing_parameter(name, self):
-                    continue
-                if self.tie_word_embeddings and "lm_head" in name:
-                    continue
-
-                param = params_dict[name]
-                weight_loader = getattr(param, "weight_loader", default_weight_loader)
-                weight_loader(param, loaded_weight)
-            loaded_params.add(name)
-
-        if self.tie_word_embeddings:
-            loaded_params.add("lm_head.weight")
-        return loaded_params
+        loader = AutoWeightsLoader(
+            self,
+            skip_prefixes=(["lm_head."] if self.tie_word_embeddings else None),
+        )
+        return loader.load_weights(weights)
diff --git a/vllm/model_executor/models/fireredasr2.py b/vllm/model_executor/models/fireredasr2.py
index 217bb5b2d138..eea0c7d8897e 100644
--- a/vllm/model_executor/models/fireredasr2.py
+++ b/vllm/model_executor/models/fireredasr2.py
@@ -2,11 +2,9 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import math
 from collections.abc import Iterable, Mapping, Sequence
-from typing import Annotated, Literal, cast
+from typing import Annotated, cast
 
-import numpy as np
 import torch
-import torch.nn.functional as F
 from torch import nn
 from transformers import (
     BatchFeature,
@@ -15,6 +13,7 @@
 
 from vllm.config import ModelConfig, SpeechToTextConfig, VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
+from vllm.config.speech_to_text import SpeechToTextParams
 from vllm.inputs import MultiModalDataDict, PromptType
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import _ACTIVATION_REGISTRY
@@ -45,6 +44,7 @@
 )
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
+from .conformer_encoder import ConformerEncoder
 from .interfaces import (
     MultiModalEmbeddings,
     SupportsMultiModal,
@@ -84,352 +84,6 @@ class FireRedASR2AudioInputs(TensorSchema):
     ]
 
 
-class Swish(nn.Module):
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        return x * torch.sigmoid(x)
-
-
-class Conv2dSubsampling(nn.Module):
-    def __init__(self, idim: int, d_model: int, out_channels: int = 32):
-        super().__init__()
-        self.conv = nn.Sequential(
-            nn.Conv2d(1, out_channels, 3, 2),
-            nn.ReLU(),
-            nn.Conv2d(out_channels, out_channels, 3, 2),
-            nn.ReLU(),
-        )
-        subsample_idim = ((idim - 1) // 2 - 1) // 2
-        self.out = ReplicatedLinear(
-            input_size=out_channels * subsample_idim,
-            output_size=d_model,
-            bias=True,
-        )
-
-        self.subsampling = 4
-        left_context = right_context = 3  # both exclude current frame
-        self.context = left_context + 1 + right_context  # 7
-
-    def forward(
-        self, x: torch.Tensor, x_mask: torch.Tensor
-    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-        x = x.unsqueeze(1)
-        x = self.conv(x)
-        N, C, T, D = x.size()
-        x, _ = self.out(x.transpose(1, 2).contiguous().view(N, T, C * D))
-        mask = x_mask[:, :, :-2:2][:, :, :-2:2]
-        input_lengths = mask[:, -1, :].sum(dim=-1)
-        return x, input_lengths, mask
-
-
-class RelPositionalEncoding(nn.Module):
-    def __init__(self, d_model: int, max_len: int = 5000):
-        super().__init__()
-        pe_positive = torch.zeros(max_len, d_model, requires_grad=False)
-        pe_negative = torch.zeros(max_len, d_model, requires_grad=False)
-        position = torch.arange(0, max_len).unsqueeze(1).float()
-        div_term = torch.exp(
-            torch.arange(0, d_model, 2).float()
-            * -(torch.log(torch.tensor(10000.0)).item() / d_model)
-        )
-        pe_positive[:, 0::2] = torch.sin(position * div_term)
-        pe_positive[:, 1::2] = torch.cos(position * div_term)
-        pe_negative[:, 0::2] = torch.sin(-1 * position * div_term)
-        pe_negative[:, 1::2] = torch.cos(-1 * position * div_term)
-
-        pe_positive = torch.flip(pe_positive, [0]).unsqueeze(0)
-        pe_negative = pe_negative[1:].unsqueeze(0)
-        self.pe = torch.cat([pe_positive, pe_negative], dim=1)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        # Tmax = 2 * max_len - 1
-        Tmax, T = self.pe.size(1), x.size(1)
-        pos_emb = self.pe[:, Tmax // 2 - T + 1 : Tmax // 2 + T].clone().detach()
-        return pos_emb
-
-
-class ConformerFeedForward(nn.Module):
-    def __init__(self, d_model: int):
-        super().__init__()
-        self.pre_layer_norm = nn.LayerNorm(d_model)
-        self.linear_expand = ReplicatedLinear(
-            input_size=d_model,
-            output_size=d_model * 4,
-            bias=True,
-        )
-        self.nonlinear = Swish()
-        self.linear_project = ReplicatedLinear(
-            input_size=d_model * 4,
-            output_size=d_model,
-            bias=True,
-        )
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        residual = x
-        x = self.pre_layer_norm(x)
-        x, _ = self.linear_expand(x)
-        x = self.nonlinear(x)
-        x, _ = self.linear_project(x)
-        output = x + residual
-        return output
-
-
-class EncoderMultiHeadAttention(nn.Module):
-    def __init__(self, n_head: int, d_model: int):
-        super().__init__()
-        assert d_model % n_head == 0
-        self.n_head = n_head
-        self.d_k = d_model // n_head
-        self.d_v = self.d_k
-
-        self.w_qs = ReplicatedLinear(
-            input_size=d_model, output_size=n_head * self.d_k, bias=False
-        )
-        self.w_ks = ReplicatedLinear(
-            input_size=d_model, output_size=n_head * self.d_k, bias=False
-        )
-        self.w_vs = ReplicatedLinear(
-            input_size=d_model, output_size=n_head * self.d_v, bias=False
-        )
-
-        self.layer_norm_q = nn.LayerNorm(d_model)
-        self.layer_norm_k = nn.LayerNorm(d_model)
-        self.layer_norm_v = nn.LayerNorm(d_model)
-
-        self.fc = ReplicatedLinear(
-            input_size=n_head * self.d_v, output_size=d_model, bias=False
-        )
-
-    def forward_qkv(
-        self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor
-    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-        d_k, d_v, n_head = self.d_k, self.d_v, self.n_head
-        sz_b, len_q, len_k, len_v = q.size(0), q.size(1), k.size(1), v.size(1)
-
-        q = self.layer_norm_q(q)
-        k = self.layer_norm_k(k)
-        v = self.layer_norm_v(v)
-
-        q = self.w_qs(q)[0].view(sz_b, len_q, n_head, d_k)
-        k = self.w_ks(k)[0].view(sz_b, len_k, n_head, d_k)
-        v = self.w_vs(v)[0].view(sz_b, len_v, n_head, d_v)
-        q = q.transpose(1, 2)
-        k = k.transpose(1, 2)
-        v = v.transpose(1, 2)
-        return q, k, v
-
-    def forward_output(
-        self, output: torch.Tensor, residual: torch.Tensor, sz_b: int, len_q: int
-    ) -> torch.Tensor:
-        output = output.transpose(1, 2).contiguous().view(sz_b, len_q, -1)
-        fc_out, _ = self.fc(output)
-        output = fc_out
-        output = output + residual
-        return output
-
-    def forward_attention(
-        self, attn: torch.Tensor, v: torch.Tensor, mask: torch.Tensor | None = None
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        if mask is not None:
-            mask = mask.unsqueeze(1)
-            mask = mask.eq(0)
-            attn = attn.masked_fill(mask, -float("inf"))
-            attn = torch.softmax(attn, dim=-1).masked_fill(mask, 0.0)
-        else:
-            attn = torch.softmax(attn, dim=-1)
-
-        d_attn = attn
-        output = torch.matmul(d_attn, v)
-
-        return output, attn
-
-
-class RelPosMultiHeadAttention(EncoderMultiHeadAttention):
-    def __init__(self, n_head: int, d_model: int):
-        super().__init__(n_head, d_model)
-        d_k = d_model // n_head
-        self.scale = 1.0 / (d_k**0.5)
-        self.linear_pos = ReplicatedLinear(
-            input_size=d_model, output_size=n_head * d_k, bias=False
-        )
-        self.pos_bias_u = nn.Parameter(torch.empty([n_head, d_k]))
-        self.pos_bias_v = nn.Parameter(torch.empty([n_head, d_k]))
-
-    def _rel_shift(self, x):
-        N, H, T1, T2 = x.size()
-        zero_pad = torch.zeros((N, H, T1, 1), device=x.device, dtype=x.dtype)
-        x_padded = torch.cat([zero_pad, x], dim=-1)
-
-        x_padded = x_padded.view(N, H, T2 + 1, T1)
-        x = x_padded[:, :, 1:].view_as(x)
-        x = x[:, :, :, : x.size(-1) // 2 + 1]
-        return x
-
-    def forward(
-        self,
-        q: torch.Tensor,
-        k: torch.Tensor,
-        v: torch.Tensor,
-        pos_emb: torch.Tensor,
-        mask: torch.Tensor | None = None,
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        sz_b, len_q = q.size(0), q.size(1)
-
-        residual = q
-        q, k, v = self.forward_qkv(q, k, v)
-
-        q = q.transpose(1, 2)
-        n_batch_pos = pos_emb.size(0)
-        p = self.linear_pos(pos_emb)[0].view(n_batch_pos, -1, self.n_head, self.d_k)
-        p = p.transpose(1, 2)
-
-        q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2)
-        q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2)
-
-        matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1))
-
-        matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1))
-        matrix_bd = self._rel_shift(matrix_bd)
-
-        attn_scores = matrix_ac + matrix_bd
-        attn_scores.mul_(self.scale)
-
-        output, attn = self.forward_attention(attn_scores, v, mask=mask)
-
-        output = self.forward_output(output, residual, sz_b, len_q)
-        return output, attn
-
-
-class ConformerConvolution(nn.Module):
-    def __init__(self, d_model: int, kernel_size: int = 33):
-        super().__init__()
-        assert kernel_size % 2 == 1
-        self.pre_layer_norm = nn.LayerNorm(d_model)
-        self.pointwise_conv1 = nn.Conv1d(
-            d_model, d_model * 4, kernel_size=1, bias=False
-        )
-        self.padding = (kernel_size - 1) // 2
-        self.depthwise_conv = nn.Conv1d(
-            d_model * 2,
-            d_model * 2,
-            kernel_size,
-            stride=1,
-            padding=self.padding,
-            groups=d_model * 2,
-            bias=False,
-        )
-        self.batch_norm = nn.LayerNorm(d_model * 2)
-        self.swish = Swish()
-        self.pointwise_conv2 = nn.Conv1d(
-            d_model * 2, d_model, kernel_size=1, bias=False
-        )
-
-    def forward(
-        self, x: torch.Tensor, mask: torch.Tensor | None = None
-    ) -> torch.Tensor:
-        residual = x
-        out = self.pre_layer_norm(x)
-        out = out.transpose(1, 2)
-        if mask is not None:
-            out.masked_fill_(mask.ne(1), 0.0)
-        out = self.pointwise_conv1(out)
-        out = F.glu(out, dim=1)
-        out = self.depthwise_conv(out)
-
-        out = out.transpose(1, 2)
-        out = self.swish(self.batch_norm(out))
-        out = out.transpose(1, 2)
-
-        out = self.pointwise_conv2(out)
-        if mask is not None:
-            out.masked_fill_(mask.ne(1), 0.0)
-        out = out.transpose(1, 2)
-        return out + residual
-
-
-class RelPosEmbConformerBlock(nn.Module):
-    def __init__(self, d_model, n_head, kernel_size=33):
-        super().__init__()
-        self.ffn1 = ConformerFeedForward(d_model)
-        self.mhsa = RelPosMultiHeadAttention(n_head, d_model)
-        self.conv = ConformerConvolution(d_model, kernel_size)
-        self.ffn2 = ConformerFeedForward(d_model)
-        self.layer_norm = nn.LayerNorm(d_model)
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        pos_emb: torch.Tensor,
-        slf_attn_mask: torch.Tensor | None = None,
-        pad_mask: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        out = 0.5 * x + 0.5 * self.ffn1(x)
-        out = self.mhsa(out, out, out, pos_emb, mask=slf_attn_mask)[0]
-        out = self.conv(out, pad_mask)
-        out = 0.5 * out + 0.5 * self.ffn2(out)
-        out = self.layer_norm(out)
-        return out
-
-
-class ConformerEncoder(nn.Module):
-    def __init__(
-        self,
-        idim: int,
-        n_layers_enc: int,
-        n_head: int,
-        d_model: int,
-        kernel_size: int = 33,
-        pe_maxlen: int = 5000,
-    ):
-        super().__init__()
-        self.odim = d_model
-
-        self.input_preprocessor = Conv2dSubsampling(idim, d_model)
-        self.positional_encoding = RelPositionalEncoding(d_model)
-
-        self.layer_stack = nn.ModuleList()
-        for _ in range(n_layers_enc):
-            block = RelPosEmbConformerBlock(d_model, n_head, kernel_size)
-            self.layer_stack.append(block)
-
-    def forward(
-        self, padded_input: torch.Tensor, input_lengths: torch.Tensor, pad: bool = True
-    ):
-        if pad:
-            padded_input = F.pad(
-                padded_input,
-                (0, 0, 0, self.input_preprocessor.context - 1),
-                "constant",
-                0.0,
-            )
-        src_mask = self.padding_position_is_0(padded_input, input_lengths)
-
-        embed_output, input_lengths, src_mask = self.input_preprocessor(
-            padded_input, src_mask
-        )
-        enc_output = embed_output
-
-        pos_emb = self.positional_encoding(embed_output)
-
-        enc_outputs = []
-        for enc_layer in self.layer_stack:
-            enc_output = enc_layer(
-                enc_output, pos_emb, slf_attn_mask=src_mask, pad_mask=src_mask
-            )
-            enc_outputs.append(enc_output)
-
-        return enc_output, input_lengths, src_mask
-
-    def padding_position_is_0(
-        self, padded_input: torch.Tensor, input_lengths: torch.Tensor
-    ) -> torch.Tensor:
-        N, T = padded_input.size()[:2]
-        mask = torch.ones((N, T)).to(padded_input.device)
-        for i in range(N):
-            mask[i, input_lengths[i] :] = 0
-        mask = mask.unsqueeze(dim=1)
-        return mask.to(torch.uint8)
-
-
 class FireRedASR2Adapter(nn.Module):
     def __init__(self, encoder_dim: int, llm_dim: int, downsample_rate: int = 2):
         super().__init__()
@@ -702,14 +356,12 @@ def validate_language(cls, language: str | None) -> str | None:
     @classmethod
     def get_generation_prompt(
         cls,
-        audio: np.ndarray,
-        model_config: ModelConfig,  # not needed here
-        stt_config: SpeechToTextConfig,
-        language: str | None,
-        task_type: Literal["transcribe", "translate"],
-        request_prompt: str,
-        to_language: str | None,
+        stt_params: SpeechToTextParams,
     ) -> PromptType:
+        audio = stt_params.audio
+        stt_config = stt_params.stt_config
+        language = stt_params.language
+
         if language is None:
             raise ValueError(
                 "Language must be specified when creating the fireredasr2 prompt"
diff --git a/vllm/model_executor/models/fireredlid.py b/vllm/model_executor/models/fireredlid.py
new file mode 100644
index 000000000000..804ed2bc9fd9
--- /dev/null
+++ b/vllm/model_executor/models/fireredlid.py
@@ -0,0 +1,792 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+FireRedLID – Language Identification model adapted for vLLM.
+
+Architecture:  ConformerEncoder  +  TransformerDecoder (6-layer cross-attn)
+Vocabulary:    120 LID tokens  (dict.txt)
+Output:        Up to 2 tokens  (e.g. "en", "zh mandarin")
+
+This implementation follows the Whisper-style encoder-decoder pattern:
+  • Encoder processes audio features (Fbank + CMVN via FeatureExtractor)
+  • Decoder performs single-step autoregressive forward
+  • vLLM's generation loop handles beam search / sampling
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterable, Mapping, Sequence
+from typing import Annotated, Literal
+
+import numpy as np
+import torch
+from torch import nn
+from transformers import BatchFeature
+
+from vllm.config import ModelConfig, VllmConfig
+from vllm.config.multimodal import BaseDummyOptions
+from vllm.config.speech_to_text import SpeechToTextConfig
+from vllm.distributed import get_tensor_model_parallel_world_size
+from vllm.inputs import MultiModalDataDict, PromptType
+from vllm.logger import init_logger
+from vllm.model_executor.layers.attention import Attention, CrossAttention
+from vllm.model_executor.layers.linear import (
+    ColumnParallelLinear,
+    ReplicatedLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
+from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.multimodal.inputs import (
+    MultiModalFieldConfig,
+    MultiModalKwargsItems,
+)
+from vllm.multimodal.parse import MultiModalDataItems, MultiModalDataParser
+from vllm.multimodal.processing import (
+    BaseDummyInputsBuilder,
+    BaseProcessingInfo,
+    EncDecMultiModalProcessor,
+    PromptReplacement,
+    PromptUpdate,
+)
+from vllm.transformers_utils.processor import cached_processor_from_config
+from vllm.utils.tensor_schema import TensorSchema, TensorShape
+
+from .conformer_encoder import ConformerEncoder
+from .interfaces import (
+    MultiModalEmbeddings,
+    SupportsMultiModal,
+    SupportsTranscription,
+)
+from .utils import (
+    AutoWeightsLoader,
+    WeightsMapper,
+    maybe_prefix,
+)
+from .whisper_utils import ISO639_1_SUPPORTED_LANGS
+
+logger = init_logger(__name__)
+
+
+class FireRedLIDAudioInputs(TensorSchema):
+    """
+    Dimensions:
+        - b: Batch size
+        - t: Time frames  (variable across utterances)
+        - nmb: Number of mel bins (80)
+    """
+
+    input_features: Annotated[
+        list[torch.Tensor] | None,
+        TensorShape("b", "t", "nmb", dynamic_dims={"t"}),
+    ]
+    speech_lengths: Annotated[
+        list[torch.Tensor] | None,
+        TensorShape("b"),
+    ]
+    fake_token_lengths: Annotated[
+        list[torch.Tensor] | None,
+        TensorShape("b"),
+    ]
+
+
+FireRedLIDEncoder = ConformerEncoder
+
+
+class FireRedLIDPositionalEmbedding(nn.Module):
+    """Absolute sinusoidal positional embedding indexed by `positions`."""
+
+    def __init__(self, d_model: int, max_len: int = 5000):
+        super().__init__()
+        assert d_model % 2 == 0
+        pe = torch.zeros(max_len, d_model, requires_grad=False)
+        position = torch.arange(0, max_len).unsqueeze(1).float()
+        div_term = torch.exp(
+            torch.arange(0, d_model, 2).float()
+            * -(torch.log(torch.tensor(10000.0)).item() / d_model)
+        )
+        pe[:, 0::2] = torch.sin(position * div_term)
+        pe[:, 1::2] = torch.cos(position * div_term)
+        self.register_buffer("pe", pe, persistent=False)
+
+    def forward(self, position_ids: torch.Tensor) -> torch.Tensor:
+        return self.pe[position_ids]
+
+
+class FireRedLIDAttention(nn.Module):
+    """Base attention with shared QKV/FC projections for the LID decoder."""
+
+    def __init__(
+        self,
+        d_model: int,
+        n_head: int,
+        *,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+    ):
+        super().__init__()
+        tp_size = get_tensor_model_parallel_world_size()
+        assert n_head % tp_size == 0
+        self.total_num_heads = n_head
+        self.num_heads = n_head // tp_size
+        self.num_kv_heads = max(1, n_head // tp_size)
+        self.head_dim = d_model // n_head
+        self.scaling = self.head_dim**-0.5
+
+        cache_config = vllm_config.cache_config
+        quant_config = vllm_config.quant_config
+
+        self.w_qs = ColumnParallelLinear(
+            d_model,
+            d_model,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.w_qs",
+        )
+        self.w_ks = ColumnParallelLinear(
+            d_model,
+            d_model,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.w_ks",
+        )
+        self.w_vs = ColumnParallelLinear(
+            d_model,
+            d_model,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.w_vs",
+        )
+        self.fc = RowParallelLinear(
+            d_model,
+            d_model,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.fc",
+        )
+        self._init_attn(cache_config, quant_config, prefix)
+
+    def _init_attn(self, cache_config, quant_config, prefix: str) -> None:
+        raise NotImplementedError
+
+
+class FireRedLIDSelfAttention(FireRedLIDAttention):
+    def _init_attn(self, cache_config, quant_config, prefix: str) -> None:
+        self.attn = Attention(
+            self.num_heads,
+            self.head_dim,
+            self.scaling,
+            num_kv_heads=self.num_kv_heads,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            prefix=f"{prefix}.attn",
+        )
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        q, _ = self.w_qs(hidden_states)
+        k, _ = self.w_ks(hidden_states)
+        v, _ = self.w_vs(hidden_states)
+        attn_output = self.attn(q, k, v)
+        output, _ = self.fc(attn_output)
+        return output
+
+
+class FireRedLIDCrossAttention(FireRedLIDAttention):
+    def _init_attn(self, cache_config, quant_config, prefix: str) -> None:
+        self.attn = CrossAttention(
+            self.num_heads,
+            self.head_dim,
+            self.scaling,
+            num_kv_heads=self.num_kv_heads,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            prefix=f"{prefix}.attn",
+        )
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        encoder_hidden_states: torch.Tensor | None,
+    ) -> torch.Tensor:
+        q, _ = self.w_qs(hidden_states)
+        if encoder_hidden_states is not None:
+            k, _ = self.w_ks(encoder_hidden_states)
+            v, _ = self.w_vs(encoder_hidden_states)
+        else:
+            k = v = None
+
+        attn_output = self.attn(q, k, v)
+        output, _ = self.fc(attn_output)
+        return output
+
+
+class FireRedLIDFFN(nn.Module):
+    def __init__(self, d_model: int, d_ff: int):
+        super().__init__()
+        self.w_1 = ReplicatedLinear(d_model, d_ff, bias=True)
+        self.act = nn.GELU()
+        self.w_2 = ReplicatedLinear(d_ff, d_model, bias=True)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x, _ = self.w_1(x)
+        x = self.act(x)
+        x, _ = self.w_2(x)
+        return x
+
+
+class FireRedLIDDecoderLayer(nn.Module):
+    """vLLM-native decoder layer while preserving FireRedLID parameter names."""
+
+    def __init__(
+        self,
+        d_model: int,
+        n_head: int,
+        *,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.self_attn_norm = nn.LayerNorm(d_model)
+        self.self_attn = FireRedLIDSelfAttention(
+            d_model,
+            n_head,
+            vllm_config=vllm_config,
+            prefix=f"{prefix}.self_attn",
+        )
+
+        self.cross_attn_norm = nn.LayerNorm(d_model)
+        self.cross_attn = FireRedLIDCrossAttention(
+            d_model,
+            n_head,
+            vllm_config=vllm_config,
+            prefix=f"{prefix}.cross_attn",
+        )
+
+        self.mlp_norm = nn.LayerNorm(d_model)
+        self.mlp = FireRedLIDFFN(d_model, d_model * 4)
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        encoder_hidden_states: torch.Tensor | None,
+    ) -> torch.Tensor:
+        residual = hidden_states
+        hidden_states = self.self_attn_norm(hidden_states)
+        hidden_states = self.self_attn(hidden_states)
+        hidden_states = residual + hidden_states
+
+        residual = hidden_states
+        hidden_states = self.cross_attn_norm(hidden_states)
+        hidden_states = self.cross_attn(hidden_states, encoder_hidden_states)
+        hidden_states = residual + hidden_states
+
+        residual = hidden_states
+        hidden_states = self.mlp_norm(hidden_states)
+        hidden_states = residual + self.mlp(hidden_states)
+
+        return hidden_states
+
+
+class FireRedLIDDecoder(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        self.pad_id = getattr(config, "pad_token_id", 2)
+        self.n_layers = getattr(config, "n_layers_lid_dec", 6)
+        self.d_model = getattr(config, "d_model", 1280)
+        self.scale = self.d_model**0.5
+
+        self.tgt_word_emb = nn.Embedding(
+            getattr(config, "vocab_size", 120),
+            self.d_model,
+            padding_idx=self.pad_id,
+        )
+        self.positional_encoding = FireRedLIDPositionalEmbedding(
+            self.d_model,
+            max_len=getattr(config, "pe_maxlen", 5000),
+        )
+
+        self.layer_stack = nn.ModuleList(
+            [
+                FireRedLIDDecoderLayer(
+                    self.d_model,
+                    getattr(config, "n_head", 20),
+                    vllm_config=vllm_config,
+                    prefix=f"{prefix}.layer_stack.{idx}",
+                )
+                for idx in range(self.n_layers)
+            ]
+        )
+        self.layer_norm_out = nn.LayerNorm(self.d_model)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        encoder_hidden_states: torch.Tensor | None,
+    ) -> torch.Tensor:
+        hidden_states = self.tgt_word_emb(input_ids) * self.scale
+        hidden_states = hidden_states + self.positional_encoding(positions)
+
+        for layer in self.layer_stack:
+            hidden_states = layer(hidden_states, encoder_hidden_states)
+
+        hidden_states = self.layer_norm_out(hidden_states)
+        return hidden_states
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.tgt_word_emb(input_ids)
+
+
+class FireRedLIDModel(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+
+        self.encoder = FireRedLIDEncoder(
+            idim=getattr(config, "idim", 80),
+            n_layers_enc=getattr(config, "n_layers_enc", 16),
+            n_head=getattr(config, "n_head", 20),
+            d_model=getattr(config, "d_model", 1280),
+            kernel_size=getattr(config, "kernel_size", 33),
+            pe_maxlen=getattr(config, "pe_maxlen", 5000),
+        )
+
+        self.decoder = FireRedLIDDecoder(
+            vllm_config=vllm_config,
+            prefix=maybe_prefix(prefix, "decoder"),
+        )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        encoder_outputs: list[torch.Tensor] | None = None,
+    ) -> torch.Tensor:
+        enc_states = (
+            torch.cat(encoder_outputs, dim=0)
+            if encoder_outputs and len(encoder_outputs) > 0
+            else None
+        )
+        decoder_outputs = self.decoder(
+            input_ids=input_ids,
+            positions=positions,
+            encoder_hidden_states=enc_states,
+        )
+        return decoder_outputs
+
+    def get_encoder_outputs(
+        self,
+        speech: torch.Tensor | list[torch.Tensor],
+        speech_lengths: torch.Tensor | list[torch.Tensor],
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Run the encoder and return padded outputs plus true sequence lengths."""
+        enc_output, enc_lengths, _ = self.encoder(speech, speech_lengths)
+        return enc_output, enc_lengths
+
+
+class FireRedLIDProcessingInfo(BaseProcessingInfo):
+    def get_hf_config(self):
+        return self.ctx.get_hf_config()
+
+    def get_supported_mm_limits(self) -> Mapping[str, int | None]:
+        return {"audio": 1}
+
+    def get_feature_extractor(self, **kwargs):
+        hf_processor = self.get_hf_processor(**kwargs)
+        feature_extractor = hf_processor.feature_extractor
+        return feature_extractor
+
+    def get_data_parser(self) -> MultiModalDataParser:
+        feature_extractor = self.get_feature_extractor()
+        return MultiModalDataParser(
+            target_sr=feature_extractor.sampling_rate,
+            target_channels=1,
+        )
+
+    @property
+    def skip_prompt_length_check(self) -> bool:
+        return True
+
+    def get_num_audio_tokens(self) -> int:
+        # For encoder profiling – return a reasonable dummy length.
+        # This doesn't affect actual inference since encoder processes
+        # variable-length features.
+        return 1
+
+
+class FireRedLIDDummyInputsBuilder(BaseDummyInputsBuilder[FireRedLIDProcessingInfo]):
+    def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
+        return "<sos>"
+
+    def get_dummy_mm_data(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+        mm_options: Mapping[str, BaseDummyOptions],
+    ) -> MultiModalDataDict:
+        feature_extractor = self.info.get_feature_extractor()
+        sampling_rate = feature_extractor.sampling_rate
+        audio_len = feature_extractor.chunk_length * sampling_rate
+        num_audios = mm_counts.get("audio", 0)
+        audio_overrides = mm_options.get("audio")
+        return {
+            "audio": self._get_dummy_audios(
+                length=audio_len,
+                num_audios=num_audios,
+                overrides=audio_overrides,
+            )
+        }
+
+
+class FireRedLIDMultiModalProcessor(
+    EncDecMultiModalProcessor[FireRedLIDProcessingInfo]
+):
+    def create_encoder_prompt(
+        self,
+        prompt: str | list[int],
+        mm_items: MultiModalDataItems,
+    ) -> str | list[int]:
+        # Dummy encoder prompt for profiling (encoder only processes audio).
+        return [0]
+
+    def _call_hf_processor(
+        self,
+        prompt: str,
+        mm_data: Mapping[str, object],
+        mm_kwargs: Mapping[str, object],
+        tok_kwargs: Mapping[str, object],
+    ) -> BatchFeature:
+        if mm_data:
+            feature_extractor = self.info.get_feature_extractor(**mm_kwargs)
+            mm_data = dict(audio=mm_data.pop("audios"))
+            mm_kwargs = dict(
+                **mm_kwargs,
+                sampling_rate=feature_extractor.sampling_rate,
+            )
+        processed_outputs = super()._call_hf_processor(
+            prompt=prompt,
+            mm_data=mm_data,
+            mm_kwargs=mm_kwargs,
+            tok_kwargs=tok_kwargs,
+        )
+        if "labels" in processed_outputs:
+            processed_outputs["input_ids"] = processed_outputs.pop("labels")
+        return processed_outputs
+
+    def _get_mm_fields_config(
+        self,
+        hf_inputs: BatchFeature,
+        hf_processor_mm_kwargs: Mapping[str, object],
+    ) -> Mapping[str, MultiModalFieldConfig]:
+        return dict(
+            input_features=MultiModalFieldConfig.batched("audio"),
+            speech_lengths=MultiModalFieldConfig.batched("audio"),
+            fake_token_lengths=MultiModalFieldConfig.batched("audio"),
+        )
+
+    def _get_prompt_updates(
+        self,
+        mm_items: MultiModalDataItems,
+        hf_processor_mm_kwargs: Mapping[str, object],
+        out_mm_kwargs: MultiModalKwargsItems,
+    ) -> Sequence[PromptUpdate]:
+        out_mm_data = out_mm_kwargs.get_data()
+        fake_token_lengths = out_mm_data.get("fake_token_lengths")
+
+        if fake_token_lengths is None:
+            # Fallback to max encoder output length if not available
+            audio_output_lengths = []
+        else:
+            assert isinstance(fake_token_lengths, torch.Tensor)
+            audio_output_lengths = fake_token_lengths.tolist()
+
+        def get_replacement(item_idx: int):
+            if audio_output_lengths:
+                num_tokens = int(audio_output_lengths[item_idx])
+            else:
+                num_tokens = self.info.get_num_audio_tokens()
+            return [0] * num_tokens
+
+        return [
+            PromptReplacement(
+                modality="audio",
+                target=[0],
+                replacement=get_replacement,
+            )
+        ]
+
+
+# FireRedLID supports a wider set of languages than Whisper's shared list.
+# Only ISO 639-1 codes are listed; FireRedLID's dialect tokens (mandarin,
+# xinan, wu, …) are output tokens but not valid language *request* codes.
+_FIREREDLID_SUPPORTED_LANGUAGES: Mapping[str, str] = {
+    **ISO639_1_SUPPORTED_LANGS,
+    "am": "Amharic",
+    "as": "Assamese",
+    "ba": "Bashkir",
+    "bn": "Bengali",
+    "bo": "Tibetan",
+    "br": "Breton",
+    "eu": "Basque",
+    "fo": "Faroese",
+    "gu": "Gujarati",
+    "ha": "Hausa",
+    "haw": "Hawaiian",
+    "ht": "Haitian Creole",
+    "jw": "Javanese",
+    "ka": "Georgian",
+    "km": "Khmer",
+    "la": "Latin",
+    "lb": "Luxembourgish",
+    "ln": "Lingala",
+    "lo": "Lao",
+    "mg": "Malagasy",
+    "ml": "Malayalam",
+    "mn": "Mongolian",
+    "mt": "Maltese",
+    "my": "Myanmar",
+    "nn": "Nynorsk",
+    "oc": "Occitan",
+    "pa": "Panjabi",
+    "ps": "Pashto",
+    "sa": "Sanskrit",
+    "sd": "Sindhi",
+    "si": "Sinhala",
+    "sn": "Shona",
+    "so": "Somali",
+    "sq": "Albanian",
+    "su": "Sundanese",
+    "te": "Telugu",
+    "tg": "Tajik",
+    "tk": "Turkmen",
+    "tt": "Tatar",
+    "uz": "Uzbek",
+    "yi": "Yiddish",
+    "yo": "Yoruba",
+    "yue": "Cantonese",
+}
+
+
+@MULTIMODAL_REGISTRY.register_processor(
+    FireRedLIDMultiModalProcessor,
+    info=FireRedLIDProcessingInfo,
+    dummy_inputs=FireRedLIDDummyInputsBuilder,
+)
+class FireRedLIDForConditionalGeneration(
+    nn.Module, SupportsTranscription, SupportsMultiModal
+):
+    # -- SupportsTranscription protocol attributes --
+    supports_transcription_only = True
+    supported_languages = _FIREREDLID_SUPPORTED_LANGUAGES
+
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_substr={
+            "encoder.": "model.encoder.",
+            "lid_decoder.": "model.decoder.",
+            # Encoder FFN: nn.Sequential indices → named children
+            "net.0": "pre_layer_norm",
+            "net.1": "linear_expand",
+            "net.4": "linear_project",
+        }
+    )
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        self.config = config
+        self.dtype = vllm_config.model_config.dtype
+
+        with self._mark_composite_model(
+            vllm_config,
+            language_targets=FireRedLIDDecoder,
+            tower_targets={"audio": FireRedLIDEncoder},
+        ):
+            self.model = FireRedLIDModel(
+                vllm_config=vllm_config,
+                prefix=maybe_prefix(prefix, "model"),
+            )
+
+        self.proj_out = ParallelLMHead(
+            getattr(config, "vocab_size", 120),
+            getattr(config, "d_model", 1280),
+            quant_config=vllm_config.quant_config,
+            prefix=maybe_prefix(prefix, "proj_out"),
+        )
+        self.proj_out = self.proj_out.tie_weights(self.model.decoder.tgt_word_emb)
+
+        logit_scale = getattr(config, "logit_scale", 1.0)
+        self.logits_processor = LogitsProcessor(
+            getattr(config, "vocab_size", 120),
+            scale=logit_scale,
+        )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        encoder_outputs: list[torch.Tensor] | None = None,
+        **kwargs,
+    ) -> torch.Tensor:
+        if encoder_outputs is None:
+            encoder_outputs = []
+        decoder_outputs = self.model(
+            input_ids=input_ids,
+            positions=positions,
+            encoder_outputs=encoder_outputs,
+        )
+        return decoder_outputs
+
+    def embed_multimodal(self, **kwargs: object) -> MultiModalEmbeddings:
+        """Run encoder on audio features and return per-item embeddings."""
+        audio_input = self._parse_and_validate_audio_input(**kwargs)
+
+        speech = audio_input["input_features"]
+        speech_lengths = audio_input["speech_lengths"]
+        if speech is None or speech_lengths is None:
+            return []
+
+        # When audio items have different time lengths, vLLM's
+        # MultiModalBatchedField._reduce_data returns a plain
+        # list[Tensor] instead of a stacked Tensor.  The encoder
+        # expects a padded [B, Tmax, feat_dim] Tensor, so we
+        # normalise both speech and speech_lengths here.
+        if isinstance(speech, (list, tuple)):
+            # Each element: [Ti, feat_dim]  (or [1, Ti, feat_dim])
+            tensors = [
+                s.squeeze(0) if s.dim() == 3 and s.size(0) == 1 else s for s in speech
+            ]
+            device = tensors[0].device
+            dtype = tensors[0].dtype
+            feat_dim = tensors[0].shape[-1]
+            lengths = torch.tensor(
+                [t.size(0) for t in tensors],
+                device=device,
+                dtype=torch.int32,
+            )
+            t_max = int(lengths.max().item())
+            # Pre-allocate zero-padded batch tensor
+            speech = torch.zeros(
+                (len(tensors), t_max, feat_dim),
+                device=device,
+                dtype=dtype,
+            )
+            for i, t in enumerate(tensors):
+                speech[i, : t.size(0)] = t
+            speech_lengths = lengths
+        else:
+            # Already a batched Tensor [B, T, feat_dim]
+            if speech.dim() == 2:
+                speech = speech.unsqueeze(0)
+
+        speech_lengths = torch.as_tensor(
+            speech_lengths, dtype=torch.int32, device=speech.device
+        )
+
+        enc_output, enc_lengths = self.model.get_encoder_outputs(
+            speech=speech,
+            speech_lengths=speech_lengths,
+        )
+
+        # vLLM expects one 2D tensor per multimodal item. Slice each batch entry
+        # by the true encoder length so cross-attention never sees padded frames.
+        return tuple(
+            enc_output[i, : max(0, int(enc_lengths[i].item()))]
+            for i in range(enc_output.size(0))
+        )
+
+    def embed_input_ids(
+        self,
+        input_ids: torch.Tensor,
+        multimodal_embeddings: MultiModalEmbeddings | None = None,
+        *,
+        is_multimodal: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        return self.model.decoder.embed_input_ids(input_ids)
+
+    def _parse_and_validate_audio_input(
+        self, **kwargs: object
+    ) -> FireRedLIDAudioInputs:
+        input_features = kwargs.pop("input_features", None)
+        speech_lengths = kwargs.pop("speech_lengths", None)
+        fake_token_lengths = kwargs.pop("fake_token_lengths", None)
+        return FireRedLIDAudioInputs(
+            input_features=input_features,
+            speech_lengths=speech_lengths,
+            fake_token_lengths=fake_token_lengths,
+        )
+
+    def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        logits = self.logits_processor(self.proj_out, hidden_states)
+        return logits
+
+    @classmethod
+    def validate_language(cls, language: str | None) -> str | None:
+        # FireRedLID is a language *identification* model – the caller does
+        # not need to specify a language up-front.  Accept None silently.
+        if language is None:
+            return None
+        return super().validate_language(language)
+
+    @classmethod
+    def get_generation_prompt(
+        cls,
+        audio: np.ndarray,
+        stt_config: SpeechToTextConfig,
+        model_config: ModelConfig,
+        language: str | None,
+        task_type: Literal["transcribe", "translate"],
+        request_prompt: str,
+        to_language: str | None,
+    ) -> PromptType:
+        """Build the prompt for the FireRedLID encoder-decoder model.
+
+        The decoder receives a single <sos> token; the encoder processes
+        the raw audio waveform via the multimodal pipeline.
+        """
+        prompt: PromptType = {
+            "encoder_prompt": {
+                "prompt": "",
+                "multi_modal_data": {
+                    "audio": (audio, int(stt_config.sample_rate)),
+                },
+            },
+            "decoder_prompt": {
+                "prompt": "<sos>",
+            },
+        }
+        return prompt
+
+    @classmethod
+    def get_speech_to_text_config(
+        cls,
+        model_config: ModelConfig,
+        task_type: Literal["transcribe", "translate"],
+    ) -> SpeechToTextConfig:
+        processor = cached_processor_from_config(model_config)
+        return SpeechToTextConfig(
+            max_audio_clip_s=processor.feature_extractor.chunk_length,
+            sample_rate=processor.feature_extractor.sampling_rate,
+            # LID output is at most 2 tokens – no chunking needed.
+            min_energy_split_window_size=None,
+        )
+
+    @classmethod
+    def post_process_output(cls, text: str) -> str:
+        # Strip any leading/trailing whitespace from the raw LID output.
+        return text.strip()
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(
+            self,
+            skip_prefixes=[
+                # Position encoding buffers are rebuilt at init
+                "model.encoder.positional_encoding.pe",
+                "model.decoder.positional_encoding.pe",
+                # Tied output projection (shared with embedding)
+                "model.decoder.tgt_word_prj.weight",
+                "proj_out.",
+            ],
+        )
+        return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
diff --git a/vllm/model_executor/models/flex_olmo.py b/vllm/model_executor/models/flex_olmo.py
index 67be99a879ff..2ff9d860567d 100644
--- a/vllm/model_executor/models/flex_olmo.py
+++ b/vllm/model_executor/models/flex_olmo.py
@@ -20,7 +20,9 @@
 from vllm.config import VllmConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.logger import init_logger
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import ReplicatedLinear
 from vllm.model_executor.models.olmoe import OlmoeAttention, OlmoeForCausalLM
@@ -76,7 +78,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             top_k=hf_config.num_experts_per_tok,
             hidden_size=hf_config.hidden_size,
             intermediate_size=hf_config.intermediate_size,
-            reduce_results=True,
             renormalize=False,
             quant_config=None,
             tp_size=tp_size,
diff --git a/vllm/model_executor/models/funasr.py b/vllm/model_executor/models/funasr.py
index 98313db79806..4b5a1c025937 100644
--- a/vllm/model_executor/models/funasr.py
+++ b/vllm/model_executor/models/funasr.py
@@ -3,9 +3,8 @@
 
 import math
 from collections.abc import Iterable, Mapping, Sequence
-from typing import Annotated, Literal, cast
+from typing import Annotated, cast
 
-import numpy as np
 import torch
 import torch.nn.functional as F
 from torch import nn
@@ -16,6 +15,7 @@
 
 from vllm.config import ModelConfig, SpeechToTextConfig, VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
+from vllm.config.speech_to_text import SpeechToTextParams
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.inputs import MultiModalDataDict, PromptType
 from vllm.logger import init_logger
@@ -876,20 +876,25 @@ def validate_language(cls, language: str | None) -> str | None:
     @classmethod
     def get_generation_prompt(
         cls,
-        audio: np.ndarray,
-        model_config: ModelConfig,  # not needed here
-        stt_config: SpeechToTextConfig,
-        language: str | None,
-        task_type: Literal["transcribe", "translate"],
-        request_prompt: str,
-        to_language: str | None,
+        stt_params: SpeechToTextParams,
     ) -> PromptType:
+        audio = stt_params.audio
+        stt_config = stt_params.stt_config
+        language = stt_params.language
+        hotwords = stt_params.hotwords
+
         if language is None:
             raise ValueError(
                 "Language must be specified when creating the funasr prompt"
             )
 
-        funasr_prompt = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n语音转写：<|AUDIO|><|im_end|>\n<|im_start|>assistant\n"  # noqa: E501
+        if hotwords is not None:
+            funasr_prompt = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n请结合上下文信息，更加准确地完成语音转写任务。如果没有相关信息，我们会留空。\n\n\n**上下文信息：**\n\n\n热词列表：[{}]\n语音转写：<|AUDIO|><|im_end|>\n<|im_start|>assistant\n".format(  # noqa: E501
+                hotwords
+            )
+        else:
+            funasr_prompt = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n语音转写：<|AUDIO|><|im_end|>\n<|im_start|>assistant\n"  # noqa: E501
+
         prompt = {
             "prompt": funasr_prompt,
             "multi_modal_data": {
diff --git a/vllm/model_executor/models/gemma3.py b/vllm/model_executor/models/gemma3.py
index b2352a3c9268..f61f7c6f780b 100644
--- a/vllm/model_executor/models/gemma3.py
+++ b/vllm/model_executor/models/gemma3.py
@@ -26,7 +26,7 @@
 from vllm.config import CacheConfig, VllmConfig
 from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
 from vllm.logger import init_logger
-from vllm.model_executor.layers.activation import GeluAndMul
+from vllm.model_executor.layers.activation import get_act_and_mul_fn
 from vllm.model_executor.layers.attention import (
     Attention,
     EncoderOnlyAttention,
@@ -88,13 +88,7 @@ def __init__(
             quant_config=quant_config,
             prefix=f"{prefix}.down_proj",
         )
-        if hidden_activation != "gelu_pytorch_tanh":
-            raise ValueError(
-                "Gemma3 uses `gelu_pytorch_tanh` as the hidden activation "
-                "function. Please set `hidden_act` and `hidden_activation` to "
-                "`gelu_pytorch_tanh`."
-            )
-        self.act_fn = GeluAndMul(approximate="tanh")
+        self.act_fn = get_act_and_mul_fn(hidden_activation)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         gate_up, _ = self.gate_up_proj(x)
diff --git a/vllm/model_executor/models/gemma3_mm.py b/vllm/model_executor/models/gemma3_mm.py
index 0f059b6d1340..6ecadbcd6703 100644
--- a/vllm/model_executor/models/gemma3_mm.py
+++ b/vllm/model_executor/models/gemma3_mm.py
@@ -305,7 +305,7 @@ def _get_mm_fields_config(
 
         return dict(
             pixel_values=MultiModalFieldConfig.flat_from_sizes("image", num_patches),
-            num_patches=MultiModalFieldConfig.batched("image"),
+            num_patches=MultiModalFieldConfig.batched("image", keep_on_cpu=True),
         )
 
     def _get_prompt_updates(
diff --git a/vllm/model_executor/models/gemma3n_mm.py b/vllm/model_executor/models/gemma3n_mm.py
index 342d6c476dfa..2b5266f0c9f6 100644
--- a/vllm/model_executor/models/gemma3n_mm.py
+++ b/vllm/model_executor/models/gemma3n_mm.py
@@ -3,7 +3,6 @@
 from collections.abc import Iterable, Mapping, Sequence
 from typing import Annotated, Any, Literal
 
-import numpy as np
 import torch
 from torch import nn
 from transformers import AutoModel, BatchFeature
@@ -19,6 +18,7 @@
 
 from vllm.config import ModelConfig, SpeechToTextConfig, VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
+from vllm.config.speech_to_text import SpeechToTextParams
 from vllm.inputs import MultiModalDataDict, PromptType, TextPrompt
 from vllm.logger import init_logger
 from vllm.model_executor.layers.layernorm import RMSNorm
@@ -421,6 +421,7 @@ def __init__(
             self.multimodal_hidden_size,
             self.text_hidden_size,
             bias=False,
+            input_is_parallel=False,  # scatter the full-width input internally
         )
 
         self.embedding_post_projection_norm = RMSNorm(
@@ -769,21 +770,17 @@ def get_placeholder_str(cls, modality: str, i: int) -> str | None:
             raise ValueError(f"Unsupported modality: {modality}")
 
     @classmethod
-    def get_generation_prompt(
-        cls,
-        audio: np.ndarray,
-        stt_config: SpeechToTextConfig,
-        model_config: ModelConfig,
-        language: str | None,
-        task_type: Literal["transcribe", "translate"],
-        request_prompt: str,
-        to_language: str | None,
-    ) -> PromptType:
+    def get_generation_prompt(cls, stt_params: SpeechToTextParams) -> PromptType:
         """
         Gemma3n supports "free-form" transcription.
         We fix its prompt here to standardize transcriptions/translations
         requests.
         """
+        audio = stt_params.audio
+        stt_config = stt_params.stt_config
+        language = stt_params.language
+        task_type = stt_params.task_type
+        to_language = stt_params.to_language
         # Transcribe this audio [into <>] | for transcription
         # Translate this audio [from <> into <>] | for translation
         prompt = "<start_of_turn>user\n"
diff --git a/vllm/model_executor/models/gemma4.py b/vllm/model_executor/models/gemma4.py
new file mode 100644
index 000000000000..75f6945cccb5
--- /dev/null
+++ b/vllm/model_executor/models/gemma4.py
@@ -0,0 +1,1721 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# Copyright 2025 The vLLM team.
+# Copyright 2025 Google Inc. HuggingFace Inc. team. All rights reserved.
+#
+#
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Gemma 4 model implementation for vLLM."""
+
+from collections.abc import Iterable
+from dataclasses import replace
+from itertools import islice
+
+import regex as re
+import torch
+from torch import nn
+
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import CacheConfig, VllmConfig
+from vllm.distributed import (
+    get_pp_group,
+    get_tensor_model_parallel_rank,
+    get_tensor_model_parallel_world_size,
+)
+from vllm.forward_context import get_forward_context
+from vllm.logger import init_logger
+from vllm.model_executor.layers.activation import get_act_and_mul_fn
+from vllm.model_executor.layers.attention import Attention
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    GateLinear,
+    fused_moe_make_expert_params_mapping,
+)
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import (
+    ColumnParallelLinear,
+    MergedColumnParallelLinear,
+    QKVParallelLinear,
+    ReplicatedLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    ParallelLMHead,
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.model_loader.weight_utils import (
+    default_weight_loader,
+    maybe_remap_kv_scale_name,
+)
+from vllm.platforms import current_platform
+from vllm.sequence import IntermediateTensors
+from vllm.triton_utils import tl, triton
+from vllm.v1.attention.backends.utils import KVSharingFastPrefillMetadata
+
+from .interfaces import (
+    EagleModelMixin,
+    MixtureOfExperts,
+    SupportsEagle3,
+    SupportsLoRA,
+    SupportsPP,
+)
+from .utils import (
+    AutoWeightsLoader,
+    WeightsMapper,
+    extract_layer_index,
+    is_pp_missing_parameter,
+    make_layers,
+    maybe_prefix,
+)
+
+logger = init_logger(__name__)
+
+
+def _remap_gemma4_expert_weight_name(name: str) -> str:
+    return re.sub(r"(?<!\.moe)\.experts\.(\d+)\.", r".moe.experts.\1.", name)
+
+
+@triton.jit
+def _gemma4_routing_kernel(
+    gating_ptr,
+    per_expert_scale_ptr,
+    topk_weights_ptr,
+    topk_ids_ptr,
+    E: tl.constexpr,
+    K: tl.constexpr,
+    BLOCK_E: tl.constexpr,
+):
+    pid = tl.program_id(0)
+    offs_e = tl.arange(0, BLOCK_E)
+    valid = offs_e < E
+
+    logits = tl.load(
+        gating_ptr + pid * E + offs_e,
+        mask=valid,
+        other=-float("inf"),
+    ).to(tl.float32)
+
+    max_l = tl.max(logits, axis=0)
+
+    # Float32 → ascending-sortable bijection
+    MIN32 = -2147483648
+    logit_bits = logits.to(tl.int32, bitcast=True)
+    sign_b = logit_bits >> 31
+    key = tl.where(sign_b == 0, logit_bits ^ -1, logit_bits ^ MIN32)
+    key = tl.where(valid, key, 0x7FFFFFFF)
+    sk64 = key.to(tl.int64) & 0x00000000FFFFFFFF
+    packed = (sk64 << 32) | offs_e.to(tl.int64)
+    sorted_p = tl.sort(packed, descending=False)
+
+    # Vectorized extraction of ALL sorted elements — no K-loop, no cross-lane reductions
+    all_keys = ((sorted_p >> 32) & 0x00000000FFFFFFFF).to(tl.int32)
+    all_ids = (sorted_p & 0x00000000FFFFFFFF).to(tl.int32)
+
+    # Inverse bijection: recover original logit bits
+    sign_k = all_keys >> 31
+    all_bits = tl.where(sign_k < 0, all_keys ^ -1, all_keys ^ MIN32)
+    all_logits = all_bits.to(tl.float32, bitcast=True)
+
+    # Compute raw_exp for ALL BLOCK_E elements — vectorized, ~2 VALU clocks
+    all_raw_exp = tl.math.exp2((all_logits - max_l) * 1.4426950408889634)
+
+    # Sum only top-K for renorm — ONE masked reduction
+    top_mask = offs_e < K
+    renorm_raw = tl.sum(tl.where(top_mask, all_raw_exp, 0.0), axis=0)
+    renorm_raw = tl.where(renorm_raw > 0.0, renorm_raw, 1.0)
+    inv_renorm = 1.0 / renorm_raw
+
+    # Load scales for top-K only (masked gather; scale array is tiny → L1 cached)
+    all_scales = tl.load(
+        per_expert_scale_ptr + all_ids.to(tl.int64),
+        mask=top_mask,
+        other=1.0,
+    ).to(tl.float32)
+
+    # Final weights: vectorized multiply (only top-K will be stored)
+    all_weights = (all_raw_exp * inv_renorm * all_scales).to(tl.float32)
+
+    # Write results with TWO masked stores — replaces K × 2 serial scalar stores
+    base_off = pid * K + offs_e
+    tl.store(topk_ids_ptr + base_off, all_ids, mask=top_mask)
+    tl.store(topk_weights_ptr + base_off, all_weights, mask=top_mask)
+
+
+def gemma4_fused_routing_kernel_triton(
+    gating_output: torch.Tensor,
+    topk: int,
+    per_expert_scale: torch.Tensor,
+    num_warps: int = 1,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    gating_output = gating_output.contiguous()
+    per_expert_scale = per_expert_scale.contiguous()
+    T, E = gating_output.shape
+    weights = torch.empty(T, topk, dtype=torch.float32, device=gating_output.device)
+    ids = torch.empty(T, topk, dtype=torch.int32, device=gating_output.device)
+    BLOCK_E = triton.next_power_of_2(E)
+    _gemma4_routing_kernel[(T,)](
+        gating_output,
+        per_expert_scale,
+        weights,
+        ids,
+        E,
+        topk,
+        BLOCK_E,
+        num_warps=num_warps,
+    )
+    return weights, ids
+
+
+def gemma4_routing_function_torch(
+    gating_output: torch.Tensor,
+    topk: int,
+    per_expert_scale: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    _, topk_ids = torch.topk(gating_output, k=topk, dim=-1)
+    router_probabilities = torch.nn.functional.softmax(gating_output, dim=-1)
+    indicator = torch.nn.functional.one_hot(
+        topk_ids, num_classes=gating_output.size(-1)
+    ).sum(dim=-2)
+    gate_weights = indicator * router_probabilities
+    renorm_factor = torch.sum(gate_weights, dim=-1, keepdim=True)
+    renorm_factor = torch.where(renorm_factor > 0.0, renorm_factor, 1.0)
+    dispatch_weights = gate_weights / renorm_factor
+
+    topk_weights = dispatch_weights.gather(1, topk_ids)
+
+    # Fold per_expert_scale into routing weights
+    expert_scales = per_expert_scale[topk_ids].to(topk_weights.dtype)
+    topk_weights = topk_weights * expert_scales
+    return topk_weights.to(torch.float32), topk_ids.to(torch.int32)
+
+
+def _get_text_config(config):
+    """Dereference text_config if config is a nested Gemma4Config.
+
+    Gemma4 checkpoints use architectures=["Gemma4ForConditionalGeneration"]
+    which yields a Gemma4Config with nested text_config. This function
+    transparently returns the text config regardless of nesting.
+    """
+    if hasattr(config, "text_config"):
+        return config.text_config
+    return config
+
+
+class Gemma4MLP(nn.Module):
+    def __init__(
+        self,
+        hidden_size: int,
+        intermediate_size: int,
+        hidden_activation: str,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.gate_up_proj = MergedColumnParallelLinear(
+            hidden_size,
+            [intermediate_size] * 2,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.gate_up_proj",
+        )
+        self.down_proj = RowParallelLinear(
+            intermediate_size,
+            hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.down_proj",
+        )
+        self.act_fn = get_act_and_mul_fn(hidden_activation)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        gate_up, _ = self.gate_up_proj(x)
+        x = self.act_fn(gate_up)
+        x, _ = self.down_proj(x)
+        return x
+
+
+class Gemma4Router(nn.Module):
+    """Router for Gemma4 MoE that preprocesses input before projection.
+
+    Applies RMSNorm (no learned weight), root_size scaling
+    (hidden_size^{-0.5}), then a learned per-dimension scale before
+    projecting to expert logits.
+
+    This preprocessing is applied ONLY to the router's input, not to
+    the expert MLPs' input.
+    """
+
+    def __init__(
+        self,
+        config,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.hidden_size = config.hidden_size
+
+        # RMSNorm without learned weight — pure normalization only
+        self.norm = RMSNorm(self.hidden_size, eps=config.rms_norm_eps, has_weight=False)
+        # Per-dimension learned scale, applied after norm + root_size
+        self.scale = nn.Parameter(torch.ones(self.hidden_size))
+        # Constant 1/sqrt(hidden_size) scaling factor
+        self.register_buffer(
+            "root_size",
+            torch.tensor(self.hidden_size**-0.5),
+            persistent=False,
+        )
+        # Project to expert logits; replicated across TP for consistent routing
+        # GateLinear supports bf16 W/A → fp32 output, which is important
+        # because the topk kernel often needs fp32 for stable routing.
+        self.proj = GateLinear(
+            self.hidden_size,
+            config.num_experts,
+            bias=False,
+            out_dtype=torch.float32,
+            prefix=f"{prefix}.proj",
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Returns raw router logits [T, E]."""
+        x = self.norm(x)
+        x = x * self.root_size.to(x.dtype)
+        x = x * self.scale.to(x.dtype)
+        router_logits, _ = self.proj(x)
+        return router_logits
+
+
+class Gemma4MoE(nn.Module):
+    """Mixture of Experts for Gemma4 using vLLM's FusedMoE.
+
+    Wraps FusedMoE with custom routing. The router projection is
+    external (Gemma4Router) — this class only handles expert dispatch.
+
+    Gemma4 routing: softmax over ALL experts → top-k → renormalize.
+    per_expert_scale is folded into routing weights for mathematical
+    correctness with FusedMoE's fused kernel.
+    """
+
+    def __init__(
+        self,
+        config,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.hidden_size = config.hidden_size
+        self.num_experts = config.num_experts
+
+        # Per-expert output scale folded into routing weights so that
+        # FusedMoE's fused kernel computes: Σ_e (expert_e * w_e * scale_e)
+        self.per_expert_scale = nn.Parameter(torch.ones(config.num_experts))
+
+        # Gemma4 routing: softmax over ALL experts → top-k → renormalize.
+        # FusedMoE's built-in fused_topk scopes softmax differently, so
+        # a custom routing function is needed for numerical correctness.
+        # NOTE: self.per_expert_scale is read at call time (not captured into
+        # a local) so that torch.func.functional_call parameter substitution
+        # reaches the routing function correctly.
+        def routing_function(
+            hidden_states: torch.Tensor,
+            gating_output: torch.Tensor,
+            topk: int,
+            renormalize: bool,
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+            if current_platform.is_cuda_alike() or current_platform.is_xpu():
+                return gemma4_fused_routing_kernel_triton(
+                    gating_output, topk, self.per_expert_scale
+                )
+
+            return gemma4_routing_function_torch(
+                gating_output, topk, self.per_expert_scale
+            )
+
+        # FusedMoE experts with custom Gemma4 routing
+        self.experts = FusedMoE(
+            num_experts=config.num_experts,
+            top_k=config.top_k_experts,
+            hidden_size=config.hidden_size,
+            intermediate_size=getattr(
+                config,
+                "moe_intermediate_size",
+                getattr(config, "expert_intermediate_size", None),
+            ),
+            renormalize=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.experts",
+            custom_routing_function=routing_function,
+            activation="gelu_tanh",
+        )
+
+    def forward(self, x: torch.Tensor, router_logits: torch.Tensor) -> torch.Tensor:
+        return self.experts(x, router_logits)
+
+
+class Gemma4Attention(nn.Module):
+    def __init__(
+        self,
+        config,
+        hidden_size: int,
+        num_heads: int,
+        num_kv_heads: int,
+        head_dim: int,
+        max_position_embeddings: int,
+        use_k_eq_v: bool = False,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        attn_logits_soft_cap: float | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.config = config
+        self.hidden_size = hidden_size
+        self.use_k_eq_v = use_k_eq_v
+
+        tp_size = get_tensor_model_parallel_world_size()
+        self.tp_rank = get_tensor_model_parallel_rank()
+        self.total_num_heads = num_heads
+        assert self.total_num_heads % tp_size == 0
+        self.num_heads = self.total_num_heads // tp_size
+        self.total_num_kv_heads = num_kv_heads
+        if self.total_num_kv_heads >= tp_size:
+            assert self.total_num_kv_heads % tp_size == 0
+        else:
+            assert tp_size % self.total_num_kv_heads == 0
+        self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
+        self.head_dim = head_dim
+        self.q_size = self.num_heads * self.head_dim
+        self.kv_size = self.num_kv_heads * self.head_dim
+        # Gemma4 uses scaling=1.0.
+        # Unlike Gemma2/3, query_pre_attn_scalar is NOT used here;
+        # Q/K norms with learnable weights handle scaling implicitly.
+        self.scaling = 1.0
+
+        # QKVParallelLinear handles GQA correctly for all layer types.
+        # k_eq_v layers load K weights into both K and V slots via
+        # _weight_iterator remapping — no structural difference needed.
+        self.qkv_proj = QKVParallelLinear(
+            hidden_size,
+            self.head_dim,
+            self.total_num_heads,
+            self.total_num_kv_heads,
+            bias=config.attention_bias,
+            quant_config=quant_config,
+            prefix=f"{prefix}.qkv_proj",
+        )
+        self.o_proj = RowParallelLinear(
+            self.total_num_heads * self.head_dim,
+            hidden_size,
+            bias=config.attention_bias,
+            quant_config=quant_config,
+            prefix=f"{prefix}.o_proj",
+        )
+
+        # Q/K norms: output = norm(x) * weight (learnable per-head scale)
+        self.q_norm = RMSNorm(self.head_dim, eps=config.rms_norm_eps)
+        self.k_norm = RMSNorm(self.head_dim, eps=config.rms_norm_eps)
+        # V norm: no learnable scale (pure normalization only)
+        self.v_norm = RMSNorm(self.head_dim, eps=config.rms_norm_eps, has_weight=False)
+
+        # Determine layer type and sliding window
+        layer_idx = extract_layer_index(prefix)
+        layer_type = config.layer_types[layer_idx]
+        self.is_sliding = layer_type == "sliding_attention"
+        sliding_window = config.sliding_window if self.is_sliding else None
+
+        # Initialize RoPE based on layer type.
+        # Gemma4 uses different RoPE parameters for sliding vs full attention.
+        if layer_type in config.rope_parameters:
+            # Per-layer-type rope config (dict format).
+            # rope_parameters already contains the correct
+            # partial_rotary_factor per layer type (1.0 for full
+            # attention, 1.0 for sliding). Do NOT override with
+            # global_partial_rotary_factor — that config key is
+            # not needed for Gemma4 — config uses per-layer rope_parameters.
+            rope_parameters = dict(config.rope_parameters[layer_type])
+        else:
+            # Legacy config format fallback.
+            rope_parameters = dict(config.rope_parameters.copy())
+            if self.is_sliding:
+                rope_parameters["rope_theta"] = getattr(
+                    config, "rope_local_base_freq", 10000.0
+                )
+
+        # KV sharing: layers in the last `num_kv_shared_layers` share KV
+        # cache with earlier layers of the same type.
+        kv_sharing_target_layer_name = None
+        self.is_kv_shared_layer = False
+        num_kv_shared_layers = getattr(config, "num_kv_shared_layers", 0)
+        if num_kv_shared_layers > 0:
+            first_kv_shared_layer_idx = config.num_hidden_layers - num_kv_shared_layers
+            if layer_idx >= first_kv_shared_layer_idx:
+                self.is_kv_shared_layer = True
+                # Find the last non-shared layer of the same attention type
+                prev_layers = config.layer_types[:first_kv_shared_layer_idx]
+                current_layer_type = config.layer_types[layer_idx]
+                kv_shared_layer_index = (
+                    len(prev_layers) - 1 - prev_layers[::-1].index(current_layer_type)
+                )
+                if kv_shared_layer_index >= 0:
+                    if ".layers." in prefix:
+                        param_name_before_layers = prefix.split(".layers.")[0]
+                    else:
+                        raise ValueError(
+                            "Unexpected prefix format for Gemma4Attention: "
+                            f"'{prefix}'. Expected to contain '.layers.'."
+                        )
+                    kv_sharing_target_layer_name = (
+                        f"{param_name_before_layers}.layers."
+                        f"{kv_shared_layer_index}.self_attn.attn"
+                    )
+
+        self.rotary_emb = get_rope(
+            self.head_dim,
+            max_position=max_position_embeddings,
+            rope_parameters=rope_parameters,
+            is_neox_style=True,
+        )
+
+        self.attn = Attention(
+            self.num_heads,
+            self.head_dim,
+            self.scaling,
+            num_kv_heads=self.num_kv_heads,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            logits_soft_cap=attn_logits_soft_cap,
+            per_layer_sliding_window=sliding_window,
+            kv_sharing_target_layer_name=kv_sharing_target_layer_name,
+            prefix=f"{prefix}.attn",
+        )
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        **kwargs,
+    ) -> torch.Tensor:
+        # Unified QKV path (works for both k_eq_v and standard layers).
+        # For k_eq_v, K weights are loaded into both K and V slots of
+        # qkv_proj, so V == K automatically.
+        qkv, _ = self.qkv_proj(hidden_states)
+        q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
+
+        # Q norm (always applied)
+        q = q.unflatten(-1, (self.num_heads, self.head_dim))
+        q = self.q_norm(q)
+        q = q.flatten(-2, -1)
+
+        if not self.is_kv_shared_layer:
+            # Non-shared: apply K norm + RoPE, V norm
+            k = k.unflatten(-1, (self.num_kv_heads, self.head_dim))
+            k = self.k_norm(k)
+            k = k.flatten(-2, -1)
+            q, k = self.rotary_emb(positions, q, k)
+
+            v = v.unflatten(-1, (self.num_kv_heads, self.head_dim))
+            v = self.v_norm(v)
+            v = v.flatten(-2, -1)
+        else:
+            # Shared: only apply RoPE to Q
+            q = self.rotary_emb(positions, q, k)[0]
+
+        attn_output = self.attn(q, k, v)
+        output, _ = self.o_proj(attn_output)
+
+        return output
+
+
+class Gemma4DecoderLayer(nn.Module):
+    def __init__(
+        self,
+        config,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.hidden_size = config.hidden_size
+        self.hidden_size_per_layer_input = getattr(
+            config, "hidden_size_per_layer_input", 0
+        )
+
+        layer_idx = extract_layer_index(prefix)
+        self.layer_idx = layer_idx
+
+        # Gemma4 uses different head dimensions for sliding vs full attention
+        layer_type = config.layer_types[layer_idx]
+        self.is_full_attention = layer_type == "full_attention"
+        if self.is_full_attention:
+            head_dim = getattr(config, "global_head_dim", config.head_dim)
+        else:
+            head_dim = config.head_dim
+
+        # Determine if this full-attention layer uses k_eq_v
+        # (laptop variant: no v_proj, K reused as V on full attention layers)
+        use_k_eq_v = self.is_full_attention and getattr(
+            config, "attention_k_eq_v", False
+        )
+
+        # For k_eq_v full-attention layers, use num_global_key_value_heads
+        # as the KV head count when k_eq_v is enabled.
+        if use_k_eq_v:
+            num_kv_heads = getattr(
+                config, "num_global_key_value_heads", config.num_key_value_heads
+            )
+        else:
+            num_kv_heads = config.num_key_value_heads
+
+        self.self_attn = Gemma4Attention(
+            config=config,
+            hidden_size=self.hidden_size,
+            num_heads=config.num_attention_heads,
+            num_kv_heads=num_kv_heads,
+            head_dim=head_dim,
+            max_position_embeddings=config.max_position_embeddings,
+            use_k_eq_v=use_k_eq_v,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            attn_logits_soft_cap=getattr(config, "attn_logit_softcapping", None),
+            prefix=f"{prefix}.self_attn",
+        )
+
+        # Compute per-layer intermediate_size from config.
+        # When use_double_wide_mlp is set, intermediate_size doubles for
+        # KV-shared layers (layers >= first_kv_shared_layer_idx).
+        first_kv_shared_layer_idx = config.num_hidden_layers - getattr(
+            config, "num_kv_shared_layers", 0
+        )
+        is_kv_shared_layer = layer_idx >= first_kv_shared_layer_idx > 0
+        use_double_wide_mlp = (
+            getattr(config, "use_double_wide_mlp", False) and is_kv_shared_layer
+        )
+        layer_intermediate_size = config.intermediate_size * (
+            2 if use_double_wide_mlp else 1
+        )
+
+        self.mlp = Gemma4MLP(
+            hidden_size=self.hidden_size,
+            intermediate_size=layer_intermediate_size,
+            hidden_activation=config.hidden_activation,
+            quant_config=quant_config,
+            prefix=f"{prefix}.mlp",
+        )
+
+        # Layer norms: output = norm(x) * weight
+        self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.post_attention_layernorm = RMSNorm(
+            config.hidden_size, eps=config.rms_norm_eps
+        )
+        self.pre_feedforward_layernorm = RMSNorm(
+            config.hidden_size, eps=config.rms_norm_eps
+        )
+        self.post_feedforward_layernorm = RMSNorm(
+            config.hidden_size, eps=config.rms_norm_eps
+        )
+
+        # MoE (Mixture of Experts) — router + expert block parallel to MLP
+        self.enable_moe_block = getattr(config, "enable_moe_block", False) or getattr(
+            config, "use_second_mlp_block", False
+        )
+        if self.enable_moe_block:
+            self.router = Gemma4Router(
+                config,
+                quant_config=quant_config,
+                prefix=f"{prefix}.router",
+            )
+            self.moe = Gemma4MoE(
+                config,
+                quant_config=quant_config,
+                prefix=f"{prefix}.moe",
+            )
+            self.post_feedforward_layernorm_1 = RMSNorm(
+                config.hidden_size, eps=config.rms_norm_eps
+            )
+            self.post_feedforward_layernorm_2 = RMSNorm(
+                config.hidden_size, eps=config.rms_norm_eps
+            )
+            self.pre_feedforward_layernorm_2 = RMSNorm(
+                config.hidden_size, eps=config.rms_norm_eps
+            )
+        else:
+            self.router = None
+            self.moe = None
+            self.post_feedforward_layernorm_1 = None
+            self.post_feedforward_layernorm_2 = None
+            self.pre_feedforward_layernorm_2 = None
+
+        # Per-Layer Embedding (PLE) components — present in each decoder layer
+        if (
+            self.hidden_size_per_layer_input is not None
+            and self.hidden_size_per_layer_input > 0
+        ):
+            # Gate: projects hidden_states → per-layer dim for gating
+            self.per_layer_input_gate = ReplicatedLinear(
+                self.hidden_size,
+                self.hidden_size_per_layer_input,
+                bias=False,
+                quant_config=quant_config,
+                prefix=f"{prefix}.per_layer_input_gate",
+                return_bias=False,
+            )
+            # Projection: projects gated per-layer input back → hidden size
+            self.per_layer_projection = ReplicatedLinear(
+                self.hidden_size_per_layer_input,
+                self.hidden_size,
+                bias=False,
+                quant_config=quant_config,
+                prefix=f"{prefix}.per_layer_projection",
+                return_bias=False,
+            )
+            # Post-PLE norm: output = norm(x) * weight
+            self.post_per_layer_input_norm = RMSNorm(
+                config.hidden_size, eps=config.rms_norm_eps
+            )
+        else:
+            self.per_layer_input_gate = None
+            self.per_layer_projection = None
+            self.post_per_layer_input_norm = None
+
+        # Layer scalar (loaded from checkpoint) — applies to ALL text layers
+        self.register_buffer("layer_scalar", torch.ones(1))
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        residual: torch.Tensor | None,
+        per_layer_input: torch.Tensor | None = None,
+        **kwargs,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        # Gemma4 residual pattern:
+        # 1. input_norm(x) → attn → post_attn_norm → ADD residual
+        # 2. pre_ff_norm → mlp → post_ff_norm → ADD residual
+        residual = hidden_states
+
+        hidden_states = self.input_layernorm(residual)
+
+        hidden_states = self.self_attn(
+            positions=positions,
+            hidden_states=hidden_states,
+            **kwargs,
+        )
+
+        hidden_states = self.post_attention_layernorm(hidden_states)
+        hidden_states = hidden_states + residual
+        residual = hidden_states
+
+        # MLP runs unconditionally (same inputs for MoE and non-MoE)
+        hidden_states = self.pre_feedforward_layernorm(hidden_states)
+        hidden_states = self.mlp(hidden_states)
+
+        if self.enable_moe_block:
+            hidden_states_1 = self.post_feedforward_layernorm_1(hidden_states)
+
+            # Router and MoE experts see the residual (pre-MLP state),
+            # matching the HF transformers forward path
+            router_logits = self.router(residual)
+            hidden_states_2 = self.pre_feedforward_layernorm_2(residual)
+            hidden_states_2 = self.moe(hidden_states_2, router_logits)
+            hidden_states_2 = self.post_feedforward_layernorm_2(hidden_states_2)
+
+            # Combine MLP and MoE outputs
+            hidden_states = hidden_states_1 + hidden_states_2
+
+        hidden_states = self.post_feedforward_layernorm(hidden_states)
+        hidden_states = hidden_states + residual
+
+        # Apply PLE (Per-Layer Embedding) if configured
+        if per_layer_input is not None and self.per_layer_input_gate is not None:
+            gate = self.per_layer_input_gate(hidden_states)
+            gate = torch.nn.functional.gelu(gate, approximate="tanh")
+            gated_per_layer = gate * per_layer_input
+            per_layer_contribution = self.per_layer_projection(gated_per_layer)
+            per_layer_contribution = self.post_per_layer_input_norm(
+                per_layer_contribution
+            )
+            hidden_states = hidden_states + per_layer_contribution
+
+        # Apply layer scalar for full-attention layers
+        # Apply per-layer scalar (all text layers)
+        hidden_states = hidden_states * self.layer_scalar
+
+        return hidden_states, None
+
+
+def _run_decoder_layers(
+    decoder_layers: list[Gemma4DecoderLayer],
+    layer_idx_start: int,
+    positions: torch.Tensor,
+    hidden_states: torch.Tensor,
+    per_layer_inputs: torch.Tensor | None = None,
+    **kwargs,
+) -> torch.Tensor:
+    """Run a slice of decoder layers with PLE extraction."""
+    residual = None
+    for idx, layer in enumerate(decoder_layers):
+        layer_idx = idx + layer_idx_start
+        layer_per_input = (
+            per_layer_inputs[:, layer_idx, :] if per_layer_inputs is not None else None
+        )
+        hidden_states, residual = layer(
+            positions,
+            hidden_states,
+            residual,
+            per_layer_input=layer_per_input,
+            **kwargs,
+        )
+    return hidden_states
+
+
+@support_torch_compile(
+    enable_if=lambda vllm_config: vllm_config.cache_config.kv_sharing_fast_prefill
+)
+class Gemma4SelfDecoderLayers(nn.Module):
+    """Compiled wrapper: embedding + non-KV-shared layers (YOCO first half).
+
+    Owns the embedding and PLE modules so they are inside the compiled
+    graph. Gemma4Model delegates embedding methods here.
+    """
+
+    def __init__(
+        self,
+        *,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+        decoder_layers: list[Gemma4DecoderLayer],
+        layer_idx_start: int,
+        embed_tokens: VocabParallelEmbedding,
+        normalizer: torch.Tensor,
+        embed_tokens_per_layer: VocabParallelEmbedding | None,
+        embed_scale_per_layer: torch.Tensor | None,
+        per_layer_model_projection: ColumnParallelLinear | None,
+        per_layer_projection_norm: RMSNorm | None,
+        per_layer_input_scale: torch.Tensor | None,
+        per_layer_projection_scale: torch.Tensor | None,
+    ):
+        super().__init__()
+        self.decoder_layers = decoder_layers
+        self.layer_idx_start = layer_idx_start
+
+        config = _get_text_config(vllm_config.model_config.hf_config)
+        self.config = config
+        self.hidden_size_per_layer_input = getattr(
+            config, "hidden_size_per_layer_input", 0
+        )
+        self.vocab_size_per_layer_input = getattr(
+            config, "vocab_size_per_layer_input", config.vocab_size
+        )
+
+        # Shared references to modules owned by Gemma4Model — must be
+        # inside this nn.Module so torch.compile captures them.
+        self.embed_tokens = embed_tokens
+        self.normalizer = normalizer
+        self.embed_tokens_per_layer = embed_tokens_per_layer
+        self.embed_scale_per_layer = embed_scale_per_layer
+        self.per_layer_model_projection = per_layer_model_projection
+        self.per_layer_projection_norm = per_layer_projection_norm
+        self.per_layer_input_scale = per_layer_input_scale
+        self.per_layer_projection_scale = per_layer_projection_scale
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids) * self.normalizer
+
+    def get_per_layer_inputs(self, input_ids: torch.Tensor) -> torch.Tensor | None:
+        """Get per-layer embeddings from embed_tokens_per_layer.
+
+        Returns:
+            Per-layer embeddings (num_tokens, num_layers,
+            hidden_size_per_layer_input)
+        """
+        if self.embed_tokens_per_layer is None:
+            return None
+        per_layer_inputs_mask = torch.logical_and(
+            input_ids >= 0,
+            input_ids < self.vocab_size_per_layer_input,
+        )
+        per_layer_inputs_tokens = torch.where(
+            per_layer_inputs_mask, input_ids, torch.zeros_like(input_ids)
+        )
+        per_layer_embeds = self.embed_tokens_per_layer(per_layer_inputs_tokens)
+        per_layer_embeds = per_layer_embeds * self.embed_scale_per_layer
+        return per_layer_embeds.reshape(
+            *input_ids.shape,
+            self.config.num_hidden_layers,
+            self.hidden_size_per_layer_input,
+        )
+
+    def project_per_layer_inputs(
+        self,
+        inputs_embeds: torch.Tensor,
+        per_layer_inputs: torch.Tensor | None,
+    ) -> torch.Tensor | None:
+        """Project inputs_embeds and combine with per_layer_inputs.
+
+        Steps:
+        1. Project inputs_embeds: hidden_size → total_ple_dim
+        2. Scale by hidden_size^{-0.5}
+        3. Reshape to (num_tokens, num_layers, per_layer_dim)
+        4. Normalize with per_layer_projection_norm
+        5. Combine: (projection + per_layer_inputs) * 1/sqrt(2)
+        """
+        if self.per_layer_model_projection is None:
+            return None
+        per_layer_projection = self.per_layer_model_projection(inputs_embeds)
+        per_layer_projection = per_layer_projection * self.per_layer_projection_scale
+        per_layer_projection = per_layer_projection.reshape(
+            *inputs_embeds.shape[:-1],
+            self.config.num_hidden_layers,
+            self.hidden_size_per_layer_input,
+        )
+        per_layer_projection = self.per_layer_projection_norm(per_layer_projection)
+        if per_layer_inputs is None:
+            return per_layer_projection
+        return (per_layer_projection + per_layer_inputs) * self.per_layer_input_scale
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        inputs_embeds: torch.Tensor | None = None,
+        per_layer_inputs: torch.Tensor | None = None,
+        **kwargs,
+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
+        if inputs_embeds is not None:
+            hidden_states = inputs_embeds
+            per_layer_inputs = self.project_per_layer_inputs(
+                hidden_states, per_layer_inputs
+            )
+        else:
+            hidden_states = self.embed_input_ids(input_ids)
+            per_layer_embeds = self.get_per_layer_inputs(input_ids)
+            per_layer_inputs = self.project_per_layer_inputs(
+                hidden_states, per_layer_embeds
+            )
+
+        hidden_states = _run_decoder_layers(
+            self.decoder_layers,
+            self.layer_idx_start,
+            positions,
+            hidden_states,
+            per_layer_inputs,
+            **kwargs,
+        )
+        return hidden_states, per_layer_inputs
+
+
+@support_torch_compile(
+    enable_if=lambda vllm_config: vllm_config.cache_config.kv_sharing_fast_prefill
+)
+class Gemma4CrossDecoderLayers(nn.Module):
+    """Cross-decoder layers (YOCO second half, KV-shared)."""
+
+    def __init__(
+        self,
+        *,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+        decoder_layers: list[Gemma4DecoderLayer],
+        layer_idx_start: int,
+    ):
+        super().__init__()
+        self.decoder_layers = decoder_layers
+        self.layer_idx_start = layer_idx_start
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        per_layer_inputs: torch.Tensor | None = None,
+        **kwargs,
+    ) -> torch.Tensor:
+        return _run_decoder_layers(
+            self.decoder_layers,
+            self.layer_idx_start,
+            positions,
+            hidden_states,
+            per_layer_inputs,
+            **kwargs,
+        )
+
+
+@support_torch_compile(
+    enable_if=lambda vllm_config: not vllm_config.cache_config.kv_sharing_fast_prefill
+)
+class Gemma4Model(nn.Module, EagleModelMixin):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = _get_text_config(vllm_config.model_config.hf_config)
+        cache_config = vllm_config.cache_config
+        quant_config = vllm_config.quant_config
+        self.config = config
+        self.quant_config = quant_config
+
+        # PLE config values (default to 0 if not present — disables PLE)
+        self.hidden_size_per_layer_input = getattr(
+            config, "hidden_size_per_layer_input", 0
+        )
+        self.vocab_size_per_layer_input = getattr(
+            config, "vocab_size_per_layer_input", config.vocab_size
+        )
+
+        self.embed_tokens = VocabParallelEmbedding(
+            config.vocab_size,
+            config.hidden_size,
+            quant_config=quant_config,
+            prefix=f"{prefix}.embed_tokens",
+        )
+
+        # Per-Layer Embedding (PLE) components
+        if (
+            self.hidden_size_per_layer_input is not None
+            and self.hidden_size_per_layer_input > 0
+        ):
+            total_ple_dim = self.hidden_size_per_layer_input * config.num_hidden_layers
+            self.embed_tokens_per_layer = VocabParallelEmbedding(
+                self.vocab_size_per_layer_input,
+                total_ple_dim,
+                quant_config=quant_config,
+                prefix=f"{prefix}.embed_tokens_per_layer",
+            )
+            # Scaled embedding factor (from config, not hardcoded)
+            # Register as buffer so it moves to GPU with the model
+            # and interacts correctly with torch.compile AOT caching.
+            self.register_buffer(
+                "embed_scale_per_layer",
+                torch.tensor(self.hidden_size_per_layer_input**0.5),
+                persistent=False,
+            )
+            # Projection: hidden_size → total_ple_dim
+            # ColumnParallelLinear with gather_output=True
+            self.per_layer_model_projection = ColumnParallelLinear(
+                config.hidden_size,
+                total_ple_dim,
+                bias=False,
+                gather_output=True,
+                return_bias=False,
+                quant_config=quant_config,
+                prefix=f"{prefix}.per_layer_model_projection",
+            )
+            # PLE projection norm: output = norm(x) * weight
+            self.per_layer_projection_norm = RMSNorm(
+                self.hidden_size_per_layer_input,
+                eps=config.rms_norm_eps,
+            )
+            # Scale factor for combining projection + per_layer_inputs
+            # Register as buffer so it moves to GPU with the model
+            # and interacts correctly with torch.compile AOT caching.
+            self.register_buffer(
+                "per_layer_input_scale",
+                torch.rsqrt(torch.tensor(2.0)),
+                persistent=False,
+            )
+            # Scaled projection: multiply output by hidden_size**-0.5.
+            # Register as buffer for GPU placement and torch.compile.
+            self.register_buffer(
+                "per_layer_projection_scale",
+                torch.tensor(config.hidden_size**-0.5),
+                persistent=False,
+            )
+        else:
+            self.embed_tokens_per_layer = None
+            self.embed_scale_per_layer = None
+            self.per_layer_model_projection = None
+            self.per_layer_projection_norm = None
+            self.per_layer_input_scale = None
+            self.per_layer_projection_scale = None
+
+        self.start_layer, self.end_layer, self.layers = make_layers(
+            config.num_hidden_layers,
+            lambda prefix: Gemma4DecoderLayer(
+                config,
+                cache_config=cache_config,
+                quant_config=quant_config,
+                prefix=prefix,
+            ),
+            prefix=f"{prefix}.layers",
+        )
+        # Final norm: output = norm(x) * weight
+        self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+        # Embedding scale = sqrt(hidden_size)
+        # Downcast to model dtype (bfloat16 etc.) for numerical parity
+        self.register_buffer(
+            "normalizer",
+            torch.tensor(config.hidden_size**0.5),
+            persistent=False,
+        )
+
+        # --- You Only Cache Once (YOCO) split for fast prefill ---
+        first_kv_shared_layer_idx = config.num_hidden_layers - getattr(
+            config, "num_kv_shared_layers", 0
+        )
+
+        from vllm.compilation.backends import set_model_tag
+
+        # Layers 0..(K-1) are self-decoder layers in YOCO
+        with set_model_tag("self_decoder"):
+            self.self_decoder = Gemma4SelfDecoderLayers(
+                vllm_config=vllm_config,
+                prefix=f"{prefix}.self_decoder",
+                decoder_layers=self.layers[:first_kv_shared_layer_idx],
+                layer_idx_start=0,
+                embed_tokens=self.embed_tokens,
+                normalizer=self.normalizer,
+                embed_tokens_per_layer=getattr(self, "embed_tokens_per_layer", None),
+                embed_scale_per_layer=getattr(self, "embed_scale_per_layer", None),
+                per_layer_model_projection=getattr(
+                    self, "per_layer_model_projection", None
+                ),
+                per_layer_projection_norm=getattr(
+                    self, "per_layer_projection_norm", None
+                ),
+                per_layer_input_scale=getattr(self, "per_layer_input_scale", None),
+                per_layer_projection_scale=getattr(
+                    self, "per_layer_projection_scale", None
+                ),
+            )
+        # Layers K..(N-1) are cross-decoder layers in YOCO
+        with set_model_tag("cross_decoder"):
+            self.cross_decoder = Gemma4CrossDecoderLayers(
+                vllm_config=vllm_config,
+                prefix=f"{prefix}.cross_decoder",
+                decoder_layers=self.layers[first_kv_shared_layer_idx:],
+                layer_idx_start=first_kv_shared_layer_idx,
+            )
+
+        self.fast_prefill_enabled = cache_config.kv_sharing_fast_prefill
+
+        if self.fast_prefill_enabled:
+            # Allocate static buffers for CUDAGraph
+            max_num_tokens = vllm_config.scheduler_config.max_num_batched_tokens
+            device = next(self.parameters()).device
+            self.positions = torch.zeros(
+                max_num_tokens, dtype=torch.int64, device=device
+            )
+            self.hidden_states = torch.zeros(
+                (max_num_tokens, config.hidden_size),
+                dtype=self.embed_tokens.weight.dtype,
+                device=device,
+            )
+            if (
+                self.hidden_size_per_layer_input
+                and self.hidden_size_per_layer_input > 0
+            ):
+                self.per_layer_inputs = torch.zeros(
+                    (
+                        max_num_tokens,
+                        config.num_hidden_layers,
+                        self.hidden_size_per_layer_input,
+                    ),
+                    dtype=self.embed_tokens.weight.dtype,
+                    device=device,
+                )
+            else:
+                self.per_layer_inputs = None
+
+        # Custom factory that includes per_layer_inputs for PLE-enabled PP.
+        # per_layer_inputs has shape (batch, num_layers, per_layer_dim),
+        # which differs from the standard (batch, hidden_size) shape,
+        # so we can't use the default factory.
+        ple_dim = self.hidden_size_per_layer_input
+        num_layers = config.num_hidden_layers
+        hidden_size = config.hidden_size
+
+        def _make_empty_intermediate_tensors(
+            batch_size: int,
+            dtype: torch.dtype,
+            device: torch.device,
+        ) -> IntermediateTensors:
+            tensors: dict[str, torch.Tensor] = {
+                "hidden_states": torch.zeros(
+                    (batch_size, hidden_size),
+                    dtype=dtype,
+                    device=device,
+                ),
+            }
+            if ple_dim and ple_dim > 0:
+                tensors["per_layer_inputs"] = torch.zeros(
+                    (batch_size, num_layers, ple_dim),
+                    dtype=dtype,
+                    device=device,
+                )
+            return IntermediateTensors(tensors)
+
+        self.make_empty_intermediate_tensors = _make_empty_intermediate_tensors
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.self_decoder.embed_input_ids(input_ids)
+
+    def get_per_layer_inputs(self, input_ids: torch.Tensor) -> torch.Tensor | None:
+        """Get per-layer embeddings from embed_tokens_per_layer.
+
+        Returns:
+            Per-layer embeddings (num_tokens, num_layers,
+            hidden_size_per_layer_input)
+        """
+        return self.self_decoder.get_per_layer_inputs(input_ids)
+
+    def project_per_layer_inputs(
+        self,
+        inputs_embeds: torch.Tensor,
+        per_layer_inputs: torch.Tensor | None,
+    ) -> torch.Tensor | None:
+        """Project inputs_embeds and combine with per_layer_inputs.
+
+        Steps:
+        1. Project inputs_embeds: hidden_size → total_ple_dim
+        2. Scale by hidden_size^{-0.5}
+        3. Reshape to (num_tokens, num_layers, per_layer_dim)
+        4. Normalize with per_layer_projection_norm
+        5. Combine: (projection + per_layer_inputs) * 1/sqrt(2)
+        """
+        return self.self_decoder.project_per_layer_inputs(
+            inputs_embeds, per_layer_inputs
+        )
+
+    def fast_prefill_forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        inputs_embeds: torch.Tensor | None = None,
+        per_layer_inputs: torch.Tensor | None = None,
+        **kwargs,
+    ) -> torch.Tensor:
+        logits_indices_padded, num_logits_indices = None, None
+        attn_metadata = get_forward_context().attn_metadata
+
+        if attn_metadata is not None:
+            assert isinstance(attn_metadata, dict)
+            layer_attn_metadata = attn_metadata[
+                self.layers[-1].self_attn.attn.layer_name
+            ]
+            if isinstance(layer_attn_metadata, KVSharingFastPrefillMetadata):
+                logits_indices_padded = layer_attn_metadata.logits_indices_padded
+                num_logits_indices = layer_attn_metadata.num_logits_indices
+
+        batch_size = positions.size(0)
+        self.positions[:batch_size].copy_(positions)
+        self_decoder_hidden_states, per_layer_inputs = self.self_decoder(
+            input_ids=input_ids,
+            positions=self.positions[:batch_size],
+            inputs_embeds=inputs_embeds,
+            per_layer_inputs=per_layer_inputs,
+            **kwargs,
+        )
+
+        if logits_indices_padded is None:
+            logits_indices_padded = torch.arange(
+                batch_size,
+                dtype=positions.dtype,
+                device=positions.device,
+            )
+
+        # NOTE: Keep .clone() until fix in
+        # https://github.com/vllm-project/vllm/pull/22282
+        hidden_states = self_decoder_hidden_states.clone()
+
+        num_padded = logits_indices_padded.size(0)
+        self.positions[:num_padded].copy_(positions[logits_indices_padded])
+        self.hidden_states[:num_padded].copy_(
+            self_decoder_hidden_states[logits_indices_padded]
+        )
+        if self.per_layer_inputs is not None and per_layer_inputs is not None:
+            self.per_layer_inputs[:num_padded].copy_(
+                per_layer_inputs[logits_indices_padded]
+            )
+
+        # Update batch_descriptor so the cross-decoder's piecewise
+        # CUDAGraphWrapper dispatches to the correct (reduced) batch size.
+        forward_context = get_forward_context()
+        orig_batch_desc = forward_context.batch_descriptor
+        if orig_batch_desc is not None:
+            forward_context.batch_descriptor = replace(
+                orig_batch_desc, num_tokens=num_padded
+            )
+
+        cross_per_layer = (
+            self.per_layer_inputs[:num_padded]
+            if self.per_layer_inputs is not None
+            else None
+        )
+        cross_hidden_states = self.cross_decoder(
+            self.positions[:num_padded],
+            self.hidden_states[:num_padded],
+            cross_per_layer,
+            **kwargs,
+        )
+
+        # Restore the original batch_descriptor
+        forward_context.batch_descriptor = orig_batch_desc
+
+        if num_logits_indices is not None:
+            assert num_logits_indices > 0
+            hidden_states[logits_indices_padded[:num_logits_indices]] = (
+                cross_hidden_states[:num_logits_indices]
+            )
+        else:
+            hidden_states = cross_hidden_states
+
+        return hidden_states
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None,
+        inputs_embeds: torch.Tensor | None = None,
+        per_layer_inputs: torch.Tensor | None = None,
+        **kwargs,
+    ) -> torch.Tensor | IntermediateTensors | tuple[torch.Tensor, list[torch.Tensor]]:
+        if self.fast_prefill_enabled:
+            hidden_states = self.fast_prefill_forward(
+                input_ids,
+                positions,
+                inputs_embeds,
+                per_layer_inputs,
+                **kwargs,
+            )
+            hidden_states = self.norm(hidden_states)
+            return hidden_states
+
+        # Normal (non-fast-prefill) path with PP support
+        if get_pp_group().is_first_rank:
+            if inputs_embeds is not None:
+                hidden_states = inputs_embeds
+                # When called from the multimodal wrapper, raw PLE
+                # embeddings are pre-computed and passed explicitly.
+                # Project them through per_layer_model_projection.
+                per_layer_inputs = self.project_per_layer_inputs(
+                    hidden_states, per_layer_inputs
+                )
+            else:
+                hidden_states = self.embed_input_ids(input_ids)
+                # Compute per-layer inputs for PLE
+                per_layer_embeds = self.get_per_layer_inputs(input_ids)
+                per_layer_inputs = self.project_per_layer_inputs(
+                    hidden_states, per_layer_embeds
+                )
+        else:
+            assert intermediate_tensors is not None
+            hidden_states = intermediate_tensors["hidden_states"]
+            if per_layer_inputs is not None:
+                per_layer_inputs = intermediate_tensors["per_layer_inputs"]
+        residual = None
+        aux_hidden_states = self._maybe_add_hidden_state([], 0, hidden_states, residual)
+        for layer_idx, layer in enumerate(
+            islice(self.layers, self.start_layer, self.end_layer)
+        ):
+            # Extract the per-layer embedding for this specific layer
+            if per_layer_inputs is not None:
+                actual_layer_idx = self.start_layer + layer_idx
+                layer_per_input = per_layer_inputs[
+                    :, actual_layer_idx, :
+                ]  # (num_tokens, per_layer_dim)
+            else:
+                layer_per_input = None
+            hidden_states, residual = layer(
+                positions,
+                hidden_states,
+                residual,
+                per_layer_input=layer_per_input,
+                **kwargs,
+            )
+            self._maybe_add_hidden_state(
+                aux_hidden_states, layer_idx + 1, hidden_states, residual
+            )
+        if not get_pp_group().is_last_rank:
+            tensors: dict[str, torch.Tensor] = {
+                "hidden_states": hidden_states,
+            }
+            if per_layer_inputs is not None:
+                tensors["per_layer_inputs"] = per_layer_inputs
+            return IntermediateTensors(tensors)
+        # Gemma4 incorporates residual into hidden_states directly
+        # Apply norm without residual fusion when possible.
+        if residual is None:
+            hidden_states = self.norm(hidden_states)
+        else:
+            hidden_states, _ = self.norm(hidden_states, residual)
+
+        if len(aux_hidden_states) > 0:
+            return hidden_states, aux_hidden_states
+        return hidden_states
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        stacked_params_mapping = [
+            # (param_name, shard_name, shard_id)
+            ("qkv_proj", "q_proj", "q"),
+            ("qkv_proj", "k_proj", "k"),
+            ("qkv_proj", "v_proj", "v"),
+            ("gate_up_proj", "gate_proj", 0),
+            ("gate_up_proj", "up_proj", 1),
+        ]
+
+        # MoE expert weight mapping: checkpoint can have either:
+        #   1. 3D packed tensors (exploded in _weight_iterator to per-expert 2D)
+        #   2. Already per-expert 2D weights (if quantized)
+        # Map to FusedMoE parameters:
+        #   moe.experts.{id}.gate_proj → FusedMoE w1 (shard of w13)
+        #   moe.experts.{id}.up_proj   → FusedMoE w3 (shard of w13)
+        #   moe.experts.{id}.down_proj → FusedMoE w2
+        num_experts = getattr(self.config, "num_experts", None) or 0
+        # Strategy A: dot-separated suffix
+        # (standard AWQ/GPTQ e.g. .qweight, .scales, .weight)
+        dot_suffix_expert_params_mapping = fused_moe_make_expert_params_mapping(
+            self,
+            ckpt_gate_proj_name="gate_proj",
+            ckpt_down_proj_name="down_proj",
+            ckpt_up_proj_name="up_proj",
+            num_experts=num_experts,
+        )
+        # Strategy B: underscore-separated suffix
+        # (CompressedTensors-format AWQ/W4A16 _packed, _scale)
+        underscore_suffix_expert_params_mapping = [
+            (
+                f"{param_name}weight_",
+                f"{weight_name.rstrip('.')}_",
+                expert_id,
+                shard_id,
+            )
+            for (
+                param_name,
+                weight_name,
+                expert_id,
+                shard_id,
+            ) in dot_suffix_expert_params_mapping
+        ]
+        expert_params_mapping = (
+            dot_suffix_expert_params_mapping + underscore_suffix_expert_params_mapping
+        )
+        params_dict = dict(self.named_parameters())
+        # Include buffers (e.g. layer_scalar) so they can be loaded too
+        params_dict.update(dict(self.named_buffers()))
+        loaded_params: set[str] = set()
+        for name, loaded_weight in weights:
+            if self.quant_config is not None and (
+                scale_name := self.quant_config.get_cache_scale(name)
+            ):
+                param = params_dict[scale_name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                loaded_weight = loaded_weight[0]
+                weight_loader(param, loaded_weight)
+                loaded_params.add(scale_name)
+                continue
+
+            if name.endswith((".k_scale", ".v_scale", ".q_scale", ".prob_scale")):
+                remapped_name = maybe_remap_kv_scale_name(name, params_dict)
+                if remapped_name is not None and remapped_name in params_dict:
+                    param = params_dict[remapped_name]
+                    weight_loader = getattr(
+                        param, "weight_loader", default_weight_loader
+                    )
+                    weight_loader(param, loaded_weight)
+                    loaded_params.add(remapped_name)
+                    continue
+
+            for param_name, shard_name, shard_id in stacked_params_mapping:
+                if shard_name not in name:
+                    continue
+                stacked_name = name.replace(shard_name, param_name)
+                # k_eq_v layers use separate q_proj/k_proj instead of
+                # packed qkv_proj. If the stacked param doesn't exist,
+                # skip this mapping and fall through to direct load.
+                if stacked_name not in params_dict:
+                    continue
+                if is_pp_missing_parameter(stacked_name, self):
+                    continue
+                param = params_dict[stacked_name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                loaded_params.add(stacked_name)
+                break
+            else:
+                for (
+                    param_name,
+                    weight_name,
+                    expert_id,
+                    shard_id,
+                ) in expert_params_mapping:
+                    # Match both:
+                    #  - Bare weights: "experts.0.down_proj" (from 3D explosion)
+                    #  - With suffix: "experts.0.down_proj.weight_scale" (2D quantized)
+                    # weight_name has trailing dot, so check with and without it
+                    weight_name_base = weight_name.rstrip(".")
+                    if weight_name in name:
+                        # Has suffix (e.g., .weight_scale)
+                        moe_name = name.replace(weight_name, param_name)
+                    elif name.endswith(weight_name_base):
+                        # Bare weight (no suffix)
+                        moe_name = name.replace(
+                            weight_name_base, param_name.rstrip("_") + "_weight"
+                        )
+                    else:
+                        continue
+                    if moe_name not in params_dict:
+                        continue
+                    if is_pp_missing_parameter(moe_name, self):
+                        continue
+                    param = params_dict[moe_name]
+                    # Expert weights are already in the correct
+                    # orientation for FusedMoE after _weight_iterator:
+                    #   gate/up: [I, H] → w1/w3 expects [I, H]
+                    #   down:    [H, I] → w2 expects [H, I]
+                    # Scales and other quantization params may be 1D or scalar.
+                    weight_loader = param.weight_loader
+                    weight_loader(
+                        param,
+                        loaded_weight,
+                        moe_name,  # Pass mapped name (handles both weights and scales)
+                        shard_id=shard_id,
+                        expert_id=expert_id,
+                    )
+                    loaded_params.add(moe_name)
+                    break
+                else:
+                    if name.endswith(".bias") and name not in params_dict:
+                        continue
+                    name = maybe_remap_kv_scale_name(name, params_dict)
+                    if name is None:
+                        continue
+                    if is_pp_missing_parameter(name, self):
+                        continue
+                    param = params_dict[name]
+                    weight_loader = getattr(
+                        param, "weight_loader", default_weight_loader
+                    )
+                    weight_loader(param, loaded_weight)
+            loaded_params.add(name)
+
+        return loaded_params
+
+
+class Gemma4ForCausalLM(
+    nn.Module, SupportsLoRA, SupportsPP, MixtureOfExperts, SupportsEagle3
+):
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_prefix={
+            # Gemma4ForConditionalGeneration already loads the text stack
+            # from `model.language_model.*`. We reuse that same checkpoint
+            # and adapter naming for the text-only Gemma4ForCausalLM path,
+            # so LoRA keys from the conditional wrapper map onto `model.*`.
+            "model.language_model.": "model.",
+        },
+        orig_to_new_substr={
+            # Gemma4ForConditionalGeneration names MoE adapter targets under
+            # `...moe.experts.*`, while the text-only model exposes them
+            # under `...moe.*`.
+            ".moe.experts.gate_up_proj": ".moe.gate_up_proj",
+            ".moe.experts.down_proj": ".moe.down_proj",
+        },
+    )
+    # Note: qkv_proj packing applies to non-k_eq_v layers (sliding
+    # attention and full attention without k_eq_v). k_eq_v layers use
+    # separate q_proj + k_proj without packing.
+    packed_modules_mapping = {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        config = _get_text_config(vllm_config.model_config.hf_config)
+        quant_config = vllm_config.quant_config
+
+        super().__init__()
+        self.config = config
+        self.quant_config = quant_config
+        self.model = Gemma4Model(
+            vllm_config=vllm_config,
+            prefix=maybe_prefix(prefix, "model"),
+        )
+
+        self.lm_head = ParallelLMHead(
+            config.vocab_size,
+            config.hidden_size,
+            quant_config=quant_config,
+            prefix=maybe_prefix(prefix, "lm_head"),
+        )
+        if config.tie_word_embeddings:
+            self.lm_head = self.lm_head.tie_weights(self.model.embed_tokens)
+
+        self.logits_processor = LogitsProcessor(
+            config.vocab_size,
+            soft_cap=getattr(config, "final_logit_softcapping", None),
+        )
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors
+        )
+
+        # --- MixtureOfExperts protocol ---
+        self.expert_weights: list[list[torch.Tensor]] = []
+        self.moe_layers: list[nn.Module] = []
+        example_moe: Gemma4MoE | None = None
+
+        for layer in self.model.layers:
+            if hasattr(layer, "moe") and isinstance(layer.moe, Gemma4MoE):
+                example_moe = layer.moe
+                self.moe_layers.append(layer.moe.experts)
+
+        self.num_moe_layers = len(self.moe_layers)
+
+        if example_moe is not None:
+            self.num_logical_experts = example_moe.num_experts
+            self.num_physical_experts = example_moe.num_experts
+            self.num_local_physical_experts = example_moe.num_experts
+            self.num_routed_experts = example_moe.num_experts
+        else:
+            self.num_logical_experts = 0
+            self.num_physical_experts = 0
+            self.num_local_physical_experts = 0
+            self.num_routed_experts = 0
+
+        self.num_expert_groups = 1
+        self.num_shared_experts = 0
+        self.num_redundant_experts = 0
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        **kwargs,
+    ) -> torch.Tensor | IntermediateTensors | tuple[torch.Tensor, list[torch.Tensor]]:
+        hidden_states = self.model(
+            input_ids, positions, intermediate_tensors, inputs_embeds, **kwargs
+        )
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        return self.logits_processor(self.lm_head, hidden_states)
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        # Checkpoint weight names use "language_model." prefix (from the
+        # Gemma4ForConditionalGeneration wrapper). Strip it to map to our
+        # model tree which is just "model.*".
+        def _weight_iterator():
+            use_k_eq_v = getattr(self.config, "attention_k_eq_v", False)
+            # Build set of k_eq_v layer indices (full_attention layers
+            # when attention_k_eq_v is enabled). These layers have k_proj
+            # but no v_proj in checkpoint — we duplicate k_proj as v_proj.
+            k_eq_v_layer_indices: set[int] = set()
+            if use_k_eq_v:
+                for idx, lt in enumerate(self.config.layer_types):
+                    if lt == "full_attention":
+                        k_eq_v_layer_indices.add(idx)
+
+            for name, weight in weights:
+                # Remap "language_model." → "" to match our model tree.
+                # Checkpoint: model.language_model.layers.X.*
+                # Our model:  model.layers.X.*
+                name = name.replace("language_model.", "")
+
+                # Remap new HF checkpoint naming to internal vLLM
+                # naming: HF moved per_expert_scale to router and
+                # renamed moe → experts in the MoE block.
+                name = name.replace(
+                    ".router.per_expert_scale",
+                    ".moe.per_expert_scale",
+                )
+                if ".experts.gate_up_proj" in name:
+                    name = name.replace(
+                        ".experts.gate_up_proj",
+                        ".moe.gate_up_proj",
+                    )
+                elif ".experts.down_proj" in name:
+                    name = name.replace(
+                        ".experts.down_proj",
+                        ".moe.down_proj",
+                    )
+
+                # Remap individual 2D expert weights:
+                # .experts.{id}.{proj} → .moe.experts.{id}.{proj}
+                # (This handles per-expert 2D quantized weights)
+                name = _remap_gemma4_expert_weight_name(name)
+
+                # MoE expert weights: checkpoint stores as 3D packed
+                # tensors.  Explode into per-expert 2D weights for
+                # FusedMoE weight_loader.
+                #
+                # Checkpoint format:
+                #   moe.gate_up_proj: [E, 2*I, H]  (fused gate + up)
+                #   moe.down_proj:    [E, H, I]
+                #
+                # FusedMoE expects per-expert:
+                #   w1 (gate): [I, H]   — first half of gate_up
+                #   w3 (up):   [I, H]   — second half of gate_up
+                #   w2 (down): [H, I]   — as-is from checkpoint
+                #
+                # No transpose needed: checkpoint orientation already
+                # matches FusedMoE's expected layout.
+                if "moe.gate_up_proj" in name and weight.dim() == 3:
+                    num_experts = weight.size(0)
+                    intermediate_size = weight.size(1) // 2
+                    for expert_id in range(num_experts):
+                        gate_weight = weight[expert_id, :intermediate_size, :]
+                        up_weight = weight[expert_id, intermediate_size:, :]
+                        base = name.replace("moe.", f"moe.experts.{expert_id}.")
+                        yield base.replace("gate_up_proj", "gate_proj"), gate_weight
+                        yield base.replace("gate_up_proj", "up_proj"), up_weight
+                    continue
+
+                if "moe.down_proj" in name and weight.dim() == 3:
+                    num_experts = weight.size(0)
+                    for expert_id in range(num_experts):
+                        expert_name = name.replace("moe.", f"moe.experts.{expert_id}.")
+                        yield expert_name, weight[expert_id]
+                    continue
+
+                # k_eq_v layers: checkpoint has k_proj but no v_proj.
+                # QKVParallelLinear expects both, so duplicate k_proj
+                # as v_proj so V gets identical weights to K.
+                # ONLY for full_attention layers — sliding layers have
+                # their own real v_proj weights.
+                if "self_attn.k_proj" in name and k_eq_v_layer_indices:
+                    m = re.search(r"layers\.(\d+)\.", name)
+                    if m and int(m.group(1)) in k_eq_v_layer_indices:
+                        yield name, weight
+                        yield name.replace("k_proj", "v_proj"), weight.clone()
+                        continue
+
+                yield name, weight
+
+        # Skip multimodal weights — handled by the multimodal wrapper.
+        # Also skip lm_head when weights are tied.
+        skip = [
+            "audio_tower.",
+            "vision_tower.",
+            "embed_audio.",
+            "embed_vision.",
+        ]
+        if self.config.tie_word_embeddings:
+            skip.append("lm_head.")
+
+        loader = AutoWeightsLoader(self, skip_substrs=skip)
+        return loader.load_weights(_weight_iterator())
diff --git a/vllm/model_executor/models/gemma4_mm.py b/vllm/model_executor/models/gemma4_mm.py
new file mode 100644
index 000000000000..b546040b7414
--- /dev/null
+++ b/vllm/model_executor/models/gemma4_mm.py
@@ -0,0 +1,1601 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Gemma 4 multimodal model (image + audio + video support).
+
+Adds vision tower, audio tower, and multimodal embedders on top of the
+text-only Gemma4ForCausalLM.  The vision/audio encoders are loaded via
+AutoModel.from_config and run in eager mode while the language model uses
+the vLLM-optimized path.
+
+Video support:  Gemma4 does **not** have a native video tower.  Videos are
+decomposed into timestamped image frames (up to 32 frames at 70 soft tokens
+each) and fed through the same vision tower as regular images.  The
+processor inserts ``mm:ss`` timestamps between frames so the model can
+reason about temporal order.
+"""
+
+import math
+from collections.abc import Iterable, Mapping, Sequence
+from typing import Annotated, Any, Literal
+
+import numpy as np
+import torch
+from PIL import Image as PILImage
+from torch import nn
+from transformers import AutoModel, BatchFeature
+from transformers.models.gemma4 import (
+    Gemma4Config,
+    Gemma4Processor,
+    Gemma4VisionConfig,
+)
+from transformers.models.gemma4.configuration_gemma4 import (
+    Gemma4AudioConfig,
+    Gemma4TextConfig,
+)
+
+from vllm.config import VllmConfig
+from vllm.config.multimodal import BaseDummyOptions, VideoDummyOptions
+from vllm.inputs import MultiModalDataDict
+from vllm.logger import init_logger
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import ReplicatedLinear
+from vllm.model_executor.models.gemma4 import Gemma4ForCausalLM
+from vllm.model_executor.models.module_mapping import MultiModelKeys
+from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.multimodal.inputs import (
+    MultiModalFieldConfig,
+    MultiModalKwargsItems,
+    VideoItem,
+)
+from vllm.multimodal.parse import (
+    AudioProcessorItems,
+    ImageProcessorItems,
+    MultiModalDataItems,
+    MultiModalDataParser,
+)
+from vllm.multimodal.processing import BaseDummyInputsBuilder
+from vllm.multimodal.processing.processor import (
+    BaseMultiModalProcessor,
+    BaseProcessingInfo,
+    PromptReplacement,
+    PromptUpdate,
+    PromptUpdateDetails,
+)
+from vllm.platforms import current_platform
+from vllm.sequence import IntermediateTensors
+from vllm.utils.tensor_schema import TensorSchema, TensorShape
+
+from .interfaces import (
+    MultiModalEmbeddings,
+    SupportsEagle3,
+    SupportsLoRA,
+    SupportsMultiModal,
+    SupportsPP,
+)
+from .utils import (
+    AutoWeightsLoader,
+    WeightsMapper,
+    init_vllm_registered_model,
+    maybe_prefix,
+)
+
+logger = init_logger(__name__)
+
+# Video constants — match transformers Gemma4VideoProcessor defaults.
+_SUPPORTED_SOFT_TOKENS = (70, 140, 280, 560, 1120)
+_VIDEO_MAX_SOFT_TOKENS = 70  # soft tokens per video frame (vs 280 for images)
+_VIDEO_MAX_FRAMES = 32  # max sampled frames per video
+
+
+def _get_max_soft_tokens(
+    merged_kwargs: Mapping[str, object],
+) -> tuple[object | None, bool]:
+    """Return configured image max_soft_tokens and whether it is top-level."""
+    val = merged_kwargs.get("max_soft_tokens")
+    if val is not None:
+        return val, True
+
+    images_kwargs = merged_kwargs.get("images_kwargs")
+    if isinstance(images_kwargs, Mapping):
+        return images_kwargs.get("max_soft_tokens"), False
+
+    return None, False
+
+
+# ---------------------------------------------------------------------------
+# Input schema
+# ---------------------------------------------------------------------------
+
+
+class Gemma4ImagePixelInputs(TensorSchema):
+    """
+    Pre-patchified image inputs from the Gemma4 image processor.
+
+    Dimensions:
+        - bn: Batch size * number of images
+        - np: Number of patches (max_patches = max_soft_tokens * pooling_kernel_size²)
+        - pp: Patch pixels (patch_size² * 3)
+
+    The HF Gemma4ImageProcessor outputs pixel_values as
+    (batch, max_patches, patch_pixels) — already patchified with
+    zero-padding for patches beyond the real image content.
+    pixel_position_ids provides (x, y) coordinates per patch,
+    with (-1, -1) for padding patches.
+    """
+
+    type: Literal["pixel_values"] = "pixel_values"
+    pixel_values: Annotated[
+        torch.Tensor | list[torch.Tensor],
+        TensorShape("bn", "np", "pp", dynamic_dims={"np"}),
+    ]
+    pixel_position_ids: Annotated[
+        torch.Tensor | list[torch.Tensor],
+        TensorShape("bn", "np", 2, dynamic_dims={"np"}),
+    ]
+
+
+class Gemma4AudioInputs(TensorSchema):
+    """
+    Dimensions:
+        - bn: Batch size * number of audios
+        - s: Sequence length (MEL spectrogram frames)
+        - f: Number of features (MEL bins)
+    """
+
+    type: Literal["audio"] = "audio"
+    input_features_padded: Annotated[
+        torch.Tensor, TensorShape("bn", "s", "f", dynamic_dims={"s"})
+    ]
+    input_features_mask: Annotated[
+        torch.Tensor, TensorShape("bn", "s", dynamic_dims={"s"})
+    ]
+
+
+Gemma4ImageInputs = Gemma4ImagePixelInputs
+
+
+class Gemma4VideoInputs(TensorSchema):
+    """Video frame inputs — same tensor format as image inputs.
+
+    Gemma4 has no separate video tower; video frames are processed
+    through the vision tower at lower resolution (max_soft_tokens=70).
+    """
+
+    type: Literal["pixel_values_videos"] = "pixel_values_videos"
+    pixel_values_videos: Annotated[
+        torch.Tensor,
+        TensorShape("bn", "np", "pp"),
+    ]
+    pixel_position_ids_videos: Annotated[
+        torch.Tensor,
+        TensorShape("bn", "np", 2),
+    ]
+
+
+# ---------------------------------------------------------------------------
+# Processing info
+# ---------------------------------------------------------------------------
+
+
+class Gemma4ProcessingInfo(BaseProcessingInfo):
+    def get_hf_config(self):
+        return self.ctx.get_hf_config(Gemma4Config)
+
+    def get_default_tok_params(self):
+        """Gemma4's chat template already embeds a literal ``<bos>`` token in
+        the rendered text.  If ``add_special_tokens=True`` (the base-class
+        default), the tokenizer prepends *another* BOS, producing a
+        ``[2, 2, ...]`` double-BOS sequence that the model was not trained on.
+
+        Setting ``add_special_tokens=False`` here prevents the duplicate and
+        ensures both ``llm.generate()`` and the chat/completions API behave
+        correctly for IT models. For PT models (without chat template), we
+        keep the default (True) to ensure BOS is added for raw prompts.
+        """
+        tokenizer = self.ctx.get_tokenizer()
+        has_chat_template = getattr(tokenizer, "chat_template", None) is not None
+
+        params = super().get_default_tok_params()
+        if has_chat_template:
+            params = params.with_kwargs(add_special_tokens=False)
+        return params
+
+    def get_hf_processor(self, **kwargs: object) -> Gemma4Processor:
+        return self.ctx.get_hf_processor(
+            Gemma4Processor,
+            **kwargs,
+        )
+
+    def validate_num_items(self, modality: str, num_items: int) -> None:
+        if (
+            modality == "audio"
+            and num_items > 0
+            and self.get_hf_config().audio_config is None
+        ):
+            model = self.ctx.model_config.model
+            raise ValueError(
+                f"Audio input was provided but the model "
+                f"'{model}' does not have an audio tower. "
+                f"Audio inference is only supported for Gemma4 "
+                f"models that include an audio_config "
+                f"(i.e., models that include an audio_config)."
+            )
+        super().validate_num_items(modality, num_items)
+
+    def get_supported_mm_limits(self) -> Mapping[str, int | None]:
+        limits: dict[str, int | None] = {"image": None}
+        if self.get_hf_config().audio_config is not None:
+            limits["audio"] = None
+        limits["video"] = None
+        return limits
+
+    def get_mm_max_tokens_per_item(
+        self, seq_len: int, mm_counts: Mapping[str, int]
+    ) -> Mapping[str, int] | None:
+        config = self.get_hf_config()
+        # Upper bound: the pooler outputs max_soft_tokens slots per image.
+        # After padding is stripped the actual count is ≤ this value, but
+        # vLLM needs the max for memory planning.
+        tokens_per_image = config.vision_config.default_output_length
+        merged_kwargs = self.ctx.get_merged_mm_kwargs({})
+        val, _ = _get_max_soft_tokens(merged_kwargs)
+        if isinstance(val, int) and val in _SUPPORTED_SOFT_TOKENS:
+            tokens_per_image = val
+        tokens: dict[str, int] = {"image": tokens_per_image}
+        if config.audio_config is not None:
+            # Audio max tokens from the processor's audio_seq_length.
+            processor = self.get_hf_processor()
+            tokens["audio"] = processor.audio_seq_length
+        # Video: each frame ≤ 70 soft tokens + boi + eoi + ~6 ts tokens.
+        num_frames = _VIDEO_MAX_FRAMES
+        mm_config = self.ctx.model_config.get_multimodal_config()
+        video_opts = mm_config.limit_per_prompt.get("video")
+        if (
+            isinstance(video_opts, VideoDummyOptions)
+            and video_opts.num_frames is not None
+        ):
+            num_frames = min(num_frames, video_opts.num_frames)
+        tokens["video"] = num_frames * (_VIDEO_MAX_SOFT_TOKENS + 2 + 6)
+        return tokens
+
+    def get_data_parser(self) -> MultiModalDataParser:
+        config = self.get_hf_config()
+        kwargs: dict[str, Any] = {"video_needs_metadata": True}
+        if getattr(config, "audio_config", None) is not None:
+            processor = self.get_hf_processor()
+            kwargs["target_sr"] = processor.feature_extractor.sampling_rate
+        return MultiModalDataParser(**kwargs)
+
+    def _compute_num_soft_tokens(
+        self,
+        image_width: int,
+        image_height: int,
+        max_soft_tokens: int | None = None,
+    ) -> int:
+        """Compute the number of soft tokens the vision tower produces
+        for an image of the given dimensions, after padding is stripped.
+
+        Args:
+            max_soft_tokens: Override for the vision config's
+                ``default_output_length``.  When *None*, the value from
+                the model config is used.
+        """
+        vision_cfg = self.get_hf_config().vision_config
+        patch_size = vision_cfg.patch_size
+        pooling_kernel_size = vision_cfg.pooling_kernel_size
+
+        if max_soft_tokens is None:
+            max_soft_tokens = vision_cfg.default_output_length
+
+        unit = patch_size * pooling_kernel_size
+        max_patches = max_soft_tokens * pooling_kernel_size**2
+        num_patches_orig = (image_height / patch_size) * (image_width / patch_size)
+        scale = math.sqrt(max_patches / num_patches_orig)
+        target_h = max(unit, int(math.floor(image_height * scale / unit)) * unit)
+        target_w = max(unit, int(math.floor(image_width * scale / unit)) * unit)
+        num_patches = (target_h // patch_size) * (target_w // patch_size)
+        # Clamp to ``max_soft_tokens``: extreme aspect ratios (e.g. 3x900)
+        # cause the floor() above to round one dim up to ``unit`` while the
+        # other scales freely, which over-shoots ``max_patches``. The HF
+        # Gemma 4 image processor caps its vision-tower output at
+        # ``max_soft_tokens``, so without this clamp the prompt-side
+        # placeholder count exceeds the encoder output and
+        # ``_merge_multimodal_embeddings`` crashes.
+        return min(num_patches // (pooling_kernel_size**2), max_soft_tokens)
+
+    def get_image_repl(
+        self,
+        *,
+        image_width: int,
+        image_height: int,
+        processor: Gemma4Processor | None,
+        max_soft_tokens: int | None = None,
+    ) -> PromptUpdateDetails[list[int]]:
+        """Return the dynamic image token sequence for this image.
+
+        Computes the exact number of soft tokens the vision tower will
+        produce after stripping padding.
+
+        Args:
+            max_soft_tokens: Override for the default token budget.
+                When *None*, falls back to the model config value.
+        """
+        if processor is None:
+            processor = self.get_hf_processor()
+
+        num_soft = self._compute_num_soft_tokens(
+            image_width,
+            image_height,
+            max_soft_tokens=max_soft_tokens,
+        )
+        config = self.get_hf_config()
+        token_ids = (
+            [config.boi_token_id]
+            + [processor.image_token_id] * num_soft
+            + [config.eoi_token_id]
+        )
+        return PromptUpdateDetails.select_token_id(token_ids, processor.image_token_id)
+
+    def get_audio_repl(
+        self,
+        *,
+        audio_len: int,
+        processor: Gemma4Processor | None,
+    ) -> PromptUpdateDetails[list[int]]:
+        """Return the dynamic audio token sequence for this audio.
+
+        Computes the number of soft tokens from the audio waveform
+        length using ``ceil(duration_ms / audio_ms_per_token)``.
+        """
+        if processor is None:
+            processor = self.get_hf_processor()
+
+        sampling_rate = processor.feature_extractor.sampling_rate
+        num_tokens = processor._compute_audio_num_tokens(
+            torch.zeros(audio_len), sampling_rate
+        )
+        config = self.get_hf_config()
+        token_ids = (
+            [config.boa_token_id]
+            + [processor.audio_token_id] * num_tokens
+            + [config.eoa_token_id]
+        )
+        return PromptUpdateDetails.select_token_id(token_ids, processor.audio_token_id)
+
+    def get_video_repl(
+        self,
+        *,
+        timestamps: list[float],
+        num_soft_tokens_per_frame: list[int],
+        processor: Gemma4Processor,
+    ) -> PromptUpdateDetails[list[int]]:
+        """Build the full token replacement for one video.
+
+        Produces the same interleaved sequence as the HF Gemma4Processor:
+            mm:ss <boi><|video|>*N<eoi> mm:ss <boi><|video|>*N<eoi> ...
+        """
+        tokenizer = self.ctx.get_tokenizer()
+        config = self.get_hf_config()
+
+        boi_token_id = config.boi_token_id
+        eoi_token_id = config.eoi_token_id
+        video_token_id = processor.video_token_id
+
+        all_token_ids: list[int] = []
+        for i, (ts, n_tokens) in enumerate(zip(timestamps, num_soft_tokens_per_frame)):
+            # mm:ss timestamp — matches transformers: int-truncated,
+            # zero-padded.
+            minutes = int(ts // 60)
+            seconds = int(ts % 60)
+            ts_str = f"{minutes:02d}:{seconds:02d}"
+
+            prefix = f" {ts_str} " if i > 0 else f"{ts_str} "
+            ts_token_ids = tokenizer.encode(prefix, add_special_tokens=False)
+            all_token_ids.extend(ts_token_ids)
+
+            all_token_ids.append(boi_token_id)
+            all_token_ids.extend([video_token_id] * n_tokens)
+            all_token_ids.append(eoi_token_id)
+
+        return PromptUpdateDetails.select_token_id(all_token_ids, video_token_id)
+
+
+# ---------------------------------------------------------------------------
+# Dummy inputs builder
+# ---------------------------------------------------------------------------
+
+
+class Gemma4DummyInputsBuilder(BaseDummyInputsBuilder[Gemma4ProcessingInfo]):
+    def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
+        num_images = mm_counts.get("image", 0)
+        num_audios = mm_counts.get("audio", 0)
+        num_videos = mm_counts.get("video", 0)
+        processor = self.info.get_hf_processor()
+        # Use image_token (<|image|>) with tab prefix — this is what the
+        # Gemma4 chat template inserts per image (\t<|image|>).
+        # _get_prompt_updates targets image_token and expands it to the
+        # full_image_sequence.
+        text = ("\t" + processor.image_token) * num_images
+        if num_audios > 0 and processor.audio_token:
+            text += processor.audio_token * num_audios
+        if num_videos > 0:
+            text += processor.video_token * num_videos
+        return text
+
+    def get_dummy_mm_data(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+        mm_options: Mapping[str, BaseDummyOptions] | None = None,
+    ) -> MultiModalDataDict:
+        num_images = mm_counts.get("image", 0)
+        num_audios = mm_counts.get("audio", 0)
+        num_videos = mm_counts.get("video", 0)
+        processor = self.info.get_hf_processor()
+        image_processor = processor.image_processor
+        # Use processor's configured image size for dummies.
+        # Gemma4ImageProcessor sets size=None (it uses patch_size /
+        # max_soft_tokens instead of the standard size dict), so we
+        # guard against None with `or {}`.
+        size = getattr(image_processor, "size", None) or {}
+        img_width = size.get("width", 224)
+        img_height = size.get("height", 224)
+
+        image_overrides = mm_options.get("image") if mm_options else None
+        audio_overrides = mm_options.get("audio") if mm_options else None
+        video_overrides = mm_options.get("video") if mm_options else None
+
+        data: MultiModalDataDict = {
+            "image": self._get_dummy_images(
+                width=img_width,
+                height=img_height,
+                num_images=num_images,
+                overrides=image_overrides,
+            ),
+        }
+
+        if num_audios > 0:
+            audio_len = processor.feature_extractor.fft_length
+            data["audio"] = self._get_dummy_audios(
+                length=audio_len,
+                num_audios=num_audios,
+                overrides=audio_overrides,
+            )
+
+        if num_videos > 0:
+            data["video"] = self._get_dummy_videos(
+                width=img_width,
+                height=img_height,
+                num_frames=_VIDEO_MAX_FRAMES,
+                num_videos=num_videos,
+                overrides=video_overrides,
+            )
+
+        return data
+
+    def _get_dummy_videos(
+        self,
+        *,
+        width: int,
+        height: int,
+        num_frames: int,
+        num_videos: int,
+        overrides: VideoDummyOptions | None = None,
+    ) -> list[VideoItem]:
+        num_frames = max(num_frames, 2)
+        videos = super()._get_dummy_videos(
+            width=width,
+            height=height,
+            num_frames=num_frames,
+            num_videos=num_videos,
+            overrides=overrides,
+        )
+        videos = [v.copy() for v in videos]
+
+        video_items: list[VideoItem] = []
+        for video in videos:
+            video_num_frames = video.shape[0]
+            video_metadata = {
+                "fps": 2.0,
+                "duration": video_num_frames / 2.0,
+                "total_num_frames": video_num_frames,
+                "frames_indices": list(range(video_num_frames)),
+                "video_backend": "opencv",
+                "do_sample_frames": False,
+            }
+            video_items.append((video, video_metadata))
+
+        return video_items
+
+
+# ---------------------------------------------------------------------------
+# Multimodal processor
+# ---------------------------------------------------------------------------
+
+
+class Gemma4MultiModalProcessor(BaseMultiModalProcessor[Gemma4ProcessingInfo]):
+    def _call_hf_processor(
+        self,
+        prompt: str,
+        mm_data: Mapping[str, object],
+        mm_kwargs: Mapping[str, object],
+        tok_kwargs: Mapping[str, object],
+    ) -> BatchFeature:
+        merged_kwargs = self.info.ctx.get_merged_mm_kwargs(mm_kwargs)
+        val, is_top_level_max_soft_tokens = _get_max_soft_tokens(merged_kwargs)
+
+        if val is not None and val not in _SUPPORTED_SOFT_TOKENS:
+            raise ValueError(
+                f"Unsupported max_soft_tokens value: {val}. "
+                f"Valid values are {_SUPPORTED_SOFT_TOKENS}."
+            )
+
+        mm_data = dict(mm_data)
+
+        # ---- VIDEO HANDLING ----
+        # Gemma4 decomposes video into timestamped image frames.
+        # Each frame is processed with max_soft_tokens=70 through the
+        # same vision tower, matching transformers processing_gemma4.py.
+        video_outputs: dict[str, Any] = {}
+        if videos := mm_data.pop("videos", []):
+            processor = self.info.get_hf_processor()
+
+            all_video_pixel_values: list[torch.Tensor] = []
+            all_video_position_ids: list[torch.Tensor] = []
+            video_num_soft_tokens_per_video: list[list[int]] = []
+            video_timestamps_per_video: list[list[float]] = []
+            video_frame_counts: list[int] = []
+
+            video_replacements: list[str] = []
+
+            for item in videos:
+                video_array, metadata = item
+
+                # Convert frames to PIL images
+                if isinstance(video_array, np.ndarray):
+                    frames = [
+                        PILImage.fromarray(video_array[i])
+                        for i in range(video_array.shape[0])
+                    ]
+                else:
+                    frames = list(video_array)
+
+                # Compute timestamps from metadata (same as transformers)
+                fps = metadata.get("fps") or 24
+                frame_indices = metadata.get("frames_indices", list(range(len(frames))))
+                timestamps = [idx / fps for idx in frame_indices]
+
+                # Process frames as images with max_soft_tokens=70
+                video_mm_kwargs = dict(mm_kwargs)
+                video_mm_kwargs["max_soft_tokens"] = _VIDEO_MAX_SOFT_TOKENS
+
+                dummy_prompt = ("\t" + processor.image_token) * len(frames)
+
+                frame_outputs = super()._call_hf_processor(
+                    prompt=dummy_prompt,
+                    mm_data={"images": frames},
+                    mm_kwargs=video_mm_kwargs,
+                    tok_kwargs=tok_kwargs,
+                )
+
+                # Remap HF key name
+                if "image_position_ids" in frame_outputs:
+                    frame_outputs["pixel_position_ids"] = frame_outputs.pop(
+                        "image_position_ids"
+                    )
+
+                all_video_pixel_values.append(frame_outputs["pixel_values"])
+                all_video_position_ids.append(frame_outputs["pixel_position_ids"])
+
+                # Compute soft tokens per frame
+                num_soft_per_frame = []
+                for img in frames:
+                    w, h = img.size
+                    n = self.info._compute_num_soft_tokens(
+                        w, h, max_soft_tokens=_VIDEO_MAX_SOFT_TOKENS
+                    )
+                    num_soft_per_frame.append(n)
+
+                video_num_soft_tokens_per_video.append(num_soft_per_frame)
+                video_timestamps_per_video.append(timestamps)
+                video_frame_counts.append(len(frames))
+
+                # Build expanded replacement text for this video.
+                ts_strs = [f"{int(s // 60):02d}:{int(s % 60):02d}" for s in timestamps]
+                replacement = " ".join(
+                    f"{t} {processor.boi_token}"
+                    f"{processor.video_token * n}"
+                    f"{processor.eoi_token}"
+                    for t, n in zip(ts_strs, num_soft_per_frame)
+                )
+                video_replacements.append(replacement)
+
+            # Replace all <|video|> placeholders at once. We split on
+            # video_token to get N+1 parts, then interleave with the
+            # N replacement strings. This avoids the iterative
+            # split-replace bug where replacement text (which itself
+            # contains <|video|> tokens) collides with later splits.
+            vt = processor.video_token
+            parts = prompt.split(vt, len(video_replacements))
+
+            # NOTE: len(parts) <= len(video_replacements) + 1
+            parts_with_repl: list[str] = []
+            for part, repl in zip(parts, video_replacements):
+                parts_with_repl.extend([part, repl])
+            parts_with_repl.extend(parts[len(video_replacements) :])
+
+            prompt = "".join(parts_with_repl)
+
+            video_outputs = {
+                "pixel_values_videos": torch.cat(all_video_pixel_values, dim=0),
+                "pixel_position_ids_videos": torch.cat(all_video_position_ids, dim=0),
+                "video_frame_counts": torch.tensor(video_frame_counts),
+                "video_num_soft_tokens": video_num_soft_tokens_per_video,
+                "video_timestamps": video_timestamps_per_video,
+            }
+
+        # The processor accepts 'audio' not 'audios'.
+        if "audios" in mm_data:
+            mm_data["audio"] = mm_data.pop("audios")
+
+        # Warn if any audio waveform exceeds the model's max duration.
+        if "audio" in mm_data:
+            processor = self.info.get_hf_processor()
+            sr = processor.feature_extractor.sampling_rate
+            max_tokens = processor.audio_seq_length
+            ms_per_tok = processor.audio_ms_per_token
+            max_duration_s = max_tokens * ms_per_tok / 1000.0
+            audios = mm_data["audio"]
+            if not isinstance(audios, (list, tuple)):
+                audios = [audios]
+            for i, waveform in enumerate(audios):
+                duration_s = len(waveform) / sr
+                if duration_s > max_duration_s:
+                    logger.warning(
+                        "Audio duration exceeds max: %f > %f seconds",
+                        duration_s,
+                        max_duration_s,
+                    )
+        # vLLM's call_hf_processor (context.py) re-merges
+        # mm_processor_kwargs from the model config on every call via:
+        #   config_kwargs | incoming_kwargs  (right side wins)
+        #
+        # If we strip max_soft_tokens from incoming, the re-merge puts
+        # back the config's global default (e.g. 280), ignoring any
+        # per-prompt override.  Instead, we keep it in the kwargs with
+        # the validated per-prompt value so it wins during the merge.
+        #
+        # NOTE: This requires a corresponding type annotation on the
+        # HF side (Gemma4ProcessorKwargs.images_kwargs) so that
+        # _merge_kwargs routes max_soft_tokens into images_kwargs.
+        patched_mm_kwargs = dict(mm_kwargs)
+        if val is not None and is_top_level_max_soft_tokens:
+            patched_mm_kwargs["max_soft_tokens"] = val
+
+        processed_outputs = super()._call_hf_processor(
+            prompt,
+            mm_data,
+            patched_mm_kwargs,
+            tok_kwargs,
+        )
+
+        # HF uses 'image_position_ids'; vLLM uses 'pixel_position_ids'.
+        # Remap here to keep a single translation point.
+        if "image_position_ids" in processed_outputs:
+            processed_outputs["pixel_position_ids"] = processed_outputs.pop(
+                "image_position_ids"
+            )
+
+        if "input_features" in processed_outputs:
+            # Unpad per-item so each item's cache entry is
+            # self-contained. The batched() field config in
+            # _get_mm_fields_config will re-pad all fields to the
+            # batch's max length at batch time, ensuring consistent
+            # padding regardless of cache history.
+            masks = processed_outputs["input_features_mask"]
+            unpadded_features = [
+                f[mask]
+                for f, mask in zip(
+                    processed_outputs["input_features"],
+                    masks,
+                )
+            ]
+            unpadded_masks = [mask[mask] for mask in masks]
+            processed_outputs["input_features"] = unpadded_features
+            processed_outputs["input_features_padded"] = unpadded_features
+            processed_outputs["input_features_mask"] = unpadded_masks
+
+        # Merge video outputs into the final result
+        combined_outputs = dict(processed_outputs, **video_outputs)
+        return BatchFeature(combined_outputs)
+
+    def _get_mm_fields_config(
+        self,
+        hf_inputs: BatchFeature,
+        hf_processor_mm_kwargs: Mapping[str, object],
+    ) -> Mapping[str, MultiModalFieldConfig]:
+        fields = dict(
+            pixel_values=MultiModalFieldConfig.batched("image"),
+            pixel_position_ids=MultiModalFieldConfig.batched("image"),
+            input_features_padded=MultiModalFieldConfig.batched("audio"),
+            input_features_mask=MultiModalFieldConfig.batched("audio"),
+        )
+
+        # Video fields: frames stored flat, split per video by
+        # video_frame_counts.
+        video_frame_counts = hf_inputs.get("video_frame_counts")
+        if video_frame_counts is not None:
+            vfc = video_frame_counts
+            if not isinstance(vfc, torch.Tensor):
+                vfc = torch.tensor(vfc)
+            fields.update(
+                pixel_values_videos=(
+                    MultiModalFieldConfig.flat_from_sizes("video", vfc)
+                ),
+                pixel_position_ids_videos=(
+                    MultiModalFieldConfig.flat_from_sizes("video", vfc)
+                ),
+                video_frame_counts=MultiModalFieldConfig.batched(
+                    "video",
+                ),
+                video_num_soft_tokens=MultiModalFieldConfig.batched(
+                    "video", keep_on_cpu=True
+                ),
+                video_timestamps=MultiModalFieldConfig.batched(
+                    "video", keep_on_cpu=True
+                ),
+            )
+
+        return fields
+
+    def _get_prompt_updates(
+        self,
+        mm_items: MultiModalDataItems,
+        hf_processor_mm_kwargs: Mapping[str, Any],
+        out_mm_kwargs: MultiModalKwargsItems,
+    ) -> Sequence[PromptUpdate]:
+        hf_processor = self.info.get_hf_processor(**hf_processor_mm_kwargs)
+
+        prompt_updates = []
+
+        if "image" in mm_items:
+            # Target image_token (<|image|>) — the single placeholder the
+            # Gemma4 chat template inserts once per image in the prompt.
+            # vLLM tokenizes the prompt without token expansion, so only
+            # one image_token exists per image in the token stream.
+            # The replacement expands it to the full image sequence
+            # (boi + N×image_token + eoi, where N = max_soft_tokens).
+            image_token = hf_processor.image_token
+
+            def get_replacement_image(item_idx: int):
+                images = mm_items.get_items("image", ImageProcessorItems)
+                image_size = images.get_image_size(item_idx)
+                # Resolve the effective max_soft_tokens by merging
+                # per-prompt kwargs with the config-level defaults,
+                # consistent with how _call_hf_processor resolves it.
+                # Without this merge, a missing per-prompt override
+                # would fall back to vision_cfg.default_output_length
+                # instead of the config's mm_processor_kwargs default.
+                merged_kwargs = self.info.ctx.get_merged_mm_kwargs(
+                    hf_processor_mm_kwargs,
+                )
+                val, _ = _get_max_soft_tokens(merged_kwargs)
+                max_soft_tokens = (
+                    val
+                    if isinstance(val, int) and val in _SUPPORTED_SOFT_TOKENS
+                    else None
+                )
+                return self.info.get_image_repl(
+                    image_width=image_size.width,
+                    image_height=image_size.height,
+                    processor=hf_processor,
+                    max_soft_tokens=max_soft_tokens,
+                )
+
+            prompt_updates.append(
+                PromptReplacement(
+                    modality="image",
+                    target=image_token,
+                    replacement=get_replacement_image,
+                )
+            )
+
+        if "video" in mm_items:
+            video_token = hf_processor.video_token
+
+            def get_replacement_video(item_idx: int):
+                out_item = out_mm_kwargs["video"][item_idx]
+                timestamps = out_item["video_timestamps"].data
+                num_soft = out_item["video_num_soft_tokens"].data
+                return self.info.get_video_repl(
+                    timestamps=timestamps,
+                    num_soft_tokens_per_frame=num_soft,
+                    processor=hf_processor,
+                )
+
+            prompt_updates.append(
+                PromptReplacement(
+                    modality="video",
+                    target=video_token,
+                    replacement=get_replacement_video,
+                )
+            )
+
+        if "audio" in mm_items:
+            audio_token = hf_processor.audio_token
+
+            def get_replacement_audio(item_idx: int):
+                audios = mm_items.get_items("audio", AudioProcessorItems)
+                audio_len = audios.get_audio_length(item_idx)
+                return self.info.get_audio_repl(
+                    audio_len=audio_len,
+                    processor=hf_processor,
+                )
+
+            prompt_updates.append(
+                PromptReplacement(
+                    modality="audio",
+                    target=audio_token,
+                    replacement=get_replacement_audio,
+                )
+            )
+
+        return prompt_updates
+
+    # NOTE: Gemma3/Gemma3n override _apply_token_matches and
+    # _find_mm_placeholders to merge adjacent newline tokens that arise
+    # when full_image_sequence contains "\n\n" wrappers.  Gemma4's
+    # full_image_sequence has NO newlines (just BOI + 280×image_token +
+    # EOI), so the base class implementations work correctly as-is.
+
+
+# ---------------------------------------------------------------------------
+# Multimodal embedder
+# ---------------------------------------------------------------------------
+
+
+class Gemma4MultimodalEmbedder(nn.Module):
+    """Projects vision/audio soft tokens into LM embedding space.
+
+    Architecture:
+        inputs_embeds → embedding_projection → embedding_post_projection_norm
+
+    Unlike Gemma3n which has separate hard/soft embedding paths with
+    per-path normalization and a learned embedding table, Gemma4 uses a
+    simplified 2-layer design: a linear projection followed by RMSNorm
+    (without learnable scale).  The checkpoint confirms this — only
+    ``embedding_projection.weight`` exists; there is no embedding table
+    or pre-projection norm weights.
+    """
+
+    def __init__(
+        self,
+        multimodal_config: Gemma4VisionConfig | Gemma4AudioConfig,
+        text_config: Gemma4TextConfig,
+    ):
+        super().__init__()
+
+        self.eps = multimodal_config.rms_norm_eps
+        self.text_hidden_size = text_config.hidden_size
+
+        # Audio tower uses output_proj_dims (1536) rather than hidden_size
+        # (1024); vision uses hidden_size (768) directly.
+        embedding_dim = (
+            getattr(multimodal_config, "output_proj_dims", None)
+            or multimodal_config.hidden_size
+        )
+
+        self.embedding_pre_projection_norm = RMSNorm(
+            embedding_dim,
+            eps=self.eps,
+            has_weight=False,
+        )
+
+        self.embedding_projection = ReplicatedLinear(
+            embedding_dim,
+            self.text_hidden_size,
+            bias=False,
+        )
+
+    def forward(self, inputs_embeds: torch.Tensor) -> torch.Tensor:
+        """Project soft tokens from a multimodal tower into LM space."""
+        embs_normed = self.embedding_pre_projection_norm(inputs_embeds)
+        embs_proj, _ = self.embedding_projection(embs_normed)
+        return embs_proj
+
+
+# ---------------------------------------------------------------------------
+# Main model
+# ---------------------------------------------------------------------------
+
+
+@MULTIMODAL_REGISTRY.register_processor(
+    Gemma4MultiModalProcessor,
+    info=Gemma4ProcessingInfo,
+    dummy_inputs=Gemma4DummyInputsBuilder,
+)
+class Gemma4ForConditionalGeneration(
+    nn.Module,
+    SupportsMultiModal,
+    SupportsPP,
+    SupportsLoRA,
+    SupportsEagle3,
+):
+    packed_modules_mapping = {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }
+
+    # Maps checkpoint prefixes to vLLM module paths.
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_prefix={
+            "model.embed_audio.": "embed_audio.",
+            "model.embed_vision.": "embed_vision.",
+            "model.language_model.": "language_model.model.",
+            "model.vision_tower.": "vision_tower.",
+            "model.audio_tower.": "audio_tower.",
+            "lm_head.": "language_model.lm_head.",
+            "model": "language_model.model",
+        }
+    )
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        multimodal_config = vllm_config.model_config.multimodal_config
+        self.config = config
+        self.quant_config = quant_config
+        self.multimodal_config = multimodal_config
+
+        # ---- Vision tower (shared by image and video) ----
+        with self._mark_tower_model(vllm_config, {"image", "video"}):
+            self.vision_tower = AutoModel.from_config(config=config.vision_config)
+            self.embed_vision = Gemma4MultimodalEmbedder(
+                config.vision_config, config.text_config
+            )
+
+        # ---- Audio tower (variants with audio_config) ----
+        if config.audio_config is not None:
+            with self._mark_tower_model(vllm_config, "audio"):
+                self.audio_tower = AutoModel.from_config(config=config.audio_config)
+                # AutoModel.from_config does NOT call post_init(),
+                # which is needed to initialize buffers that are absent
+                # from the checkpoint (e.g. inv_timescales for relative
+                # position embeddings, softcap, gradient_clipping).
+                self.audio_tower.post_init()
+                self.embed_audio = Gemma4MultimodalEmbedder(
+                    config.audio_config, config.text_config
+                )
+        else:
+            self.audio_tower = None
+            self.embed_audio = None
+
+        # ---- Language model (vLLM optimised) ----
+        with self._mark_language_model(vllm_config):
+            self.language_model: Gemma4ForCausalLM = init_vllm_registered_model(
+                vllm_config=vllm_config,
+                hf_config=config.text_config,
+                prefix=maybe_prefix(prefix, "language_model"),
+                architectures=["Gemma4ForCausalLM"],
+            )
+
+            # Pre-allocate PLE buffer for CUDA graph compatibility.
+            # Some variants have hidden_size_per_layer_input=None (no PLE).
+            ple_dim = config.text_config.hidden_size_per_layer_input
+            if ple_dim is not None:
+                self.per_layer_embeddings = torch.zeros(
+                    vllm_config.scheduler_config.max_num_batched_tokens,
+                    config.text_config.num_hidden_layers,
+                    ple_dim,
+                    device=(self.language_model.model.embed_tokens.weight.device),
+                    dtype=(self.language_model.model.embed_tokens.weight.dtype),
+                )
+            else:
+                self.per_layer_embeddings = None
+
+        self.make_empty_intermediate_tensors = (
+            self.language_model.make_empty_intermediate_tensors
+        )
+
+        # --- Precompute full-attention layer indices for bidi clearing ---
+        self._full_attn_layer_idxs: frozenset[int] = frozenset()
+        text_config = config.text_config
+        if getattr(text_config, "use_bidirectional_attention", None) == "vision":
+            layer_types = getattr(text_config, "layer_types", None)
+            if layer_types:
+                self._full_attn_layer_idxs = frozenset(
+                    i for i, lt in enumerate(layer_types) if lt != "sliding_attention"
+                )
+
+        # --- MixtureOfExperts delegation to language_model ---
+        self.expert_weights = self.language_model.expert_weights
+        self.moe_layers = self.language_model.moe_layers
+        self.num_moe_layers = self.language_model.num_moe_layers
+        self.num_logical_experts = self.language_model.num_logical_experts
+        self.num_physical_experts = self.language_model.num_physical_experts
+        self.num_local_physical_experts = self.language_model.num_local_physical_experts
+        self.num_routed_experts = self.language_model.num_routed_experts
+        self.num_expert_groups = self.language_model.num_expert_groups
+        self.num_shared_experts = self.language_model.num_shared_experts
+        self.num_redundant_experts = self.language_model.num_redundant_experts
+
+    # ------------------------------------------------------------------ #
+    # Input parsing
+    # ------------------------------------------------------------------ #
+
+    def _parse_and_validate_image_input(
+        self, **kwargs: object
+    ) -> Gemma4ImageInputs | None:
+        pixel_values = kwargs.pop("pixel_values", None)
+        pixel_position_ids = kwargs.pop("pixel_position_ids", None)
+        image_embeds = kwargs.pop("image_embeds", None)
+        assert image_embeds is None, "Gemma4 does not support image_embeds."
+        if pixel_values is None:
+            return None
+        return Gemma4ImagePixelInputs(
+            pixel_values=pixel_values,
+            pixel_position_ids=pixel_position_ids,
+        )
+
+    def _parse_and_validate_audio_input(
+        self, **kwargs: object
+    ) -> Gemma4AudioInputs | None:
+        input_features_padded = kwargs.pop("input_features_padded", None)
+        if input_features_padded is None:
+            return None
+        input_features_mask = kwargs.pop("input_features_mask", None)
+        if input_features_mask is None:
+            return None
+        return Gemma4AudioInputs(
+            input_features_padded=input_features_padded,
+            input_features_mask=input_features_mask,
+        )
+
+    def _parse_and_validate_video_input(
+        self, **kwargs: object
+    ) -> dict[str, torch.Tensor] | None:
+        pixel_values_videos = kwargs.pop("pixel_values_videos", None)
+        pixel_position_ids_videos = kwargs.pop("pixel_position_ids_videos", None)
+        video_frame_counts = kwargs.pop("video_frame_counts", None)
+        if pixel_values_videos is None:
+            return None
+        return {
+            "pixel_values_videos": pixel_values_videos,
+            "pixel_position_ids_videos": pixel_position_ids_videos,
+            "video_frame_counts": video_frame_counts,
+        }
+
+    def _parse_and_validate_multimodal_inputs(
+        self, **kwargs: object
+    ) -> dict[str, Gemma4ImageInputs | Gemma4AudioInputs | Gemma4VideoInputs | None]:
+        mm_input_by_modality = {}
+        for input_key in list(kwargs):
+            if (
+                input_key in ("pixel_values", "image_embeds")
+                and "image" not in mm_input_by_modality
+            ):
+                mm_input_by_modality["image"] = self._parse_and_validate_image_input(
+                    **kwargs
+                )
+            if (
+                input_key == "pixel_values_videos"
+                and "video" not in mm_input_by_modality
+            ):
+                mm_input_by_modality["video"] = self._parse_and_validate_video_input(
+                    **kwargs
+                )
+            if (
+                input_key == "input_features_padded"
+                and "audio" not in mm_input_by_modality
+            ):
+                mm_input_by_modality["audio"] = self._parse_and_validate_audio_input(
+                    **kwargs
+                )
+        return mm_input_by_modality
+
+    @staticmethod
+    def _encoder_chunk(
+        patches_per_item: int,
+        free_bytes: int,
+        total_bytes: int,
+        position_embedding_size: int,
+    ) -> int:
+        """Max chunk size whose F.one_hot transient fits in the budget.
+
+        The dominant transient inside HF's ``Gemma4VisionPatchEmbedder.
+        _position_embeddings`` is
+        ``F.one_hot(clamped_positions, num_classes=position_embedding_size)``
+        with shape ``(chunk, patches, 2, position_embedding_size)``,
+        int64, plus its simultaneous cast to the position embedding
+        table dtype. That, not the encoder residual stream, sets peak
+        memory.
+        """
+        if patches_per_item <= 0:
+            return 1
+        # Half of currently-free, capped at 10% of total so we leave room
+        # for the rest of profile_run / the subsequent encoder + pooler.
+        budget = min(free_bytes // 2, total_bytes // 10)
+        if budget <= 0:
+            return 1
+        # F.one_hot allocates (chunk, patches, 2, pos_emb_size) int64
+        # (the inner 2 is the (x, y) coordinate axis, 8 is sizeof(int64)).
+        # Outer 2x covers the int64 buffer and its concurrent bf16 cast
+        # plus the matmul output that live alongside it at peak.
+        cost = patches_per_item * 4 * position_embedding_size * 8
+        return max(1, budget // cost) if cost > 0 else 1
+
+    # ------------------------------------------------------------------ #
+    # Image processing
+    # ------------------------------------------------------------------ #
+
+    def _process_image_input(
+        self,
+        image_input: Gemma4ImageInputs,
+    ) -> list[torch.Tensor]:
+        """Batch-encode images through the vision tower.
+
+        Groups images by patch count (resolution bucket) so each
+        encoder call processes a uniform-shape batch with no
+        cross-resolution padding.  Pooling and projection are then
+        applied over a single concatenated tensor for all images.
+        """
+        pixel_values = image_input["pixel_values"]
+        pixel_position_ids = image_input["pixel_position_ids"]
+
+        vt = self.vision_tower
+        vision_cfg = self.config.vision_config
+        pooling_k2 = vision_cfg.pooling_kernel_size**2
+
+        # Concurrent requests with different image resolutions may
+        # arrive as a list of per-image tensors, while same-resolution
+        # batches may arrive as a stacked tensor.
+        buckets: dict[int, list[tuple[int, torch.Tensor, torch.Tensor]]] = {}
+        total_images = (
+            len(pixel_values)
+            if isinstance(pixel_values, list)
+            else pixel_values.shape[0]
+        )
+
+        for idx in range(total_images):
+            pv = pixel_values[idx]
+            pp = pixel_position_ids[idx]
+            buckets.setdefault(pv.shape[0], []).append((idx, pv, pp))
+
+        # Encode each resolution bucket in memory-safe chunks. Re-read
+        # free memory per bucket because the previous bucket's encoder
+        # pass has already allocated activations we should account for.
+        last_hidden_states_map: dict[int, torch.Tensor] = {}
+        for patches, items in buckets.items():
+            free, total = current_platform.mem_get_info()
+            max_batch_size = min(
+                len(items),
+                self._encoder_chunk(
+                    patches, free, total, vision_cfg.position_embedding_size
+                ),
+            )
+
+            for chunk_idx in range(0, len(items), max_batch_size):
+                chunk_items = items[chunk_idx : chunk_idx + max_batch_size]
+
+                pv_tensor = torch.cat(
+                    [item[1].unsqueeze(0) for item in chunk_items], dim=0
+                )
+                pp_tensor = torch.cat(
+                    [item[2].unsqueeze(0) for item in chunk_items], dim=0
+                )
+                pad_tensor = (pp_tensor == -1).all(dim=-1)
+
+                inputs_embeds = vt.patch_embedder(pv_tensor, pp_tensor, pad_tensor)
+                encoder_outputs = vt.encoder(
+                    inputs_embeds=inputs_embeds,
+                    attention_mask=~pad_tensor,
+                    pixel_position_ids=pp_tensor,
+                )
+                hidden_states = encoder_outputs.last_hidden_state
+
+                for i, (orig_idx, _, _) in enumerate(chunk_items):
+                    last_hidden_states_map[orig_idx] = hidden_states[i]
+
+        # Pool per image to strip padding and reduce spatial resolution.
+        all_valid_states: list[torch.Tensor] = [None] * total_images  # type: ignore[list-item]
+        valid_lens = [0] * total_images
+
+        for orig_idx in range(total_images):
+            chunk_hidden = last_hidden_states_map[orig_idx]
+            output_length = chunk_hidden.shape[0] // pooling_k2
+
+            single_hidden = chunk_hidden.unsqueeze(0)
+            single_pos_ids = pixel_position_ids[orig_idx].unsqueeze(0)
+            padding_positions = (single_pos_ids == -1).all(dim=-1)
+
+            pooled_states, valid_mask = vt.pooler(
+                hidden_states=single_hidden,
+                pixel_position_ids=single_pos_ids,
+                padding_positions=padding_positions,
+                output_length=output_length,
+            )
+            valid_states = pooled_states[valid_mask]
+
+            if getattr(vt.config, "standardize", False):
+                valid_states = (valid_states - vt.std_bias) * vt.std_scale
+
+            all_valid_states[orig_idx] = valid_states
+            valid_lens[orig_idx] = valid_states.shape[0]
+
+        target_dtype = self.embed_vision.embedding_projection.weight.dtype
+
+        # Project all images in a single batched call.
+        flat_valid_states = torch.cat(all_valid_states, dim=0).to(target_dtype)
+        flat_proj_embs = self.embed_vision(
+            inputs_embeds=flat_valid_states.unsqueeze(0)
+        ).squeeze(0)
+
+        # Split back into per-image tensors (slicing returns views).
+        per_image_embeddings: list[torch.Tensor] = []
+        offset = 0
+        for length in valid_lens:
+            per_image_embeddings.append(flat_proj_embs[offset : offset + length])
+            offset += length
+
+        return per_image_embeddings
+
+    # ------------------------------------------------------------------ #
+    # Video processing (frames through vision tower)
+    # ------------------------------------------------------------------ #
+
+    def _process_video_input(
+        self,
+        video_input: dict[str, torch.Tensor],
+    ) -> list[torch.Tensor]:
+        """Batch-encode video frames through the vision tower.
+
+        Gemma4 has no separate video tower; video frames are images at
+        lower resolution (max_soft_tokens=70).  All frames across all
+        videos in the batch are encoded together in chunks, then pooled
+        and projected in a single batched call.
+
+        Returns one concatenated embedding tensor per video (not per
+        frame), matching the flat_from_sizes grouping that vLLM expects
+        for embed_multimodal.
+        """
+        pixel_values = video_input["pixel_values_videos"]
+        pixel_position_ids = video_input["pixel_position_ids_videos"]
+        frame_counts = video_input["video_frame_counts"]
+
+        vt = self.vision_tower
+        vision_cfg = self.config.vision_config
+        pooling_k2 = vision_cfg.pooling_kernel_size**2
+        target_dtype = self.embed_vision.embedding_projection.weight.dtype
+
+        if isinstance(frame_counts, torch.Tensor):
+            fc_list = frame_counts.tolist()
+        else:
+            fc_list = list(frame_counts)
+
+        total_frames = pixel_values.shape[0]
+        free, total = current_platform.mem_get_info()
+        max_batch_size = min(
+            total_frames,
+            self._encoder_chunk(
+                pixel_values.shape[1],
+                free,
+                total,
+                vision_cfg.position_embedding_size,
+            ),
+        )
+
+        padding_positions = (pixel_position_ids == -1).all(dim=-1)
+
+        # Encode frames in chunks bounded by _encoder_chunk.
+        last_hidden_states_list: list[torch.Tensor] = []
+        for i in range(0, total_frames, max_batch_size):
+            pv_chunk = pixel_values[i : i + max_batch_size]
+            pp_chunk = pixel_position_ids[i : i + max_batch_size]
+            pad_chunk = padding_positions[i : i + max_batch_size]
+
+            inputs_embeds = vt.patch_embedder(pv_chunk, pp_chunk, pad_chunk)
+            encoder_outputs = vt.encoder(
+                inputs_embeds=inputs_embeds,
+                attention_mask=~pad_chunk,
+                pixel_position_ids=pp_chunk,
+            )
+            last_hidden_states_list.append(encoder_outputs.last_hidden_state)
+
+        last_hidden_states = torch.cat(last_hidden_states_list, dim=0)
+
+        # Pool per frame to strip padding and reduce spatial resolution.
+        output_length = pixel_values.shape[1] // pooling_k2
+        all_frame_valid_states: list[torch.Tensor] = []
+        frame_valid_lens: list[int] = []
+
+        for i in range(total_frames):
+            single_hidden = last_hidden_states[i].unsqueeze(0)
+            single_pos_ids = pixel_position_ids[i].unsqueeze(0)
+            single_pad_pos = padding_positions[i].unsqueeze(0)
+
+            pooled_states, valid_mask = vt.pooler(
+                hidden_states=single_hidden,
+                pixel_position_ids=single_pos_ids,
+                padding_positions=single_pad_pos,
+                output_length=output_length,
+            )
+            valid_states = pooled_states[valid_mask]
+
+            if getattr(vt.config, "standardize", False):
+                valid_states = (valid_states - vt.std_bias) * vt.std_scale
+
+            all_frame_valid_states.append(valid_states)
+            frame_valid_lens.append(valid_states.shape[0])
+
+        # Project all frames in a single batched call.
+        flat_valid_states = torch.cat(all_frame_valid_states, dim=0).to(target_dtype)
+        flat_proj_embs = self.embed_vision(
+            inputs_embeds=flat_valid_states.unsqueeze(0)
+        ).squeeze(0)
+
+        # Regroup into per-video tensors (slicing returns views).
+        per_video_embeddings: list[torch.Tensor] = []
+        frame_idx = 0
+        offset = 0
+        for count in fc_list:
+            video_tokens = sum(frame_valid_lens[frame_idx : frame_idx + count])
+            per_video_embeddings.append(flat_proj_embs[offset : offset + video_tokens])
+            offset += video_tokens
+            frame_idx += count
+
+        return per_video_embeddings
+
+    # ------------------------------------------------------------------ #
+    # Audio processing
+    # ------------------------------------------------------------------ #
+
+    def _process_audio_input(
+        self,
+        audio_input: Gemma4AudioInputs,
+    ) -> list[torch.Tensor]:
+        input_features = audio_input["input_features_padded"].squeeze(1)
+        input_features_mask = audio_input["input_features_mask"].squeeze(1)
+
+        # Run audio tower — mask uses standard HF convention
+        # (True=valid, False=padding).
+        audio_outputs = self.audio_tower(input_features, input_features_mask)
+        if isinstance(audio_outputs, tuple):
+            audio_encodings, audio_mask = audio_outputs
+        else:
+            audio_encodings = audio_outputs.last_hidden_state
+            audio_mask = audio_outputs.attention_mask
+
+        # Project into LM embedding space.
+        audio_features = self.embed_audio(inputs_embeds=audio_encodings)
+
+        # Strip padding per-batch element: only keep real (non-padding)
+        # tokens. audio_mask is True for valid positions (HF convention).
+        per_audio = []
+        for enc, mask in zip(audio_features, audio_mask, strict=True):
+            per_audio.append(enc[mask])  # [num_real, hidden_size]
+
+        return per_audio
+
+    # ------------------------------------------------------------------ #
+    # MultiModalEmbeddings interface
+    # ------------------------------------------------------------------ #
+
+    def embed_multimodal(self, **kwargs: object) -> MultiModalEmbeddings:
+        mm_input_by_modality = self._parse_and_validate_multimodal_inputs(**kwargs)
+        multimodal_embeddings: list[torch.Tensor] = []
+
+        for modality, multimodal_input in mm_input_by_modality.items():
+            if multimodal_input is None:
+                continue
+            if modality == "image":
+                multimodal_embeddings.extend(
+                    self._process_image_input(multimodal_input)
+                )
+            elif modality == "video":
+                multimodal_embeddings.extend(
+                    self._process_video_input(multimodal_input)
+                )
+            elif modality == "audio":
+                multimodal_embeddings.extend(
+                    self._process_audio_input(multimodal_input)
+                )
+
+        return multimodal_embeddings
+
+    def embed_input_ids(
+        self,
+        input_ids: torch.Tensor,
+        multimodal_embeddings: MultiModalEmbeddings | None = None,
+        *,
+        is_multimodal: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        # Cache per-layer embeddings (PLE) for the language model's
+        # forward pass.  During profiling embed_input_ids is not called,
+        # so the pre-allocated zeros are used instead.
+        if self.per_layer_embeddings is not None:
+            # Mask multimodal tokens (image/audio) to 0 for PLE
+            # computation (using token_type_ids == 0 as text_mask).
+            # Replicate this: map image token positions to token 0.
+            if is_multimodal is not None:
+                ple_input_ids = torch.where(
+                    is_multimodal.to(input_ids.device, non_blocking=True),
+                    torch.zeros_like(input_ids),
+                    input_ids,
+                )
+            else:
+                ple_input_ids = input_ids
+
+            per_layer_inputs = self.language_model.model.get_per_layer_inputs(
+                ple_input_ids
+            )
+            if per_layer_inputs is not None:
+                per_layer_inputs = per_layer_inputs.reshape(
+                    -1,
+                    self.config.text_config.num_hidden_layers,
+                    self.config.text_config.hidden_size_per_layer_input,
+                )
+                self.per_layer_embeddings[: per_layer_inputs.shape[0]].copy_(
+                    per_layer_inputs
+                )
+
+        if multimodal_embeddings is None or is_multimodal is None:
+            return super().embed_input_ids(input_ids)
+
+        return super().embed_input_ids(
+            input_ids,
+            multimodal_embeddings=multimodal_embeddings,
+            is_multimodal=is_multimodal,
+        )
+
+    # ------------------------------------------------------------------ #
+    # Forward
+    # ------------------------------------------------------------------ #
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        **kwargs: object,
+    ) -> IntermediateTensors:
+        if intermediate_tensors is not None:
+            inputs_embeds = None
+
+        # Select the pre-cached PLEs for this batch (None when PLE
+        # is disabled for variants without PLE).
+        per_layer_inputs = (
+            self.per_layer_embeddings[: inputs_embeds.shape[0]]
+            if self.per_layer_embeddings is not None and inputs_embeds is not None
+            else None
+        )
+
+        # Gemma4 bidi: clear mm_prefix_range for full_attention layers.
+        # Must run here (outside @support_torch_compile boundary) because
+        # _run_decoder_layers is inside a compiled graph where Python
+        # side effects are eliminated.
+        self._clear_mm_prefix_for_full_attn_layers()
+
+        hidden_states = self.language_model.model(
+            input_ids,
+            positions,
+            per_layer_inputs=per_layer_inputs,
+            intermediate_tensors=intermediate_tensors,
+            inputs_embeds=inputs_embeds,
+            **kwargs,
+        )
+
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        return self.language_model.compute_logits(hidden_states)
+
+    # ------------------------------------------------------------------ #
+    # Bidirectional attention helpers
+    # ------------------------------------------------------------------ #
+
+    def _clear_mm_prefix_for_full_attn_layers(self) -> None:
+        """Clear mm_prefix_range for non-sliding layers.
+
+        Gemma4 with use_bidirectional_attention='vision' applies
+        bidirectional attention only to sliding_attention layers.
+        Full attention layers use plain causal masking.
+
+        Uses _full_attn_layer_idxs (precomputed in __init__) for O(1)
+        lookup instead of per-call regex parsing.
+        """
+        if not self._full_attn_layer_idxs:
+            return
+
+        from vllm.forward_context import get_forward_context
+
+        attn_metadata = get_forward_context().attn_metadata
+        if attn_metadata is None:
+            return
+
+        def _process(metadata_dict: dict) -> None:
+            for layer_name, metadata in metadata_dict.items():
+                if ".layers." not in layer_name:
+                    continue
+                try:
+                    layer_idx = int(layer_name.split(".layers.")[1].split(".")[0])
+                except (ValueError, IndexError):
+                    continue
+                if layer_idx in self._full_attn_layer_idxs:
+                    if hasattr(metadata, "mm_prefix_range"):
+                        metadata.mm_prefix_range = None
+                    if hasattr(metadata, "mm_prefix_range_tensor"):
+                        metadata.mm_prefix_range_tensor = None
+
+        if isinstance(attn_metadata, list):
+            for ub_metadata in attn_metadata:
+                _process(ub_metadata)
+        elif isinstance(attn_metadata, dict):
+            _process(attn_metadata)
+
+    # ------------------------------------------------------------------ #
+    # Weight loading
+    # ------------------------------------------------------------------ #
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        # Some checkpoints have vestigial embed_vision.embedding and
+        # embed_audio.embedding weights from the Gemma3n architecture
+        # that are not used by Gemma4's MultimodalEmbedder (which only
+        # has embedding_projection + embedding_post_projection_norm).
+        ignore_prefixes = [
+            "embed_vision.embedding.",
+            "embed_audio.embedding.",
+        ]
+        # Models without audio tower should skip
+        # audio weights entirely.
+        if self.audio_tower is None:
+            ignore_prefixes.extend(
+                [
+                    "audio_tower.",
+                    "embed_audio.",
+                ]
+            )
+        loader = AutoWeightsLoader(
+            self,
+            ignore_unexpected_prefixes=ignore_prefixes,
+        )
+        return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
+
+    # ------------------------------------------------------------------ #
+    # LoRA / multimodal mapping
+    # ------------------------------------------------------------------ #
+
+    def get_mm_mapping(self) -> MultiModelKeys:
+        """Get the module prefix mapping for multimodal models."""
+        connectors = ["embed_vision"]
+        tower_models = ["vision_tower"]
+        if self.audio_tower is not None:
+            connectors.append("embed_audio")
+            tower_models.append("audio_tower")
+
+        return MultiModelKeys.from_string_field(
+            language_model="language_model",
+            connector=connectors,
+            tower_model=tower_models,
+        )
+
+    @classmethod
+    def get_placeholder_str(cls, modality: str, i: int) -> str | None:
+        if modality == "image":
+            return "<image_soft_token>"
+        if modality == "audio":
+            return "<audio_soft_token>"
+        if modality == "video":
+            return "<|video|>"
+        raise ValueError(f"Unsupported modality: {modality}")
diff --git a/vllm/model_executor/models/gemma4_mtp.py b/vllm/model_executor/models/gemma4_mtp.py
new file mode 100644
index 000000000000..c294ffc6f9a7
--- /dev/null
+++ b/vllm/model_executor/models/gemma4_mtp.py
@@ -0,0 +1,603 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Inference-only Gemma4 MTP (Multi-Token Prediction) model.
+
+The Gemma4 assistant model is a lightweight decoder that shares KV cache
+with the target (backbone) model.  All assistant decoder layers are
+KV-shared: they only have Q projections (no K/V projections or norms),
+and read K/V from the target model's cache at runtime.
+
+Checkpoint layout (``gemma4_assistant``)::
+
+    model.embed_tokens.*          -- token embeddings
+    model.layers.{i}.*            -- decoder layers (Q-only attention + MLP)
+    model.norm.*                  -- final RMSNorm
+    pre_projection.*              -- Linear(2 * backbone_hidden_size, hidden_size)
+    post_projection.*             -- Linear(hidden_size, backbone_hidden_size)
+    lm_head.*                     -- language model head (tied to embed_tokens)
+    masked_embedding.centroids.*  -- centroid projection (when use_ordered_embeddings)
+    masked_embedding.token_ordering -- token-to-centroid mapping buffer
+"""
+
+from collections.abc import Iterable
+
+import torch
+from torch import nn
+
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import CacheConfig, VllmConfig
+from vllm.distributed import (
+    get_tensor_model_parallel_world_size,
+    tensor_model_parallel_all_gather,
+)
+from vllm.logger import init_logger
+from vllm.model_executor.layers.attention import Attention
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import (
+    ColumnParallelLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    ParallelLMHead,
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.sequence import IntermediateTensors
+
+from .gemma4 import Gemma4MLP, _get_text_config
+from .utils import (
+    AutoWeightsLoader,
+    WeightsMapper,
+    extract_layer_index,
+    maybe_prefix,
+)
+
+logger = init_logger(__name__)
+
+
+class Gemma4MTPMaskedEmbedder(nn.Module):
+    """Sparse logit computation via centroid-based vocabulary masking.
+
+    Instead of computing logits against the full vocabulary, projects
+    hidden states to centroid scores, selects top-K centroids, and
+    computes logits only for the ~top_k * (vocab_size / num_centroids)
+    tokens belonging to those centroids.
+    """
+
+    token_ordering: torch.Tensor
+
+    def __init__(
+        self,
+        hidden_size: int,
+        vocab_size: int,
+        num_centroids: int,
+        centroid_intermediate_top_k: int,
+    ) -> None:
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.vocab_size = vocab_size
+        self.num_centroids = num_centroids
+        self.centroid_intermediate_top_k = centroid_intermediate_top_k
+        self.vocab_size_per_centroid = vocab_size // num_centroids
+        self.num_selected = centroid_intermediate_top_k * self.vocab_size_per_centroid
+
+        self.centroids = nn.Linear(hidden_size, num_centroids, bias=False)
+        self.register_buffer(
+            "token_ordering",
+            torch.empty(vocab_size, dtype=torch.long),
+        )
+
+    def _select_and_score(
+        self,
+        hidden_states: torch.Tensor,
+        lm_head_weight: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Centroid selection + sparse dot product.
+
+        Returns:
+            logits: (num_tokens, num_selected) sparse logits.
+            indices: (num_tokens, num_selected) corresponding vocab indices.
+        """
+        num_tokens = hidden_states.shape[0]
+        _, top_k_indices = torch.topk(
+            self.centroids(hidden_states),
+            k=self.centroid_intermediate_top_k,
+            dim=-1,
+        )
+        clusters = self.token_ordering.view(
+            self.num_centroids,
+            self.vocab_size_per_centroid,
+        )
+        selected = clusters[top_k_indices]
+        embeddings = lm_head_weight[selected.reshape(-1)].view(
+            num_tokens,
+            self.num_selected,
+            self.hidden_size,
+        )
+        logits = torch.einsum("td,tsd->ts", hidden_states, embeddings)
+        return logits, selected.view(num_tokens, -1)
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        lm_head_weight: torch.Tensor,
+    ) -> torch.Tensor:
+        """Full-vocab logits with non-selected positions masked to -inf."""
+        logits, indices = self._select_and_score(hidden_states, lm_head_weight)
+        output = torch.full(
+            (hidden_states.shape[0], self.vocab_size),
+            fill_value=torch.finfo(hidden_states.dtype).min,
+            dtype=hidden_states.dtype,
+            device=hidden_states.device,
+        )
+        return output.scatter_(-1, indices, logits)
+
+    def get_top_tokens(
+        self,
+        hidden_states: torch.Tensor,
+        lm_head_weight: torch.Tensor,
+    ) -> torch.Tensor:
+        """Sparse argmax — returns vocab token IDs without full-vocab tensor."""
+        logits, indices = self._select_and_score(hidden_states, lm_head_weight)
+        return indices.gather(-1, logits.argmax(-1, keepdim=True)).squeeze(-1)
+
+
+class Gemma4MTPAttention(nn.Module):
+    """Q-only attention for Gemma4 MTP layers.
+
+    K/V come from the target model's KV cache via
+    ``kv_sharing_target_layer_name`` (set by the proposer after
+    model construction).
+    """
+
+    def __init__(
+        self,
+        config,
+        hidden_size: int,
+        num_heads: int,
+        num_kv_heads: int,
+        head_dim: int,
+        max_position_embeddings: int,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        attn_logits_soft_cap: float | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.config = config
+        self.hidden_size = hidden_size
+
+        tp_size = get_tensor_model_parallel_world_size()
+        self.total_num_heads = num_heads
+        self.num_heads = self.total_num_heads // tp_size
+        self.total_num_kv_heads = num_kv_heads
+        self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
+        self.head_dim = head_dim
+        self.q_size = self.num_heads * self.head_dim
+        self.scaling = 1.0
+
+        self.q_proj = ColumnParallelLinear(
+            hidden_size,
+            self.total_num_heads * self.head_dim,
+            bias=config.attention_bias,
+            quant_config=None,
+            prefix=f"{prefix}.q_proj",
+        )
+        self.o_proj = RowParallelLinear(
+            self.total_num_heads * self.head_dim,
+            hidden_size,
+            bias=config.attention_bias,
+            quant_config=None,
+            prefix=f"{prefix}.o_proj",
+        )
+        self.q_norm = RMSNorm(self.head_dim, eps=config.rms_norm_eps)
+
+        layer_idx = extract_layer_index(prefix)
+        layer_type = config.layer_types[layer_idx]
+        self.is_sliding = layer_type == "sliding_attention"
+        sliding_window = config.sliding_window if self.is_sliding else None
+
+        if layer_type in config.rope_parameters:
+            rope_parameters = dict(config.rope_parameters[layer_type])
+        else:
+            rope_parameters = dict(config.rope_parameters.copy())
+            if self.is_sliding:
+                rope_parameters["rope_theta"] = getattr(
+                    config, "rope_local_base_freq", 10000.0
+                )
+
+        self.rotary_emb = get_rope(
+            self.head_dim,
+            max_position=max_position_embeddings,
+            rope_parameters=rope_parameters,
+            is_neox_style=True,
+        )
+
+        # kv_sharing_target_layer_name is set after model construction
+        # by Gemma4Proposer._setup_gemma4_kv_sharing().
+        self.is_kv_shared_layer = True
+        self.attn = Attention(
+            self.num_heads,
+            self.head_dim,
+            self.scaling,
+            num_kv_heads=self.num_kv_heads,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            logits_soft_cap=attn_logits_soft_cap,
+            per_layer_sliding_window=sliding_window,
+            prefix=f"{prefix}.attn",
+        )
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        **kwargs,
+    ) -> torch.Tensor:
+        q, _ = self.q_proj(hidden_states)
+
+        q = q.unflatten(-1, (self.num_heads, self.head_dim))
+        q = self.q_norm(q)
+        q = q.flatten(-2, -1)
+
+        q, _ = self.rotary_emb(positions, q, None)
+
+        # Attention reads K/V from the target's cache via KV sharing;
+        # these dummy tensors are never consumed but required by the API.
+        num_tokens = q.shape[0]
+        kv_dummy = torch.empty(
+            num_tokens,
+            self.num_kv_heads * self.head_dim,
+            dtype=q.dtype,
+            device=q.device,
+        )
+        attn_output = self.attn(q, kv_dummy, kv_dummy)
+        output, _ = self.o_proj(attn_output)
+        return output
+
+
+class Gemma4MTPDecoderLayer(nn.Module):
+    def __init__(
+        self,
+        config,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.hidden_size = config.hidden_size
+
+        layer_idx = extract_layer_index(prefix)
+        layer_type = config.layer_types[layer_idx]
+        is_full_attention = layer_type == "full_attention"
+        head_dim = (
+            getattr(config, "global_head_dim", config.head_dim)
+            if is_full_attention
+            else config.head_dim
+        )
+
+        self.self_attn = Gemma4MTPAttention(
+            config=config,
+            hidden_size=self.hidden_size,
+            num_heads=config.num_attention_heads,
+            num_kv_heads=config.num_key_value_heads,
+            head_dim=head_dim,
+            max_position_embeddings=config.max_position_embeddings,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            attn_logits_soft_cap=getattr(config, "attn_logit_softcapping", None),
+            prefix=f"{prefix}.self_attn",
+        )
+
+        text_config = _get_text_config(config)
+        self.mlp = Gemma4MLP(
+            hidden_size=self.hidden_size,
+            intermediate_size=text_config.intermediate_size,
+            hidden_activation=text_config.hidden_activation,
+            quant_config=None,
+            prefix=f"{prefix}.mlp",
+        )
+
+        self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.post_attention_layernorm = RMSNorm(
+            config.hidden_size, eps=config.rms_norm_eps
+        )
+        self.pre_feedforward_layernorm = RMSNorm(
+            config.hidden_size, eps=config.rms_norm_eps
+        )
+        self.post_feedforward_layernorm = RMSNorm(
+            config.hidden_size, eps=config.rms_norm_eps
+        )
+
+        self.register_buffer("layer_scalar", torch.ones(1))
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        residual: torch.Tensor | None,
+        **kwargs,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        residual = hidden_states
+        hidden_states = self.input_layernorm(residual)
+
+        hidden_states = self.self_attn(
+            positions=positions,
+            hidden_states=hidden_states,
+            **kwargs,
+        )
+
+        hidden_states = self.post_attention_layernorm(hidden_states)
+        hidden_states = hidden_states + residual
+        residual = hidden_states
+
+        hidden_states = self.pre_feedforward_layernorm(hidden_states)
+        hidden_states = self.mlp(hidden_states)
+
+        hidden_states = self.post_feedforward_layernorm(hidden_states)
+        hidden_states = hidden_states + residual
+
+        hidden_states = hidden_states * self.layer_scalar
+        return hidden_states, None
+
+
+class Gemma4MultiTokenPredictor(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+
+        config = vllm_config.speculative_config.draft_model_config.hf_config
+        text_config = _get_text_config(config)
+        self.config = text_config
+
+        self.hidden_size = text_config.hidden_size
+        self.backbone_hidden_size = getattr(
+            config, "backbone_hidden_size", self.hidden_size
+        )
+        self.vocab_size = text_config.vocab_size
+        self.num_mtp_layers = text_config.num_hidden_layers
+
+        self.embed_tokens = VocabParallelEmbedding(
+            self.vocab_size,
+            self.hidden_size,
+        )
+
+        self.pre_projection = ColumnParallelLinear(
+            2 * self.backbone_hidden_size,
+            self.hidden_size,
+            bias=False,
+            gather_output=True,
+            prefix=f"{prefix}.pre_projection",
+        )
+
+        self.post_projection = RowParallelLinear(
+            self.hidden_size,
+            self.backbone_hidden_size,
+            bias=False,
+            input_is_parallel=False,
+            prefix=f"{prefix}.post_projection",
+        )
+
+        self.layers = nn.ModuleList(
+            Gemma4MTPDecoderLayer(
+                text_config,
+                cache_config=vllm_config.cache_config,
+                quant_config=vllm_config.quant_config,
+                prefix=f"{prefix}.layers.{idx}",
+            )
+            for idx in range(self.num_mtp_layers)
+        )
+
+        self.norm = RMSNorm(self.hidden_size, eps=text_config.rms_norm_eps)
+
+        # After embedding sharing, embed_tokens is replaced with the
+        # target model's backbone-dim embedding.  Scale by
+        # sqrt(backbone_hidden_size) to match the target's convention.
+        self.register_buffer(
+            "normalizer",
+            torch.tensor(self.backbone_hidden_size**0.5),
+            persistent=False,
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids) * self.normalizer
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        stacked_params_mapping = [
+            ("gate_up_proj", "gate_proj", 0),
+            ("gate_up_proj", "up_proj", 1),
+        ]
+
+        params_dict = dict(self.named_parameters())
+        params_dict.update(dict(self.named_buffers()))
+        loaded_params: set[str] = set()
+
+        for name, loaded_weight in weights:
+            if "rotary_emb.inv_freq" in name:
+                continue
+
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                if weight_name not in name:
+                    continue
+                name = name.replace(weight_name, param_name)
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
+                if name not in params_dict:
+                    continue
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                break
+            else:
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
+                if name not in params_dict:
+                    continue
+                param = params_dict[name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                weight_loader(param, loaded_weight)
+            loaded_params.add(name)
+
+        return loaded_params
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        spec_step_idx: int = 0,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Returns (draft_hidden_states, backbone_hidden_states).
+
+        draft_hidden_states: draft-dim, used by compute_logits via lm_head.
+        backbone_hidden_states: backbone-dim, stored in the proposer's
+            hidden-state buffer and fed back as input to the next step.
+        """
+        if inputs_embeds is None:
+            inputs_embeds = self.embed_input_ids(input_ids)
+
+        combined = torch.cat([inputs_embeds, hidden_states], dim=-1)
+        hidden_states, _ = self.pre_projection(combined)
+
+        residual = None
+        for layer in self.layers:
+            hidden_states, residual = layer(
+                positions=positions,
+                hidden_states=hidden_states,
+                residual=residual,
+            )
+
+        draft_hidden_states = self.norm(hidden_states)
+
+        backbone_hidden_states, _ = self.post_projection(draft_hidden_states)
+        return draft_hidden_states, backbone_hidden_states
+
+
+@support_torch_compile
+class Gemma4MTP(nn.Module):
+    """Gemma4 Multi-Token Prediction model for speculative decoding.
+
+    forward() returns (draft_hidden_states, backbone_hidden_states).
+    The proposer uses draft_hidden_states for compute_logits (via
+    the draft-dim lm_head) and backbone_hidden_states for the
+    hidden-state feedback buffer.
+    """
+
+    has_own_lm_head = True
+
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_prefix={
+            "pre_projection.": "model.pre_projection.",
+            "post_projection.": "model.post_projection.",
+        },
+    )
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.speculative_config.draft_model_config.hf_config
+        text_config = _get_text_config(config)
+        self.config = config
+
+        self.model = Gemma4MultiTokenPredictor(
+            vllm_config=vllm_config,
+            prefix=maybe_prefix(prefix, "draft_model"),
+        )
+
+        # lm_head operates in draft-dim.  Tied to embed_tokens at init
+        # so load_weights populates both from a single checkpoint entry.
+        # After embedding sharing, lm_head.weight still references the
+        # original draft-dim tensor.
+        self.lm_head = ParallelLMHead(
+            text_config.vocab_size,
+            text_config.hidden_size,
+            prefix=maybe_prefix(prefix, "lm_head"),
+        )
+        if getattr(config, "tie_word_embeddings", True):
+            self.lm_head.weight = self.model.embed_tokens.weight
+
+        self.logits_processor = LogitsProcessor(
+            text_config.vocab_size,
+            soft_cap=getattr(text_config, "final_logit_softcapping", None),
+        )
+
+        if getattr(config, "use_ordered_embeddings", False):
+            num_centroids = getattr(config, "num_centroids", 2048)
+            top_k = getattr(config, "centroid_intermediate_top_k", 32)
+            self.masked_embedding = Gemma4MTPMaskedEmbedder(
+                hidden_size=text_config.hidden_size,
+                vocab_size=text_config.vocab_size,
+                num_centroids=num_centroids,
+                centroid_intermediate_top_k=top_k,
+            )
+            logger.info(
+                "Gemma4 MTP: centroids masking enabled "
+                "(num_centroids=%d, top_k=%d, active_tokens=%d/%d).",
+                num_centroids,
+                top_k,
+                top_k * (text_config.vocab_size // num_centroids),
+                text_config.vocab_size,
+            )
+        else:
+            self.masked_embedding = None
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        spec_step_idx: int = 0,
+        **kwargs: object,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        return self.model(
+            input_ids,
+            positions,
+            hidden_states,
+            intermediate_tensors,
+            inputs_embeds,
+            spec_step_idx,
+        )
+
+    def _get_full_lm_head_weight(self) -> torch.Tensor:
+        lm_head_weight = self.lm_head.weight
+        tp_size = get_tensor_model_parallel_world_size()
+        if tp_size > 1:
+            lm_head_weight = tensor_model_parallel_all_gather(
+                lm_head_weight,
+                dim=0,
+            )
+        return lm_head_weight[: self.masked_embedding.vocab_size]
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor | None:
+        if self.masked_embedding is not None:
+            return self.masked_embedding(
+                hidden_states,
+                self._get_full_lm_head_weight(),
+            )
+        return self.logits_processor(self.lm_head, hidden_states)
+
+    def get_top_tokens(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor:
+        """Sparse argmax via centroids masking. Returns token IDs directly."""
+        return self.masked_embedding.get_top_tokens(
+            hidden_states,
+            self._get_full_lm_head_weight(),
+        )
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py
index 85f422342a95..20caa8672dee 100644
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -723,6 +723,7 @@ def rot_pos_emb(
         # Use pre-computed cos_sin_cache from RotaryEmbedding
         cos, sin = self.rotary_pos_emb.get_cos_sin(max_grid_size)
 
+        pos_ids = pos_ids.to(cos.device, non_blocking=True)
         cos_combined = cos[pos_ids].flatten(1)
         sin_combined = sin[pos_ids].flatten(1)
         return cos_combined, sin_combined, pos_ids
diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py
index d0e6cb6ada8b..98cc9a50adcc 100644
--- a/vllm/model_executor/models/glm4_moe.py
+++ b/vllm/model_executor/models/glm4_moe.py
@@ -42,7 +42,10 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
@@ -178,13 +181,12 @@ def __init__(
         else:
             self.shared_experts = None
 
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.shared_experts,
             num_experts=config.n_routed_experts,
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=config.norm_topk_prob,
             quant_config=quant_config,
             use_grouped_topk=True,
@@ -192,8 +194,8 @@ def __init__(
             topk_group=config.topk_group,
             prefix=f"{prefix}.experts",
             scoring_func="sigmoid",
-            # we do scaling outside, set factor to 1.0 to avoid double mul
-            routed_scaling_factor=1.0,
+            routed_scaling_factor=self.routed_scaling_factor,
+            apply_routed_scale_to_output=True,
             e_score_correction_bias=self.gate.e_score_correction_bias,
             enable_eplb=self.enable_eplb,
             num_redundant_experts=self.n_redundant_experts,
@@ -207,23 +209,9 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         # router_logits: (num_tokens, n_experts)
         router_logits = self.gate(hidden_states.to(dtype=torch.float32))
 
-        fused_moe_out = self.experts(
+        final_hidden_states = self.experts(
             hidden_states=hidden_states, router_logits=router_logits
         )
-
-        if self.shared_experts is not None:
-            shared_output, final_hidden_states = fused_moe_out
-            assert shared_output is not None
-            final_hidden_states = (
-                final_hidden_states * self.routed_scaling_factor + shared_output
-            )
-        else:
-            final_hidden_states = fused_moe_out * self.routed_scaling_factor
-
-        if self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
-                final_hidden_states
-            )
         return final_hidden_states.view(num_tokens, hidden_dim)
 
 
@@ -481,7 +469,7 @@ def forward(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return SharedFusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -518,16 +506,24 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                 # for mlp.experts[0].gate_gate_up_proj, which breaks load.
                 if ("mlp.experts." in name) and name not in params_dict:
                     continue
+
                 name = name.replace(weight_name, param_name)
                 # Skip loading extra bias for GPTQ models.
                 if name.endswith(".bias") and name not in params_dict:
                     continue
+
+                name = maybe_remap_kv_scale_name(name, params_dict)
+                if name is None:
+                    continue
                 if is_pp_missing_parameter(name, self):
                     continue
 
                 param = params_dict[name]
-                weight_loader = param.weight_loader
-                weight_loader(param, loaded_weight, shard_id)
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                if weight_loader == default_weight_loader:
+                    weight_loader(param, loaded_weight)
+                else:
+                    weight_loader(param, loaded_weight, shard_id)
                 break
             else:
                 is_expert_weight = False
diff --git a/vllm/model_executor/models/glm4_moe_lite.py b/vllm/model_executor/models/glm4_moe_lite.py
index 6d96f748e3ea..77aaa179aa52 100644
--- a/vllm/model_executor/models/glm4_moe_lite.py
+++ b/vllm/model_executor/models/glm4_moe_lite.py
@@ -41,7 +41,9 @@
     get_pp_group,
 )
 from vllm.logger import init_logger
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -308,7 +310,7 @@ def make_empty_intermediate_tensors(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return SharedFusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -334,7 +336,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        expert_params_mapping = SharedFusedMoE.make_expert_params_mapping(
+        expert_params_mapping = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -616,7 +618,7 @@ def compute_logits(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return SharedFusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/glm4_moe_lite_mtp.py b/vllm/model_executor/models/glm4_moe_lite_mtp.py
index efa96c40d042..596cb48face0 100644
--- a/vllm/model_executor/models/glm4_moe_lite_mtp.py
+++ b/vllm/model_executor/models/glm4_moe_lite_mtp.py
@@ -32,7 +32,10 @@
 
 from vllm._aiter_ops import rocm_aiter_ops
 from vllm.config import VllmConfig
-from vllm.model_executor.layers.fused_moe import FusedMoE, SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.quantization import QuantizationConfig
@@ -260,7 +263,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
             ("fused_qkv_a_proj", "kv_a_proj_with_mqa", 1),
         ]
 
-        expert_params_mapping = SharedFusedMoE.make_expert_params_mapping(
+        expert_params_mapping = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/glm4_moe_mtp.py b/vllm/model_executor/models/glm4_moe_mtp.py
index cde94673e53a..791ecabebebc 100644
--- a/vllm/model_executor/models/glm4_moe_mtp.py
+++ b/vllm/model_executor/models/glm4_moe_mtp.py
@@ -31,7 +31,10 @@
 from transformers import PretrainedConfig
 
 from vllm.config import CacheConfig, ParallelConfig, VllmConfig
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.quantization import QuantizationConfig
@@ -247,7 +250,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        expert_params_mapping = FusedMoE.make_expert_params_mapping(
+        expert_params_mapping = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/glmasr.py b/vllm/model_executor/models/glmasr.py
index 0d54588cc2b0..cd168b6b4617 100644
--- a/vllm/model_executor/models/glmasr.py
+++ b/vllm/model_executor/models/glmasr.py
@@ -4,7 +4,6 @@
 from collections.abc import Iterable, Mapping, Sequence
 from typing import Annotated, Any, Literal, TypeAlias
 
-import numpy as np
 import torch
 import torch.nn as nn
 from transformers import BatchFeature
@@ -13,6 +12,7 @@
 
 from vllm.config import ModelConfig, SpeechToTextConfig, VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
+from vllm.config.speech_to_text import SpeechToTextParams
 from vllm.distributed.parallel_state import get_tensor_model_parallel_world_size
 from vllm.inputs import ModalityData, MultiModalDataDict, PromptType, TokensPrompt
 from vllm.model_executor.layers.activation import get_act_fn
@@ -66,7 +66,7 @@
     SupportsTranscription,
 )
 from .utils import AutoWeightsLoader, init_vllm_registered_model, maybe_prefix
-from .whisper import ISO639_1_SUPPORTED_LANGS
+from .whisper import ISO639_1_SUPPORTED_LANGS, _create_fake_bias_for_k_proj
 
 
 class GlmAsrEncoderRotaryEmbedding(nn.Module):
@@ -499,6 +499,8 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         """Custom weight loading to handle q_proj/k_proj/v_proj -> qkv_proj mapping."""
         from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 
+        weights = _create_fake_bias_for_k_proj(weights, ".k_proj.weight")
+
         stacked_params_mapping = [
             # (param_name, shard_name, shard_id)
             ("qkv_proj", "q_proj", "q"),
@@ -1129,17 +1131,12 @@ def get_speech_to_text_config(
         )
 
     @classmethod
-    def get_generation_prompt(
-        cls,
-        audio: np.ndarray,
-        model_config: ModelConfig,
-        stt_config: SpeechToTextConfig,
-        language: str | None,
-        task_type: Literal["transcribe", "translate"],
-        request_prompt: str,
-        to_language: str | None,
-    ) -> PromptType:
+    def get_generation_prompt(cls, stt_params: SpeechToTextParams) -> PromptType:
         """Get the generation prompt to be used for transcription requests."""
+        audio = stt_params.audio
+        model_config = stt_params.model_config
+        task_type = stt_params.task_type
+        to_language = stt_params.to_language
         tokenizer = cached_tokenizer_from_config(model_config)
         audio_token = cls._get_audio_token(model_config)
 
diff --git a/vllm/model_executor/models/gpt_oss.py b/vllm/model_executor/models/gpt_oss.py
index 482056250a1e..d12db96c5d46 100644
--- a/vllm/model_executor/models/gpt_oss.py
+++ b/vllm/model_executor/models/gpt_oss.py
@@ -20,11 +20,15 @@
     tensor_model_parallel_all_gather,
 )
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE, GateLinear
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.fused_moe.config import FusedMoEParallelConfig
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     QKVParallelLinear,
+    ReplicatedLinear,
     RowParallelLinear,
 )
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
@@ -174,11 +178,13 @@ def __init__(
         self.hidden_size = config.hidden_size
         self.experts_per_token = config.num_experts_per_tok
         self.world_size = dist.get_world_size() if dist.is_initialized() else 1
-        self.router = GateLinear(
+        self.router = ReplicatedLinear(
             config.hidden_size,
             config.num_local_experts,
             bias=True,
+            quant_config=None,
             prefix=f"{prefix}.router",
+            return_bias=False,
         )
         assert config.intermediate_size % self.world_size == 0
         self.experts = FusedMoE(
@@ -186,7 +192,6 @@ def __init__(
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.intermediate_size,
-            reduce_results=True,
             renormalize=True,
             quant_config=quant_config,
             prefix=f"{prefix}.experts",
@@ -206,7 +211,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
                 self, x[:, : self.hidden_size], self.router.weight, self.router.bias
             )
         else:
-            g, _ = self.router(x)
+            g = self.router(x)
         x = self.experts(hidden_states=x, router_logits=g)[:, : self.hidden_size]
 
         if self.is_sequence_parallel:
@@ -329,7 +334,7 @@ def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, weight scales, activation scales
         # (param_name, weight_name, expert_id, shard_id)
         # NOTE: this is only used for quark.
-        return FusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="w1",
             ckpt_down_proj_name="w2",
@@ -557,6 +562,14 @@ def _load_weights_quark(
                 pcp_rank=get_pcp_group().rank_in_group,
             )
 
+        def _is_mxfp4(weight_dtype: str | None) -> bool:
+            """Return True for any MXFP4 weight-dtype variant.
+
+            Covers "gpt_oss_mxfp4" (GptOssMxfp4MoEMethod) and "mxfp4"
+            (QuarkMoEMethod with fp4 weights) and any future variants.
+            """
+            return weight_dtype is not None and "mxfp4" in weight_dtype
+
         def _get_moe_weight_dtype(layer_id: int = 0) -> str | None:
             """Helper function to get MoE quantization weight dtype.
 
@@ -575,7 +588,7 @@ def _get_moe_weight_dtype(layer_id: int = 0) -> str | None:
 
         moe_weight_dtype = _get_moe_weight_dtype(layer_id=0)
 
-        if moe_weight_dtype == "mxfp4":
+        if _is_mxfp4(moe_weight_dtype):
             # MXFP4 requires OCP_MX_BLOCK_SIZE alignment
             intermediate_size_block = intermediate_size // OCP_MX_BLOCK_SIZE
             per_rank_intermediate_size_block = cdiv(intermediate_size_block, tp_size)
@@ -679,7 +692,7 @@ def kv_cache_scale_loader(
                 continue
 
             # Unified handler for mxfp4 weights and scales
-            elif moe_quant_method == "mxfp4" and any(
+            elif _is_mxfp4(moe_quant_method) and any(
                 name.endswith(suffix)
                 for suffix in [
                     ".w13_weight_scale",
@@ -1113,8 +1126,22 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
             if hasattr(self.config, "quantization_config")
             else None
         )
-
+        # Normalize the checkpoint's quant_method to the internal name.
+        # Note: there are three places where "mxfp4" -> "gpt_oss_mxfp4"
+        # normalization occurs, each serving a different data path:
+        #   1. GptOssMxfp4Config.override_quantization_method() — sets
+        #      ModelConfig.quantization (used to select the QuantizationConfig
+        #      class at model init time), reading from model_arch_config which
+        #      is a snapshot taken before verify_and_update_model_config runs.
+        #   2. GptOssForCausalLMConfig.verify_and_update_model_config() —
+        #      patches hf_config.quantization_config in-place (a separate copy
+        #      of the dict from model_arch_config) for later hf_config lookups.
+        #   3. Here — reads directly from self.config (the raw HF config) which
+        #      may still carry the original "mxfp4" string from the checkpoint.
         if quant_method == "mxfp4":
+            quant_method = "gpt_oss_mxfp4"
+
+        if quant_method == "gpt_oss_mxfp4":
             return self._load_weights_mxfp4(
                 ep_rank_end,
                 ep_rank_start,
diff --git a/vllm/model_executor/models/granite4_vision.py b/vllm/model_executor/models/granite4_vision.py
new file mode 100644
index 000000000000..c6e4df2992cb
--- /dev/null
+++ b/vllm/model_executor/models/granite4_vision.py
@@ -0,0 +1,931 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""vLLM implementation of Granite 4 Vision.
+
+Uses GraniteForCausalLM as the language backbone with SigLIP vision encoder
+and deepstack feature injection via WindowQFormer projectors.
+
+LoRA support: use --enable-lora --default-mm-loras for LM-only LoRA adapters.
+"""
+
+import math
+from collections.abc import Iterable, Mapping
+from fractions import Fraction
+from itertools import islice
+
+import torch
+import torch.nn as nn
+from transformers import BatchFeature
+from transformers.models.blip_2.configuration_blip_2 import Blip2QFormerConfig
+from transformers.models.llava_next.modeling_llava_next import (
+    get_anyres_image_grid_shape,
+    image_size_to_num_patches,
+    unpad_image,
+)
+
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import CacheConfig, VllmConfig
+from vllm.distributed.parallel_state import get_pp_group
+from vllm.logger import init_logger
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
+from vllm.model_executor.models.granite import GraniteForCausalLM, GraniteModel
+from vllm.model_executor.models.interfaces import (
+    MultiModalEmbeddings,
+    SupportsLoRA,
+    SupportsMultiModal,
+    SupportsPP,
+)
+from vllm.model_executor.models.llava import LlavaDummyInputsBuilder
+from vllm.model_executor.models.llava_next import (
+    BaseLlavaNextMultiModalProcessor,
+    LlavaNextImageEmbeddingInputs,
+    LlavaNextImageInputs,
+    LlavaNextImagePixelInputs,
+    LlavaNextProcessingInfo,
+)
+from vllm.model_executor.models.module_mapping import MultiModelKeys
+from vllm.model_executor.models.siglip import SiglipVisionModel
+from vllm.model_executor.models.utils import (
+    AutoWeightsLoader,
+    PPMissingLayer,
+    WeightsMapper,
+    maybe_prefix,
+)
+from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.multimodal.inputs import MultiModalFieldConfig
+from vllm.sequence import IntermediateTensors
+
+from .blip2 import Blip2QFormerModel
+
+logger = init_logger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Downsampler modules (translated from HF downsampling.py)
+# ---------------------------------------------------------------------------
+
+
+class InterpolateDownsampler:
+    """Spatial downsampling via area interpolation."""
+
+    def __init__(self, config, mode="area"):
+        self.orig_image_side = (
+            config.vision_config.image_size // config.vision_config.patch_size
+        )
+        self.new_image_side = int(
+            self.orig_image_side * Fraction(config.downsample_rate)
+        )
+        self.mode = mode
+
+    def __call__(self, image_features: torch.Tensor) -> torch.Tensor:
+        batch_size, _, dim = image_features.size()
+        up_shape = [batch_size, self.orig_image_side, self.orig_image_side, dim]
+        large = image_features.view(up_shape).permute(0, 3, 1, 2)
+        small = torch.nn.functional.interpolate(
+            large,
+            size=(self.new_image_side, self.new_image_side),
+            mode=self.mode,
+        )
+        return small.permute(0, 2, 3, 1).flatten(1, 2)
+
+
+class SpatialOffsetDownsampler:
+    """Sample one position from each 2x2 block (offset 0-3 = TL/TR/BL/BR)."""
+
+    def __init__(self, config, offset: int = 0):
+        self.orig_image_side = (
+            config.vision_config.image_size // config.vision_config.patch_size
+        )
+        self.new_image_side = self.orig_image_side // 2
+        offsets = [(0, 0), (0, 1), (1, 0), (1, 1)]
+        self.offset_h, self.offset_w = offsets[offset]
+
+    def __call__(self, image_features: torch.Tensor) -> torch.Tensor:
+        B, _, C = image_features.shape
+        features_2d = image_features.reshape(
+            B, self.orig_image_side, self.orig_image_side, C
+        )
+        n = self.new_image_side
+        blocks = features_2d.reshape(B, n, 2, n, 2, C)
+        sampled = blocks[:, :, self.offset_h, :, self.offset_w, :]
+        return sampled.reshape(B, -1, C)
+
+
+class WindowQFormerDownsampler(nn.Module):
+    """Window-based QFormer downsampler (matches HF downsampling.py exactly)."""
+
+    def __init__(
+        self,
+        config,
+        quant_config: QuantizationConfig | None = None,
+        cache_config: CacheConfig | None = None,
+        spatial_offset: int | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        llm_hidden_size = config.text_config.hidden_size
+        vision_hidden_size = config.vision_config.hidden_size
+
+        self.dropout = nn.Dropout(config.projector_dropout)
+
+        if spatial_offset is not None:
+            self.downsampler = SpatialOffsetDownsampler(config, offset=spatial_offset)
+        else:
+            self.downsampler = InterpolateDownsampler(config)
+
+        qformer_config = Blip2QFormerConfig(
+            hidden_size=vision_hidden_size,
+            num_attention_heads=vision_hidden_size // 64,
+            intermediate_size=3072,
+            num_hidden_layers=1,
+            encoder_hidden_size=vision_hidden_size,
+            cross_attention_frequency=1,
+            max_position_embeddings=2048,
+            use_qformer_text_input=False,
+        )
+        self.qformer = Blip2QFormerModel(
+            qformer_config,
+            quant_config=quant_config,
+            cache_config=cache_config,
+            prefix=maybe_prefix(prefix, "qformer"),
+        )
+
+        self.image_side = (
+            config.vision_config.image_size // config.vision_config.patch_size
+        )
+        q, w = config.downsample_rate.split("/")
+        self.query_side, self.window_side = int(q), int(w)
+        self.query_length = self.query_side**2
+
+        embed_std = 1 / math.sqrt(vision_hidden_size)
+        self.norm = nn.LayerNorm(vision_hidden_size, eps=1e-6)
+        self.query = nn.Parameter(
+            torch.randn(1, self.query_length, vision_hidden_size) * embed_std
+        )
+        self.image_positions = nn.Parameter(
+            torch.randn(1, self.window_side**2, vision_hidden_size) * embed_std
+        )
+        self.out_linear = nn.Linear(vision_hidden_size, llm_hidden_size, bias=True)
+
+    def _win(self, x: torch.Tensor, side: int, win: int) -> torch.Tensor:
+        """(B, side*side, C) → (B*n*n, win*win, C) where n=side//win."""
+        B, _, C = x.shape
+        n = side // win
+        return (
+            x.view(B, side, side, C)
+            .view(B, n, win, n, win, C)
+            .transpose(2, 3)
+            .flatten(0, 2)
+            .flatten(1, 2)
+        )
+
+    def _unwin(self, xw: torch.Tensor, n: int, win: int) -> torch.Tensor:
+        """(B*n*n, win*win, C) → (B, (n*win)^2, C)."""
+        Bnn, _, C = xw.shape
+        B = Bnn // (n * n)
+        side = n * win
+        return (
+            xw.view(B, n, n, win, win, C)
+            .transpose(2, 3)
+            .contiguous()
+            .view(B, side, side, C)
+            .flatten(1, 2)
+        )
+
+    def forward(self, image_features: torch.Tensor) -> torch.Tensor:
+        B, HW, C = image_features.shape
+        assert self.image_side * self.image_side == HW
+        n = self.image_side // self.window_side
+
+        image_features = self.norm(image_features)
+        enc = self._win(image_features, self.image_side, self.window_side)
+
+        downsampled = self.downsampler(image_features)
+        new_side = n * self.query_side
+        downsampled_w = self._win(downsampled, new_side, self.query_side)
+
+        query_embeds = self.query + downsampled_w
+        encoder_embeds = self.dropout(enc + self.image_positions)
+        out_w = self.qformer(
+            query_embeds=query_embeds,
+            encoder_hidden_states=encoder_embeds,
+        )
+
+        out = self._unwin(out_w, n=n, win=self.query_side)
+        out = self.dropout(out)
+        return self.out_linear(out)
+
+
+# ---------------------------------------------------------------------------
+# LLM subclasses with deepstack injection in the layer loop
+# ---------------------------------------------------------------------------
+
+
+@support_torch_compile(
+    dynamic_arg_dims={
+        "input_ids": 0,
+        "positions": 0,
+        "intermediate_tensors": 0,
+        "inputs_embeds": 0,
+        "deepstack_input_embeds": 0,
+    }
+)
+class Granite4VisionLLMModel(GraniteModel):
+    """GraniteModel with deepstack feature injection in the layer loop."""
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        deepstack_input_embeds: IntermediateTensors | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        if get_pp_group().is_first_rank:
+            if inputs_embeds is not None:
+                hidden_states = inputs_embeds
+            else:
+                hidden_states = self.embed_input_ids(input_ids)
+                hidden_states = hidden_states * self.config.embedding_multiplier
+        else:
+            assert intermediate_tensors is not None
+            hidden_states = intermediate_tensors["hidden_states"]
+            # Recover deepstack features forwarded from the previous PP rank.
+            if deepstack_input_embeds is None:
+                ds_keys = [
+                    k for k in intermediate_tensors.tensors if k.startswith("ds_")
+                ]
+                if ds_keys:
+                    deepstack_input_embeds = IntermediateTensors(
+                        {k: intermediate_tensors[k] for k in ds_keys}
+                    )
+
+        for layer_idx, layer in islice(
+            enumerate(self.layers), self.start_layer, self.end_layer
+        ):
+            if deepstack_input_embeds is not None:
+                key = f"ds_{layer_idx}"
+                if key in deepstack_input_embeds.tensors:
+                    feat = deepstack_input_embeds[key]
+                    # Resize to match hidden_states in case of CUDA graph padding
+                    num_tokens = hidden_states.size(0)
+                    buf_len = feat.shape[0]
+                    if buf_len != num_tokens:
+                        feat = torch.nn.functional.pad(
+                            feat[:num_tokens],
+                            (0, 0, 0, max(0, num_tokens - buf_len)),
+                        )
+                    hidden_states = hidden_states + feat
+            hidden_states = layer(positions, hidden_states)
+
+        if not get_pp_group().is_last_rank:
+            # Forward hidden_states and any deepstack features for later ranks.
+            it = {"hidden_states": hidden_states}
+            if deepstack_input_embeds is not None:
+                remaining = {
+                    k: v
+                    for k, v in deepstack_input_embeds.tensors.items()
+                    if int(k.split("_")[1]) >= self.end_layer
+                }
+                it.update(remaining)
+            return IntermediateTensors(it)
+
+        hidden_states = self.norm(hidden_states)
+        return hidden_states
+
+
+class Granite4VisionLLMForCausalLM(GraniteForCausalLM):
+    """GraniteForCausalLM backed by Granite4VisionLLMModel."""
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
+        nn.Module.__init__(self)
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        self.config = config
+        self.quant_config = quant_config
+        self.model = Granite4VisionLLMModel(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+        if get_pp_group().is_last_rank:
+            self.lm_head = ParallelLMHead(
+                config.vocab_size,
+                config.hidden_size,
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "lm_head"),
+            )
+            if config.tie_word_embeddings:
+                self.lm_head.weight = self.model.embed_tokens.weight
+            logit_scale = getattr(config, "logit_scale", 1.0)
+            if hasattr(config, "logits_scaling"):
+                logit_scale /= config.logits_scaling
+            self.logits_processor = LogitsProcessor(
+                config.vocab_size, scale=logit_scale
+            )
+        else:
+            self.lm_head = PPMissingLayer()
+
+    def make_empty_intermediate_tensors(
+        self, batch_size: int, dtype: torch.dtype, device: torch.device
+    ) -> IntermediateTensors:
+        tensors = super().make_empty_intermediate_tensors(batch_size, dtype, device)
+        # Include deepstack buffers so non-first PP ranks receive them.
+        # _ds_layer_indices is set directly on this instance by the outer model.
+        for llm_layer in getattr(self, "_ds_layer_indices", []):
+            tensors.tensors[f"ds_{llm_layer}"] = torch.zeros(
+                (batch_size, self.config.hidden_size), dtype=dtype, device=device
+            )
+        return tensors
+
+
+# ---------------------------------------------------------------------------
+# Processing info / processor (reuses LlavaNext patterns)
+# ---------------------------------------------------------------------------
+
+
+class Granite4VisionProcessingInfo(LlavaNextProcessingInfo):
+    def get_hf_config(self):
+        return self.ctx.get_hf_config()
+
+    def get_hf_processor(self, **kwargs):
+        return self.ctx.get_hf_processor(**kwargs)
+
+    def get_num_image_tokens(
+        self,
+        *,
+        image_width: int,
+        image_height: int,
+    ) -> int:
+        hf_config = self.get_hf_config()
+        vision_encoder_info = self.get_vision_encoder_info()
+
+        # After QFormer downsampling, patch grid is scaled by downsample_rate
+        ds_rate = Fraction(hf_config.downsample_rate)
+        patch_grid = vision_encoder_info.get_patch_grid_length()  # 24 for 384/16
+        downsampled_grid = int(patch_grid * ds_rate)  # 12 for rate 4/8
+
+        # Base feature: downsampled_grid^2
+        base_feature_size = downsampled_grid * downsampled_grid
+
+        num_patch_height, num_patch_width = get_anyres_image_grid_shape(
+            image_size=(image_height, image_width),
+            grid_pinpoints=hf_config.image_grid_pinpoints,
+            patch_size=vision_encoder_info.get_image_size(),
+        )
+
+        (
+            unpadded_feature_size,
+            newline_feature_size,
+        ) = self._get_num_unpadded_features(
+            original_height=image_height,
+            original_width=image_width,
+            npatches=downsampled_grid,
+            num_patch_height=num_patch_height,
+            num_patch_width=num_patch_width,
+        )
+
+        return unpadded_feature_size + newline_feature_size + base_feature_size
+
+
+class Granite4VisionMultiModalProcessor(
+    BaseLlavaNextMultiModalProcessor[Granite4VisionProcessingInfo]
+):
+    def _get_mm_fields_config(
+        self,
+        hf_inputs: BatchFeature,
+        hf_processor_mm_kwargs: Mapping[str, object],
+    ) -> Mapping[str, MultiModalFieldConfig]:
+        return dict(
+            pixel_values=MultiModalFieldConfig.batched("image"),
+            image_sizes=MultiModalFieldConfig.batched("image"),
+        )
+
+
+# ---------------------------------------------------------------------------
+# Top-level model
+# ---------------------------------------------------------------------------
+
+
+@MULTIMODAL_REGISTRY.register_processor(
+    Granite4VisionMultiModalProcessor,
+    info=Granite4VisionProcessingInfo,
+    dummy_inputs=LlavaDummyInputsBuilder,
+)
+class Granite4VisionForConditionalGeneration(
+    nn.Module, SupportsLoRA, SupportsMultiModal, SupportsPP
+):
+    """vLLM implementation of Granite 4 Vision.
+
+    Architecture:
+    - SigLIP vision tower -> WindowQFormerDownsampler projectors
+    - Deepstack: 4 vision layers projected and injected at 4 LLM layers
+    - Spatial: 4 offset groups from last vision layer injected at 4 more LLM layers
+    - Granite language backbone with embedding_multiplier
+    - logits_scaling via LogitsProcessor
+
+    The outer model runs the LLM layer loop directly (like HF does) to inject
+    deepstack features. This avoids wrapping the inner model and keeps weight
+    loading simple.
+
+    LoRA support:
+    - Full merge: --hf-overrides '{"adapter_path": "path/to/lora"}' merges
+      LM-only LoRA deltas at load time (W += scaling * B @ A).
+    - Native LoRA: --enable-lora --default-mm-loras '{"image": "path/to/lora"}'
+      lets vLLM runtime serve LM LoRA per-request.
+    Both modes expect a LM-only adapter (no modules_to_save).
+    """
+
+    # LoRA class attributes (matches GraniteForCausalLM)
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+        "gate_up_proj": ["gate_proj", "up_proj"],
+    }
+    embedding_modules = {}
+
+    # Weight mapping: HF checkpoint -> vLLM parameter names
+    # HF: model.language_model.layers.0...
+    # vLLM: language_model.model.layers.0...
+    # (because GraniteForCausalLM.model = GraniteModel)
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_prefix={
+            "model.language_model.": "language_model.model.",
+            "model.layerwise_projectors.": "layerwise_projectors.",
+            "model.spatial_projectors.": "spatial_projectors.",
+            "model.image_newline": "image_newline",
+            "model.vision_tower.": "vision_tower.",
+            "lm_head.": "language_model.lm_head.",
+        }
+    )
+
+    @classmethod
+    def get_placeholder_str(cls, modality: str, i: int) -> str | None:
+        if modality.startswith("image"):
+            return "<image>"
+        raise ValueError(f"Only image modality is supported, got {modality}")
+
+    def get_mm_mapping(self) -> MultiModelKeys:
+        return MultiModelKeys.from_string_field(
+            language_model="language_model",
+            connector=["layerwise_projectors", "spatial_projectors"],
+            tower_model="vision_tower",
+        )
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
+        super().__init__()
+
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        self.config = config
+        self.vllm_config = vllm_config
+
+        # ----- Vision tower + projectors (marked as tower) -----
+        with self._mark_tower_model(vllm_config, "image"):
+            # Do NOT use init_vision_tower_for_llava here — it truncates the
+            # encoder to vision_feature_layer depth. Deepstack needs ALL hidden
+            # states (deepstack_layer_map uses negative indices into the full
+            # encoder output list).
+            self.vision_tower = SiglipVisionModel(
+                config.vision_config,
+                quant_config=quant_config,
+                require_post_norm=False,
+                prefix=maybe_prefix(prefix, "vision_tower"),
+            )
+
+            # image_newline parameter
+            if config.use_image_newline_parameter:
+                self.image_newline = nn.Parameter(
+                    torch.empty(config.text_config.hidden_size)
+                )
+            else:
+                self.image_newline = None
+
+            cache_config = vllm_config.cache_config
+
+            # Deepstack projectors: one per (vision_layer, llm_layer) pair
+            self.layerwise_projectors = nn.ModuleList(
+                [
+                    WindowQFormerDownsampler(
+                        config,
+                        quant_config=quant_config,
+                        cache_config=cache_config,
+                        prefix=maybe_prefix(prefix, f"layerwise_projectors.{i}"),
+                    )
+                    for i in range(len(config.deepstack_layer_map))
+                ]
+            )
+
+            # Spatial projectors: 4 offset groups
+            self.spatial_projectors = None
+            if config.use_spatial_sampling:
+                self.spatial_projectors = nn.ModuleList(
+                    [
+                        WindowQFormerDownsampler(
+                            config,
+                            quant_config=quant_config,
+                            cache_config=cache_config,
+                            spatial_offset=i,
+                            prefix=maybe_prefix(prefix, f"spatial_projectors.{i}"),
+                        )
+                        for i in range(4)
+                    ]
+                )
+
+        # ----- Language model (marked as LM) -----
+        with self._mark_language_model(vllm_config):
+            self.language_model = Granite4VisionLLMForCausalLM(
+                vllm_config=vllm_config.with_hf_config(config.text_config),
+                prefix=maybe_prefix(prefix, "language_model"),
+            )
+
+        self.make_empty_intermediate_tensors = (
+            self.language_model.make_empty_intermediate_tensors
+        )
+
+        # Store config values we need
+        self._deepstack_layer_map = config.deepstack_layer_map  # [[-19, 9], ...]
+        self._use_spatial_sampling = getattr(config, "use_spatial_sampling", False)
+        self._spatial_vision_layer = getattr(config, "spatial_vision_layer", -1)
+        self._spatial_target_layers = getattr(config, "spatial_target_layers", [])
+        self._vision_feature_select_strategy = getattr(
+            config, "vision_feature_select_strategy", "full"
+        )
+        self._downsample_rate = Fraction(config.downsample_rate)
+
+        # Ordered list of LLM layer indices for each deepstack level.
+        # Pre-populated from config so it's available during CUDA graph capture
+        # (before any embed_multimodal call).
+        self._ds_layer_indices: list[int] = [
+            llm_layer for _, llm_layer in config.deepstack_layer_map
+        ] + list(getattr(config, "spatial_target_layers", []))
+
+        # Share ds_layer_indices with the LLM causal model so
+        # make_empty_intermediate_tensors includes the correct keys
+        # (its self.config is text_config, no deepstack_layer_map).
+        self.language_model._ds_layer_indices = self._ds_layer_indices
+
+        # Pre-allocated persistent GPU buffers for deepstack features.
+        # Written via .copy_() in embed_input_ids(), read by forward() via a
+        # slice. Because the buffer address is fixed, CUDA graph replay sees
+        # the updated values written just before each prefill.
+        # Shape: (max_num_batched_tokens, lm_hidden_size) per level.
+        n_layerwise = len(config.deepstack_layer_map)
+        n_spatial = len(getattr(config, "spatial_target_layers", []))
+        num_ds_levels = n_layerwise + n_spatial
+        lm_hidden = config.text_config.hidden_size
+        max_tokens = vllm_config.scheduler_config.max_num_batched_tokens
+        # Allocated on CPU first; moved to GPU in embed_input_ids on first use.
+        self._ds_buffers: list[torch.Tensor] = [
+            torch.zeros(max_tokens, lm_hidden) for _ in range(num_ds_levels)
+        ]
+        self._ds_num_tokens: int = 0  # tokens written in last embed_input_ids call
+
+    # ----- Vision feature extraction -----
+
+    def _get_vision_hidden_states(
+        self, pixel_values: torch.Tensor
+    ) -> list[torch.Tensor]:
+        """Run vision tower and return all hidden states (including input embeddings).
+
+        Uses SiglipEncoder's built-in return_all_hidden_states support.
+        Returns list[Tensor] where index 0 = embeddings, index i = after layer i-1.
+        """
+        vt = self.vision_tower
+        vm = vt.vision_model if hasattr(vt, "vision_model") else vt
+
+        hidden_states = vm.embeddings(pixel_values)
+        all_hidden_states = vm.encoder(
+            inputs_embeds=hidden_states,
+            return_all_hidden_states=True,
+        )
+        return all_hidden_states
+
+    def _pack_and_unpad_image_features(
+        self,
+        image_features: list[torch.Tensor] | tuple[torch.Tensor, ...],
+        image_sizes: torch.Tensor,
+    ) -> list[torch.Tensor]:
+        """Reshape, unpad, and pack image features.
+
+        Matches HF Granite4VisionModel.pack_and_unpad_image_features exactly.
+        """
+        config = self.config
+        ds_rate = self._downsample_rate
+        new_image_features = []
+
+        for image_idx, image_feature in enumerate(image_features):
+            if image_feature.shape[0] > 1:
+                # Multi-patch: first is base, rest are high-res
+                base_image_feature = image_feature[0]
+                image_feature = image_feature[1:]
+
+                height = width = (
+                    config.vision_config.image_size // config.vision_config.patch_size
+                )
+                # After QFormer downsampling
+                height = int(height * ds_rate)
+                width = int(width * ds_rate)
+
+                num_patch_height, num_patch_width = get_anyres_image_grid_shape(
+                    image_sizes[image_idx],
+                    config.image_grid_pinpoints,
+                    config.vision_config.image_size,
+                )
+
+                image_feature = image_feature.view(
+                    num_patch_height, num_patch_width, height, width, -1
+                )
+                image_feature = (
+                    image_feature.permute(4, 0, 2, 1, 3)
+                    .contiguous()
+                    .flatten(1, 2)
+                    .flatten(2, 3)
+                )
+                image_feature = unpad_image(image_feature, image_sizes[image_idx])
+
+                if self.image_newline is not None:
+                    image_feature = torch.cat(
+                        (
+                            image_feature,
+                            self.image_newline[:, None, None]
+                            .expand(*image_feature.shape[:-1], 1)
+                            .to(image_feature.device, image_feature.dtype),
+                        ),
+                        dim=-1,
+                    )
+
+                image_feature = image_feature.flatten(1, 2).transpose(0, 1)
+                image_feature = torch.cat((base_image_feature, image_feature), dim=0)
+            else:
+                image_feature = image_feature[0]
+                if self.image_newline is not None:
+                    image_feature = torch.cat(
+                        (image_feature, self.image_newline[None].to(image_feature)),
+                        dim=0,
+                    )
+
+            new_image_features.append(image_feature)
+
+        return new_image_features
+
+    def _get_all_layer_features(
+        self,
+        pixel_values: torch.Tensor,
+        image_sizes: torch.Tensor,
+    ) -> tuple[list[int], list[torch.Tensor]]:
+        """Extract deepstack + spatial features for all levels.
+
+        Returns:
+          llm_layer_indices: ordered list of target LLM layer indices
+          per_image_packed:  one tensor per image, shape
+                             (num_tokens_i, lm_hidden_size * num_levels),
+                             all levels packed on dim=-1.
+
+        Packing on dim=-1 means the framework's token-level slicing for
+        chunked prefill preserves all levels intact.
+        """
+        select_strategy = self._vision_feature_select_strategy
+
+        image_num_patches = [
+            image_size_to_num_patches(
+                image_size=imsize,
+                grid_pinpoints=self.config.image_grid_pinpoints,
+                patch_size=self.config.vision_config.image_size,
+            )
+            for imsize in image_sizes
+        ]
+
+        if pixel_values.dim() == 5:
+            pixel_values = torch.cat(
+                [pv[:np_] for pv, np_ in zip(pixel_values, image_num_patches)],
+                dim=0,
+            )
+
+        all_hidden_states = self._get_vision_hidden_states(pixel_values)
+
+        # Collect per-level: (llm_layer, [per_image_tensor, ...])
+        levels: list[tuple[int, list[torch.Tensor]]] = []
+
+        for proj_idx, (vision_layer, llm_layer) in enumerate(self._deepstack_layer_map):
+            selected = all_hidden_states[vision_layer]
+            if select_strategy == "default":
+                selected = selected[:, 1:]
+            projected = self.layerwise_projectors[proj_idx](selected)
+            per_image = self._pack_and_unpad_image_features(
+                torch.split(projected, image_num_patches, dim=0), image_sizes
+            )
+            levels.append((llm_layer, per_image))
+
+        if self._use_spatial_sampling and self.spatial_projectors is not None:
+            spatial_hidden = all_hidden_states[self._spatial_vision_layer]
+            if select_strategy == "default":
+                spatial_hidden = spatial_hidden[:, 1:]
+            for group_idx, llm_layer in enumerate(self._spatial_target_layers):
+                projected = self.spatial_projectors[group_idx](spatial_hidden)
+                per_image = self._pack_and_unpad_image_features(
+                    torch.split(projected, image_num_patches, dim=0), image_sizes
+                )
+                levels.append((llm_layer, per_image))
+
+        llm_layer_indices = [llm_layer for llm_layer, _ in levels]
+        num_images = len(image_sizes)
+        per_image_packed = [
+            torch.cat([levels[lvl][1][img] for lvl in range(len(levels))], dim=-1)
+            for img in range(num_images)
+        ]
+
+        return llm_layer_indices, per_image_packed
+
+    # ----- Multimodal interface -----
+
+    def _parse_and_validate_image_input(
+        self, **kwargs: object
+    ) -> LlavaNextImageInputs | None:
+        pixel_values = kwargs.pop("pixel_values", None)
+        image_sizes = kwargs.pop("image_sizes", None)
+        image_embeds = kwargs.pop("image_embeds", None)
+
+        if pixel_values is None and image_embeds is None:
+            return None
+
+        if pixel_values is not None:
+            expected_h = expected_w = self.config.vision_config.image_size
+            return LlavaNextImagePixelInputs(
+                type="pixel_values",
+                pixel_values=pixel_values,
+                image_sizes=image_sizes,
+                resolve_bindings={"h": expected_h, "w": expected_w},
+            )
+
+        if image_embeds is not None:
+            return LlavaNextImageEmbeddingInputs(
+                type="image_embeds",
+                data=image_embeds,
+            )
+
+        raise AssertionError("Unreachable")
+
+    def embed_multimodal(self, **kwargs: object) -> MultiModalEmbeddings:
+        """Run vision tower and return per-image packed feature tensors.
+
+        Each returned tensor has shape (num_tokens_i, lm_hidden_size * num_levels)
+        with all deepstack levels packed on dim=-1. The framework caches these
+        tensors and slices along dim=0 for chunked prefill — all levels survive
+        intact because slicing is token-wise, not feature-wise.
+
+        embed_input_ids() splits the packed tensor back into per-level buffers.
+        """
+        image_input = self._parse_and_validate_image_input(**kwargs)
+        if image_input is None:
+            return []
+
+        if image_input["type"] == "image_embeds":
+            return [image_input["data"]]
+
+        pixel_values = image_input["pixel_values"]
+        image_sizes = image_input.get("image_sizes")
+
+        if isinstance(pixel_values, list):
+            pixel_values = torch.cat(pixel_values, dim=0)
+
+        llm_layer_indices, per_image_packed = self._get_all_layer_features(
+            pixel_values, image_sizes
+        )
+        self._ds_layer_indices = llm_layer_indices
+        return per_image_packed
+
+    def embed_input_ids(
+        self,
+        input_ids: torch.Tensor,
+        multimodal_embeddings: MultiModalEmbeddings | None = None,
+        *,
+        is_multimodal: torch.Tensor | None = None,
+        handle_oov_mm_token: bool = True,
+    ) -> torch.Tensor:
+        """Merge text and vision embeddings, apply embedding_multiplier.
+
+        HF flow:
+        1. inputs_embeds = embed_tokens(input_ids)
+        2. inputs_embeds.masked_fill(vision_mask, 0.0)
+        3. hidden_states = inputs_embeds * embedding_multiplier
+        4. layer loop injects deepstack features at target layers
+
+        multimodal_embeddings contains packed tensors from embed_multimodal():
+        shape (num_tokens_i, lm_hidden_size * num_levels). We split on dim=-1
+        to get per-level features, build batch-sized buffers (zero at text
+        positions), and store in self._ds_features for forward().
+        """
+        lm_inner = self.language_model.model
+
+        has_vision = (
+            multimodal_embeddings is not None
+            and is_multimodal is not None
+            and len(multimodal_embeddings) > 0
+            and is_multimodal.any()
+        )
+
+        if not has_vision:
+            self._ds_num_tokens = 0
+            embeds = lm_inner.embed_input_ids(input_ids)
+            return embeds * lm_inner.config.embedding_multiplier
+
+        # 1. Text embeddings
+        text_embeds = lm_inner.embed_input_ids(input_ids)
+
+        # 2. Zero image positions (matches HF masked_fill(vision_mask, 0.0))
+        text_embeds[is_multimodal] = 0.0
+
+        # 3. Apply embedding_multiplier
+        inputs_embeds = text_embeds * lm_inner.config.embedding_multiplier
+
+        # 4. Split packed tensors into per-level features and build buffers.
+        #    multimodal_embeddings is a list of per-image packed tensors
+        #    (possibly a chunk slice from the framework's encoder cache).
+        #    Concatenate along token dim → (total_mm_tokens, lm_h * num_levels).
+        N, lm_h = inputs_embeds.shape
+        all_packed = torch.cat(
+            [t.to(dtype=inputs_embeds.dtype) for t in multimodal_embeddings],
+            dim=0,
+        )
+        level_features = all_packed.split(lm_h, dim=-1)  # num_levels tensors
+
+        # Ensure persistent buffers are on the right device/dtype (first call).
+        buf0 = self._ds_buffers[0]
+        if buf0.device != inputs_embeds.device or buf0.dtype != inputs_embeds.dtype:
+            self._ds_buffers = [
+                b.to(device=inputs_embeds.device, dtype=inputs_embeds.dtype)
+                for b in self._ds_buffers
+            ]
+
+        for level_idx in range(len(self._ds_layer_indices)):
+            target = self._ds_buffers[level_idx][:N]
+            target.zero_()
+            target[is_multimodal] = level_features[level_idx]
+
+        self._ds_num_tokens = N
+        return inputs_embeds
+
+    # ----- Forward -----
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        **kwargs: object,
+    ) -> torch.Tensor | IntermediateTensors:
+        if intermediate_tensors is not None:
+            inputs_embeds = None
+
+        # Build IntermediateTensors from pre-allocated persistent buffers.
+        # Always pass deepstack when inputs_embeds is non-None (prefill path),
+        # including during CUDA graph capture (buffers are zero → no-op injection).
+        # This ensures the graph captures the injection code path.
+        if (
+            inputs_embeds is not None
+            and get_pp_group().is_first_rank
+            and self._ds_layer_indices
+        ):
+            n = inputs_embeds.size(0)
+            ds: IntermediateTensors | None = IntermediateTensors(
+                {
+                    f"ds_{llm_layer}": self._ds_buffers[lvl][:n]
+                    for lvl, llm_layer in enumerate(self._ds_layer_indices)
+                }
+            )
+        else:
+            ds = None
+
+        hidden_states = self.language_model.model(
+            input_ids=input_ids,
+            positions=positions,
+            intermediate_tensors=intermediate_tensors,
+            inputs_embeds=inputs_embeds,
+            deepstack_input_embeds=ds,
+        )
+
+        # Clear buffers after use so stale features don't leak into the next request.
+        if (
+            inputs_embeds is not None
+            and get_pp_group().is_first_rank
+            and self._ds_num_tokens > 0
+        ):
+            n = self._ds_num_tokens
+            for buf in self._ds_buffers:
+                buf[:n].zero_()
+            self._ds_num_tokens = 0
+
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        # GraniteForCausalLM.compute_logits uses
+        # LogitsProcessor(scale=1/logits_scaling)
+        return self.language_model.compute_logits(hidden_states)
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
diff --git a/vllm/model_executor/models/granite_speech.py b/vllm/model_executor/models/granite_speech.py
index dca54425c706..5b4959dc2055 100644
--- a/vllm/model_executor/models/granite_speech.py
+++ b/vllm/model_executor/models/granite_speech.py
@@ -26,9 +26,8 @@
 
 import math
 from collections.abc import Iterable, Mapping
-from typing import Annotated, Literal
+from typing import Annotated
 
-import numpy as np
 import torch
 import torch.nn.functional as F
 from torch import nn
@@ -36,6 +35,7 @@
 
 from vllm.config import CacheConfig, ModelConfig, SpeechToTextConfig, VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
+from vllm.config.speech_to_text import SpeechToTextParams
 from vllm.inputs import MultiModalDataDict, PromptType, TokensPrompt
 from vllm.model_executor.layers.linear import ColumnParallelLinear, RowParallelLinear
 from vllm.model_executor.layers.quantization import QuantizationConfig
@@ -143,7 +143,7 @@ def _get_mm_fields_config(
     ) -> Mapping[str, MultiModalFieldConfig]:
         return dict(
             input_features=MultiModalFieldConfig.batched("audio"),
-            audio_embed_sizes=MultiModalFieldConfig.batched("audio"),
+            audio_embed_sizes=MultiModalFieldConfig.batched("audio", keep_on_cpu=True),
         )
 
     def _get_prompt_updates(
@@ -389,10 +389,8 @@ def forward(
         # shaw's relative positional embedding
         dist = attention_dists.to(hidden_states.device)
         rel_pos_emb = self.rel_pos_emb(dist)
-        rel_pos_emb_expanded = rel_pos_emb.view([1, 1, 1] + list(rel_pos_emb.shape))
         pos_attn = (
-            torch.sum(query_states.unsqueeze(-2) * rel_pos_emb_expanded, dim=-1)
-            * self.scale
+            torch.einsum("bnhid,ijd->bnhij", query_states, rel_pos_emb) * self.scale
         )
 
         if remainder > 0:
@@ -620,7 +618,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             self.encoder = GraniteSpeechCTCEncoder(
                 config=config.encoder_config,
                 quant_config=quant_config,
-                prefix=f"{prefix}.encoder",
+                prefix=maybe_prefix(prefix, "encoder"),
             )
 
             # Blip2 QFormer
@@ -628,7 +626,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
                 config=config,
                 quant_config=quant_config,
                 cache_config=cache_config,
-                prefix=f"{prefix}.projector",
+                prefix=maybe_prefix(prefix, "projector"),
             )
 
         self.make_empty_intermediate_tensors = (
@@ -717,13 +715,13 @@ def _build_input_features_mask(
             torch.Tensor: Mask of shape (bsz, num_features) to be applied to
             the audio features prior to splitting the audio embeddings.
         """
-        most_audio_features = torch.max(audio_embed_sizes).item()
-        mask_indices = torch.arange(
-            most_audio_features,
-            device=audio_embed_sizes.device,
-        ).view(1, -1)
+        most_audio_features = int(torch.max(audio_embed_sizes))
+        mask_indices = torch.arange(most_audio_features).view(1, -1)
         input_features_mask = mask_indices < audio_embed_sizes.view(-1, 1)
-        return input_features_mask
+        target_device = self.encoder.input_linear.weight.device
+        if target_device == input_features_mask.device:
+            return input_features_mask
+        return input_features_mask.pin_memory().to(target_device, non_blocking=True)
 
     def _pad_and_stack_input_features(
         self,
@@ -852,15 +850,14 @@ def get_mm_mapping(self) -> MultiModelKeys:
     @classmethod
     def get_generation_prompt(
         cls,
-        audio: np.ndarray,
-        model_config: ModelConfig,
-        stt_config: SpeechToTextConfig,
-        language: str | None,
-        task_type: Literal["transcribe", "translate"],
-        request_prompt: str,
-        to_language: str | None,
+        stt_params: SpeechToTextParams,
     ) -> PromptType:
         """Get the generation prompt to be used for transcription requests."""
+        audio = stt_params.audio
+        model_config = stt_params.model_config
+        task_type = stt_params.task_type
+        to_language = stt_params.to_language
+
         # Audio placeholders don't use an index, so value doesn't matter
         audio_tok = cls.get_placeholder_str("audio", 0)
 
diff --git a/vllm/model_executor/models/granitemoe.py b/vllm/model_executor/models/granitemoe.py
index 171b2e0ec5a0..e3585a6dd746 100644
--- a/vllm/model_executor/models/granitemoe.py
+++ b/vllm/model_executor/models/granitemoe.py
@@ -39,7 +39,10 @@
     tensor_model_parallel_all_gather,
 )
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     QKVParallelLinear,
@@ -104,7 +107,6 @@ def __init__(
             hidden_size=hidden_size,
             intermediate_size=intermediate_size,
             params_dtype=params_dtype,
-            reduce_results=True,
             renormalize=True,
             quant_config=quant_config,
             tp_size=tp_size,
@@ -352,7 +354,7 @@ def _load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]
 
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        expert_params_mapping = FusedMoE.make_expert_params_mapping(
+        expert_params_mapping = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="w1",
             ckpt_down_proj_name="w2",
diff --git a/vllm/model_executor/models/grok1.py b/vllm/model_executor/models/grok1.py
index 0bd6a8f3d606..f06122a7fd19 100644
--- a/vllm/model_executor/models/grok1.py
+++ b/vllm/model_executor/models/grok1.py
@@ -38,7 +38,10 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import GeluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
@@ -209,7 +212,6 @@ def __init__(
             hidden_size=hidden_size,
             intermediate_size=intermediate_size,
             params_dtype=params_dtype,
-            reduce_results=True,
             renormalize=renormalize,
             quant_config=quant_config,
             tp_size=tp_size,
@@ -520,7 +522,7 @@ def forward(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Map expert parameter names to standard names
         num_experts = _get_num_experts(self.config)
-        return FusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name=self.ckpt_gate_proj_name,
             ckpt_down_proj_name=self.ckpt_down_proj_name,
diff --git a/vllm/model_executor/models/hunyuan_v1.py b/vllm/model_executor/models/hunyuan_v1.py
index a0130402c66f..b900c0ed83ea 100644
--- a/vllm/model_executor/models/hunyuan_v1.py
+++ b/vllm/model_executor/models/hunyuan_v1.py
@@ -39,11 +39,13 @@
     get_ep_group,
     get_pp_group,
     get_tensor_model_parallel_world_size,
-    tensor_model_parallel_all_reduce,
 )
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
@@ -439,13 +441,12 @@ def __init__(
         else:
             self.shared_mlp = None
 
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.shared_mlp,
             num_experts=self.n_routed_experts,
             top_k=top_k,
             hidden_size=config.hidden_size,
             intermediate_size=intermediate_size,
-            reduce_results=False,
             renormalize=top_k > 1,
             quant_config=quant_config,
             prefix=f"{prefix}.experts",
@@ -464,11 +465,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         final_hidden_states = self.experts(
             hidden_states=hidden_states, router_logits=router_logits
         )
-        if self.shared_mlp is not None:
-            final_hidden_states = final_hidden_states[0] + final_hidden_states[1]
-
-        if self.tp_size > 1:
-            final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
 
         return final_hidden_states.view(orig_shape)
 
@@ -719,7 +715,7 @@ def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         if _is_moe(self.config):
             # Params for weights, fp8 weight scales, fp8 activation scales
             # (param_name, weight_name, expert_id, shard_id)
-            return SharedFusedMoE.make_expert_params_mapping(
+            return fused_moe_make_expert_params_mapping(
                 self,
                 ckpt_gate_proj_name="gate_proj",
                 ckpt_down_proj_name="down_proj",
@@ -934,7 +930,10 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.config = config
         self.quant_config = quant_config
 
-        self.model = HunYuanModel(vllm_config=vllm_config, prefix="model")
+        self.model = HunYuanModel(
+            vllm_config=vllm_config,
+            prefix=maybe_prefix(prefix, "model"),
+        )
         if get_pp_group().is_last_rank:
             self.lm_head = ParallelLMHead(
                 config.vocab_size,
diff --git a/vllm/model_executor/models/hy_v3.py b/vllm/model_executor/models/hy_v3.py
new file mode 100644
index 000000000000..bfff84b80490
--- /dev/null
+++ b/vllm/model_executor/models/hy_v3.py
@@ -0,0 +1,707 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+# coding=utf-8
+# Copyright 2026 The HY team.
+# Copyright 2023 The vLLM team.
+# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Inference-only HY model compatible with HuggingFace weights."""
+
+import typing
+from collections.abc import Callable, Iterable
+from itertools import islice
+from typing import Any
+
+import torch
+from torch import nn
+from transformers import PretrainedConfig
+
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import CacheConfig, VllmConfig, get_current_vllm_config
+from vllm.distributed import (
+    get_ep_group,
+    get_pp_group,
+    get_tensor_model_parallel_world_size,
+)
+from vllm.logger import init_logger
+from vllm.model_executor.layers.activation import SiluAndMul
+from vllm.model_executor.layers.attention import Attention
+from vllm.model_executor.layers.fused_moe import FusedMoE, GateLinear
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import (
+    MergedColumnParallelLinear,
+    QKVParallelLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    ParallelLMHead,
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.model_loader.weight_utils import (
+    default_weight_loader,
+    maybe_remap_kv_scale_name,
+)
+from vllm.sequence import IntermediateTensors
+from vllm.transformers_utils.configs.hy_v3 import HYV3Config
+
+from .interfaces import SupportsLoRA, SupportsPP
+from .utils import (
+    AutoWeightsLoader,
+    PPMissingLayer,
+    is_pp_missing_parameter,
+    make_empty_intermediate_tensors_factory,
+    make_layers,
+    maybe_prefix,
+)
+
+logger = init_logger(__name__)
+
+
+class HYV3FeedForward(nn.Module):
+    def __init__(
+        self,
+        hidden_size: int,
+        intermediate_size: int,
+        hidden_act: str,
+        quant_config: QuantizationConfig | None = None,
+        reduce_results: bool = True,
+        expert_gate: torch.nn.Linear | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.gate_up_proj = MergedColumnParallelLinear(
+            hidden_size,
+            [intermediate_size] * 2,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.gate_up_proj",
+        )
+        self.down_proj = RowParallelLinear(
+            intermediate_size,
+            hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            reduce_results=reduce_results,
+            prefix=f"{prefix}.down_proj",
+        )
+        if hidden_act != "silu":
+            raise ValueError(
+                f"Unsupported activation: {hidden_act}. Only silu is supported for now."
+            )
+        self.act_fn = SiluAndMul()
+
+    def forward(self, x):
+        gate_up, _ = self.gate_up_proj(x)
+        out = self.act_fn(gate_up)
+        out, _ = self.down_proj(out)
+        return out
+
+
+class HYV3MoEFused(nn.Module):
+    def __init__(
+        self,
+        config: HYV3Config,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+        enable_eplb: bool = False,
+    ):
+        super().__init__()
+        self.tp_size = get_tensor_model_parallel_world_size()
+        self.ep_group = get_ep_group().device_group
+        self.ep_rank = get_ep_group().rank_in_group
+        self.ep_size = self.ep_group.size()
+        self.n_routed_experts = config.num_experts
+        if self.tp_size > config.num_experts:
+            raise ValueError(
+                f"Tensor parallel size {self.tp_size} is greater than "
+                f"the number of experts {config.num_experts}."
+            )
+        top_k = config.num_experts_per_tok
+        intermediate_size = config.expert_hidden_dim
+        router_scaling_factor = getattr(config, "router_scaling_factor", 1.0)
+        vllm_config = get_current_vllm_config()
+        eplb_config = vllm_config.parallel_config.eplb_config
+        self.enable_eplb = enable_eplb
+
+        self.n_logical_experts = self.n_routed_experts
+        self.n_redundant_experts = eplb_config.num_redundant_experts
+        self.n_physical_experts = self.n_logical_experts + self.n_redundant_experts
+        self.n_local_physical_experts = self.n_physical_experts // self.ep_size
+        self.physical_expert_start = self.ep_rank * self.n_local_physical_experts
+        self.physical_expert_end = (
+            self.physical_expert_start + self.n_local_physical_experts
+        )
+        self.gate = GateLinear(
+            config.hidden_size,
+            config.num_experts,
+            bias=False,
+            out_dtype=torch.float32,
+            params_dtype=torch.float32,
+            prefix=f"{prefix}.gate",
+        )
+
+        if config.num_shared_experts > 0:
+            self.shared_mlp = HYV3FeedForward(
+                hidden_size=config.hidden_size,
+                intermediate_size=config.expert_hidden_dim * config.num_shared_experts,
+                hidden_act=config.hidden_act,
+                quant_config=quant_config,
+                prefix=f"{prefix}",
+                reduce_results=False,
+            )
+        else:
+            self.shared_mlp = None
+
+        self.expert_bias = nn.Parameter(torch.empty(config.num_experts))
+        scoring_func = "sigmoid"
+        e_score_correction_bias = self.expert_bias
+
+        self.experts = FusedMoE(
+            num_experts=self.n_routed_experts,
+            top_k=top_k,
+            hidden_size=config.hidden_size,
+            intermediate_size=intermediate_size,
+            renormalize=config.route_norm,
+            quant_config=quant_config,
+            prefix=f"{prefix}.experts",
+            enable_eplb=self.enable_eplb,
+            num_redundant_experts=self.n_redundant_experts,
+            scoring_func=scoring_func,
+            use_grouped_topk=True,
+            num_expert_group=1,
+            topk_group=1,
+            routed_scaling_factor=router_scaling_factor,
+            e_score_correction_bias=e_score_correction_bias,
+            n_shared_experts=config.num_shared_experts,
+            shared_experts=self.shared_mlp,
+        )
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor:
+        orig_shape = hidden_states.shape
+        hidden_dim = hidden_states.shape[-1]
+        hidden_states = hidden_states.view(-1, hidden_dim)
+
+        # router_logits: (num_tokens, n_experts)
+        router_logits, _ = self.gate(hidden_states)
+
+        final_hidden_states = self.experts(
+            hidden_states=hidden_states, router_logits=router_logits
+        )
+        return final_hidden_states.view(orig_shape)
+
+
+class HYV3Attention(nn.Module):
+    def __init__(
+        self,
+        config: PretrainedConfig,
+        hidden_size: int,
+        num_heads: int,
+        num_kv_heads: int,
+        rope_parameters: dict[str, Any],
+        max_position_embeddings: int = 8192,
+        head_dim: int | None = None,
+        rms_norm_eps: float = 1e-5,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+        dual_chunk_attention_config: dict[str, Any] | None = None,
+    ) -> None:
+        super().__init__()
+        self.hidden_size = hidden_size
+        tp_size = get_tensor_model_parallel_world_size()
+        self.total_num_heads = num_heads
+        assert self.total_num_heads % tp_size == 0
+        self.num_heads = self.total_num_heads // tp_size
+        self.total_num_kv_heads = num_kv_heads
+        if self.total_num_kv_heads >= tp_size:
+            # Number of KV heads is greater than TP size, so we partition
+            # the KV heads across multiple tensor parallel GPUs.
+            assert self.total_num_kv_heads % tp_size == 0
+        else:
+            # Number of KV heads is less than TP size, so we replicate
+            # the KV heads across multiple tensor parallel GPUs.
+            assert tp_size % self.total_num_kv_heads == 0
+        self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
+
+        if hasattr(config, "head_dim") and config.head_dim:
+            self.head_dim = config.head_dim
+        else:
+            self.head_dim = head_dim or (hidden_size // self.total_num_heads)
+        self.q_size = self.num_heads * self.head_dim
+        self.kv_size = self.num_kv_heads * self.head_dim
+        self.scaling = self.head_dim**-0.5
+        self.use_qk_norm = getattr(config, "qk_norm", False)
+        self.max_position_embeddings = max_position_embeddings
+
+        self.qkv_proj = QKVParallelLinear(
+            hidden_size,
+            self.head_dim,
+            self.total_num_heads,
+            self.total_num_kv_heads,
+            quant_config=quant_config,
+            bias=None,
+            prefix=f"{prefix}.qkv_proj",
+        )
+        self.o_proj = RowParallelLinear(
+            self.total_num_heads * self.head_dim,
+            hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.o_proj",
+        )
+        self.rotary_emb = get_rope(
+            self.head_dim,
+            max_position=max_position_embeddings,
+            rope_parameters=rope_parameters,
+            is_neox_style=True,
+        )
+        self.attn = Attention(
+            self.num_heads,
+            self.head_dim,
+            self.scaling,
+            num_kv_heads=self.num_kv_heads,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            prefix=f"{prefix}.attn",
+        )
+        if self.use_qk_norm:
+            self.q_norm = RMSNorm(self.head_dim, rms_norm_eps)
+            self.k_norm = RMSNorm(self.head_dim, rms_norm_eps)
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor:
+        qkv, _ = self.qkv_proj(hidden_states)
+        q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
+        output_shape = None
+        if self.use_qk_norm:
+            q_by_head = q.view(
+                *q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim
+            )
+            q_by_head = self.q_norm(q_by_head)
+            q = q_by_head.view(q.shape)
+
+            k_by_head = k.view(
+                *k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim
+            )
+            k_by_head = self.k_norm(k_by_head)
+            k = k_by_head.view(k.shape)
+        q, k = self.rotary_emb(positions, q, k)
+        attn_output = self.attn(q, k, v, output_shape)
+        attn_output = attn_output.view(q.shape[0], -1)
+        output, _ = self.o_proj(attn_output)
+        return output
+
+
+class HYV3DecoderLayer(nn.Module):
+    def __init__(
+        self,
+        config: PretrainedConfig,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.hidden_size = config.hidden_size
+        layer_idx = int(prefix.split(".")[-1])
+        max_position_embeddings = getattr(config, "max_position_embeddings", 8192)
+        self.self_attn = HYV3Attention(
+            config=config,
+            hidden_size=self.hidden_size,
+            num_heads=config.num_attention_heads,
+            num_kv_heads=config.num_key_value_heads,
+            rope_parameters=config.rope_parameters,
+            max_position_embeddings=max_position_embeddings,
+            head_dim=config.head_dim,
+            rms_norm_eps=config.rms_norm_eps,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            prefix=f"{prefix}.self_attn",
+        )
+        self.input_layernorm = RMSNorm(config.hidden_size, config.rms_norm_eps)
+        self.post_attention_layernorm = RMSNorm(config.hidden_size, config.rms_norm_eps)
+        if not hasattr(config, "first_k_dense_replace"):
+            raise ValueError("first_k_dense_replace not exist,please check config")
+        if layer_idx < config.first_k_dense_replace:
+            self.mlp = HYV3FeedForward(
+                hidden_size=config.hidden_size,
+                intermediate_size=config.intermediate_size,
+                hidden_act=config.hidden_act,
+                quant_config=quant_config,
+                prefix=f"{prefix}.mlp",
+            )
+            self.block_type = "feedforward"
+        else:
+            self.mlp = HYV3MoEFused(
+                config=config, quant_config=quant_config, prefix=f"{prefix}.mlp"
+            )
+            self.block_type = "moe"
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        residual: torch.Tensor | None,
+        idx: int = -1,
+    ) -> torch.Tensor:
+        if residual is None:
+            residual = hidden_states
+            hidden_states = self.input_layernorm(hidden_states)
+        else:
+            hidden_states, residual = self.input_layernorm(hidden_states, residual)
+
+        hidden_states = self.self_attn(
+            positions=positions,
+            hidden_states=hidden_states,
+        )
+
+        hidden_states, residual = self.post_attention_layernorm(hidden_states, residual)
+
+        hidden_states = self.mlp(hidden_states)
+
+        return hidden_states, residual
+
+
+@support_torch_compile
+class HYV3Model(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+
+        config = vllm_config.model_config.hf_config
+        cache_config = vllm_config.cache_config
+        quant_config = vllm_config.quant_config
+
+        parallel_config = vllm_config.parallel_config
+        eplb_config = parallel_config.eplb_config
+        self.num_redundant_experts = eplb_config.num_redundant_experts
+
+        self.vocab_size = config.vocab_size
+        self.config = config
+        self.quant_config = quant_config
+
+        self.embed_tokens = VocabParallelEmbedding(
+            config.vocab_size,
+            config.hidden_size,
+        )
+
+        self.start_layer, self.end_layer, self.layers = make_layers(
+            config.num_hidden_layers,
+            lambda prefix: HYV3DecoderLayer(
+                config=config,
+                cache_config=cache_config,
+                quant_config=quant_config,
+                prefix=prefix,
+            ),
+            prefix=f"{prefix}.layers",
+        )
+        self.norm = RMSNorm(config.hidden_size, config.rms_norm_eps)
+        self.make_empty_intermediate_tensors = make_empty_intermediate_tensors_factory(
+            ["hidden_states", "residual"], config.hidden_size
+        )
+
+        # Set MoE hyperparameters
+        self.expert_weights = []
+        self.num_expert_groups = 1
+        self.moe_layers = []
+        example_layer = None
+        for layer in self.layers:
+            if isinstance(layer, PPMissingLayer):
+                continue
+
+            assert isinstance(layer, HYV3DecoderLayer)
+            if layer.block_type == "moe":
+                example_layer = layer.mlp
+                self.moe_layers.append(layer.mlp.experts)
+
+        if example_layer is None:
+            self.num_moe_layers = 0
+            raise RuntimeError("No MoE layer found in model.layers.")
+
+        self.num_moe_layers = len(self.moe_layers)
+        self.num_logical_experts = getattr(example_layer, "n_logical_experts", None)
+        self.num_physical_experts = getattr(example_layer, "n_physical_experts", None)
+        self.num_local_physical_experts = getattr(
+            example_layer, "n_local_physical_experts", None
+        )
+        self.num_routed_experts = getattr(example_layer, "n_routed_experts", None)
+        self.num_redundant_experts = getattr(example_layer, "n_redundant_experts", None)
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
+    def update_physical_experts_metadata(
+        self,
+        num_physical_experts: int,
+        num_local_physical_experts: int,
+    ) -> None:
+        assert self.num_local_physical_experts == num_local_physical_experts
+        self.num_physical_experts = num_physical_experts
+        self.num_local_physical_experts = num_local_physical_experts
+        self.num_redundant_experts = num_physical_experts - self.num_logical_experts
+        for layer in self.layers:
+            if isinstance(layer.mlp, HYV3MoEFused):
+                moe = layer.mlp
+                moe.n_local_physical_experts = num_local_physical_experts
+                moe.n_physical_experts = num_physical_experts
+                moe.n_redundant_experts = self.num_redundant_experts
+                moe.experts.update_expert_map()
+
+    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
+        # Params for weights, fp8 weight scales, fp8 activation scales
+        # (param_name, weight_name, expert_id, shard_id)
+        return FusedMoE.make_expert_params_mapping(
+            self,
+            ckpt_gate_proj_name="gate_proj",
+            ckpt_down_proj_name="down_proj",
+            ckpt_up_proj_name="up_proj",
+            num_experts=self.config.num_experts,
+        )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        if get_pp_group().is_first_rank:
+            if inputs_embeds is not None:
+                hidden_states = inputs_embeds
+            else:
+                hidden_states = self.embed_input_ids(input_ids)
+            residual = None
+        else:
+            assert intermediate_tensors is not None
+            hidden_states = intermediate_tensors["hidden_states"]
+            residual = intermediate_tensors["residual"]
+
+        for idx, layer in enumerate(
+            islice(self.layers, self.start_layer, self.end_layer)
+        ):
+            hidden_states, residual = layer(positions, hidden_states, residual, idx=idx)
+        if not get_pp_group().is_last_rank:
+            return IntermediateTensors(
+                {"hidden_states": hidden_states, "residual": residual}
+            )
+
+        hidden_states = hidden_states + residual
+        residual = hidden_states
+
+        hidden_states = self.norm(hidden_states)
+
+        return hidden_states
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        stacked_params_mapping = [
+            # (param_name, shard_name, shard_id)
+            (".qkv_proj", ".q_proj", "q"),
+            (".qkv_proj", ".k_proj", "k"),
+            (".qkv_proj", ".v_proj", "v"),
+            (".gate_up_proj", ".gate_proj", 0),
+            (".gate_up_proj", ".up_proj", 1),
+        ]
+        params_dict = dict(self.named_parameters())
+        expert_params_mapping = self.get_expert_mapping()
+        loaded_params: set[str] = set()
+        for name, loaded_weight in weights:
+            if self.config.tie_word_embeddings and "lm_head.weight" in name:
+                continue
+            if self.quant_config is not None and (
+                scale_name := self.quant_config.get_cache_scale(name)
+            ):
+                param = params_dict[scale_name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                loaded_weight = (
+                    loaded_weight if loaded_weight.dim() == 0 else loaded_weight[0]
+                )
+                weight_loader(param, loaded_weight)
+                loaded_params.add(scale_name)
+                continue
+            if "scale" in name:
+                # Remapping the name of FP8 kv-scale.
+                name = maybe_remap_kv_scale_name(name, params_dict)
+                if name is None:
+                    continue
+            is_found = False
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                if weight_name not in name:
+                    continue
+                if "mlp.experts" in name:
+                    continue
+                name = name.replace(weight_name, param_name)
+                # Skip loading extra bias for GPTQ models.
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
+
+                # Skip layers on other devices.
+                if is_pp_missing_parameter(name, self):
+                    continue
+
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                loaded_params.add(name)
+                is_found = True
+                break
+            if is_found:
+                continue
+
+            if name.endswith(".bias") and name not in params_dict:
+                continue
+            is_expert_weight = False
+            for mapping in expert_params_mapping:
+                param_name, weight_name, expert_id, shard_id = mapping
+                if weight_name not in name:
+                    continue
+                is_expert_weight = True
+                name_mapped = name.replace(weight_name, param_name)
+                # Skip layers on other devices.
+                if is_pp_missing_parameter(name_mapped, self):
+                    continue
+
+                param = params_dict[name_mapped]
+                weight_loader = typing.cast(Callable[..., bool], param.weight_loader)
+                success = weight_loader(
+                    param,
+                    loaded_weight,
+                    name_mapped,
+                    shard_id=shard_id,
+                    expert_id=expert_id,
+                    return_success=True,
+                )
+                if success:
+                    name = name_mapped
+                    break
+            else:
+                if is_expert_weight:
+                    # We've checked that this is an expert weight
+                    # However it's not mapped locally to this rank
+                    # So we simply skip it
+                    continue
+                if name is None:
+                    continue
+                if is_pp_missing_parameter(name, self):
+                    continue
+                if "router.gate." in name:
+                    name = name.replace("router.", "")
+
+                param = params_dict[name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                weight_loader(param, loaded_weight)
+            loaded_params.add(name)
+
+        return loaded_params
+
+
+def get_spec_layer_idx_from_weight_name(
+    config: PretrainedConfig, weight_name: str
+) -> int | None:
+    # HYV3MTP is enabled only when num_nextn_predict_layers is greater than 1
+    if (
+        hasattr(config, "num_nextn_predict_layers")
+        and config.num_nextn_predict_layers > 0
+    ):
+        layer_idx = config.num_hidden_layers
+        for i in range(config.num_nextn_predict_layers):
+            if weight_name.startswith(f"model.layers.{layer_idx + i}."):
+                return layer_idx + i
+    return None
+
+
+class HYV3ForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+        "gate_up_proj": ["gate_proj", "up_proj"],
+    }
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        self.config = config
+        self.quant_config = quant_config
+
+        parallel_config = vllm_config.parallel_config
+        eplb_config = parallel_config.eplb_config
+        self.num_redundant_experts = eplb_config.num_redundant_experts
+
+        self.model = HYV3Model(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+        self.lm_head = ParallelLMHead(
+            config.vocab_size,
+            config.hidden_size,
+            quant_config=quant_config,
+            prefix=maybe_prefix(prefix, "lm_head"),
+        )
+        if self.config.tie_word_embeddings:
+            self.lm_head.weight = self.model.embed_tokens.weight
+        self.logits_processor = LogitsProcessor(config.vocab_size)
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        hidden_states = self.model(
+            input_ids, positions, intermediate_tensors, inputs_embeds
+        )
+
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        logits = self.logits_processor(self.lm_head, hidden_states)
+        return logits
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        def _filter_weights(weights):
+            for name, weight in weights:
+                spec_layer = get_spec_layer_idx_from_weight_name(self.config, name)
+                if spec_layer is not None:
+                    continue
+                yield name, weight
+
+        loader = AutoWeightsLoader(
+            self,
+            skip_prefixes=(["lm_head."] if self.config.tie_word_embeddings else None),
+        )
+        return loader.load_weights(_filter_weights(weights))
+
+    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
+        return self.model.get_expert_mapping()
diff --git a/vllm/model_executor/models/hy_v3_mtp.py b/vllm/model_executor/models/hy_v3_mtp.py
new file mode 100644
index 000000000000..8594a38c3ab9
--- /dev/null
+++ b/vllm/model_executor/models/hy_v3_mtp.py
@@ -0,0 +1,470 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+# coding=utf-8
+# Copyright 2026 The HY team.
+# Copyright 2023 The vLLM team.
+# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Inference-only HY V3 MTP model compatible with HuggingFace weights."""
+
+from collections.abc import Iterable
+
+import regex as re
+import torch
+from torch import nn
+from transformers import PretrainedConfig
+
+from vllm.config import CacheConfig, ModelConfig, VllmConfig
+from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    ParallelLMHead,
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.model_loader.weight_utils import (
+    default_weight_loader,
+    maybe_remap_kv_scale_name,
+)
+from vllm.sequence import IntermediateTensors
+from vllm.v1.outputs import SamplerOutput
+from vllm.v1.sample.metadata import SamplingMetadata
+from vllm.v1.sample.sampler import Sampler
+
+from .hy_v3 import HYV3DecoderLayer, get_spec_layer_idx_from_weight_name
+from .utils import is_pp_missing_parameter, maybe_prefix
+
+
+def _is_moe(config: PretrainedConfig) -> bool:
+    return bool(
+        getattr(config, "num_experts", None)
+        and (
+            (isinstance(config.num_experts, int) and config.num_experts > 1)
+            or (isinstance(config.num_experts, list) and max(config.num_experts) > 1)
+        )
+    )
+
+
+def _get_cla_factor(config: PretrainedConfig) -> int:
+    if not getattr(config, "use_cla", False):
+        return 1
+    return getattr(config, "cla_share_factor", 1)
+
+
+class HYV3SharedHead(nn.Module):
+    def __init__(
+        self,
+        config: PretrainedConfig,
+        quant_config: QuantizationConfig | None = None,
+    ) -> None:
+        super().__init__()
+        self.head = ParallelLMHead(
+            config.vocab_size, config.hidden_size, quant_config=quant_config
+        )
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        return hidden_states
+
+
+class HYV3MultiTokenPredictorLayer(nn.Module):
+    def __init__(
+        self,
+        config: PretrainedConfig,
+        prefix: str,
+        model_config: ModelConfig,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+    ) -> None:
+        super().__init__()
+
+        self.enorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.hnorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.eh_proj = nn.Linear(config.hidden_size * 2, config.hidden_size, bias=False)
+        self.shared_head = HYV3SharedHead(config=config, quant_config=quant_config)
+        self.mtp_block = HYV3DecoderLayer(
+            config=config,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            prefix=prefix,
+        )
+        # Final layernorm applied after transformer block, before logits
+        # projection (matches HF HYV3MTPDecoderLayer.final_layernorm)
+        self.final_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        previous_hidden_states: torch.Tensor,
+        inputs_embeds: torch.Tensor | None = None,
+        spec_step_index: int = 0,
+    ) -> torch.Tensor:
+        assert inputs_embeds is not None
+        # masking inputs at position 0, as not needed by MTP
+        inputs_embeds[positions == 0] = 0
+        inputs_embeds = self.enorm(inputs_embeds)
+        previous_hidden_states = self.hnorm(previous_hidden_states)
+
+        hidden_states = self.eh_proj(
+            torch.cat([inputs_embeds, previous_hidden_states], dim=-1)
+        )
+
+        # HYV3DecoderLayer returns (hidden_states, residual)
+        hidden_states, residual = self.mtp_block(
+            positions=positions, hidden_states=hidden_states, residual=None
+        )
+        hidden_states = residual + hidden_states
+        hidden_states = self.final_layernorm(hidden_states)
+        return hidden_states
+
+
+class HYV3MultiTokenPredictor(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        self.mtp_start_layer_idx = config.num_hidden_layers
+        self.num_mtp_layers = config.num_nextn_predict_layers
+
+        # to map the exact layer index from weights
+        self.layers = torch.nn.ModuleDict(
+            {
+                str(idx): HYV3MultiTokenPredictorLayer(
+                    config,
+                    f"{prefix}.layers.{idx}",
+                    model_config=vllm_config.model_config,
+                    cache_config=vllm_config.cache_config,
+                    quant_config=vllm_config.quant_config,
+                )
+                for idx in range(
+                    self.mtp_start_layer_idx,
+                    self.mtp_start_layer_idx + self.num_mtp_layers,
+                )
+            }
+        )
+
+        self.embed_tokens = VocabParallelEmbedding(
+            config.vocab_size,
+            config.hidden_size,
+        )
+
+        self.logits_processor = LogitsProcessor(config.vocab_size)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        previous_hidden_states: torch.Tensor,
+        inputs_embeds: torch.Tensor | None = None,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor:
+        if inputs_embeds is None:
+            inputs_embeds = self.embed_tokens(input_ids)
+        current_step_idx = spec_step_idx % self.num_mtp_layers
+        return self.layers[str(self.mtp_start_layer_idx + current_step_idx)](
+            input_ids,
+            positions,
+            previous_hidden_states,
+            inputs_embeds,
+            current_step_idx,
+        )
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor:
+        current_step_idx = spec_step_idx % self.num_mtp_layers
+        mtp_layer = self.layers[str(self.mtp_start_layer_idx + current_step_idx)]
+        logits = self.logits_processor(
+            mtp_layer.shared_head.head, mtp_layer.shared_head(hidden_states)
+        )
+        return logits
+
+
+class HYV3MTP(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        self.config = vllm_config.model_config.hf_config
+        self.quant_config = vllm_config.quant_config
+        self.model = HYV3MultiTokenPredictor(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+
+        self.sampler = Sampler()
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor:
+        hidden_states = self.model(
+            input_ids, positions, hidden_states, inputs_embeds, spec_step_idx
+        )
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor | None:
+        return self.model.compute_logits(hidden_states, spec_step_idx)
+
+    def sample(
+        self,
+        logits: torch.Tensor,
+        sampling_metadata: SamplingMetadata,
+    ) -> SamplerOutput | None:
+        next_tokens = self.sampler(logits, sampling_metadata)
+        return next_tokens
+
+    def _split_qkv_weight(self, qkv: torch.Tensor):
+        num_attention_heads = self.config.num_attention_heads
+        num_kv_heads = getattr(
+            self.config, "num_key_value_heads", self.config.num_attention_heads
+        )
+        num_key_value_groups = num_attention_heads // num_kv_heads
+        hidden_size = self.config.hidden_size
+
+        if hasattr(self.config, "head_dim"):
+            attention_head_dim = self.config.head_dim
+        elif hasattr(self.config, "attention_head_dim"):
+            attention_head_dim = self.config.attention_head_dim
+        else:
+            attention_head_dim = self.config.hidden_size // num_attention_heads
+
+        qkv = qkv.reshape(
+            num_kv_heads, num_key_value_groups + 2, attention_head_dim, hidden_size
+        )
+        q, k, v = torch.split(qkv, (num_key_value_groups, 1, 1), dim=1)
+        q = q.reshape(-1, hidden_size)
+        k = k.reshape(-1, hidden_size)
+        v = v.reshape(-1, hidden_size)
+        return torch.concat((q, k, v))
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
+        cla_factor = _get_cla_factor(self.config)
+        stacked_params_mapping = [
+            # (param_name, shard_name, shard_id)
+            (".qkv_proj", ".q_proj", "q"),
+            (".qkv_proj", ".k_proj", "k"),
+            (".qkv_proj", ".v_proj", "v"),
+            (".gate_up_proj", ".gate_proj", 0),
+            (".gate_up_proj", ".up_proj", 1),
+        ]
+
+        num_attention_heads = self.config.num_attention_heads
+        num_kv_heads = getattr(
+            self.config, "num_key_value_heads", self.config.num_attention_heads
+        )
+        split_params_mapping = [
+            (".gate_up_proj", ".gate_and_up_proj", 2, [(1, 1), (0, 1)], None),
+            (
+                ".qkv_proj",
+                ".qkv_proj",
+                num_attention_heads + num_kv_heads * 2,
+                [("q", num_attention_heads), ("k", num_kv_heads), ("v", num_kv_heads)],
+                self._split_qkv_weight,
+            ),
+        ]
+
+        if _is_moe(self.config):
+            expert_params_mapping = FusedMoE.make_expert_params_mapping(
+                self,
+                ckpt_gate_proj_name="gate_proj",
+                ckpt_down_proj_name="down_proj",
+                ckpt_up_proj_name="up_proj",
+                num_experts=self.config.num_experts,
+            )
+        else:
+            expert_params_mapping = {}
+
+        params_dict = dict(self.named_parameters())
+
+        # V3 shared weights mapping:
+        # - embed_tokens: from main model's model.embed_tokens.weight
+        # - lm_head: from main model's lm_head.weight → MTP shared_head.head
+        #   (HF infer_mtp uses head_weight=self.lm_head.weight, not the
+        #    checkpoint's model.layers.<N>.shared_head.weight)
+        # - No norm mapping (V3 MTP has no intermediate norm before lm_head)
+        mtp_start = self.config.num_hidden_layers
+        v3_shared_weights = {
+            "model.embed_tokens.weight": "model.embed_tokens.weight",
+            "lm_head.weight": f"model.layers.{mtp_start}.shared_head.head.weight",
+        }
+
+        for name, loaded_weight in weights:
+            # Intercept shared weights before any other processing
+            if name in v3_shared_weights:
+                target_name = v3_shared_weights[name]
+                if target_name in params_dict:
+                    param = params_dict[target_name]
+                    weight_loader = getattr(
+                        param, "weight_loader", default_weight_loader
+                    )
+                    weight_loader(param, loaded_weight)
+                continue
+
+            if "rotary_emb.inv_freq" in name:
+                continue
+            if "gate_proj_bias" in name:
+                name = name.replace("gate_proj_bias", "gate_proj.bias")
+            if "up_proj_bias" in name:
+                name = name.replace("up_proj_bias", "up_proj.bias")
+            if "rotary_emb.cos_cached" in name or "rotary_emb.sin_cached" in name:
+                continue
+            if self.config.tie_word_embeddings and "lm_head.weight" in name:
+                continue
+            if self.quant_config is not None and (
+                scale_name := self.quant_config.get_cache_scale(name)
+            ):
+                param = params_dict[scale_name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                loaded_weight = loaded_weight[0]
+                weight_loader(param, loaded_weight)
+                continue
+            spec_layer = get_spec_layer_idx_from_weight_name(self.config, name)
+            if spec_layer is None:
+                continue
+            name = self._rewrite_spec_layer_name(spec_layer, name)
+            # Skip weights that _rewrite_spec_layer_name marked for skipping
+            if name == "__skip__":
+                continue
+            if "scale" in name:
+                name = maybe_remap_kv_scale_name(name, params_dict)
+                if name is None:
+                    continue
+            is_found = False
+
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                if weight_name not in name:
+                    continue
+                if "mlp.experts" in name:
+                    continue
+                if weight_name == ".q_proj":
+                    match = re.search(r"layers\.\d+", name)
+                    if match:
+                        layer_id = int(match.group(0).split(".")[-1])
+                        if cla_factor > 1 and layer_id % cla_factor != 0:
+                            continue
+                name = name.replace(weight_name, param_name)
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
+
+                if is_pp_missing_parameter(name, self):
+                    continue
+
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+
+                is_found = True
+                break
+            if is_found:
+                continue
+
+            for param_name, weight_name, den, split_param, func in split_params_mapping:
+                if weight_name not in name:
+                    continue
+                name = name.replace(weight_name, param_name)
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
+
+                if is_pp_missing_parameter(name, self):
+                    continue
+
+                assert loaded_weight.shape[0] % den == 0
+                units = loaded_weight.shape[0] // den
+
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                offset = 0
+                for shard_id, num in split_param:
+                    new_offset = offset + num * units
+                    if func:
+                        weight_loader(
+                            param, func(loaded_weight)[offset:new_offset], shard_id
+                        )
+                    else:
+                        weight_loader(param, loaded_weight[offset:new_offset], shard_id)
+                    offset = new_offset
+
+                break
+            else:
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
+                for mapping in expert_params_mapping:
+                    param_name, weight_name, expert_id, shard_id = mapping
+                    if weight_name not in name:
+                        continue
+                    name = name.replace(weight_name, param_name)
+                    if is_pp_missing_parameter(name, self):
+                        continue
+                    param = params_dict[name]
+                    weight_loader = param.weight_loader
+                    weight_loader(
+                        param,
+                        loaded_weight,
+                        name,
+                        shard_id=shard_id,
+                        expert_id=expert_id,
+                    )
+                    break
+                else:
+                    if is_pp_missing_parameter(name, self):
+                        continue
+
+                    if "mlp.gate.wg." in name:
+                        name = name.replace("wg.", "")
+                    # V3 checkpoint: mlp.router.gate -> mlp.gate
+                    if "mlp.router.gate." in name:
+                        name = name.replace("router.gate.", "gate.")
+
+                    param = params_dict[name]
+                    weight_loader = getattr(
+                        param, "weight_loader", default_weight_loader
+                    )
+                    weight_loader(param, loaded_weight)
+
+    def _rewrite_spec_layer_name(self, spec_layer: int, name: str) -> str:
+        """Rewrite spec layer weight names to match vLLM module structure."""
+        # Skip embed_tokens (doesn't exist in V3 MTP checkpoint under spec
+        # layer) and shared_head (we use main model's lm_head instead)
+        if f"model.layers.{spec_layer}.embed_tokens" in name:
+            return "__skip__"
+        if f"model.layers.{spec_layer}.shared_head" in name:
+            return "__skip__"
+
+        spec_layer_weight_names = ["enorm", "hnorm", "eh_proj", "final_layernorm"]
+        spec_layer_weight = False
+        for weight_name in spec_layer_weight_names:
+            if weight_name in name:
+                spec_layer_weight = True
+                break
+        if not spec_layer_weight:
+            # Transformer block weights go under .mtp_block
+            name = name.replace(
+                f"model.layers.{spec_layer}.", f"model.layers.{spec_layer}.mtp_block."
+            )
+        return name
diff --git a/vllm/model_executor/models/idefics3.py b/vllm/model_executor/models/idefics3.py
index d3ffdd4cf29a..cf7d51c86c13 100644
--- a/vllm/model_executor/models/idefics3.py
+++ b/vllm/model_executor/models/idefics3.py
@@ -355,7 +355,7 @@ def _get_mm_fields_config(
                 "image", num_patches
             ),
             image_embeds=MultiModalFieldConfig.batched("image"),
-            num_patches=MultiModalFieldConfig.batched("image"),
+            num_patches=MultiModalFieldConfig.batched("image", keep_on_cpu=True),
         )
 
     def _get_prompt_updates(
diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py
index fa59931d0ed4..3d46bda7ffb9 100644
--- a/vllm/model_executor/models/interfaces.py
+++ b/vllm/model_executor/models/interfaces.py
@@ -29,7 +29,7 @@
 from transformers.models.whisper.tokenization_whisper import LANGUAGES
 from typing_extensions import Self, TypeIs
 
-from vllm.config import ModelConfig, SpeechToTextConfig
+from vllm.config import ModelConfig, SpeechToTextConfig, SpeechToTextParams
 from vllm.inputs import PromptType, TokensPrompt
 from vllm.logger import init_logger
 from vllm.model_executor.layers.mamba.mamba_utils import MambaStateCopyFunc
@@ -46,10 +46,11 @@
     from vllm.multimodal.inputs import MultiModalFeatureSpec
     from vllm.multimodal.registry import _ProcessorFactories
     from vllm.sequence import IntermediateTensors
-    from vllm.v1.worker.gpu.mm.encoder_cudagraph_defs import (
+    from vllm.v1.worker.encoder_cudagraph_defs import (
         EncoderCudaGraphCaptureInputs,
         EncoderCudaGraphConfig,
         EncoderCudaGraphReplayBuffers,
+        EncoderItemSpec,
     )
 else:
     VllmConfig = object
@@ -207,7 +208,8 @@ def get_language_model(self) -> VllmModel:
 
         raise NotImplementedError(
             f"No language model found in {type(self).__name__}! "
-            "You should initialize it via `_mark_language_model`."
+            "You should initialize it via `_mark_language_model`, "
+            "and make sure `embed_input_ids` is implemented."
         )
 
     @contextmanager
@@ -362,7 +364,9 @@ def _embed_text_input_ids(
             # to ensure that any external configuration requiring offset tracking,
             # e.g., LoRA, are applied correctly regardless of whether or not
             # we have multimodal tokens.
-            in_vocab_ids = input_ids.masked_fill(is_multimodal, 0)
+            in_vocab_ids = input_ids.masked_fill(
+                is_multimodal.to(device=input_ids.device, non_blocking=True), 0
+            )
             return embed_input_ids(in_vocab_ids)
 
         return embed_input_ids(input_ids)
@@ -1096,6 +1100,12 @@ class SupportsTranscription(Protocol):
     :meth:`get_language_token_ids`.
     """
 
+    no_space_languages: ClassVar[set[str]] = {"ja", "zh"}
+    """
+    Languages that don't need a space between words.
+    For example, Japanese (ja) and Chinese (zh) don't need a space between words.
+    """
+
     def __init_subclass__(cls, **kwargs):
         super().__init_subclass__(**kwargs)
         # language codes in supported_languages
@@ -1111,13 +1121,7 @@ def __init_subclass__(cls, **kwargs):
     @classmethod
     def get_generation_prompt(
         cls,
-        audio: np.ndarray,
-        stt_config: SpeechToTextConfig,
-        model_config: ModelConfig,
-        language: str | None,
-        task_type: Literal["transcribe", "translate"],
-        request_prompt: str,
-        to_language: str | None,
+        stt_params: SpeechToTextParams,
     ) -> PromptType:
         """Get the prompt for the ASR model.
         The model has control over the construction, as long as it
@@ -1516,6 +1520,19 @@ class SupportsEncoderCudaGraph(Protocol):
 
     def get_encoder_cudagraph_config(self) -> "EncoderCudaGraphConfig": ...
 
+    def get_input_modality(
+        self,
+        mm_kwargs: dict[str, Any],
+    ) -> str:
+        """Return the modality of the inputs."""
+        ...
+
+    def get_max_frames_per_video(
+        self,
+    ) -> int:
+        """Return model-specific max frames per video."""
+        ...
+
     def get_encoder_cudagraph_budget_range(
         self,
         vllm_config: "VllmConfig",
@@ -1528,35 +1545,20 @@ def get_encoder_cudagraph_budget_range(
           (e.g. max_num_batched_tokens)
 
         Used when ``encoder_cudagraph_token_budgets`` and/or
-        ``encoder_cudagraph_max_images_per_batch`` are not explicitly
+        ``encoder_cudagraph_max_vision_items_per_batch`` are not explicitly
         specified by the user.
         """
         ...
 
-    def get_encoder_cudagraph_num_items(
+    def get_encoder_cudagraph_item_specs(
         self,
         mm_kwargs: dict[str, Any],
-    ) -> int:
-        """Return the number of items (e.g. images) in the batch."""
-        ...
+    ) -> list["EncoderItemSpec"]:
+        """Return specs describing each item in the batch.
 
-    def get_encoder_cudagraph_per_item_output_tokens(
-        self,
-        mm_kwargs: dict[str, Any],
-    ) -> list[int]:
-        """Return output token count for each item.
-
-        Used for greedy packing and DP load balancing.
-        """
-        ...
-
-    def get_encoder_cudagraph_per_item_input_sizes(
-        self,
-        mm_kwargs: dict[str, Any],
-    ) -> list[int]:
-        """Return input size (e.g. patch count) for each item.
-
-        Used for input tensor slicing offsets.
+        Replaces the former separate methods for num_items,
+        per_item_output_tokens, and per_item_input_sizes.
+        The manager derives all three from this single return value.
         """
         ...
 
@@ -1578,10 +1580,32 @@ def select_encoder_cudagraph_items(
         """
         ...
 
+    def postprocess_encoder_output(
+        self,
+        output: torch.Tensor,
+        indices: list[int],
+        per_item_out_tokens: list[int],
+        dest: dict[int, torch.Tensor] | list[torch.Tensor | None],
+        clone: bool = False,
+        batch_mm_kwargs: dict[str, Any] | None = None,
+    ) -> None:
+        """
+        Post-process encoder output, directly call scatter_output_slices by default.
+
+        By default, delegates directly to scatter_output_slices.
+        Override this for models that require additional processing on the raw
+        encoder output prior to scattering, e.g. Step3-VL, which merges features
+        according to dynamic patch counts before scattering.
+        """
+        from vllm.model_executor.models.utils import scatter_output_slices
+
+        scatter_output_slices(output, indices, per_item_out_tokens, dest, clone)
+
     def prepare_encoder_cudagraph_capture_inputs(
         self,
         token_budget: int,
         max_batch_size: int,
+        max_frames_per_batch: int,
         device: torch.device,
         dtype: torch.dtype,
     ) -> "EncoderCudaGraphCaptureInputs":
@@ -1592,6 +1616,7 @@ def prepare_encoder_cudagraph_replay_buffers(
         self,
         mm_kwargs: dict[str, Any],
         max_batch_size: int,
+        max_frames_per_batch: int,
     ) -> "EncoderCudaGraphReplayBuffers":
         """Compute buffer values from actual batch inputs for replay."""
         ...
diff --git a/vllm/model_executor/models/interns1_pro.py b/vllm/model_executor/models/interns1_pro.py
index 28331b8ef3e8..36f669179c53 100644
--- a/vllm/model_executor/models/interns1_pro.py
+++ b/vllm/model_executor/models/interns1_pro.py
@@ -41,7 +41,9 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
@@ -176,7 +178,6 @@ def __init__(
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=True,
             renormalize=config.norm_topk_prob,
             quant_config=quant_config,
             prefix=f"{prefix}.experts",
diff --git a/vllm/model_executor/models/interns2_preview.py b/vllm/model_executor/models/interns2_preview.py
new file mode 100644
index 000000000000..6efc98aabc1f
--- /dev/null
+++ b/vllm/model_executor/models/interns2_preview.py
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections.abc import Iterable
+
+import torch
+from transformers import AutoProcessor
+
+from vllm.multimodal import MULTIMODAL_REGISTRY
+
+from .qwen3_5 import Qwen3_5MoeForConditionalGeneration
+from .qwen3_vl import (
+    Qwen3VLDummyInputsBuilder,
+    Qwen3VLMultiModalProcessor,
+    Qwen3VLProcessingInfo,
+)
+from .utils import AutoWeightsLoader
+
+
+class InternS2PreviewProcessingInfo(Qwen3VLProcessingInfo):
+    def get_hf_config(self):
+        return self.ctx.get_hf_config()
+
+    def get_hf_processor(self, **kwargs: object) -> AutoProcessor:
+        return self.ctx.get_hf_processor(**kwargs)
+
+
+@MULTIMODAL_REGISTRY.register_processor(
+    Qwen3VLMultiModalProcessor,
+    info=InternS2PreviewProcessingInfo,
+    dummy_inputs=Qwen3VLDummyInputsBuilder,
+)
+class InternS2PreviewForConditionalGeneration(Qwen3_5MoeForConditionalGeneration):
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(
+            self,
+            skip_prefixes=["mtp.", "model.time_series.", "time_series."],
+        )
+        return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
diff --git a/vllm/model_executor/models/internvl.py b/vllm/model_executor/models/internvl.py
index c8611a499362..f3918e302b47 100644
--- a/vllm/model_executor/models/internvl.py
+++ b/vllm/model_executor/models/internvl.py
@@ -247,9 +247,11 @@ def _get_image_fields_config(self, hf_inputs: BatchFeature):
             pixel_values_flat=MultiModalFieldConfig.flat_from_sizes(
                 "image", image_num_patches
             ),
-            image_num_patches=MultiModalFieldConfig.batched("image"),
+            image_num_patches=MultiModalFieldConfig.batched("image", keep_on_cpu=True),
             image_embeds=MultiModalFieldConfig.batched("image"),
-            image_token_id=MultiModalFieldConfig.shared("image", num_images),
+            image_token_id=MultiModalFieldConfig.shared(
+                "image", num_images, keep_on_cpu=True
+            ),
         )
 
     def _get_mm_fields_config(
@@ -475,7 +477,7 @@ def _get_video_fields_config(self, hf_inputs: BatchFeature):
             pixel_values_flat_video=MultiModalFieldConfig.flat_from_sizes(
                 "video", video_num_patches
             ),
-            video_num_patches=MultiModalFieldConfig.batched("video"),
+            video_num_patches=MultiModalFieldConfig.batched("video", keep_on_cpu=True),
             video_token_id=MultiModalFieldConfig.shared("video", num_videos),
         )
 
diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py
index 980bcffb5f9b..84e96def6c1f 100644
--- a/vllm/model_executor/models/jamba.py
+++ b/vllm/model_executor/models/jamba.py
@@ -14,7 +14,10 @@
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.distributed.parallel_state import get_pp_group
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     QKVParallelLinear,
@@ -90,7 +93,6 @@ def __init__(
             self.intermediate_size,
             tp_size=tp_size,
             params_dtype=params_dtype,
-            reduce_results=True,
             renormalize=False,
             use_grouped_topk=False,
             quant_config=quant_config,
@@ -379,7 +381,7 @@ def forward(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return FusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/jina.py b/vllm/model_executor/models/jina.py
new file mode 100644
index 000000000000..2b07937df08e
--- /dev/null
+++ b/vllm/model_executor/models/jina.py
@@ -0,0 +1,258 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# Adapted from https://huggingface.co/jinaai/jina-reranker-v3/blob/main/modeling.py
+import json
+import logging
+from collections import defaultdict
+from collections.abc import Iterable
+
+import torch
+from safetensors.torch import load as safetensors_load
+from torch import nn
+
+from vllm.config import VllmConfig
+from vllm.sequence import IntermediateTensors
+from vllm.tasks import PoolingTask
+from vllm.transformers_utils.repo_utils import get_hf_file_bytes
+from vllm.v1.pool.metadata import PoolingMetadata
+
+from ..layers.pooler import DispatchPooler
+from ..layers.pooler.tokwise import (
+    StepPool,
+    TokenPooler,
+    TokenPoolingMethodOutputItem,
+)
+from .interfaces import SupportsLateInteraction
+from .interfaces_base import VllmModelForPooling
+from .qwen3 import Qwen3ForCausalLM, Qwen3Model
+from .utils import AutoWeightsLoader, maybe_prefix
+
+logger = logging.getLogger(__name__)
+
+
+class JinaForRanking(nn.Module, SupportsLateInteraction):
+    is_pooling_model = True
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+
+        self.config = config
+        self.projector_dim: int = config.embedding_size
+
+        self.vllm_config = vllm_config
+        self.quant_config = quant_config
+        self.model = Qwen3Model(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+
+        self.projector = nn.Sequential(
+            nn.Linear(config.hidden_size, config.hidden_size // 2, bias=False),
+            nn.ReLU(),
+            nn.Linear(config.hidden_size // 2, self.projector_dim, bias=False),
+        )
+
+        self.pooler = DispatchPooler(
+            {
+                "token_embed": TokenPooler(
+                    pooling=JinaForRankingPool(self.projector),
+                )
+            }
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        hidden_states = self.model(
+            input_ids, positions, intermediate_tensors, inputs_embeds
+        )
+        return hidden_states
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(self, skip_prefixes=(["lm_head."]))
+        return loader.load_weights(weights)
+
+
+class JinaForRankingPool(StepPool):
+    def __init__(self, projector: nn.Sequential):
+        super().__init__()
+
+        self.doc_token_id = 151670
+        self.query_token_id = 151671
+        self.projector = projector
+
+    def get_supported_tasks(self) -> set[PoolingTask]:
+        return {"token_embed"}
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        pooling_metadata: PoolingMetadata,
+    ) -> list[TokenPoolingMethodOutputItem]:
+        pooled_data_lst = super().forward(hidden_states, pooling_metadata)
+        prompt_token_ids = pooling_metadata.get_prompt_token_ids()
+
+        embeds_list = list[torch.Tensor | None]()
+        for data, token_ids in zip(pooled_data_lst, prompt_token_ids):
+            # for unfinished chunked prefill
+            if data is None:
+                embeds_list.append(None)
+            else:
+                docs_indexes = torch.where(torch.eq(token_ids, self.doc_token_id))[0]
+                query_indexes = torch.where(torch.eq(token_ids, self.query_token_id))[0]
+
+                # The JinaForRanking model concatenates docs first, then query.
+                # Let's stay consistent with this novel design.
+                indexes = torch.cat([docs_indexes, query_indexes])
+                embeds = self.projector(data[indexes])
+                embeds_list.append(embeds)
+
+        return embeds_list
+
+
+# jina-embeddings-v5-text-small wraps Qwen3-0.6B-Base with four task-specific
+# LoRA adapters. This implementation merges the selected adapter into the base
+# weights at load time to avoid any runtime dependency on peft.
+#
+# Task selection:
+#     Pass --hf-overrides '{"jina_task": "retrieval"}' to select one of:
+#     retrieval (default), text-matching, classification, clustering.
+
+_DEFAULT_TASK = "retrieval"
+_SUPPORTED_TASKS = {"retrieval", "text-matching", "classification", "clustering"}
+
+
+def _load_adapter(
+    model: str,
+    task: str,
+    revision: str | None,
+) -> tuple[dict, dict[str, torch.Tensor]] | None:
+    """Load adapter config and weights from a local path or HF repo.
+
+    Returns (adapter_config, adapter_weights) or None if not found.
+    """
+    config_bytes = get_hf_file_bytes(
+        f"adapters/{task}/adapter_config.json",
+        model,
+        revision,
+    )
+    if config_bytes is None:
+        return None
+
+    adapter_config = json.loads(config_bytes)
+
+    weights_bytes = get_hf_file_bytes(
+        f"adapters/{task}/adapter_model.safetensors",
+        model,
+        revision,
+    )
+    if weights_bytes is None:
+        return None
+
+    adapter_weights = safetensors_load(weights_bytes)
+    return adapter_config, adapter_weights
+
+
+def _build_lora_pairs(adapter_weights: dict) -> dict:
+    """Group raw adapter tensors into {base_key: {"A": tensor, "B": tensor}} pairs.
+
+    Transforms adapter keys like:
+        base_model.model.layers.0.self_attn.q_proj.lora_A.weight
+    Into base keys like:
+        layers.0.self_attn.q_proj.weight
+    """
+    lora_pairs = defaultdict(dict)
+    for key, tensor in adapter_weights.items():
+        clean_key = key
+        if clean_key.startswith("base_model.model."):
+            clean_key = clean_key[len("base_model.model.") :]
+
+        if ".lora_A." in clean_key:
+            base_key = clean_key.split(".lora_A.")[0] + ".weight"
+            lora_pairs[base_key]["A"] = tensor
+        elif ".lora_B." in clean_key:
+            base_key = clean_key.split(".lora_B.")[0] + ".weight"
+            lora_pairs[base_key]["B"] = tensor
+
+    return dict(lora_pairs)
+
+
+class JinaEmbeddingsV5Model(Qwen3ForCausalLM, VllmModelForPooling):
+    """Jina Embeddings V5 with task-specific LoRA adapters merged at load time.
+
+    Extends Qwen3ForCausalLM (the underlying architecture) and declares itself
+    as a pooling model so that as_embedding_model() does not wrap it.
+    """
+
+    is_pooling_model = True
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__(vllm_config=vllm_config, prefix=prefix)
+
+        self._model_name = vllm_config.model_config.model
+        self._revision = vllm_config.model_config.revision
+
+        self._task = getattr(
+            vllm_config.model_config.hf_config, "jina_task", _DEFAULT_TASK
+        )
+        if self._task not in _SUPPORTED_TASKS:
+            logger.warning(
+                "Unknown jina_task=%r. Falling back to %r.",
+                self._task,
+                _DEFAULT_TASK,
+            )
+            self._task = _DEFAULT_TASK
+
+        pooler_config = vllm_config.model_config.pooler_config
+        assert pooler_config is not None
+        self.pooler = DispatchPooler.for_embedding(pooler_config)
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        lora_pairs: dict = {}
+        scaling = 1.0
+
+        result = _load_adapter(self._model_name, self._task, self._revision)
+        if result is None:
+            logger.warning(
+                "No adapter found for task %r in %r. Loading raw base weights.",
+                self._task,
+                self._model_name,
+            )
+        else:
+            adapter_config, adapter_weights = result
+            scaling = adapter_config["lora_alpha"] / adapter_config["r"]
+            lora_pairs = _build_lora_pairs(adapter_weights)
+            logger.info(
+                "Loaded %d adapter tensors for task %r (scaling=%.4f, %d LoRA pairs)",
+                len(adapter_weights),
+                self._task,
+                scaling,
+                len(lora_pairs),
+            )
+
+        def _merge_weights(
+            weights: Iterable[tuple[str, torch.Tensor]],
+        ) -> Iterable[tuple[str, torch.Tensor]]:
+            for name, tensor in weights:
+                clean_name = name
+                if clean_name.startswith("model."):
+                    clean_name = clean_name[len("model.") :]
+
+                if clean_name in lora_pairs:
+                    pair = lora_pairs[clean_name]
+                    if "A" in pair and "B" in pair:
+                        lora_A = pair["A"].to(device=tensor.device, dtype=tensor.dtype)
+                        lora_B = pair["B"].to(device=tensor.device, dtype=tensor.dtype)
+                        tensor = tensor + (lora_B @ lora_A) * scaling
+                yield name, tensor
+
+        loaded = self.model.load_weights(_merge_weights(weights))
+        return {f"model.{name}" for name in loaded}
diff --git a/vllm/model_executor/models/keye.py b/vllm/model_executor/models/keye.py
index a987c89ae094..86d7edc25f92 100644
--- a/vllm/model_executor/models/keye.py
+++ b/vllm/model_executor/models/keye.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import math
 from abc import abstractmethod
-from collections.abc import Iterable, Mapping, Sequence
+from collections.abc import Iterable, Iterator, Mapping, Sequence
 from functools import partial
 from typing import Annotated, Any, Literal, TypeAlias, TypeVar
 
@@ -729,7 +729,7 @@ def __init__(
         self.vision_model = KeyeSiglipVisionTransformer(
             config,
             quant_config=quant_config,
-            prefix=f"{prefix}.vision_model",
+            prefix=maybe_prefix(prefix, "vision_model"),
         )
         self.quant_config = quant_config
 
@@ -1595,91 +1595,92 @@ def _process_video_input(
             self._process_video_embeds(video_type, video_grid_thw, pixel_values_videos)
         )
 
+    @staticmethod
+    def _split_video_grid_thw(
+        grid_thw: torch.Tensor | list[list[int]] | list[int],
+    ) -> list[list[int]]:
+        """
+        Split video grid_thw along the t dimension into per-frame rows.
+
+        This preserves Keye's current M-RoPE behavior, where a video is emitted
+        as consecutive frame-level multimodal blocks rather than a single block
+        spanning the whole video.
+        """
+        if isinstance(grid_thw, list):
+            if len(grid_thw) == 0:
+                return []
+            if isinstance(grid_thw[0], int):
+                grid_thw = torch.tensor([grid_thw], dtype=torch.long)
+            else:
+                grid_thw = torch.tensor(grid_thw, dtype=torch.long)
+        elif grid_thw.ndim == 1:
+            grid_thw = grid_thw.unsqueeze(0)
+
+        if grid_thw.numel() == 0:
+            return []
+
+        t, hw = grid_thw[:, 0], grid_thw[:, 1:]
+        ones = torch.ones_like(hw[:, :1])
+        out = torch.cat([ones, hw], dim=1).repeat_interleave(t, dim=0)
+        return out.tolist()
+
+    def iter_mm_grid_thw(
+        self, mm_features: list[MultiModalFeatureSpec]
+    ) -> Iterator[tuple[int, int, int, int]]:
+        spatial_merge_size = self.config.vision_config.spatial_merge_size
+
+        for mm_feature in sorted(mm_features, key=lambda f: f.mm_position.offset):
+            if mm_feature.data is None:
+                raise ValueError("M-RoPE calculation requires multimodal feature data")
+
+            if mm_feature.modality == "image":
+                grid_thw = mm_feature.data["image_grid_thw"].data
+                if isinstance(grid_thw, torch.Tensor):
+                    if grid_thw.ndim == 2:
+                        assert grid_thw.shape[0] == 1
+                        t, h, w = grid_thw[0].tolist()
+                    else:
+                        t, h, w = grid_thw.tolist()
+                else:
+                    if isinstance(grid_thw[0], list):
+                        assert len(grid_thw) == 1
+                        t, h, w = grid_thw[0]
+                    else:
+                        t, h, w = grid_thw
+
+                yield (
+                    mm_feature.mm_position.offset,
+                    t,
+                    h // spatial_merge_size,
+                    w // spatial_merge_size,
+                )
+            elif mm_feature.modality == "video":
+                current_offset = mm_feature.mm_position.offset
+                for t, h, w in self._split_video_grid_thw(
+                    mm_feature.data["video_grid_thw"].data
+                ):
+                    llm_grid_h = h // spatial_merge_size
+                    llm_grid_w = w // spatial_merge_size
+                    yield (current_offset, t, llm_grid_h, llm_grid_w)
+                    current_offset += t * llm_grid_h * llm_grid_w
+            else:
+                raise ValueError(f"Unsupported modality: {mm_feature.modality}")
+
     def get_mrope_input_positions(
         self,
         input_tokens: list[int],
         mm_features: list[MultiModalFeatureSpec],
     ) -> tuple[torch.Tensor, int]:
-        kwargs = MultiModalFeatureSpec.gather_kwargs(
-            mm_features,
-            {"image_grid_thw", "video_grid_thw"},
-        )
-        image_grid_thw = [item.tolist() for item in kwargs.get("image_grid_thw", [])]
-        video_grid_thw = [item.tolist() for item in kwargs.get("video_grid_thw", [])]
-
-        if isinstance(video_grid_thw, list) and len(video_grid_thw) > 0:
-            video_grid_thw = video_grid_thw[0]
-
-        def split_thw(grid_thw: torch.Tensor | list[int]) -> list[list[int]]:
-            """
-            Split grid_thw along the t dimension.
-
-            Args:
-                grid_thw: shape [N, 3] tensor or nested list of [t, h, w].
-
-            Returns:
-                List of [1, h, w] rows, repeated t times for each original row.
-            """
-
-            if isinstance(grid_thw, list):
-                grid_thw = torch.tensor(grid_thw, dtype=torch.long)
-
-            if grid_thw.numel() == 0:
-                return []
-
-            t, hw = grid_thw[:, 0], grid_thw[:, 1:]
-            ones = torch.ones_like(hw[:, :1])  # [N,1]
-            out = torch.cat([ones, hw], dim=1).repeat_interleave(t, dim=0)
-            return out.tolist()
-
-        video_grid_thw = split_thw(video_grid_thw)
-
-        hf_config = self.config
-        image_token_id = hf_config.image_token_id
-        video_token_id = hf_config.video_token_id
-        spatial_merge_size = hf_config.vision_config.spatial_merge_size
-
-        image_nums = len(image_grid_thw)
-        frame_nums = len(video_grid_thw)
         llm_pos_ids_list: list = []
-
         st = 0
-        remain_images, remain_frames = image_nums, frame_nums
-
-        image_index, video_index = 0, 0
-        for _ in range(image_nums + frame_nums):
-            if remain_images > 0:
-                try:
-                    ed_image = input_tokens.index(image_token_id, st)
-                except ValueError:
-                    ed_image = len(input_tokens) + 1
-            else:
-                ed_image = len(input_tokens) + 1
-            if remain_frames > 0:
-                try:
-                    ed_video = input_tokens.index(video_token_id, st)
-                except ValueError:
-                    ed_video = len(input_tokens) + 1
-            else:
-                ed_video = len(input_tokens) + 1
 
-            if ed_image < ed_video:
-                t, h, w = image_grid_thw[image_index]
-                image_index += 1
-                remain_images -= 1
-                ed = ed_image
-            else:
-                t, h, w = video_grid_thw[video_index]
-                video_index += 1
-                remain_frames -= 1
-                ed = ed_video
-
-            llm_grid_t, llm_grid_h, llm_grid_w = (
-                t,
-                h // spatial_merge_size,
-                w // spatial_merge_size,
-            )
-            text_len = ed - st
+        for (
+            offset,
+            llm_grid_t,
+            llm_grid_h,
+            llm_grid_w,
+        ) in self.iter_mm_grid_thw(mm_features):
+            text_len = offset - st
 
             st_idx = llm_pos_ids_list[-1].max() + 1 if len(llm_pos_ids_list) > 0 else 0
             llm_pos_ids_list.append(
@@ -1711,7 +1712,7 @@ def split_thw(grid_thw: torch.Tensor | list[int]) -> list[list[int]]:
             llm_pos_ids_list.append(
                 torch.stack([t_index, h_index, w_index]) + text_len + st_idx
             )
-            st = ed + llm_grid_t * llm_grid_h * llm_grid_w
+            st = offset + llm_grid_t * llm_grid_h * llm_grid_w
 
         if st < len(input_tokens):
             st_idx = llm_pos_ids_list[-1].max() + 1 if len(llm_pos_ids_list) > 0 else 0
diff --git a/vllm/model_executor/models/keye_vl1_5.py b/vllm/model_executor/models/keye_vl1_5.py
index bc33f5d7d723..a8400b8d17ee 100644
--- a/vllm/model_executor/models/keye_vl1_5.py
+++ b/vllm/model_executor/models/keye_vl1_5.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import itertools
-from collections.abc import Mapping, Sequence
+from collections.abc import Iterator, Mapping, Sequence
 from functools import partial
 from typing import Annotated, Any, Literal, TypeAlias
 
@@ -608,91 +608,67 @@ def _process_video_input(
             new_video_embeds.append(video_embeds[start:end])
         return tuple(new_video_embeds)
 
+    def iter_mm_grid_thw(
+        self, mm_features: list[MultiModalFeatureSpec]
+    ) -> Iterator[tuple[int, int, int, int]]:
+        spatial_merge_size = self.config.vision_config.spatial_merge_size
+
+        for mm_feature in sorted(mm_features, key=lambda f: f.mm_position.offset):
+            if mm_feature.data is None:
+                raise ValueError("M-RoPE calculation requires multimodal feature data")
+
+            embed_ranges = mm_feature.mm_position.extract_embeds_range()
+            if mm_feature.modality == "image":
+                assert len(embed_ranges) == 1
+                grid_thw = mm_feature.data["image_grid_thw"].data
+                if isinstance(grid_thw, torch.Tensor):
+                    if grid_thw.ndim == 2:
+                        assert grid_thw.shape[0] == 1
+                        t, h, w = grid_thw[0].tolist()
+                    else:
+                        t, h, w = grid_thw.tolist()
+                else:
+                    if isinstance(grid_thw[0], list):
+                        assert len(grid_thw) == 1
+                        t, h, w = grid_thw[0]
+                    else:
+                        t, h, w = grid_thw
+
+                yield (
+                    embed_ranges[0][0],
+                    t,
+                    h // spatial_merge_size,
+                    w // spatial_merge_size,
+                )
+            elif mm_feature.modality == "video":
+                split_video_grids = split_thw(mm_feature.data["video_grid_thw"].data)
+                assert len(embed_ranges) == split_video_grids.shape[0]
+                for (start_idx, end_idx), (t, h, w) in zip(
+                    embed_ranges, split_video_grids.tolist()
+                ):
+                    llm_grid_h = h // spatial_merge_size
+                    llm_grid_w = w // spatial_merge_size
+                    num_mm_tokens = t * llm_grid_h * llm_grid_w
+                    assert end_idx - start_idx + 1 == num_mm_tokens
+                    yield (start_idx, t, llm_grid_h, llm_grid_w)
+            else:
+                raise ValueError(f"Unsupported modality: {mm_feature.modality}")
+
     def get_mrope_input_positions(
         self,
         input_tokens: list[int],
         mm_features: list[MultiModalFeatureSpec],
     ) -> tuple[torch.Tensor, int]:
-        kwargs = MultiModalFeatureSpec.gather_kwargs(
-            mm_features,
-            {"image_grid_thw", "video_grid_thw"},
-        )
-        image_grid_thw = [item.tolist() for item in kwargs.get("image_grid_thw", [])]
-        video_grid_thw = [item.tolist() for item in kwargs.get("video_grid_thw", [])]
-
-        if isinstance(video_grid_thw, list) and len(video_grid_thw) > 0:
-            video_grid_thw = video_grid_thw[0]
-
-        def split_thw(grid_thw: torch.Tensor | list[int]) -> list[list[int]]:
-            """
-            Split grid_thw along the t dimension.
-
-            Args:
-                grid_thw: shape [N, 3] tensor or nested list of [t, h, w].
-
-            Returns:
-                List of [1, h, w] rows, repeated t times for each original row.
-            """
-
-            if isinstance(grid_thw, list):
-                grid_thw = torch.tensor(grid_thw, dtype=torch.long)
-
-            if grid_thw.numel() == 0:
-                return []
-
-            t, hw = grid_thw[:, 0], grid_thw[:, 1:]
-            ones = torch.ones_like(hw[:, :1])  # [N,1]
-            out = torch.cat([ones, hw], dim=1).repeat_interleave(t, dim=0)
-            return out.tolist()
-
-        video_grid_thw = split_thw(video_grid_thw)
-
-        hf_config = self.config
-        image_token_id = hf_config.image_token_id
-        video_token_id = hf_config.video_token_id
-        spatial_merge_size = hf_config.vision_config.spatial_merge_size
-
-        image_nums = len(image_grid_thw)
-        frame_nums = len(video_grid_thw)
         llm_pos_ids_list: list = []
-
         st = 0
-        remain_images, remain_frames = image_nums, frame_nums
-
-        image_index, video_index = 0, 0
-        for _ in range(image_nums + frame_nums):
-            if remain_images > 0:
-                try:
-                    ed_image = input_tokens.index(image_token_id, st)
-                except ValueError:
-                    ed_image = len(input_tokens) + 1
-            else:
-                ed_image = len(input_tokens) + 1
-            if remain_frames > 0:
-                try:
-                    ed_video = input_tokens.index(video_token_id, st)
-                except ValueError:
-                    ed_video = len(input_tokens) + 1
-            else:
-                ed_video = len(input_tokens) + 1
 
-            if ed_image < ed_video:
-                t, h, w = image_grid_thw[image_index]
-                image_index += 1
-                remain_images -= 1
-                ed = ed_image
-            else:
-                t, h, w = video_grid_thw[video_index]
-                video_index += 1
-                remain_frames -= 1
-                ed = ed_video
-
-            llm_grid_t, llm_grid_h, llm_grid_w = (
-                t,
-                h // spatial_merge_size,
-                w // spatial_merge_size,
-            )
-            text_len = ed - st
+        for (
+            offset,
+            llm_grid_t,
+            llm_grid_h,
+            llm_grid_w,
+        ) in self.iter_mm_grid_thw(mm_features):
+            text_len = offset - st
 
             st_idx = llm_pos_ids_list[-1].max() + 1 if len(llm_pos_ids_list) > 0 else 0
             llm_pos_ids_list.append(
@@ -724,7 +700,7 @@ def split_thw(grid_thw: torch.Tensor | list[int]) -> list[list[int]]:
             llm_pos_ids_list.append(
                 torch.stack([t_index, h_index, w_index]) + text_len + st_idx
             )
-            st = ed + llm_grid_t * llm_grid_h * llm_grid_w
+            st = offset + llm_grid_t * llm_grid_h * llm_grid_w
 
         if st < len(input_tokens):
             st_idx = llm_pos_ids_list[-1].max() + 1 if len(llm_pos_ids_list) > 0 else 0
diff --git a/vllm/model_executor/models/kimi_audio.py b/vllm/model_executor/models/kimi_audio.py
index fc5065065e96..6232c39a4495 100644
--- a/vllm/model_executor/models/kimi_audio.py
+++ b/vllm/model_executor/models/kimi_audio.py
@@ -14,6 +14,7 @@
 
 from vllm.config import ModelConfig, SpeechToTextConfig, VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
+from vllm.config.speech_to_text import SpeechToTextParams
 from vllm.inputs import PromptType, TokensPrompt
 from vllm.model_executor.model_loader import DefaultModelLoader
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
@@ -91,6 +92,7 @@ def __init__(
         whisper_config = HFWhisperConfig.from_pretrained(
             model_path,
             subfolder=KIMIA_WHISPER_SUBFOLDER,
+            revision=vllm_config.model_config.revision,
         )
 
         super().__init__(
@@ -425,7 +427,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             DefaultModelLoader.Source(
                 model_or_path=vllm_config.model_config.model,
                 subfolder="whisper-large-v3",
-                revision=None,
+                revision=vllm_config.model_config.revision,
             )
         ]
 
@@ -626,16 +628,12 @@ def get_speech_to_text_config(
         )
 
     @classmethod
-    def get_generation_prompt(
-        cls,
-        audio: np.ndarray,
-        model_config: ModelConfig,
-        stt_config: SpeechToTextConfig,
-        language: str | None,
-        task_type: Literal["transcribe", "translate"],
-        request_prompt: str,
-        to_language: str | None,
-    ) -> PromptType:
+    def get_generation_prompt(cls, stt_params: SpeechToTextParams) -> PromptType:
+        audio = stt_params.audio
+        model_config = stt_params.model_config
+        task_type = stt_params.task_type
+        request_prompt = stt_params.request_prompt
+
         tokenizer = cached_get_tokenizer(
             model_config.tokenizer,
             tokenizer_cls=KimiAudioTokenizer,
diff --git a/vllm/model_executor/models/kimi_k25.py b/vllm/model_executor/models/kimi_k25.py
index a9b85f073550..89cda63c8059 100644
--- a/vllm/model_executor/models/kimi_k25.py
+++ b/vllm/model_executor/models/kimi_k25.py
@@ -110,10 +110,33 @@ def __init__(self, ctx: InputProcessingContext) -> None:
         tokenizer = self.get_tokenizer()
         image_processor = cached_get_image_processor(
             self.ctx.model_config.model,
+            revision=self.ctx.model_config.revision,
             trust_remote_code=self.ctx.model_config.trust_remote_code,
         )
 
-        self.media_token_id = media_token_id = hf_config.media_placeholder_token_id
+        # Resolve token ID from the tokenizer because transformers v5
+        # may remap token IDs vs config.json.
+        config_token_id = hf_config.media_placeholder_token_id
+        resolved_token_id = tokenizer.convert_tokens_to_ids("<|media_pad|>")
+        is_valid_resolved = isinstance(resolved_token_id, int) and (
+            tokenizer.unk_token_id is None
+            or resolved_token_id != tokenizer.unk_token_id
+        )
+        if is_valid_resolved and resolved_token_id != config_token_id:
+            logger.warning_once(
+                "Kimi-K2.5 config.media_placeholder_token_id (%d) disagrees "
+                "with tokenizer mapping for <|media_pad|> (%d). "
+                "Using tokenizer value.",
+                config_token_id,
+                resolved_token_id,
+            )
+            media_token_id = resolved_token_id
+            # Patch config so downstream code also sees the correct ID.
+            hf_config.media_placeholder_token_id = resolved_token_id
+        else:
+            media_token_id = config_token_id
+
+        self.media_token_id = media_token_id
         self.media_token = tokenizer.decode(media_token_id)
 
         self.image_processor = image_processor
@@ -232,8 +255,7 @@ def _get_prompt_updates(
         hf_processor_mm_kwargs: Mapping[str, Any],
         out_mm_kwargs: MultiModalKwargsItems,
     ) -> Sequence[PromptUpdate]:
-        hf_config = self.info.get_hf_config()
-        media_token_id = hf_config.media_placeholder_token_id
+        media_token_id = self.info.media_token_id
 
         def get_replacement(item_idx: int):
             media = mm_items.get_items("vision_chunk", (VisionChunkProcessorItems,))
@@ -317,9 +339,12 @@ def __init__(
                 quant_config=self._maybe_ignore_quant_config(quant_config),
                 prefix=maybe_prefix(prefix, "vision_tower"),
             )
-            self.vision_tower = self.vision_tower.to(
-                device=self.device, dtype=model_config.dtype
-            )
+            if self._maybe_ignore_quant_config(quant_config) is not None:
+                self.vision_tower = self.vision_tower.to(device=self.device)
+            else:
+                self.vision_tower = self.vision_tower.to(
+                    device=self.device, dtype=model_config.dtype
+                )
 
             self.mm_projector = KimiK25MultiModalProjector(
                 config=config.vision_config,
diff --git a/vllm/model_executor/models/kimi_k25_vit.py b/vllm/model_executor/models/kimi_k25_vit.py
index 69524293c54b..237c28506ed0 100644
--- a/vllm/model_executor/models/kimi_k25_vit.py
+++ b/vllm/model_executor/models/kimi_k25_vit.py
@@ -618,6 +618,9 @@ def mm_projector_forward(mm_projector: torch.nn.Module, vt_output: list[torch.Te
     """Apply MM projector to vision tower outputs."""
     num_embedding_list = [x.shape[0] for x in vt_output]
     batched = torch.cat(vt_output, dim=0)
+    projector_dtype = mm_projector.pre_norm.weight.dtype
+    if batched.dtype != projector_dtype:
+        batched = batched.to(projector_dtype)
     proj_out = mm_projector(batched)
     proj_out = proj_out.reshape(-1, proj_out.shape[-1])
     proj_out = torch.split(proj_out, num_embedding_list)
diff --git a/vllm/model_executor/models/kimi_linear.py b/vllm/model_executor/models/kimi_linear.py
index 4cd7b63c1472..a891950fa579 100644
--- a/vllm/model_executor/models/kimi_linear.py
+++ b/vllm/model_executor/models/kimi_linear.py
@@ -7,16 +7,17 @@
 from torch import nn
 
 from vllm.compilation.decorators import support_torch_compile
-from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig
+from vllm.config import CacheConfig, VllmConfig
 from vllm.distributed import (
     get_pp_group,
     get_tensor_model_parallel_world_size,
-    tensor_model_parallel_all_reduce,
 )
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import SiluAndMul
-from vllm.model_executor.layers.fused_moe import FusedMoE
-from vllm.model_executor.layers.kda import KimiDeltaAttention
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
@@ -25,6 +26,9 @@
     RowParallelLinear,
 )
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.mamba.gdn.kimi_gdn_linear_attn import (
+    KimiGatedDeltaNetAttention,
+)
 from vllm.model_executor.layers.mamba.mamba_utils import (
     MambaStateCopyFunc,
     MambaStateCopyFuncCalculator,
@@ -132,12 +136,25 @@ def __init__(
 
         self.gate.e_score_correction_bias = nn.Parameter(torch.empty(num_experts))
 
+        if self.num_shared_experts is not None:
+            intermediate_size = moe_intermediate_size * self.num_shared_experts
+            self.shared_experts = KimiMLP(
+                hidden_size=config.hidden_size,
+                intermediate_size=intermediate_size,
+                hidden_act=config.hidden_act,
+                quant_config=quant_config,
+                reduce_results=False,
+                prefix=f"{prefix}.shared_experts",
+            )
+        else:
+            self.shared_experts = None
+
         self.experts = FusedMoE(
+            shared_experts=self.shared_experts,
             num_experts=num_experts,
             top_k=config.num_experts_per_token,
             hidden_size=hidden_size,
             intermediate_size=moe_intermediate_size,
-            reduce_results=False,
             renormalize=moe_renormalize,
             quant_config=quant_config,
             use_grouped_topk=config.use_grouped_topk,
@@ -146,34 +163,16 @@ def __init__(
             prefix=f"{prefix}.experts",
             scoring_func=config.moe_router_activation_func,
             e_score_correction_bias=self.gate.e_score_correction_bias,
+            routed_scaling_factor=self.routed_scaling_factor,
         )
 
-        if self.num_shared_experts is not None:
-            intermediate_size = moe_intermediate_size * self.num_shared_experts
-            self.shared_experts = KimiMLP(
-                hidden_size=config.hidden_size,
-                intermediate_size=intermediate_size,
-                hidden_act=config.hidden_act,
-                quant_config=quant_config,
-                reduce_results=False,
-                prefix=f"{prefix}.shared_experts",
-            )
-
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         num_tokens, hidden_size = hidden_states.shape
         hidden_states = hidden_states.view(-1, hidden_size)
-        if self.num_shared_experts is not None:
-            shared_output = self.shared_experts(hidden_states)
         router_logits, _ = self.gate(hidden_states)
-        final_hidden_states = (
-            self.experts(hidden_states=hidden_states, router_logits=router_logits)
-            * self.routed_scaling_factor
+        final_hidden_states = self.experts(
+            hidden_states=hidden_states, router_logits=router_logits
         )
-        if shared_output is not None:
-            final_hidden_states = final_hidden_states + shared_output
-
-        if self.tp_size > 1:
-            final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
         return final_hidden_states.view(num_tokens, hidden_size)
 
 
@@ -289,26 +288,22 @@ class KimiDecoderLayer(nn.Module):
     def __init__(
         self,
         config: KimiLinearConfig,
-        layer_idx: int,
-        cache_config: CacheConfig | None = None,
-        quant_config: QuantizationConfig | None = None,
-        parallel_config: ParallelConfig | None = None,
-        model_config: ModelConfig | None = None,
+        vllm_config: VllmConfig,
         prefix: str = "",
-        **kwargs,
     ) -> None:
         super().__init__()
         self.hidden_size = config.hidden_size
 
         self.is_moe = config.is_moe
+        layer_idx = int(prefix.rsplit(".", 1)[1])
+        model_config = vllm_config.model_config
+        cache_config = vllm_config.cache_config
+        quant_config = vllm_config.quant_config
 
         if config.is_kda_layer(layer_idx):
-            self.self_attn = KimiDeltaAttention(
-                layer_idx=layer_idx,
-                hidden_size=config.hidden_size,
-                quant_config=quant_config,
-                cache_config=cache_config,
-                model_config=config,
+            self.self_attn = KimiGatedDeltaNetAttention(
+                config,
+                vllm_config,
                 prefix=f"{prefix}.self_attn",
             )
         else:
@@ -388,10 +383,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
 
         config = vllm_config.model_config.hf_text_config
-        model_config = vllm_config.model_config
-        cache_config = vllm_config.cache_config
-        quant_config = vllm_config.quant_config
-        parallel_config = vllm_config.parallel_config
         self.config = config
 
         self.vocab_size = config.vocab_size
@@ -405,19 +396,11 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         else:
             self.embed_tokens = PPMissingLayer()
 
-        extra_kwargs = {}
-
         def get_layer(prefix: str):
-            layer_idx = int(prefix.rsplit(".", 1)[1])
             return KimiDecoderLayer(
                 config,
-                layer_idx,
-                cache_config,
-                quant_config,
-                parallel_config,
-                model_config,
+                vllm_config,
                 prefix,
-                **extra_kwargs,
             )
 
         self.start_layer, self.end_layer, self.layers = make_layers(
@@ -482,7 +465,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         if self.config.is_moe:
             # Params for weights, fp8 weight scales, fp8 activation scales
             # (param_name, weight_name, expert_id, shard_id)
-            expert_params_mapping = FusedMoE.make_expert_params_mapping(
+            expert_params_mapping = fused_moe_make_expert_params_mapping(
                 self,
                 ckpt_gate_proj_name="w1",
                 ckpt_down_proj_name="w2",
diff --git a/vllm/model_executor/models/laguna.py b/vllm/model_executor/models/laguna.py
new file mode 100644
index 000000000000..f79f6097c611
--- /dev/null
+++ b/vllm/model_executor/models/laguna.py
@@ -0,0 +1,905 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Inference-only Laguna model compatible with HuggingFace weights."""
+
+import typing
+from collections.abc import Callable, Iterable
+from itertools import islice
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import CacheConfig, VllmConfig, get_current_vllm_config
+from vllm.distributed import (
+    get_ep_group,
+    get_pp_group,
+    get_tensor_model_parallel_rank,
+    get_tensor_model_parallel_world_size,
+)
+from vllm.logger import init_logger
+from vllm.model_executor.layers.attention import Attention
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import (
+    ColumnParallelLinear,
+    QKVParallelLinear,
+    ReplicatedLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    ParallelLMHead,
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.model_loader.weight_utils import (
+    default_weight_loader,
+    maybe_remap_kv_scale_name,
+)
+from vllm.model_executor.models.interfaces import (
+    EagleModelMixin,
+    SupportsEagle3,
+    SupportsLoRA,
+    SupportsPP,
+)
+from vllm.model_executor.models.utils import (
+    AutoWeightsLoader,
+    PPMissingLayer,
+    extract_layer_index,
+    is_pp_missing_parameter,
+    make_empty_intermediate_tensors_factory,
+    make_layers,
+    maybe_prefix,
+)
+from vllm.sequence import IntermediateTensors
+
+logger = init_logger(__name__)
+
+
+class LagunaMLP(nn.Module):
+    """Dense MLP for Laguna (used in mlp_only_layers)."""
+
+    def __init__(
+        self,
+        hidden_size: int,
+        intermediate_size: int,
+        hidden_act: str,
+        quant_config: QuantizationConfig | None = None,
+        reduce_results: bool = True,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        # gate_proj and up_proj are kept as separate ColumnParallelLinear
+        # rather than merged via MergedColumnParallelLinear. The merged form
+        # requires per-partition NVFP4 global scales (weight_global_scale,
+        # input_global_scale) to be packed into a length-2 PerTensorScaleParameter
+        # and then collapsed via .max() in process_weights_after_loading; this
+        # doesn't round-trip cleanly through Marlin's NVFP4 stacked-layer code
+        # path. Splitting yields one global scale per Linear, exactly matching
+        # the standard compressed-tensors per-Linear schema on disk.
+        self.gate_proj = ColumnParallelLinear(
+            hidden_size,
+            intermediate_size,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.gate_proj",
+        )
+        self.up_proj = ColumnParallelLinear(
+            hidden_size,
+            intermediate_size,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.up_proj",
+        )
+        self.down_proj = RowParallelLinear(
+            intermediate_size,
+            hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            reduce_results=reduce_results,
+            prefix=f"{prefix}.down_proj",
+        )
+        if hidden_act != "silu":
+            raise ValueError(
+                f"Unsupported activation: {hidden_act}. Only silu is supported."
+            )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        gate, _ = self.gate_proj(x)
+        up, _ = self.up_proj(x)
+        x, _ = self.down_proj(F.silu(gate) * up)
+        return x
+
+
+class LagunaMoE(nn.Module):
+    """Sparse MoE block for Laguna with optional shared expert and sigmoid routing.
+
+    Key differences from other MoE implementations:
+    - Uses SIGMOID routing activation (not softmax)
+    - Shared expert runs in parallel with routed experts (when enabled)
+    - Matches HF reference: modular_laguna.py LagunaSparseMoeBlock
+    """
+
+    def __init__(
+        self,
+        config,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+        enable_eplb: bool = False,
+    ):
+        super().__init__()
+        self.config = config
+        self.num_experts = config.num_experts
+        self.top_k = config.num_experts_per_tok
+
+        self.tp_size = get_tensor_model_parallel_world_size()
+        self.ep_group = get_ep_group().device_group
+        self.ep_rank = self.ep_group.rank()
+        self.ep_size = self.ep_group.size()
+
+        self.n_routed_experts = config.num_experts
+        self.n_shared_experts = 1 if config.shared_expert_intermediate_size > 0 else 0
+        self.routed_scaling_factor = float(
+            getattr(config, "moe_routed_scaling_factor", 1.0)
+        )
+
+        if self.tp_size > config.num_experts:
+            raise ValueError(
+                f"Tensor parallel size {self.tp_size} is greater than "
+                f"the number of experts {config.num_experts}."
+            )
+
+        # Load balancing settings.
+        vllm_config = get_current_vllm_config()
+        eplb_config = vllm_config.parallel_config.eplb_config
+        self.enable_eplb = enable_eplb
+        eplb_config.num_redundant_experts = (
+            eplb_config.num_redundant_experts
+            if eplb_config.num_redundant_experts is not None
+            else 0
+        )
+        self.n_redundant_experts = eplb_config.num_redundant_experts
+        self.n_logical_experts = self.n_routed_experts
+        self.n_physical_experts = self.n_logical_experts + self.n_redundant_experts
+        self.n_local_physical_experts = self.n_physical_experts // self.ep_size
+        self.physical_expert_start = self.ep_rank * self.n_local_physical_experts
+        self.physical_expert_end = (
+            self.physical_expert_start + self.n_local_physical_experts
+        )
+
+        # Router gate
+        self.gate = ReplicatedLinear(
+            config.hidden_size,
+            config.num_experts,
+            bias=False,
+            quant_config=None,
+            prefix=f"{prefix}.gate",
+        )
+
+        # Shared expert (optional) - passed to FusedMoE for overlap optimization
+        self.shared_expert: LagunaMLP | None
+        if config.shared_expert_intermediate_size > 0:
+            self.shared_expert = LagunaMLP(
+                hidden_size=config.hidden_size,
+                intermediate_size=config.shared_expert_intermediate_size,
+                hidden_act=config.hidden_act,
+                quant_config=quant_config,
+                reduce_results=False,  # Reduce after shared+routed combine
+                prefix=f"{prefix}.shared_expert",
+            )
+        else:
+            self.shared_expert = None
+
+        # Auxiliary-loss-free load-balancing bias (arXiv:2408.15664). The
+        # checkpoint stores one [num_experts] tensor per MoE layer at
+        # `mlp.experts.e_score_correction_bias`; registering it as a Parameter
+        # on the FusedMoE lets the weight loader pick it up and the router
+        # add it during top-k selection. The fused top-k bias router requires
+        # float32 regardless of model dtype.
+        e_score_correction_bias = torch.nn.Parameter(
+            torch.zeros(config.num_experts, dtype=torch.float32),
+            requires_grad=False,
+        )
+
+        # FusedMoE with SIGMOID routing. Passing `shared_experts=` lets the
+        # layer overlap the shared-expert compute with the all2all dispatch.
+        # `apply_routed_scale_to_output=True` makes FusedMoE handle the
+        # routed_scaling_factor, shared+routed combine, and TP all-reduce
+        # internally, so forward() just returns the final hidden states.
+        self.experts = FusedMoE(
+            shared_experts=self.shared_expert,
+            num_experts=config.num_experts,
+            top_k=config.num_experts_per_tok,
+            hidden_size=config.hidden_size,
+            intermediate_size=config.moe_intermediate_size,
+            renormalize=config.norm_topk_prob,
+            quant_config=quant_config,
+            prefix=f"{prefix}.experts",
+            scoring_func="sigmoid",
+            use_grouped_topk=False,
+            apply_router_weight_on_input=bool(config.moe_apply_router_weight_on_input),
+            e_score_correction_bias=e_score_correction_bias,
+            enable_eplb=self.enable_eplb,
+            num_redundant_experts=self.n_redundant_experts,
+            routed_scaling_factor=self.routed_scaling_factor,
+            apply_routed_scale_to_output=True,
+        )
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        orig_shape = hidden_states.shape
+        hidden_dim = hidden_states.shape[-1]
+        hidden_states = hidden_states.view(-1, hidden_dim)
+
+        router_logits, _ = self.gate(hidden_states)
+        router_logits = router_logits.float()
+        softcap = getattr(self.config, "moe_router_logit_softcapping", 0.0) or 0.0
+        if softcap > 0.0:
+            router_logits = torch.tanh(router_logits / softcap) * softcap
+
+        final_hidden_states = self.experts(hidden_states, router_logits)
+        return final_hidden_states.view(orig_shape)
+
+
+class LagunaAttention(nn.Module):
+    """Laguna attention with optional softplus output gating.
+
+    Supports per-layer sliding window attention when ``config.layer_types``
+    is present.  Layers whose type is ``"sliding_attention"`` use
+    ``config.sliding_window``; all other layers (typically labelled
+    ``"full_attention"``) use full attention.  When ``layer_types`` is
+    absent every layer defaults to full attention for backwards
+    compatibility.
+    """
+
+    def __init__(
+        self,
+        config,
+        hidden_size: int,
+        num_heads: int,
+        num_kv_heads: int,
+        max_position_embeddings: int = 131072,
+        head_dim: int | None = None,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+        attention_sink: bool = False,
+    ) -> None:
+        super().__init__()
+        self.hidden_size = hidden_size
+        tp_size = get_tensor_model_parallel_world_size()
+        self.total_num_heads = num_heads
+        assert self.total_num_heads % tp_size == 0
+        self.num_heads = self.total_num_heads // tp_size
+        self.total_num_kv_heads = num_kv_heads
+        if self.total_num_kv_heads >= tp_size:
+            assert self.total_num_kv_heads % tp_size == 0
+        else:
+            assert tp_size % self.total_num_kv_heads == 0
+        self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
+        self.head_dim = head_dim or (hidden_size // self.total_num_heads)
+        self.q_size = self.num_heads * self.head_dim
+        self.kv_size = self.num_kv_heads * self.head_dim
+        self.scaling = self.head_dim**-0.5
+        self.max_position_embeddings = max_position_embeddings
+
+        # Gating flag
+        self.gating = config.gating
+
+        # Per-layer sliding window (follows Gemma2/Cohere2 convention)
+        layer_types = getattr(config, "layer_types", None)
+        if layer_types is not None:
+            layer_idx = extract_layer_index(prefix)
+            is_sliding = layer_types[layer_idx] == "sliding_attention"
+            self.sliding_window = config.sliding_window if is_sliding else None
+        else:
+            self.sliding_window = None
+
+        # QKV projection (no bias for Laguna)
+        self.qkv_proj = QKVParallelLinear(
+            self.hidden_size,
+            self.head_dim,
+            self.total_num_heads,
+            self.total_num_kv_heads,
+            bias=config.qkv_bias,
+            quant_config=quant_config,
+            prefix=f"{prefix}.qkv_proj",
+        )
+
+        # Output projection
+        self.o_proj = RowParallelLinear(
+            self.total_num_heads * self.head_dim,
+            self.hidden_size,
+            bias=config.attention_bias,
+            quant_config=quant_config,
+            prefix=f"{prefix}.o_proj",
+        )
+
+        # Gating projection (Laguna-specific, optional)
+        # config.gating may be:
+        #   - True / "per-element": one gate per (head, head_dim) channel
+        #   - "per-head":           one gate per head, broadcast across head_dim
+        if self.gating:
+            # v5 LagunaConfig uses ``gating=True`` for per-head; older configs
+            # used ``"per-head"``. Accept both. ``"per-element"`` (or legacy
+            # ``True``) means per-element gating with output size num_heads ×
+            # head_dim.
+            gate_per_head = self.gating is True or self.gating == "per-head"
+            g_out = (
+                self.total_num_heads
+                if gate_per_head
+                else self.total_num_heads * self.head_dim
+            )
+            self.g_proj = ColumnParallelLinear(
+                hidden_size,
+                g_out,
+                bias=False,
+                quant_config=quant_config,
+                prefix=f"{prefix}.g_proj",
+            )
+            self.gate_per_head = gate_per_head
+        else:
+            self.g_proj = None
+            self.gate_per_head = False
+
+        # Attention sinks (learnable per-head bias for SWA layers)
+        sinks = None
+        if attention_sink:
+            self.sink = torch.nn.Parameter(
+                torch.empty(self.total_num_heads // tp_size, requires_grad=False)
+            )
+            sinks = self.sink
+
+        # Resolve rope params per-layer-type. ``config.rope_parameters`` is
+        # either a flat dict (legacy) or a nested ``{layer_type: rope_dict}``
+        # (v5 Laguna-XS schema). The v5 form is unhashable as-is and would
+        # crash `get_rope`'s cache lookup, so always pull out the layer's
+        # sub-dict before forwarding.
+        layer_type = (
+            layer_types[extract_layer_index(prefix)]
+            if layer_types is not None
+            else "full_attention"
+        )
+        is_sliding = layer_type == "sliding_attention"
+
+        top_rope = getattr(config, "rope_parameters", None) or {}
+        if any(isinstance(v, dict) for v in top_rope.values()):
+            # Nested per-layer-type form.
+            base_rope = top_rope.get(layer_type) or top_rope.get("full_attention") or {}
+        else:
+            base_rope = top_rope
+
+        # Older flat-rope ckpts can carry a separate `swa_rope_parameters`
+        # for SWA layers. Prefer it when present; otherwise the nested
+        # rope dict above already supplies the correct sub-config.
+        swa_rope = getattr(config, "swa_rope_parameters", None)
+        if (
+            is_sliding
+            and swa_rope is None
+            and not any(isinstance(v, dict) for v in top_rope.values())
+        ):
+            logger.warning_once(
+                "Laguna config has sliding_attention layers but neither "
+                "`swa_rope_parameters` nor a nested per-layer-type "
+                "`rope_parameters` — SWA layers will reuse the global rope. "
+                "If the checkpoint was trained with distinct SWA rope "
+                "(theta / partial_rotary_factor), regenerate its HF config "
+                "to include either form."
+            )
+        rope_params = swa_rope if (is_sliding and swa_rope is not None) else base_rope
+        # `partial_rotary_factor` may live on the top-level config (main attention)
+        # or on the per-layer rope dict itself (e.g. SWA can differ). Inject the
+        # top-level value into `rope_params` if the dict doesn't already set it.
+        top_partial = getattr(config, "partial_rotary_factor", None)
+        if top_partial is not None and "partial_rotary_factor" not in rope_params:
+            rope_params = {**rope_params, "partial_rotary_factor": top_partial}
+
+        # Rotary embeddings (YaRN)
+        self.rotary_emb = get_rope(
+            head_size=self.head_dim,
+            max_position=max_position_embeddings,
+            is_neox_style=True,
+            rope_parameters=rope_params,
+        )
+
+        self.attn = Attention(
+            self.num_heads,
+            self.head_dim,
+            self.scaling,
+            num_kv_heads=self.num_kv_heads,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            per_layer_sliding_window=self.sliding_window,
+            prefix=f"{prefix}.attn",
+            sinks=sinks,
+        )
+
+        # QK normalization (like Qwen3)
+        self.q_norm = RMSNorm(self.head_dim, eps=config.rms_norm_eps)
+        self.k_norm = RMSNorm(self.head_dim, eps=config.rms_norm_eps)
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor:
+        qkv, _ = self.qkv_proj(hidden_states)
+        q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
+
+        q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
+        q_by_head = self.q_norm(q_by_head)
+        q = q_by_head.view(q.shape)
+
+        k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
+        k_by_head = self.k_norm(k_by_head)
+        k = k_by_head.view(k.shape)
+
+        q, k = self.rotary_emb(positions, q, k)
+        attn_output = self.attn(q, k, v)
+
+        # Apply gating if enabled (compute softplus in float32 for precision)
+        if self.gating and self.g_proj is not None:
+            gate, _ = self.g_proj(hidden_states)
+            gate = F.softplus(gate.float()).type_as(attn_output)
+            if self.gate_per_head:
+                # gate: [..., num_heads]; broadcast across head_dim
+                attn_shape = attn_output.shape
+                attn_output = (
+                    attn_output.view(*attn_shape[:-1], self.num_heads, self.head_dim)
+                    * gate.unsqueeze(-1)
+                ).view(attn_shape)
+            else:
+                attn_output = attn_output * gate
+
+        output, _ = self.o_proj(attn_output)
+        return output
+
+
+class LagunaDecoderLayer(nn.Module):
+    def __init__(
+        self,
+        config,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+        enable_eplb: bool = False,
+    ) -> None:
+        super().__init__()
+        self.hidden_size = config.hidden_size
+        layer_idx = extract_layer_index(prefix)
+
+        # Determine if this layer uses sliding window attention
+        layer_types = getattr(config, "layer_types", None)
+        is_sliding = (
+            layer_types is not None and layer_types[layer_idx] == "sliding_attention"
+        )
+
+        # Enable attention sinks on SWA layers when configured
+        attention_sink = is_sliding and getattr(
+            config, "swa_attention_sink_enabled", False
+        )
+
+        # Optional per-layer override of head count (Laguna-XS).
+        per_layer_heads = getattr(config, "num_attention_heads_per_layer", None)
+        layer_num_heads = (
+            per_layer_heads[layer_idx]
+            if per_layer_heads is not None
+            else config.num_attention_heads
+        )
+
+        self.self_attn = LagunaAttention(
+            config=config,
+            hidden_size=self.hidden_size,
+            num_heads=layer_num_heads,
+            num_kv_heads=config.num_key_value_heads,
+            max_position_embeddings=config.max_position_embeddings,
+            head_dim=getattr(config, "head_dim", None),
+            cache_config=cache_config,
+            quant_config=quant_config,
+            prefix=f"{prefix}.self_attn",
+            attention_sink=attention_sink,
+        )
+
+        # Check if this layer uses MoE or dense MLP (matches Qwen2/Qwen3 convention)
+        mlp_only_layers = (
+            [] if not hasattr(config, "mlp_only_layers") else config.mlp_only_layers
+        )
+        self.is_moe_layer = (
+            (layer_idx not in mlp_only_layers)
+            and (config.num_experts > 0)
+            and ((layer_idx + 1) % config.decoder_sparse_step == 0)
+        )
+
+        if self.is_moe_layer:
+            self.mlp = LagunaMoE(
+                config=config,
+                quant_config=quant_config,
+                prefix=f"{prefix}.mlp",
+                enable_eplb=enable_eplb,
+            )
+        else:
+            self.mlp = LagunaMLP(
+                hidden_size=config.hidden_size,
+                intermediate_size=config.intermediate_size,
+                hidden_act=config.hidden_act,
+                quant_config=quant_config,
+                prefix=f"{prefix}.mlp",
+            )
+
+        self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.post_attention_layernorm = RMSNorm(
+            config.hidden_size, eps=config.rms_norm_eps
+        )
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        residual: torch.Tensor | None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        # Self Attention
+        if residual is None:
+            residual = hidden_states
+            hidden_states = self.input_layernorm(hidden_states)
+        else:
+            hidden_states, residual = self.input_layernorm(hidden_states, residual)
+
+        hidden_states = self.self_attn(
+            positions=positions,
+            hidden_states=hidden_states,
+        )
+
+        # Fully Connected
+        hidden_states, residual = self.post_attention_layernorm(hidden_states, residual)
+        hidden_states = self.mlp(hidden_states)
+
+        return hidden_states, residual
+
+
+@support_torch_compile
+class LagunaModel(nn.Module, EagleModelMixin):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+
+        config = vllm_config.model_config.hf_config
+        cache_config = vllm_config.cache_config
+        quant_config = vllm_config.quant_config
+        enable_eplb = vllm_config.parallel_config.enable_eplb
+        eplb_config = vllm_config.parallel_config.eplb_config
+        self.num_redundant_experts = eplb_config.num_redundant_experts
+        self.config = config
+        self.quant_config = quant_config
+
+        # Disable the model-level sliding-window fallback in Attention.__init__.
+        # Laguna drives SWA per-layer via `layer_types`, passing
+        # `per_layer_sliding_window=self.sliding_window` (None for global
+        # layers). Without this, global layers whose `per_layer_sliding_window`
+        # is None would pick up `cache_config.sliding_window`
+        # (populated from `config.sliding_window`) as a fallback, silently
+        # applying a 512-token window to full-attention layers.
+        if cache_config is not None:
+            cache_config.sliding_window = None
+
+        self.vocab_size = config.vocab_size
+
+        if get_pp_group().is_first_rank or (
+            config.tie_word_embeddings and get_pp_group().is_last_rank
+        ):
+            self.embed_tokens = VocabParallelEmbedding(
+                config.vocab_size,
+                config.hidden_size,
+                quant_config=quant_config,
+                prefix=f"{prefix}.embed_tokens",
+            )
+        else:
+            self.embed_tokens = PPMissingLayer()
+
+        self.start_layer, self.end_layer, self.layers = make_layers(
+            config.num_hidden_layers,
+            lambda prefix: LagunaDecoderLayer(
+                config=config,
+                cache_config=cache_config,
+                quant_config=quant_config,
+                prefix=prefix,
+                enable_eplb=enable_eplb,
+            ),
+            prefix=f"{prefix}.layers",
+        )
+
+        if get_pp_group().is_last_rank:
+            self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        else:
+            self.norm = PPMissingLayer()
+
+        self.make_empty_intermediate_tensors = make_empty_intermediate_tensors_factory(
+            ["hidden_states", "residual"], config.hidden_size
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        if get_pp_group().is_first_rank:
+            if inputs_embeds is not None:
+                hidden_states = inputs_embeds
+            else:
+                hidden_states = self.embed_tokens(input_ids)
+            residual = None
+        else:
+            assert intermediate_tensors is not None
+            hidden_states = intermediate_tensors["hidden_states"]
+            residual = intermediate_tensors["residual"]
+
+        aux_hidden_states = self._maybe_add_hidden_state(
+            [], self.start_layer, hidden_states, residual
+        )
+        for layer_idx, layer in enumerate(
+            islice(self.layers, self.start_layer, self.end_layer),
+            start=self.start_layer,
+        ):
+            hidden_states, residual = layer(positions, hidden_states, residual)
+            self._maybe_add_hidden_state(
+                aux_hidden_states, layer_idx + 1, hidden_states, residual
+            )
+
+        if not get_pp_group().is_last_rank:
+            return IntermediateTensors(
+                {"hidden_states": hidden_states, "residual": residual}
+            )
+
+        hidden_states, _ = self.norm(hidden_states, residual)
+        if len(aux_hidden_states) > 0:
+            return hidden_states, aux_hidden_states
+        return hidden_states
+
+    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
+        """Get expert parameter mapping for weight loading.
+
+        Returns mapping tuples of (param_name, weight_name, expert_id, shard_id)
+        that handle both weights and quantization scales.
+        """
+        return fused_moe_make_expert_params_mapping(
+            self,
+            ckpt_gate_proj_name="gate_proj",
+            ckpt_down_proj_name="down_proj",
+            ckpt_up_proj_name="up_proj",
+            num_experts=self.config.num_experts,
+            num_redundant_experts=self.num_redundant_experts,
+        )
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        stacked_params_mapping = [
+            # (param_name, shard_name, shard_id)
+            ("qkv_proj", "q_proj", "q"),
+            ("qkv_proj", "k_proj", "k"),
+            ("qkv_proj", "v_proj", "v"),
+            # gate_proj and up_proj are loaded as separate Linears (see
+            # LagunaMLP) so no merge entry is needed here.
+        ]
+
+        # Suffixes to skip for GPTQ/modelopt models if param doesn't exist
+        ignore_suffixes = (
+            ".bias",
+            "_bias",
+            ".k_scale",
+            "_k_scale",
+            ".v_scale",
+            "_v_scale",
+            ".weight_scale",
+            "_weight_scale",
+            ".input_scale",
+            "_input_scale",
+        )
+
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+        expert_params_mapping = self.get_expert_mapping()
+
+        tp_rank = get_tensor_model_parallel_rank()
+
+        for name, loaded_weight in weights:
+            # Handle attention sinks (distributed across ranks). Derive the
+            # per-rank slice from the parameter's own shape so per-layer
+            # variations in head count are handled correctly.
+            if "sink" in name:
+                param = params_dict.get(name)
+                if param is not None:
+                    layer_heads_per_rank = param.shape[0]
+                    layer_head_start = tp_rank * layer_heads_per_rank
+                    narrow_weight = loaded_weight.narrow(
+                        0, layer_head_start, layer_heads_per_rank
+                    )
+                    param.data.copy_(narrow_weight)
+                    loaded_params.add(name)
+                continue
+
+            # Handle KV cache quantization scales
+            if self.quant_config is not None and (
+                scale_name := self.quant_config.get_cache_scale(name)
+            ):
+                param = params_dict[scale_name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                assert loaded_weight.numel() == 1, (
+                    f"KV scale numel {loaded_weight.numel()} != 1"
+                )
+                loaded_weight = loaded_weight.squeeze()
+                weight_loader(param, loaded_weight)
+                loaded_params.add(scale_name)
+                continue
+
+            # Handle stacked params (QKV, gate_up for
+            # non-expert layers and shared_expert)
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                if weight_name not in name:
+                    continue
+                # Skip expert weights - handled below via expert_params_mapping
+                if "mlp.experts" in name and "shared_expert" not in name:
+                    continue
+                name = name.replace(weight_name, param_name)
+
+                if name.endswith(ignore_suffixes) and name not in params_dict:
+                    continue
+                if is_pp_missing_parameter(name, self):
+                    continue
+                # Remap FP8 kv_scale names for backwards compatibility
+                if name.endswith("scale"):
+                    name = maybe_remap_kv_scale_name(name, params_dict)
+                    if name is None:
+                        continue
+                if name not in params_dict:
+                    continue
+
+                param = params_dict[name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                if weight_loader == default_weight_loader:
+                    weight_loader(param, loaded_weight)
+                else:
+                    weight_loader(param, loaded_weight, shard_id)
+                loaded_params.add(name)
+                break
+            else:
+                # Try expert params mapping (handles weights + quantization scales)
+                is_expert_weight = False
+                for mapping in expert_params_mapping:
+                    param_name, weight_name, expert_id, shard_id = mapping
+                    if weight_name not in name:
+                        continue
+
+                    # Mark as expert weight so we skip regular loading below
+                    is_expert_weight = True
+
+                    # Create mapped name without modifying original
+                    name_mapped = name.replace(weight_name, param_name)
+
+                    if is_pp_missing_parameter(name_mapped, self):
+                        continue
+                    if (
+                        name_mapped.endswith(ignore_suffixes)
+                        and name_mapped not in params_dict
+                    ):
+                        continue
+                    if name_mapped not in params_dict:
+                        continue
+
+                    param = params_dict[name_mapped]
+                    # Use return_success to handle expert parallelism correctly
+                    weight_loader = typing.cast(
+                        Callable[..., bool], param.weight_loader
+                    )
+                    success = weight_loader(
+                        param,
+                        loaded_weight,
+                        name_mapped,
+                        shard_id=shard_id,
+                        expert_id=expert_id,
+                        return_success=True,
+                    )
+                    if success:
+                        loaded_params.add(name_mapped)
+                        break
+                else:
+                    # Expert weight not mapped to this rank - skip
+                    if is_expert_weight:
+                        continue
+
+                    # Remap kv_scale names before the ignore_suffixes filter:
+                    # the suffix list includes .k_scale/.v_scale, so filtering
+                    # first drops the checkpoint key before remap can rewrite
+                    # it to the .attn.* name that exists in params_dict.
+                    name = maybe_remap_kv_scale_name(name, params_dict)
+                    if name is None:
+                        continue
+
+                    if name.endswith(ignore_suffixes) and name not in params_dict:
+                        continue
+
+                    if is_pp_missing_parameter(name, self):
+                        continue
+
+                    if name not in params_dict:
+                        continue
+
+                    param = params_dict[name]
+                    weight_loader = getattr(
+                        param, "weight_loader", default_weight_loader
+                    )
+                    weight_loader(param, loaded_weight)
+                    loaded_params.add(name)
+
+        return loaded_params
+
+
+class LagunaForCausalLM(nn.Module, SupportsPP, SupportsLoRA, SupportsEagle3):
+    fall_back_to_pt_during_load = False
+
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+    }
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        self.config = config
+        self.quant_config = quant_config
+
+        self.model = LagunaModel(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+
+        if get_pp_group().is_last_rank:
+            self.lm_head = ParallelLMHead(
+                config.vocab_size,
+                config.hidden_size,
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "lm_head"),
+            )
+            if self.config.tie_word_embeddings:
+                self.lm_head = self.lm_head.tie_weights(self.model.embed_tokens)
+        else:
+            self.lm_head = PPMissingLayer()
+
+        self.logits_processor = LogitsProcessor(config.vocab_size)
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        hidden_states = self.model(
+            input_ids, positions, intermediate_tensors, inputs_embeds
+        )
+        return hidden_states
+
+    def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor | None:
+        logits = self.logits_processor(self.lm_head, hidden_states)
+        return logits
+
+    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
+        return self.model.get_expert_mapping()
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(
+            self,
+            skip_prefixes=(["lm_head."] if self.config.tie_word_embeddings else None),
+        )
+        return loader.load_weights(weights)
diff --git a/vllm/model_executor/models/lfm2_moe.py b/vllm/model_executor/models/lfm2_moe.py
index d955b7127adc..55b00d2b9ea2 100644
--- a/vllm/model_executor/models/lfm2_moe.py
+++ b/vllm/model_executor/models/lfm2_moe.py
@@ -15,7 +15,10 @@
 )
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
@@ -150,7 +153,6 @@ def __init__(
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=config.norm_topk_prob,
             quant_config=quant_config,
             use_grouped_topk=True,  # needed for softmax score func
@@ -161,6 +163,7 @@ def __init__(
             num_redundant_experts=self.n_redundant_experts,
             scoring_func="sigmoid",
             e_score_correction_bias=self.gate.e_score_correction_bias,
+            routed_scaling_factor=self.routed_scaling_factor,
         )
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
@@ -170,16 +173,10 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
 
         # router_logits: (num_tokens, n_experts)
         router_logits, _ = self.gate(hidden_states)
-        final_hidden_states = (
-            self.experts(hidden_states=hidden_states, router_logits=router_logits)
-            * self.routed_scaling_factor
+        final_hidden_states = self.experts(
+            hidden_states=hidden_states, router_logits=router_logits
         )
 
-        if self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(  # noqa E501
-                final_hidden_states
-            )
-
         return final_hidden_states.view(orig_shape)
 
 
@@ -488,7 +485,7 @@ def forward(
         return hidden_states
 
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
-        return FusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="w1",
             ckpt_down_proj_name="w2",
diff --git a/vllm/model_executor/models/lfm2_siglip2.py b/vllm/model_executor/models/lfm2_siglip2.py
index 70ffa2afccf8..cb51c6bd8cc8 100644
--- a/vllm/model_executor/models/lfm2_siglip2.py
+++ b/vllm/model_executor/models/lfm2_siglip2.py
@@ -25,6 +25,7 @@
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 
+from .utils import maybe_prefix
 from .vision import (
     is_vit_use_data_parallel,
     resolve_visual_encoder_outputs,
@@ -472,7 +473,7 @@ def __init__(
             quant_config=quant_config,
             num_hidden_layers_override=num_hidden_layers_override,
             require_post_norm=require_post_norm,
-            prefix=f"{prefix}.vision_model",
+            prefix=maybe_prefix(prefix, "vision_model"),
         )
 
     def forward(
diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
index 2ecced3df8ba..3c797d05e932 100644
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -337,20 +337,15 @@ def get_quant_config(self, vllm_config: VllmConfig) -> QuantizationConfig | None
         return vllm_config.quant_config
 
 
-def llama_model_invariants(
-    input_ids, positions, intermediate_tensors=None, inputs_embeds=None
-):
-    """Shape invariants for Llama model compilation, those are translated to
-    runtime assertions for unbacked dynamic shapes and are compiled away for
-    backed"""
-    if input_ids is not None:
-        torch._check(positions.size()[0] == input_ids.size()[0])
-
-
 @support_torch_compile(
     # TODO[#32068]: Investigate recompilation
     # mark_unbacked_dims={"input_ids": 0},
-    shape_invariants=llama_model_invariants
+    dynamic_arg_dims={
+        "input_ids": {0: "b"},
+        "positions": {0: "b"},
+        "intermediate_tensors": {0: "b"},
+        "inputs_embeds": {0: "b"},
+    },
 )
 class LlamaModel(nn.Module, EagleModelMixin):
     def __init__(
diff --git a/vllm/model_executor/models/llama4.py b/vllm/model_executor/models/llama4.py
index b84b4e2ae512..bfcb72a6a744 100644
--- a/vllm/model_executor/models/llama4.py
+++ b/vllm/model_executor/models/llama4.py
@@ -36,7 +36,10 @@
     Attention,
     ChunkedLocalAttention,
 )
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     QKVParallelLinear,
@@ -127,7 +130,7 @@ def __init__(self, vllm_config: VllmConfig, prefix: str = ""):
         self.n_physical_experts = self.n_local_experts + self.n_redundant_experts
         self.n_local_physical_experts = self.n_physical_experts // self.ep_size
 
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.shared_expert,
             num_experts=config.num_local_experts,
             top_k=config.num_experts_per_tok,
@@ -135,7 +138,6 @@ def __init__(self, vllm_config: VllmConfig, prefix: str = ""):
             custom_routing_function=Llama4MoE.custom_routing_function,
             intermediate_size=intermediate_size_moe,
             apply_router_weight_on_input=True,
-            reduce_results=False,
             renormalize=False,
             quant_config=quant_config,
             prefix=f"{prefix}.experts",
@@ -151,19 +153,14 @@ def forward(self, hidden_states):
 
         router_logits, _ = self.router(hidden_states)
 
-        shared_out, routed_out = self.experts(
+        experts_out = self.experts(
             hidden_states=hidden_states,
             router_logits=router_logits,
         )
-        experts_out = routed_out + shared_out
 
         if self.is_sequence_parallel:
             experts_out = tensor_model_parallel_all_gather(experts_out, 0)
             experts_out = experts_out[:num_tokens]
-        elif self.tp_size > 1:
-            experts_out = self.experts.maybe_all_reduce_tensor_model_parallel(
-                experts_out
-            )
 
         return experts_out
 
@@ -420,7 +417,7 @@ def load_moe_expert_weights(
             params_dict: The dictionary of module parameters.
             loaded_params: The set of already loaded parameters.
             expert_params_mapping: The mapping of expert parameters. Must be
-                generated by SharedFusedMoE.make_expert_params_mapping().
+                generated by fused_moe_make_expert_params_mapping().
             fused: Whether the expert weights are fused into a single weight
                 tensor or are separate weight tensors for each expert.
                 When fused is True, loaded_weight should have shape of:
@@ -560,7 +557,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         fused_experts_params = False
         # Expert parameter mapping for the case where the expert weights are
         # not fused into a single weight tensor.
-        expert_params_mapping = SharedFusedMoE.make_expert_params_mapping(
+        expert_params_mapping = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -570,7 +567,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         )
         # Expert parameter mapping for the case where the expert weights are
         # fused into a single weight tensor.
-        expert_params_mapping_fused = SharedFusedMoE.make_expert_params_mapping(
+        expert_params_mapping_fused = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_up_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/llama4_eagle.py b/vllm/model_executor/models/llama4_eagle.py
index 6c7b53d4d525..962377fd178d 100644
--- a/vllm/model_executor/models/llama4_eagle.py
+++ b/vllm/model_executor/models/llama4_eagle.py
@@ -176,7 +176,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         )
         self.model = LlamaModel(
             vllm_config=vllm_config,
-            prefix="model",
+            prefix=maybe_prefix(prefix, "model"),
             start_layer_id=target_layer_num,
             quant_config=quant_config,
         )
diff --git a/vllm/model_executor/models/llama_eagle.py b/vllm/model_executor/models/llama_eagle.py
index 99a69adf1fc3..585c8f6dbd26 100644
--- a/vllm/model_executor/models/llama_eagle.py
+++ b/vllm/model_executor/models/llama_eagle.py
@@ -174,7 +174,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             vllm_config.parallel_config
         )
         self.model = LlamaModel(
-            vllm_config=vllm_config, prefix="model", start_layer_id=target_layer_num
+            vllm_config=vllm_config,
+            prefix=maybe_prefix(prefix, "model"),
+            start_layer_id=target_layer_num,
         )
 
         logit_scale = getattr(self.config, "logit_scale", 1.0)
diff --git a/vllm/model_executor/models/llama_eagle3.py b/vllm/model_executor/models/llama_eagle3.py
index fcec4a4d8609..9fd6652aa24f 100644
--- a/vllm/model_executor/models/llama_eagle3.py
+++ b/vllm/model_executor/models/llama_eagle3.py
@@ -145,12 +145,16 @@ def __init__(
         # Get drafter's quantization config
         self.quant_config = get_draft_quant_config(vllm_config)
 
-        eagle_config = getattr(self.config, "eagle_config", None)
-        if eagle_config is not None and "use_aux_hidden_state" in eagle_config:
+        eagle_config = getattr(self.config, "eagle_config", None) or {}
+        if "use_aux_hidden_state" in eagle_config:
             self.use_aux_hidden_state = eagle_config["use_aux_hidden_state"]
         else:
             self.use_aux_hidden_state = True
-        self.norm_before_fc = getattr(self.config, "norm_before_fc", False)
+        self.norm_before_fc = bool(
+            eagle_config.get(
+                "norm_before_fc", getattr(self.config, "norm_before_fc", False)
+            )
+        )
 
         current_vllm_config = get_current_vllm_config()
 
@@ -172,19 +176,40 @@ def __init__(
             ]
         )
         if self.use_aux_hidden_state:
-            if hasattr(self.config, "target_hidden_size"):
-                fc_input_size = self.config.target_hidden_size * 3
-            else:
-                fc_input_size = self.config.hidden_size * 3
+            self.num_aux_hidden_states = getattr(
+                self.config, "num_aux_hidden_states", None
+            )
+            if self.num_aux_hidden_states is None:
+                eagle_config = getattr(self.config, "eagle_config", None) or {}
+                layer_ids = eagle_config.get("eagle_aux_hidden_state_layer_ids")
+                self.num_aux_hidden_states = len(layer_ids) if layer_ids else 3
+
+            target_hidden_size = getattr(
+                self.config, "target_hidden_size", self.config.hidden_size
+            )
+            self.fc_input_size = target_hidden_size * self.num_aux_hidden_states
+
             if self.norm_before_fc:
                 self.input_norm = RMSNorm(
-                    fc_input_size,
+                    self.fc_input_size,
                     eps=self.config.rms_norm_eps,
                 )
             else:
                 self.input_norm = None
+
+            use_fc_norm = getattr(self.config, "fc_norm", False)
+            if use_fc_norm:
+                self.fc_norm = nn.ModuleList(
+                    [
+                        RMSNorm(target_hidden_size, eps=self.config.rms_norm_eps)
+                        for _ in range(self.num_aux_hidden_states)
+                    ]
+                )
+            else:
+                self.fc_norm = None
+
             self.fc = ReplicatedLinear(
-                input_size=fc_input_size,
+                input_size=self.fc_input_size,
                 output_size=self.config.hidden_size,
                 bias=False,
                 params_dtype=vllm_config.model_config.dtype,
@@ -192,6 +217,8 @@ def __init__(
                 prefix=maybe_prefix(prefix, "fc"),
                 return_bias=False,
             )
+
+        self.norm_output = getattr(self.config, "norm_output", False)
         self.norm = RMSNorm(
             self.config.hidden_size,
             eps=self.config.rms_norm_eps,
@@ -220,7 +247,11 @@ def forward(
                 residual=residual,
             )
         hidden_states, hidden_prenorm = self.norm(hidden_states, residual)
-        return hidden_states, hidden_prenorm
+
+        # norm_output variant uses the post-norm hidden states.
+        aux_output = hidden_states if self.norm_output else hidden_prenorm
+
+        return hidden_states, aux_output
 
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         stacked_params_mapping = [
@@ -287,7 +318,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         # proper layer_types indexing in draft models
         self.config.target_layer_count = target_layer_num
         self.model = LlamaModel(
-            vllm_config=vllm_config, prefix="model", start_layer_id=target_layer_num
+            vllm_config=vllm_config,
+            prefix=maybe_prefix(prefix, "model"),
+            start_layer_id=target_layer_num,
         )
 
         logit_scale = getattr(self.config, "logit_scale", 1.0)
@@ -310,11 +343,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         if self.use_parallel_drafting:
             self.register_buffer(
                 "mask_hidden",
-                torch.zeros(
-                    1,
-                    (3 if self.model.use_aux_hidden_state else 1)
-                    * self.config.hidden_size,
-                ),
+                torch.zeros(1, self.model.fc_input_size),
                 persistent=False,
             )
 
@@ -369,6 +398,16 @@ def combine_hidden_states(
 
         if self.model.norm_before_fc:
             hidden_states = self.model.input_norm(hidden_states)
+
+        # `norm_before_fc` adds a single RMSNorm before the FC layer, whereas `fc_norm`
+        # applies separate RMSNorms to each chunk of the hidden states.
+        if self.model.fc_norm is not None:
+            chunks = hidden_states.chunk(self.model.num_aux_hidden_states, dim=-1)
+            hidden_states = torch.cat(
+                [norm(chunk) for norm, chunk in zip(self.model.fc_norm, chunks)],
+                dim=-1,
+            )
+
         return self.model.fc(hidden_states)
 
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
diff --git a/vllm/model_executor/models/llava_next.py b/vllm/model_executor/models/llava_next.py
index 739c90a4292b..fdd1d41af5a3 100644
--- a/vllm/model_executor/models/llava_next.py
+++ b/vllm/model_executor/models/llava_next.py
@@ -214,7 +214,7 @@ def _get_mm_fields_config(
     ) -> Mapping[str, MultiModalFieldConfig]:
         return dict(
             pixel_values=MultiModalFieldConfig.batched("image"),
-            image_sizes=MultiModalFieldConfig.batched("image"),
+            image_sizes=MultiModalFieldConfig.batched("image", keep_on_cpu=True),
             image_embeds=MultiModalFieldConfig.batched("image"),
         )
 
diff --git a/vllm/model_executor/models/llava_onevision.py b/vllm/model_executor/models/llava_onevision.py
index 638d9ba9d892..1beec4207d53 100644
--- a/vllm/model_executor/models/llava_onevision.py
+++ b/vllm/model_executor/models/llava_onevision.py
@@ -316,7 +316,7 @@ def _get_mm_fields_config(
     ) -> Mapping[str, MultiModalFieldConfig]:
         return dict(
             pixel_values=MultiModalFieldConfig.batched("image"),
-            image_sizes=MultiModalFieldConfig.batched("image"),
+            image_sizes=MultiModalFieldConfig.batched("image", keep_on_cpu=True),
             image_embeds=MultiModalFieldConfig.batched("image"),
             pixel_values_videos=MultiModalFieldConfig.batched("video"),
         )
diff --git a/vllm/model_executor/models/longcat_flash.py b/vllm/model_executor/models/longcat_flash.py
index a9e2c2268ee1..3dd1118aa8a4 100644
--- a/vllm/model_executor/models/longcat_flash.py
+++ b/vllm/model_executor/models/longcat_flash.py
@@ -46,7 +46,10 @@
 from vllm.distributed import get_pp_group
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import SiluAndMul
-from vllm.model_executor.layers.fused_moe import FusedMoE, ZeroExpertFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
@@ -66,6 +69,7 @@
 
 from .interfaces import SupportsLoRA, SupportsPP
 from .utils import (
+    AutoWeightsLoader,
     PPMissingLayer,
     is_pp_missing_parameter,
     make_empty_intermediate_tensors_factory,
@@ -292,17 +296,14 @@ def __init__(
             prefix=f"{prefix}.gate",
         )
 
-        assert config.zero_expert_num is not None
         assert config.zero_expert_type is not None
-        self.experts = ZeroExpertFusedMoE(
-            zero_expert_num=config.zero_expert_num,
+        self.experts = FusedMoE(
             zero_expert_type=config.zero_expert_type,
-            router=self.router,
+            e_score_correction_bias=self.router.e_score_correction_bias,
             num_experts=num_experts,
             top_k=top_k,
             hidden_size=hidden_size,
             intermediate_size=intermediate_size,
-            reduce_results=True,
             params_dtype=params_dtype,
             renormalize=False,
             quant_config=quant_config,
@@ -332,7 +333,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
             hidden_states_padded.to(self.router_params_dtype)
         )
 
-        # ZeroExpertFusedMoE handles routing memoization and zero expert computation
+        # FusedMoE handles routing memoization and zero expert computation
         # internally. Pass full router_logits (including zero experts) so that
         # zero experts can be properly identified in routing.
         final_hidden_states = self.experts(
@@ -485,6 +486,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         cache_config = vllm_config.cache_config
         quant_config = vllm_config.quant_config
         self.config = config
+        self.quant_config = quant_config
 
         self.vocab_size = config.vocab_size
 
@@ -551,81 +553,10 @@ def forward(
         hidden_states, _ = self.norm(hidden_states, residual)
         return hidden_states
 
-
-class LongcatFlashForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
-    """Flash model for causal language modeling."""
-
-    packed_modules_mapping = {
-        "qkv_proj": [
-            "q_proj",
-            "k_proj",
-            "v_proj",
-        ],
-        "gate_up_proj": [
-            "gate_proj",
-            "up_proj",
-        ],
-    }
-
-    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
-        super().__init__()
-        config = FlashConfig(**vllm_config.model_config.hf_config.__dict__)
-        quant_config = vllm_config.quant_config
-
-        self.config = config
-        config.intermediate_size = (
-            config.ffn_hidden_size
-            if hasattr(config, "ffn_hidden_size")
-            else config.intermediate_size
-        )
-
-        self.quant_config = quant_config
-
-        self.model = FlashModel(
-            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
-        )
-
-        if get_pp_group().is_last_rank:
-            self.lm_head = ParallelLMHead(
-                config.vocab_size,
-                config.hidden_size,
-                quant_config=quant_config,
-                prefix=maybe_prefix(prefix, "lm_head"),
-            )
-        else:
-            self.lm_head = PPMissingLayer()
-
-        self.logits_processor = LogitsProcessor(config.vocab_size)
-        self.make_empty_intermediate_tensors = (
-            self.model.make_empty_intermediate_tensors
-        )
-
-    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
-        return self.model.embed_input_ids(input_ids)
-
-    def forward(
-        self,
-        input_ids: torch.Tensor | None,
-        positions: torch.Tensor,
-        intermediate_tensors: IntermediateTensors | None = None,
-        inputs_embeds: torch.Tensor | None = None,
-    ) -> torch.Tensor | IntermediateTensors:
-        hidden_states = self.model(
-            input_ids, positions, intermediate_tensors, inputs_embeds
-        )
-        return hidden_states
-
-    def compute_logits(
-        self,
-        hidden_states: torch.Tensor,
-    ) -> torch.Tensor | None:
-        logits = self.logits_processor(self.lm_head, hidden_states)
-        return logits
-
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return FusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -730,9 +661,9 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
             loaded_params.add(name)
         for layer_id in range(self.config.num_hidden_layers):
             for i in range(2):
-                if isinstance(self.model.layers[layer_id], PPMissingLayer):
+                if isinstance(self.layers[layer_id], PPMissingLayer):
                     continue
-                self_attn = self.model.layers[layer_id].self_attn[i]
+                self_attn = self.layers[layer_id].self_attn[i]
                 if hasattr(
                     self.quant_config, "weight_block_size"
                 ) and self_attn.kv_b_proj.weight.dtype in (
@@ -765,3 +696,81 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                         self.config.hidden_size / self.config.kv_lora_rank
                     ) ** 0.5
         return loaded_params
+
+
+class LongcatFlashForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
+    """Flash model for causal language modeling."""
+
+    packed_modules_mapping = {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = FlashConfig(**vllm_config.model_config.hf_config.__dict__)
+        quant_config = vllm_config.quant_config
+
+        self.config = config
+        config.intermediate_size = (
+            config.ffn_hidden_size
+            if hasattr(config, "ffn_hidden_size")
+            else config.intermediate_size
+        )
+
+        self.quant_config = quant_config
+
+        self.model = FlashModel(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+
+        if get_pp_group().is_last_rank:
+            self.lm_head = ParallelLMHead(
+                config.vocab_size,
+                config.hidden_size,
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "lm_head"),
+            )
+        else:
+            self.lm_head = PPMissingLayer()
+
+        self.logits_processor = LogitsProcessor(config.vocab_size)
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        hidden_states = self.model(
+            input_ids, positions, intermediate_tensors, inputs_embeds
+        )
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        logits = self.logits_processor(self.lm_head, hidden_states)
+        return logits
+
+    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
+        return self.model.get_expert_mapping()
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(weights)
diff --git a/vllm/model_executor/models/mimo_audio.py b/vllm/model_executor/models/mimo_audio.py
new file mode 100644
index 000000000000..91d46b1ceacd
--- /dev/null
+++ b/vllm/model_executor/models/mimo_audio.py
@@ -0,0 +1,1389 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""MiMo audio: tokenizer, encoding utilities, and audio encoder.
+
+Ported from SGLang's mimo_audio.py.
+Audio tokenizer adapted from https://github.com/XiaomiMiMo/MiMo-Audio-Tokenizer.git
+"""
+
+import dataclasses
+import json
+import logging
+import math
+import os
+import typing as tp
+from dataclasses import dataclass
+from functools import wraps
+
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange, repeat
+from transformers.activations import ACT2FN
+from transformers.configuration_utils import PretrainedConfig
+from transformers.modeling_utils import PreTrainedModel
+from transformers.models.qwen2.configuration_qwen2 import Qwen2Config
+from transformers.models.qwen2.modeling_qwen2 import Qwen2Model
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Vector quantization (from MiMo-Audio-Tokenizer)
+# ---------------------------------------------------------------------------
+
+
+def _vq_default(val: tp.Any, d: tp.Any) -> tp.Any:
+    return val if val is not None else d
+
+
+def _ema_inplace(moving_avg, new, decay: float):
+    if dist.is_initialized():
+        dist.all_reduce(new, op=dist.ReduceOp.SUM)
+    moving_avg.data.mul_(decay).add_(new, alpha=(1 - decay))
+
+
+def _laplace_smoothing(x, n_categories: int, epsilon: float = 1e-5):
+    return (x + epsilon) / (x.sum() + n_categories * epsilon)
+
+
+def _uniform_init(*shape: int):
+    t = torch.empty(shape)
+    nn.init.kaiming_uniform_(t)
+    return t
+
+
+def _sample_vectors(samples, num: int):
+    num_samples, device = samples.shape[0], samples.device
+
+    if num_samples >= num:
+        indices = torch.randperm(num_samples, device=device)[:num]
+    else:
+        indices = torch.randint(0, num_samples, (num,), device=device)
+
+    selected_samples = samples[indices]
+
+    if dist.is_initialized():
+        dist.broadcast(selected_samples, src=0)
+
+    return selected_samples
+
+
+def _kmeans(samples, num_clusters: int, num_iters: int = 10):
+    dim, dtype = samples.shape[-1], samples.dtype
+
+    means = _sample_vectors(samples, num_clusters)
+
+    for _ in range(num_iters):
+        dists = -(
+            samples.pow(2).sum(1, keepdim=True)
+            - 2 * samples @ means.t()
+            + means.t().pow(2).sum(0, keepdim=True)
+        )
+
+        buckets = dists.max(dim=-1).indices
+        bins = torch.bincount(buckets, minlength=num_clusters)
+
+        new_means = buckets.new_zeros(num_clusters, dim, dtype=dtype)
+        new_means = new_means.scatter_add_(
+            0, repeat(buckets, "n -> n d", d=dim), samples
+        )
+
+        if dist.is_initialized():
+            dist.all_reduce(bins, op=dist.ReduceOp.SUM)
+            dist.all_reduce(new_means, op=dist.ReduceOp.SUM)
+
+        zero_mask = bins == 0
+        bins_min_clamped = bins.masked_fill(zero_mask, 1)
+
+        new_means = new_means / bins_min_clamped[..., None]
+
+        means = torch.where(zero_mask[..., None], means, new_means)
+
+    return means, bins
+
+
+def _rotate_half(x):
+    x1 = x[..., : x.shape[-1] // 2]
+    x2 = x[..., x.shape[-1] // 2 :]
+    return torch.cat((-x2, x1), dim=-1)
+
+
+def apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1):
+    cos = cos.unsqueeze(unsqueeze_dim)
+    sin = sin.unsqueeze(unsqueeze_dim)
+    q_embed = (q * cos) + (_rotate_half(q) * sin)
+    k_embed = (k * cos) + (_rotate_half(k) * sin)
+    return q_embed, k_embed
+
+
+def _compute_default_rope_parameters(
+    config=None, device=None, seq_len=None, **rope_kwargs
+):
+    if config is not None and len(rope_kwargs) > 0:
+        raise ValueError(
+            "Unexpected arguments: `**rope_kwargs` and `config` are mutually exclusive"
+        )
+    if len(rope_kwargs) > 0:
+        base = rope_kwargs["base"]
+        dim = rope_kwargs["dim"]
+    elif config is not None:
+        base = config.rope_theta
+        partial_rotary_factor = (
+            config.partial_rotary_factor
+            if hasattr(config, "partial_rotary_factor")
+            else 1.0
+        )
+        head_dim = (
+            getattr(config, "head_dim", None)
+            or config.hidden_size // config.num_attention_heads
+        )
+        dim = int(head_dim * partial_rotary_factor)
+    attention_factor = 1.0
+    inv_freq = 1.0 / (
+        base
+        ** (
+            torch.arange(0, dim, 2, dtype=torch.int64).to(
+                device=device, dtype=torch.float
+            )
+            / dim
+        )
+    )
+    return inv_freq, attention_factor
+
+
+_ROPE_INIT_FUNCTIONS = {
+    "default": _compute_default_rope_parameters,
+}
+
+
+def _dynamic_rope_update(rope_forward):
+    def dynamic_frequency_update(self, position_ids, device):
+        seq_len = torch.max(position_ids) + 1
+        if seq_len > self.max_seq_len_cached:
+            inv_freq, self.attention_scaling = self.rope_init_fn(
+                self.config, device, seq_len=seq_len
+            )
+            self.register_buffer("inv_freq", inv_freq, persistent=False)
+            self.max_seq_len_cached = seq_len
+
+        if (
+            seq_len < self.original_max_seq_len
+            and self.max_seq_len_cached > self.original_max_seq_len
+        ):
+            self.original_inv_freq = self.original_inv_freq.to(device)
+            self.register_buffer("inv_freq", self.original_inv_freq, persistent=False)
+            self.max_seq_len_cached = self.original_max_seq_len
+
+    @wraps(rope_forward)
+    def wrapper(self, x, position_ids):
+        if "dynamic" in self.rope_type:
+            dynamic_frequency_update(self, position_ids, device=x.device)
+        return rope_forward(self, x, position_ids)
+
+    return wrapper
+
+
+class AudioRotaryEmbedding(nn.Module):
+    def __init__(self, base, dim, max_seq_len, rope_type="default", device=None):
+        super().__init__()
+        self.max_seq_len = max_seq_len
+        self.rope_type = rope_type
+        self.rope_init_fn = _ROPE_INIT_FUNCTIONS[self.rope_type]
+        inv_freq, self.attention_scaling = self.rope_init_fn(
+            device=device, base=base, dim=dim
+        )
+        self.register_buffer("inv_freq", inv_freq, persistent=False)
+        self.original_inv_freq = self.inv_freq
+
+    @torch.no_grad()
+    @_dynamic_rope_update
+    def forward(self, x, position_ids):
+        inv_freq_expanded = self.inv_freq[:, None].float().expand(-1, 1).to(x.device)
+        position_ids_expanded = position_ids[None, :].float()
+        device_type = (
+            x.device.type
+            if isinstance(x.device.type, str) and x.device.type != "mps"
+            else "cpu"
+        )
+        with torch.autocast(device_type=device_type, enabled=False):
+            freqs = (
+                inv_freq_expanded.float() @ position_ids_expanded.float()
+            ).transpose(0, 1)
+            emb = torch.cat((freqs, freqs), dim=-1)
+            cos = emb.cos() * self.attention_scaling
+            sin = emb.sin() * self.attention_scaling
+        return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)
+
+
+class EuclideanCodebook(nn.Module):
+    def __init__(
+        self,
+        dim: int,
+        codebook_size: int,
+        kmeans_init: int = False,
+        kmeans_iters: int = 10,
+        decay: float = 0.99,
+        epsilon: float = 1e-5,
+        threshold_ema_dead_code: int = 2,
+    ):
+        super().__init__()
+        self.decay = decay
+        init_fn: tp.Callable[..., torch.Tensor] | tp.Any = (
+            _uniform_init if not kmeans_init else torch.zeros
+        )
+        embed = init_fn(codebook_size, dim)
+
+        self.codebook_size = codebook_size
+        self.kmeans_iters = kmeans_iters
+        self.epsilon = epsilon
+        self.threshold_ema_dead_code = threshold_ema_dead_code
+
+        self.register_buffer("inited", torch.Tensor([not kmeans_init]))
+        self.register_buffer("cluster_size", torch.zeros(codebook_size))
+        self.register_buffer("embed", embed)
+        self.register_buffer("embed_avg", embed.clone())
+
+    @torch.jit.ignore
+    def init_embed_(self, data):
+        if self.inited:
+            return
+
+        embed, cluster_size = _kmeans(data, self.codebook_size, self.kmeans_iters)
+        self.embed.data.copy_(embed)
+        self.embed_avg.data.copy_(embed.clone())
+        self.cluster_size.data.copy_(cluster_size)
+        self.inited.data.copy_(torch.Tensor([True]))
+
+    def replace_(self, samples, mask):
+        replace_num = mask.sum()
+        modified_codebook = self.embed.clone()
+        modified_codebook[mask] = _sample_vectors(samples, replace_num)
+        self.embed.data.copy_(modified_codebook)
+
+    def expire_codes_(self, batch_samples):
+        if self.threshold_ema_dead_code == 0:
+            return
+
+        expired_codes = self.cluster_size < self.threshold_ema_dead_code
+        if not torch.any(expired_codes):
+            return
+
+        batch_samples = rearrange(batch_samples, "... d -> (...) d")
+        self.replace_(batch_samples, mask=expired_codes)
+
+    def preprocess(self, x):
+        x = rearrange(x, "... d -> (...) d")
+        return x
+
+    def quantize(self, x):
+        embed = self.embed.t()
+        dist_val = -(
+            x.pow(2).sum(1, keepdim=True)
+            - 2 * x @ embed
+            + embed.pow(2).sum(0, keepdim=True)
+        )
+        embed_ind = dist_val.max(dim=-1).indices
+        return embed_ind
+
+    def postprocess_emb(self, embed_ind, shape):
+        return embed_ind.view(*shape[:-1])
+
+    def dequantize(self, embed_ind):
+        quantize = F.embedding(embed_ind, self.embed)
+        return quantize
+
+    def encode(self, x):
+        shape = x.shape
+        x = self.preprocess(x)
+        embed_ind = self.quantize(x)
+        embed_ind = self.postprocess_emb(embed_ind, shape)
+        return embed_ind
+
+    def decode(self, embed_ind):
+        quantize = self.dequantize(embed_ind)
+        return quantize
+
+    def forward(self, x):
+        shape, dtype = x.shape, x.dtype
+        x = self.preprocess(x)
+
+        self.init_embed_(x)
+
+        embed_ind = self.quantize(x)
+        embed_onehot = F.one_hot(embed_ind, self.codebook_size).type(dtype)
+        embed_ind = self.postprocess_emb(embed_ind, shape)
+        quantize = self.dequantize(embed_ind)
+
+        if self.training:
+            self.expire_codes_(x)
+            _ema_inplace(self.cluster_size, embed_onehot.sum(0), self.decay)
+            embed_sum = x.t() @ embed_onehot
+            _ema_inplace(self.embed_avg, embed_sum.t().contiguous(), self.decay)
+            cluster_size = (
+                _laplace_smoothing(self.cluster_size, self.codebook_size, self.epsilon)
+                * self.cluster_size.sum()
+            )
+            embed_normalized = self.embed_avg / cluster_size.unsqueeze(1)
+            self.embed.data.copy_(embed_normalized)
+
+        return quantize, embed_ind
+
+
+class VectorQuantization(nn.Module):
+    def __init__(
+        self,
+        dim: int,
+        codebook_size: int,
+        codebook_dim: int | None = None,
+        decay: float = 0.99,
+        epsilon: float = 1e-5,
+        kmeans_init: bool = True,
+        kmeans_iters: int = 50,
+        threshold_ema_dead_code: int = 2,
+        commitment_weight: float = 1.0,
+    ):
+        super().__init__()
+        _codebook_dim: int = _vq_default(codebook_dim, dim)
+
+        requires_projection = _codebook_dim != dim
+        self.project_in = (
+            nn.Linear(dim, _codebook_dim) if requires_projection else nn.Identity()
+        )
+        self.project_out = (
+            nn.Linear(_codebook_dim, dim) if requires_projection else nn.Identity()
+        )
+
+        self.epsilon = epsilon
+        self.commitment_weight = commitment_weight
+
+        self._codebook = EuclideanCodebook(
+            dim=_codebook_dim,
+            codebook_size=codebook_size,
+            kmeans_init=kmeans_init,
+            kmeans_iters=kmeans_iters,
+            decay=decay,
+            epsilon=epsilon,
+            threshold_ema_dead_code=threshold_ema_dead_code,
+        )
+        self.codebook_size = codebook_size
+
+    @property
+    def codebook(self):
+        return self._codebook.embed
+
+    def encode(self, x):
+        x = self.project_in(x)
+        embed_in = self._codebook.encode(x)
+        return embed_in
+
+    def decode(self, embed_ind):
+        quantize = self._codebook.decode(embed_ind)
+        quantize = self.project_out(quantize)
+        return quantize
+
+    def forward(self, x):
+        device = x.device
+        x = self.project_in(x)
+
+        quantize, embed_ind = self._codebook(x)
+
+        if self.training:
+            quantize = x + (quantize - x).detach()
+
+        loss = torch.tensor([0.0], device=device, requires_grad=self.training)
+
+        quantize = self.project_out(quantize)
+        return quantize, embed_ind, loss
+
+
+class ResidualVectorQuantization(nn.Module):
+    def __init__(self, *, num_quantizers, codebook_size, **kwargs):
+        super().__init__()
+        if isinstance(codebook_size, int):
+            codebook_size = [codebook_size] * num_quantizers
+        elif len(codebook_size) < num_quantizers:
+            codebook_size += [codebook_size[-1]] * (num_quantizers - len(codebook_size))
+        self.layers = nn.ModuleList(
+            [
+                VectorQuantization(codebook_size=codebook_size[i], **kwargs)
+                for i in range(num_quantizers)
+            ]
+        )
+
+    def forward(self, x, n_q: int | None = None, layers: list | None = None):
+        quantized_out = 0.0
+        residual = x
+
+        all_losses = []
+        all_indices = []
+        out_quantized = []
+
+        n_q = n_q or len(self.layers)
+
+        for i, layer in enumerate(self.layers[:n_q]):
+            quantized, indices, loss = layer(residual)
+            residual = residual - quantized
+            quantized_out = quantized_out + quantized
+
+            all_indices.append(indices)
+            all_losses.append(loss)
+            if layers and i in layers:
+                out_quantized.append(quantized_out)
+
+        out_losses, out_indices = map(torch.stack, (all_losses, all_indices))
+        return quantized_out, out_indices, out_losses, out_quantized
+
+    def encode(
+        self, x: torch.Tensor, n_q: int | None = None, st: int | None = None
+    ) -> torch.Tensor:
+        residual = x
+        all_indices = []
+        n_q = len(self.layers) if n_q is None else n_q
+        st = 0 if st is None else st
+        for layer in self.layers[st:n_q]:
+            indices = layer.encode(residual)
+            quantized = layer.decode(indices)
+            residual = residual - quantized
+            all_indices.append(indices)
+        out_indices = torch.stack(all_indices)
+        return out_indices
+
+    def decode(self, q_indices: torch.Tensor, st: int = 0) -> torch.Tensor:
+        quantized_out = self.layers[st].decode(q_indices[0])
+        for i in range(1, len(q_indices)):
+            layer = self.layers[st + i]
+            quantized = layer.decode(q_indices[i])
+            quantized_out = quantized_out + quantized
+        return quantized_out
+
+
+class ResidualVectorQuantizer(nn.Module):
+    def __init__(
+        self,
+        dimension: int = 256,
+        n_q: int = 8,
+        bins: int | list = 1024,
+        decay: float = 0.99,
+        kmeans_init: bool = True,
+        kmeans_iters: int = 50,
+        threshold_ema_dead_code: int = 2,
+    ):
+        super().__init__()
+        self.n_q = n_q
+        self.dimension = dimension
+        self.bins = bins
+        self.decay = decay
+        self.kmeans_init = kmeans_init
+        self.kmeans_iters = kmeans_iters
+        self.threshold_ema_dead_code = threshold_ema_dead_code
+        self.vq = ResidualVectorQuantization(
+            dim=self.dimension,
+            codebook_size=self.bins,
+            num_quantizers=self.n_q,
+            decay=self.decay,
+            kmeans_init=self.kmeans_init,
+            kmeans_iters=self.kmeans_iters,
+            threshold_ema_dead_code=self.threshold_ema_dead_code,
+        )
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        n_q: int | None = None,
+        layers: list | None = None,
+    ):
+        n_q = n_q if n_q else self.n_q
+        quantized, codes, commit_loss, quantized_list = self.vq(
+            x, n_q=n_q, layers=layers
+        )
+        return quantized, codes, torch.mean(commit_loss), quantized_list
+
+    def encode(
+        self, x: torch.Tensor, n_q: int | None = None, st: int | None = None
+    ) -> torch.Tensor:
+        n_q = n_q if n_q else self.n_q
+        st = st or 0
+        codes = self.vq.encode(x, n_q=n_q, st=st)
+        return codes
+
+    def decode(self, codes: torch.Tensor, st: int = 0) -> torch.Tensor:
+        quantized = self.vq.decode(codes, st=st)
+        return quantized
+
+
+# ---------------------------------------------------------------------------
+# Audio tokenizer
+# ---------------------------------------------------------------------------
+
+
+class MiMoAudioTokenizerConfig(PretrainedConfig):
+    model_type = "mimo_audio_tokenizer"
+
+    def __init__(
+        self,
+        max_audio_seconds: int = 1800,
+        stride_size: int = 2,
+        avg_pooler: int = 1,
+        d_model: int = 768,
+        scale_embedding: bool = True,
+        kernel_size: int = 3,
+        activation_function: str = "gelu",
+        encoder_layers: int = 8,
+        encoder_skip_layer_id: int = None,
+        encoder_attention_heads: int = 12,
+        encoder_ffn_dim: int = 3072,
+        encoder_causal: bool = False,
+        encoder_attn_window_size: list = None,
+        decoder_layers: int = 8,
+        decoder_attention_heads: int = 12,
+        decoder_ffn_dim: int = 3072,
+        decoder_kernel_size: int = 3,
+        decoder_stride_size: int = 2,
+        decoder_causal: bool = True,
+        decoder_attn_window_size: list = None,
+        nfft: int = 1024,
+        vocoder_dim: int = 512,
+        vocoder_intermediate_dim: int = 4096,
+        vocoder_num_layers: int = 30,
+        n_mels: int = 80,
+        sampling_rate: int = 24000,
+        hop_length: int = 240,
+        window_size: int = 1024,
+        vocoder_padding: str = "same",
+        fmin: int = 0,
+        fmax: int = None,
+        num_quantizers: int = 12,
+        codebook_size: list = None,
+        threshold_ema_dead_code: int = 10,
+        position_embedding_type: str = "rope",
+        rope_theta: int = 10000,
+        rope_type: str = "default",
+        ln_type: str = "LayerNorm",
+        vocoder_attention_heads: int = 4,
+        vocoder_attn_window_size: list = None,
+        use_istft_only: bool = False,
+        hybrid_attention: bool = False,
+        hybrid_block_size: int = 8,
+        swa_per_block: int = 2,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.max_audio_seconds = max_audio_seconds
+        self.stride_size = stride_size
+        self.avg_pooler = avg_pooler
+        self.d_model = d_model
+        self.scale_embedding = scale_embedding
+        self.kernel_size = kernel_size
+        self.activation_function = activation_function
+        self.encoder_layers = encoder_layers
+        self.encoder_skip_layer_id = encoder_skip_layer_id
+        self.encoder_attention_heads = encoder_attention_heads
+        self.encoder_ffn_dim = encoder_ffn_dim
+        self.encoder_causal = encoder_causal
+        self.encoder_attn_window_size = (
+            encoder_attn_window_size
+            if encoder_attn_window_size is not None
+            else [-1, -1]
+        )
+        self.decoder_layers = decoder_layers
+        self.decoder_attention_heads = decoder_attention_heads
+        self.decoder_ffn_dim = decoder_ffn_dim
+        self.decoder_kernel_size = decoder_kernel_size
+        self.decoder_stride_size = decoder_stride_size
+        self.decoder_causal = decoder_causal
+        self.decoder_attn_window_size = (
+            decoder_attn_window_size
+            if decoder_attn_window_size is not None
+            else [-1, -1]
+        )
+        self.nfft = nfft
+        self.vocoder_dim = vocoder_dim
+        self.vocoder_intermediate_dim = vocoder_intermediate_dim
+        self.vocoder_num_layers = vocoder_num_layers
+        self.n_mels = n_mels
+        self.sampling_rate = sampling_rate
+        self.hop_length = hop_length
+        self.window_size = window_size
+        self.vocoder_padding = vocoder_padding
+        self.fmin = fmin
+        self.fmax = fmax
+        self.num_quantizers = num_quantizers
+        self.codebook_size = codebook_size if codebook_size is not None else [1024]
+        self.threshold_ema_dead_code = threshold_ema_dead_code
+        self.position_embedding_type = position_embedding_type
+        self.rope_theta = rope_theta
+        self.rope_type = rope_type
+        self.ln_type = ln_type
+        self.vocoder_attention_heads = vocoder_attention_heads
+        self.vocoder_attn_window_size = (
+            vocoder_attn_window_size
+            if vocoder_attn_window_size is not None
+            else [40, 10]
+        )
+        self.use_istft_only = use_istft_only
+        self.hybrid_attention = hybrid_attention
+        self.hybrid_block_size = hybrid_block_size
+        self.swa_per_block = swa_per_block
+
+
+def get_sequence_mask(inputs, inputs_length):
+    if inputs.dim() == 3:
+        bsz, tgt_len, _ = inputs.size()
+    else:
+        bsz, tgt_len = inputs_length.shape[0], torch.max(inputs_length)
+    sequence_mask = torch.arange(0, tgt_len).to(inputs.device)
+    sequence_mask = torch.lt(sequence_mask, inputs_length.reshape(bsz, 1)).view(
+        bsz, tgt_len, 1
+    )
+    unpacking_index = torch.cumsum(sequence_mask.to(torch.int64).view(-1), dim=0) - 1
+    return sequence_mask, unpacking_index
+
+
+def unpack_hidden_states(
+    hidden_states, lengths, sequence_mask=None, unpacking_index=None
+):
+    bsz = lengths.shape[0]
+    if sequence_mask is None or unpacking_index is None:
+        sequence_mask, unpacking_index = get_sequence_mask(hidden_states, lengths)
+    hidden_states = torch.index_select(hidden_states, 0, unpacking_index).view(
+        bsz, torch.max(lengths), hidden_states.shape[-1]
+    )
+    return torch.where(sequence_mask, hidden_states, 0)
+
+
+def get_position_ids(lengths):
+    total_len = lengths.sum()
+    offset = torch.cat([torch.zeros(1).to(lengths), lengths[:-1].cumsum(dim=0)])
+    offset = torch.repeat_interleave(offset, lengths)
+    return torch.arange(0, total_len).to(offset) - offset
+
+
+LAYER_NORM = {"LayerNorm": nn.LayerNorm}
+
+
+class AudioEncoderAttention(nn.Module):
+    def __init__(
+        self,
+        embed_dim: int,
+        num_heads: int,
+        window_size: tuple[int, int] = (-1, -1),
+        causal: bool = False,
+    ):
+        super().__init__()
+        self.embed_dim = embed_dim
+        self.num_heads = num_heads
+        self.head_dim = embed_dim // num_heads
+        self.window_size = window_size
+        self.causal = causal
+
+        self.k_proj = nn.Linear(embed_dim, embed_dim, bias=False)
+        self.v_proj = nn.Linear(embed_dim, embed_dim, bias=True)
+        self.q_proj = nn.Linear(embed_dim, embed_dim, bias=True)
+        self.out_proj = nn.Linear(embed_dim, embed_dim, bias=True)
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        cu_seqlens: torch.Tensor,
+        max_seqlen: int,
+        rope_position_embeddings=None,
+    ):
+        from vllm.vllm_flash_attn import flash_attn_varlen_func
+
+        bsz, _ = hidden_states.size()
+
+        query_states = self.q_proj(hidden_states).view(
+            bsz, self.num_heads, self.head_dim
+        )
+        key_states = self.k_proj(hidden_states).view(bsz, self.num_heads, self.head_dim)
+        value_states = self.v_proj(hidden_states).view(
+            bsz, self.num_heads, self.head_dim
+        )
+
+        if rope_position_embeddings is not None:
+            cos, sin = rope_position_embeddings
+            query_states, key_states = apply_rotary_pos_emb(
+                query_states, key_states, cos, sin
+            )
+
+        attn_output = flash_attn_varlen_func(
+            query_states,
+            key_states,
+            value_states,
+            cu_seqlens_q=cu_seqlens,
+            cu_seqlens_k=cu_seqlens,
+            max_seqlen_q=max_seqlen,
+            max_seqlen_k=max_seqlen,
+            causal=self.causal,
+            window_size=list(self.window_size),
+        )
+
+        attn_output = attn_output.reshape(bsz, self.embed_dim)
+        attn_output = self.out_proj(attn_output)
+        return attn_output
+
+
+class AudioEncoderTransformerLayer(nn.Module):
+    def __init__(
+        self,
+        config: MiMoAudioTokenizerConfig,
+        causal: bool,
+        attn_window_size: tuple[int, int] = (-1, -1),
+    ):
+        super().__init__()
+        self.embed_dim = config.d_model
+
+        self.self_attn = AudioEncoderAttention(
+            embed_dim=self.embed_dim,
+            num_heads=config.encoder_attention_heads,
+            window_size=attn_window_size,
+            causal=causal,
+        )
+        self.self_attn_layer_norm = LAYER_NORM[config.ln_type](self.embed_dim)
+
+        self.activation_fn = ACT2FN[config.activation_function]
+        self.fc1 = nn.Linear(self.embed_dim, config.encoder_ffn_dim)
+        self.fc2 = nn.Linear(config.encoder_ffn_dim, self.embed_dim)
+        self.final_layer_norm = LAYER_NORM[config.ln_type](self.embed_dim)
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        cu_seqlens: torch.Tensor,
+        max_seqlen: int,
+        rope_position_embeddings: tuple[torch.Tensor, torch.Tensor],
+    ) -> torch.Tensor:
+        residual = hidden_states
+        hidden_states = self.self_attn_layer_norm(hidden_states)
+        hidden_states = self.self_attn(
+            hidden_states,
+            cu_seqlens,
+            max_seqlen,
+            rope_position_embeddings=rope_position_embeddings,
+        )
+        hidden_states = residual + hidden_states
+
+        residual = hidden_states
+        hidden_states = self.final_layer_norm(hidden_states)
+        hidden_states = self.activation_fn(self.fc1(hidden_states))
+        hidden_states = self.fc2(hidden_states)
+        hidden_states = residual + hidden_states
+
+        return hidden_states
+
+
+class AudioEncoder(nn.Module):
+    def __init__(
+        self,
+        config: MiMoAudioTokenizerConfig,
+    ):
+        super().__init__()
+        self.config = config
+        self.max_source_positions = (
+            config.max_audio_seconds * config.sampling_rate // config.hop_length
+        ) // config.stride_size
+        self.embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0
+        self.skip_layer_idx = config.encoder_skip_layer_id
+
+        self.conv1 = nn.Conv1d(
+            config.n_mels,
+            config.d_model,
+            kernel_size=config.kernel_size,
+            padding=1,
+        )
+        self.conv2 = nn.Conv1d(
+            config.d_model,
+            config.d_model,
+            kernel_size=config.kernel_size,
+            stride=config.stride_size,
+            padding=1,
+        )
+
+        self.position_embedding = AudioRotaryEmbedding(
+            config.rope_theta,
+            config.d_model // config.encoder_attention_heads,
+            self.max_source_positions,
+            config.rope_type,
+        )
+
+        attn_window_sizes = []
+        if config.hybrid_attention:
+            for i in range(config.encoder_layers):
+                if i % config.swa_per_block < config.swa_per_block - 1:
+                    attn_window_sizes.append(tuple(config.encoder_attn_window_size))
+                else:
+                    attn_window_sizes.append((-1, -1))
+        else:
+            attn_window_sizes = [
+                tuple(config.encoder_attn_window_size)
+            ] * config.encoder_layers
+
+        self.layers = nn.ModuleList(
+            [
+                AudioEncoderTransformerLayer(
+                    config=config,
+                    causal=config.encoder_causal,
+                    attn_window_size=attn_window_sizes[i],
+                )
+                for i in range(config.encoder_layers)
+            ]
+        )
+
+        self.layer_norm = LAYER_NORM[config.ln_type](config.d_model)
+
+        if config.avg_pooler != 1:
+            self.down_sample_layer = nn.Sequential(
+                nn.Conv1d(
+                    config.d_model,
+                    config.d_model,
+                    config.avg_pooler,
+                    config.avg_pooler,
+                    bias=False,
+                ),
+                nn.GELU(),
+            )
+            self.down_sample_norm = LAYER_NORM[config.ln_type](config.d_model)
+        else:
+            self.down_sample_layer = None
+
+        if config.num_quantizers != 0:
+            self.quantizer = ResidualVectorQuantizer(
+                dimension=config.d_model,
+                n_q=config.num_quantizers,
+                bins=config.codebook_size,
+                threshold_ema_dead_code=config.threshold_ema_dead_code,
+            )
+        else:
+            self.quantizer = None
+
+    def get_features(self, input_features, output_length):
+        input_features = input_features.to(self.conv1.weight)
+        inputs_embeds = nn.functional.gelu(self.conv1(input_features))
+        inputs_embeds = nn.functional.gelu(self.conv2(inputs_embeds))
+        inputs_embeds = inputs_embeds.permute(0, 2, 1)
+        bsz, tgt_len, _ = inputs_embeds.size()
+        hidden_states = inputs_embeds
+
+        position_ids = get_position_ids(output_length).long().to(input_features.device)
+        rope_position_embeddings = self.position_embedding(input_features, position_ids)
+
+        attention_mask, unpacking_index = get_sequence_mask(
+            hidden_states, output_length
+        )
+        hidden_states = torch.masked_select(hidden_states, attention_mask).view(
+            torch.sum(output_length), self.config.d_model
+        )
+
+        cu_seqlens = F.pad(
+            torch.cumsum(output_length, dim=0), (1, 0), "constant", 0
+        ).to(device=hidden_states.device, dtype=torch.int32)
+        max_seqlen = torch.max(output_length).to(torch.int32).item()
+
+        skip_connect_hidden_states = 0.0
+        for idx, encoder_layer in enumerate(self.layers):
+            hidden_states = encoder_layer(
+                hidden_states,
+                cu_seqlens,
+                max_seqlen,
+                rope_position_embeddings=rope_position_embeddings,
+            )
+            if (self.skip_layer_idx is not None) and idx == self.skip_layer_idx - 1:
+                skip_connect_hidden_states = hidden_states.clone()
+
+        hidden_states += skip_connect_hidden_states
+        hidden_states = self.layer_norm(hidden_states)
+
+        if self.down_sample_layer is not None:
+            hidden_states = torch.index_select(hidden_states, 0, unpacking_index).view(
+                bsz, tgt_len, self.config.d_model
+            )
+            if hidden_states.size(1) % self.config.avg_pooler:
+                pad_len = (
+                    self.config.avg_pooler
+                    - hidden_states.size(1) % self.config.avg_pooler
+                )
+                hidden_states = torch.nn.functional.pad(
+                    hidden_states, (0, 0, 0, pad_len), mode="constant", value=0.0
+                )
+                tgt_len += pad_len
+            tgt_len = tgt_len // self.config.avg_pooler
+            hidden_states = self.down_sample_layer(hidden_states.transpose(1, 2))
+            output_length = (
+                output_length // self.config.avg_pooler
+                + (output_length % self.config.avg_pooler != 0).int()
+            )
+            hidden_states = hidden_states.transpose(1, 2)
+            attention_mask, unpacking_index = get_sequence_mask(
+                hidden_states, output_length
+            )
+            hidden_states = torch.masked_select(hidden_states, attention_mask).view(
+                torch.sum(output_length), self.config.d_model
+            )
+            hidden_states = self.down_sample_norm(hidden_states)
+
+        return (
+            hidden_states,
+            output_length,
+            attention_mask,
+            unpacking_index,
+            tgt_len,
+            bsz,
+        )
+
+    def get_output_length(self, mel_len):
+        tgt_len = mel_len + 3 - self.config.kernel_size
+        return (tgt_len + 2 - self.config.kernel_size) // self.config.stride_size + 1
+
+    @torch.no_grad()
+    def encode(
+        self,
+        input_features,
+        input_lens=None,
+        output_length=None,
+        return_codes_only=False,
+        n_q=None,
+        use_quantizer=True,
+    ):
+        if output_length is None:
+            output_length = self.get_output_length(input_lens)
+        input_features = unpack_hidden_states(input_features, input_lens)
+        hidden_states, output_length, attention_mask, unpacking_index, tgt_len, bsz = (
+            self.get_features(
+                input_features=input_features.transpose(1, 2),
+                output_length=output_length,
+            )
+        )
+
+        dtype = hidden_states.dtype
+        if use_quantizer and self.quantizer is not None:
+            self.quantizer.float()
+            codes = self.quantizer.encode(hidden_states.float(), n_q=n_q)
+            if return_codes_only:
+                return codes, output_length
+            hidden_states = self.quantizer.decode(codes)
+            hidden_states = hidden_states.to(dtype)
+        else:
+            codes = None
+
+        hidden_states_packed = hidden_states.clone()
+        hidden_states = torch.index_select(hidden_states, 0, unpacking_index).view(
+            bsz, tgt_len, self.config.d_model
+        )
+        hidden_states = torch.where(attention_mask, hidden_states, 0)
+        return hidden_states, hidden_states_packed, output_length, codes
+
+    @torch.no_grad()
+    def decode_vq(self, codes):
+        self.quantizer.float()
+        return self.quantizer.decode(codes)
+
+
+class MiMoAudioTokenizer(PreTrainedModel):
+    config_class = MiMoAudioTokenizerConfig
+
+    def __init__(self, config: MiMoAudioTokenizerConfig):
+        super().__init__(config)
+        self.config = config
+        self.sampling_rate = config.sampling_rate
+        self.encoder = AudioEncoder(config=config)
+        self.downsample_rate = int(config.hop_length * 2 * config.avg_pooler)
+
+    def get_output_length(self, mel_len):
+        tgt_len = mel_len + 3 - self.config.kernel_size
+        return (tgt_len + 2 - self.config.kernel_size) // self.config.stride_size + 1
+
+    @torch.no_grad()
+    def encode(self, mels, input_lens, use_quantizer=True):
+        input_features = mels
+        encoder_output_length = self.get_output_length(input_lens)
+        hidden_states, hidden_states_packed, encoder_output_length, codes = (
+            self.encoder.encode(
+                input_features, input_lens=input_lens, use_quantizer=use_quantizer
+            )
+        )
+        return hidden_states, hidden_states_packed, encoder_output_length, codes
+
+
+# ---------------------------------------------------------------------------
+# Audio encoding utilities
+# ---------------------------------------------------------------------------
+
+
+def group_by_length(features: torch.Tensor, lengths: torch.Tensor, max_length: int):
+    if features.size(0) != lengths.sum().item():
+        raise ValueError(
+            f"Feature size mismatch: {features.size(0)} vs {lengths.sum().item()}"
+        )
+
+    split_points = []
+    current_sum = 0
+
+    for i, seq_len in enumerate(lengths):
+        if current_sum + seq_len > max_length and current_sum > 0:
+            split_points.append(i)
+            current_sum = seq_len.item()
+        else:
+            current_sum += seq_len.item()
+
+    group_sizes = []
+    prev = 0
+    for point in split_points:
+        group_sizes.append(point - prev)
+        prev = point
+    if prev < len(lengths):
+        group_sizes.append(len(lengths) - prev)
+
+    len_groups = torch.split(lengths, group_sizes)
+    feature_sizes = [group.sum().item() for group in len_groups]
+    feature_groups = torch.split(features, feature_sizes)
+
+    return feature_groups, len_groups
+
+
+@torch.no_grad()
+def encode_batch(
+    audio_tokenizer_encoder,
+    input_features: torch.Tensor,
+    input_lens: torch.Tensor,
+    max_length: int = 256000,
+):
+    feature_groups, len_groups = group_by_length(input_features, input_lens, max_length)
+
+    encoded_parts = []
+    for features, lengths in zip(feature_groups, len_groups):
+        codes, _ = audio_tokenizer_encoder.encode(
+            input_features=features, input_lens=lengths, return_codes_only=True
+        )
+        encoded_parts.append(codes)
+
+    return torch.cat(encoded_parts, dim=-1)
+
+
+def _segment_lengths_for_mel(mel: torch.Tensor, segment_size: int):
+    """Split mel into segments of segment_size with a possible shorter remainder."""
+    input_len = mel.size(0)
+    segs = [segment_size] * (input_len // segment_size)
+    if input_len % segment_size > 0:
+        segs.append(input_len % segment_size)
+    return segs
+
+
+@torch.no_grad()
+def tokenize_audio_batch(mels, audio_tokenizer_encoder, segment_size=6000, device=None):
+    """Tokenize multiple mels in one encode_batch call.
+
+    Returns list of code tensors, each [T_i, C] for that mel.
+    """
+    if not mels:
+        return []
+    if device is None:
+        device = next(audio_tokenizer_encoder.parameters()).device
+    input_len_seg_per_mel = [_segment_lengths_for_mel(m, segment_size) for m in mels]
+    input_lens_flat = [s for segs in input_len_seg_per_mel for s in segs]
+    input_features = torch.cat([m.to(device) for m in mels], dim=0)
+    input_lens_t = torch.tensor(input_lens_flat, dtype=torch.long, device=device)
+    codes_packed = encode_batch(
+        audio_tokenizer_encoder,
+        input_features=input_features,
+        input_lens=input_lens_t,
+    )
+    codes = codes_packed.transpose(0, 1).detach()  # [total_code_T, C]
+    code_lengths = []
+    for segs in input_len_seg_per_mel:
+        out_len = audio_tokenizer_encoder.get_output_length(
+            torch.tensor(segs, dtype=torch.long, device=device)
+        )
+        if getattr(audio_tokenizer_encoder, "down_sample_layer", None) is not None:
+            avg = audio_tokenizer_encoder.config.avg_pooler
+            out_len = out_len // avg + (out_len % avg != 0).long()
+        code_lengths.append(out_len.sum().item())
+    code_list = torch.split(codes, code_lengths)
+    return list(code_list)
+
+
+# ---------------------------------------------------------------------------
+# MimoAudioEncoderConfig
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class MimoAudioEncoderConfig:
+    """Config for MimoAudioEncoder.
+
+    Field names match the audio_config dict in the model checkpoint.
+    """
+
+    speech_vocab_size: str = "1025-1025-129-129-129-129-129-129"
+    speech_zeroemb_idx: str = "1024-1024-128-128-128-128-128-128"
+    group_size: int = 4
+    audio_channels: int = 8
+    input_local_layers: int = 6
+    input_local_dim: int = 1024
+    input_full_attention: bool = True
+    input_local_attn_heads: int = 64
+    input_local_head_dim: int = 16
+    input_local_intermediate_size: int = 4096
+    input_local_hidden_dropout: float = 0.0
+    out_hidden_size: int = 4096
+    rope_theta: float = 640000.0
+    partial_rotary_factor: float = 0.334
+    projection_layers: int = 1
+    add_post_norm: bool = False
+    audio_segment_size: int = 6000
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "MimoAudioEncoderConfig":
+        known = {f.name for f in dataclasses.fields(cls)}
+        return cls(**{k: v for k, v in d.items() if k in known})
+
+
+# ---------------------------------------------------------------------------
+# AudioProjection
+# ---------------------------------------------------------------------------
+
+
+class AudioProjection(nn.Module):
+    def __init__(
+        self,
+        input_size: int,
+        hidden_size: int,
+        output_size: int,
+    ) -> None:
+        super().__init__()
+        self.mlp = nn.Sequential(
+            nn.Linear(input_size, hidden_size, bias=False),
+            nn.GELU(),
+            nn.Linear(hidden_size, output_size, bias=False),
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.mlp(x)
+
+
+# ---------------------------------------------------------------------------
+# MimoAudioEncoder
+# ---------------------------------------------------------------------------
+
+
+class MimoAudioEncoder(nn.Module):
+    """Audio encoder for MiMo-V2-Omni.
+
+    Encodes mel spectrograms into LLM-compatible embeddings via:
+      1. Audio tokenizer (VQ codes)
+      2. Speech embeddings lookup
+      3. Local Qwen2 transformer
+      4. Linear projection
+    """
+
+    def __init__(self, config, model_path: str = "") -> None:
+        super().__init__()
+        if isinstance(config, dict):
+            config = MimoAudioEncoderConfig.from_dict(config)
+        self.config = config
+        self.audio_channels = config.audio_channels
+        self.audio_group_size = config.group_size
+        self.audio_segment_size = config.audio_segment_size
+
+        speech_vocab_sizes = self._parse_maybe_list(
+            config.speech_vocab_size, config.audio_channels
+        )
+        speech_empty_ids = self._parse_maybe_list(
+            config.speech_zeroemb_idx, config.audio_channels
+        )
+
+        input_local_config = Qwen2Config(
+            hidden_size=config.input_local_dim,
+            num_hidden_layers=config.input_local_layers,
+            num_attention_heads=config.input_local_attn_heads,
+            num_key_value_heads=config.input_local_attn_heads,
+            intermediate_size=config.input_local_intermediate_size,
+            attention_dropout=config.input_local_hidden_dropout,
+            rope_theta=config.rope_theta,
+            partial_rotary_factor=config.partial_rotary_factor,
+        )
+
+        self.input_local_transformer = Qwen2Model(input_local_config)
+
+        if not config.add_post_norm:
+            self.input_local_transformer.norm = nn.Identity()
+
+        self.speech_embeddings = nn.ModuleList(
+            [
+                nn.Embedding(
+                    speech_vocab_sizes[i],
+                    config.input_local_dim,
+                    padding_idx=speech_empty_ids[i],
+                )
+                for i in range(config.audio_channels)
+            ]
+        )
+
+        if config.projection_layers == 1:
+            self.projection = nn.Linear(
+                config.input_local_dim * config.group_size,
+                config.out_hidden_size,
+                bias=False,
+            )
+        elif config.projection_layers == 2:
+            self.projection = AudioProjection(
+                config.input_local_dim * config.group_size,
+                config.input_local_dim * config.group_size * 4,
+                config.out_hidden_size,
+            )
+        else:
+            raise ValueError(f"Invalid projection_layers: {config.projection_layers}")
+
+        self.audio_tokenizer: MiMoAudioTokenizer | None = None
+        if model_path:
+            audio_tokenizer_path = os.path.join(model_path, "audio_tokenizer")
+            if os.path.exists(audio_tokenizer_path):
+                dev = torch.get_default_device()
+                self.audio_tokenizer = self._load_audio_tokenizer(
+                    audio_tokenizer_path, dev
+                )
+            else:
+                logger.warning(
+                    "Audio tokenizer not found at %s, audio encoding disabled",
+                    audio_tokenizer_path,
+                )
+
+    @staticmethod
+    def _load_audio_tokenizer(path: str, device: torch.device) -> MiMoAudioTokenizer:
+        """Load MiMoAudioTokenizer from directory."""
+        from safetensors.torch import load_file
+
+        config_path = os.path.join(path, "config.json")
+        with open(config_path) as f:
+            config_dict = json.load(f)
+        config = MiMoAudioTokenizer.config_class(**config_dict)
+        model = MiMoAudioTokenizer(config)
+        safetensors_path = os.path.join(path, "model.safetensors")
+        bin_path = os.path.join(path, "pytorch_model.bin")
+        if os.path.exists(safetensors_path):
+            state_dict = load_file(safetensors_path, device="cpu")
+        elif os.path.exists(bin_path):
+            state_dict = torch.load(bin_path, map_location="cpu", weights_only=True)
+        else:
+            raise FileNotFoundError(
+                f"No model weights found in {path} "
+                "(expected model.safetensors or pytorch_model.bin)"
+            )
+        model.load_state_dict(state_dict, strict=False)
+        model = model.to(device=device, dtype=torch.bfloat16)
+        model.eval()
+        model.requires_grad_(False)
+        return model
+
+    def _parse_maybe_list(self, value, length: int) -> list[int]:
+        if isinstance(value, str) and "-" in value:
+            return [int(s) for s in value.split("-")]
+        return [int(value)] * length
+
+    def apply_input_local_transformer(self, speech_embeddings: torch.Tensor):
+        output = self.input_local_transformer(
+            inputs_embeds=speech_embeddings,
+            return_dict=True,
+            is_causal=not self.config.input_full_attention,
+        )
+        return output.last_hidden_state
+
+    def apply_speech_embeddings(self, audio_codes: torch.Tensor) -> torch.Tensor:
+        num_segments = audio_codes.shape[0]
+        _audio_embeddings = torch.zeros(
+            (num_segments, self.config.group_size, self.config.input_local_dim),
+            dtype=next(self.speech_embeddings[0].parameters()).dtype,
+            device=audio_codes.device,
+        )
+        for i in range(self.config.audio_channels):
+            _audio_embeddings.add_(self.speech_embeddings[i](audio_codes[:, :, i]))
+        return _audio_embeddings
+
+    def process_audio(self, audio: torch.Tensor) -> torch.Tensor:
+        """Pad audio codes to group_size boundary.
+
+        Args:
+            audio: [T, audio_channels] code tensor
+
+        Returns:
+            [T//group_size, group_size, audio_channels]
+        """
+        T = audio.shape[0]
+        audio = audio[:, : self.audio_channels]
+        padded_T = (
+            (T + self.audio_group_size - 1)
+            // self.audio_group_size
+            * self.audio_group_size
+        )
+        padded_audio = torch.cat(
+            [
+                audio,
+                torch.zeros(
+                    padded_T - T,
+                    self.audio_channels,
+                    dtype=torch.int32,
+                    device=audio.device,
+                )
+                + audio[-1, :],
+            ],
+            dim=0,
+        )
+        padded_audio = padded_audio.reshape(
+            padded_T // self.audio_group_size,
+            self.audio_group_size,
+            self.audio_channels,
+        )
+        return padded_audio
+
+    def get_audio_feature(
+        self, mel_specs: list[torch.Tensor]
+    ) -> tuple[torch.Tensor, list[int]]:
+        """Encode mel spectrograms into LLM embedding space.
+
+        Args:
+            mel_specs: list of mel spectrogram tensors, each [T, n_mels]
+
+        Returns:
+            Tuple of:
+            - audio_embeds: [total_tokens, out_hidden_size] concatenated embeddings
+            - item_token_lens: list of int, number of tokens per input item
+        """
+        if self.audio_tokenizer is None:
+            raise RuntimeError(
+                "audio_tokenizer is not loaded. "
+                "Ensure model_path points to a directory containing audio_tokenizer/."
+            )
+
+        if not mel_specs:
+            device = next(self.projection.parameters()).device
+            dtype = next(self.projection.parameters()).dtype
+            return (
+                torch.empty(0, self.config.out_hidden_size, device=device, dtype=dtype),
+                [],
+            )
+
+        device = next(self.audio_tokenizer.encoder.parameters()).device
+        code_list = tokenize_audio_batch(
+            mel_specs,
+            self.audio_tokenizer.encoder,
+            segment_size=self.audio_segment_size,
+            device=device,
+        )
+
+        item_token_lens: list[int] = []
+        codecs_to_concat = []
+        for codecs in code_list:
+            padded_codes = self.process_audio(codecs)
+            codecs_to_concat.append(padded_codes)
+            item_token_lens.append(padded_codes.shape[0])
+
+        audio_codes = torch.cat(
+            codecs_to_concat, dim=0
+        )  # [total_T//group_size, group_size, audio_channels]
+
+        _audio_embeddings = self.apply_speech_embeddings(audio_codes)
+        audio_embeds = self.apply_input_local_transformer(_audio_embeddings)
+        B = audio_embeds.shape[0]
+        audio_embeds = self.projection(audio_embeds.reshape(B, -1))
+        return audio_embeds, item_token_lens
diff --git a/vllm/model_executor/models/mimo_v2_flash.py b/vllm/model_executor/models/mimo_v2.py
similarity index 94%
rename from vllm/model_executor/models/mimo_v2_flash.py
rename to vllm/model_executor/models/mimo_v2.py
index 43475ed690c9..3f466162649c 100644
--- a/vllm/model_executor/models/mimo_v2_flash.py
+++ b/vllm/model_executor/models/mimo_v2.py
@@ -6,6 +6,7 @@
 import torch
 from torch import nn
 
+from vllm.compilation.decorators import support_torch_compile
 from vllm.config import (
     CacheConfig,
     VllmConfig,
@@ -22,7 +23,10 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
@@ -43,6 +47,9 @@
 from vllm.model_executor.models.utils import sequence_parallel_chunk
 from vllm.sequence import IntermediateTensors
 from vllm.v1.attention.backend import AttentionType
+from vllm.v1.attention.backends.flash_attn_diffkv import (
+    FlashAttentionDiffKVBackend,
+)
 
 from .interfaces import MixtureOfExperts, SupportsPP
 from .utils import (
@@ -162,7 +169,6 @@ def __init__(
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=True,
             renormalize=config.norm_topk_prob,
             quant_config=quant_config,
             prefix=f"{prefix}.experts",
@@ -263,7 +269,7 @@ def __init__(
             self.total_num_heads * self.v_head_dim,
             hidden_size,
             bias=False,
-            quant_config=quant_config,
+            quant_config=quant_config if "mtp.layers" not in prefix else None,
             reduce_results=True,
             prefix=f"{prefix}.o_proj",
         )
@@ -285,6 +291,15 @@ def __init__(
         )
 
         sliding_window = sliding_window_size if sliding_window_size > -1 else None
+
+        # Use DiffKV backend when V has a different head dim than K
+        if self.v_head_dim != self.head_dim:
+            FlashAttentionDiffKVBackend.set_head_size_v(self.v_head_dim)
+            attn_backend = FlashAttentionDiffKVBackend
+            logger.info_once("Using FlashAttentionDiffKVBackend for attention.")
+        else:
+            attn_backend = None
+
         self.attn = Attention(
             self.num_heads,
             self.head_dim,
@@ -296,6 +311,8 @@ def __init__(
             attn_type=AttentionType.DECODER,
             prefix=f"{prefix}.attn",
             sinks=self.attention_sink_bias,
+            attn_backend=attn_backend,
+            head_size_v=self.v_head_dim,
         )
 
     def forward(
@@ -311,16 +328,8 @@ def forward(
         if self.v_scale is not None:
             v = v * self.v_scale
 
-        v = v.view(-1, self.num_kv_heads, self.v_head_dim)
-        v = torch.nn.functional.pad(v, [0, self.head_dim - self.v_head_dim], value=0)
-        v = v.view(-1, self.num_kv_heads * self.head_dim)
-
         attn_output = self.attn(q, k, v)
 
-        attn_output = attn_output.view(-1, self.num_heads, self.head_dim)[
-            ..., : self.v_head_dim
-        ].reshape(-1, self.num_heads * self.v_head_dim)
-
         output, _ = self.o_proj(attn_output)
         return output
 
@@ -432,6 +441,7 @@ def is_compressed_softmax_layer(self) -> bool:
         return self.config.hybrid_layer_pattern[self.layer_id] == 1
 
 
+@support_torch_compile
 class MiMoV2Model(nn.Module):
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
@@ -512,7 +522,7 @@ def forward(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return FusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -595,7 +605,13 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
             if expert_matched:
                 continue
-
+            # Support fused qkv_proj checkpoint (Pro format)
+            if "qkv_proj" in name:
+                if name in params_dict:
+                    param = params_dict[name]
+                    loaded_weight = loaded_weight.chunk(tp_size, dim=0)[tp_rank]
+                    default_weight_loader(param, loaded_weight)
+                continue
             stacked_matched = False
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 if weight_name not in name:
@@ -654,6 +670,11 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
 
 class MiMoV2FlashForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+        "gate_up_proj": ["gate_proj", "up_proj"],
+    }
+
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
         config = vllm_config.model_config.hf_config
@@ -710,3 +731,10 @@ def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         loader = AutoWeightsLoader(self)
         return loader.load_weights(weights)
+
+
+class MiMoV2ForCausalLM(MiMoV2FlashForCausalLM):
+    packed_modules_mapping = {
+        "qkv_proj": ["qkv_proj"],
+        "gate_up_proj": ["gate_proj", "up_proj"],
+    }
diff --git a/vllm/model_executor/models/mimo_v2_mtp.py b/vllm/model_executor/models/mimo_v2_mtp.py
new file mode 100644
index 000000000000..c863cedaeb88
--- /dev/null
+++ b/vllm/model_executor/models/mimo_v2_mtp.py
@@ -0,0 +1,366 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""Inference-only MiMo-V2 MTP (Multi-Token Prediction) draft model.
+
+Supports both MiMo-V2-Pro and MiMo-V2-Flash checkpoints.
+
+Checkpoint weight layout (model.mtp.layers.{idx}.*):
+  enorm            - RMSNorm for token embeddings
+  hnorm            - RMSNorm for previous hidden states
+  eh_proj          - ReplicatedLinear(hidden*2 -> hidden)
+  input_layernorm  - pre-attention RMSNorm
+  self_attn.*      - attention weights; format differs by variant:
+                       Pro:   fused qkv_proj  [Q;K;V] concatenated
+                       Flash: separate q_proj, k_proj, v_proj
+  pre_mlp_layernorm - post-attention / pre-MLP RMSNorm
+  mlp.*            - dense MLP (gate_proj / up_proj / down_proj)
+  final_layernorm  - norm applied before logit computation
+"""
+
+from collections.abc import Iterable
+
+import torch
+import torch.nn as nn
+from transformers import PretrainedConfig
+
+from vllm.config import VllmConfig
+from vllm.distributed import (
+    get_tensor_model_parallel_rank,
+    get_tensor_model_parallel_world_size,
+)
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import ReplicatedLinear
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    ParallelLMHead,
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.sequence import IntermediateTensors
+
+from .interfaces import (
+    MultiModalEmbeddings,
+    SupportsMultiModal,
+    _require_is_multimodal,
+)
+from .mimo_v2 import MiMoV2Attention, MiMoV2MLP
+from .utils import _merge_multimodal_embeddings, maybe_prefix
+
+# MiMo-V2 checkpoints contain multiple MTP layers, but vLLM currently supports
+# only the first layer
+_MIMO_V2_PRO_NUM_MTP_LAYERS = 1
+_MIMO_V2_FLASH_NUM_MTP_LAYERS = 1
+
+
+class MiMoV2MTPLayer(nn.Module):
+    """Single MTP predictor layer for MiMo-V2 (Pro and Flash).
+
+    Mirrors the single-layer MiMo-V2 nextn reference implementation.
+    """
+
+    def __init__(
+        self,
+        config: PretrainedConfig,
+        prefix: str,
+        quant_config: QuantizationConfig | None = None,
+    ) -> None:
+        super().__init__()
+
+        # Predictor head components
+        self.enorm = RMSNorm(config.hidden_size, eps=config.layernorm_epsilon)
+        self.hnorm = RMSNorm(config.hidden_size, eps=config.layernorm_epsilon)
+        self.eh_proj = ReplicatedLinear(
+            config.hidden_size * 2, config.hidden_size, bias=False
+        )
+
+        # MTP uses the SWA attention configuration
+        # implementation.
+        swa_rope_theta = getattr(
+            config,
+            "swa_rope_theta",
+            getattr(config, "rope_theta", 1000000),
+        )
+        sliding_window_size = getattr(config, "sliding_window_size", -1)
+
+        self.input_layernorm = RMSNorm(config.hidden_size, eps=config.layernorm_epsilon)
+        self.self_attn = MiMoV2Attention(
+            hidden_size=config.hidden_size,
+            num_heads=config.swa_num_attention_heads,
+            num_kv_heads=config.swa_num_key_value_heads,
+            head_dim=config.swa_head_dim,
+            v_head_dim=getattr(config, "swa_v_head_dim", None),
+            v_scale=getattr(config, "attention_value_scale", None),
+            sliding_window_size=sliding_window_size,
+            attention_bias=config.attention_bias,
+            add_swa_attention_sink_bias=getattr(
+                config, "add_swa_attention_sink_bias", False
+            ),
+            layer_id=0,
+            rope_theta=swa_rope_theta,
+            max_position_embeddings=getattr(config, "max_position_embeddings", 32768),
+            quant_config=quant_config,
+            partial_rotary_factor=getattr(config, "partial_rotary_factor", 1.0),
+            prefix=f"{prefix}.self_attn",
+        )
+        self.pre_mlp_layernorm = RMSNorm(
+            config.hidden_size, eps=config.layernorm_epsilon
+        )
+        self.mlp = MiMoV2MLP(
+            hidden_size=config.hidden_size,
+            intermediate_size=config.intermediate_size,
+            hidden_act=config.hidden_act,
+            quant_config=quant_config,
+            prefix=f"{prefix}.mlp",
+        )
+        self.final_layernorm = RMSNorm(config.hidden_size, eps=config.layernorm_epsilon)
+
+    def forward(
+        self,
+        inputs_embeds: torch.Tensor,
+        positions: torch.Tensor,
+        previous_hidden_states: torch.Tensor,
+    ) -> torch.Tensor:
+        # Combine token embedding and previous hidden state
+        h, _ = self.eh_proj(
+            torch.cat(
+                [self.enorm(inputs_embeds), self.hnorm(previous_hidden_states)], dim=-1
+            )
+        )
+
+        # Transformer block with fused residual norms
+        residual = h
+        h = self.input_layernorm(h)
+        h = self.self_attn(positions=positions, hidden_states=h)
+        h, residual = self.pre_mlp_layernorm(h, residual)
+        h = self.mlp(h)
+        h = h + residual
+
+        return self.final_layernorm(h)
+
+
+class _MiMoV2MTPLayers(nn.Module):
+    """Thin wrapper so parameter paths match checkpoint: model.mtp.layers.*"""
+
+    def __init__(
+        self,
+        config: PretrainedConfig,
+        num_mtp_layers: int,
+        quant_config: QuantizationConfig | None,
+        prefix: str,
+    ) -> None:
+        super().__init__()
+        self.layers = nn.ModuleDict(
+            {
+                str(i): MiMoV2MTPLayer(
+                    config=config,
+                    prefix=f"{prefix}.{i}",
+                    quant_config=quant_config,
+                )
+                for i in range(num_mtp_layers)
+            }
+        )
+
+
+class MiMoV2MultiTokenPredictor(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
+        super().__init__()
+
+        config = vllm_config.model_config.hf_config
+        spec_cfg = vllm_config.speculative_config
+        assert spec_cfg is not None
+        num_mtp_layers = 1
+
+        self.num_mtp_layers = num_mtp_layers
+
+        self.embed_tokens = VocabParallelEmbedding(
+            config.vocab_size,
+            config.hidden_size,
+        )
+
+        self.mtp = _MiMoV2MTPLayers(
+            config=config,
+            num_mtp_layers=num_mtp_layers,
+            quant_config=vllm_config.quant_config,
+            prefix=maybe_prefix(prefix, "mtp.layers"),
+        )
+
+        self.logits_processor = LogitsProcessor(config.vocab_size)
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        previous_hidden_states: torch.Tensor,
+        inputs_embeds: torch.Tensor | None = None,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor:
+        if inputs_embeds is None:
+            inputs_embeds = self.embed_input_ids(input_ids)
+        current_step_idx = spec_step_idx % self.num_mtp_layers
+        return self.mtp.layers[str(current_step_idx)](
+            inputs_embeds, positions, previous_hidden_states
+        )
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+        lm_head: ParallelLMHead,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor:
+        return self.logits_processor(lm_head, hidden_states)
+
+
+class MiMoV2MTP(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
+        super().__init__()
+        self.config = vllm_config.model_config.hf_config
+        self.model = MiMoV2MultiTokenPredictor(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+        self.lm_head = ParallelLMHead(
+            self.config.vocab_size,
+            self.config.hidden_size,
+            prefix=maybe_prefix(prefix, "lm_head"),
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor:
+        return self.model(
+            input_ids, positions, hidden_states, inputs_embeds, spec_step_idx
+        )
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor | None:
+        return self.model.compute_logits(hidden_states, self.lm_head, spec_step_idx)
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        tp_rank = get_tensor_model_parallel_rank()
+        tp_size = get_tensor_model_parallel_world_size()
+
+        stacked_params_mapping = [
+            ("gate_up_proj", "gate_proj", 0),
+            ("gate_up_proj", "up_proj", 1),
+            # Flash format: separate projections → fused qkv_proj
+            ("qkv_proj", "q_proj", "q"),
+            ("qkv_proj", "k_proj", "k"),
+            ("qkv_proj", "v_proj", "v"),
+        ]
+
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+
+        for name, loaded_weight in weights:
+            if "rotary_emb.inv_freq" in name:
+                continue
+
+            # Only load MTP-related weights, shared embeddings, and lm_head
+            if (
+                "model.mtp" not in name
+                and "model.embed_tokens" not in name
+                and not name.startswith("lm_head")
+            ):
+                continue
+
+            # Support fused qkv_proj checkpoint (Pro format).
+            # The checkpoint is stored pre-sharded for TP=8 as
+            # [Q_rank0, K_rank0, V_rank0, Q_rank1, ...], so splitting along
+            # dim 0 with chunk(tp_size) gives each rank its Q+K+V slice for
+            # both the FP8 weight and the block weight_scale_inv. This matches
+            # how the main model loads the same layout.
+            if "qkv_proj" in name:
+                if name in params_dict:
+                    param = params_dict[name]
+                    loaded_weight = loaded_weight.chunk(tp_size, dim=0)[tp_rank]
+                    default_weight_loader(param, loaded_weight)
+                    loaded_params.add(name)
+                continue
+
+            # gate_proj/up_proj → gate_up_proj stacking (both formats);
+            # Flash: q_proj/k_proj/v_proj → qkv_proj merging.
+            stacked_matched = False
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                if weight_name not in name:
+                    continue
+                name_rewritten = name.replace(weight_name, param_name)
+                if (
+                    name_rewritten.endswith(".bias")
+                    and name_rewritten not in params_dict
+                ):
+                    continue
+                if name_rewritten not in params_dict:
+                    continue
+                param = params_dict[name_rewritten]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                weight_loader(param, loaded_weight, shard_id)
+                loaded_params.add(name_rewritten)
+                stacked_matched = True
+                break
+
+            if stacked_matched:
+                continue
+
+            if name.endswith(".bias") and name not in params_dict:
+                continue
+            if name not in params_dict:
+                continue
+
+            param = params_dict[name]
+            # attention_sink_bias is head-parallel; slice by tp
+            if "attention_sink_bias" in name:
+                total_heads = loaded_weight.shape[0]
+                heads_per_rank = total_heads // tp_size
+                loaded_weight = loaded_weight.narrow(
+                    0, tp_rank * heads_per_rank, heads_per_rank
+                )
+
+            weight_loader = getattr(param, "weight_loader", default_weight_loader)
+            weight_loader(param, loaded_weight)
+            loaded_params.add(name)
+
+        return loaded_params
+
+
+class MiMoV2OmniMTP(MiMoV2MTP, SupportsMultiModal):
+    def embed_input_ids(
+        self,
+        input_ids: torch.Tensor,
+        multimodal_embeddings: MultiModalEmbeddings | None = None,
+        *,
+        is_multimodal: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        inputs_embeds = self._embed_text_input_ids(
+            input_ids,
+            self.model.embed_input_ids,
+            is_multimodal=is_multimodal,
+        )
+
+        if multimodal_embeddings is None or len(multimodal_embeddings) == 0:
+            return inputs_embeds
+
+        is_multimodal = _require_is_multimodal(is_multimodal)
+
+        inputs_embeds = _merge_multimodal_embeddings(
+            inputs_embeds=inputs_embeds,
+            multimodal_embeddings=multimodal_embeddings,
+            is_multimodal=is_multimodal,
+        )
+
+        return inputs_embeds
diff --git a/vllm/model_executor/models/mimo_v2_omni.py b/vllm/model_executor/models/mimo_v2_omni.py
new file mode 100644
index 000000000000..1cd2c6919a34
--- /dev/null
+++ b/vllm/model_executor/models/mimo_v2_omni.py
@@ -0,0 +1,1488 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import math
+from collections.abc import Callable, Iterable, Mapping, Sequence
+from functools import partial
+from typing import Any
+
+import einops
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import BatchFeature, PretrainedConfig
+from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize
+
+from vllm.config import VllmConfig
+from vllm.config.multimodal import BaseDummyOptions
+from vllm.distributed import parallel_state
+from vllm.distributed import utils as dist_utils
+from vllm.inputs import MultiModalDataDict
+from vllm.model_executor.layers.activation import get_act_and_mul_fn
+from vllm.model_executor.layers.attention import MMEncoderAttention
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import (
+    ColumnParallelLinear,
+    QKVParallelLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.rotary_embedding.common import ApplyRotaryEmb
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.model_executor.models.vision import is_vit_use_data_parallel
+from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.multimodal.inputs import MultiModalFieldConfig, MultiModalKwargsItems
+from vllm.multimodal.parse import ImageSize, MultiModalDataItems
+from vllm.multimodal.processing import (
+    BaseDummyInputsBuilder,
+    BaseMultiModalProcessor,
+    BaseProcessingInfo,
+    PromptReplacement,
+    PromptUpdate,
+    PromptUpdateDetails,
+)
+from vllm.transformers_utils.configs.mimo_v2_omni import Mimo_VLVisionConfig
+from vllm.transformers_utils.processors.mimo_v2_omni import (
+    MiMoOmniProcessor,
+    VideoAudioInput,
+    _format_timestamp,
+)
+
+from .interfaces import (
+    MultiModalEmbeddings,
+    SupportsMultiModal,
+    SupportsPP,
+    SupportsQuant,
+)
+from .mimo_audio import MimoAudioEncoder
+from .mimo_v2 import MiMoV2FlashForCausalLM
+from .qwen2_5_vl import (
+    Qwen2_5_VisionMLP,
+    Qwen2_5_VisionPatchEmbed,
+    Qwen2_5_VLImageEmbeddingInputs,
+    Qwen2_5_VLImageInputs,
+    Qwen2_5_VLImagePixelInputs,
+    Qwen2_5_VLVideoEmbeddingInputs,
+    Qwen2_5_VLVideoInputs,
+    Qwen2_5_VLVideoPixelInputs,
+)
+from .qwen2_vl import _create_qwen2vl_field_factory
+from .utils import AutoWeightsLoader, IntermediateTensors, WeightsMapper, maybe_prefix
+
+
+class MiMoVisionMLP(Qwen2_5_VisionMLP):
+    pass
+
+
+class MiMoVisionPatchEmbed(Qwen2_5_VisionPatchEmbed):
+    pass
+
+
+class MiMoVisionPatchMerger(nn.Module):
+    def __init__(
+        self,
+        d_model: int,
+        context_dim: int,
+        norm_layer: Callable[[int], nn.Module] | None = None,
+        spatial_merge_size: int = 2,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        use_data_parallel = is_vit_use_data_parallel()
+        self.hidden_size = context_dim * (spatial_merge_size**2)
+        if norm_layer is None:
+            norm_layer = partial(nn.LayerNorm, eps=1e-6)
+        self.ln_q = norm_layer(context_dim)
+
+        self.mlp = nn.Sequential(
+            ColumnParallelLinear(
+                self.hidden_size,
+                self.hidden_size,
+                bias=False,
+                quant_config=quant_config,
+                prefix=f"{prefix}.mlp.0",
+                return_bias=False,
+                disable_tp=use_data_parallel,
+            ),
+            nn.GELU(),
+            RowParallelLinear(
+                self.hidden_size,
+                d_model,
+                bias=False,
+                quant_config=quant_config,
+                prefix=f"{prefix}.mlp.2",
+                return_bias=False,
+                disable_tp=use_data_parallel,
+            ),
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ln_q(x)
+        x = x.view(-1, self.hidden_size)
+        out = self.mlp(x)
+        return out
+
+
+class MiMoVisionAttention(nn.Module):
+    def __init__(
+        self,
+        embed_dim: int,
+        num_heads: int,
+        num_kv_heads: int,
+        qk_channels: int,
+        kv_channels: int,
+        use_sink: bool = False,
+        visual_token_window_size: int = 64,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        use_data_parallel = is_vit_use_data_parallel()
+        self.tp_size = (
+            1
+            if use_data_parallel
+            else parallel_state.get_tensor_model_parallel_world_size()
+        )
+        self.tp_rank = parallel_state.get_tensor_model_parallel_rank()
+
+        self.num_heads = num_heads
+        self.num_kv_heads = num_kv_heads
+        self.qk_channels = qk_channels
+        self.kv_channels = kv_channels
+        self.embed_dim = embed_dim
+
+        self.num_heads_per_partition = dist_utils.divide(num_heads, self.tp_size)
+        self.num_kv_heads_per_partition = dist_utils.divide(num_kv_heads, self.tp_size)
+
+        # Attention scale uses the Q/K head dimension (qk_channels)
+        self.scale = qk_channels**-0.5
+
+        # QKV: Q is (num_heads * qk_channels), KV are (num_kv_heads * kv_channels)
+        self.qkv = QKVParallelLinear(
+            hidden_size=embed_dim,
+            head_size=qk_channels,
+            total_num_heads=num_heads,
+            total_num_kv_heads=num_kv_heads,
+            v_head_size=kv_channels,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.qkv",
+            disable_tp=use_data_parallel,
+        )
+
+        # Output projection: input is (num_heads * kv_channels) after attention
+        self.proj = RowParallelLinear(
+            input_size=num_heads * kv_channels,
+            output_size=embed_dim,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.proj",
+            disable_tp=use_data_parallel,
+        )
+
+        # For full attention (non-window blocks)
+        self.attn = MMEncoderAttention(
+            num_heads=self.num_heads_per_partition,
+            head_size=kv_channels,
+            scale=self.scale,
+            num_kv_heads=self.num_kv_heads_per_partition,
+            prefix=f"{prefix}.attn",
+        )
+
+        # Rotary embeddings applied separately to Q and K
+        self.apply_rotary_emb = ApplyRotaryEmb(enforce_enable=True)
+
+        # Sink attention weights (loaded but not used in vLLM flash_attn)
+        # The checkpoint stores these only for non-full-attention blocks
+        self.use_sink = use_sink
+        if use_sink:
+            self.sinks = nn.Parameter(
+                torch.empty(num_heads),
+                requires_grad=False,
+            )
+        else:
+            self.sinks = None
+
+        self.visual_token_window_size = visual_token_window_size
+
+    def _forward_window_attn(
+        self,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        cu_seqlens: torch.Tensor,
+        max_seqlen: torch.Tensor,
+    ) -> torch.Tensor:
+        """Window attention via flash_attn_varlen_func with window_size."""
+        from vllm.vllm_flash_attn import flash_attn_varlen_func
+
+        w = self.visual_token_window_size
+        output = flash_attn_varlen_func(
+            q,
+            k,
+            v,
+            cu_seqlens_q=cu_seqlens,
+            cu_seqlens_k=cu_seqlens,
+            max_seqlen_q=max_seqlen,
+            max_seqlen_k=max_seqlen,
+            softmax_scale=self.scale,
+            causal=False,
+            window_size=[w, w],
+        )
+        return output
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        cu_seqlens: torch.Tensor,
+        rotary_pos_emb_cos: torch.Tensor,
+        rotary_pos_emb_sin: torch.Tensor,
+        max_seqlen: torch.Tensor,
+        full_attn: bool = True,
+    ) -> torch.Tensor:
+        """
+        Args:
+            x: [seq_len, batch=1, embed_dim]  (seq-first convention)
+            cu_seqlens: cumulative sequence lengths [num_seqs+1], int32
+            rotary_pos_emb_cos: [seq_len, qk_channels // 2]
+            rotary_pos_emb_sin: [seq_len, qk_channels // 2]
+            max_seqlen: maximum sequence length
+            full_attn: if True, full attention; if False, window attention
+        """
+        # [seq_len, 1, embed_dim] -> QKV projection
+        qkv, _ = self.qkv(x)  # [seq_len, 1, q_size + kv_size + kv_size]
+        seq_len, batch_size, _ = qkv.shape
+
+        q_size = self.num_heads_per_partition * self.qk_channels
+        kv_size = self.num_kv_heads_per_partition * self.kv_channels
+        q, k, v = qkv.split([q_size, kv_size, kv_size], dim=-1)
+
+        # Rearrange to [batch, seq, head, head_dim] for rotary application
+        q = einops.rearrange(q, "s b (h d) -> b s h d", h=self.num_heads_per_partition)
+        k = einops.rearrange(
+            k, "s b (h d) -> b s h d", h=self.num_kv_heads_per_partition
+        )
+        v = einops.rearrange(
+            v, "s b (h d) -> b s h d", h=self.num_kv_heads_per_partition
+        )
+
+        # Apply rotary embeddings to Q and K independently (handles GQA)
+        if rotary_pos_emb_cos is not None and rotary_pos_emb_sin is not None:
+            q = self.apply_rotary_emb(q, rotary_pos_emb_cos, rotary_pos_emb_sin)
+            k = self.apply_rotary_emb(k, rotary_pos_emb_cos, rotary_pos_emb_sin)
+
+        if full_attn:
+            # Full attention via MMEncoderAttention
+            # Flatten to [batch, seq, heads * head_dim]
+            q_flat = q.reshape(batch_size, seq_len, -1)
+            k_flat = k.reshape(batch_size, seq_len, -1)
+            v_flat = v.reshape(batch_size, seq_len, -1)
+            context_layer = self.attn(
+                query=q_flat,
+                key=k_flat,
+                value=v_flat,
+                cu_seqlens=cu_seqlens,
+                max_seqlen=max_seqlen,
+            )
+            # context_layer: [batch, seq, num_heads, head_dim] or [batch, seq, hidden]
+            # Ensure shape is [seq, batch, num_heads * kv_channels]
+            if context_layer.dim() == 4:
+                context_layer = einops.rearrange(
+                    context_layer, "b s h d -> s b (h d)"
+                ).contiguous()
+            else:
+                context_layer = einops.rearrange(
+                    context_layer, "b s d -> s b d"
+                ).contiguous()
+        else:
+            # Window attention via flash_attn_varlen_func with window_size
+            # Flatten batch dimension: [seq, head, head_dim]
+            q_varlen = einops.rearrange(q, "b s h d -> (b s) h d")
+            k_varlen = einops.rearrange(k, "b s h d -> (b s) h d")
+            v_varlen = einops.rearrange(v, "b s h d -> (b s) h d")
+            output = self._forward_window_attn(
+                q_varlen, k_varlen, v_varlen, cu_seqlens, max_seqlen
+            )
+            # output: [total_tokens, num_heads, kv_channels]
+            context_layer = einops.rearrange(
+                output, "(b s) h d -> s b (h d)", b=batch_size
+            ).contiguous()
+
+        output, _ = self.proj(context_layer)
+        return output
+
+
+class MiMoVisionBlock(nn.Module):
+    def __init__(
+        self,
+        dim: int,
+        num_heads: int,
+        num_kv_heads: int,
+        qk_channels: int,
+        kv_channels: int,
+        mlp_hidden_dim: int,
+        act_fn: Callable[[torch.Tensor], torch.Tensor] = F.silu,
+        norm_eps: float = 1e-6,
+        use_sink: bool = False,
+        visual_token_window_size: int = 64,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.norm1 = RMSNorm(dim, eps=norm_eps)
+        self.norm2 = RMSNorm(dim, eps=norm_eps)
+        self.attn = MiMoVisionAttention(
+            embed_dim=dim,
+            num_heads=num_heads,
+            num_kv_heads=num_kv_heads,
+            qk_channels=qk_channels,
+            kv_channels=kv_channels,
+            use_sink=use_sink,
+            visual_token_window_size=visual_token_window_size,
+            quant_config=quant_config,
+            prefix=f"{prefix}.attn",
+        )
+        self.mlp = MiMoVisionMLP(
+            in_features=dim,
+            hidden_features=mlp_hidden_dim,
+            act_fn=act_fn,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.mlp",
+        )
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        cu_seqlens: torch.Tensor,
+        rotary_pos_emb_cos: torch.Tensor,
+        rotary_pos_emb_sin: torch.Tensor,
+        max_seqlen: torch.Tensor,
+        full_attn: bool = True,
+    ) -> torch.Tensor:
+        # x: [seq_len, batch=1, dim]
+        x_attn = self.attn(
+            self.norm1(x),
+            cu_seqlens=cu_seqlens,
+            rotary_pos_emb_cos=rotary_pos_emb_cos,
+            rotary_pos_emb_sin=rotary_pos_emb_sin,
+            max_seqlen=max_seqlen,
+            full_attn=full_attn,
+        )
+        # Fused residual add + norm2
+        x_norm, residual = self.norm2(x, residual=x_attn)
+        x = residual + self.mlp(x_norm)
+        return x
+
+
+class MiMoVisionTransformer(nn.Module):
+    def __init__(
+        self,
+        vision_cfg: PretrainedConfig,
+        *,
+        norm_eps: float = 1e-6,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.spatial_merge_size = vision_cfg.spatial_merge_size
+        self.spatial_merge_unit = self.spatial_merge_size**2
+        self.fullatt_block_indexes = vision_cfg.fullatt_block_indexes
+        self.vit_window_attn_types = vision_cfg.vit_window_attn_types
+        self.visual_token_window_size = vision_cfg.visual_token_window_size
+        self.hidden_size = vision_cfg.hidden_size
+        self.num_heads = vision_cfg.num_heads
+        self.num_kv_heads = vision_cfg.num_key_value_heads
+        self.qk_channels = vision_cfg.qk_channels
+        self.kv_channels = vision_cfg.kv_channels
+
+        self.patch_embed = MiMoVisionPatchEmbed(
+            patch_size=vision_cfg.patch_size,
+            temporal_patch_size=vision_cfg.temporal_patch_size,
+            in_channels=vision_cfg.in_channels,
+            hidden_size=vision_cfg.hidden_size,
+        )
+
+        norm_layer = partial(RMSNorm, eps=norm_eps)
+
+        # Rotary embedding for 2D positions.
+        # With partial_rotary_factor=0.5 and head_size=qk_channels:
+        #   rotary_dim = qk_channels // 2
+        #   get_cos_sin returns cos, sin each of shape [pos, rotary_dim // 2]
+        # After indexing with 2D pos_ids and flattening:
+        #   result shape = [tokens, rotary_dim] = [tokens, qk_channels // 2]
+        # which is what ApplyRotaryEmb expects as cos/sin input.
+        self.rotary_pos_emb = get_rope(
+            head_size=vision_cfg.qk_channels,
+            max_position=8192,
+            is_neox_style=True,
+            rope_parameters={"partial_rotary_factor": 0.5},
+        )
+
+        self.blocks = nn.ModuleList(
+            [
+                MiMoVisionBlock(
+                    dim=vision_cfg.hidden_size,
+                    num_heads=vision_cfg.num_heads,
+                    num_kv_heads=vision_cfg.num_key_value_heads,
+                    qk_channels=vision_cfg.qk_channels,
+                    kv_channels=vision_cfg.kv_channels,
+                    mlp_hidden_dim=vision_cfg.intermediate_size,
+                    act_fn=get_act_and_mul_fn(vision_cfg.hidden_act),
+                    norm_eps=norm_eps,
+                    use_sink=(
+                        vision_cfg.use_sink
+                        and i not in vision_cfg.fullatt_block_indexes
+                    ),
+                    visual_token_window_size=vision_cfg.visual_token_window_size,
+                    quant_config=quant_config,
+                    prefix=f"{prefix}.blocks.{i}",
+                )
+                for i in range(vision_cfg.depth)
+            ]
+        )
+
+        self.merger = MiMoVisionPatchMerger(
+            d_model=vision_cfg.out_hidden_size,
+            context_dim=vision_cfg.hidden_size,
+            norm_layer=norm_layer,
+            spatial_merge_size=vision_cfg.spatial_merge_size,
+            quant_config=quant_config,
+            prefix=f"{prefix}.merger",
+        )
+
+    @property
+    def dtype(self) -> torch.dtype:
+        return self.patch_embed.proj.weight.dtype
+
+    @property
+    def device(self) -> torch.device:
+        return self.patch_embed.proj.weight.device
+
+    def apply_index(self, tensor: torch.Tensor, index: torch.Tensor) -> torch.Tensor:
+        """Reindex tensor at the spatial_merge_unit granularity."""
+        tensor = tensor.unflatten(0, (-1, self.spatial_merge_unit))
+        tensor = tensor[index]
+        tensor = tensor.flatten(0, 1)
+        return tensor
+
+    def get_window_index_1d(
+        self, grid_thw: torch.Tensor, col: bool = True
+    ) -> torch.Tensor:
+        """Compute 1D window indices for col-based or row-based SWA reordering."""
+        window_index: list[torch.Tensor] = []
+        window_index_id = 0
+        for grid_t, grid_h, grid_w in grid_thw:
+            llm_grid_h = grid_h // self.spatial_merge_size
+            llm_grid_w = grid_w // self.spatial_merge_size
+            index = torch.arange(grid_t * llm_grid_h * llm_grid_w).reshape(
+                grid_t, llm_grid_h, llm_grid_w
+            )
+            index_new = index.transpose(1, 2).reshape(-1) if col else index.reshape(-1)
+            window_index.append(index_new + window_index_id)
+            window_index_id += int((grid_t * llm_grid_h * llm_grid_w).item())
+        return torch.cat(window_index, dim=0)
+
+    def rot_pos_emb(self, grid_thw: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+        """Compute 2D rotary position embedding cos/sin for given grid sizes.
+
+        Returns:
+            cos: [total_tokens, qk_channels // 2]
+            sin: [total_tokens, qk_channels // 2]
+        """
+        cos_list, sin_list = [], []
+        for i in range(grid_thw.size(0)):
+            t, h, w = int(grid_thw[i, 0]), int(grid_thw[i, 1]), int(grid_thw[i, 2])
+
+            # Build 2D position IDs with spatial_merge_size interleaving
+            hpos_ids = torch.arange(h).unsqueeze(1).expand(-1, w)
+            hpos_ids = (
+                hpos_ids.reshape(
+                    h // self.spatial_merge_size,
+                    self.spatial_merge_size,
+                    w // self.spatial_merge_size,
+                    self.spatial_merge_size,
+                )
+                .permute(0, 2, 1, 3)
+                .flatten()
+            )
+            wpos_ids = torch.arange(w).unsqueeze(0).expand(h, -1)
+            wpos_ids = (
+                wpos_ids.reshape(
+                    h // self.spatial_merge_size,
+                    self.spatial_merge_size,
+                    w // self.spatial_merge_size,
+                    self.spatial_merge_size,
+                )
+                .permute(0, 2, 1, 3)
+                .flatten()
+            )
+            pos_ids = torch.stack([hpos_ids, wpos_ids], dim=-1).repeat(t, 1)
+            # pos_ids: [t*h*w, 2]
+
+            max_grid_size = max(h, w)
+            # get_cos_sin returns cos, sin each of shape [max_grid_size, rotary_dim//2]
+            # where rotary_dim = qk_channels // 2 (from partial_rotary_factor=0.5)
+            cos, sin = self.rotary_pos_emb.get_cos_sin(max_grid_size)
+
+            # [t*h*w, 2, rotary_dim//2] -> [t*h*w, rotary_dim] (= qk_channels // 2)
+            cos_img = cos[pos_ids].flatten(1)
+            sin_img = sin[pos_ids].flatten(1)
+            cos_list.append(cos_img)
+            sin_list.append(sin_img)
+
+        return torch.cat(cos_list, dim=0), torch.cat(sin_list, dim=0)
+
+    def forward(self, x: torch.Tensor, grid_thw: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            x: [total_tokens, C] pre-flattened patches
+            grid_thw: [num_images, 3] tensor of (t, h, w) for each image/video
+        Returns:
+            [merged_tokens, out_hidden_size]
+        """
+        # Ensure grid_thw is a tensor
+        if not isinstance(grid_thw, torch.Tensor):
+            grid_thw = torch.tensor(grid_thw, dtype=torch.long)
+
+        # Move to visual model device/dtype
+        x = x.to(device=self.device, dtype=self.dtype)
+
+        # Patch embedding: [total_tokens, hidden_size]
+        x = self.patch_embed(x)
+
+        # Compute 2D rotary positional embeddings
+        # cos, sin: [total_tokens, qk_channels // 2]
+        rotary_cos, rotary_sin = self.rot_pos_emb(grid_thw)
+        rotary_cos = rotary_cos.to(device=x.device)
+        rotary_sin = rotary_sin.to(device=x.device)
+
+        # Compute cu_seqlens for flash_attn (per-image/video sequence lengths)
+        seqlens = torch.repeat_interleave(
+            grid_thw[:, 1] * grid_thw[:, 2], grid_thw[:, 0]
+        )
+        cu_seqlens = torch.cat(
+            [
+                torch.tensor([0], device=x.device, dtype=torch.int32),
+                seqlens.cumsum(dim=0).to(device=x.device, dtype=torch.int32),
+            ]
+        )
+        max_seqlen = seqlens.max()
+
+        # Precompute col-based window index for type=1 (col SWA) layers
+        window_index_1d_col = self.get_window_index_1d(grid_thw, col=True).to(
+            device=x.device
+        )
+        reverse_window_index_1d_col = torch.argsort(window_index_1d_col)
+
+        # Col-based rotary embeddings (reordered at spatial_merge_unit granularity).
+        # apply_index reorders groups of spatial_merge_unit tokens, just like x.
+        col_cos = self.apply_index(rotary_cos, window_index_1d_col)
+        col_sin = self.apply_index(rotary_sin, window_index_1d_col)
+
+        # Add batch dimension: [total_tokens, 1, hidden_size]
+        x = x.unsqueeze(1)
+
+        for i, blk in enumerate(self.blocks):
+            window_attn_type = self.vit_window_attn_types[i]
+
+            # Reorder tokens to col-based layout when entering col-SWA region
+            if window_attn_type == 1 and (
+                i == 0 or self.vit_window_attn_types[i - 1] != 1
+            ):
+                x = self.apply_index(x, window_index_1d_col)
+
+            # Restore row-based order when leaving col-SWA region
+            if (
+                i > 0
+                and window_attn_type != 1
+                and self.vit_window_attn_types[i - 1] == 1
+            ):
+                x = self.apply_index(x, reverse_window_index_1d_col)
+
+            # Use col-based embeddings for col-SWA layers
+            cos_now = col_cos if window_attn_type == 1 else rotary_cos
+            sin_now = col_sin if window_attn_type == 1 else rotary_sin
+
+            full_attn = i in self.fullatt_block_indexes
+            x = blk(
+                x,
+                cu_seqlens=cu_seqlens,
+                rotary_pos_emb_cos=cos_now,
+                rotary_pos_emb_sin=sin_now,
+                max_seqlen=max_seqlen,
+                full_attn=full_attn,
+            )
+
+        # Restore row-based order if last block was col-SWA
+        if self.vit_window_attn_types[-1] == 1:
+            x = self.apply_index(x, reverse_window_index_1d_col)
+
+        # Remove batch dim and merge spatial tokens
+        # x: [total_tokens, 1, hidden_size] -> [total_tokens, hidden_size]
+        x = x.squeeze(1)
+        x = self.merger(x)
+        return x
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        stacked_params_mapping = [
+            ("mlp.gate_up_proj", "mlp.gate_proj", 0),
+            ("mlp.gate_up_proj", "mlp.up_proj", 1),
+        ]
+        params_dict = dict(self.named_parameters(remove_duplicate=False))
+        loaded_params: set[str] = set()
+
+        for name, loaded_weight in weights:
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                if weight_name not in name:
+                    continue
+                name = name.replace(weight_name, param_name)
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                break
+            else:
+                param = params_dict[name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                weight_loader(param, loaded_weight)
+            loaded_params.add(name)
+        return loaded_params
+
+
+class MiMoV2OmniProcessingInfo(BaseProcessingInfo):
+    def get_supported_mm_limits(self) -> Mapping[str, int | None]:
+        return {"audio": None, "image": None, "video": None}
+
+    def get_hf_config(self):
+        config = self.ctx.get_hf_config()
+        if isinstance(config.vision_config, dict):
+            config.vision_config = Mimo_VLVisionConfig.from_dict(config.vision_config)
+        return config
+
+    def get_hf_processor(self, **kwargs: object) -> MiMoOmniProcessor:
+        hf_config = self.get_hf_config()
+        tokenizer = self.get_tokenizer()
+        return MiMoOmniProcessor.from_hf_config(tokenizer, hf_config)
+
+    def get_image_processor(self, **kwargs: object):
+        return self.get_hf_processor(**kwargs).image_processor
+
+    def get_data_parser(self):
+        from vllm.multimodal.parse import MultiModalDataParser
+
+        return MultiModalDataParser(target_sr=24000.0)
+
+    def get_mm_max_tokens_per_item(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+    ) -> Mapping[str, int]:
+        return {
+            "image": self.get_max_image_tokens(),
+            "video": self.get_max_video_tokens(seq_len, mm_counts),
+        }
+
+    def _get_vision_info(
+        self,
+        *,
+        image_width: int,
+        image_height: int,
+        num_frames: int = 1,
+        do_resize: bool = True,
+        image_processor,
+        mm_kwargs: Mapping[str, object],
+    ) -> tuple[ImageSize, int]:
+        hf_config = self.get_hf_config()
+        vision_config = hf_config.vision_config
+        patch_size = vision_config.patch_size
+        merge_size = vision_config.spatial_merge_size
+        temporal_patch_size = vision_config.temporal_patch_size
+        tokens_per_second = vision_config.tokens_per_second
+
+        mm_kwargs = self.ctx.get_merged_mm_kwargs(mm_kwargs)
+        size = image_processor.size
+        if override_size := mm_kwargs.get("size"):
+            size = size | override_size
+        if (override_min_pixels := mm_kwargs.get("min_pixels")) is not None:
+            size = size | {"shortest_edge": override_min_pixels}
+        if (override_max_pixels := mm_kwargs.get("max_pixels")) is not None:
+            size = size | {"longest_edge": override_max_pixels}
+
+        if do_resize:
+            resized_height, resized_width = smart_resize(
+                height=image_height,
+                width=image_width,
+                factor=patch_size * merge_size,
+                min_pixels=size["shortest_edge"],
+                max_pixels=size["longest_edge"],
+            )
+            preprocessed_size = ImageSize(width=resized_width, height=resized_height)
+        else:
+            preprocessed_size = ImageSize(width=image_width, height=image_height)
+
+        # For video, MiMo resamples to tokens_per_second fps before temporal patching,
+        # effective tokens = num_frames * tokens_per_second / temporal_patch_size.
+        # For images (num_frames == 1) no resampling is applied.
+        if num_frames > 1:
+            effective_frames = num_frames * tokens_per_second
+        else:
+            effective_frames = num_frames
+        padded_num_frames = effective_frames + effective_frames % temporal_patch_size
+        grid_t = max(padded_num_frames // temporal_patch_size, 1)
+        grid_h = preprocessed_size.height // patch_size
+        grid_w = preprocessed_size.width // patch_size
+        num_patches = grid_t * grid_h * grid_w
+        num_vision_tokens = num_patches // (merge_size**2)
+        return preprocessed_size, num_vision_tokens
+
+    def get_num_image_tokens(
+        self,
+        *,
+        image_width: int,
+        image_height: int,
+        image_processor,
+        mm_kwargs: Mapping[str, object],
+    ) -> int:
+        _, num_image_tokens = self._get_vision_info(
+            image_width=image_width,
+            image_height=image_height,
+            num_frames=1,
+            image_processor=image_processor,
+            mm_kwargs=mm_kwargs,
+        )
+        return num_image_tokens
+
+    def get_num_video_tokens(
+        self,
+        *,
+        image_width: int,
+        image_height: int,
+        num_frames: int,
+        image_processor,
+        mm_kwargs: Mapping[str, object],
+    ) -> int:
+        _, num_video_tokens = self._get_vision_info(
+            image_width=image_width,
+            image_height=image_height,
+            num_frames=num_frames,
+            image_processor=image_processor,
+            mm_kwargs=mm_kwargs,
+        )
+        return num_video_tokens
+
+    def get_image_size_with_most_features(
+        self, max_pixels: int | None = None
+    ) -> ImageSize:
+        hf_config = self.get_hf_config()
+        vision_config = hf_config.vision_config
+        patch_size = vision_config.patch_size
+        merge_size = vision_config.spatial_merge_size
+
+        if max_pixels is None:
+            image_processor = self.get_image_processor()
+            mm_kwargs = self.ctx.get_merged_mm_kwargs({})
+            size = image_processor.size
+            if override_size := mm_kwargs.get("size"):
+                size = size | override_size
+            if (override_min_pixels := mm_kwargs.get("min_pixels")) is not None:
+                size = size | {"shortest_edge": override_min_pixels}
+            if (override_max_pixels := mm_kwargs.get("max_pixels")) is not None:
+                size = size | {"longest_edge": override_max_pixels}
+            max_pixels = size["longest_edge"]
+
+        unit = patch_size * merge_size
+        max_seq_len = max_pixels // (unit * unit)
+
+        def closest_factor_pair(n: int) -> tuple[int, int]:
+            for d in range(math.isqrt(n), 0, -1):
+                if n % d == 0:
+                    return d, n // d
+            return 1, n
+
+        height_factor, width_factor = 1, max_seq_len
+        for seq_len in range(max_seq_len, 0, -1):
+            height_factor, width_factor = closest_factor_pair(seq_len)
+            if width_factor / height_factor <= 200:
+                break
+
+        return ImageSize(width=unit * width_factor, height=unit * height_factor)
+
+    def get_max_image_tokens(self) -> int:
+        image_processor = self.get_image_processor()
+        target_width, target_height = self.get_image_size_with_most_features()
+        return self.get_num_image_tokens(
+            image_width=target_width,
+            image_height=target_height,
+            image_processor=image_processor,
+            mm_kwargs={},
+        )
+
+    def _get_max_video_frames(self, max_tokens: int, start_num_frames: int = 1) -> int:
+        image_processor = self.get_image_processor()
+        target_width, target_height = self.get_image_size_with_most_features()
+        num_frames = start_num_frames
+        while True:
+            next_num_frames = num_frames + 1
+            next_max_tokens = self.get_num_video_tokens(
+                image_width=target_width,
+                image_height=target_height,
+                num_frames=next_num_frames,
+                image_processor=image_processor,
+                mm_kwargs={},
+            )
+            if next_max_tokens > max_tokens:
+                break
+            num_frames = next_num_frames
+        return num_frames
+
+    def get_num_frames_with_most_features(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+        max_frames_per_video: int = 14,
+    ) -> int:
+        max_videos = mm_counts.get("video", 0)
+        max_total_frames = self._get_max_video_frames(seq_len)
+        max_frames_per_video = min(
+            max_total_frames // max(max_videos, 1), max_frames_per_video
+        )
+        return max(max_frames_per_video, 1)
+
+    def get_max_video_tokens(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+    ) -> int:
+        image_processor = self.get_image_processor()
+        target_width, target_height = self.get_image_size_with_most_features()
+        return self.get_num_video_tokens(
+            image_width=target_width,
+            image_height=target_height,
+            num_frames=self.get_num_frames_with_most_features(seq_len, mm_counts),
+            image_processor=image_processor,
+            mm_kwargs={},
+        )
+
+
+class MiMoV2OmniMultiModalProcessor(BaseMultiModalProcessor[MiMoV2OmniProcessingInfo]):
+    """vLLM multimodal processor for MiMo-Omni (image + video).
+
+    Key differences from Qwen2.5-VL:
+    - Videos use timestamp tokens between temporal grid positions.
+    - The HF processor expects ``(TCHW_tensor, timestamps_T_tensor)`` video
+      tuples rather than plain numpy arrays.
+    - ``video_start_times`` is tracked so prompt-update reconstruction can
+      regenerate the exact same timestamp token IDs.
+    """
+
+    # fps assumed for vllm-decoded video (numpy T,H,W,C arrays).
+    # The video loader samples ~32 frames; treat each frame as 1 s apart so
+    # MiMoVLProcessor sees 1 fps input and resamples internally.
+    _INPUT_FPS: float = 1.0
+
+    def _get_mm_fields_config(
+        self,
+        hf_inputs: BatchFeature,
+        hf_processor_mm_kwargs: Mapping[str, object],
+    ) -> Mapping[str, MultiModalFieldConfig]:
+        merge_size = self.info.get_hf_config().vision_config.spatial_merge_size
+        fields: dict[str, MultiModalFieldConfig] = dict(
+            **_create_qwen2vl_field_factory(merge_size)(hf_inputs),
+            second_per_grid_ts=MultiModalFieldConfig.batched("video"),
+            video_start_times=MultiModalFieldConfig.batched("video"),
+            audio_features=MultiModalFieldConfig.batched("audio"),
+            audio_token_lens=MultiModalFieldConfig.batched("audio"),
+        )
+        # video_audio fields: only present when video_audio content was processed
+        if "video_audio_n_segs" in hf_inputs:
+            fields["video_audio_n_segs"] = MultiModalFieldConfig.batched("video")
+        # video_audio_seg_lens: list of per-video 1D tensors, batched("video")
+        if "video_audio_seg_lens" in hf_inputs:
+            fields["video_audio_seg_lens"] = MultiModalFieldConfig.batched("video")
+        if "va_audio_features" in hf_inputs:
+            fields["va_audio_features"] = MultiModalFieldConfig.batched("va_audio")
+        return fields
+
+    def _call_hf_processor(
+        self,
+        prompt: str,
+        mm_data: Mapping[str, object],
+        mm_kwargs: Mapping[str, object],
+        tok_kwargs: Mapping[str, object],
+    ) -> BatchFeature:
+        """Convert numpy video arrays to (TCHW, timestamps) tuples for MiMo.
+        Also remap 'audios' → 'audio' since MiMoOmniProcessor.__call__ uses
+        the singular form.
+        """
+        # Remap audios → audio (MiMoOmniProcessor uses singular param name)
+        if "audios" in mm_data:
+            mm_data = {**mm_data, "audio": mm_data["audios"]}
+            mm_data = {k: v for k, v in mm_data.items() if k != "audios"}
+
+        # Handle video_audio items: convert video part to (TCHW, timestamps) tuple
+        if "video_audio" in mm_data:
+            va_converted: list[VideoAudioInput] = []
+            for va_item in mm_data["video_audio"]:
+                if isinstance(va_item, VideoAudioInput):
+                    vid = va_item.video
+                else:
+                    # Expect (video_frames, audio_source) tuple
+                    vid, audio_src = va_item
+                    va_item = VideoAudioInput(video=vid, audio=audio_src)
+                    vid = vid
+                # Convert video frames to (TCHW, timestamps) if needed
+                if (
+                    isinstance(vid, tuple)
+                    and len(vid) == 2
+                    and isinstance(vid[0], torch.Tensor)
+                    and isinstance(vid[1], torch.Tensor)
+                ):
+                    va_converted.append(va_item)
+                else:
+                    if isinstance(vid, np.ndarray):
+                        frames = torch.from_numpy(vid)
+                    elif isinstance(vid, torch.Tensor):
+                        frames = vid
+                    else:
+                        frames = torch.tensor(np.array(vid))
+                    if frames.ndim == 4 and frames.shape[-1] in (1, 3, 4):
+                        frames = frames.permute(0, 3, 1, 2).float()
+                    else:
+                        frames = frames.float()
+                    T = frames.shape[0]
+                    timestamps = torch.arange(T, dtype=torch.float32) / self._INPUT_FPS
+                    va_converted.append(
+                        VideoAudioInput(
+                            video=(frames, timestamps),
+                            audio=va_item.audio,
+                        )
+                    )
+            mm_data = {**mm_data, "video_audio": va_converted}
+
+        if "videos" in mm_data:
+            converted: list[tuple[torch.Tensor, torch.Tensor]] = []
+            for video in mm_data["videos"]:
+                if (
+                    isinstance(video, tuple)
+                    and len(video) == 2
+                    and isinstance(video[0], torch.Tensor)
+                    and isinstance(video[1], torch.Tensor)
+                ):
+                    # already in MiMo format
+                    converted.append(video)
+                else:
+                    # numpy (T, H, W, C) or torch (T, H, W, C) / (T, C, H, W)
+                    if isinstance(video, np.ndarray):
+                        frames = torch.from_numpy(video)
+                    elif isinstance(video, torch.Tensor):
+                        frames = video
+                    else:
+                        frames = torch.tensor(np.array(video))
+
+                    if frames.ndim == 4 and frames.shape[-1] in (1, 3, 4):
+                        # THWC → TCHW
+                        frames = frames.permute(0, 3, 1, 2).float()
+                    else:
+                        frames = frames.float()
+
+                    T = frames.shape[0]
+                    timestamps = torch.arange(T, dtype=torch.float32) / self._INPUT_FPS
+                    converted.append((frames, timestamps))
+
+            mm_data = {**mm_data, "videos": converted}
+
+        return super()._call_hf_processor(prompt, mm_data, mm_kwargs, tok_kwargs)
+
+    def _get_prompt_updates(
+        self,
+        mm_items: MultiModalDataItems,
+        hf_processor_mm_kwargs: Mapping[str, Any],
+        out_mm_kwargs: MultiModalKwargsItems,
+    ) -> Sequence[PromptUpdate]:
+        hf_processor = self.info.get_hf_processor(**hf_processor_mm_kwargs)
+        hf_config = self.info.get_hf_config()
+        tokenizer = self.info.get_tokenizer()
+        vocab = tokenizer.get_vocab()
+
+        merge_size = hf_config.vision_config.spatial_merge_size
+        p = hf_processor.mimo_processor
+
+        image_pad_id = vocab[hf_processor.image_token]
+        video_pad_id = vocab[hf_processor.video_token]
+        audio_pad_id = vocab.get("<|audio_pad|>")
+        vision_start_id = p.vision_start_token_id
+        vision_end_id = p.vision_end_token_id
+        video_start_id = p.video_start_token_id
+        video_end_id = p.video_end_token_id
+        audio_start_id = p.audio_start_token_id
+        audio_end_id = p.audio_end_token_id
+
+        def get_image_replacement(item_idx: int) -> PromptUpdateDetails:
+            out_item = out_mm_kwargs["image"][item_idx]
+            grid_thw = out_item["image_grid_thw"].data
+            n_tokens = int(grid_thw.prod()) // merge_size**2
+            return [image_pad_id] * n_tokens
+
+        def get_video_replacement(item_idx: int) -> PromptUpdateDetails:
+            out_item = out_mm_kwargs["video"][item_idx]
+            grid_thw = out_item["video_grid_thw"].data
+            spt = float(out_item["second_per_grid_ts"].data)
+            start = float(out_item["video_start_times"].data)
+
+            T, H, W = map(int, grid_thw)
+            n_per_grid = H * W // (merge_size * merge_size)
+
+            # Check if this is a video_audio item
+            n_segs_field = out_item.get("video_audio_n_segs")
+            n_segs_val = int(n_segs_field.data) if n_segs_field is not None else 0
+            va_seg_lens: list[int] | None = None
+            if n_segs_val > 0:
+                seg_lens_field = out_item.get("video_audio_seg_lens")
+                if seg_lens_field is not None:
+                    va_seg_lens = seg_lens_field.data[:n_segs_val].tolist()
+
+            full: list[int] = [video_start_id]
+            is_embed_mask: list[bool] = [False]
+
+            if va_seg_lens is None:
+                # Regular video: timestamp + vision tokens per grid
+                for j in range(T):
+                    ts_text = _format_timestamp(start + j * spt)
+                    ts_ids = tokenizer.encode(ts_text, add_special_tokens=False)
+                    full.extend(ts_ids)
+                    is_embed_mask.extend([False] * len(ts_ids))
+                    full.append(vision_start_id)
+                    is_embed_mask.append(False)
+                    full.extend([video_pad_id] * n_per_grid)
+                    is_embed_mask.extend([True] * n_per_grid)
+                    full.append(vision_end_id)
+                    is_embed_mask.append(False)
+            else:
+                # video_audio: interleaved vision+audio per group
+                n_groups = len(va_seg_lens)
+                frames_per_group = T // n_groups  # 1 for il=0, T for il=-1
+                for g in range(n_groups):
+                    # Timestamp for first frame of this group
+                    frame0 = g * frames_per_group
+                    ts_text = _format_timestamp(start + frame0 * spt)
+                    ts_ids = tokenizer.encode(ts_text, add_special_tokens=False)
+                    full.extend(ts_ids)
+                    is_embed_mask.extend([False] * len(ts_ids))
+                    # Vision tokens for all frames in this group
+                    for f in range(frames_per_group):
+                        full.append(vision_start_id)
+                        is_embed_mask.append(False)
+                        full.extend([video_pad_id] * n_per_grid)
+                        is_embed_mask.extend([True] * n_per_grid)
+                        full.append(vision_end_id)
+                        is_embed_mask.append(False)
+                    # Audio tokens for this group
+                    seg_len = va_seg_lens[g]
+                    full.append(audio_start_id)
+                    is_embed_mask.append(False)
+                    full.extend([audio_pad_id] * seg_len)
+                    is_embed_mask.extend([True] * seg_len)
+                    full.append(audio_end_id)
+                    is_embed_mask.append(False)
+
+            full.append(video_end_id)
+            is_embed_mask.append(False)
+
+            embed_t = torch.tensor(is_embed_mask)
+            return PromptUpdateDetails(
+                full=full,
+                is_embed=lambda _tok, _seq: embed_t,
+            )
+
+        def get_audio_replacement(item_idx: int) -> PromptUpdateDetails:
+            out_item = out_mm_kwargs["audio"][item_idx]
+            tok_len = int(out_item["audio_token_lens"].data)
+            return [audio_pad_id] * tok_len
+
+        updates: list[PromptUpdate] = [
+            PromptReplacement(
+                modality="image",
+                target=[image_pad_id],
+                replacement=get_image_replacement,
+            ),
+            PromptReplacement(
+                modality="video",
+                target=[video_pad_id],
+                replacement=get_video_replacement,
+            ),
+        ]
+        if audio_pad_id is not None and audio_start_id is not None:
+            updates.append(
+                PromptReplacement(
+                    modality="audio",
+                    target=[audio_pad_id],
+                    replacement=get_audio_replacement,
+                )
+            )
+        return updates
+
+
+class MiMoV2OmniDummyInputsBuilder(BaseDummyInputsBuilder[MiMoV2OmniProcessingInfo]):
+    def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
+        num_images = mm_counts.get("image", 0)
+        num_videos = mm_counts.get("video", 0)
+        num_audios = mm_counts.get("audio", 0)
+        image_ph = "<|vision_start|><|image_pad|><|vision_end|>"
+        video_ph = "<|vision_start|><|video_pad|><|vision_end|>"
+        audio_ph = "<|mimo_audio_start|><|audio_pad|><|mimo_audio_end|>"
+        return image_ph * num_images + video_ph * num_videos + audio_ph * num_audios
+
+    def get_dummy_mm_data(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+        mm_options: Mapping[str, BaseDummyOptions],
+    ) -> MultiModalDataDict:
+        num_images = mm_counts.get("image", 0)
+        num_videos = mm_counts.get("video", 0)
+
+        target_width, target_height = self.info.get_image_size_with_most_features()
+        target_num_frames = self.info.get_num_frames_with_most_features(
+            seq_len, mm_counts
+        )
+
+        return {
+            "image": self._get_dummy_images(
+                width=target_width,
+                height=target_height,
+                num_images=num_images,
+                overrides=mm_options.get("image"),
+            ),
+            "video": self._get_dummy_videos(
+                width=target_width,
+                height=target_height,
+                num_frames=target_num_frames,
+                num_videos=num_videos,
+                overrides=mm_options.get("video"),
+            ),
+        }
+
+
+@MULTIMODAL_REGISTRY.register_processor(
+    MiMoV2OmniMultiModalProcessor,
+    info=MiMoV2OmniProcessingInfo,
+    dummy_inputs=MiMoV2OmniDummyInputsBuilder,
+)
+class MiMoV2OmniForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, SupportsQuant):
+    # To ensure correct weight loading and mapping.
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_prefix={
+            # audio encoder
+            "speech_embeddings.": "audio_encoder.speech_embeddings.",
+            # mapping for new names in checkpoint saved after transformers v4.52
+            "model.language_model.": "language_model.model.",
+            "model.visual.": "visual.",
+            # mapping for original checkpoint
+            "lm_head.": "language_model.lm_head.",
+            "model.": "language_model.model.",
+        }
+    )
+
+    @classmethod
+    def get_placeholder_str(cls, modality: str, i: int) -> str | None:
+        if modality.startswith("image"):
+            return "<|vision_start|><|image_pad|><|vision_end|>"
+        if modality.startswith("video"):
+            return "<|vision_start|><|video_pad|><|vision_end|>"
+        if modality.startswith("audio"):
+            return "<|mimo_audio_start|><|audio_pad|><|mimo_audio_end|>"
+
+        raise ValueError(f"Unsupported modality: {modality}")
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        self.config = config
+        # Omni ViT/Audio Encoder BF16
+        vision_config = (
+            Mimo_VLVisionConfig.from_dict(config.vision_config)
+            if isinstance(config.vision_config, dict)
+            else config.vision_config
+        )
+        with self._mark_tower_model(vllm_config, {"image", "video"}):
+            self.visual = MiMoVisionTransformer(
+                vision_config,
+                norm_eps=getattr(vllm_config, "rms_norm_eps", 1e-6),
+                quant_config=None,
+                prefix=maybe_prefix(prefix, "visual"),
+            )
+        audio_config = getattr(config, "audio_config", None)
+        model_path = vllm_config.model_config.model
+        if audio_config is not None:
+            with self._mark_tower_model(vllm_config, "audio"):
+                self.audio_encoder = MimoAudioEncoder(
+                    audio_config, model_path=model_path
+                )
+        else:
+            self.audio_encoder = None
+        with self._mark_language_model(vllm_config):
+            self.language_model = MiMoV2FlashForCausalLM(
+                vllm_config=vllm_config,
+                prefix=maybe_prefix(prefix, "language_model"),
+            )
+
+        self.make_empty_intermediate_tensors = (
+            self.language_model.make_empty_intermediate_tensors
+        )
+
+    def _parse_and_validate_image_input(
+        self, **kwargs: object
+    ) -> Qwen2_5_VLImageInputs | None:
+        pixel_values = kwargs.pop("pixel_values", None)
+        image_embeds = kwargs.pop("image_embeds", None)
+        image_grid_thw = kwargs.pop("image_grid_thw", None)
+
+        if pixel_values is None and image_embeds is None:
+            return None
+
+        if pixel_values is not None:
+            return Qwen2_5_VLImagePixelInputs(
+                type="pixel_values",
+                pixel_values=pixel_values,
+                image_grid_thw=image_grid_thw,
+            )
+
+        if image_embeds is not None:
+            return Qwen2_5_VLImageEmbeddingInputs(
+                type="image_embeds",
+                image_embeds=image_embeds,
+                image_grid_thw=image_grid_thw,
+            )
+
+    def _parse_and_validate_video_input(
+        self, **kwargs: object
+    ) -> Qwen2_5_VLVideoInputs | None:
+        pixel_values_videos = kwargs.pop("pixel_values_videos", None)
+        video_embeds = kwargs.pop("video_embeds", None)
+        video_grid_thw = kwargs.pop("video_grid_thw", None)
+        second_per_grid_ts = kwargs.pop("second_per_grid_ts", None)
+
+        if pixel_values_videos is None and video_embeds is None:
+            return None
+
+        if pixel_values_videos is not None:
+            return Qwen2_5_VLVideoPixelInputs(
+                type="pixel_values_videos",
+                pixel_values_videos=pixel_values_videos,
+                video_grid_thw=video_grid_thw,
+                second_per_grid_ts=second_per_grid_ts,
+            )
+
+        if video_embeds is not None:
+            return Qwen2_5_VLVideoEmbeddingInputs(
+                type="video_embeds",
+                video_embeds=video_embeds,
+                video_grid_thw=video_grid_thw,
+                second_per_grid_ts=second_per_grid_ts,
+            )
+
+    def _process_image_input(
+        self, image_input: Qwen2_5_VLImageInputs
+    ) -> tuple[torch.Tensor, ...]:
+        grid_thw = image_input["image_grid_thw"]
+        assert grid_thw.ndim == 2
+        grid_thw_list = grid_thw.tolist()
+
+        if image_input["type"] == "image_embeds":
+            image_embeds = image_input["image_embeds"].type(self.visual.dtype)
+        else:
+            pixel_values = image_input["pixel_values"]
+            image_embeds = self.visual(pixel_values, grid_thw=grid_thw_list)
+
+        # Split concatenated embeddings for each image item.
+        merge_size = self.visual.spatial_merge_size
+        sizes = (grid_thw.prod(-1) // merge_size // merge_size).tolist()
+        return image_embeds.split(sizes)
+
+    def _process_video_input(
+        self, video_input: Qwen2_5_VLVideoInputs
+    ) -> tuple[torch.Tensor, ...]:
+        grid_thw = video_input["video_grid_thw"]
+        assert grid_thw.ndim == 2
+        grid_thw_list = grid_thw.tolist()
+
+        if video_input["type"] == "video_embeds":
+            video_embeds = video_input["video_embeds"].type(self.visual.dtype)
+        else:
+            pixel_values_videos = video_input["pixel_values_videos"]
+            video_embeds = self.visual(pixel_values_videos, grid_thw=grid_thw_list)
+
+        # Split concatenated embeddings for each video item.
+        merge_size = self.visual.spatial_merge_size
+        sizes = (grid_thw.prod(-1) // merge_size // merge_size).tolist()
+        return video_embeds.split(sizes)
+
+    def _parse_and_validate_audio_input(self, **kwargs: object) -> dict | None:
+        audio_features = kwargs.pop("audio_features", None)
+        audio_token_lens = kwargs.pop("audio_token_lens", None)
+        if audio_features is None:
+            return None
+        return {
+            "type": "audio",
+            "audio_features": audio_features,
+            "audio_token_lens": audio_token_lens,
+        }
+
+    def _parse_and_validate_multimodal_inputs(self, **kwargs: object) -> dict:
+        mm_input_by_modality = {}
+
+        # Preserve the order of modalities if there are multiple of them
+        # from the order of kwargs.
+        for input_key in kwargs:
+            if (
+                input_key in ("pixel_values", "image_embeds")
+                and "image" not in mm_input_by_modality
+            ):
+                mm_input_by_modality["image"] = self._parse_and_validate_image_input(
+                    **kwargs
+                )
+            if (
+                input_key in ("pixel_values_videos", "video_embeds")
+                and "video" not in mm_input_by_modality
+            ):
+                mm_input_by_modality["video"] = self._parse_and_validate_video_input(
+                    **kwargs
+                )
+            if input_key == "audio_features" and "audio" not in mm_input_by_modality:
+                mm_input_by_modality["audio"] = self._parse_and_validate_audio_input(
+                    **kwargs
+                )
+        return mm_input_by_modality
+
+    def _process_audio_input(self, audio_input: dict) -> tuple[torch.Tensor, ...]:
+        mel_specs = audio_input["audio_features"]
+        if self.audio_encoder is None:
+            return ()
+        # Normalize to List[2D-Tensor].
+        # MultiModalBatchedField._reduce_data either wraps a single [T, 128]
+        # into [1, T, 128] via unsqueeze(0) or stacks N same-T items into
+        # [N, T, 128]. Indexing along dim-0 extracts the per-item [T, 128].
+        if isinstance(mel_specs, torch.Tensor):
+            mel_specs = list(mel_specs)  # [1,T,128] or [N,T,128] → [[T,128],...]
+        if not mel_specs:
+            return ()
+        audio_embeds, item_token_lens = self.audio_encoder.get_audio_feature(mel_specs)
+        return tuple(audio_embeds.split(item_token_lens))
+
+    def embed_multimodal(self, **kwargs: object) -> MultiModalEmbeddings:
+        # Pop video_audio-specific fields before main mm parsing
+        video_audio_n_segs = kwargs.pop("video_audio_n_segs", None)
+        video_audio_seg_lens = kwargs.pop("video_audio_seg_lens", None)
+        va_audio_features = kwargs.pop("va_audio_features", None)
+
+        mm_input_by_modality = self._parse_and_validate_multimodal_inputs(**kwargs)
+        if not mm_input_by_modality and va_audio_features is None:
+            return []
+
+        # The result multimodal_embeddings is tuple of tensors, with each
+        # tensor corresponding to a multimodal data item (image, video, or audio).
+        multimodal_embeddings: list[torch.Tensor] = []
+
+        # Pre-process va audio: one mel spec per va video → per-video audio embeddings
+        # keyed by va video index (0-based among va videos only)
+        va_audio_embs_list: list[tuple[torch.Tensor, ...]] = []
+        if va_audio_features is not None and self.audio_encoder is not None:
+            mel_list = (
+                list(va_audio_features)
+                if isinstance(va_audio_features, torch.Tensor)
+                else list(va_audio_features)
+            )
+            for mel_spec in mel_list:
+                embs, tok_lens = self.audio_encoder.get_audio_feature([mel_spec])
+                # tok_lens is a list/tensor with one entry (total tokens for this mel)
+                va_audio_embs_list.append(embs)  # shape (total_tok, hidden)
+
+        va_cursor = 0  # index into va_audio_embs_list
+
+        # NOTE: Iterate in dict insertion order to preserve token sequence order.
+        for modality in mm_input_by_modality:
+            multimodal_input = mm_input_by_modality[modality]
+            if modality == "image":
+                multimodal_embeddings.extend(
+                    self._process_image_input(multimodal_input)
+                )
+            elif modality == "video":
+                video_embs_tuple = self._process_video_input(multimodal_input)
+                if video_audio_n_segs is None:
+                    multimodal_embeddings.extend(video_embs_tuple)
+                else:
+                    grid_thw = multimodal_input["video_grid_thw"]
+                    for i, vid_embs in enumerate(video_embs_tuple):
+                        n_segs = int(video_audio_n_segs[i])
+                        if n_segs == 0 or not va_audio_embs_list:
+                            multimodal_embeddings.append(vid_embs)
+                        else:
+                            T = int(grid_thw[i][0])
+                            n_per_grid = vid_embs.shape[0] // T
+                            frames = list(vid_embs.split(n_per_grid, dim=0))
+                            frames_per_group = T // n_segs
+                            # Per-group audio token lengths for this va video
+                            # video_audio_seg_lens is (num_videos, max_T); row i
+                            # has valid values in [:n_segs], rest are zeros.
+                            seg_lens = video_audio_seg_lens[i][:n_segs].tolist()
+                            # Split full audio embs for this va video by group lengths
+                            full_va_embs = va_audio_embs_list[va_cursor]
+                            va_cursor += 1
+                            group_audio_embs = full_va_embs.split(seg_lens)
+                            # Interleave: all vid frames in group, then audio for group
+                            for g in range(n_segs):
+                                for f in range(frames_per_group):
+                                    multimodal_embeddings.append(
+                                        frames[g * frames_per_group + f]
+                                    )
+                                multimodal_embeddings.append(group_audio_embs[g])
+            elif modality == "audio":
+                multimodal_embeddings.extend(
+                    self._process_audio_input(multimodal_input)
+                )
+        return tuple(multimodal_embeddings)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        **kwargs: object,
+    ) -> torch.Tensor | IntermediateTensors:
+        """Run forward pass for Qwen2.5-VL.
+
+        Args:
+            input_ids: Flattened (concatenated) input_ids corresponding to a
+                batch.
+            positions: Flattened (concatenated) position ids corresponding to a
+                batch. **NOTE**: If mrope is enabled (default setting for
+                Qwen2.5-VL opensource models), the shape will be `(3, seq_len)`,
+                otherwise it will be `(seq_len,).
+        """
+
+        if intermediate_tensors is not None:
+            inputs_embeds = None
+
+        hidden_states = self.language_model.model(
+            input_ids=input_ids,
+            positions=positions,
+            intermediate_tensors=intermediate_tensors,
+            inputs_embeds=inputs_embeds,
+        )
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        return self.language_model.compute_logits(hidden_states)
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        audio_loaded: set[str] = set()
+
+        loader = AutoWeightsLoader(self, skip_prefixes=["audio_tokenizer."])
+        auto_loaded = loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
+        return audio_loaded | auto_loaded
diff --git a/vllm/model_executor/models/minicpmv.py b/vllm/model_executor/models/minicpmv.py
index 79162eef3f64..3497dbc2e3b9 100644
--- a/vllm/model_executor/models/minicpmv.py
+++ b/vllm/model_executor/models/minicpmv.py
@@ -546,8 +546,9 @@ def get_hf_processor(self, **kwargs: object):
         # NumPy arrays are considered as Iterable but not Sequence in
         # https://github.com/huggingface/transformers/blob/main/src/transformers/image_transforms.py#L428
         image_processor = hf_processor.image_processor  # type: ignore
-        for attr in ("mean", "std"):
-            val = getattr(image_processor, attr)
+        # transformers v5+ renamed `mean`/`std` -> `image_mean`/`image_std`
+        for attr in ("mean", "std", "image_mean", "image_std"):
+            val = getattr(image_processor, attr, None)
             if isinstance(val, np.ndarray):
                 setattr(image_processor, attr, val.tolist())
 
@@ -585,6 +586,50 @@ def get_slice_image_placeholder(
         if version == (2, 0) or version == (2, 5):
             return image_processor.get_slice_image_placeholder(image_size)
 
+        if version == (4, 6):
+            if max_slice_nums is None:
+                max_slice_nums = image_processor.max_slice_nums
+            grids = image_processor.get_sliced_grid(
+                image_size,
+                max_slice_nums=max_slice_nums,
+            )
+            patch_size = image_processor.patch_size
+            scale_resolution = image_processor.scale_resolution
+
+            allow_upscale = grids is None
+            best_size = image_processor.find_best_resize(
+                image_size,
+                scale_resolution,
+                patch_size,
+                allow_upscale=allow_upscale,
+            )
+            h_patches = best_size[1] // patch_size
+            w_patches = best_size[0] // patch_size
+            source_image_visual_tokens = (h_patches // 4) * (w_patches // 4)
+
+            if grids is not None:
+                refine_size = image_processor.get_refine_size(
+                    image_size,
+                    grids,
+                    scale_resolution,
+                    patch_size,
+                    allow_upscale=True,
+                )
+                pw = refine_size[0] // grids[0]
+                ph = refine_size[1] // grids[1]
+                patch_visual_tokens = (ph // patch_size // 4) * (pw // patch_size // 4)
+            else:
+                patch_visual_tokens = source_image_visual_tokens
+
+            return image_processor.get_slice_image_placeholder(
+                grids if grids is not None else [0, 0],
+                image_idx=image_idx,
+                max_slice_nums=max_slice_nums,
+                use_image_id=use_image_id,
+                source_image_visual_tokens=source_image_visual_tokens,
+                patch_visual_tokens=patch_visual_tokens,
+            )
+
         return image_processor.get_slice_image_placeholder(
             image_size,
             image_idx=image_idx,
@@ -618,11 +663,44 @@ def get_num_image_tokens(
         max_slice_nums: int | None = None,
     ) -> int:
         image_processor = self.get_image_processor()
+        version = self.get_model_version()
 
         grid = self.get_sliced_grid(
             image_size,
             max_slice_nums=max_slice_nums,
         )
+
+        if version == (4, 6):
+            patch_size = image_processor.patch_size
+            scale_resolution = image_processor.scale_resolution
+
+            allow_upscale = grid is None
+            best_size = image_processor.find_best_resize(
+                image_size,
+                scale_resolution,
+                patch_size,
+                allow_upscale=allow_upscale,
+            )
+            h_p = best_size[1] // patch_size
+            w_p = best_size[0] // patch_size
+            source_tokens = (h_p // 4) * (w_p // 4)
+
+            if grid is None:
+                return source_tokens
+
+            refine_size = image_processor.get_refine_size(
+                image_size,
+                grid,
+                scale_resolution,
+                patch_size,
+                allow_upscale=True,
+            )
+            pw = refine_size[0] // grid[0]
+            ph = refine_size[1] // grid[1]
+            patch_tokens = (ph // patch_size // 4) * (pw // patch_size // 4)
+            ncols, nrows = grid
+            return source_tokens + ncols * nrows * patch_tokens
+
         if grid is None:
             ncols = nrows = 0
         else:
@@ -839,7 +917,7 @@ def _base_call_hf_processor(
         out_keys: set[str],
     ) -> dict[str, NestedTensors]:
         # This processor supports zipping prompt and mm_data together
-        if self.info.get_model_version() in {(2, 6), (4, 0), (4, 5)}:
+        if self.info.get_model_version() in {(2, 6), (4, 0), (4, 5), (4, 6)}:
             inputs = super()._call_hf_processor(
                 prompt=prompts,  # type: ignore
                 mm_data=mm_data,
@@ -971,10 +1049,15 @@ def _recompute_cached_prompt_update(
             if version == (2, 0) or version == (2, 5):
                 im_start = image_processor.im_start_token
                 im_end = image_processor.im_end_token
-            else:
+            elif hasattr(image_processor, "im_id_start"):
                 im_start = image_processor.im_id_start
                 im_end = image_processor.im_id_end
+            else:
+                # transformers v5.7+ keeps im_id tokens on the tokenizer.
+                im_start = getattr(tokenizer, "image_id_start_token", "<image_id>")
+                im_end = getattr(tokenizer, "image_id_end_token", "</image_id>")
 
+            embed_text = getattr(tokenizer, "image_token", "<unk>")
             new_update = new_update.with_content(
                 PromptUpdateDetails.select_text(
                     text.replace(
@@ -982,7 +1065,7 @@ def _recompute_cached_prompt_update(
                         f"{im_start}{new_item_idx}{im_end}",
                         1,
                     ),
-                    "<unk>",
+                    embed_text,
                 )
             )
 
@@ -1050,9 +1133,17 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
                 quant_config=quant_config,
                 prefix=maybe_prefix(prefix, "resampler"),
             )
+            self._resampler_moved = False
 
         self.make_empty_intermediate_tensors = self.llm.make_empty_intermediate_tensors
 
+    def _ensure_resampler_device(self) -> None:
+        if self._resampler_moved:
+            return
+        # Only move device, DO NOT touch dtype (fp8 quant needs its own dtype)
+        self.resampler.to(current_platform.device_type)
+        self._resampler_moved = True
+
     def _parse_and_validate_vision_input(
         self,
         modality: str,
@@ -1171,7 +1262,9 @@ def compute_logits(
 
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         loader = AutoWeightsLoader(self)
-        return loader.load_weights(weights)
+        loaded = loader.load_weights(weights)
+        self._ensure_resampler_device()
+        return loaded
 
     def get_mm_mapping(self) -> MultiModelKeys:
         """
@@ -1276,9 +1369,7 @@ def init_resampler(
                 prefix=prefix,
             )
 
-        return resampler.to(
-            device=current_platform.device_type, dtype=torch.get_default_dtype()
-        )
+        return resampler.to(dtype=torch.get_default_dtype())
 
     def get_vision_hidden_states(self, data: MiniCPMVImagePixelInputs) -> torch.Tensor:
         pixel_values = data["pixel_values"]
@@ -1359,9 +1450,7 @@ def init_resampler(
                 prefix=prefix,
             )
 
-        return resampler.to(
-            device=current_platform.device_type, dtype=torch.get_default_dtype()
-        )
+        return resampler.to(dtype=torch.get_default_dtype())
 
     def get_vision_hidden_states(self, data: MiniCPMVImagePixelInputs) -> torch.Tensor:
         pixel_values = data["pixel_values"]
@@ -1452,11 +1541,8 @@ def init_resampler(
                 quant_config=quant_config,
                 prefix=prefix,
             )
-        target_device = current_platform.device_type
-        target_dtype = torch.get_default_dtype()
-        if any(p.is_meta for p in resampler.parameters()):
-            return resampler.to_empty(device=target_device).to(dtype=target_dtype)
-        return resampler.to(device=target_device, dtype=target_dtype)
+
+        return resampler.to(dtype=torch.get_default_dtype())
 
     def get_vision_hidden_states(self, data: MiniCPMVImagePixelInputs) -> torch.Tensor:
         pixel_values = data["pixel_values"]
@@ -1491,7 +1577,9 @@ def get_vision_hidden_states(self, data: MiniCPMVImagePixelInputs) -> torch.Tens
 
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         loader = AutoWeightsLoader(self, skip_prefixes=["apm.", "audio", "tts"])
-        return loader.load_weights(weights)
+        loaded = loader.load_weights(weights)
+        self._ensure_resampler_device()
+        return loaded
 
 
 class MiniCPMV4_0(MiniCPMVBaseModel, SupportsLoRA):
@@ -1551,10 +1639,7 @@ def init_resampler(
                 quant_config=quant_config,
                 prefix=prefix,
             )
-
-        return resampler.to(
-            device=current_platform.device_type, dtype=torch.get_default_dtype()
-        )
+        return resampler.to(dtype=torch.get_default_dtype())
 
     def get_vision_hidden_states(self, data: MiniCPMVImagePixelInputs) -> torch.Tensor:
         pixel_values = data["pixel_values"]
@@ -1589,7 +1674,9 @@ def get_vision_hidden_states(self, data: MiniCPMVImagePixelInputs) -> torch.Tens
 
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         loader = AutoWeightsLoader(self, skip_prefixes=["apm.", "audio", "tts"])
-        return loader.load_weights(weights)
+        loaded = loader.load_weights(weights)
+        self._ensure_resampler_device()
+        return loaded
 
 
 class MiniCPMV4_5(MiniCPMVBaseModel, SupportsLoRA):
@@ -1649,11 +1736,8 @@ def init_resampler(
                 quant_config=quant_config,
                 prefix=prefix,
             )
-        target_device = current_platform.device_type
-        target_dtype = torch.get_default_dtype()
-        if any(p.is_meta for p in resampler.parameters()):
-            return resampler.to_empty(device=target_device).to(dtype=target_dtype)
-        return resampler.to(device=target_device, dtype=target_dtype)
+
+        return resampler.to(dtype=torch.get_default_dtype())
 
     def get_vision_hidden_states(self, data: MiniCPMVImagePixelInputs) -> torch.Tensor:
         pixel_values = data["pixel_values"]
@@ -1692,7 +1776,9 @@ def get_vision_hidden_states(self, data: MiniCPMVImagePixelInputs) -> torch.Tens
 
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         loader = AutoWeightsLoader(self, skip_prefixes=["apm.", "audio", "tts"])
-        return loader.load_weights(weights)
+        loaded = loader.load_weights(weights)
+        self._ensure_resampler_device()
+        return loaded
 
 
 _SUPPORT_VERSION = {
diff --git a/vllm/model_executor/models/minicpmv4_6.py b/vllm/model_executor/models/minicpmv4_6.py
new file mode 100644
index 000000000000..d2d465b7e5a4
--- /dev/null
+++ b/vllm/model_executor/models/minicpmv4_6.py
@@ -0,0 +1,1233 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Inference-only MiniCPM-V 4.6 model (MiniCPMV4_6ForConditionalGeneration)."""
+
+from collections.abc import Iterable, Mapping
+from typing import Any
+
+import torch
+from torch import nn
+from transformers import MiniCPMV4_6Config
+
+from vllm.config import VllmConfig
+from vllm.distributed import get_tensor_model_parallel_world_size
+from vllm.model_executor.layers.activation import get_act_fn
+from vllm.model_executor.layers.attention import MMEncoderAttention
+from vllm.model_executor.layers.linear import (
+    QKVParallelLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.mamba.mamba_utils import (
+    MambaStateCopyFuncCalculator,
+    MambaStateDtypeCalculator,
+    MambaStateShapeCalculator,
+)
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.multimodal.inputs import (
+    MultiModalFeatureSpec,
+    MultiModalFieldConfig,
+    NestedTensors,
+)
+from vllm.multimodal.parse import ImageProcessorItems, VideoProcessorItems
+from vllm.multimodal.processing.processor import (
+    PromptReplacement,
+    PromptUpdateDetails,
+)
+from vllm.sequence import IntermediateTensors
+
+from .idefics2_vision_model import Idefics2VisionTransformer
+from .interfaces import (
+    HasInnerState,
+    IsHybrid,
+    MultiModalEmbeddings,
+    SupportsMRoPE,
+    SupportsMultiModal,
+    SupportsPP,
+    _require_is_multimodal,
+)
+from .minicpmv import (
+    MiniCPMVDummyInputsBuilder,
+    MiniCPMVImageEmbeddingInputs,
+    MiniCPMVImageEmbeddingItems,
+    MiniCPMVImagePixelInputs,
+    MiniCPMVMultiModalProcessor,
+    MiniCPMVProcessingInfo,
+    MiniCPMVVideoEmbeddingItems,
+)
+from .module_mapping import MultiModelKeys
+from .qwen3_5 import Qwen3_5ForCausalLM
+from .utils import (
+    AutoWeightsLoader,
+    WeightsMapper,
+    _merge_multimodal_embeddings,
+    flatten_bn,
+    maybe_prefix,
+)
+from .vision import is_vit_use_data_parallel
+
+
+def _minicpmv4_6_field_config(hf_inputs: Mapping[str, torch.Tensor]):
+    fields = dict(
+        pixel_values=MultiModalFieldConfig.batched("image"),
+        tgt_sizes=MultiModalFieldConfig.batched("image"),
+        image_embeds=MultiModalFieldConfig.batched("image"),
+        video_pixel_values=MultiModalFieldConfig.batched("video"),
+        video_image_sizes=MultiModalFieldConfig.batched("video"),
+        video_tgt_sizes=MultiModalFieldConfig.batched("video"),
+        video_embeds=MultiModalFieldConfig.batched("video"),
+    )
+    if "use_vit_merger" in hf_inputs:
+        fields["use_vit_merger"] = MultiModalFieldConfig.batched("image")
+    return fields
+
+
+class MiniCPMV4_6MultiModalProcessor(MiniCPMVMultiModalProcessor):
+    def _resolve_downsample_mode(
+        self,
+        mm_kwargs: Mapping[str, object],
+    ) -> str:
+        ds = mm_kwargs.get("downsample_mode")
+        if ds is not None:
+            return str(ds)
+        return self.info._get_downsample_mode()
+
+    def get_image_prompt_texts(
+        self,
+        image_size,
+        image_idx: int = 0,
+        downsample_mode: str | None = None,
+    ) -> str:
+        return self.info.get_slice_image_placeholder(
+            image_size,
+            image_idx=image_idx,
+            downsample_mode=downsample_mode,
+        )
+
+    def get_video_prompt_texts(
+        self,
+        image_size,
+        num_frames: int,
+        downsample_mode: str | None = None,
+        video_idx: int = 0,
+    ) -> str:
+        # Match transformers v5.7+ MiniCPMV4_6Processor video formatting:
+        #   <image_id>{video_idx}</image_id>(<image>VIDEO*src</image>
+        #     <slice>VIDEO*patch</slice>...)*num_frames
+        # Crucially the visual token inside each frame is ``<|video_pad|>``
+        # (tokenizer.video_token), NOT ``<|image_pad|>`` — they share the same
+        # embedding-injection role but the language model is conditioned on
+        # which one is used. Using image_token for video silently produces
+        # garbage descriptions.
+        info = self.info
+        grids, source_tokens, patch_tokens = info._compute_visual_tokens(
+            image_size,
+            max_slice_nums=info.get_video_max_slice_num(),
+            downsample_mode=downsample_mode,
+        )
+        tokenizer = info.get_tokenizer()
+        video_token = getattr(tokenizer, "video_token", "<|video_pad|>")
+        image_start = getattr(tokenizer, "image_start_token", "<image>")
+        image_end = getattr(tokenizer, "image_end_token", "</image>")
+        slice_start = getattr(tokenizer, "slice_start_token", "<slice>")
+        slice_end = getattr(tokenizer, "slice_end_token", "</slice>")
+        id_start = getattr(tokenizer, "image_id_start_token", "<image_id>")
+        id_end = getattr(tokenizer, "image_id_end_token", "</image_id>")
+
+        per_frame = image_start + video_token * source_tokens + image_end
+        if grids[0] > 0 and grids[1] > 0 and patch_tokens > 0:
+            slice_ph = slice_start + video_token * patch_tokens + slice_end
+            rows = [slice_ph * grids[0] for _ in range(grids[1])]
+            per_frame += "\n".join(rows)
+
+        body = per_frame * num_frames
+        return f"{id_start}{video_idx}{id_end}" + body
+
+    def process_images(
+        self,
+        mm_data: Mapping[str, object],
+        mm_kwargs: Mapping[str, object],
+        tok_kwargs: Mapping[str, object],
+    ) -> Mapping[str, NestedTensors]:
+        if (images := mm_data.get("images")) is None:
+            return {}
+
+        mm_items = self.info.parse_mm_data({"image": images}, validate=False)
+        parsed_images = mm_items.get_items(
+            "image", (MiniCPMVImageEmbeddingItems, ImageProcessorItems)
+        )
+
+        if isinstance(parsed_images, MiniCPMVImageEmbeddingItems):
+            return {}
+
+        # transformers v5.7+ MiniCPMV4_6ImageProcessor returns
+        # `pixel_values` (1, C, P, sum_W) where all slices are fused along W
+        # (NaViT-style), and `target_sizes` (n_slices, 2). vLLM expects each
+        # image entry to be a 4D tensor (n_slices, C, P, L_max_padded).
+        n_images = len(parsed_images)
+        image_processor = self.info.get_image_processor()
+        patch_size = image_processor.patch_size
+        per_image_pixel_values: list[torch.Tensor] = []
+        per_image_tgt_sizes: list[torch.Tensor] = []
+        for image in parsed_images:
+            ip_out = image_processor([image], **mm_kwargs)
+            pv = ip_out["pixel_values"]  # (1, C, P, sum_W)
+            ts = ip_out["target_sizes"]  # (n_slices, 2)
+            if pv.ndim == 4 and pv.shape[0] == 1:
+                pv = pv.squeeze(0)  # (C, P, sum_W)
+            ts_long = ts.to(torch.long)
+            split_widths = (ts_long[:, 0] * ts_long[:, 1] * patch_size).tolist()
+            slices = torch.split(pv, split_widths, dim=-1)
+            n_slices = len(slices)
+            l_max = max(s.shape[-1] for s in slices)
+            out = torch.zeros(
+                n_slices,
+                pv.shape[0],
+                pv.shape[1],
+                l_max,
+                dtype=pv.dtype,
+                device=pv.device,
+            )
+            for i, s in enumerate(slices):
+                out[i, :, :, : s.shape[-1]] = s
+            per_image_pixel_values.append(out)
+            per_image_tgt_sizes.append(ts_long)
+
+        image_inputs: dict = {
+            "pixel_values": per_image_pixel_values,
+            "tgt_sizes": per_image_tgt_sizes,
+        }
+
+        ds_mode = self._resolve_downsample_mode(mm_kwargs)
+        insert_layer_id = getattr(
+            self.info.get_hf_config(),
+            "insert_layer_id",
+            -1,
+        )
+        merger_flag = ds_mode != "4x" and insert_layer_id >= 0
+        image_inputs["use_vit_merger"] = [
+            torch.tensor([merger_flag], dtype=torch.bool) for _ in range(n_images)
+        ]
+        return image_inputs
+
+    def process_videos(
+        self,
+        mm_data: Mapping[str, object],
+        mm_kwargs: Mapping[str, object],
+        tok_kwargs: Mapping[str, object],
+    ) -> Mapping[str, NestedTensors]:
+        if (videos := mm_data.get("videos")) is None:
+            return {}
+
+        mm_items = self.info.parse_mm_data({"video": videos}, validate=False)
+        parsed_videos = mm_items.get_items(
+            "video", (MiniCPMVVideoEmbeddingItems, VideoProcessorItems)
+        )
+
+        if isinstance(parsed_videos, MiniCPMVVideoEmbeddingItems):
+            return {}
+
+        # Treat each video as a sequence of frames. The transformers v5.7+
+        # `MiniCPMV4_6ImageProcessor` returns NaViT-style fused `pixel_values`;
+        # we run it per-frame, split the slices, then re-pack each video into
+        # a single 4D tensor (sum_slices, C, P, L_max_video).
+        image_processor = self.info.get_image_processor()
+        patch_size = image_processor.patch_size
+        video_max_slice = self.info.get_video_max_slice_num()
+        video_mm_kwargs = {**mm_kwargs, "max_slice_nums": video_max_slice}
+
+        per_video_pixel_values: list[torch.Tensor] = []
+        per_video_tgt_sizes: list[torch.Tensor] = []
+
+        for video in parsed_videos:
+            # video is iterable of frames (PIL Image or numpy array).
+            all_slices: list[torch.Tensor] = []
+            ts_list: list[torch.Tensor] = []
+            for frame in video:
+                ip_out = image_processor([frame], **video_mm_kwargs)
+                pv = ip_out["pixel_values"]  # (1, C, P, sum_W)
+                ts = ip_out["target_sizes"]  # (n_slices, 2)
+                if pv.ndim == 4 and pv.shape[0] == 1:
+                    pv = pv.squeeze(0)  # (C, P, sum_W)
+                ts_long = ts.to(torch.long)
+                split_widths = (ts_long[:, 0] * ts_long[:, 1] * patch_size).tolist()
+                slices = torch.split(pv, split_widths, dim=-1)
+                all_slices.extend(slices)
+                ts_list.append(ts_long)
+
+            if not all_slices:
+                continue
+
+            l_max = max(s.shape[-1] for s in all_slices)
+            n_total = len(all_slices)
+            C, P = all_slices[0].shape[0], all_slices[0].shape[1]
+            out = torch.zeros(
+                n_total,
+                C,
+                P,
+                l_max,
+                dtype=all_slices[0].dtype,
+                device=all_slices[0].device,
+            )
+            for i, s in enumerate(all_slices):
+                out[i, :, :, : s.shape[-1]] = s
+
+            per_video_pixel_values.append(out)
+            per_video_tgt_sizes.append(torch.cat(ts_list, dim=0))
+
+        if not per_video_pixel_values:
+            return {}
+
+        return {
+            "video_pixel_values": per_video_pixel_values,
+            "video_tgt_sizes": per_video_tgt_sizes,
+        }
+
+    def _get_prompt_updates(
+        self,
+        mm_items,
+        hf_processor_mm_kwargs: Mapping[str, object],
+        out_mm_kwargs,
+    ):
+        ds_mode = self._resolve_downsample_mode(hf_processor_mm_kwargs)
+
+        placeholders = [
+            ("image", self.info.image_pattern),
+            ("video", self.info.video_pattern),
+        ]
+        tokenizer = self.info.get_tokenizer()
+        additional_placeholders = []
+        for modality, pattern in placeholders:
+            sub_pattern = tokenizer.decode(
+                tokenizer.encode(pattern, add_special_tokens=False)
+            )
+            if sub_pattern != pattern:
+                additional_placeholders.append((modality, sub_pattern))
+        placeholders += additional_placeholders
+
+        # The 4.6 chat_template emits `<|image_pad|>` / `<|video_pad|>` rather
+        # than `<unk>`, so use those tokens as the embedding selector.
+        image_embed_text = getattr(tokenizer, "image_token", "<|image_pad|>")
+        video_embed_text = getattr(tokenizer, "video_token", "<|video_pad|>")
+
+        def get_image_replacement(item_idx: int):
+            images = mm_items.get_items(
+                "image",
+                (MiniCPMVImageEmbeddingItems, ImageProcessorItems),
+            )
+            image_size = images.get_image_size(item_idx)
+            return PromptUpdateDetails.select_text(
+                self.get_image_prompt_texts(
+                    image_size,
+                    item_idx,
+                    downsample_mode=ds_mode,
+                ),
+                image_embed_text,
+            )
+
+        def get_video_replacement(item_idx: int):
+            videos = mm_items.get_items(
+                "video",
+                (MiniCPMVVideoEmbeddingItems, VideoProcessorItems),
+            )
+            frame_size = videos.get_frame_size(item_idx)
+            num_frames = videos.get_num_frames(item_idx)
+            return PromptUpdateDetails.select_text(
+                self.get_video_prompt_texts(
+                    frame_size,
+                    num_frames,
+                    downsample_mode=ds_mode,
+                    video_idx=item_idx,
+                ),
+                video_embed_text,
+            )
+
+        get_replacement = {
+            "image": get_image_replacement,
+            "video": get_video_replacement,
+        }
+
+        return [
+            PromptReplacement(
+                modality=modality,
+                target=pattern,
+                replacement=get_replacement[modality],
+            )
+            for modality, pattern in placeholders
+        ]
+
+    def _get_mm_fields_config(
+        self,
+        hf_inputs,
+        hf_processor_mm_kwargs: Mapping[str, object],
+    ) -> Mapping[str, MultiModalFieldConfig]:
+        return _minicpmv4_6_field_config(hf_inputs)
+
+
+class MiniCPMV4_6ProcessingInfo(MiniCPMVProcessingInfo):
+    # transformers v5.7+ chat_template emits these as image/video placeholders.
+    image_pattern = "<|image_pad|>"
+    video_pattern = "<|video_pad|>"
+
+    def get_hf_config(self):
+        return self.ctx.get_hf_config()
+
+    def _get_expected_hidden_size(self) -> int:
+        config = self.get_hf_config()
+        if hasattr(config, "text_config") and config.text_config is not None:
+            return config.text_config.hidden_size
+        return config.hidden_size
+
+    def get_model_version(self):
+        return (4, 6)
+
+    def get_supported_mm_limits(self) -> Mapping[str, int | None]:
+        return {"image": None, "video": None}
+
+    def get_image_max_slice_num(self) -> int:
+        config = self.get_hf_config()
+        if hasattr(config, "slice_config") and config.slice_config is not None:
+            return getattr(config.slice_config, "max_slice_nums", 9)
+        return getattr(config, "max_slice_nums", 9)
+
+    def get_video_max_slice_num(self) -> int:
+        # Override the base class default of 1: transformers v5.7+
+        # `MiniCPMV4_6VideoProcessor` keeps the same max_slice_nums (default 9)
+        # as the image processor so that high-res frames get sliced.
+        try:
+            hf_processor = self.get_hf_processor()
+            video_processor = getattr(hf_processor, "video_processor", None)
+            if video_processor is not None:
+                return int(getattr(video_processor, "max_slice_nums", 9))
+        except Exception:
+            pass
+        return self.get_image_max_slice_num()
+
+    def _get_downsample_mode(
+        self,
+        downsample_mode: str | None = None,
+    ) -> str:
+        if downsample_mode is not None:
+            return downsample_mode
+        image_processor = self.get_image_processor()
+        return getattr(image_processor, "downsample_mode", "16x")
+
+    def _compute_visual_tokens(
+        self,
+        image_size,
+        max_slice_nums: int | None = None,
+        downsample_mode: str | None = None,
+    ) -> tuple[list[int], int, int]:
+        """Compute grid, source_image_visual_tokens and patch_visual_tokens.
+
+        Args:
+            downsample_mode: ``"16x"`` (default, full merge) or ``"4x"``
+                (skip vit_merger, 4x more visual tokens).
+
+        Returns:
+            (grids, source_image_visual_tokens, patch_visual_tokens)
+            grids is [0, 0] when no slicing occurs.
+        """
+        image_processor = self.get_image_processor()
+        if max_slice_nums is None:
+            max_slice_nums = image_processor.max_slice_nums
+
+        patch_size = image_processor.patch_size
+        scale_res = image_processor.scale_resolution
+        downsample_mode = self._get_downsample_mode(downsample_mode)
+        token_divisor = 4 if downsample_mode == "4x" else 16
+
+        # transformers v5.7+ requires `scale_resolution` arg
+        try:
+            grids = image_processor.get_sliced_grid(
+                image_size,
+                max_slice_nums,
+                scale_res,
+            )
+        except TypeError:
+            grids = image_processor.get_sliced_grid(
+                image_size,
+                max_slice_nums,
+            )
+
+        if grids is None:
+            best_size = image_processor.find_best_resize(
+                image_size,
+                scale_res,
+                patch_size,
+                allow_upscale=True,
+            )
+            source_tokens = (
+                best_size[0] * best_size[1] // (patch_size * patch_size * token_divisor)
+            )
+            return [0, 0], source_tokens, 0
+
+        best_resize = image_processor.find_best_resize(
+            image_size,
+            scale_res,
+            patch_size,
+        )
+        source_tokens = (
+            best_resize[0] * best_resize[1] // (patch_size * patch_size * token_divisor)
+        )
+        refine_size = image_processor.get_refine_size(
+            image_size,
+            grids,
+            scale_res,
+            patch_size,
+            allow_upscale=True,
+        )
+        patch_w = refine_size[0] // grids[0]
+        patch_h = refine_size[1] // grids[1]
+        patch_tokens = patch_w * patch_h // (patch_size * patch_size * token_divisor)
+        return grids, source_tokens, patch_tokens
+
+    def get_slice_image_placeholder(
+        self,
+        image_size,
+        image_idx: int = 0,
+        max_slice_nums: int | None = None,
+        use_image_id: bool = True,
+        downsample_mode: str | None = None,
+    ) -> str:
+        grids, source_tokens, patch_tokens = self._compute_visual_tokens(
+            image_size,
+            max_slice_nums,
+            downsample_mode=downsample_mode,
+        )
+        image_processor = self.get_image_processor()
+        # transformers v5.7+ removed `get_slice_image_placeholder` from the
+        # image_processor and moved the logic into MiniCPMV4_6Processor.
+        # Replicate it here using tokenizer special tokens.
+        if hasattr(image_processor, "get_slice_image_placeholder"):
+            return image_processor.get_slice_image_placeholder(
+                grids,
+                image_idx=image_idx,
+                max_slice_nums=max_slice_nums,
+                use_image_id=use_image_id,
+                source_image_visual_tokens=source_tokens,
+                patch_visual_tokens=patch_tokens,
+            )
+        tokenizer = self.get_tokenizer()
+        image_token = getattr(tokenizer, "image_token", "<|image_pad|>")
+        image_start = getattr(tokenizer, "image_start_token", "<image>")
+        image_end = getattr(tokenizer, "image_end_token", "</image>")
+        slice_start = getattr(tokenizer, "slice_start_token", "<slice>")
+        slice_end = getattr(tokenizer, "slice_end_token", "</slice>")
+        id_start = getattr(tokenizer, "image_id_start_token", "<image_id>")
+        id_end = getattr(tokenizer, "image_id_end_token", "</image_id>")
+
+        placeholder = image_start + image_token * source_tokens + image_end
+        if use_image_id:
+            placeholder = f"{id_start}{image_idx}{id_end}" + placeholder
+
+        num_cols, num_rows = grids[0], grids[1]
+        if num_cols > 0 and num_rows > 0 and patch_tokens > 0:
+            slice_ph = slice_start + image_token * patch_tokens + slice_end
+            slices = [slice_ph * num_cols for _ in range(num_rows)]
+            placeholder += "\n".join(slices)
+        return placeholder
+
+    def get_num_image_tokens(
+        self,
+        image_size,
+        max_slice_nums: int | None = None,
+        downsample_mode: str | None = None,
+    ) -> int:
+        grids, source_tokens, patch_tokens = self._compute_visual_tokens(
+            image_size,
+            max_slice_nums,
+            downsample_mode=downsample_mode,
+        )
+        return source_tokens + grids[0] * grids[1] * patch_tokens
+
+
+class MiniCPMV4_6ViTWindowAttentionSelfAttn(nn.Module):
+    def __init__(
+        self,
+        config,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        use_data_parallel = is_vit_use_data_parallel()
+        self.embed_dim = config.hidden_size
+        self.num_heads = config.num_attention_heads
+        self.head_dim = self.embed_dim // self.num_heads
+        self.scale = self.head_dim**-0.5
+
+        tp_size = 1 if use_data_parallel else get_tensor_model_parallel_world_size()
+        assert self.num_heads % tp_size == 0
+        self.num_heads_per_partition = self.num_heads // tp_size
+
+        self.qkv_proj = QKVParallelLinear(
+            self.embed_dim,
+            self.head_dim,
+            self.num_heads,
+            quant_config=quant_config,
+            prefix=f"{prefix}.qkv_proj",
+            disable_tp=use_data_parallel,
+        )
+        self.out_proj = RowParallelLinear(
+            self.embed_dim,
+            self.embed_dim,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.out_proj",
+            disable_tp=use_data_parallel,
+        )
+        self.attn = MMEncoderAttention(
+            self.num_heads_per_partition,
+            self.head_dim,
+            self.scale,
+            prefix=f"{prefix}.attn",
+        )
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        qkv, _ = self.qkv_proj(hidden_states)
+        q, k, v = qkv.chunk(3, dim=-1)
+        attn_out = self.attn(q, k, v)
+        out, _ = self.out_proj(attn_out)
+        return out
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        stacked_params_mapping = [
+            ("qkv_proj", "q_proj", "q"),
+            ("qkv_proj", "k_proj", "k"),
+            ("qkv_proj", "v_proj", "v"),
+        ]
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+        for name, loaded_weight in weights:
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                if weight_name not in name:
+                    continue
+                mapped_name = name.replace(weight_name, param_name, 1)
+                if mapped_name not in params_dict:
+                    continue
+                param = params_dict[mapped_name]
+                param.weight_loader(param, loaded_weight, shard_id)
+                break
+            else:
+                if name not in params_dict:
+                    continue
+                param = params_dict[name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                weight_loader(param, loaded_weight)
+            loaded_params.add(name)
+        return loaded_params
+
+
+class MiniCPMV4_6ViTWindowAttentionMerger(nn.Module):
+    def __init__(
+        self,
+        config,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.window_kernel_size = (2, 2)
+        self.embed_dim = config.hidden_size
+
+        self.self_attn = MiniCPMV4_6ViTWindowAttentionSelfAttn(
+            config,
+            quant_config=quant_config,
+            prefix=f"{prefix}.self_attn",
+        )
+        self.layer_norm1 = nn.LayerNorm(
+            self.embed_dim,
+            eps=config.layer_norm_eps,
+        )
+
+        hidden_4x = self.embed_dim * 4
+        inter_4x = config.intermediate_size * 4
+
+        self.pre_norm = nn.LayerNorm(hidden_4x, eps=config.layer_norm_eps)
+        self.linear_1 = nn.Linear(hidden_4x, inter_4x, bias=True)
+        self.act = get_act_fn("gelu_pytorch_tanh")
+        self.linear_2 = nn.Linear(inter_4x, self.embed_dim, bias=True)
+
+    def _apply_window_attention(
+        self,
+        valid_states: torch.Tensor,
+        H: int,
+        W: int,
+    ) -> torch.Tensor:
+        D = valid_states.shape[-1]
+        wh, ww = self.window_kernel_size
+        nh, nw = H // wh, W // ww
+        num_windows = nh * nw
+
+        x = valid_states.view(H, W, D)
+        x = x.view(nh, wh, nw, ww, D).permute(0, 2, 1, 3, 4).contiguous()
+        x = x.view(num_windows, wh * ww, D)
+
+        x = self.self_attn(x)
+
+        x = x.view(nh, nw, wh, ww, D).permute(0, 2, 1, 3, 4).contiguous()
+        return x.view(H * W, D)
+
+    def _apply_mlp_downsample(
+        self,
+        valid_states: torch.Tensor,
+        H: int,
+        W: int,
+    ) -> torch.Tensor:
+        D = valid_states.shape[-1]
+        wh, ww = self.window_kernel_size
+        nh, nw = H // wh, W // ww
+
+        x = valid_states.view(H, W, D)
+        x = x.view(nh, wh, nw, ww, D).permute(0, 2, 1, 3, 4).contiguous()
+
+        residual = x.reshape(nh * nw, wh * ww, D).mean(dim=1)
+        x = x.reshape(nh * nw, wh * ww * D)
+
+        x = self.pre_norm(x)
+        x = self.linear_1(x)
+        x = self.act(x)
+        x = self.linear_2(x)
+        return x + residual
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        tgt_sizes: torch.Tensor,
+        attention_mask: torch.Tensor | None,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor | None]:
+        B, _L, D = hidden_states.shape
+        device = hidden_states.device
+        dtype = hidden_states.dtype
+
+        all_merged = []
+        new_tgt_sizes = torch.zeros_like(tgt_sizes)
+
+        for b in range(B):
+            H, W = tgt_sizes[b].tolist()
+            hs = hidden_states[b, : H * W, :]
+
+            residual = hs
+            hs = self.layer_norm1(hs)
+            hs = residual + self._apply_window_attention(hs, H, W)
+
+            wh, ww = self.window_kernel_size
+            new_H, new_W = H // wh, W // ww
+            all_merged.append(self._apply_mlp_downsample(hs, H, W))
+            new_tgt_sizes[b] = torch.tensor(
+                [new_H, new_W],
+                device=device,
+                dtype=tgt_sizes.dtype,
+            )
+
+        new_num_patches = new_tgt_sizes[:, 0] * new_tgt_sizes[:, 1]
+        new_max_patches = int(new_num_patches.max().item())
+        new_hidden = torch.zeros(
+            B,
+            new_max_patches,
+            D,
+            device=device,
+            dtype=dtype,
+        )
+        for b, merged in enumerate(all_merged):
+            new_hidden[b, : merged.shape[0], :] = merged
+
+        # Build new attention mask after spatial downsampling
+        new_attention_mask: torch.Tensor | None = None
+        if attention_mask is not None:
+            mask = torch.zeros(
+                B,
+                new_max_patches,
+                dtype=torch.bool,
+                device=device,
+            )
+            for b in range(B):
+                mask[b, : int(new_num_patches[b].item())] = True
+            min_val = torch.finfo(dtype).min
+            new_attention_mask = (~mask).to(dtype=dtype) * min_val
+            new_attention_mask = new_attention_mask[:, None, None, :]
+
+        return new_hidden, new_tgt_sizes, new_attention_mask
+
+
+class MiniCPMV4_6DownsampleMLP(nn.Module):
+    """Match HF (transformers v5.7+) parameter naming: pre_norm/linear_1/
+    act/linear_2 (instead of pre_norm + Sequential(mlp.0/mlp.2))."""
+
+    def __init__(
+        self,
+        hidden_size: int,
+        llm_embed_dim: int,
+        merge_kernel_size: tuple[int, int] = (2, 2),
+    ):
+        super().__init__()
+        self.merge_kernel_size = merge_kernel_size
+        self.hidden_size = hidden_size * merge_kernel_size[0] * merge_kernel_size[1]
+        self.pre_norm = nn.LayerNorm(self.hidden_size, eps=1e-6)
+        self.linear_1 = nn.Linear(self.hidden_size, self.hidden_size, bias=True)
+        self.act = get_act_fn("gelu")
+        self.linear_2 = nn.Linear(self.hidden_size, llm_embed_dim, bias=True)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.pre_norm(x)
+        x = self.linear_1(x)
+        x = self.act(x)
+        x = self.linear_2(x)
+        return x
+
+
+class MiniCPMV4_6Merger(nn.Module):
+    def __init__(
+        self,
+        hidden_size: int,
+        llm_embed_dim: int,
+        merge_kernel_size: tuple[int, int] = (2, 2),
+        times: int = 1,
+    ):
+        super().__init__()
+        self.merge_kernel_size = merge_kernel_size
+        self.times = times
+        self.mlp = nn.ModuleList(
+            [
+                MiniCPMV4_6DownsampleMLP(
+                    hidden_size,
+                    llm_embed_dim if i == times - 1 else hidden_size,
+                    merge_kernel_size,
+                )
+                for i in range(times)
+            ]
+        )
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        tgt_sizes: torch.Tensor,
+    ) -> list[torch.Tensor]:
+        """
+        Args:
+            hidden_states: (B, max_patches, D) padded batch.
+            tgt_sizes: (B, 2) actual (H, W) per sample.
+        """
+        m1, m2 = self.merge_kernel_size
+        results = []
+
+        for b in range(len(tgt_sizes)):
+            h, w = tgt_sizes[b].tolist()
+            n_patches = h * w
+            hs = hidden_states[b, :n_patches, :]
+
+            hs = hs.reshape(h // m1, m1, w // m2, m2, -1)
+            hs = hs.permute(0, 2, 1, 3, 4).reshape(
+                (h // m1) * (w // m2),
+                m1 * m2 * hs.shape[-1],
+            )
+            hs = self.mlp[0](hs)
+
+            if self.times > 1:
+                cur_h, cur_w = h // m1, w // m2
+                for t in range(1, self.times):
+                    cur_h, cur_w = cur_h // m1, cur_w // m2
+                    hs = hs.reshape(cur_h, m1, cur_w, m2, -1)
+                    hs = hs.permute(0, 2, 1, 3, 4).reshape(
+                        cur_h * cur_w,
+                        m1 * m2 * hs.shape[-1],
+                    )
+                    hs = self.mlp[t](hs)
+
+            results.append(hs)
+
+        return results
+
+
+@MULTIMODAL_REGISTRY.register_processor(
+    MiniCPMV4_6MultiModalProcessor,
+    info=MiniCPMV4_6ProcessingInfo,
+    dummy_inputs=MiniCPMVDummyInputsBuilder,
+)
+class MiniCPMV4_6ForConditionalGeneration(
+    nn.Module,
+    SupportsMultiModal,
+    SupportsPP,
+    HasInnerState,
+    IsHybrid,
+    SupportsMRoPE,
+):
+    supports_encoder_tp_data = True
+
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_prefix={
+            # transformers v5.7+ uses `vision_tower` and nests `vit_merger`
+            # inside it. Order matters: more specific prefix must come first.
+            "model.vision_tower.vit_merger.": "vit_merger.",
+            "model.vision_tower.": "vpm.",
+            "model.vpm.": "vpm.",
+            "model.vit_merger.": "vit_merger.",
+            "model.merger.": "merger.",
+            "model.language_model.": "language_model.model.",
+            "lm_head.": "language_model.lm_head.",
+        }
+    )
+
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+        "gate_up_proj": ["gate_proj", "up_proj"],
+        "in_proj_qkvz": ["in_proj_qkv", "in_proj_z"],
+        "in_proj_ba": ["in_proj_b", "in_proj_a"],
+    }
+
+    @classmethod
+    def get_placeholder_str(cls, modality: str, i: int) -> str | None:
+        # transformers v5.7+ chat_template uses these tokens.
+        if modality.startswith("image"):
+            return "<|image_pad|>"
+        if modality.startswith("video"):
+            return "<|video_pad|>"
+        raise ValueError("Only image or video modality is supported")
+
+    def get_mrope_input_positions(
+        self,
+        input_tokens: list[int],
+        mm_features: list["MultiModalFeatureSpec"],
+    ) -> tuple[torch.Tensor, int]:
+        """MiniCPM-V uses embedding injection for vision, not spatial M-RoPE.
+
+        All tokens (text and vision placeholders) get identical sequential
+        positions duplicated across the 3 M-RoPE channels expected by the
+        Qwen3.5 backbone.
+        """
+        seq_len = len(input_tokens)
+        positions = torch.arange(seq_len).unsqueeze(0).expand(3, -1)
+        return positions, 0
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config: MiniCPMV4_6Config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        multimodal_config = vllm_config.model_config.multimodal_config
+
+        self.config = config
+        self.multimodal_config = multimodal_config
+        self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
+
+        # --- Vision tower ---
+        with self._mark_tower_model(vllm_config, {"image"}):
+            self.vpm = Idefics2VisionTransformer(
+                config.vision_config,
+                quant_config=quant_config,
+                apply_encoder_attention_mask=True,
+                prefix=maybe_prefix(prefix, "vpm"),
+            )
+            if config.drop_vision_last_layer:
+                self.vpm.encoder.layers = self.vpm.encoder.layers[:-1]
+
+            self.vit_merger = MiniCPMV4_6ViTWindowAttentionMerger(
+                config.vision_config,
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "vit_merger"),
+            )
+            self.merger = MiniCPMV4_6Merger(
+                hidden_size=config.vision_config.hidden_size,
+                llm_embed_dim=config.text_config.hidden_size,
+            )
+
+        # --- Language model ---
+        # Temporarily swap top-level model_type so that Qwen3_5ForCausalLM
+        # picks up the expected text config when introspecting the hf config.
+        with self._mark_language_model(vllm_config):
+            saved_model_type = config.model_type
+            config.model_type = "qwen3_5_text"
+            try:
+                self.language_model = Qwen3_5ForCausalLM(
+                    vllm_config=vllm_config,
+                    prefix=maybe_prefix(prefix, "language_model"),
+                )
+            finally:
+                config.model_type = saved_model_type
+
+        self.make_empty_intermediate_tensors = (
+            self.language_model.make_empty_intermediate_tensors
+        )
+
+    # ----- Multimodal parsing -----
+
+    def _parse_and_validate_vision_input(
+        self,
+        **kwargs: object,
+    ) -> MiniCPMVImagePixelInputs | MiniCPMVImageEmbeddingInputs | None:
+        pixel_values = kwargs.pop("pixel_values", None)
+        image_embeds = kwargs.pop("image_embeds", None)
+
+        if pixel_values is None and image_embeds is None:
+            return None
+
+        if image_embeds is not None:
+            return MiniCPMVImageEmbeddingInputs(
+                type="image_embeds",
+                image_embeds=image_embeds,
+            )
+
+        tgt_sizes = kwargs.pop("tgt_sizes")
+        num_slices_flat = torch.tensor([len(ps) for ps in pixel_values])
+        pixel_values_flat = flatten_bn(pixel_values)
+        tgt_sizes_flat = flatten_bn(tgt_sizes, concat=True)
+
+        return MiniCPMVImagePixelInputs(
+            type="pixel_values",
+            pixel_values=pixel_values_flat,
+            tgt_sizes=tgt_sizes_flat,
+            num_slices=num_slices_flat,
+        )
+
+    # ----- Vision forward -----
+
+    def get_vision_hidden_states(
+        self,
+        data: MiniCPMVImagePixelInputs,
+        downsample_mode: str | None = None,
+    ) -> list[torch.Tensor]:
+        pixel_values = data["pixel_values"]
+        tgt_sizes = data["tgt_sizes"]
+
+        B = len(pixel_values)
+        P = pixel_values[0].shape[-2]
+        L = max(item.shape[-1] for item in pixel_values)
+        device = pixel_values[0].device
+        target_dtype = self.vpm.embeddings.patch_embedding.weight.dtype
+
+        all_pixel_values = torch.zeros(
+            B,
+            3,
+            P,
+            L,
+            dtype=target_dtype,
+            device=device,
+        )
+        for i, pv in enumerate(pixel_values):
+            all_pixel_values[i, ..., : pv.shape[-1]] = pv.to(target_dtype)
+
+        num_patches = tgt_sizes.prod(-1)
+        max_patches = int(num_patches.max().item())
+        patch_attn_mask = torch.zeros(
+            B,
+            max_patches,
+            dtype=torch.bool,
+            device=device,
+        )
+        for i in range(B):
+            patch_attn_mask[i, : num_patches[i]] = True
+
+        hidden_states = self.vpm.embeddings(
+            all_pixel_values,
+            patch_attention_mask=patch_attn_mask.unsqueeze(1),
+            tgt_sizes=tgt_sizes,
+        )
+
+        if torch.any(~patch_attn_mask):
+            mask_dtype = hidden_states.dtype
+            min_val = torch.finfo(mask_dtype).min
+            attention_mask = (~patch_attn_mask).to(dtype=mask_dtype) * min_val
+            attention_mask = attention_mask[:, None, None, :]
+        else:
+            attention_mask = None
+
+        # Encoder layers with mid-encoder merger injection
+        insert_layer_id = getattr(self.config, "insert_layer_id", -1)
+        if downsample_mode is None:
+            downsample_mode = getattr(self.config, "downsample_mode", "16x")
+        use_vit_merger = downsample_mode != "4x" and insert_layer_id >= 0
+
+        for layer in self.vpm.encoder.layers[: insert_layer_id + 1]:
+            hidden_states = layer(hidden_states, attention_mask=attention_mask)
+
+        if use_vit_merger:
+            hidden_states, tgt_sizes, attention_mask = self.vit_merger(
+                hidden_states,
+                tgt_sizes,
+                attention_mask,
+            )
+
+        for layer in self.vpm.encoder.layers[insert_layer_id + 1 :]:
+            hidden_states = layer(hidden_states, attention_mask=attention_mask)
+
+        # 4. Post layernorm
+        hidden_states = self.vpm.post_layernorm(hidden_states)
+
+        # 5. MLP merger → list of per-slice tensors
+        return self.merger(hidden_states, tgt_sizes)
+
+    def _process_vision_input(self, image_input, use_vit_merger=None):
+        if image_input["type"] == "image_embeds":
+            return image_input["image_embeds"]
+
+        downsample_mode = None
+        if use_vit_merger is not None:
+            downsample_mode = "16x" if use_vit_merger else "4x"
+        image_features = self.get_vision_hidden_states(
+            image_input,
+            downsample_mode=downsample_mode,
+        )
+        num_slices = image_input["num_slices"]
+        results = []
+        idx = 0
+        for n in num_slices.tolist():
+            group = image_features[idx : idx + n]
+            results.append(torch.cat(group, dim=0))
+            idx += n
+        return results
+
+    # ----- Multimodal embedding interface -----
+
+    def embed_multimodal(self, **kwargs: object) -> MultiModalEmbeddings:
+        use_vit_merger_tensors = kwargs.pop("use_vit_merger", None)
+        use_vit_merger = None
+        if use_vit_merger_tensors is not None:
+            if isinstance(use_vit_merger_tensors, torch.Tensor):
+                use_vit_merger = bool(use_vit_merger_tensors.any().item())
+            elif isinstance(use_vit_merger_tensors, list | tuple):
+                use_vit_merger = any(
+                    bool(t.any().item()) if isinstance(t, torch.Tensor) else bool(t)
+                    for t in use_vit_merger_tensors
+                )
+
+        # Split kwargs into image / video buckets (videos are processed via
+        # the same vision pipeline; their fields just carry a ``video_`` prefix).
+        image_kwargs = {
+            k: v
+            for k, v in kwargs.items()
+            if k in ("pixel_values", "image_embeds", "tgt_sizes")
+        }
+        video_kwargs = {
+            k.removeprefix("video_"): v
+            for k, v in kwargs.items()
+            if k.startswith("video_")
+        }
+
+        multimodal_embeddings: tuple[torch.Tensor, ...] = ()
+
+        if (
+            image_kwargs.get("pixel_values") is not None
+            or image_kwargs.get("image_embeds") is not None
+        ):
+            image_input = self._parse_and_validate_vision_input(**image_kwargs)
+            if image_input is not None:
+                multimodal_embeddings += tuple(
+                    self._process_vision_input(
+                        image_input,
+                        use_vit_merger=use_vit_merger,
+                    )
+                )
+
+        if (
+            video_kwargs.get("pixel_values") is not None
+            or video_kwargs.get("image_embeds") is not None
+        ):
+            video_input = self._parse_and_validate_vision_input(**video_kwargs)
+            if video_input is not None:
+                multimodal_embeddings += tuple(
+                    self._process_vision_input(
+                        video_input,
+                        use_vit_merger=use_vit_merger,
+                    )
+                )
+
+        if not multimodal_embeddings:
+            return []
+        return multimodal_embeddings
+
+    def embed_input_ids(
+        self,
+        input_ids: torch.Tensor,
+        multimodal_embeddings: MultiModalEmbeddings | None = None,
+        *,
+        is_multimodal: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        inputs_embeds = self._embed_text_input_ids(
+            input_ids,
+            self.language_model.embed_input_ids,
+            is_multimodal=is_multimodal,
+        )
+        if multimodal_embeddings is None or len(multimodal_embeddings) == 0:
+            return inputs_embeds
+
+        is_multimodal = _require_is_multimodal(is_multimodal)
+        return _merge_multimodal_embeddings(
+            inputs_embeds=inputs_embeds,
+            multimodal_embeddings=multimodal_embeddings,
+            is_multimodal=is_multimodal,
+        )
+
+    # ----- Forward / Logits -----
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        **kwargs: Any,
+    ) -> torch.Tensor:
+        if intermediate_tensors is not None:
+            inputs_embeds = None
+
+        return self.language_model.model(
+            input_ids=input_ids,
+            positions=positions,
+            intermediate_tensors=intermediate_tensors,
+            inputs_embeds=inputs_embeds,
+        )
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        return self.language_model.compute_logits(hidden_states)
+
+    # ----- Weight loading -----
+
+    def load_weights(
+        self,
+        weights: Iterable[tuple[str, torch.Tensor]],
+    ) -> set[str]:
+        loader = AutoWeightsLoader(self, skip_prefixes=["mtp."])
+        return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
+
+    def get_mm_mapping(self) -> MultiModelKeys:
+        return MultiModelKeys.from_string_field(
+            language_model="language_model",
+            connector=["vit_merger", "merger"],
+            tower_model="vpm",
+        )
+
+    # ----- Mamba / Hybrid state helpers (same as Qwen3.5 VLM) -----
+
+    @classmethod
+    def get_mamba_state_dtype_from_config(cls, vllm_config):
+        return MambaStateDtypeCalculator.gated_delta_net_state_dtype(
+            vllm_config.model_config.dtype,
+            vllm_config.cache_config.mamba_cache_dtype,
+            vllm_config.cache_config.mamba_ssm_cache_dtype,
+        )
+
+    @classmethod
+    def get_mamba_state_shape_from_config(cls, vllm_config):
+        parallel_config = vllm_config.parallel_config
+        hf_config = vllm_config.model_config.hf_text_config
+        tp_size = parallel_config.tensor_parallel_size
+        num_spec = (
+            vllm_config.speculative_config.num_speculative_tokens
+            if vllm_config.speculative_config
+            else 0
+        )
+        return MambaStateShapeCalculator.gated_delta_net_state_shape(
+            tp_size,
+            hf_config.linear_num_key_heads,
+            hf_config.linear_num_value_heads,
+            hf_config.linear_key_head_dim,
+            hf_config.linear_value_head_dim,
+            hf_config.linear_conv_kernel_dim,
+            num_spec,
+        )
+
+    @classmethod
+    def get_mamba_state_copy_func(cls):
+        return MambaStateCopyFuncCalculator.gated_delta_net_state_copy_func()
diff --git a/vllm/model_executor/models/minimax_m2.py b/vllm/model_executor/models/minimax_m2.py
index 426caea1f047..dbcafd16c5ed 100644
--- a/vllm/model_executor/models/minimax_m2.py
+++ b/vllm/model_executor/models/minimax_m2.py
@@ -24,25 +24,30 @@
 """Inference-only MiniMaxM2 model."""
 
 from collections.abc import Iterable
+from itertools import islice
 from typing import Any
 
 import torch
 from torch import nn
 from transformers import PretrainedConfig
 
+import vllm.envs as envs
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, ModelConfig, VllmConfig
 from vllm.distributed import (
     get_pp_group,
+    get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
-    tensor_model_parallel_all_reduce,
 )
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
+from vllm.model_executor.layers.fused_moe.router.gate_linear import GateLinear
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     QKVParallelLinear,
-    ReplicatedLinear,
     RowParallelLinear,
 )
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
@@ -57,9 +62,10 @@
     default_weight_loader,
     maybe_remap_kv_scale_name,
 )
+from vllm.platforms import current_platform
 from vllm.sequence import IntermediateTensors
 
-from .interfaces import SupportsLoRA, SupportsPP
+from .interfaces import EagleModelMixin, SupportsEagle3, SupportsLoRA, SupportsPP
 from .utils import (
     AutoWeightsLoader,
     PPMissingLayer,
@@ -70,6 +76,17 @@
 )
 
 
+def _enable_router_pdl() -> bool:
+    is_hopper_or_blackwell = current_platform.is_device_capability(
+        (9, 0)
+    ) or current_platform.is_device_capability_family(100)
+    return (
+        current_platform.is_cuda()
+        and is_hopper_or_blackwell
+        and envs.TRTLLM_ENABLE_PDL
+    )
+
+
 class MiniMaxM2MoE(nn.Module):
     def __init__(
         self,
@@ -103,19 +120,19 @@ def __init__(
             e_score_correction_bias=self.e_score_correction_bias,
             hidden_size=config.hidden_size,
             intermediate_size=config.intermediate_size,
-            reduce_results=False,
             renormalize=True,
             quant_config=quant_config,
             prefix=f"{prefix}.experts",
             router_logits_dtype=torch.float32,
+            enable_router_pdl=_enable_router_pdl(),
         )
 
-        self.gate = ReplicatedLinear(
+        self.gate = GateLinear(
             config.hidden_size,
             config.num_local_experts,
             bias=False,
+            out_dtype=torch.float32,
             params_dtype=torch.float32,
-            quant_config=None,
             prefix=f"{prefix}.gate",
         )
 
@@ -127,15 +144,12 @@ def ebias_weight_loader(param: nn.Parameter, loaded_weight: torch.Tensor) -> Non
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         num_tokens, hidden_dim = hidden_states.shape
         hidden_states = hidden_states.view(-1, hidden_dim)
-
         # router_logits: (num_tokens, n_experts)
-        router_logits, _ = self.gate(hidden_states.to(torch.float32))
+        # Dtype conversion (bf16->fp32) is handled inside the kernel.
+        router_logits, _ = self.gate(hidden_states)
         final_hidden_states = self.experts(
             hidden_states=hidden_states, router_logits=router_logits
         )
-        final_hidden_states = final_hidden_states
-        if self.tp_size > 1:
-            final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
 
         return final_hidden_states.view(num_tokens, hidden_dim)
 
@@ -221,9 +235,21 @@ def __init__(
         self.q_norm = MiniMaxText01RMSNormTP(
             self.head_dim * self.total_num_heads, eps=rms_norm_eps
         )
-        self.k_norm = MiniMaxText01RMSNormTP(
-            self.head_dim * self.total_num_kv_heads, eps=rms_norm_eps
-        )
+        if self.total_num_kv_heads >= tp_size:
+            self.k_norm = MiniMaxText01RMSNormTP(
+                self.head_dim * self.total_num_kv_heads, eps=rms_norm_eps
+            )
+        else:
+            # KV heads are replicated across TP ranks; shard k_norm weight by
+            # total_num_kv_heads rather than tp_size to avoid incorrect sharding.
+            num_kv_head_replicas = tp_size // self.total_num_kv_heads
+            self.k_norm = MiniMaxText01RMSNormTP(
+                self.head_dim * self.total_num_kv_heads,
+                eps=rms_norm_eps,
+                weight_shard_world_size=self.total_num_kv_heads,
+                weight_shard_rank=get_tensor_model_parallel_rank()
+                // num_kv_head_replicas,
+            )
 
     def forward(
         self,
@@ -232,9 +258,7 @@ def forward(
     ) -> torch.Tensor:
         qkv, _ = self.qkv_proj(hidden_states)
         q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
-        q, k = MiniMaxText01RMSNormTP.forward_qk(
-            self.q_norm, self.k_norm, q.contiguous(), k.contiguous()
-        )
+        q, k = MiniMaxText01RMSNormTP.forward_qk(self.q_norm, self.k_norm, q, k)
         q, k = self.rotary_emb(positions, q, k)
         attn_output = self.attn(q, k, v)
         output, _ = self.o_proj(attn_output)
@@ -313,7 +337,7 @@ def forward(
 
 
 @support_torch_compile
-class MiniMaxM2Model(nn.Module):
+class MiniMaxM2Model(nn.Module, EagleModelMixin):
     fall_back_to_pt_during_load = False
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
@@ -331,7 +355,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             self.embed_tokens = VocabParallelEmbedding(
                 config.vocab_size,
                 config.hidden_size,
-                quant_config=None,
+                quant_config=quant_config,
                 prefix=f"{prefix}.embed_tokens",
             )
         else:
@@ -366,7 +390,7 @@ def forward(
         positions: torch.Tensor,
         intermediate_tensors: IntermediateTensors | None,
         inputs_embeds: torch.Tensor | None = None,
-    ) -> torch.Tensor | IntermediateTensors:
+    ) -> torch.Tensor | IntermediateTensors | tuple[torch.Tensor, list[torch.Tensor]]:
         if get_pp_group().is_first_rank:
             if inputs_embeds is not None:
                 hidden_states = inputs_embeds
@@ -378,18 +402,28 @@ def forward(
             hidden_states = intermediate_tensors["hidden_states"]
             residual = intermediate_tensors["residual"]
 
-        for layer in self.layers[self.start_layer : self.end_layer]:
+        aux_hidden_states = self._maybe_add_hidden_state([], 0, hidden_states, residual)
+        for idx, layer in enumerate(
+            islice(self.layers, self.start_layer, self.end_layer)
+        ):
             hidden_states, residual = layer(positions, hidden_states, residual)
+            self._maybe_add_hidden_state(
+                aux_hidden_states, idx + 1, hidden_states, residual
+            )
 
         if not get_pp_group().is_last_rank:
             return IntermediateTensors(
                 {"hidden_states": hidden_states, "residual": residual}
             )
         hidden_states, _ = self.norm(hidden_states, residual)
+
+        if len(aux_hidden_states) > 0:
+            return hidden_states, aux_hidden_states
+
         return hidden_states
 
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
-        return FusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="w1",
             ckpt_down_proj_name="w2",
@@ -496,7 +530,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         return loaded_params
 
 
-class MiniMaxM2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
+class MiniMaxM2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsEagle3):
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
@@ -518,7 +552,10 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         )
         if get_pp_group().is_last_rank:
             self.lm_head = ParallelLMHead(
-                config.vocab_size, config.hidden_size, quant_config=None
+                config.vocab_size,
+                config.hidden_size,
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "lm_head"),
             )
         else:
             self.lm_head = PPMissingLayer()
diff --git a/vllm/model_executor/models/minimax_text_01.py b/vllm/model_executor/models/minimax_text_01.py
index 21d74d8b0580..c73fbf7009d6 100644
--- a/vllm/model_executor/models/minimax_text_01.py
+++ b/vllm/model_executor/models/minimax_text_01.py
@@ -24,7 +24,9 @@
 from vllm.forward_context import get_forward_context
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
@@ -162,7 +164,6 @@ def __init__(
             hidden_size=self.hidden_size,
             intermediate_size=self.intermediate_size * self.tp_size,
             params_dtype=self.params_dtype,
-            reduce_results=True,
             renormalize=True,
             quant_config=self.quant_config,
             tp_size=self.tp_size,
diff --git a/vllm/model_executor/models/mistral3.py b/vllm/model_executor/models/mistral3.py
index 0ece3dda2e50..025ce564083c 100644
--- a/vllm/model_executor/models/mistral3.py
+++ b/vllm/model_executor/models/mistral3.py
@@ -382,7 +382,14 @@ class Mistral3ForConditionalGeneration(
             # Some PEFT LoRAs are trained against the text submodule directly
             # and produce names like `base_model.model.model.layers.*`.
             "model.": "language_model.model.",
-        }
+        },
+        orig_to_new_suffix={
+            # FP8 quantized HF checkpoints use "activation_scale" and
+            # "weight_scale_inv" but vLLM's FP8 linear layers register
+            # them as "input_scale" and "weight_scale"
+            ".activation_scale": ".input_scale",
+            ".weight_scale_inv": ".weight_scale",
+        },
     )
 
     @classmethod
@@ -402,13 +409,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
         self.config = config
         self.multimodal_config = multimodal_config
 
-        # NOTE: These are special cases for Pixtral-12B in the HF-format
+        # NOTE: This is a special case for Pixtral-12B in the HF-format
         # https://huggingface.co/mistral-community/pixtral-12b/blob/main/config.json  # noqa
-        if (
-            config.text_config.architectures is None
-            and config.text_config.model_type == "mistral"
-        ):
-            config.text_config.architectures = ["MistralForCausalLM"]
         if (
             config.projector_hidden_act is None
             and config.vision_config.hidden_act == "gelu"
diff --git a/vllm/model_executor/models/mistral_eagle.py b/vllm/model_executor/models/mistral_eagle.py
new file mode 100644
index 000000000000..8865742d6495
--- /dev/null
+++ b/vllm/model_executor/models/mistral_eagle.py
@@ -0,0 +1,168 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Iterable
+
+import torch
+import torch.nn as nn
+
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import VllmConfig
+from vllm.logger import init_logger
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import RowParallelLinear
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
+from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
+from vllm.model_executor.models.interfaces import MultiModalEmbeddings
+from vllm.model_executor.models.llama import LlamaConfig
+from vllm.model_executor.models.mistral import (
+    MistralDecoderLayer,
+    MistralForCausalLM,
+    MistralModel,
+)
+from vllm.model_executor.models.utils import (
+    _merge_multimodal_embeddings,
+    get_draft_quant_config,
+    maybe_prefix,
+)
+
+logger = init_logger(__name__)
+
+
+class EagleMistralDecoderLayer(MistralDecoderLayer):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+        config: LlamaConfig | None = None,
+    ) -> None:
+        super().__init__(vllm_config, prefix=prefix, config=config)
+
+    def get_quant_config(self, vllm_config: VllmConfig) -> QuantizationConfig | None:
+        return get_draft_quant_config(vllm_config)
+
+
+@support_torch_compile
+class EagleMistralModel(MistralModel):
+    def __init__(
+        self,
+        *,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+        start_layer_id: int = 0,
+    ) -> None:
+        # Bypass MistralModel.__init__ to avoid creating duplicate attention
+        # layer entries in the global context.
+        nn.Module.__init__(self)
+        self.config = vllm_config.speculative_config.draft_model_config.hf_config
+        self.vocab_size = self.config.vocab_size
+        # Get drafter's quantization config
+        self.quant_config = get_draft_quant_config(vllm_config)
+
+        self.embed_tokens = VocabParallelEmbedding(
+            self.config.vocab_size,
+            self.config.hidden_size,
+            prefix=maybe_prefix(prefix, "embed_tokens"),
+            quant_config=self.quant_config,
+        )
+
+        self.layers = nn.ModuleList(
+            [
+                EagleMistralDecoderLayer(
+                    vllm_config,
+                    prefix=maybe_prefix(prefix, f"layers.{i + start_layer_id}"),
+                    config=self.config,
+                )
+                for i in range(self.config.num_hidden_layers)
+            ]
+        )
+        self.fc = RowParallelLinear(
+            self.config.hidden_size * 2,
+            self.config.hidden_size,
+            bias=False,
+            input_is_parallel=False,
+            quant_config=self.quant_config,
+            prefix=maybe_prefix(prefix, "fc"),
+            return_bias=False,
+        )
+        self.norm = RMSNorm(self.config.hidden_size, eps=self.config.rms_norm_eps)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        if inputs_embeds is None:
+            inputs_embeds = self.embed_input_ids(input_ids)
+        hidden_states = self.fc(torch.cat((inputs_embeds, hidden_states), dim=-1))
+        residual = None
+        for layer in self.layers:
+            hidden_states, residual = layer(
+                positions,
+                hidden_states,
+                residual,
+            )
+        hidden_states, _ = self.norm(hidden_states, residual)
+        return hidden_states, hidden_states
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        # Pretend embed_tokens is loaded; the actual weight is shared
+        # from the target model at runtime by `load_eagle_model`.
+        return super().load_weights(weights) | {"embed_tokens.weight"}
+
+
+class EagleMistralForCausalLM(MistralForCausalLM):
+    mistral_mapping = MistralForCausalLM.mistral_mapping | {
+        "eagle_linear": "model.fc",
+    }
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
+        # Bypass MistralForCausalLM.__init__ to use the draft model config
+        # and to avoid creating an lm_head.
+        nn.Module.__init__(self)
+        self.config = vllm_config.speculative_config.draft_model_config.hf_config
+        target_layer_num = vllm_config.model_config.get_num_layers(
+            vllm_config.parallel_config
+        )
+        self.model = EagleMistralModel(
+            vllm_config=vllm_config,
+            prefix=maybe_prefix(prefix, "model"),
+            start_layer_id=target_layer_num,
+        )
+
+        logit_scale = getattr(self.config, "logit_scale", 1.0)
+        self.logits_processor = LogitsProcessor(
+            self.config.vocab_size, scale=logit_scale
+        )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        return self.model(input_ids, positions, hidden_states, inputs_embeds)
+
+    def embed_input_ids(
+        self,
+        input_ids: torch.Tensor,
+        multimodal_embeddings: MultiModalEmbeddings | None = None,
+        *,
+        is_multimodal: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        inputs_embeds = super().embed_input_ids(input_ids)
+
+        if multimodal_embeddings is None or len(multimodal_embeddings) == 0:
+            return inputs_embeds
+
+        assert is_multimodal is not None
+
+        return _merge_multimodal_embeddings(
+            inputs_embeds=inputs_embeds,
+            multimodal_embeddings=multimodal_embeddings,
+            is_multimodal=is_multimodal,
+        )
diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py
index 376fd7a1709d..cbfc254dda36 100644
--- a/vllm/model_executor/models/mixtral.py
+++ b/vllm/model_executor/models/mixtral.py
@@ -40,7 +40,10 @@
     get_tensor_model_parallel_world_size,
 )
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     QKVParallelLinear,
@@ -132,7 +135,6 @@ def __init__(
             hidden_size=hidden_size,
             intermediate_size=intermediate_size,
             params_dtype=params_dtype,
-            reduce_results=True,
             renormalize=True,
             quant_config=quant_config,
             tp_size=tp_size,
@@ -365,7 +367,7 @@ def forward(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return FusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="w1",
             ckpt_down_proj_name="w2",
diff --git a/vllm/model_executor/models/mllama4.py b/vllm/model_executor/models/mllama4.py
index 227ef2fa669a..8fe1be721c79 100644
--- a/vllm/model_executor/models/mllama4.py
+++ b/vllm/model_executor/models/mllama4.py
@@ -40,7 +40,9 @@
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.inputs import MultiModalDataDict
 from vllm.model_executor.layers.attention import MMEncoderAttention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
     QKVParallelLinear,
@@ -1072,7 +1074,7 @@ def _load_other_weights(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return FusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/mlp_speculator.py b/vllm/model_executor/models/mlp_speculator.py
index 48604d8e5103..612baba8eaa6 100644
--- a/vllm/model_executor/models/mlp_speculator.py
+++ b/vllm/model_executor/models/mlp_speculator.py
@@ -17,8 +17,6 @@
 
 from .utils import maybe_prefix
 
-SQRT2 = 2**0.5
-
 
 class MLPSpeculatorLayerNorm(nn.Module):
     """
@@ -171,57 +169,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
             config.vocab_size, config.vocab_size, 1.0
         )
 
-    # NOTE(woosuk): This method is commented out because it is old code
-    # using V0. We should either port it to V1 or remove it.
-
-    # def generate_proposals(
-    #     self,
-    #     input_ids: torch.Tensor,
-    #     previous_hidden_states: torch.Tensor,
-    #     num_predict_tokens: int,
-    #     sampling_metadata: SamplingMetadata,
-    # ) -> list[SamplerOutput]:
-    #     if num_predict_tokens > self.max_speculative_tokens:
-    #         raise ValueError(f"Max speculative tokens for model is "
-    #                          f"{self.max_speculative_tokens}, but "
-    #                          f"{num_predict_tokens} were requested")
-
-    #     # b x 1 x d
-    #     previous_hidden_states = previous_hidden_states.unsqueeze(1)
-
-    #     if self.scale_input:
-    #         previous_hidden_states = self.ln0(previous_hidden_states) / SQRT2
-
-    #     # b x 1
-    #     last_tokens = input_ids.unsqueeze(1)
-
-    #     next_tokens = []
-
-    #     for head_index in range(num_predict_tokens):
-
-    #         # Project and predict
-    #         z = self.emb[head_index](last_tokens)  # b k d
-    #         states = self.proj[head_index](previous_hidden_states)
-
-    #         # Weighted add of state_weight*state and emb_weight*z
-    #         # Let subsequent LN take care of denominator
-    #         # state_weight is close to 1, so shouldn't be any precision issues
-    #         states.add_(z, alpha=self.emb_weight / self.state_weight)
-
-    #         states = self.activation(self.ln[head_index](states))  # b k d
-    #         previous_hidden_states = states
-    #         # TODO: not yet supporting top_k_tokens_per_head
-    #         states = states.flatten(0, 1)
-
-    #         logits = self.logits_processor(self.head[head_index], states,
-    #                                        sampling_metadata)
-
-    #         output = self.sampler(logits, sampling_metadata)
-    #         last_tokens = output.sampled_token_ids
-    #         next_tokens.append(output)
-
-    #     return next_tokens
-
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         params_dict = dict(self.named_parameters())
         loaded_params: set[str] = set()
diff --git a/vllm/model_executor/models/molmo2.py b/vllm/model_executor/models/molmo2.py
index aa58fa6d1583..9ad3810e41fd 100644
--- a/vllm/model_executor/models/molmo2.py
+++ b/vllm/model_executor/models/molmo2.py
@@ -489,7 +489,7 @@ def __init__(
         self.transformer = Molmo2VisionBlockCollection(
             config,
             quant_config,
-            prefix=f"{prefix}.transformer",
+            prefix=maybe_prefix(prefix, "transformer"),
         )
 
     def add_pos_emb(self, x: torch.Tensor, patch_num: int) -> torch.Tensor:
@@ -1338,6 +1338,9 @@ def exif_transpose(
 def build_flat_image_bool_length(
     image_grids: torch.LongTensor,
     hf_config: PretrainedConfig,
+    image_use_col_tokens: bool = True,
+    use_single_crop_col_tokens: bool | None = None,
+    use_single_crop_start_token: bool = True,
 ) -> tuple[torch.LongTensor, torch.LongTensor]:
     image_patch_id = hf_config.image_patch_id
     low_res_image_start_id = hf_config.low_res_image_start_token_id
@@ -1353,7 +1356,17 @@ def build_flat_image_bool_length(
     h = image_grids[:, 2]
     w = image_grids[:, 3]
 
-    lengths = resized_h * resized_w + h * (w + 1) + 4  # [B]
+    low_res_use_col_tokens = (
+        image_use_col_tokens
+        if use_single_crop_col_tokens is None
+        else use_single_crop_col_tokens
+    )
+    low_res_extra = int(low_res_use_col_tokens)
+    high_res_extra = int(image_use_col_tokens)
+
+    lengths = (
+        resized_h * (resized_w + low_res_extra) + h * (w + high_res_extra) + 4
+    )  # [B]
     total_len = int(lengths.sum().item())
 
     flat = torch.empty(total_len, dtype=torch.long, device=device)
@@ -1363,16 +1376,24 @@ def build_flat_image_bool_length(
         resized_h_i, resized_w_i, h_i, w_i = image_grids[i].tolist()
         L_i = int(lengths[i].item())
 
-        num_low_res_patches = resized_h_i * resized_w_i
-
         idx = offset
 
-        flat[idx] = low_res_image_start_id
+        flat[idx] = (
+            low_res_image_start_id if use_single_crop_start_token else image_start_id
+        )
         idx += 1
 
-        if num_low_res_patches > 0:
-            flat[idx : idx + num_low_res_patches] = image_patch_id
-            idx += num_low_res_patches
+        low_res_block_len = resized_w_i + low_res_extra
+        if low_res_block_len > 0 and resized_h_i > 0:
+            line = torch.empty(low_res_block_len, dtype=torch.long, device=device)
+            if resized_w_i > 0:
+                line[:resized_w_i] = image_patch_id
+            if low_res_use_col_tokens:
+                line[resized_w_i] = image_col_id
+
+            block = line.repeat(resized_h_i)
+            flat[idx : idx + resized_h_i * low_res_block_len] = block
+            idx += resized_h_i * low_res_block_len
 
         flat[idx] = image_end_id
         idx += 1
@@ -1380,12 +1401,13 @@ def build_flat_image_bool_length(
         flat[idx] = image_start_id
         idx += 1
 
-        block_len = w_i + 1
+        block_len = w_i + high_res_extra
         if block_len > 0 and h_i > 0:
             line = torch.empty(block_len, dtype=torch.long, device=device)
             if w_i > 0:
                 line[:w_i] = image_patch_id
-            line[w_i] = image_col_id
+            if image_use_col_tokens:
+                line[w_i] = image_col_id
 
             block = line.repeat(h_i)
             flat[idx : idx + h_i * block_len] = block
@@ -2108,7 +2130,13 @@ def patched_call(text=None, images=None, videos=None, **kwargs) -> BatchFeature:
             (
                 processed_outputs["image_tokens"],
                 processed_outputs["num_image_tokens"],
-            ) = build_flat_image_bool_length(image_grids, hf_config)
+            ) = build_flat_image_bool_length(
+                image_grids,
+                hf_config,
+                image_use_col_tokens=hf_processor.image_use_col_tokens,
+                use_single_crop_col_tokens=hf_processor.use_single_crop_col_tokens,
+                use_single_crop_start_token=hf_processor.use_single_crop_start_token,
+            )
 
         return BatchFeature({**processed_outputs, **all_video_outputs})
 
diff --git a/vllm/model_executor/models/moondream3.py b/vllm/model_executor/models/moondream3.py
new file mode 100644
index 000000000000..d5f3e6b195fb
--- /dev/null
+++ b/vllm/model_executor/models/moondream3.py
@@ -0,0 +1,1423 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Inference-only Moondream3 model implementation."""
+
+from collections.abc import Iterable, Mapping
+from dataclasses import dataclass
+from functools import cached_property
+from itertools import islice
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import BatchFeature
+
+from vllm.config import VllmConfig
+from vllm.config.multimodal import BaseDummyOptions
+from vllm.distributed import (
+    get_pp_group,
+    get_tensor_model_parallel_rank,
+    get_tensor_model_parallel_world_size,
+    tensor_model_parallel_all_gather,
+    tensor_model_parallel_all_reduce,
+)
+from vllm.inputs import MultiModalDataDict
+from vllm.logger import init_logger
+from vllm.model_executor.layers.activation import get_act_fn
+from vllm.model_executor.layers.attention import Attention
+from vllm.model_executor.layers.attention.mm_encoder_attention import (
+    MMEncoderAttention,
+)
+from vllm.model_executor.layers.fused_moe import MoEActivation, fused_experts
+from vllm.model_executor.layers.fused_moe.config import biased_moe_quant_config
+from vllm.model_executor.layers.linear import (
+    ColumnParallelLinear,
+    QKVParallelLinear,
+    ReplicatedLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    ParallelLMHead,
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.multimodal.inputs import (
+    MultiModalFieldConfig,
+    MultiModalKwargsItems,
+)
+from vllm.multimodal.parse import ImageSize, MultiModalDataItems
+from vllm.multimodal.processing import (
+    BaseDummyInputsBuilder,
+    BaseMultiModalProcessor,
+    BaseProcessingInfo,
+    PromptReplacement,
+    PromptUpdate,
+    PromptUpdateDetails,
+)
+from vllm.sequence import IntermediateTensors
+from vllm.transformers_utils.configs.moondream3 import (
+    Moondream3Config,
+    Moondream3TextConfig,
+    Moondream3VisionConfig,
+)
+from vllm.transformers_utils.processors.moondream3 import Moondream3Processor
+
+from .interfaces import (
+    MultiModalEmbeddings,
+    SupportsMultiModal,
+    SupportsPP,
+)
+from .utils import (
+    extract_layer_index,
+    make_empty_intermediate_tensors_factory,
+    make_layers,
+    maybe_prefix,
+)
+
+logger = init_logger(__name__)
+
+
+# ============================================================================
+# Image Processing Utilities
+# ============================================================================
+
+
+def reconstruct_from_crops(
+    crops: torch.Tensor,
+    tiling: tuple[int, int],
+    overlap_margin: int,
+    patch_size: int = 14,
+) -> torch.Tensor:
+    """Reconstruct features from overlapping crops."""
+    tiling_h, tiling_w = tiling
+    crop_height, crop_width = crops[0].shape[:2]
+    margin_pixels = overlap_margin * patch_size
+
+    output_h = (crop_height - 2 * margin_pixels) * tiling_h + 2 * margin_pixels
+    output_w = (crop_width - 2 * margin_pixels) * tiling_w + 2 * margin_pixels
+
+    reconstructed = torch.zeros(
+        (output_h, output_w, crops[0].shape[2]),
+        device=crops[0].device,
+        dtype=crops[0].dtype,
+    )
+
+    for i, crop in enumerate(crops):
+        tile_y = i // tiling_w
+        tile_x = i % tiling_w
+
+        x_start = 0 if tile_x == 0 else margin_pixels
+        x_end = crop_width if tile_x == tiling_w - 1 else crop_width - margin_pixels
+        y_start = 0 if tile_y == 0 else margin_pixels
+        y_end = crop_height if tile_y == tiling_h - 1 else crop_height - margin_pixels
+
+        out_x = tile_x * (crop_width - 2 * margin_pixels)
+        out_y = tile_y * (crop_height - 2 * margin_pixels)
+
+        reconstructed[
+            out_y + y_start : out_y + y_end, out_x + x_start : out_x + x_end
+        ] = crop[y_start:y_end, x_start:x_end]
+
+    return reconstructed
+
+
+# ============================================================================
+# Vision Encoder Components
+# ============================================================================
+
+
+class Moondream3VisionMLP(nn.Module):
+    """MLP for vision encoder blocks."""
+
+    def __init__(
+        self,
+        hidden_size: int,
+        intermediate_size: int,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.fc1 = ColumnParallelLinear(
+            hidden_size,
+            intermediate_size,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.fc1",
+        )
+        self.act = get_act_fn("gelu_pytorch_tanh")
+        self.fc2 = RowParallelLinear(
+            intermediate_size,
+            hidden_size,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.fc2",
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x, _ = self.fc1(x)
+        x = self.act(x)
+        x, _ = self.fc2(x)
+        return x
+
+
+class Moondream3VisionAttention(nn.Module):
+    """Self-attention for vision encoder (bidirectional)."""
+
+    def __init__(
+        self,
+        hidden_size: int,
+        num_heads: int,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.num_heads = num_heads
+        self.head_dim = hidden_size // num_heads
+
+        self.qkv_proj = QKVParallelLinear(
+            hidden_size=hidden_size,
+            head_size=self.head_dim,
+            total_num_heads=num_heads,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.qkv_proj",
+        )
+        self.out_proj = RowParallelLinear(
+            input_size=hidden_size,
+            output_size=hidden_size,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.out_proj",
+        )
+
+        tp_size = get_tensor_model_parallel_world_size()
+        self.num_heads_per_partition = num_heads // tp_size
+
+        self.attn = MMEncoderAttention(
+            num_heads=self.num_heads_per_partition,
+            head_size=self.head_dim,
+            scale=self.head_dim**-0.5,
+            prefix=f"{prefix}.attn",
+        )
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        qkv, _ = self.qkv_proj(hidden_states)
+        q, k, v = qkv.chunk(3, dim=-1)
+        out = self.attn(q, k, v)
+        out, _ = self.out_proj(out)
+        return out
+
+
+class Moondream3VisionBlock(nn.Module):
+    """Transformer block for vision encoder."""
+
+    def __init__(
+        self,
+        hidden_size: int,
+        intermediate_size: int,
+        num_heads: int,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.ln1 = nn.LayerNorm(hidden_size, eps=1e-5)
+        self.attn = Moondream3VisionAttention(
+            hidden_size=hidden_size,
+            num_heads=num_heads,
+            quant_config=quant_config,
+            prefix=f"{prefix}.attn",
+        )
+        self.ln2 = nn.LayerNorm(hidden_size, eps=1e-5)
+        self.mlp = Moondream3VisionMLP(
+            hidden_size=hidden_size,
+            intermediate_size=intermediate_size,
+            quant_config=quant_config,
+            prefix=f"{prefix}.mlp",
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = x + self.attn(self.ln1(x))
+        x = x + self.mlp(self.ln2(x))
+        return x
+
+
+class Moondream3VisionEncoder(nn.Module):
+    """Vision encoder (SigLIP-style ViT)."""
+
+    def __init__(
+        self,
+        config: Moondream3VisionConfig,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.config = config
+
+        # Patch embedding
+        self.patch_emb = nn.Linear(
+            config.enc_patch_size * config.enc_patch_size * 3,
+            config.enc_dim,
+            bias=True,
+        )
+
+        # Position embeddings (27x27 = 729 patches for 378x378 / 14)
+        num_patches = (config.crop_size // config.enc_patch_size) ** 2
+        self.pos_emb = nn.Parameter(torch.zeros(1, num_patches, config.enc_dim))
+
+        # Transformer blocks
+        self.blocks = nn.ModuleList(
+            [
+                Moondream3VisionBlock(
+                    hidden_size=config.enc_dim,
+                    intermediate_size=config.enc_ff_dim,
+                    num_heads=config.enc_n_heads,
+                    quant_config=quant_config,
+                    prefix=f"{prefix}.blocks.{i}",
+                )
+                for i in range(config.enc_n_layers)
+            ]
+        )
+
+        self.post_ln = nn.LayerNorm(config.enc_dim, eps=1e-5)
+
+    def create_patches(self, images: torch.Tensor) -> torch.Tensor:
+        """Convert images to patch embeddings.
+
+        Args:
+            images: (batch, channels, height, width)
+
+        Returns:
+            patches: (batch, num_patches, patch_dim)
+        """
+        patch_size = self.config.enc_patch_size
+        batch, channels, height, width = images.shape
+        patches_h = height // patch_size
+        patches_w = width // patch_size
+
+        # Unfold into patches
+        patches = images.unfold(2, patch_size, patch_size).unfold(
+            3, patch_size, patch_size
+        )
+        # (batch, channels, patches_h, patches_w, patch_size, patch_size)
+        patches = patches.permute(0, 2, 3, 1, 4, 5).contiguous()
+        # (batch, patches_h, patches_w, channels, patch_size, patch_size)
+        patches = patches.view(batch, patches_h * patches_w, -1)
+        # (batch, num_patches, channels * patch_size * patch_size)
+
+        return patches
+
+    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
+        """Encode images.
+
+        Args:
+            pixel_values: (batch, channels, height, width)
+
+        Returns:
+            features: (batch, num_patches, hidden_size)
+        """
+        # Create patches and embed
+        patches = self.create_patches(pixel_values)
+        x = self.patch_emb(patches)
+
+        # Add position embeddings
+        x = x + self.pos_emb
+
+        # Apply transformer blocks
+        for block in self.blocks:
+            x = block(x)
+
+        # Final layer norm
+        x = self.post_ln(x)
+
+        return x
+
+
+class Moondream3VisionProjection(nn.Module):
+    """Projects vision features to text embedding dimension."""
+
+    def __init__(
+        self,
+        input_dim: int,
+        inner_dim: int,
+        output_dim: int,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        # Input is concatenated global and local features (2 * input_dim)
+        self.fc1 = ColumnParallelLinear(
+            input_dim * 2,
+            inner_dim,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.fc1",
+        )
+        self.act = get_act_fn("gelu_pytorch_tanh")
+        self.fc2 = RowParallelLinear(
+            inner_dim,
+            output_dim,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.fc2",
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x, _ = self.fc1(x)
+        x = self.act(x)
+        x, _ = self.fc2(x)
+        return x
+
+
+# ============================================================================
+# Text Decoder Components
+# ============================================================================
+
+
+class Moondream3TextMLP(nn.Module):
+    """Standard MLP for non-MoE layers (layers 0-3)."""
+
+    def __init__(
+        self,
+        hidden_size: int,
+        intermediate_size: int,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.fc1 = ColumnParallelLinear(
+            hidden_size,
+            intermediate_size,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.fc1",
+        )
+        self.act = get_act_fn("gelu_pytorch_tanh")
+        self.fc2 = RowParallelLinear(
+            intermediate_size,
+            hidden_size,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.fc2",
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x, _ = self.fc1(x)
+        x = self.act(x)
+        x, _ = self.fc2(x)
+        return x
+
+
+class Moondream3TextMoE(nn.Module):
+    """Mixture of Experts layer for layers 4+ with expert parallelism.
+
+    Moondream3 uses a custom GeGLU activation: gelu(h) * (g + 1)
+    where fc1 outputs [gate, up] and the activation is gelu(gate) * (up + 1).
+
+    Uses expert parallelism where each GPU stores num_experts/tp_size experts.
+    Routing and communication handled via all-to-all or replicated computation.
+
+    Checkpoint format:
+    - fc1.weight: [num_experts, expert_inner_dim * 2, hidden_size] (gate+up)
+    - fc2.weight: [num_experts, hidden_size, expert_inner_dim] (down)
+    - router.weight: [num_experts, hidden_size]
+    - router.bias: [num_experts]
+    """
+
+    def __init__(
+        self,
+        hidden_size: int,
+        expert_inner_dim: int,
+        num_experts: int,
+        experts_per_token: int,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.expert_inner_dim = expert_inner_dim
+        self.num_experts = num_experts
+        self.experts_per_token = experts_per_token
+
+        # Expert parallelism: each GPU stores a subset of experts
+        self.tp_size = get_tensor_model_parallel_world_size()
+        self.experts_per_rank = num_experts // self.tp_size
+        self.num_local_experts = self.experts_per_rank
+
+        # Router (gate) - use ReplicatedLinear for compatibility
+        self.gate = ReplicatedLinear(
+            hidden_size,
+            num_experts,
+            bias=True,
+            quant_config=None,
+            prefix=f"{prefix}.gate",
+        )
+
+        # Local expert weights (only store experts_per_rank experts)
+        # fc1: [experts_per_rank, expert_inner_dim * 2, hidden_size]
+        # fc2: [experts_per_rank, hidden_size, expert_inner_dim]
+        self.fc1_weight = nn.Parameter(
+            torch.empty(self.num_local_experts, expert_inner_dim * 2, hidden_size)
+        )
+        self.fc2_weight = nn.Parameter(
+            torch.empty(self.num_local_experts, hidden_size, expert_inner_dim)
+        )
+        self._use_fused_moe = True
+
+        local_expert_start = get_tensor_model_parallel_rank() * self.experts_per_rank
+        expert_map = torch.full((num_experts,), -1, dtype=torch.int32)
+        expert_map[local_expert_start : local_expert_start + self.num_local_experts] = (
+            torch.arange(self.num_local_experts, dtype=torch.int32)
+        )
+        self.register_buffer("_expert_map", expert_map, persistent=False)
+
+        # Preserve Moondream3's exact GeGLU variant (gelu(h) * (g + 1)) by
+        # adding +1 bias to the second half of the fused fc1 activations.
+        fused_w1_bias = torch.zeros(
+            self.num_local_experts,
+            expert_inner_dim * 2,
+            dtype=torch.float32,
+        )
+        fused_w1_bias[:, expert_inner_dim:] = 1.0
+        self.register_buffer("_fused_w1_bias", fused_w1_bias, persistent=False)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Forward pass with expert parallelism and custom GeGLU activation."""
+
+        # Get router logits and compute top-k
+        router_logits, _ = self.gate(x)  # [num_tokens, num_experts]
+        topk_logits, topk_ids = torch.topk(
+            router_logits, self.experts_per_token, dim=-1
+        )
+        # Softmax over selected experts
+        topk_weights = F.softmax(topk_logits, dim=-1, dtype=torch.float32).to(x.dtype)
+
+        if self._use_fused_moe and x.is_cuda:
+            try:
+                out = fused_experts(
+                    hidden_states=x.contiguous(),
+                    w1=self.fc1_weight,
+                    w2=self.fc2_weight,
+                    topk_weights=topk_weights.contiguous(),
+                    topk_ids=topk_ids.contiguous(),
+                    activation=MoEActivation.GELU,
+                    global_num_experts=self.num_experts,
+                    expert_map=self._expert_map,
+                    quant_config=biased_moe_quant_config(self._fused_w1_bias, None),
+                )
+                out = tensor_model_parallel_all_reduce(out)
+                return out
+            except (NotImplementedError, RuntimeError) as exc:
+                self._use_fused_moe = False
+                logger.warning_once(
+                    "Disabling fused Moondream3 MoE path and falling back to "
+                    "the Python expert loop: %s",
+                    str(exc),
+                )
+
+        tp_rank = get_tensor_model_parallel_rank()
+        # Compute local expert range
+        local_expert_start = tp_rank * self.experts_per_rank
+
+        # Fallback path for environments where fused kernels are unavailable.
+        out = x.new_zeros(x.shape)
+
+        for local_expert_idx in range(self.num_local_experts):
+            global_expert_id = local_expert_start + local_expert_idx
+
+            # Find tokens assigned to this expert
+            token_pos, which_k = (topk_ids == global_expert_id).nonzero(as_tuple=True)
+            if token_pos.numel() == 0:
+                continue
+
+            # Get tokens and their routing weights
+            x_tok = x.index_select(0, token_pos)  # [n_tokens, hidden_size]
+            gate_tok = topk_weights[token_pos, which_k]  # [n_tokens]
+
+            # fc1: [expert_inner_dim * 2, hidden_size]
+            # h_full: [n_tokens, expert_inner_dim * 2]
+            h_full = F.linear(x_tok, self.fc1_weight[local_expert_idx])
+
+            # GeGLU with (g + 1): h, g = split; output = gelu(h) * (g + 1)
+            # HF MoE uses exact GELU (not tanh approximation).
+            h, g = h_full.chunk(2, dim=-1)  # Each [n_tokens, expert_inner_dim]
+            h = F.gelu(h) * (g + 1.0)
+
+            # fc2: [hidden_size, expert_inner_dim]
+            # y: [n_tokens, hidden_size]
+            y = F.linear(h, self.fc2_weight[local_expert_idx])
+
+            # Apply routing weight
+            y = y * gate_tok.unsqueeze(-1)
+
+            # Accumulate output
+            out.index_add_(0, token_pos, y)
+
+        # All-reduce to combine results from all experts across GPUs
+        out = tensor_model_parallel_all_reduce(out)
+
+        return out
+
+
+class Moondream3Attention(nn.Module):
+    """Decoder attention with RoPE and tau scaling.
+
+    Moondream3 uses a tau attention mechanism that scales Q and V
+    based on both token content and position.
+    """
+
+    def __init__(
+        self,
+        config: Moondream3TextConfig,
+        layer_idx: int,
+        cache_config=None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.hidden_size = config.dim
+        self.num_heads = config.n_heads
+        self.num_kv_heads = config.n_kv_heads
+        self.head_dim = config.dim // config.n_heads
+
+        tp_size = get_tensor_model_parallel_world_size()
+        self.num_heads_per_partition = self.num_heads // tp_size
+        self.num_kv_heads_per_partition = max(1, self.num_kv_heads // tp_size)
+
+        self.qkv_proj = QKVParallelLinear(
+            hidden_size=self.hidden_size,
+            head_size=self.head_dim,
+            total_num_heads=self.num_heads,
+            total_num_kv_heads=self.num_kv_heads,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.qkv_proj",
+        )
+
+        self.out_proj = RowParallelLinear(
+            input_size=self.hidden_size,
+            output_size=self.hidden_size,
+            bias=True,
+            quant_config=quant_config,
+            prefix=f"{prefix}.out_proj",
+        )
+
+        # Moondream uses 32-dim rotation out of 64-dim head (partial_rotary_factor=0.5)
+        # HF Moondream uses non-interleaved RoPE (split by half)
+        # In vLLM, is_neox_style=True means split by half (GPT-NeoX style)
+        rope_parameters = {
+            "rope_theta": config.rope_theta,
+            "partial_rotary_factor": 32 / self.head_dim,  # 32/64 = 0.5
+        }
+        self.rotary_emb = get_rope(
+            head_size=self.head_dim,
+            max_position=config.max_context,
+            rope_parameters=rope_parameters,
+            is_neox_style=True,  # Moondream uses split-by-half (GPT-NeoX) style
+        )
+
+        self.scaling = self.head_dim**-0.5
+        self.attn = Attention(
+            num_heads=self.num_heads_per_partition,
+            head_size=self.head_dim,
+            scale=self.scaling,
+            num_kv_heads=self.num_kv_heads_per_partition,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            prefix=f"{prefix}.attn",
+        )
+
+        # Tau scaling parameters for position-dependent attention
+        # These are learned during training to modulate attention based on position
+        # tau_wq and tau_wv need full qkv_dim for correct computation
+        # Only heads are partitioned, qkv dimension is kept full for all-gather
+        qkv_dim = self.hidden_size * 3  # Q + K + V dimension (full)
+        self.tau_alpha = nn.Parameter(torch.zeros(self.num_heads_per_partition))
+        self.tau_wq = nn.Parameter(torch.zeros(self.num_heads_per_partition, qkv_dim))
+        self.tau_wv = nn.Parameter(torch.zeros(self.num_heads_per_partition, qkv_dim))
+        self.tp_size = tp_size
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor:
+        qkv, _ = self.qkv_proj(hidden_states)
+
+        q, k, v = qkv.split(
+            [
+                self.num_heads_per_partition * self.head_dim,
+                self.num_kv_heads_per_partition * self.head_dim,
+                self.num_kv_heads_per_partition * self.head_dim,
+            ],
+            dim=-1,
+        )
+
+        # Apply tau scaling to Q and V
+        # Tau scaling has two components:
+        # 1. Token-based: tok_q = tanh(gelu(qkv) @ tau_wq.T)
+        # 2. Position-based: tau_pos = 1 + (sigmoid(alpha * log(pos+1)) - 0.5)
+        # Final: tau = tok + tau_pos
+        #
+        # For TP, tau weights are sharded by head, but qkv_dim is kept full
+
+        # Get full qkv for tau computation
+        # With TP, reconstruct qkv in correct layout [q_full, k_full, v_full]
+        # (all-gather would produce [q_0, k_0, v_0, q_1, k_1, v_1] - wrong)
+        if self.tp_size > 1:
+            # All-gather once, then reconstruct [q_full, k_full, v_full].
+            qkv_full_sharded = tensor_model_parallel_all_gather(qkv.contiguous())
+            q_local_dim = q.shape[-1]
+            kv_local_dim = k.shape[-1]
+            qkv_full_sharded = qkv_full_sharded.view(
+                qkv.shape[0],
+                self.tp_size,
+                q_local_dim + 2 * kv_local_dim,
+            )
+            q_full = qkv_full_sharded[:, :, :q_local_dim].reshape(qkv.shape[0], -1)
+            k_full = qkv_full_sharded[
+                :, :, q_local_dim : q_local_dim + kv_local_dim
+            ].reshape(qkv.shape[0], -1)
+            v_full = qkv_full_sharded[:, :, q_local_dim + kv_local_dim :].reshape(
+                qkv.shape[0], -1
+            )
+            qkv_full = torch.cat([q_full, k_full, v_full], dim=-1).contiguous()
+        else:
+            qkv_full = qkv
+
+        # Compute tau scaling factors matching HF implementation exactly:
+        # tok_feat = gelu(qkv)
+        # tok_q = tanh(tok_feat @ tau_wq.T)  # [num_tokens, num_heads]
+        # tau_pos = 1 + (sigmoid(alpha * log(pos+1)) - 0.5)  # [num_heads, num_tokens]
+        # tau = (tok_q.T + tau_pos).T  # [num_tokens, num_heads]
+        num_tokens = qkv_full.shape[0]
+        orig_dtype = q.dtype
+
+        # Token-based component
+        tok_feat = F.gelu(qkv_full)  # Apply GELU activation
+        tok_q = torch.tanh(tok_feat @ self.tau_wq.t())  # [N, H_per_partition]
+        tok_v = torch.tanh(tok_feat @ self.tau_wv.t())  # [N, H_per_partition]
+
+        # Position-based component
+        # tau_pos = 1 + (sigmoid(alpha * log(pos+1)) - 0.5)
+        # positions is [num_tokens], need to compute for each head
+        # tau_alpha: [num_heads_per_partition]
+        pos_float = (positions.to(orig_dtype) + 1.0).clamp(min=1e-6)
+        pos_log = pos_float.log()  # [num_tokens]
+        # alpha[:, None] * pos_log[None, :] -> [num_heads, num_tokens]
+        tau_pos = 1.0 + (
+            torch.sigmoid(self.tau_alpha[:, None] * pos_log[None, :]) - 0.5
+        )  # [H_per_partition, N]
+
+        # Combine token and position components
+        tau_q = (tok_q + tau_pos.t()).to(orig_dtype)  # [N, H_per_partition]
+        tau_v = (tok_v + tau_pos.t()).to(orig_dtype)  # [N, H_per_partition]
+
+        # Reshape q and v to apply per-head tau scaling
+        q = q.view(num_tokens, self.num_heads_per_partition, self.head_dim)
+        v = v.view(num_tokens, self.num_kv_heads_per_partition, self.head_dim)
+
+        # Apply tau scaling
+        q = q * tau_q.unsqueeze(-1)
+        v = v * tau_v[:, : self.num_kv_heads_per_partition].unsqueeze(-1)
+
+        # Reshape back
+        q = q.view(num_tokens, -1)
+        v = v.view(num_tokens, -1)
+
+        q, k = self.rotary_emb(positions, q, k)
+
+        attn_output = self.attn(q, k, v)
+
+        output, _ = self.out_proj(attn_output)
+        return output
+
+
+class Moondream3DecoderLayer(nn.Module):
+    """Decoder layer with attention + MLP/MoE."""
+
+    def __init__(
+        self,
+        config: Moondream3TextConfig,
+        cache_config=None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        layer_idx = extract_layer_index(prefix)
+        self.layer_idx = layer_idx
+
+        self.ln = nn.LayerNorm(config.dim, eps=1e-5, bias=True)
+
+        self.attn = Moondream3Attention(
+            config=config,
+            layer_idx=layer_idx,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            prefix=f"{prefix}.attn",
+        )
+
+        # Use MoE for layers >= moe_start_layer, standard MLP otherwise
+        if layer_idx >= config.moe_start_layer:
+            self.mlp = Moondream3TextMoE(
+                hidden_size=config.dim,
+                expert_inner_dim=config.moe_expert_inner_dim,
+                num_experts=config.moe_num_experts,
+                experts_per_token=config.moe_experts_per_token,
+                quant_config=quant_config,
+                prefix=f"{prefix}.mlp",
+            )
+        else:
+            self.mlp = Moondream3TextMLP(
+                hidden_size=config.dim,
+                intermediate_size=config.ff_dim,
+                quant_config=quant_config,
+                prefix=f"{prefix}.mlp",
+            )
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor:
+        # Pre-norm architecture
+        normed = self.ln(hidden_states)
+        attn_out = self.attn(positions, normed)
+        mlp_out = self.mlp(normed)
+        hidden_states = hidden_states + attn_out + mlp_out
+        return hidden_states
+
+
+class Moondream3TextModel(nn.Module):
+    """Text decoder model."""
+
+    def __init__(
+        self,
+        config: Moondream3TextConfig,
+        cache_config=None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.config = config
+
+        self.wte = VocabParallelEmbedding(
+            config.vocab_size,
+            config.dim,
+            prefix=f"{prefix}.wte",
+        )
+
+        blocks_prefix = maybe_prefix(prefix, "blocks")
+        self.start_layer, self.end_layer, self.blocks = make_layers(
+            config.n_layers,
+            lambda prefix: Moondream3DecoderLayer(
+                config=config,
+                cache_config=cache_config,
+                quant_config=quant_config,
+                prefix=prefix,
+            ),
+            prefix=blocks_prefix,
+        )
+
+        self.post_ln = nn.LayerNorm(config.dim, eps=1e-5, bias=True)
+        self.make_empty_intermediate_tensors = make_empty_intermediate_tensors_factory(
+            ["hidden_states"], config.dim
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.wte(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        pp_group = get_pp_group()
+        if pp_group.is_first_rank:
+            if inputs_embeds is not None:
+                hidden_states = inputs_embeds
+            else:
+                assert input_ids is not None
+                hidden_states = self.embed_input_ids(input_ids)
+        else:
+            assert intermediate_tensors is not None
+            hidden_states = intermediate_tensors["hidden_states"]
+
+        for i, layer in enumerate(
+            islice(self.blocks, self.start_layer, self.end_layer)
+        ):
+            hidden_states = layer(positions, hidden_states)
+
+        if not pp_group.is_last_rank:
+            return IntermediateTensors({"hidden_states": hidden_states})
+
+        hidden_states = self.post_ln(hidden_states)
+        return hidden_states
+
+
+@dataclass(frozen=True)
+class Moondream3ImageInput:
+    """Container holding per-image inputs for embedding."""
+
+    pixel_values: torch.Tensor
+    tiling: tuple[int, int] | None
+
+
+# ============================================================================
+# Multimodal Processing
+# ============================================================================
+
+
+class Moondream3ProcessingInfo(BaseProcessingInfo):
+    """Processing info for Moondream3."""
+
+    def get_hf_config(self):
+        return self.ctx.get_hf_config()
+
+    def get_hf_processor(self, **kwargs: object):
+        return self.ctx.get_hf_processor(Moondream3Processor, **kwargs)
+
+    def get_supported_mm_limits(self) -> Mapping[str, int | None]:
+        return {"image": 1}
+
+    def get_num_image_tokens(
+        self,
+        *,
+        image_width: int,
+        image_height: int,
+    ) -> int:
+        # HF pre-fills BOS together with the fixed 27x27 vision grid under
+        # the same bidirectional prefix mask: 1 BOS + 729 image embeddings.
+        return 730
+
+    def get_image_size_with_most_features(self) -> ImageSize:
+        return ImageSize(width=378, height=378)
+
+    def get_max_image_tokens(self) -> int:
+        return 730
+
+    def get_mm_max_tokens_per_item(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+    ) -> Mapping[str, int]:
+        return {"image": self.get_max_image_tokens()}
+
+
+class Moondream3DummyInputsBuilder(BaseDummyInputsBuilder[Moondream3ProcessingInfo]):
+    """Dummy inputs builder for profiling."""
+
+    def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
+        return (
+            "<|endoftext|><image><|md_reserved_0|>query<|md_reserved_1|>"
+            "What is this image?<|md_reserved_2|>"
+        )
+
+    def get_dummy_mm_data(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+        mm_options: Mapping[str, BaseDummyOptions] | None = None,
+        mm_processor_kwargs: Mapping[str, object] | None = None,
+    ) -> MultiModalDataDict:
+        num_images = mm_counts.get("image", 0)
+        return {
+            "image": self._get_dummy_images(
+                width=378,
+                height=378,
+                num_images=num_images,
+            )
+        }
+
+
+class Moondream3MultiModalProcessor(BaseMultiModalProcessor[Moondream3ProcessingInfo]):
+    """Multimodal processor for Moondream3."""
+
+    image_placeholder: str = "<image>"
+    bos_image_placeholder: str = "<|endoftext|><image>"
+
+    def _call_hf_processor(
+        self,
+        prompt: str,
+        mm_data: Mapping[str, object],
+        mm_kwargs: Mapping[str, object],
+        tok_kwargs: Mapping[str, object],
+    ) -> BatchFeature:
+        # Moondream3's processor handles images directly rather than exposing a
+        # separate `image_processor`, so keep the cache path on text+MM calls.
+        return super()._call_hf_processor(prompt, mm_data, mm_kwargs, tok_kwargs)
+
+    @cached_property
+    def bos_image_placeholder_tokens(self) -> list[int]:
+        tokenizer = self.info.get_tokenizer()
+        token_ids = tokenizer.encode(
+            self.bos_image_placeholder,
+            add_special_tokens=False,
+        )
+        if len(token_ids) < 2:
+            raise ValueError(
+                "Tokenizer could not encode Moondream3 BOS/image placeholder "
+                f"{self.bos_image_placeholder!r}."
+            )
+        return token_ids
+
+    def _get_mm_fields_config(
+        self,
+        hf_inputs: BatchFeature,
+        hf_processor_mm_kwargs: Mapping[str, object],
+    ) -> Mapping[str, MultiModalFieldConfig]:
+        return {
+            "pixel_values": MultiModalFieldConfig.batched("image"),
+            "tilings": MultiModalFieldConfig.batched("image", keep_on_cpu=True),
+        }
+
+    def _hf_processor_applies_updates(
+        self,
+        prompt_text: str,
+        mm_items: MultiModalDataItems,
+        hf_processor_mm_kwargs: Mapping[str, object],
+        tokenization_kwargs: Mapping[str, object],
+    ) -> bool:
+        # Moondream3 HF processor does NOT expand placeholder tokens.
+        # vLLM expands BOS + <image> so the whole HF image prefix is marked
+        # bidirectional by the multimodal prefix-LM mask.
+        return False
+
+    def _get_prompt_updates(
+        self,
+        mm_items: MultiModalDataItems,
+        hf_processor_mm_kwargs: Mapping[str, object],
+        out_mm_kwargs: MultiModalKwargsItems,
+    ) -> list[PromptUpdate]:
+        image_size = self.info.get_image_size_with_most_features()
+        num_image_tokens = self.info.get_num_image_tokens(
+            image_width=image_size.width,
+            image_height=image_size.height,
+        )
+        placeholder_tokens = self.bos_image_placeholder_tokens
+        bos_token = placeholder_tokens[0]
+        image_token = placeholder_tokens[-1]
+        return [
+            PromptReplacement(
+                modality="image",
+                target=placeholder_tokens,
+                replacement=PromptUpdateDetails(
+                    full=[bos_token] + [image_token] * (num_image_tokens - 1),
+                ),
+            ),
+        ]
+
+
+# ============================================================================
+# Main Model
+# ============================================================================
+
+
+@MULTIMODAL_REGISTRY.register_processor(
+    Moondream3MultiModalProcessor,
+    info=Moondream3ProcessingInfo,
+    dummy_inputs=Moondream3DummyInputsBuilder,
+)
+class Moondream3ForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
+    """Moondream3 multimodal model for causal language modeling.
+
+    vLLM supports the standard autoregressive Moondream3 query and caption
+    prompt formats. The region-module point/detect skills require custom
+    coordinate decoding and are intentionally not exposed here.
+    """
+
+    supports_multimodal = True
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+    }
+
+    def __init__(
+        self,
+        *,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+    ):
+        super().__init__()
+
+        hf_config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        cache_config = vllm_config.cache_config
+
+        # Reuse the transformers_utils config implementation.
+        if isinstance(hf_config, Moondream3Config):
+            self.config = hf_config
+        else:
+            config_dict = hf_config.config if hasattr(hf_config, "config") else {}
+            self.config = Moondream3Config(config=config_dict)
+
+        with self._mark_tower_model(vllm_config, "image"):
+            # Vision encoder
+            self.vision = Moondream3VisionEncoder(
+                config=self.config.vision_config,
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "vision"),
+            )
+
+            # Vision projection
+            self.vision_proj = Moondream3VisionProjection(
+                input_dim=self.config.vision_config.enc_dim,
+                inner_dim=self.config.vision_config.proj_inner_dim,
+                output_dim=self.config.text_config.dim,
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "vision_proj"),
+            )
+
+        with self._mark_language_model(vllm_config):
+            # Text decoder
+            self.text = Moondream3TextModel(
+                config=self.config.text_config,
+                cache_config=cache_config,
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "text"),
+            )
+
+            # LM head (with bias - Moondream3 has lm_head bias)
+            self.lm_head = ParallelLMHead(
+                self.config.text_config.vocab_size,
+                self.config.text_config.dim,
+                bias=True,
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "lm_head"),
+            )
+
+        self.logits_processor = LogitsProcessor(self.config.text_config.vocab_size)
+        self.make_empty_intermediate_tensors = self.text.make_empty_intermediate_tensors
+        self._answer_id = getattr(
+            self.config,
+            "answer_token_id",
+            getattr(hf_config, "answer_token_id", 3),
+        )
+
+    @classmethod
+    def get_placeholder_str(cls, modality: str, i: int) -> str | None:
+        if modality == "image":
+            return "<image>"
+        return None
+
+    def get_language_model(self) -> nn.Module:
+        return self.text
+
+    def get_num_mm_encoder_tokens(self, num_image_tokens: int) -> int:
+        return num_image_tokens
+
+    def get_num_mm_connector_tokens(self, num_vision_tokens: int) -> int:
+        return num_vision_tokens
+
+    def _split_pixel_values(
+        self,
+        pixel_values: object,
+    ) -> list[torch.Tensor]:
+        # The processor should standardize image inputs into:
+        # - torch.Tensor [num_images, num_crops, C, H, W], or
+        # - list[torch.Tensor[num_crops, C, H, W]] for ragged crops.
+        if isinstance(pixel_values, torch.Tensor):
+            if pixel_values.dim() != 5:
+                raise ValueError(
+                    "Expected `pixel_values` tensor with shape "
+                    "[num_images, num_crops, C, H, W], got "
+                    f"{tuple(pixel_values.shape)}."
+                )
+            return [pv.contiguous() for pv in pixel_values]
+
+        if isinstance(pixel_values, (list, tuple)):
+            tensors: list[torch.Tensor] = []
+            for value in pixel_values:
+                if not isinstance(value, torch.Tensor):
+                    raise TypeError(
+                        "Expected each `pixel_values` element to be a tensor, "
+                        f"got {type(value)!r}."
+                    )
+                if value.dim() != 4:
+                    raise ValueError(
+                        f"Unsupported pixel_values element shape {tuple(value.shape)}."
+                    )
+                tensors.append(value.contiguous())
+            return tensors
+
+        raise TypeError(
+            "pixel_values must be a tensor or a sequence of tensors, "
+            f"got {type(pixel_values)!r}."
+        )
+
+    def _split_tilings(
+        self,
+        tilings: object,
+        expected: int,
+    ) -> list[tuple[int, int] | None]:
+        if tilings is None:
+            return [None] * expected
+
+        if isinstance(tilings, torch.Tensor):
+            if tilings.dim() != 2 or tilings.shape[1] != 2:
+                raise ValueError(
+                    "Expected `tilings` tensor with shape [num_images, 2], got "
+                    f"{tuple(tilings.shape)}."
+                )
+            tiling_items = tilings.tolist()
+        elif isinstance(tilings, (list, tuple)):
+            tiling_items = list(tilings)
+        else:
+            raise TypeError(
+                "tilings must be None, a tensor or a sequence of tuples, "
+                f"got {type(tilings)!r}."
+            )
+
+        if len(tiling_items) != expected:
+            raise ValueError(
+                "Mismatch between the number of pixel_values entries "
+                f"({expected}) and tilings ({len(tiling_items)})."
+            )
+
+        normalized: list[tuple[int, int] | None] = []
+        for tiling in tiling_items:
+            if tiling is None:
+                normalized.append(None)
+                continue
+            if isinstance(tiling, torch.Tensor):
+                tiling = tiling.tolist()
+            if isinstance(tiling, (list, tuple)) and len(tiling) == 2:
+                normalized.append((int(tiling[0]), int(tiling[1])))
+            else:
+                raise ValueError(
+                    f"Each tiling entry must be a pair of integers, got {tiling!r}."
+                )
+        return normalized
+
+    def _parse_image_inputs(self, **kwargs: object) -> list[Moondream3ImageInput]:
+        pixel_values = kwargs.get("pixel_values")
+        if pixel_values is None:
+            return []
+
+        pixel_values_list = self._split_pixel_values(pixel_values)
+        tilings_list = self._split_tilings(
+            kwargs.get("tilings"), len(pixel_values_list)
+        )
+
+        image_inputs: list[Moondream3ImageInput] = []
+        for value, tiling in zip(pixel_values_list, tilings_list):
+            if value.dim() != 4:
+                raise ValueError(
+                    f"Expected 4D tensor for crops, got {tuple(value.shape)}."
+                )
+            image_inputs.append(Moondream3ImageInput(pixel_values=value, tiling=tiling))
+        return image_inputs
+
+    def _encode_image_input(self, image_input: Moondream3ImageInput) -> torch.Tensor:
+        pixel_values = image_input.pixel_values
+        if pixel_values.dim() != 4:
+            raise ValueError(
+                f"Expected 4D tensor for crops, got {tuple(pixel_values.shape)}."
+            )
+
+        device = self.vision.patch_emb.weight.device
+        dtype = self.vision.patch_emb.weight.dtype
+        pixel_values = pixel_values.to(device=device, dtype=dtype)
+
+        features = self.vision(pixel_values)
+
+        # Grid size = crop_size / patch_size (e.g., 378 / 14 = 27)
+        grid_size = (
+            self.config.vision_config.crop_size
+            // self.config.vision_config.enc_patch_size
+        )
+        enc_dim = self.config.vision_config.enc_dim
+        global_features = features[0]
+
+        if features.shape[0] > 1:
+            if image_input.tiling is None:
+                raise ValueError(
+                    "Missing tiling metadata for multi-crop Moondream image."
+                )
+            local = features[1:].contiguous().view(-1, grid_size, grid_size, enc_dim)
+            reconstructed = reconstruct_from_crops(
+                local,
+                image_input.tiling,
+                overlap_margin=self.config.vision_config.overlap_margin,
+                patch_size=1,
+            )
+        else:
+            reconstructed = global_features.view(grid_size, grid_size, enc_dim)
+
+        recon = reconstructed.permute(2, 0, 1).contiguous()
+        # Mirror HF reference behavior: reconstructed local features are pooled
+        # to enc_n_layers x enc_n_layers. For moondream3-preview this is 27x27.
+        pooled_size = self.config.vision_config.enc_n_layers
+        if pooled_size != grid_size:
+            logger.warning_once(
+                "Moondream3 pooled_size (%d) differs from crop grid (%d). "
+                "Using enc_n_layers to match HF reference behavior.",
+                pooled_size,
+                grid_size,
+            )
+        recon = F.adaptive_avg_pool2d(recon, output_size=(pooled_size, pooled_size))
+        recon = recon.permute(1, 2, 0).contiguous().view(-1, enc_dim)
+
+        combined = torch.cat([global_features, recon], dim=-1).unsqueeze(0)
+        projected = self.vision_proj(combined).squeeze(0)
+
+        # Note: Vision embeddings are already synchronized across TP ranks
+        # because the vision projection uses RowParallelLinear which performs
+        # all-reduce internally, ensuring identical outputs on all ranks.
+
+        return projected
+
+    def embed_multimodal(self, **kwargs: object) -> MultiModalEmbeddings:
+        """Generate the HF image prefix: BOS embedding + 729 image embeddings."""
+        image_inputs = self._parse_image_inputs(**kwargs)
+        if not image_inputs:
+            return []
+
+        device = self.vision.patch_emb.weight.device
+        bos_ids = torch.tensor([self.config.bos_token_id], device=device)
+        bos_embedding = self.text.embed_input_ids(bos_ids)
+
+        embeddings: list[torch.Tensor] = []
+        for image_input in image_inputs:
+            image_embeddings = self._encode_image_input(image_input)
+            embeddings.append(
+                torch.cat([bos_embedding.to(image_embeddings.dtype), image_embeddings])
+            )
+        return embeddings
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        **kwargs,
+    ) -> torch.Tensor | IntermediateTensors:
+        hidden_states = self.text(
+            input_ids=input_ids,
+            positions=positions,
+            intermediate_tensors=intermediate_tensors,
+            inputs_embeds=inputs_embeds,
+        )
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        logits = self.logits_processor(self.lm_head, hidden_states)
+        if logits is not None:
+            logits[:, self._answer_id] = float("-inf")
+        return logits
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        """Load weights with remapping from HuggingFace format."""
+
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+
+        # Get expert intermediate size for fc1 splitting
+
+        for name, loaded_weight in weights:
+            # Map from HF naming to vLLM naming
+            # model.vision.* -> vision.*
+            # model.text.* -> text.*
+            if name.startswith("model."):
+                name = name[6:]  # Remove "model." prefix
+
+            # Specific name mappings
+            # Vision projection: vision.proj_mlp.fc1 -> vision_proj.fc1
+            name = name.replace("vision.proj_mlp.", "vision_proj.")
+
+            # Text embedding: text.wte (no suffix) -> text.wte.weight
+            if name == "text.wte":
+                name = "text.wte.weight"
+
+            # LM head: text.lm_head -> lm_head
+            name = name.replace("text.lm_head.", "lm_head.")
+
+            # Attention mapping
+            name = name.replace(".attn.qkv.", ".attn.qkv_proj.")
+            name = name.replace(".attn.proj.", ".attn.out_proj.")
+
+            # Tau attention scaling weights
+            # HF format: .attn.tau.alpha -> .attn.tau_alpha
+            name = name.replace(".attn.tau.alpha", ".attn.tau_alpha")
+            name = name.replace(".attn.tau.wq", ".attn.tau_wq")
+            name = name.replace(".attn.tau.wv", ".attn.tau_wv")
+
+            # MoE router mapping: mlp.router -> mlp.gate
+            name = name.replace(".mlp.router.", ".mlp.gate.")
+
+            # Handle MoE expert weights for layers 4+ with expert parallelism
+            # fc1.weight: [n_experts, expert_inner_dim * 2, hidden_size] (gate+up)
+            # fc2.weight: [n_experts, hidden_size, expert_inner_dim] (down)
+            # Each GPU stores n_experts/tp_size experts
+            # Note: Only 3D weights are MoE, 2D weights are standard MLP
+            if ".mlp.fc1.weight" in name and loaded_weight.dim() == 3:
+                from vllm.distributed import get_tensor_model_parallel_rank
+
+                tp_size = get_tensor_model_parallel_world_size()
+                tp_rank = get_tensor_model_parallel_rank()
+                num_experts = loaded_weight.shape[0]
+                experts_per_rank = num_experts // tp_size
+                expert_start = tp_rank * experts_per_rank
+                expert_end = expert_start + experts_per_rank
+                # Shard by expert dimension
+                loaded_weight = loaded_weight[expert_start:expert_end].contiguous()
+                # Map to our custom MoE format: mlp.fc1_weight
+                name = name.replace(".mlp.fc1.weight", ".mlp.fc1_weight")
+
+            if ".mlp.fc2.weight" in name and loaded_weight.dim() == 3:
+                from vllm.distributed import get_tensor_model_parallel_rank
+
+                tp_size = get_tensor_model_parallel_world_size()
+                tp_rank = get_tensor_model_parallel_rank()
+                num_experts = loaded_weight.shape[0]
+                experts_per_rank = num_experts // tp_size
+                expert_start = tp_rank * experts_per_rank
+                expert_end = expert_start + experts_per_rank
+                # Shard by expert dimension
+                loaded_weight = loaded_weight[expert_start:expert_end].contiguous()
+                # Map to our custom MoE format: mlp.fc2_weight
+                name = name.replace(".mlp.fc2.weight", ".mlp.fc2_weight")
+
+            # Handle tau weights with tensor parallelism
+            # tau_alpha: [num_heads] -> [num_heads/tp]
+            # tau_wq: [num_heads, qkv_dim] -> [num_heads/tp, qkv_dim/tp]
+            # tau_wv: [num_heads, qkv_dim] -> [num_heads/tp, qkv_dim/tp]
+            if ".tau_alpha" in name:
+                from vllm.distributed import get_tensor_model_parallel_rank
+
+                tp_size = get_tensor_model_parallel_world_size()
+                tp_rank = get_tensor_model_parallel_rank()
+                num_heads = loaded_weight.shape[0]
+                heads_per_partition = num_heads // tp_size
+                start = tp_rank * heads_per_partition
+                end = start + heads_per_partition
+                loaded_weight = loaded_weight[start:end].contiguous()
+
+            if ".tau_wq" in name or ".tau_wv" in name:
+                from vllm.distributed import get_tensor_model_parallel_rank
+
+                tp_size = get_tensor_model_parallel_world_size()
+                tp_rank = get_tensor_model_parallel_rank()
+                num_heads, qkv_dim = loaded_weight.shape
+                heads_per_partition = num_heads // tp_size
+                # Only shard by head dimension, keep full qkv_dim for all-gather
+                head_start = tp_rank * heads_per_partition
+                head_end = head_start + heads_per_partition
+                loaded_weight = loaded_weight[head_start:head_end, :].contiguous()
+
+            if name in params_dict:
+                param = params_dict[name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                weight_loader(param, loaded_weight)
+                loaded_params.add(name)
+
+        return loaded_params
diff --git a/vllm/model_executor/models/musicflamingo.py b/vllm/model_executor/models/musicflamingo.py
index f4e3bbe379a3..497b2e63a7e9 100644
--- a/vllm/model_executor/models/musicflamingo.py
+++ b/vllm/model_executor/models/musicflamingo.py
@@ -32,9 +32,9 @@
 
 from vllm.config import VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
+from vllm.inputs import MultiModalDataDict
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import (
-    MultiModalDataDict,
     MultiModalFieldConfig,
     MultiModalKwargsItems,
 )
diff --git a/vllm/model_executor/models/nano_nemotron_vl.py b/vllm/model_executor/models/nano_nemotron_vl.py
index 182e0f1599fe..64667503d578 100644
--- a/vllm/model_executor/models/nano_nemotron_vl.py
+++ b/vllm/model_executor/models/nano_nemotron_vl.py
@@ -7,7 +7,6 @@
 #     LICENSE is in root directory.
 # --------------------------------------------------------
 
-import copy
 import math
 import warnings
 from collections.abc import Iterable, Mapping, Sequence
@@ -17,7 +16,7 @@
 
 import torch
 import torch.nn as nn
-from transformers import BatchFeature
+from transformers import BatchFeature, PretrainedConfig
 
 from vllm.config import VllmConfig
 from vllm.config.multimodal import BaseDummyOptions, VideoDummyOptions
@@ -38,6 +37,7 @@
 from vllm.model_executor.models.parakeet import ParakeetExtractor, ProjectedParakeet
 from vllm.model_executor.models.radio import RadioModel, calc_seq_lens
 from vllm.model_executor.models.utils import (
+    WeightsMapper,
     init_vllm_registered_model,
     maybe_prefix,
 )
@@ -210,11 +210,15 @@ def get_hf_processor(self, **kwargs: object) -> NanoNemotronVLProcessor:
 
     @cached_property
     def is_dynamic_tiler(self) -> bool:
-        return self.get_hf_processor().dynamic_tiler is not None
+        return BaseNanoNemotronVLProcessor.use_dynamic_resolution(self.get_hf_config())
 
-    @cached_property
+    @property
     def supports_video(self):
-        return self.get_hf_processor().supports_video
+        return True
+
+    @property
+    def supports_audio(self) -> bool:
+        return self.sound_config is not None
 
     def get_video_token(self) -> str | None:
         return IMG_CONTEXT
@@ -223,8 +227,8 @@ def get_video_pruning_rate(self) -> float | None:
         return self.ctx.get_mm_config().video_pruning_rate
 
     @property
-    def audio_extractor(self) -> ParakeetExtractor | None:
-        return self.get_hf_processor().audio_extractor
+    def sound_config(self) -> PretrainedConfig | None:
+        return getattr(self.get_hf_config(), "sound_config", None)
 
     def get_default_tok_params(self) -> TokenizeParams:
         return super().get_default_tok_params().with_kwargs(add_special_tokens=False)
@@ -232,14 +236,14 @@ def get_default_tok_params(self) -> TokenizeParams:
     def get_supported_mm_limits(self) -> Mapping[str, int | None]:
         image_limit = {"image": None}
         video_limit = {"video": None} if self.supports_video else {}
-        audio_limit = {"audio": None} if self.audio_extractor is not None else {}
+        audio_limit = {"audio": None} if self.supports_audio else {}
         return {**image_limit, **video_limit, **audio_limit}
 
     def get_data_parser(self):
         target_sr = None
         target_channels = None
-        if extractor := self.audio_extractor:
-            target_sr = extractor.sampling_rate
+        if self.sound_config:
+            target_sr = self.sound_config.sampling_rate
             target_channels = 1
 
         return MultiModalDataParser(
@@ -285,6 +289,35 @@ def get_max_image_tokens(self) -> int:
             max_num_tiles=max_num_tiles,
         )
 
+    def get_dummy_image_size_and_max_tokens(
+        self, mm_counts: Mapping[str, int]
+    ) -> tuple[tuple[int, int], int]:
+        processor = self.get_hf_processor()
+        num_images = mm_counts.get("image", 0)
+
+        if tiler := processor.dynamic_tiler:
+            budget = tiler.max_num_tokens_available(text_prompt_length=num_images)
+            target_width, target_height = (
+                tiler.width_and_height_for_max_num_tokens_available(budget)
+            )
+            return (
+                (target_width, target_height),
+                tiler._get_num_embeddings(target_width, target_height),
+            )
+
+        max_num_tiles = processor.max_num_tiles
+        target_width, target_height = self.get_image_size_with_most_features(
+            max_num_tiles
+        )
+        return (
+            (target_width, target_height),
+            processor.get_num_image_tokens(
+                image_width=target_width,
+                image_height=target_height,
+                max_num_tiles=max_num_tiles,
+            ),
+        )
+
     def get_num_frames_with_most_features(
         self,
         seq_len: int,
@@ -303,10 +336,44 @@ def get_num_frames_with_most_features(
         max_frames_per_video = max_tubelets_per_video * T
         return max(max_frames_per_video, 1)
 
+    def get_mm_max_tokens_per_item(
+        self, seq_len: int, mm_counts: Mapping[str, int]
+    ) -> Mapping[str, int]:
+        mm_max_tokens: dict[str, int] = {}
+
+        if mm_counts.get("image", 0) > 0:
+            _, mm_max_tokens["image"] = self.get_dummy_image_size_and_max_tokens(
+                mm_counts
+            )
+
+        if mm_counts.get("video", 0) > 0:
+            assert self.supports_video
+            mm_max_tokens["video"] = seq_len
+
+        if mm_counts.get("audio", 0) > 0:
+            assert self.supports_audio
+            mm_max_tokens["audio"] = seq_len
+
+        return mm_max_tokens
+
 
 class NanoNemotronVLMultiModalProcessor(
     BaseMultiModalProcessor[NanoNemotronVLProcessingInfo]
 ):
+    def _call_hf_processor(
+        self,
+        prompt: str,
+        mm_data: Mapping[str, object],
+        mm_kwargs: Mapping[str, object],
+        tok_kwargs: Mapping[str, object],
+    ) -> BatchFeature:
+        """
+        Bypass `call_hf_processor_mm_only` by no-op overriding`_call_hf_processor`,
+        so it chooses this path:
+        `type(self)._call_hf_processor != BaseMultiModalProcessor._call_hf_processor`
+        """
+        return super()._call_hf_processor(prompt, mm_data, mm_kwargs, tok_kwargs)
+
     def _get_image_fields_config(self, hf_inputs: BatchFeature):
         if self.info.is_dynamic_tiler:
             pixel_values_flat = MultiModalFieldConfig.batched("image")
@@ -357,7 +424,7 @@ def _get_mm_fields_config(
         fields = self._get_image_fields_config(hf_inputs)
         if self.info.supports_video:
             fields |= self._get_video_fields_config(hf_inputs)
-        if self.info.audio_extractor:
+        if self.info.supports_audio:
             fields |= self._get_audio_fields_config(hf_inputs)
 
         return fields
@@ -385,9 +452,8 @@ def get_image_replacement(item_idx: int):
 
             if isinstance(images, ImageEmbeddingItems):
                 feature_size = images.get_feature_size(item_idx)
-            elif tiler := hf_processor.dynamic_tiler:
-                image = images.get(item_idx)
-                feature_size = tiler.get_cached_feature_size(image)
+            elif self.info.is_dynamic_tiler:
+                feature_size = out_mm_data["num_tokens_per_image"][item_idx]
             else:
                 image_size = images.get_image_size(item_idx)
                 max_num_tiles = hf_processor.max_num_tiles
@@ -522,7 +588,7 @@ def _get_prompt_updates(
             prompt_repls.append(
                 self._get_prompt_repl_video(mm_items, hf_processor, out_mm_data)
             )
-        if self.info.audio_extractor:
+        if self.info.supports_audio:
             prompt_repls.append(
                 self._get_prompt_repl_audio(mm_items, hf_processor, out_mm_data)
             )
@@ -532,19 +598,26 @@ def _get_prompt_updates(
     def _extract_audio_from_videos(
         self,
         mm_items: MultiModalDataItems,
-    ) -> tuple[MultiModalDataItems, list[AudioItem]]:
+    ) -> tuple[MultiModalDataItems, list[AudioItem], list[bool]]:
         """Extract audio tracks from video bytes in *mm_items*.
 
+        Videos whose bytes are missing or that contain no audio stream are
+        silently skipped.  The returned *has_audio* mask is aligned with
+        the video list so callers know which ``<video>`` tokens need an
+        accompanying audio context.
+
         Returns:
-            The augmented *mm_items* (with audio added) and the list of
-            extracted audio items.
+            A 3-tuple of (augmented mm_items, extracted audio items,
+            per-video boolean mask indicating which videos have audio).
         """
         videos = mm_items.get_items("video", VideoProcessorItems)
         assert isinstance(videos.metadata, list)
+
         metadata_list = videos.metadata
 
         audio_items: list[AudioItem] = []
-        for metadata in metadata_list:
+        has_audio: list[bool] = []
+        for idx, metadata in enumerate(metadata_list):
             video_bytes = metadata.get("original_video_bytes")
             if video_bytes is None or len(video_bytes) == 0:
                 raise ValueError(
@@ -553,7 +626,16 @@ def _extract_audio_from_videos(
                     "video must be loaded with keep_video_bytes=True (e.g. via "
                     "the chat API with a model that sets use_audio_in_video)."
                 )
-            audio_items.append(load_audio_pyav(BytesIO(video_bytes)))
+            try:
+                audio_items.append(load_audio_pyav(BytesIO(video_bytes)))
+                has_audio.append(True)
+            except Exception:
+                logger.debug(
+                    "Video %d: no audio stream found, skipping audio extraction.",
+                    idx,
+                    exc_info=True,
+                )
+                has_audio.append(False)
 
         # Create a new VideoProcessorItems with metadata that does not contain
         # the large video bytes, to avoid modifying the input `mm_items`.
@@ -563,45 +645,83 @@ def _extract_audio_from_videos(
         ]
         new_videos = VideoProcessorItems(data=videos.data, metadata=new_metadata_list)
 
-        audio_parsed = self.data_parser.parse_mm_data({"audio": audio_items})
+        audio_parsed = {}
+        if audio_items:
+            audio_parsed = self.data_parser.parse_mm_data({"audio": audio_items})
 
         # Create a new MultiModalDataItems with the new video and audio items.
         new_mm_items_dict = {**mm_items, **audio_parsed, "video": new_videos}
         mm_items = MultiModalDataItems(new_mm_items_dict)
 
-        return mm_items, audio_items
+        return mm_items, audio_items, has_audio
 
     def apply(
         self,
         inputs: ProcessorInputs,
         timing_ctx: TimingContext,
     ) -> MultiModalInput:
-        use_audio_in_video = bool(
-            inputs.hf_processor_mm_kwargs.get("use_audio_in_video", False)
+        mm_config = self.info.ctx.model_config.get_multimodal_config()
+        merged_kwargs = mm_config.merge_mm_processor_kwargs(
+            inputs.hf_processor_mm_kwargs
         )
+        use_audio_in_video = bool(merged_kwargs.get("use_audio_in_video", False))
+
         inputs.hf_processor_mm_kwargs = {
             k: v
             for k, v in inputs.hf_processor_mm_kwargs.items()
             if k != "use_audio_in_video"
         }
 
-        if not (
-            use_audio_in_video
-            and "video" in inputs.mm_data_items
-            and "audio" not in inputs.mm_data_items
-        ):
+        if not (use_audio_in_video and "video" in inputs.mm_data_items):
             return super().apply(inputs, timing_ctx)
 
-        mm_items, audio_items = self._extract_audio_from_videos(inputs.mm_data_items)
-        inputs.mm_data_items = mm_items
+        mm_items = inputs.mm_data_items
+        if "audio" in mm_items:
+            # Audio was pre-populated by upstream (e.g., OpenAI chat endpoint).
+            # Reuse existing audio items; validate 1:1 correspondence.
+            videos = mm_items.get_items("video", VideoProcessorItems)
+            audios = mm_items.get_items("audio", AudioProcessorItems)
+            if len(audios) != len(videos):
+                raise ValueError(
+                    "use_audio_in_video requires equal number of audio and "
+                    f"video items, got num_audios={len(audios)}, "
+                    f"num_videos={len(videos)}"
+                )
+            audio_items = audios.get_all()
+            has_audio = [True] * len(videos)
+            logger.info(
+                "Using %d pre-populated audio item(s) from upstream.",
+                len(audio_items),
+            )
+        else:
+            # Extract audio from video bytes (library usage path).
+            mm_items, audio_items, has_audio = self._extract_audio_from_videos(mm_items)
+            inputs.mm_data_items = mm_items
+            logger.info(
+                "Extracted audio from video bytes: %d audio(s), has_audio=%s.",
+                len(audio_items),
+                has_audio,
+            )
+
+        if not audio_items:
+            return super().apply(inputs, timing_ctx)
 
         prompt = inputs.prompt
         tokenizer = self.info.get_tokenizer()
         if not isinstance(prompt, str):
             prompt = tokenizer.decode(prompt, skip_special_tokens=False)
 
-        for _ in audio_items:
-            prompt = prompt.replace("<video>", "<video>" + AUDIO_CONTEXT, 1)
+        # Inject AUDIO_CONTEXT only after <video> tokens whose video
+        # actually contained an audio stream (preserving video-audio pairing).
+        tag = "<video>"
+        head, *rest = prompt.split(tag)
+        rebuilt = [head]
+        for append_audio, part in zip(has_audio, rest, strict=True):
+            rebuilt.append(tag)
+            if append_audio:
+                rebuilt.append(AUDIO_CONTEXT)
+            rebuilt.append(part)
+        prompt = "".join(rebuilt)
 
         inputs.prompt = tokenizer.encode(prompt, add_special_tokens=False)
 
@@ -692,17 +812,10 @@ def get_dummy_mm_data(
         mm_options: Mapping[str, BaseDummyOptions],
     ) -> MultiModalDataDict:
         num_images = mm_counts.get("image", 0)
+        (target_width, target_height), _ = (
+            self.info.get_dummy_image_size_and_max_tokens(mm_counts)
+        )
         processor = self.info.get_hf_processor()
-        if tiler := processor.dynamic_tiler:
-            budget = tiler.max_num_tokens_available(text_prompt_length=num_images)
-            target_width, target_height = (
-                tiler.width_and_height_for_max_num_tokens_available(budget)
-            )
-        else:
-            max_num_tiles = 12
-            target_width, target_height = self.info.get_image_size_with_most_features(
-                max_num_tiles
-            )
 
         image_overrides = mm_options.get("image")
 
@@ -758,12 +871,14 @@ def get_dummy_mm_data(
         else:
             dummy_video = {}
 
-        if extractor := self.info.audio_extractor:
+        if sound_config := self.info.sound_config:
             num_audios = mm_counts.get("audio", 0)
             audio_overrides = mm_options.get("audio") if mm_options else None
             tokens_per_audio = max(1, seq_len // max(num_audios, 1))
-            max_audio_num_samples = MAX_AUDIO_LEN_S * extractor.sampling_rate
-            calculated_max_audio_num_samples = extractor.audio_length(tokens_per_audio)
+            max_audio_num_samples = MAX_AUDIO_LEN_S * sound_config.sampling_rate
+            calculated_max_audio_num_samples = ParakeetExtractor.audio_length(
+                sound_config, tokens_per_audio
+            )
             audio_len = min(max_audio_num_samples, calculated_max_audio_num_samples)
             dummy_audio = {
                 "audio": self._get_dummy_audios(
@@ -789,6 +904,12 @@ class NemotronH_Nano_VL_V2(
     requires_sequential_video_encoding = True
     """Temporarily needed for dynamic res video w/ conv3d, doesn't support bs>1 yet"""
 
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_prefix={
+            "language_model.backbone": "language_model.model",
+        },
+    )
+
     @classmethod
     def get_placeholder_str(cls, modality: str, i: int) -> str | None:
         if modality.startswith("image"):
@@ -891,38 +1012,27 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             )
 
     def pixel_shuffle(self, x, scale_factor=0.5):
-        n, w, h, c = x.size()
-        # N, W, H, C --> N, W, H * scale, C // scale
-        x = x.view(
-            n,
-            w,
-            int(h * scale_factor),
-            int(c / scale_factor),
-        )
-        # N, W, H * scale, C // scale --> N, H * scale, W, C // scale
-        x = x.permute(0, 2, 1, 3).contiguous()
-        # N, H * scale, W, C // scale -->
-        # N, H * scale, W * scale, C // (scale ** 2)
-        x = x.view(
-            n,
-            int(h * scale_factor),
-            int(w * scale_factor),
-            int(c / (scale_factor * scale_factor)),
-        )
+        n, h, w, c = x.size()
+        r = int(1 / scale_factor)
+        new_h = h // r
+        new_w = w // r
+        new_c = c * r * r
+
+        x = x.view(n, new_h, r, new_w, r, c)
         if self.ps_version == "v1":
             warnings.warn(
                 "In ps_version 'v1', the height and width have not "
                 "been swapped back, which results in a transposed image.",
                 stacklevel=2,
             )
+            x = x.permute(0, 3, 1, 2, 4, 5).reshape(n, new_w, new_h, new_c)
         else:
-            x = x.permute(0, 2, 1, 3).contiguous()
+            x = x.permute(0, 1, 3, 2, 4, 5).reshape(n, new_h, new_w, new_c)
         return x
 
     def pixel_shuffle_dynamic_res(
         self, x: torch.Tensor, *, imgs_sizes: list[tuple[int, int]]
     ) -> torch.Tensor:
-        scale_factor = self.downsample_ratio
         patch_dim = self.patch_size
         seq_lens = calc_seq_lens(imgs_sizes, patch_dim)
         splits = torch.split(x, seq_lens, dim=-2)
@@ -931,22 +1041,8 @@ def pixel_shuffle_dynamic_res(
             h = imgs_sizes[i][0] // patch_dim
             w = imgs_sizes[i][1] // patch_dim
             sv = sv.reshape(sv.shape[0], h, w, -1)
-
-            n, h, w, c = sv.size()
-
-            sv = sv.view(n, h, int(w * scale_factor), int(c / scale_factor))
-            sv = sv.permute(0, 2, 1, 3).contiguous()
-            sv = sv.view(
-                n,
-                int(w * scale_factor),
-                int(h * scale_factor),
-                int(c / (scale_factor * scale_factor)),
-            )
-
-            if self.ps_version == "v2":
-                sv = sv.permute(0, 2, 1, 3).contiguous()
-
-            sv = sv.reshape(sv.shape[0], -1, sv.shape[-1])
+            sv = self.pixel_shuffle(sv, scale_factor=self.downsample_ratio)
+            sv = sv.flatten(1, 2)
             out.append(sv)
 
         x = torch.cat(out, dim=-2)
@@ -1015,16 +1111,22 @@ def _parse_and_validate_image_input(
                 data=image_embeds,
             )
 
+        pixel_values_flat = kwargs.pop("pixel_values_flat", None)
+        if pixel_values_flat is None:
+            return None
+
         if self.dynamic_resolution:
             pixel_values_flat = DynamicResolutionImageTiler.stack(
-                kwargs.pop("pixel_values_flat"), self.patch_size
+                pixel_values_flat, self.patch_size
             )
             return NanoNemotronVLImagePixelInputsDynamic(
                 pixel_values_flat=pixel_values_flat, **kwargs
             )
         else:
             return NanoNemotronVLImagePixelInputs(
-                num_patches=kwargs.pop("image_num_patches"), **kwargs
+                pixel_values_flat=pixel_values_flat,
+                num_patches=kwargs.pop("image_num_patches"),
+                **kwargs,
             )
 
     def _process_image_input_dynamic(
@@ -1201,7 +1303,6 @@ def _create_final_video_embeddings(
         These embeddings will replace the placeholder embeddings to create
         input_embeds for the LLM.
         """
-        device = video_embeddings.device
         tokenizer = cached_tokenizer_from_config(self.model_config)
 
         # Generate video replacement token IDs using get_video_repl
@@ -1218,6 +1319,7 @@ def _create_final_video_embeddings(
             img_context_token_ids=self._img_context_token_ids,
             video_temporal_patch_size=video_temporal_patch_size,
         )
+        device = video_embeddings.device
 
         # video_repl.full is a list of token IDs
         repl_token_ids = torch.tensor(video_repl.full, device=device)
@@ -1397,6 +1499,11 @@ def compute_logits(
         return self.language_model.compute_logits(hidden_states)
 
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
+        mm_config = self.model_config.multimodal_config
+        load_multimodal_weights = not all(
+            mm_config.get_limit_per_prompt(modality) == 0
+            for modality in ("image", "video", "audio")
+        )
         adapter_dict = dict(self.mlp1.named_parameters())
 
         def is_llm(name: str) -> bool:
@@ -1411,33 +1518,54 @@ def is_vision_weights(name: str) -> bool:
         def is_sound_weights(name: str) -> bool:
             return name.startswith("sound")
 
-        # Separate weights by component
-        llm_weights = []
-        vision_weights = []
-        sound_weights = []
-
-        for name, w in weights:
-            if is_llm(name):
-                # Strip 'language_model.' prefix for LLM weights
-                llm_weights.append((".".join(name.split(".")[1:]), w))
-            elif is_adapter_weights((name, w)):
+        # LLM weights (the bulk of the model) are streamed lazily through a
+        # generator so each tensor is copied into its parameter before the
+        # iterator advances, avoiding stale-reference corruption with
+        # reusable-buffer streamers. The smaller mm components (mlp1, vision,
+        # sound) are detach+cloned on append so they are independent of any
+        # reusable buffer the streamer may use, then loaded after the LLM.
+        adapter_weights: list[tuple[str, torch.Tensor]] = []
+        vision_weights: list[tuple[str, torch.Tensor]] = []
+        sound_weights: list[tuple[str, torch.Tensor]] = []
+
+        def llm_weights_gen():
+            for name, w in weights:
+                if is_llm(name):
+                    # Strip 'language_model.' prefix for LLM weights
+                    yield ".".join(name.split(".")[1:]), w
+                elif is_adapter_weights((name, w)):
+                    if not load_multimodal_weights:
+                        continue
+                    trimmed_name = ".".join(name.split(".")[1:])
+                    adapter_weights.append((trimmed_name, w.detach().clone()))
+                elif is_vision_weights(name):
+                    if not load_multimodal_weights:
+                        continue
+                    # Convert: vision_model.radio_model.* → radio_model.*
+                    hf_key = name[len("vision_model.") :]
+                    vision_weights.append((hf_key, w.detach().clone()))
+                elif is_sound_weights(name):
+                    if not load_multimodal_weights:
+                        continue
+                    assert self.sound_encoder is not None
+                    sound_weights.append((name, w.detach().clone()))
+
+        # Fully drain the generator so every mm tensor is buffered, even if
+        # the LLM loader stops iterating early.
+        llm_weights_iter = llm_weights_gen()
+        self.language_model.load_weights(llm_weights_iter)
+        for _ in llm_weights_iter:
+            pass
+
+        if load_multimodal_weights:
+            for trimmed_name, w in adapter_weights:
                 # Load vision-language adapter weights directly
-                trimmed_name = ".".join(name.split(".")[1:])
                 param = adapter_dict[trimmed_name]
                 with torch.no_grad():
                     default_weight_loader(param, w)
-            elif is_vision_weights(name):
-                # Convert: vision_model.radio_model.* → radio_model.*
-                hf_key = name[len("vision_model.") :]  # Remove "vision_model." prefix
-                vision_weights.append((hf_key, w))
-            elif is_sound_weights(name):
-                assert self.sound_encoder is not None
-                sound_weights.append((name, w))
-
-        self.language_model.load_weights(llm_weights)
-        self.vision_model.load_weights(vision_weights)
-        if self.sound_encoder is not None and len(sound_weights) > 0:
-            self.sound_encoder.load_weights(sound_weights)
+            self.vision_model.load_weights(vision_weights)
+            if self.sound_encoder is not None and len(sound_weights) > 0:
+                self.sound_encoder.load_weights(sound_weights)
 
     def get_vit_model_from_radio_config(self, hf_config):
         hf_config_vision = hf_config.vision_config
@@ -1484,15 +1612,13 @@ def get_seqlen_agnostic_capture_inputs(self, batch_size: int):
     @classmethod
     def get_mamba_state_shape_from_config(cls, vllm_config: "VllmConfig"):
         text_config = vllm_config.model_config.hf_config.text_config
-        temp_vllm_config = copy.deepcopy(vllm_config)
-        temp_vllm_config.model_config.hf_config = text_config
+        temp_vllm_config = vllm_config.with_hf_config(text_config)
         return NemotronHForCausalLM.get_mamba_state_shape_from_config(temp_vllm_config)
 
     @classmethod
     def get_mamba_state_dtype_from_config(cls, vllm_config: "VllmConfig"):
         text_config = vllm_config.model_config.hf_config.text_config
-        temp_vllm_config = copy.deepcopy(vllm_config)
-        temp_vllm_config.model_config.hf_config = text_config
+        temp_vllm_config = vllm_config.with_hf_config(text_config)
         return NemotronHForCausalLM.get_mamba_state_dtype_from_config(temp_vllm_config)
 
     @classmethod
diff --git a/vllm/model_executor/models/nemotron_h.py b/vllm/model_executor/models/nemotron_h.py
index 4ec794eccf72..8915d8172c56 100644
--- a/vllm/model_executor/models/nemotron_h.py
+++ b/vllm/model_executor/models/nemotron_h.py
@@ -34,9 +34,10 @@
 from vllm.model_executor.layers.activation import ReLUSquaredActivation
 from vllm.model_executor.layers.attention import Attention
 from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
     GateLinear,
-    SharedFusedMoE,
     activation_without_mul,
+    fused_moe_make_expert_params_mapping,
 )
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
@@ -63,9 +64,12 @@
     maybe_remap_kv_scale_name,
 )
 from vllm.model_executor.models.interfaces import (
+    EagleModelMixin,
     HasInnerState,
     IsHybrid,
     MixtureOfExperts,
+    SupportsEagle,
+    SupportsEagle3,
     SupportsLoRA,
     SupportsMambaPrefixCaching,
     SupportsPP,
@@ -210,13 +214,12 @@ def __init__(
             self.fc1_latent_proj = None
             self.fc2_latent_proj = None
 
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.shared_experts,
             num_experts=config.n_routed_experts,
             top_k=config.num_experts_per_tok,
             hidden_size=self.moe_hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=config.norm_topk_prob,
             quant_config=quant_config,
             use_grouped_topk=True,
@@ -231,6 +234,10 @@ def __init__(
             num_redundant_experts=self.n_redundant_experts,
             is_sequence_parallel=self.is_sequence_parallel,
             routed_input_transform=self.fc1_latent_proj,
+            routed_output_transform=self.fc2_latent_proj,
+            routed_scaling_factor=self.routed_scaling_factor,
+            apply_routed_scale_to_output=True,
+            router_logits_dtype=self.gate.out_dtype,
         )
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
@@ -243,38 +250,15 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         # router_logits: (num_tokens, n_experts)
         router_logits, _ = self.gate(hidden_states)
 
-        # SharedFusedMoE handles:
-        #   - shared experts (with original hidden_states)
-        #   - routed_input_transform (fc1_latent_proj) for latent MoE
-        #   - multistream parallelism between shared and routed experts
-        shared_output, final_hidden_states = self.experts(
+        final_hidden_states = self.experts(
             hidden_states=hidden_states, router_logits=router_logits
         )
 
-        # Fix FP16 overflow
-        # See DeepseekV2DecoderLayer for more details.
-        if hidden_states.dtype != torch.float16:
-            final_hidden_states *= self.routed_scaling_factor
-        elif self.shared_experts is not None:
-            shared_output *= 1.0 / self.routed_scaling_factor
-
-        # TODO: See SharedFusedMoE.apply_routed_input_transform
-        # for bandwidth optimization
-        if self.use_latent_moe:
-            final_hidden_states, _ = self.fc2_latent_proj(final_hidden_states)
-
-        if self.shared_experts is not None:
-            final_hidden_states += shared_output
-
         if self.is_sequence_parallel:
             final_hidden_states = tensor_model_parallel_all_gather(
                 final_hidden_states, 0
             )
             final_hidden_states = final_hidden_states[:num_tokens]
-        elif self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
-                final_hidden_states
-            )
 
         return final_hidden_states.view(num_tokens, hidden_dim)
 
@@ -558,7 +542,7 @@ def forward(
 
 
 @support_torch_compile
-class NemotronHModel(nn.Module):
+class NemotronHModel(nn.Module, EagleModelMixin):
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
 
@@ -624,18 +608,27 @@ def forward(
             hidden_states = intermediate_tensors["hidden_states"]
             residual = intermediate_tensors["residual"]
 
-        for layer in islice(self.layers, self.start_layer, self.end_layer):
+        aux_hidden_states = self._maybe_add_hidden_state([], 0, hidden_states, residual)
+        for idx, layer in enumerate(
+            islice(self.layers, self.start_layer, self.end_layer)
+        ):
             hidden_states, residual = layer(
                 positions=positions,
                 hidden_states=hidden_states,
                 residual=residual,
             )
+            self._maybe_add_hidden_state(
+                aux_hidden_states, idx + 1, hidden_states, residual
+            )
 
         if not get_pp_group().is_last_rank:
             return IntermediateTensors(
                 {"hidden_states": hidden_states, "residual": residual}
             )
         hidden_states, _ = self.norm_f(hidden_states, residual)
+
+        if len(aux_hidden_states) > 0:
+            return hidden_states, aux_hidden_states
         return hidden_states
 
     def is_spec_layer(self, config: NemotronHConfig, weight_name: str) -> bool:
@@ -672,7 +665,7 @@ def _get_max_n_routed_experts(self) -> int:
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         if self.has_moe:
             # (param_name, weight_name, expert_id, shard_id)
-            expert_params_mapping = SharedFusedMoE.make_expert_params_mapping(
+            expert_params_mapping = fused_moe_make_expert_params_mapping(
                 # - FusedMoe.w1 (aka gate_proj) should be up_proj since that's
                 #   what the activation is applied to
                 # - FusedMoe.w3 (aka up_proj) should be ignored since we're
@@ -786,6 +779,8 @@ class NemotronHForCausalLM(
     HasInnerState,
     SupportsLoRA,
     SupportsPP,
+    SupportsEagle,
+    SupportsEagle3,
     IsHybrid,
     SupportsQuant,
     MixtureOfExperts,
diff --git a/vllm/model_executor/models/nemotron_h_mtp.py b/vllm/model_executor/models/nemotron_h_mtp.py
index 12551d4254ed..fe737438c30f 100644
--- a/vllm/model_executor/models/nemotron_h_mtp.py
+++ b/vllm/model_executor/models/nemotron_h_mtp.py
@@ -11,7 +11,9 @@
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, ModelConfig, VllmConfig
 from vllm.config.parallel import ParallelConfig
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import ColumnParallelLinear
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
@@ -399,7 +401,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         if getattr(self.config, "model_type", None) == "nemotron_h_puzzle":
             num_experts = self.config.mtp_n_routed_experts
         if num_experts is not None:
-            expert_params_mapping = FusedMoE.make_expert_params_mapping(
+            expert_params_mapping = fused_moe_make_expert_params_mapping(
                 self,
                 ckpt_gate_proj_name="up_proj",
                 ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/nemotron_parse.py b/vllm/model_executor/models/nemotron_parse.py
index ae417f095eb4..6e52e7cfe7f4 100644
--- a/vllm/model_executor/models/nemotron_parse.py
+++ b/vllm/model_executor/models/nemotron_parse.py
@@ -281,6 +281,9 @@ def __init__(
         self.layernorm_embedding = nn.LayerNorm(config.d_model)
         self.layer_norm = nn.LayerNorm(config.d_model)
 
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
     def forward(
         self,
         decoder_input_ids: torch.Tensor | None,
diff --git a/vllm/model_executor/models/olmo_hybrid.py b/vllm/model_executor/models/olmo_hybrid.py
index 97e56b3ff6f9..49969cdc5863 100644
--- a/vllm/model_executor/models/olmo_hybrid.py
+++ b/vllm/model_executor/models/olmo_hybrid.py
@@ -26,54 +26,38 @@
 from itertools import islice
 
 import torch
-from einops import rearrange
 from torch import nn
-from transformers.activations import ACT2FN
 
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import (
-    CacheConfig,
-    ModelConfig,
-    SpeculativeConfig,
     VllmConfig,
-    get_current_vllm_config,
 )
 from vllm.distributed import (
-    divide,
     get_pp_group,
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_gather,
 )
 from vllm.distributed.utils import split_tensor_along_last_dim
-from vllm.forward_context import ForwardContext, get_forward_context
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fla.ops import (
-    chunk_gated_delta_rule,
-    fused_recurrent_gated_delta_rule,
-)
-from vllm.model_executor.layers.layernorm import RMSNorm, RMSNormGated
+from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
-    ColumnParallelLinear,
     MergedColumnParallelLinear,
     QKVParallelLinear,
     RowParallelLinear,
 )
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
-from vllm.model_executor.layers.mamba.abstract import MambaBase
+from vllm.model_executor.layers.mamba.gdn.olmo_gdn_linear_attn import (
+    OlmoHybridGatedDeltaNetAttention,
+)
 from vllm.model_executor.layers.mamba.mamba_utils import (
     MambaStateCopyFunc,
     MambaStateCopyFuncCalculator,
     MambaStateDtypeCalculator,
     MambaStateShapeCalculator,
 )
-from vllm.model_executor.layers.mamba.ops.causal_conv1d import (
-    causal_conv1d_fn,
-    causal_conv1d_update,
-)
-from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
     ParallelLMHead,
@@ -81,16 +65,8 @@
 )
 from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader,
-    sharded_weight_loader,
 )
-from vllm.model_executor.utils import set_weight_attrs
-from vllm.platforms import current_platform
 from vllm.sequence import IntermediateTensors
-from vllm.triton_utils import tl, triton
-from vllm.triton_utils.allocation import set_triton_allocator
-from vllm.utils.torch_utils import direct_register_custom_op
-from vllm.v1.attention.backend import AttentionMetadata
-from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadata
 
 from .interfaces import HasInnerState, IsHybrid, SupportsLoRA, SupportsPP
 from .utils import (
@@ -105,496 +81,6 @@
 logger = init_logger(__name__)
 
 
-def _make_fused_conv1d_weight_loader(dims, tp_size, tp_rank):
-    """Weight loader for loading separate HF conv weights into a fused conv1d.
-
-    dims: list of original (un-sharded) dims per section,
-          e.g. [key_dim, key_dim, value_dim]
-    """
-    sharded_dims = [d // tp_size for d in dims]
-
-    def weight_loader(param, loaded_weight, loaded_shard_id=None):
-        if loaded_weight.dim() == 2:
-            loaded_weight = loaded_weight.unsqueeze(1)
-        dim = dims[loaded_shard_id]
-        shard_size = dim // tp_size
-        tp_start = tp_rank * shard_size
-        sharded_weight = loaded_weight[tp_start : tp_start + shard_size]
-        offset = sum(sharded_dims[:loaded_shard_id])
-        param.data[offset : offset + shard_size].copy_(sharded_weight)
-
-    return weight_loader
-
-
-class OlmoHybridGatedDeltaNet(nn.Module, MambaBase):
-    """
-    Gated DeltaNet linear attention layer for OLMo Hybrid.
-
-    This implements the linear attention mechanism that replaces sliding window
-    attention in the hybrid architecture.
-    """
-
-    @property
-    def mamba_type(self) -> str:
-        return "gdn_attention"
-
-    def get_state_dtype(self) -> tuple[torch.dtype, torch.dtype]:
-        return MambaStateDtypeCalculator.gated_delta_net_state_dtype(
-            self.model_config.dtype,
-            self.cache_config.mamba_cache_dtype,
-            self.cache_config.mamba_ssm_cache_dtype,
-        )
-
-    def get_state_shape(self) -> tuple[tuple[int, ...], tuple[int, ...]]:
-        return MambaStateShapeCalculator.gated_delta_net_state_shape(
-            self.tp_size,
-            self.num_k_heads,
-            self.num_v_heads,
-            self.head_k_dim,
-            self.head_v_dim,
-            self.conv_kernel_size,
-            self.num_spec,
-        )
-
-    def __init__(
-        self,
-        config,
-        model_config: ModelConfig | None = None,
-        cache_config: CacheConfig | None = None,
-        quant_config: QuantizationConfig | None = None,
-        speculative_config: SpeculativeConfig | None = None,
-        prefix: str = "",
-    ) -> None:
-        super().__init__()
-        self.tp_size = get_tensor_model_parallel_world_size()
-        self.tp_rank = get_tensor_model_parallel_rank()
-        self.hidden_size = config.hidden_size
-        self.num_v_heads = config.linear_num_value_heads
-        self.num_k_heads = config.linear_num_key_heads
-        self.head_k_dim = config.linear_key_head_dim
-        self.head_v_dim = config.linear_value_head_dim
-        self.key_dim = self.head_k_dim * self.num_k_heads
-        self.value_dim = self.head_v_dim * self.num_v_heads
-
-        self.conv_kernel_size = config.linear_conv_kernel_dim
-        self.layer_idx = extract_layer_index(prefix)
-        self.activation = config.hidden_act
-        self.act = ACT2FN[config.hidden_act]
-        self.layer_norm_epsilon = config.rms_norm_eps
-        assert getattr(config, "linear_use_gate", True), (
-            "OlmoHybridGatedDeltaNet requires linear_use_gate=True"
-        )
-        self.allow_neg_eigval = getattr(config, "linear_allow_neg_eigval", False)
-        self.prefix = prefix
-
-        self.config = config
-        self.model_config = model_config
-        self.cache_config = cache_config
-        self.quant_config = quant_config
-        self.speculative_config = speculative_config
-        self.num_spec = (
-            self.speculative_config.num_speculative_tokens
-            if self.speculative_config
-            else 0
-        )
-
-        # Fused QKVG projection: 1 matmul instead of 4
-        self.in_proj_qkvg = MergedColumnParallelLinear(
-            input_size=self.hidden_size,
-            output_sizes=[self.key_dim, self.key_dim, self.value_dim, self.value_dim],
-            bias=False,
-            quant_config=quant_config,
-            prefix=f"{prefix}.in_proj_qkvg",
-        )
-
-        # Separate B and A projections to preserve numerical precision.
-        # Fusing these into one matmul changes FP accumulation order for the
-        # gating scalars, which compounds through the GDN recurrent state.
-        self.b_proj = ColumnParallelLinear(
-            input_size=self.hidden_size,
-            output_size=self.num_v_heads,
-            bias=False,
-            quant_config=quant_config,
-            prefix=f"{prefix}.b_proj",
-        )
-        self.a_proj = ColumnParallelLinear(
-            input_size=self.hidden_size,
-            output_size=self.num_v_heads,
-            bias=False,
-            quant_config=quant_config,
-            prefix=f"{prefix}.a_proj",
-        )
-
-        # Fused conv1d: single parameter instead of 3
-        self.conv_dim = self.key_dim * 2 + self.value_dim
-        self.conv1d = ColumnParallelLinear(
-            input_size=self.conv_kernel_size,
-            output_size=self.conv_dim,
-            bias=False,
-            prefix=f"{prefix}.conv1d",
-        )
-        self.conv1d.weight.data = self.conv1d.weight.data.unsqueeze(1)
-        delattr(self.conv1d.weight, "weight_loader")
-        set_weight_attrs(
-            self.conv1d.weight,
-            {
-                "weight_loader": _make_fused_conv1d_weight_loader(
-                    [self.key_dim, self.key_dim, self.value_dim],
-                    self.tp_size,
-                    self.tp_rank,
-                )
-            },
-        )
-
-        self.dt_bias = nn.Parameter(
-            torch.ones(self.num_v_heads // self.tp_size),
-        )
-        self.A_log = nn.Parameter(
-            torch.empty(
-                divide(self.num_v_heads, self.tp_size),
-            )
-        )
-
-        set_weight_attrs(self.A_log, {"weight_loader": sharded_weight_loader(0)})
-        set_weight_attrs(self.dt_bias, {"weight_loader": sharded_weight_loader(0)})
-
-        # use eps=1e-5 to match FLA's FusedRMSNormGated
-        self.o_norm = RMSNormGated(
-            self.head_v_dim,
-            eps=1e-5,
-            group_size=None,
-            norm_before_gate=True,
-            device=current_platform.current_device(),
-            dtype=config.torch_dtype if hasattr(config, "torch_dtype") else None,
-        )
-
-        self.o_proj = RowParallelLinear(
-            self.value_dim,
-            self.hidden_size,
-            bias=False,
-            input_is_parallel=True,
-            quant_config=quant_config,
-            prefix=f"{prefix}.o_proj",
-        )
-
-        # FLA triton kernels need a PyTorch-backed allocator for scratch
-        # memory (required by triton >= 3.x autotuner). Set once at init.
-        set_triton_allocator(current_platform.current_device())
-
-        compilation_config = get_current_vllm_config().compilation_config
-        if prefix in compilation_config.static_forward_context:
-            raise ValueError(f"Duplicate layer name: {prefix}")
-        compilation_config.static_forward_context[prefix] = self
-
-    def rearrange_mixed_qkv(self, mixed_qkv):
-        if mixed_qkv is None:
-            return None, None, None
-        query, key, value = torch.split(
-            mixed_qkv,
-            [
-                self.key_dim // self.tp_size,
-                self.key_dim // self.tp_size,
-                self.value_dim // self.tp_size,
-            ],
-            dim=-1,
-        )
-
-        num_k_heads = self.num_k_heads // self.tp_size
-        num_v_heads = self.num_v_heads // self.tp_size
-
-        query = rearrange(query, "l (h d) -> 1 l h d", h=num_k_heads, d=self.head_k_dim)
-        key = rearrange(key, "l (h d) -> 1 l h d", h=num_k_heads, d=self.head_k_dim)
-        value = rearrange(value, "l (h d) -> 1 l h d", h=num_v_heads, d=self.head_v_dim)
-
-        # GQA expansion if needed
-        if num_v_heads > num_k_heads:
-            expand_ratio = num_v_heads // num_k_heads
-            query = query.unsqueeze(3).expand(-1, -1, -1, expand_ratio, -1)
-            query = query.reshape(1, query.shape[1], num_v_heads, self.head_k_dim)
-            key = key.unsqueeze(3).expand(-1, -1, -1, expand_ratio, -1)
-            key = key.reshape(1, key.shape[1], num_v_heads, self.head_k_dim)
-
-        return query.contiguous(), key.contiguous(), value.contiguous()
-
-    def forward(
-        self,
-        hidden_states: torch.Tensor,
-        output: torch.Tensor,
-    ):
-        # NOTE: We wrap the ENTIRE linear attention forward (projections +
-        # core recurrence + output norm + output projection) in a single
-        # custom op, rather than just wrapping the recurrent core like
-        # other GDN models (e.g. Qwen3Next) do.
-        #
-        # Why: torch.compile with inductor generates fused kernels for
-        # matmuls and pointwise ops. These fused kernels can differ in
-        # floating-point accumulation order from eager-mode cuBLAS,
-        # introducing small numerical differences (~1e-7 per op). For
-        # standard transformer attention this is harmless because each
-        # position is computed independently. But for the GDN recurrent
-        # state, these tiny input differences compound at every timestep
-        # across the full sequence length, causing severe logprob
-        # divergence (e.g. ~15% top-1 agreement with eager baseline).
-        #
-        # By making the full forward opaque to inductor, the projections
-        # and output norm run with eager-mode kernels (cuBLAS, triton),
-        # preserving numerical consistency. The tradeoff is reduced
-        # compilation speedup (~1.5x vs ~3x), but logprob agreement
-        # improves from ~15% to ~83% top-1 vs eager.
-        #
-        # The remaining ~17% divergence comes from inductor compiling
-        # the MLP and transformer attention layers that are NOT wrapped
-        # in custom ops -- their small precision differences propagate
-        # as inputs to the GDN layers from outside.
-        torch.ops.vllm.olmo_hybrid_gdn_full_forward(
-            hidden_states,
-            output,
-            self.prefix,
-        )
-
-    def _full_forward(
-        self,
-        hidden_states: torch.Tensor,
-        output: torch.Tensor,
-    ):
-        num_tokens = hidden_states.size(0)
-
-        # ============================================================
-        # Part 1: Input Projection (2 fused matmuls instead of 6)
-        # ============================================================
-        projected_qkvg, _ = self.in_proj_qkvg(hidden_states)
-        conv_dim_sharded = (self.key_dim * 2 + self.value_dim) // self.tp_size
-        mixed_qkv = projected_qkvg[..., :conv_dim_sharded]
-        gate = projected_qkvg[..., conv_dim_sharded:]
-
-        b, _ = self.b_proj(hidden_states)
-        a, _ = self.a_proj(hidden_states)
-
-        # ============================================================
-        # Part 2: Core Attention
-        # ============================================================
-        core_attn_out = torch.zeros(
-            (num_tokens, self.num_v_heads // self.tp_size, self.head_v_dim),
-            dtype=hidden_states.dtype,
-            device=hidden_states.device,
-        )
-
-        self._forward_core(
-            mixed_qkv=mixed_qkv,
-            b=b,
-            a=a,
-            core_attn_out=core_attn_out,
-        )
-
-        # ============================================================
-        # Part 3: Output Projection
-        # ============================================================
-        gate = gate.view(num_tokens, self.num_v_heads // self.tp_size, self.head_v_dim)
-        core_attn_out_flat = core_attn_out.reshape(-1, core_attn_out.shape[-1])
-        gate_flat = gate.reshape(-1, gate.shape[-1])
-        core_attn_out_normed = self.o_norm(core_attn_out_flat, gate_flat)
-        core_attn_out = core_attn_out_normed.view(
-            num_tokens, self.num_v_heads // self.tp_size, self.head_v_dim
-        )
-
-        core_attn_out = rearrange(core_attn_out, "l h d -> l (h d)")
-        output[:num_tokens], _ = self.o_proj(core_attn_out)
-
-    def _forward_core(
-        self,
-        mixed_qkv: torch.Tensor,
-        b: torch.Tensor,
-        a: torch.Tensor,
-        core_attn_out: torch.Tensor,
-    ):
-        """
-        Core attention computation (called by custom op).
-        """
-        forward_context = get_forward_context()
-        attn_metadata: AttentionMetadata = forward_context.attn_metadata
-
-        if attn_metadata is None:
-            # V1 profile run
-            return
-
-        assert isinstance(attn_metadata, dict)
-        attn_metadata = attn_metadata[self.prefix]
-        assert isinstance(attn_metadata, GDNAttentionMetadata)
-        has_initial_state = attn_metadata.has_initial_state
-        spec_query_start_loc = attn_metadata.spec_query_start_loc
-        non_spec_query_start_loc = attn_metadata.non_spec_query_start_loc
-        spec_sequence_masks = attn_metadata.spec_sequence_masks
-        spec_token_indx = attn_metadata.spec_token_indx
-        non_spec_token_indx = attn_metadata.non_spec_token_indx
-        spec_state_indices_tensor = attn_metadata.spec_state_indices_tensor
-        non_spec_state_indices_tensor = attn_metadata.non_spec_state_indices_tensor
-        self_kv_cache = self.kv_cache
-        conv_state = self_kv_cache[0].transpose(-1, -2)
-        ssm_state = self_kv_cache[1]
-        num_actual_tokens = attn_metadata.num_actual_tokens
-        num_accepted_tokens = attn_metadata.num_accepted_tokens
-
-        mixed_qkv = mixed_qkv[:num_actual_tokens]
-        b = b[:num_actual_tokens]
-        a = a[:num_actual_tokens]
-
-        conv_weights = self.conv1d.weight.view(
-            self.conv1d.weight.size(0), self.conv1d.weight.size(2)
-        )
-
-        if spec_sequence_masks is not None:
-            if attn_metadata.num_prefills == 0 and attn_metadata.num_decodes == 0:
-                mixed_qkv_spec = mixed_qkv
-                mixed_qkv_non_spec = None
-            else:
-                mixed_qkv_spec = mixed_qkv.index_select(0, spec_token_indx)
-                mixed_qkv_non_spec = mixed_qkv.index_select(0, non_spec_token_indx)
-        else:
-            mixed_qkv_spec = None
-            mixed_qkv_non_spec = mixed_qkv
-
-        if spec_sequence_masks is not None:
-            mixed_qkv_spec = causal_conv1d_update(
-                mixed_qkv_spec,
-                conv_state,
-                conv_weights,
-                None,  # no bias
-                self.activation,
-                conv_state_indices=spec_state_indices_tensor[:, 0][
-                    : attn_metadata.num_spec_decodes
-                ],
-                num_accepted_tokens=num_accepted_tokens,
-                query_start_loc=spec_query_start_loc,
-                max_query_len=spec_state_indices_tensor.size(-1),
-                validate_data=False,
-            )
-
-        if attn_metadata.num_prefills > 0:
-            mixed_qkv_non_spec_T = mixed_qkv_non_spec.transpose(0, 1)
-            mixed_qkv_non_spec = causal_conv1d_fn(
-                mixed_qkv_non_spec_T,
-                conv_weights,
-                None,
-                activation=self.activation,
-                conv_states=conv_state,
-                has_initial_state=has_initial_state,
-                cache_indices=non_spec_state_indices_tensor,
-                query_start_loc=non_spec_query_start_loc,
-                metadata=attn_metadata,
-            ).transpose(0, 1)
-        elif attn_metadata.num_decodes > 0:
-            mixed_qkv_non_spec = causal_conv1d_update(
-                mixed_qkv_non_spec,
-                conv_state,
-                conv_weights,
-                None,
-                self.activation,
-                conv_state_indices=non_spec_state_indices_tensor[
-                    : attn_metadata.num_decodes
-                ],
-                validate_data=True,
-            )
-        else:
-            mixed_qkv_non_spec = None
-
-        query_spec, key_spec, value_spec = self.rearrange_mixed_qkv(mixed_qkv_spec)
-        query_non_spec, key_non_spec, value_non_spec = self.rearrange_mixed_qkv(
-            mixed_qkv_non_spec
-        )
-
-        g, beta = fused_olmo_hybrid_gdn_gating(
-            self.A_log, a, b, self.dt_bias, self.allow_neg_eigval
-        )
-
-        if spec_sequence_masks is not None:
-            if attn_metadata.num_prefills == 0 and attn_metadata.num_decodes == 0:
-                g_spec = g
-                beta_spec = beta
-                g_non_spec = None
-                beta_non_spec = None
-            else:
-                g_spec = g.index_select(1, spec_token_indx)
-                beta_spec = beta.index_select(1, spec_token_indx)
-                g_non_spec = g.index_select(1, non_spec_token_indx)
-                beta_non_spec = beta.index_select(1, non_spec_token_indx)
-        else:
-            g_spec = None
-            beta_spec = None
-            g_non_spec = g
-            beta_non_spec = beta
-
-        if spec_sequence_masks is not None:
-            core_attn_out_spec, last_recurrent_state = fused_recurrent_gated_delta_rule(
-                q=query_spec,
-                k=key_spec,
-                v=value_spec,
-                g=g_spec,
-                beta=beta_spec,
-                initial_state=ssm_state,
-                inplace_final_state=True,
-                cu_seqlens=spec_query_start_loc[: attn_metadata.num_spec_decodes + 1],
-                ssm_state_indices=spec_state_indices_tensor,
-                num_accepted_tokens=num_accepted_tokens,
-                use_qk_l2norm_in_kernel=True,
-            )
-        else:
-            core_attn_out_spec, last_recurrent_state = None, None
-
-        if attn_metadata.num_prefills > 0:
-            initial_state = ssm_state[non_spec_state_indices_tensor].contiguous()
-            initial_state[~has_initial_state, ...] = 0
-            (
-                core_attn_out_non_spec,
-                last_recurrent_state,
-            ) = chunk_gated_delta_rule(
-                q=query_non_spec,
-                k=key_non_spec,
-                v=value_non_spec,
-                g=g_non_spec,
-                beta=beta_non_spec,
-                initial_state=initial_state,
-                output_final_state=True,
-                cu_seqlens=non_spec_query_start_loc,
-                use_qk_l2norm_in_kernel=True,
-            )
-            ssm_state[non_spec_state_indices_tensor] = last_recurrent_state.to(
-                ssm_state.dtype
-            )
-        elif attn_metadata.num_decodes > 0:
-            core_attn_out_non_spec, last_recurrent_state = (
-                fused_recurrent_gated_delta_rule(
-                    q=query_non_spec,
-                    k=key_non_spec,
-                    v=value_non_spec,
-                    g=g_non_spec,
-                    beta=beta_non_spec,
-                    initial_state=ssm_state,
-                    inplace_final_state=True,
-                    cu_seqlens=non_spec_query_start_loc[
-                        : attn_metadata.num_decodes + 1
-                    ],
-                    ssm_state_indices=non_spec_state_indices_tensor,
-                    use_qk_l2norm_in_kernel=True,
-                )
-            )
-        else:
-            core_attn_out_non_spec, last_recurrent_state = None, None
-
-        if spec_sequence_masks is not None and core_attn_out_non_spec is not None:
-            merged_out = torch.empty(
-                (1, num_actual_tokens, *core_attn_out_spec.shape[2:]),
-                dtype=core_attn_out_non_spec.dtype,
-                device=core_attn_out_non_spec.device,
-            )
-            merged_out.index_copy_(1, spec_token_indx, core_attn_out_spec)
-            merged_out.index_copy_(1, non_spec_token_indx, core_attn_out_non_spec)
-            core_attn_out[:num_actual_tokens] = merged_out.squeeze(0)
-        elif spec_sequence_masks is not None:
-            core_attn_out[:num_actual_tokens] = core_attn_out_spec.squeeze(0)
-        else:
-            core_attn_out[:num_actual_tokens] = core_attn_out_non_spec.squeeze(0)
-
-
 class OlmoHybridAttention(nn.Module):
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
@@ -742,22 +228,15 @@ class OlmoHybridDecoderLayer(nn.Module):
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
         super().__init__()
         config = vllm_config.model_config.hf_config
-        model_config = vllm_config.model_config
-        cache_config = vllm_config.cache_config
-        quant_config = vllm_config.quant_config
-        speculative_config = vllm_config.speculative_config
 
         layer_idx = extract_layer_index(prefix)
         self.layer_type = config.layer_types[layer_idx]
         self.layer_idx = layer_idx
 
         if self.layer_type == "linear_attention":
-            self.linear_attn = OlmoHybridGatedDeltaNet(
+            self.linear_attn = OlmoHybridGatedDeltaNetAttention(
                 config,
-                model_config=model_config,
-                cache_config=cache_config,
-                quant_config=quant_config,
-                speculative_config=speculative_config,
+                vllm_config,
                 prefix=f"{prefix}.linear_attn",
             )
             self.input_layernorm = RMSNorm(
@@ -1060,113 +539,3 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
             ),
         )
         return loader.load_weights(weights)
-
-
-def olmo_hybrid_gdn_full_forward(
-    hidden_states: torch.Tensor,
-    output: torch.Tensor,
-    layer_name: str,
-) -> None:
-    """Full linear attention forward wrapped as a custom op.
-
-    Prevents inductor from compiling the projections around the GDN core,
-    which would introduce numerical divergence that compounds through
-    the recurrent state.
-    """
-    forward_context: ForwardContext = get_forward_context()
-    self = forward_context.no_compile_layers[layer_name]
-    self._full_forward(
-        hidden_states=hidden_states,
-        output=output,
-    )
-
-
-def olmo_hybrid_gdn_full_forward_fake(
-    hidden_states: torch.Tensor,
-    output: torch.Tensor,
-    layer_name: str,
-) -> None:
-    """Fake implementation for torch.compile."""
-    return
-
-
-direct_register_custom_op(
-    op_name="olmo_hybrid_gdn_full_forward",
-    op_func=olmo_hybrid_gdn_full_forward,
-    mutates_args=["output"],
-    fake_impl=olmo_hybrid_gdn_full_forward_fake,
-)
-
-
-@triton.jit
-def fused_olmo_hybrid_gdn_gating_kernel(
-    g,
-    beta_output,
-    A_log,
-    a,
-    b,
-    dt_bias,
-    seq_len,
-    allow_neg_eigval: tl.constexpr,
-    NUM_HEADS: tl.constexpr,
-    beta: tl.constexpr,
-    threshold: tl.constexpr,
-    BLK_HEADS: tl.constexpr,
-):
-    i_b, i_s, i_d = tl.program_id(0), tl.program_id(1), tl.program_id(2)
-    head_off = i_d * BLK_HEADS + tl.arange(0, BLK_HEADS)
-    off = i_b * seq_len * NUM_HEADS + i_s * NUM_HEADS + head_off
-    mask = head_off < NUM_HEADS
-    blk_A_log = tl.load(A_log + head_off, mask=mask)
-    blk_a = tl.load(a + off, mask=mask)
-    blk_b = tl.load(b + off, mask=mask)
-    blk_bias = tl.load(dt_bias + head_off, mask=mask)
-
-    # g = -self.A_log.float().exp() * F.softplus(a.float() + self.dt_bias)
-    x = blk_a.to(tl.float32) + blk_bias.to(tl.float32)
-    softplus_x = tl.where(
-        beta * x <= threshold, (1 / beta) * tl.log(1 + tl.exp(beta * x)), x
-    )
-    blk_g = -tl.exp(blk_A_log.to(tl.float32)) * softplus_x
-    tl.store(g + off, blk_g.to(g.dtype.element_ty), mask=mask)
-
-    # beta = self.b_proj(hidden_states).sigmoid()
-    # if self.allow_neg_eigval: beta = beta * 2.0
-    blk_beta_output = tl.sigmoid(blk_b.to(tl.float32))
-    if allow_neg_eigval:
-        blk_beta_output = blk_beta_output * 2.0
-    tl.store(
-        beta_output + off, blk_beta_output.to(beta_output.dtype.element_ty), mask=mask
-    )
-
-
-def fused_olmo_hybrid_gdn_gating(
-    A_log: torch.Tensor,
-    a: torch.Tensor,
-    b: torch.Tensor,
-    dt_bias: torch.Tensor,
-    allow_neg_eigval: bool = False,
-    beta: float = 1.0,
-    threshold: float = 20.0,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    batch, num_heads = a.shape
-    seq_len = 1
-    grid = (batch, seq_len, triton.cdiv(num_heads, 8))
-    g = torch.empty(1, batch, num_heads, dtype=torch.float32, device=a.device)
-    beta_output = torch.empty(1, batch, num_heads, dtype=torch.float32, device=b.device)
-    fused_olmo_hybrid_gdn_gating_kernel[grid](
-        g,
-        beta_output,
-        A_log,
-        a,
-        b,
-        dt_bias,
-        seq_len,
-        allow_neg_eigval,
-        num_heads,
-        beta,
-        threshold,
-        8,
-        num_warps=1,
-    )
-    return g, beta_output
diff --git a/vllm/model_executor/models/olmoe.py b/vllm/model_executor/models/olmoe.py
index f0afe0e997cc..1f342ad1733d 100644
--- a/vllm/model_executor/models/olmoe.py
+++ b/vllm/model_executor/models/olmoe.py
@@ -32,7 +32,10 @@
 from vllm.distributed.utils import split_tensor_along_last_dim
 from vllm.logger import init_logger
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     QKVParallelLinear,
@@ -98,7 +101,6 @@ def __init__(
             top_k=top_k,
             hidden_size=hidden_size,
             intermediate_size=intermediate_size,
-            reduce_results=True,
             renormalize=False,
             quant_config=quant_config,
             tp_size=tp_size,
@@ -337,7 +339,7 @@ def forward(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return FusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/openpangu.py b/vllm/model_executor/models/openpangu.py
index 994ae82529ab..68ab4a9ae4cb 100644
--- a/vllm/model_executor/models/openpangu.py
+++ b/vllm/model_executor/models/openpangu.py
@@ -44,7 +44,10 @@
     Attention,
     StaticSinkAttention,
 )
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
@@ -200,13 +203,12 @@ def __init__(
         else:
             self.shared_experts = None
 
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.shared_experts,
             num_experts=config.n_routed_experts,
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=config.norm_topk_prob,
             quant_config=quant_config,
             use_grouped_topk=True,
@@ -214,8 +216,8 @@ def __init__(
             topk_group=1,
             prefix=f"{prefix}.experts",
             scoring_func="sigmoid",
-            # we do scaling outside, set factor to 1.0 to avoid double mul
-            routed_scaling_factor=1.0,
+            routed_scaling_factor=self.routed_scaling_factor,
+            apply_routed_scale_to_output=True,
             e_score_correction_bias=self.gate.e_score_correction_bias,
             enable_eplb=self.enable_eplb,
             num_redundant_experts=self.n_redundant_experts,
@@ -234,33 +236,15 @@ def forward(
 
         router_logits, _ = self.gate(hidden_states)
 
-        fused_moe_out = self.experts(
+        final_hidden_states = self.experts(
             hidden_states=hidden_states, router_logits=router_logits
         )
 
-        shared_output, final_hidden_states = fused_moe_out
-        if self.shared_experts is None:
-            assert shared_output is None
-
-        if hidden_states.dtype != torch.float16:
-            final_hidden_states *= self.routed_scaling_factor
-        elif self.shared_experts is not None:
-            assert shared_output is not None
-            shared_output *= 1.0 / self.routed_scaling_factor
-
-        if self.shared_experts is not None:
-            assert shared_output is not None
-            final_hidden_states += shared_output
-
         if self.is_sequence_parallel:
             final_hidden_states = tensor_model_parallel_all_gather(
                 final_hidden_states, 0
             )
             final_hidden_states = final_hidden_states[:num_tokens]
-        elif self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
-                final_hidden_states
-            )
 
         return final_hidden_states.view(num_tokens, hidden_dim)
 
@@ -1168,7 +1152,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         ]
         has_experts = hasattr(self.config, "n_routed_experts")
         if has_experts:
-            expert_merge_mapping = SharedFusedMoE.make_expert_params_mapping(
+            expert_merge_mapping = fused_moe_make_expert_params_mapping(
                 self,
                 ckpt_gate_proj_name="gate_proj",
                 ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/openpangu_mtp.py b/vllm/model_executor/models/openpangu_mtp.py
index 91b454a4bc38..3a04ccdff5be 100644
--- a/vllm/model_executor/models/openpangu_mtp.py
+++ b/vllm/model_executor/models/openpangu_mtp.py
@@ -28,7 +28,9 @@
 
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -147,7 +149,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
             ("fused_qkv_a_proj", "kv_a_proj_with_mqa", 1),
         ]
 
-        expert_params_mapping = FusedMoE.make_expert_params_mapping(
+        expert_params_mapping = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/openpangu_vl.py b/vllm/model_executor/models/openpangu_vl.py
index e9288e6ddb14..04a6dba41f42 100644
--- a/vllm/model_executor/models/openpangu_vl.py
+++ b/vllm/model_executor/models/openpangu_vl.py
@@ -44,8 +44,7 @@
     RowParallelLinear,
 )
 from vllm.model_executor.layers.quantization import QuantizationConfig
-from vllm.model_executor.layers.quantization.gptq import GPTQConfig
-from vllm.model_executor.layers.quantization.gptq_marlin import GPTQMarlinConfig
+from vllm.model_executor.layers.quantization.auto_gptq import AutoGPTQConfig
 from vllm.model_executor.layers.rotary_embedding.common import ApplyRotaryEmb
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.models.interfaces import (
@@ -857,7 +856,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         with self._mark_language_model(vllm_config):
             self.language_model = init_vllm_registered_model(
                 vllm_config=vllm_config,
-                prefix=maybe_prefix("openpangu", "language_model"),
+                prefix=maybe_prefix(prefix, "openpangu.language_model"),
                 architectures=["PanguEmbeddedForCausalLM"],
             )
 
@@ -883,7 +882,7 @@ def _parse_preprocess_params(self, vision_config):
         self.image_std = tuple(image_processor.image_std)
 
     def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig):
-        if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)):
+        if isinstance(quant_config, AutoGPTQConfig):
             return None
         return quant_config
 
diff --git a/vllm/model_executor/models/openvla.py b/vllm/model_executor/models/openvla.py
new file mode 100644
index 000000000000..9946bf026436
--- /dev/null
+++ b/vllm/model_executor/models/openvla.py
@@ -0,0 +1,528 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Iterable, Mapping, Sequence
+from typing import Annotated, Literal
+
+import torch
+import torch.nn as nn
+from transformers import BatchFeature
+
+from vllm.config import VllmConfig
+from vllm.config.multimodal import BaseDummyOptions
+from vllm.inputs import MultiModalDataDict
+from vllm.model_executor.layers.activation import get_act_fn
+from vllm.model_executor.layers.linear import (
+    ColumnParallelLinear,
+    ReplicatedLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.models.interfaces import (
+    MultiModalEmbeddings,
+    SupportsMultiModal,
+    SupportsPP,
+)
+from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.multimodal.inputs import MultiModalFieldConfig, MultiModalKwargsItems
+from vllm.multimodal.parse import (
+    ImageEmbeddingItems,
+    ImageProcessorItems,
+    ImageSize,
+    MultiModalDataItems,
+)
+from vllm.multimodal.processing import (
+    BaseDummyInputsBuilder,
+    BaseMultiModalProcessor,
+    BaseProcessingInfo,
+    InputProcessingContext,
+    PromptIndexTargets,
+    PromptInsertion,
+    PromptUpdate,
+    PromptUpdateDetails,
+)
+from vllm.sequence import IntermediateTensors
+from vllm.transformers_utils.configs import OpenVLAConfig
+from vllm.transformers_utils.processors.openvla import (
+    OpenVLAImageProcessor,
+    OpenVLAProcessor,
+)
+from vllm.utils.tensor_schema import TensorSchema, TensorShape
+
+from .module_mapping import MultiModelKeys
+from .utils import AutoWeightsLoader, init_vllm_registered_model, maybe_prefix
+
+# openvla/openvla-7b uses 224x224 images with ViT patch size 14, yielding a
+# 16x16 image-token grid.
+_OPENVLA_IMAGE_SIZE = 224
+_OPENVLA_PATCH_SIZE = 14
+_OPENVLA_TIMM_MODEL_IDS = (
+    "vit_large_patch14_reg4_dinov2.lvd142m",
+    "vit_so400m_patch14_siglip_224",
+)
+_OPENVLA_TIMM_OVERRIDE_ACT_LAYERS = (None, None)
+_OPENVLA_IMAGE_SIZES = (_OPENVLA_IMAGE_SIZE, _OPENVLA_IMAGE_SIZE)
+
+
+def _get_num_image_tokens(image_size: int) -> int:
+    return (image_size // _OPENVLA_PATCH_SIZE) ** 2
+
+
+class OpenVLAImagePixelInputs(TensorSchema):
+    """
+    Dimensions:
+        - bn: Batch size * number of images
+        - c: Number of channels (6)
+        - h: Height
+        - w: Width
+    """
+
+    type: Literal["pixel_values"] = "pixel_values"
+    data: Annotated[torch.Tensor, TensorShape("bn", 6, "h", "w")]
+
+
+class PrismaticVisionBackbone(nn.Module):
+    """OpenVLA's fused DINOv2 + SigLIP vision backbone."""
+
+    def __init__(
+        self,
+        *,
+        image_sizes: Sequence[int],
+        timm_model_ids: Sequence[str],
+        timm_override_act_layers: Sequence[str | None],
+        use_fused_vision_backbone: bool,
+    ) -> None:
+        super().__init__()
+        if not use_fused_vision_backbone:
+            raise ValueError(
+                "OpenVLA currently supports only the fused DINOv2 + SigLIP "
+                "vision backbone."
+            )
+        if tuple(image_sizes) != _OPENVLA_IMAGE_SIZES:
+            raise ValueError(
+                "OpenVLA currently supports only 224x224 image inputs, "
+                f"got image_sizes={list(image_sizes)}."
+            )
+        if tuple(timm_model_ids) != _OPENVLA_TIMM_MODEL_IDS:
+            raise ValueError(
+                "OpenVLA currently supports only the dinosiglip-vit-so-224px "
+                "vision backbone, got "
+                f"timm_model_ids={list(timm_model_ids)}."
+            )
+        if tuple(timm_override_act_layers) != _OPENVLA_TIMM_OVERRIDE_ACT_LAYERS:
+            raise ValueError(
+                "OpenVLA currently supports only the default timm activation "
+                "layers, got "
+                f"timm_override_act_layers={list(timm_override_act_layers)}."
+            )
+
+        self.image_size = image_sizes[0]
+        self.use_fused_vision_backbone = use_fused_vision_backbone
+
+        self.embed_dim = 2176 if use_fused_vision_backbone else 1024
+
+        try:
+            import timm
+        except ImportError as e:
+            raise ImportError(
+                "Please install timm to use OpenVLA. OpenVLA verification "
+                "used timm==0.9.10."
+            ) from e
+
+        self.dinov2_featurizer = timm.create_model(
+            timm_model_ids[0],
+            pretrained=False,
+            num_classes=0,
+            img_size=self.image_size,
+            act_layer=timm_override_act_layers[0],
+        )
+        self.siglip_featurizer = (
+            timm.create_model(
+                timm_model_ids[1],
+                pretrained=False,
+                num_classes=0,
+                img_size=self.image_size,
+                act_layer=timm_override_act_layers[1],
+            )
+            if use_fused_vision_backbone
+            else None
+        )
+
+    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
+        if self.dinov2_featurizer is None:
+            raise RuntimeError("OpenVLA vision backbone is not initialized.")
+
+        if self.use_fused_vision_backbone and pixel_values.shape[1] != 6:
+            raise ValueError(
+                "OpenVLA fused DINOv2 + SigLIP backbone expects 6-channel "
+                "image inputs: 3 DINOv2-normalized channels followed by 3 "
+                "SigLIP-normalized channels, "
+                f"got {pixel_values.shape[1]} channels."
+            )
+
+        dinov2_pixels = pixel_values[:, :3]
+
+        num_dinov2_blocks = len(self.dinov2_featurizer.blocks)
+        dinov2_features = self.dinov2_featurizer.get_intermediate_layers(
+            dinov2_pixels, n={num_dinov2_blocks - 2}
+        )[0]
+
+        if self.siglip_featurizer is not None:
+            siglip_pixels = pixel_values[:, 3:]
+            num_siglip_blocks = len(self.siglip_featurizer.blocks)
+            siglip_features = self.siglip_featurizer.get_intermediate_layers(
+                siglip_pixels, n={num_siglip_blocks - 2}
+            )[0]
+            return torch.cat([dinov2_features, siglip_features], dim=-1)
+
+        return dinov2_features
+
+
+class PrismaticProjector(nn.Module):
+    """Project Prismatic vision features into the language-model hidden size."""
+
+    def __init__(
+        self,
+        *,
+        vision_dim: int,
+        text_dim: int,
+        use_fused_vision_backbone: bool,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.use_fused_vision_backbone = use_fused_vision_backbone
+
+        if use_fused_vision_backbone:
+            intermediate_dim = 4 * vision_dim
+            self.fc1 = ColumnParallelLinear(
+                vision_dim,
+                intermediate_dim,
+                bias=True,
+                quant_config=quant_config,
+                prefix=f"{prefix}.fc1",
+            )
+            self.act_fn1 = get_act_fn("gelu")
+            self.fc2 = RowParallelLinear(
+                intermediate_dim,
+                text_dim,
+                bias=True,
+                quant_config=quant_config,
+                prefix=f"{prefix}.fc2",
+            )
+            self.act_fn2 = get_act_fn("gelu")
+            self.fc3 = ReplicatedLinear(
+                text_dim,
+                text_dim,
+                bias=True,
+                quant_config=quant_config,
+                prefix=f"{prefix}.fc3",
+            )
+        else:
+            self.fc1 = ColumnParallelLinear(
+                vision_dim,
+                text_dim,
+                bias=True,
+                quant_config=quant_config,
+                prefix=f"{prefix}.fc1",
+            )
+            self.act_fn1 = get_act_fn("gelu")
+            self.fc2 = RowParallelLinear(
+                text_dim,
+                text_dim,
+                bias=True,
+                quant_config=quant_config,
+                prefix=f"{prefix}.fc2",
+            )
+
+    def forward(self, image_features: torch.Tensor) -> torch.Tensor:
+        hidden_states, _ = self.fc1(image_features)
+        hidden_states = self.act_fn1(hidden_states)
+        hidden_states, _ = self.fc2(hidden_states)
+
+        if self.use_fused_vision_backbone:
+            hidden_states = self.act_fn2(hidden_states)
+            hidden_states, _ = self.fc3(hidden_states)
+
+        return hidden_states
+
+
+class OpenVLAProcessingInfo(BaseProcessingInfo):
+    def __init__(self, ctx: InputProcessingContext) -> None:
+        super().__init__(ctx)
+        self.hf_processor = OpenVLAProcessor(
+            image_processor=OpenVLAImageProcessor(
+                image_size=self.get_hf_config().image_sizes[0],
+            ),
+            tokenizer=self.get_tokenizer(),
+        )
+
+    def get_hf_config(self) -> OpenVLAConfig:
+        return self.ctx.get_hf_config(OpenVLAConfig)
+
+    def get_hf_processor(self, **kwargs: object) -> OpenVLAProcessor:
+        return self.hf_processor
+
+    def get_supported_mm_limits(self) -> Mapping[str, int | None]:
+        return {"image": 1}
+
+    def get_num_image_tokens(
+        self,
+        *,
+        image_width: int,
+        image_height: int,
+    ) -> int:
+        image_size = self.get_hf_config().image_sizes[0]
+        return _get_num_image_tokens(image_size)
+
+    def get_image_size_with_most_features(self) -> ImageSize:
+        image_size = self.get_hf_config().image_sizes[0]
+        return ImageSize(width=image_size, height=image_size)
+
+    def get_mm_max_tokens_per_item(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+    ) -> Mapping[str, int] | None:
+        image_size = self.get_hf_config().image_sizes[0]
+        return {"image": _get_num_image_tokens(image_size)}
+
+
+class OpenVLADummyInputsBuilder(BaseDummyInputsBuilder[OpenVLAProcessingInfo]):
+    def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
+        return ""
+
+    def get_dummy_mm_data(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+        mm_options: Mapping[str, BaseDummyOptions],
+    ) -> MultiModalDataDict:
+        num_images = mm_counts.get("image", 0)
+        image_overrides = mm_options.get("image")
+        image_size = self.info.get_image_size_with_most_features()
+
+        return {
+            "image": self._get_dummy_images(
+                width=image_size.width,
+                height=image_size.height,
+                num_images=num_images,
+                overrides=image_overrides,
+            )
+        }
+
+
+class OpenVLAMultiModalProcessor(BaseMultiModalProcessor[OpenVLAProcessingInfo]):
+    """Processor contract for OpenVLA image inputs.
+
+    OpenVLA feeds the same RGB image to DINOv2 and SigLIP after different
+    normalizations. The processor exposes this as one 6-channel tensor:
+    channels 0-2 are DINOv2-normalized and channels 3-5 are SigLIP-normalized.
+    """
+
+    def _get_mm_fields_config(
+        self,
+        hf_inputs: BatchFeature,
+        hf_processor_mm_kwargs: Mapping[str, object],
+    ) -> Mapping[str, MultiModalFieldConfig]:
+        return dict(pixel_values=MultiModalFieldConfig.batched("image"))
+
+    def _hf_processor_applies_updates(
+        self,
+        prompt_text: str,
+        mm_items: MultiModalDataItems,
+        hf_processor_mm_kwargs: Mapping[str, object],
+        tokenization_kwargs: Mapping[str, object],
+    ) -> bool:
+        return False
+
+    def _get_prompt_updates(
+        self,
+        mm_items: MultiModalDataItems,
+        hf_processor_mm_kwargs: Mapping[str, object],
+        out_mm_kwargs: MultiModalKwargsItems,
+    ) -> Sequence[PromptUpdate]:
+        hf_config = self.info.get_hf_config()
+        image_token_id = hf_config.image_token_index
+
+        tokenizer = self.info.get_tokenizer()
+        bos_token_id = tokenizer.bos_token_id
+
+        def get_insertion(item_idx: int) -> PromptUpdateDetails[list[int]]:
+            images = mm_items.get_items(
+                "image", (ImageEmbeddingItems, ImageProcessorItems)
+            )
+            if isinstance(images, ImageEmbeddingItems):
+                num_image_tokens = images.get_feature_size(item_idx)
+            else:
+                image_size = images.get_image_size(item_idx)
+                num_image_tokens = self.info.get_num_image_tokens(
+                    image_width=image_size.width,
+                    image_height=image_size.height,
+                )
+
+            image_tokens = [image_token_id] * num_image_tokens
+            return PromptUpdateDetails.select_token_id(
+                image_tokens,
+                embed_token_id=image_token_id,
+            )
+
+        return [
+            PromptInsertion(
+                modality="image",
+                target=PromptIndexTargets.prefix(
+                    [bos_token_id] if bos_token_id is not None else []
+                ),
+                insertion=get_insertion,
+            )
+        ]
+
+
+@MULTIMODAL_REGISTRY.register_processor(
+    OpenVLAMultiModalProcessor,
+    info=OpenVLAProcessingInfo,
+    dummy_inputs=OpenVLADummyInputsBuilder,
+)
+class OpenVLAForActionPrediction(nn.Module, SupportsMultiModal, SupportsPP):
+    """OpenVLA wrapper with vLLM language-model execution wired in."""
+
+    @classmethod
+    def get_placeholder_str(cls, modality: str, i: int) -> str | None:
+        if modality.startswith("image"):
+            return None
+        raise ValueError("Only image modality is supported")
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        self.config = config
+        self.multimodal_config = vllm_config.model_config.multimodal_config
+        self.image_token_id = config.image_token_index
+        self.n_action_bins = config.n_action_bins
+        self.num_patches = _get_num_image_tokens(config.image_sizes[0])
+
+        with self._mark_tower_model(vllm_config, "image"):
+            self.vision_backbone = PrismaticVisionBackbone(
+                image_sizes=config.image_sizes,
+                timm_model_ids=config.timm_model_ids,
+                timm_override_act_layers=config.timm_override_act_layers,
+                use_fused_vision_backbone=config.use_fused_vision_backbone,
+            )
+            self.projector = PrismaticProjector(
+                vision_dim=self.vision_backbone.embed_dim,
+                text_dim=config.text_config.hidden_size,
+                use_fused_vision_backbone=config.use_fused_vision_backbone,
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "projector"),
+            )
+
+        with self._mark_language_model(vllm_config):
+            self.language_model = init_vllm_registered_model(
+                vllm_config=vllm_config,
+                hf_config=config.text_config,
+                prefix=maybe_prefix(prefix, "language_model"),
+            )
+
+        self.make_empty_intermediate_tensors = (
+            self.language_model.make_empty_intermediate_tensors
+        )
+
+    def get_language_model(self) -> nn.Module:
+        return self.language_model
+
+    def _parse_and_validate_image_input(
+        self,
+        **kwargs: object,
+    ) -> OpenVLAImagePixelInputs | None:
+        pixel_values = kwargs.pop("pixel_values", None)
+        if pixel_values is None:
+            return None
+
+        return OpenVLAImagePixelInputs(
+            type="pixel_values",
+            data=pixel_values,
+            resolve_bindings={
+                "h": self.config.image_sizes[0],
+                "w": self.config.image_sizes[0],
+            },
+        )
+
+    def _process_image_input(
+        self,
+        image_input: OpenVLAImagePixelInputs,
+    ) -> torch.Tensor:
+        if self.vision_backbone.dinov2_featurizer is None:
+            raise RuntimeError("OpenVLA vision backbone is not initialized.")
+
+        pixel_values = image_input["data"].to(
+            dtype=self.vision_backbone.dinov2_featurizer.patch_embed.proj.weight.dtype
+        )
+        vision_features = self.vision_backbone(pixel_values)
+        return self.projector(vision_features)
+
+    def embed_multimodal(self, **kwargs: object) -> MultiModalEmbeddings:
+        image_input = self._parse_and_validate_image_input(**kwargs)
+        if image_input is None:
+            return []
+
+        return self._process_image_input(image_input)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        **kwargs: object,
+    ) -> torch.Tensor | IntermediateTensors:
+        if intermediate_tensors is not None:
+            inputs_embeds = None
+
+        return self.language_model.model(
+            input_ids,
+            positions,
+            intermediate_tensors,
+            inputs_embeds=inputs_embeds,
+        )
+
+    def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor | None:
+        return self.language_model.compute_logits(hidden_states)
+
+    def get_mm_mapping(self) -> MultiModelKeys:
+        return MultiModelKeys.from_string_field(
+            language_model="language_model",
+            connector="projector",
+            tower_model="vision_backbone",
+        )
+
+    def get_num_mm_encoder_tokens(self, num_image_tokens: int) -> int:
+        return num_image_tokens
+
+    def get_num_mm_connector_tokens(self, num_vision_tokens: int) -> int:
+        return num_vision_tokens
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        def maybe_rename_vision_weights(
+            weights: Iterable[tuple[str, torch.Tensor]],
+        ) -> Iterable[tuple[str, torch.Tensor]]:
+            for name, weight in weights:
+                if name.startswith("vision_backbone.featurizer."):
+                    name = name.replace(
+                        "vision_backbone.featurizer.",
+                        "vision_backbone.dinov2_featurizer.",
+                        1,
+                    )
+                elif name.startswith("vision_backbone.fused_featurizer."):
+                    name = name.replace(
+                        "vision_backbone.fused_featurizer.",
+                        "vision_backbone.siglip_featurizer.",
+                        1,
+                    )
+                # HF uses .scale_factor, timm uses .gamma
+                if ".ls1.scale_factor" in name or ".ls2.scale_factor" in name:
+                    name = name.replace(".scale_factor", ".gamma")
+                yield name, weight
+
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(maybe_rename_vision_weights(weights))
diff --git a/vllm/model_executor/models/ovis.py b/vllm/model_executor/models/ovis.py
index 3d9cf1c3415f..f25585fd7643 100644
--- a/vllm/model_executor/models/ovis.py
+++ b/vllm/model_executor/models/ovis.py
@@ -96,7 +96,7 @@ def __init__(
         self.backbone = self._init_backbone(
             config=config,
             quant_config=quant_config,
-            prefix=f"{prefix}.backbone",
+            prefix=maybe_prefix(prefix, "backbone"),
         )
         # reserved tokens for IMAGE_INDICATORS
         head_dim = config.vocab_size - len(IMAGE_INDICATOR_IDS)
@@ -442,7 +442,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             self.visual_tokenizer = VisualTokenizer(
                 config=config.visual_tokenizer_config,
                 quant_config=quant_config,
-                prefix=f"{prefix}.visual_tokenizer",
+                prefix=maybe_prefix(prefix, "visual_tokenizer"),
             )
             self.vte = VisualEmbedding(
                 self.config.visual_tokenizer_config.vocab_size, self.config.hidden_size
diff --git a/vllm/model_executor/models/ovis2_5.py b/vllm/model_executor/models/ovis2_5.py
index 4acad73c502e..6dbed78a6fc6 100644
--- a/vllm/model_executor/models/ovis2_5.py
+++ b/vllm/model_executor/models/ovis2_5.py
@@ -465,7 +465,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
                 config=config.vit_config,
                 visual_vocab_size=config.visual_vocab_size,
                 quant_config=quant_config,
-                prefix=f"{prefix}.visual_tokenizer",
+                prefix=maybe_prefix(prefix, "visual_tokenizer"),
             )
             self.vte = VisualEmbedding(config.visual_vocab_size, config.hidden_size)
 
diff --git a/vllm/model_executor/models/paddleocr_vl.py b/vllm/model_executor/models/paddleocr_vl.py
index 515be154320f..cd88009c739a 100644
--- a/vllm/model_executor/models/paddleocr_vl.py
+++ b/vllm/model_executor/models/paddleocr_vl.py
@@ -15,7 +15,7 @@
 # limitations under the License.
 
 import math
-from collections.abc import Iterable, Mapping, Sequence
+from collections.abc import Iterable, Iterator, Mapping, Sequence
 from functools import partial
 from typing import Annotated, Literal
 
@@ -200,7 +200,13 @@ def get_image_size_with_most_features(self) -> ImageSize:
         merge_size = hf_config.vision_config.spatial_merge_size
         patch_size = hf_config.vision_config.patch_size
         factor = merge_size * patch_size
-        max_num_tokens = image_processor.max_pixels // (factor**2)
+        if self.ctx.model_config.trust_remote_code:
+            # Defined in HF Hub repo
+            max_pixels = image_processor.max_pixels
+        else:
+            # Defined in Transformers library (requires v5.0 or above)
+            max_pixels = image_processor.size.longest_edge
+        max_num_tokens = max_pixels // (factor**2)
         # Find factors of max_num_tokens close to its square root
         # to create a dummy image with a reasonable aspect ratio.
         h_patches = int(math.sqrt(max_num_tokens))
@@ -889,7 +895,7 @@ def __init__(
         self.vision_model = SiglipVisionTransformer(
             config,
             quant_config=quant_config,
-            prefix=f"{prefix}.vision_model",
+            prefix=maybe_prefix(prefix, "vision_model"),
         )
         self.quant_config = quant_config
 
@@ -1050,121 +1056,83 @@ def compute_logits(
     ) -> torch.Tensor | None:
         return self.language_model.compute_logits(hidden_states)
 
+    def iter_mm_grid_thw(
+        self, mm_features: list[MultiModalFeatureSpec]
+    ) -> Iterator[tuple[int, int, int, int, float]]:
+        """
+        Iterate over multimodal features and yield grid information.
+
+        Args:
+            mm_features: List of multimodal feature specifications
+
+        Yields:
+            Tuple of (offset, grid_t, grid_h, grid_w, t_factor) for each frame/image
+        """
+        spatial_merge_size = self.config.vision_config.spatial_merge_size
+        tokens_per_second = getattr(self.config.vision_config, "tokens_per_second", 1.0)
+        for mm_feature in sorted(mm_features, key=lambda f: f.mm_position.offset):
+            offset = mm_feature.mm_position.offset
+            if mm_feature.modality == "image":
+                t, h, w = mm_feature.data["image_grid_thw"].data.tolist()
+                assert t == 1, f"Image must have 1 frame, got {t}"
+                yield offset, 1, h // spatial_merge_size, w // spatial_merge_size, 1.0
+            elif mm_feature.modality == "video":
+                t, h, w = mm_feature.data["video_grid_thw"].data.tolist()
+                second_per_grid_ts = 1.0
+                if mm_feature.data.get("second_per_grid_ts", None):
+                    second_per_grid_ts = mm_feature.data[
+                        "second_per_grid_ts"
+                    ].data.item()
+                t_factor = second_per_grid_ts * tokens_per_second
+                yield (
+                    offset,
+                    t,
+                    h // spatial_merge_size,
+                    w // spatial_merge_size,
+                    t_factor,
+                )
+            else:
+                raise ValueError(f"Unsupported modality: {mm_feature.modality}")
+
     def get_mrope_input_positions(
         self,
         input_tokens: list[int],
         mm_features: list[MultiModalFeatureSpec],
     ) -> tuple[torch.Tensor, int]:
-        kwargs = MultiModalFeatureSpec.gather_kwargs(
-            mm_features,
-            {"image_grid_thw", "video_grid_thw", "second_per_grid_ts"},
-        )
-        image_grid_thw = [item.tolist() for item in kwargs.get("image_grid_thw", [])]
-        video_grid_thw = [item.tolist() for item in kwargs.get("video_grid_thw", [])]
-        second_per_grid_ts = kwargs.get("second_per_grid_ts", [])
-
-        hf_config = self.config
-        image_token_id = hf_config.image_token_id
-        video_token_id = hf_config.video_token_id
-        vision_start_token_id = hf_config.vision_start_token_id
-        spatial_merge_size = hf_config.vision_config.spatial_merge_size
-        tokens_per_second = getattr(hf_config.vision_config, "tokens_per_second", 1.0)
-
-        input_tokens_tensor = torch.tensor(input_tokens)
-        vision_start_indices = torch.argwhere(
-            input_tokens_tensor == vision_start_token_id
-        ).squeeze(1)
-        vision_tokens = input_tokens_tensor[vision_start_indices + 1]
-        image_nums = (vision_tokens == image_token_id).sum()
-        video_nums = (vision_tokens == video_token_id).sum()
         llm_pos_ids_list: list = []
-
         st = 0
-        remain_images, remain_videos = image_nums, video_nums
-
-        image_index, video_index = 0, 0
-        for _ in range(image_nums + video_nums):
-            video_second_per_grid_t = 0.0
-            if remain_images > 0:
-                try:
-                    ed_image = input_tokens.index(image_token_id, st)
-                except ValueError:
-                    ed_image = len(input_tokens) + 1
-            else:
-                ed_image = len(input_tokens) + 1
-            if remain_videos > 0:
-                try:
-                    ed_video = input_tokens.index(video_token_id, st)
-                except ValueError:
-                    ed_video = len(input_tokens) + 1
-            else:
-                ed_video = len(input_tokens) + 1
-            if ed_image < ed_video:
-                t, h, w = image_grid_thw[image_index]
-                image_index += 1
-                remain_images -= 1
-                ed = ed_image
-            else:
-                t, h, w = video_grid_thw[video_index]
-                video_second_per_grid_t = 1.0
-                if second_per_grid_ts:
-                    video_second_per_grid_t = second_per_grid_ts[video_index]
-                video_index += 1
-                remain_videos -= 1
-                ed = ed_video
-
-            llm_grid_t, llm_grid_h, llm_grid_w = (
-                t,
-                h // spatial_merge_size,
-                w // spatial_merge_size,
-            )
-            text_len = ed - st
 
+        for (
+            offset,
+            llm_grid_t,
+            llm_grid_h,
+            llm_grid_w,
+            t_factor,
+        ) in self.iter_mm_grid_thw(mm_features):
+            text_len = offset - st
             st_idx = llm_pos_ids_list[-1].max() + 1 if len(llm_pos_ids_list) > 0 else 0
             llm_pos_ids_list.append(
-                torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx
+                np.broadcast_to(np.arange(text_len), (3, text_len)) + st_idx
             )
 
-            t_index = (
-                (
-                    torch.arange(llm_grid_t)
-                    .view(-1, 1)
-                    .expand(-1, llm_grid_h * llm_grid_w)
-                    * video_second_per_grid_t
-                    * tokens_per_second
-                )
-                .long()
-                .flatten()
-            )
+            grid_indices = np.indices((llm_grid_t, llm_grid_h, llm_grid_w))
+            if t_factor != 1.0:
+                grid_indices[0] = (grid_indices[0] * t_factor).astype(np.int64)
 
-            h_index = (
-                torch.arange(llm_grid_h)
-                .view(1, -1, 1)
-                .expand(llm_grid_t, -1, llm_grid_w)
-                .flatten()
-            )
-            w_index = (
-                torch.arange(llm_grid_w)
-                .view(1, 1, -1)
-                .expand(llm_grid_t, llm_grid_h, -1)
-                .flatten()
-            )
-            llm_pos_ids_list.append(
-                torch.stack([t_index, h_index, w_index]) + text_len + st_idx
-            )
-            st = ed + llm_grid_t * llm_grid_h * llm_grid_w
+            llm_pos_ids_list.append(grid_indices.reshape(3, -1) + text_len + st_idx)
+            st = offset + llm_grid_t * llm_grid_h * llm_grid_w
 
         if st < len(input_tokens):
             st_idx = llm_pos_ids_list[-1].max() + 1 if len(llm_pos_ids_list) > 0 else 0
             text_len = len(input_tokens) - st
             llm_pos_ids_list.append(
-                torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx
+                np.broadcast_to(np.arange(text_len), (3, text_len)) + st_idx
             )
 
-        llm_positions = torch.cat(llm_pos_ids_list, dim=1).reshape(3, -1)
+        llm_positions = np.concatenate(llm_pos_ids_list, axis=1).reshape(3, -1)
         mrope_position_delta = (llm_positions.max() + 1 - len(input_tokens)).item()
 
-        return llm_positions, mrope_position_delta
+        return torch.from_numpy(llm_positions), mrope_position_delta
 
     def _parse_and_validate_image_input(
         self, **kwargs: object
diff --git a/vllm/model_executor/models/parakeet.py b/vllm/model_executor/models/parakeet.py
index 1a3fd5bad0c0..5671ba05c56d 100644
--- a/vllm/model_executor/models/parakeet.py
+++ b/vllm/model_executor/models/parakeet.py
@@ -5,19 +5,24 @@
 """
 
 from collections.abc import Iterable
-from dataclasses import asdict
+from functools import cache
+from typing import Any
 
 import numpy as np
 import torch
 import torch.nn as nn
 from transformers import ParakeetEncoder as HFParakeetEncoder
-from transformers import ParakeetFeatureExtractor, PretrainedConfig
+from transformers import PretrainedConfig
+from transformers.audio_utils import mel_filter_bank
 
+from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import ReLUSquaredActivation
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.transformers_utils.configs.parakeet import ExtractorConfig, ParakeetConfig
 
+logger = init_logger(__name__)
+
 
 class ParakeetProjection(nn.Module):
     def __init__(self, config: ParakeetConfig) -> None:
@@ -94,6 +99,8 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
             if target is None:
                 target = buffers_dict.get(target_name)
             if target is None:
+                if self._can_skip_missing_named_param(target_name):
+                    continue
                 raise ValueError(f"Unknown weight: {name}")
             weight_loader = getattr(target, "weight_loader", default_weight_loader)
             with torch.no_grad():
@@ -102,17 +109,146 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
         return loaded_params
 
+    def _can_skip_missing_named_param(self, target_name: str) -> bool:
+        if self.config.convolution_bias:
+            return False
+
+        # In transformers v5 (not v4), `convolution_bias=False` is
+        # propagated from parakeet config. If `False`, torch.conv1d will
+        # *skip registering the param*, thus it will be missing in the
+        # module's named params. *If* you happen to also have the bias
+        # tensors in the weights, it will cause a mismatch between the
+        # weights and the params.
+        # This allows us to have `convolution_bias=False` in the sound config,
+        # but still allow for the weights to exist.
+
+        return target_name.endswith(
+            (
+                ".conv.pointwise_conv1.bias",
+                ".conv.depthwise_conv.bias",
+                ".conv.pointwise_conv2.bias",
+            )
+        )
 
-class ParakeetExtractor(ParakeetFeatureExtractor):
+
+EPSILON = 1e-5
+LOG_ZERO_GUARD_VALUE = 2**-24
+
+
+class ParakeetExtractor:
     def __init__(self, config: PretrainedConfig) -> None:
         self.config = ExtractorConfig.from_hf_config(config)
-        super().__init__(**asdict(self.config))
+        """`config` is named *exactly* for `._get_subsampling_output_length` below"""
         self._clip_target_samples = int(
-            round(self.config.clip_duration_s * self.sampling_rate)
+            round(self.config.clip_duration_s * self.config.sampling_rate)
         )
         self._tail_min_samples = int(
-            round(self.config.clip_min_duration_s * self.sampling_rate)
+            round(self.config.clip_min_duration_s * self.config.sampling_rate)
+        )
+
+    @staticmethod
+    @cache
+    def _get_window(win_length: int, device: str) -> torch.Tensor:
+        return torch.hann_window(win_length, periodic=False, device=device)
+
+    @staticmethod
+    @cache
+    def _get_mel_filters(
+        feature_size: int, sampling_rate: int, n_fft: int, device: str
+    ) -> torch.Tensor:
+        filter_bank = mel_filter_bank(
+            num_frequency_bins=n_fft // 2 + 1,
+            num_mel_filters=feature_size,
+            min_frequency=0.0,
+            max_frequency=sampling_rate / 2,
+            sampling_rate=sampling_rate,
+            norm="slaney",
+            mel_scale="slaney",
         )
+        return torch.from_numpy(filter_bank.T).to(device=device, dtype=torch.float32)
+
+    def _torch_extract_fbank_features(self, waveform: torch.Tensor, device: str):
+        # spectrogram
+        device = str(torch.device(device))
+        cfg = self.config
+        window = self._get_window(cfg.win_length, device)
+        stft = torch.stft(
+            waveform,
+            self.config.n_fft,
+            hop_length=cfg.hop_length,
+            win_length=cfg.win_length,
+            window=window,
+            return_complex=True,
+            pad_mode="constant",
+        )
+        mel_filters = self._get_mel_filters(
+            cfg.feature_size, cfg.sampling_rate, cfg.n_fft, device
+        )
+        return self._apply_mel_filters(stft, mel_filters)
+
+    @torch.compile(dynamic=True)
+    def _apply_mel_filters(
+        self, stft_output: torch.Tensor, mel_filters: torch.Tensor
+    ) -> torch.Tensor:
+        magnitudes = stft_output.real.square() + stft_output.imag.square()
+        mel_spec = mel_filters @ magnitudes
+        mel_spec = torch.log(mel_spec + LOG_ZERO_GUARD_VALUE)
+        return mel_spec.permute(0, 2, 1)
+
+    @torch.compile(dynamic=True)
+    def _apply_preemphasis(
+        self, input_features: torch.Tensor, audio_lengths: torch.Tensor
+    ) -> torch.Tensor:
+        timemask = torch.arange(
+            input_features.shape[1], device=input_features.device
+        ).unsqueeze(0) < audio_lengths.unsqueeze(1)
+        input_features = torch.cat(
+            [
+                input_features[:, :1],
+                input_features[:, 1:]
+                - self.config.preemphasis * input_features[:, :-1],
+            ],
+            dim=1,
+        )
+        input_features = input_features.masked_fill(~timemask, 0.0)
+        return input_features
+
+    @torch.compile(dynamic=True)
+    def _normalize_mel_features(
+        self, mel_features: torch.Tensor, audio_lengths: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        features_lengths = torch.floor_divide(
+            audio_lengths + self.config.n_fft // 2 * 2 - self.config.n_fft,
+            self.config.hop_length,
+        )
+        attention_mask = (
+            torch.arange(mel_features.shape[1], device=mel_features.device)[None, :]
+            < features_lengths[:, None]
+        )
+        mask = attention_mask.unsqueeze(-1)
+        lengths = attention_mask.sum(dim=1)
+        mel_features_masked = mel_features * mask
+        mean = (mel_features_masked.sum(dim=1) / lengths.unsqueeze(-1)).unsqueeze(1)
+        variance = ((mel_features_masked - mean) ** 2 * mask).sum(dim=1) / (
+            lengths - 1
+        ).unsqueeze(-1)
+        std = torch.sqrt(variance).unsqueeze(1)
+        return (mel_features - mean) / (std + EPSILON) * mask, attention_mask
+
+    def _pad_raw_speech(
+        self, raw_speech: list[torch.Tensor], max_len: int, device: str
+    ) -> torch.Tensor:
+        output = torch.full(
+            (len(raw_speech), max_len),
+            self.config.padding_value,
+            device=device,
+            dtype=torch.float32,
+        )
+        dsts = [output[i, : raw_speech[i].shape[0]] for i in range(len(raw_speech))]
+        srcs = [s.squeeze(-1) for s in raw_speech]
+        # single kernel horizontal fusion
+        torch._foreach_copy_(dsts, srcs)
+        return output
 
     def _clip_sizes(self, audio_len: int) -> list[int]:
         audio_len = max(audio_len, self._tail_min_samples)
@@ -125,39 +261,75 @@ def _clip_sizes(self, audio_len: int) -> list[int]:
     def audio_token_count(self, audio_len: int) -> int:
         total_tokens = 0
         for clip_size in self._clip_sizes(audio_len):
-            num_frames = clip_size // self.hop_length
+            num_frames = clip_size // self.config.hop_length
             n_tokens = HFParakeetEncoder._get_subsampling_output_length(
                 self, torch.tensor([num_frames], dtype=torch.float)
             )
             total_tokens += int(n_tokens.item())
         return max(1, total_tokens)
 
-    def split_audio_into_clips(self, audio: np.ndarray) -> list[np.ndarray]:
+    def split_audio_into_clips(self, audio: torch.Tensor) -> list[torch.Tensor]:
         assert audio.ndim == 1
         audio_len = int(audio.shape[0])
         clip_sizes = self._clip_sizes(audio_len)
         target_len = sum(clip_sizes)
         if audio_len < target_len:
-            audio = np.pad(audio, (0, target_len - audio_len))
+            audio = torch.nn.functional.pad(audio, (0, target_len - audio_len))
 
-        clips = list[np.ndarray]()
+        clips = list[torch.Tensor]()
         offset = 0
         for clip_size in clip_sizes:
             clips.append(audio[offset : offset + clip_size])
             offset += clip_size
         return clips
 
-    def __call__(self, raw_speech: list[np.ndarray], *args, **kwargs):
-        audio_clips = list[np.ndarray]()
+    def __call__(
+        self,
+        raw_speech: list[np.ndarray],
+        *,
+        device: str = "cpu",
+    ) -> dict[str, Any]:
+        raw_speech = [
+            torch.as_tensor(speech, device=device, dtype=torch.float32)
+            for speech in raw_speech
+        ]
+
+        for i, speech in enumerate(raw_speech):
+            if len(speech.shape) > 1:
+                logger.warning(
+                    "Only mono-channel audio is supported for input to %s. "
+                    "We will take the mean of the channels to convert to mono.",
+                    self.__class__.__name__,
+                )
+                raw_speech[i] = speech.mean(-1)
+
+        audio_clips = list[torch.Tensor]()
         audio_num_clips = list[int]()
         for audio in raw_speech:
             clips = self.split_audio_into_clips(audio)
             audio_clips.extend(clips)
             audio_num_clips.append(len(clips))
+        raw_speech = audio_clips
 
-        outputs = super().__call__(audio_clips, *args, **kwargs)
-        outputs["audio_num_clips"] = audio_num_clips
-        return outputs
+        audio_lengths = torch.tensor(
+            [len(speech) for speech in raw_speech], dtype=torch.long, device=device
+        )
+
+        max_length = max(len(speech) for speech in raw_speech)
+        input_features = self._pad_raw_speech(raw_speech, max_length, device)
+        input_features = self._apply_preemphasis(input_features, audio_lengths)
+        input_features = self._torch_extract_fbank_features(input_features, device)
+        input_features, attention_mask = self._normalize_mel_features(
+            input_features, audio_lengths
+        )
+
+        return {
+            "input_audio_features": input_features,
+            "feature_attention_mask": attention_mask,
+            "audio_num_clips": audio_num_clips,
+        }
 
-    def audio_length(self, audio_tokens: int) -> int:
-        return int(audio_tokens * self.config.subsampling_factor * self.hop_length)
+    @staticmethod
+    def audio_length(raw_config: PretrainedConfig, audio_tokens: int) -> int:
+        config = ExtractorConfig.from_hf_config(raw_config)
+        return int(audio_tokens * config.subsampling_factor * config.hop_length)
diff --git a/vllm/model_executor/models/param2moe.py b/vllm/model_executor/models/param2moe.py
new file mode 100644
index 000000000000..e8ea2dbc0e60
--- /dev/null
+++ b/vllm/model_executor/models/param2moe.py
@@ -0,0 +1,883 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#
+# Copyright 2026 BharatGen AI team. All rights reserved.
+#
+# This code has been modified to accommodate Param2MoE's GQA-based MoE architecture.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from collections.abc import Iterable, Iterator
+from itertools import islice
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+from vllm.config import CacheConfig, VllmConfig
+from vllm.distributed import (
+    get_pp_group,
+    get_tensor_model_parallel_world_size,
+)
+from vllm.model_executor.layers.activation import SiluAndMul
+from vllm.model_executor.layers.attention import Attention
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import (
+    MergedColumnParallelLinear,
+    QKVParallelLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    ParallelLMHead,
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.sequence import IntermediateTensors
+
+from .interfaces import MixtureOfExperts, SupportsLoRA, SupportsPP
+from .utils import (
+    AutoWeightsLoader,
+    PPMissingLayer,
+    is_pp_missing_parameter,
+    make_empty_intermediate_tensors_factory,
+    make_layers,
+    maybe_prefix,
+)
+
+
+def _is_expert_bias_name(name: str) -> bool:
+    """True when the weight is the MoE router's per-expert score bias."""
+    return name.endswith(".mlp.gate.expert_bias")
+
+
+def _zero_mean_tensor(t: torch.Tensor) -> torch.Tensor:
+    if t.numel() == 0:
+        return t
+    return t - t.mean()
+
+
+def _rename_and_normalize_weights(
+    weights: Iterable[tuple[str, torch.Tensor]],
+) -> Iterator[tuple[str, torch.Tensor]]:
+    """
+    Translate HuggingFace Param2MoE weight names to vLLM internal names
+    and zero-mean the expert-bias tensor so the router stays balanced.
+
+    Mapping table (HF → vLLM):
+      model.word_embeddings.*              → model.embed_tokens.*
+      *.attention.query_key_value.*        → *.self_attn.qkv_proj.*
+      *.attention.dense.*                  → *.self_attn.o_proj.*
+      *.attention.query_layernorm.*        → *.self_attn.q_layernorm.*
+      *.attention.key_layernorm.*          → *.self_attn.k_layernorm.*
+      *.mlp.gate.expert_bias               → *.mlp.gate.e_score_correction_bias
+        (also zero-meant for load balance)
+    """
+    for name, w in weights:
+        # Embedding table
+        name = name.replace("model.word_embeddings.", "model.embed_tokens.")
+        # Fused QKV projection  (HF: query_key_value → vLLM: qkv_proj)
+        name = name.replace(".attention.query_key_value.", ".self_attn.qkv_proj.")
+        # Output projection  (HF: dense → vLLM: o_proj)
+        name = name.replace(".attention.dense.", ".self_attn.o_proj.")
+        # Per-head query norm
+        name = name.replace(".attention.query_layernorm.", ".self_attn.q_layernorm.")
+        # Per-head key norm
+        name = name.replace(".attention.key_layernorm.", ".self_attn.k_layernorm.")
+        # Catch any remaining .attention. → .self_attn. prefixes
+        # (e.g. future bias params on the projection layers)
+        name = name.replace(".attention.", ".self_attn.")
+
+        # Expert-score bias: rename + zero-mean
+        if name.endswith(".mlp.gate.expert_bias"):
+            name = name.replace(
+                ".mlp.gate.expert_bias",
+                ".mlp.gate.e_score_correction_bias",
+            )
+            w = _zero_mean_tensor(w)
+
+        yield name, w
+
+
+class Param2MoEAttention(nn.Module):
+    """
+    Grouped-Query Attention (GQA) for Param2MoE.
+
+    Notable differences from a vanilla GQA layer:
+      * The checkpoint fuses Q, K, V into a single ``query_key_value`` weight.
+        vLLM receives it already renamed to ``qkv_proj`` by the weight-name
+        translator and splits it during ``load_weights``.
+      * Optional per-head RMS norms on Q and K (``use_qk_norm=True``).
+    """
+
+    def __init__(
+        self,
+        config,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+
+        self.hidden_size = config.hidden_size
+        self.num_heads = config.num_attention_heads
+        self.num_kv_heads = config.num_key_value_heads
+        self.head_dim = config.head_dim or (self.hidden_size // self.num_heads)
+        self.use_qk_norm: bool = getattr(config, "use_qk_norm", False)
+
+        tp_size = get_tensor_model_parallel_world_size()
+        assert self.num_heads % tp_size == 0, (
+            f"num_attention_heads ({self.num_heads}) must be divisible "
+            f"by tensor-parallel world size ({tp_size})."
+        )
+        assert self.num_kv_heads % tp_size == 0, (
+            f"num_key_value_heads ({self.num_kv_heads}) must be divisible "
+            f"by tensor-parallel world size ({tp_size})."
+        )
+        self.num_local_heads = self.num_heads // tp_size
+        self.num_local_kv_heads = self.num_kv_heads // tp_size
+
+        # Sizes after TP split (used in forward to split qkv output)
+        self.q_size_local = self.num_local_heads * self.head_dim
+        self.kv_size_local = self.num_local_kv_heads * self.head_dim
+
+        self.scaling = self.head_dim**-0.5
+
+        self.qkv_proj = QKVParallelLinear(
+            hidden_size=self.hidden_size,
+            head_size=self.head_dim,
+            total_num_heads=self.num_heads,
+            total_num_kv_heads=self.num_kv_heads,
+            bias=getattr(config, "use_qkv_bias", False),
+            quant_config=quant_config,
+            prefix=f"{prefix}.qkv_proj",
+        )
+
+        self.o_proj = RowParallelLinear(
+            input_size=self.num_heads * self.head_dim,
+            output_size=self.hidden_size,
+            bias=getattr(config, "use_bias", False),
+            quant_config=quant_config,
+            prefix=f"{prefix}.o_proj",
+        )
+
+        if self.use_qk_norm:
+            self.q_layernorm = RMSNorm(self.head_dim, eps=config.rms_norm_eps)
+            self.k_layernorm = RMSNorm(self.head_dim, eps=config.rms_norm_eps)
+
+        # `partial_rotary_factor` defaults to 1.0 (full RoPE) if not in config
+        partial_rotary_factor: float = getattr(config, "partial_rotary_factor", 1.0)
+        rope_dim = int(self.head_dim * partial_rotary_factor)
+
+        rope_parameters: dict = {
+            "rope_type": "default",
+            "base": config.rope_theta,
+        }
+        if config.rope_scaling is not None:
+            rope_parameters.update(config.rope_scaling)
+            # Normalise key: some checkpoints use "type", vLLM wants "rope_type"
+            if "type" in rope_parameters and "rope_type" not in rope_parameters:
+                rope_parameters["rope_type"] = rope_parameters.pop("type")
+
+        self.rotary_emb = get_rope(
+            rope_dim,
+            max_position=config.max_position_embeddings,
+            rope_parameters=rope_parameters,
+            is_neox_style=True,
+        )
+
+        self.attn = Attention(
+            num_heads=self.num_local_heads,
+            head_size=self.head_dim,
+            scale=self.scaling,
+            num_kv_heads=self.num_local_kv_heads,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            prefix=f"{prefix}.attn",
+        )
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor:
+        qkv, _ = self.qkv_proj(hidden_states)
+        q, k, v = qkv.split(
+            [self.q_size_local, self.kv_size_local, self.kv_size_local],
+            dim=-1,
+        )
+        q = q.contiguous()
+        k = k.contiguous()
+        v = v.contiguous()
+
+        if self.use_qk_norm:
+            T = q.shape[0]
+            q = self.q_layernorm(q.view(T, self.num_local_heads, self.head_dim)).view(
+                T, self.q_size_local
+            )
+            k = self.k_layernorm(
+                k.view(T, self.num_local_kv_heads, self.head_dim)
+            ).view(T, self.kv_size_local)
+
+        q, k = self.rotary_emb(positions, q, k)
+        attn_output = self.attn(q, k, v)
+        output, _ = self.o_proj(attn_output)
+        return output
+
+
+class Param2MoEMLP(nn.Module):
+    """SwiGLU feed-forward block used for dense layers."""
+
+    def __init__(
+        self,
+        intermediate_size: int,
+        config,
+        quant_config: QuantizationConfig | None = None,
+        reduce_results: bool = True,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+
+        self.gate_up_proj = MergedColumnParallelLinear(
+            input_size=config.hidden_size,
+            output_sizes=[intermediate_size, intermediate_size],
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.gate_up_proj",
+        )
+        self.down_proj = RowParallelLinear(
+            input_size=intermediate_size,
+            output_size=config.hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            reduce_results=reduce_results,
+            prefix=f"{prefix}.down_proj",
+        )
+        self.act_fn = SiluAndMul()
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        gate_up, _ = self.gate_up_proj(x)
+        x = self.act_fn(gate_up)
+        x, _ = self.down_proj(x)
+        return x
+
+
+class Param2MoEMoEBlock(nn.Module):
+    """
+    Mixture-of-Experts block for Param2MoE.
+
+    Routing:
+      * Sigmoid scoring  (config.score_function = "sigmoid")
+      * Grouped top-k   (n_group, topk_group)
+      * Per-expert bias  (gate.expert_bias → e_score_correction_bias)
+      * routed_scaling_factor normalisation
+
+    One set of shared (always-active) experts is added on top.
+    """
+
+    def __init__(
+        self,
+        config,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+
+        self.config = config
+        self.tp_size = get_tensor_model_parallel_world_size()
+        self.hidden_size = config.hidden_size
+
+        self.num_experts: int = config.num_experts
+        self.top_k: int = config.num_experts_per_tok
+        self.routed_scaling_factor: float = getattr(
+            config, "routed_scaling_factor", 1.0
+        )
+
+        self.n_group: int | None = getattr(config, "n_group", None)
+        self.topk_group: int | None = getattr(config, "topk_group", None)
+        self.use_grouped_topk: bool = (
+            self.n_group is not None and self.topk_group is not None
+        )
+
+        self.norm_expert_prob: bool = getattr(config, "norm_topk_prob", True)
+        self.score_function: str = getattr(config, "score_function", "sigmoid")
+
+        self.gate = nn.Linear(
+            self.hidden_size,
+            self.num_experts,
+            bias=False,
+        )
+
+        if getattr(config, "moe_router_enable_expert_bias", True):
+            self.gate.e_score_correction_bias = nn.Parameter(
+                torch.zeros(self.num_experts, dtype=torch.float32)
+            )
+        else:
+            self.gate.e_score_correction_bias = None  # type: ignore[assignment]
+
+        self.num_shared_experts: int = getattr(config, "num_shared_experts", 1)
+        if self.num_shared_experts > 0:
+            # If moe_shared_expert_intermediate_size is present in the config
+            # it already encodes the TOTAL intermediate size across all shared
+            # experts (i.e. it equals moe_intermediate_size * num_shared_experts).
+            # Do NOT multiply again.  Fall back to computing the product only
+            # when the dedicated field is absent.
+            if (
+                hasattr(config, "moe_shared_expert_intermediate_size")
+                and config.moe_shared_expert_intermediate_size is not None
+            ):
+                shared_int: int = config.moe_shared_expert_intermediate_size
+            else:
+                shared_int = config.moe_intermediate_size * self.num_shared_experts
+            self.shared_experts = Param2MoEMLP(
+                intermediate_size=shared_int,
+                config=config,
+                quant_config=quant_config,
+                reduce_results=False,
+                prefix=f"{prefix}.shared_experts",
+            )
+        else:
+            self.shared_experts = None  # type: ignore[assignment]
+
+        self.experts = FusedMoE(
+            shared_experts=self.shared_experts,
+            num_experts=self.num_experts,
+            top_k=self.top_k,
+            hidden_size=self.hidden_size,
+            intermediate_size=config.moe_intermediate_size,
+            renormalize=self.norm_expert_prob,
+            quant_config=quant_config,
+            prefix=f"{prefix}.experts",
+            scoring_func=self.score_function,
+            e_score_correction_bias=self.gate.e_score_correction_bias,
+            num_expert_group=self.n_group,
+            topk_group=self.topk_group,
+            use_grouped_topk=self.use_grouped_topk,
+            routed_scaling_factor=self.routed_scaling_factor,
+        )
+
+    def maybe_get_fused_moe(self) -> FusedMoE:
+        return self.experts
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        num_tokens, hidden_dim = hidden_states.shape
+        hidden_states = hidden_states.view(-1, hidden_dim)
+
+        # Router: both input and weight must be float32 for numerical
+        # stability (mirrors the original Param2MoEGate behaviour).
+        # The gate nn.Linear weight lives in the model dtype (bfloat16),
+        # so we must cast both explicitly via F.linear instead of calling
+        # self.gate() which would hit a dtype mismatch.
+        router_logits = F.linear(
+            hidden_states.float(),
+            self.gate.weight.float(),
+        ).to(hidden_states.dtype)
+
+        expert_output = self.experts(
+            hidden_states=hidden_states,
+            router_logits=router_logits,
+        )
+
+        return expert_output.view(num_tokens, hidden_dim)
+
+
+class Param2MoEDecoderLayer(nn.Module):
+    """
+    Single transformer decoder block.
+
+    Dense for the first ``first_k_dense_replace`` layers; MoE thereafter.
+    """
+
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+
+        config = vllm_config.model_config.hf_config
+        cache_config = vllm_config.cache_config
+        quant_config = vllm_config.quant_config
+
+        hidden_size = config.hidden_size
+        # Derive the layer index from the prefix (e.g. "model.layers.3")
+        layer_idx = int(prefix.split(".")[-1])
+
+        self.input_layernorm = RMSNorm(hidden_size, eps=config.rms_norm_eps)
+        self.self_attn = Param2MoEAttention(
+            config=config,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            prefix=f"{prefix}.self_attn",
+        )
+        self.post_attention_layernorm = RMSNorm(hidden_size, eps=config.rms_norm_eps)
+
+        first_k_dense: int = getattr(config, "first_k_dense_replace", 1)
+        is_moe_layer = config.num_experts is not None and layer_idx >= first_k_dense
+
+        if is_moe_layer:
+            self.mlp = Param2MoEMoEBlock(
+                config=config,
+                quant_config=quant_config,
+                prefix=f"{prefix}.mlp",
+            )
+        else:
+            self.mlp = Param2MoEMLP(  # type: ignore[assignment]
+                intermediate_size=config.intermediate_size,
+                config=config,
+                quant_config=quant_config,
+                reduce_results=True,
+                prefix=f"{prefix}.mlp",
+            )
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        positions: torch.Tensor,
+        residual: torch.Tensor | None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        # Pre-norm + attention
+        if residual is None:
+            residual = hidden_states
+            hidden_states = self.input_layernorm(hidden_states)
+        else:
+            hidden_states, residual = self.input_layernorm(hidden_states, residual)
+
+        hidden_states = self.self_attn(
+            positions=positions,
+            hidden_states=hidden_states,
+        )
+
+        # Pre-norm + MLP
+        hidden_states, residual = self.post_attention_layernorm(hidden_states, residual)
+        hidden_states = self.mlp(hidden_states)
+        return hidden_states, residual
+
+
+class Param2MoEModel(nn.Module):
+    def __init__(
+        self,
+        *,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+
+        self.config = config
+        self.vocab_size = config.vocab_size
+        self.embed_dim = config.hidden_size
+        self.tie_word_embeddings: bool = getattr(config, "tie_word_embeddings", False)
+
+        # Embedding  (HF name: word_embeddings → vLLM name: embed_tokens)
+        if get_pp_group().is_first_rank or (
+            self.tie_word_embeddings and get_pp_group().is_last_rank
+        ):
+            self.embed_tokens = VocabParallelEmbedding(
+                self.vocab_size,
+                self.embed_dim,
+                quant_config=quant_config,
+                prefix=f"{prefix}.embed_tokens",
+            )
+        else:
+            self.embed_tokens = PPMissingLayer()
+
+        self.start_layer, self.end_layer, self.layers = make_layers(
+            config.num_hidden_layers,
+            lambda prefix: Param2MoEDecoderLayer(
+                vllm_config=vllm_config,
+                prefix=prefix,
+            ),
+            prefix=f"{prefix}.layers",
+        )
+
+        self.make_empty_intermediate_tensors = make_empty_intermediate_tensors_factory(
+            ["hidden_states", "residual"], config.hidden_size
+        )
+
+        if get_pp_group().is_last_rank:
+            self.norm = RMSNorm(self.embed_dim, eps=config.rms_norm_eps)
+        else:
+            self.norm = PPMissingLayer()
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        if get_pp_group().is_first_rank:
+            if inputs_embeds is not None:
+                hidden_states = inputs_embeds
+            else:
+                hidden_states = self.embed_input_ids(input_ids)
+            residual = None
+        else:
+            assert intermediate_tensors is not None
+            hidden_states = intermediate_tensors["hidden_states"]
+            residual = intermediate_tensors["residual"]
+
+        for layer in islice(self.layers, self.start_layer, self.end_layer):
+            hidden_states, residual = layer(hidden_states, positions, residual)
+
+        if not get_pp_group().is_last_rank:
+            return IntermediateTensors(
+                {"hidden_states": hidden_states, "residual": residual}
+            )
+
+        if residual is None:
+            hidden_states = self.norm(hidden_states)
+        else:
+            hidden_states, _ = self.norm(hidden_states, residual)
+        return hidden_states
+
+    def load_weights(
+        self,
+        weights: Iterable[tuple[str, torch.Tensor]],
+    ) -> set[str]:
+        """
+        Custom weight loader for the inner Param2MoEModel.
+
+        Receives weights that have already been renamed/normalised by the
+        outer model and whose ``model.`` prefix has been stripped by
+        ``AutoWeightsLoader``.  Handles:
+          1. Fused QKV split (query_key_value → qkv_proj q/k/v shards).
+          2. gate_proj + up_proj → gate_up_proj stacking (dense + shared-exp).
+          3. Routed-expert weights via the fused-MoE mapping.
+          4. All remaining weights via their default loader.
+        """
+        config = self.config
+        num_heads: int = config.num_attention_heads
+        num_kv_heads: int = config.num_key_value_heads
+        head_dim: int = config.head_dim or (config.hidden_size // num_heads)
+        q_split = num_heads * head_dim
+        kv_split = num_kv_heads * head_dim
+
+        stacked_params_mapping = [
+            # (vllm_param_name, ckpt_weight_name, shard_id)
+            ("gate_up_proj", "gate_proj", 0),
+            ("gate_up_proj", "up_proj", 1),
+        ]
+
+        params_dict = dict(self.named_parameters(remove_duplicate=False))
+        loaded_params: set[str] = set()
+        expert_params_mapping = self.get_expert_mapping()
+
+        for name, loaded_weight in weights:
+            # ------------------------------------------------------------------
+            # 1. Fused QKV: split into q / k / v shards for QKVParallelLinear
+            # ------------------------------------------------------------------
+            if name.endswith(".self_attn.qkv_proj.weight"):
+                if name not in params_dict:
+                    continue
+                if is_pp_missing_parameter(name, self):
+                    continue
+
+                param = params_dict[name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                q_w = loaded_weight[:q_split, :]
+                k_w = loaded_weight[q_split : q_split + kv_split, :]
+                v_w = loaded_weight[q_split + kv_split :, :]
+                weight_loader(param, q_w, "q")
+                weight_loader(param, k_w, "k")
+                weight_loader(param, v_w, "v")
+                loaded_params.add(name)
+                continue
+
+            # ------------------------------------------------------------------
+            # 2. gate_proj / up_proj → gate_up_proj (dense MLP + shared-exp.)
+            # ------------------------------------------------------------------
+            matched_stacked = False
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                if weight_name not in name:
+                    continue
+                if "mlp.experts" in name:  # routed experts handled below
+                    continue
+                new_name = name.replace(weight_name, param_name)
+                if new_name.endswith(".bias") and new_name not in params_dict:
+                    continue
+                if new_name not in params_dict:
+                    continue
+                if is_pp_missing_parameter(new_name, self):
+                    continue
+
+                param = params_dict[new_name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                weight_loader(param, loaded_weight, shard_id)
+                loaded_params.add(new_name)
+                matched_stacked = True
+                break
+
+            if matched_stacked:
+                continue
+
+            # ------------------------------------------------------------------
+            # 3. Routed expert weights → fused-MoE kernel layout
+            # ------------------------------------------------------------------
+            matched_expert = False
+            for (
+                param_name,
+                weight_name,
+                expert_id,
+                shard_id,
+            ) in expert_params_mapping:
+                if weight_name not in name:
+                    continue
+                new_name = name.replace(weight_name, param_name)
+                if is_pp_missing_parameter(new_name, self):
+                    continue
+                if new_name not in params_dict:
+                    continue
+
+                param = params_dict[new_name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                weight_loader(
+                    param,
+                    loaded_weight,
+                    name,
+                    shard_id=shard_id,
+                    expert_id=expert_id,
+                )
+                loaded_params.add(new_name)
+                matched_expert = True
+                break
+
+            if matched_expert:
+                continue
+
+            # ------------------------------------------------------------------
+            # 4. All other weights: direct load (layernorms, embed_tokens, …)
+            # ------------------------------------------------------------------
+            if name.endswith(".bias") and name not in params_dict:
+                continue
+            if name not in params_dict:
+                continue
+            if is_pp_missing_parameter(name, self):
+                continue
+
+            param = params_dict[name]
+            weight_loader = getattr(param, "weight_loader", default_weight_loader)
+            try:
+                weight_loader(param, loaded_weight)
+            except Exception as e:
+                raise RuntimeError(
+                    f"[param2moe] Failed to load weight '{name}' "
+                    f"with shape {tuple(loaded_weight.shape)} "
+                    f"into param type {type(param).__name__}: {e}"
+                ) from e
+            loaded_params.add(name)
+
+        return loaded_params
+
+    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
+        return fused_moe_make_expert_params_mapping(
+            self,
+            ckpt_gate_proj_name="gate_proj",
+            ckpt_down_proj_name="down_proj",
+            ckpt_up_proj_name="up_proj",
+            num_experts=self.config.num_experts,
+        )
+
+
+class Param2MoEMixtureOfExperts(MixtureOfExperts):
+    """Implements the vLLM MixtureOfExperts protocol for Param2MoE."""
+
+    expert_weights: list[torch.Tensor]
+
+    def extract_moe_parameters(self, example_moe: Param2MoEMoEBlock | None) -> None:
+        if example_moe is None:
+            raise RuntimeError(
+                "No Param2MoEMoEBlock found in model.layers. "
+                "Check first_k_dense_replace and num_experts in config."
+            )
+        self.num_logical_experts = example_moe.num_experts
+        self.num_routed_experts = example_moe.num_experts
+        self.num_shared_experts = example_moe.num_shared_experts
+
+        self.num_physical_experts = self.num_logical_experts
+        self.num_local_physical_experts = self.num_logical_experts
+        self.num_redundant_experts = 0
+
+    def update_physical_experts_metadata(
+        self,
+        num_physical_experts: int,
+        num_local_physical_experts: int,
+    ) -> None:
+        self.num_physical_experts = num_physical_experts
+        self.num_local_physical_experts = num_local_physical_experts
+        self.num_redundant_experts = num_physical_experts - self.num_logical_experts
+
+        for moe in self.moe_mlp_layers:
+            moe.n_physical_experts = num_physical_experts
+            moe.n_local_physical_experts = num_local_physical_experts
+            moe.n_redundant_experts = self.num_redundant_experts
+
+            fused = moe.experts
+            if hasattr(fused, "n_local_physical_experts"):
+                fused.n_local_physical_experts = num_local_physical_experts
+            if hasattr(fused, "n_physical_experts"):
+                fused.n_physical_experts = num_physical_experts
+            if hasattr(fused, "n_redundant_experts"):
+                fused.n_redundant_experts = self.num_redundant_experts
+            if hasattr(fused, "update_expert_map"):
+                fused.update_expert_map()
+
+    def set_eplb_state(
+        self,
+        expert_load_view: torch.Tensor,
+        logical_to_physical_map: torch.Tensor,
+        logical_replica_count: torch.Tensor,
+    ) -> None:
+        self.expert_weights.clear()
+        for layer_idx, layer in enumerate(self.moe_layers):
+            if hasattr(layer, "get_expert_weights"):
+                self.expert_weights.append(layer.get_expert_weights())
+            if hasattr(layer, "set_eplb_state"):
+                layer.set_eplb_state(
+                    moe_layer_idx=layer_idx,
+                    expert_load_view=expert_load_view,
+                    logical_to_physical_map=logical_to_physical_map,
+                    logical_replica_count=logical_replica_count,
+                )
+
+
+class Param2MoEForCausalLM(
+    nn.Module, SupportsPP, SupportsLoRA, Param2MoEMixtureOfExperts
+):
+    """
+    vLLM-native Param2MoE CausalLM.
+
+    Uses Grouped-Query Attention (GQA) with a Sigmoid-scored,
+    grouped-topk Mixture-of-Experts MLP.
+    """
+
+    # LoRA packed-module mapping. The fused gate_up_proj handles
+    # gate_proj and up_proj from the checkpoint.
+    packed_modules_mapping = {
+        "qkv_proj": ["query_key_value"],
+        "gate_up_proj": ["gate_proj", "up_proj"],
+    }
+
+    # Modules eligible for LoRA adaptation.
+    supported_lora_modules = [
+        "qkv_proj",
+        "o_proj",
+        "gate_up_proj",
+        "down_proj",
+    ]
+
+    # Embedding layers and their weight-tying counterparts.
+    embedding_modules = {
+        "embed_tokens": "input_embeddings",
+        "lm_head": "output_embeddings",
+    }
+
+    # Modules that need vocab-size padding for LoRA.
+    embedding_padding_modules = ["lm_head"]
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
+        super().__init__()
+
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+
+        self.config = config
+        self.quant_config = quant_config
+
+        self.model = Param2MoEModel(
+            vllm_config=vllm_config,
+            prefix=maybe_prefix(prefix, "model"),
+        )
+
+        self.tie_word_embeddings: bool = getattr(config, "tie_word_embeddings", False)
+        if get_pp_group().is_last_rank:
+            if self.tie_word_embeddings:
+                self.lm_head = self.model.embed_tokens
+            else:
+                self.lm_head = ParallelLMHead(
+                    config.vocab_size,
+                    config.hidden_size,
+                    quant_config=quant_config,
+                    prefix=maybe_prefix(prefix, "lm_head"),
+                )
+            self.logits_processor = LogitsProcessor(config.vocab_size)
+        else:
+            self.lm_head = PPMissingLayer()
+            self.logits_processor = None  # type: ignore[assignment]
+
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors
+        )
+
+        self.expert_weights: list[torch.Tensor] = []
+        self.num_moe_layers: int = 0
+        self.moe_layers: list = []
+        self.moe_mlp_layers: list = []
+
+        example_moe: Param2MoEMoEBlock | None = None
+        for layer in self.model.layers:
+            if isinstance(layer, PPMissingLayer):
+                continue
+            if isinstance(layer.mlp, Param2MoEMoEBlock):
+                example_moe = layer.mlp
+                self.moe_mlp_layers.append(layer.mlp)
+                self.moe_layers.append(layer.mlp.experts)
+                self.num_moe_layers += 1
+
+        if self.config.num_experts is not None:
+            self.extract_moe_parameters(example_moe)
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        return self.model(
+            input_ids=input_ids,
+            positions=positions,
+            intermediate_tensors=intermediate_tensors,
+            inputs_embeds=inputs_embeds,
+        )
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        if not get_pp_group().is_last_rank:
+            return None
+        return self.logits_processor(self.lm_head, hidden_states)
+
+    def load_weights(
+        self,
+        weights: Iterable[tuple[str, torch.Tensor]],
+    ) -> set[str]:
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(_rename_and_normalize_weights(weights))
diff --git a/vllm/model_executor/models/phi4mm.py b/vllm/model_executor/models/phi4mm.py
index 2db95b857563..6163b809670c 100644
--- a/vllm/model_executor/models/phi4mm.py
+++ b/vllm/model_executor/models/phi4mm.py
@@ -1034,7 +1034,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             self.vision_encoder = Phi4MMImageEncoder(
                 config,
                 quant_config,
-                prefix="model.vision_embed_tokens",
+                prefix=maybe_prefix(prefix, "model.vision_embed_tokens"),
                 model_dir=config._name_or_path,
             )
 
diff --git a/vllm/model_executor/models/phi4mm_audio.py b/vllm/model_executor/models/phi4mm_audio.py
index c3b09ed590dd..cd975227054d 100644
--- a/vllm/model_executor/models/phi4mm_audio.py
+++ b/vllm/model_executor/models/phi4mm_audio.py
@@ -586,7 +586,9 @@ def forward_embeddings(
             seq_len, batch_size, self.chunk_size, self.left_chunk
         )
         device = xs_pad.device
-        enc_streaming_mask = enc_streaming_mask.to(device)
+        enc_streaming_mask = enc_streaming_mask.contiguous().to(
+            device, non_blocking=True
+        )
         xs_pad = xs_pad.to(device)
 
         input_tensor = xs_pad
@@ -605,7 +607,9 @@ def forward_embeddings(
                 seq_len, batch_size, chunk_size_nc, left_chunk_nc
             )
             if device.type != "cpu":
-                enc_streaming_mask_nc = enc_streaming_mask_nc.to(device)
+                enc_streaming_mask_nc = enc_streaming_mask_nc.contiguous().to(
+                    device, non_blocking=True
+                )
             if masks is not None:
                 hs_mask_nc = masks & enc_streaming_mask_nc
             else:
@@ -917,7 +921,9 @@ def calculate_hs_mask(
         enc_streaming_mask = self._streaming_mask(
             max_audio_length, batch_size, self.chunk_size, self.left_chunk
         )
-        enc_streaming_mask = enc_streaming_mask.to(device)
+        enc_streaming_mask = enc_streaming_mask.contiguous().to(
+            device, non_blocking=True
+        )
         if mask is None:
             return enc_streaming_mask
 
diff --git a/vllm/model_executor/models/phi4siglip.py b/vllm/model_executor/models/phi4siglip.py
new file mode 100644
index 000000000000..d71a572f6ad8
--- /dev/null
+++ b/vllm/model_executor/models/phi4siglip.py
@@ -0,0 +1,429 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""vLLM support for microsoft/Phi-4-reasoning-vision-15B.
+
+Architecture: Siglip2 vision tower + MLP projector + Phi3 language model.
+"""
+
+import math
+from collections.abc import Iterable, Mapping, Sequence
+from typing import Annotated, Any, Literal
+
+import torch
+import torch.nn as nn
+from transformers import BatchFeature, PretrainedConfig, Siglip2VisionConfig
+
+from vllm.config import VllmConfig
+from vllm.config.multimodal import BaseDummyOptions
+from vllm.inputs import MultiModalDataDict
+from vllm.logger import init_logger
+from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.multimodal.inputs import (
+    MultiModalFieldConfig,
+    MultiModalKwargsItems,
+)
+from vllm.multimodal.parse import (
+    ImageSize,
+    MultiModalDataItems,
+)
+from vllm.multimodal.processing import (
+    BaseDummyInputsBuilder,
+    PromptReplacement,
+    PromptUpdate,
+)
+from vllm.multimodal.processing.processor import (
+    BaseMultiModalProcessor,
+    BaseProcessingInfo,
+)
+from vllm.sequence import IntermediateTensors
+from vllm.utils.tensor_schema import TensorSchema, TensorShape
+
+from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
+from .lfm2_siglip2 import Siglip2Model
+from .llava import LlavaMultiModalProjector
+from .utils import (
+    AutoWeightsLoader,
+    WeightsMapper,
+    init_vllm_registered_model,
+    maybe_prefix,
+)
+
+logger = init_logger(__name__)
+
+IMAGE_TOKEN_INDEX = -200
+DEFAULT_IMAGE_TOKEN = "<image>"
+
+# The HF processor replaces "<image>" with IMAGE_TOKEN_INDEX (-200) in input_ids.
+# Negative token IDs cause OverflowError during decoding, so we remap to a real
+# in-vocabulary token.  The Phi-4-reasoning-vision tokenizer ships with reserved
+# dummy tokens (<|dummy_0|> … <|dummy_83|>); we reuse the first one as the
+# image placeholder.  This mirrors how Phi-3-vision uses its dedicated <|image|>
+# token (ID 32044).
+_IMAGE_TOKEN_ID = 100256  # <|dummy_0|> in the Phi-4 tokenizer
+
+
+# ---------------------------------------------------------------------------
+# Processing
+# ---------------------------------------------------------------------------
+
+
+class Phi4SiglipProcessingInfo(BaseProcessingInfo):
+    def get_supported_mm_limits(self) -> Mapping[str, int | None]:
+        return {"image": None}
+
+    def _get_vision_config(self) -> dict:
+        return self.get_hf_config().vision_config  # type: ignore[attr-defined]
+
+    def _get_patch_size(self) -> int:
+        vc = self._get_vision_config()
+        if isinstance(vc, dict):
+            return vc.get("patch_size", 16)
+        return getattr(vc, "patch_size", 16)
+
+    def _get_max_num_patches(self) -> int:
+        return getattr(self.get_hf_config(), "max_num_patches", 3600)
+
+    def _get_min_num_patches(self) -> int:
+        return getattr(self.get_hf_config(), "min_num_patches", 256)
+
+    def get_num_image_tokens(
+        self,
+        *,
+        image_width: int,
+        image_height: int,
+    ) -> int:
+        patch_size = self._get_patch_size()
+        min_patches = self._get_min_num_patches()
+        max_patches = self._get_max_num_patches()
+
+        num_patches_h = image_height // patch_size
+        num_patches_w = image_width // patch_size
+        num_patches = max(num_patches_h * num_patches_w, 1)
+        num_patches = max(min(num_patches, max_patches), min_patches)
+        return num_patches
+
+    def get_image_size_with_most_features(self) -> ImageSize:
+        patch_size = self._get_patch_size()
+        max_patches = self._get_max_num_patches()
+        side = int(math.sqrt(max_patches)) * patch_size
+        return ImageSize(width=side, height=side)
+
+    def get_mm_max_tokens_per_item(
+        self, seq_len: int, mm_counts: Mapping[str, int]
+    ) -> Mapping[str, int]:
+        return {"image": self._get_max_num_patches()}
+
+
+class Phi4SiglipDummyInputsBuilder(
+    BaseDummyInputsBuilder[Phi4SiglipProcessingInfo],
+):
+    def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
+        num_images = mm_counts.get("image", 0)
+        return DEFAULT_IMAGE_TOKEN * num_images
+
+    def get_dummy_mm_data(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+        mm_options: Mapping[str, BaseDummyOptions],
+    ) -> MultiModalDataDict:
+        num_images = mm_counts.get("image", 0)
+        size = self.info.get_image_size_with_most_features()
+        return {
+            "image": self._get_dummy_images(
+                width=size.width,
+                height=size.height,
+                num_images=num_images,
+                overrides=mm_options.get("image"),
+            ),
+        }
+
+
+class Phi4SiglipMultiModalProcessor(
+    BaseMultiModalProcessor[Phi4SiglipProcessingInfo],
+):
+    def _call_hf_processor(
+        self,
+        prompt: str,
+        mm_data: Mapping[str, object],
+        mm_kwargs: Mapping[str, object],
+        tok_kwargs: Mapping[str, object],
+    ) -> BatchFeature:
+        processed = super()._call_hf_processor(
+            prompt=prompt,
+            mm_data=mm_data,
+            mm_kwargs=mm_kwargs,
+            tok_kwargs=tok_kwargs,
+        )
+
+        # The HF processor's tokenizer_image_token() replaces the "<image>"
+        # string with IMAGE_TOKEN_INDEX (-200) in input_ids.  This breaks
+        # vLLM's prompt-replacement pipeline which needs to find "<image>"
+        # as normal sub-tokens.  Re-tokenize with the plain tokenizer so
+        # that "<image>" stays as sub-tokens and can be located by
+        # PromptReplacement.
+        # NOTE: tokenizer.__call__() (not .encode()) must be used so that
+        # added/special tokens like <|user|>, <|end|> are kept as single IDs.
+        tokenizer = self.info.get_tokenizer()
+        new_ids = tokenizer(prompt).input_ids
+        processed["input_ids"] = torch.tensor([new_ids])
+
+        return processed
+
+    def _hf_processor_applies_updates(
+        self,
+        prompt_text: str,
+        mm_items: MultiModalDataItems,
+        hf_processor_mm_kwargs: Mapping[str, object],
+        tokenization_kwargs: Mapping[str, object],
+    ) -> bool:
+        # The HF processor replaces "<image>" with a single -200 placeholder
+        # but does NOT expand it into N vision-encoder tokens.  Since we also
+        # re-tokenize the prompt (see _call_hf_processor), prompt updates are
+        # never applied by the HF processor — vLLM handles the expansion via
+        # _apply_prompt_updates.
+        return False
+
+    def _get_mm_fields_config(
+        self,
+        hf_inputs: BatchFeature,
+        hf_processor_mm_kwargs: Mapping[str, object],
+    ) -> Mapping[str, MultiModalFieldConfig]:
+        return dict(
+            pixel_values=MultiModalFieldConfig.batched("image"),
+            pixel_attention_mask=MultiModalFieldConfig.batched("image"),
+            spatial_shapes=MultiModalFieldConfig.batched("image", keep_on_cpu=True),
+        )
+
+    def _get_prompt_updates(
+        self,
+        mm_items: MultiModalDataItems,
+        hf_processor_mm_kwargs: Mapping[str, Any],
+        out_mm_kwargs: MultiModalKwargsItems,
+    ) -> Sequence[PromptUpdate]:
+        def get_replacement(item_idx: int):
+            # Read the actual patch grid from the NaFlex processor's
+            # spatial_shapes output (same pattern as LFM2-VL).  This avoids
+            # predicting from raw image dimensions, which can diverge from
+            # the NaFlex resize/tile logic.
+            out_item = out_mm_kwargs["image"][item_idx]
+            spatial_shapes = out_item["spatial_shapes"].data
+            assert isinstance(spatial_shapes, torch.Tensor)
+            num_tokens = int(spatial_shapes.prod().item())
+            return [_IMAGE_TOKEN_ID] * num_tokens
+
+        return [
+            PromptReplacement(
+                modality="image",
+                target=DEFAULT_IMAGE_TOKEN,
+                replacement=get_replacement,
+            ),
+        ]
+
+
+# ---------------------------------------------------------------------------
+# Input schemas
+# ---------------------------------------------------------------------------
+
+
+class Phi4SiglipImagePixelInputs(TensorSchema):
+    """
+    Dimensions:
+        - bn: Batch size * number of images
+        - d: Max number of patches (padded across images in the batch)
+        - fd: Features per patch (patch_size * patch_size * channels)
+    """
+
+    type: Literal["pixel_values"] = "pixel_values"
+    pixel_values: Annotated[torch.Tensor, TensorShape("bn", "d", "fd")]
+    pixel_attention_mask: Annotated[torch.Tensor, TensorShape("bn", "d")]
+    spatial_shapes: Annotated[torch.Tensor, TensorShape("bn", 2)]
+
+
+# ---------------------------------------------------------------------------
+# Model
+# ---------------------------------------------------------------------------
+
+
+@MULTIMODAL_REGISTRY.register_processor(
+    Phi4SiglipMultiModalProcessor,
+    info=Phi4SiglipProcessingInfo,
+    dummy_inputs=Phi4SiglipDummyInputsBuilder,
+)
+class Phi4ForCausalLMV(nn.Module, SupportsMultiModal, SupportsPP):
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_prefix={
+            "model.vision_tower.vision_tower.vision_model.head.": None,
+            "model.vision_tower.vision_tower.": "vision_tower.",
+            "model.mm_projector.0.": "multi_modal_projector.linear_1.",
+            "model.mm_projector.2.": "multi_modal_projector.linear_2.",
+            "lm_head.": "language_model.lm_head.",
+            "model.": "language_model.model.",
+        },
+    )
+
+    @classmethod
+    def get_placeholder_str(cls, modality: str, i: int) -> str | None:
+        if modality.startswith("image"):
+            return DEFAULT_IMAGE_TOKEN
+        raise ValueError("Only image modality is supported")
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
+        super().__init__()
+
+        config: PretrainedConfig = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        self.config = config
+
+        vision_config_dict: dict = getattr(config, "vision_config", {})
+        if isinstance(vision_config_dict, dict):
+            if "patch_size" not in vision_config_dict:
+                vision_config_dict["patch_size"] = 16
+            siglip2_config = Siglip2VisionConfig(**vision_config_dict)
+        else:
+            siglip2_config = vision_config_dict
+
+        vision_hidden_size: int = config.mm_hidden_size  # type: ignore[attr-defined]
+        text_hidden_size: int = config.hidden_size  # type: ignore[attr-defined]
+
+        with self._mark_tower_model(vllm_config, "image"):
+            layer_idx = -2
+            num_hidden_layers = siglip2_config.num_hidden_layers + layer_idx + 1
+
+            self.vision_tower = Siglip2Model(
+                siglip2_config,
+                quant_config=quant_config,
+                num_hidden_layers_override=num_hidden_layers,
+                require_post_norm=False,
+                prefix=maybe_prefix(prefix, "vision_tower"),
+            )
+            self.multi_modal_projector = LlavaMultiModalProjector(
+                vision_hidden_size=vision_hidden_size,
+                text_hidden_size=text_hidden_size,
+                projector_hidden_act="gelu",
+                multimodal_projector_bias=True,
+                quant_config=quant_config,
+                prefix=maybe_prefix(prefix, "multi_modal_projector"),
+            )
+
+        with self._mark_language_model(vllm_config):
+            self.language_model = init_vllm_registered_model(
+                vllm_config=vllm_config,
+                hf_config=config,
+                prefix=maybe_prefix(prefix, "language_model"),
+                architectures=["Phi3ForCausalLM"],
+            )
+
+        self.make_empty_intermediate_tensors = (
+            self.language_model.make_empty_intermediate_tensors
+        )
+
+        self.configure_mm_token_handling(
+            vocab_size=config.vocab_size,  # type: ignore[attr-defined]
+            mm_token_ids=[_IMAGE_TOKEN_ID],
+        )
+
+    def _packed_from_padded(
+        self,
+        pixel_values: torch.Tensor,
+        pixel_attention_mask: torch.Tensor,
+        spatial_shapes: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        """Convert padded NaFlex tensors to packed format for Siglip2Model."""
+        valid_counts = pixel_attention_mask.sum(dim=1).to(torch.int32)
+        pixel_values_packed = pixel_values[pixel_attention_mask.bool()]
+        cu_seqlens = torch.zeros(
+            len(valid_counts) + 1,
+            dtype=torch.int32,
+            device=pixel_values.device,
+        )
+        cu_seqlens[1:] = valid_counts.cumsum(0)
+        max_seqlen = valid_counts.max()
+        return (
+            pixel_values_packed,
+            spatial_shapes,
+            cu_seqlens,
+            max_seqlen,
+        )
+
+    def _parse_and_validate_image_input(
+        self, **kwargs: object
+    ) -> Phi4SiglipImagePixelInputs | None:
+        pixel_values = kwargs.pop("pixel_values", None)
+        pixel_attention_mask = kwargs.pop("pixel_attention_mask", None)
+        spatial_shapes = kwargs.pop("spatial_shapes", None)
+        if pixel_values is None:
+            return None
+
+        return Phi4SiglipImagePixelInputs(
+            type="pixel_values",
+            pixel_values=pixel_values,
+            pixel_attention_mask=pixel_attention_mask,
+            spatial_shapes=spatial_shapes,
+        )
+
+    def _process_image_input(
+        self, image_input: Phi4SiglipImagePixelInputs
+    ) -> MultiModalEmbeddings:
+        pixel_values = image_input["pixel_values"]
+        pixel_attention_mask = image_input["pixel_attention_mask"]
+        spatial_shapes = image_input["spatial_shapes"]
+
+        (
+            pixel_values_packed,
+            spatial_shapes_packed,
+            cu_seqlens,
+            max_seqlen,
+        ) = self._packed_from_padded(pixel_values, pixel_attention_mask, spatial_shapes)
+
+        vision_features = self.vision_tower(
+            pixel_values_packed=pixel_values_packed,
+            spatial_shapes=spatial_shapes_packed,
+            cu_seqlens=cu_seqlens,
+            max_seqlen=max_seqlen,
+            select_layers=[-2],
+        )
+
+        if vision_features.dim() == 3:
+            vision_features = vision_features.squeeze(0)
+
+        image_features = self.multi_modal_projector(vision_features)
+
+        valid_counts = pixel_attention_mask.sum(dim=1).tolist()
+        return torch.split(image_features, [int(c) for c in valid_counts])
+
+    def embed_multimodal(self, **kwargs: object) -> MultiModalEmbeddings:
+        image_input = self._parse_and_validate_image_input(**kwargs)
+        if image_input is None:
+            return []
+
+        return self._process_image_input(image_input)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        **kwargs: object,
+    ) -> torch.Tensor | IntermediateTensors:
+        if intermediate_tensors is not None:
+            inputs_embeds = None
+
+        hidden_states = self.language_model.model(
+            input_ids,
+            positions,
+            intermediate_tensors,
+            inputs_embeds=inputs_embeds,
+        )
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        return self.language_model.compute_logits(hidden_states)
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
diff --git a/vllm/model_executor/models/phimoe.py b/vllm/model_executor/models/phimoe.py
index 0b55b7ec8392..5770420ce565 100644
--- a/vllm/model_executor/models/phimoe.py
+++ b/vllm/model_executor/models/phimoe.py
@@ -35,7 +35,10 @@
 from vllm.config import CacheConfig, VllmConfig
 from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.linear import (
     QKVParallelLinear,
     ReplicatedLinear,
@@ -281,7 +284,6 @@ def __init__(
             hidden_size=hidden_size,
             intermediate_size=intermediate_size,
             params_dtype=params_dtype,
-            reduce_results=True,
             renormalize=False,
             quant_config=quant_config,
             tp_size=tp_size,
@@ -515,7 +517,7 @@ def forward(
         return hidden_states
 
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
-        return FusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="w1",
             ckpt_down_proj_name="w2",
diff --git a/vllm/model_executor/models/pixtral.py b/vllm/model_executor/models/pixtral.py
index 0d891b8c9f20..447d6edd9864 100644
--- a/vllm/model_executor/models/pixtral.py
+++ b/vllm/model_executor/models/pixtral.py
@@ -8,7 +8,6 @@
 
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 from mistral_common.protocol.instruct.chunk import ImageChunk, TextChunk
 from mistral_common.protocol.instruct.messages import UserMessage
 from mistral_common.protocol.instruct.request import ChatCompletionRequest
@@ -26,16 +25,18 @@
 from vllm.config.multimodal import BaseDummyOptions
 from vllm.distributed import divide, get_tensor_model_parallel_world_size
 from vllm.inputs import MultiModalDataDict
-from vllm.model_executor.layers.activation import get_act_and_mul_fn
+from vllm.model_executor.layers.activation import SiluAndMul, get_act_and_mul_fn
 from vllm.model_executor.layers.conv import Conv2dLayer
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
     QKVParallelLinear,
+    ReplicatedLinear,
     RowParallelLinear,
 )
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.model_executor.models.utils import WeightsMapper
 from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalKwargsItems
 from vllm.multimodal.inputs import (
     MultiModalFieldConfig,
@@ -293,6 +294,23 @@ def _cached_apply_hf_processor(
 class PixtralForConditionalGeneration(
     nn.Module, SupportsLoRA, SupportsEagle3, SupportsMultiModal, SupportsPP
 ):
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_prefix={
+            "model.language_model.": "language_model.model.",
+            "model.vision_tower.": "vision_encoder.",
+            "model.multi_modal_projector.": "vision_language_adapter.",
+        },
+        orig_to_new_substr={
+            ".linear_1.": ".w_in.",
+            ".linear_2.": ".w_out.",
+        },
+    )
+
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+        "gate_up_proj": ["gate_proj", "up_proj"],
+    }
+
     @classmethod
     def get_placeholder_str(cls, modality: str, i: int) -> str | None:
         if modality.startswith("image"):
@@ -325,7 +343,10 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             )
 
         with self._mark_tower_model(vllm_config, "image"):
-            self.vision_encoder = VisionTransformer(self.vision_args)
+            self.vision_encoder = VisionTransformer(
+                self.vision_args,
+                prefix=maybe_prefix(prefix, "vision_encoder"),
+            )
             self.pre_mm_projector_norm = (
                 RMSNorm(self.vision_args.hidden_size, eps=1e-5)
                 if self.vision_args.add_pre_mm_projector_layer_norm
@@ -435,6 +456,29 @@ def get_eagle3_aux_hidden_state_layers(self) -> tuple[int, ...]:
         return self.language_model.get_eagle3_aux_hidden_state_layers()
 
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
+        _vision_encoder_stacked_params = [
+            # (param_name, shard_name, shard_id)
+            # HF format
+            (".qkv_proj", ".q_proj", "q"),
+            (".qkv_proj", ".k_proj", "k"),
+            (".qkv_proj", ".v_proj", "v"),
+            (".gate_up_proj", ".gate_proj", 0),
+            (".gate_up_proj", ".up_proj", 1),
+            # Mistral native (consolidated) format
+            (".qkv_proj", ".wq", "q"),
+            (".qkv_proj", ".wk", "k"),
+            (".qkv_proj", ".wv", "v"),
+            (".gate_up_proj", ".w1", 0),
+            (".gate_up_proj", ".w3", 1),
+        ]
+
+        # Remap Mistral native names to HF-style names
+        # used by the vLLM vision encoder modules.
+        _vision_encoder_name_remap = {
+            ".wo.": ".o_proj.",
+            ".w2.": ".down_proj.",
+        }
+
         def is_vision_encoder_weights(weight: tuple[str, torch.Tensor]):
             return weight[0].startswith(("vision_encoder", "vision_tower"))
 
@@ -449,7 +493,6 @@ def is_patch_merger(weight: tuple[str, torch.Tensor]):
         def is_pre_mm_projector_norm(weight: tuple[str, torch.Tensor]):
             return weight[0].startswith("pre_mm_projector_norm")
 
-        # Get references to parameters for direct loading
         vision_encoder_dict = (
             dict(self.vision_encoder.named_parameters())
             if self.vision_encoder is not None
@@ -472,29 +515,46 @@ def is_pre_mm_projector_norm(weight: tuple[str, torch.Tensor]):
         )
 
         def llm_weights_generator():
-            # Single pass over weights
             for name, w in weights:
                 if is_vision_encoder_weights((name, w)):
                     if _is_layer_none_or_staged(self.vision_encoder):
                         continue
-                    # Load vision encoder weights directly
                     trimmed_name = ".".join(name.split(".")[1:])
-                    param = vision_encoder_dict.get(trimmed_name)
-                    if param is not None:
-                        with torch.no_grad():
-                            default_weight_loader(param, w)
+                    for (
+                        param_name,
+                        weight_name,
+                        shard_id,
+                    ) in _vision_encoder_stacked_params:
+                        if weight_name in trimmed_name:
+                            trimmed_name = trimmed_name.replace(weight_name, param_name)
+                            param = vision_encoder_dict[trimmed_name]
+                            weight_loader = param.weight_loader
+                            weight_loader(param, w, shard_id)
+                            break
+                    else:
+                        for old, new in _vision_encoder_name_remap.items():
+                            if old in trimmed_name:
+                                trimmed_name = trimmed_name.replace(old, new)
+                                break
+
+                        param = vision_encoder_dict.get(trimmed_name)
+                        if param is not None:
+                            weight_loader = getattr(
+                                param, "weight_loader", default_weight_loader
+                            )
+                            weight_loader(param, w)
                 elif is_patch_merger((name, w)):
                     if _is_layer_none_or_staged(self.patch_merger):
                         continue
-                    # Load vision patch merger weights directly
                     trimmed_name = ".".join(name.split(".")[1:])
                     param = patch_merger_dict[trimmed_name]
-                    with torch.no_grad():
-                        default_weight_loader(param, w)
+                    weight_loader = getattr(
+                        param, "weight_loader", default_weight_loader
+                    )
+                    weight_loader(param, w)
                 elif is_pre_mm_projector_norm((name, w)):
                     if _is_layer_none_or_staged(self.pre_mm_projector_norm):
                         continue
-                    # Load vision pre_mm_projector_norm weights directly
                     trimmed_name = ".".join(name.split(".")[1:])
                     param = pre_mm_projector_norm_dict[trimmed_name]
                     with torch.no_grad():
@@ -502,26 +562,23 @@ def llm_weights_generator():
                 elif is_vision_lang_adapter_weights((name, w)):
                     if _is_layer_none_or_staged(self.vision_language_adapter):
                         continue
-                    # Load vision-language adapter weights directly
                     trimmed_name = ".".join(name.split(".")[1:])
                     param = vision_lang_adapter_dict.get(trimmed_name)
                     if param is not None:
-                        with torch.no_grad():
-                            default_weight_loader(param, w)
+                        weight_loader = getattr(
+                            param, "weight_loader", default_weight_loader
+                        )
+                        weight_loader(param, w)
                 else:
-                    # LLM weights: yield them to be loaded
-                    # by language_model.load_weights
-                    # Strip "language_model." prefix if present (HF sharded format)
                     name = name.removeprefix("language_model.")
                     yield (name, w)
 
-        # Now we call the language model load with the generator
         self.language_model.load_weights(llm_weights_generator())
 
     def get_mm_mapping(self) -> MultiModelKeys:
         return MultiModelKeys.from_string_field(
-            language_model="language_model",
-            connector="vision_language_adapter",
+            language_model="language_model.",
+            connector="vision_language_adapter.",
             tower_model="vision_encoder",
         )
 
@@ -614,29 +671,78 @@ def apply_rotary_emb_vit(
 
 
 class FeedForward(nn.Module):
-    def __init__(self, args: VisionEncoderArgs):
+    def __init__(
+        self,
+        hidden_size: int,
+        intermediate_size: int,
+        quant_config: QuantizationConfig | None = None,
+        bias: bool = False,
+        prefix: str = "",
+        reduce_results: bool = True,
+        disable_tp: bool = False,
+    ) -> None:
         super().__init__()
-        assert args.intermediate_size is not None
-        self.w1 = nn.Linear(args.hidden_size, args.intermediate_size, bias=False)
-        self.w2 = nn.Linear(args.intermediate_size, args.hidden_size, bias=False)
-        self.w3 = nn.Linear(args.hidden_size, args.intermediate_size, bias=False)
 
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        return self.w2(F.silu(self.w1(x)) * self.w3(x))
+        self.gate_up_proj = MergedColumnParallelLinear(
+            input_size=hidden_size,
+            output_sizes=[intermediate_size] * 2,
+            bias=bias,
+            quant_config=quant_config,
+            disable_tp=disable_tp,
+            prefix=f"{prefix}.w13",
+        )
+        self.down_proj = RowParallelLinear(
+            input_size=intermediate_size,
+            output_size=hidden_size,
+            bias=bias,
+            quant_config=quant_config,
+            reduce_results=reduce_results,
+            disable_tp=disable_tp,
+            prefix=f"{prefix}.w2",
+        )
+
+        self.act_fn = SiluAndMul()
+
+    def forward(self, x):
+        x, _ = self.gate_up_proj(x)
+        x = self.act_fn(x)
+        x, _ = self.down_proj(x)
+        return x
 
 
 class Attention(nn.Module):
-    def __init__(self, args: VisionEncoderArgs):
+    def __init__(
+        self,
+        args: VisionEncoderArgs,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+        disable_tp: bool = False,
+    ):
         super().__init__()
         self.args = args
         assert not args.hidden_size % args.num_attention_heads
-        self.n_heads = args.num_attention_heads
         self.head_dim = args.hidden_size // args.num_attention_heads
 
-        self.wq = nn.Linear(args.hidden_size, args.hidden_size, bias=False)
-        self.wk = nn.Linear(args.hidden_size, args.hidden_size, bias=False)
-        self.wv = nn.Linear(args.hidden_size, args.hidden_size, bias=False)
-        self.wo = nn.Linear(args.hidden_size, args.hidden_size, bias=False)
+        self.qkv_proj = QKVParallelLinear(
+            hidden_size=args.hidden_size,
+            head_size=self.head_dim,
+            total_num_heads=args.num_attention_heads,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.wqkv",
+            disable_tp=disable_tp,
+        )
+        self.o_proj = RowParallelLinear(
+            input_size=args.hidden_size,
+            output_size=args.hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.wo",
+            disable_tp=disable_tp,
+        )
+
+        tp_size = 1 if disable_tp else get_tensor_model_parallel_world_size()
+        self.n_heads = divide(args.num_attention_heads, tp_size)
 
     def forward(
         self,
@@ -646,7 +752,8 @@ def forward(
     ) -> torch.Tensor:
         batch, patches, _ = x.shape
 
-        q, k, v = self.wq(x), self.wk(x), self.wv(x)
+        qkv, _ = self.qkv_proj(x)
+        q, k, v = qkv.chunk(3, dim=-1)
         q = q.reshape(batch, patches, self.n_heads, self.head_dim)
         k = k.reshape(batch, patches, self.n_heads, self.head_dim)
         v = v.reshape(batch, patches, self.n_heads, self.head_dim)
@@ -663,14 +770,32 @@ def forward(
             out = out.transpose(1, 2)
 
         out = out.reshape(batch, patches, self.n_heads * self.head_dim)
-        return self.wo(out)
+        out, _ = self.o_proj(out)
+        return out
 
 
 class TransformerBlock(nn.Module):
-    def __init__(self, args: VisionEncoderArgs):
+    def __init__(
+        self,
+        args: VisionEncoderArgs,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+        disable_tp: bool = False,
+    ):
         super().__init__()
-        self.attention = Attention(args)
-        self.feed_forward = FeedForward(args)
+        self.attention = Attention(
+            args,
+            quant_config=quant_config,
+            prefix=f"{prefix}.attention",
+            disable_tp=disable_tp,
+        )
+        self.feed_forward = FeedForward(
+            args.hidden_size,
+            args.intermediate_size,
+            quant_config=quant_config,
+            prefix=f"{prefix}.feed_forward",
+            disable_tp=disable_tp,
+        )
         self.attention_norm = RMSNorm(args.hidden_size, eps=1e-5)
         self.ffn_norm = RMSNorm(args.hidden_size, eps=1e-5)
 
@@ -690,11 +815,24 @@ def forward(
 
 
 class Transformer(nn.Module):
-    def __init__(self, args: VisionEncoderArgs):
+    def __init__(
+        self,
+        args: VisionEncoderArgs,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+        disable_tp: bool = False,
+    ):
         super().__init__()
         self.layers = torch.nn.ModuleList()
-        for _ in range(args.num_hidden_layers):
-            self.layers.append(TransformerBlock(args))
+        for idx in range(args.num_hidden_layers):
+            self.layers.append(
+                TransformerBlock(
+                    args,
+                    quant_config=quant_config,
+                    prefix=f"{prefix}.layers.{idx}",
+                    disable_tp=disable_tp,
+                )
+            )
 
     def forward(
         self,
@@ -727,9 +865,15 @@ def position_meshgrid(
 
 
 class VisionTransformer(nn.Module):
-    def __init__(self, args: VisionEncoderArgs):
+    def __init__(
+        self,
+        args: VisionEncoderArgs,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ):
         super().__init__()
         self.args = args
+        disable_tp = is_vit_use_data_parallel()
         self.patch_conv = Conv2dLayer(
             in_channels=args.num_channels,
             out_channels=args.hidden_size,
@@ -738,7 +882,12 @@ def __init__(self, args: VisionEncoderArgs):
             bias=False,
         )
         self.ln_pre = RMSNorm(args.hidden_size, eps=1e-5)
-        self.transformer = Transformer(args)
+        self.transformer = Transformer(
+            args,
+            quant_config=quant_config,
+            prefix=f"{prefix}.transformer",
+            disable_tp=disable_tp,
+        )
 
         head_dim = self.args.hidden_size // self.args.num_attention_heads
         assert head_dim % 2 == 0, "ROPE requires even head_dim"
@@ -822,13 +971,16 @@ class VisionLanguageAdapter(nn.Module):
     def __init__(self, args: VisionEncoderArgs, dim: int):
         super().__init__()
         assert isinstance(args, VisionEncoderArgs)
-        self.w_in = nn.Linear(
+        self.w_in = ReplicatedLinear(
             args.hidden_size,
             dim,
             bias=args.adapter_bias,
+            return_bias=False,
         )
         self.gelu = nn.GELU()
-        self.w_out = nn.Linear(dim, dim, bias=args.adapter_bias)
+        self.w_out = ReplicatedLinear(
+            dim, dim, bias=args.adapter_bias, return_bias=False
+        )
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         return self.w_out(self.gelu(self.w_in(x)))
@@ -852,10 +1004,8 @@ def __init__(
         self.spatial_merge_size = spatial_merge_size
         self.mlp_input_dim = mlp_input_dim
 
-        self.merging_layer = nn.Linear(
-            mlp_input_dim,
-            vision_encoder_dim,
-            bias=use_mlp_bias,
+        self.merging_layer = ReplicatedLinear(
+            mlp_input_dim, vision_encoder_dim, bias=use_mlp_bias, return_bias=False
         )
 
     def forward(
diff --git a/vllm/model_executor/models/plamo2.py b/vllm/model_executor/models/plamo2.py
index e38b7b166ba9..5fd925cf0bee 100644
--- a/vllm/model_executor/models/plamo2.py
+++ b/vllm/model_executor/models/plamo2.py
@@ -32,15 +32,16 @@
     MambaStateCopyFuncCalculator,
     MambaStateDtypeCalculator,
     MambaStateShapeCalculator,
+    is_conv_state_dim_first,
 )
 from vllm.model_executor.layers.mamba.ops.causal_conv1d import (
     causal_conv1d_fn,
     causal_conv1d_update,
 )
-from vllm.model_executor.layers.mamba.ops.mamba_ssm import selective_state_update
 from vllm.model_executor.layers.mamba.ops.ssd_combined import (
     mamba_chunk_scan_combined_varlen,
 )
+from vllm.model_executor.layers.mamba.ops.ssu_dispatch import selective_state_update
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -71,6 +72,7 @@
 from vllm.utils.torch_utils import direct_register_custom_op
 from vllm.v1.attention.backend import AttentionMetadata
 from vllm.v1.attention.backends.mamba2_attn import Mamba2AttentionMetadata
+from vllm.v1.attention.backends.registry import MambaAttentionBackendEnum
 
 # Only used for type hinting.
 if TYPE_CHECKING:
@@ -266,7 +268,13 @@ def forward_impl(
             assert isinstance(attn_metadata, Mamba2AttentionMetadata)
             self_kv_cache = self.kv_cache
             # conv_state = (..., dim, width-1) yet contiguous along 'dim'
-            conv_state = self_kv_cache[0].transpose(-1, -2)
+            # conv_state must be (..., dim, width-1) for the conv kernels.
+            # DS layout stores it that way directly; SD layout needs a transpose.
+            conv_state = (
+                self_kv_cache[0]
+                if is_conv_state_dim_first()
+                else self_kv_cache[0].transpose(-1, -2)
+            )
             ssm_state = self_kv_cache[1]
             state_indices_tensor_p = attn_metadata.state_indices_tensor_p
             state_indices_tensor_d = attn_metadata.state_indices_tensor_d
@@ -440,8 +448,8 @@ def forward_impl(
                 B,
                 C,
                 D,
+                dt_bias,
                 z=gate_d.reshape(num_decodes, -1, self.head_dim),
-                dt_bias=dt_bias,
                 dt_softplus=True,
                 state_batch_indices=state_indices_tensor_d,
                 out=preallocated_ssm_out_d.view(num_decodes, -1, self.head_dim),
@@ -471,8 +479,8 @@ def get_state_shape(self) -> tuple[tuple[int, ...], tuple[int, ...]]:
         )
 
     @property
-    def mamba_type(self) -> str:
-        return "mamba2"
+    def mamba_type(self) -> MambaAttentionBackendEnum:
+        return MambaAttentionBackendEnum.MAMBA2
 
 
 def plamo2_mamba_mixer(
@@ -790,6 +798,83 @@ def forward(
         hidden_states, _ = self.norm(hidden_states, residual)
         return hidden_states
 
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+        for name, loaded_weight in weights:
+            # Update the weight names to be compatible with the vllm version
+            # of the model.
+            # Do not change the order of the replacements.
+            replacements = {
+                # Rename incompatible weight names.
+                ".A_log": ".A",
+                ".B_norm_weight": ".B_norm.weight",
+                ".C_norm_weight": ".C_norm.weight",
+                ".dt_norm_weight": ".dt_norm.weight",
+                ".q_weight": ".q_norm.weight",
+                ".k_weight": ".k_norm.weight",
+            }
+            # Apply replacements based on the defined mappings
+            for old, new in replacements.items():
+                if old in name:
+                    name = name.replace(old, new)
+
+            # Reshape the in_proj weights to match the shape expected
+            # by MergedColumnParallelLinear.
+            # This works both for unquantized weights and
+            # for quantized weights.
+            # In the quantized case, the weights are already transposed.
+            # Also, in addition to the quantized weights,
+            # the zero points and scales have to be reshaped as well.
+            # Packing should not be affected by this.
+            if (
+                ".mixer.in_proj.weight" in name
+                or "mixer.in_proj.qweight" in name
+                or "mixer.in_proj.scales" in name
+                or "mixer.in_proj.qzeros" in name
+            ):
+                if "mixer.in_proj.weight" in name:
+                    loaded_weight = loaded_weight.transpose(0, 1)
+                # for weight:
+                # loaded_weight.shape[0] == self.config.hidden_size
+                # for qweight:
+                # loaded_weight.shape[0] == self.config.hidden_size // param.pack_factor  # noqa
+                # for scales and qzeros:
+                # loaded_weight.shape[0] == self.config.hidden_size // self.vllm_config.quant_config.group_size  # noqa
+                loaded_weight = loaded_weight.reshape(
+                    loaded_weight.shape[0], self.config.mamba_num_heads, -1
+                )
+                gate_weight, hidden_states_weight = loaded_weight.chunk(2, dim=-1)
+                gate_weight = gate_weight.reshape(loaded_weight.shape[0], -1)
+                hidden_states_weight = hidden_states_weight.reshape(
+                    loaded_weight.shape[0], -1
+                )
+                loaded_weight = torch.cat([gate_weight, hidden_states_weight], dim=-1)
+                if "mixer.in_proj.weight" in name:
+                    loaded_weight = loaded_weight.transpose(0, 1)
+
+            # Offset parameter with vllm's RMSNorm haven't been supported yet.
+            if ".pre_mixer_norm" in name:
+                loaded_weight += 1.0
+            elif ".post_mixer_norm" in name:
+                loaded_weight += 1.0 / 5
+            elif ".pre_mlp_norm" in name:
+                loaded_weight += 1.0
+            elif ".post_mlp_norm" in name:
+                loaded_weight += 1.0 / (5**1.5)
+            elif name == "norm.weight":
+                loaded_weight += 1.0
+
+            # Skip layers on other devices.
+            if is_pp_missing_parameter(name, self):
+                continue
+
+            param = params_dict[name]
+            weight_loader = getattr(param, "weight_loader", default_weight_loader)
+            weight_loader(param, loaded_weight)
+            loaded_params.add(name)
+        return loaded_params
+
 
 class Plamo2ForCausalLM(
     torch.nn.Module, HasInnerState, SupportsLoRA, SupportsPP, IsHybrid
@@ -899,88 +984,9 @@ def compute_logits(
         logits = self.logits_processor(self.lm_head, hidden_states)
         return logits
 
-    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
-        params_dict = dict(self.named_parameters())
-        for name, loaded_weight in weights:
-            # Both tie_word_embeddings=True and lm_head.weight in the safetensor
-            # at the same time causes dict key access error.
-            if name == "lm_head.weight" and self.config.tie_word_embeddings:
-                assert "lm_head.weight" not in params_dict
-                continue
-            # Same workaround as AutoWeightsLoader for GPTQModel
-            if any(
-                substr in name
-                for substr in AutoWeightsLoader.ROTARY_EMBEDS_UNUSED_WEIGHTS
-            ):
-                continue
-
-            # Update the weight names to be compatible with the vllm version
-            # of the model.
-            # Do not change the order of the replacements.
-            replacements = {
-                # Rename incompatible weight names.
-                ".A_log": ".A",
-                ".B_norm_weight": ".B_norm.weight",
-                ".C_norm_weight": ".C_norm.weight",
-                ".dt_norm_weight": ".dt_norm.weight",
-                ".q_weight": ".q_norm.weight",
-                ".k_weight": ".k_norm.weight",
-            }
-            # Apply replacements based on the defined mappings
-            for old, new in replacements.items():
-                if old in name:
-                    name = name.replace(old, new)
-
-            # Reshape the in_proj weights to match the shape expected
-            # by MergedColumnParallelLinear.
-            # This works both for unquantized weights and
-            # for quantized weights.
-            # In the quantized case, the weights are already transposed.
-            # Also, in addition to the quantized weights,
-            # the zero points and scales have to be reshaped as well.
-            # Packing should not be affected by this.
-            if (
-                ".mixer.in_proj.weight" in name
-                or "mixer.in_proj.qweight" in name
-                or "mixer.in_proj.scales" in name
-                or "mixer.in_proj.qzeros" in name
-            ):
-                if "mixer.in_proj.weight" in name:
-                    loaded_weight = loaded_weight.transpose(0, 1)
-                # for weight:
-                # loaded_weight.shape[0] == self.config.hidden_size
-                # for qweight:
-                # loaded_weight.shape[0] == self.config.hidden_size // param.pack_factor  # noqa
-                # for scales and qzeros:
-                # loaded_weight.shape[0] == self.config.hidden_size // self.vllm_config.quant_config.group_size  # noqa
-                loaded_weight = loaded_weight.reshape(
-                    loaded_weight.shape[0], self.config.mamba_num_heads, -1
-                )
-                gate_weight, hidden_states_weight = loaded_weight.chunk(2, dim=-1)
-                gate_weight = gate_weight.reshape(loaded_weight.shape[0], -1)
-                hidden_states_weight = hidden_states_weight.reshape(
-                    loaded_weight.shape[0], -1
-                )
-                loaded_weight = torch.cat([gate_weight, hidden_states_weight], dim=-1)
-                if "mixer.in_proj.weight" in name:
-                    loaded_weight = loaded_weight.transpose(0, 1)
-
-            # Offset parameter with vllm's RMSNorm haven't been supported yet.
-            if ".pre_mixer_norm" in name:
-                loaded_weight += 1.0
-            elif ".post_mixer_norm" in name:
-                loaded_weight += 1.0 / 5
-            elif ".pre_mlp_norm" in name:
-                loaded_weight += 1.0
-            elif ".post_mlp_norm" in name:
-                loaded_weight += 1.0 / (5**1.5)
-            elif "model.norm.weight" in name:
-                loaded_weight += 1.0
-
-            # Skip layers on other devices.
-            if is_pp_missing_parameter(name, self):
-                continue
-
-            param = params_dict[name]
-            weight_loader = getattr(param, "weight_loader", default_weight_loader)
-            weight_loader(param, loaded_weight)
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(
+            self,
+            skip_prefixes=(["lm_head."] if self.config.tie_word_embeddings else None),
+        )
+        return loader.load_weights(weights)
diff --git a/vllm/model_executor/models/qianfan_ocr.py b/vllm/model_executor/models/qianfan_ocr.py
new file mode 100644
index 000000000000..ef2bec1e2900
--- /dev/null
+++ b/vllm/model_executor/models/qianfan_ocr.py
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+# QianfanOCR is built on InternVL with a Qwen3 language backbone.
+# The model architecture and weights are fully compatible with InternVLChatModel,
+# only the config model_type / architectures strings differ.
+
+from transformers import PretrainedConfig
+
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.quantization.fp8 import Fp8Config
+from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.transformers_utils.processors.internvl import (
+    InternVLImageProcessor,
+    InternVLProcessor,
+)
+
+from .internvl import (
+    BaseInternVLDummyInputsBuilder,
+    BaseInternVLMultiModalProcessor,
+    BaseInternVLProcessingInfo,
+    InternVLChatModel,
+)
+
+
+class QianfanOCRProcessingInfo(BaseInternVLProcessingInfo):
+    """Image-only ProcessingInfo for QianfanOCR (no video support)."""
+
+    def get_hf_processor(self, **kwargs: object) -> InternVLProcessor:
+        config = self.get_hf_config()
+        vision_config = config.vision_config
+
+        kwargs = self.ctx.get_merged_mm_kwargs(kwargs)
+        kwargs.setdefault("image_size", vision_config.image_size)
+        kwargs.setdefault("min_dynamic_patch", config.min_dynamic_patch)
+        kwargs.setdefault("max_dynamic_patch", config.max_dynamic_patch)
+        kwargs.setdefault("dynamic_image_size", config.dynamic_image_size)
+        kwargs.setdefault("use_thumbnail", config.use_thumbnail)
+
+        image_processor = InternVLImageProcessor(**kwargs)
+        image_size = image_processor.image_size
+        patch_size = vision_config.patch_size
+        downsample_ratio = config.downsample_ratio
+        image_seq_length = int((image_size // patch_size) ** 2 * (downsample_ratio**2))
+
+        return InternVLProcessor(
+            tokenizer=self.get_tokenizer(),
+            image_processor=image_processor,
+            video_processor=None,
+            image_seq_length=image_seq_length,
+            ctx_video_token=None,
+        )
+
+
+@MULTIMODAL_REGISTRY.register_processor(
+    BaseInternVLMultiModalProcessor,
+    info=QianfanOCRProcessingInfo,
+    dummy_inputs=BaseInternVLDummyInputsBuilder,
+)
+class QianfanOCRForConditionalGeneration(InternVLChatModel):
+    """QianfanOCR multimodal model.
+
+    Identical in structure to InternVLChatModel (InternViT vision encoder +
+    pixel-shuffle MLP connector + Qwen3 language model).  This class exists
+    solely to register the ``QianfanOCRForConditionalGeneration`` architecture
+    name that appears in the model's config.json.
+    """
+
+    def _patch_quant_config(
+        self, config: PretrainedConfig, quant_config: QuantizationConfig
+    ) -> None:
+        super()._patch_quant_config(config, quant_config)
+        # ignore vit layers to preserve model performance
+        if isinstance(quant_config, Fp8Config):
+            _FP8_IGNORED_LAYERS = [
+                *(
+                    layer
+                    for i in range(config.vision_config.num_hidden_layers)
+                    for layer in [
+                        f"vision_model.encoder.layers.{i}.attn.qkv",
+                        f"vision_model.encoder.layers.{i}.attn.proj",
+                        f"vision_model.encoder.layers.{i}.mlp.fc1",
+                        f"vision_model.encoder.layers.{i}.mlp.fc2",
+                    ]
+                ),
+                "language_model.lm_head",
+                "mlp1.1",
+                "mlp1.3",
+            ]
+            for layer in _FP8_IGNORED_LAYERS:
+                if layer not in quant_config.ignored_layers:
+                    quant_config.ignored_layers.append(layer)
diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py
index 27aa6175b9bc..b83fedc70db0 100644
--- a/vllm/model_executor/models/qwen2.py
+++ b/vllm/model_executor/models/qwen2.py
@@ -312,48 +312,15 @@ def forward(
         return hidden_states, residual
 
 
-def qwen_2_model_invariants(
-    input_ids: torch.Tensor,
-    positions: torch.Tensor,
-    intermediate_tensors: IntermediateTensors | None = None,
-    inputs_embeds: torch.Tensor | None = None,
-):
-    """Shape invariants for Qwen2Model Model, those are translated to
-    runtime assertions for unbacked dynamic shapes and are compiled away for
-    backed"""
-    # All these should be equal.
-    # input_ids.size()[0]
-    # positions.size()[-1]
-    # intermediate_tensors["hidden_states"].size()[0]
-    # inputs_embeds.size()[0]
-    torch._check(input_ids.size()[0] == positions.size()[-1])
-    if intermediate_tensors is not None:
-        torch._check(
-            input_ids.size()[0] == intermediate_tensors["hidden_states"].size()[0]
-        )
-
-    if inputs_embeds is not None:
-        torch._check(input_ids.size()[0] == inputs_embeds.size()[0])
-
-    # Hidden dimensions should match (hidden_size)
-    # intermediate_tensors["hidden_states"].size()[1]
-    # inputs_embeds.size()[1]
-    if inputs_embeds is not None and intermediate_tensors is not None:
-        torch._check(
-            inputs_embeds.size()[1] == intermediate_tensors["hidden_states"].size()[1]
-        )
-
-
 @support_torch_compile(
     dynamic_arg_dims={
-        "input_ids": 0,
+        "input_ids": {0: "b"},
         # positions is of shape (3, seq_len) if mrope is enabled for qwen2-vl,
         # otherwise (seq_len, ).
-        "positions": -1,
-        "intermediate_tensors": 0,
-        "inputs_embeds": 0,
-    },
-    shape_invariants=qwen_2_model_invariants,
+        "positions": {-1: "b"},
+        "intermediate_tensors": {0: "b"},
+        "inputs_embeds": {0: "b"},
+    }
 )
 class Qwen2Model(nn.Module, EagleModelMixin):
     def __init__(
diff --git a/vllm/model_executor/models/qwen2_5_omni_thinker.py b/vllm/model_executor/models/qwen2_5_omni_thinker.py
index 8e106baec262..ec74cb0f25c4 100644
--- a/vllm/model_executor/models/qwen2_5_omni_thinker.py
+++ b/vllm/model_executor/models/qwen2_5_omni_thinker.py
@@ -211,15 +211,12 @@ def merge_interleaved_embeddings(
 
     # Scatter each modality to its positions
     if video_embeds:
-        video_positions = is_video.nonzero(as_tuple=True)[0]
-        inputs_embeds[video_positions] = torch.cat(video_embeds, dim=0)
+        inputs_embeds[is_video] = torch.cat(video_embeds, dim=0)
     if audio_embeds:
-        audio_positions = is_audio.nonzero(as_tuple=True)[0]
-        inputs_embeds[audio_positions] = torch.cat(audio_embeds, dim=0)
+        inputs_embeds[is_audio] = torch.cat(audio_embeds, dim=0)
     if other_embeds:
         other_mask = is_multimodal & ~is_video & ~is_audio
-        other_positions = other_mask.nonzero(as_tuple=True)[0]
-        inputs_embeds[other_positions] = torch.cat(other_embeds, dim=0)
+        inputs_embeds[other_mask] = torch.cat(other_embeds, dim=0)
 
     return inputs_embeds
 
@@ -281,15 +278,15 @@ def _qwen2_5_omni_thinker_field_config(hf_inputs: Mapping[str, torch.Tensor]):
             image_embeds=MultiModalFieldConfig.flat_from_sizes(
                 "image", image_embed_grid_sizes
             ),
-            image_grid_thw=MultiModalFieldConfig.batched("image"),
+            image_grid_thw=MultiModalFieldConfig.batched("image", keep_on_cpu=True),
             pixel_values_videos=MultiModalFieldConfig.flat_from_sizes(
                 "video", video_grid_sizes
             ),
             video_embeds=MultiModalFieldConfig.flat_from_sizes(
                 "video", video_embed_grid_sizes
             ),
-            video_grid_thw=MultiModalFieldConfig.batched("video"),
-            second_per_grid_ts=MultiModalFieldConfig.batched("video"),
+            video_grid_thw=MultiModalFieldConfig.batched("video", keep_on_cpu=True),
+            second_per_grid_ts=MultiModalFieldConfig.batched("video", keep_on_cpu=True),
             use_audio_in_video=MultiModalFieldConfig.shared("video", num_videos),
         )
 
@@ -955,8 +952,6 @@ def _parse_and_validate_video_input(
     def _process_audio_input(
         self,
         audio_input: Qwen2_5OmniAudioFeatureInputs,
-        audio_hashes: list[str] | None = None,
-        cached_audio_features: torch.Tensor | None = None,
     ) -> torch.Tensor:
         input_features = audio_input["input_features"]
         audio_feature_lengths = audio_input["audio_feature_lengths"]
@@ -993,8 +988,6 @@ def _process_image_input(
     def _process_video_input(
         self,
         video_input: Qwen2_5_VLVideoInputs,
-        video_hashes: list[str] = None,
-        cached_video_embeds: torch.Tensor = None,
     ) -> torch.Tensor:
         if video_input["type"] == "video_embeds":
             return video_input["video_embeds"].type(self.visual.dtype)
@@ -1457,8 +1450,9 @@ def embed_input_ids(
         video_token_id = self.config.video_token_index
         audio_token_id = self.config.audio_token_index
 
-        is_video = is_multimodal & (input_ids == video_token_id)
-        is_audio = is_multimodal & (input_ids == audio_token_id)
+        input_ids_cpu = input_ids.cpu()
+        is_video = is_multimodal & (input_ids_cpu == video_token_id)
+        is_audio = is_multimodal & (input_ids_cpu == audio_token_id)
 
         num_video = is_video.sum().item()
         num_audio = is_audio.sum().item()
diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index c11684b4b89b..0dec414304cc 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -84,12 +84,15 @@
 from vllm.sequence import IntermediateTensors
 from vllm.utils.platform_utils import is_pin_memory_available
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
+from vllm.utils.torch_utils import async_tensor_h2d
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
+from vllm.v1.worker.encoder_cudagraph_defs import EncoderCudaGraphReplayBuffers
 
 from .interfaces import (
     MultiModalEmbeddings,
     SupportsEagle,
     SupportsEagle3,
+    SupportsEncoderCudaGraph,
     SupportsLoRA,
     SupportsMRoPE,
     SupportsMultiModal,
@@ -110,6 +113,7 @@
     maybe_prefix,
 )
 from .vision import (
+    get_fp8_padded_hidden_size,
     get_vit_attn_backend,
     is_vit_use_data_parallel,
     run_dp_sharded_mrope_vision_model,
@@ -366,7 +370,8 @@ def forward(
         rotary_pos_emb_cos: torch.Tensor,
         rotary_pos_emb_sin: torch.Tensor,
         max_seqlen: torch.Tensor,  # Only used for Flash Attention
-        sequence_lengths: torch.Tensor,  # Only used for FlashInfer CuDNN backend
+        # Only used for FlashInfer CuDNN backend.
+        sequence_lengths: torch.Tensor | None,
     ) -> torch.Tensor:
         # [s, b, c] --> [s, b, head * 3 * head_dim]
         x, _ = self.qkv(x)
@@ -423,6 +428,7 @@ def forward(
     dynamic_arg_dims={
         "x": 0,
         "cu_seqlens": 0,
+        "sequence_lengths": 0,
         "rotary_pos_emb_cos": 0,
         "rotary_pos_emb_sin": 0,
     },
@@ -468,6 +474,8 @@ def forward(
         rotary_pos_emb_cos: torch.Tensor,
         rotary_pos_emb_sin: torch.Tensor,
         max_seqlen: torch.Tensor,  # Only used for Flash Attention
+        # Only used for FlashInfer CuDNN backend.
+        sequence_lengths: torch.Tensor | None = None,
     ) -> torch.Tensor:
         x_attn = self.attn(
             self.norm1(x),
@@ -475,7 +483,7 @@ def forward(
             rotary_pos_emb_cos=rotary_pos_emb_cos,
             rotary_pos_emb_sin=rotary_pos_emb_sin,
             max_seqlen=max_seqlen,
-            sequence_lengths=None,
+            sequence_lengths=sequence_lengths,
         )
         x_fused_norm, residual = self.norm2(x, residual=x_attn)
         x = residual + self.mlp(x_fused_norm)
@@ -595,6 +603,12 @@ def __init__(
         self.spatial_merge_size = vision_config.spatial_merge_size
         self.fullatt_block_indexes = vision_config.fullatt_block_indexes
         self.spatial_merge_unit = self.spatial_merge_size**2
+        use_data_parallel = is_vit_use_data_parallel()
+        self.tp_size = (
+            1
+            if use_data_parallel
+            else parallel_state.get_tensor_model_parallel_world_size()
+        )
         self.patch_embed = Qwen2_5_VisionPatchEmbed(
             patch_size=patch_size,
             temporal_patch_size=temporal_patch_size,
@@ -604,6 +618,11 @@ def __init__(
 
         norm_layer = partial(RMSNorm, eps=norm_eps)
         head_dim = self.hidden_size // self.num_heads
+        # FP8 attention: Q/K/V become independent contiguous tensors after
+        # quantization, so FlashInfer cu_seqlens uses uniform stride.
+        self.fp8_padded_hidden_size = get_fp8_padded_hidden_size(
+            self.num_heads, head_dim
+        )
         self.rotary_pos_emb = get_rope(
             head_size=head_dim,
             max_position=8192,
@@ -677,6 +696,7 @@ def rotary_pos_emb_thw(self, t, h, w):
         # Use pre-computed cos_sin_cache from RotaryEmbedding
         cos, sin = self.rotary_pos_emb.get_cos_sin(max_size)
 
+        pos_ids = pos_ids.to(cos.device, non_blocking=True)
         cos_combined = cos[pos_ids].flatten(1)
         sin_combined = sin[pos_ids].flatten(1)
 
@@ -735,9 +755,10 @@ def get_rope_by_thw(self, t, h, w):
         window_index_thw, cu_seqlens_window_thw = self.get_window_index_thw(t, h, w)
         cos_thw, sin_thw = self.rotary_pos_emb_thw(t, h, w)
 
-        cos_thw = cos_thw[window_index_thw, :, :]
+        window_index_thw_dev = window_index_thw.to(cos_thw.device, non_blocking=True)
+        cos_thw = cos_thw[window_index_thw_dev, :, :]
         cos_thw = cos_thw.flatten(start_dim=0, end_dim=1)
-        sin_thw = sin_thw[window_index_thw, :, :]
+        sin_thw = sin_thw[window_index_thw_dev, :, :]
         sin_thw = sin_thw.flatten(start_dim=0, end_dim=1)
 
         cu_seqlens_thw = torch.repeat_interleave(
@@ -771,22 +792,54 @@ def invert_permutation(perm: torch.Tensor) -> torch.Tensor:
         inv[perm] = torch.arange(perm.numel(), device=perm.device, dtype=perm.dtype)
         return inv
 
-    def forward(
+    def prepare_encoder_metadata(
         self,
-        x: torch.Tensor,
         grid_thw: list[list[int]],
-    ) -> torch.Tensor:
+        *,
+        max_batch_size: int | None = None,
+        max_frames_per_batch: int | None = None,
+        max_window_seqs_per_batch: int | None = None,
+        max_seqlen_override: int | None = None,
+        max_seqlen_window_override: int | None = None,
+        device: torch.device | None = None,
+    ) -> dict[str, torch.Tensor]:
+        """Compute encoder metadata from grid_thw.
+
+        Shared by the eager forward path, CUDA graph capture, and
+        CUDA graph replay to avoid duplicated implementation.
+
+        Args:
+            grid_thw: Grid configurations as list of [t, h, w].
+            max_batch_size: If set, pad cu_seqlens to this size
+                (needed for CUDA graph capture/replay).
+            max_frames_per_batch: If set, overrides max_batch_size for
+                cu_seqlens padding. For video inputs each item contributes
+                T attention sequences (frames); this sizes the buffer to
+                the total frame budget so video replays never overflow.
+            max_window_seqs_per_batch: If set, pad cu_window_seqlens to this
+                number of window sequences. This keeps cu_window_seqlens shape
+                stable across capture/replay for CUDA graph safety.
+            max_seqlen_override: If set, use this value for max_seqlen
+                instead of computing from cu_seqlens (needed for CUDA
+                graph capture to cover worst-case replay scenarios).
+            max_seqlen_window_override: If set, use this value for
+                window-attention max_seqlen instead of computing from
+                cu_window_seqlens (needed for CUDA graph capture to
+                cover worst-case replay scenarios).
+            device: Device to place tensors on. Defaults to self.device.
+        """
+
+        if device is None:
+            device = self.device
+        metadata: dict[str, torch.Tensor] = {}
+
         # patchify
-        seq_len, _ = x.size()
         rotary_pos_emb_cos = []
         rotary_pos_emb_sin = []
         window_index: list = []
         cu_window_seqlens: list = [torch.tensor([0], dtype=torch.int32)]
         cu_seqlens: list = []
 
-        hidden_states = x.to(device=self.device, dtype=self.dtype)
-        hidden_states = self.patch_embed(hidden_states)
-
         window_index_id = 0
         cu_window_seqlens_last = 0
         for t, h, w in grid_thw:
@@ -825,23 +878,137 @@ def forward(
         cu_seqlens = torch.cumsum(cu_seqlens, dim=0, dtype=torch.int32)
         cu_seqlens = F.pad(cu_seqlens, (1, 0), "constant", 0)
 
-        # transformers
-        # pre-compute seqlens for window/full attn to reduce cuMemcpy operations
-        max_seqlen_full = self.compute_attn_mask_seqlen(cu_seqlens)
-        max_seqlen_window = self.compute_attn_mask_seqlen(cu_window_seqlens)
+        # Pad cu_seqlens to the required number of sequences.
+        # For videos each item contributes T frames = T attention sequences,
+        # so the total can exceed max_batch_size. max_frames_per_batch
+        # overrides the pad target when set.
+        pad_to = (
+            max_frames_per_batch if max_frames_per_batch is not None else max_batch_size
+        )
+        if pad_to is not None:
+            num_seqs = len(cu_seqlens) - 1
+            if num_seqs < pad_to:
+                cu_seqlens = torch.cat(
+                    (
+                        cu_seqlens,
+                        torch.full(
+                            (pad_to - num_seqs,),
+                            cu_seqlens[-1],
+                            dtype=cu_seqlens.dtype,
+                            device=cu_seqlens.device,
+                        ),
+                    )
+                )
+
+        # Pad cu_window_seqlens to a stable number of window sequences.
+        # Like cu_seqlens, we repeat the last cumulative offset so padded
+        # entries represent empty sequences.
+        if max_window_seqs_per_batch is not None:
+            num_window_seqs = len(cu_window_seqlens) - 1
+            if num_window_seqs < max_window_seqs_per_batch:
+                cu_window_seqlens = torch.cat(
+                    (
+                        cu_window_seqlens,
+                        torch.full(
+                            (max_window_seqs_per_batch - num_window_seqs,),
+                            cu_window_seqlens[-1],
+                            dtype=cu_window_seqlens.dtype,
+                            device=cu_window_seqlens.device,
+                        ),
+                    )
+                )
+
+        cu_seqlens_np = cu_seqlens.cpu().numpy()
+        cu_window_seqlens_np = cu_window_seqlens.cpu().numpy()
 
-        cu_seqlens = cu_seqlens.to(device=self.device, non_blocking=True)
-        cu_window_seqlens = cu_window_seqlens.to(device=self.device, non_blocking=True)
-        rotary_pos_emb_cos = rotary_pos_emb_cos.to(
-            device=self.device, non_blocking=True
+        # FlashInfer needs the real per-sequence lengths in addition to
+        # cu_seqlens. For other backends this returns None and is ignored.
+        sequence_lengths_full = MMEncoderAttention.maybe_compute_seq_lens(
+            self.attn_backend, cu_seqlens_np, device
         )
-        rotary_pos_emb_sin = rotary_pos_emb_sin.to(
-            device=self.device, non_blocking=True
+        sequence_lengths_window = MMEncoderAttention.maybe_compute_seq_lens(
+            self.attn_backend, cu_window_seqlens_np, device
         )
-        window_index = window_index.to(device=hidden_states.device, non_blocking=True)
-        reverse_indices = reverse_indices.to(
-            device=hidden_states.device, non_blocking=True
+
+        # Pre-compute max sequence lengths for window/full attention. FlashInfer
+        # buckets this value for cuDNN graph reuse; other backends keep the exact
+        # maximum. Keep the scalar on CPU because attention wrappers call .item().
+        if max_seqlen_override is None:
+            max_seqlen_full_val = MMEncoderAttention.compute_max_seqlen(
+                self.attn_backend, cu_seqlens_np
+            )
+        else:
+            max_seqlen_full_val = max_seqlen_override
+        max_seqlen_full = torch.tensor(max_seqlen_full_val, dtype=torch.int32)
+        if max_seqlen_window_override is None:
+            max_seqlen_window_val = MMEncoderAttention.compute_max_seqlen(
+                self.attn_backend, cu_window_seqlens_np
+            )
+        else:
+            max_seqlen_window_val = max_seqlen_window_override
+        max_seqlen_window = torch.tensor(max_seqlen_window_val, dtype=torch.int32)
+
+        # FlashInfer uses backend-specific cu_seqlens offsets into the flattened
+        # Q/K/O and V buffers. Other backends receive the original cumulative
+        # token offsets unchanged.
+        cu_seqlens = MMEncoderAttention.maybe_recompute_cu_seqlens(
+            self.attn_backend,
+            cu_seqlens_np,
+            self.hidden_size,
+            self.tp_size,
+            device,
+            fp8_padded_hidden_size=self.fp8_padded_hidden_size,
         )
+        cu_window_seqlens = MMEncoderAttention.maybe_recompute_cu_seqlens(
+            self.attn_backend,
+            cu_window_seqlens_np,
+            self.hidden_size,
+            self.tp_size,
+            device,
+            fp8_padded_hidden_size=self.fp8_padded_hidden_size,
+        )
+        rotary_pos_emb_cos = rotary_pos_emb_cos.to(device=device, non_blocking=True)
+        rotary_pos_emb_sin = rotary_pos_emb_sin.to(device=device, non_blocking=True)
+        window_index = window_index.to(device=device, non_blocking=True)
+        reverse_indices = reverse_indices.to(device=device, non_blocking=True)
+
+        metadata["rotary_pos_emb_cos"] = rotary_pos_emb_cos
+        metadata["rotary_pos_emb_sin"] = rotary_pos_emb_sin
+        metadata["window_index"] = window_index
+        metadata["reverse_indices"] = reverse_indices
+        metadata["cu_seqlens"] = cu_seqlens
+        metadata["cu_window_seqlens"] = cu_window_seqlens
+        metadata["max_seqlen_full"] = max_seqlen_full
+        metadata["max_seqlen_window"] = max_seqlen_window
+        metadata["sequence_lengths_full"] = sequence_lengths_full
+        metadata["sequence_lengths_window"] = sequence_lengths_window
+
+        return metadata
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        grid_thw: list[list[int]],
+        *,
+        encoder_metadata: dict[str, torch.Tensor] | None = None,
+    ) -> torch.Tensor:
+        hidden_states = x.to(device=self.device, dtype=self.dtype)
+        hidden_states = self.patch_embed(hidden_states)
+
+        seq_len = hidden_states.shape[0]
+        if encoder_metadata is None:
+            encoder_metadata = self.prepare_encoder_metadata(grid_thw)
+
+        rotary_pos_emb_cos = encoder_metadata["rotary_pos_emb_cos"]
+        rotary_pos_emb_sin = encoder_metadata["rotary_pos_emb_sin"]
+        window_index = encoder_metadata["window_index"]
+        reverse_indices = encoder_metadata["reverse_indices"]
+        cu_seqlens = encoder_metadata["cu_seqlens"]
+        cu_window_seqlens = encoder_metadata["cu_window_seqlens"]
+        max_seqlen_full = encoder_metadata["max_seqlen_full"]
+        max_seqlen_window = encoder_metadata["max_seqlen_window"]
+        sequence_lengths_full = encoder_metadata.get("sequence_lengths_full")
+        sequence_lengths_window = encoder_metadata.get("sequence_lengths_window")
 
         hidden_states = hidden_states.reshape(
             seq_len // self.spatial_merge_unit, self.spatial_merge_unit, -1
@@ -855,9 +1022,11 @@ def forward(
             if layer_num in self.fullatt_block_indexes:
                 cu_seqlens_now = cu_seqlens
                 max_seqlen_now = max_seqlen_full
+                sequence_lengths_now = sequence_lengths_full
             else:
                 cu_seqlens_now = cu_window_seqlens
                 max_seqlen_now = max_seqlen_window
+                sequence_lengths_now = sequence_lengths_window
 
             hidden_states = blk(
                 hidden_states,
@@ -865,6 +1034,7 @@ def forward(
                 rotary_pos_emb_cos=rotary_pos_emb_cos,
                 rotary_pos_emb_sin=rotary_pos_emb_sin,
                 max_seqlen=max_seqlen_now,
+                sequence_lengths=sequence_lengths_now,
             )
 
         # For Qwen2.5-VL-3B, float16 will overflow at last block
@@ -926,7 +1096,7 @@ def _get_mm_fields_config(
     ) -> Mapping[str, MultiModalFieldConfig]:
         return dict(
             **super()._get_mm_fields_config(hf_inputs, hf_processor_mm_kwargs),
-            second_per_grid_ts=MultiModalFieldConfig.batched("video"),
+            second_per_grid_ts=MultiModalFieldConfig.batched("video", keep_on_cpu=True),
         )
 
     def _call_hf_processor(
@@ -1003,6 +1173,7 @@ def get_replacement_qwen2vl(item_idx: int, modality: str):
 class Qwen2_5_VLForConditionalGeneration(
     nn.Module,
     SupportsMultiModal,
+    SupportsEncoderCudaGraph,
     SupportsLoRA,
     SupportsPP,
     SupportsQuant,
@@ -1124,6 +1295,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
 
         self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
         self.config = config
+        self.model_config = vllm_config.model_config
         self.vllm_config = vllm_config
         self.multimodal_config = multimodal_config
         self.video_pruning_rate = multimodal_config.video_pruning_rate
@@ -1249,7 +1421,9 @@ def _postprocess_image_embeds_evs(
         grid_thw_list = grid_thw.tolist()
         image_embeds_out = []
         for emb, size in zip(image_embeds_split, grid_thw_list):
-            positions = compute_mrope_for_media(size, merge_size).to(emb.device)
+            positions = compute_mrope_for_media(size, merge_size).to(
+                emb.device, non_blocking=True
+            )
             emb = torch.cat([emb, positions], dim=1)
             image_embeds_out.append(emb)
         image_embeds_split = image_embeds_out
@@ -1331,7 +1505,7 @@ def _postprocess_video_embeds_evs(
                 merge_size,
                 tokens_per_second=tokens_per_second,
                 video_second_per_grid=video_second_per_grid_t.item(),
-            ).to(emb.device)
+            ).to(emb.device, non_blocking=True)
 
             emb = emb[retention_mask]
             positions = positions[retention_mask]
@@ -1376,8 +1550,8 @@ def recompute_mrope_positions(
             else mrope_positions.device
         )
 
-        # Tensors
-        input_ids_t = torch.as_tensor(input_ids, device=device, dtype=torch.long)
+        # Tensors.
+        input_ids_t = async_tensor_h2d(input_ids, dtype=torch.long, device=device)
 
         mm_embeddings_out = [mm[:, :-4] for mm in multimodal_embeddings]
         mm_embeddings_pos = [
@@ -1447,6 +1621,309 @@ def embed_multimodal(self, **kwargs: object) -> MultiModalEmbeddings:
                 multimodal_embeddings += tuple(video_embeddings)
         return multimodal_embeddings
 
+    # -- SupportsEncoderCudaGraph protocol methods --
+
+    def get_encoder_cudagraph_config(self):
+        from vllm.v1.worker.encoder_cudagraph_defs import (
+            EncoderCudaGraphConfig,
+        )
+
+        # NOTE: With EVS pruning enabled, multimodal embeddings are post-processed
+        # (append positions for image and prune+append positions for video) in
+        # embed_multimodal(). The encoder CUDA graph path bypasses that postprocess
+        # hook, so disable CUDA graph for all modalities to avoid inconsistent
+        # embedding formats between eager and cudagraph paths.
+        modalities = [] if self.is_multimodal_pruning_enabled else ["image", "video"]
+
+        max_frames = self.get_max_frames_per_video() if "video" in modalities else 1
+        return EncoderCudaGraphConfig(
+            modalities=modalities,
+            input_key_by_modality={
+                "image": "pixel_values",
+                "video": "pixel_values_videos",
+            },
+            buffer_keys=[
+                "rotary_pos_emb_cos",
+                "rotary_pos_emb_sin",
+                "window_index",
+                "reverse_indices",
+                "cu_seqlens",
+                "cu_window_seqlens",
+                "max_seqlen_full",
+                "max_seqlen_window",
+                "sequence_lengths_full",
+                "sequence_lengths_window",
+            ],
+            out_hidden_size=self.visual.out_hidden_size,
+            max_frames_per_video=max_frames,
+        )
+
+    def get_input_modality(
+        self,
+        mm_kwargs: dict[str, Any],
+    ) -> str:
+        if "image_grid_thw" in mm_kwargs:
+            return "image"
+        elif "video_grid_thw" in mm_kwargs:
+            return "video"
+        raise AssertionError("This line should be unreachable.")
+
+    def get_max_frames_per_video(self) -> int:
+        mm_registry = MULTIMODAL_REGISTRY
+        info = mm_registry.get_processing_info(self.model_config)
+        max_frames_per_video = info.get_num_frames_with_most_features(
+            seq_len=self.model_config.max_model_len,
+            mm_counts={"video": self.multimodal_config.get_limit_per_prompt("video")},
+        )
+        return max_frames_per_video
+
+    def get_encoder_cudagraph_budget_range(
+        self,
+        vllm_config: VllmConfig,
+    ) -> tuple[int, int]:
+        # Min: estimated smallest possible encoder input.
+        # 224x224 image → 16x16 patches (patch_size=14)
+        #                 spatial_merge_size=2 → 8x8 = 64 tokens
+        min_budget = 64
+        # Max: capped by max_num_batched_tokens
+        max_budget = min(
+            vllm_config.scheduler_config.max_num_batched_tokens,
+            self.model_config.max_model_len,
+        )
+        return (min_budget, max_budget)
+
+    def _get_pixel_values_by_modality(
+        self,
+        mm_kwargs: dict[str, Any],
+    ) -> torch.Tensor:
+        if self.get_input_modality(mm_kwargs) == "image":
+            pixel_values = mm_kwargs["pixel_values"]
+        else:
+            pixel_values = mm_kwargs["pixel_values_videos"]
+        return pixel_values
+
+    def _get_grid_thw_by_modality(
+        self,
+        mm_kwargs: dict[str, Any],
+    ) -> list[tuple[int, int, int]]:
+        grid_thw_key = f"{self.get_input_modality(mm_kwargs)}_grid_thw"
+        grid_thw = mm_kwargs[grid_thw_key]
+        if not isinstance(grid_thw, list):
+            grid_thw = grid_thw.tolist()
+        return grid_thw
+
+    def get_encoder_cudagraph_item_specs(
+        self,
+        mm_kwargs: dict[str, Any],
+    ):
+        from vllm.v1.worker.encoder_cudagraph_defs import EncoderItemSpec
+
+        m = self.visual.spatial_merge_size
+        grid_thw = self._get_grid_thw_by_modality(mm_kwargs)
+        return [
+            EncoderItemSpec(
+                input_size=t * h * w,
+                output_tokens=t * (h // m) * (w // m),
+            )
+            for t, h, w in grid_thw
+        ]
+
+    def select_encoder_cudagraph_items(
+        self,
+        mm_kwargs: dict[str, Any],
+        indices: list[int],
+    ) -> dict[str, Any]:
+        grid_thw = self._get_grid_thw_by_modality(mm_kwargs)
+        pixel_values = self._get_pixel_values_by_modality(mm_kwargs)
+
+        if len(indices) == 0:
+            if self.get_input_modality(mm_kwargs) == "image":
+                return {
+                    "pixel_values": pixel_values[:0],
+                    "image_grid_thw": [],
+                }
+            elif self.get_input_modality(mm_kwargs) == "video":
+                return {
+                    "pixel_values_videos": pixel_values[:0],
+                    "video_grid_thw": [],
+                }
+            else:
+                raise AssertionError("This line should be unreachable.")
+
+        # Compute cumulative patch offsets for slicing pixel_values
+        patches_per_item = [t * h * w for t, h, w in grid_thw]
+        cum_patches = [0]
+        for p in patches_per_item:
+            cum_patches.append(cum_patches[-1] + p)
+
+        selected_pv = torch.cat(
+            [pixel_values[cum_patches[i] : cum_patches[i + 1]] for i in indices]
+        )
+        selected_grid = [grid_thw[i] for i in indices]
+
+        if self.get_input_modality(mm_kwargs) == "image":
+            return {
+                "pixel_values": selected_pv,
+                "image_grid_thw": selected_grid,
+            }
+        elif self.get_input_modality(mm_kwargs) == "video":
+            return {
+                "pixel_values_videos": selected_pv,
+                "video_grid_thw": selected_grid,
+            }
+        else:
+            raise AssertionError("This line should be unreachable.")
+
+    def prepare_encoder_cudagraph_capture_inputs(
+        self,
+        token_budget: int,
+        max_batch_size: int,
+        max_frames_per_batch: int,
+        device: torch.device,
+        dtype: torch.dtype,
+    ):
+        from vllm.v1.worker.encoder_cudagraph_defs import (
+            EncoderCudaGraphCaptureInputs,
+        )
+
+        spatial_merge_size = self.visual.spatial_merge_size
+        max_window_seqs_per_batch = min(
+            self.vllm_config.scheduler_config.max_num_batched_tokens,
+            self.model_config.max_model_len,
+        )
+        # Use ceil here (not floor) so total captured capacity is never smaller
+        # than token_budget when token_budget is not divisible by max_batch_size
+        # (e.g., 324 budget with max_batch_size=8). Floor under-allocates
+        # input_buffer and can fail replay copy for valid single-item batches.
+        per_mm_item_output = (token_budget + max_batch_size - 1) // max_batch_size
+
+        frames_per_item = max_frames_per_batch // max_batch_size
+        if frames_per_item > 1:
+            # Build the capture grid using a video-format layout so that
+            # cu_seqlens is sized for video replays from the start.
+            # cu_seqlens has one entry per attention sequence (one per frame),
+            # so using T > 1 per item makes the buffer large enough without
+            # relying solely on padding.
+            # Ceiling ensures frames_per_item * tokens_per_frame >= per_mm_item_output
+            # so the pixel_values buffer covers any valid single-item replay.
+            tokens_per_frame = (
+                per_mm_item_output + frames_per_item - 1
+            ) // frames_per_item
+            # Video-format grid_config (T=frames_per_item).
+            grid_config = [
+                [
+                    frames_per_item,
+                    spatial_merge_size,
+                    tokens_per_frame * spatial_merge_size,
+                ]
+                for _ in range(max_batch_size)
+            ]
+        else:
+            # Image-format grid_config (T=1).
+            grid_config = [
+                [1, spatial_merge_size, per_mm_item_output * spatial_merge_size]
+                for _ in range(max_batch_size)
+            ]
+
+        # Create dummy pixel_values
+        patch_embed = self.visual.patch_embed
+        in_channels = patch_embed.proj.in_channels
+        patch_size = patch_embed.patch_size
+        temporal_patch_size = patch_embed.temporal_patch_size
+        total_patches = sum(t * h * w for t, h, w in grid_config)
+        flattened_patch_size = (
+            in_channels * temporal_patch_size * patch_size * patch_size
+        )
+        dummy_pixel_values = torch.randn(
+            total_patches, flattened_patch_size, device=device, dtype=dtype
+        )
+
+        # Override max_seqlen with a safe upper bound for capture.
+        # max_seqlen.item() gets baked into the CUDA graph (not replayed),
+        # so the capture value must cover any replay scenario.
+        # Worst case: 1 item consuming the full budget ->
+        # seq_len = token_budget * spatial_merge_size^2.
+        # For window-attention, each local window is bounded by fixed geometry:
+        # (window_size / patch_size / spatial_merge_size)^2 windows in merged
+        # token space, multiplied by spatial_merge_size^2 to map back to the
+        # unmerged sequence length used by attention kernels.
+        vit_merger_window_size = (
+            self.visual.window_size
+            // self.visual.spatial_merge_size
+            // self.visual.patch_size
+        )
+        max_seqlen_window_override = vit_merger_window_size**2 * (spatial_merge_size**2)
+        buffers = self.visual.prepare_encoder_metadata(
+            grid_config,
+            max_batch_size=max_batch_size,
+            max_frames_per_batch=max_frames_per_batch,
+            max_window_seqs_per_batch=max_window_seqs_per_batch,
+            max_seqlen_override=token_budget * (spatial_merge_size**2),
+            max_seqlen_window_override=max_seqlen_window_override,
+            device=device,
+        )
+
+        # Just use image-modality dummy input_buffer for capturing, since it's also
+        # compatible for video inputs (has the same shape: [num_patches, C*T*P*P]).
+        mm_kwargs = {
+            "pixel_values": dummy_pixel_values,
+            "image_grid_thw": grid_config,
+        }
+
+        return EncoderCudaGraphCaptureInputs(
+            mm_kwargs=mm_kwargs,
+            buffers=buffers,
+        )
+
+    def prepare_encoder_cudagraph_replay_buffers(
+        self,
+        mm_kwargs: dict[str, Any],
+        max_batch_size: int,
+        max_frames_per_batch: int,
+    ):
+        modality = self.get_input_modality(mm_kwargs)
+        grid_thw_list = self._get_grid_thw_by_modality(mm_kwargs)
+
+        if modality == "image":
+            buffers = self.visual.prepare_encoder_metadata(
+                grid_thw_list,
+                max_batch_size=max_batch_size,
+                max_window_seqs_per_batch=min(
+                    self.vllm_config.scheduler_config.max_num_batched_tokens,
+                    self.model_config.max_model_len,
+                ),
+            )
+        elif modality == "video":
+            buffers = self.visual.prepare_encoder_metadata(
+                grid_thw_list,
+                max_frames_per_batch=max_frames_per_batch,
+                max_window_seqs_per_batch=min(
+                    self.vllm_config.scheduler_config.max_num_batched_tokens,
+                    self.model_config.max_model_len,
+                ),
+            )
+        else:
+            raise AssertionError("This line should be unreachable.")
+
+        return EncoderCudaGraphReplayBuffers(buffers=buffers)
+
+    def encoder_cudagraph_forward(
+        self,
+        mm_kwargs: dict[str, Any],
+        buffers: dict[str, torch.Tensor],
+    ) -> torch.Tensor:
+        pixel_values = self._get_pixel_values_by_modality(mm_kwargs)
+        grid_thw = self._get_grid_thw_by_modality(mm_kwargs)
+        return self.visual(pixel_values, grid_thw, encoder_metadata=buffers)
+
+    def encoder_eager_forward(
+        self,
+        mm_kwargs: dict[str, Any],
+    ) -> torch.Tensor:
+        pixel_values = self._get_pixel_values_by_modality(mm_kwargs)
+        grid_thw = self._get_grid_thw_by_modality(mm_kwargs)
+        return self.visual(pixel_values, grid_thw)
+
     def forward(
         self,
         input_ids: torch.Tensor | None,
diff --git a/vllm/model_executor/models/qwen2_audio.py b/vllm/model_executor/models/qwen2_audio.py
index e7e8d74714cd..9692d9615b51 100644
--- a/vllm/model_executor/models/qwen2_audio.py
+++ b/vllm/model_executor/models/qwen2_audio.py
@@ -439,9 +439,9 @@ def _process_audio_input(
         num_audios, max_audio_tokens, embed_dim = audio_features.shape
         audio_output_lengths = audio_output_lengths.unsqueeze(1)
         audio_features_mask = (
-            torch.arange(max_audio_tokens)
-            .expand(num_audios, max_audio_tokens)
-            .to(audio_output_lengths.device)
+            torch.arange(max_audio_tokens, device=audio_output_lengths.device).expand(
+                num_audios, max_audio_tokens
+            )
             < audio_output_lengths
         )
         masked_audio_features = audio_features[audio_features_mask].view(-1, embed_dim)
diff --git a/vllm/model_executor/models/qwen2_moe.py b/vllm/model_executor/models/qwen2_moe.py
index 4b0c756165a5..77eea390eda9 100644
--- a/vllm/model_executor/models/qwen2_moe.py
+++ b/vllm/model_executor/models/qwen2_moe.py
@@ -40,7 +40,10 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
@@ -80,6 +83,7 @@ def __init__(
         quant_config: QuantizationConfig | None = None,
         reduce_results: bool = True,
         expert_gate: torch.nn.Linear | None = None,
+        is_sequence_parallel: bool = False,
         prefix: str = "",
     ) -> None:
         super().__init__()
@@ -88,6 +92,7 @@ def __init__(
             [intermediate_size] * 2,
             bias=False,
             quant_config=quant_config,
+            disable_tp=is_sequence_parallel,
             prefix=f"{prefix}.gate_up_proj",
         )
         self.down_proj = RowParallelLinear(
@@ -96,6 +101,7 @@ def __init__(
             bias=False,
             quant_config=quant_config,
             reduce_results=reduce_results,
+            disable_tp=is_sequence_parallel,
             prefix=f"{prefix}.down_proj",
         )
         if hidden_act != "silu":
@@ -161,13 +167,12 @@ def __init__(
         else:
             self.shared_expert = None
 
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.shared_expert,
             num_experts=config.num_experts,
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=config.norm_topk_prob,
             quant_config=quant_config,
             prefix=f"{prefix}.experts",
@@ -184,12 +189,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         final_hidden_states = self.experts(
             hidden_states=hidden_states, router_logits=router_logits
         )
-        if self.shared_expert is not None:
-            final_hidden_states = final_hidden_states[0] + final_hidden_states[1]
-        if self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(  # noqa E501
-                final_hidden_states
-            )
 
         return final_hidden_states.view(orig_shape)
 
@@ -422,7 +421,7 @@ def forward(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return SharedFusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py
index 176f45781081..869e044c2d4c 100644
--- a/vllm/model_executor/models/qwen2_vl.py
+++ b/vllm/model_executor/models/qwen2_vl.py
@@ -89,9 +89,11 @@
 from vllm.tokenizers import TokenizerLike
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
+from vllm.v1.worker.encoder_cudagraph_defs import EncoderCudaGraphReplayBuffers
 
 from .interfaces import (
     MultiModalEmbeddings,
+    SupportsEncoderCudaGraph,
     SupportsLoRA,
     SupportsMRoPE,
     SupportsMultiModal,
@@ -632,6 +634,7 @@ def rot_pos_emb(
         # Use pre-computed cos_sin_cache from RotaryEmbedding
         cos, sin = self.rotary_pos_emb.get_cos_sin(max_grid_size)
 
+        pos_ids = pos_ids.to(cos.device, non_blocking=True)
         cos_combined = cos[pos_ids].flatten(1)
         sin_combined = sin[pos_ids].flatten(1)
         return cos_combined, sin_combined
@@ -646,38 +649,84 @@ def compute_attn_mask_seqlen(self, cu_seqlens: torch.Tensor) -> int | None:
             max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max()
         return max_seqlen
 
+    def prepare_encoder_metadata(
+        self,
+        grid_thw: list[list[int]],
+        *,
+        max_batch_size: int | None = None,
+        max_frames_per_batch: int | None = None,
+        max_seqlen_override: int | None = None,
+        device: torch.device | None = None,
+    ) -> dict[str, torch.Tensor]:
+        if device is None:
+            device = self.device
+
+        # Position embeddings.
+        rotary_pos_emb_cos, rotary_pos_emb_sin = self.rot_pos_emb(grid_thw)
+
+        grid_thw_np = np.array(grid_thw, dtype=np.int32)
+        cu_seqlens = np.repeat(
+            grid_thw_np[:, 1] * grid_thw_np[:, 2],
+            grid_thw_np[:, 0],
+        ).cumsum(dtype=np.int32)
+        cu_seqlens = np.concatenate([np.zeros(1, dtype=np.int32), cu_seqlens])
+        cu_seqlens = torch.from_numpy(cu_seqlens)
+
+        # Keep cu_seqlens shape stable across capture/replay.
+        pad_to = (
+            max_frames_per_batch if max_frames_per_batch is not None else max_batch_size
+        )
+        if pad_to is not None:
+            num_seqs = len(cu_seqlens) - 1
+            if num_seqs < pad_to:
+                cu_seqlens = torch.cat(
+                    (
+                        cu_seqlens,
+                        torch.full(
+                            (pad_to - num_seqs,),
+                            cu_seqlens[-1],
+                            dtype=cu_seqlens.dtype,
+                            device=cu_seqlens.device,
+                        ),
+                    )
+                )
+
+        # Compute (or override) max seqlen used by FA-style backends.
+        if max_seqlen_override is None:
+            max_seqlen = self.compute_attn_mask_seqlen(cu_seqlens)
+        else:
+            max_seqlen = torch.tensor(max_seqlen_override, dtype=torch.int32)
+
+        return {
+            "rotary_pos_emb_cos": rotary_pos_emb_cos,
+            "rotary_pos_emb_sin": rotary_pos_emb_sin,
+            "cu_seqlens": cu_seqlens.to(device=device, non_blocking=True),
+            "max_seqlen": max_seqlen,
+        }
+
     def forward(
         self,
         x: torch.Tensor,
         grid_thw: torch.Tensor | list[list[int]],
+        *,
+        encoder_metadata: dict[str, torch.Tensor] | None = None,
     ) -> torch.Tensor:
         # patchify
         x = x.to(device=self.device, dtype=self.dtype)
         x = self.patch_embed(x)
 
-        if isinstance(grid_thw, list):
-            grid_thw_list = grid_thw
-            grid_thw = np.array(grid_thw, dtype=np.int32)
-        else:
-            grid_thw_list = grid_thw.tolist()
-            grid_thw = grid_thw.numpy()
+        grid_thw_list = grid_thw if isinstance(grid_thw, list) else grid_thw.tolist()
 
-        # compute position embedding
-        rotary_pos_emb_cos, rotary_pos_emb_sin = self.rot_pos_emb(grid_thw_list)
+        if encoder_metadata is None:
+            encoder_metadata = self.prepare_encoder_metadata(grid_thw_list)
 
-        # compute cu_seqlens
-        cu_seqlens = np.repeat(grid_thw[:, 1] * grid_thw[:, 2], grid_thw[:, 0]).cumsum(
-            axis=0, dtype=np.int32
-        )
-        cu_seqlens = np.concatenate([np.zeros(1, dtype=np.int32), cu_seqlens])
-        cu_seqlens = torch.from_numpy(cu_seqlens)
+        rotary_pos_emb_cos = encoder_metadata["rotary_pos_emb_cos"]
+        rotary_pos_emb_sin = encoder_metadata["rotary_pos_emb_sin"]
+        cu_seqlens = encoder_metadata["cu_seqlens"]
+        max_seqlen = encoder_metadata["max_seqlen"]
 
         # transformers
         x = x.unsqueeze(1)
-
-        # pre-compute seqlens for attn mask to reduce cuMemcpy operations
-        max_seqlen = self.compute_attn_mask_seqlen(cu_seqlens)
-        cu_seqlens = cu_seqlens.to(self.device, non_blocking=True)
         for blk in self.blocks:
             x = blk(
                 x,
@@ -1127,7 +1176,12 @@ def _get_mm_fields_config(
     dummy_inputs=Qwen2VLDummyInputsBuilder,
 )
 class Qwen2VLForConditionalGeneration(
-    nn.Module, SupportsMultiModal, SupportsLoRA, SupportsPP, SupportsMRoPE
+    nn.Module,
+    SupportsMultiModal,
+    SupportsLoRA,
+    SupportsPP,
+    SupportsMRoPE,
+    SupportsEncoderCudaGraph,
 ):
     # To ensure correct weight loading and mapping.
     hf_to_vllm_mapper = WeightsMapper(
@@ -1234,7 +1288,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         config: Qwen2VLConfig = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
         multimodal_config = vllm_config.model_config.multimodal_config
-
+        self.model_config = vllm_config.model_config
         self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
         self.config = config
         self.multimodal_config = multimodal_config
@@ -1396,6 +1450,235 @@ def embed_multimodal(self, **kwargs: object) -> MultiModalEmbeddings:
 
         return multimodal_embeddings
 
+    # -- SupportsEncoderCudaGraph protocol methods --
+
+    def get_encoder_cudagraph_config(self):
+        from vllm.v1.worker.encoder_cudagraph_defs import (
+            EncoderCudaGraphConfig,
+        )
+
+        max_frames = self.get_max_frames_per_video()
+        return EncoderCudaGraphConfig(
+            modalities=["image", "video"],
+            input_key_by_modality={
+                "image": "pixel_values",
+                "video": "pixel_values_videos",
+            },
+            buffer_keys=[
+                "rotary_pos_emb_cos",
+                "rotary_pos_emb_sin",
+                "cu_seqlens",
+                "max_seqlen",
+            ],
+            out_hidden_size=self.visual.out_hidden_size,
+            max_frames_per_video=max_frames,
+        )
+
+    def get_input_modality(self, mm_kwargs: dict[str, Any]) -> str:
+        if "image_grid_thw" in mm_kwargs:
+            return "image"
+        return "video"
+
+    def get_max_frames_per_video(self) -> int:
+        mm_registry = MULTIMODAL_REGISTRY
+        info = mm_registry.get_processing_info(self.model_config)
+        max_frames_per_video = info.get_num_frames_with_most_features(
+            seq_len=self.model_config.max_model_len,
+            mm_counts={"video": self.multimodal_config.get_limit_per_prompt("video")},
+        )
+        return max_frames_per_video
+
+    def get_encoder_cudagraph_budget_range(
+        self,
+        vllm_config: VllmConfig,
+    ) -> tuple[int, int]:
+        # Min: estimated smallest possible encoder input.
+        # 224x224 image -> 16x16 patches (patch_size=14)
+        #                spatial_merge_size=2 -> 8x8 = 64 tokens
+        min_budget = 64
+        # Max: capped by max_num_batched_tokens
+        max_budget = min(
+            vllm_config.scheduler_config.max_num_batched_tokens,
+            self.model_config.max_model_len,
+        )
+        return (min_budget, max_budget)
+
+    def _get_pixel_values_by_modality(self, mm_kwargs: dict[str, Any]) -> torch.Tensor:
+        if self.get_input_modality(mm_kwargs) == "image":
+            pixel_values = mm_kwargs["pixel_values"]
+        else:
+            pixel_values = mm_kwargs["pixel_values_videos"]
+        return pixel_values
+
+    def _get_grid_thw_by_modality(self, mm_kwargs: dict[str, Any]) -> list[list[int]]:
+        grid_thw_key = f"{self.get_input_modality(mm_kwargs)}_grid_thw"
+        grid_thw = mm_kwargs[grid_thw_key]
+        if not isinstance(grid_thw, list):
+            grid_thw = grid_thw.tolist()
+        return grid_thw
+
+    def get_encoder_cudagraph_item_specs(
+        self,
+        mm_kwargs: dict[str, Any],
+    ):
+        from vllm.v1.worker.encoder_cudagraph_defs import EncoderItemSpec
+
+        m = self.visual.spatial_merge_size
+        grid_thw = self._get_grid_thw_by_modality(mm_kwargs)
+        return [
+            EncoderItemSpec(
+                input_size=t * h * w,
+                output_tokens=t * (h // m) * (w // m),
+            )
+            for t, h, w in grid_thw
+        ]
+
+    def select_encoder_cudagraph_items(
+        self, mm_kwargs: dict[str, Any], indices: list[int]
+    ) -> dict[str, Any]:
+        grid_thw = self._get_grid_thw_by_modality(mm_kwargs)
+        pixel_values = self._get_pixel_values_by_modality(mm_kwargs)
+
+        if len(indices) == 0:
+            if self.get_input_modality(mm_kwargs) == "image":
+                return {
+                    "pixel_values": pixel_values[:0],
+                    "image_grid_thw": [],
+                }
+            else:
+                return {
+                    "pixel_values_videos": pixel_values[:0],
+                    "video_grid_thw": [],
+                }
+
+        # Compute cumulative patch offsets for slicing pixel_values.
+        patches_per_item = [t * h * w for t, h, w in grid_thw]
+        cum_patches = [0]
+        for p in patches_per_item:
+            cum_patches.append(cum_patches[-1] + p)
+
+        selected_pv = torch.cat(
+            [pixel_values[cum_patches[i] : cum_patches[i + 1]] for i in indices]
+        )
+        selected_grid = [grid_thw[i] for i in indices]
+
+        if self.get_input_modality(mm_kwargs) == "image":
+            return {
+                "pixel_values": selected_pv,
+                "image_grid_thw": selected_grid,
+            }
+        else:
+            return {
+                "pixel_values_videos": selected_pv,
+                "video_grid_thw": selected_grid,
+            }
+
+    def prepare_encoder_cudagraph_capture_inputs(
+        self,
+        token_budget: int,
+        max_batch_size: int,
+        max_frames_per_batch: int,
+        device: torch.device,
+        dtype: torch.dtype,
+    ):
+        from vllm.v1.worker.encoder_cudagraph_defs import (
+            EncoderCudaGraphCaptureInputs,
+        )
+
+        spatial_merge_size = self.visual.spatial_merge_size
+        # Use ceil so captured capacity is never smaller than token_budget.
+        per_mm_item_output = (token_budget + max_batch_size - 1) // max_batch_size
+
+        frames_per_item = max_frames_per_batch // max_batch_size
+        if frames_per_item > 1:
+            tokens_per_frame = (
+                per_mm_item_output + frames_per_item - 1
+            ) // frames_per_item
+            grid_config = [
+                [
+                    frames_per_item,
+                    spatial_merge_size,
+                    tokens_per_frame * spatial_merge_size,
+                ]
+                for _ in range(max_batch_size)
+            ]
+        else:
+            grid_config = [
+                [1, spatial_merge_size, per_mm_item_output * spatial_merge_size]
+                for _ in range(max_batch_size)
+            ]
+
+        # Create dummy pixel_values.
+        patch_embed = self.visual.patch_embed
+        in_channels = patch_embed.proj.in_channels
+        patch_size = patch_embed.patch_size
+        temporal_patch_size = patch_embed.temporal_patch_size
+        total_patches = sum(t * h * w for t, h, w in grid_config)
+        flattened_patch_size = (
+            in_channels * temporal_patch_size * patch_size * patch_size
+        )
+        dummy_pixel_values = torch.randn(
+            total_patches, flattened_patch_size, device=device, dtype=dtype
+        )
+
+        # max_seqlen.item() gets baked into the CUDA graph at capture time.
+        buffers = self.visual.prepare_encoder_metadata(
+            grid_config,
+            max_batch_size=max_batch_size,
+            max_frames_per_batch=max_frames_per_batch,
+            max_seqlen_override=token_budget * (spatial_merge_size**2),
+            device=device,
+        )
+
+        # Capture with image-format kwargs; pixel_values shape is compatible with
+        # both image and video replay paths.
+        mm_kwargs = {
+            "pixel_values": dummy_pixel_values,
+            "image_grid_thw": grid_config,
+        }
+
+        return EncoderCudaGraphCaptureInputs(
+            mm_kwargs=mm_kwargs,
+            buffers=buffers,
+        )
+
+    def prepare_encoder_cudagraph_replay_buffers(
+        self,
+        mm_kwargs: dict[str, Any],
+        max_batch_size: int,
+        max_frames_per_batch: int,
+    ) -> EncoderCudaGraphReplayBuffers:
+        modality = self.get_input_modality(mm_kwargs)
+        grid_thw_list = self._get_grid_thw_by_modality(mm_kwargs)
+
+        if modality == "image":
+            buffers = self.visual.prepare_encoder_metadata(
+                grid_thw_list,
+                max_batch_size=max_batch_size,
+            )
+        else:
+            buffers = self.visual.prepare_encoder_metadata(
+                grid_thw_list,
+                max_frames_per_batch=max_frames_per_batch,
+            )
+
+        return EncoderCudaGraphReplayBuffers(buffers=buffers)
+
+    def encoder_cudagraph_forward(
+        self, mm_kwargs: dict[str, Any], buffers: dict[str, torch.Tensor]
+    ) -> torch.Tensor:
+        pixel_values = self._get_pixel_values_by_modality(mm_kwargs)
+        grid_thw = self._get_grid_thw_by_modality(mm_kwargs)
+        return self.visual(pixel_values, grid_thw, encoder_metadata=buffers)
+
+    def encoder_eager_forward(
+        self,
+        mm_kwargs: dict[str, Any],
+    ) -> torch.Tensor:
+        pixel_values = self._get_pixel_values_by_modality(mm_kwargs)
+        grid_thw = self._get_grid_thw_by_modality(mm_kwargs)
+        return self.visual(pixel_values, grid_thw)
+
     def forward(
         self,
         input_ids: torch.Tensor | None,
diff --git a/vllm/model_executor/models/qwen3.py b/vllm/model_executor/models/qwen3.py
index 91931f9f424f..6dec60232b1d 100644
--- a/vllm/model_executor/models/qwen3.py
+++ b/vllm/model_executor/models/qwen3.py
@@ -285,6 +285,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
 
         self.config = config
 
+        self.vllm_config = vllm_config
         self.quant_config = quant_config
         self.model = Qwen3Model(
             vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
diff --git a/vllm/model_executor/models/qwen3_5.py b/vllm/model_executor/models/qwen3_5.py
index 4cbe9b88f621..e698fe7a5fd8 100644
--- a/vllm/model_executor/models/qwen3_5.py
+++ b/vllm/model_executor/models/qwen3_5.py
@@ -40,7 +40,9 @@
     GemmaRMSNorm as Qwen3_5RMSNorm,
 )
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
-from vllm.model_executor.layers.mamba.gdn_linear_attn import GatedDeltaNetAttention
+from vllm.model_executor.layers.mamba.gdn.qwen_gdn_linear_attn import (
+    QwenGatedDeltaNetAttention,
+)
 from vllm.model_executor.layers.mamba.mamba_utils import (
     MambaStateCopyFunc,
     MambaStateCopyFuncCalculator,
@@ -133,12 +135,11 @@ def __init__(
         self.layer_idx = extract_layer_index(prefix)
 
         if self.layer_type == "linear_attention":
-            self.linear_attn = GatedDeltaNetAttention(
+            self.linear_attn = QwenGatedDeltaNetAttention(
                 config=config,
                 vllm_config=vllm_config,
                 prefix=f"{prefix}.linear_attn",
                 gqa_interleaved_layout=False,
-                create_in_proj_qkvz=vllm_config.lora_config is None,
             )
         elif self.layer_type == "full_attention":
             self.self_attn = Qwen3NextAttention(
@@ -217,7 +218,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.num_redundant_experts = eplb_config.num_redundant_experts
 
         self.config = config
-        self.enable_lora = vllm_config.lora_config is not None
 
         self.vocab_size = config.vocab_size
 
@@ -264,8 +264,8 @@ def load_fused_expert_weights(
                 param,
                 curr_expert_weight,
                 name,
-                shard_id,
-                expert_id,
+                shard_id=shard_id,
+                expert_id=expert_id,
                 return_success=True,
             )
             if success:
@@ -276,6 +276,9 @@ def load_fused_expert_weights(
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         stacked_params_mapping = [
             # (param_name, shard_name, shard_id)
+            # GDN
+            ("in_proj_qkvz", "in_proj_qkv", (0, 1, 2)),
+            ("in_proj_qkvz", "in_proj_z", 3),
             # self attention
             ("qkv_proj", "q_proj", "q"),
             ("qkv_proj", "k_proj", "k"),
@@ -287,28 +290,16 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
             ("in_proj_ba", "in_proj_a", 1),
         ]
 
-        if self.enable_lora:
-            stacked_params_mapping.extend(
-                [
-                    ("in_proj_qkv", "in_proj_qkv", (0, 1, 2)),
-                    ("in_proj_z", "in_proj_z", 0),
-                ]
-            )
-        else:
-            stacked_params_mapping.extend(
-                [
-                    ("in_proj_qkvz", "in_proj_qkv", (0, 1, 2)),
-                    ("in_proj_qkvz", "in_proj_z", 3),
-                ]
-            )
-
         params_dict = dict(self.named_parameters())
         loaded_params: set[str] = set()
         expert_params_mapping = self.get_expert_mapping()
         is_fused_expert = False
+        base_layer = (
+            "base_layer." if any(".base_layer." in name for name in params_dict) else ""
+        )
         fused_expert_params_mapping = [
-            ("experts.w13_weight", "experts.gate_up_proj", 0, "w1"),
-            ("experts.w2_weight", "experts.down_proj", 0, "w2"),
+            (f"experts.{base_layer}w13_weight", "experts.gate_up_proj", 0, "w1"),
+            (f"experts.{base_layer}w2_weight", "experts.down_proj", 0, "w2"),
         ]
         num_experts = (
             self.config.num_experts if hasattr(self.config, "num_experts") else 0
@@ -349,10 +340,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                     continue
                 param = params_dict[name]
                 weight_loader = param.weight_loader
-                if param_name == "in_proj_z" and self.enable_lora:
-                    weight_loader(param, loaded_weight)
-                else:
-                    weight_loader(param, loaded_weight, shard_id)
+                weight_loader(param, loaded_weight, shard_id)
                 break
             else:
                 is_expert_weight = False
@@ -482,15 +470,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
         )
 
-        # When LoRA is enabled, GDN uses separate in_proj_qkv and in_proj_z
-        # instead of merged in_proj_qkvz; pack mapping must match.
-        if vllm_config.lora_config:
-            base = getattr(Qwen3_5ForCausalLMBase, "packed_modules_mapping", {})
-            self.packed_modules_mapping = {k: list(v) for k, v in base.items()}
-            self.packed_modules_mapping.pop("in_proj_qkvz", None)
-            self.packed_modules_mapping["in_proj_qkv"] = ["in_proj_qkv"]
-            self.packed_modules_mapping["in_proj_z"] = ["in_proj_z"]
-
         if get_pp_group().is_last_rank:
             if config.tie_word_embeddings:
                 self.lm_head = self.model.embed_tokens
@@ -583,12 +562,12 @@ class Qwen3_5ForConditionalGeneration(Qwen3VLForConditionalGeneration, IsHybrid)
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"):
         # protocols have not __init__ method, so we need to use nn.Module.__init__
         nn.Module.__init__(self)
-        self.update_packed_mapping(enable_lora=vllm_config.lora_config is not None)
         config: Qwen3_5Config = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
         multimodal_config = vllm_config.model_config.multimodal_config
 
         self.config = config
+        self.model_config = vllm_config.model_config
         self.multimodal_config = multimodal_config
         self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
         # Qwen3.5 does not support multimodal pruning (EVS).
@@ -611,16 +590,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"):
             self.language_model.make_empty_intermediate_tensors
         )
 
-    def update_packed_mapping(self, enable_lora: bool):
-        # When LoRA is enabled, GDN uses separate in_proj_qkv and in_proj_z
-        if enable_lora:
-            base = getattr(
-                Qwen3_5ForConditionalGeneration, "packed_modules_mapping", {}
-            )
-            self.packed_modules_mapping = {k: list(v) for k, v in base.items()}
-            self.packed_modules_mapping.pop("in_proj_qkvz", None)
-            self.packed_modules_mapping["in_proj_qkv"] = ["in_proj_qkv"]
-
     def embed_input_ids(
         self,
         input_ids: torch.Tensor,
@@ -807,12 +776,12 @@ class Qwen3_5MoeForConditionalGeneration(
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"):
         # protocols have not __init__ method, so we need to use nn.Module.__init__
         nn.Module.__init__(self)
-        self.update_packed_mapping(enable_lora=vllm_config.lora_config is not None)
         config: Qwen3_5MoeConfig = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
         multimodal_config = vllm_config.model_config.multimodal_config
 
         self.config = config
+        self.model_config = vllm_config.model_config
         self.multimodal_config = multimodal_config
         self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
         # Qwen3.5 does not support multimodal pruning (EVS).
diff --git a/vllm/model_executor/models/qwen3_5_mtp.py b/vllm/model_executor/models/qwen3_5_mtp.py
index 0eca47492c91..5622065a7ff3 100644
--- a/vllm/model_executor/models/qwen3_5_mtp.py
+++ b/vllm/model_executor/models/qwen3_5_mtp.py
@@ -12,7 +12,9 @@
 from vllm.config import VllmConfig
 from vllm.distributed.parallel_state import get_pp_group
 from vllm.logger import init_logger
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.linear import ColumnParallelLinear
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -75,13 +77,22 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             config.hidden_size,
         )
 
+        # Workaround: mtp.fc is stored as BF16 in NVFP4 checkpoints but is
+        # missing from hf_quant_config.json exclude_modules. Force unquantized.
+        # Ref: https://github.com/vllm-project/vllm/pull/38650
+        # Ref: https://github.com/NVIDIA/Model-Optimizer/pull/1124
+        fc_quant = (
+            None
+            if (quant_config and quant_config.get_name() == "modelopt_fp4")
+            else quant_config
+        )
         self.fc = ColumnParallelLinear(
             self.config.hidden_size * 2,
             self.config.hidden_size,
             gather_output=True,
             bias=False,
             return_bias=False,
-            quant_config=quant_config,
+            quant_config=fc_quant,
             prefix=f"{prefix}.fc",
         )
 
@@ -164,8 +175,8 @@ def load_fused_expert_weights(
                 param,
                 curr_expert_weight,
                 name,
-                shard_id,
-                expert_id,
+                shard_id=shard_id,
+                expert_id=expert_id,
                 return_success=True,
             )
             if success:
@@ -185,7 +196,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        expert_params_mapping = FusedMoE.make_expert_params_mapping(
+        expert_params_mapping = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -198,9 +209,12 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         params_dict = dict(self.named_parameters())
         loaded_params: set[str] = set()
         is_fused_expert = False
+        base_layer = (
+            "base_layer." if any(".base_layer." in name for name in params_dict) else ""
+        )
         fused_expert_params_mapping = [
-            ("experts.w13_weight", "experts.gate_up_proj", 0, "w1"),
-            ("experts.w2_weight", "experts.down_proj", 0, "w2"),
+            (f"experts.{base_layer}w13_weight", "experts.gate_up_proj", 0, "w1"),
+            (f"experts.{base_layer}w2_weight", "experts.down_proj", 0, "w2"),
         ]
         num_experts = (
             self.config.num_experts if hasattr(self.config, "num_experts") else 0
diff --git a/vllm/model_executor/models/qwen3_asr.py b/vllm/model_executor/models/qwen3_asr.py
index 3015ae031325..950beba77541 100644
--- a/vllm/model_executor/models/qwen3_asr.py
+++ b/vllm/model_executor/models/qwen3_asr.py
@@ -23,9 +23,8 @@
 """Inference-only Qwen3-ASR model."""
 
 from collections.abc import Iterable, Mapping, Sequence
-from typing import Any, Literal
+from typing import Any
 
-import numpy as np
 import torch
 import torch.nn as nn
 from transformers.feature_extraction_utils import BatchFeature
@@ -33,10 +32,12 @@
 
 from vllm.config import ModelConfig, SpeechToTextConfig, VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
+from vllm.config.speech_to_text import SpeechToTextParams
 from vllm.inputs import ModalityData, MultiModalDataDict, PromptType, TokensPrompt
 from vllm.logger import init_logger
 from vllm.model_executor.models.interfaces import (
     MultiModalEmbeddings,
+    SupportsLoRA,
     SupportsMRoPE,
     SupportsMultiModal,
     SupportsPP,
@@ -266,7 +267,21 @@ class Qwen3ASRForConditionalGeneration(
     SupportsPP,
     SupportsMRoPE,
     SupportsTranscription,
+    SupportsLoRA,
 ):
+    # LoRA support
+    packed_modules_mapping = {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }
+
     supported_languages = ISO639_1_SUPPORTED_LANGS
 
     hf_to_vllm_mapper = WeightsMapper(
@@ -348,8 +363,6 @@ def _parse_and_validate_multimodal_inputs(self, **kwargs: object) -> dict:
     def _process_audio_input(
         self,
         audio_input: Qwen2_5OmniAudioFeatureInputs,
-        audio_hashes: list[str] | None = None,
-        cached_audio_features: torch.Tensor | None = None,
     ) -> torch.Tensor:
         input_features = audio_input["input_features"]
         audio_feature_lengths = audio_input["audio_feature_lengths"]
@@ -513,6 +526,17 @@ def get_mm_mapping(self) -> MultiModelKeys:
             tower_model=["audio_tower."],
         )
 
+    def get_num_mm_encoder_tokens(self, num_audio_tokens: int) -> int:
+        """Return the number of tokens processed by the audio tower encoder.
+
+        Required for LoRA support on the tower module.
+        """
+        # For Qwen3-ASR, the audio tower produces one embedding per audio
+        # placeholder token inserted into the prompt (no additional
+        # merge/downsample step like vision towers). Therefore, the encoder
+        # token budget is identity.
+        return num_audio_tokens
+
     @classmethod
     def get_speech_to_text_config(
         cls, model_config: ModelConfig, task_type: str
@@ -525,17 +549,12 @@ def get_speech_to_text_config(
         )
 
     @classmethod
-    def get_generation_prompt(
-        cls,
-        audio: np.ndarray,
-        model_config: ModelConfig,
-        stt_config: SpeechToTextConfig,
-        language: str | None,
-        task_type: Literal["transcribe", "translate"],
-        request_prompt: str,
-        to_language: str | None,
-    ) -> PromptType:
+    def get_generation_prompt(cls, stt_params: SpeechToTextParams) -> PromptType:
         """Get the generation prompt to be used for transcription requests."""
+        audio = stt_params.audio
+        model_config = stt_params.model_config
+        task_type = stt_params.task_type
+        to_language = stt_params.to_language
         tokenizer = cached_tokenizer_from_config(model_config)
         audio_placeholder = cls.get_placeholder_str("audio", 0)
 
diff --git a/vllm/model_executor/models/qwen3_dflash.py b/vllm/model_executor/models/qwen3_dflash.py
new file mode 100644
index 000000000000..231ed646e094
--- /dev/null
+++ b/vllm/model_executor/models/qwen3_dflash.py
@@ -0,0 +1,628 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Iterable
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+from transformers import Qwen3Config
+
+from vllm import _custom_ops as ops
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import CacheConfig, VllmConfig, get_current_vllm_config
+from vllm.distributed import get_tensor_model_parallel_world_size
+from vllm.logger import init_logger
+from vllm.model_executor.layers.attention import Attention
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import (
+    QKVParallelLinear,
+    ReplicatedLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
+from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    ParallelLMHead,
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.model_loader.weight_utils import (
+    default_weight_loader,
+    maybe_remap_kv_scale_name,
+)
+from vllm.multimodal.inputs import NestedTensors
+from vllm.transformers_utils.config import set_default_rope_theta
+from vllm.v1.attention.backend import AttentionType
+
+from .qwen2 import Qwen2MLP as Qwen3MLP
+from .qwen3 import Qwen3ForCausalLM
+from .utils import (
+    AutoWeightsLoader,
+    get_draft_quant_config,
+    maybe_prefix,
+    process_eagle_weight,
+)
+
+logger = init_logger(__name__)
+
+
+class DFlashQwen3Attention(nn.Module):
+    """Attention for DFlash speculative decoding.
+
+    Context KVs are pre-inserted into the KV cache before the forward pass.
+    This layer handles only query tokens via standard attention.
+    Adapted from Qwen3Attention."""
+
+    def __init__(
+        self,
+        hidden_size: int,
+        num_heads: int,
+        num_kv_heads: int,
+        rope_parameters: dict,
+        max_position: int = 4096 * 32,
+        head_dim: int | None = None,
+        rms_norm_eps: float = 1e-06,
+        attention_bias: bool = False,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+        attn_type: str = AttentionType.DECODER,
+    ) -> None:
+        super().__init__()
+        self.layer_name = prefix
+        self.hidden_size = hidden_size
+        tp_size = get_tensor_model_parallel_world_size()
+        self.total_num_heads = num_heads
+        assert self.total_num_heads % tp_size == 0
+        self.num_heads = self.total_num_heads // tp_size
+        self.total_num_kv_heads = num_kv_heads
+        if self.total_num_kv_heads >= tp_size:
+            assert self.total_num_kv_heads % tp_size == 0
+        else:
+            assert tp_size % self.total_num_kv_heads == 0
+        self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
+        self.head_dim = head_dim or hidden_size // self.total_num_heads
+        self.q_size = self.num_heads * self.head_dim
+        self.kv_size = self.num_kv_heads * self.head_dim
+        self.scaling = self.head_dim**-0.5
+
+        self.qkv_proj = QKVParallelLinear(
+            hidden_size,
+            self.head_dim,
+            self.total_num_heads,
+            self.total_num_kv_heads,
+            bias=attention_bias,
+            quant_config=quant_config,
+            prefix=f"{prefix}.qkv_proj",
+        )
+        self.o_proj = RowParallelLinear(
+            self.total_num_heads * self.head_dim,
+            hidden_size,
+            bias=attention_bias,  # DFlash has o_proj bias when using attention bias
+            quant_config=quant_config,
+            prefix=f"{prefix}.o_proj",
+        )
+
+        self.rotary_emb = get_rope(
+            self.head_dim,
+            max_position=max_position,
+            rope_parameters=rope_parameters,
+        )
+        self.attn = Attention(
+            self.num_heads,
+            self.head_dim,
+            self.scaling,
+            num_kv_heads=self.num_kv_heads,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            prefix=f"{prefix}.attn",
+            attn_type=attn_type,
+        )
+        self.q_norm = RMSNorm(self.head_dim, eps=rms_norm_eps)
+        self.k_norm = RMSNorm(self.head_dim, eps=rms_norm_eps)
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor:
+        """DFlash attention assumes that the KV cache is already populated
+        with the context K/V from the target model's hidden states. This forward op
+        computes attention for the query tokens only.
+        See also: precompute_and_store_context_kv"""
+        qkv = F.linear(hidden_states, self.qkv_proj.weight, self.qkv_proj.bias)
+        q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
+
+        # Per-head RMSNorm
+        q_shape, k_shape = q.shape, k.shape
+        q = self.q_norm(
+            q.view(*q_shape[:-1], q_shape[-1] // self.head_dim, self.head_dim)
+        ).view(q_shape)
+        k = self.k_norm(
+            k.view(*k_shape[:-1], k_shape[-1] // self.head_dim, self.head_dim)
+        ).view(k_shape)
+
+        q, k = self.rotary_emb(positions, q, k)
+
+        attn_output = self.attn(q, k, v)
+        output, _ = self.o_proj(attn_output)
+        return output
+
+
+class DFlashQwen3DecoderLayer(nn.Module):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        *,
+        config: Qwen3Config,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.hidden_size = config.hidden_size
+        set_default_rope_theta(config, default_theta=1000000)
+        attn_type = AttentionType.DECODER
+
+        self.self_attn = DFlashQwen3Attention(
+            hidden_size=self.hidden_size,
+            num_heads=config.num_attention_heads,
+            max_position=config.max_position_embeddings,
+            num_kv_heads=config.num_key_value_heads,
+            rms_norm_eps=config.rms_norm_eps,
+            attention_bias=getattr(config, "attention_bias", False),
+            head_dim=getattr(config, "head_dim", None),
+            cache_config=cache_config,
+            quant_config=quant_config,
+            rope_parameters=config.rope_parameters,
+            prefix=f"{prefix}.self_attn",
+            attn_type=attn_type,
+        )
+        self.mlp = Qwen3MLP(
+            hidden_size=self.hidden_size,
+            intermediate_size=config.intermediate_size,
+            hidden_act=config.hidden_act,
+            quant_config=quant_config,
+            prefix=f"{prefix}.mlp",
+        )
+        self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.post_attention_layernorm = RMSNorm(
+            config.hidden_size, eps=config.rms_norm_eps
+        )
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        residual: torch.Tensor | None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        if residual is not None:
+            hidden_states, residual = self.input_layernorm(hidden_states, residual)
+        else:
+            residual = hidden_states
+            hidden_states = self.input_layernorm(hidden_states)
+
+        hidden_states = self.self_attn(
+            positions=positions,
+            hidden_states=hidden_states,
+        )
+
+        hidden_states, residual = self.post_attention_layernorm(hidden_states, residual)
+        hidden_states = self.mlp(hidden_states)
+        return hidden_states, residual
+
+
+@support_torch_compile
+class DFlashQwen3Model(nn.Module):
+    def __init__(
+        self,
+        *,
+        vllm_config: VllmConfig,
+        start_layer_id: int = 0,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.config = vllm_config.speculative_config.draft_model_config.hf_config
+        self.vocab_size = self.config.vocab_size
+        self.quant_config = get_draft_quant_config(vllm_config)
+
+        drafter_config = getattr(self.config, "eagle_config", {})
+        drafter_config.update(getattr(self.config, "dflash_config", {}))
+
+        if drafter_config is not None and "use_aux_hidden_state" in drafter_config:
+            self.use_aux_hidden_state = drafter_config["use_aux_hidden_state"]
+        else:
+            self.use_aux_hidden_state = True
+
+        current_vllm_config = get_current_vllm_config()
+
+        self.embed_tokens = VocabParallelEmbedding(
+            self.config.vocab_size,
+            self.config.hidden_size,
+            prefix=maybe_prefix(prefix, "embed_tokens"),
+        )
+
+        self.layers = nn.ModuleList(
+            [
+                DFlashQwen3DecoderLayer(
+                    current_vllm_config,
+                    config=self.config,
+                    cache_config=current_vllm_config.cache_config,
+                    quant_config=self.quant_config,
+                    prefix=maybe_prefix(prefix, f"layers.{layer_idx + start_layer_id}"),
+                )
+                for layer_idx in range(self.config.num_hidden_layers)
+            ]
+        )
+        if self.use_aux_hidden_state:
+            num_features_to_use = self.config.num_hidden_layers
+            if "target_layer_ids" in drafter_config:
+                num_features_to_use = len(drafter_config["target_layer_ids"])
+            elif "layer_ids" in drafter_config:
+                num_features_to_use = len(drafter_config["layer_ids"])
+            if hasattr(self.config, "target_hidden_size"):
+                fc_input_size = self.config.target_hidden_size * num_features_to_use
+            else:
+                fc_input_size = self.config.hidden_size * num_features_to_use
+            self.fc = ReplicatedLinear(
+                input_size=fc_input_size,
+                output_size=self.config.hidden_size,
+                bias=False,
+                params_dtype=vllm_config.model_config.dtype,
+                quant_config=self.quant_config,
+                prefix=maybe_prefix(prefix, "fc"),
+                return_bias=False,
+            )
+        self.hidden_norm = RMSNorm(
+            self.config.hidden_size,
+            eps=self.config.rms_norm_eps,
+        )
+        self.norm = RMSNorm(
+            self.config.hidden_size,
+            eps=self.config.rms_norm_eps,
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
+    def _build_fused_kv_buffers(self) -> None:
+        """Build fused weight buffers for precompute_and_store_context_kv.
+
+        Must be called after weights are loaded. Stacks the KV-projection
+        weights, K-norm weights, and RoPE parameters from every attention
+        layer so that precompute_and_store_context_kv can run one fused
+        GEMM for all layers at once. Also aliases the weight of the hidden_norm.
+        """
+        layers_attn = [layer.self_attn for layer in self.layers]
+        attn0 = layers_attn[0]
+        has_bias = attn0.qkv_proj.bias is not None
+
+        self._hidden_norm_weight = self.hidden_norm.weight.data
+
+        # KV projection weights: [num_layers * 2 * kv_size, hidden_size]
+        kv_weights = [a.qkv_proj.weight[a.q_size :] for a in layers_attn]
+        self._fused_kv_weight = torch.cat(kv_weights, dim=0)
+        if has_bias:
+            kv_biases = [a.qkv_proj.bias[a.q_size :] for a in layers_attn]
+            self._fused_kv_bias: torch.Tensor | None = torch.cat(kv_biases, dim=0)
+        else:
+            self._fused_kv_bias = None
+
+        # K-norm weights: list of [head_dim] tensors, one per layer.
+        self._k_norm_weights = [a.k_norm.weight.data for a in layers_attn]
+
+        # RoPE parameters
+        self._rope_head_size = attn0.rotary_emb.head_size
+        self._rope_cos_sin_cache = attn0.rotary_emb.cos_sin_cache
+        self._rope_is_neox = attn0.rotary_emb.is_neox_style
+        # Validation that RoPE params are the same across all layers
+        for attn in layers_attn[1:]:
+            assert (
+                attn.rotary_emb.head_size == self._rope_head_size
+                and attn.rotary_emb.is_neox_style == self._rope_is_neox
+            ), "All layers must have the same RoPE parameters for DFlash precomputation"
+
+        # Layer metadata
+        self._num_attn_layers = len(layers_attn)
+        self._kv_size = attn0.kv_size
+        self._head_dim = attn0.head_dim
+        self._num_kv_heads = attn0.num_kv_heads
+        self._rms_norm_eps = attn0.q_norm.variance_epsilon
+        # Validation that all layers have the same attention config
+        for attn in layers_attn[1:]:
+            assert (
+                attn.kv_size == self._kv_size
+                and attn.head_dim == self._head_dim
+                and attn.num_kv_heads == self._num_kv_heads
+                and attn.q_norm.variance_epsilon == self._rms_norm_eps
+            ), "All layers must have the same attn config for DFlash precomputation"
+
+        # References to inner Attention layers for direct cache writes
+        self._attn_layers = [layer.self_attn.attn for layer in self.layers]
+
+    def precompute_and_store_context_kv(
+        self,
+        context_states: torch.Tensor,
+        context_positions: torch.Tensor,
+        context_slot_mapping: torch.Tensor | None = None,
+    ) -> None:
+        """Precompute K/V for context states write them into each layer's KV cache.
+
+        Input context states are projected to K/V, normed, and have RoPE applied.
+        Since the context shape is different than the query shape, we can't rely on the
+        regular forward pass to apply torch.compile and CUDA graphs to this section.
+        As such, this function is optimized to minimize the number of torch ops present:
+        we use fused vLLM kernels for RMSNorm and RoPE, fuse the GEMM into one
+        large projection, and avoid cloning buffers (with .contiguous()) where possible.
+
+        When context_slot_mapping is None (e.g. during dummy_run) only
+        the computation runs, and no K/V is written to cache.
+        """
+        if not hasattr(self, "_num_attn_layers"):
+            logger.warning_once(
+                "DFlash buffer initialization was skipped. If dummy weights are not "
+                "in use, this may indicate an error in weight loading."
+            )
+            self._build_fused_kv_buffers()
+
+        num_ctx = context_states.shape[0]
+        L = self._num_attn_layers
+        kv = self._kv_size
+        hd = self._head_dim
+        nkv = self._num_kv_heads
+
+        # --- Fused KV projection (one GEMM for all layers) ---
+        normed_context_states = torch.empty_like(context_states)
+        ops.rms_norm(
+            normed_context_states,
+            context_states,
+            self._hidden_norm_weight,
+            self._rms_norm_eps,
+        )
+        all_kv_flat = F.linear(
+            normed_context_states, self._fused_kv_weight, self._fused_kv_bias
+        )
+        # Single contiguous copy that separates K/V and transposes to
+        # layer-major layout.  Result: [2, L, num_ctx, nkv, hd] contiguous.
+        # Indexing dim-0 gives contiguous [L, num_ctx, nkv, hd] for K and V.
+        all_kv = (
+            all_kv_flat.view(num_ctx, L, 2, nkv, hd).permute(2, 1, 0, 3, 4).contiguous()
+        )
+        all_k = all_kv[0]  # [L, num_ctx, nkv, hd], contiguous
+        all_v = all_kv[1]  # [L, num_ctx, nkv, hd], contiguous
+
+        # --- Per-layer RMSNorm K (3D: [num_ctx, nkv, hd] per layer) ---
+        all_k_normed = torch.empty_like(all_k)
+        for i in range(L):
+            ops.rms_norm(
+                all_k_normed[i],
+                all_k[i],
+                self._k_norm_weights[i],
+                self._rms_norm_eps,
+            )
+
+        # --- Fused RoPE across all layers ---
+        # View as [L * num_ctx, kv] so RoPE sees one big batch (no copy).
+        # In-place RoPE: pass K as the "query" arg with key=None.
+        all_k_flat = all_k_normed.view(L * num_ctx, kv)
+        positions_repeated = context_positions.repeat(L)
+        cos_sin_cache = self._rope_cos_sin_cache
+        if cos_sin_cache.dtype != all_k_flat.dtype:
+            cos_sin_cache = cos_sin_cache.to(dtype=all_k_flat.dtype)
+        ops.rotary_embedding(
+            positions_repeated,
+            all_k_flat,
+            None,
+            self._rope_head_size,
+            cos_sin_cache,
+            self._rope_is_neox,
+        )
+
+        if context_slot_mapping is None:
+            return
+
+        # --- Per-layer cache insert ---
+        all_k_final = all_k_flat.view(L, num_ctx, nkv, hd)
+        for i in range(L):
+            attn = self._attn_layers[i]
+            kv_cache = attn.kv_cache
+            attn.impl.do_kv_cache_update(
+                attn,
+                all_k_final[i],
+                all_v[i],
+                kv_cache,
+                context_slot_mapping,
+            )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        input_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        if input_embeds is None:
+            input_embeds = self.embed_input_ids(input_ids)
+
+        hidden_states = input_embeds
+
+        residual = None
+        for layer in self.layers:
+            hidden_states, residual = layer(
+                positions=positions,
+                hidden_states=hidden_states,
+                residual=residual,
+            )
+        hidden_states, _ = self.norm(hidden_states, residual)
+        return hidden_states
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        stacked_params_mapping = [
+            (".qkv_proj", ".q_proj", "q"),
+            (".qkv_proj", ".k_proj", "k"),
+            (".qkv_proj", ".v_proj", "v"),
+            (".gate_up_proj", ".gate_proj", 0),
+            (".gate_up_proj", ".up_proj", 1),
+        ]
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+        for name, loaded_weight in weights:
+            if "midlayer." in name:
+                name = name.replace("midlayer.", "layers.0.")
+            if self.quant_config is not None and (
+                scale_name := self.quant_config.get_cache_scale(name)
+            ):
+                param = params_dict[scale_name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                loaded_weight = (
+                    loaded_weight if loaded_weight.dim() == 0 else loaded_weight[0]
+                )
+                weight_loader(param, loaded_weight)
+                loaded_params.add(scale_name)
+                continue
+            if "scale" in name:
+                name = maybe_remap_kv_scale_name(name, params_dict)
+                if name is None:
+                    continue
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                if weight_name not in name:
+                    continue
+                name = name.replace(weight_name, param_name)
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                break
+            else:
+                param = params_dict[name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                weight_loader(param, loaded_weight)
+            loaded_params.add(name)
+        return loaded_params
+
+
+class DFlashQwen3ForCausalLM(Qwen3ForCausalLM):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        nn.Module.__init__(self)
+        self.config = vllm_config.speculative_config.draft_model_config.hf_config
+        if getattr(self.config, "draft_vocab_size", None) is None:
+            self.config.draft_vocab_size = getattr(self.config, "vocab_size", None)
+        target_layer_num = vllm_config.model_config.get_num_layers(
+            vllm_config.parallel_config
+        )
+        self.config.target_layer_count = target_layer_num
+        self.model = DFlashQwen3Model(
+            vllm_config=vllm_config,
+            prefix=maybe_prefix(prefix, "model"),
+            start_layer_id=target_layer_num,
+        )
+
+        logit_scale = getattr(self.config, "logit_scale", 1.0)
+        self.lm_head = ParallelLMHead(
+            self.config.draft_vocab_size,
+            self.config.hidden_size,
+            prefix=maybe_prefix(prefix, "lm_head"),
+        )
+        self.logits_processor = LogitsProcessor(
+            self.config.draft_vocab_size, scale=logit_scale
+        )
+        target_vocab_size = vllm_config.model_config.get_vocab_size()
+        if self.config.draft_vocab_size != target_vocab_size:
+            self.draft_id_to_target_id = nn.Parameter(
+                torch.zeros(self.config.draft_vocab_size, dtype=torch.long),
+                requires_grad=False,
+            )
+        else:
+            self.draft_id_to_target_id = None
+
+    def embed_input_ids(
+        self,
+        input_ids: torch.Tensor,
+        multimodal_embeddings: NestedTensors | None = None,
+        is_multimodal: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        return self.model(input_ids, positions, inputs_embeds)
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        logits = self.logits_processor(self.lm_head, hidden_states)
+        if self.draft_id_to_target_id is None:
+            return logits
+
+        base = torch.arange(self.config.draft_vocab_size, device=logits.device)
+        targets = base + self.draft_id_to_target_id
+        logits_new = logits.new_full(
+            (logits.shape[0], self.config.vocab_size),
+            float("-inf"),
+        )
+        logits_new[:, targets] = logits
+        return logits_new
+
+    def precompute_and_store_context_kv(
+        self,
+        context_states: torch.Tensor,
+        context_positions: torch.Tensor,
+        context_slot_mapping: torch.Tensor | None = None,
+    ) -> None:
+        """Precompute projected + RoPE'd K/V and write to cache."""
+        self.model.precompute_and_store_context_kv(
+            context_states, context_positions, context_slot_mapping
+        )
+
+    def combine_hidden_states(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor:
+        if not self.model.use_aux_hidden_state:
+            return hidden_states
+        needs_squeeze = hidden_states.dim() == 1
+        if needs_squeeze:
+            hidden_states = hidden_states.unsqueeze(0)
+        result = self.model.fc(hidden_states)
+        if needs_squeeze:
+            result = result.squeeze(0)
+        return result
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
+        model_weights = {}
+        includes_draft_id_mapping = False
+        includes_embed_tokens = False
+        for name, loaded_weight in weights:
+            assert "mask_hidden" not in name, (
+                "DFlash should use mask_token_id to embed the padding hidden state"
+            )
+            if "t2d" in name:
+                continue
+            if "d2t" in name:
+                name = name.replace("d2t", "draft_id_to_target_id")
+                includes_draft_id_mapping = True
+            elif "lm_head" not in name:
+                name = "model." + name
+            if "embed_tokens" in name:
+                includes_embed_tokens = True
+            model_weights[name] = loaded_weight
+            process_eagle_weight(self, name)
+
+        skip_substrs = []
+        if not includes_draft_id_mapping:
+            skip_substrs.append("draft_id_to_target_id")
+        if not includes_embed_tokens:
+            skip_substrs.append("embed_tokens")
+        if not self.model.use_aux_hidden_state:
+            skip_substrs.append("fc.")
+        loader = AutoWeightsLoader(
+            self,
+            skip_prefixes=None,
+            skip_substrs=skip_substrs,
+        )
+        loader.load_weights(model_weights.items())
+        self.model._build_fused_kv_buffers()
diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py
index f2ce070be8b4..4ec1be3367d8 100644
--- a/vllm/model_executor/models/qwen3_moe.py
+++ b/vllm/model_executor/models/qwen3_moe.py
@@ -43,7 +43,10 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
@@ -205,14 +208,13 @@ def __init__(
             self.shared_expert_gate = None
             self.shared_expert = None
 
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.shared_expert,
             gate=self.gate,
             num_experts=self.n_routed_experts,
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=config.norm_topk_prob,
             quant_config=quant_config,
             prefix=f"{prefix}.experts",
@@ -232,24 +234,25 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         if self.is_sequence_parallel:
             hidden_states = sequence_parallel_chunk(hidden_states)
 
-        # router_logits: (num_tokens, n_experts)
-        router_logits, _ = self.gate(hidden_states)
-        shared_out, fused_out = self.experts(
-            hidden_states=hidden_states, router_logits=router_logits
-        )
-        final_hidden_states = (
-            shared_out + fused_out if shared_out is not None else fused_out
-        )
+        if self.experts.is_internal_router:
+            # In this case, the gate/router runs inside the FusedMoE class
+            final_hidden_states = self.experts(
+                hidden_states=hidden_states, router_logits=hidden_states
+            )
+        else:
+            # Actually this will be dead code, since we always pass gate into
+            # FusedMoE in the current implementation. But we keep this code
+            # here for clarity and future flexibility.
+            router_logits, _ = self.gate(hidden_states)
+            final_hidden_states = self.experts(
+                hidden_states=hidden_states, router_logits=router_logits
+            )
 
         if self.is_sequence_parallel:
             final_hidden_states = tensor_model_parallel_all_gather(
                 final_hidden_states, 0
             )
             final_hidden_states = final_hidden_states[:num_tokens]
-        elif self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(  # noqa E501
-                final_hidden_states
-            )
 
         # return to 1d if input is 1d
         return final_hidden_states.squeeze(0) if is_input_1d else final_hidden_states
@@ -516,7 +519,7 @@ def forward(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return SharedFusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py
index 75f1b506a01a..28e1846662be 100644
--- a/vllm/model_executor/models/qwen3_next.py
+++ b/vllm/model_executor/models/qwen3_next.py
@@ -8,6 +8,7 @@
 import torch
 from torch import nn
 
+from vllm._aiter_ops import rocm_aiter_ops
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import (
     CacheConfig,
@@ -23,7 +24,10 @@
 )
 from vllm.logger import init_logger
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import (
     GemmaRMSNorm as Qwen3NextRMSNorm,
 )
@@ -33,7 +37,9 @@
     RowParallelLinear,
 )
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
-from vllm.model_executor.layers.mamba.gdn_linear_attn import GatedDeltaNetAttention
+from vllm.model_executor.layers.mamba.gdn.qwen_gdn_linear_attn import (
+    QwenGatedDeltaNetAttention,
+)
 from vllm.model_executor.layers.mamba.mamba_utils import (
     MambaStateCopyFunc,
     MambaStateCopyFuncCalculator,
@@ -56,6 +62,7 @@
 from vllm.transformers_utils.configs.qwen3_next import Qwen3NextConfig
 
 from .interfaces import (
+    EagleModelMixin,
     HasInnerState,
     IsHybrid,
     MixtureOfExperts,
@@ -131,7 +138,12 @@ def __init__(self, vllm_config: VllmConfig, prefix: str = ""):
             prefix=f"{prefix}.shared_expert_gate",
         )
 
-        if config.shared_expert_intermediate_size > 0:
+        if (
+            rocm_aiter_ops.is_fusion_moe_shared_experts_enabled()
+            or config.shared_expert_intermediate_size <= 0
+        ):
+            self.shared_expert = None
+        else:
             self.shared_expert = Qwen3NextMLP(
                 hidden_size=config.hidden_size,
                 intermediate_size=config.shared_expert_intermediate_size,
@@ -139,25 +151,27 @@ def __init__(self, vllm_config: VllmConfig, prefix: str = ""):
                 quant_config=quant_config,
                 reduce_results=False,
                 expert_gate=self.shared_expert_gate,
+                is_sequence_parallel=self.is_sequence_parallel,
                 prefix=f"{prefix}.shared_expert",
             )
-        else:
-            self.shared_expert = None
 
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.shared_expert,
             gate=self.gate,
             num_experts=self.n_routed_experts,
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=getattr(config, "norm_topk_prob", True),
             quant_config=quant_config,
             prefix=f"{prefix}.experts",
             enable_eplb=self.enable_eplb,
             num_redundant_experts=self.n_redundant_experts,
             is_sequence_parallel=self.is_sequence_parallel,
+            n_shared_experts=1 if self.shared_expert is None else None,
+            shared_expert_gate=self.shared_expert_gate
+            if self.shared_expert is None
+            else None,
         )
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
@@ -181,18 +195,11 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
                 hidden_states=hidden_states, router_logits=router_logits
             )
 
-        if self.shared_expert is not None:
-            final_hidden_states = final_hidden_states[0] + final_hidden_states[1]
-
         if self.is_sequence_parallel:
             final_hidden_states = tensor_model_parallel_all_gather(
                 final_hidden_states, 0
             )
             final_hidden_states = final_hidden_states[:num_tokens]
-        elif self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(  # noqa E501
-                final_hidden_states
-            )
 
         return final_hidden_states.view(orig_shape)
 
@@ -332,7 +339,7 @@ def __init__(
         self.layer_idx = extract_layer_index(prefix)
 
         if self.layer_type == "linear_attention":
-            self.linear_attn = GatedDeltaNetAttention(
+            self.linear_attn = QwenGatedDeltaNetAttention(
                 config,
                 vllm_config=vllm_config,
                 prefix=f"{prefix}.linear_attn",
@@ -454,7 +461,7 @@ def forward(
 
 
 @support_torch_compile
-class Qwen3NextModel(nn.Module):
+class Qwen3NextModel(nn.Module, EagleModelMixin):
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
 
@@ -492,8 +499,6 @@ def get_layer(prefix: str):
         else:
             self.norm = PPMissingLayer()
 
-        self.aux_hidden_state_layers: tuple[int, ...] = ()
-
     def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
         return self.embed_tokens(input_ids)
 
@@ -515,20 +520,19 @@ def forward(
             hidden_states = intermediate_tensors["hidden_states"]
             residual = intermediate_tensors["residual"]
 
-        aux_hidden_states = []
+        aux_hidden_states = self._maybe_add_hidden_state([], 0, hidden_states, residual)
         for layer_idx, layer in enumerate(
             islice(self.layers, self.start_layer, self.end_layer),
             start=self.start_layer,
         ):
-            if layer_idx in self.aux_hidden_state_layers:
-                aux_hidden_states.append(
-                    hidden_states + residual if residual is not None else hidden_states
-                )
             hidden_states, residual = layer(
                 positions=positions,
                 hidden_states=hidden_states,
                 residual=residual,
             )
+            self._maybe_add_hidden_state(
+                aux_hidden_states, layer_idx + 1, hidden_states, residual
+            )
 
         if not get_pp_group().is_last_rank:
             return IntermediateTensors(
@@ -542,12 +546,15 @@ def forward(
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        return SharedFusedMoE.make_expert_params_mapping(
+        num_experts = getattr(self.config, "num_experts", 0)
+        if rocm_aiter_ops.is_fusion_moe_shared_experts_enabled():
+            num_experts += 1
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
             ckpt_up_proj_name="up_proj",
-            num_experts=getattr(self.config, "num_experts", 0),
+            num_experts=num_experts,
             num_redundant_experts=self.num_redundant_experts,
         )
 
@@ -564,6 +571,10 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         params_dict = dict(self.named_parameters())
         loaded_params: set[str] = set()
         expert_params_mapping = self.get_expert_mapping()
+
+        is_fse = rocm_aiter_ops.is_fusion_moe_shared_experts_enabled()
+        num_routed = getattr(self.config, "num_experts", 0)
+
         for name, loaded_weight in weights:
             if "rotary_emb.inv_freq" in name:
                 continue
@@ -577,6 +588,13 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                 if name is None:
                     continue
 
+            # FSE: remap shared_expert weights to the fused expert slot
+            if is_fse and "mlp.shared_expert." in name:
+                name = name.replace(
+                    "mlp.shared_expert.",
+                    f"mlp.experts.{num_routed}.",
+                )
+
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 if weight_name not in name:
                     continue
diff --git a/vllm/model_executor/models/qwen3_next_mtp.py b/vllm/model_executor/models/qwen3_next_mtp.py
index 751d7c23eb97..4d8ff951c098 100644
--- a/vllm/model_executor/models/qwen3_next_mtp.py
+++ b/vllm/model_executor/models/qwen3_next_mtp.py
@@ -7,11 +7,14 @@
 import torch
 from torch import nn
 
+from vllm._aiter_ops import rocm_aiter_ops
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import VllmConfig
 from vllm.distributed.parallel_state import get_pp_group
 from vllm.logger import init_logger
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.linear import ColumnParallelLinear
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -145,13 +148,18 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        expert_params_mapping = FusedMoE.make_expert_params_mapping(
+        is_fse = rocm_aiter_ops.is_fusion_moe_shared_experts_enabled()
+        num_experts = self.config.num_experts
+        if is_fse:
+            num_experts += 1
+        expert_params_mapping = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
             ckpt_up_proj_name="up_proj",
-            num_experts=self.config.num_experts,
+            num_experts=num_experts,
         )
+        num_routed = self.config.num_experts
 
         params_dict = dict(self.named_parameters())
         loaded_params: set[str] = set()
@@ -159,6 +167,13 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
             if "rotary_emb.inv_freq" in name:
                 continue
 
+            # FSE: remap shared_expert weights to the fused expert slot
+            if is_fse and "mlp.shared_expert." in name:
+                name = name.replace(
+                    "mlp.shared_expert.",
+                    f"mlp.experts.{num_routed}.",
+                )
+
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 if weight_name not in name:
                     continue
diff --git a/vllm/model_executor/models/qwen3_omni_moe_thinker.py b/vllm/model_executor/models/qwen3_omni_moe_thinker.py
index d9bc02c650d4..6d5123effa5d 100755
--- a/vllm/model_executor/models/qwen3_omni_moe_thinker.py
+++ b/vllm/model_executor/models/qwen3_omni_moe_thinker.py
@@ -24,7 +24,7 @@
 
 from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
 from functools import partial
-from typing import Any, Literal, cast
+from typing import Any, cast
 
 import numpy as np
 import torch
@@ -46,6 +46,7 @@
 
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import ModelConfig, SpeechToTextConfig, VllmConfig
+from vllm.config.speech_to_text import SpeechToTextParams
 from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
 from vllm.inputs import PromptType
 from vllm.logger import init_logger
@@ -57,6 +58,7 @@
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
     QKVParallelLinear,
+    ReplicatedLinear,
     RowParallelLinear,
 )
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
@@ -78,6 +80,7 @@
 )
 from vllm.sequence import IntermediateTensors
 from vllm.transformers_utils.processor import cached_processor_from_config
+from vllm.utils.torch_utils import async_tensor_h2d
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
 
 from .interfaces import (
@@ -357,7 +360,13 @@ def __init__(
         conv_out_dim = config.downsample_hidden_size * (
             (((config.num_mel_bins + 1) // 2 + 1) // 2 + 1) // 2
         )
-        self.conv_out = nn.Linear(conv_out_dim, config.d_model, bias=False)
+        self.conv_out = ReplicatedLinear(
+            conv_out_dim,
+            config.d_model,
+            bias=False,
+            return_bias=False,
+            prefix=f"{prefix}.conv_out",
+        )
 
         # Transformer encoder layers
         self.layers = nn.ModuleList(
@@ -372,9 +381,21 @@ def __init__(
 
         # Output layers
         self.ln_post = nn.LayerNorm(config.d_model)
-        self.proj1 = nn.Linear(config.d_model, config.d_model)
+        self.proj1 = ReplicatedLinear(
+            config.d_model,
+            config.d_model,
+            bias=True,
+            return_bias=False,
+            prefix=f"{prefix}.proj1",
+        )
         self.act = _ACTIVATION_REGISTRY[config.activation_function]
-        self.proj2 = nn.Linear(config.d_model, config.output_dim)
+        self.proj2 = ReplicatedLinear(
+            config.d_model,
+            config.output_dim,
+            bias=True,
+            return_bias=False,
+            prefix=f"{prefix}.proj2",
+        )
 
         # Get attention backend
         self.attn_backend = get_vit_attn_backend(
@@ -482,9 +503,9 @@ def forward(
             cu_chunk_lens.extend([window_aftercnn] * num_full_chunks)
             if remainder:
                 cu_chunk_lens.append(remainder)
-        cu_seqlens = torch.tensor(cu_chunk_lens, device=aftercnn_lens.device).cumsum(
-            -1, dtype=torch.int32
-        )
+        cu_seqlens = async_tensor_h2d(
+            cu_chunk_lens, dtype=torch.int32, device=aftercnn_lens.device
+        ).cumsum(-1, dtype=torch.int32)
 
         max_seqlen = self.compute_attn_mask_seqlen(cu_seqlens)
 
@@ -841,6 +862,7 @@ def rot_pos_emb(self, grid_thw):
         # Use pre-computed cos_sin_cache from RotaryEmbedding
         cos, sin = self.rotary_pos_emb.get_cos_sin(max_grid_size)
 
+        pos_ids = pos_ids.to(cos.device, non_blocking=True)
         cos_combined = cos[pos_ids].flatten(1)
         sin_combined = sin[pos_ids].flatten(1)
 
@@ -1627,8 +1649,6 @@ class Qwen3OmniMoeConditionalGenerationMixin(Qwen2_5OmniConditionalGenerationMix
     def _process_audio_input(
         self,
         audio_input: Qwen2_5OmniAudioFeatureInputs,
-        audio_hashes: list[str] | None = None,
-        cached_audio_features: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, ...]:
         input_features = audio_input["input_features"]
         audio_feature_lengths = audio_input["audio_feature_lengths"]
@@ -1735,6 +1755,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
                     )
                     for _ in range(self.deepstack_num_level)
                 ]
+                # Tracks the valid token span currently stored in the buffer.
+                # Zero means there is no active deepstack payload to consume.
+                self.deepstack_input_embeds_num_tokens = 0
 
         with self._mark_language_model(vllm_config):
             self.language_model = Qwen3MoeLLMForCausalLM(
@@ -1755,6 +1778,8 @@ def _get_deepstack_input_embeds(
     ) -> IntermediateTensors | None:
         if not getattr(self, "deepstack_input_embeds", None):
             return None  # If vision tower is skipped
+        if getattr(self, "deepstack_input_embeds_num_tokens", 0) == 0:
+            return None
 
         # get deepstack_input_embeds from buffer, and clear the buffer
         return IntermediateTensors(
@@ -1786,15 +1811,19 @@ def _set_deepstack_input_embeds(self, deepstack_input_embeds: torch.Tensor) -> N
             self.deepstack_input_embeds[idx][:num_tokens].copy_(
                 deepstack_input_embeds[idx]
             )
+        self.deepstack_input_embeds_num_tokens = num_tokens
 
     def _clear_deepstack_input_embeds(self, num_tokens: int) -> None:
         if not getattr(self, "deepstack_input_embeds", None):
             return
+        if getattr(self, "deepstack_input_embeds_num_tokens", 0) == 0:
+            return
 
         # clear deepstack_input_embeds in buffer
         if num_tokens > 0:
             for idx in range(self.deepstack_num_level):
                 self.deepstack_input_embeds[idx][:num_tokens].zero_()
+            self.deepstack_input_embeds_num_tokens = 0
 
     def _parse_and_validate_multimodal_inputs(self, **kwargs: object) -> dict:
         mm_input_by_modality = {}
@@ -1869,8 +1898,9 @@ def embed_input_ids(
         # both the deepstack path and the final embedding merge.
         video_token_id = self.config.video_token_id
         audio_token_id = self.config.audio_token_id
-        is_video = is_multimodal & (input_ids == video_token_id)
-        is_audio = is_multimodal & (input_ids == audio_token_id)
+        input_ids_cpu = input_ids.cpu()
+        is_video = is_multimodal & (input_ids_cpu == video_token_id)
+        is_audio = is_multimodal & (input_ids_cpu == audio_token_id)
         num_video = is_video.sum().item()
         num_audio = is_audio.sum().item()
 
@@ -2183,19 +2213,17 @@ def get_speech_to_text_config(
         )
 
     @classmethod
-    def get_generation_prompt(
-        cls,
-        audio: np.ndarray,
-        stt_config: SpeechToTextConfig,
-        model_config: ModelConfig,
-        language: str | None,
-        task_type: Literal["transcribe", "translate"],
-        request_prompt: str,
-        to_language: str | None,
-    ) -> PromptType:
+    def get_generation_prompt(cls, stt_params: SpeechToTextParams) -> PromptType:
         """
         Construct a transcription/translation prompt for Qwen3-Omni.
         """
+        audio = stt_params.audio
+        stt_config = stt_params.stt_config
+        model_config = stt_params.model_config
+        language = stt_params.language
+        task_type = stt_params.task_type
+        to_language = stt_params.to_language
+        request_prompt = stt_params.request_prompt
         # Transcribe this audio [into <language>] | for transcription
         # Translate this audio [from <language> into <to_language>] | for translation
         instruction = "Transcribe" if task_type == "transcribe" else "Translate"
diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py
index 418b75e382f3..d7765351e6dd 100644
--- a/vllm/model_executor/models/qwen3_vl.py
+++ b/vllm/model_executor/models/qwen3_vl.py
@@ -96,9 +96,12 @@
 from vllm.sequence import IntermediateTensors
 from vllm.tokenizers.protocol import TokenizerLike
 from vllm.tokenizers.registry import cached_tokenizer_from_config
+from vllm.triton_utils import HAS_TRITON, tl, triton
 from vllm.utils.collection_utils import is_list_of
 from vllm.utils.math_utils import round_up
+from vllm.v1.worker.encoder_cudagraph_defs import EncoderCudaGraphReplayBuffers
 
+from ...utils.torch_utils import async_tensor_h2d
 from .interfaces import (
     MultiModalEmbeddings,
     SupportsEagle,
@@ -134,6 +137,7 @@
     maybe_prefix,
 )
 from .vision import (
+    get_fp8_padded_hidden_size,
     get_vit_attn_backend,
     is_vit_use_data_parallel,
     run_dp_sharded_mrope_vision_model,
@@ -145,6 +149,201 @@
 # of the maximum size.
 DUMMY_VIDEO_NUM_FRAMES = 2048
 
+# ---------------------------------------------------------------------------
+# Triton kernel: fused bilinear position-embedding interpolation
+# ---------------------------------------------------------------------------
+# Replaces many small eager-mode CUDA kernels with a single launch.
+# The spatial-merge reorder is baked into the index math so the output
+# is ready to be added to the patch embeddings directly.
+# ---------------------------------------------------------------------------
+
+if HAS_TRITON:
+
+    @triton.jit
+    def _bilinear_pos_embed_kernel(
+        embed_ptr,
+        output_ptr,
+        H,
+        W,
+        h_scale,
+        w_scale,
+        NUM_GRID: tl.constexpr,
+        M_SIZE: tl.constexpr,
+        HIDDEN_DIM: tl.constexpr,
+        BLOCK_D: tl.constexpr,
+    ):
+        """Fused bilinear pos-embed interpolation with spatial-merge reorder."""
+        pid = tl.program_id(0)
+        total_spatial = H * W
+        spatial_idx = pid % total_spatial
+
+        num_blocks_w = W // M_SIZE
+        block_idx = spatial_idx // (M_SIZE * M_SIZE)
+        local_idx = spatial_idx % (M_SIZE * M_SIZE)
+        br = block_idx // num_blocks_w
+        bc = block_idx % num_blocks_w
+        lr = local_idx // M_SIZE
+        lc = local_idx % M_SIZE
+        row = br * M_SIZE + lr
+        col = bc * M_SIZE + lc
+
+        h_frac = row.to(tl.float32) * h_scale
+        w_frac = col.to(tl.float32) * w_scale
+
+        hf = tl.math.floor(h_frac).to(tl.int32)
+        wf = tl.math.floor(w_frac).to(tl.int32)
+        hc = tl.minimum(hf + 1, NUM_GRID - 1)
+        wc = tl.minimum(wf + 1, NUM_GRID - 1)
+
+        dh = h_frac - hf.to(tl.float32)
+        dw = w_frac - wf.to(tl.float32)
+        w11 = dh * dw
+        w10 = dh - w11
+        w01 = dw - w11
+        w00 = 1.0 - dh - w01
+
+        off00 = (hf * NUM_GRID + wf) * HIDDEN_DIM
+        off01 = (hf * NUM_GRID + wc) * HIDDEN_DIM
+        off10 = (hc * NUM_GRID + wf) * HIDDEN_DIM
+        off11 = (hc * NUM_GRID + wc) * HIDDEN_DIM
+        out_off = pid * HIDDEN_DIM
+
+        # Cast weights to output dtype so the multiply-accumulate stays
+        # in the same precision as the native PyTorch implementation.
+        out_dtype = output_ptr.dtype.element_ty
+        w00_c = w00.to(out_dtype)
+        w01_c = w01.to(out_dtype)
+        w10_c = w10.to(out_dtype)
+        w11_c = w11.to(out_dtype)
+
+        for d in tl.range(0, HIDDEN_DIM, BLOCK_D):
+            cols = d + tl.arange(0, BLOCK_D)
+            mask = cols < HIDDEN_DIM
+
+            e00 = tl.load(embed_ptr + off00 + cols, mask=mask)
+            e01 = tl.load(embed_ptr + off01 + cols, mask=mask)
+            e10 = tl.load(embed_ptr + off10 + cols, mask=mask)
+            e11 = tl.load(embed_ptr + off11 + cols, mask=mask)
+
+            val = w00_c * e00 + w01_c * e01 + w10_c * e10 + w11_c * e11
+
+            tl.store(output_ptr + out_off + cols, val, mask=mask)
+
+    def triton_pos_embed_interpolate(
+        embed_weight: torch.Tensor,
+        t: int,
+        h: int,
+        w: int,
+        num_grid_per_side: int,
+        m_size: int,
+        dtype: torch.dtype,
+    ) -> torch.Tensor:
+        """Launch the fused Triton kernel for one (t,h,w) grid.
+
+        Returns a tensor of shape ``(t * h * w, hidden_dim)`` with the
+        bilinearly-interpolated position embeddings in spatial-merge order.
+        """
+        assert h % m_size == 0 and w % m_size == 0, (
+            f"h={h} and w={w} must be divisible by m_size={m_size}"
+        )
+        hidden_dim = embed_weight.shape[1]
+        total_out = t * h * w
+        output = torch.empty(
+            total_out,
+            hidden_dim,
+            device=embed_weight.device,
+            dtype=dtype,
+        )
+
+        h_scale = float(num_grid_per_side - 1) / float(h - 1) if h > 1 else 0.0
+        w_scale = float(num_grid_per_side - 1) / float(w - 1) if w > 1 else 0.0
+
+        BLOCK_D = triton.next_power_of_2(hidden_dim)
+
+        _bilinear_pos_embed_kernel[(total_out,)](
+            embed_weight,
+            output,
+            h,
+            w,
+            h_scale,
+            w_scale,
+            num_grid_per_side,
+            m_size,
+            hidden_dim,
+            BLOCK_D,
+        )
+        return output
+
+
+def pos_embed_interpolate_native(
+    embed_weight: torch.Tensor,
+    t: int,
+    h: int,
+    w: int,
+    num_grid_per_side: int,
+    m_size: int,
+    dtype: torch.dtype,
+) -> torch.Tensor:
+    """Eager PyTorch bilinear position-embedding interpolation.
+
+    Returns a tensor of shape ``(t * h * w, hidden_dim)`` with the
+    bilinearly-interpolated position embeddings in spatial-merge order.
+    """
+    assert h % m_size == 0 and w % m_size == 0, (
+        f"h={h} and w={w} must be divisible by m_size={m_size}"
+    )
+    hidden_dim = embed_weight.shape[1]
+    device = embed_weight.device
+
+    h_idxs = torch.linspace(
+        0,
+        num_grid_per_side - 1,
+        h,
+        dtype=torch.float32,
+        device=device,
+    )
+    w_idxs = torch.linspace(
+        0,
+        num_grid_per_side - 1,
+        w,
+        dtype=torch.float32,
+        device=device,
+    )
+
+    h_floor = h_idxs.to(torch.long)
+    w_floor = w_idxs.to(torch.long)
+    h_ceil = torch.clamp(h_floor + 1, max=num_grid_per_side - 1)
+    w_ceil = torch.clamp(w_floor + 1, max=num_grid_per_side - 1)
+
+    dh = h_idxs - h_floor
+    dw = w_idxs - w_floor
+
+    dh_grid, dw_grid = torch.meshgrid(dh, dw, indexing="ij")
+    h_floor_grid, w_floor_grid = torch.meshgrid(h_floor, w_floor, indexing="ij")
+    h_ceil_grid, w_ceil_grid = torch.meshgrid(h_ceil, w_ceil, indexing="ij")
+
+    w11 = dh_grid * dw_grid
+    w10 = dh_grid - w11
+    w01 = dw_grid - w11
+    w00 = 1 - dh_grid - w01
+
+    h_grid = torch.stack([h_floor_grid, h_floor_grid, h_ceil_grid, h_ceil_grid])
+    w_grid = torch.stack([w_floor_grid, w_ceil_grid, w_floor_grid, w_ceil_grid])
+    h_grid_idx = h_grid * num_grid_per_side
+
+    indices = (h_grid_idx + w_grid).reshape(4, -1)
+    weights = torch.stack([w00, w01, w10, w11], dim=0).reshape(4, -1, 1)
+    weights = weights.to(dtype=dtype)
+
+    embeds = embed_weight[indices]
+    embeds *= weights
+    combined = embeds.sum(dim=0)
+
+    combined = combined.reshape(h // m_size, m_size, w // m_size, m_size, hidden_dim)
+    combined = combined.permute(0, 2, 1, 3, 4).reshape(1, -1, hidden_dim)
+    repeated = combined.expand(t, -1, -1).reshape(-1, hidden_dim)
+    return repeated.to(dtype=dtype)
+
 
 class Qwen3_VisionPatchEmbed(nn.Module):
     def __init__(
@@ -365,6 +564,13 @@ def __init__(
 
         norm_layer = partial(nn.LayerNorm, eps=norm_eps)
         head_dim = self.hidden_size // self.num_heads
+
+        # FP8 attention: Q/K/V become independent contiguous tensors
+        # after quantization, so cu_seqlens uses uniform stride (no 3x V).
+        self.fp8_padded_hidden_size = get_fp8_padded_hidden_size(
+            self.num_heads, head_dim
+        )
+
         self.rotary_pos_emb = get_rope(
             head_size=head_dim,
             max_position=8192,
@@ -470,63 +676,22 @@ def rot_pos_emb(self, grid_thw: list[list[int]]):
         return cos_combined, sin_combined
 
     def fast_pos_embed_interpolate(self, grid_thw: list[list[int]]) -> torch.Tensor:
-        num_grid_per_side = self.num_grid_per_side
-        m_size = self.spatial_merge_size
-        hidden_dim = self.pos_embed.embedding_dim
-
+        interpolate_fn = (
+            triton_pos_embed_interpolate if HAS_TRITON else pos_embed_interpolate_native
+        )
         outputs = []
         for t, h, w in grid_thw:
-            h_idxs = torch.linspace(
-                0, num_grid_per_side - 1, h, dtype=torch.float32, device=self.device
-            )
-            w_idxs = torch.linspace(
-                0, num_grid_per_side - 1, w, dtype=torch.float32, device=self.device
-            )
-
-            h_floor = h_idxs.to(torch.long)
-            w_floor = w_idxs.to(torch.long)
-            h_ceil = torch.clamp(h_floor + 1, max=num_grid_per_side - 1)
-            w_ceil = torch.clamp(w_floor + 1, max=num_grid_per_side - 1)
-
-            dh = h_idxs - h_floor
-            dw = w_idxs - w_floor
-
-            # Create meshgrid view for all h, w vars
-            dh_grid, dw_grid = torch.meshgrid(dh, dw, indexing="ij")
-            h_floor_grid, w_floor_grid = torch.meshgrid(h_floor, w_floor, indexing="ij")
-            h_ceil_grid, w_ceil_grid = torch.meshgrid(h_ceil, w_ceil, indexing="ij")
-
-            # original computation of weights
-            # w00 = (1 - dh_grid) * (1 - dw_grid)
-            # w01 = (1 - dh_grid) * dw_grid
-            # w10 = dh_grid * (1 - dw_grid)
-            # w11 = dh_grid * dw_grid
-            # we reuse w11 here to avoid duplicate
-            # dh_grid * dw_grid computation
-            w11 = dh_grid * dw_grid
-            w10 = dh_grid - w11
-            w01 = dw_grid - w11
-            w00 = 1 - dh_grid - w01
-
-            h_grid = torch.stack([h_floor_grid, h_floor_grid, h_ceil_grid, h_ceil_grid])
-            w_grid = torch.stack([w_floor_grid, w_ceil_grid, w_floor_grid, w_ceil_grid])
-            h_grid_idx = h_grid * num_grid_per_side
-
-            indices = (h_grid_idx + w_grid).reshape(4, -1)
-            weights = torch.stack([w00, w01, w10, w11], dim=0).reshape(4, -1, 1)
-            weights = weights.to(dtype=self.dtype)
-
-            embeds = self.pos_embed(indices)
-            embeds *= weights
-            combined = embeds.sum(dim=0)
-
-            combined = combined.reshape(
-                h // m_size, m_size, w // m_size, m_size, hidden_dim
+            outputs.append(
+                interpolate_fn(
+                    self.pos_embed.weight,
+                    t,
+                    h,
+                    w,
+                    self.num_grid_per_side,
+                    self.spatial_merge_size,
+                    self.dtype,
+                )
             )
-            combined = combined.permute(0, 2, 1, 3, 4).reshape(1, -1, hidden_dim)
-            repeated = combined.expand(t, -1, -1).reshape(-1, hidden_dim)
-            outputs.append(repeated)
-
         return torch.cat(outputs, dim=0)
 
     def prepare_encoder_metadata(
@@ -534,6 +699,7 @@ def prepare_encoder_metadata(
         grid_thw_list: list[list[int]],
         *,
         max_batch_size: int | None = None,
+        max_frames_per_batch: int | None = None,
         max_seqlen_override: int | None = None,
         device: torch.device | None = None,
     ) -> dict[str, torch.Tensor | None]:
@@ -546,6 +712,10 @@ def prepare_encoder_metadata(
             grid_thw_list: Grid configurations as list of [t, h, w].
             max_batch_size: If set, pad cu_seqlens to this size
                 (needed for CUDA graph capture/replay).
+            max_frames_per_batch: If set, overrides max_batch_size for
+                cu_seqlens padding. For video inputs each item contributes
+                T attention sequences (frames); this sizes the buffer to
+                the total frame budget so video replays never overflow.
             max_seqlen_override: If set, use this value for max_seqlen
                 instead of computing from cu_seqlens (needed for CUDA
                 graph capture to cover worst-case replay scenarios).
@@ -570,15 +740,21 @@ def prepare_encoder_metadata(
         )
         cu_seqlens = np.concatenate([np.zeros(1, dtype=np.int32), cu_seqlens])
 
-        # Pad cu_seqlens if max_batch_size specified
-        if max_batch_size is not None:
+        # Pad cu_seqlens to the required number of sequences.
+        # For videos each item contributes T frames = T attention sequences,
+        # so the total can exceed max_batch_size. max_frames_per_batch
+        # overrides the pad target when set.
+        pad_to = (
+            max_frames_per_batch if max_frames_per_batch is not None else max_batch_size
+        )
+        if pad_to is not None:
             num_seqs = len(cu_seqlens) - 1
-            if num_seqs < max_batch_size:
+            if num_seqs < pad_to:
                 cu_seqlens = np.concatenate(
                     [
                         cu_seqlens,
                         np.full(
-                            max_batch_size - num_seqs,
+                            pad_to - num_seqs,
                             cu_seqlens[-1],
                             dtype=np.int32,
                         ),
@@ -609,6 +785,7 @@ def prepare_encoder_metadata(
             self.hidden_size,
             self.tp_size,
             device,
+            fp8_padded_hidden_size=self.fp8_padded_hidden_size,
         )
 
         return metadata
@@ -1473,6 +1650,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"):
         multimodal_config = vllm_config.model_config.multimodal_config
 
         self.config = config
+        self.model_config = vllm_config.model_config
         self._tokenizer = cached_tokenizer_from_config(vllm_config.model_config)
         self.multimodal_config = multimodal_config
         self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
@@ -1507,6 +1685,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"):
                     )
                     for _ in range(self.deepstack_num_level)
                 ]
+                # Tracks the valid token span currently stored in the buffer.
+                # Zero means there is no active deepstack payload to consume.
+                self.deepstack_input_embeds_num_tokens = 0
 
         with self._mark_language_model(vllm_config):
             self.language_model = Qwen3LLMForCausalLM(
@@ -1517,7 +1698,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"):
         if not get_pp_group().is_first_rank and hasattr(
             config.vision_config, "deepstack_visual_indexes"
         ):
-            assert self.language_model.start_layer >= len(
+            assert self.language_model.model.start_layer >= len(
                 config.vision_config.deepstack_visual_indexes
             ), (
                 "start_layer should be greater than or equal to "
@@ -1534,6 +1715,8 @@ def _get_deepstack_input_embeds(
     ) -> IntermediateTensors | None:
         if not getattr(self, "deepstack_input_embeds", None):
             return None  # If vision tower is skipped
+        if getattr(self, "deepstack_input_embeds_num_tokens", 0) == 0:
+            return None
 
         # get deepstack_input_embeds from buffer, and clear the buffer
         return IntermediateTensors(
@@ -1565,26 +1748,43 @@ def _set_deepstack_input_embeds(self, deepstack_input_embeds: torch.Tensor) -> N
             self.deepstack_input_embeds[idx][:num_tokens].copy_(
                 deepstack_input_embeds[idx]
             )
+        self.deepstack_input_embeds_num_tokens = num_tokens
 
     def _clear_deepstack_input_embeds(self, num_tokens: int) -> None:
         if not getattr(self, "deepstack_input_embeds", None):
             return
+        if getattr(self, "deepstack_input_embeds_num_tokens", 0) == 0:
+            return
 
         # clear deepstack_input_embeds in buffer
         if num_tokens > 0:
             for idx in range(self.deepstack_num_level):
                 self.deepstack_input_embeds[idx][:num_tokens].zero_()
+            self.deepstack_input_embeds_num_tokens = 0
 
     # -- SupportsEncoderCudaGraph protocol methods --
 
     def get_encoder_cudagraph_config(self):
-        from vllm.v1.worker.gpu.mm.encoder_cudagraph_defs import (
+        from vllm.v1.worker.encoder_cudagraph_defs import (
             EncoderCudaGraphConfig,
         )
 
+        # When EVS pruning is enabled, embed_multimodal post-processes both
+        # image and video embeddings (mrope positions are appended for image,
+        # prune+append for video). The encoder CUDA graph path bypasses that
+        # post-process, producing inconsistent embedding formats vs eager. So
+        # disable CUDA graph for all modalities when pruning is on.
+        modalities = [] if self.is_multimodal_pruning_enabled else ["image", "video"]
+
+        # Compute max_frames_per_video for budget sizing.
+        max_frames = self.get_max_frames_per_video() if "video" in modalities else 1
+
         return EncoderCudaGraphConfig(
-            modalities=["image"],
-            input_key="pixel_values",
+            modalities=modalities,
+            input_key_by_modality={
+                "image": "pixel_values",
+                "video": "pixel_values_videos",
+            },
             buffer_keys=[
                 "pos_embeds",
                 "rotary_pos_emb_cos",
@@ -1594,51 +1794,100 @@ def get_encoder_cudagraph_config(self):
                 "sequence_lengths",
             ],
             out_hidden_size=self.visual.out_hidden_size,
+            max_frames_per_video=max_frames,
         )
 
+    def get_input_modality(
+        self,
+        mm_kwargs: dict[str, Any],
+    ) -> str:
+        if "image_grid_thw" in mm_kwargs:
+            return "image"
+        elif "video_grid_thw" in mm_kwargs:
+            return "video"
+        raise AssertionError("This line should be unreachable.")
+
+    def get_max_frames_per_video(self) -> int:
+        mm_registry = MULTIMODAL_REGISTRY
+        info = mm_registry.get_processing_info(self.model_config)
+        max_frames_per_video = info.get_num_frames_with_most_features(
+            seq_len=self.model_config.max_model_len,
+            mm_counts={"video": self.multimodal_config.get_limit_per_prompt("video")},
+        )
+        return max_frames_per_video
+
     def get_encoder_cudagraph_budget_range(
         self,
         vllm_config,
     ) -> tuple[int, int]:
         # Min: estimated smallest possible encoder input.
-        # 224x224 image → 16x16 patches, spatial_merge_size=2 → 8x8 = 64 tokens
+        # 224x224 image → 16x16 patches (patch_size=14)
+        #                 spatial_merge_size=2 → 8x8 = 64 tokens
         min_budget = 64
         # Max: capped by max_num_batched_tokens
-        max_budget = vllm_config.scheduler_config.max_num_batched_tokens
+        # TODO(shen-shanshan): the max_budget auto-infer needs to be optimized later.
+        max_budget = min(
+            vllm_config.scheduler_config.max_num_batched_tokens,
+            self.model_config.max_model_len,
+        )
         return (min_budget, max_budget)
 
-    def get_encoder_cudagraph_num_items(
+    def _get_pixel_values_by_modality(
         self,
         mm_kwargs: dict[str, Any],
-    ) -> int:
-        return len(mm_kwargs["image_grid_thw"])
-
-    def get_encoder_cudagraph_per_item_output_tokens(
+    ) -> torch.Tensor:
+        modality = self.get_input_modality(mm_kwargs)
+        if modality == "image":
+            return mm_kwargs["pixel_values"]
+        elif modality == "video":
+            return mm_kwargs["pixel_values_videos"]
+        raise AssertionError("This line should be unreachable.")
+
+    def _get_grid_thw_by_modality(
         self,
         mm_kwargs: dict[str, Any],
-    ) -> list[int]:
-        m = self.visual.spatial_merge_size
-        return [t * (h // m) * (w // m) for t, h, w in mm_kwargs["image_grid_thw"]]
-
-    def get_encoder_cudagraph_per_item_input_sizes(
+    ) -> list[tuple[int, int, int]]:
+        grid_thw_key = f"{self.get_input_modality(mm_kwargs)}_grid_thw"
+        grid_thw = mm_kwargs[grid_thw_key]
+        if not isinstance(grid_thw, list):
+            grid_thw = grid_thw.tolist()
+        return grid_thw
+
+    def get_encoder_cudagraph_item_specs(
         self,
         mm_kwargs: dict[str, Any],
-    ) -> list[int]:
-        return [t * h * w for t, h, w in mm_kwargs["image_grid_thw"]]
+    ):
+        from vllm.v1.worker.encoder_cudagraph_defs import EncoderItemSpec
+
+        m = self.visual.spatial_merge_size
+        grid_thw = self._get_grid_thw_by_modality(mm_kwargs)
+        return [
+            EncoderItemSpec(
+                input_size=t * h * w,
+                output_tokens=t * (h // m) * (w // m),
+            )
+            for t, h, w in grid_thw
+        ]
 
     def select_encoder_cudagraph_items(
         self,
         mm_kwargs: dict[str, Any],
         indices: list[int],
     ) -> dict[str, Any]:
-        grid_thw = mm_kwargs["image_grid_thw"]
-        pixel_values = mm_kwargs["pixel_values"]
+        grid_thw = self._get_grid_thw_by_modality(mm_kwargs)
+        pixel_values = self._get_pixel_values_by_modality(mm_kwargs)
 
         if len(indices) == 0:
-            return {
-                "pixel_values": pixel_values[:0],
-                "image_grid_thw": [],
-            }
+            if self.get_input_modality(mm_kwargs) == "image":
+                return {
+                    "pixel_values": pixel_values[:0],
+                    "image_grid_thw": [],
+                }
+            else:
+                return {
+                    "pixel_values_videos": pixel_values[:0],
+                    "video_grid_thw": [],
+                }
 
         # Compute cumulative patch offsets for slicing pixel_values
         patches_per_item = [t * h * w for t, h, w in grid_thw]
@@ -1651,31 +1900,62 @@ def select_encoder_cudagraph_items(
         )
         selected_grid = [grid_thw[i] for i in indices]
 
-        return {
-            "pixel_values": selected_pv,
-            "image_grid_thw": selected_grid,
-        }
+        if self.get_input_modality(mm_kwargs) == "image":
+            return {
+                "pixel_values": selected_pv,
+                "image_grid_thw": selected_grid,
+            }
+        else:
+            return {
+                "pixel_values_videos": selected_pv,
+                "video_grid_thw": selected_grid,
+            }
 
     def prepare_encoder_cudagraph_capture_inputs(
         self,
         token_budget: int,
         max_batch_size: int,
+        max_frames_per_batch: int,
         device: torch.device,
         dtype: torch.dtype,
     ):
-        from vllm.v1.worker.gpu.mm.encoder_cudagraph_defs import (
+        from vllm.v1.worker.encoder_cudagraph_defs import (
             EncoderCudaGraphCaptureInputs,
         )
 
         spatial_merge_size = self.visual.spatial_merge_size
-        per_image_output = token_budget // max_batch_size
-
-        # Synthetic rectangular grid: [1, merge, per_image_output * merge]
-        # produces exactly per_image_output tokens per image.
-        grid_config = [
-            [1, spatial_merge_size, per_image_output * spatial_merge_size]
-            for _ in range(max_batch_size)
-        ]
+        # Ceil so the buffer fits the worst case of one item using the full
+        # budget. Floor under-allocates when budget is not a multiple of
+        # max_batch_size.
+        per_mm_item_output = (token_budget + max_batch_size - 1) // max_batch_size
+
+        frames_per_item = max_frames_per_batch // max_batch_size
+        if frames_per_item > 1:
+            # Build the capture grid using a video-format layout so that
+            # cu_seqlens is sized for video replays from the start.
+            # cu_seqlens has one entry per attention sequence (one per frame),
+            # so using T > 1 per item makes the buffer large enough without
+            # relying solely on padding.
+            # Ceiling ensures frames_per_item * tokens_per_frame >= per_mm_item_output
+            # so the pixel_values buffer covers any valid single-item replay.
+            tokens_per_frame = (
+                per_mm_item_output + frames_per_item - 1
+            ) // frames_per_item
+            # Video-format grid_config (T=frames_per_item).
+            grid_config = [
+                [
+                    frames_per_item,
+                    spatial_merge_size,
+                    tokens_per_frame * spatial_merge_size,
+                ]
+                for _ in range(max_batch_size)
+            ]
+        else:
+            # Image-format grid_config (T=1).
+            grid_config = [
+                [1, spatial_merge_size, per_mm_item_output * spatial_merge_size]
+                for _ in range(max_batch_size)
+            ]
 
         # Create dummy pixel_values
         patch_embed = self.visual.patch_embed
@@ -1693,15 +1973,18 @@ def prepare_encoder_cudagraph_capture_inputs(
         # Override max_seqlen with a safe upper bound for capture.
         # max_seqlen.item() gets baked into the CUDA graph (not replayed),
         # so the capture value must cover any replay scenario.
-        # Worst case: 1 image consuming the full budget ->
+        # Worst case: 1 item consuming the full budget ->
         # seq_len = token_budget * spatial_merge_size^2.
         buffers = self.visual.prepare_encoder_metadata(
             grid_config,
             max_batch_size=max_batch_size,
+            max_frames_per_batch=max_frames_per_batch,
             max_seqlen_override=token_budget * (spatial_merge_size**2),
             device=device,
         )
 
+        # Just use image-modality dummy input_buffer for capturing, since it's also
+        # compatible for video inputs (has the same shape: [num_patches, C*T*P*P]).
         mm_kwargs = {
             "pixel_values": dummy_pixel_values,
             "image_grid_thw": grid_config,
@@ -1716,17 +1999,23 @@ def prepare_encoder_cudagraph_replay_buffers(
         self,
         mm_kwargs: dict[str, Any],
         max_batch_size: int,
+        max_frames_per_batch: int,
     ):
-        from vllm.v1.worker.gpu.mm.encoder_cudagraph_defs import (
-            EncoderCudaGraphReplayBuffers,
-        )
+        modality = self.get_input_modality(mm_kwargs)
+        grid_thw_list = self._get_grid_thw_by_modality(mm_kwargs)
 
-        grid_thw_list = mm_kwargs["image_grid_thw"]
-
-        buffers = self.visual.prepare_encoder_metadata(
-            grid_thw_list,
-            max_batch_size=max_batch_size,
-        )
+        if modality == "image":
+            buffers = self.visual.prepare_encoder_metadata(
+                grid_thw_list,
+                max_batch_size=max_batch_size,
+            )
+        elif modality == "video":
+            buffers = self.visual.prepare_encoder_metadata(
+                grid_thw_list,
+                max_frames_per_batch=max_frames_per_batch,
+            )
+        else:
+            raise AssertionError("This line should be unreachable.")
 
         return EncoderCudaGraphReplayBuffers(buffers=buffers)
 
@@ -1735,16 +2024,16 @@ def encoder_cudagraph_forward(
         mm_kwargs: dict[str, Any],
         buffers: dict[str, torch.Tensor],
     ) -> torch.Tensor:
-        pixel_values = mm_kwargs["pixel_values"]
-        grid_thw = mm_kwargs["image_grid_thw"]
+        pixel_values = self._get_pixel_values_by_modality(mm_kwargs)
+        grid_thw = self._get_grid_thw_by_modality(mm_kwargs)
         return self.visual(pixel_values, grid_thw, encoder_metadata=buffers)
 
     def encoder_eager_forward(
         self,
         mm_kwargs: dict[str, Any],
     ) -> torch.Tensor:
-        pixel_values = mm_kwargs["pixel_values"]
-        grid_thw = mm_kwargs["image_grid_thw"]
+        pixel_values = self._get_pixel_values_by_modality(mm_kwargs)
+        grid_thw = self._get_grid_thw_by_modality(mm_kwargs)
         return self.visual(pixel_values, grid_thw)
 
     def _parse_and_validate_image_input(
@@ -1873,7 +2162,9 @@ def _postprocess_image_embeds_evs(
             grid_thw_list = grid_thw.tolist()
             image_embeds_out = []
             for emb, size in zip(image_embeds_split, grid_thw_list):
-                positions = compute_mrope_for_media(size, merge_size).to(emb.device)
+                positions = compute_mrope_for_media(size, merge_size).to(
+                    emb.device, non_blocking=True
+                )
                 positions = torch.cat(
                     [
                         positions,
@@ -1977,7 +2268,6 @@ def _create_final_video_embeddings(
         These embeddings will replace the placeholder embeddings to create
         input_embeds for the LLM.
         """
-        device = video_embeddings.device
 
         # Generate video replacement token IDs using get_video_repl
         # This tokenizes each frame separator independently, then uses pre-tokenized
@@ -1993,8 +2283,10 @@ def _create_final_video_embeddings(
             select_token_id=self.is_multimodal_pruning_enabled,
         )
 
-        repl_token_ids = torch.tensor(video_repl.full, device=device)
-        embed_token_id = _cached_tensor(self.config.video_token_id, device=device)
+        repl_token_ids = torch.tensor(video_repl.full)
+        embed_token_id = _cached_tensor(
+            self.config.video_token_id, repl_token_ids.device
+        )
         is_video_embed = torch.isin(repl_token_ids, embed_token_id)
 
         # Get text embeddings for indicator tokens (has only `visual_dim``).
@@ -2103,7 +2395,7 @@ def _get_expanded_positions(
                 input_tokens=unpruned_token_ids,
                 mm_features=[mm_feature],
             )[0]
-            .to(device)
+            .to(device, non_blocking=True)
             .permute(1, 0)
         )
         full_is_video_embed = unpruned_token_ids_tensor == embed_token_id
@@ -2351,7 +2643,7 @@ def _recompute_mrope_positions(
         )
 
         # Tensors
-        input_ids_t = torch.as_tensor(input_ids, device=device, dtype=torch.long)
+        input_ids_t = async_tensor_h2d(input_ids, device=device, dtype=torch.long)
 
         mm_embeddings_out = []
         mm_embeddings_pos = []
diff --git a/vllm/model_executor/models/qwen3_vl_moe.py b/vllm/model_executor/models/qwen3_vl_moe.py
index a9c01ccf5959..195b3355e3e3 100644
--- a/vllm/model_executor/models/qwen3_vl_moe.py
+++ b/vllm/model_executor/models/qwen3_vl_moe.py
@@ -152,8 +152,8 @@ def load_fused_expert_weights(
                 param,
                 curr_expert_weight,
                 name,
-                shard_id,
-                expert_id,
+                shard_id=shard_id,
+                expert_id=expert_id,
                 return_success=True,
             )
             if success:
@@ -183,9 +183,12 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         loaded_params: set[str] = set()
         expert_params_mapping = self.get_expert_mapping()
         is_fused_expert = False
+        base_layer = (
+            "base_layer." if any(".base_layer." in name for name in params_dict) else ""
+        )
         fused_expert_params_mapping = [
-            ("experts.w13_weight", "experts.gate_up_proj", 0, "w1"),
-            ("experts.w2_weight", "experts.down_proj", 0, "w2"),
+            (f"experts.{base_layer}w13_weight", "experts.gate_up_proj", 0, "w1"),
+            (f"experts.{base_layer}w2_weight", "experts.down_proj", 0, "w2"),
         ]
         num_experts = self.config.num_experts
         for name, loaded_weight in weights:
@@ -451,7 +454,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         if not get_pp_group().is_first_rank and hasattr(
             config.vision_config, "deepstack_visual_indexes"
         ):
-            assert self.language_model.start_layer >= len(
+            assert self.language_model.model.start_layer >= len(
                 config.vision_config.deepstack_visual_indexes
             ), (
                 "start_layer should be greater than or equal to "
diff --git a/vllm/model_executor/models/radio.py b/vllm/model_executor/models/radio.py
index 9d1a070ca7d2..7ec320c5348d 100644
--- a/vllm/model_executor/models/radio.py
+++ b/vllm/model_executor/models/radio.py
@@ -176,7 +176,6 @@ def __init__(
                 temporal_patch_size=temporal_patch_size,
                 **factory,
             )
-            self._video_embedder_loaded = False
 
         if abs_pos:
             scale = embed_dim**-0.5
@@ -225,12 +224,7 @@ def forward_video(self, x: torch.Tensor) -> torch.Tensor:
         Returns:
             Embedded patches with temporal compression applied.
         """
-        if not self._video_embedder_loaded:
-            raise ValueError(
-                "Temporal compression (video_temporal_patch_size > 1) requires "
-                "video_embedder weights, but they were never loaded. "
-                "Ensure the checkpoint was trained with temporal compression."
-            )
+        assert self.temporal_patch_size > 1
         T = self.temporal_patch_size
         input_size = x.shape[2:]
 
@@ -794,9 +788,6 @@ def load_weights(self, weights) -> set[str]:
                 weight_loader(param, weight)
                 loaded_params.add(vllm_key)
 
-        if "model.patch_generator.video_embedder.weight" in loaded_params:
-            self.model.patch_generator._video_embedder_loaded = True
-
         return loaded_params
 
     def _extract_final(
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
index 2c72c5d685e1..559328d4a882 100644
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -6,6 +6,7 @@
 """
 
 import importlib
+import importlib.util
 import json
 import os
 import pickle
@@ -89,6 +90,7 @@
     "ChatGLMForConditionalGeneration": ("chatglm", "ChatGLMForCausalLM"),
     "CohereForCausalLM": ("commandr", "CohereForCausalLM"),
     "Cohere2ForCausalLM": ("commandr", "CohereForCausalLM"),
+    "Cohere2MoeForCausalLM": ("cohere2_moe", "Cohere2MoeForCausalLM"),
     "CwmForCausalLM": ("llama", "LlamaForCausalLM"),
     "DbrxForCausalLM": ("dbrx", "DbrxForCausalLM"),
     "DeciLMForCausalLM": ("nemotron_nas", "DeciLMForCausalLM"),
@@ -96,6 +98,7 @@
     "DeepseekV2ForCausalLM": ("deepseek_v2", "DeepseekV2ForCausalLM"),
     "DeepseekV3ForCausalLM": ("deepseek_v2", "DeepseekV3ForCausalLM"),
     "DeepseekV32ForCausalLM": ("deepseek_v2", "DeepseekV3ForCausalLM"),
+    "DeepseekV4ForCausalLM": ("vllm.models.deepseek_v4", "DeepseekV4ForCausalLM"),
     "Dots1ForCausalLM": ("dots1", "Dots1ForCausalLM"),
     "Ernie4_5ForCausalLM": ("ernie45", "Ernie4_5ForCausalLM"),
     "Ernie4_5_MoeForCausalLM": ("ernie45_moe", "Ernie4_5_MoeForCausalLM"),
@@ -110,7 +113,9 @@
     "GemmaForCausalLM": ("gemma", "GemmaForCausalLM"),
     "Gemma2ForCausalLM": ("gemma2", "Gemma2ForCausalLM"),
     "Gemma3ForCausalLM": ("gemma3", "Gemma3ForCausalLM"),
+    "Rnj1ForCausalLM": ("rnj1", "Rnj1ForCausalLM"),
     "Gemma3nForCausalLM": ("gemma3n", "Gemma3nForCausalLM"),
+    "Gemma4ForCausalLM": ("gemma4", "Gemma4ForCausalLM"),
     "Qwen3NextForCausalLM": ("qwen3_next", "Qwen3NextForCausalLM"),
     "GlmForCausalLM": ("glm", "GlmForCausalLM"),
     "Glm4ForCausalLM": ("glm4", "Glm4ForCausalLM"),
@@ -131,6 +136,7 @@
     "Grok1ForCausalLM": ("grok1", "GrokForCausalLM"),
     "HunYuanMoEV1ForCausalLM": ("hunyuan_v1", "HunYuanMoEV1ForCausalLM"),
     "HunYuanDenseV1ForCausalLM": ("hunyuan_v1", "HunYuanDenseV1ForCausalLM"),
+    "HYV3ForCausalLM": ("hy_v3", "HYV3ForCausalLM"),
     "HCXVisionForCausalLM": ("hyperclovax_vision", "HCXVisionForCausalLM"),
     "HCXVisionV2ForCausalLM": ("hyperclovax_vision_v2", "HCXVisionV2ForCausalLM"),
     "HyperCLOVAXForCausalLM": ("hyperclovax", "HyperCLOVAXForCausalLM"),
@@ -146,6 +152,7 @@
     "KimiLinearForCausalLM": ("kimi_linear", "KimiLinearForCausalLM"),
     "Lfm2ForCausalLM": ("lfm2", "Lfm2ForCausalLM"),
     "Lfm2MoeForCausalLM": ("lfm2_moe", "Lfm2MoeForCausalLM"),
+    "LagunaForCausalLM": ("laguna", "LagunaForCausalLM"),
     "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
     "Llama4ForCausalLM": ("llama4", "Llama4ForCausalLM"),
     # For decapoda-research/llama-*
@@ -159,6 +166,7 @@
     "MiniMaxText01ForCausalLM": ("minimax_text_01", "MiniMaxText01ForCausalLM"),
     "MiniMaxM1ForCausalLM": ("minimax_text_01", "MiniMaxText01ForCausalLM"),
     "MiniMaxM2ForCausalLM": ("minimax_m2", "MiniMaxM2ForCausalLM"),
+    "Ministral3ForCausalLM": ("mistral", "MistralForCausalLM"),
     "MistralForCausalLM": ("mistral", "MistralForCausalLM"),
     "MistralLarge3ForCausalLM": ("mistral_large_3", "MistralLarge3ForCausalLM"),
     "MixtralForCausalLM": ("mixtral", "MixtralForCausalLM"),
@@ -166,7 +174,8 @@
     "MptForCausalLM": ("mpt", "MPTForCausalLM"),
     "MPTForCausalLM": ("mpt", "MPTForCausalLM"),
     "MiMoForCausalLM": ("mimo", "MiMoForCausalLM"),
-    "MiMoV2FlashForCausalLM": ("mimo_v2_flash", "MiMoV2FlashForCausalLM"),
+    "MiMoV2FlashForCausalLM": ("mimo_v2", "MiMoV2FlashForCausalLM"),
+    "MiMoV2ForCausalLM": ("mimo_v2", "MiMoV2ForCausalLM"),
     "NemotronForCausalLM": ("nemotron", "NemotronForCausalLM"),
     "NemotronHForCausalLM": ("nemotron_h", "NemotronHForCausalLM"),
     "NemotronHPuzzleForCausalLM": ("nemotron_h", "NemotronHForCausalLM"),
@@ -181,6 +190,7 @@
     "PanguEmbeddedForCausalLM": ("openpangu", "PanguEmbeddedForCausalLM"),
     "PanguProMoEV2ForCausalLM": ("openpangu", "PanguProMoEV2ForCausalLM"),
     "PanguUltraMoEForCausalLM": ("openpangu", "PanguUltraMoEForCausalLM"),
+    "Param2MoEForCausalLM": ("param2moe", "Param2MoEForCausalLM"),
     "PersimmonForCausalLM": ("persimmon", "PersimmonForCausalLM"),
     "PhiForCausalLM": ("phi", "PhiForCausalLM"),
     "Phi3ForCausalLM": ("phi3", "Phi3ForCausalLM"),
@@ -205,6 +215,7 @@
     "SolarForCausalLM": ("solar", "SolarForCausalLM"),
     "TeleChatForCausalLM": ("telechat2", "TeleChat2ForCausalLM"),
     "TeleChat2ForCausalLM": ("telechat2", "TeleChat2ForCausalLM"),
+    "TeleChat3ForCausalLM": ("llama", "LlamaForCausalLM"),
     "TeleFLMForCausalLM": ("teleflm", "TeleFLMForCausalLM"),
     "XverseForCausalLM": ("llama", "LlamaForCausalLM"),
     "Zamba2ForCausalLM": ("zamba2", "Zamba2ForCausalLM"),
@@ -223,6 +234,7 @@
     "GritLM": ("gritlm", "GritLM"),
     "GteModel": ("bert_with_rope", "SnowflakeGteNewModel"),
     "GteNewModel": ("bert_with_rope", "GteNewModel"),
+    "JinaEmbeddingsV5Model": ("jina", "JinaEmbeddingsV5Model"),
     "LlamaBidirectionalModel": ("llama", "LlamaBidirectionalModel"),
     "LlamaModel": ("llama", "LlamaForCausalLM"),
     **{
@@ -270,6 +282,7 @@
     "ColBERTModernBertModel": ("colbert", "ColBERTModernBertModel"),
     "ColBERTJinaRobertaModel": ("colbert", "ColBERTJinaRobertaModel"),
     "ColBERTLfm2Model": ("colbert", "ColBERTLfm2Model"),
+    "JinaForRanking": ("jina", "JinaForRanking"),
     # [Multimodal]
     "ColModernVBertForRetrieval": ("colmodernvbert", "ColModernVBertForRetrieval"),
     "ColPaliForRetrieval": ("colpali", "ColPaliModel"),
@@ -350,6 +363,8 @@
         "chameleon",
         "ChameleonForConditionalGeneration",
     ),
+    "Cheers": ("cheers", "CheersForConditionalGeneration"),
+    "CheersForConditionalGeneration": ("cheers", "CheersForConditionalGeneration"),
     "Cohere2VisionForConditionalGeneration": (
         "cohere2_vision",
         "Cohere2VisionForConditionalGeneration",
@@ -366,11 +381,19 @@
         "ernie45_vl",
         "Ernie4_5_VLMoeForConditionalGeneration",
     ),
+    "Exaone4_5_ForConditionalGeneration": (
+        "exaone4_5",
+        "Exaone4_5_ForConditionalGeneration",
+    ),  # noqa: E501
     "FireRedASR2ForConditionalGeneration": (
         "fireredasr2",
         "FireRedASR2ForConditionalGeneration",
     ),
     "FunASRForConditionalGeneration": ("funasr", "FunASRForConditionalGeneration"),
+    "FireRedLIDForConditionalGeneration": (
+        "fireredlid",
+        "FireRedLIDForConditionalGeneration",
+    ),
     "FunAudioChatForConditionalGeneration": (
         "funaudiochat",
         "FunAudioChatForConditionalGeneration",
@@ -381,6 +404,7 @@
         "gemma3n_mm",
         "Gemma3nForConditionalGeneration",
     ),
+    "Gemma4ForConditionalGeneration": ("gemma4_mm", "Gemma4ForConditionalGeneration"),
     "GlmAsrForConditionalGeneration": ("glmasr", "GlmAsrForConditionalGeneration"),
     "GLM4VForCausalLM": ("glm4v", "GLM4VForCausalLM"),
     "Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"),
@@ -390,6 +414,10 @@
         "granite_speech",
         "GraniteSpeechForConditionalGeneration",
     ),
+    "Granite4VisionForConditionalGeneration": (
+        "granite4_vision",
+        "Granite4VisionForConditionalGeneration",
+    ),
     "H2OVLChatModel": ("h2ovl", "H2OVLChatModel"),
     "HunYuanVLForConditionalGeneration": (
         "hunyuan_vision",
@@ -408,6 +436,10 @@
         "interns1_pro",
         "InternS1ProForConditionalGeneration",
     ),
+    "InternS2PreviewForConditionalGeneration": (
+        "interns2_preview",
+        "InternS2PreviewForConditionalGeneration",
+    ),
     "Idefics3ForConditionalGeneration": (
         "idefics3",
         "Idefics3ForConditionalGeneration",
@@ -444,25 +476,35 @@
     ),
     "MantisForConditionalGeneration": ("llava", "MantisForConditionalGeneration"),
     "MiDashengLMModel": ("midashenglm", "MiDashengLMModel"),
+    "MiMoV2OmniForCausalLM": ("mimo_v2_omni", "MiMoV2OmniForCausalLM"),
     "MiniMaxVL01ForConditionalGeneration": (
         "minimax_vl_01",
         "MiniMaxVL01ForConditionalGeneration",
     ),
     "MiniCPMO": ("minicpmo", "MiniCPMO"),
     "MiniCPMV": ("minicpmv", "MiniCPMV"),
+    "MiniCPMV4_6ForConditionalGeneration": (
+        "minicpmv4_6",
+        "MiniCPMV4_6ForConditionalGeneration",
+    ),
     "Mistral3ForConditionalGeneration": (
         "mistral3",
         "Mistral3ForConditionalGeneration",
     ),
     "MolmoForCausalLM": ("molmo", "MolmoForCausalLM"),
     "Molmo2ForConditionalGeneration": ("molmo2", "Molmo2ForConditionalGeneration"),
+    "Moondream3ForCausalLM": ("moondream3", "Moondream3ForCausalLM"),
+    "HfMoondream": ("moondream3", "Moondream3ForCausalLM"),
     "NemotronH_Nano_VL_V2": ("nano_nemotron_vl", "NemotronH_Nano_VL_V2"),
+    "NemotronH_Nano_Omni_Reasoning_V3": ("nano_nemotron_vl", "NemotronH_Nano_VL_V2"),
+    "NemotronH_Super_Omni_Reasoning_V3": ("nano_nemotron_vl", "NemotronH_Nano_VL_V2"),
     "NVLM_D": ("nvlm_d", "NVLM_D_Model"),
     "OpenCUAForConditionalGeneration": ("opencua", "OpenCUAForConditionalGeneration"),
     "OpenPanguVLForConditionalGeneration": (
         "openpangu_vl",
         "OpenPanguVLForConditionalGeneration",
     ),
+    "OpenVLAForActionPrediction": ("openvla", "OpenVLAForActionPrediction"),
     "Ovis": ("ovis", "Ovis"),
     "Ovis2_5": ("ovis2_5", "Ovis2_5"),
     "Ovis2_6ForCausalLM": ("ovis2_5", "Ovis2_5"),
@@ -476,8 +518,13 @@
         "PaliGemmaForConditionalGeneration",
     ),
     "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
+    "Phi4ForCausalLMV": ("phi4siglip", "Phi4ForCausalLMV"),
     "Phi4MMForCausalLM": ("phi4mm", "Phi4MMForCausalLM"),
     "PixtralForConditionalGeneration": ("pixtral", "PixtralForConditionalGeneration"),
+    "QianfanOCRForConditionalGeneration": (
+        "qianfan_ocr",
+        "QianfanOCRForConditionalGeneration",
+    ),
     "QwenVLForConditionalGeneration": ("qwen_vl", "QwenVLForConditionalGeneration"),
     "Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"),
     "Qwen2_5_VLForConditionalGeneration": (
@@ -543,13 +590,21 @@
 _SPECULATIVE_DECODING_MODELS = {
     "ExtractHiddenStatesModel": ("extract_hidden_states", "ExtractHiddenStatesModel"),
     "MiMoMTPModel": ("mimo_mtp", "MiMoMTP"),
+    "MiMoV2MTPModel": ("mimo_v2_mtp", "MiMoV2MTP"),
+    "MiMoV2OmniMTPModel": ("mimo_v2_mtp", "MiMoV2OmniMTP"),
+    "EagleCohereForCausalLM": ("cohere_eagle", "EagleCohereForCausalLM"),
     "EagleLlamaForCausalLM": ("llama_eagle", "EagleLlamaForCausalLM"),
     "EagleLlama4ForCausalLM": ("llama4_eagle", "EagleLlama4ForCausalLM"),
     "EagleMiniCPMForCausalLM": ("minicpm_eagle", "EagleMiniCPMForCausalLM"),
+    "DFlashDraftModel": ("qwen3_dflash", "DFlashQwen3ForCausalLM"),
+    "PEagleDraftModel": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
+    "PeagleLlamaForCausalLM": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
     "Eagle3LlamaForCausalLM": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
+    "Eagle3MiniMaxM2ForCausalLM": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
     "LlamaForCausalLMEagle3": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
     "Eagle3Qwen2_5vlForCausalLM": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
     "Eagle3Qwen3vlForCausalLM": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
+    "EagleMistralForCausalLM": ("mistral_eagle", "EagleMistralForCausalLM"),
     "EagleMistralLarge3ForCausalLM": (
         "mistral_large_3_eagle",
         "EagleMistralLarge3ForCausalLM",
@@ -558,8 +613,11 @@
     "Eagle3DeepseekV3ForCausalLM": ("deepseek_eagle3", "Eagle3DeepseekV2ForCausalLM"),
     "EagleDeepSeekMTPModel": ("deepseek_eagle", "EagleDeepseekV3ForCausalLM"),
     "DeepSeekMTPModel": ("deepseek_mtp", "DeepSeekMTP"),
+    "DeepSeekV4MTPModel": ("vllm.models.deepseek_v4", "DeepSeekV4MTP"),
+    "Gemma4MTPModel": ("gemma4_mtp", "Gemma4MTP"),
     "ErnieMTPModel": ("ernie_mtp", "ErnieMTP"),
     "ExaoneMoeMTP": ("exaone_moe_mtp", "ExaoneMoeMTP"),
+    "Exaone4_5_MTP": ("exaone4_5_mtp", "Exaone4_5_MTP"),
     "NemotronHMTPModel": ("nemotron_h_mtp", "NemotronHMTP"),
     "LongCatFlashMTPModel": ("longcat_flash_mtp", "LongCatFlashMTP"),
     "Glm4MoeMTPModel": ("glm4_moe_mtp", "Glm4MoeMTP"),
@@ -571,6 +629,7 @@
     "Step3p5MTP": ("step3p5_mtp", "Step3p5MTP"),
     "Qwen3_5MTP": ("qwen3_5_mtp", "Qwen3_5MTP"),
     "Qwen3_5MoeMTP": ("qwen3_5_mtp", "Qwen3_5MoeMTP"),
+    "HYV3MTPModel": ("hy_v3_mtp", "HYV3MTP"),
     # Temporarily disabled.
     # # TODO(woosuk): Re-enable this once the MLP Speculator is supported in V1.
     # "MLPSpeculatorPreTrainedModel": ("mlp_speculator", "MLPSpeculator"),
@@ -813,10 +872,21 @@ def _save_modelinfo_to_cache(self, mi: _ModelInfo, module_hash: str) -> None:
 
     @logtime(logger=logger, msg="Registry inspect model class")
     def inspect_model_cls(self) -> _ModelInfo:
-        model_path = Path(__file__).parent / f"{self.module_name.split('.')[-1]}.py"
+        # Modules registered with a non-default location (e.g. the
+        # hardware-isolated ``vllm.models.<name>`` layout) live outside
+        # ``vllm/model_executor/models``. Resolve the module spec directly
+        # so the file-hash cache stays warm for them.
+        if self.module_name.startswith("vllm.model_executor.models."):
+            model_path = Path(__file__).parent / f"{self.module_name.split('.')[-1]}.py"
+        else:
+            try:
+                spec = importlib.util.find_spec(self.module_name)
+            except (ImportError, ValueError):
+                spec = None
+            model_path = Path(spec.origin) if spec is not None and spec.origin else None
         module_hash = None
 
-        if model_path.exists():
+        if model_path is not None and model_path.exists():
             with open(model_path, "rb") as f:
                 module_hash = safe_hash(f.read(), usedforsecurity=False).hexdigest()
 
@@ -1007,6 +1077,7 @@ def _try_resolve_transformers(
                         module,
                         model_config.model,
                         revision=model_config.revision,
+                        code_revision=model_config.code_revision,
                         trust_remote_code=model_config.trust_remote_code,
                         warn_on_fail=False,
                     )
@@ -1020,6 +1091,7 @@ def _try_resolve_transformers(
                         module,
                         model_config.model,
                         revision=model_config.revision,
+                        code_revision=model_config.code_revision,
                         trust_remote_code=model_config.trust_remote_code,
                         warn_on_fail=True,
                     )
@@ -1269,10 +1341,19 @@ def is_transcription_only_model(
         return model_cls.supports_transcription_only
 
 
+def _resolve_module_name(mod_relname: str) -> str:
+    # Allow registry entries to point at fully-qualified module paths (e.g.
+    # ``vllm.models.deepseek_v4``) for models that live outside the legacy
+    # ``vllm.model_executor.models`` flat layout.
+    if mod_relname.startswith("vllm."):
+        return mod_relname
+    return f"vllm.model_executor.models.{mod_relname}"
+
+
 ModelRegistry = _ModelRegistry(
     {
         model_arch: _LazyRegisteredModel(
-            module_name=f"vllm.model_executor.models.{mod_relname}",
+            module_name=_resolve_module_name(mod_relname),
             class_name=cls_name,
         )
         for model_arch, (mod_relname, cls_name) in _VLLM_MODELS.items()
diff --git a/vllm/model_executor/models/rnj1.py b/vllm/model_executor/models/rnj1.py
new file mode 100644
index 000000000000..f83577b7a397
--- /dev/null
+++ b/vllm/model_executor/models/rnj1.py
@@ -0,0 +1,470 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+#
+# RNJ-1 model: Gemma3-based architecture with chunked (block-local) attention.
+# Chunked attention restricts local layers to attend within aligned blocks,
+# with lookback to one previous block.
+from collections.abc import Iterable
+from itertools import islice
+
+import torch
+from torch import nn
+from transformers import Gemma3TextConfig
+
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import CacheConfig, VllmConfig
+from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
+from vllm.logger import init_logger
+from vllm.model_executor.layers.activation import GeluAndMul
+from vllm.model_executor.layers.attention import Attention
+from vllm.model_executor.layers.layernorm import GemmaRMSNorm
+from vllm.model_executor.layers.linear import (
+    MergedColumnParallelLinear,
+    QKVParallelLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    ParallelLMHead,
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.model_loader.weight_utils import (
+    default_weight_loader,
+    maybe_remap_kv_scale_name,
+)
+from vllm.sequence import IntermediateTensors
+from vllm.v1.attention.backend import AttentionType
+
+from .interfaces import SupportsLoRA, SupportsPP
+from .utils import (
+    AutoWeightsLoader,
+    extract_layer_index,
+    is_pp_missing_parameter,
+    make_empty_intermediate_tensors_factory,
+    make_layers,
+    maybe_prefix,
+)
+
+logger = init_logger(__name__)
+
+
+class Rnj1MLP(nn.Module):
+    def __init__(
+        self,
+        hidden_size: int,
+        intermediate_size: int,
+        hidden_activation: str,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.gate_up_proj = MergedColumnParallelLinear(
+            hidden_size,
+            [intermediate_size] * 2,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.gate_up_proj",
+        )
+        self.down_proj = RowParallelLinear(
+            intermediate_size,
+            hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.down_proj",
+        )
+        if hidden_activation != "gelu_pytorch_tanh":
+            raise ValueError(
+                "RNJ-1 uses `gelu_pytorch_tanh` as the hidden activation "
+                "function. Please set `hidden_act` and `hidden_activation` to "
+                "`gelu_pytorch_tanh`."
+            )
+        self.act_fn = GeluAndMul(approximate="tanh")
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        gate_up, _ = self.gate_up_proj(x)
+        x = self.act_fn(gate_up)
+        x, _ = self.down_proj(x)
+        return x
+
+
+class Rnj1Attention(nn.Module):
+    def __init__(
+        self,
+        config: Gemma3TextConfig,
+        hidden_size: int,
+        num_heads: int,
+        num_kv_heads: int,
+        head_dim: int,
+        max_position_embeddings: int,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        attn_logits_soft_cap: float | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.config = config
+        self.hidden_size = hidden_size
+        tp_size = get_tensor_model_parallel_world_size()
+        self.total_num_heads = num_heads
+        assert self.total_num_heads % tp_size == 0
+        self.num_heads = self.total_num_heads // tp_size
+        self.total_num_kv_heads = num_kv_heads
+        if self.total_num_kv_heads >= tp_size:
+            assert self.total_num_kv_heads % tp_size == 0
+        else:
+            assert tp_size % self.total_num_kv_heads == 0
+        self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
+        self.head_dim = head_dim
+        self.q_size = self.num_heads * self.head_dim
+        self.kv_size = self.num_kv_heads * self.head_dim
+        self.scaling = config.query_pre_attn_scalar**-0.5
+
+        self.qkv_proj = QKVParallelLinear(
+            hidden_size,
+            self.head_dim,
+            self.total_num_heads,
+            self.total_num_kv_heads,
+            bias=config.attention_bias,
+            quant_config=quant_config,
+            prefix=f"{prefix}.qkv_proj",
+        )
+        self.o_proj = RowParallelLinear(
+            self.total_num_heads * self.head_dim,
+            hidden_size,
+            bias=config.attention_bias,
+            quant_config=quant_config,
+            prefix=f"{prefix}.o_proj",
+        )
+
+        self.q_norm = GemmaRMSNorm(self.head_dim, eps=config.rms_norm_eps)
+        self.k_norm = GemmaRMSNorm(self.head_dim, eps=config.rms_norm_eps)
+
+        layer_idx = extract_layer_index(prefix)
+        layer_type = config.layer_types[layer_idx]
+        self.is_chunked = layer_type == "chunked_attention"
+        self.chunk_lookback = 1 if self.is_chunked else -1
+        sliding_window = config.sliding_window if self.is_chunked else None
+
+        # Initialize the rotary embedding.
+        # Expects v5-style rope_parameters keyed by layer type.
+        if layer_type in config.rope_parameters:
+            rope_parameters = config.rope_parameters[layer_type]
+        else:
+            rope_parameters = config.rope_parameters
+
+        self.rotary_emb = get_rope(
+            self.head_dim,
+            max_position=max_position_embeddings,
+            rope_parameters=rope_parameters,
+            is_neox_style=True,
+        )
+
+        self.attn = Attention(
+            self.num_heads,
+            self.head_dim,
+            self.scaling,
+            num_kv_heads=self.num_kv_heads,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            attn_type=AttentionType.DECODER,
+            logits_soft_cap=attn_logits_soft_cap,
+            per_layer_sliding_window=sliding_window,
+            chunk_lookback=self.chunk_lookback,
+            prefix=f"{prefix}.attn",
+        )
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        **kwargs,
+    ) -> torch.Tensor:
+        qkv, _ = self.qkv_proj(hidden_states)
+        q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
+
+        q = q.unflatten(-1, (self.num_heads, self.head_dim))
+        q = self.q_norm(q)
+        q = q.flatten(-2, -1)
+        k = k.unflatten(-1, (self.num_kv_heads, self.head_dim))
+        k = self.k_norm(k)
+        k = k.flatten(-2, -1)
+
+        q, k = self.rotary_emb(positions, q, k)
+        attn_output = self.attn(q, k, v)
+        output, _ = self.o_proj(attn_output)
+        return output
+
+
+class Rnj1DecoderLayer(nn.Module):
+    def __init__(
+        self,
+        config: Gemma3TextConfig,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.hidden_size = config.hidden_size
+        self.self_attn = Rnj1Attention(
+            config=config,
+            hidden_size=self.hidden_size,
+            num_heads=config.num_attention_heads,
+            num_kv_heads=config.num_key_value_heads,
+            head_dim=config.head_dim,
+            max_position_embeddings=config.max_position_embeddings,
+            cache_config=cache_config,
+            quant_config=quant_config,
+            attn_logits_soft_cap=None,
+            prefix=f"{prefix}.self_attn",
+        )
+        self.hidden_size = config.hidden_size
+        self.mlp = Rnj1MLP(
+            hidden_size=self.hidden_size,
+            intermediate_size=config.intermediate_size,
+            hidden_activation=config.hidden_activation,
+            quant_config=quant_config,
+            prefix=f"{prefix}.mlp",
+        )
+        self.input_layernorm = GemmaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.post_attention_layernorm = GemmaRMSNorm(
+            config.hidden_size, eps=config.rms_norm_eps
+        )
+        self.pre_feedforward_layernorm = GemmaRMSNorm(
+            config.hidden_size, eps=config.rms_norm_eps
+        )
+        self.post_feedforward_layernorm = GemmaRMSNorm(
+            config.hidden_size, eps=config.rms_norm_eps
+        )
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        residual: torch.Tensor | None,
+        **kwargs,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        if residual is None:
+            residual = hidden_states
+            hidden_states = self.input_layernorm(hidden_states)
+        else:
+            hidden_states, residual = self.input_layernorm(hidden_states, residual)
+        hidden_states = self.self_attn(
+            positions=positions,
+            hidden_states=hidden_states,
+            **kwargs,
+        )
+        hidden_states = self.post_attention_layernorm(hidden_states)
+
+        hidden_states, residual = self.pre_feedforward_layernorm(
+            hidden_states, residual
+        )
+        hidden_states = self.mlp(hidden_states)
+        hidden_states = self.post_feedforward_layernorm(hidden_states)
+        return hidden_states, residual
+
+
+@support_torch_compile
+class Rnj1Model(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        cache_config = vllm_config.cache_config
+        quant_config = vllm_config.quant_config
+        self.config = config
+        self.quant_config = quant_config
+
+        self.embed_tokens = VocabParallelEmbedding(
+            config.vocab_size,
+            config.hidden_size,
+            quant_config=quant_config,
+            prefix=f"{prefix}.embed_tokens",
+        )
+        self.start_layer, self.end_layer, self.layers = make_layers(
+            config.num_hidden_layers,
+            lambda prefix: Rnj1DecoderLayer(
+                config, cache_config, quant_config, prefix=prefix
+            ),
+            prefix=f"{prefix}.layers",
+        )
+        self.norm = GemmaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+        normalizer = self.config.hidden_size**0.5
+        self.register_buffer("normalizer", torch.tensor(normalizer), persistent=False)
+        self.make_empty_intermediate_tensors = make_empty_intermediate_tensors_factory(
+            ["hidden_states", "residual"], config.hidden_size
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids) * self.normalizer
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None,
+        inputs_embeds: torch.Tensor | None = None,
+        **kwargs,
+    ) -> torch.Tensor | IntermediateTensors:
+        if get_pp_group().is_first_rank:
+            if inputs_embeds is not None:
+                hidden_states = inputs_embeds
+            else:
+                hidden_states = self.embed_input_ids(input_ids)
+            residual = None
+        else:
+            assert intermediate_tensors is not None
+            hidden_states = intermediate_tensors["hidden_states"]
+            residual = intermediate_tensors["residual"]
+        for layer in islice(self.layers, self.start_layer, self.end_layer):
+            hidden_states, residual = layer(
+                positions,
+                hidden_states,
+                residual,
+                **kwargs,
+            )
+        if not get_pp_group().is_last_rank:
+            return IntermediateTensors(
+                {"hidden_states": hidden_states, "residual": residual}
+            )
+        hidden_states, _ = self.norm(hidden_states, residual)
+        return hidden_states
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        stacked_params_mapping = [
+            # (param_name, shard_name, shard_id)
+            ("qkv_proj", "q_proj", "q"),
+            ("qkv_proj", "k_proj", "k"),
+            ("qkv_proj", "v_proj", "v"),
+            ("gate_up_proj", "gate_proj", 0),
+            ("gate_up_proj", "up_proj", 1),
+        ]
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+        for name, loaded_weight in weights:
+            if (
+                self.quant_config
+                and self.quant_config.get_name() == "gguf"
+                and name.endswith("norm.weight")
+            ):
+                loaded_weight -= 1
+
+            if self.quant_config is not None and (
+                scale_name := self.quant_config.get_cache_scale(name)
+            ):
+                param = params_dict[scale_name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                loaded_weight = loaded_weight[0]
+                weight_loader(param, loaded_weight)
+                loaded_params.add(scale_name)
+                continue
+
+            if name.endswith((".k_scale", ".v_scale", ".q_scale", ".prob_scale")):
+                remapped_name = maybe_remap_kv_scale_name(name, params_dict)
+                if remapped_name is not None and remapped_name in params_dict:
+                    param = params_dict[remapped_name]
+                    weight_loader = getattr(
+                        param, "weight_loader", default_weight_loader
+                    )
+                    weight_loader(param, loaded_weight)
+                    loaded_params.add(remapped_name)
+                    continue
+
+            for param_name, shard_name, shard_id in stacked_params_mapping:
+                if shard_name not in name:
+                    continue
+                name = name.replace(shard_name, param_name)
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
+                if is_pp_missing_parameter(name, self):
+                    continue
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                break
+            else:
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
+                name = maybe_remap_kv_scale_name(name, params_dict)
+                if name is None:
+                    continue
+                if is_pp_missing_parameter(name, self):
+                    continue
+                param = params_dict[name]
+                weight_loader = getattr(param, "weight_loader", default_weight_loader)
+                weight_loader(param, loaded_weight)
+            loaded_params.add(name)
+
+        return loaded_params
+
+
+class Rnj1ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
+    packed_modules_mapping = {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+
+        super().__init__()
+        self.config = config
+        self.quant_config = quant_config
+        self.model = Rnj1Model(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+
+        self.lm_head = ParallelLMHead(
+            config.vocab_size,
+            config.hidden_size,
+            quant_config=quant_config,
+            prefix=maybe_prefix(prefix, "lm_head"),
+        )
+        if config.tie_word_embeddings:
+            self.lm_head = self.lm_head.tie_weights(self.model.embed_tokens)
+
+        self.logits_processor = LogitsProcessor(
+            config.vocab_size, soft_cap=config.final_logit_softcapping
+        )
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        **kwargs,
+    ) -> torch.Tensor | IntermediateTensors:
+        hidden_states = self.model(
+            input_ids, positions, intermediate_tensors, inputs_embeds, **kwargs
+        )
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        logits = self.logits_processor(self.lm_head, hidden_states)
+        return logits
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(
+            self,
+            skip_prefixes=(["lm_head."] if self.config.tie_word_embeddings else None),
+        )
+        return loader.load_weights(weights)
diff --git a/vllm/model_executor/models/roberta.py b/vllm/model_executor/models/roberta.py
index c7c292e70927..43c37b01e4c7 100644
--- a/vllm/model_executor/models/roberta.py
+++ b/vllm/model_executor/models/roberta.py
@@ -203,7 +203,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.secondary_weights = [
             DefaultModelLoader.Source(
                 model_or_path=vllm_config.model_config.model,
-                revision=None,
+                revision=vllm_config.model_config.revision,
                 prefix=prefix,
                 allow_patterns_overrides=[filename],
             )
diff --git a/vllm/model_executor/models/sarvam.py b/vllm/model_executor/models/sarvam.py
index fa5ec44d7e72..a0ab6c0ce260 100644
--- a/vllm/model_executor/models/sarvam.py
+++ b/vllm/model_executor/models/sarvam.py
@@ -35,7 +35,10 @@
     get_tensor_model_parallel_world_size,
 )
 from vllm.model_executor.layers.activation import SiluAndMul
-from vllm.model_executor.layers.fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
@@ -335,13 +338,12 @@ def __init__(
         else:
             self.shared_experts = None
 
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.shared_experts,
             num_experts=self.num_experts,
             top_k=self.top_k,
             hidden_size=self.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=self.norm_expert_prob,
             quant_config=quant_config,
             prefix=f"{prefix}.experts",
@@ -353,7 +355,7 @@ def __init__(
             routed_scaling_factor=self.routed_scaling_factor,
         )
 
-    def maybe_get_fused_moe(self) -> SharedFusedMoE:
+    def maybe_get_fused_moe(self) -> FusedMoE:
         return self.experts
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
@@ -370,20 +372,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
             router_logits=router_logits,
         )
 
-        if self.shared_experts is not None:
-            shared_output, expert_output = final_hidden
-        else:
-            shared_output, expert_output = None, final_hidden
-
-        if shared_output is not None:
-            expert_output = expert_output + shared_output
-
-        if self.tp_size > 1:
-            expert_output = self.experts.maybe_all_reduce_tensor_model_parallel(
-                expert_output
-            )
-
-        return expert_output.view(num_tokens, hidden_dim)
+        return final_hidden.view(num_tokens, hidden_dim)
 
 
 class SarvamMLABlock(nn.Module):
@@ -543,7 +532,7 @@ def forward(
         return hidden_states
 
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
-        return SharedFusedMoE.make_expert_params_mapping(
+        return fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
diff --git a/vllm/model_executor/models/siglip.py b/vllm/model_executor/models/siglip.py
index ce3a260d0ef6..28d725e7a36c 100644
--- a/vllm/model_executor/models/siglip.py
+++ b/vllm/model_executor/models/siglip.py
@@ -867,7 +867,7 @@ def __init__(
             quant_config=quant_config,
             num_hidden_layers_override=num_hidden_layers_override,
             require_post_norm=require_post_norm,
-            prefix=f"{prefix}.vision_model",
+            prefix=maybe_prefix(prefix, "vision_model"),
             use_head=use_head,
         )
 
diff --git a/vllm/model_executor/models/siglip2navit.py b/vllm/model_executor/models/siglip2navit.py
index 6c7c33b75481..906a51bd7b1e 100644
--- a/vllm/model_executor/models/siglip2navit.py
+++ b/vllm/model_executor/models/siglip2navit.py
@@ -29,6 +29,7 @@
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.platforms import current_platform
 
+from .utils import maybe_prefix
 from .vision import is_vit_use_data_parallel
 
 
@@ -598,7 +599,7 @@ def __init__(
         self.vision_model = Siglip2VisionTransformer(
             config,
             quant_config=quant_config,
-            prefix=f"{prefix}.vision_model",
+            prefix=maybe_prefix(prefix, "vision_model"),
         )
 
     def forward(
diff --git a/vllm/model_executor/models/step3_text.py b/vllm/model_executor/models/step3_text.py
index 18b689166a5f..a0e7e16a9bbf 100644
--- a/vllm/model_executor/models/step3_text.py
+++ b/vllm/model_executor/models/step3_text.py
@@ -14,12 +14,13 @@
 from vllm.distributed import (
     get_pp_group,
     get_tensor_model_parallel_world_size,
-    tensor_model_parallel_all_reduce,
 )
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
@@ -40,6 +41,7 @@
 
 from .interfaces import SupportsPP
 from .utils import (
+    AutoWeightsLoader,
     PPMissingLayer,
     is_pp_missing_parameter,
     make_empty_intermediate_tensors_factory,
@@ -71,7 +73,6 @@ def __init__(
             top_k=config.moe_top_k,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=config.norm_expert_weight,
             quant_config=quant_config,
             prefix=f"{prefix}.experts",
@@ -94,8 +95,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         final_hidden_states = self.experts(
             hidden_states=hidden_states, router_logits=router_logits
         )
-        if self.tp_size > 1:
-            final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
 
         return final_hidden_states.view(orig_shape)
 
@@ -384,55 +383,6 @@ def forward(
         hidden_states, _ = self.norm(hidden_states, residual)
         return hidden_states
 
-
-class Step3TextForCausalLM(nn.Module, SupportsPP):
-    def __init__(
-        self,
-        *,
-        vllm_config: VllmConfig,
-        prefix: str = "",
-    ):
-        super().__init__()
-        config = vllm_config.model_config.hf_config
-
-        self.config = config
-        self.vllm_config = vllm_config
-
-        self.model = Step3TextModel(vllm_config=vllm_config, prefix=prefix)
-
-        if get_pp_group().is_last_rank:
-            self.lm_head = ParallelLMHead(
-                config.vocab_size,
-                config.hidden_size,
-                prefix=maybe_prefix(prefix, "lm_head"),
-            )
-            self.logits_processor = LogitsProcessor(config.vocab_size)
-        else:
-            self.lm_head = PPMissingLayer()
-
-        self.make_empty_intermediate_tensors = (
-            self.model.make_empty_intermediate_tensors
-        )
-
-    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
-        return self.model.embed_input_ids(input_ids)
-
-    def forward(
-        self,
-        input_ids: torch.Tensor | None,
-        positions: torch.Tensor,
-        intermediate_tensors: IntermediateTensors | None = None,
-        inputs_embeds: torch.Tensor | None = None,
-    ):
-        hidden_states = self.model(
-            input_ids, positions, intermediate_tensors, inputs_embeds
-        )
-        return hidden_states
-
-    def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        logits = self.logits_processor(self.lm_head, hidden_states)
-        return logits
-
     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         qkv_params_mapping = [
             # (param_name, shard_name, relative_start_idx, relative_end_idx)
@@ -467,11 +417,14 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         ]
         params_dict = dict(self.named_parameters())
         loaded_params: set[str] = set()
+        base_layer = (
+            "base_layer." if any(".base_layer." in name for name in params_dict) else ""
+        )
 
         expert_params_mapping = [
-            (".moe.experts.w13_weight", ".moe.gate_proj.weight", "w1"),
-            (".moe.experts.w13_weight", ".moe.up_proj.weight", "w3"),
-            (".moe.experts.w2_weight", ".moe.down_proj.weight", "w2"),
+            (f".moe.experts.{base_layer}w13_weight", ".moe.gate_proj.weight", "w1"),
+            (f".moe.experts.{base_layer}w13_weight", ".moe.up_proj.weight", "w3"),
+            (f".moe.experts.{base_layer}w2_weight", ".moe.down_proj.weight", "w2"),
         ]
 
         disable_moe_stacked_params = [data[1] for data in expert_params_mapping]
@@ -552,3 +505,56 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
                         weight_loader(param, loaded_weight)
                         loaded_params.add(name)
         return loaded_params
+
+
+class Step3TextForCausalLM(nn.Module, SupportsPP):
+    def __init__(
+        self,
+        *,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+    ):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+
+        self.config = config
+        self.vllm_config = vllm_config
+
+        self.model = Step3TextModel(vllm_config=vllm_config, prefix=prefix)
+
+        if get_pp_group().is_last_rank:
+            self.lm_head = ParallelLMHead(
+                config.vocab_size,
+                config.hidden_size,
+                prefix=maybe_prefix(prefix, "lm_head"),
+            )
+            self.logits_processor = LogitsProcessor(config.vocab_size)
+        else:
+            self.lm_head = PPMissingLayer()
+
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ):
+        hidden_states = self.model(
+            input_ids, positions, intermediate_tensors, inputs_embeds
+        )
+        return hidden_states
+
+    def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        logits = self.logits_processor(self.lm_head, hidden_states)
+        return logits
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(weights)
diff --git a/vllm/model_executor/models/step3_vl.py b/vllm/model_executor/models/step3_vl.py
index dc4b42961922..efe552f55491 100644
--- a/vllm/model_executor/models/step3_vl.py
+++ b/vllm/model_executor/models/step3_vl.py
@@ -46,7 +46,12 @@
 )
 from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
-from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
+from .interfaces import (
+    MultiModalEmbeddings,
+    SupportsEncoderCudaGraph,
+    SupportsMultiModal,
+    SupportsPP,
+)
 from .utils import (
     AutoWeightsLoader,
     WeightsMapper,
@@ -487,7 +492,9 @@ def forward(
     info=Step3VLProcessingInfo,
     dummy_inputs=Step3VLDummyInputsBuilder,
 )
-class Step3VLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
+class Step3VLForConditionalGeneration(
+    nn.Module, SupportsMultiModal, SupportsPP, SupportsEncoderCudaGraph
+):
     hf_to_vllm_mapper = WeightsMapper(
         orig_to_new_prefix={
             "model.": "language_model.model.",
@@ -510,6 +517,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
         multimodal_config = vllm_config.model_config.multimodal_config
 
         self.config = config
+        self.model_config = vllm_config.model_config
         self.multimodal_config = multimodal_config
         self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
 
@@ -570,6 +578,17 @@ def device(self):
     def dtype(self):
         return next(self.parameters()).dtype
 
+    @staticmethod
+    def _compute_spatial_tokens(size, patch_size, stride):
+        # Compute the number of spatial tokens after two rounds of
+        # downsampling with given patch size and stride.
+        grid = size // patch_size
+        vit_tokens = grid * grid
+        spatial = int(math.sqrt(vit_tokens))
+        h1 = (spatial - 2) // stride + 1
+        h2 = (h1 - 1) // 2 + 1
+        return h2 * h2
+
     def _parse_and_validate_image_input(
         self, **kwargs: object
     ) -> Step3VLImageInputs | None:
@@ -592,7 +611,7 @@ def _parse_and_validate_image_input(
         if image_embeds is not None:
             return Step3VLImageEmbeddingInputs(
                 type="image_embeds",
-                image_embeds=image_embeds.to(self.dtype),
+                data=image_embeds.to(self.dtype),
             )
 
         raise AssertionError("This line should be unreachable.")
@@ -615,15 +634,19 @@ def _process_image_input(
         self, image_input: Step3VLImageInputs
     ) -> tuple[torch.Tensor, ...]:
         if image_input["type"] == "image_embeds":
-            image_features = image_input["image_embeds"]
-        else:
-            image_features = self._get_vision_model_output(image_input["pixel_values"])
-            patch_image_features = (
-                self._get_vision_model_output(image_input["patch_pixel_values"])
-                if len(image_input["patch_pixel_values"]) > 0
-                else None
-            )
-            num_patches = image_input["num_patches"]
+            image_features = image_input["data"]
+            return [
+                image_features[i].view(-1, image_features.shape[-1])
+                for i in range(image_features.shape[0])
+            ]
+
+        image_features = self._get_vision_model_output(image_input["pixel_values"])
+        patch_image_features = (
+            self._get_vision_model_output(image_input["patch_pixel_values"])
+            if len(image_input["patch_pixel_values"]) > 0
+            else None
+        )
+        num_patches = image_input["num_patches"]
 
         image_features = self._process_image_features(image_features)
         patch_image_features = (
@@ -672,6 +695,293 @@ def embed_input_ids(
             is_multimodal=is_multimodal,
         )
 
+    def get_encoder_cudagraph_config(self):
+        from vllm.v1.worker.encoder_cudagraph_defs import (
+            EncoderCudaGraphConfig,
+        )
+
+        return EncoderCudaGraphConfig(
+            modalities=["image"],
+            input_key_by_modality={"image": "pixel_values"},
+            buffer_keys=["patch_pixel_values"],
+            out_hidden_size=self.config.hidden_size,
+        )
+
+    def get_input_modality(
+        self,
+        mm_kwargs: dict[str, Any],
+    ) -> str:
+        return "image"
+
+    def get_encoder_cudagraph_budget_range(
+        self,
+        vllm_config: "VllmConfig",
+    ) -> tuple[int, int]:
+        # An image without patches
+        min_budget = self._compute_spatial_tokens(
+            self.config.vision_config.image_size,
+            self.config.vision_config.patch_size,
+            self.config.understand_projector_stride,
+        )
+        max_budget = min(
+            vllm_config.scheduler_config.max_num_batched_tokens,
+            self.model_config.max_model_len,
+        )
+        return min_budget, max_budget
+
+    def get_encoder_cudagraph_item_specs(
+        self,
+        mm_kwargs: dict[str, Any],
+    ):
+        from vllm.v1.worker.encoder_cudagraph_defs import EncoderItemSpec
+
+        num_patches = mm_kwargs.get("num_patches")
+        img_output_tokens = self._compute_spatial_tokens(
+            self.config.vision_config.image_size,
+            self.config.vision_config.patch_size,
+            self.config.understand_projector_stride,
+        )
+
+        # NOTE: 504 is the hard coded size for each patch after processing
+        # by the vision model, which is determined by the current architecture
+        # of the vision model and may need to be updated if the architecture changes.
+        # The number of tokens for each patch is calculated based on this
+        # size and the patch size.
+        patch_output_tokens = self._compute_spatial_tokens(
+            504,
+            self.config.vision_config.patch_size,
+            self.config.understand_projector_stride,
+        )
+
+        img_grid = (
+            self.config.vision_config.image_size // self.config.vision_config.patch_size
+        )
+        patch_grid = 504 // self.config.vision_config.patch_size
+        total_image_pixel = img_grid * img_grid
+        total_patch_pixel = patch_grid * patch_grid
+
+        return [
+            EncoderItemSpec(
+                input_size=(total_image_pixel + num_patch * total_patch_pixel),
+                output_tokens=(img_output_tokens + num_patch * patch_output_tokens),
+            )
+            for num_patch in num_patches
+        ]
+
+    def select_encoder_cudagraph_items(
+        self,
+        mm_kwargs: dict[str, Any],
+        indices: list[int],
+    ) -> dict[str, Any]:
+        pixel_values = mm_kwargs["pixel_values"]
+        patch_pixel_values = mm_kwargs["patch_pixel_values"]
+        num_patches = mm_kwargs["num_patches"]
+
+        # calcute the accumulated patch counts
+        cum_patches = [0]
+        for p in num_patches:
+            cum_patches.append(cum_patches[-1] + p)
+
+        if len(indices) == 0:
+            return {
+                "pixel_values": pixel_values[:0],
+                "patch_pixel_values": patch_pixel_values[:0],
+                "num_patches": num_patches[:0],
+            }
+
+        selected_pv = pixel_values[indices]
+        selected_np = num_patches[indices]
+        selected_ppv = torch.cat(
+            [patch_pixel_values[cum_patches[i] : cum_patches[i + 1]] for i in indices]
+        )
+
+        return {
+            "pixel_values": selected_pv,
+            "patch_pixel_values": selected_ppv,
+            "num_patches": selected_np,
+        }
+
+    def prepare_encoder_cudagraph_capture_inputs(
+        self,
+        token_budget: int,
+        max_batch_size: int,
+        max_frames_per_batch: int,
+        device: torch.device,
+        dtype: torch.dtype,
+    ):
+        from vllm.v1.worker.encoder_cudagraph_defs import (
+            EncoderCudaGraphCaptureInputs,
+        )
+
+        # For pixel_value, the max input size is max_batch_size
+        img_output_tokens = self._compute_spatial_tokens(
+            self.config.vision_config.image_size,
+            self.config.vision_config.patch_size,
+            self.config.understand_projector_stride,
+        )
+        patch_output_tokens = self._compute_spatial_tokens(
+            504,
+            self.config.vision_config.patch_size,
+            self.config.understand_projector_stride,
+        )
+        dummy_pixel_values = torch.randn(
+            max_batch_size,
+            3,
+            self.config.vision_config.image_size,
+            self.config.vision_config.image_size,
+            device=device,
+            dtype=dtype,
+        )
+        # max_num_patches is the max total patches across the whole batch.
+        # token_budget = max_batch_size * img_out + max_num_patches * patch_out
+        max_num_patches = max(
+            0,
+            (token_budget - max_batch_size * img_output_tokens) // patch_output_tokens,
+        )
+        dummy_patch_pixel_values = torch.randn(
+            max_num_patches,
+            3,
+            504,
+            504,
+            device=device,
+            dtype=dtype,
+        )
+        # num_patches is NOT in buffers -- the per-item merge is done
+        # CPU-side by finalize_encoder_cudagraph_output using the actual
+        # batch's num_patches from mm_kwargs.
+        mm_kwargs = {
+            "pixel_values": dummy_pixel_values,
+            "patch_pixel_values": dummy_patch_pixel_values,
+        }
+
+        buffers = {
+            "patch_pixel_values": dummy_patch_pixel_values,
+        }
+
+        return EncoderCudaGraphCaptureInputs(
+            mm_kwargs=mm_kwargs,
+            buffers=buffers,
+        )
+
+    def encoder_cudagraph_forward(
+        self,
+        mm_kwargs: dict[str, Any],
+        buffers: dict[str, torch.Tensor],
+    ) -> torch.Tensor:
+        # Graph captures only the compute (vision model + conv projector).
+        # Per-item merge happens CPU-side in finalize_encoder_cudagraph_output
+        # using actual num_patches from the batch data.
+        pixel_values = mm_kwargs["pixel_values"]
+        patch_pixel_values = buffers["patch_pixel_values"]
+
+        image_features = self._process_image_features(
+            self._get_vision_model_output(pixel_values)
+        )
+
+        has_patches = len(patch_pixel_values) > 0
+        if has_patches:
+            patch_features = self._process_image_features(
+                self._get_vision_model_output(patch_pixel_values)
+            )
+
+        # Deterministic single cat: [all_img_flat, all_patch_flat]
+        img_flat = image_features.reshape(-1, image_features.shape[-1])
+        if has_patches:
+            patch_flat = patch_features.reshape(-1, patch_features.shape[-1])
+            return torch.cat([img_flat, patch_flat], dim=0)
+        return img_flat
+
+    def encoder_eager_forward(
+        self,
+        mm_kwargs: dict[str, Any],
+    ) -> torch.Tensor:
+        image_input = Step3VLImagePixelInputs(
+            type="pixel_values",
+            pixel_values=mm_kwargs["pixel_values"],
+            patch_pixel_values=mm_kwargs["patch_pixel_values"],
+            num_patches=mm_kwargs["num_patches"],
+        )
+        vision_embeddings = self._process_image_input(image_input)
+        return torch.cat(vision_embeddings, dim=0)
+
+    def postprocess_encoder_output(
+        self,
+        output: torch.Tensor,
+        indices: list[int],
+        per_item_out_tokens: list[int],
+        dest: dict[int, torch.Tensor] | list[torch.Tensor | None],
+        clone: bool = False,
+        batch_mm_kwargs: dict[str, Any] | None = None,
+    ):
+        """CPU-side per-item merge after graph replay.
+
+        The graph output is ``[all_img_flat, all_patch_flat]``.
+        This method splits the flat output into image and patch features,
+        then reassembles per-item embeddings using the *actual* batch
+        ``num_patches`` from ``batch_mm_kwargs`` (not the capture-time values).
+        """
+        num_patches = batch_mm_kwargs["num_patches"]
+        hidden = output.shape[-1]
+        bsz = len(indices)
+
+        img_out = self._compute_spatial_tokens(
+            self.config.vision_config.image_size,
+            self.config.vision_config.patch_size,
+            self.config.understand_projector_stride,
+        )
+        patch_out = self._compute_spatial_tokens(
+            504,
+            self.config.vision_config.patch_size,
+            self.config.understand_projector_stride,
+        )
+
+        # Valid portion: bsz images, actual_total_patches patches
+        actual_np = [int(np) for np in num_patches]
+        total_patches = sum(actual_np)
+        img_tokens = bsz * img_out
+        patch_tokens = total_patches * patch_out
+
+        img_part = output[:img_tokens].reshape(bsz, img_out, hidden)
+        if total_patches > 0:
+            patch_part = output[img_tokens : img_tokens + patch_tokens].reshape(
+                -1, patch_out, hidden
+            )
+        else:
+            patch_part = None
+
+        merged: dict[int, torch.Tensor] = {}
+        cur_patch = 0
+        for i, idx in enumerate(indices):
+            np = actual_np[i]
+            parts: list[torch.Tensor] = []
+            if patch_part is not None and np > 0:
+                parts.append(patch_part[cur_patch : cur_patch + np].reshape(-1, hidden))
+                cur_patch += np
+            parts.append(img_part[i].reshape(-1, hidden))
+            merged[idx] = torch.cat(parts, dim=0) if len(parts) > 1 else parts[0]
+
+        out = [merged[i] for i in indices]
+        for i, idx in enumerate(indices):
+            dest[idx] = out[i]
+
+    def prepare_encoder_cudagraph_replay_buffers(
+        self,
+        mm_kwargs: dict[str, Any],
+        max_batch_size: int,
+        max_frames_per_batch: int,
+    ):
+        from vllm.v1.worker.encoder_cudagraph_defs import (
+            EncoderCudaGraphReplayBuffers,
+        )
+
+        # Only patch_pixel_values lives in the buffers dict; num_patches is
+        # processed CPU-side by finalize_encoder_cudagraph_output.
+        return EncoderCudaGraphReplayBuffers(
+            buffers={
+                "patch_pixel_values": mm_kwargs["patch_pixel_values"],
+            },
+        )
+
     def forward(
         self,
         input_ids: torch.Tensor | None,
diff --git a/vllm/model_executor/models/step3p5.py b/vllm/model_executor/models/step3p5.py
index bb4bf14a9632..cd73f6e26d9a 100644
--- a/vllm/model_executor/models/step3p5.py
+++ b/vllm/model_executor/models/step3p5.py
@@ -23,8 +23,10 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import SiluAndMul, SwigluStepAndMul
 from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.fused_moe import FusedMoE
-from vllm.model_executor.layers.fused_moe.shared_fused_moe import SharedFusedMoE
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.layers.layernorm import GemmaRMSNorm
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
@@ -372,14 +374,13 @@ def __init__(
             quant_config=quant_config,
             prefix=f"{prefix}.share_expert",
         )
-        self.experts = SharedFusedMoE(
+        self.experts = FusedMoE(
             shared_experts=self.share_expert,
             gate=self.gate,
             num_experts=config.moe_num_experts,
             top_k=config.moe_top_k,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=config.norm_expert_weight,
             quant_config=quant_config,
             activation=activation,
@@ -397,30 +398,16 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         hidden_states = hidden_states.view(-1, hidden_dim)
 
         if self.experts.is_internal_router:
-            # In this case, the gate/router runs inside the FusedMoE class
-            fused_moe_out = self.experts(
+            final_hidden_states = self.experts(
                 hidden_states=hidden_states, router_logits=hidden_states
             )
         else:
-            # router_logits: (num_tokens, n_experts)
+            # TODO(bnell): this gate could be moved into the FusedMoE?
             router_logits, _ = self.gate(hidden_states)
-            fused_moe_out = self.experts(
+            final_hidden_states = self.experts(
                 hidden_states=hidden_states, router_logits=router_logits
             )
 
-        shared_output, final_hidden_states = fused_moe_out
-        if self.share_expert is None:
-            assert shared_output is None
-
-        if self.share_expert is not None:
-            assert shared_output is not None
-            final_hidden_states += shared_output
-
-        if self.tp_size > 1:
-            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
-                final_hidden_states
-            )
-
         return final_hidden_states.view(num_tokens, hidden_dim)
 
 
@@ -641,16 +628,19 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
         params_dict = dict(self.named_parameters())
         loaded_params: set[str] = set()
+        base_layer = (
+            "base_layer." if any(".base_layer." in name for name in params_dict) else ""
+        )
 
         # Old packed 3D format: .moe.gate_proj.weight [num_experts, out, in]
         expert_params_mapping = [
-            (".moe.experts.w13_weight", ".moe.gate_proj.weight", "w1"),
-            (".moe.experts.w13_weight", ".moe.up_proj.weight", "w3"),
-            (".moe.experts.w2_weight", ".moe.down_proj.weight", "w2"),
+            (f".moe.experts.{base_layer}w13_weight", ".moe.gate_proj.weight", "w1"),
+            (f".moe.experts.{base_layer}w13_weight", ".moe.up_proj.weight", "w3"),
+            (f".moe.experts.{base_layer}w2_weight", ".moe.down_proj.weight", "w2"),
         ]
 
         # New per-expert format: .moe.experts.E.gate_proj.weight_packed [out, in]
-        per_expert_mapping = FusedMoE.make_expert_params_mapping(
+        per_expert_mapping = fused_moe_make_expert_params_mapping(
             self,
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
@@ -827,6 +817,12 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
 
 
 class Step3p5ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
+    # Required so quantization exclude lists match fused module prefixes.
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+        "gate_up_proj": ["gate_proj", "up_proj"],
+    }
+
     hf_to_vllm_mapper = WeightsMapper(
         orig_to_new_substr={".share_expert.": ".moe.share_expert."}
     )
diff --git a/vllm/model_executor/models/step3p5_mtp.py b/vllm/model_executor/models/step3p5_mtp.py
index 83e43dce5114..092f7a31aa7d 100644
--- a/vllm/model_executor/models/step3p5_mtp.py
+++ b/vllm/model_executor/models/step3p5_mtp.py
@@ -181,14 +181,17 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
             ("gate_up_proj", "gate_proj", 0),
             ("gate_up_proj", "up_proj", 1),
         ]
+        params_dict = dict(self.named_parameters())
+        base_layer = (
+            "base_layer." if any(".base_layer." in name for name in params_dict) else ""
+        )
 
         expert_params_mapping = [
-            (".moe.experts.w13_weight", ".moe.gate_proj.weight", "w1"),
-            (".moe.experts.w13_weight", ".moe.up_proj.weight", "w3"),
-            (".moe.experts.w2_weight", ".moe.down_proj.weight", "w2"),
+            (f".moe.experts.{base_layer}w13_weight", ".moe.gate_proj.weight", "w1"),
+            (f".moe.experts.{base_layer}w13_weight", ".moe.up_proj.weight", "w3"),
+            (f".moe.experts.{base_layer}w2_weight", ".moe.down_proj.weight", "w2"),
         ]
 
-        params_dict = dict(self.named_parameters())
         loaded_params: set[str] = set()
         for name, loaded_weight in weights:
             if "rotary_emb.inv_freq" in name:
diff --git a/vllm/model_executor/models/step_vl.py b/vllm/model_executor/models/step_vl.py
index 4669771f4bc0..53dd3ebf48b3 100644
--- a/vllm/model_executor/models/step_vl.py
+++ b/vllm/model_executor/models/step_vl.py
@@ -500,6 +500,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
         quant_config = vllm_config.quant_config
 
         self.config = config
+        self.model_config = vllm_config.model_config
         self.multimodal_config = multimodal_config
         self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
 
diff --git a/vllm/model_executor/models/transformers/__init__.py b/vllm/model_executor/models/transformers/__init__.py
index 93cd8ff50766..cb224e5cbc0e 100644
--- a/vllm/model_executor/models/transformers/__init__.py
+++ b/vllm/model_executor/models/transformers/__init__.py
@@ -16,13 +16,11 @@
 # limitations under the License.
 """Wrapper around `transformers` models"""
 
-from vllm.compilation.decorators import support_torch_compile
 from vllm.model_executor.models.transformers.base import Base
 from vllm.model_executor.models.transformers.causal import CausalMixin
 from vllm.model_executor.models.transformers.legacy import LegacyMixin
 from vllm.model_executor.models.transformers.moe import MoEMixin
 from vllm.model_executor.models.transformers.multimodal import (
-    DYNAMIC_ARG_DIMS,
     MultiModalDummyInputsBuilder,
     MultiModalMixin,
     MultiModalProcessingInfo,
@@ -32,16 +30,13 @@
     EmbeddingMixin,
     SequenceClassificationMixin,
 )
-from vllm.model_executor.models.transformers.utils import can_enable_torch_compile
 from vllm.multimodal import MULTIMODAL_REGISTRY
 
 
 # Text only models
-@support_torch_compile(enable_if=can_enable_torch_compile)
 class TransformersForCausalLM(CausalMixin, Base): ...
 
 
-@support_torch_compile(enable_if=can_enable_torch_compile)
 class TransformersMoEForCausalLM(MoEMixin, CausalMixin, Base): ...
 
 
@@ -51,9 +46,6 @@ class TransformersMoEForCausalLM(MoEMixin, CausalMixin, Base): ...
     info=MultiModalProcessingInfo,
     dummy_inputs=MultiModalDummyInputsBuilder,
 )
-@support_torch_compile(
-    dynamic_arg_dims=DYNAMIC_ARG_DIMS, enable_if=can_enable_torch_compile
-)
 class TransformersMultiModalForCausalLM(MultiModalMixin, CausalMixin, Base): ...
 
 
@@ -62,20 +54,15 @@ class TransformersMultiModalForCausalLM(MultiModalMixin, CausalMixin, Base): ...
     info=MultiModalProcessingInfo,
     dummy_inputs=MultiModalDummyInputsBuilder,
 )
-@support_torch_compile(
-    dynamic_arg_dims=DYNAMIC_ARG_DIMS, enable_if=can_enable_torch_compile
-)
 class TransformersMultiModalMoEForCausalLM(
     MoEMixin, MultiModalMixin, CausalMixin, Base
 ): ...
 
 
 # Embedding models
-@support_torch_compile(enable_if=can_enable_torch_compile)
 class TransformersEmbeddingModel(EmbeddingMixin, LegacyMixin, Base): ...
 
 
-@support_torch_compile(enable_if=can_enable_torch_compile)
 class TransformersMoEEmbeddingModel(EmbeddingMixin, MoEMixin, Base): ...
 
 
@@ -84,20 +71,15 @@ class TransformersMoEEmbeddingModel(EmbeddingMixin, MoEMixin, Base): ...
     info=MultiModalProcessingInfo,
     dummy_inputs=MultiModalDummyInputsBuilder,
 )
-@support_torch_compile(
-    dynamic_arg_dims=DYNAMIC_ARG_DIMS, enable_if=can_enable_torch_compile
-)
 class TransformersMultiModalEmbeddingModel(EmbeddingMixin, MultiModalMixin, Base): ...
 
 
 # Sequence classification models
-@support_torch_compile(enable_if=can_enable_torch_compile)
 class TransformersForSequenceClassification(
     SequenceClassificationMixin, LegacyMixin, Base
 ): ...
 
 
-@support_torch_compile(enable_if=can_enable_torch_compile)
 class TransformersMoEForSequenceClassification(
     SequenceClassificationMixin, MoEMixin, Base
 ): ...
@@ -108,9 +90,6 @@ class TransformersMoEForSequenceClassification(
     info=MultiModalProcessingInfo,
     dummy_inputs=MultiModalDummyInputsBuilder,
 )
-@support_torch_compile(
-    dynamic_arg_dims=DYNAMIC_ARG_DIMS, enable_if=can_enable_torch_compile
-)
 class TransformersMultiModalForSequenceClassification(
     SequenceClassificationMixin, MultiModalMixin, Base
 ): ...
diff --git a/vllm/model_executor/models/transformers/base.py b/vllm/model_executor/models/transformers/base.py
index d32bfe6cabbd..35897ce7dbca 100644
--- a/vllm/model_executor/models/transformers/base.py
+++ b/vllm/model_executor/models/transformers/base.py
@@ -29,6 +29,7 @@
 from transformers import AutoModel
 from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS
 
+from vllm.compilation.decorators import support_torch_compile
 from vllm.config.utils import getattr_iter
 from vllm.distributed import get_pp_group, get_tp_group
 from vllm.distributed.utils import get_pp_indices
@@ -47,6 +48,7 @@
 )
 from vllm.model_executor.models.interfaces_base import VllmModel
 from vllm.model_executor.models.transformers.utils import (
+    can_enable_torch_compile,
     get_feature_request_tip,
     init_on_device_without_buffers,
     log_replacement,
@@ -117,6 +119,7 @@ def __init__(self, *, vllm_config: "VllmConfig", prefix: str = ""):
         self.config = vllm_config.model_config.hf_config
         self.text_config = self.config.get_text_config()
         self.cache_config = vllm_config.cache_config
+        self.compilation_config = vllm_config.compilation_config
         self.device_config = vllm_config.device_config
         self.model_config = vllm_config.model_config
         self.parallel_config = vllm_config.parallel_config
@@ -146,7 +149,7 @@ def __init__(self, *, vllm_config: "VllmConfig", prefix: str = ""):
         if self.quant_config:
             quant_method_name = self.quant_config.get_name()
             # Check for unsupported quantization methods.
-            if quant_method_name == "mxfp4":
+            if quant_method_name in ("mxfp4", "gpt_oss_mxfp4"):
                 raise NotImplementedError(
                     "Transformers modeling backend does "
                     "not support MXFP4 quantization yet."
@@ -155,14 +158,16 @@ def __init__(self, *, vllm_config: "VllmConfig", prefix: str = ""):
             if "gptq" in quant_method_name:
                 self.ignore_unexpected_suffixes.append(".bias")
 
-        # Patch config and init on "meta" to delay allocating GPU tensors
         self._patch_config()
+        from_config_kwargs = dict(
+            config=self.config,
+            dtype=self.model_config.dtype,
+            trust_remote_code=self.model_config.trust_remote_code,
+        )
+        self._decorate_for_torch_compile(**from_config_kwargs)
+        # Init on "meta" to delay allocating GPU tensors
         with init_on_device_without_buffers("meta"):
-            self.model: PreTrainedModel = AutoModel.from_config(
-                self.config,
-                dtype=self.model_config.dtype,
-                trust_remote_code=self.model_config.trust_remote_code,
-            )
+            self.model: PreTrainedModel = AutoModel.from_config(**from_config_kwargs)
 
         # Create weight name to module qualname mapper
         self._create_hf_to_vllm_mapper()
@@ -218,6 +223,77 @@ def _patch_config(self):
             if sub_config.dtype != (dtype := self.config.dtype):
                 sub_config.dtype = dtype
 
+    def _get_decoder_cls(self, **kwargs: dict) -> type[PreTrainedModel]:
+        """
+        Get the decoder class from the model.
+
+        Args:
+            kwargs: The kwargs to create the model.
+
+        Returns:
+            The decoder class.
+        """
+        with torch.device("meta"):
+            model: PreTrainedModel = AutoModel.from_config(**kwargs)
+        decoder_cls = type(model.get_decoder())
+        logger.debug("Identified decoder class as: %s", decoder_cls)
+        del model
+        return decoder_cls
+
+    def _decorate_cls_for_torch_compile(
+        self,
+        cls: type[PreTrainedModel],
+        dynamic_arg_dims: dict[str, int] | None,
+        enable_if: Callable[["VllmConfig"], bool],
+        is_encoder: bool,
+    ):
+        """
+        Decorate `cls` to indicate to vLLM that it supports torch compile.
+
+        Args:
+            cls: The PreTrainedModel class to decorate.
+            dynamic_arg_dims: A mapping from argument name to the dynamic dimensions
+                of the argument. If None, default dynamic arg dims will be used. See
+                [`support_torch_compile`][vllm.compilation.decorators.support_torch_compile]
+                for more details.
+            enable_if: A function which takes in the vLLM config and returns whether
+                torch compile should be enabled for this class.
+            is_encoder: Whether the class being decorated is an encoder.
+        """
+        logger.debug(
+            "Decorating `%s` as %s for torch compile with dynamic_arg_dims of %s",
+            cls.__name__,
+            "encoder" if is_encoder else "decoder",
+            dynamic_arg_dims,
+        )
+
+        support_torch_compile(
+            dynamic_arg_dims=dynamic_arg_dims,
+            enable_if=enable_if,
+            is_encoder=is_encoder,
+        )(cls)
+
+    def _decorate_for_torch_compile(self, **kwargs: dict):
+        """
+        Decorate the model's decoder class to indicate to vLLM that it supports torch
+        compile if `can_enable_torch_compile` is True.
+
+        Args:
+            kwargs: The kwargs to create the model, which are needed to get the decoder
+                class.
+        """
+        self._decorate_cls_for_torch_compile(
+            cls=self._get_decoder_cls(**kwargs),
+            # Applied to a PreTrainedModel so the batch dimension will exist
+            dynamic_arg_dims=dict[str, int](
+                input_ids=1,  # shape: [1, seq_len]
+                inputs_embeds=1,  # shape: [1, seq_len, hidden_size]
+                position_ids=-1,  # shape: [1, seq_len] or [3, 1, seq_len] for mrope
+            ),
+            enable_if=can_enable_torch_compile,
+            is_encoder=False,
+        )
+
     def _create_hf_to_vllm_mapper(self):
         """
         Create a WeightsMapper to map checkpoint weight names to module qualnames.
@@ -553,11 +629,6 @@ def forward(
             input_ids = None
             inputs_embeds = intermediate_tensors["hidden_states"]
 
-        if input_ids is not None:
-            input_ids = input_ids[None, ...]
-        if inputs_embeds is not None:
-            inputs_embeds = inputs_embeds[None, ...]
-
         # If the model scales embeddings inside the input embedding layer we must
         # ensure they are scaled here since VocabParallelEmbedding will not do it
         if (
@@ -568,22 +639,29 @@ def forward(
             inputs_embeds = self.embed_input_ids(input_ids)
             input_ids = None
 
-        if self.model_config.uses_mrope:
-            position_ids = positions[:, None]
-        else:
-            position_ids = positions[None, ...]
+        # Add batch dimension before entering Transformers model
+        if input_ids is not None and input_ids.ndim == 1:
+            # [seq_len] -> [1, seq_len]
+            input_ids = input_ids[None, ...]
+        if inputs_embeds is not None and inputs_embeds.ndim == 2:
+            # [seq_len, hidden_size] -> [1, seq_len, hidden_size]
+            inputs_embeds = inputs_embeds[None, ...]
+        if positions.ndim == 1:
+            # [seq_len] -> [1, seq_len]
+            positions = positions[None, ...]
 
         outputs = self.model(
             input_ids=input_ids,
             inputs_embeds=inputs_embeds,
             use_cache=False,
-            position_ids=position_ids,
+            position_ids=positions,
             attention_instances=self.attention_instances,
             return_dict=False,
             **self._output_aux_hidden_states_kwargs,
             **kwargs,
         )
-        # We must remove the batch dimension from these outputs
+
+        # Remove batch dimension after exiting Transformers model
         hidden_states = outputs[0][0, ...]
         if self._output_aux_hidden_states_kwargs:
             aux_hidden_states = [x[0][0, ...] for x in outputs[1:]]
diff --git a/vllm/model_executor/models/transformers/moe.py b/vllm/model_executor/models/transformers/moe.py
index 5f8352faed50..51a51799ffc0 100644
--- a/vllm/model_executor/models/transformers/moe.py
+++ b/vllm/model_executor/models/transformers/moe.py
@@ -24,8 +24,11 @@
 from vllm.config.utils import getattr_iter
 from vllm.distributed import get_dp_group, get_ep_group
 from vllm.forward_context import ForwardContext, get_forward_context
-from vllm.model_executor.custom_op import CustomOp
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.custom_op import PluggableLayer
+from vllm.model_executor.layers.fused_moe import (
+    FusedMoE,
+    fused_moe_make_expert_params_mapping,
+)
 from vllm.model_executor.models.interfaces import MixtureOfExperts
 from vllm.model_executor.models.utils import maybe_prefix
 from vllm.platforms import current_platform
@@ -38,7 +41,7 @@
 
 
 # --8<-- [start:transformers_fused_moe]
-@CustomOp.register("transformers_fused_moe")
+@PluggableLayer.register("transformers_fused_moe")
 class TransformersFusedMoE(FusedMoE):
     """Custom FusedMoE for the Transformers modeling backend."""
 
@@ -94,6 +97,8 @@ def transformers_moe_forward(
     self = forward_context.no_compile_layers[layer_name]
     self._topk_ids = topk_ids
     # Clone hidden_states because it will be mutated in-place in FusedMoE
+    # TODO(bnell): figure out a way to avoid calling runner directly.
+    # it is a hack that the weight are being passed via logits.
     return self.runner.forward(hidden_states.clone(), topk_weights)
 
 
@@ -177,7 +182,7 @@ def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         num_redundant_experts = self.parallel_config.eplb_config.num_redundant_experts
         for gate_proj, down_proj, up_proj in ckpt_names:
             expert_mapping.extend(
-                FusedMoE.make_expert_params_mapping(
+                fused_moe_make_expert_params_mapping(
                     self,
                     ckpt_gate_proj_name=gate_proj,
                     ckpt_down_proj_name=down_proj,
@@ -202,8 +207,6 @@ def recursive_replace(self):
         )
         assert intermediate_size is not None
 
-        # If there are shared experts, the results are
-        # reduced after mlp.forward() not inside FusedMoE
         num_shared_experts = getattr_iter(
             text_config,
             [
@@ -212,17 +215,6 @@ def recursive_replace(self):
             ],
             0,
         )
-        reduce_results = num_shared_experts == 0
-
-        def add_all_reduce(mlp: nn.Module):
-            """Adds an all-reduce to the output of `mlp.forward()`."""
-
-            class MLPWithAllReduce(mlp.__class__):
-                def forward(self, *args, **kwargs):
-                    output = super().forward(*args, **kwargs)
-                    return self.experts.maybe_all_reduce_tensor_model_parallel(output)
-
-            mlp.__class__ = MLPWithAllReduce
 
         # Unused kwargs since we use custom_routing_function:
         # - `scoring_func` and `e_score_correction_bias` only used for grouped
@@ -287,14 +279,11 @@ def _recursive_replace(module: nn.Module, prefix: str):
                         if "bias" in experts_param_name:
                             has_bias = True
                             break
-                    # Double check there are no shared experts
-                    nonlocal reduce_results
-                    if reduce_results:
+                    # If the config does not specify num_shared_experts, but
+                    # the model has shared experts, we assume there is one.
+                    if self.num_shared_experts == 0:
                         for mlp_param_name, _ in mlp.named_parameters():
                             if "shared_expert" in mlp_param_name:
-                                reduce_results = False
-                                # If the config does not specify num_shared_experts, but
-                                # the model has shared experts, we assume there is one.
                                 self.num_shared_experts = 1
                                 break
                     # Replace experts module with FusedMoE
@@ -303,7 +292,6 @@ def _recursive_replace(module: nn.Module, prefix: str):
                         top_k=top_k,
                         hidden_size=hidden_size,
                         intermediate_size=intermediate_size,
-                        reduce_results=reduce_results,
                         renormalize=renormalize,
                         # Hard coded because topk happens in Transformers
                         use_grouped_topk=False,
@@ -324,13 +312,6 @@ def _recursive_replace(module: nn.Module, prefix: str):
                     self.moe_layers.append(fused_experts)
                     self.expert_weights.append(fused_experts.get_expert_weights())
                     self.num_moe_layers += 1
-                    # If results are not all-reduced in FusedMoE, ensure they
-                    # are all-reduced at the end of mlp.forward() if tensor
-                    # parallel or expert parallel is enabled
-                    if not reduce_results and (
-                        fused_experts.tp_size > 1 or fused_experts.ep_size > 1
-                    ):
-                        add_all_reduce(mlp)
                 else:
                     _recursive_replace(child_module, prefix=qual_name)
 
diff --git a/vllm/model_executor/models/transformers/multimodal.py b/vllm/model_executor/models/transformers/multimodal.py
index ddcd91f61e4a..4d900b5dde62 100644
--- a/vllm/model_executor/models/transformers/multimodal.py
+++ b/vllm/model_executor/models/transformers/multimodal.py
@@ -20,7 +20,9 @@
 from typing import TYPE_CHECKING
 
 import torch
+from transformers import AutoModel
 
+from vllm.compilation.decorators import should_torch_compile_mm_encoder
 from vllm.config.utils import getattr_iter
 from vllm.inputs import MultiModalDataDict, MultiModalInput, mm_input
 from vllm.logger import init_logger
@@ -46,19 +48,11 @@
 from vllm.sequence import IntermediateTensors
 
 if TYPE_CHECKING:
-    from transformers import BatchFeature
+    from transformers import BatchFeature, PreTrainedModel
 
     from vllm.config import VllmConfig
     from vllm.config.multimodal import BaseDummyOptions
 
-DYNAMIC_ARG_DIMS = {
-    "input_ids": 0,
-    # set `positions` to last dim to support Qwen-mrope
-    "positions": -1,
-    "intermediate_tensors": 0,
-    "inputs_embeds": 0,
-}
-
 logger = init_logger(__name__)
 
 
@@ -158,7 +152,9 @@ def _get_mm_fields_config(
         # Keep these as batched, as they always have batch size as first dim
         mm_fields["image_grid_thw"] = MultiModalFieldConfig.batched("image")
         mm_fields["video_grid_thw"] = MultiModalFieldConfig.batched("image")
-        mm_fields["num_image_patches"] = MultiModalFieldConfig.batched("image")
+        mm_fields["num_image_patches"] = MultiModalFieldConfig.batched(
+            "image", keep_on_cpu=True
+        )
         return mm_fields
 
     def _get_hf_mm_data(
@@ -274,6 +270,66 @@ def __init__(self, *, vllm_config: "VllmConfig", prefix: str = ""):
         # Skip SupportsMRoPE.__init__ and call the next class in MRO
         super(SupportsMRoPE, self).__init__(vllm_config=vllm_config, prefix=prefix)
 
+    def _get_encoder_cls(
+        self, modality: str = "image", **kwargs: dict
+    ) -> type["PreTrainedModel"]:
+        """
+        Get the encoder class from the model.
+
+        Args:
+            kwargs: The kwargs to create the model.
+
+        Returns:
+            The encoder class.
+        """
+        with torch.device("meta"):
+            model: PreTrainedModel = AutoModel.from_config(**kwargs)
+        encoder_cls = type(model.get_encoder(modality=modality))
+        logger.debug("Identified encoder class as: %s", encoder_cls)
+        if type(model) is encoder_cls:
+            raise ValueError(
+                "Unable to infer vision encoder class from the model. "
+                "You must either: update the model so that "
+                "https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel.get_encoder"
+                " can detect the vision encoder correctly, or remove "
+                "'compile_mm_encoder'."
+            )
+        del model
+        return encoder_cls
+
+    def _decorate_for_torch_compile(self, **kwargs: dict):
+        """
+        Decorate the model's decoder and encoder classes to indicate to vLLM that they
+        support torch compile if `can_enable_torch_compile` and
+        `should_torch_compile_mm_encoder` are True respectively.
+
+        Args:
+            kwargs: The kwargs to create the model, which are needed to get the decoder
+                and encoder classes.
+        """
+        super()._decorate_for_torch_compile(**kwargs)
+        # Decorate the vision encoder model class to support torch compile if needed
+        if self.compilation_config.compile_mm_encoder:
+            self.check_version("5.0.0", "multimodal encoder compilation support")
+            logger.warning_once(
+                "Multimodal encoder compilation with the Transformers modeling backend "
+                "is an experimental feature. It relies on:\n"
+                "- The vision encoder being torch compilable.\n"
+                "- All vision encoder tensor inputs must be type hinted as either "
+                "`torch.Tensor` or `torch.FloatTensor`.\n"
+                "- The 0-th dimension of all tensor inputs to the vision encoder being "
+                "the dynamic dimension (i.e., sequence length or number of patches).\n"
+                "Please report any issues you encounter to help us improve it."
+            )
+            self._decorate_cls_for_torch_compile(
+                cls=self._get_encoder_cls(**kwargs),
+                # TODO: properly infer dynamic_arg_dims based on the encoder's forward
+                # method signature. Currently we assume dim 0 for all tensor inputs.
+                dynamic_arg_dims=None,
+                enable_if=should_torch_compile_mm_encoder,
+                is_encoder=True,
+            )
+
     def forward(
         self,
         input_ids: torch.Tensor | None,
@@ -285,6 +341,10 @@ def forward(
         # Gemma3 and PaliGemma needs `token_type_ids` to work correctly
         # Other models will not have `token_type_ids` in kwargs
         kwargs = {k: v for k, v in kwargs.items() if k == "token_type_ids"}
+        # Positions shape handling for MRoPE models
+        if self.model_config.uses_mrope:
+            # [3, seq_len] -> [3, 1, seq_len]
+            positions = positions[:, None]
         model_output = super().forward(
             input_ids, positions, intermediate_tensors, inputs_embeds, **kwargs
         )
diff --git a/vllm/model_executor/models/transformers/utils.py b/vllm/model_executor/models/transformers/utils.py
index e47f3bba5cfb..04d6de28efd0 100644
--- a/vllm/model_executor/models/transformers/utils.py
+++ b/vllm/model_executor/models/transformers/utils.py
@@ -94,7 +94,15 @@ def wrapper(*args, **kwargs):
             setattr(torch, torch_function_name, old_torch_function)
 
 
-Style = Literal["colwise", "colwise_rep", "rowwise", "rowwise_rep", "replicate"]
+Style = Literal[
+    "colwise",
+    "rowwise",
+    "replicate",
+    "colwise_gather_output",
+    "rowwise_split_input",
+    "colwise_rep",
+    "rowwise_rep",
+]
 
 
 def replace_linear_class(
@@ -120,10 +128,14 @@ def replace_linear_class(
 
     vllm_linear_cls, vllm_linear_kwargs = {
         "colwise": (ColumnParallelLinear, {}),
-        "colwise_rep": (ColumnParallelLinear, {"gather_output": True}),
         "rowwise": (RowParallelLinear, {}),
-        "rowwise_rep": (RowParallelLinear, {"input_is_parallel": False}),
         "replicate": (ReplicatedLinear, {}),
+        # Transformers v5
+        "colwise_gather_output": (ColumnParallelLinear, {"gather_output": True}),
+        "rowwise_split_input": (RowParallelLinear, {"input_is_parallel": False}),
+        # Transformers v4
+        "colwise_rep": (ColumnParallelLinear, {"gather_output": True}),
+        "rowwise_rep": (RowParallelLinear, {"input_is_parallel": False}),
     }.get(style, (ReplicatedLinear, {}))
 
     return vllm_linear_cls(
diff --git a/vllm/model_executor/models/ultravox.py b/vllm/model_executor/models/ultravox.py
index 83241b329da3..986255d86f09 100644
--- a/vllm/model_executor/models/ultravox.py
+++ b/vllm/model_executor/models/ultravox.py
@@ -566,7 +566,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             self.secondary_weights.append(
                 DefaultModelLoader.Source(
                     model_or_path=config.audio_model_id,
-                    revision=None,
+                    revision=vllm_config.model_config.revision,
                     prefix="audio_tower.",
                 )
             )
@@ -576,7 +576,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             self.secondary_weights.append(
                 DefaultModelLoader.Source(
                     model_or_path=config.text_model_id,
-                    revision=None,
+                    revision=vllm_config.model_config.revision,
                     prefix="language_model.",
                 )
             )
diff --git a/vllm/model_executor/models/utils.py b/vllm/model_executor/models/utils.py
index 8abaa557f9c6..095d0e363d5d 100644
--- a/vllm/model_executor/models/utils.py
+++ b/vllm/model_executor/models/utils.py
@@ -30,10 +30,8 @@
 from vllm.multimodal import NestedTensors
 from vllm.sequence import IntermediateTensors
 from vllm.utils.math_utils import cdiv
-from vllm.utils.platform_utils import (
-    is_pin_memory_available,
-)
 from vllm.utils.torch_utils import (
+    async_tensor_h2d,
     direct_register_custom_op,
 )
 
@@ -233,8 +231,15 @@ def _add_loadable_non_param_tensors(
     ):
         """
         Add tensor names that are not in the model params that may be in the
-        safetensors, e.g., batch normalization stats.
+        safetensors, e.g., batch normalization stats and registered buffers.
         """
+        # Add persistent registered buffers.
+        # Non-persistent buffers are excluded, matching PyTorch state_dict().
+        non_persistent = getattr(module, "_non_persistent_buffers_set", set())
+        for buf_name, buf in module.named_buffers(recurse=False):
+            if buf_name not in child_params and buf_name not in non_persistent:
+                child_params[buf_name] = buf
+
         if isinstance(
             module,
             (
@@ -468,14 +473,8 @@ def _merge_multimodal_embeddings(
     input_dtype = inputs_embeds.dtype
 
     try:
-        # For debugging
-        # inputs_embeds[is_multimodal] = mm_embeds_flat.to(dtype=input_dtype)
-
-        # NOTE: This can avoid D2H sync (#22105), but fails to
-        # raise an error if is_multimodal.sum() < len(mm_embeds_flat)
-        inputs_embeds.masked_scatter_(
-            is_multimodal.unsqueeze(-1), mm_embeds_flat.to(dtype=input_dtype)
-        )
+        # If is_multimodal is on CPU this avoids a D2H sync
+        inputs_embeds[is_multimodal] = mm_embeds_flat.to(dtype=input_dtype)
     except RuntimeError as e:
         num_actual_tokens = len(mm_embeds_flat)
         num_expected_tokens = is_multimodal.sum().item()
@@ -488,7 +487,7 @@ def _merge_multimodal_embeddings(
                 f"multimodal tokens to {num_expected_tokens} placeholders"
             ) from e
 
-        raise ValueError("Error during masked scatter operation") from e
+        raise ValueError("Error during index put operation") from e
 
     return inputs_embeds
 
@@ -497,10 +496,9 @@ def isin_list(
     elements: torch.Tensor,
     test_elements_list: list[int],
 ) -> torch.Tensor:
-    test_elements = torch.tensor(
-        test_elements_list,
-        pin_memory=is_pin_memory_available(),
-    ).to(device=elements.device, non_blocking=True)
+    test_elements = async_tensor_h2d(
+        test_elements_list, dtype=torch.int64, device=elements.device
+    )
 
     return torch.isin(elements, test_elements)
 
@@ -771,14 +769,9 @@ def extract_layer_index(layer_name: str, num_attn_module: int = 1) -> int:
         return layer_index
 
 
-def cast_overflow_tensors(
-    tensors: torch.Tensor,
-    offset: float = 1000,
-) -> torch.Tensor:
-    if tensors.isinf().any() or tensors.isnan().any():
-        clamp_value = torch.finfo(tensors.dtype).max - offset
-        tensors = torch.clamp(tensors, min=-clamp_value, max=clamp_value)
-    return tensors
+def cast_overflow_tensors(tensors: torch.Tensor, offset: float = 1000) -> torch.Tensor:
+    clamp_value = torch.finfo(tensors.dtype).max - offset
+    return torch.clamp(tensors, min=-clamp_value, max=clamp_value)
 
 
 def fast_topk(
@@ -883,3 +876,19 @@ def get_layer_index(feature_layer_index: int, num_hidden_layers: int) -> int:
     if feature_layer_index < 0:
         return num_hidden_layers + feature_layer_index + 1
     return feature_layer_index
+
+
+def scatter_output_slices(
+    output: torch.Tensor,
+    indices: list[int],
+    per_item_out_tokens: list[int],
+    dest: dict[int, torch.Tensor] | list[torch.Tensor | None],
+    clone: bool = False,
+) -> None:
+    """Slice a concatenated output tensor and scatter into dest by index."""
+    offset = 0
+    for idx in indices:
+        n_tok = per_item_out_tokens[idx]
+        sliced = output[offset : offset + n_tok]
+        dest[idx] = sliced.clone() if clone else sliced
+        offset += n_tok
diff --git a/vllm/model_executor/models/vision.py b/vllm/model_executor/models/vision.py
index e6a243006759..0582c125c66a 100644
--- a/vllm/model_executor/models/vision.py
+++ b/vllm/model_executor/models/vision.py
@@ -10,7 +10,7 @@
 import torch
 from transformers import PretrainedConfig
 
-from vllm.config import MultiModalConfig, VllmConfig, get_current_vllm_config
+from vllm.config import MultiModalConfig, get_current_vllm_config_or_none
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -18,6 +18,7 @@
 )
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
+from vllm.utils.math_utils import round_up
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
 
 logger = init_logger(__name__)
@@ -102,45 +103,48 @@ def get_vit_attn_backend(
     """
     Get the attention backend for Vision Transformer.
     """
-    try:
-        vllm_config: VllmConfig = get_current_vllm_config()
-        model_config = vllm_config.model_config
-        multimodal_config: MultiModalConfig | None = (
-            model_config.multimodal_config if model_config is not None else None
-        )
-    except (AssertionError, AttributeError):
-        multimodal_config = None
-
+    mm_cfg = get_multimodal_config()
     attn_backend_override = (
-        multimodal_config.mm_encoder_attn_backend
-        if multimodal_config is not None
-        else None
+        mm_cfg.mm_encoder_attn_backend if mm_cfg is not None else None
     )
-    attn_backend = _get_vit_attn_backend(
+    return _get_vit_attn_backend(
         head_size,
         dtype,
         attn_backend_override=attn_backend_override,
     )
-    return attn_backend
+
+
+def get_multimodal_config() -> MultiModalConfig | None:
+    """Return the current ``MultiModalConfig``, or ``None`` when no engine
+    config context is active (e.g., during unit tests) or when the current
+    ``model_config`` does not carry a ``multimodal_config`` (e.g., minimal
+    stubs used in tests)."""
+    vllm_config = get_current_vllm_config_or_none()
+    if vllm_config is None or vllm_config.model_config is None:
+        return None
+    return getattr(vllm_config.model_config, "multimodal_config", None)
+
+
+def get_fp8_padded_hidden_size(num_heads: int, head_dim: int) -> int | None:
+    """Return the padded hidden size for FP8 ViT encoder attention, or
+    ``None`` when FP8 is not enabled.
+
+    cuDNN FP8 prefill attention requires ``head_dim`` to be a multiple of
+    16. For non-aligned ``head_dim`` (e.g. 72), Q/K/V are padded to the
+    nearest multiple of 16.
+    """
+    mm_cfg = get_multimodal_config()
+    if mm_cfg is None or mm_cfg.mm_encoder_attn_dtype != "fp8":
+        return None
+    return num_heads * round_up(head_dim, 16)
 
 
 def is_vit_use_data_parallel():
     """
     Get the tensor parallel type for Vision Transformer.
     """
-    try:
-        vllm_config: VllmConfig = get_current_vllm_config()
-        model_config = vllm_config.model_config
-        multimodal_config: MultiModalConfig | None = (
-            model_config.multimodal_config if model_config is not None else None
-        )
-    except (AssertionError, AttributeError):
-        multimodal_config = None
-
-    mm_encoder_tp_mode = (
-        multimodal_config.mm_encoder_tp_mode if multimodal_config is not None else None
-    )
-    return mm_encoder_tp_mode == "data"
+    mm_cfg = get_multimodal_config()
+    return mm_cfg is not None and mm_cfg.mm_encoder_tp_mode == "data"
 
 
 VisionFeatureSelectStrategyStr = Literal["class", "default", "full"]
diff --git a/vllm/model_executor/models/voxtral.py b/vllm/model_executor/models/voxtral.py
index d44960ca8117..baf6d5c7394f 100644
--- a/vllm/model_executor/models/voxtral.py
+++ b/vllm/model_executor/models/voxtral.py
@@ -4,7 +4,7 @@
 import math
 from collections.abc import Iterable, Mapping, Sequence
 from functools import partial
-from typing import Literal, cast
+from typing import cast
 
 import numpy as np
 import regex as re
@@ -19,6 +19,7 @@
 
 from vllm.config import ModelConfig, SpeechToTextConfig, VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
+from vllm.config.speech_to_text import SpeechToTextParams
 from vllm.inputs import MultiModalDataDict, PromptType, TokensPrompt
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import QuantizationConfig
@@ -348,6 +349,10 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
                 dim=config.text_config.hidden_size,
             )
 
+        self.make_empty_intermediate_tensors = (
+            self.language_model.make_empty_intermediate_tensors
+        )
+
     def get_mm_mapping(self) -> MultiModelKeys:
         """Get module prefix for multimodal models to filter LoRA modules."""
         return MultiModelKeys.from_string_field(
@@ -446,14 +451,13 @@ def get_speech_to_text_config(
     # for speech-to-text transcription
     def get_generation_prompt(
         cls,
-        audio: np.ndarray,
-        model_config: ModelConfig,
-        stt_config: SpeechToTextConfig,
-        language: str | None,
-        task_type: Literal["transcribe", "translate"],
-        request_prompt: str,
-        to_language: str | None,
+        stt_params: SpeechToTextParams,
     ) -> PromptType:
+        audio = stt_params.audio
+        model_config = stt_params.model_config
+        stt_config = stt_params.stt_config
+        language = stt_params.language
+
         tokenizer = cached_tokenizer_from_config(model_config)
         audio = Audio(audio, int(stt_config.sample_rate), format="wav")  # lossless
         req = TranscriptionRequest(
diff --git a/vllm/model_executor/models/voxtral_realtime.py b/vllm/model_executor/models/voxtral_realtime.py
index b70714a0d83a..2628e1443e2d 100644
--- a/vllm/model_executor/models/voxtral_realtime.py
+++ b/vllm/model_executor/models/voxtral_realtime.py
@@ -4,7 +4,6 @@
 import asyncio
 import math
 from collections.abc import AsyncGenerator, Iterable, Iterator, Mapping
-from typing import Literal
 
 import numpy as np
 import torch
@@ -18,6 +17,7 @@
 
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import ModelConfig, SpeechToTextConfig, VllmConfig
+from vllm.config.speech_to_text import SpeechToTextParams
 from vllm.engine.protocol import StreamingInput
 from vllm.envs import VLLM_ENGINE_ITERATION_TIMEOUT_S
 from vllm.inputs import PromptType, TokensPrompt
@@ -465,14 +465,13 @@ def get_speech_to_text_config(
     # for speech-to-text transcription
     def get_generation_prompt(
         cls,
-        audio: np.ndarray,
-        model_config: ModelConfig,
-        stt_config: SpeechToTextConfig,
-        language: str | None,
-        task_type: Literal["transcribe", "translate"],
-        request_prompt: str,
-        to_language: str | None,
+        stt_params: SpeechToTextParams,
     ) -> PromptType:
+        audio = stt_params.audio
+        model_config = stt_params.model_config
+        stt_config = stt_params.stt_config
+        language = stt_params.language
+
         tokenizer = cached_tokenizer_from_config(model_config)
         audio = Audio(audio, int(stt_config.sample_rate), format="wav")  # lossless
 
diff --git a/vllm/model_executor/models/voyage.py b/vllm/model_executor/models/voyage.py
index bfc06a10e841..92d6d7633f02 100644
--- a/vllm/model_executor/models/voyage.py
+++ b/vllm/model_executor/models/voyage.py
@@ -2,23 +2,20 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from __future__ import annotations
 
-from collections import defaultdict
 from collections.abc import Iterable
 
-import regex as re
 import torch
 import torch.nn as nn
 
-from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.config import VllmConfig
 from vllm.model_executor.models.qwen3 import Qwen3Model
-from vllm.model_executor.models.utils import WeightsMapper
+from vllm.model_executor.models.utils import AutoWeightsLoader, maybe_prefix
+from vllm.sequence import IntermediateTensors
 
 WeightItem = tuple[str, torch.Tensor]
 
-_LAYER_RE = re.compile(r"^layers\.(\d+)\.(.+)$")
 
-
-class VoyageQwen3BidirectionalEmbedModel(Qwen3Model):
+class VoyageQwen3BidirectionalEmbedModel(nn.Module):
     """
     Qwen3Model + Voyage embedding head + bidirectional attention.
 
@@ -32,16 +29,14 @@ class VoyageQwen3BidirectionalEmbedModel(Qwen3Model):
       - mlp.gate_up_proj (fused)
       - self_attn.qkv_proj (fused)
       - No "model." prefix
-
-    We remap/fuse weights using generator pipeline and load directly
-    (bypassing parent's stacked_params_mapping which would cause
-    double-transformation like qkv_proj -> qkqkv_proj).
     """
 
-    hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={"model.": ""})
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        self.config = vllm_config.model_config.hf_config
+        self.model = Qwen3Model(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
 
         # Embedding head (hidden_size -> num_labels, bias=False)
         self.linear = nn.Linear(
@@ -50,81 +45,23 @@ def __init__(self, *args, **kwargs):
             bias=False,
         )
 
-    def forward(self, *args, **kwargs):
-        out = super().forward(*args, **kwargs)
-        return self.linear(out)
-
-    def _fuse_qkv_proj(self, weights: Iterable[WeightItem]) -> Iterable[WeightItem]:
-        """Fuse q_proj, k_proj, v_proj into qkv_proj."""
-        qkv_buf: dict[int, dict[str, torch.Tensor]] = defaultdict(dict)
-        qkv_suffixes = {
-            "self_attn.q_proj.weight": "q",
-            "self_attn.k_proj.weight": "k",
-            "self_attn.v_proj.weight": "v",
-        }
-
-        for name, tensor in weights:
-            m = _LAYER_RE.match(name)
-            if m and m.group(2) in qkv_suffixes:
-                layer_idx = int(m.group(1))
-                qkv_buf[layer_idx][qkv_suffixes[m.group(2)]] = tensor
-            else:
-                yield name, tensor
-
-        # Yield fused QKV weights
-        for layer_idx in sorted(qkv_buf.keys()):
-            parts = qkv_buf[layer_idx]
-            if all(p in parts for p in ("q", "k", "v")):
-                fused = torch.cat([parts["q"], parts["k"], parts["v"]], dim=0)
-                yield f"layers.{layer_idx}.self_attn.qkv_proj.weight", fused
-            elif parts:
-                missing = [p for p in ("q", "k", "v") if p not in parts]
-                raise ValueError(f"Layer {layer_idx} missing QKV parts: {missing}")
-
-    def _fuse_gate_up_proj(self, weights: Iterable[WeightItem]) -> Iterable[WeightItem]:
-        """Fuse gate_proj and up_proj into gate_up_proj."""
-        mlp_buf: dict[int, dict[str, torch.Tensor]] = defaultdict(dict)
-        mlp_suffixes = {
-            "mlp.gate_proj.weight": "gate",
-            "mlp.up_proj.weight": "up",
-        }
-
-        for name, tensor in weights:
-            m = _LAYER_RE.match(name)
-            if m and m.group(2) in mlp_suffixes:
-                layer_idx = int(m.group(1))
-                mlp_buf[layer_idx][mlp_suffixes[m.group(2)]] = tensor
-            else:
-                yield name, tensor
+        self.make_empty_intermediate_tensors = (
+            self.model.make_empty_intermediate_tensors
+        )
 
-        # Yield fused gate_up weights
-        for layer_idx in sorted(mlp_buf.keys()):
-            parts = mlp_buf[layer_idx]
-            if all(p in parts for p in ("gate", "up")):
-                fused = torch.cat([parts["gate"], parts["up"]], dim=0)
-                yield f"layers.{layer_idx}.mlp.gate_up_proj.weight", fused
-            elif parts:
-                missing = [p for p in ("gate", "up") if p not in parts]
-                raise ValueError(f"Layer {layer_idx} missing MLP parts: {missing}")
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        out = self.model(input_ids, positions, intermediate_tensors, inputs_embeds)
+        return self.linear(out)
 
     def load_weights(self, weights: Iterable[WeightItem]) -> set[str]:
-        """Remap, fuse, and load weights using generator pipeline."""
-        # Chain weight transformations
-        weights = self.hf_to_vllm_mapper.apply(weights)
-        weights = self._fuse_qkv_proj(weights)
-        weights = self._fuse_gate_up_proj(weights)
-
-        # Load weights directly into model parameters
-        # (bypass parent's stacked_params_mapping)
-        params_dict = dict(self.named_parameters())
-        loaded_params: set[str] = set()
-
-        for name, loaded_weight in weights:
-            if name not in params_dict:
-                continue
-            param = params_dict[name]
-            weight_loader = getattr(param, "weight_loader", default_weight_loader)
-            weight_loader(param, loaded_weight)
-            loaded_params.add(name)
-
-        return loaded_params
+        loader = AutoWeightsLoader(self)
+        return loader.load_weights(weights)
diff --git a/vllm/model_executor/models/whisper.py b/vllm/model_executor/models/whisper.py
index f0f6f619b022..628186e7598b 100644
--- a/vllm/model_executor/models/whisper.py
+++ b/vllm/model_executor/models/whisper.py
@@ -5,7 +5,7 @@
 import math
 from collections.abc import Iterable, Mapping, Sequence
 from contextlib import nullcontext
-from typing import Annotated, Literal
+from typing import Annotated
 
 import numpy as np
 import torch
@@ -20,6 +20,7 @@
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, ModelConfig, SpeechToTextConfig, VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
+from vllm.config.speech_to_text import SpeechToTextParams
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.inputs import (
     ExplicitEncoderDecoderPrompt,
@@ -830,14 +831,14 @@ def validate_language(cls, language: str | None) -> str | None:
     @classmethod
     def get_generation_prompt(
         cls,
-        audio: np.ndarray,
-        model_config: ModelConfig,  # not needed here
-        stt_config: SpeechToTextConfig,
-        language: str | None,
-        task_type: Literal["transcribe", "translate"],
-        request_prompt: str,
-        to_language: str | None,
+        stt_params: SpeechToTextParams,
     ) -> PromptType:
+        audio = stt_params.audio
+        stt_config = stt_params.stt_config
+        language = stt_params.language
+        task_type = stt_params.task_type
+        request_prompt = stt_params.request_prompt
+
         if language is None:
             raise ValueError(
                 "Language must be specified when creating the Whisper prompt"
diff --git a/vllm/model_executor/models/whisper_causal.py b/vllm/model_executor/models/whisper_causal.py
index 8e4322ea335d..dfbf69418a6c 100644
--- a/vllm/model_executor/models/whisper_causal.py
+++ b/vllm/model_executor/models/whisper_causal.py
@@ -184,7 +184,7 @@ def forward(
             value: torch.Tensor,
             kv_cache: torch.Tensor,
             attn_metadata: AttentionMetadata,
-            output: torch.Tensor | None = None,
+            output: torch.Tensor,
             output_scale: torch.Tensor | None = None,
             output_block_scale: torch.Tensor | None = None,
         ) -> torch.Tensor:
diff --git a/vllm/model_executor/offloader/__init__.py b/vllm/model_executor/offloader/__init__.py
index a6522ff7c0a3..f1b49c69ef93 100644
--- a/vllm/model_executor/offloader/__init__.py
+++ b/vllm/model_executor/offloader/__init__.py
@@ -8,6 +8,7 @@
     create_offloader,
     get_offloader,
     set_offloader,
+    should_pin_memory,
 )
 from vllm.model_executor.offloader.prefetch import PrefetchOffloader
 from vllm.model_executor.offloader.uva import UVAOffloader
@@ -20,4 +21,5 @@
     "create_offloader",
     "get_offloader",
     "set_offloader",
+    "should_pin_memory",
 ]
diff --git a/vllm/model_executor/offloader/base.py b/vllm/model_executor/offloader/base.py
index 7cb0ddfd1848..ceff60cd4cde 100644
--- a/vllm/model_executor/offloader/base.py
+++ b/vllm/model_executor/offloader/base.py
@@ -10,7 +10,9 @@
 
 import torch.nn as nn
 
+import vllm.envs as envs
 from vllm.logger import init_logger
+from vllm.utils.platform_utils import is_pin_memory_available
 
 if TYPE_CHECKING:
     from vllm.config import OffloadConfig
@@ -18,6 +20,18 @@
 logger = init_logger(__name__)
 
 
+def should_pin_memory() -> bool:
+    """Check if pinned memory should be used for weight offloading.
+
+    Combines the platform capability check with the user override env var.
+    On unified-memory systems (e.g. GH200) pinned memory eats into GPU
+    memory, so users can disable it via VLLM_WEIGHT_OFFLOADING_DISABLE_PIN_MEMORY.
+    """
+    return (
+        is_pin_memory_available() and not envs.VLLM_WEIGHT_OFFLOADING_DISABLE_PIN_MEMORY
+    )
+
+
 """
 class relation:
 
@@ -104,11 +118,9 @@ def set_offloader(instance: BaseOffloader) -> None:
     global _instance
     _instance = instance
     if isinstance(instance, NoopOffloader):
-        logger.debug_once(
-            "Offloader set to NoopOffloader (no offloading).", scope="local"
-        )
+        logger.debug_once("Offloader set to NoopOffloader (no offloading).")
     else:
-        logger.info_once("Offloader set to %s", type(instance).__name__, scope="local")
+        logger.info_once("Offloader set to %s", type(instance).__name__)
 
 
 def create_offloader(offload_config: "OffloadConfig") -> BaseOffloader:
diff --git a/vllm/model_executor/offloader/prefetch.py b/vllm/model_executor/offloader/prefetch.py
index 5bdde8c3a18a..466d8c13ce76 100644
--- a/vllm/model_executor/offloader/prefetch.py
+++ b/vllm/model_executor/offloader/prefetch.py
@@ -20,8 +20,8 @@
 # Import prefetch_ops to register custom ops at module load time
 import vllm.model_executor.offloader.prefetch_ops  # noqa: F401
 from vllm.logger import init_logger
-from vllm.model_executor.offloader.base import BaseOffloader
-from vllm.utils.platform_utils import is_pin_memory_available
+from vllm.model_executor.offloader.base import BaseOffloader, should_pin_memory
+from vllm.utils.torch_utils import get_dtype_size
 
 logger = init_logger(__name__)
 
@@ -54,7 +54,7 @@ def num_bytes(self) -> int:
         numel = 1
         for dim in self.shape:
             numel *= dim
-        return numel * torch.finfo(self.dtype).bits // 8
+        return numel * get_dtype_size(self.dtype)
 
 
 class StaticBufferPool:
@@ -528,7 +528,7 @@ def start_onload_to_static(self):
                 gpu_buffer = offloader._gpu_buffer
                 assert cpu_storage is not None, "CPU storage not initialized"
                 assert gpu_buffer is not None, "GPU buffer not assigned"
-                assert not is_pin_memory_available() or cpu_storage.is_pinned(), (
+                assert not should_pin_memory() or cpu_storage.is_pinned(), (
                     f"CPU storage for {name} is not pinned! "
                     "non_blocking=True H2D copy from non-pinned memory "
                     "causes stream synchronization that breaks "
@@ -629,7 +629,7 @@ def _offload_to_cpu_internal(self):
         original GPU tensor is garbage collected.
         """
         param = self._param
-        pin_memory = is_pin_memory_available()
+        pin_memory = should_pin_memory()
 
         # Create pinned CPU storage and copy current GPU data
         self._cpu_storage = torch.empty_strided(
@@ -666,7 +666,7 @@ def _update_cpu_storage_from_param(self) -> None:
         param = self._param
 
         if param.data.device.type == "cpu":
-            if is_pin_memory_available() and not param.data.is_pinned():
+            if should_pin_memory() and not param.data.is_pinned():
                 pinned = torch.empty_strided(
                     size=param.data.size(),
                     stride=param.data.stride(),
diff --git a/vllm/model_executor/offloader/uva.py b/vllm/model_executor/offloader/uva.py
index c524e43cddae..51eb1a14fcb0 100644
--- a/vllm/model_executor/offloader/uva.py
+++ b/vllm/model_executor/offloader/uva.py
@@ -10,9 +10,9 @@
 
 import vllm.envs as envs
 from vllm.logger import init_logger
-from vllm.model_executor.offloader.base import BaseOffloader
+from vllm.model_executor.offloader.base import BaseOffloader, should_pin_memory
 from vllm.utils.mem_utils import format_gib
-from vllm.utils.platform_utils import is_pin_memory_available, is_uva_available
+from vllm.utils.platform_utils import is_uva_available
 from vllm.utils.torch_utils import get_accelerator_view_from_cpu_tensor
 
 logger = init_logger(__name__)
@@ -43,10 +43,7 @@ def __init__(
         self.cpu_offload_bytes = 0
         self.cpu_offload_params = cpu_offload_params or set()
 
-        self.pin_memory = (
-            is_pin_memory_available()
-            and not envs.VLLM_WEIGHT_OFFLOADING_DISABLE_PIN_MEMORY
-        )
+        self.pin_memory = should_pin_memory()
         self.uva_offloading = (
             is_uva_available() and not envs.VLLM_WEIGHT_OFFLOADING_DISABLE_UVA
         )
diff --git a/vllm/model_executor/parameter.py b/vllm/model_executor/parameter.py
index 410e277493b0..4106672d5011 100644
--- a/vllm/model_executor/parameter.py
+++ b/vllm/model_executor/parameter.py
@@ -605,8 +605,8 @@ def _adjust_shard_indexes_for_marlin(shard_size, shard_offset, marlin_tile_size)
 def _adjust_shard_indexes_for_packing(
     shard_size, shard_offset, packed_factor, marlin_tile_size
 ):
-    shard_size = shard_size // packed_factor
-    shard_offset = shard_offset // packed_factor
+    shard_size = round(shard_size // packed_factor)
+    shard_offset = round(shard_offset // packed_factor)
     if marlin_tile_size is not None:
         return _adjust_shard_indexes_for_marlin(
             shard_size=shard_size,
diff --git a/vllm/model_executor/utils.py b/vllm/model_executor/utils.py
index 0b844d1493d9..a0269be855a9 100644
--- a/vllm/model_executor/utils.py
+++ b/vllm/model_executor/utils.py
@@ -45,7 +45,10 @@ def set_weight_attrs(
 
 
 def replace_parameter(
-    layer: torch.nn.Module, param_name: str, new_data: torch.Tensor | None
+    layer: torch.nn.Module,
+    param_name: str,
+    new_data: torch.Tensor | None,
+    prefer_copy: bool = False,
 ):
     """
     Replace a parameter of a layer while maintaining the ability to reload the weight.
@@ -57,6 +60,12 @@ def replace_parameter(
         layer: Layer containing parameter to replace
         param_name: Name of parameter to replace
         new_data: New data of the new parameter, or None to set the parameter to None
+        prefer_copy: If True and the existing parameter is compatible with
+            ``new_data`` (same shape, dtype, and device), copy ``new_data``
+            into the existing parameter in place rather than re-registering
+            a new parameter. This preserves the parameter's storage address
+            (``data_ptr``), which is required for captured CUDA graphs to
+            remain valid across weight updates (e.g. in RL training loops).
     """
     # should not be used on a tied/shared param
 
@@ -67,9 +76,21 @@ def replace_parameter(
 
     if isinstance(new_data, torch.nn.Parameter):
         new_data = new_data.data
-    new_param = torch.nn.Parameter(new_data, requires_grad=False)
 
     old_param: torch.nn.Parameter | None = getattr(layer, param_name, None)
+
+    if (
+        prefer_copy
+        and old_param is not None
+        and old_param.shape == new_data.shape
+        and old_param.dtype == new_data.dtype
+        and old_param.device == new_data.device
+    ):
+        old_param.copy_(new_data)
+        return
+
+    new_param = torch.nn.Parameter(new_data, requires_grad=False)
+
     if old_param is not None and hasattr(old_param, "weight_loader"):
         weight_loader = old_param.weight_loader
         set_weight_attrs(new_param, {"weight_loader": weight_loader})
diff --git a/vllm/model_executor/warmup/deep_gemm_warmup.py b/vllm/model_executor/warmup/deep_gemm_warmup.py
index 1cafccd49670..9b11d1df859c 100644
--- a/vllm/model_executor/warmup/deep_gemm_warmup.py
+++ b/vllm/model_executor/warmup/deep_gemm_warmup.py
@@ -11,15 +11,15 @@
 
 import vllm.envs as envs
 from vllm.distributed.parallel_state import get_dp_group, is_global_first_rank
-from vllm.model_executor.layers.fused_moe.deep_gemm_moe import DeepGemmExperts
 from vllm.model_executor.layers.fused_moe.deep_gemm_utils import compute_aligned_M
-from vllm.model_executor.layers.fused_moe.layer import FusedMoE, FusedMoEModularMethod
-from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import (
+from vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe import DeepGemmExperts
+from vllm.model_executor.layers.fused_moe.experts.triton_deep_gemm_moe import (
     TritonOrDeepGemmExperts,
 )
+from vllm.model_executor.layers.fused_moe.layer import FusedMoE
 from vllm.model_executor.layers.linear import LinearBase
 from vllm.model_executor.layers.quantization.fp8 import Fp8LinearMethod
-from vllm.model_executor.layers.quantization.mxfp8 import Mxfp8OnlineLinearMethod
+from vllm.model_executor.layers.quantization.online.mxfp8 import Mxfp8OnlineLinearMethod
 from vllm.tracing import instrument
 from vllm.utils.deep_gemm import (
     fp8_gemm_nt,
@@ -168,14 +168,12 @@ def _fused_moe_grouped_gemm_may_use_deep_gemm(module: torch.nn.Module) -> bool:
     ):
         return False
 
-    if not isinstance(module.quant_method, FusedMoEModularMethod):
-        # modular kernels could invoke deep_gemm_moe_fp8
-        return True
+    moe_kernel = getattr(module.quant_method, "moe_kernel", None)
+    if moe_kernel is None:
+        return False
 
-    # Further check if the ModularKernel implementation uses the DeepGemmExperts
-    return isinstance(
-        module.quant_method.moe_kernel, (DeepGemmExperts, TritonOrDeepGemmExperts)
-    )
+    fused_experts = moe_kernel.impl.fused_experts
+    return isinstance(fused_experts, (DeepGemmExperts, TritonOrDeepGemmExperts))
 
 
 FP8_GEMM_NT_WARMUP_CACHE: set[torch.Size] = set()
diff --git a/vllm/model_executor/warmup/kernel_warmup.py b/vllm/model_executor/warmup/kernel_warmup.py
index 70abd8a6c503..c3725064a6d9 100644
--- a/vllm/model_executor/warmup/kernel_warmup.py
+++ b/vllm/model_executor/warmup/kernel_warmup.py
@@ -6,11 +6,14 @@
 happen during model execution.
 """
 
+import hashlib
+from pathlib import Path
 from typing import TYPE_CHECKING
 
 import torch
 
 import vllm.envs as envs
+from vllm.compilation.caching import aot_compile_hash_factors
 from vllm.logger import init_logger
 from vllm.model_executor.warmup.deep_gemm_warmup import deep_gemm_warmup
 from vllm.platforms import current_platform
@@ -24,6 +27,31 @@
 logger = init_logger(__name__)
 
 
+def _flashinfer_autotune_cache_hash(runner: "GPUModelRunner") -> str:
+    factors = aot_compile_hash_factors(runner.vllm_config)
+    return hashlib.sha256(str(factors).encode()).hexdigest()
+
+
+def _resolve_flashinfer_autotune_file(runner: "GPUModelRunner") -> Path:
+    override_dir = envs.VLLM_FLASHINFER_AUTOTUNE_CACHE_DIR
+    if override_dir:
+        root = Path(override_dir).expanduser()
+    else:
+        from flashinfer.jit import env as flashinfer_jit_env
+
+        flashinfer_workspace = flashinfer_jit_env.FLASHINFER_WORKSPACE_DIR
+        root = (
+            Path(envs.VLLM_CACHE_ROOT)
+            / "flashinfer_autotune_cache"
+            / flashinfer_workspace.parent.name
+            / flashinfer_workspace.name
+        )
+
+    output_dir = root / _flashinfer_autotune_cache_hash(runner)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    return output_dir / "autotune_configs.json"
+
+
 def kernel_warmup(worker: "Worker"):
     # Deep GEMM warmup
     do_deep_gemm_warmup = (
@@ -78,6 +106,12 @@ def _is_flashinfer_backend(backend):
         )
 
 
+# TODO: remove once FlashInfer upstream fixes the persistent file cache
+# to resolve collisions like `use_8x4_sf_layout=True/False`, which causes
+# invalid tactics to be chosen
+_FLASHINFER_USE_PERSISTENT_CACHE = False
+
+
 def flashinfer_autotune(runner: "GPUModelRunner") -> None:
     """
     Autotune FlashInfer operations.
@@ -87,23 +121,71 @@ def flashinfer_autotune(runner: "GPUModelRunner") -> None:
     future calls to FlashInfer will use the best implementation.
     Without autotuning, FlashInfer will rely on heuristics, which may
     be significantly slower.
+
+    Tuning is performed only on rank 0. The resulting cache is broadcast
+    to every rank so all ranks dispatch the same kernel tactic.
     """
     import vllm.utils.flashinfer as fi_utils
+    from vllm.distributed.parallel_state import get_world_group
 
-    with torch.inference_mode(), fi_utils.autotune():
-        # Certain FlashInfer kernels (e.g. nvfp4 routed moe) are
-        # incompatible with autotuning. This state is used to skip
-        # those kernels during the autotuning process.
-        fi_utils._is_fi_autotuning = True
-
-        # We skip EPLB here since we don't want to record dummy metrics
-        # When autotuning with number of tokens m, flashinfer will autotune
-        # operations for all number of tokens up to m.
-        # So we only need to run with the max number of tokens.
-        runner._dummy_run(
-            runner.scheduler_config.max_num_batched_tokens,
-            skip_eplb=True,
-            is_profile=True,
+    if not _FLASHINFER_USE_PERSISTENT_CACHE:
+        with torch.inference_mode(), fi_utils.autotune():
+            runner._dummy_run(
+                num_tokens=runner.scheduler_config.max_num_batched_tokens,
+                skip_eplb=True,
+                is_profile=True,
+            )
+        get_world_group().barrier()
+        return
+
+    world = get_world_group()
+    is_leader = world.rank_in_group == 0
+
+    cache_path = _resolve_flashinfer_autotune_file(runner)
+    if is_leader:
+        logger.info("Using FlashInfer autotune cache file: %s", cache_path)
+
+    # We skip EPLB here since we don't want to record dummy metrics.
+    # When autotuning with number of tokens m, flashinfer will autotune
+    # operations for all number of tokens up to m, so we only need to
+    # run with the max number of tokens.
+    dummy_run_kwargs = dict(
+        num_tokens=runner.scheduler_config.max_num_batched_tokens,
+        skip_eplb=True,
+        is_profile=True,
+    )
+
+    with torch.inference_mode():
+        if is_leader:
+            with fi_utils.autotune(tune_mode=True, cache=str(cache_path)):
+                runner._dummy_run(**dummy_run_kwargs)
+        else:
+            runner._dummy_run(**dummy_run_kwargs)
+
+    # Broadcast autotune cache from rank 0 to all other ranks so every
+    # rank loads the same set of chosen tactics.
+    tune_results: bytes | None = None
+    if is_leader and cache_path.exists():
+        with open(cache_path, "rb") as f:
+            tune_results = f.read()
+
+    tune_results = world.broadcast_object(tune_results, src=0)
+
+    if tune_results is None:
+        logger.warning(
+            "No FlashInfer autotune cache entries found."
+            "Falling back to default tactics."
         )
+    else:
+        if not is_leader and world.local_rank == 0:
+            with open(cache_path, "wb") as f:
+                f.write(tune_results)
+        world.barrier()
+        from flashinfer.autotuner import AutoTuner
 
-        fi_utils._is_fi_autotuning = False
+        AutoTuner.get().load_configs(str(cache_path))
+        logger.info(
+            "FlashInfer autotune cache loaded on rank %d from %s.",
+            world.rank_in_group,
+            cache_path,
+        )
diff --git a/vllm/models/__init__.py b/vllm/models/__init__.py
new file mode 100644
index 000000000000..208f01a7cb5e
--- /dev/null
+++ b/vllm/models/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
diff --git a/vllm/models/deepseek_v4/__init__.py b/vllm/models/deepseek_v4/__init__.py
new file mode 100644
index 000000000000..abaa794f98c0
--- /dev/null
+++ b/vllm/models/deepseek_v4/__init__.py
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""DeepSeek V4 model — hardware-isolated entry point.
+
+The actual implementation lives under ``nvidia/`` and ``amd/``; this module
+picks the right one for the current platform and re-exports the public
+classes used by the model registry and quantization config lookup.
+"""
+
+from typing import TYPE_CHECKING
+
+from vllm.platforms import current_platform
+
+from .quant_config import DeepseekV4FP8Config
+
+# Pick the per-platform implementation. The NVIDIA branch is the static
+# default that mypy sees; the ROCm branch overrides it at runtime and is
+# kept type-compatible via ``# type: ignore[assignment]``.
+if TYPE_CHECKING or not current_platform.is_rocm():
+    from .nvidia.model import DeepseekV4ForCausalLM
+    from .nvidia.mtp import DeepSeekV4MTP
+else:
+    from .amd.model import DeepseekV4ForCausalLM  # type: ignore[assignment]
+    from .amd.mtp import DeepSeekV4MTP  # type: ignore[assignment]
+
+__all__ = [
+    "DeepSeekV4MTP",
+    "DeepseekV4FP8Config",
+    "DeepseekV4ForCausalLM",
+]
diff --git a/vllm/models/deepseek_v4/amd/__init__.py b/vllm/models/deepseek_v4/amd/__init__.py
new file mode 100644
index 000000000000..208f01a7cb5e
--- /dev/null
+++ b/vllm/models/deepseek_v4/amd/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
diff --git a/vllm/models/deepseek_v4/amd/model.py b/vllm/models/deepseek_v4/amd/model.py
new file mode 100644
index 000000000000..d69bad8d38de
--- /dev/null
+++ b/vllm/models/deepseek_v4/amd/model.py
@@ -0,0 +1,1612 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import typing
+from collections.abc import Callable, Iterable
+from itertools import islice
+
+import regex as re
+import torch
+import torch.nn as nn
+
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import VllmConfig
+from vllm.distributed import (
+    get_ep_group,
+    get_pp_group,
+    get_tensor_model_parallel_rank,
+    get_tensor_model_parallel_world_size,
+)
+from vllm.forward_context import get_forward_context
+from vllm.model_executor.layers.activation import SiluAndMul, SiluAndMulWithClamp
+from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe.router.fused_topk_bias_router import (
+    fused_topk_bias,
+)
+from vllm.model_executor.layers.fused_moe.router.norm_gate_linear import (
+    NormGateLinear,
+)
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import (
+    ColumnParallelLinear,
+    MergedColumnParallelLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.mhc import (
+    HCHeadOp,
+    MHCFusedPostPreOp,
+    MHCPostOp,
+    MHCPreOp,
+)
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    ParallelLMHead,
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.model_executor.models.interfaces import SupportsPP
+from vllm.model_executor.models.utils import (
+    AutoWeightsLoader,
+    PPMissingLayer,
+    WeightsMapper,
+    extract_layer_index,
+    is_pp_missing_parameter,
+    make_layers,
+    maybe_prefix,
+)
+from vllm.model_executor.utils import set_weight_attrs
+from vllm.models.deepseek_v4.nvidia.ops.attention import (
+    DeepseekV4Indexer,
+    DeepseekV4MLAModules,
+    DeepseekV4MultiHeadLatentAttentionWrapper,
+)
+from vllm.platforms import current_platform
+from vllm.sequence import IntermediateTensors
+from vllm.triton_utils import tl, triton
+from vllm.utils.torch_utils import direct_register_custom_op
+
+
+class DeepseekV4MLP(nn.Module):
+    def __init__(
+        self,
+        hidden_size: int,
+        intermediate_size: int,
+        hidden_act: str,
+        swiglu_limit: float | None = None,
+        quant_config: QuantizationConfig | None = None,
+        reduce_results: bool = True,
+        is_sequence_parallel: bool = False,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+
+        # If is_sequence_parallel, the input and output tensors are sharded
+        # across the ranks within the tp_group. In this case the weights are
+        # replicated and no collective ops are needed.
+        # Otherwise we use standard TP with an allreduce at the end.
+        self.gate_up_proj = MergedColumnParallelLinear(
+            hidden_size,
+            [intermediate_size] * 2,
+            bias=False,
+            quant_config=quant_config,
+            disable_tp=is_sequence_parallel,
+            prefix=f"{prefix}.gate_up_proj",
+        )
+        self.down_proj = RowParallelLinear(
+            intermediate_size,
+            hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            reduce_results=reduce_results,
+            disable_tp=is_sequence_parallel,
+            prefix=f"{prefix}.down_proj",
+        )
+        if hidden_act != "silu":
+            raise ValueError(
+                f"Unsupported activation: {hidden_act}. Only silu is supported for now."
+            )
+        if swiglu_limit is not None:
+            self.act_fn = SiluAndMulWithClamp(swiglu_limit)
+        else:
+            self.act_fn = SiluAndMul()
+
+    def forward(self, x):
+        gate_up, _ = self.gate_up_proj(x)
+        x = self.act_fn(gate_up)
+        x, _ = self.down_proj(x)
+        return x
+
+
+@triton.jit
+def _deepseek_v4_stage_mega_moe_inputs_kernel(
+    hidden_states,
+    x_fp8,
+    x_sf,
+    topk_ids,
+    topk_weights,
+    topk_idx_out,
+    topk_weights_out,
+    hidden_stride_m: tl.constexpr,
+    hidden_stride_k: tl.constexpr,
+    x_stride_m: tl.constexpr,
+    x_stride_k: tl.constexpr,
+    x_sf_stride_m: tl.constexpr,
+    x_sf_stride_k: tl.constexpr,
+    topk_ids_stride_m: tl.constexpr,
+    topk_ids_stride_k: tl.constexpr,
+    topk_weights_stride_m: tl.constexpr,
+    topk_weights_stride_k: tl.constexpr,
+    topk_idx_stride_m: tl.constexpr,
+    topk_idx_stride_k: tl.constexpr,
+    topk_weights_out_stride_m: tl.constexpr,
+    topk_weights_out_stride_k: tl.constexpr,
+    hidden_size: tl.constexpr,
+    top_k: tl.constexpr,
+    BLOCK_K: tl.constexpr,
+    GROUP_K: tl.constexpr,
+    BLOCK_TOPK: tl.constexpr,
+) -> None:
+    token_id = tl.program_id(0)
+    k_block_id = tl.program_id(1)
+
+    k_offsets = k_block_id * BLOCK_K + tl.arange(0, BLOCK_K)
+    k_mask = k_offsets < hidden_size
+    hidden = tl.load(
+        hidden_states + token_id * hidden_stride_m + k_offsets * hidden_stride_k,
+        mask=k_mask,
+        other=0.0,
+    ).to(tl.float32)
+
+    num_groups: tl.constexpr = BLOCK_K // GROUP_K
+    hidden_groups = tl.reshape(tl.abs(hidden), [num_groups, GROUP_K])
+    amax = tl.max(hidden_groups, axis=1)
+    amax = tl.maximum(amax, 1.0e-4)
+
+    scale = amax / 448.0
+    scale_bits = scale.to(tl.uint32, bitcast=True)
+    scale_exp = ((scale_bits >> 23) & 0xFF) + ((scale_bits & 0x7FFFFF) != 0).to(
+        tl.uint32
+    )
+    scale_exp = tl.minimum(tl.maximum(scale_exp, 1), 254)
+    rounded_scale = (scale_exp << 23).to(tl.float32, bitcast=True)
+
+    hidden_groups = tl.reshape(hidden, [num_groups, GROUP_K])
+    scaled = hidden_groups * (1.0 / rounded_scale)[:, None]
+    scaled = tl.reshape(scaled, [BLOCK_K])
+    fp8 = scaled.to(tl.float8e4nv)
+    tl.store(
+        x_fp8 + token_id * x_stride_m + k_offsets * x_stride_k,
+        fp8,
+        mask=k_mask,
+    )
+
+    scale_offsets = tl.arange(0, num_groups)
+    packed_scale = tl.sum(scale_exp << (scale_offsets * 8), axis=0).to(tl.int32)
+    tl.store(
+        x_sf + token_id * x_sf_stride_m + k_block_id * x_sf_stride_k,
+        packed_scale,
+    )
+
+    if k_block_id == 0:
+        topk_offsets = tl.arange(0, BLOCK_TOPK)
+        topk_mask = topk_offsets < top_k
+
+        ids = tl.load(
+            topk_ids + token_id * topk_ids_stride_m + topk_offsets * topk_ids_stride_k,
+            mask=topk_mask,
+            other=0,
+        ).to(tl.int64)
+        tl.store(
+            topk_idx_out
+            + token_id * topk_idx_stride_m
+            + topk_offsets * topk_idx_stride_k,
+            ids,
+            mask=topk_mask,
+        )
+
+        weights = tl.load(
+            topk_weights
+            + token_id * topk_weights_stride_m
+            + topk_offsets * topk_weights_stride_k,
+            mask=topk_mask,
+            other=0.0,
+        )
+        tl.store(
+            topk_weights_out
+            + token_id * topk_weights_out_stride_m
+            + topk_offsets * topk_weights_out_stride_k,
+            weights,
+            mask=topk_mask,
+        )
+
+
+def _stage_deepseek_v4_mega_moe_inputs(
+    hidden_states: torch.Tensor,
+    topk_weights: torch.Tensor,
+    topk_ids: torch.Tensor,
+    x_fp8: torch.Tensor,
+    x_sf: torch.Tensor,
+    topk_idx_out: torch.Tensor,
+    topk_weights_out: torch.Tensor,
+) -> None:
+    num_tokens, hidden_size = hidden_states.shape
+    if num_tokens == 0:
+        return
+    if hidden_size % 128 != 0:
+        raise ValueError(
+            "DeepSeek V4 MegaMoE input staging requires hidden_size to be "
+            "a multiple of 128."
+        )
+    top_k = topk_ids.shape[1]
+    if topk_weights.shape != topk_ids.shape:
+        raise ValueError(
+            "DeepSeek V4 MegaMoE input staging requires topk_weights and "
+            "topk_ids to have the same shape."
+        )
+
+    block_k = 128
+    grid = (num_tokens, triton.cdiv(hidden_size, block_k))
+    block_topk = triton.next_power_of_2(top_k)
+    _deepseek_v4_stage_mega_moe_inputs_kernel[grid](
+        hidden_states,
+        x_fp8,
+        x_sf,
+        topk_ids,
+        topk_weights,
+        topk_idx_out,
+        topk_weights_out,
+        hidden_states.stride(0),
+        hidden_states.stride(1),
+        x_fp8.stride(0),
+        x_fp8.stride(1),
+        x_sf.stride(0),
+        x_sf.stride(1),
+        topk_ids.stride(0),
+        topk_ids.stride(1),
+        topk_weights.stride(0),
+        topk_weights.stride(1),
+        topk_idx_out.stride(0),
+        topk_idx_out.stride(1),
+        topk_weights_out.stride(0),
+        topk_weights_out.stride(1),
+        hidden_size,
+        top_k,
+        BLOCK_K=block_k,
+        GROUP_K=32,
+        BLOCK_TOPK=block_topk,
+        num_warps=4,
+    )
+
+
+def make_deepseek_v4_expert_params_mapping(
+    num_experts: int,
+) -> list[tuple[str, str, int, str]]:
+    return [
+        (
+            "experts.w13_" if shard_id in ("w1", "w3") else "experts.w2_",
+            f"experts.{expert_id}.{weight_name}.",
+            expert_id,
+            shard_id,
+        )
+        for expert_id in range(num_experts)
+        for shard_id, weight_name in [
+            ("w1", "w1"),
+            ("w2", "w2"),
+            ("w3", "w3"),
+        ]
+    ]
+
+
+class DeepseekV4MegaMoEExperts(nn.Module):
+    _symm_buffer_cache: dict[tuple[int, int, int, int, int, int, int], object] = {}
+
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        *,
+        num_experts: int,
+        num_local_experts: int,
+        experts_start_idx: int,
+        top_k: int,
+        hidden_size: int,
+        intermediate_size: int,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.prefix = prefix
+        self.num_experts = num_experts
+        self.num_local_experts = num_local_experts
+        self.experts_start_idx = experts_start_idx
+        self.experts_end_idx = experts_start_idx + num_local_experts
+        self.top_k = top_k
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.max_num_tokens = vllm_config.scheduler_config.max_num_batched_tokens
+
+        weight_attrs = {"weight_loader": self.weight_loader}
+        self.w13_weight = nn.Parameter(
+            torch.zeros(
+                num_local_experts,
+                2 * intermediate_size,
+                hidden_size // 2,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        set_weight_attrs(self.w13_weight, weight_attrs)
+
+        self.w13_weight_scale = nn.Parameter(
+            torch.zeros(
+                num_local_experts,
+                2 * intermediate_size,
+                hidden_size // 32,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        set_weight_attrs(self.w13_weight_scale, weight_attrs)
+        self.w13_weight_scale.quant_method = "block"
+
+        self.w2_weight = nn.Parameter(
+            torch.zeros(
+                num_local_experts,
+                hidden_size,
+                intermediate_size // 2,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        set_weight_attrs(self.w2_weight, weight_attrs)
+
+        self.w2_weight_scale = nn.Parameter(
+            torch.zeros(
+                num_local_experts,
+                hidden_size,
+                intermediate_size // 32,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        set_weight_attrs(self.w2_weight_scale, weight_attrs)
+        self.w2_weight_scale.quant_method = "block"
+
+        self._transformed_l1_weights: tuple[torch.Tensor, torch.Tensor] | None = None
+        self._transformed_l2_weights: tuple[torch.Tensor, torch.Tensor] | None = None
+
+        # Register in the static forward context so the custom-op wrapper
+        # can look up this module by name from within a torch.compile graph.
+        compilation_config = vllm_config.compilation_config
+        if prefix in compilation_config.static_forward_context:
+            raise ValueError(f"Duplicate layer name: {prefix}")
+        compilation_config.static_forward_context[prefix] = self
+
+    def _map_global_expert_id(self, expert_id: int) -> int:
+        if expert_id < self.experts_start_idx or expert_id >= self.experts_end_idx:
+            return -1
+        return expert_id - self.experts_start_idx
+
+    def weight_loader(
+        self,
+        param: nn.Parameter,
+        loaded_weight: torch.Tensor,
+        weight_name: str,
+        shard_id: str,
+        expert_id: int,
+        return_success: bool = False,
+    ) -> bool | None:
+        local_expert_id = self._map_global_expert_id(expert_id)
+        if local_expert_id == -1:
+            return False if return_success else None
+
+        expert_data = param.data[local_expert_id]
+        if shard_id in ("w1", "w3"):
+            if "w13_" not in weight_name:
+                return False if return_success else None
+            shard_offset = 0 if shard_id == "w1" else self.intermediate_size
+            expert_data = expert_data.narrow(0, shard_offset, self.intermediate_size)
+        elif shard_id == "w2":
+            if "w2_" not in weight_name:
+                return False if return_success else None
+        else:
+            raise ValueError(f"Unsupported expert shard id: {shard_id}")
+
+        if expert_data.shape != loaded_weight.shape:
+            raise ValueError(
+                f"DeepSeek V4 MegaMoE expert weight shape mismatch for "
+                f"{weight_name}: parameter shard {tuple(expert_data.shape)} "
+                f"vs checkpoint {tuple(loaded_weight.shape)}"
+            )
+        expert_data.copy_(loaded_weight)
+        return True if return_success else None
+
+    @staticmethod
+    def _ue8m0_uint8_to_float(sf: torch.Tensor) -> torch.Tensor:
+        return (sf.to(torch.int32) << 23).view(torch.float32)
+
+    def _check_runtime_supported(self) -> None:
+        if not torch.cuda.is_available():
+            raise NotImplementedError("DeepSeek V4 MegaMoE requires CUDA.")
+        device = self.w13_weight.device
+        if device.type != "cuda":
+            raise NotImplementedError(
+                "DeepSeek V4 MegaMoE expert weights must be loaded on CUDA."
+            )
+        if torch.cuda.get_device_capability(device)[0] != 10:
+            raise NotImplementedError("DeepGEMM MegaMoE requires SM100 GPUs.")
+        if self.hidden_size % 128 != 0 or self.intermediate_size % 128 != 0:
+            raise ValueError(
+                "DeepGEMM MegaMoE requires hidden and intermediate sizes "
+                "to be multiples of 128."
+            )
+
+    def finalize_weights(self) -> None:
+        if self._transformed_l1_weights is not None:
+            return
+
+        self._check_runtime_supported()
+        import vllm.third_party.deep_gemm as deep_gemm
+
+        w13_scale = deep_gemm.transform_sf_into_required_layout(
+            self._ue8m0_uint8_to_float(self.w13_weight_scale.data).contiguous(),
+            2 * self.intermediate_size,
+            self.hidden_size,
+            (1, 32),
+            self.num_local_experts,
+        )
+        w2_scale = deep_gemm.transform_sf_into_required_layout(
+            self._ue8m0_uint8_to_float(self.w2_weight_scale.data).contiguous(),
+            self.hidden_size,
+            self.intermediate_size,
+            (1, 32),
+            self.num_local_experts,
+        )
+        self._transformed_l1_weights, self._transformed_l2_weights = (
+            deep_gemm.transform_weights_for_mega_moe(
+                (self.w13_weight.data.view(torch.int8).contiguous(), w13_scale),
+                (self.w2_weight.data.view(torch.int8).contiguous(), w2_scale),
+            )
+        )
+        # Drop the original loader-side parameters: the MegaMoE kernels only
+        # consume the transformed views above. transform_weights_for_mega_moe
+        # allocates a fresh tensor for the L1 weight (see _interleave_l1_weights)
+        # and fresh SF tensors for L1/L2; the L2 weight is the only tensor that
+        # aliases the original storage, and _transformed_l2_weights still holds
+        # it, so the storage stays live after we drop the Parameter.
+        self.w13_weight = None
+        self.w13_weight_scale = None
+        self.w2_weight = None
+        self.w2_weight_scale = None
+
+    def get_symm_buffer(self):
+        import vllm.third_party.deep_gemm as deep_gemm
+
+        group = get_ep_group().device_group
+        device = torch.accelerator.current_device_index()
+        key = (
+            id(group),
+            device,
+            self.num_experts,
+            self.max_num_tokens,
+            self.top_k,
+            self.hidden_size,
+            self.intermediate_size,
+        )
+        symm_buffer = self._symm_buffer_cache.get(key)
+        if symm_buffer is None:
+            symm_buffer = deep_gemm.get_symm_buffer_for_mega_moe(
+                group,
+                self.num_experts,
+                self.max_num_tokens,
+                self.top_k,
+                self.hidden_size,
+                self.intermediate_size,
+            )
+            self._symm_buffer_cache[key] = symm_buffer
+        return symm_buffer
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        *,
+        activation_clamp: float | None,
+        fast_math: bool = True,
+    ) -> torch.Tensor:
+        if hidden_states.shape[0] > self.max_num_tokens:
+            raise ValueError(
+                f"DeepSeek V4 MegaMoE got {hidden_states.shape[0]} tokens, "
+                f"but the symmetric buffer was sized for {self.max_num_tokens}."
+            )
+        y = torch.empty_like(hidden_states, dtype=torch.bfloat16)
+        torch.ops.vllm.deepseek_v4_mega_moe_experts(
+            hidden_states,
+            topk_weights,
+            topk_ids,
+            y,
+            self.prefix,
+            activation_clamp,
+            fast_math,
+        )
+        return y
+
+    def _run_mega_moe(
+        self,
+        hidden_states: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        y: torch.Tensor,
+        activation_clamp: float | None,
+        fast_math: bool,
+    ) -> None:
+        import vllm.third_party.deep_gemm as deep_gemm
+
+        symm_buffer = self.get_symm_buffer()
+        num_tokens = hidden_states.shape[0]
+        _stage_deepseek_v4_mega_moe_inputs(
+            hidden_states,
+            topk_weights,
+            topk_ids,
+            symm_buffer.x[:num_tokens],
+            symm_buffer.x_sf[:num_tokens],
+            symm_buffer.topk_idx[:num_tokens],
+            symm_buffer.topk_weights[:num_tokens],
+        )
+
+        # This method must have been already called during the weight loading phase.
+        # We call it again here to cover the dummy weight loading case.
+        self.finalize_weights()
+
+        assert self._transformed_l1_weights is not None
+        assert self._transformed_l2_weights is not None
+        deep_gemm.fp8_fp4_mega_moe(
+            y,
+            self._transformed_l1_weights,
+            self._transformed_l2_weights,
+            symm_buffer,
+            activation_clamp=activation_clamp,
+            fast_math=fast_math,
+        )
+
+
+DeepseekV4MegaMoEExperts.weight_loader.supports_moe_loading = True  # type: ignore[attr-defined]
+
+
+def _deepseek_v4_mega_moe_experts_op(
+    hidden_states: torch.Tensor,
+    topk_weights: torch.Tensor,
+    topk_ids: torch.Tensor,
+    out: torch.Tensor,
+    layer_name: str,
+    activation_clamp: float | None,
+    fast_math: bool,
+) -> None:
+    self = get_forward_context().no_compile_layers[layer_name]
+    self._run_mega_moe(
+        hidden_states,
+        topk_weights,
+        topk_ids,
+        out,
+        activation_clamp,
+        fast_math,
+    )
+
+
+def _deepseek_v4_mega_moe_experts_op_fake(
+    hidden_states: torch.Tensor,
+    topk_weights: torch.Tensor,
+    topk_ids: torch.Tensor,
+    out: torch.Tensor,
+    layer_name: str,
+    activation_clamp: float | None,
+    fast_math: bool,
+) -> None:
+    return None
+
+
+direct_register_custom_op(
+    op_name="deepseek_v4_mega_moe_experts",
+    op_func=_deepseek_v4_mega_moe_experts_op,
+    mutates_args=["out"],
+    fake_impl=_deepseek_v4_mega_moe_experts_op_fake,
+)
+
+
+class DeepseekV4MoE(nn.Module):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+    ):
+        super().__init__()
+
+        self.tp_size = get_tensor_model_parallel_world_size()
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        self.prefix = prefix
+        self.use_mega_moe = (
+            vllm_config.kernel_config.moe_backend == "deep_gemm_mega_moe"
+        )
+        if self.use_mega_moe and not vllm_config.parallel_config.enable_expert_parallel:
+            raise NotImplementedError(
+                "DeepSeek V4 MegaMoE currently requires expert parallel. "
+                "Enable it with --enable-expert-parallel, or pick a different "
+                "moe backend."
+            )
+
+        self.routed_scaling_factor = getattr(config, "routed_scaling_factor", 1.0)
+        self.hidden_size = config.hidden_size
+
+        self.n_routed_experts = config.n_routed_experts
+        self.n_activated_experts = config.num_experts_per_tok
+        self.moe_intermediate_size = config.moe_intermediate_size
+        self.swiglu_limit = config.swiglu_limit
+        self.renormalize = config.norm_topk_prob
+        self.scoring_func = getattr(config, "scoring_func", "sqrtsoftplus")
+        if self.use_mega_moe and self.scoring_func != "sqrtsoftplus":
+            raise NotImplementedError(
+                "DeepSeek V4 MegaMoE currently supports sqrtsoftplus routing only."
+            )
+        if self.use_mega_moe and getattr(config, "expert_dtype", "fp4") != "fp4":
+            raise NotImplementedError(
+                "DeepSeek V4 MegaMoE only supports fp4 experts; got expert_dtype="
+                f"{config.expert_dtype!r}. Drop --kernel-config moe_backend="
+                "deep_gemm_mega_moe for this checkpoint."
+            )
+
+        # Fused RMSNorm + gate: owns both ffn_norm and the gate matmul.
+        self.norm_gate = NormGateLinear(
+            hidden_size=config.hidden_size,
+            num_experts=config.n_routed_experts,
+            rms_eps=config.rms_norm_eps,
+            prefix=f"{prefix}.norm_gate",
+        )
+        # Routing-side tensors live on ``norm_gate`` directly (not on the
+        # inner gate); they are initialized to None in NormGatedLinear and
+        # populated below depending on the MoE variant.
+        is_hash_moe = extract_layer_index(prefix) < config.num_hash_layers
+        self.hash_indices_dtype = torch.int64 if self.use_mega_moe else torch.int32
+        if is_hash_moe:
+            # hash MoE doesn't use e_score_correction_bias
+            # Use randint instead of empty to avoid garbage values causing
+            # invalid memory access in dummy mode (--load-format="dummy")
+            self.norm_gate.tid2eid = nn.Parameter(
+                torch.randint(
+                    0,
+                    config.n_routed_experts,
+                    (config.vocab_size, config.num_experts_per_tok),
+                    dtype=self.hash_indices_dtype,
+                ),
+                requires_grad=False,
+            )
+        elif getattr(config, "topk_method", None) == "noaux_tc":
+            self.norm_gate.e_score_correction_bias = nn.Parameter(
+                torch.empty(config.n_routed_experts, dtype=torch.float32),
+                requires_grad=False,
+            )
+
+        if config.n_shared_experts is None:
+            self.shared_experts = None
+        else:
+            intermediate_size = config.moe_intermediate_size * config.n_shared_experts
+
+            self.shared_experts = DeepseekV4MLP(
+                hidden_size=config.hidden_size,
+                intermediate_size=intermediate_size,
+                hidden_act=config.hidden_act,
+                swiglu_limit=self.swiglu_limit,
+                quant_config=quant_config,
+                reduce_results=self.use_mega_moe,
+                prefix=f"{prefix}.shared_experts",
+            )
+
+        if self.use_mega_moe:
+            self._init_mega_moe_experts(vllm_config, config, prefix)
+        else:
+            self._init_fused_moe_experts(config, quant_config, prefix)
+
+    def _init_mega_moe_experts(
+        self,
+        vllm_config: VllmConfig,
+        config,
+        prefix: str,
+    ) -> None:
+        self.ep_group = get_ep_group()
+        self.ep_size = self.ep_group.world_size
+        self.ep_rank = self.ep_group.rank_in_group
+        assert config.n_routed_experts % self.ep_size == 0
+
+        self.n_local_experts = config.n_routed_experts // self.ep_size
+        self.experts_start_idx = self.ep_rank * self.n_local_experts
+        self.experts_end_idx = self.experts_start_idx + self.n_local_experts
+
+        self.experts = DeepseekV4MegaMoEExperts(
+            vllm_config,
+            num_experts=config.n_routed_experts,
+            num_local_experts=self.n_local_experts,
+            experts_start_idx=self.experts_start_idx,
+            top_k=config.num_experts_per_tok,
+            hidden_size=config.hidden_size,
+            intermediate_size=config.moe_intermediate_size,
+            prefix=f"{prefix}.experts",
+        )
+
+    def _init_fused_moe_experts(
+        self,
+        config,
+        quant_config,
+        prefix: str,
+    ) -> None:
+        self.tp_rank = get_tensor_model_parallel_rank()
+        assert config.n_routed_experts % self.tp_size == 0
+
+        self.n_local_experts = config.n_routed_experts // self.tp_size
+        self.experts_start_idx = self.tp_rank * self.n_local_experts
+        self.experts_end_idx = self.experts_start_idx + self.n_local_experts
+        # We don't pass `gate` into FusedMoE
+        self.experts = FusedMoE(
+            shared_experts=self.shared_experts,
+            num_experts=config.n_routed_experts,
+            top_k=config.num_experts_per_tok,
+            hidden_size=config.hidden_size,
+            intermediate_size=config.moe_intermediate_size,
+            renormalize=config.norm_topk_prob,
+            quant_config=quant_config,
+            prefix=f"{prefix}.experts",
+            scoring_func=self.scoring_func,
+            routed_scaling_factor=self.routed_scaling_factor,
+            e_score_correction_bias=self.norm_gate.e_score_correction_bias,
+            hash_indices_table=self.norm_gate.tid2eid,
+            swiglu_limit=self.swiglu_limit,
+            router_logits_dtype=torch.float32,
+        )
+
+    def forward(
+        self, hidden_states: torch.Tensor, input_ids: torch.Tensor | None = None
+    ) -> torch.Tensor:
+        if self.norm_gate.tid2eid is not None and input_ids is None:
+            raise ValueError("DeepSeek V4 hash MoE routing requires input_ids.")
+
+        if not self.use_mega_moe:
+            return self._forward_fused_moe(hidden_states, input_ids)
+
+        org_shape = hidden_states.shape
+        normed_x, router_logits = self.norm_gate(hidden_states)
+        topk_weights, topk_ids = fused_topk_bias(
+            hidden_states=normed_x,
+            gating_output=router_logits,
+            scoring_func=self.scoring_func,
+            e_score_correction_bias=self.norm_gate.e_score_correction_bias.data
+            if self.norm_gate.e_score_correction_bias is not None
+            else None,
+            topk=self.n_activated_experts,
+            renormalize=self.renormalize,
+            indices_type=self.hash_indices_dtype,
+            input_tokens=input_ids,
+            hash_indices_table=self.norm_gate.tid2eid,
+            routed_scaling_factor=self.routed_scaling_factor,
+        )
+        activation_clamp = (
+            float(self.swiglu_limit) if self.swiglu_limit is not None else None
+        )
+        final_hidden_states = self.experts(
+            normed_x,
+            topk_weights,
+            topk_ids,
+            activation_clamp=activation_clamp,
+        )
+
+        if self.shared_experts is not None:
+            shared_output = self.shared_experts(normed_x)
+            final_hidden_states += shared_output
+
+        return final_hidden_states.view(org_shape)
+
+    def _forward_fused_moe(
+        self, hidden_states: torch.Tensor, input_ids: torch.Tensor | None = None
+    ) -> torch.Tensor:
+        assert not self.experts.is_internal_router
+        org_shape = hidden_states.shape
+        normed_x, router_logits = self.norm_gate(hidden_states)
+        final_hidden_states = self.experts(
+            hidden_states=normed_x,
+            router_logits=router_logits,
+            input_ids=input_ids,
+        )
+
+        return final_hidden_states.view(org_shape)
+
+    def finalize_mega_moe_weights(self) -> None:
+        if self.use_mega_moe:
+            self.experts.finalize_weights()
+
+
+class DeepseekV4Attention(nn.Module):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        prefix: str,
+        topk_indices_buffer: torch.Tensor | None = None,
+        aux_stream_list: list[torch.cuda.Stream] | None = None,
+    ):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        layer_id = extract_layer_index(prefix)
+
+        self.layer_id = layer_id
+        self.hidden_size = config.hidden_size
+        self.n_heads = config.num_attention_heads
+        tp_size = get_tensor_model_parallel_world_size()
+        assert self.n_heads % tp_size == 0
+
+        self.n_local_heads = self.n_heads // tp_size
+        self.q_lora_rank = config.q_lora_rank
+        self.o_lora_rank = config.o_lora_rank
+        self.head_dim = config.head_dim
+        self.rope_head_dim = config.qk_rope_head_dim
+        self.nope_head_dim = self.head_dim - self.rope_head_dim
+        self.n_groups = config.o_groups
+        self.n_local_groups = self.n_groups // tp_size
+        self.window_size = config.sliding_window
+        # NOTE(zyongye) Compress ratio can't be 0
+        # we do this for because MTP layer is not included
+        # in the compress ratio list
+        if layer_id < config.num_hidden_layers:
+            self.compress_ratio = max(1, config.compress_ratios[layer_id])
+        else:
+            self.compress_ratio = 1
+        self.eps = config.rms_norm_eps
+        self.max_position_embeddings = config.max_position_embeddings
+
+        # Padded to min 64 heads for FlashMLA, initialized to -inf
+        # (no sink effect). Weight loading fills the first n_local_heads slots.
+        padded_heads = max(self.n_local_heads, 64)
+        self.attn_sink = nn.Parameter(
+            torch.full((padded_heads,), -float("inf"), dtype=torch.float32),
+            requires_grad=False,
+        )
+
+        self.fused_wqa_wkv = MergedColumnParallelLinear(
+            self.hidden_size,
+            [self.q_lora_rank, self.head_dim],
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.fused_wqa_wkv",
+            disable_tp=True,  # fused ReplicatedLinear
+        )
+        self.q_norm = RMSNorm(self.q_lora_rank, self.eps)
+        self.wq_b = ColumnParallelLinear(
+            self.q_lora_rank,
+            self.n_heads * self.head_dim,
+            bias=False,
+            quant_config=quant_config,
+            return_bias=False,
+            prefix=f"{prefix}.wq_b",
+        )
+
+        self.kv_norm = RMSNorm(self.head_dim, self.eps)
+        self.wo_a = ColumnParallelLinear(
+            self.n_heads * self.head_dim // self.n_groups,
+            self.n_groups * self.o_lora_rank,
+            bias=False,
+            quant_config=quant_config,
+            return_bias=False,
+            prefix=f"{prefix}.wo_a",
+        )
+        self.wo_a.is_bmm = True
+        self.wo_a.bmm_batch_size = self.n_local_groups
+        self.wo_b = RowParallelLinear(
+            self.n_groups * self.o_lora_rank,
+            self.hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            return_bias=False,
+            prefix=f"{prefix}.wo_b",
+        )
+        self.softmax_scale = self.head_dim**-0.5
+        self.scale_fmt = config.quantization_config["scale_fmt"]
+
+        self.rope_parameters = config.rope_scaling
+
+        # Initialize rotary embedding BEFORE DeepseekV4MLAModules (which needs it)
+        rope_parameters = config.rope_parameters
+        rope_parameters["rope_theta"] = (
+            config.compress_rope_theta if self.compress_ratio > 1 else config.rope_theta
+        )
+        if config.rope_parameters["rope_type"] != "default":
+            config.rope_parameters["rope_type"] = (
+                "deepseek_yarn"
+                if config.rope_parameters.get("apply_yarn_scaling", True)
+                else "deepseek_llama_scaling"
+            )
+        rope_parameters["mscale"] = 0  # Disable mscale
+        rope_parameters["mscale_all_dim"] = 0  # Disable mscale
+        rope_parameters["is_deepseek_v4"] = True
+        rope_parameters["rope_dim"] = self.rope_head_dim
+        self.rotary_emb = get_rope(
+            self.head_dim,
+            max_position=self.max_position_embeddings,
+            rope_parameters=rope_parameters,
+            is_neox_style=False,
+        )
+
+        self.indexer = None
+        if self.compress_ratio == 4:
+            # Only C4A uses sparse attention and hence has indexer.
+            self.indexer = DeepseekV4Indexer(
+                vllm_config,
+                config=config,
+                hidden_size=self.hidden_size,
+                q_lora_rank=self.q_lora_rank,
+                quant_config=quant_config,
+                cache_config=vllm_config.cache_config,
+                topk_indices_buffer=topk_indices_buffer,
+                compress_ratio=self.compress_ratio,
+                prefix=f"{prefix}.indexer",
+            )
+
+        mla_modules = DeepseekV4MLAModules(
+            vllm_config=vllm_config,
+            fused_wqa_wkv=self.fused_wqa_wkv,
+            q_norm=self.q_norm,
+            wq_b=self.wq_b,
+            kv_norm=self.kv_norm,
+            wo_a=self.wo_a,
+            wo_b=self.wo_b,
+            attn_sink=self.attn_sink,
+            rotary_emb=self.rotary_emb,
+            indexer=self.indexer,
+            indexer_rotary_emb=self.rotary_emb,
+            topk_indices_buffer=topk_indices_buffer,
+            aux_stream_list=aux_stream_list,
+        )
+        self.mla_attn = DeepseekV4MultiHeadLatentAttentionWrapper(
+            hidden_size=self.hidden_size,
+            num_heads=self.n_local_heads,
+            head_dim=self.head_dim,
+            scale=self.softmax_scale,
+            qk_nope_head_dim=self.nope_head_dim,
+            qk_rope_head_dim=self.rope_head_dim,
+            v_head_dim=self.head_dim,
+            q_lora_rank=self.q_lora_rank,
+            kv_lora_rank=self.head_dim,
+            o_lora_rank=self.o_lora_rank,
+            mla_modules=mla_modules,
+            window_size=self.window_size,
+            compress_ratio=self.compress_ratio,
+            cache_config=vllm_config.cache_config,
+            quant_config=quant_config,
+            prefix=prefix,
+        )
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        llama_4_scaling: torch.Tensor | None,
+    ):
+        return self.mla_attn(positions, hidden_states, llama_4_scaling)
+
+
+class DeepseekV4DecoderLayer(nn.Module):
+    def __init__(
+        self,
+        vllm_config,
+        prefix,
+        topk_indices_buffer: torch.Tensor | None = None,
+        aux_stream_list: list[torch.cuda.Stream] | None = None,
+    ):
+        super().__init__()
+
+        # Lazy import to avoid top-level tilelang dependency.
+        # Registers both torch.ops.vllm.mhc_pre and mhc_post
+        import vllm.model_executor.layers.mhc  # noqa: F401
+
+        config = vllm_config.model_config.hf_config
+        self.hidden_size = config.hidden_size
+
+        self.rms_norm_eps = config.rms_norm_eps
+        self.attn = DeepseekV4Attention(
+            vllm_config,
+            prefix=f"{prefix}.attn",
+            topk_indices_buffer=topk_indices_buffer,
+            aux_stream_list=aux_stream_list,
+        )
+        self.ffn = DeepseekV4MoE(vllm_config, prefix=f"{prefix}.ffn")
+
+        self.attn_norm = RMSNorm(self.hidden_size, self.rms_norm_eps)
+        # ``ffn_norm`` is owned by ``self.ffn.norm_gate`` (fused with the
+        # router gate matmul); see ``NormGatedLinear``.
+        self.hc_mult = config.hc_mult
+        self.hc_sinkhorn_iters = config.hc_sinkhorn_iters
+        self.hc_eps = config.hc_eps
+        self.hc_post_alpha = 2.0
+        mix_hc = (2 + self.hc_mult) * self.hc_mult
+        hc_dim = self.hc_mult * self.hidden_size
+        self.hc_attn_fn = nn.Parameter(
+            torch.empty(
+                (mix_hc, hc_dim),
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.hc_ffn_fn = nn.Parameter(
+            torch.empty(
+                (mix_hc, hc_dim),
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.hc_attn_base = nn.Parameter(
+            torch.empty(
+                mix_hc,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.hc_ffn_base = nn.Parameter(
+            torch.empty(
+                mix_hc,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.hc_attn_scale = nn.Parameter(
+            torch.empty(
+                3,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.hc_ffn_scale = nn.Parameter(
+            torch.empty(
+                3,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.mhc_pre = MHCPreOp()
+        self.mhc_post = MHCPostOp()
+        self.mhc_fused_post_pre = MHCFusedPostPreOp()
+
+    def hc_pre(
+        self,
+        x: torch.Tensor,
+        hc_fn: torch.Tensor,
+        hc_scale: torch.Tensor,
+        hc_base: torch.Tensor,
+    ):
+        post_mix, res_mix, layer_input = self.mhc_pre(
+            residual=x,
+            fn=hc_fn,
+            hc_scale=hc_scale,
+            hc_base=hc_base,
+            rms_eps=self.rms_norm_eps,
+            hc_pre_eps=self.hc_eps,
+            hc_sinkhorn_eps=self.hc_eps,
+            hc_post_mult_value=self.hc_post_alpha,
+            sinkhorn_repeat=self.hc_sinkhorn_iters,
+        )
+        return layer_input, post_mix, res_mix
+
+    def hc_post(
+        self,
+        x: torch.Tensor,
+        residual: torch.Tensor,
+        post: torch.Tensor,
+        comb: torch.Tensor,
+    ):
+        return self.mhc_post(x, residual, post, comb)
+
+    def _forward_cuda(
+        self,
+        x: torch.Tensor,
+        positions: torch.Tensor,
+        input_ids: torch.Tensor | None,
+        post_mix: torch.Tensor | None = None,
+        res_mix: torch.Tensor | None = None,
+        residual: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        if residual is None:
+            # Run standalone hc_pre on first layer
+            residual = x
+            x, post_mix, res_mix = self.hc_pre(
+                x, self.hc_attn_fn, self.hc_attn_scale, self.hc_attn_base
+            )
+        else:
+            residual, post_mix, res_mix, x = self.mhc_fused_post_pre(
+                x,
+                residual,
+                post_mix,
+                res_mix,
+                self.hc_attn_fn,
+                self.hc_attn_scale,
+                self.hc_attn_base,
+                self.rms_norm_eps,
+                self.hc_eps,
+                self.hc_eps,
+                self.hc_post_alpha,
+                self.hc_sinkhorn_iters,
+            )
+
+        x = self.attn_norm(x)
+        x = self.attn(positions, x, None)
+
+        residual, post_mix, res_mix, x = self.mhc_fused_post_pre(
+            x,
+            residual,
+            post_mix,
+            res_mix,
+            self.hc_ffn_fn,
+            self.hc_ffn_scale,
+            self.hc_ffn_base,
+            self.rms_norm_eps,
+            self.hc_eps,
+            self.hc_eps,
+            self.hc_post_alpha,
+            self.hc_sinkhorn_iters,
+        )
+        # ffn_norm is now folded into self.ffn.norm_gate; ffn() takes
+        # the pre-norm activation directly.
+        x = self.ffn(x, input_ids)
+        return x, residual, post_mix, res_mix
+
+    def _forward_rocm(
+        self,
+        x: torch.Tensor,
+        positions: torch.Tensor,
+        input_ids: torch.Tensor | None,
+        post_mix: torch.Tensor | None = None,
+        res_mix: torch.Tensor | None = None,
+        residual: torch.Tensor | None = None,
+    ) -> tuple[
+        torch.Tensor, torch.Tensor | None, torch.Tensor | None, torch.Tensor | None
+    ]:
+        residual = x
+        x, post, comb = self.hc_pre(
+            x, self.hc_attn_fn, self.hc_attn_scale, self.hc_attn_base
+        )
+        x = self.attn_norm(x)
+        x = self.attn(positions, x, None)
+        x = self.hc_post(x, residual, post, comb)
+
+        residual = x
+        x, post, comb = self.hc_pre(
+            x, self.hc_ffn_fn, self.hc_ffn_scale, self.hc_ffn_base
+        )
+        # ffn_norm is now folded into self.ffn.norm_gate; ffn() takes
+        # the pre-norm activation directly.
+        x = self.ffn(x, input_ids)
+        x = self.hc_post(x, residual, post, comb)
+        return x, None, None, None
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        positions: torch.Tensor,
+        input_ids: torch.Tensor | None,
+        post_mix: torch.Tensor | None = None,
+        res_mix: torch.Tensor | None = None,
+        residual: torch.Tensor | None = None,
+    ) -> tuple[
+        torch.Tensor, torch.Tensor | None, torch.Tensor | None, torch.Tensor | None
+    ]:
+        if current_platform.is_rocm():
+            return self._forward_rocm(
+                x, positions, input_ids, post_mix, res_mix, residual
+            )
+
+        return self._forward_cuda(x, positions, input_ids, post_mix, res_mix, residual)
+
+
+@support_torch_compile
+class DeepseekV4Model(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        self.config = config
+        self.use_mega_moe = (
+            vllm_config.kernel_config.moe_backend == "deep_gemm_mega_moe"
+        )
+        if self.use_mega_moe and not vllm_config.parallel_config.enable_expert_parallel:
+            raise NotImplementedError(
+                "DeepSeek V4 MegaMoE currently requires expert parallel. "
+                "Enable it with --enable-expert-parallel, or pick a different "
+                "moe backend."
+            )
+        self.vocab_size = config.vocab_size
+        self.hc_eps = config.hc_eps
+        self.hc_mult = config.hc_mult
+        self.hc_dim = self.hc_mult * config.hidden_size
+        self.rms_norm_eps = config.rms_norm_eps
+
+        # Three aux streams: one per non-default input GEMM in
+        # DeepseekV4MultiHeadLatentAttentionWrapper.attn_gemm_parallel_execute
+        # (compressor kv_score, indexer.weights_proj, indexer.compressor
+        # kv_score). fused_wqa_wkv stays on the default stream.
+        # Disable them on ROCm because of hang issues.
+        aux_stream_list = (
+            None
+            if current_platform.is_rocm()
+            else [torch.cuda.Stream() for _ in range(3)]
+        )
+
+        self.device = current_platform.device_type
+        # Reserved topk indices buffer for all Indexer layers to reuse.
+        self.topk_indices_buffer = torch.empty(
+            vllm_config.scheduler_config.max_num_batched_tokens,
+            config.index_topk,
+            dtype=torch.int32,
+            device=self.device,
+        )
+
+        if get_pp_group().is_first_rank:
+            self.embed_tokens = VocabParallelEmbedding(
+                config.vocab_size,
+                config.hidden_size,
+                quant_config=quant_config,
+                prefix=f"{prefix}.embed_tokens",
+            )
+        else:
+            self.embed_tokens = PPMissingLayer()
+
+        self.start_layer, self.end_layer, self.layers = make_layers(
+            config.num_hidden_layers,
+            lambda prefix: DeepseekV4DecoderLayer(
+                vllm_config,
+                prefix=prefix,
+                topk_indices_buffer=self.topk_indices_buffer,
+                aux_stream_list=aux_stream_list,
+            ),
+            prefix=f"{prefix}.layers",
+        )
+
+        if get_pp_group().is_last_rank:
+            self.norm = RMSNorm(config.hidden_size, self.rms_norm_eps)
+        else:
+            self.norm = PPMissingLayer()
+
+        self.hc_head_fn = nn.Parameter(
+            torch.empty(
+                self.hc_mult,
+                self.hc_dim,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.hc_head_base = nn.Parameter(
+            torch.empty(
+                self.hc_mult,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.hc_head_scale = nn.Parameter(
+            torch.empty(1, dtype=torch.float32),
+            requires_grad=False,
+        )
+        self.hc_head_op = HCHeadOp()
+        # Pre-hc_head residual stream buffer for the MTP draft. Stable
+        # address (outside the cudagraph pool) so the copy_ in forward()
+        # refreshes it correctly across captured shapes.
+        # refreshes it correctly across captured shapes. Only allocated on
+        # the last PP rank — that's where MTP target hidden states are
+        # produced.
+        if get_pp_group().is_last_rank:
+            self._mtp_hidden_buffer = torch.empty(
+                vllm_config.scheduler_config.max_num_batched_tokens,
+                self.hc_dim,
+                dtype=vllm_config.model_config.dtype,
+                device=self.device,
+            )
+        else:
+            self._mtp_hidden_buffer = None
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
+    def make_empty_intermediate_tensors(
+        self,
+        batch_size: int,
+        dtype: torch.dtype,
+        device: torch.device,
+    ) -> IntermediateTensors:
+        # PP intermediate tensors carry the multi-stream hidden_states
+        # of shape (num_tokens, hc_mult, hidden_size) — V4 expands the
+        # token embedding to hc_mult streams before the first decoder
+        # layer and keeps that shape until hc_head() collapses it.
+        return IntermediateTensors(
+            {
+                "hidden_states": torch.zeros(
+                    (batch_size, self.hc_mult, self.config.hidden_size),
+                    dtype=dtype,
+                    device=device,
+                ),
+            }
+        )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        if get_pp_group().is_first_rank:
+            if inputs_embeds is not None:
+                hidden_states = inputs_embeds
+            else:
+                hidden_states = self.embed_input_ids(input_ids)
+            hidden_states = hidden_states.unsqueeze(-2).repeat(1, self.hc_mult, 1)
+        else:
+            assert intermediate_tensors is not None
+            hidden_states = intermediate_tensors["hidden_states"]
+
+        if self.use_mega_moe:
+            input_ids = input_ids.to(torch.int64)
+
+        residual, post_mix, res_mix = None, None, None
+        for layer in islice(self.layers, self.start_layer, self.end_layer):
+            hidden_states, residual, post_mix, res_mix = layer(
+                hidden_states,
+                positions,
+                input_ids,
+                post_mix,
+                res_mix,
+                residual,
+            )
+        if layer is not None and current_platform.is_cuda():
+            hidden_states = layer.hc_post(hidden_states, residual, post_mix, res_mix)
+
+        if not get_pp_group().is_last_rank:
+            return IntermediateTensors({"hidden_states": hidden_states})
+
+        # Stash pre-hc_head residual for the MTP draft (captured copy_).
+        num_tokens = hidden_states.shape[0]
+        self._mtp_hidden_buffer[:num_tokens].copy_(hidden_states.flatten(1))
+
+        hidden_states = self.hc_head_op(
+            hidden_states,
+            self.hc_head_fn,
+            self.hc_head_scale,
+            self.hc_head_base,
+            self.rms_norm_eps,
+            self.hc_eps,
+        )
+        hidden_states = self.norm(hidden_states)
+        return hidden_states
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        stacked_params_mapping = [
+            # (param_name, shard_name, shard_id)
+            ("gate_up_proj", "w1", 0),
+            ("gate_up_proj", "w3", 1),
+            ("attn.fused_wqa_wkv", "attn.wq_a", 0),
+            ("attn.fused_wqa_wkv", "attn.wkv", 1),
+            ("compressor.fused_wkv_wgate", "compressor.wkv", 0),
+            ("compressor.fused_wkv_wgate", "compressor.wgate", 1),
+        ]
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+
+        # TP for attention
+        tp_size = get_tensor_model_parallel_world_size()
+        tp_rank = get_tensor_model_parallel_rank()
+        n_head = self.config.num_attention_heads
+        n_local_head = n_head // tp_size
+        head_rank_start = n_local_head * tp_rank
+        head_rank_end = n_local_head * (tp_rank + 1)
+
+        # Pre-compute expert mapping ONCE.
+        expert_mapping = self.get_expert_mapping()
+
+        for name, loaded_weight in weights:
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                # Skip non-stacked layers and experts (experts handled below).
+                if ".experts." in name:
+                    continue
+                if weight_name not in name:
+                    continue
+                name = name.replace(weight_name, param_name)
+
+                if is_pp_missing_parameter(name, self):
+                    break
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                loaded_params.add(name)
+                break
+            else:
+                if ".experts." in name:
+                    # E8M0 scales are stored as float8_e8m0fnu in
+                    # checkpoints but the MoE param is uint8. copy_()
+                    # would do a numeric conversion (e.g. 2^-7 → 0),
+                    # destroying the raw exponent bytes.
+                    if (
+                        "weight_scale" in name
+                        and loaded_weight.dtype == torch.float8_e8m0fnu
+                    ):
+                        loaded_weight = loaded_weight.view(torch.uint8)
+                    for mapping in expert_mapping:
+                        param_name, weight_name, expert_id, expert_shard_id = mapping
+                        if weight_name not in name:
+                            continue
+                        name_mapped = name.replace(weight_name, param_name)
+                        if is_pp_missing_parameter(name_mapped, self):
+                            continue
+                        param = params_dict[name_mapped]
+                        # We should ask the weight loader to return success or not
+                        # here since otherwise we may skip experts with other
+                        # available replicas.
+                        weight_loader = typing.cast(
+                            Callable[..., bool], param.weight_loader
+                        )
+                        success = weight_loader(
+                            param,
+                            loaded_weight,
+                            name_mapped,
+                            shard_id=expert_shard_id,
+                            expert_id=expert_id,
+                            return_success=True,
+                        )
+                        if success:
+                            name = name_mapped
+                            break
+                    loaded_params.add(name_mapped)
+                    continue
+                elif "attn_sink" in name:
+                    if is_pp_missing_parameter(name, self):
+                        continue
+                    narrow_weight = loaded_weight[head_rank_start:head_rank_end]
+                    n = narrow_weight.shape[0]
+                    params_dict[name][:n].copy_(narrow_weight)
+                    loaded_params.add(name)
+                    continue
+                else:
+                    if is_pp_missing_parameter(name, self):
+                        continue
+                    param = params_dict[name]
+                    weight_loader = getattr(
+                        param, "weight_loader", default_weight_loader
+                    )
+                    weight_loader(param, loaded_weight)
+                    loaded_params.add(name)
+                    continue
+
+        return loaded_params
+
+    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
+        first_layer = next(iter(islice(self.layers, self.start_layer, self.end_layer)))
+        if first_layer.ffn.use_mega_moe:
+            return make_deepseek_v4_expert_params_mapping(self.config.n_routed_experts)
+        # Params for weights, fp8 weight scales, fp8 activation scales
+        # (param_name, weight_name, expert_id, shard_id)
+        return FusedMoE.make_expert_params_mapping(
+            self,
+            ckpt_gate_proj_name="w1",
+            ckpt_down_proj_name="w2",
+            ckpt_up_proj_name="w3",
+            num_experts=self.config.n_routed_experts,
+        )
+
+    def finalize_mega_moe_weights(self) -> None:
+        for layer in islice(self.layers, self.start_layer, self.end_layer):
+            layer.ffn.finalize_mega_moe_weights()
+
+
+def _make_deepseek_v4_weights_mapper(expert_dtype: str) -> WeightsMapper:
+    if expert_dtype == "fp4":
+        # MXFP4 experts use Mxfp4MoEMethod, which registers scales as
+        # ``w{1,2,3}_weight_scale`` (no _inv suffix). FP8 linear and
+        # shared experts use Fp8LinearMethod's block scales, which
+        # register as ``weight_scale_inv``.
+        scale_regex = {
+            re.compile(r"(\.experts\.\d+\.w[123])\.scale$"): r"\1.weight_scale",
+            re.compile(r"\.scale$"): ".weight_scale_inv",
+        }
+    else:
+        # FP8 experts use Fp8MoEMethod (block_quant=True), which registers
+        # scales as ``w{13,2}_weight_scale_inv``. Map all ``.scale`` keys
+        # there.
+        scale_regex = {
+            re.compile(r"\.scale$"): ".weight_scale_inv",
+        }
+    return WeightsMapper(
+        orig_to_new_prefix={
+            "layers.": "model.layers.",
+            "embed.": "model.embed.",
+            "norm.": "model.norm.",
+            "hc_head": "model.hc_head",
+            "mtp.": "model.mtp.",
+        },
+        orig_to_new_regex=scale_regex,
+        orig_to_new_suffix={
+            "head.weight": "lm_head.weight",
+            "embed.weight": "embed_tokens.weight",
+            # Pre-MoE norm + gate are now owned by ``DeepseekV4MoE.norm_gate``
+            # (see NormGatedLinear).
+            ".ffn_norm.weight": ".ffn.norm_gate.norm.weight",
+            ".ffn.gate.weight": ".ffn.norm_gate.gate.weight",
+            ".ffn.gate.bias": ".ffn.norm_gate.e_score_correction_bias",
+            # Hash MoE table also moved off the inner gate.
+            ".ffn.gate.tid2eid": ".ffn.norm_gate.tid2eid",
+        },
+        orig_to_new_substr={
+            ".attn.compressor.": ".attn.mla_attn.compressor.",
+            ".shared_experts.w2": ".shared_experts.down_proj",
+        },
+    )
+
+
+class DeepseekV4ForCausalLM(nn.Module, SupportsPP):
+    model_cls = DeepseekV4Model
+
+    # Default mapper assumes the original FP4-expert checkpoint layout.
+    # Overridden per-instance in __init__ when expert_dtype != "fp4".
+    hf_to_vllm_mapper = _make_deepseek_v4_weights_mapper("fp4")
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+
+        config = vllm_config.model_config.hf_config
+        self.config = config
+        expert_dtype = getattr(config, "expert_dtype", "fp4")
+        if expert_dtype != "fp4":
+            self.hf_to_vllm_mapper = _make_deepseek_v4_weights_mapper(expert_dtype)
+
+        self.model = self.model_cls(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+        if get_pp_group().is_last_rank:
+            self.lm_head = ParallelLMHead(
+                config.vocab_size,
+                config.hidden_size,
+                prefix=maybe_prefix(prefix, "lm_head"),
+            )
+        else:
+            self.lm_head = PPMissingLayer()
+        self.logits_processor = LogitsProcessor(config.vocab_size)
+        self.make_empty_intermediate_tensors = (  # type: ignore[method-assign]
+            self.model.make_empty_intermediate_tensors
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        logits = self.logits_processor(self.lm_head, hidden_states)
+        return logits
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        hidden_states = self.model(
+            input_ids, positions, intermediate_tensors, inputs_embeds
+        )
+        return hidden_states
+
+    def get_mtp_target_hidden_states(self) -> torch.Tensor | None:
+        """Pre-hc_head residual stream buffer (max_num_batched_tokens,
+        hc_mult * hidden_size) for the MTP draft model. Populated by
+        forward(); valid after each target step."""
+        return getattr(self.model, "_mtp_hidden_buffer", None)
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(self, skip_substrs=["mtp."])
+        loaded_params = loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
+        self.model.finalize_mega_moe_weights()
+        return loaded_params
+
+    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
+        return self.model.get_expert_mapping()
diff --git a/vllm/models/deepseek_v4/amd/mtp.py b/vllm/models/deepseek_v4/amd/mtp.py
new file mode 100644
index 000000000000..071abe2f4a49
--- /dev/null
+++ b/vllm/models/deepseek_v4/amd/mtp.py
@@ -0,0 +1,520 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""MTP draft model for DeepSeek V4 (internal codename: DeepseekV4).
+
+Split from ``deepseek_mtp.py`` because the V4 architecture introduces several
+pieces that have no analogue in V3/V32:
+  * separate ``e_proj`` / ``h_proj`` with fp8 linear quantization (instead of
+    the fused ``eh_proj``);
+  * ``hc_head`` hypercompressed vocab projection applied in ``compute_logits``;
+  * ``DeepseekV4DecoderLayer`` with its own aux-stream management;
+  * V4-specific checkpoint weight-name remapping in ``load_weights``.
+"""
+
+import typing
+from collections.abc import Callable, Iterable
+
+import regex as re
+import torch
+import torch.nn as nn
+
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import VllmConfig
+from vllm.distributed import (
+    get_tensor_model_parallel_rank,
+    get_tensor_model_parallel_world_size,
+)
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import ReplicatedLinear
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.mhc import HCHeadOp
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.model_executor.models.deepseek_mtp import SharedHead
+from vllm.model_executor.models.deepseek_v2 import get_spec_layer_idx_from_weight_name
+from vllm.model_executor.models.utils import maybe_prefix
+from vllm.platforms import current_platform
+from vllm.sequence import IntermediateTensors
+
+from .model import (
+    DeepseekV4DecoderLayer,
+    make_deepseek_v4_expert_params_mapping,
+)
+
+logger = init_logger(__name__)
+
+# MoE expert scales are fused into per-layer w13/w2 tensors. The exact
+# parameter suffix depends on which FusedMoE method handles the experts:
+# - fp4 experts (Mxfp4MoEMethod) register ``w{1,2,3}_weight_scale``;
+# - fp8 experts (Fp8MoEMethod with block_quant=True) register
+#   ``w{1,2,3}_weight_scale_inv``.
+# Other FP8 linear scales (including shared experts) always use
+# ``.weight_scale_inv``. Mirrors the per-instance mapper built by
+# ``_make_deepseek_v4_weights_mapper`` in deepseek_v4.py.
+_EXPERT_SCALE_RE = re.compile(r"\.experts\.\d+\.w[123]\.scale$")
+
+
+class DeepSeekV4MultiTokenPredictorLayer(nn.Module):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        topk_indices_buffer: torch.Tensor,
+        prefix: str,
+        aux_stream_list: list[torch.cuda.Stream] | None = None,
+    ) -> None:
+        super().__init__()
+
+        assert vllm_config.speculative_config is not None
+        config = vllm_config.speculative_config.draft_model_config.hf_config
+        self.config = config
+        quant_config = vllm_config.quant_config
+        self.rms_norm_eps = config.rms_norm_eps
+
+        self.enorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.hnorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+        # V4 keeps e_ and h_ proj separate (with fp8 linear quant) rather than
+        # fusing them the way V3 does with eh_proj.
+        self.e_proj = ReplicatedLinear(
+            config.hidden_size,
+            config.hidden_size,
+            bias=False,
+            return_bias=False,
+            quant_config=quant_config,
+        )
+        self.h_proj = ReplicatedLinear(
+            config.hidden_size,
+            config.hidden_size,
+            bias=False,
+            return_bias=False,
+            quant_config=quant_config,
+        )
+
+        self.hc_eps = config.hc_eps
+        self.hc_mult = config.hc_mult
+        self.hc_dim = self.hc_mult * config.hidden_size
+        self.hc_head_fn = nn.Parameter(
+            torch.empty(self.hc_mult, self.hc_dim, dtype=torch.float32),
+            requires_grad=False,
+        )
+        self.hc_head_base = nn.Parameter(
+            torch.empty(self.hc_mult, dtype=torch.float32),
+            requires_grad=False,
+        )
+        self.hc_head_scale = nn.Parameter(
+            torch.empty(1, dtype=torch.float32),
+            requires_grad=False,
+        )
+
+        self.shared_head = SharedHead(
+            config=config, prefix=prefix, quant_config=quant_config
+        )
+        self.mtp_block = DeepseekV4DecoderLayer(
+            vllm_config,
+            prefix,
+            topk_indices_buffer=topk_indices_buffer,
+            aux_stream_list=aux_stream_list,
+        )
+
+        self.hc_head_op = HCHeadOp()
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        previous_hidden_states: torch.Tensor,
+        inputs_embeds: torch.Tensor | None = None,
+        spec_step_index: int = 0,
+    ) -> torch.Tensor:
+        assert inputs_embeds is not None
+        # masking inputs at position 0, as not needed by MTP
+        inputs_embeds = torch.where(positions.unsqueeze(-1) == 0, 0, inputs_embeds)
+        inputs_embeds = self.enorm(inputs_embeds)
+
+        # Target stashes pre-hc_head residual as flat (T, hc_mult * D);
+        # reshape to (T, hc_mult, D) — the training-time layout.
+        previous_hidden_states = previous_hidden_states.view(
+            -1, self.hc_mult, self.config.hidden_size
+        )
+        previous_hidden_states = self.hnorm(previous_hidden_states)
+        hidden_states = self.h_proj(previous_hidden_states) + self.e_proj(
+            inputs_embeds
+        ).unsqueeze(-2)
+        hidden_states, residual, post_mix, res_mix = self.mtp_block(
+            positions=positions, x=hidden_states, input_ids=None
+        )
+        if current_platform.is_cuda():
+            hidden_states = self.mtp_block.hc_post(
+                hidden_states, residual, post_mix, res_mix
+            )
+        # Return the flat pre-hc_head residual so it can be re-fed as the
+        # next spec step's `previous_hidden_states` when
+        # num_speculative_tokens > 1. hc_head is deferred to compute_logits.
+        return hidden_states.flatten(1)
+
+
+class DeepSeekV4MultiTokenPredictor(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        self.mtp_start_layer_idx = config.num_hidden_layers
+        self.num_mtp_layers = config.num_nextn_predict_layers
+        self.device = current_platform.device_type
+
+        topk_tokens = config.index_topk
+        self.topk_indices_buffer = torch.empty(
+            vllm_config.scheduler_config.max_num_batched_tokens,
+            topk_tokens,
+            dtype=torch.int32,
+            device=self.device,
+        )
+
+        # Three aux streams shared across all MTP layers, mirroring
+        # DeepseekV4Model. ROCm runs the same work serially for now.
+        aux_stream_list = (
+            None
+            if current_platform.is_rocm()
+            else [torch.cuda.Stream() for _ in range(3)]
+        )
+
+        # to map the exact layer index from weights
+        self.layers = torch.nn.ModuleDict(
+            {
+                str(idx): DeepSeekV4MultiTokenPredictorLayer(
+                    vllm_config,
+                    self.topk_indices_buffer,
+                    f"{prefix}.layers.{idx}",
+                    aux_stream_list=aux_stream_list,
+                )
+                for idx in range(
+                    self.mtp_start_layer_idx,
+                    self.mtp_start_layer_idx + self.num_mtp_layers,
+                )
+            }
+        )
+        self.embed_tokens = VocabParallelEmbedding(
+            config.vocab_size,
+            config.hidden_size,
+            prefix=maybe_prefix(prefix, "embed_tokens"),
+        )
+        self.logits_processor = LogitsProcessor(config.vocab_size)
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        previous_hidden_states: torch.Tensor,
+        inputs_embeds: torch.Tensor | None = None,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor:
+        if inputs_embeds is None:
+            inputs_embeds = self.embed_tokens(input_ids)
+        current_step_idx = spec_step_idx % self.num_mtp_layers
+        return self.layers[str(self.mtp_start_layer_idx + current_step_idx)](
+            input_ids,
+            positions,
+            previous_hidden_states,
+            inputs_embeds,
+            current_step_idx,
+        )
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor:
+        current_step_idx = spec_step_idx % self.num_mtp_layers
+        mtp_layer = self.layers[str(self.mtp_start_layer_idx + current_step_idx)]
+        # MTP forward returns the pre-hc_head residual (T, hc_mult * D); apply
+        # hc_head here so logits are computed from the dense hidden state.
+        hidden_states = hidden_states.view(
+            -1, mtp_layer.hc_mult, mtp_layer.config.hidden_size
+        )
+        hidden_states = mtp_layer.hc_head_op(
+            hidden_states,
+            mtp_layer.hc_head_fn,
+            mtp_layer.hc_head_scale,
+            mtp_layer.hc_head_base,
+            mtp_layer.rms_norm_eps,
+            mtp_layer.hc_eps,
+        )
+        logits = self.logits_processor(
+            mtp_layer.shared_head.head, mtp_layer.shared_head(hidden_states)
+        )
+        return logits
+
+
+@support_torch_compile
+class DeepSeekV4MTP(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        self.config = vllm_config.model_config.hf_config
+        self.quant_config = vllm_config.quant_config
+        self.model = DeepSeekV4MultiTokenPredictor(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor:
+        hidden_states = self.model(
+            input_ids, positions, hidden_states, inputs_embeds, spec_step_idx
+        )
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor | None:
+        return self.model.compute_logits(hidden_states, spec_step_idx)
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        # Weight name remapping for checkpoint compatibility.
+        # Maps checkpoint weight paths to model parameter paths.
+        WEIGHT_NAME_REMAPPING: dict[str, str] = {
+            ".emb.tok_emb.weight": ".embed_tokens.weight",
+            ".head.weight": ".shared_head.head.weight",
+            ".norm.weight": ".shared_head.norm.weight",
+            # Pre-MoE norm + gate are now owned by
+            # ``DeepseekV4MoE.norm_gate`` (see NormGatedLinear).
+            ".ffn_norm.weight": ".ffn.norm_gate.norm.weight",
+            ".ffn.gate.weight": ".ffn.norm_gate.gate.weight",
+            ".ffn.gate.tid2eid": ".ffn.norm_gate.tid2eid",
+        }
+
+        def _remap_weight_name(name: str) -> str:
+            """Remap checkpoint weight names to model parameter names."""
+            for old_pattern, new_pattern in WEIGHT_NAME_REMAPPING.items():
+                if old_pattern in name:
+                    name = name.replace(old_pattern, new_pattern)
+            return name
+
+        def _find_mtp_layer_idx(name: str) -> int:
+            subnames = name.split(".")
+            for subname in subnames:
+                try:
+                    # we return the first encountered integer
+                    return int(subname)
+                except ValueError:
+                    continue
+            return 0
+
+        stacked_params_mapping = [
+            # (param_name, shard_name, shard_id)
+            ("gate_up_proj", "w1", 0),
+            ("gate_up_proj", "w3", 1),
+            ("attn.fused_wqa_wkv", "attn.wq_a", 0),
+            ("attn.fused_wqa_wkv", "attn.wkv", 1),
+        ]
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+
+        # TP for attention
+        tp_size = get_tensor_model_parallel_world_size()
+        tp_rank = get_tensor_model_parallel_rank()
+        n_head = self.config.num_attention_heads
+        n_local_head = n_head // tp_size
+        head_rank_start = n_local_head * tp_rank
+        head_rank_end = n_local_head * (tp_rank + 1)
+
+        # Pre-compute expert mapping ONCE.
+        first_layer = next(iter(self.model.layers.values()))
+        if first_layer.mtp_block.ffn.use_mega_moe:
+            expert_mapping = make_deepseek_v4_expert_params_mapping(
+                self.config.n_routed_experts
+            )
+        else:
+            expert_mapping = FusedMoE.make_expert_params_mapping(
+                self,
+                ckpt_gate_proj_name="w1",
+                ckpt_down_proj_name="w2",
+                ckpt_up_proj_name="w3",
+                num_experts=self.config.n_routed_experts,
+            )
+
+        # FP8 experts register ``..._weight_scale_inv`` (block_quant) while
+        # FP4/MXFP4 experts register ``..._weight_scale``. Choose the suffix
+        # for the rename below based on the model's expert dtype.
+        expert_scale_suffix = (
+            ".weight_scale"
+            if getattr(self.config, "expert_dtype", "fp4") == "fp4"
+            else ".weight_scale_inv"
+        )
+
+        for name, loaded_weight in weights:
+            mtp_layer_idx = _find_mtp_layer_idx(name)
+            # V4 checkpoints store MTP weights as `mtp.{i}.*`; remap to
+            # `model.layers.{num_hidden_layers + i}.*` so that
+            # get_spec_layer_idx_from_weight_name can identify them.
+            name = name.replace(
+                f"mtp.{mtp_layer_idx}.",
+                f"model.layers.{self.config.num_hidden_layers + mtp_layer_idx}.",
+            )
+
+            spec_layer = get_spec_layer_idx_from_weight_name(self.config, name)
+            if spec_layer is None:
+                continue
+
+            name = _remap_weight_name(name)
+            name = self._rewrite_spec_layer_name(spec_layer, name)
+
+            if spec_layer != self.model.mtp_start_layer_idx and ".layers" not in name:
+                continue
+            if name.endswith(".scale"):
+                suffix = (
+                    expert_scale_suffix
+                    if _EXPERT_SCALE_RE.search(name)
+                    else ".weight_scale_inv"
+                )
+                name = name.removesuffix(".scale") + suffix
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                # Skip non-stacked layers and experts (experts handled below).
+                if ".experts." in name:
+                    continue
+                if weight_name not in name:
+                    continue
+                name = name.replace(weight_name, param_name)
+
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                loaded_params.add(name)
+                break
+            else:
+                if ".experts." in name:
+                    # Reinterpret E8M0 scales as uint8 to preserve raw
+                    # exponent bytes; numeric copy_() would zero them.
+                    # Mirrors the main DeepseekV4 loader.
+                    if (
+                        "weight_scale" in name
+                        and loaded_weight.dtype == torch.float8_e8m0fnu
+                    ):
+                        loaded_weight = loaded_weight.view(torch.uint8)
+                    for mapping in expert_mapping:
+                        param_name, weight_name, expert_id, expert_shard_id = mapping
+                        if weight_name not in name:
+                            continue
+                        name_mapped = name.replace(weight_name, param_name)
+                        param = params_dict[name_mapped]
+                        # We should ask the weight loader to return success or not
+                        # here since otherwise we may skip experts with other
+                        # available replicas.
+                        weight_loader = typing.cast(
+                            Callable[..., bool], param.weight_loader
+                        )
+                        success = weight_loader(
+                            param,
+                            loaded_weight,
+                            name_mapped,
+                            shard_id=expert_shard_id,
+                            expert_id=expert_id,
+                            return_success=True,
+                        )
+                        if success:
+                            name = name_mapped
+                            loaded_params.add(name_mapped)
+                            break
+                    continue
+                elif "attn_sink" in name:
+                    narrow_weight = loaded_weight[head_rank_start:head_rank_end]
+                    n = narrow_weight.shape[0]
+                    params_dict[name][:n].copy_(narrow_weight)
+                    loaded_params.add(name)
+                    continue
+                else:
+                    if ".shared_experts.w2" in name:
+                        name = name.replace(
+                            ".shared_experts.w2", ".shared_experts.down_proj"
+                        )
+                    if name.endswith(".ffn.gate.bias"):
+                        # ``e_score_correction_bias`` lives on
+                        # ``norm_gate`` directly (not on the inner gate).
+                        name = name.replace(
+                            ".ffn.gate.bias",
+                            ".ffn.norm_gate.e_score_correction_bias",
+                        )
+                    param = params_dict[name]
+                    weight_loader = getattr(
+                        param, "weight_loader", default_weight_loader
+                    )
+                    weight_loader(param, loaded_weight)
+                    loaded_params.add(name)
+                    continue
+
+        loaded_layers: set[int] = set()
+        for param_name in loaded_params:
+            spec_layer = get_spec_layer_idx_from_weight_name(self.config, param_name)
+            if spec_layer is not None:
+                loaded_layers.add(spec_layer)
+        for layer_idx in range(
+            self.model.mtp_start_layer_idx,
+            self.model.mtp_start_layer_idx + self.model.num_mtp_layers,
+        ):
+            if layer_idx not in loaded_layers:
+                raise ValueError(
+                    f"MTP speculative decoding layer {layer_idx} weights "
+                    f"missing from checkpoint. The checkpoint may have "
+                    f"been quantized without including the MTP layers. "
+                    f"Use a checkpoint that includes MTP layer weights, "
+                    f"or disable speculative decoding."
+                )
+        self.finalize_mega_moe_weights()
+        logger.info_once("MTP draft model loaded: %d params", len(loaded_params))
+        return loaded_params
+
+    def finalize_mega_moe_weights(self) -> None:
+        for layer in self.model.layers.values():
+            layer.mtp_block.ffn.finalize_mega_moe_weights()
+
+    def _rewrite_spec_layer_name(self, spec_layer: int, name: str) -> str:
+        """
+        Rewrite the weight name to match the format of the original model.
+        Add .mtp_block for modules in transformer layer block for spec layer
+        and rename shared layer weights to be top level.
+        """
+        spec_layer_weight_names = [
+            "embed_tokens",
+            "enorm",
+            "hnorm",
+            "h_proj",
+            "e_proj",
+            "shared_head",
+            "hc_head_fn",
+            "hc_head_base",
+            "hc_head_scale",
+        ]
+        shared_weight_names = ["embed_tokens"]
+        spec_layer_weight = False
+        shared_weight = False
+        for weight_name in spec_layer_weight_names:
+            if weight_name in name:
+                spec_layer_weight = True
+                if weight_name in shared_weight_names:
+                    shared_weight = True
+                break
+        if not spec_layer_weight:
+            # treat rest weights as weights for transformer layer block
+            name = name.replace(
+                f"model.layers.{spec_layer}.", f"model.layers.{spec_layer}.mtp_block."
+            )
+        elif shared_weight:
+            # treat shared weights as top level weights
+            name = name.replace(f"model.layers.{spec_layer}.", "model.")
+        return name
diff --git a/vllm/models/deepseek_v4/amd/rocm.py b/vllm/models/deepseek_v4/amd/rocm.py
new file mode 100644
index 000000000000..24a58a51b54c
--- /dev/null
+++ b/vllm/models/deepseek_v4/amd/rocm.py
@@ -0,0 +1,852 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, cast
+
+import torch
+
+from vllm.forward_context import get_forward_context
+from vllm.models.deepseek_v4.common.ops import dequantize_and_gather_k_cache
+from vllm.models.deepseek_v4.nvidia.flashmla import (
+    DeepseekV4FlashMLASparseBackend,
+    DeepseekV4SparseMLAAttentionImpl,
+)
+from vllm.triton_utils import tl, triton
+from vllm.v1.attention.backend import (
+    CommonAttentionMetadata,
+)
+from vllm.v1.attention.backends.mla.flashmla_sparse import (
+    FlashMLASparseMetadata,
+    FlashMLASparseMetadataBuilder,
+)
+from vllm.v1.attention.backends.mla.sparse_swa import (
+    DeepseekSparseSWAMetadata,
+    DeepseekSparseSWAMetadataBuilder,
+)
+from vllm.v1.attention.ops.rocm_aiter_mla_sparse import (
+    build_ragged_indices_from_dense,
+    rocm_sparse_attn_decode,
+    rocm_sparse_attn_prefill,
+)
+from vllm.v1.worker.workspace import current_workspace_manager
+
+if TYPE_CHECKING:
+    from vllm.models.deepseek_v4.nvidia.ops.attention import (
+        DeepseekV4MLAAttention,
+    )
+
+
+def _build_indptr_from_lengths(lengths: torch.Tensor) -> torch.Tensor:
+    lengths = lengths.to(dtype=torch.int32).contiguous()
+    indptr = torch.zeros(lengths.shape[0] + 1, dtype=torch.int32, device=lengths.device)
+    torch.cumsum(lengths, dim=0, out=indptr[1:])
+    return indptr
+
+
+# ROCm sparse prefill keeps this dense combine local so AMD-specific SWA changes
+# do not touch the shared DeepSeek V4 cache utilities.
+_SPARSE_PREFILL_TOPK_ALIGNMENT = 128
+
+
+@triton.jit
+def _combine_topk_swa_indices_kernel(
+    combined_indices_ptr,
+    combined_indices_stride,
+    combined_lens_ptr,
+    topk_indices_ptr,
+    topk_indices_stride,
+    query_start_loc_ptr,
+    seq_lens_ptr,
+    gather_lens_ptr,
+    M,
+    N,
+    TOP_K: tl.constexpr,
+    COMPRESS_RATIO: tl.constexpr,
+    WINDOW_SIZE: tl.constexpr,
+    TOPK_WIDTH: tl.constexpr,
+    PADDED_TOP_K: tl.constexpr,
+):
+    batch_idx = tl.program_id(0)
+    worker_id = tl.program_id(1)
+    num_workers = tl.num_programs(1)
+
+    base = tl.load(query_start_loc_ptr)
+    query_start = tl.load(query_start_loc_ptr + batch_idx) - base
+    query_end = tl.load(query_start_loc_ptr + batch_idx + 1) - base
+    query_len = query_end - query_start
+    seq_len = tl.load(seq_lens_ptr + batch_idx)
+    gather_len = tl.load(gather_lens_ptr + batch_idx)
+    start_pos = seq_len - query_len
+    gather_start = seq_len - gather_len
+
+    for token_idx in range(query_start + worker_id, query_end, num_workers):
+        token_idx_in_query = token_idx - query_start
+        pos = start_pos + token_idx_in_query
+        topk_len = tl.minimum((pos + 1) // COMPRESS_RATIO, TOP_K)
+        swa_len = tl.minimum(pos + 1, WINDOW_SIZE)
+
+        topk_offset = tl.arange(0, PADDED_TOP_K)
+        topk_mask = topk_offset < topk_len
+        safe_topk_offset = tl.where(topk_offset < TOPK_WIDTH, topk_offset, 0)
+        topk_indices = tl.load(
+            topk_indices_ptr + token_idx * topk_indices_stride + safe_topk_offset,
+            mask=topk_mask,
+            other=-1,
+        )
+        valid_topk = (topk_indices >= 0) & (topk_indices < N)
+        topk_indices = tl.where(valid_topk, topk_indices + M * batch_idx, -1)
+        tl.store(
+            combined_indices_ptr + token_idx * combined_indices_stride + topk_offset,
+            topk_indices,
+            mask=topk_mask,
+        )
+
+        swa_offset = tl.arange(0, WINDOW_SIZE)
+        tl.store(
+            combined_indices_ptr
+            + token_idx * combined_indices_stride
+            + topk_len
+            + swa_offset,
+            M * batch_idx + N + swa_offset + pos - swa_len + 1 - gather_start,
+            mask=swa_offset < swa_len,
+        )
+
+        tl.store(combined_lens_ptr + token_idx, topk_len + swa_len)
+
+
+def combine_topk_swa_indices(
+    topk_indices: torch.Tensor,
+    query_start_loc: torch.Tensor,
+    seq_lens: torch.Tensor,
+    gather_lens: torch.Tensor,
+    window_size: int,
+    compress_ratio: int,
+    topk: int,
+    M: int,
+    N: int,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    topk_indices = topk_indices.reshape(topk_indices.shape[0], -1).contiguous()
+    num_tokens = topk_indices.shape[0]
+    num_reqs = seq_lens.shape[0]
+    combined_topk = (
+        (topk + window_size + _SPARSE_PREFILL_TOPK_ALIGNMENT - 1)
+        // _SPARSE_PREFILL_TOPK_ALIGNMENT
+        * _SPARSE_PREFILL_TOPK_ALIGNMENT
+    )
+    combined_indices = torch.full(
+        (num_tokens, combined_topk),
+        fill_value=-1,
+        dtype=torch.int32,
+        device=topk_indices.device,
+    )
+    combined_lens = torch.empty(
+        num_tokens, dtype=torch.int32, device=topk_indices.device
+    )
+
+    num_workers = 128
+    _combine_topk_swa_indices_kernel[(num_reqs, num_workers)](
+        combined_indices,
+        combined_indices.stride(0),
+        combined_lens,
+        topk_indices,
+        topk_indices.stride(0),
+        query_start_loc,
+        seq_lens,
+        gather_lens,
+        M,
+        N,
+        TOP_K=topk,
+        COMPRESS_RATIO=compress_ratio,
+        WINDOW_SIZE=window_size,
+        TOPK_WIDTH=topk_indices.shape[-1],
+        PADDED_TOP_K=triton.next_power_of_2(topk_indices.shape[-1]),
+    )
+    return combined_indices, combined_lens
+
+
+@triton.jit
+def _compute_topk_lens_kernel(
+    topk_lens_ptr,
+    topk_indices_ptr,
+    topk_indices_stride,
+    topk,
+    is_valid_token_ptr,
+    TRITON_BLOCK_SIZE: tl.constexpr,
+):
+    token_idx = tl.program_id(0)
+    is_valid_token = tl.load(is_valid_token_ptr + token_idx)
+
+    count = tl.zeros((), dtype=tl.int32)
+    for i in range(0, topk, TRITON_BLOCK_SIZE):
+        offset = i + tl.arange(0, TRITON_BLOCK_SIZE)
+        mask = offset < topk
+        local_idx = tl.load(
+            topk_indices_ptr + token_idx * topk_indices_stride + offset,
+            mask=mask,
+            other=-1,
+        )
+        count += tl.sum((local_idx >= 0).to(tl.int32), axis=0)
+
+    tl.store(topk_lens_ptr + token_idx, tl.where(is_valid_token, count, 0))
+
+
+@triton.jit
+def _pack_global_topk_ragged_kernel(
+    global_topk_ragged_ptr,
+    topk_indptr_ptr,
+    topk_indices_ptr,
+    topk_indices_stride,
+    token_to_req_indices_ptr,
+    block_table_ptr,
+    block_table_stride,
+    block_size,
+    topk,
+    BLOCK_SIZE: tl.constexpr,
+):
+    token_idx = tl.program_id(0)
+    block_idx = tl.program_id(1)
+    offset = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+
+    out_start = tl.load(topk_indptr_ptr + token_idx)
+    out_end = tl.load(topk_indptr_ptr + token_idx + 1)
+    out_len = out_end - out_start
+    if block_idx * BLOCK_SIZE >= out_len:
+        return
+
+    req_idx = tl.load(token_to_req_indices_ptr + token_idx)
+    mask = (offset < out_len) & (offset < topk)
+    local_idx = tl.load(
+        topk_indices_ptr + token_idx * topk_indices_stride + offset,
+        mask=mask,
+        other=-1,
+    )
+    valid = mask & (local_idx >= 0)
+    block_indices = local_idx // block_size
+    block_numbers = tl.load(
+        block_table_ptr + req_idx * block_table_stride + block_indices,
+        mask=valid,
+        other=0,
+    )
+    block_offsets = local_idx % block_size
+    slot_ids = tl.where(valid, block_numbers * block_size + block_offsets, -1)
+    tl.store(global_topk_ragged_ptr + out_start + offset, slot_ids, mask=mask)
+
+
+def compute_global_topk_ragged_indices_and_indptr(
+    topk_indices: torch.Tensor,
+    token_to_req_indices: torch.Tensor,
+    block_table: torch.Tensor,
+    block_size: int,
+    is_valid_token: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    topk_indices = topk_indices.reshape(topk_indices.shape[0], -1).contiguous()
+    num_tokens = topk_indices.shape[0]
+    topk = topk_indices.shape[1]
+
+    topk_lens = torch.empty(num_tokens, dtype=torch.int32, device=topk_indices.device)
+    _compute_topk_lens_kernel[(num_tokens,)](
+        topk_lens,
+        topk_indices,
+        topk_indices.stride(0),
+        topk,
+        is_valid_token,
+        TRITON_BLOCK_SIZE=1024,
+    )
+
+    topk_indptr = _build_indptr_from_lengths(topk_lens)
+    global_topk_ragged = torch.empty(
+        num_tokens * topk,
+        dtype=torch.int32,
+        device=topk_indices.device,
+    )
+    if global_topk_ragged.numel() > 0:
+        block = 128
+        _pack_global_topk_ragged_kernel[(num_tokens, triton.cdiv(topk, block))](
+            global_topk_ragged,
+            topk_indptr,
+            topk_indices,
+            topk_indices.stride(0),
+            token_to_req_indices,
+            block_table,
+            block_table.stride(0),
+            block_size,
+            topk,
+            BLOCK_SIZE=block,
+        )
+    return global_topk_ragged, topk_indptr, topk_lens
+
+
+@triton.jit
+def _compute_combined_lens_kernel(
+    combined_lens_ptr,
+    query_start_loc_ptr,
+    seq_lens_ptr,
+    TOP_K: tl.constexpr,
+    COMPRESS_RATIO: tl.constexpr,
+    WINDOW_SIZE: tl.constexpr,
+):
+    batch_idx = tl.program_id(0)
+    worker_id = tl.program_id(1)
+    num_workers = tl.num_programs(1)
+
+    base = tl.load(query_start_loc_ptr)
+    query_start = tl.load(query_start_loc_ptr + batch_idx) - base
+    query_end = tl.load(query_start_loc_ptr + batch_idx + 1) - base
+    query_len = query_end - query_start
+    seq_len = tl.load(seq_lens_ptr + batch_idx)
+    start_pos = seq_len - query_len
+
+    for token_idx in range(query_start + worker_id, query_end, num_workers):
+        token_idx_in_query = token_idx - query_start
+        pos = start_pos + token_idx_in_query
+        topk_len = tl.minimum((pos + 1) // COMPRESS_RATIO, TOP_K)
+        swa_len = tl.minimum(pos + 1, WINDOW_SIZE)
+        tl.store(combined_lens_ptr + token_idx, topk_len + swa_len)
+
+
+@triton.jit
+def _combine_topk_swa_indices_ragged_kernel(
+    combined_ragged_ptr,
+    combined_indptr_ptr,
+    topk_indices_ptr,
+    topk_indices_stride,
+    query_start_loc_ptr,
+    seq_lens_ptr,
+    gather_lens_ptr,
+    M,
+    N,
+    topk_width,
+    TOP_K: tl.constexpr,
+    COMPRESS_RATIO: tl.constexpr,
+    WINDOW_SIZE: tl.constexpr,
+    BLOCK_SIZE: tl.constexpr,
+):
+    batch_idx = tl.program_id(0)
+    worker_id = tl.program_id(1)
+    block_idx = tl.program_id(2)
+    num_workers = tl.num_programs(1)
+
+    base = tl.load(query_start_loc_ptr)
+    query_start = tl.load(query_start_loc_ptr + batch_idx) - base
+    query_end = tl.load(query_start_loc_ptr + batch_idx + 1) - base
+    query_len = query_end - query_start
+    seq_len = tl.load(seq_lens_ptr + batch_idx)
+    gather_len = tl.load(gather_lens_ptr + batch_idx)
+    start_pos = seq_len - query_len
+    gather_start = seq_len - gather_len
+
+    for token_idx in range(query_start + worker_id, query_end, num_workers):
+        token_idx_in_query = token_idx - query_start
+        pos = start_pos + token_idx_in_query
+        topk_len = tl.minimum((pos + 1) // COMPRESS_RATIO, TOP_K)
+        swa_len = tl.minimum(pos + 1, WINDOW_SIZE)
+        combined_len = topk_len + swa_len
+
+        offset = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+        if block_idx * BLOCK_SIZE < combined_len:
+            out_start = tl.load(combined_indptr_ptr + token_idx)
+            topk_mask = (offset < topk_len) & (offset < topk_width)
+            topk_vals = tl.load(
+                topk_indices_ptr + token_idx * topk_indices_stride + offset,
+                mask=topk_mask,
+                other=-1,
+            )
+            tl.store(
+                combined_ragged_ptr + out_start + offset,
+                topk_vals + M * batch_idx,
+                mask=topk_mask,
+            )
+
+            swa_offset = offset - topk_len
+            swa_mask = (offset >= topk_len) & (swa_offset < swa_len)
+            tl.store(
+                combined_ragged_ptr + out_start + offset,
+                M * batch_idx + N + swa_offset + pos - swa_len + 1 - gather_start,
+                mask=swa_mask,
+            )
+
+
+def combine_topk_swa_indices_ragged(
+    topk_indices: torch.Tensor,
+    query_start_loc: torch.Tensor,
+    seq_lens: torch.Tensor,
+    gather_lens: torch.Tensor,
+    window_size: int,
+    compress_ratio: int,
+    topk: int,
+    M: int,
+    N: int,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    topk_indices = topk_indices.reshape(topk_indices.shape[0], -1).contiguous()
+    num_tokens = topk_indices.shape[0]
+    num_reqs = seq_lens.shape[0]
+    combined_lens = torch.empty(
+        num_tokens, dtype=torch.int32, device=topk_indices.device
+    )
+
+    num_workers = 128
+    _compute_combined_lens_kernel[(num_reqs, num_workers)](
+        combined_lens,
+        query_start_loc,
+        seq_lens,
+        TOP_K=topk,
+        COMPRESS_RATIO=compress_ratio,
+        WINDOW_SIZE=window_size,
+    )
+
+    combined_indptr = _build_indptr_from_lengths(combined_lens)
+    combined_ragged = torch.empty(
+        num_tokens * (topk + window_size),
+        dtype=torch.int32,
+        device=topk_indices.device,
+    )
+    if combined_ragged.numel() > 0:
+        block = 128
+        _combine_topk_swa_indices_ragged_kernel[
+            (num_reqs, num_workers, triton.cdiv(topk + window_size, block))
+        ](
+            combined_ragged,
+            combined_indptr,
+            topk_indices,
+            topk_indices.stride(0),
+            query_start_loc,
+            seq_lens,
+            gather_lens,
+            M,
+            N,
+            topk_indices.shape[-1],
+            TOP_K=topk,
+            COMPRESS_RATIO=compress_ratio,
+            WINDOW_SIZE=window_size,
+            BLOCK_SIZE=block,
+        )
+    return combined_ragged, combined_indptr, combined_lens
+
+
+def _copy_ragged_to_graph_buffers(
+    ragged_indices: torch.Tensor,
+    ragged_indptr: torch.Tensor,
+    ragged_indices_buffer: torch.Tensor,
+    ragged_indptr_buffer: torch.Tensor,
+    num_rows: int,
+    max_entries_per_row: int,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Copy dynamic ragged metadata into persistent CUDA graph buffers.
+
+    FULL decode graphs capture kernel argument addresses. Keep the returned
+    tensors backed by stable storage, while indptr continues to bound reads.
+    """
+    indptr_out = ragged_indptr_buffer[: num_rows + 1]
+    indptr_out.copy_(ragged_indptr, non_blocking=True)
+
+    max_entries = max(num_rows * max_entries_per_row, 1)
+    ragged_out = ragged_indices_buffer[:max_entries]
+    nnz = ragged_indices.numel()
+    if nnz > 0:
+        ragged_out[:nnz].copy_(ragged_indices, non_blocking=True)
+    return ragged_out, indptr_out
+
+
+@dataclass
+class DeepseekV4ROCMAiterMLASparseMetadata(FlashMLASparseMetadata):
+    """ROCm-specific DeepSeek V4 metadata carrying ragged decode topk."""
+
+    c128a_decode_topk_ragged_indices: torch.Tensor | None = None
+    c128a_decode_topk_ragged_indptr: torch.Tensor | None = None
+
+
+@dataclass
+class DeepseekV4ROCMAiterSparseSWAMetadata(DeepseekSparseSWAMetadata):
+    decode_swa_ragged_indices: torch.Tensor | None = None
+    decode_swa_ragged_indptr: torch.Tensor | None = None
+
+
+class DeepseekV4ROCMAiterMLASparseMetadataBuilder(FlashMLASparseMetadataBuilder):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.c128a_decode_topk_ragged_indices_buffer: torch.Tensor | None = None
+        self.c128a_decode_topk_ragged_indptr_buffer: torch.Tensor | None = None
+        if self.is_deepseek_v4 and self.compress_ratio == 128:
+            max_tokens = self.vllm_config.scheduler_config.max_num_batched_tokens
+            self.c128a_decode_topk_ragged_indices_buffer = torch.empty(
+                max_tokens * self.c128a_max_compressed,
+                dtype=torch.int32,
+                device=self.device,
+            )
+            self.c128a_decode_topk_ragged_indptr_buffer = torch.empty(
+                max_tokens + 1,
+                dtype=torch.int32,
+                device=self.device,
+            )
+
+    def build(
+        self,
+        common_prefix_len: int,
+        common_attn_metadata: CommonAttentionMetadata,
+        fast_build: bool = False,
+    ) -> DeepseekV4ROCMAiterMLASparseMetadata:
+        base = super().build(
+            common_prefix_len=common_prefix_len,
+            common_attn_metadata=common_attn_metadata,
+            fast_build=fast_build,
+        )
+
+        ragged_indices = None
+        ragged_indptr = None
+        dense_decode = base.c128a_global_decode_topk_indices
+        decode_lens = base.c128a_decode_topk_lens
+        if dense_decode is not None and decode_lens is not None:
+            ragged_indices, ragged_indptr = build_ragged_indices_from_dense(
+                dense_decode.reshape(dense_decode.shape[0], -1),
+                decode_lens,
+            )
+            assert self.c128a_decode_topk_ragged_indices_buffer is not None
+            assert self.c128a_decode_topk_ragged_indptr_buffer is not None
+            ragged_indices, ragged_indptr = _copy_ragged_to_graph_buffers(
+                ragged_indices,
+                ragged_indptr,
+                self.c128a_decode_topk_ragged_indices_buffer,
+                self.c128a_decode_topk_ragged_indptr_buffer,
+                dense_decode.shape[0],
+                self.c128a_max_compressed,
+            )
+
+        return DeepseekV4ROCMAiterMLASparseMetadata(
+            **vars(base),
+            c128a_decode_topk_ragged_indices=ragged_indices,
+            c128a_decode_topk_ragged_indptr=ragged_indptr,
+        )
+
+
+class DeepseekV4ROCMAiterSparseSWAMetadataBuilder(DeepseekSparseSWAMetadataBuilder):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        max_tokens = self.vllm_config.scheduler_config.max_num_batched_tokens
+        self.decode_swa_ragged_indices_buffer = torch.empty(
+            max_tokens * self.window_size,
+            dtype=torch.int32,
+            device=self.device,
+        )
+        self.decode_swa_ragged_indptr_buffer = torch.empty(
+            max_tokens + 1,
+            dtype=torch.int32,
+            device=self.device,
+        )
+
+    def build(
+        self,
+        common_prefix_len: int,
+        common_attn_metadata: CommonAttentionMetadata,
+        fast_build: bool = False,
+    ) -> DeepseekV4ROCMAiterSparseSWAMetadata:
+        base = super().build(
+            common_prefix_len=common_prefix_len,
+            common_attn_metadata=common_attn_metadata,
+            fast_build=fast_build,
+        )
+
+        ragged_indices = None
+        ragged_indptr = None
+        if (
+            base.num_decode_tokens > 0
+            and base.decode_swa_indices is not None
+            and base.decode_swa_lens is not None
+        ):
+            ragged_indices, ragged_indptr = build_ragged_indices_from_dense(
+                base.decode_swa_indices.reshape(base.num_decode_tokens, -1),
+                base.decode_swa_lens,
+            )
+            ragged_indices, ragged_indptr = _copy_ragged_to_graph_buffers(
+                ragged_indices,
+                ragged_indptr,
+                self.decode_swa_ragged_indices_buffer,
+                self.decode_swa_ragged_indptr_buffer,
+                base.num_decode_tokens,
+                self.window_size,
+            )
+
+        return DeepseekV4ROCMAiterSparseSWAMetadata(
+            **vars(base),
+            decode_swa_ragged_indices=ragged_indices,
+            decode_swa_ragged_indptr=ragged_indptr,
+        )
+
+
+class DeepseekV4ROCMAiterMLASparseBackend(DeepseekV4FlashMLASparseBackend):
+    @staticmethod
+    def get_name() -> str:
+        return "ROCM_V4_FLASHMLA_SPARSE"
+
+    @staticmethod
+    def get_builder_cls() -> type["DeepseekV4ROCMAiterMLASparseMetadataBuilder"]:
+        return DeepseekV4ROCMAiterMLASparseMetadataBuilder
+
+    @staticmethod
+    def get_impl_cls() -> type["DeepseekV4SparseMLAAttentionImpl"]:
+        return DeepseekV4ROCMAiterMLASparseImpl
+
+
+class DeepseekV4ROCMAiterMLASparseImpl(DeepseekV4SparseMLAAttentionImpl):
+    """ROCm sparse MLA implementation used by DeepSeek V4's custom MLA layer."""
+
+    backend_cls = DeepseekV4ROCMAiterMLASparseBackend
+
+    @classmethod
+    def forward_mqa(  # type: ignore[override]
+        cls,
+        layer: "DeepseekV4MLAAttention",
+        q: torch.Tensor,
+        kv: torch.Tensor,
+        positions: torch.Tensor,
+        output: torch.Tensor,
+    ) -> None:
+        assert output.shape == q.shape, (
+            f"output buffer shape {output.shape} must match q shape {q.shape}"
+        )
+        assert output.dtype == q.dtype, (
+            f"output buffer dtype {output.dtype} must match q dtype {q.dtype}"
+        )
+
+        forward_context = get_forward_context()
+        attn_metadata = forward_context.attn_metadata
+
+        if attn_metadata is None:
+            # Warmup dummy run: no real metadata. Reserve the same bf16
+            # gather workspace _forward_prefill would; the dequantize / topk
+            # / sparse_fwd kernels are skipped this step.
+            swa_only = layer.compress_ratio <= 1
+            N = (
+                0
+                if swa_only
+                else (layer.max_model_len + layer.compress_ratio - 1)
+                // layer.compress_ratio
+            )
+            M = N + layer.window_size + layer.max_num_batched_tokens
+            current_workspace_manager().get_simultaneous(
+                ((cls.PREFILL_CHUNK_SIZE, M, q.shape[-1]), torch.bfloat16),
+            )
+            output.zero_()
+            return
+
+        assert isinstance(attn_metadata, dict)
+        rocm_metadata = cast(
+            DeepseekV4ROCMAiterMLASparseMetadata | None,
+            attn_metadata.get(layer.prefix),
+        )
+        swa_metadata = cast(
+            DeepseekV4ROCMAiterSparseSWAMetadata | None,
+            attn_metadata.get(layer.swa_cache_layer.prefix),
+        )
+        assert swa_metadata is not None
+
+        swa_only = layer.compress_ratio <= 1
+        self_kv_cache = layer.kv_cache if not swa_only else None
+        swa_kv_cache = layer.swa_cache_layer.kv_cache
+
+        num_decodes = swa_metadata.num_decodes
+        num_prefills = swa_metadata.num_prefills
+        num_decode_tokens = swa_metadata.num_decode_tokens
+
+        if num_prefills > 0:
+            cls._forward_prefill(
+                layer=layer,
+                q=q[num_decode_tokens:],
+                positions=positions[num_decode_tokens:],
+                compressed_k_cache=self_kv_cache,
+                swa_k_cache=swa_kv_cache,
+                output=output[num_decode_tokens:],
+                attn_metadata=rocm_metadata,
+                swa_metadata=swa_metadata,
+            )
+        if num_decodes > 0:
+            cls._forward_decode(
+                layer=layer,
+                q=q[:num_decode_tokens],
+                kv_cache=self_kv_cache,
+                swa_metadata=swa_metadata,
+                attn_metadata=rocm_metadata,
+                swa_only=swa_only,
+                output=output[:num_decode_tokens],
+            )
+
+    @classmethod
+    def _forward_decode(
+        cls,
+        layer: "DeepseekV4MLAAttention",
+        q: torch.Tensor,
+        kv_cache: torch.Tensor | None,
+        swa_metadata: DeepseekV4ROCMAiterSparseSWAMetadata,
+        attn_metadata: DeepseekV4ROCMAiterMLASparseMetadata | None,
+        swa_only: bool,
+        output: torch.Tensor,
+    ) -> None:
+        num_decodes = swa_metadata.num_decodes
+        num_decode_tokens = swa_metadata.num_decode_tokens
+
+        topk_indices = None
+        topk_lens = None
+        topk_ragged_indices = None
+        topk_ragged_indptr = None
+        if not swa_only:
+            assert attn_metadata is not None
+            assert swa_metadata.is_valid_token is not None
+            block_size = attn_metadata.block_size // layer.compress_ratio
+            is_valid = swa_metadata.is_valid_token[:num_decode_tokens]
+            if layer.compress_ratio == 4:
+                assert layer.topk_indices_buffer is not None
+                (
+                    topk_ragged_indices,
+                    topk_ragged_indptr,
+                    topk_lens,
+                ) = compute_global_topk_ragged_indices_and_indptr(
+                    layer.topk_indices_buffer[:num_decode_tokens],
+                    swa_metadata.token_to_req_indices,
+                    attn_metadata.block_table[:num_decodes],
+                    block_size,
+                    is_valid,
+                )
+            else:
+                topk_indices = attn_metadata.c128a_global_decode_topk_indices
+                topk_lens = attn_metadata.c128a_decode_topk_lens
+                topk_ragged_indices = attn_metadata.c128a_decode_topk_ragged_indices
+                topk_ragged_indptr = attn_metadata.c128a_decode_topk_ragged_indptr
+
+        rocm_sparse_attn_decode(
+            q=q,
+            kv_cache=kv_cache,
+            swa_k_cache=layer.swa_cache_layer.kv_cache,
+            swa_only=swa_only,
+            topk_indices=topk_indices,
+            topk_lens=topk_lens,
+            swa_indices=swa_metadata.decode_swa_indices,
+            swa_lens=swa_metadata.decode_swa_lens,
+            swa_ragged_indices=swa_metadata.decode_swa_ragged_indices,
+            swa_ragged_indptr=swa_metadata.decode_swa_ragged_indptr,
+            topk_ragged_indices=topk_ragged_indices,
+            topk_ragged_indptr=topk_ragged_indptr,
+            attn_sink=layer.attn_sink,
+            scale=layer.scale,
+            head_dim=layer.head_dim,
+            nope_head_dim=layer.nope_head_dim,
+            rope_head_dim=layer.rope_head_dim,
+            output=output,
+        )
+
+    @classmethod
+    def _forward_prefill(
+        cls,
+        layer: "DeepseekV4MLAAttention",
+        q: torch.Tensor,
+        positions: torch.Tensor,
+        compressed_k_cache: torch.Tensor | None,
+        swa_k_cache: torch.Tensor,
+        output: torch.Tensor,
+        attn_metadata: DeepseekV4ROCMAiterMLASparseMetadata | None,
+        swa_metadata: DeepseekV4ROCMAiterSparseSWAMetadata,
+    ) -> None:
+        swa_only = attn_metadata is None
+
+        num_prefills = swa_metadata.num_prefills
+        num_prefill_tokens = swa_metadata.num_prefill_tokens
+        num_decodes = swa_metadata.num_decodes
+        num_decode_tokens = swa_metadata.num_decode_tokens
+
+        seq_lens = swa_metadata.prefill_seq_lens
+        gather_lens = swa_metadata.prefill_gather_lens
+        assert seq_lens is not None
+        assert gather_lens is not None
+
+        query_start_loc_cpu = swa_metadata.query_start_loc_cpu
+        query_start_loc = swa_metadata.query_start_loc
+        assert query_start_loc_cpu is not None
+        assert query_start_loc is not None
+        prefill_token_base = query_start_loc_cpu[num_decodes]
+
+        if not swa_only:
+            if layer.compress_ratio == 4:
+                assert layer.topk_indices_buffer is not None
+                topk_indices = layer.topk_indices_buffer[num_decode_tokens:]
+                topk_indices = topk_indices[:num_prefill_tokens]
+            else:
+                assert attn_metadata is not None
+                topk_indices = attn_metadata.c128a_prefill_topk_indices
+            assert topk_indices is not None
+            top_k = topk_indices.shape[-1]
+            N = (layer.max_model_len + layer.compress_ratio - 1) // layer.compress_ratio
+        else:
+            assert layer.topk_indices_buffer is not None
+            topk_indices = layer.topk_indices_buffer[num_decode_tokens:]
+            top_k = 0
+            N = 0
+
+        M = N + layer.window_size + layer.max_num_batched_tokens
+        num_chunks = (num_prefills + cls.PREFILL_CHUNK_SIZE - 1) // (
+            cls.PREFILL_CHUNK_SIZE
+        )
+
+        workspace_manager = current_workspace_manager()
+        kv = workspace_manager.get_simultaneous(
+            ((cls.PREFILL_CHUNK_SIZE, M, q.shape[-1]), torch.bfloat16),
+        )[0]
+        for chunk_idx in range(num_chunks):
+            chunk_start = chunk_idx * cls.PREFILL_CHUNK_SIZE
+            chunk_end = min(chunk_start + cls.PREFILL_CHUNK_SIZE, num_prefills)
+            chunk_size = chunk_end - chunk_start
+            if not swa_only:
+                assert attn_metadata is not None
+                assert compressed_k_cache is not None
+                block_table = attn_metadata.block_table[num_decodes:]
+                dequantize_and_gather_k_cache(
+                    kv[:chunk_size],
+                    compressed_k_cache,
+                    seq_lens=seq_lens[chunk_start:chunk_end] // layer.compress_ratio,
+                    gather_lens=None,
+                    block_table=block_table[chunk_start:chunk_end],
+                    block_size=attn_metadata.block_size // layer.compress_ratio,
+                    offset=0,
+                )
+
+            swa_block_table = swa_metadata.block_table[num_decodes:]
+            dequantize_and_gather_k_cache(
+                kv[:chunk_size],
+                swa_k_cache,
+                seq_lens=seq_lens[chunk_start:chunk_end],
+                gather_lens=gather_lens[chunk_start:chunk_end],
+                block_table=swa_block_table[chunk_start:chunk_end],
+                block_size=swa_metadata.block_size,
+                offset=N,
+            )
+
+            query_start = (
+                query_start_loc_cpu[num_decodes + chunk_start] - prefill_token_base
+            )
+            query_end = (
+                query_start_loc_cpu[num_decodes + chunk_end] - prefill_token_base
+            )
+
+            combined_indices, combined_lens = combine_topk_swa_indices(
+                topk_indices[query_start:query_end],
+                query_start_loc[
+                    num_decodes + chunk_start : num_decodes + chunk_end + 1
+                ],
+                seq_lens[chunk_start:chunk_end],
+                gather_lens[chunk_start:chunk_end],
+                layer.window_size,
+                layer.compress_ratio,
+                top_k,
+                M,
+                N,
+            )
+            rocm_sparse_attn_prefill(
+                q=q[query_start:query_end],
+                kv=kv.view(-1, 1, q.shape[-1]),
+                indices=combined_indices,
+                topk_length=combined_lens,
+                scale=layer.scale,
+                head_dim=layer.head_dim,
+                nope_head_dim=layer.nope_head_dim,
+                rope_head_dim=layer.rope_head_dim,
+                attn_sink=layer.attn_sink,
+                output=output[query_start:query_end],
+            )
diff --git a/vllm/models/deepseek_v4/common/__init__.py b/vllm/models/deepseek_v4/common/__init__.py
new file mode 100644
index 000000000000..208f01a7cb5e
--- /dev/null
+++ b/vllm/models/deepseek_v4/common/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
diff --git a/vllm/models/deepseek_v4/common/ops/__init__.py b/vllm/models/deepseek_v4/common/ops/__init__.py
new file mode 100644
index 000000000000..959a79f292a5
--- /dev/null
+++ b/vllm/models/deepseek_v4/common/ops/__init__.py
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from .cache_utils import (
+    combine_topk_swa_indices,
+    compute_global_topk_indices_and_lens,
+    dequantize_and_gather_k_cache,
+    quantize_and_insert_k_cache,
+)
+from .fused_indexer_q import MXFP4_BLOCK_SIZE, fused_indexer_q_rope_quant
+from .fused_inv_rope_fp8_quant import fused_inv_rope_fp8_quant
+from .fused_qk_rmsnorm import fused_q_kv_rmsnorm
+
+__all__ = [
+    "MXFP4_BLOCK_SIZE",
+    "combine_topk_swa_indices",
+    "compute_global_topk_indices_and_lens",
+    "dequantize_and_gather_k_cache",
+    "fused_indexer_q_rope_quant",
+    "fused_inv_rope_fp8_quant",
+    "fused_q_kv_rmsnorm",
+    "quantize_and_insert_k_cache",
+]
diff --git a/vllm/models/deepseek_v4/common/ops/cache_utils.py b/vllm/models/deepseek_v4/common/ops/cache_utils.py
new file mode 100644
index 000000000000..ac66751e3111
--- /dev/null
+++ b/vllm/models/deepseek_v4/common/ops/cache_utils.py
@@ -0,0 +1,594 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Triton kernels for DeepseekV4 paged K-cache management and sparse-attention index
+preparation.
+
+- quantize_and_insert_k_cache: quantize bf16 K to UE8M0 FP8 and insert into
+  the paged cache.
+- dequantize_and_gather_k_cache: gather and dequantize FP8 K from the paged
+  cache for sparse/SWA prefill.
+- compute_global_topk_indices_and_lens: map local topk indices to global KV
+  cache slots and count valid entries.
+- combine_topk_swa_indices: concatenate topk compressed indices with SWA
+  window indices for sparse prefill.
+"""
+
+import torch
+
+from vllm.triton_utils import tl, triton
+from vllm.utils.import_utils import has_cutedsl
+
+
+@triton.jit
+def quantize_and_insert_k_kernel(
+    # Input tensors
+    k_ptr,  # [num_tokens, 512] bf16
+    slot_mapping_ptr,  # [num_tokens] int64
+    # Output tensor
+    k_cache_ptr,  # [num_blocks, block_bytes] as uint8 (flattened view)
+    # Dimensions
+    num_tokens,
+    input_dim: tl.constexpr,  # 512
+    fp8_dim: tl.constexpr,  # 448
+    bf16_dim: tl.constexpr,  # 64
+    scale_dim: tl.constexpr,  # 8
+    quant_block: tl.constexpr,  # 64 (quantization block size)
+    cache_block_size: tl.constexpr,  # 64 (paged cache block size)
+    token_data_size: tl.constexpr,  # 576 bytes per token data
+    block_stride: tl.constexpr,  # total bytes per block (padded)
+    fp8_max: tl.constexpr,
+    n_quant_blocks: tl.constexpr,  # 8 (7 real + 1 padding)
+):
+    """
+    Quantize K tensor and insert into paged K cache.
+
+    K Cache block layout (block_size=64 tokens):
+    - [0, 64*576): Token data, each token has 448 fp8 + 128 bf16
+    - [64*576, 64*576 + 64*8): Scales, each token has 8 uint8 scales
+    - [64*576 + 64*8, block_stride): Padding
+
+    One program per token.
+    """
+    pid = tl.program_id(0)
+
+    if pid >= num_tokens:
+        return
+
+    # Get slot mapping
+    slot_idx = tl.load(slot_mapping_ptr + pid)
+    if slot_idx == -1:
+        return
+
+    block_idx = slot_idx // cache_block_size
+    pos_in_block = slot_idx % cache_block_size
+
+    # Input pointer for this token
+    input_row_ptr = k_ptr + pid * input_dim
+
+    # int64: block_idx * block_stride can exceed 2^31 with many KV-cache blocks
+    # (e.g. >= 57K at block_stride ~37K). Matches gather path below.
+    cache_block_ptr = k_cache_ptr + block_idx.to(tl.int64) * block_stride
+
+    # Token data pointer: token data is stored contiguously at start of block
+    # Each token's data is at offset pos_in_block * token_data_size
+    token_data_ptr = cache_block_ptr + pos_in_block * token_data_size
+
+    # Scale pointer: scales are stored after ALL token data in the block
+    # Scale for this token is at offset (64 * 576) + pos_in_block * 8
+    token_scale_ptr = (
+        cache_block_ptr + cache_block_size * token_data_size + pos_in_block * scale_dim
+    )
+
+    # Token data layout: [0:448] fp8, [448:576] bf16
+    token_fp8_ptr = token_data_ptr
+    token_bf16_ptr = token_data_ptr + fp8_dim
+
+    # ========== Quantize and store FP8 portion (first 448 elements) ==========
+    # Using UE8M0 quantization strategy (scale is power of 2, stored as uint8 exponent)
+    for qblock_idx in tl.static_range(n_quant_blocks):
+        qblock_start = qblock_idx * quant_block
+
+        if qblock_start < fp8_dim:
+            offsets = qblock_start + tl.arange(0, quant_block)
+            mask = offsets < fp8_dim
+
+            # Load bf16 input
+            x = tl.load(input_row_ptr + offsets, mask=mask, other=0.0)
+
+            # Compute absmax scale (same as CUDA kernel)
+            abs_x = tl.abs(x)
+            block_max = tl.max(abs_x, axis=0)
+            block_max = tl.maximum(block_max, 1e-4)  # Match CUDA: fmaxf(amax, 1e-4)
+
+            # UE8M0: Round scale UP to next power of 2
+            # scale = 2^ceil(log2(block_max / fp8_max))
+            raw_scale = block_max / fp8_max
+            log_scale = tl.log2(raw_scale)
+            exponent = tl.ceil(log_scale)  # Round UP to next integer exponent
+            scale = tl.exp2(exponent)  # scale = 2^exponent (power of 2)
+
+            # Quantize to fp8: fp8_value = bf16_value / scale
+            x_scaled = x / scale
+            x_clamped = tl.clamp(x_scaled, -fp8_max, fp8_max)
+
+            # Convert to fp8, then bitcast to uint8 for storage
+            x_fp8 = x_clamped.to(tl.float8e4nv)
+            x_uint8 = x_fp8.to(tl.uint8, bitcast=True)
+
+            # Store as uint8 (1 byte each)
+            tl.store(token_fp8_ptr + offsets, x_uint8, mask=mask)
+
+            # UE8M0 scale encoding: stored_value = exponent + 127 (bias)
+            # During dequant: scale = 2^(stored_value - 127)
+            encoded_scale = exponent + 127.0
+            encoded_scale = tl.maximum(tl.minimum(encoded_scale, 255.0), 0.0)
+            tl.store(token_scale_ptr + qblock_idx, encoded_scale.to(tl.uint8))
+
+    # Padding scale at index 7
+    tl.store(token_scale_ptr + 7, tl.zeros((), dtype=tl.uint8))
+
+    # ========== Store BF16 portion (last 64 elements, no quantization) ==========
+    bf16_input_offset = fp8_dim
+
+    # Process bf16 in chunks of 16
+    bf16_out_ptr = token_bf16_ptr.to(tl.pointer_type(tl.bfloat16))
+    for i in tl.static_range(bf16_dim // 16):
+        chunk_offsets = i * 16 + tl.arange(0, 16)
+        bf16_vals = tl.load(input_row_ptr + bf16_input_offset + chunk_offsets)
+        tl.store(bf16_out_ptr + chunk_offsets, bf16_vals)
+
+
+def quantize_and_insert_k_cache(
+    k: torch.Tensor,  # [num_tokens, 512] bf16
+    k_cache: torch.Tensor,  # [num_blocks, block_bytes] uint8
+    slot_mapping: torch.Tensor,  # [num_tokens] int64
+    block_size: int = 64,
+    is_ue8m0: bool = True,
+):
+    """
+    Quantize K tensor and insert into paged K cache.
+
+    K Cache block layout (block_size=64 tokens):
+    - First 64 * 576 = 36864 bytes: Token data
+      - Each token: 448 bytes (fp8) + 128 bytes (bf16)
+    - Next 64 * 8 = 512 bytes: Scales
+      - Each token: 8 bytes (uint8 scales, 7 real + 1 padding)
+    - Padded to multiple of 576
+    """
+    assert k.dim() == 2 and k.shape[1] == 512, (
+        f"K must be [num_tokens, 512], got {k.shape}"
+    )
+    assert k.dtype == torch.bfloat16, f"K must be bf16, got {k.dtype}"
+    assert is_ue8m0, "Only support ue8m0 quantization."
+
+    # NOTE: When using DP, slot_mapping.shape[0] can be less than k.shape[0] due to
+    # padding. Always use slot_mapping.shape[0] as the token count.
+    num_tokens = slot_mapping.shape[0]
+    block_stride = k_cache.stride(0)  # bytes per block
+
+    TOKEN_FP8_DIM = 448
+    TOKEN_BF16_DIM = 64
+    TOKEN_SCALE_DIM = 8
+    QUANT_BLOCK_SIZE = 64
+    FP8_MAX = 448.0
+    TOKEN_DATA_SIZE = TOKEN_FP8_DIM + TOKEN_BF16_DIM * 2
+
+    grid = (num_tokens,)
+
+    quantize_and_insert_k_kernel[grid](
+        k,
+        slot_mapping,
+        k_cache,
+        num_tokens,
+        input_dim=512,
+        fp8_dim=TOKEN_FP8_DIM,
+        bf16_dim=TOKEN_BF16_DIM,
+        scale_dim=TOKEN_SCALE_DIM,
+        quant_block=QUANT_BLOCK_SIZE,
+        cache_block_size=block_size,
+        token_data_size=TOKEN_DATA_SIZE,
+        block_stride=block_stride,
+        fp8_max=FP8_MAX,
+        n_quant_blocks=8,
+    )
+
+
+@triton.jit
+def _dequantize_and_gather_k_kernel(
+    out_ptr,
+    out_stride0,
+    out_stride1,
+    k_cache_ptr,
+    seq_lens_ptr,
+    block_table_ptr,
+    offset,
+    gather_lens_ptr,
+    # Constants
+    max_blocks_per_seq: tl.constexpr,
+    fp8_dim: tl.constexpr,  # 448
+    bf16_dim: tl.constexpr,  # 64
+    scale_dim: tl.constexpr,  # 8
+    quant_block: tl.constexpr,  # 64 (quantization block size)
+    cache_block_size: tl.constexpr,  # 64 or 128 (paged cache block size)
+    token_data_size: tl.constexpr,  # 576 bytes per token data
+    block_stride: tl.constexpr,  # total bytes per block (padded) int32
+    output_dim: tl.constexpr,  # 512
+    fp8_max: tl.constexpr,
+    n_quant_blocks: tl.constexpr,  # 7 real blocks
+):
+    batch_idx = tl.program_id(0)
+    worker_id = tl.program_id(1)
+    num_workers = tl.num_programs(1)
+
+    seq_len = tl.load(seq_lens_ptr + batch_idx)
+    if gather_lens_ptr is not None:  # noqa: SIM108
+        gather_len = tl.load(gather_lens_ptr + batch_idx)
+    else:
+        # Gather all tokens
+        gather_len = seq_len
+    start_pos = seq_len - gather_len
+
+    for i in range(worker_id, gather_len, num_workers):
+        # Calculate the actual token index in the sequence
+        pos = start_pos + i
+
+        # Calculate which block and position within block
+        block_in_seq = pos // cache_block_size
+        pos_in_block = pos % cache_block_size
+
+        # Get physical block index from block table
+        block_table_row_ptr = block_table_ptr + batch_idx * max_blocks_per_seq
+        physical_block_idx = tl.load(block_table_row_ptr + block_in_seq)  # int32
+
+        # int64: physical_block_idx * block_stride can exceed 2^31 with many
+        # KV-cache blocks (e.g. >= 57K at block_stride ~37K).
+        cache_block_ptr = k_cache_ptr + physical_block_idx.to(tl.int64) * block_stride
+
+        # Token data pointer
+        token_data_ptr = cache_block_ptr + pos_in_block * token_data_size
+
+        # Scale pointer: after all token data
+        token_scale_ptr = (
+            cache_block_ptr
+            + cache_block_size * token_data_size
+            + pos_in_block * scale_dim
+        )
+
+        # Token data layout: [0:448] fp8, [448:576] bf16
+        token_fp8_ptr = token_data_ptr
+        token_bf16_ptr = token_data_ptr + fp8_dim
+
+        # Output pointer for this token (flattened)
+        output_row_ptr = out_ptr + batch_idx * out_stride0 + (offset + i) * out_stride1
+
+        # ========== Dequantize FP8 portion using UE8M0 ==========
+        for qblock_idx in tl.static_range(n_quant_blocks):
+            qblock_start = qblock_idx * quant_block
+
+            if qblock_start < fp8_dim:
+                offsets = qblock_start + tl.arange(0, quant_block)
+                mask = offsets < fp8_dim
+
+                # Load quantized fp8 values (stored as uint8)
+                x_uint8 = tl.load(token_fp8_ptr + offsets, mask=mask, other=0)
+
+                # Bitcast uint8 back to fp8
+                x_fp8 = x_uint8.to(tl.float8e4nv, bitcast=True)
+
+                # Convert fp8 to float32 for computation
+                x_float = x_fp8.to(tl.float32)
+
+                # Load and decode UE8M0 scale
+                # UE8M0: scale = 2^(stored_value - 127)
+                encoded_scale = tl.load(token_scale_ptr + qblock_idx)
+                exponent = encoded_scale.to(tl.float32) - 127.0
+                scale = tl.exp2(exponent)
+
+                # Dequantize: bf16_value = fp8_value * scale
+                x_dequant = x_float * scale
+
+                # Store as bf16
+                tl.store(output_row_ptr + offsets, x_dequant.to(tl.bfloat16), mask=mask)
+
+        # ========== Copy BF16 portion directly ==========
+        bf16_output_offset = fp8_dim  # After 448 elements in output
+
+        # Read bf16 from cache
+        bf16_cache_ptr = token_bf16_ptr.to(tl.pointer_type(tl.bfloat16))
+
+        # Process in chunks of 16
+        for j in tl.static_range(bf16_dim // 16):
+            chunk_offsets = j * 16 + tl.arange(0, 16)
+            bf16_vals = tl.load(bf16_cache_ptr + chunk_offsets)
+            tl.store(output_row_ptr + bf16_output_offset + chunk_offsets, bf16_vals)
+
+
+def dequantize_and_gather_k_cache_triton(
+    # [num_reqs, max_num_tokens, head_size]
+    out: torch.Tensor,
+    # [num_blocks, block_size, head_bytes]
+    k_cache: torch.Tensor,
+    # [num_reqs]
+    seq_lens: torch.Tensor,
+    # [num_reqs]
+    gather_lens: torch.Tensor | None,
+    # [num_reqs, max_blocks_per_seq]
+    block_table: torch.Tensor,
+    block_size: int,
+    offset: int,
+) -> None:
+    TOKEN_FP8_DIM = 448
+    TOKEN_BF16_DIM = 64
+    TOKEN_SCALE_DIM = 8
+    QUANT_BLOCK_SIZE = 64
+    FP8_MAX = 448.0
+    TOKEN_DATA_SIZE = TOKEN_FP8_DIM + TOKEN_BF16_DIM * 2
+
+    num_reqs = seq_lens.shape[0]
+    NUM_WORKERS = 128
+    _dequantize_and_gather_k_kernel[(num_reqs, NUM_WORKERS)](
+        out,
+        out.stride(0),
+        out.stride(1),
+        k_cache,
+        seq_lens,
+        block_table,
+        offset,
+        gather_lens,
+        max_blocks_per_seq=block_table.shape[-1],
+        fp8_dim=TOKEN_FP8_DIM,
+        bf16_dim=TOKEN_BF16_DIM,
+        scale_dim=TOKEN_SCALE_DIM,
+        quant_block=QUANT_BLOCK_SIZE,
+        cache_block_size=block_size,
+        token_data_size=TOKEN_DATA_SIZE,
+        block_stride=k_cache.stride(0),
+        output_dim=512,
+        fp8_max=FP8_MAX,
+        n_quant_blocks=7,
+    )
+
+
+def dequantize_and_gather_k_cache(
+    # [num_reqs, max_num_tokens, head_size]
+    out: torch.Tensor,
+    # [num_blocks, block_size, head_bytes]
+    k_cache: torch.Tensor,
+    # [num_reqs]
+    seq_lens: torch.Tensor,
+    # [num_reqs]
+    gather_lens: torch.Tensor | None,
+    # [num_reqs, max_blocks_per_seq]
+    block_table: torch.Tensor,
+    block_size: int,
+    offset: int,
+) -> None:
+    if has_cutedsl():
+        # lazily import, otherwise some tests fail due to CUDA driver init failure.
+        from vllm.models.deepseek_v4.nvidia.ops.dequant_gather_k_cutedsl import (
+            dequantize_and_gather_k_cache_cutedsl,
+        )
+
+        dequantize_and_gather_k_cache_cutedsl(
+            out, k_cache, seq_lens, gather_lens, block_table, block_size, offset
+        )
+        return
+
+    dequantize_and_gather_k_cache_triton(
+        out, k_cache, seq_lens, gather_lens, block_table, block_size, offset
+    )
+
+
+def compute_global_topk_indices_and_lens(
+    topk_indices: torch.Tensor,
+    token_to_req_indices: torch.Tensor,
+    block_table: torch.Tensor,
+    block_size: int,
+    is_valid_token: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Map local topk indices to global KV cache slots and count valid entries.
+
+    Fuses three operations into a single kernel:
+    1. Block-table lookup (local index → global slot id)
+    2. Valid-entry counting (topk_lens per token)
+    3. Masking padding tokens to length 0
+    """
+    num_tokens = topk_indices.shape[0]
+    global_topk_indices = torch.empty_like(topk_indices)
+    topk_lens = torch.empty(num_tokens, dtype=torch.int32, device=topk_indices.device)
+    _compute_global_topk_indices_and_lens_kernel[(num_tokens,)](
+        global_topk_indices,
+        global_topk_indices.stride(0),
+        topk_lens,
+        topk_indices,
+        topk_indices.stride(0),
+        topk_indices.shape[-1],
+        token_to_req_indices,
+        block_table,
+        block_table.stride(0),
+        block_size,
+        is_valid_token,
+        TRITON_BLOCK_SIZE=1024,
+    )
+    return global_topk_indices, topk_lens
+
+
+@triton.jit
+def _compute_global_topk_indices_and_lens_kernel(
+    global_topk_indices_ptr,
+    global_topk_indices_stride,
+    topk_lens_ptr,
+    topk_indices_ptr,
+    topk_indices_stride,
+    topk,
+    token_to_req_indices_ptr,
+    block_table_ptr,
+    block_table_stride,
+    block_size,
+    is_valid_token_ptr,
+    TRITON_BLOCK_SIZE: tl.constexpr,
+):
+    token_idx = tl.program_id(0)
+    is_valid_token = tl.load(is_valid_token_ptr + token_idx)
+    req_idx = tl.load(token_to_req_indices_ptr + token_idx)
+
+    count = tl.zeros((), dtype=tl.int32)
+    for i in range(0, topk, TRITON_BLOCK_SIZE):
+        offset = i + tl.arange(0, TRITON_BLOCK_SIZE)
+        mask = offset < topk
+
+        local_idx = tl.load(
+            topk_indices_ptr + token_idx * topk_indices_stride + offset,
+            mask=mask,
+            other=-1,
+        )
+        is_valid = local_idx >= 0
+
+        block_indices = local_idx // block_size
+        block_numbers = tl.load(
+            block_table_ptr + req_idx * block_table_stride + block_indices,
+            mask=mask & is_valid,
+        )
+        block_offsets = local_idx % block_size
+
+        slot_ids = block_numbers * block_size + block_offsets
+        slot_ids = tl.where(is_valid, slot_ids, -1)
+        tl.store(
+            global_topk_indices_ptr + token_idx * global_topk_indices_stride + offset,
+            slot_ids,
+            mask=mask,
+        )
+        count += tl.sum(is_valid.to(tl.int32), axis=0)
+
+    # Zero out length for padding tokens.
+    tl.store(topk_lens_ptr + token_idx, tl.where(is_valid_token, count, 0))
+
+
+# FlashMLA sparse prefill asserts `params.topk % B_TOPK == 0` (see
+# flashmla/csrc/sm100/prefill/sparse/fwd/head{64,128}/phase1.cuh). B_TOPK is
+# 64 for the h_q=64 kernel and 128 for h_q=128; pad to 128 to satisfy both.
+# The extra slots stay as -1 sentinels and `combined_lens` caps the valid
+# range via `topk_length`, so padding is a no-op at kernel level.
+_SPARSE_PREFILL_TOPK_ALIGNMENT = 128
+
+
+def combine_topk_swa_indices(
+    topk_indices: torch.Tensor,
+    query_start_loc: torch.Tensor,
+    seq_lens: torch.Tensor,
+    gather_lens: torch.Tensor,
+    window_size: int,
+    compress_ratio: int,
+    topk: int,
+    M: int,
+    N: int,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    num_tokens = topk_indices.shape[0]
+    num_reqs = seq_lens.shape[0]
+    combined_topk = (
+        (topk + window_size + _SPARSE_PREFILL_TOPK_ALIGNMENT - 1)
+        // _SPARSE_PREFILL_TOPK_ALIGNMENT
+        * _SPARSE_PREFILL_TOPK_ALIGNMENT
+    )
+    combined_indices = torch.full(
+        (num_tokens, combined_topk),
+        fill_value=-1,
+        dtype=torch.int32,
+        device=topk_indices.device,
+    )
+    combined_lens = torch.empty(
+        num_tokens, dtype=torch.int32, device=topk_indices.device
+    )
+
+    NUM_WORKERS = 128
+    _combine_topk_swa_indices_kernel[(num_reqs, NUM_WORKERS)](
+        combined_indices,
+        combined_indices.stride(0),
+        combined_lens,
+        topk_indices,
+        topk_indices.stride(0),
+        query_start_loc,
+        seq_lens,
+        gather_lens,
+        M,
+        N,
+        TOP_K=topk,
+        COMPRESS_RATIO=compress_ratio,
+        WINDOW_SIZE=window_size,
+        PADDED_TOP_K=triton.next_power_of_2(topk_indices.shape[-1]),
+    )
+    return combined_indices, combined_lens
+
+
+@triton.jit
+def _combine_topk_swa_indices_kernel(
+    combined_indices_ptr,
+    combined_indices_stride,
+    combined_lens_ptr,
+    topk_indices_ptr,
+    topk_indices_stride,
+    query_start_loc_ptr,
+    seq_lens_ptr,
+    gather_lens_ptr,
+    M,
+    N,
+    TOP_K: tl.constexpr,
+    COMPRESS_RATIO: tl.constexpr,
+    WINDOW_SIZE: tl.constexpr,
+    PADDED_TOP_K: tl.constexpr,
+):
+    batch_idx = tl.program_id(0)
+    worker_id = tl.program_id(1)
+    num_workers = tl.num_programs(1)
+
+    # query_start_loc is a global tensor; rebase to chunk-local offsets
+    # by subtracting the chunk's starting value.
+    base = tl.load(query_start_loc_ptr)
+    query_start = tl.load(query_start_loc_ptr + batch_idx) - base
+    query_end = tl.load(query_start_loc_ptr + batch_idx + 1) - base
+    query_len = query_end - query_start
+    seq_len = tl.load(seq_lens_ptr + batch_idx)
+    gather_len = tl.load(gather_lens_ptr + batch_idx)
+    start_pos = seq_len - query_len
+    # The SWA portion of the gathered buffer starts from position
+    # (seq_len - gather_len), not position 0. We need this offset
+    # to correctly index into the gathered buffer.
+    gather_start = seq_len - gather_len
+
+    for token_idx in range(query_start + worker_id, query_end, num_workers):
+        # topk_len is fully determined by the query token's absolute position:
+        # both the C4A indexer and the C128A metadata builder emit
+        # min((pos + 1) // compress_ratio, topk_tokens) valid entries.
+        # Caller passes TOP_K=0 for SWA-only layers to zero this out.
+        token_idx_in_query = token_idx - query_start
+        pos = start_pos + token_idx_in_query
+        topk_len = tl.minimum((pos + 1) // COMPRESS_RATIO, TOP_K)
+        swa_len = tl.minimum(pos + 1, WINDOW_SIZE)
+
+        offset = tl.arange(0, PADDED_TOP_K)
+        mask = offset < topk_len
+        topk_indices = tl.load(
+            topk_indices_ptr + token_idx * topk_indices_stride + offset,
+            mask=mask,
+        )
+        tl.store(
+            combined_indices_ptr + token_idx * combined_indices_stride + offset,
+            topk_indices + M * batch_idx,
+            mask=mask,
+        )
+        offset = tl.arange(0, WINDOW_SIZE)
+        # Index into gathered buffer: N + (position - gather_start)
+        # For positions [pos - swa_len + 1, pos], the buffer indices are:
+        # [N + pos - swa_len + 1 - gather_start, N + pos - gather_start]
+        tl.store(
+            combined_indices_ptr
+            + token_idx * combined_indices_stride
+            + topk_len
+            + offset,
+            M * batch_idx + N + offset + pos - swa_len + 1 - gather_start,
+            mask=offset < swa_len,
+        )
+
+        combined_len = topk_len + swa_len
+        tl.store(combined_lens_ptr + token_idx, combined_len)
diff --git a/vllm/models/deepseek_v4/common/ops/fused_compress_quant_cache.py b/vllm/models/deepseek_v4/common/ops/fused_compress_quant_cache.py
new file mode 100644
index 000000000000..2f97d8733c95
--- /dev/null
+++ b/vllm/models/deepseek_v4/common/ops/fused_compress_quant_cache.py
@@ -0,0 +1,584 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Fused compressor + FP8/MXFP4 UE8M0 quantization + KV cache insert kernels.
+
+Three specialized kernels:
+  - _fused_kv_compress_norm_rope_insert_sparse_attn:
+        head=512, nope=448 FP8 + rope=64 bf16
+  - _fused_kv_compress_norm_rope_insert_indexer_attn:
+        head=128, all FP8, 1 block/token
+  - _fused_kv_compress_norm_rope_insert_indexer_mxfp4_attn:
+        head=128, MXFP4 (block=32), 4 ue8m0 bytes
+
+RoPE is register-based via tl.reshape -> tl.split -> tl.interleave (or the
+even/odd halves are consumed directly for MXFP4, no interleave needed).
+FP8 UE8M0 quant uses tl.reshape to tile [N_QUANT_BLOCKS, QUANT_BLOCK] for
+per-block absmax entirely in registers. MXFP4 does the same tiling on the
+even/odd halves, producing (N_QUANT_BLOCKS, MXFP4_BLOCK/2) packed nibbles
+and N_QUANT_BLOCKS ue8m0 bytes.
+"""
+
+from vllm.triton_utils import tl, triton
+
+from .fused_indexer_q import _fp32x2_to_fp4x2
+
+
+# =============================================================================
+# DeepseekV4 Attention path (head=512, nope=448 FP8 + rope=64 bf16)
+# =============================================================================
+@triton.jit
+def _fused_kv_compress_norm_rope_insert_sparse_attn(
+    # ── state cache (compressor internal state) ──
+    state_cache_ptr,
+    state_cache_stride0,
+    state_cache_stride1,
+    # ── metadata ──
+    token_to_req_indices_ptr,
+    positions_ptr,
+    slot_mapping_ptr,
+    block_table_ptr,
+    block_table_stride,
+    block_size,
+    # ── RMSNorm ──
+    rms_norm_weight_ptr,
+    rms_norm_eps,
+    # ── RoPE ──
+    cos_sin_cache_ptr,
+    cos_sin_stride,
+    # ── KV cache output ──
+    k_cache_ptr,
+    kv_slot_mapping_ptr,
+    kv_cache_block_size,
+    # ── constexprs ──
+    HEAD_SIZE: tl.constexpr,
+    TRITON_BLOCK_SIZE: tl.constexpr,
+    STATE_WIDTH: tl.constexpr,
+    COMPRESS_RATIO: tl.constexpr,
+    OVERLAP: tl.constexpr,
+    ROPE_HEAD_DIM: tl.constexpr,
+    FP8_MAX: tl.constexpr,  # 448.0
+    QUANT_BLOCK: tl.constexpr,  # 64 for DeepseekV4
+    TOKEN_STRIDE: tl.constexpr,  # 576 for DeepseekV4
+    SCALE_DIM: tl.constexpr,  # 8 for DeepseekV4 (7 real + 1 pad)
+    KV_BLOCK_STRIDE: tl.constexpr,
+):
+    """Fused compress → RMSNorm → FP8 quant (nope) → RoPE → bf16 store (rope).
+
+    One program per token; early-exits for non-boundary positions.
+
+    Cache block layout (``block_size`` tokens):
+      [0, bs*576):       token data (448 fp8 + 128 bf16 each)
+      [bs*576, +bs*8):   uint8 UE8M0 scales (7 real + 1 pad each)
+    """
+    token_idx = tl.program_id(0)
+
+    slot_id = tl.load(slot_mapping_ptr + token_idx)
+    if slot_id < 0:
+        return
+
+    position = tl.load(positions_ptr + token_idx)
+    if (position + 1) % COMPRESS_RATIO != 0:
+        return
+
+    req_idx = tl.load(token_to_req_indices_ptr + token_idx)
+
+    # ── Gather state cache entries ────────────────────────────────────
+    start = position - (1 + OVERLAP) * COMPRESS_RATIO + 1
+    tokens = tl.arange(0, (1 + OVERLAP) * COMPRESS_RATIO)
+    pos = start + tokens
+    mask_pos = pos >= 0
+
+    block_indices = pos // block_size
+    block_numbers = tl.load(
+        block_table_ptr + req_idx * block_table_stride + block_indices,
+        mask=mask_pos,
+        other=0,
+    )
+    block_offsets = pos % block_size
+    head_offset = (tokens >= COMPRESS_RATIO).to(tl.int32) * HEAD_SIZE
+
+    block = tl.arange(0, TRITON_BLOCK_SIZE)
+    mask = block < HEAD_SIZE
+    block_numbers_i64 = block_numbers.to(tl.int64)
+
+    # Precomputed row base shared by score and kv loads
+    row_base = (
+        state_cache_ptr
+        + block_numbers_i64 * state_cache_stride0
+        + block_offsets * state_cache_stride1
+        + head_offset
+    )
+
+    combined_mask = mask_pos[:, None] & mask[None, :]
+
+    # ── Softmax + weighted sum ───────────────────────────────────────
+    score = tl.load(
+        row_base[:, None] + STATE_WIDTH + block[None, :],
+        mask=combined_mask,
+        other=float("-inf"),
+    )
+    score = tl.softmax(score, dim=0)
+
+    kv = tl.load(
+        row_base[:, None] + block[None, :],
+        mask=combined_mask,
+        other=0.0,
+    )
+
+    compressed_kv = tl.sum(kv * score, axis=0)  # [TRITON_BLOCK_SIZE] fp32
+
+    # ── RMSNorm (fp32 throughout) ──────────────────────────────────────
+    rms_w = tl.load(rms_norm_weight_ptr + block, mask=mask, other=0.0)
+    variance = tl.sum(compressed_kv * compressed_kv, axis=0) / HEAD_SIZE
+    rrms = tl.rsqrt(variance + rms_norm_eps)
+    normed = compressed_kv * rrms * rms_w
+
+    # ── KV cache pointers ────────────────────────────────────────────
+    kv_slot_idx = tl.load(kv_slot_mapping_ptr + token_idx)
+    if kv_slot_idx < 0:
+        return
+    kv_block_idx = kv_slot_idx // kv_cache_block_size
+    kv_pos_in_block = kv_slot_idx % kv_cache_block_size
+
+    cache_block_ptr = k_cache_ptr + kv_block_idx.to(tl.int64) * KV_BLOCK_STRIDE
+    fp8_ptr = cache_block_ptr + kv_pos_in_block * TOKEN_STRIDE
+    scale_ptr = (
+        cache_block_ptr
+        + kv_cache_block_size * TOKEN_STRIDE
+        + kv_pos_in_block * SCALE_DIM
+    )
+
+    NOPE_HEAD_DIM: tl.constexpr = HEAD_SIZE - ROPE_HEAD_DIM  # 448
+    HALF_ROPE: tl.constexpr = ROPE_HEAD_DIM // 2  # 32
+
+    # FP8 UE8M0 quant: cast fp32 → bf16 → fp32 before quant to match reference.
+    N_QUANT_BLOCKS: tl.constexpr = TRITON_BLOCK_SIZE // QUANT_BLOCK
+    N_NOPE_BLOCKS: tl.constexpr = NOPE_HEAD_DIM // QUANT_BLOCK  # 7
+    INV_FP8_MAX: tl.constexpr = 1.0 / FP8_MAX
+
+    quant_input = normed.to(tl.bfloat16).to(tl.float32)
+    quant_2d = tl.reshape(quant_input, (N_QUANT_BLOCKS, QUANT_BLOCK))
+    abs_2d = tl.abs(quant_2d)
+    block_absmax = tl.max(abs_2d, axis=1)  # [N_QUANT_BLOCKS] fp32
+    block_absmax = tl.maximum(block_absmax, 1e-4)
+
+    raw_scales = block_absmax * INV_FP8_MAX
+    exponents = tl.ceil(tl.log2(raw_scales))
+    inv_scales = tl.exp2(-exponents)
+    inv_scales_col = tl.reshape(inv_scales, (N_QUANT_BLOCKS, 1))
+    x_scaled = quant_2d * inv_scales_col
+    x_clamped = tl.clamp(x_scaled, -FP8_MAX, FP8_MAX)
+    x_fp8 = x_clamped.to(tl.float8e4nv)
+    x_uint8 = x_fp8.to(tl.uint8, bitcast=True)
+    x_uint8_flat = tl.reshape(x_uint8, (TRITON_BLOCK_SIZE,))
+
+    nope_mask = block < NOPE_HEAD_DIM
+    tl.store(fp8_ptr + block, x_uint8_flat, mask=nope_mask)
+
+    scale_idx = tl.arange(0, N_QUANT_BLOCKS)
+    encoded = exponents + 127.0
+    encoded = tl.maximum(tl.minimum(encoded, 255.0), 0.0)
+    tl.store(
+        scale_ptr + scale_idx,
+        encoded.to(tl.uint8),
+        mask=scale_idx < N_NOPE_BLOCKS,
+    )
+    tl.store(scale_ptr + N_NOPE_BLOCKS, tl.zeros((), dtype=tl.uint8))
+
+    # Register-based GPT-J RoPE in fp32.
+    NUM_PAIRS: tl.constexpr = TRITON_BLOCK_SIZE // 2
+    NOPE_PAIRS: tl.constexpr = NOPE_HEAD_DIM // 2
+
+    pair_2d = tl.reshape(normed, (NUM_PAIRS, 2))
+    even, odd = tl.split(pair_2d)  # each [NUM_PAIRS] fp32
+
+    pair_idx = tl.arange(0, NUM_PAIRS)
+    rope_pair_local = pair_idx - NOPE_PAIRS
+    is_rope_pair = rope_pair_local >= 0
+    cs_idx = tl.maximum(rope_pair_local, 0)
+
+    compressed_pos = (position // COMPRESS_RATIO) * COMPRESS_RATIO
+    cache_base = cos_sin_cache_ptr + compressed_pos * cos_sin_stride
+    cos_v = tl.load(cache_base + cs_idx, mask=is_rope_pair, other=1.0)
+    sin_v = tl.load(cache_base + HALF_ROPE + cs_idx, mask=is_rope_pair, other=0.0)
+
+    new_even = even * cos_v - odd * sin_v
+    new_odd = odd * cos_v + even * sin_v
+    result = tl.interleave(new_even, new_odd)  # [TRITON_BLOCK_SIZE] fp32
+
+    # Store rotated rope portion as bf16 into the cache's bf16 area.
+    bf16_ptr = (fp8_ptr + NOPE_HEAD_DIM).to(tl.pointer_type(tl.bfloat16))
+    rope_local = block - NOPE_HEAD_DIM
+    is_rope = (block >= NOPE_HEAD_DIM) & mask
+    tl.store(bf16_ptr + rope_local, result.to(tl.bfloat16), mask=is_rope)
+
+
+# =============================================================================
+# Indexer path (head=128, all FP8, single quant block)
+# =============================================================================
+@triton.jit
+def _fused_kv_compress_norm_rope_insert_indexer_attn(
+    # ── state cache (compressor internal state) ──
+    state_cache_ptr,
+    state_cache_stride0,
+    state_cache_stride1,
+    # ── metadata ──
+    token_to_req_indices_ptr,
+    positions_ptr,
+    slot_mapping_ptr,
+    block_table_ptr,
+    block_table_stride,
+    block_size,
+    # ── RMSNorm ──
+    rms_norm_weight_ptr,
+    rms_norm_eps,
+    # ── RoPE ──
+    cos_sin_cache_ptr,
+    cos_sin_stride,
+    # ── KV cache output ──
+    k_cache_ptr,
+    kv_slot_mapping_ptr,
+    kv_cache_block_size,
+    # ── constexprs ──
+    HEAD_SIZE: tl.constexpr,
+    TRITON_BLOCK_SIZE: tl.constexpr,
+    STATE_WIDTH: tl.constexpr,
+    COMPRESS_RATIO: tl.constexpr,
+    OVERLAP: tl.constexpr,
+    ROPE_HEAD_DIM: tl.constexpr,
+    FP8_MAX: tl.constexpr,  # 448.0
+    QUANT_BLOCK: tl.constexpr,  # 128 for indexer
+    TOKEN_STRIDE: tl.constexpr,  # 128 for indexer
+    SCALE_DIM: tl.constexpr,  # 4 for indexer (1 float32)
+    KV_BLOCK_STRIDE: tl.constexpr,
+):
+    """Fused compress → RMSNorm → RoPE → FP8 quant → store.
+
+    One program per token; early-exits for non-boundary positions.
+
+    Cache block layout:
+      [0, bs*128):       FP8 data (128 bytes/token)
+      [bs*128, +bs*4):   float32 scales (4 bytes/token)
+
+    For head_dim=128 we have exactly one quant block, so we skip the
+    [N_QUANT_BLOCKS, QUANT_BLOCK] reshape entirely and use a flat
+    ``tl.max`` reduction.
+    """
+    token_idx = tl.program_id(0)
+
+    slot_id = tl.load(slot_mapping_ptr + token_idx)
+    if slot_id < 0:
+        return
+
+    position = tl.load(positions_ptr + token_idx)
+    if (position + 1) % COMPRESS_RATIO != 0:
+        return
+
+    req_idx = tl.load(token_to_req_indices_ptr + token_idx)
+
+    # ── Gather state cache entries ────────────────────────────────────
+    start = position - (1 + OVERLAP) * COMPRESS_RATIO + 1
+    tokens = tl.arange(0, (1 + OVERLAP) * COMPRESS_RATIO)
+    pos = start + tokens
+    mask_pos = pos >= 0
+
+    block_indices = pos // block_size
+    block_numbers = tl.load(
+        block_table_ptr + req_idx * block_table_stride + block_indices,
+        mask=mask_pos,
+        other=0,
+    )
+    block_offsets = pos % block_size
+    head_offset = (tokens >= COMPRESS_RATIO).to(tl.int32) * HEAD_SIZE
+
+    block = tl.arange(0, TRITON_BLOCK_SIZE)
+    mask = block < HEAD_SIZE
+    block_numbers_i64 = block_numbers.to(tl.int64)
+
+    row_base = (
+        state_cache_ptr
+        + block_numbers_i64 * state_cache_stride0
+        + block_offsets * state_cache_stride1
+        + head_offset
+    )
+
+    combined_mask = mask_pos[:, None] & mask[None, :]
+
+    score = tl.load(
+        row_base[:, None] + STATE_WIDTH + block[None, :],
+        mask=combined_mask,
+        other=float("-inf"),
+    )
+    score = tl.softmax(score, dim=0)
+
+    kv = tl.load(
+        row_base[:, None] + block[None, :],
+        mask=combined_mask,
+        other=0.0,
+    )
+
+    compressed_kv = tl.sum(kv * score, axis=0)  # [TRITON_BLOCK_SIZE] fp32
+
+    # ── RMSNorm (fp32 throughout) ──────────────────────────────────────
+    rms_w = tl.load(rms_norm_weight_ptr + block, mask=mask, other=0.0)
+    variance = tl.sum(compressed_kv * compressed_kv, axis=0) / HEAD_SIZE
+    rrms = tl.rsqrt(variance + rms_norm_eps)
+    normed = compressed_kv * rrms * rms_w
+
+    # ── KV cache pointers ────────────────────────────────────────────
+    kv_slot_idx = tl.load(kv_slot_mapping_ptr + token_idx)
+    if kv_slot_idx < 0:
+        return
+    kv_block_idx = kv_slot_idx // kv_cache_block_size
+    kv_pos_in_block = kv_slot_idx % kv_cache_block_size
+
+    cache_block_ptr = k_cache_ptr + kv_block_idx.to(tl.int64) * KV_BLOCK_STRIDE
+    fp8_ptr = cache_block_ptr + kv_pos_in_block * TOKEN_STRIDE
+    scale_ptr = (
+        cache_block_ptr
+        + kv_cache_block_size * TOKEN_STRIDE
+        + kv_pos_in_block * SCALE_DIM
+    )
+
+    NOPE_HEAD_DIM: tl.constexpr = HEAD_SIZE - ROPE_HEAD_DIM
+    HALF_ROPE: tl.constexpr = ROPE_HEAD_DIM // 2
+
+    # ── Register-based GPT-J forward RoPE in fp32 ─────────────────────
+    NUM_PAIRS: tl.constexpr = TRITON_BLOCK_SIZE // 2
+    NOPE_PAIRS: tl.constexpr = NOPE_HEAD_DIM // 2
+
+    normed_2d = tl.reshape(normed, (NUM_PAIRS, 2))
+    even, odd = tl.split(normed_2d)  # each [NUM_PAIRS] fp32
+
+    pair_idx = tl.arange(0, NUM_PAIRS)
+    rope_pair_local = pair_idx - NOPE_PAIRS
+    is_rope_pair = rope_pair_local >= 0
+    cs_idx = tl.maximum(rope_pair_local, 0)
+
+    compressed_pos = (position // COMPRESS_RATIO) * COMPRESS_RATIO
+    cache_base = cos_sin_cache_ptr + compressed_pos * cos_sin_stride
+    cos_v = tl.load(cache_base + cs_idx, mask=is_rope_pair, other=1.0)
+    sin_v = tl.load(cache_base + HALF_ROPE + cs_idx, mask=is_rope_pair, other=0.0)
+
+    new_even = even * cos_v - odd * sin_v
+    new_odd = odd * cos_v + even * sin_v
+    result = tl.interleave(new_even, new_odd)  # fp32
+
+    # ── FP8 UE8M0 quant: single block, flat reduction ────────────────
+    tl.static_assert(
+        TRITON_BLOCK_SIZE == QUANT_BLOCK,
+        "Indexer expects one quant block (QUANT_BLOCK == TRITON_BLOCK_SIZE)",
+    )
+    INV_FP8_MAX: tl.constexpr = 1.0 / FP8_MAX
+
+    result_bf16 = result.to(tl.bfloat16).to(tl.float32)
+    absmax = tl.max(tl.abs(result_bf16), axis=0)  # scalar
+    absmax = tl.maximum(absmax, 1e-4)
+    raw_scale = absmax * INV_FP8_MAX
+    exponent = tl.ceil(tl.log2(raw_scale))
+    inv_scale = tl.exp2(-exponent)
+
+    x_scaled = result_bf16 * inv_scale
+    x_clamped = tl.clamp(x_scaled, -FP8_MAX, FP8_MAX)
+    x_fp8 = x_clamped.to(tl.float8e4nv)
+    x_uint8 = x_fp8.to(tl.uint8, bitcast=True)
+
+    tl.store(fp8_ptr + block, x_uint8, mask=mask)
+
+    # Single float32 scale
+    scale_val = tl.exp2(exponent)
+    tl.store(scale_ptr.to(tl.pointer_type(tl.float32)), scale_val)
+
+
+# =============================================================================
+# Indexer path (head=128, MXFP4: 2 nibbles/byte + ue8m0 per 32-elem block)
+# =============================================================================
+@triton.jit
+def _fused_kv_compress_norm_rope_insert_indexer_mxfp4_attn(
+    # ── state cache (compressor internal state) ──
+    state_cache_ptr,
+    state_cache_stride0,
+    state_cache_stride1,
+    # ── metadata ──
+    token_to_req_indices_ptr,
+    positions_ptr,
+    slot_mapping_ptr,
+    block_table_ptr,
+    block_table_stride,
+    block_size,
+    # ── RMSNorm ──
+    rms_norm_weight_ptr,
+    rms_norm_eps,
+    # ── RoPE ──
+    cos_sin_cache_ptr,
+    cos_sin_stride,
+    # ── KV cache output ──
+    k_cache_ptr,
+    kv_slot_mapping_ptr,
+    kv_cache_block_size,
+    # ── constexprs ──
+    HEAD_SIZE: tl.constexpr,
+    TRITON_BLOCK_SIZE: tl.constexpr,
+    STATE_WIDTH: tl.constexpr,
+    COMPRESS_RATIO: tl.constexpr,
+    OVERLAP: tl.constexpr,
+    ROPE_HEAD_DIM: tl.constexpr,
+    FP8_MAX: tl.constexpr,  # unused for MXFP4 (kept for signature parity)
+    QUANT_BLOCK: tl.constexpr,  # 32 for MXFP4
+    TOKEN_STRIDE: tl.constexpr,  # HEAD_SIZE // 2 = 64 packed bytes/token
+    SCALE_DIM: tl.constexpr,  # HEAD_SIZE // QUANT_BLOCK = 4 ue8m0 bytes/token
+    KV_BLOCK_STRIDE: tl.constexpr,
+):
+    """Fused compress → RMSNorm → RoPE → MXFP4 quant → store.
+
+    One program per token; early-exits for non-boundary positions.
+
+    Cache block layout (``block_size`` tokens per cache block):
+      [0, bs*TOKEN_STRIDE):        packed MXFP4 nibbles (2 values/byte)
+      [bs*TOKEN_STRIDE, +bs*SCALE_DIM): ue8m0 scale bytes (one per 32-elem block)
+
+    MXFP4 format:
+      - E2M1 4-bit values packed two per byte (low nibble first, then high).
+      - Per-32-element block scale = 2^ceil(log2(amax / 6.0)), stored ue8m0
+        (byte = exponent + 127).
+      - Max representable magnitude = 6.0.
+    """
+    token_idx = tl.program_id(0)
+
+    slot_id = tl.load(slot_mapping_ptr + token_idx)
+    if slot_id < 0:
+        return
+
+    position = tl.load(positions_ptr + token_idx)
+    if (position + 1) % COMPRESS_RATIO != 0:
+        return
+
+    req_idx = tl.load(token_to_req_indices_ptr + token_idx)
+
+    # ── Gather state cache entries ────────────────────────────────────
+    start = position - (1 + OVERLAP) * COMPRESS_RATIO + 1
+    tokens = tl.arange(0, (1 + OVERLAP) * COMPRESS_RATIO)
+    pos = start + tokens
+    mask_pos = pos >= 0
+
+    block_indices = pos // block_size
+    block_numbers = tl.load(
+        block_table_ptr + req_idx * block_table_stride + block_indices,
+        mask=mask_pos,
+        other=0,
+    )
+    block_offsets = pos % block_size
+    head_offset = (tokens >= COMPRESS_RATIO).to(tl.int32) * HEAD_SIZE
+
+    block = tl.arange(0, TRITON_BLOCK_SIZE)
+    mask = block < HEAD_SIZE
+    block_numbers_i64 = block_numbers.to(tl.int64)
+
+    row_base = (
+        state_cache_ptr
+        + block_numbers_i64 * state_cache_stride0
+        + block_offsets * state_cache_stride1
+        + head_offset
+    )
+
+    combined_mask = mask_pos[:, None] & mask[None, :]
+
+    score = tl.load(
+        row_base[:, None] + STATE_WIDTH + block[None, :],
+        mask=combined_mask,
+        other=float("-inf"),
+    )
+    score = tl.softmax(score, dim=0)
+
+    kv = tl.load(
+        row_base[:, None] + block[None, :],
+        mask=combined_mask,
+        other=0.0,
+    )
+
+    compressed_kv = tl.sum(kv * score, axis=0)  # [TRITON_BLOCK_SIZE] fp32
+
+    # ── RMSNorm (fp32 throughout) ──────────────────────────────────────
+    rms_w = tl.load(rms_norm_weight_ptr + block, mask=mask, other=0.0)
+    variance = tl.sum(compressed_kv * compressed_kv, axis=0) / HEAD_SIZE
+    rrms = tl.rsqrt(variance + rms_norm_eps)
+    normed = compressed_kv * rrms * rms_w
+
+    # ── KV cache pointers (segregated: values first, then scales) ────
+    kv_slot_idx = tl.load(kv_slot_mapping_ptr + token_idx)
+    if kv_slot_idx < 0:
+        return
+    kv_block_idx = kv_slot_idx // kv_cache_block_size
+    kv_pos_in_block = kv_slot_idx % kv_cache_block_size
+
+    cache_block_ptr = k_cache_ptr + kv_block_idx.to(tl.int64) * KV_BLOCK_STRIDE
+    val_ptr = cache_block_ptr + kv_pos_in_block * TOKEN_STRIDE
+    scale_ptr = (
+        cache_block_ptr
+        + kv_cache_block_size * TOKEN_STRIDE
+        + kv_pos_in_block * SCALE_DIM
+    )
+
+    NOPE_HEAD_DIM: tl.constexpr = HEAD_SIZE - ROPE_HEAD_DIM
+    HALF_ROPE: tl.constexpr = ROPE_HEAD_DIM // 2
+
+    # ── Register-based GPT-J forward RoPE in fp32 ─────────────────────
+    # We keep the even/odd halves (no tl.interleave afterwards) because the
+    # MXFP4 per-block absmax / pack naturally operates on (even, odd) pairs.
+    NUM_PAIRS: tl.constexpr = TRITON_BLOCK_SIZE // 2
+    NOPE_PAIRS: tl.constexpr = NOPE_HEAD_DIM // 2
+
+    normed_2d = tl.reshape(normed, (NUM_PAIRS, 2))
+    even, odd = tl.split(normed_2d)  # each [NUM_PAIRS] fp32
+
+    pair_idx = tl.arange(0, NUM_PAIRS)
+    rope_pair_local = pair_idx - NOPE_PAIRS
+    is_rope_pair = rope_pair_local >= 0
+    cs_idx = tl.maximum(rope_pair_local, 0)
+
+    compressed_pos = (position // COMPRESS_RATIO) * COMPRESS_RATIO
+    cache_base = cos_sin_cache_ptr + compressed_pos * cos_sin_stride
+    cos_v = tl.load(cache_base + cs_idx, mask=is_rope_pair, other=1.0)
+    sin_v = tl.load(cache_base + HALF_ROPE + cs_idx, mask=is_rope_pair, other=0.0)
+
+    new_even = even * cos_v - odd * sin_v
+    new_odd = odd * cos_v + even * sin_v
+
+    # bf16 roundtrip for parity with reference / Q-side kernel numerics.
+    new_even = new_even.to(tl.bfloat16).to(tl.float32)
+    new_odd = new_odd.to(tl.bfloat16).to(tl.float32)
+
+    # ── MXFP4 quant: tile even/odd halves into (N_BLOCKS, HALF_BLOCK) ──
+    # Each MXFP4 block of QUANT_BLOCK elements = HALF_BLOCK consecutive pairs,
+    # so (N_BLOCKS, HALF_BLOCK) rows of even/odd each land exactly one block.
+    N_QUANT_BLOCKS: tl.constexpr = HEAD_SIZE // QUANT_BLOCK
+    HALF_BLOCK: tl.constexpr = QUANT_BLOCK // 2
+    tl.static_assert(TRITON_BLOCK_SIZE == HEAD_SIZE)
+    tl.static_assert(HEAD_SIZE % QUANT_BLOCK == 0)
+    tl.static_assert(TOKEN_STRIDE == HEAD_SIZE // 2)
+    tl.static_assert(SCALE_DIM == N_QUANT_BLOCKS)
+
+    even_2d = tl.reshape(new_even, (N_QUANT_BLOCKS, HALF_BLOCK))
+    odd_2d = tl.reshape(new_odd, (N_QUANT_BLOCKS, HALF_BLOCK))
+
+    amax = tl.maximum(
+        tl.max(tl.abs(even_2d), axis=1),
+        tl.max(tl.abs(odd_2d), axis=1),
+    )
+    amax = tl.maximum(amax, 6.0 * (2**-126))
+
+    # ue8m0 block scale: 2^ceil(log2(amax / 6.0)), stored as (exp + 127) byte.
+    log2_ratio = tl.ceil(tl.log2(amax * (1.0 / 6.0)))
+    log2_ratio = tl.minimum(tl.maximum(log2_ratio, -127.0), 127.0)
+    inv_scale = tl.exp2(-log2_ratio)
+    ue8m0 = (log2_ratio + 127.0).to(tl.uint8)  # [N_QUANT_BLOCKS]
+
+    inv_scale_col = tl.reshape(inv_scale, (N_QUANT_BLOCKS, 1))
+    packed = _fp32x2_to_fp4x2(
+        even_2d * inv_scale_col, odd_2d * inv_scale_col
+    )  # (N_BLOCKS, HALF_BLOCK) uint8
+    packed_flat = tl.reshape(packed, (TOKEN_STRIDE,))
+
+    tl.store(val_ptr + tl.arange(0, TOKEN_STRIDE), packed_flat)
+    tl.store(scale_ptr + tl.arange(0, SCALE_DIM), ue8m0)
diff --git a/vllm/models/deepseek_v4/common/ops/fused_indexer_q.py b/vllm/models/deepseek_v4/common/ops/fused_indexer_q.py
new file mode 100644
index 000000000000..d5aaf10feba4
--- /dev/null
+++ b/vllm/models/deepseek_v4/common/ops/fused_indexer_q.py
@@ -0,0 +1,438 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import torch
+
+from vllm.triton_utils import tl, triton
+from vllm.utils.import_utils import has_cutedsl
+
+# MXFP4: 32 elements per block, packed 2 nibbles per byte, ue8m0 block scale.
+MXFP4_BLOCK_SIZE = 32
+
+
+@triton.jit
+def _get_cos_sin(
+    cos_sin_cache_ptr,
+    cos_sin_cache_stride,
+    pos,
+    HALF_ROT_DIM: tl.constexpr,
+):
+    block = tl.arange(0, HALF_ROT_DIM)
+    cos = tl.load(cos_sin_cache_ptr + pos * cos_sin_cache_stride + block)
+    cos = cos.to(tl.float32)
+    sin = tl.load(cos_sin_cache_ptr + pos * cos_sin_cache_stride + block + HALF_ROT_DIM)
+    sin = sin.to(tl.float32)
+    return cos, sin
+
+
+@triton.jit
+def _fp32x2_to_fp4x2(x_lo, x_hi):
+    # NOTE: $1 is high nibble, $2 is low nibble
+    return tl.inline_asm_elementwise(
+        """
+        {
+            .reg .b8 tmp;
+            cvt.rn.satfinite.e2m1x2.f32 tmp, $1, $2;
+            cvt.u32.u8 $0, tmp;
+        }
+        """,
+        constraints="=r,f,f",
+        args=[x_hi, x_lo],
+        dtype=tl.uint32,
+        is_pure=True,
+        pack=1,
+    ).to(tl.uint8)
+
+
+@triton.jit
+def _quantize_mxfp4_pair(x_lo, x_hi):
+    """Quantize a block of MXFP4_BLOCK_SIZE fp32 values given as two
+    interleaved halves (x_lo = values at even positions in the block,
+    x_hi = values at odd positions). Returns:
+        - packed : uint8[BLOCK/2]  (low nibble = quant(x_lo), high = quant(x_hi))
+        - ue8m0  : scalar uint8    (block scale = 2^(ue8m0 - 127))
+    """
+    amax = tl.maximum(tl.max(tl.abs(x_lo)), tl.max(tl.abs(x_hi)))
+    # 6 * 2^-126 is from https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/blob/main/inference/kernel.py#L163
+    amax = tl.maximum(amax, 6.0 * (2**-126))
+    # ue8m0 block scale: 2^ceil(log2(amax/6.0)).
+    log2_ratio = tl.math.ceil(tl.math.log2(amax * (1.0 / 6.0)))
+    log2_ratio = tl.minimum(tl.maximum(log2_ratio, -127.0), 127.0)
+    scale = tl.math.exp2(log2_ratio)
+    ue8m0 = (log2_ratio + 127.0).to(tl.uint8)
+
+    inv_scale = 1.0 / scale
+    packed = _fp32x2_to_fp4x2(x_lo * inv_scale, x_hi * inv_scale)
+    return packed, ue8m0
+
+
+@triton.jit
+def _fused_indexer_q_rope_quant_kernel(
+    pos_ptr,
+    # Index Q RoPE
+    index_q_ptr,
+    index_q_stride0,
+    index_q_stride1,
+    index_q_cos_sin_ptr,
+    index_q_cos_sin_stride,
+    INDEX_Q_HALF_ROT_DIM: tl.constexpr,
+    # Index Q Quantize
+    index_q_fp8_ptr,
+    index_q_fp8_stride0,
+    index_q_fp8_stride1,
+    INDEX_Q_HEAD_DIM: tl.constexpr,
+    # Index weights
+    index_weights_ptr,
+    index_weights_stride,
+    index_weights_softmax_scale,
+    index_weights_head_scale,
+    index_weights_out_ptr,
+    index_weights_out_stride,
+):
+    # Layout matches the unfused reference (DeepseekV4ScalingRotaryEmbedding
+    # + per_token_group_quant_fp8): GPT-J interleaved RoPE applied to the
+    # LAST rope_dim dims of each head; the leading [0, NOPE_DIM) is passed
+    # through unchanged.
+    INDEX_Q_ROT_DIM: tl.constexpr = 2 * INDEX_Q_HALF_ROT_DIM
+    INDEX_Q_NOPE_DIM: tl.constexpr = INDEX_Q_HEAD_DIM - INDEX_Q_ROT_DIM
+    tl.static_assert(INDEX_Q_NOPE_DIM >= 0)
+
+    tok_idx = tl.program_id(0)
+    head_idx = tl.program_id(1)
+
+    pos = tl.load(pos_ptr + tok_idx)
+    cos, sin = _get_cos_sin(
+        index_q_cos_sin_ptr,
+        index_q_cos_sin_stride,
+        pos,
+        INDEX_Q_HALF_ROT_DIM,
+    )
+    half_offset = tl.arange(0, INDEX_Q_HALF_ROT_DIM)
+    base_ptr = index_q_ptr + tok_idx * index_q_stride0 + head_idx * index_q_stride1
+
+    # Interleaved (GPT-J) RoPE on dims [NOPE_DIM, HEAD_DIM):
+    #   even = q[NOPE_DIM + 2*i],  odd = q[NOPE_DIM + 2*i + 1]
+    rot_base = base_ptr + INDEX_Q_NOPE_DIM
+    x_even = tl.load(rot_base + half_offset * 2).to(tl.float32)
+    x_odd = tl.load(rot_base + half_offset * 2 + 1).to(tl.float32)
+    r_even = x_even * cos - x_odd * sin
+    r_odd = x_odd * cos + x_even * sin
+
+    # Match reference numerics: fp32 → bf16 → fp32 before the ue8m0 absmax.
+    # Same pattern as the K-side compressor kernel (fused_compress_quant_cache.py).
+    r_even = r_even.to(tl.bfloat16).to(tl.float32)
+    r_odd = r_odd.to(tl.bfloat16).to(tl.float32)
+
+    amax = tl.maximum(tl.max(tl.abs(r_even)), tl.max(tl.abs(r_odd)))
+    if INDEX_Q_NOPE_DIM > 0:
+        nope_offset = tl.arange(0, INDEX_Q_NOPE_DIM)
+        x_nope = tl.load(base_ptr + nope_offset).to(tl.float32)
+        amax = tl.maximum(amax, tl.max(tl.abs(x_nope)))
+    index_q_scale = tl.div_rn(tl.maximum(amax, 1e-4), 448.0)
+    index_q_scale = tl.math.exp2(tl.math.ceil(tl.math.log2(index_q_scale)))
+
+    # Store quantized values to index_q_fp8
+    fp8_base_ptr = (
+        index_q_fp8_ptr + tok_idx * index_q_fp8_stride0 + head_idx * index_q_fp8_stride1
+    )
+    if INDEX_Q_NOPE_DIM > 0:
+        tl.store(
+            fp8_base_ptr + nope_offset,
+            tl.div_rn(x_nope, index_q_scale).to(tl.float8e4nv),
+        )
+    fp8_rot_base = fp8_base_ptr + INDEX_Q_NOPE_DIM
+    tl.store(
+        fp8_rot_base + half_offset * 2,
+        tl.div_rn(r_even, index_q_scale).to(tl.float8e4nv),
+    )
+    tl.store(
+        fp8_rot_base + half_offset * 2 + 1,
+        tl.div_rn(r_odd, index_q_scale).to(tl.float8e4nv),
+    )
+
+    # FP8 weight-fold contract:
+    #   index_weights_out = index_weights * q_scale * softmax_scale * head_scale
+    # The per-token-per-head q_scale (fp32) IS folded into the output weights
+    # here because FP8 Q is stored WITHOUT a companion scale tensor — the
+    # downstream fp8_fp4_mqa_logits/fp8_fp4_paged_mqa_logits kernels use `weights` to
+    # apply per-token Q scale inline. See the MXFP4 kernel below for the
+    # contrasting convention (scales live with the Q values, weights are NOT
+    # q-scaled).
+    index_weights = tl.load(
+        index_weights_ptr + tok_idx * index_weights_stride + head_idx
+    )
+    index_weights = index_weights.to(tl.float32)
+    index_weights *= index_q_scale
+    index_weights *= index_weights_softmax_scale
+    index_weights *= index_weights_head_scale
+    tl.store(
+        index_weights_out_ptr + tok_idx * index_weights_out_stride + head_idx,
+        index_weights,
+    )
+
+
+@triton.jit
+def _fused_indexer_q_rope_mxfp4_kernel(
+    pos_ptr,
+    # Index Q RoPE input (fp/bf16)
+    index_q_ptr,
+    index_q_stride0,
+    index_q_stride1,
+    index_q_cos_sin_ptr,
+    index_q_cos_sin_stride,
+    INDEX_Q_HALF_ROT_DIM: tl.constexpr,
+    # MXFP4 Q outputs
+    index_q_mxfp4_ptr,  # uint8, (T, H, HEAD_DIM // 2)
+    index_q_mxfp4_stride0,
+    index_q_mxfp4_stride1,
+    index_q_scale_ptr,  # uint8 ue8m0, (T, H, HEAD_DIM // BLOCK)
+    index_q_scale_stride0,
+    index_q_scale_stride1,
+    INDEX_Q_HEAD_DIM: tl.constexpr,
+    MXFP4_BLOCK: tl.constexpr,
+    # Weights (NO per-token q_scale fold for MXFP4; per-block scales stay
+    # with the Q values in the output scale tensor).
+    index_weights_ptr,
+    index_weights_stride,
+    index_weights_softmax_scale,
+    index_weights_head_scale,
+    index_weights_out_ptr,
+    index_weights_out_stride,
+):
+    INDEX_Q_ROT_DIM: tl.constexpr = 2 * INDEX_Q_HALF_ROT_DIM
+    INDEX_Q_NOPE_DIM: tl.constexpr = INDEX_Q_HEAD_DIM - INDEX_Q_ROT_DIM
+    NUM_NOPE_BLOCKS: tl.constexpr = INDEX_Q_NOPE_DIM // MXFP4_BLOCK
+    NUM_ROPE_BLOCKS: tl.constexpr = INDEX_Q_ROT_DIM // MXFP4_BLOCK
+    HALF_BLOCK: tl.constexpr = MXFP4_BLOCK // 2
+    tl.static_assert(INDEX_Q_NOPE_DIM >= 0)
+    tl.static_assert(INDEX_Q_NOPE_DIM % MXFP4_BLOCK == 0)
+    tl.static_assert(INDEX_Q_ROT_DIM % MXFP4_BLOCK == 0)
+    tl.static_assert(MXFP4_BLOCK % 2 == 0)
+
+    tok_idx = tl.program_id(0)
+    head_idx = tl.program_id(1)
+
+    pos = tl.load(pos_ptr + tok_idx)
+
+    q_base = index_q_ptr + tok_idx * index_q_stride0 + head_idx * index_q_stride1
+    out_base = (
+        index_q_mxfp4_ptr
+        + tok_idx * index_q_mxfp4_stride0
+        + head_idx * index_q_mxfp4_stride1
+    )
+    scale_base = (
+        index_q_scale_ptr
+        + tok_idx * index_q_scale_stride0
+        + head_idx * index_q_scale_stride1
+    )
+
+    half_off = tl.arange(0, HALF_BLOCK)
+
+    # ---- NoPE blocks: direct load, pair as (even-index, odd-index) values ----
+    for b in tl.static_range(NUM_NOPE_BLOCKS):
+        base = b * MXFP4_BLOCK
+        x_lo = tl.load(q_base + base + half_off * 2).to(tl.float32)
+        x_hi = tl.load(q_base + base + half_off * 2 + 1).to(tl.float32)
+        packed, ue8m0 = _quantize_mxfp4_pair(x_lo, x_hi)
+        tl.store(out_base + base // 2 + half_off, packed)
+        tl.store(scale_base + b, ue8m0)
+
+    # ---- RoPE blocks: apply GPT-J interleaved RoPE to the block's 16 pairs,
+    # then quantize. Each block covers HALF_BLOCK (=16) cos/sin pairs. ----
+    rot_q_base = q_base + INDEX_Q_NOPE_DIM
+    for b in tl.static_range(NUM_ROPE_BLOCKS):
+        pair_off = b * HALF_BLOCK + half_off  # indices in [0, HALF_ROT_DIM)
+        cos_b = tl.load(
+            index_q_cos_sin_ptr + pos * index_q_cos_sin_stride + pair_off
+        ).to(tl.float32)
+        sin_b = tl.load(
+            index_q_cos_sin_ptr
+            + pos * index_q_cos_sin_stride
+            + pair_off
+            + INDEX_Q_HALF_ROT_DIM
+        ).to(tl.float32)
+        x_even = tl.load(rot_q_base + pair_off * 2).to(tl.float32)
+        x_odd = tl.load(rot_q_base + pair_off * 2 + 1).to(tl.float32)
+        r_even = x_even * cos_b - x_odd * sin_b
+        r_odd = x_odd * cos_b + x_even * sin_b
+        # bf16 roundtrip for parity with the FP8 kernel / reference numerics.
+        r_even = r_even.to(tl.bfloat16).to(tl.float32)
+        r_odd = r_odd.to(tl.bfloat16).to(tl.float32)
+        packed, ue8m0 = _quantize_mxfp4_pair(r_even, r_odd)
+        rope_byte_off = (INDEX_Q_NOPE_DIM + b * MXFP4_BLOCK) // 2
+        tl.store(out_base + rope_byte_off + half_off, packed)
+        tl.store(scale_base + NUM_NOPE_BLOCKS + b, ue8m0)
+
+    # MXFP4 weight-fold contract:
+    #   index_weights_out = index_weights * softmax_scale * head_scale
+    # NOTE: q_scale is NOT folded here (contrast with the FP8 kernel above).
+    # MXFP4 Q emits a separate ue8m0 scale tensor of shape
+    # (T, H, HEAD_DIM // MXFP4_BLOCK) alongside the packed values, so each
+    # per-block scale is applied by the downstream MXFP4 logits kernel when
+    # dequantizing Q — there is no per-token scalar to fold into `weights`.
+    index_weights = tl.load(
+        index_weights_ptr + tok_idx * index_weights_stride + head_idx
+    ).to(tl.float32)
+    index_weights *= index_weights_softmax_scale
+    index_weights *= index_weights_head_scale
+    tl.store(
+        index_weights_out_ptr + tok_idx * index_weights_out_stride + head_idx,
+        index_weights,
+    )
+
+
+def fused_indexer_q_rope_quant(
+    positions: torch.Tensor,
+    index_q: torch.Tensor,
+    index_q_cos_sin_cache: torch.Tensor,
+    # Index weights
+    index_weights: torch.Tensor,
+    index_weights_softmax_scale: float,
+    index_weights_head_scale: float,
+    use_fp4: bool = False,
+) -> tuple[
+    torch.Tensor | tuple[torch.Tensor, torch.Tensor],
+    torch.Tensor,
+]:
+    """Fused RoPE + quantize Q for the sparse indexer.
+
+    Weight-fold semantics (important — the two paths differ):
+
+    FP8 path (use_fp4=False, default):
+        q_fp8      : (T, H, HEAD_DIM) float8_e4m3fn, per-token-per-head
+                     scalar scale (NOT stored — folded into weights below)
+        weights_out = weights * q_scale * softmax_scale * head_scale
+        Rationale: a single per-token q_scale is a scalar the downstream FP8
+        logits kernel would otherwise multiply in. Folding it into `weights`
+        avoids emitting a separate tensor and is free for the logits kernel.
+
+    MXFP4 path (use_fp4=True):
+        q_packed   : (T, H, HEAD_DIM // 2) uint8 (2 E2M1 nibbles per byte)
+        q_scale    : (T, H, HEAD_DIM // MXFP4_BLOCK_SIZE) uint8 ue8m0 bytes
+        weights_out = weights * softmax_scale * head_scale
+        Rationale: MXFP4 has PER-BLOCK (32-element) scales that live with
+        the Q values — they cannot be folded into a per-token weight
+        scalar, so `weights` carries only the softmax and head scales.
+
+    Returns (q_quant, weights_out) where q_quant is either a Tensor (FP8) or
+    a (values, scales) tuple (MXFP4). This matches the union type accepted
+    by `SparseAttnIndexer.forward_*`.
+    """
+    assert positions.ndim == 1
+    assert index_q.ndim == 3
+    assert index_q_cos_sin_cache.ndim == 2
+
+    num_tokens = positions.shape[0]
+    num_index_q_heads = index_q.shape[1]
+    index_q_head_dim = index_q.shape[2]
+
+    index_weights_out = torch.empty_like(index_weights, dtype=torch.float32)
+
+    if use_fp4:
+        assert index_q_head_dim % MXFP4_BLOCK_SIZE == 0, (
+            f"head_dim={index_q_head_dim} must be a multiple of MXFP4 block "
+            f"size {MXFP4_BLOCK_SIZE}"
+        )
+        num_scale_blocks = index_q_head_dim // MXFP4_BLOCK_SIZE
+        index_q_packed = torch.empty(
+            (num_tokens, num_index_q_heads, index_q_head_dim // 2),
+            dtype=torch.uint8,
+            device=index_q.device,
+        )
+        index_q_scale = torch.empty(
+            (num_tokens, num_index_q_heads, num_scale_blocks),
+            dtype=torch.uint8,
+            device=index_q.device,
+        )
+        if has_cutedsl():
+            # lazily import, otherwise some tests fail due to CUDA driver init failure.
+            from vllm.models.deepseek_v4.nvidia.ops.fused_indexer_q_cutedsl import (
+                fused_indexer_q_rope_quant_mxfp4_cutedsl,
+            )
+
+            fused_indexer_q_rope_quant_mxfp4_cutedsl(
+                positions,
+                index_q,
+                index_q_cos_sin_cache,
+                index_weights,
+                index_weights_softmax_scale,
+                index_weights_head_scale,
+                index_q_packed,
+                index_q_scale,
+                index_weights_out,
+            )
+        else:
+            _fused_indexer_q_rope_mxfp4_kernel[(num_tokens, num_index_q_heads)](
+                positions,
+                index_q,
+                index_q.stride(0),
+                index_q.stride(1),
+                index_q_cos_sin_cache,
+                index_q_cos_sin_cache.stride(0),
+                index_q_cos_sin_cache.shape[-1] // 2,
+                index_q_packed,
+                index_q_packed.stride(0),
+                index_q_packed.stride(1),
+                index_q_scale,
+                index_q_scale.stride(0),
+                index_q_scale.stride(1),
+                index_q_head_dim,
+                MXFP4_BLOCK_SIZE,
+                index_weights,
+                index_weights.stride(0),
+                index_weights_softmax_scale,
+                index_weights_head_scale,
+                index_weights_out,
+                index_weights_out.stride(0),
+                num_warps=1,  # TODO: Tune this
+            )
+
+        # Values stay uint8 (2 E2M1 nibbles per byte). Scales are 4 ue8m0
+        # bytes per (token, head) reinterpreted as one int32, then squeezed
+        # from (T, H, 1) to (T, H) to match DeepGEMM's expected q_sf rank
+        # (prefill wants 2-D (seq_len, num_heads); decode reshapes this to
+        # 3-D (batch, next_n, num_heads)).
+        return (
+            index_q_packed,
+            index_q_scale.view(torch.int32).squeeze(-1),
+        ), index_weights_out
+
+    index_q_fp8 = torch.empty_like(index_q, dtype=torch.float8_e4m3fn)
+    if has_cutedsl():
+        # lazily import, otherwise some tests fail due to CUDA driver init failure.
+        from vllm.models.deepseek_v4.nvidia.ops.fused_indexer_q_cutedsl import (
+            fused_indexer_q_rope_quant_fp8_cutedsl,
+        )
+
+        fused_indexer_q_rope_quant_fp8_cutedsl(
+            positions,
+            index_q,
+            index_q_cos_sin_cache,
+            index_weights,
+            index_weights_softmax_scale,
+            index_weights_head_scale,
+            index_q_fp8,
+            index_weights_out,
+        )
+    else:
+        _fused_indexer_q_rope_quant_kernel[(num_tokens, num_index_q_heads)](
+            positions,
+            index_q,
+            index_q.stride(0),
+            index_q.stride(1),
+            index_q_cos_sin_cache,
+            index_q_cos_sin_cache.stride(0),
+            index_q_cos_sin_cache.shape[-1] // 2,
+            index_q_fp8,
+            index_q_fp8.stride(0),
+            index_q_fp8.stride(1),
+            index_q_head_dim,
+            index_weights,
+            index_weights.stride(0),
+            index_weights_softmax_scale,
+            index_weights_head_scale,
+            index_weights_out,
+            index_weights_out.stride(0),
+            num_warps=1,  # TODO: Tune this
+        )
+    return index_q_fp8, index_weights_out
diff --git a/vllm/models/deepseek_v4/common/ops/fused_inv_rope_fp8_quant.py b/vllm/models/deepseek_v4/common/ops/fused_inv_rope_fp8_quant.py
new file mode 100644
index 000000000000..97fc0962c2b4
--- /dev/null
+++ b/vllm/models/deepseek_v4/common/ops/fused_inv_rope_fp8_quant.py
@@ -0,0 +1,318 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Fused inverse RoPE + block-scaled FP8 quantization kernel for DeepseekV4 attention.
+
+Output scale format is pre-transformed (MN-major TMA-aligned; FP32 on SM90,
+INT32-packed UE8M0 on SM100) so fp8_einsum skips transform_sf_into_required_layout.
+"""
+
+import torch
+
+from vllm.platforms import current_platform
+from vllm.triton_utils import tl, triton
+from vllm.utils.torch_utils import direct_register_custom_op
+
+
+@triton.jit(do_not_specialize=["num_tokens"])
+def _fused_inv_rope_fp8_quant_per_head(
+    o_ptr,
+    positions_ptr,
+    cos_sin_cache_ptr,
+    fp8_ptr,
+    scale_ptr,
+    num_tokens,
+    heads_per_group: tl.constexpr,
+    o_stride_token,
+    o_stride_head,
+    cache_stride_pos,
+    fp8_stride_group,
+    fp8_stride_token,
+    scale_stride_group,
+    scale_stride_k,
+    fp8_max: tl.constexpr,
+    eps: tl.constexpr,
+    QUANT_GROUP_SIZE: tl.constexpr,
+    CHUNKS_PER_HEAD: tl.constexpr,
+    ROPE_START: tl.constexpr,
+    HALF_ROPE: tl.constexpr,
+    TMA_ALIGNED_SCALES: tl.constexpr,
+):
+    # int64: stride multiply overflows int32 past num_tokens=32768 (IMA).
+    pid_token = tl.program_id(0).to(tl.int64)
+    pid_gh = tl.program_id(1).to(tl.int64)
+
+    g = pid_gh // heads_per_group
+    head_in_group = pid_gh % heads_per_group
+    global_head = pid_gh
+    qb_start = head_in_group * CHUNKS_PER_HEAD
+
+    # Padding rows in the TMA-aligned scale buffer: fill with zero and skip quant.
+    if pid_token >= num_tokens:
+        if TMA_ALIGNED_SCALES:
+            scale_addr = (
+                scale_ptr
+                + g * scale_stride_group
+                + pid_token
+                + head_in_group * scale_stride_k
+            )
+            tl.store(scale_addr, tl.zeros((), dtype=tl.int32))
+        else:
+            block_offsets = tl.arange(0, CHUNKS_PER_HEAD)
+            qb_indices = qb_start + block_offsets
+            scale_addrs = (
+                scale_ptr
+                + g * scale_stride_group
+                + pid_token
+                + qb_indices * scale_stride_k
+            )
+            tl.store(scale_addrs, tl.zeros((CHUNKS_PER_HEAD,), dtype=tl.float32))
+        return
+
+    input_base = o_ptr + pid_token * o_stride_token + global_head * o_stride_head
+
+    HEAD_DIM: tl.constexpr = CHUNKS_PER_HEAD * QUANT_GROUP_SIZE
+    offsets = tl.arange(0, HEAD_DIM)
+    x = tl.load(input_base + offsets).to(tl.float32)
+
+    rope_abs_start: tl.constexpr = (CHUNKS_PER_HEAD - 1) * QUANT_GROUP_SIZE + ROPE_START
+    pos = tl.load(positions_ptr + pid_token)
+    cache_base = cos_sin_cache_ptr + pos * cache_stride_pos
+    is_rope = offsets >= rope_abs_start
+    rope_local = offsets - rope_abs_start
+
+    x_partner = tl.load(input_base + (offsets ^ 1), mask=is_rope, other=0.0).to(
+        tl.float32
+    )
+    cs_idx = tl.maximum(rope_local >> 1, 0)
+    cos_v = tl.load(cache_base + cs_idx, mask=is_rope, other=1.0)
+    sin_v = tl.load(cache_base + HALF_ROPE + cs_idx, mask=is_rope, other=0.0)
+    x_add = x * cos_v + x_partner * sin_v
+    x_sub = x * cos_v - x_partner * sin_v
+    is_even = (rope_local & 1) == 0
+    rotated = tl.where(is_even, x_add, x_sub)
+    x = tl.where(is_rope, rotated, x)
+
+    x_2d = tl.reshape(tl.abs(x), (CHUNKS_PER_HEAD, QUANT_GROUP_SIZE))
+    block_absmax = tl.maximum(tl.max(x_2d, axis=1), eps)
+    scale_raw = block_absmax * (1.0 / fp8_max)
+    scales = tl.math.exp2(tl.ceil(tl.log2(scale_raw)))
+
+    scales_exp = tl.reshape(
+        tl.broadcast_to(
+            tl.reshape(scales, (CHUNKS_PER_HEAD, 1)),
+            (CHUNKS_PER_HEAD, QUANT_GROUP_SIZE),
+        ),
+        (HEAD_DIM,),
+    )
+    x_quant = tl.clamp(x / scales_exp, -fp8_max, fp8_max).to(tl.float8e4nv)
+
+    fp8_base = (
+        fp8_ptr
+        + g * fp8_stride_group
+        + pid_token * fp8_stride_token
+        + qb_start * QUANT_GROUP_SIZE
+    )
+    tl.store(fp8_base + offsets, x_quant)
+
+    block_offsets = tl.arange(0, CHUNKS_PER_HEAD)
+    qb_indices = qb_start + block_offsets
+    if TMA_ALIGNED_SCALES:
+        scale_bits = scales.to(tl.int32, bitcast=True)
+        ue8m0_bytes = (scale_bits >> 23) & 0xFF
+        packed_val = tl.sum(ue8m0_bytes << (block_offsets * 8))
+        scale_addr = (
+            scale_ptr
+            + g * scale_stride_group
+            + pid_token
+            + head_in_group * scale_stride_k
+        )
+        tl.store(scale_addr, packed_val)
+    else:
+        scale_addrs = (
+            scale_ptr + g * scale_stride_group + pid_token + qb_indices * scale_stride_k
+        )
+        tl.store(scale_addrs, scales)
+
+
+def fused_inv_rope_fp8_quant(
+    o: torch.Tensor,
+    positions: torch.Tensor,
+    cos_sin_cache: torch.Tensor,
+    n_groups: int,
+    heads_per_group: int,
+    nope_dim: int = 448,
+    rope_dim: int = 64,
+    quant_group_size: int = 128,
+    tma_aligned_scales: bool = False,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Fused inverse RoPE + block-scaled FP8 quantization.
+
+    Args:
+        o: Attention output [num_tokens, num_heads, head_dim] bf16.
+        positions: Token positions [num_tokens] int64.
+        cos_sin_cache: Precomputed [max_pos, rope_dim] with cos||sin.
+        n_groups: Number of output groups.
+        heads_per_group: Heads per group.
+        nope_dim: Non-RoPE dimensions per head (default 448).
+        rope_dim: RoPE dimensions per head (default 64).
+        quant_group_size: FP8 quantization block size (default 128).
+        tma_aligned_scales: Output INT32 packed UE8M0 for SM100 (True)
+                            or FP32 for SM90 (False).
+
+    Returns:
+        o_fp8: [T, G, D] float8_e4m3fn, strides (D, T*D, 1).
+        o_scale: Pre-transformed scale tensor for fp8_einsum.
+    """
+    from vllm.utils.deep_gemm import get_tma_aligned_size
+
+    num_tokens, num_heads, head_dim = o.shape
+    assert num_heads == n_groups * heads_per_group
+    assert head_dim == nope_dim + rope_dim
+    assert head_dim % quant_group_size == 0
+    assert nope_dim % quant_group_size == (quant_group_size - rope_dim)
+    assert rope_dim % 2 == 0
+    assert cos_sin_cache.shape[-1] == rope_dim
+    assert cos_sin_cache.dtype == torch.float32
+
+    d = heads_per_group * head_dim
+    num_scale_blocks = d // quant_group_size
+    chunks_per_head = head_dim // quant_group_size
+
+    fp8_dtype = torch.float8_e4m3fn
+    fp8_max = torch.finfo(fp8_dtype).max
+
+    tma_aligned_T = get_tma_aligned_size(num_tokens, 4)
+    if tma_aligned_scales:
+        packed_sf_k = (num_scale_blocks + 3) // 4
+        scale_inner = packed_sf_k
+    else:
+        scale_inner = num_scale_blocks
+
+    # Run kernel through a custom op so inductor sees an opaque boundary.
+    # It's a pytorch bug, see https://github.com/vllm-project/vllm/issues/41106
+    fp8_buf, scale_buf = torch.ops.vllm.fused_inv_rope_fp8_quant_kernel(
+        o,
+        positions,
+        cos_sin_cache,
+        heads_per_group,
+        quant_group_size,
+        chunks_per_head,
+        nope_dim % quant_group_size,
+        rope_dim // 2,
+        tma_aligned_scales,
+        fp8_max,
+        tma_aligned_T,
+        num_tokens,
+        n_groups,
+        d,
+        scale_inner,
+    )
+    return fp8_buf.transpose(0, 1), scale_buf.transpose(0, 1)
+
+
+def _fused_inv_rope_fp8_quant_kernel_impl(
+    o: torch.Tensor,
+    positions: torch.Tensor,
+    cos_sin_cache: torch.Tensor,
+    heads_per_group: int,
+    quant_group_size: int,
+    chunks_per_head: int,
+    rope_start: int,
+    half_rope: int,
+    tma_aligned_scales: bool,
+    fp8_max: float,
+    tma_aligned_T: int,
+    num_tokens: int,
+    n_groups: int,
+    d: int,
+    scale_inner: int,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    fp8_buf = torch.empty(
+        (n_groups, num_tokens, d),
+        dtype=torch.float8_e4m3fn,
+        device=o.device,
+    )
+    scale_dtype = torch.int32 if tma_aligned_scales else torch.float32
+    scale_buf = torch.empty(
+        n_groups * scale_inner * tma_aligned_T,
+        dtype=scale_dtype,
+        device=o.device,
+    ).as_strided(
+        (n_groups, num_tokens, scale_inner),
+        (scale_inner * tma_aligned_T, 1, tma_aligned_T),
+    )
+    grid = (tma_aligned_T, n_groups * heads_per_group)
+    pdl_kwargs = (
+        {}
+        if current_platform.is_rocm() or current_platform.is_xpu()
+        else {"launch_pdl": False}
+    )
+    _fused_inv_rope_fp8_quant_per_head[grid](
+        o,
+        positions,
+        cos_sin_cache,
+        fp8_buf,
+        scale_buf,
+        num_tokens,
+        heads_per_group=heads_per_group,
+        o_stride_token=o.stride(0),
+        o_stride_head=o.stride(1),
+        cache_stride_pos=cos_sin_cache.stride(0),
+        fp8_stride_group=fp8_buf.stride(0),
+        fp8_stride_token=fp8_buf.stride(1),
+        scale_stride_group=scale_buf.stride(0),
+        scale_stride_k=scale_buf.stride(2),
+        fp8_max=fp8_max,
+        eps=1e-10,
+        QUANT_GROUP_SIZE=quant_group_size,
+        CHUNKS_PER_HEAD=chunks_per_head,
+        ROPE_START=rope_start,
+        HALF_ROPE=half_rope,
+        TMA_ALIGNED_SCALES=tma_aligned_scales,
+        num_stages=1,
+        **pdl_kwargs,
+        num_warps=1,
+    )
+    return fp8_buf, scale_buf
+
+
+def _fused_inv_rope_fp8_quant_kernel_fake(
+    o: torch.Tensor,
+    positions: torch.Tensor,
+    cos_sin_cache: torch.Tensor,
+    heads_per_group: int,
+    quant_group_size: int,
+    chunks_per_head: int,
+    rope_start: int,
+    half_rope: int,
+    tma_aligned_scales: bool,
+    fp8_max: float,
+    tma_aligned_T: int,
+    num_tokens: int,
+    n_groups: int,
+    d: int,
+    scale_inner: int,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    fp8_buf = torch.empty(
+        (n_groups, num_tokens, d),
+        dtype=torch.float8_e4m3fn,
+        device=o.device,
+    )
+    scale_dtype = torch.int32 if tma_aligned_scales else torch.float32
+    scale_buf = torch.empty(
+        n_groups * scale_inner * tma_aligned_T,
+        dtype=scale_dtype,
+        device=o.device,
+    ).as_strided(
+        (n_groups, num_tokens, scale_inner),
+        (scale_inner * tma_aligned_T, 1, tma_aligned_T),
+    )
+    return fp8_buf, scale_buf
+
+
+direct_register_custom_op(
+    op_name="fused_inv_rope_fp8_quant_kernel",
+    op_func=_fused_inv_rope_fp8_quant_kernel_impl,
+    fake_impl=_fused_inv_rope_fp8_quant_kernel_fake,
+)
diff --git a/vllm/models/deepseek_v4/common/ops/fused_qk_rmsnorm.py b/vllm/models/deepseek_v4/common/ops/fused_qk_rmsnorm.py
new file mode 100644
index 000000000000..0dd348a46e26
--- /dev/null
+++ b/vllm/models/deepseek_v4/common/ops/fused_qk_rmsnorm.py
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+
+from vllm.triton_utils import tl, triton
+
+
+@triton.jit
+def _fused_q_kv_rmsnorm_kernel(
+    q_ptr,
+    q_out_ptr,
+    q_weight_ptr,
+    q_in_stride,
+    q_out_stride,
+    kv_ptr,
+    kv_out_ptr,
+    kv_weight_ptr,
+    kv_in_stride,
+    kv_out_stride,
+    eps,
+    Q_SIZE: tl.constexpr,
+    KV_SIZE: tl.constexpr,
+    BLOCK_SIZE: tl.constexpr,
+):
+    # num_tokens goes on grid-x (max 2**31 - 1); task goes on grid-y.
+    # CUDA's grid-y/z are capped at 65535, so putting num_tokens there crashes
+    # the launch at max-num-batched-tokens >= 65536 with "invalid argument".
+    # int64: q_in_stride can be ~24K (128 heads × 192) and overflows int32
+    # past num_tokens ~87K under large chunked prefill.
+    token_idx = tl.program_id(0).to(tl.int64)
+    pid_task = tl.program_id(1)
+
+    if pid_task == 0:
+        SIZE = Q_SIZE
+        row_in = q_ptr + token_idx * q_in_stride
+        weight_ptr = q_weight_ptr
+        row_out = q_out_ptr + token_idx * q_out_stride
+    else:
+        SIZE = KV_SIZE
+        row_in = kv_ptr + token_idx * kv_in_stride
+        weight_ptr = kv_weight_ptr
+        row_out = kv_out_ptr + token_idx * kv_out_stride
+
+    # RMSNorm in fp32 throughout — matches csrc/layernorm_kernels.cu's
+    # `(scalar_t)(x * s_variance * w)` and DeepseekV4's compressor kernel, which
+    # keep x, rrms, and w all in fp32 and perform a single cast at store.
+    block = tl.arange(0, BLOCK_SIZE)
+    mask = block < SIZE
+    x = tl.load(row_in + block, mask=mask, other=0.0).to(tl.float32)
+    variance = tl.sum(x * x, axis=0) / SIZE
+    rrms = tl.rsqrt(variance + eps)
+    w = tl.load(weight_ptr + block, mask=mask, other=0.0).to(tl.float32)
+    y = x * rrms * w
+    tl.store(row_out + block, y.to(row_out.dtype.element_ty), mask=mask)
+
+
+def fused_q_kv_rmsnorm(
+    qr: torch.Tensor,
+    kv: torch.Tensor,
+    q_weight: torch.Tensor,
+    kv_weight: torch.Tensor,
+    eps: float,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    assert qr.ndim == 2 and kv.ndim == 2
+    assert qr.shape[0] == kv.shape[0], (
+        f"token dim mismatch: qr={qr.shape}, kv={kv.shape}"
+    )
+    assert qr.stride(-1) == 1 and kv.stride(-1) == 1
+    assert q_weight.is_contiguous() and kv_weight.is_contiguous()
+
+    q_size = qr.shape[1]
+    kv_size = kv.shape[1]
+    num_tokens = qr.shape[0]
+    qr_out = torch.empty_like(qr)
+    kv_out = torch.empty_like(kv)
+    if num_tokens == 0:
+        return qr_out, kv_out
+
+    block_size = triton.next_power_of_2(max(q_size, kv_size))
+    _fused_q_kv_rmsnorm_kernel[(num_tokens, 2)](
+        qr,
+        qr_out,
+        q_weight,
+        qr.stride(0),
+        qr_out.stride(0),
+        kv,
+        kv_out,
+        kv_weight,
+        kv.stride(0),
+        kv_out.stride(0),
+        eps,
+        Q_SIZE=q_size,
+        KV_SIZE=kv_size,
+        BLOCK_SIZE=block_size,
+    )
+    return qr_out, kv_out
diff --git a/vllm/models/deepseek_v4/compressor.py b/vllm/models/deepseek_v4/compressor.py
new file mode 100644
index 000000000000..4428a7e8802d
--- /dev/null
+++ b/vllm/models/deepseek_v4/compressor.py
@@ -0,0 +1,434 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from dataclasses import dataclass
+from typing import Any, ClassVar, cast
+
+import torch
+from torch import nn
+
+from vllm.config import VllmConfig, get_current_vllm_config
+from vllm.forward_context import get_forward_context
+from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import MergedColumnParallelLinear
+from vllm.models.deepseek_v4.common.ops.fused_compress_quant_cache import (
+    _fused_kv_compress_norm_rope_insert_indexer_attn,
+    _fused_kv_compress_norm_rope_insert_indexer_mxfp4_attn,
+    _fused_kv_compress_norm_rope_insert_sparse_attn,
+)
+from vllm.models.deepseek_v4.common.ops.fused_indexer_q import MXFP4_BLOCK_SIZE
+from vllm.platforms import current_platform
+from vllm.triton_utils import tl, triton
+from vllm.v1.attention.backend import (
+    AttentionBackend,
+    AttentionCGSupport,
+    AttentionMetadataBuilder,
+    CommonAttentionMetadata,
+    MultipleOf,
+)
+from vllm.v1.kv_cache_interface import (
+    KVCacheSpec,
+    MLAAttentionSpec,
+    SlidingWindowMLASpec,
+)
+
+
+class CompressorBackend(AttentionBackend):
+    def __init__(self):
+        super().__init__()
+
+    @staticmethod
+    def get_name() -> str:
+        return "CompressorBackend"
+
+    @staticmethod
+    def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
+        return [MultipleOf(1)]
+
+    @classmethod
+    def get_supported_head_sizes(cls) -> list[int]:
+        return [512, 1024]
+
+    @staticmethod
+    def get_builder_cls() -> type["CompressorMetadataBuilder"]:
+        return CompressorMetadataBuilder
+
+    @staticmethod
+    def get_kv_cache_shape(
+        num_blocks: int,
+        block_size: int,
+        num_kv_heads: int,
+        head_size: int,
+        cache_dtype_str: str = "auto",
+    ) -> tuple[int, ...]:
+        assert num_kv_heads == 1
+        return (num_blocks, block_size, head_size)
+
+    @staticmethod
+    def get_kv_cache_stride_order(
+        include_num_layers_dimension: bool = False,
+    ) -> tuple[int, ...]:
+        if include_num_layers_dimension:
+            return (0, 1, 2, 3)
+        return (0, 1, 2)
+
+
+@dataclass
+class CompressorMetadata:
+    block_table: torch.Tensor
+    slot_mapping: torch.Tensor
+    block_size: int
+
+    token_to_req_indices: torch.Tensor | None = None  # [num_tokens]
+
+
+class CompressorMetadataBuilder(AttentionMetadataBuilder):
+    _cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.ALWAYS
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert isinstance(self.kv_cache_spec, SlidingWindowMLASpec | MLAAttentionSpec)
+        mla_spec = cast(SlidingWindowMLASpec | MLAAttentionSpec, self.kv_cache_spec)
+        self.block_size = mla_spec.block_size
+
+        self.token_to_req_indices = torch.zeros(
+            self.vllm_config.scheduler_config.max_num_batched_tokens,
+            dtype=torch.int32,
+            device=self.device,
+        )
+
+    def build(
+        self,
+        common_prefix_len: int,
+        common_attn_metadata: CommonAttentionMetadata,
+        fast_build: bool = False,
+    ) -> CompressorMetadata:
+        query_start_loc_cpu = common_attn_metadata.query_start_loc_cpu
+        num_reqs = common_attn_metadata.num_reqs
+        query_lens = query_start_loc_cpu[1:] - query_start_loc_cpu[:-1]
+        x = torch.repeat_interleave(torch.arange(num_reqs), query_lens).pin_memory()
+        token_to_req_indices = self.token_to_req_indices[: x.shape[0]]
+        token_to_req_indices.copy_(x, non_blocking=True)
+        return CompressorMetadata(
+            block_table=common_attn_metadata.block_table_tensor.clamp_(min=0),
+            slot_mapping=common_attn_metadata.slot_mapping,
+            block_size=self.block_size,
+            token_to_req_indices=token_to_req_indices,
+        )
+
+
+class CompressorStateCache(torch.nn.Module, AttentionLayerBase):
+    def __init__(
+        self,
+        state_dim: int,
+        dtype: torch.dtype,
+        compress_ratio: int,
+        prefix: str,
+    ):
+        super().__init__()
+        self.state_dim = state_dim
+        self.dtype = dtype
+        self.prefix = prefix
+        self.kv_cache = torch.tensor([])
+        compilation_config = get_current_vllm_config().compilation_config
+        if prefix in compilation_config.static_forward_context:
+            raise ValueError(f"Duplicate layer name: {prefix}")
+        compilation_config.static_forward_context[prefix] = self
+
+        assert self.dtype == torch.float32
+        assert compress_ratio in [4, 128]
+        coff = 1 + (compress_ratio == 4)
+        self.sliding_window = coff * compress_ratio
+        # Block size is constrained by tensor sharing between compressor states
+        # and KV blocks. Since compressor states share the same physical tensor
+        # as KV blocks, they must use the same page size.
+        # The KV block shape [256//4, head_dim] = [64, 584] determines:
+        # - C4 compressor block shape [4, 2*512*2*4] -> block_size = 4
+        # - C128 compressor block shape [8, 512*2*4] -> block_size = 8
+        # TODO(yifan): make block size automatically determined and configurable.
+        if compress_ratio == 4:
+            self.block_size = 4
+        elif compress_ratio == 128:
+            self.block_size = 8
+        else:
+            raise ValueError(f"Invalid compress ratio: {compress_ratio}")
+
+    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec:
+        return SlidingWindowMLASpec(  # only has one vector instead of K + V
+            block_size=self.block_size,
+            num_kv_heads=1,
+            head_size=self.state_dim,
+            dtype=self.dtype,
+            sliding_window=self.sliding_window,
+            alignment=576,  # NOTE: FlashMLA requires 576B alignment
+        )
+
+    def forward(self): ...
+
+    def get_attn_backend(self) -> type[AttentionBackend]:
+        return CompressorBackend
+
+
+class DeepseekCompressor(nn.Module):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        compress_ratio: int,
+        hidden_size: int,
+        head_dim: int,
+        rotate: bool = False,
+        prefix: str = "",
+        k_cache_prefix="",
+        use_fp4_cache: bool = False,
+    ):
+        super().__init__()
+        self.compress_ratio = compress_ratio
+        self.hidden_size = hidden_size
+        self.head_dim = head_dim
+        self.rotate = rotate
+        self.prefix = prefix
+        self.k_cache_prefix = k_cache_prefix
+        self.use_fp4_cache = use_fp4_cache
+
+        config = vllm_config.model_config.hf_config
+        self.rope_head_dim = config.qk_rope_head_dim
+        self.nope_head_dim = self.head_dim - self.rope_head_dim
+        self.rms_norm_eps = config.rms_norm_eps
+        self.device = current_platform.device_type
+        self.max_num_reqs = vllm_config.scheduler_config.max_num_seqs
+        self.max_model_len = vllm_config.model_config.max_model_len
+
+        self.overlap = compress_ratio == 4
+        self.coff = 1 + self.overlap
+
+        state_dtype = torch.float32
+        self.ape = nn.Parameter(
+            torch.empty(
+                (compress_ratio, self.coff * self.head_dim),
+                dtype=state_dtype,
+                device=self.device,
+            ),
+            requires_grad=False,
+        )
+
+        self.fused_wkv_wgate = MergedColumnParallelLinear(
+            self.hidden_size,
+            [self.coff * self.head_dim, self.coff * self.head_dim],
+            bias=False,
+            return_bias=False,
+            quant_config=None,
+            disable_tp=True,
+            prefix=f"{prefix}.fused_wkv_wgate",
+        )
+        self.norm = RMSNorm(self.head_dim, self.rms_norm_eps)
+
+        self.state_cache = CompressorStateCache(
+            state_dim=2 * self.coff * self.head_dim,  # kv_state + score_state
+            dtype=state_dtype,
+            compress_ratio=compress_ratio,
+            prefix=f"{prefix}.state_cache",
+        )
+
+        # Save reference to static_forward_context for forward-time KV cache lookup.
+        # get_current_vllm_config() is only available during __init__, not forward.
+        self._static_forward_context = (
+            vllm_config.compilation_config.static_forward_context
+        )
+
+        if self.head_dim == 512:
+            assert not use_fp4_cache, (
+                "MXFP4 cache is only supported for indexer (head=128)"
+            )
+            self._fused_kernel = _fused_kv_compress_norm_rope_insert_sparse_attn
+            self._quant_block = 64
+            self._token_stride = self.nope_head_dim + self.rope_head_dim * 2
+            self._scale_dim = self.nope_head_dim // 64 + 1  # 7 real + 1 pad
+            self._num_warps = 4
+        elif self.head_dim == 128:
+            if use_fp4_cache:
+                self._fused_kernel = (
+                    _fused_kv_compress_norm_rope_insert_indexer_mxfp4_attn
+                )
+                self._quant_block = MXFP4_BLOCK_SIZE
+                self._token_stride = self.head_dim // 2
+                self._scale_dim = self.head_dim // MXFP4_BLOCK_SIZE
+            else:
+                self._fused_kernel = _fused_kv_compress_norm_rope_insert_indexer_attn
+                self._quant_block = 128
+                self._token_stride = self.head_dim
+                self._scale_dim = 4  # single float32 scale
+            self._num_warps = 1
+        else:
+            raise ValueError(
+                f"Unsupported head_dim for fused quant+cache: {self.head_dim}"
+            )
+
+    def forward(
+        self,
+        # [num_tokens, 2 * self.coff * self.head_dim]
+        kv_score: torch.Tensor,
+        # [num_tokens]
+        positions: torch.Tensor,
+        rotary_emb,
+    ) -> None:
+        # Each of shape [num_tokens, coff * self.head_dim]
+        # input bf16, output are fp32
+        kv, score = kv_score.split(
+            [self.coff * self.head_dim, self.coff * self.head_dim], dim=-1
+        )
+
+        # Get the metadata and handle dummy profiling run.
+        attn_metadata = get_forward_context().attn_metadata
+        if not isinstance(attn_metadata, dict):
+            return
+
+        state_metadata = cast(
+            CompressorMetadata, attn_metadata[self.state_cache.prefix]
+        )
+        token_to_req_indices = state_metadata.token_to_req_indices
+        slot_mapping = state_metadata.slot_mapping
+        num_actual = slot_mapping.shape[0]
+        block_table = state_metadata.block_table
+        block_size = state_metadata.block_size
+
+        # [num_blocks, block_size, kv_dim+score_dim], where kv_dim == score_dim
+        state_cache = self.state_cache.kv_cache
+        # kv_state stored in first half, score_state stored in second half
+        state_width = state_cache.shape[-1] // 2
+        pdl_kwargs = (
+            {}
+            if current_platform.is_rocm() or current_platform.is_xpu()
+            else {"launch_pdl": False}
+        )
+
+        # Store the KV and score (with fused APE addition) in the state.
+        # NOTE: PDL is disabled — both this kernel and _fused_kernel below
+        # depend on preceding kernel outputs (kv/score from the cublas GEMM;
+        # state_cache from this kernel) but neither emits/waits on PDL grid
+        # dependency primitives, so launch_pdl=True caused a read-after-write
+        # race and non-deterministic output.
+        _save_partial_states_kernel[(num_actual,)](
+            kv,
+            kv.stride(0),
+            score,
+            score.stride(0),
+            self.ape,
+            self.ape.stride(0),
+            positions,
+            state_cache,
+            state_cache.stride(0),
+            state_cache.stride(1),
+            slot_mapping,
+            block_size,
+            HEAD_SIZE=kv.shape[-1],
+            TRITON_BLOCK_SIZE=triton.next_power_of_2(kv.shape[-1]),
+            STATE_WIDTH=state_width,
+            COMPRESS_RATIO=self.compress_ratio,
+            **pdl_kwargs,
+        )
+
+        # Fused: compress → RMSNorm → RoPE → FP8 quant → KV cache write.
+        # RoPE requirements (kernel applies forward GPT-J style rotation):
+        # - is_neox_style=False (interleaved pairs, NOT split-half)
+        # - cos_sin_cache layout: [max_pos, rope_head_dim] with first half cos,
+        #   second half sin (per-pair, length rope_head_dim // 2 each)
+        # - applied to LAST rope_head_dim elements of head_dim
+        # - position used: (positions // compress_ratio) * compress_ratio
+        cos_sin_cache = rotary_emb.cos_sin_cache
+        k_cache_metadata = cast(Any, attn_metadata[self.k_cache_prefix])
+        kv_cache = self._static_forward_context[self.k_cache_prefix].kv_cache
+
+        self._fused_kernel[(num_actual,)](
+            # state cache
+            state_cache,
+            state_cache.stride(0),
+            state_cache.stride(1),
+            # metadata
+            token_to_req_indices,
+            positions,
+            slot_mapping,
+            block_table,
+            block_table.stride(0),
+            block_size,
+            # RMSNorm
+            self.norm.weight,
+            self.rms_norm_eps,
+            # RoPE
+            cos_sin_cache,
+            cos_sin_cache.stride(0),
+            # KV cache
+            kv_cache,
+            k_cache_metadata.slot_mapping,
+            kv_cache.shape[1],  # paged KV cache block size (tokens per block)
+            # constexprs
+            HEAD_SIZE=self.head_dim,
+            TRITON_BLOCK_SIZE=triton.next_power_of_2(self.head_dim),
+            STATE_WIDTH=state_width,
+            COMPRESS_RATIO=self.compress_ratio,
+            OVERLAP=self.overlap,
+            ROPE_HEAD_DIM=self.rope_head_dim,
+            FP8_MAX=448.0,
+            QUANT_BLOCK=self._quant_block,
+            TOKEN_STRIDE=self._token_stride,
+            SCALE_DIM=self._scale_dim,
+            KV_BLOCK_STRIDE=kv_cache.stride(0),
+            num_warps=self._num_warps,
+            **pdl_kwargs,
+        )
+
+
+@triton.jit
+def _save_partial_states_kernel(
+    kv_ptr,
+    kv_stride,
+    score_ptr,
+    score_stride,
+    ape_ptr,
+    ape_stride,
+    positions_ptr,
+    state_cache_ptr,
+    state_cache_stride0,
+    state_cache_stride1,
+    slot_mapping_ptr,
+    block_size,
+    HEAD_SIZE: tl.constexpr,
+    TRITON_BLOCK_SIZE: tl.constexpr,
+    # state_cache last dim packs [kv_state, score_state], each STATE_WIDTH wide.
+    STATE_WIDTH: tl.constexpr,
+    COMPRESS_RATIO: tl.constexpr,
+):
+    token_idx = tl.program_id(0)
+    slot_id = tl.load(slot_mapping_ptr + token_idx)
+
+    # Skip padded / invalid tokens (slot_id == -1 is the PAD sentinel used
+    # by vLLM).  During CUDA graph replay the batch may contain padding
+    # tokens whose slot_mapping is -1; writing to kv_state[-1] would be an
+    # illegal memory access.
+    if slot_id < 0:
+        return
+
+    block_idx = slot_id // block_size
+    pos_in_block = slot_id % block_size
+    base_ptr = (
+        state_cache_ptr
+        + block_idx * state_cache_stride0
+        + pos_in_block * state_cache_stride1
+    )
+
+    block = tl.arange(0, TRITON_BLOCK_SIZE)
+    mask = block < HEAD_SIZE
+
+    kv = tl.load(kv_ptr + token_idx * kv_stride + block, mask=mask)
+    tl.store(base_ptr + block, kv, mask=mask)
+
+    # Fused: score += ape[position % compress_ratio]
+    position = tl.load(positions_ptr + token_idx)
+    ape_row = position % COMPRESS_RATIO
+    ape = tl.load(ape_ptr + ape_row * ape_stride + block, mask=mask)
+    score = tl.load(score_ptr + token_idx * score_stride + block, mask=mask)
+    tl.store(
+        base_ptr + STATE_WIDTH + block,
+        score + ape,
+        mask=mask,
+    )
diff --git a/vllm/models/deepseek_v4/nvidia/__init__.py b/vllm/models/deepseek_v4/nvidia/__init__.py
new file mode 100644
index 000000000000..208f01a7cb5e
--- /dev/null
+++ b/vllm/models/deepseek_v4/nvidia/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
diff --git a/vllm/models/deepseek_v4/nvidia/flashmla.py b/vllm/models/deepseek_v4/nvidia/flashmla.py
new file mode 100644
index 000000000000..41630456561a
--- /dev/null
+++ b/vllm/models/deepseek_v4/nvidia/flashmla.py
@@ -0,0 +1,402 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from abc import abstractmethod
+from typing import TYPE_CHECKING, ClassVar, cast
+
+import torch
+
+from vllm.forward_context import get_forward_context
+from vllm.models.deepseek_v4.common.ops import (
+    combine_topk_swa_indices,
+    compute_global_topk_indices_and_lens,
+    dequantize_and_gather_k_cache,
+)
+from vllm.v1.attention.backend import (
+    AttentionBackend,
+    MultipleOf,
+    SparseMLAAttentionImpl,
+)
+from vllm.v1.attention.backends.mla.flashmla_sparse import (
+    FlashMLASparseBackend,
+    FlashMLASparseMetadata,
+)
+from vllm.v1.attention.ops.flashmla import (
+    flash_mla_sparse_fwd,
+    flash_mla_with_kvcache,
+)
+from vllm.v1.worker.workspace import current_workspace_manager
+
+if TYPE_CHECKING:
+    from vllm.models.deepseek_v4.nvidia.ops.attention import (
+        DeepseekV4MLAAttention,
+    )
+    from vllm.v1.attention.backends.mla.sparse_swa import DeepseekSparseSWAMetadata
+
+
+class DeepseekV4SparseMLAAttentionImpl(SparseMLAAttentionImpl[FlashMLASparseMetadata]):
+    """Abstract parent for DeepseekV4 sparse MLA impls.
+
+    V4 sparse MLA is driven by the layer (``DeepseekV4MLAAttention.forward``)
+    rather than the v1 framework, so ``forward_mqa`` is overridden with a
+    classmethod that takes the layer as its first argument. This Liskov-broken
+    override is intentional: the grandparent's instance-method ``forward_mqa``
+    is never called on V4 layers.
+    """
+
+    backend_cls: ClassVar[type[AttentionBackend]]
+
+    # Prefill is processed in fixed-size chunks; this bounds the bf16 kv-gather
+    # workspace allocated in _forward_prefill and is also read by the V4 layer's
+    # dummy-run path to pre-reserve that workspace.
+    PREFILL_CHUNK_SIZE: ClassVar[int] = 4
+
+    @classmethod
+    @abstractmethod
+    def forward_mqa(  # type: ignore[override]
+        cls,
+        layer: "DeepseekV4MLAAttention",
+        q: torch.Tensor,
+        kv: torch.Tensor,
+        positions: torch.Tensor,
+        output: torch.Tensor,
+    ) -> None:
+        raise NotImplementedError
+
+
+class DeepseekV4FlashMLASparseBackend(FlashMLASparseBackend):
+    @staticmethod
+    def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
+        return [256]
+
+    @staticmethod
+    def get_name() -> str:
+        return "V4_FLASHMLA_SPARSE"
+
+    @staticmethod
+    def get_impl_cls() -> type["DeepseekV4SparseMLAAttentionImpl"]:
+        return DeepseekV4FlashMLASparseImpl
+
+    @classmethod
+    def get_supported_head_sizes(cls) -> list[int]:
+        # DeepSeek V4 layout: 448 NoPE + 64 RoPE = 512 (overrides the
+        # V3.2 default of 576 from FlashMLASparseBackend).
+        return [512]
+
+    @staticmethod
+    def get_kv_cache_shape(
+        num_blocks: int,
+        block_size: int,
+        num_kv_heads: int,
+        head_size: int,
+        cache_dtype_str: str = "auto",
+    ) -> tuple[int, ...]:
+        if cache_dtype_str == "fp8_ds_mla":
+            # DeepseekV4 main MLA: 584B per token (448 NoPE + 128 RoPE + 8 fp8 scale).
+            # head_size passed in is the semantic head_dim (512).
+            return (num_blocks, block_size, 584)
+        else:
+            return (num_blocks, block_size, head_size)
+
+
+class DeepseekV4FlashMLASparseImpl(DeepseekV4SparseMLAAttentionImpl):
+    """FlashMLA sparse MLA implementation for DeepSeek V4's custom MLA layer."""
+
+    backend_cls = DeepseekV4FlashMLASparseBackend
+
+    @classmethod
+    def forward_mqa(  # type: ignore[override]
+        cls,
+        layer: "DeepseekV4MLAAttention",
+        q: torch.Tensor,
+        kv: torch.Tensor,
+        positions: torch.Tensor,
+        output: torch.Tensor,
+    ) -> None:
+        assert output.shape == q.shape, (
+            f"output buffer shape {output.shape} must match q shape {q.shape}"
+        )
+        assert output.dtype == q.dtype, (
+            f"output buffer dtype {output.dtype} must match q dtype {q.dtype}"
+        )
+
+        # Get SWA and indexer metadata from forward context
+        forward_context = get_forward_context()
+        attn_metadata = forward_context.attn_metadata
+
+        if attn_metadata is None:
+            # Warmup dummy run: no real metadata. Reserve the same bf16
+            # gather workspace _forward_prefill would; the dequantize / topk
+            # / sparse_fwd kernels are skipped this step.
+            swa_only = layer.compress_ratio <= 1
+            N = (
+                0
+                if swa_only
+                else (layer.max_model_len + layer.compress_ratio - 1)
+                // layer.compress_ratio
+            )
+            M = N + layer.window_size + layer.max_num_batched_tokens
+            current_workspace_manager().get_simultaneous(
+                ((cls.PREFILL_CHUNK_SIZE, M, q.shape[-1]), torch.bfloat16),
+            )
+            output.zero_()
+            return
+
+        assert isinstance(attn_metadata, dict)
+        flashmla_metadata = cast(
+            FlashMLASparseMetadata | None, attn_metadata.get(layer.prefix)
+        )
+        swa_metadata = cast(
+            "DeepseekSparseSWAMetadata | None",
+            attn_metadata.get(layer.swa_cache_layer.prefix),
+        )
+        assert swa_metadata is not None
+
+        swa_only = layer.compress_ratio <= 1
+        # SWA-only layers (compress_ratio <= 1) don't have their own KV cache
+        # allocation, so layer.kv_cache may be empty after profiling cleanup.
+        self_kv_cache = layer.kv_cache if not swa_only else None
+        swa_kv_cache = layer.swa_cache_layer.kv_cache
+
+        # Split prefill and decode
+        num_decodes = swa_metadata.num_decodes
+        num_prefills = swa_metadata.num_prefills
+        num_decode_tokens = swa_metadata.num_decode_tokens
+
+        if num_prefills > 0:
+            cls._forward_prefill(
+                layer=layer,
+                q=q[num_decode_tokens:],
+                positions=positions[num_decode_tokens:],
+                compressed_k_cache=self_kv_cache,
+                swa_k_cache=swa_kv_cache,
+                output=output[num_decode_tokens:],
+                attn_metadata=flashmla_metadata,
+                swa_metadata=swa_metadata,
+            )
+        if num_decodes > 0:
+            cls._forward_decode(
+                layer=layer,
+                q=q[:num_decode_tokens],
+                kv_cache=self_kv_cache,
+                swa_metadata=swa_metadata,
+                attn_metadata=flashmla_metadata,
+                swa_only=swa_only,
+                output=output[:num_decode_tokens],
+            )
+
+    @classmethod
+    def _forward_decode(
+        cls,
+        layer: "DeepseekV4MLAAttention",
+        q: torch.Tensor,
+        kv_cache: torch.Tensor | None,  # Only used when compress_ratio > 1
+        swa_metadata: "DeepseekSparseSWAMetadata",
+        attn_metadata: FlashMLASparseMetadata | None,
+        swa_only: bool,
+        output: torch.Tensor,
+    ) -> None:
+        num_decodes = swa_metadata.num_decodes
+        num_decode_tokens = swa_metadata.num_decode_tokens
+
+        topk_indices = None
+        topk_lens = None
+        if not swa_only:
+            assert attn_metadata is not None
+            assert swa_metadata.is_valid_token is not None
+            block_size = attn_metadata.block_size // layer.compress_ratio
+            is_valid = swa_metadata.is_valid_token[:num_decode_tokens]
+            if layer.compress_ratio == 4:
+                # C4A: local indices differ per layer (filled by Indexer).
+                assert layer.topk_indices_buffer is not None
+                global_indices, topk_lens = compute_global_topk_indices_and_lens(
+                    layer.topk_indices_buffer[:num_decode_tokens],
+                    swa_metadata.token_to_req_indices,
+                    attn_metadata.block_table[:num_decodes],
+                    block_size,
+                    is_valid,
+                )
+                topk_indices = global_indices.view(num_decode_tokens, 1, -1)
+            else:
+                # C128A: pre-computed during metadata build.
+                topk_indices = attn_metadata.c128a_global_decode_topk_indices
+                topk_lens = attn_metadata.c128a_decode_topk_lens
+
+        swa_indices = swa_metadata.decode_swa_indices
+        swa_lens = swa_metadata.decode_swa_lens
+
+        # We treat queries in the same seq as different queries
+        # and later we only attend by generated indices.
+        # q arrives pre-padded to layer.padded_heads by the outer wrapper.
+        q = q.unsqueeze(1)
+
+        # Prepare SWA cache (num_blocks, swa_block_size, 1, head_bytes)
+        # Use unsqueeze to preserve strides (handles padded blocks correctly)
+        swa_cache = layer.swa_cache_layer.kv_cache.unsqueeze(-2)
+        # Reshape KV cache to (num_blocks, block_size, 1, head_bytes)
+        if kv_cache is not None:
+            kv_cache = kv_cache.unsqueeze(-2)
+
+        # One FlashMLASchedMeta per layer type, shared across all same-type
+        # layers within this decode step. The first forward call per type
+        # triggers the in-kernel planner (allocating tile_scheduler_metadata
+        # and num_splits via PyTorch's graph-aware allocator so CUDA graph
+        # capture reuses the same addresses on replay); subsequent same-type
+        # layers see have_initialized=True and skip the planner.
+        if layer.compress_ratio <= 1:
+            tile_metadata = swa_metadata.tile_sched_swaonly
+        elif layer.compress_ratio == 4:
+            tile_metadata = swa_metadata.tile_sched_c4a
+        elif layer.compress_ratio == 128:
+            tile_metadata = swa_metadata.tile_sched_c128a
+        else:
+            raise ValueError(
+                f"Unsupported compress_ratio={layer.compress_ratio}; "
+                "expected 1, 4, or 128."
+            )
+        assert tile_metadata is not None, (
+            "swa_metadata missing tile_sched entry for "
+            f"compress_ratio={layer.compress_ratio}; "
+            "DeepseekSparseSWAMetadataBuilder.build_tile_scheduler did not "
+            "allocate one for this layer type."
+        )
+
+        out, _ = flash_mla_with_kvcache(
+            q=q,
+            k_cache=swa_cache,
+            block_table=None,
+            head_dim_v=512,
+            tile_scheduler_metadata=tile_metadata,
+            cache_seqlens=None,
+            is_fp8_kvcache=True,
+            indices=swa_indices,
+            topk_length=swa_lens,
+            softmax_scale=layer.scale,
+            attn_sink=layer.attn_sink,
+            extra_k_cache=kv_cache if not swa_only else None,
+            extra_indices_in_kvcache=topk_indices,
+            extra_topk_length=topk_lens,
+            out=output.unsqueeze(1),
+        )
+
+    @classmethod
+    def _forward_prefill(
+        cls,
+        layer: "DeepseekV4MLAAttention",
+        q: torch.Tensor,
+        positions: torch.Tensor,
+        compressed_k_cache: torch.Tensor | None,  # Only used when compress_ratio > 1
+        swa_k_cache: torch.Tensor,
+        output: torch.Tensor,
+        attn_metadata: FlashMLASparseMetadata | None,
+        swa_metadata: "DeepseekSparseSWAMetadata",
+    ) -> None:
+        swa_only = attn_metadata is None
+
+        num_prefills = swa_metadata.num_prefills
+        num_prefill_tokens = swa_metadata.num_prefill_tokens
+        num_decodes = swa_metadata.num_decodes
+        num_decode_tokens = swa_metadata.num_decode_tokens
+
+        # Use pre-computed prefill metadata.
+        seq_lens = swa_metadata.prefill_seq_lens
+        gather_lens = swa_metadata.prefill_gather_lens
+        assert seq_lens is not None
+        assert gather_lens is not None
+
+        # Derive prefill-local token offsets from the full query_start_loc_cpu.
+        query_start_loc_cpu = swa_metadata.query_start_loc_cpu
+        query_start_loc = swa_metadata.query_start_loc
+        assert query_start_loc_cpu is not None
+        assert query_start_loc is not None
+        prefill_token_base = query_start_loc_cpu[num_decodes]
+
+        if not swa_only:
+            if layer.compress_ratio == 4:
+                assert layer.topk_indices_buffer is not None
+                topk_indices = layer.topk_indices_buffer[num_decode_tokens:]
+                topk_indices = topk_indices[:num_prefill_tokens]
+            else:
+                # C128A: pre-computed during metadata build.
+                assert attn_metadata is not None
+                topk_indices = attn_metadata.c128a_prefill_topk_indices
+            top_k = topk_indices.shape[-1]
+            # Compressed region must fit the full compressed pool (seq_len //
+            # compress_ratio), not just top_k. top_k bounds how many indices
+            # the indexer selects, not the pool size it indexes into.
+            N = (layer.max_model_len + layer.compress_ratio - 1) // layer.compress_ratio
+        else:
+            # NOTE(woosuk): topk_indices will not be used for SWA-only layers.
+            assert layer.topk_indices_buffer is not None
+            topk_indices = layer.topk_indices_buffer[num_decode_tokens:]
+            top_k = 0
+            N = 0
+
+        M = N + layer.window_size + layer.max_num_batched_tokens
+        chunk_size_const = cls.PREFILL_CHUNK_SIZE
+        num_chunks = (num_prefills + chunk_size_const - 1) // chunk_size_const
+
+        workspace_manager = current_workspace_manager()
+        kv = workspace_manager.get_simultaneous(
+            ((chunk_size_const, M, q.shape[-1]), torch.bfloat16),
+        )[0]
+        for chunk_idx in range(num_chunks):
+            chunk_start = chunk_idx * chunk_size_const
+            chunk_end = min(chunk_start + chunk_size_const, num_prefills)
+            chunk_size = chunk_end - chunk_start
+            if not swa_only:
+                # Gather compressed KV
+                assert attn_metadata is not None
+                block_table = attn_metadata.block_table[num_decodes:]
+                dequantize_and_gather_k_cache(
+                    kv[:chunk_size],
+                    compressed_k_cache,
+                    seq_lens=seq_lens[chunk_start:chunk_end] // layer.compress_ratio,
+                    gather_lens=None,
+                    block_table=block_table[chunk_start:chunk_end],
+                    block_size=attn_metadata.block_size // layer.compress_ratio,
+                    offset=0,
+                )
+
+            # Gather SWA KV
+            swa_block_table = swa_metadata.block_table[num_decodes:]
+            dequantize_and_gather_k_cache(
+                kv[:chunk_size],
+                swa_k_cache,
+                seq_lens=seq_lens[chunk_start:chunk_end],
+                gather_lens=gather_lens[chunk_start:chunk_end],
+                block_table=swa_block_table[chunk_start:chunk_end],
+                block_size=swa_metadata.block_size,
+                offset=N,
+            )
+
+            # Combine the topk indices and SWA indices for gathered KV cache
+            query_start = (
+                query_start_loc_cpu[num_decodes + chunk_start] - prefill_token_base
+            )
+            query_end = (
+                query_start_loc_cpu[num_decodes + chunk_end] - prefill_token_base
+            )
+
+            combined_indices, combined_lens = combine_topk_swa_indices(
+                topk_indices[query_start:query_end],
+                query_start_loc[
+                    num_decodes + chunk_start : num_decodes + chunk_end + 1
+                ],
+                seq_lens[chunk_start:chunk_end],
+                gather_lens[chunk_start:chunk_end],
+                layer.window_size,
+                layer.compress_ratio,
+                top_k,
+                M,
+                N,
+            )
+            flash_mla_sparse_fwd(
+                q=q[query_start:query_end],
+                kv=kv.view(-1, 1, q.shape[-1]),
+                indices=combined_indices.unsqueeze(1),
+                sm_scale=layer.scale,
+                attn_sink=layer.attn_sink,
+                topk_length=combined_lens,
+                out=output[query_start:query_end],
+            )
diff --git a/vllm/models/deepseek_v4/nvidia/model.py b/vllm/models/deepseek_v4/nvidia/model.py
new file mode 100644
index 000000000000..6c4f058cfb1e
--- /dev/null
+++ b/vllm/models/deepseek_v4/nvidia/model.py
@@ -0,0 +1,1638 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import typing
+from collections.abc import Callable, Iterable
+from itertools import islice
+
+import regex as re
+import torch
+import torch.nn as nn
+
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import VllmConfig
+from vllm.distributed import (
+    get_ep_group,
+    get_pp_group,
+    get_tensor_model_parallel_rank,
+    get_tensor_model_parallel_world_size,
+)
+from vllm.forward_context import get_forward_context
+from vllm.model_executor.layers.activation import SiluAndMul, SiluAndMulWithClamp
+from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe.router.fused_topk_bias_router import (
+    fused_topk_bias,
+)
+from vllm.model_executor.layers.fused_moe.router.gate_linear import GateLinear
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import (
+    ColumnParallelLinear,
+    MergedColumnParallelLinear,
+    RowParallelLinear,
+)
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.mhc import (
+    HCHeadOp,
+    MHCFusedPostPreOp,
+    MHCPostOp,
+    MHCPreOp,
+)
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    ParallelLMHead,
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.model_executor.models.interfaces import SupportsPP
+from vllm.model_executor.models.utils import (
+    AutoWeightsLoader,
+    PPMissingLayer,
+    WeightsMapper,
+    extract_layer_index,
+    is_pp_missing_parameter,
+    make_layers,
+    maybe_prefix,
+)
+from vllm.model_executor.utils import set_weight_attrs
+from vllm.models.deepseek_v4.nvidia.ops.attention import (
+    DeepseekV4Indexer,
+    DeepseekV4MLAModules,
+    DeepseekV4MultiHeadLatentAttentionWrapper,
+)
+from vllm.platforms import current_platform
+from vllm.sequence import IntermediateTensors
+from vllm.triton_utils import tl, triton
+from vllm.utils.torch_utils import direct_register_custom_op
+
+
+class DeepseekV4MLP(nn.Module):
+    def __init__(
+        self,
+        hidden_size: int,
+        intermediate_size: int,
+        hidden_act: str,
+        swiglu_limit: float | None = None,
+        quant_config: QuantizationConfig | None = None,
+        reduce_results: bool = True,
+        is_sequence_parallel: bool = False,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+
+        # If is_sequence_parallel, the input and output tensors are sharded
+        # across the ranks within the tp_group. In this case the weights are
+        # replicated and no collective ops are needed.
+        # Otherwise we use standard TP with an allreduce at the end.
+        self.gate_up_proj = MergedColumnParallelLinear(
+            hidden_size,
+            [intermediate_size] * 2,
+            bias=False,
+            quant_config=quant_config,
+            disable_tp=is_sequence_parallel,
+            prefix=f"{prefix}.gate_up_proj",
+        )
+        self.down_proj = RowParallelLinear(
+            intermediate_size,
+            hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            reduce_results=reduce_results,
+            disable_tp=is_sequence_parallel,
+            prefix=f"{prefix}.down_proj",
+        )
+        if hidden_act != "silu":
+            raise ValueError(
+                f"Unsupported activation: {hidden_act}. Only silu is supported for now."
+            )
+        if swiglu_limit is not None:
+            self.act_fn = SiluAndMulWithClamp(swiglu_limit)
+        else:
+            self.act_fn = SiluAndMul()
+
+    def forward(self, x):
+        gate_up, _ = self.gate_up_proj(x)
+        x = self.act_fn(gate_up)
+        x, _ = self.down_proj(x)
+        return x
+
+
+@triton.jit
+def _deepseek_v4_stage_mega_moe_inputs_kernel(
+    hidden_states,
+    x_fp8,
+    x_sf,
+    topk_ids,
+    topk_weights,
+    topk_idx_out,
+    topk_weights_out,
+    hidden_stride_m: tl.constexpr,
+    hidden_stride_k: tl.constexpr,
+    x_stride_m: tl.constexpr,
+    x_stride_k: tl.constexpr,
+    x_sf_stride_m: tl.constexpr,
+    x_sf_stride_k: tl.constexpr,
+    topk_ids_stride_m: tl.constexpr,
+    topk_ids_stride_k: tl.constexpr,
+    topk_weights_stride_m: tl.constexpr,
+    topk_weights_stride_k: tl.constexpr,
+    topk_idx_stride_m: tl.constexpr,
+    topk_idx_stride_k: tl.constexpr,
+    topk_weights_out_stride_m: tl.constexpr,
+    topk_weights_out_stride_k: tl.constexpr,
+    hidden_size: tl.constexpr,
+    top_k: tl.constexpr,
+    BLOCK_K: tl.constexpr,
+    GROUP_K: tl.constexpr,
+    BLOCK_TOPK: tl.constexpr,
+) -> None:
+    token_id = tl.program_id(0)
+    k_block_id = tl.program_id(1)
+
+    k_offsets = k_block_id * BLOCK_K + tl.arange(0, BLOCK_K)
+    k_mask = k_offsets < hidden_size
+    hidden = tl.load(
+        hidden_states + token_id * hidden_stride_m + k_offsets * hidden_stride_k,
+        mask=k_mask,
+        other=0.0,
+    ).to(tl.float32)
+
+    num_groups: tl.constexpr = BLOCK_K // GROUP_K
+    hidden_groups = tl.reshape(tl.abs(hidden), [num_groups, GROUP_K])
+    amax = tl.max(hidden_groups, axis=1)
+    amax = tl.maximum(amax, 1.0e-4)
+
+    scale = amax / 448.0
+    scale_bits = scale.to(tl.uint32, bitcast=True)
+    scale_exp = ((scale_bits >> 23) & 0xFF) + ((scale_bits & 0x7FFFFF) != 0).to(
+        tl.uint32
+    )
+    scale_exp = tl.minimum(tl.maximum(scale_exp, 1), 254)
+    rounded_scale = (scale_exp << 23).to(tl.float32, bitcast=True)
+
+    hidden_groups = tl.reshape(hidden, [num_groups, GROUP_K])
+    scaled = hidden_groups * (1.0 / rounded_scale)[:, None]
+    scaled = tl.reshape(scaled, [BLOCK_K])
+    fp8 = scaled.to(tl.float8e4nv)
+    tl.store(
+        x_fp8 + token_id * x_stride_m + k_offsets * x_stride_k,
+        fp8,
+        mask=k_mask,
+    )
+
+    scale_offsets = tl.arange(0, num_groups)
+    packed_scale = tl.sum(scale_exp << (scale_offsets * 8), axis=0).to(tl.int32)
+    tl.store(
+        x_sf + token_id * x_sf_stride_m + k_block_id * x_sf_stride_k,
+        packed_scale,
+    )
+
+    if k_block_id == 0:
+        topk_offsets = tl.arange(0, BLOCK_TOPK)
+        topk_mask = topk_offsets < top_k
+
+        ids = tl.load(
+            topk_ids + token_id * topk_ids_stride_m + topk_offsets * topk_ids_stride_k,
+            mask=topk_mask,
+            other=0,
+        ).to(tl.int64)
+        tl.store(
+            topk_idx_out
+            + token_id * topk_idx_stride_m
+            + topk_offsets * topk_idx_stride_k,
+            ids,
+            mask=topk_mask,
+        )
+
+        weights = tl.load(
+            topk_weights
+            + token_id * topk_weights_stride_m
+            + topk_offsets * topk_weights_stride_k,
+            mask=topk_mask,
+            other=0.0,
+        )
+        tl.store(
+            topk_weights_out
+            + token_id * topk_weights_out_stride_m
+            + topk_offsets * topk_weights_out_stride_k,
+            weights,
+            mask=topk_mask,
+        )
+
+
+def _stage_deepseek_v4_mega_moe_inputs(
+    hidden_states: torch.Tensor,
+    topk_weights: torch.Tensor,
+    topk_ids: torch.Tensor,
+    x_fp8: torch.Tensor,
+    x_sf: torch.Tensor,
+    topk_idx_out: torch.Tensor,
+    topk_weights_out: torch.Tensor,
+) -> None:
+    num_tokens, hidden_size = hidden_states.shape
+    if num_tokens == 0:
+        return
+    if hidden_size % 128 != 0:
+        raise ValueError(
+            "DeepSeek V4 MegaMoE input staging requires hidden_size to be "
+            "a multiple of 128."
+        )
+    top_k = topk_ids.shape[1]
+    if topk_weights.shape != topk_ids.shape:
+        raise ValueError(
+            "DeepSeek V4 MegaMoE input staging requires topk_weights and "
+            "topk_ids to have the same shape."
+        )
+
+    block_k = 128
+    grid = (num_tokens, triton.cdiv(hidden_size, block_k))
+    block_topk = triton.next_power_of_2(top_k)
+    _deepseek_v4_stage_mega_moe_inputs_kernel[grid](
+        hidden_states,
+        x_fp8,
+        x_sf,
+        topk_ids,
+        topk_weights,
+        topk_idx_out,
+        topk_weights_out,
+        hidden_states.stride(0),
+        hidden_states.stride(1),
+        x_fp8.stride(0),
+        x_fp8.stride(1),
+        x_sf.stride(0),
+        x_sf.stride(1),
+        topk_ids.stride(0),
+        topk_ids.stride(1),
+        topk_weights.stride(0),
+        topk_weights.stride(1),
+        topk_idx_out.stride(0),
+        topk_idx_out.stride(1),
+        topk_weights_out.stride(0),
+        topk_weights_out.stride(1),
+        hidden_size,
+        top_k,
+        BLOCK_K=block_k,
+        GROUP_K=32,
+        BLOCK_TOPK=block_topk,
+        num_warps=4,
+    )
+
+
+def make_deepseek_v4_expert_params_mapping(
+    num_experts: int,
+) -> list[tuple[str, str, int, str]]:
+    return [
+        (
+            "experts.w13_" if shard_id in ("w1", "w3") else "experts.w2_",
+            f"experts.{expert_id}.{weight_name}.",
+            expert_id,
+            shard_id,
+        )
+        for expert_id in range(num_experts)
+        for shard_id, weight_name in [
+            ("w1", "w1"),
+            ("w2", "w2"),
+            ("w3", "w3"),
+        ]
+    ]
+
+
+class DeepseekV4MegaMoEExperts(nn.Module):
+    _symm_buffer_cache: dict[tuple[int, int, int, int, int, int, int], object] = {}
+
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        *,
+        num_experts: int,
+        num_local_experts: int,
+        experts_start_idx: int,
+        top_k: int,
+        hidden_size: int,
+        intermediate_size: int,
+        prefix: str = "",
+    ):
+        super().__init__()
+        self.prefix = prefix
+        self.num_experts = num_experts
+        self.num_local_experts = num_local_experts
+        self.experts_start_idx = experts_start_idx
+        self.experts_end_idx = experts_start_idx + num_local_experts
+        self.top_k = top_k
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.max_num_tokens = vllm_config.scheduler_config.max_num_batched_tokens
+
+        weight_attrs = {"weight_loader": self.weight_loader}
+        self.w13_weight = nn.Parameter(
+            torch.zeros(
+                num_local_experts,
+                2 * intermediate_size,
+                hidden_size // 2,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        set_weight_attrs(self.w13_weight, weight_attrs)
+
+        self.w13_weight_scale = nn.Parameter(
+            torch.zeros(
+                num_local_experts,
+                2 * intermediate_size,
+                hidden_size // 32,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        set_weight_attrs(self.w13_weight_scale, weight_attrs)
+        self.w13_weight_scale.quant_method = "block"
+
+        self.w2_weight = nn.Parameter(
+            torch.zeros(
+                num_local_experts,
+                hidden_size,
+                intermediate_size // 2,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        set_weight_attrs(self.w2_weight, weight_attrs)
+
+        self.w2_weight_scale = nn.Parameter(
+            torch.zeros(
+                num_local_experts,
+                hidden_size,
+                intermediate_size // 32,
+                dtype=torch.uint8,
+            ),
+            requires_grad=False,
+        )
+        set_weight_attrs(self.w2_weight_scale, weight_attrs)
+        self.w2_weight_scale.quant_method = "block"
+
+        self._transformed_l1_weights: tuple[torch.Tensor, torch.Tensor] | None = None
+        self._transformed_l2_weights: tuple[torch.Tensor, torch.Tensor] | None = None
+
+        # Register in the static forward context so the custom-op wrapper
+        # can look up this module by name from within a torch.compile graph.
+        compilation_config = vllm_config.compilation_config
+        if prefix in compilation_config.static_forward_context:
+            raise ValueError(f"Duplicate layer name: {prefix}")
+        compilation_config.static_forward_context[prefix] = self
+
+    def _map_global_expert_id(self, expert_id: int) -> int:
+        if expert_id < self.experts_start_idx or expert_id >= self.experts_end_idx:
+            return -1
+        return expert_id - self.experts_start_idx
+
+    def weight_loader(
+        self,
+        param: nn.Parameter,
+        loaded_weight: torch.Tensor,
+        weight_name: str,
+        shard_id: str,
+        expert_id: int,
+        return_success: bool = False,
+    ) -> bool | None:
+        local_expert_id = self._map_global_expert_id(expert_id)
+        if local_expert_id == -1:
+            return False if return_success else None
+
+        expert_data = param.data[local_expert_id]
+        if shard_id in ("w1", "w3"):
+            if "w13_" not in weight_name:
+                return False if return_success else None
+            shard_offset = 0 if shard_id == "w1" else self.intermediate_size
+            expert_data = expert_data.narrow(0, shard_offset, self.intermediate_size)
+        elif shard_id == "w2":
+            if "w2_" not in weight_name:
+                return False if return_success else None
+        else:
+            raise ValueError(f"Unsupported expert shard id: {shard_id}")
+
+        if expert_data.shape != loaded_weight.shape:
+            raise ValueError(
+                f"DeepSeek V4 MegaMoE expert weight shape mismatch for "
+                f"{weight_name}: parameter shard {tuple(expert_data.shape)} "
+                f"vs checkpoint {tuple(loaded_weight.shape)}"
+            )
+        expert_data.copy_(loaded_weight)
+        return True if return_success else None
+
+    @staticmethod
+    def _ue8m0_uint8_to_float(sf: torch.Tensor) -> torch.Tensor:
+        return (sf.to(torch.int32) << 23).view(torch.float32)
+
+    def _check_runtime_supported(self) -> None:
+        if not torch.cuda.is_available():
+            raise NotImplementedError("DeepSeek V4 MegaMoE requires CUDA.")
+        device = self.w13_weight.device
+        if device.type != "cuda":
+            raise NotImplementedError(
+                "DeepSeek V4 MegaMoE expert weights must be loaded on CUDA."
+            )
+        if torch.cuda.get_device_capability(device)[0] != 10:
+            raise NotImplementedError("DeepGEMM MegaMoE requires SM100 GPUs.")
+        if self.hidden_size % 128 != 0 or self.intermediate_size % 128 != 0:
+            raise ValueError(
+                "DeepGEMM MegaMoE requires hidden and intermediate sizes "
+                "to be multiples of 128."
+            )
+
+    def finalize_weights(self) -> None:
+        if self._transformed_l1_weights is not None:
+            return
+
+        self._check_runtime_supported()
+        import vllm.third_party.deep_gemm as deep_gemm
+
+        w13_scale = deep_gemm.transform_sf_into_required_layout(
+            self._ue8m0_uint8_to_float(self.w13_weight_scale.data).contiguous(),
+            2 * self.intermediate_size,
+            self.hidden_size,
+            (1, 32),
+            self.num_local_experts,
+        )
+        w2_scale = deep_gemm.transform_sf_into_required_layout(
+            self._ue8m0_uint8_to_float(self.w2_weight_scale.data).contiguous(),
+            self.hidden_size,
+            self.intermediate_size,
+            (1, 32),
+            self.num_local_experts,
+        )
+        self._transformed_l1_weights, self._transformed_l2_weights = (
+            deep_gemm.transform_weights_for_mega_moe(
+                (self.w13_weight.data.view(torch.int8).contiguous(), w13_scale),
+                (self.w2_weight.data.view(torch.int8).contiguous(), w2_scale),
+            )
+        )
+        # Drop the original loader-side parameters: the MegaMoE kernels only
+        # consume the transformed views above. transform_weights_for_mega_moe
+        # allocates a fresh tensor for the L1 weight (see _interleave_l1_weights)
+        # and fresh SF tensors for L1/L2; the L2 weight is the only tensor that
+        # aliases the original storage, and _transformed_l2_weights still holds
+        # it, so the storage stays live after we drop the Parameter.
+        self.w13_weight = None
+        self.w13_weight_scale = None
+        self.w2_weight = None
+        self.w2_weight_scale = None
+
+    def get_symm_buffer(self):
+        import vllm.third_party.deep_gemm as deep_gemm
+
+        group = get_ep_group().device_group
+        device = torch.accelerator.current_device_index()
+        key = (
+            id(group),
+            device,
+            self.num_experts,
+            self.max_num_tokens,
+            self.top_k,
+            self.hidden_size,
+            self.intermediate_size,
+        )
+        symm_buffer = self._symm_buffer_cache.get(key)
+        if symm_buffer is None:
+            symm_buffer = deep_gemm.get_symm_buffer_for_mega_moe(
+                group,
+                self.num_experts,
+                self.max_num_tokens,
+                self.top_k,
+                self.hidden_size,
+                self.intermediate_size,
+            )
+            self._symm_buffer_cache[key] = symm_buffer
+        return symm_buffer
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        *,
+        activation_clamp: float | None,
+        fast_math: bool = True,
+    ) -> torch.Tensor:
+        if hidden_states.shape[0] > self.max_num_tokens:
+            raise ValueError(
+                f"DeepSeek V4 MegaMoE got {hidden_states.shape[0]} tokens, "
+                f"but the symmetric buffer was sized for {self.max_num_tokens}."
+            )
+        y = torch.empty_like(hidden_states, dtype=torch.bfloat16)
+        torch.ops.vllm.deepseek_v4_mega_moe_experts(
+            hidden_states,
+            topk_weights,
+            topk_ids,
+            y,
+            self.prefix,
+            activation_clamp,
+            fast_math,
+        )
+        return y
+
+    def _run_mega_moe(
+        self,
+        hidden_states: torch.Tensor,
+        topk_weights: torch.Tensor,
+        topk_ids: torch.Tensor,
+        y: torch.Tensor,
+        activation_clamp: float | None,
+        fast_math: bool,
+    ) -> None:
+        import vllm.third_party.deep_gemm as deep_gemm
+
+        symm_buffer = self.get_symm_buffer()
+        num_tokens = hidden_states.shape[0]
+        _stage_deepseek_v4_mega_moe_inputs(
+            hidden_states,
+            topk_weights,
+            topk_ids,
+            symm_buffer.x[:num_tokens],
+            symm_buffer.x_sf[:num_tokens],
+            symm_buffer.topk_idx[:num_tokens],
+            symm_buffer.topk_weights[:num_tokens],
+        )
+
+        # This method must have been already called during the weight loading phase.
+        # We call it again here to cover the dummy weight loading case.
+        self.finalize_weights()
+
+        assert self._transformed_l1_weights is not None
+        assert self._transformed_l2_weights is not None
+        deep_gemm.fp8_fp4_mega_moe(
+            y,
+            self._transformed_l1_weights,
+            self._transformed_l2_weights,
+            symm_buffer,
+            activation_clamp=activation_clamp,
+            fast_math=fast_math,
+        )
+
+
+DeepseekV4MegaMoEExperts.weight_loader.supports_moe_loading = True  # type: ignore[attr-defined]
+
+
+def _deepseek_v4_mega_moe_experts_op(
+    hidden_states: torch.Tensor,
+    topk_weights: torch.Tensor,
+    topk_ids: torch.Tensor,
+    out: torch.Tensor,
+    layer_name: str,
+    activation_clamp: float | None,
+    fast_math: bool,
+) -> None:
+    self = get_forward_context().no_compile_layers[layer_name]
+    self._run_mega_moe(
+        hidden_states,
+        topk_weights,
+        topk_ids,
+        out,
+        activation_clamp,
+        fast_math,
+    )
+
+
+def _deepseek_v4_mega_moe_experts_op_fake(
+    hidden_states: torch.Tensor,
+    topk_weights: torch.Tensor,
+    topk_ids: torch.Tensor,
+    out: torch.Tensor,
+    layer_name: str,
+    activation_clamp: float | None,
+    fast_math: bool,
+) -> None:
+    return None
+
+
+direct_register_custom_op(
+    op_name="deepseek_v4_mega_moe_experts",
+    op_func=_deepseek_v4_mega_moe_experts_op,
+    mutates_args=["out"],
+    fake_impl=_deepseek_v4_mega_moe_experts_op_fake,
+)
+
+
+class DeepseekV4MoE(nn.Module):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+    ):
+        super().__init__()
+
+        self.tp_size = get_tensor_model_parallel_world_size()
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        self.prefix = prefix
+        self.use_mega_moe = (
+            vllm_config.kernel_config.moe_backend == "deep_gemm_mega_moe"
+        )
+        if self.use_mega_moe and not vllm_config.parallel_config.enable_expert_parallel:
+            raise NotImplementedError(
+                "DeepSeek V4 MegaMoE currently requires expert parallel. "
+                "Enable it with --enable-expert-parallel, or pick a different "
+                "moe backend."
+            )
+
+        self.routed_scaling_factor = getattr(config, "routed_scaling_factor", 1.0)
+        self.hidden_size = config.hidden_size
+
+        self.n_routed_experts = config.n_routed_experts
+        self.n_activated_experts = config.num_experts_per_tok
+        self.moe_intermediate_size = config.moe_intermediate_size
+        self.swiglu_limit = config.swiglu_limit
+        self.renormalize = config.norm_topk_prob
+        self.scoring_func = getattr(config, "scoring_func", "sqrtsoftplus")
+        if self.use_mega_moe and self.scoring_func != "sqrtsoftplus":
+            raise NotImplementedError(
+                "DeepSeek V4 MegaMoE currently supports sqrtsoftplus routing only."
+            )
+        if self.use_mega_moe and getattr(config, "expert_dtype", "fp4") != "fp4":
+            raise NotImplementedError(
+                "DeepSeek V4 MegaMoE only supports fp4 experts; got expert_dtype="
+                f"{config.expert_dtype!r}. Drop --kernel-config moe_backend="
+                "deep_gemm_mega_moe for this checkpoint."
+            )
+
+        self.gate = GateLinear(
+            input_size=config.hidden_size,
+            output_size=config.n_routed_experts,
+            bias=False,
+            out_dtype=torch.float32,
+            prefix=f"{prefix}.gate",
+        )
+
+        self.gate.e_score_correction_bias = None
+        self.gate.tid2eid = None
+        is_hash_moe = extract_layer_index(prefix) < config.num_hash_layers
+        self.hash_indices_dtype = torch.int64 if self.use_mega_moe else torch.int32
+        if is_hash_moe:
+            # hash MoE doesn't use e_score_correction_bias
+            # Use randint instead of empty to avoid garbage values causing
+            # invalid memory access in dummy mode (--load-format="dummy")
+            self.gate.tid2eid = nn.Parameter(
+                torch.randint(
+                    0,
+                    config.n_routed_experts,
+                    (config.vocab_size, config.num_experts_per_tok),
+                    dtype=self.hash_indices_dtype,
+                ),
+                requires_grad=False,
+            )
+        elif getattr(config, "topk_method", None) == "noaux_tc":
+            self.gate.e_score_correction_bias = nn.Parameter(
+                torch.empty(config.n_routed_experts, dtype=torch.float32),
+                requires_grad=False,
+            )
+
+        if config.n_shared_experts is None:
+            self.shared_experts = None
+        else:
+            intermediate_size = config.moe_intermediate_size * config.n_shared_experts
+
+            self.shared_experts = DeepseekV4MLP(
+                hidden_size=config.hidden_size,
+                intermediate_size=intermediate_size,
+                hidden_act=config.hidden_act,
+                swiglu_limit=self.swiglu_limit,
+                quant_config=quant_config,
+                reduce_results=self.use_mega_moe,
+                prefix=f"{prefix}.shared_experts",
+            )
+
+        if self.use_mega_moe:
+            self._init_mega_moe_experts(vllm_config, config, prefix)
+        else:
+            self._init_fused_moe_experts(config, quant_config, prefix)
+
+    def _init_mega_moe_experts(
+        self,
+        vllm_config: VllmConfig,
+        config,
+        prefix: str,
+    ) -> None:
+        self.ep_group = get_ep_group()
+        self.ep_size = self.ep_group.world_size
+        self.ep_rank = self.ep_group.rank_in_group
+        assert config.n_routed_experts % self.ep_size == 0
+
+        self.n_local_experts = config.n_routed_experts // self.ep_size
+        self.experts_start_idx = self.ep_rank * self.n_local_experts
+        self.experts_end_idx = self.experts_start_idx + self.n_local_experts
+
+        self.experts = DeepseekV4MegaMoEExperts(
+            vllm_config,
+            num_experts=config.n_routed_experts,
+            num_local_experts=self.n_local_experts,
+            experts_start_idx=self.experts_start_idx,
+            top_k=config.num_experts_per_tok,
+            hidden_size=config.hidden_size,
+            intermediate_size=config.moe_intermediate_size,
+            prefix=f"{prefix}.experts",
+        )
+
+    def _init_fused_moe_experts(
+        self,
+        config,
+        quant_config,
+        prefix: str,
+    ) -> None:
+        self.tp_rank = get_tensor_model_parallel_rank()
+        assert config.n_routed_experts % self.tp_size == 0
+
+        self.n_local_experts = config.n_routed_experts // self.tp_size
+        self.experts_start_idx = self.tp_rank * self.n_local_experts
+        self.experts_end_idx = self.experts_start_idx + self.n_local_experts
+
+        self.experts = FusedMoE(
+            shared_experts=self.shared_experts,
+            gate=self.gate,
+            num_experts=config.n_routed_experts,
+            top_k=config.num_experts_per_tok,
+            hidden_size=config.hidden_size,
+            intermediate_size=config.moe_intermediate_size,
+            renormalize=config.norm_topk_prob,
+            quant_config=quant_config,
+            prefix=f"{prefix}.experts",
+            scoring_func=self.scoring_func,
+            routed_scaling_factor=self.routed_scaling_factor,
+            e_score_correction_bias=self.gate.e_score_correction_bias,
+            hash_indices_table=self.gate.tid2eid,
+            swiglu_limit=self.swiglu_limit,
+            router_logits_dtype=torch.float32,
+        )
+
+    def forward(
+        self, hidden_states: torch.Tensor, input_ids: torch.Tensor | None = None
+    ) -> torch.Tensor:
+        if self.gate.tid2eid is not None and input_ids is None:
+            raise ValueError("DeepSeek V4 hash MoE routing requires input_ids.")
+
+        if not self.use_mega_moe:
+            return self._forward_fused_moe(hidden_states, input_ids)
+
+        org_shape = hidden_states.shape
+        router_logits, _ = self.gate(hidden_states)
+        topk_weights, topk_ids = fused_topk_bias(
+            hidden_states=hidden_states,
+            gating_output=router_logits,
+            scoring_func=self.scoring_func,
+            e_score_correction_bias=self.gate.e_score_correction_bias.data
+            if self.gate.e_score_correction_bias is not None
+            else None,
+            topk=self.n_activated_experts,
+            renormalize=self.renormalize,
+            indices_type=self.hash_indices_dtype,
+            input_tokens=input_ids,
+            hash_indices_table=self.gate.tid2eid,
+            routed_scaling_factor=self.routed_scaling_factor,
+        )
+        activation_clamp = (
+            float(self.swiglu_limit) if self.swiglu_limit is not None else None
+        )
+        final_hidden_states = self.experts(
+            hidden_states,
+            topk_weights,
+            topk_ids,
+            activation_clamp=activation_clamp,
+        )
+
+        if self.shared_experts is not None:
+            shared_output = self.shared_experts(hidden_states)
+            final_hidden_states += shared_output
+
+        return final_hidden_states.view(org_shape)
+
+    def _forward_fused_moe(
+        self, hidden_states: torch.Tensor, input_ids: torch.Tensor | None = None
+    ) -> torch.Tensor:
+        org_shape = hidden_states.shape
+        if self.experts.is_internal_router:
+            # In this case, the gate/router runs inside the FusedMoE class
+            final_hidden_states = self.experts(
+                hidden_states=hidden_states,
+                router_logits=hidden_states,
+                input_ids=input_ids,
+            )
+        else:
+            router_logits, _ = self.gate(hidden_states)
+            final_hidden_states = self.experts(
+                hidden_states=hidden_states,
+                router_logits=router_logits,
+                input_ids=input_ids,
+            )
+
+        return final_hidden_states.view(org_shape)
+
+    def finalize_mega_moe_weights(self) -> None:
+        if self.use_mega_moe:
+            self.experts.finalize_weights()
+
+
+class DeepseekV4Attention(nn.Module):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        prefix: str,
+        topk_indices_buffer: torch.Tensor | None = None,
+        aux_stream_list: list[torch.cuda.Stream] | None = None,
+    ):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        layer_id = extract_layer_index(prefix)
+
+        self.layer_id = layer_id
+        self.hidden_size = config.hidden_size
+        self.n_heads = config.num_attention_heads
+        tp_size = get_tensor_model_parallel_world_size()
+        assert self.n_heads % tp_size == 0
+
+        self.n_local_heads = self.n_heads // tp_size
+        self.q_lora_rank = config.q_lora_rank
+        self.o_lora_rank = config.o_lora_rank
+        self.head_dim = config.head_dim
+        self.rope_head_dim = config.qk_rope_head_dim
+        self.nope_head_dim = self.head_dim - self.rope_head_dim
+        self.n_groups = config.o_groups
+        self.n_local_groups = self.n_groups // tp_size
+        self.window_size = config.sliding_window
+        # NOTE(zyongye) Compress ratio can't be 0
+        # we do this for because MTP layer is not included
+        # in the compress ratio list
+        if layer_id < config.num_hidden_layers:
+            self.compress_ratio = max(1, config.compress_ratios[layer_id])
+        else:
+            self.compress_ratio = 1
+        self.eps = config.rms_norm_eps
+        self.max_position_embeddings = config.max_position_embeddings
+
+        # Padded to min 64 heads for FlashMLA, initialized to -inf
+        # (no sink effect). Weight loading fills the first n_local_heads slots.
+        padded_heads = max(self.n_local_heads, 64)
+        self.attn_sink = nn.Parameter(
+            torch.full((padded_heads,), -float("inf"), dtype=torch.float32),
+            requires_grad=False,
+        )
+
+        self.fused_wqa_wkv = MergedColumnParallelLinear(
+            self.hidden_size,
+            [self.q_lora_rank, self.head_dim],
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.fused_wqa_wkv",
+            disable_tp=True,  # fused ReplicatedLinear
+        )
+        self.q_norm = RMSNorm(self.q_lora_rank, self.eps)
+        self.wq_b = ColumnParallelLinear(
+            self.q_lora_rank,
+            self.n_heads * self.head_dim,
+            bias=False,
+            quant_config=quant_config,
+            return_bias=False,
+            prefix=f"{prefix}.wq_b",
+        )
+
+        self.kv_norm = RMSNorm(self.head_dim, self.eps)
+        self.wo_a = ColumnParallelLinear(
+            self.n_heads * self.head_dim // self.n_groups,
+            self.n_groups * self.o_lora_rank,
+            bias=False,
+            quant_config=quant_config,
+            return_bias=False,
+            prefix=f"{prefix}.wo_a",
+        )
+        self.wo_a.is_bmm = True
+        self.wo_a.bmm_batch_size = self.n_local_groups
+        self.wo_b = RowParallelLinear(
+            self.n_groups * self.o_lora_rank,
+            self.hidden_size,
+            bias=False,
+            quant_config=quant_config,
+            return_bias=False,
+            prefix=f"{prefix}.wo_b",
+        )
+        self.softmax_scale = self.head_dim**-0.5
+        self.scale_fmt = config.quantization_config["scale_fmt"]
+
+        self.rope_parameters = config.rope_scaling
+
+        # Initialize rotary embedding BEFORE DeepseekV4MLAModules (which needs it)
+        rope_parameters = config.rope_parameters
+        rope_parameters["rope_theta"] = (
+            config.compress_rope_theta if self.compress_ratio > 1 else config.rope_theta
+        )
+        if config.rope_parameters["rope_type"] != "default":
+            config.rope_parameters["rope_type"] = (
+                "deepseek_yarn"
+                if config.rope_parameters.get("apply_yarn_scaling", True)
+                else "deepseek_llama_scaling"
+            )
+        rope_parameters["mscale"] = 0  # Disable mscale
+        rope_parameters["mscale_all_dim"] = 0  # Disable mscale
+        rope_parameters["is_deepseek_v4"] = True
+        rope_parameters["rope_dim"] = self.rope_head_dim
+        self.rotary_emb = get_rope(
+            self.head_dim,
+            max_position=self.max_position_embeddings,
+            rope_parameters=rope_parameters,
+            is_neox_style=False,
+        )
+
+        self.indexer = None
+        if self.compress_ratio == 4:
+            # Only C4A uses sparse attention and hence has indexer.
+            # aux_stream_list[0] runs indexer.forward() in the wrapper; [2] is
+            # free here (outer GEMMs joined) for the inner overlap of
+            # wq_b+fused_indexer_q_rope_quant vs compressor.
+            indexer_aux_stream = (
+                aux_stream_list[2] if aux_stream_list is not None else None
+            )
+            self.indexer = DeepseekV4Indexer(
+                vllm_config,
+                config=config,
+                hidden_size=self.hidden_size,
+                q_lora_rank=self.q_lora_rank,
+                quant_config=quant_config,
+                cache_config=vllm_config.cache_config,
+                topk_indices_buffer=topk_indices_buffer,
+                compress_ratio=self.compress_ratio,
+                prefix=f"{prefix}.indexer",
+                aux_stream=indexer_aux_stream,
+            )
+
+        mla_modules = DeepseekV4MLAModules(
+            vllm_config=vllm_config,
+            fused_wqa_wkv=self.fused_wqa_wkv,
+            q_norm=self.q_norm,
+            wq_b=self.wq_b,
+            kv_norm=self.kv_norm,
+            wo_a=self.wo_a,
+            wo_b=self.wo_b,
+            attn_sink=self.attn_sink,
+            rotary_emb=self.rotary_emb,
+            indexer=self.indexer,
+            indexer_rotary_emb=self.rotary_emb,
+            topk_indices_buffer=topk_indices_buffer,
+            aux_stream_list=aux_stream_list,
+        )
+        self.mla_attn = DeepseekV4MultiHeadLatentAttentionWrapper(
+            hidden_size=self.hidden_size,
+            num_heads=self.n_local_heads,
+            head_dim=self.head_dim,
+            scale=self.softmax_scale,
+            qk_nope_head_dim=self.nope_head_dim,
+            qk_rope_head_dim=self.rope_head_dim,
+            v_head_dim=self.head_dim,
+            q_lora_rank=self.q_lora_rank,
+            kv_lora_rank=self.head_dim,
+            o_lora_rank=self.o_lora_rank,
+            mla_modules=mla_modules,
+            window_size=self.window_size,
+            compress_ratio=self.compress_ratio,
+            cache_config=vllm_config.cache_config,
+            quant_config=quant_config,
+            prefix=prefix,
+        )
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        llama_4_scaling: torch.Tensor | None,
+    ):
+        return self.mla_attn(positions, hidden_states, llama_4_scaling)
+
+
+class DeepseekV4DecoderLayer(nn.Module):
+    def __init__(
+        self,
+        vllm_config,
+        prefix,
+        topk_indices_buffer: torch.Tensor | None = None,
+        aux_stream_list: list[torch.cuda.Stream] | None = None,
+    ):
+        super().__init__()
+
+        # Lazy import to avoid top-level tilelang dependency.
+        # Registers both torch.ops.vllm.mhc_pre and mhc_post
+        import vllm.model_executor.layers.mhc  # noqa: F401
+
+        config = vllm_config.model_config.hf_config
+        self.hidden_size = config.hidden_size
+
+        self.rms_norm_eps = config.rms_norm_eps
+        self.attn = DeepseekV4Attention(
+            vllm_config,
+            prefix=f"{prefix}.attn",
+            topk_indices_buffer=topk_indices_buffer,
+            aux_stream_list=aux_stream_list,
+        )
+        self.ffn = DeepseekV4MoE(vllm_config, prefix=f"{prefix}.ffn")
+
+        self.attn_norm = RMSNorm(self.hidden_size, self.rms_norm_eps)
+        self.ffn_norm = RMSNorm(self.hidden_size, self.rms_norm_eps)
+        self.hc_mult = config.hc_mult
+        self.hc_sinkhorn_iters = config.hc_sinkhorn_iters
+        self.hc_eps = config.hc_eps
+        self.hc_post_alpha = 2.0
+        mix_hc = (2 + self.hc_mult) * self.hc_mult
+        hc_dim = self.hc_mult * self.hidden_size
+        self.hc_attn_fn = nn.Parameter(
+            torch.empty(
+                (mix_hc, hc_dim),
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.hc_ffn_fn = nn.Parameter(
+            torch.empty(
+                (mix_hc, hc_dim),
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.hc_attn_base = nn.Parameter(
+            torch.empty(
+                mix_hc,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.hc_ffn_base = nn.Parameter(
+            torch.empty(
+                mix_hc,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.hc_attn_scale = nn.Parameter(
+            torch.empty(
+                3,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.hc_ffn_scale = nn.Parameter(
+            torch.empty(
+                3,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.mhc_pre = MHCPreOp()
+        self.mhc_post = MHCPostOp()
+        self.mhc_fused_post_pre = MHCFusedPostPreOp()
+
+    def hc_pre(
+        self,
+        x: torch.Tensor,
+        hc_fn: torch.Tensor,
+        hc_scale: torch.Tensor,
+        hc_base: torch.Tensor,
+        norm_weight: torch.Tensor | None = None,
+        norm_eps: float = 1e-6,
+    ):
+        post_mix, res_mix, layer_input = self.mhc_pre(
+            residual=x,
+            fn=hc_fn,
+            hc_scale=hc_scale,
+            hc_base=hc_base,
+            rms_eps=self.rms_norm_eps,
+            hc_pre_eps=self.hc_eps,
+            hc_sinkhorn_eps=self.hc_eps,
+            hc_post_mult_value=self.hc_post_alpha,
+            sinkhorn_repeat=self.hc_sinkhorn_iters,
+            norm_weight=norm_weight,
+            norm_eps=norm_eps,
+        )
+        return layer_input, post_mix, res_mix
+
+    def hc_post(
+        self,
+        x: torch.Tensor,
+        residual: torch.Tensor,
+        post: torch.Tensor,
+        comb: torch.Tensor,
+    ):
+        return self.mhc_post(x, residual, post, comb)
+
+    def _forward_cuda(
+        self,
+        x: torch.Tensor,
+        positions: torch.Tensor,
+        input_ids: torch.Tensor | None,
+        post_mix: torch.Tensor | None = None,
+        res_mix: torch.Tensor | None = None,
+        residual: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        attn_norm_weight = self.attn_norm.weight.data
+        attn_norm_eps = self.attn_norm.variance_epsilon
+        if residual is None:
+            # Run standalone hc_pre on first layer
+            residual = x
+            x, post_mix, res_mix = self.hc_pre(
+                x,
+                self.hc_attn_fn,
+                self.hc_attn_scale,
+                self.hc_attn_base,
+                norm_weight=attn_norm_weight,
+                norm_eps=attn_norm_eps,
+            )
+        else:
+            residual, post_mix, res_mix, x = self.mhc_fused_post_pre(
+                x,
+                residual,
+                post_mix,
+                res_mix,
+                self.hc_attn_fn,
+                self.hc_attn_scale,
+                self.hc_attn_base,
+                self.rms_norm_eps,
+                self.hc_eps,
+                self.hc_eps,
+                self.hc_post_alpha,
+                self.hc_sinkhorn_iters,
+                n_splits=1,
+                tile_n=1,
+                norm_weight=attn_norm_weight,
+                norm_eps=attn_norm_eps,
+            )
+
+        # attn_norm is fused into hc_pre / mhc_fused_post_pre above.
+        x = self.attn(positions, x, None)
+
+        ffn_norm_weight = self.ffn_norm.weight.data
+        ffn_norm_eps = self.ffn_norm.variance_epsilon
+        residual, post_mix, res_mix, x = self.mhc_fused_post_pre(
+            x,
+            residual,
+            post_mix,
+            res_mix,
+            self.hc_ffn_fn,
+            self.hc_ffn_scale,
+            self.hc_ffn_base,
+            self.rms_norm_eps,
+            self.hc_eps,
+            self.hc_eps,
+            self.hc_post_alpha,
+            self.hc_sinkhorn_iters,
+            n_splits=1,
+            tile_n=1,
+            norm_weight=ffn_norm_weight,
+            norm_eps=ffn_norm_eps,
+        )
+        # ffn_norm is fused into mhc_fused_post_pre above; ffn() takes the
+        # already-normed activation directly.
+        x = self.ffn(x, input_ids)
+        return x, residual, post_mix, res_mix
+
+    def _forward_native(
+        self,
+        x: torch.Tensor,
+        positions: torch.Tensor,
+        input_ids: torch.Tensor | None,
+        post_mix: torch.Tensor | None = None,
+        res_mix: torch.Tensor | None = None,
+        residual: torch.Tensor | None = None,
+    ) -> tuple[
+        torch.Tensor, torch.Tensor | None, torch.Tensor | None, torch.Tensor | None
+    ]:
+        residual = x
+        x, post, comb = self.hc_pre(
+            x, self.hc_attn_fn, self.hc_attn_scale, self.hc_attn_base
+        )
+        x = self.attn_norm(x)
+        x = self.attn(positions, x, None)
+        x = self.hc_post(x, residual, post, comb)
+
+        residual = x
+        x, post, comb = self.hc_pre(
+            x, self.hc_ffn_fn, self.hc_ffn_scale, self.hc_ffn_base
+        )
+        x = self.ffn_norm(x)
+        x = self.ffn(x, input_ids)
+        x = self.hc_post(x, residual, post, comb)
+        return x, None, None, None
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        positions: torch.Tensor,
+        input_ids: torch.Tensor | None,
+        post_mix: torch.Tensor | None = None,
+        res_mix: torch.Tensor | None = None,
+        residual: torch.Tensor | None = None,
+    ) -> tuple[
+        torch.Tensor, torch.Tensor | None, torch.Tensor | None, torch.Tensor | None
+    ]:
+        if current_platform.is_rocm() or current_platform.is_xpu():
+            return self._forward_native(
+                x, positions, input_ids, post_mix, res_mix, residual
+            )
+
+        return self._forward_cuda(x, positions, input_ids, post_mix, res_mix, residual)
+
+
+@support_torch_compile
+class DeepseekV4Model(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+
+        config = vllm_config.model_config.hf_config
+        quant_config = vllm_config.quant_config
+        self.config = config
+        self.use_mega_moe = (
+            vllm_config.kernel_config.moe_backend == "deep_gemm_mega_moe"
+        )
+        if self.use_mega_moe and not vllm_config.parallel_config.enable_expert_parallel:
+            raise NotImplementedError(
+                "DeepSeek V4 MegaMoE currently requires expert parallel. "
+                "Enable it with --enable-expert-parallel, or pick a different "
+                "moe backend."
+            )
+        self.vocab_size = config.vocab_size
+        self.hc_eps = config.hc_eps
+        self.hc_mult = config.hc_mult
+        self.hc_dim = self.hc_mult * config.hidden_size
+        self.rms_norm_eps = config.rms_norm_eps
+
+        # Three aux streams: one per non-default input GEMM in
+        # DeepseekV4MultiHeadLatentAttentionWrapper.attn_gemm_parallel_execute
+        # (compressor kv_score, indexer.weights_proj, indexer.compressor
+        # kv_score). fused_wqa_wkv stays on the default stream.
+        # Disable them on ROCm / XPU because of hang issues / no overlap.
+        aux_stream_list = (
+            None
+            if current_platform.is_rocm() or current_platform.is_xpu()
+            else [torch.cuda.Stream() for _ in range(3)]
+        )
+
+        self.device = current_platform.device_type
+        # Reserved topk indices buffer for all Indexer layers to reuse.
+        self.topk_indices_buffer = torch.empty(
+            vllm_config.scheduler_config.max_num_batched_tokens,
+            config.index_topk,
+            dtype=torch.int32,
+            device=self.device,
+        )
+
+        if get_pp_group().is_first_rank:
+            self.embed_tokens = VocabParallelEmbedding(
+                config.vocab_size,
+                config.hidden_size,
+                quant_config=quant_config,
+                prefix=f"{prefix}.embed_tokens",
+            )
+        else:
+            self.embed_tokens = PPMissingLayer()
+
+        self.start_layer, self.end_layer, self.layers = make_layers(
+            config.num_hidden_layers,
+            lambda prefix: DeepseekV4DecoderLayer(
+                vllm_config,
+                prefix=prefix,
+                topk_indices_buffer=self.topk_indices_buffer,
+                aux_stream_list=aux_stream_list,
+            ),
+            prefix=f"{prefix}.layers",
+        )
+
+        if get_pp_group().is_last_rank:
+            self.norm = RMSNorm(config.hidden_size, self.rms_norm_eps)
+        else:
+            self.norm = PPMissingLayer()
+
+        self.hc_head_fn = nn.Parameter(
+            torch.empty(
+                self.hc_mult,
+                self.hc_dim,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.hc_head_base = nn.Parameter(
+            torch.empty(
+                self.hc_mult,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        self.hc_head_scale = nn.Parameter(
+            torch.empty(1, dtype=torch.float32),
+            requires_grad=False,
+        )
+        self.hc_head_op = HCHeadOp()
+        # Pre-hc_head residual stream buffer for the MTP draft. Stable
+        # address (outside the cudagraph pool) so the copy_ in forward()
+        # refreshes it correctly across captured shapes.
+        # refreshes it correctly across captured shapes. Only allocated on
+        # the last PP rank — that's where MTP target hidden states are
+        # produced.
+        if get_pp_group().is_last_rank:
+            self._mtp_hidden_buffer = torch.empty(
+                vllm_config.scheduler_config.max_num_batched_tokens,
+                self.hc_dim,
+                dtype=vllm_config.model_config.dtype,
+                device=self.device,
+            )
+        else:
+            self._mtp_hidden_buffer = None
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
+    def make_empty_intermediate_tensors(
+        self,
+        batch_size: int,
+        dtype: torch.dtype,
+        device: torch.device,
+    ) -> IntermediateTensors:
+        # PP intermediate tensors carry the multi-stream hidden_states
+        # of shape (num_tokens, hc_mult, hidden_size) — V4 expands the
+        # token embedding to hc_mult streams before the first decoder
+        # layer and keeps that shape until hc_head() collapses it.
+        return IntermediateTensors(
+            {
+                "hidden_states": torch.zeros(
+                    (batch_size, self.hc_mult, self.config.hidden_size),
+                    dtype=dtype,
+                    device=device,
+                ),
+            }
+        )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        if get_pp_group().is_first_rank:
+            if inputs_embeds is not None:
+                hidden_states = inputs_embeds
+            else:
+                hidden_states = self.embed_input_ids(input_ids)
+            hidden_states = hidden_states.unsqueeze(-2).repeat(1, self.hc_mult, 1)
+        else:
+            assert intermediate_tensors is not None
+            hidden_states = intermediate_tensors["hidden_states"]
+
+        if self.use_mega_moe:
+            input_ids = input_ids.to(torch.int64)
+
+        residual, post_mix, res_mix = None, None, None
+        for layer in islice(self.layers, self.start_layer, self.end_layer):
+            hidden_states, residual, post_mix, res_mix = layer(
+                hidden_states,
+                positions,
+                input_ids,
+                post_mix,
+                res_mix,
+                residual,
+            )
+        if layer is not None and current_platform.is_cuda():
+            hidden_states = layer.hc_post(hidden_states, residual, post_mix, res_mix)
+
+        if not get_pp_group().is_last_rank:
+            return IntermediateTensors({"hidden_states": hidden_states})
+
+        # Stash pre-hc_head residual for the MTP draft (captured copy_).
+        num_tokens = hidden_states.shape[0]
+        self._mtp_hidden_buffer[:num_tokens].copy_(hidden_states.flatten(1))
+
+        hidden_states = self.hc_head_op(
+            hidden_states,
+            self.hc_head_fn,
+            self.hc_head_scale,
+            self.hc_head_base,
+            self.rms_norm_eps,
+            self.hc_eps,
+        )
+        hidden_states = self.norm(hidden_states)
+        return hidden_states
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        stacked_params_mapping = [
+            # (param_name, shard_name, shard_id)
+            ("gate_up_proj", "w1", 0),
+            ("gate_up_proj", "w3", 1),
+            ("attn.fused_wqa_wkv", "attn.wq_a", 0),
+            ("attn.fused_wqa_wkv", "attn.wkv", 1),
+            ("compressor.fused_wkv_wgate", "compressor.wkv", 0),
+            ("compressor.fused_wkv_wgate", "compressor.wgate", 1),
+        ]
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+
+        # TP for attention
+        tp_size = get_tensor_model_parallel_world_size()
+        tp_rank = get_tensor_model_parallel_rank()
+        n_head = self.config.num_attention_heads
+        n_local_head = n_head // tp_size
+        head_rank_start = n_local_head * tp_rank
+        head_rank_end = n_local_head * (tp_rank + 1)
+
+        # Pre-compute expert mapping ONCE.
+        expert_mapping = self.get_expert_mapping()
+
+        for name, loaded_weight in weights:
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                # Skip non-stacked layers and experts (experts handled below).
+                if ".experts." in name:
+                    continue
+                if weight_name not in name:
+                    continue
+                name = name.replace(weight_name, param_name)
+
+                if is_pp_missing_parameter(name, self):
+                    break
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                loaded_params.add(name)
+                break
+            else:
+                if ".experts." in name:
+                    # E8M0 scales are stored as float8_e8m0fnu in
+                    # checkpoints but the MoE param is uint8. copy_()
+                    # would do a numeric conversion (e.g. 2^-7 → 0),
+                    # destroying the raw exponent bytes.
+                    if (
+                        "weight_scale" in name
+                        and loaded_weight.dtype == torch.float8_e8m0fnu
+                    ):
+                        loaded_weight = loaded_weight.view(torch.uint8)
+                    for mapping in expert_mapping:
+                        param_name, weight_name, expert_id, expert_shard_id = mapping
+                        if weight_name not in name:
+                            continue
+                        name_mapped = name.replace(weight_name, param_name)
+                        if is_pp_missing_parameter(name_mapped, self):
+                            continue
+                        param = params_dict[name_mapped]
+                        # We should ask the weight loader to return success or not
+                        # here since otherwise we may skip experts with other
+                        # available replicas.
+                        weight_loader = typing.cast(
+                            Callable[..., bool], param.weight_loader
+                        )
+                        success = weight_loader(
+                            param,
+                            loaded_weight,
+                            name_mapped,
+                            shard_id=expert_shard_id,
+                            expert_id=expert_id,
+                            return_success=True,
+                        )
+                        if success:
+                            name = name_mapped
+                            break
+                    loaded_params.add(name_mapped)
+                    continue
+                elif "attn_sink" in name:
+                    if is_pp_missing_parameter(name, self):
+                        continue
+                    narrow_weight = loaded_weight[head_rank_start:head_rank_end]
+                    n = narrow_weight.shape[0]
+                    params_dict[name][:n].copy_(narrow_weight)
+                    loaded_params.add(name)
+                    continue
+                else:
+                    if is_pp_missing_parameter(name, self):
+                        continue
+                    param = params_dict[name]
+                    weight_loader = getattr(
+                        param, "weight_loader", default_weight_loader
+                    )
+                    weight_loader(param, loaded_weight)
+                    loaded_params.add(name)
+                    continue
+
+        return loaded_params
+
+    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
+        first_layer = next(iter(islice(self.layers, self.start_layer, self.end_layer)))
+        if first_layer.ffn.use_mega_moe:
+            return make_deepseek_v4_expert_params_mapping(self.config.n_routed_experts)
+        # Params for weights, fp8 weight scales, fp8 activation scales
+        # (param_name, weight_name, expert_id, shard_id)
+        return FusedMoE.make_expert_params_mapping(
+            self,
+            ckpt_gate_proj_name="w1",
+            ckpt_down_proj_name="w2",
+            ckpt_up_proj_name="w3",
+            num_experts=self.config.n_routed_experts,
+        )
+
+    def finalize_mega_moe_weights(self) -> None:
+        for layer in islice(self.layers, self.start_layer, self.end_layer):
+            layer.ffn.finalize_mega_moe_weights()
+
+
+def _make_deepseek_v4_weights_mapper(expert_dtype: str) -> WeightsMapper:
+    if expert_dtype == "fp4":
+        # MXFP4 experts use Mxfp4MoEMethod, which registers scales as
+        # ``w{1,2,3}_weight_scale`` (no _inv suffix). FP8 linear and
+        # shared experts use Fp8LinearMethod's block scales, which
+        # register as ``weight_scale_inv``.
+        scale_regex = {
+            re.compile(r"(\.experts\.\d+\.w[123])\.scale$"): r"\1.weight_scale",
+            re.compile(r"\.scale$"): ".weight_scale_inv",
+        }
+    else:
+        # FP8 experts use Fp8MoEMethod (block_quant=True), which registers
+        # scales as ``w{13,2}_weight_scale_inv``. Map all ``.scale`` keys
+        # there.
+        scale_regex = {
+            re.compile(r"\.scale$"): ".weight_scale_inv",
+        }
+    return WeightsMapper(
+        orig_to_new_prefix={
+            "layers.": "model.layers.",
+            "embed.": "model.embed.",
+            "norm.": "model.norm.",
+            "hc_head": "model.hc_head",
+            "mtp.": "model.mtp.",
+        },
+        orig_to_new_regex=scale_regex,
+        orig_to_new_suffix={
+            "head.weight": "lm_head.weight",
+            "embed.weight": "embed_tokens.weight",
+            ".ffn.gate.bias": ".ffn.gate.e_score_correction_bias",
+        },
+        orig_to_new_substr={
+            ".attn.compressor.": ".attn.mla_attn.compressor.",
+            ".shared_experts.w2": ".shared_experts.down_proj",
+        },
+    )
+
+
+class DeepseekV4ForCausalLM(nn.Module, SupportsPP):
+    model_cls = DeepseekV4Model
+
+    # Default mapper assumes the original FP4-expert checkpoint layout.
+    # Overridden per-instance in __init__ when expert_dtype != "fp4".
+    hf_to_vllm_mapper = _make_deepseek_v4_weights_mapper("fp4")
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+
+        config = vllm_config.model_config.hf_config
+        self.config = config
+        expert_dtype = getattr(config, "expert_dtype", "fp4")
+        if expert_dtype != "fp4":
+            self.hf_to_vllm_mapper = _make_deepseek_v4_weights_mapper(expert_dtype)
+
+        self.model = self.model_cls(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+        if get_pp_group().is_last_rank:
+            self.lm_head = ParallelLMHead(
+                config.vocab_size,
+                config.hidden_size,
+                prefix=maybe_prefix(prefix, "lm_head"),
+            )
+        else:
+            self.lm_head = PPMissingLayer()
+        self.logits_processor = LogitsProcessor(config.vocab_size)
+        self.make_empty_intermediate_tensors = (  # type: ignore[method-assign]
+            self.model.make_empty_intermediate_tensors
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor | None:
+        logits = self.logits_processor(self.lm_head, hidden_states)
+        return logits
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+    ) -> torch.Tensor | IntermediateTensors:
+        hidden_states = self.model(
+            input_ids, positions, intermediate_tensors, inputs_embeds
+        )
+        return hidden_states
+
+    def get_mtp_target_hidden_states(self) -> torch.Tensor | None:
+        """Pre-hc_head residual stream buffer (max_num_batched_tokens,
+        hc_mult * hidden_size) for the MTP draft model. Populated by
+        forward(); valid after each target step."""
+        return getattr(self.model, "_mtp_hidden_buffer", None)
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        loader = AutoWeightsLoader(self, skip_substrs=["mtp."])
+        loaded_params = loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
+        self.model.finalize_mega_moe_weights()
+        return loaded_params
+
+    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
+        return self.model.get_expert_mapping()
diff --git a/vllm/models/deepseek_v4/nvidia/mtp.py b/vllm/models/deepseek_v4/nvidia/mtp.py
new file mode 100644
index 000000000000..9d0ac435305e
--- /dev/null
+++ b/vllm/models/deepseek_v4/nvidia/mtp.py
@@ -0,0 +1,515 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""MTP draft model for DeepSeek V4 (internal codename: DeepseekV4).
+
+Split from ``deepseek_mtp.py`` because the V4 architecture introduces several
+pieces that have no analogue in V3/V32:
+  * separate ``e_proj`` / ``h_proj`` with fp8 linear quantization (instead of
+    the fused ``eh_proj``);
+  * ``hc_head`` hypercompressed vocab projection applied in ``compute_logits``;
+  * ``DeepseekV4DecoderLayer`` with its own aux-stream management;
+  * V4-specific checkpoint weight-name remapping in ``load_weights``.
+"""
+
+import typing
+from collections.abc import Callable, Iterable
+
+import regex as re
+import torch
+import torch.nn as nn
+
+from vllm.compilation.decorators import support_torch_compile
+from vllm.config import VllmConfig
+from vllm.distributed import (
+    get_tensor_model_parallel_rank,
+    get_tensor_model_parallel_world_size,
+)
+from vllm.logger import init_logger
+from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.linear import ReplicatedLinear
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
+from vllm.model_executor.layers.mhc import HCHeadOp
+from vllm.model_executor.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding,
+)
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.model_executor.models.deepseek_mtp import SharedHead
+from vllm.model_executor.models.deepseek_v2 import get_spec_layer_idx_from_weight_name
+from vllm.model_executor.models.utils import maybe_prefix
+from vllm.platforms import current_platform
+from vllm.sequence import IntermediateTensors
+
+from .model import (
+    DeepseekV4DecoderLayer,
+    make_deepseek_v4_expert_params_mapping,
+)
+
+logger = init_logger(__name__)
+
+# MoE expert scales are fused into per-layer w13/w2 tensors. The exact
+# parameter suffix depends on which FusedMoE method handles the experts:
+# - fp4 experts (Mxfp4MoEMethod) register ``w{1,2,3}_weight_scale``;
+# - fp8 experts (Fp8MoEMethod with block_quant=True) register
+#   ``w{1,2,3}_weight_scale_inv``.
+# Other FP8 linear scales (including shared experts) always use
+# ``.weight_scale_inv``. Mirrors the per-instance mapper built by
+# ``_make_deepseek_v4_weights_mapper`` in deepseek_v4.py.
+_EXPERT_SCALE_RE = re.compile(r"\.experts\.\d+\.w[123]\.scale$")
+
+
+class DeepSeekV4MultiTokenPredictorLayer(nn.Module):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        topk_indices_buffer: torch.Tensor,
+        prefix: str,
+        aux_stream_list: list[torch.cuda.Stream] | None = None,
+    ) -> None:
+        super().__init__()
+
+        assert vllm_config.speculative_config is not None
+        config = vllm_config.speculative_config.draft_model_config.hf_config
+        self.config = config
+        quant_config = vllm_config.quant_config
+        self.rms_norm_eps = config.rms_norm_eps
+
+        self.enorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.hnorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+        # V4 keeps e_ and h_ proj separate (with fp8 linear quant) rather than
+        # fusing them the way V3 does with eh_proj.
+        self.e_proj = ReplicatedLinear(
+            config.hidden_size,
+            config.hidden_size,
+            bias=False,
+            return_bias=False,
+            quant_config=quant_config,
+        )
+        self.h_proj = ReplicatedLinear(
+            config.hidden_size,
+            config.hidden_size,
+            bias=False,
+            return_bias=False,
+            quant_config=quant_config,
+        )
+
+        self.hc_eps = config.hc_eps
+        self.hc_mult = config.hc_mult
+        self.hc_dim = self.hc_mult * config.hidden_size
+        self.hc_head_fn = nn.Parameter(
+            torch.empty(self.hc_mult, self.hc_dim, dtype=torch.float32),
+            requires_grad=False,
+        )
+        self.hc_head_base = nn.Parameter(
+            torch.empty(self.hc_mult, dtype=torch.float32),
+            requires_grad=False,
+        )
+        self.hc_head_scale = nn.Parameter(
+            torch.empty(1, dtype=torch.float32),
+            requires_grad=False,
+        )
+
+        self.shared_head = SharedHead(
+            config=config, prefix=prefix, quant_config=quant_config
+        )
+        self.mtp_block = DeepseekV4DecoderLayer(
+            vllm_config,
+            prefix,
+            topk_indices_buffer=topk_indices_buffer,
+            aux_stream_list=aux_stream_list,
+        )
+
+        self.hc_head_op = HCHeadOp()
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        previous_hidden_states: torch.Tensor,
+        inputs_embeds: torch.Tensor | None = None,
+        spec_step_index: int = 0,
+    ) -> torch.Tensor:
+        assert inputs_embeds is not None
+        # masking inputs at position 0, as not needed by MTP
+        inputs_embeds = torch.where(positions.unsqueeze(-1) == 0, 0, inputs_embeds)
+        inputs_embeds = self.enorm(inputs_embeds)
+
+        # Target stashes pre-hc_head residual as flat (T, hc_mult * D);
+        # reshape to (T, hc_mult, D) — the training-time layout.
+        previous_hidden_states = previous_hidden_states.view(
+            -1, self.hc_mult, self.config.hidden_size
+        )
+        previous_hidden_states = self.hnorm(previous_hidden_states)
+        hidden_states = self.h_proj(previous_hidden_states) + self.e_proj(
+            inputs_embeds
+        ).unsqueeze(-2)
+        hidden_states, residual, post_mix, res_mix = self.mtp_block(
+            positions=positions, x=hidden_states, input_ids=None
+        )
+        if current_platform.is_cuda():
+            hidden_states = self.mtp_block.hc_post(
+                hidden_states, residual, post_mix, res_mix
+            )
+        # Return the flat pre-hc_head residual so it can be re-fed as the
+        # next spec step's `previous_hidden_states` when
+        # num_speculative_tokens > 1. hc_head is deferred to compute_logits.
+        return hidden_states.flatten(1)
+
+
+class DeepSeekV4MultiTokenPredictor(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        config = vllm_config.model_config.hf_config
+        self.mtp_start_layer_idx = config.num_hidden_layers
+        self.num_mtp_layers = config.num_nextn_predict_layers
+        self.device = current_platform.device_type
+
+        topk_tokens = config.index_topk
+        self.topk_indices_buffer = torch.empty(
+            vllm_config.scheduler_config.max_num_batched_tokens,
+            topk_tokens,
+            dtype=torch.int32,
+            device=self.device,
+        )
+
+        # Three aux streams shared across all MTP layers, mirroring
+        # DeepseekV4Model. ROCm runs the same work serially for now.
+        aux_stream_list = (
+            None
+            if current_platform.is_rocm()
+            else [torch.cuda.Stream() for _ in range(3)]
+        )
+
+        # to map the exact layer index from weights
+        self.layers = torch.nn.ModuleDict(
+            {
+                str(idx): DeepSeekV4MultiTokenPredictorLayer(
+                    vllm_config,
+                    self.topk_indices_buffer,
+                    f"{prefix}.layers.{idx}",
+                    aux_stream_list=aux_stream_list,
+                )
+                for idx in range(
+                    self.mtp_start_layer_idx,
+                    self.mtp_start_layer_idx + self.num_mtp_layers,
+                )
+            }
+        )
+        self.embed_tokens = VocabParallelEmbedding(
+            config.vocab_size,
+            config.hidden_size,
+            prefix=maybe_prefix(prefix, "embed_tokens"),
+        )
+        self.logits_processor = LogitsProcessor(config.vocab_size)
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.embed_tokens(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+        previous_hidden_states: torch.Tensor,
+        inputs_embeds: torch.Tensor | None = None,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor:
+        if inputs_embeds is None:
+            inputs_embeds = self.embed_tokens(input_ids)
+        current_step_idx = spec_step_idx % self.num_mtp_layers
+        return self.layers[str(self.mtp_start_layer_idx + current_step_idx)](
+            input_ids,
+            positions,
+            previous_hidden_states,
+            inputs_embeds,
+            current_step_idx,
+        )
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor:
+        current_step_idx = spec_step_idx % self.num_mtp_layers
+        mtp_layer = self.layers[str(self.mtp_start_layer_idx + current_step_idx)]
+        # MTP forward returns the pre-hc_head residual (T, hc_mult * D); apply
+        # hc_head here so logits are computed from the dense hidden state.
+        hidden_states = hidden_states.view(
+            -1, mtp_layer.hc_mult, mtp_layer.config.hidden_size
+        )
+        hidden_states = mtp_layer.hc_head_op(
+            hidden_states,
+            mtp_layer.hc_head_fn,
+            mtp_layer.hc_head_scale,
+            mtp_layer.hc_head_base,
+            mtp_layer.rms_norm_eps,
+            mtp_layer.hc_eps,
+        )
+        logits = self.logits_processor(
+            mtp_layer.shared_head.head, mtp_layer.shared_head(hidden_states)
+        )
+        return logits
+
+
+@support_torch_compile
+class DeepSeekV4MTP(nn.Module):
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__()
+        self.config = vllm_config.model_config.hf_config
+        self.quant_config = vllm_config.quant_config
+        self.model = DeepSeekV4MultiTokenPredictor(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
+
+    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
+        return self.model.embed_input_ids(input_ids)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor | None,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor:
+        hidden_states = self.model(
+            input_ids, positions, hidden_states, inputs_embeds, spec_step_idx
+        )
+        return hidden_states
+
+    def compute_logits(
+        self,
+        hidden_states: torch.Tensor,
+        spec_step_idx: int = 0,
+    ) -> torch.Tensor | None:
+        return self.model.compute_logits(hidden_states, spec_step_idx)
+
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
+        # Weight name remapping for checkpoint compatibility.
+        # Maps checkpoint weight paths to model parameter paths.
+        WEIGHT_NAME_REMAPPING: dict[str, str] = {
+            ".emb.tok_emb.weight": ".embed_tokens.weight",
+            ".head.weight": ".shared_head.head.weight",
+            ".norm.weight": ".shared_head.norm.weight",
+        }
+
+        def _remap_weight_name(name: str) -> str:
+            """Remap checkpoint weight names to model parameter names."""
+            for old_pattern, new_pattern in WEIGHT_NAME_REMAPPING.items():
+                if old_pattern in name:
+                    name = name.replace(old_pattern, new_pattern)
+            return name
+
+        def _find_mtp_layer_idx(name: str) -> int:
+            subnames = name.split(".")
+            for subname in subnames:
+                try:
+                    # we return the first encountered integer
+                    return int(subname)
+                except ValueError:
+                    continue
+            return 0
+
+        stacked_params_mapping = [
+            # (param_name, shard_name, shard_id)
+            ("gate_up_proj", "w1", 0),
+            ("gate_up_proj", "w3", 1),
+            ("attn.fused_wqa_wkv", "attn.wq_a", 0),
+            ("attn.fused_wqa_wkv", "attn.wkv", 1),
+        ]
+        params_dict = dict(self.named_parameters())
+        loaded_params: set[str] = set()
+
+        # TP for attention
+        tp_size = get_tensor_model_parallel_world_size()
+        tp_rank = get_tensor_model_parallel_rank()
+        n_head = self.config.num_attention_heads
+        n_local_head = n_head // tp_size
+        head_rank_start = n_local_head * tp_rank
+        head_rank_end = n_local_head * (tp_rank + 1)
+
+        # Pre-compute expert mapping ONCE.
+        first_layer = next(iter(self.model.layers.values()))
+        if first_layer.mtp_block.ffn.use_mega_moe:
+            expert_mapping = make_deepseek_v4_expert_params_mapping(
+                self.config.n_routed_experts
+            )
+        else:
+            expert_mapping = FusedMoE.make_expert_params_mapping(
+                self,
+                ckpt_gate_proj_name="w1",
+                ckpt_down_proj_name="w2",
+                ckpt_up_proj_name="w3",
+                num_experts=self.config.n_routed_experts,
+            )
+
+        # FP8 experts register ``..._weight_scale_inv`` (block_quant) while
+        # FP4/MXFP4 experts register ``..._weight_scale``. Choose the suffix
+        # for the rename below based on the model's expert dtype.
+        expert_scale_suffix = (
+            ".weight_scale"
+            if getattr(self.config, "expert_dtype", "fp4") == "fp4"
+            else ".weight_scale_inv"
+        )
+
+        for name, loaded_weight in weights:
+            mtp_layer_idx = _find_mtp_layer_idx(name)
+            # V4 checkpoints store MTP weights as `mtp.{i}.*`; remap to
+            # `model.layers.{num_hidden_layers + i}.*` so that
+            # get_spec_layer_idx_from_weight_name can identify them.
+            name = name.replace(
+                f"mtp.{mtp_layer_idx}.",
+                f"model.layers.{self.config.num_hidden_layers + mtp_layer_idx}.",
+            )
+
+            spec_layer = get_spec_layer_idx_from_weight_name(self.config, name)
+            if spec_layer is None:
+                continue
+
+            name = _remap_weight_name(name)
+            name = self._rewrite_spec_layer_name(spec_layer, name)
+
+            if spec_layer != self.model.mtp_start_layer_idx and ".layers" not in name:
+                continue
+            if name.endswith(".scale"):
+                suffix = (
+                    expert_scale_suffix
+                    if _EXPERT_SCALE_RE.search(name)
+                    else ".weight_scale_inv"
+                )
+                name = name.removesuffix(".scale") + suffix
+            for param_name, weight_name, shard_id in stacked_params_mapping:
+                # Skip non-stacked layers and experts (experts handled below).
+                if ".experts." in name:
+                    continue
+                if weight_name not in name:
+                    continue
+                name = name.replace(weight_name, param_name)
+
+                param = params_dict[name]
+                weight_loader = param.weight_loader
+                weight_loader(param, loaded_weight, shard_id)
+                loaded_params.add(name)
+                break
+            else:
+                if ".experts." in name:
+                    # Reinterpret E8M0 scales as uint8 to preserve raw
+                    # exponent bytes; numeric copy_() would zero them.
+                    # Mirrors the main DeepseekV4 loader.
+                    if (
+                        "weight_scale" in name
+                        and loaded_weight.dtype == torch.float8_e8m0fnu
+                    ):
+                        loaded_weight = loaded_weight.view(torch.uint8)
+                    for mapping in expert_mapping:
+                        param_name, weight_name, expert_id, expert_shard_id = mapping
+                        if weight_name not in name:
+                            continue
+                        name_mapped = name.replace(weight_name, param_name)
+                        param = params_dict[name_mapped]
+                        # We should ask the weight loader to return success or not
+                        # here since otherwise we may skip experts with other
+                        # available replicas.
+                        weight_loader = typing.cast(
+                            Callable[..., bool], param.weight_loader
+                        )
+                        success = weight_loader(
+                            param,
+                            loaded_weight,
+                            name_mapped,
+                            shard_id=expert_shard_id,
+                            expert_id=expert_id,
+                            return_success=True,
+                        )
+                        if success:
+                            name = name_mapped
+                            loaded_params.add(name_mapped)
+                            break
+                    continue
+                elif "attn_sink" in name:
+                    narrow_weight = loaded_weight[head_rank_start:head_rank_end]
+                    n = narrow_weight.shape[0]
+                    params_dict[name][:n].copy_(narrow_weight)
+                    loaded_params.add(name)
+                    continue
+                else:
+                    if ".shared_experts.w2" in name:
+                        name = name.replace(
+                            ".shared_experts.w2", ".shared_experts.down_proj"
+                        )
+                    if name.endswith(".ffn.gate.bias"):
+                        # ``e_score_correction_bias`` lives on the gate
+                        # under a different attribute name.
+                        name = name.replace(
+                            ".ffn.gate.bias",
+                            ".ffn.gate.e_score_correction_bias",
+                        )
+                    param = params_dict[name]
+                    weight_loader = getattr(
+                        param, "weight_loader", default_weight_loader
+                    )
+                    weight_loader(param, loaded_weight)
+                    loaded_params.add(name)
+                    continue
+
+        loaded_layers: set[int] = set()
+        for param_name in loaded_params:
+            spec_layer = get_spec_layer_idx_from_weight_name(self.config, param_name)
+            if spec_layer is not None:
+                loaded_layers.add(spec_layer)
+        for layer_idx in range(
+            self.model.mtp_start_layer_idx,
+            self.model.mtp_start_layer_idx + self.model.num_mtp_layers,
+        ):
+            if layer_idx not in loaded_layers:
+                raise ValueError(
+                    f"MTP speculative decoding layer {layer_idx} weights "
+                    f"missing from checkpoint. The checkpoint may have "
+                    f"been quantized without including the MTP layers. "
+                    f"Use a checkpoint that includes MTP layer weights, "
+                    f"or disable speculative decoding."
+                )
+        self.finalize_mega_moe_weights()
+        logger.info_once("MTP draft model loaded: %d params", len(loaded_params))
+        return loaded_params
+
+    def finalize_mega_moe_weights(self) -> None:
+        for layer in self.model.layers.values():
+            layer.mtp_block.ffn.finalize_mega_moe_weights()
+
+    def _rewrite_spec_layer_name(self, spec_layer: int, name: str) -> str:
+        """
+        Rewrite the weight name to match the format of the original model.
+        Add .mtp_block for modules in transformer layer block for spec layer
+        and rename shared layer weights to be top level.
+        """
+        spec_layer_weight_names = [
+            "embed_tokens",
+            "enorm",
+            "hnorm",
+            "h_proj",
+            "e_proj",
+            "shared_head",
+            "hc_head_fn",
+            "hc_head_base",
+            "hc_head_scale",
+        ]
+        shared_weight_names = ["embed_tokens"]
+        spec_layer_weight = False
+        shared_weight = False
+        for weight_name in spec_layer_weight_names:
+            if weight_name in name:
+                spec_layer_weight = True
+                if weight_name in shared_weight_names:
+                    shared_weight = True
+                break
+        if not spec_layer_weight:
+            # treat rest weights as weights for transformer layer block
+            name = name.replace(
+                f"model.layers.{spec_layer}.", f"model.layers.{spec_layer}.mtp_block."
+            )
+        elif shared_weight:
+            # treat shared weights as top level weights
+            name = name.replace(f"model.layers.{spec_layer}.", "model.")
+        return name
diff --git a/vllm/models/deepseek_v4/nvidia/ops/__init__.py b/vllm/models/deepseek_v4/nvidia/ops/__init__.py
new file mode 100644
index 000000000000..37276e1816f0
--- /dev/null
+++ b/vllm/models/deepseek_v4/nvidia/ops/__init__.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""NVIDIA-only (cutedsl/cutlass) kernels for DeepSeek V4.
+
+These modules import ``cutlass``/``cutedsl`` at module top level, so they must
+not be imported on non-CUDA platforms. Callers should gate on
+``vllm.utils.import_utils.has_cutedsl()`` before importing from here.
+"""
diff --git a/vllm/models/deepseek_v4/nvidia/ops/attention.py b/vllm/models/deepseek_v4/nvidia/ops/attention.py
new file mode 100644
index 000000000000..5a9837378851
--- /dev/null
+++ b/vllm/models/deepseek_v4/nvidia/ops/attention.py
@@ -0,0 +1,927 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+DeepseekV4 MLA Attention Layer
+"""
+
+from collections.abc import Callable
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any, cast
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import DeepseekV2Config, DeepseekV3Config
+
+import vllm.envs as envs
+from vllm.compilation.breakable_cudagraph import eager_break_during_capture
+from vllm.model_executor.layers.linear import (
+    ReplicatedLinear,
+)
+from vllm.model_executor.layers.sparse_attn_indexer import SparseAttnIndexer
+from vllm.models.deepseek_v4.common.ops import (
+    fused_indexer_q_rope_quant,
+    fused_inv_rope_fp8_quant,
+    fused_q_kv_rmsnorm,
+)
+from vllm.utils.deep_gemm import fp8_einsum
+from vllm.utils.torch_utils import direct_register_custom_op
+from vllm.v1.attention.ops.rocm_aiter_mla_sparse import rocm_inv_rope_einsum
+
+if TYPE_CHECKING:
+    from vllm.v1.attention.backends.mla.sparse_swa import (
+        DeepseekSparseSWAMetadata,
+    )
+
+from vllm.config import (
+    CacheConfig,
+    VllmConfig,
+    get_current_vllm_config,
+)
+from vllm.distributed import get_tensor_model_parallel_world_size
+from vllm.forward_context import ForwardContext, get_forward_context
+from vllm.logger import init_logger
+from vllm.model_executor.custom_op import PluggableLayer
+from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.quantization.input_quant_fp8 import (
+    QuantFP8,
+)
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    GroupShape,
+)
+from vllm.models.deepseek_v4.compressor import DeepseekCompressor
+from vllm.platforms import current_platform
+from vllm.utils.multi_stream_utils import (
+    execute_in_parallel,
+    maybe_execute_in_parallel,
+)
+from vllm.v1.attention.backend import AttentionBackend, AttentionMetadata
+from vllm.v1.attention.backends.mla.flashmla_sparse import (
+    FlashMLASparseBackend,
+)
+from vllm.v1.attention.backends.mla.indexer import (
+    DeepseekV4IndexerBackend,
+    get_max_prefill_buffer_size,
+)
+from vllm.v1.attention.backends.mla.sparse_swa import DeepseekV4SWACache
+from vllm.v1.kv_cache_interface import KVCacheSpec, MLAAttentionSpec
+
+if TYPE_CHECKING:
+    from vllm.models.deepseek_v4.nvidia.flashmla import (
+        DeepseekV4SparseMLAAttentionImpl,
+    )
+
+logger = init_logger(__name__)
+
+
+def _select_v4_sparse_impl() -> "type[DeepseekV4SparseMLAAttentionImpl]":
+    """Pick the platform-specific V4 sparse MLA impl class. Sole platform check."""
+    if current_platform.is_rocm():
+        from vllm.models.deepseek_v4.amd.rocm import (
+            DeepseekV4ROCMAiterMLASparseImpl,
+        )
+
+        return DeepseekV4ROCMAiterMLASparseImpl
+    from vllm.models.deepseek_v4.nvidia.flashmla import (
+        DeepseekV4FlashMLASparseImpl,
+    )
+
+    return DeepseekV4FlashMLASparseImpl
+
+
+@dataclass
+class DeepseekV4MLAModules:
+    """Modules used in DeepseekV4 MLA."""
+
+    vllm_config: VllmConfig
+    fused_wqa_wkv: torch.nn.Module
+    q_norm: torch.nn.Module
+    wq_b: torch.nn.Module
+    kv_norm: torch.nn.Module
+    wo_a: torch.nn.Module
+    wo_b: torch.nn.Module
+    attn_sink: torch.nn.Module
+    rotary_emb: torch.nn.Module
+    indexer: torch.nn.Module | None
+    indexer_rotary_emb: torch.nn.Module
+    topk_indices_buffer: torch.Tensor | None
+    aux_stream_list: list[torch.cuda.Stream] | None = None
+
+
+# --8<-- [start:multi_head_latent_attention]
+@PluggableLayer.register("deepseek_v4_multi_head_latent_attention")
+class DeepseekV4MultiHeadLatentAttentionWrapper(PluggableLayer):
+    """Pluggable MLA layer which allows OOT backends to add
+    custom implementations of the outer MLA layer (including rope & o_proj).
+    Note that currently oot platforms can still use CustomOp.register_oot to
+    replace MLA layer entirely, although we use PluggableLayer to register
+    this layer now.
+
+    This class takes positions and hidden_states as input.
+    The input tensors can either contain prefill tokens or decode tokens.
+    The class does the following:
+
+    1. MLA Preprocess.
+    2. Perform multi-head attention to prefill tokens and
+       multi-query attention to decode tokens separately.
+    3. Return the output tensor.
+    """
+
+    # --8<-- [end:multi_head_latent_attention]
+
+    def __init__(
+        self,
+        hidden_size: int,
+        num_heads: int,
+        head_dim: int,
+        scale: float,
+        qk_nope_head_dim: int,
+        qk_rope_head_dim: int,
+        v_head_dim: int,
+        q_lora_rank: int | None,
+        kv_lora_rank: int,
+        o_lora_rank: int | None,
+        mla_modules: DeepseekV4MLAModules,
+        window_size: int,
+        compress_ratio: int | None,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+    ) -> None:
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.n_local_heads = num_heads
+        self.head_dim = head_dim
+        self.scale = scale
+
+        # FlashMLA sparse kernel only supports 64 or 128 heads; pad up to the
+        # next supported size. Must match DeepseekV4MLAAttention.padded_heads.
+        if num_heads <= 64:
+            self.padded_heads = 64
+        elif num_heads <= 128:
+            self.padded_heads = 128
+        else:
+            raise ValueError(
+                f"DeepseekV4 attention does not support {num_heads} heads "
+                "(must be <= 128)."
+            )
+
+        self.q_lora_rank = q_lora_rank
+        self.kv_lora_rank = kv_lora_rank
+        self.window_size = window_size
+        self.compress_ratio = compress_ratio if compress_ratio is not None else 1
+        self.prefix = prefix
+
+        # Extract config from vllm_config
+        config = mla_modules.vllm_config.model_config.hf_config
+        tp_size = get_tensor_model_parallel_world_size()
+
+        # DeepseekV4-specific attributes (num_heads is already TP-adjusted)
+        self.eps = config.rms_norm_eps
+        self.rope_head_dim = config.qk_rope_head_dim
+        self.nope_head_dim = head_dim - self.rope_head_dim
+        self.n_local_groups = config.o_groups // tp_size
+        self.o_lora_rank = config.o_lora_rank
+
+        # Store projection modules
+        self.fused_wqa_wkv = mla_modules.fused_wqa_wkv
+        self.q_norm = mla_modules.q_norm
+        self.wq_b = mla_modules.wq_b
+
+        self.kv_norm = mla_modules.kv_norm
+        self.wo_a = mla_modules.wo_a
+
+        self._wo_a_act_quant = QuantFP8(
+            static=False,
+            group_shape=GroupShape(1, 128),
+            use_ue8m0=True,
+        )
+        # Bypass packed-for-deepgemm path — we need FP32 scales (not packed
+        # INT32) so fp8_einsum can handle layout transform internally.
+        self._wo_a_act_quant.use_deep_gemm_supported = False
+        self.wo_b = mla_modules.wo_b
+
+        # Pick fp8_einsum recipe based on GPU arch:
+        # SM90: FP32 block scales stay [g, r/128, d/128] → sfb_gran_mn=128
+        # SM100: INT32 packed scales become [g, r, ...] → sfb_gran_mn=1
+        cap = current_platform.get_device_capability()
+        assert cap is not None, "DeepseekV4 attention requires a CUDA device"
+        self._einsum_recipe = (1, 128, 128) if cap.major <= 9 else (1, 1, 128)
+        self._tma_aligned_scales = cap.major >= 10
+
+        self.rotary_emb = mla_modules.rotary_emb
+        self.indexer_rotary_emb = mla_modules.indexer_rotary_emb
+        self.topk_indices_buffer = mla_modules.topk_indices_buffer
+
+        self.indexer = mla_modules.indexer
+
+        # Per-head RMS normalization for Q (no learnable weights)
+        self.q_head_norm = RMSNorm(head_dim, eps=self.eps, has_weight=False)
+
+        # TODO(yifan): currently hardcoded for FP8 sparse, make it more generic
+        head_bytes = (
+            self.nope_head_dim  # 448 fp8 NoPE
+            + self.rope_head_dim * 2  # 64 bf16 RoPE
+            + self.nope_head_dim // 64  # 7B scale factors
+            + 1  # 1B pad
+        )
+
+        # Will be None on ROCm for now.
+        self.aux_stream_list = mla_modules.aux_stream_list
+        # [0]: GEMM start / post-GEMM event0. [1..3]: GEMM done events;
+        # [1] doubles as post-GEMM event1. Reuse is safe: GEMM fully joins
+        # before post-GEMM starts.
+        self.ln_events = [torch.cuda.Event() for _ in range(4)]
+
+        assert cache_config is not None, "DeepseekV4 attention requires cache_config"
+        self.swa_cache_layer = DeepseekV4SWACache(
+            head_dim=self.head_dim,
+            window_size=self.window_size,
+            dtype=torch.uint8,
+            prefix=f"{prefix}.swa_cache",
+            cache_config=cache_config,
+        )
+
+        self.mla_attn = DeepseekV4MLAAttention(
+            num_heads=self.n_local_heads,
+            head_dim=self.head_dim,
+            scale=self.scale,
+            qk_nope_head_dim=self.nope_head_dim,
+            qk_rope_head_dim=self.rope_head_dim,
+            q_lora_rank=self.q_lora_rank,
+            kv_lora_rank=self.kv_lora_rank,
+            compress_ratio=self.compress_ratio,
+            window_size=self.window_size,
+            head_bytes=head_bytes,
+            swa_cache_layer=self.swa_cache_layer,
+            attn_sink=mla_modules.attn_sink,  # already padded with -inf
+            cache_config=cache_config,
+            quant_config=quant_config,
+            prefix=prefix,
+            indexer=self.indexer,
+            topk_indices_buffer=self.topk_indices_buffer,
+        )
+        # Register this layer in the compilation config's static forward context
+        # This allows the custom op to retrieve the layer during execution
+        compilation_config = mla_modules.vllm_config.compilation_config
+        # HACK
+        self.layer_name = prefix + ".deepseek_v4_multi_head_latent_attention"
+        if self.layer_name in compilation_config.static_forward_context:
+            raise ValueError(f"Duplicate layer name: {self.layer_name}")
+        compilation_config.static_forward_context[self.layer_name] = self
+
+        # Create the compressor for layers with compress_ratio > 1; after
+        # creating the DeepseekV4MLAAttention layer to get its cache.
+        self.compressor = None
+        if self.compress_ratio > 1:
+            self.compressor = DeepseekCompressor(
+                vllm_config=mla_modules.vllm_config,
+                compress_ratio=self.compress_ratio,
+                hidden_size=self.hidden_size,
+                head_dim=self.head_dim,
+                rotate=True,
+                prefix=f"{prefix}.compressor",
+                k_cache_prefix=self.mla_attn.prefix,
+            )
+
+    def forward(
+        self,
+        positions: torch.Tensor,
+        hidden_states: torch.Tensor,
+        llama_4_scaling: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        # Pre-allocate attention output with FlashMLA-padded head count.
+        # The op writes into `o_padded`; we slice to n_local_heads after.
+        num_tokens = hidden_states.shape[0]
+        o_padded = torch.empty(
+            (num_tokens, self.padded_heads, self.head_dim),
+            dtype=hidden_states.dtype,
+            device=hidden_states.device,
+        )
+
+        # Attention (inside custom op for torch.compile boundary)
+        torch.ops.vllm.deepseek_v4_attention(
+            hidden_states,
+            positions,
+            o_padded,
+            self.layer_name,
+        )
+        o = o_padded[:, : self.n_local_heads, :]
+
+        # Keep ROCm on the BF16 reference wo_a path util kernel ready.
+        if current_platform.is_rocm():
+            z = rocm_inv_rope_einsum(
+                self.rotary_emb,
+                o,
+                positions,
+                self.rope_head_dim,
+                self.n_local_groups,
+                self.o_lora_rank,
+                self.wo_a,
+            )
+            return self.wo_b(z.flatten(1))
+
+        # O projection: inverse RoPE + FP8 quant + einsum + wo_b
+        o_fp8, o_scale = fused_inv_rope_fp8_quant(
+            o,
+            positions,
+            self.rotary_emb.cos_sin_cache,
+            n_groups=self.n_local_groups,
+            heads_per_group=self.n_local_heads // self.n_local_groups,
+            nope_dim=self.nope_head_dim,
+            rope_dim=self.rope_head_dim,
+            tma_aligned_scales=self._tma_aligned_scales,
+        )
+
+        wo_a_fp8 = self.wo_a.weight
+        wo_a_scale = self.wo_a.weight_scale_inv
+
+        z = torch.empty(
+            (num_tokens, self.n_local_groups, self.o_lora_rank),
+            device=o.device,
+            dtype=torch.bfloat16,
+        )
+        torch.ops.vllm.deepseek_v4_fp8_einsum(
+            o_fp8,
+            o_scale,
+            wo_a_fp8,
+            wo_a_scale,
+            z,
+            "bhr,hdr->bhd",
+            list(self._einsum_recipe),
+        )
+
+        return self.wo_b(z.flatten(1))
+
+    def attn_gemm_parallel_execute(self, hidden_states) -> tuple[Any, ...]:
+        aux_streams = self.aux_stream_list
+        if aux_streams is not None:
+            assert len(aux_streams) >= 3
+            aux_streams = aux_streams[:3]
+
+        # fused_wqa_wkv (heaviest) on default; the three lighter input GEMMs
+        # on aux streams 0..2 when their owning module exists. ln_events[0]
+        # is the fan-out start event; ln_events[1..3] are per-aux done events.
+        # On ROCm, aux_streams is None and execute_in_parallel runs serially.
+        aux_fns: list[Callable[[], Any] | None] = [None, None, None]
+
+        if self.compressor is not None:
+            # Local ref so the closure keeps a non-None type for mypy.
+            compressor = self.compressor
+
+            def compressor_kv_score() -> torch.Tensor:
+                return torch.mm(
+                    hidden_states,
+                    compressor.fused_wkv_wgate.weight.T,
+                    out_dtype=torch.float32,
+                )
+
+            aux_fns[0] = compressor_kv_score
+
+        if self.indexer is not None:
+            indexer = self.indexer
+
+            def indexer_weights_proj() -> torch.Tensor:
+                # ReplicatedLinear returns (output, bias); bias is None.
+                weights, _ = indexer.weights_proj(hidden_states)
+                return weights
+
+            def indexer_compressor_kv_score() -> torch.Tensor:
+                return torch.mm(
+                    hidden_states,
+                    indexer.compressor.fused_wkv_wgate.weight.T,
+                    out_dtype=torch.float32,
+                )
+
+            aux_fns[1] = indexer_weights_proj
+            aux_fns[2] = indexer_compressor_kv_score
+
+        def fused_wqa_wkv() -> torch.Tensor:
+            # MergedColumnParallelLinear returns (output, bias); bias is None.
+            qr_kv, _ = self.fused_wqa_wkv(hidden_states)
+            return qr_kv
+
+        qr_kv, (kv_score, indexer_weights, indexer_kv_score) = execute_in_parallel(
+            fused_wqa_wkv,
+            aux_fns,
+            self.ln_events[0],
+            self.ln_events[1:4],
+            aux_streams,
+            enable=hidden_states.shape[0]
+            <= envs.VLLM_MULTI_STREAM_GEMM_TOKEN_THRESHOLD,
+        )
+
+        return qr_kv, kv_score, indexer_kv_score, indexer_weights
+
+    def attention_impl(
+        self,
+        hidden_states: torch.Tensor,
+        positions: torch.Tensor,
+        out: torch.Tensor,  # [num_tokens, padded_heads, head_dim], written in place
+    ) -> None:
+        forward_context = get_forward_context()
+        attn_metadata = forward_context.attn_metadata
+
+        qr_kv, kv_score, indexer_kv_score, indexer_weights = (
+            self.attn_gemm_parallel_execute(hidden_states)
+        )
+
+        qr, kv = qr_kv.split([self.q_lora_rank, self.head_dim], dim=-1)
+        qr, kv = fused_q_kv_rmsnorm(
+            qr,
+            kv,
+            self.q_norm.weight.data,
+            self.kv_norm.weight.data,
+            self.eps,
+        )
+
+        # wq_b + kv_insert (+ MLA compressor when an indexer is present) ride
+        # on the default stream so q stays on its consumer stream (mla_attn
+        # downstream reads q on default). Indexer/compressor go on aux for
+        # overlap with default's GEMM + cache write.
+        if self.indexer is not None:
+            aux_streams = self.aux_stream_list
+            indexer = self.indexer
+            # Local ref so the closure keeps a non-None type for mypy.
+            assert self.compressor is not None
+            compressor = self.compressor
+
+            def wq_b_kv_insert() -> torch.Tensor:
+                q = self.wq_b(qr).view(-1, self.n_local_heads, self.head_dim)
+                self._fused_qnorm_rope_kv_insert(q, kv, positions, attn_metadata)
+                return q
+
+            # 3-way overlap (matches TRT-LLM PR #14142 Level 1): default runs
+            # wq_b+kv_insert; slot [0] runs the full indexer; slot [1] runs the
+            # MLA compressor. Slot [2] is reserved for the indexer's inner
+            # overlap. ROCm (aux_streams is None) falls back to sequential.
+            q, _ = execute_in_parallel(
+                wq_b_kv_insert,
+                [
+                    lambda: indexer(
+                        hidden_states,
+                        qr,
+                        indexer_kv_score,
+                        indexer_weights,
+                        positions,
+                        self.indexer_rotary_emb,
+                    ),
+                    lambda: compressor(kv_score, positions, self.rotary_emb),
+                ],
+                self.ln_events[0],
+                [self.ln_events[1], self.ln_events[2]],
+                [aux_streams[0], aux_streams[1]] if aux_streams is not None else None,
+                enable=aux_streams is not None,
+            )
+        elif self.compressor is not None:
+            # wq_b + kv_insert on default, compressor on aux.
+            aux_stream = (
+                self.aux_stream_list[0] if self.aux_stream_list is not None else None
+            )
+            compressor = self.compressor
+
+            def wq_b_kv_insert() -> torch.Tensor:
+                q = self.wq_b(qr).view(-1, self.n_local_heads, self.head_dim)
+                self._fused_qnorm_rope_kv_insert(q, kv, positions, attn_metadata)
+                return q
+
+            q, _ = maybe_execute_in_parallel(
+                wq_b_kv_insert,
+                lambda: compressor(kv_score, positions, self.rotary_emb),
+                self.ln_events[0],
+                self.ln_events[1],
+                aux_stream,
+            )
+        else:
+            # SWA-only layer: no compressor, no overlap.
+            q = self.wq_b(qr).view(-1, self.n_local_heads, self.head_dim)
+            self._fused_qnorm_rope_kv_insert(q, kv, positions, attn_metadata)
+
+        # Pad q to FlashMLA-required head count (64 or 128)
+        if self.n_local_heads < self.padded_heads:
+            pad_size = self.padded_heads - self.n_local_heads
+            q = F.pad(q, (0, 0, 0, pad_size), value=0.0)
+
+        # MLA attention writes into the pre-allocated `out` buffer
+        # ([num_tokens, padded_heads, head_dim]).
+        self.mla_attn(q, kv, positions, output=out)
+
+    def _fused_qnorm_rope_kv_insert(
+        self,
+        q: torch.Tensor,
+        kv: torch.Tensor,
+        positions: torch.Tensor,
+        attn_metadata: (
+            dict[str, AttentionMetadata] | list[dict[str, AttentionMetadata]] | None
+        ),
+    ) -> None:
+        if not isinstance(attn_metadata, dict):
+            return
+
+        swa_metadata = cast(
+            "DeepseekSparseSWAMetadata | None",
+            attn_metadata.get(self.swa_cache_layer.prefix),
+        )
+        assert swa_metadata is not None
+
+        swa_kv_cache = self.swa_cache_layer.kv_cache
+        swa_kv_cache_2d = swa_kv_cache.view(swa_kv_cache.shape[0], -1)
+
+        # Horizontally fused:
+        #   Q side:  q_head_norm (per-head RMSNorm, no weight) + GPT-J RoPE
+        #   KV side: GPT-J RoPE + UE8M0 FP8 quant + paged cache insert
+        # kv is unchanged; mla_attn reads kv solely via swa_kv_cache.
+        torch.ops._C.fused_deepseek_v4_qnorm_rope_kv_rope_quant_insert(
+            q,
+            kv,
+            swa_kv_cache_2d,
+            swa_metadata.slot_mapping,
+            positions.to(torch.int64),
+            self.rotary_emb.cos_sin_cache,
+            self.eps,
+            swa_metadata.block_size,
+        )
+
+
+@eager_break_during_capture
+def deepseek_v4_attention(
+    hidden_states: torch.Tensor,
+    positions: torch.Tensor,
+    out: torch.Tensor,
+    layer_name: str,
+) -> None:
+    forward_context: ForwardContext = get_forward_context()
+    self = forward_context.no_compile_layers[layer_name]
+    self.attention_impl(hidden_states, positions, out)
+
+
+def deepseek_v4_attention_fake(
+    hidden_states: torch.Tensor,
+    positions: torch.Tensor,
+    out: torch.Tensor,
+    layer_name: str,
+) -> None:
+    return None
+
+
+direct_register_custom_op(
+    op_name="deepseek_v4_attention",
+    op_func=deepseek_v4_attention,
+    mutates_args=["out"],
+    fake_impl=deepseek_v4_attention_fake,
+)
+
+
+def deepseek_v4_fp8_einsum(
+    a: torch.Tensor,
+    a_scale: torch.Tensor,
+    b: torch.Tensor,
+    b_scale: torch.Tensor,
+    out: torch.Tensor,
+    equation: str,
+    recipe: list[int],
+) -> None:
+    fp8_einsum(equation, (a, a_scale), (b, b_scale), out, recipe=tuple(recipe))
+
+
+def deepseek_v4_fp8_einsum_fake(
+    a: torch.Tensor,
+    a_scale: torch.Tensor,
+    b: torch.Tensor,
+    b_scale: torch.Tensor,
+    out: torch.Tensor,
+    equation: str,
+    recipe: list[int],
+) -> None:
+    return None
+
+
+direct_register_custom_op(
+    op_name="deepseek_v4_fp8_einsum",
+    op_func=deepseek_v4_fp8_einsum,
+    mutates_args=["out"],
+    fake_impl=deepseek_v4_fp8_einsum_fake,
+)
+
+
+class DeepseekV4MLAAttention(nn.Module, AttentionLayerBase):
+    # FlashMLA FP8 sparse only supports 64 or 128 heads
+    SUPPORTED_HEAD_COUNTS = (64, 128)
+
+    def __init__(
+        self,
+        num_heads: int,
+        head_dim: int,
+        scale: float,
+        qk_nope_head_dim: int,
+        qk_rope_head_dim: int,
+        q_lora_rank: int | None,
+        kv_lora_rank: int,
+        compress_ratio: int,
+        window_size: int,
+        head_bytes: int,
+        swa_cache_layer: DeepseekV4SWACache,
+        attn_sink: torch.Tensor,
+        cache_config: CacheConfig | None = None,
+        quant_config: QuantizationConfig | None = None,
+        prefix: str = "",
+        # Sparse MLA Args
+        indexer: object | None = None,
+        topk_indices_buffer: torch.Tensor | None = None,
+        aux_stream: torch.cuda.Stream | None = None,
+        **extra_impl_args,
+    ) -> None:
+        super().__init__()
+        self.impl_cls = _select_v4_sparse_impl()
+        self.backend_cls = self.impl_cls.backend_cls
+        self.num_heads = num_heads
+        self.num_kv_heads = 1
+        self.head_dim = head_dim
+        self.scale = scale
+        self.window_size = window_size
+        self.head_bytes = head_bytes
+        self.compress_ratio = compress_ratio
+        self.q_lora_rank = q_lora_rank
+        self.kv_lora_rank = kv_lora_rank
+        self.nope_head_dim = qk_nope_head_dim
+        self.rope_head_dim = qk_rope_head_dim
+        self.indexer = indexer
+        self.topk_indices_buffer = topk_indices_buffer
+
+        self.prefix = prefix  # Alias for compatibility with compressor
+
+        self.aux_stream = aux_stream
+        self.ln_events = [torch.cuda.Event(), torch.cuda.Event()]
+
+        # Determine padded head count for FlashMLA
+        if num_heads not in self.SUPPORTED_HEAD_COUNTS:
+            if num_heads < 64:
+                self.padded_heads = 64
+            elif num_heads < 128:
+                self.padded_heads = 128
+            else:
+                raise ValueError(
+                    f"DeepseekV4MLAAttention does not support {num_heads} heads. "
+                    f"Supported: <= 128 (will be padded to 64 or 128)"
+                )
+        else:
+            self.padded_heads = num_heads
+
+        # Store attention sink
+        assert attn_sink is not None
+        self.attn_sink: torch.Tensor = attn_sink
+        # Store SWA cache
+        assert swa_cache_layer is not None
+        self.swa_cache_layer: DeepseekV4SWACache = swa_cache_layer
+
+        # Get vllm config for cache setup
+        vllm_config = get_current_vllm_config()
+        self.max_num_batched_tokens = (
+            vllm_config.scheduler_config.max_num_batched_tokens
+        )
+        self.max_model_len = vllm_config.model_config.max_model_len
+        # DeepseekV4 only supports fp8 kv-cache format for now.
+        kv_cache_dtype = cache_config.cache_dtype if cache_config is not None else "fp8"
+
+        assert kv_cache_dtype.startswith("fp8"), (
+            f"DeepseekV4 only supports fp8 kv-cache format for now, "
+            f"got {kv_cache_dtype}"
+        )
+        assert issubclass(self.get_attn_backend(), FlashMLASparseBackend), (
+            "Only FlashMLA Sparse Attention backend is supported for DeepseekV4 for now"
+        )
+        # FlashMLA Sparse Attention fp8 backend uses "fp8_ds_mla" kv-cache format
+        # Automatically convert fp8 kv-cache format to "fp8_ds_mla"
+        if (
+            issubclass(self.get_attn_backend(), FlashMLASparseBackend)
+            and kv_cache_dtype.startswith("fp8")
+            and kv_cache_dtype != "fp8_ds_mla"
+        ):
+            assert cache_config is not None
+            cache_config.cache_dtype = "fp8_ds_mla"
+            kv_cache_dtype = "fp8_ds_mla"
+            logger.info_once("Using DeepSeek's fp8_ds_mla KV cache format.")
+
+        self.kv_cache_dtype = kv_cache_dtype
+
+        # Register with compilation context for metadata lookup
+        compilation_config = vllm_config.compilation_config
+        if prefix and prefix in compilation_config.static_forward_context:
+            raise ValueError(f"Duplicate layer name: {prefix}")
+        if prefix:
+            compilation_config.static_forward_context[prefix] = self
+
+        self.kv_cache = torch.tensor([])
+
+    def get_attn_backend(self) -> type[AttentionBackend]:
+        return self.backend_cls
+
+    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:
+        if (
+            self.compress_ratio <= 1
+        ):  # SWA part. Allocated separately as DeepseekV4SWACache.
+            return None
+        return MLAAttentionSpec(
+            block_size=vllm_config.cache_config.block_size,
+            num_kv_heads=1,
+            head_size=self.head_dim,
+            dtype=torch.uint8,
+            compress_ratio=self.compress_ratio,
+            cache_dtype_str=self.kv_cache_dtype,
+            alignment=576,  # NOTE: FlashMLA requires 576B alignment
+            model_version="deepseek_v4",
+        )
+
+    def forward(
+        self,
+        q: torch.Tensor,
+        kv: torch.Tensor,
+        positions: torch.Tensor,
+        output: torch.Tensor,
+    ) -> None:
+        self.impl_cls.forward_mqa(self, q, kv, positions, output)
+
+
+class DeepseekV4IndexerCache(torch.nn.Module, AttentionLayerBase):
+    def __init__(
+        self,
+        head_dim: int,
+        dtype: torch.dtype,
+        prefix: str,
+        cache_config: CacheConfig,
+        compress_ratio: int = 1,
+    ):
+        super().__init__()
+        self.kv_cache = torch.tensor([])
+        self.head_dim = head_dim
+        self.prefix = prefix
+        self.cache_config = cache_config
+        self.dtype = dtype
+        self.compress_ratio = compress_ratio
+        compilation_config = get_current_vllm_config().compilation_config
+        if prefix in compilation_config.static_forward_context:
+            raise ValueError(f"Duplicate layer name: {prefix}")
+        compilation_config.static_forward_context[prefix] = self
+
+    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec:
+        # head_dim already carries the fp8 scale padding
+        # compress_ratio=1 for V3.2, >1 for DeepseekV4; both use the same cache layout.
+        return MLAAttentionSpec(
+            block_size=self.cache_config.block_size,
+            num_kv_heads=1,
+            head_size=self.head_dim,
+            dtype=self.dtype,
+            compress_ratio=self.compress_ratio,
+            # DeepseekV4 aligns indexer pages to FlashMLA's 576B so they can pack with
+            # the indexer's compressor state cache. V3.2 keeps the legacy layout.
+            alignment=576,
+        )
+
+    def forward(self): ...
+
+    def get_attn_backend(self) -> type[AttentionBackend]:
+        return DeepseekV4IndexerBackend
+
+
+class DeepseekV4Indexer(nn.Module):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        config: DeepseekV2Config | DeepseekV3Config,
+        hidden_size: int,
+        q_lora_rank: int,
+        quant_config: QuantizationConfig | None,
+        cache_config: CacheConfig | None,
+        topk_indices_buffer: torch.Tensor | None,
+        compress_ratio: int = 1,
+        prefix: str = "",
+        aux_stream: torch.cuda.Stream | None = None,
+    ):
+        super().__init__()
+        self.vllm_config = vllm_config
+        self.config = config
+        self.quant_config = quant_config
+        # self.indexer_cfg = config.attn_module_list_cfg[0]["attn_index"]
+        self.topk_tokens = config.index_topk
+        self.n_head = config.index_n_heads  # 64
+        self.head_dim = config.index_head_dim  # 128
+        self.rope_dim = config.qk_rope_head_dim  # 64
+        self.q_lora_rank = q_lora_rank  # 1536
+        self.compress_ratio = compress_ratio
+        self.use_fp4_kv = self.vllm_config.attention_config.use_fp4_indexer_cache
+        logger.info_once(
+            "Using %s indexer cache for Lightning Indexer.",
+            "MXFP4" if self.use_fp4_kv else "FP8",
+        )
+
+        # no tensor parallel, just replicated
+        self.wq_b = ReplicatedLinear(
+            self.q_lora_rank,
+            self.head_dim * self.n_head,
+            bias=False,
+            quant_config=quant_config,
+            prefix=f"{prefix}.wq_b",
+        )
+        self.weights_proj = ReplicatedLinear(
+            hidden_size,
+            self.n_head,
+            bias=False,
+            quant_config=None,
+            prefix=f"{prefix}.weights_proj",
+        )
+        self.softmax_scale = self.head_dim**-0.5
+
+        self.scale_fmt = "ue8m0"
+        self.quant_block_size = 128  # TODO: get from config
+        self.topk_indices_buffer = topk_indices_buffer
+
+        self.max_model_len = (
+            vllm_config.model_config.max_model_len // self.compress_ratio
+        )
+        self.prefix = prefix
+
+        self.max_total_seq_len = (
+            get_max_prefill_buffer_size(vllm_config) // self.compress_ratio
+        )
+
+        assert cache_config is not None, "Deepseek V4 indexer requires cache_config"
+        # NOTE(yifan): FP8 indxer cache use the same layout as V3.2:
+        # head_dim bytes = 128 fp8 + 4 fp32 scale = 132.
+        # For FP4 indexer cache, we still allocate the same amount of memory as FP8,
+        # but only use the first half of the memory.
+        k_cache_head_dim = self.head_dim + self.head_dim // self.quant_block_size * 4
+        self.k_cache = DeepseekV4IndexerCache(
+            head_dim=k_cache_head_dim,
+            dtype=torch.uint8,
+            prefix=f"{prefix}.k_cache",
+            cache_config=cache_config,
+            compress_ratio=self.compress_ratio,
+        )
+        self.compressor = DeepseekCompressor(
+            vllm_config=vllm_config,
+            compress_ratio=self.compress_ratio,
+            hidden_size=hidden_size,
+            head_dim=self.head_dim,
+            rotate=True,
+            prefix=f"{prefix}.compressor",
+            k_cache_prefix=self.k_cache.prefix,
+            use_fp4_cache=self.use_fp4_kv,
+        )
+
+        self.indexer_op = SparseAttnIndexer(
+            self.k_cache,
+            self.quant_block_size,
+            self.scale_fmt,
+            self.topk_tokens,
+            self.head_dim,
+            self.max_model_len,
+            self.max_total_seq_len,
+            self.topk_indices_buffer,
+            skip_k_cache_insert=True,
+            use_fp4_cache=self.use_fp4_kv,
+        )
+
+        # None on ROCm — maybe_execute_in_parallel falls back to sequential.
+        self.aux_stream = aux_stream
+        self.ln_events: list[torch.cuda.Event] = [
+            torch.cuda.Event(),
+            torch.cuda.Event(),
+        ]
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        qr: torch.Tensor,
+        compressed_kv_score: torch.Tensor,
+        indexer_weights: torch.Tensor,
+        positions: torch.Tensor,
+        rotary_emb: nn.Module,
+    ) -> torch.Tensor:
+        compressor = self.compressor
+
+        def wq_b_and_q_quant():
+            # ReplicatedLinear returns (output, bias); bias is None.
+            q, _ = self.wq_b(qr)
+            q = q.view(-1, self.n_head, self.head_dim)
+            return fused_indexer_q_rope_quant(
+                positions,
+                q,
+                rotary_emb.cos_sin_cache,
+                indexer_weights,
+                self.softmax_scale,
+                self.n_head**-0.5,
+                use_fp4=self.use_fp4_kv,
+            )
+
+        # compressor returns None and writes K to the indexer KV cache; the
+        # join orders that write before indexer_op (skip_k_cache_insert=True).
+        (q_quant, weights), k = maybe_execute_in_parallel(
+            wq_b_and_q_quant,
+            lambda: compressor(compressed_kv_score, positions, rotary_emb),
+            self.ln_events[0],
+            self.ln_events[1],
+            self.aux_stream,
+        )
+        return self.indexer_op(hidden_states, q_quant, k, weights)
diff --git a/vllm/models/deepseek_v4/nvidia/ops/cutedsl_utils.py b/vllm/models/deepseek_v4/nvidia/ops/cutedsl_utils.py
new file mode 100644
index 000000000000..6cb53cd07078
--- /dev/null
+++ b/vllm/models/deepseek_v4/nvidia/ops/cutedsl_utils.py
@@ -0,0 +1,178 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import cutlass
+import cutlass.cute as cute
+from cutlass import Float32, Uint32
+from cutlass._mlir import ir
+from cutlass._mlir.dialects import llvm, vector
+from cutlass.cutlass_dsl import T, dsl_user_op
+
+
+@dsl_user_op
+def _recast_val(x, dtype, *, loc=None, ip=None):
+    return dtype(llvm.bitcast(dtype.mlir_type, x.ir_value(loc=loc, ip=ip)))
+
+
+@dsl_user_op
+def _fp32x2_to_bf16x2(a: Float32, b: Float32, *, loc=None, ip=None) -> Uint32:
+    out = llvm.inline_asm(
+        T.i32(),
+        [a.ir_value(loc=loc, ip=ip), b.ir_value(loc=loc, ip=ip)],
+        "cvt.rn.bf16x2.f32 $0, $2, $1;",
+        "=r,f,f",
+        has_side_effects=False,
+        is_align_stack=False,
+    )
+    return Uint32(out)
+
+
+@dsl_user_op
+def _bf16x2_to_fp32(data: Uint32, *, loc=None, ip=None) -> tuple[Float32, Float32]:
+    out = llvm.inline_asm(
+        llvm.StructType.get_literal([T.f32(), T.f32()]),
+        [data.ir_value(loc=loc, ip=ip)],
+        "shl.b32 $0, $2, 16;\n\tand.b32 $1, $2, 0xFFFF0000;\n",
+        "=f,=f,r",
+        has_side_effects=False,
+        is_align_stack=False,
+    )
+    return (
+        Float32(llvm.extractvalue(T.f32(), out, [0], loc=loc, ip=ip)),
+        Float32(llvm.extractvalue(T.f32(), out, [1], loc=loc, ip=ip)),
+    )
+
+
+@dsl_user_op
+def _bf16x2_abs(a: Uint32, *, loc=None, ip=None) -> Uint32:
+    out = llvm.inline_asm(
+        T.i32(),
+        [a.ir_value(loc=loc, ip=ip)],
+        "abs.bf16x2 $0, $1;",
+        "=r,r",
+        has_side_effects=False,
+        is_align_stack=False,
+    )
+    return Uint32(out)
+
+
+@dsl_user_op
+def _bf16x2_max(a: Uint32, b: Uint32, *, loc=None, ip=None) -> Uint32:
+    out = llvm.inline_asm(
+        T.i32(),
+        [a.ir_value(loc=loc, ip=ip), b.ir_value(loc=loc, ip=ip)],
+        "max.bf16x2 $0, $1, $2;",
+        "=r,r,r",
+        has_side_effects=False,
+        is_align_stack=False,
+    )
+    return Uint32(out)
+
+
+@dsl_user_op
+def _bf16x2_mul(a: Uint32, b: Uint32, *, loc=None, ip=None) -> Uint32:
+    out = llvm.inline_asm(
+        T.i32(),
+        [a.ir_value(loc=loc, ip=ip), b.ir_value(loc=loc, ip=ip)],
+        "mul.rn.bf16x2 $0, $1, $2;",
+        "=r,r,r",
+        has_side_effects=False,
+        is_align_stack=False,
+    )
+    return Uint32(out)
+
+
+@dsl_user_op
+def _fp8x4_to_bf16x4(x: Uint32, *, loc=None, ip=None) -> cute.TensorSSA:
+    # there is only fp8->fp16 conversion, hence we need to go
+    # round trip through fp16.
+    out = llvm.inline_asm(
+        llvm.StructType.get_literal([T.i32()] * 2),
+        [x.ir_value(loc=loc, ip=ip)],
+        "{\n\t"
+        ".reg .b16 x0, x1;\n\t"
+        ".reg .b16 t00, t01, t10, t11;\n\t"
+        "mov.b32 {x0, x1}, $2;\n\t"
+        "cvt.rn.f16x2.e4m3x2 $0, x0;\n\t"
+        "cvt.rn.f16x2.e4m3x2 $1, x1;\n\t"
+        "mov.b32 {t00, t01}, $0;\n\t"
+        "mov.b32 {t10, t11}, $1;\n\t"
+        "cvt.rn.bf16.f16 t00, t00;\n\t"
+        "cvt.rn.bf16.f16 t01, t01;\n\t"
+        "cvt.rn.bf16.f16 t10, t10;\n\t"
+        "cvt.rn.bf16.f16 t11, t11;\n\t"
+        "mov.b32 $0, {t00, t01};\n\t"
+        "mov.b32 $1, {t10, t11};\n\t"
+        "}\n",
+        "=r,=r,r",
+        has_side_effects=False,
+        is_align_stack=False,
+    )
+    vec = vector.from_elements(
+        ir.VectorType.get([2], T.i32(), loc=loc),
+        [llvm.extractvalue(T.i32(), out, [i], loc=loc, ip=ip) for i in range(2)],
+        loc=loc,
+        ip=ip,
+    )
+    return cute.TensorSSA(vec, 2, Uint32)
+
+
+@dsl_user_op
+def _fp32x4_to_fp8x4(
+    a0: Float32,
+    a1: Float32,
+    a2: Float32,
+    a3: Float32,
+    *,
+    loc=None,
+    ip=None,
+) -> Uint32:
+    # Pack four FP32 values into one b32 of four e4m3 bytes, byte order
+    # {a0, a1, a2, a3} from low to high address.
+    out = llvm.inline_asm(
+        T.i32(),
+        [
+            a0.ir_value(loc=loc, ip=ip),
+            a1.ir_value(loc=loc, ip=ip),
+            a2.ir_value(loc=loc, ip=ip),
+            a3.ir_value(loc=loc, ip=ip),
+        ],
+        "{\n\t"
+        ".reg .b16 t0, t1;\n\t"
+        "cvt.rn.satfinite.e4m3x2.f32 t0, $2, $1;\n\t"
+        "cvt.rn.satfinite.e4m3x2.f32 t1, $4, $3;\n\t"
+        "mov.b32 $0, {t0, t1};\n\t"
+        "}\n",
+        "=r,f,f,f,f",
+        has_side_effects=False,
+        is_align_stack=False,
+    )
+    return Uint32(out)
+
+
+@dsl_user_op
+def _fp32x8_to_fp4x8(
+    vals: cute.Tensor,
+    offset: cutlass.Constexpr[int],
+    *,
+    loc=None,
+    ip=None,
+) -> Uint32:
+    # Pack eight scaled FP32 values into four E2M1x2 bytes, returned as one b32.
+    assert vals.element_type is Float32
+    out = llvm.inline_asm(
+        T.i32(),
+        [vals[offset + i].ir_value(loc=loc, ip=ip) for i in range(8)],
+        "{\n\t"
+        ".reg .b8 x0, x1, x2, x3;\n\t"
+        "cvt.rn.satfinite.e2m1x2.f32 x0, $2, $1;\n\t"
+        "cvt.rn.satfinite.e2m1x2.f32 x1, $4, $3;\n\t"
+        "cvt.rn.satfinite.e2m1x2.f32 x2, $6, $5;\n\t"
+        "cvt.rn.satfinite.e2m1x2.f32 x3, $8, $7;\n\t"
+        "mov.b32 $0, {x0, x1, x2, x3};\n\t"
+        "}\n",
+        "=r,f,f,f,f,f,f,f,f",
+        has_side_effects=False,
+        is_align_stack=False,
+    )
+    return Uint32(out)
diff --git a/vllm/models/deepseek_v4/nvidia/ops/dequant_gather_k_cutedsl.py b/vllm/models/deepseek_v4/nvidia/ops/dequant_gather_k_cutedsl.py
new file mode 100644
index 000000000000..8ff349cbfe13
--- /dev/null
+++ b/vllm/models/deepseek_v4/nvidia/ops/dequant_gather_k_cutedsl.py
@@ -0,0 +1,334 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from functools import cache
+
+import cutlass
+import cutlass.cute as cute
+import torch
+from cuda.bindings.driver import CUstream
+from cutlass import BFloat16, Int32, Uint8, Uint32
+from cutlass.cute.nvgpu import cpasync
+from quack.compile_utils import make_fake_tensor
+
+from vllm.models.deepseek_v4.nvidia.ops.cutedsl_utils import (
+    _bf16x2_mul,
+    _fp8x4_to_bf16x4,
+)
+
+
+def dequantize_and_gather_k_cache_cutedsl(
+    out: torch.Tensor,
+    k_cache: torch.Tensor,
+    seq_lens: torch.Tensor,
+    gather_lens: torch.Tensor | None,
+    block_table: torch.Tensor,
+    block_size: int,
+    offset: int,
+) -> None:
+    DequantGatherKCacheKernel.compile(
+        block_size=block_size,
+        has_gather_lens=gather_lens is not None,
+    )(out, k_cache, seq_lens, gather_lens, block_table, offset)
+
+
+class DequantGatherKCacheKernel:
+    # Hard-coded for DSv4.
+    head_dim = 512
+    group_size = 64  # 1 scale per 64 elems
+
+    def __init__(self, fp8_dim: int = 448, block_size: int = 64):
+        self.fp8_dim = fp8_dim
+        self.bf16_dim = self.head_dim - fp8_dim
+        self.data_dim = fp8_dim + self.bf16_dim * 2
+        self.block_size = block_size
+
+        self.num_warps = 4
+        self.tb_size = self.num_warps * 32
+        self.num_stages = 4
+
+    @cute.jit
+    def __call__(
+        self,
+        out: cute.Tensor,
+        k_cache: cute.Tensor,
+        seq_lens: cute.Tensor,
+        gather_lens: cute.Tensor | None,
+        block_table: cute.Tensor,
+        offset: Int32,
+        stream: CUstream,
+    ):
+        # Split k_cache into k_data and k_scale. Each [block_size, head_bytes]
+        # block is actually a concat of
+        # [block_size, fp8_dim + bf16_dim * 2] and [block_size, 8].
+        k_data = cute.make_tensor(
+            k_cache.iterator,
+            layout=cute.make_layout(
+                (k_cache.shape[0], self.block_size, self.data_dim),
+                stride=(k_cache.stride[0], self.data_dim, 1),
+            ),
+        )
+        k_scale = cute.make_tensor(
+            k_cache.iterator + (self.block_size * self.data_dim),
+            layout=cute.make_layout(
+                (k_cache.shape[0], self.block_size, 8),
+                stride=(k_cache.stride[0], 8, 1),
+            ),
+        )
+
+        grid = (out.shape[0], 1024, 1)
+        self.kernel(
+            out,
+            k_data,
+            k_scale,
+            seq_lens,
+            gather_lens,
+            block_table,
+            offset,
+        ).launch(grid=grid, block=(self.tb_size, 1, 1), stream=stream)
+
+    @cute.jit
+    def load_g2s(
+        self,
+        k_data_slice: cute.Tensor,
+        k_scale: cute.Tensor,
+        block_table: cute.Tensor,
+        s_kdata_slice: cute.Tensor,
+        s_kscale: cute.Tensor,
+        req_id,
+        pos,
+        lane_id,
+        stage_id,
+    ):
+        # k_data_slice: [num_blocks, block_size, (16, data_dim/16)]
+        # s_kdata_slice: [(4, data_dim/16), num_stages]
+
+        op = cpasync.CopyG2SOp(cute.nvgpu.LoadCacheMode.GLOBAL)
+        cp16_atom = cute.make_copy_atom(op, Uint32, num_bits_per_copy=128)
+        cp8_atom = cute.make_copy_atom(cpasync.CopyG2SOp(), Uint8, num_bits_per_copy=64)
+        page_id = block_table[req_id, pos // self.block_size]
+        block_offset = pos % self.block_size
+
+        # Load the first 512 bytes (32x16B).
+        idx = lane_id
+        src = k_data_slice[page_id, block_offset, (None, idx)]
+        cute.copy(
+            cp16_atom,
+            cute.recast_tensor(src, Uint32),
+            s_kdata_slice[(None, idx), stage_id],
+        )
+
+        # Load the tail 64 bytes.
+        idx += 32
+        if idx < cutlass.const_expr(self.data_dim // 16):
+            src = k_data_slice[page_id, block_offset, (None, idx)]
+            cute.copy(
+                cp16_atom,
+                cute.recast_tensor(src, Uint32),
+                s_kdata_slice[(None, idx), stage_id],
+            )
+        elif idx == cutlass.const_expr(self.data_dim // 16):
+            cute.copy(
+                cp8_atom,
+                k_scale[page_id, block_offset, None],
+                s_kscale[None, stage_id],
+            )
+
+    @cute.kernel
+    def kernel(
+        self,
+        out: cute.Tensor,
+        k_data: cute.Tensor,
+        k_scale: cute.Tensor,
+        seq_lens: cute.Tensor,
+        gather_lens: cute.Tensor | None,
+        block_table: cute.Tensor,
+        offset: Int32,
+    ):
+        req_id, worker_id, _ = cute.arch.block_idx()
+        tid, _, _ = cute.arch.thread_idx()
+        warp_id = cute.arch.make_warp_uniform(tid // 32)
+        lane_id = tid % 32
+
+        _, num_workers, _ = cute.arch.grid_dim()
+
+        # Prepare smem.
+        smem = cutlass.utils.SmemAllocator()
+        s_kdata = smem.allocate_tensor(
+            Uint32,
+            cute.make_layout((self.data_dim // 4, self.num_warps, self.num_stages)),
+            byte_alignment=16,
+        )[None, warp_id, None]
+        s_kscale = smem.allocate_tensor(
+            Uint8,
+            cute.make_layout((8, self.num_warps, self.num_stages)),
+            byte_alignment=8,
+        )[None, warp_id, None]
+
+        # Prepare for 16B cp.async, also for BF16 smem loads later.
+        k_data_slice = cute.logical_divide(k_data, (None, None, 16))
+        s_kdata_16B_slice = cute.logical_divide(s_kdata, (4, None))
+
+        # Load FP8 elems in 8B units, so once dequantized, they are 16B units.
+        s_kdata_8B_slice = cute.logical_divide(s_kdata, (2, None))
+
+        # 16B st.global.
+        out_slice = cute.logical_divide(out, (None, None, 8))
+
+        cp_op = cute.nvgpu.CopyUniversalOp()
+        cp8_atom = cute.make_copy_atom(cp_op, Uint32, num_bits_per_copy=64)
+        cp16_atom = cute.make_copy_atom(cp_op, Uint32, num_bits_per_copy=128)
+
+        seq_len = seq_lens[req_id]
+        gather_len = seq_len
+        if cutlass.const_expr(gather_lens is not None):
+            gather_len = gather_lens[req_id]  # type: ignore[index]
+        start_pos = seq_len - gather_len
+
+        # Start prefetch.
+        for i in cutlass.range_constexpr(self.num_stages - 1):
+            next_pos = (
+                start_pos
+                + worker_id * self.num_warps
+                + warp_id
+                + i * num_workers * self.num_warps
+            )
+            if next_pos < seq_len:
+                self.load_g2s(
+                    k_data_slice,
+                    k_scale,
+                    block_table,
+                    s_kdata_16B_slice,
+                    s_kscale,
+                    req_id,
+                    next_pos,
+                    lane_id,
+                    i,
+                )
+            cute.arch.cp_async_commit_group()
+        prefetch_stage = self.num_stages - 1
+        compute_stage = 0
+
+        # Main loop.
+        for i in range(
+            worker_id * self.num_warps + warp_id,
+            gather_len,
+            num_workers * self.num_warps,
+        ):
+            pos = start_pos + i
+
+            # Prefetch next stage.
+            next_pos = pos + num_workers * self.num_warps * (self.num_stages - 1)
+            if next_pos < seq_len:
+                self.load_g2s(
+                    k_data_slice,
+                    k_scale,
+                    block_table,
+                    s_kdata_16B_slice,
+                    s_kscale,
+                    req_id,
+                    next_pos,
+                    lane_id,
+                    prefetch_stage,
+                )
+                prefetch_stage = (prefetch_stage + 1) % self.num_stages
+            cute.arch.cp_async_commit_group()
+
+            # Wait for gmem->smem to finish.
+            cute.arch.cp_async_wait_group(self.num_stages - 1)
+            cute.arch.sync_warp()
+
+            # There are 512 elems per token. As a warp, data0 holds the first
+            # 256 elems and data1 holds the second 256 elems, i.e. each thread
+            # holds 8 FP8 elems. This keeps the dequantized 8 BF16 elems as
+            # contiguous 16B global stores. On Blackwell, this might not be
+            # necessary as we have 32B global stores, but doing it this way
+            # does not seem to be slower.
+            data0 = cute.make_rmem_tensor((2,), Uint32)
+            data1 = cute.make_rmem_tensor((2,), Uint32)
+            cute.copy(cp8_atom, s_kdata_8B_slice[(None, lane_id), compute_stage], data0)
+            cute.copy(
+                cp8_atom,
+                s_kdata_8B_slice[(None, lane_id + 32), compute_stage],
+                data1,
+            )
+
+            # Convert to bf16x2 via bit manipulation. FP8 scales are per 64
+            # elements. An 8-element chunk advances the scale index by
+            # chunk_id * 8 // group_size.
+            scale0_u32 = Uint32(s_kscale[lane_id * 8 // self.group_size, compute_stage])
+            scale0_bf16x2 = (scale0_u32 << Uint32(23)) | (scale0_u32 << Uint32(7))
+            scale1_u32 = Uint32(
+                s_kscale[(lane_id + 32) * 8 // self.group_size, compute_stage]
+            )
+            scale1_bf16x2 = (scale1_u32 << Uint32(23)) | (scale1_u32 << Uint32(7))
+
+            # cvt.rn.scaled::n2::ue8m0.bf16x2.e4m3x2 requires PTX 9.2
+            # (CUDA 13.2).
+            dequant0 = cute.make_rmem_tensor(4, Uint32)
+            dequant1 = cute.make_rmem_tensor(4, Uint32)
+            for j in cutlass.range_constexpr(2):
+                tmp0 = _fp8x4_to_bf16x4(data0[j])
+                tmp1 = _fp8x4_to_bf16x4(data1[j])
+
+                # BF16 multiply is safe because the scales are exact powers of 2.
+                dequant0[j * 2] = _bf16x2_mul(tmp0[0], scale0_bf16x2)
+                dequant1[j * 2] = _bf16x2_mul(tmp1[0], scale1_bf16x2)
+                dequant0[j * 2 + 1] = _bf16x2_mul(tmp0[1], scale0_bf16x2)
+                dequant1[j * 2 + 1] = _bf16x2_mul(tmp1[1], scale1_bf16x2)
+
+            # Last 64 elems are BF16 tail, corresponds to dequant1 of last
+            # 8 threads. We have 448 FP8 + 64 BF16 -> 28x 16B for FP8 +
+            # 8x 16B for BF16.
+            if lane_id + 32 >= self.fp8_dim // 8:
+                idx = self.fp8_dim // 16 + (lane_id + 32) - self.fp8_dim // 8
+                cute.copy(
+                    cp16_atom,
+                    s_kdata_16B_slice[(None, idx), compute_stage],
+                    dequant1,
+                )
+
+            # Store two 16B BF16 chunks per lane: first half, then second half.
+            dst = out_slice[req_id, offset + i, (None, lane_id)]
+            cute.copy(cp16_atom, dequant0, cute.recast_tensor(dst, Uint32))
+
+            dst = out_slice[req_id, offset + i, (None, lane_id + 32)]
+            cute.copy(cp16_atom, dequant1, cute.recast_tensor(dst, Uint32))
+
+            compute_stage = (compute_stage + 1) % self.num_stages
+
+    @cache
+    @staticmethod
+    def compile(
+        fp8_dim: int = 448,
+        block_size: int = 64,
+        has_gather_lens: bool = True,
+    ):
+        num_reqs = cute.sym_int()
+        head_dim = DequantGatherKCacheKernel.head_dim
+        head_bytes = fp8_dim + (head_dim - fp8_dim) * 2 + 8
+
+        out = make_fake_tensor(BFloat16, (num_reqs, cute.sym_int(), head_dim), 16)
+        k_cache = cute.runtime.make_fake_tensor(
+            Uint8,
+            (cute.sym_int(), block_size, head_bytes),
+            stride=(cute.sym_int64(divisibility=32), head_bytes, 1),
+            assumed_align=32,
+        )
+        seq_lens = make_fake_tensor(Int32, (num_reqs,))
+        gather_lens = make_fake_tensor(Int32, (num_reqs,)) if has_gather_lens else None
+        block_table = make_fake_tensor(Int32, (num_reqs, cute.sym_int()))
+
+        kernel = DequantGatherKCacheKernel(fp8_dim, block_size)
+        stream = cute.runtime.make_fake_stream(use_tvm_ffi_env_stream=True)
+        return cute.compile(
+            kernel,
+            out,
+            k_cache,
+            seq_lens,
+            gather_lens,
+            block_table,
+            Int32(0),
+            stream,
+            options="--enable-tvm-ffi",
+        )
diff --git a/vllm/models/deepseek_v4/nvidia/ops/fused_indexer_q_cutedsl.py b/vllm/models/deepseek_v4/nvidia/ops/fused_indexer_q_cutedsl.py
new file mode 100644
index 000000000000..e1e2b40b8be7
--- /dev/null
+++ b/vllm/models/deepseek_v4/nvidia/ops/fused_indexer_q_cutedsl.py
@@ -0,0 +1,613 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from functools import cache
+
+import cutlass
+import cutlass.cute as cute
+import torch
+from cuda.bindings.driver import CUstream
+from cutlass import BFloat16, Float32, Int64, Uint8, Uint32, const_expr
+from quack.compile_utils import make_fake_tensor
+
+from vllm.models.deepseek_v4.nvidia.ops.cutedsl_utils import (
+    _bf16x2_abs,
+    _bf16x2_max,
+    _bf16x2_to_fp32,
+    _fp32x2_to_bf16x2,
+    _fp32x4_to_fp8x4,
+    _fp32x8_to_fp4x8,
+    _recast_val,
+)
+from vllm.vllm_flash_attn.cute import utils as cute_utils
+
+# MXFP4: 32 elements per block, packed 2 nibbles per byte, ue8m0 block scale.
+MXFP4_BLOCK_SIZE = 32
+
+_TORCH_TO_CUTE = {
+    torch.bfloat16: BFloat16,
+    torch.float32: Float32,
+}
+
+
+def fused_indexer_q_rope_quant_mxfp4_cutedsl(
+    positions: torch.Tensor,
+    index_q: torch.Tensor,
+    index_q_cos_sin_cache: torch.Tensor,
+    index_weights: torch.Tensor,
+    index_weights_softmax_scale: float,
+    index_weights_head_scale: float,
+    index_q_packed: torch.Tensor,
+    index_q_scale: torch.Tensor,
+    index_weights_out: torch.Tensor,
+) -> None:
+    num_tokens, num_heads, head_dim = index_q.shape
+    rope_dim = index_q_cos_sin_cache.shape[-1]
+    rope_type = _TORCH_TO_CUTE[index_q_cos_sin_cache.dtype]
+
+    # compile all variants at first invocation
+    for coarsen in (1, 4):
+        IndexerQMxFp4Kernel.compile(head_dim, rope_dim, num_heads, rope_type, coarsen)
+
+    # heuristic
+    coarsen = 1 if num_tokens < 512 else 4
+    compiled = IndexerQMxFp4Kernel.compile(
+        head_dim, rope_dim, num_heads, rope_type, coarsen
+    )
+    scale = float(index_weights_softmax_scale * index_weights_head_scale)
+    compiled(
+        positions,
+        index_q,
+        index_q_cos_sin_cache,
+        index_weights,
+        index_q_packed,
+        index_q_scale,
+        index_weights_out,
+        scale,
+    )
+
+
+def fused_indexer_q_rope_quant_fp8_cutedsl(
+    positions: torch.Tensor,
+    index_q: torch.Tensor,
+    index_q_cos_sin_cache: torch.Tensor,
+    index_weights: torch.Tensor,
+    index_weights_softmax_scale: float,
+    index_weights_head_scale: float,
+    index_q_fp8: torch.Tensor,
+    index_weights_out: torch.Tensor,
+) -> None:
+    num_tokens, num_heads, head_dim = index_q.shape
+    rope_dim = index_q_cos_sin_cache.shape[-1]
+    rope_type = _TORCH_TO_CUTE[index_q_cos_sin_cache.dtype]
+
+    for coarsen in (1, 4):
+        IndexerQFp8Kernel.compile(head_dim, rope_dim, num_heads, rope_type, coarsen)
+
+    coarsen = 1 if num_tokens < 512 else 4
+    compiled = IndexerQFp8Kernel.compile(
+        head_dim, rope_dim, num_heads, rope_type, coarsen
+    )
+    scale = float(index_weights_softmax_scale * index_weights_head_scale)
+    # The cute kernel treats the FP8 buffer as raw bytes (Uint8).
+    compiled(
+        positions,
+        index_q,
+        index_q_cos_sin_cache,
+        index_weights,
+        index_q_fp8.view(torch.uint8),
+        index_weights_out,
+        scale,
+    )
+
+
+class IndexerQRopeQuantKernel:
+    """Shared infrastructure for indexer-Q RoPE+quant fused kernels.
+
+    Subclasses implement ``kernel`` for a particular Q quantization scheme
+    (MXFP4, FP8 e4m3, …). The base class owns the launch geometry and the
+    common preamble: thread/token addressing, the BF16 Q load, and the
+    interleaved-RoPE pass over the trailing ``rope_dim`` lanes.
+    """
+
+    def __init__(
+        self,
+        head_dim: int = 128,
+        rope_dim: int = 64,
+        num_heads: int = 64,
+        cos_sin_dtype: type[cutlass.Numeric] = Float32,
+        coarsen: int = 4,
+    ):
+        self.head_dim = head_dim
+        self.rope_dim = rope_dim
+        self.nope_dim = head_dim - rope_dim
+        self.num_heads = num_heads
+        self.cos_sin_dtype = cos_sin_dtype
+
+        # process multiple heads at the same time to armotize RoPE load costs
+        assert num_heads % coarsen == 0
+        self.coarsen = coarsen
+
+        # later we will use 32B load = 16 BF16 elems
+        # thus, head_dim=128 requires 8 threads to handle.
+        # let's call subwarp = 8 threads.
+        self.subwarp_size = head_dim // 16
+        self.tb_size = 128
+        self.threads_per_token = (self.num_heads // self.coarsen) * self.subwarp_size
+
+    @cute.jit
+    def _load_q_and_rope(
+        self,
+        positions: cute.Tensor,
+        q: cute.Tensor,
+        cos_sin_cache: cute.Tensor,
+    ):
+        """Compute thread indices, load Q (BF16), and apply interleaved RoPE.
+
+        Returns a tuple
+            (q_bf16x2, tid, global_tid, sublane, token_id, head_tile_id,
+             head_start, in_bounds, num_token_heads)
+        where ``q_bf16x2`` is a (coarsen, 8) rmem tile of Uint32 packed
+        bf16x2 pairs covering the 16 BF16 lanes owned by this thread for
+        each of ``coarsen`` heads. RoPE is applied in place to the
+        trailing ``rope_dim`` lanes; the leading nope lanes pass through.
+        """
+        block_id, _, _ = cute.arch.block_idx()
+        tid, _, _ = cute.arch.thread_idx()
+
+        num_tokens = q.shape[0]
+        num_token_heads = num_tokens * self.num_heads
+        global_tid = block_id * self.tb_size + tid
+
+        global_subwarp_id = global_tid // self.subwarp_size
+        sublane = tid % self.subwarp_size
+
+        token_id = global_subwarp_id // (self.num_heads // self.coarsen)
+        head_tile_id = global_subwarp_id % (self.num_heads // self.coarsen)
+        head_start = head_tile_id * self.coarsen
+
+        # NOTE: token_id may exceed bounds, hence we need to add load/store guards
+        # we can't do early exit because CuteDSL doesn't support it. and we also need
+        # all threads in a warp to be active since we utilize warp shuffle later.
+        # must_in_bounds is constexpr, True when 1 threadblock fit within 1 token
+        # position. the compiler will remove bounds check when that happens.
+        must_in_bounds = cutlass.const_expr(self.tb_size % self.threads_per_token == 0)
+        in_bounds = must_in_bounds or (token_id < num_tokens)
+
+        cp_op = cute.nvgpu.CopyUniversalOp()
+
+        _layout = cute.make_layout((self.coarsen, 8), stride=(8, 1))
+        q_bf16x2 = cute.make_rmem_tensor(_layout, Uint32)
+
+        if in_bounds:
+            # we can't do cute.copy() on the whole 2D tile directly because
+            # cute.copy() wants the 1st mode to be covered by the copy atom,
+            # and other modes as for loop. there is no fast way to
+            # "transpose" the tensor view.
+            q_tile = cute.local_tile(
+                q[token_id, None, None],
+                tiler=(self.coarsen, 16),
+                coord=(head_tile_id, sublane),
+            )
+            cp_u32x8 = cute.make_copy_atom(cp_op, Uint32, num_bits_per_copy=256)
+            for i in cutlass.range_constexpr(self.coarsen):
+                src = cute.recast_tensor(q_tile[i, None], Uint32)
+                cute.copy(cp_u32x8, src, q_bf16x2[i, None])
+
+        # RoPE applies only to the trailing rope_dim values. We keep the rounded
+        # BF16 result in q_bits so the later amax and quantization see BF16.
+        # cos_sin_cache layout: [max_pos, rope_dim]
+        if in_bounds and sublane * 16 >= self.nope_dim:
+            cos_vals = cute.make_rmem_tensor((8,), Float32)
+            sin_vals = cute.make_rmem_tensor((8,), Float32)
+
+            pos = positions[token_id]
+
+            # select 8 elems from cos and sin
+            cos_id = sublane - self.nope_dim // 16
+            sin_id = cos_id + self.rope_dim // 16
+            cos_src = cute.local_tile(
+                cos_sin_cache[pos, None], tiler=(8,), coord=(cos_id,)
+            )
+            sin_src = cute.local_tile(
+                cos_sin_cache[pos, None], tiler=(8,), coord=(sin_id,)
+            )
+
+            cp_f32x8 = cute.make_copy_atom(cp_op, Float32, num_bits_per_copy=256)
+            cp_u32x4 = cute.make_copy_atom(cp_op, Uint32, num_bits_per_copy=128)
+
+            if const_expr(self.cos_sin_dtype is Float32):
+                cute.copy(cp_f32x8, cos_src, cos_vals)
+                cute.copy(cp_f32x8, sin_src, sin_vals)
+            else:
+                cos_bf16x2 = cute.make_rmem_tensor((4,), Uint32)
+                sin_bf16x2 = cute.make_rmem_tensor((4,), Uint32)
+                cute.copy(cp_u32x4, cute.recast_tensor(cos_src, Uint32), cos_bf16x2)
+                cute.copy(cp_u32x4, cute.recast_tensor(sin_src, Uint32), sin_bf16x2)
+
+                for i in cutlass.range_constexpr(4):
+                    cos0, cos1 = _bf16x2_to_fp32(cos_bf16x2[i])
+                    sin0, sin1 = _bf16x2_to_fp32(sin_bf16x2[i])
+                    cos_vals[i * 2] = cos0
+                    cos_vals[i * 2 + 1] = cos1
+                    sin_vals[i * 2] = sin0
+                    sin_vals[i * 2 + 1] = sin1
+
+            for i in cutlass.range_constexpr(self.coarsen):
+                for j in cutlass.range_constexpr(8):
+                    q0, q1 = _bf16x2_to_fp32(q_bf16x2[i, j])
+                    rot0 = q0 * cos_vals[j] - q1 * sin_vals[j]
+                    rot1 = q0 * sin_vals[j] + q1 * cos_vals[j]
+                    # convert back to BF16 to match numerics
+                    q_bf16x2[i, j] = _fp32x2_to_bf16x2(rot0, rot1)
+
+        return (
+            q_bf16x2,
+            tid,
+            global_tid,
+            sublane,
+            token_id,
+            head_tile_id,
+            head_start,
+            in_bounds,
+            num_token_heads,
+        )
+
+
+class IndexerQMxFp4Kernel(IndexerQRopeQuantKernel):
+    """Eight-thread subwarps process one ``(token, head)`` row."""
+
+    @cute.jit
+    def __call__(
+        self,
+        positions: cute.Tensor,
+        q: cute.Tensor,
+        cos_sin_cache: cute.Tensor,
+        weights: cute.Tensor,
+        q_quant: cute.Tensor,
+        q_scale: cute.Tensor,
+        weights_out: cute.Tensor,
+        scale: Float32,
+        stream: CUstream,
+    ):
+        total_threads = q.shape[0] * self.threads_per_token
+        grid = (cute.ceil_div(total_threads, self.tb_size), 1, 1)
+        self.kernel(
+            positions,
+            q,
+            cos_sin_cache,
+            weights,
+            q_quant,
+            q_scale,
+            weights_out,
+            scale,
+        ).launch(grid=grid, block=(self.tb_size, 1, 1), stream=stream)
+
+    @cute.kernel
+    def kernel(
+        self,
+        positions: cute.Tensor,
+        q: cute.Tensor,
+        cos_sin_cache: cute.Tensor,
+        weights: cute.Tensor,
+        q_quant: cute.Tensor,
+        q_scale: cute.Tensor,
+        weights_out: cute.Tensor,
+        scale: Float32,
+    ):
+        (
+            q_bf16x2,
+            tid,
+            global_tid,
+            sublane,
+            token_id,
+            head_tile_id,
+            head_start,
+            in_bounds,
+            num_token_heads,
+        ) = self._load_q_and_rope(positions, q, cos_sin_cache)
+
+        cp_op = cute.nvgpu.CopyUniversalOp()
+
+        # layout: [coarsen, 8]
+        q_fp4_tile = cute.local_tile(
+            q_quant[token_id, None, None],
+            tiler=(self.coarsen, 8),
+            coord=(head_tile_id, sublane),
+        )
+
+        for i in cutlass.range_constexpr(self.coarsen):
+            # compute amax in packed bf16x2 to save instructions
+            # Each thread holds 16 elems. Two adjacent threads form one 32-elem
+            # MXFP4 block, so a width-2 shuffle gives the block amax.
+            amax_bf16x2 = _bf16x2_abs(q_bf16x2[i, 0])
+            for j in cutlass.range_constexpr(1, 8):
+                amax_bf16x2 = _bf16x2_max(amax_bf16x2, _bf16x2_abs(q_bf16x2[i, j]))
+            amax_bf16x2 = cute_utils.warp_reduce(
+                amax_bf16x2,
+                _bf16x2_max,
+                width=MXFP4_BLOCK_SIZE // 16,
+            )
+            amax_pair = _bf16x2_to_fp32(amax_bf16x2)
+            amax = cute_utils.fmax(amax_pair[0], amax_pair[1])
+
+            if in_bounds:
+                # compute block scale with bit manipulation
+                # UE8M0 stores ceil(log2(fp4_scale)) + 127. Adding the mantissa mask
+                # increments the exponent whenever fp4_scale is not exactly a power of 2
+                eps = cutlass.const_expr(float.fromhex("0x6p-126"))
+                fp4_scale = cute_utils.fmax(amax, eps) * Float32(1.0 / 6.0)
+                bits = _recast_val(fp4_scale, Uint32)
+                ue8m0 = cute_utils.shr_u32(
+                    bits + Uint32(0x7FFFFF), Uint32(23)
+                ) & Uint32(0xFF)
+
+                # Only one of the two threads in an MXFP4 block writes the shared scale.
+                if tid % 2 == 0:
+                    mx_block = sublane // 2
+                    q_scale[token_id, head_start + i, mx_block] = Uint8(ue8m0)
+
+                # If scale = 2^A and ue8m0 = A + 127, then inverse scale has exponent
+                # -A + 127 = 254 - ue8m0.
+                inv_scale_bits = (Uint32(254) - ue8m0) << Uint32(23)
+                inv_fp4_scale = _recast_val(inv_scale_bits, Float32)
+
+                vals = cute.make_rmem_tensor(16, Float32)
+                for j in cutlass.range_constexpr(8):
+                    q0, q1 = _bf16x2_to_fp32(q_bf16x2[i, j])
+                    vals[j * 2] = q0 * inv_fp4_scale
+                    vals[j * 2 + 1] = q1 * inv_fp4_scale
+
+                # pack to FP4
+                packed = cute.make_rmem_tensor((2,), Uint32)
+                packed[0] = _fp32x8_to_fp4x8(vals, 0)
+                packed[1] = _fp32x8_to_fp4x8(vals, 8)
+
+                dst = q_fp4_tile[i, None]
+                cp_u32x2 = cute.make_copy_atom(cp_op, Uint32, num_bits_per_copy=64)
+                cute.copy(cp_u32x2, packed, cute.recast_tensor(dst, Uint32))
+
+        # Weight scaling is independent of the Q subwarp work. The first
+        # num_tokens * num_heads logical threads cover one weight each.
+        if global_tid < num_token_heads:
+            weight_token_id = global_tid // self.num_heads
+            weight_head_id = global_tid % self.num_heads
+            weights_out[weight_token_id, weight_head_id] = (
+                weights[weight_token_id, weight_head_id].to(Float32) * scale
+            )
+
+    @cache
+    @staticmethod
+    def compile(
+        head_dim: int = 128,
+        rope_dim: int = 64,
+        num_heads: int = 64,
+        cos_sin_dtype: type[cutlass.Numeric] = Float32,
+        coarsen: int = 4,
+    ):
+        num_tokens = cute.sym_int()
+        max_pos = cute.sym_int()
+
+        q = make_fake_tensor(
+            BFloat16, (num_tokens, num_heads, head_dim), divisibility=16
+        )
+        positions = make_fake_tensor(Int64, (num_tokens,), divisibility=1)
+        cos_sin_cache = make_fake_tensor(
+            cos_sin_dtype,
+            (max_pos, rope_dim),
+            divisibility=8,
+        )
+        weights = make_fake_tensor(BFloat16, (num_tokens, num_heads), divisibility=8)
+        q_fp4 = make_fake_tensor(
+            Uint8,
+            (num_tokens, num_heads, head_dim // 2),
+            divisibility=16,
+        )
+        q_scale = make_fake_tensor(
+            Uint8,
+            (num_tokens, num_heads, head_dim // MXFP4_BLOCK_SIZE),
+            divisibility=4,
+        )
+        weights_out = make_fake_tensor(Float32, (num_tokens, num_heads), divisibility=4)
+
+        kernel = IndexerQMxFp4Kernel(
+            head_dim, rope_dim, num_heads, cos_sin_dtype, coarsen
+        )
+        stream = cute.runtime.make_fake_stream(use_tvm_ffi_env_stream=True)
+        return cute.compile(
+            kernel,
+            positions,
+            q,
+            cos_sin_cache,
+            weights,
+            q_fp4,
+            q_scale,
+            weights_out,
+            Float32(0.0),
+            stream,
+            options="--enable-tvm-ffi",
+        )
+
+
+class IndexerQFp8Kernel(IndexerQRopeQuantKernel):
+    """Eight-thread subwarps process one ``(token, head)`` row and emit
+    float8 e4m3fn with a single per-(token, head) scalar scale folded
+    into the per-token weight (mirrors ``_fused_indexer_q_rope_quant_kernel``).
+    """
+
+    def __init__(
+        self,
+        head_dim: int = 128,
+        rope_dim: int = 64,
+        num_heads: int = 64,
+        cos_sin_dtype: type[cutlass.Numeric] = Float32,
+        coarsen: int = 4,
+    ):
+        super().__init__(head_dim, rope_dim, num_heads, cos_sin_dtype, coarsen)
+        # Each subwarp owns `coarsen` heads; we use the first `coarsen`
+        # threads of the subwarp to write the per-head weights using the
+        # fp8 scale computed in the matching loop iteration.
+        assert self.coarsen <= self.subwarp_size, (
+            f"FP8 kernel requires coarsen ({self.coarsen}) <= "
+            f"subwarp_size ({self.subwarp_size}) for the weight-fold step"
+        )
+
+    @cute.jit
+    def __call__(
+        self,
+        positions: cute.Tensor,
+        q: cute.Tensor,
+        cos_sin_cache: cute.Tensor,
+        weights: cute.Tensor,
+        q_fp8: cute.Tensor,
+        weights_out: cute.Tensor,
+        scale: Float32,
+        stream: CUstream,
+    ):
+        total_threads = q.shape[0] * self.threads_per_token
+        grid = (cute.ceil_div(total_threads, self.tb_size), 1, 1)
+        self.kernel(
+            positions,
+            q,
+            cos_sin_cache,
+            weights,
+            q_fp8,
+            weights_out,
+            scale,
+        ).launch(grid=grid, block=(self.tb_size, 1, 1), stream=stream)
+
+    @cute.kernel
+    def kernel(
+        self,
+        positions: cute.Tensor,
+        q: cute.Tensor,
+        cos_sin_cache: cute.Tensor,
+        weights: cute.Tensor,
+        q_fp8: cute.Tensor,
+        weights_out: cute.Tensor,
+        scale: Float32,
+    ):
+        (
+            q_bf16x2,
+            _tid,
+            _global_tid,
+            sublane,
+            token_id,
+            head_tile_id,
+            head_start,
+            in_bounds,
+            _num_token_heads,
+        ) = self._load_q_and_rope(positions, q, cos_sin_cache)
+
+        cp_op = cute.nvgpu.CopyUniversalOp()
+
+        # layout: [coarsen, 16] bytes (one e4m3fn per element).
+        q_fp8_tile = cute.local_tile(
+            q_fp8[token_id, None, None],
+            tiler=(self.coarsen, 16),
+            coord=(head_tile_id, sublane),
+        )
+
+        for i in cutlass.range_constexpr(self.coarsen):
+            # Reduce amax across the full head_dim: each thread already holds
+            # the max over its 16 lanes; a width=subwarp_size warp shuffle
+            # spreads the head-wide max to every lane in the subwarp.
+            amax_bf16x2 = _bf16x2_abs(q_bf16x2[i, 0])
+            for j in cutlass.range_constexpr(1, 8):
+                amax_bf16x2 = _bf16x2_max(amax_bf16x2, _bf16x2_abs(q_bf16x2[i, j]))
+            amax_bf16x2 = cute_utils.warp_reduce(
+                amax_bf16x2,
+                _bf16x2_max,
+                width=self.subwarp_size,
+            )
+            amax_pair = _bf16x2_to_fp32(amax_bf16x2)
+            amax = cute_utils.fmax(amax_pair[0], amax_pair[1])
+
+            # scale = max(amax, eps) / fp8_max, then rounded UP to the next
+            # power of two. Adding the mantissa mask before shifting out the
+            # mantissa bumps the exponent whenever s isn't a pure pow2.
+            fp32_scale = cute_utils.fmax(amax, Float32(1e-4)) * Float32(1.0 / 448.0)
+            bits = _recast_val(fp32_scale, Uint32)
+            scale_exp = cute_utils.shr_u32(
+                bits + Uint32(0x7FFFFF), Uint32(23)
+            ) & Uint32(0xFF)
+
+            # rounded scale = 2^(scale_exp - 127); bit pattern is scale_exp << 23
+            fp8_scale_bits = scale_exp << Uint32(23)
+            fp8_scale = _recast_val(fp8_scale_bits, Float32)
+            # inverse = 2^-(scale_exp - 127); bit pattern is (254 - scale_exp) << 23
+            inv_scale_bits = (Uint32(254) - scale_exp) << Uint32(23)
+            inv_fp8_scale = _recast_val(inv_scale_bits, Float32)
+
+            # Weight fold: weights_out = weights * q_scale * scale_combined.
+            # All threads in the subwarp share the same fp8_scale after the
+            # warp_reduce above, so we let thread `sublane == i` write the
+            # weight for head `head_start + i`.
+            if in_bounds and sublane == i:
+                head_id = head_start + i
+                weights_out[token_id, head_id] = (
+                    weights[token_id, head_id].to(Float32) * scale * fp8_scale
+                )
+
+            if in_bounds:
+                # 16 BF16 → 16 e4m3 bytes per thread, packed into 4 b32s
+                # (one cp.async-shaped 128-bit store per row).
+                packed = cute.make_rmem_tensor((4,), Uint32)
+                for j in cutlass.range_constexpr(4):
+                    q0, q1 = _bf16x2_to_fp32(q_bf16x2[i, j * 2])
+                    q2, q3 = _bf16x2_to_fp32(q_bf16x2[i, j * 2 + 1])
+                    packed[j] = _fp32x4_to_fp8x4(
+                        q0 * inv_fp8_scale,
+                        q1 * inv_fp8_scale,
+                        q2 * inv_fp8_scale,
+                        q3 * inv_fp8_scale,
+                    )
+
+                dst = q_fp8_tile[i, None]
+                cp_u32x4 = cute.make_copy_atom(cp_op, Uint32, num_bits_per_copy=128)
+                cute.copy(cp_u32x4, packed, cute.recast_tensor(dst, Uint32))
+
+    @cache
+    @staticmethod
+    def compile(
+        head_dim: int = 128,
+        rope_dim: int = 64,
+        num_heads: int = 64,
+        cos_sin_dtype: type[cutlass.Numeric] = Float32,
+        coarsen: int = 4,
+    ):
+        num_tokens = cute.sym_int()
+        max_pos = cute.sym_int()
+
+        q = make_fake_tensor(
+            BFloat16, (num_tokens, num_heads, head_dim), divisibility=16
+        )
+        positions = make_fake_tensor(Int64, (num_tokens,), divisibility=1)
+        cos_sin_cache = make_fake_tensor(
+            cos_sin_dtype,
+            (max_pos, rope_dim),
+            divisibility=8,
+        )
+        weights = make_fake_tensor(BFloat16, (num_tokens, num_heads), divisibility=8)
+        q_fp8 = make_fake_tensor(
+            Uint8,
+            (num_tokens, num_heads, head_dim),
+            divisibility=16,
+        )
+        weights_out = make_fake_tensor(Float32, (num_tokens, num_heads), divisibility=4)
+
+        kernel = IndexerQFp8Kernel(
+            head_dim, rope_dim, num_heads, cos_sin_dtype, coarsen
+        )
+        stream = cute.runtime.make_fake_stream(use_tvm_ffi_env_stream=True)
+        return cute.compile(
+            kernel,
+            positions,
+            q,
+            cos_sin_cache,
+            weights,
+            q_fp8,
+            weights_out,
+            Float32(0.0),
+            stream,
+            options="--enable-tvm-ffi",
+        )
diff --git a/vllm/models/deepseek_v4/quant_config.py b/vllm/models/deepseek_v4/quant_config.py
new file mode 100644
index 000000000000..85a78883fd32
--- /dev/null
+++ b/vllm/models/deepseek_v4/quant_config.py
@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Quantization config for DeepSeek V4."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from vllm.config import get_current_vllm_config
+from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe.layer import UnquantizedFusedMoEMethod
+from vllm.model_executor.layers.quantization import QuantizationMethods
+from vllm.model_executor.layers.quantization.fp8 import Fp8Config
+from vllm.model_executor.layers.quantization.mxfp4 import Mxfp4MoEMethod
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    is_layer_skipped,
+)
+
+_DEEPSEEK_V4_EXPERT_DTYPES = ("fp4", "fp8")
+
+if TYPE_CHECKING:
+    from vllm.model_executor.layers.quantization.modelopt import (
+        ModelOptNvFp4Config,
+    )
+
+
+class DeepseekV4FP8Config(Fp8Config):
+    """FP8 config for DeepSeek V4 with expert-dtype-aware MoE dispatch.
+
+    DeepSeek V4 checkpoints always use FP8 block quantization for
+    linear/attention layers. The MoE expert weights vary by checkpoint:
+    - ``expert_dtype="fp4"`` (e.g. DeepSeek-V4-Flash): MXFP4 experts
+      with ue8m0 (e8m0fnu) FP8 linear scales.
+    - ``expert_dtype="fp8"`` (e.g. DeepSeek-V4-Flash-Base): FP8 block
+      experts with float32 FP8 linear scales.
+
+    The dispatch and the linear scale dtype are both keyed off
+    ``expert_dtype`` from the model's hf_config; missing values default
+    to ``"fp4"`` so existing FP4 checkpoints stay unchanged.
+
+    NOTE: ``expert_dtype`` is resolved lazily because this config is
+    constructed during VllmConfig setup, before ``set_current_vllm_config``
+    is active. Reading hf_config eagerly in ``__init__`` would always see
+    the default ``"fp4"`` and silently misroute Flash-Base checkpoints.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._resolved_expert_dtype: str | None = None
+        self._resolved_moe_quant_algo: str | None = None
+        self._nvfp4_config: ModelOptNvFp4Config | None = None
+        # ``is_scale_e8m0`` is a property that resolves on first read,
+        # by which time the current vllm_config has been set.
+
+    @property
+    def expert_dtype(self) -> str:
+        if self._resolved_expert_dtype is None:
+            try:
+                hf_config = get_current_vllm_config().model_config.hf_config
+            except Exception:
+                # vllm_config not yet set; defer the decision until a
+                # later call lands inside set_current_vllm_config.
+                return "fp4"
+            expert_dtype = getattr(hf_config, "expert_dtype", "fp4")
+            if expert_dtype not in _DEEPSEEK_V4_EXPERT_DTYPES:
+                raise ValueError(
+                    f"Unsupported DeepSeek V4 expert_dtype={expert_dtype!r}; "
+                    f"expected one of {_DEEPSEEK_V4_EXPERT_DTYPES}."
+                )
+            self._resolved_expert_dtype = expert_dtype
+            from vllm.logger import init_logger
+
+            init_logger(__name__).info_once(
+                "DeepSeek V4 expert_dtype resolved to %r", expert_dtype
+            )
+        return self._resolved_expert_dtype
+
+    @property
+    def is_scale_e8m0(self) -> bool:
+        # FP4 checkpoints store FP8 linear scales as e8m0fnu; FP8 expert
+        # checkpoints (Flash-Base) store them as float32.
+        return self.expert_dtype == "fp4"
+
+    def _resolve_moe_overrides(self) -> None:
+        if self._resolved_moe_quant_algo is not None:
+            return
+        try:
+            hf_config = get_current_vllm_config().model_config.hf_config
+        except Exception:
+            return
+        quant_cfg = getattr(hf_config, "quantization_config", None) or {}
+        algo = (quant_cfg.get("moe_quant_algo") or "").upper() or None
+        self._resolved_moe_quant_algo = algo or ""
+
+    @property
+    def moe_quant_algo(self) -> str:
+        self._resolve_moe_overrides()
+        return self._resolved_moe_quant_algo or ""
+
+    def _get_nvfp4_config(self) -> ModelOptNvFp4Config:
+        if self._nvfp4_config is None:
+            from vllm.model_executor.layers.quantization.modelopt import (
+                ModelOptNvFp4Config,
+            )
+
+            self._nvfp4_config = ModelOptNvFp4Config(
+                is_checkpoint_nvfp4_serialized=True,
+                kv_cache_quant_algo=None,
+                exclude_modules=[],
+                group_size=16,
+            )
+        return self._nvfp4_config
+
+    @classmethod
+    def get_name(cls) -> QuantizationMethods:
+        return "deepseek_v4_fp8"
+
+    @classmethod
+    def override_quantization_method(
+        cls, hf_quant_cfg, user_quant, hf_config=None
+    ) -> QuantizationMethods | None:
+        if not (
+            isinstance(hf_quant_cfg, dict)
+            and hf_quant_cfg.get("quant_method") in ("fp8", "deepseek_v4_fp8")
+        ):
+            return None
+        model_type = getattr(hf_config, "model_type", None)
+        if model_type == "deepseek_v4" or user_quant == "deepseek_v4_fp8":
+            return "deepseek_v4_fp8"
+        return None
+
+    def get_quant_method(self, layer, prefix):
+        if isinstance(layer, FusedMoE):
+            if is_layer_skipped(
+                prefix=prefix,
+                ignored_layers=self.ignored_layers,
+                fused_mapping=self.packed_modules_mapping,
+            ):
+                return UnquantizedFusedMoEMethod(layer.moe_config)
+            if self.expert_dtype == "fp4":
+                if self.moe_quant_algo == "NVFP4":
+                    from vllm.model_executor.layers.quantization.modelopt import (
+                        ModelOptNvFp4FusedMoE,
+                    )
+
+                    return ModelOptNvFp4FusedMoE(
+                        quant_config=self._get_nvfp4_config(),
+                        moe_config=layer.moe_config,
+                    )
+                return Mxfp4MoEMethod(layer.moe_config)
+            # expert_dtype == "fp8": fall through to Fp8Config which
+            # returns Fp8MoEMethod with block-wise float32 scales.
+        return super().get_quant_method(layer, prefix)
+
+    def is_mxfp4_quant(self, prefix, layer):
+        if not isinstance(layer, FusedMoE) or self.expert_dtype != "fp4":
+            return False
+        return self.moe_quant_algo != "NVFP4"
diff --git a/vllm/multimodal/audio.py b/vllm/multimodal/audio.py
index 0a748a6d15c6..39af49cb1837 100644
--- a/vllm/multimodal/audio.py
+++ b/vllm/multimodal/audio.py
@@ -16,11 +16,6 @@
 except ImportError:
     av = PlaceholderModule("av")  # type: ignore[assignment]
 
-try:
-    import resampy
-except ImportError:
-    resampy = PlaceholderModule("resampy")  # type: ignore[assignment]
-
 try:
     import scipy.signal as scipy_signal
 except ImportError:
@@ -229,26 +224,25 @@ def resample_audio_pyav(
     return result[:expected_len]
 
 
-def resample_audio_resampy(
+def resample_audio_scipy(
     audio: npt.NDArray[np.floating],
     *,
     orig_sr: float,
     target_sr: float,
 ) -> npt.NDArray[np.floating]:
-    return resampy.resample(audio, sr_orig=orig_sr, sr_new=target_sr)
+    orig_sr_int = int(round(orig_sr))
+    target_sr_int = int(round(target_sr))
 
+    if orig_sr_int == target_sr_int:
+        return audio
 
-def resample_audio_scipy(
-    audio: npt.NDArray[np.floating],
-    *,
-    orig_sr: float,
-    target_sr: float,
-) -> npt.NDArray[np.floating]:
-    if orig_sr > target_sr:
-        return scipy_signal.resample_poly(audio, 1, orig_sr // target_sr)
-    elif orig_sr < target_sr:
-        return scipy_signal.resample_poly(audio, target_sr // orig_sr, 1)
-    return audio
+    gcd = math.gcd(orig_sr_int, target_sr_int)
+    return scipy_signal.resample_poly(
+        audio,
+        target_sr_int // gcd,
+        orig_sr_int // gcd,
+        axis=-1,
+    )
 
 
 class AudioResampler:
@@ -257,7 +251,7 @@ class AudioResampler:
     def __init__(
         self,
         target_sr: float | None = None,
-        method: Literal["pyav", "resampy", "scipy"] = "resampy",
+        method: Literal["pyav", "scipy"] = "pyav",
     ):
         self.target_sr = target_sr
         self.method = method
@@ -281,10 +275,6 @@ def resample(
             return audio
         if self.method == "pyav":
             return resample_audio_pyav(audio, orig_sr=orig_sr, target_sr=self.target_sr)
-        if self.method == "resampy":
-            return resample_audio_resampy(
-                audio, orig_sr=orig_sr, target_sr=self.target_sr
-            )
         elif self.method == "scipy":
             return resample_audio_scipy(
                 audio, orig_sr=orig_sr, target_sr=self.target_sr
diff --git a/vllm/multimodal/cache.py b/vllm/multimodal/cache.py
index c0df19d4f483..833b405ff651 100644
--- a/vllm/multimodal/cache.py
+++ b/vllm/multimodal/cache.py
@@ -511,11 +511,27 @@ def get_and_update_item(
 
             self._p0_cache[mm_hash] = prompt_updates
             return self.address_as_item(address, monotonic_id), prompt_updates
-        except (ValueError, MemoryError) as e:
-            # put may fail if the object is too large or
-            # the cache is full.
-            # In this case we log the error and keep the original mm_input.
-            logger.debug("Failed to cache mm_input with hash %s: %s", mm_hash, e)
+        except ValueError as e:
+            # `put` raises ValueError either for an oversize item or for a
+            # duplicate key (concurrent insert); the latter is benign so we
+            # only warn on the oversize case. Subsequent UUID-only requests
+            # for an oversize item will fail with a cache miss.
+            if "already exists" not in str(e):
+                logger.warning_once(
+                    "mm_input %s too large to cache; "
+                    "raise --mm-shm-cache-max-object-size-mb. (%s)",
+                    mm_hash,
+                    str(e),
+                )
+            return mm_item
+        except MemoryError as e:
+            # Cache full and protected items prevent eviction.
+            logger.debug(
+                "mm_input %s not cached; shm cache full, "
+                "consider raising --mm-processor-cache-gb. (%s)",
+                mm_hash,
+                str(e),
+            )
             return mm_item
 
     @override
diff --git a/vllm/multimodal/evs.py b/vllm/multimodal/evs.py
index 62611c89719a..8584c08299fc 100644
--- a/vllm/multimodal/evs.py
+++ b/vllm/multimodal/evs.py
@@ -85,7 +85,7 @@ def compute_retention_mask(
     topk_indices = order[:retain_num_tokens]
 
     retention_mask = torch.zeros_like(dissimilarity_flat, dtype=torch.bool)
-    retention_mask[topk_indices] = True
+    retention_mask.index_fill_(0, topk_indices, True)
     retention_mask = retention_mask.reshape(dissimilarity.size())
 
     mask = retention_mask.view(-1)  # "T H W -> (T H W)"
diff --git a/vllm/multimodal/inputs.py b/vllm/multimodal/inputs.py
index 750893272340..d98a1624ac3b 100644
--- a/vllm/multimodal/inputs.py
+++ b/vllm/multimodal/inputs.py
@@ -145,14 +145,15 @@ class PlaceholderRange:
     """
 
     @cached_property
-    def embeds_cumsum(self) -> torch.Tensor | None:
-        return None if self.is_embed is None else self.is_embed.cumsum(dim=0)
+    def embeds_cumsum(self) -> list[int] | None:
+        # python list so python indexing avoids torch C++ overhead/conversions/deallocs
+        return None if self.is_embed is None else self.is_embed.cumsum(dim=0).tolist()
 
     def get_num_embeds(self) -> int:
         if self.embeds_cumsum is None:
             return self.length
 
-        return int(self.embeds_cumsum[-1])
+        return self.embeds_cumsum[-1] if self.embeds_cumsum else 0
 
     def get_embeds_indices_in_range(
         self, start_idx: int, end_idx: int
@@ -170,10 +171,8 @@ def get_embeds_indices_in_range(
         if self.embeds_cumsum is None:
             return start_idx, end_idx
 
-        embeds_start_idx = (
-            int(self.embeds_cumsum[start_idx - 1]) if start_idx > 0 else 0
-        )
-        embeds_end_idx = int(self.embeds_cumsum[end_idx - 1])
+        embeds_start_idx = self.embeds_cumsum[start_idx - 1] if start_idx > 0 else 0
+        embeds_end_idx = self.embeds_cumsum[end_idx - 1] if end_idx > 0 else 0
 
         return embeds_start_idx, embeds_end_idx
 
@@ -238,12 +237,29 @@ def nested_tensors_equal(a: NestedTensors, b: NestedTensors) -> bool:
         return isinstance(a, torch.Tensor) and torch.equal(b, a)
 
     if isinstance(a, list):
-        return isinstance(b, list) and all(
-            nested_tensors_equal(a_, b_) for a_, b_ in zip(a, b)
+        return (
+            isinstance(b, list)
+            and len(a) == len(b)
+            and all(nested_tensors_equal(a_, b_) for a_, b_ in zip(a, b))
         )
     if isinstance(b, list):
-        return isinstance(a, list) and all(
-            nested_tensors_equal(b_, a_) for b_, a_ in zip(b, a)
+        return (
+            isinstance(a, list)
+            and len(b) == len(a)
+            and all(nested_tensors_equal(b_, a_) for b_, a_ in zip(b, a))
+        )
+
+    if isinstance(a, tuple):
+        return (
+            isinstance(b, tuple)
+            and len(a) == len(b)
+            and all(nested_tensors_equal(a_, b_) for a_, b_ in zip(a, b))
+        )
+    if isinstance(b, tuple):
+        return (
+            isinstance(a, tuple)
+            and len(b) == len(a)
+            and all(nested_tensors_equal(b_, a_) for b_, a_ in zip(b, a))
         )
 
     # Both a and b are scalars
diff --git a/vllm/multimodal/media/audio.py b/vllm/multimodal/media/audio.py
index 47c2743bb99a..37b8662a76b6 100644
--- a/vllm/multimodal/media/audio.py
+++ b/vllm/multimodal/media/audio.py
@@ -9,11 +9,15 @@
 import pybase64
 import torch
 
+from vllm.logger import init_logger
+from vllm.multimodal.audio import resample_audio_pyav
 from vllm.utils.import_utils import PlaceholderModule
 from vllm.utils.serial_utils import tensor2base64
 
 from .base import MediaIO
 
+logger = init_logger(__name__)
+
 try:
     import av
 except ImportError:
@@ -25,15 +29,9 @@
     soundfile = PlaceholderModule("soundfile")  # type: ignore[assignment]
 
 
-try:
-    import resampy
-except ImportError:
-    resampy = PlaceholderModule("resampy")  # type: ignore[assignment]
-
-
-# Public libsndfile error codes exposed via `soundfile.LibsndfileError.code`, soundfile
-# being librosa's main backend. Used to validate if an audio loading error is due to a
-# server error vs a client error (invalid audio file).
+# Public libsndfile error codes exposed via `soundfile.LibsndfileError.code`,
+# soundfile being the main audio loading backend. Used to validate if an audio
+# loading error is due to a server error vs a client error (invalid audio file).
 # 0 = sf_error(NULL) race condition: when multiple threads fail sf_open_virtual
 #     concurrently, one thread may clear the global error before another reads it,
 #     producing code=0 ("Garbled error message from libsndfile" in soundfile).
@@ -126,7 +124,7 @@ def load_audio_soundfile(
         y = np.mean(y, axis=tuple(range(y.ndim - 1)))
 
     if sr is not None and sr != native_sr:
-        y = resampy.resample(y, sr_orig=native_sr, sr_new=sr)
+        y = resample_audio_pyav(y, orig_sr=native_sr, target_sr=sr)
         return y, int(sr)
     return y, native_sr
 
@@ -139,19 +137,27 @@ def load_audio(
 ):
     try:
         return load_audio_soundfile(path, sr=sr, mono=mono)
+    except ImportError as exc:
+        # soundfile (or resampy) is not installed — fall through to pyav.
+        # NOTE: this clause must stay BEFORE ``soundfile.LibsndfileError``
+        # because when soundfile is a PlaceholderModule, evaluating
+        # ``soundfile.LibsndfileError`` itself raises ImportError.
+        logger.error("Failed to load audio via soundfile: %r", exc)
     except soundfile.LibsndfileError as exc:
         # Only fall back for known format-detection failures.
         # Re-raise anything else (e.g. corrupt but recognised format).
         if exc.code not in _BAD_SF_CODES:
             raise
-        # soundfile may have advanced the BytesIO seek position before failing;
-        # reset it so PyAV can read from the beginning.
-        if isinstance(path, BytesIO):
-            path.seek(0)
-        try:
-            return load_audio_pyav(path, sr=sr, mono=mono)
-        except Exception as pyav_exc:
-            raise ValueError("Invalid or unsupported audio file.") from pyav_exc
+    # soundfile may have advanced the BytesIO seek position before failing;
+    # reset it so PyAV can read from the beginning.
+    if isinstance(path, BytesIO):
+        path.seek(0)
+    try:
+        return load_audio_pyav(path, sr=sr, mono=mono)
+    except ImportError:
+        raise  # Let PlaceholderModule's message ("install vllm[audio]") propagate.
+    except Exception as pyav_exc:
+        raise ValueError("Invalid or unsupported audio file.") from pyav_exc
 
 
 class AudioMediaIO(MediaIO[tuple[npt.NDArray, float]]):
diff --git a/vllm/multimodal/media/connector.py b/vllm/multimodal/media/connector.py
index 80aaa2a8293e..babc4c742a34 100644
--- a/vllm/multimodal/media/connector.py
+++ b/vllm/multimodal/media/connector.py
@@ -3,6 +3,11 @@
 
 import asyncio
 import atexit
+import contextlib
+import hashlib
+import os
+import tempfile
+import time
 from concurrent.futures import ThreadPoolExecutor
 from pathlib import Path
 from typing import Any, TypeVar
@@ -16,6 +21,7 @@
 
 import vllm.envs as envs
 from vllm.connections import HTTPConnection, global_http_connection
+from vllm.logger import init_logger
 from vllm.utils.registry import ExtensionManager
 
 from .audio import AudioEmbeddingMediaIO, AudioMediaIO
@@ -23,6 +29,8 @@
 from .image import ImageEmbeddingMediaIO, ImageMediaIO
 from .video import VideoMediaIO
 
+logger = init_logger(__name__)
+
 _M = TypeVar("_M")
 
 global_thread_pool = ThreadPoolExecutor(
@@ -116,16 +124,124 @@ def __init__(
             allowed_media_domains = []
         self.allowed_media_domains = allowed_media_domains
 
+        # Media download cache (opt-in via VLLM_MEDIA_CACHE)
+        self._media_cache_dir: str | None = None
+        self._media_cache_max_bytes: int = 0
+        self._media_cache_ttl_secs: float = 0
+        media_cache = envs.VLLM_MEDIA_CACHE
+        if media_cache:
+            try:
+                os.makedirs(media_cache, exist_ok=True)
+                # Verify the directory is writable before enabling caching
+                with tempfile.NamedTemporaryFile(dir=media_cache, delete=True):
+                    pass
+                self._media_cache_dir = media_cache
+                self._media_cache_max_bytes = (
+                    envs.VLLM_MEDIA_CACHE_MAX_SIZE_MB * 1024 * 1024
+                )
+                self._media_cache_ttl_secs = envs.VLLM_MEDIA_CACHE_TTL_HOURS * 3600
+                logger.info(
+                    "Media cache enabled at %s (max %d MB, TTL %s hours)",
+                    media_cache,
+                    envs.VLLM_MEDIA_CACHE_MAX_SIZE_MB,
+                    envs.VLLM_MEDIA_CACHE_TTL_HOURS,
+                )
+            except OSError:
+                logger.warning(
+                    "VLLM_MEDIA_CACHE path %s is not writable, media caching disabled",
+                    media_cache,
+                )
+
+    def _get_cached_bytes(self, url: str) -> bytes | None:
+        """Return cached bytes for a URL, or None if not cached/expired."""
+        if not self._media_cache_dir:
+            return None
+        cache_path = self._media_cache_path(url)
+        # Check TTL
+        try:
+            age = time.time() - cache_path.stat().st_mtime
+        except OSError:
+            return None
+        if age > self._media_cache_ttl_secs:
+            cache_path.unlink(missing_ok=True)
+            return None
+        # Touch mtime for LRU ordering
+        try:
+            cache_path.touch()
+            return cache_path.read_bytes()
+        except OSError:
+            return None
+
+    def _put_cached_bytes(self, url: str, data: bytes) -> None:
+        """Store downloaded bytes and evict if over budget."""
+        if not self._media_cache_dir:
+            return
+        cache_path = self._media_cache_path(url)
+        # Atomic write via temp file + rename
+        tmp_path = None
+        try:
+            with tempfile.NamedTemporaryFile(
+                mode="wb", dir=self._media_cache_dir, delete=False
+            ) as tmp_file:
+                tmp_file.write(data)
+                tmp_path = tmp_file.name
+            os.rename(tmp_path, str(cache_path))
+        except OSError:
+            # Another process beat us or disk issue
+            if tmp_path is not None:
+                with contextlib.suppress(OSError):
+                    os.remove(tmp_path)
+            return
+        self._maybe_evict(exclude=cache_path)
+
+    def _maybe_evict(self, exclude: Path | None = None) -> None:
+        """Evict expired entries first, then LRU until under size limit."""
+        cache_dir = Path(self._media_cache_dir)  # type: ignore[arg-type]
+        entries = []
+        expired = []
+        total_size = 0
+        now = time.time()
+        for f in cache_dir.iterdir():
+            if f.name.startswith("."):
+                continue
+            try:
+                stat = f.stat()
+            except OSError:
+                continue
+            age = now - stat.st_mtime
+            if age > self._media_cache_ttl_secs:
+                expired.append(f)
+                continue
+            total_size += stat.st_size
+            # Never evict the file we just wrote
+            if exclude is not None and f.name == exclude.name:
+                continue
+            entries.append((stat.st_mtime, stat.st_size, f))
+
+        # Evict items according to LRU policy
+        entries.sort(key=lambda e: e[0], reverse=True)
+        while total_size > self._media_cache_max_bytes and entries:
+            mtime, size, f = entries.pop()
+            expired.append(f)
+            total_size -= size
+
+        for f in expired:
+            f.unlink(missing_ok=True)
+
+    def _media_cache_path(self, url: str) -> Path:
+        url_hash = hashlib.sha256(url.encode()).hexdigest()[:20]
+        ext = Path(url.split("?", 1)[0]).suffix or ""
+        return Path(self._media_cache_dir) / f"{url_hash}{ext}"  # type: ignore[arg-type]
+
     def _load_data_url(
         self,
-        url_spec: Url,
+        url: str,
         media_io: MediaIO[_M],
     ) -> _M:  # type: ignore[type-var]
-        url_spec_path = url_spec.path or ""
-        data_spec, data = url_spec_path.split(",", 1)
+        # Format per RFC 2397:
+        # data:[<mediatype>][;base64],<data>
+        data_spec, data = url[5:].split(",", 1)
         media_type, data_type = data_spec.split(";", 1)
-        # media_type starts with a leading "/" (e.g., "/video/jpeg")
-        media_type = media_type.lstrip("/")
 
         if data_type != "base64":
             msg = "Only base64 data URLs are supported for now."
@@ -173,11 +289,18 @@ def load_from_url(
         *,
         fetch_timeout: int | None = None,
     ) -> _M:  # type: ignore[type-var]
+        if url[:5].lower() == "data:":
+            return self._load_data_url(url, media_io)
+
         url_spec = parse_url(url)
 
         if url_spec.scheme and url_spec.scheme.startswith("http"):
             self._assert_url_in_allowed_media_domains(url_spec)
 
+            cached = self._get_cached_bytes(url)
+            if cached is not None:
+                return media_io.load_bytes(cached)
+
             connection = self.connection
             data = connection.get_bytes(
                 url_spec.url,
@@ -185,11 +308,9 @@ def load_from_url(
                 allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
             )
 
+            self._put_cached_bytes(url, data)
             return media_io.load_bytes(data)
 
-        if url_spec.scheme == "data":
-            return self._load_data_url(url_spec, media_io)
-
         if url_spec.scheme == "file":
             return self._load_file_url(url_spec, media_io)
 
@@ -203,25 +324,39 @@ async def load_from_url_async(
         *,
         fetch_timeout: int | None = None,
     ) -> _M:
-        url_spec = parse_url(url)
         loop = asyncio.get_running_loop()
 
+        if url[:5].lower() == "data:":
+            future = loop.run_in_executor(
+                global_thread_pool, self._load_data_url, url, media_io
+            )
+            return await future
+
+        url_spec = parse_url(url)
+
         if url_spec.scheme and url_spec.scheme.startswith("http"):
             self._assert_url_in_allowed_media_domains(url_spec)
 
+            cached = await loop.run_in_executor(
+                global_thread_pool, self._get_cached_bytes, url
+            )
+            if cached is not None:
+                future = loop.run_in_executor(
+                    global_thread_pool, media_io.load_bytes, cached
+                )
+                return await future
+
             connection = self.connection
             data = await connection.async_get_bytes(
                 url_spec.url,
                 timeout=fetch_timeout,
                 allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
             )
-            future = loop.run_in_executor(global_thread_pool, media_io.load_bytes, data)
-            return await future
 
-        if url_spec.scheme == "data":
-            future = loop.run_in_executor(
-                global_thread_pool, self._load_data_url, url_spec, media_io
+            await loop.run_in_executor(
+                global_thread_pool, self._put_cached_bytes, url, data
             )
+            future = loop.run_in_executor(global_thread_pool, media_io.load_bytes, data)
             return await future
 
         if url_spec.scheme == "file":
diff --git a/vllm/multimodal/media/image.py b/vllm/multimodal/media/image.py
index ea4bf7b01527..ea816b760fea 100644
--- a/vllm/multimodal/media/image.py
+++ b/vllm/multimodal/media/image.py
@@ -68,17 +68,19 @@ def _convert_image_mode(
             return convert_image_mode(image, self.image_mode)
 
     def load_bytes(self, data: bytes) -> MediaWithBytes[Image.Image]:
-        image = Image.open(BytesIO(data))
-        return MediaWithBytes(self._convert_image_mode(image), data)
+        try:
+            image = Image.open(BytesIO(data))
+            image.load()
+            image = self._convert_image_mode(image)
+        except (OSError, Image.UnidentifiedImageError) as e:
+            raise ValueError(f"Failed to load image: {e}") from e
+        return MediaWithBytes(image, data)
 
     def load_base64(self, media_type: str, data: str) -> MediaWithBytes[Image.Image]:
         return self.load_bytes(pybase64.b64decode(data, validate=True))
 
     def load_file(self, filepath: Path) -> MediaWithBytes[Image.Image]:
-        with open(filepath, "rb") as f:
-            data = f.read()
-        image = Image.open(BytesIO(data))
-        return MediaWithBytes(self._convert_image_mode(image), data)
+        return self.load_bytes(filepath.read_bytes())
 
     def encode_base64(
         self,
diff --git a/vllm/multimodal/media/video.py b/vllm/multimodal/media/video.py
index 2790d714d25c..404f5a0e7cfe 100644
--- a/vllm/multimodal/media/video.py
+++ b/vllm/multimodal/media/video.py
@@ -80,19 +80,60 @@ def load_base64(
                 "image/jpeg",
             )
 
+            if self.num_frames > 0:
+                frame_parts = data.split(",", self.num_frames)[: self.num_frames]
+            elif self.num_frames == 0:
+                raise ValueError("num_frames must be greater than 0 or -1")
+            else:
+                frame_parts = data.split(",")
+
             frames = np.stack(
-                [np.asarray(load_frame(frame_data)) for frame_data in data.split(",")]
+                [np.asarray(load_frame(frame_data)) for frame_data in frame_parts]
             )
             total = int(frames.shape[0])
             fps = float(self.kwargs.get("fps", 1))
-            duration = total / fps if fps > 0 else 0.0
+
+            # validate and extract frames_indices
+            frames_indices = self.kwargs.get("frames_indices")
+            if frames_indices is not None:
+                if not (
+                    isinstance(frames_indices, list)
+                    and all(isinstance(i, int) for i in frames_indices)
+                ):
+                    raise ValueError("frames_indices must be a list of integers")
+                if len(frames_indices) != total:
+                    raise ValueError(
+                        f"frames_indices length ({len(frames_indices)}) must "
+                        f"match number of frames sent ({total})"
+                    )
+            else:
+                frames_indices = list(range(total))
+
+            # validate and extract total_num_frames
+            total_num_frames = self.kwargs.get("total_num_frames", total)
+            if not isinstance(total_num_frames, int) or total_num_frames < 1:
+                raise ValueError("total_num_frames must be a positive integer")
+            if total_num_frames < total:
+                raise ValueError(
+                    f"total_num_frames ({total_num_frames}) must be >= "
+                    f"number of frames sent ({total})"
+                )
+
+            # validate and extract duration
+            duration = self.kwargs.get("duration")
+            if duration is not None:
+                if not isinstance(duration, (int, float)) or duration < 0:
+                    raise ValueError("duration must be a non-negative number")
+            else:
+                duration = total_num_frames / fps if fps > 0 else 0.0
+
             metadata = {
-                "total_num_frames": total,
+                "total_num_frames": total_num_frames,
                 "fps": fps,
                 "duration": duration,
                 "video_backend": "jpeg_sequence",
-                "frames_indices": list(range(total)),
-                "do_sample_frames": False,
+                "frames_indices": frames_indices,
+                "do_sample_frames": self.kwargs.get("do_sample_frames", False),
             }
             return frames, metadata
 
diff --git a/vllm/multimodal/processing/context.py b/vllm/multimodal/processing/context.py
index ef9710374d81..bed66d0a4e9d 100644
--- a/vllm/multimodal/processing/context.py
+++ b/vllm/multimodal/processing/context.py
@@ -268,28 +268,6 @@ def call_hf_processor(
         try:
             output = hf_processor(**data, **allowed_kwargs)
         except Exception as exc:
-            # See https://github.com/huggingface/tokenizers/issues/537
-            if (
-                isinstance(exc, RuntimeError)
-                and exc
-                and exc.args[0] == "Already borrowed"
-                and num_tries < max_tries
-            ):
-                logger.warning(
-                    "Failed to acquire tokenizer in current thread. "
-                    "Retrying (%d/%d)...",
-                    num_tries,
-                    max_tries,
-                )
-                time.sleep(0.5)
-                return self.call_hf_processor(
-                    hf_processor,
-                    data,
-                    kwargs,
-                    num_tries=num_tries + 1,
-                    max_tries=max_tries,
-                )
-
             msg = (
                 f"Failed to apply {type(hf_processor).__name__} "
                 f"on data={data} with kwargs={allowed_kwargs}"
diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py
index fa414a5928d6..6fdae470839f 100644
--- a/vllm/multimodal/registry.py
+++ b/vllm/multimodal/registry.py
@@ -111,7 +111,15 @@ def supports_multimodal_inputs(self, model_config: "ModelConfig") -> bool:
             return False
 
         mm_config = model_config.get_multimodal_config()
-        info = self._create_processing_info(model_config, tokenizer=None)
+        try:
+            info = self._create_processing_info(model_config, tokenizer=None)
+        except ValueError:
+            logger.warning_once(
+                "Model %s is treated as multimodal but has no registered "
+                "multimodal processor; running in text-only mode.",
+                model_config.model,
+            )
+            return False
 
         # Check if all supported modalities have limit == 0
         if all(
@@ -170,7 +178,11 @@ def _get_model_cls(self, model_config: "ModelConfig") -> "SupportsMultiModal":
         from vllm.model_executor.model_loader import get_model_architecture
 
         model_cls, _ = get_model_architecture(model_config)
-        assert hasattr(model_cls, "_processor_factory")
+        if not hasattr(model_cls, "_processor_factory"):
+            raise ValueError(
+                f"Model class {model_cls.__name__} has no registered "
+                "multimodal processor"
+            )
         return cast("SupportsMultiModal", model_cls)
 
     def _create_processing_ctx(
@@ -193,6 +205,9 @@ def _create_processing_info(
         ctx = self._create_processing_ctx(model_config, tokenizer)
         return factories.info(ctx)
 
+    def get_processing_info(self, model_config: "ModelConfig") -> BaseProcessingInfo:
+        return self._create_processing_info(model_config, tokenizer=None)
+
     def create_processor(
         self,
         model_config: "ModelConfig",
@@ -204,7 +219,8 @@ def create_processor(
         Create a multi-modal processor for a specific model and tokenizer.
         """
         if not model_config.is_multimodal_model:
-            raise ValueError(f"{model_config.model} is not a multimodal model")
+            model_name = model_config.served_model_name or model_config.model
+            raise ValueError(f"{model_name} is not a multimodal model")
 
         model_cls = self._get_model_cls(model_config)
         factories = model_cls._processor_factory
diff --git a/vllm/multimodal/video.py b/vllm/multimodal/video.py
index 90102151423f..697156a5b4dc 100644
--- a/vllm/multimodal/video.py
+++ b/vllm/multimodal/video.py
@@ -3,7 +3,7 @@
 import math
 from abc import abstractmethod
 from io import BytesIO
-from typing import Any, NamedTuple, cast
+from typing import Any, ClassVar, Literal, NamedTuple, cast
 
 import numpy as np
 import numpy.typing as npt
@@ -19,6 +19,11 @@
     cv2 = PlaceholderModule("cv2")
     vr = PlaceholderModule("cv2").placeholder_attr("videoio_registry")
 
+try:
+    import av
+except ImportError:
+    av = PlaceholderModule("av")  # type: ignore[assignment]
+
 
 logger = init_logger(__name__)
 
@@ -355,8 +360,89 @@ def read_frames(
         return frames, valid_frame_indices
 
 
+class PyAVVideoBackendMixin:
+    """PyAV (in-process FFmpeg bindings) codec utilities.
+
+    Reads stream metadata and decodes target frames via per-frame
+    ``container.seek()``. The seek releases the GIL between frames and
+    scales with the number of sampled frames rather than the video
+    length, enabling concurrent decoding under serving load.
+    """
+
+    @staticmethod
+    def get_metadata(
+        container: "av.container.InputContainer",
+    ) -> VideoSourceMetadata:
+        if not container.streams.video:
+            raise ValueError("No video streams found in container")
+        stream = container.streams.video[0]
+        total_frames = stream.frames or 0
+        fps = float(stream.average_rate) if stream.average_rate else 0.0
+        duration = float(stream.duration * stream.time_base) if stream.duration else 0.0
+        if total_frames == 0 and duration > 0 and fps > 0:
+            total_frames = int(duration * fps)
+        return VideoSourceMetadata(total_frames, fps, duration)
+
+    @staticmethod
+    def decode_frames(
+        container: "av.container.InputContainer",
+        frame_indices: list[int],
+        fps: float,
+        duration: float,
+    ) -> tuple[npt.NDArray, list[int]]:
+        """Decode target frames via per-frame seek + forward decode to PTS."""
+        stream = container.streams.video[0]
+        # SLICE parallelizes within a single frame without the
+        # one-frame-per-thread latency penalty of FRAME threading.
+        stream.thread_type = "SLICE"
+        time_base = stream.time_base
+
+        frames_list: list[npt.NDArray] = []
+        valid_indices: list[int] = []
+        frame_interval = 1.0 / fps if fps > 0 else 0.1
+        max_ts = max(0.0, duration - frame_interval) if duration > 0 else float("inf")
+
+        decoder = None
+        last_pts = None
+        for idx in frame_indices:
+            ts = min(idx / fps, max_ts) if fps > 0 else 0.0
+            pts = int(ts / time_base)
+            # seek() snaps backward to a keyframe; reuse the running decoder
+            # while targets advance monotonically to avoid re-decoding the
+            # GOP prefix once per requested frame.
+            if decoder is None or last_pts is None or pts <= last_pts:
+                container.seek(pts, stream=stream)
+                decoder = container.decode(video=0)
+            chosen = None
+            for frame in decoder:
+                if frame.pts is not None and frame.pts >= pts:
+                    chosen = frame
+                    last_pts = frame.pts
+                    break
+            if chosen is not None:
+                frames_list.append(chosen.to_ndarray(format="rgb24"))
+                valid_indices.append(idx)
+            else:
+                decoder = None
+
+        if not frames_list:
+            return np.empty((0,), dtype=np.uint8), valid_indices
+        return np.stack(frames_list), valid_indices
+
+
 @VIDEO_LOADER_REGISTRY.register("opencv")
-class OpenCVVideoBackend(VideoLoader, OpenCVVideoBackendMixin):
+class VideoBackend(VideoLoader, OpenCVVideoBackendMixin, PyAVVideoBackendMixin):
+    """Uniform-sampling video backend.
+
+    Samples ``num_frames`` uniformly across the video (or one frame every
+    ``1/fps`` seconds, whichever produces fewer frames). The decoding codec
+    is selected via the ``backend`` kwarg (``"opencv"`` or ``"pyav"``),
+    which can be passed through ``--media-io-kwargs``. Defaults to
+    ``"pyav"`` for concurrent decoding.
+    """
+
+    _sampling_suffix: ClassVar[str] = ""
+
     @classmethod
     def compute_frames_index_to_sample(
         cls,
@@ -366,7 +452,6 @@ def compute_frames_index_to_sample(
     ) -> list[int]:
         total_frames_num = source.total_frames_num
         duration = source.duration
-
         num_frames = target.num_frames
         fps = target.fps
         # resample video to target num_frames and fps
@@ -376,16 +461,18 @@ def compute_frames_index_to_sample(
             num_frames_to_sample = min(num_frames, total_frames_num)
         if fps > 0:
             num_frames_to_sample = min(num_frames_to_sample, math.floor(duration * fps))
-        num_frames_to_sample = max(1, num_frames_to_sample)  # at least one sample
+        num_frames_to_sample = max(1, num_frames_to_sample)
 
         if num_frames_to_sample == total_frames_num:
-            frame_idx = list(range(0, num_frames_to_sample))
-        else:
-            uniform_sampled_frames = np.linspace(
-                0, total_frames_num - 1, num_frames_to_sample, dtype=int
-            )
-            frame_idx = uniform_sampled_frames.tolist()
-        return frame_idx
+            return list(range(num_frames_to_sample))
+        return np.linspace(
+            0, total_frames_num - 1, num_frames_to_sample, dtype=int
+        ).tolist()
+
+    @classmethod
+    def _prepare_source(cls, source: VideoSourceMetadata) -> VideoSourceMetadata:
+        """Sampling-algorithm-specific metadata adjustment hook."""
+        return source
 
     @classmethod
     def load_bytes(
@@ -395,55 +482,101 @@ def load_bytes(
         fps: int = -1,
         max_duration: int = 300,
         frame_recovery: bool = False,
+        *,
+        backend: Literal["opencv", "pyav"] = "opencv",
         **kwargs,
     ) -> tuple[npt.NDArray, dict[str, Any]]:
-        """
-        Load video frames from bytes.
+        """Load sampled frames from raw video bytes.
 
         Args:
-            data: Raw video bytes
-            num_frames: Target number of frames to sample (-1 for all)
-            fps: Target FPS for sampling (-1 for original)
-            max_duration: Maximum duration (unused in base backend)
-            frame_recovery: Enable forward-scan recovery for failed frames
+            data: Raw video bytes.
+            num_frames: Target number of frames to sample (``-1`` for all).
+            fps: Target FPS for sampling (``-1`` for original).
+            max_duration: Maximum duration in seconds — only used by the
+                dynamic subclass; ignored here.
+            frame_recovery: Enable forward-scan recovery for failed frames.
+                Only honored by the OpenCV codec.
+            backend: Decoding codec — ``"opencv"`` or ``"pyav"`` .
 
         Returns:
-            Tuple of (frames_array, metadata_dict)
+            Tuple of ``(frames_array, metadata_dict)``.
         """
-        cap = cls.open_video_capture(data)
-
-        source = OpenCVVideoBackendMixin.get_video_metadata(cap)
         target = VideoTargetMetadata(
-            num_frames=num_frames,
-            fps=fps,
-            max_duration=max_duration,
+            num_frames=num_frames, fps=fps, max_duration=max_duration
         )
 
-        # resample video to target num_frames and fps
-        # - the minimum of the two will be used
-        frame_idx = cls.compute_frames_index_to_sample(
-            source=source,
-            target=target,
-        )
+        if backend == "opencv":
+            cap = cls.open_video_capture(data)
+            source = cls._prepare_source(cls.get_video_metadata(cap))
+            frame_idx = cls.compute_frames_index_to_sample(
+                source=source, target=target, **kwargs
+            )
+            frames, valid = cls.read_frames(
+                cap,
+                frame_idx,
+                total_frames_num=source.total_frames_num,
+                frame_recovery=frame_recovery,
+            )
+        elif backend == "pyav":
+            assert not frame_recovery, (
+                "frame_recovery is only available for `opencv` backend"
+            )
+            with av.open(BytesIO(data)) as container:
+                source = cls._prepare_source(cls.get_metadata(container))
+                frame_idx = cls.compute_frames_index_to_sample(
+                    source=source, target=target, **kwargs
+                )
+                frames, valid = cls.decode_frames(
+                    container, frame_idx, source.original_fps, source.duration
+                )
+        else:
+            raise ValueError(
+                f"Unknown video codec backend {backend!r}; "
+                "valid options: 'opencv', 'pyav'."
+            )
 
-        frames, valid_frame_indices = cls.read_frames(
-            cap,
-            frame_idx,
-            total_frames_num=source.total_frames_num,
-            frame_recovery=frame_recovery,
-        )
+        if len(valid) < len(frame_idx):
+            logger.warning(
+                "%s video loading: expected %d frames but got %d.",
+                backend,
+                len(frame_idx),
+                len(valid),
+            )
 
-        metadata = cls.create_hf_metadata(
+        return frames, cls.create_hf_metadata(
             source=source,
-            video_backend="opencv",
-            valid_frame_indices=valid_frame_indices,
+            video_backend=f"{backend}{cls._sampling_suffix}",
+            valid_frame_indices=valid,
         )
 
-        return frames, metadata
-
 
 @VIDEO_LOADER_REGISTRY.register("opencv_dynamic")
-class OpenCVDynamicVideoBackend(VideoLoader, OpenCVVideoBackendMixin):
+class DynamicVideoBackend(VideoBackend):
+    """Duration-aware dynamic-sampling video backend.
+
+    Samples at ``fps`` up to ``max_duration`` seconds, falling back to
+    uniform sampling across the full duration when the video is longer
+    than ``max_duration``. Codec is selectable the same way as
+    :class:`VideoBackend`.
+    """
+
+    _sampling_suffix: ClassVar[str] = "_dynamic"
+
+    @classmethod
+    def _prepare_source(cls, source: VideoSourceMetadata) -> VideoSourceMetadata:
+        # Estimate duration from frame count and fps when the container
+        # does not report it (common for WebM/streaming inputs).
+        if source.duration:
+            return source
+        if source.original_fps > 0:
+            max_frame_idx = source.total_frames_num - 1
+            duration = round(max_frame_idx / source.original_fps) + 1
+        else:
+            duration = 0
+        return VideoSourceMetadata(
+            source.total_frames_num, source.original_fps, duration
+        )
+
     @classmethod
     def compute_frames_index_to_sample(
         cls,
@@ -456,8 +589,8 @@ def compute_frames_index_to_sample(
         original_fps = source.original_fps
         max_duration = target.max_duration
         fps = target.fps
-
         max_frame_idx = source.total_frames_num - 1
+
         # Refer to:
         # https://github.com/huggingface/transformers/blob/v4.55.4/src/transformers/models/glm4v/video_processing_glm4v.py#L103-L140
         frame_indices_list: list[int]
@@ -491,62 +624,20 @@ def load_bytes(
         fps: int = 2,
         max_duration: int = 300,
         frame_recovery: bool = False,
+        *,
+        backend: Literal["opencv", "pyav"] = "opencv",
         **kwargs,
     ) -> tuple[npt.NDArray, dict[str, Any]]:
-        """
-        Load video frames with dynamic sampling based on duration.
-
-        Args:
-            data: Raw video bytes
-            num_frames: Not used in dynamic backend
-            fps: Target FPS for sampling (default: 2)
-            max_duration: Maximum video duration to process (default: 300s)
-            frame_recovery: Enable forward-scan recovery for failed frames
-
-        Returns:
-            Tuple of (frames_array, metadata_dict)
-        """
-        cap = cls.open_video_capture(data)
-
-        orig_source = OpenCVVideoBackendMixin.get_video_metadata(cap)
-        max_frame_idx = orig_source.total_frames_num - 1
-        duration = (
-            orig_source.duration or round(max_frame_idx / orig_source.original_fps) + 1
-        )
-
-        # recompute source metadata with adjusted duration to ensure correct
-        # sampling indices computation
-        source = VideoSourceMetadata(
-            total_frames_num=orig_source.total_frames_num,
-            original_fps=orig_source.original_fps,
-            duration=duration,
-        )
-        target = VideoTargetMetadata(
+        return super().load_bytes(
+            data,
             num_frames=num_frames,
             fps=fps,
             max_duration=max_duration,
-        )
-
-        frame_indices_list = cls.compute_frames_index_to_sample(
-            source=source,
-            target=target,
-        )
-
-        frames, valid_frame_indices = cls.read_frames(
-            cap,
-            frame_indices_list,
-            total_frames_num=source.total_frames_num,
             frame_recovery=frame_recovery,
+            backend=backend,
+            **kwargs,
         )
 
-        metadata = cls.create_hf_metadata(
-            source=source,
-            video_backend="opencv_dynamic",
-            valid_frame_indices=valid_frame_indices,
-        )
-
-        return frames, metadata
-
 
 @VIDEO_LOADER_REGISTRY.register("molmo2")
 class Molmo2VideoBackend(VideoLoader, OpenCVVideoBackendMixin):
@@ -835,7 +926,7 @@ def load_bytes(
 
 
 @VIDEO_LOADER_REGISTRY.register("nemotron_vl")
-class NemotronVLVideoBackend(OpenCVVideoBackend):
+class NemotronVLVideoBackend(VideoBackend):
     @classmethod
     def load_bytes(
         cls,
@@ -844,14 +935,17 @@ def load_bytes(
         fps: int = -1,
         max_duration: int = 300,
         frame_recovery: bool = False,
+        *,
+        backend: Literal["opencv", "pyav"] = "opencv",
         **kwargs,
     ) -> tuple[npt.NDArray, dict[str, Any]]:
-        frames, metadata = OpenCVVideoBackend.load_bytes(
+        frames, metadata = super().load_bytes(
             data,
             num_frames=num_frames,
             fps=fps,
             max_duration=max_duration,
             frame_recovery=frame_recovery,
+            backend=backend,
             **kwargs,
         )
 
diff --git a/vllm/parser/abstract_parser.py b/vllm/parser/abstract_parser.py
index dd9dc94237dc..2a13f138607b 100644
--- a/vllm/parser/abstract_parser.py
+++ b/vllm/parser/abstract_parser.py
@@ -5,6 +5,7 @@
 import json
 from abc import abstractmethod
 from collections.abc import Sequence
+from dataclasses import dataclass, field
 from functools import cached_property
 
 from openai.types.responses import (
@@ -32,18 +33,37 @@
     FunctionCall,
     FunctionDefinition,
 )
-from vllm.entrypoints.openai.responses.protocol import (
-    ResponsesRequest,
-)
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
 from vllm.logger import init_logger
 from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import ToolParser
+from vllm.tool_parsers.streaming import (
+    extract_named_tool_call_streaming,
+    extract_required_tool_call_streaming,
+)
+from vllm.tool_parsers.utils import Tool
 from vllm.utils import random_uuid
 
 logger = init_logger(__name__)
 
 
+@dataclass
+class StreamState:
+    """Mutable state for ``Parser.parse_delta()``. One per stream."""
+
+    reasoning_ended: bool = False
+    tool_call_text_started: bool = False
+    prompt_reasoning_checked: bool = False
+    previous_text: str = ""
+    previous_token_ids: list[int] = field(default_factory=list)
+    history_tool_call_cnt: int = 0
+    tool_call_id_type: str = "random"
+    # only used for "required" and "named tool" choices,
+    # tracks whether function name has been fully returned in the stream yet
+    function_name_returned: bool = False
+
+
 class Parser:
     """
     Abstract Parser class that unifies ReasoningParser and ToolParser into
@@ -81,6 +101,7 @@ def __init__(self, tokenizer: TokenizerLike, *args, **kwargs):
         self.model_tokenizer = tokenizer
         self._reasoning_parser: ReasoningParser | None = None
         self._tool_parser: ToolParser | None = None
+        self._stream_state = StreamState()
 
     @cached_property
     def vocab(self) -> dict[str, int]:
@@ -229,7 +250,9 @@ def extract_reasoning_streaming(
 
     # ========== Tool Parser Methods ==========
 
-    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
         """
         Adjust the request parameters for tool calling.
 
@@ -290,6 +313,18 @@ def extract_tool_calls_streaming(
             A DeltaMessage with tool_calls field, or None.
         """
 
+    @abstractmethod
+    def parse_delta(
+        self,
+        delta_text: str,
+        delta_token_ids: list[int],
+        request: ChatCompletionRequest | ResponsesRequest,
+        prompt_token_ids: list[int] | None = None,
+    ) -> DeltaMessage | None:
+        """Parse a single streaming delta, orchestrating reasoning then
+        tool call extraction via internal stream state.
+        """
+
 
 class DelegatingParser(Parser):
     """
@@ -391,6 +426,17 @@ def extract_response_outputs(
 
         return outputs
 
+    def _get_function_name(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> str:
+        if request.tool_choice and isinstance(request.tool_choice, ToolChoiceFunction):
+            return request.tool_choice.name
+        if request.tool_choice and isinstance(
+            request.tool_choice, ChatCompletionNamedToolChoiceParam
+        ):
+            return request.tool_choice.function.name
+        raise ValueError("Invalid tool_choice for function name extraction.")
+
     def _parse_tool_calls(
         self,
         request: ResponsesRequest,
@@ -408,21 +454,15 @@ def _parse_tool_calls(
         """
         function_calls: list[FunctionCall] = []
 
-        if request.tool_choice and isinstance(request.tool_choice, ToolChoiceFunction):
-            # Forced Function Call (Responses API style)
-            assert content is not None
-            function_calls.append(
-                FunctionCall(name=request.tool_choice.name, arguments=content)
-            )
-            return function_calls, None  # Clear content since tool is called.
-
         if request.tool_choice and isinstance(
-            request.tool_choice, ChatCompletionNamedToolChoiceParam
+            request.tool_choice,
+            (ToolChoiceFunction, ChatCompletionNamedToolChoiceParam),
         ):
-            # Forced Function Call (Chat Completion API style)
-            assert content is not None
+            # Forced Function Call
+            if content is None:
+                return [], None
             function_calls.append(
-                FunctionCall(name=request.tool_choice.function.name, arguments=content)
+                FunctionCall(name=self._get_function_name(request), arguments=content)
             )
             return function_calls, None  # Clear content since tool is called.
 
@@ -470,6 +510,15 @@ def _parse_tool_calls(
         # No tool calls
         return [], content
 
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
+        if self._reasoning_parser is not None:
+            request = self._reasoning_parser.adjust_request(request)
+        if self._tool_parser is not None:
+            request = self._tool_parser.adjust_request(request)
+        return request
+
     def extract_reasoning_streaming(
         self,
         previous_text: str,
@@ -523,6 +572,181 @@ def extract_tool_calls_streaming(
             request,
         )
 
+    def _extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest | ResponsesRequest,
+        # The following parameters are used for "required" tool choice parsing and are
+        # tracked in StreamState for streaming parsing.
+        tool_call_idx: int | None = None,
+        tool_call_id_type: str = "random",
+        function_name_returned: bool = False,
+    ) -> tuple[DeltaMessage | None, bool]:
+        assert self._tool_parser is not None
+        supports_required_and_named = self._tool_parser.supports_required_and_named
+        if (
+            supports_required_and_named
+            and request.tool_choice
+            and isinstance(
+                request.tool_choice,
+                (ToolChoiceFunction, ChatCompletionNamedToolChoiceParam),
+            )
+        ):
+            delta_message, function_name_returned = extract_named_tool_call_streaming(
+                delta_text=delta_text,
+                function_name=self._get_function_name(request),
+                function_name_returned=function_name_returned,
+                tool_call_idx=tool_call_idx,
+                tool_call_id_type=tool_call_id_type,
+                tokenizer=self.model_tokenizer,
+            )
+            return delta_message, function_name_returned
+
+        if supports_required_and_named and request.tool_choice == "required":
+            delta_message, function_name_returned = (
+                extract_required_tool_call_streaming(
+                    previous_text=previous_text,
+                    current_text=current_text,
+                    delta_text=delta_text,
+                    function_name_returned=function_name_returned,
+                    tool_call_idx=tool_call_idx,
+                    tool_call_id_type=tool_call_id_type,
+                )
+            )
+            return delta_message, function_name_returned
+        return self.extract_tool_calls_streaming(
+            previous_text,
+            current_text,
+            delta_text,
+            previous_token_ids,
+            current_token_ids,
+            delta_token_ids,
+            request,  # type: ignore[arg-type]
+        ), False
+
+    def is_reasoning_end(self, input_ids: list[int]) -> bool:
+        if self._reasoning_parser is None:
+            return False
+        return self._reasoning_parser.is_reasoning_end(input_ids)
+
+    def is_reasoning_end_streaming(
+        self, input_ids: list[int], delta_ids: list[int]
+    ) -> bool:
+        if self._reasoning_parser is None:
+            return False
+        return self._reasoning_parser.is_reasoning_end_streaming(input_ids, delta_ids)
+
+    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
+        if self._reasoning_parser is None:
+            return input_ids
+        return self._reasoning_parser.extract_content_ids(input_ids)
+
+    def _in_reasoning_phase(self, state: StreamState) -> bool:
+        if self._reasoning_parser is None:
+            return False
+        return not state.reasoning_ended
+
+    def _in_tool_call_phase(self, state: StreamState) -> bool:
+        if self._tool_parser is None:
+            return False
+        return state.reasoning_ended
+
+    def parse_delta(
+        self,
+        delta_text: str,
+        delta_token_ids: list[int],
+        request: ChatCompletionRequest | ResponsesRequest,
+        prompt_token_ids: list[int] | None = None,
+    ) -> DeltaMessage | None:
+        state = self._stream_state
+
+        if not state.prompt_reasoning_checked and prompt_token_ids is not None:
+            state.prompt_reasoning_checked = True
+            if self._reasoning_parser is None or self.is_reasoning_end(
+                prompt_token_ids
+            ):
+                state.reasoning_ended = True
+
+        current_text = state.previous_text + delta_text
+        current_token_ids = state.previous_token_ids + delta_token_ids
+        delta_message: DeltaMessage | None = None
+
+        # Reasoning extraction
+        if self._in_reasoning_phase(state):
+            delta_message = self.extract_reasoning_streaming(
+                previous_text=state.previous_text,
+                current_text=current_text,
+                delta_text=delta_text,
+                previous_token_ids=state.previous_token_ids,
+                current_token_ids=current_token_ids,
+                delta_token_ids=delta_token_ids,
+            )
+            if self.is_reasoning_end_streaming(current_token_ids, delta_token_ids):
+                state.reasoning_ended = True
+                current_token_ids = self.extract_content_ids(delta_token_ids)
+                current_text = (
+                    delta_message.content
+                    if delta_message and delta_message.content
+                    else ""
+                )
+                delta_text = current_text
+                delta_token_ids = current_token_ids
+
+        # Tool call extraction
+        if self._in_tool_call_phase(state):
+            if not state.tool_call_text_started:
+                state.tool_call_text_started = True
+                state.previous_text = ""
+                state.previous_token_ids = []
+                delta_text = current_text
+                delta_token_ids = current_token_ids
+
+            # A boundary delta may carry both reasoning and tool call,
+            # save it before the tool parser overwrites delta_message.
+            reasoning = delta_message.reasoning if delta_message else None
+            delta_message, state.function_name_returned = (
+                self._extract_tool_calls_streaming(
+                    previous_text=state.previous_text,
+                    current_text=current_text,
+                    delta_text=delta_text,
+                    previous_token_ids=state.previous_token_ids,
+                    current_token_ids=current_token_ids,
+                    delta_token_ids=delta_token_ids,
+                    request=request,  # type: ignore[arg-type]
+                    tool_call_idx=state.history_tool_call_cnt,
+                    tool_call_id_type=state.tool_call_id_type,
+                    function_name_returned=state.function_name_returned,
+                )
+            )
+            if reasoning:
+                if not delta_message:
+                    delta_message = DeltaMessage()
+                delta_message.reasoning = reasoning
+
+            if (
+                delta_message
+                and delta_message.tool_calls
+                and delta_message.tool_calls[0].id is not None
+            ):
+                state.history_tool_call_cnt += 1
+
+        # No phase active: pass through as content
+        if (
+            delta_message is None
+            and not self._in_reasoning_phase(state)
+            and not self._in_tool_call_phase(state)
+        ):
+            delta_message = DeltaMessage(content=delta_text)
+
+        state.previous_text = current_text
+        state.previous_token_ids = current_token_ids
+        return delta_message
+
 
 class _WrappedParser(DelegatingParser):
     """
@@ -542,10 +766,14 @@ class _WrappedParser(DelegatingParser):
     reasoning_parser_cls: type[ReasoningParser] | None = None
     tool_parser_cls: type[ToolParser] | None = None
 
-    def __init__(self, tokenizer: TokenizerLike):
+    def __init__(
+        self, tokenizer: TokenizerLike, tools: list[Tool] | None = None, **kwargs
+    ):
         super().__init__(tokenizer)
         # Instantiate the underlying parsers from class attributes
         if self.__class__.reasoning_parser_cls is not None:
-            self._reasoning_parser = self.__class__.reasoning_parser_cls(tokenizer)
+            self._reasoning_parser = self.__class__.reasoning_parser_cls(
+                tokenizer, **kwargs
+            )
         if self.__class__.tool_parser_cls is not None:
-            self._tool_parser = self.__class__.tool_parser_cls(tokenizer)
+            self._tool_parser = self.__class__.tool_parser_cls(tokenizer, tools)
diff --git a/vllm/parser/minimax_m2_parser.py b/vllm/parser/minimax_m2_parser.py
index ee092d4f542b..34aaa7268446 100644
--- a/vllm/parser/minimax_m2_parser.py
+++ b/vllm/parser/minimax_m2_parser.py
@@ -13,6 +13,9 @@
 from vllm.parser.abstract_parser import DelegatingParser
 from vllm.reasoning.minimax_m2_reasoning_parser import MiniMaxM2ReasoningParser
 from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.abstract_tool_parser import (
+    Tool,
+)
 from vllm.tool_parsers.minimax_m2_tool_parser import MinimaxM2ToolParser
 
 logger = init_logger(__name__)
@@ -40,12 +43,18 @@ class MiniMaxM2Parser(DelegatingParser):
     reasoning_parser_cls = MiniMaxM2ReasoningParser
     tool_parser_cls = MinimaxM2ToolParser
 
-    def __init__(self, tokenizer: TokenizerLike):
-        super().__init__(tokenizer)
+    def __init__(
+        self,
+        tokenizer: TokenizerLike,
+        tools: list[Tool] | None = None,
+        *args,
+        **kwargs,
+    ):
+        super().__init__(tokenizer, *args, **kwargs)
 
         # Initialize the underlying parsers
-        self._reasoning_parser = MiniMaxM2ReasoningParser(tokenizer)
-        self._tool_parser = MinimaxM2ToolParser(tokenizer)
+        self._reasoning_parser = MiniMaxM2ReasoningParser(tokenizer, *args, **kwargs)
+        self._tool_parser = MinimaxM2ToolParser(tokenizer, tools)
 
         logger.debug(
             "vLLM Successfully initialized parser %s!", self.__class__.__name__
diff --git a/vllm/parser/parser_manager.py b/vllm/parser/parser_manager.py
index 5577dfb1d8bb..f8bded62d590 100644
--- a/vllm/parser/parser_manager.py
+++ b/vllm/parser/parser_manager.py
@@ -158,7 +158,7 @@ def _decorator(obj: type[Parser]) -> type[Parser]:
 
             if isinstance(name, str):
                 names = [name]
-            elif is_list_of(name, str):
+            elif name is not None and is_list_of(name, str):
                 names = name
             else:
                 names = [class_name]
diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py
index af344acfcbc7..645da0a1fe97 100644
--- a/vllm/platforms/__init__.py
+++ b/vllm/platforms/__init__.py
@@ -177,7 +177,6 @@ def cpu_platform_plugin() -> str | None:
                 logger.debug(
                     "Confirmed CPU platform is available because the machine is MacOS."
                 )
-
     except Exception as e:
         logger.debug("CPU platform is not available because: %s", str(e))
 
diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py
index 7fbad3e4c76e..999bcfcc6dbc 100644
--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@@ -2,21 +2,21 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import glob
-import json
 import os
 import platform
 import subprocess
 import sys
-from dataclasses import dataclass
 from typing import TYPE_CHECKING
 
-import psutil
-import regex as re
 import torch
 
-from vllm import envs
 from vllm.logger import init_logger
-from vllm.v1.attention.backend import is_quantized_kv_cache
+from vllm.utils.cpu_resource_utils import (
+    DEVICE_CONTROL_ENV_VAR,
+    get_memory_node_info,
+    get_visible_memory_node,
+)
+from vllm.utils.mem_constants import GiB_bytes
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
 
 from .interface import CpuArchEnum, Platform, PlatformEnum
@@ -39,43 +39,13 @@ def get_max_threads(pid=0):
         raise NotImplementedError("Unsupported OS")
 
 
-@dataclass
-class LogicalCPUInfo:
-    id: int = -1
-    physical_core: int = -1
-    numa_node: int = -1
-
-    @classmethod
-    def _int(cls, value: str) -> int:
-        try:
-            int_value = int(value)
-        except Exception:
-            int_value = -1
-        return int_value
-
-    @staticmethod
-    def json_decoder(obj_dict: dict):
-        id = obj_dict.get("cpu")
-        physical_core = obj_dict.get("core")
-        numa_node = obj_dict.get("node")
-
-        if not (id is None or physical_core is None or numa_node is None):
-            return LogicalCPUInfo(
-                id=LogicalCPUInfo._int(id),
-                physical_core=LogicalCPUInfo._int(physical_core),
-                numa_node=LogicalCPUInfo._int(numa_node),
-            )
-        else:
-            return obj_dict
-
-
 class CpuPlatform(Platform):
     _enum = PlatformEnum.CPU
     device_name: str = "cpu"
     device_type: str = "cpu"
     dispatch_key: str = "CPU"
     dist_backend: str = "gloo"
-    device_control_env_var = "CPU_VISIBLE_MEMORY_NODES"
+    device_control_env_var = DEVICE_CONTROL_ENV_VAR
 
     @property
     def supported_dtypes(self) -> list[torch.dtype]:
@@ -118,29 +88,9 @@ def get_attn_backend_cls(
 
     @classmethod
     def get_device_total_memory(cls, device_id: int = 0) -> int:
-        from vllm.utils.mem_constants import GiB_bytes
-        from vllm.utils.mem_utils import format_gib
-
-        kv_cache_space = envs.VLLM_CPU_KVCACHE_SPACE
-        node_dir = "/sys/devices/system/node"
-        if kv_cache_space is None:
-            nodes = (
-                [d for d in os.listdir(node_dir) if d.startswith("node")]
-                if os.path.exists(node_dir)
-                else []
-            )
-            num_numa_nodes = len(nodes) or 1
-            free_cpu_memory = psutil.virtual_memory().total // num_numa_nodes
-            DEFAULT_CPU_MEM_UTILIZATION = 0.5
-            kv_cache_space = int(free_cpu_memory * DEFAULT_CPU_MEM_UTILIZATION)
-            logger.warning_once(
-                "VLLM_CPU_KVCACHE_SPACE not set. Using %s GiB for KV cache.",
-                format_gib(kv_cache_space),
-            )
-        else:
-            kv_cache_space *= GiB_bytes
+        meminfo = get_memory_node_info(device_id)
 
-        return kv_cache_space
+        return meminfo.total_memory
 
     @classmethod
     def set_device(cls, device: torch.device) -> None:
@@ -149,6 +99,10 @@ def set_device(cls, device: torch.device) -> None:
         """
         torch.cpu.set_device(device)
 
+    @classmethod
+    def manual_seed_all(cls, seed: int) -> None:
+        pass
+
     @classmethod
     def inference_mode(cls):
         return torch.no_grad()
@@ -171,47 +125,26 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                 "otherwise the performance is not optimized."
             )
 
+        # Lagecy setting
+        env_key = "VLLM_CPU_KVCACHE_SPACE"
+        if env_key in os.environ and os.environ[env_key] != "":
+            kv_cache_space = int(os.environ[env_key])
+            cache_config.kv_cache_memory_bytes = kv_cache_space * GiB_bytes
+
         scheduler_config = vllm_config.scheduler_config
         # async scheduling is not required on CPU
         scheduler_config.async_scheduling = False
-        if (
-            scheduler_config.enable_chunked_prefill
-            or cache_config.enable_prefix_caching
-        ) and is_quantized_kv_cache(cache_config.cache_dtype):
-            raise RuntimeError(
-                "Chunked-prefill and prefix-cache on the CPU "
-                "backend is not compatible with FP8 KV cache."
-            )
-
-        if cache_config.cache_dtype.startswith("fp8"):
-            logger.warning(
-                "CPU backend doesn't support KV cache quantization fallback to auto."
-            )
-            cache_config.cache_dtype = "auto"
-
-        cache_config.cpu_kvcache_space_bytes = CpuPlatform.get_device_total_memory()
-
-        # reserve at least one core for nixl_connector under p/d case
-        if vllm_config.kv_transfer_config and (
-            envs.VLLM_CPU_NUM_OF_RESERVED_CPU == 0
-            or envs.VLLM_CPU_NUM_OF_RESERVED_CPU is None
-        ):
-            os.environ["VLLM_CPU_NUM_OF_RESERVED_CPU"] = "1"
 
         parallel_config = vllm_config.parallel_config
         if (
-            parallel_config.world_size > 1
-            and parallel_config.distributed_executor_backend is not None
-            and parallel_config.distributed_executor_backend != "mp"
+            os.environ.get("VLLM_ENABLE_V1_MULTIPROCESSING", "1") == "1"
+            and parallel_config.distributed_executor_backend == "uni"
         ):
-            logger.warning(
-                (
-                    "%s is not supported on CPU, fallback to mp "
-                    "distributed executor backend."
-                ),
-                parallel_config.distributed_executor_backend,
-            )
+            # OMP requires the MP executor to function correctly, UniProc
+            # is not supported as it is not possible to set the OMP
+            # environment correctly
             parallel_config.distributed_executor_backend = "mp"
+
         if parallel_config.worker_cls == "auto":
             parallel_config.worker_cls = "vllm.v1.worker.cpu_worker.CPUWorker"
         # Disable DBO
@@ -219,6 +152,20 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
             logger.warning("Dual-Batch Overlap is not supported on CPU, disabled.")
             parallel_config.enable_dbo = False
 
+        if torch.cpu._is_amx_tile_supported() and (
+            model_config is not None
+            and model_config.get_num_layers_by_block_type(
+                parallel_config, "linear_attention"
+            )
+            > 0
+        ):
+            cache_config.enable_prefix_caching = False
+            scheduler_config.enable_chunked_prefill = False
+            logger.warning(
+                "Disabled unsupported prefix caching and chunked prefill "
+                "for linear attention on AMX CPU platforms."
+            )
+
         # Note: workaround for v1 gpu_model_runner
         from vllm.config import CompilationMode
 
@@ -249,10 +196,18 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                     "cpp.dynamic_threads": True,
                 }
             )
+            compilation_config.ir_enable_torch_wrap = False
 
         if vllm_config.lora_config is not None:
             compilation_config.mode = CompilationMode.NONE
 
+        if (
+            cls.get_cpu_architecture() == CpuArchEnum.ARM
+            and "+gelu" not in compilation_config.custom_ops
+            and "-gelu" not in compilation_config.custom_ops
+        ):
+            compilation_config.custom_ops.append("+gelu")
+
         vllm_config.profiler_config.torch_profiler_dump_cuda_time_total = False
 
         assert vllm_config.device_config.device_type == "cpu"
@@ -267,14 +222,6 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         # variable "NUMEXPR_MAX_THREADS" (64)'.
         os.environ["NUMEXPR_MAX_THREADS"] = str(get_max_threads())
 
-        if envs.VLLM_CPU_OMP_THREADS_BIND != "nobind":
-            # Set default threads num for OpenMP parallel
-            os.environ["OMP_NUM_THREADS"] = str(torch.get_num_threads())
-        else:
-            # In this case, setting the OpenMP configuration via
-            # OMP_NUM_THREADS is up to the user.
-            logger.info("Disabling binding processes to CPU cores...")
-
         # Disable torch async compiling which won't work with daemonic processes
         os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1"
 
@@ -284,50 +231,22 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         # Avoid inductor generates num_thread() and breaks the thread binding
         os.environ["TORCHINDUCTOR_CPP_DYNAMIC_THREADS"] = "1"
 
-        ld_preload_str = os.getenv("LD_PRELOAD", "")
-
-        # Intel OpenMP setting
-        if "libiomp5.so" in ld_preload_str:
-            # The time(milliseconds) that a thread should wait after
-            # completing the execution of a parallel region, before sleeping.
-            os.environ["KMP_BLOCKTIME"] = "1"
-            # Prevents the CPU to run into low performance state
-            os.environ["KMP_TPAUSE"] = "0"
-            # Provides fine granularity parallelism
-            os.environ["KMP_FORKJOIN_BARRIER_PATTERN"] = "dist,dist"
-            os.environ["KMP_PLAIN_BARRIER_PATTERN"] = "dist,dist"
-            os.environ["KMP_REDUCTION_BARRIER_PATTERN"] = "dist,dist"
+        # For efficient conv state memory access
+        if torch.cpu._is_amx_tile_supported():
+            os.environ["VLLM_SSM_CONV_STATE_LAYOUT"] = "SD"
 
+        ld_preload_str = os.getenv("LD_PRELOAD", "")
         cpu_architecture = Platform.get_cpu_architecture()
 
-        # LD_PRELOAD libtcmalloc, bundled under vllm/libs to reduce
-        # memory allocation overhead
-        if (
-            platform.system() == "Linux"
-            and cpu_architecture in (CpuArchEnum.ARM, CpuArchEnum.X86)
-            and "libtcmalloc" not in ld_preload_str
-        ):
-            vllm_pkg = os.path.dirname(os.path.dirname(__file__))
-            tcmalloc_so = None
-            for pattern in ("libtcmalloc_minimal*.so*", "libtcmalloc.so*"):
-                tcmalloc_so_candidates = glob.glob(
-                    os.path.join(vllm_pkg, "libs", pattern)
-                )
-                if tcmalloc_so_candidates:
-                    tcmalloc_so = tcmalloc_so_candidates[0]
-                    break
-
-            if tcmalloc_so is not None:
-                if ld_preload_str:
-                    ld_preload_str = f"{tcmalloc_so}:{ld_preload_str}"
-                else:
-                    ld_preload_str = tcmalloc_so
-                os.environ["LD_PRELOAD"] = ld_preload_str
-
         if (
             platform.system() == "Linux"
-            and cpu_architecture in (CpuArchEnum.ARM, CpuArchEnum.POWERPC)
-            and not ("libomp" in ld_preload_str or "libgomp" in ld_preload_str)
+            and cpu_architecture
+            in (CpuArchEnum.ARM, CpuArchEnum.POWERPC, CpuArchEnum.X86)
+            and not (
+                "libomp" in ld_preload_str
+                or "libgomp" in ld_preload_str
+                or "libiomp" in ld_preload_str
+            )
         ):
             # We need to LD_PRELOAD PyTorch's libgomp, otherwise only
             # one core will be properly utilized when we thread-bind
@@ -338,7 +257,8 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
             # We need to find the location of PyTorch's libgomp
             torch_pkg = os.path.dirname(torch.__file__)
             site_root = os.path.dirname(torch_pkg)
-            # Search both torch.libs and torch/lib - See: https://github.com/vllm-project/vllm/issues/30470
+            # Search both torch.libs and torch/lib - See:
+            # https://github.com/vllm-project/vllm/issues/30470
             torch_libs_paths = [
                 os.path.join(site_root, "torch.libs"),
                 os.path.join(torch_pkg, "lib"),
@@ -355,6 +275,30 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                 ld_preload_str += pytorch_libgomp_so
                 os.environ["LD_PRELOAD"] = ld_preload_str
 
+        # LD_PRELOAD libtcmalloc, bundled under vllm/libs to reduce
+        # memory allocation overhead
+        if (
+            platform.system() == "Linux"
+            and cpu_architecture in (CpuArchEnum.ARM, CpuArchEnum.X86)
+            and "libtcmalloc" not in ld_preload_str
+        ):
+            vllm_pkg = os.path.dirname(os.path.dirname(__file__))
+            tcmalloc_so = None
+            for pattern in ("libtcmalloc_minimal*.so*", "libtcmalloc.so*"):
+                tcmalloc_so_candidates = glob.glob(
+                    os.path.join(vllm_pkg, "libs", pattern)
+                )
+                if tcmalloc_so_candidates:
+                    tcmalloc_so = tcmalloc_so_candidates[0]
+                    break
+
+            if tcmalloc_so is not None:
+                if ld_preload_str:
+                    ld_preload_str = f"{tcmalloc_so}:{ld_preload_str}"
+                else:
+                    ld_preload_str = tcmalloc_so
+                os.environ["LD_PRELOAD"] = ld_preload_str
+
         os.environ["LOCAL_WORLD_SIZE"] = str(
             vllm_config.parallel_config.tensor_parallel_size
         )
@@ -372,51 +316,16 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
 
     @classmethod
     def update_block_size_for_backend(cls, vllm_config: "VllmConfig") -> None:
-        # TODO: CPU still sets block_size in check_and_update_config.
-        # Move that logic here so block_size is chosen by the backend.
-        pass
-
-    @classmethod
-    def get_allowed_cpu_core_node_list(cls) -> tuple[list[int], list[LogicalCPUInfo]]:
-        assert platform.system() == "Linux"
-
-        # Init LogicalCPUInfo from lscpu
-        lscpu_output = subprocess.check_output(
-            "lscpu -J -e=CPU,CORE,NODE", shell=True, text=True
-        )
-        lscpu_output = re.sub(r'"node":\s*-\s*(,|\n)', r'"node": 0\1', lscpu_output)
-        logical_cpu_list: list[LogicalCPUInfo] = json.loads(
-            lscpu_output, object_hook=LogicalCPUInfo.json_decoder
-        )["cpus"]
-
-        # Filter CPUs with invalid attributes
-        logical_cpu_list = [
-            x
-            for x in logical_cpu_list
-            if -1 not in (x.id, x.physical_core, x.numa_node)
-        ]
-
-        # Filter allowed CPUs
-        if hasattr(os, "sched_getaffinity"):
-            allowed_cpu_id_list = os.sched_getaffinity(0)
-        else:
-            raise NotImplementedError("Unsupported OS")
-        logical_cpu_list = [x for x in logical_cpu_list if x.id in allowed_cpu_id_list]
-
-        # Get allowed NUMA nodes
-        allowed_numa_nodes = set()
-        for x in logical_cpu_list:
-            allowed_numa_nodes.add(x.numa_node)  # type: ignore
-        allowed_numa_nodes_list = sorted(allowed_numa_nodes)
+        model_config = vllm_config.model_config
+        if model_config is None or not model_config.is_hybrid:
+            return
 
-        env_key = CpuPlatform.device_control_env_var
-        if env_key in os.environ and os.environ[env_key] != "":
-            visible_nodes = [int(s) for s in os.environ[env_key].split(",")]
-            allowed_numa_nodes_list = [
-                x for x in sorted(list(set(visible_nodes))) if x in allowed_numa_nodes
-            ]
+        # reconcile attention and mamba page sizes
+        backend_cls = cls._find_non_ssm_backend(vllm_config)
+        if backend_cls is None:
+            return
 
-        return allowed_numa_nodes_list, logical_cpu_list
+        cls._align_hybrid_block_size(vllm_config, backend_cls)
 
     @classmethod
     def discover_numa_topology(cls) -> list[list[int]]:
@@ -533,3 +442,59 @@ def import_kernels(cls) -> None:
                 import vllm._C  # noqa: F401
             except ImportError as e:
                 logger.warning("Failed to import from vllm._C: %r", e)
+
+    @classmethod
+    def pack_kv_cache(
+        cls,
+        key: torch.Tensor,
+        value: torch.Tensor,
+        key_cache: torch.Tensor,
+        value_cache: torch.Tensor,
+        block_ids: list[int],
+        indices: torch.Tensor,
+    ) -> None:
+        """
+        Rewrite the kv cache shape for the current platform.
+        """
+        # Import lazily: cpu_attn pulls in _custom_ops, which needs a fully
+        # initialized vllm.platforms (avoid circular import while CpuPlatform loads).
+        from vllm._custom_ops import cpu_attn_reshape_and_cache
+        from vllm.v1.attention.backends.cpu_attn import _get_attn_isa
+
+        dtype = key.dtype
+        # For CPU_ATTN, the shape is [N, num_kv_heads, block_size, head_size]
+        _, _, block_size, head_size = key_cache.shape
+        key = key.permute(0, 2, 1, 3).flatten(0, 1)
+        value = value.permute(0, 2, 1, 3).flatten(0, 1)
+
+        isa = _get_attn_isa(dtype, block_size, head_size)
+        block_offsets = torch.arange(block_size, device="cpu", dtype=torch.long)
+        num_blocks = len(block_ids)
+        slot_mapping = (
+            block_offsets.reshape(1, block_size)
+            + indices.reshape(num_blocks, 1) * block_size
+        ).flatten()
+        if key_cache.dtype == torch.uint8:
+            raise NotImplementedError(
+                "FP8 KV cache is not yet supported with KV transfer on CPU"
+            )
+        cpu_attn_reshape_and_cache(
+            key,
+            value,
+            key_cache,
+            value_cache,
+            slot_mapping,
+            isa,
+        )
+
+    @classmethod
+    def get_current_memory_usage(
+        cls, device: torch.types.Device | None = None
+    ) -> float:
+        allowed_mem_node_list = get_visible_memory_node()
+        mem_status_list = [get_memory_node_info(i) for i in allowed_mem_node_list]
+        memory_usage = 0
+        for s in mem_status_list:
+            memory_usage += s.total_memory - s.available_memory
+
+        return memory_usage
diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 50a79cbb0b8d..4a5be741d06b 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -9,7 +9,7 @@
 import os
 from collections.abc import Callable
 from datetime import timedelta
-from functools import cache, wraps
+from functools import cache, lru_cache, wraps
 from typing import TYPE_CHECKING, TypeVar
 
 import torch
@@ -20,9 +20,10 @@
 # import custom ops, trigger op registration
 import vllm._C  # noqa
 import vllm._C_stable_libtorch  # noqa
+import vllm.envs as envs
 from vllm.logger import init_logger
 from vllm.utils.import_utils import import_pynvml
-from vllm.utils.torch_utils import cuda_device_count_stateless
+from vllm.utils.torch_utils import is_quantized_kv_cache
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
 
 from .interface import DeviceCapability, Platform, PlatformEnum
@@ -30,6 +31,7 @@
 if TYPE_CHECKING:
     from vllm.config import VllmConfig
     from vllm.config.cache import CacheDType
+    from vllm.config.kernel import IrOpPriorityConfig
     from vllm.v1.attention.selector import AttentionSelectorConfig
 else:
     VllmConfig = None
@@ -47,6 +49,32 @@
 torch.backends.cuda.enable_cudnn_sdp(False)
 
 
+@lru_cache(maxsize=8)
+def _cuda_device_count_stateless(cuda_visible_devices: str | None = None) -> int:
+    """Get number of CUDA devices, caching based on the value of CUDA_VISIBLE_DEVICES
+    at the time of call.
+
+    This should be used instead of torch.accelerator.device_count() unless
+    CUDA_VISIBLE_DEVICES has already been set to the desired value.
+
+    # This can be removed and simply replaced with torch.cuda.get_device_count
+    # after https://github.com/pytorch/pytorch/pull/122815 is released."""
+    # Note: cuda_visible_devices is not used, but we keep it as an argument for
+    # LRU Cache purposes.
+
+    # Code below is based on
+    # https://github.com/pytorch/pytorch/blob/
+    # c1cd946818442aca8c7f812b16d187ce1586c3bc/
+    # torch/cuda/__init__.py#L831C1-L831C17
+    import torch.cuda
+
+    if not torch.cuda._is_compiled():
+        return 0
+    raw_count = torch.cuda._device_count_nvml()
+    r = torch._C._cuda_getDeviceCount() if raw_count < 0 else raw_count
+    return r
+
+
 @cache
 def _get_backend_priorities(
     use_mla: bool,
@@ -60,7 +88,7 @@ def _get_backend_priorities(
             # Sparse MLA backend priorities
             # See https://github.com/vllm-project/vllm/issues/35807 for
             # benchmark results
-            if kv_cache_dtype is not None and kv_cache_dtype.startswith("fp8"):
+            if kv_cache_dtype is not None and is_quantized_kv_cache(kv_cache_dtype):
                 # Prefer FlashInfer for fp8 kv cache
                 sparse_backends = [
                     AttentionBackendEnum.FLASHINFER_MLA_SPARSE,
@@ -82,6 +110,10 @@ def _get_backend_priorities(
 
             return [
                 AttentionBackendEnum.FLASHINFER_MLA,
+                # R1 dims + FP8 KV only; rejected by supports_combination
+                # otherwise. Behind FLASHINFER_MLA: wins past bs≈8, regresses
+                # at bs≤2.
+                AttentionBackendEnum.TOKENSPEED_MLA,
                 AttentionBackendEnum.CUTLASS_MLA,
                 AttentionBackendEnum.FLASH_ATTN_MLA,
                 AttentionBackendEnum.FLASHMLA,
@@ -103,6 +135,7 @@ def _get_backend_priorities(
                 AttentionBackendEnum.FLASH_ATTN,
                 AttentionBackendEnum.TRITON_ATTN,
                 AttentionBackendEnum.FLEX_ATTENTION,
+                AttentionBackendEnum.TURBOQUANT,
             ]
         else:
             return [
@@ -110,6 +143,7 @@ def _get_backend_priorities(
                 AttentionBackendEnum.FLASHINFER,
                 AttentionBackendEnum.TRITON_ATTN,
                 AttentionBackendEnum.FLEX_ATTENTION,
+                AttentionBackendEnum.TURBOQUANT,
             ]
 
 
@@ -160,10 +194,20 @@ def set_device(cls, device: torch.device) -> None:
         # for why and when it is needed
         _ = torch.zeros(1, device=device)
 
+    @classmethod
+    def manual_seed_all(cls, seed: int) -> None:
+        torch.cuda.manual_seed_all(seed)
+
     @classmethod
     def get_device_capability(cls, device_id: int = 0) -> DeviceCapability | None:
         raise NotImplementedError
 
+    @classmethod
+    def get_cuda_runtime_major(cls) -> int:
+        """Major ``torch.version.cuda`` version, or ``0`` if undetermined."""
+        major = (torch.version.cuda or "0").split(".", 1)[0]
+        return int(major) if major.isdigit() else 0
+
     @classmethod
     def get_device_name(cls, device_id: int = 0) -> str:
         raise NotImplementedError
@@ -335,7 +379,6 @@ def get_attn_backend_cls(
             "Using %s attention backend out of potential backends: %s.",
             selected_backend.name,
             "[" + ", ".join(f"'{b[0].name}'" for b in valid_backends_priorities) + "]",
-            scope="local",
         )
 
         return selected_backend.get_path()
@@ -389,7 +432,6 @@ def get_vit_attn_backend(
                 if is_backend_supported:
                     logger.info_once(
                         f"Using backend {vit_attn_backend} for vit attention",
-                        scope="local",
                     )
                     return vit_attn_backend
             except ImportError:
@@ -456,7 +498,7 @@ def stateless_init_device_torch_dist_pg(
 
     @classmethod
     def device_count(cls) -> int:
-        return cuda_device_count_stateless()
+        return _cuda_device_count_stateless(envs.CUDA_VISIBLE_DEVICES)
 
     @classmethod
     def check_if_supports_dtype(cls, dtype: torch.dtype):
@@ -516,6 +558,10 @@ def support_deep_gemm(cls) -> bool:
         """Currently, only Hopper and Blackwell GPUs are supported."""
         return cls.is_device_capability(90) or cls.is_device_capability_family(100)
 
+    @classmethod
+    def is_integrated_gpu(cls, device_id: int = 0) -> bool:
+        return bool(torch.cuda.get_device_properties(device_id).is_integrated)
+
     @classmethod
     def num_compute_units(cls, device_id: int = 0) -> int:
         return torch.cuda.get_device_properties(device_id).multi_processor_count
@@ -524,6 +570,28 @@ def num_compute_units(cls, device_id: int = 0) -> int:
     def use_custom_op_collectives(cls) -> bool:
         return True
 
+    @classmethod
+    def get_default_ir_op_priority(cls, vllm_config: VllmConfig) -> IrOpPriorityConfig:
+        from vllm.config.compilation import CompilationMode
+        from vllm.config.kernel import IrOpPriorityConfig
+
+        # Native used by default when compiling,
+        # use vllm_c kernels where available when no codegen
+        cc = vllm_config.compilation_config
+        using_inductor = cc.backend == "inductor" and cc.mode != CompilationMode.NONE
+        default = ["native"] if using_inductor else ["vllm_c", "native"]
+
+        # Use oink if enabled for rms_norm
+        # TODO(Laurawly/luka): remove this env var,
+        #  users can just use IR op priority directly
+        rms_norm = default
+        if envs.VLLM_USE_OINK_OPS:
+            rms_norm = ["oink"] + default
+
+        return IrOpPriorityConfig.with_default(
+            default, rms_norm=rms_norm, fused_add_rms_norm=rms_norm
+        )
+
 
 # NVML utils
 # Note that NVML is not affected by `CUDA_VISIBLE_DEVICES`,
@@ -605,6 +673,133 @@ def _get_physical_device_name(cls, device_id: int = 0) -> str:
         handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
         return pynvml.nvmlDeviceGetName(handle)
 
+    @classmethod
+    @with_nvml_context
+    def get_device_numa_node(cls, device_id: int = 0) -> int | None:
+        """Get the NUMA node ID for a GPU device."""
+        physical_device_id = cls.device_id_to_physical_device_id(device_id)
+        handle = pynvml.nvmlDeviceGetHandleByIndex(physical_device_id)
+
+        try:
+            numa_node = pynvml.nvmlDeviceGetNumaNodeId(handle)
+            if cls._numa_node_has_cpus(numa_node):
+                return numa_node
+            # On non-CDMM Grace-Blackwell systems (e.g. GB200), each GPU's HBM
+            # is a separate NUMA node with no CPUs.  Fall through to
+            # CPU-affinity-based detection to find the nearest CPU node.
+            logger.debug(
+                "NUMA node %d for GPU %d has no CPUs (non-CDMM topology), "
+                "falling back to CPU-affinity-based detection",
+                numa_node,
+                device_id,
+            )
+        except Exception:
+            pass
+
+        try:
+            cpu_ids = cls._get_device_cpu_affinity(handle)
+            if cpu_ids:
+                numa_node = cls._get_numa_node_for_cpu(cpu_ids[0])
+                if numa_node is not None:
+                    logger.debug(
+                        "Determined NUMA node %d for GPU %d via CPU affinity",
+                        numa_node,
+                        device_id,
+                    )
+                    return numa_node
+        except Exception as e:
+            logger.warning("Failed to get NUMA node for GPU %d: %s", device_id, e)
+
+        return None
+
+    @classmethod
+    def _numa_node_has_cpus(cls, node_id: int) -> bool:
+        """Check whether a NUMA node has any CPUs assigned to it."""
+        from pathlib import Path
+
+        cpulist_file = Path(f"/sys/devices/system/node/node{node_id}/cpulist")
+        try:
+            return cpulist_file.read_text().strip() != ""
+        except (OSError, ValueError):
+            return False
+
+    @classmethod
+    def _get_device_cpu_affinity(cls, handle) -> list[int]:
+        """Get the list of CPU IDs associated with a GPU via NVML."""
+        cpu_count = os.cpu_count()
+        if cpu_count is None:
+            return []
+
+        cpu_set_size = (cpu_count + 63) // 64
+        cpu_affinity_mask = pynvml.nvmlDeviceGetCpuAffinity(handle, cpu_set_size)
+
+        cpu_ids = []
+        for i, mask in enumerate(cpu_affinity_mask):
+            for bit in range(64):
+                cpu_id = i * 64 + bit
+                if cpu_id >= cpu_count:
+                    break
+                if mask & (1 << bit):
+                    cpu_ids.append(cpu_id)
+        return cpu_ids
+
+    @classmethod
+    def _get_numa_node_for_cpu(cls, cpu_id: int) -> int | None:
+        """Determine which NUMA node a CPU belongs to."""
+        from pathlib import Path
+
+        node_path = Path("/sys/devices/system/node")
+        if not node_path.exists():
+            return None
+
+        for node_dir in node_path.iterdir():
+            if not node_dir.name.startswith("node"):
+                continue
+            try:
+                node_id = int(node_dir.name[4:])
+                cpulist_file = node_dir / "cpulist"
+                if cpulist_file.exists():
+                    cpulist = cpulist_file.read_text().strip()
+                    if cls._cpu_in_cpulist(cpu_id, cpulist):
+                        return node_id
+            except (ValueError, OSError):
+                continue
+        return None
+
+    @classmethod
+    def _cpu_in_cpulist(cls, cpu_id: int, cpulist: str) -> bool:
+        """Check if a CPU ID is in a cpulist string such as '0-3,8-11'."""
+        for part in cpulist.split(","):
+            part = part.strip()
+            if "-" in part:
+                start, end = part.split("-", 1)
+                if int(start) <= cpu_id <= int(end):
+                    return True
+            elif part.isdigit() and int(part) == cpu_id:
+                return True
+        return False
+
+    @classmethod
+    @with_nvml_context
+    def get_all_device_numa_nodes(cls) -> list[int] | None:
+        """Get NUMA nodes for all visible GPU devices."""
+        try:
+            numa_nodes = []
+            for device_id in range(cls.device_count()):
+                numa_node = cls.get_device_numa_node(device_id)
+                if numa_node is None:
+                    logger.warning(
+                        "Could not detect NUMA node for GPU %d, "
+                        "disabling automatic NUMA binding",
+                        device_id,
+                    )
+                    return None
+                numa_nodes.append(numa_node)
+            return numa_nodes
+        except Exception as e:
+            logger.warning("Failed to get NUMA nodes for GPUs: %s", e)
+            return None
+
     @classmethod
     @with_nvml_context
     def log_warnings(cls):
@@ -647,6 +842,14 @@ def is_fully_connected(cls, physical_device_ids: list[int]) -> bool:
         )
         return False
 
+    @classmethod
+    def get_device_numa_node(cls, device_id: int = 0) -> int | None:
+        return None
+
+    @classmethod
+    def get_all_device_numa_nodes(cls) -> list[int] | None:
+        return None
+
 
 # Autodetect either NVML-enabled or non-NVML platform
 # based on whether NVML is available.
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 281e91999ef3..9a93ef9f82a7 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -17,10 +17,12 @@
     from torch.distributed import PrefixStore, ProcessGroup
 
     from vllm.config import VllmConfig
+    from vllm.config.kernel import IrOpPriorityConfig
     from vllm.inputs import EngineInput
     from vllm.pooling_params import PoolingParams
     from vllm.sampling_params import SamplingParams
     from vllm.utils.argparse_utils import FlexibleArgumentParser
+    from vllm.v1.attention.backend import AttentionBackend
     from vllm.v1.attention.selector import AttentionSelectorConfig
 else:
     FlexibleArgumentParser = object
@@ -84,6 +86,9 @@ def __gt__(self, other: Any) -> bool:
             return NotImplemented
         return (self.major, self.minor) > (other.major, other.minor)
 
+    def __hash__(self) -> int:
+        return hash((self.major, self.minor))
+
     def as_version_str(self) -> str:
         return f"{self.major}.{self.minor}"
 
@@ -167,6 +172,10 @@ def is_xpu(self) -> bool:
     def is_cpu(self) -> bool:
         return self._enum == PlatformEnum.CPU
 
+    def uses_host_device_handling(self) -> bool:
+        """Whether vLLM should leave DeviceConfig.device unset."""
+        return self.is_tpu()
+
     def is_zen_cpu(self) -> bool:
         return False
 
@@ -205,6 +214,15 @@ def get_compile_backend(cls) -> str:
         """
         return cls.simple_compile_backend
 
+    @classmethod
+    def import_ir_kernels(cls) -> None:
+        """
+        The default implementation imports ``vllm.kernels``, which registers
+        the built-in IR op implementations. Out-of-tree (OOT) platforms should
+        override this method to import their own kernel modules.
+        """
+        import vllm.kernels  # noqa: F401
+
     @classmethod
     def device_id_to_physical_device_id(cls, device_id: int):
         # Treat empty device control env var as unset. This is a valid
@@ -380,6 +398,11 @@ def set_device(cls, device: torch.device) -> None:
         """
         raise NotImplementedError
 
+    @classmethod
+    def manual_seed_all(cls, seed: int) -> None:
+        """Set RNG seed across all devices for the current platform."""
+        raise NotImplementedError
+
     @classmethod
     def pre_register_and_update(
         cls, parser: FlexibleArgumentParser | None = None
@@ -423,55 +446,236 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
         """
         pass
 
+    @classmethod
+    def _find_non_ssm_backend(
+        cls, vllm_config: "VllmConfig"
+    ) -> "type[AttentionBackend] | None":
+        """Find the first non-SSM attention backend from model layers."""
+        from vllm.config.vllm import get_layers_from_vllm_config
+        from vllm.model_executor.layers.attention_layer_base import (
+            AttentionLayerBase,
+        )
+
+        attn_layers = get_layers_from_vllm_config(
+            vllm_config,
+            AttentionLayerBase,  # type: ignore[type-abstract]
+        )
+        for layer in attn_layers.values():
+            b = layer.get_attn_backend()
+            if not b.is_ssm():
+                return b
+        return None
+
     @classmethod
     def update_block_size_for_backend(cls, vllm_config: "VllmConfig") -> None:
         """
         Ensure block_size is compatible with the attention backend.
+        For hybrid models, also aligns block_size with mamba page sizes.
         """
         from vllm.config.cache import CacheConfig
+        from vllm.config.vllm import set_current_vllm_config
 
         cache_config = vllm_config.cache_config
-        if cache_config.user_specified_block_size:
-            # User specified --block-size; keep it.
-            return
-
         model_config = vllm_config.model_config
+
         # model_config may be None during testing.
-        # Skip hybrid models — their block_size is managed by
-        # HybridAttentionMambaModelConfig.
-        if model_config is None or model_config.is_hybrid:
-            cache_config.block_size = CacheConfig.DEFAULT_BLOCK_SIZE
+        if not model_config:
             return
 
-        from vllm.config.vllm import (
-            get_layers_from_vllm_config,
-            set_current_vllm_config,
-        )
-        from vllm.model_executor.layers.attention_layer_base import (
-            AttentionLayerBase,
+        backend_cls = cls._find_non_ssm_backend(vllm_config)
+        if backend_cls is None:
+            return
+
+        # Phase 1: Pick block size from backend (skip if user set --block-size)
+        if not cache_config.user_specified_block_size:
+            with set_current_vllm_config(vllm_config):
+                preferred = backend_cls.get_preferred_block_size(
+                    CacheConfig.DEFAULT_BLOCK_SIZE
+                )
+            if preferred != CacheConfig.DEFAULT_BLOCK_SIZE:
+                logger.info(
+                    "Setting kv cache block size to %d for %s backend.",
+                    preferred,
+                    backend_cls.get_name(),
+                )
+            cache_config.block_size = preferred
+
+        # Phase 2: Align block/mamba sizes for hybrid models
+        # (may override user settings).
+        if model_config.is_hybrid:
+            cls._align_hybrid_block_size(vllm_config, backend_cls)
+
+    @classmethod
+    def _align_hybrid_block_size(
+        cls,
+        vllm_config: "VllmConfig",
+        backend_cls: "type[AttentionBackend]",
+    ) -> None:
+        """
+        For hybrid attention/mamba models, ensure that the attention page
+        size is >= the mamba page size, and pad the mamba page size to match.
+        """
+        from math import lcm
+
+        from vllm.config.vllm import set_current_vllm_config
+        from vllm.model_executor.models import ModelRegistry
+        from vllm.utils.math_utils import cdiv
+        from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
+        from vllm.v1.attention.backend import MultipleOf
+        from vllm.v1.kv_cache_interface import (
+            FullAttentionSpec,
+            MambaSpec,
+            MLAAttentionSpec,
+            get_kv_quant_mode,
         )
 
-        attn_layers = get_layers_from_vllm_config(
-            vllm_config,
-            AttentionLayerBase,  # type: ignore[type-abstract]
+        cache_config = vllm_config.cache_config
+        model_config = vllm_config.model_config
+        parallel_config = vllm_config.parallel_config
+
+        if cache_config.cache_dtype == "auto":
+            kv_cache_dtype = model_config.dtype
+        else:
+            kv_cache_dtype = STR_DTYPE_TO_TORCH_DTYPE[cache_config.cache_dtype]
+
+        kv_quant_mode = get_kv_quant_mode(cache_config.cache_dtype)
+
+        # Compute attention page size for 1 token
+        if model_config.use_mla:
+            attn_page_size_1_token = MLAAttentionSpec(
+                block_size=1,
+                num_kv_heads=model_config.get_num_kv_heads(parallel_config),
+                head_size=model_config.get_head_size(),
+                dtype=kv_cache_dtype,
+                kv_quant_mode=kv_quant_mode,
+            ).page_size_bytes
+        elif cache_config.cache_dtype.startswith("turboquant_"):
+            # TQ has a packed K|V layout; the standard FullAttentionSpec
+            # formula over-sizes it and trips unify_kv_cache_spec_page_size
+            # when all attention layers are TQ. With mixed skip+TQ the skip
+            # layers still use the standard layout — take max so mamba
+            # padding covers the largest actual page.
+            from vllm.model_executor.layers.quantization.turboquant.config import (
+                TurboQuantConfig,
+            )
+            from vllm.v1.kv_cache_interface import TQFullAttentionSpec
+
+            tq_cfg = TurboQuantConfig.from_cache_dtype(
+                cache_config.cache_dtype, model_config.get_head_size()
+            )
+            tq_page = TQFullAttentionSpec(
+                block_size=1,
+                num_kv_heads=model_config.get_num_kv_heads(parallel_config),
+                head_size=model_config.get_head_size(),
+                head_size_v=model_config.get_head_size(),
+                dtype=kv_cache_dtype,
+                kv_quant_mode=kv_quant_mode,
+                tq_slot_size=tq_cfg.slot_size_aligned,
+            ).page_size_bytes
+            if cache_config.kv_cache_dtype_skip_layers:
+                skip_page = FullAttentionSpec(
+                    block_size=1,
+                    num_kv_heads=model_config.get_num_kv_heads(parallel_config),
+                    head_size=model_config.get_head_size(),
+                    dtype=model_config.dtype,
+                ).page_size_bytes
+                # lcm, not max: skip_page is often not a multiple of
+                # tq_page, so max would leave per-layer page sizes
+                # un-unifiable downstream.
+                attn_page_size_1_token = lcm(tq_page, skip_page)
+            else:
+                attn_page_size_1_token = tq_page
+        else:
+            attn_page_size_1_token = FullAttentionSpec(
+                block_size=1,
+                num_kv_heads=model_config.get_num_kv_heads(parallel_config),
+                head_size=model_config.get_head_size(),
+                dtype=kv_cache_dtype,
+                kv_quant_mode=kv_quant_mode,
+            ).page_size_bytes
+
+        # Compute mamba page size
+        model_cls, _ = ModelRegistry.resolve_model_cls(
+            model_config.architecture,
+            model_config=model_config,
         )
-        if not attn_layers:
-            cache_config.block_size = CacheConfig.DEFAULT_BLOCK_SIZE
+        mamba_page_size = MambaSpec(
+            shapes=model_cls.get_mamba_state_shape_from_config(vllm_config),
+            dtypes=model_cls.get_mamba_state_dtype_from_config(vllm_config),
+            block_size=-1,
+        ).page_size_bytes
+
+        if mamba_page_size == 0:
             return
 
-        first_layer = next(iter(attn_layers.values()))
-        backend_cls = first_layer.get_attn_backend()
+        # mamba_block_size here should either be user specified value or None
+        mamba_block_size = (
+            cache_config.mamba_block_size
+            if cache_config.user_specified_mamba_block_size
+            else None
+        )
+
+        # Get kernel block alignment from the backend's supported sizes
         with set_current_vllm_config(vllm_config):
-            preferred = backend_cls.get_preferred_block_size(
-                CacheConfig.DEFAULT_BLOCK_SIZE
+            kernel_block_alignment_size = max(
+                min(
+                    s.base if isinstance(s, MultipleOf) else s
+                    for s in backend_cls.get_supported_kernel_block_sizes()
+                ),
+                cache_config.block_size,
+            )
+
+        if cache_config.mamba_cache_mode == "all":
+            # With prefix caching, align to mamba chunk size for kernel perf
+            # TODO(tdoublep): this constraint can be relaxed fairly
+            # easily by changing the way we layout chunks in the
+            # mamba2 kernels.
+            base_chunk_size = mamba_block_size or model_config.get_mamba_chunk_size()
+            assert base_chunk_size is not None
+            attn_tokens_per_mamba_state = cdiv(mamba_page_size, attn_page_size_1_token)
+            chunk_size = lcm(base_chunk_size, kernel_block_alignment_size)
+            attn_block_size = chunk_size * cdiv(attn_tokens_per_mamba_state, chunk_size)
+            cache_config.mamba_block_size = attn_block_size
+        else:
+            # Without prefix caching, use minimum block size that satisfies
+            # both backend alignment and mamba page size compatibility
+            attn_block_size = kernel_block_alignment_size * cdiv(
+                mamba_page_size,
+                kernel_block_alignment_size * attn_page_size_1_token,
+            )
+
+        if cache_config.block_size < attn_block_size:
+            cache_config.block_size = attn_block_size
+            logger.info(
+                "Setting attention block size to %d tokens "
+                "to ensure that attention page size is >= mamba page size.",
+                attn_block_size,
+            )
+
+        if cache_config.mamba_cache_mode == "align":
+            cache_config.mamba_block_size = cache_config.block_size
+
+        # Pad mamba page size to exactly match attention page size
+        attn_page_size = cache_config.block_size * attn_page_size_1_token
+        assert attn_page_size >= mamba_page_size
+
+        if attn_page_size == mamba_page_size:
+            return
+
+        if (
+            cache_config.mamba_page_size_padded is None
+            or cache_config.mamba_page_size_padded != attn_page_size
+        ):
+            cache_config.mamba_page_size_padded = attn_page_size
+            mamba_padding_pct = (
+                100 * (attn_page_size - mamba_page_size) / mamba_page_size
             )
-        if preferred != CacheConfig.DEFAULT_BLOCK_SIZE:
             logger.info(
-                "Setting kv cache block size to %d for %s backend.",
-                preferred,
-                backend_cls.get_name(),
+                "Padding mamba page size by %.2f%% to ensure "
+                "that mamba page size and attention page size are "
+                "exactly equal.",
+                mamba_padding_pct,
             )
-        cache_config.block_size = preferred
 
     @classmethod
     def verify_model_arch(cls, model_arch: str) -> None:
@@ -573,6 +777,18 @@ def get_device_communicator_cls(cls) -> str:
         """
         return "vllm.distributed.device_communicators.base_device_communicator.DeviceCommunicatorBase"  # noqa
 
+    @classmethod
+    def is_integrated_gpu(cls, device_id: int = 0) -> bool:
+        """
+        Returns whether the GPU is an integrated (UMA) device that shares
+        system memory with the CPU.
+
+        On UMA systems (e.g. NVIDIA GH200, DGX Spark, Jetson Orin),
+        cudaMemGetInfo may underreport free memory because it does not
+        account for reclaimable OS memory (page cache, buffers).
+        """
+        return False
+
     @classmethod
     def supports_mx(cls) -> bool:
         """
@@ -790,6 +1006,16 @@ def num_compute_units(cls, device_id: int = 0) -> int:
             "num_compute_units is not implemented for the current platform."
         )
 
+    @classmethod
+    def get_default_ir_op_priority(
+        cls, vllm_config: "VllmConfig"
+    ) -> "IrOpPriorityConfig":
+        """Get the default IR op priority for the current platform."""
+        from vllm.config.kernel import IrOpPriorityConfig
+
+        # Native always used by default. Platforms can override this behavior.
+        return IrOpPriorityConfig.with_default(["native"])
+
 
 class UnspecifiedPlatform(Platform):
     _enum = PlatformEnum.UNSPECIFIED
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index d3836a467e1b..114d236f1319 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -13,13 +13,13 @@
 
 import vllm.envs as envs
 from vllm.logger import init_logger
-from vllm.utils.torch_utils import cuda_device_count_stateless
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
 
 from .interface import DeviceCapability, Platform, PlatformEnum
 
 if TYPE_CHECKING:
     from vllm.config import VllmConfig
+    from vllm.config.kernel import IrOpPriorityConfig
     from vllm.v1.attention.selector import AttentionSelectorConfig
 
 logger = init_logger(__name__)
@@ -33,6 +33,7 @@
         amdsmi_init,
         amdsmi_shut_down,
         amdsmi_topo_get_link_type,
+        amdsmi_topo_get_numa_node_number,
     )
 except ImportError as e:
     logger.warning("Failed to import from amdsmi with %r", e)
@@ -43,6 +44,11 @@
     logger.warning("Failed to import from vllm._C with %r", e)
 
 # import custom ops, trigger op registration
+try:
+    import vllm._C_stable_libtorch  # noqa: F401
+except ImportError as e:
+    logger.warning("Failed to import from vllm._C_stable_libtorch with %r", e)
+
 try:
     import vllm._rocm_C  # noqa: F401
 except ImportError as e:
@@ -64,9 +70,47 @@
     "0x74a9": "AMD_Instinct_MI300X_HF",
     "0x74bd": "AMD_Instinct_MI300X_HF",
     "0x744c": "AMD_Radeon_RX7900XTX",
+    # RDNA 3.5 APUs (Strix Point / Strix Halo)
+    "0x150e": "AMD_Radeon_890M",  # gfx1150, Strix Point
+    "0x1586": "AMD_Radeon_8060S",  # gfx1151, Strix Halo
+    # RDNA 4 discrete (Navi 48)
+    "0x7550": "AMD_Radeon_RX9070XT",  # gfx1201
+    "0x7551": "AMD_Radeon_R9700",  # gfx1201
 }
 
 
+@lru_cache(maxsize=8)
+def _rocm_device_count_stateless(cuda_visible_devices: str | None = None) -> int:
+    """Get number of ROCm devices, caching based on the value of CUDA_VISIBLE_DEVICES
+    at the time of call.
+
+    This should be used instead of torch.accelerator.device_count() unless
+    CUDA_VISIBLE_DEVICES has already been set to the desired value.
+
+    # This can be removed and simply replaced with torch.cuda.get_device_count
+    # after https://github.com/pytorch/pytorch/pull/122815 is released."""
+    # Note: cuda_visible_devices is not used, but we keep it as an argument for
+    # LRU Cache purposes.
+
+    # Code below is based on
+    # https://github.com/pytorch/pytorch/blob/
+    # c1cd946818442aca8c7f812b16d187ce1586c3bc/
+    # torch/cuda/__init__.py#L831C1-L831C17
+    import torch.cuda
+
+    if not torch.cuda._is_compiled():
+        return 0
+    # ROCm uses amdsmi instead of nvml for stateless device count
+    # This requires a sufficiently modern version of Torch 2.4.0
+    raw_count = (
+        torch.cuda._device_count_amdsmi()
+        if (hasattr(torch.cuda, "_device_count_amdsmi"))
+        else -1
+    )
+    r = torch._C._cuda_getDeviceCount() if raw_count < 0 else raw_count
+    return r
+
+
 def _sync_hip_cuda_env_vars():
     """Ensure HIP_VISIBLE_DEVICES and CUDA_VISIBLE_DEVICES are consistent.
     Treats empty string as unset. Raises on genuine conflicts."""
@@ -146,8 +190,10 @@ def _get_gcn_arch() -> str:
 _GCN_ARCH = _get_gcn_arch()
 
 _ON_GFX1X = any(arch in _GCN_ARCH for arch in ["gfx11", "gfx12"])
+_ON_GFX12X = any(arch in _GCN_ARCH for arch in ["gfx12"])
 _ON_MI3XX = any(arch in _GCN_ARCH for arch in ["gfx942", "gfx950"])
 _ON_GFX9 = any(arch in _GCN_ARCH for arch in ["gfx90a", "gfx942", "gfx950"])
+_ON_GFX90A = "gfx90a" in _GCN_ARCH
 _ON_GFX942 = "gfx942" in _GCN_ARCH
 _ON_GFX950 = "gfx950" in _GCN_ARCH
 
@@ -227,6 +273,10 @@ def on_gfx1x() -> bool:
     return _ON_GFX1X
 
 
+def on_gfx12x() -> bool:
+    return _ON_GFX12X
+
+
 def on_mi3xx() -> bool:
     return _ON_MI3XX
 
@@ -235,6 +285,10 @@ def on_gfx9() -> bool:
     return _ON_GFX9
 
 
+def on_gfx90a() -> bool:
+    return _ON_GFX90A
+
+
 def on_gfx942() -> bool:
     return _ON_GFX942
 
@@ -334,6 +388,7 @@ def _get_backend_priorities(
     if is_aiter_found_and_supported():
         backends.append(AttentionBackendEnum.ROCM_AITER_UNIFIED_ATTN)
     backends.append(AttentionBackendEnum.TRITON_ATTN)
+    backends.append(AttentionBackendEnum.TURBOQUANT)
 
     return backends
 
@@ -357,16 +412,26 @@ class RocmPlatform(Platform):
         "awq",
         "awq_marlin",  # will be overwritten with awq
         "gptq",
-        "gptq_marlin",  # will be overwritten with gptq
+        "gptq_marlin",
+        "auto_gptq",
         "fp8",
+        "deepseek_v4_fp8",
         "compressed-tensors",
         "fbgemm_fp8",
         "gguf",
         "quark",
         "mxfp4",
-        "petit_nvfp4",
+        "mxfp8",
         "torchao",
         "bitsandbytes",
+        "modelopt",
+        "modelopt_fp4",
+        "modelopt_mxfp8",
+        "modelopt_mixed",
+        "fp8_per_tensor",
+        "fp8_per_block",
+        "online",
+        "gpt_oss_mxfp4",
     ]
 
     @classmethod
@@ -562,6 +627,10 @@ def set_device(cls, device: torch.device) -> None:
         """
         torch.cuda.set_device(device)
 
+    @classmethod
+    def manual_seed_all(cls, seed: int) -> None:
+        torch.cuda.manual_seed_all(seed)
+
     @classmethod
     @lru_cache(maxsize=8)
     def get_device_capability(cls, device_id: int = 0) -> DeviceCapability | None:
@@ -604,9 +673,9 @@ def get_device_name(cls, device_id: int = 0) -> str:
         physical_device_id = cls.device_id_to_physical_device_id(device_id)
         handle = amdsmi_get_processor_handles()[physical_device_id]
         asic_info = amdsmi_get_gpu_asic_info(handle)
-        device_name: str = asic_info["device_id"]
-        if device_name in _ROCM_DEVICE_ID_NAME_MAP:
-            return _ROCM_DEVICE_ID_NAME_MAP[device_name]
+        asic_info_device_id: str = asic_info["device_id"]
+        if asic_info_device_id in _ROCM_DEVICE_ID_NAME_MAP:
+            return _ROCM_DEVICE_ID_NAME_MAP[asic_info_device_id]
         return asic_info["market_name"]
 
     @classmethod
@@ -631,21 +700,11 @@ def get_device_total_memory(cls, device_id: int = 0) -> int:
     @classmethod
     def apply_config_platform_defaults(cls, vllm_config: "VllmConfig") -> None:
         from vllm._aiter_ops import rocm_aiter_ops
-        from vllm.config.compilation import CUDAGraphMode
 
         compilation_config = vllm_config.compilation_config
-        is_eager_execution = compilation_config.cudagraph_mode == CUDAGraphMode.NONE
         use_aiter_fused_moe = rocm_aiter_ops.is_fused_moe_enabled()
-        use_aiter_rms_norm = rocm_aiter_ops.is_rmsnorm_enabled()
         use_aiter_fp8_linear = rocm_aiter_ops.is_linear_fp8_enabled()
         use_aiter_fused_se = rocm_aiter_ops.is_fusion_moe_shared_experts_enabled()
-        #  Aiter rms norm perform best when CUDA Graph capture is enabled.
-        if (
-            use_aiter_rms_norm
-            and not is_eager_execution
-            and "-rms_norm" not in compilation_config.custom_ops
-        ):
-            compilation_config.custom_ops.append("+rms_norm")
 
         if use_aiter_fp8_linear and "-quant_fp8" not in compilation_config.custom_ops:
             compilation_config.custom_ops.append("+quant_fp8")
@@ -730,9 +789,9 @@ def get_punica_wrapper(cls) -> str:
     def get_current_memory_usage(
         cls, device: torch.types.Device | None = None
     ) -> float:
+        torch.cuda.empty_cache()
         torch.cuda.reset_peak_memory_stats(device)
-        free_mem, total_mem = torch.cuda.mem_get_info(device)
-        return total_mem - free_mem
+        return torch.cuda.max_memory_allocated(device)
 
     @classmethod
     def get_device_communicator_cls(cls) -> str:
@@ -746,7 +805,7 @@ def supports_mx(cls) -> bool:
 
     @classmethod
     def supports_fp8(cls) -> bool:
-        return any(gfx in _GCN_ARCH for gfx in ["gfx94", "gfx95", "gfx12"])
+        return on_gfx9() or on_gfx12x()
 
     @classmethod
     def is_fp8_fnuz(cls) -> bool:
@@ -810,7 +869,7 @@ def stateless_init_device_torch_dist_pg(
 
     @classmethod
     def device_count(cls) -> int:
-        return cuda_device_count_stateless()
+        return _rocm_device_count_stateless(getattr(envs, cls.device_control_env_var))
 
     @classmethod
     def check_if_supports_dtype(cls, dtype: torch.dtype):
@@ -872,3 +931,59 @@ def num_compute_units(cls, device_id: int = 0) -> int:
     @classmethod
     def use_custom_op_collectives(cls) -> bool:
         return True
+
+    @classmethod
+    def get_default_ir_op_priority(
+        cls, vllm_config: "VllmConfig"
+    ) -> "IrOpPriorityConfig":
+        from vllm.config.compilation import CompilationMode, CUDAGraphMode
+        from vllm.config.kernel import IrOpPriorityConfig
+
+        # Native used by default when compiling,
+        # use vllm_c kernels where available when no codegen
+        # TODO(luka/TJ) use aiter, vllm_c, native by default on ROCm
+        cc = vllm_config.compilation_config
+        using_inductor = cc.backend == "inductor" and cc.mode != CompilationMode.NONE
+        default = ["native"] if using_inductor else ["vllm_c", "native"]
+
+        #  Aiter rms norm perform best when CUDA Graph capture is enabled.
+        # TODO(luka/TJ) remove env vars completely
+        if (
+            cc.cudagraph_mode != CUDAGraphMode.NONE
+            and envs.VLLM_ROCM_USE_AITER
+            and envs.VLLM_ROCM_USE_AITER_RMSNORM
+        ):
+            rms_norm = ["aiter"] + default
+        else:
+            rms_norm = default
+
+        return IrOpPriorityConfig.with_default(
+            default, rms_norm=rms_norm, fused_add_rms_norm=rms_norm
+        )
+
+    @classmethod
+    @with_amdsmi_context
+    def get_all_device_numa_nodes(cls) -> list[int] | None:
+        """Get NUMA nodes for all visible GPU devices."""
+        try:
+            handles = amdsmi_get_processor_handles()
+            numa_nodes = []
+            for device_id in range(cls.device_count()):
+                physical_device_id = cls.device_id_to_physical_device_id(device_id)
+                try:
+                    numa_node = amdsmi_topo_get_numa_node_number(
+                        handles[physical_device_id]
+                    )
+                except AmdSmiException as e:
+                    logger.warning(
+                        "Could not detect NUMA node for GPU %d, "
+                        "disabling automatic NUMA binding: %s",
+                        device_id,
+                        e,
+                    )
+                    return None
+                numa_nodes.append(numa_node)
+            return numa_nodes
+        except Exception as e:
+            logger.warning("Failed to get NUMA nodes for GPUs: %s", e)
+            return None
diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
index b8cab5f45dcd..fdffdac43003 100644
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -21,6 +21,7 @@
 
 if TYPE_CHECKING:
     from vllm.config import VllmConfig
+    from vllm.config.kernel import IrOpPriorityConfig
     from vllm.v1.attention.selector import AttentionSelectorConfig
 else:
     VllmConfig = None
@@ -60,6 +61,12 @@ def get_attn_backend_cls(
             "only NHD layout is supported by XPU attention kernels."
         )
 
+        # TurboQuant KV cache: route directly to TQ backend
+        kv_cache_dtype = attn_selector_config.kv_cache_dtype
+        if kv_cache_dtype is not None and kv_cache_dtype.startswith("turboquant_"):
+            logger.info_once("Using TurboQuant attention backend.")
+            return AttentionBackendEnum.TURBOQUANT.get_path()
+
         dtype = attn_selector_config.dtype
         if attn_selector_config.use_sparse:
             logger.info_once("Using XPU MLA Sparse backend.")
@@ -124,6 +131,10 @@ def set_device(cls, device: torch.device) -> None:
         """
         torch.xpu.set_device(device)
 
+    @classmethod
+    def manual_seed_all(cls, seed: int) -> None:
+        torch.xpu.manual_seed_all(seed)
+
     @classmethod
     def get_device_capability(
         cls,
@@ -160,11 +171,7 @@ def get_static_graph_wrapper_cls(cls) -> str:
 
     @classmethod
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
-        cache_config = vllm_config.cache_config
         parallel_config = vllm_config.parallel_config
-        # in V1(or with chunked prefill) block_size is 64
-        if cache_config and not cache_config.user_specified_block_size:
-            cache_config.block_size = 64
 
         # lazy import to avoid circular import
         from vllm.config import CUDAGraphMode
@@ -188,23 +195,26 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                 "XPU Graph is disabled by environment variable, "
                 "please set VLLM_XPU_ENABLE_XPU_GRAPH=1 to enable it."
             )
-        elif parallel_config.world_size_across_dp > 1:
-            compilation_config.cudagraph_mode = CUDAGraphMode.NONE
-            logger.warning(
-                "XPU Graph doesn't support capture communication ops, "
-                "disabling cudagraph_mode."
-            )
-        else:
-            if (
-                attention_config.backend == AttentionBackendEnum.FLASH_ATTN
-                and compilation_config.cudagraph_mode
-                not in {CUDAGraphMode.NONE, CUDAGraphMode.PIECEWISE}
-            ):
-                compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
+
+        # Disable fusion passes not yet supported on XPU.
+        pass_config = compilation_config.pass_config
+        fusion_passes_to_disable = {
+            "enable_sp": "Sequence parallelism",
+            "fuse_gemm_comms": "Async TP",
+            "fuse_allreduce_rms": "AllReduce + RMSNorm fusion",
+            "fuse_norm_quant": "RMSNorm + quant fusion",
+            "fuse_act_quant": "Activation + quant fusion",
+            "fuse_attn_quant": "Attention + quant fusion",
+            "fuse_act_padding": "Activation + padding fusion",
+            "fuse_rope_kvcache": "RoPE + KV cache fusion",
+        }
+        for flag, feature_name in fusion_passes_to_disable.items():
+            if getattr(pass_config, flag):
                 logger.warning(
-                    "FMHA sycl-tla kernels cannot be captured with XPU graphs, "
-                    "falling back to PIECEWISE graph mode on XPU platform."
+                    "Feature %r is not yet supported on XPU and will be disabled.",
+                    feature_name,
                 )
+                setattr(pass_config, flag, False)
 
         # check and update parallel config
         parallel_config = vllm_config.parallel_config
@@ -221,11 +231,60 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         # ref. https://openucx.readthedocs.io/en/master/faq.html
         os.environ["UCX_MEMTYPE_CACHE"] = "n"
 
+        # spawn is the only supported multiprocessing method on XPU
+        if "VLLM_WORKER_MULTIPROC_METHOD" not in os.environ:
+            os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
+
     @classmethod
     def update_block_size_for_backend(cls, vllm_config: "VllmConfig") -> None:
-        # TODO: XPU still sets block_size in check_and_update_config.
-        # Move that logic here so block_size is chosen by the backend.
-        pass
+        super().update_block_size_for_backend(vllm_config)
+        from vllm.config.vllm import get_layers_from_vllm_config
+        from vllm.model_executor.layers.attention_layer_base import (
+            AttentionLayerBase,
+        )
+        from vllm.utils.math_utils import cdiv
+
+        cache_config = vllm_config.cache_config
+        # special fix for GDN since kernel only supports block size dividable by 64
+        attn_layers = get_layers_from_vllm_config(
+            vllm_config,
+            AttentionLayerBase,  # type: ignore[type-abstract]
+        )
+
+        kernel_block_size = None
+        for layer in attn_layers.values():
+            b = layer.get_attn_backend()
+            if b.get_name() == "GDN_ATTN":
+                kernel_block_size = 64
+                break
+
+        if kernel_block_size is None:
+            return
+        new_block_size = (
+            cdiv(cache_config.block_size, kernel_block_size) * kernel_block_size
+        )
+        if new_block_size == cache_config.block_size:
+            return
+
+        if cache_config.mamba_cache_mode == "align":
+            cache_config.mamba_block_size = new_block_size
+        original_mamba_page_size_padded = cache_config.mamba_page_size_padded
+        if cache_config.mamba_page_size_padded is not None:
+            attn_page_size_1_token = (
+                cache_config.mamba_page_size_padded // cache_config.block_size
+            )
+            cache_config.mamba_page_size_padded = (
+                new_block_size * attn_page_size_1_token
+            )
+        cache_config.block_size = new_block_size
+        logger.info(
+            "[XPU]Setting attention block size to %d tokens to ensure multiple of %d, "
+            "set mamba_page_size_padded to %d bytes accordingly, before was %d bytes.",
+            new_block_size,
+            kernel_block_size,
+            cache_config.mamba_page_size_padded,
+            original_mamba_page_size_padded,
+        )
 
     @classmethod
     def support_hybrid_kv_cache(cls) -> bool:
@@ -267,6 +326,25 @@ def get_device_communicator_cls(cls) -> str:
             )
         return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator"  # noqa
 
+    @classmethod
+    def supports_fp8(cls) -> bool:
+        return True
+
+    @classmethod
+    def get_default_ir_op_priority(
+        cls, vllm_config: "VllmConfig"
+    ) -> "IrOpPriorityConfig":
+        from vllm.config.compilation import CompilationMode
+        from vllm.config.kernel import IrOpPriorityConfig
+
+        # Native used by default when compiling,
+        # use fused kernels where available when no codegen
+        cc = vllm_config.compilation_config
+        using_inductor = cc.backend == "inductor" and cc.mode != CompilationMode.NONE
+        default = ["native"] if using_inductor else ["xpu_kernels", "native"]
+
+        return IrOpPriorityConfig.with_default(default)
+
     @classmethod
     def device_count(cls) -> int:
         return torch.xpu.device_count()
@@ -314,3 +392,7 @@ def swap_out_blocks_to_host(
     @classmethod
     def num_compute_units(cls, device_id: int = 0) -> int:
         return torch.xpu.get_device_properties(device_id).max_compute_units
+
+    @classmethod
+    def use_custom_op_collectives(cls) -> bool:
+        return True
diff --git a/vllm/platforms/zen_cpu.py b/vllm/platforms/zen_cpu.py
index 62ba37a74c8d..2af64e5e9f53 100644
--- a/vllm/platforms/zen_cpu.py
+++ b/vllm/platforms/zen_cpu.py
@@ -1,17 +1,13 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from typing import TYPE_CHECKING
+import torch
 
 from vllm.logger import init_logger
 from vllm.platforms.cpu import CpuPlatform
-from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 logger = init_logger(__name__)
 
-if TYPE_CHECKING:
-    from vllm.config import VllmConfig
-
 
 class ZenCpuPlatform(CpuPlatform):
     """CPU platform with AMD Zen (ZenDNN/zentorch) optimizations.
@@ -29,39 +25,8 @@ def is_zen_cpu(self) -> bool:
         # is_cpu() also returns True for this platform (inherited from CpuPlatform).
         return True
 
-    @classmethod
-    def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
-        super().check_and_update_config(vllm_config)
-        cls._apply_pytorch_backports()
-
-    @classmethod
-    def _apply_pytorch_backports(cls):
-        """Backport PyTorch mainline fixes missing in 2.10.
-
-        PyTorch 2.10 has a bug in FxGraphCachePickler.dumps that doesn't
-        catch ValueError, causing torch.compile cache misses. Remove this
-        once we drop PyTorch 2.10 support. PT mainline already has this fix.
-        """
-        if not is_torch_equal_or_newer("2.10.0") or is_torch_equal_or_newer("2.11.0"):
-            return
-
-        cls._patch_fxgraphcache_pickle()
-
-    @classmethod
-    def _patch_fxgraphcache_pickle(cls):
-        """Backport mainline ValueError fix to FxGraphCachePickler.dumps()."""
-        from torch._inductor.codecache import BypassFxGraphCache, FxGraphCachePickler
-
-        original_dumps = FxGraphCachePickler.dumps
-        if hasattr(original_dumps, "_zen_patched"):
-            return
-
-        def patched_dumps(self, obj):
-            try:
-                return original_dumps(self, obj)
-            except ValueError as e:
-                raise BypassFxGraphCache("Failed to pickle cache key") from e
-
-        patched_dumps._zen_patched = True  # type: ignore[attr-defined]
-        FxGraphCachePickler.dumps = patched_dumps
-        logger.info("[zen_cpu] Patched FxGraphCachePickler.dumps (ValueError fix)")
+    # Currently, AMD CPUs do not support float16 compute.
+    # Hence explicitly return bfloat16 and float32.
+    @property
+    def supported_dtypes(self) -> list[torch.dtype]:
+        return [torch.bfloat16, torch.float32]
diff --git a/vllm/plugins/io_processors/__init__.py b/vllm/plugins/io_processors/__init__.py
index c8cb4f185278..c502f4f744d7 100644
--- a/vllm/plugins/io_processors/__init__.py
+++ b/vllm/plugins/io_processors/__init__.py
@@ -12,6 +12,23 @@
 logger = logging.getLogger(__name__)
 
 
+def has_io_processor(
+    vllm_config: VllmConfig,
+    plugin_from_init: str | None = None,
+):
+    if plugin_from_init:
+        model_plugin = plugin_from_init
+    else:
+        # A plugin can be specified via the model config
+        # Retrieve the model specific plugin if available
+        # This is using a custom field in the hf_config for the model
+        hf_config = vllm_config.model_config.hf_config.to_dict()
+        config_plugin = hf_config.get("io_processor_plugin")
+        model_plugin = config_plugin
+
+    return model_plugin is not None
+
+
 def get_io_processor(
     vllm_config: VllmConfig,
     renderer: BaseRenderer,
diff --git a/vllm/pooling_params.py b/vllm/pooling_params.py
index b347ec831abc..3cfe9b427bd5 100644
--- a/vllm/pooling_params.py
+++ b/vllm/pooling_params.py
@@ -87,13 +87,6 @@ def clone(self) -> "PoolingParams":
         return deepcopy(self)
 
     def verify(self, model_config: ModelConfig) -> None:
-        if self.task == "score":
-            logger.warning_once(
-                "`score` task is deprecated and will be removed in v0.20. "
-                "Please use `classify` instead."
-            )
-            self.task = "classify"
-
         # plugin task uses io_processor.parse_request to verify inputs,
         # skipping PoolingParams verify
         if self.task == "plugin":
@@ -117,7 +110,8 @@ def _merge_default_parameters(self, model_config: ModelConfig) -> None:
         if pooler_config is None:
             return
 
-        assert self.task is not None, "task must be set"
+        if self.task is None:
+            raise ValueError("task must be set before merging parameters")
         valid_parameters = self.valid_parameters[self.task]
 
         for k in valid_parameters:
@@ -196,7 +190,8 @@ def _set_default_parameters(self, model_config: ModelConfig):
             raise ValueError(f"Unknown pooling task: {self.task!r}")
 
     def _verify_valid_parameters(self):
-        assert self.task is not None, "task must be set"
+        if self.task is None:
+            raise ValueError("task must be set before verifying parameters")
         valid_parameters = self.valid_parameters[self.task]
         invalid_parameters = []
         for k in self.all_parameters:
@@ -228,6 +223,8 @@ def __repr__(self) -> str:
         )
 
     def __post_init__(self) -> None:
-        assert self.output_kind == RequestOutputKind.FINAL_ONLY, (
-            "For pooling output_kind has to be FINAL_ONLY"
-        )
+        if self.output_kind != RequestOutputKind.FINAL_ONLY:
+            raise ValueError(
+                "For pooling output_kind has to be FINAL_ONLY, "
+                f"got {self.output_kind!r}"
+            )
diff --git a/vllm/profiler/wrapper.py b/vllm/profiler/wrapper.py
index f3af993e7f7e..201b45078492 100644
--- a/vllm/profiler/wrapper.py
+++ b/vllm/profiler/wrapper.py
@@ -63,7 +63,7 @@ def _call_stop(self) -> None:
         """Call _stop with error handling but no safeguards."""
         try:
             self._stop()
-            logger.info_once("Profiler stopped successfully.", scope="local")
+            logger.info_once("Profiler stopped successfully.")
         except Exception as e:
             logger.warning("Failed to stop profiler: %s", e)
         self._running = False  # Always mark as not running, assume stop worked
@@ -93,7 +93,7 @@ def step(self) -> None:
             and self._delay_iters > 0
             and self._active_iteration_count == self._delay_iters
         ):
-            logger.info_once("Starting profiler after delay...", scope="local")
+            logger.info_once("Starting profiler after delay...")
             self._call_start()
 
         # Call profiler step for schedule-based profiling
@@ -109,9 +109,7 @@ def step(self) -> None:
             # Automatically stop the profiler after max iters
             # will be marked as not running, but leave as active so that stop
             # can clean up properly
-            logger.info_once(
-                "Max profiling iterations reached. Stopping profiler...", scope="local"
-            )
+            logger.info_once("Max profiling iterations reached. Stopping profiler...")
             self._call_stop()
             return
 
@@ -141,7 +139,7 @@ def stop(self) -> None:
 
     def shutdown(self) -> None:
         """Ensure profiler is stopped when shutting down."""
-        logger.info_once("Shutting down profiler", scope="local")
+        logger.info_once("Shutting down profiler")
         if self._running:
             self.stop()
 
@@ -176,7 +174,6 @@ def __init__(
             logger.info_once(
                 "Torch profiling enabled. Traces will be saved to: %s",
                 torch_profiler_trace_dir,
-                scope="local",
             )
             logger.debug(
                 "Profiler config: record_shapes=%s,"
@@ -216,7 +213,6 @@ def __init__(
                     profiler_config.wait_iterations,
                     profiler_config.warmup_iterations,
                     profiler_config.active_iterations,
-                    scope="local",
                 )
 
         self.profiler = torch.profiler.profile(
@@ -240,6 +236,28 @@ def __init__(
             0,
         )
 
+    def _build_profiler_table(
+        self,
+        sort_key: str,
+        row_limit: int | None = None,
+    ) -> str:
+        if row_limit is None:  # use profiler default row limit of 100
+            return self.profiler.key_averages().table(sort_by=sort_key)
+        return self.profiler.key_averages().table(
+            sort_by=sort_key,
+            row_limit=row_limit,
+        )
+
+    def _write_profiler_table(self, rank: int, table: str) -> None:
+        profiler_dir = self.profiler_config.torch_profiler_dir
+
+        # Skip file write for URI paths (gs://, s3://, etc.)
+        # as standard file I/O doesn't work with URI schemes
+        if not _is_uri_path(profiler_dir):
+            profiler_out_file = f"{profiler_dir}/profiler_out_{rank}.txt"
+            with open(profiler_out_file, "w") as f:
+                print(table, file=f)
+
     @override
     def _start(self) -> None:
         self.profiler.start()
@@ -251,26 +269,22 @@ def _stop(self) -> None:
         profiler_config = self.profiler_config
         rank = self.local_rank
         if profiler_config.torch_profiler_dump_cuda_time_total:
-            profiler_dir = profiler_config.torch_profiler_dir
-            sort_key = "self_cuda_time_total"
-            table = self.profiler.key_averages().table(sort_by=sort_key)
-
-            # Skip file write for URI paths (gs://, s3://, etc.)
-            # as standard file I/O doesn't work with URI schemes
-            if not _is_uri_path(profiler_dir):
-                profiler_out_file = f"{profiler_dir}/profiler_out_{rank}.txt"
-                with open(profiler_out_file, "w") as f:
-                    print(table, file=f)
+            table = self._build_profiler_table(sort_key="self_cuda_time_total")
+            self._write_profiler_table(rank, table)
 
             # only print profiler results on rank 0
             if rank == 0:
                 print(table)
-        if self.dump_cpu_time_total and rank == 0:
-            logger.info(
-                self.profiler.key_averages().table(
-                    sort_by="self_cpu_time_total", row_limit=50
-                )
+
+        if self.dump_cpu_time_total:
+            table = self._build_profiler_table(
+                sort_key="self_cpu_time_total", row_limit=50
             )
+            self._write_profiler_table(rank, table)
+
+            # only print profiler results on rank 0
+            if rank == 0:
+                print(table)
 
     @override
     def _profiler_step(self) -> bool:
diff --git a/vllm/reasoning/__init__.py b/vllm/reasoning/__init__.py
index 8c78db6f1878..cd51f106503a 100644
--- a/vllm/reasoning/__init__.py
+++ b/vllm/reasoning/__init__.py
@@ -28,10 +28,30 @@
         "deepseek_v3_reasoning_parser",
         "DeepSeekV3ReasoningParser",
     ),
+    "deepseek_v4": (
+        "deepseek_v3_reasoning_parser",
+        "DeepSeekV3ReasoningParser",
+    ),
+    "poolside_v1": (
+        "poolside_v1_reasoning_parser",
+        "PoolsideV1ReasoningParser",
+    ),
+    "cohere_command3": (
+        "cohere_command_reasoning_parser",
+        "CohereCommand3ReasoningParser",
+    ),
+    "cohere_command4": (
+        "cohere_command_reasoning_parser",
+        "CohereCommand4ReasoningParser",
+    ),
     "ernie45": (
         "ernie45_reasoning_parser",
         "Ernie45ReasoningParser",
     ),
+    "gemma4": (
+        "gemma4_reasoning_parser",
+        "Gemma4ReasoningParser",
+    ),
     "glm45": (
         "deepseek_v3_reasoning_parser",
         "DeepSeekV3ReasoningWithThinkingParser",
@@ -52,10 +72,18 @@
         "hunyuan_a13b_reasoning_parser",
         "HunyuanA13BReasoningParser",
     ),
+    "hy_v3": (
+        "hy_v3_reasoning_parser",
+        "HYV3ReasoningParser",
+    ),
     "kimi_k2": (
         "kimi_k2_reasoning_parser",
         "KimiK2ReasoningParser",
     ),
+    "mimo": (
+        "qwen3_reasoning_parser",
+        "Qwen3ReasoningParser",
+    ),
     "minimax_m2": (
         "minimax_m2_reasoning_parser",
         "MiniMaxM2ReasoningParser",
diff --git a/vllm/reasoning/abs_reasoning_parsers.py b/vllm/reasoning/abs_reasoning_parsers.py
index 5271a307075e..8edbc5f82efd 100644
--- a/vllm/reasoning/abs_reasoning_parsers.py
+++ b/vllm/reasoning/abs_reasoning_parsers.py
@@ -6,7 +6,7 @@
 from abc import abstractmethod
 from collections.abc import Callable, Iterable, Sequence
 from functools import cached_property
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, cast
 
 from vllm.entrypoints.mcp.tool_server import ToolServer
 from vllm.logger import init_logger
@@ -14,6 +14,7 @@
 from vllm.utils.import_utils import import_from_path
 
 if TYPE_CHECKING:
+    from vllm.config import ModelConfig
     from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
     from vllm.entrypoints.openai.engine.protocol import DeltaMessage
     from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
@@ -32,6 +33,9 @@ class ReasoningParser:
 
     def __init__(self, tokenizer: "TokenizerLike", *args, **kwargs):
         self.model_tokenizer = tokenizer
+        # Optional vLLM ModelConfig from the server. Use get (not pop) so composite
+        # parsers can forward **kwargs to nested parsers.
+        self._model_config: ModelConfig | None = kwargs.get("model_config")
 
     @cached_property
     def vocab(self) -> dict[str, int]:
@@ -39,6 +43,20 @@ def vocab(self) -> dict[str, int]:
         # whereas all tokenizers have .get_vocab()
         return self.model_tokenizer.get_vocab()
 
+    @property
+    def reasoning_start_str(self) -> str | None:
+        """Set `reasoning_start_str` to the strings that delimit
+        the reasoning block (e.g. `""<seed:think>""` and `"<think>"`).
+        """
+        return None
+
+    @property
+    def reasoning_end_str(self) -> str | None:
+        """Set `reasoning_end_str` to the strings that delimit
+        the reasoning block (e.g. `""</seed:think>""` and `"</think>"`).
+        """
+        return None
+
     @abstractmethod
     def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
         """
@@ -150,6 +168,12 @@ def extract_reasoning_streaming(
         previously been parsed and extracted (see constructor)
         """
 
+    def adjust_request(
+        self, request: "ChatCompletionRequest | ResponsesRequest"
+    ) -> "ChatCompletionRequest | ResponsesRequest":
+        """Adjust request parameters; override in subclasses as needed."""
+        return request
+
     def prepare_structured_tag(
         self,
         original_tag: str | None,
@@ -298,7 +322,7 @@ def _decorator(obj: type[ReasoningParser]) -> type[ReasoningParser]:
             if isinstance(name, str):
                 names = [name]
             elif is_list_of(name, str):
-                names = name
+                names = cast(list[str], name)
             else:
                 names = [class_name]
 
diff --git a/vllm/reasoning/basic_parsers.py b/vllm/reasoning/basic_parsers.py
index a8bb33d2c9cd..938b7f736b2c 100644
--- a/vllm/reasoning/basic_parsers.py
+++ b/vllm/reasoning/basic_parsers.py
@@ -39,6 +39,14 @@ def end_token(self) -> str:
         """The token that ends reasoning content."""
         raise NotImplementedError
 
+    @property
+    def reasoning_start_str(self) -> str:
+        return self.start_token
+
+    @property
+    def reasoning_end_str(self) -> str:
+        return self.end_token
+
     def __init__(self, tokenizer: TokenizerLike, *args, **kwargs):
         super().__init__(tokenizer, *args, **kwargs)
 
diff --git a/vllm/reasoning/cohere_command_reasoning_parser.py b/vllm/reasoning/cohere_command_reasoning_parser.py
new file mode 100644
index 000000000000..b28a59089e73
--- /dev/null
+++ b/vllm/reasoning/cohere_command_reasoning_parser.py
@@ -0,0 +1,565 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from __future__ import annotations
+
+import json
+from collections.abc import Mapping, Sequence
+from typing import Any, NamedTuple, TypedDict, TypeGuard
+
+import regex as re
+import xgrammar as xgr
+
+try:
+    from cohere_melody import PyFilter, PyFilterOptions
+except ImportError as e:
+    raise ImportError(
+        "The Cohere reasoning parser requires the `cohere_melody` "
+        "package, which is not installed. Install it with:\n"
+        "    pip install cohere_melody"
+    ) from e
+
+
+from vllm.entrypoints.mcp.tool_server import ToolServer
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+)
+from vllm.entrypoints.openai.engine.protocol import (
+    AnyResponseFormat,
+    DeltaFunctionCall,
+    DeltaMessage,
+    DeltaToolCall,
+)
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+from vllm.reasoning import ReasoningParser
+from vllm.sampling_params import StructuredOutputsParams
+from vllm.tokenizers import TokenizerLike
+
+REPLACEMENT_CHAR = "\ufffd"
+
+
+class CohereTagRegistry(NamedTuple):
+    """A single ``structural_tag`` trigger / end pair (``begin`` uses ``trigger``)."""
+
+    trigger: str
+    end: str
+
+
+class CohereTagStyle(NamedTuple):
+    """The structural tags style for a given model architecture.
+
+    ``json_tags`` lists every JSON-schema wrapper the model may emit (MOE uses
+    both response and text delimiters). ``tools`` is the tool-call wrapper.
+    """
+
+    json_tags: tuple[CohereTagRegistry, ...]
+    tools: CohereTagRegistry
+
+
+class CohereNormalizedTool(TypedDict):
+    """A tool definition normalized to the shape ``collect_tool_schema`` expects.
+
+    ``parameters`` is a JSON Schema object (possibly empty) describing the tool's
+    call signature.
+    """
+
+    name: str
+    parameters: dict[str, Any]
+
+
+COMMAND_A_TOOLS_TAG = CohereTagRegistry(
+    trigger="<|START_ACTION|>",
+    end="<|END_ACTION|>",
+)
+COMMAND_A_JSON_TAG = CohereTagRegistry(
+    trigger="<|START_RESPONSE|>",
+    end="<|END_RESPONSE|>",
+)
+COMMAND_A_PLUS_JSON_TAG = CohereTagRegistry(
+    trigger="<|START_TEXT|>",
+    end="<|END_TEXT|>",
+)
+
+MODEL_TO_TAG_STYLE: dict[str, CohereTagStyle] = {
+    "Cohere2ForCausalLM": CohereTagStyle(
+        json_tags=(COMMAND_A_JSON_TAG,),
+        tools=COMMAND_A_TOOLS_TAG,
+    ),
+    "Cohere2VisionForConditionalGeneration": CohereTagStyle(
+        json_tags=(COMMAND_A_JSON_TAG, COMMAND_A_PLUS_JSON_TAG),
+        tools=COMMAND_A_TOOLS_TAG,
+    ),
+    "Cohere2MoeForCausalLM": CohereTagStyle(
+        json_tags=(COMMAND_A_JSON_TAG,),
+        tools=COMMAND_A_TOOLS_TAG,
+    ),
+}
+
+
+def collect_tool_schema(tool_schema: list[CohereNormalizedTool]) -> str:
+    """Build an xgrammar EBNF grammar that matches a JSON array of tool calls.
+
+    The grammar shape is architecture-independent; callers are responsible for
+    wrapping it in the correct structural tag (see ``CohereTagStyle.tools``).
+    """
+    tool_dictionary: dict[str, str] = {}
+    for tool in tool_schema:
+        tool_name = tool["name"]
+        tool_parameters = json.dumps(tool["parameters"])
+        json_schema = f"""{{
+                        "type": "object",
+                        "properties": {{
+                            "tool_call_id": {{
+                                "type": "string",
+                                "pattern": "^[0-9]+$"
+                            }},
+                            "tool_name": {{
+                                "type": "string",
+                                "const": "{tool_name}"
+                            }},
+                            "parameters": {tool_parameters}
+                            }}
+                            }}"""
+        tool_grammar = str(xgr.Grammar.from_json_schema(json_schema))
+        for match in re.findall(r"\b(\w+)\s*::=", tool_grammar):
+            tool_grammar = re.sub(
+                rf"\b{re.escape(match)}\b", tool_name + match, tool_grammar
+            )
+        tool_dictionary[tool_name] = f"{tool_name} ::= {tool_name}root\n{tool_grammar}"
+    # Emitted grammar shape:
+    #   root  ::= tools
+    #   tools ::= ws "[" ws tool ws ("," ws tool)* ws "]" ws
+    #   ws    ::= (" " | "\t" | "\n")*
+    #   tool  ::= <tool_a> | <tool_b> | ...         (one alternative per input)
+    #   <tool_x>     ::= <tool_x>root               (per-tool xgrammar rules)
+    #   <tool_x>root ::= ...                        (from xgr.Grammar.from_json_schema)
+    tool_alternatives = "tool ::= " + " | ".join(tool_dictionary.keys())
+    tool_rules = "\n    ".join(tool_dictionary.values())
+    grammar = f"""root ::= tools
+    tools ::= ws "[" ws tool ws ("," ws tool)*  ws "]" ws
+    ws    ::= (" " | "\\t" | "\\n")*
+    {tool_alternatives}
+    {tool_rules}
+    """
+    return grammar
+
+
+def _tool_definitions_to_schema_list(
+    tools: str | list[Any],
+) -> list[CohereNormalizedTool]:
+    """
+    Build the list of ``CohereNormalizedTool`` dicts expected by
+    ``collect_tool_schema``.
+
+    Accepts:
+    - JSON string
+    - list of dicts with top-level ``name`` / ``parameters``
+    - list of Chat Completions-style ``{"type": "function", "function": {...}}``
+    - list of Pydantic models with ``model_dump()``
+    """
+    if isinstance(tools, str):
+        try:
+            parsed = json.loads(tools)
+        except json.JSONDecodeError:
+            return []
+        if not isinstance(parsed, list):
+            return []
+    else:
+        parsed = list(tools)
+
+    out: list[CohereNormalizedTool] = []
+    for raw in parsed:
+        t = raw.model_dump() if hasattr(raw, "model_dump") else raw
+        if not isinstance(t, dict):
+            continue
+        # Unwrap Chat Completions' ``{"type": "function", "function": {...}}``
+        # shape; otherwise take the dict as-is.
+        if t.get("type") == "function" and isinstance(t.get("function"), dict):
+            t = t["function"]
+        name = t.get("name")
+        if not isinstance(name, str):
+            continue
+        params = t.get("parameters")
+        out.append(
+            CohereNormalizedTool(
+                name=name,
+                parameters=params if isinstance(params, dict) else {},
+            )
+        )
+    return out
+
+
+def _has_effective_tools(
+    tools: str | list[Any] | None,
+) -> TypeGuard[str | list[Any]]:
+    """
+    True when ``tools`` contains at least one tool definition to convert.
+
+    ``ResponsesRequest`` defaults ``tools`` to ``[]``; ``ChatCompletionRequest``
+    uses ``None``. Both mean "no tools" here. Strings (e.g. a JSON blob) are
+    treated as effective only when non-blank.
+    """
+    if tools is None:
+        return False
+    if isinstance(tools, str):
+        return bool(tools.strip())
+    return len(tools) > 0
+
+
+# Builder: produces vLLM response_format in xgrammar's canonical format.
+# See xgrammar docs: type "structural_tag" with "format" = triggered_tags
+# and tag content type = json_schema | grammar.
+def convert_schema_to_structural_tags(
+    schema: dict | None = None,
+    tools: str | list[Any] | None = None,
+    model_architecture: str | None = None,
+) -> str | None:
+    """
+    Returns a response_format string accepted by xgrammar's structural tag format.
+    Uses the canonical shape: {"type": "structural_tag", "format": {...}} with
+    format.type "triggered_tags" and tag content type "json_schema" or "grammar".
+
+    Callers that are not on an engine path (e.g. the reasoning parser) must pass
+    ``model_architecture`` explicitly.
+    """
+    if model_architecture is None or model_architecture not in MODEL_TO_TAG_STYLE:
+        return None
+    style = MODEL_TO_TAG_STYLE[model_architecture]
+
+    tags: list[dict] = []
+    triggers: list[str] = []
+
+    def _add_tag(tag: CohereTagRegistry, content: dict) -> None:
+        tags.append({"begin": tag.trigger, "content": content, "end": tag.end})
+        triggers.append(tag.trigger)
+
+    if schema is not None:
+        # One structural tag per JSON wrapper (e.g. MOE: response + text).
+        # Same for schema-only and "tools plus JSON mode" (North: schema when
+        # the model does not call tools).
+        for jt in style.json_tags:
+            _add_tag(jt, {"type": "json_schema", "json_schema": schema})
+
+    if _has_effective_tools(tools):
+        # ``tools`` may be a JSON string (poseidon / RESPONSE_FORMAT_TOOL_DEFINITIONS)
+        # or a list (Chat Completions ``request.tools`` as Pydantic models or dicts).
+        tool_schema_list = _tool_definitions_to_schema_list(tools)
+        if not tool_schema_list:
+            raise ValueError(
+                "No valid tool definitions could be parsed from the request for "
+                "structural tag conversion."
+            )
+        tool_grammar = collect_tool_schema(tool_schema_list)
+        _add_tag(style.tools, {"type": "grammar", "grammar": tool_grammar})
+
+    if not tags:
+        return None
+    return json.dumps(
+        {
+            "type": "structural_tag",
+            "format": {
+                "type": "triggered_tags",
+                "triggers": triggers,
+                "tags": tags,
+            },
+        }
+    )
+
+
+def _response_format_type(
+    response_format: AnyResponseFormat | dict | None,
+) -> str | None:
+    if response_format is None:
+        return None
+    if isinstance(response_format, dict):
+        t = response_format.get("type")
+        return t if isinstance(t, str) else None
+    return response_format.type
+
+
+def _maybe_parse_json_dict(value: Any) -> dict | None:
+    """If value is a JSON string, parse to dict; otherwise require dict."""
+    if isinstance(value, dict):
+        return value
+    if isinstance(value, str):
+        try:
+            parsed = json.loads(value)
+        except (TypeError, json.JSONDecodeError):
+            return None
+        return parsed if isinstance(parsed, dict) else None
+    return None
+
+
+def _unwrap_nested_schema(candidate: Any) -> dict | None:
+    """Return ``candidate`` as a dict, unwrapping a nested ``schema`` if present.
+
+    Returns ``None`` if ``candidate`` is not (and cannot be parsed into) a dict.
+    """
+    cand = _maybe_parse_json_dict(candidate)
+    if not isinstance(cand, dict):
+        return None
+    nested = cand.get("schema")
+    return nested if isinstance(nested, dict) else cand
+
+
+def _schema_from_json_schema_field(js_wr: Any) -> dict | None:
+    """
+    Extract the JSON Schema object from Chat Completions ``json_schema`` payload.
+
+    Accepts:
+    - ``JsonSchemaResponseFormat`` (Pydantic) with ``schema`` / ``json_schema`` field
+    - dict in OpenAI shape ``{"name": ..., "schema": {...}}``
+    - dict with ``json_schema`` key holding either the schema or a nested wrapper
+    - dict that is already a JSON Schema document (some clients omit the wrapper)
+    - JSON strings for any of the above
+    """
+    if js_wr is None:
+        return None
+
+    parsed_wr = _maybe_parse_json_dict(js_wr)
+    if parsed_wr is not None:
+        js_wr = parsed_wr
+
+    if hasattr(js_wr, "model_dump"):
+        for by_alias in (True, False):
+            try:
+                data = js_wr.model_dump(by_alias=by_alias, exclude_none=False)
+            except TypeError:
+                data = js_wr.model_dump(by_alias=by_alias)
+            out = _unwrap_nested_schema(data.get("schema") or data.get("json_schema"))
+            if out is not None:
+                return out
+        inner_attr = getattr(js_wr, "json_schema", None)
+        return inner_attr if isinstance(inner_attr, dict) else None
+
+    if isinstance(js_wr, dict):
+        for key in ("schema", "json_schema"):
+            out = _unwrap_nested_schema(js_wr.get(key))
+            if out is not None:
+                return out
+        return js_wr
+
+    return None
+
+
+def _schema_dict_from_chat_response_format(
+    rf: AnyResponseFormat | dict | None,
+) -> dict | None:
+    """JSON schema dict from Chat Completions ``request.response_format`` only."""
+    if rf is None:
+        return None
+    rf_type = _response_format_type(rf)
+    if rf_type == "json_object":
+        return {"type": "object"}
+    if rf_type != "json_schema":
+        return None
+    js_wr = (
+        rf.get("json_schema")
+        if isinstance(rf, dict)
+        else getattr(rf, "json_schema", None)
+    )
+    return _schema_from_json_schema_field(js_wr)
+
+
+def _schema_dict_from_structured_outputs(
+    so: StructuredOutputsParams | None,
+) -> dict | None:
+    """Schema dict from ``structured_outputs`` (``json`` / ``json_object``).
+
+    Same unwrapping as ``json_schema``. ``json`` is expected to be ``str`` or
+    ``dict`` (enforced by ``StructuredOutputsParams`` / request models); other
+    types raise ``ValueError`` only if a caller bypasses that validation.
+    """
+    if so is None:
+        return None
+    if so.json_object:
+        return {"type": "object"}
+    raw: Any = so.json
+    if raw is None:
+        return None
+
+    if hasattr(raw, "model_dump"):
+        out = _schema_from_json_schema_field(raw)
+        if out is None:
+            raise ValueError(
+                "structured_outputs.json model has no extractable JSON Schema."
+            )
+        return out
+
+    if isinstance(raw, str):
+        if not raw.strip():
+            raise ValueError("structured_outputs.json cannot be empty.")
+        try:
+            raw = json.loads(raw)
+        except json.JSONDecodeError as e:
+            raise ValueError("structured_outputs.json must be valid JSON.") from e
+        if not isinstance(raw, dict):
+            raise ValueError("structured_outputs.json must decode to a JSON object.")
+
+    if isinstance(raw, Mapping):
+        body = raw if isinstance(raw, dict) else dict(raw)
+        return _schema_from_json_schema_field(body) or body
+
+    raise ValueError(
+        f"structured_outputs.json has unsupported type {type(raw).__name__}."
+    )
+
+
+class BaseCohereCommandReasoningParser(ReasoningParser):
+    def __init__(
+        self,
+        tokenizer: TokenizerLike,
+        *args,
+        streaming_opts: PyFilterOptions,
+        unary_opts: PyFilterOptions,
+        **kwargs,
+    ):
+        super().__init__(tokenizer, *args, **kwargs)
+        self.end_token_id = tokenizer.convert_tokens_to_ids("<|END_THINKING|>")
+        self.unary_opts = unary_opts
+        self.melody_unary = PyFilter(unary_opts)
+        self.melody_streaming = PyFilter(streaming_opts)
+
+    @property
+    def reasoning_start_str(self) -> str | None:
+        return "<|START_THINKING|>"
+
+    @property
+    def reasoning_end_str(self) -> str | None:
+        return "<|END_THINKING|>"
+
+    def extract_reasoning_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+    ) -> DeltaMessage | None:
+        r = self.melody_streaming.write_decoded(delta_text)
+        if r.content is None and r.reasoning is None and not r.tool_calls:
+            return None
+        msg = DeltaMessage()
+        if r.content is not None:
+            msg.content = r.content
+        if r.reasoning is not None:
+            msg.reasoning = r.reasoning
+        if r.tool_calls:
+            msg.tool_calls = [
+                DeltaToolCall(
+                    id=tc.id,
+                    index=tc.index,
+                    type="function",
+                    function=DeltaFunctionCall(name=tc.name, arguments=tc.arguments),
+                )
+                for tc in r.tool_calls
+            ]
+        return msg
+
+    def extract_reasoning(
+        self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
+    ) -> tuple[str | None, str | None]:
+        result = self.melody_unary.process_full_text(model_output)
+        return result.reasoning, result.content
+
+    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
+        token_buf: list[int] = []
+        content_ids: list[int] = []
+        content_filter = PyFilter(self.unary_opts)
+        for t in input_ids:
+            token_buf.append(t)
+            s = self.model_tokenizer.decode(token_buf, skip_special_tokens=False)
+            if s.endswith(REPLACEMENT_CHAR):
+                continue
+            r = content_filter.write_decoded(s)
+            if r.content is not None:
+                content_ids.extend(token_buf)
+            token_buf = []
+        return content_ids
+
+    def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
+        return any(tid == self.end_token_id for tid in reversed(input_ids))
+
+    def prepare_structured_tag(
+        self, original_tag: str | None, tool_server: ToolServer | None
+    ) -> str | None:
+        # Responses API replaces ``structural_tag`` via the reasoning parser.
+        # Default ``ReasoningParser.prepare_structured_tag`` returns None, which
+        # would clear a Cohere tag produced in ``adjust_request`` and break
+        # ``StructuredOutputsParams`` validation. Preserve the existing tag.
+        return original_tag
+
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
+        so = request.structured_outputs
+        if so is not None and so.structural_tag:
+            return request
+        # Schema: prefer ``response_format`` (OpenAI Chat Completions), then
+        # ``structured_outputs.json`` / ``json_object`` (vLLM direct). Tools stay
+        # on ``request.tools``.
+        rf = (
+            request.response_format
+            if isinstance(request, ChatCompletionRequest)
+            else None
+        )
+        if rf is not None and _response_format_type(rf) == "structural_tag":
+            return request
+        model_architecture = (
+            self._model_config.architecture if self._model_config is not None else None
+        )
+        tools = request.tools
+        # ``response_format`` wins if both it and ``structured_outputs`` supply JSON.
+        schema = _schema_dict_from_chat_response_format(rf)
+        if schema is None:
+            schema = _schema_dict_from_structured_outputs(so)
+        if schema is None and not _has_effective_tools(tools):
+            return request
+        if model_architecture is None:
+            return request
+        result = convert_schema_to_structural_tags(
+            schema=schema,
+            tools=tools,
+            model_architecture=model_architecture,
+        )
+        if result is None:
+            # Unsupported architectures are not in ``MODEL_TO_TAG_STYLE``.
+            raise ValueError(
+                "Failed to build structural_tag guided decoding constraints from "
+                "this request's JSON schema and/or tools. The configured model "
+                f"architecture ({model_architecture!r}) does not support Cohere "
+                "command structural tags, or the schema cannot be expressed in "
+                "that format."
+            )
+        request.structured_outputs = StructuredOutputsParams(structural_tag=result)
+        # Folded JSON constraints into ``structural_tag``; drop ``response_format``
+        # when it was the source so ``to_sampling_params`` does not also set ``json`` /
+        # ``json_object`` (mutually exclusive in ``StructuredOutputsParams``).
+        if isinstance(request, ChatCompletionRequest) and rf is not None:
+            rf_type = _response_format_type(rf)
+            if rf_type in ("json_schema", "json_object"):
+                request.response_format = None
+        return request
+
+
+class CohereCommand3ReasoningParser(BaseCohereCommandReasoningParser):
+    def __init__(self, tokenizer: TokenizerLike, *args, **kwargs):
+        super().__init__(
+            tokenizer,
+            *args,
+            streaming_opts=PyFilterOptions().cmd3(),
+            unary_opts=PyFilterOptions().cmd3().no_tools(),
+            **kwargs,
+        )
+
+
+class CohereCommand4ReasoningParser(BaseCohereCommandReasoningParser):
+    def __init__(self, tokenizer: TokenizerLike, *args, **kwargs):
+        super().__init__(
+            tokenizer,
+            *args,
+            streaming_opts=PyFilterOptions().cmd4(),
+            unary_opts=PyFilterOptions().cmd4().no_tools(),
+            **kwargs,
+        )
diff --git a/vllm/reasoning/deepseek_v3_reasoning_parser.py b/vllm/reasoning/deepseek_v3_reasoning_parser.py
index d2f7f50a3284..bb79afd8dede 100644
--- a/vllm/reasoning/deepseek_v3_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_v3_reasoning_parser.py
@@ -40,6 +40,14 @@ def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
         else:
             self._parser = IdentityReasoningParser(tokenizer, *args, **kwargs)
 
+    @property
+    def reasoning_start_str(self) -> str | None:
+        return self._parser.reasoning_start_str
+
+    @property
+    def reasoning_end_str(self) -> str | None:
+        return self._parser.reasoning_end_str
+
     def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
         return self._parser.is_reasoning_end(input_ids)
 
diff --git a/vllm/reasoning/gemma4_reasoning_parser.py b/vllm/reasoning/gemma4_reasoning_parser.py
new file mode 100644
index 000000000000..6f2241603f9a
--- /dev/null
+++ b/vllm/reasoning/gemma4_reasoning_parser.py
@@ -0,0 +1,225 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Sequence
+from typing import TYPE_CHECKING
+
+from vllm.entrypoints.openai.engine.protocol import DeltaMessage
+from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
+from vllm.tokenizers import TokenizerLike
+
+if TYPE_CHECKING:
+    from vllm.entrypoints.openai.chat_completion.protocol import (
+        ChatCompletionRequest,
+    )
+    from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+
+# Role label that Gemma4 emits at the start of the thinking channel.
+# The model generates: <|channel>thought\n...reasoning...<channel|>
+# This prefix must be stripped to expose only the actual reasoning content.
+_THOUGHT_PREFIX = "thought\n"
+
+
+class Gemma4ReasoningParser(BaseThinkingReasoningParser):
+    """
+    Reasoning parser for Google Gemma4 thinking models.
+
+    Gemma4 uses <|channel>...<channel|> tokens to delimit reasoning/thinking
+    content within its output. Thinking mode is activated by passing
+    ``enable_thinking=True`` in the chat template kwargs, which injects a
+    system turn containing <|think|> (token 98) to trigger chain-of-thought
+    reasoning.
+
+    Output pattern when thinking is enabled::
+
+        <|channel>thought
+        ...chain of thought reasoning...<channel|>
+        Final answer text here.
+
+    The ``thought\\n`` role label inside the channel delimiters is a
+    structural artefact (analogous to ``user\\n`` in ``<|turn>user\\n...``).
+    This parser strips it so that downstream consumers see only the
+    actual reasoning text, consistent with the offline parser
+    (``vllm.reasoning.gemma4_utils._strip_thought_label``).
+    """
+
+    def __init__(self, tokenizer: TokenizerLike, *args, **kwargs):
+        super().__init__(tokenizer, *args, **kwargs)
+        # Instance state for streaming prefix stripping.
+        # Tracks only the reasoning text received from the base parser,
+        # independent of current_text (which may contain pre-reasoning
+        # content and lacks special token text due to
+        # skip_special_tokens=True).
+        self._reasoning_text: str = ""
+        self._prefix_stripped: bool = False
+        self.new_turn_token_id = self.vocab["<|turn>"]
+        self.tool_call_token_id = self.vocab["<|tool_call>"]
+        self.tool_response_token_id = self.vocab["<|tool_response>"]
+
+    def adjust_request(
+        self, request: "ChatCompletionRequest | ResponsesRequest"
+    ) -> "ChatCompletionRequest | ResponsesRequest":
+        """Disable special-token stripping to preserve boundary tokens."""
+        request.skip_special_tokens = False
+        return request
+
+    @property
+    def start_token(self) -> str:
+        """The token that starts reasoning content."""
+        return "<|channel>"
+
+    @property
+    def end_token(self) -> str:
+        """The token that ends reasoning content."""
+        return "<channel|>"
+
+    def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
+        start_token_id = self.start_token_id
+        end_token_id = self.end_token_id
+        new_turn_token_id = self.new_turn_token_id
+        tool_call_token_id = self.tool_call_token_id
+        tool_response_token_id = self.tool_response_token_id
+
+        # Search from the end of input_ids to find the last match.
+        for i in range(len(input_ids) - 1, -1, -1):
+            if input_ids[i] == start_token_id:
+                return False
+            if input_ids[i] == tool_call_token_id:
+                # We're generating a tool call, so reasoning must be ended.
+                return True
+            if input_ids[i] in (new_turn_token_id, tool_response_token_id):
+                # We found a new turn or tool response token so don't consider
+                # reasoning ended yet, since the model starts new reasoning
+                # after these tokens.
+                return False
+            if input_ids[i] == end_token_id:
+                return True
+        return False
+
+    # ------------------------------------------------------------------
+    # Non-streaming path
+    # ------------------------------------------------------------------
+
+    def extract_reasoning(
+        self,
+        model_output: str,
+        request: "ChatCompletionRequest | ResponsesRequest",
+    ) -> tuple[str | None, str | None]:
+        """Extract reasoning, stripping the ``thought\\n`` role label."""
+        if self.start_token not in model_output and self.end_token not in model_output:
+            # Default to content history if no tags are present
+            # (or if they were stripped)
+            return None, model_output
+
+        reasoning, content = super().extract_reasoning(model_output, request)
+        if reasoning is not None:
+            reasoning = _strip_thought_label(reasoning)
+        return reasoning, content
+
+    # ------------------------------------------------------------------
+    # Streaming path
+    # ------------------------------------------------------------------
+
+    def extract_reasoning_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+    ) -> DeltaMessage | None:
+        """Extract streaming reasoning, stripping ``thought\\n`` from the
+        first reasoning delta(s).
+
+        The ``thought\\n`` prefix may arrive as a single delta or split
+        across multiple deltas (e.g. ``"thought"`` then ``"\\n"``). We
+        buffer early reasoning tokens until we can determine whether the
+        prefix is present, then emit the buffered content minus the
+        prefix.
+
+        Unlike the previous implementation which reconstructed accumulated
+        reasoning from ``current_text``, this uses instance state
+        (``_reasoning_text``) to track only the reasoning content returned
+        by the base parser. This is necessary because
+        ``skip_special_tokens=True`` (the vLLM default) causes the
+        ``<|channel>`` delimiter to be invisible in ``current_text``,
+        making it impossible to separate pre-reasoning content from
+        reasoning content via string matching.
+        """
+        result = super().extract_reasoning_streaming(
+            previous_text,
+            current_text,
+            delta_text,
+            previous_token_ids,
+            current_token_ids,
+            delta_token_ids,
+        )
+        if result is None:
+            return None
+
+        if result.reasoning is None:
+            return result
+
+        # Accumulate ONLY the reasoning text from base parser results.
+        # This is immune to pre-reasoning content pollution.
+        self._reasoning_text += result.reasoning
+
+        # Once the prefix has been handled, all subsequent reasoning
+        # deltas pass through unchanged.
+        if self._prefix_stripped:
+            return result
+
+        # ---- Prefix stripping logic ----
+
+        # Case 1: We've accumulated enough to confirm the prefix is
+        # present. Strip it and pass through the remainder.
+        if self._reasoning_text.startswith(_THOUGHT_PREFIX):
+            prefix_len = len(_THOUGHT_PREFIX)
+            # How much reasoning was accumulated before this delta?
+            prev_reasoning_len = len(self._reasoning_text) - len(result.reasoning)
+            if prev_reasoning_len >= prefix_len:
+                # Prefix was already consumed by prior deltas; this
+                # delta is entirely real content — pass through.
+                self._prefix_stripped = True
+                return result
+            else:
+                # Part or all of the prefix is in this delta.
+                chars_of_prefix_in_delta = prefix_len - prev_reasoning_len
+                stripped = result.reasoning[chars_of_prefix_in_delta:]
+                if stripped:
+                    self._prefix_stripped = True
+                    result.reasoning = stripped
+                    return result
+                else:
+                    if len(self._reasoning_text) >= prefix_len:
+                        self._prefix_stripped = True
+                        result.reasoning = ""
+                        return result
+                    return None
+
+        # Case 2: Accumulated text is a strict prefix of
+        # _THOUGHT_PREFIX (e.g. we've only seen "thou" so far).
+        # Buffer by suppressing — we can't yet tell if this will
+        # become the full prefix or diverge.
+        if _THOUGHT_PREFIX.startswith(self._reasoning_text):
+            return None
+
+        # Case 3: Accumulated text doesn't match the thought prefix
+        # at all. This means prior deltas were buffered (suppressed
+        # by Case 2) but the text diverged. Re-emit the full
+        # accumulated text to avoid data loss.
+        self._prefix_stripped = True
+        result.reasoning = self._reasoning_text
+        return result
+
+
+def _strip_thought_label(text: str) -> str:
+    """Remove the ``thought\\n`` role label from the beginning of text.
+
+    Mirrors ``vllm.reasoning.gemma4_utils._strip_thought_label`` from the
+    offline parser.
+    """
+    if text.startswith(_THOUGHT_PREFIX):
+        return text[len(_THOUGHT_PREFIX) :]
+    return text
diff --git a/vllm/reasoning/gemma4_utils.py b/vllm/reasoning/gemma4_utils.py
new file mode 100644
index 000000000000..9cdac72039e8
--- /dev/null
+++ b/vllm/reasoning/gemma4_utils.py
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# Copyright 2025 Google Inc. HuggingFace Inc. team. All rights reserved.
+
+"""Gemma4 thinking/reasoning output parsing utilities for offline inference.
+
+Standalone functions that parse decoded model text to extract structured
+thinking content from Gemma4 models. These are pure-Python utilities with
+zero heavy dependencies — they work on raw decoded strings from any
+inference backend (vLLM, HuggingFace, TGI, etc.).
+
+For the OpenAI-compatible API reasoning parser (streaming +
+non-streaming), see ``vllm.reasoning.gemma4_reasoning_parser``.
+For tool call parsing, see ``vllm.tool_parsers.gemma4_utils``.
+
+Usage with vLLM offline inference::
+
+    from vllm import LLM, SamplingParams
+    from vllm.reasoning.gemma4_utils import parse_thinking_output
+
+    llm = LLM(model="google/gemma-4-it")
+    outputs = llm.generate(prompt, SamplingParams(...))
+    text = tokenizer.decode(outputs[0].outputs[0].token_ids, skip_special_tokens=False)
+
+    # Extract thinking / answer (works with or without enable_thinking)
+    result = parse_thinking_output(text)
+    print(result["thinking"])  # chain-of-thought or None
+    print(result["answer"])  # final answer
+
+Ported from ``transformers.models.gemma4.utils_gemma4`` so that vLLM users
+do not need a transformers dependency for output parsing.
+"""
+
+# ---- Thinking Mode Utility ----
+
+# Thinking delimiter tokens as they appear in decoded text.
+# Gemma4 uses <|channel> (start) and <channel|> (end) as thinking delimiters.
+_THINKING_START_TAG = "<|channel>"
+_THINKING_END_TAG = "<channel|>"
+
+# Sentinel tokens that may appear in decoded output.
+_TURN_END_TAG = "<turn|>"
+
+
+def parse_thinking_output(text: str) -> dict[str, str | None]:
+    """Parse decoded Gemma4 model output.
+
+    Use this on **all** Gemma4 output regardless of whether thinking mode
+    was enabled.  It handles three cases:
+
+    1. **Thinking enabled, tags present** — splits on ``<|channel>``/
+       ``<channel|>`` to separate chain-of-thought from the answer and
+       strips the ``thought\\n`` role label.
+    2. **Thinking disabled, spurious label** — strips the bare
+       ``thought\\n`` prefix that some Gemma4 models emit even
+       without thinking mode.
+    3. **Clean output** — returns the text unchanged.
+
+    The answer text is always cleaned of trailing sentinel tokens
+    (``<turn|>``, ``<eos>``, etc.).
+
+    Args:
+        text: Decoded model output text (from ``tokenizer.decode(...)``).
+
+    Returns:
+        A dict with keys:
+            - ``"thinking"``: The chain-of-thought text, or ``None`` if no
+              thinking delimiters were found.
+            - ``"answer"``: The final answer text.
+
+    Example::
+
+        >>> from vllm.reasoning.gemma4_utils import parse_thinking_output
+        >>> output_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
+        >>> result = parse_thinking_output(output_text)
+        >>> print(result["thinking"])  # chain-of-thought reasoning or None
+        >>> print(result["answer"])    # final answer
+    """
+    if _THINKING_END_TAG in text:
+        parts = text.split(_THINKING_END_TAG, 1)
+        thinking_block = parts[0]
+        answer = _clean_answer(parts[1])
+
+        # Extract thinking content: strip the start tag if present
+        if _THINKING_START_TAG in thinking_block:
+            thinking = thinking_block.split(_THINKING_START_TAG, 1)[1]
+        else:
+            thinking = thinking_block
+
+        # Strip the "thought\n" channel role label the model emits inside
+        # <|channel>thought\n...<channel|> (analogous to "user\n" in
+        # <|turn>user\n...<turn|>).
+        thinking = _strip_thought_label(thinking.strip())
+        thinking = thinking.strip()
+
+        return {"thinking": thinking, "answer": answer}
+
+    # No thinking delimiters found.
+    # Strip spurious "thought\n" role label that some Gemma4 models sometimes
+    # emit even without thinking mode enabled, then clean trailing tokens.
+    answer = _strip_thought_label(text)
+    answer = _clean_answer(answer)
+    return {"thinking": None, "answer": answer}
+
+
+def _strip_thought_label(text: str) -> str:
+    """Strip the spurious ``thought\\n`` label from the start of text.
+
+    Only strips when ``thought`` appears as the very first word followed by
+    a newline — preserving the word ``thought`` in any other context.
+    """
+    if text.startswith("thought\n"):
+        return text[len("thought\n") :]
+    return text
+
+
+def _clean_answer(text: str) -> str:
+    """Clean trailing sentinel tokens from the answer text.
+
+    Strips ``<turn|>``, ``<eos>``, and surrounding whitespace that the
+    model appends at the end of its response.
+    """
+    text = text.strip()
+    # Strip trailing <turn|> (Gemma4 turn-end marker)
+    if text.endswith(_TURN_END_TAG):
+        text = text[: -len(_TURN_END_TAG)].rstrip()
+    # Strip trailing <eos> if present
+    if text.endswith("<eos>"):
+        text = text[:-5].rstrip()
+    return text
diff --git a/vllm/reasoning/gptoss_reasoning_parser.py b/vllm/reasoning/gptoss_reasoning_parser.py
index 89299d4b12b8..1ba933cca31e 100644
--- a/vllm/reasoning/gptoss_reasoning_parser.py
+++ b/vllm/reasoning/gptoss_reasoning_parser.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import json
-from collections.abc import Sequence
+from collections.abc import Iterable, Sequence
 from typing import TYPE_CHECKING
 
 from transformers import PreTrainedTokenizerBase
@@ -112,6 +112,25 @@ def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
                         return True
         return False
 
+    def is_reasoning_end_streaming(
+        self, input_ids: Sequence[int], delta_ids: Iterable[int]
+    ) -> bool:
+        # The pattern window covers the end-of-reasoning marker itself.
+        # We add len(delta_ids) so that under speculative decoding (where
+        # a single step can accept many tokens) the entire accepted chunk
+        # is always inside the scan region.
+        delta_ids = tuple(delta_ids)
+        pattern_len = (
+            len(self.reasoning_end_token_ids_prefix)
+            + self.reasoning_max_num_between_tokens
+            + len(self.reasoning_end_token_ids_suffix)
+        )
+        window = pattern_len + len(delta_ids)
+        n = len(input_ids)
+        if n <= window:
+            return self.is_reasoning_end(input_ids)
+        return self.is_reasoning_end(input_ids[n - window :])
+
     def extract_content_ids(self, input_ids: list[int]) -> list[int]:
         _, content, _ = parse_chat_output(input_ids)
         if content is None:
diff --git a/vllm/reasoning/hy_v3_reasoning_parser.py b/vllm/reasoning/hy_v3_reasoning_parser.py
new file mode 100644
index 000000000000..5beac22996dd
--- /dev/null
+++ b/vllm/reasoning/hy_v3_reasoning_parser.py
@@ -0,0 +1,141 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Iterable, Sequence
+
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+)
+from vllm.entrypoints.openai.engine.protocol import DeltaMessage
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+from vllm.logger import init_logger
+from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
+from vllm.reasoning.identity_reasoning_parser import IdentityReasoningParser
+from vllm.tokenizers import TokenizerLike
+
+logger = init_logger(__name__)
+
+
+class HYV3ReasoningParser(BaseThinkingReasoningParser):
+    """
+    HYV3 parser that delegates to either HYV3ReasoningParser or
+    IdentityReasoningParser based on `reasoning_effort`.
+
+    The HYV3 model uses <think>...</think> tokens to denote reasoning text.
+    This parser extracts the reasoning content from the model output.
+    """
+
+    def __init__(self, tokenizer: TokenizerLike, *args, **kwargs):
+        super().__init__(tokenizer, *args, **kwargs)
+
+        # First, If there is reasoning_effort in chat_kwargs,
+        # prioritize using chat_kwargs.reasoning_effort.
+        # If it's not present, use the "reasoning_effort" field
+        # at the outer level of the chat message.
+        # Otherwise, If both are empty, assign "no_think".
+
+        chat_kwargs = kwargs.get("chat_template_kwargs", {}) or {}
+        reasoning_effort = (
+            chat_kwargs.get("reasoning_effort")
+            or kwargs.get("reasoning_effort")
+            or "no_think"
+        )
+
+        logger.debug("reasoning_effort for choosing parser: %s", reasoning_effort)
+
+        self._identity_parser: IdentityReasoningParser | None
+        if reasoning_effort == "no_think":
+            self._identity_parser = IdentityReasoningParser(tokenizer, *args, **kwargs)
+        else:
+            self._identity_parser = None
+
+    @property
+    def start_token(self) -> str:
+        """The token that starts reasoning content."""
+        return "<think>"
+
+    @property
+    def end_token(self) -> str:
+        """The token that ends reasoning content."""
+        return "</think>"
+
+    def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
+        if self._identity_parser is not None:
+            return self._identity_parser.is_reasoning_end(input_ids)
+
+        return super().is_reasoning_end(input_ids)
+
+    def is_reasoning_end_streaming(
+        self, input_ids: Sequence[int], delta_ids: Iterable[int]
+    ) -> bool:
+        if self._identity_parser is not None:
+            return self._identity_parser.is_reasoning_end_streaming(
+                input_ids, delta_ids
+            )
+
+        return super().is_reasoning_end_streaming(input_ids, delta_ids)
+
+    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
+        if self._identity_parser is not None:
+            return self._identity_parser.extract_content_ids(input_ids)
+
+        return super().extract_content_ids(input_ids)
+
+    def extract_reasoning(
+        self, model_output: str, request: "ChatCompletionRequest | ResponsesRequest"
+    ) -> tuple[str | None, str | None]:
+        if self._identity_parser is not None:
+            return self._identity_parser.extract_reasoning(model_output, request)
+
+        return super().extract_reasoning(model_output, request)
+
+    def extract_reasoning_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+    ) -> DeltaMessage | None:
+        if self._identity_parser is not None:
+            return self._identity_parser.extract_reasoning_streaming(
+                previous_text,
+                current_text,
+                delta_text,
+                previous_token_ids,
+                current_token_ids,
+                delta_token_ids,
+            )
+
+        ret = super().extract_reasoning_streaming(
+            previous_text,
+            current_text,
+            delta_text,
+            previous_token_ids,
+            current_token_ids,
+            delta_token_ids,
+        )
+        if (
+            ret is not None
+            and self.start_token_id not in previous_token_ids
+            and self.start_token_id not in delta_token_ids
+        ):
+            if self.end_token_id in delta_token_ids:
+                # end token in delta with more tokens,
+                # extract reasoning content and content
+                end_index = delta_text.find(self.end_token)
+                reasoning = delta_text[:end_index]
+                content = delta_text[end_index + len(self.end_token) :]
+                return DeltaMessage(
+                    reasoning=reasoning,
+                    content=content if content else None,
+                )
+            elif self.end_token_id in previous_token_ids:
+                # end token in previous, thinking content ends
+                return DeltaMessage(content=delta_text)
+            else:
+                # no end token in previous or delta, reasoning content continues
+                return DeltaMessage(reasoning=delta_text)
+
+        return ret
diff --git a/vllm/reasoning/identity_reasoning_parser.py b/vllm/reasoning/identity_reasoning_parser.py
index b02a9d3184ae..c6f117e2f983 100644
--- a/vllm/reasoning/identity_reasoning_parser.py
+++ b/vllm/reasoning/identity_reasoning_parser.py
@@ -33,6 +33,14 @@ def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
                 "constructor during construction."
             )
 
+    @property
+    def reasoning_start_str(self) -> str | None:
+        return None
+
+    @property
+    def reasoning_end_str(self) -> str | None:
+        return None
+
     def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
         # Always return True, since we never treat reasoning specially
         return True
diff --git a/vllm/reasoning/kimi_k2_reasoning_parser.py b/vllm/reasoning/kimi_k2_reasoning_parser.py
index 8ee05ffd23a0..0b64c5c62ea1 100644
--- a/vllm/reasoning/kimi_k2_reasoning_parser.py
+++ b/vllm/reasoning/kimi_k2_reasoning_parser.py
@@ -65,6 +65,14 @@ def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
                 "tokens in the tokenizer!"
             )
 
+    @property
+    def reasoning_start_str(self) -> str | None:
+        return self._start_token
+
+    @property
+    def reasoning_end_str(self) -> str | None:
+        return self._end_token
+
     def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
         """
         Check if the reasoning content ends in the input_ids.
@@ -213,6 +221,10 @@ def extract_reasoning_streaming(
             return None
 
         if self._end_token_id in delta_token_ids:
+            if self._end_token not in delta_text:
+                # Token ID arrived before text was flushed (stop-sequence buffering).
+                # Wait for the next delta when the text becomes visible.
+                return None
             end_index = delta_text.find(self._end_token)
             reasoning = delta_text[:end_index]
             content = delta_text[end_index + len(self._end_token) :]
@@ -221,6 +233,9 @@ def extract_reasoning_streaming(
             )
 
         if self._tool_section_start_token_id in delta_token_ids:
+            if self._tool_section_start_token not in delta_text:
+                # Token ID arrived before text was flushed (stop-sequence buffering).
+                return None
             tool_index = delta_text.find(self._tool_section_start_token)
             reasoning = delta_text[:tool_index]
             content = delta_text[tool_index:]
diff --git a/vllm/reasoning/nemotron_v3_reasoning_parser.py b/vllm/reasoning/nemotron_v3_reasoning_parser.py
index 52a57ccc8e93..7256f0f1283d 100644
--- a/vllm/reasoning/nemotron_v3_reasoning_parser.py
+++ b/vllm/reasoning/nemotron_v3_reasoning_parser.py
@@ -26,7 +26,7 @@ def extract_reasoning(
                 chat_template_kwargs.get("enable_thinking") is False
                 or chat_template_kwargs.get("force_nonempty_content") is True
             )
-            and final_content is None
+            and (final_content is None or not final_content.strip())
         ):
             reasoning, final_content = final_content, reasoning
 
diff --git a/vllm/reasoning/olmo3_reasoning_parser.py b/vllm/reasoning/olmo3_reasoning_parser.py
index 9697b500447f..102508b9ac18 100644
--- a/vllm/reasoning/olmo3_reasoning_parser.py
+++ b/vllm/reasoning/olmo3_reasoning_parser.py
@@ -218,28 +218,51 @@ class Olmo3ReasoningParser(ReasoningParser):
           token is missing from generation.
     """
 
+    think_start: str = r"<think>"
+    think_end: str = r"</think>"
+    # </think> is split in 3 by the pre-tokenizer, first split can be tokenized
+    # with an optional leading space, so there are 2 possible tokenizations
+    think_end_first_split: list[str] = [r"Ġ</", r"</"]
+    think_end_rest_split: list[str] = [r"think", r">"]
+    # notice that the first think is optional; this allows template to
+    # work in cases when we hardcode a <think> at the beginning of the
+    # reasoning template.
+    reasoning_regex: re.Pattern = re.compile(
+        rf"^(?:{think_start})?(?P<reasoning>.*?)"
+        rf"{think_end}(?P<content>.*)$",
+        re.DOTALL,
+    )
+
     def __init__(self, tokenizer: "TokenizerLike", *args, **kwargs):
         super().__init__(tokenizer, *args, **kwargs)
-
-        self.think_start = r"<think>"
-        self.think_end = r"</think>"
-
-        # notice that the first think is optional; this allows template to
-        # work in cases when we hardcode a <think> at the beginning of the
-        # reasoning template.
-        reasoning_expr = (
-            rf"^(?:{self.think_start})?(?P<reasoning>.*?)"
-            rf"{self.think_end}(?P<content>.*)$"
-        )
-        self.reasoning_regex = re.compile(reasoning_expr, re.DOTALL)
-
         self.buffer = Olmo3ReasoningBuffer(
             think_start=self.think_start, think_end=self.think_end
         )
+        self.think_end_first_token_ids: list[int] = [
+            self.vocab[token] for token in self.think_end_first_split
+        ]
+        self.think_end_rest_token_ids: list[int] = [
+            self.vocab[token] for token in self.think_end_rest_split
+        ]
+
+    @property
+    def reasoning_start_str(self) -> str:
+        return self.think_start
+
+    @property
+    def reasoning_end_str(self) -> str:
+        return self.think_end
 
     def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
-        text = self.model_tokenizer.decode(input_ids)
-        return self.think_end in text
+        rest_ids = self.think_end_rest_token_ids
+        rest_len = len(rest_ids)
+        for i in range(len(input_ids) - rest_len, -1, -1):
+            if (
+                list(input_ids[i + 1 : i + 1 + rest_len]) == rest_ids
+                and input_ids[i] in self.think_end_first_token_ids
+            ):
+                return True
+        return False
 
     def extract_content_ids(self, input_ids: list[int]) -> list[int]:
         # for Olmo 3 streaming reason parsing, the stream parse
diff --git a/vllm/reasoning/poolside_v1_reasoning_parser.py b/vllm/reasoning/poolside_v1_reasoning_parser.py
new file mode 100644
index 000000000000..30031d8513a9
--- /dev/null
+++ b/vllm/reasoning/poolside_v1_reasoning_parser.py
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Laguna reasoning parser.
+
+``DeepSeekV3ReasoningParser.is_reasoning_end`` walks the entire
+token sequence backwards and returns ``True`` on the first ``</think>`` it
+sees. When called on ``prompt_token_ids`` that mistakes any stray
+``</think>`` in conversation history, few-shot examples or tool descriptions
+for a template-injected "thinking already ended" marker. In the streaming
+path (see ``vllm/entrypoints/openai/chat_completion/serving.py``,
+``prompt_is_reasoning_end_arr``) that false positive short-circuits the
+reasoning parser for the whole response, so any ``<think>...</think>`` the
+model emits itself ends up in the content field instead of the reasoning
+field.
+
+As we have more flexible templates, we instead scope
+the backward search to the current assistant turn: the
+walk terminates as soon as we hit the ``<assistant>`` start-of-message
+token. A ``</think>`` in a prior user turn or few-shot example is no longer
+visible.
+"""
+
+from collections.abc import Sequence
+
+from transformers import PreTrainedTokenizerBase
+
+from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
+from vllm.reasoning.deepseek_v3_reasoning_parser import DeepSeekV3ReasoningParser
+from vllm.reasoning.identity_reasoning_parser import IdentityReasoningParser
+
+
+class PoolsideV1ReasoningParser(DeepSeekV3ReasoningParser):
+    """Drop-in replacement for ``deepseek_v3`` that tolerates ``</think>``
+    tokens appearing anywhere in the prompt other than the generation prefix.
+    """
+
+    _start_of_assistant_message = "<assistant>"
+
+    def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
+        super().__init__(tokenizer, *args, **kwargs)
+
+        if self._start_of_assistant_message not in self.vocab:
+            raise ValueError(
+                f"Tokenizer must contain {self._start_of_assistant_message!r} token"
+            )
+        self._start_of_assistant_message_token_id = self.vocab[
+            self._start_of_assistant_message
+        ]
+
+    def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
+        # IdentityReasoningParser always returns True: no reasoning to parse.
+        if isinstance(self._parser, IdentityReasoningParser):
+            return True
+
+        assert isinstance(self._parser, DeepSeekR1ReasoningParser)
+        for tok_id in reversed(input_ids):
+            # <think>: reasoning is not yet ended.
+            if tok_id == self._parser.start_token_id:
+                return False
+            # </think>: reasoning has ended.
+            if tok_id == self._parser.end_token_id:
+                return True
+            # <assistant>: reached the start of the current assistant turn
+            # without seeing either marker. Anything further back belongs to
+            # the prior conversation and should be ignored.
+            if tok_id == self._start_of_assistant_message_token_id:
+                return False
+        return False
+
+
+__all__ = ["PoolsideV1ReasoningParser"]
diff --git a/vllm/reasoning/qwen3_reasoning_parser.py b/vllm/reasoning/qwen3_reasoning_parser.py
index 9a54aa759518..e38b0de3d822 100644
--- a/vllm/reasoning/qwen3_reasoning_parser.py
+++ b/vllm/reasoning/qwen3_reasoning_parser.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from collections.abc import Sequence
+from collections.abc import Iterable, Sequence
 from typing import TYPE_CHECKING
 
 from vllm.entrypoints.openai.engine.protocol import DeltaMessage
@@ -31,6 +31,10 @@ class Qwen3ReasoningParser(BaseThinkingReasoningParser):
     use an older chat template where the model generates <think> itself.
     This parser handles both styles: if <think> appears in the generated output
     it is stripped before extraction (non-streaming) or skipped (streaming).
+
+    NOTE: Qwen3.5 models may emit <tool_call> inside the thinking block
+    without closing </think> first. <tool_call> is treated as an implicit
+    end of reasoning, matching the approach in KimiK2ReasoningParser.
     """
 
     def __init__(self, tokenizer: "TokenizerLike", *args, **kwargs):
@@ -41,6 +45,11 @@ def __init__(self, tokenizer: "TokenizerLike", *args, **kwargs):
         # pure content when the user explicitly disables it.
         self.thinking_enabled = chat_kwargs.get("enable_thinking", True)
 
+        self._tool_call_tag = "<tool_call>"
+        self._tool_call_token_id = self.vocab.get(self._tool_call_tag)
+        self._tool_call_end_tag = "</tool_call>"
+        self._tool_call_end_token_id = self.vocab.get(self._tool_call_end_tag)
+
     @property
     def start_token(self) -> str:
         """The token that starts reasoning content."""
@@ -51,6 +60,58 @@ def end_token(self) -> str:
         """The token that ends reasoning content."""
         return "</think>"
 
+    def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
+        start_token_id = self.start_token_id
+        end_token_id = self.end_token_id
+        tool_call_token_id = self._tool_call_token_id
+        tool_call_end_token_id = self._tool_call_end_token_id
+
+        for i in range(len(input_ids) - 1, -1, -1):
+            token_id = input_ids[i]
+            if token_id == start_token_id:
+                # Found <think> before </think> or <tool_call>
+                return False
+            if token_id == end_token_id:
+                return True
+            if tool_call_token_id is not None and token_id == tool_call_token_id:
+                # Only treat as implicit reasoning end if this <tool_call>
+                # is NOT followed by </tool_call>.  Paired occurrences are
+                # template examples in the prompt, not model output.
+                if tool_call_end_token_id is not None and any(
+                    input_ids[j] == tool_call_end_token_id
+                    for j in range(i + 1, len(input_ids))
+                ):
+                    continue
+                return True
+        return False
+
+    def is_reasoning_end_streaming(
+        self, input_ids: Sequence[int], delta_ids: Iterable[int]
+    ) -> bool:
+        if super().is_reasoning_end_streaming(input_ids, delta_ids):
+            return True
+        if self._tool_call_token_id is not None:
+            return self._tool_call_token_id in delta_ids
+        return False
+
+    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
+        """
+        Extract content token ids from the input_ids.
+        """
+        result = super().extract_content_ids(input_ids)
+        if result:
+            return result
+        # Fall back: content starts at <tool_call> (implicit reasoning end).
+        if (
+            self._tool_call_token_id is not None
+            and self._tool_call_token_id in input_ids
+        ):
+            tool_call_index = (
+                len(input_ids) - 1 - input_ids[::-1].index(self._tool_call_token_id)
+            )
+            return input_ids[tool_call_index:]
+        return []
+
     def extract_reasoning(
         self, model_output: str, request: "ChatCompletionRequest | ResponsesRequest"
     ) -> tuple[str | None, str | None]:
@@ -78,19 +139,23 @@ def extract_reasoning(
             model_output_parts[2] if model_output_parts[1] else model_output_parts[0]
         )
 
-        if self.end_token not in model_output:
-            if not self.thinking_enabled:
-                # Thinking explicitly disabled — treat everything as content.
-                return None, model_output
-            # Thinking enabled but no </think>: output was truncated.
-            # Everything generated so far is reasoning.
-            return model_output, None
+        if self.end_token in model_output:
+            reasoning, _, content = model_output.partition(self.end_token)
+            return reasoning, content or None
 
-        # Extract reasoning content from the model output.
-        reasoning, _, content = model_output.partition(self.end_token)
+        if not self.thinking_enabled:
+            # Thinking explicitly disabled — treat everything as content.
+            return None, model_output
 
-        final_content = content or None
-        return reasoning, final_content
+        # No </think> — check for implicit reasoning end via <tool_call>.
+        tool_call_index = model_output.find(self._tool_call_tag)
+        if tool_call_index != -1:
+            reasoning = model_output[:tool_call_index]
+            content = model_output[tool_call_index:]
+            return reasoning or None, content or None
+        # Thinking enabled but no </think>: output was truncated.
+        # Everything generated so far is reasoning.
+        return model_output, None
 
     def extract_reasoning_streaming(
         self,
@@ -135,6 +200,20 @@ def extract_reasoning_streaming(
             # end_token_id in IDs but not in text (already stripped)
             return None
 
+        # Implicit reasoning end via <tool_call>.
+        if (
+            self._tool_call_token_id is not None
+            and self._tool_call_token_id in delta_token_ids
+        ):
+            tool_index = delta_text.find(self._tool_call_tag)
+            if tool_index >= 0:
+                reasoning = delta_text[:tool_index]
+                content = delta_text[tool_index:]
+                return DeltaMessage(
+                    reasoning=reasoning if reasoning else None,
+                    content=content if content else None,
+                )
+
         # No end token in this delta.
         if not delta_text:
             # Nothing left after stripping start token.
@@ -142,6 +221,11 @@ def extract_reasoning_streaming(
         elif self.end_token_id in previous_token_ids:
             # End token already passed: everything is content now.
             return DeltaMessage(content=delta_text)
+        elif (
+            self._tool_call_token_id is not None
+            and self._tool_call_token_id in previous_token_ids
+        ):
+            return DeltaMessage(content=delta_text)
         else:
             # No end token yet: still in reasoning phase.
             return DeltaMessage(reasoning=delta_text)
diff --git a/vllm/reasoning/step3_reasoning_parser.py b/vllm/reasoning/step3_reasoning_parser.py
index 5837f0673b7e..a50fcf02db48 100644
--- a/vllm/reasoning/step3_reasoning_parser.py
+++ b/vllm/reasoning/step3_reasoning_parser.py
@@ -29,6 +29,7 @@ class Step3ReasoningParser(ReasoningParser):
 
     def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
         super().__init__(tokenizer, *args, **kwargs)
+        self.think_start_token = "<think>"
         self.think_end_token = "</think>"
 
         self.reasoning_regex = re.compile(rf"(.*?){self.think_end_token}", re.DOTALL)
@@ -47,6 +48,14 @@ def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
             )
         self.think_end_token_id: int = think_end_token_id
 
+    @property
+    def reasoning_start_str(self) -> str:
+        return self.think_start_token
+
+    @property
+    def reasoning_end_str(self) -> str:
+        return self.think_end_token
+
     def extract_reasoning_streaming(
         self,
         previous_text: str,
diff --git a/vllm/renderers/base.py b/vllm/renderers/base.py
index 23f952eff4b9..41d8c0075fb1 100644
--- a/vllm/renderers/base.py
+++ b/vllm/renderers/base.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import asyncio
-import copy
 import time
 from abc import ABC, abstractmethod
 from collections.abc import Mapping, Sequence
@@ -97,6 +96,7 @@ def __init__(self, config: "VllmConfig", tokenizer: _T | None) -> None:
         self._async_tokenizer: AsyncMicrobatchTokenizer | None = None
 
         self.mm_processor: BaseMultiModalProcessor | None = None
+        self._readonly_mm_processor: BaseMultiModalProcessor | None = None
         self._mm_cache_stats: MultiModalCacheStats | None = None
         self._clear_mm_cache_async = make_async(
             self.clear_mm_cache, executor=self._executor
@@ -104,26 +104,31 @@ def __init__(self, config: "VllmConfig", tokenizer: _T | None) -> None:
         self._process_multimodal_async = make_async(
             self._process_multimodal, executor=self._mm_executor
         )
-        if config.model_config.is_multimodal_model:
+        if mm_registry.supports_multimodal_inputs(config.model_config):
             mm_processor_cache = mm_registry.processor_cache_from_config(config)
 
-            # Deep-copy the tokenizer so the multimodal processor gets its
-            # own Rust tokenizer backend.  Without this, concurrent access
-            # from AsyncMicrobatchTokenizer and call_hf_processor causes
-            # "RuntimeError: Already borrowed" from the Rust RefCell.
-            # See: https://github.com/huggingface/tokenizers/issues/537
-            mm_tokenizer = copy.deepcopy(tokenizer)
-
             with set_default_torch_num_threads():
                 self.mm_processor = mm_registry.create_processor(
                     config.model_config,
-                    tokenizer=mm_tokenizer,
+                    tokenizer=self.tokenizer,
                     cache=mm_processor_cache,
                 )
 
             if mm_processor_cache:
                 self._mm_cache_stats = MultiModalCacheStats()
 
+            # A second processor with its own processor-only cache.
+            # Used by the tokenize endpoint so that tokenize-only
+            # requests don't pollute the sender cache.
+            ro_cache = mm_registry.processor_only_cache_from_config(config)
+            if ro_cache is not None:
+                with set_default_torch_num_threads():
+                    self._readonly_mm_processor = mm_registry.create_processor(
+                        config.model_config,
+                        tokenizer=self.tokenizer,
+                        cache=ro_cache,
+                    )
+
             # This is used to generate internal request ID for MM processing
             # It has no relation to the request ID for engine core
             self._mm_req_counter = AtomicCounter()
@@ -184,6 +189,40 @@ def clear_mm_cache(self) -> None:
         if self._mm_cache_stats is not None:
             self._mm_cache_stats.reset = True
 
+    @staticmethod
+    def _clear_processor_cache(
+        processor: "BaseMultiModalProcessor | None",
+    ) -> None:
+        if processor is None:
+            return
+
+        processor_cache = processor.cache
+        if processor_cache is not None:
+            processor_cache.clear_cache()
+
+    def _warmup_mm_processor(
+        self,
+        processor: "BaseMultiModalProcessor",
+        *,
+        log_prefix: str,
+    ) -> None:
+        from vllm.multimodal.processing import TimingContext
+
+        model_config = self.model_config
+        mm_config = model_config.get_multimodal_config()
+        mm_limits = {k: v for k, v in processor.info.allowed_mm_limits.items() if v > 0}
+
+        start_time = time.perf_counter()
+        processor_inputs = processor.dummy_inputs.get_dummy_processor_inputs(
+            seq_len=model_config.max_model_len,
+            mm_counts=dict.fromkeys(mm_limits, 1),
+            mm_options=mm_config.limit_per_prompt,
+        )
+        _ = processor.apply(processor_inputs, timing_ctx=TimingContext(enabled=False))
+
+        elapsed = time.perf_counter() - start_time
+        logger.info("%s warmup completed in %.3fs", log_prefix, elapsed)
+
     def warmup(self, chat_params: ChatParams) -> None:
         """
         Warm up this renderer to avoid first-request latency.
@@ -207,33 +246,29 @@ def warmup(self, chat_params: ChatParams) -> None:
             logger.warning("Chat template warmup failed", exc_info=True)
 
         if self.mm_processor:
-            from vllm.multimodal.processing import TimingContext
-
-            model_config = self.model_config
-            mm_config = model_config.get_multimodal_config()
-            processor = self.mm_processor
-            mm_limits = processor.info.allowed_mm_limits
-
             try:
                 logger.debug("Warming up multi-modal processing...")
-                start_time = time.perf_counter()
-
-                processor_inputs = processor.dummy_inputs.get_dummy_processor_inputs(
-                    seq_len=model_config.max_model_len,
-                    mm_counts=dict.fromkeys(mm_limits, 1),
-                    mm_options=mm_config.limit_per_prompt,
+                self._warmup_mm_processor(
+                    self.mm_processor,
+                    log_prefix="Multi-modal",
                 )
-                _ = processor.apply(
-                    processor_inputs, timing_ctx=TimingContext(enabled=False)
-                )
-
-                elapsed = time.perf_counter() - start_time
-                logger.info("Multi-modal warmup completed in %.3fs", elapsed)
             except Exception:
                 logger.warning("Multi-modal warmup failed")
             finally:
                 self.clear_mm_cache()
 
+        if self._readonly_mm_processor is not None:
+            try:
+                logger.debug("Warming up readonly multi-modal processing...")
+                self._warmup_mm_processor(
+                    self._readonly_mm_processor,
+                    log_prefix="Readonly multi-modal",
+                )
+            except Exception:
+                logger.warning("Readonly multi-modal warmup failed")
+            finally:
+                self._clear_processor_cache(self._readonly_mm_processor)
+
     async def clear_mm_cache_async(self) -> None:
         """Serialize clear_mm_cache through the shared executor to avoid
         races with concurrent process_inputs on the mm_processor_cache."""
@@ -421,6 +456,11 @@ def _tokenize_singleton_prompt(
         params: TokenizeParams,
     ) -> SingletonTokPrompt:
         if "prompt_token_ids" not in prompt and "prompt_embeds" not in prompt:
+            if not isinstance(prompt.get("prompt"), str):
+                raise TypeError(
+                    "Expected prompt['prompt'] to be a string before tokenization; "
+                    "use 'prompt_token_ids' for token ID inputs"
+                )
             prompt = params.apply_pre_tokenization(self.tokenizer, prompt)  # type: ignore[arg-type]
             prompt = self._tokenize_prompt(prompt, params)
 
@@ -452,6 +492,11 @@ async def _tokenize_singleton_prompt_async(
         params: TokenizeParams,
     ) -> SingletonTokPrompt:
         if "prompt_token_ids" not in prompt and "prompt_embeds" not in prompt:
+            if not isinstance(prompt.get("prompt"), str):
+                raise TypeError(
+                    "Expected prompt['prompt'] to be a string before tokenization; "
+                    "use 'prompt_token_ids' for token ID inputs"
+                )
             prompt = params.apply_pre_tokenization(self.tokenizer, prompt)  # type: ignore[arg-type]
             prompt = await self._tokenize_prompt_async(prompt, params)
 
@@ -625,10 +670,15 @@ def _process_multimodal(
         mm_uuids: MultiModalUUIDDict | None,
         mm_processor_kwargs: Mapping[str, object] | None,
         tokenization_kwargs: dict[str, Any] | None,
+        *,
+        skip_mm_cache: bool = False,
     ) -> "MultiModalInput":
         mm_req_id = f"renderer{self.api_process_rank}-mm-{self._mm_req_counter.inc(1)}"
 
-        mm_processor = self.get_mm_processor()
+        if skip_mm_cache and self._readonly_mm_processor is not None:
+            mm_processor = self._readonly_mm_processor
+        else:
+            mm_processor = self.get_mm_processor()
 
         mm_data_items = mm_processor.info.parse_mm_data(mm_data)
         mm_uuid_items = parse_mm_uuids(mm_uuids)
@@ -656,6 +706,8 @@ def _process_multimodal(
     def _process_tokens(
         self,
         prompt: TokensPrompt,
+        *,
+        skip_mm_cache: bool = False,
     ) -> TokensInput | MultiModalInput:
         """Process token inputs, with multimodal preprocessing offloaded
         to the shared thread pool in the async variant.
@@ -670,6 +722,7 @@ def _process_tokens(
                 mm_processor_kwargs=prompt.get("mm_processor_kwargs"),
                 tokenization_kwargs=None,  # Tokenization already done in Step 2
                 mm_uuids=prompt.get("multi_modal_uuids"),
+                skip_mm_cache=skip_mm_cache,
             )
         else:
             engine_input = tokens_input(prompt_token_ids)
@@ -707,11 +760,15 @@ def _process_embeds(self, prompt: EmbedsPrompt) -> EmbedsInput:
         return embeds_input(
             prompt_embeds=prompt_embeds,
             cache_salt=prompt.get("cache_salt"),
+            prompt_token_ids=prompt.get("prompt_token_ids"),
+            is_token_ids=prompt.get("prompt_is_token_ids"),
         )
 
     async def _process_tokens_async(
         self,
         prompt: TokensPrompt,
+        *,
+        skip_mm_cache: bool = False,
     ) -> TokensInput | MultiModalInput:
         prompt_token_ids = prompt["prompt_token_ids"]
 
@@ -723,6 +780,7 @@ async def _process_tokens_async(
                 mm_processor_kwargs=prompt.get("mm_processor_kwargs"),
                 tokenization_kwargs=None,
                 mm_uuids=prompt.get("multi_modal_uuids"),
+                skip_mm_cache=skip_mm_cache,
             )
         else:
             engine_input = tokens_input(prompt_token_ids)
@@ -734,24 +792,33 @@ async def _process_tokens_async(
 
         return engine_input
 
-    def _process_singleton(self, prompt: SingletonTokPrompt) -> SingletonInput:
+    def _process_singleton(
+        self,
+        prompt: SingletonTokPrompt,
+        *,
+        skip_mm_cache: bool = False,
+    ) -> SingletonInput:
         if "prompt_embeds" in prompt:
             return self._process_embeds(prompt)  # type: ignore[arg-type]
 
-        return self._process_tokens(prompt)  # type: ignore[arg-type]
+        return self._process_tokens(prompt, skip_mm_cache=skip_mm_cache)  # type: ignore[arg-type]
 
     async def _process_singleton_async(
         self,
         prompt: SingletonTokPrompt,
+        *,
+        skip_mm_cache: bool = False,
     ) -> SingletonInput:
         if "prompt_embeds" in prompt:
             return self._process_embeds(prompt)  # type: ignore[arg-type]
 
-        return await self._process_tokens_async(prompt)  # type: ignore[arg-type]
+        return await self._process_tokens_async(prompt, skip_mm_cache=skip_mm_cache)  # type: ignore[arg-type]
 
     def _process_enc_dec(
         self,
         prompt: EncoderDecoderTokPrompt,
+        *,
+        skip_mm_cache: bool = False,
     ) -> EncoderDecoderInput:
         enc_prompt = prompt["encoder_prompt"]
         dec_prompt = prompt["decoder_prompt"]
@@ -764,9 +831,13 @@ def _process_enc_dec(
                 skip_decoder_start_token = self.mm_processor.skip_decoder_start_token
 
         return build_enc_dec_input(
-            encoder_input=self._process_singleton(enc_prompt),
+            encoder_input=self._process_singleton(
+                enc_prompt, skip_mm_cache=skip_mm_cache
+            ),
             decoder_input=(
-                None if dec_prompt is None else self._process_singleton(dec_prompt)
+                None
+                if dec_prompt is None
+                else self._process_singleton(dec_prompt, skip_mm_cache=skip_mm_cache)
             ),
             decoder_start_token_id=self.get_dec_start_token_id(),
             skip_decoder_start_token=skip_decoder_start_token,
@@ -775,16 +846,20 @@ def _process_enc_dec(
     async def _process_enc_dec_async(
         self,
         prompt: EncoderDecoderTokPrompt,
+        *,
+        skip_mm_cache: bool = False,
     ) -> EncoderDecoderInput:
         enc_prompt = prompt["encoder_prompt"]
         dec_prompt = prompt["decoder_prompt"]
 
         encoder_input, decoder_input = await asyncio.gather(
-            self._process_singleton_async(enc_prompt),
+            self._process_singleton_async(enc_prompt, skip_mm_cache=skip_mm_cache),
             (
                 asyncio.sleep(0)
                 if dec_prompt is None
-                else self._process_singleton_async(dec_prompt)
+                else self._process_singleton_async(
+                    dec_prompt, skip_mm_cache=skip_mm_cache
+                )
             ),
         )
 
@@ -794,27 +869,40 @@ async def _process_enc_dec_async(
             decoder_start_token_id=self.get_dec_start_token_id(),
         )
 
-    def process_for_engine(self, prompt: TokPrompt, arrival_time: float) -> EngineInput:
+    def process_for_engine(
+        self,
+        prompt: TokPrompt,
+        arrival_time: float,
+        *,
+        skip_mm_cache: bool = False,
+    ) -> EngineInput:
         engine_input: EngineInput
         if "encoder_prompt" in prompt:
-            engine_input = self._process_enc_dec(prompt)  # type: ignore[arg-type]
+            engine_input = self._process_enc_dec(prompt, skip_mm_cache=skip_mm_cache)  # type: ignore[arg-type]
         else:
-            engine_input = self._process_singleton(prompt)
+            engine_input = self._process_singleton(prompt, skip_mm_cache=skip_mm_cache)
 
         engine_input["arrival_time"] = arrival_time
 
         return engine_input
 
     async def process_for_engine_async(
-        self, prompt: TokPrompt, arrival_time: float
+        self,
+        prompt: TokPrompt,
+        arrival_time: float,
+        *,
+        skip_mm_cache: bool = False,
     ) -> EngineInput:
         engine_input: EngineInput
         if "encoder_prompt" in prompt:
             engine_input = await self._process_enc_dec_async(
-                prompt  # type: ignore[arg-type]
+                prompt,  # type: ignore[arg-type]
+                skip_mm_cache=skip_mm_cache,
             )
         else:
-            engine_input = await self._process_singleton_async(prompt)
+            engine_input = await self._process_singleton_async(
+                prompt, skip_mm_cache=skip_mm_cache
+            )
 
         engine_input["arrival_time"] = arrival_time
 
@@ -827,6 +915,7 @@ def render_cmpl(
         tok_params: TokenizeParams | None = None,
         *,
         prompt_extras: dict[str, Any] | None = None,
+        skip_mm_cache: bool = False,
     ):
         arrival_time = time.time()
 
@@ -838,7 +927,10 @@ def render_cmpl(
 
         self._apply_prompt_extras(tok_prompts, prompt_extras)
 
-        return [self.process_for_engine(prompt, arrival_time) for prompt in tok_prompts]
+        return [
+            self.process_for_engine(prompt, arrival_time, skip_mm_cache=skip_mm_cache)
+            for prompt in tok_prompts
+        ]
 
     async def render_cmpl_async(
         self,
@@ -846,6 +938,7 @@ async def render_cmpl_async(
         tok_params: TokenizeParams | None = None,
         *,
         prompt_extras: dict[str, Any] | None = None,
+        skip_mm_cache: bool = False,
     ):
         arrival_time = time.time()
 
@@ -858,7 +951,12 @@ async def render_cmpl_async(
         self._apply_prompt_extras(tok_prompts, prompt_extras)
 
         return await asyncio.gather(
-            *(self.process_for_engine_async(p, arrival_time) for p in tok_prompts)
+            *(
+                self.process_for_engine_async(
+                    p, arrival_time, skip_mm_cache=skip_mm_cache
+                )
+                for p in tok_prompts
+            )
         )
 
     def render_chat(
@@ -868,6 +966,7 @@ def render_chat(
         tok_params: TokenizeParams | None = None,
         *,
         prompt_extras: dict[str, Any] | None = None,
+        skip_mm_cache: bool = False,
     ):
         arrival_time = time.time()
 
@@ -890,7 +989,8 @@ def render_chat(
         self._apply_prompt_extras(tok_prompts, prompt_extras)
 
         eng_prompts = [
-            self.process_for_engine(prompt, arrival_time) for prompt in tok_prompts
+            self.process_for_engine(prompt, arrival_time, skip_mm_cache=skip_mm_cache)
+            for prompt in tok_prompts
         ]
 
         return out_conversations, eng_prompts
@@ -902,6 +1002,7 @@ async def render_chat_async(
         tok_params: TokenizeParams | None = None,
         *,
         prompt_extras: dict[str, Any] | None = None,
+        skip_mm_cache: bool = False,
     ):
         arrival_time = time.time()
 
@@ -924,7 +1025,12 @@ async def render_chat_async(
         self._apply_prompt_extras(tok_prompts, prompt_extras)
 
         eng_prompts = await asyncio.gather(
-            *(self.process_for_engine_async(p, arrival_time) for p in tok_prompts)
+            *(
+                self.process_for_engine_async(
+                    p, arrival_time, skip_mm_cache=skip_mm_cache
+                )
+                for p in tok_prompts
+            )
         )
 
         return out_conversations, eng_prompts
diff --git a/vllm/renderers/deepseek_v4.py b/vllm/renderers/deepseek_v4.py
new file mode 100644
index 000000000000..3dc82b9622e5
--- /dev/null
+++ b/vllm/renderers/deepseek_v4.py
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm.config import VllmConfig
+from vllm.entrypoints.chat_utils import (
+    ChatCompletionMessageParam,
+    ConversationMessage,
+    parse_chat_messages,
+    parse_chat_messages_async,
+)
+from vllm.logger import init_logger
+from vllm.tokenizers.deepseek_v4 import DeepseekV4Tokenizer
+from vllm.utils.async_utils import make_async
+
+from .base import BaseRenderer
+from .inputs import DictPrompt
+from .inputs.preprocess import parse_dec_only_prompt
+from .params import ChatParams
+
+logger = init_logger(__name__)
+
+
+class DeepseekV4Renderer(BaseRenderer[DeepseekV4Tokenizer]):
+    def __init__(
+        self,
+        config: VllmConfig,
+        tokenizer: DeepseekV4Tokenizer | None,
+    ) -> None:
+        super().__init__(config, tokenizer)
+
+        self._apply_chat_template_async = make_async(
+            self._apply_chat_template, executor=self._executor
+        )
+
+    def _apply_chat_template(self, *args, **kwargs):
+        return self.get_tokenizer().apply_chat_template(*args, **kwargs)
+
+    def render_messages(
+        self,
+        messages: list[ChatCompletionMessageParam],
+        params: ChatParams,
+    ) -> tuple[list[ConversationMessage], DictPrompt]:
+        conversation, mm_data, mm_uuids = parse_chat_messages(
+            messages,
+            self.model_config,
+            content_format="string",
+            media_io_kwargs=params.media_io_kwargs,
+            mm_processor_kwargs=params.mm_processor_kwargs,
+        )
+
+        prompt_raw = self._apply_chat_template(
+            conversation=conversation,
+            messages=messages,
+            **params.get_apply_chat_template_kwargs(),
+        )
+
+        prompt = parse_dec_only_prompt(prompt_raw)
+        if mm_data is not None:
+            prompt["multi_modal_data"] = mm_data
+        if mm_uuids is not None:
+            prompt["multi_modal_uuids"] = mm_uuids
+
+        return conversation, prompt
+
+    async def render_messages_async(
+        self,
+        messages: list[ChatCompletionMessageParam],
+        params: ChatParams,
+    ) -> tuple[list[ConversationMessage], DictPrompt]:
+        conversation, mm_data, mm_uuids = await parse_chat_messages_async(
+            messages,
+            self.model_config,
+            content_format="string",
+            media_io_kwargs=params.media_io_kwargs,
+            mm_processor_kwargs=params.mm_processor_kwargs,
+        )
+
+        prompt_raw = await self._apply_chat_template_async(
+            conversation=conversation,
+            messages=messages,
+            **params.get_apply_chat_template_kwargs(),
+        )
+
+        prompt = parse_dec_only_prompt(prompt_raw)
+        if mm_data is not None:
+            prompt["multi_modal_data"] = mm_data
+        if mm_uuids is not None:
+            prompt["multi_modal_uuids"] = mm_uuids
+
+        return conversation, prompt
diff --git a/vllm/renderers/embed_utils.py b/vllm/renderers/embed_utils.py
index a51fc53a24ad..84c28dcf7e0f 100644
--- a/vllm/renderers/embed_utils.py
+++ b/vllm/renderers/embed_utils.py
@@ -7,6 +7,7 @@
 import torch
 
 from vllm.exceptions import VLLMValidationError
+from vllm.utils.async_utils import make_async
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig
@@ -30,15 +31,53 @@ def safe_load_prompt_embeds(
             weights_only=True,
             map_location=torch.device("cpu"),
         )
-        assert isinstance(tensor, torch.Tensor) and tensor.dtype in (
-            torch.float32,
-            torch.bfloat16,
-            torch.float16,
-        )
+        if not isinstance(tensor, torch.Tensor):
+            raise VLLMValidationError(
+                "`prompt_embeds` payload did not deserialize to a torch.Tensor.",
+                parameter="prompt_embeds",
+            )
         tensor = tensor.to_dense()
 
     if tensor.dim() > 2:
         tensor = tensor.squeeze(0)
-        assert tensor.dim() == 2
+    if tensor.dim() != 2:
+        raise VLLMValidationError(
+            "`prompt_embeds` must be a 2D tensor of shape "
+            f"(num_tokens, hidden_size); got shape {tuple(tensor.shape)}.",
+            parameter="prompt_embeds",
+        )
+
+    # Pin each tensor to the model's hidden_size. Validating here
+    # also transitively guarantees cross-tensor consistency for requests that
+    # include multiple `prompt_embeds` parts, which is required by downstream
+    # concatenation in `_build_mixed_prompt_embeds`.
+    expected_hidden_size = model_config.get_hidden_size()
+    if tensor.shape[1] != expected_hidden_size:
+        raise VLLMValidationError(
+            f"`prompt_embeds` hidden_size {tensor.shape[1]} does not match "
+            f"the model's hidden_size {expected_hidden_size}.",
+            parameter="prompt_embeds",
+        )
+
+    # Cast to the model's dtype so API clients don't need to know the server's
+    # `--dtype` setting ahead of time. Only floating-point source dtypes are
+    # allowed. integer / bool / complex inputs almost certainly indicate caller
+    # error (e.g. quantized payloads, wrong tensor), and a silent `.to()`
+    # could hide a real mistake.
+    expected_dtype = model_config.dtype
+    if tensor.dtype != expected_dtype:
+        if not tensor.is_floating_point():
+            raise VLLMValidationError(
+                f"`prompt_embeds` dtype {tensor.dtype} is not a floating-point "
+                f"type, cannot safely cast to the model's dtype {expected_dtype}.",
+                parameter="prompt_embeds",
+            )
+        tensor = tensor.to(expected_dtype)
 
     return tensor
+
+
+safe_load_prompt_embeds_async = make_async(safe_load_prompt_embeds)
+"""Async variant of `safe_load_prompt_embeds` that defers the decode to a
+thread-pool executor, so the asyncio event loop is not blocked by the base64
+decode + `torch.load` work."""
diff --git a/vllm/renderers/hf.py b/vllm/renderers/hf.py
index 690ffb2a8954..e796607722ad 100644
--- a/vllm/renderers/hf.py
+++ b/vllm/renderers/hf.py
@@ -1,11 +1,15 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from __future__ import annotations
+
+import copy
 import inspect
 import itertools
+import weakref
 from collections import defaultdict, deque
-from collections.abc import Set
+from collections.abc import Sequence
 from functools import lru_cache
-from typing import Any, Literal, cast, overload
+from typing import TYPE_CHECKING, Any, Final, Literal, cast, overload
 
 import jinja2
 import jinja2.ext
@@ -13,34 +17,199 @@
 import jinja2.nodes
 import jinja2.parser
 import jinja2.sandbox
+import torch
+from typing_extensions import override
 
-from vllm.config import ModelConfig, VllmConfig
 from vllm.entrypoints.chat_utils import (
-    ChatCompletionMessageParam,
-    ChatTemplateContentFormat,
-    ChatTemplateContentFormatOption,
+    PROMPT_EMBEDS_PLACEHOLDER_TOKEN,
     ChatTemplateResolutionError,
-    ConversationMessage,
     load_chat_template,
     parse_chat_messages,
     parse_chat_messages_async,
 )
-from vllm.inputs import MultiModalDataDict, MultiModalUUIDDict
+from vllm.inputs import EmbedsPrompt
+from vllm.inputs.engine import MultiModalInput
 from vllm.logger import init_logger
-from vllm.tokenizers.hf import HfTokenizer
+from vllm.multimodal.hasher import MultiModalHasher
+from vllm.multimodal.inputs import (
+    MultiModalFieldElem,
+    MultiModalKwargsItem,
+    MultiModalKwargsItems,
+    MultiModalSharedField,
+    PlaceholderRange,
+)
+from vllm.multimodal.processing.processor import (
+    PromptReplacement,
+    apply_token_matches,
+    find_mm_placeholders,
+)
+from vllm.tokenizers.hf import HfTokenizer, maybe_make_thread_pool
 from vllm.transformers_utils.chat_templates import get_chat_template_fallback_path
 from vllm.transformers_utils.processor import cached_get_processor
 from vllm.utils.async_utils import make_async
 from vllm.utils.func_utils import supports_kw
 
 from .base import BaseRenderer
-from .inputs import DictPrompt
 from .inputs.preprocess import parse_dec_only_prompt
-from .params import ChatParams
+
+if TYPE_CHECKING:
+    from collections.abc import Set
+
+    from vllm.config import ModelConfig, VllmConfig
+    from vllm.entrypoints.chat_utils import (
+        ChatCompletionMessageParam,
+        ChatTemplateContentFormat,
+        ChatTemplateContentFormatOption,
+        ConversationMessage,
+    )
+    from vllm.inputs import MultiModalDataDict, MultiModalUUIDDict, TokensPrompt
+    from vllm.inputs.engine import TokensInput
+    from vllm.multimodal.processing.processor import (
+        MultiModalPromptUpdates,
+        ResolvedPromptUpdate,
+    )
+
+    from .inputs import DictPrompt
+    from .params import ChatParams
 
 logger = init_logger(__name__)
 
 
+# Cache of `tokenizer -> prompt_embeds placeholder token ID`. Keyed by the
+# tokenizer object (not `id(tokenizer)`) so a fresh tokenizer landing at a
+# recycled memory address can't pick up a stale tid. Entries evict atomically
+# with the tokenizer's garbage-collection.
+_PROMPT_EMBEDS_PLACEHOLDER_TOKEN_ID_CACHE: Final[
+    weakref.WeakKeyDictionary[HfTokenizer, int]
+] = weakref.WeakKeyDictionary()
+_PROMPT_EMBEDS_PLACEHOLDER_TOKEN_ID_ERROR: Final[str] = (
+    "Expected {token!r} to tokenize to exactly 1 token, got {num_ids} ({ids!r})."
+)
+_PROMPT_EMBEDS_PLACEHOLDER_SPAN_MISMATCH_ERROR: Final[str] = (
+    "Expected {expected} prompt_embeds placeholder spans in the "
+    "tokenized prompt, found {actual}."
+)
+_MISSING_PROMPT_TOKEN_IDS_ERROR: Final[str] = (
+    "Expected prompt_token_ids in rendered prompt when prompt_embeds "
+    "are present. This indicates the chat template was invoked with "
+    "tokenize=False."
+)
+_TOKENIZE_OVERRIDE_WARNING: Final[str] = (
+    "Overriding `tokenize=False` to `True` because `prompt_embeds` "
+    "post-processing requires tokenized IDs."
+)
+
+
+def _ensure_prompt_embeds_placeholder_token(tokenizer: HfTokenizer) -> int:
+    """Register `PROMPT_EMBEDS_PLACEHOLDER_TOKEN` as a special token and return
+    its token ID."""
+    cached = _PROMPT_EMBEDS_PLACEHOLDER_TOKEN_ID_CACHE.get(tokenizer)
+    if cached is not None:
+        return cached
+
+    tokenizer.add_special_tokens(
+        {"additional_special_tokens": [PROMPT_EMBEDS_PLACEHOLDER_TOKEN]}
+    )
+
+    ids = tokenizer.encode(PROMPT_EMBEDS_PLACEHOLDER_TOKEN, add_special_tokens=False)
+    if len(ids) != 1:
+        raise RuntimeError(
+            _PROMPT_EMBEDS_PLACEHOLDER_TOKEN_ID_ERROR.format(
+                token=PROMPT_EMBEDS_PLACEHOLDER_TOKEN,
+                num_ids=len(ids),
+                ids=ids,
+            )
+        )
+
+    token_id = ids[0]
+    _PROMPT_EMBEDS_PLACEHOLDER_TOKEN_ID_CACHE[tokenizer] = token_id
+    return token_id
+
+
+def _build_prompt_embeds_updates(
+    prompt_embeds_tensors: Sequence[torch.Tensor],
+    placeholder_token_id: int,
+) -> MultiModalPromptUpdates:
+    """Build `MultiModalPromptUpdates` for `prompt_embeds` expansion.
+
+    Each tensor produces a `PromptReplacement` that maps
+    `[placeholder_token_id]` -> `[placeholder_token_id] x N`
+    (where `N = tensor.shape[0]`).
+    """
+    updates: list[Sequence[ResolvedPromptUpdate]] = []
+    for i, tensor in enumerate(prompt_embeds_tensors):
+        update = PromptReplacement(
+            modality="prompt_embeds",
+            target=[placeholder_token_id],
+            replacement=[placeholder_token_id] * tensor.shape[0],
+        )
+        updates.append([update.resolve(item_idx=i)])
+    return {"prompt_embeds": updates}
+
+
+def _expand_prompt_embeds_placeholders(
+    token_ids: list[int],
+    mm_prompt_updates: MultiModalPromptUpdates,
+) -> list[int]:
+    """Expand each 1-token `prompt_embeds` sentinel into an N-token span.
+
+    Uses `apply_token_matches`.  Each single placeholder token in
+    `token_ids` is replaced with a consecutive span of
+    `tensor.shape[0]` copies, following tensors in order.
+    """
+    expanded, _ = apply_token_matches(token_ids, mm_prompt_updates, tokenizer=None)
+    return expanded
+
+
+def _build_prompt_embeds_positions(
+    token_ids: list[int],
+    num_tensors: int,
+    mm_prompt_updates: MultiModalPromptUpdates,
+) -> list[tuple[int, int]]:
+    """Locate each prompt_embeds placeholder span in `token_ids`.
+
+    Expects `token_ids` to already contain expanded N-token spans.
+    Returns `[(start_idx, length), ...]` aligned with the tensors.
+    """
+    placeholders = find_mm_placeholders(
+        prompt=token_ids,
+        mm_prompt_updates=mm_prompt_updates,
+        tokenizer=None,
+    )
+    features = placeholders.get("prompt_embeds", [])
+
+    if len(features) != num_tensors:
+        raise ValueError(
+            _PROMPT_EMBEDS_PLACEHOLDER_SPAN_MISMATCH_ERROR.format(
+                expected=num_tensors,
+                actual=len(features),
+            )
+        )
+
+    return [(f.start_idx, f.length) for f in features]
+
+
+def _build_mixed_prompt_embeds(
+    token_ids: list[int],
+    prompt_embeds_tensors: Sequence[torch.Tensor],
+    positions: list[tuple[int, int]],
+) -> tuple[torch.Tensor, list[bool]]:
+    """Build the full-length `prompt_embeds` tensor and the `is_token_ids`
+    mask aligned to `token_ids`."""
+    total_len = len(token_ids)
+    hidden_size = prompt_embeds_tensors[0].shape[1]
+    dtype = prompt_embeds_tensors[0].dtype
+
+    full_embeds = torch.zeros(total_len, hidden_size, dtype=dtype)
+    is_token_ids = torch.ones(total_len, dtype=torch.bool)
+
+    for (start, length), tensor in zip(positions, prompt_embeds_tensors, strict=True):
+        full_embeds[start : start + length] = tensor
+        is_token_ids[start : start + length] = False
+
+    return full_embeds, is_token_ids.tolist()
+
+
 _PROCESSOR_CHAT_TEMPLATES = dict[tuple[str, bool], str | None]()
 """
 Used in `_try_get_processor_chat_template` to avoid calling
@@ -98,7 +267,7 @@ def resolve_chat_template(
     chat_template: str | None,
     tools: list[dict[str, Any]] | None,
     *,
-    model_config: "ModelConfig",
+    model_config: ModelConfig,
 ) -> str | None:
     # 1st priority: The given chat template
     if chat_template is not None:
@@ -233,6 +402,7 @@ def _iter_nodes_assign_content_item(root: jinja2.nodes.Node):
     ]
 
     # Search for {%- for content in message['content'] -%} loops
+    # or {%- for item in content -%} loops
     for loop_ast in root.find_all(jinja2.nodes.For):
         loop_iter = loop_ast.iter
         loop_target = loop_ast.target
@@ -243,6 +413,10 @@ def _iter_nodes_assign_content_item(root: jinja2.nodes.Node):
                 yield loop_ast, loop_target.name
                 break
 
+        if isinstance(loop_iter, jinja2.nodes.Name) and loop_iter.name == "content":
+            assert isinstance(loop_target, jinja2.nodes.Name)
+            yield loop_ast, loop_target.name
+
 
 def _try_extract_ast(chat_template: str) -> jinja2.nodes.Template | None:
     import transformers.utils.chat_template_utils as hf_chat_utils
@@ -281,7 +455,7 @@ def _resolve_chat_template_content_format(
     tools: list[dict[str, Any]] | None,
     tokenizer: HfTokenizer,
     *,
-    model_config: "ModelConfig",
+    model_config: ModelConfig,
 ) -> ChatTemplateContentFormat:
     resolved_chat_template = resolve_chat_template(
         tokenizer,
@@ -335,7 +509,7 @@ def resolve_chat_template_content_format(
     given_format: ChatTemplateContentFormatOption,
     tokenizer: HfTokenizer,
     *,
-    model_config: "ModelConfig",
+    model_config: ModelConfig,
 ) -> ChatTemplateContentFormat:
     if given_format != "auto":
         return given_format
@@ -437,7 +611,7 @@ def resolve_chat_template_kwargs(
 
 @overload
 def safe_apply_chat_template(
-    model_config: "ModelConfig",
+    model_config: ModelConfig,
     tokenizer: HfTokenizer,
     conversation: list[ConversationMessage],
     *,
@@ -448,7 +622,7 @@ def safe_apply_chat_template(
 ) -> list[int]: ...
 @overload
 def safe_apply_chat_template(
-    model_config: "ModelConfig",
+    model_config: ModelConfig,
     tokenizer: HfTokenizer,
     conversation: list[ConversationMessage],
     *,
@@ -458,7 +632,7 @@ def safe_apply_chat_template(
     **kwargs,
 ) -> str: ...
 def safe_apply_chat_template(
-    model_config: "ModelConfig",
+    model_config: ModelConfig,
     tokenizer: HfTokenizer,
     conversation: list[ConversationMessage],
     *,
@@ -486,6 +660,14 @@ def safe_apply_chat_template(
         chat_template_kwargs=kwargs,
     )
 
+    # transformers v5 changed the default of `return_dict` to True, which
+    # makes `apply_chat_template(tokenize=True)` return a `BatchEncoding`
+    # instead of `list[int]`. Force `return_dict=False` so downstream code
+    # that expects a flat token list (e.g. `parse_dec_only_prompt`) works
+    # consistently across v4 and v5.
+    if tokenize and "return_dict" not in resolved_kwargs:
+        resolved_kwargs["return_dict"] = False
+
     try:
         return tokenizer.apply_chat_template(
             conversation=conversation,  # type: ignore[arg-type]
@@ -609,6 +791,14 @@ def __init__(
         config: VllmConfig,
         tokenizer: HfTokenizer | None,
     ) -> None:
+        # Ensure the og tokenizer is never modified by maybe_make_thread_pool
+        tokenizer = copy.copy(tokenizer)
+        if (
+            # Skip for mock configs and tokenizers
+            getattr(config.model_config, "enable_prompt_embeds", False)
+            and isinstance(tokenizer, HfTokenizer)
+        ):
+            _ensure_prompt_embeds_placeholder_token(tokenizer)
         super().__init__(config, tokenizer)
 
         self.use_unified_vision_chunk = getattr(
@@ -619,6 +809,11 @@ def __init__(
             safe_apply_chat_template, executor=self._executor
         )
 
+        if self.tokenizer is not None:
+            maybe_make_thread_pool(
+                self.tokenizer, config.model_config.renderer_num_workers + 1
+            )
+
     def render_messages(
         self,
         messages: list[ChatCompletionMessageParam],
@@ -627,6 +822,12 @@ def render_messages(
         model_config = self.model_config
         tokenizer = self.get_tokenizer()
 
+        prompt_embeds_placeholder_token_id: int | None = None
+        if model_config.enable_prompt_embeds:
+            prompt_embeds_placeholder_token_id = (
+                _ensure_prompt_embeds_placeholder_token(tokenizer)
+            )
+
         conversation, mm_data, mm_uuids = parse_chat_messages(
             messages,
             model_config,
@@ -641,11 +842,30 @@ def render_messages(
             mm_processor_kwargs=params.mm_processor_kwargs,
         )
 
+        # prompt_embeds tensors are carried by the tracker through mm_data,
+        # but they must NOT be fed to the MM processor (which would reject
+        # the unknown key). Extract them here.
+        prompt_embeds_tensors: list[torch.Tensor] | None = None
+        if mm_data is not None and "prompt_embeds" in mm_data:
+            prompt_embeds_tensors = list(
+                cast(Sequence[torch.Tensor], mm_data["prompt_embeds"])
+            )
+            mm_data = {k: v for k, v in mm_data.items() if k != "prompt_embeds"}
+            if not mm_data:
+                mm_data = None
+
+        chat_template_kwargs = params.get_apply_chat_template_kwargs()
+        if prompt_embeds_tensors:
+            # prompt_embeds post-processing requires prompt_token_ids.
+            if chat_template_kwargs.get("tokenize") is False:
+                logger.warning_once(_TOKENIZE_OVERRIDE_WARNING)
+            chat_template_kwargs["tokenize"] = True
+
         prompt_raw = safe_apply_chat_template(
             model_config,
             tokenizer,
             conversation,
-            **params.get_apply_chat_template_kwargs(),
+            **chat_template_kwargs,
         )
 
         # NOTE: use_unified_vision_chunk is currently specific to Kimi-K2.5
@@ -671,6 +891,29 @@ def render_messages(
             )
 
         prompt = parse_dec_only_prompt(prompt_raw)
+
+        # When `prompt_embeds` is mixed with other modality data,
+        # `_process_tokens` runs `_process_multimodal` first (expanding
+        # `<|AUDIO|>` / `<|IMAGE|>` placeholders) and then
+        # `_apply_prompt_embeds_to_engine_input` augments the result.
+        # Stash the tensors and placeholder ID for that override to consume.
+        if prompt_embeds_tensors and mm_data:
+            assert prompt_embeds_placeholder_token_id is not None
+            cast(dict, prompt)["_prompt_embeds"] = (
+                prompt_embeds_tensors,
+                prompt_embeds_placeholder_token_id,
+            )
+            if params.mm_processor_kwargs:
+                cast(dict, prompt)["mm_processor_kwargs"] = params.mm_processor_kwargs
+        elif prompt_embeds_tensors:
+            # Pure mode: no other MM data, mutate prompt to EmbedsPrompt shape.
+            assert prompt_embeds_placeholder_token_id is not None
+            self._apply_prompt_embeds_to_prompt(
+                prompt,
+                prompt_embeds_tensors,
+                prompt_embeds_placeholder_token_id,
+            )
+
         if mm_data is not None:
             prompt["multi_modal_data"] = mm_data
         if mm_uuids is not None:
@@ -686,6 +929,12 @@ async def render_messages_async(
         model_config = self.model_config
         tokenizer = self.get_tokenizer()
 
+        prompt_embeds_placeholder_token_id: int | None = None
+        if model_config.enable_prompt_embeds:
+            prompt_embeds_placeholder_token_id = (
+                _ensure_prompt_embeds_placeholder_token(tokenizer)
+            )
+
         conversation, mm_data, mm_uuids = await parse_chat_messages_async(
             messages,
             model_config,
@@ -700,11 +949,27 @@ async def render_messages_async(
             mm_processor_kwargs=params.mm_processor_kwargs,
         )
 
+        prompt_embeds_tensors: list[torch.Tensor] | None = None
+        if mm_data is not None and "prompt_embeds" in mm_data:
+            prompt_embeds_tensors = list(
+                cast(Sequence[torch.Tensor], mm_data["prompt_embeds"])
+            )
+            mm_data = {k: v for k, v in mm_data.items() if k != "prompt_embeds"}
+            if not mm_data:
+                mm_data = None
+
+        chat_template_kwargs = params.get_apply_chat_template_kwargs()
+        if prompt_embeds_tensors:
+            # prompt_embeds post-processing requires prompt_token_ids.
+            if chat_template_kwargs.get("tokenize") is False:
+                logger.warning_once(_TOKENIZE_OVERRIDE_WARNING)
+            chat_template_kwargs["tokenize"] = True
+
         prompt_raw = await self._apply_chat_template_async(
             model_config,
             tokenizer,
             conversation,
-            **params.get_apply_chat_template_kwargs(),
+            **chat_template_kwargs,
         )
 
         # NOTE: use_unified_vision_chunk is currently specific to Kimi-K2.5
@@ -728,9 +993,185 @@ async def render_messages_async(
             )
 
         prompt = parse_dec_only_prompt(prompt_raw)
+
+        # See `render_messages` for the rationale.
+        if prompt_embeds_tensors and mm_data:
+            assert prompt_embeds_placeholder_token_id is not None
+            cast(dict, prompt)["_prompt_embeds"] = (
+                prompt_embeds_tensors,
+                prompt_embeds_placeholder_token_id,
+            )
+            if params.mm_processor_kwargs:
+                cast(dict, prompt)["mm_processor_kwargs"] = params.mm_processor_kwargs
+        elif prompt_embeds_tensors:
+            assert prompt_embeds_placeholder_token_id is not None
+            self._apply_prompt_embeds_to_prompt(
+                prompt,
+                prompt_embeds_tensors,
+                prompt_embeds_placeholder_token_id,
+            )
+
         if mm_data is not None:
             prompt["multi_modal_data"] = mm_data
         if mm_uuids is not None:
             prompt["multi_modal_uuids"] = mm_uuids
 
         return conversation, prompt
+
+    @override
+    def _process_tokens(
+        self,
+        prompt: TokensPrompt,
+        *,
+        skip_mm_cache: bool = False,
+    ) -> TokensInput | MultiModalInput:
+        """Pre-expand `prompt_embeds` sentinels before delegating to the MM
+        processor, then attach `prompt_embeds` modality data to the result.
+
+        Mixed mode only: the `_prompt_embeds` stash is set by
+        `render_messages` when `prompt_embeds` co-exist with other MM data
+        (images, audio, …).  We expand each 1-token sentinel to an N-token
+        span *before* calling `super()._process_tokens()` so the MM
+        processor records all placeholder offsets in the final (post-expansion)
+        coordinate space, no offset shifting needed afterwards.
+        """
+        prompt_embeds_info = cast(dict, prompt).pop("_prompt_embeds", None)
+        if prompt_embeds_info is not None:
+            tensors, placeholder_token_id = prompt_embeds_info
+            mm_updates = _build_prompt_embeds_updates(tensors, placeholder_token_id)
+            cast(dict, prompt)["prompt_token_ids"] = _expand_prompt_embeds_placeholders(
+                list(prompt["prompt_token_ids"]), mm_updates
+            )
+        engine_input = super()._process_tokens(prompt, skip_mm_cache=skip_mm_cache)
+        if prompt_embeds_info is not None:
+            tensors, _ = prompt_embeds_info
+            self._apply_prompt_embeds_to_engine_input(
+                cast(MultiModalInput, engine_input),
+                tensors,
+                mm_updates,
+            )
+        return engine_input
+
+    @override
+    async def _process_tokens_async(
+        self,
+        prompt: TokensPrompt,
+        *,
+        skip_mm_cache: bool = False,
+    ) -> TokensInput | MultiModalInput:
+        """Async equivalent of `_process_tokens`."""
+        prompt_embeds_info = cast(dict, prompt).pop("_prompt_embeds", None)
+        if prompt_embeds_info is not None:
+            tensors, placeholder_token_id = prompt_embeds_info
+            mm_updates = _build_prompt_embeds_updates(tensors, placeholder_token_id)
+            cast(dict, prompt)["prompt_token_ids"] = _expand_prompt_embeds_placeholders(
+                list(prompt["prompt_token_ids"]), mm_updates
+            )
+        engine_input = await super()._process_tokens_async(
+            prompt, skip_mm_cache=skip_mm_cache
+        )
+        if prompt_embeds_info is not None:
+            tensors, _ = prompt_embeds_info
+            self._apply_prompt_embeds_to_engine_input(
+                cast(MultiModalInput, engine_input),
+                tensors,
+                mm_updates,
+            )
+        return engine_input
+
+    @staticmethod
+    def _apply_prompt_embeds_to_prompt(
+        prompt: DictPrompt,
+        prompt_embeds_tensors: list[torch.Tensor],
+        placeholder_token_id: int,
+    ) -> None:
+        """Mutate `prompt` from `TokensPrompt` to `EmbedsPrompt` shape.
+
+        Pure `prompt_embeds` path only (no other MM modalities).  Expands
+        each `<prompt_embeds>` sentinel token into an N-token span and builds
+        the full-length `prompt_embeds` tensor + `prompt_is_token_ids` mask
+        that the engine's `enable_prompt_embeds` worker branch consumes.
+        """
+        token_ids = cast(list[int] | None, prompt.get("prompt_token_ids"))
+        if token_ids is None:
+            raise RuntimeError(_MISSING_PROMPT_TOKEN_IDS_ERROR)
+
+        embeds_orig_positions: list[int] = [
+            i for i, tok in enumerate(token_ids) if tok == placeholder_token_id
+        ]
+        if len(embeds_orig_positions) != len(prompt_embeds_tensors):
+            raise ValueError(
+                f"Expected {len(prompt_embeds_tensors)} prompt_embeds "
+                f"placeholder tokens in the rendered prompt, found "
+                f"{len(embeds_orig_positions)}."
+            )
+
+        mm_updates = _build_prompt_embeds_updates(
+            prompt_embeds_tensors, placeholder_token_id
+        )
+        expanded = _expand_prompt_embeds_placeholders(token_ids, mm_updates)
+        positions = _build_prompt_embeds_positions(
+            expanded, len(prompt_embeds_tensors), mm_updates
+        )
+
+        embeds_prompt = cast(EmbedsPrompt, prompt)
+        embeds_prompt["prompt_token_ids"] = expanded
+        full_embeds, is_token_ids_mask = _build_mixed_prompt_embeds(
+            expanded, prompt_embeds_tensors, positions
+        )
+        embeds_prompt["prompt_embeds"] = full_embeds
+        embeds_prompt["prompt_is_token_ids"] = is_token_ids_mask
+
+    @staticmethod
+    def _apply_prompt_embeds_to_engine_input(
+        engine_input: MultiModalInput,
+        prompt_embeds_tensors: list[torch.Tensor],
+        mm_updates: MultiModalPromptUpdates,
+    ) -> None:
+        """Augment `engine_input` in-place with a `prompt_embeds` modality.
+
+        Mixed mode: called after `_process_multimodal` has already run on the
+        pre-expanded token IDs (expansion was done in `_process_tokens` before
+        calling `super()`).  Locates the already-expanded `prompt_embeds` spans
+        and adds `prompt_embeds` entries to `mm_kwargs`, `mm_hashes`, and
+        `mm_placeholders`.
+        """
+        # token_ids already contain the pre-expanded N-token spans.
+        token_ids = list(engine_input["prompt_token_ids"])
+
+        positions = _build_prompt_embeds_positions(
+            token_ids, len(prompt_embeds_tensors), mm_updates
+        )
+
+        pe_kwargs_items: list[MultiModalKwargsItem] = []
+        pe_hashes: list[str] = []
+        pe_placeholders: list[PlaceholderRange] = []
+        for tensor, (start, length) in zip(
+            prompt_embeds_tensors, positions, strict=True
+        ):
+            pe_kwargs_items.append(
+                MultiModalKwargsItem(
+                    {
+                        "embedding": MultiModalFieldElem(
+                            data=tensor,
+                            field=MultiModalSharedField(batch_size=1),
+                        )
+                    }
+                )
+            )
+            pe_hashes.append(MultiModalHasher.hash_kwargs(prompt_embeds=tensor))
+            # `is_embed=None` matches the existing image_embeds-style
+            # "no encoder, just splice the tensor directly" semantics.
+            pe_placeholders.append(
+                PlaceholderRange(offset=start, length=length, is_embed=None)
+            )
+
+        cast(
+            MultiModalKwargsItems[MultiModalKwargsItem | None],
+            engine_input["mm_kwargs"],
+        )["prompt_embeds"] = pe_kwargs_items
+        engine_input["mm_hashes"] = {
+            **engine_input["mm_hashes"],
+            "prompt_embeds": pe_hashes,
+        }
+        cast(dict, engine_input["mm_placeholders"])["prompt_embeds"] = pe_placeholders
diff --git a/vllm/renderers/inputs/preprocess.py b/vllm/renderers/inputs/preprocess.py
index 1828c4ff58d9..dcf80ac73899 100644
--- a/vllm/renderers/inputs/preprocess.py
+++ b/vllm/renderers/inputs/preprocess.py
@@ -4,7 +4,7 @@
 
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from collections.abc import Sequence
+from collections.abc import Mapping, Sequence
 from typing import TYPE_CHECKING, NamedTuple, TypeAlias, TypedDict, overload
 
 from vllm.inputs import (
@@ -116,6 +116,19 @@ class EncoderDecoderDictPrompt(TypedDict):
 """
 
 
+def _validate_prompt_dict(prompt: Mapping[str, object]) -> None:
+    """Reject malformed dict prompts before renderer tokenization."""
+    if (
+        "prompt" not in prompt
+        or "prompt_token_ids" in prompt
+        or "prompt_embeds" in prompt
+    ):
+        return
+
+    if not isinstance(prompt["prompt"], str):
+        raise TypeError("Prompt text should be a string")
+
+
 def parse_dec_only_prompt(prompt: PromptType | object) -> DecoderOnlyDictPrompt:
     """
     Parse a prompt for a decoder-only model and normalize it to a dictionary.
@@ -133,6 +146,8 @@ def parse_dec_only_prompt(prompt: PromptType | object) -> DecoderOnlyDictPrompt:
         if "encoder_prompt" in prompt:
             raise TypeError("Cannot pass encoder-decoder prompt to decoder-only models")
 
+        _validate_prompt_dict(prompt)
+
         if (
             "prompt" in prompt
             or "prompt_token_ids" in prompt
@@ -156,6 +171,8 @@ def _parse_enc_prompt(prompt: PromptType | object) -> EncoderDictPrompt:
         return TokensPrompt(prompt_token_ids=prompt)
 
     if isinstance(prompt, dict):
+        _validate_prompt_dict(prompt)
+
         if "prompt_embeds" in prompt:
             raise TypeError("Cannot pass embeddings prompt to encoder-decoder models")
 
@@ -178,6 +195,8 @@ def _parse_dec_prompt(prompt: PromptType | object) -> DecoderDictPrompt:
         return TokensPrompt(prompt_token_ids=prompt)
 
     if isinstance(prompt, dict):
+        _validate_prompt_dict(prompt)
+
         if "prompt_embeds" in prompt:
             raise TypeError("Cannot pass embeddings prompt to encoder-decoder models")
 
diff --git a/vllm/renderers/params.py b/vllm/renderers/params.py
index a2c95690c792..d5c89abc043b 100644
--- a/vllm/renderers/params.py
+++ b/vllm/renderers/params.py
@@ -198,13 +198,20 @@ def __post_init__(self) -> None:
         max_input_tokens = self.max_input_tokens
         truncate_prompt_tokens = self.truncate_prompt_tokens
 
+        if self.truncation_side not in (None, "left", "right"):
+            raise VLLMValidationError(
+                "`truncation_side` must be either 'left' or 'right'.",
+                parameter="truncation_side",
+                value=self.truncation_side,
+            )
+
         if (
             max_output_tokens is not None
             and max_total_tokens is not None
             and max_output_tokens > max_total_tokens
         ):
             raise VLLMValidationError(
-                f"{self.max_output_tokens_param}={max_output_tokens}"
+                f"{self.max_output_tokens_param}={max_output_tokens} "
                 f"cannot be greater than "
                 f"{self.max_total_tokens_param}={max_total_tokens=}. "
                 f"Please request fewer output tokens.",
@@ -234,6 +241,9 @@ def with_kwargs(self, **tokenization_kwargs: Any):
         truncate_prompt_tokens = tokenization_kwargs.pop(
             "truncate_prompt_tokens", self.truncate_prompt_tokens
         )
+        truncation_side = tokenization_kwargs.pop(
+            "truncation_side", self.truncation_side
+        )
         do_lower_case = tokenization_kwargs.pop("do_lower_case", self.do_lower_case)
         add_special_tokens = tokenization_kwargs.pop(
             "add_special_tokens", self.add_special_tokens
@@ -279,7 +289,7 @@ def with_kwargs(self, **tokenization_kwargs: Any):
             ),
             pad_prompt_tokens=pad_prompt_tokens,
             truncate_prompt_tokens=truncate_prompt_tokens,
-            truncation_side=self.truncation_side,
+            truncation_side=truncation_side,
             do_lower_case=do_lower_case,
             add_special_tokens=add_special_tokens,
             needs_detokenization=needs_detokenization,
@@ -295,11 +305,11 @@ def get_encode_kwargs(self) -> dict[str, Any]:
             # while still failing `self._token_len_check` as expected by users
             max_length = self.max_input_tokens + 1
 
-        # Left-side truncation requires the full token sequence so we can
-        # slice from the end in _token_truncation.  Disable HF-level
-        # truncation (which would incorrectly truncate from the right for
-        # pooling models) and let _token_truncation handle it.
-        if self.truncation_side == "left":
+        # Explicit truncation-side overrides require the full token sequence so
+        # we can slice from the requested side in _token_truncation. Disable
+        # tokenizer-level truncation because generation tokenizers default to
+        # left truncation while callers may request right truncation.
+        if self.truncation_side is not None and self.truncate_prompt_tokens is not None:
             return dict(
                 truncation=False,
                 add_special_tokens=self.add_special_tokens,
diff --git a/vllm/renderers/registry.py b/vllm/renderers/registry.py
index 85a34a986720..8263dd713a49 100644
--- a/vllm/renderers/registry.py
+++ b/vllm/renderers/registry.py
@@ -21,8 +21,9 @@
 
 _VLLM_RENDERERS = {
     "deepseek_v32": ("deepseek_v32", "DeepseekV32Renderer"),
-    "hf": ("hf", "HfRenderer"),
+    "deepseek_v4": ("deepseek_v4", "DeepseekV4Renderer"),
     "grok2": ("grok2", "Grok2Renderer"),
+    "hf": ("hf", "HfRenderer"),
     "kimi_audio": ("hf", "HfRenderer"),
     "mistral": ("mistral", "MistralRenderer"),
     "qwen_vl": ("hf", "HfRenderer"),
diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py
index 3a2a04fd77d4..e24bd4eef082 100644
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -25,6 +25,10 @@
 _SAMPLING_EPS = 1e-5
 _MAX_TEMP = 1e-2
 
+MAX_LOGPROB_TOKEN_IDS = 128
+"""Upper bound on `SamplingParams.logprob_token_ids` list length. Must match
+the per-request row width allocated by the sampler's `LogprobTokenIdsState`."""
+
 
 class SamplingType(IntEnum):
     GREEDY = 0
@@ -153,6 +157,14 @@ class RequestOutputKind(Enum):
     FINAL_ONLY = 2
 
 
+def _is_non_tekken_mistral(tokenizer: TokenizerLike) -> bool:
+    return is_mistral_tokenizer(tokenizer) and not tokenizer.is_tekken
+
+
+def _get_llg_tokenizer(tokenizer: TokenizerLike) -> Any:
+    return tokenizer.llg_tokenizer if is_mistral_tokenizer(tokenizer) else None
+
+
 class SamplingParams(
     PydanticMsgspecMixin,
     msgspec.Struct,
@@ -232,6 +244,12 @@ class SamplingParams(
     prompt_logprobs: int | None = None
     """Number of log probabilities to return per prompt token.
     When set to -1, return all `vocab_size` log probabilities."""
+    logprob_token_ids: list[int] | None = None
+    """Specific token IDs to return logprobs for. More efficient than
+    logprobs=-1 when you only need logprobs for a small set of tokens.
+    When set, logprobs for exactly these token IDs will be returned,
+    in addition to the sampled token. This is useful for scoring tasks
+    where you want to compare probabilities of specific label tokens."""
     flat_logprobs: bool = False
     """Whether to return logprobs in flatten format (i.e. FlatLogprob)
     for better performance.
@@ -276,6 +294,13 @@ class SamplingParams(
     """Arbitrary additional args, that can be used by custom sampling
     implementations, plugins, etc. Not used by any in-tree sampling
     implementations."""
+    routed_experts_prompt_start: int = 0
+    """When enable_return_routed_experts is active, skip the first
+    routed_experts_prompt_start prompt tokens from the returned routing
+    data. In multi-turn agent scenarios, set this to the length of the
+    already-returned prefix to avoid duplicating routing for prompt tokens
+    covered by earlier turns. Default 0 returns routing for all prompt
+    tokens."""
 
     # Fields used for bad words
     bad_words: list[str] | None = None
@@ -517,6 +542,12 @@ def _verify_args(self) -> None:
                 "stop strings are only supported when detokenize is True. "
                 "Set detokenize=True to use stop."
             )
+        assert isinstance(self.bad_words, list)
+        if any(not bad_word for bad_word in self.bad_words):
+            raise ValueError(
+                f"bad_words cannot contain an empty string. "
+                f"Got bad_words={self.bad_words}"
+            )
 
     def _verify_greedy_sampling(self) -> None:
         if self.n > 1:
@@ -614,6 +645,16 @@ def bad_words_token_ids(self) -> list[list[int]] | None:
         # For internal use only. Backward compatibility not guaranteed
         return self._bad_words_token_ids
 
+    @property
+    def num_logprobs(self) -> int | None:
+        """Number of sample logprobs to return per output token, or `None` if
+        no sample logprobs were requested. Takes `logprob_token_ids` into
+        account: when `logprobs` is unset but `logprob_token_ids` is set,
+        returns `len(logprob_token_ids)`."""
+        if self.logprobs is not None:
+            return self.logprobs
+        return len(self.logprob_token_ids) if self.logprob_token_ids else None
+
     def clone(self) -> "SamplingParams":
         """If skip_clone is True, uses shallow copy instead of deep copy."""
         if self.skip_clone:
@@ -652,6 +693,25 @@ def _validate_logprobs(self, model_config: ModelConfig) -> None:
                     value=num_logprobs,
                 )
 
+        # Validate logprob_token_ids.
+        if self.logprob_token_ids is not None:
+            n = len(self.logprob_token_ids)
+            if n > MAX_LOGPROB_TOKEN_IDS:
+                raise VLLMValidationError(
+                    f"Requested logprob_token_ids of length {n}, "
+                    f"which is greater than max allowed: {MAX_LOGPROB_TOKEN_IDS}",
+                    parameter="logprob_token_ids",
+                    value=n,
+                )
+            if self.logprobs is not None and self.logprobs != n:
+                raise VLLMValidationError(
+                    f"When both logprobs and logprob_token_ids are set, "
+                    f"logprobs must equal len(logprob_token_ids). Got "
+                    f"logprobs={self.logprobs}, len(logprob_token_ids)={n}.",
+                    parameter="logprob_token_ids",
+                    value=n,
+                )
+
         # Validate prompt logprobs.
         if num_prompt_logprobs := self.prompt_logprobs:
             if num_prompt_logprobs == -1:
@@ -795,17 +855,21 @@ def _validate_structured_outputs(
             # xgrammar with no fallback
             validate_xgrammar_grammar(self)
         elif backend.startswith("guidance"):
+            if _is_non_tekken_mistral(tokenizer=tokenizer):
+                raise ValueError(
+                    "Non-tekken Mistral tokenizers are not supported for the 'guidance'"
+                    " structured output backend. Please either use a more recent "
+                    "Mistral model, the ['xgrammar', 'outlines'] "
+                    "backends or tokenizer_mode='hf' instead."
+                )
             # TODO: ideally we would have the LLTokenizer here as Lark syntax
             # allows <|special_token|> and similar, see
             # https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens
             # Without tokenizer these are disallowed in grammars.
-            if is_mistral_tokenizer(tokenizer):
-                raise ValueError(
-                    "Mistral tokenizer is not supported for the 'guidance' "
-                    "structured output backend. Please use ['xgrammar', 'outlines'] "
-                    "backends or tokenizer_mode='hf' instead."
-                )
-            validate_guidance_grammar(self, tokenizer=None)
+            validate_guidance_grammar(
+                self,
+                tokenizer=_get_llg_tokenizer(tokenizer),
+            )
         elif backend == "outlines":
             # outlines backend
             validate_structured_output_request_outlines(self)
@@ -833,24 +897,28 @@ def _validate_structured_outputs(
                 # or includes some jsonschema feature(s) that
                 # are not supported in xgrammar.
 
+                skip_guidance = _is_non_tekken_mistral(tokenizer)
+
                 # Check if schema has features unsupported by guidance
                 so_params = self.structured_outputs
-                skip_guidance = False
-                if so_params.json:
+                if not skip_guidance and so_params.json:
                     if isinstance(so_params.json, str):
                         schema = json_mod.loads(so_params.json)
                     else:
                         schema = so_params.json
                     skip_guidance = has_guidance_unsupported_json_features(schema)
 
-                if is_mistral_tokenizer(tokenizer) or skip_guidance:
-                    # Fall back to outlines if the tokenizer is Mistral
-                    # or if schema contains features unsupported by guidance
+                if skip_guidance:
+                    # Fall back to outlines if the tokenizer is non-tekken Mistral or
+                    # the schema contains features unsupported by guidance
                     validate_structured_output_request_outlines(self)
                     self.structured_outputs._backend = "outlines"
                 else:
                     # Fall back to guidance by default.
-                    validate_guidance_grammar(self, tokenizer=None)
+                    validate_guidance_grammar(
+                        self,
+                        tokenizer=_get_llg_tokenizer(tokenizer),
+                    )
                     self.structured_outputs._backend = "guidance"
             # Remember that this backend was set automatically
             self.structured_outputs._backend_was_auto = True
diff --git a/vllm/tasks.py b/vllm/tasks.py
index 4e324c188519..017bc31197b6 100644
--- a/vllm/tasks.py
+++ b/vllm/tasks.py
@@ -16,6 +16,11 @@
 POOLING_TASKS: tuple[PoolingTask, ...] = get_args(PoolingTask)
 
 ScoreType = Literal["bi-encoder", "cross-encoder", "late-interaction"]
+SCORE_TYPE_MAP: dict[PoolingTask, ScoreType] = {
+    "embed": "bi-encoder",
+    "classify": "cross-encoder",
+    "token_embed": "late-interaction",
+}
 
 FrontendTask = Literal["render"]
 FRONTEND_TASKS: tuple[FrontendTask, ...] = get_args(FrontendTask)
diff --git a/vllm/tokenizers/__init__.py b/vllm/tokenizers/__init__.py
index 2daba409881f..6531989a9f35 100644
--- a/vllm/tokenizers/__init__.py
+++ b/vllm/tokenizers/__init__.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from .hf import maybe_make_thread_pool
 from .protocol import TokenizerLike
 from .registry import (
     TokenizerRegistry,
@@ -15,4 +16,5 @@
     "cached_get_tokenizer",
     "get_tokenizer",
     "cached_tokenizer_from_config",
+    "maybe_make_thread_pool",
 ]
diff --git a/vllm/tokenizers/deepseek_v4.py b/vllm/tokenizers/deepseek_v4.py
new file mode 100644
index 000000000000..2a6aaaf73975
--- /dev/null
+++ b/vllm/tokenizers/deepseek_v4.py
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import copy
+from typing import Any
+
+from transformers import PreTrainedTokenizerFast
+
+from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
+
+from .deepseek_v4_encoding import encode_messages
+from .hf import HfTokenizer, get_cached_tokenizer
+from .protocol import TokenizerLike
+
+
+def get_deepseek_v4_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:
+    """
+    Wraps a tokenizer to use the custom DeepSeek V4 chat template encoding.
+    """
+    dsv4_tokenizer = copy.copy(tokenizer)
+
+    added_vocab = tokenizer.get_added_vocab()
+    added_vocab_size = len(added_vocab)
+    tokenizer_vocab_size = tokenizer.vocab_size
+
+    class _DeepseekV4Tokenizer(tokenizer.__class__):  # type: ignore
+        def apply_chat_template(
+            self,
+            messages: list["ChatCompletionMessageParam"],
+            tools: list[dict[str, Any]] | None = None,
+            **kwargs,
+        ) -> str | list[int]:
+            thinking = kwargs.get("thinking", False)
+            enable_thinking = kwargs.get("enable_thinking", False)
+            thinking = thinking or enable_thinking
+            thinking_mode = "thinking" if thinking else "chat"
+
+            conversation = kwargs.get("conversation", messages)
+            messages = conversation.copy()
+            if tools is not None and len(tools) > 0:
+                messages.insert(0, {"role": "system"})
+                messages[0]["tools"] = tools  # type: ignore[typeddict-unknown-key]
+
+            reasoning_effort = kwargs.get("reasoning_effort")
+            if not isinstance(reasoning_effort, str):
+                reasoning_effort = None
+            elif reasoning_effort == "none":
+                thinking_mode = "chat"
+                reasoning_effort = None
+            elif reasoning_effort in ("max", "xhigh"):
+                reasoning_effort = "max"
+            else:
+                reasoning_effort = "high"
+
+            encode_config = dict(
+                thinking_mode=thinking_mode,
+                drop_thinking=kwargs.get("drop_thinking", True),
+                reasoning_effort=reasoning_effort,
+            )
+
+            prompt_str = encode_messages(messages, **encode_config)  # type: ignore
+
+            if kwargs.get("tokenize", True):
+                tokenizer_kwargs = {
+                    k: kwargs[k] for k in ("truncation", "max_length") if k in kwargs
+                }
+                return self.encode(
+                    prompt_str,
+                    add_special_tokens=False,
+                    **tokenizer_kwargs,
+                )
+
+            return prompt_str
+
+        def num_special_tokens_to_add(self) -> int:
+            return len(self.encode(""))
+
+        def __len__(self) -> int:
+            return tokenizer_vocab_size + added_vocab_size
+
+        def get_added_vocab(self) -> dict[str, int]:
+            return added_vocab.copy()
+
+        def __reduce__(self):
+            return get_deepseek_v4_tokenizer, (tokenizer,)
+
+    _DeepseekV4Tokenizer.__name__ = f"DSV4{tokenizer.__class__.__name__}"
+
+    dsv4_tokenizer.__class__ = _DeepseekV4Tokenizer
+    return dsv4_tokenizer
+
+
+class DeepseekV4Tokenizer(TokenizerLike):
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs) -> HfTokenizer:
+        tokenizer = PreTrainedTokenizerFast.from_pretrained(*args, **kwargs)
+        return get_cached_tokenizer(get_deepseek_v4_tokenizer(tokenizer))
diff --git a/vllm/tokenizers/deepseek_v4_encoding.py b/vllm/tokenizers/deepseek_v4_encoding.py
new file mode 100644
index 000000000000..6895771e2f59
--- /dev/null
+++ b/vllm/tokenizers/deepseek_v4_encoding.py
@@ -0,0 +1,757 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# ruff: noqa
+# fmt: off
+
+"""
+DeepSeek-V4 Encoding
+
+A self-contained implementation for encoding/decoding DeepSeek-V4 chat messages
+with tool calling, thinking mode, and quick instruction task support.
+"""
+
+from typing import Any, Dict, List, Union, Optional, Tuple
+import copy
+import json
+
+import regex as re
+
+# ============================================================
+# Special Tokens
+# ============================================================
+
+bos_token: str = "<｜begin▁of▁sentence｜>"
+eos_token: str = "<｜end▁of▁sentence｜>"
+thinking_start_token: str = "<think>"
+thinking_end_token: str = "</think>"
+dsml_token: str = "｜DSML｜"
+
+USER_SP_TOKEN = "<｜User｜>"
+ASSISTANT_SP_TOKEN = "<｜Assistant｜>"
+LATEST_REMINDER_SP_TOKEN = "<｜latest_reminder｜>"
+
+# Task special tokens for internal classification tasks
+DS_TASK_SP_TOKENS = {
+    "action": "<｜action｜>",
+    "query": "<｜query｜>",
+    "authority": "<｜authority｜>",
+    "domain": "<｜domain｜>",
+    "title": "<｜title｜>",
+    "read_url": "<｜read_url｜>",
+}
+VALID_TASKS = set(DS_TASK_SP_TOKENS.keys())
+
+# ============================================================
+# Templates
+# ============================================================
+
+system_msg_template: str = "{content}"
+user_msg_template: str = "{content}"
+latest_reminder_msg_template: str = "{content}"
+assistant_msg_template: str = "{reasoning}{content}{tool_calls}" + eos_token
+assistant_msg_wo_eos_template: str = "{reasoning}{content}{tool_calls}"
+thinking_template: str = "{reasoning}"
+
+response_format_template: str = (
+    "## Response Format:\n\nYou MUST strictly adhere to the following schema to reply:\n{schema}"
+)
+tool_call_template: str = (
+    "<{dsml_token}invoke name=\"{name}\">\n{arguments}\n</{dsml_token}invoke>"
+)
+tool_calls_template = (
+    "<{dsml_token}{tc_block_name}>\n{tool_calls}\n</{dsml_token}{tc_block_name}>"
+)
+tool_calls_block_name: str = "tool_calls"
+
+tool_output_template: str = (
+    "<tool_result>{content}</tool_result>"
+)
+
+REASONING_EFFORT_MAX = (
+    "Reasoning Effort: Absolute maximum with no shortcuts permitted.\n"
+    "You MUST be very thorough in your thinking and comprehensively decompose the problem to resolve the root cause, rigorously stress-testing your logic against all potential paths, edge cases, and adversarial scenarios.\n"
+    "Explicitly write out your entire deliberation process, documenting every intermediate step, considered alternative, and rejected hypothesis to ensure absolutely no assumption is left unchecked.\n\n"
+)
+
+TOOLS_TEMPLATE = """## Tools
+
+You have access to a set of tools to help answer the user's question. You can invoke tools by writing a "<{dsml_token}tool_calls>" block like the following:
+
+<{dsml_token}tool_calls>
+<{dsml_token}invoke name="$TOOL_NAME">
+<{dsml_token}parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</{dsml_token}parameter>
+...
+</{dsml_token}invoke>
+<{dsml_token}invoke name="$TOOL_NAME2">
+...
+</{dsml_token}invoke>
+</{dsml_token}tool_calls>
+
+String parameters should be specified as is and set `string="true"`. For all other types (numbers, booleans, arrays, objects), pass the value in JSON format and set `string="false"`.
+
+If thinking_mode is enabled (triggered by {thinking_start_token}), you MUST output your complete reasoning inside {thinking_start_token}...{thinking_end_token} BEFORE any tool calls or final response.
+
+Otherwise, output directly after {thinking_end_token} with tool calls or final response.
+
+### Available Tool Schemas
+
+{tool_schemas}
+
+You MUST strictly follow the above defined tool name and parameter schemas to invoke tool calls.
+"""
+
+# ============================================================
+# Utility Functions
+# ============================================================
+
+def to_json(value: Any) -> str:
+    """Serialize a value to JSON string."""
+    try:
+        return json.dumps(value, ensure_ascii=False)
+    except Exception:
+        return json.dumps(value, ensure_ascii=True)
+
+
+def tools_from_openai_format(tools):
+    """Extract function definitions from OpenAI-format tool list."""
+    return [tool["function"] for tool in tools]
+
+
+def tool_calls_from_openai_format(tool_calls):
+    """Convert OpenAI-format tool calls to internal format."""
+    return [
+        {
+            "name": tool_call["function"]["name"],
+            "arguments": tool_call["function"]["arguments"],
+        }
+        for tool_call in tool_calls
+    ]
+
+
+def tool_calls_to_openai_format(tool_calls):
+    """Convert internal tool calls to OpenAI format."""
+    return [
+        {
+            "type": "function",
+            "function": {
+                "name": tool_call["name"],
+                "arguments": tool_call["arguments"],
+            }
+        }
+        for tool_call in tool_calls
+    ]
+
+
+def encode_arguments_to_dsml(tool_call: Dict[str, Any]) -> str:
+    """
+    Encode tool call arguments into DSML parameter format.
+
+    Args:
+        tool_call: Dict with "name" and "arguments" keys.
+
+    Returns:
+        DSML-formatted parameter string.
+    """
+    p_dsml_template = '<{dsml_token}parameter name="{key}" string="{is_str}">{value}</{dsml_token}parameter>'
+    P_dsml_strs = []
+
+    if isinstance(tool_call["arguments"], str):
+        arguments = json.loads(tool_call["arguments"])
+    else:
+        arguments = tool_call["arguments"]
+
+    for k, v in arguments.items():
+        p_dsml_str = p_dsml_template.format(
+            dsml_token=dsml_token,
+            key=k,
+            is_str="true" if isinstance(v, str) else "false",
+            value=v if isinstance(v, str) else to_json(v),
+        )
+        P_dsml_strs.append(p_dsml_str)
+
+    return "\n".join(P_dsml_strs)
+
+
+def decode_dsml_to_arguments(tool_name: str, tool_args: Dict[str, Tuple[str, str]]) -> Dict[str, str]:
+    """
+    Decode DSML parameters back to a tool call dict.
+
+    Args:
+        tool_name: Name of the tool.
+        tool_args: Dict mapping param_name -> (value, is_string_flag).
+
+    Returns:
+        Dict with "name" and "arguments" (JSON string) keys.
+    """
+    def _decode_value(key: str, value: str, string: str):
+        if string == "true":
+            value = to_json(value)
+        return f"{to_json(key)}: {value}"
+
+    tool_args_json = "{" + ", ".join([_decode_value(k, v, string=is_str) for k, (v, is_str) in tool_args.items()]) + "}"
+    return dict(name=tool_name, arguments=tool_args_json)
+
+
+def render_tools(tools: List[Dict[str, Union[str, Dict[str, Any]]]]) -> str:
+    """
+    Render tool schemas into the system prompt format.
+
+    Args:
+        tools: List of tool schema dicts (each with name, description, parameters).
+
+    Returns:
+        Formatted tools section string.
+    """
+    tools_json = [to_json(t) for t in tools]
+
+    return TOOLS_TEMPLATE.format(
+        tool_schemas="\n".join(tools_json),
+        dsml_token=dsml_token,
+        thinking_start_token=thinking_start_token,
+        thinking_end_token=thinking_end_token,
+    )
+
+
+def find_last_user_index(messages: List[Dict[str, Any]]) -> int:
+    """Find the index of the last user/developer message."""
+    last_user_index = -1
+    for idx in range(len(messages) - 1, -1, -1):
+        if messages[idx].get("role") in ["user", "developer"]:
+            last_user_index = idx
+            break
+    return last_user_index
+
+
+# ============================================================
+# Message Rendering
+# ============================================================
+
+def render_message(index: int, messages: List[Dict[str, Any]], thinking_mode: str, drop_thinking: bool = True, reasoning_effort: Optional[str] = None) -> str:
+    """
+    Render a single message at the given index into its encoded string form.
+
+    This is the core function that converts each message in the conversation
+    into the DeepSeek-V4 format.
+
+    Args:
+        index: Index of the message to render.
+        messages: Full list of messages in the conversation.
+        thinking_mode: Either "chat" or "thinking".
+        drop_thinking: Whether to drop reasoning content from earlier turns.
+        reasoning_effort: Optional reasoning effort level ("max", "high", or None).
+
+    Returns:
+        Encoded string for this message.
+    """
+    assert 0 <= index < len(messages)
+    assert thinking_mode in ["chat", "thinking"], f"Invalid thinking_mode `{thinking_mode}`"
+
+    prompt = ""
+    msg = messages[index]
+    last_user_idx = find_last_user_index(messages)
+
+    role = msg.get("role")
+    content = msg.get("content")
+    tools = msg.get("tools")
+    response_format = msg.get("response_format")
+    tool_calls = msg.get("tool_calls")
+    reasoning = msg.get("reasoning")
+    wo_eos = msg.get("wo_eos", False)
+
+    if tools:
+        tools = tools_from_openai_format(tools)
+    if tool_calls:
+        tool_calls = tool_calls_from_openai_format(tool_calls)
+
+    # Reasoning effort prefix (only at index 0 in thinking mode with max effort)
+    assert reasoning_effort in ['max', None, 'high'], f"Invalid reasoning effort: {reasoning_effort}"
+    if index == 0 and thinking_mode == "thinking" and reasoning_effort == 'max':
+        prompt += REASONING_EFFORT_MAX
+
+    if role == "system":
+        prompt += system_msg_template.format(content=content or "")
+        if tools:
+            prompt += "\n\n" + render_tools(tools)
+        if response_format:
+            prompt += "\n\n" + response_format_template.format(schema=to_json(response_format))
+
+    elif role == "developer":
+        assert content, f"Invalid message for role `{role}`: {msg}"
+
+        content_developer = USER_SP_TOKEN
+        content_developer += content
+
+        if tools:
+            content_developer += "\n\n" + render_tools(tools)
+        if response_format:
+            content_developer += "\n\n" + response_format_template.format(schema=to_json(response_format))
+
+        prompt += user_msg_template.format(content=content_developer)
+
+    elif role == "user":
+        prompt += USER_SP_TOKEN
+
+        # Handle content blocks (tool results mixed with text)
+        content_blocks = msg.get("content_blocks")
+        if content_blocks:
+            parts = []
+            for block in content_blocks:
+                block_type = block.get("type")
+                if block_type == "text":
+                    parts.append(block.get("text", ""))
+                elif block_type == "tool_result":
+                    tool_content = block.get("content", "")
+                    if isinstance(tool_content, list):
+                        text_parts = []
+                        for b in tool_content:
+                            if b.get("type") == "text":
+                                text_parts.append(b.get("text", ""))
+                            else:
+                                text_parts.append(f"[Unsupported {b.get('type')}]")
+                        tool_content = "\n\n".join(text_parts)
+                    parts.append(tool_output_template.format(content=tool_content))
+                else:
+                    parts.append(f"[Unsupported {block_type}]")
+            prompt += "\n\n".join(parts)
+        else:
+            prompt += content or ""
+
+    elif role == "latest_reminder":
+        prompt += LATEST_REMINDER_SP_TOKEN + latest_reminder_msg_template.format(content=content)
+
+    elif role == "tool":
+        raise NotImplementedError("deepseek_v4 merges tool messages into user; please preprocess with merge_tool_messages()")
+
+    elif role == "assistant":
+        thinking_part = ""
+        tc_content = ""
+
+        if tool_calls:
+            tc_list = [
+                tool_call_template.format(
+                    dsml_token=dsml_token,
+                    name=tc.get("name"),
+                    arguments=encode_arguments_to_dsml(tc)
+                )
+                for tc in tool_calls
+            ]
+            tc_content += '\n\n' + tool_calls_template.format(
+                dsml_token=dsml_token,
+                tool_calls="\n".join(tc_list),
+                tc_block_name=tool_calls_block_name,
+            )
+
+        summary_content = content or ""
+        reasoning = reasoning or ""
+
+        # Check if previous message has a task - if so, this is a task output (no thinking)
+        prev_has_task = index - 1 >= 0 and messages[index - 1].get("task") is not None
+
+        if thinking_mode == "thinking" and not prev_has_task:
+            if not drop_thinking or index > last_user_idx:
+                thinking_part = thinking_template.format(reasoning=reasoning) + thinking_end_token
+            else:
+                thinking_part = ""
+
+        if wo_eos:
+            prompt += assistant_msg_wo_eos_template.format(
+                reasoning=thinking_part,
+                content=summary_content,
+                tool_calls=tc_content,
+            )
+        else:
+            prompt += assistant_msg_template.format(
+                reasoning=thinking_part,
+                content=summary_content,
+                tool_calls=tc_content,
+            )
+    else:
+        raise NotImplementedError(f"Unknown role: {role}")
+
+    # Append transition tokens based on what follows
+    if index + 1 < len(messages) and messages[index + 1].get("role") not in ["assistant", "latest_reminder"]:
+        return prompt
+
+    task = messages[index].get("task")
+    if task is not None:
+        # Task special token for internal classification tasks
+        assert task in VALID_TASKS, f"Invalid task: '{task}'. Valid tasks are: {list(VALID_TASKS)}"
+        task_sp_token = DS_TASK_SP_TOKENS[task]
+
+        if task != "action":
+            # Non-action tasks: append task sp token directly after the message
+            prompt += task_sp_token
+        else:
+            # Action task: append Assistant + thinking token + action sp token
+            prompt += ASSISTANT_SP_TOKEN
+            prompt += thinking_end_token if thinking_mode != "thinking" else thinking_start_token
+            prompt += task_sp_token
+
+    elif messages[index].get("role") in ["user", "developer"]:
+        # Normal generation: append Assistant + thinking token
+        prompt += ASSISTANT_SP_TOKEN
+        if not drop_thinking and thinking_mode == "thinking":
+            prompt += thinking_start_token
+        elif drop_thinking and thinking_mode == "thinking" and index >= last_user_idx:
+            prompt += thinking_start_token
+        else:
+            prompt += thinking_end_token
+
+    return prompt
+
+
+# ============================================================
+# Preprocessing
+# ============================================================
+
+def merge_tool_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """
+    Merge tool messages into the preceding user message using content_blocks format.
+
+    DeepSeek-V4 does not have a standalone "tool" role; instead, tool results
+    are encoded as <tool_result> blocks within user messages.
+
+    This function converts a standard OpenAI-format conversation (with separate
+    "tool" role messages) into V4 format where tool results are merged into
+    user messages.
+
+    Args:
+        messages: List of message dicts in OpenAI format.
+
+    Returns:
+        Processed message list with tool messages merged into user messages.
+    """
+    merged: List[Dict[str, Any]] = []
+
+    for msg in messages:
+        msg = copy.deepcopy(msg)
+        role = msg.get("role")
+
+        if role == "tool":
+            # Convert tool message to a user message with tool_result block
+            tool_block = {
+                "type": "tool_result",
+                "tool_use_id": msg.get("tool_call_id", ""),
+                "content": msg.get("content", ""),
+            }
+            # Merge into previous message if it's already a user (merged tool)
+            if merged and merged[-1].get("role") == "user" and "content_blocks" in merged[-1]:
+                merged[-1]["content_blocks"].append(tool_block)
+            else:
+                merged.append({
+                    "role": "user",
+                    "content_blocks": [tool_block],
+                })
+        elif role == "user":
+            text_block = {"type": "text", "text": msg.get("content", "")}
+            if merged and merged[-1].get("role") == "user" and "content_blocks" in merged[-1] and merged[-1].get("task") is None:
+                merged[-1]["content_blocks"].append(text_block)
+            else:
+                new_msg = {
+                    "role": "user",
+                    "content": msg.get("content", ""),
+                    "content_blocks": [text_block],
+                }
+                # Preserve extra fields (task, wo_eos, mask, etc.)
+                for key in ("task", "wo_eos", "mask"):
+                    if key in msg:
+                        new_msg[key] = msg[key]
+                merged.append(new_msg)
+        else:
+            merged.append(msg)
+
+    return merged
+
+
+def sort_tool_results_by_call_order(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """
+    Sort tool_result blocks within user messages by the order of tool_calls
+    in the preceding assistant message.
+
+    Args:
+        messages: Preprocessed message list (after merge_tool_messages).
+
+    Returns:
+        Message list with sorted tool result blocks.
+    """
+    last_tool_call_order: Dict[str, int] = {}
+
+    for msg in messages:
+        role = msg.get("role")
+        if role == "assistant" and msg.get("tool_calls"):
+            last_tool_call_order = {}
+            for idx, tc in enumerate(msg["tool_calls"]):
+                tc_id = tc.get("id") or tc.get("function", {}).get("id", "")
+                if tc_id:
+                    last_tool_call_order[tc_id] = idx
+
+        elif role == "user" and msg.get("content_blocks"):
+            tool_blocks = [b for b in msg["content_blocks"] if b.get("type") == "tool_result"]
+            if len(tool_blocks) > 1 and last_tool_call_order:
+                sorted_blocks = sorted(
+                    tool_blocks,
+                    key=lambda b: last_tool_call_order.get(b.get("tool_use_id", ""), 0)
+                )
+                sorted_idx = 0
+                new_blocks = []
+                for block in msg["content_blocks"]:
+                    if block.get("type") == "tool_result":
+                        new_blocks.append(sorted_blocks[sorted_idx])
+                        sorted_idx += 1
+                    else:
+                        new_blocks.append(block)
+                msg["content_blocks"] = new_blocks
+
+    return messages
+
+
+# ============================================================
+# Main Encoding Function
+# ============================================================
+
+def encode_messages(
+    messages: List[Dict[str, Any]],
+    thinking_mode: str,
+    context: Optional[List[Dict[str, Any]]] = None,
+    drop_thinking: bool = True,
+    add_default_bos_token: bool = True,
+    reasoning_effort: Optional[str] = None,
+) -> str:
+    """
+    Encode a list of messages into the DeepSeek-V4 prompt format.
+
+    This is the main entry point for encoding conversations. It handles:
+    - BOS token insertion
+    - Thinking mode with optional reasoning content dropping
+    - Tool message merging into user messages
+    - Multi-turn conversation context
+
+    Args:
+        messages: List of message dicts to encode.
+        thinking_mode: Either "chat" or "thinking".
+        context: Optional preceding context messages (already encoded prefix).
+        drop_thinking: If True, drop reasoning from earlier assistant turns
+                      (only keep reasoning for messages after the last user message).
+        add_default_bos_token: Whether to prepend BOS token at conversation start.
+        reasoning_effort: Optional reasoning effort level ("max", "high", or None).
+
+    Returns:
+        The encoded prompt string.
+    """
+    context = context if context else []
+
+    # Preprocess: merge tool messages and sort tool results
+    messages = merge_tool_messages(messages)
+    messages = sort_tool_results_by_call_order(context + messages)[len(context):]
+    if context:
+        context = merge_tool_messages(context)
+        context = sort_tool_results_by_call_order(context)
+
+    full_messages = context + messages
+
+    prompt = bos_token if add_default_bos_token and len(context) == 0 else ""
+
+    # Resolve drop_thinking: if any message has tools defined, don't drop thinking
+    effective_drop_thinking = drop_thinking
+    if any(m.get("tools") for m in full_messages):
+        effective_drop_thinking = False
+
+    if thinking_mode == "thinking" and effective_drop_thinking:
+        full_messages = _drop_thinking_messages(full_messages)
+        # After dropping, recalculate how many messages to render
+        # (context may have shrunk too)
+        num_to_render = len(full_messages) - len(_drop_thinking_messages(context))
+        context_len = len(full_messages) - num_to_render
+    else:
+        num_to_render = len(messages)
+        context_len = len(context)
+
+    for idx in range(num_to_render):
+        prompt += render_message(
+            idx + context_len,
+            full_messages,
+            thinking_mode=thinking_mode,
+            drop_thinking=effective_drop_thinking,
+            reasoning_effort=reasoning_effort,
+        )
+
+    return prompt
+
+
+def _drop_thinking_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """
+    Drop reasoning and non-essential messages before the last user message.
+
+    Behavior:
+    - Messages with role in ["user", "system", "tool", "latest_reminder"] are always kept.
+    - Messages at or after the last user index are always kept.
+    - Assistant messages before the last user get reasoning removed.
+    - Developer messages before the last user are dropped entirely.
+    """
+    last_user_idx = find_last_user_index(messages)
+    result = []
+    keep_roles = {"user", "system", "tool", "latest_reminder", "direct_search_results"}
+
+    for idx, msg in enumerate(messages):
+        role = msg.get("role")
+        if role in keep_roles or idx >= last_user_idx:
+            result.append(msg)
+        elif role == "assistant":
+            msg = copy.copy(msg)
+            msg.pop("reasoning", None)
+            result.append(msg)
+        # developer and other roles before last_user_idx are dropped
+
+    return result
+
+
+# ============================================================
+# Parsing (Decoding model output)
+# ============================================================
+
+def _read_until_stop(index: int, text: str, stop: List[str]) -> Tuple[int, str, Optional[str]]:
+    """
+    Read text from index until one of the stop strings is found.
+
+    Returns:
+        Tuple of (new_index, content_before_stop, matched_stop_string_or_None).
+    """
+    min_pos = len(text)
+    matched_stop = None
+
+    for s in stop:
+        pos = text.find(s, index)
+        if pos != -1 and pos < min_pos:
+            min_pos = pos
+            matched_stop = s
+
+    if matched_stop:
+        content = text[index:min_pos]
+        return min_pos + len(matched_stop), content, matched_stop
+    else:
+        content = text[index:]
+        return len(text), content, None
+
+
+def parse_tool_calls(index: int, text: str) -> Tuple[int, Optional[str], List[Dict[str, str]]]:
+    """
+    Parse DSML tool calls from text starting at the given index.
+
+    Args:
+        index: Starting position in text.
+        text: The full text to parse.
+
+    Returns:
+        Tuple of (new_index, last_stop_token, list_of_tool_call_dicts).
+        Each tool call dict has "name" and "arguments" keys.
+    """
+    tool_calls: List[Dict[str, Any]] = []
+    stop_token = None
+    tool_calls_end_token = f"</{dsml_token}{tool_calls_block_name}>"
+
+    while index < len(text):
+        index, content_before, stop_token = _read_until_stop(index, text, [f"<{dsml_token}invoke", tool_calls_end_token])
+        if content_before != ">\n":
+            raise ValueError(f"Tool call format error: expected '>\\n' but got '{content_before}'")
+
+        if stop_token == tool_calls_end_token:
+            break
+
+        if stop_token is None:
+            raise ValueError("Missing special token in tool calls")
+
+        index, tool_name_content, stop_token = _read_until_stop(index, text, [f"<{dsml_token}parameter", f"</{dsml_token}invoke"])
+
+        p_tool_name = re.findall(r'^\s*name="(.*?)">\n$', tool_name_content, flags=re.DOTALL)
+        if len(p_tool_name) != 1:
+            raise ValueError(f"Tool name format error: '{tool_name_content}'")
+        tool_name = p_tool_name[0]
+
+        tool_args: Dict[str, Tuple[str, str]] = {}
+        while stop_token == f"<{dsml_token}parameter":
+            index, param_content, stop_token = _read_until_stop(index, text, [f"/{dsml_token}parameter"])
+
+            param_kv = re.findall(r'^ name="(.*?)" string="(true|false)">(.*?)<$', param_content, flags=re.DOTALL)
+            if len(param_kv) != 1:
+                raise ValueError(f"Parameter format error: '{param_content}'")
+            param_name, string, param_value = param_kv[0]
+
+            if param_name in tool_args:
+                raise ValueError(f"Duplicate parameter name: '{param_name}'")
+            tool_args[param_name] = (param_value, string)
+
+            index, content, stop_token = _read_until_stop(index, text, [f"<{dsml_token}parameter", f"</{dsml_token}invoke"])
+            if content != ">\n":
+                raise ValueError(f"Parameter format error: expected '>\\n' but got '{content}'")
+
+        tool_call = decode_dsml_to_arguments(tool_name=tool_name, tool_args=tool_args)
+        tool_calls.append(tool_call)
+
+    return index, stop_token, tool_calls
+
+
+def parse_message_from_completion_text(text: str, thinking_mode: str) -> Dict[str, Any]:
+    """
+    Parse a model completion text into a structured assistant message.
+
+    This function takes the raw text output from the model (a single assistant turn)
+    and extracts:
+    - reasoning (thinking block)
+    - content (summary/response)
+    - tool_calls (if any)
+
+    NOTE: This function is designed to parse only correctly formatted strings and
+    will raise ValueError for malformed output.
+
+    Args:
+        text: The raw completion text (including EOS token).
+        thinking_mode: Either "chat" or "thinking".
+
+    Returns:
+        Dict with keys: "role", "content", "reasoning", "tool_calls".
+        tool_calls are in OpenAI format.
+    """
+    summary_content, reasoning = "", ""
+    tool_calls: List[Dict[str, str]] = []
+    index, stop_token = 0, None
+    tool_calls_start_token = f"\n\n<{dsml_token}{tool_calls_block_name}"
+
+    is_thinking = thinking_mode == "thinking"
+    is_tool_calling = False
+
+    if is_thinking:
+        index, content_delta, stop_token = _read_until_stop(index, text, [thinking_end_token, tool_calls_start_token])
+        reasoning = content_delta
+        if stop_token != thinking_end_token:
+            raise ValueError("Invalid thinking format: missing </think>")
+
+    index, content_delta, stop_token = _read_until_stop(index, text, [eos_token, tool_calls_start_token])
+    summary_content = content_delta
+    if stop_token == tool_calls_start_token:
+        is_tool_calling = True
+    else:
+        if stop_token != eos_token:
+            raise ValueError("Invalid format: missing EOS token")
+
+    if is_tool_calling:
+        index, stop_token, tool_calls = parse_tool_calls(index, text)
+
+        index, tool_ends_text, stop_token = _read_until_stop(index, text, [eos_token])
+        if tool_ends_text:
+            raise ValueError("Unexpected content after tool calls")
+
+    if len(text) != index or stop_token not in [eos_token, None]:
+        raise ValueError("Unexpected content at end")
+
+    for sp_token in [bos_token, eos_token, thinking_start_token, thinking_end_token, dsml_token]:
+        if sp_token in summary_content or sp_token in reasoning:
+            raise ValueError(f"Unexpected special token '{sp_token}' in content")
+
+    return {
+        "role": "assistant",
+        "content": summary_content,
+        "reasoning": reasoning,
+        "tool_calls": tool_calls_to_openai_format(tool_calls)
+    }
+
+# fmt: on
diff --git a/vllm/tokenizers/fastokens.py b/vllm/tokenizers/fastokens.py
new file mode 100644
index 000000000000..5f080a549dbd
--- /dev/null
+++ b/vllm/tokenizers/fastokens.py
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""``fastokens`` backend patch.
+
+When ``VLLM_USE_FASTOKENS=1`` is set, ``fastokens.patch_transformers()`` swaps
+the inner Rust tokenizer of every HF fast tokenizer loaded afterwards with the
+fastokens shim and rebinds ``tokenizers.decoders.DecodeStream`` so the
+streaming detokenizer accepts the shim. The patch is process-global and
+idempotent, so it applies to any tokenizer mode that ends up loading an HF
+fast tokenizer (`hf`, `deepseek_v32`, `deepseek_v4`, `qwen_vl`, …).
+"""
+
+from importlib.metadata import PackageNotFoundError, version
+
+from packaging.version import Version
+
+_MIN_FASTOKENS_VERSION = "0.2.0"
+
+
+def apply_fastokens_patch() -> None:
+    try:
+        import fastokens
+    except ImportError as e:
+        raise ImportError(
+            f"The 'fastokens' package (>= {_MIN_FASTOKENS_VERSION}) is required "
+            "when VLLM_USE_FASTOKENS=1."
+        ) from e
+
+    try:
+        installed = version("fastokens")
+    except PackageNotFoundError:
+        installed = None
+    if installed is None or Version(installed) < Version(_MIN_FASTOKENS_VERSION):
+        raise ImportError(
+            f"fastokens >= {_MIN_FASTOKENS_VERSION} is required when "
+            f"VLLM_USE_FASTOKENS=1 (found {installed or 'unknown'})."
+        )
+
+    fastokens.patch_transformers()
diff --git a/vllm/tokenizers/hf.py b/vllm/tokenizers/hf.py
index 85c812398529..b4248e229a68 100644
--- a/vllm/tokenizers/hf.py
+++ b/vllm/tokenizers/hf.py
@@ -2,8 +2,9 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import contextlib
 import copy
+import queue
 from pathlib import Path
-from typing import TypeAlias
+from typing import TypeAlias, TypeVar
 
 from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
 
@@ -12,6 +13,92 @@
 from .protocol import TokenizerLike
 
 HfTokenizer: TypeAlias = PreTrainedTokenizer | PreTrainedTokenizerFast
+_T = TypeVar("_T", bound=TokenizerLike)
+
+
+class ThreadSafeHFTokenizerMixin:
+    """Mixin class for thread-safe HF fast tokenizers."""
+
+    pass
+
+
+def maybe_make_thread_pool(tokenizer: _T, copies: int = 1):
+    """
+    If `tokenizer` is a `PreTrainedTokenizerFast`, modify the tokenizer
+    in-place to make the public interface thread-safe by routing calls
+    through a deep-copied tokenizer pool.
+
+    Note that:
+    - Only ``TokenizerLike``'s public interface is thread-safe.
+      This doesn't include ``_tokenizer`` property nor any mutation
+      methods like ``add_special_tokens`` or ``add_tokens``.
+    - Adjacent method calls could happen on different deep copies.
+    """
+    if not isinstance(tokenizer, PreTrainedTokenizerFast) or isinstance(
+        tokenizer, ThreadSafeHFTokenizerMixin
+    ):
+        return tokenizer
+
+    og_tokenizer = copy.copy(tokenizer)
+
+    tokenizer_pool: queue.Queue[PreTrainedTokenizerFast] = queue.Queue()
+    for _ in range(copies):
+        tokenizer_pool.put(copy.deepcopy(og_tokenizer))
+
+    @contextlib.contextmanager
+    def _borrow_from_pool():
+        try:
+            tok = tokenizer_pool.get_nowait()
+            yield tok
+        except queue.Empty:
+            tok = copy.deepcopy(og_tokenizer)
+            yield tok
+        finally:
+            tokenizer_pool.put(tok)
+
+    class TokenizerPool(tokenizer.__class__, ThreadSafeHFTokenizerMixin):  # type: ignore
+        def apply_chat_template(self, *args, **kwargs):
+            with _borrow_from_pool() as tok:
+                return tok.apply_chat_template(*args, **kwargs)
+
+        def batch_decode(self, *args, **kwargs):
+            with _borrow_from_pool() as tok:
+                return tok.batch_decode(*args, **kwargs)
+
+        def batch_encode(self, *args, **kwargs):
+            with _borrow_from_pool() as tok:
+                return tok.batch_encode(*args, **kwargs)
+
+        def convert_tokens_to_ids(self, *args, **kwargs):
+            with _borrow_from_pool() as tok:
+                return tok.convert_tokens_to_ids(*args, **kwargs)
+
+        def convert_ids_to_tokens(self, *args, **kwargs):
+            with _borrow_from_pool() as tok:
+                return tok.convert_ids_to_tokens(*args, **kwargs)
+
+        def convert_tokens_to_string(self, *args, **kwargs):
+            with _borrow_from_pool() as tok:
+                return tok.convert_tokens_to_string(*args, **kwargs)
+
+        def decode(self, *args, **kwargs):
+            with _borrow_from_pool() as tok:
+                return tok.decode(*args, **kwargs)
+
+        def encode(self, *args, **kwargs):
+            with _borrow_from_pool() as tok:
+                return tok.encode(*args, **kwargs)
+
+        def __call__(self, *args, **kwargs):
+            with _borrow_from_pool() as tok:
+                return tok(*args, **kwargs)
+
+        def __reduce__(self):
+            return maybe_make_thread_pool, (og_tokenizer, copies)
+
+    TokenizerPool.__name__ = f"TokenizerPool{og_tokenizer.__class__.__name__}"
+
+    tokenizer.__class__ = TokenizerPool
 
 
 def get_cached_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:
@@ -103,7 +190,10 @@ def from_pretrained(
                     "is a custom tokenizer not yet available in the "
                     "HuggingFace transformers library, consider "
                     "setting `trust_remote_code=True` in LLM or using "
-                    "the `--trust-remote-code` flag in the CLI."
+                    "the `--trust-remote-code` flag in the CLI. If the "
+                    "model was created with a newer version of "
+                    "transformers, consider upgrading: "
+                    "`uv pip install --upgrade transformers`"
                 )
                 raise RuntimeError(err_msg) from e
             else:
diff --git a/vllm/tokenizers/mistral.py b/vllm/tokenizers/mistral.py
index e20f1edd472e..8fce690433ef 100644
--- a/vllm/tokenizers/mistral.py
+++ b/vllm/tokenizers/mistral.py
@@ -1,16 +1,18 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from collections.abc import Sequence
+from functools import cached_property
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, cast, overload
 
+from mistral_common.guidance.grammar_factory import GrammarFactory
+from mistral_common.guidance.tokenizer import from_mistral_tokenizer
 from mistral_common.protocol.instruct.request import (
     ChatCompletionRequest as MistralChatCompletionRequest,
 )
 from mistral_common.protocol.instruct.request import (
     ReasoningEffort,
 )
-from mistral_common.protocol.instruct.tool_calls import Function, Tool
 from mistral_common.protocol.instruct.validator import ValidationMode
 from mistral_common.tokens.tokenizers.base import (
     SpecialTokenPolicy,
@@ -45,11 +47,26 @@
     )
 
 if TYPE_CHECKING:
+    import llguidance
     from transformers import BatchEncoding
 
 logger = init_logger(__name__)
 
 
+def _pop_unallowed_keys_and_warn(
+    dictionary: dict[str, Any], allowed_keys: set[str], err_dict_name: str
+):
+    keys = list(dictionary.keys())
+    for key in keys:
+        if key not in allowed_keys:
+            dictionary.pop(key)
+            logger.warning_once(
+                f"'{key=}' is not supported by mistral-common "
+                f"for {err_dict_name}. It has been popped from the "
+                "object."
+            )
+
+
 def maybe_serialize_tool_calls(request: "MistralChatCompletionRequest"):
     # SEE: https://github.com/vllm-project/vllm/pull/9951
     # Credits go to: @gcalmettes
@@ -119,12 +136,11 @@ def truncate_tool_call_ids(request: "MistralChatCompletionRequest"):
                 request.messages[i]["tool_call_id"] = tool_call_id
 
 
-def _prepare_apply_chat_template_tools_and_messages(
+def _validate_apply_chat_template_args(
     messages: list["ChatCompletionMessageParam"],
-    tools: list[dict[str, Any]] | None = None,
     continue_final_message: bool = False,
     add_generation_prompt: bool = False,
-) -> tuple[list["ChatCompletionMessageParam"], list[dict[str, Any]] | None]:
+) -> None:
     if add_generation_prompt and continue_final_message:
         raise ValueError(
             "Cannot set both `add_generation_prompt` and "
@@ -148,54 +164,6 @@ def _prepare_apply_chat_template_tools_and_messages(
             "the last message is not from the assistant."
         )
 
-    # mistral-common requires AssistantMessage content to be string [1].
-    #
-    # [1]: https://github.com/mistralai/mistral-common/blob/f4a06998b75ed78bbf5aaf569590b772ea26c9f6/src/mistral_common/protocol/instruct/messages.py#L80
-    for message in messages:
-        # Remove reasoning as unsupported by Mistral
-        _ = message.pop("reasoning", None)  # type: ignore
-
-    # The Mistral client, in comparison to the OpenAI client, requires the
-    # "parameters" dict and the "description" string to be present
-    # even if they are empty.
-    if tools:
-        for function in [
-            tool["function"] for tool in tools if tool["type"] == "function"
-        ]:
-            if function.get("parameters") is None:
-                function["parameters"] = {}
-            if function.get("description") is None:
-                function["description"] = ""
-
-        # We filter not supported arguments to avoid throwing an error.
-        # TODO(juliendenize): remove this once OpenAI API is better supported by
-        # `mistral-common`.
-        tools_fields = set(Tool.model_fields.keys())
-        function_fields = set(Function.model_fields.keys())
-        for tool in tools:
-            tool_keys = list(tool.keys())
-            for tool_key in tool_keys:
-                if tool_key not in tools_fields:
-                    tool.pop(tool_key)
-                    logger.warning_once(
-                        f"'{tool_key}' is not supported by mistral-common for tools. "
-                        "It has been popped from the tool definition."
-                    )
-                if tool["type"] == "function":
-                    function_keys = list(tool["function"].keys())
-                    for function_key in function_keys:
-                        if function_key not in function_fields:
-                            tool["function"].pop(function_key)
-                            logger.warning_once(
-                                f"'{function_key}' is not supported by mistral-common "
-                                "for function tools. It has been popped from the "
-                                "function definition."
-                            )
-                else:
-                    raise ValueError("mistral-common only supports function tools.")
-
-    return messages, tools
-
 
 def validate_request_params(request: "ChatCompletionRequest"):
     if request.chat_template is not None or request.chat_template_kwargs is not None:
@@ -434,8 +402,8 @@ def apply_chat_template(
         if self.version >= 15:
             version_kwargs["reasoning_effort"] = kwargs.get("reasoning_effort")
 
-        messages, tools = _prepare_apply_chat_template_tools_and_messages(
-            messages, tools, continue_final_message, add_generation_prompt
+        _validate_apply_chat_template_args(
+            messages, continue_final_message, add_generation_prompt
         )
 
         return self.transformers_tokenizer.apply_chat_template(
@@ -574,3 +542,24 @@ def convert_ids_to_tokens(
             ]
 
         return tokens
+
+    @property
+    def supports_grammar(self) -> bool:
+        return GrammarFactory.is_supported(self.mistral)
+
+    @cached_property
+    def grammar_factory(self) -> GrammarFactory:
+        if not self.supports_grammar:
+            raise AttributeError(
+                "This tokenizer does not support `grammar_factory`. "
+                "This is only supported for tekken tokenizers with "
+                "version >= 11."
+            )
+        # Cache grammar factory to avoid creating a llguidance tokenizer at every usage.
+        return GrammarFactory(self.mistral)
+
+    @cached_property
+    def llg_tokenizer(self) -> "llguidance.LLTokenizer":
+        if not self.is_tekken:
+            raise ValueError("`llg_tokenizer` is only supported for Tekkenizers.")
+        return from_mistral_tokenizer(self.mistral)
diff --git a/vllm/tokenizers/registry.py b/vllm/tokenizers/registry.py
index 7d48e3c6ff91..d72772ea00cb 100644
--- a/vllm/tokenizers/registry.py
+++ b/vllm/tokenizers/registry.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import contextlib
 from dataclasses import dataclass, field
 from functools import lru_cache
 from pathlib import Path
@@ -10,6 +11,7 @@
 
 import vllm.envs as envs
 from vllm.logger import init_logger
+from vllm.transformers_utils.config import get_config
 from vllm.transformers_utils.gguf_utils import (
     check_gguf_file,
     get_gguf_file_path_from_hf,
@@ -31,8 +33,16 @@
 logger = init_logger(__name__)
 
 
+# Model types whose hub tokenizer_class is incorrect and should be overridden with
+# TokenizersBackend (the generic fast tokenizer). Adding a model type here is always a
+# temporary workaround and better long term solutions are:
+# - Add model type to MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASS in transformers (better)
+# - Fix tokenizer_class on the hub for the affected models (best)
+_MODEL_TYPES_WITH_INCORRECT_TOKENIZER_CLASS: set[str] = {"step3_vl"}
+
 _VLLM_TOKENIZERS = {
     "deepseek_v32": ("deepseek_v32", "DeepseekV32Tokenizer"),
+    "deepseek_v4": ("deepseek_v4", "DeepseekV4Tokenizer"),
     "grok2": ("grok2", "Grok2Tokenizer"),
     "hf": ("hf", "CachedHfTokenizer"),
     "kimi_audio": ("kimi_audio", "KimiAudioTokenizer"),
@@ -193,6 +203,13 @@ def get_tokenizer(
     **kwargs,
 ) -> _T:
     """Gets a tokenizer for the given model name via HuggingFace or ModelScope."""
+    if envs.VLLM_USE_FASTOKENS:
+        # Process-global, idempotent patch that swaps the Rust BPE backend
+        # of any HF fast tokenizer loaded afterwards. No-op for non-HF modes.
+        from .fastokens import apply_fastokens_patch
+
+        apply_fastokens_patch()
+
     tokenizer_mode, tokenizer_name, args, kwargs = cached_resolve_tokenizer_args(
         tokenizer_name,
         *args,
@@ -202,7 +219,31 @@ def get_tokenizer(
         **kwargs,
     )
 
-    if tokenizer_cls == TokenizerLike:
+    # Ensure that, if the config were to come from vllm.transformers_utils.config, it is
+    # registered with AutoConfig before the tokenizer is loaded. This is necessary since
+    # tokenizer_cls_.from_pretrained will call AutoConfig.from_pretrained internally.
+    # This may fail for paths that don't have a model config (e.g. LoRA adapters),
+    # which is fine — those don't need custom config registration.
+    config = None
+    with contextlib.suppress(ValueError, OSError):
+        config = get_config(
+            tokenizer_name,
+            trust_remote_code=trust_remote_code,
+            revision=revision,
+        )
+
+    # Some models have an incorrect tokenizer_class on the hub.
+    # For these model types, bypass AutoTokenizer and use TokenizersBackend directly.
+    model_type = getattr(config, "model_type", None) if config else None
+    if model_type in _MODEL_TYPES_WITH_INCORRECT_TOKENIZER_CLASS:
+        from transformers.tokenization_utils_tokenizers import TokenizersBackend
+
+        logger.debug(
+            "Overriding tokenizer_class to TokenizersBackend for model_type=%r",
+            model_type,
+        )
+        tokenizer_cls_ = TokenizersBackend
+    elif tokenizer_cls == TokenizerLike:
         tokenizer_cls_ = TokenizerRegistry.load_tokenizer_cls(tokenizer_mode)
     else:
         tokenizer_cls_ = tokenizer_cls
diff --git a/vllm/tool_parsers/__init__.py b/vllm/tool_parsers/__init__.py
index f480a635c6ad..a9a446b40159 100644
--- a/vllm/tool_parsers/__init__.py
+++ b/vllm/tool_parsers/__init__.py
@@ -34,6 +34,18 @@
         "deepseekv32_tool_parser",
         "DeepSeekV32ToolParser",
     ),
+    "deepseek_v4": (
+        "deepseekv4_tool_parser",
+        "DeepSeekV4ToolParser",
+    ),
+    "cohere_command3": (
+        "cohere_command_tool_parser",
+        "CohereCommand3ToolParser",
+    ),
+    "cohere_command4": (
+        "cohere_command_tool_parser",
+        "CohereCommand4ToolParser",
+    ),
     "ernie45": (
         "ernie45_tool_parser",
         "Ernie45ToolParser",
@@ -62,10 +74,18 @@
         "hermes_tool_parser",
         "Hermes2ProToolParser",
     ),
+    "poolside_v1": (
+        "poolside_v1_tool_parser",
+        "PoolsideV1ToolParser",
+    ),
     "hunyuan_a13b": (
         "hunyuan_a13b_tool_parser",
         "HunyuanA13BToolParser",
     ),
+    "hy_v3": (
+        "hy_v3_tool_parser",
+        "HYV3ToolParser",
+    ),
     "internlm": (
         "internlm2_tool_parser",
         "Internlm2ToolParser",
@@ -74,6 +94,10 @@
         "jamba_tool_parser",
         "JambaToolParser",
     ),
+    "lfm2": (
+        "lfm2_tool_parser",
+        "Lfm2ToolParser",
+    ),
     "kimi_k2": (
         "kimi_k2_tool_parser",
         "KimiK2ToolParser",
@@ -94,6 +118,10 @@
         "longcat_tool_parser",
         "LongcatFlashToolParser",
     ),
+    "mimo": (
+        "qwen3xml_tool_parser",
+        "Qwen3XMLToolParser",
+    ),
     "minimax_m2": (
         "minimax_m2_tool_parser",
         "MinimaxM2ToolParser",
@@ -154,6 +182,14 @@
         "functiongemma_tool_parser",
         "FunctionGemmaToolParser",
     ),
+    "gemma4": (
+        "gemma4_tool_parser",
+        "Gemma4ToolParser",
+    ),
+    "apertus": (
+        "apertus_tool_parser",
+        "ApertusToolParser",
+    ),
 }
 
 
diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index dcfe45d388f1..c3438082a72d 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -2,18 +2,19 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import importlib
+import json
 import os
 from collections.abc import Callable, Sequence
 from functools import cached_property
-from typing import TypeAlias
 
 from openai.types.responses import (
     ResponseFormatTextJSONSchemaConfig,
     ResponseTextConfig,
 )
-from openai.types.responses.tool import Tool as ResponsesTool
+from openai.types.responses.function_tool import FunctionTool
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
     ChatCompletionToolsParam,
 )
@@ -24,18 +25,19 @@
 from vllm.entrypoints.openai.responses.protocol import (
     ResponsesRequest,
 )
+from vllm.envs import VLLM_ENFORCE_STRICT_TOOL_CALLING
 from vllm.logger import init_logger
 from vllm.sampling_params import (
     StructuredOutputsParams,
 )
 from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers.utils import get_json_schema_from_tools
+from vllm.tool_parsers.utils import Tool, get_json_schema_from_tools
 from vllm.utils.collection_utils import is_list_of
 from vllm.utils.import_utils import import_from_path
 
-logger = init_logger(__name__)
+__all__ = ["Tool"]
 
-Tool: TypeAlias = ChatCompletionToolsParam | ResponsesTool
+logger = init_logger(__name__)
 
 
 class ToolParser:
@@ -45,6 +47,17 @@ class ToolParser:
     derived classes.
     """
 
+    # When True (default), the serving layer uses the standard JSON-based
+    # parsing for tool_choice="required" and named function tool_choice,
+    # which works for models where guided decoding produces well-formed
+    # JSON output (e.g. Hermes).
+    # Subclasses set False when the standard parsing does not work for
+    # their model's output format (e.g. GLM models that use XML).  When
+    # False, the serving layer falls back to the tool_parser's
+    # extract_tool_calls / extract_tool_calls_streaming methods for
+    # required/named tool_choice, treating them the same as "auto".
+    supports_required_and_named: bool = True
+
     def __init__(
         self,
         tokenizer: TokenizerLike,
@@ -57,7 +70,14 @@ def __init__(
         self.streamed_args_for_tool: list[str] = []
 
         self.model_tokenizer = tokenizer
-        self.tools = tools
+        if tools:
+            self.tools: list[ChatCompletionToolsParam | FunctionTool] = [
+                tool
+                for tool in tools
+                if isinstance(tool, (ChatCompletionToolsParam, FunctionTool))
+            ]
+        else:
+            self.tools = []
 
     @cached_property
     def vocab(self) -> dict[str, int]:
@@ -65,12 +85,40 @@ def vocab(self) -> dict[str, int]:
         # whereas all tokenizers have .get_vocab()
         return self.model_tokenizer.get_vocab()
 
-    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
-        """
-        Static method that used to adjust the request parameters.
-        """
+    def adjust_request(
+        self,
+        request: ChatCompletionRequest | ResponsesRequest,
+    ) -> ChatCompletionRequest | ResponsesRequest:
+        # If there are no tools, return the request as is.
         if not request.tools:
             return request
+
+        # Step 1 (highest priority for ChatCompletionRequest): apply
+        # vLLM-owned structural tag support for model-specific tool formats.
+        if (
+            isinstance(request, ChatCompletionRequest)
+            and VLLM_ENFORCE_STRICT_TOOL_CALLING
+        ):
+            need_tool_calling = (
+                request.tool_choice == "auto"
+                or request.tool_choice == "required"
+                or isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam)
+            )
+            if need_tool_calling:
+                structure_tag = self.get_structural_tag(request)
+                if structure_tag is not None:
+                    if request.structured_outputs is None:
+                        request.structured_outputs = StructuredOutputsParams(
+                            structural_tag=json.dumps(structure_tag.model_dump()),
+                        )
+                    else:
+                        request.structured_outputs.structural_tag = json.dumps(
+                            structure_tag.model_dump()
+                        )
+                    return request
+
+        # Step 2: set structured output params when tool constraints are
+        # derived from the tool schema.
         json_schema_from_tool = get_json_schema_from_tools(
             tool_choice=request.tool_choice, tools=request.tools
         )
@@ -84,17 +132,27 @@ def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionReques
                 )
                 request.response_format = None
             if isinstance(request, ResponsesRequest):
-                request.text = ResponseTextConfig()
-                request.text.format = ResponseFormatTextJSONSchemaConfig(
-                    name="tool_calling_response",
-                    schema=json_schema_from_tool,
-                    type="json_schema",
-                    description="Response format for tool calling",
-                    strict=True,
+                # Single-shot construction so Pydantic v2 tracks `format`
+                # in __fields_set__ — assigning to `.format` after the bare
+                # `ResponseTextConfig()` constructor does not, which can
+                # drop the nested config from `model_dump`. Also drop the
+                # `description` kwarg: it is not a field on
+                # ResponseFormatTextJSONSchemaConfig and was being silently
+                # passed through as extra.
+                request.text = ResponseTextConfig(
+                    format=ResponseFormatTextJSONSchemaConfig(
+                        type="json_schema",
+                        name="tool_calling_response",
+                        schema=json_schema_from_tool,
+                        strict=True,
+                    )
                 )
 
         return request
 
+    def get_structural_tag(self, request: ChatCompletionRequest):
+        return None
+
     def extract_tool_calls(
         self, model_output: str, request: ChatCompletionRequest
     ) -> ExtractedToolCallInformation:
diff --git a/vllm/tool_parsers/apertus_tool_parser.py b/vllm/tool_parsers/apertus_tool_parser.py
new file mode 100644
index 000000000000..cd2ba272f27b
--- /dev/null
+++ b/vllm/tool_parsers/apertus_tool_parser.py
@@ -0,0 +1,553 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Tool call parser for Apertus models.
+
+Extracts tool calls from the format:
+<|tools_prefix|>[{"function_name": {"arg1": "value1", ...}}, ...]<|tools_suffix|>
+
+Used when --enable-auto-tool-choice --tool-call-parser apertus are set.
+"""
+
+import json
+from collections.abc import Sequence
+
+import regex as re
+from partial_json_parser.core.options import Allow
+
+from vllm.entrypoints.chat_utils import make_tool_call_id
+from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+from vllm.entrypoints.openai.engine.protocol import (
+    DeltaFunctionCall,
+    DeltaMessage,
+    DeltaToolCall,
+    ExtractedToolCallInformation,
+    FunctionCall,
+    ToolCall,
+)
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+from vllm.logger import init_logger
+from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.abstract_tool_parser import (
+    Tool,
+    ToolParser,
+)
+from vllm.tool_parsers.utils import (
+    find_common_prefix,
+    partial_json_loads,
+)
+
+logger = init_logger(__name__)
+
+# Apertus special tokens for tool calls
+TOOL_CALLS_PREFIX = "<|tools_prefix|>"
+TOOL_CALLS_SUFFIX = "<|tools_suffix|>"
+
+
+class ApertusToolParser(ToolParser):
+    """
+    Tool call parser for Apertus models.
+
+    Handles the extraction of tool calls from text in both non-streaming
+    (complete string) and streaming (chunked token) environments.
+
+    The expected Apertus function call format is a JSON array of single-key dictionaries
+    sandwiched between special tokens:
+    `<|tools_prefix|>[{"function_name": {"arg1": "value1"}}, ...]<|tools_suffix|>`
+
+    Examples:
+        >>> tokenizer = ...  # Mock tokenizer
+        >>> parser = ApertusToolParser(tokenizer)
+        >>> output = 'I will check. <|tools_prefix|>[{"get_weather": '\
+            '{"city": "Paris"}}]<|tools_suffix|>'
+        >>> request = ChatCompletionRequest(...)
+        >>> info = parser.extract_tool_calls(output, request)
+        >>> info.content
+        "I will check."
+        >>> info.tool_calls[0].function.name
+        "get_weather"
+        >>> info.tool_calls[0].function.arguments
+        '{"city": "Paris"}'
+    """
+
+    def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+        """
+        Initializes the ApertusToolParser.
+
+        Args:
+            tokenizer: The model's tokenizer.
+                Must be provided to interact with special tokens.
+            tools: Optional list of tools available for the current request.
+
+        Raises:
+            ValueError: If the `model_tokenizer`
+                is not successfully passed to the base class.
+        """
+        super().__init__(tokenizer, tools)
+
+        if not self.model_tokenizer:
+            raise ValueError(
+                "The model tokenizer must be passed to the ToolParser "
+                "constructor during construction."
+            )
+        # Regex to extract tool calls block (suffix is optional for incomplete outputs)
+        self.tool_call_regex = re.compile(
+            rf"{re.escape(TOOL_CALLS_PREFIX)}"
+            rf"(.*?)"
+            rf"(?:{re.escape(TOOL_CALLS_SUFFIX)}|$)",
+            re.DOTALL,
+        )
+
+        self._reset_streaming_state()
+
+    def _reset_streaming_state(self) -> None:
+        """
+        Resets all streaming state variables for a new completion request.
+
+        This clears the delta text buffer and resets the pointers used to
+        track the currently streaming tool index and arguments. Called implicitly
+        during initialization and should be called between separate streams.
+        """
+        self.buffered_delta_text = ""
+        self.current_tool_id = -1
+        self.current_tool_name_sent = False
+        self.streamed_args_for_tool: list[str] = []
+
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
+        """
+        Adjusts the generation request to ensure special tool tokens are not skipped.
+
+        Forces `skip_special_tokens=False` if tools are actively being evaluated,
+        ensuring the tools special tokens are surfaced to the engine for parsing.
+
+        Args:
+            request: The incoming OpenAI-compatible chat completion request.
+
+        Returns:
+            The potentially modified chat completion request.
+        """
+        request = super().adjust_request(request)
+        if request.tools and request.tool_choice != "none":
+            request.skip_special_tokens = False
+        return request
+
+    def _buffer_delta_text(self, delta_text: str) -> str:
+        """
+        Buffers incoming delta chunks to prevent
+        fragmentation of multi-token special tags.
+
+        If a chunk ends with a partial match of
+        `<|tools_prefix|>` or `<|tools_suffix|>`,
+        it holds that part back until the next chunk clarifies if it's the actual tag
+        or just normal text.
+
+        Args:
+            delta_text: The newly generated text chunk
+
+        Returns:
+            The safe, verified text chunk free of partial tag collisions.
+
+        Examples:
+            >>> parser = ApertusToolParser(...)
+            >>> parser._buffer_delta_text("Let me check <|tool" \
+            "Let me check "  # "<|tool" is buffered internally
+            >>> parser._buffer_delta_text("s_prefix|>" \
+            "<|tools_prefix|>"  # Buffer released on completion
+        """
+        self.buffered_delta_text += delta_text
+        text = self.buffered_delta_text
+
+        for tag in (TOOL_CALLS_PREFIX, TOOL_CALLS_SUFFIX):
+            if text.endswith(tag):
+                self.buffered_delta_text = ""
+                return text
+
+            # Evaluate longest possible partial match first
+            for i in range(len(tag) - 1, 0, -1):
+                if text.endswith(tag[:i]):
+                    self.buffered_delta_text = text[-i:]
+                    return text[:-i]
+
+        self.buffered_delta_text = ""
+        return text
+
+    def extract_tool_calls(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest,
+    ) -> ExtractedToolCallInformation:
+        """
+        Extracts tool calls from a completely generated model response (Non-Streaming).
+
+        Args:
+            model_output: The full completion string generated by the model.
+            request: The current chat completion
+                request context containing tool schemas.
+
+        Returns:
+            An `ExtractedToolCallInformation` object containing normal text content
+            and a list of fully formatted `ToolCall` objects.
+
+        Examples:
+            >>> output = 'Let me see. <|tools_prefix|>[{"get_weather":' \
+                '{"loc": "Paris"}}]<|tools_suffix|>'
+            >>> info = parser.extract_tool_calls(output, request)
+            >>> info.tools_called
+            True
+            >>> info.content
+            'Let me see.'
+            >>> info.tool_calls[0].function.name
+            'get_weather'
+        """
+        match = self.tool_call_regex.search(model_output)
+        if not match:
+            return ExtractedToolCallInformation(
+                tools_called=False, tool_calls=[], content=model_output
+            )
+
+        try:
+            # group(1) might contain trailing text if the suffix is missing
+            matched_text = match.group(1)
+            stripped_text = matched_text.lstrip()
+
+            try:
+                # Use raw_decode to robustly isolate
+                # the valid JSON array from any trailing garbage
+                parsed_json, idx = json.JSONDecoder().raw_decode(stripped_text)
+                trailing_in_group = stripped_text[idx:]
+            except json.JSONDecodeError:
+                # Fallback sequentially to partial parser for token-truncated requests
+                parsed_json, _ = partial_json_loads(matched_text, Allow.ALL)
+                trailing_in_group = ""
+
+            if not isinstance(parsed_json, list):
+                parsed_json = [parsed_json] if parsed_json else []
+
+            tool_calls: list[ToolCall] = []
+            for obj in parsed_json:
+                if isinstance(obj, dict) and obj:
+                    name, args = next(iter(obj.items()))
+                    tool_calls.append(
+                        ToolCall(
+                            type="function",
+                            id=make_tool_call_id(),
+                            function=FunctionCall(
+                                name=name,
+                                arguments=json.dumps(args, ensure_ascii=False),
+                            ),
+                        )
+                    )
+
+            # Content combines any generated text
+            # prior to and safely after the tool block
+            content_str = model_output[: match.start()].strip()
+
+            # Surface any hallucinated text inside
+            # the regex group (due to missing suffix)
+            if trailing_in_group.strip():
+                trailing = trailing_in_group.replace(TOOL_CALLS_SUFFIX, "").strip()
+                if trailing:
+                    content_str = (content_str + "\n" + trailing).strip()
+
+            # Surface text natively generated after the explicit suffix
+            after_suffix = (
+                model_output[match.end() :].replace(TOOL_CALLS_SUFFIX, "").strip()
+            )
+            if after_suffix:
+                content_str = (content_str + "\n" + after_suffix).strip()
+
+            return ExtractedToolCallInformation(
+                tools_called=True,
+                tool_calls=tool_calls,
+                content=content_str if content_str else None,
+            )
+
+        except Exception:
+            logger.exception("Error extracting tool calls from Apertus response")
+            return ExtractedToolCallInformation(
+                tools_called=False, tool_calls=[], content=model_output
+            )
+
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> DeltaMessage | None:
+        """
+        Handles streaming chunks
+
+        Args:
+            previous_text: The complete model text generated prior to this chunk.
+            current_text: The complete model text including this chunk.
+            delta_text: The incremental text addition.
+            previous_token_ids: Tokens generated prior to this chunk.
+            current_token_ids: Total tokens generated.
+            delta_token_ids: Incremental token additions.
+            request: The chat completion request.
+
+        Returns:
+            A `DeltaMessage` with updated content or tool argument diffs, or `None` if
+            the chunk shouldn't emit visible changes yet (e.g. it was purely buffered).
+
+        Examples:
+            >>> prev = '<|tools_prefix|>[{"get_weather": {"loc'
+            >>> cur = '<|tools_prefix|>[{"get_weather": {"location": "Paris"}}'
+            >>> delta = 'ation": "Paris"}}'
+            >>> msg = parser.extract_tool_calls_streaming(
+            ...     prev, cur, delta, ..., request
+            ... )
+            >>> msg.tool_calls[0].function.arguments
+            'ation": "Paris"}'
+        """
+        delta_text = self._buffer_delta_text(delta_text)
+        if not delta_text:
+            return None
+
+        # Fast path: normal text generation before any tools are invoked
+        if TOOL_CALLS_PREFIX not in current_text:
+            return DeltaMessage(content=delta_text)
+
+        try:
+            return self._extract_streaming(current_text, delta_text)
+        except Exception:
+            logger.exception("Error in Apertus streaming tool call extraction")
+            return None
+
+    def _extract_streaming(
+        self, current_text: str, delta_text: str
+    ) -> DeltaMessage | None:
+        """
+        Core streaming logic.
+        Separates visible chat text from JSON blocks and computes diffs.
+
+        Args:
+            current_text: The full generated output string so far.
+            delta_text: The latest chunk of text added.
+
+        Returns:
+            A `DeltaMessage` containing the `content` delta and/or `tool_calls` delta.
+        """
+        prefix_idx = current_text.rfind(TOOL_CALLS_PREFIX)
+        suffix_idx = current_text.rfind(TOOL_CALLS_SUFFIX)
+
+        is_inside_tools = prefix_idx > suffix_idx
+
+        json_completed = False
+        json_end_idx: int | None = None
+
+        # Check if the JSON array successfully closed implicitly
+        if is_inside_tools:
+            json_start = prefix_idx + len(TOOL_CALLS_PREFIX)
+            s = current_text[json_start:].lstrip()
+            try:
+                # If raw_decode succeeds,
+                # the JSON array is fully formed and implicitly closed
+                _, idx = json.JSONDecoder().raw_decode(s)
+                json_end_idx = len(current_text) - len(s) + idx
+                json_completed, is_inside_tools = True, False
+            except Exception:
+                pass
+
+        just_finished = (TOOL_CALLS_SUFFIX in delta_text) or json_completed
+
+        # 1. Fast path: Output normal text immediately
+        # if we are completely outside tool block constraints
+        if not is_inside_tools and not just_finished:
+            text = delta_text.replace(TOOL_CALLS_PREFIX, "").replace(
+                TOOL_CALLS_SUFFIX, ""
+            )
+            return DeltaMessage(content=text) if text else None
+
+        # 2. Extract leading and trailing normal text directly adjacent to tool blocks
+        content_str = ""
+        if TOOL_CALLS_PREFIX in delta_text:
+            content_str += delta_text.split(TOOL_CALLS_PREFIX)[0].replace(
+                TOOL_CALLS_SUFFIX, ""
+            )
+
+        if just_finished:
+            if json_completed and json_end_idx is not None:
+                # The tool block finished in this chunk via implicit JSON completion
+                # Ensure we strictly isolate
+                # and extract only trailing text that is part of `delta_text`
+                delta_start_idx = len(current_text) - len(delta_text)
+                content_start = max(json_end_idx, delta_start_idx)
+                if content_start < len(current_text):
+                    content_str += current_text[content_start:].replace(
+                        TOOL_CALLS_SUFFIX, ""
+                    )
+            else:
+                content_str += delta_text.split(TOOL_CALLS_SUFFIX)[-1]
+
+        # 3. Extract the isolated JSON array string for the active block
+        json_start = prefix_idx + len(TOOL_CALLS_PREFIX)
+        json_end = suffix_idx if suffix_idx > prefix_idx else json_end_idx
+        json_str = current_text[json_start:json_end]
+
+        tool_calls = self._parse_and_diff_json(json_str, is_final=not is_inside_tools)
+
+        if tool_calls or content_str:
+            return DeltaMessage(
+                content=content_str if content_str else None,
+                tool_calls=tool_calls if tool_calls else None,
+            )
+
+        return None
+
+    def _parse_and_diff_json(
+        self, json_str: str, is_final: bool
+    ) -> list[DeltaToolCall]:
+        """
+        Parses an isolated, potentially incomplete streaming JSON array and returns
+        newly accumulated tool call diffs.
+
+        Args:
+            json_str: The extracted JSON array string so far
+                (e.g. `[{"weather": {"city": "Par"}]`).
+            is_final: True if the tool block has received its closing`<|tools_suffix|>`
+
+        Returns:
+            A list of `DeltaToolCall`
+            items representing string diffs in function arguments
+            to stream back to the client.
+        """
+        try:
+            parsed, _ = partial_json_loads(json_str, Allow.ALL)
+            if not isinstance(parsed, list):
+                parsed = [parsed] if parsed else []
+        except Exception:
+            return []
+
+        if not parsed:
+            return []
+
+        tool_calls: list[DeltaToolCall] = []
+        latest_index = len(parsed) - 1
+
+        # Catch up and finalize any tools we fully skipped over in one large text delta
+        while self.current_tool_id < latest_index:
+            if self.current_tool_id >= 0:
+                if not self.current_tool_name_sent:
+                    self._emit_tool_name(parsed, self.current_tool_id, tool_calls)
+
+                delta = self._get_tool_diff(parsed, self.current_tool_id, is_final=True)
+                if delta:
+                    tool_calls.append(delta)
+
+            self.current_tool_id += 1
+            self.current_tool_name_sent = False
+            while len(self.streamed_args_for_tool) <= self.current_tool_id:
+                self.streamed_args_for_tool.append("")
+
+        # Stream the currently active tool
+        if self.current_tool_id >= 0:
+            if not self.current_tool_name_sent:
+                self._emit_tool_name(parsed, self.current_tool_id, tool_calls)
+
+            delta = self._get_tool_diff(parsed, self.current_tool_id, is_final)
+            if delta:
+                tool_calls.append(delta)
+
+        return tool_calls
+
+    def _emit_tool_name(
+        self, parsed: list, index: int, tool_calls: list[DeltaToolCall]
+    ) -> None:
+        """
+        Extracts and emits the function name mapped to a new tool call ID.
+
+        Args:
+            parsed: The partially parsed JSON list containing tool dictionaries.
+            index: The active index within the JSON list.
+            tool_calls: The running list of delta chunks to mutate.
+
+        Examples:
+            Appends `DeltaToolCall(index=0,
+                function=DeltaFunctionCall(name="get_weather", ...))`
+            to the `tool_calls` list and marks the name as sent.
+        """
+        obj = parsed[index]
+        if isinstance(obj, dict) and obj:
+            name = next(iter(obj))
+            self.current_tool_name_sent = True
+            tool_calls.append(
+                DeltaToolCall(
+                    index=index,
+                    type="function",
+                    id=make_tool_call_id(),
+                    function=DeltaFunctionCall(name=name, arguments="").model_dump(
+                        exclude_none=True
+                    ),
+                )
+            )
+
+    def _get_tool_diff(
+        self, parsed: list, index: int, is_final: bool
+    ) -> DeltaToolCall | None:
+        """
+        Calculates the exact string difference to safely append new tool parameters.
+
+        This ensures characters like `{`, `}`, and `"` don't jump around unevenly
+        in the UI frontend while streaming incomplete JSON arguments.
+
+        Args:
+            parsed: The latest list of parsed JSON objects.
+            index: The active tool's array index.
+            is_final: Whether to emit
+                trailing structural brackets (True if block is done).
+
+        Returns:
+            A `DeltaToolCall` mapping to the arguments diff,
+                or None if no text was appended.
+
+        Examples:
+            >>> # Previous streamed state: '{"city": "Pari'
+            >>> # Current full parse state: '{"city": "Paris"}'
+            >>> # Returns diff (closing bracket suppressed until final):
+            >>> parser._get_tool_diff(parsed, index=0, is_final=False)
+            DeltaToolCall(index=0, function=DeltaFunctionCall(arguments='s'))
+        """
+        obj = parsed[index]
+        if not isinstance(obj, dict) or not obj:
+            return None
+
+        name, args = next(iter(obj.items()))
+        if args is None:
+            return None
+
+        args_json = json.dumps(args, ensure_ascii=False)
+
+        # Suppress trailing structural characters
+        # during stream (looks cleaner in frontends)
+        if not is_final:
+            while args_json and args_json[-1] in ("}", '"', "]", " ", ","):
+                args_json = args_json[:-1]
+
+        prev_sent = self.streamed_args_for_tool[index]
+        if args_json == prev_sent:
+            return None
+
+        prefix = find_common_prefix(prev_sent, args_json)
+        if len(prefix) < len(prev_sent):
+            # Backtrack state if partial parser structurally updates a past assumption
+            self.streamed_args_for_tool[index] = prefix
+            return None
+
+        diff = args_json[len(prev_sent) :]
+        if diff:
+            self.streamed_args_for_tool[index] = args_json
+            return DeltaToolCall(
+                index=index,
+                function=DeltaFunctionCall(arguments=diff).model_dump(
+                    exclude_none=True
+                ),
+            )
+
+        return None
diff --git a/vllm/tool_parsers/cohere_command_tool_parser.py b/vllm/tool_parsers/cohere_command_tool_parser.py
new file mode 100644
index 000000000000..0b252ce3177a
--- /dev/null
+++ b/vllm/tool_parsers/cohere_command_tool_parser.py
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Sequence
+
+try:
+    from cohere_melody import PyFilter, PyFilterOptions
+except ImportError as e:
+    raise ImportError(
+        "The Cohere tool parser requires the `cohere_melody` "
+        "package, which is not installed. Install it with:\n"
+        "    pip install cohere_melody"
+    ) from e
+
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+)
+from vllm.entrypoints.openai.engine.protocol import (
+    DeltaFunctionCall,
+    DeltaMessage,
+    DeltaToolCall,
+    ExtractedToolCallInformation,
+    FunctionCall,
+    ToolCall,
+)
+from vllm.entrypoints.openai.responses.protocol import (
+    ResponsesRequest,
+)
+from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers import ToolParser
+from vllm.tool_parsers.utils import Tool
+
+
+class BaseCohereCommandToolParser(ToolParser):
+    def __init__(
+        self,
+        tokenizer: TokenizerLike,
+        streaming_opts: PyFilterOptions,
+        unary_opts: PyFilterOptions,
+    ):
+        super().__init__(tokenizer)
+        self.melody_streaming = PyFilter(streaming_opts)
+        self.melody_unary = PyFilter(unary_opts)
+
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
+        request = super().adjust_request(request)
+        request.skip_special_tokens = False
+        return request
+
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> DeltaMessage | None:
+        r = self.melody_streaming.write_decoded(delta_text)
+        if r.content is not None:
+            return DeltaMessage(content=r.content)
+        if r.reasoning is not None:
+            return DeltaMessage(reasoning=r.reasoning)
+        if r.tool_calls:
+            return DeltaMessage(
+                tool_calls=[
+                    DeltaToolCall(
+                        id=tc.id,
+                        index=tc.index,
+                        type="function",
+                        function=DeltaFunctionCall(
+                            name=tc.name, arguments=tc.arguments
+                        ),
+                    )
+                    for tc in r.tool_calls
+                ]
+            )
+        return None
+
+    def extract_tool_calls(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest,
+    ) -> ExtractedToolCallInformation:
+        result = self.melody_unary.process_full_text(model_output)
+        tool_calls = [
+            ToolCall(
+                id=tc.id,
+                type="function",
+                function=FunctionCall(name=tc.name, arguments=tc.arguments),
+            )
+            for tc in result.tool_calls
+        ]
+        return ExtractedToolCallInformation(
+            tools_called=len(tool_calls) > 0,
+            tool_calls=tool_calls,
+            content=result.content,
+        )
+
+
+class CohereCommand3ToolParser(BaseCohereCommandToolParser):
+    def __init__(
+        self,
+        tokenizer: TokenizerLike,
+        tools: list[Tool] | None = None,
+    ):
+        super().__init__(
+            tokenizer,
+            streaming_opts=PyFilterOptions().cmd3(),
+            unary_opts=PyFilterOptions().cmd3(),
+        )
+
+
+class CohereCommand4ToolParser(BaseCohereCommandToolParser):
+    def __init__(
+        self,
+        tokenizer: TokenizerLike,
+        tools: list[Tool] | None = None,
+    ):
+        super().__init__(
+            tokenizer,
+            streaming_opts=PyFilterOptions().cmd4(),
+            unary_opts=PyFilterOptions().cmd4(),
+        )
diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py
index e86929944799..511b6e7c359e 100644
--- a/vllm/tool_parsers/deepseekv32_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv32_tool_parser.py
@@ -19,12 +19,19 @@
     FunctionCall,
     ToolCall,
 )
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import (
     Tool,
     ToolParser,
 )
+from vllm.tool_parsers.utils import (
+    coerce_to_schema_type,
+    extract_types_from_schema,
+    find_tool_properties,
+    partial_tag_overlap,
+)
 
 logger = init_logger(__name__)
 
@@ -44,27 +51,30 @@ class DeepSeekV32ToolParser(ToolParser):
     </｜DSML｜function_calls>
     """
 
+    tool_call_start_token: str = "<｜DSML｜function_calls>"
+    tool_call_end_token: str = "</｜DSML｜function_calls>"
+
     def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         super().__init__(tokenizer, tools)
 
         self.prev_tool_call_arr: list[dict] = []
 
-        # Sentinel token
-        self.tool_call_start_token: str = "<｜DSML｜function_calls>"
-
         # Streaming state
-        self.is_tool_call_started: bool = False
         self.current_tool_index: int = 0
+        self._sent_content_idx: int = 0
 
         # Regex patterns for complete parsing
         self.tool_call_complete_regex = re.compile(
-            r"<｜DSML｜function_calls>(.*?)</｜DSML｜function_calls>", re.DOTALL
+            re.escape(self.tool_call_start_token)
+            + r"(.*?)"
+            + re.escape(self.tool_call_end_token),
+            re.DOTALL,
         )
         self.invoke_complete_regex = re.compile(
             r'<｜DSML｜invoke\s+name="([^"]+)"\s*>(.*?)</｜DSML｜invoke>', re.DOTALL
         )
         self.parameter_complete_regex = re.compile(
-            r'<｜DSML｜parameter\s+name="([^"]+)"\s+string="(?:true|false)"\s*>(.*?)</｜DSML｜parameter>',
+            r'<｜DSML｜parameter\s+name="([^"]+)"\s+string="(true|false)"\s*>(.*?)</｜DSML｜parameter>',
             re.DOTALL,
         )
 
@@ -78,11 +88,13 @@ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
             "vLLM Successfully import tool parser %s !", self.__class__.__name__
         )
 
-    def adjust_request(self, request):
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
         request = super().adjust_request(request)
         if request.tools and request.tool_choice != "none":
             # Ensure tool call tokens
-            # (<｜DSML｜function_calls>, </｜DSML｜function_calls>)
+            # (e.g. <｜DSML｜function_calls>, </｜DSML｜function_calls>)
             # are not skippedduring decoding.
             # Even though they are not marked as special tokens,
             # setting skip_special_tokens=False ensures proper handling in
@@ -94,72 +106,53 @@ def _generate_tool_call_id(self) -> str:
         """Generate a unique tool call ID."""
         return f"call_{uuid.uuid4().hex[:24]}"
 
-    def _parse_invoke_params(self, invoke_str: str) -> dict:
-        param_dict = dict()
-        for param_name, param_val in self.parameter_complete_regex.findall(invoke_str):
-            param_dict[param_name] = param_val
+    def _parse_invoke_params(self, invoke_str: str) -> dict[str, tuple[str, str]]:
+        param_dict: dict[str, tuple[str, str]] = {}
+        for param_name, string_attr, param_val in self.parameter_complete_regex.findall(
+            invoke_str
+        ):
+            param_dict[param_name] = (param_val, string_attr)
         return param_dict
 
-    def _convert_param_value(self, value: str, param_type: str) -> Any:
-        """Convert parameter value to the correct type."""
-        if value.lower() == "null":
-            return None
-
-        param_type = param_type.lower()
-        if param_type in ["string", "str", "text"]:
-            return value
-        elif param_type in ["integer", "int"]:
-            try:
-                return int(value)
-            except (ValueError, TypeError):
-                return value
-        elif param_type in ["number", "float"]:
-            try:
-                val = float(value)
-                return val if val != int(val) else int(val)
-            except (ValueError, TypeError):
-                return value
-        elif param_type in ["boolean", "bool"]:
-            return value.lower() in ["true", "1"]
-        elif param_type in ["object", "array"]:
-            try:
-                return json.loads(value)
-            except json.JSONDecodeError:
-                return value
-        else:
-            # Try JSON parse first, fallback to string
-            try:
-                return json.loads(value)
-            except json.JSONDecodeError:
-                return value
+    @staticmethod
+    def _repair_param_dict(
+        param_dict: dict[str, Any],
+        param_config: dict[str, dict],
+    ) -> dict[str, Any]:
+        """Unwrap single 'arguments' / 'input' wrappers when the wrapper
+        is not part of the requested tool schema and the wrapped object
+        matches the schema fields."""
+        allowed = set(param_config.keys())
+        for wrapper in ("arguments", "input"):
+            if set(param_dict.keys()) != {wrapper} or wrapper in allowed:
+                continue
+            inner = param_dict[wrapper]
+            if isinstance(inner, str):
+                try:
+                    inner = json.loads(inner)
+                except json.JSONDecodeError:
+                    return param_dict
+            if isinstance(inner, dict) and set(inner.keys()).issubset(allowed):
+                return inner
+        return param_dict
 
     def _convert_params_with_schema(
         self,
         function_name: str,
-        param_dict: dict[str, str],
-        request: ChatCompletionRequest | None,
+        param_dict: dict[str, tuple[str, str]],
     ) -> dict[str, Any]:
         """Convert raw string param values using the tool schema types."""
-        param_config: dict = {}
-        if request and request.tools:
-            for tool in request.tools:
-                if (
-                    hasattr(tool, "function")
-                    and tool.function.name == function_name
-                    and hasattr(tool.function, "parameters")
-                ):
-                    schema = tool.function.parameters
-                    if isinstance(schema, dict) and "properties" in schema:
-                        param_config = schema["properties"]
-                    break
+        param_config = find_tool_properties(self.tools, function_name)
 
         converted: dict[str, Any] = {}
-        for name, value in param_dict.items():
-            param_type = "string"
-            if name in param_config and isinstance(param_config[name], dict):
-                param_type = param_config[name].get("type", "string")
-            converted[name] = self._convert_param_value(value, param_type)
-        return converted
+        for name, (value, string_attr) in param_dict.items():
+            if string_attr == "true":
+                converted[name] = value
+                continue
+
+            param_types = extract_types_from_schema(param_config.get(name, {}))
+            converted[name] = coerce_to_schema_type(value, param_types)
+        return self._repair_param_dict(converted, param_config)
 
     def extract_tool_calls(
         self,
@@ -183,12 +176,13 @@ def extract_tool_calls(
                     tool_call_match
                 ):
                     param_dict = self._parse_invoke_params(invoke_content)
+                    params = self._convert_params_with_schema(invoke_name, param_dict)
                     tool_calls.append(
                         ToolCall(
                             type="function",
                             function=FunctionCall(
                                 name=invoke_name,
-                                arguments=json.dumps(param_dict, ensure_ascii=False),
+                                arguments=json.dumps(params, ensure_ascii=False),
                             ),
                         )
                     )
@@ -215,7 +209,7 @@ def extract_tool_calls(
     def _reset_streaming_state(self):
         """Reset all streaming state."""
         self.current_tool_index = 0
-        self.is_tool_call_started = False
+        self._sent_content_idx = 0
         self.prev_tool_call_arr.clear()
         self.streamed_args_for_tool.clear()
 
@@ -236,9 +230,7 @@ def _extract_delta_tool_calls(
             invoke_name, invoke_body = complete_invokes[self.current_tool_index]
             param_dict = self._parse_invoke_params(invoke_body)
 
-            converted = self._convert_params_with_schema(
-                invoke_name, param_dict, request
-            )
+            converted = self._convert_params_with_schema(invoke_name, param_dict)
             args_json = json.dumps(converted, ensure_ascii=False)
             idx = self.current_tool_index
             self.current_tool_index += 1
@@ -262,6 +254,24 @@ def _extract_delta_tool_calls(
 
         return delta_tool_calls
 
+    def _extract_content(self, current_text: str) -> str | None:
+        """Return unsent non-tool-call text, or None.
+
+        Holds back any suffix that could be a partial start marker
+        so that split markers are never leaked as content.
+        """
+        if self.tool_call_start_token not in current_text:
+            overlap = partial_tag_overlap(current_text, self.tool_call_start_token)
+            sendable_idx = len(current_text) - overlap
+        else:
+            sendable_idx = current_text.index(self.tool_call_start_token)
+
+        if sendable_idx > self._sent_content_idx:
+            content = current_text[self._sent_content_idx : sendable_idx]
+            self._sent_content_idx = sendable_idx
+            return content
+        return None
+
     def extract_tool_calls_streaming(
         self,
         previous_text: str,
@@ -283,29 +293,11 @@ def extract_tool_calls_streaming(
         if not previous_text:
             self._reset_streaming_state()
 
-        # Detect whether we've entered the tool-call region.
-        # Use current_text (not delta_text) since the start token may
-        # be split across chunks.
-        content_before = None
-        if self.is_tool_call_started:
-            pass
-        elif self.tool_call_start_token in current_text:
-            # Tool-call region found, capture any plain text before it.
-            self.is_tool_call_started = True
-            start_idx = current_text.index(self.tool_call_start_token)
-            content_before = current_text[len(previous_text) : start_idx] or None
-        else:
-            # Still in plain-text region, forward as content.
-            return DeltaMessage(content=delta_text) if delta_text else None
-
-        # Inside tool-call region: emit any newly completed invokes.
+        content = self._extract_content(current_text)
         delta_tool_calls = self._extract_delta_tool_calls(current_text, request)
 
-        if delta_tool_calls or content_before:
-            return DeltaMessage(
-                content=content_before,
-                tool_calls=delta_tool_calls,
-            )
+        if delta_tool_calls or content:
+            return DeltaMessage(content=content, tool_calls=delta_tool_calls)
 
         # Empty delta with token ids means EOS or closing tag; return
         # non-None so the serving framework can finalize finish_reason.
diff --git a/vllm/tool_parsers/deepseekv4_tool_parser.py b/vllm/tool_parsers/deepseekv4_tool_parser.py
new file mode 100644
index 000000000000..e32451cd8bbd
--- /dev/null
+++ b/vllm/tool_parsers/deepseekv4_tool_parser.py
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+)
+from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser
+from vllm.tool_parsers.structural_tag_registry import (
+    get_enable_structured_outputs_in_reasoning,
+    get_model_structural_tag,
+)
+
+
+class DeepSeekV4ToolParser(DeepSeekV32ToolParser):
+    """
+    DeepSeek V4 DSML tool parser.
+
+    V4 keeps the V3.2 DSML invoke/parameter grammar, but wraps tool calls in
+    ``<｜DSML｜tool_calls>`` instead of ``<｜DSML｜function_calls>``.
+    """
+
+    tool_call_start_token: str = "<｜DSML｜tool_calls>"
+    tool_call_end_token: str = "</｜DSML｜tool_calls>"
+
+    def get_structural_tag(self, request: ChatCompletionRequest):
+        return get_model_structural_tag(
+            model="deepseek_v4",
+            tools=request.tools,
+            tool_choice=request.tool_choice,
+            reasoning=get_enable_structured_outputs_in_reasoning(),
+        )
diff --git a/vllm/tool_parsers/functiongemma_tool_parser.py b/vllm/tool_parsers/functiongemma_tool_parser.py
index dfd91d974316..776792ea1d6d 100644
--- a/vllm/tool_parsers/functiongemma_tool_parser.py
+++ b/vllm/tool_parsers/functiongemma_tool_parser.py
@@ -18,6 +18,7 @@
     FunctionCall,
     ToolCall,
 )
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
@@ -33,6 +34,21 @@ class FunctionGemmaToolParser(ToolParser):
     <start_function_call>call:func_name{param:<escape>value<escape>}<end_function_call>
     """
 
+    # FunctionGemma tokens
+    tool_call_start_token: str = "<start_function_call>"
+    tool_call_end_token: str = "<end_function_call>"
+
+    # Regex patterns
+    tool_call_regex: re.Pattern = re.compile(
+        r"<start_function_call>call:(\w+)\{(.*?)\}<end_function_call>"
+        r"|<start_function_call>call:(\w+)\{(.*)",
+        re.DOTALL,
+    )
+    arg_regex: re.Pattern = re.compile(
+        r"(\w+):<escape>(.*?)<escape>",
+        re.DOTALL,
+    )
+
     def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         super().__init__(tokenizer, tools)
 
@@ -41,33 +57,6 @@ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         self.prev_tool_call_arr: list[dict] = []
         self.current_tool_id: int = -1
         self.streamed_args_for_tool: list[str] = []
-
-        # FunctionGemma tokens
-        self.tool_call_start_token: str = "<start_function_call>"
-        self.tool_call_end_token: str = "<end_function_call>"
-
-        # Regex patterns
-        self.tool_call_regex = re.compile(
-            r"<start_function_call>call:(\w+)\{(.*?)\}<end_function_call>"
-            r"|<start_function_call>call:(\w+)\{(.*)",
-            re.DOTALL,
-        )
-        self.arg_regex = re.compile(
-            r"(\w+):<escape>(.*?)<escape>",
-            re.DOTALL,
-        )
-
-        if self.model_tokenizer:
-            self.tool_call_start_token_ids = self.model_tokenizer.encode(
-                self.tool_call_start_token, add_special_tokens=False
-            )
-            self.tool_call_end_token_ids = self.model_tokenizer.encode(
-                self.tool_call_end_token, add_special_tokens=False
-            )
-        else:
-            self.tool_call_start_token_ids = []
-            self.tool_call_end_token_ids = []
-
         self.buffered_delta_text = ""
 
     def _parse_arguments(self, args_str: str) -> dict:
@@ -86,7 +75,9 @@ def _parse_arguments(self, args_str: str) -> dict:
 
         return arguments
 
-    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
         request = super().adjust_request(request)
         if request.tools and request.tool_choice != "none":
             request.skip_special_tokens = False
diff --git a/vllm/tool_parsers/gemma4_tool_parser.py b/vllm/tool_parsers/gemma4_tool_parser.py
new file mode 100644
index 000000000000..9925284273f9
--- /dev/null
+++ b/vllm/tool_parsers/gemma4_tool_parser.py
@@ -0,0 +1,790 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Tool call parser for Google Gemma4 models.
+
+Gemma4 uses a custom serialization format (not JSON) for tool calls::
+
+    <|tool_call>call:func_name{key:<|"|>value<|"|>,num:42}<tool_call|>
+
+Strings are delimited by ``<|"|>`` (token 52), keys are unquoted, and
+multiple tool calls are concatenated without separators.
+
+Used when ``--enable-auto-tool-choice --tool-call-parser gemma4`` are set.
+
+For offline inference tool call parsing (direct ``tokenizer.decode()`` output),
+see ``vllm.tool_parsers.gemma4_utils.parse_tool_calls``.
+"""
+
+import json
+from collections.abc import Sequence
+
+import regex as re
+
+from vllm.entrypoints.chat_utils import make_tool_call_id
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+)
+from vllm.entrypoints.openai.engine.protocol import (
+    DeltaFunctionCall,
+    DeltaMessage,
+    DeltaToolCall,
+    ExtractedToolCallInformation,
+    FunctionCall,
+    ToolCall,
+)
+from vllm.entrypoints.openai.responses.protocol import (
+    ResponsesRequest,
+)
+from vllm.logger import init_logger
+from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
+from vllm.tool_parsers.utils import find_common_prefix
+
+logger = init_logger(__name__)
+
+# Gemma4 special tokens for tool calls
+TOOL_CALL_START = "<|tool_call>"
+TOOL_CALL_END = "<tool_call|>"
+STRING_DELIM = '<|"|>'
+
+
+# ---------------------------------------------------------------------------
+# Gemma4 argument parser (used by both streaming and non-streaming paths)
+# ---------------------------------------------------------------------------
+
+
+def _parse_gemma4_value(value_str: str) -> object:
+    """Parse a single Gemma4 value (after key:) into a Python object."""
+    value_str = value_str.strip()
+    if not value_str:
+        return value_str
+
+    # Boolean
+    if value_str == "true":
+        return True
+    if value_str == "false":
+        return False
+
+    # Null
+    if value_str.lower() in ("null", "none", "nil"):
+        return None
+
+    # Number (int or float)
+    try:
+        if "." in value_str:
+            return float(value_str)
+        return int(value_str)
+    except ValueError:
+        pass
+
+    # Bare string (no <|"|> delimiters — shouldn't happen but be safe)
+    return value_str
+
+
+def _parse_gemma4_args(args_str: str, *, partial: bool = False) -> dict:
+    """Parse Gemma4's custom key:value format into a Python dict.
+
+    Format examples::
+
+        location:<|"|>Tokyo<|"|>
+        location:<|"|>San Francisco<|"|>,unit:<|"|>celsius<|"|>
+        count:42,flag:true
+        nested:{inner_key:<|"|>val<|"|>}
+        items:[<|"|>a<|"|>,<|"|>b<|"|>]
+
+    Args:
+        args_str: The raw Gemma4 argument string.
+        partial: When True (streaming), bare values at end of string are
+            omitted because they may be incomplete and type-unstable
+            (e.g. partial boolean parsed as bare string).
+
+    Returns a dict ready for ``json.dumps()``.
+    """
+    if not args_str or not args_str.strip():
+        return {}
+
+    result: dict = {}
+    i = 0
+    n = len(args_str)
+
+    while i < n:
+        # Skip whitespace and commas
+        while i < n and args_str[i] in (" ", ",", "\n", "\t"):
+            i += 1
+        if i >= n:
+            break
+
+        # Parse key (unquoted, ends at ':')
+        key_start = i
+        while i < n and args_str[i] != ":":
+            i += 1
+        if i >= n:
+            break
+        key = args_str[key_start:i].strip()
+        i += 1  # skip ':'
+
+        # Parse value
+        if i >= n:
+            if not partial:
+                result[key] = ""
+            break
+
+        # Skip whitespace after ':'
+        while i < n and args_str[i] in (" ", "\n", "\t"):
+            i += 1
+        if i >= n:
+            if not partial:
+                result[key] = ""
+            break
+
+        # String value: <|"|>...<|"|>
+        if args_str[i:].startswith(STRING_DELIM):
+            i += len(STRING_DELIM)
+            val_start = i
+            end_pos = args_str.find(STRING_DELIM, i)
+            if end_pos == -1:
+                # Unterminated string — take rest
+                result[key] = args_str[val_start:]
+                break
+            result[key] = args_str[val_start:end_pos]
+            i = end_pos + len(STRING_DELIM)
+
+        # Nested object: {...}
+        elif args_str[i] == "{":
+            depth = 1
+            obj_start = i + 1
+            i += 1
+            while i < n and depth > 0:
+                if args_str[i:].startswith(STRING_DELIM):
+                    # Skip over string contents to avoid counting { inside strings
+                    i += len(STRING_DELIM)
+                    next_delim = args_str.find(STRING_DELIM, i)
+                    i = n if next_delim == -1 else next_delim + len(STRING_DELIM)
+                    continue
+                if args_str[i] == "{":
+                    depth += 1
+                elif args_str[i] == "}":
+                    depth -= 1
+                i += 1
+            if depth > 0:
+                # Incomplete nested object — use i (not i-1) to avoid
+                # dropping the last char, and recurse as partial.
+                result[key] = _parse_gemma4_args(args_str[obj_start:i], partial=True)
+            else:
+                result[key] = _parse_gemma4_args(args_str[obj_start : i - 1])
+
+        # Array: [...]
+        elif args_str[i] == "[":
+            depth = 1
+            arr_start = i + 1
+            i += 1
+            while i < n and depth > 0:
+                if args_str[i:].startswith(STRING_DELIM):
+                    i += len(STRING_DELIM)
+                    next_delim = args_str.find(STRING_DELIM, i)
+                    i = n if next_delim == -1 else next_delim + len(STRING_DELIM)
+                    continue
+                if args_str[i] == "[":
+                    depth += 1
+                elif args_str[i] == "]":
+                    depth -= 1
+                i += 1
+            if depth > 0:
+                result[key] = _parse_gemma4_array(args_str[arr_start:i], partial=True)
+            else:
+                result[key] = _parse_gemma4_array(args_str[arr_start : i - 1])
+
+        # Bare value (number, boolean, etc.)
+        else:
+            val_start = i
+            while i < n and args_str[i] not in (",", "}", "]"):
+                i += 1
+            if partial and i >= n:
+                # Value may be incomplete (e.g. partial boolean) —
+                # withhold to avoid type instability during streaming.
+                break
+            if i == val_start:
+                logger.warning(
+                    "Gemma4 args parser made no progress at position %d; "
+                    "aborting on malformed input.",
+                    i,
+                )
+                break
+            if partial:
+                raw_val = args_str[val_start:i].strip()
+                if raw_val.endswith("."):
+                    # Trailing dot means decimal digits may still arrive
+                    # (e.g. "108." may become "108.2"). Parsing now would
+                    # yield float("108.") == 108.0, whose json repr "108.0"
+                    # corrupts the streaming diff when the true digit lands.
+                    break
+            result[key] = _parse_gemma4_value(args_str[val_start:i])
+
+    return result
+
+
+def _parse_gemma4_array(arr_str: str, *, partial: bool = False) -> list:
+    """Parse a Gemma4 array content string into a Python list."""
+    items: list = []
+    i = 0
+    n = len(arr_str)
+
+    while i < n:
+        while i < n and arr_str[i] in (" ", ",", "\n", "\t"):
+            i += 1
+        if i >= n:
+            break
+
+        # String element
+        if arr_str[i:].startswith(STRING_DELIM):
+            i += len(STRING_DELIM)
+            end_pos = arr_str.find(STRING_DELIM, i)
+            if end_pos == -1:
+                items.append(arr_str[i:])
+                break
+            items.append(arr_str[i:end_pos])
+            i = end_pos + len(STRING_DELIM)
+
+        # Nested object
+        elif arr_str[i] == "{":
+            depth = 1
+            obj_start = i + 1
+            i += 1
+            while i < n and depth > 0:
+                if arr_str[i:].startswith(STRING_DELIM):
+                    i += len(STRING_DELIM)
+                    nd = arr_str.find(STRING_DELIM, i)
+                    i = nd + len(STRING_DELIM) if nd != -1 else n
+                    continue
+                if arr_str[i] == "{":
+                    depth += 1
+                elif arr_str[i] == "}":
+                    depth -= 1
+                i += 1
+            if depth > 0:
+                items.append(_parse_gemma4_args(arr_str[obj_start:i], partial=True))
+            else:
+                items.append(_parse_gemma4_args(arr_str[obj_start : i - 1]))
+
+        # Nested array
+        elif arr_str[i] == "[":
+            depth = 1
+            sub_start = i + 1
+            i += 1
+            while i < n and depth > 0:
+                if arr_str[i:].startswith(STRING_DELIM):
+                    i += len(STRING_DELIM)
+                    nd = arr_str.find(STRING_DELIM, i)
+                    i = nd + len(STRING_DELIM) if nd != -1 else n
+                    continue
+                if arr_str[i] == "[":
+                    depth += 1
+                elif arr_str[i] == "]":
+                    depth -= 1
+                i += 1
+            if depth > 0:
+                items.append(_parse_gemma4_array(arr_str[sub_start:i], partial=True))
+            else:
+                items.append(_parse_gemma4_array(arr_str[sub_start : i - 1]))
+
+        # Bare value
+        else:
+            val_start = i
+            while i < n and arr_str[i] not in (",", "]"):
+                i += 1
+            if partial and i >= n:
+                break
+            if i == val_start:
+                logger.warning(
+                    "Gemma4 array parser made no progress at position %d; "
+                    "aborting on malformed input.",
+                    i,
+                )
+                break
+            if partial:
+                raw_val = arr_str[val_start:i].strip()
+                if raw_val.endswith("."):
+                    break
+            items.append(_parse_gemma4_value(arr_str[val_start:i]))
+
+    return items
+
+
+# ---------------------------------------------------------------------------
+# Parser
+# ---------------------------------------------------------------------------
+
+
+class Gemma4ToolParser(ToolParser):
+    """
+    Tool call parser for Google Gemma4 models.
+
+    Handles the Gemma4 function call format::
+
+        <|tool_call>call:func_name{key:<|"|>value<|"|>}<tool_call|>
+
+    Used when ``--enable-auto-tool-choice --tool-call-parser gemma4``
+    are set.
+
+    Streaming strategy: **accumulate-then-parse-then-diff**
+
+    Instead of trying to convert Gemma4's custom format to JSON
+    token-by-token (which fails because Gemma4 uses bare keys, custom
+    delimiters, and structural braces that differ from JSON), this parser:
+
+    1. Accumulates the raw Gemma4 argument string during streaming
+    2. Parses it with ``_parse_gemma4_args()`` into a Python dict
+    3. Converts to JSON with ``json.dumps()``
+    4. Diffs against the previously-streamed JSON string
+    5. Emits only the new JSON fragment as the delta
+
+    This follows the same pattern used by FunctionGemma, Hermes, and Llama
+    tool parsers.
+    """
+
+    def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+        super().__init__(tokenizer, tools)
+
+        if not self.model_tokenizer:
+            raise ValueError(
+                "The model tokenizer must be passed to the ToolParser "
+                "constructor during construction."
+            )
+
+        # Token strings
+        self.tool_call_start_token = TOOL_CALL_START
+        self.tool_call_end_token = TOOL_CALL_END
+
+        # Token IDs
+        self.tool_call_start_token_id = self.vocab.get(TOOL_CALL_START)
+        self.tool_call_end_token_id = self.vocab.get(TOOL_CALL_END)
+
+        if self.tool_call_start_token_id is None:
+            raise RuntimeError(
+                "Gemma4 ToolParser could not locate the tool call start "
+                f"token '{TOOL_CALL_START}' in the tokenizer!"
+            )
+
+        # Regex for non-streaming: extract complete tool calls.
+        # Supports function names with letters, digits, underscores,
+        # hyphens, and dots (e.g. "get-weather", "module.func").
+        self.tool_call_regex = re.compile(
+            r"<\|tool_call>call:([\w\-\.]+)\{(.*?)\}<tool_call\|>",
+            re.DOTALL,
+        )
+
+        # Streaming state — reset per-request via _reset_streaming_state()
+        self._reset_streaming_state()
+
+        # Delta buffer for handling multi-token special sequences
+        self.buffered_delta_text = ""
+
+    def _reset_streaming_state(self) -> None:
+        """Reset all streaming state for a new request."""
+        self.current_tool_id = -1
+        self.current_tool_name_sent = False
+        self.prev_tool_call_arr: list[dict] = []
+        self.streamed_args_for_tool: list[str] = []
+
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
+        request = super().adjust_request(request)
+        if request.tools and request.tool_choice != "none":
+            # Don't skip special tokens — <|tool_call> etc. are needed for
+            # the parser to detect tool calls. Apply to BOTH
+            # ChatCompletionRequest and ResponsesRequest (the previous
+            # isinstance(ChatCompletionRequest) guard caused tool-call
+            # delimiters to be stripped on /v1/responses, leaking raw
+            # `call:fn{...}` text via output_text.delta).
+            request.skip_special_tokens = False
+        return request
+
+    # ------------------------------------------------------------------
+    # Delta buffering for multi-token special sequences
+    # ------------------------------------------------------------------
+
+    def _buffer_delta_text(self, delta_text: str) -> str:
+        """Buffer incoming delta text to handle multi-token special sequences.
+
+        Accumulates partial tokens that could be the start of
+        ``<|tool_call>`` or ``<tool_call|>`` and only flushes them
+        when the complete sequence is recognized or the sequence breaks.
+
+        This prevents partial special tokens (e.g., ``<|tool``) from being
+        emitted prematurely as content text.
+        """
+        combined = self.buffered_delta_text + delta_text
+
+        # Check if combined ends with a complete special token
+        if combined.endswith(TOOL_CALL_START) or combined.endswith(TOOL_CALL_END):
+            self.buffered_delta_text = ""
+            return combined
+
+        # Check if combined ends with a partial prefix of a special token
+        for tag in [TOOL_CALL_START, TOOL_CALL_END]:
+            for i in range(1, len(tag)):
+                if combined.endswith(tag[:i]):
+                    self.buffered_delta_text = combined[-i:]
+                    return combined[:-i]
+
+        # No partial match — flush everything
+        self.buffered_delta_text = ""
+        return combined
+
+    # ------------------------------------------------------------------
+    # Non-streaming extraction
+    # ------------------------------------------------------------------
+
+    def extract_tool_calls(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest,
+    ) -> ExtractedToolCallInformation:
+        if self.tool_call_start_token not in model_output:
+            return ExtractedToolCallInformation(
+                tools_called=False, tool_calls=[], content=model_output
+            )
+
+        try:
+            matches = self.tool_call_regex.findall(model_output)
+            if not matches:
+                return ExtractedToolCallInformation(
+                    tools_called=False, tool_calls=[], content=model_output
+                )
+
+            tool_calls: list[ToolCall] = []
+            for func_name, args_str in matches:
+                arguments = _parse_gemma4_args(args_str)
+                tool_calls.append(
+                    ToolCall(
+                        type="function",
+                        function=FunctionCall(
+                            name=func_name,
+                            arguments=json.dumps(arguments, ensure_ascii=False),
+                        ),
+                    )
+                )
+
+            # Content = text before first tool call (if any)
+            content_end = model_output.find(self.tool_call_start_token)
+            content = model_output[:content_end].strip() if content_end > 0 else None
+
+            return ExtractedToolCallInformation(
+                tools_called=True,
+                tool_calls=tool_calls,
+                content=content if content else None,
+            )
+
+        except Exception:
+            logger.exception("Error extracting tool calls from Gemma4 response")
+            return ExtractedToolCallInformation(
+                tools_called=False, tool_calls=[], content=model_output
+            )
+
+    # ------------------------------------------------------------------
+    # Streaming extraction — accumulate-then-parse-then-diff
+    # ------------------------------------------------------------------
+
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> DeltaMessage | None:
+        # Buffer delta text to handle multi-token special sequences
+        delta_text = self._buffer_delta_text(delta_text)
+        # Keep current_text from the upstream stream state. The buffered delta
+        # is only for emission, and must not be stitched back into the
+        # accumulated model text or normal content like "<div>" can be
+        # duplicated into "<<div>" when a tool call just ended.
+
+        # If no tool call token seen yet, emit as content
+        if self.tool_call_start_token not in current_text:
+            if delta_text:
+                return DeltaMessage(content=delta_text)
+            return None
+
+        try:
+            return self._extract_streaming(
+                previous_text=previous_text,
+                current_text=current_text,
+                delta_text=delta_text,
+            )
+        except Exception:
+            logger.exception("Error in Gemma4 streaming tool call extraction")
+            return None
+
+    def _extract_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+    ) -> DeltaMessage | None:
+        """Tag-counting streaming parser.
+
+        Uses the proven approach from FunctionGemma/Hermes: count start/end
+        tags in previous vs current text to determine phase, then
+        accumulate-parse-diff for arguments.
+
+        Format: ``<|tool_call>call:name{args}<tool_call|>``
+        """
+        start_count = current_text.count(self.tool_call_start_token)
+        end_count = current_text.count(self.tool_call_end_token)
+        prev_start_count = previous_text.count(self.tool_call_start_token)
+        prev_end_count = previous_text.count(self.tool_call_end_token)
+
+        # Case 1: Not inside any tool call — emit as content
+        if (
+            start_count == end_count
+            and prev_end_count == end_count
+            and self.tool_call_end_token not in delta_text
+        ):
+            if delta_text:
+                return DeltaMessage(content=delta_text)
+            return None
+
+        # Case 2: Starting a new tool call
+        if start_count > prev_start_count and start_count > end_count:
+            self.current_tool_id += 1
+            self.current_tool_name_sent = False
+            self.streamed_args_for_tool.append("")
+            self.prev_tool_call_arr.append({})
+            logger.debug("Starting new tool call %d", self.current_tool_id)
+            # Don't return yet — fall through to try parsing if there's
+            # content after <|tool_call> in this same delta
+            # (but usually it's just the token itself, so return None)
+            if len(delta_text) <= len(self.tool_call_start_token):
+                return None
+
+        # Case 3: Tool call just ended
+        if end_count > prev_end_count:
+            return self._handle_tool_call_end(current_text)
+
+        # Case 4: In the middle of a tool call — parse partial content
+        if start_count > end_count:
+            return self._handle_tool_call_middle(current_text)
+
+        # Default: generate text outside tool calls
+        if delta_text:
+            text = delta_text.replace(self.tool_call_start_token, "")
+            text = text.replace(self.tool_call_end_token, "")
+            if text:
+                return DeltaMessage(content=text)
+        return None
+
+    def _extract_partial_call(self, current_text: str) -> tuple[str | None, str]:
+        """Extract function name and raw argument string from partial text.
+
+        Returns (func_name, raw_args_str) or (None, "") if not parseable yet.
+        """
+        # Get the text after the last <|tool_call> token
+        last_start = current_text.rfind(self.tool_call_start_token)
+        if last_start == -1:
+            return None, ""
+
+        partial_call = current_text[last_start + len(self.tool_call_start_token) :]
+
+        # Strip end token if present
+        if self.tool_call_end_token in partial_call:
+            partial_call = partial_call.split(self.tool_call_end_token)[0]
+
+        # Expect "call:name{args...}" or "call:name{args...}"
+        if not partial_call.startswith("call:"):
+            return None, ""
+
+        func_part = partial_call[5:]  # skip "call:"
+
+        if "{" not in func_part:
+            # Still accumulating function name, not ready yet
+            return None, ""
+
+        func_name, _, args_part = func_part.partition("{")
+        func_name = func_name.strip()
+
+        # Strip trailing '}' if present (Gemma4 structural brace)
+        if args_part.endswith("}"):
+            args_part = args_part[:-1]
+
+        return func_name, args_part
+
+    def _handle_tool_call_middle(self, current_text: str) -> DeltaMessage | None:
+        """Handle streaming when we're inside an active tool call.
+
+        Accumulates the raw Gemma4 arguments, parses them into JSON, and
+        diffs against the previously-streamed JSON to emit only the new
+        fragment.
+        """
+        func_name, args_part = self._extract_partial_call(current_text)
+
+        if func_name is None:
+            return None
+
+        # Step 1: Send function name (once)
+        if not self.current_tool_name_sent and func_name:
+            self.current_tool_name_sent = True
+            self.prev_tool_call_arr[self.current_tool_id] = {
+                "name": func_name,
+                "arguments": {},
+            }
+            return DeltaMessage(
+                tool_calls=[
+                    DeltaToolCall(
+                        index=self.current_tool_id,
+                        type="function",
+                        id=make_tool_call_id(),
+                        function=DeltaFunctionCall(
+                            name=func_name,
+                            arguments="",
+                        ).model_dump(exclude_none=True),
+                    )
+                ]
+            )
+
+        # Step 2: Parse and diff arguments
+        if self.current_tool_name_sent and args_part:
+            return self._emit_argument_diff(args_part)
+
+        return None
+
+    def _handle_tool_call_end(self, current_text: str) -> DeltaMessage | None:
+        """Handle streaming when a tool call has just completed.
+
+        Performs a final parse of the complete tool call and flushes
+        any remaining un-streamed argument fragments.
+        """
+        if self.current_tool_id < 0 or self.current_tool_id >= len(
+            self.prev_tool_call_arr
+        ):
+            logger.debug(
+                "Tool call end detected but no active tool call (current_tool_id=%d)",
+                self.current_tool_id,
+            )
+            return None
+
+        # Parse the complete tool call using regex for accuracy
+        all_matches = self.tool_call_regex.findall(current_text)
+        if self.current_tool_id < len(all_matches):
+            _, args_str = all_matches[self.current_tool_id]
+            final_args = _parse_gemma4_args(args_str)
+            final_args_json = json.dumps(final_args, ensure_ascii=False)
+
+            prev_streamed = self.streamed_args_for_tool[self.current_tool_id]
+            if len(final_args_json) > len(prev_streamed):
+                diff = final_args_json[len(prev_streamed) :]
+                self.streamed_args_for_tool[self.current_tool_id] = final_args_json
+                self.prev_tool_call_arr[self.current_tool_id]["arguments"] = final_args
+
+                return DeltaMessage(
+                    tool_calls=[
+                        DeltaToolCall(
+                            index=self.current_tool_id,
+                            function=DeltaFunctionCall(arguments=diff).model_dump(
+                                exclude_none=True
+                            ),
+                        )
+                    ]
+                )
+
+        return None
+
+    def _emit_argument_diff(self, raw_args_str: str) -> DeltaMessage | None:
+        """Parse raw Gemma4 arguments, convert to JSON, diff, and emit.
+
+        This is the core of the accumulate-then-parse-then-diff strategy:
+        1. Parse ``raw_args_str`` with ``_parse_gemma4_args()``
+        2. Convert to JSON string with ``json.dumps()``
+        3. Withhold trailing closing characters (``"}``) that may move
+           as more tokens arrive
+        4. Diff against previously streamed JSON and emit only new chars
+
+        **Why withholding is necessary:**
+
+        Gemma4's custom format produces *structurally incomplete* JSON
+        during streaming. For example, when ``<|"|>Paris`` arrives
+        without a closing delimiter, ``_parse_gemma4_args`` treats it
+        as a complete value and produces ``{"location": "Paris"}``. But
+        when ``, France<|"|>`` arrives next, the JSON becomes
+        ``{"location": "Paris, France"}``. If we had sent the closing
+        ``"}`` from the first parse, the concatenated client output
+        would be ``{"location": "Paris"}France"}``, which is garbage.
+
+        The solution: **never send trailing closing chars during
+        streaming**. They get flushed by ``_handle_tool_call_end()``
+        when the ``<tool_call|>`` end marker arrives.
+
+        Args:
+            raw_args_str: The raw Gemma4 argument text accumulated so far
+                (without the surrounding ``{`` ``}``).
+
+        Returns:
+            DeltaMessage with the argument diff, or None if no new content.
+        """
+        try:
+            current_args = _parse_gemma4_args(raw_args_str, partial=True)
+        except Exception:
+            logger.debug(
+                "Could not parse partial Gemma4 args yet: %s",
+                raw_args_str[:100],
+            )
+            return None
+
+        if not current_args:
+            return None
+
+        current_args_json = json.dumps(current_args, ensure_ascii=False)
+
+        # Withhold trailing closing characters that may shift as more
+        # tokens arrive. Strip trailing '}', '"', ']' and partial
+        # STRING_DELIM fragments ('<', '|', '\\', '>') to get the
+        # "safe prefix".
+        safe_json = current_args_json
+        while safe_json and safe_json[-1] in ("}", '"', "]", "<", "|", "\\", ">"):
+            safe_json = safe_json[:-1]
+
+        prev_streamed = self.streamed_args_for_tool[self.current_tool_id]
+
+        if not safe_json or safe_json == prev_streamed:
+            return None
+
+        # Use find_common_prefix to handle cases where the value changed
+        # structurally (e.g., a string grew).
+        if prev_streamed:
+            prefix = find_common_prefix(prev_streamed, safe_json)
+            sent_len = len(prev_streamed)
+            prefix_len = len(prefix)
+
+            if prefix_len < sent_len:
+                # Structure changed — we sent too much. Truncate our
+                # tracking to the common prefix and wait for the final
+                # flush in _handle_tool_call_end.
+                self.streamed_args_for_tool[self.current_tool_id] = prefix
+                return None
+
+            # Stream the new stable portion
+            diff = safe_json[sent_len:]
+        else:
+            # First emission
+            diff = safe_json
+
+        if diff:
+            self.streamed_args_for_tool[self.current_tool_id] = safe_json
+            self.prev_tool_call_arr[self.current_tool_id]["arguments"] = current_args
+
+            return DeltaMessage(
+                tool_calls=[
+                    DeltaToolCall(
+                        index=self.current_tool_id,
+                        function=DeltaFunctionCall(arguments=diff).model_dump(
+                            exclude_none=True
+                        ),
+                    )
+                ]
+            )
+
+        return None
diff --git a/vllm/tool_parsers/gemma4_utils.py b/vllm/tool_parsers/gemma4_utils.py
new file mode 100644
index 000000000000..439ad1125ce2
--- /dev/null
+++ b/vllm/tool_parsers/gemma4_utils.py
@@ -0,0 +1,183 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# Copyright 2025 Google Inc. HuggingFace Inc. team. All rights reserved.
+
+"""Gemma4 tool call parsing utilities for offline inference.
+
+Standalone functions that parse decoded model text to extract tool calls
+from Gemma4 models. These are pure-Python utilities with zero heavy
+dependencies — they work on raw decoded strings from any inference
+backend (vLLM, HuggingFace, TGI, etc.).
+
+For the OpenAI-compatible API server tool parser (streaming +
+non-streaming), see ``vllm.tool_parsers.gemma4_tool_parser``.
+For thinking/reasoning output parsing, see
+``vllm.reasoning.gemma4_utils``.
+
+Usage with vLLM offline inference::
+
+    from vllm import LLM, SamplingParams
+    from vllm.tool_parsers.gemma4_utils import (
+        parse_tool_calls,
+        has_tool_response_tag,
+    )
+
+    llm = LLM(model="google/gemma-4-it")
+    outputs = llm.generate(prompt, SamplingParams(...))
+    text = tokenizer.decode(outputs[0].outputs[0].token_ids, skip_special_tokens=False)
+
+    # Extract tool calls
+    tool_calls = parse_tool_calls(text)
+    for tc in tool_calls:
+        print(f"{tc['name']}({tc['arguments']})")
+
+Ported from ``transformers.models.gemma4.utils_gemma4`` so that vLLM users
+do not need a transformers dependency for output parsing.
+"""
+
+import json
+
+import regex as re
+
+# Tool call delimiter tokens as they appear in decoded text.
+# Standard format: <|tool_call>call:name{args}<tool_call|>
+_TOOL_CALL_START_TAG = "<|tool_call>"
+_TOOL_CALL_END_TAG = "<tool_call|>"
+_TOOL_RESPONSE_START_TAG = "<|tool_response>"
+
+# Gemma4 escape token as it appears in decoded text.
+_ESCAPE_TOKEN = '<|"|>'
+
+
+def _parse_tool_arguments(args_str: str) -> dict[str, str]:
+    """Parse tool call arguments from the Gemma4 compact format.
+
+    Handles the ``key:<|"|>value<|"|>`` format used by Gemma4, with fallback
+    to heuristic key-value extraction. Also tolerates the slightly different
+    ``key: "value"`` format (space + plain quotes) that some chat templates
+    produce.
+
+    Args:
+        args_str: Raw argument string from inside ``call:name{...}``.
+
+    Returns:
+        Dictionary of argument name → value.
+    """
+    if not args_str or not args_str.strip():
+        return {}
+
+    # Replace Gemma4 escape tokens with standard quotes.
+    cleaned = args_str.replace(_ESCAPE_TOKEN, '"')
+
+    # Try JSON parsing first (handles nested values, arrays, etc.).
+    try:
+        parsed = json.loads("{" + cleaned + "}")
+        # Ensure all values are strings for consistency.
+        return {k: str(v) if not isinstance(v, str) else v for k, v in parsed.items()}
+    except (json.JSONDecodeError, ValueError):
+        pass
+
+    # Fallback: extract key:"value" pairs (allow optional space after colon).
+    arguments = {}
+    for key, value in re.findall(r'(\w+):\s*"([^"]*)"', cleaned):
+        arguments[key] = value
+
+    if not arguments:
+        # Last resort: extract key:value pairs (unquoted).
+        for key, value in re.findall(r"(\w+):\s*([^,}]+)", args_str):
+            arguments[key] = value.strip().strip('"').replace(_ESCAPE_TOKEN, "")
+
+    return arguments
+
+
+def parse_tool_calls(text: str, *, strict: bool = False) -> list[dict]:
+    """Parse tool calls from decoded Gemma4 model output.
+
+    Uses a tiered parsing strategy to handle known output variations in
+    Gemma4 models, which may emit
+    non-standard tool call formats.
+
+    Parsing tiers:
+        1. **Standard**: ``<|tool_call>call:name{args}<tool_call|>``
+           (special token IDs 48/49 in decoded text)
+        2. **Fallback** (when ``strict=False``): bare ``call:name{args}``
+           patterns, including ``<call>name{args}`` (fragmented tokens from
+           multimodal inputs)
+
+    Args:
+        text: Decoded model output text (from ``tokenizer.decode(...,
+            skip_special_tokens=False)``).
+        strict: If ``True``, only match the standard ``<|tool_call>`` format.
+            If ``False`` (default), also try fallback patterns for
+            known Gemma4 output variations.
+
+    Returns:
+        A list of dicts, each with keys:
+            - ``"name"``: The tool function name (e.g. ``"get_weather"``).
+            - ``"arguments"``: A dict of argument name → value.
+
+    Example::
+
+        >>> from vllm.tool_parsers.gemma4_utils import parse_tool_calls
+        >>> output = tokenizer.decode(outputs[0], skip_special_tokens=False)
+        >>> tool_calls = parse_tool_calls(output)
+        >>> for tc in tool_calls:
+        ...     print(f"Call: {tc['name']}({tc['arguments']})")
+    """
+    results = []
+
+    # Tier 1: Standard format with special tokens.
+    # <|tool_call>call:name{args}<tool_call|>
+    # Note: Some Gemma4 models emit <turn|> instead of <tool_call|>.
+    standard_pattern = r"<\|tool_call\>call:(\w+)\{(.*?)\}(?:<tool_call\|>|<turn\|>)"
+    for match in re.finditer(standard_pattern, text, re.DOTALL):
+        name, args_str = match.group(1), match.group(2)
+        results.append(
+            {
+                "name": name,
+                "arguments": _parse_tool_arguments(args_str),
+            }
+        )
+
+    if results or strict:
+        return results
+
+    # Tier 2: Fallback for known Gemma4 output variations.
+    # Matches: <call>name{args}, call:name{args}, or bare call:name{args}<eos>
+    fallback_pattern = r"(?:<call>|(?:^|\s)call:)(\w+)\{(.*?)\}"
+    for match in re.finditer(fallback_pattern, text, re.DOTALL):
+        name, args_str = match.group(1), match.group(2)
+        results.append(
+            {
+                "name": name,
+                "arguments": _parse_tool_arguments(args_str),
+            }
+        )
+
+    return results
+
+
+def has_tool_response_tag(text: str) -> bool:
+    """Check if model output properly ends with a tool response tag.
+
+    Some Gemma4 models sometimes emit ``<eos>`` instead of
+    ``<|tool_response>`` after a tool call. This helper detects
+    whether the model used the proper termination, so callers can
+    decide whether to inject ``<|tool_response>`` into the next prompt.
+
+    Args:
+        text: Decoded model output text.
+
+    Returns:
+        ``True`` if the output ends with ``<|tool_response>``
+        (proper behavior), ``False`` otherwise.
+
+    Example::
+
+        >>> from vllm.tool_parsers.gemma4_utils import has_tool_response_tag
+        >>> if not has_tool_response_tag(model_output):
+        ...     # Model used <eos> instead — inject <|tool_response> manually
+        ...     next_prompt = "<|tool_response>" + tool_result
+    """
+    stripped = text.rstrip()
+    return stripped.endswith(_TOOL_RESPONSE_START_TAG)
diff --git a/vllm/tool_parsers/gigachat3_tool_parser.py b/vllm/tool_parsers/gigachat3_tool_parser.py
index f470f6a5b282..d48ff43eedf8 100644
--- a/vllm/tool_parsers/gigachat3_tool_parser.py
+++ b/vllm/tool_parsers/gigachat3_tool_parser.py
@@ -18,6 +18,7 @@
     FunctionCall,
     ToolCall,
 )
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
@@ -55,7 +56,9 @@ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         self.end_content: bool = False
         self.streamed_args_for_tool: list[str] = []
 
-    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
         request = super().adjust_request(request)
         if request.tools and request.tool_choice != "none":
             request.skip_special_tokens = False
diff --git a/vllm/tool_parsers/glm47_moe_tool_parser.py b/vllm/tool_parsers/glm47_moe_tool_parser.py
index 765d6d37de11..47b6ad2f5afe 100644
--- a/vllm/tool_parsers/glm47_moe_tool_parser.py
+++ b/vllm/tool_parsers/glm47_moe_tool_parser.py
@@ -23,6 +23,8 @@
 
 
 class Glm47MoeModelToolParser(Glm4MoeModelToolParser):
+    supports_required_and_named = False
+
     def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         super().__init__(tokenizer, tools)
         # GLM-4.7 format: <tool_call>func_name[<arg_key>...]*</tool_call>
diff --git a/vllm/tool_parsers/glm4_moe_tool_parser.py b/vllm/tool_parsers/glm4_moe_tool_parser.py
index fc718921d5ce..1779896e5b66 100644
--- a/vllm/tool_parsers/glm4_moe_tool_parser.py
+++ b/vllm/tool_parsers/glm4_moe_tool_parser.py
@@ -20,6 +20,7 @@
 
 from vllm.entrypoints.chat_utils import make_tool_call_id
 from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
 )
 from vllm.entrypoints.openai.engine.protocol import (
@@ -30,12 +31,18 @@
     FunctionCall,
     ToolCall,
 )
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import (
     Tool,
     ToolParser,
 )
+from vllm.tool_parsers.utils import (
+    extract_types_from_schema,
+    find_tool_properties,
+    partial_tag_overlap,
+)
 
 logger = init_logger(__name__)
 
@@ -43,11 +50,13 @@
 class Glm4MoeModelToolParser(ToolParser):
     """Tool parser for GLM-4 models with incremental string streaming.
 
-    This parser emits tool-call deltas incrementally as arguments arrive.
-    For string-type parameters, content is streamed character-by-character
-    rather than waiting for the complete </arg_value> tag.
+    On every streaming call the parser re-parses ``current_text`` to find
+    ``<tool_call>`` regions, builds the JSON arguments string for each tool
+    call, and diffs against what was previously sent to emit only new content.
     """
 
+    supports_required_and_named = False
+
     def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         super().__init__(tokenizer, tools)
         # Stateful streaming fields
@@ -81,17 +90,17 @@ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
 
         self.tool_call_start_token_id = self.vocab.get(self.tool_call_start_token)
         self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
-        self._buffer: str = ""
 
-        # Streaming state for incremental tool-call streaming
-        self._in_tool_call: bool = False
-        self._current_tool_name: str | None = None
-        self._pending_key: str | None = None
-        self._streaming_string_value: bool = False
+        # Pre-compiled pattern for finding the last <arg_key>...</arg_key>
+        # before a partial <arg_value> (used in _build_args_json_so_far).
+        self._arg_key_pattern = re.compile(
+            re.escape(self.arg_key_start) + r"(.*?)" + re.escape(self.arg_key_end),
+            re.DOTALL,
+        )
+
+        # Streaming state for re-parse-and-diff approach
+        self._sent_content_idx: int = 0
         self._tool_call_ids: list[str] = []
-        self._args_started: list[bool] = []
-        self._args_closed: list[bool] = []
-        self._seen_keys: list[set[str]] = []
 
     @staticmethod
     def _deserialize(value: str) -> Any:
@@ -118,27 +127,13 @@ def _json_escape_string_content(s: str) -> str:
             return ""
         return json.dumps(s, ensure_ascii=False)[1:-1]
 
-    @staticmethod
-    def _is_string_type(
-        tool_name: str,
-        arg_name: str,
-        tools: list[Tool] | None,
-    ) -> bool:
-        if tools is None:
+    def _is_string_type(self, tool_name: str, arg_name: str) -> bool:
+        tool_properties = find_tool_properties(self.tools, tool_name)
+        param_schema = tool_properties.get(arg_name)
+        if param_schema is None:
             return False
-        for tool in tools:
-            if tool.function.name != tool_name:
-                continue
-            if tool.function.parameters is None:
-                return False
-            arg_type = (
-                tool.function.parameters.get("properties", {})
-                .get(arg_name, {})
-                .get("type", None)
-            )
-            return arg_type == "string"
-        logger.debug("No tool named '%s'.", tool_name)
-        return False
+        param_types = extract_types_from_schema(param_schema)
+        return set(param_types) - {"null"} == {"string"}
 
     @staticmethod
     def _tools_enabled(request: ChatCompletionRequest) -> bool:
@@ -151,8 +146,28 @@ def _tools_enabled(request: ChatCompletionRequest) -> bool:
             logger.exception("Failed to determine if tools are enabled.")
             return False
 
-    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
-        """Adjust request parameters for tool call token handling."""
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
+        """Adjust request parameters for tool call token handling.
+
+        For required/named tool_choice, skip setting structured_outputs
+        because GLM models output tool calls in XML format (per chat
+        template).  Guided decoding would force JSON output, conflicting
+        with the XML format and causing parsing failures.
+        """
+        if request.tools:
+            tc = request.tool_choice
+            if tc == "required" or isinstance(tc, ChatCompletionNamedToolChoiceParam):
+                # Do NOT call super().adjust_request() for required/named,
+                # because it would set structured_outputs and force JSON
+                # output via guided decoding.  GLM models use XML tool-call
+                # syntax (defined in the chat template), so guided decoding
+                # must be skipped to let the model output XML freely.
+                # The tool_parser handles extraction from XML output.
+                if request.tool_choice != "none":
+                    request.skip_special_tokens = False
+                return request
         request = super().adjust_request(request)
         if request.tools and request.tool_choice != "none":
             # Ensure tool call tokens (<tool_call>, </tool_call>) are not skipped
@@ -185,9 +200,10 @@ def extract_tool_calls(
                 arg_dct: dict[str, Any] = {}
                 for key, value in pairs:
                     arg_key = key.strip()
-                    arg_val = value.strip()
-                    if not self._is_string_type(tc_name, arg_key, request.tools):
-                        arg_val = self._deserialize(arg_val)
+                    if self._is_string_type(tc_name, arg_key):
+                        arg_val = value
+                    else:
+                        arg_val = self._deserialize(value.strip())
                     logger.debug("arg_key = %s, arg_val = %s", arg_key, arg_val)
                     arg_dct[arg_key] = arg_val
                 tool_calls.append(
@@ -219,6 +235,199 @@ def extract_tool_calls(
                 tools_called=False, tool_calls=[], content=model_output
             )
 
+    def _extract_content(self, current_text: str) -> str | None:
+        """Return unsent non-tool-call text, or None.
+
+        Collects all text outside ``<tool_call>...</tool_call>`` regions,
+        including text between consecutive tool calls.  Holds back any
+        suffix that could be a partial ``<tool_call>`` tag.
+        """
+        # Build the "sendable index" — the furthest point we can send
+        # content up to.  We scan through the text collecting segments
+        # that are outside tool-call regions.
+        content_segments: list[str] = []
+        pos = self._sent_content_idx
+
+        while pos < len(current_text):
+            start = current_text.find(self.tool_call_start_token, pos)
+            if start == -1:
+                # No more tool calls — send up to (len - partial-tag overlap)
+                tail = current_text[pos:]
+                overlap = partial_tag_overlap(tail, self.tool_call_start_token)
+                sendable = tail[: len(tail) - overlap] if overlap else tail
+                if sendable:
+                    content_segments.append(sendable)
+                pos = len(current_text) - overlap
+                break
+
+            # Text before this <tool_call>
+            if start > pos:
+                content_segments.append(current_text[pos:start])
+
+            # Skip past the </tool_call> (or to end if incomplete)
+            end = current_text.find(self.tool_call_end_token, start)
+            if end != -1:
+                pos = end + len(self.tool_call_end_token)
+            else:
+                # Incomplete tool call — nothing more to send
+                pos = start
+                break
+
+        if content_segments:
+            self._sent_content_idx = pos
+            return "".join(content_segments)
+        # Even if no content, advance past completed tool-call regions
+        if pos > self._sent_content_idx:
+            self._sent_content_idx = pos
+        return None
+
+    def _extract_tool_call_regions(self, text: str) -> list[tuple[str, bool]]:
+        """Extract ``(inner_text, is_complete)`` for each ``<tool_call>`` region."""
+        results: list[tuple[str, bool]] = []
+        pos = 0
+        while True:
+            start = text.find(self.tool_call_start_token, pos)
+            if start == -1:
+                break
+            inner_start = start + len(self.tool_call_start_token)
+            end = text.find(self.tool_call_end_token, inner_start)
+            if end != -1:
+                results.append((text[inner_start:end], True))
+                pos = end + len(self.tool_call_end_token)
+            else:
+                # Incomplete tool call — strip partial </tool_call> suffix
+                raw = text[inner_start:]
+                overlap = partial_tag_overlap(raw, self.tool_call_end_token)
+                if overlap:
+                    raw = raw[:-overlap]
+                results.append((raw, False))
+                break
+        return results
+
+    def _extract_tool_name_from_region(self, inner_text: str) -> str | None:
+        """Extract the tool name from the beginning of a tool-call region.
+
+        The name is everything before the first ``\\n`` or ``<arg_key>``.
+        Returns ``None`` if the name hasn't fully arrived yet.
+        """
+        nl = inner_text.find("\n")
+        ak = inner_text.find(self.arg_key_start)
+        candidates = [i for i in [nl, ak] if i != -1]
+        if not candidates:
+            return None
+        cut = min(candidates)
+        name = inner_text[:cut].strip()
+        return name if name else None
+
+    def _build_args_json_so_far(
+        self,
+        tool_name: str,
+        inner_text: str,
+        is_complete: bool,
+    ) -> str:
+        """Build the JSON arguments string from the XML pairs seen so far.
+
+        For complete ``<arg_key>/<arg_value>`` pairs the value is fully
+        formatted.  For the last argument whose ``<arg_value>`` has been
+        opened but not closed, the partial string content is included
+        (JSON-escaped, with an opening ``"`` but no closing ``"``).
+
+        The closing ``}`` is only appended when ``is_complete`` is True
+        (i.e. the ``</tool_call>`` tag has arrived).
+        """
+        # Find all complete arg pairs
+        pairs = self.func_arg_regex.findall(inner_text)
+
+        parts: list[str] = []
+        for key, value in pairs:
+            key = key.strip()
+            key_json = json.dumps(key, ensure_ascii=False)
+            if self._is_string_type(tool_name, key):
+                # Don't strip string values — whitespace is significant
+                # and must match the partial-value path for diffing.
+                val_json = json.dumps(value, ensure_ascii=False)
+            else:
+                val_json = json.dumps(
+                    self._deserialize(value.strip()), ensure_ascii=False
+                )
+            parts.append(f"{key_json}: {val_json}")
+
+        # Check for a partial (incomplete) arg value
+        # Find the last <arg_value> that isn't closed
+        last_val_start = inner_text.rfind(self.arg_val_start)
+        last_val_end = inner_text.rfind(self.arg_val_end)
+        has_partial_value = last_val_start != -1 and (
+            last_val_end == -1 or last_val_end < last_val_start
+        )
+
+        if has_partial_value:
+            # Find the key for this partial value
+            # Look for the last <arg_key>...</arg_key> before this <arg_value>
+            last_key_match = None
+            for m in self._arg_key_pattern.finditer(inner_text[:last_val_start]):
+                last_key_match = m
+
+            if last_key_match:
+                partial_key = last_key_match.group(1).strip()
+                partial_content_start = last_val_start + len(self.arg_val_start)
+                partial_content = inner_text[partial_content_start:]
+
+                # Hold back any partial </arg_value> suffix
+                overlap = partial_tag_overlap(partial_content, self.arg_val_end)
+                if overlap:
+                    partial_content = partial_content[:-overlap]
+
+                key_json = json.dumps(partial_key, ensure_ascii=False)
+                if is_complete:
+                    # Tool call finished but </arg_value> is missing
+                    # (malformed output). Treat partial as complete value
+                    # so the diff naturally closes any open quotes.
+                    if self._is_string_type(tool_name, partial_key):
+                        val_json = json.dumps(partial_content, ensure_ascii=False)
+                    else:
+                        val_json = json.dumps(
+                            self._deserialize(partial_content.strip()),
+                            ensure_ascii=False,
+                        )
+                    parts.append(f"{key_json}: {val_json}")
+                elif self._is_string_type(tool_name, partial_key):
+                    escaped = self._json_escape_string_content(partial_content)
+                    # Open quote but no close — more content may arrive
+                    parts.append(f'{key_json}: "{escaped}')
+                else:
+                    # Non-string partial: include raw content, no wrapping
+                    parts.append(f"{key_json}: {partial_content}")
+
+        if not parts:
+            return "{}" if is_complete else ""
+
+        joined = "{" + ", ".join(parts)
+        if is_complete:
+            joined += "}"
+        return joined
+
+    def _compute_args_diff(self, index: int, args_so_far: str) -> str | None:
+        """Return new argument text not yet sent for tool *index*, or None."""
+        if not args_so_far or len(args_so_far) <= len(
+            self.streamed_args_for_tool[index]
+        ):
+            return None
+        diff = args_so_far[len(self.streamed_args_for_tool[index]) :]
+        self.streamed_args_for_tool[index] = args_so_far
+        self.prev_tool_call_arr[index]["arguments"] = args_so_far
+        return diff
+
+    def _ensure_tool_state_for(self, index: int) -> None:
+        """Grow state arrays so that *index* is valid."""
+        while len(self._tool_call_ids) <= index:
+            self._tool_call_ids.append(
+                make_tool_call_id(id_type="random", func_name=None, idx=None)
+            )
+        while len(self.streamed_args_for_tool) <= index:
+            self.streamed_args_for_tool.append("")
+        while len(self.prev_tool_call_arr) <= index:
+            self.prev_tool_call_arr.append({})
+
     def extract_tool_calls_streaming(
         self,
         previous_text: str,
@@ -232,293 +441,55 @@ def extract_tool_calls_streaming(
         if not self._tools_enabled(request):
             return DeltaMessage(content=delta_text) if delta_text else None
 
-        self._buffer += delta_text
-
-        while True:
-            if not self._in_tool_call:
-                start_idx = self._buffer.find(self.tool_call_start_token)
-                if start_idx == -1:
-                    # Check for partial start token at end of buffer
-                    for i in range(1, len(self.tool_call_start_token)):
-                        if self._buffer.endswith(self.tool_call_start_token[:i]):
-                            out = self._buffer[:-i]
-                            self._buffer = self._buffer[-i:]
-                            return DeltaMessage(content=out) if out else None
-                    out = self._buffer
-                    self._buffer = ""
-                    return DeltaMessage(content=out) if out else None
-
-                if start_idx > 0:
-                    out = self._buffer[:start_idx]
-                    self._buffer = self._buffer[start_idx:]
-                    return DeltaMessage(content=out) if out else None
-
-                self._buffer = self._buffer[len(self.tool_call_start_token) :]
-                self._begin_tool_call()
-                continue
-
-            # Parse tool name first
-            if not self.current_tool_name_sent:
-                nl = self._buffer.find("\n")
-                ak = self._buffer.find(self.arg_key_start)
-                end = self._buffer.find(self.tool_call_end_token)
-                candidates = [i for i in [nl, ak, end] if i != -1]
-                if not candidates:
-                    return None
-                cut = min(candidates)
-                tool_name = self._buffer[:cut].strip()
-                if tool_name == "" and cut == end:
-                    # Handle empty tool call like `<tool_call></tool_call>`.
-                    # Consume the tokens and reset state to avoid infinite loop.
-                    self._buffer = self._buffer[end + len(self.tool_call_end_token) :]
-                    self._finish_tool_call()
-                    self._revert_last_tool_call_state()
-                    continue
-
-                if cut == nl:
-                    self._buffer = self._buffer[nl + 1 :]
-                else:
-                    self._buffer = self._buffer[cut:]
-
-                self._current_tool_name = tool_name
-                self.current_tool_name_sent = True
-                return self._emit_tool_name_delta(tool_name)
-
-            assert self._current_tool_name is not None
-
-            # Handle incremental string value streaming
-            if self._streaming_string_value:
-                val_end = self._buffer.find(self.arg_val_end)
-                if val_end != -1:
-                    raw_content = self._buffer[:val_end]
-                    self._buffer = self._buffer[val_end + len(self.arg_val_end) :]
-                    self._streaming_string_value = False
-                    self._pending_key = None
-
-                    escaped = self._json_escape_string_content(raw_content)
-                    frag = escaped + '"'
-                    self.streamed_args_for_tool[self.current_tool_id] += frag
-                    return self._emit_tool_args_delta(frag)
-                else:
-                    # Check for partial </arg_value> at end
-                    safe_len = len(self._buffer)
-                    for i in range(1, len(self.arg_val_end)):
-                        if self._buffer.endswith(self.arg_val_end[:i]):
-                            safe_len = len(self._buffer) - i
-                            break
-
-                    if safe_len > 0:
-                        to_emit = self._buffer[:safe_len]
-                        self._buffer = self._buffer[safe_len:]
-                        escaped = self._json_escape_string_content(to_emit)
-                        if escaped:
-                            self.streamed_args_for_tool[self.current_tool_id] += escaped
-                            return self._emit_tool_args_delta(escaped)
-                    return None
-
-            # If we have a pending key, parse its value
-            if self._pending_key is not None:
-                val_pos = self._buffer.find(self.arg_val_start)
-                if val_pos == -1:
-                    return None
-                if val_pos > 0:
-                    self._buffer = self._buffer[val_pos:]
-
-                key = (self._pending_key or "").strip()
-
-                is_string = self._is_string_type(
-                    self._current_tool_name, key, request.tools
-                )
-
-                if is_string:
-                    # String type: stream incrementally
-                    self._buffer = self._buffer[len(self.arg_val_start) :]
-
-                    if key in self._seen_keys[self.current_tool_id]:
-                        self._pending_key = None
-                        continue
-
-                    self._seen_keys[self.current_tool_id].add(key)
-                    key_json = json.dumps(key, ensure_ascii=False)
-
-                    if not self._args_started[self.current_tool_id]:
-                        frag = "{" + key_json + ': "'
-                        self._args_started[self.current_tool_id] = True
-                    else:
-                        frag = ", " + key_json + ': "'
-
-                    self.streamed_args_for_tool[self.current_tool_id] += frag
-                    self._streaming_string_value = True
-                    return self._emit_tool_args_delta(frag)
-                else:
-                    # Non-string type: wait for complete value
-                    val_end = self._buffer.find(self.arg_val_end)
-                    if val_end == -1:
-                        return None
-
-                    raw_val = self._buffer[len(self.arg_val_start) : val_end].strip()
-                    self._buffer = self._buffer[val_end + len(self.arg_val_end) :]
-                    self._pending_key = None
-
-                    frag_or_none = self._append_arg_fragment(key=key, raw_val=raw_val)
-                    if frag_or_none:
-                        return self._emit_tool_args_delta(frag_or_none)
-                    continue
-
-            # Parse next arg or close
-            end_pos = self._buffer.find(self.tool_call_end_token)
-            key_pos = self._buffer.find(self.arg_key_start)
-            if end_pos != -1 and (key_pos == -1 or end_pos < key_pos):
-                self._buffer = self._buffer[end_pos + len(self.tool_call_end_token) :]
-                frag_or_none = self._close_args_if_needed()
-                # Finalize prev_tool_call_arr with complete parsed arguments
-                if self._current_tool_name:
-                    try:
-                        full_args_str = self.streamed_args_for_tool[
-                            self.current_tool_id
-                        ]
-                        args_dict = json.loads(full_args_str)
-                        self.prev_tool_call_arr[self.current_tool_id] = {
-                            "name": self._current_tool_name,
-                            "arguments": args_dict,
-                        }
-                    except (json.JSONDecodeError, IndexError) as e:
-                        logger.warning(
-                            "Failed to finalize tool call state for tool %d: %s",
-                            self.current_tool_id,
-                            e,
-                        )
-                self._finish_tool_call()
-                return (
-                    self._emit_tool_args_delta(frag_or_none) if frag_or_none else None
+        content = self._extract_content(current_text)
+        regions = self._extract_tool_call_regions(current_text)
+        tool_call_deltas: list[DeltaToolCall] = []
+
+        for i, (inner_text, is_complete) in enumerate(regions):
+            self._ensure_tool_state_for(i)
+
+            # Extract tool name
+            tool_name = self._extract_tool_name_from_region(inner_text)
+            if not tool_name:
+                break
+
+            # Emit tool name (once per tool call)
+            if "name" not in self.prev_tool_call_arr[i]:
+                self.prev_tool_call_arr[i]["name"] = tool_name
+                tool_call_deltas.append(
+                    DeltaToolCall(
+                        index=i,
+                        id=self._tool_call_ids[i],
+                        type="function",
+                        function=DeltaFunctionCall(
+                            name=tool_name,
+                            arguments="",
+                        ).model_dump(exclude_none=True),
+                    )
                 )
 
-            if key_pos == -1:
-                return None
-            if key_pos > 0:
-                self._buffer = self._buffer[key_pos:]
-            key_end = self._buffer.find(self.arg_key_end)
-            if key_end == -1:
-                return None
-            key = self._buffer[len(self.arg_key_start) : key_end]
-            self._buffer = self._buffer[key_end + len(self.arg_key_end) :]
-            self._pending_key = key
-            continue
-
-    def _ensure_tool_state(self) -> None:
-        while len(self._tool_call_ids) <= self.current_tool_id:
-            self._tool_call_ids.append(
-                make_tool_call_id(id_type="random", func_name=None, idx=None)
+            # Build args JSON so far, diff, emit
+            args_so_far = self._build_args_json_so_far(
+                tool_name, inner_text, is_complete
             )
-        while len(self.streamed_args_for_tool) <= self.current_tool_id:
-            self.streamed_args_for_tool.append("")
-        while len(self.prev_tool_call_arr) <= self.current_tool_id:
-            self.prev_tool_call_arr.append({})
-        while len(self._args_started) <= self.current_tool_id:
-            self._args_started.append(False)
-        while len(self._args_closed) <= self.current_tool_id:
-            self._args_closed.append(False)
-        while len(self._seen_keys) <= self.current_tool_id:
-            self._seen_keys.append(set())
-
-    def _begin_tool_call(self) -> None:
-        if self.current_tool_id == -1:
-            self.current_tool_id = 0
-        else:
-            self.current_tool_id += 1
-        self._ensure_tool_state()
-        self.current_tool_name_sent = False
-        self._current_tool_name = None
-        self._pending_key = None
-        self._streaming_string_value = False
-        self._in_tool_call = True
-
-    def _finish_tool_call(self) -> None:
-        self._in_tool_call = False
-        self._current_tool_name = None
-        self._pending_key = None
-        self._streaming_string_value = False
-
-    def _revert_last_tool_call_state(self) -> None:
-        """Revert the state allocation for the last tool call."""
-        if self.current_tool_id < 0:
-            return
-        self._tool_call_ids.pop()
-        self.streamed_args_for_tool.pop()
-        self.prev_tool_call_arr.pop()
-        self._args_started.pop()
-        self._args_closed.pop()
-        self._seen_keys.pop()
-        self.current_tool_id -= 1
-
-    def _emit_tool_name_delta(self, tool_name: str) -> DeltaMessage:
-        self.prev_tool_call_arr[self.current_tool_id] = {
-            "name": self._current_tool_name,
-            "arguments": {},
-        }
-        return DeltaMessage(
-            tool_calls=[
-                DeltaToolCall(
-                    index=self.current_tool_id,
-                    id=self._tool_call_ids[self.current_tool_id],
-                    type="function",
-                    function=DeltaFunctionCall(
-                        name=tool_name,
-                        arguments="",
-                    ).model_dump(exclude_none=True),
-                )
-            ]
-        )
-
-    def _emit_tool_args_delta(self, fragment: str) -> DeltaMessage:
-        return DeltaMessage(
-            tool_calls=[
-                DeltaToolCall(
-                    index=self.current_tool_id,
-                    function=DeltaFunctionCall(arguments=fragment).model_dump(
-                        exclude_none=True
-                    ),
+            diff = self._compute_args_diff(i, args_so_far)
+            if diff:
+                tool_call_deltas.append(
+                    DeltaToolCall(
+                        index=i,
+                        function=DeltaFunctionCall(arguments=diff).model_dump(
+                            exclude_none=True
+                        ),
+                    )
                 )
-            ]
-        )
-
-    def _append_arg_fragment(
-        self,
-        *,
-        key: str,
-        raw_val: str,
-    ) -> str | None:
-        key = key.strip()
-        if not key:
-            return None
-        if key in self._seen_keys[self.current_tool_id]:
-            return None
-
-        # This function is only called for non-string types (already checked
-        # by _is_string_type in the caller), so we always deserialize.
-        val_obj: Any = self._deserialize(raw_val)
 
-        key_json = json.dumps(key, ensure_ascii=False)
-        val_json = json.dumps(val_obj, ensure_ascii=False)
-
-        if not self._args_started[self.current_tool_id]:
-            fragment = "{" + key_json + ": " + val_json
-            self._args_started[self.current_tool_id] = True
-        else:
-            fragment = "," + key_json + ": " + val_json
+        # Update current_tool_id for serving layer compatibility
+        if regions:
+            self.current_tool_id = len(regions) - 1
 
-        self._seen_keys[self.current_tool_id].add(key)
-        self.streamed_args_for_tool[self.current_tool_id] += fragment
-        return fragment
-
-    def _close_args_if_needed(self) -> str | None:
-        if self._args_closed[self.current_tool_id]:
-            return None
-        self._args_closed[self.current_tool_id] = True
-        if not self._args_started[self.current_tool_id]:
-            fragment = "{}"
-            self.streamed_args_for_tool[self.current_tool_id] = fragment
-        else:
-            fragment = "}"
-            self.streamed_args_for_tool[self.current_tool_id] += fragment
-        return fragment
+        if content or tool_call_deltas:
+            return DeltaMessage(
+                content=content,
+                tool_calls=tool_call_deltas,
+            )
+        return None
diff --git a/vllm/tool_parsers/granite4_tool_parser.py b/vllm/tool_parsers/granite4_tool_parser.py
index 3d58690f5928..b11e9e6d5066 100644
--- a/vllm/tool_parsers/granite4_tool_parser.py
+++ b/vllm/tool_parsers/granite4_tool_parser.py
@@ -19,6 +19,7 @@
     FunctionCall,
     ToolCall,
 )
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import (
@@ -59,7 +60,9 @@ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         self.start_regex = re.compile(self.tc_start)
         self.end_regex = re.compile(self.tc_end)
 
-    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
         request = super().adjust_request(request)
         if request.tools and request.tool_choice != "none":
             # do not skip special tokens because the tool_call tokens are
diff --git a/vllm/tool_parsers/hermes_tool_parser.py b/vllm/tool_parsers/hermes_tool_parser.py
index 4e54d75b4b6f..546cde5cd14c 100644
--- a/vllm/tool_parsers/hermes_tool_parser.py
+++ b/vllm/tool_parsers/hermes_tool_parser.py
@@ -18,39 +18,27 @@
     FunctionCall,
     ToolCall,
 )
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import (
     Tool,
     ToolParser,
 )
+from vllm.tool_parsers.utils import is_complete_json, partial_tag_overlap
 from vllm.utils.mistral import is_mistral_tokenizer
 
 logger = init_logger(__name__)
 
 
-def _partial_tag_overlap(text: str, tag: str) -> int:
-    """Length of the longest prefix of `tag` that matches a suffix of `text`.
-
-    E.g. text ending in "<tool_" returns 6 when tag is "<tool_call>".
-    Returns 0 if there is no overlap.
-    """
-    max_check = min(len(tag) - 1, len(text))
-    for k in range(max_check, 0, -1):
-        if text.endswith(tag[:k]):
-            return k
-    return 0
-
-
-def _is_valid_json(text: str) -> bool:
-    try:
-        json.loads(text)
-        return True
-    except (json.JSONDecodeError, ValueError):
-        return False
-
-
 class Hermes2ProToolParser(ToolParser):
+    tool_call_start_token: str = "<tool_call>"
+    tool_call_end_token: str = "</tool_call>"
+    tool_call_regex = re.compile(
+        r"<tool_call>(.*?)</tool_call>|<tool_call>(.*)", re.DOTALL
+    )
+    scratch_pad_regex = re.compile(r"<scratch_pad>(.*?)</scratch_pad>", re.DOTALL)
+
     def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         super().__init__(tokenizer, tools)
 
@@ -58,16 +46,6 @@ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
             logger.error("Detected Mistral tokenizer when using a Hermes model")
             self.model_tokenizer = tokenizer.tokenizer
 
-        self.tool_call_start_token: str = "<tool_call>"
-        self.tool_call_end_token: str = "</tool_call>"
-
-        self.tool_call_regex = re.compile(
-            r"<tool_call>(.*?)</tool_call>|<tool_call>(.*)", re.DOTALL
-        )
-        self.scratch_pad_regex = re.compile(
-            r"<scratch_pad>(.*?)</scratch_pad>", re.DOTALL
-        )
-
         if not self.model_tokenizer:
             raise ValueError(
                 "The model tokenizer must be passed to the ToolParser "
@@ -77,7 +55,9 @@ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         # Streaming state: what has been sent to the client.
         self._sent_content_idx: int = 0
 
-    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
         request = super().adjust_request(request)
         if request.tools and request.tool_choice != "none":
             # do not skip special tokens because the tool_call tokens are
@@ -144,7 +124,7 @@ def _extract_content(self, current_text: str) -> str | None:
         Holds back any suffix that could be a partial <tool_call> tag.
         """
         if self.tool_call_start_token not in current_text:
-            overlap_length = _partial_tag_overlap(
+            overlap_length = partial_tag_overlap(
                 current_text, self.tool_call_start_token
             )
             sendable_idx = len(current_text) - overlap_length
@@ -173,13 +153,13 @@ def _extract_tool_call_jsons(self, text: str) -> list[tuple[str, bool]]:
             else:
                 raw = text[json_start:]
                 # Strip partial </tool_call> suffix if present.
-                overlap = _partial_tag_overlap(raw, self.tool_call_end_token)
+                overlap = partial_tag_overlap(raw, self.tool_call_end_token)
                 if overlap:
                     raw = raw[:-overlap]
                 tc_json = raw.strip()
                 # Valid JSON without closing tag = complete body,
                 # tag tokens just haven't arrived yet.
-                is_complete = _is_valid_json(tc_json) if tc_json else False
+                is_complete = is_complete_json(tc_json) if tc_json else False
                 results.append((tc_json, is_complete))
                 break
         return results
diff --git a/vllm/tool_parsers/hy_v3_tool_parser.py b/vllm/tool_parsers/hy_v3_tool_parser.py
new file mode 100644
index 000000000000..809a85ce4171
--- /dev/null
+++ b/vllm/tool_parsers/hy_v3_tool_parser.py
@@ -0,0 +1,645 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import ast
+import json
+from collections.abc import Sequence
+from typing import Any
+
+import regex as re
+
+from vllm.entrypoints.chat_utils import make_tool_call_id
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+    ChatCompletionToolsParam,
+)
+from vllm.entrypoints.openai.engine.protocol import (
+    DeltaFunctionCall,
+    DeltaMessage,
+    DeltaToolCall,
+    ExtractedToolCallInformation,
+    FunctionCall,
+    ToolCall,
+)
+from vllm.logger import init_logger
+from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.abstract_tool_parser import (
+    Tool,
+    ToolParser,
+)
+
+logger = init_logger(__name__)
+
+
+class HYV3ToolParser(ToolParser):
+    _TYPE_ALIASES: dict[str, str] = {
+        "str": "string",
+        "text": "string",
+        "varchar": "string",
+        "char": "string",
+        "enum": "string",
+        "bool": "boolean",
+        "binary": "boolean",
+        "int": "integer",
+        "float": "number",
+        "double": "number",
+        "list": "array",
+        "dict": "object",
+        "map": "object",
+    }
+
+    # Prefix-based wildcard matching for non-standard type names.
+    # Following the same approach as
+    # qwen3coder_tool_parser._convert_param_value which uses
+    # param_type.startswith("int"), startswith("uint"), etc.
+    _INTEGER_PREFIXES: tuple[str, ...] = (
+        "int",
+        "uint",
+        "long",
+        "short",
+        "unsigned",
+    )
+    _NUMBER_PREFIXES: tuple[str, ...] = ("num", "float")
+
+    @staticmethod
+    def _normalize_type(raw_type: str) -> str:
+        """Map non-standard type aliases to JSON Schema standard names.
+
+        First performs exact lookup in _TYPE_ALIASES. On miss, falls back
+        to prefix-based matching using startswith()
+          - int*/uint*/long*/short*/unsigned* → "integer"
+          - num*/float* → "number"
+        """
+        exact = HYV3ToolParser._TYPE_ALIASES.get(raw_type)
+        if exact is not None:
+            return exact
+        lower = raw_type.lower()
+        if any(lower.startswith(p) for p in HYV3ToolParser._INTEGER_PREFIXES):
+            return "integer"
+        if any(lower.startswith(p) for p in HYV3ToolParser._NUMBER_PREFIXES):
+            return "number"
+        return raw_type
+
+    @staticmethod
+    def _get_arg_schema(
+        function_name: str,
+        arg_key: str,
+        tools: list[ChatCompletionToolsParam] | None,
+    ) -> dict:
+        """Look up a specific argument's property schema from the tools list."""
+        if tools is None:
+            return {}
+        for tool in tools:
+            if tool.function.name == function_name:
+                if tool.function.parameters is None:
+                    return {}
+                return tool.function.parameters.get("properties", {}).get(arg_key, {})
+        logger.warning("No tool named '%s'.", function_name)
+        return {}
+
+    @staticmethod
+    def _get_schema_options(arg_schema: dict) -> list[dict]:
+        """Normalize any property schema into a list of sub-schemas.
+        - has type (single type) → return [arg_schema]
+        - anyOf  → return the anyOf list
+        - oneOf  → return the oneOf list
+        - fallback → [{"type": "string"}]
+
+        Note: single ``type`` has the highest priority.
+        """
+        if "type" in arg_schema:
+            return [arg_schema]
+        if "anyOf" in arg_schema:
+            return arg_schema["anyOf"]
+        if "oneOf" in arg_schema:
+            return arg_schema["oneOf"]
+
+        return [{"type": "string"}]
+
+    @staticmethod
+    def _get_types(arg_schema: dict) -> set[str]:
+        """Extract normalized, non-null type set from a property schema."""
+        schemas = HYV3ToolParser._get_schema_options(arg_schema)
+        return {
+            HYV3ToolParser._normalize_type(s.get("type", "string")) for s in schemas
+        } - {"null"}
+
+    @staticmethod
+    def _is_only_string_type(
+        function_name: str,
+        arg_key: str,
+        tools: list[ChatCompletionToolsParam] | None,
+    ) -> bool:
+        """Return True if the parameter's type set is exactly {"string"}.
+
+        Only pure string types get partial value streaming; compound types
+        like anyOf(string | array) do not, since the partial value might
+        end up being a JSON array or object.
+        """
+        arg_schema = HYV3ToolParser._get_arg_schema(function_name, arg_key, tools)
+        types = HYV3ToolParser._get_types(arg_schema)
+        return types == {"string"}
+
+    @staticmethod
+    def _try_parse_bool(value: str) -> bool | None:
+        """Try to parse a string as bool; return None on failure."""
+        lower = value.lower()
+        if lower == "true":
+            return True
+        elif lower == "false":
+            return False
+        return None
+
+    @staticmethod
+    def _try_parse_int(value: str) -> int | None:
+        """Try to parse a string as int; return None on failure."""
+        try:
+            return int(value)
+        except (ValueError, TypeError):
+            return None
+
+    @staticmethod
+    def _try_parse_wildcard_number(value: str) -> int | float | None:
+        """Try to parse a string as a number (int or float).
+
+        Decision rule: if the string contains '.' or 'e'/'E' (scientific
+        notation), parse as float; otherwise parse as int.
+
+        Examples:
+            "5"    → int(5)
+            "5.0"  → float(5.0)
+            "5.3"  → float(5.3)
+            "1e3"  → float(1000.0)
+            "-3"   → int(-3)
+
+        Return None on failure.
+        """
+        try:
+            if "." in value or "e" in value or "E" in value:
+                return float(value)
+            return int(value)
+        except (ValueError, TypeError):
+            return None
+
+    @staticmethod
+    def _deserialize(value: str) -> Any:
+        """Deserialize a string value using json.loads then ast.literal_eval."""
+        try:
+            return json.loads(value)
+        except Exception:
+            pass
+        try:
+            return ast.literal_eval(value)
+        except Exception:
+            pass
+        return value
+
+    @staticmethod
+    def _parse_value(
+        value: str,
+        function_name: str,
+        arg_key: str,
+        tools: list[ChatCompletionToolsParam] | None,
+    ) -> Any:
+        """Unified argument value parser with anyOf/oneOf support.
+
+        Fallthrough chain:
+            bool → int → number(wildcard_number)
+            → json.loads for array/object
+            → string → _deserialize
+        """
+        arg_schema = HYV3ToolParser._get_arg_schema(function_name, arg_key, tools)
+        types = HYV3ToolParser._get_types(arg_schema)
+
+        # 1. Try bool
+        if "boolean" in types:
+            result_bool = HYV3ToolParser._try_parse_bool(value)
+            if result_bool is not None:
+                return result_bool
+
+        # 2. Try int
+        if "integer" in types:
+            result_int = HYV3ToolParser._try_parse_int(value)
+            if result_int is not None:
+                return result_int
+
+        # 3. Try number (wildcard_number: int if no '.'/e/E, float otherwise)
+        if "number" in types:
+            result_number = HYV3ToolParser._try_parse_wildcard_number(value)
+            if result_number is not None:
+                return result_number
+
+        # 4. Try json.loads (covers array/object and other unlisted types)
+        if types - {"string", "boolean", "integer", "number"}:
+            try:
+                return json.loads(value)
+            except (json.JSONDecodeError, ValueError):
+                pass
+
+        # 5. String fallback
+        if "string" in types:
+            return value
+
+        # 6. Final fallback
+        return HYV3ToolParser._deserialize(value)
+
+    def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+        super().__init__(tokenizer, tools)
+
+        self.current_tool_name_sent: bool = False
+        self.prev_tool_call_arr: list[dict] = []
+        self.current_tool_id: int = -1
+        self.streamed_args_for_tool: list[
+            str
+        ] = []  # map what has been streamed for each tool so far to a list
+
+        # Streaming state: send tool name first, then return arguments at once
+        self._streaming_tool_name: str | None = None  # tool name being streamed
+
+        # State fields for incremental argument streaming
+        self._completed_args: dict = {}  # closed {key: parsed_value}
+        self._current_arg_key: str | None = None  # key being collected
+        self._current_arg_is_string: bool = False  # is current arg pure string?
+        self._streamed_json_len: int = 0  # bytes of JSON already sent
+
+        self.tool_calls_start_token: str = "<tool_calls>"
+        self.tool_calls_end_token: str = "</tool_calls>"
+
+        self.tool_call_start_token: str = "<tool_call>"
+        self.tool_call_end_token: str = "</tool_call>"
+
+        self.tool_sep_token: str = "<tool_sep>"
+
+        self.arg_key_start_token: str = "<arg_key>"
+        self.arg_key_end_token: str = "</arg_key>"
+
+        self.arg_value_start_token: str = "<arg_value>"
+        self.arg_value_end_token: str = "</arg_value>"
+
+        self.tool_call_regex = re.compile(
+            rf"{self.tool_call_start_token}(.*?){self.tool_sep_token}"
+            rf"(.*?){self.tool_call_end_token}",
+            re.DOTALL,
+        )
+
+        self.tool_call_portion_regex = re.compile(
+            rf"{self.tool_call_start_token}(.*?){self.tool_sep_token}(.*)", re.DOTALL
+        )
+
+        self.func_args_regex = re.compile(
+            rf"{self.arg_key_start_token}(.*?){self.arg_key_end_token}\s*"
+            rf"{self.arg_value_start_token}(.*?){self.arg_value_end_token}",
+            re.DOTALL,
+        )
+
+        if not self.model_tokenizer:
+            raise ValueError(
+                "The model tokenizer must be passed to the ToolParser "
+                "constructor during construction."
+            )
+        self.tool_calls_start_token_id = self.vocab.get(self.tool_calls_start_token)
+        self.tool_calls_end_token_id = self.vocab.get(self.tool_calls_end_token)
+
+        self.tool_call_start_token_id = self.vocab.get(self.tool_call_start_token)
+        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
+        self._buffer = ""
+
+        if (
+            self.tool_calls_start_token_id is None
+            or self.tool_calls_end_token_id is None
+        ):
+            raise RuntimeError(
+                "HYV3 Tool parser could not locate tool call "
+                "start/end tokens in the tokenizer!"
+            )
+
+    def _extract_tool_calls(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest,
+    ) -> list[ToolCall]:
+        try:
+            function_call_tuples = []
+            # start_token{name}sep_token{args}end_token...
+            function_calls = self.tool_call_regex.findall(model_output)
+            if function_calls:
+                function_call_tuples.extend(function_calls)
+                remaining = model_output.split(self.tool_call_end_token)[-1]
+                function_calls = self.tool_call_portion_regex.findall(remaining)
+                function_call_tuples += function_calls
+            else:
+                function_calls = self.tool_call_portion_regex.findall(model_output)
+                if function_calls:
+                    function_call_tuples.extend(function_calls)
+            tool_calls = []
+            for match in function_call_tuples:
+                function_name, function_args = match
+                function_name = function_name.strip()
+                function_args = function_args.strip()
+
+                arg_pairs = self.func_args_regex.findall(function_args)
+                arg_dict = {}
+                for key, value in arg_pairs:
+                    parsed_value = HYV3ToolParser._parse_value(
+                        value, function_name, key, request.tools
+                    )
+                    arg_dict[key] = parsed_value
+                tool_calls.append(
+                    ToolCall(
+                        type="function",
+                        function=FunctionCall(
+                            name=function_name,
+                            arguments=json.dumps(arg_dict, ensure_ascii=False),
+                        ),
+                    )
+                )
+            return tool_calls
+        except Exception:
+            logger.exception("Error in extracting tool call from response.")
+            return []
+
+    def extract_tool_calls(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest,
+    ) -> ExtractedToolCallInformation:
+        # sanity check; avoid unnecessary processing
+        if self.tool_calls_start_token not in model_output:
+            return ExtractedToolCallInformation(
+                tools_called=False, tool_calls=[], content=model_output
+            )
+        else:
+            try:
+                tool_calls = self._extract_tool_calls(model_output, request)
+
+                s_index = model_output.find(self.tool_calls_start_token)
+                content = model_output[:s_index] if s_index != -1 else model_output
+                return ExtractedToolCallInformation(
+                    tools_called=True,
+                    tool_calls=tool_calls,
+                    content=content if content else None,
+                )
+
+            except Exception:
+                logger.exception("Error in extracting tool call from response.")
+                return ExtractedToolCallInformation(
+                    tools_called=False, tool_calls=[], content=model_output
+                )
+
+    def _reset_streaming_tool_state(self):
+        """Reset the streaming state for a single tool call."""
+        self._streaming_tool_name = None
+        self._completed_args = {}
+        self._current_arg_key = None
+        self._current_arg_is_string = False
+        self._streamed_json_len = 0
+
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> DeltaMessage | None:
+        # Check whether current tokens contain the tool_calls start token
+        if self.tool_calls_start_token_id not in current_token_ids:
+            return DeltaMessage(content=delta_text)
+
+        # Encountered tool_calls start tag; extract preceding content and buffer
+        if self.tool_calls_start_token in delta_text:
+            text_parts = delta_text.split(self.tool_calls_start_token)
+            self._buffer += text_parts[-1]
+            if text_parts[0]:
+                return DeltaMessage(content=text_parts[0])
+            # Don't return None; continue processing buffer for complete content
+        else:
+            self._buffer += delta_text
+
+        # Encountered finish, extract valid arguments
+        if (
+            current_text.find(self.tool_call_end_token + self.tool_calls_end_token)
+            != -1
+            and self._buffer.find(self.tool_call_end_token) == -1
+        ):
+            self._buffer += self.tool_call_end_token + self.tool_calls_end_token
+
+        cur_text = self._buffer
+
+        # Haven't encountered tool_call start tag yet; keep buffering
+        start_idx = cur_text.find(self.tool_call_start_token)
+        if start_idx == -1 and self._streaming_tool_name is None:
+            self._buffer = ""
+            return None
+
+        # === Phase 1: Detect tool name (send when tool_sep_token is seen) ===
+        name_delta: DeltaMessage | None = None
+        if self._streaming_tool_name is None:
+            sep_idx = cur_text.find(self.tool_sep_token)
+            if sep_idx == -1:
+                # tool_sep not yet seen; keep buffering from tool_call_start
+                self._buffer = cur_text[start_idx:]
+                return None
+
+            # Extract tool name: between tool_call_start_token and tool_sep_token
+            name_start = start_idx + len(self.tool_call_start_token)
+            tool_name = cur_text[name_start:sep_idx].strip()
+            self._streaming_tool_name = tool_name
+
+            # Update buffer: keep only content after tool_sep (i.e. the args portion)
+            self._buffer = cur_text[sep_idx + len(self.tool_sep_token) :]
+
+            # Increment tool_id and send a chunk containing only the name
+            self.current_tool_id += 1
+            self._current_tool_call_id = make_tool_call_id()
+            name_delta = DeltaMessage(
+                tool_calls=[
+                    DeltaToolCall(
+                        index=self.current_tool_id,
+                        id=self._current_tool_call_id,
+                        type="function",
+                        function=DeltaFunctionCall(
+                            name=tool_name,
+                        ),
+                    )
+                ]
+            )
+
+            # Check if buffer already has complete arguments (all-in-one-delta)
+            if self.tool_call_end_token not in self._buffer:
+                return name_delta
+            # Buffer already has a complete tool call; continue to phase 2 below
+
+        # === Phase 2: Incremental argument streaming ===
+        return self._extract_streaming_incremental(name_delta, request)
+
+    def _make_args_delta(self, argument_diff: str) -> DeltaMessage:
+        """Build a DeltaMessage containing only an arguments diff."""
+        return DeltaMessage(
+            tool_calls=[
+                DeltaToolCall(
+                    index=self.current_tool_id,
+                    function=DeltaFunctionCall(arguments=argument_diff),
+                )
+            ]
+        )
+
+    def _extract_streaming_incremental(
+        self,
+        name_delta: DeltaMessage | None,
+        request: ChatCompletionRequest,
+    ) -> DeltaMessage | None:
+        """Incremental phase-2: scan tags in buffer, emit JSON diffs.
+
+        Strategy:
+        - Track completed args and emit each one as a JSON fragment.
+        - For string-typed args, stream the value character-by-character.
+        - Withhold the closing ``}`` until ``</tool_call>`` is seen.
+
+        We build JSON manually via fragments rather than using json.dumps
+        with a cursor, because json.dumps of partial-vs-full string values
+        produces incompatible prefixes (e.g. ``""}`` vs ``"Hello"}``).
+        """
+        buf = self._buffer
+        is_complete = self.tool_call_end_token in buf
+
+        if is_complete:
+            end_idx = buf.find(self.tool_call_end_token)
+            args_text = buf[:end_idx]
+            remaining = buf[end_idx + len(self.tool_call_end_token) :]
+        else:
+            args_text = buf
+            remaining = ""
+
+        # --- scan all fully closed kv pairs ---
+        arg_pairs = self.func_args_regex.findall(args_text)
+        for key, value in arg_pairs:
+            key = key.strip()
+            if key not in self._completed_args:
+                parsed_value = HYV3ToolParser._parse_value(
+                    value, self._streaming_tool_name or "", key, request.tools
+                )
+                self._completed_args[key] = parsed_value
+
+        # --- detect partial (unclosed) kv at the tail ---
+        last_closed_end = 0
+        for m in self.func_args_regex.finditer(args_text):
+            last_closed_end = m.end()
+        tail = args_text[last_closed_end:]
+
+        partial_key: str | None = None
+        partial_value: str | None = None
+
+        ak_start = tail.find(self.arg_key_start_token)
+        if ak_start != -1:
+            ak_end = tail.find(
+                self.arg_key_end_token,
+                ak_start + len(self.arg_key_start_token),
+            )
+            if ak_end != -1:
+                partial_key = tail[
+                    ak_start + len(self.arg_key_start_token) : ak_end
+                ].strip()
+                self._current_arg_key = partial_key
+                self._current_arg_is_string = HYV3ToolParser._is_only_string_type(
+                    self._streaming_tool_name or "",
+                    partial_key,
+                    request.tools,
+                )
+
+                av_start = tail.find(self.arg_value_start_token, ak_end)
+                if av_start != -1:
+                    val_content_start = av_start + len(self.arg_value_start_token)
+                    if self._current_arg_is_string:
+                        partial_value = tail[val_content_start:]
+            else:
+                # key not yet closed
+                self._current_arg_key = None
+                self._current_arg_is_string = False
+
+        # --- build the current JSON snapshot as a string ---
+        # We construct JSON manually so we can precisely control
+        # what gets sent incrementally.
+        snapshot_parts: list[str] = []
+        for k, v in self._completed_args.items():
+            k_json = json.dumps(k, ensure_ascii=False)
+            v_json = json.dumps(v, ensure_ascii=False)
+            snapshot_parts.append(f"{k_json}: {v_json}")
+
+        if partial_key is not None and partial_value is not None:
+            k_json = json.dumps(partial_key, ensure_ascii=False)
+            # For string partial value, we build the JSON string
+            # WITHOUT the closing quote, so the prefix stays stable
+            # as the value grows.  The closing `"` and `}` will be
+            # sent when the value or tool_call closes.
+            escaped_val = (
+                partial_value.replace("\\", "\\\\")
+                .replace('"', '\\"')
+                .replace("\n", "\\n")
+                .replace("\r", "\\r")
+                .replace("\t", "\\t")
+            )
+            # Note: no closing " here – it's appended only on close
+            snapshot_parts.append(f'{k_json}: "{escaped_val}')
+
+        snapshot = "{" + ", ".join(snapshot_parts) + "}"
+
+        # --- compute diff ---
+        argument_diff: str | None = None
+
+        if is_complete:
+            # Tool call finished – send everything remaining.
+            # Build final snapshot with proper JSON (all values closed).
+            final_args = dict(self._completed_args)
+            final_json = json.dumps(final_args, ensure_ascii=False)
+            if self._streamed_json_len < len(final_json):
+                argument_diff = final_json[self._streamed_json_len :]
+            self._streamed_json_len = len(final_json)
+
+            # Record into prev_tool_call_arr
+            self.prev_tool_call_arr.append(
+                {
+                    "name": self._streaming_tool_name,
+                    "arguments": final_args,
+                }
+            )
+            self.streamed_args_for_tool.append(final_json)
+
+            self._reset_streaming_tool_state()
+            self._buffer = remaining
+        else:
+            # Still in progress – withhold the tail.
+            # For open strings: snapshot ends with ...partial_val}
+            #   we withhold "}" (1 char) – the missing closing " will
+            #   be sent when the value closes.
+            # For no open string: snapshot ends with ...value"}
+            #   we withhold "}" (1 char).
+            end = len(snapshot) - 1  # exclude trailing "}"
+            if end > self._streamed_json_len:
+                argument_diff = snapshot[self._streamed_json_len : end]
+                self._streamed_json_len = end
+
+        # --- construct return DeltaMessage ---
+        if name_delta is not None and argument_diff:
+            nd_func = name_delta.tool_calls[0].function
+            return DeltaMessage(
+                tool_calls=[
+                    DeltaToolCall(
+                        index=self.current_tool_id,
+                        id=self._current_tool_call_id,
+                        type="function",
+                        function=DeltaFunctionCall(
+                            name=nd_func.name if nd_func else None,
+                            arguments=argument_diff,
+                        ),
+                    )
+                ]
+            )
+        elif name_delta is not None:
+            return name_delta
+        elif argument_diff:
+            return self._make_args_delta(argument_diff)
+        else:
+            return None
diff --git a/vllm/tool_parsers/internlm2_tool_parser.py b/vllm/tool_parsers/internlm2_tool_parser.py
index fc7c44cff9ef..f4aaeef71a0f 100644
--- a/vllm/tool_parsers/internlm2_tool_parser.py
+++ b/vllm/tool_parsers/internlm2_tool_parser.py
@@ -19,6 +19,7 @@
     FunctionCall,
     ToolCall,
 )
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import (
@@ -35,7 +36,9 @@ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         super().__init__(tokenizer, tools)
         self.position = 0
 
-    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
         request = super().adjust_request(request)
         if request.tools and request.tool_choice != "none":
             # do not skip special tokens because internlm use the special
@@ -197,7 +200,7 @@ def extract_tool_calls(
         request: ChatCompletionRequest,
     ) -> ExtractedToolCallInformation:
         text = model_output
-        tools = request.tools
+        tools = self.tools
         if "<|action_start|><|plugin|>" in text:
             text, action = text.split("<|action_start|><|plugin|>")
             action = action.split("<|action_end|>".strip())[0]
diff --git a/vllm/tool_parsers/jamba_tool_parser.py b/vllm/tool_parsers/jamba_tool_parser.py
index 5a9af99109c4..dec3c88d934a 100644
--- a/vllm/tool_parsers/jamba_tool_parser.py
+++ b/vllm/tool_parsers/jamba_tool_parser.py
@@ -20,6 +20,7 @@
     FunctionCall,
     ToolCall,
 )
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
@@ -68,7 +69,9 @@ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
                 "tokens in the tokenizer!"
             )
 
-    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
         request = super().adjust_request(request)
         if request.tools and request.tool_choice != "none":
             # do not skip special tokens because jamba use the special
diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py
index bc995319e51b..7ddd8fa7a80d 100644
--- a/vllm/tool_parsers/kimi_k2_tool_parser.py
+++ b/vllm/tool_parsers/kimi_k2_tool_parser.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-# code modified from deepseekv3_tool_parser.py
 
 from collections.abc import Sequence
 
@@ -17,12 +16,14 @@
     FunctionCall,
     ToolCall,
 )
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import (
     Tool,
     ToolParser,
 )
+from vllm.tool_parsers.utils import partial_tag_overlap
 
 logger = init_logger(__name__)
 
@@ -30,124 +31,44 @@
 class KimiK2ToolParser(ToolParser):
     def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         super().__init__(tokenizer, tools)
-        self.current_tool_name_sent: bool = False
+
+        # Streaming state
+        self._sent_content_idx: int = 0
         self.prev_tool_call_arr: list[dict] = []
-        self.current_tool_id: int = -1
-        self.streamed_args_for_tool: list[
-            str
-        ] = []  # map what has been streamed for each tool so far to a list
-
-        # Section-level state management to prevent token leakage
-        self.in_tool_section: bool = False
-        self.token_buffer: str = ""
-        # Buffer size: empirical worst-case for longest marker (~30 chars) * 2
-        # + safety margin for unicode + partial overlap. Prevents unbounded growth.
-        self.buffer_max_size: int = 1024
-        self.section_char_count: int = 0  # Track characters processed in tool section
-        self.max_section_chars: int = 8192  # Force exit if section exceeds this
-        self._buffer_overflow_logged: bool = False  # Log overflow once per session
-
-        # Support both singular and plural variants
+        self.streamed_args_for_tool: list[str] = []
+
+        # Section marker
         self.tool_calls_start_token: str = "<|tool_calls_section_begin|>"
-        self.tool_calls_end_token: str = "<|tool_calls_section_end|>"
-        self.tool_calls_start_token_variants: list[str] = [
-            "<|tool_calls_section_begin|>",
-            "<|tool_call_section_begin|>",  # singular variant
-        ]
-        self.tool_calls_end_token_variants: list[str] = [
-            "<|tool_calls_section_end|>",
-            "<|tool_call_section_end|>",  # singular variant
-        ]
 
+        # Individual tool call markers
         self.tool_call_start_token: str = "<|tool_call_begin|>"
         self.tool_call_end_token: str = "<|tool_call_end|>"
+        self.tool_call_arg_token: str = "<|tool_call_argument_begin|>"
 
+        # Regex for non-streaming extraction
         self.tool_call_regex = re.compile(
-            r"<\|tool_call_begin\|>\s*(?P<tool_call_id>[^<]+:\d+)\s*<\|tool_call_argument_begin\|>\s*(?P<function_arguments>(?:(?!<\|tool_call_begin\|>).)*?)\s*<\|tool_call_end\|>",
+            r"<\|tool_call_begin\|>\s*(?P<tool_call_id>[^<]+:\d+)\s*"
+            r"<\|tool_call_argument_begin\|>\s*"
+            r"(?P<function_arguments>(?:(?!<\|tool_call_begin\|>).)*?)\s*"
+            r"<\|tool_call_end\|>",
             re.DOTALL,
         )
 
-        self.stream_tool_call_portion_regex = re.compile(
-            r"(?P<tool_call_id>.+:\d+)\s*<\|tool_call_argument_begin\|>\s*(?P<function_arguments>.*)"
-        )
-
-        self.stream_tool_call_name_regex = re.compile(r"(?P<tool_call_id>.+:\d+)\s*")
-
         if not self.model_tokenizer:
             raise ValueError(
                 "The model tokenizer must be passed to the ToolParser "
                 "constructor during construction."
             )
-        self.tool_calls_start_token_id = self.vocab.get(self.tool_calls_start_token)
-        self.tool_calls_end_token_id = self.vocab.get(self.tool_calls_end_token)
-
-        # Get token IDs for all variants
-        self.tool_calls_start_token_ids: list[int] = [
-            tid
-            for variant in self.tool_calls_start_token_variants
-            if (tid := self.vocab.get(variant)) is not None
-        ]
-        self.tool_calls_end_token_ids: list[int] = [
-            tid
-            for variant in self.tool_calls_end_token_variants
-            if (tid := self.vocab.get(variant)) is not None
-        ]
-
-        self.tool_call_start_token_id = self.vocab.get(self.tool_call_start_token)
-        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
-
-        if (
-            self.tool_calls_start_token_id is None
-            or self.tool_calls_end_token_id is None
-        ):
-            raise RuntimeError(
-                "Kimi-K2 Tool parser could not locate tool call start/end "
-                "tokens in the tokenizer!"
-            )
 
-    def _check_and_strip_markers(self, text: str) -> tuple[str, bool, bool]:
-        """
-        Check for section begin/end markers in text and strip them.
-        Returns: (cleaned_text, found_section_begin, found_section_end)
-        """
-        found_begin = False
-        found_end = False
-        cleaned = text
-
-        # Check for section begin markers (any variant)
-        for variant in self.tool_calls_start_token_variants:
-            if variant in cleaned:
-                cleaned = cleaned.replace(variant, "")
-                found_begin = True
-
-        # Check for section end markers (any variant)
-        for variant in self.tool_calls_end_token_variants:
-            if variant in cleaned:
-                cleaned = cleaned.replace(variant, "")
-                found_end = True
-        return cleaned, found_begin, found_end
-
-    def _reset_section_state(self) -> None:
-        """Reset state when exiting tool section."""
-        self.in_tool_section = False
-        self.token_buffer = ""
-        self.section_char_count = 0
-
-    def reset_streaming_state(self) -> None:
-        """
-        Reset all streaming state. Call this between requests to prevent
-        state leakage when parser instance is reused.
-        """
-        # Reset section state
-        self._reset_section_state()
-
-        # Reset parent class state
-        self.current_tool_name_sent = False
-        self.prev_tool_call_arr = []
-        self.current_tool_id = -1
-        self.streamed_args_for_tool = []
-
-        logger.debug("Streaming state reset")
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
+        request = super().adjust_request(request)
+        if request.tools and request.tool_choice != "none":
+            # Ensure special-token markers appear as literal text in
+            # current_text so we can do pure text-based parsing.
+            request.skip_special_tokens = False
+        return request
 
     def extract_tool_calls(
         self,
@@ -198,6 +119,95 @@ def extract_tool_calls(
                     tools_called=False, tool_calls=[], content=model_output
                 )
 
+    def _extract_content(self, current_text: str) -> str | None:
+        """Return unsent content before the tool-calls section, or None.
+
+        Holds back any trailing suffix that partially matches
+        ``<|tool_calls_section_begin|>`` to avoid leaking marker bytes.
+        """
+        if self.tool_calls_start_token not in current_text:
+            overlap = partial_tag_overlap(current_text, self.tool_calls_start_token)
+            sendable_idx = len(current_text) - overlap
+        else:
+            sendable_idx = current_text.index(self.tool_calls_start_token)
+
+        if sendable_idx > self._sent_content_idx:
+            content = current_text[self._sent_content_idx : sendable_idx]
+            self._sent_content_idx = sendable_idx
+            return content
+        return None
+
+    def _extract_tool_calls(self, current_text: str) -> list[str]:
+        """Extract raw bodies from ``<|tool_call_begin|>…<|tool_call_end|>`` blocks."""
+        if self.tool_calls_start_token not in current_text:
+            return []
+
+        results: list[str] = []
+        pos = current_text.index(self.tool_calls_start_token)
+        while True:
+            start = current_text.find(self.tool_call_start_token, pos)
+            if start == -1:
+                break
+            tc_start = start + len(self.tool_call_start_token)
+            end = current_text.find(self.tool_call_end_token, tc_start)
+
+            if end != -1:
+                tool_call = current_text[tc_start:end]
+                pos = end + len(self.tool_call_end_token)
+            else:
+                tool_call = current_text[tc_start:]
+                overlap = partial_tag_overlap(tool_call, self.tool_call_end_token)
+                if overlap:
+                    tool_call = tool_call[:-overlap]
+
+            results.append(tool_call)
+
+            if end == -1:
+                break
+        return results
+
+    @staticmethod
+    def _extract_tool_id_and_name(
+        header: str | None,
+    ) -> tuple[str | None, str | None]:
+        """Parse ``(tool_id, tool_name)`` from a header
+        like ``"functions.get_weather:0"``."""
+        if header is None:
+            return None, None
+        match = re.match(r"(.+:\d+)", header)
+        if not match:
+            return None, None
+
+        tool_id = match.group(1).strip()
+        tool_name = tool_id.split(":")[0].split(".")[-1]
+        return tool_id, tool_name
+
+    def _split_tool_call(self, tool_call: str) -> tuple[str | None, str | None]:
+        """Split a tool-call body into ``(header, arguments)`` at the argument marker.
+
+        Example::
+            'get_weather:0 <|tool_call_argument_begin|>{"c'
+            -> ("get_weather:0", '{"c')
+        """
+        arg_pos = tool_call.find(self.tool_call_arg_token)
+        if arg_pos == -1:
+            return None, None
+        header = tool_call[:arg_pos].strip()
+        tool_args = tool_call[arg_pos + len(self.tool_call_arg_token) :]
+        return header, tool_args
+
+    def _compute_args_diff(self, index: int, tool_args: str | None) -> str | None:
+        """Return new argument text not yet sent for tool `index`, or None."""
+        if tool_args is None:
+            return None
+        prev = self.streamed_args_for_tool[index]
+        if len(tool_args) <= len(prev):
+            return None
+        diff = tool_args[len(prev) :]
+        self.streamed_args_for_tool[index] = tool_args
+        self.prev_tool_call_arr[index]["arguments"] = tool_args
+        return diff
+
     def extract_tool_calls_streaming(
         self,
         previous_text: str,
@@ -208,394 +218,59 @@ def extract_tool_calls_streaming(
         delta_token_ids: Sequence[int],
         request: ChatCompletionRequest,
     ) -> DeltaMessage | None:
-        logger.debug("delta_text: %s", delta_text)
-        logger.debug("delta_token_ids: %s", delta_token_ids)
-
-        # Flag to defer section exit until after tool parsing completes
-        deferred_section_exit = False
-
-        # Add delta to buffer for split marker detection
-        self.token_buffer += delta_text
-
-        # Enforce buffer size limit to prevent memory issues
-        if len(self.token_buffer) > self.buffer_max_size:
-            if not self._buffer_overflow_logged:
-                logger.warning(
-                    "Token buffer exceeded max size (%d bytes), flushing excess. "
-                    "This may indicate very long markers or unusual tokenization.",
-                    self.buffer_max_size,
-                )
-                self._buffer_overflow_logged = True
-            # Keep only the most recent content that might contain partial markers
-            self.token_buffer = self.token_buffer[-self.buffer_max_size // 2 :]
-
-        # Check buffer for section markers (handles split tokens)
-        buffered_text, found_section_begin, found_section_end = (
-            self._check_and_strip_markers(self.token_buffer)
-        )
-
-        # Track section state transitions
-        if found_section_begin and not self.in_tool_section:
-            logger.debug("Entering tool section")
-            self.in_tool_section = True
-            self.token_buffer = buffered_text  # Use cleaned buffer
-            self.section_char_count = 0  # Reset counter for new section
-
-        if found_section_end and self.in_tool_section:
-            logger.debug("Detected section end marker")
-            # CRITICAL: Don't exit early if tool_call_end is in this chunk.
-            # Tool parser must emit final arguments/close first to avoid dropping
-            # the final tool update and leaking tokens into reasoning channel.
-            has_tool_end = self.tool_call_end_token_id in delta_token_ids
-            if has_tool_end:
-                # Defer exit until after tool parsing completes
-                deferred_section_exit = True
-                logger.debug("Deferring section exit: tool_call_end in same chunk")
-                self.token_buffer = buffered_text
-            else:
-                # No tool call ending, safe to exit immediately
-                logger.debug("Exiting tool section")
-                self._reset_section_state()
-                # Extract any content AFTER the section end marker in delta_text
-                # (don't use buffered_text as it contains tool call data)
-                post_section_content = ""
-                for variant in self.tool_calls_end_token_variants:
-                    if variant in delta_text:
-                        parts = delta_text.split(variant, 1)
-                        if len(parts) > 1:
-                            post_section_content = parts[1]
-                        break
-                if post_section_content.strip():
-                    return DeltaMessage(content=post_section_content)
-                return DeltaMessage(content="")
-        else:
-            self.token_buffer = buffered_text
-
-        # Check if any variant of section start token is in current_token_ids
-        has_section_token = any(
-            tid in current_token_ids for tid in self.tool_calls_start_token_ids
-        )
-
-        # Early return: if no section token detected yet, return as reasoning content
-        if not has_section_token and not self.in_tool_section:
-            logger.debug("No tool call tokens found!")
-            # Don't clear buffer - it needs to accumulate partial markers across deltas
-            # Buffer overflow is already protected by lines 215-224
-            return DeltaMessage(content=delta_text)
-
-        # Strip section markers from delta_text for subsequent processing
-        # NOTE: This preprocessing happens BEFORE the regex-based tool call
-        # parsing (from PR #24847) to ensure markers are removed cleanly
-        # before pattern matching. No double-stripping occurs because
-        # section markers and tool call markers are distinct.
-        delta_text, _, _ = self._check_and_strip_markers(delta_text)
-
-        # Error recovery: If in tool section for too long, force exit
-        if self.in_tool_section:
-            self.section_char_count += len(delta_text)
-            if self.section_char_count > self.max_section_chars:
-                logger.warning(
-                    "Tool section exceeded max length (%d chars), forcing exit. "
-                    "This may indicate malformed model output.",
-                    self.max_section_chars,
-                )
-                self._reset_section_state()
-                # Deferred exit already handled by forced exit above
-                # Return remaining content as reasoning (or empty delta if no content)
-                return DeltaMessage(content=delta_text if delta_text.strip() else "")
-
         try:
-            # figure out where we are in the parsing by counting tool call
-            # start & end tags
-            prev_tool_start_count = previous_token_ids.count(
-                self.tool_call_start_token_id
-            )
-            prev_tool_end_count = previous_token_ids.count(self.tool_call_end_token_id)
-            cur_tool_start_count = current_token_ids.count(
-                self.tool_call_start_token_id
-            )
-            cur_tool_end_count = current_token_ids.count(self.tool_call_end_token_id)
-            tool_call_portion = None
-            text_portion = None
-
-            # case: if we're generating text, OR rounding out a tool call
-            if (
-                cur_tool_start_count == cur_tool_end_count
-                and prev_tool_end_count == cur_tool_end_count
-                and self.tool_call_end_token not in delta_text
-            ):
-                # Suppress content between section begin and first tool begin
-                # (header noise). Don't suppress content between tools to avoid
-                # breaking potential delimiter characters.
-                if self.in_tool_section and cur_tool_start_count == 0:
-                    logger.debug(
-                        "In tool section before first tool, suppressing: %s",
-                        delta_text,
-                    )
-                    # Return empty delta to maintain iterator contract
-                    return DeltaMessage(content="")
-                logger.debug("Generating text content! skipping tool parsing.")
-                return DeltaMessage(content=delta_text)
-
-            if self.tool_call_end_token in delta_text:
-                logger.debug("tool_call_end_token in delta_text")
-                full_text = current_text + delta_text
-                tool_call_portion = (
-                    full_text.split(self.tool_call_start_token)[-1]
-                    .split(self.tool_call_end_token)[0]
-                    .rstrip()
-                )
-                delta_text = delta_text.split(self.tool_call_end_token)[0].rstrip()
-                text_portion = delta_text.split(self.tool_call_end_token)[-1].lstrip()
-
-            # case -- we're starting a new tool call
-            if (
-                cur_tool_start_count > cur_tool_end_count
-                and cur_tool_start_count > prev_tool_start_count
-            ):
-                if len(delta_token_ids) > 1:
-                    tool_call_portion = current_text.split(self.tool_call_start_token)[
-                        -1
-                    ]
-                else:
-                    tool_call_portion = None
-                    delta = None
-
-                text_portion = None
-
-                # set cursors and state appropriately
-                self.current_tool_id += 1
-                self.current_tool_name_sent = False
-                self.streamed_args_for_tool.append("")
-                logger.debug("Starting on a new tool %s", self.current_tool_id)
-
-            # case -- we're updating an existing tool call
-            elif (
-                cur_tool_start_count > cur_tool_end_count
-                and cur_tool_start_count == prev_tool_start_count
-            ):
-                # get the portion of the text that's the tool call
-                tool_call_portion = current_text.split(self.tool_call_start_token)[-1]
-                text_portion = None
-
-            # case -- the current tool call is being closed.
-            elif (
-                cur_tool_start_count == cur_tool_end_count
-                and cur_tool_end_count >= prev_tool_end_count
-            ):
-                if self.prev_tool_call_arr is None or len(self.prev_tool_call_arr) == 0:
-                    logger.debug("attempting to close tool call, but no tool call")
-                    # Handle deferred section exit before returning
-                    if deferred_section_exit and self.in_tool_section:
-                        self._reset_section_state()
-                    return None
-                diff = self.prev_tool_call_arr[self.current_tool_id].get("arguments")
-                if diff:
-                    diff = (
-                        diff.encode("utf-8").decode("unicode_escape")
-                        if diff is str
-                        else diff
-                    )
-                    if '"}' not in delta_text:
-                        # Handle deferred section exit before returning
-                        if deferred_section_exit and self.in_tool_section:
-                            self._reset_section_state()
-                        return None
-                    end_loc = delta_text.rindex('"}')
-                    diff = delta_text[:end_loc] + '"}'
-                    logger.debug(
-                        "Finishing tool and found diff that had not "
-                        "been streamed yet: %s",
-                        diff,
-                    )
-                    self.streamed_args_for_tool[self.current_tool_id] += diff
-                    # Handle deferred section exit before returning
-                    if deferred_section_exit and self.in_tool_section:
-                        logger.debug("Completing deferred section exit")
-                        self._reset_section_state()
-                    return DeltaMessage(
-                        tool_calls=[
-                            DeltaToolCall(
-                                index=self.current_tool_id,
-                                function=DeltaFunctionCall(arguments=diff).model_dump(
-                                    exclude_none=True
-                                ),
-                            )
-                        ]
-                    )
-
-            # case -- otherwise we're just generating text
-            else:
-                # Check if we're in tool section - if so, suppress
-                if self.in_tool_section:
-                    logger.debug("In tool section, suppressing text generation")
-                    # Handle deferred section exit before returning
-                    if deferred_section_exit:
-                        self._reset_section_state()
-                    return DeltaMessage(content="")
-                text = delta_text.replace(self.tool_call_start_token, "")
-                text = text.replace(self.tool_call_end_token, "")
-                delta = DeltaMessage(tool_calls=[], content=text)
-                # Handle deferred section exit before returning
-                if deferred_section_exit and self.in_tool_section:
-                    self._reset_section_state()
-                return delta
-
-            current_tool_call = dict()
-            if tool_call_portion:
-                current_tool_call_matches = self.stream_tool_call_portion_regex.match(
-                    tool_call_portion
-                )
-                if current_tool_call_matches:
-                    tool_id, tool_args = current_tool_call_matches.groups()
-                    tool_name = tool_id.split(":")[0].split(".")[-1]
-                    current_tool_call["id"] = tool_id.strip()
-                    current_tool_call["name"] = tool_name
-                    current_tool_call["arguments"] = tool_args
-                else:
-                    current_tool_call_name_matches = (
-                        self.stream_tool_call_name_regex.match(tool_call_portion)
-                    )
-                    if current_tool_call_name_matches:
-                        (tool_id_str,) = current_tool_call_name_matches.groups()
-                        tool_name = tool_id_str.split(":")[0].split(".")[-1]
-                        current_tool_call["id"] = tool_id_str.strip()
-                        current_tool_call["name"] = tool_name
-                        current_tool_call["arguments"] = ""
-                    else:
-                        logger.debug("Not enough token")
-                        return None
-
-            # case - we haven't sent the tool name yet. If it's available, send
-            #   it. otherwise, wait until it's available.
-            if not self.current_tool_name_sent:
-                if current_tool_call is None:
-                    return None
-                function_name: str | None = current_tool_call.get("name")
-                tool_id = current_tool_call.get("id")
-                if function_name:
-                    self.current_tool_name_sent = True
-                    return DeltaMessage(
-                        tool_calls=[
-                            DeltaToolCall(
-                                index=self.current_tool_id,
-                                type="function",
-                                id=tool_id,
-                                function=DeltaFunctionCall(
-                                    name=function_name
-                                ).model_dump(exclude_none=True),
-                            )
-                        ]
+            # Extract any content before tool calls.
+            content = self._extract_content(current_text)
+            tool_calls = self._extract_tool_calls(current_text)
+            tool_call_deltas: list[DeltaToolCall] = []
+
+            for i, tool_call in enumerate(tool_calls):
+                # First time seeing tool call at index i.
+                if i >= len(self.prev_tool_call_arr):
+                    # Initialize streaming state.
+                    self.prev_tool_call_arr.append({})
+                    self.streamed_args_for_tool.append("")
+
+                header, tool_args = self._split_tool_call(tool_call)
+
+                # Stream back tool name.
+                if "name" not in self.prev_tool_call_arr[i]:
+                    tool_id, tool_name = self._extract_tool_id_and_name(header)
+                    if not tool_name:
+                        # Can't skip to tool i+1 if i isn't ready
+                        break
+                    self.prev_tool_call_arr[i]["name"] = tool_name
+                    self.prev_tool_call_arr[i]["id"] = tool_id
+                    tool_call_deltas.append(
+                        DeltaToolCall(
+                            index=i,
+                            type="function",
+                            id=tool_id,
+                            function=DeltaFunctionCall(name=tool_name).model_dump(
+                                exclude_none=True
+                            ),
+                        )
                     )
-                else:
-                    return None
-
-            # case -- otherwise, send the tool call delta
-
-            # if the tool call portion is None, send the delta as text
-            if tool_call_portion is None:
-                # if there's text but not tool calls, send that -
-                # otherwise None to skip chunk
-                # CRITICAL: Never return content if we're in a tool section
-                if self.in_tool_section:
-                    return None
-                delta = (
-                    DeltaMessage(content=delta_text)
-                    if text_portion is not None
-                    else None
-                )
-                return delta
-
-            # now, the nitty-gritty of tool calls
-            # now we have the portion to parse as tool call.
-
-            logger.debug(
-                "Trying to parse current tool call with ID %s", self.current_tool_id
-            )
-
-            # if we're starting a new tool call, push an empty object in as
-            #   a placeholder for the arguments
-            if len(self.prev_tool_call_arr) <= self.current_tool_id:
-                self.prev_tool_call_arr.append({})
-
-            # main logic for tool parsing here - compare prev. partially-parsed
-            #   JSON to the current partially-parsed JSON
-            prev_arguments = self.prev_tool_call_arr[self.current_tool_id].get(
-                "arguments"
-            )
-            cur_arguments = current_tool_call.get("arguments")
-
-            logger.debug("diffing old arguments: %s", prev_arguments)
-            logger.debug("against new ones: %s", cur_arguments)
-
-            # case -- no arguments have been created yet. skip sending a delta.
-            if not cur_arguments and not prev_arguments:
-                logger.debug("Skipping text %s - no arguments", delta_text)
-                delta = None
-
-            # case -- prev arguments are defined, but non are now.
-            #   probably impossible, but not a fatal error - just keep going
-            elif not cur_arguments and prev_arguments:
-                logger.error(
-                    "should be impossible to have arguments reset "
-                    "mid-call. skipping streaming anything."
-                )
-                delta = None
 
-            # case -- we now have the first info about arguments available from
-            #   autocompleting the JSON
-            elif cur_arguments and not prev_arguments:
-                delta = DeltaMessage(
-                    tool_calls=[
+                # Stream back new tool args by diffing against what was sent.
+                args_diff = self._compute_args_diff(i, tool_args)
+                if args_diff:
+                    tool_call_deltas.append(
                         DeltaToolCall(
-                            index=self.current_tool_id,
-                            function=DeltaFunctionCall(
-                                arguments=cur_arguments
-                            ).model_dump(exclude_none=True),
+                            index=i,
+                            function=DeltaFunctionCall(arguments=args_diff).model_dump(
+                                exclude_none=True
+                            ),
                         )
-                    ]
-                )
-                self.streamed_args_for_tool[self.current_tool_id] = cur_arguments
-
-            # last case -- we have an update to existing arguments.
-            elif cur_arguments and prev_arguments:
-                if (
-                    isinstance(delta_text, str)
-                    and cur_arguments != prev_arguments
-                    and len(cur_arguments) > len(prev_arguments)
-                    and cur_arguments.startswith(prev_arguments)
-                ):
-                    delta_arguments = cur_arguments[len(prev_arguments) :]
-                    logger.debug("got diff %s", delta_text)
-
-                    delta = DeltaMessage(
-                        tool_calls=[
-                            DeltaToolCall(
-                                index=self.current_tool_id,
-                                function=DeltaFunctionCall(
-                                    arguments=delta_arguments
-                                ).model_dump(exclude_none=True),
-                            )
-                        ]
                     )
-                    self.streamed_args_for_tool[self.current_tool_id] = cur_arguments
-                else:
-                    delta = None
-
-            # handle saving the state for the current tool into
-            # the "prev" list for use in diffing for the next iteration
-            if self.current_tool_id == len(self.prev_tool_call_arr) - 1:
-                self.prev_tool_call_arr[self.current_tool_id] = current_tool_call
-            else:
-                self.prev_tool_call_arr.append(current_tool_call)
-
-            # Handle deferred section exit after tool parsing completes
-            if deferred_section_exit and self.in_tool_section:
-                logger.debug("Completing deferred section exit")
-                self._reset_section_state()
 
-            return delta
+            if content or tool_call_deltas:
+                return DeltaMessage(
+                    content=content,
+                    tool_calls=tool_call_deltas,
+                )
+            return None
 
         except Exception:
             logger.exception("Error trying to handle streaming tool call.")
-            return None  # do not stream a delta. skip this token ID.
+            return None
diff --git a/vllm/tool_parsers/lfm2_tool_parser.py b/vllm/tool_parsers/lfm2_tool_parser.py
new file mode 100644
index 000000000000..ee92d060fbea
--- /dev/null
+++ b/vllm/tool_parsers/lfm2_tool_parser.py
@@ -0,0 +1,343 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import ast
+from collections.abc import Sequence
+
+import regex as re
+
+import vllm.envs as envs
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+)
+from vllm.entrypoints.openai.engine.protocol import (
+    DeltaMessage,
+    ExtractedToolCallInformation,
+)
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+from vllm.logger import init_logger
+from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.abstract_tool_parser import (
+    Tool,
+    ToolParser,
+)
+from vllm.tool_parsers.utils import (
+    UnexpectedAstError,
+    compute_tool_delta,
+    handle_single_tool,
+    make_valid_python,
+)
+
+logger = init_logger(__name__)
+
+TOOL_CALL_START = "<|tool_call_start|>"
+TOOL_CALL_END = "<|tool_call_end|>"
+
+
+class Lfm2ToolParser(ToolParser):
+    """
+    Tool call parser for LiquidAI LFM2/LFM2.5 models that produce pythonic
+    tool calls wrapped in <|tool_call_start|> and <|tool_call_end|> tokens.
+
+    Example model output:
+        <|tool_call_start|>[get_weather(location="Paris")]<|tool_call_end|>
+        The weather in Paris is sunny.
+
+    Used when --enable-auto-tool-choice --tool-call-parser lfm2 are all set.
+    """
+
+    TOOL_CALL_REGEX = re.compile(r"\[.*\]$", re.DOTALL)
+
+    def __init__(
+        self,
+        tokenizer: TokenizerLike,
+        tools: list[Tool] | None = None,
+    ):
+        super().__init__(tokenizer, tools)
+
+        self.tool_call_start_token_id = self.vocab.get(TOOL_CALL_START)
+        self.tool_call_end_token_id = self.vocab.get(TOOL_CALL_END)
+
+        if self.tool_call_start_token_id is None or self.tool_call_end_token_id is None:
+            raise RuntimeError(
+                "LFM2 tool parser could not locate "
+                "<|tool_call_start|>/<|tool_call_end|> tokens in the "
+                "tokenizer!"
+            )
+
+        # Trailing content already emitted to the client. Used by the
+        # streaming path to suppress LFM2's frequent echo of the tool
+        # call body after the first <|tool_call_end|> while still
+        # allowing legitimate post-call prose through.
+        self._trailing_emitted: str = ""
+
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
+        request = super().adjust_request(request)
+        if request.tools and request.tool_choice != "none":
+            # The <|tool_call_start|>/<|tool_call_end|> sentinels are
+            # registered as special tokens in the LFM2/LFM2.5 tokenizer.
+            # With the default ``skip_special_tokens=True`` they are
+            # stripped from the decoded text before reaching this parser,
+            # so the tool block becomes invisible. Force the engine to
+            # preserve them when tool calling is enabled.
+            request.skip_special_tokens = False
+        return request
+
+    # Rename for readability. This is NOT a tool id.
+    @property
+    def current_tool_index(self) -> int:
+        return self.current_tool_id
+
+    @current_tool_index.setter
+    def current_tool_index(self, value: int) -> None:
+        self.current_tool_id = value
+
+    @staticmethod
+    def _strip_echo(raw_after: str) -> str:
+        """Drop any orphan <|tool_call_end|> (and the preceding text) from
+        trailing content. LFM2 occasionally echoes the call body after the
+        first end token and caps it with a second end token; everything
+        through the last such orphan is model garbage, not user content."""
+        last_orphan = raw_after.rfind(TOOL_CALL_END)
+        if last_orphan != -1:
+            return raw_after[last_orphan + len(TOOL_CALL_END) :]
+        return raw_after
+
+    @classmethod
+    def _extract_tool_call_text(
+        cls, model_output: str
+    ) -> tuple[str | None, str | None]:
+        """Extract the pythonic call text and surrounding content.
+
+        Returns (tool_text, content) where tool_text is the text between
+        the sentinel tokens and content is everything outside them.
+        """
+        start_idx = model_output.find(TOOL_CALL_START)
+        if start_idx == -1:
+            return None, model_output
+
+        end_idx = model_output.find(TOOL_CALL_END, start_idx)
+        if end_idx == -1:
+            # Incomplete — treat entire text after start as tool call
+            tool_text = model_output[start_idx + len(TOOL_CALL_START) :]
+            content_before = model_output[:start_idx].strip()
+            content = content_before or None
+            return tool_text, content
+
+        tool_text = model_output[start_idx + len(TOOL_CALL_START) : end_idx]
+        content_before = model_output[:start_idx].strip()
+        content_after = cls._strip_echo(
+            model_output[end_idx + len(TOOL_CALL_END) :]
+        ).strip()
+
+        content_parts = []
+        if content_before:
+            content_parts.append(content_before)
+        if content_after:
+            content_parts.append(content_after)
+        content = "\n".join(content_parts) if content_parts else None
+
+        return tool_text, content
+
+    def extract_tool_calls(
+        self, model_output: str, request: ChatCompletionRequest
+    ) -> ExtractedToolCallInformation:
+        tool_text, content = self._extract_tool_call_text(model_output)
+
+        if tool_text is None:
+            return ExtractedToolCallInformation(
+                tools_called=False, tool_calls=[], content=model_output
+            )
+
+        tool_text = tool_text.strip()
+
+        is_tool_call_pattern = False
+        try:
+            is_tool_call_pattern = (
+                self.TOOL_CALL_REGEX.match(
+                    tool_text,
+                    timeout=envs.VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS,
+                )
+                is not None
+            )
+        except TimeoutError:
+            logger.warning("Regex timeout occurred when matching tool call pattern.")
+
+        if not is_tool_call_pattern:
+            return ExtractedToolCallInformation(
+                tools_called=False, tool_calls=[], content=model_output
+            )
+
+        try:
+            module = ast.parse(tool_text)
+            parsed = getattr(module.body[0], "value", None)
+            if isinstance(parsed, ast.List) and all(
+                isinstance(e, ast.Call) for e in parsed.elts
+            ):
+                return ExtractedToolCallInformation(
+                    tools_called=True,
+                    tool_calls=[
+                        handle_single_tool(e)  # type: ignore
+                        for e in parsed.elts
+                    ],
+                    content=content,
+                )
+            else:
+                raise UnexpectedAstError("Tool output must be a list of function calls")
+        except Exception:
+            logger.exception("Error in extracting tool call from response.")
+            return ExtractedToolCallInformation(
+                tools_called=False, tool_calls=[], content=model_output
+            )
+
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> DeltaMessage | None:
+        # If the tool call start token hasn't appeared yet, stream as content.
+        if TOOL_CALL_START not in current_text:
+            return DeltaMessage(content=delta_text)
+
+        # Compute leading content (before <|tool_call_start|>) that arrived
+        # in this delta and hasn't been streamed yet. Without this, when the
+        # prefix and the start token land in the same delta the prefix is
+        # silently dropped — token-by-token streaming masked the bug because
+        # the prefix tokens always arrived in earlier deltas.
+        leading_content = ""
+        if TOOL_CALL_START not in previous_text:
+            start_idx = current_text.find(TOOL_CALL_START)
+            # previous_text contained no start token, so it has already been
+            # streamed via the no-start-token branch above.
+            leading_content = current_text[len(previous_text) : start_idx]
+
+        has_end_in_current = TOOL_CALL_END in current_text
+        has_end_in_previous = TOOL_CALL_END in previous_text
+
+        # Compute trailing content (after <|tool_call_end|>) not yet
+        # streamed. LFM2 frequently echoes the tool call body again
+        # after the first end token, capped with a second end token.
+        # Suppress that echo:
+        #   - If a second <|tool_call_end|> has appeared, treat
+        #     everything through the last one as garbage.
+        #   - If the trailing starts with `[` or `<` (potential echo
+        #     body or another sentinel) and no second end token has
+        #     arrived yet, buffer it instead of emitting.
+        trailing_content = ""
+        if has_end_in_current:
+            end_idx = current_text.find(TOOL_CALL_END) + len(TOOL_CALL_END)
+            full_trailing = current_text[end_idx:]
+            stripped_trailing = self._strip_echo(full_trailing)
+            if stripped_trailing == full_trailing:
+                # No second end token yet — possibly mid-echo.
+                lstripped = full_trailing.lstrip()
+                if lstripped.startswith("[") or lstripped.startswith("<"):
+                    # Suspect echo; hold off until resolved.
+                    final_trailing = self._trailing_emitted
+                else:
+                    final_trailing = full_trailing
+            else:
+                final_trailing = stripped_trailing
+            if final_trailing.startswith(self._trailing_emitted):
+                trailing_content = final_trailing[len(self._trailing_emitted) :]
+            self._trailing_emitted = final_trailing
+
+        # If tools were already parsed in a prior delta, just stream any
+        # newly arrived trailing content.
+        if has_end_in_current and self.prev_tool_call_arr and has_end_in_previous:
+            if trailing_content:
+                return DeltaMessage(content=trailing_content)
+            return DeltaMessage(content="")
+
+        # Extract the pythonic text between start and end tokens.
+        tool_text = current_text.split(TOOL_CALL_START, 1)[1]
+        # Strip the end token if present (entire call arrived at once).
+        if TOOL_CALL_END in tool_text:
+            tool_text = tool_text.split(TOOL_CALL_END, 1)[0]
+
+        def _content_only_or_none() -> DeltaMessage | None:
+            """Return a content-only delta if any content arrived in this
+            chunk, otherwise None. Used on incremental-parse failure paths
+            so leading/trailing content is never silently dropped.
+            """
+            combined = leading_content + trailing_content
+            return DeltaMessage(content=combined) if combined else None
+
+        try:
+            valid_and_added_text = make_valid_python(tool_text)
+            if valid_and_added_text is None:
+                return _content_only_or_none()
+            valid_text, added_text = valid_and_added_text
+
+            module = ast.parse(valid_text)
+            parsed = getattr(module.body[0], "value", None)
+            if not isinstance(parsed, ast.List) or not all(
+                isinstance(e, ast.Call) for e in parsed.elts
+            ):
+                raise UnexpectedAstError("Tool output must be a list of function calls")
+            tool_calls = [
+                handle_single_tool(e)  # type: ignore
+                for e in parsed.elts
+            ]
+
+            tool_deltas = []
+            for index, new_call in enumerate(tool_calls):
+                if index < self.current_tool_index:
+                    continue
+
+                self.current_tool_index = index
+                if len(self.streamed_args_for_tool) == index:
+                    self.streamed_args_for_tool.append("")
+
+                new_call_complete = (
+                    index < len(tool_calls) - 1 or ")]" not in added_text
+                )
+                if new_call_complete:
+                    self.current_tool_index += 1
+
+                withheld_suffix = added_text[:-2] if not new_call_complete else ""
+                if not new_call_complete and added_text[-2] == ")":
+                    withheld_suffix = withheld_suffix + "}"
+                withheld_suffix = withheld_suffix.replace("'", '"')
+                delta = compute_tool_delta(
+                    self.streamed_args_for_tool[index],
+                    new_call,
+                    index,
+                    withheld_suffix,
+                )
+
+                if delta is not None:
+                    tool_deltas.append(delta)
+                    if (
+                        delta.function is not None
+                        and delta.function.arguments is not None
+                    ):
+                        self.streamed_args_for_tool[index] += delta.function.arguments
+
+            if tool_deltas and not self.prev_tool_call_arr:
+                self.prev_tool_call_arr = [{"arguments": {}}]
+
+            combined_content = leading_content + trailing_content
+
+            if tool_deltas or combined_content:
+                return DeltaMessage(
+                    content=combined_content if combined_content else None,
+                    tool_calls=tool_deltas,
+                )
+            elif not added_text and self.current_tool_id > 0:
+                return DeltaMessage(content="")
+            else:
+                return None
+        except Exception:
+            logger.exception("Error trying to handle streaming tool call.")
+            logger.debug(
+                "Skipping chunk as a result of tool streaming extraction error"
+            )
+            return _content_only_or_none()
diff --git a/vllm/tool_parsers/llama_tool_parser.py b/vllm/tool_parsers/llama_tool_parser.py
index be3d47acd97f..4a041041f096 100644
--- a/vllm/tool_parsers/llama_tool_parser.py
+++ b/vllm/tool_parsers/llama_tool_parser.py
@@ -45,6 +45,12 @@ class Llama3JsonToolParser(ToolParser):
     llama4_json are set.
     """
 
+    bot_token: str = "<|python_tag|>"
+    # Simple regex to find opening braces - we'll use JSON decoder for parsing
+    # This handles arbitrary nesting depth correctly
+    tool_call_start_regex: re.Pattern = re.compile(r"\{")
+    json_decoder: json.JSONDecoder = json.JSONDecoder()
+
     def __init__(
         self,
         tokenizer: PreTrainedTokenizerBase,
@@ -60,14 +66,12 @@ def __init__(
         self.streamed_args_for_tool: list[
             str
         ] = []  # map what has been streamed for each tool so far to a list
-        self.bot_token = "<|python_tag|>"
-        self.bot_token_id = tokenizer.encode(self.bot_token, add_special_tokens=False)[
-            0
-        ]
-        # Simple regex to find opening braces - we'll use JSON decoder for parsing
-        # This handles arbitrary nesting depth correctly
-        self.tool_call_start_regex = re.compile(r"\{")
-        self.json_decoder = json.JSONDecoder()
+        self.bot_token_id = self.vocab.get(self.bot_token)
+        if self.bot_token_id is None:
+            raise RuntimeError(
+                "Llama3JsonToolParser could not locate the bot token "
+                f"'{self.bot_token}' in the tokenizer."
+            )
 
     def extract_tool_calls(
         self, model_output: str, request: ChatCompletionRequest
diff --git a/vllm/tool_parsers/minimax_m2_tool_parser.py b/vllm/tool_parsers/minimax_m2_tool_parser.py
index 6c75e009947a..5a3aae81262c 100644
--- a/vllm/tool_parsers/minimax_m2_tool_parser.py
+++ b/vllm/tool_parsers/minimax_m2_tool_parser.py
@@ -4,7 +4,6 @@
 import json
 import uuid
 from collections.abc import Sequence
-from typing import Any
 
 import regex as re
 
@@ -25,6 +24,11 @@
     Tool,
     ToolParser,
 )
+from vllm.tool_parsers.utils import (
+    coerce_to_schema_type,
+    extract_types_from_schema,
+    find_tool_properties,
+)
 
 logger = init_logger(__name__)
 
@@ -86,162 +90,6 @@ def _extract_name(self, name_str: str) -> str:
             return name_str[1:-1]
         return name_str
 
-    def _extract_types_from_schema(self, schema: Any) -> list[str]:
-        """
-        Extract all possible types from a JSON schema definition.
-        Handles anyOf, oneOf, allOf, type arrays, and enum fields.
-
-        Args:
-            schema: The JSON schema definition for a parameter
-
-        Returns:
-            List of type strings (e.g., ["string", "integer", "null"])
-        """
-        if schema is None:
-            return ["string"]
-
-        if not isinstance(schema, dict):
-            return ["string"]
-
-        types: set[str] = set()
-
-        # Handle direct "type" field
-        if "type" in schema:
-            type_value = schema["type"]
-            if isinstance(type_value, str):
-                types.add(type_value)
-            elif isinstance(type_value, list):
-                for t in type_value:
-                    if isinstance(t, str):
-                        types.add(t)
-
-        # Handle enum - infer types from enum values
-        if "enum" in schema and isinstance(schema["enum"], list) and schema["enum"]:
-            for value in schema["enum"]:
-                if value is None:
-                    types.add("null")
-                elif isinstance(value, bool):
-                    types.add("boolean")
-                elif isinstance(value, int):
-                    types.add("integer")
-                elif isinstance(value, float):
-                    types.add("number")
-                elif isinstance(value, str):
-                    types.add("string")
-                elif isinstance(value, list):
-                    types.add("array")
-                elif isinstance(value, dict):
-                    types.add("object")
-
-        # Handle anyOf, oneOf, allOf - recursively extract types
-        for choice_field in ("anyOf", "oneOf", "allOf"):
-            if choice_field in schema and isinstance(schema[choice_field], list):
-                for choice in schema[choice_field]:
-                    extracted = self._extract_types_from_schema(choice)
-                    types.update(extracted)
-
-        # If no types found, default to string
-        if not types:
-            return ["string"]
-
-        return list(types)
-
-    def _convert_param_value_with_types(
-        self, value: str, param_types: list[str]
-    ) -> Any:
-        """
-        Convert parameter value to the correct type based on a list of possible types.
-        Tries each type in order until one succeeds.
-
-        Args:
-            value: The string value to convert
-            param_types: List of possible type strings
-
-        Returns:
-            The converted value
-        """
-        # Check if the VALUE itself indicates null (not just if null is allowed)
-        if value.lower() in ("null", "none", "nil"):
-            return None
-
-        # Normalize types
-        normalized_types = [t.lower() for t in param_types]
-
-        # Try each type in order of preference (most specific first, string as fallback)
-        # Priority: integer > number > boolean > object > array > string
-        type_priority = [
-            "integer",
-            "int",
-            "number",
-            "float",
-            "boolean",
-            "bool",
-            "object",
-            "array",
-            "string",
-            "str",
-            "text",
-        ]
-
-        for param_type in type_priority:
-            if param_type not in normalized_types:
-                continue
-
-            if param_type in ["string", "str", "text"]:
-                return value
-            elif param_type in ["integer", "int"]:
-                try:
-                    return int(value)
-                except (ValueError, TypeError):
-                    continue
-            elif param_type in ["number", "float"]:
-                try:
-                    val = float(value)
-                    return val if val != int(val) else int(val)
-                except (ValueError, TypeError):
-                    continue
-            elif param_type in ["boolean", "bool"]:
-                lower_val = value.lower().strip()
-                if lower_val in ["true", "1", "yes", "on"]:
-                    return True
-                elif lower_val in ["false", "0", "no", "off"]:
-                    return False
-                continue
-            elif param_type in ["object", "array"]:
-                try:
-                    return json.loads(value)
-                except json.JSONDecodeError:
-                    continue
-
-        # Fallback: try JSON parse, then return as string
-        try:
-            return json.loads(value)
-        except json.JSONDecodeError:
-            return value
-
-    def _get_param_types_from_config(
-        self, param_name: str, param_config: dict
-    ) -> list[str]:
-        """
-        Get parameter types from parameter configuration.
-        Handles anyOf, oneOf, allOf, and direct type definitions.
-
-        Args:
-            param_name: The name of the parameter
-            param_config: The properties dict from the tool schema
-
-        Returns:
-            List of type strings
-        """
-        if param_name not in param_config:
-            return ["string"]
-
-        param_schema = param_config[param_name]
-        if not isinstance(param_schema, dict):
-            return ["string"]
-
-        return self._extract_types_from_schema(param_schema)
-
     def _parse_single_invoke(
         self, invoke_str: str, tools: list | None
     ) -> ToolCall | None:
@@ -252,20 +100,7 @@ def _parse_single_invoke(
             return None
 
         function_name = self._extract_name(name_match.group(1))
-
-        # Get parameter configuration
-        param_config = {}
-        if tools:
-            for tool in tools:
-                if (
-                    hasattr(tool, "function")
-                    and tool.function.name == function_name
-                    and hasattr(tool.function, "parameters")
-                ):
-                    params = tool.function.parameters
-                    if isinstance(params, dict) and "properties" in params:
-                        param_config = params["properties"]
-                    break
+        tool_properties = find_tool_properties(tools, function_name)
 
         # Extract parameters
         param_dict = {}
@@ -274,14 +109,10 @@ def _parse_single_invoke(
             if param_match:
                 param_name = self._extract_name(param_match.group(1))
                 param_value = param_match.group(2).strip()
-
-                # Get parameter types (supports anyOf/oneOf/allOf)
-                param_type = self._get_param_types_from_config(param_name, param_config)
-
-                # Convert value
-                param_dict[param_name] = self._convert_param_value_with_types(
-                    param_value, param_type
+                param_types = extract_types_from_schema(
+                    tool_properties.get(param_name, {})
                 )
+                param_dict[param_name] = coerce_to_schema_type(param_value, param_types)
 
         return ToolCall(
             type="function",
@@ -308,7 +139,7 @@ def _extract_delta_tool_calls(
             invoke_str = complete_invokes[self.current_tool_index]
             tool_call = self._parse_single_invoke(
                 invoke_str,
-                request.tools if request else None,
+                self.tools,
             )
             if not tool_call:
                 self.current_tool_index += 1
@@ -358,9 +189,7 @@ def extract_tool_calls(
             for tool_call_match in self.tool_call_complete_regex.findall(model_output):
                 # Find all invokes within this tool_call
                 for invoke_match in self.invoke_complete_regex.findall(tool_call_match):
-                    tool_call = self._parse_single_invoke(
-                        invoke_match, request.tools if request else None
-                    )
+                    tool_call = self._parse_single_invoke(invoke_match, self.tools)
                     if tool_call:
                         tool_calls.append(tool_call)
 
diff --git a/vllm/tool_parsers/mistral_tool_parser.py b/vllm/tool_parsers/mistral_tool_parser.py
index 153c6ed32c47..0a057a3af468 100644
--- a/vllm/tool_parsers/mistral_tool_parser.py
+++ b/vllm/tool_parsers/mistral_tool_parser.py
@@ -1,15 +1,30 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from __future__ import annotations
+
 import json
 from collections.abc import Sequence
+from dataclasses import dataclass
 from enum import Enum, auto
 from random import choices
 from string import ascii_letters, digits
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import ijson
 import regex as re
+from mistral_common.protocol.instruct.tool_calls import (
+    NamedToolChoice as MistralNamedToolChoice,
+)
+from mistral_common.protocol.instruct.tool_calls import (
+    Tool as MistralTool,
+)
+from mistral_common.protocol.instruct.tool_calls import (
+    ToolChoice as MistralToolChoice,
+)
+from mistral_common.protocol.instruct.tool_calls import (
+    ToolChoiceEnum as MistralToolChoiceEnum,
+)
 from pydantic import Field
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
@@ -23,18 +38,27 @@
     FunctionCall,
     ToolCall,
 )
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
 from vllm.logger import init_logger
+from vllm.reasoning.mistral_reasoning_parser import MistralReasoningParser
+from vllm.sampling_params import StructuredOutputsParams
 from vllm.tokenizers import TokenizerLike
+from vllm.tokenizers.mistral import MistralTokenizer
 from vllm.tool_parsers.abstract_tool_parser import (
     Tool,
     ToolParser,
 )
 from vllm.utils.mistral import is_mistral_tokenizer
 
+if TYPE_CHECKING:
+    from vllm.reasoning import ReasoningParser
+
 logger = init_logger(__name__)
 
 ALPHANUMERIC = ascii_letters + digits
 
+_DEFAULT_JSON_SCHEMA = {"anyOf": [{"type": "object"}, {"type": "array"}]}
+
 
 class StreamingState(Enum):
     """Enum for tracking the current streaming parsing state."""
@@ -67,18 +91,43 @@ def is_valid_id(id: str) -> bool:
 
 
 def _is_pre_v11_tokeniser(model_tokenizer: TokenizerLike) -> bool:
-    return not (is_mistral_tokenizer(model_tokenizer) and model_tokenizer.version >= 11)
+    if is_mistral_tokenizer(model_tokenizer):
+        return model_tokenizer.version < 11
+    # For HF tokenizers, check if [ARGS] token exists in vocab
+    # which indicates a v11+ equivalent tokenizer
+    vocab: dict[str, int] = getattr(model_tokenizer, "get_vocab", lambda: {})()
+    return "[ARGS]" not in vocab
+
+
+@dataclass
+class MistralStreamingResult:
+    r"""Encapsulates the mutable state returned from
+    `MistralToolParser.extract_maybe_reasoning_and_tool_streaming`.
+    """
+
+    delta_message: DeltaMessage | None
+    reasoning_ended: bool
+    tools_called: bool
+    current_text: str
+    current_token_ids: list[int]
 
 
 class MistralToolParser(ToolParser):
-    """
-    Tool call parser for Mistral 7B Instruct v0.3, intended for use with
-    - [`mistral_common`](https://github.com/mistralai/mistral-common/)
-    - the examples/tool_chat_template_mistral.jinja template.
+    r"""Tool call parser for Mistral models, intended for use with either:
+
+    - `mistral_common <https://github.com/mistralai/mistral-common/>`_
+      (recommended)
+    - the `examples/tool_chat_template_mistral.jinja` template.
 
-    Used when --enable-auto-tool-choice --tool-call-parser mistral are all set
+    Used when `--enable-auto-tool-choice --tool-call-parser mistral` are all
+    set.
     """
 
+    IS_MISTRAL_TOOL_PARSER = True  # used by vllm.utils.mistral
+
+    # Used to generate correct grammar in `adjust_request`
+    model_can_reason: bool = False
+
     def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         super().__init__(tokenizer, tools)
 
@@ -95,7 +144,8 @@ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         self.current_tool_name: str | None = None
         self.current_tool_mistral_id: str | None = None
         self.starting_new_tool = False
-        if _is_pre_v11_tokeniser(self.model_tokenizer):
+        self._is_pre_v11 = _is_pre_v11_tokeniser(self.model_tokenizer)
+        if self._is_pre_v11:
             self.parse_coro = ijson.parse_coro(
                 self.update_stream_state_pre_v11_tokenizer()
             )
@@ -103,7 +153,6 @@ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         self.bot_token = "[TOOL_CALLS]"
         self.bot_token_id = self.vocab.get(self.bot_token)
         self.tool_call_regex = re.compile(r"\[{.*}\]", re.DOTALL)
-        self._is_pre_v11 = _is_pre_v11_tokeniser(self.model_tokenizer)
 
         if self.bot_token_id is None:
             raise RuntimeError(
@@ -111,21 +160,269 @@ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
                 "the tokenizer!"
             )
 
-    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
-        request = super().adjust_request(request)
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
+        so_non_supported_attributes = [
+            "regex",
+            "choice",
+            "grammar",
+            # whitespace_pattern is not a constraint type but an option;
+            # Mistral grammar factory does not support it.
+            "whitespace_pattern",
+            "structural_tag",
+        ]
+        any_so_non_supported_active = request.structured_outputs is not None and any(
+            getattr(request.structured_outputs, attribute) is not None
+            for attribute in so_non_supported_attributes
+        )
+        response_format_non_supported_active = (
+            isinstance(request, ResponsesRequest)
+            or request.response_format is not None
+            and request.response_format.type == "structural_tag"
+        )
+
         if (
             not is_mistral_tokenizer(self.model_tokenizer)
-            and request.tools
-            and request.tool_choice != "none"
+            or isinstance(request, ResponsesRequest)
+            or not self.model_tokenizer.supports_grammar
+            or any_so_non_supported_active
+            or response_format_non_supported_active
         ):
-            # Do not skip special tokens when using chat template
-            # with Mistral parser as TOOL_CALL token is needed
-            # for tool detection.
-            # Note: we don't want skip_special_tokens=False
-            # with MistralTokenizer as it is incompatible
-            request.skip_special_tokens = False
+            request = super().adjust_request(request)
+            if request.tools and request.tool_choice != "none":
+                # Do not skip special tokens when using chat template
+                # with Mistral parser as TOOL_CALL token is needed
+                # for tool detection.
+                # Note: we don't want skip_special_tokens=False
+                # with MistralTokenizer as it is incompatible
+                request.skip_special_tokens = False
+            return request
+
+        json_schema: dict[str, Any] | None = None
+        if request.structured_outputs is not None:
+            if request.structured_outputs.json_object is not None:
+                json_schema = _DEFAULT_JSON_SCHEMA
+            elif request.structured_outputs.json is not None:
+                if isinstance(request.structured_outputs.json, str):
+                    json_schema = json.loads(request.structured_outputs.json)
+                else:
+                    json_schema = request.structured_outputs.json
+            else:
+                raise ValueError(
+                    "Unsupported request.structured_outputs for MistralToolParser. "
+                    "Only `json` and `json_object` are supported."
+                )
+        elif (
+            request.response_format is not None
+            and request.response_format.type != "text"
+        ):
+            if request.response_format.type == "json_object":
+                json_schema = _DEFAULT_JSON_SCHEMA
+            elif request.response_format.type == "json_schema":
+                if request.response_format.json_schema is not None:
+                    json_schema = request.response_format.json_schema.json_schema
+                else:
+                    json_schema = _DEFAULT_JSON_SCHEMA
+            else:
+                raise ValueError(
+                    "MistralToolParser only accepts `text`, `json_object` or "
+                    f"`json_schema`, got {request.response_format=}"
+                )
+            # Structured Outputs will be defined.
+            request.response_format = None
+
+        grammar_factory = self.model_tokenizer.grammar_factory
+
+        # TODO: Once unified parser, improve this.
+        # The issue is figuring out when a model is a reasoning one or not.
+        template = grammar_factory.select_jinja_template(
+            reasoning=self.model_can_reason
+        )
+
+        mistral_tools = (
+            [MistralTool.from_openai(tool.model_dump()) for tool in request.tools]
+            if request.tools is not None
+            else None
+        )
+
+        tool_choice: MistralToolChoice
+        match request.tool_choice:
+            case "none" | "auto" | "required":
+                tool_choice = MistralToolChoiceEnum(request.tool_choice)
+            case None:
+                tool_choice = MistralToolChoiceEnum.auto
+            # _ == Named tool choice
+            case _:
+                tool_choice = MistralNamedToolChoice.model_validate(
+                    {
+                        "type": "function",
+                        "function": {"name": request.tool_choice.function.name},
+                    }
+                )
+
+        # Rendering grammar is cached in mistral-common given tools, template and mode.
+        match tool_choice, json_schema is not None:
+            case MistralToolChoiceEnum.none, True:
+                lark_grammar = grammar_factory.get_lark_for_json_schema(
+                    template=template, json_schema=json_schema
+                )
+            case _, _:
+                lark_grammar = grammar_factory.get_lark_from_jinja(
+                    template=template,
+                    mode=tool_choice,
+                    tools=mistral_tools,
+                    json_schema=json_schema,
+                    parallel_tool_calls=request.parallel_tool_calls,
+                    json_only=False,
+                )
+
+        request.structured_outputs = StructuredOutputsParams(grammar=lark_grammar)
+        request._grammar_from_tool_parser = True
         return request
 
+    def extract_maybe_reasoning_and_tool_streaming(
+        self,
+        *,
+        reasoning_parser: ReasoningParser | None,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: list[int],
+        current_token_ids: list[int],
+        output_token_ids: Sequence[int],
+        reasoning_ended: bool,
+        prompt_is_reasoning_end: bool | None,
+        request: ChatCompletionRequest,
+    ) -> MistralStreamingResult:
+        r"""Streaming extraction with reasoning followed by tool-call parsing.
+
+        This method encapsulates the combined reasoning extraction and
+        tool-call streaming logic so that the serving layer only needs a
+        thin routing branch.
+
+        The flow is:
+
+        1. If a *reasoning_parser* is present and reasoning has **not** ended,
+           extract reasoning tokens.  Pre-v15 models may have pre-filled
+           `[THINK]...[/THINK]` in system prompts, so we skip the
+           prompt-level reasoning-end check for those.
+        2. Once reasoning ends (or if there is no reasoning parser), delegate
+           to `extract_tool_calls_streaming` and track whether tools were
+           called.
+
+        Args:
+            reasoning_parser: Optional reasoning parser instance.
+            previous_text: Accumulated text from prior chunks.
+            current_text: Full accumulated text including current chunk.
+            delta_text: New text in this chunk.
+            previous_token_ids: Token ids from prior chunks.
+            current_token_ids: Full token ids including current chunk.
+            output_token_ids: Raw output token ids from the engine.
+            reasoning_ended: Whether reasoning has already ended.
+            prompt_is_reasoning_end: Whether the prompt itself ends reasoning.
+            request: The originating chat completion request.
+        """
+        delta_message: DeltaMessage | None = None
+        tools_called = False
+        reasoning_ended_at_entry = reasoning_ended
+
+        # For MistralReasoningParser, only enter the reasoning block when
+        # the model has actually emitted a [THINK] token.  Other reasoning
+        # parsers always expect thinking to be present.
+        expect_thinking = (
+            not isinstance(reasoning_parser, MistralReasoningParser)
+            or reasoning_parser.start_token_id in current_token_ids
+        )
+        if reasoning_parser is not None and not reasoning_ended and expect_thinking:
+            # Pre-v15 models may have pre-filled [THINK]...[/THINK] in
+            # system prompts, so skip the prompt-level reasoning-end
+            # check and wait for the output's own end-of-think.
+            is_pre_v15 = (
+                isinstance(self.model_tokenizer, MistralTokenizer)
+                and self.model_tokenizer.version < 15
+            )
+
+            if not is_pre_v15 and prompt_is_reasoning_end:
+                reasoning_ended = True
+                current_token_ids = list(output_token_ids)
+            else:
+                delta_message = reasoning_parser.extract_reasoning_streaming(
+                    previous_text,
+                    current_text,
+                    delta_text,
+                    previous_token_ids,
+                    current_token_ids,
+                    output_token_ids,
+                )
+                if reasoning_parser.is_reasoning_end_streaming(
+                    current_token_ids, output_token_ids
+                ):
+                    reasoning_ended = True
+                    current_token_ids = reasoning_parser.extract_content_ids(
+                        list(output_token_ids)
+                    )
+                    if delta_message and delta_message.content:
+                        current_text = delta_message.content
+                        delta_message.content = None
+                    else:
+                        current_text = ""
+
+            if not reasoning_ended:
+                return MistralStreamingResult(
+                    delta_message=delta_message,
+                    reasoning_ended=False,
+                    tools_called=False,
+                    current_text=current_text,
+                    current_token_ids=current_token_ids,
+                )
+
+        delta_token_ids = list(output_token_ids)
+
+        # On the iteration where reasoning just ended, reset the text/token
+        # state so the tool parser sees a clean history instead of the
+        # accumulated reasoning text.
+        if not reasoning_ended_at_entry and reasoning_ended:
+            previous_text = ""
+            previous_token_ids = []
+            delta_text = current_text
+            delta_token_ids = current_token_ids
+
+        delta_message = self.extract_tool_calls_streaming(
+            previous_text=previous_text,
+            current_text=current_text,
+            delta_text=delta_text,
+            previous_token_ids=previous_token_ids,
+            current_token_ids=current_token_ids,
+            delta_token_ids=delta_token_ids,
+            request=request,
+        )
+        if delta_message and delta_message.tool_calls:
+            tools_called = True
+
+        return MistralStreamingResult(
+            delta_message=delta_message,
+            reasoning_ended=reasoning_ended,
+            tools_called=tools_called,
+            current_text=current_text,
+            current_token_ids=current_token_ids,
+        )
+
+    @staticmethod
+    def build_non_streaming_tool_calls(
+        tool_calls: list[FunctionCall] | None,
+    ) -> list[ToolCall]:
+        r"""Build `MistralToolCall` items for non-streaming responses."""
+        if not tool_calls:
+            return []
+
+        return [
+            MistralToolCall(id=tc.id, function=tc)
+            if tc.id
+            else MistralToolCall(function=tc)
+            for tc in tool_calls
+        ]
+
     def extract_tool_calls(
         self,
         model_output: str,
@@ -173,6 +470,8 @@ def extract_tool_calls(
                     raw_tool_call[end_name:],
                 )
 
+                # HF tokenizers may include [ARGS] in the text
+                tool_name = tool_name.replace("[ARGS]", "")
                 tool_calls.append({"name": tool_name, "arguments": args})
 
         # < v11: content[BOT] [{tool_call1},{tool_call2}]
@@ -184,21 +483,28 @@ def extract_tool_calls(
                 )
             stringified_tool_calls = raw_tool_calls[0].strip()
             try:
-                tool_calls = json.loads(stringified_tool_calls)
+                # Use raw_decode to parse the first valid JSON value,
+                # ignoring trailing tokens the model may emit after
+                # the tool call array.
+                tool_calls, _ = json.JSONDecoder().raw_decode(stringified_tool_calls)
             except json.JSONDecodeError:
-                # use a regex to find the part corresponding to the tool call.
-                # NOTE: This use case should not happen if the model is trained
-                # correctly. It's an easy possible fix so it's included, but
-                # can be brittle for very complex / highly nested tool calls
                 try:
                     raw_tool_call = self.tool_call_regex.findall(
                         stringified_tool_calls
                     )[0]
                     tool_calls = json.loads(raw_tool_call)
+                    tool_calls = [
+                        {
+                            "name": tool_call["name"],
+                            "arguments": json.dumps(
+                                tool_call.get("arguments", {}),
+                                ensure_ascii=False,
+                            ),
+                        }
+                        for tool_call in tool_calls
+                    ]
                 except (IndexError, json.JSONDecodeError):
-                    logger.exception("Error in extracting tool call from response: {e}")
-                    # If raw decoding and decoding post regex rule fails, then just
-                    # return content.
+                    logger.exception("Error in extracting tool call from response.")
                     return ExtractedToolCallInformation(
                         tools_called=False,
                         tool_calls=[],
@@ -209,7 +515,8 @@ def extract_tool_calls(
                     {
                         "name": tool_call["name"],
                         "arguments": json.dumps(
-                            tool_call["arguments"], ensure_ascii=False
+                            tool_call.get("arguments", {}),
+                            ensure_ascii=False,
                         ),
                     }
                     for tool_call in tool_calls
@@ -220,7 +527,7 @@ def extract_tool_calls(
                 type="function",
                 function=FunctionCall(
                     name=tool_call["name"],
-                    arguments=tool_call["arguments"],
+                    arguments=tool_call.get("arguments", "{}"),
                 ),
             )
             for tool_call in tool_calls
@@ -253,7 +560,7 @@ def extract_tool_calls_streaming(
         # if the tool call token IS in the tokens generated so far, that
         # means we're parsing as tool calls now
         try:
-            if _is_pre_v11_tokeniser(self.model_tokenizer):
+            if self._is_pre_v11:
                 return self._extract_tool_calls_streaming_pre_v11_tokenizer(
                     delta_text=delta_text,
                     delta_token_ids=delta_token_ids,
@@ -311,13 +618,6 @@ def _extract_tool_calls_streaming(
         if len(delta_tool_calls) > 0:
             delta.tool_calls = delta_tool_calls
 
-        # HACK: serving_chat.py inspects the internal state of tool parsers
-        # when determining its final streaming delta, automatically
-        # adding autocompleted JSON.
-        # These two lines avoid that nonsense while ensuring finish_reason
-        # is set to tool_calls when at least one tool is called.
-        if delta_tool_calls and not self.prev_tool_call_arr:
-            self.prev_tool_call_arr = [{"arguments": {}}]
         return delta
 
     def _generate_delta_tool_call(self, delta_text: str) -> list[DeltaToolCall]:
@@ -330,6 +630,8 @@ def _generate_delta_tool_call(self, delta_text: str) -> list[DeltaToolCall]:
             StreamingState.PARSING_ARGUMENTS,
         ] and delta_text.startswith(self.bot_token):
             self.current_tool_id += 1
+            self.streamed_args_for_tool.append("")
+            self.prev_tool_call_arr.append({})
             self.streaming_state = StreamingState.PARSING_NAME
             delta_text = delta_text.replace(self.bot_token, "", 1)
         if self.streaming_state == StreamingState.PARSING_NAME:
@@ -341,6 +643,11 @@ def _generate_delta_tool_call(self, delta_text: str) -> list[DeltaToolCall]:
                 tool_id = MistralToolCall.generate_random_id()
                 delta_function_name = delta_text.split("{")[0]
                 self.current_tool_name += delta_function_name
+                # HF tokenizers may include [ARGS] in the text
+                self.current_tool_name = self.current_tool_name.replace("[ARGS]", "")
+                self.prev_tool_call_arr[self.current_tool_id]["name"] = (
+                    self.current_tool_name
+                )
                 delta_text = delta_text[len(delta_function_name) :]
                 self.streaming_state = StreamingState.PARSING_ARGUMENTS
             else:
@@ -357,6 +664,10 @@ def _generate_delta_tool_call(self, delta_text: str) -> list[DeltaToolCall]:
                 self.streaming_state = StreamingState.TOOL_COMPLETE
             else:
                 delta_arguments = delta_text
+            self.streamed_args_for_tool[self.current_tool_id] += delta_arguments
+            self.prev_tool_call_arr[self.current_tool_id]["arguments"] = (
+                self.streamed_args_for_tool[self.current_tool_id]
+            )
             ret = []
             if self.current_tool_name or delta_arguments:
                 ret += [
@@ -506,9 +817,12 @@ def _extract_tool_calls_streaming_pre_v11_tokenizer(
                     if self.current_tool_mistral_id is not None:
                         current_tool_call.id = self.current_tool_mistral_id
                         self.current_tool_mistral_id = None
+                    self._track_streamed_args_pre_v11(current_tool_call)
                     delta_tool_calls.append(current_tool_call)
                 current_tool_call_modified = False
                 self.current_tool_id += 1
+                self.streamed_args_for_tool.append("")
+                self.prev_tool_call_arr.append({})
                 self.current_tool_mistral_id = MistralToolCall.generate_random_id()
                 current_tool_call = DeltaToolCall(
                     index=self.current_tool_id,
@@ -521,6 +835,9 @@ def _extract_tool_calls_streaming_pre_v11_tokenizer(
                 # we have the complete tool name
                 current_tool_call_modified = True
                 current_tool_call.function.name = self.current_tool_name
+                self.prev_tool_call_arr[self.current_tool_id]["name"] = (
+                    self.current_tool_name
+                )
                 self.current_tool_name = None
             if self.streaming_state == StreamingState.PARSING_NAME_COMPLETED:
                 self.streaming_state = StreamingState.WAITING_FOR_TOOL_KEY
@@ -546,16 +863,9 @@ def _extract_tool_calls_streaming_pre_v11_tokenizer(
             if self.current_tool_mistral_id is not None:
                 current_tool_call.id = self.current_tool_mistral_id
                 self.current_tool_mistral_id = None
+            self._track_streamed_args_pre_v11(current_tool_call)
             delta_tool_calls.append(current_tool_call)
 
-        # HACK: serving_chat.py inspects the internal state of tool parsers
-        # when determining it's final streaming delta, automatically
-        # adding autocompleted JSON.
-        # These two lines avoid that nonsense while ensuring finish_reason
-        # is set to tool_calls when at least one tool is called.
-        if delta_tool_calls and not self.prev_tool_call_arr:
-            self.prev_tool_call_arr = [{"arguments": {}}]
-
         if content or len(delta_tool_calls) > 0:
             delta_message = DeltaMessage()
             if content:
@@ -569,6 +879,16 @@ def _extract_tool_calls_streaming_pre_v11_tokenizer(
             else:
                 return None
 
+    def _track_streamed_args_pre_v11(self, tool_call: DeltaToolCall) -> None:
+        r"""Accumulate `tool_call` arguments into the streaming state."""
+        if tool_call.function is not None and tool_call.function.arguments is not None:
+            self.streamed_args_for_tool[self.current_tool_id] += (
+                tool_call.function.arguments
+            )
+            self.prev_tool_call_arr[self.current_tool_id]["arguments"] = (
+                self.streamed_args_for_tool[self.current_tool_id]
+            )
+
     def _split_delta(
         self,
         delta_text: str,
diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py
index ee6dd70718b3..e5c37fbd3dfb 100644
--- a/vllm/tool_parsers/openai_tool_parser.py
+++ b/vllm/tool_parsers/openai_tool_parser.py
@@ -13,7 +13,11 @@
     FunctionCall,
     ToolCall,
 )
-from vllm.entrypoints.openai.parser.harmony_utils import parse_output_into_messages
+from vllm.entrypoints.openai.parser.harmony_utils import (
+    extract_function_from_recipient,
+    is_function_recipient,
+    parse_output_into_messages,
+)
 from vllm.logger import init_logger
 from vllm.tool_parsers.abstract_tool_parser import (
     Tool,
@@ -50,10 +54,12 @@ def extract_tool_calls(
 
         if len(parser.messages) > 0:
             for msg in parser.messages:
+                if msg.author.role != "assistant":
+                    continue
                 if len(msg.content) < 1:
                     continue
                 msg_text = msg.content[0].text
-                if msg.recipient and msg.recipient.startswith("functions."):
+                if msg.recipient and is_function_recipient(msg.recipient):
                     # If no content-type is given assume JSON, as that's the
                     # most common case with gpt-oss models.
                     if not msg.content_type or "json" in msg.content_type:
@@ -72,7 +78,7 @@ def extract_tool_calls(
                         ToolCall(
                             type="function",
                             function=FunctionCall(
-                                name=msg.recipient.split("functions.")[1],
+                                name=extract_function_from_recipient(msg.recipient),
                                 arguments=tool_args,
                             ),
                         )
diff --git a/vllm/tool_parsers/poolside_v1_tool_parser.py b/vllm/tool_parsers/poolside_v1_tool_parser.py
new file mode 100644
index 000000000000..f14b47362917
--- /dev/null
+++ b/vllm/tool_parsers/poolside_v1_tool_parser.py
@@ -0,0 +1,583 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+GLM-4 Tool Call Parser with incremental string streaming support.
+
+This parser fixes the streaming issue reported in Issue #32829 where long string
+parameters (e.g., file content with 4000+ characters of code) are buffered until
+complete, causing multi-second delays before the user sees any content.
+
+The fix streams string values incrementally as they arrive, providing a true
+streaming experience for long content.
+"""
+
+import ast
+import json
+from collections.abc import Sequence
+from typing import Any
+
+import partial_json_parser.core.complete
+import regex as re
+from partial_json_parser.core.options import Allow
+
+from vllm.entrypoints.chat_utils import make_tool_call_id
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+)
+from vllm.entrypoints.openai.engine.protocol import (
+    DeltaFunctionCall,
+    DeltaMessage,
+    DeltaToolCall,
+    ExtractedToolCallInformation,
+    FunctionCall,
+    ToolCall,
+)
+from vllm.entrypoints.openai.responses.protocol import (
+    ResponsesRequest,
+)
+from vllm.logger import init_logger
+from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.abstract_tool_parser import (
+    Tool,
+    ToolParser,
+)
+
+logger = init_logger(__name__)
+
+
+class PoolsideV1ToolParser(ToolParser):
+    """Tool parser for GLM-4 models with incremental string streaming.
+
+    This parser emits tool-call deltas incrementally as arguments arrive.
+    For string-type parameters, content is streamed character-by-character
+    rather than waiting for the complete </arg_value> tag.
+    """
+
+    def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+        super().__init__(tokenizer, tools)
+        # Stateful streaming fields
+        self.current_tool_name_sent: bool = False
+        self.prev_tool_call_arr: list[dict[str, Any]] = []
+        self.current_tool_id: int = -1
+        self.streamed_args_for_tool: list[str] = []
+
+        self.tool_call_start_token: str = "<tool_call>"
+        self.tool_call_end_token: str = "</tool_call>"
+        self.arg_key_start: str = "<arg_key>"
+        self.arg_key_end: str = "</arg_key>"
+        self.arg_val_start: str = "<arg_value>"
+        self.arg_val_end: str = "</arg_value>"
+
+        self.tool_calls_start_token = self.tool_call_start_token
+
+        self.func_call_regex = re.compile(r"<tool_call>.*?</tool_call>", re.DOTALL)
+        self.func_detail_regex = re.compile(
+            r"<tool_call>([^\n]*)\n(.*)</tool_call>", re.DOTALL
+        )
+        self.func_arg_regex = re.compile(
+            r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>", re.DOTALL
+        )
+
+        if not self.model_tokenizer:
+            raise ValueError(
+                "The model tokenizer must be passed to the ToolParser "
+                "constructor during construction."
+            )
+
+        self.tool_call_start_token_id = self.vocab.get(self.tool_call_start_token)
+        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
+        self._buffer: str = ""
+
+        # Streaming state for incremental tool-call streaming
+        self._in_tool_call: bool = False
+        self._current_tool_name: str | None = None
+        self._pending_key: str | None = None
+        self._streaming_string_value: bool = False
+        self._tool_call_ids: list[str] = []
+        self._args_started: list[bool] = []
+        self._args_closed: list[bool] = []
+        self._seen_keys: list[set[str]] = []
+
+    @staticmethod
+    def _deserialize(value: str) -> Any:
+        try:
+            return json.loads(value)
+        except json.JSONDecodeError:
+            pass
+
+        try:
+            return ast.literal_eval(value)
+        except (ValueError, SyntaxError):
+            pass
+
+        return value
+
+    @staticmethod
+    def _json_escape_string_content(s: str) -> str:
+        """JSON-escape string content for incremental streaming.
+
+        This escapes the content that goes INSIDE a JSON string (between quotes),
+        not including the surrounding quotes themselves.
+        """
+        if not s:
+            return ""
+        return json.dumps(s, ensure_ascii=False)[1:-1]
+
+    @staticmethod
+    def _is_string_type(
+        tool_name: str,
+        arg_name: str,
+        tools: list[Tool] | None,
+    ) -> bool:
+        if tools is None:
+            return False
+        for tool in tools:
+            if tool.function.name != tool_name:
+                continue
+            if tool.function.parameters is None:
+                return False
+            arg_type = (
+                tool.function.parameters.get("properties", {})
+                .get(arg_name, {})
+                .get("type", None)
+            )
+            return arg_type == "string"
+        logger.debug("No tool named '%s'.", tool_name)
+        return False
+
+    @staticmethod
+    def _tools_enabled(request: ChatCompletionRequest) -> bool:
+        """Return whether tool parsing should be applied for this request."""
+        try:
+            tools = getattr(request, "tools", None)
+            tool_choice = getattr(request, "tool_choice", None)
+            return bool(tools) and tool_choice != "none"
+        except Exception:
+            logger.exception("Failed to determine if tools are enabled.")
+            return False
+
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
+        """Adjust request parameters for tool call token handling."""
+        request = super().adjust_request(request)
+        if request.tools and request.tool_choice != "none":
+            # Ensure tool call tokens (<tool_call>, </tool_call>) are not skipped
+            # during decoding. Even though they are not marked as special tokens,
+            # setting skip_special_tokens=False ensures proper handling in
+            # transformers 5.x where decoding behavior may have changed.
+            request.skip_special_tokens = False
+        return request
+
+    def extract_tool_calls(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest,
+    ) -> ExtractedToolCallInformation:
+        matched_tool_calls = self.func_call_regex.findall(model_output)
+        logger.debug("model_output: %s", model_output)
+        try:
+            tool_calls: list[ToolCall] = []
+            for match in matched_tool_calls:
+                tc_detail = self.func_detail_regex.search(match)
+                if not tc_detail:
+                    logger.warning(
+                        "Failed to parse tool call details from: %s",
+                        match,
+                    )
+                    continue
+                tc_name = tc_detail.group(1).strip()
+                tc_args = tc_detail.group(2)
+                pairs = self.func_arg_regex.findall(tc_args) if tc_args else []
+                arg_dct: dict[str, Any] = {}
+                for key, value in pairs:
+                    arg_key = key.strip()
+                    arg_val = value.strip()
+                    if not self._is_string_type(tc_name, arg_key, request.tools):
+                        arg_val = self._deserialize(arg_val)
+                    logger.debug("arg_key = %s, arg_val = %s", arg_key, arg_val)
+                    arg_dct[arg_key] = arg_val
+                tool_calls.append(
+                    ToolCall(
+                        type="function",
+                        function=FunctionCall(
+                            name=tc_name,
+                            arguments=json.dumps(arg_dct, ensure_ascii=False),
+                        ),
+                    )
+                )
+        except Exception:
+            logger.exception("Failed to extract tool call spec")
+            return ExtractedToolCallInformation(
+                tools_called=False, tool_calls=[], content=model_output
+            )
+        else:
+            if len(tool_calls) > 0:
+                content: str | None = model_output[
+                    : model_output.find(self.tool_calls_start_token)
+                ]
+                # Normalize empty/whitespace-only content to None
+                if not content or not content.strip():
+                    content = None
+                return ExtractedToolCallInformation(
+                    tools_called=True, tool_calls=tool_calls, content=content
+                )
+            return ExtractedToolCallInformation(
+                tools_called=False, tool_calls=[], content=model_output
+            )
+
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> DeltaMessage | None:
+        if not self._tools_enabled(request):
+            return DeltaMessage(content=delta_text) if delta_text else None
+
+        self._buffer += delta_text
+
+        pending_deltas: dict[int, DeltaToolCall] = {}
+        content: str | None = None
+
+        while True:
+            if not self._in_tool_call:
+                start_idx = self._buffer.find(self.tool_call_start_token)
+                if start_idx == -1:
+                    # Check for partial start token at end of buffer
+                    for i in range(1, len(self.tool_call_start_token)):
+                        if self._buffer.endswith(self.tool_call_start_token[:i]):
+                            out = self._buffer[:-i]
+                            self._buffer = self._buffer[-i:]
+                            if out:
+                                content = (content or "") + out
+                            break
+                    else:
+                        out = self._buffer
+                        self._buffer = ""
+                        if out:
+                            content = (content or "") + out
+                    break
+
+                if start_idx > 0:
+                    content = (content or "") + self._buffer[:start_idx]
+                    self._buffer = self._buffer[start_idx:]
+
+                self._buffer = self._buffer[len(self.tool_call_start_token) :]
+                self._begin_tool_call()
+                continue
+
+            # Parse tool name first
+            if not self.current_tool_name_sent:
+                nl = self._buffer.find("\n")
+                ak = self._buffer.find(self.arg_key_start)
+                end = self._buffer.find(self.tool_call_end_token)
+                candidates = [i for i in [nl, ak, end] if i != -1]
+                if not candidates:
+                    break
+                cut = min(candidates)
+                tool_name = self._buffer[:cut].strip()
+                if tool_name == "" and cut == end:
+                    # Handle empty tool call like `<tool_call></tool_call>`.
+                    # Consume the tokens and reset state to avoid infinite loop.
+                    self._buffer = self._buffer[end + len(self.tool_call_end_token) :]
+                    self._finish_tool_call()
+                    self._revert_last_tool_call_state()
+                    continue
+
+                if cut == nl:
+                    self._buffer = self._buffer[nl + 1 :]
+                else:
+                    self._buffer = self._buffer[cut:]
+
+                self._current_tool_name = tool_name
+                self.current_tool_name_sent = True
+                self._update_tool_name(pending_deltas, tool_name)
+                continue
+
+            assert self._current_tool_name is not None
+
+            # Handle incremental string value streaming
+            if self._streaming_string_value:
+                val_end = self._buffer.find(self.arg_val_end)
+                if val_end != -1:
+                    raw_content = self._buffer[:val_end]
+                    self._buffer = self._buffer[val_end + len(self.arg_val_end) :]
+                    self._streaming_string_value = False
+                    self._pending_key = None
+
+                    escaped = self._json_escape_string_content(raw_content)
+                    frag = escaped + '"'
+                    self.streamed_args_for_tool[self.current_tool_id] += frag
+                    self._update_tool_args(pending_deltas, frag)
+                    continue
+
+                # Check for partial </arg_value> at end
+                safe_len = len(self._buffer)
+                for i in range(1, len(self.arg_val_end)):
+                    if self._buffer.endswith(self.arg_val_end[:i]):
+                        safe_len = len(self._buffer) - i
+                        break
+
+                if safe_len > 0:
+                    to_emit = self._buffer[:safe_len]
+                    self._buffer = self._buffer[safe_len:]
+                    escaped = self._json_escape_string_content(to_emit)
+                    if escaped:
+                        self.streamed_args_for_tool[self.current_tool_id] += escaped
+                        self._update_tool_args(pending_deltas, escaped)
+                break
+
+            # If we have a pending key, parse its value
+            if self._pending_key is not None:
+                val_pos = self._buffer.find(self.arg_val_start)
+                if val_pos == -1:
+                    break
+                if val_pos > 0:
+                    self._buffer = self._buffer[val_pos:]
+
+                key = (self._pending_key or "").strip()
+
+                is_string = self._is_string_type(
+                    self._current_tool_name, key, request.tools
+                )
+
+                if is_string:
+                    # String type: stream incrementally
+                    self._buffer = self._buffer[len(self.arg_val_start) :]
+
+                    if key in self._seen_keys[self.current_tool_id]:
+                        self._pending_key = None
+                        continue
+
+                    self._seen_keys[self.current_tool_id].add(key)
+                    key_json = json.dumps(key, ensure_ascii=False)
+
+                    if not self._args_started[self.current_tool_id]:
+                        frag = "{" + key_json + ': "'
+                        self._args_started[self.current_tool_id] = True
+                    else:
+                        frag = ", " + key_json + ': "'
+
+                    self.streamed_args_for_tool[self.current_tool_id] += frag
+                    self._streaming_string_value = True
+                    self._update_tool_args(pending_deltas, frag)
+                    continue
+
+                # Non-string type: wait for complete value
+                val_end = self._buffer.find(self.arg_val_end)
+                if val_end == -1:
+                    break
+
+                raw_val = self._buffer[len(self.arg_val_start) : val_end].strip()
+                self._buffer = self._buffer[val_end + len(self.arg_val_end) :]
+                self._pending_key = None
+
+                frag_or_none = self._append_arg_fragment(key=key, raw_val=raw_val)
+                if frag_or_none:
+                    self._update_tool_args(pending_deltas, frag_or_none)
+                continue
+
+            # Parse next arg or close
+            end_pos = self._buffer.find(self.tool_call_end_token)
+            key_pos = self._buffer.find(self.arg_key_start)
+            if end_pos != -1 and (key_pos == -1 or end_pos < key_pos):
+                self._buffer = self._buffer[end_pos + len(self.tool_call_end_token) :]
+                frag_or_none = self._close_args_if_needed()
+                # Finalize prev_tool_call_arr with complete parsed arguments
+                if self._current_tool_name:
+                    try:
+                        full_args_str = self.streamed_args_for_tool[
+                            self.current_tool_id
+                        ]
+                        args_dict = json.loads(full_args_str)
+                        self.prev_tool_call_arr[self.current_tool_id] = {
+                            "name": self._current_tool_name,
+                            "arguments": args_dict,
+                        }
+                    except (json.JSONDecodeError, IndexError) as e:
+                        logger.warning(
+                            "Failed to finalize tool call state for tool %d: %s",
+                            self.current_tool_id,
+                            e,
+                        )
+                self._finish_tool_call()
+                if frag_or_none:
+                    self._update_tool_args(pending_deltas, frag_or_none)
+                continue
+
+            if key_pos == -1:
+                break
+            if key_pos > 0:
+                self._buffer = self._buffer[key_pos:]
+            key_end = self._buffer.find(self.arg_key_end)
+            if key_end == -1:
+                break
+            key = self._buffer[len(self.arg_key_start) : key_end]
+            self._buffer = self._buffer[key_end + len(self.arg_key_end) :]
+            self._pending_key = key
+            continue
+
+        tool_calls = list(pending_deltas.values())
+        if content is None and len(tool_calls) == 0:
+            if request.logprobs:
+                return DeltaMessage(content="")
+            return None
+        return DeltaMessage(content=content, tool_calls=tool_calls)
+
+    def _ensure_tool_state(self) -> None:
+        while len(self._tool_call_ids) <= self.current_tool_id:
+            self._tool_call_ids.append(
+                make_tool_call_id(id_type="random", func_name=None, idx=None)
+            )
+        while len(self.streamed_args_for_tool) <= self.current_tool_id:
+            self.streamed_args_for_tool.append("")
+        while len(self.prev_tool_call_arr) <= self.current_tool_id:
+            self.prev_tool_call_arr.append({})
+        while len(self._args_started) <= self.current_tool_id:
+            self._args_started.append(False)
+        while len(self._args_closed) <= self.current_tool_id:
+            self._args_closed.append(False)
+        while len(self._seen_keys) <= self.current_tool_id:
+            self._seen_keys.append(set())
+
+    def _begin_tool_call(self) -> None:
+        if self.current_tool_id == -1:
+            self.current_tool_id = 0
+        else:
+            self.current_tool_id += 1
+        self._ensure_tool_state()
+        self.current_tool_name_sent = False
+        self._current_tool_name = None
+        self._pending_key = None
+        self._streaming_string_value = False
+        self._in_tool_call = True
+
+    def _finish_tool_call(self) -> None:
+        self._in_tool_call = False
+        self._current_tool_name = None
+        self._pending_key = None
+        self._streaming_string_value = False
+
+    def _revert_last_tool_call_state(self) -> None:
+        """Revert the state allocation for the last tool call."""
+        if self.current_tool_id < 0:
+            return
+        self._tool_call_ids.pop()
+        self.streamed_args_for_tool.pop()
+        self.prev_tool_call_arr.pop()
+        self._args_started.pop()
+        self._args_closed.pop()
+        self._seen_keys.pop()
+        self.current_tool_id -= 1
+
+    def _get_or_create_delta(self, pending: dict[int, DeltaToolCall]) -> DeltaToolCall:
+        idx = self.current_tool_id
+        if idx not in pending:
+            pending[idx] = DeltaToolCall(
+                index=idx,
+                function=DeltaFunctionCall(),
+            )
+        delta = pending[idx]
+        assert delta.function is not None
+        return delta
+
+    def _update_tool_name(
+        self, pending: dict[int, DeltaToolCall], tool_name: str
+    ) -> None:
+        self.prev_tool_call_arr[self.current_tool_id] = {
+            "name": self._current_tool_name,
+            "arguments": {},
+        }
+        delta = self._get_or_create_delta(pending)
+        delta.id = self._tool_call_ids[self.current_tool_id]
+        delta.type = "function"
+        assert delta.function is not None
+        delta.function.name = tool_name
+        if delta.function.arguments is None:
+            delta.function.arguments = ""
+
+    @staticmethod
+    def _complete_json_prefix(
+        json_prefix: str,
+        allowed_partial_types: Allow,
+    ) -> dict | None:
+        """Complete a partial JSON prefix into a valid JSON object.
+
+        Returns (formatted_prefix, parsed_dict) or None on failure.
+
+        Note: ``partial_json_parser`` strips trailing whitespace before
+        parsing (``complete.py:20``), which means the returned slice is
+        shorter than ``json_prefix`` when it has trailing whitespace.
+        Since the parser controls the construction of the json_prefix value,
+        this code relies on it being a valid prefix and we only use the fix for
+        the completion of the JSON object.
+        """
+        try:
+            _, partial_str_completion = partial_json_parser.core.complete.fix(
+                json_prefix,
+                allowed_partial_types,
+            )
+            return json.loads(json_prefix + partial_str_completion)
+        except Exception:
+            return None
+
+    def _update_tool_args(
+        self, pending: dict[int, DeltaToolCall], fragment: str
+    ) -> None:
+        result = self._complete_json_prefix(
+            self.streamed_args_for_tool[self.current_tool_id],
+            Allow.ALL,
+        )
+        if result is not None:
+            self.prev_tool_call_arr[self.current_tool_id]["arguments"] = result
+        delta = self._get_or_create_delta(pending)
+        assert delta.function is not None
+        if delta.function.arguments is None:
+            delta.function.arguments = ""
+        delta.function.arguments += fragment
+
+    def _append_arg_fragment(
+        self,
+        *,
+        key: str,
+        raw_val: str,
+    ) -> str | None:
+        key = key.strip()
+        if not key:
+            return None
+        if key in self._seen_keys[self.current_tool_id]:
+            return None
+
+        # This function is only called for non-string types (already checked
+        # by _is_string_type in the caller), so we always deserialize.
+        val_obj: Any = self._deserialize(raw_val)
+
+        key_json = json.dumps(key, ensure_ascii=False)
+        val_json = json.dumps(val_obj, ensure_ascii=False)
+
+        if not self._args_started[self.current_tool_id]:
+            fragment = "{" + key_json + ": " + val_json
+            self._args_started[self.current_tool_id] = True
+        else:
+            fragment = ", " + key_json + ": " + val_json
+
+        self._seen_keys[self.current_tool_id].add(key)
+        self.streamed_args_for_tool[self.current_tool_id] += fragment
+        return fragment
+
+    def _close_args_if_needed(self) -> str | None:
+        if self._args_closed[self.current_tool_id]:
+            return None
+        self._args_closed[self.current_tool_id] = True
+        if not self._args_started[self.current_tool_id]:
+            fragment = "{}"
+            self.streamed_args_for_tool[self.current_tool_id] = fragment
+        else:
+            fragment = "}"
+            self.streamed_args_for_tool[self.current_tool_id] += fragment
+        return fragment
diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index f9b406b53ec3..7457590c5ac0 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import ast
 import json
 import uuid
 from collections.abc import Sequence
@@ -19,17 +18,29 @@
     FunctionCall,
     ToolCall,
 )
+from vllm.envs import VLLM_ENFORCE_STRICT_TOOL_CALLING
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import (
     Tool,
     ToolParser,
 )
+from vllm.tool_parsers.structural_tag_registry import (
+    get_enable_structured_outputs_in_reasoning,
+    get_model_structural_tag,
+)
+from vllm.tool_parsers.utils import (
+    coerce_to_schema_type,
+    extract_types_from_schema,
+    find_tool_properties,
+)
 
 logger = init_logger(__name__)
 
 
 class Qwen3CoderToolParser(ToolParser):
+    supports_required_and_named: bool = not VLLM_ENFORCE_STRICT_TOOL_CALLING
+
     def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         super().__init__(tokenizer, tools)
 
@@ -109,150 +120,24 @@ def _reset_streaming_state(self):
         self.accumulated_params = {}
         self.streaming_request = None
 
-    def _get_arguments_config(self, func_name: str, tools: list[Tool] | None) -> dict:
-        """Extract argument configuration for a function."""
-        if tools is None:
-            return {}
-        for config in tools:
-            if not hasattr(config, "type") or not (
-                hasattr(config, "function") and hasattr(config.function, "name")
-            ):
-                continue
-            if config.type == "function" and config.function.name == func_name:
-                if not hasattr(config.function, "parameters"):
-                    return {}
-                params = config.function.parameters
-                if isinstance(params, dict) and "properties" in params:
-                    return params["properties"]
-                elif isinstance(params, dict):
-                    return params
-                else:
-                    return {}
-        logger.debug("Tool '%s' is not defined in the tools list.", func_name)
-        return {}
-
     def _convert_param_value(
         self, param_value: str, param_name: str, param_config: dict, func_name: str
     ) -> Any:
         """Convert parameter value based on its type in the schema."""
-        # Handle null value for any type
-        if param_value.lower() == "null":
-            return None
-
-        if param_name not in param_config:
-            if param_config != {}:
-                logger.debug(
-                    "Parsed parameter '%s' is not defined in the tool "
-                    "parameters for tool '%s', directly returning the "
-                    "string value.",
-                    param_name,
-                    func_name,
-                )
+        if not isinstance(param_value, str):
             return param_value
+        param_schema = param_config.get(param_name, {})
+        param_types = extract_types_from_schema(param_schema)
+        return coerce_to_schema_type(param_value, param_types)
 
-        if (
-            isinstance(param_config[param_name], dict)
-            and "type" in param_config[param_name]
-        ):
-            param_type = str(param_config[param_name]["type"]).strip().lower()
-        elif (
-            isinstance(param_config[param_name], dict)
-            and "anyOf" in param_config[param_name]
-        ):
-            # anyOf has no top-level "type"; treat as object to trigger json.loads.
-            param_type = "object"
-        else:
-            param_type = "string"
-        if param_type in ["string", "str", "text", "varchar", "char", "enum"]:
-            return param_value
-        elif (
-            param_type.startswith("int")
-            or param_type.startswith("uint")
-            or param_type.startswith("long")
-            or param_type.startswith("short")
-            or param_type.startswith("unsigned")
-        ):
-            try:
-                return int(param_value)
-            except (ValueError, TypeError):
-                logger.debug(
-                    "Parsed value '%s' of parameter '%s' is not an "
-                    "integer in tool '%s', degenerating to string.",
-                    param_value,
-                    param_name,
-                    func_name,
-                )
-                return param_value
-        elif param_type.startswith("num") or param_type.startswith("float"):
-            try:
-                float_param_value = float(param_value)
-                return (
-                    float_param_value
-                    if float_param_value - int(float_param_value) != 0
-                    else int(float_param_value)
-                )
-            except (ValueError, TypeError):
-                logger.debug(
-                    "Parsed value '%s' of parameter '%s' is not a float "
-                    "in tool '%s', degenerating to string.",
-                    param_value,
-                    param_name,
-                    func_name,
-                )
-                return param_value
-        elif param_type in ["boolean", "bool", "binary"]:
-            param_value = param_value.lower()
-            if param_value not in ["true", "false"]:
-                logger.debug(
-                    "Parsed value '%s' of parameter '%s' is not a boolean "
-                    "(`true` or `false`) in tool '%s', degenerating to "
-                    "false.",
-                    param_value,
-                    param_name,
-                    func_name,
-                )
-            return param_value == "true"
-        else:
-            if (
-                param_type in ["object", "array", "arr"]
-                or param_type.startswith("dict")
-                or param_type.startswith("list")
-            ):
-                try:
-                    param_value = json.loads(param_value)
-                    return param_value
-                except (json.JSONDecodeError, TypeError, ValueError):
-                    logger.debug(
-                        "Parsed value '%s' of parameter '%s' cannot be "
-                        "parsed with json.loads in tool '%s', will try "
-                        "other methods to parse it.",
-                        param_value,
-                        param_name,
-                        func_name,
-                    )
-            try:
-                param_value = ast.literal_eval(param_value)  # safer
-            except (ValueError, SyntaxError, TypeError):
-                logger.debug(
-                    "Parsed value '%s' of parameter '%s' cannot be "
-                    "converted via Python `ast.literal_eval()` in tool "
-                    "'%s', degenerating to string.",
-                    param_value,
-                    param_name,
-                    func_name,
-                )
-            return param_value
-
-    def _parse_xml_function_call(
-        self, function_call_str: str, tools: list[Tool] | None
-    ) -> ToolCall | None:
+    def _parse_xml_function_call(self, function_call_str: str) -> ToolCall | None:
         # Extract function name
         end_index = function_call_str.find(">")
         # If there's no ">" character, this is not a valid xml function call
         if end_index == -1:
             return None
         function_name = function_call_str[:end_index]
-        param_config = self._get_arguments_config(function_name, tools)
+        param_config = find_tool_properties(self.tools, function_name)
         parameters = function_call_str[end_index + 1 :]
         param_dict = {}
         for match_text in self.tool_call_parameter_regex.findall(parameters):
@@ -314,7 +199,7 @@ def extract_tool_calls(
                 )
 
             tool_calls = [
-                self._parse_xml_function_call(function_call_str, request.tools)
+                self._parse_xml_function_call(function_call_str)
                 for function_call_str in function_calls
             ]
             # Populate prev_tool_call_arr for serving layer to set finish_reason
@@ -605,9 +490,8 @@ def extract_tool_calls_streaming(
                 self.current_param_name = current_param_name
                 self.accumulated_params[current_param_name] = param_value
 
-                param_config = self._get_arguments_config(
-                    self.current_function_name or "",
-                    self.streaming_request.tools if self.streaming_request else None,
+                param_config = find_tool_properties(
+                    self.tools, self.current_function_name or ""
                 )
 
                 converted_value = self._convert_param_value(
@@ -666,9 +550,6 @@ def extract_tool_calls_streaming(
                     try:
                         parsed_tool = self._parse_xml_function_call(
                             func_content,
-                            self.streaming_request.tools
-                            if self.streaming_request
-                            else None,
                         )
                         if parsed_tool and self.current_tool_index < len(
                             self.prev_tool_call_arr
@@ -708,3 +589,11 @@ def extract_tool_calls_streaming(
                 return result
 
         return None
+
+    def get_structural_tag(self, request: ChatCompletionRequest):
+        return get_model_structural_tag(
+            model="qwen_3_5",
+            tools=request.tools,
+            tool_choice=request.tool_choice,
+            reasoning=get_enable_structured_outputs_in_reasoning(),
+        )
diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py
index 23778091ee98..8ee10dcbc9e6 100644
--- a/vllm/tool_parsers/qwen3xml_tool_parser.py
+++ b/vllm/tool_parsers/qwen3xml_tool_parser.py
@@ -26,6 +26,7 @@
     Tool,
     ToolParser,
 )
+from vllm.tool_parsers.utils import find_tool_properties
 
 logger = init_logger(__name__)
 
@@ -1000,33 +1001,11 @@ def _get_param_type(self, param_name: str) -> str:
         if not self.tools or not self.current_function_name:
             return "string"
 
-        for tool in self.tools:
-            if not hasattr(tool, "type") or not (
-                hasattr(tool, "function") and hasattr(tool.function, "name")
-            ):
-                continue
-            if (
-                tool.type == "function"
-                and tool.function.name == self.current_function_name
-            ):
-                if not hasattr(tool.function, "parameters"):
-                    return "string"
-                params = tool.function.parameters
-                if isinstance(params, dict) and "properties" in params:
-                    properties = params["properties"]
-                    if param_name in properties and isinstance(
-                        properties[param_name], dict
-                    ):
-                        return self.repair_param_type(
-                            str(properties[param_name].get("type", "string"))
-                        )
-                elif isinstance(params, dict) and param_name in params:
-                    param_config = params[param_name]
-                    if isinstance(param_config, dict):
-                        return self.repair_param_type(
-                            str(param_config.get("type", "string"))
-                        )
-                break
+        properties = find_tool_properties(self.tools, self.current_function_name)
+        if param_name in properties and isinstance(properties[param_name], dict):
+            return self.repair_param_type(
+                str(properties[param_name].get("type", "string"))
+            )
         return "string"
 
     def repair_param_type(self, param_type: str) -> str:
@@ -1188,8 +1167,7 @@ def extract_tool_calls(
         # Reset tool call tracking arrays for new extraction
         self.prev_tool_call_arr = []
         self.streamed_args_for_tool = []
-        if request:
-            self.parser.set_tools(request.tools)
+        self.parser.set_tools(self.tools)
         result = self.parser.parse_single_streaming_chunks(model_output)
         if not result.tool_calls:
             return ExtractedToolCallInformation(
@@ -1260,8 +1238,7 @@ def extract_tool_calls_streaming(
             # Reset tool call tracking arrays for new streaming session
             self.prev_tool_call_arr = []
             self.streamed_args_for_tool = []
-            if request:
-                self.parser.set_tools(request.tools)
+            self.parser.set_tools(self.tools)
 
         # Model sometimes outputs separately causing delta_text to be empty.
         # If there were tool_calls before and all current tool_calls have ended,
@@ -1281,11 +1258,11 @@ def extract_tool_calls_streaming(
             return None
 
         # Parse the delta text and get the result
-        result = self.parser.parse_single_streaming_chunks(delta_text)
+        delta = self.parser.parse_single_streaming_chunks(delta_text)
 
         # Update tool call tracking arrays based on incremental parsing results
-        if result and result.tool_calls:
-            for tool_call in result.tool_calls:
+        if delta and delta.tool_calls:
+            for tool_call in delta.tool_calls:
                 if tool_call.function:
                     tool_index = (
                         tool_call.index
@@ -1315,4 +1292,7 @@ def extract_tool_calls_streaming(
                         self.streamed_args_for_tool[tool_index] += (
                             tool_call.function.arguments
                         )
-        return result
+        if delta.content is None and not delta.tool_calls and delta.reasoning is None:
+            # If no content and no tool calls, return None to indicate no update
+            return None
+        return delta
diff --git a/vllm/tool_parsers/seed_oss_tool_parser.py b/vllm/tool_parsers/seed_oss_tool_parser.py
index 48cee6a6951f..a90bdc76d9ed 100644
--- a/vllm/tool_parsers/seed_oss_tool_parser.py
+++ b/vllm/tool_parsers/seed_oss_tool_parser.py
@@ -3,11 +3,9 @@
 # Adapted from qwen3coder xml parser, All rights reserved.
 # ruff: noqa: E501
 
-import ast
 import json
 import uuid
 from collections.abc import Sequence
-from typing import Any
 
 import regex as re
 
@@ -28,6 +26,11 @@
     Tool,
     ToolParser,
 )
+from vllm.tool_parsers.utils import (
+    coerce_to_schema_type,
+    extract_types_from_schema,
+    find_tool_properties,
+)
 
 logger = init_logger(__name__)
 
@@ -111,129 +114,10 @@ def _reset_streaming_state(self):
     def _parse_xml_function_call(
         self, function_call_str: str, tools: list[Tool] | None
     ) -> ToolCall | None:
-        def get_arguments_config(func_name: str) -> dict:
-            if tools is None:
-                return {}
-            for config in tools:
-                if not hasattr(config, "type") or not (
-                    hasattr(config, "function") and hasattr(config.function, "name")
-                ):
-                    continue
-                if config.type == "function" and config.function.name == func_name:
-                    if not hasattr(config.function, "parameters"):
-                        return {}
-                    params = config.function.parameters
-                    if isinstance(params, dict) and "properties" in params:
-                        return params["properties"]
-                    elif isinstance(params, dict):
-                        return params
-                    else:
-                        return {}
-            logger.warning("Tool '%s' is not defined in the tools list.", func_name)
-            return {}
-
-        def convert_param_value(
-            param_value: str, param_name: str, param_config: dict, func_name: str
-        ) -> Any:
-            # Handle null value for any type
-            if param_value.lower() == "null":
-                return None
-
-            if param_name not in param_config:
-                if param_config != {}:
-                    logger.warning(
-                        "Parsed parameter '%s' is not defined in "
-                        "the tool parameters for tool '%s', "
-                        "directly returning the string value.",
-                        param_name,
-                        func_name,
-                    )
-                return param_value
-
-            if (
-                isinstance(param_config[param_name], dict)
-                and "type" in param_config[param_name]
-            ):
-                param_type = str(param_config[param_name]["type"]).strip().lower()
-            else:
-                param_type = "string"
-            if param_type in ["string", "str", "text", "varchar", "char", "enum"]:
-                return param_value
-            elif (
-                param_type.startswith("int")
-                or param_type.startswith("uint")
-                or param_type.startswith("long")
-                or param_type.startswith("short")
-                or param_type.startswith("unsigned")
-            ):
-                try:
-                    param_value = int(param_value)  # type: ignore
-                except (ValueError, TypeError):
-                    logger.warning(
-                        "Parsed value '%s' of parameter '%s' is not an integer in tool "
-                        "'%s', degenerating to string.",
-                        param_value,
-                        param_name,
-                        func_name,
-                    )
-                return param_value
-            elif param_type.startswith("num") or param_type.startswith("float"):
-                try:
-                    float_param_value = float(param_value)
-                    param_value = (
-                        float_param_value  # type: ignore
-                        if float_param_value - int(float_param_value) != 0
-                        else int(float_param_value)  # type: ignore
-                    )
-                except (ValueError, TypeError):
-                    logger.warning(
-                        "Parsed value '%s' of parameter '%s' is not a float in tool "
-                        "'%s', degenerating to string.",
-                        param_value,
-                        param_name,
-                        func_name,
-                    )
-                return param_value
-            elif param_type in ["boolean", "bool", "binary"]:
-                param_value = param_value.lower()
-                if param_value not in ["true", "false"]:
-                    logger.warning(
-                        "Parsed value '%s' of parameter '%s' is not a boolean "
-                        "(`true` of `false`) in tool '%s', degenerating to false.",
-                        param_value,
-                        param_name,
-                        func_name,
-                    )
-                return param_value == "true"
-            else:
-                if param_type == "object" or param_type.startswith("dict"):
-                    try:
-                        param_value = json.loads(param_value)
-                        return param_value
-                    except (ValueError, TypeError, json.JSONDecodeError):
-                        logger.warning(
-                            "Parsed value '%s' of parameter '%s' is not a valid JSON "
-                            "object in tool '%s', will try other methods to parse it.",
-                            param_value,
-                            param_name,
-                            func_name,
-                        )
-                try:
-                    param_value = ast.literal_eval(param_value)
-                except (ValueError, SyntaxError):
-                    logger.warning(
-                        "Parsed value '%s' of parameter '%s' cannot be converted via "
-                        "Python `ast.literal_eval()` in tool '%s', degenerating to string.",
-                        param_value,
-                        param_name,
-                        func_name,
-                    )
-                return param_value
-
         # Extract function name
         end_index = function_call_str.index(">")
         function_name = function_call_str[:end_index]
-        param_config = get_arguments_config(function_name)
+        tool_properties = find_tool_properties(tools, function_name)
         parameters = function_call_str[end_index + 1 :]
         param_dict = {}
         for match in self.tool_call_parameter_regex.findall(parameters):
@@ -247,9 +131,8 @@ def convert_param_value(
             if param_value.endswith("\n"):
                 param_value = param_value[:-1]
 
-            param_dict[param_name] = convert_param_value(
-                param_value, param_name, param_config, function_name
-            )
+            param_types = extract_types_from_schema(tool_properties.get(param_name, {}))
+            param_dict[param_name] = coerce_to_schema_type(param_value, param_types)
         return ToolCall(
             type="function",
             function=FunctionCall(
@@ -312,7 +195,7 @@ def extract_tool_calls(
                 )
 
             tool_calls = [
-                self._parse_xml_function_call(function_call_str, request.tools)
+                self._parse_xml_function_call(function_call_str, self.tools)
                 for function_call_str in function_calls
             ]
 
@@ -566,7 +449,7 @@ def extract_tool_calls_streaming(
                     # Parse to get the complete arguments
                     try:
                         parsed_tool = self._parse_xml_function_call(
-                            func_content, request.tools if request else None
+                            func_content, self.tools
                         )
                         if parsed_tool:
                             # Update existing entry in prev_tool_call_arr with complete arguments
diff --git a/vllm/tool_parsers/step3_tool_parser.py b/vllm/tool_parsers/step3_tool_parser.py
index a9c5695876f7..b955bbf5f32b 100644
--- a/vllm/tool_parsers/step3_tool_parser.py
+++ b/vllm/tool_parsers/step3_tool_parser.py
@@ -19,6 +19,7 @@
     FunctionCall,
     ToolCall,
 )
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import (
@@ -51,7 +52,9 @@ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         self.tool_block_started = False
         self.tool_block_finished = False
 
-    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
         request = super().adjust_request(request)
         if request.tools and request.tool_choice != "none":
             request.skip_special_tokens = False
@@ -79,9 +82,8 @@ def _cast_arguments(
         self,
         func_name: str,
         params: dict[str, Any],
-        request: ChatCompletionRequest,
     ) -> dict[str, Any]:
-        for tool in request.tools or []:
+        for tool in self.tools or []:
             if tool.function.name == func_name:
                 schema = tool.function.parameters or {}
                 properties = schema.get("properties", {})
@@ -231,7 +233,6 @@ def extract_tool_calls_streaming(
                     final_args = self._cast_arguments(
                         function_name,
                         tool_call_arr.get("parameters", {}),  # type: ignore
-                        request,
                     )
                     if final_args:
                         final_args_json = json.dumps(final_args, ensure_ascii=False)
@@ -288,7 +289,7 @@ def extract_tool_calls(
             function_name, params_dict = self._parse_steptml_invoke(invoke_part)
 
             if function_name and params_dict is not None:
-                params_dict = self._cast_arguments(function_name, params_dict, request)
+                params_dict = self._cast_arguments(function_name, params_dict)
                 params_str = json.dumps(params_dict, ensure_ascii=False)
                 tool_calls.append(
                     ToolCall(
diff --git a/vllm/tool_parsers/step3p5_tool_parser.py b/vllm/tool_parsers/step3p5_tool_parser.py
index 25b310f2af6c..b46f899ce2ca 100644
--- a/vllm/tool_parsers/step3p5_tool_parser.py
+++ b/vllm/tool_parsers/step3p5_tool_parser.py
@@ -1385,8 +1385,7 @@ def extract_tool_calls(
         # Reset tool call tracking arrays for new extraction
         self.prev_tool_call_arr = []
         self.streamed_args_for_tool = []
-        if request:
-            self.parser.set_tools(request.tools)
+        self.parser.set_tools(self.tools)
         result = self.parser.parse_single_streaming_chunks(model_output)
         if not result.tool_calls:
             return ExtractedToolCallInformation(
@@ -1457,8 +1456,7 @@ def extract_tool_calls_streaming(
             # Reset tool call tracking arrays for new streaming session
             self.prev_tool_call_arr = []
             self.streamed_args_for_tool = []
-            if request:
-                self.parser.set_tools(request.tools)
+            self.parser.set_tools(self.tools)
 
         # Model sometimes outputs separately causing delta_text to be empty.
         # If there were tool_calls before and all current tool_calls have ended,
diff --git a/vllm/tool_parsers/streaming.py b/vllm/tool_parsers/streaming.py
new file mode 100644
index 000000000000..7f6638dcb94e
--- /dev/null
+++ b/vllm/tool_parsers/streaming.py
@@ -0,0 +1,195 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+from typing import TYPE_CHECKING
+
+import partial_json_parser
+import regex as re
+from partial_json_parser.core.options import Allow
+
+from vllm.entrypoints.chat_utils import make_tool_call_id
+from vllm.entrypoints.openai.engine.protocol import (
+    DeltaFunctionCall,
+    DeltaMessage,
+    DeltaToolCall,
+)
+from vllm.tool_parsers.mistral_tool_parser import MistralToolCall
+from vllm.tool_parsers.utils import partial_json_loads
+from vllm.utils.mistral import is_mistral_tokenizer
+
+if TYPE_CHECKING:
+    from vllm.tokenizers import TokenizerLike
+else:
+    TokenizerLike = object
+
+
+def _bracket_level(s: str, opening: str = "{", closing: str = "}") -> int:
+    """Calculate the current level of nested brackets in a string."""
+    level = 0
+    for char in s:
+        if char == opening:
+            level += 1
+        elif char == closing:
+            level -= 1
+    return level
+
+
+def filter_delta_text(
+    delta_text: str,
+    previous_text: str,
+) -> tuple[str, bool]:
+    """Trim trailing tool-list delimiters from required-tool streaming text."""
+    bracket_level = _bracket_level(previous_text)
+    updated_delta = ""
+    passed_zero = False
+    for char in delta_text:
+        if char == "{":
+            bracket_level += 1
+            passed_zero = bracket_level == 0
+        elif char == "}":
+            bracket_level -= 1
+            passed_zero = bracket_level == 0
+
+        if bracket_level != 0:
+            updated_delta += char
+        else:
+            if char == ",":
+                break
+    return updated_delta, passed_zero
+
+
+def extract_named_tool_call_streaming(
+    *,
+    delta_text: str,
+    function_name: str,
+    function_name_returned: bool,
+    tool_call_idx: int | None,
+    tool_call_id_type: str,
+    tokenizer: "TokenizerLike",
+    tool_call_array_index: int = 0,
+) -> tuple[DeltaMessage | None, bool]:
+    """Build a streaming tool-call delta for forced named tool choice."""
+    if function_name_returned:
+        delta_tool_call = DeltaToolCall(
+            function=DeltaFunctionCall(arguments=delta_text),
+            index=tool_call_array_index,
+        )
+    else:
+        if is_mistral_tokenizer(tokenizer):
+            tool_call_id = MistralToolCall.generate_random_id()
+        else:
+            tool_call_id = make_tool_call_id(
+                id_type=tool_call_id_type,
+                func_name=function_name,
+                idx=tool_call_idx,
+            )
+        delta_tool_call = DeltaToolCall(
+            id=tool_call_id,
+            type="function",
+            function=DeltaFunctionCall(
+                name=function_name,
+                arguments=delta_text,
+            ),
+            index=tool_call_array_index,
+        )
+        function_name_returned = True
+    return (
+        DeltaMessage(tool_calls=[delta_tool_call]),
+        function_name_returned,
+    )
+
+
+def extract_required_tool_call_streaming(
+    *,
+    previous_text: str,
+    current_text: str | None,
+    delta_text: str,
+    function_name_returned: bool,
+    tool_call_idx: int | None,
+    tool_call_id_type: str,
+) -> tuple[DeltaMessage | None, bool]:
+    if current_text is None or current_text == "":
+        # if the current text is empty, we cannot parse it
+        return None, function_name_returned
+    try:
+        flags = Allow.ALL
+        obj, _ = partial_json_loads(current_text, flags)
+    except (
+        partial_json_parser.core.exceptions.MalformedJSON,
+        json.JSONDecodeError,
+    ):
+        obj = None
+
+    # check if the current text is a valid array
+    # containing a partial tool calling object
+    # if not repeat
+    if obj is None or not isinstance(obj, list) or not len(obj) > 0:
+        function_name_returned = False
+        delta_message = None
+    else:
+        _, finishes_previous_tool = filter_delta_text(delta_text, previous_text)
+        # take the last tool call from the generated list
+        current_tool_call = obj[-1]
+
+        # once parameters have been generated the name is complete as well
+        if not finishes_previous_tool and (
+            "name" not in current_tool_call or "parameters" not in current_tool_call
+        ):
+            function_name_returned = False
+            delta_message = None
+        else:
+            if not function_name_returned:
+                # get partly generated arguments from the latest tool call
+                param_match = re.search(
+                    r'.*"parameters":\s*(.*)', current_text, re.DOTALL
+                )
+                arguments = param_match.group(1) if param_match else ""
+                arguments, _ = filter_delta_text(arguments, previous_text)
+
+                # if this iteration finishes a previous tool call but a
+                # new incomplete tool is already generated, take the
+                # previous from the list
+                if finishes_previous_tool and "parameters" not in current_tool_call:
+                    current_tool_call = obj[-2]
+
+                function_name_returned = True
+                tool_call_id = make_tool_call_id(
+                    id_type=tool_call_id_type,
+                    func_name=current_tool_call["name"],
+                    idx=tool_call_idx,
+                )
+                delta_message = DeltaMessage(
+                    tool_calls=[
+                        DeltaToolCall(
+                            id=tool_call_id,
+                            function=DeltaFunctionCall(
+                                name=current_tool_call["name"], arguments=arguments
+                            ),
+                            index=len(obj) - 1,
+                            type="function",
+                        )
+                    ]
+                )
+
+            else:
+                delta_text, _ = filter_delta_text(delta_text, previous_text)
+
+                if delta_text != "":
+                    delta_message = DeltaMessage(
+                        tool_calls=[
+                            DeltaToolCall(
+                                function=DeltaFunctionCall(
+                                    # OpenAI API returns None
+                                    # instead of name every time
+                                    name=None,
+                                    arguments=delta_text,
+                                ),
+                                index=len(obj) - 1,
+                            )
+                        ]
+                    )
+                else:
+                    delta_message = None
+
+    return delta_message, function_name_returned
diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py
new file mode 100644
index 000000000000..754cc52361c5
--- /dev/null
+++ b/vllm/tool_parsers/structural_tag_registry.py
@@ -0,0 +1,330 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+# Model-specific structural tag builders adapted from XGrammar's
+# builtin structural tag implementations:
+# https://github.com/mlc-ai/xgrammar/blob/main/python/xgrammar/builtin_structural_tag.py
+
+from collections.abc import Callable
+from typing import Any, Literal
+
+from xgrammar import StructuralTag
+from xgrammar.structural_tag import (
+    AnyTextFormat,
+    ConstStringFormat,
+    JSONSchemaFormat,
+    SequenceFormat,
+    TagFormat,
+    TagsWithSeparatorFormat,
+    TriggeredTagsFormat,
+)
+
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionToolsParam,
+)
+
+SimplifiedToolChoice = Literal["auto", "required", "forced"]
+ToolChoice = (
+    Literal["none", "auto", "required"] | ChatCompletionNamedToolChoiceParam | None
+)
+StructuralTagBuilder = Callable[
+    [list[ChatCompletionToolsParam], SimplifiedToolChoice, bool],
+    StructuralTag,
+]
+
+_structural_tag_registry: dict[str, StructuralTagBuilder] = {}
+
+
+def register_model_structural_tag(name: str):
+    """Register a vLLM-owned model-specific structural tag builder."""
+
+    def decorator(func: StructuralTagBuilder) -> StructuralTagBuilder:
+        _structural_tag_registry[name] = func
+        return func
+
+    return decorator
+
+
+def get_model_structural_tag(
+    model: str,
+    tools: list[ChatCompletionToolsParam] | None,
+    tool_choice: ToolChoice,
+    reasoning: bool,
+) -> StructuralTag | None:
+    """Build a structural tag from vLLM-owned model-specific builders."""
+
+    builder = _structural_tag_registry.get(model)
+    if builder is None:
+        supported = list(_structural_tag_registry.keys())
+        raise ValueError(f"Unknown format type: {model}, supported types: {supported}")
+
+    normalized_tools, simplified_tool_choice = _normalize_tool_choice(
+        tools=tools,
+        tool_choice=tool_choice,
+    )
+    if not normalized_tools:
+        return None
+
+    return builder(normalized_tools, simplified_tool_choice, reasoning)
+
+
+def _normalize_tool_choice(
+    tools: list[ChatCompletionToolsParam] | None,
+    tool_choice: ToolChoice,
+) -> tuple[list[ChatCompletionToolsParam], SimplifiedToolChoice]:
+    """Normalize vLLM ChatCompletion tool_choice for structural tag builders."""
+
+    if not tools:
+        return [], "auto"
+
+    if tool_choice is None or tool_choice == "none":
+        return [], "auto"
+
+    if tool_choice == "auto":
+        return tools, "auto"
+
+    if tool_choice == "required":
+        return tools, "required"
+
+    if isinstance(tool_choice, ChatCompletionNamedToolChoiceParam):
+        tool_name = tool_choice.function.name
+        filtered_tools = [tool for tool in tools if tool.function.name == tool_name]
+        if not filtered_tools:
+            raise ValueError(
+                f"The tool with name '{tool_name}' is not found in the tools list."
+            )
+        return filtered_tools, "forced"
+
+    raise ValueError(f"Unsupported tool_choice for structural tag: {tool_choice}")
+
+
+def _get_function_parameters(function: Any) -> dict[str, Any] | bool:
+    """Return the JSON schema used for constrained tool arguments."""
+
+    if getattr(function, "strict", None) is False:
+        return True
+    if function.parameters is None:
+        return True
+    return function.parameters
+
+
+_enable_structured_outputs_in_reasoning: bool = False
+
+
+def set_enable_structured_outputs_in_reasoning(enabled: bool) -> None:
+    """Publish the engine's ``enable_in_reasoning`` flag to tool parsers.
+
+    Called once during APIServer startup so request-time parsers can read
+    it without going through the EngineCore-only contextvar.
+    """
+
+    global _enable_structured_outputs_in_reasoning
+    _enable_structured_outputs_in_reasoning = bool(enabled)
+
+
+def get_enable_structured_outputs_in_reasoning() -> bool:
+    """Whether structured outputs are active during the reasoning phase.
+
+    When ``True``, the structural tag will cover the reasoning part:
+    ``<think>...</think>`` prefix (if available); when ``False`` (default), the tag only
+    constrains the post-reasoning suffix.
+    """
+
+    return _enable_structured_outputs_in_reasoning
+
+
+@register_model_structural_tag("deepseek_v4")
+def get_deepseek_v4_structural_tag(
+    tools: list[ChatCompletionToolsParam],
+    tool_choice: SimplifiedToolChoice,
+    reasoning: bool,
+) -> StructuralTag:
+    """Build DeepSeek V4 structural tags."""
+
+    invoke_begin_prefix = '<｜DSML｜invoke name="'
+    invoke_begin_suffix = '">\n'
+    invoke_end = "</｜DSML｜invoke>\n"
+    tool_calls_prefix = "\n\n"
+    function_calls_begin = "<｜DSML｜tool_calls>\n"
+    function_calls_end = "</｜DSML｜tool_calls>"
+    function_calls_trigger = "<｜DSML｜tool_calls>"
+    think_tag_end = "</think>"
+    think_exclude_tokens = ["<think>", "</think>"]
+    xml_style = "deepseek_xml"
+
+    if tool_choice == "auto":
+        tags = []
+        for tool in tools:
+            function = tool.function
+            parameters = _get_function_parameters(function)
+            tags.append(
+                TagFormat(
+                    begin=invoke_begin_prefix + function.name + invoke_begin_suffix,
+                    content=JSONSchemaFormat(
+                        json_schema=parameters,
+                        style=xml_style,
+                    ),
+                    end=invoke_end,
+                )
+            )
+
+        if tags:
+            function_calling_tags = TagsWithSeparatorFormat(
+                tags=tags,
+                separator="\n",
+                at_least_one=True,
+            )
+            suffix_tag = TriggeredTagsFormat(
+                triggers=[function_calls_trigger],
+                tags=[
+                    TagFormat(
+                        begin=function_calls_begin,
+                        content=function_calling_tags,
+                        end=function_calls_end,
+                    )
+                ],
+                excludes=think_exclude_tokens,
+            )
+        else:
+            suffix_tag = AnyTextFormat(excludes=think_exclude_tokens)
+
+    elif tool_choice == "forced":
+        if not tools:
+            raise ValueError("Forced tool choice must resolve to exactly one tool.")
+        function = tools[0].function
+        suffix_tag = SequenceFormat(
+            elements=[
+                ConstStringFormat(value=tool_calls_prefix + function_calls_begin),
+                TagFormat(
+                    begin=invoke_begin_prefix + function.name + invoke_begin_suffix,
+                    content=JSONSchemaFormat(
+                        json_schema=_get_function_parameters(function),
+                        style=xml_style,
+                    ),
+                    end=invoke_end,
+                ),
+                ConstStringFormat(value=function_calls_end),
+            ]
+        )
+
+    elif tool_choice == "required":
+        tags = []
+        for tool in tools:
+            function = tool.function
+            parameters = _get_function_parameters(function)
+            tags.append(
+                TagFormat(
+                    begin=invoke_begin_prefix + function.name + invoke_begin_suffix,
+                    content=JSONSchemaFormat(
+                        json_schema=parameters,
+                        style=xml_style,
+                    ),
+                    end=invoke_end,
+                )
+            )
+        assert len(tags) > 0
+        suffix_tag = SequenceFormat(
+            elements=[
+                ConstStringFormat(value=tool_calls_prefix + function_calls_begin),
+                TagsWithSeparatorFormat(
+                    tags=tags,
+                    separator="\n",
+                    at_least_one=True,
+                ),
+                ConstStringFormat(value=function_calls_end),
+            ]
+        )
+
+    if not reasoning:
+        return StructuralTag(format=suffix_tag)
+
+    prefix_tag = TagFormat(begin="", content=AnyTextFormat(), end=think_tag_end)
+    return StructuralTag(format=SequenceFormat(elements=[prefix_tag, suffix_tag]))
+
+
+@register_model_structural_tag("qwen_3_5")
+def get_qwen_3_5_structural_tag(
+    tools: list[ChatCompletionToolsParam],
+    tool_choice: SimplifiedToolChoice,
+    reasoning: bool,
+) -> StructuralTag:
+    """Build Qwen XML structural tags.
+
+    This format is used for Qwen3-Coder/Qwen3.5/Qwen3.6 and is compatible with
+    Qwen variants that use the same XML tool-call format.
+    """
+    tool_call_begin_prefix = "<tool_call>\n<function="
+    tool_call_begin_suffix = ">\n"
+    tool_call_end = "\n</function>\n</tool_call>"
+    tool_call_trigger = "<tool_call>\n<function="
+    think_tag_end = "</think>"
+    think_suffix = "\n\n"
+    think_exclude_tokens = ["<think>", "</think>"]
+
+    if tool_choice == "auto":
+        tags = []
+        for tool in tools:
+            function = tool.function
+            parameters = _get_function_parameters(function)
+            tags.append(
+                TagFormat(
+                    begin=f"{tool_call_begin_prefix}{function.name}{tool_call_begin_suffix}",
+                    content=JSONSchemaFormat(json_schema=parameters, style="qwen_xml"),
+                    end=tool_call_end,
+                )
+            )
+
+        if tags:
+            suffix_tag = TriggeredTagsFormat(
+                triggers=[tool_call_trigger],
+                tags=tags,
+                excludes=think_exclude_tokens,
+            )
+        else:
+            suffix_tag = AnyTextFormat(excludes=think_exclude_tokens)
+
+    elif tool_choice == "forced":
+        if not tools:
+            raise ValueError("Forced tool choice must resolve to exactly one tool.")
+        function = tools[0].function
+        suffix_tag = TagFormat(
+            begin=f"{tool_call_begin_prefix}{function.name}{tool_call_begin_suffix}",
+            content=JSONSchemaFormat(
+                json_schema=_get_function_parameters(function),
+                style="qwen_xml",
+            ),
+            end=tool_call_end,
+        )
+
+    elif tool_choice == "required":
+        tags = []
+        for tool in tools:
+            function = tool.function
+            parameters = _get_function_parameters(function)
+            tags.append(
+                TagFormat(
+                    begin=f"{tool_call_begin_prefix}{function.name}{tool_call_begin_suffix}",
+                    content=JSONSchemaFormat(json_schema=parameters, style="qwen_xml"),
+                    end=tool_call_end,
+                )
+            )
+        assert len(tags) > 0
+        suffix_tag = TagsWithSeparatorFormat(
+            tags=tags,
+            separator="",
+            at_least_one=True,
+        )
+
+    if not reasoning:
+        result = StructuralTag(format=suffix_tag)
+    else:
+        prefix_tag = SequenceFormat(
+            elements=[
+                TagFormat(begin="", content=AnyTextFormat(), end=think_tag_end),
+                ConstStringFormat(value=think_suffix),
+            ]
+        )
+        result = StructuralTag(format=SequenceFormat(elements=[prefix_tag, suffix_tag]))
+
+    return result
diff --git a/vllm/tool_parsers/utils.py b/vllm/tool_parsers/utils.py
index a279e5b9b59c..1c7830b320fc 100644
--- a/vllm/tool_parsers/utils.py
+++ b/vllm/tool_parsers/utils.py
@@ -4,14 +4,14 @@
 import ast
 import json
 from json import JSONDecodeError, JSONDecoder
-from typing import Any
+from typing import Any, TypeAlias
 
 import partial_json_parser
 from openai.types.responses import (
     FunctionTool,
     ToolChoiceFunction,
 )
-from openai.types.responses.tool import Tool
+from openai.types.responses.tool import Tool as ResponsesTool
 from partial_json_parser.core.options import Allow
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
@@ -26,9 +26,24 @@
 )
 from vllm.logger import init_logger
 
+Tool: TypeAlias = ChatCompletionToolsParam | ResponsesTool
+
 logger = init_logger(__name__)
 
 
+def partial_tag_overlap(text: str, tag: str) -> int:
+    """Length of the longest prefix of *tag* that matches a suffix of *text*.
+
+    E.g. text ending in ``"<tool_"`` returns 6 when tag is ``"<tool_call>"``.
+    Returns 0 when there is no overlap.
+    """
+    max_check = min(len(tag) - 1, len(text))
+    for k in range(max_check, 0, -1):
+        if text.endswith(tag[:k]):
+            return k
+    return 0
+
+
 def find_common_prefix(s1: str, s2: str) -> str:
     """
     Finds a common prefix that is shared between two strings, if there is one.
@@ -130,7 +145,7 @@ def consume_space(i: int, s: str) -> int:
 
 
 def _extract_tool_info(
-    tool: Tool | ChatCompletionToolsParam,
+    tool: Tool,
 ) -> tuple[str, dict[str, Any] | None]:
     if isinstance(tool, FunctionTool):
         return tool.name, tool.parameters
@@ -140,7 +155,21 @@ def _extract_tool_info(
         raise TypeError(f"Unsupported tool type: {type(tool)}")
 
 
-def _get_tool_schema_from_tool(tool: Tool | ChatCompletionToolsParam) -> dict:
+def find_tool_properties(
+    tools: list[Tool] | None,
+    tool_name: str,
+) -> dict[str, Any]:
+    """Find a tool by name and return its properties dict, or {}."""
+    if not tools:
+        return {}
+    for tool in tools:
+        name, params = _extract_tool_info(tool)
+        if name == tool_name:
+            return (params or {}).get("properties", {})
+    return {}
+
+
+def _get_tool_schema_from_tool(tool: Tool) -> dict:
     name, params = _extract_tool_info(tool)
     params = params if params else {"type": "object", "properties": {}}
     return {
@@ -153,7 +182,7 @@ def _get_tool_schema_from_tool(tool: Tool | ChatCompletionToolsParam) -> dict:
 
 
 def _get_tool_schema_defs(
-    tools: list[Tool | ChatCompletionToolsParam],
+    tools: list[Tool],
 ) -> dict:
     all_defs: dict[str, dict[str, Any]] = {}
     for tool in tools:
@@ -172,7 +201,7 @@ def _get_tool_schema_defs(
 
 
 def _get_json_schema_from_tools(
-    tools: list[Tool | ChatCompletionToolsParam],
+    tools: list[Tool],
 ) -> dict:
     json_schema = {
         "type": "array",
@@ -190,7 +219,7 @@ def _get_json_schema_from_tools(
 
 def get_json_schema_from_tools(
     tool_choice: str | ToolChoiceFunction | ChatCompletionNamedToolChoiceParam,
-    tools: list[FunctionTool | ChatCompletionToolsParam] | None,
+    tools: list[Tool] | None,
 ) -> str | dict | None:
     # tool_choice: "none"
     if tool_choice in ("none", None) or tools is None:
@@ -279,20 +308,43 @@ def get_parameter_value(val: ast.expr) -> Any:
         raise UnexpectedAstError("Tool call arguments must be literals")
 
 
+def _ast_callable_dotted_name(node: ast.expr) -> str:
+    """Return the dotted name for a call target, walking ``ast.Attribute``
+    chains so ``a.b.c(...)`` becomes ``"a.b.c"``.
+
+    Raises:
+        UnexpectedAstError: If the chain does not bottom out in an
+            ``ast.Name`` (e.g. subscript or call expression as receiver).
+    """
+    parts: list[str] = []
+    current: ast.expr = node
+    while isinstance(current, ast.Attribute):
+        parts.append(current.attr)
+        current = current.value
+    if not isinstance(current, ast.Name):
+        raise UnexpectedAstError("Invalid tool call name")
+    parts.append(current.id)
+    return ".".join(reversed(parts))
+
+
 def handle_single_tool(call: ast.Call) -> ToolCall:
     """Convert a single AST function call node into a ToolCall object.
 
+    Accepts both bare names (``foo(...)``) and dotted attribute chains
+    (``a.b.c(...)``); the resulting tool call ``name`` field preserves the
+    dotted form.
+
     Raises:
-        UnexpectedAstError: If the call node does not have a simple
-            function name (e.g. it's an attribute access or subscript).
+        UnexpectedAstError: If the call target is neither a simple name
+            nor a chain of attribute accesses bottoming out in a name.
     """
-    if not isinstance(call.func, ast.Name):
+    if not isinstance(call.func, (ast.Name, ast.Attribute)):
         logger.warning(
             "Tool call has non-simple function name: %s",
             ast.dump(call.func),
         )
         raise UnexpectedAstError("Invalid tool call name")
-    function_name = call.func.id
+    function_name = _ast_callable_dotted_name(call.func)
     arguments = {}
     for keyword in call.keywords:
         arguments[keyword.arg] = get_parameter_value(keyword.value)
@@ -374,7 +426,171 @@ def make_valid_python(text: str) -> tuple[str, str] | None:
     for char in reversed(bracket_stack):
         added_text += _CLOSING[char]
 
-    return text + added_text, added_text
+    candidate = text + added_text
+
+    # Streaming partial text can land in shapes the bracket-counting
+    # heuristics above don't catch. Two failure modes:
+    #   1. Mid-key inside a dict (`..., "k`) closes to `..., "k"}` — a
+    #      syntactically invalid mixed dict/set.
+    #   2. A bare string inside a dict (`{"k`) closes to `{"k"}` — valid
+    #      Python but a *set* literal, which downstream tool-call AST
+    #      handling rejects.
+    # Validate the candidate parses, has a body, and contains no Set
+    # nodes (pythonic tool calls always use dicts for `{...}`).
+    try:
+        module = ast.parse(candidate)
+    except SyntaxError:
+        return None
+    if not module.body:
+        return None
+    for node in ast.walk(module):
+        if isinstance(node, ast.Set):
+            return None
+
+    return candidate, added_text
+
+
+def extract_types_from_schema(schema: Any) -> list[str]:
+    """Extract all possible type strings from a JSON Schema definition.
+
+    Handles ``type`` (string or list), ``enum`` value inference, and
+    recursive ``anyOf``/``oneOf``/``allOf``.  Returns ``["string"]``
+    when no type information can be determined.
+    """
+    if schema is None or not isinstance(schema, dict):
+        return ["string"]
+
+    types: set[str] = set()
+
+    if "type" in schema:
+        type_value = schema["type"]
+        if isinstance(type_value, str):
+            types.add(type_value)
+        elif isinstance(type_value, list):
+            for t in type_value:
+                if isinstance(t, str):
+                    types.add(t)
+
+    if "enum" in schema and isinstance(schema["enum"], list) and schema["enum"]:
+        for value in schema["enum"]:
+            if value is None:
+                types.add("null")
+            elif isinstance(value, bool):
+                types.add("boolean")
+            elif isinstance(value, int):
+                types.add("integer")
+            elif isinstance(value, float):
+                types.add("number")
+            elif isinstance(value, str):
+                types.add("string")
+            elif isinstance(value, list):
+                types.add("array")
+            elif isinstance(value, dict):
+                types.add("object")
+
+    for choice_field in ("anyOf", "oneOf", "allOf"):
+        if choice_field in schema and isinstance(schema[choice_field], list):
+            for choice in schema[choice_field]:
+                types.update(extract_types_from_schema(choice))
+
+    return list(types) if types else ["string"]
+
+
+_TYPE_ALIASES: dict[str, str] = {
+    "str": "string",
+    "text": "string",
+    "varchar": "string",
+    "char": "string",
+    "enum": "string",
+    "int": "integer",
+    "int32": "integer",
+    "int64": "integer",
+    "uint": "integer",
+    "uint32": "integer",
+    "uint64": "integer",
+    "long": "integer",
+    "short": "integer",
+    "unsigned": "integer",
+    "float": "number",
+    "float32": "number",
+    "float64": "number",
+    "double": "number",
+    "bool": "boolean",
+    "dict": "object",
+    "arr": "array",
+    "list": "array",
+    "sequence": "array",
+}
+
+
+def coerce_to_schema_type(value: str, schema_type: str | list[str]) -> Any:
+    """Best-effort coercion of a raw string value to a JSON Schema type.
+
+    Tries each type in priority order (null > integer > number > boolean >
+    object > array > string) and returns the first successful coercion.
+    Falls back to the original string when no coercion succeeds.
+
+    Args:
+        value: The raw string value from the model output.
+        schema_type: One or more JSON Schema type strings
+            (e.g. ``"string"`` or ``["string", "null"]``).
+    """
+    if isinstance(schema_type, str):
+        schema_type = [schema_type]
+
+    normalized_types = {
+        _TYPE_ALIASES.get(key, key) for t in schema_type for key in [t.strip().lower()]
+    }
+
+    # Priority: null > integer > number > boolean > object > array > string
+    type_priority = [
+        "null",
+        "integer",
+        "number",
+        "boolean",
+        "object",
+        "array",
+        "string",
+    ]
+
+    for candidate_type in type_priority:
+        if candidate_type not in normalized_types:
+            continue
+
+        if candidate_type == "null":
+            if value.lower() == "null":
+                return None
+            continue
+        if candidate_type == "string":
+            return value
+        if candidate_type == "integer":
+            try:
+                return int(value)
+            except (ValueError, TypeError):
+                continue
+        if candidate_type == "number":
+            try:
+                val = float(value)
+                return val if val != int(val) else int(val)
+            except (ValueError, TypeError):
+                continue
+        if candidate_type == "boolean":
+            lower_val = value.lower().strip()
+            if lower_val in ("true", "1"):
+                return True
+            if lower_val in ("false", "0"):
+                return False
+            continue
+        if candidate_type in ("object", "array"):
+            try:
+                return json.loads(value)
+            except (json.JSONDecodeError, ValueError, TypeError):
+                continue
+
+    try:
+        return json.loads(value)
+    except (json.JSONDecodeError, ValueError):
+        return value
 
 
 def compute_tool_delta(
diff --git a/vllm/transformers_utils/chat_templates/registry.py b/vllm/transformers_utils/chat_templates/registry.py
index af9fc77f150c..0c3d15f4dbd0 100644
--- a/vllm/transformers_utils/chat_templates/registry.py
+++ b/vllm/transformers_utils/chat_templates/registry.py
@@ -39,6 +39,7 @@ def _get_minicpmv_chat_template_fallback(tokenizer_name_or_path: str) -> Path |
     "deepseek_vl_v2": CHAT_TEMPLATES_DIR / "template_deepseek_vl2.jinja",
     "fuyu": CHAT_TEMPLATES_DIR / "template_fuyu.jinja",
     "minicpmv": _get_minicpmv_chat_template_fallback,
+    "minicpmv4_6": _get_minicpmv_chat_template_fallback,
     "paligemma": CHAT_TEMPLATES_DIR / "template_basic.jinja",
     "qwen": _get_qwen_chat_template_fallback,
     "siglip": CHAT_TEMPLATES_DIR / "template_basic.jinja",
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index d27134157293..570022d65168 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -66,6 +66,13 @@
 
 logger = init_logger(__name__)
 
+if Version(version("transformers")) < Version("5.0.0"):
+    logger.warning(
+        "Support for Transformers v4 is deprecated. The Transformers v4 codepath will "
+        "become unmaintained in vLLM v0.22.0 and will be removed in vLLM v0.24.0. "
+        "Please upgrade to Transformers v5: pip install --upgrade transformers"
+    )
+
 
 class LazyConfigDict(dict):
     def __getitem__(self, key):
@@ -80,17 +87,23 @@ def __getitem__(self, key):
 _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = LazyConfigDict(
     afmoe="AfmoeConfig",
     bagel="BagelConfig",
+    umm="CheersConfig",
     chatglm="ChatGLMConfig",
-    colmodernvbert="ColModernVBertConfig",
+    modernvbert="ColModernVBertConfig",
     colpali="ColPaliConfig",
     colqwen3="ColQwen3Config",
     ops_colqwen3="OpsColQwen3Config",
     qwen3_vl_nemotron_embed="Qwen3VLNemotronEmbedConfig",
     deepseek_vl_v2="DeepseekVLV2Config",
     deepseek_v32="DeepseekV3Config",
+    deepseek_v4="DeepseekV4Config",
     flex_olmo="FlexOlmoConfig",
+    fireredlid="FireRedLIDConfig",
     funaudiochat="FunAudioChatConfig",
+    granite4_vision="Granite4VisionConfig",
+    hyperclovax_vlm="HCXVisionConfig",
     hunyuan_vl="HunYuanVLConfig",
+    hy_v3="HYV3Config",
     isaac="IsaacConfig",
     kimi_k2="DeepseekV3Config",  # Kimi K2 uses same architecture as DeepSeek V3
     kimi_linear="KimiLinearConfig",
@@ -102,19 +115,23 @@ def __getitem__(self, key):
     mlp_speculator="MLPSpeculatorConfig",
     medusa="MedusaConfig",
     midashenglm="MiDashengLMConfig",
+    moondream3="Moondream3Config",
     eagle="EAGLEConfig",
     speculators="SpeculatorsConfig",
     nemotron="NemotronConfig",
     olmo_hybrid="OlmoHybridConfig",
+    openvla="OpenVLAConfig",
     ovis="OvisConfig",
     ultravox="UltravoxConfig",
     step3_vl="Step3VLConfig",
     step3_text="Step3TextConfig",
     step3p5="Step3p5Config",
+    qianfan_ocr="QianfanOCRConfig",
     qwen3_asr="Qwen3ASRConfig",
     qwen3_next="Qwen3NextConfig",
     qwen3_5="Qwen3_5Config",
     qwen3_5_moe="Qwen3_5MoeConfig",
+    laguna="LagunaConfig",
     lfm2_moe="Lfm2MoeConfig",
     tarsier2="Tarsier2Config",
 )
@@ -203,11 +220,22 @@ def parse(
             )
         else:
             if model_type in _CONFIG_REGISTRY:
-                # Register the config class to AutoConfig to ensure it's used in future
-                # calls to `from_pretrained`
+                # Register the config class to AutoConfig to ensure it's used
+                # in future calls to `from_pretrained` (e.g. from
+                # AutoTokenizer or AutoProcessor).
                 config_class = _CONFIG_REGISTRY[model_type]
                 config_class.model_type = model_type
                 AutoConfig.register(model_type, config_class, exist_ok=True)
+                # If the on-disk model_type differs from the overridden
+                # one, register under both so AutoConfig.from_pretrained
+                # returns the correct class regardless of what the
+                # checkpoint says
+                if (
+                    config_model_type := config_dict.get("model_type")
+                ) and config_model_type != model_type:
+                    config_class.model_type = config_model_type
+                    AutoConfig.register(config_model_type, config_class, exist_ok=True)
+                    config_class.model_type = model_type
                 # Now that it is registered, it is not considered remote code anymore
                 trust_remote_code = False
             try:
@@ -374,6 +402,57 @@ def set_default_rope_theta(config: PretrainedConfig, default_theta: float) -> No
         config.rope_parameters["rope_theta"] = default_theta
 
 
+def patch_legacy_rope_type(rope_parameters: dict[str, Any] | None) -> None:
+    """Patch legacy RoPE type fields for backwards compatibility with
+    older custom models which would otherwise fail to load."""
+
+    # No RoPE parameters to patch
+    if rope_parameters is None:
+        return
+
+    def _patch_legacy_rope_type(rope_parameters: dict[str, Any]) -> None:
+        # Case 1: Both legacy and modern fields present - check for conflicts
+        if "rope_type" in rope_parameters and "type" in rope_parameters:
+            rope_type = rope_parameters["rope_type"]
+            rope_type_legacy = rope_parameters["type"]
+            if (rope_type_legacy == "su" and rope_type == "longrope") or (
+                rope_type_legacy == "mrope" and rope_type == "default"
+            ):
+                pass  # No action needed
+            elif rope_type != rope_type_legacy:
+                raise ValueError(
+                    f"Found conflicts between 'rope_type={rope_type}' (modern "
+                    f"field) and 'type={rope_type_legacy}' (legacy field). "
+                    "You should only specify one of them."
+                )
+        # Case 2: Only legacy field present - patch to modern format with warning
+        if "rope_type" not in rope_parameters and "type" in rope_parameters:
+            rope_parameters["rope_type"] = rope_parameters["type"]
+            logger.info("Replacing legacy 'type' key with 'rope_type'")
+        # Case 3: No rope_type field at all - cannot determine RoPE type, raise error
+        if "rope_type" not in rope_parameters:
+            raise ValueError("rope_parameters should have a 'rope_type' key")
+        # Patch legacy rope_type values with warning
+        if rope_parameters["rope_type"] == "su":
+            rope_parameters["rope_type"] = "longrope"
+            logger.warning("Replacing legacy rope_type 'su' with 'longrope'")
+        elif rope_parameters["rope_type"] == "mrope":
+            if "mrope_section" not in rope_parameters:
+                raise ValueError(
+                    "Legacy rope_type 'mrope' requires "
+                    "'mrope_section' in rope_parameters"
+                )
+            rope_parameters["rope_type"] = "default"
+            logger.warning("Replacing legacy rope_type 'mrope' with 'default'")
+
+    # Handle nested rope_parameters in interleaved sliding attention models
+    if is_rope_parameters_nested(rope_parameters):
+        for rope_parameters_layer_type in rope_parameters.values():
+            _patch_legacy_rope_type(rope_parameters_layer_type)
+    else:
+        _patch_legacy_rope_type(rope_parameters)
+
+
 def patch_rope_parameters(config: PretrainedConfig) -> None:
     """Provide backwards compatibility for RoPE."""
     from vllm.config.utils import getattr_iter
@@ -387,22 +466,28 @@ def patch_rope_parameters(config: PretrainedConfig) -> None:
     ompe = getattr(config, "original_max_position_embeddings", None)
 
     if Version(version("transformers")) < Version("5.0.0"):
-        # Transformers v4 installed, legacy config fields may be present
-        if (rope_scaling := getattr(config, "rope_scaling", None)) is not None:
-            config.rope_parameters = rope_scaling
-        if (
-            rope_theta is not None
-            or partial_rotary_factor is not None
-            or ompe is not None
-        ) and not getattr(config, "rope_parameters", None):
-            config.rope_parameters = {"rope_type": "default"}
-        # Patch legacy fields into rope_parameters
-        if rope_theta is not None:
-            config.rope_parameters["rope_theta"] = rope_theta
-        if partial_rotary_factor is not None:
-            config.rope_parameters["partial_rotary_factor"] = partial_rotary_factor
-        if ompe is not None:
-            config.rope_parameters["original_max_position_embeddings"] = ompe
+        # Transformers v4 installed, legacy config fields may be present.
+        if is_rope_parameters_nested(getattr(config, "rope_parameters", {})):
+            # Loading nested rope_parameters (from Transformers v5) in Transformers v4.
+            # Skip legacy patching since it should already be in the correct format.
+            pass
+        else:
+            if (rope_scaling := getattr(config, "rope_scaling", None)) is not None:
+                config.rope_parameters = rope_scaling
+            if (
+                rope_theta is not None
+                or partial_rotary_factor is not None
+                or ompe is not None
+            ) and not getattr(config, "rope_parameters", None):
+                config.rope_parameters = {"rope_type": "default"}
+            # Patch legacy fields into rope_parameters
+            if rope_theta is not None:
+                config.rope_parameters["rope_theta"] = rope_theta
+            if partial_rotary_factor is not None:
+                config.rope_parameters["partial_rotary_factor"] = partial_rotary_factor
+            if ompe is not None:
+                config.rope_parameters["original_max_position_embeddings"] = ompe
+            patch_legacy_rope_type(getattr(config, "rope_parameters", None))
     elif rope_theta is not None or getattr(config, "rope_parameters", None):
         # Transformers v5 installed
         # Patch these fields in case they used non-standard names
@@ -411,54 +496,10 @@ def patch_rope_parameters(config: PretrainedConfig) -> None:
         if partial_rotary_factor is not None:
             config.partial_rotary_factor = partial_rotary_factor
         # Standardize and validate RoPE parameters
+        patch_legacy_rope_type(getattr(config, "rope_parameters", None))
         config.standardize_rope_params()
         config.validate_rope()
 
-    # No RoPE parameters to patch
-    if getattr(config, "rope_parameters", None) is None:
-        return
-
-    # Handle nested rope_parameters in interleaved sliding attention models
-    if is_rope_parameters_nested(config.rope_parameters):
-        for rope_parameters_layer_type in config.rope_parameters.values():
-            patch_rope_parameters_dict(rope_parameters_layer_type)
-    else:
-        patch_rope_parameters_dict(config.rope_parameters)
-
-
-def patch_rope_parameters_dict(rope_parameters: dict[str, Any]) -> None:
-    if "rope_type" in rope_parameters and "type" in rope_parameters:
-        rope_type = rope_parameters["rope_type"]
-        rope_type_legacy = rope_parameters["type"]
-        if (rope_type_legacy == "su" and rope_type == "longrope") or (
-            rope_type_legacy == "mrope" and rope_type == "default"
-        ):
-            pass  # No action needed
-        elif rope_type != rope_type_legacy:
-            raise ValueError(
-                f"Found conflicts between 'rope_type={rope_type}' (modern "
-                f"field) and 'type={rope_type_legacy}' (legacy field). "
-                "You should only specify one of them."
-            )
-
-    if "rope_type" not in rope_parameters and "type" in rope_parameters:
-        rope_parameters["rope_type"] = rope_parameters["type"]
-        logger.info("Replacing legacy 'type' key with 'rope_type'")
-
-    if "rope_type" not in rope_parameters:
-        raise ValueError("rope_parameters should have a 'rope_type' key")
-
-    if rope_parameters["rope_type"] == "su":
-        rope_parameters["rope_type"] = "longrope"
-        logger.warning("Replacing legacy rope_type 'su' with 'longrope'")
-    elif rope_parameters["rope_type"] == "mrope":
-        if "mrope_section" not in rope_parameters:
-            raise ValueError(
-                "Legacy rope_type 'mrope' requires 'mrope_section' in rope_parameters"
-            )
-        rope_parameters["rope_type"] = "default"
-        logger.warning("Replacing legacy rope_type 'mrope' with 'default'")
-
 
 def _uses_mrope(config: PretrainedConfig) -> bool:
     rope_parameters = getattr(config, "rope_parameters", None)
@@ -1163,6 +1204,15 @@ def try_get_dense_modules(
         return None
 
 
+def _read_safetensors_metadata_in_dir(local_dir: Path) -> dict[str, Any]:
+    return {
+        param_name: info
+        for file_path in local_dir.glob("*.safetensors")
+        if file_path.is_file()
+        for param_name, info in parse_safetensors_file_metadata(file_path).items()
+    }
+
+
 def get_safetensors_params_metadata(
     model: str,
     *,
@@ -1171,24 +1221,36 @@ def get_safetensors_params_metadata(
     """
     Get the safetensors parameters metadata for remote/local model repository.
     """
-    full_metadata = {}
     if (model_path := Path(model)).exists():
-        safetensors_to_check = model_path.glob("*.safetensors")
-        full_metadata = {
-            param_name: info
-            for file_path in safetensors_to_check
-            if file_path.is_file()
-            for param_name, info in parse_safetensors_file_metadata(file_path).items()
+        return _read_safetensors_metadata_in_dir(model_path)
+
+    repo_mt = try_get_safetensors_metadata(model, revision=revision)
+    if repo_mt and (files_mt := repo_mt.files_metadata):
+        return {
+            param_name: asdict(info)
+            for file_mt in files_mt.values()
+            for param_name, info in file_mt.tensors.items()
         }
-    else:
-        repo_mt = try_get_safetensors_metadata(model, revision=revision)
-        if repo_mt and (files_mt := repo_mt.files_metadata):
-            full_metadata = {
-                param_name: asdict(info)
-                for file_mt in files_mt.values()
-                for param_name, info in file_mt.tensors.items()
-            }
-    return full_metadata
+
+    # Hub fetch failed (e.g. 429, network unreachable). Fall back to the
+    # local HF cache: weights may already be cached from a prior run, and
+    # weight loading itself uses the same cache.
+    try:
+        local_dir = huggingface_hub.snapshot_download(
+            repo_id=model,
+            revision=revision,
+            allow_patterns=["*.safetensors"],
+            local_files_only=True,
+        )
+    except huggingface_hub.errors.LocalEntryNotFoundError as e:
+        logger.warning_once(
+            "Could not retrieve safetensors metadata for %s "
+            "(Hub fetch failed and no local cache snapshot is available): %s.",
+            model,
+            str(e),
+        )
+        return {}
+    return _read_safetensors_metadata_in_dir(Path(local_dir))
 
 
 def _download_mistral_config_file(model, revision) -> dict:
diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py
index 75bfda3fbdfe..e98f0ae055be 100644
--- a/vllm/transformers_utils/configs/__init__.py
+++ b/vllm/transformers_utils/configs/__init__.py
@@ -18,6 +18,7 @@
     "AfmoeConfig": "vllm.transformers_utils.configs.afmoe",
     "AXK1Config": "vllm.transformers_utils.configs.AXK1",
     "BagelConfig": "vllm.transformers_utils.configs.bagel",
+    "CheersConfig": "vllm.transformers_utils.configs.cheers",
     "ChatGLMConfig": "vllm.transformers_utils.configs.chatglm",
     "ColModernVBertConfig": "vllm.transformers_utils.configs.colmodernvbert",
     "ColPaliConfig": "vllm.transformers_utils.configs.colpali",
@@ -25,14 +26,19 @@
     "OpsColQwen3Config": "vllm.transformers_utils.configs.colqwen3",
     "Qwen3VLNemotronEmbedConfig": "vllm.transformers_utils.configs.colqwen3",
     "DeepseekVLV2Config": "vllm.transformers_utils.configs.deepseek_vl2",
+    "DeepseekV4Config": "vllm.transformers_utils.configs.deepseek_v4",
     "DotsOCRConfig": "vllm.transformers_utils.configs.dotsocr",
     "EAGLEConfig": "vllm.transformers_utils.configs.eagle",
+    "FireRedLIDConfig": "vllm.transformers_utils.configs.fireredlid",
     "FlexOlmoConfig": "vllm.transformers_utils.configs.flex_olmo",
     "FunAudioChatConfig": "vllm.transformers_utils.configs.funaudiochat",
     "FunAudioChatAudioEncoderConfig": "vllm.transformers_utils.configs.funaudiochat",
+    "Granite4VisionConfig": "vllm.transformers_utils.configs.granite4_vision",
     "HunYuanVLConfig": "vllm.transformers_utils.configs.hunyuan_vl",
     "HunYuanVLTextConfig": "vllm.transformers_utils.configs.hunyuan_vl",
     "HunYuanVLVisionConfig": "vllm.transformers_utils.configs.hunyuan_vl",
+    "HCXVisionConfig": "vllm.transformers_utils.configs.hyperclovax",
+    "HYV3Config": "vllm.transformers_utils.configs.hy_v3",
     "HyperCLOVAXConfig": "vllm.transformers_utils.configs.hyperclovax",
     "IsaacConfig": "vllm.transformers_utils.configs.isaac",
     # RWConfig is for the original tiiuae/falcon-40b(-instruct) and
@@ -40,10 +46,14 @@
     # `FalconConfig` class from the official HuggingFace transformers library.
     "RWConfig": "vllm.transformers_utils.configs.falcon",
     "JAISConfig": "vllm.transformers_utils.configs.jais",
+    "LagunaConfig": "vllm.transformers_utils.configs.laguna",
     "Lfm2MoeConfig": "vllm.transformers_utils.configs.lfm2_moe",
     "MedusaConfig": "vllm.transformers_utils.configs.medusa",
     "MiDashengLMConfig": "vllm.transformers_utils.configs.midashenglm",
     "MLPSpeculatorConfig": "vllm.transformers_utils.configs.mlp_speculator",
+    "Moondream3Config": "vllm.transformers_utils.configs.moondream3",
+    "Moondream3TextConfig": "vllm.transformers_utils.configs.moondream3",
+    "Moondream3VisionConfig": "vllm.transformers_utils.configs.moondream3",
     "MoonViTConfig": "vllm.transformers_utils.configs.moonvit",
     "KimiLinearConfig": "vllm.transformers_utils.configs.kimi_linear",
     "KimiVLConfig": "vllm.transformers_utils.configs.kimi_vl",
@@ -51,6 +61,7 @@
     "NemotronConfig": "vllm.transformers_utils.configs.nemotron",
     "NemotronHConfig": "vllm.transformers_utils.configs.nemotron_h",
     "OlmoHybridConfig": "vllm.transformers_utils.configs.olmo_hybrid",
+    "OpenVLAConfig": "vllm.transformers_utils.configs.openvla",
     "OvisConfig": "vllm.transformers_utils.configs.ovis",
     "PixelShuffleSiglip2VisionConfig": "vllm.transformers_utils.configs.isaac",
     "RadioConfig": "vllm.transformers_utils.configs.radio",
@@ -60,6 +71,8 @@
     "Step3VisionEncoderConfig": "vllm.transformers_utils.configs.step3_vl",
     "Step3TextConfig": "vllm.transformers_utils.configs.step3_vl",
     "Step3p5Config": "vllm.transformers_utils.configs.step3p5",
+    "QianfanOCRConfig": "vllm.transformers_utils.configs.qianfan_ocr",
+    "QianfanOCRVisionConfig": "vllm.transformers_utils.configs.qianfan_ocr",
     "Qwen3ASRConfig": "vllm.transformers_utils.configs.qwen3_asr",
     "Qwen3NextConfig": "vllm.transformers_utils.configs.qwen3_next",
     "Qwen3_5Config": "vllm.transformers_utils.configs.qwen3_5",
@@ -75,6 +88,7 @@
     "AfmoeConfig",
     "AXK1Config",
     "BagelConfig",
+    "CheersConfig",
     "ChatGLMConfig",
     "ColModernVBertConfig",
     "ColPaliConfig",
@@ -83,22 +97,31 @@
     "Qwen3VLNemotronEmbedConfig",
     "DeepseekVLV2Config",
     "DeepseekV3Config",
+    "DeepseekV4Config",
     "DotsOCRConfig",
     "EAGLEConfig",
     "FlexOlmoConfig",
+    "FireRedLIDConfig",
     "FunAudioChatConfig",
     "FunAudioChatAudioEncoderConfig",
+    "Granite4VisionConfig",
     "HunYuanVLConfig",
     "HunYuanVLTextConfig",
     "HunYuanVLVisionConfig",
+    "HCXVisionConfig",
+    "HYV3Config",
     "HyperCLOVAXConfig",
     "IsaacConfig",
     "RWConfig",
     "JAISConfig",
+    "LagunaConfig",
     "Lfm2MoeConfig",
     "MedusaConfig",
     "MiDashengLMConfig",
     "MLPSpeculatorConfig",
+    "Moondream3Config",
+    "Moondream3TextConfig",
+    "Moondream3VisionConfig",
     "MoonViTConfig",
     "KimiLinearConfig",
     "KimiVLConfig",
@@ -106,6 +129,7 @@
     "NemotronConfig",
     "NemotronHConfig",
     "OlmoHybridConfig",
+    "OpenVLAConfig",
     "OvisConfig",
     "PixelShuffleSiglip2VisionConfig",
     "RadioConfig",
@@ -115,6 +139,8 @@
     "Step3VisionEncoderConfig",
     "Step3TextConfig",
     "Step3p5Config",
+    "QianfanOCRConfig",
+    "QianfanOCRVisionConfig",
     "Qwen3ASRConfig",
     "Qwen3NextConfig",
     "Qwen3_5Config",
diff --git a/vllm/transformers_utils/configs/cheers.py b/vllm/transformers_utils/configs/cheers.py
new file mode 100644
index 000000000000..e00d19761af9
--- /dev/null
+++ b/vllm/transformers_utils/configs/cheers.py
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from transformers import PretrainedConfig, SiglipVisionConfig
+from transformers.modeling_rope_utils import rope_config_validation
+
+
+class CheersTextConfig(PretrainedConfig):
+    """Qwen2-based text config with Cheers-specific defaults."""
+
+    model_type = "umm"
+    base_config_key = "text_config"
+
+    def __init__(
+        self,
+        vocab_size=152064,
+        hidden_size=3584,
+        intermediate_size=18944,
+        num_hidden_layers=28,
+        num_attention_heads=28,
+        num_key_value_heads=4,
+        hidden_act="silu",
+        max_position_embeddings=131072,
+        initializer_range=0.02,
+        rms_norm_eps=1e-6,
+        use_cache=True,
+        tie_word_embeddings=False,
+        rope_theta=1000000.0,
+        rope_scaling=None,
+        use_sliding_window=False,
+        sliding_window=131072,
+        max_window_layers=28,
+        layer_types=None,
+        attention_dropout=0.0,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_position_embeddings
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.use_sliding_window = use_sliding_window
+        self.sliding_window = sliding_window if self.use_sliding_window else None
+        self.max_window_layers = max_window_layers
+        if num_key_value_heads is None:
+            num_key_value_heads = num_attention_heads
+        self.num_key_value_heads = num_key_value_heads
+        self.hidden_act = hidden_act
+        self.initializer_range = initializer_range
+        self.rms_norm_eps = rms_norm_eps
+        self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.rope_scaling = rope_scaling
+        self.attention_dropout = attention_dropout
+        if self.rope_scaling is not None and "type" in self.rope_scaling:
+            self.rope_scaling["rope_type"] = self.rope_scaling["type"]
+        rope_config_validation(self)
+
+        self.layer_types = layer_types
+        if self.layer_types is None:
+            self.layer_types = [
+                "sliding_attention"
+                if self.sliding_window is not None and i >= self.max_window_layers
+                else "full_attention"
+                for i in range(self.num_hidden_layers)
+            ]
+
+        super().__init__(
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )
+
+
+class CheersConfig(PretrainedConfig):
+    """Configuration class for Cheers (UMM) model."""
+
+    model_type = "umm"
+
+    def __init__(
+        self,
+        text_config: dict | CheersTextConfig | None = None,
+        vision_representation_config: dict | SiglipVisionConfig | None = None,
+        vae_encoder_config: dict | None = None,
+        vae_decoder_config: dict | None = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+
+        if isinstance(text_config, dict):
+            self.text_config = CheersTextConfig(**text_config)
+        else:
+            self.text_config = text_config or CheersTextConfig()
+
+        if isinstance(vision_representation_config, dict):
+            self.vision_representation_config = SiglipVisionConfig(
+                **vision_representation_config
+            )
+        else:
+            self.vision_representation_config = (
+                vision_representation_config or SiglipVisionConfig()
+            )
+
+        self.vae_encoder_config = vae_encoder_config or {"resolution": 512}
+        self.vae_decoder_config = vae_decoder_config or {"resolution": 512}
+
+    @property
+    def hidden_size(self) -> int:
+        """Return the hidden size of the language model."""
+        return self.text_config.hidden_size
diff --git a/vllm/transformers_utils/configs/colmodernvbert.py b/vllm/transformers_utils/configs/colmodernvbert.py
index 656980739689..17558aafcec7 100644
--- a/vllm/transformers_utils/configs/colmodernvbert.py
+++ b/vllm/transformers_utils/configs/colmodernvbert.py
@@ -17,43 +17,41 @@ class ColModernVBertConfig(PretrainedConfig):
     def __init__(
         self,
         embedding_dim: int = 128,
-        vlm_config: dict | None = None,
+        image_token_id: int = 50407,
+        pixel_shuffle_factor: int = 4,
+        text_config: dict | None = None,
+        vision_config: dict | None = None,
         **kwargs,
     ):
         self.embedding_dim = embedding_dim
+        self.image_token_id = image_token_id
+        self.pixel_shuffle_factor = pixel_shuffle_factor
 
-        if vlm_config is None:
-            vlm_config = {}
+        text_config = text_config or {}
+        self.hidden_size = text_config.get("hidden_size", 768)
 
-        # Top-level VLM fields
-        self.image_token_id = vlm_config.get("image_token_id", 50407)
-        self.pixel_shuffle_factor = vlm_config.get("pixel_shuffle_factor", 4)
-        self.hidden_size = vlm_config.get("hidden_size", 768)
-        additional_vocab_size = vlm_config.get("additional_vocab_size", 40)
-
-        # Text config (ModernBERT)
-        text_cfg = vlm_config.get("text_config", {})
-        base_vocab = text_cfg.get("vocab_size", 50368)
         self.text_config = ModernBertConfig(
-            vocab_size=base_vocab + additional_vocab_size,
-            hidden_size=text_cfg.get("hidden_size", 768),
-            intermediate_size=text_cfg.get("intermediate_size", 1152),
-            num_hidden_layers=text_cfg.get("num_hidden_layers", 22),
-            num_attention_heads=text_cfg.get("num_attention_heads", 12),
-            mlp_bias=text_cfg.get("mlp_bias", False),
-            max_position_embeddings=vlm_config.get("max_position_embeddings", 8192),
+            vocab_size=text_config.get("vocab_size", 50408),
+            hidden_size=text_config.get("hidden_size", 768),
+            intermediate_size=text_config.get("intermediate_size", 1152),
+            num_hidden_layers=text_config.get("num_hidden_layers", 22),
+            num_attention_heads=text_config.get("num_attention_heads", 12),
+            mlp_bias=text_config.get("mlp_bias", False),
+            max_position_embeddings=text_config.get("max_position_embeddings", 8192),
         )
 
-        # Vision config (SigLIP)
-        vis_cfg = vlm_config.get("vision_config", {})
+        vision_config = vision_config or {}
         self.vision_config = SiglipVisionConfig(
-            hidden_size=vis_cfg.get("embed_dim", 768),
-            image_size=vis_cfg.get("image_size", 512),
-            patch_size=vis_cfg.get("patch_size", 16),
-            num_hidden_layers=vis_cfg.get("num_hidden_layers", 12),
-            intermediate_size=vis_cfg.get("intermediate_size", 3072),
-            num_attention_heads=vis_cfg.get("num_attention_heads", 12),
+            hidden_size=vision_config.get("hidden_size", 768),
+            image_size=vision_config.get("image_size", 512),
+            patch_size=vision_config.get("patch_size", 16),
+            num_hidden_layers=vision_config.get("num_hidden_layers", 12),
+            intermediate_size=vision_config.get("intermediate_size", 3072),
+            num_attention_heads=vision_config.get("num_attention_heads", 12),
         )
+
+        # Ensure architectures is set so vLLM routes to our model class
+        kwargs.setdefault("architectures", ["ColModernVBertForRetrieval"])
         super().__init__(**kwargs)
 
     @property
diff --git a/vllm/transformers_utils/configs/deepseek_v4.py b/vllm/transformers_utils/configs/deepseek_v4.py
new file mode 100755
index 000000000000..7708272c3bd4
--- /dev/null
+++ b/vllm/transformers_utils/configs/deepseek_v4.py
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import Any
+
+from transformers import PretrainedConfig
+
+
+class DeepseekV4Config(PretrainedConfig):
+    model_type = "deepseek_v4"
+
+    def __init__(
+        self,
+        max_position_embeddings: int = 1048576,
+        rope_scaling: dict[str, Any] | None = None,
+        rope_parameters: dict[str, Any] | None = None,
+        rope_theta: float = 10000.0,
+        **kwargs,
+    ):
+        self.max_position_embeddings = max_position_embeddings
+        self.rope_scaling = rope_scaling
+        self.rope_theta = rope_theta
+        self.rope_parameters = rope_scaling or rope_parameters
+        super().__init__(**kwargs)
diff --git a/vllm/transformers_utils/configs/deepseek_vl2.py b/vllm/transformers_utils/configs/deepseek_vl2.py
index 03f24319e287..3d3e20fea856 100644
--- a/vllm/transformers_utils/configs/deepseek_vl2.py
+++ b/vllm/transformers_utils/configs/deepseek_vl2.py
@@ -101,7 +101,6 @@ class DeepseekVLV2TextConfig(DeepseekV2Config):
 
 class DeepseekVLV2Config(PretrainedConfig):
     model_type = "deepseek_vl_v2"
-    architectures: list[str] | None = None
 
     tile_tag: str = "2D"
     global_view_pos: str = "head"
@@ -114,17 +113,11 @@ def __init__(
         candidate_resolutions: tuple[tuple[int, int]] = ((384, 384),),
         **kwargs,
     ):
-        if "architectures" not in kwargs:
-            kwargs["architectures"] = ["DeepseekVLV2ForCausalLM"]
+        architectures = kwargs.setdefault("architectures", ["DeepseekVLV2ForCausalLM"])
 
-        vision_config = kwargs.pop("vision_config", {})
-        self.vision_config = VisionEncoderConfig(**vision_config)
-
-        projector_config = kwargs.pop("projector_config", {})
-        self.projector_config = MlpProjectorConfig(**projector_config)
-
-        language_config = kwargs.pop("language_config", {})
-        self.text_config = DeepseekVLV2TextConfig(**language_config)
+        self.vision_config = VisionEncoderConfig(**kwargs.pop("vision_config", {}))
+        self.projector_config = MlpProjectorConfig(**kwargs.pop("projector_config", {}))
+        self.text_config = DeepseekVLV2TextConfig(**kwargs.pop("language_config", {}))
 
         self.tile_tag = tile_tag
         self.global_view_pos = global_view_pos
@@ -132,8 +125,8 @@ def __init__(
         self.vocab_size = self.text_config.vocab_size
 
         # update model_type for OCR models
-        if "DeepseekOCRForCausalLM" in kwargs["architectures"]:
-            self.model_type = "deepseek_ocr"
-        elif "DeepseekOCR2ForCausalLM" in kwargs["architectures"]:
-            self.model_type = "deepseek_ocr2"
+        if "DeepseekOCRForCausalLM" in architectures:
+            kwargs["model_type"] = "deepseek_ocr"
+        elif "DeepseekOCR2ForCausalLM" in architectures:
+            kwargs["model_type"] = "deepseek_ocr2"
         super().__init__(**kwargs)
diff --git a/vllm/transformers_utils/configs/eagle.py b/vllm/transformers_utils/configs/eagle.py
index 902e335cb632..682828c0a594 100644
--- a/vllm/transformers_utils/configs/eagle.py
+++ b/vllm/transformers_utils/configs/eagle.py
@@ -62,9 +62,20 @@ def __init__(
                 else f"Eagle3{arch}"
                 for arch in self.model.architectures
             ]
+        elif method == "dflash":
+            assert self.model is not None, (
+                "model should not be None when method is dflash"
+            )
+            kwargs["architectures"] = [
+                arch
+                if arch.startswith("DFlash") or arch.endswith("DFlash")
+                else f"DFlash{arch}"
+                for arch in self.model.architectures
+            ]
         else:
             raise ValueError(
-                f"Invalid method {method}. Supported methods are eagle and eagle3."
+                f"Invalid method {method}. Supported methods are "
+                "eagle, eagle3, and dflash."
             )
 
         super().__init__(**kwargs)
diff --git a/vllm/transformers_utils/configs/extract_hidden_states.py b/vllm/transformers_utils/configs/extract_hidden_states.py
index 5391fbe1ad53..2beec0e3081b 100644
--- a/vllm/transformers_utils/configs/extract_hidden_states.py
+++ b/vllm/transformers_utils/configs/extract_hidden_states.py
@@ -23,10 +23,14 @@ def __init__(
 
         if isinstance(model, dict):
             model_dict = model
+            source_text_config = None
         elif isinstance(model, PretrainedConfig):
             model_dict = model.to_dict()
+            text_config = model.get_text_config()
+            source_text_config = text_config if text_config is not model else None
         else:
             model_dict = {}
+            source_text_config = None
 
         # Combine: model_dict first, then kwargs override
         combined = {**model_dict, **kwargs}
@@ -35,6 +39,12 @@ def __init__(
 
         combined["architectures"] = ["ExtractHiddenStatesModel"]
 
+        # to_dict() and kwargs both flatten text_config to a plain dict;
+        # downstream get_hf_text_config() needs it as a PretrainedConfig
+        # for attribute access. Re-insert the original object.
+        if source_text_config is not None:
+            combined["text_config"] = source_text_config
+
         super().__init__(**combined)
 
     @classmethod
diff --git a/vllm/transformers_utils/configs/fireredlid.py b/vllm/transformers_utils/configs/fireredlid.py
new file mode 100644
index 000000000000..a71062d08459
--- /dev/null
+++ b/vllm/transformers_utils/configs/fireredlid.py
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from __future__ import annotations
+
+import contextlib
+
+from transformers import AutoConfig
+from transformers.configuration_utils import PretrainedConfig
+
+
+class FireRedLIDConfig(PretrainedConfig):
+    """Minimal config class for native vLLM FireRedLID support."""
+
+    model_type = "fireredlid"
+    keys_to_ignore_at_inference = ["past_key_values"]
+
+    def __init__(
+        self,
+        vocab_size: int = 120,
+        lid_odim: int = 120,
+        idim: int = 80,
+        d_model: int = 1280,
+        n_head: int = 20,
+        n_layers_enc: int = 16,
+        n_layers_lid_dec: int = 6,
+        kernel_size: int = 33,
+        residual_dropout: float = 0.05,
+        dropout_rate: float = 0.05,
+        pe_maxlen: int = 5000,
+        pad_token_id: int = 2,
+        bos_token_id: int = 3,
+        eos_token_id: int = 4,
+        decoder_start_token_id: int = 3,
+        tie_word_embeddings: bool = True,
+        is_encoder_decoder: bool = True,
+        architectures: list[str] | None = None,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.lid_odim = lid_odim
+        self.idim = idim
+        self.d_model = d_model
+        self.hidden_size = d_model
+        self.n_head = n_head
+        self.num_attention_heads = n_head
+        self.n_layers_enc = n_layers_enc
+        self.encoder_layers = n_layers_enc
+        self.n_layers_lid_dec = n_layers_lid_dec
+        self.decoder_layers = n_layers_lid_dec
+        self.num_hidden_layers = n_layers_lid_dec
+        self.kernel_size = kernel_size
+        self.residual_dropout = residual_dropout
+        self.dropout_rate = dropout_rate
+        self.pe_maxlen = pe_maxlen
+        self.tie_word_embeddings = tie_word_embeddings
+        self.is_encoder_decoder = is_encoder_decoder
+        self.architectures = architectures or ["FireRedLIDForConditionalGeneration"]
+
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            decoder_start_token_id=decoder_start_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            is_encoder_decoder=is_encoder_decoder,
+            architectures=self.architectures,
+            **kwargs,
+        )
+
+
+with contextlib.suppress(ValueError):
+    AutoConfig.register(FireRedLIDConfig.model_type, FireRedLIDConfig)
diff --git a/vllm/transformers_utils/configs/granite4_vision.py b/vllm/transformers_utils/configs/granite4_vision.py
new file mode 100644
index 000000000000..a67e6ffeb8ee
--- /dev/null
+++ b/vllm/transformers_utils/configs/granite4_vision.py
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import Any
+
+import transformers
+
+
+class Granite4VisionConfig(transformers.PretrainedConfig):
+    """Configuration for Granite 4 Vision model.
+
+    This config is needed because the granite4_vision model type is not yet
+    in the transformers version pinned by vLLM.  Once transformers adds native
+    support, this file can be removed and the _CONFIG_REGISTRY entry dropped.
+    """
+
+    model_type = "granite4_vision"
+    is_composition = False
+
+    def __init__(
+        self,
+        vision_config: dict[str, Any] | None = None,
+        text_config: dict[str, Any] | None = None,
+        image_token_index: int = 100352,
+        image_seq_length: int = 576,
+        image_grid_pinpoints: list[list[int]] | None = None,
+        vision_feature_select_strategy: str = "full",
+        vision_feature_layer: int | list[int] = -2,
+        projector_hidden_act: str = "gelu",
+        projector_dropout: float = 0.1,
+        downsample_rate: str | None = None,
+        use_image_newline_parameter: bool = True,
+        deepstack_layer_map: list[list[int]] | None = None,
+        use_spatial_sampling: bool = False,
+        spatial_stride: int = 2,
+        spatial_vision_layer: int = -1,
+        spatial_target_layers: list[int] | None = None,
+        # Hub aliases — base model config uses different field names
+        vision_layer_to_llm_layer: list[list[int]] | None = None,
+        use_checkerboard_sampling: bool | None = None,
+        checkerboard_stride: int | None = None,
+        checkerboard_vision_layer: int | None = None,
+        checkerboard_llm_layers: list[int] | None = None,
+        **kwargs: Any,
+    ):
+        self.image_token_index = image_token_index
+        self.image_seq_length = image_seq_length
+        self.image_grid_pinpoints = image_grid_pinpoints or []
+        self.vision_feature_select_strategy = vision_feature_select_strategy
+        self.vision_feature_layer = vision_feature_layer
+        self.projector_hidden_act = projector_hidden_act
+        self.projector_dropout = projector_dropout
+        self.downsample_rate = downsample_rate
+        self.use_image_newline_parameter = use_image_newline_parameter
+        self.deepstack_layer_map = deepstack_layer_map or vision_layer_to_llm_layer
+        self.use_spatial_sampling = (
+            use_spatial_sampling
+            if use_checkerboard_sampling is None
+            else use_checkerboard_sampling
+        )
+        self.spatial_stride = (
+            spatial_stride if checkerboard_stride is None else checkerboard_stride
+        )
+        self.spatial_vision_layer = (
+            spatial_vision_layer
+            if checkerboard_vision_layer is None
+            else checkerboard_vision_layer
+        )
+        self.spatial_target_layers = (
+            spatial_target_layers or checkerboard_llm_layers or [0, 10, 20, 30]
+        )
+
+        if vision_config is None:
+            vision_config = {}
+        if text_config is None:
+            text_config = {}
+
+        vision_model_type = vision_config.get("model_type", "siglip_vision_model")
+        if vision_model_type in transformers.CONFIG_MAPPING:
+            self.vision_config = transformers.CONFIG_MAPPING[vision_model_type](
+                **vision_config
+            )
+        else:
+            self.vision_config = transformers.PretrainedConfig(**vision_config)
+
+        text_model_type = text_config.get("model_type", "granite")
+        if text_model_type in transformers.CONFIG_MAPPING:
+            self.text_config = transformers.CONFIG_MAPPING[text_model_type](
+                **text_config
+            )
+        else:
+            self.text_config = transformers.PretrainedConfig(**text_config)
+
+        super().__init__(**kwargs)
diff --git a/vllm/transformers_utils/configs/hy_v3.py b/vllm/transformers_utils/configs/hy_v3.py
new file mode 100644
index 000000000000..9425caf4e03f
--- /dev/null
+++ b/vllm/transformers_utils/configs/hy_v3.py
@@ -0,0 +1,185 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import Any
+
+from transformers.configuration_utils import PretrainedConfig
+
+
+class HYV3Config(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`HYV3Model`].
+    It is used to instantiate a HYV3 model (HY V3 MoE language model) according to
+    the specified arguments.
+
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to
+    control the model outputs. Read the documentation from [`PretrainedConfig`]
+    for more information.
+
+    Args:
+        vocab_size (`int`, *optional*, defaults to 120832):
+            Vocabulary size of the model.
+        hidden_size (`int`, *optional*, defaults to 4096):
+            Dimension of the hidden representations.
+        intermediate_size (`int`, *optional*, defaults to 13312):
+            Dimension of the dense FFN intermediate representations.
+        num_hidden_layers (`int`, *optional*, defaults to 80):
+            Number of hidden layers in the Transformer decoder.
+        num_attention_heads (`int`, *optional*, defaults to 64):
+            Number of attention heads for each attention layer.
+        num_key_value_heads (`int`, *optional*, defaults to 8):
+            Number of key-value heads for grouped-query attention.
+        head_dim (`int`, *optional*, defaults to 128):
+            Dimension per attention head.
+        hidden_act (`str`, *optional*, defaults to `"silu"`):
+            Activation function used in FFN layers.
+        max_position_embeddings (`int`, *optional*, defaults to 131072):
+            Maximum sequence length supported by the model.
+        initializer_range (`float`, *optional*, defaults to 0.006):
+            Standard deviation of the truncated normal initializer for weight
+            initialization.
+        rms_norm_eps (`float`, *optional*, defaults to 1e-5):
+            Epsilon for RMS normalization layers.
+        use_cache (`bool`, *optional*, defaults to `True`):
+            Whether to use KV cache for decoding.
+        pad_token_id (`int`, *optional*):
+            Padding token id.
+        bos_token_id (`int`, *optional*):
+            Beginning-of-sequence token id.
+        eos_token_id (`int` or `List[int]`, *optional*):
+            End-of-sequence token id(s).
+        rope_parameters (`dict`, *optional*):
+            The parameters of the RoPE embeddings.
+        qk_norm (`bool`, *optional*, defaults to `True`):
+            Whether to apply RMSNorm to query and key states before attention.
+        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
+            Whether to tie input and output embedding weights.
+        enable_attention_fp32_softmax (`bool`, *optional*, defaults to `False`):
+            Whether to upcast attention softmax to float32. Note: the eager attention
+            path always computes softmax in float32 regardless of this setting; this
+            flag is reserved for future use with custom attention backends.
+        enable_lm_head_fp32 (`bool`, *optional*, defaults to `True`):
+            Whether to upcast the LM head computation to float32.
+        num_experts (`int`, *optional*, defaults to 192):
+            Total number of MoE experts.
+        num_experts_per_tok (`int`, *optional*, defaults to 8):
+            Number of experts selected per token (top-k routing).
+        num_shared_experts (`int`, *optional*, defaults to 1):
+            Number of always-active shared experts combined into a single MLP.
+        expert_hidden_dim (`int`, *optional*, defaults to 1536):
+            Intermediate dimension of each individual MoE expert.
+        moe_router_enable_expert_bias (`bool`, *optional*, defaults to `True`):
+            Whether to use per-expert load-balancing bias in the router.
+        moe_router_use_sigmoid (`bool`, *optional*, defaults to `True`):
+            Whether to use sigmoid (instead of softmax) for router scoring.
+        route_norm (`bool`, *optional*, defaults to `True`):
+            Whether to normalize routing scores when using sigmoid routing.
+        router_scaling_factor (`float`, *optional*):
+            Optional multiplicative scaling factor applied to routing scores.
+        use_grouped_mm (`bool`, *optional*, defaults to `False`):
+            Whether to use grouped GEMM for expert computation (not yet implemented).
+        enable_moe_fp32_combine (`bool`, *optional*, defaults to `False`):
+            Whether to accumulate expert outputs in float32.
+        first_k_dense_replace (`int`, *optional*, defaults to 1):
+            Number of initial decoder layers that use a dense FFN instead of MoE.
+        output_router_logits (`bool`, *optional*, defaults to `False`):
+            Whether to output router logits from each MoE layer. Useful for computing
+            auxiliary load-balancing loss during training. Disabled by default to avoid
+            the memory overhead of storing per-layer router tensors during inference.
+
+    Example:
+        ```python
+        >>> from transformers import HYV3Config, HYV3Model
+
+        >>> config = HYV3Config()
+        >>> model = HYV3Model(config)
+        ```
+    """
+
+    model_type = "hy_v3"
+    keys_to_ignore_at_inference = ["past_key_values"]
+
+    def __init__(
+        self,
+        vocab_size=120832,
+        hidden_size=4096,
+        intermediate_size=13312,
+        num_hidden_layers=80,
+        num_attention_heads=64,
+        num_key_value_heads=8,
+        head_dim=128,
+        hidden_act="silu",
+        max_position_embeddings=131072,
+        initializer_range=0.006,
+        rms_norm_eps=1e-5,
+        use_cache=True,
+        pad_token_id=None,
+        bos_token_id=None,
+        eos_token_id=None,
+        rope_parameters: dict[str, Any] | None = None,
+        qk_norm=True,
+        tie_word_embeddings=False,
+        enable_attention_fp32_softmax=False,
+        enable_lm_head_fp32=True,
+        # MoE specific
+        num_experts=192,
+        num_experts_per_tok=8,
+        num_shared_experts=1,
+        expert_hidden_dim=1536,
+        moe_router_enable_expert_bias=True,
+        moe_router_use_sigmoid=True,
+        route_norm=True,
+        router_scaling_factor=None,
+        use_grouped_mm=False,
+        enable_moe_fp32_combine=False,
+        # Dense/MoE layer control
+        first_k_dense_replace=1,
+        output_router_logits=False,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.num_key_value_heads = num_key_value_heads
+        self.head_dim = head_dim
+        self.hidden_act = hidden_act
+        self.max_position_embeddings = max_position_embeddings
+        self.initializer_range = initializer_range
+        self.rms_norm_eps = rms_norm_eps
+        self.use_cache = use_cache
+        rope_theta = kwargs.pop("rope_theta", 11158840.0)
+        if rope_parameters is None:
+            rope_parameters = {"rope_type": "default", "rope_theta": rope_theta}
+        self.rope_parameters = rope_parameters
+        self.qk_norm = qk_norm
+        self.tie_word_embeddings = tie_word_embeddings
+        self.enable_lm_head_fp32 = enable_lm_head_fp32
+        self.enable_attention_fp32_softmax = enable_attention_fp32_softmax
+
+        # MoE specific
+        self.num_experts = num_experts
+        self.num_experts_per_tok = num_experts_per_tok
+        self.num_shared_experts = num_shared_experts
+        self.expert_hidden_dim = expert_hidden_dim
+        self.moe_router_enable_expert_bias = moe_router_enable_expert_bias
+        self.moe_router_use_sigmoid = moe_router_use_sigmoid
+        self.route_norm = route_norm
+        self.use_grouped_mm = use_grouped_mm
+        self.router_scaling_factor = router_scaling_factor
+        self.enable_moe_fp32_combine = enable_moe_fp32_combine
+
+        # Dense/MoE layer control
+        self.first_k_dense_replace = first_k_dense_replace
+        self.output_router_logits = output_router_logits
+
+        if eos_token_id is not None and isinstance(eos_token_id, int):
+            eos_token_id = [eos_token_id]
+
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )
diff --git a/vllm/transformers_utils/configs/hyperclovax.py b/vllm/transformers_utils/configs/hyperclovax.py
index 9fa823743d66..d1a3218fe4dd 100644
--- a/vllm/transformers_utils/configs/hyperclovax.py
+++ b/vllm/transformers_utils/configs/hyperclovax.py
@@ -17,6 +17,7 @@
 # limitations under the License.
 """HyperCLOVA X model configuration."""
 
+from transformers import AutoConfig
 from transformers.configuration_utils import PretrainedConfig
 
 
@@ -275,3 +276,74 @@ def __init__(
             auto_map=auto_map,
             **kwargs,
         )
+
+
+class HCXVisionConfig(PretrainedConfig):
+    """Vendored HyperCLOVAX Vision config with transformers v5 fix.
+
+    The original remote code config does not handle empty initialization
+    (text_config=None), which breaks transformers v5's @strict validation.
+
+    TODO: Remove this class once HyperCLOVAX is upstreamed to transformers.
+    Tracking PR: https://github.com/huggingface/transformers/pull/44956
+    """
+
+    model_type = "hyperclovax_vlm"
+    keys_to_ignore_at_inference = ["past_key_values"]
+
+    text_config_attribute_map = {
+        "n_embd": "hidden_size",
+        "n_positions": "max_position_embeddings",
+        "n_head": "num_attention_heads",
+        "n_layer": "num_hidden_layers",
+    }
+
+    def __init__(
+        self,
+        text_config=None,
+        vision_config=None,
+        use_nth_layer=-2,
+        img_start_id=100009,
+        decoder_max_length=4096,
+        anyres=False,
+        unpad=False,
+        max_num_grids=-1,
+        num_queries_vis_abstractor=-1,
+        ignore_index=-100,
+        proj_pos_emb=True,
+        proj_prenorm=False,
+        use_1x1_grid=False,
+        **kwargs,
+    ):
+        for key, val in self.text_config_attribute_map.items():
+            if text_config is not None and key in text_config:
+                text_config[val] = text_config.pop(key)
+
+        self.text_config = None
+        if text_config is not None:
+            _text_config = AutoConfig.for_model(text_config["model_type"])
+            self.text_config = _text_config.from_dict(text_config)
+            self.hidden_size = self.text_config.hidden_size
+
+        self.vision_config = None
+        if vision_config is not None:
+            _vision_config = AutoConfig.for_model(vision_config["model_type"])
+            self.vision_config = _vision_config.from_dict(vision_config)
+
+        self.use_nth_layer = use_nth_layer
+        self.decoder_max_length = decoder_max_length
+        self.anyres = anyres
+        self.unpad = unpad
+        self.max_num_grids = max_num_grids
+        self.num_queries_vis_abstractor = num_queries_vis_abstractor
+        self.img_start_id = img_start_id
+        self.ignore_index = ignore_index
+        self.proj_pos_emb = proj_pos_emb
+        self.proj_prenorm = proj_prenorm
+        self.use_1x1_grid = use_1x1_grid
+        super().__init__(**kwargs)
+
+    def get_text_config(self, decoder=False):
+        if self.text_config is not None:
+            return self.text_config
+        return self
diff --git a/vllm/transformers_utils/configs/laguna.py b/vllm/transformers_utils/configs/laguna.py
new file mode 100644
index 000000000000..2702d3af5aa1
--- /dev/null
+++ b/vllm/transformers_utils/configs/laguna.py
@@ -0,0 +1,120 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from transformers.configuration_utils import PretrainedConfig
+
+
+class LagunaConfig(PretrainedConfig):
+    model_type = "laguna"
+    keys_to_ignore_at_inference = ["past_key_values"]
+    base_model_tp_plan = {
+        "layers.*.self_attn.q_proj": "colwise",
+        "layers.*.self_attn.k_proj": "colwise",
+        "layers.*.self_attn.v_proj": "colwise",
+        "layers.*.self_attn.g_proj": "colwise",
+        "layers.*.self_attn.o_proj": "rowwise",
+        "layers.*.mlp.gate_proj": "colwise",
+        "layers.*.mlp.up_proj": "colwise",
+        "layers.*.mlp.down_proj": "rowwise",
+    }
+    base_model_pp_plan = {
+        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
+        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
+        "norm": (["hidden_states"], ["hidden_states"]),
+    }
+
+    def __init__(
+        self,
+        vocab_size: int = 100352,
+        hidden_size: int = 2048,
+        intermediate_size: int = 8192,
+        num_hidden_layers: int = 40,
+        num_attention_heads: int = 48,
+        num_key_value_heads: int = 8,
+        head_dim: int = 128,
+        qkv_bias: bool = False,
+        attention_bias: bool = False,
+        gating: bool | str = True,
+        hidden_act: str = "silu",
+        max_position_embeddings: int = 131072,
+        initializer_range: float = 0.02,
+        rms_norm_eps: float = 1e-6,
+        use_cache: bool = True,
+        tie_word_embeddings: bool = False,
+        rope_theta: float = 500000.0,
+        rope_scaling: dict | None = None,
+        rope_parameters: dict | None = None,
+        partial_rotary_factor: float = 1.0,
+        attention_dropout: float = 0.0,
+        sliding_window: int | None = None,
+        layer_types: list[str] | None = None,
+        swa_attention_sink_enabled: bool = False,
+        swa_rope_parameters: dict | None = None,
+        num_attention_heads_per_layer: list[int] | None = None,
+        num_experts: int = 256,
+        num_experts_per_tok: int = 8,
+        moe_intermediate_size: int = 512,
+        shared_expert_intermediate_size: int = 512,
+        norm_topk_prob: bool = True,
+        decoder_sparse_step: int = 1,
+        mlp_only_layers: list[int] | None = None,
+        router_aux_loss_coef: float = 0.001,
+        output_router_logits: bool = False,
+        moe_routed_scaling_factor: float = 1.0,
+        moe_apply_router_weight_on_input: bool = False,
+        **kwargs,
+    ):
+        if mlp_only_layers is None:
+            mlp_only_layers = [0]
+
+        # Accept either v4-style (rope_theta + rope_scaling) or v5-style
+        # (rope_parameters). Translate v5 → v4 so downstream code has one path.
+        if rope_parameters is not None:
+            rp = dict(rope_parameters)
+            rope_theta = float(rp.pop("rope_theta", rope_theta))
+            rt = rp.pop("rope_type", None)
+            if rt is not None and rt != "default":
+                rope_scaling = {"rope_type": rt, **rp}
+            elif rp and rope_scaling is None:
+                rope_scaling = {"rope_type": "default", **rp}
+
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.num_key_value_heads = num_key_value_heads
+        self.head_dim = head_dim
+        self.qkv_bias = qkv_bias
+        self.attention_bias = attention_bias
+        self.gating = gating
+        self.hidden_act = hidden_act
+        self.max_position_embeddings = max_position_embeddings
+        self.initializer_range = initializer_range
+        self.rms_norm_eps = rms_norm_eps
+        self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.rope_scaling = rope_scaling
+        self.partial_rotary_factor = partial_rotary_factor
+        self.attention_dropout = attention_dropout
+        self.sliding_window = sliding_window
+        self.layer_types = layer_types
+        self.swa_attention_sink_enabled = swa_attention_sink_enabled
+        self.swa_rope_parameters = swa_rope_parameters
+        self.num_attention_heads_per_layer = num_attention_heads_per_layer
+        self.num_experts = num_experts
+        self.num_experts_per_tok = num_experts_per_tok
+        self.moe_intermediate_size = moe_intermediate_size
+        self.shared_expert_intermediate_size = shared_expert_intermediate_size
+        self.norm_topk_prob = norm_topk_prob
+        self.decoder_sparse_step = decoder_sparse_step
+        self.mlp_only_layers = mlp_only_layers
+        self.router_aux_loss_coef = router_aux_loss_coef
+        self.output_router_logits = output_router_logits
+        self.moe_routed_scaling_factor = moe_routed_scaling_factor
+        self.moe_apply_router_weight_on_input = moe_apply_router_weight_on_input
+
+        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
+
+
+__all__ = ["LagunaConfig"]
diff --git a/vllm/transformers_utils/configs/mimo_v2_omni.py b/vllm/transformers_utils/configs/mimo_v2_omni.py
new file mode 100644
index 000000000000..b87ca22a9a83
--- /dev/null
+++ b/vllm/transformers_utils/configs/mimo_v2_omni.py
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+from transformers import PretrainedConfig
+
+
+class Mimo_VLVisionConfig(PretrainedConfig):
+    model_type = "mimovl"
+    base_config_key = "vision_config"
+
+    def __init__(
+        self,
+        depth=28,
+        hidden_size=1280,
+        hidden_act="silu",
+        intermediate_size=4608,
+        num_heads=32,
+        in_channels=3,
+        patch_size=16,
+        spatial_merge_size=2,
+        temporal_patch_size=2,
+        tokens_per_second=2,
+        window_size=128,
+        out_hidden_size=2048,
+        fullatt_block_indexes=None,
+        initializer_range=0.02,
+        kv_channels=64,  # HACK
+        qk_channels=64,
+        num_query_groups=4,
+        num_key_value_heads=8,
+        vit_window_attn_types=None,
+        visual_token_window_size=64,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+
+        self.depth = depth
+        self.hidden_size = hidden_size
+        self.hidden_act = hidden_act
+        self.intermediate_size = intermediate_size
+        self.num_heads = num_heads
+        # Support GQA: if num_key_value_heads is not provided,
+        # default to num_heads (MHA)
+        if num_key_value_heads is None:
+            num_key_value_heads = num_heads
+        self.num_key_value_heads = num_key_value_heads
+        self.in_channels = in_channels
+        self.patch_size = patch_size
+        self.spatial_merge_size = spatial_merge_size
+        self.temporal_patch_size = temporal_patch_size
+        self.tokens_per_second = tokens_per_second
+        self.window_size = window_size
+        self.fullatt_block_indexes = (
+            fullatt_block_indexes
+            if fullatt_block_indexes is not None
+            else [7, 15, 23, 31]
+        )
+        self.out_hidden_size = out_hidden_size
+        self.initializer_range = initializer_range
+        self.kv_channels = kv_channels
+        self.qk_channels = qk_channels
+        self.num_query_groups = num_query_groups
+        self.vit_window_attn_types = vit_window_attn_types or [-1] * depth
+        self.visual_token_window_size = visual_token_window_size
diff --git a/vllm/transformers_utils/configs/mistral.py b/vllm/transformers_utils/configs/mistral.py
index bdeadec1bf07..2b0796691472 100644
--- a/vllm/transformers_utils/configs/mistral.py
+++ b/vllm/transformers_utils/configs/mistral.py
@@ -2,7 +2,9 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from typing import Any
 
+from packaging.version import Version
 from transformers import PretrainedConfig, WhisperConfig
+from transformers import __version__ as TRANSFORMERS_VERSION
 
 from vllm.logger import init_logger
 
@@ -134,6 +136,10 @@ def _remap_mistral_yarn_args(config: dict) -> dict:
             # Cast to remove Transformers > v5 type warnings
             config["rope_parameters"][new_name] = cast(yarn_config.pop(old_name))
 
+    # Ignore apply_yarn_scaling in Transformers > v5 RoPE validation to remove warnings
+    if Version(TRANSFORMERS_VERSION) >= Version("5.3.0.dev0"):
+        config["ignore_keys_at_rope_validation"] = {"apply_yarn_scaling"}
+
     assert len(yarn_config) == 0, f"Unparsed yarn config: {yarn_config}"
 
     return config
diff --git a/vllm/transformers_utils/configs/moondream3.py b/vllm/transformers_utils/configs/moondream3.py
new file mode 100644
index 000000000000..307bb2977206
--- /dev/null
+++ b/vllm/transformers_utils/configs/moondream3.py
@@ -0,0 +1,152 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Configuration for Moondream3 model."""
+
+from transformers import PretrainedConfig
+
+
+class Moondream3VisionConfig(PretrainedConfig):
+    """Vision encoder configuration for Moondream3."""
+
+    model_type = "moondream3_vision"
+
+    def __init__(
+        self,
+        enc_dim: int = 1152,
+        enc_patch_size: int = 14,
+        enc_n_layers: int = 27,
+        enc_ff_dim: int = 4304,
+        enc_n_heads: int = 16,
+        proj_inner_dim: int = 8192,
+        crop_size: int = 378,
+        max_crops: int = 12,
+        overlap_margin: int = 4,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.enc_dim = enc_dim
+        self.enc_patch_size = enc_patch_size
+        self.enc_n_layers = enc_n_layers
+        self.enc_ff_dim = enc_ff_dim
+        self.enc_n_heads = enc_n_heads
+        self.proj_inner_dim = proj_inner_dim
+        self.crop_size = crop_size
+        self.max_crops = max_crops
+        self.overlap_margin = overlap_margin
+
+        # Standard HuggingFace attributes for vision config
+        self.hidden_size = enc_dim
+        self.num_attention_heads = enc_n_heads
+        self.num_hidden_layers = enc_n_layers
+        self.intermediate_size = enc_ff_dim
+        self.patch_size = enc_patch_size
+        self.image_size = crop_size
+
+
+class Moondream3TextConfig(PretrainedConfig):
+    """Text decoder configuration for Moondream3."""
+
+    model_type = "moondream3_text"
+
+    def __init__(
+        self,
+        dim: int = 2048,
+        ff_dim: int = 8192,
+        n_layers: int = 24,
+        vocab_size: int = 51200,
+        max_context: int = 4096,
+        n_heads: int = 32,
+        n_kv_heads: int = 32,
+        prefix_attn: int = 730,
+        rope_theta: float = 1500000.0,
+        moe: dict | None = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+
+        # Store original moondream3 config names
+        self.dim = dim
+        self.ff_dim = ff_dim
+        self.n_layers = n_layers
+        self.n_heads = n_heads
+        self.n_kv_heads = n_kv_heads
+        self.prefix_attn = prefix_attn
+        self.max_context = max_context
+        self.rope_theta = rope_theta
+
+        # MoE config
+        moe = moe or {}
+        self.moe_start_layer = moe.get("start_layer", 4)
+        self.moe_num_experts = moe.get("n_experts", 64)
+        self.moe_experts_per_token = moe.get("n_experts_per_tok", 8)
+        self.moe_expert_inner_dim = moe.get("expert_inner_dim", 1024)
+
+        # Standard HuggingFace attributes (required by vLLM)
+        self.hidden_size = dim
+        self.num_attention_heads = n_heads
+        self.num_key_value_heads = n_kv_heads
+        self.num_hidden_layers = n_layers
+        self.intermediate_size = ff_dim
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_context
+
+        # Moondream3 uses token 0 (<|endoftext|>) as both BOS and EOS.
+        # Token 3 (<|md_reserved_2|>) is an answer delimiter that the model
+        # implementation suppresses during generation.
+        self.bos_token_id = 0
+        self.eos_token_id = 0
+
+        # MoE standard attributes
+        self.num_local_experts = self.moe_num_experts
+        self.num_experts_per_tok = self.moe_experts_per_token
+
+
+class Moondream3Config(PretrainedConfig):
+    """Combined configuration for Moondream3 multimodal model."""
+
+    model_type = "moondream3"
+    is_composition = True
+
+    def __init__(
+        self,
+        config: dict | None = None,
+        **kwargs,
+    ):
+        config = config or {}
+
+        # Parse text config
+        text_config = config.get("text", {})
+        self.text_config: Moondream3TextConfig = Moondream3TextConfig(**text_config)
+
+        # Parse vision config
+        vision_config = config.get("vision", {})
+        self.vision_config = Moondream3VisionConfig(**vision_config)
+
+        # Store the original config dict for model access
+        self.config = config
+        tokenizer_config = config.get("tokenizer", {})
+        self.answer_token_id = tokenizer_config.get("answer_id", 3)
+
+        super().__init__(**kwargs)
+
+        # Expose key attributes at top level for vLLM compatibility
+        self.hidden_size = self.text_config.hidden_size
+        self.num_attention_heads = self.text_config.num_attention_heads
+        self.num_key_value_heads = self.text_config.num_key_value_heads
+        self.num_hidden_layers = self.text_config.num_hidden_layers
+        self.vocab_size = self.text_config.vocab_size
+        self.intermediate_size = self.text_config.intermediate_size
+
+        # Moondream3 uses token 0 (<|endoftext|>) as both BOS and EOS.
+        # Token 3 (<|md_reserved_2|>) is an answer delimiter that the model
+        # implementation suppresses during generation.
+        self.bos_token_id = 0
+        self.eos_token_id = 0
+
+    def get_text_config(self, decoder: bool = False) -> "Moondream3TextConfig":
+        """Return the text config for vLLM's text_config detection.
+
+        Args:
+            decoder: Ignored. Only used for encoder-decoder models.
+        """
+        return self.text_config
diff --git a/vllm/transformers_utils/configs/openvla.py b/vllm/transformers_utils/configs/openvla.py
new file mode 100644
index 000000000000..897af8f705dd
--- /dev/null
+++ b/vllm/transformers_utils/configs/openvla.py
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""OpenVLA configuration support.
+
+OpenVLA checkpoints use a custom ``model_type`` and nest the language model
+configuration under ``text_config``. This shim lets vLLM load the checkpoint
+configuration without executing Hugging Face remote code.
+"""
+
+from typing import Any
+
+from transformers import LlamaConfig, PretrainedConfig
+
+
+class OpenVLAConfig(PretrainedConfig):
+    """Configuration class for OpenVLA models."""
+
+    model_type = "openvla"
+
+    def __init__(
+        self,
+        timm_model_ids: list[str] | None = None,
+        timm_override_act_layers: list[str | None] | None = None,
+        image_sizes: list[int] | None = None,
+        use_fused_vision_backbone: bool = True,
+        image_token_index: int = 32000,
+        n_action_bins: int = 256,
+        text_config: dict[str, Any] | LlamaConfig | None = None,
+        **kwargs: Any,
+    ) -> None:
+        kwargs.setdefault("architectures", ["OpenVLAForActionPrediction"])
+        super().__init__(**kwargs)
+
+        self.timm_model_ids = timm_model_ids or [
+            "vit_large_patch14_reg4_dinov2.lvd142m",
+            "vit_so400m_patch14_siglip_224",
+        ]
+        self.timm_override_act_layers = timm_override_act_layers or [None, None]
+        self.image_sizes = image_sizes or [224, 224]
+        self.use_fused_vision_backbone = use_fused_vision_backbone
+        self.image_token_index = image_token_index
+        self.n_action_bins = n_action_bins
+
+        if text_config is None:
+            text_config = LlamaConfig(architectures=["LlamaForCausalLM"])
+        elif isinstance(text_config, dict):
+            text_config = text_config.copy()
+            text_config.setdefault("architectures", ["LlamaForCausalLM"])
+            text_config = LlamaConfig(**text_config)
+        self.text_config = text_config
diff --git a/vllm/transformers_utils/configs/parakeet.py b/vllm/transformers_utils/configs/parakeet.py
index 7c7a5ddd800e..febd1da446a2 100644
--- a/vllm/transformers_utils/configs/parakeet.py
+++ b/vllm/transformers_utils/configs/parakeet.py
@@ -44,16 +44,29 @@ class ExtractorConfig:
     subsampling_factor: int
     subsampling_conv_kernel_size: int
     subsampling_conv_stride: int
+    hop_length: int = 160
+    """Default `160`: Matches HF default"""
     clip_duration_s: int = 30
     clip_min_duration_s: float = 0.1
 
-    @staticmethod
-    def from_hf_config(config: PretrainedConfig) -> "ExtractorConfig":
+    win_length: int = 400
+    preemphasis: float = 0.97
+    n_fft: int = 512
+    padding_value: float = 0.0
+
+    @classmethod
+    def from_hf_config(cls, config: PretrainedConfig) -> "ExtractorConfig":
         assert isinstance(config, PretrainedConfig)
-        return ExtractorConfig(
+        defaults = ("hop_length", "win_length", "preemphasis", "n_fft", "padding_value")
+        optional_kwargs = {
+            name: getattr(config, name) for name in defaults if hasattr(config, name)
+        }
+
+        return cls(
             feature_size=config.num_mel_bins,
             sampling_rate=config.sampling_rate,
             subsampling_factor=config.subsampling_factor,
             subsampling_conv_kernel_size=config.subsampling_conv_kernel_size,
             subsampling_conv_stride=config.subsampling_conv_stride,
+            **optional_kwargs,
         )
diff --git a/vllm/transformers_utils/configs/qianfan_ocr.py b/vllm/transformers_utils/configs/qianfan_ocr.py
new file mode 100644
index 000000000000..da004bb90f4f
--- /dev/null
+++ b/vllm/transformers_utils/configs/qianfan_ocr.py
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import Any
+
+from transformers import PretrainedConfig
+from transformers.models.auto import CONFIG_MAPPING
+
+
+class QianfanOCRVisionConfig(PretrainedConfig):
+    model_type = "qianfan_ocr_vision"
+
+    def __init__(
+        self,
+        hidden_size: int = 1024,
+        intermediate_size: int = 4096,
+        num_hidden_layers: int = 24,
+        num_attention_heads: int = 16,
+        num_channels: int = 3,
+        image_size: int = 448,
+        patch_size: int = 14,
+        hidden_act: str = "gelu",
+        layer_norm_eps: float = 1e-6,
+        attention_dropout: float = 0.0,
+        drop_path_rate: float = 0.1,
+        qkv_bias: bool = True,
+        qk_normalization: bool = False,
+        norm_type: str = "layer_norm",
+        initializer_range: float = 0.02,
+        initializer_factor: float = 0.1,
+        use_mask_token: bool = False,
+        use_mean_pooling: bool = True,
+        **kwargs: Any,
+    ):
+        super().__init__(**kwargs)
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.num_channels = num_channels
+        self.image_size = image_size
+        self.patch_size = patch_size
+        self.hidden_act = hidden_act
+        self.layer_norm_eps = layer_norm_eps
+        self.attention_dropout = attention_dropout
+        self.drop_path_rate = drop_path_rate
+        self.qkv_bias = qkv_bias
+        self.qk_normalization = qk_normalization
+        self.norm_type = norm_type
+        self.initializer_range = initializer_range
+        self.initializer_factor = initializer_factor
+        self.use_mask_token = use_mask_token
+        self.use_mean_pooling = use_mean_pooling
+
+
+class QianfanOCRConfig(PretrainedConfig):
+    model_type = "qianfan_ocr"
+
+    def __init__(
+        self,
+        vision_config: dict | None = None,
+        text_config: dict | None = None,
+        downsample_ratio: float = 0.5,
+        dynamic_image_size: bool = True,
+        force_image_size: int = 448,
+        image_token_id: int = 151671,
+        max_dynamic_patch: int = 12,
+        min_dynamic_patch: int = 1,
+        pad2square: bool = False,
+        ps_version: str = "v2",
+        select_layer: int = -1,
+        template: str = "internvl2_5",
+        use_thumbnail: bool = True,
+        tie_word_embeddings: bool = False,
+        **kwargs: Any,
+    ):
+        super().__init__(**kwargs)
+
+        if isinstance(vision_config, dict):
+            self.vision_config = QianfanOCRVisionConfig(**vision_config)
+        elif vision_config is None:
+            self.vision_config = QianfanOCRVisionConfig()
+        else:
+            self.vision_config = vision_config
+
+        if isinstance(text_config, dict):
+            model_type = text_config.get("model_type", "qwen3")
+            self.text_config = CONFIG_MAPPING[model_type](**text_config)
+        elif text_config is None:
+            self.text_config = CONFIG_MAPPING["qwen3"]()
+        else:
+            self.text_config = text_config
+
+        self.downsample_ratio = downsample_ratio
+        self.dynamic_image_size = dynamic_image_size
+        self.force_image_size = force_image_size
+        self.image_token_id = image_token_id
+        self.max_dynamic_patch = max_dynamic_patch
+        self.min_dynamic_patch = min_dynamic_patch
+        self.pad2square = pad2square
+        self.ps_version = ps_version
+        self.select_layer = select_layer
+        self.template = template
+        self.use_thumbnail = use_thumbnail
+        self.tie_word_embeddings = tie_word_embeddings
diff --git a/vllm/transformers_utils/configs/speculators/algos.py b/vllm/transformers_utils/configs/speculators/algos.py
index f4dffab8b3e3..650f09c39fb0 100644
--- a/vllm/transformers_utils/configs/speculators/algos.py
+++ b/vllm/transformers_utils/configs/speculators/algos.py
@@ -41,3 +41,66 @@ def update_eagle3(config_dict: dict, pre_trained_config: dict) -> None:
         pre_trained_config["eagle_aux_hidden_state_layer_ids"] = config_dict[
             "eagle_aux_hidden_state_layer_ids"
         ]
+
+
+@register_speculator("peagle")
+def update_peagle(config_dict: dict, pre_trained_config: dict) -> None:
+    """
+    Apply PEagle (Parallel Eagle) specific configuration transformations to
+    the `dict` used to construct the Transformers PreTrainedConfig.
+
+    PEagle specific fields:
+    - draft_vocab_size: Size of the draft model's vocabulary
+    - target_hidden_size: Hidden size of the target model
+    - norm_before_residual: Whether to apply norm before residual connection
+    - norm_before_fc: Whether to apply RMSNorm before the fc projection
+    - mask_token_id (required): Token ID used for parallel drafting mask
+        placeholders, mapped to pard_token for the proposer
+    - eagle_aux_hidden_state_layer_ids: Layer indices from the target model
+        whose intermediate hidden states are used as auxiliary inputs
+    """
+    pre_trained_config["architectures"] = ["PeagleLlamaForCausalLM"]
+    pre_trained_config["draft_vocab_size"] = config_dict.get("draft_vocab_size")
+    if config_dict.get("target_hidden_size") is not None:
+        pre_trained_config["target_hidden_size"] = config_dict["target_hidden_size"]
+    pre_trained_config["norm_before_residual"] = config_dict.get(
+        "norm_before_residual", False
+    )
+    pre_trained_config["norm_before_fc"] = config_dict.get("norm_before_fc", False)
+    pre_trained_config["pard_token"] = config_dict["mask_token_id"]
+    if config_dict.get("eagle_aux_hidden_state_layer_ids"):
+        pre_trained_config["eagle_aux_hidden_state_layer_ids"] = config_dict[
+            "eagle_aux_hidden_state_layer_ids"
+        ]
+
+
+@register_speculator("dflash")
+def update_dflash(config_dict: dict, pre_trained_config: dict) -> None:
+    """
+    Apply DFlash specific configuration transformations to the `dict` used to
+    construct the Transformers PreTrainedConfig.
+
+    DFlash specific fields:
+    - draft_vocab_size: Size of the draft model's vocabulary
+    - target_hidden_size: Hidden size of the target model
+    - mask_token_id (required): Token ID used for parallel drafting mask
+        placeholders
+    - aux_hidden_state_layer_ids (required): Layer indices from the target
+        model whose intermediate hidden states are used as context for the
+        DFlash drafter. Mapped to both eagle_aux_hidden_state_layer_ids
+        (for gpu_model_runner) and dflash_config.target_layer_ids (for the
+        DFlash model).
+    """
+    pre_trained_config["architectures"] = ["DFlashDraftModel"]
+    pre_trained_config["draft_vocab_size"] = config_dict.get("draft_vocab_size")
+    if config_dict.get("target_hidden_size") is not None:
+        pre_trained_config["target_hidden_size"] = config_dict["target_hidden_size"]
+
+    aux_layer_ids = config_dict["aux_hidden_state_layer_ids"]
+    pre_trained_config["eagle_aux_hidden_state_layer_ids"] = aux_layer_ids
+
+    # DFlash configs use different indexing for the target layers, see #40727
+    pre_trained_config["dflash_config"] = {
+        "mask_token_id": config_dict["mask_token_id"],
+        "target_layer_ids": [i - 1 for i in aux_layer_ids],
+    }
diff --git a/vllm/transformers_utils/configs/speculators/base.py b/vllm/transformers_utils/configs/speculators/base.py
index 697c9d52e81b..f09173bcb9a0 100644
--- a/vllm/transformers_utils/configs/speculators/base.py
+++ b/vllm/transformers_utils/configs/speculators/base.py
@@ -131,7 +131,10 @@ def build_vllm_speculative_config(
             )
 
         # Build base vLLM speculative configuration
-        return {
+        result = {
             "method": config_dict.get("speculators_model_type"),
             "num_speculative_tokens": num_speculative_tokens,
         }
+        if result["method"] == "peagle":
+            result.update({"method": "eagle3", "parallel_drafting": True})
+        return result
diff --git a/vllm/transformers_utils/gguf_utils.py b/vllm/transformers_utils/gguf_utils.py
index 3faa5ee60e9f..7708378ee13b 100644
--- a/vllm/transformers_utils/gguf_utils.py
+++ b/vllm/transformers_utils/gguf_utils.py
@@ -40,15 +40,48 @@ def check_gguf_file(model: str | PathLike) -> bool:
 
 @cache
 def is_remote_gguf(model: str | Path) -> bool:
-    """Check if the model is a remote GGUF model."""
+    """Check if the model is a remote GGUF model.
+
+    Recognizes two forms:
+    1. Standard: ``repo_id:quant_type`` where *quant_type* is a known
+       GGML quantization type (e.g. ``Q4_K_M``).
+    2. Non-standard: ``repo_id:quant_type`` where *quant_type* contains
+       a known GGML type with extra prefixes (e.g. ``UD-Q4_K_XL``).
+       A warning is logged and actual file existence is validated later
+       during download.
+    """
     pattern = r"^[a-zA-Z0-9][a-zA-Z0-9._-]*/[a-zA-Z0-9][a-zA-Z0-9._-]*:[A-Za-z0-9_+-]+$"
     model = str(model)
     if re.fullmatch(pattern, model):
         _, quant_type = model.rsplit(":", 1)
-        return is_valid_gguf_quant_type(quant_type)
+        if is_valid_gguf_quant_type(quant_type):
+            return True
+        if is_nonstandard_gguf_quant_type(quant_type):
+            logger.warning(
+                "Non-standard GGUF quant type '%s' detected.",
+                quant_type,
+            )
+            return True
     return False
 
 
+def is_nonstandard_gguf_quant_type(quant_type: str) -> bool:
+    """Check if a non-standard quant type contains a known GGML type.
+
+    Splits the quant type by the last ``-`` and checks whether the
+    trailing part is a standard GGML type.  For example::
+
+        UD-Q4_K_XL      → rsplit → ["UD", "Q4_K_XL"]      → Q4_K_XL valid ✓
+        UD-IQ4_NL       → rsplit → ["UD", "IQ4_NL"]       → IQ4_NL  valid ✓
+        Custom-UD-Q4_K  → rsplit → ["Custom-UD", "Q4_K"]  → Q4_K    valid ✓
+        RANDOM          → no "-" → False
+    """
+    if "-" not in quant_type:
+        return False
+    _, remainder = quant_type.rsplit("-", 1)
+    return is_valid_gguf_quant_type(remainder)
+
+
 # Common suffixes used in GGUF file naming conventions
 # e.g., Q4_K_M, Q3_K_S, Q5_K_L, Q2_K_XL
 _GGUF_QUANT_SUFFIXES = ("_M", "_S", "_L", "_XL", "_XS", "_XXS")
@@ -84,7 +117,9 @@ def split_remote_gguf(model: str | Path) -> tuple[str, str]:
         f"Wrong GGUF model or invalid GGUF quant type: {model}.\n"
         "- It should be in repo_id:quant_type format.\n"
         f"- Valid base quant types: {GGMLQuantizationType._member_names_}\n"
-        f"- Extended suffixes also supported: {_GGUF_QUANT_SUFFIXES}",
+        f"- Extended suffixes also supported: {_GGUF_QUANT_SUFFIXES}\n"
+        "- Non-standard GGUF quant types also supported: "
+        "dash-separated prefixes (e.g. UD-Q4_K_XL, Custom-Q8_0)",
     )
 
 
diff --git a/vllm/transformers_utils/model_arch_config_convertor.py b/vllm/transformers_utils/model_arch_config_convertor.py
index 3229539e313a..35fa1313d1e7 100644
--- a/vllm/transformers_utils/model_arch_config_convertor.py
+++ b/vllm/transformers_utils/model_arch_config_convertor.py
@@ -47,6 +47,9 @@ def get_hidden_size(self) -> int:
 
     def get_head_size(self) -> int:
         if self.is_deepseek_mla():
+            # special case for deepseek_v4
+            if hasattr(self.hf_text_config, "compress_ratios"):
+                return self.hf_text_config.head_dim
             qk_rope_head_dim = getattr(self.hf_text_config, "qk_rope_head_dim", 0)
             if not envs.VLLM_MLA_DISABLE:
                 return self.hf_text_config.kv_lora_rank + qk_rope_head_dim
@@ -77,6 +80,8 @@ def get_total_num_kv_heads(self) -> int:
             "num_key_value_heads",
             # For ChatGLM:
             "multi_query_group_num",
+            # For Step3p5:
+            "num_attention_groups",
         ]
         # For non-grouped-query attention models, the number of KV heads is
         # equal to the number of attention heads.
@@ -220,6 +225,7 @@ def is_deepseek_mla(self) -> bool:
             "deepseek_v2",
             "deepseek_v3",
             "deepseek_v32",
+            "deepseek_v4",
             "deepseek_mtp",
             "glm_moe_dsa",
             "glm4_moe_lite",
@@ -231,7 +237,11 @@ def is_deepseek_mla(self) -> bool:
             "pangu_ultra_moe_mtp",
             "bailing_hybrid",
         ):
-            return getattr(self.hf_text_config, "kv_lora_rank", None) is not None
+            # check is deepseek_v4 model
+            if hasattr(self.hf_text_config, "compress_ratios"):
+                return getattr(self.hf_text_config, "head_dim", None) is not None
+            else:
+                return getattr(self.hf_text_config, "kv_lora_rank", None) is not None
         elif self.hf_text_config.model_type == "eagle":
             # if the model is an EAGLE module, check for the
             # underlying architecture
@@ -248,6 +258,23 @@ def is_deepseek_mla(self) -> bool:
             )
         return False
 
+    def is_mm_prefix_lm(self) -> bool:
+        """Whether to use bidirectional attention for mm positions."""
+        if hasattr(self.hf_config, "is_mm_prefix_lm"):
+            return bool(self.hf_config.is_mm_prefix_lm)
+        # fallback to list of known models
+        MM_PREFIX_LM_MODELS = (
+            "bagel",
+            "gemma3",
+            "molmo2",
+            "moondream3",
+            "paligemma",
+            "umm",
+        )
+        if not hasattr(self.hf_config, "model_type"):
+            return False
+        return self.hf_config.model_type in MM_PREFIX_LM_MODELS
+
     def derive_max_model_len_and_key(self) -> tuple[float, str | None]:
         derived_max_model_len = float("inf")
         possible_keys = [
@@ -297,6 +324,7 @@ def convert(self) -> ModelArchitectureConfig:
             num_experts=self.get_num_experts(),
             quantization_config=self.get_quantization_config(),
             is_deepseek_mla=self.is_deepseek_mla(),
+            is_mm_prefix_lm=self.is_mm_prefix_lm(),
             derived_max_model_len_and_key=self.derive_max_model_len_and_key(),
         )
 
@@ -324,6 +352,9 @@ def get_total_num_kv_heads(self) -> int:
         )
         return enc_num_kv_heads
 
+    def is_mm_prefix_lm(self) -> bool:
+        return False
+
 
 class MambaModelArchConfigConvertor(ModelArchConfigConvertorBase):
     def get_head_size(self) -> int:
@@ -418,6 +449,39 @@ def get_num_hidden_layers(self) -> int:
         return getattr(self.hf_text_config, "num_nextn_predict_layers", 0)
 
 
+def _strip_mimo_v2_attention_chunk_size(
+    hf_config: PretrainedConfig, hf_text_config: PretrainedConfig
+) -> None:
+    # MiMo-V2-Flash's config.json sets `attention_chunk_size=128` but the
+    # architecture does not actually use chunked local attention. Leaving it
+    # set makes vLLM disable the hybrid KV cache manager
+    for cfg in (hf_text_config, hf_config):
+        if cfg is not None and hasattr(cfg, "attention_chunk_size"):
+            delattr(cfg, "attention_chunk_size")
+
+
+class MimoV2ModelArchConfigConvertor(ModelArchConfigConvertorBase):
+    def __init__(self, hf_config: PretrainedConfig, hf_text_config: PretrainedConfig):
+        if getattr(hf_config, "vision_config", None):
+            hf_config.architectures = ["MiMoV2OmniForCausalLM"]
+        super().__init__(hf_config, hf_text_config)
+        _strip_mimo_v2_attention_chunk_size(hf_config, hf_text_config)
+
+
+class MimoV2MTPModelArchConfigConvertor(ModelArchConfigConvertorBase):
+    def __init__(self, hf_config: PretrainedConfig, hf_text_config: PretrainedConfig):
+        super().__init__(hf_config, hf_text_config)
+        _strip_mimo_v2_attention_chunk_size(hf_config, hf_text_config)
+
+    def get_num_hidden_layers(self) -> int:
+        n = getattr(self.hf_text_config, "num_nextn_predict_layers", None)
+        if n is not None:
+            return n
+        # Fall back to n_predict set by hf_config_override
+        n = getattr(self.hf_text_config, "n_predict", None)
+        return n if n is not None else 0
+
+
 class GLM4MoeMTPModelArchConfigConvertor(ModelArchConfigConvertorBase):
     def get_num_hidden_layers(self) -> int:
         return getattr(self.hf_text_config, "num_nextn_predict_layers", 0)
@@ -448,6 +512,34 @@ def get_num_hidden_layers(self) -> int:
         return getattr(self.hf_text_config, "num_nextn_predict_layers", 1)
 
 
+class Gemma4MTPModelArchConfigConvertor(ModelArchConfigConvertorBase):
+    def get_hidden_size(self) -> int:
+        # The speculator buffer must match the backbone (target) model's
+        # hidden dimension, not the draft model's smaller dimension.
+        return getattr(
+            self.hf_config, "backbone_hidden_size", super().get_hidden_size()
+        )
+
+    def get_num_hidden_layers(self) -> int:
+        return getattr(self.hf_text_config, "num_hidden_layers", 0)
+
+
+class Gemma4ModelArchConfigConvertor(ModelArchConfigConvertorBase):
+    def is_mm_prefix_lm(self) -> bool:
+        return (
+            getattr(self.hf_text_config, "use_bidirectional_attention", None)
+            == "vision"
+        )
+
+    def get_head_size(self) -> int:
+        # Gemma4 uses dual head dimensions: head_dim (sliding attention)
+        # and global_head_dim (full attention).  Return the largest so
+        # that attention backends allocate buffers large enough for both.
+        head_dim = getattr(self.hf_text_config, "head_dim", 0)
+        global_head_dim = getattr(self.hf_text_config, "global_head_dim", 0)
+        return max(head_dim, global_head_dim) or super().get_head_size()
+
+
 # hf_config.model_type -> convertor class
 MODEL_ARCH_CONFIG_CONVERTORS = {
     "cohere_asr": CohereAsrModelArchConfigConvertor,
@@ -459,6 +551,9 @@ def get_num_hidden_layers(self) -> int:
     "mpt": MPTModelArchConfigConvertor,
     "dbrx": DbrxModelArchConfigConvertor,
     "falcon": FalconModelArchConfigConvertor,
+    "gemma4": Gemma4ModelArchConfigConvertor,
+    "gemma4_text": Gemma4ModelArchConfigConvertor,
+    "gemma4_mtp": Gemma4MTPModelArchConfigConvertor,
     "RefinedWeb": FalconModelArchConfigConvertor,
     "RefinedWebModel": FalconModelArchConfigConvertor,
     "nemotron-nas": NemotronNasModelArchConfigConvertor,
@@ -466,6 +561,10 @@ def get_num_hidden_layers(self) -> int:
     "qwen3_next_mtp": Qwen3NextMTPModelArchConfigConvertor,
     "qwen3_5_mtp": Qwen3_5MTPModelArchConfigConvertor,
     "mimo_mtp": MimoMTPModelArchConfigConvertor,
+    "mimo_v2": MimoV2ModelArchConfigConvertor,
+    "mimo_v2_flash": MimoV2ModelArchConfigConvertor,
+    "mimo_v2_mtp": MimoV2MTPModelArchConfigConvertor,
+    "mimo_v2_omni_mtp": MimoV2MTPModelArchConfigConvertor,
     "glm4_moe_mtp": GLM4MoeMTPModelArchConfigConvertor,
     "glm_ocr_mtp": GLM4MoeMTPModelArchConfigConvertor,
     "ernie_mtp": ErnieMTPModelArchConfigConvertor,
diff --git a/vllm/transformers_utils/processors/__init__.py b/vllm/transformers_utils/processors/__init__.py
index d0994c257798..ba2872f89276 100644
--- a/vllm/transformers_utils/processors/__init__.py
+++ b/vllm/transformers_utils/processors/__init__.py
@@ -12,24 +12,30 @@
 
 __all__ = [
     "BagelProcessor",
+    "CheersProcessor",
     "CohereASRProcessor",
     "DeepseekVLV2Processor",
     "FireRedASR2Processor",
+    "FireRedLIDProcessor",
     "FunASRProcessor",
     "GLM4VProcessor",
+    "Granite4VisionProcessor",
     "H2OVLProcessor",
     "HunYuanVLProcessor",
     "HunYuanVLImageProcessor",
+    "Moondream3Processor",
     "InternVLProcessor",
     "IsaacProcessor",
     "KimiAudioProcessor",
     "KimiK25Processor",
+    "MiMoOmniProcessor",
     "MistralCommonPixtralProcessor",
     "MistralCommonVoxtralProcessor",
     "NanoNemotronVLProcessor",
     "NemotronVLProcessor",
     "LlamaNemotronVLEmbedProcessor",
     "NVLMProcessor",
+    "OpenVLAProcessor",
     "OvisProcessor",
     "Ovis2_5Processor",
     "QwenVLProcessor",
@@ -39,11 +45,14 @@
 
 _CLASS_TO_MODULE: dict[str, str] = {
     "BagelProcessor": "vllm.transformers_utils.processors.bagel",
+    "CheersProcessor": "vllm.transformers_utils.processors.cheers",
     "CohereASRProcessor": "vllm.transformers_utils.processors.cohere_asr",
     "DeepseekVLV2Processor": "vllm.transformers_utils.processors.deepseek_vl2",
     "FireRedASR2Processor": "vllm.transformers_utils.processors.fireredasr2",
+    "FireRedLIDProcessor": "vllm.transformers_utils.processors.fireredlid",
     "FunASRProcessor": "vllm.transformers_utils.processors.funasr",
     "GLM4VProcessor": "vllm.transformers_utils.processors.glm4v",
+    "Granite4VisionProcessor": "vllm.transformers_utils.processors.granite4_vision",
     "H2OVLProcessor": "vllm.transformers_utils.processors.h2ovl",
     "HunYuanVLProcessor": "vllm.transformers_utils.processors.hunyuan_vl",
     "HunYuanVLImageProcessor": "vllm.transformers_utils.processors.hunyuan_vl_image",
@@ -51,12 +60,15 @@
     "IsaacProcessor": "vllm.transformers_utils.processors.isaac",
     "KimiAudioProcessor": "vllm.transformers_utils.processors.kimi_audio",
     "KimiK25Processor": "vllm.transformers_utils.processors.kimi_k25",
+    "MiMoOmniProcessor": "vllm.transformers_utils.processors.mimo_v2_omni",
     "MistralCommonPixtralProcessor": "vllm.transformers_utils.processors.pixtral",
     "MistralCommonVoxtralProcessor": "vllm.transformers_utils.processors.voxtral",
+    "Moondream3Processor": "vllm.transformers_utils.processors.moondream3",
     "NanoNemotronVLProcessor": "vllm.transformers_utils.processors.nano_nemotron_vl",
     "NemotronVLProcessor": "vllm.transformers_utils.processors.nemotron_vl",
     "LlamaNemotronVLEmbedProcessor": "vllm.transformers_utils.processors.nemotron_vl",
     "NVLMProcessor": "vllm.transformers_utils.processors.nvlm_d",
+    "OpenVLAProcessor": "vllm.transformers_utils.processors.openvla",
     "OvisProcessor": "vllm.transformers_utils.processors.ovis",
     "Ovis2_5Processor": "vllm.transformers_utils.processors.ovis2_5",
     "QwenVLProcessor": "vllm.transformers_utils.processors.qwen_vl",
diff --git a/vllm/transformers_utils/processors/cheers.py b/vllm/transformers_utils/processors/cheers.py
new file mode 100644
index 000000000000..68eecbcffe76
--- /dev/null
+++ b/vllm/transformers_utils/processors/cheers.py
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Cheers (UMM) processor for image and text inputs."""
+
+from transformers import AutoProcessor
+from transformers.feature_extraction_utils import BatchFeature
+from transformers.image_utils import ImageInput
+from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
+from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
+
+
+class CheersProcessorKwargs(ProcessingKwargs, total=False):  # type: ignore[call-arg]
+    _defaults = {
+        "images_kwargs": {
+            "return_tensors": "pt",
+        },
+    }
+
+
+class CheersProcessor(ProcessorMixin):
+    """
+    Constructs a Cheers processor which wraps a
+    SigLIP image processor and a Qwen2 tokenizer.
+    """
+
+    attributes = ["image_processor", "tokenizer"]
+    image_processor_class = "AutoImageProcessor"
+    tokenizer_class = "AutoTokenizer"
+
+    def __call__(
+        self,
+        text: TextInput
+        | PreTokenizedInput
+        | list[TextInput]
+        | list[PreTokenizedInput] = None,
+        images: ImageInput = None,
+        **kwargs: Unpack[CheersProcessorKwargs],
+    ):
+        output_kwargs = self._merge_kwargs(
+            CheersProcessorKwargs,
+            tokenizer_init_kwargs=self.tokenizer.init_kwargs,
+            **kwargs,
+        )
+
+        if images is not None:
+            import torch
+
+            if isinstance(images, (list, tuple)):
+                all_pv = []
+                all_ghw = []
+                for img in images:
+                    result = self.image_processor(img, **output_kwargs["images_kwargs"])
+                    all_pv.append(result["pixel_values"])
+                    if "grid_hws" in result:
+                        all_ghw.append(result["grid_hws"])
+                pixel_values = {
+                    "pixel_values": torch.cat(all_pv, dim=0),
+                }
+                if all_ghw:
+                    pixel_values["grid_hws"] = torch.cat(all_ghw, dim=0)
+            else:
+                pixel_values = self.image_processor(
+                    images, **output_kwargs["images_kwargs"]
+                )
+        else:
+            pixel_values = {}
+
+        text_inputs = (
+            self.tokenizer(text, **output_kwargs["text_kwargs"])
+            if text is not None
+            else {}
+        )
+
+        return BatchFeature(data={**pixel_values, **text_inputs})
+
+    def batch_decode(self, *args, **kwargs):
+        return self.tokenizer.batch_decode(*args, **kwargs)
+
+    def decode(self, *args, **kwargs):
+        return self.tokenizer.decode(*args, **kwargs)
+
+    @property
+    def model_input_names(self):
+        tokenizer_input_names = self.tokenizer.model_input_names
+        image_processor_input_names = self.image_processor.model_input_names
+        return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
+
+
+AutoProcessor.register("CheersProcessor", CheersProcessor)
diff --git a/vllm/transformers_utils/processors/cohere_asr.py b/vllm/transformers_utils/processors/cohere_asr.py
index f742074a4e3d..e1257de4e735 100644
--- a/vllm/transformers_utils/processors/cohere_asr.py
+++ b/vllm/transformers_utils/processors/cohere_asr.py
@@ -4,11 +4,11 @@
 import math
 import random
 
-import librosa
 import numpy as np
 import torch
 import torch.nn.functional as F
 from torch import nn
+from torchaudio.functional import melscale_fbanks
 from transformers import AutoFeatureExtractor, AutoProcessor, BatchFeature
 from transformers.feature_extraction_sequence_utils import (
     SequenceFeatureExtractor,
@@ -129,17 +129,15 @@ def __init__(
         self.pad_min_duration = 0.0
         self.pad_direction = "both"
 
-        filterbanks = torch.tensor(
-            librosa.filters.mel(
-                sr=sample_rate,
-                n_fft=self.n_fft,
-                n_mels=nfilt,
-                fmin=lowfreq,
-                fmax=highfreq,
-                norm=mel_norm,
-            ),
-            dtype=torch.float,
-        ).unsqueeze(0)
+        filterbanks = melscale_fbanks(
+            n_freqs=self.n_fft // 2 + 1,
+            f_min=lowfreq,
+            f_max=highfreq,
+            n_mels=nfilt,
+            sample_rate=sample_rate,
+            norm=mel_norm,
+            mel_scale="slaney",
+        ).T.unsqueeze(0)
         self.register_buffer("fb", filterbanks)
 
         # Calculate maximum sequence length
diff --git a/vllm/transformers_utils/processors/fireredlid.py b/vllm/transformers_utils/processors/fireredlid.py
new file mode 100644
index 000000000000..cb041397d036
--- /dev/null
+++ b/vllm/transformers_utils/processors/fireredlid.py
@@ -0,0 +1,276 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+FireRedLID feature extractor and processor.
+
+The FeatureExtractor handles:
+  - Raw waveform → 80-dim log-mel filterbank (via kaldi_native_fbank)
+  - CMVN normalization (means / inverse_std_variences from preprocessor_config)
+  - Padding + length tracking
+
+The Processor wraps the FeatureExtractor and a tokenizer.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from transformers import (
+    AutoFeatureExtractor,
+    BatchFeature,
+)
+from transformers.feature_extraction_sequence_utils import SequenceFeatureExtractor
+from transformers.processing_utils import ProcessorMixin
+from transformers.utils import TensorType
+
+from vllm.logger import init_logger
+from vllm.utils.import_utils import LazyLoader
+
+if TYPE_CHECKING:
+    import kaldi_native_fbank as knf
+else:
+    knf = LazyLoader("knf", globals(), "kaldi_native_fbank")
+
+
+logger = init_logger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Helpers (shared with FireRedASR2 processor)
+# ---------------------------------------------------------------------------
+
+
+class CMVN:
+    def __init__(self, dim, means, inverse_std_variences):
+        self.dim = dim
+        self.means = np.array(means)
+        self.inverse_std_variences = np.array(inverse_std_variences)
+
+    def __call__(self, x):
+        assert x.shape[-1] == self.dim, "CMVN dim mismatch"
+        out = x - self.means
+        out = out * self.inverse_std_variences
+        return out
+
+
+class KaldifeatFbank:
+    def __init__(
+        self,
+        num_mel_bins: int = 80,
+        frame_length: int = 25,
+        frame_shift: int = 10,
+        dither: float = 0.0,
+    ):
+        self.dither = dither
+        opts = knf.FbankOptions()
+        opts.frame_opts.dither = dither
+        opts.mel_opts.num_bins = num_mel_bins
+        opts.frame_opts.snip_edges = True
+        opts.mel_opts.debug_mel = False
+        self.opts = opts
+
+    def __call__(self, sample_rate, wav_np, is_train=False):
+        dither = self.dither if is_train else 0.0
+        self.opts.frame_opts.dither = dither
+        fbank = knf.OnlineFbank(self.opts)
+        fbank.accept_waveform(sample_rate, wav_np.tolist())
+        feat = []
+        for i in range(fbank.num_frames_ready):
+            feat.append(fbank.get_frame(i))
+        if len(feat) == 0:
+            return np.zeros((0, self.opts.mel_opts.num_bins))
+        return np.vstack(feat)
+
+
+# ---------------------------------------------------------------------------
+# Feature Extractor
+# ---------------------------------------------------------------------------
+
+
+class FireRedLIDFeatureExtractor(SequenceFeatureExtractor):
+    """
+    Extracts 80-dim log-mel filterbank features from raw waveforms,
+    applies CMVN, and returns padded feature tensors with lengths.
+
+    Also computes ``fake_token_lengths`` — the actual encoder output
+    length for each audio — so that vLLM can allocate the correct
+    number of cross-attention KV cache slots.
+    """
+
+    model_input_names = ["input_features"]
+
+    def __init__(
+        self,
+        feature_size=80,
+        sampling_rate=16000,
+        chunk_length=30,
+        padding_value=0.0,
+        return_attention_mask=False,
+        dim=80,
+        means=None,
+        inverse_std_variences=None,
+        num_mel_bins=80,
+        frame_length=25,
+        frame_shift=10,
+        dither=0.0,
+        left_context=3,
+        right_context=3,
+        **kwargs,
+    ):
+        super().__init__(
+            feature_size=feature_size,
+            sampling_rate=sampling_rate,
+            padding_value=padding_value,
+            return_attention_mask=return_attention_mask,
+            **kwargs,
+        )
+        self.chunk_length = chunk_length
+        self.dim = dim
+        self.means = means
+        self.inverse_std_variences = inverse_std_variences
+        self.num_mel_bins = num_mel_bins
+        self.frame_length = frame_length
+        self.frame_shift = frame_shift
+        self.dither = dither
+        self.sampling_rate = sampling_rate
+        self.context = left_context + 1 + right_context
+
+    def __call__(
+        self,
+        raw_speech: np.ndarray | list[float] | list[np.ndarray] | list[list[float]],
+        truncation: bool = True,
+        pad_to_multiple_of: int | None = None,
+        return_tensors: str | TensorType | None = None,
+        return_attention_mask: bool | None = None,
+        padding: str | None = "max_length",
+        max_length: int | None = None,
+        sampling_rate: int | None = None,
+        do_normalize: bool | None = None,
+        **kwargs,
+    ) -> BatchFeature:
+        if sampling_rate is not None and sampling_rate != self.sampling_rate:
+            raise ValueError(
+                f"FireRedLIDFeatureExtractor expects sampling_rate="
+                f"{self.sampling_rate}, got {sampling_rate}."
+            )
+
+        # Initialize helpers
+        cmvn = CMVN(self.dim, self.means, self.inverse_std_variences)
+        fbank = KaldifeatFbank(
+            num_mel_bins=self.num_mel_bins,
+            frame_length=self.frame_length,
+            frame_shift=self.frame_shift,
+            dither=self.dither,
+        )
+
+        def padding_position_is_0(padded_input, input_lengths):
+            N, T = padded_input.size()[:2]
+            mask = torch.ones((N, T)).to(padded_input.device)
+            for i in range(N):
+                mask[i, input_lengths[i] :] = 0
+            mask = mask.unsqueeze(dim=1)
+            return mask.to(torch.uint8)
+
+        feats = []
+        speech_lengths = []
+        fake_token_lengths = []
+
+        for speech in raw_speech:
+            # vLLM loads audio via librosa (float32 in [-1,1]),
+            # but kaldi_native_fbank expects int16-scale values.
+            speech_scaled = speech * 32768
+            feat = fbank(self.sampling_rate, speech_scaled)
+            feat = cmvn(feat)
+            feat = torch.from_numpy(feat).float()
+            length = feat.size(0)
+            feats.append(feat)
+            speech_lengths.append(length)
+
+            # Compute the actual Conv2dSubsampling output length.
+            # This mirrors the mask logic in Conv2dSubsampling.forward:
+            #   pad context frames, then mask[:, :, :-2:2][:, :, :-2:2].sum()
+            padded_input = F.pad(feat, (0, 0, 0, self.context - 1), "constant", 0.0)
+            src_mask = padding_position_is_0(
+                padded_input[None, :, :],
+                torch.tensor([length], dtype=torch.int32),
+            )
+            mask = src_mask[:, :, :-2:2][:, :, :-2:2]
+            enc_len = mask[:, -1, :].sum(dim=-1)
+            fake_token_len = torch.clamp(enc_len, min=1)
+            fake_token_lengths.append(fake_token_len)
+
+        if len(feats) == 0:
+            return BatchFeature()
+
+        # Pad to uniform length
+        max_feat_len = max(f.size(0) for f in feats)
+        padded = feats[0].new_zeros(len(feats), max_feat_len, feats[0].size(1))
+        for i, feat in enumerate(feats):
+            padded[i, : feat.size(0)] = feat
+
+        result = BatchFeature({"input_features": padded})
+
+        if return_tensors is not None:
+            result = result.convert_to_tensors(return_tensors)
+
+        result["speech_lengths"] = torch.tensor(speech_lengths, dtype=torch.long)
+        result["fake_token_lengths"] = torch.concat(fake_token_lengths)
+        return result
+
+
+# ---------------------------------------------------------------------------
+# Processor
+# ---------------------------------------------------------------------------
+
+
+class FireRedLIDProcessor(ProcessorMixin):
+    """
+    Wraps FireRedLIDFeatureExtractor + a tokenizer.
+    """
+
+    feature_extractor_class = "FireRedLIDFeatureExtractor"
+    tokenizer_class = ("PreTrainedTokenizer", "PreTrainedTokenizerFast")
+
+    def __init__(self, feature_extractor, tokenizer):
+        super().__init__(feature_extractor, tokenizer)
+        self.current_processor = self.feature_extractor
+        self._in_target_context_manager = False
+
+    def __call__(self, *args, **kwargs):
+        if self._in_target_context_manager:
+            return self.current_processor(*args, **kwargs)
+
+        audio = kwargs.pop("audio", None)
+        sampling_rate = kwargs.pop("sampling_rate", None)
+        text = kwargs.pop("text", None)
+        if len(args) > 0:
+            audio = args[0]
+            args = args[1:]
+
+        if audio is not None:
+            inputs = self.feature_extractor(
+                audio, *args, sampling_rate=sampling_rate, **kwargs
+            )
+        else:
+            inputs = BatchFeature()
+
+        if text is not None:
+            if isinstance(text, str):
+                text = [text]
+            encodings = self.tokenizer(text, **kwargs)
+            if audio is not None:
+                inputs["labels"] = encodings["input_ids"]
+            else:
+                return encodings
+
+        return inputs
+
+
+# ---------------------------------------------------------------------------
+# Registration
+# ---------------------------------------------------------------------------
+
+AutoFeatureExtractor.register("FireRedLIDFeatureExtractor", FireRedLIDFeatureExtractor)
diff --git a/vllm/transformers_utils/processors/granite4_vision.py b/vllm/transformers_utils/processors/granite4_vision.py
new file mode 100644
index 000000000000..7972015adf9d
--- /dev/null
+++ b/vllm/transformers_utils/processors/granite4_vision.py
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from fractions import Fraction
+
+from transformers import LlavaNextProcessor
+from transformers.image_processing_utils import select_best_resolution
+
+
+class Granite4VisionProcessor(LlavaNextProcessor):
+    """Processor for Granite 4 Vision.
+
+    Extends LlavaNextProcessor to account for the Window Q-Former
+    downsampling when computing the number of image features.
+
+    This processor is needed because the granite4_vision processor type
+    is not yet in the transformers version pinned by vLLM.
+    """
+
+    model_type = "granite4_vision"
+
+    def __init__(
+        self,
+        image_processor=None,
+        tokenizer=None,
+        patch_size=None,
+        vision_feature_select_strategy=None,
+        chat_template=None,
+        image_token="<image>",
+        num_additional_image_tokens=0,
+        downsample_rate=None,
+        **kwargs,
+    ):
+        super().__init__(
+            image_processor=image_processor,
+            tokenizer=tokenizer,
+            patch_size=patch_size,
+            vision_feature_select_strategy=vision_feature_select_strategy,
+            chat_template=chat_template,
+            image_token=image_token,
+            num_additional_image_tokens=num_additional_image_tokens,
+        )
+        self.downsample_rate = downsample_rate
+
+    def _get_number_of_features(
+        self,
+        orig_height: int,
+        orig_width: int,
+        height: int,
+        width: int,
+    ) -> int:
+        image_grid_pinpoints = self.image_processor.image_grid_pinpoints
+
+        height_best_resolution, width_best_resolution = select_best_resolution(
+            [orig_height, orig_width], image_grid_pinpoints
+        )
+        scale_height = height_best_resolution // height
+        scale_width = width_best_resolution // width
+
+        patches_height = height // self.patch_size
+        patches_width = width // self.patch_size
+        if self.downsample_rate is not None:
+            ds_rate = Fraction(self.downsample_rate)
+            patches_height = int(patches_height * ds_rate)
+            patches_width = int(patches_width * ds_rate)
+
+        unpadded_features, newline_features = self._get_unpadded_features(
+            orig_height,
+            orig_width,
+            patches_height,
+            patches_width,
+            scale_height,
+            scale_width,
+        )
+        base_features = (
+            patches_height * patches_width + self.num_additional_image_tokens
+        )
+        return unpadded_features + newline_features + base_features
diff --git a/vllm/transformers_utils/processors/mimo_v2_omni.py b/vllm/transformers_utils/processors/mimo_v2_omni.py
new file mode 100644
index 000000000000..97df3184113e
--- /dev/null
+++ b/vllm/transformers_utils/processors/mimo_v2_omni.py
@@ -0,0 +1,1285 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# mypy: ignore-errors
+"""MiMo-Omni multimodal processor for vLLM.
+
+Ported from SGLang's MiMoV2OmniProcessor / MiMoVLProcessor implementations.
+"""
+
+import contextlib
+import copy
+import io
+import logging
+import math
+from collections import OrderedDict
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from dataclasses import dataclass, field
+from io import BytesIO
+from typing import Any, Literal
+
+import numpy as np
+import regex as re
+import requests
+import torch
+import torch.nn.functional as F
+from PIL import Image
+from transformers import BatchFeature, TensorType
+from transformers.processing_utils import ProcessorMixin
+
+try:
+    from torchcodec.decoders import AudioDecoder
+
+    _HAS_TORCHCODEC = True
+except ImportError:
+    AudioDecoder = None
+    _HAS_TORCHCODEC = False
+
+try:
+    import torchaudio
+    from torchaudio.transforms import MelSpectrogram as _MelSpectrogram
+
+    _HAS_TORCHAUDIO = True
+except ImportError:
+    torchaudio = None  # type: ignore[assignment]
+    _MelSpectrogram = None  # type: ignore[assignment,misc]
+    _HAS_TORCHAUDIO = False
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+_PIXEL_MEAN = [123.675, 116.28, 103.53]
+_PIXEL_STD = [58.395, 57.12, 57.375]
+_mean_std_cache: dict[str, tuple[torch.Tensor, torch.Tensor]] = {}
+
+
+# ---------------------------------------------------------------------------
+# Data classes
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class ImageInput:
+    # PIL.Image | str (path/url/base64) | bytes | torch.Tensor (C,H,W)
+    image: Any
+    max_pixels: int | None = None
+    min_pixels: int | None = None
+
+
+@dataclass
+class VideoInput:
+    # tuple[frames_TCHW: torch.Tensor, timestamps_T: torch.Tensor]
+    video: Any
+    min_pixels: int | None = None
+    max_pixels: int | None = None
+    total_max_pixels: int | None = None
+    fps: float | None = None
+    num_frames: int | None = None
+    max_frames: int | None = None
+    min_frames: int | None = None
+    do_include_last_frame: bool | None = False
+    start_time: float | None = None
+    end_time: float | None = None
+    segment_type: Literal["individual", "partial"] = "individual"
+
+
+@dataclass
+class AudioInput:
+    # str (path/url/base64) | bytes | tuple[waveform_1D, sr]
+    # | np.ndarray | torch.Tensor (T,n_vq)
+    audio: Any
+
+
+@dataclass
+class VideoAudioInput:
+    video: Any  # same as VideoInput.video
+    audio: Any  # same as AudioInput.audio
+    min_pixels: int | None = None
+    max_pixels: int | None = None
+    total_max_pixels: int | None = None
+    fps: float | None = None
+    num_frames: int | None = None
+    max_frames: int | None = None
+    min_frames: int | None = None
+    do_include_last_frame: bool | None = False
+    start_time: float | None = None
+    end_time: float | None = None
+    segment_type: Literal["individual", "partial"] = "individual"
+
+
+@dataclass
+class Content:
+    type: Literal["text", "image", "video", "audio", "video_audio"]
+    content: Any
+    is_target: bool | None = None
+
+
+@dataclass
+class MiMoVLInputSample:
+    input_ids: torch.Tensor
+    labels: torch.Tensor | None
+    pixel_values: list[torch.Tensor]
+    pixel_values_videos: list[torch.Tensor]
+    image_thw_grids: list[torch.Tensor]
+    video_thw_grids: list[torch.Tensor]
+    audio_inputs: list[torch.Tensor]
+    second_per_grid_ts: list[float] = field(default_factory=list)
+    video_start_times: list[float] = field(default_factory=list)
+    audio_token_lens: list[int] = field(default_factory=list)
+    va_audio_inputs: list[torch.Tensor] = field(default_factory=list)
+    video_audio_n_segs: list[int] = field(default_factory=list)
+    video_audio_seg_lens: list[int] = field(default_factory=list)
+    position_ids: torch.Tensor | None = None
+    rope_deltas: torch.Tensor | None = None
+    extra: dict = field(default_factory=dict)
+
+
+# ---------------------------------------------------------------------------
+# Vision utilities
+# ---------------------------------------------------------------------------
+
+
+def _format_timestamp(ts: float) -> str:
+    return f"{int(ts // 60):02d}:{int(ts % 60):02d}"
+
+
+def _smart_resize(
+    h: int, w: int, factor: int, min_px: int, max_px: int
+) -> tuple[int, int]:
+    if min(h, w) < factor:
+        if h < w:
+            h, w = factor, int(w * factor / h)
+        else:
+            w, h = factor, int(h * factor / w)
+    elif max(h, w) / min(h, w) > 200:
+        raise ValueError(f"Aspect ratio > 200 not allowed: {h}x{w}")
+    h_bar = round(h / factor) * factor
+    w_bar = round(w / factor) * factor
+    if h_bar * w_bar > max_px:
+        beta = math.sqrt((h * w) / max_px)
+        h_bar = math.floor(h / beta / factor) * factor
+        w_bar = math.floor(w / beta / factor) * factor
+    elif h_bar * w_bar < min_px:
+        beta = math.sqrt(min_px / (h * w))
+        h_bar = math.ceil(h * beta / factor) * factor
+        w_bar = math.ceil(w * beta / factor) * factor
+    return int(h_bar), int(w_bar)
+
+
+def _to_rgb(img: Image.Image) -> Image.Image:
+    if img.mode == "RGBA":
+        bg = Image.new("RGB", img.size, (255, 255, 255))
+        bg.paste(img, mask=img.split()[3])
+        return bg
+    return img.convert("RGB")
+
+
+def _standardize(images: torch.Tensor) -> torch.Tensor:
+    key = str(images.device)
+    if key not in _mean_std_cache:
+        mean = torch.tensor(_PIXEL_MEAN, device=images.device).view(1, -1, 1, 1)
+        std = torch.tensor(_PIXEL_STD, device=images.device).view(1, -1, 1, 1)
+        _mean_std_cache[key] = (mean, std)
+    mean, std = _mean_std_cache[key]
+    return (images - mean) / std
+
+
+def _transform_batch(
+    frames: torch.Tensor,
+    factor: int,
+    min_px: int,
+    max_px: int,
+    device: torch.device | None = None,
+) -> tuple[torch.Tensor, int, int]:
+    if device is not None:
+        frames = frames.to(device)
+    _, _, h, w = frames.shape
+    h_bar, w_bar = _smart_resize(h, w, factor, min_px, max_px)
+    resized = F.interpolate(
+        frames.float(), (h_bar, w_bar), mode="bilinear", align_corners=False
+    )
+    return _standardize(resized), w_bar, h_bar
+
+
+def _transform_single(
+    img: Any,
+    factor: int,
+    min_px: int,
+    max_px: int,
+    device: torch.device | None = None,
+) -> tuple[torch.Tensor, int, int]:
+    if isinstance(img, torch.Tensor):
+        t = img.float()
+        _, h, w = t.shape
+    elif isinstance(img, Image.Image):
+        img = img.convert("RGB")
+        w, h = img.size
+        t = torch.from_numpy(np.array(img)).permute(2, 0, 1).float()
+    else:
+        raise TypeError(f"Expected Tensor or PIL.Image, got {type(img)}")
+    if device is not None:
+        t = t.to(device)
+    h_bar, w_bar = _smart_resize(h, w, factor, min_px, max_px)
+    out = F.interpolate(
+        t.unsqueeze(0), (h_bar, w_bar), mode="bilinear", align_corners=False
+    )
+    return _standardize(out).squeeze(0), w_bar, h_bar
+
+
+def _fetch_image(src: Any) -> Image.Image:
+    if isinstance(src, Image.Image):
+        return _to_rgb(src)
+    if isinstance(src, bytes):
+        return _to_rgb(copy.deepcopy(Image.open(BytesIO(src))))
+    if isinstance(src, str):
+        if src.startswith(("http://", "https://")):
+            r = requests.get(src, timeout=30)
+            r.raise_for_status()
+            return _to_rgb(copy.deepcopy(Image.open(BytesIO(r.content))))
+        if src.startswith("file://"):
+            return _to_rgb(Image.open(src[7:]))
+        if src.startswith("data:image"):
+            import pybase64 as _b64
+
+            _, b64 = src.split("base64,", 1)
+            return _to_rgb(copy.deepcopy(Image.open(BytesIO(_b64.b64decode(b64)))))
+        return _to_rgb(Image.open(src))
+    raise ValueError(f"Unrecognized image source: {type(src)}")
+
+
+# ---------------------------------------------------------------------------
+# Core processor
+# ---------------------------------------------------------------------------
+
+
+class MiMoVLProcessor:
+    """Core MiMo-VL multimodal processor.
+
+    Handles image/video/audio preprocessing and token sequence construction.
+    Ported from SGLang's MiMoVLProcessor.
+    """
+
+    def __init__(
+        self,
+        tokenizer: Any,
+        patch_size: int = 14,
+        merge_size: int = 2,
+        temporal_patch_size: int = 2,
+        temporal_compression_ratio: int = 1,
+        use_video_timestamps: bool = True,
+        video_audio_interleave_length: int = 0,
+        audio_kernel_size: int = 3,
+        audio_stride_size: int = 2,
+        audio_avg_pooler: int = 2,
+        audio_sampling_rate: int = 24000,
+        audio_nfft: int = 960,
+        audio_hop_length: int = 240,
+        audio_window_size: int = 960,
+        audio_fmin: float = 0.0,
+        audio_fmax: float | None = None,
+        audio_n_mels: int = 128,
+        audio_segment_size: int = 6000,
+        audio_channels: int = 8,
+        audio_group_size: int = 4,
+        audio_input_id_per_second: float = 25.0,
+        audio_zeroemb_idx: int = 4096,
+        image_min_pixels: int | None = None,
+        image_max_pixels: int | None = None,
+        video_min_pixels: int | None = None,
+        video_max_pixels: int | None = None,
+        video_total_max_pixels: int | None = None,
+        fps: float | None = None,
+        num_frames: int | None = None,
+        max_frames: int | None = None,
+        min_frames: int | None = None,
+        image_token_id: int | None = None,
+        video_token_id: int | None = None,
+        audio_token_id: int | None = None,
+        vision_start_token_id: int | None = None,
+        vision_end_token_id: int | None = None,
+        audio_start_token_id: int | None = None,
+        audio_end_token_id: int | None = None,
+        video_start_token_id: int | None = None,
+        video_end_token_id: int | None = None,
+        pad_token_id: int | None = None,
+        rope_type: str = "rope",
+        video_process_num_threads: int = 16,
+        device: Any | None = None,
+        **kwargs: Any,
+    ) -> None:
+        self.tokenizer = tokenizer
+        self.video_process_num_threads = video_process_num_threads
+        self.device = torch.device(device) if isinstance(device, str) else device
+
+        self.rope_type = "rope" if rope_type == "1d" else rope_type
+        assert self.rope_type in ("rope", "mrope"), (
+            f"Unknown rope_type: {self.rope_type}"
+        )
+
+        # video timestamps require 1-D rope
+        assert use_video_timestamps, "use_video_timestamps must be True"
+        assert self.rope_type == "rope", (
+            "use_video_timestamps requires rope_type='rope'"
+        )
+        self.use_video_timestamps = use_video_timestamps
+        self.video_audio_interleave_length = video_audio_interleave_length
+
+        self.image_token_id = image_token_id
+        self.video_token_id = video_token_id
+        self.audio_token_id = audio_token_id
+        self.vision_start_token_id = vision_start_token_id
+        self.vision_end_token_id = vision_end_token_id
+        self.audio_start_token_id = audio_start_token_id
+        self.audio_end_token_id = audio_end_token_id
+        self.video_start_token_id = video_start_token_id
+        self.video_end_token_id = video_end_token_id
+        self.pad_token_id = pad_token_id
+
+        self.patch_size = patch_size
+        self.merge_size = merge_size
+        self.temporal_patch_size = temporal_patch_size
+        self.temporal_compression_ratio = temporal_compression_ratio
+
+        self.audio_sampling_rate = audio_sampling_rate
+        self.audio_nfft = audio_nfft
+        self.audio_hop_length = audio_hop_length
+        self.audio_window_size = audio_window_size
+        self.audio_fmin = audio_fmin
+        self.audio_fmax = audio_fmax
+        self.audio_n_mels = audio_n_mels
+        self.audio_segment_size = audio_segment_size
+        self.audio_kernel_size = audio_kernel_size
+        self.audio_stride_size = audio_stride_size
+        self.audio_avg_pooler = audio_avg_pooler
+        self.audio_channels = audio_channels
+        self.audio_group_size = audio_group_size
+        self.audio_input_id_per_second = audio_input_id_per_second
+
+        self._mel_spec_kwargs = dict(
+            sample_rate=audio_sampling_rate,
+            n_fft=audio_nfft,
+            hop_length=audio_hop_length,
+            win_length=audio_window_size,
+            f_min=audio_fmin,
+            f_max=audio_fmax,
+            n_mels=audio_n_mels,
+            power=1.0,
+            center=True,
+        )
+        self._mel_spectrogram: Any | None = None
+        self._resamplers: OrderedDict = OrderedDict()
+        self._resamplers_max = 16
+
+        if isinstance(audio_zeroemb_idx, int):
+            self.audio_zeroemb_idxs = torch.tensor(
+                [audio_zeroemb_idx] * audio_channels, dtype=torch.int32
+            )
+        else:
+            self.audio_zeroemb_idxs = torch.tensor(audio_zeroemb_idx, dtype=torch.int32)
+
+        assert image_min_pixels is not None, "image_min_pixels must be set"
+        assert image_max_pixels is not None, "image_max_pixels must be set"
+        assert video_min_pixels is not None, "video_min_pixels must be set"
+        assert video_max_pixels is not None, "video_max_pixels must be set"
+        assert video_total_max_pixels is not None, "video_total_max_pixels must be set"
+        assert fps is not None or num_frames is not None, (
+            "fps or num_frames must be set"
+        )
+
+        self._img_kw = {"min_pixels": image_min_pixels, "max_pixels": image_max_pixels}
+        self._vid_kw = {
+            "min_pixels": video_min_pixels,
+            "max_pixels": video_max_pixels,
+            "total_max_pixels": video_total_max_pixels,
+            "fps": fps,
+            "num_frames": num_frames,
+            "max_frames": max_frames,
+            "min_frames": min_frames,
+        }
+
+    @property
+    def mel_spectrogram(self) -> Any:
+        if self._mel_spectrogram is None:
+            if _MelSpectrogram is None:
+                raise RuntimeError(
+                    "torchaudio is required for audio. "
+                    "Install with: pip install torchaudio"
+                )
+            self._mel_spectrogram = _MelSpectrogram(**self._mel_spec_kwargs)
+        return self._mel_spectrogram
+
+    def _resolve_img_kw(self, img: ImageInput) -> dict:
+        return {
+            "min_px": (
+                img.min_pixels
+                if img.min_pixels is not None
+                else self._img_kw["min_pixels"]
+            ),
+            "max_px": (
+                img.max_pixels
+                if img.max_pixels is not None
+                else self._img_kw["max_pixels"]
+            ),
+        }
+
+    def _resolve_vid_kw(self, vid: VideoInput) -> dict:
+        kw: dict = {}
+        for k in ("min_pixels", "max_pixels", "total_max_pixels"):
+            kw[k] = getattr(vid, k) or self._vid_kw[k]
+        if vid.num_frames is not None:
+            kw["num_frames"] = vid.num_frames
+        elif vid.fps is not None:
+            kw["fps"] = vid.fps
+            if vid.max_frames is not None:
+                kw["max_frames"] = vid.max_frames
+            if vid.min_frames is not None:
+                kw["min_frames"] = vid.min_frames
+        elif self._vid_kw["num_frames"] is not None:
+            kw["num_frames"] = self._vid_kw["num_frames"]
+        elif self._vid_kw["fps"] is not None:
+            kw["fps"] = self._vid_kw["fps"]
+            if self._vid_kw["max_frames"] is not None:
+                kw["max_frames"] = self._vid_kw["max_frames"]
+            if self._vid_kw["min_frames"] is not None:
+                kw["min_frames"] = self._vid_kw["min_frames"]
+        else:
+            raise ValueError(
+                "No video sampling strategy specified (fps or num_frames)."
+            )
+        return kw
+
+    def preprocess_audio(self, audio: Any) -> tuple[torch.Tensor, int]:
+        """Decode audio bytes/path/tuple → (mel_spec (T, n_mels), token_len)."""
+        if isinstance(audio, tuple):
+            waveform, original_sr = audio
+        else:
+            if AudioDecoder is None:
+                raise RuntimeError(
+                    "torchcodec is required for audio. "
+                    "Install with: pip install torchcodec"
+                )
+            if isinstance(audio, bytes):
+                file_obj: Any = io.BytesIO(audio)
+            elif isinstance(audio, str):
+                if audio.startswith("data:"):
+                    import pybase64 as _b64
+
+                    file_obj = io.BytesIO(_b64.b64decode(audio.split(",")[1]))
+                elif audio.startswith(("http://", "https://")):
+                    r = requests.get(audio, timeout=30)
+                    r.raise_for_status()
+                    file_obj = io.BytesIO(r.content)
+                else:
+                    file_obj = audio
+            else:
+                raise ValueError(f"Unsupported audio source type: {type(audio)}")
+            samples = AudioDecoder(file_obj).get_all_samples()
+            waveform = samples.data
+            original_sr = samples.sample_rate
+
+        if original_sr != self.audio_sampling_rate:
+            if original_sr not in self._resamplers:
+                if len(self._resamplers) >= self._resamplers_max:
+                    self._resamplers.popitem(last=False)
+                self._resamplers[original_sr] = torchaudio.transforms.Resample(
+                    orig_freq=original_sr, new_freq=self.audio_sampling_rate
+                )
+            self._resamplers.move_to_end(original_sr)
+            waveform = self._resamplers[original_sr](waveform)
+
+        if waveform.ndim == 2:
+            waveform = waveform.mean(dim=0)
+        spec = self.mel_spectrogram(waveform[None, :])
+        spec = torch.log(torch.clip(spec, min=1e-7)).squeeze().transpose(0, 1)
+
+        n = spec.shape[0]
+        n = n + 3 - self.audio_kernel_size
+        n = (n + 2 - self.audio_kernel_size) // self.audio_stride_size + 1
+        n = n // self.audio_avg_pooler + int(n % self.audio_avg_pooler != 0)
+        token_len = math.ceil(n / self.audio_group_size)
+        return spec, token_len
+
+    def process_image(self, image: ImageInput) -> torch.Tensor:
+        kw = self._resolve_img_kw(image)
+        src = image.image
+        if isinstance(src, (str, bytes)):
+            src = _fetch_image(src)
+        tensor, _, _ = _transform_single(
+            src,
+            factor=self.patch_size * self.merge_size,
+            device=self.device,
+            **kw,
+        )
+        return tensor
+
+    def process_video(
+        self, video_input: VideoInput
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
+        kw = self._resolve_vid_kw(video_input)
+        video = video_input.video
+        if not isinstance(video, tuple):
+            raise ValueError(
+                f"video must be a (frames_TCHW, timestamps_T) tuple, "
+                f"got {type(video)}. "
+                "Decode the video before calling the processor."
+            )
+        frames, timestamps = video
+
+        fps = (
+            1.0
+            if len(timestamps) < 2
+            else float(1.0 / (float(timestamps[1]) - float(timestamps[0])))
+        )
+        start = (
+            video_input.start_time
+            if video_input.start_time is not None
+            else float(timestamps[0])
+        )
+        end = (
+            video_input.end_time
+            if video_input.end_time is not None
+            else float(timestamps[-1]) + 1.0 / fps
+        )
+
+        if video_input.segment_type != "individual":
+            mask = (timestamps >= start) & (timestamps < end)
+            idxs = torch.where(mask)[0]
+            if len(idxs) == 0:
+                idxs = torch.where(timestamps <= start)[0][-1:]
+            frames, timestamps = frames[idxs], timestamps[idxs]
+
+        tp = self.temporal_patch_size * self.temporal_compression_ratio
+        n = frames.shape[0]
+        total_px = kw["total_max_pixels"]
+        max_px = max(
+            kw["min_pixels"], min(total_px * tp // max(n, 1), kw["max_pixels"])
+        )
+
+        if n % tp != 0:
+            pad = tp - n % tp
+            frames = torch.cat(
+                [frames, frames[-1:].repeat(pad, *([1] * (frames.ndim - 1)))],
+                dim=0,
+            )
+            timestamps = torch.cat([timestamps, timestamps[-1:].repeat(pad)], dim=0)
+
+        transformed, _, _ = _transform_batch(
+            frames,
+            factor=self.patch_size * self.merge_size,
+            min_px=kw["min_pixels"],
+            max_px=max_px,
+            device=self.device,
+        )
+        patches, thw = self._flatten_visual(transformed, "video")
+        meta = {
+            "fps_sampled": fps,
+            "segment_start_time": start,
+            "segment_end_time": end,
+        }
+        return patches, thw, timestamps, meta
+
+    def process_audio(self, audio: AudioInput) -> Any:
+        src = audio.audio
+        if isinstance(src, np.ndarray):
+            src = (torch.from_numpy(src).float(), self.audio_sampling_rate)
+        if isinstance(src, (str, bytes, tuple)):
+            return self.preprocess_audio(src)
+        # Pre-tokenized tensor (T, n_vq)
+        assert isinstance(src, torch.Tensor) and src.ndim == 2
+        T = src.shape[0]
+        src = src[:, : self.audio_channels].to(torch.long)
+        pad_T = (
+            (T + self.audio_group_size - 1)
+            // self.audio_group_size
+            * self.audio_group_size
+        )
+        padding = (
+            torch.zeros(pad_T - T, self.audio_channels, dtype=torch.long) + src[-1]
+        )
+        src = torch.cat([src, padding], dim=0)
+        return src.reshape(
+            pad_T // self.audio_group_size, self.audio_group_size, self.audio_channels
+        )
+
+    def _flatten_visual(
+        self, visual: torch.Tensor, kind: str
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        if kind == "image":
+            h, w = visual.shape[-2:]
+            patches = visual.unsqueeze(0).repeat(self.temporal_patch_size, 1, 1, 1)
+        else:  # video / video_audio
+            temporal_stride = self.temporal_compression_ratio * self.temporal_patch_size
+            assert visual.shape[0] % temporal_stride == 0
+            patches = visual
+            h, w = patches.shape[-2:]
+
+        C = patches.shape[1]
+        grid_t = patches.shape[0] // self.temporal_patch_size
+        grid_h, grid_w = h // self.patch_size, w // self.patch_size
+
+        patches = (
+            patches.contiguous()
+            .view(
+                grid_t,
+                self.temporal_patch_size,
+                C,
+                grid_h // self.merge_size,
+                self.merge_size,
+                self.patch_size,
+                grid_w // self.merge_size,
+                self.merge_size,
+                self.patch_size,
+            )
+            .permute(0, 3, 6, 4, 7, 2, 1, 5, 8)
+            .contiguous()
+            .view(
+                grid_t * grid_h * grid_w,
+                C * self.temporal_patch_size * self.patch_size * self.patch_size,
+            )
+        )
+        thw = torch.tensor([grid_t, grid_h, grid_w], dtype=torch.int32)
+        return patches, thw
+
+    def process(
+        self, contents: list[Content], verbose: bool = False
+    ) -> MiMoVLInputSample:
+        input_ids: list[int] = []
+        labels: list[int] = []
+        img_pv: list[torch.Tensor] = []
+        img_grids: list[torch.Tensor] = []
+        vid_pv: list[torch.Tensor] = []
+        vid_grids: list[torch.Tensor] = []
+        audio_inputs: list[torch.Tensor] = []
+        is_audio_tokenized: list[bool] = []
+        audio_token_lens: list[int] = []
+        second_per_grid_ts: list[float] = []
+        video_start_times: list[float] = []
+        va_audio_inputs: list[torch.Tensor] = []
+        video_audio_n_segs: list[int] = []
+        video_audio_seg_lens: list[int] = []
+
+        # Pre-decode videos in parallel
+        vid_info = [
+            (i, c.content, c.type == "video_audio")
+            for i, c in enumerate(contents)
+            if c.type in ("video", "video_audio")
+        ]
+        vid_results: dict[int, tuple] = {}
+        if vid_info:
+            n_t = min(self.video_process_num_threads, len(vid_info))
+            if n_t > 1 and len(vid_info) > 1:
+                with ThreadPoolExecutor(max_workers=n_t) as ex:
+                    fut_map = {
+                        ex.submit(self.process_video, vi): idx
+                        for idx, vi, _ in vid_info
+                    }
+                    for fut in as_completed(fut_map):
+                        vid_results[fut_map[fut]] = fut.result()
+            else:
+                for idx, vi, _ in vid_info:
+                    vid_results[idx] = self.process_video(vi)
+
+        for ci, content in enumerate(contents):
+            _ids: list[int] = []
+            _lbls: list[int] | None = None
+
+            if content.type == "text":
+                _ids = (
+                    self.tokenizer.encode(content.content)
+                    if isinstance(content.content, str)
+                    else list(content.content)
+                )
+                if content.is_target:
+                    _lbls = _ids
+
+            elif content.type == "image":
+                tensor = self.process_image(content.content)
+                patches, thw = self._flatten_visual(tensor, "image")
+                t, h, w = thw.tolist()
+                n_tok = (t * h * w) // (self.merge_size**2)
+                img_pv.append(patches)
+                img_grids.append(thw)
+                _ids = (
+                    [self.vision_start_token_id]
+                    + [self.image_token_id] * n_tok
+                    + [self.vision_end_token_id]
+                )
+
+            elif content.type == "video":
+                patches, thw, ts, meta = vid_results[ci]
+                t, h, w = thw.tolist()
+                n_per_grid = h * w // (self.merge_size**2)
+                vid_pv.append(patches)
+                vid_grids.append(thw)
+                second_per_grid_ts.append(
+                    self.temporal_patch_size / meta["fps_sampled"]
+                )
+                video_start_times.append(float(ts[0]))
+                video_audio_n_segs.append(0)
+
+                stride = self.temporal_patch_size * self.temporal_compression_ratio
+                ts_texts = [_format_timestamp(float(x)) for x in ts[::stride]]
+                ts_ids_list = [self.tokenizer.encode(s) for s in ts_texts]
+
+                _ids = [self.video_start_token_id]
+                for ts_ids in ts_ids_list:
+                    _ids += (
+                        ts_ids
+                        + [self.vision_start_token_id]
+                        + [self.video_token_id] * n_per_grid
+                        + [self.vision_end_token_id]
+                    )
+                _ids += [self.video_end_token_id]
+
+            elif content.type == "audio":
+                processed = self.process_audio(content.content)
+                if isinstance(processed, tuple):
+                    is_audio_tokenized.append(False)
+                    spec, tok_len = processed
+                    audio_inputs.append(spec)
+                else:
+                    is_audio_tokenized.append(True)
+                    tok_len = processed.shape[0]
+                    audio_inputs.append(processed)
+                audio_token_lens.append(tok_len)
+                _ids = (
+                    [self.audio_start_token_id]
+                    + [self.audio_token_id] * tok_len
+                    + [self.audio_end_token_id]
+                )
+
+            elif content.type == "video_audio":
+                patches, thw, ts, meta = vid_results[ci]
+                second_per_grid_ts.append(
+                    self.temporal_patch_size / meta["fps_sampled"]
+                )
+                video_start_times.append(float(ts[0]))
+                processed_audio = self.process_audio(content.content)
+                tok_per_sec = self.audio_input_id_per_second / self.audio_group_size
+
+                t, h, w = thw.tolist()
+                vid_pv.append(patches)
+                vid_grids.append(thw)
+
+                if isinstance(processed_audio, tuple):
+                    # Mel spec (not pre-tokenized): store in va_audio_inputs separately
+                    spec, total_atok = processed_audio
+                    va_audio_inputs.append(spec)
+                    _va_is_tokenized = False
+                else:
+                    # Pre-tokenized: not expected in vLLM, but handle defensively
+                    total_atok = processed_audio.shape[0]
+                    _va_is_tokenized = True
+
+                n_per_grid = h * w // (self.merge_size**2)
+                stride = self.temporal_patch_size * self.temporal_compression_ratio
+                grid_ts = ts[::stride]
+                ts_texts = [_format_timestamp(float(x)) for x in grid_ts]
+                ts_ids_list = [self.tokenizer.encode(s) for s in ts_texts]
+
+                units: list[tuple] = []
+                for i in range(len(grid_ts)):
+                    a_start = int(float(grid_ts[i]) * tok_per_sec)
+                    a_end = (
+                        int(float(grid_ts[i + 1]) * tok_per_sec)
+                        if i < len(grid_ts) - 1
+                        else int(meta["segment_end_time"] * tok_per_sec)
+                    )
+                    seg_len = min(a_end, total_atok) - a_start
+                    assert seg_len > 0, f"Zero-length audio segment at grid index {i}"
+                    seg = (
+                        processed_audio[a_start : a_start + seg_len]
+                        if _va_is_tokenized
+                        else None
+                    )
+                    units.append(
+                        (
+                            float(grid_ts[i]),
+                            ts_texts[i],
+                            ts_ids_list[i],
+                            n_per_grid,
+                            seg_len,
+                            seg,
+                        )
+                    )
+
+                il = self.video_audio_interleave_length
+                if il == -1:
+                    groups: list[list] = [list(enumerate(units))]
+                elif il == 0:
+                    groups = [[(i, u)] for i, u in enumerate(units)]
+                else:
+                    groups, cur, t_ptr = [], [], 0.0
+                    for i, u in enumerate(units):
+                        while u[0] >= t_ptr + il:
+                            if cur:
+                                groups.append(cur)
+                                cur = []
+                            t_ptr += il
+                        cur.append((i, u))
+                    if cur:
+                        groups.append(cur)
+
+                # Track n_segs (= num groups) and per-group audio token counts
+                video_audio_n_segs.append(len(groups))
+                for group in groups:
+                    group_seg_len = sum(u[4] for _, u in group)
+                    video_audio_seg_lens.append(group_seg_len)
+
+                _ids = [self.video_start_token_id]
+                for group in groups:
+                    _ids += group[0][1][2]  # first-unit timestamp token ids
+                    _vid_tok: list[int] = []
+                    _aud_tok: list[int] = []
+                    for _, u in group:
+                        _, _, _, vid_n, seg_n, seg_audio = u
+                        _vid_tok += (
+                            [self.vision_start_token_id]
+                            + [self.video_token_id] * vid_n
+                            + [self.vision_end_token_id]
+                        )
+                        _aud_tok += [self.audio_token_id] * seg_n
+                        if seg_audio is not None:
+                            # Pre-tokenized per-frame segments (rare in vLLM)
+                            audio_inputs.append(seg_audio)
+                    _ids += (
+                        _vid_tok
+                        + [self.audio_start_token_id]
+                        + _aud_tok
+                        + [self.audio_end_token_id]
+                    )
+                _ids += [self.video_end_token_id]
+
+            input_ids.extend(_ids)
+            labels.extend(
+                _lbls if _lbls is not None else [self.pad_token_id] * len(_ids)
+            )
+
+        ids_t = torch.tensor(input_ids)
+        lbl_arr = np.roll(labels, shift=-1)
+        lbl_arr[-1] = self.pad_token_id
+        lbl_t = torch.tensor(lbl_arr)
+
+        extra: dict = {}
+        if is_audio_tokenized:
+            assert all(is_audio_tokenized) or not any(is_audio_tokenized)
+            extra["is_audio_tokenized"] = is_audio_tokenized[0]
+
+        position_ids = torch.arange(ids_t.shape[0]).expand(3, -1)
+        rope_deltas = torch.zeros((1, 1), dtype=torch.int32)
+
+        return MiMoVLInputSample(
+            input_ids=ids_t,
+            labels=lbl_t,
+            pixel_values=img_pv,
+            pixel_values_videos=vid_pv,
+            image_thw_grids=img_grids,
+            video_thw_grids=vid_grids,
+            audio_inputs=audio_inputs,
+            second_per_grid_ts=second_per_grid_ts,
+            video_start_times=video_start_times,
+            audio_token_lens=audio_token_lens,
+            va_audio_inputs=va_audio_inputs,
+            video_audio_n_segs=video_audio_n_segs,
+            video_audio_seg_lens=video_audio_seg_lens,
+            position_ids=position_ids,
+            rope_deltas=rope_deltas,
+            extra=extra,
+        )
+
+
+# ---------------------------------------------------------------------------
+# vLLM ProcessorMixin wrapper
+# ---------------------------------------------------------------------------
+
+
+class MiMoOmniProcessor(ProcessorMixin):
+    """HuggingFace-compatible ProcessorMixin wrapper for MiMo-Omni.
+
+    Accepts PIL images, pre-decoded video tuples (frames_TCHW, timestamps_T),
+    and audio (file path / bytes / (waveform, sr) tuple / numpy array).
+    """
+
+    attributes = ["tokenizer"]
+    tokenizer_class = "AutoTokenizer"
+
+    # Single or multi-pad placeholders produced by the chat template / prior expansion
+    _IMG_RE = re.compile(r"<\|vision_start\|>(?:<\|image_pad\|>)+<\|vision_end\|>")
+    _VID_RE = re.compile(r"<\|vision_start\|>(?:<\|video_pad\|>)+<\|vision_end\|>")
+    _AUD_RE = re.compile(
+        r"<\|mimo_audio_start\|>(?:<\|audio_pad\|>)+<\|mimo_audio_end\|>"
+    )
+
+    _MM_RE = re.compile(
+        r"(<\|vision_start\|>(?:<\|image_pad\|>)+<\|vision_end\|>"
+        r"|<\|vision_start\|>(?:<\|video_pad\|>)+<\|vision_end\|>"
+        r"|<\|mimo_audio_start\|>(?:<\|audio_pad\|>)+<\|mimo_audio_end\|>)"
+    )
+
+    def __init__(
+        self,
+        tokenizer: Any,
+        *,
+        patch_size: int = 14,
+        merge_size: int = 2,
+        temporal_patch_size: int = 2,
+        temporal_compression_ratio: int = 1,
+        image_min_pixels: int | None = None,
+        image_max_pixels: int | None = None,
+        video_min_pixels: int | None = None,
+        video_max_pixels: int | None = None,
+        video_total_max_pixels: int | None = None,
+        fps: float = 2.0,
+        num_frames: int | None = None,
+        max_frames: int = 256,
+        min_frames: int = 8,
+        video_audio_interleave_length: int = 0,
+        audio_sampling_rate: int = 24000,
+        audio_nfft: int = 960,
+        audio_hop_length: int = 240,
+        audio_window_size: int = 960,
+        audio_fmin: float = 0.0,
+        audio_fmax: float | None = None,
+        audio_n_mels: int = 128,
+        audio_segment_size: int = 6000,
+        audio_kernel_size: int = 3,
+        audio_stride_size: int = 2,
+        audio_avg_pooler: int = 2,
+        audio_channels: int = 8,
+        audio_group_size: int = 4,
+        audio_input_id_per_second: float = 25.0,
+        audio_zeroemb_idx: int = 4096,
+        image_token_id: int | None = None,
+        video_token_id: int | None = None,
+        audio_token_id: int | None = None,
+        vision_start_token_id: int | None = None,
+        vision_end_token_id: int | None = None,
+        audio_start_token_id: int | None = None,
+        audio_end_token_id: int | None = None,
+        video_start_token_id: int | None = None,
+        video_end_token_id: int | None = None,
+        rope_type: str = "rope",
+    ) -> None:
+        self.tokenizer = tokenizer
+
+        unit = patch_size * merge_size
+        self.mimo_processor = MiMoVLProcessor(
+            tokenizer=tokenizer,
+            patch_size=patch_size,
+            merge_size=merge_size,
+            temporal_patch_size=temporal_patch_size,
+            temporal_compression_ratio=temporal_compression_ratio,
+            use_video_timestamps=True,
+            video_audio_interleave_length=video_audio_interleave_length,
+            audio_sampling_rate=audio_sampling_rate,
+            audio_nfft=audio_nfft,
+            audio_hop_length=audio_hop_length,
+            audio_window_size=audio_window_size,
+            audio_fmin=audio_fmin,
+            audio_fmax=audio_fmax,
+            audio_n_mels=audio_n_mels,
+            audio_segment_size=audio_segment_size,
+            audio_kernel_size=audio_kernel_size,
+            audio_stride_size=audio_stride_size,
+            audio_avg_pooler=audio_avg_pooler,
+            audio_channels=audio_channels,
+            audio_group_size=audio_group_size,
+            audio_input_id_per_second=audio_input_id_per_second,
+            audio_zeroemb_idx=audio_zeroemb_idx,
+            image_min_pixels=image_min_pixels or (4 * unit * unit),
+            image_max_pixels=image_max_pixels or (4096 * unit * unit),
+            video_min_pixels=video_min_pixels or (4 * unit * unit),
+            video_max_pixels=video_max_pixels or (4096 * unit * unit),
+            video_total_max_pixels=video_total_max_pixels or (16384 * unit * unit),
+            fps=fps,
+            num_frames=num_frames,
+            max_frames=max_frames,
+            min_frames=min_frames,
+            image_token_id=image_token_id,
+            video_token_id=video_token_id,
+            audio_token_id=audio_token_id,
+            vision_start_token_id=vision_start_token_id,
+            vision_end_token_id=vision_end_token_id,
+            audio_start_token_id=audio_start_token_id,
+            audio_end_token_id=audio_end_token_id,
+            video_start_token_id=video_start_token_id,
+            video_end_token_id=video_end_token_id,
+            pad_token_id=tokenizer.pad_token_id,
+            rope_type=rope_type,
+        )
+
+    @classmethod
+    def from_hf_config(cls, tokenizer: Any, hf_config: Any) -> "MiMoOmniProcessor":
+        """Convenience factory: instantiate directly from an HF model config object."""
+        vc = hf_config.vision_config
+        if isinstance(vc, dict):
+            patch_size = vc.get("patch_size", 14)
+            merge_size = vc.get("spatial_merge_size", 2)
+            temporal_patch_size = vc.get("temporal_patch_size", 2)
+        else:
+            patch_size = getattr(vc, "patch_size", 14)
+            merge_size = getattr(vc, "spatial_merge_size", 2)
+            temporal_patch_size = getattr(vc, "temporal_patch_size", 2)
+
+        pc: dict = getattr(hf_config, "processor_config", {}) or {}
+        ac = getattr(hf_config, "audio_config", None)
+        audio_sr: int | None = pc.get("audio_sampling_rate")
+        if audio_sr is None and ac is not None:
+            if isinstance(ac, dict):
+                audio_sr = ac.get("sampling_rate") or ac.get("sample_rate")
+            else:
+                audio_sr = getattr(ac, "sampling_rate", None) or getattr(
+                    ac, "sample_rate", None
+                )
+
+        rope_type = "rope"
+        rs = getattr(hf_config, "rope_scaling", None)
+        if rs and rs.get("type") == "default" and rs.get("mrope_section") is not None:
+            rope_type = "mrope"
+
+        unit = patch_size * merge_size
+        return cls(
+            tokenizer,
+            patch_size=patch_size,
+            merge_size=merge_size,
+            temporal_patch_size=temporal_patch_size,
+            image_min_pixels=pc.get("image_min_pixels") or (4 * unit * unit),
+            image_max_pixels=pc.get("image_max_pixels") or (4096 * unit * unit),
+            video_min_pixels=pc.get("video_min_pixels") or (4 * unit * unit),
+            video_max_pixels=pc.get("video_max_pixels") or (4096 * unit * unit),
+            video_total_max_pixels=(
+                pc.get("video_total_max_pixels") or (16384 * unit * unit)
+            ),
+            fps=pc.get("fps") or 2.0,
+            num_frames=pc.get("num_frames"),
+            max_frames=pc.get("max_frames") or 256,
+            min_frames=pc.get("min_frames") or 8,
+            video_audio_interleave_length=pc.get("video_audio_interleave_length", 0),
+            audio_sampling_rate=audio_sr or 24000,
+            image_token_id=pc.get("image_token_id"),
+            video_token_id=pc.get("video_token_id"),
+            audio_token_id=pc.get("audio_token_id"),
+            vision_start_token_id=pc.get("vision_start_token_id"),
+            vision_end_token_id=pc.get("vision_end_token_id"),
+            audio_start_token_id=pc.get("audio_start_token_id"),
+            audio_end_token_id=pc.get("audio_end_token_id"),
+            video_start_token_id=pc.get("video_start_token_id"),
+            video_end_token_id=pc.get("video_end_token_id"),
+            rope_type=rope_type,
+        )
+
+    @property
+    def image_token(self) -> str:
+        """Token string used as image placeholder (for vLLM integration)."""
+        return "<|image_pad|>"
+
+    @property
+    def video_token(self) -> str:
+        """Token string used as video placeholder (for vLLM integration)."""
+        return "<|video_pad|>"
+
+    @property
+    def image_processor(self) -> Any:
+        """Minimal image-processor-like object for vLLM processing-info compat."""
+        p = self.mimo_processor
+
+        class _ImageProcessor:
+            merge_size = p.merge_size
+            size = {
+                "shortest_edge": p._img_kw["min_pixels"],
+                "longest_edge": p._img_kw["max_pixels"],
+            }
+
+        return _ImageProcessor()
+
+    def _modality(self, token: str) -> str:
+        if self._IMG_RE.fullmatch(token):
+            return "image"
+        if self._VID_RE.fullmatch(token):
+            return "video"
+        if self._AUD_RE.fullmatch(token):
+            return "audio"
+        return "unknown"
+
+    def __call__(
+        self,
+        text: str | list[str] | None = None,
+        images: Any = None,
+        videos: Any = None,
+        audio: Any = None,
+        video_audio: Any = None,
+        return_tensors: str | TensorType | None = None,
+        **kwargs: Any,
+    ) -> BatchFeature:
+        """Process multimodal inputs into model-ready tensors.
+
+        Args:
+            text: Prompt string(s) containing multimodal placeholders
+                  ``<|vision_start|><|image_pad|><|vision_end|>``,
+                  ``<|vision_start|><|video_pad|><|vision_end|>``, or
+                  ``<|mimo_audio_start|><|audio_pad|><|mimo_audio_end|>``.
+            images: PIL.Image or list[PIL.Image].
+            videos: list of ``(frames_TCHW: torch.Tensor, timestamps_T: torch.Tensor)``
+                    tuples (pre-decoded).
+            audio: list of ``str`` (path/url/base64), ``bytes``,
+                   ``(waveform_1D, sample_rate)`` tuples, or ``np.ndarray``.
+            return_tensors: Passed to :class:`BatchFeature`.
+
+        Returns:
+            :class:`BatchFeature` with keys:
+            - ``input_ids``
+            - ``pixel_values`` + ``image_grid_thw``
+            - ``pixel_values_videos`` + ``video_grid_thw`` + ``second_per_grid_ts``
+            - ``audio_features``
+        """
+        if isinstance(text, list):
+            text = text[0] if len(text) == 1 else "\n".join(text)
+
+        imgs: list = (
+            ([images] if isinstance(images, Image.Image) else list(images))
+            if images is not None
+            else []
+        )
+        vids: list = list(videos) if videos is not None else []
+        auds: list = list(audio) if audio is not None else []
+        va_items: list = list(video_audio) if video_audio is not None else []
+
+        # If audio exists but text has no audio placeholder, prepend one
+        _aud_placeholder = "<|mimo_audio_start|><|audio_pad|><|mimo_audio_end|>"
+        if auds and text is not None and not self._AUD_RE.search(text):
+            text = _aud_placeholder + text
+
+        # Build Content list
+        contents: list[Content] = []
+
+        if text and (imgs or vids or auds or va_items):
+            parts = self._MM_RE.split(text)
+            img_it = iter(imgs)
+            vid_it = iter(vids)
+            aud_it = iter(auds)
+            va_it = iter(va_items)
+            for part in parts:
+                if self._MM_RE.fullmatch(part):
+                    mod = self._modality(part)
+                    if mod == "image":
+                        with contextlib.suppress(StopIteration):
+                            contents.append(
+                                Content(
+                                    type="image",
+                                    content=ImageInput(image=next(img_it)),
+                                )
+                            )
+                    elif mod == "video":
+                        # Try regular video first, fall back to video_audio
+                        vid_item = None
+                        vid_type = "video"
+                        with contextlib.suppress(StopIteration):
+                            vid_item = next(vid_it)
+                        if vid_item is None:
+                            with contextlib.suppress(StopIteration):
+                                vid_item = next(va_it)
+                                vid_type = "video_audio"
+                        if vid_item is not None:
+                            if vid_type == "video":
+                                contents.append(
+                                    Content(
+                                        type="video",
+                                        content=VideoInput(video=vid_item),
+                                    )
+                                )
+                            else:
+                                contents.append(
+                                    Content(
+                                        type="video_audio",
+                                        content=vid_item,
+                                    )
+                                )
+                    elif mod == "audio":
+                        with contextlib.suppress(StopIteration):
+                            contents.append(
+                                Content(
+                                    type="audio",
+                                    content=AudioInput(audio=next(aud_it)),
+                                )
+                            )
+                elif part:
+                    contents.append(Content(type="text", content=part))
+        elif text:
+            contents.append(Content(type="text", content=text))
+        else:
+            for img in imgs:
+                contents.append(Content(type="image", content=ImageInput(image=img)))
+            for vid in vids:
+                contents.append(Content(type="video", content=VideoInput(video=vid)))
+            for aud in auds:
+                contents.append(Content(type="audio", content=AudioInput(audio=aud)))
+            for va_item in va_items:
+                contents.append(Content(type="video_audio", content=va_item))
+
+        if not contents:
+            ids = self.tokenizer(text or "", return_tensors=return_tensors)["input_ids"]
+            return BatchFeature(data={"input_ids": ids}, tensor_type=return_tensors)
+
+        sample = self.mimo_processor.process(contents, verbose=False)
+
+        # vLLM expects input_ids to have a batch dimension [1, seq_len].
+        data: dict = {"input_ids": sample.input_ids.unsqueeze(0)}
+
+        if sample.pixel_values:
+            data["pixel_values"] = torch.cat(sample.pixel_values, dim=0)
+            data["image_grid_thw"] = torch.stack(sample.image_thw_grids)
+
+        if sample.pixel_values_videos:
+            data["pixel_values_videos"] = torch.cat(sample.pixel_values_videos, dim=0)
+            data["video_grid_thw"] = torch.stack(sample.video_thw_grids)
+            if sample.second_per_grid_ts:
+                data["second_per_grid_ts"] = torch.tensor(
+                    sample.second_per_grid_ts, dtype=torch.float32
+                )
+            if sample.video_start_times:
+                data["video_start_times"] = torch.tensor(
+                    sample.video_start_times, dtype=torch.float32
+                )
+            if sample.video_audio_n_segs:
+                data["video_audio_n_segs"] = torch.tensor(
+                    sample.video_audio_n_segs, dtype=torch.long
+                )
+            # video_audio_seg_lens: 2D padded tensor (num_videos, max_T).
+            # Row i has the per-group audio token lengths for video i
+            # (zeros for regular videos; valid values for video_audio videos).
+            n_segs_list = sample.video_audio_n_segs
+            max_segs = max(n_segs_list) if n_segs_list else 0
+            if max_segs > 0:
+                seg_lens_2d = torch.zeros(len(n_segs_list), max_segs, dtype=torch.long)
+                flat_cursor = 0
+                for vi, n in enumerate(n_segs_list):
+                    if n > 0:
+                        seg_lens_2d[vi, :n] = torch.tensor(
+                            sample.video_audio_seg_lens[flat_cursor : flat_cursor + n],
+                            dtype=torch.long,
+                        )
+                        flat_cursor += n
+                data["video_audio_seg_lens"] = seg_lens_2d
+
+        # audio_features is a list of variable-length mel-spec tensors; pop it
+        # before BatchFeature conversion to avoid "batched tensors of the same
+        # length" errors, then re-attach it after.
+        audio_features = None
+        if sample.audio_inputs:
+            audio_features = sample.audio_inputs
+            if "is_audio_tokenized" in sample.extra:
+                data["is_audio_tokenized"] = sample.extra["is_audio_tokenized"]
+            if sample.audio_token_lens:
+                data["audio_token_lens"] = torch.tensor(
+                    sample.audio_token_lens, dtype=torch.long
+                )
+
+        bf = BatchFeature(data=data, tensor_type=return_tensors)
+        if audio_features is not None:
+            bf["audio_features"] = audio_features
+        # va_audio_features: list of mel-spec tensors (one per video_audio item)
+        if sample.va_audio_inputs:
+            bf["va_audio_features"] = sample.va_audio_inputs
+        return bf
diff --git a/vllm/transformers_utils/processors/moondream3.py b/vllm/transformers_utils/processors/moondream3.py
new file mode 100644
index 000000000000..289c40dd175e
--- /dev/null
+++ b/vllm/transformers_utils/processors/moondream3.py
@@ -0,0 +1,541 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Custom processor for Moondream3 model."""
+
+import math
+
+import numpy as np
+import torch
+from PIL import Image
+from transformers import AutoProcessor, BatchFeature
+from transformers.image_utils import ImageInput
+from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
+from transformers.tokenization_utils_base import (
+    PreTokenizedInput,
+    PreTrainedTokenizerBase,
+    TextInput,
+)
+
+from vllm.multimodal.image import convert_image_mode
+
+__all__ = ["Moondream3Processor"]
+
+
+class Moondream3ProcessorKwargs(ProcessingKwargs, total=False):  # type: ignore[call-arg]
+    _defaults = {
+        "text_kwargs": {
+            "padding": False,
+        },
+        "images_kwargs": {
+            "max_crops": 12,
+            "overlap_margin": 4,
+            "crop_size": 378,
+            "patch_size": 14,
+            "convert_to_rgb": True,
+            "return_tensors": "pt",
+        },
+    }
+
+
+def select_tiling(
+    height: int, width: int, crop_size: int, max_crops: int
+) -> tuple[int, int]:
+    """Determine the optimal number of tiles to cover an image."""
+    if height <= crop_size or width <= crop_size:
+        return (1, 1)
+
+    min_h = math.ceil(height / crop_size)
+    min_w = math.ceil(width / crop_size)
+
+    if min_h * min_w > max_crops:
+        ratio = math.sqrt(max_crops / (min_h * min_w))
+        return (max(1, math.floor(min_h * ratio)), max(1, math.floor(min_w * ratio)))
+
+    h_tiles = math.floor(math.sqrt(max_crops * height / width))
+    w_tiles = math.floor(math.sqrt(max_crops * width / height))
+
+    h_tiles = max(h_tiles, min_h)
+    w_tiles = max(w_tiles, min_w)
+
+    if h_tiles * w_tiles > max_crops:
+        if w_tiles > h_tiles:
+            w_tiles = math.floor(max_crops / h_tiles)
+        else:
+            h_tiles = math.floor(max_crops / w_tiles)
+
+    return (max(1, h_tiles), max(1, w_tiles))
+
+
+class Moondream3Processor(ProcessorMixin):
+    """
+    Constructs a Moondream3 processor which handles image preprocessing
+    and tokenization for the Moondream3 multimodal model.
+
+    Args:
+        tokenizer: The tokenizer to use for text processing.
+        chat_template: Optional chat template string.
+        crop_size: Size of each image crop.
+        max_crops: Maximum number of crops per image.
+        overlap_margin: Margin for overlapping crops in patches.
+        patch_size: Size of each patch.
+    """
+
+    attributes = ["tokenizer"]
+    valid_kwargs = [
+        "chat_template",
+        "crop_size",
+        "max_crops",
+        "overlap_margin",
+        "patch_size",
+    ]
+
+    tokenizer_class = "AutoTokenizer"
+    # Use separate tokenizer repo
+    _tokenizer_repo = "moondream/starmie-v1"
+
+    # Default chat template for Moondream3
+    # Moondream uses special tokens for prompting:
+    # - Token 0 (<|endoftext|>): BOS token (ALWAYS present at position 0)
+    # - Token 1 (<|md_reserved_0|>): Start of instruction
+    # - Token 2 (<|md_reserved_1|>): Separator before question
+    # - Token 3 (<|md_reserved_2|>): End of question / start of answer
+    #
+    # Task routing based on text prefix:
+    #   "caption [short|normal|long]" → describe<|md_reserved_1|>{length}
+    #   "describe [short|normal|long]" → describe<|md_reserved_1|>{length}
+    #   otherwise                      → query<|md_reserved_1|><text>
+    #
+    # Format with image:
+    #   <|endoftext|><image><|md_reserved_0|>{task}<|md_reserved_1|>{q}<|md_reserved_2|>
+    # Format without image:
+    #   <|endoftext|><|md_reserved_0|>{task}<|md_reserved_1|>{q}<|md_reserved_2|>
+    _default_chat_template = (
+        "{% for message in messages %}"
+        "{% if message['role'] == 'user' %}"
+        "{% if message['content'] is string %}"
+        # Simple string content (with image assumed) - route by prefix
+        "<|endoftext|><image><|md_reserved_0|>"
+        "{% if message['content'] == 'caption' %}"
+        "describe<|md_reserved_1|>normal<|md_reserved_2|>"
+        "{% elif message['content'].startswith('caption ') %}"
+        "describe<|md_reserved_1|>{{ message['content'][8:] }}<|md_reserved_2|>"
+        "{% elif message['content'] == 'describe' %}"
+        "describe<|md_reserved_1|>normal<|md_reserved_2|>"
+        "{% elif message['content'].startswith('describe ') %}"
+        "describe<|md_reserved_1|>{{ message['content'][9:] }}<|md_reserved_2|>"
+        "{% else %}"
+        "query<|md_reserved_1|>{{ message['content'] }}<|md_reserved_2|>"
+        "{% endif %}"
+        "{% else %}"
+        # List content - build Moondream's image prefix independently of
+        # OpenAI-style content part order, then render the text task.
+        "<|endoftext|>"
+        "{% for content in message['content'] %}"
+        "{% if content['type'] in ['image', 'image_url', 'input_image', 'image_pil'] %}"  # noqa: E501
+        "<image>"
+        "{% endif %}"
+        "{% endfor %}"
+        "{% for content in message['content'] %}"
+        "{% if content['type'] == 'text' %}"
+        "<|md_reserved_0|>"
+        "{% if content['text'] == 'caption' %}"
+        "describe<|md_reserved_1|>normal<|md_reserved_2|>"
+        "{% elif content['text'].startswith('caption ') %}"
+        "describe<|md_reserved_1|>{{ content['text'][8:] }}<|md_reserved_2|>"
+        "{% elif content['text'] == 'describe' %}"
+        "describe<|md_reserved_1|>normal<|md_reserved_2|>"
+        "{% elif content['text'].startswith('describe ') %}"
+        "describe<|md_reserved_1|>{{ content['text'][9:] }}<|md_reserved_2|>"
+        "{% else %}"
+        "query<|md_reserved_1|>{{ content['text'] }}<|md_reserved_2|>"
+        "{% endif %}"
+        "{% endif %}"
+        "{% endfor %}"
+        "{% endif %}"
+        "{% elif message['role'] == 'assistant' %}"
+        "{{ message['content'] }}"
+        "{% endif %}"
+        "{% endfor %}"
+    )
+
+    def __init__(
+        self,
+        tokenizer: PreTrainedTokenizerBase | None = None,
+        chat_template: str | None = None,
+        crop_size: int = 378,
+        max_crops: int = 12,
+        overlap_margin: int = 4,
+        patch_size: int = 14,
+        **kwargs,
+    ):
+        self.image_token = "<image>"
+        self.crop_size = crop_size
+        self.max_crops = max_crops
+        self.overlap_margin = overlap_margin
+        self.patch_size = patch_size
+
+        # Number of patches per crop (27x27 = 729 for 378/14)
+        self.patches_per_crop = (crop_size // patch_size) ** 2
+
+        # Use default chat template if none provided
+        if chat_template is None:
+            chat_template = self._default_chat_template
+
+        super().__init__(tokenizer, chat_template=chat_template)
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        pretrained_model_name_or_path,
+        **kwargs,
+    ):
+        """
+        Load the processor, using a separate tokenizer repo.
+
+        The moondream3 model uses a custom tokenizer from 'moondream/starmie-v1'
+        instead of having tokenizer files in the model repo.
+        """
+        from transformers import AutoTokenizer, PreTrainedTokenizerFast
+        from transformers.utils import cached_file
+
+        tokenizer = kwargs.pop("tokenizer", None)
+
+        tokenizer_kwargs = {
+            "trust_remote_code": kwargs.get("trust_remote_code", False),
+        }
+        for key in (
+            "cache_dir",
+            "force_download",
+            "local_files_only",
+            "revision",
+            "subfolder",
+            "token",
+            "use_fast",
+        ):
+            if key in kwargs:
+                tokenizer_kwargs[key] = kwargs[key]
+
+        cached_file_kwargs = {
+            key: tokenizer_kwargs[key]
+            for key in (
+                "cache_dir",
+                "force_download",
+                "local_files_only",
+                "revision",
+                "subfolder",
+                "token",
+            )
+            if key in tokenizer_kwargs
+        }
+
+        def load_tokenizer(repo_or_path):
+            try:
+                return AutoTokenizer.from_pretrained(repo_or_path, **tokenizer_kwargs)
+            except Exception:
+                tokenizer_file = cached_file(
+                    repo_or_path,
+                    "tokenizer.json",
+                    **cached_file_kwargs,
+                )
+                return PreTrainedTokenizerFast(
+                    tokenizer_file=tokenizer_file,
+                    clean_up_tokenization_spaces=False,
+                )
+
+        if isinstance(tokenizer, str):
+            tokenizer = load_tokenizer(tokenizer)
+
+        if tokenizer is None:
+            # Prefer model-local tokenizer files first. If unavailable, fall
+            # back to moondream's dedicated tokenizer repository.
+            try:
+                tokenizer = load_tokenizer(pretrained_model_name_or_path)
+            except Exception:
+                tokenizer = load_tokenizer(cls._tokenizer_repo)
+
+        # Configure special tokens for Moondream3
+        # BOS and EOS are both token 0 (<|endoftext|>), matching the native
+        # config (TokenizerConfig.bos_id=0, eos_id=0). This is standard for
+        # GPT-2 style models where <|endoftext|> signals both start and end.
+        # Token 1 (<|md_reserved_0|>) is a template delimiter, NOT the EOS.
+        tokenizer.bos_token = "<|endoftext|>"
+        tokenizer.bos_token_id = 0
+        tokenizer.eos_token = "<|endoftext|>"
+        tokenizer.eos_token_id = 0
+
+        # Extract processor-specific kwargs
+        crop_size = kwargs.pop("crop_size", 378)
+        max_crops = kwargs.pop("max_crops", 12)
+        overlap_margin = kwargs.pop("overlap_margin", 4)
+        patch_size = kwargs.pop("patch_size", 14)
+        chat_template = kwargs.pop("chat_template", None)
+
+        # Set default chat template on tokenizer if not already set
+        if chat_template is None:
+            chat_template = cls._default_chat_template
+        if tokenizer.chat_template is None:
+            tokenizer.chat_template = chat_template
+
+        return cls(
+            tokenizer=tokenizer,
+            chat_template=chat_template,
+            crop_size=crop_size,
+            max_crops=max_crops,
+            overlap_margin=overlap_margin,
+            patch_size=patch_size,
+        )
+
+    def __call__(
+        self,
+        images: ImageInput = None,
+        text: TextInput
+        | PreTokenizedInput
+        | list[TextInput]
+        | list[PreTokenizedInput] = None,
+        **kwargs: Unpack[Moondream3ProcessorKwargs],
+    ) -> BatchFeature:
+        """
+        Process images and text for Moondream3 model.
+
+        Args:
+            images: Input images (PIL Image, numpy array, or list thereof).
+            text: Input text or list of texts.
+            **kwargs: Additional processing arguments.
+
+        Returns:
+            BatchFeature with processed inputs.
+        """
+        output_kwargs = self._merge_kwargs(
+            Moondream3ProcessorKwargs,
+            tokenizer_init_kwargs=self.tokenizer.init_kwargs,
+            **kwargs,
+        )
+
+        # Process images
+        image_features = {}
+        if images is not None:
+            processed_images = []
+            tilings = []
+
+            images_list = images if isinstance(images, list) else [images]
+            for image in images_list:
+                pixel_values, tiling = self.preprocess_image(
+                    image, **output_kwargs["images_kwargs"]
+                )
+                processed_images.append(pixel_values)
+                tilings.append(tiling)
+
+            if processed_images:
+                image_features["pixel_values"] = processed_images
+                image_features["tilings"] = tilings
+
+        # Process text
+        if text is not None:
+            if not isinstance(text, list):
+                text = [text]
+
+            # Get text kwargs, remove keys we set ourselves
+            text_kwargs = output_kwargs.get("text_kwargs", {}).copy()
+            text_kwargs.pop("return_tensors", None)
+            text_kwargs.pop("add_special_tokens", None)
+
+            # Tokenize text
+            tokenized = self.tokenizer(
+                text,
+                add_special_tokens=True,
+                return_tensors="pt",
+                **text_kwargs,
+            )
+
+            output = BatchFeature(data=dict(tokenized))
+
+            # Add image features
+            if image_features:
+                output["pixel_values"] = image_features["pixel_values"]
+                output["tilings"] = image_features["tilings"]
+
+            return output
+
+        # If only images were provided
+        return BatchFeature(data=image_features)
+
+    @staticmethod
+    def _image_array_to_uint8(array: np.ndarray) -> np.ndarray:
+        if array.dtype == np.uint8:
+            return np.ascontiguousarray(array)
+
+        if array.dtype == np.bool_:
+            return np.ascontiguousarray(array.astype(np.uint8) * 255)
+
+        if np.issubdtype(array.dtype, np.floating):
+            array = np.nan_to_num(array, nan=0.0, posinf=255.0, neginf=0.0)
+            if array.size > 0 and array.max() <= 1.0:
+                array = array * 255.0
+            array = np.rint(array)
+
+        return np.ascontiguousarray(np.clip(array, 0, 255).astype(np.uint8))
+
+    @staticmethod
+    def _to_pil_image(image: ImageInput) -> Image.Image:
+        if isinstance(image, Image.Image):
+            return image
+
+        if isinstance(image, torch.Tensor):
+            tensor = image.detach().cpu()
+            if tensor.dtype == torch.bfloat16:
+                tensor = tensor.to(torch.float32)
+            image_array = tensor.numpy()
+        elif isinstance(image, np.ndarray):
+            image_array = image
+        else:
+            raise TypeError(
+                "Moondream3 images must be PIL images, numpy arrays, "
+                f"or torch tensors, got {type(image)!r}."
+            )
+
+        if image_array.ndim == 2:
+            image_array = Moondream3Processor._image_array_to_uint8(image_array)
+            return Image.fromarray(image_array)
+
+        if image_array.ndim != 3:
+            raise ValueError(
+                "Moondream3 image arrays must have 2 or 3 dimensions, "
+                f"got shape {image_array.shape}."
+            )
+
+        channel_dims = (1, 3, 4)
+        if image_array.shape[-1] not in channel_dims:
+            if image_array.shape[0] not in channel_dims:
+                raise ValueError(
+                    "Moondream3 image arrays must be HWC or CHW with 1, 3, "
+                    f"or 4 channels, got shape {image_array.shape}."
+                )
+            image_array = np.transpose(image_array, (1, 2, 0))
+
+        image_array = Moondream3Processor._image_array_to_uint8(image_array)
+        if image_array.shape[-1] == 1:
+            image_array = image_array[..., 0]
+
+        return Image.fromarray(image_array)
+
+    def preprocess_image(
+        self,
+        image: ImageInput,
+        max_crops: int = 12,
+        overlap_margin: int = 4,
+        crop_size: int = 378,
+        patch_size: int = 14,
+        convert_to_rgb: bool = True,
+        return_tensors: str = "pt",
+    ) -> tuple[torch.Tensor, tuple[int, int]]:
+        """
+        Preprocess an image using overlap-and-resize cropping strategy.
+
+        Args:
+            image: Input PIL image, numpy array, or torch tensor.
+            max_crops: Maximum number of crops.
+            overlap_margin: Margin for overlapping in patches.
+            crop_size: Size of each crop.
+            patch_size: Size of each patch.
+            convert_to_rgb: Whether to convert to RGB.
+            return_tensors: Return type ("pt" for PyTorch).
+
+        Returns:
+            Tuple of (pixel_values tensor, tiling tuple).
+        """
+        image = self._to_pil_image(image)
+        if convert_to_rgb:
+            image = convert_image_mode(image, "RGB")
+
+        # Convert to numpy array
+        image_array = np.array(image)
+        original_h, original_w = image_array.shape[:2]
+
+        margin_pixels = patch_size * overlap_margin
+        total_margin_pixels = margin_pixels * 2
+
+        crop_patches = crop_size // patch_size
+        crop_window_patches = crop_patches - (2 * overlap_margin)
+        crop_window_size = crop_window_patches * patch_size
+
+        tiling = select_tiling(
+            original_h - total_margin_pixels,
+            original_w - total_margin_pixels,
+            crop_window_size,
+            max_crops,
+        )
+
+        n_crops = tiling[0] * tiling[1] + 1
+        crops = np.zeros((n_crops, crop_size, crop_size, 3), dtype=np.uint8)
+
+        target_size = (
+            tiling[0] * crop_window_size + total_margin_pixels,
+            tiling[1] * crop_window_size + total_margin_pixels,
+        )
+
+        # Resize image
+        pil_img = Image.fromarray(image_array)
+        resized = pil_img.resize(
+            (int(target_size[1]), int(target_size[0])),
+            resample=Image.Resampling.LANCZOS,
+        )
+        resized_array = np.asarray(resized)
+
+        # Create global crop
+        global_pil = pil_img.resize(
+            (crop_size, crop_size), resample=Image.Resampling.LANCZOS
+        )
+        crops[0] = np.asarray(global_pil)
+
+        # Create local crops
+        for i in range(tiling[0]):
+            for j in range(tiling[1]):
+                y0 = i * crop_window_size
+                x0 = j * crop_window_size
+                y_end = min(y0 + crop_size, resized_array.shape[0])
+                x_end = min(x0 + crop_size, resized_array.shape[1])
+
+                crop_region = resized_array[y0:y_end, x0:x_end]
+                crop_idx = 1 + i * tiling[1] + j
+                h_slice = slice(None, crop_region.shape[0])
+                w_slice = slice(None, crop_region.shape[1])
+                crops[crop_idx, h_slice, w_slice] = crop_region
+
+        # Convert to tensor: (n_crops, H, W, C) -> (n_crops, C, H, W)
+        pixel_values = np.transpose(crops, (0, 3, 1, 2))
+
+        if return_tensors == "pt":
+            # Match HF reference preprocessing exactly: convert uint8 crops to
+            # bfloat16 before in-place normalization.
+            pixel_values = (
+                torch.from_numpy(pixel_values)
+                .to(dtype=torch.bfloat16)
+                .div_(255.0)
+                .sub_(0.5)
+                .div_(0.5)
+            )
+        else:
+            pixel_values = pixel_values.astype(np.float32) / 255.0
+            pixel_values = (pixel_values - 0.5) / 0.5
+
+        return pixel_values, tiling
+
+    def get_num_image_tokens(self) -> int:
+        """Return the number of image tokens (729 = 27x27 patches)."""
+        return self.patches_per_crop
+
+    def batch_decode(self, *args, **kwargs):
+        """Forward to tokenizer's batch_decode."""
+        return self.tokenizer.batch_decode(*args, **kwargs)
+
+    def decode(self, *args, **kwargs):
+        """Forward to tokenizer's decode."""
+        return self.tokenizer.decode(*args, **kwargs)
+
+    @property
+    def model_input_names(self):
+        tokenizer_input_names = self.tokenizer.model_input_names
+        return tokenizer_input_names + ["pixel_values", "tilings"]
+
+
+AutoProcessor.register("Moondream3Processor", Moondream3Processor)
diff --git a/vllm/transformers_utils/processors/nano_nemotron_vl.py b/vllm/transformers_utils/processors/nano_nemotron_vl.py
index 594290c1441e..76b73d21635c 100644
--- a/vllm/transformers_utils/processors/nano_nemotron_vl.py
+++ b/vllm/transformers_utils/processors/nano_nemotron_vl.py
@@ -8,7 +8,6 @@
 # --------------------------------------------------------
 
 import math
-import warnings
 from abc import ABC, abstractmethod
 from collections.abc import Sequence
 from dataclasses import dataclass
@@ -26,7 +25,7 @@
 from vllm.model_executor.models.parakeet import ParakeetExtractor
 from vllm.multimodal.evs import compute_retained_tokens_count
 from vllm.multimodal.inputs import AudioItem
-from vllm.multimodal.processing.processor import PromptUpdateDetails, _seq2tokens
+from vllm.multimodal.processing.processor import PromptUpdateDetails
 from vllm.tokenizers.hf import HfTokenizer
 
 from .internvl import calculate_internvl_targets, get_internvl_target_ratios
@@ -63,42 +62,50 @@ def calculate_timestamps(
     return timestamps
 
 
-def input_conditioner(x: torch.Tensor, norm_mean: torch.Tensor, norm_std: torch.Tensor):
-    return (x - norm_mean) / norm_std
+@torch.compile(dynamic=True)
+def _bicubic_resize_and_normalize(
+    tensor: torch.Tensor,
+    size: tuple[int, int] | None = None,
+    norm_mean: torch.Tensor | None = None,
+    norm_std: torch.Tensor | None = None,
+    dtype: torch.dtype = torch.float32,
+) -> torch.Tensor:
+    """Permute NHWC→NCHW, optional bicubic resize, rescale + normalize.
 
+    Input must be a raw 4-D **NHWC** tensor.
 
-def _bicubic_from_ndarray(
-    array: npt.NDArray[Any], *, size: tuple[int, int]
-) -> torch.Tensor:
-    """
-    Convert a 4D NHWC ndarray to NCHW and interpolate with bicubic.
-    Suppresses PyTorch's non-writable NumPy warning because interpolate copies,
-    and torch.from_numpy(array) is discarded at the end of function scope.
+    *size*: target ``(H, W)``; skips interpolation when ``None``.
+    *norm_mean* / *norm_std*: when both provided, fused
+    ``(x/255 - mean) / std`` + dtype cast; otherwise ``x/255`` + cast.
     """
-
-    with warnings.catch_warnings():
-        msg = "The given NumPy array is not writ.*"
-        # Apparently, different versions of PyTorch use writable or writeable.
-        warnings.filterwarnings("ignore", message=msg, category=UserWarning)
-        tensor = torch.from_numpy(array)
-    assert tensor.ndim == 4, f"{tensor.ndim=}"
-    tensor = tensor.permute(0, 3, 1, 2)
-    return (
-        torch.nn.functional.interpolate(
+    tensor = tensor.permute(0, 3, 1, 2).to(dtype=torch.float32)
+    if size is not None:
+        tensor = torch.nn.functional.interpolate(
             tensor, size=size, mode="bicubic", align_corners=False, antialias=True
         )
-        / 255.0
+    if norm_mean is not None and norm_std is not None:
+        return ((tensor / 255.0 - norm_mean) / norm_std).to(dtype=dtype).contiguous()
+    return (tensor / 255.0).to(dtype=dtype).contiguous()
+
+
+def _pil_to_nhwc_tensor(image: Image.Image) -> torch.Tensor:
+    """Convert a PIL image to a 4-D NHWC tensor suitable for compiled ops."""
+    array = np.asarray(
+        image.convert("RGB") if image.mode != "RGB" else image, dtype=np.uint8
     )
+    return torch.from_numpy(np.expand_dims(array, axis=0))
 
 
 def dynamic_preprocess(
-    image,
+    image: Image.Image,
     *,
-    image_size=512,
-    max_num_tiles=12,
-    use_thumbnail=True,
-    idx=0,
-):
+    image_size: int = 512,
+    max_num_tiles: int = 12,
+    use_thumbnail: bool = True,
+    norm_mean: torch.Tensor | None = None,
+    norm_std: torch.Tensor | None = None,
+    dtype: torch.dtype = torch.float32,
+) -> torch.Tensor:
     orig_width, orig_height = image.size
 
     target_ratios = get_internvl_target_ratios(1, max_num_tiles)
@@ -111,13 +118,15 @@ def dynamic_preprocess(
         use_thumbnail=False,
     )
 
-    image = np.asarray(
-        image.convert("RGB") if image.mode != "RGB" else image, dtype=np.uint8
-    )
-
-    image = np.expand_dims(image, axis=0)
+    tensor = _pil_to_nhwc_tensor(image)
 
-    resized_img = _bicubic_from_ndarray(image, size=(target_height, target_width))
+    resized_img = _bicubic_resize_and_normalize(
+        tensor,
+        size=(target_height, target_width),
+        norm_mean=norm_mean,
+        norm_std=norm_std,
+        dtype=dtype,
+    )
     B, C, H, W = resized_img.shape
     hp, wp = H // image_size, W // image_size
     patches = (
@@ -127,30 +136,16 @@ def dynamic_preprocess(
     )
 
     if use_thumbnail and patches.shape[0] > 1:
-        thumb = _bicubic_from_ndarray(image, size=(image_size, image_size))
+        thumb = _bicubic_resize_and_normalize(
+            tensor,
+            size=(image_size, image_size),
+            norm_mean=norm_mean,
+            norm_std=norm_std,
+            dtype=dtype,
+        )
         patches = torch.cat([patches, thumb], dim=0)
 
-    return list(patches)
-
-
-def image_to_pixel_values(
-    image: Image.Image,
-    *,
-    input_size: int,
-    max_num: int,
-    use_thumbnail: bool,
-    idx: int,
-) -> torch.Tensor:
-    images = dynamic_preprocess(
-        image,
-        image_size=input_size,
-        max_num_tiles=max_num,
-        use_thumbnail=use_thumbnail,
-        idx=idx,
-    )
-
-    pixel_values = torch.stack(images)
-    return pixel_values
+    return patches
 
 
 def _compute_aspect_preserving_size(
@@ -233,14 +228,16 @@ def video_to_pixel_values(
     video_maintain_aspect_ratio: bool = False,
     patch_size: int = 16,
     downsample_ratio: float = 0.5,
+    norm_mean: torch.Tensor | None = None,
+    norm_std: torch.Tensor | None = None,
+    dtype: torch.dtype = torch.float32,
 ) -> torch.Tensor:
-    # (num_frames, H, W, C) -> (num_frames, C, H, W)
-    video_tensor = torch.from_numpy(video).permute(0, 3, 1, 2)
+    """Convert video ndarray (T, H, W, C) to normalized pixel tensor (T, C, H, W)."""
+    orig_h, orig_w = video.shape[1], video.shape[2]
+    size: tuple[int, int] | None = None
 
     if video_target_num_patches is not None:
-        # Resize to target patch count (aspect-preserving or square).
-        orig_h, orig_w = video_tensor.shape[2], video_tensor.shape[3]
-        target_w, target_h, _ = get_video_target_size_and_feature_size(
+        tw, th, _ = get_video_target_size_and_feature_size(
             orig_w=orig_w,
             orig_h=orig_h,
             target_patches=video_target_num_patches,
@@ -248,14 +245,13 @@ def video_to_pixel_values(
             patch_size=patch_size,
             downsample_ratio=downsample_ratio,
         )
-        if video_tensor.shape[2] != target_h or video_tensor.shape[3] != target_w:
-            return _bicubic_from_ndarray(video, size=(target_h, target_w))
-    elif video_tensor.shape[2] != input_size or video_tensor.shape[3] != input_size:
-        return _bicubic_from_ndarray(video, size=(input_size, input_size))
-
-    video_tensor = video_tensor / 255.0
+        if orig_h != th or orig_w != tw:
+            size = (th, tw)
+    elif orig_h != input_size or orig_w != input_size:
+        size = (input_size, input_size)
 
-    return video_tensor
+    tensor = torch.from_numpy(video)
+    return _bicubic_resize_and_normalize(tensor, size, norm_mean, norm_std, dtype)
 
 
 class DynamicResolutionImageTiler:
@@ -343,6 +339,7 @@ def _images_to_pixel_values_lst(
         self,
         text_prompt_length: int,
         images: list[Image.Image],
+        dtype: torch.dtype = torch.float32,
     ) -> tuple[list[torch.Tensor], list[int]]:
         num_tokens_available = self.max_num_tokens_available(text_prompt_length)
         params_per_image = self.compute_params(images, num_tokens_available)
@@ -350,21 +347,12 @@ def _images_to_pixel_values_lst(
         feature_sizes = []
         images = []
         for param in params_per_image:
-            for t in self.apply_params(param):
+            for t in self.apply_params(param, dtype=dtype):
                 assert t.ndim == 3, f"{t.ndim=}: expected 3 dim tensor"
                 images.append(t)
                 feature_sizes.append(param.num_embeddings)
         return images, feature_sizes
 
-    feature_size_cache: dict[Image.Image, int] = {}
-
-    @classmethod
-    def get_cached_feature_size(cls, image: Image.Image) -> int:
-        feature_size = cls.feature_size_cache[id(image)]
-        # hard assert that we only use the feature size once
-        del cls.feature_size_cache[id(image)]
-        return feature_size
-
     @dataclass
     class DynamicResolutionParams:
         media: Image.Image
@@ -372,17 +360,23 @@ class DynamicResolutionParams:
         num_embeddings: int
         patch_size: tuple[int, int]
 
-    def apply_params(self, params: DynamicResolutionParams) -> list[torch.Tensor]:
+    def apply_params(
+        self,
+        params: DynamicResolutionParams,
+        dtype: torch.dtype = torch.float32,
+    ) -> list[torch.Tensor]:
         target_size = (
             params.patch_size[1] * self._patch_size,
             params.patch_size[0] * self._patch_size,
         )
-        image = np.asarray(
-            params.media.convert("RGB") if params.media.mode != "RGB" else params.media,
-            dtype=np.uint8,
+        tensor = _pil_to_nhwc_tensor(params.media)
+        resized_img = _bicubic_resize_and_normalize(
+            tensor,
+            size=target_size,
+            norm_mean=self.norm_mean,
+            norm_std=self.norm_std,
+            dtype=dtype,
         )
-        image = np.expand_dims(image, axis=0)
-        resized_img = _bicubic_from_ndarray(image, size=target_size)
         return list(resized_img)
 
     def process_media(
@@ -519,7 +513,6 @@ def compute_params(
                 param, token_count = self.process_media(media, tokens_for_media)
                 params.append(param)
                 token_counts.append(token_count)
-                self.feature_size_cache[id(param.media)] = param.num_embeddings
 
             # Step 2: Check if total tokens is within budget
             total_tokens = sum(token_counts)
@@ -629,6 +622,7 @@ def __init__(
                 norm_mean=config.norm_mean,
                 norm_std=config.norm_std,
             )
+        self.dtype: torch.dtype = getattr(config, "dtype", torch.float32)
 
     @staticmethod
     def use_dynamic_resolution(config: PretrainedConfig) -> bool:
@@ -672,14 +666,16 @@ def _images_to_pixel_values_lst(
         max_num_tiles: int,
     ) -> list[torch.Tensor]:
         return [
-            image_to_pixel_values(
+            dynamic_preprocess(
                 image,
-                input_size=self.image_size,
-                max_num=max_num_tiles,
+                image_size=self.image_size,
+                max_num_tiles=max_num_tiles,
                 use_thumbnail=self.use_thumbnail,
-                idx=idx,
+                norm_mean=self.norm_mean,
+                norm_std=self.norm_std,
+                dtype=self.dtype,
             )
-            for idx, image in enumerate(images)
+            for image in images
         ]
 
     def _preprocess_image(
@@ -700,23 +696,22 @@ def _preprocess_image(
             pixel_values_lst, num_tokens_per_image = tiler._images_to_pixel_values_lst(
                 text_prompt_length=text_prompt_length,
                 images=images,
+                dtype=self.dtype,
             )
             imgs_sizes = [(pv.shape[-2], pv.shape[-1]) for pv in pixel_values_lst]
-            normalized = [
-                input_conditioner(img, tiler.norm_mean, tiler.norm_std)
-                for img in pixel_values_lst
-            ]
             image_num_patches = torch.tensor([1] * len(num_tokens_per_image))
             image_inputs = {
-                "pixel_values_flat": normalized,
+                "pixel_values_flat": pixel_values_lst,
                 "imgs_sizes": imgs_sizes,
                 "num_tokens_per_image": num_tokens_per_image,
             }
         else:
             pixel_values_lst = self._images_to_pixel_values_lst(images, max_num_tiles)
             image_num_patches = torch.tensor([len(item) for item in pixel_values_lst])
-            pixel_values_flat = input_conditioner(
-                torch.cat(pixel_values_lst), self.norm_mean, self.norm_std
+            pixel_values_flat = (
+                torch.cat(pixel_values_lst)
+                if len(pixel_values_lst) > 1
+                else pixel_values_lst[0]
             )
             image_inputs = {
                 "pixel_values_flat": pixel_values_flat,
@@ -781,6 +776,7 @@ def __init__(
         max_num_tiles: int | None = None,
         video_token: str | None = None,
         video_pruning_rate: float | None = None,
+        use_audio_in_video: bool = False,
     ) -> None:
         super().__init__(
             config=config,
@@ -791,6 +787,7 @@ def __init__(
         # add extra video token for video processing
         self.video_token = video_token
         self.video_pruning_rate = video_pruning_rate
+        self.use_audio_in_video = use_audio_in_video
 
         # Video params live exclusively in vision_config
         vision_config = getattr(config, "vision_config", config)
@@ -857,13 +854,12 @@ def num_video_token(self) -> int:
 
     @property
     def supports_video(self) -> bool:
-        return self.video_token_id is not None
+        return True
 
     @property
-    def video_token_id(self) -> int | None:
-        if self.video_token is None:
-            return None
-        return self.tokenizer.get_vocab().get(self.video_token, None)
+    def video_token_id(self) -> int:
+        assert self.video_token is not None
+        return self.tokenizer.get_vocab()[self.video_token]
 
     @property
     def image_token_id(self) -> int:
@@ -872,6 +868,8 @@ def image_token_id(self) -> int:
     def _videos_to_pixel_values_lst(
         self,
         videos: list[npt.NDArray],
+        *,
+        dtype: torch.dtype = torch.float32,
     ) -> list[torch.Tensor]:
         return [
             video_to_pixel_values(
@@ -881,6 +879,9 @@ def _videos_to_pixel_values_lst(
                 video_maintain_aspect_ratio=self.video_maintain_aspect_ratio,
                 patch_size=self.config.patch_size,
                 downsample_ratio=self.config.downsample_ratio,
+                norm_mean=self.norm_mean,
+                norm_std=self.norm_std,
+                dtype=dtype,
             )
             for video in videos
         ]
@@ -895,8 +896,10 @@ def _preprocess_video(
 
         videos_lst = [v[0] for v in videos]
         video_metadata_lst = [v[1] for v in videos]
+
         pixel_values_lst_video = self._videos_to_pixel_values_lst(
             videos_lst,
+            dtype=self.dtype,
         )
 
         # We use frame duration in milliseconds (as integer) to ensure
@@ -912,10 +915,15 @@ def _preprocess_video(
             metadata["frames_indices"] for metadata in video_metadata_lst
         ]
         video_num_patches = torch.tensor([len(item) for item in pixel_values_lst_video])
+
+        # Normalization already fused into resize above.
+        # Skip the torch.cat copy when there is exactly one video
+        if len(pixel_values_lst_video) == 1:
+            pixel_values_flat = pixel_values_lst_video[0]
+        else:
+            pixel_values_flat = torch.cat(pixel_values_lst_video)
         video_inputs = {
-            "pixel_values_flat_video": input_conditioner(
-                torch.cat(pixel_values_lst_video), self.norm_mean, self.norm_std
-            ),
+            "pixel_values_flat_video": pixel_values_flat,
             "video_num_patches": video_num_patches,
             "frames_indices": frames_indices_lst,
             "frame_duration_ms": torch.tensor(frame_duration_ms_lst),
@@ -1001,17 +1009,7 @@ def _preprocess_audio(
                 parts[idx] = audio_repl.full
                 audio_index += 1
         text = ["".join(parts)]
-        audio_inputs = extractor(
-            audios,
-            sampling_rate=extractor.sampling_rate,
-            return_tensors="pt",
-        )
-        audio_inputs = {
-            "input_audio_features": audio_inputs.input_features,
-            "feature_attention_mask": audio_inputs.attention_mask,
-            "audio_num_clips": audio_inputs.audio_num_clips,
-        }
-
+        audio_inputs = extractor(audios)
         return text, audio_inputs
 
     def __call__(
@@ -1055,6 +1053,13 @@ def __call__(
         text_inputs = self.tokenizer(text, add_special_tokens=False)
 
         combined_inputs = {**text_inputs, **video_inputs, **audio_inputs}
+        frames_indices = combined_inputs.get("frames_indices")
+        ragged_frames_indices = (
+            isinstance(frames_indices, list)
+            and len({len(frame_indices) for frame_indices in frames_indices}) > 1
+        )
+        if ragged_frames_indices:
+            combined_inputs.pop("frames_indices")
 
         if self.dynamic_tiler is None:
             batch = BatchFeature(
@@ -1066,6 +1071,12 @@ def __call__(
             # allow images to be exempt from the BatchFeature validation:
             # We will .stack() them in _parse_and_validate_image_input
             batch.update(image_inputs)
+        if ragged_frames_indices:
+            assert isinstance(frames_indices, list)
+            batch["frames_indices"] = [
+                torch.as_tensor(frame_indices, dtype=torch.int64)
+                for frame_indices in frames_indices
+            ]
         return batch
 
     def get_image_repl(
@@ -1174,20 +1185,21 @@ def get_video_repl(
                 for i, _ in enumerate(tokens_per_frame)
             ]
 
-        # Tokenize frame separator independently
-        frame_separators_tokenized = [
-            _seq2tokens(tokenizer, sep) for sep in frame_separators
-        ]
+        # Batch-tokenize all frame separators at once — the HuggingFace
+        # tokenizers Rust backend parallelizes batch encoding across threads.
+        batch_encoded = tokenizer(
+            frame_separators,
+            add_special_tokens=False,
+            return_attention_mask=False,
+        )
+        frame_separators_tokenized: list[list[int]] = batch_encoded["input_ids"]
 
         # Tokenize each component independently to avoid tokenizer merging tokens
         # across boundaries. This ensures consistent tokenization regardless of
         # num_tokens_per_frame values.
         all_token_ids = []
         for i, num_tokens in enumerate(tokens_per_frame):
-            frame_sep_token_ids = frame_separators_tokenized[i]
-            all_token_ids.extend(frame_sep_token_ids)
-
-            # Add pre-tokenized special tokens
+            all_token_ids.extend(frame_separators_tokenized[i])
             all_token_ids.extend(img_start_token_ids)
             all_token_ids.extend(img_context_token_ids * num_tokens)
             all_token_ids.extend(img_end_token_ids)
diff --git a/vllm/transformers_utils/processors/openvla.py b/vllm/transformers_utils/processors/openvla.py
new file mode 100644
index 000000000000..162f40238309
--- /dev/null
+++ b/vllm/transformers_utils/processors/openvla.py
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Sequence
+from typing import Any
+
+import numpy as np
+import torch
+from PIL import Image
+from transformers.processing_utils import ProcessorMixin
+
+IMAGENET_MEAN = np.array([0.484375, 0.455078125, 0.40625], dtype=np.float32)
+IMAGENET_STD = np.array([0.228515625, 0.2236328125, 0.224609375], dtype=np.float32)
+SIGLIP_MEAN = np.array([0.5, 0.5, 0.5], dtype=np.float32)
+SIGLIP_STD = np.array([0.5, 0.5, 0.5], dtype=np.float32)
+
+
+def to_rgb_image(image: Any) -> Image.Image:
+    if isinstance(image, Image.Image):
+        return image.convert("RGB")
+
+    if isinstance(image, torch.Tensor):
+        image = image.detach().cpu().numpy()
+    if not isinstance(image, np.ndarray):
+        raise TypeError(
+            "OpenVLA image input must be a PIL image, numpy array, or torch tensor; "
+            f"got {type(image)}"
+        )
+
+    if image.ndim != 3:
+        raise ValueError(
+            f"OpenVLA image input must have 3 dimensions, got shape {image.shape}"
+        )
+
+    if image.shape[0] in (1, 3):
+        image = np.moveaxis(image, 0, -1)
+
+    if image.shape[-1] == 1:
+        image = np.repeat(image, 3, axis=-1)
+    elif image.shape[-1] != 3:
+        raise ValueError(
+            f"OpenVLA image input must have 1 or 3 channels, got shape {image.shape}"
+        )
+
+    if image.dtype != np.uint8:
+        image = image.astype(np.float32)
+        if image.max(initial=0.0) <= 1.0:
+            image = image * 255.0
+        image = np.clip(image, 0, 255).astype(np.uint8)
+
+    return Image.fromarray(image).convert("RGB")
+
+
+def preprocess_openvla_image(image: Any, image_size: int) -> torch.Tensor:
+    rgb_image = to_rgb_image(image)
+    rgb_image = rgb_image.resize(
+        (image_size, image_size),
+        Image.Resampling.BICUBIC,
+    )
+
+    raw = np.asarray(rgb_image, dtype=np.float32) / 255.0
+    dinov2_pixels = ((raw - IMAGENET_MEAN) / IMAGENET_STD).transpose(2, 0, 1)
+    siglip_pixels = ((raw - SIGLIP_MEAN) / SIGLIP_STD).transpose(2, 0, 1)
+    pixel_values = np.concatenate([dinov2_pixels, siglip_pixels], axis=0)
+    return torch.from_numpy(pixel_values)
+
+
+class OpenVLAImageProcessor:
+    def __init__(self, *, image_size: int) -> None:
+        self.image_size = image_size
+
+    def __call__(
+        self,
+        images: Any | None = None,
+        **kwargs: object,
+    ) -> dict[str, object]:
+        if images is None:
+            return {}
+        if not isinstance(images, Sequence) or isinstance(images, (str, bytes)):
+            images = [images]
+        if len(images) == 0:
+            return {}
+
+        pixel_values = torch.stack(
+            [
+                preprocess_openvla_image(image, image_size=self.image_size)
+                for image in images
+            ],
+            dim=0,
+        )
+        return {"pixel_values": pixel_values}
+
+
+class OpenVLAProcessor(ProcessorMixin):
+    def __init__(
+        self,
+        *,
+        image_processor: OpenVLAImageProcessor,
+        tokenizer: Any,
+    ) -> None:
+        self.image_processor = image_processor
+        self.tokenizer = tokenizer
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
deleted file mode 100644
index 36e357a83da1..000000000000
--- a/vllm/transformers_utils/tokenizer.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import warnings
-
-
-def __getattr__(name: str):
-    # Keep until lm-eval is updated
-    if name == "get_tokenizer":
-        from vllm.tokenizers import get_tokenizer
-
-        warnings.warn(
-            "`vllm.transformers_utils.tokenizer.get_tokenizer` "
-            "has been moved to `vllm.tokenizers.get_tokenizer`. "
-            "The old name will be removed in a future version.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-
-        return get_tokenizer
-
-    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
diff --git a/vllm/triton_utils/importing.py b/vllm/triton_utils/importing.py
index f05bc555bfdc..8dea20fd3ea9 100644
--- a/vllm/triton_utils/importing.py
+++ b/vllm/triton_utils/importing.py
@@ -3,9 +3,11 @@
 
 import os
 import types
+from importlib.metadata import version
 from importlib.util import find_spec
 
 from vllm.logger import init_logger
+from vllm.utils.math_utils import cdiv
 
 logger = init_logger(__name__)
 
@@ -47,6 +49,17 @@
                 len(active_drivers),
             )
             HAS_TRITON = False
+
+        # Check Triton CPU
+        if "cpu" in version("vllm"):
+            if "cpu" in backends:
+                HAS_TRITON = True
+            else:
+                logger.warning(
+                    "Triton is installed, but doesn't include CPU backend. "
+                    "Disabling Triton."
+                )
+                HAS_TRITON = False
     except ImportError:
         # This can occur if Triton is partially installed or triton.backends
         # is missing.
@@ -79,6 +92,7 @@ def __init__(self):
         self.autotune = self._dummy_decorator("autotune")
         self.heuristics = self._dummy_decorator("heuristics")
         self.Config = self._dummy_decorator("Config")
+        self.cdiv = cdiv
         self.language = TritonLanguagePlaceholder()
 
     def _dummy_decorator(self, name):
@@ -99,5 +113,6 @@ def __init__(self):
         self.int32 = None
         self.tensor = None
         self.exp = None
+        self.exp2 = None
         self.log = None
         self.log2 = None
diff --git a/vllm/triton_utils/jit_monitor.py b/vllm/triton_utils/jit_monitor.py
new file mode 100644
index 000000000000..5ee33fc51dc4
--- /dev/null
+++ b/vllm/triton_utils/jit_monitor.py
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Monitor unexpected Triton kernel JIT compilation during inference.
+
+After server warmup completes, any Triton JIT compilation or autotuning
+event indicates a cache miss or unexpected input shape that causes a
+latency spike. This module registers hooks in the Triton runtime to
+detect and log such events so they can be investigated.
+
+Currently monitors:
+- Triton ``@triton.autotune`` cache misses (via ``knobs.autotuning.print``)
+- Triton ``@triton.jit`` first-time compilations
+  (via ``knobs.runtime.jit_post_compile_hook``)
+"""
+
+import os
+
+from vllm.logger import init_logger
+from vllm.triton_utils.importing import HAS_TRITON
+
+logger = init_logger(__name__)
+
+_active: bool = False
+
+
+def is_active() -> bool:
+    """Return whether the JIT compilation monitor is currently active."""
+    return _active
+
+
+def activate() -> None:
+    """Enable JIT compilation monitoring after warmup.
+
+    Call once per worker process at the end of
+    :func:`compile_or_warm_up_model`.  After activation every Triton
+    kernel compilation or autotuning benchmark that happens during
+    inference will be logged as a warning.
+
+    Safe to call multiple times — subsequent calls are no-ops.
+
+    If the user has explicitly set ``TRITON_PRINT_AUTOTUNING=0`` in
+    their environment, autotuning printing is left disabled; the JIT
+    compilation hook is still registered regardless.
+    """
+    global _active
+    if _active:
+        return
+    _active = True
+
+    _setup_triton_autotuning_print()
+    _setup_triton_jit_hook()
+
+    logger.info(
+        "Kernel JIT monitor activated — Triton JIT compilations "
+        "during inference will be logged as warnings."
+    )
+
+
+# ------------------------------------------------------------------
+# Triton autotuning print
+# ------------------------------------------------------------------
+
+
+def _setup_triton_autotuning_print() -> None:
+    """Enable ``TRITON_PRINT_AUTOTUNING`` unless the user opted out."""
+    if not HAS_TRITON:
+        return
+    from triton import knobs  # type: ignore[import-untyped]
+
+    user_val = os.environ.get("TRITON_PRINT_AUTOTUNING")
+    if user_val == "0":
+        logger.debug(
+            "TRITON_PRINT_AUTOTUNING=0 set by user — "
+            "autotuning messages will stay suppressed."
+        )
+        return
+
+    knobs.autotuning.print = True
+
+
+# ------------------------------------------------------------------
+# Triton JIT compilation hook
+# ------------------------------------------------------------------
+
+
+def _setup_triton_jit_hook() -> None:
+    """Register a ``jit_post_compile_hook`` that warns on compilation."""
+    if not HAS_TRITON:
+        return
+    from triton import knobs  # type: ignore[import-untyped]
+
+    existing_hook = knobs.runtime.jit_post_compile_hook
+
+    def _on_jit_compile(**kwargs):
+        # `jit_post_compile_hook` is Triton internal API and its
+        # signature has changed across releases (kwargs added/renamed).
+        # Accept **kwargs so an upstream change cannot crash this hook
+        # with TypeError, and forward the full kwarg set to any
+        # pre-existing hook unchanged.
+        fn = kwargs.get("fn")
+        fn_name = getattr(fn, "name", "<unknown>")
+        logger.warning_once(
+            "Triton kernel JIT compilation during inference: %s. "
+            "This causes a latency spike; consider extending warmup "
+            "to cover this shape/config.",
+            fn_name,
+        )
+        if existing_hook is not None:
+            return existing_hook(**kwargs)
+        return None
+
+    knobs.runtime.jit_post_compile_hook = _on_jit_compile
diff --git a/vllm/usage/usage_lib.py b/vllm/usage/usage_lib.py
index fdae7d9b77c0..b2ce46c7b8c3 100644
--- a/vllm/usage/usage_lib.py
+++ b/vllm/usage/usage_lib.py
@@ -22,7 +22,6 @@
 from vllm.connections import global_http_connection
 from vllm.logger import init_logger
 from vllm.utils.platform_utils import cuda_get_device_properties
-from vllm.utils.torch_utils import cuda_device_count_stateless
 from vllm.version import __version__ as VLLM_VERSION
 
 logger = init_logger(__name__)
@@ -196,7 +195,7 @@ def _report_usage_once(
         from vllm.platforms import current_platform
 
         if current_platform.is_cuda_alike():
-            self.gpu_count = cuda_device_count_stateless()
+            self.gpu_count = current_platform.device_count()
             self.gpu_type, self.gpu_memory_per_device = cuda_get_device_properties(
                 0, ("name", "total_memory")
             )
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index 9b481d63990b..bf455c261f4f 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -34,3 +34,16 @@ def length_from_prompt_token_ids_or_embeds(
                 f" prompt_embeds={prompt_embeds_len}"
             )
         return prompt_token_len
+
+
+def is_moe_layer(module: torch.nn.Module) -> bool:
+    # TODO(bnell): Should use isinstance but can't due to circular dependencies.
+    def _check_bases(cls):
+        if cls.__name__ == "FusedMoE":
+            return True
+
+        for b in cls.__bases__:
+            if _check_bases(b):
+                return True
+
+    return _check_bases(module.__class__)
diff --git a/vllm/utils/argparse_utils.py b/vllm/utils/argparse_utils.py
index c48edb68f20a..84c853757192 100644
--- a/vllm/utils/argparse_utils.py
+++ b/vllm/utils/argparse_utils.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Argument parsing utilities for vLLM."""
 
+import argparse
 import json
 import sys
 import textwrap
@@ -25,6 +26,71 @@
 logger = init_logger(__name__)
 
 
+def human_readable_int(value: str) -> int:
+    """Parse human-readable integers like '1k', '2M', etc.
+    Including decimal values with decimal multipliers.
+
+    Examples:
+    - '1k' -> 1,000
+    - '1K' -> 1,024
+    - '25.6k' -> 25,600
+    """
+    value = value.strip()
+
+    match = re.fullmatch(r"(\d+(?:\.\d+)?)([kKmMgGtT])", value)
+    if match:
+        decimal_multiplier = {
+            "k": 10**3,
+            "m": 10**6,
+            "g": 10**9,
+            "t": 10**12,
+        }
+        binary_multiplier = {
+            "K": 2**10,
+            "M": 2**20,
+            "G": 2**30,
+            "T": 2**40,
+        }
+
+        number, suffix = match.groups()
+        if suffix in decimal_multiplier:
+            mult = decimal_multiplier[suffix]
+            return int(float(number) * mult)
+        elif suffix in binary_multiplier:
+            mult = binary_multiplier[suffix]
+            # Do not allow decimals with binary multipliers
+            try:
+                return int(number) * mult
+            except ValueError as e:
+                raise argparse.ArgumentTypeError(
+                    "Decimals are not allowed "
+                    f"with binary suffixes like {suffix}. Did you mean to use "
+                    f"{number}{suffix.lower()} instead?"
+                ) from e
+
+    # Regular plain number.
+    return int(value)
+
+
+def human_readable_int_or_auto(value: str) -> int:
+    """Parse human-readable integers like '1k', '2M', etc.
+    Including decimal values with decimal multipliers.
+    Also accepts -1 or 'auto' as a special value for auto-detection.
+
+    Examples:
+    - '1k' -> 1,000
+    - '1K' -> 1,024
+    - '25.6k' -> 25,600
+    - '-1' or 'auto' -> -1 (special value for auto-detection)
+    """
+    value = value.strip()
+
+    if value == "-1" or value.lower() == "auto":
+        return -1
+
+    return human_readable_int(value)
+
+
 class SortedHelpFormatter(ArgumentDefaultsHelpFormatter, RawDescriptionHelpFormatter):
     """SortedHelpFormatter that sorts arguments by their option strings."""
 
@@ -192,7 +258,7 @@ def parse_args(  # type: ignore[override]
                     "With `vllm serve`, you should provide the model as a "
                     "positional argument or in a config file instead of via "
                     "the `--model` option. "
-                    "The `--model` option will be removed in v0.13."
+                    "The `--model` option will be removed in a future version."
                 )
 
                 if args[model_idx] == "--model":
@@ -338,7 +404,12 @@ def recursive_dict_update(
                 try:
                     value = json.loads(value_str)
                 except json.decoder.JSONDecodeError:
-                    value = value_str
+                    # Support human-readable suffixes (e.g. 1k, 80g) for
+                    # dotted config args like --config.field 80g
+                    try:
+                        value = human_readable_int(value_str)  # type: ignore[assignment]
+                    except (ValueError, ArgumentTypeError):
+                        value = value_str
 
                 # Merge all values with the same key into a single dict
                 arg_dict = create_nested_dict(keys, value)
diff --git a/vllm/utils/cpu_resource_utils.py b/vllm/utils/cpu_resource_utils.py
new file mode 100644
index 000000000000..6baf84266195
--- /dev/null
+++ b/vllm/utils/cpu_resource_utils.py
@@ -0,0 +1,198 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+import os
+import subprocess
+import sys
+from dataclasses import dataclass
+from functools import cache
+
+import psutil
+import regex as re
+
+DEVICE_CONTROL_ENV_VAR = "CPU_VISIBLE_MEMORY_NODES"
+
+
+@dataclass
+class LogicalCPUInfo:
+    id: int = -1
+    physical_core: int = -1
+    numa_node: int = -1
+
+    @classmethod
+    def _int(cls, value: str) -> int:
+        try:
+            int_value = int(value)
+        except Exception:
+            int_value = -1
+        return int_value
+
+    @staticmethod
+    def json_decoder(obj_dict: dict):
+        id = obj_dict.get("cpu")
+        physical_core = obj_dict.get("core")
+        numa_node = obj_dict.get("node")
+
+        if not (id is None or physical_core is None or numa_node is None):
+            return LogicalCPUInfo(
+                id=LogicalCPUInfo._int(id),
+                physical_core=LogicalCPUInfo._int(physical_core),
+                numa_node=LogicalCPUInfo._int(numa_node),
+            )
+        else:
+            return obj_dict
+
+
+@dataclass
+class MemoryNodeInfo:
+    total_memory: int = -1
+    available_memory: int = -1
+
+
+def get_memory_affinity(pid: int = 0) -> list[int]:
+    pid = os.getpid() if pid == 0 else pid
+    path = f"/proc/{pid}/status"
+    with open(path) as f:
+        for line in f:
+            if line.startswith("Mems_allowed_list:"):
+                # Extract the string part (e.g., "0-1,3")
+                raw_list = line.split(":")[1].strip()
+                return parse_id_list(raw_list)
+    return []
+
+
+def parse_id_list(raw_str: str) -> list[int]:
+    """Parses strings like '0-2,4,7-8' into [0, 1, 2, 4, 7, 8]"""
+    result: list[int] = []
+    if not raw_str:
+        return result
+
+    for part in raw_str.split(","):
+        if "-" in part:
+            start, end = map(int, part.split("-"))
+            result.extend(range(start, end + 1))
+        else:
+            result.append(int(part))
+    return sorted(list(set(result)))
+
+
+def get_memory_node_info(node_id: int = 0) -> MemoryNodeInfo:
+    if sys.platform == "darwin":
+        # MacOS has no memory node
+        return MemoryNodeInfo(
+            total_memory=psutil.virtual_memory().total,
+            available_memory=psutil.virtual_memory().available,
+        )
+
+    meminfo_path = f"/sys/devices/system/node/node{node_id}/meminfo"
+    if not os.path.exists(meminfo_path):
+        # Non-NUMA systems (e.g. many RISC-V boards) don't expose per-node
+        # meminfo. Fall back to system-wide numbers from psutil.
+        vm = psutil.virtual_memory()
+        return MemoryNodeInfo(
+            total_memory=vm.total,
+            available_memory=vm.available,
+        )
+
+    meminfo = {}
+    with open(meminfo_path) as f:
+        for line in f:
+            # Each line looks like: "Node 0 MemTotal: 97421888 kB"
+            parts = line.split()
+            key = parts[2].rstrip(":")
+            # convert to Bytes
+            value = int(parts[3]) * 1024
+            meminfo[key] = value
+
+    total_memory = meminfo["MemTotal"]
+    free_memory = meminfo["MemFree"]
+    active_file_memory = meminfo["Active(file)"]
+    inactive_file_memory = meminfo["Inactive(file)"]
+    reclaimable_memory = meminfo["SReclaimable"]
+    available_memory = (
+        free_memory + active_file_memory + inactive_file_memory + reclaimable_memory
+    )
+
+    return MemoryNodeInfo(
+        total_memory=total_memory,
+        available_memory=available_memory,
+    )
+
+
+def get_allowed_cpu_list() -> list[LogicalCPUInfo]:
+    cpu_list = _get_cpu_list()
+    if sys.platform == "linux":
+        allowed = os.sched_getaffinity(0)
+        return [x for x in cpu_list if x.id in allowed]
+    return cpu_list
+
+
+def get_visible_memory_node() -> list[int]:
+    if sys.platform == "darwin":
+        return [0]
+
+    allowed_memory_node_list = get_memory_affinity()
+
+    env_key = DEVICE_CONTROL_ENV_VAR
+    if (
+        ("VLLM_CPU_SIM_MULTI_NUMA" not in os.environ)
+        and env_key in os.environ
+        and os.environ[env_key] != ""
+    ):
+        visible_nodes = [int(s) for s in os.environ[env_key].split(",")]
+        visible_nodes = [
+            node for node in visible_nodes if node in allowed_memory_node_list
+        ]
+        return visible_nodes
+
+    return allowed_memory_node_list
+
+
+@cache
+def _synthesize_cpu_list() -> list[LogicalCPUInfo]:
+    """Synthesize a flat CPU list: each logical CPU is its own core on
+    NUMA node 0.  Used when lscpu output is unavailable or unparsable
+    (e.g. macOS, RISC-V)."""
+    cpu_count = os.cpu_count()
+    assert cpu_count
+    return [LogicalCPUInfo(i, i, 0) for i in range(cpu_count)]
+
+
+def _get_cpu_list() -> list[LogicalCPUInfo]:
+    if sys.platform == "darwin":
+        # For MacOS, no user-level CPU affinity and SMT, return all CPUs
+        return _synthesize_cpu_list()
+
+    lscpu_output = subprocess.check_output(
+        "lscpu --json --extended=CPU,CORE,NODE --online", shell=True, text=True
+    )
+
+    # For platforms without NUMA, map bare `-` node to 0 so non-NUMA
+    # systems keep the existing behavior from #39781.
+    lscpu_output = re.sub(r'"node":\s*-\s*(,|\n|\})', r'"node": 0\1', lscpu_output)
+
+    # On some architectures (notably RISC-V), lscpu also emits bare `-`
+    # for cpu/core.  Quote them so the JSON parses; they will decode to
+    # -1 and be filtered out below, triggering the synthesized fallback.
+    lscpu_output = re.sub(
+        r'("(?:cpu|core)":\s*)-\s*(,|\n|\})',
+        r'\1"-"\2',
+        lscpu_output,
+    )
+
+    logical_cpu_list: list[LogicalCPUInfo] = json.loads(
+        lscpu_output, object_hook=LogicalCPUInfo.json_decoder
+    )["cpus"]
+
+    # Filter CPUs with invalid attributes
+    logical_cpu_list = [
+        x for x in logical_cpu_list if -1 not in (x.id, x.physical_core, x.numa_node)
+    ]
+
+    # If lscpu returned no valid entries (e.g. RISC-V where all fields
+    # are bare `-`), fall back to synthesized topology.
+    if not logical_cpu_list:
+        logical_cpu_list = _synthesize_cpu_list()
+
+    return logical_cpu_list
diff --git a/vllm/utils/cpu_triton_utils.py b/vllm/utils/cpu_triton_utils.py
index d956dde8b071..ea0383a9d4b9 100644
--- a/vllm/utils/cpu_triton_utils.py
+++ b/vllm/utils/cpu_triton_utils.py
@@ -45,3 +45,292 @@ def _compute_slot_mapping_kernel_impl(
 
 
 compute_slot_mapping_kernel = _FuncWrapper(_compute_slot_mapping_kernel_impl)
+
+
+def _ensure_int64(t: torch.Tensor) -> torch.Tensor:
+    return t if t.dtype == torch.int64 else t.to(torch.int64)
+
+
+def _eagle_prepare_inputs_padded_kernel_impl(
+    cu_num_draft_tokens,
+    valid_sampled_tokens_count,
+    query_start_loc_gpu,
+    token_indices_to_sample,
+    num_rejected_tokens_gpu,
+    num_reqs,
+):
+    # C++ expects int64 for cu_num_draft_tokens, valid_sampled_tokens_count,
+    # and num_rejected_tokens_gpu, but Python allocates them as int32.
+    orig_rejected_dtype = num_rejected_tokens_gpu.dtype
+    rejected_i64 = (
+        num_rejected_tokens_gpu
+        if orig_rejected_dtype == torch.int64
+        else num_rejected_tokens_gpu.to(torch.int64)
+    )
+    torch.ops._C.eagle_prepare_inputs_padded_kernel_impl(
+        _ensure_int64(cu_num_draft_tokens),
+        _ensure_int64(valid_sampled_tokens_count),
+        query_start_loc_gpu,
+        token_indices_to_sample,
+        rejected_i64,
+        num_reqs,
+    )
+    if orig_rejected_dtype != torch.int64:
+        num_rejected_tokens_gpu.copy_(rejected_i64.to(orig_rejected_dtype))
+
+
+def _eagle_prepare_next_token_padded_kernel_impl(
+    sampled_token_ids,
+    discard_request_mask,
+    backup_next_token_ids,
+    next_token_ids,
+    valid_sampled_tokens_count,
+    vocab_size,
+    num_sampled_tokens_per_req,
+    num_reqs,
+    stride=None,
+    BLOCK_SIZE_TOKENS=None,
+):
+    # C++ reads all integer tensors as int64_t*. Output tensors are written
+    # in-place so we create int64 copies, call C++, and copy back.
+    orig_next_dtype = next_token_ids.dtype
+    orig_valid_dtype = valid_sampled_tokens_count.dtype
+    next_i64 = _ensure_int64(next_token_ids)
+    valid_i64 = _ensure_int64(valid_sampled_tokens_count)
+    torch.ops._C.eagle_prepare_next_token_padded_kernel_impl(
+        _ensure_int64(sampled_token_ids),
+        discard_request_mask,
+        _ensure_int64(backup_next_token_ids),
+        next_i64,
+        valid_i64,
+        vocab_size,
+        num_sampled_tokens_per_req,
+        num_reqs,
+    )
+    if orig_next_dtype != torch.int64:
+        next_token_ids.copy_(next_i64.to(orig_next_dtype))
+    if orig_valid_dtype != torch.int64:
+        valid_sampled_tokens_count.copy_(valid_i64.to(orig_valid_dtype))
+
+
+def _eagle_step_slot_mapping_metadata_kernel_impl(
+    positions,
+    block_table,
+    stride,
+    seq_lens,
+    out_clamped_positions,
+    out_slot_mapping,
+    block_size,
+    max_model_len,
+    n_blocks_per_req,
+    PAD_ID,
+    batch_size=None,
+):
+    assert batch_size is None or batch_size == positions.shape[0], (
+        f"batch_size mismatch: {batch_size} vs positions.shape[0]={positions.shape[0]}"
+    )
+    torch.ops._C.eagle_step_slot_mapping_metadata_kernel_impl(
+        positions,
+        block_table,
+        seq_lens,
+        out_clamped_positions,
+        out_slot_mapping,
+        block_size,
+        max_model_len,
+        PAD_ID,
+    )
+
+
+def _copy_and_expand_eagle_inputs_kernel_impl(
+    target_token_ids_ptr,
+    target_positions_ptr,
+    next_token_ids_ptr,
+    out_input_ids_ptr,
+    out_positions_ptr,
+    out_is_rejected_token_mask_ptr,
+    out_is_masked_token_mask_ptr,
+    out_new_token_indices_ptr,
+    out_hidden_state_mapping_ptr,
+    query_start_loc_ptr,
+    query_end_loc_ptr,
+    padding_token_id,
+    parallel_drafting_token_id,
+    total_input_tokens,
+    num_padding_slots_per_request,
+    shift_input_ids,
+    BLOCK_SIZE_TOKENS=None,
+    BLOCK_SIZE_REQS=None,
+):
+    """Adapter between Triton kernel call convention and C++ implementation.
+
+    The Triton kernel uses '_ptr' suffixed parameter names and compile-time
+    constants (BLOCK_SIZE_TOKENS, BLOCK_SIZE_REQS) which are not needed by
+    the C++ implementation. C++ reads token id tensors as int64_t*.
+    Output tensors that are int32 need copy-back after C++ writes int64.
+    """
+    orig_ids_dtype = out_input_ids_ptr.dtype
+    orig_pos_dtype = out_positions_ptr.dtype
+    out_ids_i64 = _ensure_int64(out_input_ids_ptr)
+    out_pos_i64 = _ensure_int64(out_positions_ptr)
+    torch.ops._C.copy_and_expand_eagle_inputs_kernel_impl(
+        _ensure_int64(target_token_ids_ptr),
+        _ensure_int64(target_positions_ptr),
+        _ensure_int64(next_token_ids_ptr),
+        out_ids_i64,
+        out_pos_i64,
+        out_is_rejected_token_mask_ptr,
+        out_is_masked_token_mask_ptr,
+        out_new_token_indices_ptr,
+        out_hidden_state_mapping_ptr,
+        query_start_loc_ptr,
+        query_end_loc_ptr,
+        padding_token_id,
+        parallel_drafting_token_id,
+        total_input_tokens,
+        num_padding_slots_per_request,
+        shift_input_ids,
+    )
+    if orig_ids_dtype != torch.int64:
+        out_input_ids_ptr.copy_(out_ids_i64.to(orig_ids_dtype))
+    if orig_pos_dtype != torch.int64:
+        out_positions_ptr.copy_(out_pos_i64.to(orig_pos_dtype))
+
+
+def _rejection_greedy_sample_kernel_impl(
+    output_token_ids,
+    cu_num_draft_tokens,
+    draft_token_ids,
+    target_argmax,
+    bonus_token_ids,
+    is_greedy,
+    max_spec_len,
+    uniform_probs=None,
+    synthetic_conditional_rates=None,
+    SYNTHETIC_MODE=False,
+):
+    # C++ kernel expects int64 for all integer tensors.
+    # Note: uniform_probs, synthetic_conditional_rates, and SYNTHETIC_MODE are
+    # passed by the rejection sampler for synthetic mode support, but are not
+    # yet implemented in the C++ CPU kernel. We accept them here to maintain
+    # compatibility with the kernel calling convention.
+    assert not SYNTHETIC_MODE, "Synthetic acceptance not supported with CPU sampling"
+    orig_dtype = output_token_ids.dtype
+    output_token_ids_i64 = _ensure_int64(output_token_ids)
+    torch.ops._C.rejection_greedy_sample_kernel_impl(
+        output_token_ids_i64,
+        _ensure_int64(cu_num_draft_tokens),
+        _ensure_int64(draft_token_ids),
+        _ensure_int64(target_argmax),
+        _ensure_int64(bonus_token_ids),
+        is_greedy,
+        max_spec_len,
+    )
+    if orig_dtype != torch.int64:
+        output_token_ids.copy_(output_token_ids_i64.to(orig_dtype))
+
+
+def _rejection_random_sample_kernel_impl(
+    output_token_ids,
+    cu_num_draft_tokens,
+    draft_token_ids,
+    draft_probs,
+    target_probs,
+    bonus_token_ids,
+    recovered_token_ids,
+    uniform_probs,
+    is_greedy,
+    max_spec_len,
+    vocab_size,
+    synthetic_conditional_rates=None,
+    NO_DRAFT_PROBS=False,
+    SYNTHETIC_MODE=False,
+):
+    # C++ kernel expects int64 for all integer tensors and float32 for probs.
+    # uniform_probs is intentionally float64 in Python to avoid exact-zero
+    # samples; cast to float32 here for C++ compatibility.
+    # Note: synthetic_conditional_rates and SYNTHETIC_MODE are passed by the
+    # rejection sampler for synthetic mode support, but are not yet implemented
+    # in the C++ CPU kernel. We accept them here to maintain compatibility with
+    # the kernel calling convention.
+    assert not SYNTHETIC_MODE, "Synthetic acceptance not supported with CPU sampling"
+    orig_dtype = output_token_ids.dtype
+    output_token_ids_i64 = _ensure_int64(output_token_ids)
+    torch.ops._C.rejection_random_sample_kernel_impl(
+        output_token_ids_i64,
+        _ensure_int64(cu_num_draft_tokens),
+        _ensure_int64(draft_token_ids),
+        draft_probs,
+        target_probs,
+        _ensure_int64(bonus_token_ids),
+        _ensure_int64(recovered_token_ids),
+        uniform_probs.to(torch.float32),
+        is_greedy,
+        max_spec_len,
+        vocab_size,
+        NO_DRAFT_PROBS,
+    )
+    if orig_dtype != torch.int64:
+        output_token_ids.copy_(output_token_ids_i64.to(orig_dtype))
+
+
+def _expand_kernel_impl(
+    output,
+    input_val,
+    cu_num_tokens,
+    replace_from,
+    replace_to,
+    MAX_NUM_TOKENS=None,
+):
+    torch.ops._C.expand_kernel_impl(
+        _ensure_int64(output),
+        _ensure_int64(input_val),
+        _ensure_int64(cu_num_tokens),
+        replace_from,
+        replace_to,
+    )
+
+
+def _sample_recovered_tokens_kernel_impl(
+    output_token_ids,
+    cu_num_draft_tokens,
+    draft_token_ids,
+    draft_probs,
+    target_probs,
+    inv_q,
+    vocab_size,
+    BLOCK_SIZE=None,
+    NO_DRAFT_PROBS=False,
+):
+    # C++ reads integer tensors as int64_t*; ensure correct dtype.
+    orig_dtype = output_token_ids.dtype
+    output_i64 = _ensure_int64(output_token_ids)
+    torch.ops._C.sample_recovered_tokens_kernel_impl(
+        output_i64,
+        _ensure_int64(cu_num_draft_tokens),
+        _ensure_int64(draft_token_ids),
+        draft_probs,
+        target_probs,
+        inv_q,
+        vocab_size,
+        NO_DRAFT_PROBS,
+    )
+    if orig_dtype != torch.int64:
+        output_token_ids.copy_(output_i64.to(orig_dtype))
+
+
+eagle_prepare_inputs_padded_kernel = _FuncWrapper(
+    _eagle_prepare_inputs_padded_kernel_impl
+)
+eagle_prepare_next_token_padded_kernel = _FuncWrapper(
+    _eagle_prepare_next_token_padded_kernel_impl
+)
+copy_and_expand_eagle_inputs_kernel = _FuncWrapper(
+    _copy_and_expand_eagle_inputs_kernel_impl
+)
+eagle_step_slot_mapping_metadata_kernel = _FuncWrapper(
+    _eagle_step_slot_mapping_metadata_kernel_impl
+)
+rejection_greedy_sample_kernel = _FuncWrapper(_rejection_greedy_sample_kernel_impl)
+rejection_random_sample_kernel = _FuncWrapper(_rejection_random_sample_kernel_impl)
+expand_kernel = _FuncWrapper(_expand_kernel_impl)
+sample_recovered_tokens_kernel = _FuncWrapper(_sample_recovered_tokens_kernel_impl)
diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py
index abf25db16c78..6b89f5c33203 100644
--- a/vllm/utils/deep_gemm.py
+++ b/vllm/utils/deep_gemm.py
@@ -106,16 +106,14 @@ def is_deep_gemm_e8m0_used() -> bool:
     _lazy_init()
 
     if _fp8_gemm_nt_impl is None:
-        logger.info_once(
-            "DeepGEMM E8M0 disabled: _fp8_gemm_nt_impl not found", scope="local"
-        )
+        logger.info_once("DeepGEMM E8M0 disabled: _fp8_gemm_nt_impl not found")
         return False
 
     if envs.VLLM_USE_DEEP_GEMM_E8M0:
-        logger.info_once("DeepGEMM E8M0 enabled on current platform.", scope="local")
+        logger.info_once("DeepGEMM E8M0 enabled on current platform.")
         return True
 
-    logger.info_once("DeepGEMM E8M0 disabled on current configuration.", scope="local")
+    logger.info_once("DeepGEMM E8M0 disabled on current configuration.")
     return False
 
 
@@ -127,33 +125,79 @@ def _missing(*_: Any, **__: Any) -> NoReturn:
     )
 
 
+_cublaslt_gemm_nt_impl: Callable[..., Any] | None = None
 _fp8_gemm_nt_impl: Callable[..., Any] | None = None
+_fp8_einsum_impl: Callable[..., Any] | None = None
 _grouped_impl: Callable[..., Any] | None = None
 _grouped_masked_impl: Callable[..., Any] | None = None
-_fp8_mqa_logits_impl: Callable[..., Any] | None = None
-_fp8_paged_mqa_logits_impl: Callable[..., Any] | None = None
+_grouped_fp4_impl: Callable[..., Any] | None = None
+_fp8_fp4_mqa_logits_impl: Callable[..., Any] | None = None
+_fp8_fp4_paged_mqa_logits_impl: Callable[..., Any] | None = None
 _get_paged_mqa_logits_metadata_impl: Callable[..., Any] | None = None
+_tf32_hc_prenorm_gemm_impl: Callable[..., Any] | None = None
 _get_mn_major_tma_aligned_tensor_impl: Callable[..., Any] | None = None
 _get_mk_alignment_for_contiguous_layout_impl: Callable[..., Any] | None = None
 _transform_sf_into_required_layout_impl: Callable[..., Any] | None = None
 
 
+def _import_deep_gemm():
+    """Import the deep_gemm module.
+
+    Prefers an externally installed ``deep_gemm`` package (so users can
+    pin a specific version), then falls back to the vendored copy bundled
+    in the vLLM wheel.
+
+    Returns ``None`` when neither source is usable.
+    """
+    # 1. Try the external (pip-installed) package first.
+    try:
+        module = importlib.import_module("deep_gemm")
+        logger.debug_once("Imported deep_gemm module from site-packages")
+        return module
+    except ImportError:
+        logger.debug_once(
+            "deep_gemm not found in site-packages, "
+            "trying vendored vllm.third_party.deep_gemm"
+        )
+
+    # 2. Fall back to the vendored copy bundled in the vLLM wheel.
+    try:
+        module = importlib.import_module("vllm.third_party.deep_gemm")
+        logger.debug_once("Imported deep_gemm module from vllm.third_party.deep_gemm")
+        return module
+    except ImportError:
+        logger.debug_once("Vendored deep_gemm not found either")
+    except Exception as e:
+        # The vendored module may raise RuntimeError during _C.init()
+        # if JIT include files are missing (e.g. incomplete wheel).
+        logger.warning_once("Failed to import vendored deep_gemm: %s", e)
+
+    return None
+
+
 def _lazy_init() -> None:
     """Import deep_gemm and resolve symbols on first use."""
-    global _fp8_gemm_nt_impl, _grouped_impl, _grouped_masked_impl
-    global _fp8_mqa_logits_impl, _fp8_paged_mqa_logits_impl
+    global _cublaslt_gemm_nt_impl
+    global _fp8_gemm_nt_impl, _fp8_einsum_impl
+    global _grouped_impl, _grouped_masked_impl, _grouped_fp4_impl
+    global _fp8_fp4_mqa_logits_impl, _fp8_fp4_paged_mqa_logits_impl
     global _get_paged_mqa_logits_metadata_impl
+    global _tf32_hc_prenorm_gemm_impl
     global _get_mn_major_tma_aligned_tensor_impl
     global _get_mk_alignment_for_contiguous_layout_impl
     global _transform_sf_into_required_layout_impl
     # fast path
     if (
-        _fp8_gemm_nt_impl is not None
+        _cublaslt_gemm_nt_impl is not None
+        or _fp8_gemm_nt_impl is not None
+        or _fp8_einsum_impl is not None
         or _grouped_impl is not None
         or _grouped_masked_impl is not None
-        or _fp8_mqa_logits_impl is not None
-        or _fp8_paged_mqa_logits_impl is not None
+        or _grouped_fp4_impl is not None
+        or _fp8_fp4_mqa_logits_impl is not None
+        or _fp8_fp4_paged_mqa_logits_impl is not None
         or _get_paged_mqa_logits_metadata_impl is not None
+        or _tf32_hc_prenorm_gemm_impl is not None
         or _get_mk_alignment_for_contiguous_layout_impl is not None
         or _transform_sf_into_required_layout_impl is not None
     ):
@@ -169,16 +213,24 @@ def _lazy_init() -> None:
             envs.VLLM_CACHE_ROOT, "deep_gemm"
         )
 
-    _dg = importlib.import_module("deep_gemm")
+    _dg = _import_deep_gemm()
+    if _dg is None:
+        return
 
+    _cublaslt_gemm_nt_impl = getattr(_dg, "cublaslt_gemm_nt", None)
     _fp8_gemm_nt_impl = getattr(_dg, "fp8_gemm_nt", None)
+    _fp8_einsum_impl = getattr(_dg, "fp8_einsum", None)
     _grouped_impl = getattr(_dg, "m_grouped_fp8_gemm_nt_contiguous", None)
     _grouped_masked_impl = getattr(_dg, "fp8_m_grouped_gemm_nt_masked", None)
-    _fp8_mqa_logits_impl = getattr(_dg, "fp8_mqa_logits", None)
-    _fp8_paged_mqa_logits_impl = getattr(_dg, "fp8_paged_mqa_logits", None)
+    _grouped_fp4_impl = getattr(_dg, "m_grouped_fp8_fp4_gemm_nt_contiguous", None)
+    # DeepGEMM exposes fp8_fp4_*_mqa_logits as the canonical symbols that
+    # handle both the FP8 and FP4 Q/K paths via a tuple-typed `q`.
+    _fp8_fp4_mqa_logits_impl = getattr(_dg, "fp8_fp4_mqa_logits", None)
+    _fp8_fp4_paged_mqa_logits_impl = getattr(_dg, "fp8_fp4_paged_mqa_logits", None)
     _get_paged_mqa_logits_metadata_impl = getattr(
         _dg, "get_paged_mqa_logits_metadata", None
     )
+    _tf32_hc_prenorm_gemm_impl = getattr(_dg, "tf32_hc_prenorm_gemm", None)
     _get_mn_major_tma_aligned_tensor_impl = getattr(
         _dg, "get_mn_major_tma_aligned_tensor", None
     )
@@ -193,8 +245,18 @@ def _lazy_init() -> None:
 
 def get_num_sms() -> int:
     _lazy_init()
-    _dg = importlib.import_module("deep_gemm")
-    return int(_dg.get_num_sms())
+    dg = _import_deep_gemm()
+    if dg is None:
+        raise RuntimeError("DeepGEMM is not available")
+    return int(dg.get_num_sms())
+
+
+def set_num_sms(num_sms: int) -> None:
+    _lazy_init()
+    dg = _import_deep_gemm()
+    if dg is None:
+        raise RuntimeError("DeepGEMM is not available")
+    dg.set_num_sms(num_sms)
 
 
 @functools.cache
@@ -214,6 +276,13 @@ def get_col_major_tma_aligned_tensor(x: torch.Tensor) -> torch.Tensor:
     return _get_mn_major_tma_aligned_tensor_impl(x)
 
 
+def cublaslt_gemm_nt(*args, **kwargs):
+    _lazy_init()
+    if _cublaslt_gemm_nt_impl is None:
+        return _missing(*args, **kwargs)
+    return _cublaslt_gemm_nt_impl(*args, **kwargs)
+
+
 def fp8_gemm_nt(*args, **kwargs):
     _lazy_init()
     if _fp8_gemm_nt_impl is None:
@@ -226,6 +295,13 @@ def fp8_gemm_nt(*args, **kwargs):
     return _fp8_gemm_nt_impl(*args, disable_ue8m0_cast=not use_ue8m0, **kwargs)
 
 
+def fp8_einsum(*args, **kwargs):
+    _lazy_init()
+    if _fp8_einsum_impl is None:
+        return _missing(*args, **kwargs)
+    return _fp8_einsum_impl(*args, **kwargs)
+
+
 def m_grouped_fp8_gemm_nt_contiguous(*args, **kwargs):
     _lazy_init()
     if _grouped_impl is None:
@@ -235,6 +311,15 @@ def m_grouped_fp8_gemm_nt_contiguous(*args, **kwargs):
     )
 
 
+def m_grouped_fp8_fp4_gemm_nt_contiguous(*args, **kwargs):
+    _lazy_init()
+    if _grouped_fp4_impl is None:
+        return _missing(*args, **kwargs)
+    return _grouped_fp4_impl(
+        *args, disable_ue8m0_cast=not is_deep_gemm_e8m0_used(), **kwargs
+    )
+
+
 def fp8_m_grouped_gemm_nt_masked(*args, **kwargs):
     _lazy_init()
     if _grouped_masked_impl is None:
@@ -253,37 +338,48 @@ def transform_sf_into_required_layout(*args, **kwargs):
     )
 
 
-def fp8_mqa_logits(
-    q: torch.Tensor,
+def fp8_fp4_mqa_logits(
+    q: tuple[torch.Tensor, torch.Tensor | None],
     kv: tuple[torch.Tensor, torch.Tensor],
     weights: torch.Tensor,
     cu_seqlen_ks: torch.Tensor,
     cu_seqlen_ke: torch.Tensor,
     clean_logits: bool,
 ) -> torch.Tensor:
-    """Compute FP8 MQA logits for a single sequence without KV paging.
+    """Compute MQA logits for a single sequence without KV paging.
+
+    Unified FP8/FP4 dispatch — the underlying DeepGEMM kernel takes
+    ``q = (values, scales_or_None)`` where ``scales`` is None for FP8 Q
+    (per-token scale is folded into ``weights``) and a packed block-scale
+    tensor for MXFP4 Q.
 
     Args:
-        q: Query tensor of shape [M, H, D]. Casted to
-            `torch.float8_e4m3fn` by caller.
-        kv: Tuple `(k_fp8, k_scales)` where `k_fp8` has shape [N, D] with
-            dtype `torch.float8_e4m3fn` and `k_scales` has shape [N])
-            with dtype `torch.float32`.
+        q: Tuple ``(q_values, q_scale)``. FP8 path: q_values is [M, H, D]
+            float8_e4m3fn and q_scale is None (per-token scale is folded
+            into ``weights``). FP4 path: q_values is packed uint8 and
+            q_scale is the companion block-scale tensor.
+        kv: Tuple `(k_packed, k_scales)` — FP8 layout is [N, D]
+            float8_e4m3fn plus fp32 scales [N]; FP4 layout is packed uint8.
         weights: weights of shape [M, H], dtype `torch.float32`.
-        cu_seqlen_ks: Start indices (inclusive) for valid K per query position,
-            shape [M], dtype int32.
-        cu_seqlen_ke: End indices (exclusive) for valid K per query position,
-            shape [M], dtype int32.
+        cu_seqlen_ks: Start indices (inclusive) for valid K per query
+            position, shape [M], dtype int32.
+        cu_seqlen_ke: End indices (exclusive) for valid K per query
+            position, shape [M], dtype int32.
         clean_logits: Whether to clean the unfilled logits into `-inf`.
 
     Returns:
         Logits tensor of shape [M, N], dtype `torch.float32`.
     """
     _lazy_init()
-    if _fp8_mqa_logits_impl is None:
+    if _fp8_fp4_mqa_logits_impl is None:
         return _missing()
-    return _fp8_mqa_logits_impl(
-        q, kv, weights, cu_seqlen_ks, cu_seqlen_ke, clean_logits=clean_logits
+    return _fp8_fp4_mqa_logits_impl(
+        q,
+        kv,
+        weights,
+        cu_seqlen_ks,
+        cu_seqlen_ke,
+        clean_logits=clean_logits,
     )
 
 
@@ -299,7 +395,7 @@ def get_paged_mqa_logits_metadata(
         num_sms: Number of SMs available. 132 for Hopper
 
     Returns:
-        Backend-specific tensor consumed by `fp8_paged_mqa_logits` to
+        Backend-specific tensor consumed by `fp8_fp4_paged_mqa_logits` to
         schedule work across SMs.
     """
     _lazy_init()
@@ -308,9 +404,9 @@ def get_paged_mqa_logits_metadata(
     return _get_paged_mqa_logits_metadata_impl(context_lens, block_size, num_sms)
 
 
-def fp8_paged_mqa_logits(
-    q_fp8: torch.Tensor,
-    kv_cache_fp8: torch.Tensor,
+def fp8_fp4_paged_mqa_logits(
+    q: tuple[torch.Tensor, torch.Tensor | None],
+    kv_cache: torch.Tensor,
     weights: torch.Tensor,
     context_lens: torch.Tensor,
     block_tables: torch.Tensor,
@@ -318,14 +414,20 @@ def fp8_paged_mqa_logits(
     max_model_len: int,
     clean_logits: bool,
 ) -> torch.Tensor:
-    """Compute FP8 MQA logits using paged KV-cache.
+    """Compute MQA logits using a paged KV-cache.
+
+    Unified FP8/FP4 dispatch — the underlying DeepGEMM kernel takes
+    ``q = (values, scales_or_None)``; pass ``(q_tensor, None)`` for the FP8
+    path and ``(q_values, q_scale)`` for MXFP4.
 
     Args:
-        q_fp8: Query tensor of shape [B, next_n, H, D]. Casted to
-            `torch.float8_e4m3fn` by caller.
-        kv_cache_fp8: Paged KV-cache in packed FP8+scale layout with shape
-            [num_blocks, block_size, 1, D+4], dtype `torch.uint8`. The last
-            4 bytes per (block,pos) store the `float` dequant scale.
+        q: Tuple ``(q_values, q_scale)``. FP8 path: q_values is
+            [B, next_n, H, D] float8_e4m3fn and q_scale is None. FP4 path:
+            q_values is packed uint8 and q_scale is the companion
+            block-scale tensor.
+        kv_cache: Paged KV-cache. FP8 layout is [num_blocks, block_size, 1,
+            D+4], dtype `torch.uint8`, with the last 4 bytes per (block, pos)
+            storing the float dequant scale.
         weights: Tensor of shape [B * next_n, H], dtype `torch.float32`.
         context_lens: Tensor of shape [B], dtype int32; effective context length
             for each batch element.
@@ -341,11 +443,11 @@ def fp8_paged_mqa_logits(
         `torch.float32`.
     """
     _lazy_init()
-    if _fp8_paged_mqa_logits_impl is None:
+    if _fp8_fp4_paged_mqa_logits_impl is None:
         return _missing()
-    return _fp8_paged_mqa_logits_impl(
-        q_fp8,
-        kv_cache_fp8,
+    return _fp8_fp4_paged_mqa_logits_impl(
+        q,
+        kv_cache,
         weights,
         context_lens,
         block_tables,
@@ -355,6 +457,32 @@ def fp8_paged_mqa_logits(
     )
 
 
+def tf32_hc_prenorm_gemm(
+    x: torch.Tensor,
+    fn: torch.Tensor,
+    out: torch.Tensor,
+    sqrsum: torch.Tensor,
+    num_split: int,
+) -> torch.Tensor:
+    """
+    Perform the following computation:
+        out = x.float() @ fn.T
+        sqrsum = x.float().square().sum(-1)
+
+    See the caller function for shape requirement
+    """
+    _lazy_init()
+    if _tf32_hc_prenorm_gemm_impl is None:
+        return _missing()
+    return _tf32_hc_prenorm_gemm_impl(
+        x,
+        fn,
+        out,
+        sqrsum,
+        num_split,
+    )
+
+
 def _ceil_to_ue8m0(x: torch.Tensor):
     return torch.pow(2.0, torch.ceil(torch.log2(x.abs())))
 
@@ -413,7 +541,7 @@ def calc_diff(x: torch.Tensor, y: torch.Tensor):
 
 def should_use_deepgemm_for_fp8_linear(
     output_dtype: torch.dtype,
-    weight: torch.Tensor,
+    weight_shape: tuple[int, int],
     supports_deep_gemm: bool | None = None,
 ):
     if supports_deep_gemm is None:
@@ -428,8 +556,8 @@ def should_use_deepgemm_for_fp8_linear(
     return (
         supports_deep_gemm
         and output_dtype == torch.bfloat16
-        and weight.shape[0] % N_MULTIPLE == 0
-        and weight.shape[1] % K_MULTIPLE == 0
+        and weight_shape[0] % N_MULTIPLE == 0
+        and weight_shape[1] % K_MULTIPLE == 0
     )
 
 
@@ -437,15 +565,18 @@ def should_use_deepgemm_for_fp8_linear(
     "calc_diff",
     "DeepGemmQuantScaleFMT",
     "fp8_gemm_nt",
+    "fp8_einsum",
     "m_grouped_fp8_gemm_nt_contiguous",
+    "m_grouped_fp8_fp4_gemm_nt_contiguous",
     "fp8_m_grouped_gemm_nt_masked",
-    "fp8_mqa_logits",
-    "fp8_paged_mqa_logits",
+    "fp8_fp4_mqa_logits",
+    "fp8_fp4_paged_mqa_logits",
     "get_paged_mqa_logits_metadata",
     "per_block_cast_to_fp8",
     "is_deep_gemm_e8m0_used",
     "is_deep_gemm_supported",
     "get_num_sms",
+    "set_num_sms",
     "should_use_deepgemm_for_fp8_linear",
     "get_col_major_tma_aligned_tensor",
     "get_mk_alignment_for_contiguous_layout",
diff --git a/vllm/utils/flashinfer.py b/vllm/utils/flashinfer.py
index 065a9ca894d1..f7ed180a7300 100644
--- a/vllm/utils/flashinfer.py
+++ b/vllm/utils/flashinfer.py
@@ -20,6 +20,7 @@
 import vllm.envs as envs
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
+from vllm.utils.math_utils import cdiv
 
 logger = init_logger(__name__)
 
@@ -128,6 +129,15 @@ def wrapper(*args, **kwargs):
 nvfp4_block_scale_interleave = _lazy_import_wrapper(
     "flashinfer.fp4_quantization", "block_scale_interleave"
 )
+flashinfer_cute_dsl_fused_moe_nvfp4 = _lazy_import_wrapper(
+    "flashinfer", "cute_dsl_fused_moe_nvfp4"
+)
+flashinfer_convert_sf_to_mma_layout = _lazy_import_wrapper(
+    "flashinfer.cute_dsl.utils", "convert_sf_to_mma_layout"
+)
+flashinfer_b12x_fused_moe = _lazy_import_wrapper(
+    "flashinfer.fused_moe", "b12x_fused_moe"
+)
 trtllm_fp4_block_scale_moe = _lazy_import_wrapper(
     "flashinfer", "trtllm_fp4_block_scale_moe"
 )
@@ -137,7 +147,6 @@ def wrapper(*args, **kwargs):
     "autotune",
     fallback_fn=lambda *args, **kwargs: contextlib.nullcontext(),
 )
-_is_fi_autotuning: bool = False
 
 
 @functools.cache
@@ -202,6 +211,7 @@ def has_flashinfer_trtllm_fused_moe() -> bool:
         ("flashinfer.fused_moe", "trtllm_fp8_per_tensor_scale_moe"),
         ("flashinfer.fused_moe", "trtllm_fp4_block_scale_moe"),
         ("flashinfer.fused_moe", "trtllm_mxint4_block_scale_moe"),
+        ("flashinfer.fused_moe", "trtllm_bf16_moe"),
     ]
     for module_name, attr_name in required_functions:
         mod = _get_submodule(module_name)
@@ -251,6 +261,48 @@ def has_flashinfer_cutedsl_grouped_gemm_nt_masked() -> bool:
     return True
 
 
+@functools.cache
+def has_flashinfer_cutedsl_moe_nvfp4() -> bool:
+    """Return ``True`` if FlashInfer cute_dsl_fused_moe_nvfp4 is available."""
+    if not has_flashinfer_cutedsl():
+        return False
+    mod = _get_submodule("flashinfer")
+    return mod is not None and hasattr(mod, "cute_dsl_fused_moe_nvfp4")
+
+
+@functools.cache
+def has_flashinfer_b12x_gemm() -> bool:
+    """Return True if FlashInfer b12x FP4 GEMM backend is available (SM120+)."""
+    if not has_flashinfer_cutedsl():
+        return False
+    mod = _get_submodule("flashinfer.gemm")
+    if mod is None:
+        return False
+    # FlashInfer 0.6.11 renamed Sm120BlockScaledDenseGemmKernel ->
+    # Sm120B12xBlockScaledDenseGemmKernel (commit 223f2a49). Accept either.
+    return hasattr(mod, "Sm120B12xBlockScaledDenseGemmKernel") or hasattr(
+        mod, "Sm120BlockScaledDenseGemmKernel"
+    )
+
+
+@functools.cache
+def has_flashinfer_b12x_moe() -> bool:
+    """Return ``True`` if FlashInfer CuteDSL SM12x fused MoE is available."""
+    if not has_flashinfer_moe():
+        return False
+
+    required_functions = [
+        ("flashinfer.fused_moe", "b12x_fused_moe"),
+        ("flashinfer.cute_dsl.utils", "convert_sf_to_mma_layout"),
+    ]
+
+    for module_name, attr_name in required_functions:
+        mod = _get_submodule(module_name)
+        if not mod or not hasattr(mod, attr_name):
+            return False
+    return True
+
+
 @functools.cache
 def has_nvidia_artifactory() -> bool:
     """Return `True` if NVIDIA's artifactory is accessible.
@@ -464,6 +516,8 @@ def flashinfer_mm_fp4(
         dtype: torch.dtype,
         use_8x4_sf_layout: bool,
         backend: str,
+        block_size: int = 16,
+        use_nvfp4: bool = True,
     ) -> torch.Tensor:
         from flashinfer import mm_fp4 as flashinfer_mm_fp4_
 
@@ -474,8 +528,9 @@ def flashinfer_mm_fp4(
             B_scale,
             g_scale,
             dtype,
-            block_size=16,
+            block_size=block_size,
             use_8x4_sf_layout=use_8x4_sf_layout,
+            use_nvfp4=use_nvfp4,
             backend=backend,
         )
 
@@ -491,9 +546,36 @@ def flashinfer_mm_fp4_fake(
         dtype: torch.dtype,
         use_8x4_sf_layout: bool,
         backend: str,
+        block_size: int = 16,
+        use_nvfp4: bool = True,
     ) -> torch.Tensor:
         return torch.empty(A.shape[0], B.shape[1], dtype=dtype, device=A.device)
 
+    @torch.library.custom_op(
+        "vllm::flashinfer_mxfp4_quantize",
+        mutates_args=[],
+        device_types="cuda",
+    )
+    def flashinfer_mxfp4_quantize(
+        a: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        from flashinfer import mxfp4_quantize as _mxfp4_quantize
+
+        return _mxfp4_quantize(a)
+
+    @torch.library.register_fake("vllm::flashinfer_mxfp4_quantize")
+    def flashinfer_mxfp4_quantize_fake(
+        a: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        m, k = a.shape
+        sf_vec_size = 32
+        padded_m = cdiv(m, 128) * 128
+        sf_cols = cdiv(k // sf_vec_size, 4) * 4
+        return (
+            torch.empty(m, k // 2, dtype=torch.uint8, device=a.device),
+            torch.empty(padded_m, sf_cols, dtype=torch.uint8, device=a.device),
+        )
+
     @torch.library.custom_op(
         "vllm::bmm_fp8",
         mutates_args=[],
@@ -642,15 +724,20 @@ def flashinfer_scaled_fp4_mm(
     b: torch.Tensor,
     block_scale_a: torch.Tensor,
     block_scale_b: torch.Tensor,
-    alpha: torch.Tensor,
+    alpha: torch.Tensor | None,
     out_dtype: torch.dtype,
     backend: str,
+    block_size: int = 16,
+    use_nvfp4: bool = True,
 ) -> torch.Tensor:
     assert a.ndim == 2 and b.ndim == 2
     assert block_scale_a.ndim == 2 and block_scale_b.ndim == 2
     assert a.stride(-1) == 1 and b.stride(-1) == 1
     assert a.shape[1] == b.shape[1]
 
+    if alpha is None:
+        alpha = torch.ones(1, dtype=torch.float32, device=a.device)
+
     if backend in ("cutlass", "cudnn"):
         block_scale_a = block_scale_a.view(torch.uint8)
         block_scale_b = block_scale_b.view(torch.uint8)
@@ -666,9 +753,52 @@ def flashinfer_scaled_fp4_mm(
         out_dtype,
         use_8x4_sf_layout=use_8x4_sf_layout,
         backend=backend,
+        block_size=block_size,
+        use_nvfp4=use_nvfp4,
     )
 
 
+def flashinfer_scaled_fp4_mm_out(
+    a: torch.Tensor,
+    b: torch.Tensor,
+    block_scale_a: torch.Tensor,
+    block_scale_b: torch.Tensor,
+    alpha: torch.Tensor,
+    out: torch.Tensor,
+    out_dtype: torch.dtype | None,
+    use_8x4_sf_layout: bool,
+    backend: str,
+) -> torch.Tensor:
+    assert a.ndim == 2 and b.ndim == 2 and out.ndim == 2
+    assert block_scale_a.ndim == 2 and block_scale_b.ndim == 2
+    assert a.stride(-1) == 1
+    assert a.shape[1] == b.shape[0]
+    assert out.shape == (a.shape[0], b.shape[1])
+    assert out.device.type == "cuda"
+
+    if backend in ("cutlass", "cudnn"):
+        if block_scale_a.dtype != torch.uint8:
+            block_scale_a = block_scale_a.view(torch.uint8)
+        if block_scale_b.dtype != torch.uint8:
+            block_scale_b = block_scale_b.view(torch.uint8)
+
+    from flashinfer import mm_fp4 as flashinfer_mm_fp4_
+
+    flashinfer_mm_fp4_(
+        a,
+        b,
+        block_scale_a,
+        block_scale_b,
+        alpha,
+        out_dtype or out.dtype,
+        out=out,
+        block_size=16,
+        use_8x4_sf_layout=use_8x4_sf_layout,
+        backend=backend,
+    )
+    return out
+
+
 def flashinfer_scaled_fp8_mm(
     a: torch.Tensor,
     b: torch.Tensor,
@@ -699,6 +829,38 @@ def flashinfer_scaled_fp8_mm(
     return output
 
 
+def flashinfer_scaled_fp8_mm_out(
+    a: torch.Tensor,
+    b: torch.Tensor,
+    scale_a: torch.Tensor,
+    scale_b: torch.Tensor,
+    out: torch.Tensor,
+    out_dtype: torch.dtype | None = None,
+) -> torch.Tensor:
+    assert a.ndim == 2 and b.ndim == 2 and out.ndim == 2
+    assert a.shape[1] == b.shape[0]
+    assert out.shape == (a.shape[0], b.shape[1])
+    assert scale_a.numel() == 1 and scale_b.numel() == 1
+    assert a.dtype == torch.float8_e4m3fn and b.dtype == torch.float8_e4m3fn
+    assert out.device.type == "cuda"
+    assert a.is_contiguous()
+
+    from flashinfer import bmm_fp8 as bmm_fp8_
+
+    bmm_fp8_(
+        a.unsqueeze(0),
+        # FlashInfer expects the weight in the same column-major view layout
+        # consumed by flashinfer_scaled_fp8_mm, so keep the transposed view.
+        b.unsqueeze(0),
+        scale_a,
+        scale_b,
+        out_dtype or out.dtype,
+        out.unsqueeze(0),
+        "auto",
+    )
+    return out
+
+
 def flashinfer_quant_nvfp4_8x4_sf_layout(
     a: torch.Tensor, a_global_sf: torch.Tensor
 ) -> tuple[torch.Tensor, torch.Tensor]:
@@ -732,8 +894,9 @@ def is_flashinfer_fp8_blockscale_gemm_supported() -> bool:
 def should_use_flashinfer_for_blockscale_fp8_gemm(
     is_flashinfer_supported: bool,
     output_dtype: torch.dtype,
-    input: torch.Tensor,
-    weight: torch.Tensor,
+    input_dtype: torch.dtype,
+    weight_dtype: torch.dtype,
+    weight_shape: tuple[int, int],
 ):
     if not is_flashinfer_supported:
         return False
@@ -744,20 +907,51 @@ def should_use_flashinfer_for_blockscale_fp8_gemm(
     N_MULTIPLE = 64
     K_MULTIPLE = 128
 
-    weight_dtype = weight.dtype
-    input_dtype = input.dtype
-
     should_use_flashinfer = (
         output_dtype == torch.bfloat16
         and input_dtype == torch.bfloat16
         and weight_dtype == torch.float8_e4m3fn
-        and weight.shape[0] % N_MULTIPLE == 0
-        and weight.shape[1] % K_MULTIPLE == 0
+        and weight_shape[0] % N_MULTIPLE == 0
+        and weight_shape[1] % K_MULTIPLE == 0
     )
 
     return should_use_flashinfer
 
 
+_MIN_CUDNN_FP8 = 91701  # cuDNN >= 9.17.1 required for FP8 attention
+
+
+@functools.cache
+def is_flashinfer_cudnn_fp8_prefill_attn_supported() -> bool:
+    """Check if FP8 ViT attention is supported on this platform.
+
+    Requires native FP8 hardware support, the FlashInfer cuDNN backend,
+    and cuDNN >= 9.17.1.
+    """
+    from vllm.v1.attention.backends.registry import AttentionBackendEnum
+
+    # cuDNN SDPA FP8 requires Hopper (SM 90) or newer.
+    if not current_platform.has_device_capability(90):
+        return False
+
+    try:
+        supported = current_platform.get_supported_vit_attn_backends()
+        if AttentionBackendEnum.FLASHINFER not in supported:
+            return False
+    except (ImportError, AttributeError):
+        return False
+
+    try:
+        import torch.backends.cudnn as cudnn
+
+        if cudnn.is_available() and cudnn.version() < _MIN_CUDNN_FP8:
+            return False
+    except (ImportError, AttributeError):
+        pass
+
+    return True
+
+
 __all__ = [
     "has_flashinfer",
     "flashinfer_trtllm_fp8_block_scale_moe",
@@ -767,6 +961,9 @@ def should_use_flashinfer_for_blockscale_fp8_gemm(
     "silu_and_mul_scaled_nvfp4_experts_quantize",
     "scaled_fp4_grouped_quantize",
     "nvfp4_block_scale_interleave",
+    "flashinfer_cute_dsl_fused_moe_nvfp4",
+    "flashinfer_b12x_fused_moe",
+    "flashinfer_convert_sf_to_mma_layout",
     "trtllm_fp4_block_scale_moe",
     "autotune",
     "has_flashinfer_moe",
@@ -775,15 +972,22 @@ def should_use_flashinfer_for_blockscale_fp8_gemm(
     "has_flashinfer_nvlink_one_sided",
     "has_flashinfer_cutlass_fused_moe",
     "has_flashinfer_cutedsl_grouped_gemm_nt_masked",
+    "has_flashinfer_cutedsl_moe_nvfp4",
+    "has_flashinfer_b12x_moe",
+    "has_flashinfer_b12x_gemm",
     "has_flashinfer_fp8_blockscale_gemm",
     "has_nvidia_artifactory",
     "supports_trtllm_attention",
     "can_use_trtllm_attention",
     "use_trtllm_attention",
+    "flashinfer_mxfp4_quantize",
     "flashinfer_scaled_fp4_mm",
+    "flashinfer_scaled_fp4_mm_out",
     "flashinfer_scaled_fp8_mm",
+    "flashinfer_scaled_fp8_mm_out",
     "flashinfer_quant_nvfp4_8x4_sf_layout",
     "flashinfer_fp8_blockscale_gemm",
     "should_use_flashinfer_for_blockscale_fp8_gemm",
     "is_flashinfer_fp8_blockscale_gemm_supported",
+    "is_flashinfer_cudnn_fp8_prefill_attn_supported",
 ]
diff --git a/vllm/utils/func_utils.py b/vllm/utils/func_utils.py
index 82eab043b0db..5ce23e6a0074 100644
--- a/vllm/utils/func_utils.py
+++ b/vllm/utils/func_utils.py
@@ -45,16 +45,14 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> None:
 
 
 @lru_cache
-def supports_kw(
+def _supports_kw(
     callable: Callable[..., object],
     kw_name: str,
     *,
     requires_kw_only: bool = False,
     allow_var_kwargs: bool = True,
 ) -> bool:
-    """Check if a keyword is a valid kwarg for a callable; if requires_kw_only
-    disallows kwargs names that can also be positional arguments.
-    """
+    """Internal cached implementation of supports_kw."""
     params = inspect.signature(callable).parameters
     if not params:
         return False
@@ -99,6 +97,29 @@ def supports_kw(
     return False
 
 
+def supports_kw(
+    callable: Callable[..., object],
+    kw_name: str,
+    *,
+    requires_kw_only: bool = False,
+    allow_var_kwargs: bool = True,
+) -> bool:
+    """Check if a keyword is a valid kwarg for a callable; if requires_kw_only
+    disallows kwargs names that can also be positional arguments.
+    """
+    # Unwrap bound methods so that the lru_cache key is the underlying
+    # function, not the instance. Caching bound methods pins the object
+    # (and all its GPU tensors) for the lifetime of the cache.
+    if hasattr(callable, "__func__"):
+        callable = callable.__func__
+    return _supports_kw(
+        callable,
+        kw_name,
+        requires_kw_only=requires_kw_only,
+        allow_var_kwargs=allow_var_kwargs,
+    )
+
+
 def get_allowed_kwarg_only_overrides(
     callable: Callable[..., object],
     overrides: Mapping[str, object] | None,
diff --git a/vllm/utils/import_utils.py b/vllm/utils/import_utils.py
index e7f966b275e2..5822e5840afc 100644
--- a/vllm/utils/import_utils.py
+++ b/vllm/utils/import_utils.py
@@ -66,14 +66,12 @@ def import_triton_kernels():
 
         logger.debug_once(
             f"Loading module triton_kernels from {triton_kernels.__file__}.",
-            scope="local",
         )
     elif _has_module("vllm.third_party.triton_kernels"):
         import vllm.third_party.triton_kernels as triton_kernels
 
         logger.debug_once(
             f"Loading module triton_kernels from {triton_kernels.__file__}.",
-            scope="local",
         )
         sys.modules["triton_kernels"] = triton_kernels
     else:
@@ -408,8 +406,13 @@ def has_deep_ep() -> bool:
 
 
 def has_deep_gemm() -> bool:
-    """Whether the optional `deep_gemm` package is available."""
-    return _has_module("deep_gemm")
+    """Whether the optional `deep_gemm` package is available.
+
+    Prefers an externally installed ``deep_gemm`` package (so users can
+    override with a newer version), then falls back to the vendored copy
+    bundled in the vLLM wheel.
+    """
+    return _has_module("deep_gemm") or _has_module("vllm.third_party.deep_gemm")
 
 
 def has_nixl_ep() -> bool:
@@ -461,3 +464,13 @@ def has_aiter() -> bool:
 def has_mori() -> bool:
     """Whether the optional `mori` package is available."""
     return _has_module("mori")
+
+
+def has_fbgemm_gpu() -> bool:
+    """Whether the optional `fbgemm_gpu` package is available."""
+    return _has_module("fbgemm_gpu")
+
+
+def has_cutedsl() -> bool:
+    """Whether the optional `cutelass` package is available."""
+    return _has_module("cutlass")
diff --git a/vllm/utils/mem_constants.py b/vllm/utils/mem_constants.py
index 62b725fbb0f2..b2f9a037c526 100644
--- a/vllm/utils/mem_constants.py
+++ b/vllm/utils/mem_constants.py
@@ -1,5 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+KB_bytes = 1_000
+"""The number of bytes in one kilobyte (KB)."""
+
+KiB_bytes = 1 << 10
+"""The number of bytes in one kibibyte (KiB)."""
+
 MB_bytes = 1_000_000
 """The number of bytes in one megabyte (MB)."""
 
diff --git a/vllm/utils/mem_utils.py b/vllm/utils/mem_utils.py
index e6a60a0c1377..4efb29975af8 100644
--- a/vllm/utils/mem_utils.py
+++ b/vllm/utils/mem_utils.py
@@ -13,7 +13,11 @@
 
 from vllm.platforms import current_platform
 
-from .mem_constants import GiB_bytes, MiB_bytes
+from .mem_constants import GiB_bytes, KiB_bytes, MiB_bytes
+
+
+def format_kib(b: int) -> str:
+    return f"{round(b / KiB_bytes, 2)}"
 
 
 def format_mib(b: int) -> str:
@@ -102,22 +106,12 @@ def measure(self) -> None:
         )
 
         self.free_memory, self.total_memory = current_platform.mem_get_info(device)
-        shared_sysmem_device_mem_sms = ((8, 7), (11, 0), (12, 1))  # Orin, Thor, Spark
-        if (
-            current_platform.is_cuda()
-            and current_platform.get_device_capability(device.index)
-            in shared_sysmem_device_mem_sms
-        ):
-            # On UMA (Orin, Thor and Spark) platform,
-            # where both CPU and GPU rely on system memory,
-            # the cudaMemGetInfo function shows the amount of free system memory
-            # rather than what’s actually available.
-            # In the case,
-            # torch.cuda.mem_get_info() only reports "free" memory,
-            # which can be lower than what is actually
-            # available due to not including cache memory.
-            # There’s also a comprehensive reference page
-            # that explains how you can compute the proper value yourself.
+        if current_platform.is_integrated_gpu(device.index):
+            # On UMA (Unified Memory Architecture) platforms where CPU and
+            # GPU share physical memory (e.g. GH200, DGX Spark, Jetson Orin),
+            # cudaMemGetInfo underreports free memory because it does not
+            # account for reclaimable OS memory (page cache, buffers).
+            # Use psutil to get the true available memory.
             # https://docs.nvidia.com/cuda/cuda-for-tegra-appnote/#estimating-total-allocatable-device-memory-on-an-integrated-gpu-device
             self.free_memory = psutil.virtual_memory().available
 
diff --git a/vllm/utils/mistral.py b/vllm/utils/mistral.py
index c9c24a2e306c..276ca8170f1d 100644
--- a/vllm/utils/mistral.py
+++ b/vllm/utils/mistral.py
@@ -12,8 +12,10 @@
 if TYPE_CHECKING:
     # if type checking, eagerly import the module
     import vllm.tokenizers.mistral as mt
+    import vllm.tool_parsers.mistral_tool_parser as mtp
 else:
     mt = LazyLoader("mt", globals(), "vllm.tokenizers.mistral")
+    mtp = LazyLoader("mtp", globals(), "vllm.tool_parsers.mistral_tool_parser")
 
 
 def is_mistral_tokenizer(obj: TokenizerLike | None) -> TypeGuard[mt.MistralTokenizer]:
@@ -26,3 +28,16 @@ def is_mistral_tokenizer(obj: TokenizerLike | None) -> TypeGuard[mt.MistralToken
         getattr(cls, "IS_MISTRAL_TOKENIZER", False)
         and isinstance(obj, mt.MistralTokenizer)
     )
+
+
+def is_mistral_tool_parser(cls: type | None) -> bool:
+    """Return true if *cls* is (a subclass of) MistralToolParser.
+
+    Uses a class attribute check so that importing
+    ``vllm.tool_parsers.mistral_tool_parser`` — and transitively
+    ``mistral_common`` — is not required.
+    """
+    return bool(
+        getattr(cls, "IS_MISTRAL_TOOL_PARSER", False)
+        and issubclass(cls, mtp.MistralToolParser)  # type: ignore[arg-type]
+    )
diff --git a/vllm/utils/multi_stream_utils.py b/vllm/utils/multi_stream_utils.py
index 3ade910bf99c..2203221c5a14 100644
--- a/vllm/utils/multi_stream_utils.py
+++ b/vllm/utils/multi_stream_utils.py
@@ -2,11 +2,21 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from collections.abc import Callable
+from enum import Enum
 from typing import Any
 
 import torch
 
 
+class AuxStreamType(Enum):
+    Attention = 1
+
+
+class EventType(Enum):
+    Main = 0
+    Attention = 1
+
+
 def maybe_execute_in_parallel(
     fn0: Callable[[], Any],
     fn1: Callable[[], Any],
@@ -46,3 +56,73 @@ def maybe_execute_in_parallel(
         result0 = fn0()
         result1 = fn1()
     return (result0, result1)
+
+
+def execute_in_parallel(
+    default_fn: Callable[[], Any],
+    aux_fns: list[Callable[[], Any] | None],
+    start_event: torch.cuda.Event,
+    done_events: list[torch.cuda.Event],
+    aux_streams: list[torch.cuda.Stream] | None = None,
+    enable: bool = False,
+) -> tuple[Any, list[Any]]:
+    """Run default_fn on the current stream and aux_fns concurrently on
+    aux_streams.
+
+    Generalizes maybe_execute_in_parallel to N aux callables. Slots where
+    aux_fns[i] is None are skipped (no stream switch, no event record); their
+    corresponding entry in the returned aux_results list is None.
+
+    start_event fans out from the current stream to every launched aux stream;
+    done_events[i] is recorded after aux_fns[i] so the current stream joins
+    before returning. Falls back to sequential execution on the current stream
+    when aux_streams is None or enable is False; in that case default_fn runs
+    first, then aux_fns in order.
+
+    Args:
+        default_fn: Callable for the default (current) stream.
+        aux_fns: Per-aux callables; entries may be None to skip.
+        start_event: CUDA event recorded on the current stream before
+            default_fn so each launched aux stream can wait on it.
+        done_events: One CUDA event per aux slot, recorded after the
+            corresponding aux_fn. Length must match aux_fns.
+        aux_streams: Per-aux CUDA streams. Length must match aux_fns.
+            Multi-stream is disabled when None.
+        enable: Opt-in switch for the multi-stream path. Defaults to False,
+            so callers that pass aux_streams must also pass enable=True
+            (typically gated by an env var) to actually overlap. When False,
+            execution falls back to sequential on the current stream.
+
+    Returns:
+        Tuple of (default_result, aux_results) where aux_results[i] is the
+        result of aux_fns[i] (or None when skipped).
+    """
+    aux_results: list[Any]
+    if aux_streams is None or not enable:
+        default_result = default_fn()
+        aux_results = [fn() if fn is not None else None for fn in aux_fns]
+        return default_result, aux_results
+
+    assert len(aux_fns) == len(aux_streams) == len(done_events), (
+        "aux_fns, aux_streams, and done_events must be the same length"
+    )
+
+    aux_results = [None] * len(aux_fns)
+    pending: list[torch.cuda.Event] = []
+
+    start_event.record()
+    for i, fn in enumerate(aux_fns):
+        if fn is None:
+            continue
+        with torch.cuda.stream(aux_streams[i]):
+            start_event.wait()
+            aux_results[i] = fn()
+            done_events[i].record()
+        pending.append(done_events[i])
+
+    default_result = default_fn()
+
+    for ev in pending:
+        ev.wait()
+
+    return default_result, aux_results
diff --git a/vllm/utils/network_utils.py b/vllm/utils/network_utils.py
index 6152bb0b2d9d..cf3141a0ced6 100644
--- a/vllm/utils/network_utils.py
+++ b/vllm/utils/network_utils.py
@@ -64,7 +64,7 @@ def get_ip() -> str:
         pass
 
     warnings.warn(
-        "Failed to get the IP address, using 0.0.0.0 by default."
+        "Failed to get the IP address, using 0.0.0.0 by default. "
         "The value can be set by the environment variable"
         " VLLM_HOST_IP or HOST_IP.",
         stacklevel=2,
diff --git a/vllm/utils/numa_utils.py b/vllm/utils/numa_utils.py
new file mode 100644
index 000000000000..4e1addad980f
--- /dev/null
+++ b/vllm/utils/numa_utils.py
@@ -0,0 +1,317 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""NUMA binding utilities for vLLM worker processes.
+
+Adapted in part from SGLang's NUMA helper implementation:
+https://github.com/sgl-project/sglang/blob/ba6d54d0f08f82f42b8224908ae2459a496b31b3/python/sglang/srt/utils/numa_utils.py
+"""
+
+import ctypes
+import logging
+import multiprocessing
+import os
+import subprocess
+from contextlib import contextmanager
+from functools import cache
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import psutil
+
+from vllm import envs
+
+if TYPE_CHECKING:
+    from vllm.config import VllmConfig
+
+logger = logging.getLogger(__name__)
+_NUMACTL_ARGS_ENV = "_VLLM_INTERNAL_NUMACTL_ARGS"
+_NUMACTL_PYTHON_EXECUTABLE_ENV = "_VLLM_INTERNAL_NUMACTL_PYTHON_EXECUTABLE"
+
+
+@cache
+def get_libnuma():
+    libnuma = None
+    for libnuma_so in ["libnuma.so", "libnuma.so.1"]:
+        try:
+            libnuma = ctypes.CDLL(libnuma_so)
+        except OSError:
+            libnuma = None
+        if libnuma is not None:
+            break
+    return libnuma
+
+
+def _can_set_mempolicy() -> bool:
+    """Check whether the current process can use NUMA memory policy syscalls."""
+    try:
+        libnuma = get_libnuma()
+        if libnuma is None or libnuma.numa_available() < 0:
+            return False
+        mode = ctypes.c_int()
+        ret = libnuma.get_mempolicy(
+            ctypes.byref(mode), None, ctypes.c_ulong(0), None, ctypes.c_ulong(0)
+        )
+        return ret == 0
+    except Exception:
+        return False
+
+
+def _is_auto_numa_available() -> bool:
+    """Check whether automatic GPU-to-NUMA detection should be attempted."""
+    from vllm.platforms import current_platform
+
+    if not current_platform.is_cuda_alike():
+        return False
+
+    if not os.path.isdir("/sys/devices/system/node/node1"):
+        return False
+
+    try:
+        process = psutil.Process(os.getpid())
+        cpu_affinity = process.cpu_affinity()
+        cpu_count = psutil.cpu_count()
+        if cpu_count is not None and cpu_affinity != list(range(cpu_count)):
+            logger.warning(
+                "CPU affinity is already constrained for this process. "
+                "Skipping automatic NUMA binding; pass --numa-bind-nodes "
+                "explicitly to override."
+            )
+            return False
+    except (AttributeError, NotImplementedError, psutil.Error):
+        pass
+
+    if not _can_set_mempolicy():
+        logger.warning(
+            "User lacks permission to set NUMA memory policy. "
+            "Automatic NUMA detection may not work; if you are using Docker, "
+            "try adding --cap-add SYS_NICE."
+        )
+        return False
+
+    if not hasattr(current_platform, "get_all_device_numa_nodes"):
+        logger.warning(
+            "Platform %s does not support automatic NUMA detection",
+            type(current_platform).__name__,
+        )
+        return False
+
+    return True
+
+
+@cache
+def get_auto_numa_nodes() -> list[int] | None:
+    """Auto-detect NUMA nodes for all visible GPUs."""
+    from vllm.platforms import current_platform
+
+    if not _is_auto_numa_available():
+        return None
+
+    numa_nodes = current_platform.get_all_device_numa_nodes()
+    if numa_nodes is not None:
+        logger.info("Auto-detected NUMA nodes for GPUs: %s", numa_nodes)
+    return numa_nodes
+
+
+def _get_gpu_index(
+    parallel_config, local_rank: int, dp_local_rank: int | None = None
+) -> int:
+    """Compute the physical GPU index used for NUMA lookup."""
+    if (
+        parallel_config.distributed_executor_backend not in ("ray", "external_launcher")
+        and parallel_config.data_parallel_backend != "ray"
+        and parallel_config.nnodes_within_dp == 1
+    ):
+        if dp_local_rank is None:
+            dp_local_rank = parallel_config.data_parallel_rank_local
+            if dp_local_rank is None:
+                dp_local_rank = parallel_config.data_parallel_index
+
+        tp_pp_world_size = (
+            parallel_config.pipeline_parallel_size
+            * parallel_config.tensor_parallel_size
+        )
+        return local_rank + dp_local_rank * tp_pp_world_size
+
+    return local_rank
+
+
+def _get_numa_node(parallel_config, gpu_index: int) -> int:
+    numa_nodes = parallel_config.numa_bind_nodes
+    if numa_nodes is None:
+        numa_nodes = get_auto_numa_nodes()
+        if numa_nodes is None:
+            raise RuntimeError(
+                "NUMA binding was requested, but vLLM could not detect the "
+                "GPU-to-NUMA topology automatically. Pass --numa-bind-nodes "
+                "explicitly or disable --numa-bind."
+            )
+        parallel_config.numa_bind_nodes = numa_nodes
+
+    if gpu_index >= len(numa_nodes):
+        raise ValueError(
+            f"GPU index {gpu_index} exceeds numa_bind_nodes size {len(numa_nodes)}. "
+            "Ensure the binding lists cover every visible GPU."
+        )
+
+    return numa_nodes[gpu_index]
+
+
+def _get_cpu_binding(parallel_config, gpu_index: int) -> str | None:
+    cpu_bindings = parallel_config.numa_bind_cpus
+    if cpu_bindings is None:
+        return None
+
+    if gpu_index >= len(cpu_bindings):
+        raise ValueError(
+            f"GPU index {gpu_index} exceeds numa_bind_cpus size "
+            f"{len(cpu_bindings)}. Ensure the binding lists cover every visible GPU."
+        )
+
+    return cpu_bindings[gpu_index]
+
+
+def _get_numactl_args(
+    vllm_config: "VllmConfig",
+    local_rank: int,
+    dp_local_rank: int | None = None,
+    process_kind: str = "worker",
+) -> str | None:
+    parallel_config = vllm_config.parallel_config
+    if not parallel_config.numa_bind:
+        return None
+
+    gpu_index = _get_gpu_index(parallel_config, local_rank, dp_local_rank)
+    numa_node = _get_numa_node(parallel_config, gpu_index)
+    cpu_binding = _get_cpu_binding(parallel_config, gpu_index)
+
+    if cpu_binding is not None:
+        bind_arg = f"--physcpubind={cpu_binding}"
+        logger.info(
+            "Binding %s subprocess (local_rank=%s, gpu_index=%s) to CPUs %s and NUMA node %s",  # noqa: E501
+            process_kind,
+            local_rank,
+            gpu_index,
+            cpu_binding,
+            numa_node,
+        )
+    else:
+        bind_arg = f"--cpunodebind={numa_node}"
+        logger.info(
+            "Binding %s subprocess (local_rank=%s, gpu_index=%s) to NUMA node %s",
+            process_kind,
+            local_rank,
+            gpu_index,
+            numa_node,
+        )
+
+    return f"{bind_arg} --membind={numa_node}"
+
+
+def _log_numactl_show(label: str) -> bool:
+    try:
+        result = subprocess.run(
+            ["numactl", "--show"],
+            check=True,
+            capture_output=True,
+            text=True,
+        )
+    except (FileNotFoundError, subprocess.CalledProcessError) as e:
+        logger.warning("Failed to run `numactl --show` for %s: %s", label, e)
+        return False
+
+    output = result.stdout.strip()
+    if not output:
+        logger.warning("`numactl --show` returned no output for %s", label)
+        return False
+
+    summary = ", ".join(line.strip() for line in output.splitlines() if line.strip())
+    logger.debug("%s affinity: %s", label, summary)
+    return True
+
+
+def log_current_affinity_state(label: str) -> None:
+    """Log the process's effective NUMA affinity state."""
+    _log_numactl_show(label)
+
+
+@contextmanager
+def configure_subprocess(
+    vllm_config: "VllmConfig",
+    local_rank: int,
+    dp_local_rank: int | None = None,
+    process_kind: str = "worker",
+):
+    """Temporarily replace the multiprocessing executable with a numactl wrapper."""
+    numactl_args = _get_numactl_args(
+        vllm_config, local_rank, dp_local_rank, process_kind
+    )
+    if numactl_args is None:
+        yield
+        return
+
+    executable, debug_str = _get_numactl_executable()
+    python_executable = os.fsdecode(multiprocessing.spawn.get_executable())
+    with (
+        _set_numa_wrapper_env(numactl_args, python_executable),
+        _mp_set_executable(executable, debug_str),
+    ):
+        yield
+
+
+def _get_numactl_executable() -> tuple[str, str]:
+    """Return the fixed wrapper executable used to launch numactl."""
+    from shutil import which
+
+    if which("numactl") is None:
+        raise RuntimeError(
+            "numactl is required for NUMA binding but is not installed or "
+            "not available on PATH."
+        )
+
+    script_path = Path(__file__).with_name("numa_wrapper.sh")
+    return str(script_path), f"{script_path} via {_NUMACTL_ARGS_ENV}"
+
+
+@contextmanager
+def _set_numa_wrapper_env(numactl_args: str, python_executable: str):
+    old_numactl_args = os.environ.get(_NUMACTL_ARGS_ENV)
+    old_python_executable = os.environ.get(_NUMACTL_PYTHON_EXECUTABLE_ENV)
+    os.environ[_NUMACTL_ARGS_ENV] = numactl_args
+    os.environ[_NUMACTL_PYTHON_EXECUTABLE_ENV] = python_executable
+    try:
+        yield
+    finally:
+        if old_numactl_args is None:
+            os.environ.pop(_NUMACTL_ARGS_ENV, None)
+        else:
+            os.environ[_NUMACTL_ARGS_ENV] = old_numactl_args
+
+        if old_python_executable is None:
+            os.environ.pop(_NUMACTL_PYTHON_EXECUTABLE_ENV, None)
+        else:
+            os.environ[_NUMACTL_PYTHON_EXECUTABLE_ENV] = old_python_executable
+
+
+@contextmanager
+def _mp_set_executable(executable: str, debug_str: str):
+    start_method = envs.VLLM_WORKER_MULTIPROC_METHOD
+    if start_method != "spawn":
+        logger.warning(
+            "NUMA binding requires spawn method but got '%s'. "
+            "NUMA binding will be ineffective. "
+            "Set VLLM_WORKER_MULTIPROC_METHOD=spawn to enable NUMA binding.",
+            start_method,
+        )
+        yield
+        return
+
+    old_executable = os.fsdecode(multiprocessing.spawn.get_executable())
+    multiprocessing.spawn.set_executable(executable)
+    try:
+        yield
+    finally:
+        assert os.fsdecode(multiprocessing.spawn.get_executable()) == executable, (
+            "Executable was changed during NUMA binding context: "
+            f"expected {executable}, got {multiprocessing.spawn.get_executable()}"
+        )
+        multiprocessing.spawn.set_executable(old_executable)
diff --git a/vllm/utils/numa_wrapper.sh b/vllm/utils/numa_wrapper.sh
new file mode 100755
index 000000000000..541801ed5df5
--- /dev/null
+++ b/vllm/utils/numa_wrapper.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+if [ -z "${_VLLM_INTERNAL_NUMACTL_ARGS:-}" ]; then
+    echo "_VLLM_INTERNAL_NUMACTL_ARGS is not set" >&2
+    exit 1
+fi
+
+if [ -z "${_VLLM_INTERNAL_NUMACTL_PYTHON_EXECUTABLE:-}" ]; then
+    echo "_VLLM_INTERNAL_NUMACTL_PYTHON_EXECUTABLE is not set" >&2
+    exit 1
+fi
+
+if ! command -v numactl >/dev/null 2>&1; then
+    echo "numactl is not available on PATH" >&2
+    exit 1
+fi
+
+case "${_VLLM_INTERNAL_NUMACTL_ARGS}" in
+    *[![:alnum:]\ \-\_=,./]*)
+        echo "Invalid characters in _VLLM_INTERNAL_NUMACTL_ARGS" >&2
+        exit 1
+        ;;
+esac
+
+exec numactl ${_VLLM_INTERNAL_NUMACTL_ARGS} "${_VLLM_INTERNAL_NUMACTL_PYTHON_EXECUTABLE}" "$@"
diff --git a/vllm/utils/ompmultiprocessing.py b/vllm/utils/ompmultiprocessing.py
new file mode 100644
index 000000000000..711f527355df
--- /dev/null
+++ b/vllm/utils/ompmultiprocessing.py
@@ -0,0 +1,289 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""OMP Aware Multiprocessing manager for running multiprocessing.Process()
+Copyright (c) 2026 Red Hat Inc
+Copyright (c) 2026 Cambridge Greys Ltd
+"""
+
+import os
+from collections.abc import Callable
+from contextlib import contextmanager
+from typing import TYPE_CHECKING
+
+import vllm.utils.cpu_resource_utils as cr_utils
+from vllm import envs
+from vllm.logger import init_logger
+from vllm.platforms import CpuArchEnum, current_platform
+from vllm.utils.cpu_resource_utils import LogicalCPUInfo
+
+if TYPE_CHECKING:
+    from vllm.config import VllmConfig
+
+logger = init_logger(__name__)
+
+
+class OMPProcessManager:
+    def __init__(self, config: "VllmConfig"):
+        if not current_platform.is_cpu():
+            return
+
+        self.local_world_size = config.parallel_config.local_world_size
+        self.local_dp_rank = config.parallel_config.data_parallel_rank_local
+        # This is a bit tricky because the internal DP size
+        # is always 1 for non-MoE models
+        self.internal_dp_size = config.parallel_config._api_process_count
+
+        self.simulate_multi_node = os.environ.get("VLLM_CPU_SIM_MULTI_NUMA", "0") != "0"
+        ld_preload_str = os.getenv("LD_PRELOAD", "")
+        self.use_iomp = "libiomp" in ld_preload_str or "libomp" in ld_preload_str
+        self.use_gomp = "libgomp" in ld_preload_str
+
+        assert not (self.use_iomp and self.use_gomp)
+
+        # at least reserve 1/local_world_size(for ARM/RISC-V) core for scheduler
+        # proc as always use MP executor
+        # TODO: make scheduler proc sleep when idle
+        self.reserve_cpu_num = (
+            self.local_world_size
+            if current_platform.get_cpu_architecture()
+            in (CpuArchEnum.ARM, CpuArchEnum.RISCV)
+            else 1
+        )
+        # reserve at one more core for nixl_connector under p/d case
+        if config.kv_transfer_config:
+            self.reserve_cpu_num += 1
+
+        if envs.VLLM_CPU_NUM_OF_RESERVED_CPU is not None:
+            if self.reserve_cpu_num > envs.VLLM_CPU_NUM_OF_RESERVED_CPU:
+                msg = (
+                    f"VLLM_CPU_NUM_OF_RESERVED_CPU is less than "
+                    "the minimum requirement"
+                    f": {self.reserve_cpu_num} cores"
+                )
+                logger.warning(msg=msg)
+            self.reserve_cpu_num = envs.VLLM_CPU_NUM_OF_RESERVED_CPU
+
+        self._parse_omp_threads_bind_env()
+
+        assert not self.simulate_multi_node or self.auto_setup
+
+    @contextmanager
+    def configure_omp_envs(self, rank: int, local_rank: int):
+        if not current_platform.is_cpu() or self.skip_setup:
+            yield
+            return
+
+        envs_dict = {}
+        cpu_list = [str(i) for i in self.cpu_lists[local_rank]]
+        envs_dict["OMP_NUM_THREADS"] = str(len(cpu_list))
+        if self.use_iomp:
+            # set IOMP envs
+            cpu_list_str = ",".join(cpu_list)
+            envs_dict["KMP_AFFINITY"] = (
+                f"granularity=fine,explicit,proclist=[{cpu_list_str}]"
+            )
+            # The time(milliseconds) that a thread should wait after
+            # completing the execution of a parallel region, before sleeping.
+            # A value of 5 masks thread underutilization.
+            # Set to 1 when debugging thread utilization issues.
+            envs_dict["KMP_BLOCKTIME"] = "5"
+            # Prevents the CPU to run into low performance state
+            envs_dict["KMP_TPAUSE"] = "0"
+        elif self.use_gomp:
+            # set GOMP envs
+            # likes '0 1 2 ...'
+            cpu_list_str = " ".join(cpu_list)
+            envs_dict["GOMP_CPU_AFFINITY"] = cpu_list_str
+        else:
+            # set OMP envs
+            # likes '{0,1,2,...}'
+            cpu_list_str = ",".join(cpu_list)
+            envs_dict["OMP_PLACES"] = f"{{{cpu_list_str}}}"
+            envs_dict["OMP_PROC_BIND"] = "true"
+
+        # backup envs
+        old_envs_dict = {}
+        for k in envs_dict:
+            old_envs_dict[k] = os.environ.get(k)
+
+        try:
+            # set envs
+            for k, v in envs_dict.items():
+                os.environ[k] = v
+            yield
+        finally:
+            # restore old envs
+            for k, v in old_envs_dict.items():  # type: ignore
+                if v is None:
+                    os.environ.pop(k, None)
+                else:
+                    os.environ[k] = v
+
+    def _parse_omp_threads_bind_env(self):
+        vllm_mask = envs.VLLM_CPU_OMP_THREADS_BIND
+        self.skip_setup = vllm_mask == "nobind"
+        self.auto_setup = vllm_mask == "auto"
+        self.reserved_cpu_list = []
+        self.cpu_lists = []
+
+        if self.auto_setup:
+            # auto generate CPU lists
+            cpu_arch = current_platform.get_cpu_architecture()
+            if cpu_arch == CpuArchEnum.POWERPC:
+                # For POWERPC SMT-8/4/2
+                cpu_list, reserve_list = self._get_autobind_cpu_ids(
+                    lambda cpus: [cpu for cpu in cpus if cpu.id % 8 < 4]
+                )
+            elif cpu_arch in (CpuArchEnum.X86, CpuArchEnum.S390X):
+                # For x86/S390X SMT-2, use 1 logical CPU per physical core
+                cpu_list, reserve_list = self._get_autobind_cpu_ids(
+                    lambda cpus: cpus[-1:]
+                )
+            elif cpu_arch in (CpuArchEnum.ARM, CpuArchEnum.RISCV):
+                # For AArch64 / RISC-V, no SMT, use all logical CPUs
+                cpu_list, reserve_list = self._get_autobind_cpu_ids(lambda cpus: cpus)
+            else:
+                cpu_list, reserve_list = [], []
+                raise RuntimeError(f"{cpu_arch} doesn't support auto CPU binding.")
+
+            for item in cpu_list:
+                self.cpu_lists.append([x.id for x in item])
+            self.reserved_cpu_list = [x.id for x in reserve_list]
+        elif not self.skip_setup:
+            # user defined CPU lists
+            omp_cpuids_list = vllm_mask.split("|")
+            if self.local_dp_rank is not None:
+                local_dp_rank = self.local_dp_rank
+                world_size = self.local_world_size
+                # Rank mapping [DP, PP, TP]
+                omp_cpuids_list = omp_cpuids_list[
+                    local_dp_rank * world_size : (local_dp_rank + 1) * world_size
+                ]
+
+            assert len(omp_cpuids_list) == self.local_world_size, (
+                "Given "
+                f"number of CPU id list {omp_cpuids_list} doesn't match "
+                f"local world size {self.local_world_size}."
+            )
+
+            # parse CPU list strings like "5,2-4" to [5, 2, 3, 4]
+            self.cpu_lists = [cr_utils.parse_id_list(s) for s in omp_cpuids_list]
+        else:
+            # skip
+            self.cpu_lists = []
+
+        msg = (
+            "OpenMP thread binding info: \n"
+            f"\tVLLM_CPU_OMP_THREADS_BIND={vllm_mask!r}, "
+            f"auto_setup={self.auto_setup}, skip_setup={self.skip_setup}\n"
+            f"\tlocal_world_size={self.local_world_size}, "
+            f"reserve_cpu_num={self.reserve_cpu_num}\n"
+        )
+        for i, cpus in enumerate(self.cpu_lists):
+            msg += f"\tlocal_rank={i}, core ids={cpus}\n"
+        msg += f"\treserved_cpus={self.reserved_cpu_list}"
+        logger.info(msg)
+
+    def _get_autobind_cpu_ids(
+        self, cpu_selector: Callable[[list[LogicalCPUInfo]], list[LogicalCPUInfo]]
+    ) -> tuple[list[list[LogicalCPUInfo]], list[LogicalCPUInfo]]:
+        """
+        Return CPU ids to bind based on NUMA nodes, and CPU ids reserved for
+        other processes.
+        Currently for rank N, only CPU ids on the N-th node in available NUMA
+        node list will be selected.
+        Args:
+            cpu_selector: a callable object to select CPUs from a CPU list
+            of a physical core. The input is a LogicalCPUInfo list contains
+            logical CPUs of a physical CPU, sorted by the LogicalCPUInfo.id.
+            A selected LogicalCPUInfo list should be returned.
+        """
+
+        # this memory node list has been sliced for DP offset
+        allowed_numa_nodes = cr_utils.get_visible_memory_node()
+        logical_cpu_list = cr_utils.get_allowed_cpu_list()
+
+        local_world_size = self.local_world_size
+        assert (
+            len(allowed_numa_nodes) >= local_world_size or self.simulate_multi_node
+        ), (
+            f"Not enough allowed NUMA nodes to bind threads of "
+            f"{local_world_size} local CPUWorkers. "
+            f"Allowed NUMA nodes are {allowed_numa_nodes}. "
+            "Please try to bind threads manually or decrease DP/TP/PP."
+        )
+
+        # Generate OMP CPU list for each rank
+        cpu_lists_of_ranks = []
+        reserved_cpu_list = []
+        total_cpu_num = 0
+        for local_rank in range(self.local_world_size):
+            if not self.simulate_multi_node:
+                selected_numa_node = allowed_numa_nodes[local_rank]
+                selected_logical_cpu_list = [
+                    x for x in logical_cpu_list if x.numa_node == selected_numa_node
+                ]
+            else:
+                world_size_across_dp = self.local_world_size * self.internal_dp_size
+                assert len(logical_cpu_list) >= world_size_across_dp
+                selected_logical_cpu_list = sorted(
+                    logical_cpu_list, key=lambda x: x.numa_node
+                )
+                sim_cpu_num_per_node = (
+                    len(selected_logical_cpu_list) // world_size_across_dp
+                )
+                assert self.local_dp_rank is not None
+                start_idx = (
+                    local_rank + self.local_world_size * self.local_dp_rank
+                ) * sim_cpu_num_per_node
+                selected_logical_cpu_list = selected_logical_cpu_list[
+                    start_idx : (start_idx + sim_cpu_num_per_node)
+                ]
+
+            # Select logical CPUs on same physical cores via cpu_selector
+            core_to_cpus: dict[int, list[LogicalCPUInfo]] = {}
+            for cpu_info in selected_logical_cpu_list:
+                if cpu_info.physical_core not in core_to_cpus:
+                    core_to_cpus[cpu_info.physical_core] = []
+                core_to_cpus[cpu_info.physical_core].append(cpu_info)
+            selected_logical_cpu_list = []
+            for cpu_list in core_to_cpus.values():
+                cpu_list = sorted(cpu_list, key=lambda x: x.id)
+                selected_logical_cpu_list.extend(cpu_selector(cpu_list))
+
+            # sort selected cores based on core id
+            selected_logical_cpu_list = sorted(
+                selected_logical_cpu_list, key=lambda x: x.id
+            )
+
+            cpu_lists_of_ranks.append(selected_logical_cpu_list)
+            total_cpu_num += len(selected_logical_cpu_list)
+
+        # Reserve CPUs for other processes
+        if total_cpu_num <= self.reserve_cpu_num:
+            logger.warning(
+                "Selected CPU core number (%s) "
+                "should be greater than reserved CPU core "
+                "number (%s).",
+                total_cpu_num,
+                self.reserve_cpu_num,
+            )
+            return cpu_lists_of_ranks, []
+
+        reserve_num_per_rank = [
+            self.reserve_cpu_num // self.local_world_size
+        ] * self.local_world_size
+        # last rank first
+        for i in range(
+            self.local_world_size - 1,
+            self.local_world_size - 1 - self.reserve_cpu_num % self.local_world_size,
+            -1,
+        ):
+            reserve_num_per_rank[i] += 1
+        for i in range(self.local_world_size):
+            num = reserve_num_per_rank[i]
+            if num > 0:
+                reserved_cpu_list.extend(cpu_lists_of_ranks[i][-num:])
+                cpu_lists_of_ranks[i] = cpu_lists_of_ranks[i][:-num]
+
+        return cpu_lists_of_ranks, reserved_cpu_list
diff --git a/vllm/utils/platform_utils.py b/vllm/utils/platform_utils.py
index 6dd9ca4221c0..cc69d9a241c6 100644
--- a/vllm/utils/platform_utils.py
+++ b/vllm/utils/platform_utils.py
@@ -50,8 +50,10 @@ def is_pin_memory_available() -> bool:
 def is_uva_available() -> bool:
     """Check if Unified Virtual Addressing (UVA) is available."""
     # UVA requires pinned memory.
+    from vllm.platforms import current_platform
+
     # TODO: Add more requirements for UVA if needed.
-    return is_pin_memory_available()
+    return is_pin_memory_available() or current_platform.is_cpu()
 
 
 @cache
diff --git a/vllm/utils/profiling.py b/vllm/utils/profiling.py
index b66910693957..ce2a5ba3993a 100644
--- a/vllm/utils/profiling.py
+++ b/vllm/utils/profiling.py
@@ -8,7 +8,13 @@
 from functools import wraps
 from typing import Any
 
+from typing_extensions import deprecated
 
+
+@deprecated(
+    "vllm.utils.profiling.cprofile_context() is deprecated and will be removed "
+    "in v0.21. Use Python's cProfile module directly instead."
+)
 @contextlib.contextmanager
 def cprofile_context(save_file: str | None = None):
     """Run a cprofile
@@ -32,6 +38,10 @@ def cprofile_context(save_file: str | None = None):
             prof.print_stats(sort="cumtime")
 
 
+@deprecated(
+    "vllm.utils.profiling.cprofile() is deprecated and will be removed in "
+    "v0.21. Use Python's cProfile module directly instead."
+)
 def cprofile(save_file: str | None = None, enabled: bool = True):
     """Decorator to profile a Python method using cProfile.
 
diff --git a/vllm/utils/serial_utils.py b/vllm/utils/serial_utils.py
index 596a71935107..5fde5ac7105d 100644
--- a/vllm/utils/serial_utils.py
+++ b/vllm/utils/serial_utils.py
@@ -27,6 +27,7 @@ def nbytes(self) -> int:
 
 
 EmbedDType = Literal["float32", "float16", "bfloat16", "fp8_e4m3", "fp8_e5m2"]
+MmMetadataDType = Literal["int32", "int64", "uint8", "bool"]
 Endianness = Literal["native", "big", "little"]
 EncodingFormat = Literal["float", "base64", "bytes", "bytes_only"]
 
@@ -42,6 +43,15 @@ def nbytes(self) -> int:
     "fp8_e4m3": DTypeInfo(torch.float8_e4m3fn, torch.uint8, np.uint8),
     "fp8_e5m2": DTypeInfo(torch.float8_e5m2, torch.uint8, np.uint8),
 }
+MM_METADATA_DTYPES: Mapping[MmMetadataDType, DTypeInfo] = {
+    "int32": DTypeInfo(torch.int32, torch.int32, np.int32),
+    "int64": DTypeInfo(torch.int64, torch.int64, np.int64),
+    "uint8": DTypeInfo(torch.uint8, torch.uint8, np.uint8),
+    "bool": DTypeInfo(torch.bool, torch.uint8, np.uint8),
+}
+_ALL_SERIAL_DTYPES: Mapping[str, DTypeInfo] = {
+    k: v for d in (EMBED_DTYPES, MM_METADATA_DTYPES) for k, v in d.items()
+}
 ENDIANNESS: tuple[Endianness, ...] = get_args(Endianness)
 
 
@@ -56,14 +66,14 @@ def tensor2base64(x: torch.Tensor) -> str:
 
 def tensor2binary(
     tensor: torch.Tensor,
-    embed_dtype: EmbedDType,
+    embed_dtype: "EmbedDType | MmMetadataDType",
     endianness: Endianness,
 ) -> bytes:
     assert isinstance(tensor, torch.Tensor)
-    assert embed_dtype in EMBED_DTYPES
+    assert embed_dtype in _ALL_SERIAL_DTYPES
     assert endianness in ENDIANNESS
 
-    dtype_info = EMBED_DTYPES[embed_dtype]
+    dtype_info = _ALL_SERIAL_DTYPES[embed_dtype]
 
     np_array = (
         tensor.to(dtype_info.torch_dtype)
@@ -82,13 +92,13 @@ def tensor2binary(
 def binary2tensor(
     binary: bytes,
     shape: tuple[int, ...],
-    embed_dtype: EmbedDType,
+    embed_dtype: "EmbedDType | MmMetadataDType",
     endianness: Endianness,
 ) -> torch.Tensor:
-    assert embed_dtype in EMBED_DTYPES
+    assert embed_dtype in _ALL_SERIAL_DTYPES
     assert endianness in ENDIANNESS
 
-    dtype_info = EMBED_DTYPES[embed_dtype]
+    dtype_info = _ALL_SERIAL_DTYPES[embed_dtype]
 
     np_array = np.frombuffer(binary, dtype=dtype_info.numpy_view_dtype).reshape(shape)
 
diff --git a/vllm/utils/system_utils.py b/vllm/utils/system_utils.py
index ca29dfd72130..7f56f972a4fa 100644
--- a/vllm/utils/system_utils.py
+++ b/vllm/utils/system_utils.py
@@ -140,6 +140,11 @@ def _maybe_force_spawn():
         os.environ["RAY_ADDRESS"] = ray.get_runtime_context().gcs_address
         reasons.append("In a Ray actor and can only be spawned")
 
+    # Force spawn if NUMA binding is enabled via --numa-bind.
+    # NUMA binding uses executable hijacking which requires spawn
+    if "--numa-bind" in sys.argv:
+        reasons.append("NUMA binding requires spawn method")
+
     if cuda_is_initialized():
         reasons.append("CUDA is initialized")
     elif xpu_is_initialized():
@@ -229,12 +234,17 @@ def write_with_prefix(s: str):
     file.write = write_with_prefix  # type: ignore[method-assign]
 
 
-def decorate_logs(process_name: str | None = None) -> None:
+def decorate_logs(
+    process_name: str | None = None, *, skip_if_decorated: bool = False
+) -> None:
     """Decorate stdout/stderr with process name and PID prefix."""
     # Respect VLLM_CONFIGURE_LOGGING environment variable
     if not envs.VLLM_CONFIGURE_LOGGING:
         return
 
+    if skip_if_decorated and hasattr(sys.stdout, "_original_write"):
+        return
+
     if process_name is None:
         process_name = get_mp_context().current_process().name
 
diff --git a/vllm/utils/torch_utils.py b/vllm/utils/torch_utils.py
index b4c77e54803c..12ec5b0fcc66 100644
--- a/vllm/utils/torch_utils.py
+++ b/vllm/utils/torch_utils.py
@@ -3,10 +3,10 @@
 import contextlib
 import importlib.metadata
 import os
+import platform
 import random
 import threading
 from collections.abc import Callable, Collection
-from functools import lru_cache
 from typing import TYPE_CHECKING, Any, TypeVar
 
 import numpy as np
@@ -39,8 +39,15 @@
     "fp8_e4m3": torch.uint8,
     "fp8_e5m2": torch.uint8,
     "int8": torch.int8,
+    "int8_per_token_head": torch.int8,
+    "fp8_per_token_head": torch.uint8,
     "fp8_inc": torch.float8_e4m3fn,
     "fp8_ds_mla": torch.uint8,
+    "turboquant_k8v4": torch.uint8,
+    "turboquant_4bit_nc": torch.uint8,
+    "turboquant_k3v4_nc": torch.uint8,
+    "turboquant_3bit_nc": torch.uint8,
+    "nvfp4": torch.uint8,
 }
 
 TORCH_DTYPE_TO_NUMPY_DTYPE = {
@@ -54,15 +61,31 @@
 
 
 MODELOPT_TO_VLLM_KV_CACHE_DTYPE_MAP = {
-    # TODO: Add more modelopt kv cache dtype
-    # mappings here when it supported by some attention backend
-    # (for example supports nvfp4).
     "fp8": "fp8_e4m3",
+    "nvfp4": "nvfp4",
 }
 
 T = TypeVar("T")
 
 
+# Pin memory in non-WSL case.
+# Logic duplicated here for now to avoid circular import.
+PIN_MEMORY = "microsoft" not in " ".join(platform.uname()).lower()
+
+
+def is_quantized_kv_cache(kv_cache_dtype: str) -> bool:
+    return (
+        kv_cache_dtype.startswith("fp8")
+        or kv_cache_dtype.endswith("per_token_head")
+        or kv_cache_dtype == "nvfp4"
+    )
+
+
+def kv_cache_uses_per_token_head_scales(kv_cache_dtype: str) -> bool:
+    """Return True if *kv_cache_dtype* needs per-token-head scales."""
+    return kv_cache_dtype.endswith("per_token_head")
+
+
 def is_strictly_contiguous(t: torch.Tensor) -> bool:
     """
     Check if tensor is contiguous AND has no degenerate strides.
@@ -93,6 +116,32 @@ def is_strictly_contiguous(t: torch.Tensor) -> bool:
     return True
 
 
+def canonicalize_singleton_dim_strides(t: torch.Tensor) -> torch.Tensor:
+    """Fix degenerate strides on size=1 dimensions for CUDA TMA compatibility.
+
+    PyTorch allows any stride on a size=1 dim (is_contiguous() is always True
+    there), so a size=1 dim may have stride=1 (2 bytes for bf16) instead of
+    the canonical product(shape[i+1:]).  CUDA TMA on H100+ requires all
+    non-outermost strides to be ≥16-byte aligned; stride=1 triggers
+    cudaErrorIllegalInstruction.  Zero-copy: patches stride metadata only via
+    as_strided; returns t unchanged if all size=1 strides are already canonical.
+    """
+    if 1 not in t.shape:
+        return t
+    strides = list(t.stride())
+    shape = t.shape
+    prev_stride = 1
+    changed = False
+    for i in range(len(shape) - 1, -1, -1):
+        if shape[i] == 1 and strides[i] != prev_stride:
+            strides[i] = prev_stride
+            changed = True
+        prev_stride = strides[i] * shape[i]
+    if not changed:
+        return t
+    return t.as_strided(t.shape, strides)
+
+
 @contextlib.contextmanager
 def set_default_torch_dtype(dtype: torch.dtype):
     """Sets the default torch dtype to the given dtype."""
@@ -285,6 +334,8 @@ def get_kv_cache_quant_algo_string(quant_cfg: dict[str, Any]) -> str | None:
                 and kv_algo.get("type") == "float"
             ):
                 kv_algo = "fp8"
+            elif kv_algo.get("num_bits") == 4 and kv_algo.get("type") == "float":
+                kv_algo = "nvfp4"
             else:
                 # Unknown/unsupported format - return "auto" as safe fallback
                 logger.warning(
@@ -356,8 +407,98 @@ def set_random_seed(seed: int | None) -> None:
         random.seed(seed)
         np.random.seed(seed)
         torch.manual_seed(seed)
-        if torch.cuda.is_available():
-            torch.cuda.manual_seed_all(seed)
+        from vllm.platforms import current_platform
+
+        current_platform.manual_seed_all(seed)
+
+
+def nvfp4_kv_cache_full_dim(head_size: int) -> int:
+    """Packed last dim for NVFP4 KV cache: fp4 data + fp8 block scales."""
+    return head_size // 2 + head_size // 16
+
+
+def _nvfp4_split_data_scale(
+    kv_side: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Split a single NVFP4 KV-side buffer into data and scale views.
+
+    The input is a 4D tensor for one KV side (K or V) whose last
+    dimension is ``full_dim = data_dim + scale_dim``.  The physical
+    layout within each side is [data | scale], both packed contiguously.
+
+    Args:
+        kv_side: 4D uint8 tensor with shape
+            ``(num_pages, dim_1, dim_2, full_dim)``.
+            May be in any permutation order (NHD or HND).
+
+    Returns:
+        ``(data, scale)`` where
+        ``data`` is a uint8 view with shape
+        ``(num_pages, dim_1, dim_2, data_dim)``.
+        ``scale`` is a float8_e4m3fn view with shape
+        ``(num_pages, dim_1, dim_2, scale_dim)``.
+    """
+    num_pages = kv_side.shape[0]
+    dim_1, dim_2 = kv_side.shape[1], kv_side.shape[2]
+    full_dim = kv_side.shape[3]
+    data_dim = full_dim * 8 // 9
+    scale_dim = full_dim - data_dim
+
+    data_per_kv = dim_1 * dim_2 * data_dim
+    page_bytes = kv_side.stride(0)
+
+    # Derive inner strides from the kv_side strides, scaling by the
+    # ratio of the target dim to full_dim.  This preserves the physical
+    # layout (NHD vs HND) encoded in the input tensor's strides.
+    s1 = kv_side.stride(1) * data_dim // full_dim
+    s2 = kv_side.stride(2) * data_dim // full_dim
+    data_shape = (num_pages, dim_1, dim_2, data_dim)
+    data_strides = (page_bytes, s1, s2, 1)
+
+    s1_s = kv_side.stride(1) * scale_dim // full_dim
+    s2_s = kv_side.stride(2) * scale_dim // full_dim
+    scale_shape = (num_pages, dim_1, dim_2, scale_dim)
+    scale_strides = (page_bytes, s1_s, s2_s, 1)
+
+    base = kv_side.storage_offset()
+    data = torch.as_strided(kv_side, data_shape, data_strides, storage_offset=base)
+    scale = torch.as_strided(
+        kv_side, scale_shape, scale_strides, storage_offset=base + data_per_kv
+    ).view(torch.float8_e4m3fn)
+
+    return data, scale
+
+
+def nvfp4_kv_cache_split_views(kv_cache: torch.Tensor) -> tuple[tuple, tuple]:
+    """Split an NVFP4 KV cache tensor into data and scale views.
+
+    Accepts either a 5D tensor ``(num_pages, 2, dim_2, dim_3, full_dim)``
+    or a 4D single-side tensor ``(num_pages, dim_2, dim_3, full_dim)``.
+
+    Per-page layout: [K_data | K_scale | V_data | V_scale].
+    Each KV side is self-contained (data followed by its scale), so the
+    5D case simply splits each side independently.
+
+    The returned views are in the same dim order as the input (NHD or
+    HND), so callers get views matching whichever order they passed in.
+
+    Args:
+        kv_cache: 5D or 4D uint8 tensor where the last dimension is
+            ``full_dim = data_dim + scale_dim = 9 * head_size / 16``.
+
+    Returns:
+        For 5D input:
+            ``(k_data, v_data), (k_scale, v_scale)``
+        For 4D input (single KV side):
+            ``(data,), (scale,)``
+    """
+    if kv_cache.dim() == 4:
+        data, scale = _nvfp4_split_data_scale(kv_cache)
+        return (data,), (scale,)
+
+    k_data, k_scale = _nvfp4_split_data_scale(kv_cache[:, 0])
+    v_data, v_scale = _nvfp4_split_data_scale(kv_cache[:, 1])
+    return (k_data, v_data), (k_scale, v_scale)
 
 
 def create_kv_caches_with_random_flash(
@@ -386,15 +527,31 @@ def create_kv_caches_with_random_flash(
     value_caches: list[torch.Tensor] = []
 
     for _ in range(num_layers):
-        key_value_cache = torch.empty(
-            size=kv_cache_allocation_shape, dtype=dtype, device=device
-        ).permute(*stride_order)
-        if cache_dtype in ["auto", "half", "bfloat16", "float"]:
-            key_value_cache.uniform_(-scale, scale)
-        elif cache_dtype == "fp8":
-            _generate_random_fp8(key_value_cache, -scale, scale)
+        if cache_dtype == "nvfp4":
+            # Full page dim: fp4 data + fp8 block scales per head.
+            # Per page layout: [K_data | K_scale | V_data | V_scale]
+            # Returns [:, 0] and [:, 1] like all other dtypes.
+            full_dim = nvfp4_kv_cache_full_dim(head_size)
+            nvfp4_shape = (num_blocks, 2, block_size, num_heads, full_dim)
+            nvfp4_phys = tuple(nvfp4_shape[i] for i in stride_order)
+            inv = [stride_order.index(i) for i in range(len(stride_order))]
+            key_value_cache = torch.randint(
+                0,
+                256,
+                nvfp4_phys,
+                dtype=dtype,
+                device=device,
+            ).permute(*inv)
         else:
-            raise ValueError(f"Does not support key cache of type {cache_dtype}")
+            key_value_cache = torch.empty(
+                size=kv_cache_allocation_shape, dtype=dtype, device=device
+            ).permute(*stride_order)
+            if cache_dtype in ["auto", "half", "bfloat16", "float"]:
+                key_value_cache.uniform_(-scale, scale)
+            elif cache_dtype == "fp8":
+                _generate_random_fp8(key_value_cache, -scale, scale)
+            else:
+                raise ValueError(f"Does not support key cache of type {cache_dtype}")
         key_caches.append(key_value_cache[:, 0])
         value_caches.append(key_value_cache[:, 1])
     return key_caches, value_caches
@@ -451,12 +608,12 @@ def create_kv_caches_with_random(
 def async_tensor_h2d(
     data: list,
     dtype: torch.dtype,
-    target_device: str | torch.device,
-    pin_memory: bool,
+    device: str | torch.device,
+    pin_memory: bool = PIN_MEMORY,
 ) -> torch.Tensor:
     """Asynchronously create a tensor and copy it from host to device."""
     t = torch.tensor(data, dtype=dtype, pin_memory=pin_memory, device="cpu")
-    return t.to(device=target_device, non_blocking=True)
+    return t.to(device=device, non_blocking=True)
 
 
 def make_ndarray_with_pad(
@@ -590,49 +747,6 @@ def aux_stream() -> torch.cuda.Stream | None:
     return _aux_stream
 
 
-@lru_cache(maxsize=8)
-def _cuda_device_count_stateless(cuda_visible_devices: str | None = None) -> int:
-    # Note: cuda_visible_devices is not used, but we keep it as an argument for
-    # LRU Cache purposes.
-
-    # Code below is based on
-    # https://github.com/pytorch/pytorch/blob/
-    # c1cd946818442aca8c7f812b16d187ce1586c3bc/
-    # torch/cuda/__init__.py#L831C1-L831C17
-    import torch.cuda
-    import torch.version
-
-    from vllm.platforms import current_platform
-
-    if not torch.cuda._is_compiled():
-        return 0
-    if current_platform.is_rocm():
-        # ROCm uses amdsmi instead of nvml for stateless device count
-        # This requires a sufficiently modern version of Torch 2.4.0
-        raw_count = (
-            torch.cuda._device_count_amdsmi()
-            if (hasattr(torch.cuda, "_device_count_amdsmi"))
-            else -1
-        )
-    else:
-        raw_count = torch.cuda._device_count_nvml()
-    r = torch._C._cuda_getDeviceCount() if raw_count < 0 else raw_count
-    return r
-
-
-def cuda_device_count_stateless() -> int:
-    """Get number of CUDA devices, caching based on the value of
-    CUDA_VISIBLE_DEVICES at the time of call.
-
-    This should be used instead of torch.accelerator.device_count()
-    unless CUDA_VISIBLE_DEVICES has already been set to the desired
-    value."""
-
-    # This can be removed and simply replaced with torch.cuda.get_device_count
-    # after https://github.com/pytorch/pytorch/pull/122815 is released.
-    return _cuda_device_count_stateless(envs.CUDA_VISIBLE_DEVICES)
-
-
 def weak_ref_tensor(tensor: Any) -> Any:
     """
     Create a weak reference to a tensor.
@@ -742,37 +856,62 @@ def is_torch_equal(target: str) -> bool:
 
 HAS_OPAQUE_TYPE = is_torch_equal_or_newer("2.11.0.dev")
 
+# Allow toggling LayerName usage via environment variable.
+# Defaults to True on torch >= 2.11, False otherwise.
+# Set VLLM_USE_LAYERNAME=0 to disable even on torch >= 2.11.
+_USE_LAYERNAME = HAS_OPAQUE_TYPE and envs.VLLM_USE_LAYERNAME
+
 if HAS_OPAQUE_TYPE:
     from torch._opaque_base import OpaqueBase
 else:
     OpaqueBase = object  # type: ignore[misc, assignment]
 
 
-class ModuleName(OpaqueBase):  # type: ignore[misc]
+class LayerName(OpaqueBase):  # type: ignore[misc]
     """Wraps a module name string for use as a torch opaque type.
 
     When torch >= 2.11, this is registered as a hoisted value-type opaque
     object so that torch.compile lifts it as a graph input instead of baking
-    it as a constant.  This avoids per-layer recompilation for MOE ops.
+    it as a constant.  This avoids per-layer recompilation for custom ops
+    that accept layer name strings (attention, MOE, KV cache, etc.).
     """
 
     def __init__(self, value: str):
         self.value = value
 
     def __eq__(self, other):
-        return isinstance(other, ModuleName) and self.value == other.value
+        return isinstance(other, LayerName) and self.value == other.value
 
     def __hash__(self):
         return hash(self.value)
 
     def __fx_repr__(self):
-        return (f"ModuleName({self.value!r})", {ModuleName})
+        return (f"LayerName({self.value!r})", {"LayerName": LayerName})
 
 
 if HAS_OPAQUE_TYPE:
     from torch._library.opaque_object import register_opaque_type
 
-    register_opaque_type(ModuleName, typ="value", hoist=True)
+    register_opaque_type(LayerName, typ="value", hoist=True)
+
+# On torch >= 2.11 (with VLLM_USE_LAYERNAME enabled), custom op
+# layer_name parameters use LayerName; otherwise they remain plain str.
+if TYPE_CHECKING:
+    from typing import TypeAlias
+
+    LayerNameType: TypeAlias = str | LayerName
+else:
+    LayerNameType = LayerName if _USE_LAYERNAME else str
+
+
+def _resolve_layer_name(layer_name: str | LayerName) -> str:
+    """Unwrap a LayerName to str, or return str unchanged."""
+    return layer_name.value if isinstance(layer_name, LayerName) else layer_name
+
+
+def _encode_layer_name(layer_name: str) -> str | LayerName:
+    """Wrap a str layer name as LayerName when enabled."""
+    return LayerName(layer_name) if _USE_LAYERNAME else layer_name
 
 
 # Supports xccl with PyTorch versions >= 2.8.0.dev for XPU platform
diff --git a/vllm/v1/attention/backend.py b/vllm/v1/attention/backend.py
index cd49ea30e6f4..d83489238d33 100644
--- a/vllm/v1/attention/backend.py
+++ b/vllm/v1/attention/backend.py
@@ -10,6 +10,13 @@
 import torch
 from typing_extensions import deprecated
 
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    kFp8Dynamic64Sym,
+    kFp8Dynamic128Sym,
+    kFp8StaticTensorSym,
+    kNvfp4Dynamic,
+)
+
 if TYPE_CHECKING:
     from vllm.config import VllmConfig
     from vllm.config.cache import CacheDType
@@ -17,7 +24,9 @@
     from vllm.model_executor.layers.quantization.utils.quant_utils import QuantKey
     from vllm.platforms.interface import DeviceCapability
     from vllm.v1.attention.backends.utils import KVCacheLayoutType
-    from vllm.v1.kv_cache_interface import AttentionSpec
+    from vllm.v1.kv_cache_interface import AttentionSpec, KVQuantMode
+
+from vllm.v1.kv_cache_interface import get_kv_quant_mode
 
 
 class AttentionType(str, Enum):
@@ -46,10 +55,6 @@ def __init__(self, base: int):
 class AttentionBackend(ABC):
     """Abstract class for attention backends."""
 
-    # For some attention backends, we allocate an output tensor before
-    # calling the custom op. When piecewise cudagraph is enabled, this
-    # makes sure the output tensor is allocated inside the cudagraph.
-    accept_output_buffer: bool = False
     supported_dtypes: ClassVar[list[torch.dtype]] = [torch.float16, torch.bfloat16]
     supported_kv_cache_dtypes: ClassVar[list["CacheDType"]] = [
         "auto",
@@ -220,6 +225,21 @@ def is_sparse(cls) -> bool:
     def supports_per_head_quant_scales(cls) -> bool:
         return False
 
+    @classmethod
+    def supports_non_causal(cls) -> bool:
+        """Check if backend supports non-causal (bidirectional) attention
+        for decoder models.
+
+        Unlike ENCODER_ONLY attention type which implies a different
+        execution model, this refers to non-causal attention within the
+        standard paged-KV-cache decoder path.
+        """
+        return False
+
+    @classmethod
+    def supports_batch_invariance(cls) -> bool:
+        return False
+
     @classmethod
     def supports_attn_type(cls, attn_type: str) -> bool:
         """Check if backend supports a given attention type.
@@ -261,6 +281,8 @@ def validate_configuration(
         use_per_head_quant_scales: bool,
         device_capability: "DeviceCapability",
         attn_type: str,
+        use_non_causal: bool = False,
+        use_batch_invariant: bool = False,
     ) -> list[str]:
         invalid_reasons = []
         if not cls.supports_head_size(head_size):
@@ -293,6 +315,10 @@ def validate_configuration(
             invalid_reasons.append("compute capability not supported")
         if not cls.supports_attn_type(attn_type):
             invalid_reasons.append(f"attention type {attn_type} not supported")
+        if use_non_causal and not cls.supports_non_causal():
+            invalid_reasons.append("non-causal attention not supported")
+        if use_batch_invariant and not cls.supports_batch_invariance():
+            invalid_reasons.append("batch invariance not supported")
         combination_reason = cls.supports_combination(
             head_size,
             dtype,
@@ -311,6 +337,10 @@ def validate_configuration(
     def get_required_kv_cache_layout(cls) -> "KVCacheLayoutType | None":
         return None
 
+    @classmethod
+    def is_ssm(cls) -> bool:
+        return False
+
 
 class AttentionMetadata:
     pass
@@ -362,11 +392,22 @@ class CommonAttentionMetadata:
     dcp_local_seq_lens_cpu: torch.Tensor | None = None
     """Sequence lengths of the local rank in decode context parallelism world"""
 
+    positions: torch.Tensor | None = None
+    """(num_actual_tokens,) token positions.  Optional; set when the caller
+    has positions available so that builders can pre-compute position-dependent
+    metadata (e.g. C128A topk indices for DeepSeek V4)."""
+
     is_prefilling: torch.Tensor | None = None
     """(batch_size,) bool tensor: True if request is still in prefill phase
     (num_computed_tokens < num_prompt_tokens). Used by some backends to
     distinguish actual decodes from short extends."""
 
+    seq_lens_cpu_upper_bound: torch.Tensor | None = None
+    """(batch_size,) CPU upper bound on seq_lens. Precise for prefill rows
+    and for all rows outside async spec decode; optimistic for async-spec
+    decode rows (assumes every draft was accepted). Not safe for kernels
+    that need exact per-row context lengths on decode rows."""
+
     # WARNING: Deprecated fields. Will be removed in a future release (v0.15.0)
     _seq_lens_cpu: torch.Tensor | None = None
     _num_computed_tokens_cpu: torch.Tensor | None = None
@@ -722,6 +763,13 @@ def process_weights_after_loading(self, act_dtype: torch.dtype):
 class AttentionImpl(AttentionImplBase[T], Generic[T]):
     """Standard attention implementation with forward method."""
 
+    kv_cache_dtype: str
+
+    @property
+    def kv_quant_mode(self) -> "KVQuantMode":
+        """Return the KV cache quantization mode for this layer."""
+        return get_kv_quant_mode(self.kv_cache_dtype)
+
     @abstractmethod
     def __init__(
         self,
@@ -747,7 +795,7 @@ def forward(
         value: torch.Tensor,
         kv_cache: torch.Tensor,
         attn_metadata: T,
-        output: torch.Tensor | None = None,
+        output: torch.Tensor,
         output_scale: torch.Tensor | None = None,
         output_block_scale: torch.Tensor | None = None,
     ) -> torch.Tensor:
@@ -846,6 +894,19 @@ def forward_mqa(
         """MQA-style decode forward pass."""
         raise NotImplementedError
 
+    def fused_output_quant_supported(self, quant_key: "QuantKey"):
+        """
+        Does this attention implementation support fused output quantization.
+        Since MLA quantization is done manually in forward_impl (common code),
+        all MLA backends support it by default.
+        """
+        return quant_key in (
+            kFp8StaticTensorSym,
+            kNvfp4Dynamic,
+            kFp8Dynamic128Sym,
+            kFp8Dynamic64Sym,
+        )
+
     def do_kv_cache_update(
         self,
         kv_c_normed: torch.Tensor,
@@ -876,6 +937,19 @@ class SparseMLAAttentionImpl(AttentionImplBase[T], Generic[T]):
     They do not support prefill (MHA-style) attention.
     """
 
+    def fused_output_quant_supported(self, quant_key: "QuantKey"):
+        """
+        Does this attention implementation support fused output quantization.
+        Since MLA quantization is done manually in forward_impl (common code),
+        all MLA backends support it by default.
+        """
+        return quant_key in (
+            kFp8StaticTensorSym,
+            kNvfp4Dynamic,
+            kFp8Dynamic128Sym,
+            kFp8Dynamic64Sym,
+        )
+
     @abstractmethod
     def __init__(
         self,
@@ -936,10 +1010,6 @@ def do_kv_cache_update(
         )
 
 
-def is_quantized_kv_cache(kv_cache_dtype: str) -> bool:
-    return kv_cache_dtype.startswith("fp8")
-
-
 def subclass_attention_backend(
     name_prefix: str,
     attention_backend_cls: type[AttentionBackend],
diff --git a/vllm/v1/attention/backends/cpu_attn.py b/vllm/v1/attention/backends/cpu_attn.py
index 5fa3844c8233..005975c47759 100644
--- a/vllm/v1/attention/backends/cpu_attn.py
+++ b/vllm/v1/attention/backends/cpu_attn.py
@@ -1,14 +1,20 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import functools
 from dataclasses import dataclass
-from typing import ClassVar
+from typing import TYPE_CHECKING, ClassVar
+
+if TYPE_CHECKING:
+    from vllm.config.cache import CacheDType
 
 import torch
 
 from vllm import _custom_ops as ops
+from vllm import envs
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import CpuArchEnum, current_platform
+from vllm.utils.torch_utils import is_quantized_kv_cache
 from vllm.v1.attention.backend import (
     AttentionBackend,
     AttentionImpl,
@@ -16,29 +22,45 @@
     AttentionMetadataBuilder,
     AttentionType,
     CommonAttentionMetadata,
-    is_quantized_kv_cache,
+    MultipleOf,
 )
 from vllm.v1.attention.backends.utils import (
+    KVCacheLayoutType,
     split_decodes_and_prefills,
 )
 from vllm.v1.kv_cache_interface import AttentionSpec, CrossAttentionSpec
 
 logger = init_logger(__name__)
 
-_CPU_ARCH_PREFER_MIXED_BATCH = (CpuArchEnum.X86, CpuArchEnum.ARM, CpuArchEnum.S390X)
+_CPU_ARCH_PREFER_MIXED_BATCH = (
+    CpuArchEnum.X86,
+    CpuArchEnum.ARM,
+    CpuArchEnum.S390X,
+    CpuArchEnum.RISCV,
+    CpuArchEnum.POWERPC,
+)
 
 
 class CPUAttentionBackend(AttentionBackend):
-    accept_output_buffer: bool = True
     supported_dtypes: ClassVar[list[torch.dtype]] = [
         torch.float16,
         torch.bfloat16,
         torch.float32,
     ]
+    supported_kv_cache_dtypes: ClassVar[list["CacheDType"]] = [
+        "auto",
+        "fp8",
+        "fp8_e4m3",
+        "fp8_e5m2",
+    ]
+
+    @staticmethod
+    def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
+        return [MultipleOf(16)]
 
     @classmethod
     def get_supported_head_sizes(cls) -> list[int]:
-        return [32, 64, 80, 96, 112, 128, 160, 192, 224, 256]
+        return [32, 64, 80, 96, 112, 128, 160, 192, 224, 256, 512]
 
     @staticmethod
     def get_name() -> str:
@@ -73,6 +95,10 @@ def get_kv_cache_shape(
     ) -> tuple[int, ...]:
         return 2, num_blocks, num_kv_heads, block_size, head_size
 
+    @classmethod
+    def get_required_kv_cache_layout(cls) -> "KVCacheLayoutType | None":
+        return "HND"
+
     @staticmethod
     def use_cascade_attention(*args, **kwargs) -> bool:
         return False
@@ -133,7 +159,13 @@ def __init__(
         if self.window_size is None:
             self.window_size = -1
         self.block_size = vllm_config.cache_config.block_size
-        self.isa = _get_attn_isa(self.dtype, self.block_size, self.head_dim)
+        kv_cache_dtype_str = vllm_config.cache_config.cache_dtype
+        self.isa = _get_attn_isa(
+            self.dtype,
+            self.block_size,
+            self.head_dim,
+            kv_cache_dtype_str,
+        )
         self.is_cross_attention = isinstance(kv_cache_spec, CrossAttentionSpec)
 
     def build(
@@ -181,7 +213,7 @@ def build(
             causal=causal,
             sliding_window_size=self.window_size,
             isa=self.isa,
-            enable_kv_split=True,
+            enable_kv_split=envs.VLLM_CPU_ATTN_SPLIT_KV,
         )
 
         attn_metadata = CPUAttentionMetadata(
@@ -247,8 +279,7 @@ def __init__(
         self.kv_cache_dtype = kv_cache_dtype
         self.num_queries_per_kv = self.num_heads // self.num_kv_heads
 
-        if is_quantized_kv_cache(kv_cache_dtype):
-            raise NotImplementedError("FP8 KV cache is unsupported in CPU_ATTN")
+        self.is_fp8_kv_cache = is_quantized_kv_cache(kv_cache_dtype)
         self.attn_type = attn_type
 
         self.sinks = sinks
@@ -266,7 +297,7 @@ def forward(
         value: torch.Tensor,
         kv_cache: torch.Tensor,
         attn_metadata: CPUAttentionMetadata | None,
-        output: torch.Tensor | None = None,
+        output: torch.Tensor,
         output_scale: torch.Tensor | None = None,
         output_block_scale: torch.Tensor | None = None,
     ) -> torch.Tensor:
@@ -282,7 +313,6 @@ def forward(
         Returns:
             shape = [num_tokens, num_heads * head_size]
         """
-        assert output is not None, "Output tensor must be provided."
         if output_scale is not None or output_block_scale is not None:
             raise NotImplementedError(
                 "fused output quantization is not yet supported"
@@ -326,6 +356,9 @@ def forward(
                 value_cache,
                 attn_metadata.slot_mapping,
                 attn_metadata.isa,
+                k_scale=layer._k_scale_float,
+                v_scale=layer._v_scale_float,
+                kv_cache_dtype=self.kv_cache_dtype,
             )
 
         if attn_metadata.use_sdpa_prefill:
@@ -357,6 +390,9 @@ def forward(
                 softcap=self.logits_soft_cap,
                 scheduler_metadata=attn_metadata.scheduler_metadata,
                 s_aux=self.sinks,
+                k_scale=layer._k_scale_float,
+                v_scale=layer._v_scale_float,
+                kv_cache_dtype=self.kv_cache_dtype,
             )
 
         return output
@@ -477,22 +513,62 @@ def _make_sliding_window_bias(
     return attn_biases
 
 
+@functools.lru_cache(maxsize=1)
+def _riscv_supports_rvv() -> bool:
+    """Whether the C++ RVV attention path is usable.
+
+    The kernel in csrc/cpu/cpu_attn_rvv.hpp uses VLEN-agnostic RVVI()
+    macros and supports VLEN=128 and VLEN=256.  CMake auto-detects the
+    largest zvl<N>b from /proc/cpuinfo and passes it via -mrvv-vector-bits.
+    The RVV path is compiled whenever __riscv_v_min_vlen is defined, so
+    we check that at least one supported zvl<N>b is advertised.
+    """
+    try:
+        with open("/proc/cpuinfo") as f:
+            cpuinfo = f.read()
+    except OSError:
+        return False
+    return any(f"zvl{n}b" in cpuinfo for n in (128, 256)) and all(
+        f"zvl{n}b" not in cpuinfo for n in (512, 1024)
+    )
+
+
 def _get_attn_isa(
-    dtype: torch.dtype, block_size: int, head_size: int | None = None
+    dtype: torch.dtype,
+    block_size: int,
+    head_size: int | None = None,
+    kv_cache_dtype: str | None = None,
 ) -> str:
+    fp8_kv = is_quantized_kv_cache(kv_cache_dtype) if kv_cache_dtype else False
     if head_size is not None and head_size % 32 != 0 and head_size % 16 == 0:
+        if fp8_kv:
+            raise NotImplementedError(
+                "FP8 KV cache requires head_size divisible by 32 on CPU."
+            )
         return "vec16"
     supports_amx = torch.cpu._is_amx_tile_supported()
-    supports_arm = current_platform.get_cpu_architecture() == CpuArchEnum.ARM
-    supports_vxe = current_platform.get_cpu_architecture() == CpuArchEnum.S390X
+    arch = current_platform.get_cpu_architecture()
+    supports_arm = arch == CpuArchEnum.ARM
+    supports_vxe = arch == CpuArchEnum.S390X
+    supports_riscv = arch == CpuArchEnum.RISCV
+    supports_vsx = arch == CpuArchEnum.POWERPC
+    supports_avx512 = torch.cpu._is_avx512_supported()
+    if fp8_kv and not supports_amx and not supports_avx512:
+        raise NotImplementedError(
+            "FP8 KV cache on CPU requires x86 with AVX-512 or AMX."
+        )
     if supports_amx and dtype in (torch.bfloat16,) and block_size % 32 == 0:
         return "amx"
     elif block_size % 32 == 0:
         if supports_arm:
             # support ARM NEON FMLA and BFMMLA (bf16) for block size 32
             return "neon"
+        elif supports_riscv and _riscv_supports_rvv():
+            return "rvv"
         elif supports_vxe:
             return "vxe"
+        elif supports_vsx:
+            return "vsx"
         else:
             return "vec"
     else:
diff --git a/vllm/v1/attention/backends/fa_utils.py b/vllm/v1/attention/backends/fa_utils.py
index a4423b301d69..0d6a3d298b66 100644
--- a/vllm/v1/attention/backends/fa_utils.py
+++ b/vllm/v1/attention/backends/fa_utils.py
@@ -54,7 +54,10 @@ def get_scheduler_metadata(*args: Any, **kwargs: Any) -> None:  # type: ignore[m
 
 
 def get_flash_attn_version(
-    requires_alibi: bool = False, head_size: int | None = None
+    requires_alibi: bool = False,
+    head_size: int | None = None,
+    head_size_v: int | None = None,
+    has_sinks: bool = False,
 ) -> int | None:
     if current_platform.is_xpu():
         return 2
@@ -112,13 +115,36 @@ def get_flash_attn_version(
             )
             fa_version = 2
 
+        # Some FA3 unsupported SM90 cases can use FA4 when available.
+        if (
+            fa_version == 3
+            and device_capability.major == 9
+            and is_fa_version_supported(4)
+        ):
+            upgrade_reason = None
+            if head_size is not None and head_size > 256:
+                upgrade_reason = f"FA3 does not support head_size={head_size} on SM90"
+            elif (
+                has_sinks
+                and head_size is not None
+                and head_size_v is not None
+                and head_size != head_size_v
+            ):
+                upgrade_reason = "Diff-KV with sinks"
+            if upgrade_reason:
+                logger.info_once(
+                    "%s: upgrading FlashAttention 3 -> 4",
+                    upgrade_reason,
+                    scope="local",
+                )
+                fa_version = 4
+
         # FA4 currently uses batch-shape-dependent scheduling
         # heuristics on SM100+, which breaks batch invariance.
         if envs.VLLM_BATCH_INVARIANT and fa_version == 4:
             logger.warning_once(
                 "Cannot use FA version 4 with batch invariance, "
                 "defaulting to FA version 2.",
-                scope="local",
             )
             fa_version = 2
 
@@ -154,18 +180,25 @@ def get_flash_attn_version(
         return None
 
 
-def flash_attn_supports_fp8() -> bool:
-    return (
-        get_flash_attn_version() == 3
-        and current_platform.is_device_capability_family(90)
-    )
+def is_fa_version_supported(fa_version: int) -> bool:
+    try:
+        from vllm.vllm_flash_attn.flash_attn_interface import (
+            is_fa_version_supported as _is_fa_version_supported,
+        )
+
+        return _is_fa_version_supported(fa_version)
+    except ImportError:
+        return False
+
+
+def flash_attn_supports_quant_query_input() -> bool:
+    return not current_platform.is_xpu()
 
 
 def flash_attn_supports_sinks() -> bool:
     if current_platform.is_xpu():
         return True
-    else:
-        return get_flash_attn_version() == 3
+    return get_flash_attn_version() in (3, 4)
 
 
 def flash_attn_supports_mla():
diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py
index 245995be2642..dc041485bd12 100755
--- a/vllm/v1/attention/backends/flash_attn.py
+++ b/vllm/v1/attention/backends/flash_attn.py
@@ -10,21 +10,28 @@
 import torch
 
 from vllm.model_executor.layers.attention import Attention
+from vllm.platforms import current_platform
+from vllm.utils.torch_utils import (
+    canonicalize_singleton_dim_strides,
+    is_quantized_kv_cache,
+)
 from vllm.v1.attention.backend import (
     AttentionBackend,
     AttentionImpl,
     AttentionType,
     MultipleOf,
-    is_quantized_kv_cache,
 )
 from vllm.v1.attention.backends.fa_utils import (
-    flash_attn_supports_fp8,
+    flash_attn_supports_quant_query_input,
     get_flash_attn_version,
+    is_fa_version_supported,
     is_flash_attn_varlen_func_available,
 )
+from vllm.v1.attention.backends.utils import get_dcp_local_seq_lens
 from vllm.v1.attention.ops.common import cp_lse_ag_out_rs
 from vllm.v1.attention.ops.dcp_alltoall import dcp_a2a_lse_reduce
 from vllm.v1.attention.ops.merge_attn_states import merge_attn_states
+from vllm.v1.worker.workspace import current_workspace_manager
 
 if is_flash_attn_varlen_func_available():
     from vllm.v1.attention.backends.fa_utils import (
@@ -51,7 +58,6 @@
     CommonAttentionMetadata,
 )
 from vllm.v1.attention.backends.utils import (
-    get_dcp_local_seq_lens,
     get_kv_cache_layout,
 )
 from vllm.v1.kv_cache_interface import AttentionSpec
@@ -60,7 +66,6 @@
 
 
 class FlashAttentionBackend(AttentionBackend):
-    accept_output_buffer: bool = True
     supported_dtypes: ClassVar[list[torch.dtype]] = [torch.float16, torch.bfloat16]
     supported_kv_cache_dtypes: ClassVar[list[CacheDType]] = [
         "auto",
@@ -90,10 +95,24 @@ def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
 
     forward_includes_kv_cache_update: bool = False
 
+    @classmethod
+    def get_preferred_block_size(cls, default_block_size: int) -> int:
+        if current_platform.is_xpu():
+            return max(default_block_size, 64)
+        return super().get_preferred_block_size(default_block_size)
+
     @staticmethod
     def get_name() -> str:
         return "FLASH_ATTN"
 
+    @classmethod
+    def supports_batch_invariance(cls) -> bool:
+        return True
+
+    @classmethod
+    def supports_non_causal(cls) -> bool:
+        return True
+
     @classmethod
     def supports_attn_type(cls, attn_type: str) -> bool:
         """FlashAttention supports all attention types."""
@@ -150,23 +169,27 @@ def get_kv_cache_stride_order(
             raise ValueError(f"Unknown cache layout format {cache_layout}.")
         return stride_order
 
-    @staticmethod
-    def get_fp8_dtype_for_flashattn(kv_cache_dtype: str) -> torch.dtype:
-        if kv_cache_dtype in ("fp8", "fp8_e4m3"):
-            return torch.float8_e4m3fn
-        else:
-            raise ValueError(f"Unrecognized FP8 dtype: {kv_cache_dtype}")
-
     @classmethod
     def supports_head_size(cls, head_size: int) -> bool:
-        return head_size % 8 == 0 and head_size <= 256
+        if head_size % 8 != 0:
+            return False
+        if head_size <= 256:
+            return True
+        if is_fa_version_supported(4):
+            return head_size <= 512
+        return False
 
     @classmethod
     def supports_kv_cache_dtype(cls, kv_cache_dtype: CacheDType | None) -> bool:
         if kv_cache_dtype is None:
             return True
-        if kv_cache_dtype.startswith("fp8"):
-            return flash_attn_supports_fp8()
+        if kv_cache_dtype in ("fp8", "fp8_e4m3"):
+            if current_platform.is_xpu():
+                return True
+            return (
+                get_flash_attn_version() == 3
+                and current_platform.is_device_capability_family(90)
+            )
         return kv_cache_dtype in ["auto", "float16", "bfloat16"]
 
     @classmethod
@@ -236,11 +259,16 @@ class FlashAttentionMetadata:
 def _get_sliding_window_configs(
     vllm_config: VllmConfig,
 ) -> set[tuple[int, int] | None]:
-    """Get the set of all sliding window configs used in the model."""
+    """Get the set of all sliding window configs used in the model.
+
+    Only inspects FlashAttentionImpl layers. Other backends (e.g.
+    TurboQuant, MLA) use their own metadata builders and are skipped.
+    """
     sliding_window_configs: set[tuple[int, int] | None] = set()
     layers = get_layers_from_vllm_config(vllm_config, Attention)
     for layer in layers.values():
-        assert isinstance(layer.impl, FlashAttentionImpl)
+        if not isinstance(layer.impl, FlashAttentionImpl):
+            continue
         sliding_window_configs.add(layer.impl.sliding_window)
     return sliding_window_configs
 
@@ -266,7 +294,7 @@ class FlashAttentionMetadataBuilder(AttentionMetadataBuilder[FlashAttentionMetad
     #  https://github.com/vllm-project/vllm/issues/22945
     _cudagraph_support = (
         AttentionCGSupport.ALWAYS
-        if get_flash_attn_version() == 3
+        if get_flash_attn_version() == 3 or current_platform.is_xpu()
         else AttentionCGSupport.UNIFORM_BATCH
     )
     supports_update_block_table: bool = True
@@ -345,6 +373,14 @@ def __init__(
                 self.attention_config.flash_attn_max_num_splits_for_cuda_graph
             )
 
+        if self.dcp_world_size > 1:
+            max_num_reqs = vllm_config.scheduler_config.max_num_seqs
+            self._dcp_context_kv_lens = torch.zeros(
+                max_num_reqs,
+                dtype=torch.int32,
+                device=self.device,
+            )
+
         # Sliding window size to be used with the AOT scheduler will be
         # populated on first build() call.
         self.aot_sliding_window: tuple[int, int] | None = None
@@ -369,8 +405,11 @@ def build(
         slot_mapping = common_attn_metadata.slot_mapping
         causal = common_attn_metadata.causal
 
-        # the overhead of the aot schedule is not worth it for spec-decode
-        aot_schedule = self.aot_schedule and not fast_build
+        # Disable AOT schedule for spec-decode proposer (not worth the overhead)
+        # and for batch invariance (schedule varies with max_seqlen_q/k).
+        aot_schedule = (
+            self.aot_schedule and not fast_build and not envs.VLLM_BATCH_INVARIANT
+        )
 
         if self.aot_sliding_window is None:
             self.aot_sliding_window = (-1, -1)
@@ -407,10 +446,8 @@ def schedule(
             batch_size, cu_query_lens, max_query_len, seqlens, max_seq_len, causal
         ):
             cache_dtype = self.cache_config.cache_dtype
-            if cache_dtype.startswith("fp8"):
-                qkv_dtype = FlashAttentionBackend.get_fp8_dtype_for_flashattn(
-                    cache_dtype
-                )
+            if is_quantized_kv_cache(cache_dtype):
+                qkv_dtype = current_platform.fp8_dtype()
             else:
                 qkv_dtype = self.kv_cache_dtype
             if aot_schedule:
@@ -441,15 +478,18 @@ def schedule(
         prefix_scheduler_metadata = None
 
         if self.dcp_world_size > 1:
-            query_kv_lens = query_start_loc[1:] - query_start_loc[:-1]
-            dcp_context_kv_lens = seq_lens - query_kv_lens
-
-            dcp_context_kv_lens = get_dcp_local_seq_lens(
-                dcp_context_kv_lens,
+            query_lens = query_start_loc[1:] - query_start_loc[:-1]
+            context_kv_lens = seq_lens - query_lens
+            local_context_kv_lens = get_dcp_local_seq_lens(
+                context_kv_lens,
                 self.dcp_world_size,
                 self.dcp_rank,
                 self.cp_kv_cache_interleave_size,
             )
+            self._dcp_context_kv_lens[:num_reqs] = local_context_kv_lens
+            self._dcp_context_kv_lens[num_reqs:] = 0
+            dcp_context_kv_lens = self._dcp_context_kv_lens[:num_reqs]
+
             # After DCP distribution, the maximum number of tokens for any rank is
             # ceil(L / (N * I)) * I, where L is max_seq_len, N is dcp_world_size,
             # and I is cp_kv_cache_interleave_size.
@@ -596,16 +636,10 @@ def __init__(
         logger.info_once(
             "Using FlashAttention version %s",
             self.vllm_flash_attn_version,
-            scope="local",
         )
         # Cache the batch invariant result for use in forward passes
         self.batch_invariant_enabled = envs.VLLM_BATCH_INVARIANT
 
-        if is_quantized_kv_cache(self.kv_cache_dtype) and not flash_attn_supports_fp8():
-            raise NotImplementedError(
-                "FlashAttention does not support fp8 kv-cache on this device."
-            )
-
         self.sinks = sinks
         if self.sinks is not None:
             assert flash_attn_supports_sinks(), (
@@ -616,7 +650,7 @@ def __init__(
                 "heads in the layer"
             )
 
-        self.supports_quant_query_input = True
+        self.supports_quant_query_input = flash_attn_supports_quant_query_input()
 
         vllm_config = get_current_vllm_config_or_none()
         dcp_a2a = (
@@ -626,6 +660,10 @@ def __init__(
         )
         self.dcp_combine = dcp_a2a_lse_reduce if dcp_a2a else cp_lse_ag_out_rs
 
+        self._dcp_dtype: torch.dtype | None = None
+        if vllm_config is not None and self.dcp_world_size > 1:
+            self._dcp_dtype = vllm_config.model_config.dtype
+
     def forward(
         self,
         layer: torch.nn.Module,
@@ -634,7 +672,7 @@ def forward(
         value: torch.Tensor,
         kv_cache: torch.Tensor,
         attn_metadata: FlashAttentionMetadata,
-        output: torch.Tensor | None = None,
+        output: torch.Tensor,
         output_scale: torch.Tensor | None = None,
         output_block_scale: torch.Tensor | None = None,
     ) -> torch.Tensor:
@@ -653,7 +691,6 @@ def forward(
               {q,k,v}_descale to be (num_sequences, num_kv_heads).
               We use torch's .expand() to avoid duplicating values
         """
-        assert output is not None, "Output tensor must be provided."
         assert self.vllm_flash_attn_version is not None, (
             "FlashAttention version not detected."
         )
@@ -695,14 +732,28 @@ def forward(
 
         # For decoder and cross-attention, use KV cache as before
         key_cache, value_cache = kv_cache.unbind(0)
+        # Fix degenerate strides on size-1 dims (e.g. num_kv_heads=1 with TP).
+        # FA3/4 on H100+ uses TMA, which requires ≥16-byte stride alignment.
+        # See vllm.utils.torch_utils.canonicalize_singleton_dim_strides.
+        fixed_k = canonicalize_singleton_dim_strides(key_cache)
+        fixed_v = canonicalize_singleton_dim_strides(value_cache)
+        if fixed_k is not key_cache or fixed_v is not value_cache:
+            logger.debug(
+                "Canonicalized degenerate KV cache strides (FlashAttention): "
+                "shape=%s, key strides before=%s after=%s, "
+                "value strides before=%s after=%s",
+                key_cache.shape,
+                key_cache.stride(),
+                fixed_k.stride(),
+                value_cache.stride(),
+                fixed_v.stride(),
+            )
+        key_cache, value_cache = fixed_k, fixed_v
 
-        if self.kv_cache_dtype.startswith("fp8"):
+        if is_quantized_kv_cache(self.kv_cache_dtype):
             # queries are quantized in the attention layer
-            dtype = FlashAttentionBackend.get_fp8_dtype_for_flashattn(
-                self.kv_cache_dtype
-            )
-            key_cache = key_cache.view(dtype)
-            value_cache = value_cache.view(dtype)
+            key_cache = key_cache.view(current_platform.fp8_dtype())
+            value_cache = value_cache.view(current_platform.fp8_dtype())
 
         if not attn_metadata.use_cascade:
             cu_seqlens_q = attn_metadata.query_start_loc
@@ -714,7 +765,11 @@ def forward(
 
             descale_shape = (cu_seqlens_q.shape[0] - 1, self.num_kv_heads)
 
-            q_descale = layer._q_scale.expand(descale_shape)
+            q_descale = (
+                layer._q_scale.expand(descale_shape)
+                if self.supports_quant_query_input
+                else None
+            )
             k_descale = layer._k_scale.expand(descale_shape)
             v_descale = layer._v_scale.expand(descale_shape)
 
@@ -805,6 +860,8 @@ def do_kv_cache_update(
             # we use direct Q, K, V tensors without caching
             return
 
+        # Scatter write into the KV cache using slot_mapping indices.
+        # No TMA kernel is invoked here, so stride canonicalization is not needed.
         key_cache, value_cache = kv_cache.unbind(0)
 
         # Reshape the input keys and values and store them in the cache.
@@ -851,11 +908,18 @@ def _forward_with_dcp(
         sliding_window_size = (
             list(self.sliding_window) if self.sliding_window is not None else None
         )
+        n = query_across_dcp.shape[0]
+        (dcp_context_out,) = current_workspace_manager().get_simultaneous(
+            (
+                (n, self.num_heads * self.dcp_world_size, self.head_size),
+                self._dcp_dtype,
+            ),
+        )
         context_attn_out, context_lse = flash_attn_varlen_func(
             q=query_across_dcp,
             k=key_cache,
             v=value_cache,
-            out=None,
+            out=dcp_context_out,
             cu_seqlens_q=cu_seqlens_q,
             max_seqlen_q=max_seqlen_q,
             seqused_k=attn_metadata.dcp_context_kv_lens,
@@ -883,11 +947,14 @@ def _forward_with_dcp(
         )
         context_lse_cor = context_lse_cor.transpose(0, 1).contiguous()
 
+        (dcp_query_out,) = current_workspace_manager().get_simultaneous(
+            ((query.shape[0], self.num_heads, self.head_size), self._dcp_dtype),
+        )
         query_attn_out, query_lse = flash_attn_varlen_func(
             q=query,
             k=key,
             v=value,
-            out=None,
+            out=dcp_query_out,
             cu_seqlens_q=cu_seqlens_q,
             max_seqlen_q=max_seqlen_q,
             cu_seqlens_k=cu_seqlens_q,
@@ -938,7 +1005,7 @@ def _forward_encoder_attention(
         )
 
         # For encoder attention, process FP8 quantization if needed
-        if self.kv_cache_dtype.startswith("fp8"):
+        if is_quantized_kv_cache(self.kv_cache_dtype):
             raise NotImplementedError(
                 "quantization is not supported for encoder attention"
             )
@@ -973,7 +1040,9 @@ def _forward_encoder_attention(
             window_size=sliding_window_size,
             softcap=self.logits_soft_cap,
             fa_version=self.vllm_flash_attn_version,
-            q_descale=layer._q_scale.expand(descale_shape),
+            q_descale=layer._q_scale.expand(descale_shape)
+            if self.supports_quant_query_input
+            else None,
             k_descale=layer._k_scale.expand(descale_shape),
             v_descale=layer._v_scale.expand(descale_shape),
             num_splits=1 if self.batch_invariant_enabled else 0,
diff --git a/vllm/v1/attention/backends/flash_attn_diffkv.py b/vllm/v1/attention/backends/flash_attn_diffkv.py
index 5305cc1b8c12..e788b0e3496f 100644
--- a/vllm/v1/attention/backends/flash_attn_diffkv.py
+++ b/vllm/v1/attention/backends/flash_attn_diffkv.py
@@ -4,15 +4,23 @@
 
 import torch
 
+from vllm.logger import init_logger
+from vllm.platforms import current_platform
+from vllm.utils.torch_utils import (
+    canonicalize_singleton_dim_strides,
+    is_quantized_kv_cache,
+)
 from vllm.v1.attention.backend import AttentionType
-from vllm.v1.attention.backends.fa_utils import is_flash_attn_varlen_func_available
+from vllm.v1.attention.backends.fa_utils import (
+    get_flash_attn_version,
+    is_flash_attn_varlen_func_available,
+)
 from vllm.v1.attention.ops.triton_reshape_and_cache_flash import (
     triton_reshape_and_cache_flash_diffkv,
 )
 
 if is_flash_attn_varlen_func_available():
     from vllm.v1.attention.backends.fa_utils import flash_attn_varlen_func
-from vllm.logger import init_logger
 from vllm.v1.attention.backends.utils import get_kv_cache_layout
 
 from .flash_attn import (
@@ -85,6 +93,54 @@ def get_kv_cache_stride_order(
 
 
 class FlashAttentionDiffKVImpl(FlashAttentionImpl):
+    vllm_flash_attn_version: int | None
+
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        # Re-derive the FA version with diff-kv context so that
+        # get_flash_attn_version can apply the FA3 -> FA4 upgrade rule
+        # for sinks + hdim != hdim_v.
+        self.vllm_flash_attn_version = get_flash_attn_version(
+            requires_alibi=self.alibi_slopes is not None,
+            head_size=self.head_size,
+            head_size_v=FlashAttentionDiffKVBackend.head_size_v,
+            has_sinks=self.sinks is not None,
+        )
+
+    def do_kv_cache_update(
+        self,
+        layer: torch.nn.Module,
+        key: torch.Tensor,
+        value: torch.Tensor,
+        kv_cache: torch.Tensor,
+        slot_mapping: torch.Tensor,
+    ) -> None:
+        if self.attn_type in (AttentionType.ENCODER_ONLY, AttentionType.ENCODER):
+            # For encoder attention,
+            # we use direct Q, K, V tensors without caching
+            return
+
+        # Unlike standard FlashAttn which splits kv_cache via unbind(0),
+        # DiffKV packs K and V into a single tensor along the last dim:
+        #   kv_cache shape: [num_blocks, block_size, num_kv_heads,
+        #                    head_size_k + head_size_v]
+        # The triton kernel handles this combined layout directly.
+        #
+        # NOTE(woosuk): key and value are padded while slot_mapping is
+        # not padded. However, we don't need to do key[:num_actual_tokens]
+        # and value[:num_actual_tokens] because the reshape_and_cache_flash
+        # op uses the slot_mapping's shape to determine the number of
+        # actual tokens.
+        triton_reshape_and_cache_flash_diffkv(
+            key,
+            value,
+            kv_cache,
+            slot_mapping,
+            self.kv_cache_dtype,
+            layer._k_scale,
+            layer._v_scale,
+        )
+
     def forward(
         self,
         layer: torch.nn.Module,
@@ -93,7 +149,7 @@ def forward(
         value: torch.Tensor,
         kv_cache: torch.Tensor,
         attn_metadata: FlashAttentionMetadata,
-        output: torch.Tensor | None = None,
+        output: torch.Tensor,
         output_scale: torch.Tensor | None = None,
         output_block_scale: torch.Tensor | None = None,
     ) -> torch.Tensor:
@@ -112,7 +168,6 @@ def forward(
               {q,k,v}_descale to be (num_sequences, num_kv_heads).
               We use torch's .expand() to avoid duplicating values
         """
-        assert output is not None, "Output tensor must be provided."
         assert self.vllm_flash_attn_version is not None, (
             "FlashAttention version not detected."
         )
@@ -156,41 +211,28 @@ def forward(
         # Different head_size for K and V
         key_cache = kv_cache[..., : self.head_size]
         value_cache = kv_cache[..., self.head_size :]
-
-        # key and value may be None in the case of cross attention. They are
-        # calculated once based on the output from the encoder and then cached
-        # in KV cache.
-        if (
-            self.kv_sharing_target_layer_name is None
-            and key is not None
-            and value is not None
-        ):
-            # Reshape the input keys and values and store them in the cache.
-            # Skip this if sharing KV cache with an earlier attention layer.
-            # NOTE(woosuk): Here, key and value are padded while slot_mapping is
-            # not padded. However, we don't need to do key[:num_actual_tokens]
-            # and value[:num_actual_tokens] because the reshape_and_cache_flash
-            # op uses the slot_mapping's shape to determine the number of
-            # actual tokens.
-
-            # kv_cache update for different head_size K and V
-            triton_reshape_and_cache_flash_diffkv(
-                key,
-                value,
-                kv_cache,
-                attn_metadata.slot_mapping,
-                self.kv_cache_dtype,
-                layer._k_scale,
-                layer._v_scale,
+        # Fix degenerate strides on size-1 dims (e.g. num_kv_heads=1 with TP).
+        # FA3/4 on H100+ uses TMA, which requires ≥16-byte stride alignment.
+        # See vllm.utils.torch_utils.canonicalize_singleton_dim_strides.
+        fixed_k = canonicalize_singleton_dim_strides(key_cache)
+        fixed_v = canonicalize_singleton_dim_strides(value_cache)
+        if fixed_k is not key_cache or fixed_v is not value_cache:
+            logger.debug(
+                "Canonicalized degenerate KV cache strides (FlashAttentionDiffKV): "
+                "shape=%s, key strides before=%s after=%s, "
+                "value strides before=%s after=%s",
+                key_cache.shape,
+                key_cache.stride(),
+                fixed_k.stride(),
+                value_cache.stride(),
+                fixed_v.stride(),
             )
+        key_cache, value_cache = fixed_k, fixed_v
 
-        if self.kv_cache_dtype.startswith("fp8"):
+        if is_quantized_kv_cache(self.kv_cache_dtype):
             # queries are quantized in the attention layer
-            dtype = FlashAttentionBackend.get_fp8_dtype_for_flashattn(
-                self.kv_cache_dtype
-            )
-            key_cache = key_cache.view(dtype)
-            value_cache = value_cache.view(dtype)
+            key_cache = key_cache.view(current_platform.fp8_dtype())
+            value_cache = value_cache.view(current_platform.fp8_dtype())
 
         if not attn_metadata.use_cascade:
             cu_seqlens_q = attn_metadata.query_start_loc
diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py
index 5b6c198e763d..a81c5742c1bb 100755
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -42,7 +42,13 @@
 )
 from vllm.utils.math_utils import cdiv
 from vllm.utils.platform_utils import is_pin_memory_available
-from vllm.utils.torch_utils import is_strictly_contiguous
+from vllm.utils.torch_utils import (
+    canonicalize_singleton_dim_strides,
+    is_quantized_kv_cache,
+    is_strictly_contiguous,
+    nvfp4_kv_cache_full_dim,
+    nvfp4_kv_cache_split_views,
+)
 from vllm.v1.attention.backend import (
     AttentionBackend,
     AttentionCGSupport,
@@ -56,6 +62,7 @@
     KVCacheLayoutType,
     get_dcp_local_seq_lens,
     get_kv_cache_layout,
+    get_num_attention_heads_from_layers,
     get_per_layer_parameters,
     infer_global_hyperparameters,
     split_decodes_and_prefills,
@@ -63,7 +70,11 @@
 from vllm.v1.attention.ops.common import cp_lse_ag_out_rs
 from vllm.v1.attention.ops.dcp_alltoall import dcp_a2a_lse_reduce
 from vllm.v1.attention.ops.merge_attn_states import merge_attn_states
-from vllm.v1.kv_cache_interface import AttentionSpec, UniformTypeKVCacheSpecs
+from vllm.v1.kv_cache_interface import (
+    AttentionSpec,
+    KVQuantMode,
+    UniformTypeKVCacheSpecs,
+)
 from vllm.v1.utils import CpuGpuBuffer
 
 FLASHINFER_WORKSPACE_BUFFER_SIZE_BATCH_INVARIANT = 2048 * 1024 * 1024
@@ -213,9 +224,7 @@ def __init__(
         self._context = BatchPrefillWithPagedKVCacheWrapper(
             workspace_buffer, get_kv_cache_layout()
         )
-        self._new_tokens = BatchPrefillWithRaggedKVCacheWrapper(
-            workspace_buffer, get_kv_cache_layout()
-        )
+        self._new_tokens = BatchPrefillWithRaggedKVCacheWrapper(workspace_buffer)
 
     def plan(
         self,
@@ -315,7 +324,6 @@ def run(
 
 
 class FlashInferBackend(AttentionBackend):
-    accept_output_buffer: bool = True
     supported_dtypes: ClassVar[list[torch.dtype]] = [torch.float16, torch.bfloat16]
     supported_kv_cache_dtypes: ClassVar[list[CacheDType]] = [
         "auto",
@@ -324,6 +332,7 @@ class FlashInferBackend(AttentionBackend):
         "fp8",
         "fp8_e4m3",
         "fp8_e5m2",
+        "nvfp4",
     ]
 
     @staticmethod
@@ -352,6 +361,10 @@ def get_kv_cache_shape(
         head_size: int,
         cache_dtype_str: str = "auto",
     ) -> tuple[int, ...]:
+        if cache_dtype_str == "nvfp4":
+            # Packed layout: fp4 data + fp8 block scales in last dim
+            last_dim = nvfp4_kv_cache_full_dim(head_size)
+            return (num_blocks, 2, block_size, num_kv_heads, last_dim)
         return (num_blocks, 2, block_size, num_kv_heads, head_size)
 
     @staticmethod
@@ -376,18 +389,20 @@ def get_kv_cache_stride_order(
         return stride_order
 
     @staticmethod
-    def get_fp8_dtype_for_flashinfer(kv_cache_dtype: str) -> torch.dtype:
+    def get_dtype_for_flashinfer(kv_cache_dtype: str) -> torch.dtype:
         if kv_cache_dtype in ("fp8", "fp8_e4m3"):
             return torch.float8_e4m3fn
         elif kv_cache_dtype == "fp8_e5m2":
             return torch.float8_e5m2
+        elif kv_cache_dtype == "nvfp4":
+            return torch.uint8
         else:
-            raise ValueError(f"Unrecognized FP8 dtype: {kv_cache_dtype}")
+            raise ValueError(f"Unrecognized dtype: {kv_cache_dtype}")
 
     @classmethod
     def get_supported_head_sizes(cls) -> list[int]:
         # https://github.com/flashinfer-ai/flashinfer/blob/3d55c71a62052c590c130897d3a3db49b14fcc34/include/flashinfer/utils.cuh#L157
-        return [64, 128, 256]
+        return [64, 128, 256, 512]
 
     @classmethod
     def supports_compute_capability(cls, capability: DeviceCapability) -> bool:
@@ -593,20 +608,31 @@ def __init__(
             self.use_dcp and vllm_config.parallel_config.dcp_comm_backend == "a2a"
         )
 
-        self.num_qo_heads = self.model_config.get_num_attention_heads(
-            self.vllm_config.parallel_config
-        )
+        # Compatible with models with non-uniform per-layer head counts.
+        self.num_qo_heads = get_num_attention_heads_from_layers(
+            vllm_config, layer_names
+        ) or self.model_config.get_num_attention_heads(self.vllm_config.parallel_config)
 
         self.num_kv_heads = self.kv_cache_spec.num_kv_heads
         self.head_dim = self.kv_cache_spec.head_size
         self.page_size = self.kv_cache_spec.block_size
 
-        self.cache_dtype = self.cache_config.cache_dtype
-        if self.cache_dtype.startswith("fp8"):
-            self.kv_cache_dtype = FlashInferBackend.get_fp8_dtype_for_flashinfer(
-                self.cache_dtype
-            )
+        if self.kv_cache_spec.kv_quant_mode != KVQuantMode.NONE:
+            self.cache_dtype = self.cache_config.cache_dtype
+            # Cannot use self.kv_cache_spec.dtype here because kv_cache_spec
+            # storage dtype may not be the same as the op dtype (uint8 vs fp8_e4m3)
+            self.is_kvcache_nvfp4 = self.cache_dtype == "nvfp4"
+            if self.is_kvcache_nvfp4:
+                # For NVFP4, kv_cache_dtype stays as the string "nvfp4"
+                # which is passed to FlashInferImpl
+                self.kv_cache_dtype = self.cache_dtype
+            else:
+                self.kv_cache_dtype = FlashInferBackend.get_dtype_for_flashinfer(
+                    self.cache_dtype
+                )
         else:
+            self.cache_dtype = "auto"
+            self.is_kvcache_nvfp4 = False
             assert self.kv_cache_spec.dtype == self.model_config.dtype
             self.kv_cache_dtype = self.kv_cache_spec.dtype
 
@@ -620,7 +646,13 @@ def __init__(
             can_use_trtllm
             and not vllm_config.attention_config.disable_flashinfer_q_quantization
         ):
-            self.q_data_type = self.kv_cache_dtype
+            if self.is_kvcache_nvfp4:
+                # NVFP4 KV cache uses FP8 quantized queries
+                self.q_data_type = FlashInferBackend.get_dtype_for_flashinfer(
+                    "fp8_e4m3"
+                )
+            else:
+                self.q_data_type = self.kv_cache_dtype
         else:
             self.q_data_type = self.model_config.dtype
 
@@ -651,7 +683,7 @@ def __init__(
         # reused CPU buffers to avoid a race condition between step N async copies to
         # GPU and step N+1 buffer updates.
         self.pin_memory = (
-            not envs.VLLM_USE_V2_MODEL_RUNNER and is_pin_memory_available()
+            not vllm_config.use_v2_model_runner and is_pin_memory_available()
         )
         self.paged_kv_indptr = self._make_buffer(max_num_reqs + 1)
         self.paged_kv_indptr_cpu_buffer = torch.zeros_like(
@@ -736,8 +768,13 @@ def _get_prefill_wrapper(
                     dcp_a2a=self.dcp_a2a,
                 )
             else:
+                # NVFP4 KV cache requires the trtllm-gen backend inside
+                # the wrapper; fa2/fa3 do not support nvfp4.
+                backend = "trtllm-gen" if self.is_kvcache_nvfp4 else "auto"
                 self._prefill_wrapper = BatchPrefillWithPagedKVCacheWrapper(
-                    self._get_workspace_buffer(), get_kv_cache_layout()
+                    self._get_workspace_buffer(),
+                    get_kv_cache_layout(),
+                    backend=backend,
                 )
         assert self._prefill_wrapper is not None
         return self._prefill_wrapper
@@ -757,6 +794,9 @@ def _get_decode_wrapper(self, batch_size: int, use_cudagraph: bool = False):
                 paged_kv_indptr = None
                 paged_kv_indices = None
                 paged_kv_last_page_len = None
+            # NVFP4 KV cache requires the trtllm-gen backend inside
+            # the wrapper; fa2/fa3 do not support nvfp4.
+            backend = "trtllm-gen" if self.is_kvcache_nvfp4 else "auto"
             decode_wrapper = BatchDecodeWithPagedKVCacheWrapper(
                 self._get_workspace_buffer(),
                 get_kv_cache_layout(),
@@ -768,6 +808,7 @@ def _get_decode_wrapper(self, batch_size: int, use_cudagraph: bool = False):
                 # at least as good as cuda cores for all attention ops in latest
                 # gpus.
                 use_tensor_cores=True,
+                backend=backend,
             )
 
             # save the decode wrapper
@@ -891,9 +932,6 @@ def build(
         all_uses_trtllm = (num_prefills == 0 or prefill_use_trtllm) and (
             num_decodes == 0 or decode_use_trtllm
         )
-        is_only_trtllm_decode = num_prefills == 0 and (
-            num_decodes > 0 and decode_use_trtllm
-        )
 
         if not all_uses_trtllm:
             if self.has_sinks:
@@ -939,7 +977,10 @@ def build(
 
         # Guard access to seq_lens_cpu, which may not always be needed
         # and can be expensive to retrieve in async mode.
-        needs_seq_lens_cpu = self.use_dcp or use_cascade or not is_only_trtllm_decode
+        # When all attention (both prefill and decode) uses TRTLLM,
+        # seq_lens_cpu is not needed since TRTLLM paths use GPU tensors
+        # (block_tables, seq_lens) directly.
+        needs_seq_lens_cpu = self.use_dcp or use_cascade or not all_uses_trtllm
         seq_lens_cpu = common_attn_metadata.seq_lens_cpu if needs_seq_lens_cpu else None
         seq_lens_np = seq_lens_cpu.numpy() if seq_lens_cpu is not None else None
         num_blocks_np = (
@@ -977,7 +1018,9 @@ def build(
             num_blocks_np -= num_common_kv_blocks
 
         # Compute paged_kv_indices if necessary
-        needs_paged_kv_indices = use_cascade or not is_only_trtllm_decode
+        # paged_kv_indices is only needed for FlashInfer native paths;
+        # TRTLLM paths use block_tables directly on GPU.
+        needs_paged_kv_indices = use_cascade or not all_uses_trtllm
         if needs_paged_kv_indices:
             assert num_blocks_np is not None
             assert seq_lens_np is not None
@@ -1054,9 +1097,21 @@ def build(
                 qo_indptr_prefill_gpu = (
                     qo_indptr[prefill_start:] - qo_indptr[prefill_start]
                 )
+                # Compute cum_seq_lens_kv on GPU to avoid CPU sync.
+                # This is the cumulative sum of the number of KV cache
+                # blocks per prefill request.
+                prefill_seq_lens = seq_lens[prefill_start:]
+                num_blocks_per_req = (prefill_seq_lens + page_size - 1) // page_size
                 paged_kv_indptr_prefill_gpu = self.paged_kv_indptr.gpu[
                     prefill_start : num_reqs + 1
                 ]
+                # Assign to slice to avoid cpu sync.
+                paged_kv_indptr_prefill_gpu[:1] = 0
+                torch.cumsum(
+                    num_blocks_per_req,
+                    dim=0,
+                    out=paged_kv_indptr_prefill_gpu[1:],
+                )
                 # Compute max_q_len for prefill requests
                 query_lens_prefill_cpu = (
                     qo_indptr_prefill_cpu[1:] - qo_indptr_prefill_cpu[:-1]
@@ -1106,6 +1161,12 @@ def build(
                         prefill_wrapper,
                         BatchPrefillWithPagedKVCacheWrapper,
                     )
+                    # NVFP4 trtllm kernel only supports FP8 output;
+                    # use FP8 o_data_type so the wrapper matches the
+                    # FP8 output buffer allocated in forward().
+                    o_dtype = (
+                        FP8_DTYPE if self.is_kvcache_nvfp4 else self.model_config.dtype
+                    )
                     prefill_wrapper.plan(
                         qo_indptr=qo_indptr_prefill_cpu,
                         paged_kv_indptr=paged_kv_indptr_prefill_cpu,
@@ -1121,7 +1182,7 @@ def build(
                         logits_soft_cap=self.logits_soft_cap,
                         q_data_type=self.q_data_type,
                         kv_data_type=self.kv_cache_dtype,
-                        o_data_type=self.model_config.dtype,
+                        o_data_type=o_dtype,
                         fixed_split_size=self.prefill_fixed_split_size,
                         disable_split_kv=self.disable_split_kv,
                     )
@@ -1155,6 +1216,12 @@ def build(
                 # Use the persistent buffer with padding length,
                 # instead of the same address but chunked version
                 # in atten_metadata when using cudagraph.
+                # NVFP4 trtllm kernel only supports FP8 output;
+                # use FP8 o_data_type so the wrapper matches the
+                # FP8 output buffer allocated in forward().
+                o_dtype = (
+                    FP8_DTYPE if self.is_kvcache_nvfp4 else self.model_config.dtype
+                )
                 fast_plan_decode(
                     decode_wrapper,
                     indptr_cpu=self.paged_kv_indptr.cpu[: num_input_tokens + 1],
@@ -1173,7 +1240,7 @@ def build(
                     logits_soft_cap=self.logits_soft_cap,
                     q_data_type=self.q_data_type,
                     kv_data_type=self.kv_cache_dtype,
-                    o_data_type=self.model_config.dtype,
+                    o_data_type=o_dtype,
                     fixed_split_size=self.decode_fixed_split_size,
                     disable_split_kv=self.disable_split_kv,
                 )
@@ -1222,6 +1289,8 @@ def __init__(
             self.sliding_window[0] if self.sliding_window is not None else -1
         )
         self.kv_cache_dtype = kv_cache_dtype
+        self.is_kvcache_nvfp4 = kv_cache_dtype == "nvfp4"
+        self.fp4_data_dim = head_size // 2 if self.is_kvcache_nvfp4 else 0
         self.logits_soft_cap = logits_soft_cap
         self.kv_sharing_target_layer_name = kv_sharing_target_layer_name
 
@@ -1256,6 +1325,17 @@ def __init__(
         self.bmm2_scale: float | None = None
         self.o_sf_scale: float | None = None
 
+        # Pre-allocated FP8 output buffer for NVFP4 without fused output quant.
+        if self.is_kvcache_nvfp4 and vllm_config is not None:
+            max_num_tokens = vllm_config.scheduler_config.max_num_batched_tokens
+            self._nvfp4_fp8_out = torch.empty(
+                (max_num_tokens, num_heads, head_size),
+                dtype=FP8_DTYPE,
+                device="cuda",
+            )
+        else:
+            self._nvfp4_fp8_out = None
+
         dcp_a2a = (
             vllm_config is not None
             and vllm_config.parallel_config.decode_context_parallel_size > 1
@@ -1269,7 +1349,7 @@ def __init__(
     def fused_output_quant_supported(self, quant_key: QuantKey):
         return (
             self.support_trtllm_attn
-            and self.kv_cache_dtype.startswith("fp8")
+            and is_quantized_kv_cache(self.kv_cache_dtype)
             and quant_key in (kFp8StaticTensorSym, kNvfp4Dynamic)
         )
 
@@ -1286,7 +1366,7 @@ def forward(
         value: torch.Tensor,
         kv_cache: torch.Tensor,
         attn_metadata: FlashInferMetadata,
-        output: torch.Tensor | None = None,
+        output: torch.Tensor,
         output_scale: torch.Tensor | None = None,
         output_block_scale: torch.Tensor | None = None,
     ) -> torch.Tensor:
@@ -1303,8 +1383,6 @@ def forward(
         Returns:
             shape = [num_tokens, num_heads * head_size]
         """
-        assert output is not None, "Output tensor must be provided."
-
         if attn_metadata is None:
             # Profiling run.
             return output.fill_(0)
@@ -1317,12 +1395,12 @@ def forward(
 
         if self.bmm1_scale is None:
             self.bmm1_scale = self.scale
-            if self.kv_cache_dtype.startswith("fp8"):
+            if is_quantized_kv_cache(self.kv_cache_dtype):
                 self.bmm1_scale *= layer._q_scale_float * layer._k_scale_float
 
         if self.bmm2_scale is None:
             self.bmm2_scale = 1.0
-            if self.kv_cache_dtype.startswith("fp8"):
+            if is_quantized_kv_cache(self.kv_cache_dtype):
                 self.bmm2_scale *= layer._v_scale_float
 
         prefill_use_trtllm = isinstance(attn_metadata.prefill, TRTLLMPrefill)
@@ -1373,15 +1451,16 @@ def forward(
 
         num_actual_tokens = attn_metadata.num_actual_tokens
 
-        # The FlashInfer api requires data to be in fp8_e4m3 or fp8_e5m2
-        # to process the cache when the kv_cache_dtype is fp8
-        if self.kv_sharing_target_layer_name is None and self.kv_cache_dtype.startswith(
-            "fp8"
-        ):
-            torch_dtype = FlashInferBackend.get_fp8_dtype_for_flashinfer(
-                self.kv_cache_dtype
-            )
-            kv_cache = kv_cache.view(torch_dtype)
+        # FlashInfer treats uint8 KV cache as NVFP4. vLLM stores FP8 KV cache
+        # as uint8 bytes, so pass FP8 caches with their logical dtype.
+        if not self.is_kvcache_nvfp4 and kv_cache.dtype == torch.uint8:
+            fp8_view_dtype = None
+            if self.kv_cache_dtype in ("fp8", "fp8_e4m3", torch.float8_e4m3fn):
+                fp8_view_dtype = torch.float8_e4m3fn
+            elif self.kv_cache_dtype in ("fp8_e5m2", torch.float8_e5m2):
+                fp8_view_dtype = torch.float8_e5m2
+            if fp8_view_dtype is not None:
+                kv_cache = kv_cache.view(fp8_view_dtype)
 
         # Inputs and outputs may be padded for CUDA graphs
         query = query[:num_actual_tokens]
@@ -1402,7 +1481,31 @@ def forward(
         num_prefill_tokens = attn_metadata.num_prefill_tokens
 
         stride_order = FlashInferBackend.get_kv_cache_stride_order()
-        kv_cache_permute = kv_cache.permute(*stride_order)
+        kv_cache_permute = kv_cache.permute(*stride_order)  # HND and contiguous
+        # Fix degenerate strides on any size-1 dimension (e.g. num_kv_heads=1
+        # with TP=8).  PyTorch permits non-canonical strides on size-1 dims;
+        # CUDA TMA requires ≥16-byte alignment on all non-outermost strides.
+        # canonicalize_singleton_dim_strides patches metadata via as_strided —
+        # zero-copy.  See vllm.utils.torch_utils.
+        fixed = canonicalize_singleton_dim_strides(kv_cache_permute)
+        if fixed is not kv_cache_permute:
+            logger.debug(
+                "Canonicalized degenerate KV cache strides (FlashInfer): "
+                "shape=%s, strides before=%s, strides after=%s",
+                kv_cache_permute.shape,
+                kv_cache_permute.stride(),
+                fixed.stride(),
+            )
+        kv_cache_permute = fixed
+
+        # For NVFP4, the kv_cache last dim is full_dim (data + scale packed).
+        # Split into correctly-strided data and scale views.
+        nvfp4_kv_data = None
+        nvfp4_kv_block_scales = None
+        if self.is_kvcache_nvfp4:
+            nvfp4_kv_data, nvfp4_kv_block_scales = nvfp4_kv_cache_split_views(
+                kv_cache_permute
+            )
 
         use_dcp = self.dcp_world_size > 1
 
@@ -1449,20 +1552,45 @@ def forward(
                     )
                     assert prefill_wrapper._sm_scale == self.scale
                     assert prefill_wrapper._causal
+
+                    if self.is_kvcache_nvfp4:
+                        kv_cache_permute = nvfp4_kv_data
+                    kv_cache_sf = (
+                        nvfp4_kv_block_scales if self.is_kvcache_nvfp4 else None
+                    )
+
+                    # NVFP4 trtllm kernel only supports FP8 output.
+                    # Use a pre-allocated FP8 buffer and dequantize
+                    # afterwards.
+                    needs_fp8_out_prefill = (
+                        self.is_kvcache_nvfp4 and output.dtype != FP8_DTYPE
+                    )
+                    if needs_fp8_out_prefill:
+                        out_prefill = self._nvfp4_fp8_out[:num_prefill_tokens]
+                    else:
+                        out_prefill = output[num_decode_tokens:]
+
                     prefill_wrapper.run(
                         prefill_query,
                         kv_cache_permute,
                         k_scale=layer._k_scale_float,
                         v_scale=layer._v_scale_float,
-                        out=output[num_decode_tokens:],
+                        out=out_prefill,
+                        kv_cache_sf=kv_cache_sf,
                     )
+
+                    if needs_fp8_out_prefill:
+                        output[
+                            num_decode_tokens : num_decode_tokens + num_prefill_tokens
+                        ].copy_(out_prefill.to(output.dtype))
             else:
                 assert isinstance(attn_metadata.prefill, TRTLLMPrefill)
                 # prefill_query may be non-contiguous or have degenerate strides
-                # First ensure memory contiguity, then fix degenerate strides
-                # with reshape. contiguous() alone doesn't fix degenerate
-                # strides when a dimension has size 1.
-                prefill_query = prefill_query.contiguous().reshape(prefill_query.shape)
+                # on size=1 dims. contiguous() ensures memory layout; then
+                # canonicalize_singleton_dim_strides fixes any remaining
+                # degenerate strides on size=1 dims for TMA alignment.
+                prefill_query = prefill_query.contiguous()
+                prefill_query = canonicalize_singleton_dim_strides(prefill_query)
                 workspace_buffer = _get_trtllm_gen_workspace_buffer()
                 block_tables_prefill = attn_metadata.prefill.block_tables
                 seq_lens_prefill = attn_metadata.prefill.seq_lens
@@ -1486,7 +1614,24 @@ def forward(
                     assert self.o_sf_scale is None
                     out = output[num_decode_tokens:]
 
-                if (
+                # NVFP4 trtllm kernel only supports FP8 output.
+                # Use a pre-allocated FP8 buffer and dequantize afterwards.
+                needs_fp8_out = self.is_kvcache_nvfp4 and output.dtype != FP8_DTYPE
+                if needs_fp8_out:
+                    out = self._nvfp4_fp8_out[:num_prefill_tokens]
+
+                prefill_kv_block_scales = None
+                if self.is_kvcache_nvfp4:
+                    # NVFP4 trtllm-gen kernel requires FP8 query.
+                    assert attn_metadata.q_data_type == FP8_DTYPE, (
+                        "NVFP4 KV cache requires FP8 quantized queries for "
+                        "trtllm-gen prefill. Set "
+                        "disable_flashinfer_q_quantization=False."
+                    )
+                    mock_kv_cache = nvfp4_kv_data
+                    mock_block_table = block_tables_prefill
+                    prefill_kv_block_scales = nvfp4_kv_block_scales
+                elif (
                     attn_metadata.q_data_type != FP8_DTYPE
                     and self.kv_cache_dtype.startswith("fp8")
                 ):
@@ -1495,11 +1640,9 @@ def forward(
                     # with fp8 kv cache, we can construct a mock block
                     # and mock kv cache with BF16 KV involved in the prefill
                     #
-                    # The inner (block_size, head_size) dims must be
-                    # contiguous; outer dims may have non-canonical strides
-                    # (e.g. cross-layer unified allocation).
-                    # Degenerate strides on outer dims break TMA descriptors
-                    # (see flashinfer-ai/flashinfer#2232).
+                    kv_cache_permute = canonicalize_singleton_dim_strides(
+                        kv_cache_permute
+                    )
                     kv_strides = kv_cache_permute.stride()
                     assert (
                         kv_strides[-1] == 1
@@ -1536,8 +1679,14 @@ def forward(
                     sinks=self.sinks,
                     o_sf_scale=self.o_sf_scale,
                     out=out,
+                    kv_cache_sf=prefill_kv_block_scales,
                 )
 
+                if needs_fp8_out:
+                    output[
+                        num_decode_tokens : num_decode_tokens + num_prefill_tokens
+                    ].copy_(out[:num_prefill_tokens].to(output.dtype))
+
         if num_decode_tokens > 0:
             decode_query = query[:num_decode_tokens]
             assert decode_query.shape[0] == num_decode_tokens
@@ -1550,6 +1699,18 @@ def forward(
                 assert decode_wrapper._logits_soft_cap == (self.logits_soft_cap or 0.0)
                 assert decode_wrapper._sm_scale == self.scale
 
+                if self.is_kvcache_nvfp4:
+                    kv_cache_permute = nvfp4_kv_data
+                kv_cache_sf = nvfp4_kv_block_scales if self.is_kvcache_nvfp4 else None
+
+                # NVFP4 kernel only supports FP8 output.
+                # Use a pre-allocated FP8 buffer and dequantize afterwards.
+                needs_fp8_out = self.is_kvcache_nvfp4 and output.dtype != FP8_DTYPE
+                if needs_fp8_out:
+                    out_decode = self._nvfp4_fp8_out[:num_decode_tokens]
+                else:
+                    out_decode = output[:num_decode_tokens]
+
                 if use_dcp:
                     decode_query = get_dcp_group().all_gather(
                         decode_query.contiguous(), dim=-2
@@ -1568,6 +1729,7 @@ def forward(
                         out=output_tmp,
                         lse=lse,
                         return_lse=True,
+                        kv_cache_sf=kv_cache_sf,
                     )
                     output[:num_decode_tokens] = self.dcp_combine(
                         output_tmp,
@@ -1580,15 +1742,20 @@ def forward(
                         kv_cache_permute,
                         k_scale=layer._k_scale_float,
                         v_scale=layer._v_scale_float,
-                        out=output[:num_decode_tokens],
+                        out=out_decode,
+                        kv_cache_sf=kv_cache_sf,
                     )
+
+                if needs_fp8_out:
+                    output[:num_decode_tokens].copy_(out_decode.to(output.dtype))
             else:
-                # decode_query may be non-contiguous or have degenerate strides
                 assert isinstance(attn_metadata.decode, TRTLLMDecode)
-                # First ensure memory contiguity, then fix degenerate strides
-                # with reshape. contiguous() alone doesn't fix degenerate
-                # strides when a dimension has size 1.
-                decode_query = decode_query.contiguous().reshape(decode_query.shape)
+                # decode_query may be non-contiguous or have degenerate strides
+                # on size=1 dims. contiguous() ensures memory layout; then
+                # canonicalize_singleton_dim_strides fixes any remaining
+                # degenerate strides on size=1 dims for TMA alignment.
+                decode_query = decode_query.contiguous()
+                decode_query = canonicalize_singleton_dim_strides(decode_query)
                 workspace_buffer = _get_trtllm_gen_workspace_buffer()
                 block_tables_decode = attn_metadata.decode.block_tables
                 seq_lens_decode = attn_metadata.decode.seq_lens
@@ -1599,11 +1766,7 @@ def forward(
                 assert is_strictly_contiguous(workspace_buffer)
                 assert is_strictly_contiguous(block_tables_decode)
                 assert is_strictly_contiguous(seq_lens_decode)
-                # kv_cache outer dims may be non-contiguous (e.g.
-                # cross-layer unified allocation), but inner dims
-                # (block_size, head_size) must be contiguous and
-                # strides must be canonical to avoid TMA descriptor
-                # failures (see flashinfer-ai/flashinfer#2232).
+                kv_cache_permute = canonicalize_singleton_dim_strides(kv_cache_permute)
                 kv_strides = kv_cache_permute.stride()
                 assert (
                     kv_strides[-1] == 1 and kv_strides[-2] == kv_cache_permute.shape[-1]
@@ -1624,6 +1787,12 @@ def forward(
                     assert self.o_sf_scale is None
                     out = output[:num_decode_tokens]
 
+                # NVFP4 trtllm kernel only supports FP8 output.
+                # Use a pre-allocated FP8 buffer and dequantize afterwards.
+                needs_fp8_out = self.is_kvcache_nvfp4 and output.dtype != FP8_DTYPE
+                if needs_fp8_out:
+                    out = self._nvfp4_fp8_out[:num_decode_tokens]
+
                 if num_decode_tokens % attn_metadata.num_decodes != 0:
                     # This gets triggered when the dummy_run forces
                     # attention to be initialized with q_len = 0
@@ -1633,7 +1802,9 @@ def forward(
 
                 trtllm_batch_decode_with_kv_cache(
                     query=decode_query,
-                    kv_cache=kv_cache_permute,
+                    kv_cache=(
+                        nvfp4_kv_data if self.is_kvcache_nvfp4 else kv_cache_permute
+                    ),
                     workspace_buffer=workspace_buffer,
                     block_tables=block_tables_decode,
                     seq_lens=seq_lens_decode,
@@ -1645,7 +1816,13 @@ def forward(
                     o_sf_scale=self.o_sf_scale,
                     out=out,
                     q_len_per_req=q_len_per_req,
+                    kv_cache_sf=(
+                        nvfp4_kv_block_scales if self.is_kvcache_nvfp4 else None
+                    ),
                 )
+
+                if needs_fp8_out:
+                    output[:num_decode_tokens].copy_(out.to(output.dtype))
         return output_padded
 
     def do_kv_cache_update(
@@ -1664,11 +1841,13 @@ def do_kv_cache_update(
             # and value[:num_actual_tokens] because the reshape_and_cache_flash
             # op uses the slot_mapping's shape to determine the number of
             # actual tokens.
+            k_cache = kv_cache[:, 0]
+            v_cache = kv_cache[:, 1]
             torch.ops._C_cache_ops.reshape_and_cache_flash(
                 key,
                 value,
-                kv_cache[:, 0],
-                kv_cache[:, 1],
+                k_cache,
+                v_cache,
                 slot_mapping,
                 self.kv_cache_dtype,
                 layer._k_scale,
diff --git a/vllm/v1/attention/backends/flex_attention.py b/vllm/v1/attention/backends/flex_attention.py
index 16874c17791a..3995f67ddc91 100644
--- a/vllm/v1/attention/backends/flex_attention.py
+++ b/vllm/v1/attention/backends/flex_attention.py
@@ -27,16 +27,17 @@
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.utils.math_utils import cdiv
-from vllm.utils.torch_utils import is_torch_equal_or_newer
+from vllm.utils.torch_utils import is_quantized_kv_cache, is_torch_equal_or_newer
 from vllm.v1.attention.backend import (
     AttentionBackend,
+    AttentionCGSupport,
     AttentionImpl,
     AttentionMetadataBuilder,
     AttentionType,
     CommonAttentionMetadata,
-    is_quantized_kv_cache,
+    MultipleOf,
 )
-from vllm.v1.kv_cache_interface import AttentionSpec
+from vllm.v1.kv_cache_interface import AttentionSpec, EncoderOnlyAttentionSpec
 
 logger = init_logger(__name__)
 
@@ -47,12 +48,16 @@
 flex_attention_compiled = torch.compile(flex_attention, fullgraph=True)
 
 
-def _offsets_to_doc_ids_tensor(offsets: torch.Tensor) -> torch.Tensor:
-    device = offsets.device
-    counts = offsets[1:] - offsets[:-1]
-    return torch.repeat_interleave(
-        torch.arange(len(counts), device=device, dtype=torch.int32), counts
+def _offsets_to_doc_ids_tensor(
+    offsets_cpu: torch.Tensor, device: torch.device
+) -> torch.Tensor:
+    # Build on CPU (so `repeat_interleave` doesn't force a GPU->CPU sync to
+    # learn the data-dependent output length) and upload non-blocking.
+    counts = offsets_cpu[1:] - offsets_cpu[:-1]
+    doc_ids = torch.repeat_interleave(
+        torch.arange(len(counts), dtype=torch.int32), counts
     )
+    return doc_ids.to(device, non_blocking=True)
 
 
 def pad_to_multiple(x: torch.Tensor, multiple: int, dim: int):
@@ -73,7 +78,6 @@ def pad_to_multiple(x: torch.Tensor, multiple: int, dim: int):
 
 
 class FlexAttentionBackend(AttentionBackend):
-    accept_output_buffer: bool = True
     supported_dtypes: ClassVar[list[torch.dtype]] = [
         torch.float16,
         torch.bfloat16,
@@ -91,11 +95,19 @@ class FlexAttentionBackend(AttentionBackend):
     def get_name() -> str:
         return "FLEX_ATTENTION"
 
+    @classmethod
+    def supports_non_causal(cls) -> bool:
+        return True
+
     @classmethod
     def supports_attn_type(cls, attn_type: str) -> bool:
         """FlexAttention supports both decoder and encoder-only attention."""
         return attn_type in (AttentionType.DECODER, AttentionType.ENCODER_ONLY)
 
+    @classmethod
+    def supports_batch_invariance(cls) -> bool:
+        return True
+
     @classmethod
     def supports_mm_prefix(cls) -> bool:
         """FlexAttention supports full attention for image tokens."""
@@ -127,6 +139,10 @@ def use_cascade_attention(*args, **kwargs) -> bool:
     def get_supported_head_sizes(cls) -> list[int]:
         return []
 
+    @staticmethod
+    def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
+        return [MultipleOf(16)]
+
 
 # @torch.compile(fullgraph=True, mode="reduce-overhead")
 def physical_to_logical_mapping(
@@ -278,11 +294,13 @@ def unique_static_unsorted(
     keep = (x_flat != ignored_val) & (idx == first_idx.gather(1, x_flat))  # [B, N]
 
     # ── left-pack uniques into a fresh tensor ───────────────────────────
+    # Route non-kept entries to a garbage slot at column N so we can do a
+    # single scatter rather than using torch.nonzero (which would force a
+    # GPU->CPU sync to enumerate kept positions).
     dest_pos = torch.cumsum(keep.to(torch.long), dim=1) - 1  # where to go
-    packed_flat = torch.full_like(x_flat, pad_val)
-
-    rows, src_cols = torch.nonzero(keep, as_tuple=True)
-    packed_flat[rows, dest_pos[rows, src_cols]] = x_flat[rows, src_cols]
+    dest_pos = torch.where(keep, dest_pos, N)
+    packed_extended = torch.full((B, N + 1), pad_val, device=device, dtype=x_flat.dtype)
+    packed_flat = packed_extended.scatter_(1, dest_pos, x_flat)[:, :N]
 
     # ── restore original layout ─────────────────────────────────────────
     packed = packed_flat.reshape(x_perm.shape).movedim(-1, dim)
@@ -295,6 +313,12 @@ def causal_mask_mod(
     return q_idx >= kv_idx
 
 
+def bidirectional_mask_mod(
+    b: torch.Tensor, h: torch.Tensor, q_idx: torch.Tensor, kv_idx: torch.Tensor
+):
+    return q_idx >= 0
+
+
 # Type alias for the block sparsity hint callable signature.
 _block_sparsity_hint_signature = Callable[
     [torch.Tensor, torch.Tensor, int], torch.Tensor
@@ -316,12 +340,21 @@ class BlockSparsityHint(NamedTuple):
     hint_fn: _block_sparsity_hint_signature
 
 
+def copy_to_persistent(dst, src):
+    sliced = dst[tuple(slice(0, s) for s in src.shape)]
+    sliced.copy_(src)
+    return sliced
+
+
 @dataclass
 class FlexAttentionMetadata:
     causal: bool
     num_actual_tokens: int  # Number of tokens excluding padding.
     max_query_len: int
     query_start_loc: torch.Tensor
+    # CPU-resident copy of query_start_loc used to derive doc_ids without a
+    # GPU->CPU sync from repeat_interleave's data-dependent output size.
+    query_start_loc_cpu: torch.Tensor
     max_seq_len: int
     seq_lens: torch.Tensor
     block_table: torch.Tensor
@@ -341,6 +374,9 @@ class FlexAttentionMetadata:
     physical_to_logical: torch.Tensor
     decode_offset: torch.Tensor
     num_blocks_per_seq: torch.Tensor
+    persistent_kv_indices: torch.Tensor
+    persistent_kv_num_blocks: torch.Tensor
+    persistent_doc_ids: torch.Tensor
 
     # For logging.
     num_input_tokens: int = 0  # Number of tokens including padding.
@@ -350,6 +386,7 @@ class FlexAttentionMetadata:
     block_mask: BlockMask | None = None
     score_mod: _score_mod_signature | None = None
     logical_mask_mod: _mask_mod_signature = causal_mask_mod
+    uses_paged_kv: bool = True
     doc_ids: torch.Tensor | None = None
     direct_build: bool = True
     q_block_size: int = 16
@@ -424,12 +461,7 @@ def final_mask_mod(
             (is_valid, logical_q_idx, logical_kv_idx) = (
                 self._convert_physical_to_logical(self.doc_ids, q_idx, physical_kv_idx)
             )
-            # Apply mask modification only for valid indices
-            return torch.where(
-                is_valid,
-                self.logical_mask_mod(b, h, logical_q_idx, logical_kv_idx),
-                False,
-            )
+            return is_valid & self.logical_mask_mod(b, h, logical_q_idx, logical_kv_idx)
 
         return final_mask_mod
 
@@ -441,7 +473,9 @@ def get_bidirectional_mask_mod(self) -> _mask_mod_signature:
         packed query sequences.
         """
         # Create a lookup mapping from query indices -> request number
-        request_lookup = _offsets_to_doc_ids_tensor(self.query_start_loc)
+        request_lookup = _offsets_to_doc_ids_tensor(
+            self.query_start_loc_cpu, self.query_start_loc.device
+        )
 
         def final_mask_mod(
             b: torch.Tensor,
@@ -483,7 +517,7 @@ def final_mask_mod(
                 False,
             )
 
-        return final_mask_mod if self.causal else sliding_window_mask_mod
+        return final_mask_mod if self.uses_paged_kv else sliding_window_mask_mod
 
     def get_prefix_lm_mask_mod(self) -> _mask_mod_signature:
         """Creates the prefix LM mask_mod function for FlexAttention."""
@@ -527,8 +561,7 @@ def final_mask_mod(
     def get_mask_mod(self):
         # Stage-1: initialize the base mask_mod
         # (causal mask for decoder or bidirectional mask for encoder)
-        has_custom_mask = self.logical_mask_mod is not causal_mask_mod
-        if self.causal or has_custom_mask:
+        if self.uses_paged_kv:
             mask_mod = self.get_paged_mask_mod()
         else:
             mask_mod = self.get_bidirectional_mask_mod()
@@ -554,7 +587,9 @@ def get_transformed_score_mod(self) -> _score_mod_signature | None:
             return None
 
         # Create a lookup mapping from query indices -> request number
-        request_lookup = _offsets_to_doc_ids_tensor(self.query_start_loc)
+        request_lookup = _offsets_to_doc_ids_tensor(
+            self.query_start_loc_cpu, self.query_start_loc.device
+        )
         user_score_mod = self.score_mod
 
         def transformed_score_mod(
@@ -581,7 +616,7 @@ def transformed_score_mod(
         return transformed_score_mod
 
     def _build_block_mask_direct(self) -> BlockMask:
-        """Direct block mask construction for standard causal attention.
+        """Direct block mask construction for paged KV cache attention.
 
         This method constructs the block mask directly using
         BlockMask.from_kv_blocks which is much more efficient than the
@@ -657,8 +692,11 @@ def _build_block_mask_direct(self) -> BlockMask:
         kv_indices = unique_static_unsorted(
             (used_pages_padded.long()), M=self.num_blocks
         ).to(torch.int32)
+        kv_indices = copy_to_persistent(self.persistent_kv_indices, kv_indices)
 
         kv_num_blocks = (kv_indices >= 0).sum(dim=-1).to(torch.int32)
+        kv_num_blocks = copy_to_persistent(self.persistent_kv_num_blocks, kv_num_blocks)
+
         block_mask_kwargs = {
             "seq_lengths": (self.num_actual_tokens, self.total_cache_tokens),
             "kv_num_blocks": kv_num_blocks[None, None],
@@ -676,7 +714,9 @@ def _build_block_mask_direct(self) -> BlockMask:
 
     def build_block_mask(self) -> BlockMask:
         mask_mod = self.get_mask_mod()
-        kv_len = self.total_cache_tokens if self.causal else self.num_actual_tokens
+        kv_len = (
+            self.total_cache_tokens if self.uses_paged_kv else self.num_actual_tokens
+        )
         return create_block_mask_compiled(
             mask_mod,
             None,
@@ -694,7 +734,10 @@ def __post_init__(self):
         assert self.prefix_kv_lens is None, "Not implemented yet."
         assert self.suffix_kv_lens is None, "Not implemented yet."
         # Create a lookup mapping from query indices -> request number
-        self.doc_ids = _offsets_to_doc_ids_tensor(self.query_start_loc)
+        self.doc_ids = _offsets_to_doc_ids_tensor(
+            self.query_start_loc_cpu, self.query_start_loc.device
+        )
+        self.doc_ids = copy_to_persistent(self.persistent_doc_ids, self.doc_ids)
         self.num_blocks = self.total_cache_tokens // self.block_size
 
         self.mask_mod = self.get_mask_mod()
@@ -702,6 +745,8 @@ def __post_init__(self):
 
 
 class FlexAttentionMetadataBuilder(AttentionMetadataBuilder[FlexAttentionMetadata]):
+    _cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.ALWAYS
+
     def __init__(
         self,
         kv_cache_spec: AttentionSpec,
@@ -724,8 +769,81 @@ def __init__(
         self.kv_cache_spec = kv_cache_spec
         supports_small_blocks = is_torch_equal_or_newer("2.9.0.dev0")
         self.direct_build: bool = supports_small_blocks
-        self.q_block_size: int = 16 if supports_small_blocks else 128
-        self.kv_block_size: int = self.block_size if supports_small_blocks else 128
+
+        self.q_block_size, self.kv_block_size = self._get_block_sizes(
+            vllm_config.attention_config,
+            supports_small_blocks,
+            self.block_size,
+        )
+
+        if self.direct_build and self.kv_block_size != self.block_size:
+            self.direct_build = False
+
+        self.max_model_len = self.model_config.max_model_len
+        max_num_seqs = vllm_config.scheduler_config.max_num_seqs
+        max_num_batched_tokens = vllm_config.scheduler_config.max_num_batched_tokens
+        self.max_num_query_groups = cdiv(max_num_batched_tokens, self.q_block_size)
+        max_num_pages_per_seq = cdiv(self.max_model_len, self.block_size)
+        self.max_num_kv_indices = self.q_block_size * max_num_pages_per_seq
+        self.persistent_kv_num_blocks = torch.empty(
+            self.max_num_query_groups, dtype=torch.int32, device=device
+        )
+        self.persistent_offset_tensor = torch.empty(
+            max_num_seqs, dtype=torch.int32, device=device
+        )
+        self.persistent_doc_ids = torch.empty(
+            max_num_batched_tokens, dtype=torch.int32, device=device
+        )
+
+        # initialize later when we can access block_table
+        self.persistent_physical_to_logical = None
+        self.persistent_kv_indices = None
+
+    @staticmethod
+    def _get_block_sizes(
+        attn_cfg,
+        supports_small_blocks: bool,
+        cache_block_size: int,
+    ) -> tuple[int, int]:
+        q_block_size = 16 if supports_small_blocks else 128
+        kv_block_size = cache_block_size if supports_small_blocks else 128
+
+        q_block_size = attn_cfg.flex_attn_q_block_size or q_block_size
+        if (q_block_size & (q_block_size - 1)) != 0 or (
+            attn_cfg.flex_attn_block_m is not None
+            and q_block_size % attn_cfg.flex_attn_block_m != 0
+        ):
+            raise ValueError(
+                f"flex_attn_q_block_size must be a power of 2 "
+                f"and divisible by flex_attn_block_m, got "
+                f"{q_block_size}, {attn_cfg.flex_attn_block_m}"
+            )
+
+        kv_block_size = attn_cfg.flex_attn_kv_block_size or kv_block_size
+        if (kv_block_size & (kv_block_size - 1)) != 0 or (
+            attn_cfg.flex_attn_block_n is not None
+            and kv_block_size % attn_cfg.flex_attn_block_n != 0
+        ):
+            raise ValueError(
+                f"flex_attn_kv_block_size must be a power of 2 "
+                f"and divisible by flex_attn_block_n, got "
+                f"{kv_block_size}, {attn_cfg.flex_attn_block_n}"
+            )
+
+        return q_block_size, kv_block_size
+
+    def build_for_cudagraph_capture(
+        self, common_attn_metadata: CommonAttentionMetadata
+    ) -> FlexAttentionMetadata:
+        # Use actual max_seq_len (not max_model_len) to avoid torch.compile
+        # recompilation during CUDA graph capture.
+        assert common_attn_metadata.seq_lens_cpu_upper_bound is not None
+        common_attn_metadata.max_seq_len = int(
+            common_attn_metadata.seq_lens_cpu_upper_bound.max().item()
+        )
+        return self.build(
+            common_prefix_len=0, common_attn_metadata=common_attn_metadata
+        )
 
     def build(
         self,
@@ -739,6 +857,7 @@ def build(
 
         max_seq_len = common_attn_metadata.max_seq_len
         query_start_loc = common_attn_metadata.query_start_loc
+        query_start_loc_cpu = common_attn_metadata.query_start_loc_cpu
         seq_lens = common_attn_metadata.seq_lens
         block_table_tensor = common_attn_metadata.block_table_tensor
         slot_mapping = common_attn_metadata.slot_mapping
@@ -766,14 +885,44 @@ def build(
         inverse_block_table = physical_to_logical_mapping(
             block_table_tensor, seq_lens, block_size, num_gpu_blocks
         )
+        if self.persistent_physical_to_logical is None:
+            max_num_seqs = self.vllm_config.scheduler_config.max_num_seqs
+            self.persistent_physical_to_logical = torch.empty(
+                max_num_seqs,
+                num_gpu_blocks,
+                dtype=torch.long,
+                device=self.device,
+            )
+
+        if self.persistent_kv_indices is None:
+            self.persistent_kv_indices = torch.empty(
+                self.max_num_query_groups,
+                self.max_num_kv_indices,
+                dtype=torch.int32,
+                device=self.device,
+            )
+
+        inverse_block_table = copy_to_persistent(
+            self.persistent_physical_to_logical, inverse_block_table
+        )
 
         offset_tensor = common_attn_metadata.compute_num_computed_tokens()
+        offset_tensor = copy_to_persistent(self.persistent_offset_tensor, offset_tensor)
+
+        uses_paged_kv = not isinstance(self.kv_cache_spec, EncoderOnlyAttentionSpec)
+        logical_mask_mod = (
+            bidirectional_mask_mod
+            if uses_paged_kv and not common_attn_metadata.causal
+            else causal_mask_mod
+        )
 
         out = FlexAttentionMetadata(
             causal=common_attn_metadata.causal,
+            logical_mask_mod=logical_mask_mod,
             num_actual_tokens=num_actual_tokens,
             max_query_len=max_query_len,
             query_start_loc=query_start_loc,
+            query_start_loc_cpu=query_start_loc_cpu,
             max_seq_len=max_seq_len,
             seq_lens=seq_lens,
             block_table=block_table_tensor,
@@ -790,13 +939,27 @@ def build(
             total_cache_tokens=total_cache_tokens,
             decode_offset=offset_tensor,
             num_blocks_per_seq=num_blocks_per_seq,
+            uses_paged_kv=uses_paged_kv,
             # FIXME(Isotr0py): direct build has issue to build bidirectional
             # attention block mask for encoder-only models, disable it temporarily.
             # see: https://github.com/vllm-project/vllm/pull/27329#issuecomment-3431484053
-            direct_build=(self.direct_build and common_attn_metadata.causal),
+            direct_build=self.direct_build and uses_paged_kv,
             q_block_size=self.q_block_size,
             kv_block_size=self.kv_block_size,
+            persistent_kv_indices=self.persistent_kv_indices,
+            persistent_kv_num_blocks=self.persistent_kv_num_blocks,
+            persistent_doc_ids=self.persistent_doc_ids,
         )
+
+        # Pre-build block_mask so it is ready before CUDA graph capture.
+        # Without this, the lazy build in forward() would run non-graph-safe
+        # ops (e.g. torch.nonzero) inside capture.
+        if out.block_mask is None:
+            if out.direct_build:
+                out.block_mask = out._build_block_mask_direct()
+            else:
+                out.block_mask = out.build_block_mask()
+
         return out
 
     def use_cascade_attention(self, *args, **kwargs) -> bool:
@@ -823,6 +986,8 @@ def __init__(
         logits_soft_cap: float | None = None,
         attn_type: AttentionType = AttentionType.DECODER,
         kv_sharing_target_layer_name: str | None = None,
+        block_m: int | None = None,
+        block_n: int | None = None,
         **kwargs,
     ) -> None:
         self.num_heads = num_heads
@@ -863,6 +1028,14 @@ def __init__(
                 "FlexAttention does not support quantized kv-cache. Yet"
             )
 
+        self.block_m = 16 if envs.VLLM_BATCH_INVARIANT else None
+        self.block_n = 16 if envs.VLLM_BATCH_INVARIANT else None
+
+        if block_m is not None:
+            self.block_m = block_m
+        if block_n is not None:
+            self.block_n = block_n
+
     @staticmethod
     def view_as_4d(tensor: torch.Tensor) -> torch.Tensor:
         """View a 3d tensor as 4D."""
@@ -902,7 +1075,7 @@ def forward(
         value: torch.Tensor,
         kv_cache: torch.Tensor,
         attn_metadata: FlexAttentionMetadata,
-        output: torch.Tensor | None = None,
+        output: torch.Tensor,
         output_scale: torch.Tensor | None = None,
         output_block_scale: torch.Tensor | None = None,
     ) -> torch.Tensor:
@@ -918,7 +1091,6 @@ def forward(
         Returns:
             shape = [num_tokens, num_heads * head_size]
         """
-        assert output is not None, "Output tensor must be provided."
         if output_scale is not None or output_block_scale is not None:
             raise NotImplementedError(
                 "fused output quantization is not yet supported for FlexAttentionImpl"
@@ -970,9 +1142,7 @@ def forward(
             else:
                 attn_metadata.block_mask = attn_metadata.build_block_mask()
 
-        if not attn_metadata.causal:
-            assert self.attn_type == AttentionType.ENCODER_ONLY
-
+        if self.attn_type == AttentionType.ENCODER_ONLY:
             query, key_tensor, value_tensor = map(
                 lambda x: self.view_as_4d(x).permute(0, 2, 1, 3),
                 (query, key, value),
@@ -1011,6 +1181,13 @@ def forward(
         kernel_options = get_kernel_options(
             query, block_m, block_n, attn_metadata.direct_build
         )
+
+        if self.block_m is not None:
+            kernel_options["BLOCK_M"] = self.block_m
+        if self.block_n is not None:
+            kernel_options["BLOCK_N"] = self.block_n
+        if envs.VLLM_BATCH_INVARIANT:
+            kernel_options["IS_DIVISIBLE"] = False
         out = flex_attention_compiled(
             query,
             key_tensor,
@@ -1050,11 +1227,6 @@ def ensure_divisible(candidate: int, block_size: int) -> int:
             return block_size
         return candidate
 
-    if envs.VLLM_BATCH_INVARIANT:
-        kernel_options["BLOCK_M"] = 16
-        kernel_options["BLOCK_N"] = 16
-        kernel_options["IS_DIVISIBLE"] = False
-        return kernel_options
     if use_direct_build:
         kernel_options["BLOCK_M"] = block_m
         kernel_options["BLOCK_N"] = block_n
diff --git a/vllm/v1/attention/backends/gdn_attn.py b/vllm/v1/attention/backends/gdn_attn.py
index 574cc87e7582..85715e91ab40 100644
--- a/vllm/v1/attention/backends/gdn_attn.py
+++ b/vllm/v1/attention/backends/gdn_attn.py
@@ -14,7 +14,7 @@
     CommonAttentionMetadata,
 )
 from vllm.v1.attention.backends.utils import (
-    PAD_SLOT_ID,
+    NULL_BLOCK_ID,
     compute_causal_conv1d_metadata,
     mamba_get_block_table_tensor,
     split_decodes_and_prefills,
@@ -31,6 +31,10 @@ def get_name() -> str:
     def get_builder_cls() -> type["GDNAttentionMetadataBuilder"]:
         return GDNAttentionMetadataBuilder
 
+    @classmethod
+    def is_ssm(cls) -> bool:
+        return True
+
 
 @dataclass
 class GDNAttentionMetadata:
@@ -59,6 +63,10 @@ class GDNAttentionMetadata:
 
     num_accepted_tokens: torch.Tensor | None = None  # shape: [batch,]
 
+    # Pre-computed FLA chunk metadata (avoids GPU->CPU sync in prepare_chunk_indices)
+    chunk_indices: torch.Tensor | None = None
+    chunk_offsets: torch.Tensor | None = None
+
     # The following attributes are for triton implementation of causal_conv1d
     nums_dict: dict | None = None
     batch_ptr: torch.Tensor | None = None
@@ -245,7 +253,7 @@ def build(  # type: ignore[override]
                 )
                 # Filter by spec_sequence_masks to exclude padded sequences
                 spec_state_indices_tensor = block_table_tensor[
-                    spec_sequence_masks, : self.num_spec + 1
+                    spec_sequence_masks_cpu, : self.num_spec + 1
                 ]
                 non_spec_state_indices_tensor = None
                 # Padded sequences are always at the back, so the first
@@ -256,7 +264,9 @@ def build(  # type: ignore[override]
                 non_spec_query_start_loc_cpu = None
             else:
                 spec_token_masks = torch.repeat_interleave(
-                    spec_sequence_masks, query_lens
+                    spec_sequence_masks,
+                    query_lens,
+                    output_size=query_start_loc_cpu[-1].item(),
                 )
                 index = torch.argsort(spec_token_masks, stable=True)
                 num_non_spec_tokens = num_prefill_tokens + num_decode_tokens
@@ -264,10 +274,10 @@ def build(  # type: ignore[override]
                 spec_token_indx = index[num_non_spec_tokens:]
 
                 spec_state_indices_tensor = block_table_tensor[
-                    spec_sequence_masks, : self.num_spec + 1
+                    spec_sequence_masks_cpu, : self.num_spec + 1
                 ]
                 non_spec_state_indices_tensor = block_table_tensor[
-                    ~spec_sequence_masks, 0
+                    ~spec_sequence_masks_cpu, 0
                 ]
 
                 spec_query_start_loc = torch.zeros(
@@ -276,7 +286,9 @@ def build(  # type: ignore[override]
                     device=query_start_loc.device,
                 )
                 torch.cumsum(
-                    query_lens[spec_sequence_masks], dim=0, out=spec_query_start_loc[1:]
+                    query_lens[spec_sequence_masks_cpu],
+                    dim=0,
+                    out=spec_query_start_loc[1:],
                 )
                 non_spec_query_start_loc = torch.zeros(
                     query_lens.size(0) - num_spec_decodes + 1,
@@ -284,7 +296,7 @@ def build(  # type: ignore[override]
                     device=query_start_loc.device,
                 )
                 torch.cumsum(
-                    query_lens[~spec_sequence_masks],
+                    query_lens[~spec_sequence_masks_cpu],
                     dim=0,
                     out=non_spec_query_start_loc[1:],
                 )
@@ -299,12 +311,32 @@ def build(  # type: ignore[override]
                 )
 
             assert num_accepted_tokens is not None
-            num_accepted_tokens = num_accepted_tokens[spec_sequence_masks]
+            num_accepted_tokens = num_accepted_tokens[spec_sequence_masks_cpu]
+
+        chunk_indices: torch.Tensor | None = None
+        chunk_offsets: torch.Tensor | None = None
+        if num_prefills > 0:
+            # Only prefill batches use FLA chunk ops.
+            # Pre-compute on CPU and async-copy to GPU to avoid
+            # GPU→CPU sync (.tolist()) in prepare_chunk_indices.
+            from vllm.model_executor.layers.fla.ops.index import (
+                prepare_chunk_indices,
+                prepare_chunk_offsets,
+            )
+            from vllm.model_executor.layers.fla.ops.utils import FLA_CHUNK_SIZE
+
+            gpu_device = query_start_loc.device
+            chunk_indices = prepare_chunk_indices(
+                non_spec_query_start_loc_cpu, FLA_CHUNK_SIZE
+            ).to(device=gpu_device, non_blocking=True)
+            chunk_offsets = prepare_chunk_offsets(
+                non_spec_query_start_loc_cpu, FLA_CHUNK_SIZE
+            ).to(device=gpu_device, non_blocking=True)
 
         if num_prefills > 0:
             has_initial_state = context_lens_tensor > 0
-            if spec_sequence_masks is not None:
-                has_initial_state = has_initial_state[~spec_sequence_masks]
+            if spec_sequence_masks_cpu is not None:
+                has_initial_state = has_initial_state[~spec_sequence_masks_cpu]
                 assert non_spec_query_start_loc_cpu is not None
             nums_dict, batch_ptr, token_chunk_offset_ptr = (
                 compute_causal_conv1d_metadata(
@@ -337,7 +369,7 @@ def build(  # type: ignore[override]
                 spec_state_indices_tensor, non_blocking=True
             )
             spec_state_indices_tensor = self.spec_state_indices_tensor[:batch_size]
-            spec_state_indices_tensor[num_spec_decodes:].fill_(PAD_SLOT_ID)
+            spec_state_indices_tensor[num_spec_decodes:].fill_(NULL_BLOCK_ID)
 
             self.spec_sequence_masks[:num_spec_decodes].copy_(
                 spec_sequence_masks[:num_spec_decodes], non_blocking=True
@@ -383,7 +415,7 @@ def build(  # type: ignore[override]
             non_spec_state_indices_tensor = self.non_spec_state_indices_tensor[
                 :batch_size
             ]
-            non_spec_state_indices_tensor[num_decodes:].fill_(PAD_SLOT_ID)
+            non_spec_state_indices_tensor[num_decodes:].fill_(NULL_BLOCK_ID)
 
             self.non_spec_query_start_loc[: num_decodes + 1].copy_(
                 non_spec_query_start_loc, non_blocking=True
@@ -401,6 +433,8 @@ def build(  # type: ignore[override]
             num_spec_decode_tokens=num_spec_decode_tokens,
             num_actual_tokens=m.num_actual_tokens,
             has_initial_state=has_initial_state,
+            chunk_indices=chunk_indices,
+            chunk_offsets=chunk_offsets,
             spec_query_start_loc=spec_query_start_loc,
             non_spec_query_start_loc=non_spec_query_start_loc,
             spec_state_indices_tensor=spec_state_indices_tensor,
diff --git a/vllm/v1/attention/backends/linear_attn.py b/vllm/v1/attention/backends/linear_attn.py
index fe27e7a389ac..b2ca151986cc 100644
--- a/vllm/v1/attention/backends/linear_attn.py
+++ b/vllm/v1/attention/backends/linear_attn.py
@@ -27,6 +27,10 @@ def get_name() -> str:
     def get_builder_cls() -> type["LinearAttentionMetadataBuilder"]:
         return LinearAttentionMetadataBuilder
 
+    @classmethod
+    def is_ssm(cls) -> bool:
+        return True
+
 
 @dataclass
 class LinearAttentionMetadata:
diff --git a/vllm/v1/attention/backends/mamba1_attn.py b/vllm/v1/attention/backends/mamba1_attn.py
index 8903406200ca..925fceb024f6 100644
--- a/vllm/v1/attention/backends/mamba1_attn.py
+++ b/vllm/v1/attention/backends/mamba1_attn.py
@@ -20,6 +20,10 @@ def get_name() -> str:
     def get_builder_cls() -> type["Mamba1AttentionMetadataBuilder"]:
         return Mamba1AttentionMetadataBuilder
 
+    @classmethod
+    def is_ssm(cls) -> bool:
+        return True
+
 
 @dataclass
 class Mamba1AttentionMetadata(BaseMambaAttentionMetadata):
diff --git a/vllm/v1/attention/backends/mamba2_attn.py b/vllm/v1/attention/backends/mamba2_attn.py
index 5e8abbab565e..5f25c4a79520 100644
--- a/vllm/v1/attention/backends/mamba2_attn.py
+++ b/vllm/v1/attention/backends/mamba2_attn.py
@@ -96,6 +96,10 @@ def get_name() -> str:
     def get_builder_cls() -> type["Mamba2AttentionMetadataBuilder"]:
         return Mamba2AttentionMetadataBuilder
 
+    @classmethod
+    def is_ssm(cls) -> bool:
+        return True
+
 
 @dataclass
 class Mamba2AttentionMetadata(BaseMambaAttentionMetadata):
@@ -133,7 +137,9 @@ def build(
         **kwargs: Any,
     ) -> Mamba2AttentionMetadata:
         common = self._compute_common_metadata(
-            common_attn_metadata, num_accepted_tokens=kwargs.get("num_accepted_tokens")
+            common_attn_metadata,
+            num_accepted_tokens=kwargs.get("num_accepted_tokens"),
+            prev_last_scheduled_idx=kwargs.get("prev_last_scheduled_idx"),
         )
 
         seq_idx_p = None
diff --git a/vllm/v1/attention/backends/mamba_attn.py b/vllm/v1/attention/backends/mamba_attn.py
index 59f2e7ca51a6..16e292e21d2f 100644
--- a/vllm/v1/attention/backends/mamba_attn.py
+++ b/vllm/v1/attention/backends/mamba_attn.py
@@ -9,13 +9,14 @@
 
 from vllm.config import VllmConfig
 from vllm.utils.math_utils import cdiv
+from vllm.utils.torch_utils import async_tensor_h2d
 from vllm.v1.attention.backend import (
     AttentionCGSupport,
     AttentionMetadataBuilder,
     CommonAttentionMetadata,
 )
 from vllm.v1.attention.backends.utils import (
-    PAD_SLOT_ID,
+    NULL_BLOCK_ID,
     compute_causal_conv1d_metadata,
     mamba_get_block_table_tensor,
     split_decodes_and_prefills,
@@ -55,6 +56,7 @@ class BaseMambaAttentionMetadata:
     block_idx_last_scheduled_token: torch.Tensor | None
     block_idx_first_scheduled_token_p: torch.Tensor | None
     block_idx_last_computed_token: torch.Tensor | None
+    block_idx_last_scheduled_token_prev_step: torch.Tensor | None
 
     # The following tensor is only used for prefix caching in align mode
     seq_lens: torch.Tensor
@@ -107,12 +109,13 @@ def __init__(
             )
 
         if self.vllm_config.cache_config.mamba_cache_mode == "all":
-            max_num_blocks = cdiv(
-                self.vllm_config.model_config.max_model_len,
-                self.kv_cache_spec.block_size,
+            max_num_blocks = (
+                cdiv(
+                    self.vllm_config.model_config.max_model_len,
+                    kv_cache_spec.block_size,
+                )
+                + kv_cache_spec.num_speculative_blocks
             )
-            # Speculative decoding not supported with prefix caching,
-            # so keep shape consistent with prefill buffer
             # TODO: reduce this size as needed for decode-only cudagraph capture
             self.state_indices_tensor_d: torch.Tensor = torch.empty(
                 (
@@ -132,6 +135,14 @@ def __init__(
                 dtype=torch.int32,
                 device=device,
             )
+            if self.use_spec_decode:
+                self.block_idx_last_scheduled_token_prev_step: torch.Tensor = (
+                    torch.empty(
+                        (self.decode_cudagraph_max_bs,),
+                        dtype=torch.int32,
+                        device=device,
+                    )
+                )
         else:
             self.state_indices_tensor_d = torch.empty(
                 (self.decode_cudagraph_max_bs, 1 + self.num_spec_tokens),
@@ -175,7 +186,23 @@ def build_for_cudagraph_capture(
         if self.num_spec_tokens > 0:
             num_accepted_tokens = torch.diff(m.query_start_loc)
 
-        return self.build(0, m, num_accepted_tokens=num_accepted_tokens)
+        prev_last_scheduled_idx = None
+        if (
+            self.use_spec_decode
+            and self.vllm_config.cache_config.mamba_cache_mode == "all"
+        ):
+            prev_last_scheduled_idx = torch.zeros(
+                (m.num_reqs,),
+                dtype=torch.int32,
+                device=m.query_start_loc.device,
+            )
+
+        return self.build(
+            0,
+            m,
+            num_accepted_tokens=num_accepted_tokens,
+            prev_last_scheduled_idx=prev_last_scheduled_idx,
+        )
 
     def build(
         self,
@@ -184,6 +211,7 @@ def build(
         fast_build: bool = False,
         *,
         num_accepted_tokens: torch.Tensor | None = None,
+        prev_last_scheduled_idx: torch.Tensor | None = None,
         **kwargs: Any,
     ) -> M:
         """
@@ -191,7 +219,9 @@ def build(
         Subclasses (e.g., Mamba2) can override to add additional metadata.
         """
         return self._compute_common_metadata(
-            common_attn_metadata, num_accepted_tokens=num_accepted_tokens
+            common_attn_metadata,
+            num_accepted_tokens=num_accepted_tokens,
+            prev_last_scheduled_idx=prev_last_scheduled_idx,
         )
 
     def _compute_chunk_metadata(
@@ -270,16 +300,20 @@ def _build_chunk_metadata_tensors(
         num_prefills = common.num_prefills
         num_decode_tokens = common.num_decode_tokens
 
-        num_computed_tokens_cpu = (
-            common_attn_metadata.compute_num_computed_tokens().cpu()
-        )
-        num_computed_tokens_p_cpu = num_computed_tokens_cpu[
-            num_reqs - num_prefills : num_reqs
-        ]
+        # Derive prefill context lengths from CPU data only.
+        # `seq_lens_cpu_upper_bound` is precise for prefill rows in all modes
+        # (including async spec decode), so this avoids the D2H sync that
+        # `compute_num_computed_tokens().cpu()` would force.
+        seq_lens_cpu = common_attn_metadata.seq_lens_cpu_upper_bound
+        assert seq_lens_cpu is not None
         query_start_loc_p_cpu = (
             common_attn_metadata.query_start_loc_cpu[-num_prefills - 1 :]
             - num_decode_tokens
         )
+        prefill_query_lens_cpu = query_start_loc_p_cpu[1:] - query_start_loc_p_cpu[:-1]
+        num_computed_tokens_p_cpu = (
+            seq_lens_cpu[num_reqs - num_prefills : num_reqs] - prefill_query_lens_cpu
+        )
 
         cu_chunk_seqlen, seq_idx, last_chunk_indices = self._compute_chunk_metadata(
             chunk_size,
@@ -289,20 +323,14 @@ def _build_chunk_metadata_tensors(
         )
 
         device = common_attn_metadata.query_start_loc.device
-        cu_chunk_seqlen_p = torch.as_tensor(
-            cu_chunk_seqlen,
-            device=device,
-            dtype=torch.int32,
+        # Build on pinned CPU and upload non-blocking to avoid the synchronous
+        # H2D copy that `torch.as_tensor(list, device=cuda)` would force.
+        cu_chunk_seqlen_p = async_tensor_h2d(
+            cu_chunk_seqlen, dtype=torch.int32, device=device
         )
-        seq_idx_p = torch.as_tensor(
-            seq_idx,
-            device=device,
-            dtype=torch.int32,
-        )
-        last_chunk_indices_p = torch.as_tensor(
-            last_chunk_indices,
-            device=device,
-            dtype=torch.int32,
+        seq_idx_p = async_tensor_h2d(seq_idx, dtype=torch.int32, device=device)
+        last_chunk_indices_p = async_tensor_h2d(
+            last_chunk_indices, dtype=torch.int32, device=device
         )
         return cu_chunk_seqlen_p, seq_idx_p, last_chunk_indices_p
 
@@ -342,6 +370,7 @@ def _compute_common_metadata(
         common_attn_metadata: CommonAttentionMetadata,
         *,
         num_accepted_tokens: torch.Tensor | None = None,
+        prev_last_scheduled_idx: torch.Tensor | None = None,
     ) -> M:
         """
         Compute metadata common to both Mamba1 and Mamba2.
@@ -356,6 +385,28 @@ def _compute_common_metadata(
             self.reorder_batch_threshold if num_accepted_tokens is not None else 1
         )
 
+        # FULL-CG dispatch is shape-based, so one-token prefills with
+        # prior Mamba state can replay a decode graph while `is_prefilling`
+        # is still true. Treat them as decode/update rows. This is required
+        # for NIXL disagg's h(N-1)->N recompute path and for sporadic
+        # final single-token prefill chunks that land in a `uniform` FULL-CG
+        # batch. Relies on `reorder` putting short extends before pure prefills.
+        is_prefilling = common_attn_metadata.is_prefilling
+        assert is_prefilling is not None
+        seq_lens_cpu = common_attn_metadata.seq_lens_cpu_upper_bound
+        assert seq_lens_cpu is not None
+        query_lens_cpu = torch.diff(common_attn_metadata.query_start_loc_cpu)
+        single_token_prefill_rows = is_prefilling & (query_lens_cpu == 1)
+        # First-token prefills have no prior Mamba state and must stay prefills.
+        has_prior_state = seq_lens_cpu > 1
+        prefill_to_decode = single_token_prefill_rows & has_prior_state
+        if torch.any(prefill_to_decode).item():
+            is_prefilling = is_prefilling.clone()
+            is_prefilling[prefill_to_decode] = False
+            common_attn_metadata = common_attn_metadata.replace(
+                is_prefilling=is_prefilling
+            )
+
         num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = (
             split_decodes_and_prefills(
                 common_attn_metadata,
@@ -376,6 +427,7 @@ def _compute_common_metadata(
         block_idx_first_scheduled_token_p = None
         block_idx_last_computed_token = None
         block_idx_last_scheduled_token = None
+        block_idx_last_scheduled_token_prev_step = None
 
         # for causal_conv1d
         nums_dict, batch_ptr, token_chunk_offset_ptr = None, None, None
@@ -394,6 +446,15 @@ def _compute_common_metadata(
             ) = self._compute_prefix_caching_block_indices(
                 common_attn_metadata, mamba_block_size
             )
+            if self.use_spec_decode and prev_last_scheduled_idx is not None:
+                fallback = torch.clamp(
+                    (num_computed_tokens - 1) // mamba_block_size, min=0
+                )
+                block_idx_last_scheduled_token_prev_step = torch.where(
+                    prev_last_scheduled_idx >= 0,
+                    prev_last_scheduled_idx,
+                    fallback,
+                )
         else:
             state_indices_tensor = mamba_get_block_table_tensor(
                 common_attn_metadata.block_table_tensor,
@@ -471,6 +532,9 @@ def _compute_common_metadata(
             block_idx_last_scheduled_token=block_idx_last_scheduled_token,
             block_idx_first_scheduled_token_p=block_idx_first_scheduled_token_p,
             block_idx_last_computed_token=block_idx_last_computed_token,
+            block_idx_last_scheduled_token_prev_step=(
+                block_idx_last_scheduled_token_prev_step
+            ),
             num_computed_tokens_p=num_computed_tokens_p,
             num_reqs=num_reqs,
             seq_lens=common_attn_metadata.seq_lens,
@@ -494,6 +558,9 @@ def _update_metadata_for_cudagraph_capture(
         num_accepted_tokens = metadata.num_accepted_tokens
         block_idx_last_scheduled_token = metadata.block_idx_last_scheduled_token
         block_idx_last_computed_token = metadata.block_idx_last_computed_token
+        block_idx_last_scheduled_token_prev_step = (
+            metadata.block_idx_last_scheduled_token_prev_step
+        )
         if (
             metadata.num_prefills == 0
             and metadata.num_decodes <= self.decode_cudagraph_max_bs
@@ -504,7 +571,7 @@ def _update_metadata_for_cudagraph_capture(
                 state_indices_tensor_d, non_blocking=True
             )
             state_indices_tensor_d = self.state_indices_tensor_d[:padded_bs]
-            state_indices_tensor_d[metadata.num_decodes :] = PAD_SLOT_ID
+            state_indices_tensor_d[metadata.num_decodes :] = NULL_BLOCK_ID
 
             if self.use_spec_decode and num_accepted_tokens is not None:
                 assert query_start_loc_d is not None
@@ -525,16 +592,35 @@ def _update_metadata_for_cudagraph_capture(
                     non_blocking=True,
                 )
                 block_idx_last_scheduled_token = self.block_idx_last_scheduled_token[
-                    : metadata.num_decode_tokens
+                    :padded_bs
                 ]
+                block_idx_last_scheduled_token[metadata.num_decodes :] = 0
 
                 self.block_idx_last_computed_token[: metadata.num_decodes].copy_(
                     block_idx_last_computed_token[: metadata.num_decodes],
                     non_blocking=True,
                 )
                 block_idx_last_computed_token = self.block_idx_last_computed_token[
-                    : metadata.num_decode_tokens
+                    :padded_bs
                 ]
+                block_idx_last_computed_token[metadata.num_decodes :] = 0
+
+                if (
+                    self.use_spec_decode
+                    and block_idx_last_scheduled_token_prev_step is not None
+                ):
+                    self.block_idx_last_scheduled_token_prev_step[
+                        : metadata.num_decodes
+                    ].copy_(
+                        block_idx_last_scheduled_token_prev_step[
+                            : metadata.num_decodes
+                        ],
+                        non_blocking=True,
+                    )
+                    block_idx_last_scheduled_token_prev_step = (
+                        self.block_idx_last_scheduled_token_prev_step[:padded_bs]
+                    )
+                    block_idx_last_scheduled_token_prev_step[metadata.num_decodes :] = 0
 
         return replace(
             metadata,
@@ -543,6 +629,9 @@ def _update_metadata_for_cudagraph_capture(
             num_accepted_tokens=num_accepted_tokens,
             block_idx_last_scheduled_token=block_idx_last_scheduled_token,
             block_idx_last_computed_token=block_idx_last_computed_token,
+            block_idx_last_scheduled_token_prev_step=(
+                block_idx_last_scheduled_token_prev_step
+            ),
         )
 
     def update_block_table(
diff --git a/vllm/v1/attention/backends/mla/compressor_utils.py b/vllm/v1/attention/backends/mla/compressor_utils.py
new file mode 100644
index 000000000000..36b115f64444
--- /dev/null
+++ b/vllm/v1/attention/backends/mla/compressor_utils.py
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+
+from vllm.triton_utils import tl, triton
+
+
+@triton.jit
+def _compressed_slot_mapping_kernel(
+    # [num_tokens]
+    slot_mapping_ptr,
+    # [num_reqs + 1]
+    query_start_loc_ptr,
+    # [num_reqs]
+    seq_lens_ptr,
+    # [num_reqs, max_num_blocks]
+    block_table_ptr,
+    block_table_stride,
+    block_size,
+    COMPRESS_RATIO: tl.constexpr,
+    PAD_ID: tl.constexpr,
+    TRITON_BLOCK_SIZE: tl.constexpr,
+):
+    batch_idx = tl.program_id(0)
+
+    query_start = tl.load(query_start_loc_ptr + batch_idx)
+    query_end = tl.load(query_start_loc_ptr + batch_idx + 1)
+    query_len = query_end - query_start
+
+    seq_len = tl.load(seq_lens_ptr + batch_idx)
+    start_pos = seq_len - query_len
+
+    for i in range(0, query_len, TRITON_BLOCK_SIZE):
+        offset = i + tl.arange(0, TRITON_BLOCK_SIZE)
+        mask = offset < query_len
+
+        pos = start_pos + i + tl.arange(0, TRITON_BLOCK_SIZE)
+        is_valid = (pos + 1) % COMPRESS_RATIO == 0
+        pos_after_compress = pos // COMPRESS_RATIO
+
+        block_ids = pos_after_compress // block_size
+        block_numbers = tl.load(
+            block_table_ptr + batch_idx * block_table_stride + block_ids,
+            mask=mask & is_valid,
+        )
+        slot_ids = block_numbers * block_size + pos_after_compress % block_size
+
+        # NOTE
+        slot_ids = tl.where(is_valid, slot_ids, PAD_ID)
+        tl.store(slot_mapping_ptr + query_start + offset, slot_ids, mask=mask)
+
+
+def get_compressed_slot_mapping(
+    num_tokens: int,
+    query_start_loc: torch.Tensor,
+    seq_lens: torch.Tensor,
+    block_table: torch.Tensor,
+    block_size: int,
+    compress_ratio: int,
+    out: torch.Tensor | None = None,
+) -> torch.Tensor:
+    if out is not None:
+        # Guard: for padded / invalid sequences.
+        # Negative positions produce bogus block indices that lead to illegal memory
+        # accesses inside the block_table load.
+        # NOTE: Fill -1 to the whole tensor, not just the first `num_tokens`.
+        out.fill_(-1)
+        slot_mapping = out[:num_tokens]
+    else:
+        slot_mapping = torch.full(
+            (num_tokens,), -1, dtype=torch.int64, device=query_start_loc.device
+        )
+
+    num_reqs = block_table.shape[0]
+    _compressed_slot_mapping_kernel[(num_reqs,)](
+        slot_mapping,
+        query_start_loc,
+        seq_lens,
+        block_table,
+        block_table.stride(0),
+        block_size,
+        compress_ratio,
+        PAD_ID=-1,
+        TRITON_BLOCK_SIZE=1024,
+    )
+    return slot_mapping
diff --git a/vllm/v1/attention/backends/mla/cutlass_mla.py b/vllm/v1/attention/backends/mla/cutlass_mla.py
index fd4d9ab84274..8815bd93407b 100644
--- a/vllm/v1/attention/backends/mla/cutlass_mla.py
+++ b/vllm/v1/attention/backends/mla/cutlass_mla.py
@@ -17,12 +17,12 @@
 )
 from vllm.platforms.interface import DeviceCapability
 from vllm.utils.platform_utils import num_compute_units
+from vllm.utils.torch_utils import is_quantized_kv_cache
 from vllm.v1.attention.backend import (
     AttentionCGSupport,
     AttentionLayer,
     AttentionType,
     MultipleOf,
-    is_quantized_kv_cache,
 )
 
 logger = init_logger(__name__)
@@ -162,11 +162,6 @@ def __init__(
         # Share workspace buffer across all executions
         self._workspace = g_sm100_workspace
 
-        # Pre-allocated output buffer, lazily sized on first call.
-        # Zero-init once to prevent NaN in padding slots (seq_lens=0)
-        # from contaminating downstream per-tensor reductions.
-        self._decode_out: torch.Tensor | None = None
-
     def _sm100_cutlass_mla_decode(
         self,
         q_nope: torch.Tensor,
@@ -223,15 +218,7 @@ def _sm100_cutlass_mla_decode(
             if is_quantized_kv_cache(self.kv_cache_dtype)
             else q_nope.dtype
         )
-        # Reuse pre-allocated zero-init output buffer to avoid a memset
-        # kernel on every CUDA graph replay.
-        if (
-            self._decode_out is None
-            or self._decode_out.shape[0] < B_q
-            or self._decode_out.dtype != dtype
-        ):
-            self._decode_out = q_nope.new_zeros((B_q, MAX_HEADS, D_latent), dtype=dtype)
-        out = self._decode_out[:B_q]
+        out = q_nope.new_empty((B_q, MAX_HEADS, D_latent), dtype=dtype)
         lse = (
             torch.empty((B_q, MAX_HEADS), dtype=torch.float32, device=q_nope.device)
             if self.need_to_return_lse_for_decode
diff --git a/vllm/v1/attention/backends/mla/flashattn_mla.py b/vllm/v1/attention/backends/mla/flashattn_mla.py
index 82d463dcd09e..bd947296e8bc 100644
--- a/vllm/v1/attention/backends/mla/flashattn_mla.py
+++ b/vllm/v1/attention/backends/mla/flashattn_mla.py
@@ -20,12 +20,12 @@
 )
 from vllm.platforms.interface import DeviceCapability
 from vllm.utils.math_utils import round_up
+from vllm.utils.torch_utils import is_quantized_kv_cache
 from vllm.v1.attention.backend import (
     AttentionCGSupport,
     AttentionLayer,
     AttentionType,
     MultipleOf,
-    is_quantized_kv_cache,
 )
 from vllm.v1.attention.backends.fa_utils import (
     flash_attn_supports_mla,
@@ -56,6 +56,10 @@ def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
     def get_name() -> str:
         return "FLASH_ATTN_MLA"
 
+    @classmethod
+    def supports_batch_invariance(cls) -> bool:
+        return True
+
     @staticmethod
     def get_builder_cls() -> type["FlashAttnMLAMetadataBuilder"]:
         return FlashAttnMLAMetadataBuilder
@@ -319,7 +323,7 @@ def forward_mqa(
                 q, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1
             )
 
-        if self.kv_cache_dtype.startswith("fp8"):
+        if is_quantized_kv_cache(self.kv_cache_dtype):
             raise NotImplementedError("FP8 FlashAttention MLA not yet supported")
 
         kv_c_cache = kv_c_and_k_pe_cache[..., : self.kv_lora_rank]
diff --git a/vllm/v1/attention/backends/mla/flashinfer_mla.py b/vllm/v1/attention/backends/mla/flashinfer_mla.py
index 16d01bd338ca..e98bee9d79b5 100644
--- a/vllm/v1/attention/backends/mla/flashinfer_mla.py
+++ b/vllm/v1/attention/backends/mla/flashinfer_mla.py
@@ -16,12 +16,12 @@
     QueryLenSupport,
 )
 from vllm.platforms.interface import DeviceCapability
+from vllm.utils.torch_utils import is_quantized_kv_cache
 from vllm.v1.attention.backend import (
     AttentionCGSupport,
     AttentionLayer,
     AttentionType,
     MultipleOf,
-    is_quantized_kv_cache,
 )
 from vllm.v1.attention.backends.utils import KVCacheLayoutType
 
@@ -152,11 +152,6 @@ def __init__(
         self.bmm1_scale: float | None = None
         self.bmm2_scale: float | None = None
 
-        # Pre-allocated output buffer, lazily sized on first call.
-        # Zero-init once to prevent NaN in padding slots (seq_lens=0)
-        # from contaminating downstream per-tensor reductions.
-        self._decode_out: torch.Tensor | None = None
-
     def forward_mqa(
         self,
         q: torch.Tensor | tuple[torch.Tensor, torch.Tensor],
@@ -184,45 +179,14 @@ def forward_mqa(
 
         if self.bmm1_scale is None:
             self.bmm1_scale = self.scale
-            if self.kv_cache_dtype.startswith("fp8"):
+            if is_quantized_kv_cache(self.kv_cache_dtype):
                 self.bmm1_scale *= layer._q_scale_float * layer._k_scale_float
 
         if self.bmm2_scale is None:
             self.bmm2_scale = 1.0
-            if self.kv_cache_dtype.startswith("fp8"):
+            if is_quantized_kv_cache(self.kv_cache_dtype):
                 self.bmm2_scale *= layer._k_scale_float
 
-        # Reuse pre-allocated zero-init output buffer to avoid a memset
-        # kernel on every CUDA graph replay.
-        # q is 4D: (batch, q_len_per_req, num_heads, head_dim)
-        # FlashInfer has a bug where out= validation hardcodes 3D shape
-        # (batch, num_heads, kv_lora_rank), but the kernel writes 4D
-        # (batch, q_len, num_heads, kv_lora_rank) when q_len > 1.
-        # So we can only pass out= for single-token decode (q_len == 1).
-        # For q_len > 1, we zero padding slots after the kernel returns.
-        # TODO: upstream fix to FlashInfer
-        B, q_len_per_req = q.shape[0], q.shape[1]
-        out_kwargs: dict[str, torch.Tensor] = {}
-        if q_len_per_req == 1:
-            dtype = (
-                torch.bfloat16
-                if is_quantized_kv_cache(self.kv_cache_dtype)
-                else q.dtype
-            )
-            if (
-                self._decode_out is None
-                or self._decode_out.shape[0] < B
-                or self._decode_out.dtype != dtype
-            ):
-                self._decode_out = torch.zeros(
-                    B,
-                    q.shape[2],
-                    self.kv_lora_rank,
-                    dtype=dtype,
-                    device=q.device,
-                )
-            out_kwargs["out"] = self._decode_out[:B]
-
         o = trtllm_batch_decode_with_kv_cache_mla(
             query=q,
             kv_cache=kv_c_and_k_pe_cache.unsqueeze(1),
@@ -235,15 +199,8 @@ def forward_mqa(
             max_seq_len=attn_metadata.max_seq_len,
             bmm1_scale=self.bmm1_scale,
             bmm2_scale=self.bmm2_scale,
-            **out_kwargs,
         )
 
-        # For q_len > 1, we can't pass out= so we work around by zeroing padding slots
-        if not out_kwargs:
-            num_real = attn_metadata.num_decodes
-            if num_real < o.shape[0]:
-                o[num_real:] = 0
-
         # Flatten the output for consistent shape
         o = o.view(-1, o.shape[-2], o.shape[-1])
 
diff --git a/vllm/v1/attention/backends/mla/flashinfer_mla_sparse.py b/vllm/v1/attention/backends/mla/flashinfer_mla_sparse.py
index 7b5ec0d4976a..842153f40396 100644
--- a/vllm/v1/attention/backends/mla/flashinfer_mla_sparse.py
+++ b/vllm/v1/attention/backends/mla/flashinfer_mla_sparse.py
@@ -26,6 +26,7 @@
     get_mla_dims,
 )
 from vllm.platforms.interface import DeviceCapability
+from vllm.utils.torch_utils import is_quantized_kv_cache
 from vllm.v1.attention.backend import (
     AttentionBackend,
     AttentionCGSupport,
@@ -58,7 +59,6 @@ class FlashInferMLASparseBackend(AttentionBackend):
     for models like DeepSeek-V3.2 that use index-based sparse attention.
     """
 
-    accept_output_buffer: bool = True
     supported_dtypes: ClassVar[list[torch.dtype]] = [torch.float16, torch.bfloat16]
     supported_kv_cache_dtypes: ClassVar[list[CacheDType]] = [
         "auto",
@@ -341,11 +341,11 @@ def forward_mqa(
 
         if self.bmm1_scale is None:
             self.bmm1_scale = self.scale
-            if self.kv_cache_dtype.startswith("fp8"):
+            if is_quantized_kv_cache(self.kv_cache_dtype):
                 self.bmm1_scale *= layer._q_scale_float * layer._k_scale_float
         if self.bmm2_scale is None:
             self.bmm2_scale = 1.0
-            if self.kv_cache_dtype.startswith("fp8"):
+            if is_quantized_kv_cache(self.kv_cache_dtype):
                 self.bmm2_scale *= layer._k_scale_float
 
         o = trtllm_batch_decode_with_kv_cache_mla(
diff --git a/vllm/v1/attention/backends/mla/flashmla.py b/vllm/v1/attention/backends/mla/flashmla.py
index df54b865a4d0..2f6058d69aeb 100644
--- a/vllm/v1/attention/backends/mla/flashmla.py
+++ b/vllm/v1/attention/backends/mla/flashmla.py
@@ -20,6 +20,7 @@
 )
 from vllm.platforms.interface import DeviceCapability
 from vllm.utils.platform_utils import num_compute_units
+from vllm.utils.torch_utils import is_quantized_kv_cache
 from vllm.v1.attention.backend import (
     AttentionCGSupport,
     AttentionLayer,
@@ -128,7 +129,9 @@ def __init__(
 
         self.cg_buf_tile_scheduler_metadata = None
         self.cg_buf_num_splits = None
-        self.is_fp8_kvcache = vllm_config.cache_config.cache_dtype.startswith("fp8")
+        self.is_fp8_kvcache = is_quantized_kv_cache(
+            vllm_config.cache_config.cache_dtype
+        )
 
         num_sms = num_compute_units(self.device.index)
 
@@ -269,7 +272,7 @@ def forward_mqa(
         q = reshape_query_for_spec_decode(q, num_decodes)
 
         scheduler_metadata = attn_metadata.decode.scheduler_metadata
-        if envs.VLLM_BATCH_INVARIANT and not self.kv_cache_dtype.startswith("fp8"):
+        if envs.VLLM_BATCH_INVARIANT and not is_quantized_kv_cache(self.kv_cache_dtype):
             device = q.device
             dtype = torch.int32
 
@@ -299,7 +302,7 @@ def forward_mqa(
             scheduler_metadata.tile_scheduler_metadata = tile_scheduler_metadata
             scheduler_metadata.num_splits = num_splits
 
-        if self.kv_cache_dtype.startswith("fp8"):
+        if is_quantized_kv_cache(self.kv_cache_dtype):
             o, lse = flash_mla_with_kvcache_fp8(
                 q=q,
                 k_cache=kv_c_and_k_pe_cache.unsqueeze(-2),  # Add head dim of 1
diff --git a/vllm/v1/attention/backends/mla/flashmla_sparse.py b/vllm/v1/attention/backends/mla/flashmla_sparse.py
index 7cc50ec84584..9140a6fccd55 100644
--- a/vllm/v1/attention/backends/mla/flashmla_sparse.py
+++ b/vllm/v1/attention/backends/mla/flashmla_sparse.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, ClassVar
+from typing import TYPE_CHECKING, Any, ClassVar
 
 import numpy as np
 import torch
@@ -15,7 +15,10 @@
 )
 from vllm.platforms import current_platform
 from vllm.platforms.interface import DeviceCapability
+from vllm.triton_utils import tl, triton
+from vllm.utils.math_utils import cdiv
 from vllm.utils.platform_utils import num_compute_units
+from vllm.utils.torch_utils import is_quantized_kv_cache
 from vllm.v1.attention.backend import (
     AttentionBackend,
     AttentionCGSupport,
@@ -26,6 +29,7 @@
     MultipleOf,
     SparseMLAAttentionImpl,
 )
+from vllm.v1.attention.backends.mla.compressor_utils import get_compressed_slot_mapping
 from vllm.v1.attention.backends.mla.sparse_utils import (
     triton_convert_req_index_to_global_index,
 )
@@ -64,8 +68,8 @@
 """
 NOTE: FlashMLA Sparse uses an fp8 cache with the following format
 
-In the "FP8 with scale" format, each token's KV cache is 656 Bytes,
-structured as:
+For DeepSeek V3.2, in the "FP8 with scale" format, each token's KV cache is 656
+Bytes, structured as:
 -   **First 512 bytes:** The "quantized NoPE" part, containing 512
     `float8_e4m3` values.
 -   **Next 16 bytes:** Scale factors, containing 4 `float32` values.
@@ -73,11 +77,20 @@
     the second for the next 128, and so on.
 -   **Last 128 bytes:** The "RoPE" part, containing 64 `bfloat16` values. This
     part is not quantized for accuracy.
+
+For DeepSeek V4, in the "FP8 with scale" format, each token's KV cache is 584
+Bytes, structured as:
+-   **First 448 bytes:** The "quantized NoPE" part, containing 448
+    `float8_e4m3` values.
+-   **Next 128 bytes:** The "RoPE" part, containing 64 `bfloat16` values. This
+    part is not quantized for accuracy.
+-   **Last 8 bytes:** Scale factors, containing 7 `ue8m0` values + 1B pad.
+    The first `ue8m0` is the scale for the first 64 `float8_e4m3` values,
+    the second for the next 64, and so on.
 """
 
 
 class FlashMLASparseBackend(AttentionBackend):
-    accept_output_buffer: bool = True
     supported_dtypes: ClassVar[list[torch.dtype]] = [torch.bfloat16]
     supported_kv_cache_dtypes: ClassVar[list[CacheDType]] = [
         "auto",
@@ -99,11 +112,15 @@ def get_builder_cls() -> type["FlashMLASparseMetadataBuilder"]:
         return FlashMLASparseMetadataBuilder
 
     @staticmethod
-    def get_impl_cls() -> type["FlashMLASparseImpl"]:
+    def get_impl_cls() -> type[SparseMLAAttentionImpl[Any]]:
         return FlashMLASparseImpl
 
     @classmethod
     def get_supported_head_sizes(cls) -> list[int]:
+        # DeepSeek V3.2 layout: 512 NoPE + 64 RoPE = 576.
+        # DeepSeek V4 uses 448 NoPE + 64 RoPE = 512 and overrides this in
+        # vllm/models/deepseek_v4/nvidia/flashmla.py:
+        # DeepseekV4FlashMLASparseBackend.get_supported_head_sizes.
         return [576]
 
     @classmethod
@@ -127,8 +144,7 @@ def get_kv_cache_shape(
         cache_dtype_str: str = "auto",
     ) -> tuple[int, ...]:
         if cache_dtype_str == "fp8_ds_mla":
-            # custom storage format is 656 bytes
-            #  see FlashMLA readme.md for details
+            # V3.2 main MLA: 656-byte custom storage format. See module docstring.
             return (num_blocks, block_size, 656)
         else:
             return (num_blocks, block_size, head_size)
@@ -159,6 +175,7 @@ class FP8KernelMetadata:
     class FP8SeparatePrefillDecode:
         @dataclass
         class Decode:
+            seq_lens: torch.Tensor
             kernel_metadata: "FlashMLASparseMetadata.FP8KernelMetadata"
             decode_query_len: int  # needed for reshape in spec decode
 
@@ -206,6 +223,13 @@ class Chunk:
     fp8_extra_metadata: FP8SeparatePrefillDecode | FP8KernelMetadata | None = None
     fp8_use_mixed_batch: bool = False
 
+    # Pre-computed C128A metadata (DeepseekV4 only, compress_ratio == 128).
+    # Decode: global slot ids + valid-entry counts (fused from positions).
+    c128a_global_decode_topk_indices: torch.Tensor | None = None
+    c128a_decode_topk_lens: torch.Tensor | None = None
+    # Prefill: local topk indices (used by combine_topk_swa_indices).
+    c128a_prefill_topk_indices: torch.Tensor | None = None
+
 
 def get_prefill_workspace_size(max_model_len: int):
     # NOTE(Lucas): 5 is a magic number for controlling the prefill buffer size.
@@ -235,8 +259,9 @@ def __init__(
         parallel_config = vllm_config.parallel_config
         self.device = device
 
-        # Treat requests with query length <= 1 as decodes to match the
-        # DeepGEMM indexer constraint (fp8_paged_mqa_logits only supports next_n <= 2)
+        # Classify single-token queries (plus num_speculative_tokens via
+        # supports_spec_as_decode=True) as decodes; longer queries go to
+        # prefill.
         self._init_reorder_batch_threshold(1, supports_spec_as_decode=True)
 
         sm_count = num_compute_units(device.index)
@@ -300,6 +325,68 @@ def __init__(
             device=device,
         )
 
+        # DeepseekV4: has compress_ratios in hf_config.
+        hf_config = vllm_config.model_config.hf_config
+        self.is_deepseek_v4 = (
+            hasattr(hf_config, "compress_ratios") and len(hf_config.compress_ratios) > 0
+        )
+        self.compress_ratio = 1
+        if self.is_deepseek_v4:
+            assert hasattr(self.kv_cache_spec, "compress_ratio")
+            self.compress_ratio = self.kv_cache_spec.compress_ratio
+            # Pre-allocate compressed slot mapping buffer for CUDA graph
+            # address stability when compress_ratio > 1.
+            if self.compress_ratio > 1:
+                max_num_batched_tokens = (
+                    vllm_config.scheduler_config.max_num_batched_tokens
+                )
+                self.compressed_slot_mapping_buffer = torch.empty(
+                    max_num_batched_tokens,
+                    dtype=torch.int64,
+                    device=self.device,
+                )
+
+            # Pre-allocate C128A topk buffers for CUDA graph address stability.
+            if self.compress_ratio == 128:
+                max_num_batched_tokens = (
+                    vllm_config.scheduler_config.max_num_batched_tokens
+                )
+                # Pad to B_TOPK alignment (128 covers both h_q=64 B_TOPK=64 and
+                # h_q=128 B_TOPK=128). FlashMLA decode asserts extra_topk % B_TOPK
+                # == 0; unaligned widths (e.g. 17 = ceil(2136/128)) crash the
+                # sm100 head64 kernel. Padded slots stay -1 and decode_lens caps
+                # them via topk_length, so the pad is a no-op at kernel level.
+                # Mirrors _SPARSE_PREFILL_TOPK_ALIGNMENT in cache_utils.py.
+                _C128A_TOPK_ALIGNMENT = 128
+                c128a_max_compressed = cdiv(
+                    self.model_config.max_model_len, self.compress_ratio
+                )
+                c128a_max_compressed = (
+                    cdiv(c128a_max_compressed, _C128A_TOPK_ALIGNMENT)
+                    * _C128A_TOPK_ALIGNMENT
+                )
+                # Stored so _build_c128a_metadata passes it as the kernel's
+                # max_compressed_tokens, matching the buffer stride. Otherwise
+                # the kernel's default 8192 iterates past row width and spills
+                # writes into adjacent rows (present in both decode and prefill
+                # branches of _build_c128a_topk_metadata_kernel).
+                self.c128a_max_compressed = c128a_max_compressed
+                self.c128a_global_decode_buffer = torch.empty(
+                    (max_num_batched_tokens, c128a_max_compressed),
+                    dtype=torch.int32,
+                    device=self.device,
+                )
+                self.c128a_decode_lens_buffer = torch.empty(
+                    max_num_batched_tokens,
+                    dtype=torch.int32,
+                    device=self.device,
+                )
+                self.c128a_prefill_buffer = torch.empty(
+                    (max_num_batched_tokens, c128a_max_compressed),
+                    dtype=torch.int32,
+                    device=self.device,
+                )
+
     def _build_fp8_mixed_decode_prefill(
         self,
         common_attn_metadata: CommonAttentionMetadata,
@@ -364,7 +451,10 @@ def _build_fp8_separate_prefill_decode(
         # For pure decode batches, prefill_request_id will be None
         # For mixed batches, it will have -1 for decode and request_id for prefill
         if num_prefills > 0:
-            seq_lens_cpu = common_attn_metadata.seq_lens.cpu()
+            # Upper bound is exact for prefill rows (the `[num_decodes:]`
+            # slice below), so no D2H sync is needed.
+            seq_lens_cpu = common_attn_metadata.seq_lens_cpu_upper_bound
+            assert seq_lens_cpu is not None
             seq_lens = common_attn_metadata.seq_lens
             query_start_loc_cpu = common_attn_metadata.query_start_loc_cpu
 
@@ -457,15 +547,7 @@ def _build_fp8_separate_prefill_decode(
             decode_query_len = (query_start_loc_cpu[1] - query_start_loc_cpu[0]).item()
 
             # Use padded head count since that's what the kernel will see
-            padded_heads = self.fp8_decode_padded_heads
-            scheduler_metadata, _ = get_mla_metadata(
-                cache_seqlens=self.topk_tokens_tensor[:num_decodes],
-                num_q_tokens_per_head_k=decode_query_len * padded_heads,
-                topk=self.topk_tokens,
-                num_heads_q=padded_heads,
-                num_heads_k=1,
-                is_fp8_kvcache=True,
-            )
+            scheduler_metadata, _ = get_mla_metadata()
 
             kernel_meta = FlashMLASparseMetadata.FP8KernelMetadata(
                 scheduler_metadata=scheduler_metadata,
@@ -473,6 +555,7 @@ def _build_fp8_separate_prefill_decode(
                 cache_lens=self.max_model_len_tensor[:num_decodes],
             )
             fp8_metadata.decode = FP8Meta.Decode(
+                seq_lens=common_attn_metadata.seq_lens[:num_decodes],
                 kernel_metadata=kernel_meta,
                 decode_query_len=decode_query_len,
             )
@@ -499,35 +582,109 @@ def build(
         )
         req_id_per_token = self.req_id_per_token_buffer[:num_tokens]
 
+        slot_mapping = cm.slot_mapping
+        if self.compress_ratio > 1:
+            slot_mapping = get_compressed_slot_mapping(
+                common_attn_metadata.num_actual_tokens,
+                common_attn_metadata.query_start_loc,
+                common_attn_metadata.seq_lens,
+                common_attn_metadata.block_table_tensor.clamp(min=0),
+                int(self.kv_cache_spec.storage_block_size),
+                self.compress_ratio,
+                out=self.compressed_slot_mapping_buffer,
+            )
+
         fp8_extra_metadata: (
             FlashMLASparseMetadata.FP8SeparatePrefillDecode
             | FlashMLASparseMetadata.FP8KernelMetadata
             | None
         ) = None
-        fp8_use_mixed_batch = self.num_heads < MIN_HEADS_FOR_BF16_PREFILL
-        if self.use_fp8_kv_cache:
+        fp8_use_mixed_batch = (
+            self.num_heads < MIN_HEADS_FOR_BF16_PREFILL and not self.is_deepseek_v4
+        )
+        # DeepseekV4 has its own attention impl (DeepseekV4MLAAttention) that does not
+        # consume fp8_extra_metadata. Skipping the build here avoids a
+        # forced D2H sync on seq_lens that would otherwise fire on every
+        # prefill-bearing step, lifting GPU utilization on long-prefill
+        # workloads (e.g. LongBench) from ~83% to ~100%.
+        if self.use_fp8_kv_cache and not self.is_deepseek_v4:
             if fp8_use_mixed_batch:
                 fp8_extra_metadata = self._build_fp8_mixed_decode_prefill(cm)
             else:
                 fp8_extra_metadata = self._build_fp8_separate_prefill_decode(cm)
 
+        # Pre-compute C128A topk indices for DeepseekV4.
+        c128a_fields = {}
+        if self.is_deepseek_v4 and self.compress_ratio == 128:
+            c128a_fields = self._build_c128a_metadata(cm, req_id_per_token)
+
         metadata = FlashMLASparseMetadata(
             num_reqs=cm.num_reqs,
             max_query_len=cm.max_query_len,
             max_seq_len=cm.max_seq_len,
             num_actual_tokens=cm.num_actual_tokens,
             query_start_loc=cm.query_start_loc,
-            slot_mapping=cm.slot_mapping,
+            slot_mapping=slot_mapping,
             block_table=cm.block_table_tensor,
             req_id_per_token=req_id_per_token,
             block_size=self.kv_cache_spec.block_size,
             topk_tokens=self.topk_tokens,
             fp8_extra_metadata=fp8_extra_metadata,
             fp8_use_mixed_batch=fp8_use_mixed_batch,
+            **c128a_fields,
         )
 
         return metadata
 
+    def _build_c128a_metadata(
+        self,
+        cm: CommonAttentionMetadata,
+        req_id_per_token: torch.Tensor,
+    ) -> dict[str, torch.Tensor | None]:
+        """Pre-compute C128A topk indices for DeepseekV4 (compress_ratio >= 128)."""
+        # Must match SWA's decode split (no `require_uniform=True`) so
+        # `c128a_global_decode_topk_indices.shape[0]` lines up with q in
+        # `_forward_decode`. The per-token C128A kernel handles non-uniform
+        # query lengths.
+        (num_decodes, _, num_decode_tokens, num_prefill_tokens) = (
+            split_decodes_and_prefills(
+                cm,
+                decode_threshold=self.reorder_batch_threshold or 1,
+            )
+        )
+
+        num_total = num_decode_tokens + num_prefill_tokens
+        if num_total == 0:
+            return {}
+
+        assert cm.positions is not None, (
+            "positions is required for C128A metadata build"
+        )
+        block_size = self.kv_cache_spec.block_size // self.compress_ratio
+        global_decode, decode_lens, prefill_local = build_c128a_topk_metadata(
+            cm.positions[:num_total],
+            self.compress_ratio,
+            num_decode_tokens,
+            req_id_per_token,
+            cm.block_table_tensor[:num_decodes],
+            block_size,
+            cm.slot_mapping,
+            self.c128a_global_decode_buffer,
+            self.c128a_decode_lens_buffer,
+            self.c128a_prefill_buffer,
+            max_compressed_tokens=self.c128a_max_compressed,
+        )
+
+        result: dict[str, torch.Tensor | None] = {}
+        if num_decode_tokens > 0:
+            result["c128a_global_decode_topk_indices"] = global_decode.view(
+                num_decode_tokens, 1, -1
+            )
+            result["c128a_decode_topk_lens"] = decode_lens
+        if num_prefill_tokens > 0:
+            result["c128a_prefill_topk_indices"] = prefill_local
+        return result
+
 
 class FlashMLASparseImpl(SparseMLAAttentionImpl[FlashMLASparseMetadata]):
     @staticmethod
@@ -549,7 +706,7 @@ def __init__(
         attn_type: str,
         kv_sharing_target_layer_name: str | None,
         # MLA Specific Arguments
-        topk_indice_buffer: torch.Tensor | None = None,
+        topk_indices_buffer: torch.Tensor | None = None,
         indexer: "Indexer | None" = None,
         **mla_args,
     ) -> None:
@@ -571,7 +728,7 @@ def __init__(
         vllm_config = get_current_vllm_config()
         max_tokens = vllm_config.scheduler_config.max_num_batched_tokens
         q_concat_shape = (max_tokens, num_heads, head_size)
-        if kv_cache_dtype.startswith("fp8"):
+        if is_quantized_kv_cache(kv_cache_dtype):
             assert kv_cache_dtype == "fp8_ds_mla", (
                 "FlashMLA Sparse Attention backend fp8 only supports "
                 "fp8_ds_mla kv-cache dtype"
@@ -612,7 +769,11 @@ def _forward_bf16_kv(
             NUM_TOPK_TOKENS=topk_indices.shape[1],
         )
 
-        return self._bf16_flash_mla_kernel(q, kv_c_and_k_pe_cache, topk_indices)
+        return self._bf16_flash_mla_kernel(
+            q,
+            kv_c_and_k_pe_cache,
+            topk_indices,
+        )
 
     def _forward_fp8_kv_separate_prefill_decode(
         self,
@@ -653,7 +814,10 @@ def _forward_fp8_kv_separate_prefill_decode(
         fp8_metadata = attn_metadata.fp8_extra_metadata
         assert isinstance(fp8_metadata, FlashMLASparseMetadata.FP8SeparatePrefillDecode)
 
-        def _fp8_decode(q: torch.Tensor, topk_indices: torch.Tensor) -> torch.Tensor:
+        def _fp8_decode(
+            q: torch.Tensor,
+            topk_indices: torch.Tensor,
+        ) -> torch.Tensor:
             # Reshape q: (num_decode_tokens, num_heads, head_dim)
             #         -> (num_decodes, seq_len, num_heads, head_dim)
             q = reshape_query_for_spec_decode(q, num_decodes)
@@ -689,7 +853,8 @@ def _fp8_decode(q: torch.Tensor, topk_indices: torch.Tensor) -> torch.Tensor:
 
             if num_decode_tokens > 0:
                 attn_out[:num_decode_tokens] = _fp8_decode(
-                    q[:num_decode_tokens], topk_indices[:num_decode_tokens]
+                    q[:num_decode_tokens],
+                    topk_indices[:num_decode_tokens],
                 )
 
             assert fp8_metadata.prefill is not None
@@ -820,6 +985,7 @@ def _bf16_flash_mla_kernel(
         output = flash_mla_sparse_fwd(
             q, kv_c_and_k_pe_cache, topk_indices, self.softmax_scale
         )[0]
+
         output = output[:, : self.num_heads, :]
         return output
 
@@ -861,3 +1027,123 @@ def forward_mqa(
             )
 
         return attn_out, None
+
+
+def build_c128a_topk_metadata(
+    positions: torch.Tensor,
+    compress_ratio: int,
+    num_decode_tokens: int,
+    token_to_req_indices: torch.Tensor,
+    block_table: torch.Tensor,
+    block_size: int,
+    slot_mapping: torch.Tensor,
+    global_decode_buffer: torch.Tensor,
+    decode_lens_buffer: torch.Tensor,
+    prefill_buffer: torch.Tensor,
+    max_compressed_tokens: int = 8192,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    """Single kernel for all C128A tokens (decode + prefill).
+
+    Decode tokens: position → block_table lookup → global slot ids + topk_lens.
+    Prefill tokens: position → local indices [0, ..., n-1, -1, ...].
+
+    Writes into pre-allocated buffers for CUDA graph address stability.
+    Returns slices of the buffers.
+    """
+    num_tokens = positions.shape[0]
+    num_prefill_tokens = num_tokens - num_decode_tokens
+
+    global_decode = global_decode_buffer[:num_decode_tokens]
+    decode_lens = decode_lens_buffer[:num_decode_tokens]
+    prefill_local = prefill_buffer[:num_prefill_tokens]
+
+    if num_tokens == 0:
+        return global_decode, decode_lens, prefill_local
+
+    _build_c128a_topk_metadata_kernel[(num_tokens,)](
+        global_decode_buffer,
+        global_decode_buffer.stride(0),
+        decode_lens_buffer,
+        prefill_buffer,
+        prefill_buffer.stride(0),
+        positions,
+        compress_ratio,
+        max_compressed_tokens,
+        num_decode_tokens,
+        token_to_req_indices,
+        block_table,
+        block_table.stride(0),
+        block_size,
+        slot_mapping,
+        BLOCK_SIZE=1024,
+    )
+    return global_decode, decode_lens, prefill_local
+
+
+@triton.jit
+def _build_c128a_topk_metadata_kernel(
+    # Decode outputs
+    global_decode_ptr,
+    global_decode_stride,
+    decode_lens_ptr,
+    # Prefill output
+    prefill_local_ptr,
+    prefill_local_stride,
+    # Inputs
+    positions_ptr,
+    compress_ratio,
+    max_compressed_tokens,
+    num_decode_tokens,
+    token_to_req_indices_ptr,
+    block_table_ptr,
+    block_table_stride,
+    block_size,
+    slot_mapping_ptr,
+    BLOCK_SIZE: tl.constexpr,
+):
+    token_idx = tl.program_id(0)
+    position = tl.load(positions_ptr + token_idx)
+    num_compressed = (position + 1) // compress_ratio
+    num_compressed = tl.minimum(num_compressed, max_compressed_tokens)
+    is_decode = token_idx < num_decode_tokens
+
+    if is_decode:
+        # --- Decode: block-table lookup → global slot ids + count ---
+        is_valid_token = tl.load(slot_mapping_ptr + token_idx) >= 0
+        req_idx = tl.load(token_to_req_indices_ptr + token_idx)
+        count = tl.zeros((), dtype=tl.int32)
+        for i in range(0, max_compressed_tokens, BLOCK_SIZE):
+            offset = i + tl.arange(0, BLOCK_SIZE)
+            mask = offset < max_compressed_tokens
+            is_valid = offset < num_compressed
+
+            block_indices = offset // block_size
+            block_numbers = tl.load(
+                block_table_ptr + req_idx * block_table_stride + block_indices,
+                mask=mask & is_valid,
+            )
+            block_offsets = offset % block_size
+            slot_ids = block_numbers * block_size + block_offsets
+            slot_ids = tl.where(is_valid, slot_ids, -1)
+            tl.store(
+                global_decode_ptr + token_idx * global_decode_stride + offset,
+                slot_ids,
+                mask=mask,
+            )
+            count += tl.sum(is_valid.to(tl.int32), axis=0)
+
+        tl.store(
+            decode_lens_ptr + token_idx,
+            tl.where(is_valid_token, count, 0),
+        )
+    else:
+        # --- Prefill: write local indices ---
+        pfx_idx = token_idx - num_decode_tokens
+        for i in range(0, max_compressed_tokens, BLOCK_SIZE):
+            offset = i + tl.arange(0, BLOCK_SIZE)
+            mask = offset < max_compressed_tokens
+            tl.store(
+                prefill_local_ptr + pfx_idx * prefill_local_stride + offset,
+                tl.where(offset < num_compressed, offset, -1),
+                mask=mask,
+            )
diff --git a/vllm/v1/attention/backends/mla/indexer.py b/vllm/v1/attention/backends/mla/indexer.py
index 2fa9fe851fc9..2870ec9a15c0 100644
--- a/vllm/v1/attention/backends/mla/indexer.py
+++ b/vllm/v1/attention/backends/mla/indexer.py
@@ -4,9 +4,11 @@
 
 import torch
 
+import vllm.envs as envs
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
+from vllm.triton_utils import tl, triton
 from vllm.utils.deep_gemm import (
     get_paged_mqa_logits_metadata,
     has_deep_gemm,
@@ -20,16 +22,99 @@
     CommonAttentionMetadata,
     MultipleOf,
 )
+from vllm.v1.attention.backends.mla.compressor_utils import get_compressed_slot_mapping
 from vllm.v1.attention.backends.utils import (
     split_decodes_and_prefills,
-    split_prefill_chunks,
 )
-from vllm.v1.kv_cache_interface import AttentionSpec
+from vllm.v1.kv_cache_interface import AttentionSpec, MLAAttentionSpec
 from vllm.v1.worker.cp_utils import get_total_cp_world_size
 
 logger = init_logger(__name__)
 
 
+@triton.jit
+def _prepare_uniform_decode_kernel(
+    seq_lens_ptr,
+    decode_seq_lens_ptr,
+    block_table_ptr,
+    block_table_stride,
+    expanded_block_table_ptr,
+    expanded_bt_stride,
+    decode_lens_ptr,
+    max_decode_len,
+    BLOCK_SIZE: tl.constexpr,
+):
+    idx = tl.program_id(0)
+    req_id = idx // max_decode_len
+    local_idx = idx % max_decode_len
+
+    # Compute number of KVs attended to by this token.
+    seq_len = tl.load(seq_lens_ptr + req_id)
+    per_token_seq_len = seq_len - max_decode_len + local_idx + 1
+    tl.store(decode_seq_lens_ptr + idx, per_token_seq_len)
+
+    # Copy block table row.
+    src = block_table_ptr + req_id * block_table_stride
+    dst = expanded_block_table_ptr + idx * expanded_bt_stride
+    for i in tl.range(0, expanded_bt_stride, BLOCK_SIZE):
+        off = i + tl.arange(0, BLOCK_SIZE)
+        mask = off < expanded_bt_stride
+        src_block = tl.load(src + off, mask=mask)
+        tl.store(dst + off, src_block, mask=mask)
+
+    # All reqs now have decode_len = 1.
+    tl.store(decode_lens_ptr + idx, 1)
+
+
+def split_indexer_prefill_chunks(
+    seq_lens_cpu: torch.Tensor,
+    query_lens_cpu: torch.Tensor,
+    workspace_size: int,
+    max_logits_bytes: int,
+    request_offset: int = 0,
+) -> list[tuple[slice, slice]]:
+    """
+    Split prefill requests into chunks for the sparse indexer, respecting:
+    - N constraint: total_seq_lens <= workspace_size (existing O(N) workspace)
+    - Logits constraint: M * N * 4 <= max_logits_bytes
+
+    When a single request-level chunk still exceeds the logits budget,
+    sub-chunks on the query dimension (M) to bound peak memory.
+
+    Returns list of (req_slice, query_slice) tuples.
+    """
+    chunks: list[tuple[slice, slice]] = []
+    n = len(seq_lens_cpu)
+    max_logits_elems = max_logits_bytes // 4
+    end = 0
+
+    while end < n:
+        start, chunk_m, chunk_n = end, 0, 0
+
+        while end < n:
+            q, s = query_lens_cpu[end].item(), seq_lens_cpu[end].item()
+            new_m, new_n = chunk_m + q, chunk_n + s
+            if new_n <= workspace_size and new_m * new_n <= max_logits_elems:
+                chunk_m, chunk_n = new_m, new_n
+                end += 1
+            else:
+                break
+
+        # A single request can exceed the budget, requiring sub-chunking
+        # on the query dimension.
+        if end == start:
+            chunk_m, chunk_n = query_lens_cpu[end].item(), seq_lens_cpu[end].item()
+            end += 1
+
+        req_slice = slice(start + request_offset, end + request_offset)
+        max_q = max(1, max_logits_elems // chunk_n) if chunk_n > 0 else chunk_m
+        for q_off in range(0, chunk_m, max_q):
+            sub_m = min(max_q, chunk_m - q_off)
+            chunks.append((req_slice, slice(q_off, q_off + sub_m)))
+
+    return chunks
+
+
 class DeepseekV32IndexerBackend(AttentionBackend):
     @staticmethod
     def get_name() -> str:
@@ -37,7 +122,7 @@ def get_name() -> str:
 
     @staticmethod
     def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
-        return [1 if current_platform.is_rocm() else 64]
+        return [1, 64] if current_platform.is_rocm() else [64]
 
     @classmethod
     def get_supported_head_sizes(cls) -> list[int]:
@@ -70,6 +155,16 @@ def get_kv_cache_stride_order(
         return (0, 1, 2)
 
 
+class DeepseekV4IndexerBackend(DeepseekV32IndexerBackend):
+    @staticmethod
+    def get_name() -> str:
+        return "DEEPSEEK_V4_INDEXER"
+
+    @staticmethod
+    def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
+        return [256]
+
+
 @dataclass
 class DeepseekV32IndexerPrefillChunkMetadata:
     block_table: torch.Tensor
@@ -81,6 +176,7 @@ class DeepseekV32IndexerPrefillChunkMetadata:
     token_start: int
     token_end: int
     num_reqs: int
+    skip_kv_gather: bool = False
 
 
 @dataclass
@@ -91,12 +187,14 @@ class DeepseekV32IndexerPrefillMetadata:
 @dataclass
 class DeepSeekV32IndexerDecodeMetadata:
     block_table: torch.Tensor
+    # seq_lens: per-token effective context lengths.
+    #   - flatten path / plain decode: 1D (batch_size,)
+    #   - native MTP path: 2D (B, next_n) where [b,j] = L_b - next_n + j + 1
+    # Both fp8_fp4_paged_mqa_logits and the topk kernels accept both shapes.
     seq_lens: torch.Tensor
     decode_lens: torch.Tensor
     requires_padding: bool
     schedule_metadata: torch.Tensor
-    use_large_context_topk: bool
-    offsets: torch.Tensor | None  # Precomputed offsets for speculative decoding
 
 
 @dataclass
@@ -104,16 +202,8 @@ class DeepseekV32IndexerMetadata:
     # FIXME (zyongye)
     # hacky way to access the data now, need to be in chunked meta
     seq_lens: torch.Tensor
-
-    num_reqs: int
-    max_query_len: int
     max_seq_len: int
-
-    num_actual_tokens: int  # Number of tokens excluding padding.
-    query_start_loc: torch.Tensor
     slot_mapping: torch.Tensor
-    # The dimension of the attention heads
-    head_dim: int
 
     # New for MLA (compared to FlashAttention)
     # For handling prefill decode split
@@ -126,71 +216,6 @@ class DeepseekV32IndexerMetadata:
     prefill: DeepseekV32IndexerPrefillMetadata | None = None
 
 
-# TODO (zyongye) optimize this, this is now vibe coded
-def kv_spans_from_batches(
-    start_seq_loc: torch.Tensor, seq_len_per_batch: torch.Tensor, device: torch.device
-) -> tuple[torch.Tensor, torch.Tensor]:
-    """
-    Args:
-      start_seq_loc: 1D long tensor [B+1], cumulative counts of
-                     selected tokens per batch.
-            Example: [0, 2, 4, 7] ->
-                     batch sizes (selected) [2, 2, 3], N=7 tokens total.
-      seq_len_per_batch: 1D long tensor [B],
-                         full sequence length (KV length) of each batch.
-                         Example: [5, 9, 4].
-
-    Returns:
-      start_tensor: 1D long tensor [N], start offset in the
-                    concatenated KV cache for each token's batch.
-      end_location: 1D long tensor [N],
-                    **exclusive** end = start + token's local position.
-                    (So the attended KV slice is kv[start:end].)
-
-    Assumes each batch contributes its full `seq_len_per_batch[i]`
-    keys to the KV cache, andthe selected tokens within a batch
-    are the **last** `counts[i]` positions of that sequence.
-    """
-    q = start_seq_loc.to(dtype=torch.long)
-    L = seq_len_per_batch.to(dtype=torch.long)
-    assert q.dim() == 1 and L.dim() == 1
-    assert q.numel() == L.numel() + 1, "start_seq_loc must have length B+1"
-
-    # Selected tokens per batch and totals
-    counts = q[1:] - q[:-1]  # [B]
-    N = int(q[-1].item())  # total selected tokens
-    B = L.numel()
-
-    if N == 0:
-        return (
-            torch.empty(0, dtype=torch.long, device=device),
-            torch.empty(0, dtype=torch.long, device=device),
-        )
-
-    # KV start offsets per batch in the concatenated KV cache
-    kv_starts_per_batch = torch.cumsum(L, dim=0) - L  # [B]
-
-    # For each selected token, which batch does it belong to?
-    batch_id = torch.repeat_interleave(torch.arange(B), counts)  # [N]
-
-    # Map batch KV start to each token
-    start_tensor = kv_starts_per_batch[batch_id]  # [N]
-
-    # End-align local positions inside each batch:
-    # local_pos = L[b] - counts[b] + (1..counts[b])  for each batch b
-    L_expand = torch.repeat_interleave(L, counts)  # [N]
-    m_expand = torch.repeat_interleave(counts, counts)  # [N]
-    # position within the selected block: 1..counts[b]
-    pos_within = (
-        torch.arange(N, dtype=torch.long) - torch.repeat_interleave(q[:-1], counts) + 1
-    )
-
-    local_pos = L_expand - m_expand + pos_within  # [N], 1-based
-    end_location = start_tensor + local_pos  # exclusive end
-
-    return start_tensor.int().to(device), end_location.int().to(device)
-
-
 def get_max_prefill_buffer_size(vllm_config: VllmConfig):
     max_model_len = vllm_config.model_config.max_model_len
     # NOTE(Chen): 40 is a magic number for controlling the prefill buffer size.
@@ -206,7 +231,7 @@ def get_max_prefill_buffer_size(vllm_config: VllmConfig):
 
 class DeepseekV32IndexerMetadataBuilder(AttentionMetadataBuilder):
     reorder_batch_threshold: int = 1
-    natively_supported_next_n: list[int] = [1, 2]
+    natively_supported_next_n_fp4: list[int] = [1, 2]
     # TODO (matt): integrate kernel with next_n = 4 support
 
     @classmethod
@@ -227,31 +252,58 @@ def __init__(self, *args, **kwargs):
             if self.vllm_config.speculative_config
             else 0
         )
+        self.use_fp4_indexer_cache = (
+            self.vllm_config.attention_config.use_fp4_indexer_cache
+        )
+
+        assert (
+            current_platform.is_device_capability_family(100)
+            or not self.use_fp4_indexer_cache
+        ), (
+            "use_fp4_indexer_cache requires Blackwell datacenter GPUs "
+            "(sm_10x, e.g. B200/GB200); sm_120 (consumer Blackwell) and "
+            "earlier architectures are not supported."
+        )
+
         next_n = self.num_speculative_tokens + 1
         self.reorder_batch_threshold += self.num_speculative_tokens
-        self.use_flattening = next_n not in self.natively_supported_next_n
+        # NOTE(zyongye) fp4 indexer cache only natively supports next_n in
+        # natively_supported_next_n_fp4; for other next_n values we fall back
+        # to the flattening path. Outside the SM100 datacenter family the FP8
+        # paged MQA logits kernel has the same [1, 2] constraint (deepgemm
+        # smxx_fp8_fp4_paged_mqa_logits.hpp:233), so flatten there too.
+        self.use_flattening = (
+            self.use_fp4_indexer_cache
+            or not current_platform.is_device_capability_family(100)
+        ) and next_n not in self.natively_supported_next_n_fp4
 
         sm_count = num_compute_units(self.device.index)
         self.num_sms = sm_count
 
-        self.decode_lens_buffer = torch.empty(
-            (scheduler_config.max_num_batched_tokens,),
-            dtype=torch.int32,
-            device=self.device,
-        )
         self.offsets_buffer = torch.arange(
             next_n, device=self.device, dtype=torch.int32
         )
-        self.arange_buffer = torch.arange(
-            scheduler_config.max_num_seqs * next_n,
+        self.decode_lens_buffer = torch.zeros(
+            (scheduler_config.max_num_batched_tokens,),
             dtype=torch.int32,
             device=self.device,
         )
-        self.expanded_seq_lens_buffer = torch.zeros(
+        # Shared workspace for decode seq_lens. Native MTP views this as
+        # (B, max_decode_len) at runtime, keeping context_lens contiguous even
+        # when max_decode_len is smaller than next_n.
+        self.decode_seq_lens_buffer = torch.zeros(
             (scheduler_config.max_num_batched_tokens,),
             dtype=torch.int32,
             device=self.device,
         )
+        self.arange_buffer = torch.arange(
+            max(
+                scheduler_config.max_num_seqs * next_n,
+                scheduler_config.max_num_batched_tokens,
+            ),
+            dtype=torch.int32,
+            device=self.device,
+        )
         max_num_blocks_per_req = cdiv(
             self.vllm_config.model_config.max_model_len,
             self.kv_cache_spec.block_size * get_total_cp_world_size(),
@@ -270,45 +322,147 @@ def __init__(self, *args, **kwargs):
             (self.num_sms + 1, 2), dtype=torch.int32, device=self.device
         )
 
-    def build_one_prefill_chunk(
-        self, reqs_start, reqs_end, query_start_loc_cpu, seq_lens_cpu, block_table
-    ):
-        prefill_query_start_loc = (
-            query_start_loc_cpu[reqs_start : reqs_end + 1]
-            - query_start_loc_cpu[reqs_start]
-        )
-        cu_seqlen_ks, cu_seqlen_ke = kv_spans_from_batches(
-            prefill_query_start_loc, seq_lens_cpu[reqs_start:reqs_end], self.device
-        )
-        token_start = query_start_loc_cpu[reqs_start].item()
-        token_end = query_start_loc_cpu[reqs_end].item()
-        total_seq_lens = seq_lens_cpu[reqs_start:reqs_end].sum()
-        seq_idx = torch.arange(0, reqs_end - reqs_start, dtype=torch.int32)
-        token_to_seq = torch.repeat_interleave(
-            seq_idx, seq_lens_cpu[reqs_start:reqs_end]
-        ).to(self.device)
-        assert total_seq_lens <= self.max_prefill_buffer_size
-        cu_seq_lens = (
-            torch.cat(
-                [
-                    torch.zeros(1, dtype=torch.int32),
-                    seq_lens_cpu[reqs_start:reqs_end].cumsum(dim=0),
-                ]
+        # KV compression. Default to 1 for no compression.
+        self.compress_ratio = 1
+        # Get compress_ratio for DeepseekV4 support
+        if isinstance(self.kv_cache_spec, MLAAttentionSpec):
+            self.compress_ratio = self.kv_cache_spec.compress_ratio
+
+        # Pre-allocate buffers for CUDA graph compatibility when
+        if self.compress_ratio > 1:
+            # compress_ratio > 1 (DeepseekV4)
+            # Compressed slot mapping output buffer
+            self.compressed_slot_mapping_buffer = torch.zeros(
+                (scheduler_config.max_num_batched_tokens,),
+                dtype=torch.int64,
+                device=self.device,
+            )
+            # Buffer for compressed seq_lens in decode path
+            self.expanded_seq_lens_buffer = torch.zeros(
+                (scheduler_config.max_num_batched_tokens,),
+                dtype=torch.int32,
+                device=self.device,
             )
-            .to(torch.int32)
-            .to(self.device)
-        )
-        return DeepseekV32IndexerPrefillChunkMetadata(
-            cu_seqlen_ks=cu_seqlen_ks,
-            cu_seqlen_ke=cu_seqlen_ke,
-            cu_seq_lens=cu_seq_lens,
-            token_to_seq=token_to_seq,
-            total_seq_lens=total_seq_lens,
-            block_table=block_table[reqs_start:reqs_end],
-            token_start=token_start,
-            token_end=token_end,
-            num_reqs=reqs_end - reqs_start,
-        )
+
+    def _prepare_decode_tensors(
+        self,
+        seq_lens: torch.Tensor,
+        block_table: torch.Tensor,
+        decode_lens: torch.Tensor,
+        decode_lens_cpu: torch.Tensor,
+        query_start_loc: torch.Tensor,
+        num_decodes: int,
+        num_decode_tokens: int,
+        use_native: bool,
+        next_n: int,
+        max_decode_len: int,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, int, bool]:
+        """Expand seq_lens/block_table/decode_lens for the decode kernels.
+
+        Flatten path (not use_native, max_decode_len > 1):
+          Each multi-token decode request is expanded into individual
+          single-token entries so the kernel always sees next_n=1.
+
+        Native path (use_native or max_decode_len == 1):
+          Plain decode or spec-decode with 2D per-token context lengths.
+
+        Returns (seq_lens, block_table, decode_lens, batch_size, requires_padding).
+        seq_lens is 1D (batch_size,) for flatten/plain, 2D (B, max_decode_len)
+        for native MTP.
+        """
+        min_decode_len = int(decode_lens_cpu.min().item())
+        if not use_native and max_decode_len > 1:
+            assert self.decode_seq_lens_buffer.dim() == 1
+            if min_decode_len == max_decode_len:
+                # Uniform decode lengths.
+                num_decode_tokens = num_decodes * max_decode_len
+                _prepare_uniform_decode_kernel[(num_decode_tokens,)](
+                    seq_lens,
+                    self.decode_seq_lens_buffer,
+                    block_table,
+                    block_table.stride(0),
+                    self.expanded_block_table_buffer,
+                    self.expanded_block_table_buffer.stride(0),
+                    self.decode_lens_buffer,
+                    max_decode_len,
+                    BLOCK_SIZE=1024,
+                )
+                self.decode_seq_lens_buffer[num_decode_tokens:] = 0
+                seq_lens = self.decode_seq_lens_buffer[:num_decode_tokens]
+                block_table = self.expanded_block_table_buffer[:num_decode_tokens]
+                decode_lens = self.decode_lens_buffer[:num_decode_tokens]
+                return seq_lens, block_table, decode_lens, num_decode_tokens, False
+            else:
+                # Variable decode lengths.
+                # Assume 4 requests with seq_lens [10, 7, 12, 0] (the final req is
+                # padding) and decode_lens [3, 1, 4, 0] in the below example comments.
+                # The context lengths are therefore
+                # [10-3, 7-1, 12-4, 0-0] = [7, 6, 8, 0].
+
+                # 3 + 1 + 4 + 0 = 8
+                actual_expanded = int(decode_lens_cpu.sum().item())
+
+                # Fuse expanded_base and expanded_starts into a single
+                # repeat_interleave:
+                # seq_len_i = (context_start[b] - query_start_loc[b]) + arange[i] + 1
+                # where context_start[b] = seq_lens[b] - decode_lens[b].
+                # Example: offsets = [7-0, 6-3, 8-4, 0-8] = [7, 3, 4, -8]
+                # expanded_offsets  = [7, 7, 7, 3, 4, 4, 4, 4]
+                # result            = [8, 9, 10, 7, 9, 10, 11, 12]
+                expanded_offsets = torch.repeat_interleave(
+                    seq_lens - decode_lens - query_start_loc,
+                    decode_lens,
+                    output_size=actual_expanded,
+                )
+
+                # [8, 9, 10, 7, 9, 10, 11, 12, ...] where ... is unused buffer space
+                self.decode_seq_lens_buffer[:actual_expanded] = (
+                    expanded_offsets + self.arange_buffer[:actual_expanded] + 1
+                )
+                self.decode_seq_lens_buffer[actual_expanded:] = 0
+                seq_lens = self.decode_seq_lens_buffer[:num_decode_tokens]
+
+                # Give each of the flattened entries the same block table row as the
+                # original request.
+                self.expanded_block_table_buffer[:actual_expanded] = (
+                    torch.repeat_interleave(
+                        block_table, decode_lens, dim=0, output_size=actual_expanded
+                    )
+                )
+                if actual_expanded < num_decode_tokens:
+                    self.expanded_block_table_buffer[
+                        actual_expanded:num_decode_tokens, 0
+                    ] = 0
+                block_table = self.expanded_block_table_buffer[:num_decode_tokens]
+
+                # All reqs now have decode_len=1
+                self.decode_lens_buffer[:num_decode_tokens] = 1
+                decode_lens = self.decode_lens_buffer[:num_decode_tokens]
+                return seq_lens, block_table, decode_lens, num_decode_tokens, False
+        else:
+            # Native path: plain decode (next_n==1) or spec decode
+            # with 2D per-token context lengths (next_n > 1).
+            #
+            # When decode_lens are not truly uniform (e.g. some requests have
+            # decode_len < next_n due to padding or short prefills), the simple
+            # reshape in sparse_attn_indexer won't work. Use pack_seq_triton
+            # (requires_padding) instead.
+            requires_padding = min_decode_len != max_decode_len
+            if use_native and next_n > 1:
+                assert self.decode_seq_lens_buffer.dim() == 1
+                # (B, max_decode_len): token j attends to
+                # L - max_decode_len + j + 1 KV tokens.
+                seq_lens_buffer = self.decode_seq_lens_buffer[
+                    : num_decodes * max_decode_len
+                ].view(num_decodes, max_decode_len)
+                seq_lens_buffer[:] = (
+                    seq_lens.unsqueeze(1)
+                    - max_decode_len
+                    + 1
+                    + self.offsets_buffer[:max_decode_len]
+                )
+                seq_lens = seq_lens_buffer
+            return seq_lens, block_table, decode_lens, num_decodes, requires_padding
 
     def build(
         self,
@@ -318,8 +472,12 @@ def build(
     ) -> DeepseekV32IndexerMetadata:
         num_reqs = common_attn_metadata.num_reqs
         num_tokens = common_attn_metadata.num_actual_tokens
-
+        query_start_loc = common_attn_metadata.query_start_loc
         query_start_loc_cpu = common_attn_metadata.query_start_loc_cpu
+        seq_lens = common_attn_metadata.seq_lens
+        slot_mapping = common_attn_metadata.slot_mapping
+        block_table = common_attn_metadata.block_table_tensor
+
         num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = (
             split_decodes_and_prefills(
                 common_attn_metadata,
@@ -331,26 +489,67 @@ def build(
         assert num_decodes + num_prefills == num_reqs
         assert num_decode_tokens + num_prefill_tokens == num_tokens
 
+        compressed_slot_mapping = slot_mapping
+        compressed_seq_lens = seq_lens
+        if self.compress_ratio > 1:
+            compressed_slot_mapping = get_compressed_slot_mapping(
+                num_tokens,
+                query_start_loc,
+                seq_lens,
+                block_table,
+                self.kv_cache_spec.storage_block_size,
+                self.compress_ratio,
+                out=self.compressed_slot_mapping_buffer,
+            )
+            compressed_seq_lens = seq_lens // self.compress_ratio
+
         prefill_metadata = None
         if num_prefills > 0:
-            chunk_seq_ids = split_prefill_chunks(
-                common_attn_metadata.seq_lens_cpu[num_decodes:],
+            # This CPU value is an upper bound for async-spec extend rows.  It
+            # is safe for chunking/allocation because CUDA metadata below is
+            # built from exact device seq_lens and gather ignores the tail.
+            assert common_attn_metadata.seq_lens_cpu_upper_bound is not None
+            seq_lens_cpu = common_attn_metadata.seq_lens_cpu_upper_bound
+            compressed_seq_lens_cpu = (
+                seq_lens_cpu // self.compress_ratio
+                if self.compress_ratio > 1
+                else seq_lens_cpu
+            )
+            prefill_query_lens_cpu = torch.diff(
+                query_start_loc_cpu[num_decodes : num_decodes + num_prefills + 1]
+            )
+            max_logits_bytes = envs.VLLM_SPARSE_INDEXER_MAX_LOGITS_MB * 1024 * 1024
+            # Upper bound is exact for prefill rows (the `[num_decodes:]`
+            # slice below).
+            assert common_attn_metadata.seq_lens_cpu_upper_bound is not None
+            seq_lens_cpu = common_attn_metadata.seq_lens_cpu_upper_bound
+            chunk_specs = split_indexer_prefill_chunks(
+                compressed_seq_lens_cpu[num_decodes:],
+                prefill_query_lens_cpu,
                 self.max_prefill_buffer_size,
+                max_logits_bytes,
                 request_offset=num_decodes,
             )
-            chunks = [
-                self.build_one_prefill_chunk(
-                    reqs_start,
-                    reqs_end,
+
+            chunks = []
+            for req_slice, query_slice in chunk_specs:
+                metadata = build_prefill_chunk_metadata(
+                    req_slice.start,
+                    req_slice.stop,
+                    query_start_loc,
                     query_start_loc_cpu,
-                    common_attn_metadata.seq_lens_cpu,
+                    seq_lens,
+                    compressed_seq_lens,
+                    compressed_seq_lens_cpu,
                     common_attn_metadata.block_table_tensor,
+                    self.compress_ratio,
+                    query_slice=query_slice,
+                    skip_kv_gather=query_slice.start > 0,
                 )
-                for reqs_start, reqs_end in chunk_seq_ids
-            ]
-            prefill_metadata = DeepseekV32IndexerPrefillMetadata(
-                chunks=chunks,
-            )
+                # Skip when total_seq_lens is 0 (i.e., no compressed token).
+                if metadata is not None:
+                    chunks.append(metadata)
+            prefill_metadata = DeepseekV32IndexerPrefillMetadata(chunks)
 
         decode_metadata = None
         if num_decodes > 0:
@@ -366,116 +565,69 @@ def build(
             seq_lens = common_attn_metadata.seq_lens[:num_decodes]
             block_table = common_attn_metadata.block_table_tensor[:num_decodes, ...]
 
-            # Padded CUDA graph requests have block_table entries of -1.
-            # Clamp to 0 to prevent OOB access in the DeepGEMM kernel.
-            # This is safe because padded requests have seq_lens=0, so the
-            # kernel produces no meaningful output for those rows.
-            block_table.clamp_(min=0)
-
             max_decode_len = int(decode_lens_cpu.max().item())
             next_n = 1 + self.num_speculative_tokens
-            use_native = not self.use_flattening and max_decode_len == next_n
-
-            if use_native and next_n > 1:
-                offsets = self.offsets_buffer
-                batch_size = num_decodes
-            elif max_decode_len > 1:
-                # Flatten multi-token decode requests into single-token
-                # batch entries, expanding seq_lens and block tables so
-                # the kernel always sees next_n=1.
-
-                # Also handles the edge case where use_flattening=False
-                # but max_decode_len != next_n (e.g. a batch containing some
-                # short prefills (q_len < next_n) and no true decodes).
-
-                # Assume 4 requests with seq_lens [10, 7, 12, 0] (the final req is
-                # padding) and decode_lens [3, 1, 4, 0] in the below example comments.
-                # The context lengths are therefore
-                # [10-3, 7-1, 12-4, 0-0] = [7, 6, 8, 0].
-
-                # 3 + 1 + 4 + 0 = 8
-                actual_expanded = int(decode_lens_cpu.sum().item())
-
-                # [7, 6, 8, 0] -> [7, 7, 7, 6, 8, 8, 8, 8]
-                expanded_base = torch.repeat_interleave(
-                    seq_lens - decode_lens, decode_lens, output_size=actual_expanded
-                )
-
-                # [0, 3, 4, 8] -> [0, 0, 0, 3, 4, 4, 4, 4]
-                expanded_starts = torch.repeat_interleave(
-                    common_attn_metadata.query_start_loc[:num_decodes],
-                    decode_lens,
-                    output_size=actual_expanded,
-                )
-
-                # [0, 1, 2, 0, 0, 1, 2, 3]
-                positions_within = (
-                    self.arange_buffer[:actual_expanded] - expanded_starts
+            use_native = not self.use_flattening and max_decode_len <= next_n
+
+            seq_lens, block_table, decode_lens, batch_size, requires_padding = (
+                self._prepare_decode_tensors(
+                    seq_lens=seq_lens,
+                    block_table=block_table,
+                    decode_lens=decode_lens,
+                    decode_lens_cpu=decode_lens_cpu,
+                    query_start_loc=common_attn_metadata.query_start_loc[:num_decodes],
+                    num_decodes=num_decodes,
+                    num_decode_tokens=num_decode_tokens,
+                    use_native=use_native,
+                    next_n=next_n,
+                    max_decode_len=max_decode_len,
                 )
+            )
 
-                # [8, 9, 10, 7, 9, 10, 11, 12, ...] where ... is unused buffer space
-                self.expanded_seq_lens_buffer[:actual_expanded] = (
-                    expanded_base + positions_within + 1
+            # For DeepseekV4 (compress_ratio > 1), the indexer KV cache stores
+            # compressed tokens. Convert uncompressed seq_lens to compressed.
+            if self.compress_ratio > 1:
+                # True iff seq_lens aliases decode_seq_lens_buffer (flatten or
+                # native wrote it); False iff it aliases common_attn_metadata.
+                seq_lens_is_local_view = (use_native and next_n > 1) or (
+                    not use_native and max_decode_len > 1
                 )
-                self.expanded_seq_lens_buffer[actual_expanded:] = 0
-                seq_lens = self.expanded_seq_lens_buffer[:num_decode_tokens]
-
-                # Give each of the flattened entries the same block table row as the
-                # original request.
-                self.expanded_block_table_buffer[:actual_expanded] = (
-                    torch.repeat_interleave(
-                        block_table, decode_lens, dim=0, output_size=actual_expanded
+                if seq_lens_is_local_view:
+                    seq_lens //= self.compress_ratio
+                else:
+                    # Copy to avoid mutating shared state; keeps CG address stable.
+                    self.expanded_seq_lens_buffer[:num_decodes] = (
+                        seq_lens // self.compress_ratio
                     )
-                )
-                if actual_expanded < num_decode_tokens:
-                    self.expanded_block_table_buffer[
-                        actual_expanded:num_decode_tokens, 0
-                    ] = 0
-                block_table = self.expanded_block_table_buffer[:num_decode_tokens]
+                    self.expanded_seq_lens_buffer[num_decodes:num_decode_tokens] = 0
+                    seq_lens = self.expanded_seq_lens_buffer[:num_decode_tokens]
 
-                # All reqs now have decode_len=1
-                self.decode_lens_buffer[:num_decode_tokens] = 1
-                decode_lens = self.decode_lens_buffer[:num_decode_tokens]
-                offsets = None
-                batch_size = num_decode_tokens
-            else:
-                offsets = None
-                batch_size = num_decodes
+            # Non-MTP: deep_gemm paged MQA logits requires 2D context_lens
+            # (csrc/apis/attention.hpp). Unsqueeze to (B, 1) so downstream
+            # kernels see the same (B, next_n) layout as the MTP path.
+            if seq_lens.dim() == 1:
+                seq_lens = seq_lens.unsqueeze(-1)
 
             # DeepGEMM is required for the paged MQA logits on CUDA devices
             if current_platform.is_cuda() and has_deep_gemm():
                 self.scheduler_metadata_buffer[:] = get_paged_mqa_logits_metadata(
                     seq_lens,
-                    self.kv_cache_spec.block_size,
+                    self.kv_cache_spec.storage_block_size,
                     self.num_sms,
                 )
 
-            # Decide which top-k kernel to use based on batch size and sequence length
-            # Decision logic based on micro-benchmark results:
-            # - large_context_topk wins for batch <= 128 and seq_len > 8K
-            # - top_k_per_row_decode wins for batch > 128 or seq_len <= 8K
-            _is_large_context = common_attn_metadata.max_seq_len > 8192
-            use_large_context_topk = batch_size <= 128 and _is_large_context
-
             decode_metadata = DeepSeekV32IndexerDecodeMetadata(
                 block_table=block_table,
                 seq_lens=seq_lens,
                 decode_lens=decode_lens,
-                requires_padding=False,
+                requires_padding=requires_padding,
                 schedule_metadata=self.scheduler_metadata_buffer,
-                use_large_context_topk=use_large_context_topk,
-                offsets=offsets,
             )
 
         attn_metadata = DeepseekV32IndexerMetadata(
             seq_lens=common_attn_metadata.seq_lens,
-            num_reqs=common_attn_metadata.num_reqs,
-            max_query_len=common_attn_metadata.max_query_len,
             max_seq_len=common_attn_metadata.max_seq_len,
-            num_actual_tokens=common_attn_metadata.num_actual_tokens,
-            query_start_loc=common_attn_metadata.query_start_loc,
-            slot_mapping=common_attn_metadata.slot_mapping,
-            head_dim=128,
+            slot_mapping=compressed_slot_mapping,
             num_decodes=num_decodes,
             num_decode_tokens=num_decode_tokens,
             num_prefills=num_prefills,
@@ -484,6 +636,139 @@ def build(
             decode=decode_metadata,
         )
 
-        # if get_tensor_model_parallel_rank() == 0:
-        #     logger.info(f"attn_metadata: {attn_metadata}")
         return attn_metadata
+
+
+def build_prefill_chunk_metadata(
+    start_idx: int,
+    end_idx: int,
+    query_start_loc: torch.Tensor,
+    query_start_loc_cpu: torch.Tensor,
+    uncompressed_seq_lens: torch.Tensor,
+    compressed_seq_lens: torch.Tensor,
+    compressed_seq_lens_cpu: torch.Tensor,
+    block_table: torch.Tensor,
+    compress_ratio: int,
+    query_slice: slice | None = None,
+    skip_kv_gather: bool = False,
+) -> DeepseekV32IndexerPrefillChunkMetadata | None:
+    total_seq_lens = compressed_seq_lens_cpu[start_idx:end_idx].sum().item()
+    if total_seq_lens == 0:
+        return None
+
+    num_reqs = end_idx - start_idx
+    device = block_table.device
+    token_to_seq = torch.empty(total_seq_lens, dtype=torch.int32, device=device)
+
+    cu_seq_lens = torch.empty(num_reqs + 1, dtype=torch.int32, device=device)
+    # Assigning to slice avoids cpu sync.
+    cu_seq_lens[:1] = 0
+    torch.cumsum(compressed_seq_lens[start_idx:end_idx], dim=0, out=cu_seq_lens[1:])
+
+    query_start_loc = (
+        query_start_loc[start_idx : end_idx + 1] - query_start_loc[start_idx]
+    )
+
+    total_query_len = int(
+        (query_start_loc_cpu[end_idx] - query_start_loc_cpu[start_idx]).item()
+    )
+    if query_slice is not None:
+        qs_start = query_slice.start
+        qs_stop = query_slice.stop
+    else:
+        qs_start = 0
+        qs_stop = total_query_len
+    output_query_len = qs_stop - qs_start
+
+    cu_seq_len_ks = torch.empty(output_query_len, dtype=torch.int32, device=device)
+    cu_seq_len_ke = torch.empty(output_query_len, dtype=torch.int32, device=device)
+
+    _build_prefill_chunk_metadata_kernel[(num_reqs,)](
+        query_start_loc,
+        uncompressed_seq_lens[start_idx:end_idx],
+        cu_seq_lens,
+        token_to_seq,
+        cu_seq_len_ks,
+        cu_seq_len_ke,
+        qs_start,
+        qs_stop,
+        BLOCK_SIZE=1024,
+        COMPRESS_RATIO=compress_ratio,
+    )
+
+    token_start = query_start_loc_cpu[start_idx].item()
+    if query_slice is not None:
+        token_end = token_start + qs_stop
+        token_start = token_start + qs_start
+        skip_kv_gather = skip_kv_gather or qs_start > 0
+    else:
+        token_end = query_start_loc_cpu[end_idx].item()
+
+    return DeepseekV32IndexerPrefillChunkMetadata(
+        cu_seqlen_ks=cu_seq_len_ks,
+        cu_seqlen_ke=cu_seq_len_ke,
+        cu_seq_lens=cu_seq_lens,
+        token_to_seq=token_to_seq,
+        total_seq_lens=total_seq_lens,
+        block_table=block_table[start_idx:end_idx],
+        token_start=token_start,
+        token_end=token_end,
+        num_reqs=num_reqs,
+        skip_kv_gather=skip_kv_gather,
+    )
+
+
+@triton.jit
+def _build_prefill_chunk_metadata_kernel(
+    # Inputs
+    query_start_loc_ptr,
+    uncompressed_seq_lens_ptr,
+    cu_compressed_seq_lens_ptr,
+    # Outputs
+    token_to_seq_ptr,
+    cu_compressed_seq_len_ks_ptr,
+    cu_compressed_seq_len_ke_ptr,
+    query_slice_start,
+    query_slice_stop,
+    BLOCK_SIZE: tl.constexpr,
+    COMPRESS_RATIO: tl.constexpr,
+):
+    batch_idx = tl.program_id(0)
+
+    query_start = tl.load(query_start_loc_ptr + batch_idx)
+    query_end = tl.load(query_start_loc_ptr + batch_idx + 1)
+    query_len = query_end - query_start
+
+    seq_start = tl.load(cu_compressed_seq_lens_ptr + batch_idx)
+    seq_end = tl.load(cu_compressed_seq_lens_ptr + batch_idx + 1)
+    compressed_seq_len = seq_end - seq_start
+
+    uncompressed_seq_len = tl.load(uncompressed_seq_lens_ptr + batch_idx)
+    start_pos = uncompressed_seq_len - query_len
+
+    for i in range(0, query_len, BLOCK_SIZE):
+        offset = i + tl.arange(0, BLOCK_SIZE)
+        abs_pos = query_start + offset
+        mask = (
+            (offset < query_len)
+            & (abs_pos >= query_slice_start)
+            & (abs_pos < query_slice_stop)
+        )
+        out_pos = abs_pos - query_slice_start
+
+        # Compute cu_seq_len_ks
+        tl.store(cu_compressed_seq_len_ks_ptr + out_pos, seq_start, mask=mask)
+
+        # Compute cu_seq_len_ke
+        seq_len_per_token = (start_pos + 1 + offset) // COMPRESS_RATIO
+        tl.store(
+            cu_compressed_seq_len_ke_ptr + out_pos,
+            seq_start + seq_len_per_token,
+            mask=mask,
+        )
+
+    # Compute token_to_seq
+    for i in range(0, compressed_seq_len, BLOCK_SIZE):
+        offset = i + tl.arange(0, BLOCK_SIZE)
+        mask = offset < compressed_seq_len
+        tl.store(token_to_seq_ptr + seq_start + offset, batch_idx, mask=mask)
diff --git a/vllm/v1/attention/backends/mla/prefill/__init__.py b/vllm/v1/attention/backends/mla/prefill/__init__.py
new file mode 100644
index 000000000000..ae5b7ae82598
--- /dev/null
+++ b/vllm/v1/attention/backends/mla/prefill/__init__.py
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from vllm.v1.attention.backends.mla.prefill.base import MLAPrefillBackend
+from vllm.v1.attention.backends.mla.prefill.registry import MLAPrefillBackendEnum
+from vllm.v1.attention.backends.mla.prefill.selector import get_mla_prefill_backend
+
+__all__ = [
+    "MLAPrefillBackend",
+    "MLAPrefillBackendEnum",
+    "get_mla_prefill_backend",
+]
diff --git a/vllm/v1/attention/backends/mla/prefill/base.py b/vllm/v1/attention/backends/mla/prefill/base.py
new file mode 100644
index 000000000000..91d668826fd9
--- /dev/null
+++ b/vllm/v1/attention/backends/mla/prefill/base.py
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Abstract base class for MLA prefill backends."""
+
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, ClassVar
+
+import torch
+
+if TYPE_CHECKING:
+    from vllm.config import VllmConfig
+    from vllm.model_executor.layers.attention.mla_attention import (
+        MLACommonPrefillMetadata,
+    )
+    from vllm.platforms.interface import DeviceCapability
+    from vllm.v1.attention.backends.mla.prefill.selector import (
+        MLAPrefillSelectorConfig,
+    )
+
+
+class MLAPrefillBackend(ABC):
+    """Abstract base class for MLA prefill backends."""
+
+    supported_dtypes: ClassVar[list[torch.dtype]] = [
+        torch.float16,
+        torch.bfloat16,
+    ]
+    requires_r1_mla_dimensions: ClassVar[bool] = False
+
+    @staticmethod
+    @abstractmethod
+    def get_name() -> str:
+        raise NotImplementedError
+
+    @classmethod
+    def supports_compute_capability(cls, device_capability: "DeviceCapability") -> bool:
+        return True
+
+    @classmethod
+    def supports_dtype(cls, dtype: torch.dtype) -> bool:
+        return dtype in cls.supported_dtypes
+
+    @classmethod
+    def is_available(cls) -> bool:
+        return True
+
+    @classmethod
+    def validate_configuration(
+        cls,
+        device_capability: "DeviceCapability",
+        selector_config: "MLAPrefillSelectorConfig",
+    ) -> list[str]:
+        invalid_reasons: list[str] = []
+
+        if not cls.supports_compute_capability(device_capability):
+            invalid_reasons.append(
+                f"compute capability {device_capability.major}."
+                f"{device_capability.minor} not supported"
+            )
+
+        if not cls.supports_dtype(selector_config.dtype):
+            invalid_reasons.append(f"dtype {selector_config.dtype} not supported")
+
+        if not cls.is_available():
+            invalid_reasons.append("required dependencies not available")
+
+        if cls.requires_r1_mla_dimensions and not selector_config.is_r1_compatible:
+            invalid_reasons.append(
+                "model does not have DeepSeek R1 MLA dimensions "
+                "(qk_nope_head_dim=128, qk_rope_head_dim=64, v_head_dim=128)"
+            )
+
+        return invalid_reasons
+
+    def __init__(
+        self,
+        num_heads: int,
+        scale: float,
+        kv_lora_rank: int,
+        qk_nope_head_dim: int,
+        qk_rope_head_dim: int,
+        v_head_dim: int,
+        vllm_config: "VllmConfig",
+    ) -> None:
+        self.num_heads = num_heads
+        self.scale = scale
+        self.kv_lora_rank = kv_lora_rank
+        self.qk_nope_head_dim = qk_nope_head_dim
+        self.qk_rope_head_dim = qk_rope_head_dim
+        self.v_head_dim = v_head_dim
+        self.vllm_config = vllm_config
+
+    def prepare_metadata(  # noqa: B027
+        self,
+        prefill_metadata: "MLACommonPrefillMetadata",
+    ) -> None:
+        """Prepare backend-specific metadata before the forward pass.
+
+        Called by the metadata builder after constructing the prefill metadata.
+        """
+        self._prefill_metadata = prefill_metadata
+
+    @abstractmethod
+    def run_prefill_new_tokens(
+        self,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        return_softmax_lse: bool,
+    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def run_prefill_context_chunk(
+        self,
+        chunk_idx: int,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        raise NotImplementedError
diff --git a/vllm/v1/attention/backends/mla/prefill/flash_attn.py b/vllm/v1/attention/backends/mla/prefill/flash_attn.py
new file mode 100644
index 000000000000..029bd8ec9560
--- /dev/null
+++ b/vllm/v1/attention/backends/mla/prefill/flash_attn.py
@@ -0,0 +1,176 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""FlashAttention backend for MLA prefill."""
+
+import functools
+from typing import TYPE_CHECKING
+
+import torch
+
+import vllm.envs as envs
+from vllm.platforms import current_platform
+from vllm.v1.attention.backends.fa_utils import (
+    get_flash_attn_version,
+    is_flash_attn_varlen_func_available,
+)
+from vllm.v1.attention.backends.mla.prefill.base import MLAPrefillBackend
+
+if TYPE_CHECKING:
+    from vllm.config import VllmConfig
+
+if is_flash_attn_varlen_func_available():
+    from vllm.v1.attention.backends.fa_utils import flash_attn_varlen_func
+else:
+    flash_attn_varlen_func = None  # type: ignore[assignment]
+
+
+class FlashAttnPrefillBackend(MLAPrefillBackend):
+    """FlashAttention backend for MLA prefill."""
+
+    @staticmethod
+    def get_name() -> str:
+        return "FLASH_ATTN"
+
+    @classmethod
+    def is_available(cls) -> bool:
+        return is_flash_attn_varlen_func_available()
+
+    def __init__(
+        self,
+        num_heads: int,
+        scale: float,
+        kv_lora_rank: int,
+        qk_nope_head_dim: int,
+        qk_rope_head_dim: int,
+        v_head_dim: int,
+        vllm_config: "VllmConfig",
+    ) -> None:
+        super().__init__(
+            num_heads=num_heads,
+            scale=scale,
+            kv_lora_rank=kv_lora_rank,
+            qk_nope_head_dim=qk_nope_head_dim,
+            qk_rope_head_dim=qk_rope_head_dim,
+            v_head_dim=v_head_dim,
+            vllm_config=vllm_config,
+        )
+
+        # Handle the differences between the flash_attn_varlen from
+        # flash_attn and the one from vllm_flash_attn
+        assert flash_attn_varlen_func is not None, (
+            "FlashAttnPrefillBackend requires flash_attn_varlen_func. "
+            "Ensure FlashAttnPrefillBackend.is_available() is checked first."
+        )
+        qk_head_dim = qk_nope_head_dim + qk_rope_head_dim
+        self.flash_attn_varlen_func = flash_attn_varlen_func
+        self.vllm_flash_attn_version = get_flash_attn_version(head_size=qk_head_dim)
+        if self.vllm_flash_attn_version is not None:
+            self.flash_attn_varlen_func = functools.partial(
+                flash_attn_varlen_func, fa_version=self.vllm_flash_attn_version
+            )
+
+        # Determine if we need to pad V
+        # For MLA the v head dim is smaller than qk head dim so we pad out
+        # v with 0s to match the qk head dim for attention backends that do
+        # not support different headdims.
+        # FA3 on Hopper (SM90) and FA4 natively handle diff headdims.
+        device_capability = current_platform.get_device_capability()
+        self.requires_v_padding = self.vllm_flash_attn_version is None or not (
+            (
+                self.vllm_flash_attn_version == 3
+                and device_capability is not None
+                and device_capability[0] == 9
+            )
+            or self.vllm_flash_attn_version == 4
+        )
+
+        # Track whether we're using vllm's FA or upstream (for ROCm)
+        self._is_vllm_fa = current_platform.is_cuda() or current_platform.is_xpu()
+
+    def _flash_attn_varlen_diff_headdims(
+        self,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        return_softmax_lse: bool = False,
+        softmax_scale: float | None = None,
+        **kwargs,
+    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        maybe_padded_v = v
+        if self.requires_v_padding:
+            maybe_padded_v = torch.nn.functional.pad(
+                v, [0, q.shape[-1] - v.shape[-1]], value=0
+            )
+
+        if self._is_vllm_fa:
+            kwargs["return_softmax_lse"] = return_softmax_lse
+        else:
+            # ROCm leverages the upstream flash_attn, which takes a parameter
+            # called "return_attn_probs" instead of return_softmax_lse
+            kwargs["return_attn_probs"] = return_softmax_lse
+        if envs.VLLM_BATCH_INVARIANT:
+            kwargs["num_splits"] = 1
+
+        attn_out = self.flash_attn_varlen_func(
+            q=q,
+            k=k,
+            v=maybe_padded_v,
+            softmax_scale=softmax_scale,
+            **kwargs,
+        )
+
+        # Unpack the output if there are multiple results
+        lse = None
+        if isinstance(attn_out, tuple):
+            attn_out, lse = attn_out[0], attn_out[1]
+
+        # Unpad output back to v_head_dim if we padded V
+        if self.requires_v_padding:
+            attn_out = attn_out[..., : v.shape[-1]]
+
+        # Remain consistent with old `flash_attn_varlen_func` where there
+        # is only one output tensor if `return_softmax_lse` is False.
+        if return_softmax_lse:
+            return attn_out, lse
+        return attn_out
+
+    def run_prefill_new_tokens(
+        self,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        return_softmax_lse: bool,
+    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        return self._flash_attn_varlen_diff_headdims(
+            q=q,
+            k=k,
+            v=v,
+            cu_seqlens_q=self._prefill_metadata.query_start_loc,
+            cu_seqlens_k=self._prefill_metadata.query_start_loc,
+            max_seqlen_q=self._prefill_metadata.max_query_len,
+            max_seqlen_k=self._prefill_metadata.max_query_len,
+            softmax_scale=self.scale,
+            causal=True,
+            return_softmax_lse=return_softmax_lse,
+        )
+
+    def run_prefill_context_chunk(
+        self,
+        chunk_idx: int,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        assert self._prefill_metadata.chunked_context is not None
+        return self._flash_attn_varlen_diff_headdims(
+            q=q,
+            k=k,
+            v=v,
+            cu_seqlens_q=self._prefill_metadata.query_start_loc,
+            cu_seqlens_k=self._prefill_metadata.chunked_context.cu_seq_lens[chunk_idx],
+            max_seqlen_q=self._prefill_metadata.max_query_len,
+            max_seqlen_k=self._prefill_metadata.chunked_context.max_seq_lens[chunk_idx],
+            softmax_scale=self.scale,
+            causal=False,  # Context is unmasked
+            return_softmax_lse=True,
+        )
diff --git a/vllm/v1/attention/backends/mla/prefill/flashinfer.py b/vllm/v1/attention/backends/mla/prefill/flashinfer.py
new file mode 100644
index 000000000000..0204f6ee1a02
--- /dev/null
+++ b/vllm/v1/attention/backends/mla/prefill/flashinfer.py
@@ -0,0 +1,221 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""FlashInfer backend for MLA prefill."""
+
+from typing import TYPE_CHECKING
+
+import torch
+
+import vllm.envs as envs
+from vllm.v1.attention.backends.mla.prefill.base import MLAPrefillBackend
+from vllm.v1.attention.backends.utils import (
+    PerLayerParameters,
+    get_per_layer_parameters,
+    infer_global_hyperparameters,
+)
+from vllm.v1.worker.workspace import current_workspace_manager
+
+if TYPE_CHECKING:
+    from vllm.config import VllmConfig
+    from vllm.model_executor.layers.attention.mla_attention import (
+        MLACommonPrefillMetadata,
+    )
+    from vllm.platforms.interface import DeviceCapability
+
+try:
+    from flashinfer import BatchPrefillWithRaggedKVCacheWrapper
+except ImportError:
+    BatchPrefillWithRaggedKVCacheWrapper = object  # type: ignore[misc,assignment]
+
+_DEFAULT_NUM_CHUNKS = 32
+
+
+class FlashInferPrefillBackend(MLAPrefillBackend):
+    """FlashInfer backend for MLA prefill."""
+
+    requires_r1_mla_dimensions = True
+
+    @staticmethod
+    def get_name() -> str:
+        return "FLASHINFER"
+
+    @classmethod
+    def supports_compute_capability(cls, device_capability: "DeviceCapability") -> bool:
+        return device_capability.major == 10
+
+    @classmethod
+    def is_available(cls) -> bool:
+        try:
+            from flashinfer import (
+                BatchPrefillWithRaggedKVCacheWrapper,  # noqa: F401
+            )
+
+            return True
+        except ImportError:
+            return False
+
+    def __init__(
+        self,
+        num_heads: int,
+        scale: float,
+        kv_lora_rank: int,
+        qk_nope_head_dim: int,
+        qk_rope_head_dim: int,
+        v_head_dim: int,
+        vllm_config: "VllmConfig",
+    ) -> None:
+        super().__init__(
+            num_heads=num_heads,
+            scale=scale,
+            kv_lora_rank=kv_lora_rank,
+            qk_nope_head_dim=qk_nope_head_dim,
+            qk_rope_head_dim=qk_rope_head_dim,
+            v_head_dim=v_head_dim,
+            vllm_config=vllm_config,
+        )
+
+        self._prefill_main: BatchPrefillWithRaggedKVCacheWrapper | None = None
+        self._prefill_chunks: list[BatchPrefillWithRaggedKVCacheWrapper] = []
+        self._global_hyperparameters: PerLayerParameters | None = None
+        (self._workspace_buffer,) = current_workspace_manager().get_simultaneous(
+            ((envs.VLLM_FLASHINFER_WORKSPACE_BUFFER_SIZE,), torch.uint8),
+        )
+
+    def _ensure_chunks(
+        self,
+        num_chunks: int,
+        workspace_buffer: torch.Tensor,
+    ) -> None:
+        if len(self._prefill_chunks) < num_chunks:
+            for _ in range(len(self._prefill_chunks), num_chunks):
+                self._prefill_chunks.append(
+                    BatchPrefillWithRaggedKVCacheWrapper(
+                        workspace_buffer, "NHD", backend="cutlass"
+                    )
+                )
+
+    def _resolve_global_hyperparameters(self) -> PerLayerParameters:
+        if self._global_hyperparameters is not None:
+            return self._global_hyperparameters
+
+        from vllm.model_executor.layers.attention.mla_attention import (
+            MLAAttention,
+            MLACommonImpl,
+        )
+
+        forward_context = self.vllm_config.compilation_config.static_forward_context
+        layer_names = [
+            name
+            for name, layer in forward_context.items()
+            if isinstance(layer, MLAAttention)
+        ]
+
+        self._global_hyperparameters = infer_global_hyperparameters(
+            get_per_layer_parameters(
+                self.vllm_config,
+                layer_names,
+                MLACommonImpl,  # type: ignore[type-abstract]
+            )
+        )
+        return self._global_hyperparameters
+
+    def prepare_metadata(
+        self,
+        prefill_metadata: "MLACommonPrefillMetadata",
+    ) -> None:
+        global_hyperparameters = self._resolve_global_hyperparameters()
+        qo_indptr = prefill_metadata.query_start_loc
+        has_context = prefill_metadata.chunked_context is not None
+        if self._prefill_main is None:
+            self._prefill_main = BatchPrefillWithRaggedKVCacheWrapper(
+                self._workspace_buffer, "NHD", backend="cutlass"
+            )
+            self._ensure_chunks(_DEFAULT_NUM_CHUNKS, self._workspace_buffer)
+
+        if has_context:
+            chunked_context = prefill_metadata.chunked_context
+            assert chunked_context is not None
+            num_chunks = chunked_context.cu_seq_lens.shape[0]
+            self._ensure_chunks(num_chunks, self._workspace_buffer)
+
+        num_qo_heads = self.num_heads
+        num_kv_heads = num_qo_heads
+
+        head_dim_qk = self.qk_nope_head_dim + self.qk_rope_head_dim
+        head_dim_vo = self.v_head_dim
+        kv_indptr = qo_indptr.clone()
+
+        assert self._prefill_main is not None
+        self._prefill_main.plan(
+            qo_indptr=qo_indptr,
+            kv_indptr=kv_indptr,
+            num_qo_heads=num_qo_heads,
+            num_kv_heads=num_kv_heads,
+            head_dim_qk=head_dim_qk,
+            head_dim_vo=head_dim_vo,
+            causal=True,
+            sm_scale=global_hyperparameters.sm_scale,
+            window_left=global_hyperparameters.window_left,
+            logits_soft_cap=global_hyperparameters.logits_soft_cap,
+            q_data_type=prefill_metadata.q_data_type,
+            o_data_type=prefill_metadata.output_dtype,
+        )
+
+        if has_context:
+            chunked_context = prefill_metadata.chunked_context
+            assert chunked_context is not None
+            for i in range(num_chunks):
+                kv_indptr_chunk = chunked_context.cu_seq_lens[i]
+
+                self._prefill_chunks[i].plan(
+                    qo_indptr=qo_indptr,
+                    kv_indptr=kv_indptr_chunk,
+                    num_qo_heads=num_qo_heads,
+                    num_kv_heads=num_kv_heads,
+                    head_dim_qk=head_dim_qk,
+                    head_dim_vo=head_dim_vo,
+                    causal=False,
+                    sm_scale=global_hyperparameters.sm_scale,
+                    window_left=global_hyperparameters.window_left,
+                    logits_soft_cap=global_hyperparameters.logits_soft_cap,
+                    q_data_type=prefill_metadata.q_data_type,
+                    o_data_type=prefill_metadata.output_dtype,
+                )
+
+    def run_prefill_new_tokens(
+        self,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        return_softmax_lse: bool,
+    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        assert self._prefill_main is not None
+
+        ret = self._prefill_main.run(
+            q=q,
+            k=k,
+            v=v,
+            return_lse=return_softmax_lse,
+        )
+
+        if isinstance(ret, tuple):
+            # Convert from (q_len, num_heads) to (num_heads, q_len)
+            return ret[0], ret[1].transpose(0, 1).contiguous()
+        return ret
+
+    def run_prefill_context_chunk(
+        self,
+        chunk_idx: int,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        attn_out, lse = self._prefill_chunks[chunk_idx].run(
+            q=q,
+            k=k,
+            v=v,
+            return_lse=True,
+        )
+
+        # Convert from (q_len, num_heads) to (num_heads, q_len)
+        return attn_out, lse.transpose(0, 1).contiguous()
diff --git a/vllm/v1/attention/backends/mla/prefill/registry.py b/vllm/v1/attention/backends/mla/prefill/registry.py
new file mode 100644
index 000000000000..e78a9a3095a9
--- /dev/null
+++ b/vllm/v1/attention/backends/mla/prefill/registry.py
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Registry for MLA prefill backends.
+
+This module provides an enumeration of all available MLA prefill backends
+and utilities for loading them.
+"""
+
+from enum import Enum, EnumMeta
+from typing import TYPE_CHECKING
+
+from vllm.utils.import_utils import resolve_obj_by_qualname
+
+if TYPE_CHECKING:
+    from vllm.v1.attention.backends.mla.prefill.base import MLAPrefillBackend
+
+
+class _MLAPrefillBackendEnumMeta(EnumMeta):
+    """Metaclass for MLAPrefillBackendEnum to provide better error messages."""
+
+    def __getitem__(cls, name: str):
+        try:
+            return super().__getitem__(name)
+        except KeyError:
+            members = cls.__members__.keys()
+            valid_backends = ", ".join(members)
+            raise ValueError(
+                f"Unknown MLA prefill backend: '{name}'. "
+                f"Valid options are: {valid_backends}"
+            ) from None
+
+
+class MLAPrefillBackendEnum(Enum, metaclass=_MLAPrefillBackendEnumMeta):
+    """Enumeration of all supported MLA prefill backends."""
+
+    FLASH_ATTN = (
+        "vllm.v1.attention.backends.mla.prefill.flash_attn.FlashAttnPrefillBackend"
+    )
+    FLASHINFER = (
+        "vllm.v1.attention.backends.mla.prefill.flashinfer.FlashInferPrefillBackend"
+    )
+    TRTLLM_RAGGED = (
+        "vllm.v1.attention.backends.mla.prefill.trtllm_ragged."
+        "TrtllmRaggedPrefillBackend"
+    )
+    TOKENSPEED_MLA = (
+        "vllm.v1.attention.backends.mla.prefill.tokenspeed_mla."
+        "TokenspeedMLAPrefillBackend"
+    )
+
+    def get_path(self) -> str:
+        """Get the fully qualified class path for this backend."""
+        return self.value
+
+    def get_class(self) -> "type[MLAPrefillBackend]":
+        """Lazy load and return the backend class."""
+        return resolve_obj_by_qualname(self.get_path())
diff --git a/vllm/v1/attention/backends/mla/prefill/selector.py b/vllm/v1/attention/backends/mla/prefill/selector.py
new file mode 100644
index 000000000000..816f4fd4b737
--- /dev/null
+++ b/vllm/v1/attention/backends/mla/prefill/selector.py
@@ -0,0 +1,184 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Selector for MLA prefill backends.
+
+This module provides functions for selecting the appropriate MLA prefill
+backend based on device capabilities and configuration.
+"""
+
+from functools import cache
+from typing import TYPE_CHECKING, NamedTuple
+
+import torch
+
+from vllm.logger import init_logger
+from vllm.platforms.interface import DeviceCapability
+from vllm.v1.attention.backends.mla.prefill.registry import MLAPrefillBackendEnum
+
+if TYPE_CHECKING:
+    from vllm.config import VllmConfig
+    from vllm.v1.attention.backends.mla.prefill.base import MLAPrefillBackend
+
+logger = init_logger(__name__)
+
+
+class MLAPrefillSelectorConfig(NamedTuple):
+    """Hashable configuration for MLA prefill backend selection.
+
+    This is analogous to AttentionSelectorConfig and contains model-specific
+    configuration needed to select an MLA prefill backend, extracted from
+    VllmConfig into a hashable form for caching.
+    """
+
+    dtype: torch.dtype
+    is_r1_compatible: bool
+
+
+def is_deepseek_r1_mla_compatible(vllm_config: "VllmConfig") -> bool:
+    """Check if model has DeepSeek R1 compatible MLA dimensions.
+
+    DeepSeek R1 MLA dimensions are:
+    - qk_nope_head_dim = 128
+    - qk_rope_head_dim = 64
+    - v_head_dim = 128
+    """
+    if vllm_config.model_config is None:
+        return False
+    hf_text_config = vllm_config.model_config.hf_text_config
+    qk_nope_head_dim = getattr(hf_text_config, "qk_nope_head_dim", 1)
+    qk_rope_head_dim = getattr(hf_text_config, "qk_rope_head_dim", 1)
+    v_head_dim = getattr(hf_text_config, "v_head_dim", 1)
+    return qk_nope_head_dim == 128 and qk_rope_head_dim == 64 and v_head_dim == 128
+
+
+def _get_mla_prefill_backend_priorities(
+    device_capability: DeviceCapability,
+) -> list[MLAPrefillBackendEnum]:
+    """Get MLA prefill backend priorities based on device capability.
+
+    Args:
+        device_capability: The device's compute capability.
+
+    Returns:
+        List of backends in priority order (highest priority first).
+    """
+    if device_capability.major == 10:  # Blackwell
+        return [
+            MLAPrefillBackendEnum.FLASH_ATTN,
+            MLAPrefillBackendEnum.TRTLLM_RAGGED,
+            MLAPrefillBackendEnum.FLASHINFER,
+            MLAPrefillBackendEnum.TOKENSPEED_MLA,
+        ]
+    else:  # Hopper (SM90) and older
+        return [
+            MLAPrefillBackendEnum.FLASH_ATTN,
+        ]
+
+
+def get_mla_prefill_backend(
+    vllm_config: "VllmConfig",
+) -> "type[MLAPrefillBackend]":
+    """Select the MLA prefill backend based on configuration and device.
+
+    This function first checks for explicit user preferences via
+    mla_prefill_backend in AttentionConfig, then falls back to automatic
+    priority-based selection.
+
+    Args:
+        vllm_config: The vLLM configuration.
+
+    Returns:
+        The selected prefill backend class.
+    """
+    from vllm.platforms import current_platform
+
+    device_capability = current_platform.get_device_capability()
+    if device_capability is None:
+        logger.info_once(
+            "Device capability not available, using FlashAttention MLA prefill backend."
+        )
+        return MLAPrefillBackendEnum.FLASH_ATTN.get_class()
+
+    attention_config = vllm_config.attention_config
+
+    selector_config = MLAPrefillSelectorConfig(
+        dtype=vllm_config.model_config.dtype,
+        is_r1_compatible=is_deepseek_r1_mla_compatible(vllm_config),
+    )
+
+    if attention_config.mla_prefill_backend is not None:
+        selected_backend = attention_config.mla_prefill_backend
+        backend_cls: type[MLAPrefillBackend] | None = None
+        try:
+            backend_cls = selected_backend.get_class()
+            invalid_reasons = backend_cls.validate_configuration(
+                device_capability, selector_config
+            )
+        except ImportError:
+            invalid_reasons = ["ImportError"]
+        if invalid_reasons:
+            raise ValueError(
+                f"Selected MLA prefill backend {selected_backend.name} "
+                f"is not valid for this configuration. "
+                f"Reason: {invalid_reasons}"
+            )
+        assert backend_cls is not None
+        logger.info("Using %s MLA prefill backend.", selected_backend.name)
+        return backend_cls
+
+    return _auto_select_mla_prefill_backend(
+        device_capability,
+        selector_config,
+    )
+
+
+@cache
+def _auto_select_mla_prefill_backend(
+    device_capability: DeviceCapability,
+    selector_config: MLAPrefillSelectorConfig,
+) -> "type[MLAPrefillBackend]":
+    """Auto-select the best available MLA prefill backend.
+
+    Args:
+        device_capability: The device's compute capability.
+        selector_config: Hashable configuration for backend selection.
+
+    Returns:
+        The selected prefill backend class.
+    """
+    priorities = _get_mla_prefill_backend_priorities(device_capability)
+    all_invalid_reasons: dict[str, list[str]] = {}
+
+    for backend_enum in priorities:
+        backend_cls: type[MLAPrefillBackend] | None = None
+        try:
+            backend_cls = backend_enum.get_class()
+            invalid_reasons = backend_cls.validate_configuration(
+                device_capability, selector_config
+            )
+        except ImportError:
+            invalid_reasons = ["ImportError"]
+        if not invalid_reasons:
+            assert backend_cls is not None
+            logger.info_once("Using %s MLA prefill backend.", backend_enum.name)
+            return backend_cls
+        all_invalid_reasons[backend_enum.name] = invalid_reasons
+
+    reasons_str = (
+        "{"
+        + ", ".join(
+            f"{name}: [{', '.join(reasons)}]"
+            for name, reasons in all_invalid_reasons.items()
+        )
+        + "}"
+    )
+    config_str = repr(selector_config)
+    logger.debug_once(
+        "Some MLA prefill backends are not valid with %s. Reasons: %s.",
+        config_str,
+        reasons_str,
+    )
+
+    raise ValueError(
+        f"No valid MLA prefill backend found with {config_str}. Reasons: {reasons_str}."
+    )
diff --git a/vllm/v1/attention/backends/mla/prefill/tokenspeed_mla.py b/vllm/v1/attention/backends/mla/prefill/tokenspeed_mla.py
new file mode 100644
index 000000000000..d6e4fca172ad
--- /dev/null
+++ b/vllm/v1/attention/backends/mla/prefill/tokenspeed_mla.py
@@ -0,0 +1,180 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""TokenSpeed CuTe DSL backend for MLA prefill."""
+
+from typing import TYPE_CHECKING
+
+import torch
+
+from vllm.v1.attention.backends.mla.prefill.base import MLAPrefillBackend
+
+if TYPE_CHECKING:
+    from vllm.config import VllmConfig
+    from vllm.model_executor.layers.attention.mla_attention import (
+        MLACommonPrefillMetadata,
+    )
+    from vllm.platforms.interface import DeviceCapability
+
+
+class TokenspeedMLAPrefillBackend(MLAPrefillBackend):
+    """TokenSpeed CuTe DSL backend for MLA prefill."""
+
+    requires_r1_mla_dimensions = True
+
+    @staticmethod
+    def get_name() -> str:
+        return "TOKENSPEED_MLA"
+
+    @classmethod
+    def supports_compute_capability(cls, device_capability: "DeviceCapability") -> bool:
+        return device_capability.major == 10
+
+    _INSTALL_HINT = (
+        "tokenspeed_mla package is not installed. "
+        "Install it with: `uv pip install tokenspeed-mla`"
+    )
+
+    @classmethod
+    def is_available(cls) -> bool:
+        try:
+            from tokenspeed_mla import (
+                tokenspeed_mla_prefill,  # noqa: F401
+            )
+
+            return True
+        except ImportError:
+            return False
+
+    @classmethod
+    def validate_configuration(
+        cls,
+        device_capability,
+        selector_config,
+    ) -> list[str]:
+        # Replace the generic "required dependencies not available" message
+        # from the base class with a specific install hint so users know
+        # exactly which package to install when they explicitly select this
+        # backend without having tokenspeed_mla installed.
+        reasons = super().validate_configuration(device_capability, selector_config)
+        return [
+            cls._INSTALL_HINT if r == "required dependencies not available" else r
+            for r in reasons
+        ]
+
+    def __init__(
+        self,
+        num_heads: int,
+        scale: float,
+        kv_lora_rank: int,
+        qk_nope_head_dim: int,
+        qk_rope_head_dim: int,
+        v_head_dim: int,
+        vllm_config: "VllmConfig",
+    ) -> None:
+        super().__init__(
+            num_heads=num_heads,
+            scale=scale,
+            kv_lora_rank=kv_lora_rank,
+            qk_nope_head_dim=qk_nope_head_dim,
+            qk_rope_head_dim=qk_rope_head_dim,
+            v_head_dim=v_head_dim,
+            vllm_config=vllm_config,
+        )
+
+        # Pre-JIT BF16 and FP8 prefill kernels. Idempotent — also called from
+        # TokenspeedMLAImpl.__init__; second call is a no-op.
+        from tokenspeed_mla import warmup_compile_prefill
+
+        for q_dtype in (torch.bfloat16, torch.float8_e4m3fn):
+            warmup_compile_prefill(
+                q_dtype=q_dtype,
+                d_qk=qk_nope_head_dim + qk_rope_head_dim,
+                d_v=v_head_dim,
+                enable_pdl=False,
+            )
+
+    def prepare_metadata(
+        self,
+        prefill_metadata: "MLACommonPrefillMetadata",
+    ) -> None:
+        super().prepare_metadata(prefill_metadata)
+        # Kernel signature requires `seq_lens` but the implementation never reads
+        # it (per-batch lengths are derived from `cum_seq_lens` diffs); compute
+        # for parity with trtllm_ragged. cuda-graph padding in
+        # `query_start_loc` is saturated to `total_num_tokens`
+        # (gpu_model_runner.py:1905), so trailing diffs are 0 and padded batches
+        # are kernel no-ops — same reason trtllm passes the padded length as
+        # batch_size directly.
+        self._query_seq_lens = (
+            prefill_metadata.query_start_loc[1:] - prefill_metadata.query_start_loc[:-1]
+        )
+
+    def run_prefill_new_tokens(
+        self,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        return_softmax_lse: bool,
+    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        from tokenspeed_mla import tokenspeed_mla_prefill
+
+        # `v` arrives as the second half of `kv_nope.split(...)` in
+        # mla_attention.forward_mha — a non-contiguous view of `kv_nope` along
+        # dim=-1. The kernel does `v.reshape(1, total_kv, h_k, 1, d_v)` which
+        # would silently copy on a non-contiguous tensor; force contiguity here
+        # so the copy (if any) happens once outside the kernel call.
+        v = v.contiguous()
+
+        ret = tokenspeed_mla_prefill(
+            query=q,
+            key=k,
+            value=v,
+            seq_lens=self._query_seq_lens,
+            cum_seq_lens=self._prefill_metadata.query_start_loc,
+            max_seq_len=self._prefill_metadata.max_query_len,
+            batch_size=self._query_seq_lens.shape[0],
+            softmax_scale=self.scale,
+            is_causal=True,
+            return_lse=return_softmax_lse,
+            enable_pdl=False,
+        )
+
+        if isinstance(ret, tuple):
+            # Convert from (q_len, num_heads) to (num_heads, q_len)
+            return ret[0], ret[1].transpose(0, 1).contiguous()
+        return ret
+
+    def run_prefill_context_chunk(
+        self,
+        chunk_idx: int,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        from tokenspeed_mla import tokenspeed_mla_prefill
+
+        assert self._prefill_metadata.chunked_context is not None
+        chunked = self._prefill_metadata.chunked_context
+
+        # See note in run_prefill_new_tokens — `v` is a split-view of `kv_nope`
+        # in `_compute_prefill_context` and arrives non-contiguous.
+        v = v.contiguous()
+
+        attn_out, lse = tokenspeed_mla_prefill(
+            query=q,
+            key=k,
+            value=v,
+            seq_lens=chunked.seq_lens[chunk_idx],
+            cum_seq_lens=chunked.cu_seq_lens[chunk_idx],
+            max_seq_len=chunked.max_seq_lens[chunk_idx],
+            batch_size=chunked.seq_lens[chunk_idx].shape[0],
+            softmax_scale=self.scale,
+            is_causal=False,
+            return_lse=True,
+            cum_seq_lens_q=self._prefill_metadata.query_start_loc,
+            max_seq_len_q=self._prefill_metadata.max_query_len,
+            enable_pdl=False,
+        )
+
+        # Convert from (q_len, num_heads) to (num_heads, q_len)
+        return attn_out, lse.transpose(0, 1).contiguous()
diff --git a/vllm/v1/attention/backends/mla/prefill/trtllm_ragged.py b/vllm/v1/attention/backends/mla/prefill/trtllm_ragged.py
new file mode 100644
index 000000000000..afb0444a3148
--- /dev/null
+++ b/vllm/v1/attention/backends/mla/prefill/trtllm_ragged.py
@@ -0,0 +1,169 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""TRT-LLM Ragged backend for MLA prefill."""
+
+from typing import TYPE_CHECKING
+
+import torch
+
+import vllm.envs as envs
+from vllm.v1.attention.backends.mla.prefill.base import MLAPrefillBackend
+from vllm.v1.worker.workspace import current_workspace_manager
+
+if TYPE_CHECKING:
+    from vllm.config import VllmConfig
+    from vllm.model_executor.layers.attention.mla_attention import (
+        MLACommonPrefillMetadata,
+    )
+    from vllm.platforms.interface import DeviceCapability
+
+
+class TrtllmRaggedPrefillBackend(MLAPrefillBackend):
+    """TRT-LLM Ragged backend for MLA prefill."""
+
+    requires_r1_mla_dimensions = True
+
+    @staticmethod
+    def get_name() -> str:
+        return "TRTLLM_RAGGED"
+
+    @classmethod
+    def supports_compute_capability(cls, device_capability: "DeviceCapability") -> bool:
+        return device_capability.major == 10
+
+    @classmethod
+    def is_available(cls) -> bool:
+        try:
+            from flashinfer.prefill import (
+                trtllm_ragged_attention_deepseek,  # noqa: F401
+            )
+
+            return True
+        except ImportError:
+            return False
+
+    def __init__(
+        self,
+        num_heads: int,
+        scale: float,
+        kv_lora_rank: int,
+        qk_nope_head_dim: int,
+        qk_rope_head_dim: int,
+        v_head_dim: int,
+        vllm_config: "VllmConfig",
+    ) -> None:
+        super().__init__(
+            num_heads=num_heads,
+            scale=scale,
+            kv_lora_rank=kv_lora_rank,
+            qk_nope_head_dim=qk_nope_head_dim,
+            qk_rope_head_dim=qk_rope_head_dim,
+            v_head_dim=v_head_dim,
+            vllm_config=vllm_config,
+        )
+        (self._workspace_buffer,) = current_workspace_manager().get_simultaneous(
+            (
+                (envs.VLLM_FLASHINFER_WORKSPACE_BUFFER_SIZE,),
+                torch.uint8,
+            ),
+        )
+
+    def prepare_metadata(
+        self,
+        prefill_metadata: "MLACommonPrefillMetadata",
+    ) -> None:
+        super().prepare_metadata(prefill_metadata)
+        self._query_seq_lens = (
+            prefill_metadata.query_start_loc[1:] - prefill_metadata.query_start_loc[:-1]
+        )
+
+    def run_prefill_new_tokens(
+        self,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        return_softmax_lse: bool,
+    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        from flashinfer.prefill import trtllm_ragged_attention_deepseek
+
+        out = torch.empty(
+            q.shape[0],
+            q.shape[1],
+            v.shape[2],
+            device=q.device,
+            dtype=self._prefill_metadata.output_dtype,
+        )
+
+        ret = trtllm_ragged_attention_deepseek(
+            query=q,
+            key=k,
+            value=v,
+            workspace_buffer=self._workspace_buffer,
+            seq_lens=self._query_seq_lens,
+            max_q_len=self._prefill_metadata.max_query_len,
+            max_kv_len=self._prefill_metadata.max_query_len,
+            bmm1_scale=self.scale,
+            bmm2_scale=1.0,
+            o_sf_scale=1.0,
+            batch_size=self._query_seq_lens.shape[0],
+            window_left=-1,
+            cum_seq_lens_q=self._prefill_metadata.query_start_loc,
+            cum_seq_lens_kv=self._prefill_metadata.query_start_loc,
+            enable_pdl=False,
+            is_causal=True,
+            return_lse=return_softmax_lse,
+            out=out,
+        )
+
+        if isinstance(ret, tuple):
+            # Convert from (q_len, num_heads) to (num_heads, q_len)
+            return ret[0], ret[1].transpose(0, 1).contiguous()
+        return ret
+
+    def run_prefill_context_chunk(
+        self,
+        chunk_idx: int,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        from flashinfer.prefill import trtllm_ragged_attention_deepseek
+
+        assert self._prefill_metadata.chunked_context is not None
+        assert self._prefill_metadata.chunked_context.seq_lens[chunk_idx] is not None
+
+        out = torch.empty(
+            q.shape[0],
+            q.shape[1],
+            v.shape[2],
+            device=q.device,
+            dtype=self._prefill_metadata.output_dtype,
+        )
+
+        attn_out, lse = trtllm_ragged_attention_deepseek(
+            query=q,
+            key=k,
+            value=v,
+            workspace_buffer=self._workspace_buffer,
+            seq_lens=self._prefill_metadata.chunked_context.seq_lens[chunk_idx],
+            max_q_len=self._prefill_metadata.max_query_len,
+            max_kv_len=self._prefill_metadata.chunked_context.max_seq_lens[chunk_idx],
+            bmm1_scale=self.scale,
+            bmm2_scale=1.0,
+            o_sf_scale=1.0,
+            batch_size=self._prefill_metadata.chunked_context.seq_lens[chunk_idx].shape[
+                0
+            ],
+            window_left=-1,
+            cum_seq_lens_q=self._prefill_metadata.query_start_loc,
+            cum_seq_lens_kv=self._prefill_metadata.chunked_context.cu_seq_lens[
+                chunk_idx
+            ],
+            enable_pdl=False,
+            is_causal=False,
+            return_lse=True,
+            out=out,
+        )
+
+        # Convert from (q_len, num_heads) to (num_heads, q_len)
+        return attn_out, lse.transpose(0, 1).contiguous()
diff --git a/vllm/v1/attention/backends/mla/rocm_aiter_mla.py b/vllm/v1/attention/backends/mla/rocm_aiter_mla.py
index 6c1073b3aa77..e0a5730f5fd8 100644
--- a/vllm/v1/attention/backends/mla/rocm_aiter_mla.py
+++ b/vllm/v1/attention/backends/mla/rocm_aiter_mla.py
@@ -1,14 +1,16 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import functools
 from dataclasses import dataclass
-from typing import ClassVar
+from typing import ClassVar, Final
 
 import torch
 
 from vllm._aiter_ops import rocm_aiter_ops
 from vllm.config import VllmConfig
 from vllm.config.cache import CacheDType
+from vllm.logger import init_logger
 from vllm.model_executor.layers.attention.mla_attention import (
     MLACommonBackend,
     MLACommonDecodeMetadata,
@@ -26,6 +28,28 @@
 )
 from vllm.v1.kv_cache_interface import AttentionSpec
 
+logger = init_logger(__name__)
+
+
+@functools.lru_cache(maxsize=1)
+def _fp8_mla_prefill_supported() -> bool:
+    """Auto-detect FP8 MLA prefill via mla_prefill_ps_asm_fwd + mla_reduce_v1.
+
+    Requires gfx950 plus an AITER build that exports both kernels.  When
+    either is missing we silently fall back to ``flash_attn_varlen_func``.
+    """
+    try:
+        from vllm.platforms.rocm import on_gfx950
+    except Exception:  # noqa: BLE001
+        return False
+    if not on_gfx950():
+        return False
+    try:
+        from aiter import mla_prefill_ps_asm_fwd, mla_reduce_v1  # noqa: F401
+    except Exception:  # noqa: BLE001
+        return False
+    return True
+
 
 class AiterMLABackend(MLACommonBackend):
     supported_dtypes: ClassVar[list[torch.dtype]] = [torch.float16, torch.bfloat16]
@@ -44,7 +68,11 @@ def get_supported_head_sizes(cls) -> list[int]:
 
     @staticmethod
     def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
-        return [1]
+        # The aiter MLA decode kernel always operates with page_size=1
+        # internally (the wrapper flattens kv_buffer via .view(-1, 1, 1, H)).
+        # We support any kernel_block_size by expanding block-level indices
+        # into per-token flat indices in the metadata builder.
+        return [MultipleOf(1)]
 
     @staticmethod
     def get_name() -> str:
@@ -74,6 +102,8 @@ class AiterMLADecodeMetadata(MLACommonDecodeMetadata):
     attn_out_dtype: torch.dtype = torch.bfloat16
     # The max query output length: int
     max_qo_len: int | None = None
+    # Whether persistent MLA metadata was computed (only for qseqlen=1)
+    has_persistent_metadata: bool = False
 
 
 @dataclass
@@ -85,6 +115,25 @@ class AiterMLAMetadata(MLACommonMetadata[AiterMLADecodeMetadata]):
     reduce_final_map: torch.Tensor | None = None
     reduce_partial_map: torch.Tensor | None = None
 
+    # FP8 ASM prefill persistent-scheduling (PS) metadata.  Populated by
+    # AiterMLAMetadataBuilder._build_fp8_prefill_ps_metadata when prefill
+    # tokens are present and FP8 MLA prefill is supported on the device.
+    # Left as None on hosts/configs that fall back to flash_attn_varlen_func.
+    fp8_prefill_qo_indptr: torch.Tensor | None = None
+    fp8_prefill_kv_indptr: torch.Tensor | None = None
+    fp8_prefill_kv_indices: torch.Tensor | None = None
+    fp8_prefill_work_indptr: torch.Tensor | None = None
+    fp8_prefill_work_info_set: torch.Tensor | None = None
+    fp8_prefill_reduce_indptr: torch.Tensor | None = None
+    fp8_prefill_reduce_final_map: torch.Tensor | None = None
+    fp8_prefill_reduce_partial_map: torch.Tensor | None = None
+    fp8_prefill_max_q_len: int | None = None
+    fp8_prefill_num_partial_tiles: int | None = None
+
+
+# Tile size used by the mla_prefill_ps_asm_fwd assembly kernel.
+_FP8_PREFILL_TILE_Q = 256
+
 
 class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]):
     # TODO(luka, lucas): audit this as part of:
@@ -105,7 +154,16 @@ def __init__(
 
         self.compilation_config = vllm_config.compilation_config
         self.decode_attn_out_dtype = vllm_config.model_config.dtype
-        # kernel block size is always 1.
+
+        # Store the kernel block size from the spec. When kernel_block_size=1
+        # (no spec-dec), behavior is identical to the original. When > 1
+        # (e.g. 16 with Eagle3), we expand block-level indices into per-token
+        # flat indices since the aiter kernel always uses page_size=1 internally.
+        self.kernel_block_size = kv_cache_spec.block_size
+
+        # In the flat view (.view(-1,1,1,H)), each token is its own page,
+        # so max_num_pages_per_req = max_model_len regardless of
+        # kernel_block_size.
         max_num_pages_per_req = vllm_config.model_config.max_model_len
         max_num_reqs = vllm_config.scheduler_config.max_num_seqs
         max_num_pages = max_num_reqs * max_num_pages_per_req
@@ -115,8 +173,9 @@ def __init__(
         # so we can only use the persistent buffer if a cudagraph is actually
         # being used.
 
-        # paged_kv_last_page_len is always 1s (kernel block size is always 1),
-        # so we create it once and reuse slices in both eager and cudagraph modes.
+        # paged_kv_last_page_len is always 1s (the aiter kernel always sees
+        # page_size=1 after .view(-1,1,1,H) flattening), so we create it
+        # once and reuse slices in both eager and cudagraph modes.
         self.paged_kv_last_page_len = torch.ones(
             max_num_reqs, dtype=torch.int32, device=device
         )
@@ -129,9 +188,10 @@ def __init__(
 
         from aiter import dtypes, get_mla_metadata_info_v1
 
-        self._num_attention_heads = vllm_config.model_config.get_num_attention_heads(
-            vllm_config.parallel_config
-        )
+        # For num_attention_heads < 16 (e.g. kimi-k2.5 head=8 with TP8),
+        # make sure get_mla_metadata_info_v1 / get_mla_metadata_v1 are consistent
+        # with the actual tensor shape passed to mla_decode_fwd.
+        self._num_attention_heads = max(16, self.num_heads)
         q_dtype = self.decode_attn_out_dtype
         kv_cache_dtype_str = getattr(vllm_config.cache_config, "cache_dtype", "auto")
         if kv_cache_dtype_str in ("fp8", "fp8_e4m3", "fp8_e5m2"):
@@ -176,6 +236,14 @@ def __init__(
             device=device,
         )
 
+        self._fp8_prefill_enabled = _fp8_mla_prefill_supported()
+        if self._fp8_prefill_enabled:
+            max_prefill_qlen = min(
+                vllm_config.model_config.max_model_len,
+                vllm_config.scheduler_config.max_num_batched_tokens,
+            )
+            self._init_fp8_prefill_ps_buffers(max_num_reqs, max_prefill_qlen, device)
+
         if self.compilation_config.cudagraph_mode.has_full_cudagraphs():
             self.paged_kv_indptr = torch.zeros(
                 max_num_reqs + 1, dtype=torch.int32, device=device
@@ -185,6 +253,162 @@ def __init__(
                 max_num_reqs + 1, dtype=torch.int32, device=device
             )
 
+    def _init_fp8_prefill_ps_buffers(
+        self,
+        max_num_reqs: int,
+        max_prefill_qlen: int,
+        device: torch.device,
+    ) -> None:
+        """Pre-allocate persistent buffers for FP8 MLA prefill PS metadata.
+
+        Uses ``get_ps_metadata_info_v1`` with max values so the buffers are
+        large enough for any batch.  ``get_ps_metadata_v1`` fills them
+        per-batch in ``build()``.
+
+        Args:
+            max_num_reqs: Maximum number of concurrent requests.
+            max_prefill_qlen: Maximum Q-length for a single request in one
+                prefill batch.  Should be ``min(max_model_len,
+                max_num_batched_tokens)`` — the chunked-prefill scheduler
+                never emits more than ``max_num_batched_tokens`` new tokens
+                per batch.
+            device: Target device for the buffers.
+        """
+        from aiter import get_ps_metadata_info_v1
+
+        # After kv_b_proj decompression, K has num_heads heads (same as Q).
+        # So gqa_ratio=1 and num_head_k=num_heads for the PS kernel.
+        num_head_k = self.num_heads
+        # gqa_ratio = 1
+        # qlen_granularity = _FP8_PREFILL_TILE_Q // max(gqa_ratio, 1)
+        qlen_granularity = _FP8_PREFILL_TILE_Q
+
+        (
+            (work_metadata_size, work_metadata_dtype),
+            (work_indptr_size, work_indptr_dtype),
+            (work_info_size, work_info_dtype),
+            (reduce_indptr_size, reduce_indptr_dtype),
+            (reduce_final_map_size, reduce_final_map_dtype),
+            (reduce_partial_map_size, reduce_partial_map_dtype),
+        ) = get_ps_metadata_info_v1(
+            batch_size=max_num_reqs,
+            num_head_k=num_head_k,
+            max_qlen=max_prefill_qlen,
+            qlen_granularity=qlen_granularity,
+        )
+
+        self.fp8_ps_work_metadata = torch.empty(
+            work_metadata_size, dtype=work_metadata_dtype, device=device
+        )
+        self.fp8_ps_work_indptr = torch.empty(
+            work_indptr_size, dtype=work_indptr_dtype, device=device
+        )
+        self.fp8_ps_work_info = torch.empty(
+            *work_info_size, dtype=work_info_dtype, device=device
+        )
+        self.fp8_ps_reduce_indptr = torch.empty(
+            reduce_indptr_size, dtype=reduce_indptr_dtype, device=device
+        )
+        self.fp8_ps_reduce_final_map = torch.empty(
+            *reduce_final_map_size, dtype=reduce_final_map_dtype, device=device
+        )
+        self.fp8_ps_reduce_partial_map = torch.empty(
+            reduce_partial_map_size,
+            dtype=reduce_partial_map_dtype,
+            device=device,
+        )
+
+        logger.info(
+            "FP8 MLA prefill PS buffers allocated "
+            "(max_batch=%d, max_qlen=%d, num_head_k=%d)",
+            max_num_reqs,
+            max_prefill_qlen,
+            num_head_k,
+        )
+
+    def _build_fp8_prefill_ps_metadata(
+        self,
+        metadata: AiterMLAMetadata,
+        common_attn_metadata: CommonAttentionMetadata,
+    ) -> None:
+        """Build per-batch FP8 MLA prefill PS metadata and attach to *metadata*.
+
+        Called from ``build()`` when prefill tokens are present and
+        FP8 MLA prefill is enabled (auto-detected via
+        ``_fp8_mla_prefill_supported()``).
+        """
+        from aiter import get_ps_metadata_v1
+
+        prefill = metadata.prefill
+        # Caller (build()) only invokes this when prefill tokens exist, so
+        # metadata.prefill is guaranteed non-None.  Assert to narrow for mypy.
+        assert prefill is not None
+        qo_indptr = prefill.query_start_loc
+        kv_indptr = qo_indptr  # new tokens: KV length == Q length
+
+        # Reuse the existing CPU view of query_start_loc instead of forcing a
+        # device->host copy.  Prefill batches sit at the tail of the request
+        # list, so we slice from num_decodes onwards and rebase to zero, the
+        # same transform the parent build applies on device tensors.
+        num_decodes = metadata.num_decodes
+        qsl_cpu = common_attn_metadata.query_start_loc_cpu
+        qo_indptr_cpu = (qsl_cpu[num_decodes:] - qsl_cpu[num_decodes]).to(torch.int32)
+        kv_indptr_cpu = qo_indptr_cpu.clone()
+        seq_lens_cpu = (qo_indptr_cpu[1:] - qo_indptr_cpu[:-1]).to(torch.int32)
+
+        num_head_k = self.num_heads
+        # gqa_ratio = 1
+        # qhead_granularity = max(gqa_ratio, 1)
+        # qlen_granularity = _FP8_PREFILL_TILE_Q // qhead_granularity
+        gqa_ratio = 1
+        qhead_granularity = 1
+        qlen_granularity = _FP8_PREFILL_TILE_Q
+        kvlen_granularity = 128
+        block_size = 1  # non-paged: each "page" is one token
+
+        get_ps_metadata_v1(
+            qo_indptr_cpu,
+            kv_indptr_cpu,
+            seq_lens_cpu,
+            gqa_ratio,
+            num_head_k,
+            self.fp8_ps_work_metadata,
+            self.fp8_ps_work_indptr,
+            self.fp8_ps_work_info,
+            self.fp8_ps_reduce_indptr,
+            self.fp8_ps_reduce_final_map,
+            self.fp8_ps_reduce_partial_map,
+            qhead_granularity=qhead_granularity,
+            qlen_granularity=qlen_granularity,
+            kvlen_granularity=kvlen_granularity,
+            block_size=block_size,
+            is_causal=True,
+        )
+
+        total_prefill_tokens = int(qo_indptr_cpu[-1].item())
+        kv_indices = torch.arange(
+            total_prefill_tokens, device=qo_indptr.device, dtype=torch.int32
+        )
+
+        # The actual number of active partial tiles for this batch is the
+        # final value of reduce_indptr.  Resolving it here (during metadata
+        # build) keeps it off the per-layer forward path where a sync would
+        # break CUDA Graph capture.  Using the device-side reduce_indptr is
+        # acceptable since build is allowed to incur an occasional sync.
+        num_partial_tiles = int(self.fp8_ps_reduce_indptr[-1].item())
+
+        # Attach PS metadata to the metadata object so forward_mha can read it.
+        metadata.fp8_prefill_qo_indptr = qo_indptr
+        metadata.fp8_prefill_kv_indptr = kv_indptr
+        metadata.fp8_prefill_kv_indices = kv_indices
+        metadata.fp8_prefill_work_indptr = self.fp8_ps_work_indptr
+        metadata.fp8_prefill_work_info_set = self.fp8_ps_work_info
+        metadata.fp8_prefill_reduce_indptr = self.fp8_ps_reduce_indptr
+        metadata.fp8_prefill_reduce_final_map = self.fp8_ps_reduce_final_map
+        metadata.fp8_prefill_reduce_partial_map = self.fp8_ps_reduce_partial_map
+        metadata.fp8_prefill_max_q_len = prefill.max_query_len
+        metadata.fp8_prefill_num_partial_tiles = num_partial_tiles
+
     def _build_decode(
         self,
         block_table_tensor: torch.Tensor,
@@ -195,14 +419,14 @@ def _build_decode(
         num_decode_tokens: int,
         dcp_tot_seq_lens_device: torch.Tensor | None,
     ) -> AiterMLADecodeMetadata:
-        # kernel block size is always 1, although the kv block size is not 1.
         device = self.device
         num_reqs = seq_lens_device.size(0)
 
-        # kernel block size is always 1, so each page has exactly 1 token.
-        # last_page_len is always 1 - just slice the pre-initialized buffer.
+        # The aiter kernel always operates with page_size=1 (the wrapper
+        # flattens kv_buffer). last_page_len is always 1.
         paged_kv_last_page_len = self.paged_kv_last_page_len[:num_reqs]
 
+        # indptr: cumsum of seq_lens (one page per token in the flat view)
         paged_kv_indptr = torch.cat(
             [
                 torch.zeros(1, dtype=seq_lens_device.dtype, device=device),
@@ -214,11 +438,19 @@ def _build_decode(
 
         if self.compilation_config.cudagraph_mode.has_full_cudagraphs():
             self.paged_kv_indices.fill_(-1)
-        _copy_page_indices_kernel[(num_reqs,)](
+
+        # Expand block_table entries into per-token flat indices.
+        # When kernel_block_size=1, this degrades to a direct copy (identical
+        # to the original _copy_page_indices_kernel).
+        # When kernel_block_size=K>1, block_table entry b covering K tokens
+        # gets expanded to flat indices b*K, b*K+1, ..., b*K+(K-1).
+        _expand_page_indices_kernel[(num_reqs,)](
             self.paged_kv_indices,
             block_table_tensor,
             block_table_tensor.stride(0),
             paged_kv_indptr,
+            seq_lens_device,
+            KERNEL_BLOCK_SIZE=self.kernel_block_size,
             BLOCK_SIZE=1024,
         )
         paged_kv_indices = self.paged_kv_indices
@@ -244,27 +476,37 @@ def _build_decode(
                 0, num_reqs + 1, step=1, dtype=torch.int32, device=device
             )
 
-        from aiter import get_mla_metadata_v1
-
-        get_mla_metadata_v1(
-            qo_indptr,
-            paged_kv_indptr,
-            paged_kv_last_page_len,
-            self._num_attention_heads,
-            1,
-            True,
-            self._mla_work_meta_data,
-            self._mla_work_info_set,
-            self._mla_work_indptr,
-            self._mla_reduce_indptr,
-            self._mla_reduce_final_map,
-            self._mla_reduce_partial_map,
-            page_size=1,
-            kv_granularity=16,
-            max_seqlen_qo=max_qo_len,
-            uni_seqlen_qo=max_qo_len,
-            fast_mode=True,
-        )
+        # The aiter MLA ASM kernel only supports qseqlen=1 (single-token
+        # decode). With speculative decoding, the verification step has
+        # qseqlen > 1 (e.g. 8 for spec7). get_mla_metadata_v1 calls
+        # get_heuristic_kernel_mla which fails for qseqlen > 1.
+        # We track whether persistent metadata was successfully computed
+        # so forward_mqa can skip passing it (falling back to the kernel
+        # computing its own metadata internally, like v0.18.0).
+        has_persistent_metadata = False
+        if max_qo_len == 1:
+            from aiter import get_mla_metadata_v1
+
+            get_mla_metadata_v1(
+                qo_indptr,
+                paged_kv_indptr,
+                paged_kv_last_page_len,
+                self._num_attention_heads,
+                1,
+                True,
+                self._mla_work_meta_data,
+                self._mla_work_info_set,
+                self._mla_work_indptr,
+                self._mla_reduce_indptr,
+                self._mla_reduce_final_map,
+                self._mla_reduce_partial_map,
+                page_size=1,
+                kv_granularity=16,
+                max_seqlen_qo=max_qo_len,
+                uni_seqlen_qo=max_qo_len,
+                fast_mode=True,
+            )
+            has_persistent_metadata = True
 
         attn_metadata = AiterMLADecodeMetadata(
             block_table=block_table_tensor,
@@ -276,6 +518,7 @@ def _build_decode(
             dcp_tot_seq_lens=dcp_tot_seq_lens_device,
             max_qo_len=max_qo_len,
             attn_out_dtype=self.decode_attn_out_dtype,
+            has_persistent_metadata=has_persistent_metadata,
         )
 
         return attn_metadata
@@ -289,41 +532,117 @@ def build(
         attn_metadata = super().build(
             common_prefix_len, common_attn_metadata, fast_build
         )
-        attn_metadata.work_meta_data = self._mla_work_meta_data
-        attn_metadata.work_indptr = self._mla_work_indptr
-        attn_metadata.work_info_set = self._mla_work_info_set
-        attn_metadata.reduce_indptr = self._mla_reduce_indptr
-        attn_metadata.reduce_final_map = self._mla_reduce_final_map
-        attn_metadata.reduce_partial_map = self._mla_reduce_partial_map
+        if (
+            attn_metadata.decode is not None
+            and attn_metadata.decode.has_persistent_metadata
+        ):
+            attn_metadata.work_meta_data = self._mla_work_meta_data
+            attn_metadata.work_indptr = self._mla_work_indptr
+            attn_metadata.work_info_set = self._mla_work_info_set
+            attn_metadata.reduce_indptr = self._mla_reduce_indptr
+            attn_metadata.reduce_final_map = self._mla_reduce_final_map
+            attn_metadata.reduce_partial_map = self._mla_reduce_partial_map
+        if self._fp8_prefill_enabled and attn_metadata.prefill is not None:
+            self._build_fp8_prefill_ps_metadata(attn_metadata, common_attn_metadata)
         return attn_metadata
 
 
 @triton.jit
-def _copy_page_indices_kernel(
+def _expand_page_indices_kernel(
     page_indices,
     block_table,
     block_table_stride,
-    cu_num_blocks,
+    cu_num_tokens,
+    seq_lens,
+    KERNEL_BLOCK_SIZE: tl.constexpr,
     BLOCK_SIZE: tl.constexpr,
 ):
-    """Copy block table rows into a flat page_indices buffer using indptr.
-    Avoids blocking boolean mask indexing (tensor[mask]) which has
-    data-dependent output size and forces sync.
-    This is the same kernel as introduced in backends/flashinfer.py.
+    """Expand block table entries into per-token flat page indices.
+
+    The aiter MLA kernel always operates with page_size=1 internally
+    (kv_buffer is flattened via .view(-1, 1, 1, H)). This kernel converts
+    block-level indices from the block table into individual token positions
+    in the flattened KV buffer.
+
+    When KERNEL_BLOCK_SIZE=1: block_idx=t, offset=0, flat=block_id
+    (equivalent to a direct copy -- no regression from the original kernel).
+
+    When KERNEL_BLOCK_SIZE=K: block table entry b (covering K tokens)
+    is expanded to flat indices b*K, b*K+1, ..., b*K+(K-1).
     """
     req_idx = tl.program_id(0)
     row_ptr = block_table + req_idx * block_table_stride
-    start_idx = tl.load(cu_num_blocks + req_idx)
-    end_idx = tl.load(cu_num_blocks + req_idx + 1)
-    num_blocks = end_idx - start_idx
+    start_idx = tl.load(cu_num_tokens + req_idx)
+    num_tokens = tl.load(seq_lens + req_idx)
 
     offset = tl.arange(0, BLOCK_SIZE)
-    for i in tl.range(0, num_blocks, BLOCK_SIZE):
-        block_ids = tl.load(row_ptr + i + offset, mask=i + offset < num_blocks)
+    for i in tl.range(0, num_tokens, BLOCK_SIZE):
+        token_offsets = i + offset
+        mask = token_offsets < num_tokens
+
+        # Which block in the block table does this token belong to?
+        block_idx = token_offsets // KERNEL_BLOCK_SIZE
+        # Offset within that block
+        offset_in_block = token_offsets % KERNEL_BLOCK_SIZE
+
+        # Load the block ID from the block table
+        block_ids = tl.load(row_ptr + block_idx, mask=mask)
+
+        # Compute flat index in the flattened kv_buffer
+        flat_indices = block_ids * KERNEL_BLOCK_SIZE + offset_in_block
+
         tl.store(
-            page_indices + start_idx + i + offset,
-            block_ids,
-            mask=i + offset < num_blocks,
+            page_indices + start_idx + token_offsets,
+            flat_indices,
+            mask=mask,
+        )
+
+
+class AiterMLAHelper:
+    """
+    AITER MLA implementation requires num_heads >= 16. If num_heads < 16 and
+    16 % num_heads == 0, we can pad q to 16 heads; otherwise AITER has to fail.
+    """
+
+    _AITER_MIN_MLA_HEADS: Final = 16
+    _AITER_UNSUPPORTED_HEADS: ClassVar[tuple[int, ...]] = ()
+
+    @staticmethod
+    def check_num_heads_validity(num_heads: int):
+        assert AiterMLAHelper.is_valid_num_heads(num_heads), (
+            f"Aiter MLA requires that num_heads be multiples or divisors of 16, "
+            f"but provided {num_heads} number of heads.\n"
+            f"Try adjusting tensor_parallel_size value."
+        )
+
+    @staticmethod
+    def is_valid_num_heads(num_heads: int) -> bool:
+        return (
+            num_heads % AiterMLAHelper._AITER_MIN_MLA_HEADS == 0
+            if num_heads >= AiterMLAHelper._AITER_MIN_MLA_HEADS
+            else AiterMLAHelper._AITER_MIN_MLA_HEADS % num_heads == 0
+        )
+
+    @staticmethod
+    def get_actual_mla_num_heads(num_heads: int) -> int:
+        return max(num_heads, AiterMLAHelper._AITER_MIN_MLA_HEADS)
+
+    @staticmethod
+    def get_mla_padded_q(num_heads: int, q: torch.Tensor) -> torch.Tensor:
+        return (
+            q
+            if num_heads >= AiterMLAHelper._AITER_MIN_MLA_HEADS
+            else q.repeat_interleave(
+                AiterMLAHelper._AITER_MIN_MLA_HEADS // num_heads, dim=1
+            )
+        )
+
+    @staticmethod
+    def get_mla_unpadded_o(num_heads: int, o: torch.Tensor) -> torch.Tensor:
+        return (
+            o
+            if num_heads >= AiterMLAHelper._AITER_MIN_MLA_HEADS
+            else o[:, :: AiterMLAHelper._AITER_MIN_MLA_HEADS // num_heads, :]
         )
 
 
@@ -356,17 +675,8 @@ def __init__(
             kv_sharing_target_layer_name,
             **mla_args,
         )
-        _valid_heads = num_heads in (4, 8) or (
-            num_heads % 16 == 0 and 16 <= num_heads <= 128
-        )
-        assert _valid_heads, (
-            f"Aiter MLA supports num_heads of 4, 8, or multiples of 16 "
-            f"in [16, 128].\n"
-            f"Provided {num_heads} number of heads.\n"
-            "Try adjusting tensor_parallel_size value."
-        )
-        self._needs_head_repeat = num_heads < 16
-        self._head_repeat_factor = 16 // num_heads if num_heads < 16 else 1
+        AiterMLAHelper.check_num_heads_validity(num_heads)
+
         unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap]
         if any(unsupported_features):
             raise NotImplementedError(
@@ -378,6 +688,15 @@ def __init__(
 
         self.flash_attn_varlen_func = flash_attn_varlen_func
 
+        # FP8 MLA prefill kernel imports (lazy, only when enabled).
+        # Auto-enabled on gfx950 when AITER ships the kernels.
+        self._fp8_prefill_enabled = _fp8_mla_prefill_supported()
+        if self._fp8_prefill_enabled:
+            from aiter import mla_prefill_ps_asm_fwd, mla_reduce_v1
+
+            self._mla_prefill_ps_asm_fwd = mla_prefill_ps_asm_fwd
+            self._mla_reduce_v1 = mla_reduce_v1
+
     def _flash_attn_varlen_diff_headdims(
         self, q, k, v, return_softmax_lse=False, softmax_scale=None, **kwargs
     ):
@@ -392,6 +711,157 @@ def _flash_attn_varlen_diff_headdims(
 
         return output
 
+    def _mla_fp8_prefill_attn(
+        self,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        attn_metadata: AiterMLAMetadata,
+        out: torch.Tensor,
+    ) -> None:
+        """Run FP8 MLA prefill via mla_prefill_ps_asm_fwd + mla_reduce_v1.
+
+        Q, K, V are already decompressed (post-kv_b_proj), so K and V have
+        ``num_heads`` heads (same as Q) and gqa_ratio=1.  Writes the
+        result in-place to ``out``, which is the [total_q, nhead * v_head_dim]
+        output buffer supplied by ``forward_mha``; no extra allocation or
+        copy is required.
+        """
+        from vllm.platforms import current_platform
+        from vllm.v1.worker.workspace import current_workspace_manager
+
+        fp8_dtype = current_platform.fp8_dtype()
+        total_q = q.shape[0]
+        nhead = self.num_heads
+        v_head_dim = self.v_head_dim
+        tile_q = _FP8_PREFILL_TILE_Q
+
+        # The FP8 ASM kernel expects FP8 inputs; the q_scale/k_scale/v_scale
+        # parameters select per-tensor dequant scales.  Q/K/V arrive as
+        # bf16 from kv_b_proj, so cast here (one_scale=1.0 disables scaling).
+        if q.dtype != fp8_dtype:
+            q = q.to(fp8_dtype)
+        if k.dtype != fp8_dtype:
+            k = k.to(fp8_dtype)
+        if v.dtype != fp8_dtype:
+            v = v.to(fp8_dtype)
+
+        one_scale = torch.ones((), dtype=torch.float32, device=q.device)
+
+        # num_partial_tiles is resolved during metadata build to avoid an
+        # in-forward .item() sync that would prevent CUDA Graph capture.
+        # forward_mha gates the FP8 path on fp8_prefill_qo_indptr being set,
+        # and the builder always sets every fp8_prefill_* field together, so
+        # num_partial_tiles is non-None here.
+        num_partial_tiles = attn_metadata.fp8_prefill_num_partial_tiles
+        assert num_partial_tiles is not None
+
+        # Reuse the caller's output buffer to skip the per-call alloc + copy.
+        # The ASM and reduce kernels both write to a [total_q, nhead, v_head_dim]
+        # view, which aliases the [total_q, nhead * v_head_dim] storage of out.
+        out_3d = out.view(total_q, nhead, v_head_dim)
+
+        # Per-call scratch (logits, attn_lse, final_lse) is served from the
+        # workspace manager so allocator churn in the prefill hot path is
+        # bounded after warmup, matching the pattern in PR #41002.
+        logits, attn_lse, final_lse = current_workspace_manager().get_simultaneous(
+            ((num_partial_tiles * tile_q, nhead, v_head_dim), torch.float32),
+            ((num_partial_tiles * tile_q, nhead), torch.float32),
+            ((total_q, nhead), torch.float32),
+        )
+
+        # Phase 1: persistent-scheduling assembly prefill kernel.
+        self._mla_prefill_ps_asm_fwd(
+            q,
+            k,
+            v,
+            attn_metadata.fp8_prefill_qo_indptr,
+            attn_metadata.fp8_prefill_kv_indptr,
+            attn_metadata.fp8_prefill_kv_indices,
+            attn_metadata.fp8_prefill_work_indptr,
+            attn_metadata.fp8_prefill_work_info_set,
+            attn_metadata.fp8_prefill_max_q_len,
+            self.scale,
+            True,  # is_causal
+            logits,
+            attn_lse,
+            out_3d,
+            one_scale,
+            one_scale,
+            one_scale,
+        )
+
+        # Phase 2: reduction across KV splits.
+        self._mla_reduce_v1(
+            logits,
+            attn_lse,
+            attn_metadata.fp8_prefill_reduce_indptr,
+            attn_metadata.fp8_prefill_reduce_final_map,
+            attn_metadata.fp8_prefill_reduce_partial_map,
+            tile_q,
+            out_3d,
+            final_lse,
+        )
+
+    def forward_mha(
+        self,
+        q: torch.Tensor,
+        kv_c_normed: torch.Tensor,
+        k_pe: torch.Tensor,
+        kv_c_and_k_pe_cache: torch.Tensor,
+        attn_metadata: MLACommonMetadata,
+        k_scale: torch.Tensor,
+        output: torch.Tensor,
+    ) -> None:
+        """Dispatch prefill to the FP8 ASM kernel when available.
+
+        Falls back to the parent (``flash_attn_varlen_func``) when FP8
+        MLA prefill is disabled, PS metadata is missing, or chunked
+        context requires two-pass merge.
+
+        The annotation uses the base ``MLACommonMetadata`` to honour LSP
+        with ``MLACommonImpl.forward_mha``; the AITER builder always
+        produces ``AiterMLAMetadata`` instances at runtime, so we narrow
+        with ``isinstance`` before reading the AITER-specific FP8 fields.
+        """
+        if (
+            not self._fp8_prefill_enabled
+            or not isinstance(attn_metadata, AiterMLAMetadata)
+            or attn_metadata.fp8_prefill_qo_indptr is None
+        ):
+            return super().forward_mha(
+                q,
+                kv_c_normed,
+                k_pe,
+                kv_c_and_k_pe_cache,
+                attn_metadata,
+                k_scale,
+                output,
+            )
+
+        assert attn_metadata.prefill is not None
+        prefill_metadata = attn_metadata.prefill
+        has_context = prefill_metadata.chunked_context is not None
+
+        if has_context:
+            return super().forward_mha(
+                q,
+                kv_c_normed,
+                k_pe,
+                kv_c_and_k_pe_cache,
+                attn_metadata,
+                k_scale,
+                output,
+            )
+
+        kv_nope = self.kv_b_proj(kv_c_normed)[0].view(
+            -1, self.num_heads, self.qk_nope_head_dim + self.v_head_dim
+        )
+        k_nope, v = kv_nope.split([self.qk_nope_head_dim, self.v_head_dim], dim=-1)
+        k = self._concat_k_nope_k_pe(k_nope, k_pe)
+
+        self._mla_fp8_prefill_attn(q, k, v, attn_metadata, output)
+
     def forward_mqa(
         self,
         q: torch.Tensor | tuple[torch.Tensor, torch.Tensor],
@@ -409,15 +879,11 @@ def forward_mqa(
         assert isinstance(q, torch.Tensor)
         B = q.shape[0]
 
-        if self._needs_head_repeat:
-            q = q.repeat_interleave(self._head_repeat_factor, dim=1)
-            kernel_num_heads = 16
-        else:
-            kernel_num_heads = self.num_heads
-
-        o = torch.zeros(
+        mla_padded_q = AiterMLAHelper.get_mla_padded_q(self.num_heads, q)
+        mla_num_heads = AiterMLAHelper.get_actual_mla_num_heads(self.num_heads)
+        o = torch.empty(
             B,
-            kernel_num_heads,
+            mla_num_heads,
             self.kv_lora_rank,
             dtype=attn_metadata.decode.attn_out_dtype,
             device=q.device,
@@ -425,8 +891,26 @@ def forward_mqa(
 
         kv_buffer = kv_c_and_k_pe_cache.unsqueeze(2)
 
+        # Build kwargs for mla_decode_fwd. Pass persistent metadata only
+        # when it was successfully computed (qseqlen=1 decode steps).
+        # For multi-token verification steps (spec-dec), the kernel falls
+        # back to computing metadata internally.
+        mla_kwargs = dict(
+            q_scale=layer._q_scale,
+            kv_scale=layer._k_scale,
+        )
+        if attn_metadata.work_meta_data is not None:
+            mla_kwargs.update(
+                work_meta_data=attn_metadata.work_meta_data,
+                work_indptr=attn_metadata.work_indptr,
+                work_info_set=attn_metadata.work_info_set,
+                reduce_indptr=attn_metadata.reduce_indptr,
+                reduce_final_map=attn_metadata.reduce_final_map,
+                reduce_partial_map=attn_metadata.reduce_partial_map,
+            )
+
         rocm_aiter_ops.mla_decode_fwd(
-            q,
+            mla_padded_q,
             kv_buffer,
             o,
             self.scale,
@@ -435,17 +919,7 @@ def forward_mqa(
             attn_metadata.decode.paged_kv_indptr,
             attn_metadata.decode.paged_kv_indices,
             attn_metadata.decode.paged_kv_last_page_len,
-            q_scale=layer._q_scale,
-            kv_scale=layer._k_scale,
-            work_meta_data=attn_metadata.work_meta_data,
-            work_indptr=attn_metadata.work_indptr,
-            work_info_set=attn_metadata.work_info_set,
-            reduce_indptr=attn_metadata.reduce_indptr,
-            reduce_final_map=attn_metadata.reduce_final_map,
-            reduce_partial_map=attn_metadata.reduce_partial_map,
+            **mla_kwargs,
         )
 
-        if self._needs_head_repeat:
-            o = o[:, :: self._head_repeat_factor, :]
-
-        return o, None
+        return AiterMLAHelper.get_mla_unpadded_o(self.num_heads, o), None
diff --git a/vllm/v1/attention/backends/mla/rocm_aiter_mla_sparse.py b/vllm/v1/attention/backends/mla/rocm_aiter_mla_sparse.py
index f14271d1bee0..dd25e721d33e 100644
--- a/vllm/v1/attention/backends/mla/rocm_aiter_mla_sparse.py
+++ b/vllm/v1/attention/backends/mla/rocm_aiter_mla_sparse.py
@@ -7,6 +7,7 @@
 import numpy as np
 import torch
 
+from vllm import _custom_ops as ops
 from vllm._aiter_ops import rocm_aiter_ops
 from vllm.config import VllmConfig
 from vllm.config.cache import CacheDType
@@ -14,6 +15,7 @@
 from vllm.model_executor.layers.attention.mla_attention import (
     get_mla_dims,
 )
+from vllm.platforms import current_platform
 from vllm.triton_utils import tl, triton
 from vllm.v1.attention.backend import (
     AttentionBackend,
@@ -25,8 +27,8 @@
     MultipleOf,
     SparseMLAAttentionImpl,
 )
-from vllm.v1.attention.backends.mla.flashmla_sparse import (
-    triton_convert_req_index_to_global_index,
+from vllm.v1.attention.backends.mla.rocm_aiter_mla import (
+    AiterMLAHelper,
 )
 from vllm.v1.kv_cache_interface import AttentionSpec
 
@@ -35,6 +37,188 @@
 logger = init_logger(__name__)
 
 
+@triton.jit
+def _convert_req_index_to_global_index_kernel(
+    req_id_ptr,  # int32 [num_tokens]
+    block_table_ptr,  # int32 [num_requests, max_num_blocks_per_req]
+    token_indices_ptr,  # int32 [num_tokens, NUM_TOPK_TOKENS]
+    cu_seqlens_ptr,  # int32 [num_tokens + 1]
+    out_ptr,  # int32 [num_tokens, NUM_TOPK_TOKENS]
+    # shapes (compile-time where possible)
+    max_num_blocks_per_req: tl.constexpr,
+    BLOCK_SIZE: tl.constexpr,
+    BLOCK_N: tl.constexpr,  # tile width along columns
+    # strides (in elements)
+    bt_stride0,
+    bt_stride1,
+    ti_stride0,
+    ti_stride1,
+):
+    # program_id(0) -> token_id (row)
+    # program_id(1) -> tile index along columns
+    token_id = tl.program_id(0)
+    tile_id = tl.program_id(1)
+
+    # Each program covers BLOCK_N consecutive columns
+    indice_id = tile_id * BLOCK_N + tl.arange(0, BLOCK_N)
+
+    # Load request id for this token (no mask: grid is exact)
+    req = tl.load(req_id_ptr + token_id)
+
+    # Load cumulative sequence lengths to get starting index of this request
+    seq_start = tl.load(cu_seqlens_ptr + token_id)
+    seq_end = tl.load(cu_seqlens_ptr + token_id + 1)
+
+    if tile_id * BLOCK_N + seq_start >= seq_end:
+        return
+
+    # Load token indices for this tile
+    ti_ptr = token_indices_ptr + token_id * ti_stride0 + indice_id * ti_stride1
+    tok = tl.load(ti_ptr)  # int32
+
+    # Only token == -1 should propagate as -1
+    is_invalid_tok = tok < 0
+
+    # Compute block id and in-block offset
+    block_id = tok // BLOCK_SIZE
+    inblock_off = tok % BLOCK_SIZE
+
+    # Guard block_table access
+    valid_block = (block_id < max_num_blocks_per_req) & (block_id >= 0)
+    bt_ptr = block_table_ptr + req * bt_stride0 + block_id * bt_stride1
+    base = tl.load(bt_ptr, mask=valid_block, other=0)
+
+    # # If token == -1 OR block_id OOB, output 0; else base * BLOCK_SIZE + offset
+    out_val = tl.where(
+        is_invalid_tok | (~valid_block), 0, base * BLOCK_SIZE + inblock_off
+    )
+    out_ptr_ij = out_ptr + seq_start + indice_id
+    out_ptr_ij_mask = (seq_start + indice_id) < seq_end
+
+    # store the results with mask
+    tl.store(out_ptr_ij, out_val, mask=out_ptr_ij_mask)
+
+
+def triton_convert_req_index_to_global_index(
+    req_id: torch.Tensor,  # int32 [num_tokens]
+    block_table: torch.Tensor,  # int32 [num_requests, max_num_blocks_per_req]
+    token_indices: torch.Tensor,  # int32 [num_tokens, NUM_TOPK_TOKENS]
+    cu_seqlens: torch.Tensor,  # int32 [num_tokens + 1]
+    paged_kv_indices: torch.Tensor,  # int32 [num_tokens * topk] out_buffer
+    BLOCK_SIZE: int = 64,
+    NUM_TOPK_TOKENS: int = 2048,
+    BLOCK_N: int = 128,  # tile width along columns
+):
+    """
+    out[token_id, indice_id] =
+        block_table[req_id[token_id],
+            token_indices[token_id, indice_id] // BLOCK_SIZE] * BLOCK_SIZE
+        + token_indices[token_id, indice_id] % BLOCK_SIZE
+
+    Only when token_indices[token_id, indice_id] == -1 do we output -1.
+    For safety, we also output -1 if the derived block_id would be
+        out-of-bounds.
+    """
+    assert req_id.dtype == torch.int32
+    assert block_table.dtype == torch.int32
+    assert token_indices.dtype == torch.int32
+    assert token_indices.shape[1] == NUM_TOPK_TOKENS
+    assert NUM_TOPK_TOKENS % BLOCK_N == 0, (
+        f"NUM_TOPK_TOKENS ({NUM_TOPK_TOKENS}) must be divisible byBLOCK_N ({BLOCK_N})"
+    )
+    # print("req_id: ", req_id, flush=True)
+    num_tokens = req_id.shape[0]
+    _, max_num_blocks_per_req = block_table.shape
+    tiles_per_row = NUM_TOPK_TOKENS // BLOCK_N
+
+    # Ensure contiguous tensors on the same device
+    req_id_c = req_id.contiguous()
+    block_table_c = block_table.contiguous()
+    token_indices_c = token_indices.contiguous()
+
+    # Strides in elements
+    bt_stride0, bt_stride1 = block_table_c.stride()
+    ti_stride0, ti_stride1 = token_indices_c.stride()
+
+    # Exact 2D grid: tokens × column tiles
+    grid = (num_tokens, tiles_per_row)
+
+    _convert_req_index_to_global_index_kernel[grid](
+        req_id_c,
+        block_table_c,
+        token_indices_c,
+        cu_seqlens,
+        paged_kv_indices,
+        # shapes / constexprs
+        max_num_blocks_per_req,
+        BLOCK_SIZE,
+        BLOCK_N,
+        # strides
+        bt_stride0,
+        bt_stride1,
+        ti_stride0,
+        ti_stride1,
+    )
+    return
+
+
+@triton.jit
+def generate_sparse_seqlen_kernel(
+    seq_len_ptr,  # [num_seq]
+    cu_query_lens_ptr,  # [num_seq]
+    out_ptr,  # [num_query_tokens]
+    topk_token: tl.constexpr,
+    BLOCK_SIZE: tl.constexpr,
+):
+    seq_id = tl.program_id(0)
+    query_offset = tl.program_id(1) * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+    query_start = tl.load(cu_query_lens_ptr + seq_id)
+    query_end = tl.load(cu_query_lens_ptr + seq_id + 1)
+    if query_start + tl.program_id(1) * BLOCK_SIZE > query_end:
+        return
+    query_len = query_end - query_start
+    query_mask = query_offset + query_start < query_end
+    seq_len = tl.load(seq_len_ptr + seq_id)
+    # Just return since the out_ptr is zero initialized.
+    if seq_len == 0:
+        return
+    context_start_point = seq_len - query_len
+    sparse_seqlen = context_start_point + query_offset
+    sparse_seqlen_masked = tl.where(
+        sparse_seqlen + 1 < topk_token, sparse_seqlen + 1, topk_token
+    )
+    tl.store(
+        out_ptr + query_start + query_offset, sparse_seqlen_masked, mask=query_mask
+    )
+
+
+def generate_sparse_seqlen_triton(
+    query_lens: torch.Tensor,
+    seq_lens: torch.Tensor,
+    cu_query_lens: torch.Tensor,
+    topk_token: int,
+    num_tokens: int,
+    max_query_len: int,
+):
+    num_seqs = query_lens.size(0)
+    # zero initialize the tensor to make sure invalid positions will be zero
+    out = torch.zeros([num_tokens], dtype=torch.int32, device=query_lens.device)
+    block_size = 64
+    num_block_per_row = triton.cdiv(max_query_len, block_size)
+    grid = (
+        num_seqs,
+        num_block_per_row,
+    )
+    generate_sparse_seqlen_kernel[grid](
+        seq_lens,
+        cu_query_lens,
+        out,
+        topk_token,
+        block_size,
+    )
+    return out
+
+
 @triton.jit
 def fetch_id_to_ragged_kernel(
     in_tensor_ptr,  # [num_seq, topk]
@@ -78,17 +262,18 @@ def fetch_id_to_ragged_triton(
 
 
 class ROCMAiterMLASparseBackend(AttentionBackend):
-    accept_output_buffer: bool = True
     supported_dtypes: ClassVar[list[torch.dtype]] = [torch.float16, torch.bfloat16]
     supported_kv_cache_dtypes: ClassVar[list[CacheDType]] = [
         "auto",
         "float16",
         "bfloat16",
+        "fp8",
+        "fp8_e4m3",
     ]
 
     @staticmethod
     def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
-        return [1]
+        return [1, 64]
 
     @staticmethod
     def get_name() -> str:
@@ -142,11 +327,20 @@ class ROCMAiterMLASparseMetadata(AttentionMetadata):
     paged_kv_last_page_len: torch.Tensor
     paged_kv_indices: torch.Tensor
     paged_kv_indptr: torch.Tensor
-    paged_kv_indptr_rest: torch.Tensor
+    attn_out_dtype: torch.dtype
 
     block_size: int = 1
     topk_tokens: int = 2048
 
+    # Persistent MLA metadata (only populated when persistent mode is enabled,
+    # i.e. when the aiter sparse decode kernel supports work-stealing splits).
+    work_meta_data: torch.Tensor | None = None
+    work_indptr: torch.Tensor | None = None
+    work_info_set: torch.Tensor | None = None
+    reduce_indptr: torch.Tensor | None = None
+    reduce_final_map: torch.Tensor | None = None
+    reduce_partial_map: torch.Tensor | None = None
+
 
 @dataclass
 class ROCMAiterMLASparseMetadataBuilder(
@@ -165,6 +359,7 @@ def __init__(
     ):
         self.kv_cache_spec = kv_cache_spec
         self.model_config = vllm_config.model_config
+        self.model_dtype = vllm_config.model_config.dtype
         parallel_config = vllm_config.parallel_config
         self.device = device
         max_num_batched_tokens = vllm_config.scheduler_config.max_num_batched_tokens
@@ -172,9 +367,6 @@ def __init__(
         self.num_heads = self.model_config.get_num_attention_heads(parallel_config)
         self.mla_dims = get_mla_dims(self.model_config)
         self.topk_tokens = vllm_config.model_config.hf_config.index_topk
-        self.topk_tokens_tensor = torch.tensor(
-            [self.topk_tokens], device=device, dtype=torch.int32
-        )
         self.max_model_len_tensor = torch.tensor(
             [self.model_config.max_model_len], device=device, dtype=torch.int32
         )
@@ -206,6 +398,66 @@ def __init__(
             [max_num_batched_tokens + 1], dtype=torch.int32, device=device
         )
 
+        # ----- Persistent MLA metadata buffers -----
+        # The aiter sparse decode kernel supports a "persistent" path that
+        # uses precomputed work-splitting metadata for better load balancing
+        # across CUs. Mirrors the approach used in rocm_aiter_mla.py.
+        #
+        # In the sparse case each query token is its own "batch" entry in the
+        # qo_indptr (qo_indptr = [0, 1, 2, ..., num_tokens]) and max_qo_len=1.
+        # We pad get_mla_metadata_info_v1's batch_size to max_num_batched_tokens
+        # so the buffers are large enough for any decode shape we might see.
+        from aiter import dtypes, get_mla_metadata_info_v1
+
+        # Aiter sparse MLA also requires num_heads >= 16 (will be padded by
+        # AiterMLAHelper.get_mla_padded_q in forward).
+        self._num_attention_heads = max(16, self.num_heads)
+
+        q_dtype = self.model_dtype
+        kv_cache_dtype_str = getattr(vllm_config.cache_config, "cache_dtype", "auto")
+        if kv_cache_dtype_str in ("fp8", "fp8_e4m3", "fp8_e5m2"):
+            kv_cache_dtype_str = "fp8"
+        else:
+            kv_cache_dtype_str = "bf16"
+        kv_dtype = dtypes.d_dtypes.get(kv_cache_dtype_str, dtypes.bf16)
+
+        (
+            (work_meta_data_size, work_meta_data_type),
+            (work_indptr_size, work_indptr_type),
+            (work_info_set_size, work_info_set_type),
+            (reduce_indptr_size, reduce_indptr_type),
+            (reduce_final_map_size, reduce_final_map_type),
+            (reduce_partial_map_size, reduce_partial_map_type),
+        ) = get_mla_metadata_info_v1(
+            max_num_batched_tokens,
+            1,
+            self._num_attention_heads,
+            q_dtype,
+            kv_dtype,
+            is_sparse=True,
+            fast_mode=True,
+        )
+        self._mla_work_meta_data = torch.empty(
+            work_meta_data_size, dtype=work_meta_data_type, device=device
+        )
+        self._mla_work_indptr = torch.empty(
+            work_indptr_size, dtype=work_indptr_type, device=device
+        )
+        self._mla_work_info_set = torch.empty(
+            work_info_set_size, dtype=work_info_set_type, device=device
+        )
+        self._mla_reduce_indptr = torch.empty(
+            reduce_indptr_size, dtype=reduce_indptr_type, device=device
+        )
+        self._mla_reduce_final_map = torch.empty(
+            reduce_final_map_size, dtype=reduce_final_map_type, device=device
+        )
+        self._mla_reduce_partial_map = torch.empty(
+            reduce_partial_map_size,
+            dtype=reduce_partial_map_type,
+            device=device,
+        )
+
     def build(
         self,
         common_prefix_len: int,
@@ -220,18 +472,60 @@ def build(
         )
         # Zero-fill for cudagraphs
         self.req_id_per_token_buffer.fill_(0)
+        self.paged_kv_indices.fill_(0)
+        self.paged_kv_indptr.fill_(0)
         self.req_id_per_token_buffer[: req_id_per_token.shape[0]].copy_(
             torch.from_numpy(req_id_per_token), non_blocking=True
         )
-        self.paged_kv_indices.fill_(0)
-        self.paged_kv_indptr.fill_(0)
+        query_lens = (
+            common_attn_metadata.query_start_loc[1:]
+            - common_attn_metadata.query_start_loc[:-1]
+        )
+        seq_lens = common_attn_metadata.seq_lens
+        sparse_seqlen = generate_sparse_seqlen_triton(
+            query_lens,
+            seq_lens,
+            common_attn_metadata.query_start_loc,
+            self.topk_tokens,
+            num_tokens,
+            common_attn_metadata.max_query_len,
+        )
+
+        torch.cumsum(sparse_seqlen, dim=0, out=self.paged_kv_indptr[1 : num_tokens + 1])
+        self.paged_kv_indptr[num_tokens + 1 :].fill_(self.paged_kv_indptr[num_tokens])
 
         req_id_per_token = self.req_id_per_token_buffer[:num_tokens]
         qo_indptr = self.qo_indptr[: num_tokens + 1]
         paged_kv_last_page_len = self.paged_kv_last_page_len[:num_tokens]
-        paged_kv_indices = self.paged_kv_indices[: num_tokens * self.topk_tokens]
         paged_kv_indptr = self.paged_kv_indptr[: num_tokens + 1]
-        paged_kv_indptr_rest = self.paged_kv_indptr[num_tokens + 1 :]
+        paged_kv_indices = self.paged_kv_indices[: num_tokens * self.topk_tokens]
+
+        # ----- Compute persistent MLA metadata -----
+        # The aiter sparse decode kernel uses qseqlen=1 (each query token is
+        # treated as its own batch entry), so persistent metadata can always
+        # be precomputed here. The kernel switches to the persistent
+        # work-stealing path automatically when work_meta_data is non-None.
+        from aiter import get_mla_metadata_v1
+
+        get_mla_metadata_v1(
+            qo_indptr,
+            paged_kv_indptr,
+            paged_kv_last_page_len,
+            self._num_attention_heads,
+            1,
+            True,
+            self._mla_work_meta_data,
+            self._mla_work_info_set,
+            self._mla_work_indptr,
+            self._mla_reduce_indptr,
+            self._mla_reduce_final_map,
+            self._mla_reduce_partial_map,
+            page_size=1,
+            kv_granularity=16,
+            max_seqlen_qo=1,
+            uni_seqlen_qo=1,
+            fast_mode=True,
+        )
 
         metadata = ROCMAiterMLASparseMetadata(
             num_reqs=common_attn_metadata.num_reqs,
@@ -243,12 +537,18 @@ def build(
             block_table=common_attn_metadata.block_table_tensor,
             req_id_per_token=req_id_per_token,
             block_size=self.kv_cache_spec.block_size,
+            attn_out_dtype=self.model_dtype,
             topk_tokens=self.topk_tokens,
             qo_indptr=qo_indptr,
             paged_kv_last_page_len=paged_kv_last_page_len,
             paged_kv_indices=paged_kv_indices,
             paged_kv_indptr=paged_kv_indptr,
-            paged_kv_indptr_rest=paged_kv_indptr_rest,
+            work_meta_data=self._mla_work_meta_data,
+            work_indptr=self._mla_work_indptr,
+            work_info_set=self._mla_work_info_set,
+            reduce_indptr=self._mla_reduce_indptr,
+            reduce_final_map=self._mla_reduce_final_map,
+            reduce_partial_map=self._mla_reduce_partial_map,
         )
         return metadata
 
@@ -300,6 +600,8 @@ def __init__(
         indexer: "Indexer | None" = None,
         **mla_args,
     ) -> None:
+        AiterMLAHelper.check_num_heads_validity(num_heads)
+
         self.num_heads = num_heads
         self.head_size = head_size
         self.scale = float(scale)
@@ -310,28 +612,37 @@ def __init__(
         assert indexer is not None
         self.topk_indices_buffer: torch.Tensor | None = indexer.topk_indices_buffer
 
-    def _forward_bf16_kv(
+    def _forward_mla(
         self,
+        layer: AttentionLayer,
         q: torch.Tensor,  # [sq, heads, d_qk]
         kv_c_and_k_pe_cache: torch.Tensor,  # [blocks, heads, d_qk]
-        topk_indices: torch.Tensor,  # [sq, topk]
         attn_metadata: ROCMAiterMLASparseMetadata,
     ) -> torch.Tensor:
         num_tokens = q.shape[0]
+        mla_num_heads = AiterMLAHelper.get_actual_mla_num_heads(self.num_heads)
         output = torch.empty(
-            [num_tokens, self.num_heads, self.kv_lora_rank],
-            dtype=q.dtype,
+            [num_tokens, mla_num_heads, self.kv_lora_rank],
+            dtype=attn_metadata.attn_out_dtype,
             device=q.device,
         )
-        seq_len = (topk_indices != -1).sum(dim=-1)
-        torch.cumsum(seq_len, dim=0, out=attn_metadata.paged_kv_indptr[1:])
-        attn_metadata.paged_kv_indptr_rest.fill_(attn_metadata.paged_kv_indptr[-1])
-        fetch_id_to_ragged_triton(
-            topk_indices,
-            attn_metadata.paged_kv_indptr,
-            attn_metadata.paged_kv_indices,
-            attn_metadata.topk_tokens,
+
+        # Build kwargs and forward the persistent MLA metadata when it has
+        # been computed. The aiter mla_decode_fwd switches to its
+        # work-stealing persistent kernel path when work_meta_data is given.
+        mla_kwargs: dict = dict(
+            q_scale=layer._q_scale,
+            kv_scale=layer._k_scale,
         )
+        if attn_metadata.work_meta_data is not None:
+            mla_kwargs.update(
+                work_meta_data=attn_metadata.work_meta_data,
+                work_indptr=attn_metadata.work_indptr,
+                work_info_set=attn_metadata.work_info_set,
+                reduce_indptr=attn_metadata.reduce_indptr,
+                reduce_final_map=attn_metadata.reduce_final_map,
+                reduce_partial_map=attn_metadata.reduce_partial_map,
+            )
 
         rocm_aiter_ops.mla_decode_fwd(
             q,
@@ -343,9 +654,10 @@ def _forward_bf16_kv(
             attn_metadata.paged_kv_indptr,
             attn_metadata.paged_kv_indices,
             attn_metadata.paged_kv_last_page_len,
+            **mla_kwargs,
         )
 
-        return output[:, : self.num_heads, :]
+        return AiterMLAHelper.get_mla_unpadded_o(self.num_heads, output)
 
     def forward_mqa(
         self,
@@ -361,22 +673,32 @@ def forward_mqa(
         if isinstance(q, tuple):
             q = torch.cat(q, dim=-1)
 
-        num_actual_toks = q.shape[0]
+        num_actual_toks = attn_metadata.num_actual_tokens
 
         # Get topk indices
         assert self.topk_indices_buffer is not None
         topk_indices = self.topk_indices_buffer[:num_actual_toks]
 
-        topk_indices_global = triton_convert_req_index_to_global_index(
+        triton_convert_req_index_to_global_index(
             attn_metadata.req_id_per_token,
             attn_metadata.block_table,
             topk_indices,
+            attn_metadata.paged_kv_indptr,
+            attn_metadata.paged_kv_indices,
             BLOCK_SIZE=attn_metadata.block_size,
             NUM_TOPK_TOKENS=attn_metadata.topk_tokens,
         )
 
-        attn_out = self._forward_bf16_kv(
-            q, kv_c_and_k_pe_cache, topk_indices_global, attn_metadata
+        # write the latent and rope to kv cache
+        fp8_attention = self.kv_cache_dtype.startswith("fp8")
+        if fp8_attention:
+            original_q_shape = q.shape
+            kv_c_and_k_pe_cache = kv_c_and_k_pe_cache.view(current_platform.fp8_dtype())
+            q, _ = ops.scaled_fp8_quant(q.view(q.shape[0], -1), layer._q_scale)
+            q = q.view(original_q_shape)
+        mla_padded_q = AiterMLAHelper.get_mla_padded_q(self.num_heads, q)
+        attn_out = self._forward_mla(
+            layer, mla_padded_q, kv_c_and_k_pe_cache, attn_metadata
         )
 
         return attn_out, None
diff --git a/vllm/v1/attention/backends/mla/sparse_swa.py b/vllm/v1/attention/backends/mla/sparse_swa.py
new file mode 100644
index 000000000000..f0e444e493c4
--- /dev/null
+++ b/vllm/v1/attention/backends/mla/sparse_swa.py
@@ -0,0 +1,503 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from dataclasses import dataclass
+from typing import ClassVar, cast
+
+import torch
+
+from vllm.config import CacheConfig, VllmConfig, get_current_vllm_config
+from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
+from vllm.platforms import current_platform
+from vllm.triton_utils import tl, triton
+from vllm.v1.attention.backend import (
+    AttentionBackend,
+    AttentionCGSupport,
+    AttentionMetadataBuilder,
+    CommonAttentionMetadata,
+    MultipleOf,
+)
+from vllm.v1.attention.backends.utils import split_decodes_and_prefills
+from vllm.v1.attention.ops.flashmla import FlashMLASchedMeta, get_mla_metadata
+from vllm.v1.kv_cache_interface import (
+    KVCacheSpec,
+    MLAAttentionSpec,
+    SlidingWindowMLASpec,
+)
+
+# DeepseekV4 decode layer types, keyed by compress_ratio. Each type has a distinct
+# (topk, extra_topk, extra_page_block_size) config, so they cannot share a
+# FlashMLA tile-scheduler plan. Within a type, all ~60 DeepseekV4 layers share one
+# plan per step because b / s_q / h_q / page_block_sizes / topks are identical.
+_LAYER_TYPE_SWAONLY = "swaonly"
+_LAYER_TYPE_C4A = "c4a"
+_LAYER_TYPE_C128A = "c128a"
+
+
+def _layer_type_for(compress_ratio: int) -> str:
+    if compress_ratio <= 1:
+        return _LAYER_TYPE_SWAONLY
+    if compress_ratio == 4:
+        return _LAYER_TYPE_C4A
+    if compress_ratio == 128:
+        return _LAYER_TYPE_C128A
+    raise ValueError(
+        f"Unsupported DeepseekV4 compress_ratio={compress_ratio}; "
+        "expected 1, 4, or 128."
+    )
+
+
+class DeepseekV4SWACache(torch.nn.Module, AttentionLayerBase):
+    def __init__(
+        self,
+        head_dim: int,
+        window_size: int,
+        dtype: torch.dtype,
+        prefix: str,
+        cache_config: CacheConfig,
+    ):
+        super().__init__()
+        self.kv_cache = torch.tensor([])
+        self.head_dim = head_dim
+        self.window_size = window_size
+        self.prefix = prefix
+        self.cache_config = cache_config
+        self.dtype = dtype
+        compilation_config = get_current_vllm_config().compilation_config
+        if prefix in compilation_config.static_forward_context:
+            raise ValueError(f"Duplicate layer name: {prefix}")
+        compilation_config.static_forward_context[prefix] = self
+
+        # Block size is constrained by tensor sharing between SWA and C4A KV blocks.
+        # Since both block types share the same physical tensor, they must use the
+        # same page size. The C4A KV block shape [256//4, head_dim] = [64, head_dim]
+        # determines the SWA block size of 64 tokens per block.
+        # TODO(yifan): make SWA block size automatically determined and configurable.
+        self.block_size = 64
+        assert self.dtype == torch.uint8
+
+    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec:
+        return SlidingWindowMLASpec(
+            block_size=self.block_size,
+            num_kv_heads=1,
+            head_size=self.head_dim,
+            dtype=self.dtype,
+            sliding_window=self.window_size,
+            cache_dtype_str=self.cache_config.cache_dtype,
+            alignment=576,  # NOTE: FlashMLA requires 576B alignment
+            model_version="deepseek_v4",
+        )
+
+    def forward(self): ...
+
+    def get_attn_backend(self) -> type[AttentionBackend]:
+        return DeepseekSparseSWABackend
+
+
+class DeepseekSparseSWABackend(AttentionBackend):
+    @staticmethod
+    def get_name() -> str:
+        return "DEEPSEEK_SPARSE_SWA"
+
+    @staticmethod
+    def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
+        return [MultipleOf(64)]
+
+    @classmethod
+    def get_preferred_block_size(cls, default_block_size: int) -> int:
+        return 256
+
+    @classmethod
+    def get_supported_head_sizes(cls) -> list[int]:
+        return [512]
+
+    @staticmethod
+    def get_builder_cls() -> type["DeepseekSparseSWAMetadataBuilder"]:
+        if current_platform.is_rocm():
+            from vllm.models.deepseek_v4.amd.rocm import (
+                DeepseekV4ROCMAiterSparseSWAMetadataBuilder,
+            )
+
+            return DeepseekV4ROCMAiterSparseSWAMetadataBuilder
+        return DeepseekSparseSWAMetadataBuilder
+
+    @staticmethod
+    def get_kv_cache_shape(
+        num_blocks: int,
+        block_size: int,
+        num_kv_heads: int,
+        head_size: int,
+        cache_dtype_str: str = "auto",
+    ) -> tuple[int, ...]:
+        assert num_kv_heads == 1
+        if cache_dtype_str == "fp8_ds_mla":
+            # DeepseekV4 SWA: 584B per token (448 NoPE + 128 RoPE + 8 fp8 scale).
+            # head_size passed in is the semantic head_dim (512).
+            return (num_blocks, block_size, 584)
+        else:
+            return (num_blocks, block_size, head_size)
+
+    @staticmethod
+    def get_kv_cache_stride_order(
+        include_num_layers_dimension: bool = False,
+    ) -> tuple[int, ...]:
+        if include_num_layers_dimension:
+            return (0, 1, 2, 3)
+        return (0, 1, 2)
+
+
+@dataclass
+class DeepseekSparseSWAMetadata:
+    block_table: torch.Tensor
+    slot_mapping: torch.Tensor
+    block_size: int
+    seq_lens: torch.Tensor | None = None  # [num_seqs]
+    query_start_loc: torch.Tensor | None = None  # [num_seqs + 1]
+    query_start_loc_cpu: torch.Tensor | None = None  # [num_seqs + 1]
+
+    is_valid_token: torch.Tensor | None = None  # [num_tokens]
+    token_to_req_indices: torch.Tensor | None = None  # [num_tokens]
+    decode_swa_indices: torch.Tensor | None = None  # [num_decode_tokens, window_size]
+    decode_swa_lens: torch.Tensor | None = None  # [num_decode_tokens]
+
+    # Number of decode/prefill requests/tokens (batch is reordered: decodes first)
+    num_decodes: int = 0
+    num_prefills: int = 0
+    num_decode_tokens: int = 0
+    num_prefill_tokens: int = 0
+
+    # Pre-computed prefill metadata shared across all DeepseekV4 attention layers.
+    prefill_seq_lens: torch.Tensor | None = None
+    prefill_gather_lens: torch.Tensor | None = None
+
+    # Per-layer-type FlashMLA tile-scheduler metadata. One FlashMLASchedMeta
+    # per present DeepseekV4 layer type, shared across all ~60 layers of that type
+    # within a decode step. The first forward call of a given type triggers
+    # the in-kernel planner (which also allocates tile_scheduler_metadata and
+    # num_splits via PyTorch's graph-aware allocator); subsequent same-type
+    # calls skip planning and reuse the plan. Fresh instance per build(), so
+    # have_initialized is always False at the start of a step and the plan
+    # is re-derived from current seq_lens / topk_length on replay.
+    # None for layer types the model does not use (or when num_decode_tokens
+    # is zero).
+    tile_sched_swaonly: "FlashMLASchedMeta | None" = None
+    tile_sched_c4a: "FlashMLASchedMeta | None" = None
+    tile_sched_c128a: "FlashMLASchedMeta | None" = None
+
+
+class DeepseekSparseSWAMetadataBuilder(AttentionMetadataBuilder):
+    """Builds metadata for DeepseekV4 SWA cache.
+
+    Similar to the indexer, this handles mixed batches by:
+    1. Using split_decodes_and_prefills() to determine the boundary
+    2. Building separate metadata for decode and prefill portions
+
+    Supports:
+    - Mixed decode/prefill batches
+    - MTP (Multi-Token Prediction) where decode has query_len > 1
+    - Chunked prefill (aligns with the indexer's chunking)
+    """
+
+    # Base threshold: query_len <= 1 is decode
+    reorder_batch_threshold: int = 1
+    _cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.UNIFORM_BATCH
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert isinstance(self.kv_cache_spec, SlidingWindowMLASpec | MLAAttentionSpec)
+        mla_spec = cast(SlidingWindowMLASpec | MLAAttentionSpec, self.kv_cache_spec)
+        self.head_size = mla_spec.head_size  # Already considered quantization.
+        self.compress_ratio = mla_spec.compress_ratio
+        self.block_size = mla_spec.block_size
+
+        # Handle MTP: adjust decode_threshold like the indexer does
+        self.num_speculative_tokens = (
+            self.vllm_config.speculative_config.num_speculative_tokens
+            if self.vllm_config.speculative_config
+            else 0
+        )
+        # With MTP, decode can have query_len up to 1 + num_speculative_tokens.
+        # Must match the threshold used by the indexer and flashmla_sparse so
+        # that all backends agree on the decode/prefill split.
+        self.decode_threshold = (
+            self.reorder_batch_threshold + self.num_speculative_tokens
+        )
+
+        hf_config = self.vllm_config.model_config.hf_config
+        assert hasattr(hf_config, "sliding_window")
+        self.window_size = hf_config.sliding_window
+
+        # Detect which DeepseekV4 layer types this model uses so we only build a
+        # FlashMLA tile-scheduler plan for types that will actually be called.
+        # Models without compress_ratios (pure SWA) fall back to swaonly.
+        compress_ratios = getattr(hf_config, "compress_ratios", None) or [1]
+        self._layer_types: set[str] = set()
+        for ratio in compress_ratios:
+            self._layer_types.add(_layer_type_for(int(ratio)))
+
+        max_tokens = self.vllm_config.scheduler_config.max_num_batched_tokens
+        self.token_to_req_indices = torch.zeros(
+            max_tokens,
+            dtype=torch.int32,
+            device=self.device,
+        )
+        self.decode_swa_indices = torch.zeros(
+            max_tokens,
+            1,
+            self.window_size,
+            dtype=torch.int32,
+            device=self.device,
+        )
+        self.decode_swa_lens = torch.zeros(
+            max_tokens,
+            dtype=torch.int32,
+            device=self.device,
+        )
+        self.is_valid_token = torch.zeros(
+            max_tokens,
+            dtype=torch.bool,
+            device=self.device,
+        )
+
+    def build(
+        self,
+        common_prefix_len: int,
+        common_attn_metadata: CommonAttentionMetadata,
+        fast_build: bool = False,
+    ) -> DeepseekSparseSWAMetadata:
+        """Build SWA metadata for mixed decode/prefill batches.
+
+        The batch is assumed to be reordered with decodes first (by vLLM scheduler).
+        We use split_decodes_and_prefills() to find the boundary, then build
+        separate window_topk_idxs for each portion.
+
+        For prefill, we use chunked prefill to align with the indexer's chunking.
+        """
+        num_reqs = common_attn_metadata.num_reqs
+        seq_lens = common_attn_metadata.seq_lens
+        query_start_loc = common_attn_metadata.query_start_loc
+        query_start_loc_cpu = common_attn_metadata.query_start_loc_cpu
+        block_table = common_attn_metadata.block_table_tensor
+        slot_mapping = common_attn_metadata.slot_mapping
+
+        # Split into decode and prefill portions using configurable threshold
+        (num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens) = (
+            split_decodes_and_prefills(
+                common_attn_metadata, decode_threshold=self.decode_threshold
+            )
+        )
+
+        # NOTE: Ensure all metadata tensors maintain fixed memory addresses
+        # for CUDA graph compatibility.
+        query_lens = query_start_loc_cpu[1:] - query_start_loc_cpu[:-1]
+        x = torch.repeat_interleave(torch.arange(num_reqs), query_lens).pin_memory()
+        token_to_req_indices = self.token_to_req_indices[: x.shape[0]]
+        token_to_req_indices.copy_(x, non_blocking=True)
+
+        is_valid_token = self.is_valid_token[: slot_mapping.shape[0]]
+        is_valid_token.copy_(slot_mapping >= 0)
+
+        if num_decode_tokens > 0:
+            self.decode_swa_lens[num_decode_tokens:] = 0
+            _compute_swa_indices_and_lens_kernel[(num_decode_tokens,)](
+                self.decode_swa_indices,
+                self.decode_swa_indices.stride(0),
+                self.decode_swa_lens,
+                self.window_size,
+                query_start_loc,
+                seq_lens,
+                token_to_req_indices,
+                is_valid_token,
+                block_table,
+                block_table.stride(0),
+                self.block_size,
+                TRITON_BLOCK_SIZE=1024,
+            )
+
+        # Pre-compute DeepseekV4 prefill metadata shared across all attention layers.
+        deepseek_v4_fields = self._build_deepseek_v4_metadata(
+            num_decodes,
+            num_prefills,
+            seq_lens,
+            query_start_loc,
+        )
+
+        # Per-layer-type tile-scheduler plan holders. Empty FlashMLASchedMeta
+        # per present DeepseekV4 layer type; the first flash_mla_with_kvcache call of
+        # each type triggers the planner and all same-type layers reuse the
+        # resulting plan for the rest of the step.
+        tile_sched = self.build_tile_scheduler(num_decode_tokens)
+
+        return DeepseekSparseSWAMetadata(
+            seq_lens=seq_lens,
+            query_start_loc=query_start_loc,
+            query_start_loc_cpu=query_start_loc_cpu,
+            block_table=block_table,
+            slot_mapping=slot_mapping,
+            is_valid_token=is_valid_token,
+            token_to_req_indices=token_to_req_indices,
+            decode_swa_indices=self.decode_swa_indices[:num_decode_tokens],
+            decode_swa_lens=self.decode_swa_lens[:num_decode_tokens],
+            block_size=self.block_size,
+            num_decodes=num_decodes,
+            num_prefills=num_prefills,
+            num_decode_tokens=num_decode_tokens,
+            num_prefill_tokens=num_prefill_tokens,
+            tile_sched_swaonly=tile_sched[_LAYER_TYPE_SWAONLY],
+            tile_sched_c4a=tile_sched[_LAYER_TYPE_C4A],
+            tile_sched_c128a=tile_sched[_LAYER_TYPE_C128A],
+            **deepseek_v4_fields,
+        )
+
+    def build_tile_scheduler(
+        self, num_decode_tokens: int
+    ) -> dict[str, FlashMLASchedMeta | None]:
+        """Allocate one empty ``FlashMLASchedMeta`` per present DeepseekV4 layer type.
+
+        Returned instances have ``tile_scheduler_metadata`` / ``num_splits``
+        set to ``None``; the FlashMLA C++ decode path will allocate them and
+        run the tile-scheduler planner on the first ``flash_mla_with_kvcache``
+        call of each type. Subsequent same-type calls reuse the plan because
+        the tensors (and ``have_initialized``) are populated on the struct.
+
+        Returns all-``None`` when there are no decode tokens this step, so
+        ``_forward_decode`` sees a clean sentinel.
+        """
+        out: dict[str, FlashMLASchedMeta | None] = {
+            _LAYER_TYPE_SWAONLY: None,
+            _LAYER_TYPE_C4A: None,
+            _LAYER_TYPE_C128A: None,
+        }
+        if (
+            num_decode_tokens == 0
+            or current_platform.is_rocm()
+            or current_platform.is_xpu()
+        ):
+            return out
+        for layer_type in self._layer_types:
+            # get_mla_metadata() is the official FlashMLA entry point that
+            # returns a fresh empty FlashMLASchedMeta; using it keeps this
+            # call site aligned with the rest of the vLLM FlashMLA backends
+            # that already go through the same stub.
+            out[layer_type] = get_mla_metadata()[0]
+        return out
+
+    def _build_deepseek_v4_metadata(
+        self,
+        num_decodes: int,
+        num_prefills: int,
+        seq_lens: torch.Tensor,
+        query_start_loc: torch.Tensor,
+    ) -> dict[str, torch.Tensor | None]:
+        """Pre-compute DeepseekV4 prefill metadata during the metadata build phase.
+
+        Returns a dict of keyword arguments to pass to the
+        DeepseekSparseSWAMetadata constructor.
+
+        Note: C128A topk indices are computed by the FlashMLASparse builder
+        (which owns the C128A block_table), not here.
+        """
+        result: dict[str, torch.Tensor | None] = {}
+
+        # --- Prefill query metadata (single Triton kernel + CPU slicing) ---
+        if num_prefills > 0:
+            pfx_gather_lens = torch.empty(
+                num_prefills, dtype=torch.int32, device=seq_lens.device
+            )
+            _compute_prefill_metadata_kernel[(1,)](
+                pfx_gather_lens,
+                seq_lens,
+                query_start_loc,
+                num_prefills,
+                num_decodes,
+                self.window_size,
+                BLOCK_SIZE=triton.next_power_of_2(num_prefills),
+            )
+
+            result["prefill_seq_lens"] = seq_lens[num_decodes:]
+            result["prefill_gather_lens"] = pfx_gather_lens
+
+        return result
+
+
+@triton.jit
+def _compute_prefill_metadata_kernel(
+    # Outputs
+    prefill_gather_lens_ptr,
+    # Inputs
+    seq_lens_ptr,
+    query_start_loc_ptr,
+    num_prefills,
+    num_decodes,
+    window_size,
+    BLOCK_SIZE: tl.constexpr,
+):
+    """Compute prefill gather_lens in a single pass."""
+    offset = tl.arange(0, BLOCK_SIZE)
+    mask = offset < num_prefills
+
+    seq_len = tl.load(seq_lens_ptr + num_decodes + offset, mask=mask)
+    qsl_start = tl.load(query_start_loc_ptr + num_decodes + offset, mask=mask)
+    qsl_end = tl.load(query_start_loc_ptr + num_decodes + offset + 1, mask=mask)
+
+    query_len = qsl_end - qsl_start
+    prefix_len = seq_len - query_len
+    gather_len = query_len + tl.minimum(prefix_len, window_size - 1)
+
+    tl.store(prefill_gather_lens_ptr + offset, gather_len, mask=mask)
+
+
+@triton.jit
+def _compute_swa_indices_and_lens_kernel(
+    swa_indices_ptr,
+    swa_indices_stride,
+    swa_lens_ptr,
+    window_size,
+    query_start_loc_ptr,
+    seq_lens_ptr,
+    token_to_req_indices_ptr,
+    is_valid_token_ptr,
+    block_table_ptr,
+    block_table_stride,
+    block_size,
+    TRITON_BLOCK_SIZE: tl.constexpr,
+):
+    token_idx = tl.program_id(0)
+    is_valid = tl.load(is_valid_token_ptr + token_idx)
+    if not is_valid:
+        tl.store(swa_lens_ptr + token_idx, 0)
+        return
+
+    req_idx = tl.load(token_to_req_indices_ptr + token_idx)
+
+    query_start = tl.load(query_start_loc_ptr + req_idx)
+    query_end = tl.load(query_start_loc_ptr + req_idx + 1)
+    query_len = query_end - query_start
+
+    seq_len = tl.load(seq_lens_ptr + req_idx)
+    prefix_len = seq_len - query_len
+
+    pos = prefix_len + token_idx - query_start
+    start_pos = tl.maximum(pos - window_size + 1, 0)
+    end_pos = pos + 1
+
+    swa_len = end_pos - start_pos
+    tl.store(swa_lens_ptr + token_idx, swa_len)
+
+    for i in range(0, window_size, TRITON_BLOCK_SIZE):
+        offset = i + tl.arange(0, TRITON_BLOCK_SIZE)
+
+        pos_offset = start_pos + offset
+        block_indices = pos_offset // block_size
+        block_numbers = tl.load(
+            block_table_ptr + req_idx * block_table_stride + block_indices,
+            mask=pos_offset < end_pos,
+        )
+        block_offsets = pos_offset % block_size
+        slot_ids = block_numbers * block_size + block_offsets
+
+        slot_ids = tl.where(offset < swa_len, slot_ids, -1)
+        tl.store(
+            swa_indices_ptr + token_idx * swa_indices_stride + offset,
+            slot_ids,
+            mask=offset < window_size,
+        )
diff --git a/vllm/v1/attention/backends/mla/tokenspeed_mla.py b/vllm/v1/attention/backends/mla/tokenspeed_mla.py
new file mode 100644
index 000000000000..6c8dedd77f27
--- /dev/null
+++ b/vllm/v1/attention/backends/mla/tokenspeed_mla.py
@@ -0,0 +1,277 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""TokenSpeed CuTe DSL MLA decode backend (Blackwell, FP8 KV cache only)."""
+
+from typing import ClassVar
+
+import torch
+
+from vllm.config.cache import CacheDType
+from vllm.logger import init_logger
+from vllm.model_executor.layers.attention.mla_attention import (
+    MLACommonBackend,
+    MLACommonImpl,
+    MLACommonMetadata,
+    MLACommonMetadataBuilder,
+    QueryLenSupport,
+)
+from vllm.platforms.interface import DeviceCapability
+from vllm.utils.torch_utils import is_quantized_kv_cache
+from vllm.v1.attention.backend import (
+    AttentionCGSupport,
+    AttentionLayer,
+    AttentionType,
+    MultipleOf,
+)
+from vllm.v1.attention.backends.utils import KVCacheLayoutType
+
+logger = init_logger(__name__)
+
+# Workspace upper bound for tokenspeed_mla_decode (per-device, lazy):
+#   num_sms * num_heads * MAX_Q_LEN * (kv_lora_rank + 1) * sizeof(float32)
+# Matches the kernel's `get_workspace_size` formula. MAX_Q_LEN=8 covers up to
+# EAGLE3 / MTP-2 spec decoding query lengths; larger q_len fails the kernel's
+# own buffer check.
+_TOKENSPEED_MAX_Q_LEN = 8
+
+_g_workspace: dict[torch.device, torch.Tensor] = {}
+
+
+def _get_workspace(
+    device: torch.device, num_heads: int, kv_lora_rank: int
+) -> torch.Tensor:
+    from tokenspeed_mla import get_num_sm
+
+    needed = (
+        get_num_sm(device) * num_heads * _TOKENSPEED_MAX_Q_LEN * (kv_lora_rank + 1) * 4
+    )
+    existing = _g_workspace.get(device)
+    if existing is None or existing.numel() < needed:
+        _g_workspace[device] = torch.empty(needed, dtype=torch.int8, device=device)
+    return _g_workspace[device]
+
+
+class TokenspeedMLAMetadataBuilder(MLACommonMetadataBuilder[MLACommonMetadata]):
+    _cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.UNIFORM_BATCH
+    query_len_support: ClassVar[QueryLenSupport] = QueryLenSupport.UNIFORM
+
+
+class TokenspeedMLABackend(MLACommonBackend):
+    supported_dtypes: ClassVar[list[torch.dtype]] = [torch.float16, torch.bfloat16]
+    supported_kv_cache_dtypes: ClassVar[list[CacheDType]] = [
+        "fp8",
+        "fp8_e4m3",
+    ]
+
+    @staticmethod
+    def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
+        return [32, 64]
+
+    @staticmethod
+    def get_name() -> str:
+        return "TOKENSPEED_MLA"
+
+    @staticmethod
+    def get_impl_cls() -> type["TokenspeedMLAImpl"]:
+        return TokenspeedMLAImpl
+
+    @staticmethod
+    def get_builder_cls() -> type["TokenspeedMLAMetadataBuilder"]:
+        return TokenspeedMLAMetadataBuilder
+
+    @classmethod
+    def supports_compute_capability(cls, capability: DeviceCapability) -> bool:
+        return capability.major == 10
+
+    @classmethod
+    def supports_combination(
+        cls,
+        head_size: int,
+        dtype: torch.dtype,
+        kv_cache_dtype: CacheDType | None,
+        block_size: int | None,
+        use_mla: bool,
+        has_sink: bool,
+        use_sparse: bool,
+        device_capability: DeviceCapability,
+    ) -> str | None:
+        # Surface a clear install hint up front rather than letting a raw
+        # ModuleNotFoundError fire deep inside `forward_mqa` at first request.
+        try:
+            import tokenspeed_mla  # noqa: F401
+        except ImportError:
+            return (
+                "tokenspeed_mla package is not installed. "
+                "Install it with: `uv pip install tokenspeed-mla`"
+            )
+
+        # tokenspeed_mla CuTe DSL kernel is shape-specialized for DeepSeek R1
+        # MLA dimensions (qk_nope=128, qk_rope=64, v=128). Reject anything else.
+        from vllm.config import get_current_vllm_config
+
+        vllm_config = get_current_vllm_config()
+        if vllm_config.model_config is not None:
+            hf_text_config = vllm_config.model_config.hf_text_config
+            qk_nope_head_dim = getattr(hf_text_config, "qk_nope_head_dim", 0)
+            qk_rope_head_dim = getattr(hf_text_config, "qk_rope_head_dim", 0)
+            v_head_dim = getattr(hf_text_config, "v_head_dim", 0)
+            if qk_nope_head_dim != 128 or qk_rope_head_dim != 64 or v_head_dim != 128:
+                return (
+                    "tokenspeed_mla requires DeepSeek R1 MLA dimensions "
+                    "(qk_nope_head_dim=128, qk_rope_head_dim=64, v_head_dim=128), "
+                    f"got ({qk_nope_head_dim}, {qk_rope_head_dim}, {v_head_dim})"
+                )
+        return None
+
+    @classmethod
+    def get_required_kv_cache_layout(cls) -> "KVCacheLayoutType | None":
+        return "HND"
+
+
+class TokenspeedMLAImpl(MLACommonImpl[MLACommonMetadata]):
+    def __init__(
+        self,
+        num_heads: int,
+        head_size: int,
+        scale: float,
+        num_kv_heads: int,
+        alibi_slopes: list[float] | None,
+        sliding_window: int | None,
+        kv_cache_dtype: str,
+        logits_soft_cap: float | None,
+        attn_type: str,
+        kv_sharing_target_layer_name: str | None,
+        # MLA Specific Arguments
+        **mla_args,
+    ) -> None:
+        super().__init__(
+            num_heads,
+            head_size,
+            scale,
+            num_kv_heads,
+            alibi_slopes,
+            sliding_window,
+            kv_cache_dtype,
+            logits_soft_cap,
+            attn_type,
+            kv_sharing_target_layer_name,
+            **mla_args,
+        )
+
+        unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap]
+        if any(unsupported_features):
+            raise NotImplementedError(
+                "TokenspeedMLAImpl does not support one of the following: "
+                "alibi_slopes, sliding_window, logits_soft_cap"
+            )
+
+        if attn_type != AttentionType.DECODER:
+            raise NotImplementedError(
+                "Encoder self-attention and "
+                "encoder/decoder cross-attention "
+                "are not implemented for "
+                "TokenspeedMLAImpl"
+            )
+
+        if not is_quantized_kv_cache(self.kv_cache_dtype):
+            raise NotImplementedError(
+                "TokenspeedMLAImpl requires an FP8 KV cache "
+                "(--kv-cache-dtype fp8 or fp8_e4m3); "
+                f"got kv_cache_dtype={self.kv_cache_dtype!r}."
+            )
+
+        # Allocate (or fetch the cached) workspace lazily on first forward —
+        # __init__ runs before the device is necessarily set on the worker;
+        # we know it for sure at forward time when we see the input tensor.
+        self._workspace_buffer: torch.Tensor | None = None
+        self.softmax_scale: float | None = None
+        self.output_scale: float | None = None
+
+        # Pre-JIT BF16 and FP8 prefill kernels here too — decode impl always
+        # runs when tokenspeed is selected, prefill backend may not (user can
+        # pair with flash_attn / trtllm). Idempotent.
+        from tokenspeed_mla import warmup_compile_prefill
+
+        for q_dtype in (torch.bfloat16, torch.float8_e4m3fn):
+            warmup_compile_prefill(
+                q_dtype=q_dtype,
+                d_qk=self.qk_nope_head_dim + self.qk_rope_head_dim,
+                d_v=self.v_head_dim,
+                enable_pdl=False,
+            )
+
+    def forward_mqa(
+        self,
+        q: torch.Tensor | tuple[torch.Tensor, torch.Tensor],
+        kv_c_and_k_pe_cache: torch.Tensor,
+        attn_metadata: MLACommonMetadata,
+        layer: AttentionLayer,
+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
+        from tokenspeed_mla import tokenspeed_mla_decode
+
+        assert kv_c_and_k_pe_cache.numel() > 0
+        assert attn_metadata.decode is not None
+
+        if isinstance(q, tuple):
+            q_nope, q_pe = q
+            q = torch.cat([q_nope, q_pe], dim=-1)
+
+        # supports_quant_query_input=True (set in MLACommonImpl) tells the
+        # pipeline to concat+FP8-quantize Q upstream via _decode_concat_quant_fp8_op.
+        # The kernel is shape-specialized for FP8 Q + FP8 KV, so anything else
+        # here means the upstream quant didn't run and the kernel will produce
+        # garbage.
+        assert q.dtype == torch.float8_e4m3fn, (
+            f"TokenspeedMLAImpl expected FP8 query (supports_quant_query_input=True), "
+            f"got {q.dtype}. Pipeline isinstance(q, tuple)={isinstance(q, tuple)}, "
+            f"q_scale={layer._q_scale_float}, k_scale={layer._k_scale_float}."
+        )
+
+        # tokenspeed_mla_decode expects query shape
+        # (num_decodes, q_len_per_request, num_heads, head_dim).
+        if attn_metadata.num_decode_tokens % attn_metadata.num_decodes != 0:
+            logger.warning_once(
+                """TokenspeedMLAImpl got a query of uneven length.
+                This usually indicates an issue in batch reordering
+                or incorrect setup in dummy_run."""
+            )
+            q = q.unsqueeze(1)
+        else:
+            q = q.view(attn_metadata.num_decodes, -1, q.shape[-2], q.shape[-1])
+
+        if self.softmax_scale is None:
+            # FP8 KV cache is mandatory for this backend, so q_scale/k_scale
+            # always apply. softmax_scale is bmm1; output_scale is bmm2 — both
+            # required to recover the correct attention output from the FP8
+            # KV cache (V is stored as V_real/k_scale).
+            self.softmax_scale = (
+                self.scale * layer._q_scale_float * layer._k_scale_float
+            )
+            self.output_scale = layer._k_scale_float
+
+        if self._workspace_buffer is None:
+            self._workspace_buffer = _get_workspace(
+                q.device, self.num_heads, self.kv_lora_rank
+            )
+
+        # vLLM kv_c_and_k_pe_cache is already (num_blocks, block_size, head_size).
+        # tokenspeed_mla_decode wants 3D — pass as-is (no unsqueeze, unlike trtllm).
+        o = tokenspeed_mla_decode(
+            query=q,
+            kv_cache=kv_c_and_k_pe_cache,
+            workspace_buffer=self._workspace_buffer,
+            kv_lora_rank=self.kv_lora_rank,
+            qk_rope_head_dim=self.qk_rope_head_dim,
+            block_tables=attn_metadata.decode.block_table,
+            seq_lens=attn_metadata.decode.seq_lens,
+            max_seq_len=attn_metadata.max_seq_len,
+            softmax_scale=self.softmax_scale,
+            output_scale=self.output_scale,
+            enable_pdl=False,
+        )
+
+        # Flatten the output for consistent shape
+        o = o.view(-1, o.shape[-2], o.shape[-1])
+
+        # tokenspeed_mla_decode does not return LSE.
+        return o, None
diff --git a/vllm/v1/attention/backends/mla/triton_mla.py b/vllm/v1/attention/backends/mla/triton_mla.py
index 3de5be31d598..c2aa5edccb66 100644
--- a/vllm/v1/attention/backends/mla/triton_mla.py
+++ b/vllm/v1/attention/backends/mla/triton_mla.py
@@ -12,19 +12,27 @@
     MLACommonBackend,
     MLACommonImpl,
     MLACommonMetadata,
+    MLACommonMetadataBuilder,
 )
+from vllm.platforms import current_platform
 from vllm.platforms.interface import DeviceCapability
+from vllm.triton_utils import triton
+from vllm.utils.torch_utils import is_quantized_kv_cache
 from vllm.v1.attention.backend import (
+    AttentionCGSupport,
     AttentionLayer,
     AttentionType,
     MultipleOf,
-    is_quantized_kv_cache,
 )
 from vllm.v1.attention.ops.triton_decode_attention import decode_attention_fwd
 
 logger = init_logger(__name__)
 
 
+class TritonMLAMetadataBuilder(MLACommonMetadataBuilder[MLACommonMetadata]):
+    _cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.UNIFORM_BATCH
+
+
 class TritonMLABackend(MLACommonBackend):
     supported_dtypes: ClassVar[list[torch.dtype]] = [torch.float16, torch.bfloat16]
     supported_kv_cache_dtypes: ClassVar[list[CacheDType]] = [
@@ -53,10 +61,18 @@ def supports_block_size(cls, block_size: int | None) -> bool:
     def get_name() -> str:
         return "TRITON_MLA"
 
+    @classmethod
+    def supports_batch_invariance(cls) -> bool:
+        return True
+
     @staticmethod
     def get_impl_cls() -> type["TritonMLAImpl"]:
         return TritonMLAImpl
 
+    @staticmethod
+    def get_builder_cls() -> type["TritonMLAMetadataBuilder"]:
+        return TritonMLAMetadataBuilder
+
     @classmethod
     def supports_compute_capability(cls, capability: DeviceCapability) -> bool:
         return True
@@ -115,17 +131,7 @@ def __init__(
         if is_quantized_kv_cache(self.kv_cache_dtype):
             self.supports_quant_query_input = False
 
-    def _flash_attn_varlen_diff_headdims(
-        self, q, k, v, return_softmax_lse=False, softmax_scale=None, **kwargs
-    ):
-        return super()._flash_attn_varlen_diff_headdims(
-            q,
-            k,
-            v,
-            return_softmax_lse=return_softmax_lse,
-            softmax_scale=softmax_scale,
-            **kwargs,
-        )
+        self._sm_count = current_platform.num_compute_units()
 
     def forward_mqa(
         self,
@@ -149,7 +155,24 @@ def forward_mqa(
         lse = torch.zeros(B, q_num_heads, dtype=q.dtype, device=q.device)
 
         # For batch invariance, use only 1 split to ensure deterministic reduction
-        num_kv_splits = 1 if envs.VLLM_BATCH_INVARIANT else 4
+        if envs.VLLM_BATCH_INVARIANT:
+            num_kv_splits = 1
+        else:
+            # Minimum work per split
+            # hardware dependent
+            min_work_per_split = 512
+
+            ideal_splits = max(1, attn_metadata.max_seq_len // min_work_per_split)
+
+            # use power of 2 to avoid excessive kernel instantiations
+            ideal_splits = triton.next_power_of_2(ideal_splits)
+
+            # Calculate SM-based maximum splits with occupancy multiplier
+            # 2-4x allows multiple blocks per SM for latency hiding
+            # hardware dependent
+            occupancy_multiplier = 2
+            max_splits = self._sm_count * occupancy_multiplier
+            num_kv_splits = min(ideal_splits, max_splits)
 
         # TODO(lucas) Allocate ahead of time
         attn_logits = torch.empty(
@@ -186,6 +209,7 @@ def forward_mqa(
             PAGE_SIZE,
             k_scale=layer._k_scale,
             v_scale=layer._k_scale,
+            is_mla=True,
         )
 
         return o, lse
diff --git a/vllm/v1/attention/backends/mla/xpu_mla_sparse.py b/vllm/v1/attention/backends/mla/xpu_mla_sparse.py
index 44455a7008e8..2fa91d018388 100644
--- a/vllm/v1/attention/backends/mla/xpu_mla_sparse.py
+++ b/vllm/v1/attention/backends/mla/xpu_mla_sparse.py
@@ -13,6 +13,7 @@
 from vllm.model_executor.layers.attention.mla_attention import (
     get_mla_dims,
 )
+from vllm.utils.torch_utils import is_quantized_kv_cache
 from vllm.v1.attention.backend import (
     AttentionBackend,
     AttentionCGSupport,
@@ -34,7 +35,6 @@
 
 
 class XPUMLASparseBackend(AttentionBackend):
-    accept_output_buffer: bool = True
     supported_dtypes: ClassVar[list[torch.dtype]] = [torch.float16, torch.bfloat16]
     supported_kv_cache_dtypes: ClassVar[list[CacheDType]] = [
         "auto",
@@ -231,7 +231,7 @@ def forward_mqa(
         # NOTE(lucas): for the sparse FlashMLA kernels the kernels want to use
         # MQA 576/512 approach for both prefill and decode
 
-        if self.kv_cache_dtype.startswith("fp8"):
+        if is_quantized_kv_cache(self.kv_cache_dtype):
             raise NotImplementedError("FP8 kv is not supported with XPU MLA Sparse yet")
 
         # Concatenate q if it's a tuple (ql_nope, q_pe)
diff --git a/vllm/v1/attention/backends/registry.py b/vllm/v1/attention/backends/registry.py
index 4744ead4f54b..87abb6884313 100644
--- a/vllm/v1/attention/backends/registry.py
+++ b/vllm/v1/attention/backends/registry.py
@@ -63,6 +63,9 @@ class AttentionBackendEnum(Enum, metaclass=_AttentionBackendEnumMeta):
     FLASHINFER_MLA = (
         "vllm.v1.attention.backends.mla.flashinfer_mla.FlashInferMLABackend"
     )
+    TOKENSPEED_MLA = (
+        "vllm.v1.attention.backends.mla.tokenspeed_mla.TokenspeedMLABackend"
+    )
     FLASHINFER_MLA_SPARSE = (
         "vllm.v1.attention.backends.mla.flashinfer_mla_sparse."
         "FlashInferMLASparseBackend"
@@ -76,12 +79,12 @@ class AttentionBackendEnum(Enum, metaclass=_AttentionBackendEnumMeta):
     FLASH_ATTN_MLA = "vllm.v1.attention.backends.mla.flashattn_mla.FlashAttnMLABackend"
     NO_ATTENTION = "vllm.v1.attention.backends.no_attention.NoAttentionBackend"
     FLEX_ATTENTION = "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend"
-    TREE_ATTN = "vllm.v1.attention.backends.tree_attn.TreeAttentionBackend"
     ROCM_AITER_UNIFIED_ATTN = (
         "vllm.v1.attention.backends.rocm_aiter_unified_attn."
         "RocmAiterUnifiedAttentionBackend"
     )
     CPU_ATTN = "vllm.v1.attention.backends.cpu_attn.CPUAttentionBackend"
+    TURBOQUANT = "vllm.v1.attention.backends.turboquant_attn.TurboQuantAttentionBackend"
     # Placeholder for third-party/custom backends - must be registered before use
     # set to None to avoid alias with other backend, whose value is an empty string
     CUSTOM = None
@@ -193,16 +196,6 @@ def clear_override(self) -> None:
         _MAMBA_ATTN_OVERRIDES.pop(self, None)
 
 
-MAMBA_TYPE_TO_BACKEND_MAP = {
-    "mamba1": MambaAttentionBackendEnum.MAMBA1.name,
-    "mamba2": MambaAttentionBackendEnum.MAMBA2.name,
-    "short_conv": MambaAttentionBackendEnum.SHORT_CONV.name,
-    "linear_attention": MambaAttentionBackendEnum.LINEAR.name,
-    "gdn_attention": MambaAttentionBackendEnum.GDN_ATTN.name,
-    "custom": MambaAttentionBackendEnum.CUSTOM.name,
-}
-
-
 _ATTN_OVERRIDES: dict[AttentionBackendEnum, str] = {}
 _MAMBA_ATTN_OVERRIDES: dict[MambaAttentionBackendEnum, str] = {}
 
diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py
index 6c6e82b1b936..5dbedc86bc02 100644
--- a/vllm/v1/attention/backends/rocm_aiter_fa.py
+++ b/vllm/v1/attention/backends/rocm_aiter_fa.py
@@ -16,6 +16,7 @@
 from vllm.platforms.interface import DeviceCapability
 from vllm.utils.math_utils import cdiv
 from vllm.utils.platform_utils import num_compute_units
+from vllm.utils.torch_utils import is_quantized_kv_cache
 from vllm.v1.attention.backend import (
     AttentionBackend,
     AttentionCGSupport,
@@ -111,10 +112,12 @@ def cp_mha_gather_cache_kernel(
             if DEQUANT:
                 k_scale = tl.load(k_scale_ptr)
                 v_scale = tl.load(v_scale_ptr)
-                k_dtype = k_reg.dtype
-                v_dtype = v_reg.dtype
-                k_reg = (k_reg.to(tl.float32) * k_scale).to(k_dtype)
-                v_reg = (v_reg.to(tl.float32) * v_scale).to(v_dtype)
+                k_reg = (k_reg.to(tl.float32) * k_scale).to(
+                    key_ptr_offset.dtype.element_ty
+                )
+                v_reg = (v_reg.to(tl.float32) * v_scale).to(
+                    value_ptr_offset.dtype.element_ty
+                )
             tl.store(key_ptr_offset + col_offsets, k_reg)
             tl.store(value_ptr_offset + col_offsets, v_reg)
 
@@ -227,7 +230,6 @@ def reshape_and_cache_shuffle_kernel(
         num_kv_heads,
         BLOCK_SIZE: tl.constexpr,
         QUANT: tl.constexpr,
-        IS_FNUZ: tl.constexpr,
     ):
         tid = tl.program_id(0)
         head_id = tl.program_id(1)
@@ -291,7 +293,7 @@ def reshape_and_cache_shuffle_triton(
         new_key_cache = key_cache.view_as(k_cache_template)
         new_value_cache = value_cache.view_as(v_cache_template)
         QUANT = False
-        if kv_cache_dtype.startswith("fp8"):
+        if is_quantized_kv_cache(kv_cache_dtype):
             QUANT = True
         grid = (
             num_tokens,
@@ -313,7 +315,6 @@ def reshape_and_cache_shuffle_triton(
             num_kv_heads,
             BLOCK_SIZE=head_size,
             QUANT=QUANT,
-            IS_FNUZ=current_platform.fp8_dtype() == torch.float8_e4m3fnuz,
         )
 
 
@@ -323,22 +324,17 @@ def reshape_and_cache_shuffle_triton(
 @dataclass
 class AiterFlashAttentionDecodeMetadata:
     max_query_len: int
-    min_query_len: int
-    max_seq_len: int
-    query_start_loc: torch.Tensor
 
 
 @dataclass
 class AiterFlashAttentionPrefillMetadata:
     max_query_len: int
-    min_query_len: int
     max_seq_len: int
     query_start_loc: torch.Tensor
 
 
 @dataclass
 class AiterChunkSlidingWindowMetadata:
-    swa_seqlens: torch.Tensor
     swa_cu_seqlens: torch.Tensor
     swa_seq_starts: torch.Tensor
     swa_token_to_batch: torch.Tensor
@@ -353,9 +349,7 @@ class AiterChunkContextMetadata:
     cu_seq_lens_chunk: torch.Tensor
     chunk_starts: torch.Tensor
     token_to_batch: torch.Tensor
-    seq_tot: list[int]
     max_seq_lens: list[int]
-    seq_lens: torch.Tensor
     num_chunks: int
     total_token_per_batch: list[int]
     swa_metadata: AiterChunkSlidingWindowMetadata | None
@@ -364,7 +358,6 @@ class AiterChunkContextMetadata:
 @dataclass
 class AiterFlashAttentionChunkPrefillMetadata:
     max_query_len: int
-    min_query_len: int
     max_seq_len: int
     query_start_loc: torch.Tensor
     chunk_context_metadata: AiterChunkContextMetadata
@@ -381,19 +374,17 @@ class AiterFlashAttentionMetadata:
     #                                   |-- query_len ---|
 
     num_actual_tokens: int  # Number of tokens excluding padding.
-    num_actual_kv_tokens: int
-    max_query_len: int
     query_start_loc: torch.Tensor
     max_seq_len: int
     seq_lens: torch.Tensor
     slot_mapping: torch.Tensor
     block_table: torch.Tensor
+    causal: bool
 
     # prefill and decode split
     num_decodes: int
     num_decode_tokens: int
     num_prefills: int
-    num_prefill_tokens: int
     num_extends: int
     num_extend_tokens: int
 
@@ -403,8 +394,6 @@ class AiterFlashAttentionMetadata:
 
     # For cascade attention.
     use_cascade: bool
-    common_prefix_len: int
-    total_tokens: int
 
     # Only for fp8 shuffle layout kv cache, we allocate kv_scale for each layer
     # since we might integrate per token quant for kv cache in the future.
@@ -439,7 +428,6 @@ def __init__(
         # Sliding window size to be used with the AOT scheduler will be
         # populated on first build() call.
         self.aot_sliding_window: tuple[int, int] | None = None
-        self.total_tokens: int = 0
         self._init_reorder_batch_threshold(1, supports_spec_as_decode=True)
 
         sliding_window_configs: set[tuple[int, int] | None] = set()
@@ -471,13 +459,9 @@ def __init__(
     def build_for_cudagraph_capture(
         self, common_attn_metadata: CommonAttentionMetadata
     ):
-        self.total_tokens = (
-            self.model_config.max_model_len
-            * self.vllm_config.scheduler_config.max_num_partial_prefills
+        return self.build(
+            common_prefix_len=0, common_attn_metadata=common_attn_metadata
         )
-        res = self.build(common_prefix_len=0, common_attn_metadata=common_attn_metadata)
-        self.total_tokens = 0
-        return res
 
     def build(
         self,
@@ -494,7 +478,7 @@ def build(
         if (
             rocm_aiter_ops.is_shuffle_kv_cache_enabled()
             and self.scale.numel() == 1
-            and self.vllm_config.cache_config.cache_dtype.startswith("fp8")
+            and is_quantized_kv_cache(self.vllm_config.cache_config.cache_dtype)
         ):
             layers = get_layers_from_vllm_config(self.vllm_config, Attention)
             first_layer_name = [k for k in layers][0]
@@ -513,12 +497,18 @@ def build(
             num_prefills,
             num_decode_tokens,
             num_extend_tokens,
-            num_prefill_tokens,
+            _,
         ) = split_ret
 
         query_start_loc_cpu = common_attn_metadata.query_start_loc_cpu
 
-        seq_lens = common_attn_metadata.seq_lens.cpu()
+        # Only copy seq_lens to CPU when prefill or extend is present to avoid a
+        # blocking device→host transfer.
+        seq_lens = (
+            common_attn_metadata.seq_lens.cpu()
+            if num_prefills > 0 or num_extends > 0
+            else None
+        )
 
         query_lens_cpu = query_start_loc_cpu[1:] - query_start_loc_cpu[:-1]
 
@@ -526,26 +516,24 @@ def build(
         if num_decodes > 0:
             decode_metadata = AiterFlashAttentionDecodeMetadata(
                 max_query_len=query_lens_cpu[:num_decodes].max().item(),
-                min_query_len=query_lens_cpu[:num_decodes].min().item(),
-                max_seq_len=seq_lens[:num_decodes].max().item(),
-                query_start_loc=common_attn_metadata.query_start_loc[: num_decodes + 1],
             )
 
         prefill_metadata = None
         if num_prefills > 0:
+            assert seq_lens is not None
             query_lens_for_prefill = query_lens_cpu[num_decodes + num_extends :]
             query_start_loc_device = common_attn_metadata.query_start_loc[
                 num_decodes + num_extends :
             ]
             prefill_metadata = AiterFlashAttentionPrefillMetadata(
                 max_query_len=query_lens_for_prefill.max().item(),
-                min_query_len=query_lens_for_prefill.min().item(),
                 max_seq_len=seq_lens[num_decodes + num_extends :].max().item(),
                 query_start_loc=query_start_loc_device - query_start_loc_device[0],
             )
 
         extend_metadata = None
         if num_extends > 0:
+            assert seq_lens is not None
             num_extends_slice = slice(num_decodes, num_decodes + num_extends)
             query_lens_for_extend = query_lens_cpu[num_extends_slice]
             seq_lens_for_extend = seq_lens[num_extends_slice]
@@ -589,9 +577,6 @@ def build(
                 total_tokens = cu_seq_lens[-1].item()
 
                 swa_metadata = AiterChunkSlidingWindowMetadata(
-                    swa_seqlens=swa_seqlen_for_extend.to(
-                        self.device, non_blocking=True
-                    ),
                     swa_cu_seqlens=cu_seq_lens.to(self.device, non_blocking=True),
                     swa_seq_starts=seq_starts.to(self.device, non_blocking=True),
                     swa_token_to_batch=token_to_seq.to(self.device, non_blocking=True),
@@ -636,10 +621,8 @@ def build(
                 workspace=self.extend_workspace,
                 cu_seq_lens_chunk=cu_seq_lens_cpu.to(self.device, non_blocking=True),
                 chunk_starts=chunk_starts.to(self.device, non_blocking=True),
-                seq_tot=chunk_seq_lens.sum(dim=1).tolist(),
-                max_seq_lens=chunk_seq_lens.max(dim=1).values.tolist(),
-                seq_lens=chunk_seq_lens,
                 token_to_batch=token_to_batch_tensor.to(self.device, non_blocking=True),
+                max_seq_lens=chunk_seq_lens.max(dim=1).values.tolist(),
                 num_chunks=num_chunks,
                 total_token_per_batch=cu_seq_lens_cpu[:, -1].tolist(),
                 swa_metadata=swa_metadata,
@@ -657,37 +640,30 @@ def build(
             )
             extend_metadata = AiterFlashAttentionChunkPrefillMetadata(
                 max_query_len=query_lens_for_extend.max().item(),
-                min_query_len=query_lens_for_extend.min().item(),
                 max_seq_len=seq_lens[num_extends_slice].max().item(),
                 query_start_loc=query_start_loc_device - query_start_loc_device[0],
                 chunk_context_metadata=chunk_context_metadata,
             )
 
-        num_actual_kv_tokens = torch.sum(seq_lens).item()
-
         use_cascade = common_prefix_len > 0
 
         attn_metadata = AiterFlashAttentionMetadata(
             num_actual_tokens=common_attn_metadata.num_actual_tokens,
-            num_actual_kv_tokens=num_actual_kv_tokens,
-            max_query_len=common_attn_metadata.max_query_len,
             query_start_loc=common_attn_metadata.query_start_loc,
             max_seq_len=common_attn_metadata.max_seq_len,
             seq_lens=common_attn_metadata.seq_lens,
             block_table=common_attn_metadata.block_table_tensor,
+            causal=common_attn_metadata.causal,
             slot_mapping=common_attn_metadata.slot_mapping,
             num_decodes=num_decodes,
             num_decode_tokens=num_decode_tokens,
             num_prefills=num_prefills,
-            num_prefill_tokens=num_prefill_tokens,
             num_extends=num_extends,
             num_extend_tokens=num_extend_tokens,
             decode_metadata=decode_metadata,
             prefill_metadata=prefill_metadata,
             extend_metadata=extend_metadata,
             use_cascade=use_cascade,
-            common_prefix_len=common_prefix_len,
-            total_tokens=self.total_tokens,
             k_scale=self.scale,
             v_scale=self.scale,
         )
@@ -710,32 +686,25 @@ def build_for_drafting(
 
         decode_metadata = AiterFlashAttentionDecodeMetadata(
             max_query_len=common_attn_metadata.max_query_len,
-            min_query_len=common_attn_metadata.max_query_len,  # uniform batch
-            max_seq_len=common_attn_metadata.max_seq_len,
-            query_start_loc=common_attn_metadata.query_start_loc,
         )
 
         return AiterFlashAttentionMetadata(
             num_actual_tokens=num_tokens,
-            num_actual_kv_tokens=0,  # not used in unified_attention path
-            max_query_len=common_attn_metadata.max_query_len,
             query_start_loc=common_attn_metadata.query_start_loc,
             max_seq_len=common_attn_metadata.max_seq_len,
             seq_lens=common_attn_metadata.seq_lens,
             block_table=common_attn_metadata.block_table_tensor,
+            causal=common_attn_metadata.causal,
             slot_mapping=common_attn_metadata.slot_mapping,
             num_decodes=num_reqs,
             num_decode_tokens=num_tokens,
             num_prefills=0,
-            num_prefill_tokens=0,
             num_extends=0,
             num_extend_tokens=0,
             decode_metadata=decode_metadata,
             prefill_metadata=None,
             extend_metadata=None,
             use_cascade=False,
-            common_prefix_len=0,
-            total_tokens=self.total_tokens,
             k_scale=self.scale,
             v_scale=self.scale,
         )
@@ -745,7 +714,6 @@ def use_cascade_attention(self, *args, **kwargs) -> bool:
 
 
 class AiterFlashAttentionBackend(AttentionBackend):
-    accept_output_buffer: bool = True
     supported_dtypes: ClassVar[list[torch.dtype]] = [torch.float16, torch.bfloat16]
     supported_kv_cache_dtypes: ClassVar[list[CacheDType]] = [
         "auto",
@@ -808,6 +776,10 @@ def supports_compute_capability(cls, capability: DeviceCapability) -> bool:
         # more reliable.
         return on_mi3xx()
 
+    @classmethod
+    def supports_non_causal(cls) -> bool:
+        return True
+
 
 class AiterFlashAttentionImpl(AttentionImpl):
     def __init__(
@@ -844,9 +816,13 @@ def __init__(
         assert self.num_heads % self.num_kv_heads == 0
         self.num_queries_per_kv = self.num_heads // self.num_kv_heads
 
-        if attn_type not in [AttentionType.DECODER, AttentionType.ENCODER_DECODER]:
+        if attn_type != AttentionType.DECODER:
             raise NotImplementedError(
-                "Encoder self-attention is not implemented for AiterFlashAttentionImpl"
+                "Only decoder self-attention is supported for "
+                "AiterFlashAttentionImpl. ENCODER_DECODER is not supported "
+                "because the prefill path uses cu_seqlens_k set to decoder "
+                "query_start_loc with causal=True, which is incorrect for "
+                "cross-attention."
             )
 
     def extend_for_sliding_window(
@@ -887,7 +863,7 @@ def extend_for_sliding_window(
             cu_seqlens_kv=swa_cu_seqlens,
             token_to_batch=swa_token_to_batch,
             seq_starts=swa_seq_starts,
-            dequant=self.kv_cache_dtype.startswith("fp8"),
+            dequant=is_quantized_kv_cache(self.kv_cache_dtype),
             kv_cache_layout="NHD",
             total_tokens=swa_total_tokens,
         )
@@ -921,8 +897,8 @@ def extend_forward(
         output: torch.Tensor,
         cu_seqlens_q: torch.Tensor,
         max_seqlen_q: int,
-        max_seqlen_k: int,
         min_seqlen_q: int,
+        max_seqlen_k: int,
         block_table: torch.Tensor,
         slot_mapping: torch.Tensor,
         k_scale: torch.Tensor,
@@ -982,7 +958,7 @@ def extend_forward(
                 cu_seqlens_kv=cu_seqlens_kv[chunk_idx],
                 token_to_batch=token_to_batch[chunk_idx],
                 seq_starts=chunk_starts[chunk_idx],
-                dequant=self.kv_cache_dtype.startswith("fp8"),
+                dequant=is_quantized_kv_cache(self.kv_cache_dtype),
                 kv_cache_layout="SHUFFLE"
                 if rocm_aiter_ops.is_shuffle_kv_cache_enabled()
                 else "NHD",
@@ -1038,7 +1014,7 @@ def forward(
         value: torch.Tensor,
         kv_cache: torch.Tensor,
         attn_metadata: AiterFlashAttentionMetadata,
-        output: torch.Tensor | None = None,
+        output: torch.Tensor,
         output_scale: torch.Tensor | None = None,
         output_block_scale: torch.Tensor | None = None,
     ) -> torch.Tensor:
@@ -1057,8 +1033,6 @@ def forward(
               {q,k,v}_descale to be (num_sequences, num_kv_heads).
               We use torch's .expand() to avoid duplicating values
         """
-        assert output is not None, "Output tensor must be provided."
-
         if output_scale is not None or output_block_scale is not None:
             raise NotImplementedError(
                 "fused output quantization is not yet supported "
@@ -1081,7 +1055,7 @@ def forward(
         num_actual_tokens = attn_metadata.num_actual_tokens
         key_cache, value_cache = kv_cache.unbind(0)
 
-        if self.kv_cache_dtype.startswith("fp8"):
+        if is_quantized_kv_cache(self.kv_cache_dtype):
             key_cache = key_cache.view(current_platform.fp8_dtype())
             value_cache = value_cache.view(current_platform.fp8_dtype())
 
@@ -1120,7 +1094,7 @@ def forward(
                     min_seqlen_q=1,
                     dropout_p=0.0,
                     softmax_scale=self.scale,
-                    causal=True,
+                    causal=attn_metadata.causal,
                     window_size=self.sliding_window,
                     alibi_slopes=self.alibi_slopes,
                     out=output_actual_tokens[num_decode_tokens + num_extend_tokens :],
@@ -1151,8 +1125,8 @@ def forward(
                     output=extend_outputs,
                     cu_seqlens_q=attn_metadata.extend_metadata.query_start_loc,
                     max_seqlen_q=attn_metadata.extend_metadata.max_query_len,
-                    max_seqlen_k=attn_metadata.extend_metadata.max_seq_len,
                     min_seqlen_q=1,
+                    max_seqlen_k=attn_metadata.extend_metadata.max_seq_len,
                     block_table=attn_metadata.block_table[
                         num_decodes : num_decodes + num_extends
                     ],
@@ -1168,39 +1142,78 @@ def forward(
                 assert attn_metadata.decode_metadata is not None
                 decode_max_query_len = attn_metadata.decode_metadata.max_query_len
 
-                # Use unified_attention for speculative decoding (multi-token)
+                # Multi-token speculative decode path.
                 if decode_max_query_len > 1:
                     assert not rocm_aiter_ops.is_shuffle_kv_cache_enabled(), (
                         "Shuffle KV cache layout is not supported with "
                         "speculative decoding (multi-token decode)."
                     )
-                    from aiter.ops.triton.unified_attention import (
-                        unified_attention,
-                    )
-
-                    descale_shape = (
-                        attn_metadata.query_start_loc[:num_decodes].shape[0] - 1,
-                        key_cache.shape[2],
-                    )
-                    unified_attention(
-                        q=query[:num_decode_tokens],
-                        k=key_cache,
-                        v=value_cache,
-                        out=output[:num_decode_tokens],
-                        cu_seqlens_q=attn_metadata.query_start_loc[:num_decodes],
-                        max_seqlen_q=decode_max_query_len,
-                        seqused_k=attn_metadata.seq_lens[:num_decodes],
-                        max_seqlen_k=attn_metadata.max_seq_len,
-                        softmax_scale=self.scale,
-                        causal=True,
-                        alibi_slopes=self.alibi_slopes,
-                        window_size=self.sliding_window,
-                        block_table=attn_metadata.block_table[:num_decodes],
-                        softcap=self.logits_soft_cap,
-                        q_descale=None,
-                        k_descale=layer._k_scale.expand(descale_shape),
-                        v_descale=layer._v_scale.expand(descale_shape),
-                    )
+                    if not attn_metadata.causal:
+                        from aiter.ops.triton.attention.mha_v3 import (
+                            flash_attn_with_kvcache,
+                        )
+
+                        descale_shape = (num_decodes, key_cache.shape[2])
+                        decode_query = query[:num_decode_tokens].reshape(
+                            num_decodes,
+                            decode_max_query_len,
+                            query.shape[1],
+                            query.shape[2],
+                        )
+                        decode_out = flash_attn_with_kvcache(
+                            q=decode_query,
+                            k_cache=key_cache,
+                            v_cache=value_cache,
+                            cache_seqlens=attn_metadata.seq_lens[:num_decodes],
+                            softmax_scale=self.scale,
+                            causal=attn_metadata.causal,
+                            window_size=self.sliding_window,
+                            softcap=self.logits_soft_cap,
+                            q_descale=None,
+                            k_descale=layer._k_scale.expand(descale_shape),
+                            v_descale=layer._v_scale.expand(descale_shape),
+                            page_table=attn_metadata.block_table[:num_decodes],
+                        )
+                        output[:num_decode_tokens].copy_(
+                            decode_out.reshape(
+                                num_decode_tokens,
+                                query.shape[1],
+                                query.shape[2],
+                            )
+                        )
+                    else:
+                        # Non-uniform query lengths can appear in real serving
+                        # traffic (e.g. mixed datasets). Fall back to varlen
+                        # unified_attention instead of asserting.
+                        from aiter.ops.triton.unified_attention import (
+                            unified_attention,
+                        )
+
+                        descale_shape = (
+                            num_decodes,
+                            key_cache.shape[2],
+                        )
+                        unified_attention(
+                            q=query[:num_decode_tokens],
+                            k=key_cache,
+                            v=value_cache,
+                            out=output[:num_decode_tokens],
+                            cu_seqlens_q=attn_metadata.query_start_loc[
+                                : num_decodes + 1
+                            ],
+                            max_seqlen_q=decode_max_query_len,
+                            seqused_k=attn_metadata.seq_lens[:num_decodes],
+                            max_seqlen_k=attn_metadata.max_seq_len,
+                            softmax_scale=self.scale,
+                            causal=True,
+                            alibi_slopes=self.alibi_slopes,
+                            window_size=self.sliding_window,
+                            block_table=attn_metadata.block_table[:num_decodes],
+                            softcap=self.logits_soft_cap,
+                            q_descale=None,
+                            k_descale=layer._k_scale.expand(descale_shape),
+                            v_descale=layer._v_scale.expand(descale_shape),
+                        )
                     return
 
                 # The ll4mi kernel in paged_attention_v1 requires
@@ -1247,7 +1260,23 @@ def forward(
                         v_descale=layer._v_scale.expand(descale_shape),
                     )
                 elif rocm_aiter_ops.is_shuffle_kv_cache_enabled():
-                    num_blocks, block_size, num_kv_heads, head_size = key_cache.shape
+                    _, num_heads, head_size = query.shape
+                    num_seqs = attn_metadata.seq_lens.shape[0]
+                    max_num_partitions = (
+                        attn_metadata.max_seq_len + _PARTITION_SIZE_ROCM - 1
+                    ) // _PARTITION_SIZE_ROCM
+                    tmp_out = torch.empty(
+                        (num_seqs, num_heads, max_num_partitions, head_size),
+                        dtype=query.dtype,
+                        device=query.device,
+                    )
+                    exp_sums = torch.empty(
+                        (num_seqs, num_heads, max_num_partitions),
+                        dtype=torch.float32,
+                        device=query.device,
+                    )
+                    max_logits = torch.empty_like(exp_sums)
+                    num_blocks, block_size, num_kv_heads, _ = key_cache.shape
                     x = 16 // key_cache.element_size()
                     k_cache_template = torch.empty(
                         [num_blocks, num_kv_heads, head_size // x, block_size, x],
@@ -1261,18 +1290,36 @@ def forward(
                     )
                     new_key_cache = key_cache.view_as(k_cache_template)
                     new_value_cache = value_cache.view_as(v_cache_template)
-                    rocm_aiter_ops.pa_fwd_asm(
+                    k_qscale = (
+                        layer._k_scale
+                        if attn_metadata.k_scale is None
+                        else attn_metadata.k_scale
+                    )
+                    v_qscale = (
+                        layer._v_scale
+                        if attn_metadata.v_scale is None
+                        else attn_metadata.v_scale
+                    )
+                    rocm_aiter_ops.paged_attention_common(
                         Q=query[:num_decode_tokens],
                         K=new_key_cache,
                         V=new_value_cache,
+                        tmp_out=tmp_out,
+                        max_logits=max_logits,
+                        exp_sums=exp_sums,
+                        max_seq_len=attn_metadata.max_seq_len,
                         block_tables=attn_metadata.block_table[:num_decodes],
                         context_lens=attn_metadata.seq_lens[:num_decodes],
                         block_tables_stride0=attn_metadata.block_table[
                             :num_decodes
                         ].stride(0),
-                        K_QScale=attn_metadata.k_scale,
-                        V_QScale=attn_metadata.v_scale,
+                        scale=self.scale,
+                        K_QScale_hip=k_qscale,
+                        V_QScale_hip=v_qscale,
+                        K_QScale_asm=k_qscale,
+                        V_QScale_asm=v_qscale,
                         out_=output[:num_decode_tokens],
+                        kv_cache_dtype=self.kv_cache_dtype,
                     )
                 else:
                     _, num_heads, head_size = query.shape
@@ -1336,7 +1383,7 @@ def do_kv_cache_update(
         # key and value may be None in the case of cross attention. They are
         # calculated once based on the output from the encoder and then cached
         # in KV cache.
-        if self.kv_cache_dtype.startswith("fp8"):
+        if is_quantized_kv_cache(self.kv_cache_dtype):
             key_cache = key_cache.view(current_platform.fp8_dtype())
             value_cache = value_cache.view(current_platform.fp8_dtype())
         # Reshape the input keys and values and store them in the cache.
@@ -1402,7 +1449,7 @@ def do_rope_and_kv_cache_update(
         key_cache, value_cache = kv_cache.unbind(0)
         flash_layout = True
 
-        is_fp8_kv_cache = self.kv_cache_dtype.startswith("fp8")
+        is_fp8_kv_cache = is_quantized_kv_cache(self.kv_cache_dtype)
         if is_fp8_kv_cache:
             key_cache = key_cache.view(current_platform.fp8_dtype())
             value_cache = value_cache.view(current_platform.fp8_dtype())
diff --git a/vllm/v1/attention/backends/rocm_aiter_unified_attn.py b/vllm/v1/attention/backends/rocm_aiter_unified_attn.py
index bd7f137f9427..f56b58c43e7f 100644
--- a/vllm/v1/attention/backends/rocm_aiter_unified_attn.py
+++ b/vllm/v1/attention/backends/rocm_aiter_unified_attn.py
@@ -11,11 +11,12 @@
     QuantKey,
     kFp8StaticTensorSym,
 )
+from vllm.utils.torch_utils import is_quantized_kv_cache
 from vllm.v1.attention.backend import AttentionLayer, AttentionType, MultipleOf
-from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
 from vllm.v1.attention.backends.rocm_attn import (
     RocmAttentionBackend,
     RocmAttentionImpl,
+    RocmAttentionMetadata,
     RocmAttentionMetadataBuilder,
 )
 
@@ -23,8 +24,6 @@
 
 
 class RocmAiterUnifiedAttentionBackend(RocmAttentionBackend):
-    accept_output_buffer: bool = True
-
     @staticmethod
     def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
         return [MultipleOf(16)]
@@ -54,6 +53,10 @@ def supports_mm_prefix(cls) -> bool:
     def supports_sink(cls) -> bool:
         return True
 
+    @classmethod
+    def supports_non_causal(cls) -> bool:
+        return False
+
     forward_includes_kv_cache_update: bool = False
 
     @staticmethod
@@ -141,8 +144,8 @@ def forward(
         key: torch.Tensor,
         value: torch.Tensor,
         kv_cache: torch.Tensor,
-        attn_metadata: FlashAttentionMetadata,
-        output: torch.Tensor | None = None,
+        attn_metadata: RocmAttentionMetadata,
+        output: torch.Tensor,
         output_scale: torch.Tensor | None = None,
         output_block_scale: torch.Tensor | None = None,
     ) -> torch.Tensor:
@@ -158,8 +161,6 @@ def forward(
         Returns:
             shape = [num_tokens, num_heads * head_size]
         """
-        assert output is not None, "Output tensor must be provided."
-
         if output_block_scale is not None:
             raise NotImplementedError(
                 "fused block_scale output quantization is not yet supported"
@@ -199,19 +200,9 @@ def forward(
         key_cache, value_cache = kv_cache.unbind(0)
 
         softmax_scale = self.scale
-        fp8_post_attn_v_rescale = False
-        if self.kv_cache_dtype.startswith("fp8"):
+        if is_quantized_kv_cache(self.kv_cache_dtype):
             key_cache = key_cache.view(self.fp8_dtype)
             value_cache = value_cache.view(self.fp8_dtype)
-            # When Q is FP8, triton kernel skips K/V dequant (for fp8xfp8 matmul).
-            # Compensate by absorbing q_scale and k_scale into softmax_scale, and
-            # v_scale into output_scale (or post-multiplying if no fusion).
-            if query.dtype == self.fp8_dtype:
-                softmax_scale = self.scale * layer._q_scale_float * layer._k_scale_float
-                if output_scale is not None:
-                    output_scale = output_scale / layer._v_scale_float
-                else:
-                    fp8_post_attn_v_rescale = True
 
         cu_seqlens_q = attn_metadata.query_start_loc
         seqused_k = attn_metadata.seq_lens
@@ -219,11 +210,6 @@ def forward(
         max_seqlen_k = attn_metadata.max_seq_len
         block_table = attn_metadata.block_table
 
-        descale_shape = (
-            cu_seqlens_q.shape[0] - 1,
-            key.shape[1] if key is not None else self.num_kv_heads,
-        )
-
         self.unified_attention(
             q=query[:num_actual_tokens],
             k=key_cache,
@@ -239,16 +225,13 @@ def forward(
             window_size=self.sliding_window,
             block_table=block_table,
             softcap=self.logits_soft_cap,
-            q_descale=None,  # q_scale absorbed into softmax_scale
-            k_descale=layer._k_scale.expand(descale_shape),
-            v_descale=layer._v_scale.expand(descale_shape),
+            q_descale=layer._q_scale if query.dtype == self.fp8_dtype else None,
+            k_descale=layer._k_scale,
+            v_descale=layer._v_scale,
             sinks=self.sinks,
             output_scale=output_scale,
         )
 
-        if fp8_post_attn_v_rescale:
-            output[:num_actual_tokens].mul_(layer._v_scale_float)
-
         return output
 
     def do_kv_cache_update(
@@ -299,7 +282,7 @@ def do_rope_and_kv_cache_update(
         key_cache, value_cache = kv_cache.unbind(0)
         flash_layout = True
 
-        is_fp8_kv_cache = self.kv_cache_dtype.startswith("fp8")
+        is_fp8_kv_cache = is_quantized_kv_cache(self.kv_cache_dtype)
         if is_fp8_kv_cache:
             key_cache = key_cache.view(self.fp8_dtype)
             value_cache = value_cache.view(self.fp8_dtype)
diff --git a/vllm/v1/attention/backends/rocm_attn.py b/vllm/v1/attention/backends/rocm_attn.py
index 6afb617f28ed..d533268e2176 100644
--- a/vllm/v1/attention/backends/rocm_attn.py
+++ b/vllm/v1/attention/backends/rocm_attn.py
@@ -16,6 +16,7 @@
     kFp8StaticTensorSym,
 )
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import is_quantized_kv_cache
 from vllm.v1.attention.backend import (
     AttentionBackend,
     AttentionCGSupport,
@@ -26,9 +27,9 @@
     CommonAttentionMetadata,
     MultipleOf,
 )
-from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
 from vllm.v1.attention.ops.chunked_prefill_paged_decode import (
     chunked_prefill_paged_decode,
+    has_native_kv_cache_layout,
 )
 from vllm.v1.attention.ops.paged_attn import PagedAttention
 from vllm.v1.attention.ops.triton_reshape_and_cache_flash import (
@@ -68,6 +69,9 @@ class RocmAttentionMetadata:
     scheduler_metadata: torch.Tensor | None = None
     prefix_scheduler_metadata: torch.Tensor | None = None
 
+    # DFlash drafting sets this to False via CommonAttentionMetadata.
+    causal: bool = True
+
 
 class RocmAttentionMetadataBuilder(AttentionMetadataBuilder[RocmAttentionMetadata]):
     _cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.ALWAYS
@@ -153,12 +157,12 @@ def build(
             prefix_kv_lens=prefix_kv_lens,
             suffix_kv_lens=suffix_kv_lens,
             prefix_scheduler_metadata=prefix_scheduler_metadata,
+            causal=common_attn_metadata.causal,
         )
         return attn_metadata
 
 
 class RocmAttentionBackend(AttentionBackend):
-    accept_output_buffer: bool = True
     supported_dtypes: ClassVar[list[torch.dtype]] = [
         torch.float16,
         torch.bfloat16,
@@ -200,6 +204,10 @@ def supports_sink(cls) -> bool:
         # kernel, which is less efficient than the proper triton backends.
         return False
 
+    @classmethod
+    def supports_non_causal(cls) -> bool:
+        return True
+
     forward_includes_kv_cache_update: bool = False
 
     @staticmethod
@@ -301,7 +309,7 @@ def _forward_encoder_attention(
         key: torch.Tensor,
         value: torch.Tensor,
         output: torch.Tensor,
-        attn_metadata: FlashAttentionMetadata,
+        attn_metadata: RocmAttentionMetadata,
         layer: torch.nn.Module,
     ) -> torch.Tensor:
         """Forward pass for encoder attention without KV cache.
@@ -315,7 +323,7 @@ def _forward_encoder_attention(
             layer: The attention layer
         """
         # For encoder attention, process FP8 quantization if needed
-        if self.kv_cache_dtype.startswith("fp8"):
+        if is_quantized_kv_cache(self.kv_cache_dtype):
             raise NotImplementedError(
                 "quantization is not supported for encoder attention"
             )
@@ -350,8 +358,8 @@ def forward(
         key: torch.Tensor,
         value: torch.Tensor,
         kv_cache: torch.Tensor,
-        attn_metadata: FlashAttentionMetadata,
-        output: torch.Tensor | None = None,
+        attn_metadata: RocmAttentionMetadata,
+        output: torch.Tensor,
         output_scale: torch.Tensor | None = None,
         output_block_scale: torch.Tensor | None = None,
     ) -> torch.Tensor:
@@ -367,8 +375,6 @@ def forward(
         Returns:
             shape = [num_tokens, num_heads * head_size]
         """
-        assert output is not None, "Output tensor must be provided."
-
         if output_block_scale is not None:
             raise NotImplementedError(
                 "fused block_scale output quantization is not yet supported"
@@ -406,7 +412,7 @@ def forward(
             kv_cache, self.num_kv_heads, self.head_size
         )
 
-        if self.kv_cache_dtype.startswith("fp8"):
+        if is_quantized_kv_cache(self.kv_cache_dtype):
             key_cache = key_cache.view(self.fp8_dtype)
             value_cache = value_cache.view(self.fp8_dtype)
             assert layer._q_scale_float == 1.0, (
@@ -440,6 +446,7 @@ def forward(
             sm_scale=self.scale,
             output_scale=output_scale,
             sinks=self.sinks,
+            causal=attn_metadata.causal,
         )
 
         return output
@@ -462,9 +469,10 @@ def do_kv_cache_update(
         # Get the actual block_size from value_cache
         # value_cache shape: [num_blocks, num_heads, head_size, block_size]
         block_size = value_cache.shape[3]
+        has_native_layout = has_native_kv_cache_layout(key_cache, value_cache)
 
-        if block_size in (16, 32):
-            # Normal 16, 32, use vLLM native HIP C++ logic
+        if block_size in (16, 32) and has_native_layout:
+            # Normal 16, 32 with contiguous blocks: use vLLM native HIP C++ logic.
             PagedAttention.write_to_paged_cache(
                 key,
                 value,
@@ -476,8 +484,10 @@ def do_kv_cache_update(
                 layer._v_scale,
             )
         else:
-            # Case B: Non-standard blocks (e.g., 64, 128, 544 in Qwen3Next or Qwen3.5 ),
-            # force using our modified Triton logic
+            # Non-standard blocks and hybrid attention/Mamba layouts need the
+            # stride-aware Triton writer. The native reshape_and_cache kernel
+            # assumes contiguous block storage and writes to the wrong hybrid
+            # cache blocks.
             triton_reshape_and_cache_flash(
                 key,
                 value,
@@ -513,7 +523,7 @@ def do_rope_and_kv_cache_update(
         )
         flash_layout = False
 
-        is_fp8_kv_cache = self.kv_cache_dtype.startswith("fp8")
+        is_fp8_kv_cache = is_quantized_kv_cache(self.kv_cache_dtype)
         if is_fp8_kv_cache:
             key_cache = key_cache.view(self.fp8_dtype)
             value_cache = value_cache.view(self.fp8_dtype)
diff --git a/vllm/v1/attention/backends/short_conv_attn.py b/vllm/v1/attention/backends/short_conv_attn.py
index c6a8e6eeaa16..9c85ec5efb30 100644
--- a/vllm/v1/attention/backends/short_conv_attn.py
+++ b/vllm/v1/attention/backends/short_conv_attn.py
@@ -18,6 +18,10 @@ def get_name() -> str:
     def get_builder_cls() -> type["ShortConvAttentionMetadataBuilder"]:
         return ShortConvAttentionMetadataBuilder
 
+    @classmethod
+    def is_ssm(cls) -> bool:
+        return True
+
 
 @dataclass
 class ShortConvAttentionMetadata(BaseMambaAttentionMetadata):
diff --git a/vllm/v1/attention/backends/tree_attn.py b/vllm/v1/attention/backends/tree_attn.py
deleted file mode 100644
index 587f71628777..000000000000
--- a/vllm/v1/attention/backends/tree_attn.py
+++ /dev/null
@@ -1,445 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""Attention layer with TreeAttention."""
-
-import ast
-from dataclasses import dataclass
-from typing import ClassVar
-
-import torch
-
-from vllm import _custom_ops as ops
-from vllm.config import VllmConfig
-from vllm.config.cache import CacheDType
-from vllm.logger import init_logger
-from vllm.v1.attention.backend import (
-    AttentionBackend,
-    AttentionImpl,
-    AttentionMetadataBuilder,
-    AttentionType,
-    CommonAttentionMetadata,
-    MultipleOf,
-)
-from vllm.v1.attention.backends.utils import (
-    split_decodes_and_prefills,
-)
-from vllm.v1.attention.ops.triton_unified_attention import unified_attention
-from vllm.v1.kv_cache_interface import AttentionSpec
-
-logger = init_logger(__name__)
-
-
-class TreeAttentionBackend(AttentionBackend):
-    accept_output_buffer: bool = True
-    supported_dtypes: ClassVar[list[torch.dtype]] = [torch.float16, torch.bfloat16]
-    supported_kv_cache_dtypes: ClassVar[list[CacheDType]] = [
-        "auto",
-        "float16",
-        "bfloat16",
-    ]
-    forward_includes_kv_cache_update: bool = False
-
-    @staticmethod
-    def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
-        return [MultipleOf(16)]
-
-    @classmethod
-    def get_supported_head_sizes(cls) -> list[int]:
-        return [32, 64, 96, 128, 160, 192, 224, 256]
-
-    @staticmethod
-    def get_name() -> str:
-        return "TREE_ATTN"
-
-    @staticmethod
-    def get_impl_cls() -> type["TreeAttentionImpl"]:
-        return TreeAttentionImpl
-
-    @staticmethod
-    def get_kv_cache_shape(
-        num_blocks: int,
-        block_size: int,
-        num_kv_heads: int,
-        head_size: int,
-        cache_dtype_str: str = "auto",
-    ) -> tuple[int, ...]:
-        if block_size % 16 != 0:
-            raise ValueError("Block size must be a multiple of 16.")
-        return (2, num_blocks, block_size, num_kv_heads, head_size)
-
-    @staticmethod
-    def get_builder_cls() -> type["TreeAttentionMetadataBuilder"]:
-        return TreeAttentionMetadataBuilder
-
-    @staticmethod
-    def use_cascade_attention(*args, **kwargs) -> bool:
-        return False
-
-
-@dataclass
-class TreeAttentionMetadata:
-    num_actual_tokens: int  # Number of tokens excluding padding.
-    max_query_len: int
-    query_start_loc: torch.Tensor
-    max_seq_len: int
-    seq_lens: torch.Tensor
-    block_table: torch.Tensor
-    slot_mapping: torch.Tensor
-
-    num_prefill_tokens: int = 0
-    num_decode_tokens: int = 0
-    num_prefills: int = 0
-    num_decodes: int = 0
-
-    tree_attn_bias: torch.Tensor | None = None
-
-    # Cached Prefill/decode metadata.
-    _cached_prefill_metadata: "TreeAttentionMetadata | None" = None
-    _cached_decode_metadata: "TreeAttentionMetadata | None" = None
-
-    @property
-    def prefill_metadata(self) -> "TreeAttentionMetadata | None":
-        if self.num_prefills == 0:
-            return None
-
-        if self._cached_prefill_metadata is not None:
-            # Recover cached prefill-phase attention
-            # metadata structure
-            return self._cached_prefill_metadata
-
-        q_start_loc = self.query_start_loc[self.num_decodes :]
-        q_seqlens = torch.diff(q_start_loc)
-        kv_seqlens = self.seq_lens[self.num_decodes :]
-        # Construct & cache prefill-phase attention metadata structure
-        self._cached_prefill_metadata = TreeAttentionMetadata(
-            num_actual_tokens=self.num_prefill_tokens,
-            max_query_len=int(q_seqlens.max().item()),
-            query_start_loc=q_start_loc - q_start_loc[0],
-            max_seq_len=int(kv_seqlens.max().item()),
-            seq_lens=kv_seqlens,
-            block_table=self.block_table[self.num_decodes :],
-            slot_mapping=self.slot_mapping[self.num_decode_tokens :],
-        )
-        return self._cached_prefill_metadata
-
-    @property
-    def decode_metadata(self) -> "TreeAttentionMetadata | None":
-        if self.num_decode_tokens == 0:
-            return None
-
-        if self._cached_decode_metadata is not None:
-            # Recover cached decode-phase attention
-            # metadata structure
-            return self._cached_decode_metadata
-
-        q_start_loc = self.query_start_loc[: self.num_decodes + 1]
-        q_seqlens = torch.diff(q_start_loc)
-        kv_seqlens = self.seq_lens[: self.num_decodes]
-        # Construct & cache decode-phase attention metadata structure
-        self._cached_decode_metadata = TreeAttentionMetadata(
-            num_actual_tokens=self.num_decode_tokens,
-            max_query_len=int(q_seqlens.max().item()),
-            query_start_loc=q_start_loc,
-            max_seq_len=int(kv_seqlens.max().item()),
-            seq_lens=kv_seqlens,
-            block_table=self.block_table[: self.num_decodes],
-            slot_mapping=self.slot_mapping[: self.num_decode_tokens],
-            tree_attn_bias=self.tree_attn_bias,
-        )
-        return self._cached_decode_metadata
-
-
-class TreeAttentionMetadataBuilder(AttentionMetadataBuilder[TreeAttentionMetadata]):
-    def __init__(
-        self,
-        kv_cache_spec: AttentionSpec,
-        layer_names: list[str],
-        vllm_config: VllmConfig,
-        device: torch.device,
-    ):
-        super().__init__(kv_cache_spec, layer_names, vllm_config, device)
-
-        self.block_size = kv_cache_spec.block_size
-
-        spec_config = vllm_config.speculative_config
-        spec_token_tree: str | None = None
-        if spec := spec_config:
-            spec_token_tree = spec.speculative_token_tree
-        tree_choices: list[tuple[int, ...]] = (
-            ast.literal_eval(spec_token_tree) if spec_token_tree is not None else [(0,)]
-        )
-        # Construct the tree attention bias.
-        depth_counts = _get_depth_counts(tree_choices)
-        self.tree_attn_bias = _prepare_tree_attn_bias(
-            tree_choices,
-            depth_counts,
-            dtype=torch.float32,
-            device=device,
-        )
-
-        self.reorder_batch_threshold = self.tree_attn_bias.shape[0]
-
-    def build(
-        self,
-        common_prefix_len: int,
-        common_attn_metadata: CommonAttentionMetadata,
-        fast_build: bool = False,
-    ) -> TreeAttentionMetadata:
-        decode_threshold = self.tree_attn_bias.shape[0]
-        num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = (
-            split_decodes_and_prefills(
-                common_attn_metadata, decode_threshold=decode_threshold
-            )
-        )
-
-        num_actual_tokens = common_attn_metadata.num_actual_tokens
-        q_start_loc = common_attn_metadata.query_start_loc
-        max_query_len = common_attn_metadata.max_query_len
-        kv_seqlens = common_attn_metadata.seq_lens
-        max_seq_len = common_attn_metadata.max_seq_len
-        block_table = common_attn_metadata.block_table_tensor
-        slot_mapping = common_attn_metadata.slot_mapping
-
-        return TreeAttentionMetadata(
-            num_actual_tokens=num_actual_tokens,
-            num_prefill_tokens=num_prefill_tokens,
-            num_decode_tokens=num_decode_tokens,
-            num_prefills=num_prefills,
-            num_decodes=num_decodes,
-            max_query_len=max_query_len,
-            query_start_loc=q_start_loc,
-            max_seq_len=max_seq_len,
-            seq_lens=kv_seqlens,
-            block_table=block_table,
-            slot_mapping=slot_mapping,
-            tree_attn_bias=self.tree_attn_bias,
-        )
-
-    def build_for_drafting(
-        self,
-        common_attn_metadata: CommonAttentionMetadata,
-        draft_index: int,
-    ) -> TreeAttentionMetadata:
-        # Cache the original tree attention bias.
-        orig_tree_attn_bias = self.tree_attn_bias
-
-        if draft_index == 0:
-            # Use prefill for drafting at the root level.
-            self.tree_attn_bias = torch.empty(0)
-        else:
-            # Slice the tree attention bias for drafting. Exclude
-            # the root level.
-            start, end = 1, 1 + common_attn_metadata.max_query_len
-            self.tree_attn_bias = self.tree_attn_bias[start:end, start:end].contiguous()
-
-        # Build attention bias.
-        attn_metadata = self.build(0, common_attn_metadata, fast_build=True)
-
-        # Reset the tree attention bias to the original value.
-        self.tree_attn_bias = orig_tree_attn_bias
-        return attn_metadata
-
-
-def _get_depth_counts(sorted_tree_choices: list[tuple[int, ...]]) -> list[int]:
-    # Count the number of choices at each depth of the tree.
-    depth_counts = []
-    prev_depth = 0
-    for path in sorted_tree_choices:
-        depth = len(path)
-        if depth != prev_depth:
-            depth_counts.append(0)
-        depth_counts[depth - 1] += 1
-        prev_depth = depth
-    return depth_counts
-
-
-def _prepare_tree_attn_bias(
-    sorted_tree_choices: list[tuple[int, ...]],
-    depth_counts: list[int],
-    dtype: torch.dtype | None,
-    device: torch.device | None,
-) -> torch.Tensor:
-    # +1 comes from the additional root node.
-    tree_len = len(sorted_tree_choices) + 1
-    tree_attn_mask = torch.full(
-        (tree_len, tree_len), -torch.inf, device=device, dtype=dtype
-    )
-
-    # Set diagonal to all zeros. Each token should
-    # attend to itself.
-    mask_val = 0
-    for i in range(tree_len):
-        tree_attn_mask[i, i] = mask_val
-
-    # Set root to all zeros. All tokens attend to it.
-    tree_attn_mask[:, 0] = mask_val
-
-    # Set all ancestors to zeros.
-    start = 0
-    for i in range(len(depth_counts)):
-        for j in range(depth_counts[i]):
-            cur_tree_choice = sorted_tree_choices[start + j]
-            # Retrieve ancestor position.
-            if len(cur_tree_choice) == 1:
-                continue
-            ancestor_idx = []
-            for c in range(len(cur_tree_choice) - 1):
-                ancestor_idx.append(
-                    sorted_tree_choices.index(cur_tree_choice[: c + 1]) + 1
-                )
-            tree_attn_mask[j + start + 1, ancestor_idx] = mask_val
-        start += depth_counts[i]
-    return tree_attn_mask
-
-
-class TreeAttentionImpl(AttentionImpl):
-    def __init__(
-        self,
-        num_heads: int,
-        head_size: int,
-        scale: float,
-        num_kv_heads: int,
-        alibi_slopes: list[float] | None,
-        sliding_window: int | None,
-        kv_cache_dtype: str,
-        logits_soft_cap: float | None = None,
-        attn_type: AttentionType = AttentionType.DECODER,
-        kv_sharing_target_layer_name: str | None = None,
-    ) -> None:
-        self.num_heads = num_heads
-        self.head_size = head_size
-        self.scale = float(scale)
-        self.num_kv_heads = num_kv_heads
-        self.num_queries_per_kv = self.num_heads // self.num_kv_heads
-        self.kv_cache_dtype = kv_cache_dtype
-        self.kv_sharing_target_layer_name = kv_sharing_target_layer_name
-        if alibi_slopes is not None:
-            alibi_slopes = torch.tensor(alibi_slopes, dtype=torch.float32)
-        self.alibi_slopes = alibi_slopes
-        if logits_soft_cap is None:
-            # Setting logits_soft_cap to 0 means no soft cap.
-            logits_soft_cap = 0
-        self.logits_soft_cap = logits_soft_cap
-        if sliding_window is None:
-            self.sliding_window = (-1, -1)
-        else:
-            self.sliding_window = (sliding_window - 1, 0)
-
-        if attn_type != AttentionType.DECODER:
-            raise NotImplementedError(
-                "Encoder self-attention and "
-                "encoder/decoder cross-attention "
-                "are not implemented for "
-                "TreeAttentionImpl."
-            )
-
-    def do_kv_cache_update(
-        self,
-        layer: torch.nn.Module,
-        key: torch.Tensor,
-        value: torch.Tensor,
-        kv_cache: torch.Tensor,
-        slot_mapping: torch.Tensor,
-    ) -> None:
-        key_cache, value_cache = kv_cache.unbind(0)
-
-        # Reshape the input keys and values and store them in the cache.
-        # NOTE(woosuk): Here, key and value are padded while slot_mapping is
-        # not padded. However, we don't need to do key[:num_actual_tokens]
-        # and value[:num_actual_tokens] because the reshape_and_cache_flash
-        # op uses the slot_mapping's shape to determine the number of
-        # actual tokens.
-        ops.reshape_and_cache_flash(
-            key,
-            value,
-            key_cache,
-            value_cache,
-            slot_mapping,
-            self.kv_cache_dtype,
-            layer._k_scale,
-            layer._v_scale,
-        )
-
-    def forward(
-        self,
-        layer: torch.nn.Module,
-        query: torch.Tensor,
-        key: torch.Tensor,
-        value: torch.Tensor,
-        kv_cache: torch.Tensor,
-        attn_metadata: TreeAttentionMetadata,
-        output: torch.Tensor | None = None,
-        output_scale: torch.Tensor | None = None,
-        output_block_scale: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        """Forward pass with TreeAttention.
-
-        Args:
-            query: shape = [num_tokens, num_heads, head_size]
-            key: shape = [num_tokens, num_kv_heads, head_size]
-            value: shape = [num_tokens, num_kv_heads, head_size]
-            kv_cache: shape =
-                [2, num_blocks, block_size, num_kv_heads, head_size]
-            attn_metadata: Metadata for attention.
-        Returns:
-            shape = [num_tokens, num_heads * head_size]
-        """
-        assert output is not None, "Output tensor must be provided."
-
-        if output_scale is not None or output_block_scale is not None:
-            raise NotImplementedError(
-                "fused output quantization is not yet supported for TreeAttentionImpl"
-            )
-
-        if attn_metadata is None:
-            # Profiling run.
-            return output.fill_(0)
-
-        key_cache, value_cache = kv_cache.unbind(0)
-
-        num_actual_tokens = attn_metadata.num_actual_tokens
-        num_decode_tokens = attn_metadata.num_decode_tokens
-        descale_shape = (attn_metadata.query_start_loc.shape[0] - 1, key.shape[1])
-        if prefill_meta := attn_metadata.prefill_metadata:
-            unified_attention(
-                q=query[num_decode_tokens:num_actual_tokens],
-                k=key_cache,
-                v=value_cache,
-                out=output[num_decode_tokens:num_actual_tokens],
-                cu_seqlens_q=prefill_meta.query_start_loc,
-                max_seqlen_q=prefill_meta.max_query_len,
-                seqused_k=prefill_meta.seq_lens,
-                max_seqlen_k=prefill_meta.max_seq_len,
-                softmax_scale=self.scale,
-                causal=True,
-                alibi_slopes=self.alibi_slopes,
-                window_size=self.sliding_window,
-                block_table=prefill_meta.block_table,
-                softcap=self.logits_soft_cap,
-                q_descale=None,  # Not supported
-                k_descale=layer._k_scale.expand(descale_shape),
-                v_descale=layer._v_scale.expand(descale_shape),
-            )
-
-        if decode_meta := attn_metadata.decode_metadata:
-            unified_attention(
-                q=query[:num_decode_tokens],
-                k=key_cache,
-                v=value_cache,
-                out=output[:num_decode_tokens],
-                cu_seqlens_q=decode_meta.query_start_loc,
-                max_seqlen_q=decode_meta.max_query_len,
-                seqused_k=decode_meta.seq_lens,
-                max_seqlen_k=decode_meta.max_seq_len,
-                softmax_scale=self.scale,
-                causal=True,
-                alibi_slopes=self.alibi_slopes,
-                qq_bias=decode_meta.tree_attn_bias,
-                window_size=self.sliding_window,
-                block_table=decode_meta.block_table,
-                softcap=self.logits_soft_cap,
-                q_descale=None,  # Not supported
-                k_descale=layer._k_scale.expand(descale_shape),
-                v_descale=layer._v_scale.expand(descale_shape),
-            )
-        return output
diff --git a/vllm/v1/attention/backends/triton_attn.py b/vllm/v1/attention/backends/triton_attn.py
index 6d967b515e45..b68776375fc0 100644
--- a/vllm/v1/attention/backends/triton_attn.py
+++ b/vllm/v1/attention/backends/triton_attn.py
@@ -7,6 +7,7 @@
 
 import torch
 
+import vllm.envs as envs
 from vllm._aiter_ops import rocm_aiter_ops
 from vllm.config import CUDAGraphMode, VllmConfig
 from vllm.config.cache import CacheDType
@@ -18,6 +19,7 @@
 from vllm.platforms import current_platform
 from vllm.platforms.interface import DeviceCapability
 from vllm.utils.math_utils import next_power_of_2
+from vllm.utils.torch_utils import async_tensor_h2d, is_quantized_kv_cache
 from vllm.v1.attention.backend import (
     AttentionBackend,
     AttentionCGSupport,
@@ -28,12 +30,22 @@
     CommonAttentionMetadata,
     MultipleOf,
 )
+from vllm.v1.attention.backends.utils import (
+    get_kv_cache_layout,
+    get_num_attention_heads_from_layers,
+)
 from vllm.v1.attention.ops.triton_prefill_attention import context_attention_fwd
 from vllm.v1.attention.ops.triton_reshape_and_cache_flash import (
     triton_reshape_and_cache_flash,
+    triton_reshape_and_cache_flash_per_token_head_quant,
 )
 from vllm.v1.attention.ops.triton_unified_attention import unified_attention
-from vllm.v1.kv_cache_interface import AttentionSpec
+from vllm.v1.kv_cache_interface import (
+    AttentionSpec,
+    KVQuantMode,
+    get_kv_quant_mode,
+    kv_cache_uses_per_token_head_scales,
+)
 
 logger = init_logger(__name__)
 
@@ -78,40 +90,41 @@ class TritonAttentionMetadata:
     scheduler_metadata: torch.Tensor | None = None
     prefix_scheduler_metadata: torch.Tensor | None = None
     mm_prefix_range: dict[int, list[tuple[int, int]]] | None = None
+    mm_prefix_range_tensor: torch.Tensor | None = None
 
-    @property
-    def mm_prefix_range_tensor(self) -> torch.Tensor | None:
+    @staticmethod
+    def compute_mm_prefix_range_tensor(
+        mm_prefix_range: dict[int, list[tuple[int, int]]] | None,
+        num_seqs: int,
+        device: torch.device,
+    ) -> torch.Tensor | None:
         """Convert mm_prefix_range dict to padded tensor for Triton kernel.
 
         Returns shape: (num_seqs, max_ranges, 2) with 0-padding for empty ranges.
         Empty ranges have start==end==0, which kernel skips via is_valid check.
         """
-        # TODO(Isotr0py): Move to model runner's attention metadata
-        # preparation to avoid duplicate computation.
-        if self.mm_prefix_range is None:
+        if mm_prefix_range is None:
             return None
 
-        num_seqs = self.seq_lens.shape[0]
-        device = self.seq_lens.device
-
         # Collect ranges, using [(0,0)] for empty sequences to ensure uniform dims
         range_lists = [
-            self.mm_prefix_range.get(i, [(0, 0)]) or [(0, 0)] for i in range(num_seqs)
+            mm_prefix_range.get(i, [(0, 0)]) or [(0, 0)] for i in range(num_seqs)
         ]
 
         # Return None if all ranges are trivial (only (0,0) placeholders)
         if all(r == [(0, 0)] for r in range_lists):
             return None
 
-        # Create 2D tensors with shape (num_ranges, 2) for each sequence
-        range_tensors = [
-            torch.tensor(r, dtype=torch.int32, device=device).view(-1, 2)
-            for r in range_lists
-        ]
-
-        return torch.nested.nested_tensor(
-            range_tensors, layout=torch.jagged
-        ).to_padded_tensor(0)
+        # Build on CPU first then move to GPU in a single H2D transfer
+        max_ranges = max(len(r) for r in range_lists)
+        # Pad all sequences to the same number of ranges
+        padded = []
+        for r in range_lists:
+            padded_r = list(r) + [(0, 0)] * (max_ranges - len(r))
+            padded.append(padded_r)
+        # Build on pinned CPU memory so the H2D transfer is non-blocking.
+        padded = async_tensor_h2d(padded, dtype=torch.int32, device=device)
+        return padded.view(num_seqs, max_ranges, 2)
 
 
 class TritonAttentionMetadataBuilder(AttentionMetadataBuilder[TritonAttentionMetadata]):
@@ -129,9 +142,10 @@ def __init__(
         self.block_size = kv_cache_spec.block_size
 
         model_config = vllm_config.model_config
-        self.num_heads_q = model_config.get_num_attention_heads(
-            vllm_config.parallel_config
-        )
+        # Compatible with models with non-uniform per-layer head counts.
+        self.num_heads_q = get_num_attention_heads_from_layers(
+            vllm_config, layer_names
+        ) or model_config.get_num_attention_heads(vllm_config.parallel_config)
         self.num_heads_kv = model_config.get_num_kv_heads(vllm_config.parallel_config)
         self.headdim = model_config.get_head_size()
 
@@ -255,7 +269,6 @@ def build(
 
 
 class TritonAttentionBackend(AttentionBackend):
-    accept_output_buffer: bool = True
     supported_dtypes: ClassVar[list[torch.dtype]] = [
         torch.float16,
         torch.bfloat16,
@@ -268,6 +281,8 @@ class TritonAttentionBackend(AttentionBackend):
         "fp8",
         "fp8_e4m3",
         "fp8_e5m2",
+        "int8_per_token_head",
+        "fp8_per_token_head",
     ]
 
     @staticmethod
@@ -286,6 +301,10 @@ def supports_block_size(cls, block_size: int | None) -> bool:
     def get_name() -> str:
         return "TRITON_ATTN"
 
+    @classmethod
+    def supports_batch_invariance(cls) -> bool:
+        return True
+
     @staticmethod
     def get_impl_cls() -> type["TritonAttentionImpl"]:
         return TritonAttentionImpl
@@ -300,6 +319,18 @@ def get_kv_cache_shape(
     ) -> tuple[int, ...]:
         if block_size % 16 != 0:
             raise ValueError("Block size must be a multiple of 16.")
+        if kv_cache_uses_per_token_head_scales(cache_dtype_str):
+            # Pad head_size by sizeof(float32)/sizeof(cache_dtype) so
+            # the per-head scale fits inline.  The backend extracts
+            # data[:head_size] and scale[head_size:] via typed views.
+            from vllm.utils.torch_utils import (
+                STR_DTYPE_TO_TORCH_DTYPE,
+                get_dtype_size,
+            )
+
+            cache_dtype = STR_DTYPE_TO_TORCH_DTYPE[cache_dtype_str]
+            scale_pad = get_dtype_size(torch.float32) // get_dtype_size(cache_dtype)
+            return (num_blocks, 2, block_size, num_kv_heads, head_size + scale_pad)
         return (num_blocks, 2, block_size, num_kv_heads, head_size)
 
     @staticmethod
@@ -308,12 +339,20 @@ def get_kv_cache_stride_order(
     ) -> tuple[int, ...]:
         # `stride_order` indicates the permutation that gets
         # us from `get_kv_cache_shape` to the actual memory layout we want.
-        if include_num_layers_dimension:
+        cache_layout = get_kv_cache_layout()
+        if cache_layout == "NHD" and include_num_layers_dimension:
             # (num_blocks, num_layers, 2, block_size, num_kv_heads, head_size)
             return (1, 0, 2, 3, 4, 5)
-
-        # (num_blocks, 2, block_size, num_kv_heads, head_size)
-        return (0, 1, 2, 3, 4)
+        elif cache_layout == "NHD":
+            stride_order = (0, 1, 2, 3, 4)
+        elif cache_layout == "HND" and include_num_layers_dimension:
+            # (num_blocks, 2, num_kv_heads, num_layers, block_size, head_size)
+            return (1, 2, 4, 0, 3, 5)
+        elif cache_layout == "HND":
+            stride_order = (0, 1, 3, 2, 4)
+        else:
+            raise ValueError(f"Unknown cache layout: {cache_layout}")
+        return stride_order
 
     @staticmethod
     def use_cascade_attention(*args, **kwargs) -> bool:
@@ -355,6 +394,62 @@ def supports_compute_capability(cls, capability: DeviceCapability) -> bool:
 
 
 class TritonAttentionImpl(AttentionImpl):
+    # Per-token-head quant: scale views carved from inline head padding.
+    _k_scale_cache: torch.Tensor | None = None
+    _v_scale_cache: torch.Tensor | None = None
+
+    def _ensure_scale_caches(self, kv_cache: torch.Tensor) -> None:
+        """Extract per-head scale views from the padded head dimension.
+
+        The KV cache shape is ``(num_blocks, 2, block_size, nkv, hs+pad)``
+        where ``pad = sizeof(float32) / sizeof(cache_dtype)``.  The last
+        ``pad`` elements of each head hold one float32 scale.  We create
+        strided float32 views over those bytes.
+
+        Scale shape: ``(num_blocks, block_size, num_kv_heads)``
+        """
+        if self._k_scale_cache is not None:
+            return
+        from vllm.utils.torch_utils import get_dtype_size
+
+        num_blocks, _, block_size, nkv, padded_hs = kv_cache.shape
+        dtype_sz = kv_cache.element_size()
+        scale_pad = get_dtype_size(torch.float32) // dtype_sz  # e.g. 4
+        hs = padded_hs - scale_pad
+
+        raw = kv_cache.untyped_storage()
+        base_f32 = torch.tensor([], dtype=torch.float32, device=kv_cache.device).set_(
+            raw
+        )
+
+        # In the raw bytes, each (block, kv_half, slot, head) occupies
+        # padded_hs * dtype_sz bytes.  The scale float32 sits at byte
+        # offset hs * dtype_sz within that region.
+        kv_half_bytes = block_size * nkv * padded_hs * dtype_sz
+        full_block_f32 = 2 * kv_half_bytes // 4  # stride between blocks
+        slot_f32 = nkv * padded_hs * dtype_sz // 4  # stride between slots
+        head_f32 = padded_hs * dtype_sz // 4  # stride between heads
+        scale_off_f32 = hs * dtype_sz // 4  # offset to scale within head
+
+        # K scales: kv_half=0
+        self._k_scale_cache = torch.as_strided(
+            base_f32,
+            size=(num_blocks, block_size, nkv),
+            stride=(full_block_f32, slot_f32, head_f32),
+            storage_offset=scale_off_f32,
+        )
+        self._k_scale_cache.fill_(1.0)
+
+        # V scales: kv_half=1, offset by kv_half_bytes
+        v_base_f32 = kv_half_bytes // 4
+        self._v_scale_cache = torch.as_strided(
+            base_f32,
+            size=(num_blocks, block_size, nkv),
+            stride=(full_block_f32, slot_f32, head_f32),
+            storage_offset=v_base_f32 + scale_off_f32,
+        )
+        self._v_scale_cache.fill_(1.0)
+
     def fused_output_quant_supported(self, quant_key: QuantKey):
         return quant_key == kFp8StaticTensorSym
 
@@ -372,6 +467,7 @@ def __init__(
         kv_sharing_target_layer_name: int | None = None,
         sinks: torch.Tensor | None = None,
         use_alibi_sqrt: bool = False,
+        chunk_lookback: int = -1,
     ) -> None:
         self.num_heads = num_heads
         self.head_size = head_size
@@ -406,8 +502,27 @@ def __init__(
                 f"num_heads: {num_heads}."
             )
         self.use_alibi_sqrt = use_alibi_sqrt
+        self.chunk_lookback = chunk_lookback
         self.supports_quant_query_input = current_platform.is_cuda()
 
+        self._kv_quant_mode = get_kv_quant_mode(kv_cache_dtype)
+        self._is_per_token_head_quant = self._kv_quant_mode.is_per_token_head
+
+        # Enable tensor descriptors for Q/K/V load/store on platforms that
+        # benefit from HW 2D block reads (Intel Xe2/Xe3).  The dead branch
+        # is eliminated at Triton compile time, so other platforms see
+        # zero cost when TD is off.
+        #
+        # ``VLLM_TRITON_ATTN_USE_TD`` is tri-state:
+        #   - unset (None): auto-select (TD on for XPU, off elsewhere),
+        #   - ``1``: force TD on regardless of platform,
+        #   - ``0``: force TD off regardless of platform (useful for A/B).
+        td_override = envs.VLLM_TRITON_ATTN_USE_TD
+        if td_override is None:
+            self.use_td = current_platform.is_xpu()
+        else:
+            self.use_td = td_override
+
     def forward(
         self,
         layer: torch.nn.Module,
@@ -416,7 +531,7 @@ def forward(
         value: torch.Tensor,
         kv_cache: torch.Tensor,
         attn_metadata: TritonAttentionMetadata,
-        output: torch.Tensor | None = None,
+        output: torch.Tensor,
         output_scale: torch.Tensor | None = None,
         output_block_scale: torch.Tensor | None = None,
     ) -> torch.Tensor:
@@ -432,8 +547,6 @@ def forward(
         Returns:
             shape = [num_tokens, num_heads * head_size]
         """
-        assert output is not None, "Output tensor must be provided."
-
         if output_block_scale is not None:
             raise NotImplementedError(
                 "fused block_scale output quantization is not yet supported"
@@ -470,15 +583,43 @@ def forward(
                 layer,
             )
 
-        # For decoder and cross-attention, use KV cache as before
-        key_cache, value_cache = kv_cache.unbind(1)
-        if self.kv_cache_dtype.startswith("fp8"):
-            if key_cache.dtype != self.fp8_dtype:
+        # Per-token-head quantized KV cache: use separate scale caches.
+        if self._is_per_token_head_quant:
+            self._ensure_scale_caches(kv_cache)
+            key_cache, value_cache = kv_cache.unbind(1)
+            if key_cache.dtype == torch.uint8:
                 key_cache = key_cache.view(self.fp8_dtype)
                 value_cache = value_cache.view(self.fp8_dtype)
-            assert layer._q_scale_float == 1.0, (
-                "A non 1.0 q_scale is not currently supported."
+            q_descale = None
+            k_descale = None
+            v_descale = None
+            k_scale_cache = self._k_scale_cache
+            v_scale_cache = self._v_scale_cache
+        # FP8 per-tensor / auto path (original flow).
+        else:
+            key_cache, value_cache = kv_cache.unbind(1)
+            if (
+                is_quantized_kv_cache(self.kv_cache_dtype)
+                and key_cache.dtype != self.fp8_dtype
+            ):
+                key_cache = key_cache.view(self.fp8_dtype)
+                value_cache = value_cache.view(self.fp8_dtype)
+            descale_shape = (
+                attn_metadata.query_start_loc.shape[0] - 1,
+                key_cache.shape[2],
+            )
+            q_descale = (
+                layer._q_scale
+                if (
+                    self._kv_quant_mode == KVQuantMode.FP8_PER_TENSOR
+                    and query.dtype == self.fp8_dtype
+                )
+                else None
             )
+            k_descale = layer._k_scale.expand(descale_shape)
+            v_descale = layer._v_scale.expand(descale_shape)
+            k_scale_cache = None
+            v_scale_cache = None
 
         cu_seqlens_q = attn_metadata.query_start_loc
         seqused_k = attn_metadata.seq_lens
@@ -492,7 +633,6 @@ def forward(
         softmax_segm_max = attn_metadata.softmax_segm_max
         softmax_segm_expsum = attn_metadata.softmax_segm_expsum
 
-        descale_shape = (cu_seqlens_q.shape[0] - 1, key_cache.shape[2])
         mm_prefix_range_tensor = attn_metadata.mm_prefix_range_tensor
 
         unified_attention(
@@ -511,9 +651,9 @@ def forward(
             window_size=self.sliding_window,
             block_table=block_table,
             softcap=self.logits_soft_cap,
-            q_descale=None,  # Not supported
-            k_descale=layer._k_scale.expand(descale_shape),
-            v_descale=layer._v_scale.expand(descale_shape),
+            q_descale=q_descale,
+            k_descale=k_descale,
+            v_descale=v_descale,
             seq_threshold_3D=seq_threshold_3D,
             num_par_softmax_segments=num_par_softmax_segments,
             softmax_segm_output=softmax_segm_output,
@@ -522,6 +662,11 @@ def forward(
             sinks=self.sinks,
             output_scale=output_scale,
             mm_prefix_range=mm_prefix_range_tensor,
+            kv_quant_mode=self._kv_quant_mode,
+            k_scale_cache=k_scale_cache,
+            v_scale_cache=v_scale_cache,
+            chunk_lookback=self.chunk_lookback,
+            use_td=self.use_td,
         )
 
         return output
@@ -545,10 +690,10 @@ def _forward_encoder_attention(
             attn_metadata: Encoder attention metadata
             layer: The attention layer
         """
-        # For encoder attention, process FP8 quantization if needed
-        if self.kv_cache_dtype.startswith("fp8"):
+        # Quantized KV cache is not supported for encoder attention.
+        if is_quantized_kv_cache(self.kv_cache_dtype):
             raise NotImplementedError(
-                "quantization is not supported for encoder attention"
+                "quantized KV cache is not supported for encoder attention"
             )
 
         # Use encoder-specific metadata for sequence information
@@ -584,16 +729,28 @@ def do_kv_cache_update(
             # For encoder attention,
             # we use direct Q, K, V tensors without caching
             return
-        # For decoder and cross-attention, use KV cache as before
-        key_cache, value_cache = kv_cache.unbind(1)
-
         # Reshape the input keys and values and store them in the cache.
-        if self.kv_cache_dtype.startswith("fp8"):
+        if self._is_per_token_head_quant:
+            self._ensure_scale_caches(kv_cache)
+            key_cache, value_cache = kv_cache.unbind(1)
+            if key_cache.dtype == torch.uint8:
+                key_cache = key_cache.view(self.fp8_dtype)
+                value_cache = value_cache.view(self.fp8_dtype)
+            triton_reshape_and_cache_flash_per_token_head_quant(
+                key,
+                value,
+                key_cache,
+                value_cache,
+                self._k_scale_cache,
+                self._v_scale_cache,
+                slot_mapping,
+            )
+            return
+        # For decoder and cross-attention, use KV cache as before.
+        key_cache, value_cache = kv_cache.unbind(1)
+        if is_quantized_kv_cache(self.kv_cache_dtype):
             key_cache = key_cache.view(self.fp8_dtype)
             value_cache = value_cache.view(self.fp8_dtype)
-            # triton kernel does not support uint8 kv_cache
-            #  (because some explicit casts (e.g. float8_e4m3fnuz)
-            #   are not supported)
         triton_reshape_and_cache_flash(
             key,
             value,
@@ -606,6 +763,8 @@ def do_kv_cache_update(
         )
 
     def fused_rope_kvcache_supported(self):
+        if self._is_per_token_head_quant:
+            return False
         return rocm_aiter_ops.is_enabled()
 
     def do_rope_and_kv_cache_update(
@@ -623,7 +782,7 @@ def do_rope_and_kv_cache_update(
         key_cache, value_cache = kv_cache.unbind(1)
         flash_layout = True
 
-        is_fp8_kv_cache = self.kv_cache_dtype.startswith("fp8")
+        is_fp8_kv_cache = is_quantized_kv_cache(self.kv_cache_dtype)
         if is_fp8_kv_cache:
             key_cache = key_cache.view(self.fp8_dtype)
             value_cache = value_cache.view(self.fp8_dtype)
diff --git a/vllm/v1/attention/backends/turboquant_attn.py b/vllm/v1/attention/backends/turboquant_attn.py
new file mode 100644
index 000000000000..3bf3b6b82482
--- /dev/null
+++ b/vllm/v1/attention/backends/turboquant_attn.py
@@ -0,0 +1,906 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""TurboQuant attention backend for vLLM.
+
+Prefill: Standard scaled dot-product attention on uncompressed K/V,
+         then quantize K and store K+V into combined cache slot.
+Decode:  Compute TQ attention scores from compressed cache,
+         unpack FP16 values, softmax + weighted sum.
+
+Cache layout (no leading 2 dimension):
+  (num_blocks, block_size, num_kv_heads, slot_size)
+  where slot_size = key_packed_size + value_fp16_size
+
+Per-head per-position slot layout:
+  [key_packed (kps bytes) | value_fp16 (D*2 bytes)]
+  For turboquant_k3v4_nc head_dim=256: [100 bytes key | 512 bytes value] = 612
+"""
+
+import functools
+import math
+from dataclasses import dataclass
+from typing import Any, ClassVar
+
+import torch
+import torch.nn.functional as F
+
+from vllm.config import get_current_vllm_config
+from vllm.config.cache import CacheDType
+from vllm.model_executor.layers.quantization.turboquant.centroids import (
+    get_centroids,
+)
+from vllm.triton_utils import triton
+from vllm.v1.attention.backend import (
+    AttentionBackend,
+    AttentionCGSupport,
+    AttentionImpl,
+    AttentionLayer,
+    AttentionMetadata,
+    AttentionMetadataBuilder,
+    AttentionType,
+    CommonAttentionMetadata,
+    MultipleOf,
+)
+from vllm.v1.attention.backends.fa_utils import (
+    get_flash_attn_version,
+    is_flash_attn_varlen_func_available,
+)
+from vllm.v1.attention.backends.utils import split_decodes_and_prefills
+from vllm.v1.attention.ops.triton_turboquant_decode import (
+    _tq_full_dequant_kv,
+    _use_fp8_e4b15,
+    triton_turboquant_decode_attention,
+)
+from vllm.v1.attention.ops.triton_turboquant_store import triton_turboquant_store
+from vllm.v1.worker.workspace import (
+    current_workspace_manager,
+    is_workspace_manager_initialized,
+)
+
+_HAS_FLASH_ATTN = is_flash_attn_varlen_func_available()
+if _HAS_FLASH_ATTN:
+    from vllm.v1.attention.backends.fa_utils import flash_attn_varlen_func
+
+# Continuation prefill: for small continuation chunks (q_len ≤ threshold),
+# use the TQ decode kernel directly instead of full-dequant + flash_attn.
+# do_kv_cache_update already stored all tokens to TQ cache, so the decode
+# kernel can read them efficiently. This avoids O(cached_len) dequant work
+# per continuation, eliminating the O(N²/chunk_size) collapse at long context.
+_CONTINUATION_DECODE_THRESHOLD = 128
+
+
+def _build_hadamard(d: int, device_str: str) -> torch.Tensor:
+    """Orthonormal Hadamard matrix (Sylvester construction), cached per (d, device).
+
+    Precomputed D×D matrix enables matmul-based WHT — single cuBLAS GEMM
+    instead of log2(D) butterfly kernel launches. 64KB for D=128.
+    """
+    # Normalize device string so "cuda" and "cuda:0" hit the same cache entry.
+    return _build_hadamard_cached(d, str(torch.device(device_str)))
+
+
+@functools.cache
+def _build_hadamard_cached(d: int, device_str: str) -> torch.Tensor:
+    H = torch.tensor([[1.0]])
+    while H.shape[0] < d:
+        H = torch.cat([torch.cat([H, H], 1), torch.cat([H, -H], 1)], 0)
+    return (H / math.sqrt(d)).to(torch.device(device_str))
+
+
+class TurboQuantAttentionBackend(AttentionBackend):
+    """Attention backend using TurboQuant KV-cache compression."""
+
+    accept_output_buffer: bool = True
+    forward_includes_kv_cache_update: bool = False
+
+    supported_dtypes: ClassVar[list[torch.dtype]] = [
+        torch.float16,
+        torch.bfloat16,
+    ]
+    supported_kv_cache_dtypes: ClassVar[list[CacheDType]] = [
+        "turboquant_k8v4",
+        "turboquant_4bit_nc",
+        "turboquant_k3v4_nc",
+        "turboquant_3bit_nc",
+    ]
+
+    @staticmethod
+    def get_name() -> str:
+        return "TURBOQUANT"
+
+    @staticmethod
+    def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
+        return [16, 32, 64, 128]
+
+    @classmethod
+    def supports_attn_type(cls, attn_type: str) -> bool:
+        return attn_type == AttentionType.DECODER
+
+    @classmethod
+    def supports_per_head_quant_scales(cls) -> bool:
+        return False
+
+    @staticmethod
+    def get_impl_cls() -> type["TurboQuantAttentionImpl"]:
+        return TurboQuantAttentionImpl
+
+    @staticmethod
+    def get_builder_cls() -> type["TurboQuantMetadataBuilder"]:
+        return TurboQuantMetadataBuilder
+
+    @staticmethod
+    def get_kv_cache_shape(
+        num_blocks: int,
+        block_size: int,
+        num_kv_heads: int,
+        head_size: int,
+        cache_dtype_str: str = "turboquant_4bit_nc",
+    ) -> tuple[int, ...]:
+        """Combined K+V cache shape — no leading 2 dimension.
+
+        Standard attention backends use (2, num_blocks, block_size, num_kv_heads,
+        head_dim) with a leading 2 to separate K and V. TurboQuant packs K+V
+        into a single interleaved slot per head per position, so the cache is:
+
+            (num_blocks, block_size, num_kv_heads, slot_size_aligned)
+
+        Each slot = [key_packed | value_packed | padding].
+        This is safe because TQ has its own get_kv_cache_shape override and
+        never shares cache tensors with other backends. Layers that fall back
+        to native dtype via kv_cache_dtype_skip_layers get their own
+        standard-shaped cache allocation.
+
+        head_size is the model's real head_dim. slot_size_aligned is computed
+        from the TQ config to ensure correct cache allocation for all head dims.
+        """
+        from vllm.model_executor.layers.quantization.turboquant.config import (
+            TurboQuantConfig,
+        )
+
+        tq_config = TurboQuantConfig.from_cache_dtype(cache_dtype_str, head_size)
+        return (num_blocks, block_size, num_kv_heads, tq_config.slot_size_aligned)
+
+    @classmethod
+    def supports_kv_cache_dtype(cls, kv_cache_dtype: CacheDType | None) -> bool:
+        if kv_cache_dtype is None:
+            return False
+        return kv_cache_dtype.startswith("turboquant_")
+
+    @classmethod
+    def supports_head_size(cls, head_size: int) -> bool:
+        # head_size from spec is effective_head_size (padded_slot//2),
+        # not the model's actual head_dim. Accept any positive value.
+        return head_size > 0
+
+
+@dataclass
+class TurboQuantMetadata(AttentionMetadata):
+    """Metadata for TurboQuant attention."""
+
+    seq_lens: torch.Tensor  # (num_reqs,) — total context length per request
+    slot_mapping: torch.Tensor  # (num_tokens,) — cache slot for each token
+    block_table: torch.Tensor  # (num_reqs, max_num_blocks)
+    query_start_loc: torch.Tensor  # (num_reqs + 1,) — cu_seqlens for queries
+    num_actual_tokens: int = 0  # actual tokens (excluding padding)
+    max_query_len: int = 0  # longest query in batch
+    max_seq_len: int = 0  # longest context in batch
+    is_prefill: bool = False
+    num_decodes: int = 0  # number of decode requests (first in batch)
+    num_decode_tokens: int = 0  # tokens from decode requests
+    # CPU-resident copies used by the prefill path for per-request iteration
+    # without per-step D2H syncs.
+    query_start_loc_cpu: torch.Tensor | None = None
+    seq_lens_cpu: torch.Tensor | None = None
+
+
+class TurboQuantMetadataBuilder(AttentionMetadataBuilder[TurboQuantMetadata]):
+    """Builds TurboQuantMetadata from scheduler output."""
+
+    _cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.UNIFORM_BATCH
+
+    def __init__(self, kv_cache_spec, layer_names, vllm_config, device):
+        super().__init__(kv_cache_spec, layer_names, vllm_config, device)
+        self._init_reorder_batch_threshold(1, supports_spec_as_decode=False)
+
+    def build_for_cudagraph_capture(
+        self, common_attn_metadata: CommonAttentionMetadata
+    ) -> TurboQuantMetadata:
+        attn_metadata = self.build(0, common_attn_metadata)
+        # Set seq_lens to 1 so CUDA graph capture is fast
+        # (real seq_lens are filled at replay time).
+        attn_metadata.seq_lens.fill_(1)
+        return attn_metadata
+
+    def build(self, common_prefix_len, common_attn_metadata, fast_build=False):
+        """Build TurboQuantMetadata from common attention metadata."""
+        cam = common_attn_metadata
+
+        # With reorder_batch_threshold=1, the model runner guarantees
+        # decodes come first in the batch. split_decodes_and_prefills
+        # finds the boundary (operates on CPU tensors — no GPU sync).
+        assert self.reorder_batch_threshold is not None
+        num_decodes, num_prefills, num_decode_tokens, _ = split_decodes_and_prefills(
+            cam, decode_threshold=self.reorder_batch_threshold
+        )
+
+        return TurboQuantMetadata(
+            seq_lens=cam.seq_lens,
+            slot_mapping=cam.slot_mapping,
+            block_table=cam.block_table_tensor,
+            query_start_loc=cam.query_start_loc,
+            num_actual_tokens=cam.num_actual_tokens,
+            max_query_len=cam.max_query_len,
+            max_seq_len=cam.max_seq_len,
+            is_prefill=(cam.max_query_len > 1),
+            num_decodes=num_decodes,
+            num_decode_tokens=num_decode_tokens,
+            query_start_loc_cpu=cam.query_start_loc_cpu,
+            seq_lens_cpu=cam.seq_lens_cpu_upper_bound,
+        )
+
+
+class TurboQuantAttentionImpl(AttentionImpl["TurboQuantMetadata"]):
+    """TurboQuant attention implementation.
+
+    Vectorized PyTorch: batch quantize/store, vectorized bit-unpack
+    decode with einsum scores and value gather.
+    """
+
+    supports_quant_query_input: bool = False
+
+    def __init__(
+        self,
+        num_heads: int,
+        head_size: int,
+        scale: float,
+        num_kv_heads: int | None = None,
+        alibi_slopes: list[float] | None = None,
+        sliding_window: int | None = None,
+        kv_cache_dtype: str = "auto",
+        logits_soft_cap: float | None = None,
+        attn_type: str = AttentionType.DECODER,
+        kv_sharing_target_layer_name: str | None = None,
+        **kwargs,
+    ):
+        self.num_heads = num_heads
+        self.head_size = head_size
+        self.scale = scale
+        self.num_kv_heads = num_kv_heads if num_kv_heads is not None else num_heads
+        self.num_kv_groups = num_heads // self.num_kv_heads
+        self.kv_cache_dtype = kv_cache_dtype
+
+        from vllm.model_executor.layers.quantization.turboquant.config import (
+            TurboQuantConfig,
+        )
+
+        self.tq_config = TurboQuantConfig.from_cache_dtype(kv_cache_dtype, head_size)
+
+        # Pre-compute kernel constants from config (avoid repeated arithmetic)
+        cfg = self.tq_config
+        self._mse_bytes = (
+            math.ceil(head_size * cfg.key_mse_bits / 8)
+            if not cfg.key_fp8
+            else head_size
+        )
+        self._val_data_bytes = math.ceil(head_size * cfg.effective_value_quant_bits / 8)
+        self._n_centroids = cfg.n_centroids if not cfg.key_fp8 else 1
+
+        # Detect flash-attn version (FA2/3/4) for prefill paths.
+        self.fa_version = get_flash_attn_version(head_size=head_size)
+
+        # Fixed NUM_KV_SPLITS (grid dims must be constant for cudagraph,
+        # and benchmarks show no regression vs dynamic in eager mode).
+        vllm_config = get_current_vllm_config()
+        self.max_num_kv_splits = (
+            vllm_config.attention_config.tq_max_kv_splits_for_cuda_graph
+        )
+
+    def _flash_attn_varlen(
+        self,
+        q: torch.Tensor,
+        k: torch.Tensor,
+        v: torch.Tensor,
+        cu_seqlens_q: torch.Tensor,
+        cu_seqlens_k: torch.Tensor,
+        max_seqlen_q: int,
+        max_seqlen_k: int,
+    ) -> torch.Tensor:
+        # fa_utils.get_flash_attn_version() returns None on backends that
+        # should not pass an explicit fa_version kwarg.
+        if self.fa_version is None:
+            return flash_attn_varlen_func(
+                q=q,
+                k=k,
+                v=v,
+                cu_seqlens_q=cu_seqlens_q,
+                cu_seqlens_k=cu_seqlens_k,
+                max_seqlen_q=max_seqlen_q,
+                max_seqlen_k=max_seqlen_k,
+                softmax_scale=self.scale,
+                causal=True,
+            )
+        return flash_attn_varlen_func(
+            q=q,
+            k=k,
+            v=v,
+            cu_seqlens_q=cu_seqlens_q,
+            cu_seqlens_k=cu_seqlens_k,
+            max_seqlen_q=max_seqlen_q,
+            max_seqlen_k=max_seqlen_k,
+            softmax_scale=self.scale,
+            causal=True,
+            fa_version=self.fa_version,
+        )
+
+    def _ensure_on_device(self, layer, device):
+        """One-time derivation of TQ buffers (rotation matrix, midpoints).
+
+        The Hadamard rotation is shared across all layers: random sign
+        flips do not improve Lloyd-Max quantization quality because the
+        quantizer is symmetric around zero (sign-flipping a coordinate
+        maps it to the mirror centroid with identical distortion).
+        """
+        if not hasattr(layer, "_tq_cached"):
+            D = self.head_size
+
+            # Pure Hadamard: orthonormal + symmetric (H = H^T), enabling
+            # in-kernel butterfly fusion and trivial inverse for continuation.
+            H = _build_hadamard(D, str(device))
+            layer._tq_PiT = H
+            layer._tq_Pi = H
+            # fp16 copy for rotation in continuation prefill path
+            layer._tq_Pi_half = H.to(torch.float16)
+
+            # Centroids for Lloyd-Max quantization.
+            layer._tq_centroids = get_centroids(D, self.tq_config.centroid_bits).to(
+                device=device, dtype=torch.float32
+            )
+
+            c_sorted, _ = layer._tq_centroids.sort()
+            layer._tq_midpoints = (c_sorted[:-1] + c_sorted[1:]) / 2
+            layer._tq_cached = True
+
+    def do_kv_cache_update(
+        self,
+        layer: torch.nn.Module,
+        key: torch.Tensor,
+        value: torch.Tensor,
+        kv_cache: torch.Tensor,
+        slot_mapping: torch.Tensor,
+    ) -> None:
+        """Store compressed K/V into the combined TQ cache.
+
+        Called as a separate custom op (unified_kv_cache_update) BEFORE
+        the attention forward, matching FlashAttention's split pattern.
+        slot_mapping is already sliced to num_actual_tokens by the caller.
+        """
+        N = slot_mapping.shape[0]
+        if N <= 0:
+            return
+
+        device = key.device
+        self._ensure_on_device(layer, device)
+
+        k = key[:N].view(N, self.num_kv_heads, self.head_size)
+        v = value[:N].view(N, self.num_kv_heads, self.head_size)
+        self._store_kv(k, v, kv_cache, slot_mapping, layer)
+
+    def forward(
+        self,
+        layer: AttentionLayer,
+        query: torch.Tensor,
+        key: torch.Tensor,
+        value: torch.Tensor,
+        kv_cache: torch.Tensor,
+        attn_metadata: "TurboQuantMetadata",
+        output: torch.Tensor | None = None,
+        output_scale: torch.Tensor | None = None,
+        output_block_scale: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        num_tokens = query.shape[0]
+
+        if output is None:
+            output = torch.zeros(
+                num_tokens,
+                self.num_heads * self.head_size,
+                dtype=query.dtype,
+                device=query.device,
+            )
+
+        if attn_metadata is None:
+            return output.fill_(0)
+
+        # Slice to actual tokens
+        N = attn_metadata.num_actual_tokens
+        if N <= 0:
+            return output.fill_(0)
+
+        q = query[:N].view(N, self.num_heads, self.head_size)
+
+        # Get TQ buffers, ensure on device (one-time migration).
+        # Use Any-typed alias for dynamic _tq_* attrs set by _ensure_on_device.
+        tq_layer: Any = layer
+        device = q.device
+        self._ensure_on_device(tq_layer, device)
+        Pi = tq_layer._tq_Pi
+        PiT = tq_layer._tq_PiT
+        centroids = tq_layer._tq_centroids
+
+        # Compute attention (KV cache was already updated by do_kv_cache_update)
+        # With reorder_batch_threshold=1, decodes come first in the batch.
+        # num_decodes/num_decode_tokens from metadata give the split point.
+        num_decodes = attn_metadata.num_decodes
+        num_decode_tokens = attn_metadata.num_decode_tokens
+
+        if not attn_metadata.is_prefill:
+            # Pure decode batch — fast path
+            attn_out = self._decode_attention(
+                q, kv_cache, attn_metadata, Pi, centroids, PiT, layer
+            )
+        elif num_decodes == 0:
+            # Pure prefill batch
+            k = key[:N].view(N, self.num_kv_heads, self.head_size)
+            v = value[:N].view(N, self.num_kv_heads, self.head_size)
+            attn_out = self._prefill_attention(
+                q,
+                k,
+                v,
+                kv_cache,
+                attn_metadata,
+                Pi,
+                centroids,
+                PiT,
+                layer=layer,
+            )
+        else:
+            # Mixed batch: decodes first (guaranteed by reorder_batch).
+            attn_out = torch.empty(
+                N, self.num_heads, self.head_size, device=device, dtype=q.dtype
+            )
+
+            # --- Decode portion (first num_decodes requests) ---
+            # Use full-batch max_seq_len as safe upper bound (no GPU sync).
+            decode_meta = TurboQuantMetadata(
+                seq_lens=attn_metadata.seq_lens[:num_decodes],
+                slot_mapping=attn_metadata.slot_mapping[:num_decode_tokens],
+                block_table=attn_metadata.block_table[:num_decodes],
+                query_start_loc=attn_metadata.query_start_loc[: num_decodes + 1],
+                num_actual_tokens=num_decode_tokens,
+                max_query_len=1,
+                max_seq_len=attn_metadata.max_seq_len,
+                is_prefill=False,
+            )
+            attn_out[:num_decode_tokens] = self._decode_attention(
+                q[:num_decode_tokens], kv_cache, decode_meta, Pi, centroids, PiT, layer
+            )
+
+            # --- Prefill portion (remaining requests) ---
+            # CRITICAL: use prefill-specific max_seq_len so flash_attn's
+            # fast path (max_query_len == max_seq_len) triggers for
+            # first-chunk prefills. Using full-batch max_seq_len breaks
+            # this because decode requests inflate max_seq_len.
+            prefill_seq_lens = attn_metadata.seq_lens[num_decodes:]
+            # Use the CPU-resident `seq_lens` upper-bound from the metadata
+            # (populated in the builder) to compute the prefill sub-batch
+            # max without a GPU→CPU sync.
+            if attn_metadata.seq_lens_cpu is not None:
+                prefill_max_seq = int(attn_metadata.seq_lens_cpu[num_decodes:].max())
+            else:
+                prefill_max_seq = attn_metadata.max_seq_len
+            prefill_qsl = (
+                attn_metadata.query_start_loc[num_decodes:] - num_decode_tokens
+            )
+            prefill_qsl_cpu = None
+            if attn_metadata.query_start_loc_cpu is not None:
+                prefill_qsl_cpu = (
+                    attn_metadata.query_start_loc_cpu[num_decodes:] - num_decode_tokens
+                )
+            prefill_meta = TurboQuantMetadata(
+                seq_lens=prefill_seq_lens,
+                slot_mapping=attn_metadata.slot_mapping[num_decode_tokens:N],
+                block_table=attn_metadata.block_table[num_decodes:],
+                query_start_loc=prefill_qsl,
+                num_actual_tokens=N - num_decode_tokens,
+                max_query_len=attn_metadata.max_query_len,
+                max_seq_len=prefill_max_seq,
+                is_prefill=True,
+                query_start_loc_cpu=prefill_qsl_cpu,
+                seq_lens_cpu=attn_metadata.seq_lens_cpu[num_decodes:]
+                if attn_metadata.seq_lens_cpu is not None
+                else None,
+            )
+            k = key[:N].view(N, self.num_kv_heads, self.head_size)
+            v = value[:N].view(N, self.num_kv_heads, self.head_size)
+            attn_out[num_decode_tokens:] = self._prefill_attention(
+                q[num_decode_tokens:],
+                k[num_decode_tokens:],
+                v[num_decode_tokens:],
+                kv_cache,
+                prefill_meta,
+                Pi,
+                centroids,
+                PiT,
+                layer=layer,
+            )
+
+        # Write into output buffer: attn_out is (N, Hq, D)
+        # output may be 2D (N, Hq*D) or 3D (N, Hq, D)
+        if output.ndim == 3:
+            output[:N] = attn_out.to(output.dtype)
+        else:
+            output[:N] = attn_out.reshape(N, -1).to(output.dtype)
+        return output
+
+    # ------------------------------------------------------------------ #
+    #  Store K/V into combined cache (vectorized)                         #
+    # ------------------------------------------------------------------ #
+    def _store_kv(
+        self,
+        key: torch.Tensor,  # (N, Hk, D)
+        value: torch.Tensor,  # (N, Hk, D)
+        kv_cache: torch.Tensor,  # (num_blocks, block_size, Hk, slot_size)
+        slot_mapping: torch.Tensor,
+        layer: Any,
+    ):
+        """Quantize + store via fused Triton kernel."""
+        triton_turboquant_store(
+            key,
+            value,
+            kv_cache,
+            slot_mapping,
+            layer._tq_PiT,
+            layer._tq_midpoints,
+            mse_bits=self.tq_config.key_mse_bits,
+            key_packed_size=self.tq_config.key_packed_size,
+            value_quant_bits=self.tq_config.effective_value_quant_bits,
+            key_fp8=self.tq_config.key_fp8,
+        )
+
+    # ------------------------------------------------------------------ #
+    #  Prefill: SDPA on raw Q/K/V with causal mask                        #
+    # ------------------------------------------------------------------ #
+    def _prefill_attention(
+        self,
+        query: torch.Tensor,  # (N, Hq, D)
+        key: torch.Tensor,  # (N, Hk, D)
+        value: torch.Tensor,  # (N, Hk, D)
+        kv_cache: torch.Tensor,  # (num_blocks, block_size, Hk, slot_size)
+        attn_metadata: TurboQuantMetadata,
+        Pi: torch.Tensor,
+        centroids: torch.Tensor,
+        PiT: torch.Tensor | None = None,
+        layer: Any = None,
+    ) -> torch.Tensor:
+        N, Hq, D = query.shape
+
+        # Fast path: use flash_attn for first-chunk prefills (all K/V in batch).
+        # max_query_len == max_seq_len means no request has prior cached KV.
+        # Both are Python ints — no GPU sync.
+        if _HAS_FLASH_ATTN and attn_metadata.max_query_len == attn_metadata.max_seq_len:
+            return self._flash_attn_varlen(
+                q=query,
+                k=key,
+                v=value,
+                cu_seqlens_q=attn_metadata.query_start_loc,
+                cu_seqlens_k=attn_metadata.query_start_loc,
+                max_seqlen_q=attn_metadata.max_query_len,
+                max_seqlen_k=attn_metadata.max_query_len,
+            )
+
+        # Continuation or no flash_attn: per-request attention.
+        # For continuation chunks (seq_len > q_len), we must attend to
+        # previously cached K/V from the TQ cache, not just the current
+        # chunk's raw K/V.
+        Hk = key.shape[1]
+        use_gqa = Hk < Hq
+        query_start_loc = attn_metadata.query_start_loc
+        num_reqs = query_start_loc.shape[0] - 1
+
+        output = torch.zeros(N, Hq, D, device=query.device, dtype=query.dtype)
+
+        # Prefer the CPU-resident copies from the metadata if populated —
+        # otherwise `.tolist()` on GPU tensors forces a synchronizing copy.
+        if attn_metadata.query_start_loc_cpu is not None:
+            qsl = attn_metadata.query_start_loc_cpu.tolist()
+        else:
+            qsl = query_start_loc.tolist()
+        if attn_metadata.seq_lens_cpu is not None:
+            seq_lens_list = attn_metadata.seq_lens_cpu.tolist()
+        else:
+            seq_lens_list = attn_metadata.seq_lens.tolist()
+
+        # Pre-allocate cu_seqlens for single-request flash_attn calls
+        # to avoid per-request host→device tensor creation.
+        if not hasattr(self, "_cu_2"):
+            self._cu_2 = torch.zeros(2, device=query.device, dtype=torch.int32)
+        # Cache arange on self (avoid per-call kernel launch).
+        _max_seq = attn_metadata.max_seq_len
+        _ac: torch.Tensor | None = getattr(self, "_arange_cache", None)
+        if _ac is None or _ac.shape[0] <= _max_seq:
+            _ac = torch.arange(
+                0, _max_seq + 1, device=query.device, dtype=attn_metadata.seq_lens.dtype
+            )
+            self._arange_cache = _ac
+        _arange_cache: torch.Tensor = _ac
+
+        for i in range(num_reqs):
+            q_start = qsl[i]
+            q_end = qsl[i + 1]
+            q_len = q_end - q_start
+            if q_len <= 0:
+                continue
+
+            seq_len = seq_lens_list[i]
+            q_seq = query[q_start:q_end]  # (q_len, Hq, D)
+            k_seq = key[q_start:q_end]  # (q_len, Hk, D)
+            v_seq = value[q_start:q_end]  # (q_len, Hk, D)
+
+            if q_len == seq_len:
+                # First-chunk prefill: all K/V are in the current batch.
+                if _HAS_FLASH_ATTN:
+                    # Assign to slice to avoid gpu/cpu sync.
+                    self._cu_2[1:2] = q_len
+                    cu = self._cu_2
+                    out = self._flash_attn_varlen(
+                        q=q_seq,
+                        k=k_seq,
+                        v=v_seq,
+                        cu_seqlens_q=cu,
+                        cu_seqlens_k=cu,
+                        max_seqlen_q=q_len,
+                        max_seqlen_k=q_len,
+                    )
+                else:
+                    q_t = q_seq.transpose(0, 1).contiguous()
+                    k_t = k_seq.transpose(0, 1).contiguous()
+                    v_t = v_seq.transpose(0, 1).contiguous()
+                    out = F.scaled_dot_product_attention(
+                        q_t,
+                        k_t,
+                        v_t,
+                        is_causal=True,
+                        scale=self.scale,
+                        enable_gqa=use_gqa,
+                    ).transpose(0, 1)
+                output[q_start:q_end] = out.to(query.dtype)
+            else:
+                # Continuation chunk: tokens already stored to TQ cache
+                # by do_kv_cache_update. Use decode kernel directly to
+                # avoid O(cached_len) full-dequant per continuation.
+                # For large continuations, fall back to _continuation_prefill.
+                cached_len = seq_len - q_len
+                if q_len <= _CONTINUATION_DECODE_THRESHOLD:
+                    # Fast path: treat each query as a decode request
+                    # with incremental seq_lens for causal masking.
+                    # Slice from pre-built arange (no kernel launch)
+                    synth_seq_lens = _arange_cache[cached_len + 1 : seq_len + 1]
+                    synth_bt = attn_metadata.block_table[i : i + 1].expand(q_len, -1)
+                    out = triton_turboquant_decode_attention(
+                        query=q_seq,
+                        kv_cache=kv_cache,
+                        block_table=synth_bt,
+                        seq_lens=synth_seq_lens,
+                        Pi=Pi,
+                        centroids=centroids,
+                        scale=self.scale,
+                        mse_bits=self.tq_config.key_mse_bits,
+                        key_packed_size=self.tq_config.key_packed_size,
+                        value_quant_bits=(self.tq_config.effective_value_quant_bits),
+                        key_fp8=self.tq_config.key_fp8,
+                        norm_correction=self.tq_config.norm_correction,
+                        PiT=PiT,
+                    )
+                else:
+                    # Large continuation: dequant cached K/V and use
+                    # flash_attn for better throughput.
+                    out = self._continuation_prefill(
+                        layer,
+                        q_seq,
+                        k_seq,
+                        v_seq,
+                        kv_cache,
+                        attn_metadata.block_table[i : i + 1],
+                        cached_len,
+                        seq_len,
+                        Pi,
+                        centroids,
+                    )
+                output[q_start:q_end] = out.to(query.dtype)
+
+        return output
+
+    def _continuation_prefill(
+        self,
+        layer: Any,
+        query: torch.Tensor,  # (q_len, Hq, D)
+        key_chunk: torch.Tensor,  # (q_len, Hk, D)
+        val_chunk: torch.Tensor,  # (q_len, Hk, D)
+        kv_cache: torch.Tensor,  # (num_blocks, block_size, Hk, slot_size)
+        block_table: torch.Tensor,  # (1, max_num_blocks)
+        cached_len: int,
+        seq_len: int,
+        Pi: torch.Tensor,
+        centroids: torch.Tensor,
+    ) -> torch.Tensor:
+        """Handle continuation chunk by dequanting cached K/V from TQ cache.
+
+        Dequants previously cached K/V, concatenates with the current
+        chunk's raw K/V, then runs flash_attn with causal masking.
+        """
+        q_len, Hq, D = query.shape
+        Hk = key_chunk.shape[1]
+        device = query.device
+        block_size = kv_cache.shape[1]
+        BLOCK_D = triton.next_power_of_2(D)
+
+        mse_bytes = self._mse_bytes
+        val_data_bytes = self._val_data_bytes
+
+        # Dequant cached K/V from TQ cache
+        # Allocate slightly over to align to block_size for the grid.
+        # Reuse cached buffers to avoid per-call allocation (~16MB at 8K).
+        alloc_len = math.ceil(cached_len / block_size) * block_size
+        buf_shape = (1, Hk, alloc_len, D)
+        # Use WorkspaceManager for dequant buffers.
+        # Shared across all layers — saves 60× memory at long context.
+        # Required for CUDA Graph capture (per-layer growth incompatible with CG).
+        k_buf, v_buf = current_workspace_manager().get_simultaneous(
+            (buf_shape, torch.float16),
+            (buf_shape, torch.float16),
+        )
+        # Skip .zero_() — kernel writes all positions up to cached_len,
+        # and we only read [:cached_len] afterwards.
+        k_cached = k_buf[:, :, :alloc_len, :]
+        v_cached = v_buf[:, :, :alloc_len, :]
+
+        grid = (alloc_len, 1 * Hk)
+        _tq_full_dequant_kv[grid](
+            kv_cache,
+            block_table,
+            centroids,
+            k_cached,
+            v_cached,
+            k_cached.stride(0),
+            k_cached.stride(1),
+            k_cached.stride(2),
+            v_cached.stride(0),
+            v_cached.stride(1),
+            v_cached.stride(2),
+            kv_cache.stride(0),
+            kv_cache.stride(1),
+            kv_cache.stride(2),
+            block_table.stride(0),
+            HEAD_DIM=D,
+            BLOCK_SIZE=block_size,
+            NUM_KV_HEADS=Hk,
+            MSE_BYTES=mse_bytes,
+            KPS=self.tq_config.key_packed_size,
+            VQB=self.tq_config.effective_value_quant_bits,
+            VAL_DATA_BYTES=val_data_bytes,
+            MSE_BITS=self.tq_config.key_mse_bits,
+            KEY_FP8=1 if self.tq_config.key_fp8 else 0,
+            BLOCK_D=BLOCK_D,
+            NORM_CORRECTION=1 if self.tq_config.norm_correction else 0,
+            FP8_E4B15=_use_fp8_e4b15(device.index or 0),
+            num_warps=4,
+        )
+
+        # Inverse-rotate MSE keys back to original space
+        if not self.tq_config.key_fp8:
+            # fp16 matmul for rotation (2× less bandwidth, uses fp16 tensor cores)
+            Pi_half = layer._tq_Pi_half
+            k_flat = k_cached[0, :, :cached_len, :].reshape(-1, D)
+            k_flat = k_flat @ Pi_half
+            k_cached_trim = k_flat.reshape(Hk, cached_len, D).transpose(
+                0, 1
+            )  # (cached_len, Hk, D) — already fp16
+        else:
+            k_cached_trim = k_cached[0, :, :cached_len, :].transpose(
+                0, 1
+            )  # (cached_len, Hk, D)
+
+        # Skip .contiguous() — the copy into k_full/v_full handles layout
+        v_cached_trim = v_cached[0, :, :cached_len, :].transpose(0, 1)
+
+        # Concatenate cached + current chunk K/V (match query dtype)
+        # Pre-allocate full K/V buffer, copy into slices (no cat alloc)
+        qdtype = query.dtype
+        k_full = torch.empty(seq_len, Hk, D, dtype=qdtype, device=device)
+        v_full = torch.empty(seq_len, Hk, D, dtype=qdtype, device=device)
+        k_full[:cached_len] = k_cached_trim.to(qdtype)
+        k_full[cached_len:] = key_chunk
+        v_full[:cached_len] = v_cached_trim.to(qdtype)
+        v_full[cached_len:] = val_chunk
+
+        # Attention: q_len queries attending to seq_len K/V with causal mask
+        if _HAS_FLASH_ATTN:
+            # Reuse pre-allocated cu_seqlens (avoid host→device transfer)
+            if not hasattr(self, "_cu_2_q"):
+                self._cu_2_q = torch.zeros(2, device=device, dtype=torch.int32)
+                self._cu_2_k = torch.zeros(2, device=device, dtype=torch.int32)
+            # Assigning to slice uses fill_ which avoids cpu/gpu sync.
+            self._cu_2_q[1:2] = q_len
+            self._cu_2_k[1:2] = seq_len
+            cu_seqlens_q = self._cu_2_q
+            cu_seqlens_k = self._cu_2_k
+            return self._flash_attn_varlen(
+                q=query,
+                k=k_full,
+                v=v_full,
+                cu_seqlens_q=cu_seqlens_q,
+                cu_seqlens_k=cu_seqlens_k,
+                max_seqlen_q=q_len,
+                max_seqlen_k=seq_len,
+            )
+        else:
+            # SDPA fallback: expand KV for GQA, build causal mask
+            q_t = query.transpose(0, 1).unsqueeze(0)  # (1, Hq, q_len, D)
+            k_t = k_full.transpose(0, 1).unsqueeze(0)  # (1, Hk, seq_len, D)
+            v_t = v_full.transpose(0, 1).unsqueeze(0)  # (1, Hk, seq_len, D)
+            # Build causal mask: query position p can attend to K position j
+            # where j <= cached_len + p (p is 0-indexed within chunk)
+            q_pos = torch.arange(q_len, device=device).unsqueeze(1) + cached_len
+            k_pos = torch.arange(seq_len, device=device).unsqueeze(0)
+            mask = k_pos <= q_pos  # (q_len, seq_len)
+            out = F.scaled_dot_product_attention(
+                q_t,
+                k_t,
+                v_t,
+                attn_mask=mask,
+                scale=self.scale,
+                enable_gqa=(Hk < Hq),
+            )  # (1, Hq, q_len, D)
+            return out[0].transpose(0, 1)  # (q_len, Hq, D)
+
+    # ------------------------------------------------------------------ #
+    #  Decode: Triton TQ decode attention                                 #
+    # ------------------------------------------------------------------ #
+    def _decode_attention(
+        self,
+        query: torch.Tensor,  # (B, Hq, D)
+        kv_cache: torch.Tensor,  # (num_blocks, block_size, Hk, slot_size)
+        attn_metadata: TurboQuantMetadata,
+        Pi: torch.Tensor,
+        centroids: torch.Tensor,
+        PiT: torch.Tensor | None = None,
+        layer: torch.nn.Module | None = None,
+    ) -> torch.Tensor:
+        # Acquire shared decode scratch buffers from WorkspaceManager.
+        # Layers execute sequentially so one set of buffers is sufficient.
+        # Falls back to kernel-internal allocation if workspace unavailable.
+        B = query.shape[0]
+        D = self.head_size
+        S = self.max_num_kv_splits
+        Hq = self.num_heads
+        mid_o_buf = output_buf = lse_buf = None
+        if is_workspace_manager_initialized():
+            # output_buf in query dtype — matches the in-kernel fp16 cast in stage2.
+            mid_o_buf, output_buf, lse_buf = (
+                current_workspace_manager().get_simultaneous(
+                    ((B, Hq, S, D + 1), torch.float32),
+                    ((B, Hq, D), query.dtype),
+                    ((B, Hq), torch.float32),
+                )
+            )
+
+        result = triton_turboquant_decode_attention(
+            query=query,
+            kv_cache=kv_cache,
+            block_table=attn_metadata.block_table,
+            seq_lens=attn_metadata.seq_lens,
+            Pi=Pi,
+            centroids=centroids,
+            scale=self.scale,
+            mse_bits=self.tq_config.key_mse_bits,
+            key_packed_size=self.tq_config.key_packed_size,
+            value_quant_bits=self.tq_config.effective_value_quant_bits,
+            key_fp8=self.tq_config.key_fp8,
+            norm_correction=self.tq_config.norm_correction,
+            PiT=PiT,
+            mid_o_buf=mid_o_buf,
+            output_buf=output_buf,
+            lse_buf=lse_buf,
+            buf_holder=layer,
+            max_num_kv_splits=self.max_num_kv_splits,
+        )
+        return result
diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py
index 59f6ca9bf6e9..b73d17e8e5cc 100644
--- a/vllm/v1/attention/backends/utils.py
+++ b/vllm/v1/attention/backends/utils.py
@@ -42,6 +42,7 @@
 _KV_CACHE_LAYOUT_OVERRIDE: KVCacheLayoutType | None = None
 
 PAD_SLOT_ID = -1
+NULL_BLOCK_ID = 0
 
 
 def is_valid_kv_cache_layout(value: str) -> bool:
@@ -135,6 +136,32 @@ def get_per_layer_parameters(
     return per_layer_params
 
 
+def get_num_attention_heads_from_layers(
+    vllm_config: VllmConfig, layer_names: list[str]
+) -> int | None:
+    """Per-TP-rank ``num_heads`` shared by the named Attention layers.
+
+    Use in metadata builders whose plan-time allocations depend on the
+    head count: the model-wide ``get_num_attention_heads()`` is wrong
+    for models with non-uniform per-layer head counts. All layers in
+    one attention group must agree on ``num_heads``; this is asserted.
+    Returns ``None`` when no matching Attention layer is found.
+    """
+    attn_layers = get_layers_from_vllm_config(
+        vllm_config,
+        AttentionLayerBase,  # type: ignore[type-abstract]
+        layer_names,
+    )
+    if not attn_layers:
+        return None
+    heads = {layer.impl.num_heads for layer in attn_layers.values()}
+    assert len(heads) == 1, (
+        f"All layers in one attention group must share num_heads; "
+        f"got {heads} for {layer_names}."
+    )
+    return heads.pop()
+
+
 def infer_global_hyperparameters(
     per_layer_params: dict[str, PerLayerParameters],
 ) -> PerLayerParameters:
@@ -331,8 +358,10 @@ def make_local_attention_virtual_batches(
     # regression when using numpy arrays (batch and block indices) to index into
     # torch tensor (block_table). As a workaround, convert numpy arrays to torch
     # tensor first, which recovers perf.
-    batch_indices_torch = torch.from_numpy(batch_indices)
-    block_indices_torch = torch.from_numpy(block_indices)
+    # Upload the index tensors to the block_table's device up-front so that the
+    # fancy indexing below doesn't implicitly force a synchronous H2D copy.
+    batch_indices_torch = torch.from_numpy(batch_indices).to(device, non_blocking=True)
+    block_indices_torch = torch.from_numpy(block_indices).to(device, non_blocking=True)
 
     # Save as a lambda so we can return this for update_block_table
     make_block_table = lambda block_table: block_table[
@@ -355,6 +384,7 @@ def make_local_attention_virtual_batches(
         block_table_tensor=block_table_local,
         slot_mapping=common_attn_metadata.slot_mapping,
         causal=True,
+        seq_lens_cpu_upper_bound=common_attn_metadata.seq_lens_cpu_upper_bound,
         _seq_lens_cpu=seq_lens_cpu,
         _num_computed_tokens_cpu=torch.from_numpy(num_computed_tokens_local),
     ), make_block_table
@@ -389,7 +419,16 @@ def make_kv_sharing_fast_prefill_common_attn_metadata(
 
     # Figure out how many tokens are in each request
     # num_decode_tokens: [1, 2, 1]
-    num_decode_tokens = torch.bincount(request_ids, minlength=num_reqs)
+    # Avoid `torch.bincount` here — on CUDA it forces a sync to determine
+    # the output size (even with `minlength`, the kernel must confirm no
+    # value exceeds the bound). `scatter_add_` into a preallocated buffer
+    # is equivalent and stays async.
+    num_decode_tokens = torch.zeros(
+        num_reqs, dtype=request_ids.dtype, device=request_ids.device
+    )
+    num_decode_tokens.scatter_add_(
+        0, request_ids.to(num_decode_tokens.dtype), torch.ones_like(request_ids)
+    )
 
     # Calculate new query_start_loc with tokens in generation_indices
     # decode_query_start_loc: [0, 1, 3, 4]
@@ -397,7 +436,7 @@ def make_kv_sharing_fast_prefill_common_attn_metadata(
         num_reqs + 1, device=query_start_loc.device, dtype=query_start_loc.dtype
     )
 
-    decode_query_start_loc[0] = 0
+    decode_query_start_loc[:1].fill_(0)  # Avoid sync from scalar assignment.
     decode_query_start_loc[1:] = torch.cumsum(num_decode_tokens, dim=0)
     decode_max_query_len = int(num_decode_tokens.max().item())
     total_num_decode_tokens = int(num_decode_tokens.sum().item())
@@ -413,6 +452,7 @@ def make_kv_sharing_fast_prefill_common_attn_metadata(
         block_table_tensor=common_attn_metadata.block_table_tensor,
         slot_mapping=common_attn_metadata.slot_mapping,
         causal=True,
+        seq_lens_cpu_upper_bound=common_attn_metadata.seq_lens_cpu_upper_bound,
         _seq_lens_cpu=common_attn_metadata._seq_lens_cpu,
         _num_computed_tokens_cpu=common_attn_metadata._num_computed_tokens_cpu,
     )
@@ -444,7 +484,11 @@ def split_decodes_prefills_and_extends(
     num_reqs = common_attn_metadata.num_reqs
     num_tokens = common_attn_metadata.num_actual_tokens
     query_start_loc = common_attn_metadata.query_start_loc_cpu
-    seq_lens = common_attn_metadata.seq_lens_cpu
+    # Upper bound is exact for prefill rows; decode rows still satisfy
+    # seq_len > query_len under the optimistic bound, so `seq_lens ==
+    # query_lens` identifies prefills correctly either way.
+    assert common_attn_metadata.seq_lens_cpu_upper_bound is not None
+    seq_lens = common_attn_metadata.seq_lens_cpu_upper_bound
 
     if max_query_len <= decode_threshold:
         return num_reqs, 0, 0, num_tokens, 0, 0
@@ -860,8 +904,10 @@ def mamba_get_block_table_tensor(
     Get the block table tensor for mamba kernels from the input
     common_attn_metadata.block_table_tensor given different mamba cache modes.
 
-    - "all":   input  (#requests, cdiv(max_model_len, block_size));
-               output (#requests, cdiv(max_model_len, block_size)).
+    - "all":   input  (#requests, cdiv(max_model_len, block_size)
+                        + num_speculative_blocks);
+               output (#requests, cdiv(max_model_len, block_size)
+                        + num_speculative_blocks).
 
     - "none":  input  (#requests, 1 + num_speculative_blocks);
                output (#requests, 1 + num_speculative_blocks).
diff --git a/vllm/v1/attention/ops/chunked_prefill_paged_decode.py b/vllm/v1/attention/ops/chunked_prefill_paged_decode.py
index 000fd4d43b93..77eb3ac60b1f 100644
--- a/vllm/v1/attention/ops/chunked_prefill_paged_decode.py
+++ b/vllm/v1/attention/ops/chunked_prefill_paged_decode.py
@@ -21,6 +21,22 @@
 float8_info = torch.finfo(current_platform.fp8_dtype())
 
 
+def has_native_kv_cache_layout(
+    key_cache: torch.Tensor,
+    value_cache: torch.Tensor,
+) -> bool:
+    """Return whether KV cache blocks can use the native ROCm pairing.
+
+    The native reshape_and_cache writer assumes packed blocks. If cache update
+    needs reshape_and_cache_flash for a stride-padded hybrid layout, decode
+    should use the matching Triton path too.
+    """
+    return (
+        key_cache.stride(0) == key_cache.shape[1:].numel()
+        and value_cache.stride(0) == value_cache.shape[1:].numel()
+    )
+
+
 @triton.jit
 def cdiv_fn(x, y):
     return (x + y - 1) // y
@@ -269,6 +285,7 @@ def chunked_prefill_paged_decode(
     # Optional tensor for sinks
     sinks=None,
     is_block_table_ptr: bool = False,
+    causal: bool = True,
 ):
     if sm_scale is None:
         sm_scale = 1.0 / (query.shape[2] ** 0.5)
@@ -300,6 +317,7 @@ def chunked_prefill_paged_decode(
             skip_decode=True,
             fp8_out_scale=output_scale,
             sinks=sinks,
+            causal=causal,
         )
 
     block_size = value_cache.shape[3]
@@ -344,14 +362,12 @@ def chunked_prefill_paged_decode(
         alibi_slopes,
         sinks,
     )
-    # Triton is only forced when encountering a non-standard block
-    # like Qwen3 with a size of 544.
-    # 1. Check if block_size is a power of 2 (16, 32, 64...)
-    # 2. If it's a power of 2, we trust the vLLM's native use_custom decision.
-    # 3. If it's not a power of 2 (such as Qwen3's 544),
-    # then our Triton path is forced.
+    has_native_layout = has_native_kv_cache_layout(key_cache, value_cache)
+    # Force Triton for non-standard blocks like Qwen3's 544 and for
+    # stride-padded hybrid layouts. The latter use reshape_and_cache_flash
+    # during cache update, so keep decode on the matching stride-aware path.
     is_pow2 = block_size > 0 and (block_size & (block_size - 1) == 0)
-    if not is_pow2:
+    if not is_pow2 or not has_native_layout:
         use_custom = False
 
     if use_custom:
@@ -402,7 +418,12 @@ def chunked_prefill_paged_decode(
         real_block_size = value_cache.shape[3]
         # The standard model directly uses the original block_size.
         # Non-standard 544 uses 32 to accommodate integer division logic.
-        TRITON_BLOCK_SIZE = block_size if is_pow2 else 32
+        # Cap at 128 to avoid exceeding GPU shared memory limits
+        # (e.g. hybrid Mamba models inflate block_size to 2048).
+        # The kernel handles TRITON_BLOCK_SIZE != PHYSICAL_BLOCK_SIZE
+        # via the l_block_idx/internal_offsets addressing logic.
+        MAX_TRITON_BLOCK_SIZE = 128
+        TRITON_BLOCK_SIZE = min(block_size, MAX_TRITON_BLOCK_SIZE) if is_pow2 else 32
         if is_block_table_ptr:
             # Using the physical base address of tensors
             kv_element_size = key_cache.element_size()
diff --git a/vllm/v1/attention/ops/common.py b/vllm/v1/attention/ops/common.py
index 46c689ce0b8f..98abc7790ea2 100644
--- a/vllm/v1/attention/ops/common.py
+++ b/vllm/v1/attention/ops/common.py
@@ -265,6 +265,7 @@ def _pack_seq_kernel(
     D: tl.constexpr,
     Lmax: tl.constexpr,
     PAD_VALUE: tl.constexpr,
+    PAD_IS_UINT8: tl.constexpr,
     BLOCK_T: tl.constexpr,  # timesteps per program
     BLOCK_D: tl.constexpr,  # features per program
 ):
@@ -294,9 +295,15 @@ def _pack_seq_kernel(
     # out_ptr: row-major [B, Lmax, D]
     out_row_ptr = out_ptr + (pid_b * Lmax + off_t)[:, None] * D + off_d[None, :]
 
-    # Initialize with PAD (cast will occur as needed based on out_ptr dtype)
+    # Initialize with PAD. PAD_IS_UINT8 selects the pad tensor's dtype so
+    # integer-typed outputs (e.g. MXFP4 packed nibbles, ue8m0 scale bytes)
+    # get an exact-byte pad rather than going through an fp32→uint8 cast
+    # that's implementation-defined outside of value 0.
     d_mask = off_d[None, :] < D
-    pad_vals = tl.full([BLOCK_T, BLOCK_D], PAD_VALUE, tl.float32)
+    if PAD_IS_UINT8:
+        pad_vals = tl.full([BLOCK_T, BLOCK_D], PAD_VALUE, tl.uint8)
+    else:
+        pad_vals = tl.full([BLOCK_T, BLOCK_D], PAD_VALUE, tl.float32)
     tl.store(out_row_ptr, pad_vals, mask=t_mask[:, None] & d_mask)
 
     # Load & write only where within seq_len
@@ -307,23 +314,36 @@ def _pack_seq_kernel(
 def pack_seq_triton(
     x: torch.Tensor,
     lengths: torch.Tensor,
-    pad_value: float = -float("inf"),
+    pad_value: float | int = -float("inf"),
     block_t: int = 64,
     block_d: int = 64,
 ) -> torch.Tensor:
-    """
-    Pack sequences of different lengths into a batched tensor.
+    """Pack sequences of different lengths into a batched tensor.
+
+    Supports float dtypes (any, via fp32 pad) and ``torch.uint8`` (exact-byte
+    pad — e.g. MXFP4 packed nibbles or ue8m0 scale bytes). For uint8 inputs
+    ``pad_value`` must be an integer in ``[0, 255]``.
 
     Args:
-        x: [N, ...] - input tensor where N is total number of tokens
-        lengths: [B] - sequence lengths for each batch
-        pad_value: value to use for padding
-        block_t: block size for time dimension
-        block_d: block size for feature dimension
+        x: [N, ...] — input tensor where N is total number of tokens.
+        lengths: [B] — sequence lengths for each batch.
+        pad_value: value to use for padding. Defaults to ``-inf`` which is
+            only sensible for float dtypes; pass ``0`` (or any byte) for
+            uint8 inputs.
+        block_t: block size for time dimension.
+        block_d: block size for feature dimension.
 
     Returns:
-        packed: [B, Lmax, ...] - packed tensor
+        packed: [B, Lmax, ...] — packed tensor.
     """
+    is_uint8 = x.dtype == torch.uint8
+    if is_uint8:
+        assert isinstance(pad_value, int) and 0 <= pad_value <= 255, (
+            f"uint8 pack requires an integer pad in [0, 255], got {pad_value!r}"
+        )
+        pad_constexpr: int | float = int(pad_value)
+    else:
+        pad_constexpr = float(pad_value)
 
     # Handle multi-dimensional input by reshaping to (N, -1)
     original_shape = x.shape
@@ -338,8 +358,6 @@ def pack_seq_triton(
     B = lengths.numel()
     Lmax = int(lengths.max().item())
 
-    # Starts are computed inside the kernel from lengths
-
     out = torch.empty((B, Lmax, D), device=x.device, dtype=x.dtype)
 
     grid = (B, triton.cdiv(Lmax, block_t), triton.cdiv(D, block_d))
@@ -350,17 +368,16 @@ def pack_seq_triton(
         N,
         D,
         Lmax,
-        PAD_VALUE=float(pad_value),
+        PAD_VALUE=pad_constexpr,
+        PAD_IS_UINT8=is_uint8,
         BLOCK_T=block_t,
         BLOCK_D=block_d,
         num_warps=4,
         num_stages=2,
     )
 
-    # Reshape output back to original dimensions (except first dimension)
     if len(original_shape) > 2:
-        output_shape = (B, Lmax) + original_shape[1:]
-        out = out.reshape(output_shape)
+        out = out.reshape((B, Lmax) + original_shape[1:])
 
     return out
 
diff --git a/vllm/v1/attention/ops/dcp_alltoall.py b/vllm/v1/attention/ops/dcp_alltoall.py
index 92f50f63e3ef..1469a5c754d6 100644
--- a/vllm/v1/attention/ops/dcp_alltoall.py
+++ b/vllm/v1/attention/ops/dcp_alltoall.py
@@ -9,10 +9,8 @@
 A2A exchanges partial attention outputs and their LSE values across
 ranks, then combines them with exact LSE-weighted reduction.
 
-This reduces the number of NCCL calls per attention layer from 3
-(AG for Q, AG for K metadata, RS for output) to 2 (A2A for output,
-A2A for LSE), lowering per-step communication overhead for long-context
-decode where NCCL latency is a significant fraction of step time.
+This reduces the number of NCCL calls per attention layer by exchanging
+the partial output and LSE in a single packed All-to-All payload.
 
 Usage:
     vllm serve model --tp 16 --dcp 16 --dcp-comm-backend a2a
@@ -28,6 +26,10 @@
 import torch.distributed as dist
 
 from vllm.triton_utils import tl, triton
+from vllm.v1.worker.workspace import (
+    current_workspace_manager,
+    is_workspace_manager_initialized,
+)
 
 if TYPE_CHECKING:
     from vllm.distributed.parallel_state import GroupCoordinator
@@ -44,7 +46,6 @@ def _lse_weighted_combine(
     CPU reference implementation for LSE-weighted combination.
 
     This is a pure PyTorch implementation used for testing and validation.
-    For GPU execution, use dcp_lse_combine_triton instead.
 
     Args:
         outputs: Partial attention outputs [N, B, H, D]
@@ -102,57 +103,137 @@ def _lse_weighted_combine(
     return result
 
 
+def _dcp_a2a_lse_pack_dim(output_dtype: torch.dtype) -> int:
+    bits = torch.finfo(output_dtype).bits
+    if bits == 16:
+        return 2
+    if bits == 32:
+        return 1
+    raise ValueError(f"Cannot pack fp32 LSE into output dtype {output_dtype}.")
+
+
+def _dcp_a2a_send_recv_buffers(
+    shape: tuple[int, ...],
+    device: torch.device,
+    dtype: torch.dtype,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    if is_workspace_manager_initialized():
+        send_buffer, recv_buffer = current_workspace_manager().get_simultaneous(
+            (shape, dtype),
+            (shape, dtype),
+        )
+        return send_buffer, recv_buffer
+
+    return (
+        torch.empty(shape, device=device, dtype=dtype),
+        torch.empty(shape, device=device, dtype=dtype),
+    )
+
+
 @triton.jit
-def _dcp_lse_combine_kernel(
-    # Input pointers
-    recv_output_ptr,
-    recv_lse_ptr,
-    # Output pointers
+def _dcp_a2a_pack_send_kernel(
     out_ptr,
-    out_lse_ptr,
-    # Strides for recv_output [N, B, H_local, D]
-    ro_stride_N,
-    ro_stride_B,
-    ro_stride_H,
-    ro_stride_D,
-    # Strides for recv_lse [N, B, H_local]
-    rl_stride_N,
-    rl_stride_B,
-    rl_stride_H,
-    # Strides for output [B, H_local, D]
-    o_stride_B,
-    o_stride_H,
-    o_stride_D,
-    # Constants
+    lse_ptr,
+    send_ptr,
+    out_stride_B,
+    out_stride_H,
+    out_stride_D,
+    lse_stride_B,
+    lse_stride_H,
+    send_stride_N,
+    send_stride_B,
+    send_stride_H,
+    send_stride_D,
     N: tl.constexpr,
     HEAD_DIM: tl.constexpr,
-    IS_BASE_E: tl.constexpr,
-    RETURN_LSE: tl.constexpr,
+    H_PER_RANK: tl.constexpr,
+    LSE_PACK_DIM: tl.constexpr,
 ):
-    """
-    Triton kernel for LSE-weighted combination of partial attention outputs.
+    batch_idx = tl.program_id(0).to(tl.int64)
+    local_head_idx = tl.program_id(1).to(tl.int64)
+    d_offsets = tl.arange(0, HEAD_DIM)
 
-    After All-to-All, each rank has:
-    - recv_output [N, B, H_local, D]: partial outputs from all KV shards
-    - recv_lse [N, B, H_local]: partial LSEs from all KV shards
+    for rank_idx in tl.static_range(N):
+        src_head_idx = rank_idx * H_PER_RANK + local_head_idx
+        send_base = (
+            rank_idx * send_stride_N
+            + batch_idx * send_stride_B
+            + local_head_idx * send_stride_H
+        )
 
-    This kernel computes the weighted combination locally (no communication).
+        out_offsets = (
+            batch_idx * out_stride_B
+            + src_head_idx * out_stride_H
+            + d_offsets * out_stride_D
+        )
+        tl.store(
+            send_ptr + send_base + d_offsets * send_stride_D,
+            tl.load(out_ptr + out_offsets),
+        )
 
-    Grid: (B, H_local)
-    Each program handles one (batch, head) and processes all D elements.
-    """
+        lse_val = tl.load(
+            lse_ptr + batch_idx * lse_stride_B + src_head_idx * lse_stride_H
+        )
+        if LSE_PACK_DIM == 1:
+            tl.store(
+                send_ptr + send_base + HEAD_DIM * send_stride_D,
+                lse_val.to(send_ptr.dtype.element_ty),
+            )
+        else:
+            lse_bits = lse_val.to(tl.uint32, bitcast=True)
+            lo = (lse_bits & 0xFFFF).to(tl.uint16)
+            hi = ((lse_bits >> 16) & 0xFFFF).to(tl.uint16)
+            tl.store(
+                send_ptr + send_base + HEAD_DIM * send_stride_D,
+                lo.to(send_ptr.dtype.element_ty, bitcast=True),
+            )
+            tl.store(
+                send_ptr + send_base + (HEAD_DIM + 1) * send_stride_D,
+                hi.to(send_ptr.dtype.element_ty, bitcast=True),
+            )
+
+
+@triton.jit
+def _dcp_a2a_unpack_combine_kernel(
+    recv_ptr,
+    out_ptr,
+    out_lse_ptr,
+    recv_stride_N,
+    recv_stride_B,
+    recv_stride_H,
+    recv_stride_D,
+    out_stride_B,
+    out_stride_H,
+    out_stride_D,
+    out_lse_stride_B,
+    out_lse_stride_H,
+    N: tl.constexpr,
+    HEAD_DIM: tl.constexpr,
+    IS_BASE_E: tl.constexpr,
+    RETURN_LSE: tl.constexpr,
+    LSE_PACK_DIM: tl.constexpr,
+):
     batch_idx = tl.program_id(0).to(tl.int64)
     head_idx = tl.program_id(1).to(tl.int64)
+    d_offsets = tl.arange(0, HEAD_DIM)
 
-    # Base offset for this (batch, head)
-    base_lse_offset = batch_idx * rl_stride_B + head_idx * rl_stride_H
-    base_out_offset = batch_idx * ro_stride_B + head_idx * ro_stride_H
-
-    # First pass: find max LSE for numerical stability
     lse_max = -float("inf")
-    for n in tl.static_range(N):
-        lse_offset = n * rl_stride_N + base_lse_offset
-        lse_val = tl.load(recv_lse_ptr + lse_offset)
+    for rank_idx in tl.static_range(N):
+        recv_base = (
+            rank_idx * recv_stride_N
+            + batch_idx * recv_stride_B
+            + head_idx * recv_stride_H
+        )
+        if LSE_PACK_DIM == 1:
+            lse_val = tl.load(recv_ptr + recv_base + HEAD_DIM * recv_stride_D).to(
+                tl.float32
+            )
+        else:
+            lo_raw = tl.load(recv_ptr + recv_base + HEAD_DIM * recv_stride_D)
+            hi_raw = tl.load(recv_ptr + recv_base + (HEAD_DIM + 1) * recv_stride_D)
+            lo = lo_raw.to(tl.uint16, bitcast=True).to(tl.uint32)
+            hi = hi_raw.to(tl.uint16, bitcast=True).to(tl.uint32)
+            lse_val = (lo | (hi << 16)).to(tl.float32, bitcast=True)
         lse_val = tl.where(
             (lse_val != lse_val) | (lse_val == float("inf")),
             -float("inf"),
@@ -162,11 +243,23 @@ def _dcp_lse_combine_kernel(
 
     lse_max = tl.where(lse_max == -float("inf"), 0.0, lse_max)
 
-    # Second pass: compute sum of exp(lse - max)
     lse_sum = 0.0
-    for n in tl.static_range(N):
-        lse_offset = n * rl_stride_N + base_lse_offset
-        lse_val = tl.load(recv_lse_ptr + lse_offset)
+    for rank_idx in tl.static_range(N):
+        recv_base = (
+            rank_idx * recv_stride_N
+            + batch_idx * recv_stride_B
+            + head_idx * recv_stride_H
+        )
+        if LSE_PACK_DIM == 1:
+            lse_val = tl.load(recv_ptr + recv_base + HEAD_DIM * recv_stride_D).to(
+                tl.float32
+            )
+        else:
+            lo_raw = tl.load(recv_ptr + recv_base + HEAD_DIM * recv_stride_D)
+            hi_raw = tl.load(recv_ptr + recv_base + (HEAD_DIM + 1) * recv_stride_D)
+            lo = lo_raw.to(tl.uint16, bitcast=True).to(tl.uint32)
+            hi = hi_raw.to(tl.uint16, bitcast=True).to(tl.uint32)
+            lse_val = (lo | (hi << 16)).to(tl.float32, bitcast=True)
         lse_val = tl.where(
             (lse_val != lse_val) | (lse_val == float("inf")),
             -float("inf"),
@@ -177,19 +270,28 @@ def _dcp_lse_combine_kernel(
         else:
             lse_sum += tl.exp2(lse_val - lse_max)
 
-    # Compute global LSE
     if IS_BASE_E:  # noqa: SIM108
         global_lse = tl.log(lse_sum) + lse_max
     else:
         global_lse = tl.log2(lse_sum) + lse_max
 
-    # Third pass: weighted combination across D dimension
-    d_offsets = tl.arange(0, HEAD_DIM)
     acc = tl.zeros([HEAD_DIM], dtype=tl.float32)
-
-    for n in tl.static_range(N):
-        lse_offset = n * rl_stride_N + base_lse_offset
-        lse_val = tl.load(recv_lse_ptr + lse_offset)
+    for rank_idx in tl.static_range(N):
+        recv_base = (
+            rank_idx * recv_stride_N
+            + batch_idx * recv_stride_B
+            + head_idx * recv_stride_H
+        )
+        if LSE_PACK_DIM == 1:
+            lse_val = tl.load(recv_ptr + recv_base + HEAD_DIM * recv_stride_D).to(
+                tl.float32
+            )
+        else:
+            lo_raw = tl.load(recv_ptr + recv_base + HEAD_DIM * recv_stride_D)
+            hi_raw = tl.load(recv_ptr + recv_base + (HEAD_DIM + 1) * recv_stride_D)
+            lo = lo_raw.to(tl.uint16, bitcast=True).to(tl.uint32)
+            hi = hi_raw.to(tl.uint16, bitcast=True).to(tl.uint32)
+            lse_val = (lo | (hi << 16)).to(tl.float32, bitcast=True)
         lse_val = tl.where(
             (lse_val != lse_val) | (lse_val == float("inf")),
             -float("inf"),
@@ -200,80 +302,89 @@ def _dcp_lse_combine_kernel(
         else:
             weight = tl.exp2(lse_val - global_lse)
         weight = tl.where(weight != weight, 0.0, weight)
+        acc += (
+            tl.load(recv_ptr + recv_base + d_offsets * recv_stride_D).to(tl.float32)
+            * weight
+        )
 
-        out_offsets = n * ro_stride_N + base_out_offset + d_offsets * ro_stride_D
-        out_vals = tl.load(recv_output_ptr + out_offsets)
-        acc += out_vals.to(tl.float32) * weight
-
-    # Store result
     final_offsets = (
-        batch_idx * o_stride_B + head_idx * o_stride_H + d_offsets * o_stride_D
+        batch_idx * out_stride_B + head_idx * out_stride_H + d_offsets * out_stride_D
     )
     tl.store(out_ptr + final_offsets, acc)
 
     if RETURN_LSE:
-        tl.store(out_lse_ptr + base_lse_offset, global_lse)
-
+        out_lse_offset = batch_idx * out_lse_stride_B + head_idx * out_lse_stride_H
+        tl.store(out_lse_ptr + out_lse_offset, global_lse)
 
-def dcp_lse_combine_triton(
-    recv_output: torch.Tensor,
-    recv_lse: torch.Tensor,
-    return_lse: bool = False,
-    is_lse_base_on_e: bool = True,
-) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-    """
-    Triton-accelerated LSE-weighted combination for DCP A2A.
 
-    Args:
-        recv_output: [N, B, H_local, D] - partial outputs from all KV shards
-        recv_lse: [N, B, H_local] - partial LSEs from all KV shards
-        return_lse: If True, also return the global LSE
-        is_lse_base_on_e: If True, LSE is base e; if False, base 2
+def _dcp_a2a_pack_send(
+    cp_attn_out: torch.Tensor,
+    cp_attn_lse: torch.Tensor,
+    send_buffer: torch.Tensor,
+    world_size: int,
+    h_per_rank: int,
+    head_dim: int,
+    lse_pack_dim: int,
+) -> None:
+    grid = (cp_attn_out.shape[0], h_per_rank, 1)
+    _dcp_a2a_pack_send_kernel[grid](
+        cp_attn_out,
+        cp_attn_lse,
+        send_buffer,
+        cp_attn_out.stride(0),
+        cp_attn_out.stride(1),
+        cp_attn_out.stride(2),
+        cp_attn_lse.stride(0),
+        cp_attn_lse.stride(1),
+        send_buffer.stride(0),
+        send_buffer.stride(1),
+        send_buffer.stride(2),
+        send_buffer.stride(3),
+        N=world_size,
+        HEAD_DIM=head_dim,
+        H_PER_RANK=h_per_rank,
+        LSE_PACK_DIM=lse_pack_dim,
+    )
 
-    Returns:
-        Combined output [B, H_local, D]
-        If return_lse=True, also returns global_lse [B, H_local]
-    """
-    N, B, H_local, D = recv_output.shape
 
+def _dcp_a2a_unpack_combine(
+    recv_buffer: torch.Tensor,
+    head_dim: int,
+    lse_pack_dim: int,
+    return_lse: bool,
+    is_lse_base_on_e: bool,
+) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+    world_size, num_tokens, h_per_rank, _ = recv_buffer.shape
     out = torch.empty(
-        (B, H_local, D), device=recv_output.device, dtype=recv_output.dtype
+        (num_tokens, h_per_rank, head_dim),
+        device=recv_buffer.device,
+        dtype=recv_buffer.dtype,
     )
-
-    if return_lse:
-        out_lse = torch.empty(
-            (B, H_local), device=recv_lse.device, dtype=recv_lse.dtype
-        )
-    else:
-        out_lse = torch.empty(1, device=recv_lse.device, dtype=recv_lse.dtype)
-
-    ro_stride_N, ro_stride_B, ro_stride_H, ro_stride_D = recv_output.stride()
-    rl_stride_N, rl_stride_B, rl_stride_H = recv_lse.stride()
-    o_stride_B, o_stride_H, o_stride_D = out.stride()
-
-    grid = (B, H_local, 1)
-
-    _dcp_lse_combine_kernel[grid](
-        recv_output,
-        recv_lse,
+    out_lse = torch.empty(
+        (num_tokens, h_per_rank) if return_lse else (1, 1),
+        device=recv_buffer.device,
+        dtype=torch.float32 if return_lse else recv_buffer.dtype,
+    )
+    grid = (num_tokens, h_per_rank, 1)
+    _dcp_a2a_unpack_combine_kernel[grid](
+        recv_buffer,
         out,
         out_lse,
-        ro_stride_N,
-        ro_stride_B,
-        ro_stride_H,
-        ro_stride_D,
-        rl_stride_N,
-        rl_stride_B,
-        rl_stride_H,
-        o_stride_B,
-        o_stride_H,
-        o_stride_D,
-        N=N,
-        HEAD_DIM=D,
+        recv_buffer.stride(0),
+        recv_buffer.stride(1),
+        recv_buffer.stride(2),
+        recv_buffer.stride(3),
+        out.stride(0),
+        out.stride(1),
+        out.stride(2),
+        out_lse.stride(0),
+        out_lse.stride(1),
+        N=world_size,
+        HEAD_DIM=head_dim,
         IS_BASE_E=is_lse_base_on_e,
         RETURN_LSE=return_lse,
+        LSE_PACK_DIM=lse_pack_dim,
     )
-
     if return_lse:
         return out, out_lse
     return out
@@ -290,17 +401,8 @@ def dcp_a2a_lse_reduce(
     """
     Combine partial attention outputs across DCP ranks using All-to-All.
 
-    Each rank holds attention output for all heads but only a local shard
-    of the KV cache. This function:
-    1. Exchanges partial outputs across ranks via All-to-All
-    2. Exchanges LSE values via All-to-All
-    3. Combines them with exact LSE-weighted reduction (Triton kernel)
-
-    Tensor flow:
-        Input:  cp_attn_out [B, H, D] - all heads, local KV shard
-        Reshape: [N, B, H/N, D] - split heads across ranks
-        A2A:    Two all_to_all_single calls (output and LSE)
-        Combine: recv [N, B, H/N, D] + lse [N, B, H/N] -> [B, H/N, D]
+    The output and fp32 LSE are packed into a single output-dtype buffer, sent
+    with one All-to-All, then unpacked and combined with exact LSE weighting.
 
     Args:
         cp_attn_out: [B, H, D] where B=num_tokens, H=total_heads, D=head_dim
@@ -321,43 +423,36 @@ def dcp_a2a_lse_reduce(
             return cp_attn_out, cp_attn_lse
         return cp_attn_out
 
-    local_output = cp_attn_out.contiguous()
-    local_lse = cp_attn_lse.contiguous()
-
-    B, H, D = local_output.shape
+    B, H, D = cp_attn_out.shape
+    if H % world_size != 0:
+        raise ValueError(f"H={H} must be divisible by DCP world size {world_size}.")
     H_per_rank = H // world_size
+    lse_pack_dim = _dcp_a2a_lse_pack_dim(cp_attn_out.dtype)
 
-    # Reshape for All-to-All: [B, H, D] -> [N, B, H/N, D]
-    # Split heads into N chunks, each destined for a different rank
-    send_output = (
-        local_output.view(B, world_size, H_per_rank, D).permute(1, 0, 2, 3).contiguous()
+    send_buffer, recv_buffer = _dcp_a2a_send_recv_buffers(
+        (world_size, B, H_per_rank, D + lse_pack_dim),
+        device=cp_attn_out.device,
+        dtype=cp_attn_out.dtype,
     )
-    recv_output = torch.empty_like(send_output)
-
-    # Same for LSE: [B, H] -> [N, B, H/N]
-    send_lse = local_lse.view(B, world_size, H_per_rank).permute(1, 0, 2).contiguous()
-    recv_lse = torch.empty_like(send_lse)
 
-    # All-to-All for partial attention outputs and LSE values (async overlap)
-    work_output = dist.all_to_all_single(
-        recv_output.view(-1),
-        send_output.view(-1),
-        group=cp_group.device_group,
-        async_op=True,
+    _dcp_a2a_pack_send(
+        cp_attn_out,
+        cp_attn_lse,
+        send_buffer,
+        world_size,
+        H_per_rank,
+        D,
+        lse_pack_dim,
     )
-    work_lse = dist.all_to_all_single(
-        recv_lse.view(-1),
-        send_lse.view(-1),
+
+    work = dist.all_to_all_single(
+        recv_buffer.view(-1),
+        send_buffer.view(-1),
         group=cp_group.device_group,
         async_op=True,
     )
-    work_output.wait()
-    work_lse.wait()
-
-    # LSE-weighted combination via Triton kernel (local, no communication)
-    return dcp_lse_combine_triton(
-        recv_output,
-        recv_lse,
-        return_lse=return_lse,
-        is_lse_base_on_e=is_lse_base_on_e,
+    work.wait()
+
+    return _dcp_a2a_unpack_combine(
+        recv_buffer, D, lse_pack_dim, return_lse, is_lse_base_on_e
     )
diff --git a/vllm/v1/attention/ops/flashmla.py b/vllm/v1/attention/ops/flashmla.py
index aa667570a823..df04f5bf2289 100644
--- a/vllm/v1/attention/ops/flashmla.py
+++ b/vllm/v1/attention/ops/flashmla.py
@@ -151,16 +151,3 @@ def flash_mla_with_kvcache_fp8(
         descale_k,
     )
     return out, softmax_lse
-
-
-#
-# TODO: Add fake functions
-#
-# @register_fake("_flashmla_C::get_mla_metadata")
-# def _get_mla_metadata_fake(....) -> Tuple[torch.Tensor, torch.Tensor]:
-#     return ....
-#
-# @register_fake("_flashmla_C::fwd_kvcache_mla")
-# def _fwd_kvcache_mla_fake(....) -> Tuple[torch.Tensor, torch.Tensor]:
-#     return ....
-#
diff --git a/vllm/v1/attention/ops/merge_attn_states.py b/vllm/v1/attention/ops/merge_attn_states.py
index 673d2d94790e..cf4338fb180d 100644
--- a/vllm/v1/attention/ops/merge_attn_states.py
+++ b/vllm/v1/attention/ops/merge_attn_states.py
@@ -13,35 +13,91 @@ def merge_attn_states(
     suffix_output: torch.Tensor,
     suffix_lse: torch.Tensor,
     output_lse: torch.Tensor | None = None,
+    prefill_tokens_with_context: int | None = None,
+    output_scale: torch.Tensor | None = None,
 ) -> None:
+    """Merge partial attention outputs from prefix (KV cache) and suffix
+    (new tokens) into a single output tensor using the log-sum-exp (LSE)
+    rescaling method described in section 2.2 of
+    https://www.arxiv.org/pdf/2501.01005.
+
+    For tokens that have prefix context (token index < prefill_tokens_with_context),
+    the prefix and suffix partial outputs are combined as a weighted sum.
+    For tokens without prefix context, the suffix output is copied directly.
+
+    Args:
+        output: Output tensor of shape [NUM_TOKENS, NUM_HEADS, HEAD_SIZE].
+        prefix_output: Partial attention output over the prefix (KV cache),
+            shape [NUM_TOKENS, NUM_HEADS, HEAD_SIZE].
+        prefix_lse: Log-sum-exp values for the prefix attention,
+            shape [NUM_HEADS, NUM_TOKENS].
+        suffix_output: Partial attention output over the suffix (new KV),
+            shape [NUM_TOKENS, NUM_HEADS, HEAD_SIZE].
+        suffix_lse: Log-sum-exp values for the suffix attention,
+            shape [NUM_HEADS, NUM_TOKENS].
+        output_lse: Optional tensor to store the merged LSE values,
+            shape [NUM_HEADS, NUM_TOKENS]. If None, LSE is not written out.
+        prefill_tokens_with_context: Number of prefill tokens that have
+            prefix context and therefore require merging. Tokens at indices
+            >= this value are decode or context-free prefill tokens whose
+            output is taken directly from suffix_output. If None, all tokens
+            are treated as having context.
+        output_scale: Optional scalar tensor for FP8 static quantization.
+            When provided, output must be FP8 dtype.
+    """
+
     # NOTE(DefTruth): Currently, custom merge_attn_states CUDA kernel
-    # does not support FP8 dtype, fallback to use Triton kernel.
-    def supported_dtypes(o: torch.Tensor) -> bool:
-        return o.dtype in [torch.float32, torch.half, torch.bfloat16]
+    # does not support FP8 dtype for inputs, fallback to use Triton kernel.
+    # However, when output_scale is provided, the inputs are still BF16/FP16
+    # and the output is FP8 — both CUDA and Triton support this.
+    # FP8 output requires output_scale to be set.
+    if output.dtype not in (torch.float32, torch.half, torch.bfloat16):
+        assert output_scale is not None, (
+            f"output_scale is required when output is {output.dtype}"
+        )
+
+    def supported_dtypes(prefix: torch.Tensor) -> bool:
+        return prefix.dtype in [torch.float32, torch.half, torch.bfloat16]
 
     # NOTE(DefTruth): Currently, custom merge_attn_states CUDA
     # kernel load/store 128b(16 bytes) per memory issue within
     # thread. Namely, the headsize(headdim) must be multiple of
-    # pack_size (float32 -> 4, half/bfloat16 -> 8).
-    def supported_headdim(o: torch.Tensor) -> bool:
-        headdim = o.shape[2]  # [NUM_TOKENS, NUM_HEADS, HEAD_SIZE]
-        if o.dtype == torch.float32:
+    # pack_size based on input dtype (float32 -> 4, half/bfloat16 -> 8).
+    def supported_headdim(prefix: torch.Tensor) -> bool:
+        headdim = prefix.shape[2]  # [NUM_TOKENS, NUM_HEADS, HEAD_SIZE]
+        if prefix.dtype == torch.float32:
             return headdim % 4 == 0
         return headdim % 8 == 0
 
     if (
         current_platform.is_cuda()
-        and supported_dtypes(output)
-        and supported_headdim(output)
+        and supported_dtypes(prefix_output)
+        and supported_headdim(prefix_output)
     ):
         from vllm._custom_ops import merge_attn_states
 
         return merge_attn_states(
-            output, prefix_output, prefix_lse, suffix_output, suffix_lse, output_lse
+            output,
+            prefix_output,
+            prefix_lse,
+            suffix_output,
+            suffix_lse,
+            output_lse,
+            prefill_tokens_with_context,
+            output_scale,
         )
     else:
-        from vllm.v1.attention.ops.triton_merge_attn_states import merge_attn_states
+        from vllm.v1.attention.ops.triton_merge_attn_states import (
+            merge_attn_states,
+        )
 
         return merge_attn_states(
-            output, prefix_output, prefix_lse, suffix_output, suffix_lse, output_lse
+            output,
+            prefix_output,
+            prefix_lse,
+            suffix_output,
+            suffix_lse,
+            output_lse,
+            prefill_tokens_with_context,
+            output_scale,
         )
diff --git a/vllm/v1/attention/ops/prefix_prefill.py b/vllm/v1/attention/ops/prefix_prefill.py
index 13c82f586a3b..8488c72aeaf6 100644
--- a/vllm/v1/attention/ops/prefix_prefill.py
+++ b/vllm/v1/attention/ops/prefix_prefill.py
@@ -4,6 +4,8 @@
 # The kernels in this file are adapted from LightLLM's context_attention_fwd:
 # https://github.com/ModelTC/lightllm/blob/main/lightllm/models/llama/triton_kernel/context_flashattention_nopad.py
 
+from typing import Any
+
 import torch
 
 from vllm.platforms import current_platform
@@ -87,6 +89,7 @@ def _fwd_kernel(
     SKIP_DECODE: tl.constexpr,
     USE_SINKS: tl.constexpr,
     USE_FP8: tl.constexpr,
+    CAUSAL: tl.constexpr = True,
     MAX_Q_LEN: tl.constexpr = 0,
     MAX_CTX_LEN: tl.constexpr = 0,
     FP8_MIN: tl.constexpr = float8_info.min,
@@ -281,10 +284,17 @@ def _fwd_kernel(
     # block_mask is 0 when we're already past the current query length
     block_mask = tl.where(block_start_loc < cur_batch_query_len, 1, 0)
 
-    # compute query against itself (with causal mask)
+    # compute query against itself (causal among queries by default;
+    # CAUSAL=False for bidirectional attention over query tokens, e.g. DFlash.)
+    if CAUSAL:
+        key_range_upper = block_mask * (start_m + 1) * BLOCK_M
+    else:
+        q_len_pad = (cur_batch_query_len + BLOCK_N - 1) // BLOCK_N * BLOCK_N
+        key_range_upper = block_mask * q_len_pad
+
     for start_n in tl.range(
         0,
-        block_mask * (start_m + 1) * BLOCK_M,
+        key_range_upper,
         BLOCK_N,
         loop_unroll_factor=num_unroll_request,
     ):
@@ -300,14 +310,17 @@ def _fwd_kernel(
         qk = tl.zeros([BLOCK_M, BLOCK_N], dtype=tl.float32)
         qk = tl.dot(q, k, acc=qk, input_precision=IN_PRECISION)
         qk *= sm_scale
-        # apply causal mask
-        qk = tl.where(offs_m[:, None] >= (start_n + offs_n[None, :]), qk, float("-inf"))
+
+        valid_kv = (start_n + offs_n[None, :]) < cur_batch_query_len
+        if CAUSAL:
+            attn_mask = valid_kv & (offs_m[:, None] >= (start_n + offs_n[None, :]))
+        else:
+            attn_mask = valid_kv
         if SLIDING_WINDOW > 0:
-            qk = tl.where(
-                offs_m[:, None] - (start_n + offs_n[None, :]) < SLIDING_WINDOW,
-                qk,
-                float("-inf"),
+            attn_mask = attn_mask & (
+                offs_m[:, None] - (start_n + offs_n[None, :]) < SLIDING_WINDOW
             )
+        qk = tl.where(attn_mask, qk, float("-inf"))
 
         # compute running maximum
         m_ij = tl.maximum(m_i, tl.max(qk, axis=1))
@@ -654,6 +667,7 @@ def context_attention_fwd(
     fp8_out_scale=None,
     sinks=None,
     is_block_table_ptr: bool = False,
+    causal: bool = True,
 ):
     q_dtype_is_f32 = q.dtype is torch.float32
 
@@ -720,6 +734,7 @@ def context_attention_fwd(
         processed_b_loc = b_loc.to(torch.int32)
 
     if alibi_slopes is not None:
+        assert causal, "Non-causal prefix attention is not supported with alibi"
         assert sinks is None, "Sinks arg is not supported with alibi"
         assert fp8_out_scale is None, "FP8 output not supported with alibi"
         # need to reduce num. blocks when using fp32
@@ -780,7 +795,7 @@ def context_attention_fwd(
         return
 
     max_seq_len = 0 if max_seq_len is None else max_seq_len
-    extra_kargs = {}
+    extra_kargs: dict[str, Any] = {}
     if current_platform.is_rocm():
         extra_kargs = {}
 
@@ -857,6 +872,7 @@ def context_attention_fwd(
         num_warps=4,
         num_stages=1,
         USE_SINKS=sinks is not None,
+        CAUSAL=causal,
         **extra_kargs,
     )
     return
diff --git a/vllm/v1/attention/ops/rocm_aiter_mla_sparse.py b/vllm/v1/attention/ops/rocm_aiter_mla_sparse.py
index 9d1da5b53be5..80731296fcf0 100644
--- a/vllm/v1/attention/ops/rocm_aiter_mla_sparse.py
+++ b/vllm/v1/attention/ops/rocm_aiter_mla_sparse.py
@@ -2,17 +2,25 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import functools
 import importlib
+import math
+from importlib.util import find_spec
 
 import torch
+import torch.nn.functional as F
 
+from vllm.compilation.breakable_cudagraph import eager_break_during_capture
 from vllm.forward_context import get_forward_context
 from vllm.platforms import current_platform
 from vllm.triton_utils import tl, triton
+from vllm.utils.torch_utils import LayerNameType
 from vllm.v1.attention.backends.mla.indexer import DeepseekV32IndexerMetadata
 from vllm.v1.attention.ops.common import pack_seq_triton, unpack_seq_triton
 
-if current_platform.is_cuda_alike():
-    from vllm import _custom_ops as ops
+if current_platform.is_rocm():
+    from vllm.platforms.rocm import _ON_GFX942, _ON_GFX950
+else:
+    _ON_GFX942 = False
+    _ON_GFX950 = False
 
 
 @triton.jit
@@ -95,7 +103,8 @@ def indexer_k_quant_and_cache_triton(
     # In real layout, we store the first portion as kv cache value
     # and second portion as kv cache scale
     kv_cache = kv_cache.view(num_blocks, -1)
-    kv_cache_value = kv_cache[:, : block_size * head_dim]
+    fp8_dtype = current_platform.fp8_dtype()
+    kv_cache_value = kv_cache[:, : block_size * head_dim].view(fp8_dtype)
     kv_cache_scale = kv_cache[:, block_size * head_dim :].view(torch.float32)
     head_tile_size = head_tile_size // kv_cache.element_size()
     grid = (num_tokens,)
@@ -109,7 +118,7 @@ def indexer_k_quant_and_cache_triton(
         block_size,
         num_tokens,
         head_dim,
-        "NHD",
+        "SHUFFLE",
         block_tile_size,
         head_tile_size,
         IS_FNUZ=current_platform.fp8_dtype() == torch.float8_e4m3fnuz,
@@ -135,35 +144,57 @@ def _cp_gather_indexer_quant_cache_kernel(
     HEAD_DIM: tl.constexpr,
     BLOCK_TILE_SIZE: tl.constexpr,
     HEAD_TILE_SIZE: tl.constexpr,
+    NUM_TOKENS: tl.constexpr,
+    NUM_BATCHES: tl.constexpr,
+    BLOCK_TABLE_WIDTH: tl.constexpr,
+    NUM_BLOCKS: tl.constexpr,
 ):
     tid = tl.program_id(0)
     offset = tl.arange(0, HEAD_DIM)
-    batch_id = tl.load(token_to_seq_ptr + tid)
-    batch_start = tl.load(cu_seqlen_ptr + batch_id)
-    batch_end = tl.load(cu_seqlen_ptr + batch_id + 1)
+    valid_tid = tid < NUM_TOKENS
+    batch_id = tl.load(token_to_seq_ptr + tid, mask=valid_tid, other=-1)
+    valid_batch = (batch_id >= 0) & (batch_id < NUM_BATCHES)
+    safe_batch_id = tl.where(valid_batch, batch_id, 0)
+    batch_start = tl.load(cu_seqlen_ptr + safe_batch_id, mask=valid_batch, other=0)
+    batch_end = tl.load(cu_seqlen_ptr + safe_batch_id + 1, mask=valid_batch, other=0)
     batch_offset = tid - batch_start
-    if tid >= batch_end:
+    valid_token = valid_tid & valid_batch & (tid >= batch_start) & (tid < batch_end)
+    if not valid_token:
         return
     block_table_id = batch_offset // block_size
     block_offset = batch_offset % block_size
-    block_table_offset = batch_id * block_table_stride + block_table_id
-    block_id = tl.load(block_table_ptr + block_table_offset)
-    tiled_block_id = block_offset // BLOCK_TILE_SIZE
-    tiled_block_offset = block_offset % BLOCK_TILE_SIZE
+    valid_block_table = (
+        valid_token
+        & (block_table_id >= 0)
+        & (block_table_id < BLOCK_TABLE_WIDTH)
+        & (block_offset >= 0)
+        & (block_offset < block_size)
+    )
+    safe_block_table_id = tl.where(valid_block_table, block_table_id, 0)
+    block_table_offset = safe_batch_id * block_table_stride + safe_block_table_id
+    block_id = tl.load(
+        block_table_ptr + block_table_offset, mask=valid_block_table, other=-1
+    )
+    valid_block = valid_block_table & (block_id >= 0) & (block_id < NUM_BLOCKS)
+    safe_block_id = tl.where(valid_block, block_id, 0)
+    safe_block_offset = tl.where(valid_block, block_offset, 0)
+    tiled_block_offset = safe_block_offset % BLOCK_TILE_SIZE
     if LAYOUT == "SHUFFLE":
         src_cache_offset = (
-            block_id * kv_cache_stride
-            + tiled_block_id * HEAD_DIM * BLOCK_TILE_SIZE
+            safe_block_id * kv_cache_stride
+            + (safe_block_offset // BLOCK_TILE_SIZE) * HEAD_DIM * BLOCK_TILE_SIZE
             + tiled_block_offset * HEAD_TILE_SIZE
         )
     else:
-        src_cache_offset = block_id * kv_cache_stride + block_offset * HEAD_DIM
-    src_scale_offset = block_id * kv_cache_scale_stride + block_offset
+        src_cache_offset = (
+            safe_block_id * kv_cache_stride + safe_block_offset * HEAD_DIM
+        )
+    src_scale_offset = safe_block_id * kv_cache_scale_stride + safe_block_offset
     dst_offset = tid * HEAD_DIM
     src_scale_ptr = kv_cache_scale_ptr + src_scale_offset
     src_cache_ptr = kv_cache_ptr + src_cache_offset
     dst_k_ptr = k_fp8_ptr + dst_offset
-    scale_val = tl.load(src_scale_ptr)
+    scale_val = tl.load(src_scale_ptr, mask=valid_block, other=0.0)
     tl.store(k_scale_ptr + tid, scale_val)
     if LAYOUT == "SHUFFLE":
         tiled_src_offset = (
@@ -173,7 +204,7 @@ def _cp_gather_indexer_quant_cache_kernel(
     else:
         tiled_src_offset = offset
     val = tl.load(src_cache_ptr + tiled_src_offset)
-    tl.store(dst_k_ptr + offset, val)
+    tl.store(dst_k_ptr + offset, val, mask=valid_block)
 
 
 def cp_gather_indexer_k_quant_cache_triton(
@@ -210,10 +241,14 @@ def cp_gather_indexer_k_quant_cache_triton(
         block_table_stride,
         k_cache_value.stride(0),
         k_cache_scale.stride(0),
-        "NHD",
+        "SHUFFLE",
         head_dim,
         block_tile_size,
         head_tile_size,
+        num_tokens,
+        cu_seqlen.shape[0] - 1,
+        block_table.shape[1],
+        num_blocks,
     )
 
 
@@ -230,6 +265,43 @@ def fp8_paged_mqa_logits_torch(
 
     fp8_dtype = current_platform.fp8_dtype()
     batch_size, next_n, _, dim = q.size()
+    if next_n == 1:
+        block_size = kv_cache.shape[1]
+        logits = torch.full(
+            [batch_size, max_model_len],
+            float("-inf"),
+            device=q.device,
+            dtype=torch.float32,
+        )
+        if context_lens.dim() > 1:
+            context_lens = context_lens.squeeze(-1)
+        kv_cache_flat = kv_cache.view(-1, block_size * (dim + 4))
+        for i in range(batch_size):
+            q_i = q[i, 0].to(torch.float32)
+            q_scale = weights[i]
+            seq_len = int(context_lens[i].item())
+            assert seq_len <= max_model_len
+            num_pages = cdiv(seq_len, block_size)
+            padded_seq_len = num_pages * block_size
+            pages = block_tables[i, :num_pages]
+            cache = kv_cache_flat[pages]
+            scale_offset = block_size * dim
+            cache_value = (
+                cache[..., :scale_offset].view(dtype=fp8_dtype).to(torch.float32)
+            )
+            cache_scale = (
+                cache[..., scale_offset:].view(dtype=torch.float32).contiguous()
+            )
+            cache_value = cache_value.view(padded_seq_len, dim)
+            cache_scale = cache_scale.view(padded_seq_len)
+            score = F.linear(cache_value, q_i)
+            score = F.relu(score)
+            score *= q_scale[None, :]
+            score = score.sum(dim=1)
+            score *= cache_scale
+            logits[i, :seq_len] = score[:seq_len]
+        return logits
+
     kv_cache, scale = kv_cache[..., :dim], kv_cache[..., dim:]
     scale = scale.contiguous().view(torch.float)
     q = q.float()
@@ -241,20 +313,30 @@ def fp8_paged_mqa_logits_torch(
         device=q.device,
         dtype=torch.float32,
     )
-    context_lens = context_lens.tolist()
     for i in range(batch_size):
         context_len = context_lens[i]
-        q_offsets = torch.arange(context_len - next_n, context_len, device="cuda")
+        if context_len.ndim == 0:
+            context_len_i = int(context_len.item())
+            q_offsets = torch.arange(
+                context_len_i - next_n, context_len_i, device=q.device
+            )
+            context_limit = torch.full(
+                (next_n,), context_len_i, dtype=torch.int32, device=q.device
+            )
+        else:
+            context_limit = context_len.to(device=q.device, dtype=torch.int32)
+            q_offsets = context_limit - 1
         weight_slice = (
             weights[i * next_n : (i + 1) * next_n, :].transpose(0, 1).contiguous()
         )
-        for block_rk in range(cdiv(context_len, block_size)):
+        max_context_len = int(context_limit.max().item())
+        for block_rk in range(cdiv(max_context_len, block_size)):
             block_idx = block_tables[i][block_rk]
             qx, kx = q[i], kv_cache[block_idx]
             k_offsets = torch.arange(
-                block_rk * block_size, (block_rk + 1) * block_size, device="cuda"
+                block_rk * block_size, (block_rk + 1) * block_size, device=q.device
             )
-            mask = (k_offsets[None, :] < context_len) & (
+            mask = (k_offsets[None, :] < context_limit[:, None]) & (
                 k_offsets[None, :] <= q_offsets[:, None]
             )
             s = torch.where(
@@ -276,11 +358,9 @@ def fp8_paged_mqa_logits_torch(
 @functools.lru_cache
 def paged_mqa_logits_module():
     paged_mqa_logits_module_path = None
-    if importlib.util.find_spec("aiter.ops.triton.pa_mqa_logits") is not None:
+    if find_spec("aiter.ops.triton.pa_mqa_logits") is not None:
         paged_mqa_logits_module_path = "aiter.ops.triton.pa_mqa_logits"
-    elif (
-        importlib.util.find_spec("aiter.ops.triton.attention.pa_mqa_logits") is not None
-    ):
+    elif find_spec("aiter.ops.triton.attention.pa_mqa_logits") is not None:
         paged_mqa_logits_module_path = "aiter.ops.triton.attention.pa_mqa_logits"
 
     if paged_mqa_logits_module_path is not None:
@@ -325,10 +405,39 @@ def rocm_fp8_paged_mqa_logits(
     from vllm._aiter_ops import rocm_aiter_ops
 
     aiter_paged_mqa_logits_module = None
+    # if rocm_aiter_ops.is_enabled():
+    batch_size, next_n, heads, head_dim = q_fp8.shape
+    num_blocks, block_size, _, _ = kv_cache_fp8.shape
+
     if rocm_aiter_ops.is_enabled():
         aiter_paged_mqa_logits_module = paged_mqa_logits_module()
 
     if aiter_paged_mqa_logits_module is not None:
+        if _ON_GFX942 or _ON_GFX950:
+            deepgemm_fp8_paged_mqa_logits = (
+                aiter_paged_mqa_logits_module.deepgemm_fp8_paged_mqa_logits
+            )
+            batch_size, next_n, heads, _ = q_fp8.shape
+            out_logits = torch.full(
+                [batch_size * next_n, max_model_len],
+                float("-inf"),
+                device="cuda",
+                dtype=torch.float32,
+            )
+            deepgemm_fp8_paged_mqa_logits(
+                q_fp8,
+                kv_cache_fp8,
+                weights,
+                out_logits,
+                context_lens,
+                block_tables,
+                max_model_len,
+                ChunkK=256,
+                Preshuffle=block_size == 64,
+                KVBlockSize=block_size,
+                WavePerEU=2,
+            )
+            return out_logits
         deepgemm_fp8_paged_mqa_logits_stage1 = (
             aiter_paged_mqa_logits_module.deepgemm_fp8_paged_mqa_logits_stage1
         )
@@ -347,6 +456,7 @@ def rocm_fp8_paged_mqa_logits(
             context_lens,
             block_tables,
             max_model_len,
+            ChunkQ=heads,
         )
         return out_qk.sum(dim=0)
     else:
@@ -380,16 +490,17 @@ def fp8_mqa_logits_torch(
     Returns:
         Logits tensor of shape [M, N], dtype `torch.float32`.
     """
-    kv, scale = kv
-    seq_len_kv = kv.shape[0]
-    k = kv.to(torch.bfloat16)
+    k_fp8, scale = kv
+    seq_len_kv = k_fp8.shape[0]
+    k = k_fp8.to(torch.bfloat16)
     q = q.to(torch.bfloat16)
+    device = q.device
 
     mask_lo = (
-        torch.arange(0, seq_len_kv, device="cuda")[None, :] >= cu_seqlen_ks[:, None]
+        torch.arange(0, seq_len_kv, device=device)[None, :] >= cu_seqlen_ks[:, None]
     )
     mask_hi = (
-        torch.arange(0, seq_len_kv, device="cuda")[None, :] < cu_seqlen_ke[:, None]
+        torch.arange(0, seq_len_kv, device=device)[None, :] < cu_seqlen_ke[:, None]
     )
     mask = mask_lo & mask_hi
 
@@ -403,12 +514,9 @@ def fp8_mqa_logits_torch(
 @functools.lru_cache
 def mqa_logits_module():
     mqa_logits_module_path = None
-    if importlib.util.find_spec("aiter.ops.triton.fp8_mqa_logits") is not None:
+    if find_spec("aiter.ops.triton.fp8_mqa_logits") is not None:
         mqa_logits_module_path = "aiter.ops.triton.fp8_mqa_logits"
-    elif (
-        importlib.util.find_spec("aiter.ops.triton.attention.fp8_mqa_logits")
-        is not None
-    ):
+    elif find_spec("aiter.ops.triton.attention.fp8_mqa_logits") is not None:
         mqa_logits_module_path = "aiter.ops.triton.attention.fp8_mqa_logits"
 
     if mqa_logits_module_path is not None:
@@ -455,15 +563,46 @@ def rocm_fp8_mqa_logits(
 
     if aiter_mqa_logits_module is not None:
         fp8_mqa_logits = aiter_mqa_logits_module.fp8_mqa_logits
-        kv, scale = kv
-        return fp8_mqa_logits(q, kv, scale, weights, cu_seqlen_ks, cu_seqlen_ke)
+        k_fp8, scale = kv
+        return fp8_mqa_logits(q, k_fp8, scale, weights, cu_seqlen_ks, cu_seqlen_ke)
     else:
         return fp8_mqa_logits_torch(q, kv, weights, cu_seqlen_ks, cu_seqlen_ke)
 
 
+def _topk_indices_torch(
+    logits: torch.Tensor,
+    topk_tokens: int,
+    row_starts: torch.Tensor | None = None,
+) -> torch.Tensor:
+    k = min(topk_tokens, logits.shape[-1])
+    values, indices = torch.topk(logits, k=k, dim=-1)
+    indices = indices.to(torch.int32)
+    indices = torch.where(
+        values == float("-inf"),
+        torch.full_like(indices, -1, dtype=torch.int32),
+        indices,
+    )
+    if row_starts is not None:
+        # Match the CUDA top_k_per_row_prefill contract: indices are local to
+        # each row's valid [row_start, row_end) range, not columns in the
+        # concatenated chunk logits matrix.
+        starts = row_starts.to(dtype=torch.int32).view(-1, 1)
+        indices = torch.where(indices < 0, indices, indices - starts)
+    if k == topk_tokens:
+        return indices
+    padded = torch.full(
+        (logits.shape[0], topk_tokens),
+        -1,
+        dtype=torch.int32,
+        device=logits.device,
+    )
+    padded[:, :k] = indices
+    return padded
+
+
 def rocm_aiter_sparse_attn_indexer_fake(
     hidden_states: torch.Tensor,
-    k_cache_prefix: str,
+    k_cache_prefix: LayerNameType,
     kv_cache: torch.Tensor,
     q_fp8: torch.Tensor,
     k: torch.Tensor,
@@ -475,22 +614,15 @@ def rocm_aiter_sparse_attn_indexer_fake(
     max_model_len: int,
     total_seq_lens: int,
     topk_indices_buffer: torch.Tensor | None,
+    skip_k_cache_insert: bool = False,
 ) -> torch.Tensor:
-    # profile run
-    # NOTE(Chen): create the max possible flattened_kv. So that
-    # profile_run can get correct memory usage.
-    _flattened_kv = torch.empty(
-        [total_seq_lens, head_dim + 4], device=k.device, dtype=torch.uint8
-    )
-    fp8_dtype = current_platform.fp8_dtype()
-    _k_fp8 = _flattened_kv[..., :head_dim].view(fp8_dtype).contiguous()
-    _k_scale = _flattened_kv[..., head_dim:].view(torch.float32).contiguous()
     return topk_indices_buffer
 
 
+@eager_break_during_capture
 def rocm_aiter_sparse_attn_indexer(
     hidden_states: torch.Tensor,
-    k_cache_prefix: str,
+    k_cache_prefix: LayerNameType,
     kv_cache: torch.Tensor,
     q_fp8: torch.Tensor,
     k: torch.Tensor,
@@ -502,10 +634,15 @@ def rocm_aiter_sparse_attn_indexer(
     max_model_len: int,
     total_seq_lens: int,
     topk_indices_buffer: torch.Tensor | None,
+    skip_k_cache_insert: bool = False,
 ) -> torch.Tensor:
     # careful! this will be None in dummy run
     attn_metadata = get_forward_context().attn_metadata
     fp8_dtype = current_platform.fp8_dtype()
+    from vllm import _custom_ops as ops
+    from vllm.utils.torch_utils import _resolve_layer_name
+
+    k_cache_prefix = _resolve_layer_name(k_cache_prefix)
     # assert isinstance(attn_metadata, dict)
     if not isinstance(attn_metadata, dict):
         return rocm_aiter_sparse_attn_indexer_fake(
@@ -522,44 +659,76 @@ def rocm_aiter_sparse_attn_indexer(
             max_model_len,
             total_seq_lens,
             topk_indices_buffer,
+            skip_k_cache_insert,
         )
-    attn_metadata = attn_metadata[k_cache_prefix]
-    assert isinstance(attn_metadata, DeepseekV32IndexerMetadata)
-    slot_mapping = attn_metadata.slot_mapping
-    has_decode = attn_metadata.num_decodes > 0
-    has_prefill = attn_metadata.num_prefills > 0
-    num_decode_tokens = attn_metadata.num_decode_tokens
+    layer_attn_metadata = attn_metadata[k_cache_prefix]
+    assert isinstance(layer_attn_metadata, DeepseekV32IndexerMetadata)
+    assert topk_indices_buffer is not None
+    assert scale_fmt is not None
+    slot_mapping = layer_attn_metadata.slot_mapping
+    has_decode = layer_attn_metadata.num_decodes > 0
+    has_prefill = layer_attn_metadata.num_prefills > 0
+    num_decode_tokens = layer_attn_metadata.num_decode_tokens
+    device = hidden_states.device if k is None else k.device
 
-    ops.indexer_k_quant_and_cache(
-        k,
-        kv_cache,
-        slot_mapping,
-        quant_block_size,
-        scale_fmt,
-    )
+    # during speculative decoding, k may be padded to the CUDA graph batch
+    # size while slot_mapping only covers actual tokens.
+    num_tokens = slot_mapping.shape[0]
+    if k is not None:
+        k = k[:num_tokens]
+    elif not skip_k_cache_insert:
+        raise ValueError("k must be provided when skip_k_cache_insert is False")
+
+    if not skip_k_cache_insert:
+        if _ON_GFX942:
+            ops.indexer_k_quant_and_cache(
+                k,
+                kv_cache,
+                slot_mapping,
+                quant_block_size,
+                scale_fmt,
+            )
+        else:
+            indexer_k_quant_and_cache_triton(
+                k,
+                kv_cache,
+                slot_mapping,
+                quant_block_size,
+                scale_fmt,
+            )
 
     topk_indices_buffer[: hidden_states.shape[0]] = -1
     if has_prefill:
-        prefill_metadata = attn_metadata.prefill
+        prefill_metadata = layer_attn_metadata.prefill
+        assert prefill_metadata is not None
         for chunk in prefill_metadata.chunks:
             k_fp8 = torch.empty(
                 [chunk.total_seq_lens, head_dim],
-                device=k.device,
+                device=device,
                 dtype=fp8_dtype,
             )
             k_scale = torch.empty(
                 [chunk.total_seq_lens, 4],
-                device=k.device,
+                device=device,
                 dtype=torch.uint8,
             )
-
-            ops.cp_gather_indexer_k_quant_cache(
-                kv_cache,
-                k_fp8,
-                k_scale,
-                chunk.block_table,
-                chunk.cu_seq_lens,
-            )
+            if _ON_GFX942:
+                ops.cp_gather_indexer_k_quant_cache(
+                    kv_cache,
+                    k_fp8,
+                    k_scale,
+                    chunk.block_table,
+                    chunk.cu_seq_lens,
+                )
+            else:
+                cp_gather_indexer_k_quant_cache_triton(
+                    kv_cache,
+                    k_fp8,
+                    k_scale,
+                    chunk.block_table,
+                    chunk.cu_seq_lens,
+                    token_to_seq=chunk.token_to_seq,
+                )
 
             logits = rocm_fp8_mqa_logits(
                 q_fp8[chunk.token_start : chunk.token_end],
@@ -568,11 +737,12 @@ def rocm_aiter_sparse_attn_indexer(
                 chunk.cu_seqlen_ks,
                 chunk.cu_seqlen_ke,
             )
-            num_rows = logits.shape[0]
-            assert topk_tokens == 2048, "top_k_per_row assumes size 2048"
             topk_indices = topk_indices_buffer[
                 chunk.token_start : chunk.token_end, :topk_tokens
             ]
+
+            num_rows = logits.shape[0]
+
             torch.ops._C.top_k_per_row_prefill(
                 logits,
                 chunk.cu_seqlen_ks,
@@ -585,7 +755,8 @@ def rocm_aiter_sparse_attn_indexer(
             )
 
     if has_decode:
-        decode_metadata = attn_metadata.decode
+        decode_metadata = layer_attn_metadata.decode
+        assert decode_metadata is not None
         # kv_cache size requirement [num_block, block_size, n_head, head_dim],
         # we only have [num_block, block_size, head_dim],
         kv_cache = kv_cache.unsqueeze(-2)
@@ -618,9 +789,9 @@ def rocm_aiter_sparse_attn_indexer(
             max_model_len=max_model_len,
         )
 
+        topk_indices = topk_indices_buffer[:num_padded_tokens, :topk_tokens]
         num_rows = logits.shape[0]
-        assert topk_tokens == 2048, "top_k_per_row assumes size 2048"
-        topk_indices = topk_indices_buffer[:num_decode_tokens, :topk_tokens]
+
         torch.ops._C.top_k_per_row_decode(
             logits,
             next_n,
@@ -636,7 +807,7 @@ def rocm_aiter_sparse_attn_indexer(
             # if padded, we need to unpack
             # the topk indices removing padded tokens
             topk_indices = unpack_seq_triton(
-                topk_indices.reshape(batch_size, -1, topk_indices.shape[-1]),
+                topk_indices.reshape(batch_size, next_n, topk_indices.shape[-1]),
                 decode_lens,
             )
             topk_indices_buffer[:num_decode_tokens, : topk_indices.shape[-1]] = (
@@ -644,3 +815,935 @@ def rocm_aiter_sparse_attn_indexer(
             )
 
     return topk_indices_buffer
+
+
+def _decode_e8m0_scales(scale: torch.Tensor) -> torch.Tensor:
+    if scale.dtype == torch.float8_e8m0fnu:
+        from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+            _upcast_e8m0_to_fp32,
+        )
+
+        return _upcast_e8m0_to_fp32(scale).contiguous()
+    return scale.to(torch.float32)
+
+
+def _expand_2d_block_scales(
+    scale: torch.Tensor,
+    rows: int,
+    cols: int,
+) -> torch.Tensor:
+    scale = _decode_e8m0_scales(scale)
+    row_blocks, col_blocks = scale.shape[-2:]
+    row_block = math.ceil(rows / row_blocks)
+    col_block = math.ceil(cols / col_blocks)
+    scale = torch.repeat_interleave(scale, row_block, dim=-2)[..., :rows, :]
+    scale = torch.repeat_interleave(scale, col_block, dim=-1)[..., :, :cols]
+    return scale
+
+
+def _apply_gptj_inv_rope_ref(
+    x: torch.Tensor,
+    positions: torch.Tensor,
+    cos_sin_cache: torch.Tensor,
+    rope_dim: int,
+) -> torch.Tensor:
+    if rope_dim == 0 or x.numel() == 0:
+        return x
+    half_rot = rope_dim // 2
+    nope_dim = x.shape[-1] - rope_dim
+    dtype = x.dtype
+    x = x.to(torch.float32)
+    cache = cos_sin_cache.index_select(0, positions.to(torch.long))
+    cos = cache[:, :half_rot].to(torch.float32)
+    sin = cache[:, half_rot : 2 * half_rot].to(torch.float32)
+    view_shape = (positions.shape[0],) + (1,) * (x.dim() - 2) + (half_rot,)
+    cos = cos.view(view_shape)
+    sin = sin.view(view_shape)
+    rope = x[..., nope_dim:]
+    y_even = rope[..., 0::2]
+    y_odd = rope[..., 1::2]
+    rope_out = torch.stack(
+        (y_even * cos + y_odd * sin, y_odd * cos - y_even * sin),
+        dim=-1,
+    ).flatten(-2)
+    x = x.clone()
+    x[..., nope_dim:] = rope_out
+    return x.to(dtype)
+
+
+def _apply_inv_rope_ref(
+    rotary_emb: torch.nn.Module,
+    x: torch.Tensor,
+    positions: torch.Tensor,
+    rope_dim: int,
+) -> torch.Tensor:
+    if hasattr(rotary_emb, "forward_native"):
+        try:
+            query, _ = rotary_emb.forward_native(
+                positions,
+                x.clone(),
+                None,
+                inverse=True,
+            )
+            return query
+        except TypeError:
+            pass
+    return _apply_gptj_inv_rope_ref(x, positions, rotary_emb.cos_sin_cache, rope_dim)
+
+
+def rocm_inv_rope_einsum(
+    rotary_emb: torch.nn.Module,
+    o: torch.Tensor,
+    positions: torch.Tensor,
+    rope_head_dim: int,
+    n_local_groups: int,
+    o_lora_rank: int,
+    wo_a: torch.nn.Module,
+) -> torch.Tensor:
+    """Reference inverse-RoPE + WO_A einsum path used on ROCm."""
+    o_ref = _apply_inv_rope_ref(rotary_emb, o, positions, rope_head_dim).to(
+        torch.bfloat16
+    )
+    o_ref = o_ref.view(o.shape[0], n_local_groups, -1)
+
+    hidden_dim = o_ref.shape[-1]
+    if hasattr(wo_a, "weight_scale_inv"):
+        wo_a_weight = wo_a.weight.view(n_local_groups, o_lora_rank, hidden_dim).to(
+            torch.float32
+        )
+        wo_a_scale = _expand_2d_block_scales(
+            wo_a.weight_scale_inv.view(
+                n_local_groups, -1, wo_a.weight_scale_inv.shape[-1]
+            ),
+            o_lora_rank,
+            hidden_dim,
+        )
+        wo_a_weight = (wo_a_weight * wo_a_scale).to(torch.bfloat16)
+    else:
+        wo_a_weight = wo_a.weight.view(n_local_groups, o_lora_rank, hidden_dim).to(
+            torch.bfloat16
+        )
+
+    return torch.einsum("tgd,grd->tgr", o_ref, wo_a_weight)
+
+
+_DSV4_SPARSE_NOPE_DIM = 448
+_DSV4_SPARSE_ROPE_DIM = 64
+
+
+def _validate_dsv4_sparse_dims(
+    head_dim: int,
+    nope_head_dim: int,
+    rope_head_dim: int,
+    op_name: str,
+) -> None:
+    assert head_dim == nope_head_dim + rope_head_dim, (
+        f"{op_name} expected head_dim={nope_head_dim + rope_head_dim}, got {head_dim}"
+    )
+    assert (
+        nope_head_dim == _DSV4_SPARSE_NOPE_DIM
+        and rope_head_dim == _DSV4_SPARSE_ROPE_DIM
+    ), (
+        f"{op_name} expects {_DSV4_SPARSE_NOPE_DIM} NoPE dims and "
+        f"{_DSV4_SPARSE_ROPE_DIM} RoPE dims"
+    )
+
+
+@triton.jit
+def _pack_dense_prefix_to_ragged_kernel(
+    indices_ptr,
+    lengths_ptr,
+    indptr_ptr,
+    out_ptr,
+    indices_stride0,
+    num_rows_limit,
+    row_width,
+    BLOCK_SIZE: tl.constexpr,
+):
+    row_idx = tl.program_id(0)
+    block_idx = tl.program_id(1)
+    offsets = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+
+    row_len = tl.load(lengths_ptr + row_idx)
+    if block_idx * BLOCK_SIZE >= row_len:
+        return
+
+    mask = offsets < row_len
+    safe_offsets = tl.where(offsets < row_width, offsets, 0)
+    vals = tl.load(
+        indices_ptr + row_idx * indices_stride0 + safe_offsets,
+        mask=mask & (offsets < row_width),
+        other=-1,
+    ).to(tl.int32)
+    if num_rows_limit >= 0:
+        vals = tl.where((vals >= 0) & (vals < num_rows_limit), vals, -1)
+
+    out_start = tl.load(indptr_ptr + row_idx)
+    tl.store(out_ptr + out_start + offsets, vals, mask=mask)
+
+
+def build_ragged_indices_from_dense(
+    indices: torch.Tensor,
+    lengths: torch.Tensor,
+    num_rows: int = -1,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    indices = indices.reshape(indices.shape[0], -1)
+    lengths = lengths.to(device=indices.device, dtype=torch.int32).reshape(-1)
+    assert lengths.numel() == indices.shape[0], (
+        f"Expected one length per row, got {lengths.shape} for indices {indices.shape}"
+    )
+
+    max_width = indices.shape[1] if indices.ndim == 2 else 0
+    lengths = lengths.clamp(min=0, max=max_width).contiguous()
+
+    indptr = torch.empty(indices.shape[0] + 1, dtype=torch.int32, device=indices.device)
+    indptr[0] = 0
+    torch.cumsum(lengths, dim=0, out=indptr[1:])
+
+    if indices.numel() == 0:
+        flat = torch.empty(0, dtype=torch.int32, device=indices.device)
+    else:
+        flat = torch.empty(
+            int(indptr[-1].item()), dtype=torch.int32, device=indices.device
+        )
+        if flat.numel() > 0:
+            block_size = 128
+            _pack_dense_prefix_to_ragged_kernel[
+                (indices.shape[0], triton.cdiv(max_width, block_size))
+            ](
+                indices,
+                lengths,
+                indptr,
+                flat,
+                indices.stride(0),
+                int(num_rows),
+                max_width,
+                BLOCK_SIZE=block_size,
+            )
+
+    return flat, indptr
+
+
+def _as_int32_contiguous_1d(x: torch.Tensor) -> torch.Tensor:
+    if x.dtype == torch.int32 and x.ndim == 1 and x.is_contiguous():
+        return x
+    return x.to(torch.int32).contiguous()
+
+
+@triton.jit
+def _sparse_attn_prefill_ragged_kernel(
+    q_ptr,
+    kv_ptr,
+    kv_indices_ptr,
+    kv_indptr_ptr,
+    attn_sink_ptr,
+    out_ptr,
+    q_stride_t,
+    q_stride_h,
+    q_stride_d,
+    kv_stride_n,
+    kv_stride_d,
+    out_stride_t,
+    out_stride_h,
+    out_stride_d,
+    num_heads,
+    head_dim,
+    num_kv,
+    scale,
+    HAS_ATTN_SINK: tl.constexpr,
+    BLOCK_H: tl.constexpr,
+    BLOCK_D: tl.constexpr,
+    BLOCK_K: tl.constexpr,
+):
+    query_idx = tl.program_id(0)
+    pid_h = tl.program_id(1)
+
+    head_offsets = pid_h * BLOCK_H + tl.arange(0, BLOCK_H)
+    dim_offsets = tl.arange(0, BLOCK_D)
+    head_mask = head_offsets < num_heads
+    dim_mask = dim_offsets < head_dim
+
+    q = tl.load(
+        q_ptr
+        + query_idx * q_stride_t
+        + head_offsets[:, None] * q_stride_h
+        + dim_offsets[None, :] * q_stride_d,
+        mask=head_mask[:, None] & dim_mask[None, :],
+        other=0.0,
+    )
+
+    neg_large = -3.4028234663852886e38
+    m_i = tl.full((BLOCK_H,), neg_large, dtype=tl.float32)
+    l_i = tl.zeros((BLOCK_H,), dtype=tl.float32)
+    acc = tl.zeros((BLOCK_H, BLOCK_D), dtype=tl.float32)
+
+    kv_start = tl.load(kv_indptr_ptr + query_idx)
+    kv_end = tl.load(kv_indptr_ptr + query_idx + 1)
+    kv_len = kv_end - kv_start
+
+    k_offsets = tl.arange(0, BLOCK_K)
+    for k_start in tl.range(0, kv_len, BLOCK_K):
+        k_pos = k_start + k_offsets
+        in_range = k_pos < kv_len
+        slot = tl.load(kv_indices_ptr + kv_start + k_pos, mask=in_range, other=-1)
+        valid = in_range & (slot >= 0) & (slot < num_kv)
+        safe_slot = tl.where(valid, slot, 0)
+
+        kv = tl.load(
+            kv_ptr
+            + safe_slot[:, None] * kv_stride_n
+            + dim_offsets[None, :] * kv_stride_d,
+            mask=valid[:, None] & dim_mask[None, :],
+            other=0.0,
+        )
+        kv = tl.where(valid[:, None] & dim_mask[None, :], kv, 0.0)
+
+        scores = tl.dot(q, tl.trans(kv)) * scale
+        scores = tl.where(head_mask[:, None] & valid[None, :], scores, neg_large)
+
+        m_block = tl.max(scores, axis=1)
+        m_new = tl.maximum(m_i, m_block)
+        alpha = tl.exp(m_i - m_new)
+        p = tl.exp(scores - m_new[:, None])
+        p = tl.where(head_mask[:, None] & valid[None, :], p, 0.0)
+        l_new = l_i * alpha + tl.sum(p, axis=1)
+
+        acc = acc * alpha[:, None] + tl.dot(p.to(kv.dtype), kv)
+        m_i = m_new
+        l_i = l_new
+
+    if HAS_ATTN_SINK:
+        sink = tl.load(
+            attn_sink_ptr + head_offsets, mask=head_mask, other=neg_large
+        ).to(tl.float32)
+        m_final = tl.maximum(m_i, sink)
+        alpha = tl.exp(m_i - m_final)
+        l_final = l_i * alpha + tl.exp(sink - m_final)
+        denom = tl.maximum(l_final, 1.0e-30)
+        out = tl.where(
+            l_final[:, None] > 0.0,
+            (acc * alpha[:, None]) / denom[:, None],
+            0.0,
+        )
+    else:
+        denom = tl.maximum(l_i, 1.0e-30)
+        out = tl.where(l_i[:, None] > 0.0, acc / denom[:, None], 0.0)
+
+    tl.store(
+        out_ptr
+        + query_idx * out_stride_t
+        + head_offsets[:, None] * out_stride_h
+        + dim_offsets[None, :] * out_stride_d,
+        out,
+        mask=head_mask[:, None] & dim_mask[None, :],
+    )
+
+
+@triton.jit
+def _sparse_attn_decode_ragged_kernel(
+    q_ptr,
+    main_cache_ptr,
+    main_indices_ptr,
+    main_indptr_ptr,
+    extra_cache_ptr,
+    extra_indices_ptr,
+    extra_indptr_ptr,
+    attn_sink_ptr,
+    out_ptr,
+    q_stride0,
+    q_stride1,
+    out_stride0,
+    out_stride1,
+    main_cache_stride0,
+    extra_cache_stride0,
+    main_num_rows,
+    extra_num_rows,
+    main_block_size,
+    extra_block_size,
+    scale,
+    num_heads,
+    HAS_ATTN_SINK: tl.constexpr,
+    HAS_EXTRA: tl.constexpr,
+    NOPE_DIM: tl.constexpr,
+    NOPE_BLOCK: tl.constexpr,
+    ROPE_DIM: tl.constexpr,
+    IS_FNUZ: tl.constexpr,
+    BLOCK_H: tl.constexpr,
+    BLOCK_K: tl.constexpr,
+):
+    query_idx = tl.program_id(0)
+    pid_h = tl.program_id(1)
+
+    head_offsets = pid_h * BLOCK_H + tl.arange(0, BLOCK_H)
+    head_mask = head_offsets < num_heads
+    nope_offsets = tl.arange(0, NOPE_BLOCK)
+    nope_mask = nope_offsets < NOPE_DIM
+    rope_offsets = tl.arange(0, ROPE_DIM)
+
+    q_row_ptr = q_ptr + query_idx * q_stride0 + head_offsets[:, None] * q_stride1
+    q_nope = tl.load(
+        q_row_ptr + nope_offsets[None, :],
+        mask=head_mask[:, None] & nope_mask[None, :],
+        other=0.0,
+    )
+    q_rope = tl.load(
+        q_row_ptr + NOPE_DIM + rope_offsets[None, :],
+        mask=head_mask[:, None],
+        other=0.0,
+    )
+
+    neg_large = -3.4028234663852886e38
+    m_i = tl.full((BLOCK_H,), neg_large, dtype=tl.float32)
+    l_i = tl.zeros((BLOCK_H,), dtype=tl.float32)
+    acc_nope = tl.zeros((BLOCK_H, NOPE_BLOCK), dtype=tl.float32)
+    acc_rope = tl.zeros((BLOCK_H, ROPE_DIM), dtype=tl.float32)
+    k_offsets = tl.arange(0, BLOCK_K)
+
+    main_start = tl.load(main_indptr_ptr + query_idx)
+    main_end = tl.load(main_indptr_ptr + query_idx + 1)
+    main_len = main_end - main_start
+
+    zero_nope = tl.zeros((BLOCK_K, NOPE_BLOCK), dtype=tl.bfloat16)
+    zero_rope = tl.zeros((BLOCK_K, ROPE_DIM), dtype=tl.bfloat16)
+
+    for k_start in tl.range(0, main_len, BLOCK_K):
+        k_pos = k_start + k_offsets
+        in_range = k_pos < main_len
+        slot = tl.load(main_indices_ptr + main_start + k_pos, mask=in_range, other=-1)
+        valid = in_range & (slot >= 0) & (slot < main_num_rows)
+        safe_slot = tl.where(valid, slot, 0)
+
+        block_idx = safe_slot // main_block_size
+        pos_in_block = safe_slot % main_block_size
+        cache_block_ptr = main_cache_ptr + block_idx.to(tl.int64) * main_cache_stride0
+        token_data_ptr = cache_block_ptr + pos_in_block * 576
+        token_scale_ptr = cache_block_ptr + main_block_size * 576 + pos_in_block * 8
+
+        x_uint8 = tl.load(
+            token_data_ptr[:, None] + nope_offsets[None, :],
+            mask=valid[:, None] & nope_mask[None, :],
+            other=0,
+        )
+        if IS_FNUZ:
+            x_fp8 = x_uint8.to(tl.float8e4b15, bitcast=True)
+        else:
+            x_fp8 = x_uint8.to(tl.float8e4nv, bitcast=True)
+        encoded_scales = tl.load(
+            token_scale_ptr[:, None] + nope_offsets[None, :] // 64,
+            mask=valid[:, None] & nope_mask[None, :],
+            other=127,
+        )
+        scales = tl.exp2(encoded_scales.to(tl.float32) - 127.0)
+        k_nope = x_fp8.to(tl.bfloat16) * scales.to(tl.bfloat16)
+        k_nope = tl.where(valid[:, None] & nope_mask[None, :], k_nope, zero_nope)
+        k_nope = tl.where(k_nope == k_nope, k_nope, zero_nope)
+
+        rope_ptr = (token_data_ptr + NOPE_DIM).to(tl.pointer_type(tl.bfloat16))
+        k_rope = tl.load(
+            rope_ptr[:, None] + rope_offsets[None, :],
+            mask=valid[:, None],
+            other=0.0,
+        )
+        k_rope = tl.where(valid[:, None], k_rope, zero_rope)
+        k_rope = tl.where(k_rope == k_rope, k_rope, zero_rope)
+
+        scores = tl.dot(q_nope, tl.trans(k_nope)) + tl.dot(q_rope, tl.trans(k_rope))
+        scores *= scale
+        scores = tl.where(head_mask[:, None] & valid[None, :], scores, neg_large)
+
+        m_block = tl.max(scores, axis=1)
+        m_new = tl.maximum(m_i, m_block)
+        alpha = tl.exp(m_i - m_new)
+        p = tl.exp(scores - m_new[:, None])
+        p = tl.where(head_mask[:, None] & valid[None, :], p, 0.0)
+        l_new = l_i * alpha + tl.sum(p, axis=1)
+
+        acc_nope = acc_nope * alpha[:, None] + tl.dot(p.to(k_nope.dtype), k_nope)
+        acc_rope = acc_rope * alpha[:, None] + tl.dot(p.to(k_rope.dtype), k_rope)
+        m_i = m_new
+        l_i = l_new
+
+    if HAS_EXTRA:
+        extra_start = tl.load(extra_indptr_ptr + query_idx)
+        extra_end = tl.load(extra_indptr_ptr + query_idx + 1)
+        extra_len = extra_end - extra_start
+
+        for k_start in tl.range(0, extra_len, BLOCK_K):
+            k_pos = k_start + k_offsets
+            in_range = k_pos < extra_len
+            slot = tl.load(
+                extra_indices_ptr + extra_start + k_pos, mask=in_range, other=-1
+            )
+            valid = in_range & (slot >= 0) & (slot < extra_num_rows)
+            safe_slot = tl.where(valid, slot, 0)
+
+            block_idx = safe_slot // extra_block_size
+            pos_in_block = safe_slot % extra_block_size
+            cache_block_ptr = (
+                extra_cache_ptr + block_idx.to(tl.int64) * extra_cache_stride0
+            )
+            token_data_ptr = cache_block_ptr + pos_in_block * 576
+            token_scale_ptr = (
+                cache_block_ptr + extra_block_size * 576 + pos_in_block * 8
+            )
+
+            x_uint8 = tl.load(
+                token_data_ptr[:, None] + nope_offsets[None, :],
+                mask=valid[:, None] & nope_mask[None, :],
+                other=0,
+            )
+            if IS_FNUZ:
+                x_fp8 = x_uint8.to(tl.float8e4b15, bitcast=True)
+            else:
+                x_fp8 = x_uint8.to(tl.float8e4nv, bitcast=True)
+            encoded_scales = tl.load(
+                token_scale_ptr[:, None] + nope_offsets[None, :] // 64,
+                mask=valid[:, None] & nope_mask[None, :],
+                other=127,
+            )
+            scales = tl.exp2(encoded_scales.to(tl.float32) - 127.0)
+            k_nope = x_fp8.to(tl.bfloat16) * scales.to(tl.bfloat16)
+            k_nope = tl.where(valid[:, None] & nope_mask[None, :], k_nope, zero_nope)
+            k_nope = tl.where(k_nope == k_nope, k_nope, zero_nope)
+
+            rope_ptr = (token_data_ptr + NOPE_DIM).to(tl.pointer_type(tl.bfloat16))
+            k_rope = tl.load(
+                rope_ptr[:, None] + rope_offsets[None, :],
+                mask=valid[:, None],
+                other=0.0,
+            )
+            k_rope = tl.where(valid[:, None], k_rope, zero_rope)
+            k_rope = tl.where(k_rope == k_rope, k_rope, zero_rope)
+
+            scores = tl.dot(q_nope, tl.trans(k_nope)) + tl.dot(
+                q_rope,
+                tl.trans(k_rope),
+            )
+            scores *= scale
+            scores = tl.where(head_mask[:, None] & valid[None, :], scores, neg_large)
+
+            m_block = tl.max(scores, axis=1)
+            m_new = tl.maximum(m_i, m_block)
+            alpha = tl.exp(m_i - m_new)
+            p = tl.exp(scores - m_new[:, None])
+            p = tl.where(head_mask[:, None] & valid[None, :], p, 0.0)
+            l_new = l_i * alpha + tl.sum(p, axis=1)
+
+            acc_nope = acc_nope * alpha[:, None] + tl.dot(p.to(k_nope.dtype), k_nope)
+            acc_rope = acc_rope * alpha[:, None] + tl.dot(p.to(k_rope.dtype), k_rope)
+            m_i = m_new
+            l_i = l_new
+
+    if HAS_ATTN_SINK:
+        sink = tl.load(
+            attn_sink_ptr + head_offsets, mask=head_mask, other=neg_large
+        ).to(tl.float32)
+        m_final = tl.maximum(m_i, sink)
+        alpha = tl.exp(m_i - m_final)
+        l_final = l_i * alpha + tl.exp(sink - m_final)
+        denom = tl.maximum(l_final, 1.0e-30)
+        out_nope = tl.where(
+            l_final[:, None] > 0.0,
+            (acc_nope * alpha[:, None]) / denom[:, None],
+            0.0,
+        )
+        out_rope = tl.where(
+            l_final[:, None] > 0.0,
+            (acc_rope * alpha[:, None]) / denom[:, None],
+            0.0,
+        )
+    else:
+        denom = tl.maximum(l_i, 1.0e-30)
+        out_nope = tl.where(l_i[:, None] > 0.0, acc_nope / denom[:, None], 0.0)
+        out_rope = tl.where(l_i[:, None] > 0.0, acc_rope / denom[:, None], 0.0)
+
+    out_row_ptr = (
+        out_ptr + query_idx * out_stride0 + head_offsets[:, None] * out_stride1
+    )
+    tl.store(
+        out_row_ptr + nope_offsets[None, :],
+        out_nope,
+        mask=head_mask[:, None] & nope_mask[None, :],
+    )
+    tl.store(
+        out_row_ptr + NOPE_DIM + rope_offsets[None, :],
+        out_rope,
+        mask=head_mask[:, None],
+    )
+
+
+def _rocm_sparse_attn_prefill_ragged_triton(
+    q: torch.Tensor,
+    kv: torch.Tensor,
+    indices: torch.Tensor,
+    indptr: torch.Tensor,
+    scale: float,
+    attn_sink: torch.Tensor | None,
+    nope_head_dim: int,
+    rope_head_dim: int,
+) -> torch.Tensor:
+    assert q.ndim == 3, f"expected q=[sq,h,d], got {q.shape}"
+    assert kv.ndim == 2, f"expected kv=[skv,d], got {kv.shape}"
+    assert indices.ndim == 1, f"expected indices=[nnz], got {indices.shape}"
+    assert indptr.ndim == 1, f"expected indptr=[sq+1], got {indptr.shape}"
+    assert not q.is_cpu and not kv.is_cpu and not indices.is_cpu and not indptr.is_cpu
+
+    indices = _as_int32_contiguous_1d(indices)
+    indptr = _as_int32_contiguous_1d(indptr)
+    has_attn_sink = attn_sink is not None
+    if attn_sink is None:
+        attn_sink = torch.empty(1, device=q.device, dtype=torch.float32)
+    else:
+        attn_sink = attn_sink.contiguous()
+
+    num_queries, num_heads, head_dim = q.shape
+    assert indptr.numel() == num_queries + 1, (
+        f"expected indptr shape [{num_queries + 1}], got {indptr.shape}"
+    )
+    _validate_dsv4_sparse_dims(
+        head_dim,
+        nope_head_dim,
+        rope_head_dim,
+        "_rocm_sparse_attn_prefill_ragged_triton",
+    )
+
+    block_h = 16
+    block_d = triton.next_power_of_2(head_dim)
+    block_k = 16 if head_dim >= 256 else 32
+    out = torch.empty_like(q, dtype=torch.bfloat16)
+    _sparse_attn_prefill_ragged_kernel[(num_queries, triton.cdiv(num_heads, block_h))](
+        q,
+        kv,
+        indices,
+        indptr,
+        attn_sink,
+        out,
+        q.stride(0),
+        q.stride(1),
+        q.stride(2),
+        kv.stride(0),
+        kv.stride(1),
+        out.stride(0),
+        out.stride(1),
+        out.stride(2),
+        num_heads,
+        head_dim,
+        kv.shape[0],
+        float(scale),
+        HAS_ATTN_SINK=has_attn_sink,
+        BLOCK_H=block_h,
+        BLOCK_D=block_d,
+        BLOCK_K=block_k,
+        num_warps=8,
+    )
+    return out
+
+
+def _rocm_sparse_attn_prefill_triton(
+    q: torch.Tensor,
+    kv: torch.Tensor,
+    indices: torch.Tensor,
+    scale: float,
+    attn_sink: torch.Tensor | None,
+    nope_head_dim: int,
+    rope_head_dim: int,
+    topk_length: torch.Tensor | None = None,
+) -> torch.Tensor:
+    ragged_indices, ragged_indptr = build_ragged_indices_from_dense(
+        indices,
+        topk_length
+        if topk_length is not None
+        else (indices >= 0).sum(dim=-1, dtype=torch.int32),
+        num_rows=kv.shape[0],
+    )
+    return _rocm_sparse_attn_prefill_ragged_triton(
+        q=q,
+        kv=kv,
+        indices=ragged_indices,
+        indptr=ragged_indptr,
+        scale=scale,
+        attn_sink=attn_sink,
+        nope_head_dim=nope_head_dim,
+        rope_head_dim=rope_head_dim,
+    )
+
+
+def _rocm_sparse_attn_decode_ragged_triton(
+    q: torch.Tensor,
+    main_cache: torch.Tensor,
+    main_indices: torch.Tensor,
+    main_indptr: torch.Tensor,
+    scale: float,
+    attn_sink: torch.Tensor | None,
+    nope_head_dim: int,
+    rope_head_dim: int,
+    extra_cache: torch.Tensor | None = None,
+    extra_indices: torch.Tensor | None = None,
+    extra_indptr: torch.Tensor | None = None,
+) -> torch.Tensor:
+    assert q.ndim == 3, f"expected q=[b,h,d], got {q.shape}"
+    assert main_cache.ndim == 3, (
+        f"expected main_cache=[blocks,block,bytes], got {main_cache.shape}"
+    )
+    assert main_indices.ndim == 1, (
+        f"expected main_indices=[nnz], got {main_indices.shape}"
+    )
+    assert main_indptr.ndim == 1, f"expected main_indptr=[b+1], got {main_indptr.shape}"
+    assert (
+        not q.is_cpu
+        and not main_cache.is_cpu
+        and not main_indices.is_cpu
+        and not main_indptr.is_cpu
+    )
+
+    main_indices = _as_int32_contiguous_1d(main_indices)
+    main_indptr = _as_int32_contiguous_1d(main_indptr)
+    has_attn_sink = attn_sink is not None
+    if attn_sink is None:
+        attn_sink = torch.empty(1, device=q.device, dtype=torch.float32)
+    else:
+        attn_sink = attn_sink.contiguous()
+
+    num_queries, num_heads, head_dim = q.shape
+    assert main_indptr.numel() == num_queries + 1, (
+        f"expected main_indptr shape [{num_queries + 1}], got {main_indptr.shape}"
+    )
+    _validate_dsv4_sparse_dims(
+        head_dim,
+        nope_head_dim,
+        rope_head_dim,
+        "_rocm_sparse_attn_decode_ragged_triton",
+    )
+
+    has_extra = (
+        extra_cache is not None
+        and extra_indices is not None
+        and extra_indptr is not None
+    )
+    if has_extra:
+        assert extra_cache is not None
+        assert extra_indices is not None
+        assert extra_indptr is not None
+        assert extra_indices.ndim == 1, (
+            f"expected extra_indices=[nnz], got {extra_indices.shape}"
+        )
+        assert extra_indptr.ndim == 1, (
+            f"expected extra_indptr=[b+1], got {extra_indptr.shape}"
+        )
+        extra_indices = _as_int32_contiguous_1d(extra_indices)
+        extra_indptr = _as_int32_contiguous_1d(extra_indptr)
+        assert extra_indptr.numel() == num_queries + 1, (
+            f"expected extra_indptr shape [{num_queries + 1}], got {extra_indptr.shape}"
+        )
+    else:
+        extra_cache = main_cache
+        extra_indices = torch.empty(0, device=q.device, dtype=torch.int32)
+        extra_indptr = torch.zeros(num_queries + 1, device=q.device, dtype=torch.int32)
+
+    block_h = 16
+    block_k = 16 if head_dim >= 256 else 32
+    out = torch.empty_like(q, dtype=torch.bfloat16)
+    _sparse_attn_decode_ragged_kernel[(num_queries, triton.cdiv(num_heads, block_h))](
+        q,
+        main_cache,
+        main_indices,
+        main_indptr,
+        extra_cache,
+        extra_indices,
+        extra_indptr,
+        attn_sink,
+        out,
+        q.stride(0),
+        q.stride(1),
+        out.stride(0),
+        out.stride(1),
+        main_cache.stride(0),
+        extra_cache.stride(0),
+        main_cache.shape[0] * main_cache.shape[1],
+        extra_cache.shape[0] * extra_cache.shape[1],
+        main_cache.shape[1],
+        extra_cache.shape[1],
+        scale,
+        num_heads,
+        HAS_ATTN_SINK=has_attn_sink,
+        HAS_EXTRA=has_extra,
+        NOPE_DIM=nope_head_dim,
+        NOPE_BLOCK=triton.next_power_of_2(nope_head_dim),
+        ROPE_DIM=rope_head_dim,
+        IS_FNUZ=current_platform.is_fp8_fnuz(),
+        BLOCK_H=block_h,
+        BLOCK_K=block_k,
+        num_warps=8,
+    )
+    return out
+
+
+def _rocm_sparse_attn_decode_triton(
+    q: torch.Tensor,
+    main_cache: torch.Tensor,
+    main_indices: torch.Tensor,
+    scale: float,
+    attn_sink: torch.Tensor | None,
+    nope_head_dim: int,
+    rope_head_dim: int,
+    extra_cache: torch.Tensor | None = None,
+    extra_indices: torch.Tensor | None = None,
+    main_lengths: torch.Tensor | None = None,
+    extra_lengths: torch.Tensor | None = None,
+    main_ragged_indices: torch.Tensor | None = None,
+    main_ragged_indptr: torch.Tensor | None = None,
+    extra_ragged_indices: torch.Tensor | None = None,
+    extra_ragged_indptr: torch.Tensor | None = None,
+) -> torch.Tensor:
+    if main_ragged_indices is None or main_ragged_indptr is None:
+        main_ragged_indices, main_ragged_indptr = build_ragged_indices_from_dense(
+            main_indices,
+            main_lengths
+            if main_lengths is not None
+            else (main_indices >= 0).sum(dim=-1, dtype=torch.int32),
+            num_rows=main_cache.shape[0] * main_cache.shape[1],
+        )
+
+    if (
+        (extra_ragged_indices is None or extra_ragged_indptr is None)
+        and extra_cache is not None
+        and extra_indices is not None
+    ):
+        extra_ragged_indices, extra_ragged_indptr = build_ragged_indices_from_dense(
+            extra_indices,
+            extra_lengths
+            if extra_lengths is not None
+            else (extra_indices >= 0).sum(dim=-1, dtype=torch.int32),
+            num_rows=extra_cache.shape[0] * extra_cache.shape[1],
+        )
+
+    return _rocm_sparse_attn_decode_ragged_triton(
+        q=q,
+        main_cache=main_cache,
+        main_indices=main_ragged_indices,
+        main_indptr=main_ragged_indptr,
+        scale=scale,
+        attn_sink=attn_sink,
+        nope_head_dim=nope_head_dim,
+        rope_head_dim=rope_head_dim,
+        extra_cache=extra_cache,
+        extra_indices=extra_ragged_indices,
+        extra_indptr=extra_ragged_indptr,
+    )
+
+
+def rocm_sparse_attn_prefill(
+    q: torch.Tensor,
+    kv: torch.Tensor,
+    indices: torch.Tensor,
+    topk_length: torch.Tensor | None,
+    scale: float,
+    head_dim: int,
+    nope_head_dim: int,
+    rope_head_dim: int,
+    attn_sink: torch.Tensor | None,
+    output: torch.Tensor,
+    ragged_indices: torch.Tensor | None = None,
+    ragged_indptr: torch.Tensor | None = None,
+) -> None:
+    assert kv.ndim == 3 and kv.shape[1] == 1, (
+        f"ROCm Triton sparse prefill expects kv=[skv,1,d], got {kv.shape}"
+    )
+    _validate_dsv4_sparse_dims(
+        head_dim,
+        nope_head_dim,
+        rope_head_dim,
+        "rocm_sparse_attn_prefill",
+    )
+    if ragged_indices is not None and ragged_indptr is not None:
+        output_chunk = _rocm_sparse_attn_prefill_ragged_triton(
+            q=q,
+            kv=kv.squeeze(1),
+            indices=ragged_indices,
+            indptr=ragged_indptr,
+            scale=scale,
+            attn_sink=None if attn_sink is None else attn_sink[: q.shape[1]],
+            nope_head_dim=nope_head_dim,
+            rope_head_dim=rope_head_dim,
+        )
+    else:
+        indices_2d = indices.reshape(indices.shape[0], -1)
+        output_chunk = _rocm_sparse_attn_prefill_triton(
+            q=q,
+            kv=kv.squeeze(1),
+            indices=indices_2d,
+            scale=scale,
+            attn_sink=None if attn_sink is None else attn_sink[: q.shape[1]],
+            nope_head_dim=nope_head_dim,
+            rope_head_dim=rope_head_dim,
+            topk_length=topk_length,
+        )
+    output.copy_(output_chunk.to(output.dtype))
+
+
+def rocm_sparse_attn_decode(
+    q: torch.Tensor,
+    kv_cache: torch.Tensor | None,
+    swa_k_cache: torch.Tensor,
+    swa_only: bool,
+    topk_indices: torch.Tensor | None,
+    topk_lens: torch.Tensor | None,
+    swa_indices: torch.Tensor,
+    swa_lens: torch.Tensor,
+    swa_ragged_indices: torch.Tensor | None,
+    swa_ragged_indptr: torch.Tensor | None,
+    topk_ragged_indices: torch.Tensor | None,
+    topk_ragged_indptr: torch.Tensor | None,
+    attn_sink: torch.Tensor | None,
+    scale: float,
+    head_dim: int,
+    nope_head_dim: int,
+    rope_head_dim: int,
+    output: torch.Tensor,
+) -> None:
+    assert swa_k_cache.dtype == torch.uint8, (
+        "ROCm Triton sparse decode expects uint8 fp8_ds_mla SWA cache, "
+        f"got {swa_k_cache.dtype}"
+    )
+    _validate_dsv4_sparse_dims(
+        head_dim,
+        nope_head_dim,
+        rope_head_dim,
+        "rocm_sparse_attn_decode",
+    )
+
+    main_indices = swa_indices.reshape(swa_indices.shape[0], -1)
+
+    extra_cache = None
+    extra_indices = None
+    if not swa_only:
+        assert kv_cache is not None
+        assert topk_indices is not None or (
+            topk_ragged_indices is not None and topk_ragged_indptr is not None
+        )
+        assert kv_cache.dtype == torch.uint8, (
+            "ROCm Triton sparse decode expects uint8 fp8_ds_mla extra cache, "
+            f"got {kv_cache.dtype}"
+        )
+        extra_cache = kv_cache
+        if topk_indices is not None:
+            extra_indices = topk_indices.reshape(topk_indices.shape[0], -1)
+
+    attn_out = _rocm_sparse_attn_decode_triton(
+        q=q,
+        main_cache=swa_k_cache,
+        main_indices=main_indices,
+        scale=scale,
+        attn_sink=None if attn_sink is None else attn_sink[: q.shape[1]],
+        nope_head_dim=nope_head_dim,
+        rope_head_dim=rope_head_dim,
+        extra_cache=extra_cache,
+        extra_indices=extra_indices,
+        main_lengths=swa_lens,
+        extra_lengths=topk_lens,
+        main_ragged_indices=swa_ragged_indices,
+        main_ragged_indptr=swa_ragged_indptr,
+        extra_ragged_indices=topk_ragged_indices,
+        extra_ragged_indptr=topk_ragged_indptr,
+    )
+    output.copy_(attn_out.to(output.dtype))
diff --git a/vllm/v1/attention/ops/triton_attention_helpers.py b/vllm/v1/attention/ops/triton_attention_helpers.py
new file mode 100644
index 000000000000..6ed50f6a2df2
--- /dev/null
+++ b/vllm/v1/attention/ops/triton_attention_helpers.py
@@ -0,0 +1,383 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Shared ``@triton.jit`` helpers used by the unified attention kernel
+and ``reduce_segments``.
+
+These are plain attention-loop helpers — mask building, ALiBi / QQ-bias
+score post-processing, online-softmax bookkeeping, tile-loop bounds,
+sequence lookup — extracted so the 2D and 3D paths of the unified
+kernel (and any future consumer) share a single implementation.
+"""
+
+from __future__ import annotations
+
+from vllm.triton_utils import tl, triton
+
+# ===========================================================================
+# Scalar helpers (reused by every kernel + reduce_segments)
+# ===========================================================================
+
+
+@triton.jit
+def cdiv_fn(x, y):
+    """Ceiling division.  Kept as a helper to keep kernel bodies terse."""
+    return (x + y - 1) // y
+
+
+@triton.jit
+def apply_softcap(S, x):
+    """Softcap (aka tanh-style clamp) used to bound attention scores.
+
+    ``x * tanh(S / x)`` rewritten to avoid a direct ``tanh`` call.
+    """
+    Sdiv = S / x
+    p1 = tl.exp(Sdiv)
+    p2 = tl.exp(-Sdiv)
+    return x * (p1 - p2) / (p1 + p2)
+
+
+# ===========================================================================
+# Attention loop
+# ===========================================================================
+
+
+@triton.jit
+def resolve_seq_and_query_len(
+    query_start_len_ptr,
+    seq_lens_ptr,
+    q_block_global_idx,
+    num_seqs,
+    BLOCK_Q: tl.constexpr,
+):
+    """Resolve the (sequence, q-block-within-sequence) pair and load the
+    per-sequence lengths.
+
+    Shared across every attention kernel — the ``q_block_global_idx``
+    program id indexes into the flattened ``(seq, q_block_in_seq)``
+    space, and a binary search over ``query_start_len_ptr`` recovers
+    the (seq, local-q-block) pair.
+
+    Returns ``(seq_idx, q_block_local_idx, cur_batch_in_all_start_index,
+    cur_batch_query_len, seq_len)``.  Callers must still early-return
+    when ``q_block_local_idx * BLOCK_Q >= cur_batch_query_len`` (Triton
+    helpers cannot return from the caller).
+    """
+    # find_seq_idx is defined below; forward use is fine inside @triton.jit.
+    seq_idx = find_seq_idx(
+        query_start_len_ptr, q_block_global_idx, num_seqs, BLOCK_Q, True
+    )
+    q_block_start_idx = tl.load(query_start_len_ptr + seq_idx) // BLOCK_Q + seq_idx
+    q_block_local_idx = q_block_global_idx - q_block_start_idx
+    cur_start = tl.load(query_start_len_ptr + seq_idx)
+    cur_stop = tl.load(query_start_len_ptr + seq_idx + 1)
+    cur_batch_query_len = cur_stop - cur_start
+    seq_len = tl.load(seq_lens_ptr + seq_idx)
+    return seq_idx, q_block_local_idx, cur_start, cur_batch_query_len, seq_len
+
+
+@triton.jit
+def find_seq_idx(
+    query_start_len_ptr,
+    target_idx,
+    num_seqs,
+    BLOCK_Q: tl.constexpr,
+    use_q_block_mode: tl.constexpr,
+):
+    """Binary search over the cumulative query-length prefix.
+
+    When ``use_q_block_mode`` is True, the prefix values are reshaped
+    into units of ``BLOCK_Q`` plus one entry per boundary — matching
+    the q-block grid laid out by the attention kernels.  When False
+    we search the plain cumulative-length prefix (used by
+    ``reduce_segments`` which iterates over raw query tokens).
+    """
+    left: tl.int32 = 0
+    right = num_seqs
+    while left < right:
+        mid = (left + right) // 2
+        val = tl.load(query_start_len_ptr + mid)
+        mid_val = val // BLOCK_Q + mid if use_q_block_mode else val
+
+        if mid_val <= target_idx:
+            left = mid + 1
+        else:
+            right = mid
+
+    return left - 1
+
+
+@triton.jit
+def init_softmax_M(
+    sink_ptr,
+    query_offset_1,
+    query_mask_1,
+    segm_idx_or_0,
+    BLOCK_M: tl.constexpr,
+    USE_SINKS: tl.constexpr,
+    IS_3D: tl.constexpr,
+):
+    """Initial row-max ``M`` for the online softmax.
+
+    Without sinks: ``-inf``.  With sinks: load the per-head sink bias
+    once.  In 3D mode only segment 0 loads — ``reduce_segments`` adds
+    the sink contribution exactly once across segments, so other
+    segments must start from ``-inf``.
+
+    ``segm_idx_or_0`` is the 3D segment index or 0 for 2D (caller
+    passes ``0`` when ``IS_3D`` is False).
+    """
+    M = tl.full([BLOCK_M], float("-inf"), dtype=tl.float32)
+    if USE_SINKS:
+        load_sinks = (not IS_3D) or (segm_idx_or_0 == 0)
+        if load_sinks:
+            M = tl.load(
+                sink_ptr + query_offset_1,
+                mask=query_mask_1,
+                other=float("-inf"),
+            ).to(tl.float32)
+    return M
+
+
+@triton.jit
+def compute_tile_loop_bounds(
+    context_len,
+    seq_len,
+    cur_batch_query_len,
+    q_block_local_idx,
+    segm_idx_or_0,
+    tiles_per_segment_or_0,
+    TILE_SIZE: tl.constexpr,
+    BLOCK_M: tl.constexpr,
+    BLOCK_Q: tl.constexpr,
+    num_queries_per_kv: tl.constexpr,
+    SLIDING_WINDOW: tl.constexpr,
+    USE_MM_PREFIX: tl.constexpr,
+    IS_3D: tl.constexpr,
+    CHUNK_LOOKBACK: tl.constexpr = -1,
+    CHUNK_SIZE: tl.constexpr = -1,
+):
+    """Compute the tile-loop bounds ``(loop_lo, loop_hi)`` and the
+    derived ``max_seq_prefix_len`` used for per-tile masking.
+
+    Combines three concerns into one helper:
+
+    1. Longest prefix spanned by any query token in this q-block.
+       Clamped to ``seq_len`` (causal) or extended to it when
+       mm_prefix is active (bidirectional ranges can reach past the
+       causal prefix).
+    2. Sliding-window pruning: narrows ``[tile_start, tile_end)`` to
+       only tiles that can contain an allowed key under SWA.
+    3. 3D scoping: when ``IS_3D`` is True, further narrows to the
+       segment's slice via ``(segm_idx * tiles_per_segment,
+       (segm_idx + 1) * tiles_per_segment)``.
+    """
+    # compute the length of the longest sequence prefix spanned by any
+    # query token in the current q_block (q_block_local_idx)
+    max_seq_prefix_len = (
+        context_len
+        + q_block_local_idx * BLOCK_Q
+        + (BLOCK_M - 1) // num_queries_per_kv
+        + 1
+    )
+    if USE_MM_PREFIX:
+        # image bidirectional attention ranges require a full range
+        # including q_block padding to make sure doc mask is correct
+        max_seq_prefix_len = tl.maximum(max_seq_prefix_len, seq_len)
+    else:
+        max_seq_prefix_len = tl.minimum(max_seq_prefix_len, seq_len)
+
+    num_tiles = cdiv_fn(max_seq_prefix_len, TILE_SIZE)
+
+    # ---- Sliding-window tile pruning --------------------
+    # Default: keep previous global behavior
+    tile_start = 0
+    tile_end = num_tiles
+    # TODO(Isotr0py): sliding window pruning with image bidirectional mask
+    if SLIDING_WINDOW > 0 and not USE_MM_PREFIX:
+        # Query rows covered by this Q-block
+        qpos_lo = q_block_local_idx * BLOCK_Q
+        qpos_hi = tl.minimum(
+            qpos_lo + (BLOCK_M - 1) // num_queries_per_kv,
+            cur_batch_query_len - 1,
+        )
+        # For sliding window, each query position q can only attend to
+        # keys in the range [q_abs - SLIDING_WINDOW + 1, q_abs]
+        # where q_abs = context_len + q
+        # The union of allowed key positions for this Q-block is:
+        # [context_len + qpos_lo - SLIDING_WINDOW + 1, context_len + qpos_hi]
+        q_abs = context_len + qpos_lo
+        if CHUNK_LOOKBACK > -1:
+            # Chunked attention: align lower bound to the start of the
+            # lookback'th previous chunk.
+            first_allowed_key = ((q_abs // CHUNK_SIZE) - CHUNK_LOOKBACK) * CHUNK_SIZE
+        else:
+            first_allowed_key = q_abs - SLIDING_WINDOW + 1
+        last_allowed_key = context_len + qpos_hi
+        # Convert to tile indices and clamp
+        tile_start = tl.maximum(0, first_allowed_key // TILE_SIZE)
+        tile_end = tl.minimum((last_allowed_key // TILE_SIZE) + 1, num_tiles)
+
+    if IS_3D:
+        loop_lo = max(segm_idx_or_0 * tiles_per_segment_or_0, tile_start)
+        loop_hi = min((segm_idx_or_0 + 1) * tiles_per_segment_or_0, tile_end)
+    else:
+        loop_lo = tile_start
+        loop_hi = tile_end
+
+    return loop_lo, loop_hi, max_seq_prefix_len
+
+
+@triton.jit
+def store_segm_reduce_scalars(
+    segm_max_ptr,
+    segm_expsum_ptr,
+    query_offset_0,
+    query_offset_1,
+    segm_idx,
+    M,
+    L,
+    query_mask_0,
+    query_mask_1,
+    num_query_heads: tl.constexpr,
+    NUM_SEGMENTS_PER_SEQ: tl.constexpr,
+):
+    """Store per-segment ``M`` and ``L`` for ``reduce_segments`` to
+    combine into the final softmax.
+
+    Shared across every 3D attention epilogue; the per-token output
+    stripes are mode-specific (flat / 2-stream split / 4-stream split)
+    and stay inlined.
+    """
+    segm_offset = (
+        query_offset_0.to(tl.int64) * (num_query_heads * NUM_SEGMENTS_PER_SEQ)
+        + query_offset_1 * NUM_SEGMENTS_PER_SEQ
+        + segm_idx
+    )
+    tl.store(segm_max_ptr + segm_offset, M, mask=query_mask_0 & query_mask_1)
+    tl.store(segm_expsum_ptr + segm_offset, L, mask=query_mask_0 & query_mask_1)
+
+
+@triton.jit
+def compute_kv_seq_mask(
+    query_abs_pos,
+    seq_offset,
+    seq_idx,
+    mm_prefix_range_ptr,
+    SLIDING_WINDOW: tl.constexpr,
+    USE_MM_PREFIX: tl.constexpr,
+    MAX_MM_RANGES: tl.constexpr,
+    CHUNK_LOOKBACK: tl.constexpr = -1,
+    CHUNK_SIZE: tl.constexpr = -1,
+):
+    """Build the KV mask for one tile.
+
+    Causal (key <= query) by default; AND-ed with either chunked
+    attention (``CHUNK_LOOKBACK >= 0``) or sliding window
+    (``SLIDING_WINDOW > 0``); OR-ed with the bidirectional ranges from
+    ``mm_prefix_range`` when PrefixLM / multimodal attention is active.
+    Order matches FlexAttention: ``(causal AND window) OR mm_prefix``.
+    Chunked attention takes precedence over sliding window when both
+    are non-default — the launcher zeros ``CHUNK_LOOKBACK`` whenever
+    sliding window is disabled.
+    """
+    # Compute attention mask: causal by default (key <= query)
+    seq_mask = seq_offset[None, :] <= query_abs_pos
+
+    # Apply sliding window / chunked attention to base mask
+    # BEFORE mm_prefix OR.
+    # Order must match FlexAttention:
+    #   (causal AND sliding_window) OR mm_prefix
+    if CHUNK_LOOKBACK > -1:
+        seq_mask = seq_mask & (
+            (query_abs_pos // CHUNK_SIZE - seq_offset[None, :] // CHUNK_SIZE)
+            <= CHUNK_LOOKBACK
+        )
+    elif SLIDING_WINDOW > 0:
+        seq_mask = seq_mask & ((query_abs_pos - seq_offset) < SLIDING_WINDOW)
+
+    # PrefixLM: extend mask with bidirectional ranges for multimodal tokens.
+    # Applied AFTER sliding window so mm_prefix ranges override SW restriction.
+    if USE_MM_PREFIX:
+        for i in range(MAX_MM_RANGES):
+            range_start = tl.load(
+                mm_prefix_range_ptr + seq_idx * MAX_MM_RANGES * 2 + i * 2
+            )
+            range_end = tl.load(
+                mm_prefix_range_ptr + seq_idx * MAX_MM_RANGES * 2 + i * 2 + 1
+            )
+            is_valid = range_start < range_end
+            q_in_range = (
+                (query_abs_pos >= range_start) & (query_abs_pos <= range_end) & is_valid
+            )
+            k_in_range = (
+                (seq_offset[None, :] >= range_start)
+                & (seq_offset[None, :] <= range_end)
+                & is_valid
+            )
+            seq_mask |= q_in_range & k_in_range
+    return seq_mask
+
+
+@triton.jit
+def apply_alibi_to_score(
+    S,
+    alibi_slope,
+    seq_offset,
+    context_len,
+    query_pos,
+    USE_ALIBI_SQRT: tl.constexpr,
+):
+    """Add the ALiBi positional bias (linear or sqrt variant) to S in-place."""
+    if USE_ALIBI_SQRT:
+        relative_pos = seq_offset - (context_len + query_pos[:, None])
+        alibi_offset = tl.where(
+            relative_pos <= 0,
+            -tl.sqrt((-relative_pos).to(tl.float32)),
+            0.0,
+        )
+    else:
+        alibi_offset = seq_offset - context_len
+    return S + alibi_slope[:, None] * alibi_offset
+
+
+@triton.jit
+def load_qq_bias_tile(
+    qq_bias_row_ptrs,
+    seq_offset,
+    context_len,
+    qq_bias_stride_0,
+):
+    """Load the qq-bias slice for keys that correspond to query rows."""
+    key_rel_pos = seq_offset - context_len
+    is_query_key = key_rel_pos >= 0 and key_rel_pos < qq_bias_stride_0
+    return tl.load(
+        qq_bias_row_ptrs + key_rel_pos[None, :],
+        mask=is_query_key[None, :],
+        other=0.0,
+    )
+
+
+@triton.jit
+def softmax_step(S, M, L):
+    """Online softmax update for one tile.
+
+    Returns ``(M_new, L_new, P, alpha)``.  Caller is responsible for
+    rescaling its accumulator(s) by ``alpha[:, None]`` — done outside so
+    kernels with a different number / shape of accumulators can reuse
+    the same step.
+    """
+    # compute running maximum
+    # m_j : (BLOCK_M,)
+    m_j = tl.maximum(M, tl.max(S, axis=1))
+    # For sliding window there's a chance the max is -inf due to masking of
+    # the entire row. In this case we need to set m_j 0 to avoid NaN
+    m_j = tl.where(m_j > float("-inf"), m_j, 0.0)
+    # P : (BLOCK_M, TILE_SIZE)
+    P = tl.exp(S - m_j[:, None])
+    # l_j : (BLOCK_M,)
+    l_j = tl.sum(P, axis=1)
+    # alpha : (BLOCK_M, )
+    alpha = tl.exp(M - m_j)
+    # update constants
+    L_new = L * alpha + l_j
+    return m_j, L_new, P, alpha
diff --git a/vllm/v1/attention/ops/triton_decode_attention.py b/vllm/v1/attention/ops/triton_decode_attention.py
index 63263bc92e24..c58a7026e89b 100644
--- a/vllm/v1/attention/ops/triton_decode_attention.py
+++ b/vllm/v1/attention/ops/triton_decode_attention.py
@@ -291,6 +291,7 @@ def _fwd_grouped_kernel_stage1(
     logit_cap: tl.constexpr,
     Lk: tl.constexpr,
     Lv: tl.constexpr,
+    IS_MLA: tl.constexpr = False,
 ):
     cur_batch = tl.program_id(0)
     cur_head_id = tl.program_id(1)
@@ -310,7 +311,12 @@ def _fwd_grouped_kernel_stage1(
     cur_batch_req_idx = cur_batch
 
     offs_q = cur_batch * stride_qbs + cur_head[:, None] * stride_qh + offs_d[None, :]
-    q = tl.load(Q + offs_q, mask=(mask_h[:, None]) & (mask_d[None, :]), other=0.0)
+    q = tl.load(
+        Q + offs_q,
+        mask=(mask_h[:, None]) & (mask_d[None, :]),
+        other=0.0,
+        cache_modifier=".ca",
+    )
 
     if BLOCK_DPE > 0:
         offs_dpe = BLOCK_DMODEL + tl.arange(0, BLOCK_DPE)
@@ -319,7 +325,10 @@ def _fwd_grouped_kernel_stage1(
             cur_batch * stride_qbs + cur_head[:, None] * stride_qh + offs_dpe[None, :]
         )
         qpe = tl.load(
-            Q + off_qpe, mask=(mask_h[:, None]) & (mask_dpe[None, :]), other=0.0
+            Q + off_qpe,
+            mask=(mask_h[:, None]) & (mask_dpe[None, :]),
+            other=0.0,
+            cache_modifier=".ca",
         )
 
     kv_len_per_split = tl.cdiv(cur_batch_seq_len, NUM_KV_SPLITS)
@@ -331,9 +340,14 @@ def _fwd_grouped_kernel_stage1(
     acc = tl.zeros([BLOCK_H, BLOCK_DV], dtype=tl.float32)
 
     if split_kv_end > split_kv_start:
+        base_offs_k = cur_kv_head * stride_buf_kh + offs_d[:, None]
+        base_offs_v = cur_kv_head * stride_buf_vh + offs_dv[None, :]
+        if BLOCK_DPE > 0:
+            base_offs_kpe = cur_kv_head * stride_buf_kh + offs_dpe[:, None]
+
         ks = tl.load(k_scale)
         vs = tl.load(v_scale)
-        for start_n in range(split_kv_start, split_kv_end, BLOCK_N):
+        for start_n in tl.range(split_kv_start, split_kv_end, BLOCK_N):
             offs_n = start_n + tl.arange(0, BLOCK_N)
             kv_page_number = tl.load(
                 Req_to_tokens
@@ -341,31 +355,29 @@ def _fwd_grouped_kernel_stage1(
                 + offs_n // PAGE_SIZE,
                 mask=offs_n < split_kv_end,
                 other=0,
+                cache_modifier=".ca",
             )
             kv_loc = kv_page_number * PAGE_SIZE + offs_n % PAGE_SIZE
-            offs_buf_k = (
-                kv_loc[None, :] * stride_buf_kbs
-                + cur_kv_head * stride_buf_kh
-                + offs_d[:, None]
-            )
+
+            # explicitly facilitate overlapping load/compute
+            offs_buf_k = kv_loc[None, :] * stride_buf_kbs + base_offs_k
             k = tl.load(
                 K_Buffer + offs_buf_k,
                 mask=(offs_n[None, :] < split_kv_end) & (mask_d[:, None]),
                 other=0.0,
+                cache_modifier=".cg",
             )
+
             if k.dtype.is_fp8():
                 k = (k.to(tl.float32) * ks).to(q.dtype)
             qk = tl.dot(q, k.to(q.dtype))
             if BLOCK_DPE > 0:
-                offs_buf_kpe = (
-                    kv_loc[None, :] * stride_buf_kbs
-                    + cur_kv_head * stride_buf_kh
-                    + offs_dpe[:, None]
-                )
+                offs_buf_kpe = kv_loc[None, :] * stride_buf_kbs + base_offs_kpe
                 kpe = tl.load(
                     K_Buffer + offs_buf_kpe,
                     mask=(offs_n[None, :] < split_kv_end) & (mask_dpe[:, None]),
                     other=0.0,
+                    cache_modifier=".cg",
                 )
                 if kpe.dtype.is_fp8():
                     kpe = (kpe.to(tl.float32) * ks).to(qpe.dtype)
@@ -379,18 +391,20 @@ def _fwd_grouped_kernel_stage1(
                 mask_h[:, None] & (offs_n[None, :] < split_kv_end), qk, float("-inf")
             )
 
-            offs_buf_v = (
-                kv_loc[:, None] * stride_buf_vbs
-                + cur_kv_head * stride_buf_vh
-                + offs_dv[None, :]
-            )
-            v = tl.load(
-                V_Buffer + offs_buf_v,
-                mask=(offs_n[:, None] < split_kv_end) & (mask_dv[None, :]),
-                other=0.0,
-            )
-            if v.dtype.is_fp8():
-                v = (v.to(tl.float32) * vs).to(q.dtype)
+            if not IS_MLA:
+                offs_buf_v = kv_loc[:, None] * stride_buf_vbs + base_offs_v
+                v = tl.load(
+                    V_Buffer + offs_buf_v,
+                    mask=(offs_n[:, None] < split_kv_end) & (mask_dv[None, :]),
+                    other=0.0,
+                )
+                if v.dtype.is_fp8():
+                    v = (v.to(tl.float32) * vs).to(q.dtype)
+            else:
+                # MLA uses a single c_kv.
+                # loading the same c_kv to interpret it as v is not necessary.
+                # transpose the existing c_kv (aka k) for the dot product.
+                v = tl.trans(k)
 
             n_e_max = tl.maximum(tl.max(qk, 1), e_max)
             re_scale = tl.exp(e_max - n_e_max)
@@ -441,26 +455,33 @@ def _decode_grouped_att_m_fwd(
     logit_cap,
     k_scale,
     v_scale,
+    is_mla=False,
 ):
-    BLOCK = 32
+    # with is_mla there is only a single c_kv in smem.
+    # could increase BLOCK or num_stages.
     Lk = k_buffer.shape[-1]
     Lv = v_buffer.shape[-1]
 
-    # [TODO] work around shmem limit on MI3xx
-    if is_hip_ and Lk >= 576:
-        BLOCK = 16
-
-    if Lk == 576:
-        BLOCK_DMODEL = 512
-        BLOCK_DPE = 64
-    elif Lk == 288:
-        BLOCK_DMODEL = 256
-        BLOCK_DPE = 32
+    # Align tile dimensions with latent rank for MLA to avoid shape mismatch.
+    if is_mla:
+        if not is_hip_ and Lk == 576:
+            BLOCK_DMODEL = 512
+            BLOCK_DPE = 64
+        elif not is_hip_ and Lk == 288:
+            BLOCK_DMODEL = 256
+            BLOCK_DPE = 32
+        else:
+            BLOCK_DMODEL = triton.next_power_of_2(Lv)
+            BLOCK_DPE = triton.next_power_of_2(Lk - Lv) if Lk > Lv else 0
     else:
         BLOCK_DMODEL = triton.next_power_of_2(Lk)
         BLOCK_DPE = 0
     BLOCK_DV = triton.next_power_of_2(Lv)
 
+    BLOCK = 32
+    if is_hip_:
+        BLOCK = 16
+
     batch, head_num = q.shape[0], q.shape[1]
     kv_group_num = q.shape[1] // k_buffer.shape[-2]
 
@@ -479,6 +500,11 @@ def _decode_grouped_att_m_fwd(
         # https://github.com/triton-lang/triton/blob/main/third_party/amd/backend/compiler.py
         extra_kargs = {"waves_per_eu": 1, "matrix_instr_nonkdim": 16, "kpack": 2}
         num_stages = 1
+    elif not is_hip_ and BLOCK_DMODEL >= 1024:
+        # Avoid shared memory overflow on NVIDIA when BLOCK_DMODEL is large
+        # like non-MLA D_QK=576, BLOCK_DMODEL=1024, BLOCK_H=16
+        # exceeds 101376 bytes limit
+        num_stages = 1
 
     _fwd_grouped_kernel_stage1[grid](
         q,
@@ -514,6 +540,7 @@ def _decode_grouped_att_m_fwd(
         num_stages=num_stages,
         Lk=Lk,
         Lv=Lv,
+        IS_MLA=is_mla,
         **extra_kargs,
     )
 
@@ -533,6 +560,7 @@ def _fwd_kernel_stage2(
     NUM_KV_SPLITS: tl.constexpr,
     BLOCK_DV: tl.constexpr,
     Lv: tl.constexpr,
+    OUTPUT_FP16: tl.constexpr = 0,
 ):
     cur_batch = tl.program_id(0)
     cur_head = tl.program_id(1)
@@ -569,9 +597,12 @@ def _fwd_kernel_stage2(
             e_sum = e_sum * old_scale + exp_logic
             e_max = n_e_max
 
+    result = acc / e_sum
+    if OUTPUT_FP16:
+        result = result.to(tl.float16)
     tl.store(
         o + cur_batch * stride_obs + cur_head * stride_oh + offs_d,
-        acc / e_sum,
+        result,
         mask=mask_d,
     )
     lse_val = e_max + tl.log(e_sum)
@@ -673,6 +704,7 @@ def decode_attention_fwd_grouped(
     logit_cap=0.0,
     k_scale=None,
     v_scale=None,
+    is_mla=False,
 ):
     _decode_grouped_att_m_fwd(
         q,
@@ -687,6 +719,7 @@ def decode_attention_fwd_grouped(
         logit_cap,
         k_scale,
         v_scale,
+        is_mla=is_mla,
     )
     _decode_softmax_reducev_fwd(
         attn_logits, q, o, lse, v_buffer, b_seq_len, num_kv_splits
@@ -708,6 +741,7 @@ def decode_attention_fwd(
     logit_cap=0.0,
     k_scale=None,
     v_scale=None,
+    is_mla=False,
 ):
     assert num_kv_splits == attn_logits.shape[2]
 
@@ -753,4 +787,5 @@ def decode_attention_fwd(
             logit_cap,
             k_scale,
             v_scale,
+            is_mla=is_mla,
         )
diff --git a/vllm/v1/attention/ops/triton_merge_attn_states.py b/vllm/v1/attention/ops/triton_merge_attn_states.py
index 74e4d778ded8..14a52ada97fd 100644
--- a/vllm/v1/attention/ops/triton_merge_attn_states.py
+++ b/vllm/v1/attention/ops/triton_merge_attn_states.py
@@ -3,8 +3,11 @@
 
 import torch
 
+from vllm.platforms import current_platform
 from vllm.triton_utils import tl, triton
 
+float8_info = torch.finfo(current_platform.fp8_dtype())
+
 
 # Implements section 2.2 of https://www.arxiv.org/pdf/2501.01005
 # can be used to combine partial attention results (in the split-KV case)
@@ -15,16 +18,23 @@ def merge_attn_states(
     suffix_output: torch.Tensor,
     suffix_lse: torch.Tensor,
     output_lse: torch.Tensor | None = None,
+    prefill_tokens_with_context: int | None = None,
+    output_scale: torch.Tensor | None = None,
 ) -> None:
     num_tokens = output.shape[0]
     num_query_heads = output.shape[1]
     head_size = output.shape[2]
     padded_head_size = triton.next_power_of_2(head_size)
     # We assume the output stride on num_head is not always as same as the
-    # `suffix_output` and `prefix_output`, as them might be padded by the attention
-    # backend.
+    # `suffix_output` and `prefix_output`, as them might be padded by the
+    # attention backend.
     prefix_head_stride = prefix_output.stride(1)
     output_head_stride = output.stride(1)
+
+    # If prefill_tokens_with_context is None, all tokens should use prefix context
+    if prefill_tokens_with_context is None:
+        prefill_tokens_with_context = num_tokens
+
     # TODO(woosuk): Use CUDA kernel instead of Triton to minimize CPU overhead.
     merge_attn_states_kernel[(num_tokens, num_query_heads)](
         output,
@@ -35,9 +45,12 @@ def merge_attn_states(
         suffix_lse,
         prefix_head_stride,
         output_head_stride,
+        output_scale,
         head_size,
         padded_head_size,
         output_lse is not None,
+        prefill_tokens_with_context,
+        output_scale is not None,
     )
 
 
@@ -51,15 +64,57 @@ def merge_attn_states_kernel(
     suffix_lse,  # [NUM_HEADS, NUM_TOKENS]
     prefix_head_stride,
     output_head_stride,
+    output_scale,  # scale tensor or None
     HEAD_SIZE: tl.constexpr,
     PADDED_HEAD_SIZE: tl.constexpr,
     OUTPUT_LSE: tl.constexpr,
+    prefill_tokens_with_context: tl.constexpr,
+    USE_FP8: tl.constexpr,
+    FP8_MIN: tl.constexpr = float8_info.min,
+    FP8_MAX: tl.constexpr = float8_info.max,
 ):
     token_idx = tl.program_id(0)
     num_tokens = tl.num_programs(0)
     head_idx = tl.program_id(1)
     num_heads = tl.num_programs(1)
 
+    prefix_mask = token_idx < prefill_tokens_with_context
+
+    head_arange = tl.arange(0, PADDED_HEAD_SIZE)
+    head_mask = head_arange < HEAD_SIZE
+
+    # For tokens without context (token_idx >= prefill_tokens_with_context),
+    # directly copy from suffix_output
+    if not prefix_mask:
+        s_lse = tl.load(suffix_lse + head_idx * num_tokens + token_idx)
+        if OUTPUT_LSE:
+            tl.store(output_lse + head_idx * num_tokens + token_idx, s_lse)
+
+        s_out = tl.load(
+            suffix_output
+            + token_idx * num_heads * prefix_head_stride
+            + head_idx * prefix_head_stride
+            + head_arange,
+            mask=head_mask,
+        )
+
+        if USE_FP8:
+            s_out = s_out * (1.0 / tl.load(output_scale))
+            s_out = tl.clamp(s_out, FP8_MIN, FP8_MAX)
+            s_out = s_out.to(output.dtype.element_ty)
+
+        tl.store(
+            output
+            + token_idx * num_heads * output_head_stride
+            + head_idx * output_head_stride
+            + head_arange,
+            s_out,
+            mask=head_mask,
+        )
+        return
+
+    # For tokens with context (token_idx < prefill_tokens_with_context),
+    # perform normal merge operation
     p_lse = tl.load(prefix_lse + head_idx * num_tokens + token_idx)
     s_lse = tl.load(suffix_lse + head_idx * num_tokens + token_idx)
 
@@ -83,8 +138,6 @@ def merge_attn_states_kernel(
         out_lse = tl.log(out_se) + max_lse
         tl.store(output_lse + head_idx * num_tokens + token_idx, out_lse)
 
-    head_arange = tl.arange(0, PADDED_HEAD_SIZE)
-    head_mask = head_arange < HEAD_SIZE
     p_out = tl.load(
         prefix_output
         + token_idx * num_heads * prefix_head_stride
@@ -106,6 +159,12 @@ def merge_attn_states_kernel(
     p_scale = p_se / out_se
     s_scale = s_se / out_se
     out = p_out * p_scale + s_out * s_scale
+
+    if USE_FP8:
+        out = out * (1.0 / tl.load(output_scale))
+        out = tl.clamp(out, FP8_MIN, FP8_MAX)
+        out = out.to(output.dtype.element_ty)
+
     tl.store(
         output
         + token_idx * num_heads * output_head_stride
diff --git a/vllm/v1/attention/ops/triton_reshape_and_cache_flash.py b/vllm/v1/attention/ops/triton_reshape_and_cache_flash.py
index c5c9a9c96662..6e696fdb5135 100644
--- a/vllm/v1/attention/ops/triton_reshape_and_cache_flash.py
+++ b/vllm/v1/attention/ops/triton_reshape_and_cache_flash.py
@@ -3,8 +3,15 @@
 
 import torch
 
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    FP8_DTYPE,
+    get_fp8_min_max,
+)
 from vllm.platforms import current_platform
 from vllm.triton_utils import tl, triton
+from vllm.utils.torch_utils import is_quantized_kv_cache
+
+FP8_MIN, FP8_MAX = get_fp8_min_max()
 
 
 @triton.jit
@@ -69,9 +76,15 @@ def reshape_and_cache_kernel_flash(
             + (cur_dim % x)
         )
     else:
-        tgt_base = block_idx * block_stride + block_offset * page_stride
-        tgt_idx_k = tgt_base + tile_pos
-        tgt_idx_v = tgt_base + tile_pos
+        cur_head = tile_pos // head_size
+        cur_dim = tile_pos % head_size
+        tgt_idx_k = (
+            block_idx * block_stride
+            + block_offset * page_stride
+            + cur_head * head_stride
+            + cur_dim
+        )
+        tgt_idx_v = tgt_idx_k
 
     # [TILE_SIZE]
     key_load = tl.load(
@@ -111,6 +124,198 @@ def reshape_and_cache_kernel_flash(
     return
 
 
+# ---------------------------------------------------------------------------
+# Per-token-head dynamic quantization kernel
+# Grid: (num_tokens, NUM_KV_HEADS)
+# Each program handles one (token, head) pair:
+#   1. Loads K (or V) for that single head
+#   2. Computes absmax across head_size → scale = absmax / QUANT_MAX
+#   3. Quantizes and stores the data + per-head scale
+#
+# Parametrised by QUANT_MAX / QUANT_MIN so the same code path works
+# for int8 (±127/128), fp8_e4m3 (±448), and other formats.
+# ---------------------------------------------------------------------------
+@triton.jit
+def _reshape_cache_per_token_head(
+    key_ptr,  # [num_tokens, num_kv_heads, head_size]
+    value_ptr,  # [num_tokens, num_kv_heads, head_size_v]
+    key_cache_ptr,  # [num_blocks, block_size, num_kv_heads, head_size]
+    value_cache_ptr,  # [num_blocks, block_size, num_kv_heads, head_size_v]
+    k_scale_cache_ptr,  # [num_blocks, block_size, num_kv_heads] float32
+    v_scale_cache_ptr,  # [num_blocks, block_size, num_kv_heads] float32
+    slot_mapping_ptr,  # [num_tokens]
+    stride_key_tok: tl.int64,
+    stride_key_head: tl.int64,
+    stride_val_tok: tl.int64,
+    stride_val_head: tl.int64,
+    stride_kc_blk: tl.int64,  # key_cache stride over blocks
+    stride_kc_slot: tl.int64,  # key_cache stride over slots
+    stride_kc_head: tl.int64,  # key_cache stride over heads
+    stride_vc_blk: tl.int64,
+    stride_vc_slot: tl.int64,
+    stride_vc_head: tl.int64,
+    stride_ks_blk: tl.int64,  # k_scale_cache stride[0] (blocks)
+    stride_ks_slot: tl.int64,  # k_scale_cache stride[1] (slots)
+    stride_ks_head: tl.int64,  # k_scale_cache stride[2] (heads)
+    stride_vs_blk: tl.int64,  # v_scale_cache stride[0] (blocks)
+    stride_vs_slot: tl.int64,  # v_scale_cache stride[1] (slots)
+    stride_vs_head: tl.int64,  # v_scale_cache stride[2] (heads)
+    block_size: tl.constexpr,
+    head_size: tl.constexpr,
+    head_size_v: tl.constexpr,
+    HEAD_SIZE_PADDED: tl.constexpr,  # next_power_of_2(max(head_size, head_size_v))
+    QUANT_MAX: tl.constexpr = 127.0,
+    QUANT_MIN: tl.constexpr = -128.0,
+):
+    tok = tl.program_id(0)
+    head = tl.program_id(1)
+
+    slot = tl.load(slot_mapping_ptr + tok).to(tl.int64)
+    if slot < 0:
+        return
+
+    blk = slot // block_size
+    slot_in_blk = slot % block_size
+
+    dim_offs = tl.arange(0, HEAD_SIZE_PADDED)
+
+    # ---- Key: load one head → absmax → quantize → store -------------------
+    k_mask = dim_offs < head_size
+    k_h = tl.load(
+        key_ptr + tok * stride_key_tok + head * stride_key_head + dim_offs,
+        mask=k_mask,
+        other=0.0,
+    ).to(tl.float32)
+
+    k_scale = tl.maximum(tl.max(tl.abs(k_h)) / QUANT_MAX, 1e-6)
+    tl.store(
+        k_scale_cache_ptr
+        + blk * stride_ks_blk
+        + slot_in_blk * stride_ks_slot
+        + head * stride_ks_head,
+        k_scale,
+    )
+
+    k_q = tl.clamp(k_h * (1.0 / k_scale), QUANT_MIN, QUANT_MAX)
+    tl.store(
+        key_cache_ptr
+        + blk * stride_kc_blk
+        + slot_in_blk * stride_kc_slot
+        + head * stride_kc_head
+        + dim_offs,
+        k_q,
+        mask=k_mask,
+    )
+
+    # ---- Value: same per-head approach ------------------------------------
+    v_mask = dim_offs < head_size_v
+    v_h = tl.load(
+        value_ptr + tok * stride_val_tok + head * stride_val_head + dim_offs,
+        mask=v_mask,
+        other=0.0,
+    ).to(tl.float32)
+
+    v_scale = tl.maximum(tl.max(tl.abs(v_h)) / QUANT_MAX, 1e-6)
+    tl.store(
+        v_scale_cache_ptr
+        + blk * stride_vs_blk
+        + slot_in_blk * stride_vs_slot
+        + head * stride_vs_head,
+        v_scale,
+    )
+
+    v_q = tl.clamp(v_h * (1.0 / v_scale), QUANT_MIN, QUANT_MAX)
+    tl.store(
+        value_cache_ptr
+        + blk * stride_vc_blk
+        + slot_in_blk * stride_vc_slot
+        + head * stride_vc_head
+        + dim_offs,
+        v_q,
+        mask=v_mask,
+    )
+
+
+# Mapping from cache torch dtype to (QUANT_MAX, QUANT_MIN) for the
+# per-token-head quantization kernel.
+_PER_TOKEN_HEAD_QUANT_PARAMS: dict[torch.dtype, tuple[float, float]] = {
+    torch.int8: (127.0, -128.0),
+    FP8_DTYPE: (FP8_MAX, FP8_MIN),
+}
+
+
+def triton_reshape_and_cache_flash_per_token_head_quant(
+    key: torch.Tensor,  # [num_tokens, num_kv_heads, head_size]
+    value: torch.Tensor,  # [num_tokens, num_kv_heads, head_size_v]
+    key_cache: torch.Tensor,  # [num_blocks, block_size, num_kv_heads, head_size]
+    value_cache: torch.Tensor,  # [num_blocks, block_size, num_kv_heads, head_size_v]
+    k_scale_cache: torch.Tensor,  # [num_blocks, block_size, num_kv_heads] float32
+    v_scale_cache: torch.Tensor,  # [num_blocks, block_size, num_kv_heads] float32
+    slot_mapping: torch.Tensor,  # [num_tokens]
+):
+    """Quantize key/value per (token, head) and write to paged cache.
+
+    Computes one scale = absmax / QUANT_MAX per (token, head), stores
+    quantized data in key_cache/value_cache, and stores the float32
+    scale in k_scale_cache/v_scale_cache.
+
+    The quantization range (QUANT_MAX, QUANT_MIN) is derived from the
+    cache tensor dtype so the same code path works for int8 and fp8.
+    """
+    cache_dtype = key_cache.dtype
+    quant_params = _PER_TOKEN_HEAD_QUANT_PARAMS.get(cache_dtype)
+    if quant_params is None:
+        raise ValueError(
+            f"Per-token-head quantization not supported for cache dtype "
+            f"{cache_dtype}.  Supported: {list(_PER_TOKEN_HEAD_QUANT_PARAMS)}"
+        )
+    quant_max, quant_min = quant_params
+
+    num_tokens, num_kv_heads, head_size = key.shape
+    head_size_v = value.shape[2]
+    head_size_padded = triton.next_power_of_2(max(head_size, head_size_v))
+
+    block_size = key_cache.shape[1]
+
+    if current_platform.is_rocm() or current_platform.is_xpu():
+        num_warps = 4
+    else:
+        num_warps = min(16, max(1, head_size_padded // 32))
+
+    _reshape_cache_per_token_head[(num_tokens, num_kv_heads)](
+        key_ptr=key,
+        value_ptr=value,
+        key_cache_ptr=key_cache,
+        value_cache_ptr=value_cache,
+        k_scale_cache_ptr=k_scale_cache,
+        v_scale_cache_ptr=v_scale_cache,
+        slot_mapping_ptr=slot_mapping,
+        stride_key_tok=key.stride(0),
+        stride_key_head=key.stride(1),
+        stride_val_tok=value.stride(0),
+        stride_val_head=value.stride(1),
+        stride_kc_blk=key_cache.stride(0),
+        stride_kc_slot=key_cache.stride(1),
+        stride_kc_head=key_cache.stride(2),
+        stride_vc_blk=value_cache.stride(0),
+        stride_vc_slot=value_cache.stride(1),
+        stride_vc_head=value_cache.stride(2),
+        stride_ks_blk=k_scale_cache.stride(0),
+        stride_ks_slot=k_scale_cache.stride(1),
+        stride_ks_head=k_scale_cache.stride(2),
+        stride_vs_blk=v_scale_cache.stride(0),
+        stride_vs_slot=v_scale_cache.stride(1),
+        stride_vs_head=v_scale_cache.stride(2),
+        block_size=block_size,
+        head_size=head_size,
+        head_size_v=head_size_v,
+        HEAD_SIZE_PADDED=head_size_padded,
+        QUANT_MAX=quant_max,
+        QUANT_MIN=quant_min,
+        num_warps=num_warps,
+    )
+
+
 def triton_reshape_and_cache_flash(
     key: torch.Tensor,  # [num_tokens, num_heads, head_size]
     value: torch.Tensor,  # [num_tokens, num_heads, head_size]
@@ -145,16 +350,18 @@ def triton_reshape_and_cache_flash(
     block_stride = key_cache.stride()[0]
     page_stride = key_cache.stride()[1]
 
-    assert kv_cache_dtype == "auto" or kv_cache_dtype.startswith("fp8"), (
+    assert kv_cache_dtype == "auto" or is_quantized_kv_cache(kv_cache_dtype), (
         f"unsupported kv_cache_dtype (str), got {kv_cache_dtype}."
     )
     kv_cache_torch_dtype = (
         current_platform.fp8_dtype()
-        if kv_cache_dtype.startswith("fp8")
+        if is_quantized_kv_cache(kv_cache_dtype)
         else key_cache.dtype
     )
 
-    if key_cache.dtype != kv_cache_torch_dtype and kv_cache_dtype.startswith("fp8"):
+    if key_cache.dtype != kv_cache_torch_dtype and is_quantized_kv_cache(
+        kv_cache_dtype
+    ):
         # to avoid erounous implicit cast in triton kernel (tl.store to uint8)
         # (e.g. explicit cast to fp8e4m3fnuz is not supported in triton 3.4)
         key_cache = key_cache.view(kv_cache_torch_dtype)
@@ -164,7 +371,7 @@ def triton_reshape_and_cache_flash(
         "uint8 is not supported by triton reshape_and_cache_flash"
     )
 
-    FP8_KV_CACHE = kv_cache_dtype.startswith("fp8")
+    FP8_KV_CACHE = is_quantized_kv_cache(kv_cache_dtype)
     assert (not FP8_KV_CACHE) or kv_cache_torch_dtype in [
         torch.float8_e4m3fn,
         torch.float8_e5m2,
@@ -215,7 +422,6 @@ def triton_reshape_and_cache_flash(
         block_size=block_size,
         x=x,
         USE_HEAD_MAJOR_LAYOUT=use_head_major_layout,
-        # FP8 flags
         FP8_KV_CACHE=FP8_KV_CACHE,
         # autotune parameters
         TILE_SIZE=TILE_SIZE,
@@ -323,16 +529,16 @@ def triton_reshape_and_cache_flash_diffkv(
     block_stride = kv_cache.stride()[0]
     page_stride = kv_cache.stride()[1]
 
-    assert kv_cache_dtype == "auto" or kv_cache_dtype.startswith("fp8"), (
+    assert kv_cache_dtype == "auto" or is_quantized_kv_cache(kv_cache_dtype), (
         f"unsupported kv_cache_dtype (str), got {kv_cache_dtype}."
     )
     kv_cache_torch_dtype = (
         current_platform.fp8_dtype()
-        if kv_cache_dtype.startswith("fp8")
+        if is_quantized_kv_cache(kv_cache_dtype)
         else kv_cache.dtype
     )
 
-    if kv_cache.dtype != kv_cache_torch_dtype and kv_cache_dtype.startswith("fp8"):
+    if kv_cache.dtype != kv_cache_torch_dtype and is_quantized_kv_cache(kv_cache_dtype):
         # to avoid erounous implicit cast in triton kernel (tl.store to uint8)
         # (e.g. explicit cast to fp8e4m3fnuz is not supported in triton 3.4)
         kv_cache = kv_cache.view(kv_cache_torch_dtype)
@@ -341,7 +547,7 @@ def triton_reshape_and_cache_flash_diffkv(
         "uint8 is not supported by triton reshape_and_cache_flash_diffkv"
     )
 
-    FP8_KV_CACHE = kv_cache_dtype.startswith("fp8")
+    FP8_KV_CACHE = is_quantized_kv_cache(kv_cache_dtype)
     assert (not FP8_KV_CACHE) or kv_cache_torch_dtype in [
         torch.float8_e4m3fn,
         torch.float8_e5m2,
diff --git a/vllm/v1/attention/ops/triton_turboquant_decode.py b/vllm/v1/attention/ops/triton_turboquant_decode.py
new file mode 100644
index 000000000000..3adaf2610d8d
--- /dev/null
+++ b/vllm/v1/attention/ops/triton_turboquant_decode.py
@@ -0,0 +1,630 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Triton fused TurboQuant decode attention.
+
+Decode path: Triton stage1 (split-KV tiled attention scoring + value
+accumulation) + stage2 (log-sum-exp reduction across splits).
+
+Supports FP8 (E4M3) keys, 3-bit and 4-bit uniform quantized values.
+"""
+
+import math
+from typing import Any
+
+import torch
+
+from vllm.platforms import current_platform
+from vllm.triton_utils import tl, triton
+from vllm.v1.attention.ops.triton_decode_attention import (
+    _fwd_kernel_stage2,
+)
+
+_FP8_E4B15: dict[int, int] = {}
+
+
+def _use_fp8_e4b15(device: int = 0) -> int:
+    """Return 1 if device needs fp8e4b15 (Ampere/Ada, SM < 8.9), else 0.
+    On non-CUDA platforms (e.g. XPU), always returns 0 (use e4nv format).
+    """
+    if device not in _FP8_E4B15:
+        if current_platform.is_cuda_alike():
+            cap = torch.cuda.get_device_capability(device)
+            _FP8_E4B15[device] = 1 if cap < (8, 9) else 0
+        else:
+            _FP8_E4B15[device] = 0
+    return _FP8_E4B15[device]
+
+
+# ---------------------------------------------------------------------------
+# Stage 1: Fused TQ score + value accumulation (BLOCK_KV tiled)
+# ---------------------------------------------------------------------------
+
+
+@triton.jit
+def _tq_decode_stage1(
+    # Precomputed query projection
+    Q_rot_ptr,  # [B, Hq, D] float32
+    # Compressed KV cache (combined K+V)
+    KV_cache_ptr,  # [num_blocks, block_size, Hk, padded_slot] uint8
+    # Block table and sequence info
+    Block_table_ptr,  # [B, max_num_blocks] int32
+    Seq_lens_ptr,  # [B] int32
+    # TQ parameters
+    Centroids_ptr,  # [n_centroids] float32
+    # Output (intermediate for stage2)
+    Mid_o_ptr,  # [B, Hq, NUM_KV_SPLITS, D+1] float32
+    # Strides
+    stride_qb,
+    stride_qh,  # Q strides: [B, Hq, D]
+    stride_cache_block,
+    stride_cache_pos,
+    stride_cache_head,  # KV cache
+    stride_bt_b,  # block_table stride per batch
+    stride_mid_b,
+    stride_mid_h,
+    stride_mid_s,  # mid_o strides
+    # Constexpr dims
+    NUM_KV_HEADS: tl.constexpr,
+    HEAD_DIM: tl.constexpr,
+    BLOCK_SIZE: tl.constexpr,  # KV cache block_size (pages)
+    NUM_KV_SPLITS: tl.constexpr,
+    KV_GROUP_SIZE: tl.constexpr,  # Hq // Hk
+    # TQ layout constants
+    MSE_BITS: tl.constexpr,  # 3 or 4
+    MSE_BYTES: tl.constexpr,  # ceil(D * mse_bits / 8)
+    KPS: tl.constexpr,  # key_packed_size
+    VQB: tl.constexpr,  # value_quant_bits (4 or 8=FP8)
+    VAL_DATA_BYTES: tl.constexpr,  # ceil(D * vqb / 8) or D for FP8
+    # Score constants
+    ATTN_SCALE: tl.constexpr,  # 1/sqrt(D)
+    # Block tile sizes
+    BLOCK_D: tl.constexpr,  # next_power_of_2(HEAD_DIM)
+    BLOCK_KV: tl.constexpr,  # tokens per tile (16)
+    KEY_FP8: tl.constexpr,  # 1 if K is stored as FP8
+    NORM_CORRECTION: tl.constexpr = 0,  # 1 = re-normalize centroids
+    FP8_E4B15: tl.constexpr = 0,  # 1 = use e4b15 (Ampere/Ada), 0 = e4nv (Hopper+)
+):
+    bid = tl.program_id(0)  # batch index
+    hid = tl.program_id(1)  # q_head index
+    sid = tl.program_id(2)  # kv_split index
+
+    kv_head = hid // KV_GROUP_SIZE
+
+    # Sequence length for this batch
+    seq_len = tl.load(Seq_lens_ptr + bid)
+
+    # KV split range
+    split_len = tl.cdiv(seq_len, NUM_KV_SPLITS)
+    split_start = split_len * sid
+    split_end = tl.minimum(split_start + split_len, seq_len)
+
+    if split_start >= split_end:
+        return
+
+    # Dimension offsets
+    d_offs = tl.arange(0, BLOCK_D)
+    d_mask = d_offs < HEAD_DIM
+    kv_range = tl.arange(0, BLOCK_KV)
+
+    # Load query vector: q_rot — [BLOCK_D] float32
+    q_base = bid * stride_qb + hid * stride_qh
+    q_rot = tl.load(Q_rot_ptr + q_base + d_offs, mask=d_mask, other=0.0).to(tl.float32)
+
+    # Precompute byte/bit index vectors for MSE gather loads
+    if not KEY_FP8:
+        mse_bit_off = d_offs * MSE_BITS
+        mse_byte_idx = mse_bit_off // 8
+        mse_bit_shift = mse_bit_off % 8
+        mse_mask = (1 << MSE_BITS) - 1
+
+    # Precompute value bit/byte index vectors (loop-invariant)
+    if VQB == 3:
+        val_bit_off = d_offs * 3
+        val_byte_idx = val_bit_off // 8
+        val_bit_shift = val_bit_off % 8
+
+    # Online softmax accumulators
+    m_prev = -float("inf")
+    l_prev = 0.0
+    acc = tl.zeros([BLOCK_D], dtype=tl.float32)
+
+    bt_base = bid * stride_bt_b
+
+    # ================================================================
+    # TILED LOOP: process BLOCK_KV tokens per iteration
+    # ================================================================
+    for start_n in range(split_start, split_end, BLOCK_KV):
+        kv_offs = start_n + kv_range
+        kv_mask = kv_offs < split_end
+
+        page_idx = kv_offs // BLOCK_SIZE
+        page_off = kv_offs % BLOCK_SIZE
+        block_nums = tl.load(
+            Block_table_ptr + bt_base + page_idx,
+            mask=kv_mask,
+            other=0,
+        ).to(tl.int64)
+
+        slot_bases = (
+            block_nums * stride_cache_block
+            + page_off.to(tl.int64) * stride_cache_pos
+            + tl.cast(kv_head, tl.int64) * stride_cache_head
+        )
+
+        # ============================================================
+        # COMPUTE ATTENTION SCORES: [BLOCK_KV]
+        # ============================================================
+        if KEY_FP8:
+            k_addrs = slot_bases[:, None] + d_offs[None, :]
+            k_raw = tl.load(
+                KV_cache_ptr + k_addrs,
+                mask=kv_mask[:, None] & d_mask[None, :],
+                other=0,
+            )
+            if FP8_E4B15:
+                k_float = k_raw.to(tl.float8e4b15, bitcast=True).to(tl.float32)
+            else:
+                k_float = k_raw.to(tl.float8e4nv, bitcast=True).to(tl.float32)
+            scores = (
+                tl.sum(
+                    tl.where(d_mask[None, :], q_rot[None, :] * k_float, 0.0),
+                    axis=1,
+                )
+                * ATTN_SCALE
+            )
+            scores = tl.where(kv_mask, scores, -float("inf"))
+        else:
+            # MSE unpack + norms
+            mse_addrs0 = slot_bases[:, None] + mse_byte_idx[None, :]
+            mse_raw0 = tl.load(
+                KV_cache_ptr + mse_addrs0,
+                mask=kv_mask[:, None] & d_mask[None, :],
+                other=0,
+            ).to(tl.int32)
+            mse_raw1 = tl.load(
+                KV_cache_ptr + mse_addrs0 + 1,
+                mask=kv_mask[:, None] & d_mask[None, :],
+                other=0,
+            ).to(tl.int32)
+            raw16 = mse_raw0 | (mse_raw1 << 8)
+            mse_idx = (raw16 >> mse_bit_shift[None, :]) & mse_mask
+
+            # Centroid gather + dot product
+            c_vals = tl.load(
+                Centroids_ptr + mse_idx,
+                mask=kv_mask[:, None] & d_mask[None, :],
+                other=0.0,
+            )
+
+            # Norm correction: re-normalize centroid vector to unit norm
+            if NORM_CORRECTION:
+                c_norm_sq = tl.sum(
+                    tl.where(d_mask[None, :], c_vals * c_vals, 0.0),
+                    axis=1,
+                )
+                c_inv_norm = 1.0 / tl.sqrt(c_norm_sq + 1e-16)
+                c_vals = c_vals * c_inv_norm[:, None]
+
+            term1 = tl.sum(
+                tl.where(d_mask[None, :], q_rot[None, :] * c_vals, 0.0),
+                axis=1,
+            )
+
+            # Load norms (fp16 -> fp32): norms are at MSE_BYTES offset
+            norm_bases = slot_bases + MSE_BYTES
+            n_lo = tl.load(KV_cache_ptr + norm_bases, mask=kv_mask, other=0).to(
+                tl.uint16
+            )
+            n_hi = tl.load(KV_cache_ptr + norm_bases + 1, mask=kv_mask, other=0).to(
+                tl.uint16
+            )
+            vec_norms = (n_lo | (n_hi << 8)).to(tl.float16, bitcast=True).to(tl.float32)
+
+            scores = vec_norms * term1 * ATTN_SCALE
+            scores = tl.where(kv_mask, scores, -float("inf"))
+
+        # ============================================================
+        # ONLINE SOFTMAX UPDATE (block-level)
+        # ============================================================
+        n_e_max = tl.maximum(tl.max(scores, 0), m_prev)
+        re_scale = tl.exp(m_prev - n_e_max)
+        p = tl.exp(scores - n_e_max)
+
+        # ============================================================
+        # VALUE LOAD + DEQUANTIZE: [BLOCK_KV, BLOCK_D]
+        # ============================================================
+        val_bases = slot_bases + KPS
+
+        if VQB == 3:
+            val_addrs0 = val_bases[:, None] + val_byte_idx[None, :]
+            val_raw0 = tl.load(
+                KV_cache_ptr + val_addrs0,
+                mask=kv_mask[:, None] & d_mask[None, :],
+                other=0,
+            ).to(tl.int32)
+            val_raw1 = tl.load(
+                KV_cache_ptr + val_addrs0 + 1,
+                mask=kv_mask[:, None] & d_mask[None, :],
+                other=0,
+            ).to(tl.int32)
+            raw16 = val_raw0 | (val_raw1 << 8)
+            v_idx = ((raw16 >> val_bit_shift[None, :]) & 0x7).to(tl.float32)
+
+            sc_bases = val_bases + VAL_DATA_BYTES
+            sc_lo = tl.load(KV_cache_ptr + sc_bases, mask=kv_mask, other=0).to(
+                tl.uint16
+            )
+            sc_hi = tl.load(KV_cache_ptr + sc_bases + 1, mask=kv_mask, other=0).to(
+                tl.uint16
+            )
+            v_scales = (
+                (sc_lo | (sc_hi << 8)).to(tl.float16, bitcast=True).to(tl.float32)
+            )
+            zr_lo = tl.load(KV_cache_ptr + sc_bases + 2, mask=kv_mask, other=0).to(
+                tl.uint16
+            )
+            zr_hi = tl.load(KV_cache_ptr + sc_bases + 3, mask=kv_mask, other=0).to(
+                tl.uint16
+            )
+            v_zeros = (zr_lo | (zr_hi << 8)).to(tl.float16, bitcast=True).to(tl.float32)
+            values = v_idx * v_scales[:, None] + v_zeros[:, None]
+        else:  # VQB == 4
+            vb_idx = d_offs // 2
+            vb_shift = (d_offs % 2) * 4
+            val_addrs = val_bases[:, None] + vb_idx[None, :]
+            val_raw = tl.load(
+                KV_cache_ptr + val_addrs,
+                mask=kv_mask[:, None] & d_mask[None, :],
+                other=0,
+            ).to(tl.int32)
+            v_idx = ((val_raw >> vb_shift[None, :]) & 0xF).to(tl.float32)
+
+            sc_bases = val_bases + VAL_DATA_BYTES
+            sc_lo = tl.load(KV_cache_ptr + sc_bases, mask=kv_mask, other=0).to(
+                tl.uint16
+            )
+            sc_hi = tl.load(KV_cache_ptr + sc_bases + 1, mask=kv_mask, other=0).to(
+                tl.uint16
+            )
+            v_scales = (
+                (sc_lo | (sc_hi << 8)).to(tl.float16, bitcast=True).to(tl.float32)
+            )
+            zr_lo = tl.load(KV_cache_ptr + sc_bases + 2, mask=kv_mask, other=0).to(
+                tl.uint16
+            )
+            zr_hi = tl.load(KV_cache_ptr + sc_bases + 3, mask=kv_mask, other=0).to(
+                tl.uint16
+            )
+            v_zeros = (zr_lo | (zr_hi << 8)).to(tl.float16, bitcast=True).to(tl.float32)
+            values = v_idx * v_scales[:, None] + v_zeros[:, None]
+
+        # ============================================================
+        # WEIGHTED VALUE ACCUMULATION
+        # ============================================================
+        acc = acc * re_scale + tl.sum(p[:, None] * values, 0)
+        l_prev = l_prev * re_scale + tl.sum(p, 0)
+        m_prev = n_e_max
+
+    # Store partial result
+    out_base = bid * stride_mid_b + hid * stride_mid_h + sid * stride_mid_s
+    safe_l = tl.where(l_prev > 0.0, l_prev, 1.0)
+    tl.store(Mid_o_ptr + out_base + d_offs, acc / safe_l, mask=d_mask)
+    lse = m_prev + tl.log(safe_l)
+    tl.store(Mid_o_ptr + out_base + HEAD_DIM, lse)
+
+
+# ---------------------------------------------------------------------------
+# Pre-dequant kernel: Bulk dequant K (MSE+norms) and V to fp16
+# ---------------------------------------------------------------------------
+
+
+@triton.jit
+def _tq_full_dequant_kv(
+    KV_cache_ptr,
+    Block_table_ptr,
+    Centroids_ptr,
+    K_out_ptr,  # [B, Hk, max_seq, D] float16
+    V_out_ptr,  # [B, Hk, max_seq, D] float16
+    stride_ko_b,
+    stride_ko_h,
+    stride_ko_s,
+    stride_vo_b,
+    stride_vo_h,
+    stride_vo_s,
+    stride_cache_block,
+    stride_cache_pos,
+    stride_cache_head,
+    stride_bt_b,
+    HEAD_DIM: tl.constexpr,
+    BLOCK_SIZE: tl.constexpr,
+    NUM_KV_HEADS: tl.constexpr,
+    MSE_BYTES: tl.constexpr,
+    KPS: tl.constexpr,
+    VQB: tl.constexpr,
+    VAL_DATA_BYTES: tl.constexpr,
+    MSE_BITS: tl.constexpr,
+    KEY_FP8: tl.constexpr,
+    BLOCK_D: tl.constexpr,
+    NORM_CORRECTION: tl.constexpr = 0,
+    FP8_E4B15: tl.constexpr = 0,  # 1 = use e4b15 (Ampere/Ada), 0 = e4nv (Hopper+)
+):
+    """Full dequant: reconstruct K (MSE centroids * norm or FP8) and V to fp16."""
+    pos = tl.program_id(0)
+    bh = tl.program_id(1)
+    bid = bh // NUM_KV_HEADS
+    hid = bh % NUM_KV_HEADS
+
+    page_idx = pos // BLOCK_SIZE
+    page_off = pos % BLOCK_SIZE
+    block_num = tl.load(Block_table_ptr + bid * stride_bt_b + page_idx).to(tl.int64)
+    slot_base = (
+        block_num * stride_cache_block
+        + tl.cast(page_off, tl.int64) * stride_cache_pos
+        + tl.cast(hid, tl.int64) * stride_cache_head
+    )
+
+    d_offs = tl.arange(0, BLOCK_D)
+    d_mask = d_offs < HEAD_DIM
+
+    # === K dequant ===
+    ko_base = bid * stride_ko_b + hid * stride_ko_h + pos * stride_ko_s
+    if KEY_FP8:
+        k_raw = tl.load(KV_cache_ptr + slot_base + d_offs, mask=d_mask, other=0)
+        if FP8_E4B15:
+            k_recon = k_raw.to(tl.float8e4b15, bitcast=True).to(tl.float32)
+        else:
+            k_recon = k_raw.to(tl.float8e4nv, bitcast=True).to(tl.float32)
+        tl.store(K_out_ptr + ko_base + d_offs, k_recon.to(tl.float16), mask=d_mask)
+    else:
+        # MSE unpack (3-bit or 4-bit) + norms
+        mse_bit_off = d_offs * MSE_BITS
+        mse_byte_idx = mse_bit_off // 8
+        mse_bit_shift = mse_bit_off % 8
+        mse_umask = (1 << MSE_BITS) - 1
+
+        mse_raw0 = tl.load(
+            KV_cache_ptr + slot_base + mse_byte_idx, mask=d_mask, other=0
+        ).to(tl.int32)
+        mse_raw1 = tl.load(
+            KV_cache_ptr + slot_base + mse_byte_idx + 1, mask=d_mask, other=0
+        ).to(tl.int32)
+        raw16_key = mse_raw0 | (mse_raw1 << 8)
+        mse_idx = (raw16_key >> mse_bit_shift) & mse_umask
+
+        k_mse = tl.load(Centroids_ptr + mse_idx, mask=d_mask, other=0.0)
+
+        # Norm correction: re-normalize centroid vector to unit norm
+        if NORM_CORRECTION:
+            c_norm_sq = tl.sum(tl.where(d_mask, k_mse * k_mse, 0.0), axis=0)
+            c_inv_norm = 1.0 / tl.sqrt(c_norm_sq + 1e-16)
+            k_mse = k_mse * c_inv_norm
+
+        # Norms at MSE_BYTES offset (no QJL bytes)
+        norm_base = slot_base + MSE_BYTES
+        n_lo = tl.load(KV_cache_ptr + norm_base).to(tl.uint16)
+        n_hi = tl.load(KV_cache_ptr + norm_base + 1).to(tl.uint16)
+        vec_norm = (n_lo | (n_hi << 8)).to(tl.float16, bitcast=True).to(tl.float32)
+
+        k_recon = vec_norm * k_mse
+        tl.store(K_out_ptr + ko_base + d_offs, k_recon.to(tl.float16), mask=d_mask)
+
+    # === V dequant ===
+    val_base = slot_base + KPS
+    if VQB == 4:
+        vb_idx = d_offs // 2
+        vb_shift = (d_offs % 2) * 4
+        val_raw = tl.load(KV_cache_ptr + val_base + vb_idx, mask=d_mask, other=0).to(
+            tl.int32
+        )
+        v_idx = ((val_raw >> vb_shift) & 0xF).to(tl.float32)
+
+        sc_base = val_base + VAL_DATA_BYTES
+        sc_lo = tl.load(KV_cache_ptr + sc_base).to(tl.uint16)
+        sc_hi = tl.load(KV_cache_ptr + sc_base + 1).to(tl.uint16)
+        v_scale = (sc_lo | (sc_hi << 8)).to(tl.float16, bitcast=True).to(tl.float32)
+        zr_lo = tl.load(KV_cache_ptr + sc_base + 2).to(tl.uint16)
+        zr_hi = tl.load(KV_cache_ptr + sc_base + 3).to(tl.uint16)
+        v_zero = (zr_lo | (zr_hi << 8)).to(tl.float16, bitcast=True).to(tl.float32)
+        v_vals = v_idx * v_scale + v_zero
+    elif VQB == 3:
+        # 3-bit value unpack: 8 values per 3 bytes
+        val_bit_off = d_offs * 3
+        val_byte_idx = val_bit_off // 8
+        val_bit_shift = val_bit_off % 8
+        val_raw0 = tl.load(
+            KV_cache_ptr + val_base + val_byte_idx, mask=d_mask, other=0
+        ).to(tl.int32)
+        val_raw1 = tl.load(
+            KV_cache_ptr + val_base + val_byte_idx + 1, mask=d_mask, other=0
+        ).to(tl.int32)
+        raw16_val = val_raw0 | (val_raw1 << 8)
+        v_idx = ((raw16_val >> val_bit_shift) & 0x7).to(tl.float32)
+
+        sc_base = val_base + VAL_DATA_BYTES
+        sc_lo = tl.load(KV_cache_ptr + sc_base).to(tl.uint16)
+        sc_hi = tl.load(KV_cache_ptr + sc_base + 1).to(tl.uint16)
+        v_scale = (sc_lo | (sc_hi << 8)).to(tl.float16, bitcast=True).to(tl.float32)
+        zr_lo = tl.load(KV_cache_ptr + sc_base + 2).to(tl.uint16)
+        zr_hi = tl.load(KV_cache_ptr + sc_base + 3).to(tl.uint16)
+        v_zero = (zr_lo | (zr_hi << 8)).to(tl.float16, bitcast=True).to(tl.float32)
+        v_vals = v_idx * v_scale + v_zero
+    else:
+        v_vals = tl.zeros([BLOCK_D], dtype=tl.float32)
+
+    vo_base = bid * stride_vo_b + hid * stride_vo_h + pos * stride_vo_s
+    tl.store(V_out_ptr + vo_base + d_offs, v_vals.to(tl.float16), mask=d_mask)
+
+
+# ---------------------------------------------------------------------------
+# Stage 2: Reuse from triton_decode_attention.py
+# ---------------------------------------------------------------------------
+
+# ---------------------------------------------------------------------------
+# Launcher — cached constants + fused GEMM
+# ---------------------------------------------------------------------------
+
+_layout_cache: dict = {}
+
+
+def _get_layout(D, mse_bits, value_quant_bits, key_packed_size):
+    """Get cached layout constants."""
+    key = (D, mse_bits, value_quant_bits, key_packed_size)
+    cfg = _layout_cache.get(key)
+    if cfg is None:
+        val_data_bytes = math.ceil(D * value_quant_bits / 8)
+        cfg = {
+            "mse_bytes": math.ceil(D * mse_bits / 8),
+            "val_data_bytes": val_data_bytes,
+            "mse_bits": mse_bits,
+            "n_centroids": 2**mse_bits,
+            "BLOCK_D": triton.next_power_of_2(D),
+        }
+        _layout_cache[key] = cfg
+    return cfg
+
+
+def triton_turboquant_decode_attention(
+    query: torch.Tensor,  # [B, Hq, D] — original query
+    kv_cache: torch.Tensor,  # [num_blocks, block_size, Hk, padded_slot] uint8
+    block_table: torch.Tensor,  # [B, max_num_blocks] int32
+    seq_lens: torch.Tensor,  # [B] int32
+    Pi: torch.Tensor,  # [D, D] float32
+    centroids: torch.Tensor,  # [n_centroids] float32
+    scale: float,
+    mse_bits: int,
+    key_packed_size: int,
+    value_quant_bits: int,
+    key_fp8: bool = False,
+    norm_correction: bool = False,
+    PiT: torch.Tensor | None = None,  # [D, D] pre-computed Pi.T contiguous
+    # Pre-allocated buffers (optional, avoids per-call allocation)
+    mid_o_buf: torch.Tensor | None = None,
+    output_buf: torch.Tensor | None = None,
+    lse_buf: torch.Tensor | None = None,
+    buf_holder: Any = None,
+    max_num_kv_splits: int = 32,  # fixed split count (must be constant for cudagraph)
+) -> torch.Tensor:
+    """Launch fused TQ decode attention (Triton stage1 + stage2).
+
+    Returns: output tensor [B, Hq, D] in query's dtype.
+    """
+    B, Hq, D = query.shape
+    Hk = kv_cache.shape[2]
+    block_size = kv_cache.shape[1]
+    kv_group_size = Hq // Hk
+    device = query.device
+
+    cfg = _get_layout(D, mse_bits, value_quant_bits, key_packed_size)
+
+    # Compute q_rot = q @ Pi.T (rotated query for MSE key scoring)
+    # FP8 path: pass query directly (float16); kernel casts inline.
+    # MSE path: still needs external GEMM (cuBLAS), so q_rot is float32.
+    if key_fp8:
+        q_rot = query.contiguous()
+    else:
+        q_float = query.float()
+        if PiT is None:
+            PiT = Pi.T.contiguous()
+        q_rot = (q_float @ PiT).contiguous()
+
+    NUM_KV_SPLITS = max_num_kv_splits
+
+    if (
+        mid_o_buf is not None
+        and mid_o_buf.shape[0] >= B
+        and mid_o_buf.shape[2] >= NUM_KV_SPLITS
+    ):
+        mid_o = mid_o_buf[:B, :Hq, :NUM_KV_SPLITS, :]
+    else:
+        mid_o = torch.empty(
+            B,
+            Hq,
+            NUM_KV_SPLITS,
+            D + 1,
+            dtype=torch.float32,
+            device=device,
+        )
+        if buf_holder is not None:
+            buf_holder._tq_mid_o_buf = mid_o
+
+    # Stage 1: split-KV tiled attention scoring + value accumulation
+    fp8_e4b15 = _use_fp8_e4b15(device.index or 0)
+    BLOCK_KV = 4
+    grid = (B, Hq, NUM_KV_SPLITS)
+    _tq_decode_stage1[grid](
+        q_rot,
+        kv_cache,
+        block_table,
+        seq_lens,
+        centroids,
+        mid_o,
+        q_rot.stride(0),
+        q_rot.stride(1),
+        kv_cache.stride(0),
+        kv_cache.stride(1),
+        kv_cache.stride(2),
+        block_table.stride(0),
+        mid_o.stride(0),
+        mid_o.stride(1),
+        mid_o.stride(2),
+        NUM_KV_HEADS=Hk,
+        HEAD_DIM=D,
+        BLOCK_SIZE=block_size,
+        NUM_KV_SPLITS=NUM_KV_SPLITS,
+        KV_GROUP_SIZE=kv_group_size,
+        MSE_BITS=mse_bits,
+        MSE_BYTES=cfg["mse_bytes"],
+        KPS=key_packed_size,
+        VQB=value_quant_bits,
+        VAL_DATA_BYTES=cfg["val_data_bytes"],
+        ATTN_SCALE=scale,
+        BLOCK_D=cfg["BLOCK_D"],
+        BLOCK_KV=BLOCK_KV,
+        KEY_FP8=1 if key_fp8 else 0,
+        NORM_CORRECTION=1 if norm_correction else 0,
+        FP8_E4B15=fp8_e4b15,
+        num_warps=1,
+        num_stages=1,
+    )
+
+    # Stage 2: Reduce across KV splits
+    # Output in query dtype — eliminates float16_copy kernel after stage2
+    out_dtype = query.dtype
+    if (
+        output_buf is not None
+        and output_buf.shape[0] >= B
+        and output_buf.dtype == out_dtype
+    ):
+        output = output_buf[:B, :Hq, :D]
+    else:
+        output = torch.empty(B, Hq, D, dtype=out_dtype, device=device)
+        if buf_holder is not None:
+            buf_holder._tq_output_buf = output
+    if lse_buf is not None and lse_buf.shape[0] >= B:
+        lse = lse_buf[:B, :Hq]
+    else:
+        lse = torch.empty(B, Hq, dtype=torch.float32, device=device)
+        if buf_holder is not None:
+            buf_holder._tq_lse_buf = lse
+
+    grid2 = (B, Hq)
+    _fwd_kernel_stage2[grid2](
+        mid_o,
+        output,
+        lse,
+        seq_lens,
+        mid_o.stride(0),
+        mid_o.stride(1),
+        mid_o.stride(2),
+        output.stride(0),
+        output.stride(1),
+        lse.stride(0),
+        NUM_KV_SPLITS=NUM_KV_SPLITS,
+        BLOCK_DV=cfg["BLOCK_D"],
+        Lv=D,
+        OUTPUT_FP16=1 if out_dtype == torch.float16 else 0,
+        num_warps=4,
+        num_stages=2,
+    )
+
+    return output  # already in query dtype
diff --git a/vllm/v1/attention/ops/triton_turboquant_store.py b/vllm/v1/attention/ops/triton_turboquant_store.py
new file mode 100644
index 000000000000..3ad2d41488e7
--- /dev/null
+++ b/vllm/v1/attention/ops/triton_turboquant_store.py
@@ -0,0 +1,447 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Fused Triton kernels for TurboQuant KV store.
+
+Two kernels:
+1. _tq_fused_store_fp8: FP8 key scatter + value uniform quantization.
+2. _tq_fused_store_mse: Fused binary-search bucketize + MSE index
+   packing + value quantization.
+
+The launcher `triton_turboquant_store` selects the appropriate kernel.
+"""
+
+import math
+
+import torch
+
+from vllm.triton_utils import tl, triton
+from vllm.v1.attention.ops.triton_turboquant_decode import _use_fp8_e4b15
+
+# ═══════════════════════════════════════════════════════════════════════
+# Shared: value uniform quantization + pack + scale/zero store
+# ═══════════════════════════════════════════════════════════════════════
+
+
+@triton.jit
+def _store_quantized_value(
+    Value_ptr,
+    KV_cache_ptr,
+    base,  # pid * D offset into Value_ptr
+    slot_base,  # byte offset into KV_cache_ptr for this slot+head
+    d_offs,  # tl.arange(0, BLOCK_D)
+    d_mask,  # d_offs < D
+    D: tl.constexpr,
+    KPS: tl.constexpr,
+    VQB: tl.constexpr,
+    VAL_DATA_BYTES: tl.constexpr,
+    BLOCK_D: tl.constexpr,
+    BLOCK_VAL: tl.constexpr,
+    BLOCK_GRP: tl.constexpr,
+):
+    """Uniform quantization of values to VQB bits, pack, and store with scale/zero."""
+    val_cache_offset = KPS
+
+    if VQB == 3:
+        val_vec = tl.load(Value_ptr + base + d_offs, mask=d_mask, other=0.0).to(
+            tl.float32
+        )
+        val_min = tl.min(tl.where(d_mask, val_vec, float("inf")), axis=0)
+        val_max = tl.max(tl.where(d_mask, val_vec, -float("inf")), axis=0)
+        v_scale = (val_max - val_min) / 7.0
+        v_scale = tl.where(v_scale > 1e-8, v_scale, 1e-8)
+
+        q_vals = tl.minimum(
+            tl.maximum(((val_vec - val_min) / v_scale + 0.5).to(tl.int32), 0), 7
+        )
+
+        grp_offs = tl.arange(0, BLOCK_GRP)
+        grp_mask = grp_offs < (D // 8)
+        q_grp = tl.reshape(q_vals, [BLOCK_GRP, 8])
+        shifts_3bit = tl.arange(0, 8) * 3
+        packed_24 = tl.sum(q_grp << shifts_3bit[None, :], axis=1)
+        b0 = (packed_24 & 0xFF).to(tl.uint8)
+        b1 = ((packed_24 >> 8) & 0xFF).to(tl.uint8)
+        b2 = ((packed_24 >> 16) & 0xFF).to(tl.uint8)
+        tl.store(
+            KV_cache_ptr + slot_base + val_cache_offset + grp_offs * 3,
+            b0,
+            mask=grp_mask,
+        )
+        tl.store(
+            KV_cache_ptr + slot_base + val_cache_offset + grp_offs * 3 + 1,
+            b1,
+            mask=grp_mask,
+        )
+        tl.store(
+            KV_cache_ptr + slot_base + val_cache_offset + grp_offs * 3 + 2,
+            b2,
+            mask=grp_mask,
+        )
+
+        sc_offset = val_cache_offset + VAL_DATA_BYTES
+        sc_f16 = v_scale.to(tl.float16)
+        sc_u16 = sc_f16.to(tl.uint16, bitcast=True)
+        tl.store(KV_cache_ptr + slot_base + sc_offset, (sc_u16 & 0xFF).to(tl.uint8))
+        tl.store(
+            KV_cache_ptr + slot_base + sc_offset + 1,
+            ((sc_u16 >> 8) & 0xFF).to(tl.uint8),
+        )
+        zr_f16 = val_min.to(tl.float16)
+        zr_u16 = zr_f16.to(tl.uint16, bitcast=True)
+        tl.store(KV_cache_ptr + slot_base + sc_offset + 2, (zr_u16 & 0xFF).to(tl.uint8))
+        tl.store(
+            KV_cache_ptr + slot_base + sc_offset + 3,
+            ((zr_u16 >> 8) & 0xFF).to(tl.uint8),
+        )
+
+    else:  # VQB == 4
+        val_vec = tl.load(Value_ptr + base + d_offs, mask=d_mask, other=0.0).to(
+            tl.float32
+        )
+        val_min = tl.min(tl.where(d_mask, val_vec, float("inf")), axis=0)
+        val_max = tl.max(tl.where(d_mask, val_vec, -float("inf")), axis=0)
+        v_scale = (val_max - val_min) / 15.0
+        v_scale = tl.where(v_scale > 1e-8, v_scale, 1e-8)
+
+        # Quantize all D elements from register (no re-load)
+        q_all = tl.minimum(
+            tl.maximum(((val_vec - val_min) / v_scale + 0.5).to(tl.int32), 0), 15
+        )
+        # Reshape to pairs and pack two 4-bit values per byte
+        q_pairs = tl.reshape(q_all, [BLOCK_D // 2, 2])
+        shifts_4 = tl.arange(0, 2) * 4
+        packed_val = tl.sum((q_pairs & 0xF) << shifts_4[None, :], axis=1).to(tl.uint8)
+        val_offs = tl.arange(0, BLOCK_D // 2)
+        val_mask = val_offs < VAL_DATA_BYTES
+        tl.store(
+            KV_cache_ptr + slot_base + val_cache_offset + val_offs,
+            packed_val,
+            mask=val_mask,
+        )
+
+        sc_offset = val_cache_offset + VAL_DATA_BYTES
+        sc_f16 = v_scale.to(tl.float16)
+        sc_u16 = sc_f16.to(tl.uint16, bitcast=True)
+        tl.store(KV_cache_ptr + slot_base + sc_offset, (sc_u16 & 0xFF).to(tl.uint8))
+        tl.store(
+            KV_cache_ptr + slot_base + sc_offset + 1,
+            ((sc_u16 >> 8) & 0xFF).to(tl.uint8),
+        )
+        zr_f16 = val_min.to(tl.float16)
+        zr_u16 = zr_f16.to(tl.uint16, bitcast=True)
+        tl.store(KV_cache_ptr + slot_base + sc_offset + 2, (zr_u16 & 0xFF).to(tl.uint8))
+        tl.store(
+            KV_cache_ptr + slot_base + sc_offset + 3,
+            ((zr_u16 >> 8) & 0xFF).to(tl.uint8),
+        )
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# FP8 key store + value uniform quantization
+# ═══════════════════════════════════════════════════════════════════════
+
+
+@triton.jit
+def _tq_fused_store_fp8(
+    Key_ptr,  # [NH, D] float16/bfloat16 — raw keys
+    Value_ptr,  # [NH, D] float16/bfloat16 — raw values
+    KV_cache_ptr,  # [total_bytes] uint8 (flattened view)
+    Slot_mapping_ptr,  # [N] int32 — per-token slot indices
+    # Cache strides (for computing byte offsets)
+    stride_cache_block: tl.constexpr,
+    stride_cache_pos: tl.constexpr,
+    stride_cache_head: tl.constexpr,
+    # Dimensions
+    D: tl.constexpr,
+    H: tl.constexpr,
+    BLOCK_SIZE: tl.constexpr,
+    BLOCK_D: tl.constexpr,
+    # TQ layout
+    KPS: tl.constexpr,
+    # Value quantization
+    VQB: tl.constexpr,
+    VAL_DATA_BYTES: tl.constexpr,
+    # Packing block sizes
+    BLOCK_VAL: tl.constexpr,
+    BLOCK_GRP: tl.constexpr = 16,
+    FP8_E4B15: tl.constexpr = 0,  # 1 = e4b15 (Ampere/Ada), 0 = e4nv (Hopper+)
+):
+    """FP8 key cast+scatter + value uniform quantization."""
+    pid = tl.program_id(0)
+    token_idx = pid // H
+    head_idx = pid % H
+
+    slot = tl.load(Slot_mapping_ptr + token_idx)
+    if slot < 0:
+        return
+    blk = (slot // BLOCK_SIZE).to(tl.int64)
+    off = (slot % BLOCK_SIZE).to(tl.int64)
+    head_idx_i64 = tl.cast(head_idx, tl.int64)
+    slot_base = (
+        blk * stride_cache_block
+        + off * stride_cache_pos
+        + head_idx_i64 * stride_cache_head
+    )
+
+    base = pid * D
+
+    # ── FP8 KEY: cast to FP8 in-kernel and store ─────────────────
+    d_offs = tl.arange(0, BLOCK_D)
+    d_mask = d_offs < D
+    k_vals = tl.load(Key_ptr + base + d_offs, mask=d_mask, other=0.0)
+    k_fp8 = k_vals.to(tl.float8e4b15) if FP8_E4B15 else k_vals.to(tl.float8e4nv)
+    k_bytes = k_fp8.to(tl.uint8, bitcast=True)
+    tl.store(KV_cache_ptr + slot_base + d_offs, k_bytes, mask=d_mask)
+
+    # ── VALUE QUANTIZE + PACK ───────────────────────────────────────
+    _store_quantized_value(
+        Value_ptr,
+        KV_cache_ptr,
+        base,
+        slot_base,
+        d_offs,
+        d_mask,
+        D=D,
+        KPS=KPS,
+        VQB=VQB,
+        VAL_DATA_BYTES=VAL_DATA_BYTES,
+        BLOCK_D=BLOCK_D,
+        BLOCK_VAL=BLOCK_VAL,
+        BLOCK_GRP=BLOCK_GRP,
+    )
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# Fused MSE store: bucketize + MSE index pack + norm store + value pack
+# (eliminates 4 PyTorch kernel launches per layer vs pack-only kernel)
+# ═══════════════════════════════════════════════════════════════════════
+
+
+@triton.jit
+def _tq_fused_store_mse(
+    # Post-rotation inputs
+    Y_ptr,  # [NH, D] float32 — rotated normalized keys (x_hat @ PiT)
+    Norms_ptr,  # [NH] float32 — key vector norms (||k||)
+    Value_ptr,  # [NH, D] float32 — raw values
+    # Quantization tables
+    Midpoints_ptr,  # [n_centroids-1] float32
+    # Cache and indexing
+    KV_cache_ptr,  # [total_bytes] uint8 (flattened view)
+    Slot_mapping_ptr,  # [N] int32 — per-token slot indices
+    # Cache strides
+    stride_cache_block: tl.constexpr,
+    stride_cache_pos: tl.constexpr,
+    stride_cache_head: tl.constexpr,
+    # Dimensions
+    D: tl.constexpr,
+    H: tl.constexpr,
+    BLOCK_SIZE: tl.constexpr,
+    BLOCK_D: tl.constexpr,
+    # TQ layout
+    MSE_BYTES: tl.constexpr,
+    KPS: tl.constexpr,
+    # Value quantization
+    VQB: tl.constexpr,
+    VAL_DATA_BYTES: tl.constexpr,
+    # Packing block sizes
+    BLOCK_VAL: tl.constexpr,
+    # MSE params
+    MSE_BITS: tl.constexpr,
+    N_CENTROIDS: tl.constexpr,
+    BLOCK_GRP: tl.constexpr = 16,
+):
+    """Fused MSE quantize + pack + store.
+
+    Performs binary-search bucketize, MSE index packing, norm storage,
+    and value quantization in one kernel.
+    """
+    pid = tl.program_id(0)
+    token_idx = pid // H
+    head_idx = pid % H
+
+    slot = tl.load(Slot_mapping_ptr + token_idx)
+    if slot < 0:
+        return
+    blk = (slot // BLOCK_SIZE).to(tl.int64)
+    off = (slot % BLOCK_SIZE).to(tl.int64)
+    head_idx_i64 = tl.cast(head_idx, tl.int64)
+    slot_base = (
+        blk * stride_cache_block
+        + off * stride_cache_pos
+        + head_idx_i64 * stride_cache_head
+    )
+
+    base = pid * D
+    d_offs = tl.arange(0, BLOCK_D)
+    d_mask = d_offs < D
+
+    # ── 1. BINARY SEARCH BUCKETIZE ───────────────────────────────────
+    # Midpoints are sorted (N_CENTROIDS-1 values); binary search finds
+    # insertion point in MSE_BITS iterations vs N_CENTROIDS-1 for linear.
+    y_vec = tl.load(Y_ptr + base + d_offs, mask=d_mask, other=0.0)
+    lo = tl.zeros([BLOCK_D], dtype=tl.int32)
+    hi = tl.full([BLOCK_D], N_CENTROIDS - 1, dtype=tl.int32)
+    for _ in range(MSE_BITS):
+        mid = (lo + hi) >> 1
+        # Clamp to valid midpoint index [0, N_CENTROIDS-2] for load safety;
+        # the search result (lo) is still correct since converged lanes
+        # don't change.
+        safe_mid = tl.minimum(mid, N_CENTROIDS - 2)
+        mid_val = tl.load(Midpoints_ptr + safe_mid, mask=d_mask, other=0.0)
+        lo = tl.where(y_vec >= mid_val, mid + 1, lo)
+        hi = tl.where(y_vec >= mid_val, hi, mid)
+    idx = tl.minimum(lo, N_CENTROIDS - 1)
+
+    # ── 2. PACK MSE INDICES from register idx ─────────────────────────
+    if MSE_BITS == 4:
+        idx_pairs = tl.reshape(idx, [BLOCK_D // 2, 2])
+        shifts_4 = tl.arange(0, 2) * 4
+        packed = tl.sum((idx_pairs & 0xF) << shifts_4[None, :], axis=1).to(tl.uint8)
+        mse_offs = tl.arange(0, BLOCK_D // 2)
+        mse_mask = mse_offs < MSE_BYTES
+        tl.store(KV_cache_ptr + slot_base + mse_offs, packed, mask=mse_mask)
+
+    elif MSE_BITS == 3:
+        grp_offs = tl.arange(0, BLOCK_GRP)
+        grp_mask = grp_offs < (D // 8)
+        idx_grp = tl.reshape(idx, [BLOCK_GRP, 8])
+        shifts_3 = tl.arange(0, 8) * 3
+        packed_24 = tl.sum((idx_grp & 0x7) << shifts_3[None, :], axis=1)
+        b0 = (packed_24 & 0xFF).to(tl.uint8)
+        b1 = ((packed_24 >> 8) & 0xFF).to(tl.uint8)
+        b2 = ((packed_24 >> 16) & 0xFF).to(tl.uint8)
+        tl.store(KV_cache_ptr + slot_base + grp_offs * 3, b0, mask=grp_mask)
+        tl.store(KV_cache_ptr + slot_base + grp_offs * 3 + 1, b1, mask=grp_mask)
+        tl.store(KV_cache_ptr + slot_base + grp_offs * 3 + 2, b2, mask=grp_mask)
+
+    # ── 3. STORE vec_norm (fp16, 2 bytes) ─────────────────────────────
+    norm_offset = MSE_BYTES
+
+    vn_f16 = tl.load(Norms_ptr + pid).to(tl.float16)
+    vn_u16 = vn_f16.to(tl.uint16, bitcast=True)
+    tl.store(KV_cache_ptr + slot_base + norm_offset, (vn_u16 & 0xFF).to(tl.uint8))
+    tl.store(
+        KV_cache_ptr + slot_base + norm_offset + 1, ((vn_u16 >> 8) & 0xFF).to(tl.uint8)
+    )
+
+    # ── 4. VALUE QUANTIZE + PACK ──────────────────────────────────────
+    _store_quantized_value(
+        Value_ptr,
+        KV_cache_ptr,
+        base,
+        slot_base,
+        d_offs,
+        d_mask,
+        D=D,
+        KPS=KPS,
+        VQB=VQB,
+        VAL_DATA_BYTES=VAL_DATA_BYTES,
+        BLOCK_D=BLOCK_D,
+        BLOCK_VAL=BLOCK_VAL,
+        BLOCK_GRP=BLOCK_GRP,
+    )
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# Launcher
+# ═══════════════════════════════════════════════════════════════════════
+
+
+def triton_turboquant_store(
+    key: torch.Tensor,  # [N, H, D] — raw keys (post-RoPE)
+    value: torch.Tensor,  # [N, H, D] — raw values
+    kv_cache: torch.Tensor,  # [num_blocks, block_size, Hk, padded_slot] uint8
+    slot_mapping: torch.Tensor,  # [N] int32
+    PiT: torch.Tensor,  # [D, D] float32
+    midpoints: torch.Tensor,  # [n_centroids-1] float32
+    mse_bits: int,
+    key_packed_size: int,
+    value_quant_bits: int,
+    key_fp8: bool = False,
+):
+    """Launch TQ store kernel (FP8 or MSE path)."""
+    N, H, D = key.shape
+    NH = N * H
+    block_size = kv_cache.shape[1]
+    BLOCK_D = triton.next_power_of_2(D)
+    mse_bytes = math.ceil(D * mse_bits / 8)
+    n_centroids = 2**mse_bits
+
+    val_data_bytes = math.ceil(D * value_quant_bits / 8)
+
+    BLOCK_VAL = triton.next_power_of_2(val_data_bytes)
+
+    # Cache strides (element_size=1 for uint8, so stride in bytes = stride())
+    stride_block = kv_cache.stride(0)
+    stride_pos = kv_cache.stride(1)
+    stride_head = kv_cache.stride(2)
+
+    block_grp = triton.next_power_of_2(D // 8) if D >= 8 else 1
+
+    # ── FP8 PATH: in-kernel FP8 cast + scatter via fp8 kernel ──
+    if key_fp8:
+        k_flat = key.reshape(NH, D).contiguous()
+        v_flat = value.reshape(NH, D).contiguous()
+
+        fp8_e4b15 = _use_fp8_e4b15(key.device.index or 0)
+
+        grid = (NH,)
+        _tq_fused_store_fp8[grid](
+            k_flat,
+            v_flat,
+            kv_cache.view(-1),
+            slot_mapping,
+            stride_cache_block=stride_block,
+            stride_cache_pos=stride_pos,
+            stride_cache_head=stride_head,
+            D=D,
+            H=H,
+            BLOCK_SIZE=block_size,
+            BLOCK_D=BLOCK_D,
+            KPS=key_packed_size,
+            VQB=value_quant_bits,
+            VAL_DATA_BYTES=val_data_bytes,
+            BLOCK_VAL=BLOCK_VAL,
+            BLOCK_GRP=block_grp,
+            FP8_E4B15=fp8_e4b15,
+            num_warps=4,
+            num_stages=1,
+        )
+        return
+
+    # ── MSE PATH: external GEMM + fused bucketize/pack kernel ──
+    # Normalize + rotation GEMM externally (cuBLAS is faster than in-kernel)
+    k_flat = key.float().reshape(NH, D)
+    norms = k_flat.norm(dim=1, keepdim=True)
+    x_hat = k_flat / (norms + 1e-8)
+    y = x_hat @ PiT
+
+    v_flat = value.float().reshape(NH, D)
+
+    # Fused kernel: bucketize + MSE index pack + norm store + value pack
+    grid = (NH,)
+    _tq_fused_store_mse[grid](
+        y,
+        norms.squeeze(1),
+        v_flat,
+        midpoints,
+        kv_cache.view(-1),
+        slot_mapping,
+        stride_cache_block=stride_block,
+        stride_cache_pos=stride_pos,
+        stride_cache_head=stride_head,
+        D=D,
+        H=H,
+        BLOCK_SIZE=block_size,
+        BLOCK_D=BLOCK_D,
+        MSE_BYTES=mse_bytes,
+        KPS=key_packed_size,
+        VQB=value_quant_bits,
+        VAL_DATA_BYTES=val_data_bytes,
+        BLOCK_VAL=BLOCK_VAL,
+        MSE_BITS=mse_bits,
+        N_CENTROIDS=n_centroids,
+        BLOCK_GRP=block_grp,
+        num_warps=4,
+        num_stages=1,
+    )
diff --git a/vllm/v1/attention/ops/triton_unified_attention.py b/vllm/v1/attention/ops/triton_unified_attention.py
index ca5d0e336713..56f1d1c1d084 100644
--- a/vllm/v1/attention/ops/triton_unified_attention.py
+++ b/vllm/v1/attention/ops/triton_unified_attention.py
@@ -7,12 +7,28 @@
 #  - Chih-Chieh Yang <chih.chieh.yang@ibm.com>
 #  - Thomas Parnell <tpa@zurich.ibm.com>
 
+from typing import Any
+
 import torch
 
 import vllm.envs as envs
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.triton_utils import tl, triton
+from vllm.v1.attention.ops.triton_attention_helpers import (
+    apply_alibi_to_score,
+    apply_softcap,
+    cdiv_fn,
+    compute_kv_seq_mask,
+    compute_tile_loop_bounds,
+    find_seq_idx,
+    init_softmax_M,
+    load_qq_bias_tile,
+    resolve_seq_and_query_len,
+    softmax_step,
+    store_segm_reduce_scalars,
+)
+from vllm.v1.kv_cache_interface import KVQuantMode
 
 logger = init_logger(__name__)
 is_batch_invariant = envs.VLLM_BATCH_INVARIANT
@@ -20,57 +36,167 @@
 
 
 @triton.jit
-def cdiv_fn(x, y):
-    return (x + y - 1) // y
+def _cast_kv_tile(data, Q, tensor_scale, KV_QUANT_MODE: tl.constexpr):
+    """Cast a loaded KV tile to Q's dtype, dequantizing if needed.
+
+    Modes handled inside the core kernel:
+
+    - ``KV_QUANT_MODE == 0`` (NONE) and ``2`` (INT8 per-token-head) and
+      ``3`` (FP8 per-token-head): plain cast.  Per-token-head modes apply
+      their scales separately on S/P inside the loop.
+    - ``KV_QUANT_MODE == 1`` (FP8 per-tensor): dequantize using the
+      tensor-wide scale, unless Q is also FP8 and the caller folds the scales
+      into the attention score and output accumulator.
+    """
+    if KV_QUANT_MODE == 1:
+        if Q.dtype.is_fp8():
+            return data.to(Q.dtype)
+        return (data.to(tl.float32) * tl.load(tensor_scale)).to(Q.dtype)
+    return data.to(Q.dtype)
+
+
+# ---------------------------------------------------------------------------
+# Tensor-descriptor (TD) helpers
+#
+# Used when the caller enables ``USE_TD`` (Intel Xe2/Xe3 HW 2D block reads).
+# When ``USE_TD`` is False the helpers are dead-code-eliminated at Triton
+# compile time, leaving the pointer-arithmetic load/store path untouched.
+# ---------------------------------------------------------------------------
 
 
 @triton.jit
-def apply_softcap(S, x):
-    Sdiv = S / x
-    p1 = tl.exp(Sdiv)
-    p2 = tl.exp(-Sdiv)
-    return x * (p1 - p2) / (p1 + p2)
+def _load_q_td(
+    query_ptr,
+    q_block_local_len,
+    query_stride_0: tl.int64,
+    query_stride_1: tl.int64,
+    cur_batch_in_all_start_index,
+    q_block_local_idx,
+    kv_head_idx,
+    num_queries_per_kv: tl.constexpr,
+    BLOCK_Q: tl.constexpr,
+    BLOCK_M: tl.constexpr,
+    HEAD_SIZE: tl.constexpr,
+    HEAD_SIZE_PADDED: tl.constexpr,
+):
+    """Load Q via a 2D tensor descriptor.
+
+    Caller guarantees (via the wrapper's ``use_td_qo`` gate):
+      * ``HEAD_SIZE == HEAD_SIZE_PADDED`` (head_size is a power of 2),
+      * ``num_queries_per_kv`` is a power of 2,
+      * the ``num_queries_per_kv`` heads of the current KV group are
+        contiguous in memory (``query_stride_1 == HEAD_SIZE``, which is
+        the default vLLM query layout).
+
+    Under those preconditions the inner two axes are flattened into one
+    row of size ``num_queries_per_kv * HEAD_SIZE`` with stride 1, which
+    avoids the non-power-of-2 ``block_shape`` error from the Triton
+    tensor-descriptor validator.  Returns (BLOCK_M, HEAD_SIZE_PADDED).
+    """
+    q_base = (
+        query_ptr
+        + (cur_batch_in_all_start_index + q_block_local_idx * BLOCK_Q) * query_stride_0
+        + (kv_head_idx * num_queries_per_kv) * query_stride_1
+    )
+    q_desc = tl.make_tensor_descriptor(
+        base=q_base,
+        shape=(q_block_local_len, num_queries_per_kv * HEAD_SIZE),
+        strides=(query_stride_0, 1),
+        block_shape=(BLOCK_Q, num_queries_per_kv * HEAD_SIZE_PADDED),
+    )
+    return q_desc.load([0, 0]).reshape(BLOCK_M, HEAD_SIZE_PADDED)
 
 
 @triton.jit
-def find_seq_idx(
-    query_start_len_ptr,
-    target_idx,
-    num_seqs,
+def _load_kv_tile_td(
+    cache_ptr,
+    physical_block_idx_scalar,
+    kv_head_idx,
+    offset_in_block,
+    stride_cache_0: tl.int64,
+    stride_cache_1: tl.int64,
+    stride_cache_2: tl.int64,
+    stride_cache_3: tl.constexpr,
+    BLOCK_SIZE: tl.constexpr,
+    TILE_SIZE: tl.constexpr,
+    HEAD_SIZE: tl.constexpr,
+    HEAD_SIZE_PADDED: tl.constexpr,
+):
+    """Load a KV cache tile via tensor descriptor.
+
+    Returns shape (TILE_SIZE, HEAD_SIZE_PADDED). Caller transposes for K.
+    Tensor descriptors zero-pad reads beyond the shape boundary, so
+    ``HEAD_SIZE_PADDED > HEAD_SIZE`` is handled correctly.
+    """
+    base = (
+        cache_ptr
+        + physical_block_idx_scalar * stride_cache_0
+        + kv_head_idx * stride_cache_2
+    )
+    desc = tl.make_tensor_descriptor(
+        base=base,
+        shape=(BLOCK_SIZE, HEAD_SIZE),
+        strides=(stride_cache_1, stride_cache_3),
+        block_shape=(TILE_SIZE, HEAD_SIZE_PADDED),
+    )
+    return desc.load([offset_in_block, 0])
+
+
+@triton.jit
+def _store_output_td(
+    base_ptr,
+    acc,
+    q_block_local_len,
+    stride_token: tl.int64,
+    stride_head: tl.int64,
+    num_queries_per_kv: tl.constexpr,
     BLOCK_Q: tl.constexpr,
-    use_q_block_mode: tl.constexpr,
+    HEAD_SIZE: tl.constexpr,
+    HEAD_SIZE_PADDED: tl.constexpr,
 ):
-    left: tl.int32 = 0
-    right = num_seqs
-    while left < right:
-        mid = (left + right) // 2
-        val = tl.load(query_start_len_ptr + mid)
-        mid_val = val // BLOCK_Q + mid if use_q_block_mode else val
-
-        if mid_val <= target_idx:
-            left = mid + 1
-        else:
-            right = mid
+    """Store an output tile via a tensor descriptor.
 
-    return left - 1
+    The 2D and 3D epilogues differ only in ``base_ptr`` and the
+    ``(stride_token, stride_head)`` pair: 2D writes directly to the
+    flat output buffer, 3D writes to a single per-segment slice of
+    ``segm_output_ptr``.  Descriptor shape / block_shape / reshape
+    are the same in both modes, so share one helper.
+    """
+    acc = acc.to(base_ptr.dtype.element_ty)
+    output_desc = tl.make_tensor_descriptor(
+        base=base_ptr,
+        shape=(q_block_local_len, num_queries_per_kv, HEAD_SIZE),
+        strides=(stride_token, stride_head, 1),
+        block_shape=(BLOCK_Q, num_queries_per_kv, HEAD_SIZE_PADDED),
+    )
+    output_desc.store(
+        [0, 0, 0],
+        acc.reshape(BLOCK_Q, num_queries_per_kv, HEAD_SIZE_PADDED),
+    )
 
 
 @triton.jit
-def kernel_unified_attention_2d(
-    output_ptr,  # [num_tokens, num_query_heads, head_size]
-    query_ptr,  # [num_tokens, num_query_heads, head_size]
-    key_cache_ptr,  # [num_blks, blk_size, num_kv_heads, head_size]
-    value_cache_ptr,  # [num_blks, blk_size, num_kv_heads, head_size]
-    sink_ptr,  # [num_query_heads]
-    block_tables_ptr,  # [num_seqs, max_num_blocks_per_seq]
-    seq_lens_ptr,  # [num_seqs]
-    alibi_slopes_ptr,  # [num_query_heads]
-    qq_bias_ptr,  # [num_query_tokens, num_query_tokens]
-    scale,  # float32
-    k_scale,  # float32
-    v_scale,  # float32
-    out_scale,  # float32
-    softcap,  # float32
+def kernel_unified_attention(
+    # Output destination for the 2D path.  In 3D mode per-segment partials
+    # go to the ``segm_*`` tensors (see bottom of signature) and
+    # ``output_ptr`` is unused (callers may pass any non-null pointer).
+    output_ptr,
+    # Inputs
+    query_ptr,
+    key_cache_ptr,
+    value_cache_ptr,
+    sink_ptr,
+    block_tables_ptr,
+    seq_lens_ptr,
+    alibi_slopes_ptr,
+    qq_bias_ptr,
+    # Scalars
+    scale,
+    q_scale,
+    k_scale,
+    v_scale,
+    out_scale,
+    softcap,
     num_query_heads: tl.constexpr,  # int
     num_queries_per_kv: tl.constexpr,  # int
     block_table_stride: tl.int64,  # int
@@ -91,7 +217,7 @@ def kernel_unified_attention_2d(
     SLIDING_WINDOW: tl.constexpr,  # int
     USE_MM_PREFIX: tl.constexpr,  # bool
     MAX_MM_RANGES: tl.constexpr,  # int
-    mm_prefix_range_ptr,  # [num_seqs] - prefix length for each sequence
+    mm_prefix_range_ptr,
     stride_k_cache_0: tl.int64,  # int
     stride_k_cache_1: tl.int64,  # int
     stride_k_cache_2: tl.int64,  # int
@@ -100,33 +226,91 @@ def kernel_unified_attention_2d(
     stride_v_cache_1: tl.int64,  # int
     stride_v_cache_2: tl.int64,  # int
     stride_v_cache_3: tl.constexpr,  # int
-    query_start_len_ptr,  # [num_seqs+1]
-    BLOCK_Q: tl.constexpr,  # int
+    query_start_len_ptr,
+    BLOCK_Q: tl.constexpr,
     num_seqs: tl.int32,
-    BLOCK_M: tl.constexpr,  # int
-    USE_FP8: tl.constexpr,  # bool
+    BLOCK_M: tl.constexpr,
+    NUM_SEGMENTS_PER_SEQ: tl.constexpr,
+    USE_FP8: tl.constexpr,
+    # Toggles 2D vs 3D layout.  The 2D path runs the full sequence in one
+    # tile loop and writes to ``output_ptr``.  The 3D path scopes the loop
+    # to ``[segm_idx, segm_idx+1) × tiles_per_segment`` and writes
+    # per-segment partials, finalized by ``reduce_segments``.
+    IS_3D: tl.constexpr,
+    # Parameters below default to None so Triton can skip materialising them
+    # on call sites where the corresponding constexpr branch is dead.
+    # Credit: @quinnlp identified this as a perf regression source in
+    # intel/intel-xpu-backend-for-triton#6758 (review comment r3204641104).
+    # Per-segment outputs: used in 3D mode; unused in 2D (IS_3D=False).
+    segm_output_ptr=None,
+    segm_max_ptr=None,
+    segm_expsum_ptr=None,
+    # Per-(token, head) scale caches: used iff KV_QUANT_MODE in {2, 3}.
+    k_scale_cache_ptr=None,
+    v_scale_cache_ptr=None,
+    stride_ks_blk: tl.int64 = None,
+    stride_ks_slot: tl.int64 = None,
+    stride_ks_head: tl.int64 = None,
+    stride_vs_blk: tl.int64 = None,
+    stride_vs_slot: tl.int64 = None,
+    stride_vs_head: tl.int64 = None,
+    # KV cache quantization mode handled inside this kernel via constexpr
+    # branches: NONE (0), FP8_PER_TENSOR (1), INT8_PER_TOKEN_HEAD (2),
+    # FP8_PER_TOKEN_HEAD (3).
+    KV_QUANT_MODE: tl.constexpr = 0,
     FP8_MIN: tl.constexpr = float8_info.min,
     FP8_MAX: tl.constexpr = float8_info.max,
+    # Chunked / block-local attention.  ``CHUNK_LOOKBACK >= 0`` enables
+    # chunked masking (used by Gemma3 block-local layers); takes precedence
+    # over ``SLIDING_WINDOW`` inside the helpers.  ``-1`` disables.
+    CHUNK_LOOKBACK: tl.constexpr = -1,
+    CHUNK_SIZE: tl.constexpr = -1,
+    # Tensor-descriptor load/store for HW 2D block reads on Intel Xe2/Xe3.
+    # ``USE_TD`` gates KV tile loads; ``USE_TD_QO`` separately gates Q/output
+    # (see ``unified_attention`` wrapper for the gating rules).
+    USE_TD: tl.constexpr = False,
+    USE_TD_QO: tl.constexpr = False,
+    Q_IS_FP8: tl.constexpr = False,
 ):
+    USE_PER_TOKEN_HEAD_SCALES: tl.constexpr = KV_QUANT_MODE >= 2
+    USE_FP8_Q_DESCALE: tl.constexpr = KV_QUANT_MODE == 1 and Q_IS_FP8
+
+    if USE_TD:
+        tl.static_assert(
+            BLOCK_SIZE % TILE_SIZE == 0,
+            "USE_TD requires BLOCK_SIZE to be a multiple of TILE_SIZE",
+        )
+
     q_block_global_idx = tl.program_id(0)
     kv_head_idx = tl.program_id(1)
-
-    seq_idx = find_seq_idx(
-        query_start_len_ptr, q_block_global_idx, num_seqs, BLOCK_Q, True
+    segm_idx = tl.program_id(2) if IS_3D else 0
+
+    (
+        seq_idx,
+        q_block_local_idx,
+        cur_batch_in_all_start_index,
+        cur_batch_query_len,
+        seq_len,
+    ) = resolve_seq_and_query_len(
+        query_start_len_ptr, seq_lens_ptr, q_block_global_idx, num_seqs, BLOCK_Q
     )
 
-    q_block_start_idx = tl.load(query_start_len_ptr + seq_idx) // BLOCK_Q + seq_idx
-
-    q_block_local_idx = q_block_global_idx - q_block_start_idx
-
-    cur_batch_in_all_start_index = tl.load(query_start_len_ptr + seq_idx)
-    cur_batch_in_all_stop_index = tl.load(query_start_len_ptr + seq_idx + 1)
-
-    cur_batch_query_len = cur_batch_in_all_stop_index - cur_batch_in_all_start_index
-
     if q_block_local_idx * BLOCK_Q >= cur_batch_query_len:
         return
 
+    if IS_3D:
+        tiles_per_segment = cdiv_fn(seq_len, NUM_SEGMENTS_PER_SEQ * TILE_SIZE)
+        if segm_idx * tiles_per_segment * TILE_SIZE >= seq_len:
+            return
+    else:
+        tiles_per_segment = 0
+
+    # Number of valid query rows in this block (used by TD descriptor
+    # shapes, but always computed so the variable stays in scope).
+    q_block_local_len = tl.minimum(
+        BLOCK_Q, cur_batch_query_len - q_block_local_idx * BLOCK_Q
+    )
+
     offs_m = tl.arange(0, BLOCK_M)
     offs_d = tl.arange(0, HEAD_SIZE_PADDED)
     offs_t = tl.arange(0, TILE_SIZE)
@@ -145,92 +329,72 @@ def kernel_unified_attention_2d(
     query_mask_1 = tl.where(query_offset_1 < num_query_heads, 1, 0).to(tl.int1)
 
     # Q : (BLOCK_M, HEAD_SIZE_PADDED)
-    Q = tl.load(
-        query_ptr + query_offset,
-        mask=dim_mask[None, :] & query_mask_0[:, None] & query_mask_1[:, None],
-        other=0.0,
-    )
+    if USE_TD_QO:
+        Q = _load_q_td(
+            query_ptr,
+            q_block_local_len,
+            query_stride_0,
+            query_stride_1,
+            cur_batch_in_all_start_index,
+            q_block_local_idx,
+            kv_head_idx,
+            num_queries_per_kv,
+            BLOCK_Q,
+            BLOCK_M,
+            HEAD_SIZE,
+            HEAD_SIZE_PADDED,
+        )
+    else:
+        Q = tl.load(
+            query_ptr + query_offset,
+            mask=dim_mask[None, :] & query_mask_0[:, None] & query_mask_1[:, None],
+            other=0.0,
+        )
 
     block_table_offset = seq_idx * block_table_stride
 
-    if not USE_SINKS:
-        M = tl.full([BLOCK_M], float("-inf"), dtype=tl.float32)
-    else:
-        M = tl.load(
-            sink_ptr + query_offset_1,
-            mask=query_mask_1,
-            other=float("-inf"),
-        ).to(dtype=tl.float32)
-
+    M = init_softmax_M(
+        sink_ptr, query_offset_1, query_mask_1, segm_idx, BLOCK_M, USE_SINKS, IS_3D
+    )
     L = tl.full([BLOCK_M], 1.0, dtype=tl.float32)
+    # acc : (BLOCK_M, HEAD_SIZE_PADDED)
     acc = tl.zeros([BLOCK_M, HEAD_SIZE_PADDED], dtype=tl.float32)
+    score_scale = scale
+    value_scale = 1.0
+    if USE_FP8_Q_DESCALE:
+        score_scale = scale * tl.load(q_scale) * tl.load(k_scale)
+        value_scale = tl.load(v_scale)
 
-    # sequence len for this particular sequence
-    seq_len = tl.load(seq_lens_ptr + seq_idx)
-
-    # context length for this particular sequences
     context_len = seq_len - cur_batch_query_len
 
-    # alibi slope for this head
     if USE_ALIBI_SLOPES:
         alibi_slope = tl.load(
             alibi_slopes_ptr + query_offset_1, mask=query_mask_1, other=0.0
         )
 
-    # query-query attention bias
     if USE_QQ_BIAS:
-        qq_bias_row_ptrs = (
-            qq_bias_ptr + query_pos[:, None] * qq_bias_stride_0
-        )  # shape: [BLOCK_M]
-
-    # compute the length of the longest sequence prefix spanned by any
-    # query token in the current q_block (q_block_local_idx)
-    max_seq_prefix_len = (
-        context_len
-        + q_block_local_idx * BLOCK_Q
-        + (BLOCK_M - 1) // num_queries_per_kv
-        + 1
+        qq_bias_row_ptrs = qq_bias_ptr + query_pos[:, None] * qq_bias_stride_0
+
+    loop_lo, loop_hi, max_seq_prefix_len = compute_tile_loop_bounds(
+        context_len,
+        seq_len,
+        cur_batch_query_len,
+        q_block_local_idx,
+        segm_idx,
+        tiles_per_segment,
+        TILE_SIZE,
+        BLOCK_M,
+        BLOCK_Q,
+        num_queries_per_kv,
+        SLIDING_WINDOW,
+        USE_MM_PREFIX,
+        IS_3D,
+        CHUNK_LOOKBACK,
+        CHUNK_SIZE,
     )
 
-    if USE_MM_PREFIX:
-        # image bidirectional attention ranges require a full range
-        # including q_block padding to make sure doc mask is correct
-        max_seq_prefix_len = tl.maximum(max_seq_prefix_len, seq_len)
-    else:
-        # adjust for potential padding in the last q_block by considering the
-        # actual sequence length
-        max_seq_prefix_len = tl.minimum(max_seq_prefix_len, seq_len)
-
-    # calculate the number of tiles that need to be processed to
-    # cover the longest sequence prefix (due to causal masking, tiles beyond
-    # this prefix can be skipped)
-    num_tiles = cdiv_fn(max_seq_prefix_len, TILE_SIZE)
-
-    # ---- Sliding-window tile pruning --------------------
-    # Default: keep previous global behavior
-    tile_start = 0
-    tile_end = num_tiles
-    # TODO(Isotr0py): sliding window pruning with image bidirectional mask
-    if SLIDING_WINDOW > 0 and not USE_MM_PREFIX:
-        # Query rows covered by this Q-block
-        qpos_lo = q_block_local_idx * BLOCK_Q
-        qpos_hi = tl.minimum(
-            qpos_lo + (BLOCK_M - 1) // num_queries_per_kv,
-            cur_batch_query_len - 1,
-        )
-        # For sliding window, each query position q can only attend to
-        # keys in the range [q_abs - SLIDING_WINDOW + 1, q_abs]
-        # where q_abs = context_len + q
-        # The union of allowed key positions for this Q-block is:
-        # [context_len + qpos_lo - SLIDING_WINDOW + 1, context_len + qpos_hi]
-        first_allowed_key = context_len + qpos_lo - SLIDING_WINDOW + 1
-        last_allowed_key = context_len + qpos_hi
-        # Convert to tile indices and clamp
-        tile_start = tl.maximum(0, first_allowed_key // TILE_SIZE)
-        tile_end = tl.minimum((last_allowed_key // TILE_SIZE) + 1, num_tiles)
-
     # iterate through tiles (now limited to the sliding window range)
-    for j in range(tile_start, tile_end):
+    for j in range(loop_lo, loop_hi):
         seq_offset = j * TILE_SIZE + offs_t
         tile_mask = seq_offset < max_seq_prefix_len
 
@@ -238,87 +402,113 @@ def kernel_unified_attention_2d(
             block_tables_ptr + block_table_offset + seq_offset // BLOCK_SIZE
         ).to(tl.int64)
 
-        v_offset = (
-            physical_block_idx[:, None] * stride_v_cache_0
-            + kv_head_idx * stride_v_cache_2
-            + offs_d[None, :] * stride_v_cache_3
-            + (seq_offset % BLOCK_SIZE)[:, None] * stride_v_cache_1
-        )
-
-        k_offset = (
-            physical_block_idx[None, :] * stride_k_cache_0
-            + kv_head_idx * stride_k_cache_2
-            + offs_d[:, None] * stride_k_cache_3
-            + (seq_offset % BLOCK_SIZE)[None, :] * stride_k_cache_1
-        )
-
-        # K : (HEAD_SIZE, TILE_SIZE)
-        K_load = tl.load(
-            key_cache_ptr + k_offset,
-            mask=dim_mask[:, None] & tile_mask[None, :],
-            other=0.0,
-        )
-
-        if K_load.dtype.is_fp8():
-            if Q.dtype.is_fp8():
-                K = K_load
-            else:
-                K = (K_load.to(tl.float32) * tl.load(k_scale)).to(Q.dtype)
-        else:
-            K = K_load
-
-        # V : (TILE_SIZE, HEAD_SIZE)
-        V_load = tl.load(
-            value_cache_ptr + v_offset,
-            mask=dim_mask[None, :] & tile_mask[:, None],
-            other=0.0,
-        )
-
-        if V_load.dtype.is_fp8():
-            if Q.dtype.is_fp8():
-                V = V_load
-            else:
-                V = (V_load.to(tl.float32) * tl.load(v_scale)).to(Q.dtype)
+        if USE_TD:
+            # All TILE_SIZE slots within a single KV tile map to one
+            # physical block (guaranteed by ``BLOCK_SIZE % TILE_SIZE == 0``
+            # from the static_assert above), so load the block index as
+            # a scalar instead of a broadcast reduction.
+            offset_in_block = (j * TILE_SIZE) % BLOCK_SIZE
+            physical_block_scalar = tl.load(
+                block_tables_ptr + block_table_offset + (j * TILE_SIZE) // BLOCK_SIZE
+            ).to(tl.int64)
+            # K : (HEAD_SIZE, TILE_SIZE)
+            K_load = _load_kv_tile_td(
+                key_cache_ptr,
+                physical_block_scalar,
+                kv_head_idx,
+                offset_in_block,
+                stride_k_cache_0,
+                stride_k_cache_1,
+                stride_k_cache_2,
+                stride_k_cache_3,
+                BLOCK_SIZE,
+                TILE_SIZE,
+                HEAD_SIZE,
+                HEAD_SIZE_PADDED,
+            ).T
+            # V : (TILE_SIZE, HEAD_SIZE)
+            V_load = _load_kv_tile_td(
+                value_cache_ptr,
+                physical_block_scalar,
+                kv_head_idx,
+                offset_in_block,
+                stride_v_cache_0,
+                stride_v_cache_1,
+                stride_v_cache_2,
+                stride_v_cache_3,
+                BLOCK_SIZE,
+                TILE_SIZE,
+                HEAD_SIZE,
+                HEAD_SIZE_PADDED,
+            )
         else:
-            V = V_load
+            v_offset = (
+                physical_block_idx[:, None] * stride_v_cache_0
+                + kv_head_idx * stride_v_cache_2
+                + offs_d[None, :] * stride_v_cache_3
+                + (seq_offset % BLOCK_SIZE)[:, None] * stride_v_cache_1
+            )
+            k_offset = (
+                physical_block_idx[None, :] * stride_k_cache_0
+                + kv_head_idx * stride_k_cache_2
+                + offs_d[:, None] * stride_k_cache_3
+                + (seq_offset % BLOCK_SIZE)[None, :] * stride_k_cache_1
+            )
+            # K : (HEAD_SIZE, TILE_SIZE)
+            K_load = tl.load(
+                key_cache_ptr + k_offset,
+                mask=dim_mask[:, None] & tile_mask[None, :],
+                other=0.0,
+            )
+            # V : (TILE_SIZE, HEAD_SIZE)
+            V_load = tl.load(
+                value_cache_ptr + v_offset,
+                mask=dim_mask[None, :] & tile_mask[:, None],
+                other=0.0,
+            )
+        K = _cast_kv_tile(K_load, Q, k_scale, KV_QUANT_MODE)
+        V = _cast_kv_tile(V_load, Q, v_scale, KV_QUANT_MODE)
+
+        # Per-(token, head) scales for INT8 / FP8 per-token-head modes.
+        if USE_PER_TOKEN_HEAD_SCALES:
+            scale_idx = (
+                physical_block_idx * stride_ks_blk
+                + (seq_offset % BLOCK_SIZE) * stride_ks_slot
+                + kv_head_idx * stride_ks_head
+            )
+            k_token_head_scales = tl.load(
+                k_scale_cache_ptr + scale_idx, mask=tile_mask, other=1.0
+            )
+            v_scale_idx = (
+                physical_block_idx * stride_vs_blk
+                + (seq_offset % BLOCK_SIZE) * stride_vs_slot
+                + kv_head_idx * stride_vs_head
+            )
+            v_token_head_scales = tl.load(
+                v_scale_cache_ptr + v_scale_idx, mask=tile_mask, other=1.0
+            )
 
-        # Compute attention mask: causal by default (key <= query)
         query_abs_pos = context_len + query_pos[:, None]
-        seq_mask = seq_offset[None, :] <= query_abs_pos
-
-        # Apply sliding window to base mask BEFORE mm_prefix OR.
-        # Order must match FlexAttention: (causal AND sliding_window) OR mm_prefix
-        if SLIDING_WINDOW > 0:
-            seq_mask = seq_mask & ((query_abs_pos - seq_offset) < SLIDING_WINDOW)
-
-        # PrefixLM: extend mask with bidirectional ranges for multimodal tokens.
-        # Applied AFTER sliding window so mm_prefix ranges override SW restriction.
-        if USE_MM_PREFIX:
-            for i in range(MAX_MM_RANGES):
-                range_start = tl.load(
-                    mm_prefix_range_ptr + seq_idx * MAX_MM_RANGES * 2 + i * 2
-                )
-                range_end = tl.load(
-                    mm_prefix_range_ptr + seq_idx * MAX_MM_RANGES * 2 + i * 2 + 1
-                )
-
-                is_valid = range_start < range_end
-                q_in_range = (
-                    (query_abs_pos >= range_start)
-                    & (query_abs_pos <= range_end)
-                    & is_valid
-                )
-                k_in_range = (
-                    (seq_offset[None, :] >= range_start)
-                    & (seq_offset[None, :] <= range_end)
-                    & is_valid
-                )
-                seq_mask |= q_in_range & k_in_range
+        seq_mask = compute_kv_seq_mask(
+            query_abs_pos,
+            seq_offset,
+            seq_idx,
+            mm_prefix_range_ptr,
+            SLIDING_WINDOW,
+            USE_MM_PREFIX,
+            MAX_MM_RANGES,
+            CHUNK_LOOKBACK,
+            CHUNK_SIZE,
+        )
 
         # S : (BLOCK_M, TILE_SIZE)
         S = tl.zeros(shape=(BLOCK_M, TILE_SIZE), dtype=tl.float32)
-
-        S += scale * tl.dot(Q, K)
+        if USE_PER_TOKEN_HEAD_SCALES:
+            # Per-token-head quant: fuse softmax_scale with per-head k_scale
+            # to avoid a separate BLOCK_M × TILE_SIZE multiply on S.
+            S += tl.dot(Q, K) * (score_scale * k_token_head_scales[None, :])
+        else:
+            S += score_scale * tl.dot(Q, K)
 
         if USE_SOFTCAP:
             S = apply_softcap(S, softcap)
@@ -328,436 +518,129 @@ def kernel_unified_attention_2d(
         )
 
         if USE_ALIBI_SLOPES:
-            if USE_ALIBI_SQRT:
-                relative_pos = seq_offset - (context_len + query_pos[:, None])
-                alibi_offset = tl.where(
-                    relative_pos <= 0,
-                    -tl.sqrt((-relative_pos).to(tl.float32)),
-                    0.0,
-                )
-            else:
-                alibi_offset = seq_offset - context_len
-            S += alibi_slope[:, None] * alibi_offset
+            S = apply_alibi_to_score(
+                S, alibi_slope, seq_offset, context_len, query_pos, USE_ALIBI_SQRT
+            )
 
         if USE_QQ_BIAS:
-            # compute key positions relative to query section
-            key_rel_pos = seq_offset - context_len  # shape: [BLOCK_SIZE]
-            # load bias only for keys that correspond to queries
-            is_query_key = key_rel_pos >= 0 and key_rel_pos < qq_bias_stride_0
-            qq_bias = tl.load(
-                qq_bias_row_ptrs + key_rel_pos[None, :],
-                mask=is_query_key[None, :],  # avoid OOB for context keys
-                other=0.0,
+            S += load_qq_bias_tile(
+                qq_bias_row_ptrs, seq_offset, context_len, qq_bias_stride_0
             )
-            S += qq_bias
-
-        # compute running maximum
-        # m_j : (BLOCK_M,)
-        m_j = tl.maximum(M, tl.max(S, axis=1))
-
-        # For sliding window there's a chance the max is -inf due to masking of
-        # the entire row. In this case we need to set m_j 0 to avoid NaN
-        m_j = tl.where(m_j > float("-inf"), m_j, 0.0)
 
-        # P : (BLOCK_M, TILE_SIZE)
-        P = tl.exp(S - m_j[:, None])
-
-        # l_j : (BLOCK_M,)
-        l_j = tl.sum(P, axis=1)
-
-        # alpha : (BLOCK_M, )
-        alpha = tl.exp(M - m_j)
-
-        # acc : (BLOCK_M, HEAD_SIZE_PADDED)
+        M, L, P, alpha = softmax_step(S, M, L)
         acc = acc * alpha[:, None]
 
-        # update constants
-        L = L * alpha + l_j
-        M = m_j
-
         if SLIDING_WINDOW:
             qpos_lo = q_block_local_idx * BLOCK_Q
             V = tl.where(
-                (context_len + qpos_lo - seq_offset[:, None]) < SLIDING_WINDOW, V, 0.0
+                (context_len + qpos_lo - seq_offset[:, None]) < SLIDING_WINDOW,
+                V,
+                0.0,
             )
-
-        # acc : (BLOCK_M, HEAD_SIZE_PADDED)
-        acc += tl.dot(P.to(V.dtype), V)
-
-    # epilogue
-    acc = acc / L[:, None]
-    if USE_FP8:
-        acc = acc * tl.load(out_scale)
-        acc = tl.clamp(acc, FP8_MIN, FP8_MAX)
-
-    output_offset = (
-        query_offset_0[:, None] * output_stride_0
-        + query_offset_1[:, None] * output_stride_1
-        + offs_d[None, :]
-    )
-
-    tl.store(
-        output_ptr + output_offset,
-        acc,
-        mask=dim_mask[None, :] & query_mask_0[:, None] & query_mask_1[:, None],
-    )
-
-
-@triton.jit
-def kernel_unified_attention_3d(
-    segm_output_ptr,
-    # [num_tokens, num_query_heads, num_segments, head_size_padded]
-    segm_max_ptr,  # [num_tokens, num_query_heads, num_segments]
-    segm_expsum_ptr,  # [num_tokens, num_query_heads, num_segments]
-    query_ptr,  # [num_tokens, num_query_heads, head_size]
-    key_cache_ptr,  # [num_blks, num_kv_heads, head_size // x, blk_size, x]
-    value_cache_ptr,  # [num_blks, num_kv_heads, head_size, blk_size]
-    sink_ptr,  # [num_query_heads]
-    block_tables_ptr,  # [num_seqs, max_num_blocks_per_seq]
-    seq_lens_ptr,  # [num_seqs]
-    alibi_slopes_ptr,  # [num_query_heads]
-    qq_bias_ptr,  # [num_query_tokens, num_query_tokens]
-    scale,  # float32
-    k_scale,  # float32
-    v_scale,  # float32
-    softcap,  # float32
-    num_query_heads: tl.constexpr,  # int
-    num_queries_per_kv: tl.constexpr,  # int
-    block_table_stride: tl.int64,  # int
-    query_stride_0: tl.int64,  # int
-    query_stride_1: tl.int64,  # int, should be equal to head_size
-    qq_bias_stride_0: tl.int64,  # int
-    BLOCK_SIZE: tl.constexpr,  # int
-    TILE_SIZE: tl.constexpr,  # int, must be power of 2
-    HEAD_SIZE: tl.constexpr,  # int
-    HEAD_SIZE_PADDED: tl.constexpr,  # int, must be power of 2
-    USE_ALIBI_SLOPES: tl.constexpr,  # bool
-    USE_ALIBI_SQRT: tl.constexpr,  # bool
-    USE_QQ_BIAS: tl.constexpr,  # bool
-    USE_SOFTCAP: tl.constexpr,  # bool
-    USE_SINKS: tl.constexpr,  # bool
-    SLIDING_WINDOW: tl.constexpr,  # int
-    stride_k_cache_0: tl.int64,  # int
-    stride_k_cache_1: tl.int64,  # int
-    stride_k_cache_2: tl.int64,  # int
-    stride_k_cache_3: tl.constexpr,  # int
-    stride_v_cache_0: tl.int64,  # int
-    stride_v_cache_1: tl.int64,  # int
-    stride_v_cache_2: tl.int64,  # int
-    stride_v_cache_3: tl.constexpr,  # int
-    query_start_len_ptr,  # [num_seqs+1]
-    BLOCK_Q: tl.constexpr,  # int
-    num_seqs: tl.int32,
-    BLOCK_M: tl.constexpr,  # int
-    NUM_SEGMENTS_PER_SEQ: tl.constexpr,  # int
-    USE_MM_PREFIX: tl.constexpr,  # bool
-    MAX_MM_RANGES: tl.constexpr,  # int
-    mm_prefix_range_ptr,  # [num_seqs] - prefix length for each sequence
-):
-    q_block_global_idx = tl.program_id(0)
-    kv_head_idx = tl.program_id(1)
-    segm_idx = tl.program_id(2)
-
-    seq_idx = find_seq_idx(
-        query_start_len_ptr, q_block_global_idx, num_seqs, BLOCK_Q, True
-    )
-
-    q_block_start_idx = tl.load(query_start_len_ptr + seq_idx) // BLOCK_Q + seq_idx
-
-    q_block_local_idx = q_block_global_idx - q_block_start_idx
-
-    cur_batch_in_all_start_index = tl.load(query_start_len_ptr + seq_idx)
-    cur_batch_in_all_stop_index = tl.load(query_start_len_ptr + seq_idx + 1)
-
-    cur_batch_query_len = cur_batch_in_all_stop_index - cur_batch_in_all_start_index
-
-    if q_block_local_idx * BLOCK_Q >= cur_batch_query_len:
-        return
-
-    # sequence len for this particular sequence
-    seq_len = tl.load(seq_lens_ptr + seq_idx)
-
-    # number of segments for this particular sequence
-    num_segments = NUM_SEGMENTS_PER_SEQ
-    tiles_per_segment = cdiv_fn(seq_len, num_segments * TILE_SIZE)
-
-    if segm_idx * tiles_per_segment * TILE_SIZE >= seq_len:
-        return
-
-    offs_m = tl.arange(0, BLOCK_M)
-    offs_d = tl.arange(0, HEAD_SIZE_PADDED)
-    offs_t = tl.arange(0, TILE_SIZE)
-    query_pos = q_block_local_idx * BLOCK_Q + offs_m // num_queries_per_kv
-
-    query_offset_0 = cur_batch_in_all_start_index + query_pos
-    query_offset_1 = kv_head_idx * num_queries_per_kv + offs_m % num_queries_per_kv
-    query_offset = (
-        query_offset_0[:, None] * query_stride_0
-        + query_offset_1[:, None] * query_stride_1
-        + offs_d[None, :]
-    )
-
-    dim_mask = tl.where(offs_d < HEAD_SIZE, 1, 0).to(tl.int1)
-    query_mask_0 = tl.where(query_pos < cur_batch_query_len, 1, 0).to(tl.int1)
-    query_mask_1 = tl.where(query_offset_1 < num_query_heads, 1, 0).to(tl.int1)
-
-    # Q : (BLOCK_M, HEAD_SIZE_PADDED)
-    Q = tl.load(
-        query_ptr + query_offset,
-        mask=dim_mask[None, :] & query_mask_0[:, None] & query_mask_1[:, None],
-        other=0.0,
-    )
-
-    block_table_offset = seq_idx * block_table_stride
-
-    if USE_SINKS:
-        if segm_idx == 0:
-            M = tl.load(
-                sink_ptr + query_offset_1,
-                mask=query_mask_1,
-                other=float("-inf"),
-            ).to(dtype=tl.float32)
+        if USE_PER_TOKEN_HEAD_SCALES:
+            # Per-token-head quant: apply v_scale to P instead of V.
+            P_v = (P * v_token_head_scales[None, :]).to(V.dtype)
+            acc += tl.dot(P_v, V)
         else:
-            M = tl.full([BLOCK_M], float("-inf"), dtype=tl.float32)
-    else:
-        M = tl.full([BLOCK_M], float("-inf"), dtype=tl.float32)
-
-    L = tl.full([BLOCK_M], 1.0, dtype=tl.float32)
-    acc = tl.zeros([BLOCK_M, HEAD_SIZE_PADDED], dtype=tl.float32)
-
-    # context length for this particular sequences
-    context_len = seq_len - cur_batch_query_len
-
-    # alibi slope for this head
-    if USE_ALIBI_SLOPES:
-        alibi_slope = tl.load(
-            alibi_slopes_ptr + query_offset_1, mask=query_mask_1, other=0.0
-        )
-
-    # query-query attention bias
-    if USE_QQ_BIAS:
-        qq_bias_row_ptrs = (
-            qq_bias_ptr + query_pos[:, None] * qq_bias_stride_0
-        )  # shape: [BLOCK_M]
-
-    # compute the length of the longest sequence prefix spanned by any
-    # query token in the current q_block (q_block_local_idx)
-    max_seq_prefix_len = (
-        context_len
-        + q_block_local_idx * BLOCK_Q
-        + (BLOCK_M - 1) // num_queries_per_kv
-        + 1
-    )
-
-    # adjust for potential padding in the last q_block by considering the
-    # actual sequence length
-    max_seq_prefix_len = tl.minimum(max_seq_prefix_len, seq_len)
-
-    # calculate the number of tiles that need to be processed to
-    # cover the longest sequence prefix (due to causal masking, tiles beyond
-    # this prefix can be skipped)
-    num_tiles = cdiv_fn(max_seq_prefix_len, TILE_SIZE)
-
-    # ---- Sliding-window tile pruning --------------------
-    # Default: keep previous global behavior
-    tile_start = 0
-    tile_end = num_tiles
-    # TODO(Isotr0py): sliding window pruning with image bidirectional mask
-    if SLIDING_WINDOW > 0 and not USE_MM_PREFIX:
-        # Query rows covered by this Q-block
-        qpos_lo = q_block_local_idx * BLOCK_Q
-        qpos_hi = tl.minimum(
-            qpos_lo + (BLOCK_M - 1) // num_queries_per_kv,
-            cur_batch_query_len - 1,
-        )
-        # For sliding window, each query position q can only attend to
-        # keys in the range [q_abs - SLIDING_WINDOW + 1, q_abs]
-        # where q_abs = context_len + q
-        # The union of allowed key positions for this Q-block is:
-        # [context_len + qpos_lo - SLIDING_WINDOW + 1, context_len + qpos_hi]
-        first_allowed_key = context_len + qpos_lo - SLIDING_WINDOW + 1
-        last_allowed_key = context_len + qpos_hi
-        # Convert to tile indices and clamp
-        tile_start = tl.maximum(0, first_allowed_key // TILE_SIZE)
-        tile_end = tl.minimum((last_allowed_key // TILE_SIZE) + 1, num_tiles)
-
-    # iterate through tiles (now limited to the sliding window range)
-    for j in range(
-        max(segm_idx * tiles_per_segment, tile_start),
-        min((segm_idx + 1) * tiles_per_segment, tile_end),
-    ):
-        seq_offset = j * TILE_SIZE + offs_t
-        tile_mask = seq_offset < max_seq_prefix_len
-
-        physical_block_idx = tl.load(
-            block_tables_ptr + block_table_offset + seq_offset // BLOCK_SIZE
-        ).to(tl.int64)
-
-        v_offset = (
-            physical_block_idx[:, None] * stride_v_cache_0
-            + kv_head_idx * stride_v_cache_2
-            + offs_d[None, :] * stride_v_cache_3
-            + (seq_offset % BLOCK_SIZE)[:, None] * stride_v_cache_1
-        )
-
-        k_offset = (
-            physical_block_idx[None, :] * stride_k_cache_0
-            + kv_head_idx * stride_k_cache_2
-            + offs_d[:, None] * stride_k_cache_3
-            + (seq_offset % BLOCK_SIZE)[None, :] * stride_k_cache_1
-        )
-
-        # K : (HEAD_SIZE, TILE_SIZE)
-        K_load = tl.load(
-            key_cache_ptr + k_offset,
-            mask=dim_mask[:, None] & tile_mask[None, :],
-            other=0.0,
-        )
-
-        if K_load.dtype.is_fp8():
-            if Q.dtype.is_fp8():
-                K = K_load
-            else:
-                K = (K_load.to(tl.float32) * tl.load(k_scale)).to(Q.dtype)
+            acc += tl.dot(P.to(V.dtype), V)
+
+    # ---- Epilogue ---------------------------------------------------------
+    if IS_3D:
+        if USE_FP8_Q_DESCALE:
+            acc *= value_scale
+        # Store per-segment partials; finalized by ``reduce_segments``.
+        if USE_TD_QO:
+            # 3D target: segm_output[token, head, segm_idx, :].  Advance
+            # the base to the correct (token-start, head-start, segm)
+            # slice; strides step between tokens / heads of the flattened
+            # (T, H, SEGS, PAD) layout.
+            segm_base = (
+                segm_output_ptr
+                + (cur_batch_in_all_start_index + q_block_local_idx * BLOCK_Q).to(
+                    tl.int64
+                )
+                * (num_query_heads * NUM_SEGMENTS_PER_SEQ * HEAD_SIZE_PADDED)
+                + (kv_head_idx * num_queries_per_kv)
+                * (NUM_SEGMENTS_PER_SEQ * HEAD_SIZE_PADDED)
+                + segm_idx * HEAD_SIZE_PADDED
+            )
+            _store_output_td(
+                segm_base,
+                acc,
+                q_block_local_len,
+                num_query_heads * NUM_SEGMENTS_PER_SEQ * HEAD_SIZE_PADDED,
+                NUM_SEGMENTS_PER_SEQ * HEAD_SIZE_PADDED,
+                num_queries_per_kv,
+                BLOCK_Q,
+                HEAD_SIZE,
+                HEAD_SIZE_PADDED,
+            )
         else:
-            K = K_load
-
-        # V : (TILE_SIZE, HEAD_SIZE)
-        V_load = tl.load(
-            value_cache_ptr + v_offset,
-            mask=dim_mask[None, :] & tile_mask[:, None],
-            other=0.0,
+            segm_output_offset = (
+                query_offset_0[:, None].to(tl.int64)
+                * (num_query_heads * NUM_SEGMENTS_PER_SEQ * HEAD_SIZE_PADDED)
+                + query_offset_1[:, None] * (NUM_SEGMENTS_PER_SEQ * HEAD_SIZE_PADDED)
+                + segm_idx * HEAD_SIZE_PADDED
+                + tl.arange(0, HEAD_SIZE_PADDED)[None, :]
+            )
+            tl.store(
+                segm_output_ptr + segm_output_offset,
+                acc,
+                mask=dim_mask[None, :] & query_mask_0[:, None] & query_mask_1[:, None],
+            )
+        store_segm_reduce_scalars(
+            segm_max_ptr,
+            segm_expsum_ptr,
+            query_offset_0,
+            query_offset_1,
+            segm_idx,
+            M,
+            L,
+            query_mask_0,
+            query_mask_1,
+            num_query_heads,
+            NUM_SEGMENTS_PER_SEQ,
         )
-
-        if V_load.dtype.is_fp8():
-            if Q.dtype.is_fp8():
-                V = V_load
-            else:
-                V = (V_load.to(tl.float32) * tl.load(v_scale)).to(Q.dtype)
+    else:
+        acc = acc / L[:, None]
+        if USE_FP8_Q_DESCALE:
+            acc *= value_scale
+        if USE_FP8:
+            acc = acc * tl.load(out_scale)
+            acc = tl.clamp(acc, FP8_MIN, FP8_MAX)
+        if USE_TD_QO:
+            # 2D target: flat output[token, head, :].  Strides come
+            # straight from the caller (``output_stride_0`` per token,
+            # ``output_stride_1`` per head).
+            output_base = (
+                output_ptr
+                + (cur_batch_in_all_start_index + q_block_local_idx * BLOCK_Q)
+                * output_stride_0
+                + (kv_head_idx * num_queries_per_kv) * output_stride_1
+            )
+            _store_output_td(
+                output_base,
+                acc,
+                q_block_local_len,
+                output_stride_0,
+                output_stride_1,
+                num_queries_per_kv,
+                BLOCK_Q,
+                HEAD_SIZE,
+                HEAD_SIZE_PADDED,
+            )
         else:
-            V = V_load
-
-        # Compute attention mask: causal by default (key <= query)
-        query_abs_pos = context_len + query_pos[:, None]
-        seq_mask = seq_offset[None, :] <= query_abs_pos
-
-        # Apply sliding window to base mask BEFORE mm_prefix OR.
-        # Order must match FlexAttention: (causal AND sliding_window) OR mm_prefix
-        if SLIDING_WINDOW > 0:
-            seq_mask = seq_mask & ((query_abs_pos - seq_offset) < SLIDING_WINDOW)
-
-        # PrefixLM: extend mask with bidirectional ranges for multimodal tokens.
-        # Applied AFTER sliding window so mm_prefix ranges override SW restriction.
-        if USE_MM_PREFIX:
-            for i in range(MAX_MM_RANGES):
-                range_start = tl.load(
-                    mm_prefix_range_ptr + seq_idx * MAX_MM_RANGES * 2 + i * 2
-                )
-                range_end = tl.load(
-                    mm_prefix_range_ptr + seq_idx * MAX_MM_RANGES * 2 + i * 2 + 1
-                )
-
-                is_valid = range_start < range_end
-                q_in_range = (
-                    (query_abs_pos >= range_start)
-                    & (query_abs_pos <= range_end)
-                    & is_valid
-                )
-                k_in_range = (
-                    (seq_offset[None, :] >= range_start)
-                    & (seq_offset[None, :] <= range_end)
-                    & is_valid
-                )
-                seq_mask |= q_in_range & k_in_range
-
-        # S : (BLOCK_M, TILE_SIZE)
-        S = tl.zeros(shape=(BLOCK_M, TILE_SIZE), dtype=tl.float32)
-        S += scale * tl.dot(Q, K)
-
-        if USE_SOFTCAP:
-            S = apply_softcap(S, softcap)
-
-        S = tl.where(
-            query_mask_1[:, None] & query_mask_0[:, None] & seq_mask, S, float("-inf")
-        )
-
-        if USE_ALIBI_SLOPES:
-            if USE_ALIBI_SQRT:
-                relative_pos = seq_offset - (context_len + query_pos[:, None])
-                alibi_offset = tl.where(
-                    relative_pos <= 0,
-                    -tl.sqrt((-relative_pos).to(tl.float32)),
-                    0.0,
-                )
-            else:
-                alibi_offset = seq_offset - context_len
-            S += alibi_slope[:, None] * alibi_offset
-
-        if USE_QQ_BIAS:
-            # compute key positions relative to query section
-            key_rel_pos = seq_offset - context_len  # shape: [BLOCK_SIZE]
-            # load bias only for keys that correspond to queries
-            is_query_key = key_rel_pos >= 0 and key_rel_pos < qq_bias_stride_0
-            qq_bias = tl.load(
-                qq_bias_row_ptrs + key_rel_pos[None, :],
-                mask=is_query_key[None, :],  # avoid OOB for context keys
-                other=0.0,
+            output_offset = (
+                query_offset_0[:, None] * output_stride_0
+                + query_offset_1[:, None] * output_stride_1
+                + offs_d[None, :]
             )
-            S += qq_bias
-
-        # compute running maximum
-        # m_j : (BLOCK_M,)
-        m_j = tl.maximum(M, tl.max(S, axis=1))
-
-        # For sliding window there's a chance the max is -inf due to masking of
-        # the entire row. In this case we need to set m_j 0 to avoid NaN
-        m_j = tl.where(m_j > float("-inf"), m_j, 0.0)
-
-        # P : (BLOCK_M, TILE_SIZE,)
-        P = tl.exp(S - m_j[:, None])
-
-        # l_j : (BLOCK_M,)
-        l_j = tl.sum(P, axis=1)
-
-        # alpha : (BLOCK_M, )
-        alpha = tl.exp(M - m_j)
-
-        # acc : (BLOCK_M, HEAD_SIZE_PADDED)
-        acc = acc * alpha[:, None]
-
-        # update constants
-        L = L * alpha + l_j
-        M = m_j
-
-        if SLIDING_WINDOW:
-            qpos_lo = q_block_local_idx * BLOCK_Q
-            V = tl.where(
-                (context_len + qpos_lo - seq_offset[:, None]) < SLIDING_WINDOW, V, 0.0
+            tl.store(
+                output_ptr + output_offset,
+                acc,
+                mask=dim_mask[None, :] & query_mask_0[:, None] & query_mask_1[:, None],
             )
 
-        # acc : (BLOCK_M, HEAD_SIZE_PADDED)
-        acc += tl.dot(P.to(V.dtype), V)
-
-    segm_output_offset = (
-        query_offset_0[:, None].to(tl.int64)
-        * (num_query_heads * NUM_SEGMENTS_PER_SEQ * HEAD_SIZE_PADDED)
-        + query_offset_1[:, None] * (NUM_SEGMENTS_PER_SEQ * HEAD_SIZE_PADDED)
-        + segm_idx * HEAD_SIZE_PADDED
-        + tl.arange(0, HEAD_SIZE_PADDED)[None, :]
-    )
-    tl.store(
-        segm_output_ptr + segm_output_offset,
-        acc,
-        mask=dim_mask[None, :] & query_mask_0[:, None] & query_mask_1[:, None],
-    )
-    segm_offset = (
-        query_offset_0.to(tl.int64) * (num_query_heads * NUM_SEGMENTS_PER_SEQ)
-        + query_offset_1 * NUM_SEGMENTS_PER_SEQ
-        + segm_idx
-    )
-    tl.store(segm_max_ptr + segm_offset, M, mask=query_mask_0 & query_mask_1)
-    tl.store(segm_expsum_ptr + segm_offset, L, mask=query_mask_0 & query_mask_1)
-
 
 @triton.jit
 def reduce_segments(
@@ -865,12 +748,7 @@ def _get_tile_size(
     element_size: int,
     is_prefill: bool,
 ) -> int:
-    """Select tile size with Gemma3-specific optimization.
-
-    For Gemma3, use 32 for both prefill and decode to better utilize
-    the larger head dimension (128/256). For other models, use
-    the default vLLM behavior.
-    """
+    """Select tile size with Gemma3-specific optimization."""
     if _is_gemma3_attention(head_size, sliding_window):
         # Gemma3: use 32 for decode (default is 16)
         return 32
@@ -878,6 +756,7 @@ def _get_tile_size(
     # Default behavior
     if is_prefill:
         return 32
+    # Note: tile size must be at least 32 for fp8 (element_size == 1).
     return 16 if element_size >= 2 else 32
 
 
@@ -911,13 +790,31 @@ def unified_attention(
     # Optional tensor for prefix lengths (PrefixLM support)
     mm_prefix_range=None,
     use_alibi_sqrt=False,
+    # KV cache quantization mode and per-token-head scale caches.
+    kv_quant_mode: KVQuantMode = KVQuantMode.NONE,
+    k_scale_cache=None,  # [num_blocks, block_size, num_kv_heads] float32
+    v_scale_cache=None,  # [num_blocks, block_size, num_kv_heads] float32
+    # Chunked attention: restrict attention to aligned blocks with lookback.
+    chunk_lookback=-1,
+    # Tensor-descriptor mode: use ``tl.make_tensor_descriptor`` for Q/K/V
+    # loads and output stores.  Enables HW 2D block reads on Intel Xe2/Xe3.
+    # The non-TD branch is dead-code-eliminated at Triton compile time so
+    # disabling this flag costs nothing.
+    use_td: bool = False,
 ):
     assert causal, "Only causal attention is supported"
-    assert q_descale is None, "Q scales not supported"
-
     if sinks is not None:
         assert sinks.shape[0] == q.shape[1], "Sinks must be num_query_heads size"
 
+    use_per_token_head_scales = kv_quant_mode in (
+        KVQuantMode.INT8_PER_TOKEN_HEAD,
+        KVQuantMode.FP8_PER_TOKEN_HEAD,
+    )
+    if use_per_token_head_scales:
+        assert k_scale_cache is not None and v_scale_cache is not None, (
+            f"{kv_quant_mode.name} requires k_scale_cache / v_scale_cache"
+        )
+
     use_mm_prefix = False
     max_mm_ranges = 0
     if mm_prefix_range is not None:
@@ -955,28 +852,75 @@ def unified_attention(
     #    = floor(q.shape[0] / BLOCK_Q) + num_seqs
     total_num_q_blocks = q.shape[0] // BLOCK_Q + num_seqs
 
-    # Tile sizes for prefill and decode. Gemma3 models use optimized values.
-    # Note: tile size must be at least 32 for fp8 (element_size == 1).
     sliding_window_val = 1 + window_size[0] if window_size[0] >= 0 else 0
+
+    # Compute chunked block size from sliding window if needed.
+    chunk_size = -1
+    if sliding_window_val > 0 and chunk_lookback > -1:
+        chunk_size = sliding_window_val // (chunk_lookback + 1)
+        assert chunk_size > 0, "sliding_window must be > chunk_lookback+1"
+    elif sliding_window_val <= 0:
+        chunk_lookback = -1
+
     TILE_SIZE_PREFILL = _get_tile_size(
-        head_size,
-        sliding_window_val,
-        q.element_size(),
-        is_prefill=True,
+        head_size, sliding_window_val, q.element_size(), is_prefill=True
     )
     TILE_SIZE_DECODE = _get_tile_size(
-        head_size,
-        sliding_window_val,
-        q.element_size(),
-        is_prefill=False,
+        head_size, sliding_window_val, q.element_size(), is_prefill=False
     )
 
+    # USE_TD requires BLOCK_SIZE % TILE_SIZE == 0 (enforced by a
+    # ``tl.static_assert`` in the kernel).  The default prefill tile
+    # size (32) is larger than a common ``block_size=16``, so clamp it
+    # down when TD is enabled.  Zero overhead when disabled.
+    if use_td:
+        TILE_SIZE_PREFILL = min(TILE_SIZE_PREFILL, block_size)
+        TILE_SIZE_DECODE = min(TILE_SIZE_DECODE, block_size)
+
+    # Tensor descriptors for Q load / output store require every element
+    # of ``block_shape`` to be a power of 2.  ``num_queries_per_kv`` is
+    # not always pow2 (e.g. Qwen2-7B: 28 / 4 = 7), so gate the Q/O paths
+    # separately from the KV tile loads (whose ``block_shape`` does not
+    # include ``num_queries_per_kv``).
+    #
+    # The Q/O descriptors also encode ``HEAD_SIZE_PADDED`` on the inner
+    # axis while the backing buffers (both flat output and per-segment
+    # output) are laid out with ``HEAD_SIZE``.  When they differ (e.g.
+    # Phi-3's head_size=96 → HEAD_SIZE_PADDED=128) the store would spill
+    # padded lanes into neighbouring heads because tensor-descriptor
+    # stores don't mask the padded tail.  Fall back to the pointer path
+    # for Q/O in that case — KV tile loads are unaffected because their
+    # ``shape`` already matches ``block_shape`` on the inner axis.
+    head_size_padded = triton.next_power_of_2(head_size)
+    _is_pow2_nq = (num_queries_per_kv & (num_queries_per_kv - 1)) == 0
+    _is_pow2_hs = head_size == head_size_padded
+    use_td_qo = use_td and _is_pow2_nq and _is_pow2_hs
+
+    # ``_load_q_td`` / ``_store_output_td`` flatten ``(num_queries_per_kv,
+    # HEAD_SIZE)`` into a single contiguous inner axis.  That's only
+    # equivalent to the pointer path when the ``num_queries_per_kv`` heads
+    # for this KV group start at ``kv_head_idx * num_queries_per_kv`` and
+    # lie exactly HEAD_SIZE apart — i.e. ``query_stride_1 == HEAD_SIZE``
+    # and ``output_stride_1 == head_size``.  This is the default vLLM
+    # query/output layout; assert it explicitly so we fail fast if a
+    # future caller passes a non-contiguous query tensor.
+    if use_td_qo:
+        assert q.stride(1) == head_size, (
+            f"USE_TD_QO requires contiguous query heads "
+            f"(q.stride(1) = {q.stride(1)} != head_size = {head_size}); "
+            f"set VLLM_TRITON_ATTN_USE_TD=0 or pad the query layout."
+        )
+        assert out.stride(1) == head_size, (
+            f"USE_TD_QO requires contiguous output heads "
+            f"(out.stride(1) = {out.stride(1)} != head_size = {head_size})."
+        )
+
     # Launch the 2D kernel if
     # 1. No intermediate tiled softmax buffers for the 3D kernel have been allocated, or
     # 2. The batch includes at least one prefill request, or
     # 3. The number of sequences exceeds the configured threshold, or
     # 4. Batch invariance is enabled
-    if (
+    use_3d = not (
         seq_threshold_3D is None
         or num_par_softmax_segments is None
         or softmax_segm_output is None
@@ -985,114 +929,113 @@ def unified_attention(
         or max_seqlen_q > 1
         or num_seqs > seq_threshold_3D
         or is_batch_invariant
-    ):
-        kernel_unified_attention_2d[
-            (
-                total_num_q_blocks,
-                num_kv_heads,
-            )
-        ](
-            output_ptr=out,
-            query_ptr=q,
-            key_cache_ptr=k,
-            value_cache_ptr=v,
-            sink_ptr=sinks,
-            block_tables_ptr=block_table,
-            seq_lens_ptr=seqused_k,
-            alibi_slopes_ptr=alibi_slopes,
-            qq_bias_ptr=qq_bias,
-            scale=softmax_scale,
-            k_scale=k_descale,
-            v_scale=v_descale,
-            out_scale=1 / output_scale if output_scale is not None else 1.0,
-            softcap=softcap,
-            num_query_heads=num_query_heads,
-            num_queries_per_kv=num_queries_per_kv,
-            block_table_stride=block_table.stride(0),
-            query_stride_0=q.stride(0),
-            query_stride_1=q.stride(1),
-            output_stride_0=out.stride(0),
-            output_stride_1=out.stride(1),
-            qq_bias_stride_0=qq_bias.stride(0) if use_qq_bias else 0,
-            BLOCK_SIZE=block_size,
-            TILE_SIZE=TILE_SIZE_PREFILL,
-            HEAD_SIZE=head_size,
-            HEAD_SIZE_PADDED=triton.next_power_of_2(head_size),
-            USE_ALIBI_SLOPES=use_alibi_slopes,
-            USE_ALIBI_SQRT=use_alibi_sqrt,
-            USE_QQ_BIAS=use_qq_bias,
-            USE_SOFTCAP=(softcap > 0),
-            USE_SINKS=(sinks is not None),
-            USE_MM_PREFIX=use_mm_prefix,
-            MAX_MM_RANGES=max_mm_ranges,
-            mm_prefix_range_ptr=mm_prefix_range,
-            SLIDING_WINDOW=(1 + window_size[0]),
-            stride_k_cache_0=k.stride(0),
-            stride_k_cache_1=k.stride(1),
-            stride_k_cache_2=k.stride(2),
-            stride_k_cache_3=k.stride(3),
-            stride_v_cache_0=v.stride(0),
-            stride_v_cache_1=v.stride(1),
-            stride_v_cache_2=v.stride(2),
-            stride_v_cache_3=v.stride(3),
-            query_start_len_ptr=cu_seqlens_q,
-            BLOCK_Q=BLOCK_Q,
-            num_seqs=num_seqs,
-            BLOCK_M=BLOCK_M,
-            USE_FP8=output_scale is not None,
-        )
+    )
+
+    # The kernel signature is the same for 2D and 3D — only the launch
+    # grid + a handful of constexpr toggles differ.  Per-token-head scale
+    # caches and their strides are required arguments; non-per-token-head
+    # modes pass dummy zeros (the code path is dead-code eliminated by
+    # the ``USE_PER_TOKEN_HEAD_SCALES`` constexpr branch in the kernel).
+    if use_per_token_head_scales:
+        ks_strides = k_scale_cache.stride()
+        vs_strides = v_scale_cache.stride()
+        ks_blk, ks_slot, ks_head = ks_strides[0], ks_strides[1], ks_strides[2]
+        vs_blk, vs_slot, vs_head = vs_strides[0], vs_strides[1], vs_strides[2]
+        k_scale_ptr = k_scale_cache
+        v_scale_ptr = v_scale_cache
     else:
-        kernel_unified_attention_3d[
-            (total_num_q_blocks, num_kv_heads, num_par_softmax_segments)
-        ](
-            segm_output_ptr=softmax_segm_output,
-            segm_max_ptr=softmax_segm_max,
-            segm_expsum_ptr=softmax_segm_expsum,
-            query_ptr=q,
-            key_cache_ptr=k,
-            value_cache_ptr=v,
-            sink_ptr=sinks,
-            block_tables_ptr=block_table,
-            seq_lens_ptr=seqused_k,
-            alibi_slopes_ptr=alibi_slopes,
-            qq_bias_ptr=qq_bias,
-            scale=softmax_scale,
-            k_scale=k_descale,
-            v_scale=v_descale,
-            softcap=softcap,
-            num_query_heads=num_query_heads,
-            num_queries_per_kv=num_queries_per_kv,
-            block_table_stride=block_table.stride(0),
-            query_stride_0=q.stride(0),
-            query_stride_1=q.stride(1),
-            qq_bias_stride_0=qq_bias.stride(0) if use_qq_bias else 0,
-            BLOCK_SIZE=block_size,
-            TILE_SIZE=TILE_SIZE_DECODE,
-            HEAD_SIZE=head_size,
-            HEAD_SIZE_PADDED=triton.next_power_of_2(head_size),
-            USE_ALIBI_SLOPES=use_alibi_slopes,
-            USE_ALIBI_SQRT=use_alibi_sqrt,
-            USE_QQ_BIAS=use_qq_bias,
-            USE_SOFTCAP=(softcap > 0),
-            USE_SINKS=(sinks is not None),
-            USE_MM_PREFIX=use_mm_prefix,
-            MAX_MM_RANGES=max_mm_ranges,
-            mm_prefix_range_ptr=mm_prefix_range,
-            SLIDING_WINDOW=(1 + window_size[0]),
-            stride_k_cache_0=k.stride(0),
-            stride_k_cache_1=k.stride(1),
-            stride_k_cache_2=k.stride(2),
-            stride_k_cache_3=k.stride(3),
-            stride_v_cache_0=v.stride(0),
-            stride_v_cache_1=v.stride(1),
-            stride_v_cache_2=v.stride(2),
-            stride_v_cache_3=v.stride(3),
-            query_start_len_ptr=cu_seqlens_q,
-            BLOCK_Q=BLOCK_Q,
-            num_seqs=num_seqs,
-            BLOCK_M=BLOCK_M,
-            NUM_SEGMENTS_PER_SEQ=num_par_softmax_segments,
-        )
+        ks_blk = ks_slot = ks_head = 0
+        vs_blk = vs_slot = vs_head = 0
+        # Pass the K cache as a stand-in pointer; never dereferenced.
+        k_scale_ptr = k
+        v_scale_ptr = v
+    # 3D needs real segm tensors; 2D never touches them but Triton wants
+    # a non-null pointer.  Reuse ``out`` as the placeholder.
+    segm_output_ptr = softmax_segm_output if use_3d else out
+    segm_max_ptr = softmax_segm_max if use_3d else out
+    segm_expsum_ptr = softmax_segm_expsum if use_3d else out
+    num_segments = num_par_softmax_segments if use_3d else 1
+
+    grid: tuple[Any, ...]
+    if not use_3d:
+        grid = (total_num_q_blocks, num_kv_heads)
+        tile_size = TILE_SIZE_PREFILL
+    else:
+        grid = (total_num_q_blocks, num_kv_heads, num_par_softmax_segments)
+        tile_size = TILE_SIZE_DECODE
+
+    kernel_unified_attention[grid](
+        output_ptr=out,
+        segm_output_ptr=segm_output_ptr,
+        segm_max_ptr=segm_max_ptr,
+        segm_expsum_ptr=segm_expsum_ptr,
+        query_ptr=q,
+        key_cache_ptr=k,
+        value_cache_ptr=v,
+        sink_ptr=sinks,
+        block_tables_ptr=block_table,
+        seq_lens_ptr=seqused_k,
+        alibi_slopes_ptr=alibi_slopes,
+        qq_bias_ptr=qq_bias,
+        k_scale_cache_ptr=k_scale_ptr,
+        v_scale_cache_ptr=v_scale_ptr,
+        scale=softmax_scale,
+        q_scale=q_descale,
+        k_scale=k_descale,
+        v_scale=v_descale,
+        out_scale=1 / output_scale if output_scale is not None else 1.0,
+        softcap=softcap,
+        num_query_heads=num_query_heads,
+        num_queries_per_kv=num_queries_per_kv,
+        block_table_stride=block_table.stride(0),
+        query_stride_0=q.stride(0),
+        query_stride_1=q.stride(1),
+        output_stride_0=out.stride(0),
+        output_stride_1=out.stride(1),
+        qq_bias_stride_0=qq_bias.stride(0) if use_qq_bias else 0,
+        BLOCK_SIZE=block_size,
+        TILE_SIZE=tile_size,
+        HEAD_SIZE=head_size,
+        HEAD_SIZE_PADDED=head_size_padded,
+        USE_ALIBI_SLOPES=use_alibi_slopes,
+        USE_ALIBI_SQRT=use_alibi_sqrt,
+        USE_QQ_BIAS=use_qq_bias,
+        USE_SOFTCAP=(softcap > 0),
+        USE_SINKS=(sinks is not None),
+        USE_MM_PREFIX=use_mm_prefix,
+        MAX_MM_RANGES=max_mm_ranges,
+        mm_prefix_range_ptr=mm_prefix_range,
+        SLIDING_WINDOW=(1 + window_size[0]),
+        stride_k_cache_0=k.stride(0),
+        stride_k_cache_1=k.stride(1),
+        stride_k_cache_2=k.stride(2),
+        stride_k_cache_3=k.stride(3),
+        stride_v_cache_0=v.stride(0),
+        stride_v_cache_1=v.stride(1),
+        stride_v_cache_2=v.stride(2),
+        stride_v_cache_3=v.stride(3),
+        stride_ks_blk=ks_blk,
+        stride_ks_slot=ks_slot,
+        stride_ks_head=ks_head,
+        stride_vs_blk=vs_blk,
+        stride_vs_slot=vs_slot,
+        stride_vs_head=vs_head,
+        query_start_len_ptr=cu_seqlens_q,
+        BLOCK_Q=BLOCK_Q,
+        num_seqs=num_seqs,
+        BLOCK_M=BLOCK_M,
+        NUM_SEGMENTS_PER_SEQ=num_segments,
+        USE_FP8=output_scale is not None,
+        IS_3D=use_3d,
+        KV_QUANT_MODE=kv_quant_mode,
+        Q_IS_FP8=(q.dtype == current_platform.fp8_dtype()),
+        CHUNK_LOOKBACK=chunk_lookback,
+        CHUNK_SIZE=chunk_size,
+        USE_TD=use_td,
+        USE_TD_QO=use_td_qo,
+    )
+
+    if use_3d:
         reduce_segments[(q.shape[0], num_query_heads)](
             output_ptr=out,
             segm_output_ptr=softmax_segm_output,
@@ -1107,7 +1050,7 @@ def unified_attention(
             block_table_stride=block_table.stride(0),
             TILE_SIZE=TILE_SIZE_DECODE,
             HEAD_SIZE=head_size,
-            HEAD_SIZE_PADDED=triton.next_power_of_2(head_size),
+            HEAD_SIZE_PADDED=head_size_padded,
             query_start_len_ptr=cu_seqlens_q,
             BLOCK_Q=BLOCK_Q,
             NUM_SEGMENTS_PER_SEQ=num_par_softmax_segments,
diff --git a/vllm/v1/attention/ops/vit_attn_wrappers.py b/vllm/v1/attention/ops/vit_attn_wrappers.py
index 6ffe110adaa4..4506f452cf9a 100644
--- a/vllm/v1/attention/ops/vit_attn_wrappers.py
+++ b/vllm/v1/attention/ops/vit_attn_wrappers.py
@@ -279,6 +279,10 @@ def flashinfer_wrapper(
     cu_seqlens: torch.Tensor | None = None,
     max_seqlen: torch.Tensor | None = None,
     sequence_lengths: torch.Tensor | None = None,
+    q_scale: torch.Tensor | None = None,
+    k_scale: torch.Tensor | None = None,
+    v_scale: torch.Tensor | None = None,
+    o_data_type: torch.dtype | None = None,
 ) -> torch.Tensor:
     from flashinfer.prefill import cudnn_batch_prefill_with_kv_cache
 
@@ -292,6 +296,9 @@ def flashinfer_wrapper(
     # RoPE has already made q and k contiguous.
     q, k = q.contiguous(), k.contiguous()
 
+    assert cu_seqlens is not None
+    assert max_seqlen is not None
+    assert sequence_lengths is not None
     assert len(cu_seqlens) % 2 == 0, "cu_seqlens must be divisible by 2"
     cu_seqlength = len(cu_seqlens) // 2
     batch_offsets_qko = cu_seqlens[:cu_seqlength].view(-1, 1, 1, 1)
@@ -315,6 +322,10 @@ def flashinfer_wrapper(
         batch_offsets_k=batch_offsets_qko,
         batch_offsets_v=batch_offsets_v,
         batch_offsets_o=batch_offsets_qko,
+        q_scale=q_scale,
+        k_scale=k_scale,
+        v_scale=v_scale,
+        o_data_type=o_data_type,
     )
 
     if is_reshaped:
@@ -332,8 +343,12 @@ def vit_flashinfer_wrapper_fake(
     cu_seqlens: torch.Tensor | None = None,
     max_seqlen: torch.Tensor | None = None,
     sequence_lengths: torch.Tensor | None = None,
+    q_scale: torch.Tensor | None = None,
+    k_scale: torch.Tensor | None = None,
+    v_scale: torch.Tensor | None = None,
+    o_data_type: torch.dtype | None = None,
 ) -> torch.Tensor:
-    return torch.empty_like(q)
+    return torch.empty_like(q, dtype=o_data_type or q.dtype)
 
 
 direct_register_custom_op(
@@ -352,7 +367,22 @@ def vit_flashinfer_wrapper(
     cu_seqlens: torch.Tensor | None = None,
     max_seqlen: torch.Tensor | None = None,
     sequence_lengths: torch.Tensor | None = None,
+    q_scale: torch.Tensor | None = None,
+    k_scale: torch.Tensor | None = None,
+    v_scale: torch.Tensor | None = None,
+    o_data_type: torch.dtype | None = None,
 ) -> torch.Tensor:
     return torch.ops.vllm.flashinfer_wrapper(
-        q, k, v, scale, workspace_buffer, cu_seqlens, max_seqlen, sequence_lengths
+        q,
+        k,
+        v,
+        scale,
+        workspace_buffer,
+        cu_seqlens,
+        max_seqlen,
+        sequence_lengths,
+        q_scale,
+        k_scale,
+        v_scale,
+        o_data_type,
     )
diff --git a/vllm/v1/attention/selector.py b/vllm/v1/attention/selector.py
index c37b709fef98..d98de6966f52 100644
--- a/vllm/v1/attention/selector.py
+++ b/vllm/v1/attention/selector.py
@@ -6,12 +6,12 @@
 
 import torch
 
+import vllm.envs as envs
 from vllm.config.cache import CacheDType
 from vllm.logger import init_logger
 from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.v1.attention.backend import AttentionBackend, AttentionType
 from vllm.v1.attention.backends.registry import (
-    MAMBA_TYPE_TO_BACKEND_MAP,
     MambaAttentionBackendEnum,
 )
 
@@ -29,6 +29,8 @@ class AttentionSelectorConfig(NamedTuple):
     use_mm_prefix: bool = False
     use_per_head_quant_scales: bool = False
     attn_type: str = AttentionType.DECODER
+    use_non_causal: bool = False
+    use_batch_invariant: bool = False
 
     def __repr__(self):
         return (
@@ -41,7 +43,9 @@ def __repr__(self):
             f"use_sparse={self.use_sparse}, "
             f"use_mm_prefix={self.use_mm_prefix}, "
             f"use_per_head_quant_scales={self.use_per_head_quant_scales}, "
-            f"attn_type={self.attn_type})"
+            f"attn_type={self.attn_type}, "
+            f"use_non_causal={self.use_non_causal}, "
+            f"use_batch_invariant={self.use_batch_invariant})"
         )
 
 
@@ -87,6 +91,8 @@ def get_attn_backend(
         use_mm_prefix=use_mm_prefix,
         use_per_head_quant_scales=use_per_head_quant_scales,
         attn_type=attn_type or AttentionType.DECODER,
+        use_non_causal=vllm_config.attention_config.use_non_causal,
+        use_batch_invariant=envs.VLLM_BATCH_INVARIANT,
     )
 
     return _cached_get_attn_backend(
@@ -131,7 +137,7 @@ def _cached_get_attn_backend(
 
 
 def get_mamba_attn_backend(
-    mamba_type: str,
+    mamba_type: MambaAttentionBackendEnum,
 ) -> type[AttentionBackend]:
     """Select which mamba attention backend to use and lazily import it."""
     return _cached_get_mamba_attn_backend(mamba_type)
@@ -139,19 +145,14 @@ def get_mamba_attn_backend(
 
 @cache
 def _cached_get_mamba_attn_backend(
-    mamba_type: str,
+    mamba_type: MambaAttentionBackendEnum,
 ) -> type[AttentionBackend]:
-    assert mamba_type and isinstance(mamba_type, str)
+    assert mamba_type and isinstance(mamba_type, MambaAttentionBackendEnum)
 
-    selected_backend = None
-    try:
-        backend_name = MAMBA_TYPE_TO_BACKEND_MAP[mamba_type]
-        selected_backend = MambaAttentionBackendEnum[backend_name]
-    except KeyError as e:
-        raise ValueError(
-            f"Invalid mamba attention backend type: '{mamba_type}'. Valid "
-            f"types are: {list(MAMBA_TYPE_TO_BACKEND_MAP.keys())}"
-        ) from e
-
-    mamba_attn_backend = selected_backend.get_class()
+    mamba_attn_backend = mamba_type.get_class()
+    if envs.VLLM_BATCH_INVARIANT and not mamba_attn_backend.supports_batch_invariance():
+        raise RuntimeError(
+            "VLLM batch_invariant mode is not supported for "
+            f"{mamba_attn_backend.get_name()}."
+        )
     return mamba_attn_backend
diff --git a/vllm/v1/core/block_pool.py b/vllm/v1/core/block_pool.py
index 4b62d2a4c642..513e4bf380b9 100644
--- a/vllm/v1/core/block_pool.py
+++ b/vllm/v1/core/block_pool.py
@@ -22,6 +22,7 @@
     KVCacheBlock,
     generate_block_hash_extra_keys,
     get_block_hash,
+    get_group_id,
     make_block_hash_with_group_id,
     maybe_convert_block_hash,
 )
@@ -215,6 +216,7 @@ def cache_full_blocks(
         num_full_blocks: int,
         block_size: int,
         kv_cache_group_id: int,
+        block_mask: list[bool] | None = None,
     ) -> None:
         """Cache a list of full blocks for prefix caching.
         This function takes a list of blocks that will have their block hash
@@ -232,11 +234,19 @@ def cache_full_blocks(
                 be cached after this function.
             block_size: Number of tokens in each block.
             kv_cache_group_id: The id of the KV cache group.
+            block_mask: Optional mask aligned with
+                ``blocks[num_cached_blocks:num_full_blocks]``. When provided,
+                blocks where the mask is False are skipped (treated like null
+                blocks). Used by groups whose ``find_longest_cache_hit`` only
+                consults a subset of blocks (e.g. SWA tail-window), so blocks
+                that can never serve a hit stay out of the prefix-cache hash
+                map.
         """
         if num_cached_blocks >= num_full_blocks:
             return
         new_full_blocks = blocks[num_cached_blocks:num_full_blocks]
         assert len(request.block_hashes) >= num_full_blocks
+        assert block_mask is None or len(block_mask) == len(new_full_blocks)
         if block_size == self.hash_block_size:
             # Common case.
             block_hashes: BlockHashList = request.block_hashes
@@ -255,10 +265,10 @@ def cache_full_blocks(
             [] if self.enable_kv_cache_events else None
         )
         for i, blk in enumerate(new_full_blocks):
-            # Some blocks may be null blocks when enabling sparse attention like
-            # sliding window attention, or Mamba models with prefix-caching in
-            # align mode. We skip null blocks here.
-            if blk.is_null:
+            # Some blocks may be null or masked out when enabling sparse attention
+            # like sliding window attention, or Mamba models with prefix-caching
+            # in align mode. We skip null blocks here.
+            if blk.is_null or (block_mask is not None and not block_mask[i]):
                 continue
             assert blk.block_hash is None
             block_hash = new_block_hashes[i]
@@ -287,12 +297,14 @@ def cache_full_blocks(
             # Generate extra keys for each block individually.
             # Each block may have different extra_keys (e.g., different MM
             # features, or cache_salt only for the first block).
-            # Skip null blocks to match the length of new_hashes.
+            # Skip null/masked-out blocks to match the length of new_hashes.
             extra_keys_list: list[tuple[Any, ...] | None] = []
             curr_mm_idx = 0
             for i in range(num_cached_blocks, num_full_blocks):
                 if blocks[i].is_null:
                     continue
+                if block_mask is not None and not block_mask[i - num_cached_blocks]:
+                    continue
                 block_start = i * block_size
                 block_end = block_start + block_size
                 extra_keys, curr_mm_idx = generate_block_hash_extra_keys(
@@ -314,6 +326,7 @@ def cache_full_blocks(
                     if request.lora_request
                     else None,
                     extra_keys=extra_keys_list if extra_keys_list else None,
+                    group_idx=kv_cache_group_id,
                 )
             )
 
@@ -377,14 +390,11 @@ def _maybe_evict_cached_block(self, block: KVCacheBlock) -> bool:
         block.reset_hash()
 
         if self.enable_kv_cache_events:
-            # FIXME (Chen): Not sure whether we should return `hash_value`
-            # or `(hash_value, group_id)` here. But it's fine now because
-            # we disable hybrid kv cache manager when kv cache event is
-            # enabled, so there is only one group.
             self.kv_event_queue.append(
                 BlockRemoved(
                     block_hashes=[maybe_convert_block_hash(get_block_hash(block_hash))],
                     medium=MEDIUM_GPU,
+                    group_idx=get_group_id(block_hash),
                 )
             )
         return True
diff --git a/vllm/v1/core/kv_cache_coordinator.py b/vllm/v1/core/kv_cache_coordinator.py
index eaa95dfe49f7..c5e8953745ad 100644
--- a/vllm/v1/core/kv_cache_coordinator.py
+++ b/vllm/v1/core/kv_cache_coordinator.py
@@ -34,6 +34,7 @@ def __init__(
         self,
         kv_cache_config: KVCacheConfig,
         max_model_len: int,
+        max_num_batched_tokens: int,
         use_eagle: bool,
         enable_caching: bool,
         enable_kv_cache_events: bool,
@@ -47,18 +48,26 @@ def __init__(
         self.enable_caching = enable_caching
 
         self.block_pool = BlockPool(
-            kv_cache_config.num_blocks,
-            enable_caching,
-            hash_block_size,
-            enable_kv_cache_events,
-            metrics_collector,
+            num_gpu_blocks=kv_cache_config.num_blocks,
+            enable_caching=enable_caching,
+            hash_block_size=hash_block_size,
+            enable_kv_cache_events=enable_kv_cache_events,
+            metrics_collector=metrics_collector,
         )
 
-        # Needs special handling for find_longest_cache_hit if eagle is enabled
-        self.use_eagle = use_eagle
+        # KV cache group indices that get the EAGLE last-block drop.
+        self.eagle_group_ids: set[int] = {
+            i for i, g in enumerate(kv_cache_config.kv_cache_groups) if g.is_eagle_group
+        }
+        # Conservatively fall back to flag all groups when no group is flagged.
+        if use_eagle and not self.eagle_group_ids:
+            self.eagle_group_ids = set(range(len(kv_cache_config.kv_cache_groups)))
+
         self.single_type_managers = tuple(
             get_manager_for_kv_cache_spec(
                 kv_cache_spec=kv_cache_group.kv_cache_spec,
+                max_num_batched_tokens=max_num_batched_tokens,
+                max_model_len=max_model_len,
                 block_pool=self.block_pool,
                 enable_caching=enable_caching,
                 kv_cache_group_id=i,
@@ -76,6 +85,7 @@ def get_num_blocks_to_allocate(
         num_encoder_tokens: int,
         total_computed_tokens: int,
         num_tokens_main_model: int,
+        apply_admission_cap: bool = False,
     ) -> int:
         """
         Get the number of blocks needed to be allocated for the request.
@@ -92,6 +102,10 @@ def get_num_blocks_to_allocate(
             num_tokens_main_model: The number of tokens for the main model (aka target
                 model in spec decode). w/o spec decode, it is num_tokens;
                 with spec decode, it is num_tokens - num_lookahead_tokens.
+            apply_admission_cap: If True, apply the recycling-aware
+                per-request admission cap (SWA / chunked-local). Set only by
+                the full-sequence admission gate; per-step allocation must
+                leave it False so the predictor matches `allocate_new_blocks`.
 
         Returns:
             The number of blocks to allocate.
@@ -102,7 +116,12 @@ def get_num_blocks_to_allocate(
                 # For cross-attention, we issue a single static allocation
                 # of blocks based on the number of encoder input tokens.
                 num_blocks_to_allocate += manager.get_num_blocks_to_allocate(
-                    request_id, num_encoder_tokens, [], 0, num_encoder_tokens
+                    request_id,
+                    num_encoder_tokens,
+                    [],
+                    0,
+                    num_encoder_tokens,
+                    apply_admission_cap=apply_admission_cap,
                 )
             else:
                 num_blocks_to_allocate += manager.get_num_blocks_to_allocate(
@@ -111,6 +130,7 @@ def get_num_blocks_to_allocate(
                     new_computed_blocks[i],
                     total_computed_tokens,
                     num_tokens_main_model,
+                    apply_admission_cap=apply_admission_cap,
                 )
         return num_blocks_to_allocate
 
@@ -265,6 +285,7 @@ def __init__(
         self,
         kv_cache_config: KVCacheConfig,
         max_model_len: int,
+        max_num_batched_tokens: int,
         use_eagle: bool,
         enable_kv_cache_events: bool,
         dcp_world_size: int,
@@ -275,6 +296,7 @@ def __init__(
         super().__init__(
             kv_cache_config,
             max_model_len,
+            max_num_batched_tokens,
             use_eagle,
             False,
             enable_kv_cache_events,
@@ -310,6 +332,7 @@ def __init__(
         self,
         kv_cache_config: KVCacheConfig,
         max_model_len: int,
+        max_num_batched_tokens: int,
         use_eagle: bool,
         enable_caching: bool,
         enable_kv_cache_events: bool,
@@ -321,6 +344,7 @@ def __init__(
         super().__init__(
             kv_cache_config,
             max_model_len,
+            max_num_batched_tokens,
             use_eagle,
             enable_caching,
             enable_kv_cache_events,
@@ -357,7 +381,7 @@ def find_longest_cache_hit(
             kv_cache_group_ids=[0],
             block_pool=self.block_pool,
             kv_cache_spec=self.kv_cache_spec,
-            use_eagle=self.use_eagle,
+            use_eagle=0 in self.eagle_group_ids,
             alignment_tokens=self.block_size,
             dcp_world_size=self.dcp_world_size,
             pcp_world_size=self.pcp_world_size,
@@ -375,6 +399,7 @@ def __init__(
         self,
         kv_cache_config: KVCacheConfig,
         max_model_len: int,
+        max_num_batched_tokens: int,
         use_eagle: bool,
         enable_caching: bool,
         enable_kv_cache_events: bool,
@@ -386,6 +411,7 @@ def __init__(
         super().__init__(
             kv_cache_config,
             max_model_len,
+            max_num_batched_tokens,
             use_eagle,
             enable_caching,
             enable_kv_cache_events,
@@ -450,6 +476,30 @@ def verify_and_split_kv_cache_groups(self) -> None:
         block_sizes = [spec.block_size for spec, _, _ in attention_groups]
         self.lcm_block_size = lcm(*block_sizes)
 
+        # Attention-group indices (into ``self.attention_groups``) that
+        # contain at least one EAGLE/MTP KV cache group.
+        self.eagle_attn_group_indices: set[int] = {
+            i
+            for i, (_, group_ids, _) in enumerate(self.attention_groups)
+            if any(gid in self.eagle_group_ids for gid in group_ids)
+        }
+
+    def cache_blocks(self, request: Request, num_computed_tokens: int) -> None:
+        # Cache hits in this coordinator are always a multiple of
+        # ``lcm_block_size`` tokens (see ``find_longest_cache_hit``). Within an
+        # aligned region, SWA groups only consult a subset of blocks per
+        # ``lcm_block_size``-segment so the unused blocks also stay out of the
+        # prefix-cache hash map.
+        num_computed_tokens = (
+            num_computed_tokens // self.lcm_block_size * self.lcm_block_size
+        )
+        for manager in self.single_type_managers:
+            manager.cache_blocks(
+                request,
+                num_computed_tokens,
+                alignment_tokens=self.lcm_block_size,
+            )
+
     def find_longest_cache_hit(
         self,
         block_hashes: list[BlockHash],
@@ -485,49 +535,62 @@ def _get_block_hashes(kv_cache_spec: KVCacheSpec) -> BlockHashList:
         hit_blocks_by_group: list[list[KVCacheBlock] | None] = [None] * num_groups
 
         # Simple hybrid (1 full attn + 1 other): one iteration suffices.
-        # Full attn is always first if it exists. This avoids EAGLE drops
-        # being applied multiple times to non-full-attn groups.
-        # FIXME (yifan): However, for complex hybrid models with multiple attn
-        # groups, we still have the EAGLE spiral block dropping problem. See
-        # discussion in issue https://github.com/vllm-project/vllm/issues/32802.
+        # Full attn is always first if it exists.
         is_simple_hybrid = len(self.attention_groups) == 2 and isinstance(
             self.attention_groups[0][0], FullAttentionSpec
         )
 
+        # Attention-group indices whose EAGLE drop is verified at the current
+        # ``curr_hit_length``. Each eagle group applies the drop at most once
+        # per candidate length (see issue #32802).
+        eagle_verified: set[int] = set()
+
         while True:
             curr_hit_length = hit_length
 
-            for spec, group_ids, manager_cls in self.attention_groups:
-                is_full_attn = isinstance(spec, FullAttentionSpec)
-
-                # Full attention: reuse cached blocks (downward-closed property)
+            for idx, (spec, group_ids, manager_cls) in enumerate(self.attention_groups):
                 cached_blocks = hit_blocks_by_group[group_ids[0]]
-                if is_full_attn and cached_blocks is not None:
-                    # For full attention, we only need to compute the cache hit
-                    # length once. Starting from the second iteration, if the
-                    # curr_hit_length is reduced by other groups, we can simply
-                    # keep the first (curr_hit_length // block_size) blocks from
-                    # the last iteration.
-                    num_blocks = curr_hit_length // spec.block_size
-                    curr_hit_length = num_blocks * spec.block_size
-                else:
-                    hit_blocks = manager_cls.find_longest_cache_hit(
-                        block_hashes=_get_block_hashes(spec),
-                        max_length=curr_hit_length,
-                        kv_cache_group_ids=group_ids,
-                        block_pool=self.block_pool,
-                        kv_cache_spec=spec,
-                        use_eagle=self.use_eagle,
-                        alignment_tokens=self.lcm_block_size,
+                if isinstance(spec, FullAttentionSpec) and cached_blocks is not None:
+                    # Full attention is downward-closed: we only need to look
+                    # up cached blocks once; on subsequent iterations just trim
+                    # to the (reduced) current hit length.
+                    curr_hit_length = (
+                        curr_hit_length // spec.block_size * spec.block_size
                     )
-                    curr_hit_length = len(hit_blocks[0]) * spec.block_size
-                    for group_id, blocks in zip(group_ids, hit_blocks):
-                        hit_blocks_by_group[group_id] = blocks
+                    continue
+
+                use_eagle = (
+                    idx in self.eagle_attn_group_indices and idx not in eagle_verified
+                )
+
+                _max_length = curr_hit_length
+                if use_eagle:
+                    # Eagle needs to match one more block and then pop the last.
+                    _max_length = min(
+                        curr_hit_length + spec.block_size, max_cache_hit_length
+                    )
+                hit_blocks = manager_cls.find_longest_cache_hit(
+                    block_hashes=_get_block_hashes(spec),
+                    max_length=_max_length,
+                    kv_cache_group_ids=group_ids,
+                    block_pool=self.block_pool,
+                    kv_cache_spec=spec,
+                    use_eagle=use_eagle,
+                    alignment_tokens=self.lcm_block_size,
+                )
+                _new_hit_length = len(hit_blocks[0]) * spec.block_size
+                if use_eagle:
+                    eagle_verified.add(idx)
+                elif _new_hit_length < curr_hit_length:
+                    # length shrunk; invalidate previous eagle verifications
+                    eagle_verified.clear()
+                curr_hit_length = _new_hit_length
+                for group_id, blocks in zip(group_ids, hit_blocks):
+                    hit_blocks_by_group[group_id] = blocks
 
             if curr_hit_length >= hit_length:
                 break
             hit_length = curr_hit_length
-            # Simple hybrid: exit after one iteration
             if is_simple_hybrid:
                 break
 
@@ -547,6 +610,7 @@ def _get_block_hashes(kv_cache_spec: KVCacheSpec) -> BlockHashList:
 def get_kv_cache_coordinator(
     kv_cache_config: KVCacheConfig,
     max_model_len: int,
+    max_num_batched_tokens: int,
     use_eagle: bool,
     enable_caching: bool,
     enable_kv_cache_events: bool,
@@ -559,6 +623,7 @@ def get_kv_cache_coordinator(
         return KVCacheCoordinatorNoPrefixCache(
             kv_cache_config,
             max_model_len,
+            max_num_batched_tokens,
             use_eagle,
             enable_kv_cache_events,
             dcp_world_size=dcp_world_size,
@@ -570,6 +635,7 @@ def get_kv_cache_coordinator(
         return UnitaryKVCacheCoordinator(
             kv_cache_config,
             max_model_len,
+            max_num_batched_tokens,
             use_eagle,
             enable_caching,
             enable_kv_cache_events,
@@ -581,6 +647,7 @@ def get_kv_cache_coordinator(
     return HybridKVCacheCoordinator(
         kv_cache_config,
         max_model_len,
+        max_num_batched_tokens,
         use_eagle,
         enable_caching,
         enable_kv_cache_events,
diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py
index dcec5e05bf97..9359d8843a91 100644
--- a/vllm/v1/core/kv_cache_manager.py
+++ b/vllm/v1/core/kv_cache_manager.py
@@ -6,12 +6,16 @@
 from dataclasses import dataclass
 from typing import Literal, overload
 
-from vllm.distributed.kv_events import KVCacheEvent
+from vllm.distributed.kv_events import BlockStored, KVCacheEvent
 from vllm.logger import init_logger
 from vllm.v1.core.kv_cache_coordinator import get_kv_cache_coordinator
 from vllm.v1.core.kv_cache_metrics import KVCacheMetricsCollector
 from vllm.v1.core.kv_cache_utils import KVCacheBlock
-from vllm.v1.kv_cache_interface import KVCacheConfig
+from vllm.v1.kv_cache_interface import (
+    KVCacheConfig,
+    get_kv_cache_spec_kind,
+    get_kv_cache_spec_sliding_window,
+)
 from vllm.v1.metrics.stats import PrefixCacheStats
 from vllm.v1.request import Request
 
@@ -109,6 +113,7 @@ def __init__(
         kv_cache_config: KVCacheConfig,
         max_model_len: int,
         hash_block_size: int,
+        max_num_batched_tokens: int | None = None,
         enable_caching: bool = True,
         use_eagle: bool = False,
         log_stats: bool = False,
@@ -118,6 +123,11 @@ def __init__(
         metrics_collector: KVCacheMetricsCollector | None = None,
     ) -> None:
         self.max_model_len = max_model_len
+        # When unset, fall back to `max_model_len` so the recycling-aware cap
+        # collapses to the prior (uncapped) admission behavior. The scheduler
+        # always supplies the real value at runtime.
+        if max_num_batched_tokens is None:
+            max_num_batched_tokens = max_model_len
 
         self.enable_caching = enable_caching
         self.use_eagle = use_eagle
@@ -131,6 +141,7 @@ def __init__(
         self.coordinator = get_kv_cache_coordinator(
             kv_cache_config=kv_cache_config,
             max_model_len=self.max_model_len,
+            max_num_batched_tokens=max_num_batched_tokens,
             use_eagle=self.use_eagle,
             enable_caching=self.enable_caching,
             enable_kv_cache_events=enable_kv_cache_events,
@@ -142,6 +153,13 @@ def __init__(
         self.num_kv_cache_groups = len(kv_cache_config.kv_cache_groups)
         self.block_pool = self.coordinator.block_pool
         self.kv_cache_config = kv_cache_config
+        self.kv_cache_event_metadata = tuple(
+            (
+                get_kv_cache_spec_kind(group.kv_cache_spec).value,
+                get_kv_cache_spec_sliding_window(group.kv_cache_spec),
+            )
+            for group in kv_cache_config.kv_cache_groups
+        )
 
         # Pre-constructed KVCacheBlocks with no blocks, callers should use this
         # via create_kv_cache_blocks instead of creating new ones to avoid GC
@@ -215,45 +233,6 @@ def get_computed_blocks(self, request: Request) -> tuple[KVCacheBlocks, int]:
 
         return self.create_kv_cache_blocks(computed_blocks), num_new_computed_tokens
 
-    def can_fit_full_sequence(
-        self,
-        request: Request,
-        num_new_computed_tokens: int = 0,
-        new_computed_blocks: KVCacheBlocks | None = None,
-        num_external_computed_tokens: int = 0,
-        num_encoder_tokens: int = 0,
-    ) -> bool:
-        """Check if the KV cache has enough free blocks to hold the full
-        sequence, accounting for prefix cache hits and sliding window.
-
-        This is used as an admission gate to prevent over-admitting requests
-        when chunked prefill would otherwise only check the first chunk.
-        """
-        if new_computed_blocks is not None:
-            new_computed_block_list = new_computed_blocks.blocks
-        else:
-            new_computed_block_list = self.empty_kv_cache_blocks.blocks
-
-        num_local_computed_tokens = (
-            request.num_computed_tokens + num_new_computed_tokens
-        )
-        total_computed_tokens = min(
-            num_local_computed_tokens + num_external_computed_tokens,
-            self.max_model_len,
-        )
-        full_num_tokens = min(request.num_tokens, self.max_model_len)
-
-        num_blocks_to_allocate = self.coordinator.get_num_blocks_to_allocate(
-            request_id=request.request_id,
-            num_tokens=full_num_tokens,
-            new_computed_blocks=new_computed_block_list,
-            num_encoder_tokens=num_encoder_tokens,
-            total_computed_tokens=total_computed_tokens,
-            num_tokens_main_model=full_num_tokens,
-        )
-
-        return num_blocks_to_allocate <= self.block_pool.get_num_free_blocks()
-
     def allocate_slots(
         self,
         request: Request,
@@ -264,6 +243,7 @@ def allocate_slots(
         num_external_computed_tokens: int = 0,
         delay_cache_blocks: bool = False,
         num_encoder_tokens: int = 0,
+        full_sequence_must_fit: bool = False,
     ) -> KVCacheBlocks | None:
         """Add slots for a request with new tokens to append.
 
@@ -285,6 +265,10 @@ def allocate_slots(
             num_encoder_tokens: The number of encoder tokens to allocate for
                 cross-attention in encoder-decoder models(e.g., Whisper).
                 For decoder-only models, this should be 0.
+            full_sequence_must_fit: Only allocate blocks if the KV cache has enough
+                free blocks to hold the full sequence, accounting for prefix cache hits
+                and sliding window. Used as an admission gate to prevent over-admitting
+                requests when chunked prefill would otherwise only check the first chunk
 
         Blocks layout:
         ```
@@ -358,10 +342,26 @@ def allocate_slots(
             num_local_computed_tokens + num_external_computed_tokens,
             self.max_model_len,
         )
+
+        if full_sequence_must_fit:
+            # First check and fail if the full request sequence won't fit.
+            full_num_tokens = min(request.num_tokens, self.max_model_len)
+
+            num_blocks_to_allocate = self.coordinator.get_num_blocks_to_allocate(
+                request_id=request.request_id,
+                num_tokens=full_num_tokens,
+                new_computed_blocks=new_computed_block_list,
+                num_encoder_tokens=num_encoder_tokens,
+                total_computed_tokens=total_computed_tokens,
+                num_tokens_main_model=full_num_tokens,
+                apply_admission_cap=True,
+            )
+            if num_blocks_to_allocate > self.block_pool.get_num_free_blocks():
+                return None
+
         num_tokens_main_model = total_computed_tokens + num_new_tokens
         num_tokens_need_slot = min(
-            num_tokens_main_model + num_lookahead_tokens,
-            self.max_model_len,
+            num_tokens_main_model + num_lookahead_tokens, self.max_model_len
         )
 
         # Free the blocks that are skipped during the attention computation
@@ -513,7 +513,25 @@ def take_events(self) -> list[KVCacheEvent]:
         Returns:
             A list of KV cache events.
         """
-        return self.block_pool.take_events()
+        events = self.block_pool.take_events()
+        for event in events:
+            if not isinstance(event, BlockStored):
+                continue
+            if event.group_idx is None:
+                continue
+            if event.group_idx < 0 or event.group_idx >= len(
+                self.kv_cache_event_metadata
+            ):
+                logger.warning(
+                    "Group index `%s` not in KV cache metadata", event.group_idx
+                )
+                continue
+            # Annotate here so BlockPool can keep emitting structural cache
+            # events without owning semantic KV cache spec metadata.
+            kind, sliding_window = self.kv_cache_event_metadata[event.group_idx]
+            event.kv_cache_spec_kind = kind
+            event.kv_cache_spec_sliding_window = sliding_window
+        return events
 
     def get_blocks(self, request_id: str) -> KVCacheBlocks:
         """Get the blocks of a request."""
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
index 9ab5af0f6fb0..7f3a5e4fdf3f 100644
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -4,26 +4,32 @@
 
 import copy
 import hashlib
+import math
 import os
 from collections import defaultdict
 from collections.abc import Callable, Iterable, Iterator, Sequence
 from dataclasses import dataclass, replace
 from functools import partial
-from typing import Any, NewType, TypeAlias, overload
+from typing import Any, NewType, TypeAlias, cast, overload
 
 from vllm import envs
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.utils.hashing import sha256_cbor, xxhash_cbor
-from vllm.utils.math_utils import cdiv
+from vllm.utils.math_utils import cdiv, round_up
 from vllm.utils.mem_utils import format_gib
+from vllm.utils.torch_utils import get_dtype_size
 from vllm.v1.kv_cache_interface import (
     ChunkedLocalAttentionSpec,
     FullAttentionSpec,
+    HiddenStateCacheSpec,
     KVCacheConfig,
     KVCacheGroupSpec,
     KVCacheSpec,
     KVCacheTensor,
+    MambaSpec,
+    MLAAttentionSpec,
+    SlidingWindowMLASpec,
     SlidingWindowSpec,
     UniformTypeKVCacheSpecs,
 )
@@ -562,6 +568,72 @@ def hash_block_tokens(
     )
 
 
+def resolve_kv_cache_block_sizes(
+    kv_cache_config: KVCacheConfig,
+    vllm_config: VllmConfig,
+) -> tuple[int, int]:
+    """Resolve (scheduler_block_size, hash_block_size).
+
+    - ``scheduler_block_size`` is the token-alignment invariant used by the
+      scheduler (e.g. for ``num_computed_tokens`` rounding). Single group:
+      ``cache_config.block_size * dcp * pcp``. Multiple groups: LCM of every
+      group's block size — context parallelism is not supported here.
+    - ``hash_block_size`` is the granularity at which ``Request.block_hashes``
+      is computed. Single group: equals scheduler block size. Multiple groups:
+      ``cache_config.hash_block_size`` override if set, else the GCD of group
+      block sizes; every group's block size must be divisible by it. Returns
+      the scheduler block size (i.e. disables finer hashing) if block hashing
+      is inactive or a mamba group's block size diverges from the cache
+      block size (mamba_cache_mode != "align").
+    """
+    cache_config = vllm_config.cache_config
+    dcp = vllm_config.parallel_config.decode_context_parallel_size
+    pcp = vllm_config.parallel_config.prefill_context_parallel_size
+    groups = kv_cache_config.kv_cache_groups
+
+    if len(groups) <= 1:  # Single group: block_size * dcp * pcp
+        bs = cache_config.block_size * dcp * pcp
+        return bs, bs
+
+    if dcp != 1 or pcp != 1:
+        raise ValueError(
+            "Hybrid KV cache groups with multiple block sizes do not "
+            "support context parallelism (dcp_world_size/pcp_world_size > 1)."
+        )
+
+    group_block_sizes = [g.kv_cache_spec.block_size for g in groups]
+    scheduler_block_size = math.lcm(*group_block_sizes)
+
+    # Block hashes are only consumed by prefix caching and KV connectors
+    # (P/D, offloading); when neither is active, keep hash_block_size equal
+    # to the scheduler block size.
+    connector_enabled = vllm_config.kv_transfer_config is not None
+    if not (cache_config.enable_prefix_caching or connector_enabled):
+        return scheduler_block_size, scheduler_block_size
+
+    # Mamba groups with block_size != cache_config.block_size
+    # (mamba_cache_mode != "align") break divisibility; back off to the
+    # scheduler block size.
+    if any(
+        isinstance(g.kv_cache_spec, MambaSpec)
+        and g.kv_cache_spec.block_size != cache_config.block_size
+        for g in groups
+    ):
+        return scheduler_block_size, scheduler_block_size
+
+    requested = cache_config.hash_block_size
+    hash_block_size = (
+        requested if requested is not None else math.gcd(*group_block_sizes)
+    )
+    if any(bs % hash_block_size != 0 for bs in group_block_sizes):
+        raise ValueError(
+            f"Invalid hash_block_size={hash_block_size}; all KV cache group "
+            f"block sizes must be divisible by hash_block_size. "
+            f"Got group block sizes={group_block_sizes}."
+        )
+    return scheduler_block_size, hash_block_size
+
+
 def get_request_block_hasher(
     block_size: int,
     caching_hash_fn: Callable[[Any], bytes],
@@ -625,7 +697,9 @@ def _check_enough_kv_cache_memory(
     if available_memory <= 0:
         raise ValueError(
             "No available memory for the cache blocks. "
-            "Try increasing `gpu_memory_utilization` when initializing the engine. "
+            "Try increasing `gpu_memory_utilization` when initializing the engine "
+            "(this flag also controls CPU memory reservation on the CPU "
+            "backend, despite its name). "
             "See https://docs.vllm.ai/en/latest/configuration/conserving_memory/ "
             "for more details."
         )
@@ -642,11 +716,12 @@ def _check_enough_kv_cache_memory(
             )
 
         raise ValueError(
-            f"To serve at least one request with the models's max seq len "
+            f"To serve at least one request with the model's max seq len "
             f"({max_model_len}), ({format_gib(needed_memory)} GiB KV "
             f"cache is needed, which is larger than the available KV cache "
             f"memory ({format_gib(available_memory)} GiB). {estimated_msg}"
-            f"Try increasing `gpu_memory_utilization` or decreasing `max_model_len` "
+            f"Try increasing `gpu_memory_utilization` (which also controls "
+            f"CPU memory on the CPU backend) or decreasing `max_model_len` "
             f"when initializing the engine. "
             f"See https://docs.vllm.ai/en/latest/configuration/conserving_memory/ "
             f"for more details."
@@ -823,21 +898,45 @@ def get_max_concurrency_for_kv_cache_config(
 def may_override_num_blocks(vllm_config: VllmConfig, num_blocks: int) -> int:
     """
     Override the number of kv cache blocks if `num_gpu_blocks_override` is set.
+    The override is logged once, at the call site in `get_kv_cache_configs`.
     """
     if vllm_config.cache_config.num_gpu_blocks_override is not None:
-        num_gpu_blocks_override = vllm_config.cache_config.num_gpu_blocks_override
-        logger.info(
-            "Overriding num_gpu_blocks=%d with num_gpu_blocks_override=%d",
-            num_blocks,
-            num_gpu_blocks_override,
-        )
-        num_blocks = num_gpu_blocks_override
-
+        num_blocks = vllm_config.cache_config.num_gpu_blocks_override
     return num_blocks
 
 
+def _pool_bytes_per_block(kv_cache_groups: list[KVCacheGroupSpec]) -> int:
+    """
+    Bytes consumed by one block in the worker's shared KV cache pool, mirroring
+    the divisor used by `get_kv_cache_config_from_groups` to convert
+    `available_memory` into `num_blocks`. Used to compute the effective KV cache
+    capacity once `num_gpu_blocks_override` is applied.
+    """
+    if len(kv_cache_groups) == 1 and isinstance(
+        kv_cache_groups[0].kv_cache_spec, UniformTypeKVCacheSpecs
+    ):
+        return kv_cache_groups[0].kv_cache_spec.page_size_bytes
+    if all(
+        isinstance(g.kv_cache_spec, UniformTypeKVCacheSpecs) for g in kv_cache_groups
+    ):
+        # DeepseekV4: shared layout sized by the largest per-page-size bucket.
+        full_mla_spec = cast(UniformTypeKVCacheSpecs, kv_cache_groups[0].kv_cache_spec)
+        layer_tuple_page_bytes = sum(full_mla_spec.get_page_sizes())
+        num_layer_tuples = max(
+            cast(UniformTypeKVCacheSpecs, g.kv_cache_spec).get_num_layer_tuples()
+            for g in kv_cache_groups
+        )
+        return layer_tuple_page_bytes * num_layer_tuples
+    group_size = max(len(g.layer_names) for g in kv_cache_groups)
+    page_size = get_uniform_page_size([g.kv_cache_spec for g in kv_cache_groups])
+    return page_size * group_size
+
+
 def get_num_blocks(
-    vllm_config: VllmConfig, num_layers: int, available_memory: int, page_size: int
+    vllm_config: VllmConfig,
+    num_layers: int,
+    available_memory: int,
+    page_size: int,
 ) -> int:
     """
     Get the number of kv cache blocks.
@@ -850,8 +949,7 @@ def get_num_blocks(
     """
     num_blocks = int(available_memory // page_size // num_layers)
     num_blocks = max(num_blocks, 0)
-    num_blocks = may_override_num_blocks(vllm_config, num_blocks)
-    return num_blocks
+    return may_override_num_blocks(vllm_config, num_blocks)
 
 
 def get_uniform_page_size(kv_cache_specs: Iterable[KVCacheSpec]) -> int:
@@ -1078,6 +1176,63 @@ def _get_kv_cache_groups_uniform_page_size(
     return create_kv_cache_group_specs(kv_cache_spec, grouped_layers)
 
 
+def _get_kv_cache_config_deepseek_v4(
+    vllm_config: VllmConfig,
+    kv_cache_groups: list[KVCacheGroupSpec],
+    available_memory: int,
+) -> tuple[int, list[KVCacheTensor]]:
+    """DeepseekV4 KV cache tensor layout planning.
+
+    Precondition: kv_cache_groups[0] is the full-MLA group; its page sizes
+    define the canonical bucket set. Non-full-MLA groups must have been
+    page_size-padded upstream (see _get_kv_cache_groups_uniform_groups) so
+    every layer's page_size matches one of the full-MLA bucket sizes.
+
+    For each group, bucket its layers by page_size_bytes and place each
+    layer at tuple_idx = position-within-bucket. Emit one KVCacheTensor
+    per (tuple_idx, bucket) whose shared_by is the union of per-group
+    layers at that slot.
+    """
+    full_mla_spec = kv_cache_groups[0].kv_cache_spec
+    assert isinstance(full_mla_spec, UniformTypeKVCacheSpecs)
+    page_sizes = sorted(full_mla_spec.get_page_sizes())
+    layer_tuple_page_bytes = sum(page_sizes)
+
+    # Pre-bucket each group's layers by page_size (registration order within
+    # bucket). bucketed[g_idx][page_size] = [layer_name, ...].
+    bucketed: list[dict[int, list[str]]] = []
+    for group in kv_cache_groups:
+        assert isinstance(group.kv_cache_spec, UniformTypeKVCacheSpecs)
+        specs = group.kv_cache_spec.kv_cache_specs
+        b: dict[int, list[str]] = defaultdict(list)
+        for name in group.layer_names:
+            b[specs[name].page_size_bytes].append(name)
+        bucketed.append(b)
+
+    # num_layer_tuples = longest bucket list across all groups. For the
+    # full-MLA group this equals the count of layers in the largest
+    # per-page-size bucket (= get_num_layer_tuples()); for SWA sub-groups
+    # this equals the sub-group size (each has a single page_size).
+    num_layer_tuples = max(len(layers) for b in bucketed for layers in b.values())
+
+    num_blocks = available_memory // (layer_tuple_page_bytes * num_layer_tuples)
+    num_blocks = may_override_num_blocks(vllm_config, num_blocks)
+
+    kv_cache_tensors: list[KVCacheTensor] = []
+    for tuple_idx in range(num_layer_tuples):
+        for ps in page_sizes:
+            shared_by: list[str] = []
+            for b in bucketed:
+                bucket = b.get(ps)
+                if bucket is not None and tuple_idx < len(bucket):
+                    shared_by.append(bucket[tuple_idx])
+            kv_cache_tensors.append(
+                KVCacheTensor(size=ps * num_blocks, shared_by=shared_by)
+            )
+
+    return num_blocks, kv_cache_tensors
+
+
 def get_kv_cache_config_from_groups(
     vllm_config: VllmConfig,
     kv_cache_groups: list[KVCacheGroupSpec],
@@ -1108,7 +1263,7 @@ def get_kv_cache_config_from_groups(
         kv_cache_groups[0].kv_cache_spec, UniformTypeKVCacheSpecs
     ):
         # Special case: all layers have the same type of KV cache but with
-        # different hidden size. Allocate different amount of memory for each
+        # different hidden sizes. Allocate different amount of memory for each
         # layer based on its hidden size.
         num_blocks = (
             available_memory // kv_cache_groups[0].kv_cache_spec.page_size_bytes
@@ -1122,6 +1277,15 @@ def get_kv_cache_config_from_groups(
             )
             for layer_name in kv_cache_groups[0].layer_names
         ]
+    elif all(
+        isinstance(group.kv_cache_spec, UniformTypeKVCacheSpecs)
+        for group in kv_cache_groups
+    ):
+        # DeepseekV4: UniformTypeKVCacheSpecs but multiple groups.
+        # Delegate to the DeepseekV4-specific allocator.
+        num_blocks, kv_cache_tensors = _get_kv_cache_config_deepseek_v4(
+            vllm_config, kv_cache_groups, available_memory
+        )
     else:
         # General case:
         # We will have group_size memory pools, each is shared by one layer from
@@ -1188,14 +1352,48 @@ def unify_hybrid_kv_cache_specs(kv_cache_spec: dict[str, KVCacheSpec]):
     has_chunked_local_attention = any(
         isinstance(spec, ChunkedLocalAttentionSpec) for spec in kv_cache_spec.values()
     )
+    has_swa_mla = any(
+        isinstance(spec, SlidingWindowMLASpec) for spec in kv_cache_spec.values()
+    )
+
+    uniform_block_size: int | None = None
+    if has_swa_mla:
+        # For DeepseekV4, block sizes can be different for different KV cache groups.
+        # E.g., Full MLA: 256; SWA MLA: 64; C4 partial states: 4, C128 states: 8.
+        assert has_full_attention
+        any_full_spec = next(
+            iter(
+                spec
+                for spec in kv_cache_spec.values()
+                if isinstance(spec, FullAttentionSpec)
+            )
+        )
+        uniform_block_size = any_full_spec.block_size
+
     if has_full_attention and (has_sliding_window or has_chunked_local_attention):
         for layer_name, spec in kv_cache_spec.items():
-            if isinstance(spec, SlidingWindowSpec):
+            if isinstance(spec, SlidingWindowMLASpec):
+                kv_cache_spec[layer_name] = MLAAttentionSpec(
+                    block_size=uniform_block_size
+                    if uniform_block_size is not None
+                    else spec.block_size,
+                    num_kv_heads=spec.num_kv_heads,
+                    head_size=spec.head_size,
+                    dtype=spec.dtype,
+                    page_size_padded=spec.page_size_padded,
+                    cache_dtype_str=spec.cache_dtype_str,
+                    alignment=spec.alignment,
+                    compress_ratio=spec.compress_ratio,
+                    model_version=spec.model_version,
+                )
+            elif isinstance(spec, SlidingWindowSpec):
                 kv_cache_spec[layer_name] = FullAttentionSpec(
                     block_size=spec.block_size,
                     num_kv_heads=spec.num_kv_heads,
                     head_size=spec.head_size,
+                    head_size_v=spec.head_size_v,
                     dtype=spec.dtype,
+                    kv_quant_mode=spec.kv_quant_mode,
                     sliding_window=spec.sliding_window,
                     page_size_padded=spec.page_size_padded,
                 )
@@ -1219,6 +1417,204 @@ def unify_hybrid_kv_cache_specs(kv_cache_spec: dict[str, KVCacheSpec]):
         )
 
 
+def group_and_unify_kv_cache_specs(
+    kv_cache_spec: dict[str, KVCacheSpec],
+) -> list[UniformTypeKVCacheSpecs] | None:
+    """
+    Group the KV cache specs and unify each group into one UniformTypeKVCacheSpecs.
+    Currently, this is only used for DeepseekV4.
+    """
+    if not any(
+        isinstance(spec, SlidingWindowMLASpec) for spec in kv_cache_spec.values()
+    ):
+        return None
+
+    mla_specs: dict[str, KVCacheSpec] = {}
+    grouped_swa_mla_specs: dict[tuple[int, int], dict[str, KVCacheSpec]] = defaultdict(
+        dict
+    )
+    # NOTE: Here we group SWA layers by (block_size, sliding_window), which separates
+    # SWA layers, C4I+C4A layers, and C128A layers into three different groups. It can
+    # be fragile with only block_size and sliding_window as keys, but fine for now.
+    for name, spec in kv_cache_spec.items():
+        if isinstance(spec, SlidingWindowMLASpec):
+            grouped_swa_mla_specs[(spec.block_size, spec.sliding_window)][name] = spec
+        elif isinstance(spec, MLAAttentionSpec):
+            mla_specs[name] = spec
+
+    assert len(mla_specs) > 0
+    mla_uniform_spec = UniformTypeKVCacheSpecs.from_specs(mla_specs)
+    assert mla_uniform_spec is not None
+
+    swa_uniform_specs: list[UniformTypeKVCacheSpecs] = []
+    for spec_dict in grouped_swa_mla_specs.values():
+        uniform_spec = UniformTypeKVCacheSpecs.from_specs(spec_dict)
+        assert uniform_spec is not None
+        swa_uniform_specs.append(uniform_spec)
+
+    return [mla_uniform_spec, *swa_uniform_specs]
+
+
+def _approximate_gcd(values: Sequence[int], *, lower_bound: int | None = None) -> int:
+    """Pick a chunk size that minimizes total upward padding.
+
+    Each x is rounded up to a multiple of d:
+
+      x -> ceil(x / d) * d
+
+    Total padding is:
+
+      pad(d) = sum_i (ceil(x_i / d) * d - x_i)
+
+    We brute-force d in [lower_bound, max(values)] (fine for small lists / small
+    maxima) and return the d with minimum padding. Ties prefer larger d.
+    """
+    if not values:
+        raise ValueError("values must be non-empty")
+    if any(x <= 0 for x in values):
+        raise ValueError(f"values must be positive, got: {list(values)!r}")
+
+    min_d = max(1, lower_bound if lower_bound is not None else 1)
+    max_d = max(values)
+    if min_d > max_d:
+        return min_d
+
+    best_d = min_d
+    best_pad: int | None = None
+    for d in range(min_d, max_d + 1):
+        pad = sum((d - (x % d)) % d for x in values)
+        if best_pad is None or pad < best_pad or (pad == best_pad and d > best_d):
+            best_pad = pad
+            best_d = d
+
+    return best_d
+
+
+def _get_kv_cache_groups_uniform_groups(
+    grouped_specs: list[UniformTypeKVCacheSpecs],
+) -> list[KVCacheGroupSpec]:
+    """
+    Generate the KV cache groups from the grouped specs.
+    """
+    assert len(grouped_specs) > 0 and all(
+        isinstance(spec, UniformTypeKVCacheSpecs) for spec in grouped_specs
+    )
+    # For now, we restrict the first grouped_spec to be UniformTypeKVCacheSpecs
+    # containing only MLAAttentionSpec.
+    full_mla_spec = grouped_specs[0]
+    assert all(
+        isinstance(spec, MLAAttentionSpec)
+        for spec in full_mla_spec.kv_cache_specs.values()
+    )
+    full_mla_group = KVCacheGroupSpec(
+        layer_names=list(full_mla_spec.kv_cache_specs.keys()),
+        kv_cache_spec=full_mla_spec,
+    )
+
+    # We define a layer tuple as a group of layers with different page sizes, and
+    # one UniformTypeKVCacheSpecs contains a list of layer tuples.
+    # For example, if we have 11 C4 layers and 10 C128 layers, we can define a layer
+    # tuple as [C4I, C4A, C128], and the full_mla_group will contain "11" layer tuples.
+    # The other uniform KV cache specs will be similarly partitioned into layer tuples.
+    # Say we have 21 SWA layers, all with the same page size, then we will have "21"
+    # layer tuples.
+    num_layer_tuples_per_group: list[int] = [
+        g_spec.get_num_layer_tuples() for g_spec in grouped_specs
+    ]
+    # Choose `num_layer_tuples` to minimize total padding across groups.
+    num_layer_tuples = _approximate_gcd(
+        num_layer_tuples_per_group, lower_bound=num_layer_tuples_per_group[0]
+    )
+    # Round up to the nearest multiple of `num_layer_tuples` (i.e., padding)
+    num_layer_tuples_per_group = [
+        round_up(x, num_layer_tuples) for x in num_layer_tuples_per_group
+    ]
+
+    swa_mla_specs = grouped_specs[1:]
+    assert all(
+        isinstance(spec, SlidingWindowMLASpec)
+        for group in swa_mla_specs
+        for spec in group.kv_cache_specs.values()
+    )
+
+    # Split each SWA UniformKV group into smaller groups to align their #(layer tuples)
+    # Possibly padding layer tuples for this.
+    # Additionally, we also pad KV blocks in each SWA layer, to align the page size
+    # with the corresponding layer in the full-MLA group.
+    all_page_sizes = full_mla_spec.get_page_sizes()
+    swa_mla_groups = []
+    for sm_spec in swa_mla_specs:
+        sm_page_sizes = sm_spec.get_page_sizes()
+        layers_per_size: dict[int, list[str]] = defaultdict(list)
+        assert max(sm_page_sizes) <= max(all_page_sizes)
+
+        # Unify page size by padding layers' page_size to the nearest larger page_size.
+        # Compute candidate (nearest larger page_size) for each unique page size.
+        size_to_candidate: dict[int, int] = {}
+        for ps in sm_page_sizes:
+            size_to_candidate[ps] = min(x for x in all_page_sizes if x >= ps)
+        # Pad and collect layer names per page size.
+        for layer_name, layer_spec in sm_spec.kv_cache_specs.items():
+            current_size = layer_spec.page_size_bytes
+            candidate = size_to_candidate[current_size]
+            if current_size < candidate:
+                object.__setattr__(layer_spec, "page_size_padded", candidate)
+            layers_per_size[candidate].append(layer_name)
+        # NOTE(yifan): for now, inside a UniformKV group, each page_size should
+        # have the same number of layers. This also means we don't need to pad layers
+        # inside a partial-full layer tuple.
+        assert len(set(len(layers) for layers in layers_per_size.values())) == 1
+        num_layers_per_size = len(next(iter(layers_per_size.values())))
+
+        # Split layers inside each UniformKV group for aligned #(layers).
+        # See `_get_kv_cache_groups_uniform_page_size` for more details.
+        num_tuple_groups = cdiv(num_layers_per_size, num_layer_tuples)
+        layer_tuples = list(zip(*layers_per_size.values()))
+        for i in range(num_tuple_groups):
+            group_layer_tuples = layer_tuples[i::num_tuple_groups]
+            # Flatten tuples and build dict for from_specs
+            group_layer_names = [
+                name for layer_tuple in group_layer_tuples for name in layer_tuple
+            ]
+            group_layer_specs = {
+                name: sm_spec.kv_cache_specs[name] for name in group_layer_names
+            }
+            sub_sm_spec = UniformTypeKVCacheSpecs.from_specs(group_layer_specs)
+            assert sub_sm_spec is not None
+            swa_mla_groups.append(
+                KVCacheGroupSpec(
+                    layer_names=group_layer_names,
+                    kv_cache_spec=sub_sm_spec,
+                )
+            )
+
+    return [full_mla_group, *swa_mla_groups]
+
+
+def _annotate_eagle_groups_deepseek_v4(
+    vllm_config: VllmConfig,
+    kv_cache_spec: dict[str, KVCacheSpec],
+    kv_cache_groups: list[KVCacheGroupSpec],
+) -> None:
+    spec_config = vllm_config.speculative_config
+    if spec_config is None or not spec_config.use_eagle():
+        return
+    # Detection uses the merged MLA spec's model_version.
+    if not any(
+        getattr(spec, "model_version", None) == "deepseek_v4"
+        for spec in kv_cache_spec.values()
+    ):
+        return
+    # DeepseekV4's MTP attention layer is always the last layer, and we flag whichever
+    # group contains it.
+    # FIXME(yifan): avoid/generalize this hacky check.
+    last_layer = next(reversed(kv_cache_spec))
+    for group in kv_cache_groups:
+        if last_layer in group.layer_names:
+            group.is_eagle_group = True
+            break
+
+
 def get_kv_cache_groups(
     vllm_config: VllmConfig, kv_cache_spec: dict[str, KVCacheSpec]
 ) -> list[KVCacheGroupSpec]:
@@ -1250,16 +1646,42 @@ def get_kv_cache_groups(
         # full attention, or all layers are sliding window attention with the
         # same window size). Put all layers into one group.
         return _get_kv_cache_groups_uniform_type(uniform_spec)
+    elif grouped_specs := group_and_unify_kv_cache_specs(kv_cache_spec):
+        # DeepseekV4 case: All layers need the same number of token slots,
+        # yet some layers are full attention while others are sliding window
+        # attention in different sizes. Need to group layers into multiple
+        # UniformTypeKVCacheSpecs.
+        kv_cache_groups = _get_kv_cache_groups_uniform_groups(grouped_specs)
+        _annotate_eagle_groups_deepseek_v4(vllm_config, kv_cache_spec, kv_cache_groups)
+        return kv_cache_groups
+
+    # Pull HiddenStateCacheSpec layers out before the general multi-group
+    # path so they don't affect page-size unification or grouping.
+    hidden_specs = {
+        k: v for k, v in kv_cache_spec.items() if isinstance(v, HiddenStateCacheSpec)
+    }
+    filtered_spec = {
+        k: v
+        for k, v in kv_cache_spec.items()
+        if not isinstance(v, HiddenStateCacheSpec)
+    }
 
     # As KVCacheManager can only allocate memory of one size, we need to unify
     # the page size of the layers. For cases cannot be unified, this function
     # will raise an error.
-    kv_cache_spec = unify_kv_cache_spec_page_size(kv_cache_spec)
-    # Model contains multiple attention types, but KV cache of all layers
-    # have the same physical memory per block per layer. Split the layers
-    # into groups with the same number of layers, and thus same total page
-    # size.
-    return _get_kv_cache_groups_uniform_page_size(kv_cache_spec)
+    filtered_spec = unify_kv_cache_spec_page_size(filtered_spec)
+    groups = _get_kv_cache_groups_uniform_page_size(filtered_spec)
+
+    # Add hidden-state layers back with page aligned to the common page.
+    if hidden_specs:
+        common_page = get_uniform_page_size([g.kv_cache_spec for g in groups])
+        for name, spec in hidden_specs.items():
+            per_token = spec.num_kv_heads * spec.head_size * get_dtype_size(spec.dtype)
+            new_bs = max(common_page // per_token, 1)
+            aligned = replace(spec, block_size=new_bs, page_size_padded=common_page)
+            groups.append(KVCacheGroupSpec([name], aligned))
+
+    return groups
 
 
 def generate_scheduler_kv_cache_config(
@@ -1294,38 +1716,25 @@ def _report_kv_cache_config(
         vllm_config: The global VllmConfig
         kv_cache_config: The resolved KV cache configuration
     """
-    min_block_size = min(
-        [group.kv_cache_spec.block_size for group in kv_cache_config.kv_cache_groups]
-    )
-
-    # Log the KV cache size and maximum concurrency.
-    num_tokens = (
-        kv_cache_config.num_blocks
-        // len(kv_cache_config.kv_cache_groups)
-        * min_block_size
-    )
-    dcp_size = vllm_config.parallel_config.decode_context_parallel_size
-    pcp_size = vllm_config.parallel_config.prefill_context_parallel_size
-    if pcp_size * dcp_size > 1:
-        num_tokens *= pcp_size * dcp_size
-        logger.info(
-            "Multiplying the GPU KV cache size by the cp_world_size %d "
-            "(pcp_world_size %d * dcp_world_size %d).",
-            pcp_size * dcp_size,
-            pcp_size,
-            dcp_size,
-        )
-    num_tokens_str = f"{num_tokens:,}"
-    logger.info_once("GPU KV cache size: %s tokens", num_tokens_str, scope="local")
-    max_model_len_str = f"{vllm_config.model_config.max_model_len:,}"
+    max_model_len = vllm_config.model_config.max_model_len
     max_concurrency = get_max_concurrency_for_kv_cache_config(
         vllm_config, kv_cache_config
     )
+
+    # GPU KV cache size in tokens = max_concurrency * max_model_len: the total
+    # tokens of context the pool can hold at peak utilization. Sourcing this
+    # from the concurrency calculation handles hybrid layouts correctly: SWA /
+    # chunked-local groups have a per-request block count that's capped by
+    # their window, so a naive `num_blocks // num_groups * block_size` formula
+    # underestimates capacity for these models. DCP/PCP sharding is already
+    # accounted for in each spec's `max_memory_usage_bytes`.
+    num_tokens = int(max_concurrency * max_model_len)
+
+    logger.info_once("GPU KV cache size: %s tokens", f"{num_tokens:,}")
     logger.info_once(
         "Maximum concurrency for %s tokens per request: %.2fx",
-        max_model_len_str,
+        f"{max_model_len:,}",
         max_concurrency,
-        scope="local",
     )
 
 
@@ -1343,15 +1752,40 @@ def _max_memory_usage_bytes_from_groups(
     if not kv_cache_groups:
         return 0
 
-    # UniformTypeKVCacheSpecs special case (single group, per-layer specs)
     if len(kv_cache_groups) == 1 and isinstance(
         kv_cache_groups[0].kv_cache_spec, UniformTypeKVCacheSpecs
     ):
+        # UniformTypeKVCacheSpecs special case (single group, per-layer specs)
         per_layer_specs = kv_cache_groups[0].kv_cache_spec.kv_cache_specs
         return sum(
             spec.max_memory_usage_bytes(vllm_config)
             for spec in per_layer_specs.values()
         )
+    elif all(
+        isinstance(group.kv_cache_spec, UniformTypeKVCacheSpecs)
+        for group in kv_cache_groups
+    ):
+        # Special case (only DeepseekV4 for now): all groups are
+        # UniformTypeKVCacheSpecs.
+        # They must already be page_size aligned and share a common padded
+        # layer-tuple layout. Even groups with fewer actual tuples still reserve
+        # the global number of tuple slots in the shared tensor layout.
+        full_mla_spec = cast(UniformTypeKVCacheSpecs, kv_cache_groups[0].kv_cache_spec)
+        layer_tuple_bytes = sum(full_mla_spec.get_page_sizes())
+        num_layer_tuples = max(
+            cast(UniformTypeKVCacheSpecs, group.kv_cache_spec).get_num_layer_tuples()
+            for group in kv_cache_groups
+        )
+
+        total_max_mem_usage_bytes = 0
+        for group in kv_cache_groups:
+            group_spec = cast(UniformTypeKVCacheSpecs, group.kv_cache_spec)
+            g_max_mem_usage_pages = group_spec.max_memory_usage_pages(vllm_config)
+            g_max_mem_usage_page_bytes = (
+                num_layer_tuples * g_max_mem_usage_pages * layer_tuple_bytes
+            )
+            total_max_mem_usage_bytes += g_max_mem_usage_page_bytes
+        return total_max_mem_usage_bytes
 
     # General case: group_size pools, each shared by one layer per group
     # Memory = group_size * page_size * blocks_for_max_len
@@ -1427,7 +1861,6 @@ def _auto_fit_max_model_len(
             "Auto-fit max_model_len: attention-free model, "
             "using derived max_model_len=%d",
             original_max,
-            scope="local",
         )
         return
 
@@ -1454,7 +1887,6 @@ def _auto_fit_max_model_len(
             "Auto-fit max_model_len: full model context length %d fits in "
             "available GPU memory",
             original_max,
-            scope="local",
         )
     else:
         # Need to reduce max_model_len to fit in memory
@@ -1465,7 +1897,6 @@ def _auto_fit_max_model_len(
             original_max,
             auto_fit_max,
             format_gib(limiting_worker_mem),
-            scope="local",
         )
 
 
@@ -1501,7 +1932,13 @@ def _project_kv_cache_groups_to_worker(
                     for layer_name in worker_layer_names
                 },
             )
-        projected_groups.append(KVCacheGroupSpec(worker_layer_names, group_spec))
+        projected_groups.append(
+            KVCacheGroupSpec(
+                worker_layer_names,
+                group_spec,
+                is_eagle_group=group.is_eagle_group and bool(worker_layer_names),
+            )
+        )
     return projected_groups
 
 
@@ -1567,6 +2004,28 @@ def get_kv_cache_configs(
         for worker_spec in kv_cache_specs
     ]
 
+    # If `num_gpu_blocks_override` is set, the cache size that will actually
+    # be allocated is decoupled from the profiled `available_memory`:
+    # `may_override_num_blocks` in `get_kv_cache_config_from_groups` clamps
+    # `num_blocks` to the override. Reflect that in `available_memory` here so
+    # auto-fit, the admission check, and the per-worker config builder all
+    # plan against the same effective capacity.
+    override = vllm_config.cache_config.num_gpu_blocks_override
+    if override is not None:
+        adjusted_memory: list[int] = []
+        for groups, avail_mem in zip(projected_groups_per_worker, available_memory):
+            if not groups:
+                adjusted_memory.append(avail_mem)
+                continue
+            bytes_per_block = _pool_bytes_per_block(groups)
+            logger.info(
+                "Overriding num_gpu_blocks=%d with num_gpu_blocks_override=%d",
+                avail_mem // bytes_per_block,
+                override,
+            )
+            adjusted_memory.append(override * bytes_per_block)
+        available_memory = adjusted_memory
+
     if vllm_config.model_config.original_max_model_len == -1:
         _auto_fit_max_model_len(
             vllm_config, projected_groups_per_worker, available_memory
@@ -1684,10 +2143,7 @@ def __iter__(self) -> Iterator[BlockHash]:
     def _get_value_at(self, idx: int) -> BlockHash:
         base = idx * self.scale_factor
         end = base + self.scale_factor
-        merged_hash: bytes = self.block_hashes[base]
-        for i in range(base + 1, end):
-            merged_hash += self.block_hashes[i]
-        return BlockHash(merged_hash)
+        return BlockHash(b"".join(self.block_hashes[base:end]))
 
 
 BlockHashList = list[BlockHash] | BlockHashListWithBlockSize
diff --git a/vllm/v1/core/sched/async_scheduler.py b/vllm/v1/core/sched/async_scheduler.py
index 0b3958dbcf5a..cb61bcabd3ee 100644
--- a/vllm/v1/core/sched/async_scheduler.py
+++ b/vllm/v1/core/sched/async_scheduler.py
@@ -37,10 +37,11 @@ def _update_after_schedule(self, scheduler_output: SchedulerOutput) -> None:
     def _update_request_with_output(
         self, request: Request, new_token_ids: list[int]
     ) -> tuple[list[int], bool]:
-        if request.discard_latest_async_tokens:
-            # If the request is force preempted in reset_prefix_cache, we
-            # should discard the latest async token.
-            request.discard_latest_async_tokens = False
+        if request.async_tokens_to_discard > 0:
+            # The request was force-preempted in reset_prefix_cache; drop one
+            # stale in-flight async output frame per call until the counter
+            # is drained.
+            request.async_tokens_to_discard -= 1
             return [], False
 
         status_before_update = request.status
diff --git a/vllm/v1/core/sched/interface.py b/vllm/v1/core/sched/interface.py
index b44f2db1926b..264811a556d3 100644
--- a/vllm/v1/core/sched/interface.py
+++ b/vllm/v1/core/sched/interface.py
@@ -41,6 +41,7 @@ def __init__(
         kv_cache_config: "KVCacheConfig",
         structured_output_manager: "StructuredOutputManager",
         block_size: int,
+        hash_block_size: int,
         mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
         include_finished_set: bool = False,
         log_stats: bool = False,
diff --git a/vllm/v1/core/sched/output.py b/vllm/v1/core/sched/output.py
index bdb97decadfe..b2e9dd8b1719 100644
--- a/vllm/v1/core/sched/output.py
+++ b/vllm/v1/core/sched/output.py
@@ -38,6 +38,7 @@ class NewRequestData:
     num_computed_tokens: int
     lora_request: LoRARequest | None
     prompt_embeds: "torch.Tensor | None" = None
+    prompt_is_token_ids: list[bool] | None = None
 
     # Only used for v2 model runner.
     prefill_token_ids: list[int] | None = None
@@ -59,6 +60,7 @@ def from_request(
             num_computed_tokens=request.num_computed_tokens,
             lora_request=request.lora_request,
             prompt_embeds=request.prompt_embeds,
+            prompt_is_token_ids=request.prompt_is_token_ids,
             prefill_token_ids=prefill_token_ids,
         )
 
diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
index c28a5d18ae77..c69c9a8119ab 100644
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -7,9 +7,6 @@
 from dataclasses import replace
 from typing import Any
 
-import numpy as np
-
-from vllm import envs
 from vllm.compilation.cuda_graph import CUDAGraphStat
 from vllm.config import VllmConfig
 from vllm.distributed.ec_transfer.ec_connector.base import (
@@ -28,7 +25,7 @@
 from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
 from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.routed_experts_capturer import (
-    RoutedExpertsReader,
+    RoutedExpertsManager,
 )
 from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
 from vllm.multimodal.encoder_budget import MultiModalBudget
@@ -52,7 +49,7 @@
 )
 from vllm.v1.core.sched.utils import check_stop, remove_all
 from vllm.v1.engine import EngineCoreEventType, EngineCoreOutput, EngineCoreOutputs
-from vllm.v1.kv_cache_interface import AttentionSpec, KVCacheConfig
+from vllm.v1.kv_cache_interface import KVCacheConfig
 from vllm.v1.metrics.perf import ModelMetrics, PerfStats
 from vllm.v1.metrics.stats import PrefixCacheStats, SchedulerStats
 from vllm.v1.outputs import DraftTokenIds, KVConnectorOutput, ModelRunnerOutput
@@ -71,6 +68,7 @@ def __init__(
         kv_cache_config: KVCacheConfig,
         structured_output_manager: StructuredOutputManager,
         block_size: int,
+        hash_block_size: int | None = None,
         mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
         include_finished_set: bool = False,
         log_stats: bool = False,
@@ -222,20 +220,28 @@ def __init__(
                 self.num_lookahead_tokens = self.num_spec_tokens
 
         # Create the KV cache manager.
+        if hash_block_size is None:
+            hash_block_size = block_size
         self.kv_cache_manager = KVCacheManager(
             kv_cache_config=kv_cache_config,
             max_model_len=self.max_model_len,
+            max_num_batched_tokens=self.scheduler_config.max_num_batched_tokens,
             enable_caching=self.cache_config.enable_prefix_caching,
             use_eagle=self.use_eagle,
             log_stats=self.log_stats,
             enable_kv_cache_events=self.enable_kv_cache_events,
             dcp_world_size=self.dcp_world_size,
             pcp_world_size=self.pcp_world_size,
-            hash_block_size=self.block_size,
+            hash_block_size=hash_block_size,
             metrics_collector=self.kv_metrics_collector,
         )
+        # Bind GPU block pool to the KV connector. This must happen after
+        # kv_cache_manager is constructed so block_pool is available.
+        if self.connector is not None:
+            self.connector.bind_gpu_block_pool(self.kv_cache_manager.block_pool)
+
         self.use_pp = self.parallel_config.pipeline_parallel_size > 1
-        self.use_v2_model_runner = envs.VLLM_USE_V2_MODEL_RUNNER
+        self.use_v2_model_runner = vllm_config.use_v2_model_runner
         self.scheduler_reserve_full_isl = (
             self.scheduler_config.scheduler_reserve_full_isl
         )
@@ -249,42 +255,24 @@ def __init__(
         if self.log_stats and vllm_config.observability_config.enable_mfu_metrics:
             self.perf_metrics = ModelMetrics(vllm_config)
 
-        if self.vllm_config.model_config.enable_return_routed_experts:
+        self.enable_return_routed_experts = (
+            vllm_config.model_config.enable_return_routed_experts
+        )
+
+        if self.enable_return_routed_experts:
             assert self.dcp_world_size == 1 and self.pcp_world_size == 1, (
                 "enable_return_routed_experts does not support context parallelism "
                 "(dcp_world_size > 1 or pcp_world_size > 1)"
             )
 
-            self.routed_experts_reader = RoutedExpertsReader.create()
-
-            assert len(kv_cache_config.kv_cache_groups) > 0, (
-                "enable_return_routed_experts requires at least one kv cache group"
-            )
-            # Find the attention group for routed experts indexing.
-            self.routed_experts_attn_gid = 0
-            for gid, group in enumerate(kv_cache_config.kv_cache_groups):
-                if isinstance(group.kv_cache_spec, AttentionSpec):
-                    self.routed_experts_attn_gid = gid
-                    break
-            min_block_size = min(
-                [
-                    group.kv_cache_spec.block_size
-                    for group in kv_cache_config.kv_cache_groups
-                ]
-            )
-            num_groups = len(kv_cache_config.kv_cache_groups)
-            self.max_num_kv_tokens = (
-                kv_cache_config.num_blocks // num_groups
-            ) * min_block_size
-            dcp_size = self.vllm_config.parallel_config.decode_context_parallel_size
-            pcp_size = self.vllm_config.parallel_config.prefill_context_parallel_size
-            if pcp_size * dcp_size > 1:
-                self.max_num_kv_tokens *= pcp_size * dcp_size
-
-            self.routed_experts_reader.attach_buffer(
-                max_num_kv_tokens=self.max_num_kv_tokens,
-                vllm_config=self.vllm_config,
+            self.routed_experts_mgr = RoutedExpertsManager(
+                vllm_config=vllm_config,
+                kv_cache_config=kv_cache_config,
             )
+            # Block-ID snapshot taken at schedule time (before forward),
+            # so update_from_output can read slot data even if a later
+            # schedule() frees the blocks (async scheduling race).
+            self._re_block_ids: dict[str, list[int]] = {}
 
         self._pause_state: PauseState = PauseState.UNPAUSED
 
@@ -622,7 +610,6 @@ def schedule(self) -> SchedulerOutput:
                             step_skipped_waiting.prepend_request(request)
                             continue
 
-                        request.num_external_computed_tokens = ext_tokens
                         num_external_computed_tokens = ext_tokens
 
                         connector_prefix_cache_queries = (
@@ -635,6 +622,15 @@ def schedule(self) -> SchedulerOutput:
                         num_new_local_computed_tokens + num_external_computed_tokens
                     )
                     assert num_computed_tokens <= request.num_tokens
+
+                    # Track first scheduled prefill, not post-preemption repeat prefills
+                    if request.prefill_stats is not None:
+                        assert num_computed_tokens <= request.num_prompt_tokens
+                        request.prefill_stats.set(
+                            num_prompt_tokens=request.num_prompt_tokens,
+                            num_local_cached_tokens=num_new_local_computed_tokens,
+                            num_external_cached_tokens=num_external_computed_tokens,
+                        )
                 else:
                     # KVTransfer: WAITING reqs have num_computed_tokens > 0
                     # after async KV recvs are completed.
@@ -722,20 +718,6 @@ def schedule(self) -> SchedulerOutput:
                         for i in encoder_inputs_to_schedule
                     )
 
-                if (
-                    self.scheduler_reserve_full_isl
-                    and not self.kv_cache_manager.can_fit_full_sequence(
-                        request,
-                        num_new_computed_tokens=num_new_local_computed_tokens,
-                        new_computed_blocks=new_computed_blocks,
-                        num_external_computed_tokens=num_external_computed_tokens,
-                        num_encoder_tokens=num_encoder_tokens,
-                    )
-                ):
-                    if request.has_encoder_inputs:
-                        self.encoder_cache_manager.free(request)
-                    break
-
                 new_blocks = self.kv_cache_manager.allocate_slots(
                     request,
                     num_new_tokens,
@@ -745,6 +727,7 @@ def schedule(self) -> SchedulerOutput:
                     num_external_computed_tokens=num_external_computed_tokens,
                     delay_cache_blocks=load_kv_async,
                     num_encoder_tokens=num_encoder_tokens,
+                    full_sequence_must_fit=self.scheduler_reserve_full_isl,
                 )
 
                 if new_blocks is None:
@@ -819,9 +802,6 @@ def schedule(self) -> SchedulerOutput:
                 token_budget -= num_new_tokens
                 request.status = RequestStatus.RUNNING
                 request.num_computed_tokens = num_computed_tokens
-                # Count the number of prefix cached tokens.
-                if request.num_cached_tokens < 0:
-                    request.num_cached_tokens = num_computed_tokens
                 # Encoder-related.
                 if encoder_inputs_to_schedule:
                     scheduled_encoder_inputs[request_id] = encoder_inputs_to_schedule
@@ -989,13 +969,21 @@ def _update_after_schedule(self, scheduler_output: SchedulerOutput) -> None:
                 request.use_structured_output and not request.is_prefill_chunk
             )
 
-            # NOTE: _free_encoder_inputs relies on num_computed_tokens, which
-            # may be updated again in _update_from_output for speculative
-            # decoding. However, it is safe to call the method here because
-            # encoder inputs are always part of the prompt, not the output,
-            # and thus are unaffected by speculative decoding.
-            if request.has_encoder_inputs:
-                self._free_encoder_inputs(request)
+        # Snapshot block IDs for routed experts before forward starts.
+        # A concurrent schedule() may preempt requests and free blocks
+        # before update_from_output runs; the snapshot survives that.
+        # Use update() to preserve entries from the previous step that
+        # have not yet been consumed by update_from_output (async
+        # scheduling may call _update_after_schedule again before the
+        # prior update_from_output runs).
+        if self.enable_return_routed_experts:
+            gid = self.routed_experts_mgr.attn_gid
+            self._re_block_ids.update(
+                {
+                    rid: self.kv_cache_manager.get_blocks(rid).get_block_ids()[gid]
+                    for rid in num_scheduled_tokens
+                }
+            )
 
         # Clear the finished request IDs.
         # NOTE: We shouldn't do self.finished_req_ids.clear() here because
@@ -1330,6 +1318,27 @@ def update_from_output(
                 num_scheduled_tokens,
             )
 
+        # Persist per-step routed experts into the scheduler-side slot
+        # buffer (CPU->CPU fancy-index assign; ~few MB per step).
+        # MUST precede the per-request routing reads below: stopped
+        # requests may terminate on tokens generated in this very step,
+        # whose routing was just D2H'd into model_runner_output.
+        routing_data = None
+        routing_offsets: dict[str, int] = {}
+        if model_runner_output.routed_experts is not None:
+            re = model_runner_output.routed_experts
+            self.routed_experts_mgr.store_batch(re.routing_data, re.slot_mapping)
+            routing_data = re.routing_data.astype(
+                self.routed_experts_mgr.routed_experts_by_slot.dtype,
+                copy=False,
+            )
+            # Build offset map using model runner's request order
+            # (input_batch ordering), NOT scheduler dict order.
+            offset = 0
+            for rid in model_runner_output.req_ids:
+                routing_offsets[rid] = offset
+                offset += num_scheduled_tokens[rid]
+
         # NOTE(woosuk): As len(num_scheduled_tokens) can be up to 1K or more,
         # the below loop can be a performance bottleneck. We should do our best
         # to avoid expensive operations inside the loop.
@@ -1382,12 +1391,17 @@ def update_from_output(
                     request_id=req_id,
                 )
 
+            # Free encoder inputs only after the step has actually executed.
+            if request.has_encoder_inputs:
+                self._free_encoder_inputs(request)
+
             stopped = False
             new_logprobs = None
             new_token_ids = generated_token_ids
             pooler_output = pooler_outputs[req_index] if pooler_outputs else None
             kv_transfer_params = None
             status_before_stop = request.status
+            num_output_tokens_before = len(request._output_token_ids)
 
             # Check for stop and update request status.
             if new_token_ids:
@@ -1399,11 +1413,65 @@ def update_from_output(
                 request.status = RequestStatus.FINISHED_STOPPED
                 stopped = True
 
+            if new_token_ids and self.structured_output_manager.should_advance(request):
+                struct_output_request = request.structured_output_request
+                assert struct_output_request is not None
+                assert struct_output_request.grammar is not None
+                if not struct_output_request.grammar.accept_tokens(  # type: ignore[union-attr]
+                    req_id, new_token_ids
+                ):
+                    logger.error(
+                        "Unexpected: grammar rejected tokens %s for request %s. "
+                        "Terminating request.",
+                        new_token_ids,
+                        req_id,
+                    )
+                    request.status = RequestStatus.FINISHED_ERROR
+                    request.resumable = False
+                    stopped = True
+
             routed_experts = None
+            if (
+                self.enable_return_routed_experts
+                and routing_data is not None
+                and new_token_ids
+            ):
+                req_offset = routing_offsets[req_id]
+                end = req_offset + num_tokens_scheduled
+                block_ids = self._re_block_ids.pop(req_id, [])
+                if num_output_tokens_before == 0:
+                    # Prefill completed: read full prompt routing from
+                    # slot buffer using the block-ID snapshot taken at
+                    # schedule time (immune to async preemption).
+                    if (
+                        request.sampling_params is not None
+                        and request.sampling_params.routed_experts_prompt_start
+                        is not None
+                    ):
+                        prompt_start = (
+                            request.sampling_params.routed_experts_prompt_start
+                        )
+                        assert prompt_start < request.num_prompt_tokens
+                    else:
+                        prompt_start = 0
+                    routed_experts = self.routed_experts_mgr.get(
+                        block_ids,
+                        request.num_prompt_tokens,
+                        token_start=prompt_start,
+                    )
+                else:
+                    if scheduled_spec_token_ids:
+                        # Spec decode: accepted tokens at the START of
+                        # the scheduled range, rejected at the end.
+                        routed_experts = routing_data[
+                            req_offset : req_offset + len(new_token_ids)
+                        ]
+                    else:
+                        # Normal decode / re-prefill: token(s) at the END.
+                        routed_experts = routing_data[end - len(new_token_ids) : end]
+
             finish_reason = None
             if stopped:
-                routed_experts = self._get_routed_experts(request)
-
                 # Capture finish_reason BEFORE _handle_stopped_request, which may
                 # reset the status to WAITING for streaming requests that continue.
                 finish_reason = request.get_finished_reason()
@@ -1419,23 +1487,11 @@ def update_from_output(
             # Extract sample logprobs if needed.
             if (
                 request.sampling_params is not None
-                and request.sampling_params.logprobs is not None
+                and request.sampling_params.num_logprobs is not None
                 and logprobs
             ):
                 new_logprobs = logprobs.slice_request(req_index, len(new_token_ids))
 
-            if new_token_ids and self.structured_output_manager.should_advance(request):
-                struct_output_request = request.structured_output_request
-                assert struct_output_request is not None
-                assert struct_output_request.grammar is not None
-                ok = struct_output_request.grammar.accept_tokens(req_id, new_token_ids)
-                if not ok:
-                    logger.warning(
-                        "Unexpected: grammar rejected tokens %s for request %s.",
-                        new_token_ids,
-                        req_id,
-                    )
-
             if num_nans_in_logits is not None and req_id in num_nans_in_logits:
                 request.num_nans_in_logits = num_nans_in_logits[req_id]
 
@@ -1458,10 +1514,9 @@ def update_from_output(
                         pooling_output=pooler_output,
                         stop_reason=request.stop_reason,
                         events=request.take_events(),
+                        prefill_stats=request.take_prefill_stats(),
                         kv_transfer_params=kv_transfer_params,
                         trace_headers=request.trace_headers,
-                        num_cached_tokens=request.num_cached_tokens,
-                        num_external_computed_tokens=request.num_external_computed_tokens,
                         routed_experts=routed_experts,
                         num_nans_in_logits=request.num_nans_in_logits,
                     )
@@ -1488,7 +1543,6 @@ def update_from_output(
                         finish_reason=request.get_finished_reason(),
                         events=request.take_events(),
                         trace_headers=request.trace_headers,
-                        num_cached_tokens=request.num_cached_tokens,
                     )
                 )
 
@@ -1592,31 +1646,6 @@ def _handle_stopped_request(self, request: Request) -> bool:
         self._enqueue_waiting_request(request)
         return False
 
-    def _get_routed_experts(self, request: Request) -> np.ndarray | None:
-        if not self.vllm_config.model_config.enable_return_routed_experts:
-            return None
-
-        kv_blocks = self.kv_cache_manager.get_blocks(request.request_id)
-        block_ids = kv_blocks.get_block_ids()[self.routed_experts_attn_gid]
-        num_tokens = request.num_tokens - 1
-
-        # compute slot mapping using attention group's block_size
-        block_ids_array = np.array(block_ids, dtype=np.int32)
-        num_blocks = len(block_ids)
-        attn_group = self.kv_cache_config.kv_cache_groups[self.routed_experts_attn_gid]
-        block_size = attn_group.kv_cache_spec.block_size
-
-        # generate block offsets
-        block_offsets = np.arange(0, block_size)
-
-        # compute slot mapping: slot = block_id * block_size + offset
-        slot_mapping = (
-            block_offsets.reshape((1, block_size))
-            + block_ids_array.reshape((num_blocks, 1)) * block_size
-        ).flatten()[:num_tokens]
-
-        return self.routed_experts_reader.get_routed_experts(indices=slot_mapping)
-
     def _update_request_with_output(
         self, request: Request, new_token_ids: list[int]
     ) -> tuple[list[int], bool]:
@@ -1742,6 +1771,8 @@ def add_request(self, request: Request) -> None:
                 request.streaming_queue = deque()
             self._enqueue_waiting_request(request)
             self.requests[request.request_id] = request
+            if self.connector is not None:
+                self.connector.on_new_request(request)
             if self.log_stats:
                 request.record_event(EngineCoreEventType.QUEUED)
 
@@ -1851,7 +1882,16 @@ def get_num_unfinished_requests(self) -> int:
         return num_waiting + len(self.running)
 
     def has_finished_requests(self) -> bool:
-        return len(self.finished_req_ids) > 0
+        if self.finished_req_ids:
+            return True
+        if self.connector is None:
+            return False
+        # Finished requests waiting on delayed connector cleanup remain in
+        # self.requests after they have been removed from scheduling queues.
+        num_in_queues = (
+            len(self.waiting) + len(self.skipped_waiting) + len(self.running)
+        )
+        return len(self.requests) > num_in_queues
 
     def reset_prefix_cache(
         self, reset_running_requests: bool = False, reset_connector: bool = False
@@ -1874,10 +1914,14 @@ def reset_prefix_cache(
             while self.running:
                 request = self.running.pop()
                 self._preempt_request(request, timestamp)
-                # NOTE(zhuohan): For async scheduling, we need to discard the latest
-                # output token on the fly to avoid a redundant repetitive output token.
+                # For async scheduling, any output frames already in flight at
+                # preemption time are now stale and must be discarded when they
+                # return. num_output_placeholders is exactly that count: 0 if
+                # the engine has drained (e.g. pause_generation(keep) waited
+                # for idle), 1 for vanilla async mid-step, or 1 + spec/PP frames
+                # otherwise.
+                request.async_tokens_to_discard = request.num_output_placeholders
                 request.num_output_placeholders = 0
-                request.discard_latest_async_tokens = True
 
             # Clear scheduled request ids cache. Since we are forcing preemption
             # + resumption in the same step, we must act as if these requests were
@@ -1947,9 +1991,9 @@ def make_stats(
         )
         return SchedulerStats(
             num_running_reqs=len(self.running),
-            num_waiting_reqs=len(self.waiting) + len(self.skipped_waiting),
+            num_waiting_reqs=len(self.waiting),
+            num_skipped_waiting_reqs=len(self.skipped_waiting),
             kv_cache_usage=self.kv_cache_manager.usage,
-            encoder_cache_usage=self._get_encoder_cache_usage(),
             prefix_cache_stats=prefix_cache_stats,
             connector_prefix_cache_stats=connector_prefix_cache_stats,
             kv_cache_eviction_events=eviction_events,
@@ -1959,14 +2003,6 @@ def make_stats(
             perf_stats=perf_stats,
         )
 
-    def _get_encoder_cache_usage(self) -> float:
-        """Get encoder cache usage as a fraction (0.0 to 1.0)."""
-        ecm = self.encoder_cache_manager
-        if ecm.cache_size == 0:
-            return 0.0
-        used_slots = ecm.cache_size - ecm.num_free_slots
-        return used_slots / ecm.cache_size
-
     def make_spec_decoding_stats(
         self,
         spec_decoding_stats: SpecDecodingStats | None,
@@ -2015,7 +2051,7 @@ def _connector_finished(
         # the connector.
         self.kv_cache_manager.remove_skipped_blocks(
             request_id=request.request_id,
-            total_computed_tokens=request.num_tokens,
+            total_computed_tokens=request.num_computed_tokens,
         )
 
         block_ids = self.kv_cache_manager.get_block_ids(request.request_id)
@@ -2062,10 +2098,6 @@ def _update_waiting_for_remote_kv(self, request: Request) -> None:
             if request.num_computed_tokens == request.num_tokens:
                 request.num_computed_tokens = request.num_tokens - 1
 
-            # Count the number of prefix cached tokens.
-            if request.num_cached_tokens < 0:
-                request.num_cached_tokens = request.num_computed_tokens
-
         self.finished_recving_kv_req_ids.remove(request.request_id)
 
     def _try_promote_blocked_waiting_request(self, request: Request) -> bool:
@@ -2212,7 +2244,7 @@ def _update_requests_with_invalid_blocks(
                     req_num_computed_tokens - request.num_computed_tokens
                 )
                 total_affected_tokens += num_affected_tokens
-                request.num_external_computed_tokens -= num_affected_tokens
+
                 # collect invalid block and all downstream dependent blocks
                 if evict_blocks:
                     blocks_to_evict.update(req_block_ids[idx:])
diff --git a/vllm/v1/core/single_type_kv_cache_manager.py b/vllm/v1/core/single_type_kv_cache_manager.py
index 62bdb8113a32..e29919022ec4 100644
--- a/vllm/v1/core/single_type_kv_cache_manager.py
+++ b/vllm/v1/core/single_type_kv_cache_manager.py
@@ -16,11 +16,14 @@
     ChunkedLocalAttentionSpec,
     CrossAttentionSpec,
     FullAttentionSpec,
+    HiddenStateCacheSpec,
     KVCacheSpec,
     MambaSpec,
     MLAAttentionSpec,
     SinkFullAttentionSpec,
+    SlidingWindowMLASpec,
     SlidingWindowSpec,
+    TQFullAttentionSpec,
 )
 from vllm.v1.request import Request
 
@@ -39,6 +42,7 @@ def __init__(
         kv_cache_group_id: int,
         dcp_world_size: int = 1,
         pcp_world_size: int = 1,
+        max_admission_blocks_per_request: int | None = None,
     ) -> None:
         """
         Initializes the SingleTypeKVCacheManager.
@@ -46,6 +50,12 @@ def __init__(
             kv_cache_spec: The kv_cache_spec for this manager.
             block_pool: The block pool.
             kv_cache_group_id: The id of the kv cache group of this manager.
+            max_admission_blocks_per_request: Recycling-aware per-request
+                block cap used by `get_num_blocks_to_allocate`. Only set for
+                spec types that recycle blocks across chunks (SWA,
+                chunked-local); `None` (the default) means no cap, which is
+                correct for full-attention-style specs that hold every
+                block until the request finishes.
         """
         self.block_size = kv_cache_spec.block_size
         self.dcp_world_size = dcp_world_size
@@ -55,6 +65,7 @@ def __init__(
         self.kv_cache_spec = kv_cache_spec
         self.block_pool = block_pool
         self.enable_caching = enable_caching
+        self._max_admission_blocks_per_request = max_admission_blocks_per_request
         self.new_block_ids: list[int] = []
 
         # Mapping from request ID to blocks to track the blocks allocated
@@ -82,6 +93,7 @@ def get_num_blocks_to_allocate(
         new_computed_blocks: Sequence[KVCacheBlock],
         total_computed_tokens: int,
         num_tokens_main_model: int,
+        apply_admission_cap: bool = False,
     ) -> int:
         """
         Get the number of blocks needed to be allocated for the request.
@@ -97,12 +109,28 @@ def get_num_blocks_to_allocate(
             num_tokens_main_model: The number of tokens for the main model (aka target
                 model in spec decode). w/o spec decode, it is num_tokens;
                 with spec decode, it is num_tokens - num_lookahead_tokens.
+            apply_admission_cap: If True, clamp by `num_required_blocks` by
+                `_max_admission_blocks_per_request`for recycling-aware specs
+                (SWA, chunked-local).
 
         Returns:
             The number of blocks to allocate.
         """
 
         num_required_blocks = cdiv(num_tokens, self.block_size)
+        if apply_admission_cap and self._max_admission_blocks_per_request is not None:
+            # Recycling-aware specs (SWA, chunked-local) cap the per-request
+            # reservation here so admission matches the startup pool sizer
+            # (`SlidingWindowSpec.max_admission_blocks_per_request` / its
+            # chunked-local counterpart). `remove_skipped_blocks` runs from
+            # `allocate_slots` before each chunk's `get_num_blocks_to_allocate`,
+            # so per-request peak real-held blocks <= this cap, which keeps
+            # `sum(reservations) <= pool` <=> `sum(peak_real_held) <= pool`.
+            # Drift between the two would re-introduce the deadlock from
+            # issue #39734 or, worse, mid-prefill OOM.
+            num_required_blocks = min(
+                num_required_blocks, self._max_admission_blocks_per_request
+            )
         num_req_blocks = len(self.req_to_blocks.get(request_id, ()))
 
         if request_id in self.num_cached_block:
@@ -209,7 +237,7 @@ def allocate_new_computed_blocks(
                 cdiv(num_total_computed_tokens, self.block_size) - len(req_blocks)
             )
             req_blocks.extend(allocated_blocks)
-            if type(self.kv_cache_spec) is FullAttentionSpec:
+            if type(self.kv_cache_spec) in (FullAttentionSpec, TQFullAttentionSpec):
                 self.new_block_ids.extend(b.block_id for b in allocated_blocks)
 
     def allocate_new_blocks(
@@ -237,7 +265,7 @@ def allocate_new_blocks(
         else:
             new_blocks = self.block_pool.get_new_blocks(num_new_blocks)
             req_blocks.extend(new_blocks)
-            if type(self.kv_cache_spec) is FullAttentionSpec:
+            if type(self.kv_cache_spec) in (FullAttentionSpec, TQFullAttentionSpec):
                 self.new_block_ids.extend(b.block_id for b in new_blocks)
             return new_blocks
 
@@ -247,7 +275,12 @@ def take_new_block_ids(self) -> list[int]:
         self.new_block_ids = []
         return ids
 
-    def cache_blocks(self, request: Request, num_tokens: int) -> None:
+    def cache_blocks(
+        self,
+        request: Request,
+        num_tokens: int,
+        alignment_tokens: int | None = None,
+    ) -> None:
         """
         Cache the blocks for the request.
 
@@ -255,6 +288,12 @@ def cache_blocks(self, request: Request, num_tokens: int) -> None:
             request: The request.
             num_tokens: The total number of tokens that need to be cached
                 (including tokens that are already cached).
+            alignment_tokens: The cache-hit alignment (in tokens) used by the
+                coordinator's ``find_longest_cache_hit``. When greater than
+                this group's ``block_size``, managers whose hit logic only
+                returns a subset of blocks per alignment-aligned segment
+                (SWA) skip the rest since they can never participate in a
+                future cache hit.
         """
         num_cached_blocks = self.num_cached_block.get(request.request_id, 0)
         num_full_blocks = num_tokens // self.block_size
@@ -262,6 +301,13 @@ def cache_blocks(self, request: Request, num_tokens: int) -> None:
         if num_cached_blocks >= num_full_blocks:
             return
 
+        # Fast path: when the coordinator imposes no alignment constraint
+        if alignment_tokens is None or alignment_tokens <= self.block_size:
+            block_mask = None
+        else:
+            block_mask = self._cache_block_mask(
+                num_cached_blocks, num_full_blocks, alignment_tokens
+            )
         self.block_pool.cache_full_blocks(
             request=request,
             blocks=self.req_to_blocks[request.request_id],
@@ -269,10 +315,26 @@ def cache_blocks(self, request: Request, num_tokens: int) -> None:
             num_full_blocks=num_full_blocks,
             block_size=self.block_size,
             kv_cache_group_id=self.kv_cache_group_id,
+            block_mask=block_mask,
         )
 
         self.num_cached_block[request.request_id] = num_full_blocks
 
+    def _cache_block_mask(
+        self,
+        num_cached_blocks: int,
+        num_full_blocks: int,
+        alignment_tokens: int,
+    ) -> list[bool] | None:
+        """Per-block mask for ``cache_full_blocks``. ``None`` means cache
+        every (non-null) block — the default for full attention.
+
+        Subclasses with sparse hit semantics (SWA) override this to skip
+        blocks that can never serve a hit at any alignment-aligned prefix
+        length.
+        """
+        return None
+
     def free(self, request_id: str) -> None:
         """
         Free the blocks for the request.
@@ -533,12 +595,10 @@ def find_longest_cache_hit(
             ):
                 # Skip prefix matching check if the block is not aligned with
                 # `alignment_tokens`.
-                if (
-                    num_contiguous_blocks == 0
-                    and block_size != alignment_tokens  # Faster for common case.
-                    and (i + 1) * block_size % alignment_tokens != 0
-                ):
-                    continue
+                if num_contiguous_blocks == 0 and block_size != alignment_tokens:
+                    post_pop_blocks = i if use_eagle else i + 1
+                    if (post_pop_blocks * block_size) % alignment_tokens != 0:
+                        continue
                 # Add the cached block to the computed blocks.
                 for computed, cached in zip(computed_blocks, cached_block):
                     computed[i] = cached
@@ -565,13 +625,32 @@ def find_longest_cache_hit(
                 for computed in computed_blocks:
                     computed.pop()
         if use_eagle and computed_blocks[0]:
-            assert kv_cache_spec.block_size == alignment_tokens, (
-                "aligned_length is not compatible with eagle now"
-            )
             for computed in computed_blocks:
                 computed.pop()
+            # Re-align after eagle pop: the pop may break the alignment
+            # when block_size != alignment_tokens (hybrid models with
+            # different page sizes, e.g. Gemma4).
+            while (
+                block_size != alignment_tokens
+                and len(computed_blocks[0]) * block_size % alignment_tokens != 0
+            ):
+                for computed in computed_blocks:
+                    computed.pop()
         return computed_blocks
 
+    def _cache_block_mask(
+        self, num_cached_blocks: int, num_full_blocks: int, alignment_tokens: int
+    ) -> list[bool] | None:
+        assert alignment_tokens > self.block_size
+        per_segment = alignment_tokens // self.block_size
+        tail = cdiv(self.sliding_window - 1, self.block_size)
+        if tail >= per_segment:
+            return None
+        skip = per_segment - tail
+        return [
+            i % per_segment >= skip for i in range(num_cached_blocks, num_full_blocks)
+        ]
+
     def get_num_skipped_tokens(self, num_computed_tokens: int) -> int:
         """
         Get the number of tokens that will be skipped for attention computation.
@@ -866,6 +945,7 @@ def get_num_blocks_to_allocate(
         new_computed_blocks: Sequence[KVCacheBlock],
         total_computed_tokens: int,
         num_tokens_main_model: int,
+        apply_admission_cap: bool = False,
     ) -> int:
         assert isinstance(self.kv_cache_spec, MambaSpec)
         if (
@@ -890,6 +970,7 @@ def get_num_blocks_to_allocate(
                 new_computed_blocks,
                 total_computed_tokens,
                 num_tokens_main_model,
+                apply_admission_cap=apply_admission_cap,
             )
         else:
             # We don't allocate blocks for lookahead tokens in align mode, because if
@@ -1016,9 +1097,14 @@ def get_num_skipped_tokens(self, num_computed_tokens: int) -> int:
         """
         return num_computed_tokens - 1
 
-    def cache_blocks(self, request: Request, num_tokens: int) -> None:
+    def cache_blocks(
+        self,
+        request: Request,
+        num_tokens: int,
+        alignment_tokens: int | None = None,
+    ) -> None:
         num_cached_blocks_before = self.num_cached_block.get(request.request_id, 0)
-        super().cache_blocks(request, num_tokens)
+        super().cache_blocks(request, num_tokens, alignment_tokens=alignment_tokens)
         num_cached_blocks_after = self.num_cached_block.get(request.request_id, 0)
         if num_cached_blocks_after > num_cached_blocks_before:
             for block in self.req_to_blocks[request.request_id][
@@ -1047,7 +1133,12 @@ def allocate_new_computed_blocks(
         # requests, so  `new_computed_blocks` should always be empty.
         assert len(new_computed_blocks) == 0
 
-    def cache_blocks(self, request: Request, num_tokens: int) -> None:
+    def cache_blocks(
+        self,
+        request: Request,
+        num_tokens: int,
+        alignment_tokens: int | None = None,
+    ) -> None:
         # We do not cache blocks for cross-attention to be shared between
         # requests, so this method is not relevant.
         raise ValueError("Should not be called as prefix caching is disabled.")
@@ -1108,8 +1199,11 @@ def __init__(
 
 spec_manager_map: dict[type[KVCacheSpec], type[SingleTypeKVCacheManager]] = {
     FullAttentionSpec: FullAttentionManager,
+    TQFullAttentionSpec: FullAttentionManager,
     MLAAttentionSpec: FullAttentionManager,
+    HiddenStateCacheSpec: FullAttentionManager,
     SlidingWindowSpec: SlidingWindowManager,
+    SlidingWindowMLASpec: SlidingWindowManager,
     ChunkedLocalAttentionSpec: ChunkedLocalAttentionManager,
     MambaSpec: MambaManager,
     CrossAttentionSpec: CrossAttentionManager,
@@ -1118,8 +1212,21 @@ def __init__(
 
 
 def get_manager_for_kv_cache_spec(
-    kv_cache_spec: KVCacheSpec, **kwargs
+    kv_cache_spec: KVCacheSpec,
+    max_num_batched_tokens: int,
+    max_model_len: int,
+    **kwargs,
 ) -> SingleTypeKVCacheManager:
     manager_class = spec_manager_map[type(kv_cache_spec)]
+    # SlidingWindow / ChunkedLocalAttention managers recycle blocks across
+    # chunks; the runtime admission cap must match the recycling-aware bound
+    # the startup pool sizer uses (single source of truth: the spec method).
+    if isinstance(kv_cache_spec, (SlidingWindowSpec, ChunkedLocalAttentionSpec)):
+        kwargs["max_admission_blocks_per_request"] = (
+            kv_cache_spec.max_admission_blocks_per_request(
+                max_num_batched_tokens=max_num_batched_tokens,
+                max_model_len=max_model_len,
+            )
+        )
     manager = manager_class(kv_cache_spec, **kwargs)
     return manager
diff --git a/vllm/v1/cudagraph_dispatcher.py b/vllm/v1/cudagraph_dispatcher.py
index e27b5ee38834..cf0c1d417728 100644
--- a/vllm/v1/cudagraph_dispatcher.py
+++ b/vllm/v1/cudagraph_dispatcher.py
@@ -46,9 +46,14 @@ def __init__(self, vllm_config: VllmConfig):
             CUDAGraphMode.FULL: set(),
         }
 
+        from vllm.compilation.breakable_cudagraph import (
+            is_breakable_cudagraph_enabled,
+        )
+
         assert (
             not self.compilation_config.cudagraph_mode.requires_piecewise_compilation()
             or self.compilation_config.is_attention_compiled_piecewise()
+            or is_breakable_cudagraph_enabled()
         ), (
             "Compilation mode should be CompilationMode.VLLM_COMPILE when "
             "cudagraph_mode piecewise cudagraphs is used, "
diff --git a/vllm/v1/engine/__init__.py b/vllm/v1/engine/__init__.py
index 114d45fc4ff7..d8a413f4c3f0 100644
--- a/vllm/v1/engine/__init__.py
+++ b/vllm/v1/engine/__init__.py
@@ -4,6 +4,7 @@
 import enum
 import time
 from collections.abc import Mapping
+from dataclasses import dataclass
 from typing import Any, Literal
 
 import msgspec
@@ -14,7 +15,7 @@
 from vllm.multimodal.inputs import MultiModalFeatureSpec
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingParams
-from vllm.v1.metrics.stats import SchedulerStats
+from vllm.v1.metrics.stats import PrefillStats, SchedulerStats
 from vllm.v1.outputs import LogprobsLists, LogprobsTensors
 from vllm.v1.serial_utils import UtilityResult
 
@@ -63,6 +64,20 @@ def __str__(self):
         return FINISH_REASON_STRINGS[self.value]
 
 
+@dataclass
+class EngineCoreReadyResponse:
+    """Sent from EngineCore to each frontend at the end of engine startup.
+
+    Contains post-initialization config that may differ from the original
+    values (e.g. max_model_len after KV cache auto-fitting).
+    """
+
+    max_model_len: int
+    num_gpu_blocks: int
+    dp_stats_address: str | None
+    dtype: str | None = None
+
+
 class EngineCoreRequest(
     msgspec.Struct,
     array_like=True,  # type: ignore[call-arg]
@@ -80,6 +95,12 @@ class EngineCoreRequest(
     data_parallel_rank: int | None
     prompt_embeds: torch.Tensor | None = None
 
+    # Per-position mask for mixed-mode inputs (e.g chat completion with
+    # prompt_embeds content parts). `True` means the position is a real
+    # token ID; `False` means the position uses a pre-computed entry from
+    # `prompt_embeds`. `None` for pure-tokens and pure-embeds requests.
+    prompt_is_token_ids: list[bool] | None = None
+
     # Index of the client, used to ensure outputs are sent back to the same
     # client for this request when scaling out the front-end.
     client_index: int = 0
@@ -100,6 +121,13 @@ class EngineCoreRequest(
     external_req_id: str | None = None
 
     reasoning_ended: bool | None = None
+    reasoning_parser_kwargs: dict[str, Any] | None = None
+
+    # If True, the request should be added to the scheduler's waiting queue
+    # and immediately aborted, so connector-side cleanup runs via the standard
+    # request_finished hook. Used to free P-side prefill blocks when a
+    # KV-transfer request is rejected on the D node before engine admission.
+    abort_immediately: bool = False
 
     @property
     def params(self) -> SamplingParams | PoolingParams:
@@ -157,10 +185,9 @@ class EngineCoreOutput(
     kv_transfer_params: dict[str, Any] | None = None
 
     trace_headers: Mapping[str, str] | None = None
-    # The number of tokens with prefix cache hits (local + external).
-    num_cached_tokens: int = 0
-    # The number of tokens computed remotely (original count from connector).
-    num_external_computed_tokens: int = 0
+
+    prefill_stats: PrefillStats | None = None
+
     routed_experts: np.ndarray | None = None
     # The number of NaNs in logits.
     # A value greater than 0 indicates that the output is corrupted.
diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index 7ff324f120d9..160f148f5c59 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -26,7 +26,6 @@
 from vllm.lora.request import LoRARequest
 from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
 from vllm.outputs import STREAM_FINISHED, PoolingRequestOutput, RequestOutput
-from vllm.plugins.io_processors import get_io_processor
 from vllm.pooling_params import PoolingParams
 from vllm.renderers import renderer_from_config
 from vllm.renderers.inputs.preprocess import extract_prompt_components
@@ -78,7 +77,6 @@ def __init__(
         log_stats: bool,
         usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
         mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
-        use_cached_outputs: bool = False,
         log_requests: bool = True,
         start_engine_loop: bool = True,
         stat_loggers: list[StatLoggerFactory] | None = None,
@@ -96,7 +94,6 @@ def __init__(
             log_stats: Whether to log stats.
             usage_context: Usage context of the LLM.
             mm_registry: Multi-modal registry.
-            use_cached_outputs: Whether to use cached outputs.
             log_requests: Whether to log requests.
             start_engine_loop: Whether to start the engine loop.
             stat_loggers: customized stat loggers for the engine.
@@ -133,11 +130,6 @@ def __init__(
             )
 
         self.renderer = renderer = renderer_from_config(self.vllm_config)
-        self.io_processor = get_io_processor(
-            self.vllm_config,
-            self.renderer,
-            self.model_config.io_processor_plugin,
-        )
 
         # Convert EngineInput --> EngineCoreRequest.
         self.input_processor = InputProcessor(self.vllm_config, renderer)
@@ -301,6 +293,7 @@ async def add_request(
         data_parallel_rank: int | None = None,
         prompt_text: str | None = None,
         reasoning_ended: bool | None = None,
+        reasoning_parser_kwargs: dict[str, Any] | None = None,
     ) -> RequestOutputCollector:
         """Add new request to the AsyncLLM."""
 
@@ -321,7 +314,7 @@ async def add_request(
             )
 
         if isinstance(prompt, AsyncGenerator):
-            if reasoning_ended is not None:
+            if reasoning_ended is not None or reasoning_parser_kwargs is not None:
                 raise NotImplementedError
 
             # Streaming input case.
@@ -369,6 +362,8 @@ async def add_request(
 
         if reasoning_ended is not None:
             request.reasoning_ended = reasoning_ended
+        if reasoning_parser_kwargs is not None:
+            request.reasoning_parser_kwargs = reasoning_parser_kwargs
 
         self.input_processor.assign_request_id(request)
 
@@ -542,6 +537,7 @@ async def generate(
         priority: int = 0,
         data_parallel_rank: int | None = None,
         reasoning_ended: bool | None = None,
+        reasoning_parser_kwargs: dict[str, Any] | None = None,
     ) -> AsyncGenerator[RequestOutput, None]:
         """
         Main function called by the API server to kick off a request
@@ -571,6 +567,7 @@ async def generate(
                 data_parallel_rank=data_parallel_rank,
                 prompt_text=prompt_text,
                 reasoning_ended=reasoning_ended,
+                reasoning_parser_kwargs=reasoning_parser_kwargs,
             )
 
             # The output_handler task pushes items into the queue.
@@ -723,6 +720,33 @@ async def abort(
         if self.log_requests:
             logger.info("Aborted request(s) %s.", ",".join(request_ids))
 
+    async def notify_kv_transfer_request_rejected(
+        self,
+        request_id: str,
+        kv_transfer_params: dict[str, Any],
+        *,
+        data_parallel_rank: int | None = None,
+    ) -> None:
+        """Submit a pre-aborted request so the connector's request_finished
+        hook runs to free any pre-admission KV-transfer resources (e.g. NIXL
+        prefill blocks pinned on the P node)."""
+        request = EngineCoreRequest(
+            request_id=request_id,
+            prompt_token_ids=[0],
+            mm_features=None,
+            sampling_params=SamplingParams(
+                max_tokens=1,
+                extra_args={"kv_transfer_params": dict(kv_transfer_params)},
+            ),
+            pooling_params=None,
+            arrival_time=time.time(),
+            lora_request=None,
+            cache_salt=None,
+            data_parallel_rank=data_parallel_rank,
+            abort_immediately=True,
+        )
+        await self.engine_core.add_request_async(request)
+
     async def pause_generation(
         self,
         *,
@@ -757,6 +781,8 @@ async def pause_generation(
                 stacklevel=2,
             )
             mode = "wait"
+        if clear_cache:
+            await self.renderer.clear_mm_cache_async()
         await self.engine_core.pause_scheduler_async(mode=mode, clear_cache=clear_cache)
         # Small sleep to help ensure that final outputs from any in-flight requests are
         # returned prior to this method returning. These outputs come out of the engine
@@ -903,6 +929,8 @@ async def reset_encoder_cache(self) -> None:
         await self.engine_core.reset_encoder_cache_async()
 
     async def sleep(self, level: int = 1, mode: PauseMode = "abort") -> None:
+        if level >= 1:
+            await self.renderer.clear_mm_cache_async()
         await self.engine_core.sleep_async(level, mode)
 
         if self.logger_manager is not None:
@@ -1052,6 +1080,13 @@ async def init_weight_transfer_engine(
             "init_weight_transfer_engine", kwargs={"init_info": init_info_dict}
         )
 
+    async def start_weight_update(self, is_checkpoint_format: bool = True) -> None:
+        """Start a new weight update."""
+        await self.collective_rpc(
+            "start_weight_update",
+            kwargs={"is_checkpoint_format": is_checkpoint_format},
+        )
+
     async def update_weights(self, request: WeightTransferUpdateRequest) -> None:
         """
         Batched weight update for RL training.
@@ -1070,3 +1105,7 @@ async def update_weights(self, request: WeightTransferUpdateRequest) -> None:
         await self.collective_rpc(
             "update_weights", kwargs={"update_info": update_info_dict}
         )
+
+    async def finish_weight_update(self) -> None:
+        """Finish the current weight update."""
+        await self.collective_rpc("finish_weight_update")
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 0fa59579ee76..c9503e46dd88 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import gc
 import os
 import queue
 import signal
@@ -21,7 +22,10 @@
 
 import vllm.envs as envs
 from vllm.config import ParallelConfig, VllmConfig
-from vllm.distributed import stateless_destroy_torch_distributed_process_group
+from vllm.distributed import (
+    cleanup_dist_env_and_memory,
+    stateless_destroy_torch_distributed_process_group,
+)
 from vllm.envs import enable_envs_cache
 from vllm.logger import init_logger
 from vllm.logging_utils.dump_input import dump_engine_exception
@@ -30,6 +34,7 @@
 from vllm.tasks import POOLING_TASKS, SupportedTask
 from vllm.tracing import instrument, maybe_init_worker_tracer
 from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
+from vllm.utils import numa_utils
 from vllm.utils.gc_utils import (
     freeze_gc_heap,
     maybe_attach_gc_debug_callback,
@@ -43,6 +48,7 @@
     get_kv_cache_configs,
     get_request_block_hasher,
     init_none_hash,
+    resolve_kv_cache_block_sizes,
 )
 from vllm.v1.core.sched.interface import PauseState, SchedulerInterface
 from vllm.v1.core.sched.output import SchedulerOutput
@@ -51,6 +57,7 @@
     EEPNotificationType,
     EngineCoreOutput,
     EngineCoreOutputs,
+    EngineCoreReadyResponse,
     EngineCoreRequest,
     EngineCoreRequestType,
     FinishReason,
@@ -68,7 +75,7 @@
     get_device_indices,
 )
 from vllm.v1.executor import Executor
-from vllm.v1.kv_cache_interface import KVCacheConfig
+from vllm.v1.kv_cache_interface import KVCacheConfig, get_kv_cache_spec_kind
 from vllm.v1.metrics.stats import SchedulerStats
 from vllm.v1.outputs import ModelRunnerOutput
 from vllm.v1.request import Request, RequestStatus
@@ -134,10 +141,8 @@ def __init__(
                 logger.warning("Disabling chunked prefill for model without KVCache")
                 vllm_config.scheduler_config.enable_chunked_prefill = False
 
-        scheduler_block_size = (
-            vllm_config.cache_config.block_size
-            * vllm_config.parallel_config.decode_context_parallel_size
-            * vllm_config.parallel_config.prefill_context_parallel_size
+        scheduler_block_size, hash_block_size = resolve_kv_cache_block_sizes(
+            kv_cache_config, vllm_config
         )
 
         self.scheduler: SchedulerInterface = Scheduler(
@@ -147,6 +152,7 @@ def __init__(
             include_finished_set=include_finished_set,
             log_stats=self.log_stats,
             block_size=scheduler_block_size,
+            hash_block_size=hash_block_size,
         )
         self.use_spec_decode = vllm_config.speculative_config is not None
         if self.scheduler.connector is not None:  # type: ignore
@@ -204,7 +210,7 @@ def __init__(
             init_none_hash(caching_hash_fn)
 
             self.request_block_hasher = get_request_block_hasher(
-                scheduler_block_size, caching_hash_fn
+                hash_block_size, caching_hash_fn
             )
 
         self.step_fn = (
@@ -280,16 +286,54 @@ def _initialize_kv_caches(self, vllm_config: VllmConfig) -> KVCacheConfig:
         self.model_executor.initialize_from_config(kv_cache_configs)
 
         elapsed = time.time() - start
-        logger.info_once(
-            "init engine (profile, create kv cache, warmup model) took %.2f seconds",
-            elapsed,
-            scope="local",
-        )
+        compile_time = vllm_config.compilation_config.compilation_time
+        encoder_compile_time = vllm_config.compilation_config.encoder_compilation_time
+        if encoder_compile_time > 0:
+            logger.info_once(
+                "init engine (profile, create kv cache, warmup model) took "
+                "%.2f s (compilation: %.2f s — language_model: %.2f s, "
+                "encoder: %.2f s)",
+                elapsed,
+                compile_time + encoder_compile_time,
+                compile_time,
+                encoder_compile_time,
+            )
+        elif compile_time > 0:
+            logger.info_once(
+                "init engine (profile, create kv cache, warmup model) took "
+                "%.2f s (compilation: %.2f s)",
+                elapsed,
+                compile_time,
+            )
+        else:
+            logger.info_once(
+                "init engine (profile, create kv cache, warmup model) took %.2f s",
+                elapsed,
+            )
         return scheduler_kv_cache_config
 
     def get_supported_tasks(self) -> tuple[SupportedTask, ...]:
         return self.model_executor.supported_tasks
 
+    def get_kv_cache_group_metadata(self) -> list[dict[str, int | str | None]]:
+        """Return msgspec-serializable metadata for scheduler KV cache groups."""
+        kv_cache_config = getattr(self.scheduler, "kv_cache_config", None)
+        if kv_cache_config is None:
+            return []
+
+        metadata: list[dict[str, int | str | None]] = []
+        for group_idx, group in enumerate(kv_cache_config.kv_cache_groups):
+            spec = group.kv_cache_spec
+            metadata.append(
+                {
+                    "group_idx": group_idx,
+                    "kind": get_kv_cache_spec_kind(spec).value,
+                    "block_size": spec.block_size,
+                    "sliding_window": getattr(spec, "sliding_window", None),
+                }
+            )
+        return metadata
+
     def add_request(self, request: Request, request_wave: int = 0):
         """Add request to the scheduler.
 
@@ -322,6 +366,10 @@ def add_request(self, request: Request, request_wave: int = 0):
             )
 
         self.scheduler.add_request(request)
+        if request.abort_immediately:
+            # Immediately abort so the connector's request_finished hook runs
+            # to free any pre-admission KV-transfer resources.
+            self.abort_requests([request.request_id])
 
     def abort_requests(self, request_ids: list[str]):
         """Abort requests from the scheduler."""
@@ -553,6 +601,15 @@ def shutdown(self):
         if self.scheduler:
             self.scheduler.shutdown()
 
+        # Undo the gc.freeze() from __init__ so that the objects allocated
+        # during engine startup (model weights, KV caches, etc.) become
+        # visible to the garbage collector again. Without this, deleting
+        # the engine in-process (e.g. unit tests) leaks GPU memory.
+        gc.unfreeze()
+        # Tear down distributed state initialized in this EngineCore process
+        # before it exits and release cached memory.
+        cleanup_dist_env_and_memory()
+
     def profile(self, is_start: bool = True, profile_prefix: str | None = None):
         self.model_executor.profile(is_start, profile_prefix)
 
@@ -934,14 +991,20 @@ def _perform_handshakes(
             vllm_config.parallel_config,
         )
         if client_handshake_address is None:
+            # We only need to handshake with one party.
             with handshake as addresses:
                 yield addresses
         else:
+            # We need to handshake with rank 0 front-end and our colocated frontend.
             assert local_client
             local_handshake = self._perform_handshake(
                 input_ctx, client_handshake_address, identity, True, False, vllm_config
             )
             with handshake as addresses, local_handshake as client_addresses:
+                # 1. Obtain DP Coordinator zmq address and DP process group address
+                #    (addresses).
+                # 2. Add front-end input/output addresses from colocated front-end
+                #    (client_addresses).
                 addresses.inputs = client_addresses.inputs
                 addresses.outputs = client_addresses.outputs
                 yield addresses
@@ -975,20 +1038,12 @@ def _perform_handshake(
             yield addresses
 
             # Send ready message.
-            num_gpu_blocks = vllm_config.cache_config.num_gpu_blocks
-            # We pass back the coordinator stats update address here for the
-            # external LB case for our colocated front-end to use (coordinator
-            # only runs with rank 0).
-            dp_stats_address = self.frontend_stats_publish_address
-
-            # Include config hash for DP configuration validation
             ready_msg = {
                 "status": "READY",
                 "local": local_client,
                 "headless": headless,
-                "num_gpu_blocks": num_gpu_blocks,
-                "dp_stats_address": dp_stats_address,
             }
+            # Include config hash for DP configuration validation
             if vllm_config.parallel_config.data_parallel_size > 1:
                 ready_msg["parallel_config_hash"] = (
                     vllm_config.parallel_config.compute_hash()
@@ -1055,6 +1110,8 @@ def run_engine_core(*args, dp_rank: int = 0, local_dp_rank: int = 0, **kwargs):
             set_process_title(process_title)
             maybe_init_worker_tracer("vllm.engine_core", "engine_core", process_title)
             decorate_logs()
+            if parallel_config.numa_bind:
+                numa_utils.log_current_affinity_state(process_title)
 
             if data_parallel and vllm_config.kv_transfer_config is not None:
                 # modify the engine_id and append the local_dp_rank to it to ensure
@@ -1185,11 +1242,10 @@ def _process_engine_step(self) -> bool:
         # Post-step hook.
         self.post_step(model_executed)
 
-        # If no model execution happened but there are waiting requests
-        # (e.g., WAITING_FOR_REMOTE_KVS), yield the GIL briefly to allow
-        # background threads (like NIXL handshake) to make progress.
-        # Without this, the tight polling loop can starve background threads.
-        if not model_executed and self.scheduler.has_unfinished_requests():
+        # If no model execution happened but there is still scheduler work
+        # (e.g. WAITING_FOR_REMOTE_KVS or delayed KV connector frees), yield
+        # the GIL briefly to allow background transfer threads to make progress.
+        if not model_executed and self.scheduler.has_requests():
             time.sleep(0.001)
 
         return model_executed
@@ -1255,8 +1311,9 @@ def _handle_client_request(
                 return
             output = UtilityOutput(call_id)
             # Lazily look-up utility method so that failure will be handled/returned.
-            get_result = lambda: (method := getattr(self, method_name)) and method(
-                *self._convert_msgspec_args(method, args)
+            get_result = lambda: (
+                (method := getattr(self, method_name))
+                and method(*self._convert_msgspec_args(method, args))
             )
             enqueue_output = lambda out: self.output_queue.put_nowait(
                 (client_idx, EngineCoreOutputs(utility_output=out))
@@ -1381,11 +1438,18 @@ def process_input_sockets(
 
             # Register sockets with poller.
             poller = zmq.Poller()
+            ready_response = EngineCoreReadyResponse(
+                max_model_len=self.vllm_config.model_config.max_model_len,
+                num_gpu_blocks=self.vllm_config.cache_config.num_gpu_blocks or 0,
+                dp_stats_address=self.frontend_stats_publish_address,
+                dtype=str(self.vllm_config.model_config.dtype).removeprefix("torch."),
+            )
+            ready_payload = msgspec.msgpack.encode(ready_response)
             for input_socket in input_sockets:
                 # Send initial message to each input socket - this is required
                 # before the front-end ROUTER socket can send input messages
                 # back to us.
-                input_socket.send(b"")
+                input_socket.send(ready_payload)
                 poller.register(input_socket, zmq.POLLIN)
 
             if coord_socket is not None:
@@ -1536,7 +1600,8 @@ def engine_idle_callback(engine: "EngineCoreProc", future: Future[Any]) -> None:
 
         pause_state = PauseState.PAUSED_ALL if mode == "keep" else PauseState.PAUSED_NEW
         self.scheduler.set_pause_state(pause_state)
-        if not self.has_work():
+
+        if self._pause_complete():
             if clear_cache:
                 self._reset_caches()
             return None
@@ -1545,6 +1610,13 @@ def engine_idle_callback(engine: "EngineCoreProc", future: Future[Any]) -> None:
         self._idle_state_callbacks.append(partial(engine_idle_callback, future=future))
         return future
 
+    def _pause_complete(self) -> bool:
+        """Returns True if the pause has fully completed and the caller can
+        return ``None`` synchronously; False if the pause is still pending
+        and the caller should register an idle-state callback to finish it.
+        """
+        return not self.has_work()
+
     def _send_finish_outputs_to_client(
         self, req_ids: list[str], client_index: int, finish_reason: FinishReason
     ) -> None:
@@ -1600,6 +1672,14 @@ def __init__(
         self.current_wave = 0
         self.last_counts = (0, 0)
 
+        # Two-phase pause protocol state. When pending_pause is True, the
+        # engine keeps stepping (dummy batches) while waiting for all DP
+        # ranks to also set pending_pause. Once all ranks agree via
+        # all-reduce, ignore_start_dp_wave is set so that stale
+        # START_DP_WAVE messages cannot re-wake the engines.
+        self.pending_pause = False
+        self.ignore_start_dp_wave = False
+
         from vllm.distributed.elastic_ep.elastic_state import ElasticEPScalingState
 
         self.eep_scaling_state: ElasticEPScalingState | None = None
@@ -1629,6 +1709,7 @@ def _init_data_parallel(self, vllm_config: VllmConfig):
         assert 0 <= local_dp_rank <= dp_rank < dp_size
 
         self.dp_rank = dp_rank
+        self.dp_size = dp_size
         dp_group, dp_store = parallel_config.stateless_init_dp_group(return_store=True)
         self.dp_group, self.dp_store = dp_group, dp_store
 
@@ -1637,6 +1718,24 @@ def shutdown(self):
         if dp_group := getattr(self, "dp_group", None):
             stateless_destroy_torch_distributed_process_group(dp_group)
 
+    def _pause_complete(self) -> bool:
+        """Two-phase DP-aware pause.
+
+        Phase 1: Set local pause state and ``pending_pause`` flag. If the
+        engines are idle, kick-start them by setting ``engines_running`` to
+        True so ranks enter the stepping loop and reach the all-reduce
+        consensus checkpoint in ``_has_global_unfinished_reqs``.
+
+        Phase 2 (in ``_has_global_unfinished_reqs``): Once the all-reduce
+        confirms that **all** ranks have ``pending_pause`` set, collectively
+        stop stepping and set ``ignore_start_dp_wave`` so that stale
+        ``START_DP_WAVE`` messages cannot re-wake any engine.
+        """
+        self.pending_pause = True
+        self.engines_running = True
+
+        return False
+
     def add_request(self, request: Request, request_wave: int = 0):
         super().add_request(request, request_wave)
         if self.has_coordinator and request_wave != self.current_wave:
@@ -1646,36 +1745,60 @@ def add_request(self, request: Request, request_wave: int = 0):
                 not self.engines_running
                 and self.scheduler.pause_state == PauseState.UNPAUSED
             ):
-                self.engines_running = True
                 # Request received for an already-completed wave, notify
                 # front-end that we need to start the next one.
+                self.engines_running = True
                 self.output_queue.put_nowait(
                     (-1, EngineCoreOutputs(start_wave=self.current_wave))
                 )
 
     def resume_scheduler(self):
-        super().resume_scheduler()
-        if (
-            self.has_coordinator
-            and not self.engines_running
-            and self.scheduler.has_unfinished_requests()
-        ):
-            # Wake up other DP engines.
-            self.output_queue.put_nowait(
-                (-1, EngineCoreOutputs(start_wave=self.current_wave))
+        if self.pending_pause or (self.engines_running and self.ignore_start_dp_wave):
+            raise RuntimeError(
+                "resume_scheduler called while pause is still in "
+                "flight. Wait for the pause future to resolve before "
+                "resuming."
             )
+        if self.engines_running:
+            logger.debug("Resume called while engines are not paused, ignoring.")
+            return
+
+        super().resume_scheduler()
+        self.ignore_start_dp_wave = False
+
+        # Barrier: wait for all DP ranks to have resumed (and cleared
+        # ignore_start_dp_wave) before any rank starts stepping. Uses
+        # the existing all-reduce which is safe because engines are
+        # stopped.
+        has_global_unfinished = ParallelConfig.has_unfinished_dp(
+            self.dp_group, self.scheduler.has_unfinished_requests()
+        )
+
+        if has_global_unfinished:
+            self.engines_running = True
+
+    def barrier(self):
+        """Blocking barrier on the DP process group (test-only utility)."""
+        import torch.distributed as dist
+
+        dist.barrier(group=self.dp_group)
 
     def _handle_client_request(
         self, request_type: EngineCoreRequestType, request: Any
     ) -> None:
         if request_type == EngineCoreRequestType.START_DP_WAVE:
+            if self.ignore_start_dp_wave:
+                return
             new_wave, exclude_eng_index = request
             if exclude_eng_index != self.engine_index and (
                 new_wave >= self.current_wave
             ):
                 self.current_wave = new_wave
                 if not self.engines_running:
-                    logger.debug("EngineCore starting idle loop for wave %d.", new_wave)
+                    logger.debug(
+                        "EngineCore starting idle loop for wave %d.",
+                        new_wave,
+                    )
                     self.engines_running = True
         else:
             super()._handle_client_request(request_type, request)
@@ -1700,6 +1823,8 @@ def run_busy_loop(self):
         while self._handle_shutdown():
             # 1) Poll the input queue until there is work to do.
             self._process_input_queue()
+            # Publish request counts before and after GPU step to ensure freshness.
+            self._maybe_publish_request_counts()
 
             if self.eep_scaling_state is not None:
                 _ = self.eep_scaling_state.progress()
@@ -1755,7 +1880,18 @@ def _has_global_unfinished_reqs(self, local_unfinished: bool) -> bool:
         if self.step_counter % 32 != 0:
             return True
 
-        return ParallelConfig.has_unfinished_dp(self.dp_group, local_unfinished)
+        has_unfinished, pause_consensus = ParallelConfig.sync_dp_state(
+            self.dp_group,
+            has_unfinished=local_unfinished,
+            pending_pause=self.pending_pause,
+        )
+
+        if pause_consensus:
+            self.ignore_start_dp_wave = True
+            self.pending_pause = False
+            logger.debug("DP pause consensus reached, ignoring START_DP_WAVE.")
+
+        return has_unfinished
 
     def reinitialize_distributed(
         self, reconfig_request: ReconfigureDistributedRequest
@@ -1896,6 +2032,8 @@ def __init__(
         vllm_config.parallel_config.data_parallel_index = dp_rank
         vllm_config.parallel_config.data_parallel_rank_local = local_dp_rank
 
+        self._set_nixl_side_channel_host()
+
         # Set CUDA_VISIBLE_DEVICES as early as possible in actor life cycle
         # NOTE: in MP we set CUDA_VISIBLE_DEVICES at process creation time,
         # and this cannot be done in the same way for Ray because:
@@ -1915,6 +2053,16 @@ def __init__(
         # of ray.
         self._set_visible_devices(vllm_config, local_dp_rank)
 
+    @staticmethod
+    def _set_nixl_side_channel_host():
+        import ray
+
+        # The driver-side value is excluded from Ray actor env propagation.
+        # Fill in an actor-local default while preserving explicit overrides.
+        os.environ.setdefault(
+            "VLLM_NIXL_SIDE_CHANNEL_HOST", ray.util.get_node_ip_address()
+        )
+
     def _set_visible_devices(self, vllm_config: VllmConfig, local_dp_rank: int):
         from vllm.platforms import current_platform
 
diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
index b9a3c7545e16..2f2c15d246f8 100644
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import asyncio
 import contextlib
-import multiprocessing
 import queue
 import sys
 import uuid
@@ -36,6 +35,7 @@
     EEP_NOTIFICATION_CALL_ID,
     EEPNotificationType,
     EngineCoreOutputs,
+    EngineCoreReadyResponse,
     EngineCoreRequest,
     EngineCoreRequestType,
     PauseMode,
@@ -590,8 +590,9 @@ def __init__(
                         f"timeout, set the environment variable: "
                         f"VLLM_ENGINE_READY_TIMEOUT_S=<seconds>"
                     )
-                identity, _ = sync_input_socket.recv_multipart()
+                identity, payload = sync_input_socket.recv_multipart()
                 identities.remove(identity)
+                self._apply_ready_response(payload)
 
             self.core_engine: EngineIdentity = self.core_engines[0]
             self.utility_results: dict[int, AnyFuture] = {}
@@ -640,34 +641,20 @@ def dp_engines_running(self) -> bool:
     def start_engine_core_monitor(self):
         """Start a monitor thread for engine core processes."""
         engine_manager = self.resources.engine_manager
-        if (
-            engine_manager is None
-            or not hasattr(engine_manager, "processes")
-            or not engine_manager.processes
-        ):
+        if engine_manager is None:
             # No engine processes to monitor
             return
 
-        engine_processes = engine_manager.processes
         self_ref = weakref.ref(self)
 
         # Monitor engine core process liveness. If any die unexpectedly,
-        # logs an error, shuts down the client and invokes the failure
-        # callback to inform the engine.
+        # marks the engine as dead, and shuts down the client.
         def monitor_engine_cores():
-            sentinels = [proc.sentinel for proc in engine_processes]
-            died = multiprocessing.connection.wait(sentinels)
+            engine_manager.monitor_engine_liveness()
             _self = self_ref()
             if not _self or not _self._finalizer.alive or _self.resources.engine_dead:
                 return
             _self.resources.engine_dead = True
-            proc_name = next(
-                proc.name for proc in engine_processes if proc.sentinel == died[0]
-            )
-            logger.error(
-                "Engine core proc %s died unexpectedly, shutting down client.",
-                proc_name,
-            )
             _self.shutdown()
             # Note: For MPClient, we don't have a failure callback mechanism
             # like MultiprocExecutor, but we set engine_dead flag which will
@@ -677,6 +664,32 @@ def monitor_engine_cores():
             target=monitor_engine_cores, daemon=True, name="MPClientEngineMonitor"
         ).start()
 
+    def _apply_ready_response(self, payload: bytes) -> None:
+        """Decode an EngineCoreReadyResponse and sync any post-initialization
+        config changes (e.g. auto-fitted max_model_len) back to the frontend."""
+        if not payload:
+            return
+        vllm_config = self.vllm_config
+        response = msgspec.msgpack.decode(payload, type=EngineCoreReadyResponse)
+        vllm_config.model_config.max_model_len = min(
+            vllm_config.model_config.max_model_len, response.max_model_len
+        )
+
+        # Setup KV cache config with initialization state from
+        # engine core process. Sum values from all engines in DP case.
+        num_gpu_blocks = vllm_config.cache_config.num_gpu_blocks or 0
+        num_gpu_blocks += response.num_gpu_blocks
+        vllm_config.cache_config.num_gpu_blocks = num_gpu_blocks
+
+        # In external DP LB mode, the coordinator address that the
+        # front-end procs connect to is obtained by each engine via it's
+        # initial handshake with the rank 0 front-end.
+        if response.dp_stats_address is not None:
+            if self.stats_update_address is None:
+                self.stats_update_address = response.dp_stats_address
+            else:
+                assert response.dp_stats_address == self.stats_update_address
+
 
 def _process_utility_output(
     output: UtilityOutput, utility_results: dict[int, AnyFuture]
@@ -1597,8 +1610,9 @@ async def _scale_up_elastic_ep(
                     f"timeout, set the environment variable: "
                     f"VLLM_ENGINE_READY_TIMEOUT_S=<seconds>"
                 )
-            identity, _ = sync_input_socket.recv_multipart()
+            identity, payload = sync_input_socket.recv_multipart()
             new_engine_identities.discard(identity)
+            self._apply_ready_response(payload)
 
         # NOTE(yongji): Before we schedule any requests on the new workers,
         # we should wait for them to switch to the new setup.
@@ -1634,6 +1648,9 @@ async def _scale_down_elastic_ep(
         parallel_config = self.vllm_config.parallel_config
         ip, coord_store_port = self._setup_elastic_ep_reconfig_bootstrap()
 
+        removed_dp_size = cur_data_parallel_size - new_data_parallel_size
+        assert isinstance(self.resources.engine_manager, CoreEngineActorManager)
+        self.resources.engine_manager.remove_run_refs_for_scale_down(removed_dp_size)
         reconfig_futures = []
         for cur_dp_rank, engine in enumerate(self.core_engines):
             reconfig_request = ReconfigureDistributedRequest(
diff --git a/vllm/v1/engine/detokenizer.py b/vllm/v1/engine/detokenizer.py
index 2f81ba4f6c78..4700eecb59a7 100644
--- a/vllm/v1/engine/detokenizer.py
+++ b/vllm/v1/engine/detokenizer.py
@@ -3,9 +3,9 @@
 from abc import ABC, abstractmethod
 
 import tokenizers
+import tokenizers.decoders
 from packaging import version
 from tokenizers import Tokenizer
-from tokenizers.decoders import DecodeStream
 from transformers import PreTrainedTokenizerFast
 
 from vllm.logger import init_logger
@@ -177,7 +177,10 @@ def __init__(self, tokenizer: PreTrainedTokenizerFast, request: EngineCoreReques
         self.tokenizer: Tokenizer = tokenizer._tokenizer
 
         # Use native prefill to prime the decode stream with prompt tokens.
-        self.stream = DecodeStream(
+        # Look up DecodeStream on the module so backend patches (e.g. the
+        # fastokens shim that replaces ``tokenizers.decoders.DecodeStream``)
+        # are honored regardless of import order.
+        self.stream = tokenizers.decoders.DecodeStream(
             ids=request.prompt_token_ids,
             skip_special_tokens=self.skip_special_tokens,
         )
@@ -237,7 +240,9 @@ def _protected_step(self, next_token_id: int) -> str | None:
                 " for request %s, resetting decode stream.",
                 self.request_id,
             )
-            self.stream = DecodeStream(skip_special_tokens=self.skip_special_tokens)
+            self.stream = tokenizers.decoders.DecodeStream(
+                skip_special_tokens=self.skip_special_tokens
+            )
             token = self.stream.step(self.tokenizer, next_token_id)
         return token
 
diff --git a/vllm/v1/engine/input_processor.py b/vllm/v1/engine/input_processor.py
index b59d02a46327..c579c92baf37 100644
--- a/vllm/v1/engine/input_processor.py
+++ b/vllm/v1/engine/input_processor.py
@@ -98,9 +98,9 @@ def _validate_params(
                 self.tokenizer,
             )
 
-            if (
-                params.thinking_token_budget is not None
-                and self.vllm_config.reasoning_config is None
+            if params.thinking_token_budget is not None and (
+                self.vllm_config.reasoning_config is None
+                or not self.vllm_config.reasoning_config.enabled
             ):
                 raise ValueError(
                     "thinking_token_budget is set but reasoning_config is "
@@ -172,6 +172,45 @@ def _get_mm_identifier(
             return mm_hash
         return f"{lora_request.lora_name}:{mm_hash}"
 
+    def inject_into_mm_cache(
+        self,
+        mm_hashes: dict[str, list[str]],
+        mm_kwargs: dict[str, list],
+    ) -> None:
+        """Inject pre-processed mm_kwargs into the processor cache.
+
+        Call this when mm_kwargs have already been through the HF processor
+        externally (e.g. by a frontend that transfers pre-processed tensors
+        to the backend).  This ensures MM cache hit rate metrics are reported
+        accurately and avoids redundant processing on subsequent requests
+        with the same images.
+
+        Uses ``get_and_update_item()`` with an empty prompt_updates list,
+        since token expansion has already been handled externally.
+        """
+        cache = self.renderer.mm_processor_cache
+        if cache is None:
+            return
+        try:
+            for modality, hashes in mm_hashes.items():
+                items = mm_kwargs.get(modality, [])
+                for i, mm_hash in enumerate(hashes):
+                    if i < len(items) and items[i] is not None:
+                        # Insert into cache via get_and_update_item.
+                        # Use the returned item (may be an address for SHM
+                        # cache or the original item for LRU cache).
+                        items[i], _ = cache.get_and_update_item(
+                            (items[i], []),
+                            mm_hash,
+                        )
+            # Update cache stats to reflect the externally processed items
+            self.renderer.update_mm_cache_stats()
+        except Exception:
+            logger.warning(
+                "Failed to inject mm_kwargs into processor cache",
+                exc_info=True,
+            )
+
     @staticmethod
     def assign_request_id(request: EngineCoreRequest):
         """Replace the externally supplied request ID with an internal request ID
@@ -253,11 +292,13 @@ def process_inputs(
 
         # Mypy can be conservative for TypedDict unions; normalize access.
         if decoder_inputs["type"] == "embeds":
-            prompt_token_ids = None
             prompt_embeds = decoder_inputs["prompt_embeds"]
+            prompt_token_ids = decoder_inputs.get("prompt_token_ids")
+            prompt_is_token_ids = decoder_inputs.get("is_token_ids")
         else:
             prompt_token_ids = decoder_inputs["prompt_token_ids"]
             prompt_embeds = None
+            prompt_is_token_ids = None
 
         sampling_params = None
         pooling_params = None
@@ -322,6 +363,7 @@ def process_inputs(
             request_id=request_id,
             prompt_token_ids=prompt_token_ids,
             prompt_embeds=prompt_embeds,
+            prompt_is_token_ids=prompt_is_token_ids,
             mm_features=mm_features,
             sampling_params=sampling_params,
             pooling_params=pooling_params,
@@ -405,11 +447,11 @@ def _validate_model_input(
             decoder_mm_positions = prompt_input["mm_placeholders"]
             for modality, mm_positions in decoder_mm_positions.items():
                 for mm_position in mm_positions:
-                    embed_length = mm_position.get_num_embeds()
-                    if embed_length > self.mm_encoder_cache_size:
+                    num_embeds = mm_position.get_num_embeds()
+                    if num_embeds > self.mm_encoder_cache_size:
                         raise ValueError(
                             f"The {prompt_type} prompt contains a(n) {modality} item "
-                            f"with length {embed_length}, which exceeds the "
+                            f"with {num_embeds} embedding tokens, which exceeds the "
                             f"pre-allocated encoder cache size "
                             f"{self.mm_encoder_cache_size}. Please reduce the input "
                             f"size or increase the encoder cache size "
diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py
index 4b6a7ba44e10..f3e8a95b0d63 100644
--- a/vllm/v1/engine/llm_engine.py
+++ b/vllm/v1/engine/llm_engine.py
@@ -19,7 +19,6 @@
 from vllm.lora.request import LoRARequest
 from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
 from vllm.outputs import PoolingRequestOutput, RequestOutput
-from vllm.plugins.io_processors import get_io_processor
 from vllm.pooling_params import PoolingParams
 from vllm.renderers import renderer_from_config
 from vllm.renderers.inputs.preprocess import extract_prompt_components
@@ -57,7 +56,6 @@ def __init__(
         usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
         stat_loggers: list[StatLoggerFactory] | None = None,
         mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
-        use_cached_outputs: bool = False,
         multiprocess_mode: bool = False,
     ) -> None:
         self.vllm_config = vllm_config
@@ -90,11 +88,6 @@ def __init__(
         self.should_execute_dummy_batch = False
 
         self.renderer = renderer = renderer_from_config(self.vllm_config)
-        self.io_processor = get_io_processor(
-            self.vllm_config,
-            self.renderer,
-            self.model_config.io_processor_plugin,
-        )
 
         # Convert EngineInput --> EngineCoreRequest.
         self.input_processor = InputProcessor(self.vllm_config, renderer)
@@ -357,6 +350,8 @@ def reset_encoder_cache(self) -> None:
         self.engine_core.reset_encoder_cache()
 
     def sleep(self, level: int = 1, mode: PauseMode = "abort"):
+        if level >= 1:
+            self.renderer.clear_mm_cache()
         self.engine_core.sleep(level, mode)
 
         if self.logger_manager is not None:
diff --git a/vllm/v1/engine/logprobs.py b/vllm/v1/engine/logprobs.py
index 513531c31f3b..74a45ab1e4d4 100644
--- a/vllm/v1/engine/logprobs.py
+++ b/vllm/v1/engine/logprobs.py
@@ -7,6 +7,7 @@
 
 from vllm.logger import init_logger
 from vllm.logprobs import (
+    FlatLogprobs,
     PromptLogprobs,
     SampleLogprobs,
     append_logprobs_for_next_position,
@@ -46,7 +47,7 @@ def from_new_request(
     ) -> "LogprobsProcessor":
         sampling_params = request.sampling_params
         assert sampling_params is not None
-        num_logprobs = sampling_params.logprobs
+        num_logprobs = sampling_params.num_logprobs
         num_prompt_logprobs = sampling_params.prompt_logprobs
         return cls(
             tokenizer=tokenizer,
@@ -96,8 +97,11 @@ def _update_sample_logprobs(self, logprobs_lists: LogprobsLists) -> None:
                 decoded_tokens_list = convert_ids_list_to_tokens(
                     self.tokenizer, token_ids
                 )
+                context_token_ids = self._get_sampled_context_ids(self.logprobs)
                 decoded_tokens = self._verify_tokens(
-                    decoded_tokens_list=decoded_tokens_list, tokens=token_ids
+                    decoded_tokens_list=decoded_tokens_list,
+                    tokens=token_ids,
+                    context_token_ids=context_token_ids,
                 )
 
             # Sampler puts the sampled logprob in first.
@@ -162,9 +166,14 @@ def _update_prompt_logprobs(
             else:
                 # Extract decoded tokens for this position
                 decoded_tokens_slice = all_decoded_tokens[offset:offset_end]
+                # Context: preceding prompt tokens accumulated in
+                # self.prompt_logprobs from previous loop iterations.
+                context_token_ids = self._get_sampled_context_ids(self.prompt_logprobs)
                 # Apply UTF-8 correction within this position's token boundaries
                 decoded_tokens_for_pos = self._verify_tokens(
-                    decoded_tokens_list=decoded_tokens_slice, tokens=token_ids_list[pos]
+                    decoded_tokens_list=decoded_tokens_slice,
+                    tokens=token_ids_list[pos],
+                    context_token_ids=context_token_ids,
                 )
 
             # Update with the Logprob container for this pos.
@@ -196,41 +205,139 @@ def pop_prompt_logprobs(self) -> PromptLogprobs | None:
             self.prompt_logprobs = []
         return plp
 
-    def _correct_decoded_token(self, idx: int, tokens: list[int]) -> str:
-        assert self.tokenizer is not None, "self.tokenizer should not be None"
-
-        # try with prev token id in same list
-        if idx > 0:
-            possible_decoded_token = self.tokenizer.decode(tokens[idx - 1 : idx + 1])
-            if not possible_decoded_token.endswith("�"):
-                return possible_decoded_token
-        # try with previous logprob token id
-        if self.logprobs:
-            latest_token_id = next(iter(self.logprobs[-1]))
-
-            decode_ids = [latest_token_id]
-            if idx > 0:
-                decode_ids.extend(tokens[idx - 1 : idx + 1])
+    @staticmethod
+    def _get_sampled_context_ids(
+        logprobs_source: SampleLogprobs | PromptLogprobs | None,
+        max_context: int = 4,
+    ) -> list[int]:
+        """Extract recent sampled token IDs from a logprobs source.
+
+        The sampled (or prompt) token at each position is the first
+        entry, since it is always inserted first by
+        append_logprobs_for_next_position.
+
+        Args:
+            logprobs_source: The logprobs container to extract from.
+            max_context: Maximum number of preceding tokens to return.
+                4 is sufficient for any UTF-8 multi-byte sequence.
+
+        Returns:
+            List of sampled token IDs, oldest first, most recent last.
+        """
+        if not logprobs_source:
+            return []
+
+        n = len(logprobs_source)
+        start = max(0, n - max_context)
+
+        # Efficient path for FlatLogprobs: access token_ids directly.
+        if isinstance(logprobs_source, FlatLogprobs):
+            return [
+                logprobs_source.token_ids[logprobs_source.start_indices[i]]
+                for i in range(start, n)
+                if logprobs_source.start_indices[i] < logprobs_source.end_indices[i]
+            ]
+
+        # list[dict] path
+        result: list[int] = []
+        for i in range(start, n):
+            entry = logprobs_source[i]
+            if entry is not None:
+                result.append(next(iter(entry)))
+        return result
+
+    def _correct_decoded_token(
+        self, token_id: int, context_token_ids: list[int]
+    ) -> str:
+        """Correct a decoded token that contains the replacement character.
+
+        When byte-fallback tokenization splits multi-byte UTF-8
+        characters across tokens, individual token decoding produces
+        the replacement character U+FFFD. This method uses preceding
+        sampled tokens as context to reconstruct the correct text.
+
+        Args:
+            token_id: The single token ID to correct.
+            context_token_ids: Preceding sampled token IDs in sequential
+                order (oldest first). These are the actual tokens in
+                the generated sequence, NOT top-k alternatives.
+
+        Returns:
+            The corrected decoded string, or empty string if the byte
+            sequence is genuinely incomplete at this point.
+        """
+        assert self.tokenizer is not None
+
+        max_ctx = min(len(context_token_ids), 4)
+
+        for num_ctx in range(1, max_ctx + 1):
+            context = context_token_ids[-num_ctx:]
+            full_decoded = self.tokenizer.decode(context + [token_id])
+
+            if full_decoded.endswith("�"):
+                continue
+
+            # Find the boundary between "clean" context tokens and
+            # byte-fallback tokens that are part of the same incomplete
+            # sequence. Byte-fallback context tokens returned "" when
+            # they were processed, so their text must be attributed to
+            # this completing token.
+            clean_end = len(context)
+            for j in range(len(context) - 1, -1, -1):
+                if self.tokenizer.decode([context[j]]).endswith("�"):
+                    clean_end = j
+                else:
+                    break
+
+            # Decode only the clean (non-byte-fallback) prefix.
+            if clean_end > 0:
+                clean_prefix = self.tokenizer.decode(context[:clean_end])
             else:
-                decode_ids.extend(tokens[idx : idx + 1])
+                clean_prefix = ""
 
-            possible_decoded_token = self.tokenizer.decode(decode_ids)
-            if not possible_decoded_token.endswith("�"):
-                return possible_decoded_token
+            if full_decoded.startswith(clean_prefix):
+                return full_decoded[len(clean_prefix) :]
+
+            # Tokenizer normalization may cause prefix mismatch.
+            # Find the longest common prefix between them.
+            common_len = 0
+            for a, b in zip(clean_prefix, full_decoded):
+                if a != b:
+                    break
+                common_len += 1
+            return full_decoded[common_len:]
 
-        # by default return empty string
         return ""
 
     def _verify_tokens(
-        self, decoded_tokens_list: list[str], tokens: list[int]
+        self,
+        decoded_tokens_list: list[str],
+        tokens: list[int],
+        context_token_ids: list[int] | None = None,
     ) -> list[str]:
+        """Verify and correct decoded tokens with replacement characters.
+
+        Args:
+            decoded_tokens_list: Decoded token strings to verify.
+            tokens: Token IDs corresponding to decoded_tokens_list.
+                These are alternatives at the SAME position (e.g.
+                [sampled, top1, top2]), NOT sequential tokens.
+            context_token_ids: Preceding sampled token IDs providing
+                sequential context. If None, extracted from
+                self.logprobs.
+        """
+        if context_token_ids is None:
+            context_token_ids = self._get_sampled_context_ids(self.logprobs)
+
         corrected_decoded_token_map = dict()
         for idx, text in enumerate(decoded_tokens_list):
             if text.endswith("�"):
-                # utf-8 char at the end means it's a potential unfinished byte sequence
-                # from byte fallback tokenization.
+                # Replacement char at the end means a potential
+                # unfinished byte sequence from byte-fallback
+                # tokenization. Correct each token independently
+                # using only the sequential context.
                 corrected_decoded_token_map[idx] = self._correct_decoded_token(
-                    idx, tokens
+                    tokens[idx], context_token_ids
                 )
 
         for idx, text in corrected_decoded_token_map.items():
diff --git a/vllm/v1/engine/output_processor.py b/vllm/v1/engine/output_processor.py
index f9e965092288..e1032cfd1f2b 100644
--- a/vllm/v1/engine/output_processor.py
+++ b/vllm/v1/engine/output_processor.py
@@ -175,6 +175,9 @@ def __init__(
 
         self.stats = RequestStateStats(arrival_time=arrival_time) if log_stats else None
 
+        # Routed experts accumulation (prompt + sample chunks)
+        self.routed_experts_chunks: list[np.ndarray] = []
+
         # Stream Interval
         self.stream_interval = stream_interval
         self.sent_tokens_offset = 0  # Offset of sent tokens
@@ -273,7 +276,6 @@ def make_request_output(
         finish_reason: FinishReason | None,
         stop_reason: int | str | None,
         kv_transfer_params: dict[str, Any] | None = None,
-        routed_experts: np.ndarray | None = None,
     ) -> RequestOutput | PoolingRequestOutput | None:
         finished = finish_reason is not None
         final_only = self.output_kind == RequestOutputKind.FINAL_ONLY
@@ -314,9 +316,7 @@ def make_request_output(
                 finished,
             )
 
-        output = self._new_completion_output(
-            new_token_ids, finish_reason, stop_reason, routed_experts
-        )
+        output = self._new_completion_output(new_token_ids, finish_reason, stop_reason)
 
         if self.parent_req is None:
             outputs = [output]
@@ -378,7 +378,6 @@ def _new_completion_output(
         token_ids: list[int],
         finish_reason: FinishReason | None,
         stop_reason: int | str | None,
-        routed_experts: np.ndarray | None = None,
     ) -> CompletionOutput:
         assert self.detokenizer is not None
         assert self.logprobs_processor is not None
@@ -395,6 +394,11 @@ def _new_completion_output(
         if delta and logprobs:
             logprobs = logprobs[-len(token_ids) :]
 
+        # Concatenate routed experts on finish
+        routed_experts = None
+        if finished and self.routed_experts_chunks:
+            routed_experts = np.concatenate(self.routed_experts_chunks, axis=0)
+
         return CompletionOutput(
             index=self.request_index,
             text=text,
@@ -616,9 +620,17 @@ def process_outputs(
             finish_reason = engine_core_output.finish_reason
             stop_reason = engine_core_output.stop_reason
             kv_transfer_params = engine_core_output.kv_transfer_params
-            routed_experts = engine_core_output.routed_experts
-            req_state.num_cached_tokens = engine_core_output.num_cached_tokens
-            req_state.is_prefilling = False
+            if engine_core_output.routed_experts is not None:
+                req_state.routed_experts_chunks.append(
+                    engine_core_output.routed_experts
+                )
+
+            if req_state.is_prefilling:
+                if engine_core_output.prefill_stats is not None:
+                    req_state.num_cached_tokens = (
+                        engine_core_output.prefill_stats.num_cached_tokens
+                    )
+                req_state.is_prefilling = False
 
             if pooling_output is None:
                 assert req_state.detokenizer is not None
@@ -642,7 +654,6 @@ def process_outputs(
                 finish_reason,
                 stop_reason,
                 kv_transfer_params,
-                routed_experts,
             ):
                 if req_state.streaming_input:
                     request_output.finished = False
@@ -776,7 +787,6 @@ def _update_stats_from_output(
             engine_core_output,
             engine_core_timestamp,
             req_state.is_prefilling,
-            req_state.prompt_len,
             req_state.stats,
             self.lora_states,
             req_state.lora_name,
@@ -795,6 +805,7 @@ def _update_stats_from_finished(
         assert req_state.stats is not None
         iteration_stats.update_from_finished_request(
             finish_reason=finish_reason,
+            request_id=req_state.external_req_id,
             num_prompt_tokens=req_state.prompt_len,
             max_tokens_param=req_state.max_tokens_param,
             req_stats=req_state.stats,
diff --git a/vllm/v1/engine/utils.py b/vllm/v1/engine/utils.py
index 90ec47edb033..cc8ce10e31e0 100644
--- a/vllm/v1/engine/utils.py
+++ b/vllm/v1/engine/utils.py
@@ -11,7 +11,7 @@
 from multiprocessing import Process, connection
 from multiprocessing.process import BaseProcess
 from multiprocessing.queues import Queue
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, cast
 from unittest.mock import patch
 
 import msgspec
@@ -22,10 +22,12 @@
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.ray.ray_env import get_env_vars_to_copy
+from vllm.utils import numa_utils
 from vllm.utils.network_utils import get_open_zmq_ipc_path, zmq_socket_ctx
 from vllm.utils.system_utils import get_mp_context
 from vllm.v1.engine.coordinator import DPCoordinator
 from vllm.v1.executor import Executor
+from vllm.v1.executor.ray_utils import WORKER_SPECIFIC_ENV_VARS
 from vllm.v1.utils import get_engine_client_zmq_addr, shutdown
 
 if TYPE_CHECKING:
@@ -79,6 +81,21 @@ class EngineHandshakeMetadata:
     parallel_config: dict[str, int | str | list[int]]
 
 
+def _make_control_bundle(node_ip: str) -> dict[str, float]:
+    # The engine actor is scheduled on the final CPU-only bundle. Keep that
+    # bundle colocated with the group's first GPU bundle so the actor does not
+    # float to an unrelated node and reorder worker ranks away from the
+    # advertised DP bootstrap host.
+    return {"CPU": 1.0, "node:" + node_ip: 0.001}
+
+
+def _get_bundle_node_ip(bundle: dict[str, float]) -> str:
+    for key in bundle:
+        if key.startswith("node:"):
+            return key.split(":", 1)[1]
+    raise ValueError(f"Missing node affinity in placement bundle: {bundle}")
+
+
 class CoreEngineProcManager:
     """
     Utility class to handle creation, readiness, and shutdown
@@ -133,19 +150,42 @@ def __init__(
             )
 
         self._finalizer = weakref.finalize(self, shutdown, self.processes)
+        self.manager_stopped = threading.Event()
+        self.failed_proc_name: str | None = None
 
         try:
             for proc, local_dp_rank in zip(self.processes, local_dp_ranks):
-                # Adjust device control in DP for non-CUDA platforms
-                # as well as external and ray launchers
-                # For CUDA platforms, we use torch.accelerator.set_device_index()()
+                # Adjust device control in DP for platforms that cannot rely
+                # on torch.accelerator.set_device_index(), and for Ray launchers.
+                device_control_context: contextlib.AbstractContextManager[None] = (
+                    contextlib.nullcontext()
+                )
+                needs_device_env_isolation = not (
+                    current_platform.is_cuda_alike() or current_platform.is_xpu()
+                )
                 if is_dp and (
-                    not current_platform.is_cuda_alike()
-                    or vllm_config.parallel_config.use_ray
+                    needs_device_env_isolation or vllm_config.parallel_config.use_ray
+                ):
+                    device_control_context = set_device_control_env_var(
+                        vllm_config, local_dp_rank
+                    )
+
+                with (
+                    device_control_context,
+                    numa_utils.configure_subprocess(
+                        # EngineCore itself does not have a TP/PP-local rank.
+                        # When DP is enabled, set_device_control_env_var()
+                        # narrows visible devices to this DP shard first, so
+                        # local_rank=0 means "the first local GPU in this
+                        # shard". The actual TP/PP worker processes spawned by
+                        # the executor are bound separately with their own
+                        # local_rank values.
+                        vllm_config,
+                        local_rank=0,
+                        dp_local_rank=local_dp_rank,
+                        process_kind="EngineCore",
+                    ),
                 ):
-                    with set_device_control_env_var(vllm_config, local_dp_rank):
-                        proc.start()
-                else:
                     proc.start()
         finally:
             # Kill other procs if not all are running.
@@ -154,12 +194,31 @@ def __init__(
 
     def shutdown(self, timeout: float | None = None) -> None:
         """Shutdown engine core processes with configurable timeout."""
+        self.manager_stopped.set()
         if self._finalizer.detach() is not None:
             shutdown(self.processes, timeout=timeout)
 
-    def join_first(self):
-        """Wait for any process to exit."""
-        connection.wait(proc.sentinel for proc in self.processes)
+    def monitor_engine_liveness(self) -> None:
+        """Monitor engine core process liveness."""
+
+        sentinel_to_proc = {proc.sentinel: proc for proc in self.processes}
+        sentinels = set(sentinel_to_proc.keys())
+
+        while sentinels and not self.manager_stopped.is_set():
+            died_sentinels = connection.wait(sentinels, timeout=1)
+
+            for sentinel in died_sentinels:
+                proc = sentinel_to_proc.pop(cast(int, sentinel))
+                exitcode = proc.exitcode
+                if exitcode != 0 and not self.manager_stopped.is_set():
+                    self.failed_proc_name = proc.name
+            if died_sentinels:
+                # Any engine exit currently triggers a shutdown. Future
+                # work (e.g., Elastic and fault-tolerant EP) will add finer-grained
+                # handling for different exit scenarios.
+                break
+
+        self.shutdown()
 
     def sentinels(self) -> list:
         return [proc.sentinel for proc in self.processes]
@@ -251,6 +310,18 @@ def get_device_indices(
     return value
 
 
+def _apply_dp_identity_suffix(dp_vllm_config, dp_rank: int) -> None:
+    # Ray actor names (RayExecutorV2) and KV-connector engine_ids must
+    # be unique across sibling DP engines or registration collides.
+    # Use the global DP rank, not a node-local rank, since sibling DP
+    # engines can span multiple nodes.
+    dp_vllm_config.instance_id = f"{dp_vllm_config.instance_id}_dp{dp_rank}"
+    if dp_vllm_config.kv_transfer_config is not None:
+        dp_vllm_config.kv_transfer_config.engine_id = (
+            f"{dp_vllm_config.kv_transfer_config.engine_id}_dp{dp_rank}"
+        )
+
+
 class CoreEngineActorManager:
     """
     Utility class to handle creation, readiness, and shutdown
@@ -287,7 +358,10 @@ def __init__(
         self.local_engine_actors: list[ray.ActorHandle] = []
         self.remote_engine_actors: list[ray.ActorHandle] = []
 
-        env_vars_list = get_env_vars_to_copy(destination=actor_class.__name__)
+        env_vars_list = get_env_vars_to_copy(
+            destination=actor_class.__name__,
+            exclude_vars=WORKER_SPECIFIC_ENV_VARS,
+        )
         self.env_vars_dict = {
             name: os.environ[name] for name in env_vars_list if name in os.environ
         }
@@ -298,6 +372,8 @@ def __init__(
         self.log_stats = log_stats
         local_engine_count = vllm_config.parallel_config.data_parallel_size_local
         world_size = vllm_config.parallel_config.world_size
+        self.manager_stopped = threading.Event()
+        self.failed_proc_name: str | None = None
 
         if ray.is_initialized():
             logger.info("Ray is already initialized. Skipping Ray initialization.")
@@ -344,16 +420,11 @@ def __init__(
             range(dp_size), local_dp_ranks, placement_groups
         ):
             dp_vllm_config = copy.deepcopy(vllm_config)
+            if dp_size > 1:
+                _apply_dp_identity_suffix(dp_vllm_config, index)
             dp_vllm_config.parallel_config.placement_group = pg
             local_client = index < local_engine_count
 
-            if dp_size > 1 and dp_vllm_config.kv_transfer_config is not None:
-                # modify the engine_id and append the local_dp_rank to it to ensure
-                # that the kv_transfer_config is unique for each DP rank.
-                dp_vllm_config.kv_transfer_config.engine_id = (
-                    f"{dp_vllm_config.kv_transfer_config.engine_id}_dp{local_index}"
-                )
-
             # Ray XPU known issue: dpctl initializes the GPU runtime early, so
             # setting device env vars in Ray actor's initialization method
             # will not affect device selection. See:
@@ -395,8 +466,11 @@ def __init__(
 
         ray.get(refs)
         self.run_refs = []
+        self.actor_run_ref_dict = dict()
         for actor in self.local_engine_actors + self.remote_engine_actors:
-            self.run_refs.append(actor.run.remote())
+            ref = actor.run.remote()
+            self.run_refs.append(ref)
+            self.actor_run_ref_dict[actor] = ref
 
     @staticmethod
     def create_dp_placement_groups(
@@ -550,10 +624,20 @@ def create_dp_placement_groups(
                     if len(collected_bundles) < world_size:
                         continue
 
-                    bundles = collected_bundles + [{"CPU": 1.0}]
+                    control_node_ip = _get_bundle_node_ip(collected_bundles[0])
+                    bundles = collected_bundles + [
+                        _make_control_bundle(control_node_ip)
+                    ]
                     collected_bundles = []
                 else:
-                    bundles = device_bundle * world_size + [{"CPU": 1.0}]
+                    # STRICT_PACK already keeps every bundle in the placement
+                    # group on one node, so the explicit node affinity on the
+                    # control bundle is redundant for correctness here. Keep it
+                    # anyway for consistency with the span path and to preserve
+                    # intent if this scheduling strategy changes later.
+                    bundles = device_bundle * world_size + [
+                        _make_control_bundle(node_ip)
+                    ]
 
                 pg = ray.util.placement_group(
                     name=f"dp_rank_{len(placement_groups)}",
@@ -712,6 +796,8 @@ def scale_up_elastic_ep(
         for i, (pg, local_rank) in enumerate(zip(placement_groups, local_dp_ranks)):
             rank = cur_data_parallel_size + i
             dp_vllm_config = copy.deepcopy(cur_vllm_config)
+            if new_data_parallel_size > 1:
+                _apply_dp_identity_suffix(dp_vllm_config, rank)
             dp_vllm_config.parallel_config.data_parallel_size = new_data_parallel_size
             dp_vllm_config.parallel_config.placement_group = pg
 
@@ -776,7 +862,9 @@ def scale_up_elastic_ep(
         ) + self.remote_engine_actors[-(len(placement_groups) - new_local_engines) :]
 
         for actor in actors:
-            self.run_refs.append(actor.run.remote())
+            ref = actor.run.remote()
+            self.run_refs.append(ref)
+            self.actor_run_ref_dict[actor] = ref
 
         cur_vllm_config.parallel_config.data_parallel_size = new_data_parallel_size
         # Update old_vllm_config with new data_parallel_size_local if any new
@@ -805,12 +893,59 @@ def scale_down_elastic_ep(
                 self.remote_engine_actors.pop()
             ray.util.remove_placement_group(pg)
 
+    def remove_run_refs_for_scale_down(self, removed_dp_size: int) -> None:
+        if removed_dp_size <= 0:
+            return
+        flags = self.placement_group_is_local[-removed_dp_size:]
+        li = len(self.local_engine_actors) - 1
+        ri = len(self.remote_engine_actors) - 1
+        for is_local in reversed(flags):
+            if is_local:
+                actor = self.local_engine_actors[li]
+                li -= 1
+            else:
+                actor = self.remote_engine_actors[ri]
+                ri -= 1
+            ref = self.actor_run_ref_dict.pop(actor)
+            self.run_refs.remove(ref)
+
     def get_run_refs(self):
         return self.run_refs
 
+    def monitor_engine_liveness(self) -> None:
+        import ray
+
+        while not self.manager_stopped.is_set():
+            actor_run_refs = list(self.get_run_refs())
+            if not actor_run_refs:
+                logger.info(
+                    "There are no actors to monitor currently. "
+                    "The monitoring function is about to terminate."
+                )
+                break
+            actor_done_refs, _ = ray.wait(actor_run_refs, timeout=5)
+            unexpected_failure = False
+            for actor_ref in actor_done_refs:
+                if self.manager_stopped.is_set():
+                    break
+                if actor_ref not in self.get_run_refs():
+                    # The run refs may have been updated by elastic scale-down.
+                    continue
+                try:
+                    ray.get(actor_ref)
+                except ray.exceptions.RayActorError:
+                    self.failed_proc_name = f"Actor {actor_ref}"
+                    unexpected_failure = True
+
+            if unexpected_failure:
+                break
+
+        self.shutdown()
+
     def shutdown(self, timeout: float | None = None) -> None:
         import ray
 
+        self.manager_stopped.set()
         for actor in self.local_engine_actors + self.remote_engine_actors:
             ray.kill(actor)
         for pg in self.created_placement_groups:
@@ -831,7 +966,7 @@ def get_engine_zmq_addresses(
 
     # In offline mode there is an LLM instance per DP rank and
     # one core engine per LLM, see
-    # examples/offline_inference/data_parallel.py.
+    # examples/features/data_parallel/data_parallel_offline.py.
     offline_mode = local_start_index is not None
 
     # client_local_only = True for cases where this front-end
@@ -1116,19 +1251,6 @@ def wait_for_engine_startup(
             start_pending[0 if local else 1] += 1
             engine.state = CoreEngineState.CONNECTED
         elif status == "READY" and engine.state == CoreEngineState.CONNECTED:
-            # Setup KV cache config with initialization state from
-            # engine core process. Sum values from all engines in DP case.
-            num_gpu_blocks = cache_config.num_gpu_blocks or 0
-            num_gpu_blocks += msg["num_gpu_blocks"]
-            cache_config.num_gpu_blocks = num_gpu_blocks
-
-            # In external DP LB mode, the coordinator address that the
-            # front-end procs connect to is obtained from rank 0 via
-            # one of the engine handshakes, and passed to the local
-            # front-end process in the response from the other.
-            if addresses.frontend_stats_publish_address is None:
-                addresses.frontend_stats_publish_address = msg.get("dp_stats_address")
-
             # Validate config hash consistency across DP workers for MoE models.
             if coordinated_dp:
                 worker_config_hash = msg.get("parallel_config_hash")
diff --git a/vllm/v1/executor/abstract.py b/vllm/v1/executor/abstract.py
index 2c3538d9ac26..e68c0283f579 100644
--- a/vllm/v1/executor/abstract.py
+++ b/vllm/v1/executor/abstract.py
@@ -7,6 +7,7 @@
 from functools import cached_property
 from typing import TYPE_CHECKING, Literal, TypeVar, overload
 
+import vllm.envs as envs
 from vllm.config import VllmConfig
 from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator
 from vllm.distributed.kv_transfer.kv_connector.v1.base import (
@@ -21,7 +22,7 @@
 from vllm.v1.engine import ReconfigureDistributedRequest
 from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec
 from vllm.v1.outputs import DraftTokenIds, ModelRunnerOutput
-from vllm.v1.worker.worker_base import WorkerBase
+from vllm.v1.worker.worker_base import CompilationTimes, WorkerBase
 
 if TYPE_CHECKING:
     from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase
@@ -57,9 +58,14 @@ def get_class(vllm_config: VllmConfig) -> type["Executor"]:
                 )
             executor_class = distributed_executor_backend
         elif distributed_executor_backend == "ray":
-            from vllm.v1.executor.ray_executor import RayDistributedExecutor
+            if envs.VLLM_USE_RAY_V2_EXECUTOR_BACKEND:
+                from vllm.v1.executor.ray_executor_v2 import RayExecutorV2
 
-            executor_class = RayDistributedExecutor
+                executor_class = RayExecutorV2
+            else:
+                from vllm.v1.executor.ray_executor import RayDistributedExecutor
+
+                executor_class = RayDistributedExecutor
         elif distributed_executor_backend == "mp":
             from vllm.v1.executor.multiproc_executor import MultiprocExecutor
 
@@ -115,14 +121,19 @@ def initialize_from_config(self, kv_cache_configs: list[KVCacheConfig]) -> None:
         underlying workers.
         """
         self.collective_rpc("initialize_from_config", args=(kv_cache_configs,))
-        compilation_times: list[float] = self.collective_rpc("compile_or_warm_up_model")
+        compilation_times: list[CompilationTimes] = self.collective_rpc(
+            "compile_or_warm_up_model"
+        )
         # Propagate compilation time from workers back to the main process.
         # With TP>1, compilation happens in worker processes, so the main
         # process config is never updated. Use max across workers since they
         # compile in parallel.
         if compilation_times:
             self.vllm_config.compilation_config.compilation_time = max(
-                compilation_times
+                t.language_model for t in compilation_times
+            )
+            self.vllm_config.compilation_config.encoder_compilation_time = max(
+                t.encoder for t in compilation_times
             )
 
     def register_failure_callback(self, callback: FailureCallback):  # noqa: B027
diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py
index f9b77154067a..db21d7cee779 100644
--- a/vllm/v1/executor/multiproc_executor.py
+++ b/vllm/v1/executor/multiproc_executor.py
@@ -44,12 +44,14 @@
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.tracing import instrument, maybe_init_worker_tracer
+from vllm.utils import numa_utils
 from vllm.utils.network_utils import (
     get_distributed_init_method,
     get_ip,
     get_loopback_ip,
     get_open_port,
 )
+from vllm.utils.ompmultiprocessing import OMPProcessManager
 from vllm.utils.system_utils import (
     _maybe_force_spawn,
     decorate_logs,
@@ -67,25 +69,29 @@
 class FutureWrapper(Future):
     def __init__(
         self,
-        futures_queue: deque[tuple["FutureWrapper", Callable]],
+        futures_queue: deque["FutureWrapper"],
+        get_response: Callable[[], Any],
         aggregate: Callable = lambda x: x,
     ):
         self.futures_queue = futures_queue
+        self.get_response = get_response
         self.aggregate = aggregate
         super().__init__()
+        self.futures_queue.appendleft(self)
 
     def result(self, timeout=None):
         if timeout is not None:
             raise RuntimeError("timeout not implemented")
+
         # Drain any futures ahead of us in the queue.
         while not self.done():
-            future, get_response = self.futures_queue.pop()
-            future.wait_for_response(get_response)
+            future = self.futures_queue.pop()
+            future._wait_for_response()
         return super().result()
 
-    def wait_for_response(self, get_response: Callable):
+    def _wait_for_response(self):
         try:
-            response = self.aggregate(get_response())
+            response = self.aggregate(self.get_response())
             with suppress(InvalidStateError):
                 self.set_result(response)
         except Exception as e:
@@ -115,7 +121,6 @@ def _init_executor(self) -> None:
             f"_parallel_size ({pcp_size}). "
         )
 
-        # Set multiprocessing envs
         set_multiprocessing_worker_envs()
 
         # use the loopback address get_loopback_ip() for communication.
@@ -165,19 +170,24 @@ def _init_executor(self) -> None:
                 [] if context.get_start_method() == "fork" else None
             )
 
+            # For CPU backend only, to setup OpenMP threads affinity
+            cpu_omp_manager = OMPProcessManager(self.vllm_config)
             for local_rank in range(self.local_world_size):
                 global_rank = global_start_rank + local_rank
                 is_driver_worker = self._is_driver_worker(global_rank)
-                unready_worker_handle = WorkerProc.make_worker_process(
-                    vllm_config=self.vllm_config,
-                    local_rank=local_rank,
-                    rank=global_rank,
-                    distributed_init_method=distributed_init_method,
-                    input_shm_handle=scheduler_output_handle,
-                    shared_worker_lock=shared_worker_lock,
-                    is_driver_worker=is_driver_worker,
-                    inherited_fds=inherited_fds,
-                )
+                with cpu_omp_manager.configure_omp_envs(
+                    rank=global_rank, local_rank=local_rank
+                ):
+                    unready_worker_handle = WorkerProc.make_worker_process(
+                        vllm_config=self.vllm_config,
+                        local_rank=local_rank,
+                        rank=global_rank,
+                        distributed_init_method=distributed_init_method,
+                        input_shm_handle=scheduler_output_handle,
+                        shared_worker_lock=shared_worker_lock,
+                        is_driver_worker=is_driver_worker,
+                        inherited_fds=inherited_fds,
+                    )
                 unready_workers.append(unready_worker_handle)
                 if inherited_fds is not None:
                     inherited_fds.append(unready_worker_handle.death_writer.fileno())
@@ -218,7 +228,7 @@ def _init_executor(self) -> None:
             for response_mq in self.response_mqs:
                 response_mq.wait_until_ready()
 
-            self.futures_queue = deque[tuple[FutureWrapper, Callable]]()
+            self.futures_queue = deque[FutureWrapper]()
 
             self._post_init_executor()
 
@@ -384,17 +394,13 @@ def get_response():
                 responses.append(result)
             return responses[0] if output_rank is not None else responses
 
-        if non_block:
-            future = FutureWrapper(self.futures_queue, aggregate=aggregate)
-            self.futures_queue.appendleft((future, get_response))
-            return future
-
-        # First drain any pending futures in the queue.
-        while self.futures_queue:
-            future, get_fut_response = self.futures_queue.pop()
-            future.wait_for_response(get_fut_response)
+        future = FutureWrapper(
+            self.futures_queue,
+            get_response=get_response,
+            aggregate=aggregate,
+        )
 
-        return aggregate(get_response())
+        return future if non_block else future.result()
 
     @staticmethod
     def _ensure_worker_termination(worker_procs: list[BaseProcess]):
@@ -674,7 +680,12 @@ def make_worker_process(
             daemon=True,
         )
 
-        proc.start()
+        # Apply NUMA binding if configured
+        with numa_utils.configure_subprocess(
+            vllm_config, local_rank, process_kind="worker"
+        ):
+            proc.start()
+
         # Close child ends of pipes here in the parent
         ready_writer.close()
         death_reader.close()
@@ -825,6 +836,8 @@ def signal_handler(signum, frame):
 
             worker = WorkerProc(*args, **kwargs)
             assert worker.worker_response_mq is not None
+            if kwargs["vllm_config"].parallel_config.numa_bind:
+                numa_utils.log_current_affinity_state(f"Worker_{worker.rank}")
 
             worker.monitor_death_pipe(death_pipe, shutdown_requested)
 
@@ -1000,24 +1013,25 @@ def set_multiprocessing_worker_envs():
 
     _maybe_force_spawn()
 
-    # Configure thread parallelism if OMP_NUM_THREADS isn't set
-    #
-    # Helps to avoid CPU contention. The default of spawning a thread per
-    # core combined with multiprocessing for each GPU can have a negative
-    # impact on performance. The contention is amplified when running in a
-    # container where CPU limits can cause throttling.
-    default_omp_num_threads = 1
-    if (
-        "OMP_NUM_THREADS" not in os.environ
-        and (current_parallelism := torch.get_num_threads()) > default_omp_num_threads
-    ):
-        logger.warning_once(
-            "Reducing Torch parallelism from %d threads to %d to avoid "
-            "unnecessary CPU contention. Set OMP_NUM_THREADS in the "
-            "external environment to tune this value as needed.",
-            current_parallelism,
-            default_omp_num_threads,
-            scope="local",
-        )
-        os.environ["OMP_NUM_THREADS"] = str(default_omp_num_threads)
-        torch.set_num_threads(default_omp_num_threads)
+    if not current_platform.is_cpu():
+        # Configure thread parallelism if OMP_NUM_THREADS isn't set
+        #
+        # Helps to avoid CPU contention. The default of spawning a thread per
+        # core combined with multiprocessing for each GPU can have a negative
+        # impact on performance. The contention is amplified when running in a
+        # container where CPU limits can cause throttling.
+        default_omp_num_threads = 1
+        if (
+            "OMP_NUM_THREADS" not in os.environ
+            and (current_parallelism := torch.get_num_threads())
+            > default_omp_num_threads
+        ):
+            logger.warning_once(
+                "Reducing Torch parallelism from %d threads to %d to avoid "
+                "unnecessary CPU contention. Set OMP_NUM_THREADS in the "
+                "external environment to tune this value as needed.",
+                current_parallelism,
+                default_omp_num_threads,
+            )
+            os.environ["OMP_NUM_THREADS"] = str(default_omp_num_threads)
+            torch.set_num_threads(default_omp_num_threads)
diff --git a/vllm/v1/executor/ray_distributed_executor.py b/vllm/v1/executor/ray_distributed_executor.py
deleted file mode 100644
index 9a56c093ad69..000000000000
--- a/vllm/v1/executor/ray_distributed_executor.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from vllm.v1.executor.ray_executor import (
-    RayDistributedExecutor as _RayDistributedExecutor,
-)
-
-# For backwards compatibility.
-RayDistributedExecutor = _RayDistributedExecutor
diff --git a/vllm/v1/executor/ray_env_utils.py b/vllm/v1/executor/ray_env_utils.py
new file mode 100644
index 000000000000..6ce12b8ca913
--- /dev/null
+++ b/vllm/v1/executor/ray_env_utils.py
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import os
+
+from vllm.ray.ray_env import RAY_NON_CARRY_OVER_ENV_VARS
+
+
+def get_driver_env_vars(
+    worker_specific_vars: set[str],
+) -> dict[str, str]:
+    """Return driver env vars to propagate to Ray workers.
+
+    Returns everything from ``os.environ`` except ``worker_specific_vars``
+    and user-configured exclusions (``RAY_NON_CARRY_OVER_ENV_VARS``).
+    """
+    exclude_vars = worker_specific_vars | RAY_NON_CARRY_OVER_ENV_VARS
+
+    return {key: value for key, value in os.environ.items() if key not in exclude_vars}
diff --git a/vllm/v1/executor/ray_executor.py b/vllm/v1/executor/ray_executor.py
index c4e5e7bc67ed..cfeebb5e09de 100644
--- a/vllm/v1/executor/ray_executor.py
+++ b/vllm/v1/executor/ray_executor.py
@@ -23,8 +23,10 @@
 from vllm.v1.engine import ReconfigureDistributedRequest, ReconfigureRankType
 from vllm.v1.executor.abstract import Executor
 from vllm.v1.executor.ray_utils import (
+    WORKER_SPECIFIC_ENV_VARS,
     FutureWrapper,
     RayWorkerWrapper,
+    detach_zero_copy_from_model_runner_output,
     initialize_ray_cluster,
     ray,
 )
@@ -62,17 +64,6 @@ class RayWorkerMetaData:
 class RayDistributedExecutor(Executor):
     """Ray-based distributed executor"""
 
-    # These env vars are worker-specific, therefore are NOT copied
-    # from the driver to the workers
-    WORKER_SPECIFIC_ENV_VARS = {
-        "VLLM_HOST_IP",
-        "VLLM_HOST_PORT",
-        "LOCAL_RANK",
-        "CUDA_VISIBLE_DEVICES",
-        "HIP_VISIBLE_DEVICES",
-        "ROCR_VISIBLE_DEVICES",
-    }
-
     uses_ray: bool = True
     supports_pp: bool = True
 
@@ -335,7 +326,7 @@ def sort_by_driver_then_worker_ip(item: RayWorkerMetaData):
 
         # Environment variables to copy from driver to workers
         env_vars_to_copy = get_env_vars_to_copy(
-            exclude_vars=self.WORKER_SPECIFIC_ENV_VARS,
+            exclude_vars=WORKER_SPECIFIC_ENV_VARS,
             additional_vars=set(current_platform.additional_env_vars),
             destination="workers",
         )
@@ -473,7 +464,9 @@ def _execute_dag(
             # Get output only from a single worker (output_rank)
             # When PP is not used, we block here until the result is available.
             if not non_block:
-                return refs[0].get()
+                output = refs[0].get()
+                detach_zero_copy_from_model_runner_output(output)
+                return output
 
             # When PP is used, we return a FutureWrapper immediately so that
             # the scheduler can yield to the next batch.
@@ -483,7 +476,10 @@ def _execute_dag(
         assert self.kv_output_aggregator is not None
         if not non_block:
             # Block and get results from all workers
-            return self.kv_output_aggregator.aggregate(ray.get(refs))
+            outputs = ray.get(refs)
+            for output in outputs:
+                detach_zero_copy_from_model_runner_output(output)
+            return self.kv_output_aggregator.aggregate(outputs)
 
         # Return a future that will aggregate outputs from all workers
         return FutureWrapper(refs, self.kv_output_aggregator)
diff --git a/vllm/v1/executor/ray_executor_v2.py b/vllm/v1/executor/ray_executor_v2.py
new file mode 100644
index 000000000000..0665b5fc1b88
--- /dev/null
+++ b/vllm/v1/executor/ray_executor_v2.py
@@ -0,0 +1,524 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import copy
+import os
+import threading
+import weakref
+from collections import defaultdict, deque
+from dataclasses import dataclass
+from typing import Any
+
+import vllm.envs as envs
+from vllm.config import VllmConfig
+from vllm.distributed.device_communicators.shm_broadcast import (
+    Handle,
+    MessageQueue,
+)
+from vllm.logger import init_logger
+from vllm.platforms import current_platform
+from vllm.utils.network_utils import (
+    get_distributed_init_method,
+    get_open_port,
+)
+from vllm.v1.executor.multiproc_executor import (
+    FutureWrapper,
+    MultiprocExecutor,
+    WorkerProc,
+)
+from vllm.v1.executor.ray_env_utils import get_driver_env_vars
+from vllm.v1.executor.ray_utils import (
+    WORKER_SPECIFIC_ENV_VARS,
+    build_actor_name,
+    get_bundles_for_indices,
+    get_bundles_sorted_by_node,
+    initialize_ray_cluster,
+    ray,
+)
+
+if ray is not None:
+    from ray.actor import ActorHandle
+    from ray.types import ObjectRef
+    from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
+else:
+    ActorHandle = None
+
+logger = init_logger(__name__)
+
+
+@dataclass
+class RayWorkerHandle:
+    """Handle for a Ray worker actor, compatible with MultiprocExecutor."""
+
+    actor: ActorHandle
+    """Ray worker actor"""
+
+    rank: int
+    """Rank of the worker"""
+
+    local_rank: int
+    """Local rank of the worker"""
+
+    node_id: str
+    """Node ID of the worker"""
+
+    bundle_id_idx: int = -1
+    """Placement group bundle index for the worker"""
+
+    run_ref: ObjectRef | None = None
+    """run() ObjectRef used as a sentinel for health monitoring"""
+
+    def run(self):
+        """Start the worker's busy loop"""
+        self.run_ref = self.actor.run.remote()
+
+
+class RayWorkerProc(WorkerProc):
+    """Worker process that runs inside a Ray actor.
+
+    Initialization is split into two phases:
+    1. __init__: lightweight setup, stores init args (no device/model init)
+    2. initialize_worker: called after GPU IDs are discovered, completes
+       the full WorkerProc initialization with the correct local_rank and
+       CUDA_VISIBLE_DEVICES.
+
+    CUDA_VISIBLE_DEVICES setup flow:
+
+    1. RayExecutorV2 enables RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES so Ray does
+       not set CUDA_VISIBLE_DEVICES on RayWorkerProc actors at creation time.
+    2. Each actor is scheduled with a placement group and bundle index; Ray resolves
+       the physical GPU ID for that bundle at placement time.
+    3. After placement, the worker discovers that GPU ID and sets
+       CUDA_VISIBLE_DEVICES before finishing WorkerProc initialization.
+
+    There is no workaround for this unset-and-reset sequence when the placement group
+    is externally managed: scheduling must complete before CUDA_VISIBLE_DEVICES can
+    match the GPU tied to the worker's bundle.
+
+    This sequence allows multiple vLLM instances to coexist on the same node:
+    each instance is unaware which physical devices others hold, and the
+    externally managed placement group avoids CUDA_VISIBLE_DEVICES conflicts
+    by binding workers to specific placement group bundles.
+    """
+
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        rank: int,
+        distributed_init_method: str,
+        input_shm_handle: Handle,
+        is_driver_worker: bool,
+        is_driver_node: bool = False,
+    ):
+        # Defer WorkerProc.__init__ until GPU IDs are known.
+        self._is_driver_node = is_driver_node
+        self._init_kwargs = dict(
+            vllm_config=vllm_config,
+            rank=rank,
+            distributed_init_method=distributed_init_method,
+            input_shm_handle=input_shm_handle,
+            shared_worker_lock=None,
+            is_driver_worker=is_driver_worker,
+        )
+
+    def get_node_and_gpu_ids(self) -> tuple[str, list[int]]:
+        """Return (node_id, gpu_ids) assigned to this actor by Ray."""
+        node_id = ray.get_runtime_context().get_node_id()
+        device_key = current_platform.ray_device_key
+        if not device_key:
+            raise RuntimeError(
+                f"current platform {current_platform.device_name} does not support ray."
+            )
+        gpu_ids = ray.get_runtime_context().get_accelerator_ids()[device_key]
+        return node_id, [int(x) for x in gpu_ids]
+
+    def initialize_worker(
+        self,
+        local_rank: int,
+        env_vars: dict[str, str],
+        driver_env_vars: dict[str, str] | None = None,
+    ) -> None:
+        """Complete initialization after GPU assignment is known.
+
+        *driver_env_vars* are applied with ``setdefault`` — they fill
+        in missing vars but never overwrite node-local values.
+        *env_vars* (e.g. CUDA_VISIBLE_DEVICES) always overwrite.
+        """
+        if driver_env_vars:
+            for key, value in driver_env_vars.items():
+                os.environ.setdefault(key, value)
+        for key, value in env_vars.items():
+            os.environ[key] = value
+
+        self.local_rank = local_rank
+        super().__init__(
+            local_rank=local_rank,
+            **self._init_kwargs,
+        )
+
+    def _init_message_queues(
+        self, input_shm_handle: Handle, vllm_config: VllmConfig
+    ) -> None:
+        """
+        Workers on the same node as the executor use shared memory for
+        both the broadcast (input) MQ and the response MQ. Workers on
+        different nodes use TCP (n_local_reader=0).
+        """
+        self.rpc_broadcast_mq = MessageQueue.create_from_handle(
+            input_shm_handle, self.worker.rank
+        )
+
+        n_local = 1 if self._is_driver_node else 0
+        # Use ray.util.get_node_ip_address() to get Ray's internal IP.
+        # get_ip() returns host's external IP which is typically not
+        # routable between nodes within the cluster.
+        self.worker_response_mq = MessageQueue(
+            n_reader=1,
+            n_local_reader=n_local,
+            connect_ip=ray.util.get_node_ip_address(),
+        )
+        self.peer_response_handles: list[dict] = []
+
+    def wait_for_init(self) -> dict:
+        """Respond to the driver's wait_until_ready() barrier."""
+        assert self.worker_response_mq is not None
+        return {
+            "status": self.READY_STR,
+            "handle": self.worker_response_mq.export_handle(),
+        }
+
+    def run(self) -> None:
+        """Main entry point called via actor.run.remote()."""
+        try:
+            assert self.rpc_broadcast_mq is not None
+            self.rpc_broadcast_mq.wait_until_ready()
+            assert self.worker_response_mq is not None
+            self.worker_response_mq.wait_until_ready()
+
+            self.worker_busy_loop()
+        except Exception as e:
+            logger.exception("RayWorkerProc failed: %s", e)
+            raise
+        finally:
+            self.shutdown()
+
+
+class RayExecutorV2(MultiprocExecutor):
+    """Ray-based distributed executor using MessageQueue communication.
+
+    Inherits from MultiprocExecutor to reuse the MQ-based control plane
+    and NCCL data plane. Workers are Ray actors.
+
+    Async scheduling is enabled, inherited from MultiprocExecutor.
+    This is cricitcal for RayExecutorV2 to be performant.
+    """
+
+    uses_ray: bool = True
+    supports_pp: bool = True
+
+    def __init__(self, vllm_config: VllmConfig):
+        super().__init__(vllm_config)
+
+    def _build_runtime_env(self) -> dict:
+        """Build a runtime_env dict for RayWorkerProc actors.
+
+        Driver env vars are applied separately via initialize_worker
+        with setdefault semantics.
+        """
+        base = self.parallel_config.ray_runtime_env
+        runtime_env: dict = copy.deepcopy(dict(base)) if base else {}
+
+        env_vars = runtime_env.setdefault("env_vars", {})
+        env_vars.update({v: "1" for v in current_platform.ray_noset_device_env_vars})
+        if self.parallel_config.ray_workers_use_nsight:
+            runtime_env["nsight"] = {
+                "t": "cuda,cudnn,cublas",
+                "o": "'worker_process_%p'",
+                "cuda-graph-trace": "node",
+            }
+        return runtime_env
+
+    @staticmethod
+    def _get_actor_resource_kwargs() -> dict[str, Any]:
+        """Return Ray actor resource kwargs for the current platform."""
+        num_devices = envs.VLLM_RAY_PER_WORKER_GPUS
+        device_key = current_platform.ray_device_key
+        if device_key == "GPU":
+            return {"num_gpus": num_devices}
+        return {"num_gpus": 0, "resources": {device_key: num_devices}}
+
+    def _init_executor(self) -> None:
+        """Initialize the RayExecutorV2 executor."""
+        self._finalizer = weakref.finalize(self, self.shutdown)
+        self.is_failed = False
+        self.failure_callback = None
+        self.shutting_down = False
+        self.shutdown_lock = threading.Lock()
+
+        # Step 1: Initialize Ray cluster and retrieve placement group
+        if ray is None:
+            raise ImportError("Using Ray backend requires installation of ray.")
+        initialize_ray_cluster(self.parallel_config, require_gpu_on_driver=False)
+        placement_group = self.parallel_config.placement_group
+
+        tp_size, pp_size, pcp_size = self._get_parallel_sizes()
+        assert self.world_size == tp_size * pp_size * pcp_size, (
+            f"world_size ({self.world_size}) must be equal to the "
+            f"tensor_parallel_size ({tp_size}) x pipeline"
+            f"_parallel_size ({pp_size}) x prefill_context"
+            f"_parallel_size ({pcp_size}). "
+        )
+
+        # Step 2: Build bundle assignments for worker rank placement
+        # while respecting VLLM_RAY_BUNDLE_INDICES.
+        if envs.VLLM_RAY_BUNDLE_INDICES:
+            bundle_to_node_id = get_bundles_for_indices(
+                placement_group,
+                list(map(int, envs.VLLM_RAY_BUNDLE_INDICES.split(","))),
+                self.world_size,
+            )
+        else:
+            bundle_to_node_id = get_bundles_sorted_by_node(placement_group)
+        driver_node = ray.get_runtime_context().get_node_id()
+
+        bundle_assignments: list[dict[str, Any]] = []
+        for rank, (bundle_id_idx, node_id, node_ip) in enumerate(bundle_to_node_id):
+            bundle_assignments.append(
+                {
+                    "rank": rank,
+                    "bundle_id_idx": bundle_id_idx,
+                    "node_id": node_id,
+                    "node_ip": node_ip,
+                }
+            )
+
+        # Step 3: Resolve the IP for torch.distributed TCPStore.
+        # The TCPStore server runs on rank 0's node, so all workers
+        # must be able to reach this address.
+        dist_ip = bundle_assignments[0]["node_ip"]
+        distributed_init_method = get_distributed_init_method(dist_ip, get_open_port())
+
+        # Step 4: Create broadcast MessageQueue.
+        # Workers on the driver node use shared memory; the rest use TCP.
+        max_chunk_bytes = envs.VLLM_MQ_MAX_CHUNK_BYTES_MB * 1024 * 1024
+        n_local = sum(1 for a in bundle_assignments if a["node_id"] == driver_node)
+        self.rpc_broadcast_mq = MessageQueue(
+            self.world_size,
+            n_local,
+            max_chunk_bytes=max_chunk_bytes,
+            connect_ip=ray.util.get_node_ip_address(),
+        )
+        scheduler_output_handle = self.rpc_broadcast_mq.export_handle()
+
+        # Step 5: Spawn RayWorkerProc actors into PG bundles (deferred init).
+        # Workers are created lightweight here; full initialization happens
+        # in Step 7 after GPU IDs are discovered.
+        self.ray_worker_handles: list[RayWorkerHandle] = []
+        instance_id = self.vllm_config.instance_id
+
+        # Collect driver env vars and apply but don't overwrite node-local values.
+        self.driver_env_vars = get_driver_env_vars(
+            worker_specific_vars=WORKER_SPECIFIC_ENV_VARS,
+        )
+
+        runtime_env = self._build_runtime_env()
+        resource_kwargs = self._get_actor_resource_kwargs()
+
+        for bundle_idx in range(self.world_size):
+            bundle = bundle_assignments[bundle_idx]
+            is_driver_worker = self._is_driver_worker(bundle["rank"])
+            is_driver_node = bundle["node_id"] == driver_node
+
+            scheduling_strategy = PlacementGroupSchedulingStrategy(
+                placement_group=placement_group,
+                placement_group_bundle_index=bundle["bundle_id_idx"],
+            )
+
+            actor_name = build_actor_name(
+                instance_id, bundle["rank"], tp_size, pp_size, pcp_size
+            )
+
+            actor = (
+                ray.remote(RayWorkerProc)
+                .options(
+                    name=actor_name,
+                    num_cpus=0,
+                    **resource_kwargs,
+                    scheduling_strategy=scheduling_strategy,
+                    runtime_env=runtime_env,
+                )
+                .remote(
+                    vllm_config=self.vllm_config,
+                    rank=bundle["rank"],
+                    distributed_init_method=distributed_init_method,
+                    input_shm_handle=scheduler_output_handle,
+                    is_driver_worker=is_driver_worker,
+                    is_driver_node=is_driver_node,
+                )
+            )
+
+            handle = RayWorkerHandle(
+                actor=actor,
+                rank=bundle["rank"],
+                local_rank=-1,  # Set in Step 7 after GPU ID discovery
+                node_id=bundle["node_id"],
+                bundle_id_idx=bundle["bundle_id_idx"],
+            )
+            self.ray_worker_handles.append(handle)
+
+        # Step 6: Discover GPU IDs assigned to each worker via Ray runtime context.
+        worker_node_and_gpu_ids = ray.get(
+            [h.actor.get_node_and_gpu_ids.remote() for h in self.ray_worker_handles]
+        )
+
+        node_workers: dict[str, list[int]] = defaultdict(list)
+        node_gpus: dict[str, list[int]] = defaultdict(list)
+        for i, (node_id, gpu_ids) in enumerate(worker_node_and_gpu_ids):
+            node_workers[node_id].append(i)
+            node_gpus[node_id].extend(gpu_ids)
+        for node_id, gpu_ids in node_gpus.items():
+            node_gpus[node_id] = sorted(gpu_ids)
+
+        # Step 7: Initialize workers with correct local_rank and
+        # CUDA_VISIBLE_DEVICES. Each worker sees all GPUs assigned to
+        # this executor on its node; local_rank indexes into that set.
+        init_worker_refs = []
+        for i, (node_id, _) in enumerate(worker_node_and_gpu_ids):
+            local_rank = node_workers[node_id].index(i)
+            worker_env_vars = {
+                current_platform.device_control_env_var: ",".join(
+                    map(str, node_gpus[node_id])
+                ),
+            }
+            self.ray_worker_handles[i].local_rank = local_rank
+            init_worker_refs.append(
+                self.ray_worker_handles[i].actor.initialize_worker.remote(
+                    local_rank, worker_env_vars, self.driver_env_vars
+                )
+            )
+        ray.get(init_worker_refs)
+
+        # Step 8: Collect response MQ handles
+        init_results = ray.get(
+            [h.actor.wait_for_init.remote() for h in self.ray_worker_handles]
+        )
+
+        self.response_mqs: list[MessageQueue] = []
+        for i, result in enumerate(init_results):
+            if result["status"] != RayWorkerProc.READY_STR:
+                raise RuntimeError(f"Worker {i} failed to initialize: {result}")
+            self.response_mqs.append(
+                MessageQueue.create_from_handle(result["handle"], 0)
+            )
+
+        # Step 9: Start run() before wait_until_ready() to avoid
+        # deadlock — workers send subscriptions inside run().
+        for handle in self.ray_worker_handles:
+            handle.run()
+
+        # Step 10: wait_until_ready() barrier
+        self.rpc_broadcast_mq.wait_until_ready()
+        for response_mq in self.response_mqs:
+            response_mq.wait_until_ready()
+
+        self.futures_queue = deque[FutureWrapper]()
+        self._post_init_executor()
+
+        self.start_worker_monitor()
+        self.output_rank = self._get_output_rank()
+
+    def start_worker_monitor(self, inline=False) -> None:
+        """Monitor worker liveness via ray.wait() on run() ObjectRefs."""
+        run_refs = [h.run_ref for h in self.ray_worker_handles if h.run_ref is not None]
+        if not run_refs:
+            raise RuntimeError("Ray workers have not started successfully.")
+
+        self_ref = weakref.ref(self)
+        ref_to_rank = {
+            h.run_ref: h.rank for h in self.ray_worker_handles if h.run_ref is not None
+        }
+
+        def _should_stop() -> bool:
+            executor = self_ref()
+            return not executor or executor.shutting_down
+
+        def monitor_workers():
+            # Poll with a timeout rather than blocking on ray.wait()
+            # because a blocking call would segfault if Ray is torn down
+            # while this thread is inside it.
+            while not _should_stop() and ray.is_initialized():
+                try:
+                    done, _ = ray.wait(run_refs, num_returns=1, timeout=5.0)
+                except Exception:
+                    logger.exception(
+                        "RayWorkerMonitor: unexpected error, exiting monitor thread"
+                    )
+                    return
+                if not done or _should_stop():
+                    continue
+
+                dead_ranks = [ref_to_rank[r] for r in done]
+                executor = self_ref()
+                if not executor:
+                    return
+                executor.is_failed = True
+                logger.error(
+                    "RayWorkerProc rank=%s died unexpectedly, shutting down executor.",
+                    dead_ranks,
+                )
+                executor.shutdown()
+                if executor.failure_callback is not None:
+                    callback = executor.failure_callback
+                    executor.failure_callback = None
+                    callback()
+                return
+
+        t = threading.Thread(
+            target=monitor_workers, daemon=True, name="RayWorkerMonitor"
+        )
+        t.start()
+        self._monitor_thread = t
+
+    def _join_monitor_thread(self) -> None:
+        """Wait for the monitor thread to exit.
+
+        Must be called before tearing down Ray resources — the monitor
+        may be inside ray.wait() which would segfault if Ray is shut
+        down underneath it. When the monitor itself calls shutdown()
+        on worker death, we skip the join because the thread is about
+        to return anyway.
+        """
+        monitor = getattr(self, "_monitor_thread", None)
+        if (
+            monitor is not None
+            and monitor.is_alive()
+            and threading.current_thread() is not monitor
+        ):
+            monitor.join(timeout=10)
+
+    def shutdown(self) -> None:
+        """Properly shut down the executor and its workers."""
+        lock = getattr(self, "shutdown_lock", None)
+        if lock is None:
+            return
+
+        with lock:
+            if getattr(self, "shutting_down", False):
+                return
+            self.shutting_down = True
+
+        self._join_monitor_thread()
+
+        for handle in getattr(self, "ray_worker_handles", []):
+            try:
+                ray.kill(handle.actor)
+                logger.debug("Killed actor rank=%d", handle.rank)
+            except Exception:
+                logger.exception("Failed to kill actor rank=%d", handle.rank)
+
+        if rpc_broadcast_mq := getattr(self, "rpc_broadcast_mq", None):
+            rpc_broadcast_mq.shutdown()
+            self.rpc_broadcast_mq = None
+
+        for mq in getattr(self, "response_mqs", []):
+            mq.shutdown()
+        self.response_mqs = []
diff --git a/vllm/v1/executor/ray_utils.py b/vllm/v1/executor/ray_utils.py
index dd82cfb99aac..9083b9195912 100644
--- a/vllm/v1/executor/ray_utils.py
+++ b/vllm/v1/executor/ray_utils.py
@@ -7,6 +7,8 @@
 from concurrent.futures import Future
 from typing import TYPE_CHECKING, Union
 
+import numpy as np
+
 import vllm.platforms
 from vllm.config import ParallelConfig
 from vllm.distributed import get_pp_group
@@ -26,6 +28,18 @@
 logger = init_logger(__name__)
 PG_WAIT_TIMEOUT = 1800
 
+# Env vars that are worker-specific and must NOT be copied from the
+# driver to Ray workers — they are set per-worker after GPU discovery.
+WORKER_SPECIFIC_ENV_VARS: set[str] = {
+    "VLLM_HOST_IP",
+    "VLLM_HOST_PORT",
+    "VLLM_NIXL_SIDE_CHANNEL_HOST",
+    "LOCAL_RANK",
+    "CUDA_VISIBLE_DEVICES",
+    "HIP_VISIBLE_DEVICES",
+    "ROCR_VISIBLE_DEVICES",
+}
+
 try:
     import ray
     from ray.util import placement_group_table
@@ -51,6 +65,8 @@ def __init__(self, *args, **kwargs) -> None:
             # that thread.
             self.compiled_dag_cuda_device_set = False
 
+        rpc_rank: int
+
         def adjust_rank(self, rank_mapping: dict[int, int]) -> None:
             """
             Adjust the rpc_rank based on the given mapping.
@@ -177,6 +193,48 @@ def _is_last_rank(self) -> bool:
     RayWorkerWrapper = None  # type: ignore
 
 
+def detach_zero_copy_from_model_runner_output(output: "ModelRunnerOutput") -> None:
+    """Detach Ray SHM-channel zero-copy buffers from a ModelRunnerOutput in-place.
+
+    Ray compiled DAG SHM channels may return zero-copy objects (e.g. `np.ndarray`)
+    backed by Ray's shared-memory object store. Ray's channel docs explicitly
+    warn that subsequent reads may block if such an object is still in scope.
+
+    vLLM can return numpy-backed logprobs in `ModelRunnerOutput.logprobs`. If
+    those arrays are backed by Ray SHM (commonly read-only), retaining them in
+    scope across scheduler iterations can stall the channel and eventually hit
+    `RAY_CGRAPH_get_timeout`.
+
+    Copy read-only numpy arrays so the returned output no longer retains
+    references to Ray's shared-memory buffers.
+
+    We intentionally do not touch `prompt_logprobs_dict`: those entries are
+    `LogprobsTensors` backed by PyTorch-owned CPU tensors (`to_cpu_nonblocking`
+    or `empty_cpu`), not NumPy views decoded from Ray channels.
+    """
+    if output.logprobs is None:
+        return
+
+    token_ids, logprobs, ranks, cu_num_generated_tokens = output.logprobs
+
+    def _copy_if_readonly(arr):
+        if isinstance(arr, np.ndarray) and not arr.flags.writeable:
+            return arr.copy()
+        return arr
+
+    # `cu_num_generated_tokens` is already a plain Python list (or None), so it
+    # never aliases Ray SHM buffers and can be reused as-is.
+    token_ids_c = _copy_if_readonly(token_ids)
+    logprobs_c = _copy_if_readonly(logprobs)
+    ranks_c = _copy_if_readonly(ranks)
+    if token_ids_c is token_ids and logprobs_c is logprobs and ranks_c is ranks:
+        return
+
+    output.logprobs = type(output.logprobs)(
+        token_ids_c, logprobs_c, ranks_c, cu_num_generated_tokens
+    )
+
+
 class FutureWrapper(Future):
     """A wrapper around Ray output reference to meet the interface
     of .execute_model(): The top level (core busy loop) expects .result() api
@@ -194,8 +252,11 @@ def __init__(self, ref_or_refs, aggregator: KVOutputAggregator | None = None):
     def result(self, timeout=None):
         outputs = ray.get(self.ref_or_refs, timeout=timeout)
         if self.aggregator is None:
+            detach_zero_copy_from_model_runner_output(outputs)
             return outputs
 
+        for output in outputs:
+            detach_zero_copy_from_model_runner_output(output)
         return self.aggregator.aggregate(outputs, output_rank=0)
 
 
@@ -214,13 +275,17 @@ def assert_ray_available():
 
 
 def _verify_bundles(
-    placement_group: "PlacementGroup", parallel_config: ParallelConfig, device_str: str
+    placement_group: "PlacementGroup",
+    parallel_config: ParallelConfig,
+    device_str: str,
+    require_gpu_on_driver: bool = True,
 ):
     """Verify a given placement group has bundles located in the right place.
 
     There are 2 rules.
     - Warn if all tensor parallel workers cannot fit in a single node.
-    - Fail if driver node is not included in a placement group.
+    - Fail if driver node is not included in a placement group
+      (only when require_gpu_on_driver is True).
     """
     assert ray.is_initialized(), (
         "Ray is not initialized although distributed-executor-backend is ray."
@@ -237,7 +302,7 @@ def _verify_bundles(
         node_id_to_bundle[node_id].append(bundles[bundle_idx])
     driver_node_id = ray.get_runtime_context().get_node_id()
 
-    if driver_node_id not in node_id_to_bundle:
+    if require_gpu_on_driver and driver_node_id not in node_id_to_bundle:
         raise RuntimeError(
             f"driver node id {driver_node_id} is not included in a placement "
             f"group {placement_group.id}. Node id -> bundles "
@@ -266,6 +331,115 @@ def _verify_bundles(
             )
 
 
+def build_actor_name(
+    instance_id: str,
+    rank: int,
+    tp_size: int,
+    pp_size: int,
+    pcp_size: int,
+) -> str:
+    """Build a descriptive Ray actor name for dashboard visibility."""
+    name = f"vllm_Worker_{instance_id}"
+    if tp_size > 1:
+        name += f"_TP{rank % tp_size}"
+    if pp_size > 1:
+        name += f"_PP{(rank // tp_size) % pp_size}"
+    if pcp_size > 1:
+        name += f"_PCP{rank // (tp_size * pp_size)}"
+    return name
+
+
+def get_bundles_for_indices(
+    placement_group: "PlacementGroup",
+    bundle_indices: list[int],
+    world_size: int,
+) -> list[tuple[int, str, str]]:
+    """
+    Return GPU bundle indices paired with node IDs and node IPs for
+    explicit bundle indices specified via VLLM_RAY_BUNDLE_INDICES.
+    """
+    assert len(bundle_indices) == world_size, (
+        "VLLM_RAY_BUNDLE_INDICES must have the same size"
+        f" as the world size, but got {bundle_indices=} "
+        f"and {world_size=}"
+    )
+    assert len(set(bundle_indices)) == len(bundle_indices), (
+        "VLLM_RAY_BUNDLE_INDICES cannot have duplicate values,"
+        f" but got {bundle_indices=}"
+    )
+
+    pg_data = placement_group_table(placement_group)
+    pg_bundle_to_node = pg_data["bundles_to_node_id"]
+    node_id_to_ip = {
+        n["NodeID"]: n["NodeManagerAddress"] for n in ray.nodes() if n["Alive"]
+    }
+    return [
+        (bid, pg_bundle_to_node[bid], node_id_to_ip[pg_bundle_to_node[bid]])
+        for bid in bundle_indices
+    ]
+
+
+def get_bundles_sorted_by_node(
+    placement_group: "PlacementGroup",
+) -> list[tuple[int, str, str]]:
+    """
+    Return GPU bundle indices paired with node IDs and node IPs,
+    sorted driver-first.
+
+    This utility has to be invoked from the driver node.
+
+    Example: 3-node cluster, driver on node-A, PG bundles spread
+    across nodes:
+
+      Input: [
+          (0, node-C),
+          (1, node-A),
+          (2, node-B),
+          (3, node-C),
+          (4, node-A),
+          (5, node-B),
+      ]
+      Output: [
+          (1, node-A),
+          (4, node-A),
+          (2, node-B),
+          (5, node-B),
+          (0, node-C),
+          (3, node-C),
+      ]
+    """
+    pg_data = placement_group_table(placement_group)
+    bundle_to_node = pg_data["bundles_to_node_id"]
+
+    ray_device_key = current_platform.ray_device_key
+    if not ray_device_key:
+        raise ValueError(
+            f"current platform {current_platform.device_name} does not support ray."
+        )
+
+    node_id_to_ip = {
+        n["NodeID"]: n["NodeManagerAddress"] for n in ray.nodes() if n["Alive"]
+    }
+
+    bundle_specs = placement_group.bundle_specs
+    assert bundle_specs is not None
+    bundle_to_node_id: list[tuple[int, str, str]] = []
+    for bundle_idx, bundle in enumerate(bundle_specs):
+        if bundle.get(ray_device_key):
+            node_id = bundle_to_node.get(bundle_idx)
+            bundle_to_node_id.append((bundle_idx, node_id, node_id_to_ip[node_id]))
+
+    driver_node = ray.get_runtime_context().get_node_id()
+
+    def _sort_key(item):
+        _, node_id, _ = item
+        return (0 if node_id == driver_node else 1, node_id)
+
+    bundle_to_node_id.sort(key=_sort_key)
+
+    return bundle_to_node_id
+
+
 def _wait_until_pg_ready(current_placement_group: "PlacementGroup"):
     """Wait until a placement group is ready.
 
@@ -352,6 +526,7 @@ def _wait_until_pg_removed(current_placement_group: "PlacementGroup"):
 def initialize_ray_cluster(
     parallel_config: ParallelConfig,
     ray_address: str | None = None,
+    require_gpu_on_driver: bool = True,
 ):
     """Initialize the distributed cluster with Ray.
 
@@ -363,15 +538,21 @@ def initialize_ray_cluster(
         parallel_config: The configurations for parallel execution.
         ray_address: The address of the Ray cluster. If None, uses
             the default Ray cluster address.
+        require_gpu_on_driver: If True (default), require at least one GPU
+            on the current (driver) node and pin the first PG bundle to it.
+            Set to False for executors like RayExecutorV2 where all GPU work
+            is delegated to remote Ray actors.
     """
     assert_ray_available()
     from vllm.platforms import current_platform
 
+    # Disable Ray usage stats collection
+    if os.environ.get("RAY_USAGE_STATS_ENABLED", "0") != "1":
+        os.environ["RAY_USAGE_STATS_ENABLED"] = "0"
+
     # Prevalidate GPU requirements before Ray processing
     if current_platform.is_cuda() and parallel_config.world_size > 1:
-        from vllm.utils.torch_utils import cuda_device_count_stateless
-
-        available_gpus = cuda_device_count_stateless()
+        available_gpus = current_platform.device_count()
         if parallel_config.world_size > available_gpus:
             logger.warning(
                 "Tensor parallel size (%d) exceeds available GPUs (%d). "
@@ -461,16 +642,20 @@ def initialize_ray_cluster(
         current_ip = get_ip()
         current_node_id = ray.get_runtime_context().get_node_id()
         current_node_resource = available_resources_per_node()[current_node_id]
-        if current_node_resource.get(device_str, 0) < 1:
-            raise ValueError(
-                f"Current node has no {device_str} available. "
-                f"{current_node_resource=}. vLLM engine cannot start without "
-                f"{device_str}. Make sure you have at least 1 {device_str} "
-                f"available in a node {current_node_id=} {current_ip=}."
-            )
-        # This way, at least bundle is required to be created in a current
-        # node.
-        placement_group_specs[0][f"node:{current_ip}"] = 0.001
+        # TODO (jeffreywang): require_gpu_on_driver should be always False
+        # after deprecating RayDistributedExecutor.
+        if require_gpu_on_driver:
+            if current_node_resource.get(device_str, 0) < 1:
+                raise ValueError(
+                    f"Current node has no {device_str} available. "
+                    f"{current_node_resource=}. vLLM engine cannot start "
+                    f"without {device_str}. Make sure you have at least 1 "
+                    f"{device_str} available in a node "
+                    f"{current_node_id=} {current_ip=}."
+                )
+            # This way, at least bundle is required to be created in a
+            # current node.
+            placement_group_specs[0][f"node:{current_ip}"] = 0.001
 
         # By default, Ray packs resources as much as possible.
         current_placement_group = ray.util.placement_group(
@@ -479,7 +664,9 @@ def initialize_ray_cluster(
         _wait_until_pg_ready(current_placement_group)
 
     assert current_placement_group is not None
-    _verify_bundles(current_placement_group, parallel_config, device_str)
+    _verify_bundles(
+        current_placement_group, parallel_config, device_str, require_gpu_on_driver
+    )
     # Set the placement group in the parallel config
     parallel_config.placement_group = current_placement_group
 
diff --git a/vllm/v1/executor/uniproc_executor.py b/vllm/v1/executor/uniproc_executor.py
index b616c3b7b8ad..92e668406f98 100644
--- a/vllm/v1/executor/uniproc_executor.py
+++ b/vllm/v1/executor/uniproc_executor.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import os
 from collections.abc import Callable
-from concurrent.futures import Future, ThreadPoolExecutor
+from concurrent.futures import Future
 from functools import cached_property
 from multiprocessing import Lock
 from typing import Any
@@ -23,6 +23,25 @@
 logger = init_logger(__name__)
 
 
+class AsyncOutputFuture(Future):
+    def __init__(self, async_output: AsyncModelRunnerOutput, single_value: bool):
+        self.async_output = async_output
+        self.single_value = single_value
+        super().__init__()
+
+    def result(self, timeout=None):
+        if timeout is not None:
+            raise RuntimeError("timeout not implemented")
+
+        if not super().done():
+            try:
+                output = self.async_output.get_output()
+                self.set_result(output if self.single_value else [output])
+            except Exception as e:
+                self.set_exception(e)
+        return super().result()
+
+
 class UniProcExecutor(Executor):
     def _init_executor(self) -> None:
         """Initialize the worker and load the model."""
@@ -37,12 +56,6 @@ def _init_executor(self) -> None:
             shared_worker_lock=Lock(),
         )
 
-        self.async_output_thread: ThreadPoolExecutor | None = None
-        if self.max_concurrent_batches > 1:
-            self.async_output_thread = ThreadPoolExecutor(
-                max_workers=1, thread_name_prefix="WorkerAsyncOutput"
-            )
-
         self.driver_worker.init_worker(all_kwargs=[kwargs])
         self.driver_worker.init_device()
 
@@ -83,15 +96,7 @@ def collective_rpc(  # type: ignore[override]
         try:
             result = run_method(self.driver_worker, method, args, kwargs)
             if isinstance(result, AsyncModelRunnerOutput):
-                if (async_thread := self.async_output_thread) is not None:
-                    if single_value:
-                        return async_thread.submit(result.get_output)
-
-                    def get_output_list() -> list[Any]:
-                        return [result.get_output()]
-
-                    return async_thread.submit(get_output_list)
-                result = result.get_output()
+                return AsyncOutputFuture(result, single_value)
             future = Future[Any]()
             future.set_result(result if single_value else [result])
         except Exception as e:
@@ -147,7 +152,7 @@ class ExecutorWithExternalLauncher(UniProcExecutor):
     offline inference with tensor parallelism.
 
     see https://github.com/vllm-project/vllm/issues/11400 for
-    the motivation, and examples/offline_inference/torchrun_example.py
+    the motivation, and examples/features/torchrun/torchrun_example_offline.py
     for the usage example.
 
     The key idea: although it is tensor-parallel inference, we only
diff --git a/vllm/v1/kv_cache_interface.py b/vllm/v1/kv_cache_interface.py
index 48ecf6b9dc85..31ee89bc72aa 100644
--- a/vllm/v1/kv_cache_interface.py
+++ b/vllm/v1/kv_cache_interface.py
@@ -1,21 +1,96 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from __future__ import annotations
+
 import copy
+from collections import Counter
 from dataclasses import dataclass, fields, replace
+from enum import Enum, IntEnum
 from math import prod
+from typing import TYPE_CHECKING
 
 import torch
 from typing_extensions import Self
 
-from vllm.config import VllmConfig
 from vllm.logger import init_logger
-from vllm.utils.math_utils import cdiv
-from vllm.utils.torch_utils import get_dtype_size
+from vllm.utils.math_utils import cdiv, round_up
+from vllm.utils.torch_utils import get_dtype_size, nvfp4_kv_cache_full_dim
+from vllm.v1.attention.backends.registry import MambaAttentionBackendEnum
+
+if TYPE_CHECKING:
+    from vllm.config import VllmConfig
 
 logger = init_logger(__name__)
 
 
+# ---------------------------------------------------------------------------
+# KV cache quantization mode
+# ---------------------------------------------------------------------------
+
+
+class KVQuantMode(IntEnum):
+    """KV cache quantization mode.
+
+    Used by attention backends and kernels to dispatch quantization logic
+    without string matching on ``kv_cache_dtype``.
+    """
+
+    NONE = 0
+    FP8_PER_TENSOR = 1  # per-tensor scales (current fp8 path)
+    INT8_PER_TOKEN_HEAD = 2  # per-token-head dynamic scales for int8
+    FP8_PER_TOKEN_HEAD = 3  # per-token-head dynamic scales for fp8
+    NVFP4 = 4  # packed fp4 data + fp8 block scales
+
+    @property
+    def is_per_token_head(self) -> bool:
+        """True for any per-token-head quantization mode."""
+        return self in (
+            KVQuantMode.INT8_PER_TOKEN_HEAD,
+            KVQuantMode.FP8_PER_TOKEN_HEAD,
+        )
+
+    @property
+    def is_nvfp4(self) -> bool:
+        """True for NVFP4 packed quantization mode."""
+        return self == KVQuantMode.NVFP4
+
+
+def get_kv_quant_mode(kv_cache_dtype: str) -> KVQuantMode:
+    """Map a ``kv_cache_dtype`` string to a :class:`KVQuantMode`."""
+    if kv_cache_dtype == "int8_per_token_head":
+        return KVQuantMode.INT8_PER_TOKEN_HEAD
+    if kv_cache_dtype == "fp8_per_token_head":
+        return KVQuantMode.FP8_PER_TOKEN_HEAD
+    if kv_cache_dtype == "nvfp4":
+        return KVQuantMode.NVFP4
+    if isinstance(kv_cache_dtype, str) and kv_cache_dtype.startswith("fp8"):
+        return KVQuantMode.FP8_PER_TENSOR
+    return KVQuantMode.NONE
+
+
+def is_quantized_kv_cache(kv_cache_dtype: str) -> bool:
+    return get_kv_quant_mode(kv_cache_dtype) != KVQuantMode.NONE
+
+
+def kv_cache_uses_per_token_head_scales(kv_cache_dtype: str) -> bool:
+    """Return True if *kv_cache_dtype* needs per-token-head scales."""
+    return get_kv_quant_mode(kv_cache_dtype).is_per_token_head
+
+
+class KVCacheSpecKind(str, Enum):
+    FULL_ATTENTION = "full_attention"
+    MLA_ATTENTION = "mla_attention"
+    SLIDING_WINDOW = "sliding_window"
+    SLIDING_WINDOW_MLA = "sliding_window_mla"
+    MAMBA = "mamba"
+    CHUNKED_LOCAL_ATTENTION = "chunked_local_attention"
+    SINK_FULL_ATTENTION = "sink_full_attention"
+    ENCODER_ONLY_ATTENTION = "encoder_only_attention"
+    CROSS_ATTENTION = "cross_attention"
+    UNKNOWN = "unknown"
+
+
 @dataclass(frozen=True)
 class KVCacheSpec:
     """
@@ -35,6 +110,10 @@ def page_size_bytes(self) -> int:
         """
         raise NotImplementedError
 
+    @property
+    def storage_block_size(self) -> int:
+        return self.block_size
+
     def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int:
         """
         The maximum possible memory usage of this KV cache in bytes.
@@ -66,11 +145,19 @@ class AttentionSpec(KVCacheSpec):
     num_kv_heads: int
     head_size: int
     dtype: torch.dtype
+    kv_quant_mode: KVQuantMode = KVQuantMode.NONE
     page_size_padded: int | None = None
 
     @property
     def page_size_bytes(self) -> int:
         real_page_size = self.real_page_size_bytes
+        # Per-token-head scales are stored in separate tensors managed
+        # by the attention backend, but the memory is carved from the
+        # raw KV cache allocation so it must be budgeted here.
+        if self.kv_quant_mode.is_per_token_head:
+            real_page_size += (
+                2 * self.block_size * self.num_kv_heads * get_dtype_size(torch.float32)
+            )
         if self.page_size_padded is not None:
             assert self.page_size_padded >= real_page_size
             return self.page_size_padded
@@ -78,6 +165,16 @@ def page_size_bytes(self) -> int:
 
     @property
     def real_page_size_bytes(self) -> int:
+        if self.kv_quant_mode.is_nvfp4:
+            # Packed layout: fp4 data + fp8 block scales per head.
+            full_dim = nvfp4_kv_cache_full_dim(self.head_size)
+            return (
+                2
+                * self.block_size
+                * self.num_kv_heads
+                * full_dim
+                * get_dtype_size(self.dtype)
+            )
         return (
             2
             * self.block_size
@@ -159,6 +256,7 @@ def merge(cls, specs: list[Self]) -> Self:
             head_size=specs[0].head_size,
             head_size_v=specs[0].head_size_v,
             dtype=specs[0].dtype,
+            kv_quant_mode=specs[0].kv_quant_mode,
             page_size_padded=specs[0].page_size_padded,
             sliding_window=cls.merge_window_sizes(sliding_window),
             attention_chunk_size=cls.merge_window_sizes(attention_chunk_size),
@@ -179,6 +277,19 @@ def merge(cls, specs: list[Self]) -> Self:
 
     @property
     def real_page_size_bytes(self) -> int:
+        if self.kv_quant_mode.is_nvfp4:
+            # Packed layout per head: fp4 data + fp8 block scales.
+            # fp4 data: head_size//2 bytes (2 fp4 values per byte)
+            # fp8 block scale: head_size//16 bytes (1 scale per 16 elements)
+            last_dim = nvfp4_kv_cache_full_dim(
+                self.head_size
+            ) + nvfp4_kv_cache_full_dim(self.head_size_v)
+            return (
+                self.block_size
+                * self.num_kv_heads
+                * last_dim
+                * get_dtype_size(self.dtype)
+            )
         return (
             self.block_size
             * self.num_kv_heads
@@ -187,19 +298,70 @@ def real_page_size_bytes(self) -> int:
         )
 
 
+def _apply_alignment_padding(spec: MLAAttentionSpec | SlidingWindowMLASpec):
+    if spec.alignment is None:
+        return
+    actual_page_size = spec.real_page_size_bytes
+    padded_page_size = round_up(actual_page_size, spec.alignment)
+    if padded_page_size != actual_page_size:
+        object.__setattr__(spec, "page_size_padded", padded_page_size)
+
+
+@dataclass(frozen=True, kw_only=True)
+class TQFullAttentionSpec(FullAttentionSpec):
+    """FullAttentionSpec with TQ-aware page size.
+
+    Python equivalent of the C++ TQ4FullAttentionSpec. Overrides
+    real_page_size_bytes to use TQ slot bytes instead of the raw
+    head_size * dtype formula.
+    """
+
+    tq_slot_size: int = 0
+
+    @property
+    def real_page_size_bytes(self) -> int:
+        if self.tq_slot_size > 0:
+            return self.block_size * self.num_kv_heads * self.tq_slot_size
+        return super().real_page_size_bytes
+
+    @classmethod
+    def merge(cls, specs: list[Self]) -> Self:
+        merged = super().merge(specs)
+        assert all(s.tq_slot_size == specs[0].tq_slot_size for s in specs), (
+            "All TQ layers in the same KV cache group must use the same tq_slot_size."
+        )
+        return replace(merged, tq_slot_size=specs[0].tq_slot_size)
+
+
 @dataclass(frozen=True, kw_only=True)
 class MLAAttentionSpec(FullAttentionSpec):
     # TODO(Lucas/Chen): less hacky way to do this
     cache_dtype_str: str | None = None
+    # DeepseekV4 only fields. Non-DeepseekV4 MLA models leave these at defaults.
+    alignment: int | None = None  # Default to None for no padding.
+    compress_ratio: int = 1  # Default to 1 for no compression.
+    model_version: str | None = None
+
+    def __post_init__(self):
+        super().__post_init__()
+        _apply_alignment_padding(self)
+
+    @property
+    def storage_block_size(self) -> int:
+        return self.block_size // self.compress_ratio
 
     @property
     def real_page_size_bytes(self) -> int:
         if self.cache_dtype_str == "fp8_ds_mla":
-            # See `vllm/v1/attention/backends/mla/flashmla_sparse.py`
-            #  for details.
+            if self.model_version == "deepseek_v4":
+                # DeepseekV4: 448B NoPE + 128B RoPE + 8B fp8 scale = 584B per token.
+                # head_size stays semantic (512); bytes are determined here.
+                return self.storage_block_size * 584
+            # V3.2 main MLA: 656-byte custom layout (kv_lora_rank=512 +
+            # qk_rope_head_dim=64, head_size=576). See flashmla_sparse.py.
             return self.block_size * 656
         return (
-            self.block_size
+            self.storage_block_size
             * self.num_kv_heads
             * self.head_size
             * get_dtype_size(self.dtype)
@@ -211,42 +373,114 @@ def merge(cls, specs: list[Self]) -> Self:
             "All attention layers in the same KV cache group must be MLAAttentionSpec."
         )
         cache_dtype_str_set = set(spec.cache_dtype_str for spec in specs)
-        assert len(cache_dtype_str_set) == 1, (
+        compress_ratio_set = set(spec.compress_ratio for spec in specs)
+        model_version_set = set(spec.model_version for spec in specs)
+        assert (
+            len(cache_dtype_str_set) == 1
+            and len(compress_ratio_set) == 1
+            and len(model_version_set) == 1
+        ), (
             "All attention layers in the same KV cache group must use the same "
-            "quantization method."
+            "quantization method, compress ratio, and model version."
         )
         return cls(
             block_size=specs[0].block_size,
             num_kv_heads=specs[0].num_kv_heads,
             head_size=specs[0].head_size,
             dtype=specs[0].dtype,
+            kv_quant_mode=specs[0].kv_quant_mode,
             page_size_padded=specs[0].page_size_padded,
             cache_dtype_str=cache_dtype_str_set.pop(),
+            compress_ratio=compress_ratio_set.pop(),
+            model_version=model_version_set.pop(),
         )
 
 
+@dataclass(frozen=True, kw_only=True)
+class HiddenStateCacheSpec(MLAAttentionSpec):
+    """Marker for hidden-state cache layers used by extract_hidden_states."""
+
+    pass
+
+
 @dataclass(frozen=True, kw_only=True)
 class ChunkedLocalAttentionSpec(AttentionSpec):
     attention_chunk_size: int
 
-    def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int:
-        max_model_len = vllm_config.model_config.max_model_len
-        max_num_batched_tokens = vllm_config.scheduler_config.max_num_batched_tokens
+    def max_admission_blocks_per_request(
+        self, max_num_batched_tokens: int, max_model_len: int
+    ) -> int:
+        """Per-request admission cap, in blocks.
 
-        # During chunked prefill, we allocate KV cache for at most
-        # `self.attention_chunk_size` computed tokens plus the newly scheduled
-        # tokens. And we won't allocate KV cache for more than `max_model_len`
-        # tokens.
+        Single source of truth for both startup pool sizing
+        (`max_memory_usage_bytes`) and the runtime admission gate, so requests
+        admitted by startup can also be admitted at runtime.
+        """
+        # During chunked prefill, we hold KV for at most one chunk window.
         num_tokens = min(
             self.attention_chunk_size + max_num_batched_tokens, max_model_len
         )
+        return cdiv(num_tokens, self.block_size)
 
-        return cdiv(num_tokens, self.block_size) * self.page_size_bytes
+    def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int:
+        max_model_len = vllm_config.model_config.max_model_len
+        max_num_batched_tokens = vllm_config.scheduler_config.max_num_batched_tokens
+        max_blocks = self.max_admission_blocks_per_request(
+            max_num_batched_tokens=max_num_batched_tokens, max_model_len=max_model_len
+        )
+        return max_blocks * self.page_size_bytes
 
 
 @dataclass(frozen=True, kw_only=True)
 class SlidingWindowSpec(AttentionSpec):
     sliding_window: int
+    head_size_v: int = None  # type: ignore[assignment]
+
+    def __post_init__(self):
+        if self.head_size_v is None:
+            object.__setattr__(self, "head_size_v", self.head_size)
+
+    @property
+    def real_page_size_bytes(self) -> int:
+        # Mirror ``FullAttentionSpec.real_page_size_bytes`` for NVFP4 KV cache.
+        if self.kv_quant_mode.is_nvfp4:
+            last_dim = nvfp4_kv_cache_full_dim(
+                self.head_size
+            ) + nvfp4_kv_cache_full_dim(self.head_size_v)
+            return (
+                self.block_size
+                * self.num_kv_heads
+                * last_dim
+                * get_dtype_size(self.dtype)
+            )
+        return (
+            self.block_size
+            * self.num_kv_heads
+            * (self.head_size + self.head_size_v)
+            * get_dtype_size(self.dtype)
+        )
+
+    def max_admission_blocks_per_request(
+        self, max_num_batched_tokens: int, max_model_len: int
+    ) -> int:
+        """Per-request admission cap, in blocks.
+
+        Single source of truth for both startup pool sizing
+        (`max_memory_usage_bytes`) and the runtime admission gate. Per-request
+        real-held blocks plateau at this bound because
+        `SlidingWindowManager.remove_skipped_blocks` runs from `allocate_slots`
+        before each chunk's `get_num_blocks_to_allocate`.
+        """
+        # During chunked prefill, we hold KV for the last `sliding_window-1`
+        # computed tokens plus the newly scheduled tokens, and never more
+        # than `max_model_len`.
+        num_tokens = min(
+            self.sliding_window - 1 + max_num_batched_tokens, max_model_len
+        )
+        # +1 because the sliding window may not start from the beginning of
+        # the block. E.g. block size 4 and num_token 4 needs two blocks
+        # [XXCD][EF] to store the 6-token window [CDEF].
+        return cdiv(num_tokens, self.block_size) + 1
 
     def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int:
         assert vllm_config.parallel_config.decode_context_parallel_size == 1, (
@@ -254,20 +488,75 @@ def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int:
         )
         max_model_len = vllm_config.model_config.max_model_len
         max_num_batched_tokens = vllm_config.scheduler_config.max_num_batched_tokens
+        max_blocks = self.max_admission_blocks_per_request(
+            max_num_batched_tokens=max_num_batched_tokens, max_model_len=max_model_len
+        )
+        return max_blocks * self.page_size_bytes
 
-        # During chunked prefill, we allocate KV cache for the last
-        # `self.sliding_window-1` computed tokens plus the newly scheduled
-        # tokens. And we won't allocate KV cache for more than `max_model_len`
-        # tokens.
-        num_tokens = min(
-            self.sliding_window - 1 + max_num_batched_tokens, max_model_len
+
+@dataclass(frozen=True, kw_only=True)
+class SlidingWindowMLASpec(SlidingWindowSpec):
+    """Sliding window attention with MLA cache format."""
+
+    cache_dtype_str: str | None = None
+    # DeepseekV4-only: see MLAAttentionSpec.model_version.
+    alignment: int | None = None  # Default to None for no padding.
+    compress_ratio: int = 1
+    model_version: str | None = None
+
+    def __post_init__(self):
+        _apply_alignment_padding(self)
+
+    @property
+    def storage_block_size(self) -> int:
+        return self.block_size // self.compress_ratio
+
+    @property
+    def real_page_size_bytes(self) -> int:
+        if self.model_version == "deepseek_v4":
+            # DeepseekV4: 448B NoPE + 128B RoPE + 8B fp8 scale = 584B per token.
+            return self.storage_block_size * 584
+        assert self.model_version is None, (
+            f"Unsupported model version: {self.model_version}"
+        )
+        return (
+            self.storage_block_size
+            * self.num_kv_heads
+            * self.head_size
+            * get_dtype_size(self.dtype)
         )
 
-        # +1 here because the sliding window may not start from the beginning
-        # of the block. For example, if the block size is 4 and num_token
-        # is 4, we need two blocks [XXCD] [EF] to store the sliding
-        # window [CDEF] of 6 tokens.
-        return (cdiv(num_tokens, self.block_size) + 1) * self.page_size_bytes
+    @classmethod
+    def merge(cls, specs: list[Self]) -> Self:
+        assert all(isinstance(spec, SlidingWindowMLASpec) for spec in specs), (
+            "All attention layers in the same KV cache group must be "
+            "SlidingWindowMLASpec."
+        )
+        cache_dtype_str_set = set(spec.cache_dtype_str for spec in specs)
+        compress_ratio_set = set(spec.compress_ratio for spec in specs)
+        model_version_set = set(spec.model_version for spec in specs)
+        sliding_window_set = set(spec.sliding_window for spec in specs)
+        assert (
+            len(cache_dtype_str_set) == 1
+            and len(compress_ratio_set) == 1
+            and len(model_version_set) == 1
+            and len(sliding_window_set) == 1
+        ), (
+            "All attention layers in the same KV cache group must use the same "
+            "quantization method, compress ratio, model version and sliding "
+            "window size."
+        )
+        return cls(
+            block_size=specs[0].block_size,
+            num_kv_heads=specs[0].num_kv_heads,
+            head_size=specs[0].head_size,
+            dtype=specs[0].dtype,
+            page_size_padded=specs[0].page_size_padded,
+            sliding_window=sliding_window_set.pop(),
+            cache_dtype_str=cache_dtype_str_set.pop(),
+            compress_ratio=compress_ratio_set.pop(),
+            model_version=model_version_set.pop(),
+        )
 
 
 @dataclass(frozen=True)
@@ -275,7 +564,7 @@ class MambaSpec(KVCacheSpec):
     shapes: tuple[tuple[int, ...], ...]
     dtypes: tuple[torch.dtype]
     page_size_padded: int | None = None
-    mamba_type: str = "mamba2"
+    mamba_type: MambaAttentionBackendEnum = MambaAttentionBackendEnum.MAMBA2
     mamba_cache_mode: str = "none"
     num_speculative_blocks: int = 0
 
@@ -293,7 +582,9 @@ def page_size_bytes(self) -> int:
     def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int:
         if vllm_config.cache_config.mamba_cache_mode == "all":
             max_model_len = vllm_config.model_config.max_model_len
-            return cdiv(max_model_len, self.block_size) * self.page_size_bytes
+            return (
+                cdiv(max_model_len, self.block_size) + self.num_speculative_blocks
+            ) * self.page_size_bytes
         elif vllm_config.cache_config.mamba_cache_mode == "align":
             return self.page_size_bytes * (2 + self.num_speculative_blocks)
         else:
@@ -352,6 +643,7 @@ def merge(cls, specs: list[Self]) -> Self:
             head_size_v=specs[0].head_size_v,
             sink_len=specs[0].sink_len,
             dtype=specs[0].dtype,
+            kv_quant_mode=specs[0].kv_quant_mode,
             page_size_padded=specs[0].page_size_padded,
             sliding_window=cls.merge_window_sizes(sliding_window),
             attention_chunk_size=cls.merge_window_sizes(attention_chunk_size),
@@ -403,7 +695,17 @@ def is_uniform_type(cls, kv_cache_specs: dict[str, KVCacheSpec]) -> bool:
             # Different block sizes, not uniform.
             return False
         one_spec = next(iter(kv_cache_specs.values()))
-        if isinstance(one_spec, FullAttentionSpec):
+        # NOTE: Check subclasses before parent classes since isinstance()
+        # returns True for subclasses.
+        if isinstance(one_spec, SlidingWindowMLASpec):
+            # SlidingWindowMLASpec is uniform if all specs are SlidingWindowMLASpec
+            # with the same sliding_window size.
+            return all(
+                isinstance(spec, SlidingWindowMLASpec)
+                and spec.sliding_window == one_spec.sliding_window
+                for spec in kv_cache_specs.values()
+            )
+        elif isinstance(one_spec, FullAttentionSpec):
             return all(
                 isinstance(spec, FullAttentionSpec) for spec in kv_cache_specs.values()
             )
@@ -447,6 +749,65 @@ def from_specs(cls, kv_cache_specs: dict[str, KVCacheSpec]) -> Self | None:
         else:
             return None
 
+    # NOTE: below util functions are only used by DeepseekV4 for now.
+    def get_page_sizes(self) -> list[int]:
+        return list(set(spec.page_size_bytes for spec in self.kv_cache_specs.values()))
+
+    def get_num_layer_tuples(self) -> int:
+        return Counter(
+            spec.page_size_bytes for spec in self.kv_cache_specs.values()
+        ).most_common(1)[0][1]
+
+    def max_memory_usage_pages(self, vllm_config: VllmConfig) -> int:
+        return max(
+            cdiv(spec.max_memory_usage_bytes(vllm_config), spec.page_size_bytes)
+            for spec in self.kv_cache_specs.values()
+        )
+
+
+def get_kv_cache_spec_kind(kv_cache_spec: KVCacheSpec) -> KVCacheSpecKind:
+    if isinstance(kv_cache_spec, UniformTypeKVCacheSpecs):
+        inner_kinds = {
+            get_kv_cache_spec_kind(spec)
+            for spec in kv_cache_spec.kv_cache_specs.values()
+        }
+        if len(inner_kinds) == 1:
+            return next(iter(inner_kinds))
+        return KVCacheSpecKind.UNKNOWN
+    # Keep subclass checks before base classes so specialized specs keep their
+    # more precise kind.
+    if isinstance(kv_cache_spec, SlidingWindowMLASpec):
+        return KVCacheSpecKind.SLIDING_WINDOW_MLA
+    if isinstance(kv_cache_spec, MLAAttentionSpec):
+        return KVCacheSpecKind.MLA_ATTENTION
+    if isinstance(kv_cache_spec, SinkFullAttentionSpec):
+        return KVCacheSpecKind.SINK_FULL_ATTENTION
+    if isinstance(kv_cache_spec, FullAttentionSpec):
+        return KVCacheSpecKind.FULL_ATTENTION
+    if isinstance(kv_cache_spec, ChunkedLocalAttentionSpec):
+        return KVCacheSpecKind.CHUNKED_LOCAL_ATTENTION
+    if isinstance(kv_cache_spec, SlidingWindowSpec):
+        return KVCacheSpecKind.SLIDING_WINDOW
+    if isinstance(kv_cache_spec, MambaSpec):
+        return KVCacheSpecKind.MAMBA
+    if isinstance(kv_cache_spec, EncoderOnlyAttentionSpec):
+        return KVCacheSpecKind.ENCODER_ONLY_ATTENTION
+    if isinstance(kv_cache_spec, CrossAttentionSpec):
+        return KVCacheSpecKind.CROSS_ATTENTION
+    return KVCacheSpecKind.UNKNOWN
+
+
+def get_kv_cache_spec_sliding_window(kv_cache_spec: KVCacheSpec) -> int | None:
+    if isinstance(kv_cache_spec, UniformTypeKVCacheSpecs):
+        inner_windows = {
+            get_kv_cache_spec_sliding_window(spec)
+            for spec in kv_cache_spec.kv_cache_specs.values()
+        }
+        return next(iter(inner_windows)) if len(inner_windows) == 1 else None
+    if isinstance(kv_cache_spec, SlidingWindowSpec):
+        return kv_cache_spec.sliding_window
+    return None
+
 
 @dataclass
 class KVCacheTensor:
@@ -469,6 +830,8 @@ class KVCacheGroupSpec:
     layer_names: list[str]
     # The KV cache spec of this manager layer
     kv_cache_spec: KVCacheSpec
+    # Whether this group contains EAGLE/MTP draft attention layers.
+    is_eagle_group: bool = False
 
 
 @dataclass
diff --git a/vllm/v1/kv_offload/abstract.py b/vllm/v1/kv_offload/abstract.py
deleted file mode 100644
index 27aa1e35317f..000000000000
--- a/vllm/v1/kv_offload/abstract.py
+++ /dev/null
@@ -1,163 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""
-OffloadingManager class for managing KV data offloading in vLLM v1
-
-This class runs in the scheduler, tracks which blocks are offloaded
-and their address.
-
-The class provides the following primitives:
-    lookup() - find the length of the maximal series of blocks,
-        starting from the first one, that are all offloaded.
-    prepare_load() - prepare given blocks to be read.
-        The given blocks will be protected from eviction.
-        This function returns a LoadSpec which encapsulates
-        information required for performing the load.
-    touch() - marks the give blocks as recently used. Can be used
-        to track block's LRU. This function is separated from the
-        prepare_load function to allow setting block recency even
-        for blocks which do not need reading from the cache, such as
-        blocks that are cached by the GPU prefix cache.
-    complete_load() - mark blocks which were previously prepared to be
-        loaded as done loading. This is to re-allow their eviction.
-    prepare_store() - prepare the given blocks to be written.
-        Returns a StoreSpec encapsulating offloading information,
-        as well as a list of blocks that were evicted as a result.
-    complete_store() - marks a previous store as completed.
-        Following this call, the given blocks will become loadable.
-"""
-
-from abc import ABC, abstractmethod
-from collections.abc import Iterable
-from dataclasses import dataclass
-
-from vllm.v1.core.kv_cache_utils import BlockHash
-
-
-class LoadStoreSpec(ABC):
-    """
-    Abstract metadata that encapsulates information allowing a worker
-    to load, and optionally also to store, blocks of KV data.
-    """
-
-    @staticmethod
-    @abstractmethod
-    def medium() -> str:
-        """
-        Returns a string representation of the medium type
-        this store/load targets.
-        """
-        pass
-
-
-@dataclass
-class PrepareStoreOutput:
-    block_hashes_to_store: list[BlockHash]
-    store_spec: LoadStoreSpec
-    block_hashes_evicted: list[BlockHash]
-
-
-@dataclass
-class OffloadingEvent:
-    block_hashes: list[BlockHash]
-    block_size: int
-    medium: str
-    # True if blocks are removed, False if stored
-    removed: bool
-
-
-class OffloadingManager(ABC):
-    @abstractmethod
-    def lookup(self, block_hashes: Iterable[BlockHash]) -> int | None:
-        """
-        Finds the length of the maximal series of blocks, starting from the
-        first one, that are all offloaded.
-
-        Args:
-            block_hashes: the hashes identifying the blocks to lookup.
-
-        Returns:
-            An integer representing the maximal number of blocks that
-            are currently offloaded, or None if the lookup should be retried
-            later. Returning None will delay the request handling by the vLLM
-            scheduler.
-        """
-        pass
-
-    @abstractmethod
-    def prepare_load(self, block_hashes: Iterable[BlockHash]) -> LoadStoreSpec:
-        """
-        Prepare the given blocks to be read.
-        The given blocks will be protected from eviction until
-        complete_load is called.
-        It assumes all given blocks are offloaded.
-
-        Args:
-            block_hashes: the hashes identifying the blocks.
-
-        Returns:
-            A LoadStoreSpec that can be used by a worker to locate and load
-            the actual offloaded KV data.
-        """
-        pass
-
-    def touch(self, block_hashes: Iterable[BlockHash]):
-        """
-        Mark the given blocks as recently used.
-        This could in practice mean moving them to the end of an LRU list.
-
-        Args:
-            block_hashes: the hashes identifying the blocks.
-        """
-        return
-
-    def complete_load(self, block_hashes: Iterable[BlockHash]):
-        """
-        Marks previous blocks that were prepared to load as done loading.
-
-        Args:
-            block_hashes: the hashes identifying the blocks.
-        """
-        return
-
-    @abstractmethod
-    def prepare_store(
-        self, block_hashes: Iterable[BlockHash]
-    ) -> PrepareStoreOutput | None:
-        """
-        Prepare the given blocks to be offloaded.
-        The given blocks will be protected from eviction until
-        complete_store is called.
-
-        Args:
-            block_hashes: the hashes identifying the blocks.
-
-        Returns:
-            A PrepareStoreOutput indicating which blocks need storing,
-            where to store them (LoadStoreSpec), and list of blocks that
-            were evicted as a result.
-            None is returned if the blocks cannot be stored.
-        """
-        pass
-
-    def complete_store(self, block_hashes: Iterable[BlockHash], success: bool = True):
-        """
-        Marks blocks which were previously prepared to be stored, as stored.
-        Following this call, the blocks become loadable.
-        If if_success is False, blocks that were not marked as stored will be
-        removed.
-
-        Args:
-            block_hashes: the hashes identifying the blocks.
-            success: whether the blocks were stored successfully.
-        """
-        return
-
-    def take_events(self) -> Iterable[OffloadingEvent]:
-        """
-        Take the offloading events from the manager.
-
-        Yields:
-            New OffloadingEvents collected since the last call.
-        """
-        return ()
diff --git a/vllm/v1/kv_offload/base.py b/vllm/v1/kv_offload/base.py
new file mode 100644
index 000000000000..de65be1c05e6
--- /dev/null
+++ b/vllm/v1/kv_offload/base.py
@@ -0,0 +1,411 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Core abstractions for KV cache offloading in vLLM v1.
+"""
+
+from abc import ABC, abstractmethod
+from collections.abc import Collection, Iterable, Iterator, Sequence
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any, NewType
+
+import numpy as np
+import torch
+
+from vllm.logger import init_logger
+from vllm.v1.core.kv_cache_utils import resolve_kv_cache_block_sizes
+
+if TYPE_CHECKING:
+    from vllm.config import VllmConfig
+    from vllm.v1.kv_cache_interface import KVCacheConfig
+    from vllm.v1.kv_offload.worker.worker import OffloadingHandler
+
+# `OffloadKey` identifies an offloaded block. It combines a block hash with
+# its KV cache group index, encoded as raw bytes to avoid tuple GC overhead.
+# Use the helper functions below to construct / decompose keys.
+OffloadKey = NewType("OffloadKey", bytes)
+
+logger = init_logger(__name__)
+
+
+def make_offload_key(block_hash: bytes, group_idx: int) -> OffloadKey:
+    """Pack a block hash and group index into an `OffloadKey`."""
+    return OffloadKey(block_hash + group_idx.to_bytes(4, "big", signed=False))
+
+
+def get_offload_block_hash(key: OffloadKey) -> bytes:
+    """Extract the block hash from an `OffloadKey`."""
+    return key[:-4]
+
+
+def get_offload_group_idx(key: OffloadKey) -> int:
+    """Extract the group index from an `OffloadKey`."""
+    return int.from_bytes(key[-4:], "big", signed=False)
+
+
+@dataclass
+class ReqContext:
+    req_id: str
+    kv_transfer_params: dict[str, Any] | None = None
+
+
+class LoadStoreSpec(ABC):
+    """
+    Abstract metadata that encapsulates information allowing a worker
+    to load, and optionally also to store, blocks of KV data.
+    """
+
+    @staticmethod
+    @abstractmethod
+    def medium() -> str:
+        """
+        Returns a string representation of the medium type
+        this store/load targets.
+        """
+        pass
+
+
+@dataclass
+class PrepareStoreOutput:
+    keys_to_store: list[OffloadKey]
+    store_spec: LoadStoreSpec
+    evicted_keys: list[OffloadKey]
+
+
+@dataclass
+class OffloadingEvent:
+    keys: list[OffloadKey]
+    medium: str
+    # True if blocks are removed, False if stored
+    removed: bool
+
+
+"""
+OffloadingManager class for managing KV data offloading in vLLM v1
+
+This class runs in the scheduler, tracks which blocks are offloaded
+and their address.
+
+The class provides the following primitives:
+    lookup() - check whether a single block is offloaded and ready.
+    prepare_load() - prepare given blocks to be read.
+        The given blocks will be protected from eviction.
+        This function returns a LoadSpec which encapsulates
+        information required for performing the load.
+    touch() - marks the give blocks as recently used. Can be used
+        to track block's LRU. This function is separated from the
+        prepare_load function to allow setting block recency even
+        for blocks which do not need reading from the cache, such as
+        blocks that are cached by the GPU prefix cache.
+    complete_load() - mark blocks which were previously prepared to be
+        loaded as done loading. This is to re-allow their eviction.
+    prepare_store() - prepare the given blocks to be written.
+        Returns a StoreSpec encapsulating offloading information,
+        as well as a list of blocks that were evicted as a result.
+    complete_store() - marks a previous store as completed.
+        Following this call, the given blocks will become loadable.
+"""
+
+
+class OffloadingManager(ABC):
+    @abstractmethod
+    def lookup(self, key: OffloadKey, req_context: ReqContext) -> bool | None:
+        """
+        Checks whether a single block is offloaded and ready to be read.
+
+        Args:
+            key: the key identifying the block to lookup.
+            req_context: per-request context (e.g. kv_transfer_params).
+
+        Returns:
+            True if the block is offloaded and ready, False if not,
+            or None if the lookup should be retried later.
+            Returning None will delay the request handling by the vLLM
+            scheduler.
+        """
+        pass
+
+    @abstractmethod
+    def prepare_load(
+        self,
+        keys: Collection[OffloadKey],
+        req_context: ReqContext,
+    ) -> LoadStoreSpec:
+        """
+        Prepare the given blocks to be read.
+        The given blocks will be protected from eviction until
+        complete_load is called.
+        It assumes all given blocks are offloaded.
+
+        Args:
+            keys: the keys identifying the blocks.
+            req_context: per-request context (e.g. kv_transfer_params).
+
+        Returns:
+            A LoadStoreSpec that can be used by a worker to locate and load
+            the actual offloaded KV data.
+        """
+        pass
+
+    def touch(self, keys: Collection[OffloadKey], req_context: ReqContext):
+        """
+        Mark the given blocks as recently used.
+        This could in practice mean moving them to the end of an LRU list.
+
+        Args:
+            keys: the keys identifying the blocks.
+            req_context: per-request context (e.g. kv_transfer_params).
+        """
+        return
+
+    def complete_load(self, keys: Collection[OffloadKey], req_context: ReqContext):
+        """
+        Marks previous blocks that were prepared to load as done loading.
+
+        Args:
+            keys: the keys identifying the blocks.
+            req_context: per-request context (e.g. kv_transfer_params).
+        """
+        return
+
+    @abstractmethod
+    def prepare_store(
+        self,
+        keys: Collection[OffloadKey],
+        req_context: ReqContext,
+    ) -> PrepareStoreOutput | None:
+        """
+        Prepare the given blocks to be offloaded.
+        The given blocks will be protected from eviction until
+        complete_store is called.
+
+        Args:
+            keys: the keys identifying the blocks.
+            req_context: per-request context (e.g. kv_transfer_params).
+
+        Returns:
+            A PrepareStoreOutput indicating which blocks need storing,
+            where to store them (LoadStoreSpec), and list of blocks that
+            were evicted as a result.
+            None is returned if the blocks cannot be stored.
+        """
+        pass
+
+    def complete_store(
+        self,
+        keys: Collection[OffloadKey],
+        req_context: ReqContext,
+        success: bool = True,
+    ):
+        """
+        Marks blocks which were previously prepared to be stored, as stored.
+        Following this call, the blocks become loadable.
+        If success is False, blocks that were not marked as stored will be
+        removed.
+
+        Args:
+            keys: the keys identifying the blocks.
+            req_context: per-request context (e.g. kv_transfer_params).
+            success: whether the blocks were stored successfully.
+        """
+        return
+
+    def take_events(self) -> Iterable[OffloadingEvent]:
+        """
+        Take the offloading events from the manager.
+
+        Yields:
+            New OffloadingEvents collected since the last call.
+        """
+        return ()
+
+    def reset_cache(self) -> None:
+        """Evict all tracked blocks and reset internal state."""
+        return
+
+    def shutdown(self) -> None:
+        """Shutdown the manager and release any resources."""
+        return
+
+
+class BlockIDsLoadStoreSpec(LoadStoreSpec, ABC):
+    """
+    Spec for loading/storing KV blocks from given block numbers.
+    """
+
+    def __init__(self, block_ids: list[int]):
+        self.block_ids = np.array(block_ids, dtype=np.int64)
+
+    def __repr__(self) -> str:
+        return repr(self.block_ids)
+
+
+class GPULoadStoreSpec(BlockIDsLoadStoreSpec):
+    """
+    Spec for loading/storing a KV block to GPU memory.
+
+    If there are multiple KV groups, the blocks are expected to be
+    ordered by the group index.
+    In that case, group_sizes[i] determines the number of blocks
+    per the i-th KV group, and thus sum(group_sizes) == len(block_ids).
+    group_sizes=None indicates a single KV group.
+
+    If block_indices is given, each group (determined by group_sizes) of block IDs
+    will correspond to logically contiguous blocks, e.g. blocks 5-10 of a some request.
+    block_indices[i] will represent the block index of the first block in group #i.
+    Thus, len(block_indices) == len(group_sizes) = number of KV cache groups.
+    This information is required in order to support off/loading from offloaded blocks
+    which are larger than GPU blocks.
+    In such cases, the first GPU block per each group may be unaligned to the offloaded
+    block size, and so knowing block_indices[i] allows the worker to correctly
+    skip part of the first matching offloaded block.
+    """
+
+    def __init__(
+        self,
+        block_ids: list[int],
+        group_sizes: Sequence[int],
+        block_indices: Sequence[int],
+    ):
+        super().__init__(block_ids)
+        assert sum(group_sizes) == len(block_ids)
+        assert len(block_indices) == len(group_sizes)
+        self.group_sizes: Sequence[int] = group_sizes
+        self.block_indices: Sequence[int] = block_indices
+
+    @staticmethod
+    def medium() -> str:
+        return "GPU"
+
+
+@dataclass
+class CanonicalKVCacheTensor:
+    """
+    A canonicalized KV cache tensor whose first dimension is num_blocks.
+
+    For attention backends where the raw tensor has num_blocks at a
+    non-leading physical dimension (e.g. FlashAttention's
+    (2, num_blocks, ...) layout), the tensor is split so that each
+    resulting CanonicalKVCacheTensor starts with (num_blocks, ...).
+    """
+
+    # The KV cache tensor with shape (num_blocks, ...)
+    tensor: torch.Tensor
+    # The (possibly padded) page size per block in bytes
+    page_size_bytes: int
+
+
+@dataclass
+class CanonicalKVCacheRef:
+    """
+    Per-layer (or group of layers) reference to a specific (by index)
+    CanonicalKVCacheTensor and records the un-padded page size used by that layer.
+    """
+
+    # Index into the list of CanonicalKVCacheTensor objects
+    tensor_idx: int
+    # The un-padded page size per block in bytes
+    page_size_bytes: int
+
+
+@dataclass
+class CanonicalKVCaches:
+    """
+    Canonicalized block-level representation of the KV caches.
+
+    Composed of:
+        - Unique list of KV cache data tensors,
+          each with shape (num_blocks, page_size_in_bytes) and int8 dtype.
+        - Per-group data references of the tensors.
+          i.e. how each KV cache group maps to the tensors.
+    """
+
+    # Ordered list of unique block tensors, each with shape
+    # (num_blocks, ...).
+    tensors: list[CanonicalKVCacheTensor]
+    # Per-KV-cache-group list of data references that map each layer
+    # in the group to the appropriate entry in the tensors list.
+    group_data_refs: list[list[CanonicalKVCacheRef]]
+
+
+class OffloadingSpec(ABC):
+    """Spec for an offloading connector"""
+
+    def __init__(self, vllm_config: "VllmConfig", kv_cache_config: "KVCacheConfig"):
+        logger.warning(
+            "Initializing OffloadingSpec. This API is experimental and "
+            "subject to change in the future as we iterate the design."
+        )
+        self.vllm_config = vllm_config
+        self.kv_cache_config = kv_cache_config
+
+        kv_transfer_config = vllm_config.kv_transfer_config
+        assert kv_transfer_config is not None
+        self.extra_config = kv_transfer_config.kv_connector_extra_config
+
+        parallel_config = vllm_config.parallel_config
+        context_parallel_factor = (
+            parallel_config.decode_context_parallel_size
+            * parallel_config.prefill_context_parallel_size
+        )
+
+        # gpu block size per group
+        self.gpu_block_size: tuple[int, ...] = tuple(
+            kv_cache_group.kv_cache_spec.block_size * context_parallel_factor
+            for kv_cache_group in kv_cache_config.kv_cache_groups
+        )
+
+        # hash_block_size must match what the scheduler uses for
+        # Request.block_hashes (resolved via resolve_kv_cache_block_sizes).
+        _, self.hash_block_size = resolve_kv_cache_block_sizes(
+            kv_cache_config, vllm_config
+        )
+
+        for block_size in self.gpu_block_size:
+            assert block_size % self.hash_block_size == 0, (
+                f"gpu_block_size={block_size} not divisible by "
+                f"hash_block_size={self.hash_block_size}. "
+                f"Hybrid models (e.g. Mamba+Attention) need "
+                f"--enable-prefix-caching to align block sizes."
+            )
+
+        # offloaded_block_size / gpu_block_size
+        self.block_size_factor: int = 1
+
+        offloaded_block_size = self.extra_config.get("block_size")
+        if offloaded_block_size is not None:
+            offloaded_block_size_int = int(offloaded_block_size)
+            gpu_block_sizes = set(self.gpu_block_size)
+            assert len(gpu_block_sizes) == 1, (
+                "If 'block_size' is specified in kv_connector_extra_config, "
+                "there must be at least one KV cache group, "
+                "and all groups must have the same block size."
+            )
+            gpu_block_size = gpu_block_sizes.pop()
+
+            assert offloaded_block_size_int % gpu_block_size == 0
+            self.block_size_factor = offloaded_block_size_int // gpu_block_size
+
+    @abstractmethod
+    def get_manager(self) -> OffloadingManager:
+        """
+        Get an OffloadingManager that will be used
+        by the scheduler-side offloading connector to track
+        offloaded blocks and manage evictions.
+        """
+        pass
+
+    @abstractmethod
+    def get_handlers(
+        self, kv_caches: CanonicalKVCaches
+    ) -> Iterator[tuple[type[LoadStoreSpec], type[LoadStoreSpec], "OffloadingHandler"]]:
+        """
+        Get offloading handlers along with their respective src and dst types.
+
+        Args:
+            kv_caches: Canonicalized KV caches.
+
+        Yields:
+            Tuples of (src_type, dst_type, offloading_handler).
+        """
+        pass
diff --git a/vllm/v1/kv_offload/cpu/common.py b/vllm/v1/kv_offload/cpu/common.py
new file mode 100644
index 000000000000..cf5b2b39dd6b
--- /dev/null
+++ b/vllm/v1/kv_offload/cpu/common.py
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from vllm.v1.kv_offload.base import BlockIDsLoadStoreSpec
+
+
+class CPULoadStoreSpec(BlockIDsLoadStoreSpec):
+    """
+    Spec for loading/storing a KV block to CPU memory.
+    """
+
+    @staticmethod
+    def medium() -> str:
+        return "CPU"
diff --git a/vllm/v1/kv_offload/cpu/gpu_worker.py b/vllm/v1/kv_offload/cpu/gpu_worker.py
new file mode 100644
index 000000000000..119778368ca7
--- /dev/null
+++ b/vllm/v1/kv_offload/cpu/gpu_worker.py
@@ -0,0 +1,452 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import time
+from collections import deque
+from dataclasses import dataclass
+
+import numpy as np
+import torch
+
+from vllm import _custom_ops as ops
+from vllm.logger import init_logger
+from vllm.utils.math_utils import cdiv
+from vllm.utils.platform_utils import is_pin_memory_available
+from vllm.v1.kv_offload.base import (
+    BlockIDsLoadStoreSpec,
+    CanonicalKVCacheRef,
+    CanonicalKVCaches,
+    GPULoadStoreSpec,
+)
+from vllm.v1.kv_offload.cpu.shared_offload_region import SharedOffloadRegion
+from vllm.v1.kv_offload.worker.worker import (
+    OffloadingHandler,
+    TransferResult,
+    TransferSpec,
+)
+
+logger = init_logger(__name__)
+
+
+@dataclass
+class Transfer:
+    job_id: int
+    stream: torch.cuda.Stream
+    start_event: torch.Event
+    end_event: torch.Event
+    num_bytes: int
+
+
+def compute_sub_block_ptrs(
+    block_ids: np.ndarray,
+    block_size_factor: int,
+    output: np.ndarray,
+    tensor: torch.Tensor,
+    skip_count: int = 0,
+):
+    """
+    Compute byte pointers for sub-blocks of the given block IDs.
+
+    Each block in block_ids contains block_size_factor sub-blocks.
+    The pointer for sub-block j of block b is:
+        base_ptr + b * row_stride + j * sub_block_size
+
+    where sub_block_size = tensor.shape[1] // block_size_factor (gpu page size).
+
+    This handles tensors where row_stride != block_size_factor * sub_block_size
+    (e.g. non-contiguous CPU tensors).
+
+    Args:
+        block_ids: array of block IDs at the tensor's native granularity.
+        block_size_factor: number of sub-blocks per block.
+        output: pre-allocated int64 array to write pointers into.
+        tensor: the source or destination tensor.
+        skip_count: sub-blocks to skip in the first block.
+    """
+    assert skip_count < block_size_factor
+
+    num_sub_blocks = len(output)
+    base_ptr = tensor.data_ptr()
+    row_stride = tensor.stride(0)
+
+    if block_size_factor == 1:
+        # Fast path: 1:1 mapping, no sub-block expansion needed.
+        output[:] = base_ptr + block_ids[:num_sub_blocks] * row_stride
+        return
+
+    # Vectorized expansion for block_size_factor > 1.
+    assert tensor.shape[1] % block_size_factor == 0
+    sub_block_size = tensor.shape[1] // block_size_factor
+    sub_offsets = np.arange(block_size_factor, dtype=np.int64) * sub_block_size
+    # (num_blocks, 1) + (1, block_size_factor) -> (num_blocks, block_size_factor)
+    all_ptrs = (
+        base_ptr + block_ids.astype(np.int64)[:, np.newaxis] * row_stride
+    ) + sub_offsets[np.newaxis, :]
+    # Flatten and apply skip_count / truncation
+    flat = all_ptrs.ravel()
+    output[:] = flat[skip_count : skip_count + num_sub_blocks]
+
+
+def pin_mmap_region(region: SharedOffloadRegion) -> None:
+    """Register the entire mmap as CUDA pinned memory via cudaHostRegister."""
+    rank = region.rank
+
+    base_ptr = region._base.data_ptr()
+    result = torch.cuda.cudart().cudaHostRegister(base_ptr, region.total_size_bytes, 0)
+    if result.value != 0:
+        logger.warning(
+            "cudaHostRegister failed for rank=%d (code=%d) — "
+            "transfers will still work but may be slower (unpinned DMA)",
+            rank,
+            result,
+        )
+    else:
+        logger.debug(
+            "cudaHostRegister rank=%d %.2f GB",
+            rank,
+            region.total_size_bytes / 1e9,
+        )
+        region.is_pinned = True
+
+
+class SingleDirectionOffloadingHandler(OffloadingHandler):
+    """
+    SingleDirectionOffloadingHandler handles transfers for a single direction,
+    either CPU->GPU or GPU->CPU.
+    Transfers are guaranteed to be executed in order of their submission.
+    Each transfer uses a unique CUDA stream, and its stream will start
+    executing only after the streams of previous transfers have finished.
+    """
+
+    def __init__(
+        self,
+        gpu_tensors: list[torch.Tensor],
+        cpu_tensors: list[torch.Tensor],
+        block_size_factor: int,
+        kv_cache_groups_data_refs: list[list[CanonicalKVCacheRef]],
+        gpu_to_cpu: bool,
+        mmap_region: SharedOffloadRegion | None = None,
+    ):
+        """
+        Initialize a SingleDirectionOffloadingHandler.
+
+        Args:
+            gpu_tensors: list of GPU KV cache tensors.
+                Each of shape (num_gpu_blocks, gpu_page_size_bytes) with dtype int8.
+            cpu_tensors: list of CPU KV cache tensors.
+                Each of shape (num_cpu_blocks, cpu_page_size_bytes) with dtype int8.
+                Order should match gpu_tensors.
+            kv_cache_groups_data_refs: list of CanonicalKVCacheRef per group.
+            gpu_to_cpu: if True, transfer from GPU to CPU; otherwise CPU to GPU.
+        """
+        assert len(gpu_tensors) == len(cpu_tensors)
+        assert len(gpu_tensors) > 0
+
+        # assert input tensors are as expected
+        for gpu_tensor, cpu_tensor in zip(gpu_tensors, cpu_tensors):
+            assert gpu_tensor.dtype == torch.int8
+            assert gpu_tensor.ndim == 2
+            assert gpu_tensor.is_cuda
+            assert cpu_tensor.dtype == torch.int8
+            assert cpu_tensor.ndim == 2
+            assert cpu_tensor.device.type == "cpu"
+            _, gpu_page_size = gpu_tensor.shape
+            _, cpu_page_size = cpu_tensor.shape
+            assert cpu_page_size == gpu_page_size * block_size_factor
+
+        self.src_tensors: list[torch.Tensor] = (
+            gpu_tensors if gpu_to_cpu else cpu_tensors
+        )
+        self.dst_tensors: list[torch.Tensor] = (
+            cpu_tensors if gpu_to_cpu else gpu_tensors
+        )
+        self.gpu_to_cpu: bool = gpu_to_cpu
+        self.kv_cache_groups_data_refs = kv_cache_groups_data_refs
+
+        # GPU blocks may be smaller
+        # cpu_page_size = gpu_page_size * block_size_factor.
+        self.src_block_size_factor = 1 if self.gpu_to_cpu else block_size_factor
+        self.dst_block_size_factor = block_size_factor if self.gpu_to_cpu else 1
+
+        self.transfer_type = ("GPU", "CPU") if self.gpu_to_cpu else ("CPU", "GPU")
+        # mmap_region to clean up on shutdown (gpu_to_cpu handler owns it)
+        self._mmap_region = mmap_region
+        # job_id -> event
+        self._transfer_events: dict[int, torch.Event] = {}
+        # queue of transfers (job_id, stream, event)
+        self._transfers: deque[Transfer] = deque()
+        # list of CUDA streams available for re-use
+        self._stream_pool: list[torch.cuda.Stream] = []
+        # list of CUDA events available for re-use
+        self._event_pool: list[torch.Event] = []
+
+    def transfer_async(self, job_id: int, transfer_spec: TransferSpec) -> bool:
+        src_spec, dst_spec = transfer_spec
+        assert isinstance(src_spec, BlockIDsLoadStoreSpec)
+        assert isinstance(dst_spec, BlockIDsLoadStoreSpec)
+
+        src_blocks = src_spec.block_ids
+        dst_blocks = dst_spec.block_ids
+        assert src_blocks.ndim == 1
+        assert dst_blocks.ndim == 1
+
+        num_src_blocks = len(src_blocks)
+        num_dst_blocks = len(dst_blocks)
+
+        # There are 2 types of transfers:
+        # 1. GPU -> CPU
+        # 2. CPU -> GPU
+        #
+        # transfers are also to CPU blocks, EXCEPT MAYBE for the first and last block.
+        # i.e. the first and last CPU blocks in src_blocks can match against
+        # a smaller (byte-wise) set of GPU blocks in dst_blocks.
+        # In such cases, we may need to skip some gpu-sized sub-blocks,
+        # and start reading/writing from the middle of the first CPU block.
+        # If we have multiple KV cache groups (when using HMA with hybrid models),
+        # we may have a partial first/last CPU block per each group.
+        # The group_sizes parameter encodes the size of each group of blocks
+        # in the GPU dst_blocks.
+        # If group_sizes is None, we assume all blocks belong to a single group.
+        # The logical_offset parameter maps each group of blocks to its logical
+        # offset inside the request, counting in GPU blocks.
+        # This allows us to find the correct starting position
+        # in the matching first CPU block.
+
+        # extract group_sizes from the GPU spec
+        gpu_spec = src_spec if self.gpu_to_cpu else dst_spec
+        assert isinstance(gpu_spec, GPULoadStoreSpec)
+        group_sizes = gpu_spec.group_sizes
+        assert len(group_sizes) == len(self.kv_cache_groups_data_refs)
+
+        # extract block indices from the GPU spec
+        block_indices = gpu_spec.block_indices
+        assert len(block_indices) == len(self.kv_cache_groups_data_refs)
+
+        num_copy_ops = 0
+        for group_size, group_data_refs in zip(
+            group_sizes, self.kv_cache_groups_data_refs
+        ):
+            num_copy_ops += group_size * len(group_data_refs)
+
+        all_src = np.empty(num_copy_ops, dtype=np.int64)
+        all_dst = np.empty(num_copy_ops, dtype=np.int64)
+        all_sizes = np.empty(num_copy_ops, dtype=np.int64)
+
+        src_offset = 0
+        dst_offset = 0
+        op_idx = 0
+        # count total number of bytes copied
+        num_transfer_bytes = 0
+        for group_size, block_idx, group_data_refs in zip(
+            group_sizes, block_indices, self.kv_cache_groups_data_refs
+        ):
+            if group_size == 0:
+                continue
+
+            src_logical_blocks_to_skip = block_idx % self.src_block_size_factor
+            dst_logical_blocks_to_skip = block_idx % self.dst_block_size_factor
+            src_logical_blocks_count = group_size + src_logical_blocks_to_skip
+            dst_logical_blocks_count = group_size + dst_logical_blocks_to_skip
+
+            dst_blocks_count = cdiv(
+                dst_logical_blocks_count, self.dst_block_size_factor
+            )
+            dst_end_offset = dst_offset + dst_blocks_count
+            assert dst_end_offset <= num_dst_blocks
+
+            src_blocks_count = cdiv(
+                src_logical_blocks_count, self.src_block_size_factor
+            )
+            src_end_offset = src_offset + src_blocks_count
+            assert src_end_offset <= num_src_blocks
+
+            group_src = src_blocks[src_offset:src_end_offset]
+            group_dst = dst_blocks[dst_offset:dst_end_offset]
+
+            for data_ref in group_data_refs:
+                t_idx = data_ref.tensor_idx
+                end_idx = op_idx + group_size
+
+                compute_sub_block_ptrs(
+                    group_src,
+                    self.src_block_size_factor,
+                    all_src[op_idx:end_idx],
+                    self.src_tensors[t_idx],
+                    skip_count=src_logical_blocks_to_skip,
+                )
+                compute_sub_block_ptrs(
+                    group_dst,
+                    self.dst_block_size_factor,
+                    all_dst[op_idx:end_idx],
+                    self.dst_tensors[t_idx],
+                    skip_count=dst_logical_blocks_to_skip,
+                )
+
+                all_sizes[op_idx:end_idx] = data_ref.page_size_bytes
+                num_transfer_bytes += group_size * data_ref.page_size_bytes
+                op_idx = end_idx
+
+            src_offset = src_end_offset
+            dst_offset = dst_end_offset
+
+        assert src_offset == num_src_blocks
+        assert dst_offset == num_dst_blocks
+        assert op_idx == num_copy_ops
+
+        batch_src = torch.from_numpy(all_src)
+        batch_dst = torch.from_numpy(all_dst)
+        batch_sizes = torch.from_numpy(all_sizes)
+
+        stream = self._stream_pool.pop() if self._stream_pool else torch.cuda.Stream()
+        start_event = (
+            self._event_pool.pop()
+            if self._event_pool
+            else torch.Event(enable_timing=True)
+        )
+        end_event = (
+            self._event_pool.pop()
+            if self._event_pool
+            else torch.Event(enable_timing=True)
+        )
+
+        if self.gpu_to_cpu:
+            # wait for model computation to finish before offloading
+            stream.wait_stream(torch.cuda.current_stream())
+        if self._transfers:
+            last_transfer: Transfer = self._transfers[-1]
+            last_event = last_transfer.end_event
+            # assure job will start only after the previous one completes
+            stream.wait_event(last_event)
+        # CPU->GPU reads from host pinned memory, which is never written
+        # by a concurrent GPU stream, so CU_MEMCPY_SRC_ACCESS_ORDER_ANY is
+        # safe and lets the driver pipeline source reads. GPU->CPU reads
+        # from the live GPU KV cache, which the compute stream keeps
+        # writing; we must keep STREAM ordering so source reads are gated
+        # by the transfer stream's wait_stream(compute) barrier.
+        is_src_access_order_any = not self.gpu_to_cpu
+        with torch.cuda.stream(stream):
+            start_event.record(stream)
+            if num_copy_ops > 0:
+                ops.swap_blocks_batch(
+                    batch_src,
+                    batch_dst,
+                    batch_sizes,
+                    is_src_access_order_any=is_src_access_order_any,
+                )
+            end_event.record(stream)
+
+        self._transfer_events[job_id] = end_event
+        self._transfers.append(
+            Transfer(
+                job_id=job_id,
+                stream=stream,
+                start_event=start_event,
+                end_event=end_event,
+                num_bytes=num_transfer_bytes,
+            )
+        )
+
+        # success
+        return True
+
+    def get_finished(self) -> list[TransferResult]:
+        results: list[TransferResult] = []
+        while self._transfers and self._transfers[0].end_event.query():
+            transfer = self._transfers.popleft()
+            transfer_time = (
+                transfer.start_event.elapsed_time(transfer.end_event) * 1e-3
+            )  # elapsed_time is in milliseconds
+            result = TransferResult(
+                job_id=transfer.job_id,
+                success=True,
+                transfer_size=transfer.num_bytes,
+                transfer_time=transfer_time,
+                transfer_type=self.transfer_type,
+            )
+
+            results.append(result)
+            self._stream_pool.append(transfer.stream)
+            self._event_pool.append(transfer.end_event)
+            self._event_pool.append(transfer.start_event)
+            del self._transfer_events[transfer.job_id]
+        return results
+
+    def wait(self, job_ids: set[int]):
+        for job_id in job_ids:
+            event = self._transfer_events.get(job_id)
+            if event is not None:
+                event.synchronize()
+
+    def shutdown(self) -> None:
+        while self._transfers:
+            transfer = self._transfers.popleft()
+            transfer.end_event.synchronize()
+        self._transfer_events.clear()
+        self._stream_pool.clear()
+        self._event_pool.clear()
+        self.src_tensors.clear()
+        self.dst_tensors.clear()
+        if self._mmap_region is not None:
+            self._mmap_region.cleanup()
+            self._mmap_region = None
+
+
+class CpuGpuOffloadingHandlers:
+    def __init__(
+        self,
+        kv_caches: CanonicalKVCaches,
+        block_size_factor: int,
+        num_cpu_blocks: int,
+        mmap_region: SharedOffloadRegion | None = None,
+    ):
+        pin_memory = is_pin_memory_available()
+        logger.info("Allocating %d CPU tensors...", len(kv_caches.tensors))
+        self._mmap_region = mmap_region
+        if mmap_region is not None and pin_memory:
+            pin_mmap_region(mmap_region)
+
+        gpu_tensors: list[torch.Tensor] = []
+        cpu_tensors: list[torch.Tensor] = []
+        for kv_cache_tensor in kv_caches.tensors:
+            gpu_page_size_bytes = kv_cache_tensor.page_size_bytes
+            gpu_tensor = kv_cache_tensor.tensor.view(torch.int8).view(
+                (-1, gpu_page_size_bytes)
+            )
+            cpu_page_size_bytes = gpu_page_size_bytes * block_size_factor
+
+            if mmap_region is not None:
+                cpu_tensor = mmap_region.create_next_view(cpu_page_size_bytes)
+            else:
+                t0 = time.monotonic()
+                cpu_tensor = torch.zeros(
+                    (num_cpu_blocks, cpu_page_size_bytes),
+                    dtype=torch.int8,
+                    device="cpu",
+                    pin_memory=pin_memory,
+                )
+                logger.debug(
+                    "torch.zeros pinned tensor %d×%d (%.2f GB): %.3f s",
+                    num_cpu_blocks,
+                    cpu_page_size_bytes,
+                    num_cpu_blocks * cpu_page_size_bytes / 1e9,
+                    time.monotonic() - t0,
+                )
+
+            gpu_tensors.append(gpu_tensor)
+            cpu_tensors.append(cpu_tensor)
+
+        self.gpu_to_cpu_handler = SingleDirectionOffloadingHandler(
+            gpu_tensors=gpu_tensors,
+            cpu_tensors=cpu_tensors,
+            block_size_factor=block_size_factor,
+            kv_cache_groups_data_refs=kv_caches.group_data_refs,
+            gpu_to_cpu=True,
+            mmap_region=mmap_region,
+        )
+
+        self.cpu_to_gpu_handler = SingleDirectionOffloadingHandler(
+            gpu_tensors=gpu_tensors,
+            cpu_tensors=cpu_tensors,
+            block_size_factor=block_size_factor,
+            kv_cache_groups_data_refs=kv_caches.group_data_refs,
+            gpu_to_cpu=False,
+        )
diff --git a/vllm/v1/kv_offload/cpu/manager.py b/vllm/v1/kv_offload/cpu/manager.py
index 66f0e6736a9d..39e64933b5c9 100644
--- a/vllm/v1/kv_offload/cpu/manager.py
+++ b/vllm/v1/kv_offload/cpu/manager.py
@@ -1,19 +1,21 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from collections.abc import Iterable
+from collections import OrderedDict
+from collections.abc import Collection, Iterable
 from typing import Literal
 
-from vllm.v1.core.kv_cache_utils import BlockHash
-from vllm.v1.kv_offload.abstract import (
+from vllm.v1.kv_offload.base import (
     LoadStoreSpec,
     OffloadingEvent,
     OffloadingManager,
+    OffloadKey,
     PrepareStoreOutput,
+    ReqContext,
 )
-from vllm.v1.kv_offload.cpu.policies.abstract import BlockStatus, CachePolicy
+from vllm.v1.kv_offload.cpu.common import CPULoadStoreSpec
 from vllm.v1.kv_offload.cpu.policies.arc import ARCCachePolicy
+from vllm.v1.kv_offload.cpu.policies.base import BlockStatus, CachePolicy
 from vllm.v1.kv_offload.cpu.policies.lru import LRUCachePolicy
-from vllm.v1.kv_offload.mediums import CPULoadStoreSpec
 
 _CACHE_POLICIES: dict[str, type[CachePolicy]] = {
     "lru": LRUCachePolicy,
@@ -33,12 +35,12 @@ class CPUOffloadingManager(OffloadingManager):
 
     def __init__(
         self,
-        block_size: int,
         num_blocks: int,
         cache_policy: Literal["lru", "arc"] = "lru",
         enable_events: bool = False,
+        store_threshold: int = 1,
+        max_tracker_size: int = 64_000,
     ):
-        self.block_size: int = block_size
         self.medium: str = CPULoadStoreSpec.medium()
         self._num_blocks: int = num_blocks
         self._num_allocated_blocks: int = 0
@@ -51,17 +53,22 @@ def __init__(
                 f"Supported: {list(_CACHE_POLICIES)}"
             )
         self._policy: CachePolicy = policy_cls(cache_capacity=num_blocks)
+        self.store_threshold: int = store_threshold
+        self.max_tracker_size: int = max_tracker_size
+
+        # Number of block references. It is ordered so can evict the LRU entry in O(1).
+        self.counts: OrderedDict[OffloadKey, int] | None = (
+            OrderedDict() if store_threshold >= 2 else None
+        )
 
     # --- block pool ---
 
     def _get_num_free_blocks(self) -> int:
         return len(self._free_list) + self._num_blocks - self._num_allocated_blocks
 
-    def _allocate_blocks(self, block_hashes: list[BlockHash]) -> list[BlockStatus]:
-        num_fresh = min(
-            len(block_hashes), self._num_blocks - self._num_allocated_blocks
-        )
-        num_reused = len(block_hashes) - num_fresh
+    def _allocate_blocks(self, keys: list[OffloadKey]) -> list[BlockStatus]:
+        num_fresh = min(len(keys), self._num_blocks - self._num_allocated_blocks)
+        num_reused = len(keys) - num_fresh
         assert len(self._free_list) >= num_reused
 
         # allocate fresh blocks
@@ -80,128 +87,153 @@ def _free_block(self, block: BlockStatus) -> None:
 
     def _get_load_store_spec(
         self,
-        block_hashes: Iterable[BlockHash],
+        keys: Iterable[OffloadKey],
         blocks: Iterable[BlockStatus],
     ) -> CPULoadStoreSpec:
         return CPULoadStoreSpec([block.block_id for block in blocks])
 
     # --- OffloadingManager interface ---
 
-    def lookup(self, block_hashes: Iterable[BlockHash]) -> int | None:
-        hit_count = 0
-        for block_hash in block_hashes:
-            block = self._policy.get(block_hash)
-            if block is None or not block.is_ready:
-                break
-            hit_count += 1
-        return hit_count
-
-    def prepare_load(self, block_hashes: Iterable[BlockHash]) -> LoadStoreSpec:
+    def lookup(self, key: OffloadKey, req_context: ReqContext) -> bool | None:
+        if self.counts is not None:
+            if key in self.counts:
+                self.counts.move_to_end(key)
+                self.counts[key] += 1
+            else:
+                if len(self.counts) >= self.max_tracker_size:
+                    self.counts.popitem(last=False)
+                self.counts[key] = 1
+        block = self._policy.get(key)
+        if block is None:
+            return False
+        if not block.is_ready:
+            return None  # write in-flight; caller should retry
+        return True
+
+    def prepare_load(
+        self,
+        keys: Collection[OffloadKey],
+        req_context: ReqContext,
+    ) -> LoadStoreSpec:
         blocks = []
-        for block_hash in block_hashes:
-            block = self._policy.get(block_hash)
-            assert block is not None, f"Block {block_hash!r} not found in cache"
-            assert block.is_ready, f"Block {block_hash!r} is not ready for reading"
+        for key in keys:
+            block = self._policy.get(key)
+            assert block is not None, f"Block {key!r} not found in cache"
+            assert block.is_ready, f"Block {key!r} is not ready for reading"
             block.ref_cnt += 1
             blocks.append(block)
-        return self._get_load_store_spec(block_hashes, blocks)
+        return self._get_load_store_spec(keys, blocks)
 
-    def touch(self, block_hashes: Iterable[BlockHash]) -> None:
-        self._policy.touch(block_hashes)
+    def touch(self, keys: Collection[OffloadKey], req_context: ReqContext) -> None:
+        self._policy.touch(keys)
 
-    def complete_load(self, block_hashes: Iterable[BlockHash]) -> None:
-        for block_hash in block_hashes:
-            block = self._policy.get(block_hash)
-            assert block is not None, f"Block {block_hash!r} not found"
-            assert block.ref_cnt > 0, f"Block {block_hash!r} ref_cnt is already 0"
+    def complete_load(
+        self, keys: Collection[OffloadKey], req_context: ReqContext
+    ) -> None:
+        for key in keys:
+            block = self._policy.get(key)
+            assert block is not None, f"Block {key!r} not found"
+            assert block.ref_cnt > 0, f"Block {key!r} ref_cnt is already 0"
             block.ref_cnt -= 1
 
     def prepare_store(
-        self, block_hashes: Iterable[BlockHash]
+        self,
+        keys: Collection[OffloadKey],
+        req_context: ReqContext,
     ) -> PrepareStoreOutput | None:
-        block_hashes_list = list(block_hashes)
-
+        if self.counts is not None:
+            keys = [k for k in keys if self.counts.get(k, 0) >= self.store_threshold]
         # filter out blocks that are already stored
-        block_hashes_to_store = [
-            bh for bh in block_hashes_list if self._policy.get(bh) is None
-        ]
+        keys_to_store = [k for k in keys if self._policy.get(k) is None]
 
-        if not block_hashes_to_store:
+        if not keys_to_store:
             return PrepareStoreOutput(
-                block_hashes_to_store=[],
+                keys_to_store=[],
                 store_spec=self._get_load_store_spec([], []),
-                block_hashes_evicted=[],
+                evicted_keys=[],
             )
 
-        num_blocks_to_evict = len(block_hashes_to_store) - self._get_num_free_blocks()
+        num_blocks_to_evict = len(keys_to_store) - self._get_num_free_blocks()
 
-        to_evict: list[BlockHash] = []
+        to_evict: list[OffloadKey] = []
         if num_blocks_to_evict > 0:
             # Blocks from the original input are excluded from eviction candidates:
             # a block that was already stored must remain in the cache after this call.
-            protected = set(block_hashes_list)
+            protected = set(keys)
             evicted = self._policy.evict(num_blocks_to_evict, protected)
             if evicted is None:
                 return None
-            for block_hash, block in evicted:
+            for key, block in evicted:
                 self._free_block(block)
-                to_evict.append(block_hash)
+                to_evict.append(key)
 
         if to_evict and self.events is not None:
             self.events.append(
                 OffloadingEvent(
-                    block_hashes=to_evict,
-                    block_size=self.block_size,
+                    keys=to_evict,
                     medium=self.medium,
                     removed=True,
                 )
             )
 
-        blocks = self._allocate_blocks(block_hashes_to_store)
-        assert len(blocks) == len(block_hashes_to_store), (
+        blocks = self._allocate_blocks(keys_to_store)
+        assert len(blocks) == len(keys_to_store), (
             "Block pool did not allocate the expected number of blocks"
         )
 
-        for block_hash, block in zip(block_hashes_to_store, blocks):
-            self._policy.insert(block_hash, block)
+        for key, block in zip(keys_to_store, blocks):
+            self._policy.insert(key, block)
 
         # build store specs for allocated blocks
-        store_spec = self._get_load_store_spec(block_hashes_to_store, blocks)
+        store_spec = self._get_load_store_spec(keys_to_store, blocks)
 
         return PrepareStoreOutput(
-            block_hashes_to_store=block_hashes_to_store,
+            keys_to_store=keys_to_store,
             store_spec=store_spec,
-            block_hashes_evicted=to_evict,
+            evicted_keys=to_evict,
         )
 
     def complete_store(
-        self, block_hashes: Iterable[BlockHash], success: bool = True
+        self,
+        keys: Collection[OffloadKey],
+        req_context: ReqContext,
+        success: bool = True,
     ) -> None:
-        stored_block_hashes: list[BlockHash] = []
+        stored_keys: list[OffloadKey] = []
 
         if success:
-            for block_hash in block_hashes:
-                block = self._policy.get(block_hash)
+            for key in keys:
+                block = self._policy.get(key)
                 if block is not None and not block.is_ready:
                     block.ref_cnt = 0
-                    stored_block_hashes.append(block_hash)
+                    stored_keys.append(key)
         else:
-            for block_hash in block_hashes:
-                block = self._policy.get(block_hash)
+            for key in keys:
+                block = self._policy.get(key)
                 if block is not None and not block.is_ready:
-                    self._policy.remove(block_hash)
+                    self._policy.remove(key)
                     self._free_block(block)
 
-        if stored_block_hashes and self.events is not None:
+        if stored_keys and self.events is not None:
             self.events.append(
                 OffloadingEvent(
-                    block_hashes=stored_block_hashes,
-                    block_size=self.block_size,
+                    keys=stored_keys,
                     medium=self.medium,
                     removed=False,
                 )
             )
 
+    def reset_cache(self) -> None:
+        # Clear ALL blocks unconditionally. The scheduler's _stale_job_threshold
+        # guarantees that complete_load / complete_store are never called for
+        # pre-reset jobs, so no lazy cleanup is needed. The scheduler also
+        # flushes in-flight load job IDs to the workers before any new stores
+        # can begin, preventing a cross-direction data race on reused offload block IDs.
+        self._policy.clear()
+
+        self._free_list.clear()
+        self._num_allocated_blocks = 0
+
     def take_events(self) -> Iterable[OffloadingEvent]:
         if self.events is not None:
             yield from self.events
diff --git a/vllm/v1/kv_offload/cpu/policies/arc.py b/vllm/v1/kv_offload/cpu/policies/arc.py
index fdcb16badd45..5b01815c2d75 100644
--- a/vllm/v1/kv_offload/cpu/policies/arc.py
+++ b/vllm/v1/kv_offload/cpu/policies/arc.py
@@ -3,8 +3,8 @@
 from collections import OrderedDict
 from collections.abc import Iterable
 
-from vllm.v1.core.kv_cache_utils import BlockHash
-from vllm.v1.kv_offload.cpu.policies.abstract import BlockStatus, CachePolicy
+from vllm.v1.kv_offload.base import OffloadKey
+from vllm.v1.kv_offload.cpu.policies.base import BlockStatus, CachePolicy
 
 
 class ARCCachePolicy(CachePolicy):
@@ -23,7 +23,7 @@ class ARCCachePolicy(CachePolicy):
            until a miss or non-ready block is encountered.
 
         2. Cache touch (touch) - Adaptive Learning:
-           For each block_hash (in reverse order):
+           For each key (in reverse order):
            - If in T1: Move to T2 (promotion from recent to frequent).
            - If in T2: Move to MRU position (end of queue).
            - If in B1 ghost list: Increase target_t1_size.
@@ -48,88 +48,95 @@ class ARCCachePolicy(CachePolicy):
     def __init__(self, cache_capacity: int):
         self.cache_capacity: int = cache_capacity
         self.target_t1_size: float = 0.0
-        self.t1: OrderedDict[BlockHash, BlockStatus] = OrderedDict()
-        self.t2: OrderedDict[BlockHash, BlockStatus] = OrderedDict()
-        # block_hash -> None (only care about presence)
-        self.b1: OrderedDict[BlockHash, None] = OrderedDict()
-        self.b2: OrderedDict[BlockHash, None] = OrderedDict()
-
-    def get(self, block_hash: BlockHash) -> BlockStatus | None:
-        return self.t1.get(block_hash) or self.t2.get(block_hash)
-
-    def insert(self, block_hash: BlockHash, block: BlockStatus) -> None:
-        self.t1[block_hash] = block
-        self.b1.pop(block_hash, None)
-        self.b2.pop(block_hash, None)
-
-    def remove(self, block_hash: BlockHash) -> None:
-        if self.t1.pop(block_hash, None) is None:
-            self.t2.pop(block_hash, None)
-
-    def touch(self, block_hashes: Iterable[BlockHash]) -> None:
-        for block_hash in reversed(list(block_hashes)):
-            if block_hash in self.t1:
-                block = self.t1.pop(block_hash)
+        self.t1: OrderedDict[OffloadKey, BlockStatus] = OrderedDict()
+        self.t2: OrderedDict[OffloadKey, BlockStatus] = OrderedDict()
+        # key -> None (only care about presence)
+        self.b1: OrderedDict[OffloadKey, None] = OrderedDict()
+        self.b2: OrderedDict[OffloadKey, None] = OrderedDict()
+
+    def get(self, key: OffloadKey) -> BlockStatus | None:
+        return self.t1.get(key) or self.t2.get(key)
+
+    def insert(self, key: OffloadKey, block: BlockStatus) -> None:
+        self.t1[key] = block
+        self.b1.pop(key, None)
+        self.b2.pop(key, None)
+
+    def remove(self, key: OffloadKey) -> None:
+        if self.t1.pop(key, None) is None:
+            self.t2.pop(key, None)
+
+    def touch(self, keys: Iterable[OffloadKey]) -> None:
+        for key in reversed(list(keys)):
+            if key in self.t1:
+                block = self.t1.pop(key)
                 if not block.is_ready:
                     # block was just prepared to be stored, not really touched
                     # twice — keep it in T1 and mark as most recently used
-                    self.t1[block_hash] = block
+                    self.t1[key] = block
                 else:
-                    self.t2[block_hash] = block
+                    self.t2[key] = block
 
-            elif block_hash in self.t2:
-                self.t2.move_to_end(block_hash)
+            elif key in self.t2:
+                self.t2.move_to_end(key)
 
-            elif block_hash in self.b1:
+            elif key in self.b1:
                 delta = max(1, len(self.b2) / len(self.b1))
                 self.target_t1_size = min(
                     self.target_t1_size + delta, self.cache_capacity
                 )
                 # move to MRU position (end) to keep it fresh in the ghost list
-                self.b1.move_to_end(block_hash)
+                self.b1.move_to_end(key)
 
-            elif block_hash in self.b2:
+            elif key in self.b2:
                 delta = max(1, len(self.b1) / len(self.b2))
                 self.target_t1_size = max(self.target_t1_size - delta, 0)
                 # move to MRU position (end) to keep it fresh in the ghost list
-                self.b2.move_to_end(block_hash)
+                self.b2.move_to_end(key)
+
+    def clear(self) -> None:
+        self.t1.clear()
+        self.t2.clear()
+        self.b1.clear()
+        self.b2.clear()
+        self.target_t1_size = 0.0
 
     def evict(
-        self, n: int, protected: set[BlockHash]
-    ) -> list[tuple[BlockHash, BlockStatus]] | None:
+        self, n: int, protected: set[OffloadKey]
+    ) -> list[tuple[OffloadKey, BlockStatus]] | None:
         if n == 0:
             return []
 
         # Collect candidates atomically: simulate T1 size changes as we select,
         # but do not modify actual data structures until all n are found.
         candidates: list[
-            tuple[BlockHash, BlockStatus, bool]
-        ] = []  # (hash, block, from_t1)
-        already_selected: set[BlockHash] = set()
+            tuple[OffloadKey, BlockStatus, bool]
+        ] = []  # (key, block, from_t1)
+        already_selected: set[OffloadKey] = set()
         virtual_t1_size = len(self.t1)
 
         for _ in range(n):
-            candidate: tuple[BlockHash, BlockStatus, bool] | None = None
+            candidate: tuple[OffloadKey, BlockStatus, bool] | None = None
 
             if virtual_t1_size >= int(self.target_t1_size):
-                for block_hash, block in self.t1.items():
+                for key, block in self.t1.items():
                     if (
                         block.ref_cnt == 0
-                        and block_hash not in protected
-                        and block_hash not in already_selected
+                        and key not in protected
+                        and key not in already_selected
                     ):
-                        candidate = (block_hash, block, True)
+                        candidate = (key, block, True)
                         virtual_t1_size -= 1
                         break
 
             if candidate is None:
-                for block_hash, block in self.t2.items():
+                for key, block in self.t2.items():
                     if (
                         block.ref_cnt == 0
-                        and block_hash not in protected
-                        and block_hash not in already_selected
+                        and key not in protected
+                        and key not in already_selected
                     ):
-                        candidate = (block_hash, block, False)
+                        candidate = (key, block, False)
                         break
                 if candidate is None:
                     return None
@@ -138,15 +145,15 @@ def evict(
             already_selected.add(candidate[0])
 
         # Apply all evictions now that we know n candidates exist.
-        result: list[tuple[BlockHash, BlockStatus]] = []
-        for block_hash, block, from_t1 in candidates:
+        result: list[tuple[OffloadKey, BlockStatus]] = []
+        for key, block, from_t1 in candidates:
             if from_t1:
-                del self.t1[block_hash]
-                self.b1[block_hash] = None
+                del self.t1[key]
+                self.b1[key] = None
             else:
-                del self.t2[block_hash]
-                self.b2[block_hash] = None
-            result.append((block_hash, block))
+                del self.t2[key]
+                self.b2[key] = None
+            result.append((key, block))
 
         # Trim ghost lists to cache_capacity.
         for ghost in (self.b1, self.b2):
diff --git a/vllm/v1/kv_offload/cpu/policies/abstract.py b/vllm/v1/kv_offload/cpu/policies/base.py
similarity index 77%
rename from vllm/v1/kv_offload/cpu/policies/abstract.py
rename to vllm/v1/kv_offload/cpu/policies/base.py
index b45bb34cbd2e..0febfe90d613 100644
--- a/vllm/v1/kv_offload/cpu/policies/abstract.py
+++ b/vllm/v1/kv_offload/cpu/policies/base.py
@@ -4,7 +4,7 @@
 from abc import ABC, abstractmethod
 from collections.abc import Iterable
 
-from vllm.v1.core.kv_cache_utils import BlockHash
+from vllm.v1.kv_offload.base import OffloadKey
 
 
 class BlockStatus(ctypes.Structure):
@@ -45,32 +45,40 @@ class CachePolicy(ABC):
     def __init__(self, cache_capacity: int) -> None: ...
 
     @abstractmethod
-    def get(self, block_hash: BlockHash) -> BlockStatus | None:
+    def get(self, key: OffloadKey) -> BlockStatus | None:
         """Find block in data structures. Returns None if not present."""
 
     @abstractmethod
-    def insert(self, block_hash: BlockHash, block: BlockStatus) -> None:
+    def insert(self, key: OffloadKey, block: BlockStatus) -> None:
         """Add a newly allocated block. For ARC: also removes from ghost lists."""
 
     @abstractmethod
-    def remove(self, block_hash: BlockHash) -> None:
+    def remove(self, key: OffloadKey) -> None:
         """Remove a block (used to clean up after a failed store)."""
 
     @abstractmethod
-    def touch(self, block_hashes: Iterable[BlockHash]) -> None:
+    def touch(self, keys: Iterable[OffloadKey]) -> None:
         """Mark blocks as recently used."""
 
     @abstractmethod
     def evict(
-        self, n: int, protected: set[BlockHash]
-    ) -> list[tuple[BlockHash, BlockStatus]] | None:
+        self, n: int, protected: set[OffloadKey]
+    ) -> list[tuple[OffloadKey, BlockStatus]] | None:
         """
         Evict exactly n blocks, skipping any in protected.
 
-        Returns a list of (block_hash, block) for the evicted blocks,
+        Returns a list of (key, block) for the evicted blocks,
         or None if n evictions cannot be satisfied. The operation is atomic:
         if None is returned, no state changes are made.
 
         For ARC: ghost list cleanup (trimming to cache_capacity) is performed
         at the end of a successful eviction.
         """
+
+    @abstractmethod
+    def clear(self) -> None:
+        """
+        Remove ALL blocks regardless of ref_cnt.
+
+        Ghost lists and adaptive state are also reset.
+        """
diff --git a/vllm/v1/kv_offload/cpu/policies/lru.py b/vllm/v1/kv_offload/cpu/policies/lru.py
index b29b81f3c82e..51680d8bcc5b 100644
--- a/vllm/v1/kv_offload/cpu/policies/lru.py
+++ b/vllm/v1/kv_offload/cpu/policies/lru.py
@@ -3,8 +3,8 @@
 from collections import OrderedDict
 from collections.abc import Iterable
 
-from vllm.v1.core.kv_cache_utils import BlockHash
-from vllm.v1.kv_offload.cpu.policies.abstract import BlockStatus, CachePolicy
+from vllm.v1.kv_offload.base import OffloadKey
+from vllm.v1.kv_offload.cpu.policies.base import BlockStatus, CachePolicy
 
 
 class LRUCachePolicy(CachePolicy):
@@ -12,35 +12,38 @@ class LRUCachePolicy(CachePolicy):
 
     def __init__(self, cache_capacity: int):
         # cache_capacity unused by LRU but accepted for a uniform constructor
-        self.blocks: OrderedDict[BlockHash, BlockStatus] = OrderedDict()
+        self.blocks: OrderedDict[OffloadKey, BlockStatus] = OrderedDict()
 
-    def get(self, block_hash: BlockHash) -> BlockStatus | None:
-        return self.blocks.get(block_hash)
+    def get(self, key: OffloadKey) -> BlockStatus | None:
+        return self.blocks.get(key)
 
-    def insert(self, block_hash: BlockHash, block: BlockStatus) -> None:
-        self.blocks[block_hash] = block
+    def insert(self, key: OffloadKey, block: BlockStatus) -> None:
+        self.blocks[key] = block
 
-    def remove(self, block_hash: BlockHash) -> None:
-        del self.blocks[block_hash]
+    def remove(self, key: OffloadKey) -> None:
+        del self.blocks[key]
 
-    def touch(self, block_hashes: Iterable[BlockHash]) -> None:
-        for block_hash in reversed(list(block_hashes)):
-            if block_hash in self.blocks:
-                self.blocks.move_to_end(block_hash)
+    def touch(self, keys: Iterable[OffloadKey]) -> None:
+        for key in reversed(list(keys)):
+            if key in self.blocks:
+                self.blocks.move_to_end(key)
+
+    def clear(self) -> None:
+        self.blocks.clear()
 
     def evict(
-        self, n: int, protected: set[BlockHash]
-    ) -> list[tuple[BlockHash, BlockStatus]] | None:
+        self, n: int, protected: set[OffloadKey]
+    ) -> list[tuple[OffloadKey, BlockStatus]] | None:
         if n == 0:
             return []
-        candidates: list[tuple[BlockHash, BlockStatus]] = []
-        for block_hash, block in self.blocks.items():
-            if block.ref_cnt == 0 and block_hash not in protected:
-                candidates.append((block_hash, block))
+        candidates: list[tuple[OffloadKey, BlockStatus]] = []
+        for key, block in self.blocks.items():
+            if block.ref_cnt == 0 and key not in protected:
+                candidates.append((key, block))
                 if len(candidates) == n:
                     break
         if len(candidates) < n:
             return None
-        for block_hash, _ in candidates:
-            del self.blocks[block_hash]
+        for key, _ in candidates:
+            del self.blocks[key]
         return candidates
diff --git a/vllm/v1/kv_offload/cpu/shared_offload_region.py b/vllm/v1/kv_offload/cpu/shared_offload_region.py
new file mode 100644
index 000000000000..1166b44fc7e3
--- /dev/null
+++ b/vllm/v1/kv_offload/cpu/shared_offload_region.py
@@ -0,0 +1,206 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import mmap
+import os
+import time
+
+import torch
+
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+def _wait_for_file_size(fd: int, expected_size: int, timeout: float = 30.0) -> None:
+    """Spin-wait until the file reaches expected_size (creator truncated it)."""
+    deadline = time.monotonic() + timeout
+    while True:
+        if os.fstat(fd).st_size >= expected_size:
+            return
+        if time.monotonic() > deadline:
+            raise TimeoutError(
+                f"Timed out waiting for mmap file to reach {expected_size} bytes"
+            )
+        time.sleep(0.005)
+
+
+class SharedOffloadRegion:
+    """
+    Single mmap-backed memory region shared across all workers for a
+    vLLM instance.  Workers coordinate via the filesystem: the first worker
+    to open the file with O_EXCL becomes the creator and calls ftruncate;
+    the rest open the existing file and wait until it reaches the expected
+    size.  Each worker then mmap()s the full file.
+
+    File path: /dev/shm/vllm_offload_{instance_id}.mmap
+    """
+
+    def __init__(
+        self,
+        instance_id: str,
+        total_size_bytes: int,
+        num_blocks: int,
+        rank: int | None,
+        num_workers: int,
+        cpu_page_size: int,
+    ) -> None:
+        self.page_size = mmap.PAGESIZE
+
+        self.total_size_bytes = total_size_bytes
+        self.mmap_path = f"/dev/shm/vllm_offload_{instance_id}.mmap"
+        self._creator = False  # set True only if this worker creates the file
+        self.num_blocks = num_blocks
+        self.rank = rank
+        # interleaved-layout stride: one row = all workers' data for one block
+        self._row_stride = cpu_page_size * num_workers
+        if rank is not None:
+            # byte offset to this worker's first slot within each block row
+            self._worker_offset = rank * cpu_page_size
+            # exclusive upper bound for this worker's area within each row
+            self._worker_area_end = (rank + 1) * cpu_page_size
+        try:
+            # Exclusive create — only one worker succeeds
+            self.fd: int | None = os.open(
+                self.mmap_path, os.O_CREAT | os.O_EXCL | os.O_RDWR, 0o600
+            )
+            os.ftruncate(self.fd, self.total_size_bytes)
+            self._creator = True
+            logger.info(
+                "Created mmap file %s (%.2f GB)",
+                self.mmap_path,
+                self.total_size_bytes / 1e9,
+            )
+        except FileExistsError:
+            self.fd = os.open(self.mmap_path, os.O_RDWR)
+            _wait_for_file_size(self.fd, self.total_size_bytes)
+            logger.info("Opened existing mmap file %s", self.mmap_path)
+
+        self.mmap_obj: mmap.mmap | None = mmap.mmap(
+            self.fd,
+            self.total_size_bytes,
+            flags=mmap.MAP_SHARED,
+            prot=mmap.PROT_READ | mmap.PROT_WRITE,
+        )
+
+        # MADV_POPULATE_WRITE was added in Linux 5.14 (value 23).
+        _MADV_POPULATE_WRITE = getattr(mmap, "MADV_POPULATE_WRITE", 23)
+        if rank is not None:
+            # Populate only this worker's pages (one slot per block row).
+            worker_offset = rank * cpu_page_size
+            _t0 = time.perf_counter()
+            page_size = self.page_size
+            for block in range(num_blocks):
+                raw_offset = block * self._row_stride + worker_offset
+                aligned_offset = (raw_offset // page_size) * page_size
+                end = raw_offset + cpu_page_size
+                aligned_length = end - aligned_offset
+                self.mmap_obj.madvise(
+                    _MADV_POPULATE_WRITE, aligned_offset, aligned_length
+                )
+            logger.debug(
+                "MADV_POPULATE_WRITE loop: %d blocks in %.3f s",
+                num_blocks,
+                time.perf_counter() - _t0,
+            )
+        else:
+            # No rank — populate the entire shared region in one call.
+            _t0 = time.perf_counter()
+            self.mmap_obj.madvise(_MADV_POPULATE_WRITE, 0, self.total_size_bytes)
+            logger.debug(
+                "MADV_POPULATE_WRITE entire region: %.3f s", time.perf_counter() - _t0
+            )
+
+        self._base = torch.frombuffer(memoryview(self.mmap_obj), dtype=torch.int8)
+        self._views: list[torch.Tensor] = []
+        self.is_pinned: bool = False
+
+    def create_next_view(self, tensor_page_size: int) -> torch.Tensor:
+        """Allocate a strided int8 view for this worker, one canonical tensor.
+
+        Must be called once per canonical tensor. The full mmap layout is:
+
+            worker0_block0 | worker1_block0 | ... | worker{M-1}_block0
+            worker0_block1 | worker1_block1 | ... | worker{M-1}_block1
+            ...
+
+        Each worker_block cell is cpu_page_size bytes and holds all canonical
+        tensors for that worker and block concatenated:
+            [ tensor0_data | tensor1_data | ... | tensor{L-1}_data ]
+
+        Consecutive rows are separated by row_stride = cpu_page_size * M.
+
+        Returns an int8 tensor of shape (num_blocks, tensor_page_size) with stride
+        (row_stride, 1).  Using int8 keeps stride == bytes, so swap_blocks
+        address arithmetic works without any dtype conversion.
+
+        Args:
+            tensor_page_size: Bytes per block for this  tensor.
+        """
+        assert self.rank is not None
+        new_offset = self._worker_offset + tensor_page_size
+        assert new_offset <= self._worker_area_end, (
+            f"Worker offset {new_offset} exceeds worker area end "
+            f"{self._worker_area_end} (overflowed by "
+            f"{new_offset - self._worker_area_end} bytes)"
+        )
+        worker_layer_view = torch.as_strided(
+            self._base,
+            size=(self.num_blocks, tensor_page_size),
+            stride=(self._row_stride, 1),
+            storage_offset=self._worker_offset,
+        )
+        self._worker_offset = new_offset
+        self._views.append(worker_layer_view)
+        return worker_layer_view
+
+    def create_kv_memoryview(self) -> memoryview:
+        """Return a zero-copy memoryview over the entire KV buffer.
+
+        Shape: (num_blocks, row_stride_bytes). Secondary tiers address
+        block *b* as ``view[b]``.
+        """
+        kv_tensor = self._base.view(self.num_blocks, self._row_stride)
+        np_arr = kv_tensor.numpy()
+        assert np_arr.ctypes.data == self._base.data_ptr(), (
+            "view()/numpy() created a copy instead of sharing the mmap buffer; "
+            "secondary tiers require zero-copy access to primary KV data"
+        )
+        return memoryview(np_arr)
+
+    def cleanup(self) -> None:
+        if self.is_pinned and self._base is not None:
+            base_ptr = self._base.data_ptr()
+            result = torch.cuda.cudart().cudaHostUnregister(base_ptr)
+            if result.value != 0:
+                logger.warning(
+                    "cudaHostUnregister failed for rank=%d (code=%d)", self.rank, result
+                )
+            self.is_pinned = False
+        # Release views before _base: each view holds a _base reference and a
+        # direct StorageImpl reference.  Freeing views first lets both refcounts
+        # drop so the storage (which holds the mmap_obj buffer export) is freed
+        # before mmap_obj.close() is called below.
+        if self._views is not None:
+            self._views.clear()
+        self._base = None
+        if self.mmap_obj:
+            try:
+                self.mmap_obj.close()
+            except Exception:
+                logger.warning("Failed to close mmap_obj", exc_info=True)
+            self.mmap_obj = None
+        if self.fd is not None:
+            try:
+                os.close(self.fd)
+            except Exception:
+                logger.warning("Failed to close fd %s", self.fd, exc_info=True)
+            self.fd = None
+        if self._creator and getattr(self, "mmap_path", None):
+            try:
+                os.unlink(self.mmap_path)
+                logger.info("Removed mmap file %s", self.mmap_path)
+            except Exception:
+                logger.warning(
+                    "Failed to unlink path %s", self.mmap_path, exc_info=True
+                )
+            self._creator = False
diff --git a/vllm/v1/kv_offload/cpu/spec.py b/vllm/v1/kv_offload/cpu/spec.py
index 4feae8cf7d5a..6d17d5317f1c 100644
--- a/vllm/v1/kv_offload/cpu/spec.py
+++ b/vllm/v1/kv_offload/cpu/spec.py
@@ -5,12 +5,16 @@
 from vllm.config import VllmConfig
 from vllm.platforms import current_platform
 from vllm.v1.kv_cache_interface import KVCacheConfig
-from vllm.v1.kv_offload.abstract import LoadStoreSpec, OffloadingManager
+from vllm.v1.kv_offload.base import (
+    CanonicalKVCaches,
+    GPULoadStoreSpec,
+    LoadStoreSpec,
+    OffloadingManager,
+    OffloadingSpec,
+)
+from vllm.v1.kv_offload.cpu.common import CPULoadStoreSpec
+from vllm.v1.kv_offload.cpu.gpu_worker import CpuGpuOffloadingHandlers
 from vllm.v1.kv_offload.cpu.manager import CPUOffloadingManager
-from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec
-from vllm.v1.kv_offload.reuse_manager import FilterReusedOffloadingManager
-from vllm.v1.kv_offload.spec import CanonicalKVCaches, OffloadingSpec
-from vllm.v1.kv_offload.worker.cpu_gpu import CpuGpuOffloadingHandlers
 from vllm.v1.kv_offload.worker.worker import OffloadingHandler
 
 
@@ -26,17 +30,13 @@ def __init__(self, vllm_config: VllmConfig, kv_cache_config: KVCacheConfig):
 
         # calculate kv_bytes_per_offloaded_block
         assert kv_cache_config is not None
-        page_sizes = {
-            kv_cache_group.kv_cache_spec.page_size_bytes
-            for kv_cache_group in kv_cache_config.kv_cache_groups
-        }
-        assert len(page_sizes) == 1
-        page_size_bytes = page_sizes.pop()
-        kv_bytes_per_block = (
-            page_size_bytes
-            * len(kv_cache_config.kv_cache_tensors)
-            * vllm_config.parallel_config.world_size
-        )
+        if kv_cache_config.num_blocks > 0:
+            total_gpu_kv_bytes = sum(t.size for t in kv_cache_config.kv_cache_tensors)
+            kv_bytes_per_block = (
+                total_gpu_kv_bytes // kv_cache_config.num_blocks
+            ) * vllm_config.parallel_config.world_size
+        else:
+            kv_bytes_per_block = 0
 
         kv_bytes_per_offloaded_block = kv_bytes_per_block * self.block_size_factor
         self.num_blocks = (
@@ -44,6 +44,10 @@ def __init__(self, vllm_config: VllmConfig, kv_cache_config: KVCacheConfig):
             if kv_bytes_per_offloaded_block > 0
             else 0
         )
+        world_size = vllm_config.parallel_config.world_size
+        self.cpu_page_size_per_worker: int = (
+            kv_bytes_per_offloaded_block // world_size if world_size > 0 else 0
+        )
 
         # scheduler-side
         self._manager: OffloadingManager | None = None
@@ -60,32 +64,30 @@ def get_manager(self) -> OffloadingManager:
                 kv_events_config is not None and kv_events_config.enable_kv_cache_events
             )
 
-            assert len(self.gpu_block_size) == 1
-            gpu_block_size = self.gpu_block_size[0]
-            offloaded_block_size = gpu_block_size * self.block_size_factor
+            # store_threshold: how many times a block must appear in lookup()
+            # before it is eligible for CPU offloading.  Values < 2 disable
+            # filtering (a threshold of 1 equals no filter; 0 is the default).
+            store_threshold = int(self.extra_config.get("store_threshold", 0))
+
+            # Maximum entries in the internal tracker's LRU table.
+            max_tracker_size = int(self.extra_config.get("max_tracker_size", 64_000))
 
             self._manager = CPUOffloadingManager(
-                block_size=offloaded_block_size,
                 num_blocks=self.num_blocks,
                 cache_policy=self.eviction_policy,  # type: ignore[arg-type]
                 enable_events=enable_events,
+                store_threshold=store_threshold,
+                max_tracker_size=max_tracker_size,
             )
-
-            # store_threshold: how many times a block must appear in lookup()
-            # before it is eligible for CPU offloading.  Values < 2 disable
-            # filtering (a threshold of 1 equals no filter; 0 is the default).
-            store_threshold = int(self.extra_config.get("store_threshold", 0))
-            if store_threshold >= 2:
-                max_tracker_size = int(
-                    self.extra_config.get("max_tracker_size", 64_000)
-                )
-                self._manager = FilterReusedOffloadingManager(
-                    backing=self._manager,
-                    store_threshold=store_threshold,
-                    max_tracker_size=max_tracker_size,
-                )
         return self._manager
 
+    def create_handlers(self, kv_caches: CanonicalKVCaches) -> CpuGpuOffloadingHandlers:
+        return CpuGpuOffloadingHandlers(
+            kv_caches=kv_caches,
+            block_size_factor=self.block_size_factor,
+            num_cpu_blocks=self.num_blocks,
+        )
+
     def get_handlers(
         self, kv_caches: CanonicalKVCaches
     ) -> Iterator[tuple[type[LoadStoreSpec], type[LoadStoreSpec], OffloadingHandler]]:
@@ -94,12 +96,7 @@ def get_handlers(
                 raise Exception(
                     "CPU Offloading is currently only supported on CUDA-alike GPUs"
                 )
-
-            self._handlers = CpuGpuOffloadingHandlers(
-                kv_caches=kv_caches,
-                block_size_factor=self.block_size_factor,
-                num_cpu_blocks=self.num_blocks,
-            )
+            self._handlers = self.create_handlers(kv_caches)
 
         assert self._handlers is not None
         yield GPULoadStoreSpec, CPULoadStoreSpec, self._handlers.gpu_to_cpu_handler
diff --git a/vllm/v1/kv_offload/factory.py b/vllm/v1/kv_offload/factory.py
index ecbaebb0d967..8b967f771b04 100644
--- a/vllm/v1/kv_offload/factory.py
+++ b/vllm/v1/kv_offload/factory.py
@@ -5,7 +5,7 @@
 from typing import TYPE_CHECKING
 
 from vllm.logger import init_logger
-from vllm.v1.kv_offload.spec import OffloadingSpec
+from vllm.v1.kv_offload.base import OffloadingSpec
 
 if TYPE_CHECKING:
     from vllm.config import VllmConfig
@@ -56,3 +56,8 @@ def create_spec(
 OffloadingSpecFactory.register_spec(
     "CPUOffloadingSpec", "vllm.v1.kv_offload.cpu.spec", "CPUOffloadingSpec"
 )
+OffloadingSpecFactory.register_spec(
+    "TieringOffloadingSpec",
+    "vllm.v1.kv_offload.tiering.spec",
+    "TieringOffloadingSpec",
+)
diff --git a/vllm/v1/kv_offload/file_mapper.py b/vllm/v1/kv_offload/file_mapper.py
new file mode 100644
index 000000000000..7184a5d1ce13
--- /dev/null
+++ b/vllm/v1/kv_offload/file_mapper.py
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import hashlib
+import json
+
+from vllm.v1.kv_offload.base import (
+    OffloadingSpec,
+    OffloadKey,
+    get_offload_block_hash,
+    get_offload_group_idx,
+)
+
+_BASE_PATH_HASH_LEN = 12
+_CONFIG_FILENAME = "config.json"
+
+
+class FileMapper:
+    """
+    FileMapper maps KV blocks (given by their hash) to file names.
+    """
+
+    def __init__(
+        self,
+        root_dir: str,
+        model_name: str,
+        hash_block_size: int,
+        gpu_blocks_per_file: int,
+        tp_size: int,
+        pp_size: int,
+        pcp_size: int,
+        dcp_size: int,
+        rank: int,
+        dtype: str,
+        kv_cache_groups: list[dict] | None = None,
+        inference_engine: str = "vllm",
+        parallel_agnostic: bool = False,
+    ):
+        """
+        Initialize the file mapper. Each worker constructs its own, but
+        `config.json` is shared across workers since rank lives outside the hash.
+        When `parallel_agnostic=True`, tp/pp/pcp/dcp are forced to 1 and rank
+        to 0 so multiple parallelism layouts collapse into the same folder.
+        """
+        if parallel_agnostic:
+            tp_size = pp_size = pcp_size = dcp_size = 1
+            rank = 0
+        self.rank: int = rank
+        self.fields: dict = {
+            "model_name": model_name,
+            "hash_block_size": hash_block_size,
+            "gpu_blocks_per_file": gpu_blocks_per_file,
+            "tp_size": tp_size,
+            "pp_size": pp_size,
+            "pcp_size": pcp_size,
+            "dcp_size": dcp_size,
+            "dtype": str(dtype),
+            "kv_cache_groups": kv_cache_groups or [],
+            "inference_engine": inference_engine,
+        }
+        self.base_path: str = self._compute_base_path(root_dir, self.fields)
+
+    @classmethod
+    def from_offloading_spec(
+        cls,
+        root_dir: str,
+        offloading_spec: OffloadingSpec,
+        gpu_blocks_per_file: int = 1,
+        parallel_agnostic: bool = False,
+    ) -> "FileMapper":
+        """Build a FileMapper from an OffloadingSpec."""
+        vllm_config = offloading_spec.vllm_config
+        kv_cache_config = offloading_spec.kv_cache_config
+
+        parallel_config = vllm_config.parallel_config
+        dtype = str(vllm_config.cache_config.cache_dtype).replace("torch.", "")
+        kv_cache_groups = [
+            {
+                "block_size": group.kv_cache_spec.block_size,
+                "layer_names": list(group.layer_names),
+            }
+            for group in kv_cache_config.kv_cache_groups
+        ]
+        return cls(
+            root_dir=root_dir,
+            model_name=vllm_config.model_config.model,
+            hash_block_size=vllm_config.cache_config.block_size,
+            gpu_blocks_per_file=gpu_blocks_per_file,
+            tp_size=parallel_config.tensor_parallel_size,
+            pp_size=parallel_config.pipeline_parallel_size,
+            pcp_size=parallel_config.prefill_context_parallel_size,
+            dcp_size=parallel_config.decode_context_parallel_size,
+            rank=parallel_config.rank,
+            dtype=dtype,
+            kv_cache_groups=kv_cache_groups,
+            parallel_agnostic=parallel_agnostic,
+        )
+
+    def get_file_name(self, key: OffloadKey) -> str:
+        """Map an OffloadKey to <base>_r<rank>/<hhh>/<hh>_g<group_idx>/<hash>.bin."""
+        hash_hex = get_offload_block_hash(key).hex()
+        group_idx = get_offload_group_idx(key)
+        subfolder1, subfolder2 = hash_hex[:3], hash_hex[3:5]
+        return (
+            f"{self.base_path}_r{self.rank}"
+            f"/{subfolder1}/{subfolder2}_g{group_idx}/{hash_hex}.bin"
+        )
+
+    def get_run_config(self) -> dict:
+        return dict(self.fields)
+
+    def get_config_file_path(self) -> str:
+        return f"{self.base_path}/{_CONFIG_FILENAME}"
+
+    @staticmethod
+    def _compute_base_path(root_dir: str, fields: dict) -> str:
+        """
+        Layout: <root_dir>/<safe_model_name>_<sha256-prefix>/.
+        safe_model_name replaces '/' with '_' so HuggingFace IDs don't nest.
+        """
+        canonical = json.dumps(fields, sort_keys=True, separators=(",", ":"))
+        digest = hashlib.sha256(canonical.encode("utf-8")).hexdigest()[
+            :_BASE_PATH_HASH_LEN
+        ]
+        safe_model_name = fields["model_name"].replace("/", "_")
+        return f"{root_dir}/{safe_model_name}_{digest}"
diff --git a/vllm/v1/kv_offload/mediums.py b/vllm/v1/kv_offload/mediums.py
deleted file mode 100644
index 85ef2a95a6bd..000000000000
--- a/vllm/v1/kv_offload/mediums.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from abc import ABC
-from collections.abc import Sequence
-
-import numpy as np
-
-from vllm.v1.kv_offload.abstract import LoadStoreSpec
-
-
-class BlockIDsLoadStoreSpec(LoadStoreSpec, ABC):
-    """
-    Spec for loading/storing KV blocks from given block numbers.
-    """
-
-    def __init__(self, block_ids: list[int]):
-        self.block_ids = np.array(block_ids, dtype=np.int64)
-
-    def __repr__(self) -> str:
-        return repr(self.block_ids)
-
-
-class GPULoadStoreSpec(BlockIDsLoadStoreSpec):
-    """
-    Spec for loading/storing a KV block to GPU memory.
-
-    If there are multiple KV groups, the blocks are expected to be
-    ordered by the group index.
-    In that case, group_sizes[i] determines the number of blocks
-    per the i-th KV group, and thus sum(group_sizes) == len(block_ids).
-    group_sizes=None indicates a single KV group.
-
-    If block_indices is given, each group (determined by group_sizes) of block IDs
-    will correspond to logically contiguous blocks, e.g. blocks 5-10 of a some request.
-    block_indices[i] will represent the block index of the first block in group #i.
-    Thus, len(block_indices) == len(group_sizes) = number of KV cache groups.
-    This information is required in order to support loading from offloaded blocks
-    which are larger than GPU blocks.
-    In such cases, the first GPU block per each group may be unaligned to the offloaded
-    block size, and so knowing block_indices[i] allows the worker to correctly
-    skip part of the first matching offloaded block.
-    Offloading from GPU is always aligned to offloaded block size, and so
-    block_indices will only be set by the offloading connector when loading into GPU.
-    """
-
-    def __init__(
-        self,
-        block_ids: list[int],
-        group_sizes: Sequence[int],
-        block_indices: Sequence[int] | None = None,
-    ):
-        super().__init__(block_ids)
-        assert sum(group_sizes) == len(block_ids)
-        assert block_indices is None or len(block_indices) == len(group_sizes)
-        self.group_sizes: Sequence[int] = group_sizes
-        self.block_indices: Sequence[int] | None = block_indices
-
-    @staticmethod
-    def medium() -> str:
-        return "GPU"
-
-
-class CPULoadStoreSpec(BlockIDsLoadStoreSpec):
-    """
-    Spec for loading/storing a KV block to CPU memory.
-    """
-
-    @staticmethod
-    def medium() -> str:
-        return "CPU"
diff --git a/vllm/v1/kv_offload/reuse_manager.py b/vllm/v1/kv_offload/reuse_manager.py
deleted file mode 100644
index daf6c65cd2d7..000000000000
--- a/vllm/v1/kv_offload/reuse_manager.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""
-Reuse-frequency gating for CPU KV-cache offload stores.
-
-FilterReusedOffloadingManager — OffloadingManager decorator that skips
-    storing blocks that have not yet been seen enough times.
-"""
-
-from collections import OrderedDict
-from collections.abc import Iterable
-
-from vllm.v1.core.kv_cache_utils import BlockHash
-from vllm.v1.kv_offload.abstract import (
-    LoadStoreSpec,
-    OffloadingEvent,
-    OffloadingManager,
-    PrepareStoreOutput,
-)
-
-
-class FilterReusedOffloadingManager(OffloadingManager):
-    """An :class:`OffloadingManager` decorator that skips storing blocks
-    whose reuse frequency is below *store_threshold*.
-
-    All methods are delegated to the *backing* manager.  Two methods are
-    intercepted:
-
-    * ``lookup`` — records each visited block hash in an internal LRU counter.
-    * ``prepare_store`` — filters out block hashes that have not yet
-      crossed the threshold *before* calling the backing
-      ``prepare_store``.
-
-    Args:
-        backing: The underlying ``OffloadingManager`` to delegate to.
-        store_threshold: A block must be seen at least this many times in
-            ``lookup()`` before it is eligible for offloading.  Must be >= 2
-            (a value of 1 would be equivalent to no filtering).
-        max_tracker_size: Maximum entries in the internal tracker's LRU table.
-    """
-
-    def __init__(
-        self,
-        backing: OffloadingManager,
-        store_threshold: int = 2,
-        max_tracker_size: int = 64_000,
-    ):
-        if store_threshold < 2:
-            raise ValueError(
-                "FilterReusedOffloadingManager store_threshold must be >= 2, "
-                f"got {store_threshold}"
-            )
-        if max_tracker_size < 1:
-            raise ValueError(
-                "FilterReusedOffloadingManager max_tracker_size must be >= 1, "
-                f"got {max_tracker_size}"
-            )
-        self._backing = backing
-        self.store_threshold = store_threshold
-        self.max_tracker_size = max_tracker_size
-        # Ordered so we can evict the LRU entry in O(1).
-        self.counts: OrderedDict[BlockHash, int] = OrderedDict()
-
-    # ------------------------------------------------------------------
-    # Intercepted methods
-    # ------------------------------------------------------------------
-
-    def lookup(self, block_hashes: Iterable[BlockHash]) -> int | None:
-        """Record each hash, then delegate lookup to backing manager."""
-        block_hashes = list(block_hashes)
-        for block_hash in block_hashes:
-            if block_hash in self.counts:
-                self.counts.move_to_end(block_hash)
-                self.counts[block_hash] += 1
-            else:
-                if len(self.counts) >= self.max_tracker_size:
-                    self.counts.popitem(last=False)  # evict LRU
-                self.counts[block_hash] = 1
-        return self._backing.lookup(block_hashes)
-
-    def prepare_store(
-        self, block_hashes: Iterable[BlockHash]
-    ) -> PrepareStoreOutput | None:
-        """Filter out blocks below threshold, then delegate to backing.
-
-        Filtering is evaluated *before* calling the backing manager's
-        ``prepare_store`` so that blocks that would be skipped do not
-        consume any CPU offload capacity.
-        """
-        block_hashes = list(block_hashes)
-        eligible = [
-            bh for bh in block_hashes if self.counts.get(bh, 0) >= self.store_threshold
-        ]
-
-        # Delegate to the backing manager with only the eligible hashes.
-        # Passing an empty list is intentional and safe — both
-        # LRUOffloadingManager and ARCOffloadingManager handle it correctly,
-        # returning a PrepareStoreOutput with empty lists.
-        return self._backing.prepare_store(eligible)
-
-    # ------------------------------------------------------------------
-    # Delegated methods
-    # ------------------------------------------------------------------
-
-    def prepare_load(self, block_hashes: Iterable[BlockHash]) -> LoadStoreSpec:
-        return self._backing.prepare_load(block_hashes)
-
-    def touch(self, block_hashes: Iterable[BlockHash]) -> None:
-        return self._backing.touch(block_hashes)
-
-    def complete_load(self, block_hashes: Iterable[BlockHash]) -> None:
-        return self._backing.complete_load(block_hashes)
-
-    def complete_store(
-        self, block_hashes: Iterable[BlockHash], success: bool = True
-    ) -> None:
-        return self._backing.complete_store(block_hashes, success)
-
-    def take_events(self) -> Iterable[OffloadingEvent]:
-        return self._backing.take_events()
diff --git a/vllm/v1/kv_offload/spec.py b/vllm/v1/kv_offload/spec.py
deleted file mode 100644
index 1eb4fdb3e6ce..000000000000
--- a/vllm/v1/kv_offload/spec.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from abc import ABC, abstractmethod
-from collections.abc import Iterator
-from dataclasses import dataclass
-from typing import TYPE_CHECKING
-
-import torch
-
-from vllm.logger import init_logger
-from vllm.v1.kv_offload.abstract import LoadStoreSpec, OffloadingManager
-from vllm.v1.kv_offload.worker.worker import OffloadingHandler
-
-if TYPE_CHECKING:
-    from vllm.config import VllmConfig
-    from vllm.v1.kv_cache_interface import KVCacheConfig
-
-logger = init_logger(__name__)
-
-
-@dataclass
-class CanonicalKVCacheTensor:
-    """
-    A canonicalized KV cache tensor whose first dimension is num_blocks.
-
-    For attention backends where the raw tensor has num_blocks at a
-    non-leading physical dimension (e.g. FlashAttention's
-    (2, num_blocks, ...) layout), the tensor is split so that each
-    resulting CanonicalKVCacheTensor starts with (num_blocks, ...).
-    """
-
-    # The KV cache tensor with shape (num_blocks, ...)
-    tensor: torch.Tensor
-    # The (possibly padded) page size per block in bytes
-    page_size_bytes: int
-
-
-@dataclass
-class CanonicalKVCacheRef:
-    """
-    Per-layer (or group of layers) reference to a specific (by index)
-    CanonicalKVCacheTensor and records the un-padded page size used by that layer.
-    """
-
-    # Index into the list of CanonicalKVCacheTensor objects
-    tensor_idx: int
-    # The un-padded page size per block in bytes
-    page_size_bytes: int
-
-
-@dataclass
-class CanonicalKVCaches:
-    """
-    Canonicalized block-level representation of the KV caches.
-
-    Composed of:
-        - Unique list of KV cache data tensors,
-          each with shape (num_blocks, page_size_in_bytes) and int8 dtype.
-        - Per-group data references of the tensors.
-          i.e. how each KV cache group maps to the tensors.
-    """
-
-    # Ordered list of unique block tensors, each with shape
-    # (num_blocks, ...).
-    tensors: list[CanonicalKVCacheTensor]
-    # Per-KV-cache-group list of data references that map each layer
-    # in the group to the appropriate entry in the tensors list.
-    group_data_refs: list[list[CanonicalKVCacheRef]]
-
-
-class OffloadingSpec(ABC):
-    """Spec for an offloading connector"""
-
-    def __init__(self, vllm_config: "VllmConfig", kv_cache_config: "KVCacheConfig"):
-        logger.warning(
-            "Initializing OffloadingSpec. This API is experimental and "
-            "subject to change in the future as we iterate the design."
-        )
-        self.vllm_config = vllm_config
-        self.kv_cache_config = kv_cache_config
-
-        kv_transfer_config = vllm_config.kv_transfer_config
-        assert kv_transfer_config is not None
-        self.extra_config = kv_transfer_config.kv_connector_extra_config
-
-        # block size used by vLLM for hashing request tokens for the sake
-        # of enabling prefix caching
-        self.hash_block_size = vllm_config.cache_config.block_size
-        # gpu block size per group
-        self.gpu_block_size: tuple[int, ...] = tuple(
-            kv_cache_group.kv_cache_spec.block_size
-            for kv_cache_group in kv_cache_config.kv_cache_groups
-        )
-
-        for block_size in self.gpu_block_size:
-            assert block_size % self.hash_block_size == 0
-
-        # offloaded_block_size / gpu_block_size
-        self.block_size_factor: int = 1
-
-        offloaded_block_size = self.extra_config.get("block_size")
-        if offloaded_block_size is not None:
-            offloaded_block_size_int = int(offloaded_block_size)
-            gpu_block_sizes = set(self.gpu_block_size)
-            assert len(gpu_block_sizes) == 1, (
-                "If 'block_size' is specified in kv_connector_extra_config, "
-                "there must be at least one KV cache group, "
-                "and all groups must have the same block size."
-            )
-            gpu_block_size = gpu_block_sizes.pop()
-
-            assert offloaded_block_size_int % gpu_block_size == 0
-            self.block_size_factor = offloaded_block_size_int // gpu_block_size
-
-    @abstractmethod
-    def get_manager(self) -> OffloadingManager:
-        """
-        Get an OffloadingManager that will be used
-        by the scheduler-side offloading connector to track
-        offloaded blocks and manage evictions.
-        """
-        pass
-
-    @abstractmethod
-    def get_handlers(
-        self, kv_caches: CanonicalKVCaches
-    ) -> Iterator[tuple[type[LoadStoreSpec], type[LoadStoreSpec], OffloadingHandler]]:
-        """
-        Get offloading handlers along with their respective src and dst types.
-
-        Args:
-            kv_caches: Canonicalized KV caches.
-
-        Yields:
-            Tuples of (src_type, dst_type, offloading_handler).
-        """
-        pass
diff --git a/vllm/v1/kv_offload/tiering/__init__.py b/vllm/v1/kv_offload/tiering/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/v1/kv_offload/tiering/base.py b/vllm/v1/kv_offload/tiering/base.py
new file mode 100644
index 000000000000..8014ac9b0ce8
--- /dev/null
+++ b/vllm/v1/kv_offload/tiering/base.py
@@ -0,0 +1,168 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Abstract interfaces and data types for the secondary tiering layer.
+"""
+
+from abc import ABC, abstractmethod
+from collections.abc import Collection, Iterable
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+from vllm.v1.kv_offload.base import OffloadKey, ReqContext
+
+if TYPE_CHECKING:
+    from vllm.v1.kv_offload.base import OffloadingSpec
+
+# Type alias for job IDs used in async transfer tracking
+JobId = int
+
+
+@dataclass
+class JobMetadata:
+    """Metadata for an in-flight async transfer job."""
+
+    job_id: JobId
+    keys: Collection[OffloadKey]
+    block_ids: np.ndarray
+    is_promotion: bool
+    req_context: ReqContext
+
+
+@dataclass
+class JobResult:
+    """Result of an async transfer job (successful or failed)."""
+
+    job_id: JobId
+    success: bool
+
+
+class SecondaryTierManager(ABC):
+    """
+    Abstract interface for managing a single non-primary offloading tier.
+
+    Secondary tiers cannot directly access GPU memory. All data transfers
+    must go through the CPU (primary) tier:
+      - Store: GPU → CPU (primary) → secondary  (cascade)
+      - Load:  secondary → CPU (primary) → GPU  (promotion)
+
+    IMPORTANT: All methods run in the Scheduler process and must be
+    lightweight and non-blocking. submit_load() and submit_store() submit
+    async jobs; get_finished() polls for completion.
+    """
+
+    def __init__(
+        self,
+        offloading_spec: "OffloadingSpec",
+        primary_kv_view: memoryview,
+        tier_type: str,
+    ) -> None:
+        """
+        Args:
+            offloading_spec: Offloading configuration.
+            primary_kv_view: Memoryview of the primary tier's CPU KV cache.
+            tier_type: Tier type identifier, set by SecondaryTierFactory
+                from the registered tier type.
+        """
+        self._offloading_spec = offloading_spec
+        self._primary_kv_view: memoryview = primary_kv_view
+        self.tier_type = tier_type
+
+    @abstractmethod
+    def lookup(self, key: OffloadKey, req_context: ReqContext) -> bool | None:
+        """
+        Check whether a block exists in this secondary tier.
+
+        Args:
+            key: Offload key to look up.
+            req_context: per-request context (e.g. kv_transfer_params).
+
+        Returns:
+            True if the block is present and ready,
+            False if not found,
+            or None if the block is being transferred (retry later).
+        """
+        pass
+
+    @abstractmethod
+    def submit_store(self, job_metadata: JobMetadata) -> None:
+        """
+        Submit an async job to store blocks from the primary tier to this
+        secondary tier.
+
+        This method must be lightweight and non-blocking: allocate metadata
+        and submit the transfer, but do NOT perform the data copy on the
+        calling thread.
+
+        Preconditions (guaranteed by the framework):
+          - ``job_metadata.block_ids`` are valid primary-tier slots, pinned
+            (ref-counted) for the duration of the transfer.
+
+        The implementation is responsible for:
+          1. Filtering out blocks already present in this tier
+          2. Evicting blocks if capacity is needed
+          3. Allocating space in this tier
+          4. Submitting the async transfer (read from primary via block_ids)
+
+        Report completion via ``get_finished()``.
+
+        Args:
+            job_metadata: Job metadata including job_id, keys, and block_ids
+                          identifying the primary-tier slots to read from.
+        """
+        pass
+
+    @abstractmethod
+    def submit_load(self, job_metadata: JobMetadata) -> None:
+        """
+        Submit an async job to load blocks from this secondary tier to the
+        primary tier.
+
+        This method must be lightweight and non-blocking: mark blocks as
+        in-flight and submit the transfer, but do NOT perform the data copy
+        on the calling thread.
+
+        Preconditions (guaranteed by the framework):
+          - ``job_metadata.block_ids`` are allocated primary-tier slots
+            ready to receive data.
+
+        The implementation must copy data from this tier into the
+        primary-tier slots identified by ``block_ids``.
+
+        Report completion via ``get_finished()``.
+
+        Args:
+            job_metadata: Job metadata including job_id, keys, and block_ids
+                          identifying the primary-tier slots to write into.
+        """
+        pass
+
+    @abstractmethod
+    def get_finished(self) -> Iterable[JobResult]:
+        """
+        Return all jobs (loads and stores) that completed since the last call.
+
+        The framework uses these results to release resources and finalize
+        transfers.
+
+        Returns:
+            Iterable of JobResult objects for jobs finished since the
+            last call.
+        """
+        pass
+
+    def touch(self, keys: Collection[OffloadKey], req_context: ReqContext):
+        """
+        Mark blocks as recently used for eviction policy.
+
+        Args:
+            keys: Offload keys to mark as recently used.
+            req_context: Per-request context.
+        """
+        return
+
+    def shutdown(self) -> None:
+        """Release resources held by this tier (threads, connections, etc.)."""
+        return
diff --git a/vllm/v1/kv_offload/tiering/example/__init__.py b/vllm/v1/kv_offload/tiering/example/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/v1/kv_offload/tiering/example/manager.py b/vllm/v1/kv_offload/tiering/example/manager.py
new file mode 100644
index 000000000000..65d519e46bb7
--- /dev/null
+++ b/vllm/v1/kv_offload/tiering/example/manager.py
@@ -0,0 +1,137 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+ExampleSecondaryTierManager: A simple in-memory secondary tier.
+
+This implementation provides a minimal secondary tier that stores blocks
+in memory (using a dictionary) with immediate completion. It serves as a
+reference for writing new tiers and is useful for testing the
+TieringOffloadingManager without requiring actual storage or network backends.
+"""
+
+import logging
+from collections.abc import Iterable
+from typing import TYPE_CHECKING
+
+from vllm.v1.kv_offload.base import OffloadKey, ReqContext
+from vllm.v1.kv_offload.tiering.base import (
+    JobMetadata,
+    JobResult,
+    SecondaryTierManager,
+)
+
+logger = logging.getLogger(__name__)
+
+if TYPE_CHECKING:
+    from vllm.v1.kv_offload.base import OffloadingSpec
+
+
+class ExampleSecondaryTierManager(SecondaryTierManager):
+    """
+    A simple in-memory secondary tier.
+
+    This implementation:
+    - Stores blocks in a dictionary (key -> True)
+    - Completes transfers immediately (synchronous)
+    """
+
+    def __init__(
+        self,
+        offloading_spec: "OffloadingSpec",
+        primary_kv_view: memoryview,
+        tier_type: str,
+        custom_param: int = 0,
+    ):
+        """
+        Initialize the example secondary tier.
+
+        Args:
+            custom_param: Dummy parameter demonstrating custom args.
+        """
+        super().__init__(
+            offloading_spec=offloading_spec,
+            primary_kv_view=primary_kv_view,
+            tier_type=tier_type,
+        )
+
+        logger.info(
+            "ExampleSecondaryTierManager initialized with custom_param=%d", custom_param
+        )
+
+        # key -> True (only care about presence)
+        self.blocks: dict[OffloadKey, bool] = {}
+
+        # Completed jobs waiting to be retrieved by get_finished()
+        self.completed_jobs: list[JobResult] = []
+
+    def lookup(self, key: OffloadKey, req_context: ReqContext) -> bool | None:
+        """
+        Check whether a block exists in this secondary tier.
+
+        Args:
+            key: Offload key to look up.
+            req_context: Per-request context.
+
+        Returns:
+            True if the block is present, False if not found.
+        """
+        return key in self.blocks
+
+    def submit_store(self, job_metadata: JobMetadata) -> None:
+        """
+        Submit a job to store blocks from primary tier to this tier.
+
+        Args:
+            job_metadata: Job metadata including job_id, keys, and
+                          spec for reading blocks from the primary tier.
+        """
+        keys = job_metadata.keys
+        block_ids = job_metadata.block_ids
+
+        assert len(keys) == len(block_ids), (
+            f"Length mismatch: {len(keys)} keys but {len(block_ids)} block_ids"
+        )
+
+        for key in keys:
+            self.blocks[key] = True
+        self.completed_jobs.append(JobResult(job_id=job_metadata.job_id, success=True))
+
+    def submit_load(self, job_metadata: JobMetadata) -> None:
+        """
+        Submit a job to load blocks from this tier to primary tier.
+
+        Args:
+            job_metadata: Job metadata including job_id, keys, and
+                          spec for writing blocks into the primary tier.
+        """
+        keys = job_metadata.keys
+        block_ids = job_metadata.block_ids
+
+        assert len(keys) == len(block_ids), (
+            f"Length mismatch: {len(keys)} keys but {len(block_ids)} block_ids"
+        )
+
+        for key in keys:
+            if key not in self.blocks:
+                self.completed_jobs.append(
+                    JobResult(job_id=job_metadata.job_id, success=False)
+                )
+                return
+
+        self.completed_jobs.append(JobResult(job_id=job_metadata.job_id, success=True))
+
+    def get_finished(self) -> Iterable[JobResult]:
+        """
+        Poll for finished jobs.
+
+        Returns:
+            Iterable of JobResult objects for all jobs that have
+            finished since the last call.
+        """
+        result = self.completed_jobs
+        self.completed_jobs = []
+        return result
+
+    def get_num_blocks(self) -> int:
+        """Get the number of blocks currently stored in this tier."""
+        return len(self.blocks)
diff --git a/vllm/v1/kv_offload/tiering/factory.py b/vllm/v1/kv_offload/tiering/factory.py
new file mode 100644
index 000000000000..bc5f1f6ad6c1
--- /dev/null
+++ b/vllm/v1/kv_offload/tiering/factory.py
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import importlib
+from collections.abc import Callable
+from typing import TYPE_CHECKING
+
+from vllm.v1.kv_offload.tiering.base import SecondaryTierManager
+
+if TYPE_CHECKING:
+    from vllm.v1.kv_offload.base import OffloadingSpec
+
+
+class SecondaryTierFactory:
+    _registry: dict[str, Callable[[], type[SecondaryTierManager]]] = {}
+
+    @classmethod
+    def register_tier(cls, tier_type: str, module_path: str, class_name: str) -> None:
+        if tier_type in cls._registry:
+            raise ValueError(f"Tier '{tier_type}' is already registered.")
+
+        def loader() -> type[SecondaryTierManager]:
+            module = importlib.import_module(module_path)
+            return getattr(module, class_name)
+
+        cls._registry[tier_type] = loader
+
+    @classmethod
+    def create_secondary_tier(
+        cls,
+        tier_config: dict,
+        primary_kv_view: memoryview,
+        offloading_spec: "OffloadingSpec",
+    ) -> SecondaryTierManager:
+        config = tier_config.copy()
+
+        tier_type = config.pop("type", None)
+        if not tier_type:
+            raise ValueError("Secondary tier configuration must include 'type'")
+
+        if tier_type not in cls._registry:
+            raise ValueError(
+                f"Unknown secondary tier type: {tier_type!r}. "
+                f"Supported types: {list(cls._registry)}"
+            )
+
+        tier_cls = cls._registry[tier_type]()
+        return tier_cls(
+            offloading_spec=offloading_spec,
+            primary_kv_view=primary_kv_view,
+            tier_type=tier_type,
+            **config,
+        )
+
+
+SecondaryTierFactory.register_tier(
+    "example",
+    "vllm.v1.kv_offload.tiering.example.manager",
+    "ExampleSecondaryTierManager",
+)
+
+SecondaryTierFactory.register_tier(
+    "fs_python",
+    "vllm.v1.kv_offload.tiering.fs.manager",
+    "FileSystemTierManager",
+)
diff --git a/vllm/v1/kv_offload/tiering/fs/__init__.py b/vllm/v1/kv_offload/tiering/fs/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/v1/kv_offload/tiering/fs/io.py b/vllm/v1/kv_offload/tiering/fs/io.py
new file mode 100644
index 000000000000..c5a82a73c674
--- /dev/null
+++ b/vllm/v1/kv_offload/tiering/fs/io.py
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import logging
+import os
+import random
+import threading
+
+logger = logging.getLogger(__name__)
+
+# O_DIRECT is Linux-specific and not available on macOS
+O_DIRECT = getattr(os, "O_DIRECT", 0)
+
+# Thread-local storage for unique temporary file suffixes
+_thread_local = threading.local()
+
+
+def _get_tmp_suffix() -> str:
+    """Generate a thread-local unique suffix for temporary files."""
+    try:
+        return _thread_local.tmp_suffix
+    except AttributeError:
+        _thread_local.tmp_suffix = f"_{random.randint(0, 2**63 - 1)}.tmp"
+        return _thread_local.tmp_suffix
+
+
+def _ensure_dirs(path: str) -> None:
+    """Create parent directories of *path* if they don't exist."""
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+
+
+def store_block(
+    dest_path: str,
+    buffer: memoryview,
+    offset: int,
+    block_size: int,
+) -> None:
+    """
+    Store callback: Writes to a temp file then atomically replaces the destination.
+    """
+    # Check if block already exists to avoid redundant writes
+    if os.path.exists(dest_path):
+        return
+
+    tmp_path = dest_path + _get_tmp_suffix()
+    # Ensure parent directories exist
+    _ensure_dirs(dest_path)
+
+    # Write block atomically. Cast to a flat byte view so the slice uses byte
+    # indices; the raw memoryview may be multi-dimensional with itemsize > 1.
+    view_slice = buffer.cast("B")[offset : offset + block_size]
+    try:
+        fd = os.open(
+            tmp_path,
+            os.O_CREAT | os.O_EXCL | os.O_WRONLY | os.O_TRUNC | O_DIRECT,
+            0o644,
+        )
+        try:
+            written = os.write(fd, view_slice)
+            if written < len(view_slice):
+                raise OSError(
+                    f"Short write: expected {len(view_slice)} bytes, wrote {written}"
+                )
+        finally:
+            os.close(fd)
+        os.replace(tmp_path, dest_path)
+    except Exception:
+        try:
+            os.remove(tmp_path)
+        except OSError as cleanup_exc:
+            logger.warning("Failed to remove temp file %s: %s", tmp_path, cleanup_exc)
+        raise
+
+
+def load_block(
+    source_path: str,
+    view: memoryview,
+    offset: int,
+    block_size: int,
+) -> None:
+    """
+    Load callback: read one KV block from disk. Remove the file on failure.
+    """
+    fd: int | None = None
+    view_slice = view.cast("B")[offset : offset + block_size]
+    try:
+        fd = os.open(source_path, os.O_RDONLY | O_DIRECT)
+        bytes_read = os.readv(fd, [view_slice])
+        if bytes_read < block_size:
+            raise OSError(f"Short read: expected {block_size} bytes, read {bytes_read}")
+    except Exception:
+        try:
+            os.remove(source_path)
+        except OSError as cleanup_exc:
+            logger.warning(
+                "Failed to remove unreadable file %s: %s", source_path, cleanup_exc
+            )
+        raise
+    finally:
+        if fd is not None:
+            os.close(fd)
diff --git a/vllm/v1/kv_offload/tiering/fs/manager.py b/vllm/v1/kv_offload/tiering/fs/manager.py
new file mode 100644
index 000000000000..25318b760d94
--- /dev/null
+++ b/vllm/v1/kv_offload/tiering/fs/manager.py
@@ -0,0 +1,150 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+FileSystemTierManager: Pure-Python file system secondary tier for KV cache offloading.
+
+Store path:
+    Data is written to a temp file (<dest_path.tmp>) via os.write,
+    then os.replace'd to the final path (without .tmp).
+
+Load path:
+    Data is read from the block file directly via os.readv into the
+    provided memoryview slice.
+
+File naming:  <base_path>_r<rank>/<hhh>/<hh>_g<group_idx>/<hash_hex>.bin
+              (hash-based subdirectories to limit directory fan-out)
+"""
+
+import functools
+import json
+import os
+from collections.abc import Iterable
+from typing import TYPE_CHECKING
+
+from vllm.logger import init_logger
+from vllm.v1.kv_offload.base import OffloadKey, ReqContext
+from vllm.v1.kv_offload.file_mapper import FileMapper
+from vllm.v1.kv_offload.tiering.base import (
+    JobMetadata,
+    JobResult,
+    SecondaryTierManager,
+)
+from vllm.v1.kv_offload.tiering.fs.io import load_block, store_block
+from vllm.v1.kv_offload.tiering.fs.thread_pool import DualQueueThreadPool
+
+if TYPE_CHECKING:
+    from vllm.v1.kv_offload.base import OffloadingSpec
+
+logger = init_logger(__name__)
+
+
+class FileSystemTierManager(SecondaryTierManager):
+    """
+    Pure-Python disk-backed secondary tier.
+
+    Read-priority threads service load jobs preferentially; write-priority
+    threads service store jobs preferentially.  Both groups can drain either
+    queue, so neither starves.
+
+    submit_store / submit_load are non-blocking: they enqueue tasks and return.
+    get_finished() polls job completion and returns completed JobResults.
+
+    """
+
+    def __init__(
+        self,
+        offloading_spec: "OffloadingSpec",
+        primary_kv_view: memoryview,
+        tier_type: str,
+        root_dir: str,
+        n_read_threads: int = 16,
+        n_write_threads: int = 16,
+    ):
+        """
+        Args:
+            offloading_spec: contains the vllm_config, kv_cache_config
+                and block_size_factor.
+            primary_kv_view: Memoryview of the primary tier's CPU KV cache.
+            tier_type: Tier type identifier, set by SecondaryTierFactory.
+            root_dir: Root directory for block files.
+            n_read_threads: Number of read-priority I/O threads.
+            n_write_threads: Number of write-priority I/O threads.
+        """
+        super().__init__(offloading_spec, primary_kv_view, tier_type)
+
+        # Extract block size from primary view
+        assert primary_kv_view.strides is not None, (
+            "primary_kv_view.strides cannot be None"
+        )
+        self._block_size: int = primary_kv_view.strides[0]
+
+        # Create file mapper
+        self.file_mapper = FileMapper.from_offloading_spec(
+            root_dir=root_dir,
+            offloading_spec=offloading_spec,
+            gpu_blocks_per_file=offloading_spec.block_size_factor,
+        )
+
+        # Write config file
+        config_path = self.file_mapper.get_config_file_path()
+        os.makedirs(os.path.dirname(config_path), exist_ok=True)
+        if not os.path.exists(config_path):
+            with open(config_path, "w") as f:
+                json.dump(
+                    self.file_mapper.get_run_config(), f, indent=2, sort_keys=True
+                )
+
+        self._pool = DualQueueThreadPool(
+            n_read_threads,
+            n_write_threads,
+            thread_name_prefix="vllm_kv_py_fs",
+        )
+
+    def lookup(
+        self, key: OffloadKey, req_context: ReqContext | None = None
+    ) -> bool | None:
+        return os.path.exists(self.file_mapper.get_file_name(key))
+
+    def submit_store(self, job_metadata: JobMetadata) -> None:
+        tasks = (
+            functools.partial(
+                store_block,
+                self.file_mapper.get_file_name(key),
+                self._primary_kv_view,
+                int(bid) * self._block_size,
+                self._block_size,
+            )
+            for key, bid in zip(job_metadata.keys, job_metadata.block_ids)
+        )
+        self._pool.enqueue_store(job_metadata.job_id, len(job_metadata.keys), tasks)
+
+    def submit_load(self, job_metadata: JobMetadata) -> None:
+        tasks = (
+            functools.partial(
+                load_block,
+                self.file_mapper.get_file_name(key),
+                self._primary_kv_view,
+                int(bid) * self._block_size,
+                self._block_size,
+            )
+            for key, bid in zip(job_metadata.keys, job_metadata.block_ids)
+        )
+        self._pool.enqueue_load(job_metadata.job_id, len(job_metadata.keys), tasks)
+
+    def get_finished(self) -> Iterable[JobResult]:
+        """
+        Collect completed jobs from the finished-jobs queue.
+        """
+        return (
+            JobResult(job_id=job_id, success=success)
+            for job_id, success in self._pool.get_finished()
+        )
+
+    def shutdown(self) -> None:
+        """
+        Release resources held by this tier.
+
+        Shuts down the thread pool, clearing pending tasks and waiting for
+        active threads to complete.
+        """
+        self._pool.shutdown(wait=True)
diff --git a/vllm/v1/kv_offload/tiering/fs/thread_pool.py b/vllm/v1/kv_offload/tiering/fs/thread_pool.py
new file mode 100644
index 000000000000..80704babd7e5
--- /dev/null
+++ b/vllm/v1/kv_offload/tiering/fs/thread_pool.py
@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Thread pool:
+    Two queues (load, store) and two sets of threads:
+      - Load-priority threads: drain the load queue first, then the store queue.
+      - Store-priority threads: drain the store queue first, then the load queue.
+    Load jobs are enqueued to the load queue; store jobs to the store queue.
+"""
+
+import threading
+from collections import deque
+from collections.abc import Callable, Iterable
+
+from vllm.logger import init_logger
+from vllm.v1.kv_offload.tiering.base import JobId
+
+logger = init_logger(__name__)
+
+
+class JobState:
+    """
+    Thread-safe completion tracker for a set of per-block I/O tasks.
+
+    Each task calls task_done(success) when it finishes.
+    """
+
+    __slots__ = ("_job_id", "_n_tasks", "_completed", "_success", "_lock")
+
+    def __init__(self, job_id: JobId, n_tasks: int) -> None:
+        self._job_id: JobId = job_id
+        self._n_tasks = n_tasks
+        self._completed = 0
+        self._success = True
+        self._lock = threading.Lock()
+
+    @property
+    def job_id(self) -> JobId:
+        return self._job_id
+
+    def task_done(self, success: bool) -> tuple[bool, bool]:
+        """Returns if job completed and success flag"""
+        with self._lock:
+            self._completed += 1
+            if not success:
+                self._success = False
+            return self._completed == self._n_tasks, self._success
+
+
+class DualQueueThreadPool:
+    """
+    Thread pool with two task queues (load and store) and two thread groups.
+
+    Load-priority threads drain the load queue first, then fall back to the
+    store queue.  Store-priority threads do the reverse.  Both queues share
+    a single condition variable.
+    """
+
+    def __init__(
+        self,
+        n_read_threads: int,
+        n_write_threads: int,
+        thread_name_prefix: str = "fs_secondary_tier",
+    ) -> None:
+        self._load_q: deque = deque()
+        self._store_q: deque = deque()
+        self._condition = threading.Condition(threading.Lock())
+        self._stop = False
+        self._threads: list[threading.Thread] = []
+        self._finished_q: deque[tuple[JobId, bool]] = deque()
+
+        for i in range(n_read_threads):
+            t = threading.Thread(
+                target=self._worker,
+                args=(True,),
+                name=f"{thread_name_prefix}_l{i}",
+                daemon=True,
+            )
+            t.start()
+            self._threads.append(t)
+
+        for i in range(n_write_threads):
+            t = threading.Thread(
+                target=self._worker,
+                args=(False,),
+                name=f"{thread_name_prefix}_s{i}",
+                daemon=True,
+            )
+            t.start()
+            self._threads.append(t)
+
+    def enqueue_load(
+        self,
+        job_id: JobId,
+        n_tasks: int,
+        tasks: Iterable[Callable],
+    ) -> None:
+        """Enqueue load tasks for a job (high-priority for load-priority threads)."""
+        state = JobState(job_id, n_tasks)
+        with self._condition:
+            for fn in tasks:
+                self._load_q.append((fn, state))
+            self._condition.notify(n_tasks)
+
+    def enqueue_store(
+        self,
+        job_id: JobId,
+        n_tasks: int,
+        tasks: Iterable[Callable],
+    ) -> None:
+        """Enqueue store tasks for a job (high-priority for store-priority threads)."""
+        state = JobState(job_id, n_tasks)
+        with self._condition:
+            for fn in tasks:
+                self._store_q.append((fn, state))
+            self._condition.notify(n_tasks)
+
+    def get_finished(self) -> list[tuple[JobId, bool]]:
+        jobs = []
+        while self._finished_q:
+            jobs.append(self._finished_q.popleft())
+        return jobs
+
+    def shutdown(self, wait: bool = True) -> None:
+        with self._condition:
+            self._stop = True
+            self._load_q.clear()
+            self._store_q.clear()
+            self._condition.notify_all()
+        if wait:
+            for t in self._threads:
+                t.join()
+
+    def _worker(self, load_priority: bool) -> None:
+        # Wait for tasks, process from primary queue first, fall back to secondary.
+        while True:
+            with self._condition:
+                self._condition.wait_for(
+                    lambda: self._stop or self._load_q or self._store_q
+                )
+                if self._stop:
+                    return
+                primary = self._load_q if load_priority else self._store_q
+                secondary = self._store_q if load_priority else self._load_q
+                task, state = primary.popleft() if primary else secondary.popleft()
+            try:
+                task()
+                job_finished, success = state.task_done(True)
+            except Exception as exc:
+                logger.error(
+                    "FileSystemTierManagerPython: job %s block I/O failed: %s",
+                    state.job_id,
+                    exc,
+                )
+                job_finished, success = state.task_done(False)
+
+            if job_finished:
+                self._finished_q.append((state.job_id, success))
diff --git a/vllm/v1/kv_offload/tiering/manager.py b/vllm/v1/kv_offload/tiering/manager.py
new file mode 100644
index 000000000000..4a2d79254ea1
--- /dev/null
+++ b/vllm/v1/kv_offload/tiering/manager.py
@@ -0,0 +1,505 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+TieringOffloadingManager: Multi-tier KV cache offloading orchestrator.
+
+This manager coordinates between a CPU primary tier (with direct GPU access)
+and zero or more secondary tiers (Storage, Network, etc.) to provide
+hierarchical KV cache offloading.
+
+Key Design Principles:
+1. Always offload to all tiers — When a block is stored to the primary tier,
+   it is cascaded to ALL secondary tiers
+2. Primary tier is the gateway — Secondary tiers cannot access GPU memory
+   directly; all data flows through the CPU primary tier
+3. Staged promotion — Blocks in secondary tiers must be promoted to the
+   primary tier before GPU can access them
+4. Transparent retry mechanism — Return None from lookup() to signal
+   "data is being promoted, try later"
+5. ref_cnt as eviction protection — primary.prepare_read() increments ref_cnt,
+   protecting blocks from eviction until complete_read() is called
+"""
+
+from collections.abc import Collection, Iterable
+from dataclasses import dataclass, field
+
+import numpy as np
+
+from vllm.logger import init_logger
+from vllm.v1.kv_offload.base import (
+    LoadStoreSpec,
+    OffloadingEvent,
+    OffloadingManager,
+    OffloadKey,
+    PrepareStoreOutput,
+    ReqContext,
+)
+from vllm.v1.kv_offload.cpu.common import CPULoadStoreSpec
+from vllm.v1.kv_offload.cpu.manager import CPUOffloadingManager
+from vllm.v1.kv_offload.cpu.shared_offload_region import SharedOffloadRegion
+from vllm.v1.kv_offload.tiering.base import (
+    JobId,
+    JobMetadata,
+    SecondaryTierManager,
+)
+
+logger = init_logger(__name__)
+
+
+@dataclass
+class PendingPromotion:
+    """Accumulator for blocks awaiting submit_load() for one (tier, request)."""
+
+    req_context: ReqContext
+    keys: list[OffloadKey] = field(default_factory=list)
+    block_ids: list[int] = field(default_factory=list)
+
+
+class CPUPrimaryTierOffloadingManager(CPUOffloadingManager):
+    """CPUOffloadingManager with a primary/secondary transfer interface.
+
+    The inherited prepare_store/complete_store/prepare_load/complete_load are the
+    GPU-facing OffloadingManager interface. These aliases expose the same operations
+    from the secondary tier perspective, where read/write refers to secondary
+    accessing primary. This avoids confusion when reading TieringOffloadingManager
+    code (e.g. calling prepare_load inside a cascade/store path would be misleading).
+    """
+
+    def __init__(
+        self,
+        num_blocks: int,
+        mmap_region: SharedOffloadRegion,
+        cache_policy: str = "lru",
+        enable_events: bool = False,
+    ):
+        super().__init__(
+            num_blocks=num_blocks,
+            cache_policy=cache_policy,  # type: ignore[arg-type]
+            enable_events=enable_events,
+        )
+        self._mmap_region = mmap_region
+        # read/write is for CPU<->secondary transfers,
+        # load/store is for CPU<->GPU transfers.
+        # These aliases avoid calling prepare_load inside a store path.
+        self.prepare_read = self.prepare_load
+        self.complete_read = self.complete_load
+        self.prepare_write = self.prepare_store
+        self.complete_write = self.complete_store
+
+        self._kv_memoryview = mmap_region.create_kv_memoryview()
+
+    def get_kv_memoryview(self) -> memoryview:
+        """Return the memoryview over the primary tier's KV cache buffer.
+
+        The view has shape (num_blocks, row_stride_bytes) and is backed by the
+        SharedOffloadRegion mmap.  Secondary tiers address block *b* as
+        ``view[b]``.
+        """
+        return self._kv_memoryview
+
+    def shutdown(self) -> None:
+        super().shutdown()
+        self._kv_memoryview.release()
+        self._mmap_region.cleanup()
+
+
+class TieringOffloadingManager(OffloadingManager):
+    """
+    Orchestrates multi-tier KV cache offloading.
+
+    This manager coordinates between a CPU primary tier (with direct GPU access)
+    and zero or more secondary tiers (Storage, Network, etc.) to provide
+    hierarchical KV cache offloading.
+
+    Key internal state:
+      - Minimal state tracking; relies on secondary tiers to report completion
+        via get_finished()
+      - Secondary tiers return JobResult objects containing all necessary
+        information
+      - job_id_counter: monotonically increasing counter for job IDs
+    """
+
+    def __init__(
+        self,
+        primary_tier: CPUPrimaryTierOffloadingManager,
+        secondary_tiers: list[SecondaryTierManager] | None = None,
+        enable_events: bool = False,
+    ):
+        """
+        Initialize the TieringOffloadingManager.
+
+        Args:
+            primary_tier: The primary tier manager (CPU-based).
+            secondary_tiers: List of secondary tier managers (e.g., Storage,
+                            Network). Can be None or empty list.
+            enable_events: Whether to track offloading events
+        """
+        self.primary_tier: CPUPrimaryTierOffloadingManager = primary_tier
+        self.secondary_tiers = secondary_tiers or []
+
+        self._job_id_counter: int = 0
+        self.events: list[OffloadingEvent] | None = [] if enable_events else None
+
+        # Job tracking: maps job_id to metadata for all in-flight transfers.
+        # JobMetadata.is_promotion distinguishes direction:
+        #   True:  secondary → primary (promotion)
+        #   False: primary → secondary (cascade)
+        self._transfer_jobs: dict[JobId, JobMetadata] = {}
+
+        # Pending promotion requests accumulated during lookup() calls; flushed
+        # as one batched submit_load() per (tier, request) in take_events().
+        # Outer key: tier. Inner key: req_context.req_id — the same ReqContext
+        # object is reused for all block lookups of a given request per engine step.
+        self._pending_load_submissions: dict[
+            SecondaryTierManager, dict[str, PendingPromotion]
+        ] = {}
+
+        # Gate for once-per-step execution of _maybe_process_finished_jobs().
+        # Reset at the end of each step in take_events().
+        self._processed_jobs_this_step: bool = False
+
+    def _next_job_id(self) -> JobId:
+        """Generate a unique job ID for async transfer tracking."""
+        job_id = self._job_id_counter
+        self._job_id_counter += 1
+        return job_id
+
+    def _maybe_process_finished_jobs(self):
+        """
+        Poll secondary tiers for completed jobs (at most once per step).
+
+        Guarded by _processed_jobs_this_step: the first call in an engine step
+        does the actual polling; subsequent calls are no-ops. The flag is reset
+        in take_events() at the end of each step.
+        """
+        if self._processed_jobs_this_step:
+            return
+        self._processed_jobs_this_step = True
+        self._process_finished_jobs()
+
+    def _process_finished_jobs(self):
+        """
+        Unconditionally poll all secondary tiers for completed jobs.
+
+        This method:
+        1. Calls get_finished() on each secondary tier
+        2. For completed stores (primary→secondary): calls primary.complete_read()
+           to decrement ref_cnt
+        3. For completed loads (secondary→primary): calls primary.complete_write()
+           to make blocks available
+        """
+        for i, tier in enumerate(self.secondary_tiers):
+            for completed_job in tier.get_finished():
+                job_id = completed_job.job_id
+                job_metadata = self._transfer_jobs.pop(job_id, None)
+                assert job_metadata is not None, (
+                    f"Finished job_id {job_id} from tier #{i}"
+                    f" ({tier.tier_type}) not in _transfer_jobs"
+                )
+
+                if job_metadata.is_promotion:
+                    # secondary→primary transfer (promotion) completed.
+                    # Make blocks available in primary tier.
+                    self.primary_tier.complete_write(
+                        job_metadata.keys,
+                        job_metadata.req_context,
+                        completed_job.success,
+                    )
+                else:
+                    # primary→secondary transfer completed.
+                    # Decrement ref_cnt on primary blocks.
+                    self.primary_tier.complete_read(
+                        job_metadata.keys, job_metadata.req_context
+                    )
+
+    def lookup(self, key: OffloadKey, req_context: ReqContext) -> bool | None:
+        """
+        Check whether a single block is offloaded and ready.
+
+        Algorithm:
+            1. Process any completed async jobs first.
+            2. Query primary tier — short-circuit on hit or in-flight.
+            3. On primary miss, query secondary tiers — stop on first
+               hit and initiate promotion.
+
+        Args:
+            key: Block hash to look up.
+            req_context: Per-request context.
+
+        Returns:
+            True  — block is ready in the primary tier.
+            None  — block found but not yet ready (primary in-flight,
+                    promotion started, or a secondary tier is busy).
+            False — block not found in any tier, or primary is full
+                    and cannot accept a promotion.
+        """
+        self._maybe_process_finished_jobs()
+
+        primary_hit = self.primary_tier.lookup(key, req_context)
+        if primary_hit is True:
+            return True
+        if primary_hit is None:
+            return None
+
+        any_none = False
+        for tier in self.secondary_tiers:
+            result = tier.lookup(key, req_context)
+            if result is True:
+                if not self._initiate_promotion(tier, key, req_context):
+                    return False  # primary full, block unavailable
+                return None  # promotion started, retry later
+            if result is None:
+                any_none = True
+
+        if any_none:
+            return None
+        return False
+
+    def _initiate_promotion(
+        self,
+        tier: SecondaryTierManager,
+        key: OffloadKey,
+        req_context: ReqContext,
+    ) -> bool:
+        """
+        Queue a block for promotion from a secondary tier to the primary tier.
+
+        Allocates space in the primary tier immediately (sets ref_cnt=-1 so
+        subsequent lookups within the same step see the slot as in-flight),
+        then defers the actual submit_load() call to _flush_pending_promotions()
+        so all blocks queued during one engine step are submitted as a single
+        batched job.
+
+        Args:
+            tier: The secondary tier to promote from
+            key: Block to promote
+            req_context: Per-request context forwarded to primary.prepare_write().
+
+        Returns:
+            True if promotion was initiated, False if primary tier is full.
+        """
+        # Allocate space in primary tier for promoted block.
+        # Must happen immediately so primary.lookup() returns None (in-flight)
+        # for this key on any subsequent lookup() call within the same step,
+        # preventing duplicate promotion attempts.
+        primary_write_result = self.primary_tier.prepare_write([key], req_context)
+
+        if primary_write_result is None:
+            # Primary tier is full; caller should treat the block as unavailable
+            # rather than retrying indefinitely.
+            return False
+
+        store_spec = primary_write_result.store_spec
+        assert isinstance(store_spec, CPULoadStoreSpec)
+        # Defer submit_load to take_events(). Group by (tier, request) so each
+        # request's blocks are submitted as one batched job per tier.
+        tier_pending = self._pending_load_submissions.setdefault(tier, {})
+        ctx_id = req_context.req_id
+        if ctx_id not in tier_pending:
+            tier_pending[ctx_id] = PendingPromotion(
+                keys=[], block_ids=[], req_context=req_context
+            )
+        entry = tier_pending[ctx_id]
+        entry.keys.extend(primary_write_result.keys_to_store)
+        entry.block_ids.extend(store_spec.block_ids)
+        return True
+
+    def _flush_pending_promotions(self) -> None:
+        """Submit one batched submit_load() per (tier, request).
+
+        Called from take_events() at the end of each engine step, flushing
+        all promotion requests deferred during lookup().
+        """
+        if not self._pending_load_submissions:
+            return
+
+        for tier, pending_by_ctx in self._pending_load_submissions.items():
+            for entry in pending_by_ctx.values():
+                job_id = self._next_job_id()
+                job_metadata = JobMetadata(
+                    job_id=job_id,
+                    keys=entry.keys,
+                    block_ids=np.array(entry.block_ids, dtype=np.int64),
+                    is_promotion=True,
+                    req_context=entry.req_context,
+                )
+                self._transfer_jobs[job_id] = job_metadata
+                tier.submit_load(job_metadata)
+
+        self._pending_load_submissions.clear()
+
+    def prepare_load(
+        self, keys: Collection[OffloadKey], req_context: ReqContext
+    ) -> LoadStoreSpec:
+        """
+        Prepare blocks to be loaded from primary tier to GPU.
+
+        CRITICAL: This method calls _maybe_process_finished_jobs() FIRST to ensure
+        that any completed promotions have been finalized and blocks are ready.
+
+        This increments ref_cnt on the blocks in the primary tier, protecting
+        them from eviction during the transfer.
+
+        Args:
+            keys: Blocks to prepare for loading.
+            req_context: Per-request context.
+
+        Returns:
+            LoadStoreSpec for reading from primary tier.
+        """
+        # Process completed promotions to ensure blocks are ready
+        self._maybe_process_finished_jobs()
+
+        return self.primary_tier.prepare_load(keys, req_context)
+
+    def touch(self, keys: Collection[OffloadKey], req_context: ReqContext):
+        """
+        Mark blocks as recently used in all tiers.
+
+        Args:
+            keys: Blocks to mark as recently used.
+            req_context: Per-request context.
+        """
+        self.primary_tier.touch(keys, req_context)
+        for tier in self.secondary_tiers:
+            tier.touch(keys, req_context)
+
+    def complete_load(self, keys: Collection[OffloadKey], req_context: ReqContext):
+        """
+        Mark blocks as done loading from primary tier to GPU.
+
+        This decrements ref_cnt on the blocks in the primary tier, allowing
+        them to be evicted again.
+
+        Args:
+            keys: Blocks that finished loading.
+            req_context: Per-request context.
+        """
+        self.primary_tier.complete_load(keys, req_context)
+
+    def prepare_store(
+        self, keys: Collection[OffloadKey], req_context: ReqContext
+    ) -> PrepareStoreOutput | None:
+        """
+        Prepare blocks to be stored from GPU to primary tier.
+
+        CRITICAL: This method calls _maybe_process_finished_jobs() FIRST to ensure
+        that any completed async transfers have their ref_cnt decremented
+        before the primary tier makes eviction decisions.
+
+        Args:
+            keys: Blocks to prepare for storing.
+            req_context: Per-request context.
+
+        Returns:
+            PrepareStoreOutput describing where to store blocks and what was
+            evicted, or None if store cannot proceed.
+        """
+        # Step 1: Poll for completed async jobs FIRST
+        # This decrements ref_cnt on primary blocks that have been
+        # successfully transferred to secondary tiers.
+        self._maybe_process_finished_jobs()
+
+        # Step 2: Store to primary tier
+        primary_result = self.primary_tier.prepare_store(keys, req_context)
+
+        # Note: Secondary tier cascading will happen in complete_store()
+        # after the GPU→Primary transfer completes and blocks are ready.
+
+        return primary_result
+
+    def complete_store(
+        self,
+        keys: Collection[OffloadKey],
+        req_context: ReqContext,
+        success: bool = True,
+    ):
+        """
+        Mark blocks as done storing from GPU to primary tier.
+
+        This is where secondary tier cascading happens — after blocks are
+        confirmed to be in the primary tier, they are cascaded to ALL
+        secondary tiers.
+
+        For each secondary tier:
+        1. Call primary.prepare_read() to get LoadStoreSpec AND increment
+           ref_cnt (protecting blocks during async transfer)
+        2. Call tier.submit_store() to start async transfer: primary→secondary
+        3. Track the job in _store_jobs dictionary
+
+        Args:
+            keys: Blocks that finished storing.
+            success: Whether the GPU→primary transfer succeeded.
+            req_context: Per-request context forwarded to primary.prepare_read().
+        """
+        # Step 1: Complete store in primary tier (makes blocks loadable)
+        self.primary_tier.complete_store(keys, req_context, success)
+
+        if not success:
+            # If GPU→Primary transfer failed, don't cascade to secondary tiers
+            return
+
+        # Step 2: Cascade to ALL secondary tiers
+        # For each secondary tier, call primary.prepare_read() to get the
+        # LoadStoreSpec AND to increment ref_cnt (protecting blocks from
+        # eviction during the async transfer). One prepare_read() call per
+        # secondary tier.
+        for tier in self.secondary_tiers:
+            primary_blocks_spec = self.primary_tier.prepare_read(keys, req_context)
+
+            # Submit async store job: primary→secondary
+            job_id = self._next_job_id()
+
+            # Track this store job
+            assert isinstance(primary_blocks_spec, CPULoadStoreSpec)
+            job_metadata = JobMetadata(
+                job_id=job_id,
+                keys=keys,
+                block_ids=primary_blocks_spec.block_ids,
+                is_promotion=False,
+                req_context=req_context,
+            )
+            self._transfer_jobs[job_id] = job_metadata
+
+            tier.submit_store(job_metadata)
+
+        # Note: The async transfers are now in flight. Their completion is
+        # tracked via get_finished() / _maybe_process_finished_jobs().
+
+    def take_events(self) -> Iterable[OffloadingEvent]:
+        """
+        End-of-step hook: flush deferred work, yield events, reset per-step state.
+
+        Called once per engine step from Scheduler.update_from_output() →
+        connector.take_events(). Ensures _maybe_process_finished_jobs() has run
+        at least once this step, flushes pending promotions, yields collected
+        events, and resets the per-step flag.
+
+        Yields:
+            New OffloadingEvents collected since the last call.
+        """
+        # TODO: Move _flush_pending_promotions() to a dedicated end_of_batch()
+        # hook once one exists. For now, take_events() serves as the flush
+        # point under the assumption that it is called at the end of each
+        # engine step (Scheduler.update_from_output() → connector.take_events()).
+        # When the dedicated hook is added, update tests that rely on
+        # take_events() to signal end of step.
+
+        self._maybe_process_finished_jobs()
+
+        self._flush_pending_promotions()
+
+        # Reset the per-step gate so next step's first call does real work.
+        self._processed_jobs_this_step = False
+
+        if self.events is not None:
+            yield from self.events
+            self.events.clear()
+
+        yield from self.primary_tier.take_events()
+
+    def shutdown(self) -> None:
+        """Shutdown all tiers and release resources."""
+        for tier in self.secondary_tiers:
+            tier.shutdown()
+        self.primary_tier.shutdown()
diff --git a/vllm/v1/kv_offload/tiering/spec.py b/vllm/v1/kv_offload/tiering/spec.py
new file mode 100644
index 000000000000..ced8a7fc6540
--- /dev/null
+++ b/vllm/v1/kv_offload/tiering/spec.py
@@ -0,0 +1,186 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+TieringOffloadingSpec: Spec for multi-tier KV cache offloading.
+
+This spec creates a TieringOffloadingManager with a CPU primary tier
+and configurable secondary tiers (e.g., Storage, Network).
+
+Configuration via kv_connector_extra_config:
+  - cpu_bytes_to_use: (required) Bytes to allocate for CPU primary tier
+  - block_size: (optional) Block size for offloaded blocks (default: GPU block size)
+  - eviction_policy: (optional) Primary tier eviction policy: "lru" or
+    "arc" (default: "lru")
+  - secondary_tiers: (optional) List of secondary tier configurations
+    Each secondary tier config is a dict with:
+      - type: (required) Type of secondary tier (e.g., "example", "storage", "network")
+      - Additional tier-specific parameters are passed directly to the tier
+        constructor. See each tier's documentation for supported parameters.
+
+Example configuration:
+{
+    "cpu_bytes_to_use": 10737418240,  # 10 GB
+    "block_size": 16,
+    "eviction_policy": "lru",
+    "secondary_tiers": [
+        {
+            "type": "example",
+            "custom_param": 67
+        }
+    ]
+}
+"""
+
+import torch
+from typing_extensions import override
+
+from vllm.config import VllmConfig
+from vllm.logger import init_logger
+from vllm.v1.kv_cache_interface import KVCacheConfig
+from vllm.v1.kv_offload.base import CanonicalKVCaches, OffloadingManager
+from vllm.v1.kv_offload.cpu.gpu_worker import CpuGpuOffloadingHandlers
+from vllm.v1.kv_offload.cpu.shared_offload_region import SharedOffloadRegion
+from vllm.v1.kv_offload.cpu.spec import CPUOffloadingSpec
+from vllm.v1.kv_offload.tiering.factory import SecondaryTierFactory
+from vllm.v1.kv_offload.tiering.manager import (
+    CPUPrimaryTierOffloadingManager,
+    TieringOffloadingManager,
+)
+
+logger = init_logger(__name__)
+
+
+class TieringOffloadingSpec(CPUOffloadingSpec):
+    """
+    Spec for multi-tier KV cache offloading.
+
+    Creates a TieringOffloadingManager with:
+    - Primary tier: CPU (LRU or ARC eviction policy)
+    - Secondary tiers: Configurable via extra_config
+
+    The CPU primary tier has direct GPU access and serves as the gateway for
+    all GPU↔offload operations. Secondary tiers cannot directly access GPU
+    memory and must transfer data through the primary tier.
+    """
+
+    def __init__(self, vllm_config: VllmConfig, kv_cache_config: KVCacheConfig):
+        super().__init__(vllm_config, kv_cache_config)
+        # Redeclare for mypy: parent sets this but `--follow-imports skip` hides it
+        self._manager: OffloadingManager | None = None
+
+        # Parse secondary tier configurations
+        self.secondary_tier_configs = self.extra_config.get("secondary_tiers", [])
+        if not isinstance(self.secondary_tier_configs, list):
+            raise ValueError("secondary_tiers must be a list of tier configurations")
+
+        # Scheduler-side mmap (rank=None); kept for cleanup
+        self._scheduler_mmap: SharedOffloadRegion | None = None
+
+    @override
+    def get_manager(self) -> OffloadingManager:
+        """
+        Get the TieringOffloadingManager.
+
+        Creates a TieringOffloadingManager with:
+        - Primary tier: CPU (LRU or ARC)
+        - Secondary tiers: As configured in extra_config
+
+        Returns:
+            TieringOffloadingManager instance
+        """
+        if not self._manager:
+            kv_events_config = self.vllm_config.kv_events_config
+            enable_events = (
+                kv_events_config is not None and kv_events_config.enable_kv_cache_events
+            )
+
+            # Create scheduler-side SharedOffloadRegion (rank=None) so the
+            # primary tier can eagerly create a memoryview over _base.
+            world_size = self.vllm_config.parallel_config.world_size
+            scheduler_mmap = SharedOffloadRegion(
+                instance_id=self.vllm_config.instance_id,
+                total_size_bytes=self.cpu_page_size_per_worker
+                * world_size
+                * self.num_blocks,
+                num_blocks=self.num_blocks,
+                rank=None,
+                num_workers=world_size,
+                cpu_page_size=self.cpu_page_size_per_worker,
+            )
+            self._scheduler_mmap = scheduler_mmap
+
+            # Create primary tier (CPU-based)
+            assert len(self.gpu_block_size) == 1
+            primary_tier = CPUPrimaryTierOffloadingManager(
+                num_blocks=self.num_blocks,
+                cache_policy=self.eviction_policy,  # type: ignore[arg-type]
+                enable_events=enable_events,
+                mmap_region=scheduler_mmap,
+            )
+
+            # Create secondary tiers
+            primary_kv_view = primary_tier.get_kv_memoryview()
+            secondary_tiers = []
+            for i, tier_config in enumerate(self.secondary_tier_configs):
+                try:
+                    tier = SecondaryTierFactory.create_secondary_tier(
+                        tier_config, primary_kv_view, self
+                    )
+                    secondary_tiers.append(tier)
+                    logger.info(
+                        "Created secondary tier #%d (%s)",
+                        i,
+                        tier.tier_type,
+                    )
+                except Exception as e:
+                    logger.error(
+                        "Failed to create secondary tier from config %s: %s",
+                        tier_config,
+                        e,
+                    )
+                    raise
+
+            # Create TieringOffloadingManager. GPU↔CPU transfers use the inherited
+            # get_handlers(); secondary tier transfers are handled by the
+            # secondary tier managers and need no additional handlers here.
+            tiering_manager = TieringOffloadingManager(
+                primary_tier=primary_tier,
+                secondary_tiers=secondary_tiers,
+                enable_events=enable_events,
+            )
+            if int(self.extra_config.get("store_threshold", 0)) >= 2:
+                raise ValueError(
+                    "store_threshold is not supported for TieringOffloadingSpec"
+                )
+            self._manager = tiering_manager
+
+            logger.info(
+                "Created TieringOffloadingManager with primary tier "
+                "(%s, %s blocks) and %s secondary tier(s)",
+                self.eviction_policy,
+                self.num_blocks,
+                len(secondary_tiers),
+            )
+
+        return self._manager
+
+    @override
+    def create_handlers(self, kv_caches: CanonicalKVCaches) -> CpuGpuOffloadingHandlers:
+        world_size = self.vllm_config.parallel_config.world_size
+        rank = torch.accelerator.current_device_index()
+        worker_mmap = SharedOffloadRegion(
+            instance_id=self.vllm_config.instance_id,
+            total_size_bytes=self.cpu_page_size_per_worker
+            * world_size
+            * self.num_blocks,
+            num_blocks=self.num_blocks,
+            rank=rank,
+            num_workers=world_size,
+            cpu_page_size=self.cpu_page_size_per_worker,
+        )
+        return CpuGpuOffloadingHandlers(
+            kv_caches=kv_caches,
+            block_size_factor=self.block_size_factor,
+            num_cpu_blocks=self.num_blocks,
+            mmap_region=worker_mmap,
+        )
diff --git a/vllm/v1/kv_offload/worker/cpu_gpu.py b/vllm/v1/kv_offload/worker/cpu_gpu.py
deleted file mode 100644
index eeabf0cdadd7..000000000000
--- a/vllm/v1/kv_offload/worker/cpu_gpu.py
+++ /dev/null
@@ -1,297 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from collections import deque
-from dataclasses import dataclass
-
-import numpy as np
-import torch
-
-from vllm import _custom_ops as ops
-from vllm.logger import init_logger
-from vllm.utils.platform_utils import is_pin_memory_available
-from vllm.v1.kv_offload.mediums import BlockIDsLoadStoreSpec
-from vllm.v1.kv_offload.spec import CanonicalKVCacheRef, CanonicalKVCaches
-from vllm.v1.kv_offload.worker.worker import (
-    OffloadingHandler,
-    TransferResult,
-    TransferSpec,
-)
-
-logger = init_logger(__name__)
-
-
-@dataclass
-class Transfer:
-    job_id: int
-    stream: torch.cuda.Stream
-    start_event: torch.Event
-    end_event: torch.Event
-    num_bytes: int
-
-
-def expand_block_ids(
-    block_ids: np.ndarray,
-    block_size_factor: int,
-    output: np.ndarray,
-    skip_count: int = 0,
-):
-    """
-    Convert a list of block IDs to a list of matching block ids,
-    assuming each block is composed of actual block_size_factor blocks.
-    Outputs to output tensor.
-    The first skip_count blocks will be skipped.
-    Note that skip_count must be less than block_size_factor.
-
-    For example, if block_ids = [0, 1, 3] and block_size_factor =  4,
-    then it yields [0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15]
-    since 0 maps to [0, 1, 2, 3]
-    1 maps to [4, 5, 6, 7]
-    and 3 maps to [12, 13, 14, 15]
-    """
-    assert skip_count < block_size_factor
-
-    first_range = np.arange(skip_count, block_size_factor)
-    full_range = np.arange(0, block_size_factor)
-
-    output_idx = 0
-    for i, block_id in enumerate(block_ids):
-        base_block_id = block_id * block_size_factor
-        indices = first_range if i == 0 else full_range
-        output_end_idx = output_idx + len(indices)
-        output[output_idx:output_end_idx] = base_block_id + indices
-        output_idx = output_end_idx
-
-
-class SingleDirectionOffloadingHandler(OffloadingHandler):
-    """
-    SingleDirectionOffloadingHandler handles transfers for a single direction,
-    either CPU->GPU or GPU->CPU.
-    Transfers are guaranteed to be executed in order of their submission.
-    Each transfer uses a unique CUDA stream, and its stream will start
-    executing only after the streams of previous transfers have finished.
-    """
-
-    def __init__(
-        self,
-        gpu_tensors: list[torch.Tensor],
-        cpu_tensors: list[torch.Tensor],
-        block_size_factor: int,
-        kv_cache_groups_data_refs: list[list[CanonicalKVCacheRef]],
-        gpu_to_cpu: bool,
-    ):
-        """
-        Initialize a SingleDirectionOffloadingHandler.
-
-        Args:
-            gpu_tensors: list of GPU KV cache tensors.
-                Each of shape (num_gpu_blocks, gpu_page_size_bytes) with dtype int8.
-            cpu_tensors: list of CPU KV cache tensors.
-                Each of shape (num_cpu_blocks, cpu_page_size_bytes) with dtype int8.
-                Order should match gpu_tensors.
-            kv_cache_groups_data_refs: list of CanonicalKVCacheRef per group.
-            gpu_to_cpu: if True, transfer from GPU to CPU; otherwise CPU to GPU.
-        """
-        assert len(gpu_tensors) == len(cpu_tensors)
-        assert len(gpu_tensors) > 0
-
-        # assert a single KV group until transfer_async supports multiple groups
-        assert len(kv_cache_groups_data_refs) == 1
-
-        # assert input tensors are as expected
-        for gpu_tensor, cpu_tensor in zip(gpu_tensors, cpu_tensors):
-            assert gpu_tensor.dtype == torch.int8
-            assert gpu_tensor.ndim == 2
-            assert gpu_tensor.is_cuda
-            assert cpu_tensor.dtype == torch.int8
-            assert cpu_tensor.ndim == 2
-            assert cpu_tensor.device.type == "cpu"
-            _, gpu_page_size = gpu_tensor.shape
-            _, cpu_page_size = cpu_tensor.shape
-            assert cpu_page_size == gpu_page_size * block_size_factor
-
-        self.src_tensors: list[torch.Tensor] = (
-            gpu_tensors if gpu_to_cpu else cpu_tensors
-        )
-        self.dst_tensors: list[torch.Tensor] = (
-            cpu_tensors if gpu_to_cpu else gpu_tensors
-        )
-        self.gpu_to_cpu: bool = gpu_to_cpu
-
-        # GPU blocks may be smaller
-        # cpu_page_size = gpu_page_size * block_size_factor.
-        self.src_block_size_factor = 1 if self.gpu_to_cpu else block_size_factor
-        self.dst_block_size_factor = block_size_factor if self.gpu_to_cpu else 1
-
-        # per-tensor block size in byte
-        self.tensor_block_size_in_bytes = [
-            gpu_tensor.shape[1] for gpu_tensor in gpu_tensors
-        ]
-
-        # per-group block size in bytes
-        self.group_block_size_in_bytes = []
-        for kv_cache_group_data_refs in kv_cache_groups_data_refs:
-            group_block_size_in_bytes = 0
-            for kv_cache_data_ref in kv_cache_group_data_refs:
-                # TODO(orozery): use kv_cache_data_ref.page_size_bytes
-                # once swap_blocks support it
-                group_block_size_in_bytes += self.tensor_block_size_in_bytes[
-                    kv_cache_data_ref.tensor_idx
-                ]
-            self.group_block_size_in_bytes.append(group_block_size_in_bytes)
-
-        self.transfer_type = ("GPU", "CPU") if self.gpu_to_cpu else ("CPU", "GPU")
-        # job_id -> event
-        self._transfer_events: dict[int, torch.Event] = {}
-        # queue of transfers (job_id, stream, event)
-        self._transfers: deque[Transfer] = deque()
-        # list of CUDA streams available for re-use
-        self._stream_pool: list[torch.cuda.Stream] = []
-        # list of CUDA events available for re-use
-        self._event_pool: list[torch.Event] = []
-
-    def transfer_async(self, job_id: int, transfer_spec: TransferSpec) -> bool:
-        src_spec, dst_spec = transfer_spec
-        assert isinstance(src_spec, BlockIDsLoadStoreSpec)
-        assert isinstance(dst_spec, BlockIDsLoadStoreSpec)
-
-        src_blocks = src_spec.block_ids
-        dst_blocks = dst_spec.block_ids
-        assert src_blocks.ndim == 1
-        assert dst_blocks.ndim == 1
-
-        src_sub_block_count = src_blocks.size * self.src_block_size_factor
-        dst_sub_block_count = dst_blocks.size * self.dst_block_size_factor
-        src_sub_blocks_to_skip = -dst_blocks.size % self.src_block_size_factor
-
-        assert dst_sub_block_count == src_sub_block_count - src_sub_blocks_to_skip
-
-        src_to_dst = np.empty((dst_sub_block_count, 2), dtype=np.int64)
-        expand_block_ids(
-            src_blocks,
-            self.src_block_size_factor,
-            src_to_dst[:, 0],
-            skip_count=src_sub_blocks_to_skip,
-        )
-        expand_block_ids(dst_blocks, self.dst_block_size_factor, src_to_dst[:, 1])
-        src_to_dst_tensor = torch.from_numpy(src_to_dst)
-
-        stream = self._stream_pool.pop() if self._stream_pool else torch.cuda.Stream()
-        start_event = (
-            self._event_pool.pop()
-            if self._event_pool
-            else torch.Event(enable_timing=True)
-        )
-        end_event = (
-            self._event_pool.pop()
-            if self._event_pool
-            else torch.Event(enable_timing=True)
-        )
-
-        if self.gpu_to_cpu:
-            # wait for model computation to finish before offloading
-            stream.wait_stream(torch.cuda.current_stream())
-        if self._transfers:
-            last_transfer: Transfer = self._transfers[-1]
-            last_event = last_transfer.end_event
-            # assure job will start only after the previous one completes
-            stream.wait_event(last_event)
-        with torch.cuda.stream(stream):
-            start_event.record(stream)
-            for src_tensor, dst_tensor, block_size_in_bytes in zip(
-                self.src_tensors,
-                self.dst_tensors,
-                self.tensor_block_size_in_bytes,
-            ):
-                ops.swap_blocks(
-                    src_tensor,
-                    dst_tensor,
-                    block_size_in_bytes,
-                    src_to_dst_tensor,
-                )
-            end_event.record(stream)
-
-        self._transfer_events[job_id] = end_event
-        self._transfers.append(
-            Transfer(
-                job_id=job_id,
-                stream=stream,
-                start_event=start_event,
-                end_event=end_event,
-                num_bytes=dst_sub_block_count * self.group_block_size_in_bytes[0],
-            )
-        )
-
-        # success
-        return True
-
-    def get_finished(self) -> list[TransferResult]:
-        results: list[TransferResult] = []
-        while self._transfers and self._transfers[0].end_event.query():
-            transfer = self._transfers.popleft()
-            transfer_time = (
-                transfer.start_event.elapsed_time(transfer.end_event) * 1e-3
-            )  # elapsed_time is in milliseconds
-            result = TransferResult(
-                job_id=transfer.job_id,
-                success=True,
-                transfer_size=transfer.num_bytes,
-                transfer_time=transfer_time,
-                transfer_type=self.transfer_type,
-            )
-
-            results.append(result)
-            self._stream_pool.append(transfer.stream)
-            self._event_pool.append(transfer.end_event)
-            self._event_pool.append(transfer.start_event)
-            del self._transfer_events[transfer.job_id]
-        return results
-
-    def wait(self, job_ids: set[int]):
-        for job_id in job_ids:
-            event = self._transfer_events.get(job_id)
-            if event is not None:
-                event.synchronize()
-
-
-class CpuGpuOffloadingHandlers:
-    def __init__(
-        self,
-        kv_caches: CanonicalKVCaches,
-        block_size_factor: int,
-        num_cpu_blocks: int,
-    ):
-        pin_memory = is_pin_memory_available()
-        logger.info("Allocating %d CPU tensors...", len(kv_caches.tensors))
-        gpu_tensors: list[torch.Tensor] = []
-        cpu_tensors: list[torch.Tensor] = []
-        for kv_cache_tensor in kv_caches.tensors:
-            gpu_page_size_bytes = kv_cache_tensor.page_size_bytes
-            gpu_tensor = kv_cache_tensor.tensor.view(torch.int8).view(
-                (-1, gpu_page_size_bytes)
-            )
-            cpu_page_size_bytes = gpu_page_size_bytes * block_size_factor
-            cpu_tensor = torch.zeros(
-                (num_cpu_blocks, cpu_page_size_bytes),
-                dtype=torch.int8,
-                device="cpu",
-                pin_memory=pin_memory,
-            )
-
-            gpu_tensors.append(gpu_tensor)
-            cpu_tensors.append(cpu_tensor)
-
-        self.gpu_to_cpu_handler = SingleDirectionOffloadingHandler(
-            gpu_tensors=gpu_tensors,
-            cpu_tensors=cpu_tensors,
-            block_size_factor=block_size_factor,
-            kv_cache_groups_data_refs=kv_caches.group_data_refs,
-            gpu_to_cpu=True,
-        )
-
-        self.cpu_to_gpu_handler = SingleDirectionOffloadingHandler(
-            gpu_tensors=gpu_tensors,
-            cpu_tensors=cpu_tensors,
-            block_size_factor=block_size_factor,
-            kv_cache_groups_data_refs=kv_caches.group_data_refs,
-            gpu_to_cpu=False,
-        )
diff --git a/vllm/v1/kv_offload/worker/worker.py b/vllm/v1/kv_offload/worker/worker.py
index efb31c2a0ec7..2f0dd2471631 100644
--- a/vllm/v1/kv_offload/worker/worker.py
+++ b/vllm/v1/kv_offload/worker/worker.py
@@ -4,7 +4,7 @@
 from dataclasses import dataclass
 
 from vllm.logger import init_logger
-from vllm.v1.kv_offload.abstract import LoadStoreSpec
+from vllm.v1.kv_offload.base import LoadStoreSpec
 
 # a single transfer spec (src_blocks_spec, dst_blocks_spec)
 TransferSpec = tuple[LoadStoreSpec, LoadStoreSpec]
@@ -69,6 +69,10 @@ def wait(self, job_ids: set[int]) -> None:
             job_ids: The set of job IDs to wait for.
         """
 
+    def shutdown(self) -> None:
+        """Shutdown the handler and release any resources."""
+        return
+
 
 class OffloadingWorker:
     """
@@ -166,3 +170,7 @@ def wait(self, job_ids: set[int]) -> None:
         """
         for handler in self.handlers:
             handler.wait(job_ids)
+
+    def shutdown(self) -> None:
+        for handler in self.handlers:
+            handler.shutdown()
diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py
index 5d5877d1692e..6855efd9f54c 100644
--- a/vllm/v1/metrics/loggers.py
+++ b/vllm/v1/metrics/loggers.py
@@ -32,6 +32,10 @@
 
 logger = init_logger(__name__)
 
+# User-facing reason labels for waiting request breakdown
+WAITING_REASON_CAPACITY = "capacity"
+WAITING_REASON_DEFERRED = "deferred"
+
 PerEngineStatLoggerFactory = Callable[[VllmConfig, int], "StatLoggerBase"]
 AggregateStatLoggerFactory = type["AggregateStatLoggerBase"]
 StatLoggerFactory = AggregateStatLoggerFactory | PerEngineStatLoggerFactory
@@ -222,13 +226,21 @@ def log(self):
             "Running: %d reqs",
             "Waiting: %d reqs",
         ]
+        total_waiting = (
+            self.last_scheduler_stats.num_waiting_reqs
+            + self.last_scheduler_stats.num_skipped_waiting_reqs
+        )
         log_args: list[int | float | str] = [
             self.last_prompt_throughput,
             self.last_generation_throughput,
             self.last_scheduler_stats.num_running_reqs,
-            self.last_scheduler_stats.num_waiting_reqs,
+            total_waiting,
         ]
 
+        if self.last_scheduler_stats.num_skipped_waiting_reqs > 0:
+            log_parts.append("Deferred: %d reqs")
+            log_args.append(self.last_scheduler_stats.num_skipped_waiting_reqs)
+
         if self.num_preemptions > 0:
             log_parts.append("Preemptions: %d")
             log_args.append(self.num_preemptions)
@@ -328,6 +340,9 @@ def aggregate_scheduler_stats(self):
             self.last_scheduler_stats.num_running_reqs += (
                 last_scheduler_stats.num_running_reqs
             )
+            self.last_scheduler_stats.num_skipped_waiting_reqs += (
+                last_scheduler_stats.num_skipped_waiting_reqs
+            )
             self.last_scheduler_stats.kv_cache_usage += (
                 last_scheduler_stats.kv_cache_usage
             )
@@ -453,6 +468,28 @@ def __init__(
             gauge_scheduler_waiting, per_engine_labelvalues
         )
 
+        gauge_waiting_by_reason = self._gauge_cls(
+            name="vllm:num_requests_waiting_by_reason",
+            documentation=(
+                "Number of waiting requests by reason. "
+                "Reason labels: 'capacity' = waiting for scheduling capacity; "
+                "'deferred' = deferred by transient constraints "
+                "(LoRA budget, KV transfer, blocked status). "
+                "Sum of all reasons equals vllm:num_requests_waiting."
+            ),
+            multiprocess_mode="mostrecent",
+            labelnames=labelnames + ["reason"],
+        )
+        self.gauge_waiting_by_reason: dict[str, dict[int, Gauge]] = {}
+        for waiting_reason in [WAITING_REASON_CAPACITY, WAITING_REASON_DEFERRED]:
+            per_engine_labelvalues_with_reason = {
+                idx: labelvalues + [waiting_reason]
+                for idx, labelvalues in per_engine_labelvalues.items()
+            }
+            self.gauge_waiting_by_reason[waiting_reason] = create_metric_per_engine(
+                gauge_waiting_by_reason, per_engine_labelvalues_with_reason
+            )
+
         gauge_engine_sleep_state = self._gauge_cls(
             name="vllm:engine_sleep_state",
             documentation=(
@@ -622,16 +659,6 @@ def __init__(
             counter_prompt_tokens_cached, per_engine_labelvalues
         )
 
-        # Recomputed tokens (last token recomputed when entire prompt is cached)
-        counter_prompt_tokens_recomputed = self._counter_cls(
-            name="vllm:prompt_tokens_recomputed",
-            documentation="Number of cached tokens recomputed for forward pass.",
-            labelnames=labelnames,
-        )
-        self.counter_prompt_tokens_recomputed = create_metric_per_engine(
-            counter_prompt_tokens_recomputed, per_engine_labelvalues
-        )
-
         counter_generation_tokens = self._counter_cls(
             name="vllm:generation_tokens",
             documentation="Number of generation tokens processed.",
@@ -1040,9 +1067,17 @@ def record(
             self.gauge_scheduler_running[engine_idx].set(
                 scheduler_stats.num_running_reqs
             )
-            self.gauge_scheduler_waiting[engine_idx].set(
+            total_waiting = (
+                scheduler_stats.num_waiting_reqs
+                + scheduler_stats.num_skipped_waiting_reqs
+            )
+            self.gauge_scheduler_waiting[engine_idx].set(total_waiting)
+            self.gauge_waiting_by_reason[WAITING_REASON_CAPACITY][engine_idx].set(
                 scheduler_stats.num_waiting_reqs
             )
+            self.gauge_waiting_by_reason[WAITING_REASON_DEFERRED][engine_idx].set(
+                scheduler_stats.num_skipped_waiting_reqs
+            )
             self.gauge_kv_cache_usage[engine_idx].set(scheduler_stats.kv_cache_usage)
 
             self.counter_prefix_cache_queries[engine_idx].inc(
@@ -1122,7 +1157,6 @@ def record(
                 pts.get_by_source(source)
             )
         self.counter_prompt_tokens_cached[engine_idx].inc(pts.cached_tokens)
-        self.counter_prompt_tokens_recomputed[engine_idx].inc(pts.recomputed_tokens)
         self.counter_generation_tokens[engine_idx].inc(
             iteration_stats.num_generation_tokens
         )
diff --git a/vllm/v1/metrics/ray_wrappers.py b/vllm/v1/metrics/ray_wrappers.py
index a11b92680779..7e2100546e82 100644
--- a/vllm/v1/metrics/ray_wrappers.py
+++ b/vllm/v1/metrics/ray_wrappers.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import copy
 import time
 
 from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorProm
@@ -28,10 +29,13 @@ def _get_replica_id() -> str | None:
 
 
 class RayPrometheusMetric:
+    _is_labeled: bool = False
+
     def __init__(self):
         if ray_metrics is None:
             raise ImportError("RayPrometheusMetric requires Ray to be installed.")
         self.metric: Metric = None
+        self._tags: dict[str, str] = {"ReplicaId": _get_replica_id() or ""}
 
     @staticmethod
     def _get_tag_keys(labelnames: list[str] | None) -> tuple[str, ...]:
@@ -39,7 +43,7 @@ def _get_tag_keys(labelnames: list[str] | None) -> tuple[str, ...]:
         labels.append("ReplicaId")
         return tuple(labels)
 
-    def labels(self, *labels, **labelskwargs):
+    def _build_tags(self, *labels, **labelskwargs) -> dict[str, str]:
         if labels:
             # -1 because ReplicaId was added automatically
             expected = len(self.metric._tag_keys) - 1
@@ -52,12 +56,15 @@ def labels(self, *labels, **labelskwargs):
 
         labelskwargs["ReplicaId"] = _get_replica_id() or ""
 
-        if labelskwargs:
-            for k, v in labelskwargs.items():
-                if not isinstance(v, str):
-                    labelskwargs[k] = str(v)
-            self.metric.set_default_tags(labelskwargs)
-        return self
+        return {k: v if isinstance(v, str) else str(v) for k, v in labelskwargs.items()}
+
+    def labels(self, *labels, **labelskwargs) -> "RayPrometheusMetric":
+        if self._is_labeled:
+            raise ValueError("labels() cannot be called on an already-labeled metric.")
+        clone = copy.copy(self)
+        clone._tags = self._build_tags(*labels, **labelskwargs)
+        clone._is_labeled = True
+        return clone
 
     @staticmethod
     def _get_sanitized_opentelemetry_name(name: str) -> str:
@@ -91,6 +98,7 @@ def __init__(
         # implemented at the observability layer (Prometheus/Grafana).
         del multiprocess_mode
 
+        super().__init__()
         tag_keys = self._get_tag_keys(labelnames)
         name = self._get_sanitized_opentelemetry_name(name)
 
@@ -101,11 +109,11 @@ def __init__(
         )
 
     def set(self, value: int | float):
-        return self.metric.set(value)
+        return self.metric.set(value, tags=self._tags)
 
     def set_to_current_time(self):
         # ray metrics doesn't have set_to_current time, https://docs.ray.io/en/latest/_modules/ray/util/metrics.html
-        return self.metric.set(time.time())
+        return self.set(time.time())
 
 
 class RayCounterWrapper(RayPrometheusMetric):
@@ -118,6 +126,7 @@ def __init__(
         documentation: str | None = "",
         labelnames: list[str] | None = None,
     ):
+        super().__init__()
         tag_keys = self._get_tag_keys(labelnames)
         name = self._get_sanitized_opentelemetry_name(name)
         self.metric = ray_metrics.Counter(
@@ -129,7 +138,7 @@ def __init__(
     def inc(self, value: int | float = 1.0):
         if value == 0:
             return
-        return self.metric.inc(value)
+        return self.metric.inc(value, tags=self._tags)
 
 
 class RayHistogramWrapper(RayPrometheusMetric):
@@ -143,6 +152,7 @@ def __init__(
         labelnames: list[str] | None = None,
         buckets: list[float] | None = None,
     ):
+        super().__init__()
         tag_keys = self._get_tag_keys(labelnames)
         name = self._get_sanitized_opentelemetry_name(name)
 
@@ -155,7 +165,7 @@ def __init__(
         )
 
     def observe(self, value: int | float):
-        return self.metric.observe(value)
+        return self.metric.observe(value, tags=self._tags)
 
 
 class RaySpecDecodingProm(SpecDecodingProm):
diff --git a/vllm/v1/metrics/stats.py b/vllm/v1/metrics/stats.py
index 4a1e8b6f35ce..a7a5fb7a2d2f 100644
--- a/vllm/v1/metrics/stats.py
+++ b/vllm/v1/metrics/stats.py
@@ -172,14 +172,15 @@ class SchedulerStats:
     """Stats associated with the scheduler."""
 
     num_running_reqs: int = 0
-    num_waiting_reqs: int = 0
+
+    num_waiting_reqs: int = 0  # length of the "waiting" request queue
+    num_skipped_waiting_reqs: int = 0  # length of the "skipped waiting" queue
 
     # These are used for internal DP load-balancing.
     step_counter: int = 0
     current_wave: int = 0
 
     kv_cache_usage: float = 0.0
-    encoder_cache_usage: float = 0.0
 
     prefix_cache_stats: PrefixCacheStats = field(default_factory=PrefixCacheStats)
     connector_prefix_cache_stats: PrefixCacheStats | None = None
@@ -224,6 +225,7 @@ class FinishedRequestStats:
     """Stats associated with a finished request."""
 
     finish_reason: "FinishReason"
+    request_id: str | None = None
     e2e_latency: float = 0.0
     num_prompt_tokens: int = 0
     num_generation_tokens: int = 0
@@ -237,6 +239,40 @@ class FinishedRequestStats:
     num_cached_tokens: int = 0
 
 
+@dataclass
+class PrefillStats:
+    """Breakdown of a scheduled prefill computation.
+
+    Fields:
+        num_prompt_tokens: Total number of tokens to be prefilled.
+        num_computed_tokens: Tokens to be prefilled locally (actual compute work).
+        num_cached_tokens: Tokens to be prefilled without actual compute work.
+        num_local_cached_tokens: Tokens to be prefilled from local prefix cache.
+        num_external_cached_tokens: Tokens to be prefilled from external KV transfer.
+    """
+
+    num_prompt_tokens: int = 0
+    num_computed_tokens: int = 0
+    num_cached_tokens: int = 0
+    num_local_cached_tokens: int = 0
+    num_external_cached_tokens: int = 0
+
+    def set(
+        self,
+        num_prompt_tokens: int,
+        num_local_cached_tokens: int,
+        num_external_cached_tokens: int,
+    ):
+        num_cached_tokens = num_local_cached_tokens + num_external_cached_tokens
+        assert num_cached_tokens <= num_prompt_tokens
+
+        self.num_prompt_tokens = num_prompt_tokens
+        self.num_computed_tokens = num_prompt_tokens - num_cached_tokens
+        self.num_cached_tokens = num_cached_tokens
+        self.num_local_cached_tokens = num_local_cached_tokens
+        self.num_external_cached_tokens = num_external_cached_tokens
+
+
 @dataclass
 class PromptTokenStats:
     """Breakdown of prompt tokens by source.
@@ -246,12 +282,11 @@ class PromptTokenStats:
         local_cache_hit: Tokens from local prefix cache.
         external_kv_transfer: Tokens from external KV transfer.
         cached_tokens: Tokens skipped during prefill (from scheduler).
-        recomputed_tokens: Cached tokens that were recomputed (see below).
         total: Total prompt tokens.
 
     Invariants:
-        computed + local_cache_hit + external_kv_transfer - recomputed_tokens = total
-        local_cache_hit + external_kv_transfer - recomputed_tokens = cached_tokens
+        computed + local_cache_hit + external_kv_transfer = total
+        local_cache_hit + external_kv_transfer = cached_tokens
     """
 
     ALL_SOURCES: tuple[str, ...] = (
@@ -264,29 +299,16 @@ class PromptTokenStats:
     local_cache_hit: int = 0
     external_kv_transfer: int = 0
     cached_tokens: int = 0
-    recomputed_tokens: int = 0
     total: int = 0
 
-    def update_from_output(
-        self,
-        num_cached_tokens: int,
-        num_external_computed_tokens: int,
-        prompt_len: int,
-    ) -> None:
+    def update_from_output(self, prefill_stats: PrefillStats) -> None:
         """Update stats from a prefill output."""
-        # When all tokens are cached, the scheduler reduces num_cached_tokens
-        # by 1 to force the model to recompute the last token, since the model
-        # needs at least one input token to run a forward pass.
-        recomputed = 1 if (num_cached_tokens + 1 == prompt_len) else 0
-
-        self.computed += prompt_len - num_cached_tokens
-        self.external_kv_transfer += num_external_computed_tokens
-        self.local_cache_hit += (
-            num_cached_tokens + recomputed - num_external_computed_tokens
-        )
-        self.cached_tokens += num_cached_tokens
-        self.recomputed_tokens += recomputed
-        self.total += prompt_len
+        self.computed += prefill_stats.num_computed_tokens
+        self.cached_tokens += prefill_stats.num_cached_tokens
+        self.total += prefill_stats.num_prompt_tokens
+
+        self.local_cache_hit += prefill_stats.num_local_cached_tokens
+        self.external_kv_transfer += prefill_stats.num_external_cached_tokens
 
     def get_by_source(self, source: str) -> int:
         """Get token count by source label."""
@@ -333,7 +355,6 @@ def update_from_output(
         output: "EngineCoreOutput",
         engine_core_timestamp: float,
         is_prefilling: bool,
-        prompt_len: int,
         req_stats: RequestStateStats,
         lora_states: "LoRARequestStates",
         lora_name: str | None,
@@ -342,11 +363,8 @@ def update_from_output(
 
         self.num_generation_tokens += num_new_generation_tokens
         if is_prefilling:
-            self.prompt_token_stats.update_from_output(
-                num_cached_tokens=output.num_cached_tokens,
-                num_external_computed_tokens=output.num_external_computed_tokens,
-                prompt_len=prompt_len,
-            )
+            if output.prefill_stats is not None:
+                self.prompt_token_stats.update_from_output(output.prefill_stats)
 
             first_token_latency = self._time_since(req_stats.arrival_time)
             self.time_to_first_tokens_iter.append(first_token_latency)
@@ -410,6 +428,7 @@ def update_from_events(
     def update_from_finished_request(
         self,
         finish_reason: "FinishReason",
+        request_id: str,
         num_prompt_tokens: int,
         max_tokens_param: int | None,
         req_stats: RequestStateStats,
@@ -441,6 +460,7 @@ def update_from_finished_request(
 
         finished_req = FinishedRequestStats(
             finish_reason=finish_reason,
+            request_id=request_id,
             e2e_latency=e2e_latency,
             num_prompt_tokens=num_prompt_tokens,
             num_generation_tokens=req_stats.num_generation_tokens,
diff --git a/vllm/v1/outputs.py b/vllm/v1/outputs.py
index 8eb58de4f3fd..9703dfa9e70b 100644
--- a/vllm/v1/outputs.py
+++ b/vllm/v1/outputs.py
@@ -2,9 +2,8 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from abc import ABC, abstractmethod
-from collections.abc import Callable
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, NamedTuple, TypeAlias, TypeVar
+from typing import TYPE_CHECKING, NamedTuple, TypeAlias
 
 import numpy as np
 import torch
@@ -110,6 +109,73 @@ def empty_cpu(
         )
 
 
+class RoutedExpertsTensors(NamedTuple):
+    """Device-side snapshot of routed experts data, pending async D2H.
+
+    Produced by :class:`GPUModelRunner` at the end of each async-scheduled
+    step. The copy stream waits on the default stream, then issues
+    non-blocking D2H via :meth:`to_cpu_nonblocking` into a pinned CPU
+    buffer; :class:`AsyncGPUModelRunnerOutput.get_output` synchronizes
+    the copy before the scheduler reads it.
+
+    Sliced to ``total_num_scheduled_tokens`` (step-level, across all
+    requests — NOT per-request). Both ``routing_data`` and
+    ``slot_mapping`` must be private clones when sourced from shared
+    capturer / prepare-input buffers, so the next forward pass /
+    ``_prepare_inputs`` on the default stream does not race with a
+    D2H still pending on the copy stream.
+    """
+
+    # (num_scheduled_tokens, num_layers, num_experts_per_tok)
+    routing_data: torch.Tensor
+    # (num_scheduled_tokens,)
+    slot_mapping: torch.Tensor
+
+    def to_cpu_nonblocking(self) -> "RoutedExpertsTensors":
+        """Issue non-blocking D2H on the current stream.
+
+        NOTE: ``non_blocking=True`` only delivers true overlap when the
+        CPU target is pinned. The current fallback here allocates a
+        new pageable CPU tensor per call, which silently degrades to a
+        synchronous copy; acceptable because the sync happens on the
+        dedicated copy stream, not the default stream.
+        """
+        if self.routing_data.device.type == "cpu":
+            return self
+        return RoutedExpertsTensors(
+            self.routing_data.to("cpu", non_blocking=True),
+            self.slot_mapping.to("cpu", non_blocking=True),
+        )
+
+    def tolists(self) -> "RoutedExpertsLists":
+        """Convert to the numpy-backed form consumed by the scheduler.
+
+        ``.cpu()`` is a no-op when the tensor is already on CPU, so this
+        is cheap for the post-D2H case; for raw device tensors it will
+        synchronously block, which is only reached in tests.
+        """
+        return RoutedExpertsLists(
+            self.routing_data.cpu().numpy(),
+            self.slot_mapping.cpu().numpy(),
+        )
+
+
+class RoutedExpertsLists(NamedTuple):
+    """CPU-side routed experts, the form :meth:`RoutedExpertsManager.store_batch`
+    consumes.
+
+    Batched per scheduler step: the leading dim is the number of tokens
+    scheduled across all requests in this step (``total_num_scheduled_tokens``),
+    not per-request tokens. ``slot_mapping[i]`` tells the scheduler which
+    physical KV-cache slot row ``i`` of ``routing_data`` belongs to.
+    """
+
+    # (num_scheduled_tokens, num_layers, num_experts_per_tok)
+    routing_data: np.ndarray
+    # (num_scheduled_tokens,)
+    slot_mapping: np.ndarray
+
+
 # [num_reqs, <dynamic>]
 # The shape of each element depends on the pooler used
 PoolerOutput: TypeAlias = torch.Tensor | list[torch.Tensor] | list[torch.Tensor | None]
@@ -125,20 +191,6 @@ class SamplerOutput:
     logprobs_tensors: LogprobsTensors | None
 
 
-T = TypeVar("T")
-
-
-def _combine_non_none(f: Callable[[T, T], T], items: list[T | None]) -> T | None:
-    non_none = [item for item in items if item is not None]
-    if len(non_none) == 0:
-        return None
-
-    combined = non_none[0]
-    for item in non_none[1:]:
-        combined = f(combined, item)
-    return combined
-
-
 @dataclass
 class KVConnectorOutput:
     # [req_ids]
@@ -167,43 +219,6 @@ def is_empty(self):
             and not self.kv_connector_worker_meta
         )
 
-    @classmethod
-    def merge(cls, *outputs: "KVConnectorOutput"):
-        assert len(outputs) > 0, "Cannot merge empty outputs"
-        finished_sending = _combine_non_none(
-            set.union, [output.finished_sending for output in outputs]
-        )
-        finished_recving = _combine_non_none(
-            set.union, [output.finished_recving for output in outputs]
-        )
-        kv_connector_stats = _combine_non_none(
-            lambda x, y: x.aggregate(y),
-            [output.kv_connector_stats for output in outputs],
-        )
-        kv_cache_events = _combine_non_none(
-            lambda x, y: x.merge(y),
-            [output.kv_cache_events for output in outputs],
-        )
-        invalid_block_ids = _combine_non_none(
-            set.union, [output.invalid_block_ids for output in outputs]
-        )
-        assert invalid_block_ids is not None
-
-        assert all(
-            output.expected_finished_count == outputs[0].expected_finished_count
-            for output in outputs
-        )
-        expected_finished_count = outputs[0].expected_finished_count
-
-        return cls(
-            finished_sending=finished_sending,
-            finished_recving=finished_recving,
-            kv_connector_stats=kv_connector_stats,
-            kv_cache_events=kv_cache_events,
-            invalid_block_ids=invalid_block_ids,
-            expected_finished_count=expected_finished_count,
-        )
-
 
 @dataclass
 class ECConnectorOutput:
@@ -253,6 +268,17 @@ class ModelRunnerOutput:
     # information related to cudagraph execution
     cudagraph_stats: CUDAGraphStat | None = None
 
+    # Per-step routed experts data captured by the worker.
+    # ``routing_data`` shape: (num_scheduled_tokens, num_layers,
+    #                         num_experts_per_tok); expert IDs as uint8/uint16.
+    # ``slot_mapping`` shape: (num_scheduled_tokens,); physical KV-cache
+    #                         slot for each row of routing_data.
+    # ``num_scheduled_tokens`` is step-level (total across all requests
+    # in this step), not per-request. The scheduler persists this into
+    # its slot buffer via ``slot_buffer[slot_mapping] = routing_data``.
+    # ``None`` when ``enable_return_routed_experts`` is off.
+    routed_experts: RoutedExpertsLists | None = None
+
 
 # ModelRunnerOutput wrapper for async scheduling.
 class AsyncModelRunnerOutput(ABC):
diff --git a/vllm/v1/pool/late_interaction.py b/vllm/v1/pool/late_interaction.py
index 4a465bd2f7d3..554c5947c618 100644
--- a/vllm/v1/pool/late_interaction.py
+++ b/vllm/v1/pool/late_interaction.py
@@ -56,16 +56,7 @@ def build_late_interaction_doc_params(
     )
 
 
-def compute_maxsim_score(
-    q_emb: torch.Tensor,
-    d_emb: torch.Tensor,
-) -> torch.Tensor:
-    # compute in float32 for numerical stability
-    token_scores = torch.matmul(q_emb.float(), d_emb.float().T)
-    return token_scores.amax(dim=-1).sum()
-
-
-def compute_maxsim_scores(
+def compute_maxsim_score_batched(
     q_embs: Sequence[torch.Tensor],
     d_embs: Sequence[torch.Tensor],
     max_batch_size: int = 64,
diff --git a/vllm/v1/pool/metadata.py b/vllm/v1/pool/metadata.py
index 076c87526f0a..f772c850f0dc 100644
--- a/vllm/v1/pool/metadata.py
+++ b/vllm/v1/pool/metadata.py
@@ -20,7 +20,7 @@ class PoolingCursor:
     seq_lens_cpu: torch.Tensor
     num_scheduled_tokens_cpu: torch.Tensor
 
-    def __getitem__(self, indices: slice):
+    def __getitem__(self, indices: slice) -> "PoolingCursor":
         return PoolingCursor(
             first_token_indices_gpu=self.first_token_indices_gpu[indices],
             last_token_indices_gpu=self.last_token_indices_gpu[indices],
@@ -29,19 +29,19 @@ def __getitem__(self, indices: slice):
             num_scheduled_tokens_cpu=self.num_scheduled_tokens_cpu[indices],
         )
 
-    def is_partial_prefill(self):
+    def is_partial_prefill(self) -> bool:
         return not torch.all(self.prompt_lens_cpu == self.num_scheduled_tokens_cpu)
 
-    def is_finished(self):
+    def is_finished(self) -> torch.Tensor:
         return self.prompt_lens_cpu == self.seq_lens_cpu
 
 
 class PoolingStates:
-    def __init__(self):
+    def __init__(self) -> None:
         # for chunked prefill with ALL pooling
         self.hidden_states_cache: list[torch.Tensor] = []
 
-    def clean(self):
+    def clean(self) -> None:
         self.hidden_states_cache.clear()
 
 
@@ -64,11 +64,15 @@ def __post_init__(self) -> None:
             for pooling_param in pooling_params
             if (task := pooling_param.task) is not None
         ]
-        assert len(pooling_params) == len(tasks)
+        if len(pooling_params) != len(tasks):
+            raise ValueError(
+                "Every pooling param must have a task set, but got "
+                f"{len(tasks)} tasks for {len(pooling_params)} pooling params"
+            )
 
         self.tasks = tasks
 
-    def __getitem__(self, indices: slice):
+    def __getitem__(self, indices: slice) -> "PoolingMetadata":
         return PoolingMetadata(
             prompt_lens=self.prompt_lens[indices],
             prompt_token_ids=None
@@ -84,23 +88,30 @@ def __getitem__(self, indices: slice):
             else self.pooling_cursor[indices],
         )
 
-    def get_prompt_token_ids(self) -> list[torch.Tensor]:
-        prompt_token_ids = self.prompt_token_ids
-        assert prompt_token_ids is not None, (
-            "Please set `requires_token_ids=True` in `get_pooling_updates`"
-        )
+    def _get_prompt_token_ids(
+        self,
+        prompt_token_ids: torch.Tensor | None,
+    ) -> list[torch.Tensor]:
+        if prompt_token_ids is None:
+            raise ValueError(
+                "prompt_token_ids is required but was not set. "
+                "Please set `requires_token_ids=True` in `get_pooling_updates`"
+            )
         return [prompt_token_ids[i, :num] for i, num in enumerate(self.prompt_lens)]
 
+    def get_prompt_token_ids(self) -> list[torch.Tensor]:
+        return self._get_prompt_token_ids(self.prompt_token_ids)
+
     def get_prompt_token_ids_cpu(self) -> list[torch.Tensor]:
-        prompt_token_ids = self.prompt_token_ids_cpu
-        assert prompt_token_ids is not None, (
-            "Please set `requires_token_ids=True` in `get_pooling_updates`"
-        )
-        return [prompt_token_ids[i, :num] for i, num in enumerate(self.prompt_lens)]
+        return self._get_prompt_token_ids(self.prompt_token_ids_cpu)
 
     def get_pooling_cursor(self) -> PoolingCursor:
         pooling_cursor = self.pooling_cursor
-        assert pooling_cursor is not None, "Should call `build_pooling_cursor` first"
+        if pooling_cursor is None:
+            raise RuntimeError(
+                "pooling_cursor has not been initialized. "
+                "Call `build_pooling_cursor` before accessing it"
+            )
 
         return pooling_cursor
 
@@ -110,11 +121,15 @@ def build_pooling_cursor(
         seq_lens_cpu: torch.Tensor,
         device: torch.device,
         query_start_loc_gpu: torch.Tensor | None = None,
-    ):
+    ) -> None:
         n_seq = len(num_scheduled_tokens_np)
         prompt_lens = self.prompt_lens
 
-        assert len(prompt_lens) == n_seq
+        if len(prompt_lens) != n_seq:
+            raise ValueError(
+                f"prompt_lens length ({len(prompt_lens)}) does not match "
+                f"the number of sequences ({n_seq})"
+            )
 
         num_scheduled_tokens_cpu = torch.from_numpy(num_scheduled_tokens_np)
         if query_start_loc_gpu is None:
diff --git a/vllm/v1/request.py b/vllm/v1/request.py
index 946e71c15d35..26cc82fc4a6b 100644
--- a/vllm/v1/request.py
+++ b/vllm/v1/request.py
@@ -20,6 +20,7 @@
     EngineCoreRequest,
     FinishReason,
 )
+from vllm.v1.metrics.stats import PrefillStats
 from vllm.v1.structured_output.request import StructuredOutputRequest
 from vllm.v1.utils import ConstantList
 
@@ -65,6 +66,7 @@ def __init__(
         client_index: int = 0,
         arrival_time: float | None = None,
         prompt_embeds: torch.Tensor | None = None,
+        prompt_is_token_ids: list[bool] | None = None,
         mm_features: list[MultiModalFeatureSpec] | None = None,
         lora_request: "LoRARequest | None" = None,
         cache_salt: str | None = None,
@@ -73,6 +75,8 @@ def __init__(
         block_hasher: Callable[["Request"], list["BlockHash"]] | None = None,
         resumable: bool = False,
         reasoning_ended: bool | None = None,
+        reasoning_parser_kwargs: dict[str, Any] | None = None,
+        abort_immediately: bool = False,
     ) -> None:
         self.request_id = request_id
         self.client_index = client_index
@@ -85,6 +89,9 @@ def __init__(
         )
         if self.structured_output_request is not None:
             self.structured_output_request.reasoning_ended = reasoning_ended
+            self.structured_output_request.reasoning_parser_kwargs = (
+                reasoning_parser_kwargs
+            )
         self.arrival_time = arrival_time if arrival_time is not None else time.time()
 
         self.status = RequestStatus.WAITING
@@ -113,6 +120,10 @@ def __init__(
 
         self.prompt_token_ids = prompt_token_ids
         self.prompt_embeds = prompt_embeds
+        # Per-position mask used in mixed-mode (chat completion with
+        # prompt_embeds). `None` except when both `prompt_token_ids` and
+        # `prompt_embeds` are set and their positions are interleaved.
+        self.prompt_is_token_ids = prompt_is_token_ids
         # Cache per-block prompt-embed hashes to avoid rehashing the same
         # tensor slices when generating extra keys.
         self._prompt_embeds_per_block_hashes: dict[tuple[int, int], bytes] = {}
@@ -128,8 +139,7 @@ def __init__(
 
         # Used in async scheduling.
         self.num_output_placeholders = 0
-        # Used in forced preemption (reset_prefix_cache) with async scheduling.
-        self.discard_latest_async_tokens = False
+        self.async_tokens_to_discard = 0
 
         self.spec_token_ids: list[int] = []
         self.num_computed_tokens = 0
@@ -145,9 +155,6 @@ def __init__(
         self.all_token_ids = ConstantList(self._all_token_ids)
         # trace_headers
         self.trace_headers = trace_headers
-        # State
-        # The number of tokens with prefix cache hits.
-        self.num_cached_tokens = -1
 
         # True if this request is scheduled as a non-final prefill chunk.
         self.is_prefill_chunk = False
@@ -159,8 +166,7 @@ def __init__(
         # The number of times this request has been preempted by the scheduler.
         self.num_preemptions = 0
 
-        # The number of tokens that have been computed remotely.
-        self.num_external_computed_tokens = 0
+        self.prefill_stats: PrefillStats | None = PrefillStats()
 
         self.block_hashes: list[BlockHash] = []
         # Store the block hasher without binding self to avoid creating a
@@ -176,6 +182,10 @@ def __init__(
         # None entry in the queue means finished.
         self.streaming_queue: deque[StreamingUpdate | None] | None = None
 
+        # If True, request should be aborted immediately after being added to
+        # the scheduler so the connector's request_finished hook runs.
+        self.abort_immediately = abort_immediately
+
     @classmethod
     def from_engine_core_request(
         cls,
@@ -187,6 +197,7 @@ def from_engine_core_request(
             client_index=request.client_index,
             prompt_token_ids=request.prompt_token_ids,
             prompt_embeds=request.prompt_embeds,
+            prompt_is_token_ids=request.prompt_is_token_ids,
             mm_features=request.mm_features,
             sampling_params=request.sampling_params,
             pooling_params=request.pooling_params,
@@ -198,6 +209,8 @@ def from_engine_core_request(
             block_hasher=block_hasher,
             resumable=request.resumable,
             reasoning_ended=request.reasoning_ended,
+            reasoning_parser_kwargs=request.reasoning_parser_kwargs,
+            abort_immediately=request.abort_immediately,
         )
 
     def append_output_token_ids(
@@ -278,6 +291,13 @@ def take_events(self) -> list[EngineCoreEvent] | None:
         events, self.events = self.events, []
         return events
 
+    def take_prefill_stats(self) -> PrefillStats | None:
+        if self.prefill_stats is None:
+            return None
+        prefill_stats = self.prefill_stats
+        self.prefill_stats = None
+        return prefill_stats
+
     def __lt__(self, other: "Request") -> bool:
         """
         Compare two requests based on priority, arrival time, and request ID.
diff --git a/vllm/v1/sample/logits_processor/__init__.py b/vllm/v1/sample/logits_processor/__init__.py
index fb4a046fc057..2cb89e1ea950 100644
--- a/vllm/v1/sample/logits_processor/__init__.py
+++ b/vllm/v1/sample/logits_processor/__init__.py
@@ -18,7 +18,6 @@
     LogitBiasLogitsProcessor,
     MinPLogitsProcessor,
     MinTokensLogitsProcessor,
-    ThinkingTokenBudgetLogitsProcessor,
     process_dict_updates,
 )
 from vllm.v1.sample.logits_processor.interface import (
@@ -51,7 +50,6 @@
     MinTokensLogitsProcessor,
     LogitBiasLogitsProcessor,
     MinPLogitsProcessor,
-    ThinkingTokenBudgetLogitsProcessor,
 ]
 
 
@@ -356,5 +354,4 @@ def apply(self, logits: torch.Tensor) -> torch.Tensor:
     "STR_POOLING_REJECTS_LOGITSPROCS",
     "LOGITSPROCS_GROUP",
     "AdapterLogitsProcessor",
-    "ThinkingTokenBudgetLogitsProcessor",
 ]
diff --git a/vllm/v1/sample/logits_processor/builtin.py b/vllm/v1/sample/logits_processor/builtin.py
index c92f334021fc..11a52711d671 100644
--- a/vllm/v1/sample/logits_processor/builtin.py
+++ b/vllm/v1/sample/logits_processor/builtin.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from collections.abc import Callable, Sequence
-from typing import TYPE_CHECKING, Any, TypeVar
+from typing import TYPE_CHECKING, TypeVar
 
 import numpy as np
 import torch
@@ -291,263 +291,6 @@ def apply_with_spec_decode(
         return logits
 
 
-class ThinkingTokenBudgetLogitsProcessor(LogitsProcessor):
-    """Limits the number of tokens allowed inside a 'thinking' section."""
-
-    def __init__(
-        self, vllm_config: "VllmConfig", device: torch.device, is_pin_memory: bool
-    ):
-        reasoning_config = vllm_config.reasoning_config
-        max_num_reqs = vllm_config.scheduler_config.max_num_seqs
-
-        # Check if thinking is enabled
-        self.is_enabled = reasoning_config is not None
-
-        self.think_start_token_ids = getattr(
-            reasoning_config, "think_start_token_ids", []
-        )
-        self.think_end_token_ids = getattr(reasoning_config, "think_end_token_ids", [])
-
-        self.pin_memory = is_pin_memory
-        self.device = device
-        # Per-request state tracking for thinking token management
-        # Key: request_index, Value: state dict containing:
-        # "in_think": bool - currently in thinking mode
-        # "in_end": bool - currently forcing end tokens output
-        # "check_count_down": int - steps remaining until next think
-        #                            start/end token parsing
-        # "think_count": int - number of thinking tokens generated
-        # "end_count": int - number of end tokens forced so far
-        # "thinking_token_budget": int - max allowed thinking tokens
-        # "output_tok_ids": list[int] - generated output tokens
-        # "prev_output_length": int - previous output length for
-        #                               incremental processing
-        self._state: dict[int, dict[str, Any]] = {}
-
-        # Preallocate reusable tensors
-        self.mask = torch.zeros(max_num_reqs, dtype=torch.bool, device=device)
-        self.force_token_ids = torch.full(
-            (max_num_reqs,), -1, dtype=torch.long, device=device
-        )
-
-    @staticmethod
-    def _find_last_sequence_index(target_list: list[int], token_ids: list[int]) -> int:
-        """
-        Returns the index of the last occurrence of token_ids in target_list.
-
-        Args:
-          target_list (list[int]): The list of token IDs.
-          token_ids (list[int]): The sequence of token IDs to find.
-        """
-        if not token_ids:
-            return -1
-        for i in range(len(target_list) - len(token_ids), -1, -1):
-            if target_list[i : i + len(token_ids)] == token_ids:
-                return i
-        return -1
-
-    def _init_state_entry(
-        self, prompt_tok_ids: list[int] | None, thinking_token_budget: int
-    ) -> dict[str, Any]:
-        """Initializes the tracking state for a given sequence index."""
-        if prompt_tok_ids is None:
-            last_start = -1
-            last_end = -1
-            in_think = False
-            think_count = 0
-        else:
-            last_start = self._find_last_sequence_index(
-                prompt_tok_ids, self.think_start_token_ids
-            )
-            last_end = self._find_last_sequence_index(
-                prompt_tok_ids, self.think_end_token_ids
-            )
-            in_think = last_start > last_end
-            if in_think:
-                think_count = len(prompt_tok_ids) - (
-                    last_start + len(self.think_start_token_ids)
-                )
-            else:
-                think_count = 0
-
-        return {
-            "in_think": in_think,  # Currently in thinking mode
-            "in_end": in_think and thinking_token_budget == 0,
-            "check_count_down": thinking_token_budget,
-            "think_count": think_count,  # Number of tokens in thinking section
-            "end_count": 0,  # Number of end tokens forced so far
-            "prompt_tok_ids": prompt_tok_ids,
-            "output_tok_ids": [],
-            "thinking_token_budget": thinking_token_budget,
-            "prev_output_length": 0,
-            # Track previous output length for incremental updates
-        }
-
-    def _update_think_state(self, state: dict[str, Any]):
-        """Updates the state based on newly generated output tokens."""
-        if not state.get("in_end", False) and state.get("check_count_down", 0) > 0:
-            state["check_count_down"] -= 1
-            return
-
-        output = state.get("output_tok_ids", [])
-        if not output:
-            return
-
-        # Track previous output length for incremental processing
-        prev_length = state.get("prev_output_length", 0)
-        current_length = len(output)
-
-        if current_length <= prev_length:
-            return
-
-        # Process only newly added tokens
-        new_tokens = output[prev_length:]
-        state["prev_output_length"] = current_length
-
-        # Check if new tokens contain think start or end sequences
-        start_len = len(self.think_start_token_ids)
-        end_len = len(self.think_end_token_ids)
-
-        # Look for think sequences in recent tokens (including boundary)
-        # Check overlapping regions where sequences might span boundaries
-        check_start_idx = max(0, prev_length - max(start_len, end_len) + 1)
-        recent_tokens = output[check_start_idx:]
-
-        # Find any think start/end sequences in recent tokens
-        recent_start_pos = self._find_last_sequence_index(
-            recent_tokens, self.think_start_token_ids
-        )
-        recent_end_pos = self._find_last_sequence_index(
-            recent_tokens, self.think_end_token_ids
-        )
-
-        # Update state based on recent sequences
-        if not state["in_end"]:
-            if recent_start_pos >= 0 and recent_end_pos >= 0:
-                if recent_start_pos > recent_end_pos:
-                    # Case: ...<end>...<start>... - entering think mode
-                    absolute_start_pos = check_start_idx + recent_start_pos
-                    new_think_count = current_length - (absolute_start_pos + start_len)
-                    state["in_think"] = True
-                    state["think_count"] = new_think_count
-                else:
-                    # Case: ...<start>...<end>... - exiting think mode
-                    state["in_think"] = False
-                    state["think_count"] = 0
-            elif recent_start_pos >= 0:
-                # Found think start - entering think mode
-                absolute_start_pos = check_start_idx + recent_start_pos
-                new_think_count = current_length - (absolute_start_pos + start_len)
-                state["in_think"] = True
-                state["think_count"] = new_think_count
-            elif recent_end_pos >= 0:
-                # Found think end - exiting think mode
-                state["in_think"] = False
-                state["think_count"] = 0
-            elif state["in_think"]:
-                # Continue thinking mode, increment count by new tokens
-                state["think_count"] += len(new_tokens)
-
-            # Set countdown based on current state
-            if state["in_think"]:
-                remaining_budget = max(
-                    0, state["thinking_token_budget"] - state["think_count"]
-                )
-                state["check_count_down"] = max(0, remaining_budget - 1)
-            else:
-                state["check_count_down"] = state["thinking_token_budget"]
-
-            # Check if need to transition to end mode
-            if (
-                state["in_think"]
-                and state["think_count"] >= state["thinking_token_budget"]
-            ):
-                state["in_think"] = False
-                state["in_end"] = True
-                state["end_count"] = 0
-                state["check_count_down"] = state["thinking_token_budget"]
-        else:
-            # In end mode
-            state["end_count"] += 1
-            if state["end_count"] >= len(self.think_end_token_ids):
-                state.update(
-                    {
-                        "in_end": False,
-                        "end_count": 0,
-                        "check_count_down": state["thinking_token_budget"],
-                    }
-                )
-
-    def is_argmax_invariant(self) -> bool:
-        """This logits processor can change the outcome of
-        greedy sampling by forcing that the thinking section
-        ends after a certain number of tokens."""
-        return False
-
-    def update_state(self, batch_update: BatchUpdate | None):
-        if not self.is_enabled:
-            return
-        if batch_update:
-            for index, params, prompt_tok_ids, output_tok_ids in batch_update.added:
-                thinking_token_budget = params.thinking_token_budget
-
-                if thinking_token_budget is not None:
-                    self._state[index] = self._init_state_entry(
-                        prompt_tok_ids, thinking_token_budget
-                    )
-                    self._state[index]["output_tok_ids"] = output_tok_ids
-                else:
-                    # Remove state if no thinking budget
-                    self._state.pop(index, None)
-
-            for index in batch_update.removed:
-                self._state.pop(index, {})
-
-            for i1, i2, direction in batch_update.moved:
-                if direction == MoveDirectionality.SWAP:
-                    state1 = self._state.pop(i1, None)
-                    state2 = self._state.pop(i2, None)
-                    if state1 is not None:
-                        self._state[i2] = state1
-                    if state2 is not None:
-                        self._state[i1] = state2
-                else:
-                    state = self._state.pop(i1, None)
-                    if state is not None:
-                        self._state[i2] = state
-
-        for state in self._state.values():
-            self._update_think_state(state)
-
-    def apply(self, logits: torch.Tensor) -> torch.Tensor:
-        if not self.is_enabled or not self._state:
-            return logits
-
-        batch_size = logits.size(0)
-        self.mask[:batch_size] = False
-
-        for i in range(batch_size):
-            state = self._state.get(i)
-            if state and state["in_end"]:
-                self.mask[i] = True
-                self.force_token_ids[i] = self.think_end_token_ids[state["end_count"]]
-
-        # Check in CPU first not to sync with GPU
-        has_active_thinking = any(
-            state.get("in_end", False) for state in self._state.values()
-        )
-
-        if has_active_thinking:
-            current_mask = self.mask[:batch_size]
-            active_indices = current_mask.nonzero(as_tuple=False).view(-1)
-            if len(active_indices) > 0:
-                force_tokens = self.force_token_ids[active_indices]
-                # Apply a large value for the end thinking token id index
-                logits[active_indices, force_tokens] = 1e9
-
-        return logits
-
-
 def process_dict_updates(
     req_entries: dict[int, T],
     batch_update: BatchUpdate | None,
diff --git a/vllm/v1/sample/metadata.py b/vllm/v1/sample/metadata.py
index b1101b1b2318..fa4ceac8e71e 100644
--- a/vllm/v1/sample/metadata.py
+++ b/vllm/v1/sample/metadata.py
@@ -1,11 +1,14 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from __future__ import annotations
+
 from dataclasses import dataclass
 
 import torch
 
 from vllm.v1.sample.logits_processor import LogitsProcessors
+from vllm.v1.sample.thinking_budget_state import ThinkingBudgetStateHolder
 
 
 @dataclass
@@ -40,5 +43,13 @@ class SamplingMetadata:
     # Loaded logits processors
     logitsprocs: LogitsProcessors
 
+    # Specific token IDs to compute logprobs for (more efficient than full vocab)
+    # When set, logprobs are computed only for these token IDs using gather
+    # req_index -> list of token IDs to get logprobs for
+    logprob_token_ids: dict[int, list[int]] | None = None
+
     # Speculative token ids
     spec_token_ids: list[list[int]] | None = None
+    # When non-None, use ``holder.has_tracked_requests()`` to see if this batch applies
+    # thinking-token-budget logits (holder may exist with an empty tracking set).
+    thinking_budget_state_holder: ThinkingBudgetStateHolder | None = None
diff --git a/vllm/v1/sample/ops/bad_words.py b/vllm/v1/sample/ops/bad_words.py
index 56972e517980..62ea430ac694 100644
--- a/vllm/v1/sample/ops/bad_words.py
+++ b/vllm/v1/sample/ops/bad_words.py
@@ -23,7 +23,8 @@ def _apply_bad_words_single_batch(
         assert len(actual_prefix) == len(expected_prefix)
 
         if actual_prefix == expected_prefix:
-            logits[last_token_id] = _SMALLEST_LOGIT
+            # Assign to slice to avoid cpu->gpu sync.
+            logits[last_token_id : last_token_id + 1] = _SMALLEST_LOGIT
 
 
 def apply_bad_words(
diff --git a/vllm/v1/sample/ops/logprobs.py b/vllm/v1/sample/ops/logprobs.py
index cf36d46e13fd..9cdcfa456a80 100644
--- a/vllm/v1/sample/ops/logprobs.py
+++ b/vllm/v1/sample/ops/logprobs.py
@@ -7,7 +7,7 @@
 from vllm.platforms import current_platform
 
 
-@torch.compile(dynamic=True, backend=current_platform.simple_compile_backend)
+@torch.compile(backend=current_platform.simple_compile_backend)
 def batched_count_greater_than(x: torch.Tensor, values: torch.Tensor) -> torch.Tensor:
     """
     Counts elements in each row of x that are greater than the corresponding
@@ -22,4 +22,6 @@ def batched_count_greater_than(x: torch.Tensor, values: torch.Tensor) -> torch.T
     Returns:
         torch.Tensor: A 1D tensor of shape (batch_size,) with the counts.
     """
+    torch._check(x.shape[0] >= 1)
+    torch._check(x.shape[0] == values.shape[0])
     return (x >= values).sum(-1)
diff --git a/vllm/v1/sample/ops/topk_topp_sampler.py b/vllm/v1/sample/ops/topk_topp_sampler.py
index 33f7090e4e3d..1fc5f6e8ccf0 100644
--- a/vllm/v1/sample/ops/topk_topp_sampler.py
+++ b/vllm/v1/sample/ops/topk_topp_sampler.py
@@ -41,23 +41,35 @@ def __init__(self, logprobs_mode: LogprobsMode = "raw_logprobs") -> None:
 
                 capability = current_platform.get_device_capability()
                 assert capability is not None
-                if not FlashInferBackend.supports_compute_capability(capability):
+                if FlashInferBackend.supports_compute_capability(capability):
+                    logger.info_once(
+                        "Using FlashInfer for top-p & top-k sampling.",
+                        scope="global",
+                    )
+                    self.forward = self.forward_cuda
+                elif envs.is_set("VLLM_USE_FLASHINFER_SAMPLER"):
+                    # User explicitly opted in but the GPU can't run FlashInfer.
                     capability_str = capability.as_version_str()
                     raise RuntimeError(
                         "FlashInfer does not support compute capability "
                         f"{capability_str}, unset VLLM_USE_FLASHINFER_SAMPLER=1."
                     )
-                # Users must opt in explicitly via VLLM_USE_FLASHINFER_SAMPLER=1.
-                logger.info_once(
-                    "Using FlashInfer for top-p & top-k sampling.",
-                    scope="global",
-                )
-                self.forward = self.forward_cuda
+                else:
+                    # Default-on path; hardware can't run FlashInfer →
+                    # quietly fall back to the PyTorch-native sampler
+                    # instead of failing server startup.
+                    logger.warning_once(
+                        "FlashInfer top-p/top-k sampling not supported on "
+                        "compute capability %s; falling back to PyTorch-native "
+                        "sampler. Set VLLM_USE_FLASHINFER_SAMPLER=0 to silence.",
+                        capability.as_version_str(),
+                    )
+                    self.forward = self.forward_native
             else:
-                logger.debug_once(
-                    "FlashInfer top-p/top-k sampling is available but disabled "
-                    "by default. Set VLLM_USE_FLASHINFER_SAMPLER=1 to opt in "
-                    "after verifying accuracy for your workloads."
+                # User explicitly set VLLM_USE_FLASHINFER_SAMPLER=0.
+                logger.info_once(
+                    "FlashInfer top-p/top-k sampling disabled via "
+                    "VLLM_USE_FLASHINFER_SAMPLER=0; using PyTorch-native sampler."
                 )
                 self.forward = self.forward_native
 
@@ -70,6 +82,11 @@ def __init__(self, logprobs_mode: LogprobsMode = "raw_logprobs") -> None:
                 self.forward = self.forward_native
             else:
                 self.forward = self.forward_cpu
+        elif current_platform.is_xpu():
+            if envs.VLLM_XPU_USE_SAMPLER_KERNEL:
+                self.forward = self.forward_xpu
+            else:
+                self.forward = self.forward_native
         elif (
             logprobs_mode not in ("processed_logits", "processed_logprobs")
             and rocm_aiter_ops.is_enabled()
@@ -120,9 +137,9 @@ def forward_cuda(
         p: torch.Tensor | None,
     ) -> tuple[torch.Tensor, torch.Tensor | None]:
         """More optimized implementation for top-k and top-p sampling."""
-        # We prefer `random_sample` over `flashinfer_sample` when sorting is
-        # not needed. This is because `random_sample` does not require
-        # CPU-GPU synchronization while `flashinfer_sample` does.
+        # Fall back to the PyTorch-native path when FlashInfer has nothing
+        # to do (no top-k / top-p filter) or when per-request generators
+        # are present (unsupported by FlashInfer 0.2.3+).
         if (k is None and p is None) or generators:
             if generators:
                 logger.debug_once(
@@ -231,6 +248,54 @@ def aiter_sample(
             return torch.multinomial(renorm_probs, num_samples=1).view(-1)
         raise RuntimeError("aiter_sample was called with no active top-k or top-p.")
 
+    def forward_xpu(
+        self,
+        logits: torch.Tensor,
+        generators: dict[int, torch.Generator],
+        k: torch.Tensor | None,
+        p: torch.Tensor | None,
+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
+        if generators:
+            logger.warning_once(
+                "xpu kernel topk_topp_sampler does not support "
+                "per-request generators. Falling back to "
+                "PyTorch-native implementation."
+            )
+            return self.forward_native(logits, generators, k, p)
+        random_sampled = torch.empty(
+            logits.shape[0], dtype=torch.int64, device=logits.device
+        )
+        logits_to_return = None
+        if (
+            self.logprobs_mode == "processed_logits"
+            or self.logprobs_mode == "processed_logprobs"
+        ):
+            logits_to_return = torch.empty_like(logits)
+
+        assert len(generators) != logits.shape[0], (
+            "xpu kernel topk_topp_sampler does not support batch-wise generators."
+        )
+        generator = torch.xpu.default_generators[logits.device.index]
+
+        state = generator.get_state()
+        seed, offset = state.view(torch.int64)
+        seeds = torch.tensor(
+            [seed, offset], dtype=torch.int64, device=torch.device("cpu")
+        )
+        # The XPU kernel expects k as int64 (Long), but the input batch
+        # stores top_k as int32. Cast here to avoid dtype mismatch.
+        if k is not None:
+            k = k.to(torch.int64)
+        torch.ops.vllm.xpu_topk_topp_sampler(
+            random_sampled, logits_to_return, logits, k, p, self.logprobs_mode, seeds
+        )
+        # The custom XPU sampler kernel consumes RNG values internally, so advance
+        # the default generator's offset to keep future draws deterministic.
+        offset += logits.numel()
+        state.view(torch.int64)[1] = offset
+        generator.set_state(state)
+        return random_sampled, logits_to_return
+
 
 # Note: this is a workaround for
 # https://github.com/pytorch/pytorch/pull/151218
@@ -248,6 +313,10 @@ def apply_top_k_top_p(
     if p is None and k is None:
         return logits
 
+    # Keep CPU logits on the PyTorch path to avoid invoking Triton kernels.
+    if current_platform.is_cpu():
+        return apply_top_k_top_p_pytorch(logits, k, p, allow_cpu_sync=True)
+
     if HAS_TRITON and logits.shape[0] >= 8:
         return apply_top_k_top_p_triton(logits, k, p)
 
@@ -361,10 +430,6 @@ def flashinfer_sample(
     NOTE: The outputs of this function do not necessarily match the outputs of
     the `random_sample` function. It only guarantees that the outputs are
     statistically equivalent.
-
-    NOTE: This function includes CPU-GPU synchronization, while `random_sample`
-    does not. Call this function at the end of the forward pass to minimize
-    the synchronization overhead.
     """
     import flashinfer
 
diff --git a/vllm/v1/sample/ops/topk_topp_triton.py b/vllm/v1/sample/ops/topk_topp_triton.py
index 4c7c3e99d44b..fee87883c968 100644
--- a/vllm/v1/sample/ops/topk_topp_triton.py
+++ b/vllm/v1/sample/ops/topk_topp_triton.py
@@ -93,6 +93,7 @@ def _update_min_larger_stats(data, above_mask, min_larger, num_min_larger, senti
 @triton.jit
 def _topk_topp_kernel(
     LOGITS,
+    LOGITS_STRIDE_0,
     BUFFER,
     PERCENTILE_TO_STD_TABLE,
     NORMAL_CDF_TO_SIGMA_TABLE,
@@ -110,7 +111,7 @@ def _topk_topp_kernel(
     pid = tl.program_id(0)
     num_programs = tl.num_programs(0)
     for row_id in tl.range(pid, BATCH_SIZE, num_programs):
-        LOGITS_ROW = LOGITS + row_id * VOCAB_SIZE
+        LOGITS_ROW = LOGITS + row_id * LOGITS_STRIDE_0
         BUFFER_ROW = BUFFER + pid * VOCAB_SIZE
 
         final_pivot = -float("inf")
@@ -975,26 +976,31 @@ def apply_top_k_top_p_triton(
     to the remaining k values (by probability).
 
     Args:
-        logits: [batch_size, vocab_size] float32 tensor, modified in-place
+        logits: [batch_size, vocab_size] float32 tensor. The returned tensor
+            may alias this input or be a new contiguous tensor for unsupported
+            layouts.
         k: [batch_size] int32 tensor of top-k values per row, or None to disable top-k
         p: [batch_size] float32 tensor of top-p values per row (0 to 1),
             or None to disable top-p
         mask_value: Value for masked positions (default: -inf)
 
     Returns:
-        The logits tensor (modified in-place)
+        The masked logits tensor. It may or may not be modified in-place.
     """
     assert logits.ndim == 2
     assert logits.dtype == torch.float32
-
     batch_size, vocab_size = logits.shape
-
     topk_enabled = k is not None
     topp_enabled = p is not None
 
     if batch_size == 0 or not (topk_enabled or topp_enabled):
         return logits
 
+    # The Triton kernel supports arbitrary row strides, but it still assumes
+    # the vocab dimension is laid out contiguously within each row.
+    if logits.stride(1) != 1:
+        logits = logits.contiguous()
+
     if k is not None:
         assert k.ndim == 1 and k.shape[0] == batch_size
         k_ptr = k.to(torch.int32)
@@ -1034,6 +1040,7 @@ def apply_top_k_top_p_triton(
 
     _topk_topp_kernel[(NUM_PROGRAMS,)](
         logits,
+        logits.stride(0),
         buffer,
         percentile_to_std_table,
         normal_cdf_to_sigma_table,
diff --git a/vllm/v1/sample/rejection_sampler.py b/vllm/v1/sample/rejection_sampler.py
index d3e8573458b1..678654cb78a4 100644
--- a/vllm/v1/sample/rejection_sampler.py
+++ b/vllm/v1/sample/rejection_sampler.py
@@ -1,8 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from __future__ import annotations
+
 from collections.abc import Sequence
 from dataclasses import replace
+from typing import TYPE_CHECKING
 
 import torch
 import torch.nn as nn
@@ -17,6 +20,10 @@
 from vllm.v1.sample.ops.topk_topp_sampler import apply_top_k_top_p
 from vllm.v1.sample.sampler import Sampler
 from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
+from vllm.v1.spec_decode.utils import unconditional_to_conditional_rates
+
+if TYPE_CHECKING:
+    from vllm.config.speculative import SpeculativeConfig
 
 logger = init_logger(__name__)
 
@@ -50,13 +57,33 @@ class RejectionSampler(nn.Module):
         output tokens = accepted tokens + recovered tokens + bonus tokens
     """
 
-    def __init__(self, sampler: Sampler):
+    def __init__(
+        self,
+        sampler: Sampler,
+        spec_config: SpeculativeConfig | None = None,
+        device: torch.device | None = None,
+    ):
         super().__init__()
         self.sampler = sampler
         logprobs_mode = self.sampler.logprobs_mode
         self.is_processed_logprobs_mode = logprobs_mode.startswith("processed")
         self.is_logits_logprobs_mode = logprobs_mode.endswith("logits")
 
+        self.synthetic_conditional_rates: torch.Tensor | None = None
+        if (
+            spec_config is not None
+            and spec_config.rejection_sample_method == "synthetic"
+        ):
+            assert spec_config.synthetic_acceptance_rates is not None
+            self.synthetic_conditional_rates = torch.tensor(
+                unconditional_to_conditional_rates(
+                    spec_config.synthetic_acceptance_rates
+                ),
+                dtype=torch.float32,
+                device=device,
+            )
+        self.synthetic_mode = self.synthetic_conditional_rates is not None
+
     def forward(
         self,
         metadata: SpecDecodeMetadata,
@@ -147,6 +174,8 @@ def forward(
             target_logits,
             bonus_token_ids,
             sampling_metadata,
+            synthetic_mode=self.synthetic_mode,
+            synthetic_conditional_rates=self.synthetic_conditional_rates,
         )
 
         logprobs_tensors = None
@@ -261,16 +290,24 @@ def apply_logits_processors(
         any_penalties_or_bad_words = (
             sampling_metadata.bad_words_token_ids or has_penalties
         )
+        holder = sampling_metadata.thinking_budget_state_holder
+        needs_thinking = holder is not None and holder.has_tracked_requests()
 
         output_token_ids = sampling_metadata.output_token_ids
-        if any_penalties_or_bad_words:
+        if any_penalties_or_bad_words or needs_thinking:
             output_token_ids = self._combine_outputs_with_spec_tokens(
                 output_token_ids,
                 sampling_metadata.spec_token_ids,
             )
 
         # Calculate indices of target logits.
-        if sampling_metadata.allowed_token_ids_mask is not None or has_penalties:
+        repeat_indices: torch.Tensor | None = None
+        need_repeat_indices = (
+            sampling_metadata.allowed_token_ids_mask is not None
+            or has_penalties
+            or needs_thinking
+        )
+        if need_repeat_indices:
             num_requests = len(metadata.num_draft_tokens)
             num_draft_tokens = torch.tensor(metadata.num_draft_tokens, device="cpu")
             original_indices = torch.arange(num_requests, device="cpu")
@@ -298,7 +335,12 @@ def apply_logits_processors(
                 logits = processor.apply_with_spec_decode(
                     logits, metadata.num_draft_tokens
                 )
-
+        if holder is not None and holder.has_tracked_requests():
+            logits = holder.apply_to_logits(
+                logits,
+                predict_bonus_token=False,
+                spec_token_ids=sampling_metadata.spec_token_ids,
+            )
         return logits
 
     @staticmethod
@@ -362,6 +404,8 @@ def rejection_sample(
     # [batch_size, 1]
     bonus_token_ids: torch.Tensor,
     sampling_metadata: SamplingMetadata,
+    synthetic_mode: bool = False,
+    synthetic_conditional_rates: torch.Tensor | None = None,
 ) -> torch.Tensor:
     assert draft_token_ids.ndim == 1
     assert draft_probs is None or draft_probs.ndim == 2
@@ -389,6 +433,20 @@ def rejection_sample(
         is_greedy = None
     else:
         is_greedy = sampling_metadata.temperature == GREEDY_TEMPERATURE
+
+    # Generate uniform probabilities before either kernel because synthetic
+    # mode needs them in the greedy kernel too.  Skip only when all requests
+    # are greedy *and* synthetic mode is off (the standard fast-path).
+    # [num_tokens]
+    uniform_probs: torch.Tensor | None = None
+    if synthetic_mode or not sampling_metadata.all_greedy:
+        uniform_probs = generate_uniform_probs(
+            num_tokens,
+            num_draft_tokens,
+            sampling_metadata.generators,
+            device,
+        )
+
     if not sampling_metadata.all_random:
         # Rejection sampling for greedy sampling requests.
         target_argmax = target_logits.argmax(dim=-1)
@@ -400,6 +458,9 @@ def rejection_sample(
             bonus_token_ids,
             is_greedy,
             max_spec_len,
+            uniform_probs,
+            synthetic_conditional_rates,
+            SYNTHETIC_MODE=synthetic_mode,
         )
         if sampling_metadata.all_greedy:
             return output_token_ids
@@ -408,15 +469,6 @@ def rejection_sample(
     target_probs = target_logits.softmax(dim=-1, dtype=torch.float32)
     assert target_probs.is_contiguous()
 
-    # Generate uniform probabilities for rejection sampling.
-    # [num_tokens]
-    uniform_probs = generate_uniform_probs(
-        num_tokens,
-        num_draft_tokens,
-        sampling_metadata.generators,
-        device,
-    )
-
     # Sample recovered tokens for each position.
     # [num_tokens]
     recovered_token_ids = sample_recovered_tokens(
@@ -431,6 +483,7 @@ def rejection_sample(
     )
 
     # Rejection sampling for random sampling requests.
+    assert uniform_probs is not None
     rejection_random_sample_kernel[(batch_size,)](
         output_token_ids,
         cu_num_draft_tokens,
@@ -443,7 +496,9 @@ def rejection_sample(
         is_greedy,
         max_spec_len,
         vocab_size,
+        synthetic_conditional_rates,
         NO_DRAFT_PROBS=draft_probs is None,
+        SYNTHETIC_MODE=synthetic_mode,
     )
     return output_token_ids
 
@@ -658,6 +713,9 @@ def rejection_greedy_sample_kernel(
     bonus_token_ids_ptr,  # [batch_size]
     is_greedy_ptr,  # [batch_size] or None
     max_spec_len,
+    uniform_probs_ptr,  # [num_tokens] or None (synthetic mode only)
+    synthetic_conditional_rates_ptr,  # [num_speculative_tokens] or None
+    SYNTHETIC_MODE: tl.constexpr,
 ):
     req_idx = tl.program_id(0)
     # FIXME(woosuk): Because is_greedy_ptr is not None at profiling run,
@@ -675,14 +733,20 @@ def rejection_greedy_sample_kernel(
     for pos in range(num_draft_tokens):
         if not rejected:
             draft_token_id = tl.load(draft_token_ids_ptr + start_idx + pos)
-            target_argmax_id = tl.load(target_argmax_ptr + start_idx + pos)
+            target_argmax_id = tl.load(target_argmax_ptr + start_idx + pos).to(tl.int32)
+            if SYNTHETIC_MODE:
+                uniform_prob = tl.load(uniform_probs_ptr + start_idx + pos)
+                rate = tl.load(synthetic_conditional_rates_ptr + pos)
+                accepted = uniform_prob < rate
+                token_id = draft_token_id if accepted else target_argmax_id
+                rejected = not accepted
+            else:
+                token_id = target_argmax_id
+                rejected = draft_token_id != target_argmax_id
             tl.store(
                 output_token_ids_ptr + req_idx * (max_spec_len + 1) + pos,
-                target_argmax_id,
+                token_id,
             )
-            if draft_token_id != target_argmax_id:
-                # Reject.
-                rejected = True
 
     if not rejected:
         # If all tokens are accepted, append the bonus token.
@@ -707,7 +771,9 @@ def rejection_random_sample_kernel(
     is_greedy_ptr,  # [batch_size]
     max_spec_len,
     vocab_size,
+    synthetic_conditional_rates_ptr,  # [num_speculative_tokens] or None
     NO_DRAFT_PROBS: tl.constexpr,
+    SYNTHETIC_MODE: tl.constexpr,
 ):
     req_idx = tl.program_id(0)
     is_greedy = tl.load(is_greedy_ptr + req_idx)
@@ -723,23 +789,28 @@ def rejection_random_sample_kernel(
     for pos in range(num_draft_tokens):
         if not rejected:
             draft_token_id = tl.load(draft_token_ids_ptr + start_idx + pos)
-            if NO_DRAFT_PROBS:
-                draft_prob = 1
+            uniform_prob = tl.load(uniform_probs_ptr + start_idx + pos)
+            if SYNTHETIC_MODE:
+                rate = tl.load(synthetic_conditional_rates_ptr + pos)
+                accepted = uniform_prob < rate
             else:
-                draft_prob = tl.load(
-                    draft_probs_ptr + (start_idx + pos) * vocab_size + draft_token_id
+                if NO_DRAFT_PROBS:
+                    draft_prob = 1
+                else:
+                    draft_prob = tl.load(
+                        draft_probs_ptr
+                        + (start_idx + pos) * vocab_size
+                        + draft_token_id
+                    )
+                target_prob = tl.load(
+                    target_probs_ptr + (start_idx + pos) * vocab_size + draft_token_id
                 )
-            target_prob = tl.load(
-                target_probs_ptr + (start_idx + pos) * vocab_size + draft_token_id
-            )
-            uniform_prob = tl.load(uniform_probs_ptr + start_idx + pos)
-            # NOTE(woosuk): While the draft probability should never be 0,
-            # we check it to avoid NaNs. If it happens to be 0, we reject.
-            if draft_prob > 0 and target_prob / draft_prob >= uniform_prob:
-                # Accept.
+                # NOTE(woosuk): While the draft probability should never be 0,
+                # we check it to avoid NaNs. If it happens to be 0, we reject.
+                accepted = draft_prob > 0 and target_prob / draft_prob >= uniform_prob
+            if accepted:
                 token_id = draft_token_id
             else:
-                # Reject. Use recovered token.
                 rejected = True
                 token_id = tl.load(recovered_token_ids_ptr + start_idx + pos)
             tl.store(
diff --git a/vllm/v1/sample/sampler.py b/vllm/v1/sample/sampler.py
index 3840a70689b3..9ac3821a3261 100644
--- a/vllm/v1/sample/sampler.py
+++ b/vllm/v1/sample/sampler.py
@@ -77,7 +77,8 @@ def forward(
         # This is different from the V0 sampler, which uses the logits that
         # is used for sampling (after penalties and temperature scaling).
         num_logprobs = sampling_metadata.max_num_logprobs
-        if num_logprobs is not None:
+        raw_logprobs: torch.Tensor | None = None
+        if num_logprobs is not None or sampling_metadata.logprob_token_ids:
             if logprobs_mode == "raw_logprobs":
                 raw_logprobs = self.compute_logprobs(logits)
             elif logprobs_mode == "raw_logits":
@@ -102,8 +103,17 @@ def forward(
         # return int32 (while PyTorch argmax and topk return int64).
         sampled = sampled.long()
 
+        # Handle logprob_token_ids if specified (more efficient than full vocab)
+        # This is used by generative_scoring API to get logprobs for specific tokens
+        logprob_token_ids_tensors = None
+        if sampling_metadata.logprob_token_ids:
+            assert raw_logprobs is not None
+            logprob_token_ids_tensors = self.gather_specific_token_logprobs(
+                raw_logprobs, sampling_metadata.logprob_token_ids, sampled
+            )
+
         if num_logprobs is None:
-            logprobs_tensors = None
+            logprobs_tensors = logprob_token_ids_tensors
         elif num_logprobs == -1:
             # Return the full unsorted and unranked logprobs.
             logprobs_tensors = LogprobsTensors(
@@ -115,6 +125,11 @@ def forward(
                 raw_logprobs, num_logprobs, token_ids=sampled
             )
 
+        # If we have both num_logprobs and logprob_token_ids, prefer
+        # logprob_token_ids as it's more specific
+        if logprob_token_ids_tensors is not None and num_logprobs is not None:
+            logprobs_tensors = logprob_token_ids_tensors
+
         # Use int32 to reduce the tensor size.
         sampled = sampled.to(torch.int32)
 
@@ -128,6 +143,82 @@ def forward(
         )
         return sampler_output
 
+    def gather_specific_token_logprobs(
+        self,
+        logprobs: torch.Tensor,
+        logprob_token_ids: dict[int, list[int]],
+        sampled: torch.Tensor,
+    ) -> LogprobsTensors | None:
+        """Gather logprobs for specific token IDs requested per request.
+
+        Used by the generative_scoring API to return logprobs for an explicit
+        set of token ids rather than the top-k. Handles heterogeneous token
+        id lists across requests by padding shorter lists to the max length.
+
+        Args:
+            logprobs: [batch_size, vocab_size] tensor of (raw) logprobs to
+                gather from.
+            logprob_token_ids: dict mapping req_index -> list of token IDs
+            sampled: [batch_size] tensor of sampled token IDs
+
+        Returns:
+            LogprobsTensors with logprobs for the specified tokens, or None
+            if no requests have logprob_token_ids.
+        """
+        if not logprob_token_ids:
+            return None
+
+        batch_size = logprobs.shape[0]
+        device = logprobs.device
+
+        # Find max number of tokens across all requests
+        max_num_tokens = max(len(tids) for tids in logprob_token_ids.values())
+        pin = self.pin_memory
+
+        # Build the padded token_ids and valid_mask matrices on pinned CPU,
+        # then upload non-blocking.
+        token_ids_cpu = torch.zeros(
+            batch_size, max_num_tokens + 1, dtype=torch.int64, pin_memory=pin
+        )
+        # Create mask for valid positions (True = valid, False = padded)
+        valid_mask_cpu = torch.zeros(
+            batch_size, max_num_tokens + 1, dtype=torch.bool, pin_memory=pin
+        )
+        valid_mask_cpu[:, 0] = True  # Sampled token is always valid
+        for req_idx, token_ids in logprob_token_ids.items():
+            num_tokens = len(token_ids)
+            token_ids_cpu[req_idx, 1 : num_tokens + 1] = torch.as_tensor(
+                token_ids, dtype=torch.int64
+            )
+            valid_mask_cpu[req_idx, 1 : num_tokens + 1] = True
+
+        token_ids_tensor = token_ids_cpu.to(device, non_blocking=True)
+        valid_mask = valid_mask_cpu.to(device, non_blocking=True)
+        # Sampled token in column 0 — fill on-device from the sampled GPU
+        # tensor so we don't need to D2H + re-upload.
+        token_ids_tensor[:, 0] = sampled
+
+        # Gather logprobs at the requested token ids.
+        gathered_logprobs = logprobs.gather(-1, token_ids_tensor)
+
+        # Mask invalid (padded) positions with -inf
+        gathered_logprobs = gathered_logprobs.masked_fill(~valid_mask, float("-inf"))
+
+        # Compute ranks for the sampled token. log_softmax is monotonic w.r.t.
+        # the original logits, so ranks computed from logprobs are equivalent.
+        sampled_logprobs = logprobs.gather(-1, sampled.unsqueeze(-1))
+        # Avoid 0/1 specialization recompile on the batch dimension of the
+        # compiled batched_count_greater_than. See gather_logprobs for context.
+        torch._dynamo.decorators.mark_unbacked(logprobs, 0)
+        torch._dynamo.decorators.mark_unbacked(sampled_logprobs, 0)
+        token_ranks = batched_count_greater_than(logprobs, sampled_logprobs)
+
+        return LogprobsTensors(
+            logprob_token_ids=token_ids_tensor.to(torch.int32),
+            logprobs=gathered_logprobs,
+            selected_token_ranks=token_ranks,
+        )
+
     @staticmethod
     def apply_temperature(
         logits: torch.Tensor,
@@ -164,7 +255,10 @@ def sample(
             greedy_sampled = self.greedy_sample(logits)
             if sampling_metadata.all_greedy:
                 processed_logprobs = None
-                if sampling_metadata.max_num_logprobs is not None:
+                if (
+                    sampling_metadata.max_num_logprobs is not None
+                    or sampling_metadata.logprob_token_ids
+                ):
                     if logprobs_mode == "processed_logits":
                         processed_logprobs = logits
                     elif logprobs_mode == "processed_logprobs":
@@ -239,6 +333,12 @@ def gather_logprobs(
         token_logprobs = logprobs.gather(-1, token_ids)
 
         # Compute the ranks of the actual token.
+        # Avoid 0/1 specialization recompile on the batch dimension
+        # of the compiled batched_count_greater_than. mark_unbacked makes
+        # the size fully symbolic so dynamo doesn't specialize when
+        # batch_size transitions from 1 to >=2.
+        torch._dynamo.decorators.mark_unbacked(logprobs, 0)
+        torch._dynamo.decorators.mark_unbacked(token_logprobs, 0)
         token_ranks = batched_count_greater_than(logprobs, token_logprobs)
 
         # Concatenate together with the topk.
@@ -273,9 +373,13 @@ def apply_logits_processors(
         any_penalties_or_bad_words = (
             bool(bad_words_token_ids) or not sampling_metadata.no_penalties
         )
+        holder = sampling_metadata.thinking_budget_state_holder
+        needs_thinking_combine = holder is not None and holder.has_tracked_requests()
 
         output_token_ids = sampling_metadata.output_token_ids
-        if predict_bonus_token and any_penalties_or_bad_words:
+        if predict_bonus_token and (
+            any_penalties_or_bad_words or needs_thinking_combine
+        ):
             # Combine base outputs with spec tokens when speculative decoding
             # is enabled.
             output_token_ids = self._combine_outputs_with_spec_tokens(
@@ -297,6 +401,17 @@ def apply_logits_processors(
 
         # Apply penalties (e.g., freq_penalties).
         logits = self.apply_penalties(logits, sampling_metadata, output_token_ids)
+        if holder is not None and holder.has_tracked_requests():
+            holder.update_state(
+                output_token_ids,
+                sampling_metadata.spec_token_ids,
+                repeat_indices=None,
+            )
+            logits = holder.apply_to_logits(
+                logits,
+                predict_bonus_token,
+                sampling_metadata.spec_token_ids,
+            )
         return logits
 
     @staticmethod
diff --git a/vllm/v1/sample/thinking_budget_state.py b/vllm/v1/sample/thinking_budget_state.py
new file mode 100644
index 000000000000..ca5e2b66e03a
--- /dev/null
+++ b/vllm/v1/sample/thinking_budget_state.py
@@ -0,0 +1,524 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Per-batch thinking token budget state; applied after penalties at sample time."""
+
+from typing import TYPE_CHECKING, Any
+
+import torch
+
+from vllm.utils.torch_utils import async_tensor_h2d
+from vllm.v1.sample.logits_processor.interface import (
+    BatchUpdate,
+    MoveDirectionality,
+)
+
+if TYPE_CHECKING:
+    from vllm.config.reasoning import ReasoningConfig
+
+
+def maybe_create_thinking_budget_state_holder(
+    reasoning_config: "ReasoningConfig | None",
+    max_num_seqs: int,
+    num_spec_tokens: int,
+    device: torch.device,
+    is_pin_memory: bool,
+) -> "ThinkingBudgetStateHolder | None":
+    if reasoning_config is None:
+        return None
+    return ThinkingBudgetStateHolder(
+        reasoning_config, max_num_seqs, num_spec_tokens, device, is_pin_memory
+    )
+
+
+class ThinkingBudgetStateHolder:
+    """Tracks thinking sections and forces end tokens when budget is exceeded."""
+
+    think_start_token_ids: list[int]
+    think_end_token_ids: list[int]
+
+    def __init__(
+        self,
+        reasoning_config: "ReasoningConfig | None",
+        max_num_seqs: int,
+        num_spec_tokens: int,
+        device: torch.device,
+        is_pin_memory: bool,
+    ):
+        _ = is_pin_memory  # API parity with logits processors
+        max_num_reqs = max_num_seqs
+        self.in_spec_mode = num_spec_tokens > 0
+        self.num_spec_tokens = num_spec_tokens
+
+        # No separate enable flag: a non-``None`` ``reasoning_config`` is the switch.
+        self.is_enabled = reasoning_config is not None
+
+        if reasoning_config is None:
+            self.think_start_token_ids = []
+            self.think_end_token_ids = []
+        else:
+            rs = reasoning_config.reasoning_start_token_ids
+            re = reasoning_config.reasoning_end_token_ids
+            self.think_start_token_ids = rs if rs else []
+            self.think_end_token_ids = re if re else []
+
+        self.device = device
+        self._state: dict[int, dict[str, Any]] = {}
+        self.cu_num_tokens: dict[int, int] = {}
+
+        if self.num_spec_tokens > 0:
+            self._mask_capacity = max_num_reqs * (self.num_spec_tokens + 1)
+        else:
+            self._mask_capacity = max_num_reqs
+
+    def has_tracked_requests(self) -> bool:
+        """True when ``sync_batch`` has state for a ``thinking_token_budget`` row.
+
+        Used to decide whether sampling needs output-token rows and spec combining;
+        distinct from merely having a holder instance (reasoning may be on with no
+        budgeted requests in this batch).
+        """
+        return bool(self._state)
+
+    def sync_batch(self, batch_update: BatchUpdate | None) -> None:
+        """Add/remove/move per-request state only (no _update_think_state)."""
+        if not self.is_enabled or not batch_update:
+            return
+        for index in batch_update.removed:
+            self._state.pop(index, None)
+
+        for index, params, prompt_tok_ids, output_tok_ids in batch_update.added:
+            thinking_token_budget = params.thinking_token_budget
+            if thinking_token_budget is not None:
+                self._state[index] = self._init_state_entry(
+                    prompt_tok_ids, thinking_token_budget
+                )
+                self._state[index]["output_tok_ids"] = output_tok_ids
+                self._state[index]["spec_token_ids"] = []
+            else:
+                self._state.pop(index, None)
+
+        for i1, i2, direction in batch_update.moved:
+            if direction == MoveDirectionality.SWAP:
+                state1 = self._state.get(i1)
+                state2 = self._state.get(i2)
+                if state1 is not None:
+                    self._state[i2] = state1
+                if state2 is not None:
+                    self._state[i1] = state2
+            else:
+                state = self._state.pop(i1, None)
+                if state is not None:
+                    self._state[i2] = state
+
+    def update_state(
+        self,
+        output_token_ids: list[list[int]],
+        spec_token_ids: list[list[int]] | None,
+        repeat_indices: torch.Tensor | None = None,
+    ) -> None:
+        """Refresh output/spec from sampling rows and recompute think state."""
+        if not self.is_enabled or not self._state:
+            return
+
+        spec_lists = spec_token_ids or []
+        last_row_for_req: dict[int, int] | None = None
+        if repeat_indices is not None:
+            last_row_for_req = {}
+            rpt = repeat_indices.cpu().tolist()
+            for batch_row, req_i in enumerate(rpt):
+                last_row_for_req[req_i] = batch_row
+
+        for seq_idx, state in list(self._state.items()):
+            if last_row_for_req is not None:
+                output_row: int | None = last_row_for_req.get(seq_idx)
+                if output_row is None or output_row >= len(output_token_ids):
+                    continue
+                state["output_tok_ids"] = output_token_ids[output_row]
+            elif seq_idx >= len(output_token_ids):
+                continue
+            else:
+                state["output_tok_ids"] = output_token_ids[seq_idx]
+            if seq_idx < len(spec_lists):
+                state["spec_token_ids"] = list(spec_lists[seq_idx])
+            else:
+                state["spec_token_ids"] = []
+            state["in_spec_mode"] = self.in_spec_mode
+            state["force_index"] = []
+            if len(state["output_tok_ids"]) > 0:
+                spec_len = len(state["spec_token_ids"])
+                # Only strip draft suffix when there are spec tokens; ``[:-0]`` would
+                # clear the whole list (Python treats stop index 0 as "up to empty").
+                if spec_len > 0 and len(state["output_tok_ids"]) >= spec_len:
+                    state["output_tok_ids"] = state["output_tok_ids"][:-spec_len]
+            self._update_think_state(state)
+
+    def apply_to_logits(
+        self,
+        logits: torch.Tensor,
+        predict_bonus_token: bool,
+        spec_token_ids: list[list[int]] | None,
+    ) -> torch.Tensor:
+        """Mask and bump logits for forced end-of-thinking tokens."""
+        if not self.is_enabled or not self._state:
+            return logits
+        spec_lists = spec_token_ids or []
+        return self._apply_forcing_to_logits(logits, predict_bonus_token, spec_lists)
+
+    @staticmethod
+    def _find_last_sequence_index(target_list: list[int], token_ids: list[int]) -> int:
+        if not token_ids:
+            return -1
+        for i in range(len(target_list) - len(token_ids), -1, -1):
+            if target_list[i : i + len(token_ids)] == token_ids:
+                return i
+        return -1
+
+    def _init_state_entry(
+        self, prompt_tok_ids: list[int] | None, thinking_token_budget: int
+    ) -> dict[str, Any]:
+        if prompt_tok_ids is None:
+            last_start = -1
+            last_end = -1
+            in_think = False
+            think_count = 0
+            start_thinking = -1
+            countdown = thinking_token_budget
+            continue_thinking = False
+            in_end = False
+        else:
+            start_thinking = -1
+            countdown = thinking_token_budget
+            continue_thinking = False
+            in_end = False
+            last_start = self._find_last_sequence_index(
+                prompt_tok_ids, self.think_start_token_ids
+            )
+            last_end = self._find_last_sequence_index(
+                prompt_tok_ids, self.think_end_token_ids
+            )
+            in_think = last_start > last_end
+            # load metrics such as think count, start thinking
+            # if request is in thinking mode, already
+            if in_think:
+                think_count = len(prompt_tok_ids) - (
+                    last_start + len(self.think_start_token_ids)
+                )
+                start_thinking = len(prompt_tok_ids) - think_count - 1
+                countdown -= think_count
+                continue_thinking = True
+                # check if the token is exhausted within prompt
+                token_exhausted = thinking_token_budget - think_count
+                in_end = token_exhausted <= 0
+            else:
+                think_count = 0
+
+        return {
+            "in_think": in_think,
+            "in_end": in_end,
+            "check_count_down": countdown,
+            "think_count": think_count,
+            "end_count": 0,
+            "prompt_tok_ids": prompt_tok_ids,
+            "output_tok_ids": [],
+            "thinking_token_budget": thinking_token_budget,
+            "prev_output_length": 0,
+            "spec_token_ids": [],
+            "force_index": [],
+            "start_thinking": start_thinking,
+            "end_thinking": -1,
+            "in_spec_mode": False,
+            "bonus_token_forced": False,
+            "continue_thinking": continue_thinking,
+        }
+
+    def _update_think_state(self, state: dict[str, Any]) -> None:
+        if state.get("thinking_token_budget", -1) == -1:
+            return
+        if len(self.think_end_token_ids) == 0:
+            state["thinking_token_budget"] = -1
+            state["in_end"] = False
+            state["force_index"] = []
+            return
+
+        if state["start_thinking"] == -1:
+            start_thinking = self._find_last_sequence_index(
+                state.get("output_tok_ids", []), self.think_start_token_ids
+            )
+            state["start_thinking"] = start_thinking
+        if state["end_thinking"] == -1:
+            end_thinking = self._find_last_sequence_index(
+                state.get("output_tok_ids", []), self.think_end_token_ids
+            )
+            state["end_thinking"] = end_thinking
+
+        if state["start_thinking"] == -1:
+            return
+
+        if state["continue_thinking"]:
+            sampled_tokens_from_previous_step = len(
+                state.get("output_tok_ids", [])
+            ) - state.get("prev_output_length", 0)
+        else:
+            if state["prev_output_length"] == 0:
+                sampled_tokens_from_previous_step = len(
+                    state.get("output_tok_ids", [])
+                ) - len(self.think_start_token_ids)
+            else:
+                sampled_tokens_from_previous_step = (
+                    len(state.get("output_tok_ids", [])) - state["prev_output_length"]
+                )
+        current_step_countdown = (
+            state["check_count_down"] - sampled_tokens_from_previous_step
+        )
+        predicted_countdown = current_step_countdown - len(state["spec_token_ids"]) - 1
+        # We only proceed further if we have counted down the thinking budget
+        # to 0 or less and when we are in the "in think" mode.
+        if (
+            not state.get("in_end", False)
+            and predicted_countdown >= 0
+            and state["start_thinking"] > -1
+        ):
+            state["check_count_down"] = current_step_countdown
+            state["prev_output_length"] = len(state.get("output_tok_ids", []))
+            return
+        output = state.get("output_tok_ids", [])
+        if not output:
+            # When in_end was set at init (budget=0, prompt already in think),
+            # we must force the first generated token to be the end token;
+            # otherwise apply() sees in_end=True but force_index=[] and
+            # allows an extra thinking token.
+            if state.get("in_end", False):
+                state["force_index"] = [0]
+            return
+
+        # Track previous output length for incremental processing
+        prev_length = state.get("prev_output_length", 0)
+        current_length = len(output)
+
+        if current_length <= prev_length:
+            if state.get("in_end", False):
+                remaining_budget = state["thinking_token_budget"] - state["think_count"]
+                spec_len = len(state["spec_token_ids"])
+                if spec_len > 0:
+                    if 0 < remaining_budget < spec_len:
+                        state["force_index"] = [remaining_budget]
+                    elif remaining_budget <= 0:
+                        state["force_index"] = [0]
+                    else:
+                        state["force_index"] = [spec_len]
+                else:
+                    state["force_index"] = [0]
+            return
+
+        state["prev_output_length"] = current_length
+
+        start_len = len(self.think_start_token_ids)
+        absolute_start_pos = state["start_thinking"]
+
+        if state["continue_thinking"] and state["end_thinking"] > -1:
+            absolute_end_pos = state["end_thinking"] + len(
+                state.get("prompt_tok_ids") or []
+            )
+        else:
+            absolute_end_pos = state["end_thinking"]
+        # Update state based on recent sequences
+        # This is the case where we are in end mode, but the rejection sampler
+        # rejected a token before the end token,
+        # so we need to go back to think mode and wait for the next end token
+        # eg with 999: [2,4,5,999] -> [3,-1,-1,-1]
+        if state["in_end"] and state["end_count"] == 0:
+            new_tokens = output[prev_length:]
+            stopping_thinking = (
+                self.think_end_token_ids[state["end_count"]] in new_tokens
+            )
+            if not stopping_thinking:
+                state["in_think"] = True
+                state["in_end"] = False
+                state["end_count"] = 0
+                state["bonus_token_forced"] = False
+
+        if not state["in_end"]:
+            if absolute_start_pos >= 0 and absolute_end_pos >= 0:
+                # Case: ...<end>...<start>... - entering think mode
+                if absolute_start_pos > absolute_end_pos:
+                    new_think_count = current_length - (absolute_start_pos + start_len)
+                    state["in_think"] = True
+                    state["think_count"] = new_think_count
+                else:
+                    # Case: ...<start>...<end>... - exiting think mode
+                    state["in_think"] = False
+                    state["think_count"] = 0
+
+            elif absolute_start_pos >= 0 and not state["continue_thinking"]:
+                # Found think start - entering think mode
+                new_think_count = current_length - (absolute_start_pos + start_len)
+                state["in_think"] = True
+                state["think_count"] = new_think_count
+
+            elif absolute_end_pos >= 0:
+                # Found think end - exiting think mode
+                state["in_think"] = False
+                state["think_count"] = 0
+
+            elif state["in_think"]:
+                # Continue thinking mode, increment count by new tokens
+                prompt_tok_ids = state.get("prompt_tok_ids") or []
+                think_tokens_in_prompt = len(prompt_tok_ids) - (
+                    absolute_start_pos + start_len
+                )
+                state["think_count"] = (
+                    len(state["output_tok_ids"]) + think_tokens_in_prompt
+                )
+            if state["in_think"]:
+                remaining_budget = max(
+                    0, state["thinking_token_budget"] - state["think_count"]
+                )
+                state["check_count_down"] = remaining_budget
+            else:
+                state["check_count_down"] = state["thinking_token_budget"]
+
+            total_thinking_tokens = (
+                state["think_count"] + len(state["spec_token_ids"]) + 1
+            )
+            # Check if need to transition to end mode
+            # If we have more thinking tokens than the budget,
+            # we need to transition to end mode
+            if (
+                state["in_think"]
+                and total_thinking_tokens > state["thinking_token_budget"]
+            ):
+                # Calculate force_index: position within spec_token_ids where
+                # forcing starts. If we're already over budget without spec
+                # tokens, force from position 0. Force from the position
+                # where budget is exceeded.
+                state["in_think"] = False
+                state["in_end"] = True
+                state["end_count"] = 0
+                state["check_count_down"] = state["thinking_token_budget"]
+                remaining_budget = state["thinking_token_budget"] - state["think_count"]
+                spec_len = len(state["spec_token_ids"])
+                if 0 < remaining_budget < spec_len:
+                    state["force_index"] = [remaining_budget]
+
+                elif remaining_budget <= 0:
+                    state["force_index"] = [0]
+
+                else:
+                    # remaining_budget >= spec_len: all spec tokens are within
+                    # budget; force the bonus token position
+                    state["force_index"] = [len(state["spec_token_ids"])]
+
+        else:
+            state["force_index"] = []
+            if len(state["spec_token_ids"]) > 0:
+                for i, token_id in enumerate(state["spec_token_ids"]):
+                    if state["end_count"] + 1 < len(self.think_end_token_ids):
+                        if token_id == self.think_end_token_ids[state["end_count"] + 1]:
+                            state["end_count"] += 1
+                        else:
+                            state["end_count"] += 1
+                            state["force_index"] = [i]
+                            break
+                    else:
+                        state["end_count"] += 1
+                if len(state["force_index"]) == 0:
+                    state["end_count"] += 1
+                    state["force_index"] = [len(state["spec_token_ids"])]
+            else:
+                state["end_count"] += 1
+                state["force_index"] = [0]
+            if state["end_count"] >= len(self.think_end_token_ids):
+                state.update(
+                    {
+                        "in_end": False,
+                        "end_count": 0,
+                        "check_count_down": state["thinking_token_budget"],
+                    }
+                )
+
+    def _apply_forcing_to_logits(
+        self,
+        logits: torch.Tensor,
+        predict_bonus_token: bool,
+        spec_token_ids_for_layout: list[list[int]],
+    ) -> torch.Tensor:
+        cumulative_total = 0
+        self.cu_num_tokens.clear()
+
+        n_layout = len(spec_token_ids_for_layout)
+        if self._state:
+            n_layout = max(n_layout, max(self._state.keys()) + 1)
+
+        for index in range(n_layout):
+            self.cu_num_tokens[index] = cumulative_total
+            spec_tokens = (
+                spec_token_ids_for_layout[index]
+                if index < len(spec_token_ids_for_layout)
+                else []
+            )
+            if self.in_spec_mode:
+                cumulative_total += len(spec_tokens) if not predict_bonus_token else 1
+            else:
+                cumulative_total += 1
+
+        # Build the active index / forced-token lists entirely on CPU so we
+        # avoid per-iteration scalar sync writes to GPU tensors.
+        active_indices_cpu: list[int] = []
+        force_tokens_cpu: list[int] = []
+
+        for seq_idx in sorted(self._state.keys()):
+            if seq_idx not in self.cu_num_tokens:
+                continue
+            state = self._state[seq_idx]
+            if state.get("in_end", False):
+                # logits processor in spec mode are called twice
+                # once for bonus token logits and
+                # second time for the target logits
+                # in case the force index is bonus token index
+                # we change the force index to 0
+                if predict_bonus_token:
+                    if state.get("force_index") and state["force_index"][0] < len(
+                        state["spec_token_ids"]
+                    ):
+                        continue
+                    else:
+                        state["force_index"] = [0]
+                # continue enforcing the end thinking tokens
+                if state["end_count"] > 0:
+                    state["bonus_token_forced"] = False
+                if state and not state["bonus_token_forced"]:
+                    force_index = state.get("force_index", [])
+                    if len(force_index) == 0:
+                        continue
+                    end_count = state.get("end_count", 0)
+                    for force_idx in force_index:
+                        if end_count < len(self.think_end_token_ids):
+                            mask_idx = self.cu_num_tokens[seq_idx] + force_idx
+                            if (
+                                mask_idx < self._mask_capacity
+                                and mask_idx < logits.shape[0]
+                            ):
+                                active_indices_cpu.append(mask_idx)
+                                force_tokens_cpu.append(
+                                    self.think_end_token_ids[end_count]
+                                )
+                            if predict_bonus_token:
+                                if state["end_count"] > 0:
+                                    state["bonus_token_forced"] = False
+                                    state["force_index"] = []
+                                else:
+                                    state["bonus_token_forced"] = True
+
+        if active_indices_cpu:
+            device = logits.device
+            active_indices = async_tensor_h2d(
+                active_indices_cpu, dtype=torch.long, device=device
+            )
+            force_tokens = async_tensor_h2d(
+                force_tokens_cpu, dtype=torch.long, device=device
+            )
+            # Avoid CPU->GPU sync.
+            fill = logits.new_full((len(active_indices_cpu),), 1e9)
+            logits.index_put_((active_indices, force_tokens), fill)
+
+        return logits
diff --git a/vllm/v1/simple_kv_offload/__init__.py b/vllm/v1/simple_kv_offload/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/v1/simple_kv_offload/copy_backend.py b/vllm/v1/simple_kv_offload/copy_backend.py
new file mode 100644
index 000000000000..114f26973767
--- /dev/null
+++ b/vllm/v1/simple_kv_offload/copy_backend.py
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""DMA copy backend for GPU<->CPU block transfers."""
+
+from __future__ import annotations
+
+import queue
+import threading
+
+import torch
+
+from vllm.logger import init_logger
+from vllm.platforms import current_platform
+from vllm.v1.simple_kv_offload.cuda_mem_ops import (
+    BatchMemcpyParams,
+    build_params,
+    copy_blocks,
+)
+
+logger = init_logger(__name__)
+
+
+class DmaCopyBackend:
+    """cuMemcpyBatchAsync copy backend (background thread)."""
+
+    def __init__(self) -> None:
+        self._store_params: BatchMemcpyParams | None = None
+        self._load_params: BatchMemcpyParams | None = None
+        self._load_stream: torch.cuda.Stream | None = None
+        self._store_stream: torch.cuda.Stream | None = None
+        self._queue: queue.SimpleQueue | None = None
+        self._thread: threading.Thread | None = None
+        self._shutdown: bool = False
+
+    def init(
+        self,
+        gpu_caches: dict[str, torch.Tensor],
+        cpu_caches: dict[str, torch.Tensor],
+        device: torch.device,
+        load_stream: torch.cuda.Stream,
+        store_stream: torch.cuda.Stream,
+    ) -> None:
+        self._load_stream = load_stream
+        self._store_stream = store_stream
+
+        self._store_params = build_params(gpu_caches, cpu_caches, store_stream)
+        self._load_params = build_params(cpu_caches, gpu_caches, load_stream)
+
+        self._queue = queue.SimpleQueue()
+        self._thread = threading.Thread(
+            target=self._copy_loop,
+            args=(self._queue, device, load_stream, store_stream),
+            daemon=True,
+        )
+        self._thread.start()
+
+    def launch_copy(
+        self,
+        src_blocks: list[int],
+        dst_blocks: list[int],
+        is_store: bool,
+        event_idx: int,
+        events_list: list[tuple[int, torch.Event]],
+    ) -> None:
+        params = self._store_params if is_store else self._load_params
+        assert params is not None and self._queue is not None
+        self._queue.put(
+            (src_blocks, dst_blocks, params, is_store, event_idx, events_list)
+        )
+
+    def shutdown(self) -> None:
+        if self._shutdown:
+            return
+        self._shutdown = True
+        if self._queue is not None:
+            self._queue.put(None)
+        if self._thread is not None:
+            self._thread.join(timeout=5.0)
+
+    @staticmethod
+    def _copy_loop(
+        q: queue.SimpleQueue,
+        device: torch.device,
+        load_stream: torch.cuda.Stream,
+        store_stream: torch.cuda.Stream,
+    ) -> None:
+        current_platform.set_device(device)
+        while True:
+            item = q.get()
+            if item is None:
+                return
+            src_blocks, dst_blocks, params, is_store, event_idx, events_list = item
+            copy_blocks(src_blocks, dst_blocks, params)
+            stream = store_stream if is_store else load_stream
+            event = torch.Event()
+            event.record(stream)
+            events_list.append((event_idx, event))
diff --git a/vllm/v1/simple_kv_offload/cuda_mem_ops.py b/vllm/v1/simple_kv_offload/cuda_mem_ops.py
new file mode 100644
index 000000000000..b4c68aff3ca9
--- /dev/null
+++ b/vllm/v1/simple_kv_offload/cuda_mem_ops.py
@@ -0,0 +1,198 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Low-level CUDA/HIP memory helpers: pinning and batch DMA transfers."""
+
+import ctypes
+from typing import Any, NamedTuple
+
+import numpy as np
+import torch
+
+from vllm.logger import init_logger
+from vllm.platforms import current_platform
+
+logger = init_logger(__name__)
+
+
+def pin_tensor(tensor: torch.Tensor) -> None:
+    """Pin a CPU tensor via cudaHostRegister.
+
+    This bypasses PyTorch's CUDACachingHostAllocator which rounds
+    every ``pin_memory=True`` allocation up to the next power of 2
+    (e.g. 100 GB becomes 128 GB).
+    """
+    err = torch.cuda.cudart().cudaHostRegister(tensor.data_ptr(), tensor.nbytes, 0)
+    if err.value != 0:
+        raise RuntimeError(f"cudaHostRegister failed: {err}")
+
+
+class _CUmemLocation(ctypes.Structure):
+    _fields_ = [("type", ctypes.c_uint), ("id", ctypes.c_int)]
+
+
+class _CUmemcpyAttributes(ctypes.Structure):
+    _fields_ = [
+        ("srcAccessOrder", ctypes.c_uint),
+        ("srcLocHint", _CUmemLocation),
+        ("dstLocHint", _CUmemLocation),
+        ("flags", ctypes.c_uint),
+    ]
+
+
+_BATCH_MEMCPY_FUNC_TYPE = ctypes.CFUNCTYPE(
+    ctypes.c_uint,  # CUresult / hipError_t
+    ctypes.c_void_p,
+    ctypes.c_void_p,
+    ctypes.c_void_p,
+    ctypes.c_size_t,
+    ctypes.c_void_p,
+    ctypes.c_void_p,
+    ctypes.c_size_t,
+    ctypes.c_void_p,
+    ctypes.c_void_p,
+)
+
+# Resolved lazily on first use.
+_batch_memcpy_fn: Any = None
+
+
+def _resolve_batch_memcpy():
+    """Resolve the platform batch-memcpy entry point (one-time).
+
+    * CUDA: ``cuMemcpyBatchAsync`` via ``cuGetProcAddress`` (uses
+      srcAccessOrder=STREAM via one attributes entry).
+    * ROCm: ``hipMemcpyBatchAsync`` from libamdhip64 (ROCm 7.1+). ROCm
+      7.2.1 or 7.2.2 rejects any call with ``numAttrs > 0``
+      (see ROCm/clr @ rocm-7.2.1 hipamd/src/hip_memory.cpp:2819-2822), so
+      we call with ``numAttrs=0``.
+
+    Raises ``RuntimeError`` if the symbol is unavailable (older CUDA
+    driver, ROCm < 7.1, unusual install). The connector requires the
+    batch API.
+    """
+    if current_platform.is_rocm():
+        try:
+            lib = ctypes.CDLL("libamdhip64.so", mode=ctypes.RTLD_GLOBAL)
+            fn = lib.hipMemcpyBatchAsync
+        except (OSError, AttributeError) as e:
+            raise RuntimeError(
+                "hipMemcpyBatchAsync is unavailable in this ROCm install; "
+                "SimpleCPUOffloadConnector requires ROCm 7.1+."
+            ) from e
+        fn.restype = ctypes.c_uint
+        fn.argtypes = [
+            ctypes.c_void_p,  # dsts
+            ctypes.c_void_p,  # srcs
+            ctypes.c_void_p,  # sizes
+            ctypes.c_size_t,  # count
+            ctypes.c_void_p,  # attrs
+            ctypes.c_void_p,  # attrIdxs
+            ctypes.c_size_t,  # numAttrs
+            ctypes.c_void_p,  # failIdx
+            ctypes.c_void_p,  # stream
+        ]
+        return fn
+
+    from cuda.bindings import driver as drv
+
+    err, ptr, _ = drv.cuGetProcAddress(b"cuMemcpyBatchAsync", 12080, 0)
+    if err != drv.CUresult.CUDA_SUCCESS:
+        raise RuntimeError(f"cuGetProcAddress(cuMemcpyBatchAsync) failed: {err}")
+    return _BATCH_MEMCPY_FUNC_TYPE(ptr)
+
+
+class BatchMemcpyParams(NamedTuple):
+    src_bases: np.ndarray  # [num_layers] uint64 — data_ptr per layer
+    dst_bases: np.ndarray  # [num_layers] uint64
+    bpb: np.ndarray  # [num_layers] uint64 — bytes per block
+    num_layers: int
+    # CUDA only: one attributes entry with srcAccessOrder=ANY. Unused on
+    # ROCm (7.2.1 or 7.2.2) because the current runtime rejects numAttrs > 0.
+    attrs: _CUmemcpyAttributes
+    attrs_idx: ctypes.c_size_t
+    # NOTE: cuMemcpyBatchAsync_v2() removed fail_idx field, but we use
+    # cuMemcpyBatchAsync() with fail_idx for backward compatibility
+    fail_idx: ctypes.c_size_t
+    stream_handle: int  # raw cudaStream_t / CUstream
+
+
+def build_params(
+    src_caches: dict[str, torch.Tensor],
+    dst_caches: dict[str, torch.Tensor],
+    stream: torch.cuda.Stream,
+) -> BatchMemcpyParams:
+    global _batch_memcpy_fn
+    if _batch_memcpy_fn is None:
+        _batch_memcpy_fn = _resolve_batch_memcpy()
+
+    assert list(src_caches.keys()) == list(dst_caches.keys())
+    src_tensors = list(src_caches.values())
+    dst_tensors = list(dst_caches.values())
+
+    src_bases, dst_bases, bpb = [], [], []
+    for s, d in zip(src_tensors, dst_tensors):
+        s_bpb = s.stride(0) * s.element_size()
+        assert s_bpb == d.stride(0) * d.element_size()
+        src_bases.append(s.data_ptr())
+        dst_bases.append(d.data_ptr())
+        bpb.append(s_bpb)
+
+    # ``srcAccessOrder=3`` == CU_MEMCPY_SRC_ACCESS_ORDER_ANY /
+    # hipMemcpySrcAccessOrderAny. See
+    # https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g6f1ff58e3065df3eb4b573dba77ad31f  # noqa: E501
+    attrs = _CUmemcpyAttributes(srcAccessOrder=3)
+
+    return BatchMemcpyParams(
+        src_bases=np.array(src_bases, dtype=np.uint64),
+        dst_bases=np.array(dst_bases, dtype=np.uint64),
+        bpb=np.array(bpb, dtype=np.uint64),
+        num_layers=len(src_tensors),
+        attrs=attrs,
+        attrs_idx=ctypes.c_size_t(0),
+        fail_idx=ctypes.c_size_t(0),
+        stream_handle=stream.cuda_stream,
+    )
+
+
+def copy_blocks(
+    src_block_ids: list[int],
+    dst_block_ids: list[int],
+    params: BatchMemcpyParams,
+) -> None:
+    """Copy blocks via cuMemcpyBatchAsync / hipMemcpyBatchAsync."""
+    n = len(src_block_ids)
+    if n == 0:
+        return
+
+    src_ids = np.array(src_block_ids, dtype=np.uint64)
+    dst_ids = np.array(dst_block_ids, dtype=np.uint64)
+
+    src_all = (
+        params.src_bases[:, None] + src_ids[None, :] * params.bpb[:, None]
+    ).ravel()
+    dst_all = (
+        params.dst_bases[:, None] + dst_ids[None, :] * params.bpb[:, None]
+    ).ravel()
+    sz_all = np.repeat(params.bpb, n)
+    total = n * params.num_layers
+
+    # ROCm 7.2.1/7.2.2 rejects any call with numAttrs>0 (hipMemcpyBatchAsync
+    # hipamd/src/hip_memory.cpp:2819-2822); CUDA uses one attrs entry so
+    # srcAccessOrder is honored. attrs / attrsIdxs are ignored when
+    # numAttrs==0, so we pass the same values from both paths.
+    num_attrs = 0 if current_platform.is_rocm() else 1
+    err = _batch_memcpy_fn(
+        dst_all.ctypes.data,
+        src_all.ctypes.data,
+        sz_all.ctypes.data,
+        total,
+        ctypes.addressof(params.attrs),
+        ctypes.byref(params.attrs_idx),
+        num_attrs,
+        ctypes.byref(params.fail_idx),
+        params.stream_handle,
+    )
+    if err != 0:
+        raise RuntimeError(
+            f"batch memcpy failed: err={err} failIdx={params.fail_idx.value}"
+        )
diff --git a/vllm/v1/simple_kv_offload/manager.py b/vllm/v1/simple_kv_offload/manager.py
new file mode 100644
index 000000000000..24b6a178ce9d
--- /dev/null
+++ b/vllm/v1/simple_kv_offload/manager.py
@@ -0,0 +1,831 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Scheduler-side manager for SimpleCPUOffloadConnector."""
+
+import contextlib
+from collections.abc import Iterable
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any
+
+from vllm.config import VllmConfig
+from vllm.distributed.kv_events import KVCacheEvent
+from vllm.distributed.kv_transfer.kv_connector.utils import yield_req_data
+from vllm.logger import init_logger
+from vllm.utils.math_utils import cdiv
+from vllm.v1.core.block_pool import BlockPool
+from vllm.v1.core.kv_cache_coordinator import (
+    KVCacheCoordinator,
+    get_kv_cache_coordinator,
+)
+from vllm.v1.core.sched.output import SchedulerOutput
+from vllm.v1.kv_cache_interface import (
+    FullAttentionSpec,
+    MambaSpec,
+    SlidingWindowSpec,
+)
+from vllm.v1.outputs import KVConnectorOutput
+from vllm.v1.simple_kv_offload.metadata import (
+    SimpleCPUOffloadMetadata,
+    SimpleCPUOffloadWorkerMetadata,
+)
+
+if TYPE_CHECKING:
+    from vllm.v1.core.kv_cache_manager import KVCacheBlocks
+    from vllm.v1.core.kv_cache_utils import KVCacheBlock
+    from vllm.v1.kv_cache_interface import KVCacheConfig
+    from vllm.v1.request import Request
+
+logger = init_logger(__name__)
+
+
+@dataclass
+class TransferMeta:
+    gpu_block_ids: list[int]
+    cpu_block_ids: list[int]
+
+
+@dataclass
+class LoadRequestState:
+    request: "Request"
+    transfer_meta: TransferMeta
+    load_event: int | None = None
+    finished: bool = False
+
+
+# NOTE: This per-request state is only used in eager mode.
+@dataclass
+class StoreRequestState:
+    request: "Request"
+    # Accumulated block IDs from scheduler_output via yield_req_data.
+    block_ids: tuple[list[int], ...]
+    # Per-group cursors tracking how many blocks have been stored/skipped.
+    num_stored_blocks: list[int]
+    store_events: set[int] = field(default_factory=set)
+    finished: bool = False
+
+
+class SimpleCPUOffloadScheduler:
+    """Scheduler-side manager for CPU offloading."""
+
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        kv_cache_config: "KVCacheConfig | None",
+        cpu_capacity_bytes: int,
+        scheduler_block_size: int,
+        hash_block_size: int,
+        lazy_offload: bool = False,
+    ):
+        self.vllm_config = vllm_config
+        self.kv_cache_config = kv_cache_config
+        self.enable_kv_cache_events = (
+            vllm_config.kv_events_config is not None
+            and vllm_config.kv_events_config.enable_kv_cache_events
+        )
+        self.block_size = scheduler_block_size
+        self.hash_block_size = hash_block_size
+        assert self.block_size % self.hash_block_size == 0
+        # Derive a CPU KVCacheConfig from the GPU config and build a coordinator
+        assert kv_cache_config is not None
+        self.cpu_kv_cache_config = self._derive_cpu_config(
+            kv_cache_config, cpu_capacity_bytes
+        )
+        self.num_cpu_blocks = self.cpu_kv_cache_config.num_blocks
+        # Find the full attention kv group for prefix cache matching.
+        self.fa_gidx = -1
+        for g_idx, g in enumerate(self.cpu_kv_cache_config.kv_cache_groups):
+            if isinstance(g.kv_cache_spec, FullAttentionSpec):
+                self.fa_gidx = g_idx
+                break
+        assert 0 <= self.fa_gidx < len(self.cpu_kv_cache_config.kv_cache_groups)
+        # FA group's own block_size; divides scheduler_block_size (the LCM)
+        # but is NOT assumed to equal it.
+        self.fa_block_size: int = self.cpu_kv_cache_config.kv_cache_groups[
+            self.fa_gidx
+        ].kv_cache_spec.block_size
+        assert self.block_size % self.fa_block_size == 0
+
+        logger.info(
+            "SimpleCPUOffloadScheduler: Allocating %d CPU blocks (%.2f GB, mode=%s)",
+            self.num_cpu_blocks,
+            cpu_capacity_bytes / (1024**3),
+            "lazy" if lazy_offload else "eager",
+        )
+
+        # TODO (yifan): maybe need to enable kv_cache_events and metrics_collector here.
+        dcp_world_size = vllm_config.parallel_config.decode_context_parallel_size
+        pcp_world_size = vllm_config.parallel_config.prefill_context_parallel_size
+        assert dcp_world_size == 1 and pcp_world_size == 1
+        self.cpu_coordinator: KVCacheCoordinator = get_kv_cache_coordinator(
+            kv_cache_config=self.cpu_kv_cache_config,
+            max_model_len=vllm_config.model_config.max_model_len,
+            max_num_batched_tokens=(
+                vllm_config.scheduler_config.max_num_batched_tokens
+            ),
+            use_eagle=False,
+            enable_caching=True,
+            enable_kv_cache_events=self.enable_kv_cache_events,
+            dcp_world_size=dcp_world_size,
+            pcp_world_size=pcp_world_size,
+            hash_block_size=self.hash_block_size,
+        )
+        self.cpu_block_pool: BlockPool = self.cpu_coordinator.block_pool
+
+        # GPU block pool reference - bound after scheduler builds kv_cache_manager
+        self._gpu_block_pool: BlockPool | None = None
+
+        # Load metadata
+        self._reqs_to_load: dict[str, LoadRequestState] = {}
+        # Inverse map: load_event_idx -> req_ids. Keyed by load_event_idx because
+        # the worker reports completions by event index, not request id.
+        self._load_event_to_reqs: dict[int, list[str]] = {}
+
+        # Pending (cpu_hit_blocks, hit_length) tuples from find_longest_cache_hit,
+        # kept pinned via touch() while awaiting update_state_after_alloc().
+        self._pending_cpu_hits: dict[
+            str, tuple[tuple[list[KVCacheBlock], ...], int]
+        ] = {}
+
+        # Store metadata
+        self._lazy_mode = lazy_offload
+        # Lazy mode: use a cursor to track the last scanned block in the GPU free queue.
+        self._cursor: KVCacheBlock | None = None
+        if self._lazy_mode:
+            self._target_free = self._estimate_lazy_target_blocks(
+                kv_cache_config,
+                vllm_config.scheduler_config.max_num_batched_tokens,
+            )
+        else:
+            self._target_free = 0
+        self._store_event_to_blocks: dict[int, TransferMeta] = {}
+        # Eager mode only
+        self._reqs_to_store: dict[str, StoreRequestState] = {}
+        self._store_event_to_reqs: dict[int, list[str]] = {}
+        self._in_flight_store_gpu_blocks: set[int] = set()
+
+        # Event counters
+        self._load_event_counter: int = 0
+        self._store_event_counter: int = 0
+
+        # For TP/PP: track partial store completions across steps.
+        # Events must be reported by all world_size workers before considered complete.
+        self._expected_worker_count = vllm_config.parallel_config.world_size
+        self._store_event_pending_counts: dict[int, int] = {}
+
+    @staticmethod
+    def _derive_cpu_config(
+        gpu_config: "KVCacheConfig", cpu_capacity_bytes: int
+    ) -> "KVCacheConfig":
+        """Derive a CPU KVCacheConfig from the GPU config.
+        Same kv_cache_groups, num_blocks scaled by CPU/GPU memory ratio."""
+        # Import here to avoid potential circular imports
+        from vllm.v1.kv_cache_interface import KVCacheConfig as KVCacheConfigCls
+        from vllm.v1.kv_cache_interface import KVCacheTensor
+
+        assert len(gpu_config.kv_cache_tensors) > 0
+
+        gpu_total_bytes = sum(t.size for t in gpu_config.kv_cache_tensors)
+        num_gpu_blocks = gpu_config.num_blocks
+        num_cpu_blocks = max(1, num_gpu_blocks * cpu_capacity_bytes // gpu_total_bytes)
+        # Create CPU kv_cache_tensors mirroring GPU by scaling size proportionally.
+        cpu_tensors = [
+            KVCacheTensor(
+                size=t.size // num_gpu_blocks * num_cpu_blocks,
+                shared_by=list(t.shared_by),
+            )
+            for t in gpu_config.kv_cache_tensors
+        ]
+
+        return KVCacheConfigCls(
+            num_blocks=num_cpu_blocks,
+            kv_cache_tensors=cpu_tensors,
+            kv_cache_groups=gpu_config.kv_cache_groups,
+        )
+
+    @staticmethod
+    def _estimate_lazy_target_blocks(
+        kv_cache_config: "KVCacheConfig", max_num_batched_tokens: int
+    ) -> int:
+        """GPU blocks to keep available (free/offloaded) per step in lazy mode."""
+        WATERMARK_RATIO = 1.0  # Reserve larger space to avoid running out of GPU blocks
+        target = 0
+        for g in kv_cache_config.kv_cache_groups:
+            spec = g.kv_cache_spec
+            if isinstance(spec, MambaSpec):
+                target += 2
+            elif isinstance(spec, SlidingWindowSpec):
+                target += cdiv(spec.sliding_window, spec.block_size) + 1
+            else:
+                target += cdiv(max_num_batched_tokens, spec.block_size)
+        return int(target * (1 + WATERMARK_RATIO))
+
+    def bind_gpu_block_pool(self, gpu_block_pool: BlockPool) -> None:
+        """Bind GPU block pool so that we can touch blocks during stores.
+        Called by Scheduler after kv_cache_manager is ready."""
+        self._gpu_block_pool = gpu_block_pool
+
+    def get_num_new_matched_tokens(
+        self, request: "Request", num_computed_tokens: int
+    ) -> tuple[int | None, bool]:
+        """Return (num_new_tokens, is_async) from consecutive CPU cache hits."""
+
+        # Pins found CPU blocks so they survive LRU eviction until
+        # update_state_after_alloc() consumes them. Any pin from an earlier
+        # call on the same request (e.g. retry after a failed allocate_slots)
+        # is dropped first.
+        if stale := self._pending_cpu_hits.pop(request.request_id, None):
+            self._free_pending_cpu_hit(stale)
+
+        num_skipped_hashes = num_computed_tokens // self.hash_block_size
+        remaining_hashes = request.block_hashes[num_skipped_hashes:]
+
+        if not remaining_hashes:
+            return 0, False
+        # Must recompute at least the last token, matching the logic in
+        # kv_cache_manager.get_computed_blocks().
+        max_hit_len = request.num_tokens - 1 - num_computed_tokens
+        if max_hit_len <= 0:
+            return 0, False
+        cpu_hit_blocks, hit_length = self.cpu_coordinator.find_longest_cache_hit(
+            remaining_hashes, max_hit_len
+        )
+
+        if hit_length > 0:
+            pin_blocks = [
+                blk for grp in cpu_hit_blocks for blk in grp if not blk.is_null
+            ]
+            self.cpu_block_pool.touch(pin_blocks)
+            self._pending_cpu_hits[request.request_id] = (
+                cpu_hit_blocks,
+                hit_length,
+            )
+            return hit_length, True
+        return 0, False
+
+    # TODO(yifan): this API now only matches the suffix part of the prefix cache. A more
+    # general API should scan blocks in both GPU and CPU block pool in a single pass.
+    def update_state_after_alloc(
+        self,
+        request: "Request",
+        blocks: "KVCacheBlocks",
+        num_external_tokens: int,
+    ) -> None:
+        req_id = request.request_id
+        block_ids_by_group = blocks.get_block_ids()
+        num_groups = len(block_ids_by_group)
+
+        # Store tracking (eager mode only). Register the request;
+        # block IDs are accumulated from scheduler_output in
+        # _prepare_eager_store_specs via yield_req_data.
+        if not self._lazy_mode and req_id not in self._reqs_to_store:
+            self._reqs_to_store[req_id] = StoreRequestState(
+                request=request,
+                block_ids=tuple([] for _ in range(num_groups)),
+                num_stored_blocks=[0] * num_groups,
+            )
+
+        # Pop the CPU hit cached by get_num_new_matched_tokens(). The
+        # found blocks were pinned there to survive LRU eviction in the window
+        # between get_num_new_matched_tokens() and this matching call.
+        pending = self._pending_cpu_hits.pop(req_id, None)
+
+        if num_external_tokens == 0:
+            if pending is not None:
+                logger.warning(
+                    "SimpleCPUOffloadScheduler: update_state_after_alloc "
+                    "called for req_id=%s with no external tokens but "
+                    "get_num_new_matched_tokens() unexpectedly recorded "
+                    "a pending CPU hit; releasing the stale pin.",
+                    req_id,
+                )
+                self._free_pending_cpu_hit(pending)
+            return
+
+        if pending is None:
+            logger.warning(
+                "SimpleCPUOffloadScheduler: update_state_after_alloc called "
+                "for req_id=%s with num_external_tokens=%d but no pending "
+                "CPU hit from get_num_new_matched_tokens(); skipping load.",
+                req_id,
+                num_external_tokens,
+            )
+            return
+
+        cpu_hit_blocks_full, _ = pending
+
+        # ``num_external_tokens`` is LCM-aligned (checked per-group below),
+        # so this counts whole scheduler-aligned chunks of incoming tokens.
+        num_blocks_to_load = num_external_tokens // self.block_size
+        assert num_blocks_to_load > 0
+        num_cached_fa_blocks = sum(
+            blk.block_hash is not None for blk in blocks.blocks[self.fa_gidx]
+        )
+        num_computed_tokens = num_cached_fa_blocks * self.fa_block_size
+
+        # Build transfer pairs across all groups.
+        total_computed_tokens = num_computed_tokens + num_external_tokens
+        kv_cache_groups = self.cpu_kv_cache_config.kv_cache_groups
+
+        # The scheduler may have accepted fewer blocks than
+        # get_num_new_matched_tokens() reported.
+        # (e.g. due to token budget in test_partial_gpu_prefix_plus_cpu_load).
+        # Take only the leading N blocks per group matching num_external_tokens;
+        # the rest will be released along with the temp pin below.
+        cpu_hit_blocks: list[list[KVCacheBlock]] = []
+        for g in range(num_groups):
+            g_block_size = kv_cache_groups[g].kv_cache_spec.block_size
+            assert num_external_tokens % g_block_size == 0, (
+                f"num_external_tokens={num_external_tokens} not aligned to "
+                f"group {g} block_size={g_block_size}"
+            )
+            n_take_g = num_external_tokens // g_block_size
+            cpu_hit_blocks.append(cpu_hit_blocks_full[g][:n_take_g])
+
+        gpu_block_ids: list[int] = []
+        cpu_block_ids: list[int] = []
+        cpu_blocks_to_touch: list[KVCacheBlock] = []
+
+        for g in range(num_groups):
+            cpu_blocks_g = cpu_hit_blocks[g]
+            n_ext_g = len(cpu_blocks_g)
+            if n_ext_g == 0:
+                continue
+
+            # Number of blocks in the computed range for this group.
+            g_block_size = kv_cache_groups[g].kv_cache_spec.block_size
+            n_computed_g = cdiv(total_computed_tokens, g_block_size)
+
+            # Back-trace: ext blocks sit at the tail of the computed range.
+            gpu_ext_start = n_computed_g - n_ext_g
+            group_gpu_ids = block_ids_by_group[g]
+
+            for i, cpu_blk in enumerate(cpu_blocks_g):
+                # Skip null blocks (e.g. sliding window or mamba padding).
+                if cpu_blk.is_null:
+                    continue
+                gpu_block_ids.append(group_gpu_ids[gpu_ext_start + i])
+                cpu_block_ids.append(cpu_blk.block_id)
+                cpu_blocks_to_touch.append(cpu_blk)
+
+        # Touch CPU blocks to prevent eviction during async load.
+        self.cpu_block_pool.touch(cpu_blocks_to_touch)
+        # Release the temporary pin held since get_num_new_matched_tokens().
+        self._free_pending_cpu_hit(pending)
+
+        # Touch GPU blocks to prevent freeing during async load
+        assert self._gpu_block_pool is not None
+        self._gpu_block_pool.touch(
+            [self._gpu_block_pool.blocks[bid] for bid in gpu_block_ids]
+        )
+
+        assert self._reqs_to_load.get(req_id) is None
+        self._reqs_to_load[req_id] = LoadRequestState(
+            request=request, transfer_meta=TransferMeta(gpu_block_ids, cpu_block_ids)
+        )
+
+    def build_connector_meta(
+        self,
+        scheduler_output: SchedulerOutput,
+    ) -> SimpleCPUOffloadMetadata:
+        # --- Stores ---
+        store_event = -1
+        store_gpu, store_cpu, store_req_ids = self.prepare_store_specs(scheduler_output)
+        if store_gpu:
+            store_event = self._store_event_counter
+            self._store_event_counter += 1
+            self._store_event_to_blocks[store_event] = TransferMeta(
+                store_gpu, store_cpu
+            )
+            if store_req_ids:  # For eager mode only, track req->blocks mapping
+                self._store_event_to_reqs[store_event] = store_req_ids
+                for req_id in store_req_ids:
+                    store_state = self._reqs_to_store.get(req_id)
+                    if store_state is not None:
+                        store_state.store_events.add(store_event)
+
+        # --- Loads ---
+        load_event = -1
+        load_gpu: list[int] = []
+        load_cpu: list[int] = []
+        load_req_ids: list[str] = []
+        for req_id, load_state in self._reqs_to_load.items():
+            if load_state.load_event is not None:
+                continue
+            assert load_state.transfer_meta is not None
+            load_gpu.extend(load_state.transfer_meta.gpu_block_ids)
+            load_cpu.extend(load_state.transfer_meta.cpu_block_ids)
+            load_req_ids.append(req_id)
+        if load_req_ids:
+            load_event = self._load_event_counter
+            self._load_event_counter += 1
+            for req_id in load_req_ids:
+                self._reqs_to_load[req_id].load_event = load_event
+            self._load_event_to_reqs[load_event] = load_req_ids
+
+        result = SimpleCPUOffloadMetadata(
+            load_event=load_event,
+            load_gpu_blocks=load_gpu,
+            load_cpu_blocks=load_cpu,
+            load_event_to_reqs=self._load_event_to_reqs,
+            store_event=store_event,
+            store_gpu_blocks=store_gpu,
+            store_cpu_blocks=store_cpu,
+            need_flush=bool(scheduler_output.preempted_req_ids),
+        )
+        return result
+
+    def prepare_store_specs(
+        self, scheduler_output: SchedulerOutput
+    ) -> tuple[list[int], list[int], list[str]]:
+        """Prepare store specs for the store event."""
+        if self._lazy_mode:
+            return self._prepare_lazy_store_specs()
+        else:
+            return self._prepare_eager_store_specs(scheduler_output)
+
+    def _prepare_lazy_store_specs(
+        self,
+    ) -> tuple[list[int], list[int], list[str]]:
+        """Single-pass cursor walk: offload cached GPU blocks near eviction.
+
+        Walks the GPU free queue from the cursor, counting blocks that are
+        free-or-offloaded (safe for the allocator to evict). Stops when
+        target_free blocks are covered or CPU capacity is reached.
+        """
+        gpu_pool = self._gpu_block_pool
+        if gpu_pool is None or self._target_free <= 0:
+            return [], [], []
+
+        free_queue = gpu_pool.free_block_queue
+        cpu_pool = self.cpu_block_pool
+        num_cpu_free = cpu_pool.get_num_free_blocks()
+
+        # Validate cursor: stale if block was removed from free queue.
+        if self._cursor is not None and self._cursor.ref_cnt > 0:
+            self._cursor = None
+
+        # Determine start node.
+        if self._cursor is None:
+            node = free_queue.fake_free_list_head.next_free_block
+        else:
+            node = self._cursor.next_free_block
+
+        tail = free_queue.fake_free_list_tail
+        gpu_ids: list[int] = []
+        block_hashes: list[bytes] = []
+        covered = 0
+        last_visited = self._cursor
+
+        while (
+            node is not None
+            and node is not tail
+            and covered < self._target_free
+            and len(gpu_ids) < num_cpu_free
+        ):
+            last_visited = node
+            bhash = node.block_hash
+
+            if (
+                bhash is not None
+                and not node.is_null
+                and cpu_pool.cached_block_hash_to_block.get_one_block(bhash) is None
+            ):
+                gpu_ids.append(node.block_id)
+                block_hashes.append(bhash)
+
+            covered += 1
+            node = node.next_free_block
+
+        self._cursor = last_visited
+
+        # Batch-allocate CPU blocks and stamp hashes.
+        if gpu_ids:
+            cpu_blocks = cpu_pool.get_new_blocks(len(gpu_ids))
+            cpu_ids = [blk.block_id for blk in cpu_blocks]
+            for cpu_blk, bhash in zip(cpu_blocks, block_hashes):  # type: ignore[assignment]
+                cpu_blk._block_hash = bhash  # type: ignore[assignment]
+            # Touch GPU blocks to prevent eviction during async copy.
+            gpu_pool.touch([gpu_pool.blocks[bid] for bid in gpu_ids])
+        else:
+            cpu_ids = []
+
+        return gpu_ids, cpu_ids, []
+
+    def _prepare_eager_store_specs(
+        self, scheduler_output: SchedulerOutput
+    ) -> tuple[list[int], list[int], list[str]]:
+        """Identify newly computed blocks to offload from scheduler requests.
+
+        Only considers blocks whose KV data has been **confirmed computed** by
+        the GPU. This means blocks from the current step are NOT stored until the
+        next step. If a request finishes in the same step as its last full block,
+        that block may be missed. (TODO: flush on finish.)
+
+        Returns:
+            (gpu_block_ids, cpu_block_ids, req_ids) for the store event.
+        """
+
+        merged_gpu_block_ids: list[int] = []
+        merged_cpu_block_ids: list[int] = []
+        req_ids: list[str] = []
+
+        gpu_block_pool = self._gpu_block_pool
+        if gpu_block_pool is None:
+            return [], [], []
+        cpu_block_pool = self.cpu_block_pool
+        num_free = cpu_block_pool.get_num_free_blocks()
+        kv_cache_groups = self.cpu_kv_cache_config.kv_cache_groups
+        num_groups = len(kv_cache_groups)
+        # Dedup against blocks already scheduled.
+        in_flight = self._in_flight_store_gpu_blocks
+
+        for req_id, new_block_id_groups, preempted in yield_req_data(scheduler_output):
+            state = self._reqs_to_store.get(req_id)
+            if state is None or state.finished:
+                continue
+
+            # Accumulate new block IDs.
+            if preempted:
+                state.block_ids = tuple([] for _ in range(num_groups))
+                state.num_stored_blocks = [0] * num_groups
+            if new_block_id_groups:
+                for g in range(min(num_groups, len(new_block_id_groups))):
+                    if new_block_id_groups[g] is not None:
+                        state.block_ids[g].extend(new_block_id_groups[g])
+
+            num_new_tokens = scheduler_output.num_scheduled_tokens.get(req_id, 0)
+            if num_new_tokens == 0:
+                continue
+
+            block_ids_by_group = state.block_ids
+            if not block_ids_by_group:
+                continue
+
+            # --- Phase 1: Scan blocks, classify as cached vs to-store ---
+            gpu_block_ids: list[int] = []
+            block_hashes_to_store: list[bytes] = []
+            advanced_per_group: list[int] = [0] * num_groups
+            out_of_space = False
+            # Confirmed tokens: KV data written and visible to all streams.
+            req = state.request
+            confirmed_tokens = req.num_computed_tokens - req.num_output_placeholders
+            # Cap to blocks with confirmed KV data.
+            aligned_tokens = confirmed_tokens // self.block_size * self.block_size
+
+            for g in range(num_groups):
+                # FIXME (yifan): handle CPU cache eviction, where
+                # num_stored_blocks can be stale and omit evicted blocks in
+                # the middle of the request.
+                already_stored_g = state.num_stored_blocks[g]
+                group_gpu_ids = block_ids_by_group[g]
+
+                g_block_size = kv_cache_groups[g].kv_cache_spec.block_size
+                ready_blocks_g = aligned_tokens // g_block_size
+                scannable = group_gpu_ids[already_stored_g:ready_blocks_g]
+
+                for gpu_block_id in scannable:
+                    gpu_block = gpu_block_pool.blocks[gpu_block_id]
+                    if gpu_block.is_null:
+                        advanced_per_group[g] += 1
+                        continue
+
+                    bhash_with_group = gpu_block.block_hash
+                    if bhash_with_group is None:
+                        # Masked-out SWA position the coordinator chose not to
+                        # hash; it can never serve a prefix-cache hit, so skip.
+                        advanced_per_group[g] += 1
+                        continue
+
+                    # Skip if already scheduled for store or already cached in CPU.
+                    if (
+                        gpu_block_id in in_flight
+                        or cpu_block_pool.cached_block_hash_to_block.get_one_block(
+                            bhash_with_group
+                        )
+                        is not None
+                    ):
+                        advanced_per_group[g] += 1
+                        continue
+
+                    if num_free <= 0:
+                        out_of_space = True
+                        break
+                    num_free -= 1
+
+                    gpu_block_ids.append(gpu_block_id)
+                    block_hashes_to_store.append(bhash_with_group)
+                    advanced_per_group[g] += 1
+
+                if out_of_space:
+                    break
+
+            # --- Phase 2: Batch allocate CPU blocks and stamp hashes ---
+            n_to_alloc = len(gpu_block_ids)
+            if n_to_alloc > 0:
+                cpu_blocks_alloc = cpu_block_pool.get_new_blocks(n_to_alloc)
+                cpu_block_ids = [blk.block_id for blk in cpu_blocks_alloc]
+                for cpu_blk, bhash in zip(cpu_blocks_alloc, block_hashes_to_store):
+                    cpu_blk._block_hash = bhash  # type: ignore[assignment]
+            else:
+                cpu_block_ids = []
+
+            if cpu_block_ids:
+                req_ids.append(req_id)
+                merged_gpu_block_ids.extend(gpu_block_ids)
+                merged_cpu_block_ids.extend(cpu_block_ids)
+                in_flight.update(gpu_block_ids)
+
+                # Touch GPU blocks to prevent freeing during async copy
+                gpu_block_pool.touch(
+                    [gpu_block_pool.blocks[bid] for bid in gpu_block_ids]
+                )
+
+                logger.debug(
+                    "Request %s: Scheduling store of %d blocks to CPU (%d groups)",
+                    req_id,
+                    len(cpu_block_ids),
+                    num_groups,
+                )
+
+            # Advance per-group cursors (includes cached hits + newly stored)
+            for g in range(num_groups):
+                state.num_stored_blocks[g] += advanced_per_group[g]
+
+        return merged_gpu_block_ids, merged_cpu_block_ids, req_ids
+
+    def update_connector_output(self, connector_output: KVConnectorOutput) -> None:
+        """Handle async transfer completions from worker.
+
+        Load completions arrive via finished_recving (real req_ids).
+        Store completions arrive via kv_connector_worker_meta as
+        per-event worker counts. We accumulate across steps and process
+        a store event only when all workers have reported completion.
+        """
+        # --- Load completions ---
+        for req_id in list(connector_output.finished_recving or []):
+            self._cleanup_load_request(req_id)
+
+        # --- Store completions ---
+        meta = connector_output.kv_connector_worker_meta
+        if not isinstance(meta, SimpleCPUOffloadWorkerMetadata):
+            return
+        for event_idx, count in meta.completed_store_events.items():
+            total = self._store_event_pending_counts.get(event_idx, 0) + count
+            if total >= self._expected_worker_count:
+                self._store_event_pending_counts.pop(event_idx, None)
+                self._process_store_event(event_idx)
+            else:
+                self._store_event_pending_counts[event_idx] = total
+
+    def _process_store_event(self, event_idx: int) -> None:
+        """Process a fully-completed store event."""
+        transfer = self._store_event_to_blocks.pop(event_idx)
+        if not self._lazy_mode:
+            self._in_flight_store_gpu_blocks.difference_update(transfer.gpu_block_ids)
+        self._process_store_completion(transfer.gpu_block_ids, transfer.cpu_block_ids)
+        logger.debug(
+            "Store event %d completed: cached %d blocks to CPU",
+            event_idx,
+            len(transfer.cpu_block_ids),
+        )
+
+        # Eager only: update per-req state
+        if not self._lazy_mode:
+            for req_id in self._store_event_to_reqs.pop(event_idx, []):
+                state = self._reqs_to_store.get(req_id)
+                if state is None:
+                    continue
+                state.store_events.discard(event_idx)
+                if state.finished and not state.store_events:
+                    self._cleanup_store_request(req_id)
+
+    def _process_store_completion(
+        self, gpu_block_ids: list[int], cpu_block_ids: list[int]
+    ) -> None:
+        """Cache CPU blocks per-group and release GPU refs.
+
+        Block hashes were stamped on CPU blocks at allocation time (in
+        ``_prepare_*_store_specs``).  Here we just register them in the
+        cache map so they become discoverable by the load path.
+        """
+        assert len(cpu_block_ids) == len(gpu_block_ids)
+
+        cpu_blocks = [self.cpu_block_pool.blocks[bid] for bid in cpu_block_ids]
+
+        for cpu_block in cpu_blocks:
+            bhash = cpu_block.block_hash
+            assert bhash is not None
+            self.cpu_block_pool.cached_block_hash_to_block.insert(bhash, cpu_block)
+
+        # Free CPU and GPU blocks' ref counts to turn them into prefix cache
+        self.cpu_block_pool.free_blocks(cpu_blocks)
+        assert self._gpu_block_pool is not None
+        self._gpu_block_pool.free_blocks(
+            self._gpu_block_pool.blocks[bid] for bid in gpu_block_ids
+        )
+
+    def has_pending_stores(self) -> bool:
+        """Return True if there are in-flight store transfers."""
+        return bool(self._store_event_to_blocks)
+
+    def request_finished(
+        self,
+        request: "Request",
+        block_ids: list[int],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        """Always returns (False, None). GPU blocks are protected by ref_cnt,
+        so the scheduler can free blocks immediately."""
+        req_id = request.request_id
+
+        # Release any temp CPU hit pin from get_num_new_matched_tokens()
+        # if request is canceled or preempted before update_state_after_alloc()
+        pending = self._pending_cpu_hits.pop(req_id, None)
+        if pending is not None:
+            self._free_pending_cpu_hit(pending)
+
+        # Handle load: defer cleanup if load is in-flight
+        load_state = self._reqs_to_load.get(req_id)
+        if load_state is not None:
+            if load_state.load_event is not None:
+                load_state.finished = True  # Defer: load in-flight
+            else:
+                self._cleanup_load_request(req_id)
+
+        # Handle store (eager mode only): defer cleanup if stores in-flight
+        if not self._lazy_mode:
+            store_state = self._reqs_to_store.get(req_id)
+            if store_state is not None:
+                if store_state.store_events:
+                    store_state.finished = True  # Defer: stores in-flight
+                else:
+                    self._cleanup_store_request(req_id)
+
+        return False, None
+
+    def request_finished_all_groups(
+        self,
+        request: "Request",
+        block_ids: tuple[list[int], ...],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        return self.request_finished(request, block_ids=[])
+
+    def _free_pending_cpu_hit(self, pending: tuple) -> None:
+        """Release the temporary CPU block pin taken in get_num_new_matched_tokens()."""
+        cpu_hit_blocks, _ = pending
+        blocks_to_free = [
+            blk for grp in cpu_hit_blocks for blk in grp if not blk.is_null
+        ]
+        if blocks_to_free:
+            self.cpu_block_pool.free_blocks(blocks_to_free)
+
+    def _cleanup_load_request(self, req_id: str) -> None:
+        """Release all load resources for a request.
+
+        Shared between request_finished() and update_connector_output() paths.
+        Removes the request from _reqs_to_load, cleans up event mappings,
+        and frees CPU/GPU touch refs.
+        """
+        state = self._reqs_to_load.pop(req_id, None)
+        if state is None:
+            return
+        # Remove from load event mapping (only this req, not whole event)
+        if state.load_event is not None:
+            reqs = self._load_event_to_reqs.get(state.load_event)
+            if reqs is not None:
+                with contextlib.suppress(ValueError):
+                    reqs.remove(req_id)
+                if not reqs:
+                    self._load_event_to_reqs.pop(state.load_event, None)
+
+        if state.transfer_meta is not None:
+            # Free CPU touch refs
+            self.cpu_block_pool.free_blocks(
+                self.cpu_block_pool.blocks[bid]
+                for bid in state.transfer_meta.cpu_block_ids
+            )
+            # Free GPU touch refs
+            assert self._gpu_block_pool is not None
+            self._gpu_block_pool.free_blocks(
+                self._gpu_block_pool.blocks[bid]
+                for bid in state.transfer_meta.gpu_block_ids
+            )
+
+    def _cleanup_store_request(self, req_id: str) -> None:
+        """Release store metadata for a request.
+
+        Metadata-only cleanup but no block freeing. Job completion handles
+        block caching and GPU ref freeing via _process_store_completion().
+        """
+        state = self._reqs_to_store.pop(req_id, None)
+        if state is None:
+            return
+        for event_idx in list(state.store_events):
+            if (reqs := self._store_event_to_reqs.get(event_idx)) is not None:
+                with contextlib.suppress(ValueError):
+                    reqs.remove(req_id)
+                if not reqs:
+                    self._store_event_to_reqs.pop(event_idx, None)
+        state.store_events.clear()
+
+    def take_events(self) -> Iterable[KVCacheEvent]:
+        return self.cpu_block_pool.take_events()
diff --git a/vllm/v1/simple_kv_offload/metadata.py b/vllm/v1/simple_kv_offload/metadata.py
new file mode 100644
index 000000000000..8c8d4511ee60
--- /dev/null
+++ b/vllm/v1/simple_kv_offload/metadata.py
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Metadata for SimpleCPUOffloadConnector."""
+
+from dataclasses import dataclass, field
+
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    KVConnectorMetadata,
+    KVConnectorWorkerMetadata,
+)
+
+INVALID_JOB_ID = -1
+
+
+@dataclass
+class SimpleCPUOffloadMetadata(KVConnectorMetadata):
+    """
+    Metadata passed from scheduler to worker for CPU offload operations.
+
+    The worker receives flat block lists keyed by a monotonic event_idx.
+    Job->req_id translation is handled by the scheduler-side manager
+    (via inverse maps), so the worker never knows about request identities.
+    """
+
+    # Load event per step. INVALID_JOB_ID means no blocks to load this step.
+    load_event: int = INVALID_JOB_ID
+    load_gpu_blocks: list[int] = field(default_factory=list)
+    load_cpu_blocks: list[int] = field(default_factory=list)
+    # Reverse map: load_event->req_ids, for tracking requests with finished load events
+    load_event_to_reqs: dict[int, list[str]] = field(default_factory=dict)
+
+    # Store event per step. INVALID_JOB_ID means no blocks to store this step.
+    store_event: int = INVALID_JOB_ID
+    store_gpu_blocks: list[int] = field(default_factory=list)
+    store_cpu_blocks: list[int] = field(default_factory=list)
+
+    # Whether any requests were preempted this step and need flush pending transfers.
+    need_flush: bool = False
+
+
+@dataclass
+class SimpleCPUOffloadWorkerMetadata(KVConnectorWorkerMetadata):
+    """Worker -> Scheduler metadata for completed store events.
+
+    Each worker reports {event_idx: 1} for newly completed stores.
+    ``aggregate()`` sums counts across workers within a step.
+    The scheduler-side manager accumulates across steps and processes
+    a store completion only when count reaches ``world_size``.
+    """
+
+    completed_store_events: dict[int, int]
+
+    def aggregate(
+        self, other: "KVConnectorWorkerMetadata"
+    ) -> "KVConnectorWorkerMetadata":
+        assert isinstance(other, SimpleCPUOffloadWorkerMetadata)
+        merged = dict(self.completed_store_events)
+        for k, v in other.completed_store_events.items():
+            merged[k] = merged.get(k, 0) + v
+        return SimpleCPUOffloadWorkerMetadata(completed_store_events=merged)
diff --git a/vllm/v1/simple_kv_offload/worker.py b/vllm/v1/simple_kv_offload/worker.py
new file mode 100644
index 000000000000..c23b44f29173
--- /dev/null
+++ b/vllm/v1/simple_kv_offload/worker.py
@@ -0,0 +1,305 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Worker-side handler for SimpleCPUOffloadConnector."""
+
+from typing import TYPE_CHECKING
+
+import torch
+
+from vllm.config import VllmConfig
+from vllm.logger import init_logger
+from vllm.utils.platform_utils import is_pin_memory_available
+from vllm.v1.simple_kv_offload.copy_backend import DmaCopyBackend
+from vllm.v1.simple_kv_offload.cuda_mem_ops import pin_tensor
+from vllm.v1.simple_kv_offload.metadata import (
+    SimpleCPUOffloadMetadata,
+    SimpleCPUOffloadWorkerMetadata,
+)
+
+if TYPE_CHECKING:
+    from vllm.v1.kv_cache_interface import KVCacheConfig
+
+logger = init_logger(__name__)
+
+
+class SimpleCPUOffloadWorker:
+    """Worker-side handler for CPU offloading transfers."""
+
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        kv_cache_config: "KVCacheConfig | None",
+        cpu_capacity_bytes: int,
+    ):
+        self.vllm_config = vllm_config
+        self.kv_cache_config = kv_cache_config
+        self.cpu_capacity_bytes = cpu_capacity_bytes
+
+        self.gpu_kv_caches: dict[str, torch.Tensor] | None = None
+        self.cpu_kv_caches: dict[str, torch.Tensor] | None = None
+        self.device: torch.device | None = None
+        self.num_cpu_blocks: int = 0
+
+        # CUDA streams for the async transfers
+        self.load_stream: torch.cuda.Stream | None = None
+        self.store_stream: torch.cuda.Stream | None = None
+
+        self._backend = DmaCopyBackend()
+
+        # Ordered (event_idx, Event). Events pre-allocated on main thread.
+        self._load_events: list[tuple[int, torch.Event]] = []
+        self._store_events: list[tuple[int, torch.Event]] = []
+        # High-water marks: highest event_idx completed per stream.
+        # When the event list is empty, the hwm covers all prior events.
+        self._load_hwm: int = -1
+        self._store_hwm: int = -1
+
+        # Metadata for the current step
+        self._connector_metadata: SimpleCPUOffloadMetadata | None = None
+
+        # Pending event index sets, populated in bind_connector_metadata
+        self._pending_load_event_indices: set[int] = set()
+        self._pending_store_event_indices: set[int] = set()
+        # Completed store events to report via build_connector_worker_meta
+        self._completed_store_events: dict[int, int] = {}
+
+    def register_kv_caches(
+        self,
+        kv_caches: dict[str, torch.Tensor],
+    ) -> None:
+        """Register GPU KV caches and allocate pinned CPU tensors.
+        The worker will infer the underlying raw storage from the kv_caches.
+
+        Args:
+            kv_caches: Per-layer GPU KV caches. Values are either a single
+                tensor (attention layers) or a list of tensors (Mamba layers
+                in hybrid models). All values are included for offloading
+                by resolving to their underlying raw storage.
+        """
+        if not kv_caches:
+            logger.warning("No KV caches to offload.")
+            return
+
+        # Resolve each entry to a representative tensor for storage
+        # deduplication. For attention layers the value is already a tensor;
+        # for Mamba layers it is a list of tensors that all share the same
+        # underlying raw storage, so we take the first one.
+        def _repr_tensor(v: torch.Tensor | list[torch.Tensor]) -> torch.Tensor:
+            assert isinstance(v, torch.Tensor | list)
+            return v if isinstance(v, torch.Tensor) else v[0]
+
+        any_tensor = _repr_tensor(next(iter(kv_caches.values())))
+        self.device = any_tensor.device
+
+        assert self.kv_cache_config is not None
+        num_blocks = self.kv_cache_config.num_blocks
+
+        # Deduplicate: multiple layers may share the same backing storage.
+        seen_ptrs: dict[int, tuple[str, torch.Tensor]] = {}
+        for name, value in kv_caches.items():
+            tensor = _repr_tensor(value)
+            ptr = tensor.untyped_storage().data_ptr()
+            if ptr not in seen_ptrs:
+                seen_ptrs[ptr] = (name, tensor)
+
+        # Build [num_blocks, block_bytes] int8 views from each unique
+        # storage so that stride(0) gives block_bytes for the copy op.
+        #
+        # The physical layout varies across attention backends:
+        #   FlashAttn/ROCm:  (2, num_blocks, ...) -> K/V outermost, 2 segments
+        #   FlashInfer/MLA:  (num_blocks, ...)    -> blocks outermost, 1 segment
+        # We derive page_size_bytes = storage.nbytes() // num_blocks, then
+        # classify dims: any dim whose byte-stride exceeds page_size_bytes
+        # must be an outer segment dim (e.g. the K/V dim of size 2). A less
+        # hacky way is to update the interface with the layout.
+        unique_gpu_caches: dict[str, torch.Tensor] = {}
+        for name, tensor in seen_ptrs.values():
+            storage = tensor.untyped_storage()
+            raw = torch.empty(0, dtype=torch.int8, device=self.device).set_(
+                storage, 0, (storage.nbytes(),)
+            )
+            el = tensor.element_size()
+            page_size_bytes = storage.nbytes() // num_blocks
+            outer_dims = [
+                d for d in range(tensor.ndim) if tensor.stride(d) * el > page_size_bytes
+            ]
+            if not outer_dims:
+                unique_gpu_caches[name] = raw.view(num_blocks, -1)
+            else:
+                seg_stride = tensor.stride(outer_dims[0]) * el
+                for idx in range(tensor.shape[outer_dims[0]]):
+                    offset = idx * seg_stride
+                    chunk = raw[offset : offset + seg_stride]
+                    unique_gpu_caches[f"{name}.{idx}"] = chunk.view(num_blocks, -1)
+
+        # Compute per-tensor bytes_per_block. Tensors may have different
+        # page_size_bytes (e.g., UniformTypeKVCacheSpecs with varying head_size).
+        per_tensor_bpb = [
+            t.stride(0) * t.element_size() for t in unique_gpu_caches.values()
+        ]
+        total_bytes_per_block = sum(per_tensor_bpb)
+
+        self.num_cpu_blocks = max(1, self.cpu_capacity_bytes // total_bytes_per_block)
+
+        logger.info(
+            "SimpleCPUOffloadWorker: %d unique GPU KV tensors, "
+            "allocating %d CPU blocks (%.2f GB)",
+            len(unique_gpu_caches),
+            self.num_cpu_blocks,
+            (self.num_cpu_blocks * total_bytes_per_block) / (1024**3),
+        )
+
+        pin_memory = is_pin_memory_available()
+        if not pin_memory:
+            logger.warning(
+                "Pinned memory not available. CPU offload performance may be degraded."
+            )
+
+        self.gpu_kv_caches = unique_gpu_caches
+        self.cpu_kv_caches = {}
+        for name, gpu_tensor in unique_gpu_caches.items():
+            cpu_shape = (self.num_cpu_blocks,) + gpu_tensor.shape[1:]
+            # Allocate non-pinned first, then pin via cudaHostRegister to
+            # bypass PyTorch's CUDACachingHostAllocator which rounds up to
+            # the next power of 2 (e.g. 100 GB -> 128 GB).
+            tensor = torch.zeros(cpu_shape, dtype=gpu_tensor.dtype, device="cpu")
+            if pin_memory:
+                pin_tensor(tensor)
+            self.cpu_kv_caches[name] = tensor
+
+        # Use lowest priority so KV cache I/O yields to compute streams.
+        low_pri, _ = torch.cuda.Stream.priority_range()
+        self.load_stream = torch.cuda.Stream(priority=low_pri)
+        self.store_stream = torch.cuda.Stream(priority=low_pri)
+
+        # Initialize copy backend with caches and streams.
+        self._backend.init(
+            self.gpu_kv_caches,
+            self.cpu_kv_caches,
+            self.device,
+            self.load_stream,
+            self.store_stream,
+        )
+
+    def bind_connector_metadata(self, metadata: SimpleCPUOffloadMetadata) -> None:
+        self._connector_metadata = metadata
+        if metadata.load_event >= 0:
+            self._pending_load_event_indices.add(metadata.load_event)
+        if metadata.store_event >= 0:
+            self._pending_store_event_indices.add(metadata.store_event)
+
+    def clear_connector_metadata(self) -> None:
+        self._connector_metadata = None
+
+    def start_load_kv(self) -> None:
+        # NOTE: we defer launching both load and store to get_finished(),
+        # which runs after model execution. This hides the CPU-side
+        # block copy op overhead (~5ms) behind GPU compute.
+        pass
+
+    def wait_for_save(self) -> None:
+        pass
+
+    def get_finished(
+        self,
+        finished_req_ids: set[str],
+    ) -> tuple[set[str] | None, set[str] | None]:
+        """Submit transfers and report completed events to the scheduler.
+
+        Called after model execution. The manager only schedules stores for
+        blocks whose KV data is confirmed computed, so we launch both loads
+        and stores immediately — no deferral or cross-stream sync needed.
+
+        Returns:
+            tuple of (finished_sending, finished_recving).
+            - finished_sending: always None (stores use worker metadata).
+            - finished_recving: req_ids whose loads have completed.
+        """
+        # (1) Submit transfers
+        metadata = self._connector_metadata
+        if metadata is not None:
+            # Launch loads (CPU->GPU).
+            if metadata.load_cpu_blocks:
+                self._backend.launch_copy(
+                    metadata.load_cpu_blocks,
+                    metadata.load_gpu_blocks,
+                    is_store=False,
+                    event_idx=metadata.load_event,
+                    events_list=self._load_events,
+                )
+            # Launch stores (GPU->CPU).
+            if metadata.store_gpu_blocks:
+                self._backend.launch_copy(
+                    metadata.store_gpu_blocks,
+                    metadata.store_cpu_blocks,
+                    is_store=True,
+                    event_idx=metadata.store_event,
+                    events_list=self._store_events,
+                )
+
+        # (2) Track completed transfer events
+        finished_recving: set[str] = set()
+
+        if self._pending_load_event_indices:
+            load_wm = self._poll_stream_events(is_store=False)
+            for j in [j for j in self._pending_load_event_indices if j <= load_wm]:
+                self._pending_load_event_indices.discard(j)
+                req_ids = (
+                    metadata.load_event_to_reqs.get(j) if metadata is not None else None
+                )
+                if req_ids:
+                    finished_recving.update(req_ids)
+
+        if self._pending_store_event_indices:
+            store_wm = self._poll_stream_events(is_store=True)
+            for j in [j for j in self._pending_store_event_indices if j <= store_wm]:
+                self._pending_store_event_indices.discard(j)
+                self._completed_store_events[j] = 1
+
+        return None, finished_recving or None
+
+    def build_connector_worker_meta(self) -> SimpleCPUOffloadWorkerMetadata | None:
+        """Return completed store events since the last call."""
+        if not self._completed_store_events:
+            return None
+        meta = SimpleCPUOffloadWorkerMetadata(
+            completed_store_events=self._completed_store_events,
+        )
+        self._completed_store_events = {}
+        return meta
+
+    def handle_preemptions(
+        self, kv_connector_metadata: SimpleCPUOffloadMetadata
+    ) -> None:
+        """Sync all in-flight transfers before preempted blocks are reused."""
+        if not kv_connector_metadata.need_flush:
+            return
+        self._flush_and_sync_all()
+
+    def _flush_and_sync_all(self) -> None:
+        """Synchronize all in-flight transfer events."""
+        for event_idx, event in self._load_events:
+            event.synchronize()
+            self._load_hwm = event_idx
+        self._load_events.clear()
+
+        for event_idx, event in self._store_events:
+            event.synchronize()
+            self._store_hwm = event_idx
+        self._store_events.clear()
+
+    def _poll_stream_events(self, is_store: bool) -> int:
+        """Non-blocking poll for completed events and return the high-water mark."""
+        events = self._store_events if is_store else self._load_events
+        hwm = self._store_hwm if is_store else self._load_hwm
+        while events:
+            event_idx, event = events[0]
+            if not event.query():
+                break
+            hwm = event_idx
+            events.pop(0)
+        if is_store:
+            self._store_hwm = hwm
+        else:
+            self._load_hwm = hwm
+        return hwm
diff --git a/vllm/v1/spec_decode/custom_class_proposer.py b/vllm/v1/spec_decode/custom_class_proposer.py
new file mode 100755
index 000000000000..64a17087f3e5
--- /dev/null
+++ b/vllm/v1/spec_decode/custom_class_proposer.py
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import importlib
+
+from vllm.config import VllmConfig
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+def create_custom_proposer(vllm_config: VllmConfig):
+    """Load and instantiate a user-provided proposer class.
+
+    The class path is read from ``speculative_config.model``
+    (e.g., ``"my_module.MyCustomProposer"``).  The class is
+    imported, instantiated with *vllm_config*, and returned
+    directly so the caller can use it without any wrapper.
+
+    The returned object must expose a callable ``propose`` method.
+    """
+    assert vllm_config.speculative_config is not None
+    spec_config = vllm_config.speculative_config
+
+    backend = spec_config.model
+    assert backend is not None
+
+    if "." not in backend:
+        raise ValueError(
+            f"Invalid custom proposer module path '{backend}'. "
+            "It must be a full module path (e.g., 'module.MyProposerClass')."
+        )
+
+    module_path, class_name = backend.rsplit(".", 1)
+    try:
+        module = importlib.import_module(module_path)
+    except ImportError as e:
+        raise ImportError(
+            f"Cannot import module '{module_path}' for custom proposer '{backend}': {e}"
+        ) from e
+
+    user_class = getattr(module, class_name, None)
+    if user_class is None:
+        raise AttributeError(
+            f"Module '{module_path}' has no attribute '{class_name}' "
+            f"(speculative_config.model='{backend}')"
+        )
+
+    try:
+        instance = user_class(vllm_config)
+    except Exception as e:
+        raise RuntimeError(
+            f"Failed to instantiate custom proposer class '{backend}': {e}. "
+            "The class constructor must accept VllmConfig as argument."
+        ) from e
+
+    if not hasattr(instance, "propose"):
+        raise AttributeError(
+            f"Custom proposer class '{backend}' must have a 'propose' method."
+        )
+    if not callable(instance.propose):
+        raise AttributeError(
+            f"Custom proposer class '{backend}' has a 'propose' attribute "
+            "but it is not callable."
+        )
+
+    logger.info(
+        "Loaded custom proposer class '%s' with num_speculative_tokens=%d",
+        backend,
+        spec_config.num_speculative_tokens,
+    )
+
+    return instance
diff --git a/vllm/v1/spec_decode/dflash.py b/vllm/v1/spec_decode/dflash.py
new file mode 100644
index 000000000000..db74044f4fde
--- /dev/null
+++ b/vllm/v1/spec_decode/dflash.py
@@ -0,0 +1,300 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from dataclasses import replace
+from typing import Any
+
+import torch
+from typing_extensions import override
+
+from vllm.config import VllmConfig
+from vllm.forward_context import set_forward_context
+from vllm.logger import init_logger
+from vllm.triton_utils import triton
+from vllm.v1.attention.backend import CommonAttentionMetadata
+from vllm.v1.spec_decode.llm_base_proposer import SpecDecodeBaseProposer
+from vllm.v1.spec_decode.utils import copy_and_expand_dflash_inputs_kernel
+
+logger = init_logger(__name__)
+
+
+class DFlashProposer(SpecDecodeBaseProposer):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        device: torch.device,
+        runner=None,
+    ):
+        assert vllm_config.speculative_config is not None
+        assert vllm_config.speculative_config.method == "dflash"
+        super().__init__(
+            vllm_config=vllm_config,
+            device=device,
+            pass_hidden_states_to_model=True,
+            runner=runner,
+        )
+
+        # Only next_token_ids and mask tokens are query tokens, all other context is K/V
+        self.max_query_tokens = self.max_batch_size * (1 + self.num_speculative_tokens)
+        # Positions covers both context states + query states
+        self.max_positions = self.max_num_tokens + self.max_query_tokens
+
+        # Separate context buffers to keep query buffer addresses stable for CUDA graphs
+        self._context_slot_mapping_buffer = torch.zeros(
+            self.max_num_tokens,
+            dtype=torch.int64,
+            device=device,
+        )
+        self._slot_mapping_buffer = torch.zeros(
+            self.max_query_tokens,
+            dtype=torch.int64,
+            device=device,
+        )
+        self._context_positions_buffer = torch.zeros(
+            self.max_num_tokens,
+            dtype=torch.int64,
+            device=device,
+        )
+        self.positions = torch.zeros(
+            self.max_query_tokens,
+            dtype=torch.int64,
+            device=device,
+        )
+
+        self.arange = torch.arange(
+            self.max_positions + 1, device=device, dtype=torch.int32
+        )
+
+        # For DFlash we use the input embeddings to embed the mask token
+        self.parallel_drafting_hidden_state_tensor = None
+
+    @override
+    def _create_draft_vllm_config(self) -> VllmConfig:
+        base = super()._create_draft_vllm_config()
+        return replace(
+            base,
+            attention_config=replace(
+                base.attention_config,
+                use_non_causal=True,
+            ),
+        )
+
+    @override
+    def _warn_if_multimodal(self):
+        # Override to allow multimodal inputs since DFlash supports Qwen3.5 models
+        pass
+
+    @override
+    def set_inputs_first_pass(
+        self,
+        target_token_ids: torch.Tensor,
+        next_token_ids: torch.Tensor,
+        target_positions: torch.Tensor,
+        target_hidden_states: torch.Tensor,
+        token_indices_to_sample: torch.Tensor | None,
+        cad: CommonAttentionMetadata,
+        num_rejected_tokens_gpu: torch.Tensor | None,
+    ) -> tuple[int, torch.Tensor, CommonAttentionMetadata]:
+        # DFlash cross-attention: context K/V from target hidden states,
+        # Q from query embeddings (bonus + mask tokens).
+        batch_size = cad.batch_size()
+        num_context = target_token_ids.shape[0]
+        num_query_per_req = 1 + self.num_speculative_tokens
+        num_query_total = batch_size * num_query_per_req
+
+        # Store for build_model_inputs_first_pass to use
+        self._dflash_num_context = num_context
+
+        # We don't need to copy into a buffer here since the context preprocessing
+        # does not run in a CUDA graph
+        self._dflash_hidden_states = target_hidden_states
+
+        token_indices_to_sample = torch.empty(
+            batch_size * self.num_speculative_tokens,
+            dtype=torch.int32,
+            device=self.device,
+        )
+
+        # Launch fused triton kernel for input_ids, positions, slot_mapping,
+        # and token_indices_to_sample
+        max_ctx_per_req = cad.max_query_len
+        max_tokens_per_req = max_ctx_per_req + num_query_per_req
+        BLOCK_SIZE = min(256, triton.next_power_of_2(max_tokens_per_req))
+        num_blocks = triton.cdiv(max_tokens_per_req, BLOCK_SIZE)
+        grid = (batch_size, num_blocks)
+
+        has_num_rejected = num_rejected_tokens_gpu is not None
+        copy_and_expand_dflash_inputs_kernel[grid](
+            # Inputs
+            next_token_ids_ptr=next_token_ids,
+            target_positions_ptr=target_positions,
+            # Outputs
+            out_input_ids_ptr=self.input_ids,
+            out_context_positions_ptr=self._context_positions_buffer,
+            out_query_positions_ptr=self.positions,
+            out_context_slot_mapping_ptr=self._context_slot_mapping_buffer,
+            out_query_slot_mapping_ptr=self._slot_mapping_buffer,
+            out_token_indices_ptr=token_indices_to_sample,
+            # Block table
+            block_table_ptr=cad.block_table_tensor,
+            block_table_stride=cad.block_table_tensor.stride(0),
+            # Metadata
+            query_start_loc_ptr=cad.query_start_loc,
+            num_rejected_tokens_ptr=(
+                num_rejected_tokens_gpu if has_num_rejected else 0
+            ),
+            # Scalars
+            parallel_drafting_token_id=self.parallel_drafting_token_id,
+            block_size=self.block_size,
+            num_query_per_req=num_query_per_req,
+            num_speculative_tokens=self.num_speculative_tokens,
+            total_input_tokens=num_context,
+            BLOCK_SIZE=BLOCK_SIZE,
+            HAS_NUM_REJECTED=has_num_rejected,
+        )
+
+        query_slot_mapping = self._slot_mapping_buffer[:num_query_total]
+        new_query_start_loc = self.arange[: batch_size + 1] * num_query_per_req
+
+        # In padded mode, cad.seq_lens includes rejected tokens. Subtract
+        # them so attention only sees the valid prefix of context states.
+        effective_seq_lens = cad.seq_lens
+        if has_num_rejected:
+            effective_seq_lens = effective_seq_lens - num_rejected_tokens_gpu
+
+        # Skip num_rejected_tokens (GPU-only); overestimating is fine here.
+        new_seq_lens_cpu_upper_bound = (
+            cad.seq_lens_cpu_upper_bound + num_query_per_req
+            if cad.seq_lens_cpu_upper_bound is not None
+            else None
+        )
+        new_cad = CommonAttentionMetadata(
+            query_start_loc=new_query_start_loc,
+            seq_lens=effective_seq_lens + num_query_per_req,
+            query_start_loc_cpu=(
+                torch.from_numpy(self.token_arange_np[: batch_size + 1]).clone()
+                * num_query_per_req
+            ),
+            _seq_lens_cpu=None,
+            _num_computed_tokens_cpu=None,
+            seq_lens_cpu_upper_bound=new_seq_lens_cpu_upper_bound,
+            num_reqs=cad.num_reqs,
+            num_actual_tokens=num_query_total,
+            max_query_len=num_query_per_req,
+            max_seq_len=cad.max_seq_len + num_query_per_req,
+            block_table_tensor=cad.block_table_tensor,
+            slot_mapping=query_slot_mapping,
+            causal=False,  # Non-causal attention is required for DFlash
+        )
+
+        return num_query_total, token_indices_to_sample, new_cad
+
+    @override
+    @torch.inference_mode()
+    def dummy_run(
+        self,
+        num_tokens: int,
+        use_cudagraphs: bool = True,
+        is_graph_capturing: bool = False,
+        slot_mappings: dict[str, torch.Tensor] | None = None,
+    ) -> None:
+        """
+        Key differences to default dummy_run:
+        - Only one forward pass due to parallel drafting
+        - DFlash uses context states as unpadded metadata, so hidden_states will
+        use the unpadded num_tokens instead of num_input_tokens
+        - max_query_tokens is quite small, DFlash only sees spec tokens as queries
+        - Multimodal inputs are not currently supported
+        """
+        num_query_tokens = min(num_tokens, self.max_query_tokens)
+        cudagraph_runtime_mode, num_input_tokens, num_tokens_across_dp = (
+            self._determine_batch_execution_and_padding(
+                num_query_tokens, use_cudagraphs=use_cudagraphs
+            )
+        )
+
+        # Slot mapping sized to num_input_tokens (query only), matching
+        # the K/V tensor size from the model forward.  Context KVs are
+        # pre-inserted separately and don't flow through the model.
+        if (
+            self._draft_attn_layer_names
+            and slot_mappings is not None
+            and next(iter(self._draft_attn_layer_names)) in slot_mappings
+        ):
+            slot_mapping_dict = self._get_slot_mapping(num_input_tokens)
+        else:
+            slot_mapping_dict = slot_mappings or {}
+
+        # Context and query positions use separate buffers; no copy needed.
+        context_positions = self._context_positions_buffer[:num_tokens]
+        # Context states will be passed directly to the precomputation without
+        # going through the buffer, since no CUDA graph is used for the precomputation.
+        # For the dummy run, we use the dummy buffer.
+        context_states = self.hidden_states[:num_tokens]
+
+        # Run the KV projection (GEMM + norms + RoPE) for memory profiling,
+        self.model.precompute_and_store_context_kv(context_states, context_positions)
+        with set_forward_context(
+            None,
+            self.vllm_config,
+            num_tokens=num_input_tokens,
+            num_tokens_across_dp=num_tokens_across_dp,
+            cudagraph_runtime_mode=cudagraph_runtime_mode,
+            slot_mapping=slot_mapping_dict,
+        ):
+            self.model(
+                input_ids=self.input_ids[:num_input_tokens],
+                positions=self._get_positions(num_input_tokens),
+                inputs_embeds=None,
+            )
+
+    @override
+    def build_model_inputs_first_pass(
+        self,
+        num_tokens: int,
+        num_input_tokens: int,
+        mm_embed_inputs: tuple[list[torch.Tensor], torch.Tensor] | None,
+    ) -> tuple[dict[str, Any], int]:
+        # Context and query positions/slots were written to separate
+        # buffers by the kernel — no copy needed.
+        num_context = self._dflash_num_context
+
+        # Pre-insert context KVs directly into cache
+        self.model.precompute_and_store_context_kv(
+            self._dflash_hidden_states,  # Shape is already [num_context, hidden_size]
+            self._context_positions_buffer[:num_context],
+            self._context_slot_mapping_buffer[:num_context],
+        )
+        return (
+            dict(
+                input_ids=self.input_ids[:num_input_tokens],
+                positions=self._get_positions(num_input_tokens),
+                inputs_embeds=None,
+            ),
+            num_input_tokens,
+        )
+
+    @override
+    def build_per_group_and_layer_attn_metadata(
+        self, cad: CommonAttentionMetadata, draft_index: int = 0
+    ) -> tuple[list[object], dict[str, object]]:
+        per_group, per_layer = super().build_per_group_and_layer_attn_metadata(
+            cad, draft_index
+        )
+        for layer_name, attn_metadata in per_layer.items():
+            assert getattr(attn_metadata, "causal", None) is False, (
+                f"Attention metadata for layer {layer_name} does not have"
+                " non-causal support, which is required for DFlash."
+                " Consider using a different attention backend, such as FlashAttention."
+            )
+        return per_group, per_layer
+
+    @override
+    def _get_eagle3_use_aux_hidden_state_from_config(self):
+        use_aux_hidden_state = True
+        dflash_config = getattr(
+            self.draft_model_config.hf_config, "dflash_config", None
+        )
+        if dflash_config is not None:
+            use_aux_hidden_state = dflash_config.get("use_aux_hidden_state", True)
+        return use_aux_hidden_state
diff --git a/vllm/v1/spec_decode/draft_model.py b/vllm/v1/spec_decode/draft_model.py
index 9633e2ef6ca2..a8c8ab03b615 100644
--- a/vllm/v1/spec_decode/draft_model.py
+++ b/vllm/v1/spec_decode/draft_model.py
@@ -9,7 +9,7 @@
 from vllm.config.utils import replace
 from vllm.logger import init_logger
 from vllm.model_executor.model_loader import get_model
-from vllm.v1.spec_decode.eagle import SpecDecodeBaseProposer
+from vllm.v1.spec_decode.llm_base_proposer import SpecDecodeBaseProposer
 
 logger = init_logger(__name__)
 
diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
index 4ccccfea42dc..002d0b7833a4 100644
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -1,1686 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import ast
-from importlib.util import find_spec
-from typing import cast
 
-import numpy as np
 import torch
-import torch.nn as nn
 
-from vllm.config import (
-    CUDAGraphMode,
-    VllmConfig,
-    get_layers_from_vllm_config,
-    replace,
-)
-from vllm.distributed.parallel_state import get_pp_group
-from vllm.forward_context import set_forward_context
-from vllm.logger import init_logger
-from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
-from vllm.model_executor.model_loader import get_model
-from vllm.model_executor.models import supports_multimodal
-from vllm.model_executor.models.deepseek_eagle3 import Eagle3DeepseekV2ForCausalLM
-from vllm.model_executor.models.interfaces import SupportsMultiModal
-from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM
-from vllm.multimodal import MULTIMODAL_REGISTRY
-from vllm.platforms import current_platform
-from vllm.triton_utils import triton
-from vllm.utils.platform_utils import is_pin_memory_available
-from vllm.v1.attention.backend import CommonAttentionMetadata
-from vllm.v1.attention.backends.registry import AttentionBackendEnum
-from vllm.v1.attention.backends.tree_attn import (
-    TreeAttentionMetadata,
-    TreeAttentionMetadataBuilder,
-)
-from vllm.v1.attention.backends.triton_attn import TritonAttentionMetadata
-from vllm.v1.cudagraph_dispatcher import CudagraphDispatcher
-from vllm.v1.kv_cache_interface import KVCacheConfig, UniformTypeKVCacheSpecs
-from vllm.v1.sample.metadata import SamplingMetadata
-from vllm.v1.sample.sampler import _SAMPLING_EPS
-from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
-from vllm.v1.spec_decode.utils import (
-    PADDING_SLOT_ID,
-    compute_new_slot_mapping,
-    copy_and_expand_eagle_inputs_kernel,
-    eagle_prepare_inputs_padded_kernel,
-    eagle_prepare_next_token_padded_kernel,
-    eagle_step_update_slot_mapping_and_metadata,
-    extend_all_queries_by_N,
-)
-from vllm.v1.utils import CpuGpuBuffer
-from vllm.v1.worker.dp_utils import coordinate_batch_across_dp
-from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch
-from vllm.v1.worker.utils import AttentionGroup
-
-logger = init_logger(__name__)
-
-
-class SpecDecodeBaseProposer:
-    def __init__(
-        self,
-        vllm_config: VllmConfig,
-        device: torch.device,
-        pass_hidden_states_to_model: bool,
-        runner=None,
-    ):
-        self.vllm_config = vllm_config
-        assert vllm_config.speculative_config is not None
-        self.speculative_config = vllm_config.speculative_config
-        self.draft_model_config = self.speculative_config.draft_model_config
-        self.method = self.speculative_config.method
-        self.pass_hidden_states_to_model = pass_hidden_states_to_model
-
-        self.device = device
-        self.dtype = vllm_config.model_config.dtype
-        self.max_model_len = vllm_config.model_config.max_model_len
-        self.dp_rank = vllm_config.parallel_config.data_parallel_rank
-        self.num_speculative_tokens = self.speculative_config.num_speculative_tokens
-
-        # We need to get the hidden size from the draft model config because
-        # the draft model's hidden size can be different from the target model's
-        # hidden size (e.g., Llama 3.3 70B).
-        self.hidden_size = self.draft_model_config.get_hidden_size()
-        self.inputs_embeds_size = self.draft_model_config.get_inputs_embeds_size()
-
-        # Unifying eagle, draft model, and parallel drafting support
-        self.parallel_drafting: bool = self.speculative_config.parallel_drafting
-        self.extra_slots_per_request = (
-            1 if not self.parallel_drafting else self.num_speculative_tokens
-        )
-        self.net_num_new_slots_per_request = self.extra_slots_per_request - (
-            1 if self.pass_hidden_states_to_model else 0
-        )
-        self.needs_extra_input_slots = self.net_num_new_slots_per_request > 0
-
-        self.parallel_drafting_token_id: int = 0
-        self.parallel_drafting_hidden_state_tensor: torch.Tensor | None = None
-        if self.parallel_drafting:
-            self._init_parallel_drafting_params()
-        self.use_local_argmax_reduction: bool = (
-            self.speculative_config.use_local_argmax_reduction
-        )
-
-        max_batch_size = vllm_config.scheduler_config.max_num_seqs
-        self.max_num_tokens = vllm_config.scheduler_config.max_num_batched_tokens
-        self.token_arange_np = np.arange(self.max_num_tokens)
-
-        # Multi-modal data support
-        self.mm_registry = MULTIMODAL_REGISTRY
-        self.supports_mm_inputs = self.mm_registry.supports_multimodal_inputs(
-            vllm_config.model_config
-        )
-
-        self.draft_attn_groups: list[AttentionGroup] = []
-        self.kv_cache_gid: int = -1
-        self.eagle3_use_aux_hidden_state: bool = (
-            self._get_eagle3_use_aux_hidden_state_from_config()
-        )
-
-        self.compilation_config = self.vllm_config.compilation_config
-
-        # Cudagraph dispatcher for PIECEWISE-only dispatching in eagle.
-        # Keys are initialized later via initialize_cudagraph_keys() called from
-        # gpu_model_runner._check_and_update_cudagraph_mode after
-        # adjust_cudagraph_sizes_for_spec_decode is called.
-        self.cudagraph_dispatcher = CudagraphDispatcher(self.vllm_config)
-
-        # persistent buffers for cuda graph
-        self.input_ids = torch.zeros(
-            self.max_num_tokens, dtype=torch.int32, device=device
-        )
-        # Use draft model's M-RoPE setting, not target model's
-        # Draft models may be text-only even if target is multimodal
-        self.uses_mrope = self.draft_model_config.uses_mrope
-        self.uses_xdrope_dim = self.vllm_config.model_config.uses_xdrope_dim
-        self.draft_uses_xdrope_dim = self.draft_model_config.uses_xdrope_dim
-        if self.uses_mrope:
-            # NOTE: `mrope_positions` is implemented with one additional dummy
-            # position on purpose to make it non-contiguous so that it can work
-            # with torch compile.
-            # See detailed explanation in https://github.com/vllm-project/vllm/pull/12128#discussion_r1926431923
-
-            # NOTE: When M-RoPE is enabled, position ids are 3D regardless of
-            # the modality of inputs. For text-only inputs, each dimension has
-            # identical position IDs, making M-RoPE functionally equivalent to
-            # 1D-RoPE.
-            # See page 5 of https://arxiv.org/abs/2409.12191
-            self.mrope_positions = torch.zeros(
-                (3, self.max_num_tokens + 1), dtype=torch.int64, device=device
-            )
-        elif self.uses_xdrope_dim > 0 and self.draft_uses_xdrope_dim > 0:
-            self.xdrope_positions = torch.zeros(
-                (self.uses_xdrope_dim, self.max_num_tokens + 1),
-                dtype=torch.int64,
-                device=device,
-            )
-        else:
-            # RoPE need (max_num_tokens,)
-            self.positions = torch.zeros(
-                self.max_num_tokens, dtype=torch.int64, device=device
-            )
-        self.hidden_states = torch.zeros(
-            (self.max_num_tokens, self.hidden_size), dtype=self.dtype, device=device
-        )
-
-        # Will be set when we initialize the attention backend
-        self.block_size: int = -1
-
-        # We need +1 here because the arange is used to set query_start_loc,
-        # which has one more element than batch_size.
-        max_num_slots_for_arange = max(max_batch_size + 1, self.max_num_tokens)
-        self.arange = torch.arange(
-            max_num_slots_for_arange, device=device, dtype=torch.int32
-        )
-
-        if self.needs_extra_input_slots:
-            self._raise_if_padded_drafter_batch_disabled()
-            self._raise_if_multimodal()
-            self._raise_if_mrope()
-
-        self.is_rejected_token_mask: torch.Tensor | None = None
-        self.is_masked_token_mask: torch.Tensor | None = None
-        if self.needs_extra_input_slots:
-            # For draft models and parallel drafting, we need to keep track of
-            # which tokens are rejected to update the slot mapping with padding slots.
-            self.is_rejected_token_mask = torch.zeros(
-                (self.max_num_tokens,), dtype=torch.bool, device=device
-            )
-            # For parallel drafting, we also need to keep track of which tokens
-            # are parallel-padding tokens used to sample at later positions.
-            # We populate this tensor even when using draft models for simplicity.
-            self.is_masked_token_mask = torch.zeros(
-                (self.max_num_tokens,), dtype=torch.bool, device=device
-            )
-
-        self.inputs_embeds = torch.zeros(
-            (self.max_num_tokens, self.inputs_embeds_size),
-            dtype=self.dtype,
-            device=device,
-        )
-
-        self.backup_next_token_ids = CpuGpuBuffer(
-            max_batch_size,
-            dtype=torch.int32,
-            pin_memory=is_pin_memory_available(),
-            device=device,
-            with_numpy=True,
-        )
-
-        self._slot_mapping_buffer = torch.zeros(
-            self.max_num_tokens, dtype=torch.int64, device=device
-        )
-
-        # Determine allowed attention backends once during initialization.
-        self.allowed_attn_types: tuple | None = None
-        if current_platform.is_rocm():
-            from vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse import (
-                ROCMAiterMLASparseMetadata,
-            )
-            from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
-
-            rocm_types = [
-                TritonAttentionMetadata,
-                RocmAttentionMetadata,
-                ROCMAiterMLASparseMetadata,
-            ]
-            # ROCM_AITER_FA is an optional backend
-            # We check is_enabled() here to avoid importing the backend module during
-            # auto-discovery when VLLM_ROCM_USE_AITER=0, which would trigger aiter
-            # import and JIT compilation warnings. Explicit backend selection via
-            # attention_config still works because the backend module is loaded
-            # directly when selected, not through this auto-discovery path.
-            # Check if backend module exists to allow explicit selection
-            if find_spec(
-                AttentionBackendEnum.ROCM_AITER_FA.get_path(include_classname=False)
-            ):
-                from vllm.v1.attention.backends.rocm_aiter_fa import (
-                    AiterFlashAttentionMetadata,
-                )
-
-                rocm_types.append(AiterFlashAttentionMetadata)
-
-            # TRITON_MLA backend support for MLA models (e.g., DeepSeek)
-            from vllm.model_executor.layers.attention.mla_attention import (
-                MLACommonMetadata,
-            )
-
-            rocm_types.append(MLACommonMetadata)
-
-            # FlexAttention backend support
-            from vllm.v1.attention.backends.flex_attention import FlexAttentionMetadata
-
-            rocm_types.append(FlexAttentionMetadata)
-
-            self.allowed_attn_types = tuple(rocm_types)
-
-        # Parse the speculative token tree.
-        spec_token_tree = self.speculative_config.speculative_token_tree
-        assert spec_token_tree is not None
-        self.tree_choices: list[tuple[int, ...]] = ast.literal_eval(spec_token_tree)
-        tree_depth = len(self.tree_choices[-1])
-        # Precompute per-level properties of the tree.
-        num_drafts_per_level = [0] * tree_depth
-        for node in self.tree_choices:
-            num_drafts_per_level[len(node) - 1] += 1
-        self.cu_drafts_per_level = [num_drafts_per_level[0]]
-        self.child_drafts_per_level = [num_drafts_per_level[0]]
-        for level in range(1, tree_depth):
-            self.cu_drafts_per_level.append(
-                self.cu_drafts_per_level[-1] + num_drafts_per_level[level]
-            )
-            self.child_drafts_per_level.append(
-                num_drafts_per_level[level] // num_drafts_per_level[level - 1]
-            )
-        # Precompute draft position offsets in flattened tree.
-        self.tree_draft_pos_offsets = torch.arange(
-            1, len(self.tree_choices) + 1, device=device, dtype=torch.int32
-        ).repeat(max_batch_size, 1)
-
-    def _raise_if_padded_drafter_batch_disabled(self):
-        if self.speculative_config.disable_padded_drafter_batch:
-            raise NotImplementedError(
-                "Speculative Decoding with draft models or parallel drafting only "
-                "supports padded drafter batch. Please unset "
-                "disable_padded_drafter_batch in the speculative_config."
-            )
-
-    def _raise_if_multimodal(self):
-        if self.supports_mm_inputs:
-            raise NotImplementedError(
-                "Speculative Decoding with draft models or parallel drafting "
-                "does not support multimodal models yet"
-            )
-
-    def _raise_if_mrope(self):
-        if self.draft_model_config.uses_mrope:
-            raise NotImplementedError(
-                "Speculative Decoding with draft models or parallel drafting "
-                "does not support M-RoPE yet"
-            )
-
-    def _init_parallel_drafting_params(self):
-        # For parallel drafting, we need the token ID to use for masked slots
-        # And for EAGLE + parallel drafting, we need the hidden state tensor to use
-        # for those masked slots.
-
-        model_hf_config = self.draft_model_config.hf_config
-        if hasattr(model_hf_config, "pard_token"):
-            self.parallel_drafting_token_id = model_hf_config.pard_token
-        elif hasattr(model_hf_config, "ptd_token_id"):
-            self.parallel_drafting_token_id = model_hf_config.ptd_token_id
-        else:
-            raise ValueError(
-                "For parallel drafting, the draft model config must have "
-                "`pard_token` or `ptd_token_id` specified in its config.json."
-            )
-
-        if self.pass_hidden_states_to_model:
-            self.parallel_drafting_hidden_state_tensor = torch.empty(
-                self.hidden_size, dtype=self.dtype, device=self.device
-            )
-
-    def _get_positions(self, num_tokens: int):
-        if self.uses_mrope:
-            return self.mrope_positions[:, :num_tokens]
-        if self.uses_xdrope_dim > 0 and self.draft_uses_xdrope_dim > 0:
-            return self.xdrope_positions[:, :num_tokens]
-        return self.positions[:num_tokens]
-
-    def _set_positions(self, num_tokens: int, positions: torch.Tensor):
-        if self.uses_mrope:
-            self.mrope_positions[:, :num_tokens] = positions
-        elif self.uses_xdrope_dim > 0 and self.draft_uses_xdrope_dim > 0:
-            self.xdrope_positions[:, :num_tokens] = positions
-        else:
-            # Convert M-RoPE positions if target model uses M-RoPE
-            # but draft doesn't, For text inputs, all M-RoPE
-            # dimensions are identical
-            if self.vllm_config.model_config.uses_mrope:
-                positions = positions[0]
-            self.positions[:num_tokens] = positions
-
-    def _get_slot_mapping(
-        self,
-        num_tokens: int,
-        slot_mapping: torch.Tensor | None = None,
-    ) -> dict[str, torch.Tensor]:
-        """Return slot_mapping dict for EAGLE layers.
-
-        If slot_mapping is provided, copies it into the buffer first.
-        """
-        if slot_mapping is not None:
-            num_actual = slot_mapping.shape[0]
-            self._slot_mapping_buffer[:num_actual].copy_(slot_mapping)
-            if num_tokens > num_actual:
-                self._slot_mapping_buffer[num_actual:num_tokens].fill_(PADDING_SLOT_ID)
-
-        view = self._slot_mapping_buffer[:num_tokens]
-        return {name: view for name in self._draft_attn_layer_names}
-
-    def initialize_cudagraph_keys(self, cudagraph_mode: CUDAGraphMode) -> None:
-        """Initialize cudagraph dispatcher keys for eagle.
-
-        Eagle only supports PIECEWISE cudagraphs (via mixed_mode).
-        This should be called after adjust_cudagraph_sizes_for_spec_decode.
-        """
-        if (
-            not self.speculative_config.enforce_eager
-            and cudagraph_mode.mixed_mode()
-            in [CUDAGraphMode.PIECEWISE, CUDAGraphMode.FULL]
-        ):
-            eagle_cudagraph_mode = CUDAGraphMode.PIECEWISE
-        else:
-            eagle_cudagraph_mode = CUDAGraphMode.NONE
-
-        self.cudagraph_dispatcher.initialize_cudagraph_keys(eagle_cudagraph_mode)
-
-    def _greedy_sample(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        """Greedy-sample draft tokens from hidden states."""
-        if self.use_local_argmax_reduction:
-            return self.model.get_top_tokens(hidden_states)
-        return self.model.compute_logits(hidden_states).argmax(dim=-1)
-
-    def propose(
-        self,
-        # [num_tokens]
-        target_token_ids: torch.Tensor,
-        # [num_tokens] or [3, num_tokens] when M-RoPE is enabled
-        target_positions: torch.Tensor,
-        # [num_tokens, hidden_size]
-        target_hidden_states: torch.Tensor,
-        # [batch_size]
-        next_token_ids: torch.Tensor,
-        token_indices_to_sample: torch.Tensor | None,
-        common_attn_metadata: CommonAttentionMetadata,
-        sampling_metadata: SamplingMetadata,
-        mm_embed_inputs: tuple[list[torch.Tensor], torch.Tensor] | None = None,
-        num_rejected_tokens_gpu: torch.Tensor | None = None,
-        slot_mappings: dict[str, torch.Tensor]
-        | list[dict[str, torch.Tensor]]
-        | None = None,
-    ) -> torch.Tensor:
-        batch_size = common_attn_metadata.batch_size()
-
-        if self.method == "eagle3":
-            assert isinstance(
-                self.model, (Eagle3LlamaForCausalLM, Eagle3DeepseekV2ForCausalLM)
-            )
-            target_hidden_states = self.model.combine_hidden_states(
-                target_hidden_states
-            )
-            assert target_hidden_states.shape[-1] == self.hidden_size
-
-        num_tokens, token_indices_to_sample, common_attn_metadata = (
-            self.set_inputs_first_pass(
-                target_token_ids=target_token_ids,
-                next_token_ids=next_token_ids,
-                target_positions=target_positions,
-                target_hidden_states=target_hidden_states,
-                token_indices_to_sample=token_indices_to_sample,
-                cad=common_attn_metadata,
-                num_rejected_tokens_gpu=num_rejected_tokens_gpu,
-            )
-        )
-
-        per_layer_attn_metadata: dict[str, object] = {}
-        for attn_group in self.draft_attn_groups:
-            attn_metadata = attn_group.get_metadata_builder().build_for_drafting(
-                common_attn_metadata=common_attn_metadata, draft_index=0
-            )
-            for layer_name in attn_group.layer_names:
-                per_layer_attn_metadata[layer_name] = attn_metadata
-
-        cudagraph_runtime_mode, num_input_tokens, num_tokens_across_dp = (
-            self._determine_batch_execution_and_padding(num_tokens)
-        )
-
-        if self.supports_mm_inputs:
-            mm_embeds, is_mm_embed = mm_embed_inputs or (None, None)
-
-            self.inputs_embeds[:num_tokens] = self.model.embed_input_ids(
-                self.input_ids[:num_tokens],
-                multimodal_embeddings=mm_embeds,
-                is_multimodal=is_mm_embed,
-            )
-
-            input_ids = None
-            inputs_embeds = self.inputs_embeds[:num_input_tokens]
-        else:
-            input_ids = self.input_ids[:num_input_tokens]
-            inputs_embeds = None
-
-        model_kwargs = {
-            "input_ids": input_ids,
-            "positions": self._get_positions(num_input_tokens),
-            "inputs_embeds": inputs_embeds,
-        }
-        if self.pass_hidden_states_to_model:
-            model_kwargs["hidden_states"] = self.hidden_states[:num_input_tokens]
-
-        with set_forward_context(
-            per_layer_attn_metadata,
-            self.vllm_config,
-            num_tokens=num_input_tokens,
-            num_tokens_across_dp=num_tokens_across_dp,
-            cudagraph_runtime_mode=cudagraph_runtime_mode,
-            slot_mapping=self._get_slot_mapping(
-                num_input_tokens, common_attn_metadata.slot_mapping
-            ),
-        ):
-            ret_hidden_states = self.model(**model_kwargs)
-            if not self.model_returns_tuple():
-                last_hidden_states = ret_hidden_states
-                hidden_states = last_hidden_states
-            else:
-                last_hidden_states, hidden_states = ret_hidden_states
-
-        sample_hidden_states = last_hidden_states[token_indices_to_sample]
-
-        # Early exit if there is only one draft token to be generated.
-        if self.num_speculative_tokens == 1 or self.parallel_drafting:
-            draft_token_ids = self._greedy_sample(sample_hidden_states)
-            return draft_token_ids.view(-1, self.num_speculative_tokens)
-
-        if self.uses_mrope:
-            positions = self.mrope_positions[:, token_indices_to_sample]
-        else:
-            positions = self.positions[token_indices_to_sample]
-        hidden_states = hidden_states[token_indices_to_sample]
-
-        if isinstance(attn_metadata, TreeAttentionMetadata):
-            # Draft using tree attention - requires full logits for top-k
-            logits = self.model.compute_logits(sample_hidden_states)
-            draft_token_ids_list = self.propose_tree(
-                batch_size=batch_size,
-                logits=logits,
-                positions=positions,
-                hidden_states=hidden_states,
-                common_attn_metadata=common_attn_metadata,
-                slot_mappings=slot_mappings,
-            )
-            # [batch_size, num_tree_tokens]
-            return torch.cat(draft_token_ids_list, dim=1)
-
-        draft_token_ids = self._greedy_sample(sample_hidden_states)
-
-        if self.allowed_attn_types is not None and not isinstance(
-            attn_metadata, self.allowed_attn_types
-        ):
-            raise ValueError(
-                f"Unsupported attention metadata type for speculative "
-                "decoding with num_speculative_tokens > 1: "
-                f"{type(attn_metadata)}. Supported types are: "
-                f"{self.allowed_attn_types}"
-            )
-
-        # Generate the remaining draft tokens.
-        draft_token_ids_list = [draft_token_ids]
-
-        cudagraph_runtime_mode, input_batch_size, batch_size_across_dp = (
-            self._determine_batch_execution_and_padding(batch_size)
-        )
-
-        common_attn_metadata.num_actual_tokens = batch_size
-        common_attn_metadata.max_query_len = 1
-        common_attn_metadata.query_start_loc = self.arange[: batch_size + 1]
-        common_attn_metadata.query_start_loc_cpu = torch.from_numpy(
-            self.token_arange_np[: batch_size + 1]
-        ).clone()
-
-        # In padded drafter batch, we need to adjust the sequence lengths
-        # to remove the "padding" (i.e. rejected tokens).
-        # Only apply this adjustment when we have rejected tokens
-        # (i.e., not the first proposal).
-        if self.num_speculative_tokens > 1 and num_rejected_tokens_gpu is not None:
-            common_attn_metadata.seq_lens -= num_rejected_tokens_gpu
-            # Invalidate the CPU-side shadows to avoid H<>D sync.
-            common_attn_metadata._seq_lens_cpu = None
-            common_attn_metadata._num_computed_tokens_cpu = None
-
-        block_size = self.block_size
-        assert block_size > 0, "block_size has not been initialized."
-        for token_index in range(self.num_speculative_tokens - 1):
-            # Update the inputs.
-            # cast to int32 is crucial when eagle model is compiled.
-            # tensor.argmax() returns int64 by default.
-            input_ids = draft_token_ids_list[-1].int()
-            # Use fused kernel for slot mapping and metadata updates.
-            # Write clamped positions directly into the positions buffer to
-            # avoid an extra D2D copy for the common (non-mrope) case.
-            positions_1d = positions[0] if self.uses_mrope else positions
-            if self.uses_mrope:
-                out_pos = self.mrope_positions[0, :batch_size]
-            elif self.uses_xdrope_dim > 0 and self.draft_uses_xdrope_dim > 0:
-                out_pos = self.xdrope_positions[0, :batch_size]
-            else:
-                out_pos = self.positions[:batch_size]
-            eagle_step_update_slot_mapping_and_metadata(
-                positions_1d=positions_1d,
-                block_table_tensor=common_attn_metadata.block_table_tensor,
-                seq_lens=common_attn_metadata.seq_lens,
-                block_size=block_size,
-                max_model_len=self.max_model_len,
-                out_clamped_positions=out_pos,
-                out_slot_mapping=self._slot_mapping_buffer[:input_batch_size],
-                input_batch_size=input_batch_size,
-            )
-            common_attn_metadata.slot_mapping = self._slot_mapping_buffer[:batch_size]
-            if self.uses_mrope:
-                self.mrope_positions[1:, :batch_size] = self.mrope_positions[
-                    0, :batch_size
-                ]
-                positions = self.mrope_positions[:, :batch_size]
-            elif self.uses_xdrope_dim > 0 and self.draft_uses_xdrope_dim > 0:
-                self.xdrope_positions[1:, :batch_size] = self.xdrope_positions[
-                    0, :batch_size
-                ]
-                positions = self.xdrope_positions[0, :batch_size]
-            else:
-                positions = self.positions[:batch_size]
-            # Increment the maximum sequence length. We increment max_seq_len
-            # unconditionally even though some seq_lens may have been capped above,
-            # as max_seq_len serves as an upper bound for sequence lengths.
-            common_attn_metadata.max_seq_len = min(
-                common_attn_metadata.max_seq_len + 1, self.max_model_len
-            )
-
-            # Also update the CPU-side shadow; NOTE: this is hacky and should be
-            # removed in when common_attn_metadata.seq_lens_cpu is deprecated.
-            if common_attn_metadata._seq_lens_cpu is not None:
-                common_attn_metadata._seq_lens_cpu += 1
-            if common_attn_metadata._num_computed_tokens_cpu is not None:
-                common_attn_metadata._num_computed_tokens_cpu += 1
-
-            # Rebuild attention metadata
-            for attn_group in self.draft_attn_groups:
-                attn_metadata = attn_group.get_metadata_builder().build_for_drafting(
-                    common_attn_metadata=common_attn_metadata,
-                    draft_index=token_index + 1,
-                )
-                for layer_name in attn_group.layer_names:
-                    per_layer_attn_metadata[layer_name] = attn_metadata
-
-            # copy inputs to buffer for cudagraph
-            self.input_ids[:batch_size] = input_ids
-            self.hidden_states[:batch_size] = hidden_states
-            if self.supports_mm_inputs:
-                self.inputs_embeds[:batch_size] = self.model.embed_input_ids(input_ids)
-
-                input_ids = None
-                inputs_embeds = self.inputs_embeds[:input_batch_size]
-            else:
-                input_ids = self.input_ids[:input_batch_size]
-                inputs_embeds = None
-
-            # Run the model.
-            model_kwargs = {
-                "input_ids": input_ids,
-                "positions": self._get_positions(input_batch_size),
-                "inputs_embeds": inputs_embeds,
-            }
-            if self.pass_hidden_states_to_model:
-                model_kwargs["hidden_states"] = self.hidden_states[:input_batch_size]
-
-            with set_forward_context(
-                per_layer_attn_metadata,
-                self.vllm_config,
-                num_tokens=input_batch_size,
-                num_tokens_across_dp=batch_size_across_dp,
-                cudagraph_runtime_mode=cudagraph_runtime_mode,
-                slot_mapping=self._get_slot_mapping(input_batch_size),
-            ):
-                ret_hidden_states = self.model(**model_kwargs)
-                if not self.model_returns_tuple():
-                    last_hidden_states = ret_hidden_states
-                    hidden_states = ret_hidden_states
-                else:
-                    last_hidden_states, hidden_states = ret_hidden_states
-
-            hidden_states = hidden_states[:batch_size]
-            draft_token_ids = self._greedy_sample(last_hidden_states[:batch_size])
-            draft_token_ids_list.append(draft_token_ids)
-
-        # [batch_size, num_speculative_tokens]
-        draft_token_ids = torch.stack(draft_token_ids_list, dim=1)
-        return draft_token_ids
-
-    def set_inputs_first_pass(
-        self,
-        target_token_ids: torch.Tensor,
-        next_token_ids: torch.Tensor,
-        target_positions: torch.Tensor,
-        target_hidden_states: torch.Tensor,
-        token_indices_to_sample: torch.Tensor | None,
-        cad: CommonAttentionMetadata,
-        num_rejected_tokens_gpu: torch.Tensor | None,
-    ) -> tuple[int, torch.Tensor, CommonAttentionMetadata]:
-        if not self.needs_extra_input_slots:
-            # Default EAGLE pathway: no reshaping of input tensors needed.
-            # Simply rotate the input ids and leave the positions unchanged,
-            # Inserting the next token ids at the last slot in each request.
-            if token_indices_to_sample is None:
-                token_indices_to_sample = cad.query_start_loc[1:] - 1
-
-            num_tokens = target_token_ids.shape[0]
-            # Shift the input ids by one token.
-            # E.g., [a1, b1, b2, c1, c2, c3] -> [b1, b2, c1, c2, c3, c3]
-            self.input_ids[: num_tokens - 1] = target_token_ids[1:]
-            # Replace the last token with the next token.
-            # E.g., [b1, b2, c1, c2, c3, c3] -> [a2, b2, b3, c2, c3, c4]
-            self.input_ids[token_indices_to_sample] = next_token_ids
-
-            # copy inputs to buffer for cudagraph
-            if self.uses_xdrope_dim > 0 and self.draft_uses_xdrope_dim == 0:
-                target_positions = target_positions[0]
-            self._set_positions(num_tokens, target_positions)
-
-            self.hidden_states[:num_tokens] = target_hidden_states
-
-            return num_tokens, token_indices_to_sample, cad
-        else:
-            assert self.is_rejected_token_mask is not None
-            assert self.is_masked_token_mask is not None
-            # 1.
-            # Call a custom triton kernel to copy input_ids and positions
-            # into the correct slots in the preallocated buffers self.input_ids,
-            # self.positions.
-            batch_size = cad.batch_size()
-            # Since we might have to copy a lot of data for prefills, we select the
-            # block size based on the max query length and limit to max 256 slots/block.
-            max_num_tokens_per_request = (
-                cad.max_query_len + self.net_num_new_slots_per_request
-            )
-            BLOCK_SIZE_TOKENS = min(
-                256, triton.next_power_of_2(max_num_tokens_per_request)
-            )
-            num_blocks = (
-                max_num_tokens_per_request + BLOCK_SIZE_TOKENS - 1
-            ) // BLOCK_SIZE_TOKENS
-            total_num_input_tokens = target_token_ids.shape[0]
-            total_num_output_tokens = total_num_input_tokens + (
-                self.net_num_new_slots_per_request * batch_size
-            )
-
-            token_indices_to_sample = torch.empty(
-                batch_size * self.extra_slots_per_request,
-                dtype=torch.int32,
-                device=self.device,
-            )
-
-            # Destination indices to write target_hidden_states into drafting buffer.
-            out_hidden_state_mapping = torch.empty(
-                total_num_input_tokens, dtype=torch.int32, device=self.device
-            )
-
-            # Kernel grid: one program per request (row)
-            grid = (batch_size, num_blocks)
-            query_start_loc = cad.query_start_loc
-            query_end_loc = cad.query_start_loc[1:] - 1
-            if num_rejected_tokens_gpu is not None:
-                query_end_loc = query_end_loc - num_rejected_tokens_gpu
-            copy_and_expand_eagle_inputs_kernel[grid](
-                # (Padded) Inputs from the target model
-                target_token_ids_ptr=target_token_ids,
-                target_positions_ptr=target_positions,
-                next_token_ids_ptr=next_token_ids,  # sampled tokens, one per request
-                # Outputs to the drafting buffers
-                out_input_ids_ptr=self.input_ids,
-                out_positions_ptr=self.positions,  # Doesn't support mrope for now
-                out_is_rejected_token_mask_ptr=self.is_rejected_token_mask,
-                out_is_masked_token_mask_ptr=self.is_masked_token_mask,
-                out_new_token_indices_ptr=token_indices_to_sample,
-                out_hidden_state_mapping_ptr=out_hidden_state_mapping,
-                # Input metadata
-                query_start_loc_ptr=query_start_loc,
-                query_end_loc_ptr=query_end_loc,
-                padding_token_id=0,
-                parallel_drafting_token_id=self.parallel_drafting_token_id,
-                # Sizing info
-                # Note that we can deduce batch_size for free from the grid size
-                total_input_tokens=total_num_input_tokens,
-                num_padding_slots_per_request=self.extra_slots_per_request,
-                shift_input_ids=self.pass_hidden_states_to_model,
-                BLOCK_SIZE_TOKENS=BLOCK_SIZE_TOKENS,
-            )
-            if self.pass_hidden_states_to_model:
-                assert self.parallel_drafting_hidden_state_tensor is not None
-                self.hidden_states[out_hidden_state_mapping] = target_hidden_states
-                # Use torch.where to avoid DtoH sync from boolean indexing
-                mask = self.is_masked_token_mask[:total_num_output_tokens]
-                torch.where(
-                    mask.unsqueeze(1),
-                    self.parallel_drafting_hidden_state_tensor,
-                    self.hidden_states[:total_num_output_tokens],
-                    out=self.hidden_states[:total_num_output_tokens],
-                )
-
-            # 2.
-            # Recompute the slot mapping based on the new positions and
-            # rejection mask.
-            assert self.block_size > 0, "block_size has not been initialized."
-            new_slot_mapping = compute_new_slot_mapping(
-                cad=cad,
-                new_positions=self.positions[:total_num_output_tokens],
-                is_rejected_token_mask=self.is_rejected_token_mask[
-                    :total_num_output_tokens
-                ],
-                block_size=self.block_size,
-                num_new_tokens=self.net_num_new_slots_per_request,
-                max_model_len=self.max_model_len,
-            )
-
-            # 3. Update the common attention metadata with the new (meta)data
-            new_cad = extend_all_queries_by_N(
-                cad,
-                N=self.net_num_new_slots_per_request,
-                arange=self.arange,
-                new_slot_mapping=new_slot_mapping,
-            )
-
-            return total_num_output_tokens, token_indices_to_sample, new_cad
-
-    def model_returns_tuple(self) -> bool:
-        return self.method not in ("mtp", "draft_model")
-
-    def prepare_next_token_ids_cpu(
-        self,
-        sampled_token_ids: list[list[int]],
-        requests: dict[str, CachedRequestState],
-        gpu_input_batch: InputBatch,
-        num_scheduled_tokens: dict[str, int],
-    ) -> torch.Tensor:
-        """
-        This function is used to prepare the inputs for speculative decoding.
-        It calculates the next token ids for each request based on the sampled
-        token ids from the CPU. If a request has no sampled token ids (e.g.,
-        during the initial decoding steps), it falls back to using the request
-        state to get the next token id.
-        """
-        req_ids = gpu_input_batch.req_ids
-        next_token_ids: list[int] = []
-        for i, token_ids in enumerate(sampled_token_ids):
-            if token_ids:
-                # Common case.
-                next_token_id = token_ids[-1]
-            else:
-                # Partial prefill (rare case).
-                # Get the next token id from the request state.
-                req_id = req_ids[i]
-                req_state = requests[req_id]
-                seq_len = req_state.num_computed_tokens + num_scheduled_tokens[req_id]
-                next_token_id = req_state.get_token_id(seq_len)
-            next_token_ids.append(next_token_id)
-        next_token_ids = torch.tensor(
-            next_token_ids, dtype=torch.int32, device=self.input_ids.device
-        )
-        return next_token_ids
-
-    def prepare_next_token_ids_padded(
-        self,
-        seq_lens_cpu: torch.Tensor,
-        sampled_token_ids: torch.Tensor,
-        requests: dict[str, CachedRequestState],
-        gpu_input_batch: InputBatch,
-        discard_request_mask: torch.Tensor,
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """
-        This function is used to prepare the inputs for speculative decoding.
-        It calculates the next token ids and the number of valid sampled tokens
-        for each request, considering the "discarded" requests whose next token
-        is not sampled and comes from `request.get_token_id()` instead. This is denoted
-        the "backup" token id. It also counts rejected tokens via `sampled_token_ids`.
-        """
-        # Precompute get_token_id for when there is no valid next token
-        num_reqs = gpu_input_batch.num_reqs
-        seq_lens_list = seq_lens_cpu[:num_reqs].tolist()
-        self.backup_next_token_ids.np[:num_reqs] = np.array(
-            [
-                requests[gpu_input_batch.req_ids[i]].get_token_id(seq_lens_list[i])
-                for i in range(num_reqs)
-            ],
-            dtype=np.int32,
-        )
-        self.backup_next_token_ids.copy_to_gpu(num_reqs)
-        backup_tokens_gpu = self.backup_next_token_ids.gpu
-
-        batch_size, num_tokens = sampled_token_ids.shape
-        device = sampled_token_ids.device
-
-        assert discard_request_mask.dtype == torch.bool
-        assert backup_tokens_gpu.dtype == torch.int32
-
-        next_token_ids = torch.empty(batch_size, dtype=torch.int32, device=device)
-        valid_sampled_tokens_count = next_token_ids.new_empty(batch_size)
-
-        # Kernel grid: one program per request (row)
-        grid = (batch_size,)
-
-        # Find the next power of 2 for block sizes
-        BLOCK_SIZE_TOKENS = triton.next_power_of_2(num_tokens)
-        eagle_prepare_next_token_padded_kernel[grid](
-            sampled_token_ids,
-            discard_request_mask,
-            backup_tokens_gpu,
-            next_token_ids,
-            valid_sampled_tokens_count,
-            gpu_input_batch.vocab_size,
-            num_tokens,
-            batch_size,
-            sampled_token_ids.stride(0),
-            BLOCK_SIZE_TOKENS=BLOCK_SIZE_TOKENS,
-        )
-
-        return next_token_ids, valid_sampled_tokens_count
-
-    def prepare_inputs_padded(
-        self,
-        common_attn_metadata: CommonAttentionMetadata,
-        spec_decode_metadata: SpecDecodeMetadata,
-        valid_sampled_tokens_count: torch.Tensor,
-    ) -> tuple[CommonAttentionMetadata, torch.Tensor, torch.Tensor]:
-        """
-        This function is used to prepare the inputs for speculative decoding
-        It updates the common_attn_metadata for speculative decoding,
-        but does not consider the rejected tokens. Instead, all tokens
-        are included as inputs to the speculator, with the rejected tokens
-        used as padding and filtered out later by `token_indices_to_sample`.
-        No blocking CPU operations should be introduced in this function.
-        """
-        num_reqs = common_attn_metadata.num_reqs
-        device = valid_sampled_tokens_count.device
-
-        token_indices_to_sample = torch.empty(
-            (num_reqs,), dtype=torch.int32, device=device
-        )
-        num_rejected_tokens_gpu = torch.empty(
-            (num_reqs,), dtype=torch.int32, device=device
-        )
-
-        grid = (num_reqs,)
-        eagle_prepare_inputs_padded_kernel[grid](
-            spec_decode_metadata.cu_num_draft_tokens,
-            valid_sampled_tokens_count,
-            common_attn_metadata.query_start_loc,
-            token_indices_to_sample,
-            num_rejected_tokens_gpu,
-            num_reqs,
-        )
-
-        query_start_loc_cpu = common_attn_metadata.query_start_loc_cpu
-        new_query_len_per_req = query_start_loc_cpu[1:] - query_start_loc_cpu[:-1]
-
-        total_num_tokens = query_start_loc_cpu[-1].item()
-
-        spec_common_attn_metadata = CommonAttentionMetadata(
-            query_start_loc=common_attn_metadata.query_start_loc,
-            seq_lens=common_attn_metadata.seq_lens,
-            query_start_loc_cpu=query_start_loc_cpu,
-            _seq_lens_cpu=common_attn_metadata._seq_lens_cpu,
-            _num_computed_tokens_cpu=common_attn_metadata._num_computed_tokens_cpu,
-            num_reqs=common_attn_metadata.num_reqs,
-            num_actual_tokens=total_num_tokens,
-            max_query_len=new_query_len_per_req.max().item(),
-            max_seq_len=common_attn_metadata.max_seq_len,
-            block_table_tensor=common_attn_metadata.block_table_tensor,
-            slot_mapping=common_attn_metadata.slot_mapping[:total_num_tokens],
-            causal=True,
-            dcp_local_seq_lens=common_attn_metadata.dcp_local_seq_lens,
-        )
-
-        return (
-            spec_common_attn_metadata,
-            token_indices_to_sample,
-            num_rejected_tokens_gpu,
-        )
-
-    def propose_tree(
-        self,
-        batch_size: int,
-        # [num_tokens, vocab_size]
-        logits: torch.Tensor,
-        # [num_tokens]
-        positions: torch.Tensor,
-        # [num_tokens, hidden_size]
-        hidden_states: torch.Tensor,
-        common_attn_metadata: CommonAttentionMetadata,
-        slot_mappings: dict[str, torch.Tensor]
-        | list[dict[str, torch.Tensor]]
-        | None = None,
-    ) -> list[torch.Tensor]:
-        tree_attn_metadata_builder = self.draft_attn_groups[0].get_metadata_builder()
-        assert isinstance(tree_attn_metadata_builder, TreeAttentionMetadataBuilder)
-
-        total_num_drafts = self.cu_drafts_per_level[0]
-        level_num_drafts = total_num_drafts
-        # Sample a draft token for each child at the tree root level.
-        num_children = self.child_drafts_per_level[0]
-        if num_children == 1:
-            draft_token_ids = logits.argmax(dim=-1).view(batch_size, -1)
-        else:
-            draft_token_ids = torch.topk(logits, num_children, dim=-1).indices.view(
-                batch_size, -1
-            )
-        draft_token_ids_list = [draft_token_ids]
-        draft_hidden_states = hidden_states.view(batch_size, 1, -1)
-
-        # Initialize empty tensors for concatenation with the level outputs.
-        tree_input_ids = torch.empty(
-            0, device=self.input_ids.device, dtype=self.input_ids.dtype
-        )
-        tree_positions = torch.empty(
-            0, device=self.positions.device, dtype=self.positions.dtype
-        )
-        tree_hidden_states = torch.empty(
-            0, device=self.hidden_states.device, dtype=self.hidden_states.dtype
-        )
-        # Precompute the draft token positions.
-        flattened_draft_positions = (
-            positions.view(batch_size, -1) + self.tree_draft_pos_offsets[:batch_size, :]
-        )
-        tree_depth = len(self.cu_drafts_per_level)
-        for level in range(tree_depth - 1):
-            # Get draft positions for RoPE.
-            draft_positions = positions + (level + 1)
-            exceeds_max_model_len = (positions + total_num_drafts) >= self.max_model_len
-            # Mask out the position ids that exceed the max model length.
-            # Otherwise, we may get out-of-range error in RoPE.
-            draft_positions = torch.where(
-                exceeds_max_model_len,
-                0,
-                draft_positions,
-            ).view(batch_size, -1)
-
-            if level_num_drafts > 1:
-                # Repeat the positions for each draft at this level.
-                draft_positions = draft_positions.repeat_interleave(
-                    level_num_drafts, dim=1
-                )
-
-            if num_children > 1:
-                # Repeat draft hidden states for each child.
-                draft_hidden_states = draft_hidden_states.repeat_interleave(
-                    num_children, dim=1
-                )
-
-            # Concatenate the draft tokens, positions, and hidden states.
-            tree_input_ids = torch.cat([tree_input_ids, draft_token_ids], dim=1)
-            tree_positions = torch.cat([tree_positions, draft_positions], dim=1)
-            tree_hidden_states = torch.cat(
-                [tree_hidden_states, draft_hidden_states], dim=1
-            )
-
-            # Build new attention metadata for the next level of drafts.
-            # This is necessary to support tree attention.
-            query_len = total_num_drafts
-            common_attn_metadata = replace(
-                common_attn_metadata,
-                query_start_loc=query_len * self.arange[: batch_size + 1],
-                seq_lens=common_attn_metadata.seq_lens + level_num_drafts,
-                num_actual_tokens=batch_size * query_len,
-                max_query_len=query_len,
-            )
-            attn_metadata = tree_attn_metadata_builder.build_for_drafting(
-                common_attn_metadata=common_attn_metadata, draft_index=level + 1
-            )
-
-            # Apply new attention metadata to all draft layers.
-            per_layer_attn_metadata = {}
-            for attn_group in self.draft_attn_groups:
-                for layer_name in attn_group.layer_names:
-                    per_layer_attn_metadata[layer_name] = attn_metadata
-
-            # Consider max model length.
-            attn_metadata.max_seq_len = min(
-                attn_metadata.max_seq_len, self.max_model_len
-            )
-            # For the requests that exceed the max model length, we set the
-            # sequence length to 1 to minimize their overheads in attention.
-            attn_metadata.seq_lens.masked_fill_(exceeds_max_model_len, 1)
-
-            # Compute the slot mapping.
-            block_size = tree_attn_metadata_builder.kv_cache_spec.block_size
-            query_positions = flattened_draft_positions[:, level : level + query_len]
-            block_numbers = query_positions // block_size
-            block_ids = attn_metadata.block_table.gather(dim=1, index=block_numbers)
-            slot_mapping = block_ids * block_size + query_positions % block_size
-            # Mask out the slot mappings that exceed the max model length.
-            # Otherwise, the KV cache will be inadvertently updated with the
-            # padding tokens.
-            slot_mapping[exceeds_max_model_len] = PADDING_SLOT_ID
-            attn_metadata.slot_mapping = slot_mapping.view(-1)
-
-            # Copy inputs to buffer for cudagraph.
-            num_tokens = attn_metadata.num_actual_tokens
-            input_ids = tree_input_ids.view(-1)
-            self.input_ids[:num_tokens] = input_ids
-            self.positions[:num_tokens] = tree_positions.view(-1)
-            self.hidden_states[:num_tokens] = tree_hidden_states.view(num_tokens, -1)
-
-            cudagraph_runtime_mode, batch_desc = self.cudagraph_dispatcher.dispatch(
-                num_tokens
-            )
-            num_input_tokens = batch_desc.num_tokens
-            # Run the model.
-            with set_forward_context(
-                per_layer_attn_metadata,
-                self.vllm_config,
-                num_tokens=num_input_tokens,
-                cudagraph_runtime_mode=cudagraph_runtime_mode,
-                slot_mapping=self._get_slot_mapping(
-                    num_input_tokens, attn_metadata.slot_mapping
-                ),
-            ):
-                last_hidden_states, hidden_states = self.model(
-                    input_ids=self.input_ids[:num_input_tokens],
-                    positions=self.positions[:num_input_tokens],
-                    hidden_states=self.hidden_states[:num_input_tokens],
-                    inputs_embeds=None,
-                )
-
-            # Get the output hidden states for the draft tokens.
-            draft_hidden_states = hidden_states[:num_tokens].view(
-                batch_size, query_len, -1
-            )[:, -level_num_drafts:]
-            draft_last_hidden_states = last_hidden_states[:num_tokens].view(
-                batch_size, query_len, -1
-            )[:, -level_num_drafts:]
-
-            # Get the output logits for the draft tokens.
-            logits = self.model.compute_logits(
-                draft_last_hidden_states.reshape(batch_size * level_num_drafts, -1)
-            )
-
-            # Sample a draft token for each child at the next tree level.
-            num_children = self.child_drafts_per_level[level + 1]
-            if num_children == 1:
-                draft_token_ids = logits.argmax(dim=-1).view(batch_size, -1)
-            else:
-                draft_token_ids = torch.topk(logits, num_children, dim=-1).indices.view(
-                    batch_size, -1
-                )
-            draft_token_ids_list.append(draft_token_ids)
-
-            # Update the # drafts counters for the next tree level.
-            level_num_drafts = self.cu_drafts_per_level[level + 1] - total_num_drafts
-            total_num_drafts = self.cu_drafts_per_level[level + 1]
-        return draft_token_ids_list
-
-    def prepare_inputs(
-        self,
-        common_attn_metadata: CommonAttentionMetadata,
-        sampled_token_ids: list[list[int]],
-        num_draft_tokens: list[int],
-    ) -> tuple[CommonAttentionMetadata, torch.Tensor]:
-        """
-        This function is used to prepare the inputs for speculative decoding.
-        It updates to the common_attn_metadata to account for the rejected
-        tokens (and newly sampled tokens). It also returns the token indices
-        of the tokens that should be fed to the speculator.
-        """
-        # E.g.
-        #  common_attn_metadata.query_start_loc{_cpu}:
-        #       [0, q1, q1 + q2, q1 + q2 + q3]
-        #  common_attn_metadata.seq_lens{_cpu}: [s1, s2, s3]
-        #  num_rejected_tokens: [n1, n2, n3]
-        # This function computes the intermediate values:
-        #  num_tokens_per_req: [q1 - n1, q2 - n2, q3 - n3]
-        # And returns:
-        #  common_attn_metadata.query_start_loc{_cpu}:
-        #       [0, q1 - n1, q1 + q2 - n1 - n2, q1 + q2 + q3 - n1 - n2 - n3]
-        #  common_attn_metadata.seq_lens{_cpu}:
-        #       [s1 - n1 + 1, s2 - n2 + 1, s3 - n3 + 1]
-        #  token_indices: [0, 1, ..., q1 - n1 - 1,
-        #                 q1, q1 + 1, ..., q1 + q2 - n2 - 1,
-        #                 q1 + q2, q1 + q2 + 1, ..., q1 + q2 + q3 - n3 - 1]
-
-        num_rejected_tokens = [
-            n + 1 - len(sampled_token_ids[i]) if n > 0 else 0
-            for i, n in enumerate(num_draft_tokens)
-        ]
-        num_rejected_tokens = torch.tensor(num_rejected_tokens, dtype=torch.int32)
-
-        device = common_attn_metadata.query_start_loc.device
-        query_start_loc_cpu = common_attn_metadata.query_start_loc_cpu
-        new_seq_lens_cpu = common_attn_metadata.seq_lens_cpu - num_rejected_tokens
-
-        # [0, q1, q1 + q2, q1 + q2 + q3] -> [q1, q2, q3]
-        new_query_len_per_req = query_start_loc_cpu[1:] - query_start_loc_cpu[:-1]
-        # [q1, q2, q3] -> [q1 - n1, q2 - n2, q3 - n3]
-        new_num_tokens_per_req = new_query_len_per_req - num_rejected_tokens
-        new_num_tokens_per_req_np = new_num_tokens_per_req.numpy()
-
-        # [q1 - n1, q2 - n2, q3 - n3] ->
-        # [0, q1 - n1, q1 + q2 - n1 - n2, q1 + q2 + q3 - n1 - n2 - n3]
-        new_query_start_loc_cpu = torch.zeros(
-            query_start_loc_cpu.shape,
-            dtype=torch.int32,
-            pin_memory=is_pin_memory_available(),
-        )
-        new_query_start_loc_np = new_query_start_loc_cpu.numpy()
-        np.cumsum(new_num_tokens_per_req_np, out=new_query_start_loc_np[1:])
-
-        total_num_tokens = new_query_start_loc_np[-1]
-        # Example assuming num_tokens_per_req_np = [2, 4, 3]
-        # this implies that `new_query_start_locs` is:
-        # [0, 2, 6, 9] ->
-        # [0, 0, 2, 2, 2, 2, 6, 6, 6]
-        #  _r1_  ____r2____  ___r3__
-        new_query_start_locs_expanded = np.repeat(
-            new_query_start_loc_np[:-1], new_num_tokens_per_req_np
-        )
-        # [0, 1, 2, 3, 4, 5, 6, 7, 8] ->
-        # [0, 1, 0, 1, 2, 3, 0, 1, 2]
-        #  _r1_  ____r2____  ___r3__
-        token_offsets = (
-            self.token_arange_np[:total_num_tokens] - new_query_start_locs_expanded
-        )
-
-        # Expand starting positions to match token pattern
-        # [0, q1, q1 + q2] ->
-        # [0, 0, q1, q1, q1, q1, q1 + q2, q1 + q2, q1 + q2]
-        #  _r1_  _____r2_______  ___________r3____________
-        old_query_start_locs_expanded = np.repeat(
-            query_start_loc_cpu[:-1].numpy(), new_num_tokens_per_req_np
-        )
-        # Final token indices are:
-        # [0, 1,                                // req 1
-        #  q1 + 0, q1 + 1, q1 + 2, q1 + 3,       // req 2
-        #  q1 + q2 + 0, q1 + q2 + 1, q1 + q2 + 2] // req 3
-        token_indices_np = token_offsets + old_query_start_locs_expanded
-        token_indices = torch.from_numpy(token_indices_np).to(device, non_blocking=True)
-
-        spec_common_attn_metadata = CommonAttentionMetadata(
-            query_start_loc=new_query_start_loc_cpu.to(device, non_blocking=True),
-            seq_lens=new_seq_lens_cpu.to(device, non_blocking=True),
-            query_start_loc_cpu=new_query_start_loc_cpu,
-            _seq_lens_cpu=new_seq_lens_cpu,
-            _num_computed_tokens_cpu=common_attn_metadata._num_computed_tokens_cpu,
-            num_reqs=common_attn_metadata.num_reqs,
-            num_actual_tokens=total_num_tokens,
-            max_query_len=new_query_len_per_req.max().item(),
-            max_seq_len=new_seq_lens_cpu.max().item(),
-            block_table_tensor=common_attn_metadata.block_table_tensor,
-            slot_mapping=common_attn_metadata.slot_mapping[token_indices],
-            causal=True,
-            dcp_local_seq_lens=common_attn_metadata.dcp_local_seq_lens,
-        )
-
-        return spec_common_attn_metadata, token_indices
-
-    def get_model_name(self, model: nn.Module) -> str:
-        if hasattr(model, "module"):  # multi-GPU
-            model = model.module
-        return model.__class__.__name__
-
-    def _create_draft_vllm_config(self) -> VllmConfig:
-        """Return a VllmConfig with kernel-level overrides for the proposer.
-        Subclasses may override to apply additional config changes.
-        """
-        spec_cfg = self.speculative_config
-        if spec_cfg.moe_backend is not None:
-            return replace(
-                self.vllm_config,
-                kernel_config=replace(
-                    self.vllm_config.kernel_config,
-                    moe_backend=spec_cfg.moe_backend,
-                ),
-            )
-        return self.vllm_config
-
-    def _get_model(self) -> nn.Module:
-        """
-        Default method to call get_model(). Can be overridden by subclasses which
-        need to customize model loading.
-        """
-        from vllm.compilation.backends import set_model_tag
-
-        draft_vllm_config = self._create_draft_vllm_config()
-        with set_model_tag("eagle_head"):
-            model = get_model(
-                vllm_config=draft_vllm_config,
-                model_config=self.speculative_config.draft_model_config,
-                load_config=self.speculative_config.draft_load_config,
-            )
-        return model
-
-    def load_model(self, target_model: nn.Module) -> None:
-        target_attn_layer_names = set(
-            get_layers_from_vllm_config(
-                self.vllm_config,
-                AttentionLayerBase,  # type: ignore[type-abstract]
-            ).keys()
-        )
-
-        self.model = self._get_model()
-
-        # Find draft layers (attention layers added by draft model)
-        all_attn_layers = get_layers_from_vllm_config(
-            self.vllm_config,
-            AttentionLayerBase,  # type: ignore[type-abstract]
-        )
-        self._draft_attn_layer_names = (
-            set(all_attn_layers.keys()) - target_attn_layer_names
-        )
-
-        if self.supports_mm_inputs:
-            # Even if the target model is multimodal, we can also use
-            # text-only draft models
-            try:
-                dummy_input_ids = torch.tensor([[1]], device=self.input_ids.device)
-                self.model.embed_input_ids(dummy_input_ids, multimodal_embeddings=None)
-            except (NotImplementedError, AttributeError, TypeError):
-                logger.warning(
-                    "Draft model does not support multimodal inputs, "
-                    "falling back to text-only mode"
-                )
-                self.supports_mm_inputs = False
-
-        if supports_multimodal(target_model):
-            # handle multimodality
-            assert hasattr(target_model, "config")
-            if self.get_model_name(target_model) in [
-                "Qwen2_5_VLForConditionalGeneration",
-                "Qwen3VLForConditionalGeneration",
-                "Qwen3VLMoeForConditionalGeneration",
-                "HunYuanVLForConditionalGeneration",
-                "GlmOcrForConditionalGeneration",
-                "Qwen3_5ForConditionalGeneration",
-                "Qwen3_5MoeForConditionalGeneration",
-            ]:
-                self.model.config.image_token_index = target_model.config.image_token_id
-            elif self.get_model_name(target_model) == "PixtralForConditionalGeneration":
-                self.model.config.image_token_index = (
-                    target_model.config.vision_config.image_token_id
-                )
-            elif self.get_model_name(target_model) == "KimiK25ForConditionalGeneration":
-                self.model.config.image_token_index = (
-                    target_model.config.media_placeholder_token_id
-                )
-            else:
-                self.model.config.image_token_index = (
-                    target_model.config.image_token_index
-                )
-            target_language_model = cast(
-                SupportsMultiModal, target_model
-            ).get_language_model()
-        else:
-            target_language_model = target_model
-
-        self._maybe_share_embeddings(target_language_model)
-        self._maybe_share_lm_head(target_language_model)
-
-        if self.parallel_drafting and self.pass_hidden_states_to_model:
-            assert self.parallel_drafting_hidden_state_tensor is not None
-            self.parallel_drafting_hidden_state_tensor.copy_(
-                self.model.combine_hidden_states(
-                    self.model.mask_hidden.view(3 * self.hidden_size)
-                )
-                if self.eagle3_use_aux_hidden_state
-                else self.model.mask_hidden.view(self.hidden_size)
-            )
-
-    def _maybe_share_embeddings(self, target_language_model: nn.Module) -> None:
-        """
-        Some draft models may not have their own embedding layers, and some may
-        have a duplicate copy of the target model's embedding layers. In these cases,
-        we share the target model's embedding layers with the draft model to save
-        memory.
-        """
-        if get_pp_group().world_size == 1:
-            inner_model = getattr(target_language_model, "model", None)
-            if inner_model is None:
-                raise AttributeError("Target model does not have 'model' attribute")
-            if hasattr(inner_model, "embed_tokens"):
-                target_embed_tokens = inner_model.embed_tokens
-            elif hasattr(inner_model, "embedding"):
-                target_embed_tokens = inner_model.embedding
-            else:
-                raise AttributeError(
-                    "Target model does not have 'embed_tokens' or 'embedding' attribute"
-                )
-
-            share_embeddings = False
-            if hasattr(self.model, "has_own_embed_tokens"):
-                # EAGLE model
-                if not self.model.has_own_embed_tokens:
-                    share_embeddings = True
-                    logger.info(
-                        "Detected EAGLE model without its own embed_tokens in the"
-                        " checkpoint. Sharing target model embedding weights with the"
-                        " draft model."
-                    )
-                elif (
-                    isinstance(target_embed_tokens.weight, torch.Tensor)
-                    and isinstance(self.model.model.embed_tokens.weight, torch.Tensor)
-                    # TODO: Offload to CPU for comparison to avoid extra GPU memory
-                    # usage in CI testing environments with limited GPU memory
-                    and torch.equal(
-                        target_embed_tokens.weight.cpu(),
-                        self.model.model.embed_tokens.weight.cpu(),
-                    )
-                ):
-                    share_embeddings = True
-                    logger.info(
-                        "Detected EAGLE model with embed_tokens identical to the target"
-                        " model. Sharing target model embedding weights with the draft"
-                        " model."
-                    )
-                else:
-                    logger.info(
-                        "Detected EAGLE model with distinct embed_tokens weights. "
-                        "Keeping separate embedding weights from the target model."
-                    )
-            else:
-                # MTP model
-                share_embeddings = True
-                logger.info(
-                    "Detected MTP model. "
-                    "Sharing target model embedding weights with the draft model."
-                )
-
-            if share_embeddings:
-                if hasattr(self.model.model, "embed_tokens"):
-                    del self.model.model.embed_tokens
-                self.model.model.embed_tokens = target_embed_tokens
-        else:
-            logger.info(
-                "The draft model's vocab embedding will be loaded separately"
-                " from the target model."
-            )
-
-    def _maybe_share_lm_head(self, target_language_model: nn.Module) -> None:
-        """
-        Some draft models may not have their own LM head, and some may have a
-        duplicate copy of the target model's LM head. In these cases, we share
-        the target model's LM head with the draft model to save memory.
-        """
-        share_lm_head = False
-        if hasattr(self.model, "has_own_lm_head"):
-            # EAGLE model
-            if not self.model.has_own_lm_head:
-                share_lm_head = True
-                logger.info(
-                    "Detected EAGLE model without its own lm_head in the checkpoint. "
-                    "Sharing target model lm_head weights with the draft model."
-                )
-            elif (
-                hasattr(target_language_model, "lm_head")
-                and hasattr(target_language_model.lm_head, "weight")
-                and hasattr(self.model.lm_head, "weight")
-                and isinstance(target_language_model.lm_head.weight, torch.Tensor)
-                and isinstance(self.model.lm_head.weight, torch.Tensor)
-                # TODO: Offload to CPU for comparison to avoid extra GPU memory
-                # usage in CI testing environments with limited GPU memory
-                and torch.equal(
-                    target_language_model.lm_head.weight.cpu(),
-                    self.model.lm_head.weight.cpu(),
-                )
-            ):
-                share_lm_head = True
-                logger.info(
-                    "Detected EAGLE model with lm_head identical to the target model. "
-                    "Sharing target model lm_head weights with the draft model."
-                )
-            else:
-                logger.info(
-                    "Detected EAGLE model with distinct lm_head weights. "
-                    "Keeping separate lm_head weights from the target model."
-                )
-        else:
-            # MTP model
-            share_lm_head = True
-            logger.info(
-                "Detected MTP model. "
-                "Sharing target model lm_head weights with the draft model."
-            )
-
-        if share_lm_head and hasattr(target_language_model, "lm_head"):
-            if hasattr(self.model, "lm_head"):
-                del self.model.lm_head
-            self.model.lm_head = target_language_model.lm_head
-
-            # MTP models call compute_logits via shared_head.head (a
-            # ParallelLMHead inside each MTP layer), not self.model.lm_head.
-            # If the checkpoint omits a copy of the lm_head weights at the
-            # MTP layer path, shared_head.head stays uninitialised and
-            # produces NaN logits. Always share it explicitly.
-            inner = getattr(self.model, "model", None)
-            layers = getattr(inner, "layers", None) if inner else None
-            if layers is not None:
-                items = layers.values() if isinstance(layers, nn.ModuleDict) else layers
-                for layer in items:
-                    sh = getattr(layer, "shared_head", None)
-                    if sh is not None and hasattr(sh, "head"):
-                        del sh.head
-                        sh.head = target_language_model.lm_head
-                        logger.info(
-                            "Shared target model lm_head with MTP shared_head.head."
-                        )
-
-        if self.use_local_argmax_reduction:
-            if not hasattr(self.model, "get_top_tokens"):
-                raise ValueError(
-                    "use_local_argmax_reduction is enabled but draft model "
-                    f"{self.model.__class__.__name__} does not implement "
-                    "get_top_tokens()."
-                )
-            # Warn if draft model has vocab remapping, which forces fallback
-            # to the full-logits path (negating the optimization).
-            if (
-                hasattr(self.model, "draft_id_to_target_id")
-                and self.model.draft_id_to_target_id is not None
-            ):
-                logger.warning(
-                    "use_local_argmax_reduction is enabled but draft model "
-                    "uses draft_id_to_target_id vocab remapping. The "
-                    "optimization will be bypassed (falling back to full "
-                    "logits gather + argmax)."
-                )
-            else:
-                logger.info(
-                    "Using local argmax reduction for draft token generation "
-                    "(communication: O(2*tp_size) vs O(vocab_size))."
-                )
-
-    @torch.inference_mode()
-    def dummy_run(
-        self,
-        num_tokens: int,
-        use_cudagraphs: bool = True,
-        is_graph_capturing: bool = False,
-        slot_mappings: dict[str, torch.Tensor] | None = None,
-    ) -> None:
-        # FIXME: when using tree-based specdec, adjust number of forward-passes
-        # according to the depth of the tree.
-        for fwd_idx in range(
-            self.num_speculative_tokens if not is_graph_capturing else 1
-        ):
-            if fwd_idx <= 1:
-                cudagraph_runtime_mode, num_input_tokens, num_tokens_across_dp = (
-                    self._determine_batch_execution_and_padding(
-                        num_tokens, use_cudagraphs=use_cudagraphs
-                    )
-                )
-
-            # Make sure to use EAGLE's own buffer during cudagraph capture.
-            if (
-                self._draft_attn_layer_names
-                and slot_mappings is not None
-                and next(iter(self._draft_attn_layer_names)) in slot_mappings
-            ):
-                slot_mapping_dict = self._get_slot_mapping(num_input_tokens)
-            else:
-                slot_mapping_dict = slot_mappings or {}
-
-            with set_forward_context(
-                None,
-                self.vllm_config,
-                num_tokens=num_input_tokens,
-                num_tokens_across_dp=num_tokens_across_dp,
-                cudagraph_runtime_mode=cudagraph_runtime_mode,
-                slot_mapping=slot_mapping_dict,
-            ):
-                if self.supports_mm_inputs:
-                    input_ids = None
-                    inputs_embeds = self.inputs_embeds[:num_input_tokens]
-                else:
-                    input_ids = self.input_ids[:num_input_tokens]
-                    inputs_embeds = None
-
-                kwargs = dict(
-                    input_ids=input_ids,
-                    positions=self._get_positions(num_input_tokens),
-                    inputs_embeds=inputs_embeds,
-                )
-                if self.pass_hidden_states_to_model:
-                    kwargs["hidden_states"] = self.hidden_states[:num_input_tokens]
-                self.model(**kwargs)
-
-    def _get_eagle3_use_aux_hidden_state_from_config(self) -> bool:
-        """
-        Some eagle3 heads (e.g., nvidia/gpt-oss-120b-Eagle3-v2) do not use auxiliary
-        hidden states and directly uses the last layer output just like eagle1.
-        They might indicate this by setting "use_aux_hidden_state" to False
-        inside the "eagle_config" dict of their hf_config.
-        """
-        if self.method != "eagle3":
-            return False
-        # Assume that eagle3 heads use aux hidden states by default
-        use_aux_hidden_state = True
-        eagle_config = getattr(self.draft_model_config.hf_config, "eagle_config", None)
-        if eagle_config is not None:
-            use_aux_hidden_state = eagle_config.get("use_aux_hidden_state", True)
-        return use_aux_hidden_state
-
-    def validate_same_kv_cache_group(self, kv_cache_config: KVCacheConfig) -> None:
-        """
-        Validate that all drafting layers belong to the same KVCacheGroup.
-        Need this assumption to ensure all drafting layers can use the
-        same AttentionMetadata.
-        May extend to multiple AttentionMetadata in the future.
-        """
-        kv_cache_groups: dict[str, int] = {}
-        for id, kv_cache_group in enumerate(kv_cache_config.kv_cache_groups):
-            for layer_name in kv_cache_group.layer_names:
-                kv_cache_groups[layer_name] = id
-        assert (
-            len(
-                set(
-                    [
-                        kv_cache_groups[layer_name]
-                        for layer_name in self._draft_attn_layer_names
-                    ]
-                )
-            )
-            == 1
-        ), "All drafting layers should belong to the same kv cache group"
-
-    def initialize_attn_backend(
-        self,
-        kv_cache_config: KVCacheConfig,
-        kernel_block_sizes: list[int] | None = None,
-    ) -> None:
-        """
-        Initialize AttentionGroups for draft layers using kv_cache_config.
-        Called from the model runner's initialize_metadata_builders.
-        """
-        all_attn_layers = get_layers_from_vllm_config(
-            self.vllm_config,
-            AttentionLayerBase,  # type: ignore[type-abstract]
-        )
-
-        # Find which kv_cache_group the draft layers belong to
-        self.validate_same_kv_cache_group(kv_cache_config)
-        kv_cache_spec = None
-        for gid, group in enumerate(kv_cache_config.kv_cache_groups):
-            if self._draft_attn_layer_names & set(group.layer_names):
-                self.kv_cache_gid = gid
-                kv_cache_spec = group.kv_cache_spec
-                break
-
-        attention_groups: dict[tuple[str, str], AttentionGroup] = {}
-        if kv_cache_spec is not None:
-            for layer_name in self._draft_attn_layer_names:
-                attn_backend = all_attn_layers[layer_name].get_attn_backend()
-                backend_key = attn_backend.full_cls_name()
-                if backend_key not in attention_groups:
-                    layer_kv_cache_spec = kv_cache_spec
-                    if isinstance(layer_kv_cache_spec, UniformTypeKVCacheSpecs):
-                        layer_kv_cache_spec = layer_kv_cache_spec.kv_cache_specs[
-                            layer_name
-                        ]
-
-                    kernel_block_size = (
-                        kernel_block_sizes[self.kv_cache_gid]
-                        if kernel_block_sizes is not None
-                        and self.kv_cache_gid < len(kernel_block_sizes)
-                        else None
-                    )
-                    attn_group = AttentionGroup(
-                        backend=attn_backend,
-                        layer_names=[layer_name],
-                        kv_cache_spec=layer_kv_cache_spec,
-                        kv_cache_group_id=self.kv_cache_gid,
-                    )
-                    attn_group.create_metadata_builders(
-                        self.vllm_config,
-                        self.device,
-                        kernel_block_size=kernel_block_size,
-                    )
-                    attention_groups[backend_key] = attn_group
-                else:
-                    attention_groups[backend_key].layer_names.append(layer_name)
-
-        self.draft_attn_groups = list(attention_groups.values())
-        self.block_size = (
-            self.draft_attn_groups[0].get_metadata_builder().kv_cache_spec.block_size
-        )
-        logger.debug("Using block size %d for drafting layers", self.block_size)
-
-    def _determine_batch_execution_and_padding(
-        self,
-        num_tokens: int,
-        use_cudagraphs: bool = True,
-    ) -> tuple[CUDAGraphMode, int, torch.Tensor | None]:
-        cudagraph_mode, batch_desc = self.cudagraph_dispatcher.dispatch(
-            num_tokens,
-            valid_modes=({CUDAGraphMode.NONE} if not use_cudagraphs else None),
-        )
-        num_tokens_padded = batch_desc.num_tokens
-
-        # Extra coordination when running data-parallel since we need to
-        # coordinate across ranks
-        # TODO(Flechman): support DBO ubatching
-        should_ubatch, num_tokens_across_dp = False, None
-        if self.vllm_config.parallel_config.data_parallel_size > 1:
-            should_ubatch, num_tokens_across_dp, synced_cudagraph_mode = (
-                coordinate_batch_across_dp(
-                    num_tokens_unpadded=num_tokens,
-                    parallel_config=self.vllm_config.parallel_config,
-                    allow_microbatching=False,
-                    num_tokens_padded=num_tokens_padded,
-                    cudagraph_mode=cudagraph_mode.value,
-                )
-            )
-            assert not should_ubatch, "DBO ubatching not implemented for EAGLE"
-
-            # Extract DP-synced values
-            if num_tokens_across_dp is not None:
-                dp_rank = self.dp_rank
-                num_tokens_padded = int(num_tokens_across_dp[dp_rank].item())
-                # Re-dispatch with DP padding so we have the correct
-                # batch_descriptor
-                cudagraph_mode, batch_desc = self.cudagraph_dispatcher.dispatch(
-                    num_tokens_padded,
-                    valid_modes={CUDAGraphMode(synced_cudagraph_mode)},
-                )
-                # Assert to make sure the agreed upon token count is correct
-                # otherwise num_tokens_across_dp will no-longer be valid
-                assert batch_desc.num_tokens == num_tokens_padded
-                num_tokens_across_dp[dp_rank] = num_tokens_padded
-
-        return cudagraph_mode, num_tokens_padded, num_tokens_across_dp
+from vllm.config import VllmConfig
+from vllm.v1.spec_decode.llm_base_proposer import SpecDecodeBaseProposer
 
 
 class EagleProposer(SpecDecodeBaseProposer):
@@ -1696,49 +20,3 @@ def __init__(
             pass_hidden_states_to_model=True,
             runner=runner,
         )
-
-
-# NOTE(woosuk): Currently, the below code is not used and we always use argmax
-# to sample the draft tokens. We will use this after we find a way to manage
-# the draft prob tensor.
-# Refer to https://github.com/vllm-project/vllm/pull/16899 for the details.
-# FIXME(woosuk): The logic here is duplicated with the main sampling code.
-# We should refactor this to reuse the same sampling implementation.
-def compute_probs_and_sample_next_token(
-    logits: torch.Tensor,
-    sampling_metadata: SamplingMetadata,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    if sampling_metadata.all_greedy:
-        # For greedy requests, draft_probs is not used in rejection sampling.
-        # Therefore, we can just return the logits.
-        probs = logits
-        next_token_ids = logits.argmax(dim=-1)
-        return next_token_ids, probs
-
-    assert sampling_metadata.temperature is not None
-
-    # Use epsilon comparison to detect greedy sampling (temperature ~ 0.0)
-    # consistent with sampler.py's _SAMPLING_EPS threshold
-    temperature = sampling_metadata.temperature
-    # Avoid division by zero if there are greedy requests.
-    if not sampling_metadata.all_random:
-        is_greedy = temperature < _SAMPLING_EPS
-        temperature = torch.where(is_greedy, 1.0, temperature)
-    logits.div_(temperature.view(-1, 1))
-    probs = logits.softmax(dim=-1, dtype=torch.float32)
-
-    # NOTE(woosuk): Currently, we ignore most of the sampling parameters in
-    # generating the draft tokens. We only use the temperature. While this
-    # could degrade the acceptance rate, it does not affect the distribution
-    # of the generated tokens after rejection sampling.
-
-    # TODO(woosuk): Consider seeds.
-    q = torch.empty_like(probs)
-    q.exponential_()
-    # NOTE(woosuk): We shouldn't use `probs.div_(q)` because the draft_probs
-    # will be used later for rejection sampling.
-    next_token_ids = probs.div(q).argmax(dim=-1).view(-1)
-    if not sampling_metadata.all_random:
-        greedy_token_ids = probs.argmax(dim=-1)
-        next_token_ids = torch.where(is_greedy, greedy_token_ids, next_token_ids)
-    return next_token_ids, probs
diff --git a/vllm/v1/spec_decode/extract_hidden_states.py b/vllm/v1/spec_decode/extract_hidden_states.py
index e26fa768a324..c3cb3c8aaeaf 100644
--- a/vllm/v1/spec_decode/extract_hidden_states.py
+++ b/vllm/v1/spec_decode/extract_hidden_states.py
@@ -12,8 +12,10 @@
 from vllm.forward_context import set_forward_context
 from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
 from vllm.model_executor.model_loader import get_model
+from vllm.utils.platform_utils import is_pin_memory_available
 from vllm.v1.attention.backend import AttentionMetadataBuilder, CommonAttentionMetadata
 from vllm.v1.cudagraph_dispatcher import CudagraphDispatcher
+from vllm.v1.utils import CpuGpuBuffer
 from vllm.v1.worker.dp_utils import coordinate_batch_across_dp
 from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch
 
@@ -42,6 +44,7 @@ def __init__(self, vllm_config: VllmConfig, device):
         self.model: nn.Module | None = None
         self.attn_layer_names: list[str] = []
         self.attn_metadata_builder: AttentionMetadataBuilder | None = None
+        self.kv_cache_gid: int = -1
 
         # Maximum number of tokens for buffers
         max_batch_size = vllm_config.scheduler_config.max_num_seqs
@@ -49,6 +52,14 @@ def __init__(self, vllm_config: VllmConfig, device):
             vllm_config.scheduler_config.max_num_batched_tokens + max_batch_size
         )
 
+        self.backup_next_token_ids = CpuGpuBuffer(
+            max_batch_size,
+            dtype=torch.int32,
+            pin_memory=is_pin_memory_available(),
+            device=device,
+            with_numpy=True,
+        )
+
         self.hf_config = vllm_config.speculative_config.draft_model_config.hf_config
         layer_ids = getattr(self.hf_config, "eagle_aux_hidden_state_layer_ids", None)
         if not layer_ids:
@@ -145,7 +156,10 @@ def propose(
 
         # Return the sampled tokens as "draft" tokens
         # Shape: [batch_size, 1] to match num_speculative_tokens=1
-        return sampled_token_ids
+        # On decode steps with spec tokens, sampled_token_ids may have
+        # shape [batch_size, 2] (target + spec verification); slice to
+        # return only the target-sampled column.
+        return sampled_token_ids[:, :1]
 
     def _get_slot_mapping(
         self,
@@ -286,7 +300,6 @@ def _build_attn_metadata_builder(
 
     def prepare_next_token_ids_padded(
         self,
-        seq_lens: torch.Tensor,
         sampled_token_ids: torch.Tensor,
         requests: dict[str, CachedRequestState],
         gpu_input_batch: InputBatch,
@@ -300,18 +313,15 @@ def prepare_next_token_ids_padded(
         (if valid and not discarded) or a backup token from the request state.
         """
         num_reqs = gpu_input_batch.num_reqs
-        device = sampled_token_ids.device
-
-        # Compute backup tokens for discarded / invalid requests
-        seq_lens_list = seq_lens[:num_reqs].tolist()
-        backup_tokens_gpu = torch.tensor(
-            [
-                requests[gpu_input_batch.req_ids[i]].get_token_id(seq_lens_list[i])
-                for i in range(num_reqs)
-            ],
-            dtype=torch.int32,
-            device=device,
-        )
+
+        # Precompute backup token IDs for discarded requests.
+        num_reqs = gpu_input_batch.num_reqs
+        for i in range(num_reqs):
+            self.backup_next_token_ids.np[i] = requests[
+                gpu_input_batch.req_ids[i]
+            ].get_token_id(gpu_input_batch.num_tokens_no_spec[i] - 1)
+        self.backup_next_token_ids.copy_to_gpu(num_reqs)
+        backup_tokens_gpu = self.backup_next_token_ids.gpu[:num_reqs]
 
         assert discard_request_mask.dtype == torch.bool
 
@@ -372,9 +382,12 @@ def load_model(self, target_model: nn.Module) -> None:
         )
 
     def validate_same_kv_cache_group(self, kv_cache_config: KVCacheConfig) -> None:
-        """Validate all drafting layers belong to the same KV cache group.
-
-        With exactly one attention layer (asserted in load_model), this is
-        trivially satisfied.
-        """
+        """Validate all drafting layers belong to the same KV cache group
+        and record the group index for common_attn_metadata selection."""
         assert len(self.attn_layer_names) == 1
+        layer = self.attn_layer_names[0]
+        for gid, group in enumerate(kv_cache_config.kv_cache_groups):
+            if layer in group.layer_names:
+                self.kv_cache_gid = gid
+                return
+        raise ValueError(f"Cache-only layer {layer!r} not in any KV cache group")
diff --git a/vllm/v1/spec_decode/gemma4.py b/vllm/v1/spec_decode/gemma4.py
new file mode 100644
index 000000000000..b0a02774faf6
--- /dev/null
+++ b/vllm/v1/spec_decode/gemma4.py
@@ -0,0 +1,335 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Gemma4 MTP (Multi-Token Prediction) proposer for speculative decoding.
+
+The Gemma4 assistant model runs all decoder layers per draft step
+(producing one token), and all its attention layers share KV cache
+with the target model via cross-model KV sharing.
+"""
+
+from collections import defaultdict
+from copy import copy
+
+import torch
+import torch.nn as nn
+
+from vllm.config import VllmConfig, get_layers_from_vllm_config, replace
+from vllm.logger import init_logger
+from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
+from vllm.v1.attention.backend import CommonAttentionMetadata
+from vllm.v1.kv_cache_interface import (
+    KVCacheConfig,
+    KVCacheSpec,
+    UniformTypeKVCacheSpecs,
+)
+from vllm.v1.spec_decode.llm_base_proposer import SpecDecodeBaseProposer
+from vllm.v1.worker.utils import AttentionGroup
+
+logger = init_logger(__name__)
+
+
+class Gemma4Proposer(SpecDecodeBaseProposer):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        device: torch.device,
+        runner=None,
+    ):
+        super().__init__(
+            vllm_config,
+            device,
+            pass_hidden_states_to_model=True,
+            runner=runner,
+        )
+        # All draft steps predict from the same position (the last
+        # target-model position), so positions and seq_lens must not
+        # advance between steps.
+        self.constant_draft_positions = True
+
+        # Per-group block tables for multi-group KV cache models.
+        # Populated by gpu_model_runner during _prepare_inputs.
+        self._per_group_block_tables: dict[int, torch.Tensor] = {}
+
+        # Centroids CUDA graphs — populated in load_model if centroids
+        # masking is active. _centroids_sizes is pre-sorted for fast
+        # lookup in _greedy_sample.
+        self._centroids_sizes: list[int] = []
+        self._centroids_graphs: dict[int, torch.cuda.CUDAGraph] = {}
+        self._centroids_inputs: dict[int, torch.Tensor] = {}
+        self._centroids_outputs: dict[int, torch.Tensor] = {}
+
+    def set_per_group_block_table(self, gid: int, block_table: torch.Tensor) -> None:
+        self._per_group_block_tables[gid] = block_table
+
+    def model_returns_tuple(self) -> bool:
+        # forward() returns (draft_hidden_states, backbone_hidden_states).
+        # The proposer uses draft_hidden_states for compute_logits and
+        # backbone_hidden_states for the hidden-state feedback buffer.
+        return True
+
+    def build_per_group_and_layer_attn_metadata(
+        self,
+        common_attn_metadata: CommonAttentionMetadata,
+        draft_index: int = 0,
+    ) -> tuple[list[object], dict[str, object]]:
+        """Build attention metadata using the correct block table per group.
+
+        Gemma4 has multiple KV cache groups (sliding vs full attention)
+        with different block tables.  The base class receives a single
+        common_attn_metadata whose block_table belongs to one group.
+        We swap in the correct block table for each draft attention group.
+        """
+        per_group_attn_metadata: list[object] = []
+        per_layer_attn_metadata: dict[str, object] = {}
+        for attn_group in self.draft_attn_groups:
+            gid = attn_group.kv_cache_group_id
+            if gid in self._per_group_block_tables:
+                cm = copy(common_attn_metadata)
+                cm.block_table_tensor = self._per_group_block_tables[gid]
+            else:
+                cm = common_attn_metadata
+            attn_metadata = attn_group.get_metadata_builder().build_for_drafting(
+                common_attn_metadata=cm, draft_index=draft_index
+            )
+            per_group_attn_metadata.append(attn_metadata)
+            for layer_name in attn_group.layer_names:
+                per_layer_attn_metadata[layer_name] = attn_metadata
+        return per_group_attn_metadata, per_layer_attn_metadata
+
+    def _greedy_sample(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        if self._centroids_sizes:
+            T = hidden_states.shape[0]
+            for size in self._centroids_sizes:
+                if size >= T:
+                    self._centroids_inputs[size][:T].copy_(hidden_states)
+                    self._centroids_graphs[size].replay()
+                    return self._centroids_outputs[size][:T].clone()
+            return self.model.get_top_tokens(hidden_states)
+        return super()._greedy_sample(hidden_states)
+
+    def _setup_centroids_cuda_graphs(self) -> None:
+        """Capture CUDA graphs for centroids get_top_tokens at key sizes."""
+        masked_emb = self.model.masked_embedding
+        lm_head_weight = self.model._get_full_lm_head_weight()
+
+        for size in [1, 2, 4, 8, 16, 32, 64]:
+            static_input = torch.zeros(
+                size,
+                masked_emb.hidden_size,
+                dtype=self.dtype,
+                device=self.device,
+            )
+            for _ in range(3):
+                masked_emb.get_top_tokens(static_input, lm_head_weight)
+            torch.accelerator.synchronize()
+
+            g = torch.cuda.CUDAGraph()
+            with torch.cuda.graph(g):
+                static_output = masked_emb.get_top_tokens(
+                    static_input,
+                    lm_head_weight,
+                )
+            self._centroids_graphs[size] = g
+            self._centroids_inputs[size] = static_input
+            self._centroids_outputs[size] = static_output
+
+        self._centroids_sizes = sorted(self._centroids_graphs)
+        logger.info(
+            "Gemma4 MTP: captured centroids CUDA graphs for sizes %s.",
+            self._centroids_sizes,
+        )
+
+    def _create_draft_vllm_config(self) -> VllmConfig:
+        """Preserve the target's forced TRITON_ATTN backend for draft layers.
+
+        Gemma4 forces TRITON_ATTN due to heterogeneous head dimensions
+        (head_dim=256 sliding, global_head_dim=512 full). The base class
+        resets attention_config.backend to None for draft models, causing
+        sliding layers to fall back to FLASH_ATTN which cannot handle
+        KV-shared cache. Override to carry the target's backend through.
+        """
+        base = super()._create_draft_vllm_config()
+        target_backend = self.vllm_config.attention_config.backend
+        if target_backend is not None:
+            base = replace(
+                base,
+                attention_config=replace(
+                    base.attention_config,
+                    backend=target_backend,
+                ),
+            )
+        return base
+
+    def _maybe_share_lm_head(self, target_language_model: nn.Module) -> None:
+        """Gemma4 MTP always keeps its own draft-dim lm_head.
+
+        The draft model's lm_head operates in draft hidden_size (e.g. 256),
+        which differs from the target's backbone hidden_size (e.g. 1536).
+        Sharing would break compute_logits (and centroids masking when
+        use_ordered_embeddings is enabled).
+        """
+        logger.info(
+            "Gemma4 MTP: keeping draft model's own lm_head (draft_dim != backbone_dim)."
+        )
+
+    def load_model(self, target_model: nn.Module) -> None:
+        target_attn_layer_names = set(
+            get_layers_from_vllm_config(
+                self.vllm_config,
+                AttentionLayerBase,  # type: ignore[type-abstract]
+            ).keys()
+        )
+
+        super().load_model(target_model)
+
+        self._setup_gemma4_kv_sharing(target_attn_layer_names)
+
+        if getattr(self.model, "masked_embedding", None) is not None:
+            self._setup_centroids_cuda_graphs()
+
+    def validate_same_kv_cache_group(self, kv_cache_config: KVCacheConfig) -> None:
+        """Draft layers span multiple KV cache groups (sliding + full
+        attention with different head dimensions), so skip the base
+        class single-group assertion."""
+
+    def initialize_attn_backend(
+        self,
+        kv_cache_config: KVCacheConfig,
+        kernel_block_sizes: list[int] | None = None,
+    ) -> None:
+        """Create separate AttentionGroup objects per KV cache spec
+        so that each head-dim variant gets its own metadata builder."""
+        all_attn_layers = get_layers_from_vllm_config(
+            self.vllm_config,
+            AttentionLayerBase,  # type: ignore[type-abstract]
+        )
+
+        layer_to_gid: dict[str, int] = {}
+        layer_to_spec: dict[str, KVCacheSpec] = {}
+        for gid, group in enumerate(kv_cache_config.kv_cache_groups):
+            group_spec = group.kv_cache_spec
+            for ln in group.layer_names:
+                layer_to_gid[ln] = gid
+                if isinstance(group_spec, UniformTypeKVCacheSpecs):
+                    if ln in group_spec.kv_cache_specs:
+                        layer_to_spec[ln] = group_spec.kv_cache_specs[ln]
+                    else:
+                        tgt = getattr(
+                            all_attn_layers.get(ln),
+                            "kv_sharing_target_layer_name",
+                            None,
+                        )
+                        if tgt and tgt in group_spec.kv_cache_specs:
+                            layer_to_spec[ln] = group_spec.kv_cache_specs[tgt]
+                        else:
+                            layer_to_spec[ln] = group_spec
+                else:
+                    layer_to_spec[ln] = group_spec
+
+        attention_groups: dict[tuple[tuple[str, str], KVCacheSpec], AttentionGroup] = {}
+        for layer_name in self._draft_attn_layer_names:
+            if layer_name not in layer_to_spec:
+                continue
+            attn_layer = all_attn_layers[layer_name]
+            attn_backend = attn_layer.get_attn_backend()
+            spec = layer_to_spec[layer_name]
+            gid = layer_to_gid[layer_name]
+            group_key = (attn_backend.full_cls_name(), spec)
+
+            if group_key not in attention_groups:
+                kernel_block_size = (
+                    kernel_block_sizes[gid]
+                    if kernel_block_sizes is not None and gid < len(kernel_block_sizes)
+                    else None
+                )
+                attn_group = AttentionGroup(
+                    backend=attn_backend,
+                    layer_names=[layer_name],
+                    kv_cache_spec=spec,
+                    kv_cache_group_id=gid,
+                )
+                attn_group.create_metadata_builders(
+                    self.vllm_config,
+                    self.device,
+                    kernel_block_size=kernel_block_size,
+                )
+                attention_groups[group_key] = attn_group
+            else:
+                attention_groups[group_key].layer_names.append(layer_name)
+
+        self.draft_attn_groups = list(attention_groups.values())
+        if self.draft_attn_groups:
+            self.kv_cache_gid = self.draft_attn_groups[0].kv_cache_group_id
+            self.block_size = (
+                self.draft_attn_groups[0]
+                .get_metadata_builder()
+                .kv_cache_spec.block_size
+            )
+        else:
+            self.kv_cache_gid = 0
+            self.block_size = kv_cache_config.kv_cache_groups[
+                0
+            ].kv_cache_spec.block_size
+        logger.debug("Using block size %d for drafting layers", self.block_size)
+
+    def _setup_gemma4_kv_sharing(
+        self,
+        target_attn_layer_names: set[str],
+    ) -> None:
+        """Wire draft layers to share KV with the target model.
+
+        Each draft decoder layer is mapped to the last non-KV-shared
+        target layer of the same attention type (sliding or full).
+        """
+        draft_config = self.speculative_config.draft_model_config.hf_config
+        draft_text_config = draft_config.get_text_config()
+        target_config = self.vllm_config.model_config.hf_config
+        target_text_config = target_config.get_text_config()
+        target_layer_types = getattr(target_text_config, "layer_types", [])
+
+        if not (hasattr(self.model, "model") and hasattr(self.model.model, "layers")):
+            return
+
+        target_num_kv_shared = getattr(target_text_config, "num_kv_shared_layers", 0)
+        num_non_shared = len(target_layer_types) - target_num_kv_shared
+        type_to_target_indices: dict[str, list[int]] = defaultdict(list)
+        for idx, lt in enumerate(target_layer_types[:num_non_shared]):
+            type_to_target_indices[lt].append(idx)
+
+        target_prefix = "model.layers"
+        for name in target_attn_layer_names:
+            if ".layers." in name:
+                target_prefix = name.split(".layers.")[0] + ".layers"
+                break
+
+        draft_layer_types = getattr(draft_text_config, "layer_types", [])
+        for draft_idx, layer in enumerate(self.model.model.layers):
+            if not hasattr(layer, "self_attn"):
+                continue
+            attn = getattr(layer.self_attn, "attn", None)
+            if attn is None:
+                continue
+
+            draft_layer_type = (
+                draft_layer_types[draft_idx]
+                if draft_idx < len(draft_layer_types)
+                else "full_attention"
+            )
+            candidates = type_to_target_indices.get(draft_layer_type, [])
+            if not candidates:
+                logger.warning(
+                    "No target layer of type '%s' for draft layer %d",
+                    draft_layer_type,
+                    draft_idx,
+                )
+                continue
+
+            target_idx = candidates[-1]
+            target_layer_name = f"{target_prefix}.{target_idx}.self_attn.attn"
+            attn.kv_sharing_target_layer_name = target_layer_name
+            logger.info(
+                "Gemma4 MTP: draft layer %d (%s) -> %s",
+                draft_idx,
+                draft_layer_type,
+                target_layer_name,
+            )
diff --git a/vllm/v1/spec_decode/llm_base_proposer.py b/vllm/v1/spec_decode/llm_base_proposer.py
new file mode 100644
index 000000000000..9979a0517271
--- /dev/null
+++ b/vllm/v1/spec_decode/llm_base_proposer.py
@@ -0,0 +1,1691 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from importlib.util import find_spec
+from typing import Any, cast
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+from vllm.config import (
+    CUDAGraphMode,
+    VllmConfig,
+    get_layers_from_vllm_config,
+    replace,
+)
+from vllm.distributed.parallel_state import get_pp_group
+from vllm.forward_context import set_forward_context
+from vllm.logger import init_logger
+from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
+from vllm.model_executor.model_loader import get_model
+from vllm.model_executor.models import supports_multimodal
+from vllm.model_executor.models.deepseek_eagle3 import Eagle3DeepseekV2ForCausalLM
+from vllm.model_executor.models.interfaces import SupportsMultiModal
+from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM
+from vllm.model_executor.models.qwen3_dflash import DFlashQwen3ForCausalLM
+from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.platforms import current_platform
+from vllm.utils.platform_utils import is_pin_memory_available
+from vllm.v1.attention.backend import CommonAttentionMetadata
+from vllm.v1.attention.backends.registry import AttentionBackendEnum
+from vllm.v1.attention.backends.triton_attn import TritonAttentionMetadata
+from vllm.v1.cudagraph_dispatcher import CudagraphDispatcher
+from vllm.v1.kv_cache_interface import KVCacheConfig, UniformTypeKVCacheSpecs
+from vllm.v1.sample.metadata import SamplingMetadata
+from vllm.v1.sample.sampler import _SAMPLING_EPS
+from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
+from vllm.v1.spec_decode.utils import (
+    PADDING_SLOT_ID,
+    compute_new_slot_mapping,
+    copy_and_expand_eagle_inputs_kernel,
+    eagle_prepare_inputs_padded_kernel,
+    eagle_prepare_next_token_padded_kernel,
+    eagle_step_update_slot_mapping_and_metadata,
+    extend_all_queries_by_N,
+    next_power_of_2,
+)
+from vllm.v1.utils import CpuGpuBuffer
+from vllm.v1.worker.dp_utils import coordinate_batch_across_dp
+from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch
+from vllm.v1.worker.utils import AttentionGroup
+
+logger = init_logger(__name__)
+
+
+class SpecDecodeBaseProposer:
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        device: torch.device,
+        pass_hidden_states_to_model: bool,
+        runner=None,
+    ):
+        self.vllm_config = vllm_config
+        assert vllm_config.speculative_config is not None
+        self.speculative_config = vllm_config.speculative_config
+        self.draft_model_config = self.speculative_config.draft_model_config
+        self.method = self.speculative_config.method
+        self.pass_hidden_states_to_model = pass_hidden_states_to_model
+
+        self.device = device
+        self.dtype = vllm_config.model_config.dtype
+        self.max_model_len = vllm_config.model_config.max_model_len
+        self.dp_rank = vllm_config.parallel_config.data_parallel_rank
+        self.num_speculative_tokens = self.speculative_config.num_speculative_tokens
+
+        # We need to get the hidden size from the draft model config because
+        # the draft model's hidden size can be different from the target model's
+        # hidden size (e.g., Llama 3.3 70B).
+        self.hidden_size = self.draft_model_config.get_hidden_size()
+        self.inputs_embeds_size = self.draft_model_config.get_inputs_embeds_size()
+
+        # DeepSeek V4 MTP consumes the target's pre-hc_head residual stream,
+        # shape (T, hc_mult * hidden_size). Expand the hidden_states buffer
+        # so target_hidden_states fits; detect DeepseekV4 via draft hf_config.
+        draft_hf_config = self.draft_model_config.hf_config
+        if hasattr(draft_hf_config, "compress_ratios") and hasattr(
+            draft_hf_config, "hc_mult"
+        ):
+            self.hidden_size = self.hidden_size * draft_hf_config.hc_mult
+
+        # Unifying eagle, draft model, and parallel drafting support.
+        # DFlash always uses parallel drafting (all tokens in one pass),
+        # but has an additional slot for the next_token_id (does not shift like EAGLE)
+        self.parallel_drafting: bool = self.speculative_config.parallel_drafting
+        self.extra_slots_per_request = (
+            1 if not self.parallel_drafting else self.num_speculative_tokens
+        )
+        self.net_num_new_slots_per_request = self.extra_slots_per_request - (
+            1 if (self.pass_hidden_states_to_model and self.method != "dflash") else 0
+        )
+        self.needs_extra_input_slots = self.net_num_new_slots_per_request > 0
+
+        # When True, all draft steps reuse the same position as the
+        # first step instead of advancing by one each iteration.
+        # Used by draft models with Q-only attention that share KV
+        # with the target and always predict from the same position.
+        self.constant_draft_positions: bool = False
+
+        self.parallel_drafting_token_id: int = 0
+        self.parallel_drafting_hidden_state_tensor: torch.Tensor | None = None
+        if self.parallel_drafting:
+            self._init_parallel_drafting_params()
+        self.use_local_argmax_reduction: bool = (
+            self.speculative_config.use_local_argmax_reduction
+        )
+
+        self.max_batch_size = vllm_config.scheduler_config.max_num_seqs
+        self.max_num_tokens = vllm_config.scheduler_config.max_num_batched_tokens
+        self.token_arange_np = np.arange(self.max_num_tokens, dtype=np.int32)
+
+        # Can be specialized by methods like DFlash to reduce the limit
+        self.max_query_tokens = self.max_num_tokens
+        self.max_positions = self.max_num_tokens
+
+        # Multi-modal data support
+        self.mm_registry = MULTIMODAL_REGISTRY
+        self.supports_mm_inputs = self.mm_registry.supports_multimodal_inputs(
+            vllm_config.model_config
+        )
+
+        self.draft_attn_groups: list[AttentionGroup] = []
+        self.kv_cache_gid: int = -1
+        self.eagle3_use_aux_hidden_state: bool = (
+            self._get_eagle3_use_aux_hidden_state_from_config()
+        )
+
+        self.compilation_config = self.vllm_config.compilation_config
+
+        # Cudagraph dispatcher for PIECEWISE-only dispatching in eagle.
+        # Keys are initialized later via initialize_cudagraph_keys() called from
+        # gpu_model_runner._check_and_update_cudagraph_mode after
+        # adjust_cudagraph_sizes_for_spec_decode is called.
+        self.cudagraph_dispatcher = CudagraphDispatcher(self.vllm_config)
+
+        # persistent buffers for cuda graph
+        self.input_ids = torch.zeros(
+            self.max_num_tokens, dtype=torch.int32, device=device
+        )
+        # Use draft model's M-RoPE setting, not target model's
+        # Draft models may be text-only even if target is multimodal
+        self.uses_mrope = self.draft_model_config.uses_mrope
+        self.uses_xdrope_dim = self.vllm_config.model_config.uses_xdrope_dim
+        self.draft_uses_xdrope_dim = self.draft_model_config.uses_xdrope_dim
+        if self.uses_mrope:
+            # NOTE: `mrope_positions` is implemented with one additional dummy
+            # position on purpose to make it non-contiguous so that it can work
+            # with torch compile.
+            # See detailed explanation in https://github.com/vllm-project/vllm/pull/12128#discussion_r1926431923
+
+            # NOTE: When M-RoPE is enabled, position ids are 3D regardless of
+            # the modality of inputs. For text-only inputs, each dimension has
+            # identical position IDs, making M-RoPE functionally equivalent to
+            # 1D-RoPE.
+            # See page 5 of https://arxiv.org/abs/2409.12191
+            self.mrope_positions = torch.zeros(
+                (3, self.max_positions + 1), dtype=torch.int64, device=device
+            )
+        elif self.uses_xdrope_dim > 0 and self.draft_uses_xdrope_dim > 0:
+            self.xdrope_positions = torch.zeros(
+                (self.uses_xdrope_dim, self.max_positions + 1),
+                dtype=torch.int64,
+                device=device,
+            )
+        else:
+            # RoPE need (max_num_tokens,)
+            self.positions = torch.zeros(
+                self.max_positions,
+                dtype=torch.int64,
+                device=device,
+            )
+        self.hidden_states = torch.zeros(
+            (self.max_num_tokens, self.hidden_size), dtype=self.dtype, device=device
+        )
+
+        # Will be set when we initialize the attention backend
+        self.block_size: int = -1
+
+        # We need +1 here because the arange is used to set query_start_loc,
+        # which has one more element than batch_size.
+        max_num_slots_for_arange = max(self.max_batch_size + 1, self.max_num_tokens)
+        self.arange = torch.arange(
+            max_num_slots_for_arange, device=device, dtype=torch.int32
+        )
+
+        if self.needs_extra_input_slots:
+            self._raise_if_padded_drafter_batch_disabled()
+            self._warn_if_multimodal()
+            self._raise_if_mrope()
+
+        self.is_rejected_token_mask: torch.Tensor | None = None
+        self.is_masked_token_mask: torch.Tensor | None = None
+        if self.needs_extra_input_slots:
+            # For draft models and parallel drafting, we need to keep track of
+            # which tokens are rejected to update the slot mapping with padding slots.
+            self.is_rejected_token_mask = torch.zeros(
+                (self.max_num_tokens,), dtype=torch.bool, device=device
+            )
+            # For parallel drafting, we also need to keep track of which tokens
+            # are parallel-padding tokens used to sample at later positions.
+            # We populate this tensor even when using draft models for simplicity.
+            self.is_masked_token_mask = torch.zeros(
+                (self.max_num_tokens,), dtype=torch.bool, device=device
+            )
+
+        self.inputs_embeds = torch.zeros(
+            (self.max_num_tokens, self.inputs_embeds_size),
+            dtype=self.dtype,
+            device=device,
+        )
+
+        self.backup_next_token_ids = CpuGpuBuffer(
+            self.max_batch_size,
+            dtype=torch.int32,
+            pin_memory=is_pin_memory_available(),
+            device=device,
+            with_numpy=True,
+        )
+        self._enable_probabilistic_draft_probs = (
+            self.speculative_config.rejection_sample_method == "standard"
+            and self.speculative_config.draft_sample_method == "probabilistic"
+        )
+        self._last_draft_probs: torch.Tensor | None = None
+
+        self._slot_mapping_buffer = torch.zeros(
+            self.max_positions,
+            dtype=torch.int64,
+            device=device,
+        )
+
+        # Determine allowed attention backends once during initialization.
+        self.allowed_attn_types: tuple | None = None
+        if current_platform.is_rocm():
+            from vllm.models.deepseek_v4.amd.rocm import (
+                DeepseekV4ROCMAiterMLASparseMetadata,
+                DeepseekV4ROCMAiterSparseSWAMetadata,
+            )
+            from vllm.v1.attention.backends.mla.indexer import (
+                DeepseekV32IndexerMetadata,
+            )
+            from vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse import (
+                ROCMAiterMLASparseMetadata,
+            )
+            from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
+
+            rocm_types = [
+                TritonAttentionMetadata,
+                RocmAttentionMetadata,
+                ROCMAiterMLASparseMetadata,
+                DeepseekV4ROCMAiterMLASparseMetadata,
+                DeepseekV4ROCMAiterSparseSWAMetadata,
+                DeepseekV32IndexerMetadata,
+            ]
+            # ROCM_AITER_FA is an optional backend
+            # We check is_enabled() here to avoid importing the backend module during
+            # auto-discovery when VLLM_ROCM_USE_AITER=0, which would trigger aiter
+            # import and JIT compilation warnings. Explicit backend selection via
+            # attention_config still works because the backend module is loaded
+            # directly when selected, not through this auto-discovery path.
+            # Check if backend module exists to allow explicit selection
+            if find_spec(
+                AttentionBackendEnum.ROCM_AITER_FA.get_path(include_classname=False)
+            ):
+                from vllm.v1.attention.backends.rocm_aiter_fa import (
+                    AiterFlashAttentionMetadata,
+                )
+
+                rocm_types.append(AiterFlashAttentionMetadata)
+
+            # TRITON_MLA backend support for MLA models (e.g., DeepSeek)
+            from vllm.model_executor.layers.attention.mla_attention import (
+                MLACommonMetadata,
+            )
+
+            rocm_types.append(MLACommonMetadata)
+
+            # FlexAttention backend support
+            from vllm.v1.attention.backends.flex_attention import FlexAttentionMetadata
+
+            rocm_types.append(FlexAttentionMetadata)
+
+            self.allowed_attn_types = tuple(rocm_types)
+
+    def _raise_if_padded_drafter_batch_disabled(self):
+        if self.speculative_config.disable_padded_drafter_batch:
+            raise NotImplementedError(
+                "Speculative Decoding with draft models or parallel drafting only "
+                "supports padded drafter batch. Please unset "
+                "disable_padded_drafter_batch in the speculative_config."
+            )
+
+    def _warn_if_multimodal(self):
+        if self.supports_mm_inputs:
+            logger.warning(
+                "Speculative Decoding with draft models or parallel drafting "
+                "does not fully support multimodal models yet. "
+                "Proceeding with text-only speculative decoding."
+            )
+
+    def _raise_if_mrope(self):
+        if self.draft_model_config.uses_mrope:
+            raise NotImplementedError(
+                "Speculative Decoding with draft models or parallel drafting "
+                "does not support M-RoPE yet"
+            )
+
+    def _init_parallel_drafting_params(self):
+        # For parallel drafting, we need the token ID to use for masked slots
+        # And for EAGLE + parallel drafting, we need the hidden state tensor to use
+        # for those masked slots.
+
+        model_hf_config = self.draft_model_config.hf_config
+        # DFlash stores mask_token_id in dflash_config
+        dflash_config = getattr(model_hf_config, "dflash_config", None)
+        if dflash_config and "mask_token_id" in dflash_config:
+            self.parallel_drafting_token_id = dflash_config["mask_token_id"]
+        elif hasattr(model_hf_config, "pard_token"):
+            self.parallel_drafting_token_id = model_hf_config.pard_token
+        elif hasattr(model_hf_config, "ptd_token_id"):
+            self.parallel_drafting_token_id = model_hf_config.ptd_token_id
+        else:
+            raise ValueError(
+                "For parallel drafting, the draft model config must have "
+                "`pard_token`, `ptd_token_id`, or "
+                "`dflash_config.mask_token_id` specified in its config.json."
+            )
+
+        if self.pass_hidden_states_to_model:
+            self.parallel_drafting_hidden_state_tensor = torch.empty(
+                self.hidden_size, dtype=self.dtype, device=self.device
+            )
+
+    def _get_positions(self, num_tokens: int):
+        if self.uses_mrope:
+            return self.mrope_positions[:, :num_tokens]
+        if self.uses_xdrope_dim > 0 and self.draft_uses_xdrope_dim > 0:
+            return self.xdrope_positions[:, :num_tokens]
+        return self.positions[:num_tokens]
+
+    def _set_positions(self, num_tokens: int, positions: torch.Tensor):
+        if self.uses_mrope:
+            self.mrope_positions[:, :num_tokens] = positions
+        elif self.uses_xdrope_dim > 0 and self.draft_uses_xdrope_dim > 0:
+            self.xdrope_positions[:, :num_tokens] = positions
+        else:
+            # Convert M-RoPE positions if target model uses M-RoPE
+            # but draft doesn't, For text inputs, all M-RoPE
+            # dimensions are identical
+            if self.vllm_config.model_config.uses_mrope:
+                positions = positions[0]
+            self.positions[:num_tokens] = positions
+
+    def _get_slot_mapping(
+        self,
+        num_tokens: int,
+        slot_mapping: torch.Tensor | None = None,
+    ) -> dict[str, torch.Tensor]:
+        """Return slot_mapping dict for EAGLE layers.
+
+        If slot_mapping is provided, copies it into the buffer first.
+        """
+        if slot_mapping is not None:
+            num_actual = slot_mapping.shape[0]
+            self._slot_mapping_buffer[:num_actual].copy_(slot_mapping)
+            if num_tokens > num_actual:
+                self._slot_mapping_buffer[num_actual:num_tokens].fill_(PADDING_SLOT_ID)
+
+        view = self._slot_mapping_buffer[:num_tokens]
+        return {name: view for name in self._draft_attn_layer_names}
+
+    def initialize_cudagraph_keys(self, cudagraph_mode: CUDAGraphMode) -> None:
+        """Initialize cudagraph dispatcher keys for the drafter.
+
+        Only supports PIECEWISE cudagraphs (via mixed_mode).
+        This should be called after adjust_cudagraph_sizes_for_spec_decode.
+        """
+        if (
+            not self.speculative_config.enforce_eager
+            and cudagraph_mode.mixed_mode()
+            in [CUDAGraphMode.PIECEWISE, CUDAGraphMode.FULL]
+        ):
+            eagle_cudagraph_mode = CUDAGraphMode.PIECEWISE
+        else:
+            eagle_cudagraph_mode = CUDAGraphMode.NONE
+
+        self.cudagraph_dispatcher.initialize_cudagraph_keys(eagle_cudagraph_mode)
+
+    def _greedy_sample(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        """Greedy-sample draft tokens from hidden states."""
+        if self.use_local_argmax_reduction:
+            return self.model.get_top_tokens(hidden_states)
+        return self.model.compute_logits(hidden_states).argmax(dim=-1)
+
+    def _sample_from_logits(
+        self,
+        logits: torch.Tensor,
+        sampling_metadata: SamplingMetadata,
+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
+        if not self._enable_probabilistic_draft_probs:
+            return logits.argmax(dim=-1), None
+        if sampling_metadata.all_greedy:
+            return logits.argmax(dim=-1), None
+        return compute_probs_and_sample_next_token(logits, sampling_metadata)
+
+    def _sample_draft_tokens(
+        self,
+        hidden_states: torch.Tensor,
+        sampling_metadata: SamplingMetadata,
+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
+        if not self._enable_probabilistic_draft_probs or sampling_metadata.all_greedy:
+            return self._greedy_sample(hidden_states), None
+        logits = self.model.compute_logits(hidden_states)
+        return self._sample_from_logits(logits, sampling_metadata)
+
+    def take_last_draft_probs(self) -> torch.Tensor | None:
+        return self._last_draft_probs
+
+    def propose(
+        self,
+        # [num_tokens]
+        target_token_ids: torch.Tensor,
+        # [num_tokens] or [3, num_tokens] when M-RoPE is enabled
+        target_positions: torch.Tensor,
+        # [num_tokens, hidden_size]
+        target_hidden_states: torch.Tensor,
+        # [batch_size]
+        next_token_ids: torch.Tensor,
+        token_indices_to_sample: torch.Tensor | None,
+        common_attn_metadata: CommonAttentionMetadata,
+        sampling_metadata: SamplingMetadata,
+        mm_embed_inputs: tuple[list[torch.Tensor], torch.Tensor] | None = None,
+        num_rejected_tokens_gpu: torch.Tensor | None = None,
+        slot_mappings: dict[str, torch.Tensor]
+        | list[dict[str, torch.Tensor]]
+        | None = None,
+    ) -> torch.Tensor:
+        self._last_draft_probs = None
+        batch_size = common_attn_metadata.batch_size()
+
+        if self.method in ("eagle3", "dflash"):
+            assert isinstance(
+                self.model,
+                (
+                    Eagle3LlamaForCausalLM,
+                    Eagle3DeepseekV2ForCausalLM,
+                    DFlashQwen3ForCausalLM,
+                ),
+            )
+            target_hidden_states = self.model.combine_hidden_states(
+                target_hidden_states
+            )
+            assert target_hidden_states.shape[-1] == self.hidden_size
+
+        num_tokens, token_indices_to_sample, common_attn_metadata = (
+            self.set_inputs_first_pass(
+                target_token_ids=target_token_ids,
+                next_token_ids=next_token_ids,
+                target_positions=target_positions,
+                target_hidden_states=target_hidden_states,
+                token_indices_to_sample=token_indices_to_sample,
+                cad=common_attn_metadata,
+                num_rejected_tokens_gpu=num_rejected_tokens_gpu,
+            )
+        )
+
+        per_group_attn_metadata, per_layer_attn_metadata = (
+            self.build_per_group_and_layer_attn_metadata(common_attn_metadata)
+        )
+
+        cudagraph_runtime_mode, num_input_tokens, num_tokens_across_dp = (
+            self._determine_batch_execution_and_padding(num_tokens)
+        )
+
+        model_kwargs, slot_mapping_size = self.build_model_inputs_first_pass(
+            num_tokens, num_input_tokens, mm_embed_inputs
+        )
+
+        with set_forward_context(
+            per_layer_attn_metadata,
+            self.vllm_config,
+            num_tokens=num_input_tokens,
+            num_tokens_across_dp=num_tokens_across_dp,
+            cudagraph_runtime_mode=cudagraph_runtime_mode,
+            slot_mapping=self._get_slot_mapping(
+                slot_mapping_size, common_attn_metadata.slot_mapping
+            ),
+        ):
+            ret_hidden_states = self.model(**model_kwargs)
+            if not self.model_returns_tuple():
+                last_hidden_states = ret_hidden_states
+                hidden_states = last_hidden_states
+            else:
+                last_hidden_states, hidden_states = ret_hidden_states
+
+        sample_hidden_states = last_hidden_states[token_indices_to_sample]
+
+        # Early exit if there is only one draft token to be generated.
+        if self.num_speculative_tokens == 1 or self.parallel_drafting:
+            draft_token_ids, draft_probs = self._sample_draft_tokens(
+                sample_hidden_states, sampling_metadata
+            )
+            if draft_probs is not None:
+                self._last_draft_probs = draft_probs.view(
+                    -1, self.num_speculative_tokens, draft_probs.shape[-1]
+                ).contiguous()
+            return draft_token_ids.view(-1, self.num_speculative_tokens)
+
+        if self.uses_mrope:
+            positions = self.mrope_positions[:, token_indices_to_sample]
+        else:
+            positions = self.positions[token_indices_to_sample]
+        hidden_states = hidden_states[token_indices_to_sample]
+
+        if self.constant_draft_positions:
+            # Write the sampling positions into the front of the
+            # positions buffer so that subsequent loop iterations
+            # (which read via _get_positions) use the correct values.
+            self.positions[:batch_size] = positions
+
+        draft_token_ids, draft_probs = self._sample_draft_tokens(
+            sample_hidden_states, sampling_metadata
+        )
+        draft_probs_list = None if draft_probs is None else [draft_probs]
+
+        if self.allowed_attn_types is not None:
+            for group_md in per_group_attn_metadata:
+                if not isinstance(group_md, self.allowed_attn_types):
+                    raise ValueError(
+                        f"Unsupported attention metadata type for speculative "
+                        "decoding with num_speculative_tokens > 1: "
+                        f"{type(group_md)}. Supported types are: "
+                        f"{self.allowed_attn_types}"
+                    )
+
+        # Generate the remaining draft tokens.
+        draft_token_ids_list = [draft_token_ids]
+
+        cudagraph_runtime_mode, input_batch_size, batch_size_across_dp = (
+            self._determine_batch_execution_and_padding(batch_size)
+        )
+
+        common_attn_metadata.num_actual_tokens = batch_size
+        common_attn_metadata.max_query_len = 1
+        common_attn_metadata.query_start_loc = self.arange[: batch_size + 1]
+        common_attn_metadata.query_start_loc_cpu = torch.from_numpy(
+            self.token_arange_np[: batch_size + 1]
+        ).clone()
+
+        # In padded drafter batch, we need to adjust the sequence lengths
+        # to remove the "padding" (i.e. rejected tokens).
+        # Only apply this adjustment when we have rejected tokens
+        # (i.e., not the first proposal).
+        if self.num_speculative_tokens > 1 and num_rejected_tokens_gpu is not None:
+            common_attn_metadata.seq_lens -= num_rejected_tokens_gpu
+            # Invalidate the CPU-side shadows to avoid H<>D sync.
+            common_attn_metadata._seq_lens_cpu = None
+            common_attn_metadata._num_computed_tokens_cpu = None
+
+        block_size = self.block_size
+        assert block_size > 0, "block_size has not been initialized."
+        for token_index in range(self.num_speculative_tokens - 1):
+            # Update the inputs.
+            # cast to int32 is crucial when eagle model is compiled.
+            # tensor.argmax() returns int64 by default.
+            input_ids = draft_token_ids_list[-1].int()
+
+            if not self.constant_draft_positions:
+                positions = self._update_positions_dependent_metadata(
+                    positions,
+                    common_attn_metadata,
+                    batch_size,
+                    input_batch_size,
+                    block_size,
+                )
+
+            # Rebuild attention metadata. When draft positions are constant
+            # (e.g. Gemma4 MTP), common_attn_metadata is invariant across
+            # loop iterations so we build once and reuse.
+            if not self.constant_draft_positions or token_index == 0:
+                _, per_layer_attn_metadata = (
+                    self.build_per_group_and_layer_attn_metadata(
+                        common_attn_metadata, draft_index=token_index + 1
+                    )
+                )
+
+            # copy inputs to buffer for cudagraph
+            self.input_ids[:batch_size] = input_ids
+            self.hidden_states[:batch_size] = hidden_states
+            if self.supports_mm_inputs:
+                self.inputs_embeds[:batch_size] = self.model.embed_input_ids(input_ids)
+
+                input_ids = None
+                inputs_embeds = self.inputs_embeds[:input_batch_size]
+            else:
+                input_ids = self.input_ids[:input_batch_size]
+                inputs_embeds = None
+
+            # Run the model.
+            model_kwargs = {
+                "input_ids": input_ids,
+                "positions": self._get_positions(input_batch_size),
+                "inputs_embeds": inputs_embeds,
+            }
+            if self.pass_hidden_states_to_model:
+                model_kwargs["hidden_states"] = self.hidden_states[:input_batch_size]
+
+            with set_forward_context(
+                per_layer_attn_metadata,
+                self.vllm_config,
+                num_tokens=input_batch_size,
+                num_tokens_across_dp=batch_size_across_dp,
+                cudagraph_runtime_mode=cudagraph_runtime_mode,
+                slot_mapping=self._get_slot_mapping(input_batch_size),
+            ):
+                ret_hidden_states = self.model(**model_kwargs)
+                if not self.model_returns_tuple():
+                    last_hidden_states = ret_hidden_states
+                    hidden_states = ret_hidden_states
+                else:
+                    last_hidden_states, hidden_states = ret_hidden_states
+
+            hidden_states = hidden_states[:batch_size]
+            draft_token_ids, draft_probs = self._sample_draft_tokens(
+                last_hidden_states[:batch_size], sampling_metadata
+            )
+            if draft_probs is not None:
+                assert draft_probs_list is not None
+                draft_probs_list.append(draft_probs)
+            draft_token_ids_list.append(draft_token_ids)
+
+        # [batch_size, num_speculative_tokens]
+        draft_token_ids = torch.stack(draft_token_ids_list, dim=1)
+        if draft_probs_list is not None:
+            self._last_draft_probs = torch.stack(draft_probs_list, dim=1).contiguous()
+        return draft_token_ids
+
+    def _update_positions_dependent_metadata(
+        self,
+        positions: torch.Tensor,
+        common_attn_metadata,
+        batch_size: int,
+        input_batch_size: int,
+        block_size: int,
+    ) -> torch.Tensor:
+        """Update positions, slot mappings, and sequence metadata for the
+        next draft step. Returns the updated positions tensor."""
+        positions_1d = positions[0] if self.uses_mrope else positions
+        if self.uses_mrope:
+            out_pos = self.mrope_positions[0, :batch_size]
+        elif self.uses_xdrope_dim > 0 and self.draft_uses_xdrope_dim > 0:
+            out_pos = self.xdrope_positions[0, :batch_size]
+        else:
+            out_pos = self.positions[:batch_size]
+        eagle_step_update_slot_mapping_and_metadata(
+            positions_1d=positions_1d,
+            block_table_tensor=common_attn_metadata.block_table_tensor,
+            seq_lens=common_attn_metadata.seq_lens,
+            block_size=block_size,
+            max_model_len=self.max_model_len,
+            out_clamped_positions=out_pos,
+            out_slot_mapping=self._slot_mapping_buffer[:input_batch_size],
+            input_batch_size=input_batch_size,
+        )
+        common_attn_metadata.slot_mapping = self._slot_mapping_buffer[:batch_size]
+        if self.uses_mrope:
+            self.mrope_positions[1:, :batch_size] = self.mrope_positions[0, :batch_size]
+            positions = self.mrope_positions[:, :batch_size]
+        elif self.uses_xdrope_dim > 0 and self.draft_uses_xdrope_dim > 0:
+            self.xdrope_positions[1:, :batch_size] = self.xdrope_positions[
+                0, :batch_size
+            ]
+            positions = self.xdrope_positions[0, :batch_size]
+        else:
+            positions = self.positions[:batch_size]
+        common_attn_metadata.max_seq_len = min(
+            common_attn_metadata.max_seq_len + 1,
+            self.max_model_len,
+        )
+
+        if common_attn_metadata._seq_lens_cpu is not None:
+            common_attn_metadata._seq_lens_cpu += 1
+        if common_attn_metadata._num_computed_tokens_cpu is not None:
+            common_attn_metadata._num_computed_tokens_cpu += 1
+        if common_attn_metadata.seq_lens_cpu_upper_bound is not None:
+            common_attn_metadata.seq_lens_cpu_upper_bound += 1
+
+        return positions
+
+    def set_inputs_first_pass(
+        self,
+        target_token_ids: torch.Tensor,
+        next_token_ids: torch.Tensor,
+        target_positions: torch.Tensor,
+        target_hidden_states: torch.Tensor,
+        token_indices_to_sample: torch.Tensor | None,
+        cad: CommonAttentionMetadata,
+        num_rejected_tokens_gpu: torch.Tensor | None,
+    ) -> tuple[int, torch.Tensor, CommonAttentionMetadata]:
+        if not self.needs_extra_input_slots:
+            # Default EAGLE pathway: no reshaping of input tensors needed.
+            # Simply rotate the input ids and leave the positions unchanged,
+            # Inserting the next token ids at the last slot in each request.
+            if token_indices_to_sample is None:
+                token_indices_to_sample = cad.query_start_loc[1:] - 1
+
+            num_tokens = target_token_ids.shape[0]
+            # Shift the input ids by one token.
+            # E.g., [a1, b1, b2, c1, c2, c3] -> [b1, b2, c1, c2, c3, c3]
+            self.input_ids[: num_tokens - 1] = target_token_ids[1:]
+            # Replace the last token with the next token.
+            # E.g., [b1, b2, c1, c2, c3, c3] -> [a2, b2, b3, c2, c3, c4]
+            self.input_ids[token_indices_to_sample] = next_token_ids
+
+            # copy inputs to buffer for cudagraph
+            if self.uses_xdrope_dim > 0 and self.draft_uses_xdrope_dim == 0:
+                target_positions = target_positions[0]
+            self._set_positions(num_tokens, target_positions)
+
+            self.hidden_states[:num_tokens] = target_hidden_states
+
+            return num_tokens, token_indices_to_sample, cad
+        else:
+            assert self.is_rejected_token_mask is not None
+            assert self.is_masked_token_mask is not None
+            # 1.
+            # Call a custom triton kernel to copy input_ids and positions
+            # into the correct slots in the preallocated buffers self.input_ids,
+            # self.positions.
+            batch_size = cad.batch_size()
+            # Since we might have to copy a lot of data for prefills, we select the
+            # block size based on the max query length and limit to max 256 slots/block.
+            max_num_tokens_per_request = (
+                cad.max_query_len + self.net_num_new_slots_per_request
+            )
+            BLOCK_SIZE_TOKENS = min(256, next_power_of_2(max_num_tokens_per_request))
+            num_blocks = (
+                max_num_tokens_per_request + BLOCK_SIZE_TOKENS - 1
+            ) // BLOCK_SIZE_TOKENS
+            total_num_input_tokens = target_token_ids.shape[0]
+            total_num_output_tokens = total_num_input_tokens + (
+                self.net_num_new_slots_per_request * batch_size
+            )
+
+            token_indices_to_sample = torch.empty(
+                batch_size * self.extra_slots_per_request,
+                dtype=torch.int32,
+                device=self.device,
+            )
+
+            # Destination indices to write target_hidden_states into drafting buffer.
+            out_hidden_state_mapping = torch.empty(
+                total_num_input_tokens, dtype=torch.int32, device=self.device
+            )
+
+            # Kernel grid: one program per request (row)
+            grid = (batch_size, num_blocks)
+            query_start_loc = cad.query_start_loc
+            query_end_loc = cad.query_start_loc[1:] - 1
+            if num_rejected_tokens_gpu is not None:
+                query_end_loc = query_end_loc - num_rejected_tokens_gpu
+
+            copy_and_expand_eagle_inputs_kernel[grid](
+                # (Padded) Inputs from the target model
+                target_token_ids_ptr=target_token_ids,
+                target_positions_ptr=target_positions,
+                next_token_ids_ptr=next_token_ids,  # sampled tokens, one per request
+                # Outputs to the drafting buffers
+                out_input_ids_ptr=self.input_ids,
+                out_positions_ptr=self.positions,  # Doesn't support mrope for now
+                out_is_rejected_token_mask_ptr=self.is_rejected_token_mask,
+                out_is_masked_token_mask_ptr=self.is_masked_token_mask,
+                out_new_token_indices_ptr=token_indices_to_sample,
+                out_hidden_state_mapping_ptr=out_hidden_state_mapping,
+                # Input metadata
+                query_start_loc_ptr=query_start_loc,
+                query_end_loc_ptr=query_end_loc,
+                padding_token_id=0,
+                parallel_drafting_token_id=self.parallel_drafting_token_id,
+                # Sizing info
+                # Note that we can deduce batch_size for free from the grid size
+                total_input_tokens=total_num_input_tokens,
+                num_padding_slots_per_request=self.extra_slots_per_request,
+                shift_input_ids=self.pass_hidden_states_to_model,
+                BLOCK_SIZE_TOKENS=BLOCK_SIZE_TOKENS,
+            )
+            if self.pass_hidden_states_to_model:
+                assert self.parallel_drafting_hidden_state_tensor is not None
+                self.hidden_states[out_hidden_state_mapping] = target_hidden_states
+                # Use torch.where to avoid DtoH sync from boolean indexing
+                mask = self.is_masked_token_mask[:total_num_output_tokens]
+                torch.where(
+                    mask.unsqueeze(1),
+                    self.parallel_drafting_hidden_state_tensor,
+                    self.hidden_states[:total_num_output_tokens],
+                    out=self.hidden_states[:total_num_output_tokens],
+                )
+
+            # 2.
+            # Recompute the slot mapping based on the new positions and
+            # rejection mask.
+            assert self.block_size > 0, "block_size has not been initialized."
+            new_slot_mapping = compute_new_slot_mapping(
+                cad=cad,
+                new_positions=self.positions[:total_num_output_tokens],
+                is_rejected_token_mask=self.is_rejected_token_mask[
+                    :total_num_output_tokens
+                ],
+                block_size=self.block_size,
+                num_new_tokens=self.net_num_new_slots_per_request,
+                max_model_len=self.max_model_len,
+            )
+
+            # 3. Update the common attention metadata with the new (meta)data
+            new_cad = extend_all_queries_by_N(
+                cad,
+                N=self.net_num_new_slots_per_request,
+                arange=self.arange,
+                new_slot_mapping=new_slot_mapping,
+            )
+
+            return total_num_output_tokens, token_indices_to_sample, new_cad
+
+    def build_model_inputs_first_pass(
+        self,
+        num_tokens: int,
+        num_input_tokens: int,
+        mm_embed_inputs: tuple[list[torch.Tensor], torch.Tensor] | None,
+    ) -> tuple[dict[str, Any], int]:
+        if self.supports_mm_inputs:
+            mm_embeds, is_mm_embed = mm_embed_inputs or (None, None)
+
+            self.inputs_embeds[:num_tokens] = self.model.embed_input_ids(
+                self.input_ids[:num_tokens],
+                multimodal_embeddings=mm_embeds,
+                is_multimodal=is_mm_embed,
+            )
+
+            input_ids = None
+            inputs_embeds = self.inputs_embeds[:num_input_tokens]
+        else:
+            input_ids = self.input_ids[:num_input_tokens]
+            inputs_embeds = None
+
+        model_kwargs = {
+            "input_ids": input_ids,
+            "positions": self._get_positions(num_input_tokens),
+            "inputs_embeds": inputs_embeds,
+        }
+        if self.pass_hidden_states_to_model:
+            model_kwargs["hidden_states"] = self.hidden_states[:num_input_tokens]
+
+        return model_kwargs, num_input_tokens
+
+    def build_per_group_and_layer_attn_metadata(
+        self, common_attn_metadata: CommonAttentionMetadata, draft_index: int = 0
+    ) -> tuple[list[object], dict[str, object]]:
+        per_group_attn_metadata: list[object] = []
+        per_layer_attn_metadata: dict[str, object] = {}
+        for attn_group in self.draft_attn_groups:
+            attn_metadata = attn_group.get_metadata_builder().build_for_drafting(
+                common_attn_metadata=common_attn_metadata, draft_index=draft_index
+            )
+            per_group_attn_metadata.append(attn_metadata)
+            for layer_name in attn_group.layer_names:
+                per_layer_attn_metadata[layer_name] = attn_metadata
+        return per_group_attn_metadata, per_layer_attn_metadata
+
+    def model_returns_tuple(self) -> bool:
+        return self.method not in ("mtp", "draft_model", "dflash")
+
+    def prepare_next_token_ids_cpu(
+        self,
+        sampled_token_ids: list[list[int]],
+        requests: dict[str, CachedRequestState],
+        gpu_input_batch: InputBatch,
+        num_scheduled_tokens: dict[str, int],
+    ) -> torch.Tensor:
+        """
+        This function is used to prepare the inputs for speculative decoding.
+        It calculates the next token ids for each request based on the sampled
+        token ids from the CPU. If a request has no sampled token ids (e.g.,
+        during the initial decoding steps), it falls back to using the request
+        state to get the next token id.
+        """
+        req_ids = gpu_input_batch.req_ids
+        next_token_ids: list[int] = []
+        for i, token_ids in enumerate(sampled_token_ids):
+            if token_ids:
+                # Common case.
+                next_token_id = token_ids[-1]
+            else:
+                # Partial prefill (rare case).
+                # Get the next token id from the request state.
+                req_id = req_ids[i]
+                req_state = requests[req_id]
+                seq_len = req_state.num_computed_tokens + num_scheduled_tokens[req_id]
+                next_token_id = req_state.get_token_id(seq_len)
+            next_token_ids.append(next_token_id)
+        next_token_ids = torch.tensor(
+            next_token_ids, dtype=torch.int32, device=self.input_ids.device
+        )
+        return next_token_ids
+
+    def prepare_next_token_ids_padded(
+        self,
+        sampled_token_ids: torch.Tensor,
+        requests: dict[str, CachedRequestState],
+        gpu_input_batch: InputBatch,
+        discard_request_mask: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """
+        This function is used to prepare the inputs for speculative decoding.
+        It calculates the next token ids and the number of valid sampled tokens
+        for each request, considering the "discarded" requests whose next token
+        is not sampled and comes from `request.get_token_id()` instead. This is denoted
+        the "backup" token id. It also counts rejected tokens via `sampled_token_ids`.
+        """
+        # Precompute backup token IDs for discarded requests.
+        num_reqs = gpu_input_batch.num_reqs
+        for i in range(num_reqs):
+            self.backup_next_token_ids.np[i] = requests[
+                gpu_input_batch.req_ids[i]
+            ].get_token_id(gpu_input_batch.num_tokens_no_spec[i] - 1)
+        self.backup_next_token_ids.copy_to_gpu(num_reqs)
+        backup_tokens_gpu = self.backup_next_token_ids.gpu
+
+        batch_size, num_tokens = sampled_token_ids.shape
+        device = sampled_token_ids.device
+
+        assert discard_request_mask.dtype == torch.bool
+        assert backup_tokens_gpu.dtype == torch.int32
+
+        next_token_ids = torch.empty(batch_size, dtype=torch.int32, device=device)
+        valid_sampled_tokens_count = next_token_ids.new_empty(batch_size)
+
+        # Kernel grid: one program per request (row)
+        grid = (batch_size,)
+
+        # Find the next power of 2 for block sizes
+        BLOCK_SIZE_TOKENS = next_power_of_2(num_tokens)
+        eagle_prepare_next_token_padded_kernel[grid](
+            sampled_token_ids,
+            discard_request_mask,
+            backup_tokens_gpu,
+            next_token_ids,
+            valid_sampled_tokens_count,
+            gpu_input_batch.vocab_size,
+            num_tokens,
+            batch_size,
+            sampled_token_ids.stride(0),
+            BLOCK_SIZE_TOKENS=BLOCK_SIZE_TOKENS,
+        )
+
+        return next_token_ids, valid_sampled_tokens_count
+
+    def prepare_inputs_padded(
+        self,
+        common_attn_metadata: CommonAttentionMetadata,
+        spec_decode_metadata: SpecDecodeMetadata,
+        valid_sampled_tokens_count: torch.Tensor,
+    ) -> tuple[CommonAttentionMetadata, torch.Tensor, torch.Tensor]:
+        """
+        This function is used to prepare the inputs for speculative decoding
+        It updates the common_attn_metadata for speculative decoding,
+        but does not consider the rejected tokens. Instead, all tokens
+        are included as inputs to the speculator, with the rejected tokens
+        used as padding and filtered out later by `token_indices_to_sample`.
+        No blocking CPU operations should be introduced in this function.
+        """
+        num_reqs = common_attn_metadata.num_reqs
+        device = valid_sampled_tokens_count.device
+
+        token_indices_to_sample = torch.empty(
+            (num_reqs,), dtype=torch.int32, device=device
+        )
+        num_rejected_tokens_gpu = torch.empty(
+            (num_reqs,), dtype=torch.int32, device=device
+        )
+
+        grid = (num_reqs,)
+        eagle_prepare_inputs_padded_kernel[grid](
+            spec_decode_metadata.cu_num_draft_tokens,
+            valid_sampled_tokens_count,
+            common_attn_metadata.query_start_loc,
+            token_indices_to_sample,
+            num_rejected_tokens_gpu,
+            num_reqs,
+        )
+
+        query_start_loc_cpu = common_attn_metadata.query_start_loc_cpu
+        new_query_len_per_req = query_start_loc_cpu[1:] - query_start_loc_cpu[:-1]
+
+        total_num_tokens = query_start_loc_cpu[-1].item()
+
+        spec_common_attn_metadata = CommonAttentionMetadata(
+            query_start_loc=common_attn_metadata.query_start_loc,
+            seq_lens=common_attn_metadata.seq_lens,
+            query_start_loc_cpu=query_start_loc_cpu,
+            _seq_lens_cpu=common_attn_metadata._seq_lens_cpu,
+            _num_computed_tokens_cpu=common_attn_metadata._num_computed_tokens_cpu,
+            seq_lens_cpu_upper_bound=common_attn_metadata.seq_lens_cpu_upper_bound,
+            num_reqs=common_attn_metadata.num_reqs,
+            num_actual_tokens=total_num_tokens,
+            max_query_len=new_query_len_per_req.max().item(),
+            max_seq_len=common_attn_metadata.max_seq_len,
+            block_table_tensor=common_attn_metadata.block_table_tensor,
+            slot_mapping=common_attn_metadata.slot_mapping[:total_num_tokens],
+            causal=True,
+            dcp_local_seq_lens=common_attn_metadata.dcp_local_seq_lens,
+        )
+
+        return (
+            spec_common_attn_metadata,
+            token_indices_to_sample,
+            num_rejected_tokens_gpu,
+        )
+
+    def prepare_inputs(
+        self,
+        common_attn_metadata: CommonAttentionMetadata,
+        sampled_token_ids: list[list[int]],
+        num_draft_tokens: list[int],
+    ) -> tuple[CommonAttentionMetadata, torch.Tensor]:
+        """
+        This function is used to prepare the inputs for speculative decoding.
+        It updates to the common_attn_metadata to account for the rejected
+        tokens (and newly sampled tokens). It also returns the token indices
+        of the tokens that should be fed to the speculator.
+        """
+        # E.g.
+        #  common_attn_metadata.query_start_loc{_cpu}:
+        #       [0, q1, q1 + q2, q1 + q2 + q3]
+        #  common_attn_metadata.seq_lens{_cpu}: [s1, s2, s3]
+        #  num_rejected_tokens: [n1, n2, n3]
+        # This function computes the intermediate values:
+        #  num_tokens_per_req: [q1 - n1, q2 - n2, q3 - n3]
+        # And returns:
+        #  common_attn_metadata.query_start_loc{_cpu}:
+        #       [0, q1 - n1, q1 + q2 - n1 - n2, q1 + q2 + q3 - n1 - n2 - n3]
+        #  common_attn_metadata.seq_lens{_cpu}:
+        #       [s1 - n1 + 1, s2 - n2 + 1, s3 - n3 + 1]
+        #  token_indices: [0, 1, ..., q1 - n1 - 1,
+        #                 q1, q1 + 1, ..., q1 + q2 - n2 - 1,
+        #                 q1 + q2, q1 + q2 + 1, ..., q1 + q2 + q3 - n3 - 1]
+
+        num_rejected_tokens = [
+            n + 1 - len(sampled_token_ids[i]) if n > 0 else 0
+            for i, n in enumerate(num_draft_tokens)
+        ]
+        num_rejected_tokens = torch.tensor(num_rejected_tokens, dtype=torch.int32)
+
+        device = common_attn_metadata.query_start_loc.device
+        query_start_loc_cpu = common_attn_metadata.query_start_loc_cpu
+        # upper_bound - rejected = actual post-rejection seq_lens (no D2H sync).
+        assert common_attn_metadata.seq_lens_cpu_upper_bound is not None
+        new_seq_lens_cpu = (
+            common_attn_metadata.seq_lens_cpu_upper_bound - num_rejected_tokens
+        )
+
+        # [0, q1, q1 + q2, q1 + q2 + q3] -> [q1, q2, q3]
+        new_query_len_per_req = query_start_loc_cpu[1:] - query_start_loc_cpu[:-1]
+        # [q1, q2, q3] -> [q1 - n1, q2 - n2, q3 - n3]
+        new_num_tokens_per_req = new_query_len_per_req - num_rejected_tokens
+        new_num_tokens_per_req_np = new_num_tokens_per_req.numpy()
+
+        # [q1 - n1, q2 - n2, q3 - n3] ->
+        # [0, q1 - n1, q1 + q2 - n1 - n2, q1 + q2 + q3 - n1 - n2 - n3]
+        new_query_start_loc_cpu = torch.zeros(
+            query_start_loc_cpu.shape,
+            dtype=torch.int32,
+            pin_memory=is_pin_memory_available(),
+        )
+        new_query_start_loc_np = new_query_start_loc_cpu.numpy()
+        np.cumsum(new_num_tokens_per_req_np, out=new_query_start_loc_np[1:])
+
+        total_num_tokens = new_query_start_loc_np[-1]
+        # Example assuming num_tokens_per_req_np = [2, 4, 3]
+        # this implies that `new_query_start_locs` is:
+        # [0, 2, 6, 9] ->
+        # [0, 0, 2, 2, 2, 2, 6, 6, 6]
+        #  _r1_  ____r2____  ___r3__
+        new_query_start_locs_expanded = np.repeat(
+            new_query_start_loc_np[:-1], new_num_tokens_per_req_np
+        )
+        # [0, 1, 2, 3, 4, 5, 6, 7, 8] ->
+        # [0, 1, 0, 1, 2, 3, 0, 1, 2]
+        #  _r1_  ____r2____  ___r3__
+        token_offsets = (
+            self.token_arange_np[:total_num_tokens] - new_query_start_locs_expanded
+        )
+
+        # Expand starting positions to match token pattern
+        # [0, q1, q1 + q2] ->
+        # [0, 0, q1, q1, q1, q1, q1 + q2, q1 + q2, q1 + q2]
+        #  _r1_  _____r2_______  ___________r3____________
+        old_query_start_locs_expanded = np.repeat(
+            query_start_loc_cpu[:-1].numpy(), new_num_tokens_per_req_np
+        )
+        # Final token indices are:
+        # [0, 1,                                // req 1
+        #  q1 + 0, q1 + 1, q1 + 2, q1 + 3,       // req 2
+        #  q1 + q2 + 0, q1 + q2 + 1, q1 + q2 + 2] // req 3
+        token_indices_np = token_offsets + old_query_start_locs_expanded
+        token_indices = torch.from_numpy(token_indices_np).to(device, non_blocking=True)
+
+        spec_common_attn_metadata = CommonAttentionMetadata(
+            query_start_loc=new_query_start_loc_cpu.to(device, non_blocking=True),
+            seq_lens=new_seq_lens_cpu.to(device, non_blocking=True),
+            query_start_loc_cpu=new_query_start_loc_cpu,
+            _seq_lens_cpu=new_seq_lens_cpu,
+            _num_computed_tokens_cpu=common_attn_metadata._num_computed_tokens_cpu,
+            seq_lens_cpu_upper_bound=new_seq_lens_cpu,
+            num_reqs=common_attn_metadata.num_reqs,
+            num_actual_tokens=total_num_tokens,
+            max_query_len=new_query_len_per_req.max().item(),
+            max_seq_len=new_seq_lens_cpu.max().item(),
+            block_table_tensor=common_attn_metadata.block_table_tensor,
+            slot_mapping=common_attn_metadata.slot_mapping[token_indices],
+            causal=True,
+            dcp_local_seq_lens=common_attn_metadata.dcp_local_seq_lens,
+        )
+
+        return spec_common_attn_metadata, token_indices
+
+    def get_model_name(self, model: nn.Module) -> str:
+        if hasattr(model, "module"):  # multi-GPU
+            model = model.module
+        return model.__class__.__name__
+
+    def _create_draft_vllm_config(self) -> VllmConfig:
+        """Return a VllmConfig with kernel-level overrides for the proposer.
+        Subclasses may override to apply additional config changes.
+        """
+        spec_cfg = self.speculative_config
+        base = self.vllm_config
+
+        if spec_cfg.moe_backend is not None:
+            base = replace(
+                base,
+                kernel_config=replace(
+                    base.kernel_config,
+                    moe_backend=spec_cfg.moe_backend,
+                ),
+            )
+
+        # Note (matt): Never inherit the attention backend from base, because there are
+        # many opportunities for incompatibility, so we always independently autoselect
+        # unless explicitly specified in the speculative config.
+        base = replace(
+            base,
+            attention_config=replace(
+                base.attention_config,
+                backend=spec_cfg.attention_backend,
+            ),
+        )
+
+        return base
+
+    def _get_model(self) -> nn.Module:
+        """
+        Default method to call get_model(). Can be overridden by subclasses which
+        need to customize model loading.
+        """
+        from vllm.compilation.backends import set_model_tag
+
+        draft_vllm_config = self._create_draft_vllm_config()
+        with set_model_tag("eagle_head"):
+            model = get_model(
+                vllm_config=draft_vllm_config,
+                model_config=self.speculative_config.draft_model_config,
+                load_config=self.speculative_config.draft_load_config,
+            )
+        return model
+
+    def load_model(self, target_model: nn.Module) -> None:
+        target_attn_layer_names = set(
+            get_layers_from_vllm_config(
+                self.vllm_config,
+                AttentionLayerBase,  # type: ignore[type-abstract]
+            ).keys()
+        )
+
+        self.model = self._get_model()
+
+        # Find draft layers (attention layers added by draft model)
+        all_attn_layers = get_layers_from_vllm_config(
+            self.vllm_config,
+            AttentionLayerBase,  # type: ignore[type-abstract]
+        )
+        # Filter to only layers that have KV cache specs.
+        self._draft_attn_layer_names = {
+            name
+            for name in (set(all_attn_layers.keys()) - target_attn_layer_names)
+            if all_attn_layers[name].get_kv_cache_spec(self.vllm_config) is not None
+        }
+
+        if self.supports_mm_inputs:
+            # Even if the target model is multimodal, we can also use
+            # text-only draft models
+            try:
+                dummy_input_ids = torch.tensor([[1]], device=self.input_ids.device)
+                self.model.embed_input_ids(dummy_input_ids, multimodal_embeddings=None)
+            except (NotImplementedError, AttributeError, TypeError):
+                logger.warning(
+                    "Draft model does not support multimodal inputs, "
+                    "falling back to text-only mode"
+                )
+                self.supports_mm_inputs = False
+
+        if supports_multimodal(target_model):
+            # handle multimodality
+            assert hasattr(target_model, "config")
+            if self.get_model_name(target_model) in [
+                "Cohere2VisionForConditionalGeneration",
+                "Exaone4_5_ForConditionalGeneration",
+                "GlmOcrForConditionalGeneration",
+                "HunYuanVLForConditionalGeneration",
+                "InternS2PreviewForConditionalGeneration",
+                "MiMoV2OmniForCausalLM",
+                "Qwen2_5_VLForConditionalGeneration",
+                "Qwen3_5ForConditionalGeneration",
+                "Qwen3_5MoeForConditionalGeneration",
+                "Qwen3VLForConditionalGeneration",
+                "Qwen3VLMoeForConditionalGeneration",
+                "Gemma4ForConditionalGeneration",
+            ]:
+                self.model.config.image_token_index = target_model.config.image_token_id
+            elif self.get_model_name(target_model) == "PixtralForConditionalGeneration":
+                self.model.config.image_token_index = (
+                    target_model.config.vision_config.image_token_id
+                )
+            elif self.get_model_name(target_model) == "KimiK25ForConditionalGeneration":
+                self.model.config.image_token_index = (
+                    target_model.config.media_placeholder_token_id
+                )
+            else:
+                self.model.config.image_token_index = (
+                    target_model.config.image_token_index
+                )
+            target_language_model = cast(
+                SupportsMultiModal, target_model
+            ).get_language_model()
+        else:
+            target_language_model = target_model
+
+        self._maybe_share_embeddings(target_language_model)
+        self._maybe_share_lm_head(target_language_model)
+
+        if (
+            self.parallel_drafting
+            and self.pass_hidden_states_to_model
+            and self.parallel_drafting_hidden_state_tensor is not None
+        ):
+            flat_mask = self.model.mask_hidden.view(-1)
+            if self.eagle3_use_aux_hidden_state:
+                # EAGLE3: mask_hidden stores all aux hidden states,
+                # project through combine_hidden_states
+                self.parallel_drafting_hidden_state_tensor.copy_(
+                    self.model.combine_hidden_states(flat_mask)
+                )
+            else:
+                self.parallel_drafting_hidden_state_tensor.copy_(flat_mask)
+
+    def _maybe_share_embeddings(self, target_language_model: nn.Module) -> None:
+        """
+        Some draft models may not have their own embedding layers, and some may
+        have a duplicate copy of the target model's embedding layers. In these cases,
+        we share the target model's embedding layers with the draft model to save
+        memory.
+        """
+        if get_pp_group().world_size == 1:
+            inner_model = getattr(target_language_model, "model", None)
+            if inner_model is None:
+                raise AttributeError("Target model does not have 'model' attribute")
+            if hasattr(inner_model, "embed_tokens"):
+                target_embed_tokens = inner_model.embed_tokens
+            elif hasattr(inner_model, "embedding"):
+                target_embed_tokens = inner_model.embedding
+            else:
+                raise AttributeError(
+                    "Target model does not have 'embed_tokens' or 'embedding' attribute"
+                )
+
+            share_embeddings = False
+            if hasattr(self.model, "has_own_embed_tokens"):
+                # EAGLE model
+                if not self.model.has_own_embed_tokens:
+                    share_embeddings = True
+                    logger.info(
+                        "Detected EAGLE model without its own embed_tokens in the"
+                        " checkpoint. Sharing target model embedding weights with the"
+                        " draft model."
+                    )
+                elif (
+                    isinstance(target_embed_tokens.weight, torch.Tensor)
+                    and isinstance(self.model.model.embed_tokens.weight, torch.Tensor)
+                    # TODO: Offload to CPU for comparison to avoid extra GPU memory
+                    # usage in CI testing environments with limited GPU memory
+                    and torch.equal(
+                        target_embed_tokens.weight.cpu(),
+                        self.model.model.embed_tokens.weight.cpu(),
+                    )
+                ):
+                    share_embeddings = True
+                    logger.info(
+                        "Detected EAGLE model with embed_tokens identical to the target"
+                        " model. Sharing target model embedding weights with the draft"
+                        " model."
+                    )
+                else:
+                    logger.info(
+                        "Detected EAGLE model with distinct embed_tokens weights. "
+                        "Keeping separate embedding weights from the target model."
+                    )
+            else:
+                # MTP model
+                share_embeddings = True
+                logger.info(
+                    "Detected MTP model. "
+                    "Sharing target model embedding weights with the draft model."
+                )
+
+            if share_embeddings:
+                if hasattr(self.model.model, "embed_tokens"):
+                    del self.model.model.embed_tokens
+                self.model.model.embed_tokens = target_embed_tokens
+        else:
+            logger.info(
+                "The draft model's vocab embedding will be loaded separately"
+                " from the target model."
+            )
+
+    def _maybe_share_lm_head(self, target_language_model: nn.Module) -> None:
+        """
+        Some draft models may not have their own LM head, and some may have a
+        duplicate copy of the target model's LM head. In these cases, we share
+        the target model's LM head with the draft model to save memory.
+        """
+        share_lm_head = False
+        if hasattr(self.model, "has_own_lm_head"):
+            # EAGLE model
+            if not self.model.has_own_lm_head:
+                share_lm_head = True
+                logger.info(
+                    "Detected EAGLE model without its own lm_head in the checkpoint. "
+                    "Sharing target model lm_head weights with the draft model."
+                )
+            elif (
+                hasattr(target_language_model, "lm_head")
+                and hasattr(target_language_model.lm_head, "weight")
+                and hasattr(self.model.lm_head, "weight")
+                and isinstance(target_language_model.lm_head.weight, torch.Tensor)
+                and isinstance(self.model.lm_head.weight, torch.Tensor)
+                # TODO: Offload to CPU for comparison to avoid extra GPU memory
+                # usage in CI testing environments with limited GPU memory
+                and torch.equal(
+                    target_language_model.lm_head.weight.cpu(),
+                    self.model.lm_head.weight.cpu(),
+                )
+            ):
+                share_lm_head = True
+                logger.info(
+                    "Detected EAGLE model with lm_head identical to the target model. "
+                    "Sharing target model lm_head weights with the draft model."
+                )
+            else:
+                logger.info(
+                    "Detected EAGLE model with distinct lm_head weights. "
+                    "Keeping separate lm_head weights from the target model."
+                )
+        else:
+            # MTP model
+            share_lm_head = True
+            logger.info(
+                "Detected MTP model. "
+                "Sharing target model lm_head weights with the draft model."
+            )
+
+        if share_lm_head and hasattr(target_language_model, "lm_head"):
+            if hasattr(self.model, "lm_head"):
+                del self.model.lm_head
+            self.model.lm_head = target_language_model.lm_head
+
+            # MTP models call compute_logits via shared_head.head (a
+            # ParallelLMHead inside each MTP layer), not self.model.lm_head.
+            # If the checkpoint omits a copy of the lm_head weights at the
+            # MTP layer path, shared_head.head stays uninitialised and
+            # produces NaN logits. Always share it explicitly.
+            inner = getattr(self.model, "model", None)
+            layers = getattr(inner, "layers", None) if inner else None
+            if layers is not None:
+                items = layers.values() if isinstance(layers, nn.ModuleDict) else layers
+                for layer in items:
+                    sh = getattr(layer, "shared_head", None)
+                    if sh is not None and hasattr(sh, "head"):
+                        del sh.head
+                        sh.head = target_language_model.lm_head
+                        logger.info(
+                            "Shared target model lm_head with MTP shared_head.head."
+                        )
+
+        if hasattr(target_language_model.model, "topk_indices_buffer"):
+            if hasattr(self.model.model, "topk_indices_buffer"):
+                del self.model.model.topk_indices_buffer
+            self.model.model.topk_indices_buffer = (
+                target_language_model.model.topk_indices_buffer
+            )
+            logger.info(
+                "Detected MTP model with topk_indices_buffer. "
+                "Sharing target model topk_indices_buffer with the draft model."
+            )
+
+        if self.use_local_argmax_reduction:
+            if not hasattr(self.model, "get_top_tokens"):
+                raise ValueError(
+                    "use_local_argmax_reduction is enabled but draft model "
+                    f"{self.model.__class__.__name__} does not implement "
+                    "get_top_tokens()."
+                )
+            # Warn if draft model has vocab remapping, which forces fallback
+            # to the full-logits path (negating the optimization).
+            if (
+                hasattr(self.model, "draft_id_to_target_id")
+                and self.model.draft_id_to_target_id is not None
+            ):
+                logger.warning(
+                    "use_local_argmax_reduction is enabled but draft model "
+                    "uses draft_id_to_target_id vocab remapping. The "
+                    "optimization will be bypassed (falling back to full "
+                    "logits gather + argmax)."
+                )
+            else:
+                logger.info(
+                    "Using local argmax reduction for draft token generation "
+                    "(communication: O(2*tp_size) vs O(vocab_size))."
+                )
+
+    @torch.inference_mode()
+    def dummy_run(
+        self,
+        num_tokens: int,
+        use_cudagraphs: bool = True,
+        is_graph_capturing: bool = False,
+        slot_mappings: dict[str, torch.Tensor] | None = None,
+    ) -> None:
+        # FIXME: when using tree-based specdec, adjust number of forward-passes
+        # according to the depth of the tree.
+        only_one_forward_pass = is_graph_capturing or self.parallel_drafting
+        for fwd_idx in range(
+            1 if only_one_forward_pass else self.num_speculative_tokens
+        ):
+            if fwd_idx <= 1:
+                cudagraph_runtime_mode, num_input_tokens, num_tokens_across_dp = (
+                    self._determine_batch_execution_and_padding(
+                        num_tokens, use_cudagraphs=use_cudagraphs
+                    )
+                )
+
+            # Make sure to use EAGLE's own buffer during cudagraph capture.
+            if (
+                self._draft_attn_layer_names
+                and slot_mappings is not None
+                and next(iter(self._draft_attn_layer_names)) in slot_mappings
+            ):
+                slot_mapping_dict = self._get_slot_mapping(num_input_tokens)
+            else:
+                slot_mapping_dict = slot_mappings or {}
+
+            with set_forward_context(
+                None,
+                self.vllm_config,
+                num_tokens=num_input_tokens,
+                num_tokens_across_dp=num_tokens_across_dp,
+                cudagraph_runtime_mode=cudagraph_runtime_mode,
+                slot_mapping=slot_mapping_dict,
+            ):
+                if self.supports_mm_inputs:
+                    input_ids = None
+                    inputs_embeds = self.inputs_embeds[:num_input_tokens]
+                else:
+                    input_ids = self.input_ids[:num_input_tokens]
+                    inputs_embeds = None
+
+                kwargs = dict(
+                    input_ids=input_ids,
+                    positions=self._get_positions(num_input_tokens),
+                    inputs_embeds=inputs_embeds,
+                )
+                if self.pass_hidden_states_to_model:
+                    kwargs["hidden_states"] = self.hidden_states[:num_input_tokens]
+                self.model(**kwargs)
+
+    def _get_eagle3_use_aux_hidden_state_from_config(self) -> bool:
+        """
+        Some eagle3 heads (e.g., nvidia/gpt-oss-120b-Eagle3-v2) do not use auxiliary
+        hidden states and directly uses the last layer output just like eagle1.
+        They might indicate this by setting "use_aux_hidden_state" to False
+        inside the "eagle_config" dict of their hf_config.
+        """
+        if self.method != "eagle3":
+            return False
+        # Assume that eagle3 heads use aux hidden states by default
+        use_aux_hidden_state = True
+        eagle_config = getattr(self.draft_model_config.hf_config, "eagle_config", None)
+        if eagle_config is not None:
+            use_aux_hidden_state = eagle_config.get("use_aux_hidden_state", True)
+        return use_aux_hidden_state
+
+    def validate_same_kv_cache_group(self, kv_cache_config: KVCacheConfig) -> None:
+        """
+        Validate that all drafting layers belong to the same KVCacheGroup.
+        Need this assumption to ensure all drafting layers can use the
+        same AttentionMetadata.
+        May extend to multiple AttentionMetadata in the future.
+        """
+        kv_cache_groups: dict[str, int] = {}
+        for id, kv_cache_group in enumerate(kv_cache_config.kv_cache_groups):
+            for layer_name in kv_cache_group.layer_names:
+                kv_cache_groups[layer_name] = id
+        assert (
+            len(
+                set(
+                    [
+                        kv_cache_groups[layer_name]
+                        for layer_name in self._draft_attn_layer_names
+                    ]
+                )
+            )
+            == 1
+        ), "All drafting layers should belong to the same kv cache group"
+
+    def initialize_attn_backend(
+        self,
+        kv_cache_config: KVCacheConfig,
+        kernel_block_sizes: list[int] | None = None,
+    ) -> None:
+        """
+        Initialize AttentionGroups for draft layers using kv_cache_config.
+        Called from the model runner's initialize_metadata_builders.
+        """
+        all_attn_layers = get_layers_from_vllm_config(
+            self.vllm_config,
+            AttentionLayerBase,  # type: ignore[type-abstract]
+        )
+
+        # Find which kv_cache_group the draft layers belong to
+        self.validate_same_kv_cache_group(kv_cache_config)
+        kv_cache_spec = None
+        for gid, group in enumerate(kv_cache_config.kv_cache_groups):
+            if self._draft_attn_layer_names & set(group.layer_names):
+                self.kv_cache_gid = gid
+                kv_cache_spec = group.kv_cache_spec
+                break
+
+        attention_groups: dict[tuple[str, str], AttentionGroup] = {}
+        if kv_cache_spec is not None:
+            for layer_name in self._draft_attn_layer_names:
+                attn_backend = all_attn_layers[layer_name].get_attn_backend()
+                backend_key = attn_backend.full_cls_name()
+                if backend_key not in attention_groups:
+                    layer_kv_cache_spec = kv_cache_spec
+                    if isinstance(layer_kv_cache_spec, UniformTypeKVCacheSpecs):
+                        layer_kv_cache_spec = layer_kv_cache_spec.kv_cache_specs[
+                            layer_name
+                        ]
+
+                    kernel_block_size = (
+                        kernel_block_sizes[self.kv_cache_gid]
+                        if kernel_block_sizes is not None
+                        and self.kv_cache_gid < len(kernel_block_sizes)
+                        else None
+                    )
+                    attn_group = AttentionGroup(
+                        backend=attn_backend,
+                        layer_names=[layer_name],
+                        kv_cache_spec=layer_kv_cache_spec,
+                        kv_cache_group_id=self.kv_cache_gid,
+                    )
+                    attn_group.create_metadata_builders(
+                        self.vllm_config,
+                        self.device,
+                        kernel_block_size=kernel_block_size,
+                    )
+                    attention_groups[backend_key] = attn_group
+                else:
+                    attention_groups[backend_key].layer_names.append(layer_name)
+
+        self.draft_attn_groups = list(attention_groups.values())
+        self.block_size = (
+            self.draft_attn_groups[0].get_metadata_builder().kv_cache_spec.block_size
+        )
+        logger.debug("Using block size %d for drafting layers", self.block_size)
+
+    def _determine_batch_execution_and_padding(
+        self,
+        num_tokens: int,
+        use_cudagraphs: bool = True,
+    ) -> tuple[CUDAGraphMode, int, torch.Tensor | None]:
+        cudagraph_mode, batch_desc = self.cudagraph_dispatcher.dispatch(
+            num_tokens,
+            valid_modes=({CUDAGraphMode.NONE} if not use_cudagraphs else None),
+        )
+        num_tokens_padded = batch_desc.num_tokens
+
+        # Extra coordination when running data-parallel since we need to
+        # coordinate across ranks
+        # TODO(Flechman): support DBO ubatching
+        should_ubatch, num_tokens_across_dp = False, None
+        if self.vllm_config.parallel_config.data_parallel_size > 1:
+            should_ubatch, num_tokens_across_dp, synced_cudagraph_mode = (
+                coordinate_batch_across_dp(
+                    num_tokens_unpadded=num_tokens,
+                    parallel_config=self.vllm_config.parallel_config,
+                    allow_microbatching=False,
+                    num_tokens_padded=num_tokens_padded,
+                    cudagraph_mode=cudagraph_mode.value,
+                )
+            )
+            assert not should_ubatch, "DBO ubatching not implemented for EAGLE"
+
+            # Extract DP-synced values
+            if num_tokens_across_dp is not None:
+                dp_rank = self.dp_rank
+                num_tokens_padded = int(num_tokens_across_dp[dp_rank].item())
+                # Re-dispatch with DP padding so we have the correct
+                # batch_descriptor
+                cudagraph_mode, batch_desc = self.cudagraph_dispatcher.dispatch(
+                    num_tokens_padded,
+                    valid_modes={CUDAGraphMode(synced_cudagraph_mode)},
+                )
+                # Assert to make sure the agreed upon token count is correct
+                # otherwise num_tokens_across_dp will no-longer be valid
+                assert batch_desc.num_tokens == num_tokens_padded
+                num_tokens_across_dp[dp_rank] = num_tokens_padded
+
+        return cudagraph_mode, num_tokens_padded, num_tokens_across_dp
+
+
+# NOTE(woosuk): Currently, the below code is not used and we always use argmax
+# to sample the draft tokens. We will use this after we find a way to manage
+# the draft prob tensor.
+# Refer to https://github.com/vllm-project/vllm/pull/16899 for the details.
+# FIXME(woosuk): The logic here is duplicated with the main sampling code.
+# We should refactor this to reuse the same sampling implementation.
+def compute_probs_and_sample_next_token(
+    logits: torch.Tensor,
+    sampling_metadata: SamplingMetadata,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    if sampling_metadata.all_greedy:
+        # For greedy requests, draft_probs is not used in rejection sampling.
+        # Therefore, we can just return the logits.
+        probs = logits
+        next_token_ids = logits.argmax(dim=-1)
+        return next_token_ids, probs
+
+    assert sampling_metadata.temperature is not None
+
+    # Use epsilon comparison to detect greedy sampling (temperature ~ 0.0)
+    # consistent with sampler.py's _SAMPLING_EPS threshold
+    temperature = sampling_metadata.temperature
+    # Avoid division by zero if there are greedy requests.
+    if not sampling_metadata.all_random:
+        is_greedy = temperature < _SAMPLING_EPS
+        temperature = torch.where(is_greedy, 1.0, temperature)
+    logits.div_(temperature.view(-1, 1))
+    probs = logits.softmax(dim=-1, dtype=torch.float32)
+
+    # NOTE(woosuk): Currently, we ignore most of the sampling parameters in
+    # generating the draft tokens. We only use the temperature. While this
+    # could degrade the acceptance rate, it does not affect the distribution
+    # of the generated tokens after rejection sampling.
+
+    # TODO(woosuk): Consider seeds.
+    q = torch.empty_like(probs)
+    q.exponential_()
+    # NOTE(woosuk): We shouldn't use `probs.div_(q)` because the draft_probs
+    # will be used later for rejection sampling.
+    next_token_ids = probs.div(q).argmax(dim=-1).view(-1)
+    if not sampling_metadata.all_random:
+        greedy_token_ids = probs.argmax(dim=-1)
+        next_token_ids = torch.where(is_greedy, greedy_token_ids, next_token_ids)
+    return next_token_ids, probs
diff --git a/vllm/v1/spec_decode/ngram_proposer_gpu.py b/vllm/v1/spec_decode/ngram_proposer_gpu.py
index eb24a9c933e2..7759d5c32f60 100644
--- a/vllm/v1/spec_decode/ngram_proposer_gpu.py
+++ b/vllm/v1/spec_decode/ngram_proposer_gpu.py
@@ -18,6 +18,7 @@
     VllmConfig,
 )
 from vllm.forward_context import set_forward_context
+from vllm.utils.torch_utils import async_tensor_h2d
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.utils import record_function_or_nullcontext
 from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch
@@ -569,8 +570,8 @@ def update_ngram_gpu_tensors_incremental(
             reorder_dst.append(curr_idx)
 
     if reorder_src:
-        src_tensor = torch.tensor(reorder_src, dtype=torch.long, device=device)
-        dst_tensor = torch.tensor(reorder_dst, dtype=torch.long, device=device)
+        src_tensor = async_tensor_h2d(reorder_src, dtype=torch.long, device=device)
+        dst_tensor = async_tensor_h2d(reorder_dst, dtype=torch.long, device=device)
 
         temp_token_ids = token_ids_gpu_tensor[src_tensor].clone()
         temp_num_tokens = num_tokens_no_spec_gpu[src_tensor].clone()
diff --git a/vllm/v1/spec_decode/utils.py b/vllm/v1/spec_decode/utils.py
index b85459c86f24..e046f0136152 100644
--- a/vllm/v1/spec_decode/utils.py
+++ b/vllm/v1/spec_decode/utils.py
@@ -11,6 +11,20 @@
 PADDING_SLOT_ID = -1
 
 
+def next_power_of_2(n: int) -> int:
+    """Return the smallest power of 2 >= n."""
+    if n <= 0:
+        return 1
+    n -= 1
+    n |= n >> 1
+    n |= n >> 2
+    n |= n >> 4
+    n |= n >> 8
+    n |= n >> 16
+    n |= n >> 32
+    return n + 1
+
+
 @triton.jit
 def eagle_step_slot_mapping_metadata_kernel(
     positions_ptr,  # [batch_size] - current positions (1D view for M-RoPE)
@@ -102,8 +116,8 @@ def eagle_step_update_slot_mapping_and_metadata(
     batch_size = positions_1d.shape[0]
     if input_batch_size is None:
         input_batch_size = batch_size
-    n_blocks_per_req = block_table_tensor.shape[1]
 
+    n_blocks_per_req = block_table_tensor.shape[1]
     eagle_step_slot_mapping_metadata_kernel[(input_batch_size,)](
         positions_1d,
         block_table_tensor,
@@ -441,6 +455,114 @@ def copy_and_expand_eagle_inputs_kernel(
     )
 
 
+@triton.jit
+def copy_and_expand_dflash_inputs_kernel(
+    # Inputs
+    next_token_ids_ptr,  # [num_reqs]
+    target_positions_ptr,  # [num_context]
+    # Outputs
+    out_input_ids_ptr,  # [num_query_total] (output)
+    out_context_positions_ptr,  # [num_context] (output)
+    out_query_positions_ptr,  # [num_query_total] (output)
+    out_context_slot_mapping_ptr,  # [num_context] (output)
+    out_query_slot_mapping_ptr,  # [num_query_total] (output)
+    out_token_indices_ptr,  # [num_reqs * num_speculative_tokens] (output)
+    # Block table
+    block_table_ptr,  # [max_reqs, max_blocks]
+    block_table_stride,  # stride of block_table dim 0 (in elements)
+    # Metadata
+    query_start_loc_ptr,  # [num_reqs + 1]
+    num_rejected_tokens_ptr,  # [num_reqs] or null (0) when not padded
+    # Scalars
+    parallel_drafting_token_id,  # tl.int32
+    block_size,  # tl.int32
+    num_query_per_req,  # tl.int32
+    num_speculative_tokens,  # tl.int32
+    total_input_tokens,  # tl.int32
+    BLOCK_SIZE: tl.constexpr,
+    HAS_NUM_REJECTED: tl.constexpr = False,
+):
+    """
+    Fused kernel for DFlash first-pass input setup.
+
+    Per request, this kernel:
+      1. Copies context positions from target_positions to
+         out_context_positions.
+      2. Computes query positions (last_target_pos + 1 + offset) and writes
+         them to out_query_positions.
+      3. Writes input_ids for query tokens: [next_token, mask, mask, ...].
+      4. Computes slot_mapping for context and query positions into separate
+         buffers via block_table lookup.
+      5. Writes token_indices_to_sample for the mask (speculative) tokens.
+    """
+    req_idx = tl.program_id(axis=0)
+    block_idx = tl.program_id(axis=1)
+
+    # Load context token range for this request
+    ctx_start = tl.load(query_start_loc_ptr + req_idx)
+    ctx_end = tl.load(query_start_loc_ptr + req_idx + 1)
+    num_ctx = ctx_end - ctx_start
+    total_tokens = num_ctx + num_query_per_req
+
+    j = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+    in_bounds = j < total_tokens
+    is_ctx = j < num_ctx
+    is_query = (~is_ctx) & in_bounds
+    query_off = j - num_ctx  # offset within query portion (0-indexed)
+
+    # --- Positions ---
+    # Context: load from target_positions
+    ctx_pos_idx = tl.minimum(ctx_start + j, total_input_tokens - 1)
+    ctx_pos = tl.load(target_positions_ptr + ctx_pos_idx, mask=is_ctx, other=0)
+
+    # Query: last_valid_pos + 1 + query_off
+    # In padded mode, ctx_end includes rejected tokens; use valid_ctx_end
+    # to find the last accepted context position.
+    if HAS_NUM_REJECTED:
+        num_rejected = tl.load(num_rejected_tokens_ptr + req_idx)
+        valid_ctx_end = ctx_end - num_rejected
+    else:
+        valid_ctx_end = ctx_end
+    last_pos = tl.load(target_positions_ptr + valid_ctx_end - 1)
+    query_pos = last_pos + 1 + query_off
+
+    positions = tl.where(is_ctx, ctx_pos, query_pos)
+
+    # Context and query positions go to separate buffers.
+    ctx_pos_out = ctx_start + j
+    tl.store(out_context_positions_ptr + ctx_pos_out, ctx_pos, mask=is_ctx)
+    query_out = req_idx * num_query_per_req + query_off
+    tl.store(out_query_positions_ptr + query_out, query_pos, mask=is_query)
+
+    # --- Slot mapping (block_table lookup for all positions) ---
+    block_num = positions // block_size
+    # # Clamp block_number to avoid OOB when position is at max
+    block_num = tl.minimum(block_num, block_table_stride - 1)
+    block_id = tl.load(
+        block_table_ptr + req_idx * block_table_stride + block_num,
+        mask=in_bounds,
+        other=0,
+    ).to(tl.int64)
+    slot = block_id * block_size + (positions % block_size)
+    tl.store(out_context_slot_mapping_ptr + ctx_pos_out, slot, mask=is_ctx)
+    tl.store(out_query_slot_mapping_ptr + query_out, slot, mask=is_query)
+
+    # --- Input IDs (query tokens only) ---
+    bonus_token = tl.load(next_token_ids_ptr + req_idx)
+    is_bonus = is_query & (query_off == 0)
+    input_id = tl.where(is_bonus, bonus_token, parallel_drafting_token_id)
+    tl.store(out_input_ids_ptr + query_out, input_id, mask=is_query)
+
+    # --- Token indices to sample (mask tokens, skip the bonus token) ---
+    is_sample = is_query & (query_off > 0)
+    sample_out_idx = req_idx * num_speculative_tokens + (query_off - 1)
+    tl.store(
+        out_token_indices_ptr + sample_out_idx,
+        query_out,
+        mask=is_sample,
+    )
+
+
 @torch.compile(dynamic=True, backend=current_platform.simple_compile_backend)
 def update_num_computed_tokens_for_batch_change(
     num_computed_tokens: torch.Tensor,
@@ -472,3 +594,9 @@ def update_num_computed_tokens_for_batch_change(
     num_accepted_tokens.copy_(
         torch.where(participating, valid_counts, num_accepted_tokens)
     )
+
+
+def unconditional_to_conditional_rates(rates: list[float]) -> list[float]:
+    """Convert per-position unconditional rates to per-position conditional
+    rates for the early-terminating rejection loop (c_i = p_i / p_{i-1})."""
+    return [p / q if q > 0.0 else 0.0 for p, q in zip(rates, [1.0, *rates[:-1]])]
diff --git a/vllm/v1/structured_output/__init__.py b/vllm/v1/structured_output/__init__.py
index 213b49f28d91..6a4fcbb629ff 100644
--- a/vllm/v1/structured_output/__init__.py
+++ b/vllm/v1/structured_output/__init__.py
@@ -15,6 +15,7 @@
 from vllm.v1.structured_output.backend_types import (
     StructuredOutputBackend,
     StructuredOutputGrammar,
+    StructuredOutputOptions,
 )
 from vllm.v1.structured_output.backend_xgrammar import XgrammarBackend
 
@@ -37,7 +38,10 @@ class StructuredOutputManager:
 
     def __init__(self, vllm_config: VllmConfig):
         self.backend: StructuredOutputBackend | None = None
-        self.reasoner: ReasoningParser | None = None
+        # We only store the class of the reasoner in the manager.
+        # The parser instance is request-scoped because some reasoning parsers
+        # depend on per-request chat-template kwargs.
+        self.reasoner_cls: type[ReasoningParser] | None = None
         self.vllm_config = vllm_config
 
         # When in external_launcher mode, async grammar compilation causes deadlocks
@@ -85,15 +89,29 @@ def __init__(self, vllm_config: VllmConfig):
                 self.vllm_config.structured_outputs_config.reasoning_parser
             )
             if reasoning_parser:
-                reasoner_cls = ReasoningParserManager.get_reasoning_parser(
+                self.reasoner_cls = ReasoningParserManager.get_reasoning_parser(
                     reasoning_parser
                 )
-                self.reasoner = reasoner_cls(tokenizer=self.tokenizer)
 
         self.enable_in_reasoning = (
             self.vllm_config.structured_outputs_config.enable_in_reasoning
         )
 
+    def _get_reasoner(self, request: "Request") -> "ReasoningParser | None":
+        structured_req = request.structured_output_request
+        if structured_req is None or self.reasoner_cls is None:
+            return None
+
+        if structured_req.reasoner is None:
+            # Lazily build the request-local parser so the structured-output
+            # gate observes the same template kwargs used by the frontend.
+            parser_kwargs = structured_req.reasoning_parser_kwargs or {}
+            structured_req.reasoner = self.reasoner_cls(
+                tokenizer=self.tokenizer,
+                **parser_kwargs,
+            )
+        return structured_req.reasoner
+
     def grammar_init(self, request: "Request") -> None:
         if request.structured_output_request is None:
             return
@@ -285,7 +303,8 @@ def should_fill_bitmask(self, request: "Request") -> bool:
         # NOTE (Hanchen) if enable_in_reasoning is True, it means that
         # the model needs to be constrained in reasoning. So we should always
         # enable the bitmask filling.
-        if self.reasoner is not None:
+        reasoner = self._get_reasoner(request)
+        if reasoner is not None:
             if self.enable_in_reasoning:
                 return True
             assert request.structured_output_request is not None
@@ -295,7 +314,7 @@ def should_fill_bitmask(self, request: "Request") -> bool:
                 # After unifying the `openai_gptoss` and non-`openai_gptoss` styles,
                 # it can be removed.
                 request.structured_output_request.reasoning_ended = (
-                    self.reasoner.is_reasoning_end(request.prompt_token_ids or [])
+                    reasoner.is_reasoning_end(request.prompt_token_ids or [])
                 )
             return request.structured_output_request.reasoning_ended
         return True
@@ -311,7 +330,8 @@ def should_advance(self, request: "Request") -> bool:
             assert request.structured_output_request.grammar is not None
         # by default, we should always advance
         # for cases that don't use thinking mode.
-        if self.reasoner is None:
+        reasoner = self._get_reasoner(request)
+        if reasoner is None:
             return True
 
         # if the model needs structured in reasoning, we should advance
@@ -328,13 +348,25 @@ def should_advance(self, request: "Request") -> bool:
         start = (
             delta_from if delta_from >= 0 else max(len(all_token_ids) + delta_from, 0)
         )
-        if self.reasoner.is_reasoning_end_streaming(
+        if reasoner.is_reasoning_end_streaming(
             all_token_ids, itertools.islice(all_token_ids, start, None)
         ):
-            # Reasoning just ended, so we shouldn't advance til
-            # next pass
             structured_req.reasoning_ended = True
 
+            # Reasoning just ended this step. Defer FSM advance until the next
+            # pass (see reasoning_ended check above) for JSON/regex/choice/grammar:
+            # advancing on the closing boundary token can accept tokens that still
+            # belong to the reasoning stream. Structural tags are the only safe
+            # same-step exception: they model phased output (e.g. thinking tag ->
+            # answer tag), and speculative decoding must run grammar.validate_tokens
+            # on draft tokens produced immediately after that transition.
+            if (
+                self.vllm_config.speculative_config is not None
+                and structured_req.structured_output_key[0]
+                == StructuredOutputOptions.STRUCTURAL_TAG
+            ):
+                return True
+
         return False
 
     def clear_backend(self) -> None:
diff --git a/vllm/v1/structured_output/backend_guidance.py b/vllm/v1/structured_output/backend_guidance.py
index 6063a2dc2a6d..31178e9f2462 100644
--- a/vllm/v1/structured_output/backend_guidance.py
+++ b/vllm/v1/structured_output/backend_guidance.py
@@ -12,6 +12,7 @@
 from vllm.logger import init_logger
 from vllm.sampling_params import SamplingParams
 from vllm.utils.import_utils import LazyLoader
+from vllm.utils.mistral import is_mistral_tokenizer
 from vllm.v1.structured_output.backend_types import (
     StructuredOutputBackend,
     StructuredOutputGrammar,
@@ -92,9 +93,12 @@ def __post_init__(self):
             self.vllm_config.structured_outputs_config.disable_additional_properties
         )
 
-        self.ll_tokenizer = llguidance_hf.from_tokenizer(
-            self.tokenizer, max(self.vocab_size, len(self.tokenizer))
-        )
+        if is_mistral_tokenizer(self.tokenizer):
+            self.ll_tokenizer = self.tokenizer.llg_tokenizer
+        else:
+            self.ll_tokenizer = llguidance_hf.from_tokenizer(
+                self.tokenizer, max(self.vocab_size, len(self.tokenizer))
+            )
 
     def compile_grammar(
         self, request_type: StructuredOutputOptions, grammar_spec: str
diff --git a/vllm/v1/structured_output/request.py b/vllm/v1/structured_output/request.py
index b921a71b3cf1..dfa8c7efcae4 100644
--- a/vllm/v1/structured_output/request.py
+++ b/vllm/v1/structured_output/request.py
@@ -5,7 +5,7 @@
 import json
 from concurrent.futures import Future
 from concurrent.futures._base import TimeoutError
-from typing import cast
+from typing import TYPE_CHECKING, Any, cast
 
 from vllm.sampling_params import SamplingParams, StructuredOutputsParams
 from vllm.v1.structured_output.backend_types import (
@@ -14,12 +14,19 @@
     StructuredOutputOptions,
 )
 
+if TYPE_CHECKING:
+    from vllm.reasoning import ReasoningParser
+
 
 @dataclasses.dataclass
 class StructuredOutputRequest:
     params: StructuredOutputsParams
     _grammar: Future[StructuredOutputGrammar] | StructuredOutputGrammar | None = None
     reasoning_ended: bool | None = None
+    reasoning_parser_kwargs: dict[str, Any] | None = None
+    # Cached per request; do not share reasoning parsers across requests because
+    # their behavior can depend on reasoning_parser_kwargs.
+    reasoner: "ReasoningParser | None" = None
 
     @staticmethod
     def from_sampling_params(
diff --git a/vllm/v1/structured_output/utils.py b/vllm/v1/structured_output/utils.py
index 9fc1accb2e2c..f149ae845e31 100644
--- a/vllm/v1/structured_output/utils.py
+++ b/vllm/v1/structured_output/utils.py
@@ -12,7 +12,6 @@
 import regex as re
 import torch
 from cachetools import LRUCache
-from diskcache import Cache
 
 import vllm.envs as envs
 from vllm.logger import init_logger
@@ -181,6 +180,8 @@ def get_outlines_cache():
 
     cache_dir = get_outlines_cache_path()
     if envs.VLLM_V1_USE_OUTLINES_CACHE:
+        from diskcache import Cache
+
         logger.warning(
             "Enabling outlines cache. This is an unbounded on-disk "
             "cache. It may consume a lot of disk space and should "
diff --git a/vllm/v1/utils.py b/vllm/v1/utils.py
index 1aa36b1a5f81..afa621ae54d4 100644
--- a/vllm/v1/utils.py
+++ b/vllm/v1/utils.py
@@ -2,7 +2,9 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import argparse
 import contextlib
+import json
 import multiprocessing
+import threading
 import time
 import weakref
 from collections.abc import Callable, Sequence
@@ -232,6 +234,146 @@ def shutdown(self, timeout: float | None = None) -> None:
             shutdown(self.processes, timeout=timeout)
 
 
+class RustFrontendProcessManager:
+    """Manages a single Rust frontend subprocess.
+
+    Launches the Rust vllm-rs binary in 'frontend' mode, passing the
+    listening socket fd and ZMQ transport addresses. Provides the same
+    interface as APIServerProcessManager for process monitoring.
+    """
+
+    def __init__(
+        self,
+        binary_path: str,
+        sock: Any,
+        args: argparse.Namespace,
+        input_address: str,
+        output_address: str,
+        engine_count: int,
+        stats_update_address: str | None = None,
+    ):
+        import os
+        import subprocess
+
+        fd = sock.fileno()
+        os.set_inheritable(fd, True)
+
+        cmd = [
+            binary_path,
+            "frontend",
+            "--listen-fd",
+            str(fd),
+            "--input-address",
+            input_address,
+            "--output-address",
+            output_address,
+            "--engine-count",
+            str(engine_count),
+        ]
+        if stats_update_address is not None:
+            cmd.extend(["--coordinator-address", stats_update_address])
+        from vllm.entrypoints.utils import jsonify_non_default_args
+
+        args_json = json.dumps(
+            jsonify_non_default_args(args, exclude={"api_server_count"}),
+            sort_keys=True,
+        )
+        cmd.extend(["--args-json", args_json])
+
+        logger.info("Launching Rust frontend: %s", " ".join(cmd))
+        self._proc = subprocess.Popen(cmd, pass_fds=(fd,))
+
+        # Create a process wrapper with a sentinel fd for monitoring
+        self.processes: list[_SubprocessWrapper] = [
+            _SubprocessWrapper(self._proc, "RustFrontend")
+        ]
+
+        self._finalizer = weakref.finalize(self, _shutdown_subprocesses, self.processes)
+
+    def shutdown(self, timeout: float | None = None) -> None:
+        if self._finalizer.detach() is not None:
+            _shutdown_subprocesses(self.processes, timeout=timeout)
+
+
+class _SubprocessWrapper:
+    """Wraps subprocess.Popen to provide the BaseProcess-like interface
+    needed by wait_for_completion_or_failure."""
+
+    def __init__(self, proc, name: str):
+        self._proc = proc
+        self.name = name
+        self.pid = proc.pid
+        self._sentinel_conn: connection.Connection | None = None
+        self._sentinel_send: connection.Connection | None = None
+
+        # Use a Pipe-based sentinel so subprocess monitoring works uniformly
+        # across platforms with multiprocessing.connection.wait().
+        recv, send = connection.Pipe(duplex=False)
+        self._sentinel_conn = recv
+        self._sentinel_send = send
+
+        def monitor_subprocess() -> None:
+            try:
+                proc.wait()
+            finally:
+                with contextlib.suppress(Exception):
+                    send.close()
+
+        threading.Thread(
+            target=monitor_subprocess, daemon=True, name=f"{name}Monitor"
+        ).start()
+
+    @property
+    def sentinel(self):
+        return self._sentinel_conn
+
+    @property
+    def exitcode(self) -> int | None:
+        return self._proc.returncode if self._proc.poll() is not None else None
+
+    def is_alive(self) -> bool:
+        return self._proc.poll() is None
+
+    def terminate(self):
+        self._proc.terminate()
+
+    def join(self, timeout=None):
+        with contextlib.suppress(Exception):
+            self._proc.wait(timeout=timeout)
+
+    def __del__(self):
+        with contextlib.suppress(Exception):
+            if self._sentinel_conn is not None:
+                self._sentinel_conn.close()
+            if self._sentinel_send is not None:
+                self._sentinel_send.close()
+
+
+def _shutdown_subprocesses(
+    procs: list[_SubprocessWrapper], timeout: float | None = None
+) -> None:
+    """Shutdown subprocess wrappers (mirrors the shutdown() function)."""
+    if timeout is None:
+        timeout = 0.0
+    timeout = max(timeout, 5.0)
+
+    for proc in procs:
+        if proc.is_alive():
+            proc.terminate()
+
+    deadline = time.monotonic() + timeout
+    for proc in procs:
+        remaining = deadline - time.monotonic()
+        if remaining <= 0:
+            break
+        if proc.is_alive():
+            proc.join(remaining)
+
+    for proc in procs:
+        if proc.is_alive() and (pid := proc.pid) is not None:
+            kill_process_tree(pid)
+
+
 def run_api_server_worker_proc(
     listen_address, sock, args, client_config=None, **uvicorn_kwargs
 ) -> None:
@@ -252,7 +394,7 @@ def run_api_server_worker_proc(
 
 
 def wait_for_completion_or_failure(
-    api_server_manager: APIServerProcessManager,
+    api_server_manager: "APIServerProcessManager | RustFrontendProcessManager",
     engine_manager: Union["CoreEngineProcManager", "CoreEngineActorManager"]
     | None = None,
     coordinator: "DPCoordinator | None" = None,
@@ -269,46 +411,51 @@ def wait_for_completion_or_failure(
         coordinator: The coordinator for data parallel.
     """
 
-    from vllm.v1.engine.utils import CoreEngineActorManager, CoreEngineProcManager
-
     try:
         logger.info("Waiting for API servers to complete ...")
         # Create a mapping of sentinels to their corresponding processes
         # for efficient lookup
-        sentinel_to_proc: dict[Any, BaseProcess] = {
+        sentinel_to_proc: dict[Any, BaseProcess | _SubprocessWrapper | None] = {
             proc.sentinel: proc for proc in api_server_manager.processes
         }
 
         if coordinator:
             sentinel_to_proc[coordinator.proc.sentinel] = coordinator.proc
 
-        actor_run_refs = []
-        if isinstance(engine_manager, CoreEngineProcManager):
-            for proc in engine_manager.processes:
-                sentinel_to_proc[proc.sentinel] = proc
-        elif isinstance(engine_manager, CoreEngineActorManager):
-            actor_run_refs = engine_manager.get_run_refs()
+        if engine_manager:
+            core_shutdown_recv, core_shutdown_send = connection.Pipe(duplex=False)
+
+            def monitor_engines():
+                try:
+                    engine_manager.monitor_engine_liveness()
+                finally:
+                    core_shutdown_send.close()
+                    core_shutdown_recv.close()
+
+            # start monitor for engine liveness
+            threading.Thread(target=monitor_engines, daemon=True).start()
+            sentinel_to_proc[core_shutdown_recv] = None  # type: ignore[assignment]
 
         # Check if any process terminates
-        while sentinel_to_proc or actor_run_refs:
-            # Wait for any process to terminate
-            ready_sentinels: list[Any] = connection.wait(sentinel_to_proc, timeout=5)
+        while sentinel_to_proc:
+            # Wait for any process to terminate (or engine shutdown signal)
+            ready_sentinels: list[Any] = connection.wait(sentinel_to_proc)
 
             # Process any terminated processes
             for sentinel in ready_sentinels:
                 proc = sentinel_to_proc.pop(sentinel)
 
                 # Check if process exited with error
-                if proc.exitcode != 0:
+                if proc is not None and proc.exitcode != 0:
                     raise RuntimeError(
                         f"Process {proc.name} (PID: {proc.pid}) "
                         f"died with exit code {proc.exitcode}"
                     )
-
-            if actor_run_refs:
-                import ray
-
-                _, actor_run_refs = ray.wait(actor_run_refs, timeout=5)
+                if engine_manager and engine_manager.failed_proc_name is not None:
+                    raise RuntimeError(
+                        f"Engine core process {engine_manager.failed_proc_name} "
+                        "died unexpectedly."
+                    )
 
     except KeyboardInterrupt:
         logger.info("Received KeyboardInterrupt, shutting down API servers...")
@@ -327,10 +474,9 @@ def shutdown(procs: list[BaseProcess], timeout: float | None = None) -> None:
         timeout: Maximum time in seconds to wait for graceful shutdown
     """
     if timeout is None:
-        timeout = 0.0
-
-    # Allow at least 5 seconds for remaining procs to terminate.
-    timeout = max(timeout, 5.0)
+        # Keep a small grace period for best-effort cleanup paths that do not
+        # have a user-configured shutdown timeout.
+        timeout = 5.0
 
     # Shutdown the process.
     for proc in procs:
diff --git a/vllm/v1/worker/block_table.py b/vllm/v1/worker/block_table.py
index f46e8a8ed63c..87a2aac9d4ca 100644
--- a/vllm/v1/worker/block_table.py
+++ b/vllm/v1/worker/block_table.py
@@ -257,6 +257,13 @@ def __init__(
                 f"must match block_sizes length ({len(block_sizes)})"
             )
 
+        # Align to a multiple of (128 / block_size) as required
+        # by some attention backends such as TRTLLM (#39324)
+        max_num_blocks = [
+            cdiv(n, 128 // bs) * (128 // bs) if bs <= 128 else n
+            for n, bs in zip(max_num_blocks, block_sizes)
+        ]
+
         self.block_tables = [
             BlockTable(
                 block_size,
diff --git a/vllm/v1/worker/cp_utils.py b/vllm/v1/worker/cp_utils.py
index 2c2e0b5cdbe2..05cca52fc0db 100644
--- a/vllm/v1/worker/cp_utils.py
+++ b/vllm/v1/worker/cp_utils.py
@@ -29,10 +29,11 @@ def check_attention_cp_compatibility(vllm_config: VllmConfig) -> None:
                 )
             if dcp_size > 1:
                 assert layer_impl.need_to_return_lse_for_decode, (
-                    "DCP requires attention impls to return"
-                    " the softmax lse for decode, but the impl "
-                    f"{layer_impl.__class__.__name__} "
-                    "does not return the softmax lse for decode."
+                    "Decode Context Parallelism (DCP) requires attention "
+                    "implementations to return the softmax LSE during decode, "
+                    f"but {layer_impl.__class__.__name__} does not. "
+                    "Try a different backend by setting "
+                    "--attention-backend or disable DCP."
                 )
 
             if pcp_size > 1:
diff --git a/vllm/v1/worker/cpu/__init__.py b/vllm/v1/worker/cpu/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/vllm/v1/worker/cpu/buffer_utils.py b/vllm/v1/worker/cpu/buffer_utils.py
new file mode 100644
index 000000000000..8ee257a28f40
--- /dev/null
+++ b/vllm/v1/worker/cpu/buffer_utils.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections.abc import Sequence
+
+import torch
+
+from vllm.utils.platform_utils import is_uva_available
+
+
+class UvaBuffer:
+    def __init__(self, size: int | Sequence[int], dtype: torch.dtype):
+        if not is_uva_available():
+            raise RuntimeError("UVA is not available")
+        self.cpu = torch.zeros(size, dtype=dtype, device="cpu")
+        self.np = self.cpu.numpy()
+        self.uva = self.cpu
diff --git a/vllm/v1/worker/cpu/model_runner.py b/vllm/v1/worker/cpu/model_runner.py
new file mode 100644
index 000000000000..8aa0726a7f03
--- /dev/null
+++ b/vllm/v1/worker/cpu/model_runner.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm.logger import init_logger
+from vllm.v1.worker.gpu.model_runner import GPUModelRunner
+
+logger = init_logger(__name__)
+
+
+class CPUModelRunner(GPUModelRunner):
+    # TBD: Whether need to move this to Worker?
+    def warming_up_model(self) -> None:
+        logger.info("Warming up model for the compilation...")
+        # Only generate graph for the generic shape
+        self.profile_run()
+        logger.info("Warming up done.")
diff --git a/vllm/v1/worker/cpu/shm.py b/vllm/v1/worker/cpu/shm.py
new file mode 100644
index 000000000000..92aa1b5b95f3
--- /dev/null
+++ b/vllm/v1/worker/cpu/shm.py
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+# isort: skip_file
+# ruff: noqa: E402
+# mypy: disable-error-code="misc, assignment"
+
+from typing import Any
+
+# Patch torch APIs
+import torch
+
+
+def noop(*args: Any, **kwargs: Any) -> None:
+    pass
+
+
+class _EventPlaceholder:
+    def __init__(self, *args, **kwargs) -> None:
+        self.record = noop
+        self.synchronize = noop
+
+
+class _StreamPlaceholder:
+    def __init__(self, *args, **kwargs) -> None:
+        self.wait_stream = noop
+
+    def __enter__(self, *args, **kwargs):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        pass
+
+
+torch.Event = _EventPlaceholder
+torch.cuda.Event = _EventPlaceholder
+torch.cuda.Stream = _StreamPlaceholder
+torch.cuda.set_stream = noop
+torch.cuda.current_stream = lambda *args, **kwargs: _StreamPlaceholder()
+torch.accelerator.synchronize = noop
+torch.accelerator.empty_cache = noop
+
+# Patch vLLM torch utils
+import vllm.utils.torch_utils as torch_utils
+
+
+def async_tensor_h2d(
+    data: list,
+    dtype: torch.dtype,
+    device: str | torch.device,
+    pin_memory: bool = False,
+) -> torch.Tensor:
+    return torch.tensor(data, dtype=dtype, device="cpu")
+
+
+torch_utils.async_tensor_h2d = async_tensor_h2d
+
+# Patch model runner APIs
+import vllm.v1.worker.gpu.buffer_utils as gpu_buffer_utils
+import vllm.v1.worker.cpu.buffer_utils as cpu_buffer_utils
+
+gpu_buffer_utils.UvaBuffer = cpu_buffer_utils.UvaBuffer
diff --git a/vllm/v1/worker/cpu_model_runner.py b/vllm/v1/worker/cpu_model_runner.py
index d66aac90a357..6afffa424d42 100644
--- a/vllm/v1/worker/cpu_model_runner.py
+++ b/vllm/v1/worker/cpu_model_runner.py
@@ -11,6 +11,8 @@
 from vllm.logger import init_logger
 from vllm.model_executor.model_loader import get_model
 from vllm.tracing import instrument
+from vllm.v1.core.sched.output import SchedulerOutput
+from vllm.v1.kv_cache_interface import KVCacheConfig
 from vllm.v1.utils import CpuGpuBuffer
 from vllm.v1.worker.gpu_model_runner import GPUModelRunner
 
@@ -19,11 +21,14 @@
 
 class CPUModelRunner(GPUModelRunner):
     def __init__(self, vllm_config: VllmConfig, device: torch.device):
+        # avoid calling accelerator APIs for methods inherited from super class
+        _set_torch_accelerator_to_noop()
+
         with _torch_cuda_wrapper():
             super().__init__(vllm_config, device)
 
         assert device == torch.device("cpu")
-        assert self.speculative_config is None, "spec decode is not supported."
+        # Note: speculative decoding is now supported on CPU with C++ native impls
 
         self.use_cuda_graph = False
         self.cascade_attn_enabled = False
@@ -61,6 +66,34 @@ def _postprocess_triton(self) -> None:
             cpu_tl.compute_slot_mapping_kernel
         )
 
+        # Speculative decoding fallbacks
+        import vllm.v1.sample.rejection_sampler
+        import vllm.v1.spec_decode.llm_base_proposer
+        import vllm.v1.spec_decode.utils
+
+        vllm.v1.spec_decode.llm_base_proposer.eagle_prepare_inputs_padded_kernel = (
+            cpu_tl.eagle_prepare_inputs_padded_kernel
+        )
+        vllm.v1.spec_decode.llm_base_proposer.eagle_prepare_next_token_padded_kernel = (
+            cpu_tl.eagle_prepare_next_token_padded_kernel
+        )
+        vllm.v1.spec_decode.llm_base_proposer.copy_and_expand_eagle_inputs_kernel = (
+            cpu_tl.copy_and_expand_eagle_inputs_kernel
+        )
+        vllm.v1.spec_decode.utils.eagle_step_slot_mapping_metadata_kernel = (
+            cpu_tl.eagle_step_slot_mapping_metadata_kernel
+        )
+        vllm.v1.sample.rejection_sampler.rejection_greedy_sample_kernel = (
+            cpu_tl.rejection_greedy_sample_kernel
+        )
+        vllm.v1.sample.rejection_sampler.rejection_random_sample_kernel = (
+            cpu_tl.rejection_random_sample_kernel
+        )
+        vllm.v1.sample.rejection_sampler.expand_kernel = cpu_tl.expand_kernel
+        vllm.v1.sample.rejection_sampler.sample_recovered_tokens_kernel = (
+            cpu_tl.sample_recovered_tokens_kernel
+        )
+
     @instrument(span_name="Loading (CPU)")
     def load_model(self, load_dummy_weights: bool = False) -> None:
         if load_dummy_weights:
@@ -74,6 +107,12 @@ def load_model(self, load_dummy_weights: bool = False) -> None:
         if self.lora_config:
             self.model = self.load_lora_model(self.model, self.vllm_config, self.device)
 
+        if hasattr(self, "drafter"):
+            logger.info_once("Loading drafter model...")
+            self.drafter.load_model(self.model)
+
+        self._setup_eagle3_aux_hidden_state_outputs()
+
     def get_model(self) -> nn.Module:
         return self.model
 
@@ -82,15 +121,22 @@ def warming_up_model(self) -> None:
         logger.info("Warming up model for the compilation...")
         # Only generate graph for the generic shape
         with _set_global_compilation_settings(self.vllm_config):
-            self._dummy_run(
-                min(
-                    max(16, self.max_num_reqs),
-                    self.scheduler_config.max_num_batched_tokens,
-                )
-            )
-
+            self.profile_run()
         logger.info("Warming up done.")
 
+    def initialize_kv_cache(
+        self,
+        kv_cache_config: KVCacheConfig,
+        is_profiling: bool = False,
+    ) -> None:
+        super().initialize_kv_cache(kv_cache_config, is_profiling)
+
+        if self.speculative_config:
+            if self.speculative_config.use_eagle():
+                logger.info("EAGLE drafter KV cache initialized for CPU backend")
+            elif self.speculative_config.uses_draft_model():
+                logger.info("Draft model KV cache initialized for CPU backend")
+
     def _init_device_properties(self) -> None:
         pass
 
@@ -102,9 +148,70 @@ def _zero_block_ids(self, block_ids: list[int]) -> None:
         # so stale KV cache data never affects computation.
         pass
 
-    def get_dp_padding(self, num_tokens: int) -> tuple[int, torch.Tensor | None]:
-        # Note: For CPU backend, dp padding is not required for now.
-        return 0, None
+    # =========================================================================
+    # CPU-safe overrides for speculative decoding methods
+    # These methods override GPU-specific implementations that use CUDA streams
+    # =========================================================================
+
+    def _copy_draft_token_ids_to_cpu(
+        self, scheduler_output: "SchedulerOutput", zeros_only: bool = False
+    ) -> None:
+        """CPU-safe version: no async copy needed, tensors already on CPU."""
+        if self.use_async_scheduling and not (
+            scheduler_output.has_structured_output_requests
+            or self.input_batch.sampling_metadata.output_token_ids
+        ):
+            return
+        self._draft_token_req_ids = self.input_batch.req_ids.copy()
+
+        draft_token_ids: torch.Tensor = self._draft_token_ids
+        if not torch.is_tensor(draft_token_ids):
+            return
+
+        num_reqs = draft_token_ids.shape[0]
+        if self.draft_token_ids_cpu is not None:
+            if not zeros_only:
+                self.draft_token_ids_cpu[:num_reqs].copy_(draft_token_ids)
+            else:
+                self.draft_token_ids_cpu[:num_reqs] = 0
+
+    def _get_draft_token_ids_cpu(self) -> tuple[list[list[int]], list[str]]:
+        """CPU-safe version: no event synchronization needed."""
+        if isinstance(self._draft_token_ids, list):
+            return self._draft_token_ids, self.input_batch.req_ids
+        req_ids = self._draft_token_req_ids
+        if req_ids is None:
+            return [], []
+        if self.draft_token_ids_cpu is not None:
+            return self.draft_token_ids_cpu[: len(req_ids)].tolist(), req_ids
+        return [], []
+
+    def _copy_valid_sampled_token_count(
+        self, next_token_ids: torch.Tensor, valid_sampled_tokens_count: torch.Tensor
+    ) -> None:
+        """CPU-safe version: direct copy without CUDA streams."""
+        if self.valid_sampled_token_count_cpu is None:
+            return
+
+        counts = valid_sampled_tokens_count
+        counts_cpu = self.valid_sampled_token_count_cpu
+        counts_cpu[: counts.shape[0]].copy_(counts)
+        self.input_batch.prev_sampled_token_ids = next_token_ids.unsqueeze(1)
+
+    def _get_valid_sampled_token_count(self) -> list[int]:
+        """CPU-safe version: no event synchronization needed."""
+        prev_sampled_token_ids = self.input_batch.prev_sampled_token_ids
+        if prev_sampled_token_ids is None:
+            return []
+
+        counts_cpu = self.valid_sampled_token_count_cpu
+        if counts_cpu is None:
+            return []
+        return counts_cpu[: prev_sampled_token_ids.shape[0]].tolist()
+
+    def _to_list(self, sampled_token_ids: torch.Tensor) -> list[list[int]]:
+        """CPU-safe version: direct tolist() without CUDA events."""
+        return sampled_token_ids.tolist()
 
 
 @contextmanager
@@ -142,3 +249,11 @@ def _set_global_compilation_settings(config: VllmConfig):
         yield
     finally:
         torch_inductor_config.freezing = freezing_value
+
+
+def _set_torch_accelerator_to_noop() -> None:
+    def noop(*args: Any, **kwargs: Any) -> None:
+        pass
+
+    torch.accelerator.synchronize = noop
+    torch.accelerator.empty_cache = noop
diff --git a/vllm/v1/worker/cpu_worker.py b/vllm/v1/worker/cpu_worker.py
index 122cacd14cd8..9edb870a03a1 100644
--- a/vllm/v1/worker/cpu_worker.py
+++ b/vllm/v1/worker/cpu_worker.py
@@ -1,22 +1,31 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+# Must be imported firstly
+import vllm.v1.worker.cpu.shm  # noqa # isort: skip
+
+import math
 import os
-import platform
 import sys
-from collections.abc import Callable
 from typing import Any
 
+import psutil
 import torch
 
-from vllm import envs
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import CpuArchEnum, current_platform
-from vllm.platforms.cpu import CpuPlatform, LogicalCPUInfo
 from vllm.profiler.wrapper import TorchProfilerWrapper
+from vllm.utils.cpu_resource_utils import (
+    get_allowed_cpu_list,
+    get_memory_node_info,
+    get_visible_memory_node,
+)
+from vllm.utils.mem_utils import format_gib
 from vllm.utils.torch_utils import set_random_seed
 from vllm.v1.worker.cpu_model_runner import CPUModelRunner
 from vllm.v1.worker.gpu_worker import Worker, init_worker_distributed_environment
+from vllm.v1.worker.worker_base import CompilationTimes
 
 logger = init_logger(__name__)
 
@@ -30,6 +39,49 @@ def __init__(
         distributed_init_method: str,
         is_driver_worker: bool = False,
     ):
+        # TODO: use numactl for process setup
+        # TODO: optimize for `interleaved` policy
+        # Bind memory node
+        allowed_memory_nodes = get_visible_memory_node()
+        allowed_cpu_list = get_allowed_cpu_list()
+        cpu_core = allowed_cpu_list[0]
+
+        # TODO: some CI hosts are not correctly set, change to assertion
+        # after fix
+        if cpu_core.numa_node not in allowed_memory_nodes:
+            logger.warning(
+                "Node %s is not in available memory nodes %s.",
+                cpu_core.numa_node,
+                allowed_memory_nodes,
+            )
+
+        torch.ops._C.init_cpu_memory_env([cpu_core.numa_node])
+
+        memory_status = get_memory_node_info(cpu_core.numa_node)
+        memory_fraction = vllm_config.cache_config.gpu_memory_utilization
+        self.requested_cpu_memory = math.ceil(
+            memory_status.total_memory * memory_fraction
+        )
+        available_memory = memory_status.available_memory
+
+        if (
+            vllm_config.cache_config.kv_cache_memory_bytes is None
+            and self.requested_cpu_memory > available_memory
+        ):
+            raise ValueError(
+                f"Available memory on node {cpu_core.numa_node} "
+                f"({format_gib(available_memory)}/"
+                f"{format_gib(memory_status.total_memory)} GiB) on startup "
+                f"is less than desired CPU memory utilization "
+                f"({vllm_config.cache_config.gpu_memory_utilization}, "
+                f"{format_gib(self.requested_cpu_memory)} GiB). "
+                "On the CPU backend, the `--gpu-memory-utilization` flag "
+                "controls the fraction of CPU memory reserved (despite its "
+                "name). To resolve: decrease `--gpu-memory-utilization` "
+                "(e.g. `--gpu-memory-utilization 0.5`) "
+                "or reduce CPU memory used by other processes."
+            )
+
         super().__init__(
             vllm_config,
             local_rank,
@@ -53,6 +105,8 @@ def __init__(
             )
 
     def init_device(self):
+        self.device = torch.device("cpu")
+
         # Check whether critical libraries are loaded
         def check_preloaded_libs(name: str):
             ld_preload_list = os.environ.get("LD_PRELOAD", "")
@@ -71,42 +125,13 @@ def check_preloaded_libs(name: str):
             if current_platform.get_cpu_architecture() == CpuArchEnum.X86:
                 check_preloaded_libs("libiomp")
 
-        # Setup OpenMP threads affinity.
-        omp_cpuids = envs.VLLM_CPU_OMP_THREADS_BIND
-        # Under numa binding some cores reserved for kv transfer in nixl_connector.py
-        if omp_cpuids == "auto" and platform.system() == "Linux":
-            cpu_arch = current_platform.get_cpu_architecture()
-            if cpu_arch in (CpuArchEnum.POWERPC, CpuArchEnum.S390X):
-                # For S390X/POWERPC SMT-8/4/2
-                self.local_omp_cpuid = self._get_autobind_cpu_ids(
-                    lambda cpus: [cpu for cpu in cpus if cpu.id % 8 < 4]
-                )
-            elif cpu_arch == CpuArchEnum.X86:
-                # For x86 SMT-2, use 1 CPU per core
-                self.local_omp_cpuid = self._get_autobind_cpu_ids(
-                    lambda cpus: cpus[-1:]
-                )
-            elif cpu_arch == CpuArchEnum.ARM:
-                # For AArch64, no SMT
-                self.local_omp_cpuid = self._get_autobind_cpu_ids(lambda cpus: cpus)
-            else:
-                self.local_omp_cpuid = "nobind"
-        elif omp_cpuids == "nobind":
-            self.local_omp_cpuid = "nobind"
-        else:
-            local_dp_rank = self.parallel_config.data_parallel_rank_local
-            omp_cpuids_list = omp_cpuids.split("|")
-            if local_dp_rank is not None:
-                world_size = self.parallel_config.world_size
-                omp_cpuids_list = omp_cpuids_list[
-                    local_dp_rank * world_size : (local_dp_rank + 1) * world_size
-                ]
-            self.local_omp_cpuid = omp_cpuids_list[self.rank]
-
-        if self.local_omp_cpuid != "nobind":
-            ret = torch.ops._C.init_cpu_threads_env(self.local_omp_cpuid)
-            if ret:
-                logger.info(ret)
+        def skip_set_num_threads(x: int):
+            logger.warning(
+                "CPU backend doesn't allow to use "
+                "`torch.set_num_threads` after the thread binding, skip it."
+            )
+
+        torch.set_num_threads = skip_set_num_threads
 
         # Note: unique identifier for creating allreduce shared memory
         os.environ["VLLM_DIST_IDENT"] = self.distributed_init_method.split(":")[-1]
@@ -122,9 +147,16 @@ def check_preloaded_libs(name: str):
         set_random_seed(self.model_config.seed)
 
         # Construct the model runner
-        self.model_runner: CPUModelRunner = CPUModelRunner(
-            self.vllm_config, torch.device("cpu")
-        )
+        if self.use_v2_model_runner:
+            from vllm.v1.worker.cpu.model_runner import (
+                CPUModelRunner as CPUModelRunnerV2,
+            )
+
+            self.model_runner: CPUModelRunner = CPUModelRunnerV2(  # type: ignore
+                self.vllm_config, self.device
+            )
+        else:
+            self.model_runner = CPUModelRunner(self.vllm_config, torch.device("cpu"))
 
     def sleep(self, level: int = 1) -> None:
         logger.warning("sleep mode is not supported on CPU, ignore it.")
@@ -135,100 +167,76 @@ def wake_up(self, tags: list[str] | None = None) -> None:
         pass
 
     def determine_available_memory(self) -> int:
-        return self.cache_config.cpu_kvcache_space_bytes or 0
-
-    def compile_or_warm_up_model(self) -> float:
-        # Reset the seed to ensure that the random state is not affected by
-        # the model initialization and profiling.
-        set_random_seed(self.model_config.seed)
         self.model_runner.warming_up_model()
-        return self.compilation_config.compilation_time
-
-    def _get_autobind_cpu_ids(
-        self, cpu_selector: Callable[[list[LogicalCPUInfo]], list[LogicalCPUInfo]]
-    ) -> str:
-        """
-        Return CPU ids to bind based on NUMA nodes.
-        Currently for rank N, only CPU ids on the N-th node in available NUMA
-        node list will be selected.
-        Args:
-            cpu_selector: a callable object to select CPUs from a CPU list
-            of a physical core. The input is a LogicalCPUInfo list, sorted by
-            the LogicalCPUInfo.id. A selected LogicalCPUInfo list should be
-            returned.
-        """
-        # simulate multiple numa nodes, for testing
-        sim_multi_numa_nodes = os.environ.get("VLLM_CPU_SIM_MULTI_NUMA", "0") != "0"
-
-        allowed_numa_nodes, logical_cpu_list = (
-            CpuPlatform.get_allowed_cpu_core_node_list()
-        )
-        local_world_size = self.parallel_config.local_world_size
-        assert len(allowed_numa_nodes) >= local_world_size or sim_multi_numa_nodes, (
-            f"Not enough allowed NUMA nodes to bind threads of "
-            f"{local_world_size} local CPUWorkers. "
-            f"Allowed NUMA nodes are {allowed_numa_nodes}. "
-            "Please try to bind threads manually."
-        )
 
-        if not sim_multi_numa_nodes:
-            # Get CPUs on NUMA node `allowed_numa_nodes[local_rank]`
-            selected_numa_node = allowed_numa_nodes[self.local_rank]  # type: ignore
-            logical_cpu_list = [
-                x for x in logical_cpu_list if x.numa_node == selected_numa_node
-            ]
-        else:
-            # This is a bit tricky because the internal DP size
-            # is always 1 for non-MoE models
-            world_size_across_dp = (
-                self.parallel_config.world_size
-                * self.parallel_config._api_process_count
+        allowed_cpu_list = get_allowed_cpu_list()
+        cpu_core = allowed_cpu_list[0]
+
+        memory_status = get_memory_node_info(cpu_core.numa_node)
+        available_memory = memory_status.available_memory
+        explicit_kv_cache_size = self.cache_config.kv_cache_memory_bytes
+
+        kv_cache_size = None
+        msg = None
+        if explicit_kv_cache_size is not None:
+            if explicit_kv_cache_size > available_memory:
+                raise ValueError(
+                    f"Available memory on node {cpu_core.numa_node} "
+                    f"({format_gib(available_memory)}/"
+                    f"{format_gib(memory_status.total_memory)} GiB) on kv cache"
+                    f" allocation is less than requested memory for kv "
+                    f"({format_gib(explicit_kv_cache_size)} GiB). "
+                    "Decrease --kv-cache-memory-bytes, VLLM_CPU_KVCACHE_SPACE, "
+                    "or reduce CPU memory used by other processes."
+                )
+            kv_cache_size = explicit_kv_cache_size
+            msg = (
+                f"Explicitly set ({format_gib(kv_cache_size)}/"
+                f"{format_gib(memory_status.total_memory)}) GiB for KV cache "
+                f"on node {cpu_core.numa_node}."
             )
-            assert len(logical_cpu_list) >= world_size_across_dp
-            logical_cpu_list = sorted(logical_cpu_list, key=lambda x: x.numa_node)
-            sim_cpu_num_per_node = len(logical_cpu_list) // world_size_across_dp
-            assert self.parallel_config.data_parallel_rank_local is not None
-            start_idx = (
-                self.local_rank
-                + self.parallel_config.world_size
-                * self.parallel_config.data_parallel_rank_local
-            ) * sim_cpu_num_per_node
-            logical_cpu_list = logical_cpu_list[
-                start_idx : (start_idx + sim_cpu_num_per_node)
-            ]
-
-        # Select CPUs from each physical core via cpu_selector
-        core_to_cpus: dict[int, list[LogicalCPUInfo]] = {}
-        for cpu_info in logical_cpu_list:
-            if cpu_info.physical_core not in core_to_cpus:
-                core_to_cpus[cpu_info.physical_core] = []
-            core_to_cpus[cpu_info.physical_core].append(cpu_info)
-        logical_cpu_list = []
-        for cpu_list in core_to_cpus.values():
-            cpu_list = sorted(cpu_list, key=lambda x: x.id)
-            logical_cpu_list.extend(cpu_selector(cpu_list))
-        logical_cpu_list = sorted(logical_cpu_list, key=lambda x: x.id)
-
-        # Reserve CPUs for other processes
-        reserve_cpu_num = envs.VLLM_CPU_NUM_OF_RESERVED_CPU
-        if reserve_cpu_num is None:
-            need_reserve = (
-                self.parallel_config.world_size > 1
-                or self.parallel_config.data_parallel_size_local > 1
+        else:
+            consumed_memory = psutil.Process(os.getpid()).memory_info().rss
+            requested_memory_for_kv = int(self.requested_cpu_memory - consumed_memory)
+            if (
+                requested_memory_for_kv <= 0
+                or requested_memory_for_kv > available_memory
+            ):
+                raise ValueError(
+                    f"Available memory on node {cpu_core.numa_node} "
+                    f"({format_gib(available_memory)}/"
+                    f"{format_gib(memory_status.total_memory)} GiB) on kv cache"
+                    f" allocation is less than requested memory for kv "
+                    f"({format_gib(requested_memory_for_kv)}/"
+                    f"{format_gib(self.requested_cpu_memory)} GiB). "
+                    "Reduce CPU memory used by other processes."
+                )
+            kv_cache_size = requested_memory_for_kv
+            msg = (
+                f"Auto set ({format_gib(kv_cache_size)}/"
+                f"{format_gib(memory_status.total_memory)}) GiB for KV cache "
+                f"on node {cpu_core.numa_node}, with "
+                f"{format_gib(self.requested_cpu_memory)} GiB requested memory"
+                f" for the worker. {format_gib(consumed_memory)} GiB"
+                f" memory was consumed by non-kv usages."
             )
-            reserve_cpu_num = 1 if need_reserve else 0
-        assert len(logical_cpu_list) > reserve_cpu_num, (
-            f"VLLM_CPU_NUM_OF_RESERVED_CPU ({reserve_cpu_num}) "
-            f"should less than {len(logical_cpu_list)}."
-        )
-        if reserve_cpu_num != 0:
-            logical_cpu_list = logical_cpu_list[:-reserve_cpu_num]
 
-        logger.info(
-            "auto thread-binding list (id, physical core): %s",
-            [(x.id, x.physical_core) for x in logical_cpu_list],
+        logger.info(msg)
+
+        return kv_cache_size
+
+    def compile_or_warm_up_model(self) -> CompilationTimes:
+        # Note: the model has been compiled in determine_available_memory(),
+        # Only compile here for models without kv cache
+        if len(self.model_runner.kv_caches) == 0:
+            self.model_runner.warming_up_model()
+        # Reset the seed to ensure that the random state is not affected by
+        # the model initialization and profiling.
+        set_random_seed(self.model_config.seed)
+        return CompilationTimes(
+            language_model=self.compilation_config.compilation_time,
+            encoder=self.compilation_config.encoder_compilation_time,
         )
-        return ",".join([str(x.id) for x in logical_cpu_list])
 
     def profile(self, is_start: bool = True, profile_prefix: str | None = None):
         if self.profiler is None:
diff --git a/vllm/v1/worker/dp_utils.py b/vllm/v1/worker/dp_utils.py
index 051fe42155ee..e7c6d81a9929 100644
--- a/vllm/v1/worker/dp_utils.py
+++ b/vllm/v1/worker/dp_utils.py
@@ -1,8 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-
-import numpy as np
 import torch
 import torch.distributed as dist
 
@@ -29,7 +27,6 @@ def _get_device_and_group(parallel_config: ParallelConfig):
     if parallel_config.disable_nccl_for_dp_synchronization:
         logger.info_once(
             "Using CPU all reduce to synchronize DP padding between ranks.",
-            scope="local",
         )
         device = "cpu"
         group = get_dp_group().cpu_group
@@ -46,11 +43,13 @@ def _run_ar(
     dp_size = parallel_config.data_parallel_size
     dp_rank = parallel_config.data_parallel_rank
     device, group = _get_device_and_group(parallel_config)
-    tensor = torch.zeros(4, dp_size, device=device, dtype=torch.int32)
-    tensor[0][dp_rank] = orig_num_tokens_per_ubatch
-    tensor[1][dp_rank] = padded_num_tokens_per_ubatch
-    tensor[2][dp_rank] = 1 if should_ubatch else 0
-    tensor[3][dp_rank] = cudagraph_mode
+    # Populate this rank's contribution on CPU to reduce GPU syncs.
+    tensor_cpu = torch.zeros(4, dp_size, dtype=torch.int32)
+    tensor_cpu[0][dp_rank] = orig_num_tokens_per_ubatch
+    tensor_cpu[1][dp_rank] = padded_num_tokens_per_ubatch
+    tensor_cpu[2][dp_rank] = 1 if should_ubatch else 0
+    tensor_cpu[3][dp_rank] = cudagraph_mode
+    tensor = tensor_cpu.to(device, non_blocking=True)
     dist.all_reduce(tensor, group=group)
     return tensor
 
@@ -168,7 +167,6 @@ def coordinate_batch_across_dp(
     parallel_config: ParallelConfig,
     num_tokens_padded: int | None = None,
     uniform_decode: bool | None = None,
-    num_scheduled_tokens_per_request: np.ndarray | None = None,
     cudagraph_mode: int = 0,
 ) -> tuple[bool, torch.Tensor | None, int]:
     """
@@ -183,8 +181,6 @@ def coordinate_batch_across_dp(
             TP, etc)
         uniform_decode: Only used if allow_microbatching is True. True if the batch
             only contains single token decodes
-        num_scheduled_tokens_per_request: Only used if allow_microbatching is True. The
-            number of tokens per request.
         cudagraph_mode: The cudagraph mode for this rank (0=NONE, 1=PIECEWISE, 2=FULL).
             DP padding is enabled when synced cudagraph mode across ranks is not NONE.
 
diff --git a/vllm/v1/worker/ec_connector_model_runner_mixin.py b/vllm/v1/worker/ec_connector_model_runner_mixin.py
index 4d785c4efba3..b3430a8d94da 100644
--- a/vllm/v1/worker/ec_connector_model_runner_mixin.py
+++ b/vllm/v1/worker/ec_connector_model_runner_mixin.py
@@ -34,14 +34,6 @@ def maybe_save_ec_to_connector(
         connector = get_ec_transfer()
         connector.save_caches(encoder_cache=encoder_cache, mm_hash=mm_hash)
 
-    @staticmethod
-    def get_finished_ec_transfers(
-        scheduler_output: "SchedulerOutput",
-    ) -> tuple[set[str] | None, set[str] | None]:
-        if has_ec_transfer():
-            return get_ec_transfer().get_finished(scheduler_output.finished_req_ids)
-        return None, None
-
     @staticmethod
     def maybe_get_ec_connector_output(
         scheduler_output: "SchedulerOutput",
diff --git a/vllm/v1/worker/encoder_cudagraph.py b/vllm/v1/worker/encoder_cudagraph.py
index b2930a23474e..6d5aa6f41243 100644
--- a/vllm/v1/worker/encoder_cudagraph.py
+++ b/vllm/v1/worker/encoder_cudagraph.py
@@ -14,10 +14,14 @@
     tensor_model_parallel_all_gather,
 )
 from vllm.logger import init_logger
-from vllm.model_executor.models.interfaces import SupportsEncoderCudaGraph
+from vllm.model_executor.models.interfaces import (
+    SupportsEncoderCudaGraph,
+)
+from vllm.model_executor.models.utils import scatter_output_slices
 from vllm.model_executor.models.vision import get_load_balance_assignment
-from vllm.v1.worker.gpu.mm.encoder_cudagraph_defs import (
+from vllm.v1.worker.encoder_cudagraph_defs import (
     EncoderCudaGraphConfig,
+    EncoderItemSpec,
 )
 
 logger = init_logger(__name__)
@@ -36,6 +40,7 @@ class BudgetGraphMetadata:
 
     token_budget: int
     max_batch_size: int  # Max number of images/videos per batch
+    max_frames_per_batch: int  # Max total frames per batch (for video)
     graph: torch.cuda.CUDAGraph
     # The input tensor updated before replay (e.g. pixel_values)
     input_buffer: torch.Tensor
@@ -66,25 +71,80 @@ def __init__(
 
         comp_config = vllm_config.compilation_config
         user_budgets = comp_config.encoder_cudagraph_token_budgets
-        user_max_images = comp_config.encoder_cudagraph_max_images_per_batch
-
-        if user_budgets and user_max_images > 0:
-            # Fully user-specified
+        user_max_vision_items = comp_config.encoder_cudagraph_max_vision_items_per_batch
+        user_max_frames = comp_config.encoder_cudagraph_max_frames_per_batch
+
+        multimodal_config = vllm_config.model_config.multimodal_config
+
+        # Invariant: max_batch_size <= min_token_budget.
+        # This ensures per_image_output = budget // max_batch_size >= 1
+        # for every captured budget, preventing reshape crashes on empty
+        # tensors during CUDA graph capture. Validated/enforced below for
+        # each configuration path.
+        if user_budgets and user_max_vision_items > 0:
+            # Fully user-specified: validate the invariant.
             self.token_budgets = sorted(user_budgets)
-            self.max_batch_size = user_max_images
+            self.max_batch_size = user_max_vision_items
+            min_tok = min(self.token_budgets)
+            if self.max_batch_size > min_tok:
+                raise ValueError(
+                    f"encoder_cudagraph_max_vision_items_per_batch "
+                    f"({self.max_batch_size}) must be <= smallest token "
+                    f"budget ({min_tok}). With budgets="
+                    f"{self.token_budgets}, per_image_output = "
+                    f"{min_tok} // {self.max_batch_size} = "
+                    f"{min_tok // self.max_batch_size}, which would cause "
+                    f"a capture failure. Either increase the smallest "
+                    f"budget or decrease max_vision_items_per_batch."
+                )
         else:
-            # Auto-infer missing values from model
+            # Auto-infer missing values from model.
             min_budget, max_budget = model.get_encoder_cudagraph_budget_range(
                 vllm_config
             )
-            self.token_budgets = (
-                sorted(user_budgets)
-                if user_budgets
-                else self._generate_budgets(min_budget, max_budget)
-            )
-            self.max_batch_size = (
-                user_max_images if user_max_images > 0 else max_budget // min_budget
-            )
+            if min_budget <= 0 or max_budget <= 0:
+                raise ValueError(
+                    f"Invalid encoder cudagraph budget range: "
+                    f"min_budget={min_budget}, max_budget={max_budget}. "
+                    f"Both must be positive."
+                )
+            if min_budget > max_budget:
+                raise ValueError(
+                    f"Invalid encoder cudagraph budget range: "
+                    f"min_budget={min_budget} > max_budget={max_budget}."
+                )
+
+            if user_max_vision_items > 0:
+                # User provided max_vision_items only; adjust auto-inferred
+                # budgets so min(budgets) >= max_batch_size.
+                self.max_batch_size = user_max_vision_items
+                effective_min = max(min_budget, user_max_vision_items)
+                self.token_budgets = self._generate_budgets(effective_min, max_budget)
+            elif user_budgets:
+                # User provided budgets only; cap auto-inferred
+                # max_batch_size to min(user_budgets).
+                self.token_budgets = sorted(user_budgets)
+                self.max_batch_size = min(
+                    max_budget // min_budget,
+                    min(self.token_budgets),
+                )
+            else:
+                # Fully auto-inferred.
+                self.token_budgets = self._generate_budgets(min_budget, max_budget)
+                self.max_batch_size = min(
+                    max_budget // min_budget,
+                    min(self.token_budgets),
+                )
+
+        assert multimodal_config is not None
+        if multimodal_config.get_limit_per_prompt("video") == 0:
+            self.max_frames_per_batch = 0
+        elif user_max_frames is not None:
+            self.max_frames_per_batch = user_max_frames
+        else:
+            # Set it to the model-specific value from config.
+            max_frames_per_video = self.config.max_frames_per_video
+            self.max_frames_per_batch = self.max_batch_size * max_frames_per_video
 
         mm_config = vllm_config.model_config.multimodal_config
         self.use_dp = (
@@ -100,9 +160,10 @@ def __init__(
 
         logger.info(
             "EncoderCudaGraphManager initialized with "
-            "budgets=%s, max_batch_size=%d, use_dp=%s",
+            "budgets=%s, max_batch_size=%d, max_frames_per_batch=%s, use_dp=%s",
             self.token_budgets,
             self.max_batch_size,
+            self.max_frames_per_batch,
             self.use_dp,
         )
 
@@ -136,13 +197,19 @@ def capture(self):
     def _capture_budget_graph(self, token_budget: int):
         """Capture CUDA graph for a single token budget."""
         logger.debug(
-            "Capturing encoder cudagraph for budget=%d, max_batch_size=%d",
+            "Capturing encoder cudagraph for budget=%d, max_batch_size=%d, "
+            "max_frames_per_batch=%d",
             token_budget,
             self.max_batch_size,
+            self.max_frames_per_batch,
         )
 
         capture_inputs = self.model.prepare_encoder_cudagraph_capture_inputs(
-            token_budget, self.max_batch_size, self.device, self.dtype
+            token_budget,
+            self.max_batch_size,
+            self.max_frames_per_batch,
+            self.device,
+            self.dtype,
         )
 
         mm_kwargs = capture_inputs.mm_kwargs
@@ -157,10 +224,14 @@ def _capture_budget_graph(self, token_budget: int):
             output = self.model.encoder_cudagraph_forward(mm_kwargs, buffers)
             output_buffer.copy_(output)
 
-        input_key = self.config.input_key
+        # Since the image and video modalities share the same per-patch shape,
+        # so we can use the image dummy inputs to capture CUDA graph for both
+        # image and video.
+        input_key = self.config.input_key_by_modality["image"]
         self.budget_graphs[token_budget] = BudgetGraphMetadata(
             token_budget=token_budget,
             max_batch_size=self.max_batch_size,
+            max_frames_per_batch=self.max_frames_per_batch,
             graph=graph,
             input_buffer=mm_kwargs[input_key],
             metadata_buffers=buffers,
@@ -180,28 +251,13 @@ def _find_smallest_fitting_budget_given_tokens(
                 return budget
         return None
 
+    def _get_item_specs(self, mm_kwargs: dict[str, Any]) -> list[EncoderItemSpec]:
+        """Get item specs from the model."""
+        return self.model.get_encoder_cudagraph_item_specs(mm_kwargs)
+
     def _get_per_item_out_tokens(self, mm_kwargs: dict[str, Any]) -> list[int]:
         """Get per-item output token counts as plain ints."""
-        return [
-            int(t)
-            for t in self.model.get_encoder_cudagraph_per_item_output_tokens(mm_kwargs)
-        ]
-
-    @staticmethod
-    def _scatter_output_slices(
-        output: torch.Tensor,
-        indices: list[int],
-        per_item_out_tokens: list[int],
-        dest: dict[int, torch.Tensor] | list[torch.Tensor | None],
-        clone: bool = False,
-    ) -> None:
-        """Slice a concatenated output tensor and scatter into dest by index."""
-        offset = 0
-        for idx in indices:
-            n_tok = per_item_out_tokens[idx]
-            sliced = output[offset : offset + n_tok]
-            dest[idx] = sliced.clone() if clone else sliced
-            offset += n_tok
+        return [spec.output_tokens for spec in self._get_item_specs(mm_kwargs)]
 
     def _run_budget_graph(
         self,
@@ -220,7 +276,7 @@ def _run_budget_graph(
         Returns:
             Encoder outputs, or None if graph not captured.
         """
-        num_items = self.model.get_encoder_cudagraph_num_items(mm_kwargs)
+        num_items = len(self._get_item_specs(mm_kwargs))
         if token_budget not in self.budget_graphs:
             self.graph_misses += num_items
             return None
@@ -230,10 +286,11 @@ def _run_budget_graph(
         # Copy the input tensor. Buffers are sized for the full budget;
         # actual inputs may be smaller. Zero then slice-copy so padded
         # positions are invisible to attention (cu_seqlens masks them out).
-        input_key = self.config.input_key
+        input_key = self.config.input_key_by_modality[
+            self.model.get_input_modality(mm_kwargs)
+        ]
         src = mm_kwargs[input_key]
         n = src.shape[0]
-        graph_meta.input_buffer.zero_()
         graph_meta.input_buffer[:n].copy_(src)
 
         # Copy metadata buffers using keys from config.buffer_keys.
@@ -277,10 +334,11 @@ def _execute_local(
                          always satisfy total_tokens <= max_budget and therefore
                          always find a valid budget (no miss).
         """
-        num_items = self.model.get_encoder_cudagraph_num_items(mm_kwargs)
+        item_specs = self._get_item_specs(mm_kwargs)
+        num_items = len(item_specs)
         max_budget = self.token_budgets[-1]
 
-        per_item_out_tokens = self._get_per_item_out_tokens(mm_kwargs)
+        per_item_out_tokens = [spec.output_tokens for spec in item_specs]
 
         # Sort ascending by output token count (smallest first)
         sorted_indices = sorted(range(num_items), key=lambda i: per_item_out_tokens[i])
@@ -346,7 +404,7 @@ def _execute_local(
                 self.graph_misses += len(batch_orig_indices)
                 with torch.inference_mode():
                     raw = self.model.encoder_eager_forward(batch_mm_kwargs)
-                self._scatter_output_slices(
+                scatter_output_slices(
                     raw,
                     batch_orig_indices,
                     per_item_out_tokens,
@@ -362,7 +420,9 @@ def _execute_local(
                     (token_budget - batch_out_tokens) / token_budget * 100,
                 )
                 replay = self.model.prepare_encoder_cudagraph_replay_buffers(
-                    batch_mm_kwargs, self.max_batch_size
+                    batch_mm_kwargs,
+                    self.max_batch_size,
+                    self.max_frames_per_batch,
                 )
 
                 # graph_hits counted inside _run_budget_graph after replay.
@@ -370,12 +430,13 @@ def _execute_local(
                     batch_mm_kwargs, token_budget, replay.buffers
                 )
                 assert output is not None
-                self._scatter_output_slices(
+                self.model.postprocess_encoder_output(
                     output,
                     batch_orig_indices,
                     per_item_out_tokens,
                     outputs_by_orig_idx,
                     clone=True,
+                    batch_mm_kwargs=batch_mm_kwargs,
                 )
 
         # Return in original batch order (caller maps outputs to token positions)
@@ -401,9 +462,8 @@ def _dp_shard(
         tp_size = get_tensor_model_parallel_world_size()
         current_rank = get_tensor_model_parallel_rank()
 
-        per_item_input_sizes = self.model.get_encoder_cudagraph_per_item_input_sizes(
-            mm_kwargs
-        )
+        item_specs = self._get_item_specs(mm_kwargs)
+        per_item_input_sizes = [spec.input_size for spec in item_specs]
 
         (image_rank_assignment, images_per_rank, input_patches_per_rank) = (
             get_load_balance_assignment(per_item_input_sizes, tp_size)
@@ -503,7 +563,7 @@ def _dp_gather(
             count = images_per_rank[rank]
             if count > 0:
                 rank_items = image_rank_assignment[current_idx : current_idx + count]
-                self._scatter_output_slices(
+                scatter_output_slices(
                     rank_outputs[rank],
                     rank_items,
                     per_item_out_tokens,
diff --git a/vllm/v1/worker/encoder_cudagraph_defs.py b/vllm/v1/worker/encoder_cudagraph_defs.py
index 455786682059..70e15703f978 100644
--- a/vllm/v1/worker/encoder_cudagraph_defs.py
+++ b/vllm/v1/worker/encoder_cudagraph_defs.py
@@ -8,6 +8,23 @@
 import torch
 
 
+@dataclass
+class EncoderItemSpec:
+    """Description of a single encoder input item.
+
+    Returned by ``get_encoder_cudagraph_item_specs()`` to describe each
+    image or video in a batch without the manager needing to understand
+    model-specific input formats.
+    """
+
+    input_size: int
+    """Number of input patches/rows for this item."""
+
+    output_tokens: int
+    """Number of output tokens after encoder processing (e.g. after
+    spatial merge)."""
+
+
 @dataclass
 class EncoderCudaGraphConfig:
     """Configuration for encoder CUDA graph management.
@@ -20,8 +37,10 @@ class EncoderCudaGraphConfig:
     modalities: list[str]
     """Supported modalities (e.g. ["image"])."""
 
-    input_key: str
-    """Key in mm_kwargs for the input tensor (e.g. "pixel_values")."""
+    input_key_by_modality: dict[str, str]
+    """Per-modality input tensor key mapping, e.g.
+    {"image": "pixel_values", "video": "pixel_values_videos"}.
+    """
 
     buffer_keys: list[str]
     """Keys for the tensor buffers recorded into the CUDA graph.
@@ -32,6 +51,11 @@ class EncoderCudaGraphConfig:
     """Output hidden dim of the vision encoder.
     Used for DP gather buffer allocation."""
 
+    max_frames_per_video: int = 1
+    """Maximum number of frames per video.
+    Only relevant when "video" is in ``modalities``.
+    Image-only models can use the default of 1."""
+
 
 @dataclass
 class EncoderCudaGraphCaptureInputs:
diff --git a/vllm/v1/worker/gpu/async_utils.py b/vllm/v1/worker/gpu/async_utils.py
index 7f270c2b8c95..b3d6f5e4d901 100644
--- a/vllm/v1/worker/gpu/async_utils.py
+++ b/vllm/v1/worker/gpu/async_utils.py
@@ -17,7 +17,6 @@ def __init__(
         num_sampled_tokens: torch.Tensor,
         main_stream: torch.cuda.Stream,
         copy_stream: torch.cuda.Stream,
-        copy_event: torch.cuda.Event,
     ):
         # NOTE(woosuk): We must retain references to the GPU tensors,
         # as the copy operations are performed on a different CUDA stream than
@@ -25,7 +24,7 @@ def __init__(
         self.model_runner_output = model_runner_output
         self.sampler_output = sampler_output
         self.num_sampled_tokens = num_sampled_tokens
-        self.copy_event = copy_event
+        self.copy_event = torch.cuda.Event()
 
         with stream(copy_stream, main_stream):
             copy_stream.wait_stream(main_stream)
@@ -78,12 +77,11 @@ def __init__(
         is_valid: torch.Tensor | None,
         main_stream: torch.cuda.Stream,
         copy_stream: torch.cuda.Stream,
-        copy_event: torch.cuda.Event,
     ):
         self.model_runner_output = model_runner_output
         self.pooler_output = pooler_output
         self.is_valid = is_valid
-        self.copy_event = copy_event
+        self.copy_event = torch.cuda.Event()
 
         with stream(copy_stream, main_stream):
             copy_stream.wait_stream(main_stream)
diff --git a/vllm/v1/worker/gpu/attn_utils.py b/vllm/v1/worker/gpu/attn_utils.py
index 34089a67b3be..8b4e0c3c0a92 100644
--- a/vllm/v1/worker/gpu/attn_utils.py
+++ b/vllm/v1/worker/gpu/attn_utils.py
@@ -1,21 +1,40 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from collections.abc import Sequence
+from dataclasses import dataclass
 from typing import Any, cast
 
-import numpy as np
 import torch
 
 from vllm.config import VllmConfig, get_layers_from_vllm_config
+from vllm.model_executor.layers.attention import Attention
 from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
-from vllm.v1.attention.backend import AttentionBackend, CommonAttentionMetadata
+from vllm.utils.torch_utils import get_dtype_size
+from vllm.v1.attention.backend import (
+    AttentionBackend,
+    AttentionCGSupport,
+    CommonAttentionMetadata,
+)
 from vllm.v1.kv_cache_interface import (
     AttentionSpec,
     KVCacheConfig,
     KVCacheSpec,
+    MambaSpec,
     UniformTypeKVCacheSpecs,
 )
-from vllm.v1.worker.utils import AttentionGroup, bind_kv_cache
+from vllm.v1.worker.gpu.model_states.interface import ModelSpecificAttnMetadata
+from vllm.v1.worker.utils import (
+    AttentionGroup,
+    add_kv_sharing_layers_to_kv_cache_groups,
+    bind_kv_cache,
+    prepare_kernel_block_sizes,
+)
+
+
+@dataclass(frozen=True)
+class AttentionCGSupportInfo:
+    min_cg_support: AttentionCGSupport = AttentionCGSupport.ALWAYS
+    min_cg_attn_backend: str | None = None
 
 
 def get_kv_cache_spec(vllm_config: VllmConfig) -> dict[str, KVCacheSpec]:
@@ -23,21 +42,45 @@ def get_kv_cache_spec(vllm_config: VllmConfig) -> dict[str, KVCacheSpec]:
     layer_type = cast(type[Any], AttentionLayerBase)
     attn_layers = get_layers_from_vllm_config(vllm_config, layer_type)
     for layer_name, attn_module in attn_layers.items():
+        if getattr(attn_module, "kv_sharing_target_layer_name", None):
+            # This layer will use KV cache of the sharing target layer.
+            continue
         # Skip modules that don't need KV cache (eg encoder-only attention)
         if spec := attn_module.get_kv_cache_spec(vllm_config):
             kv_cache_spec[layer_name] = spec
     return kv_cache_spec
 
 
+def get_shared_kv_cache_layers(vllm_config: VllmConfig):
+    attn_layers = get_layers_from_vllm_config(vllm_config, Attention)
+    return {
+        layer_name: kv_tgt_layer
+        for layer_name, attn_module in attn_layers.items()
+        if (kv_tgt_layer := attn_module.kv_sharing_target_layer_name)
+    }
+
+
 def init_attn_backend(
     kv_cache_config: KVCacheConfig,
     vllm_config: VllmConfig,
     device: torch.device,
     active_layer_names: set[str] | None = None,
-):
+) -> tuple[
+    dict[str, type[AttentionBackend]],
+    list[list[AttentionGroup]],
+    AttentionCGSupportInfo,
+    list[int],
+]:
     attn_backends: dict[str, type[AttentionBackend]] = {}
     attn_groups: list[list[AttentionGroup]] = []
-    attn_backend_workspace: torch.Tensor | None = None
+
+    # Add KV-sharing layers to their target's kv cache group so they are
+    # discovered alongside the target layer in Phase 1 below.
+    add_kv_sharing_layers_to_kv_cache_groups(
+        get_shared_kv_cache_layers(vllm_config), kv_cache_config.kv_cache_groups
+    )
+
+    # Phase 1: discover attention groups for each kv cache group.
     for kv_cache_group_id, kv_cache_group_spec in enumerate(
         kv_cache_config.kv_cache_groups
     ):
@@ -62,21 +105,32 @@ def init_attn_backend(
             key = (attn_backend.full_cls_name(), layer_kv_cache_spec)
             if key not in group_map:
                 group_map[key] = AttentionGroup(
-                    attn_backend,
-                    [layer_name],
-                    layer_kv_cache_spec,
-                    kv_cache_group_id,
+                    attn_backend, [layer_name], layer_kv_cache_spec, kv_cache_group_id
                 )
                 group_order.append(key)
             else:
                 group_map[key].layer_names.append(layer_name)
 
-        groups = [group_map[key] for key in group_order]
+        attn_groups.append([group_map[key] for key in group_order])
+
+    # Phase 2: pick a kernel block size per kv cache group that is supported
+    # by all backends within that group.
+    kernel_block_sizes = prepare_kernel_block_sizes(kv_cache_config, attn_groups)
+
+    # Phase 3: create metadata builders and determine cudagraph support.
+    attn_backend_workspace: torch.Tensor | None = None
+    min_cg_support = AttentionCGSupport.ALWAYS
+    min_cg_attn_backend = None
+    for kv_cache_group_id, groups in enumerate(attn_groups):
+        kv_cache_group_spec = kv_cache_config.kv_cache_groups[kv_cache_group_id]
+        kernel_block_size = None
+        if kv_cache_group_id < len(kernel_block_sizes):
+            kernel_block_size = kernel_block_sizes[kv_cache_group_id]
         for group in groups:
             group.create_metadata_builders(
                 vllm_config=vllm_config,
                 device=device,
-                kernel_block_size=None,
+                kernel_block_size=kernel_block_size,
                 num_metadata_builders=1,
             )
             builder = group.get_metadata_builder(0)
@@ -86,11 +140,29 @@ def init_attn_backend(
             else:
                 if hasattr(builder, "set_workspace_buffer"):
                     builder.set_workspace_buffer(attn_backend_workspace)
-        attn_groups.append(groups)
-    return attn_backends, attn_groups
+            # Check cudagraph support for the attention backend
+            cg_support = builder.get_cudagraph_support(
+                vllm_config,
+                cast(AttentionSpec, kv_cache_group_spec.kv_cache_spec),
+            )
+            if cg_support.value < min_cg_support.value:
+                min_cg_support = cg_support
+                min_cg_attn_backend = group.backend.__name__
+
+    return (
+        attn_backends,
+        attn_groups,
+        AttentionCGSupportInfo(
+            min_cg_support=min_cg_support,
+            min_cg_attn_backend=min_cg_attn_backend,
+        ),
+        kernel_block_sizes,
+    )
 
 
-def _allocate_kv_cache(kv_cache_config: KVCacheConfig, device: torch.device):
+def _allocate_kv_cache(
+    kv_cache_config: KVCacheConfig, shared_layers: dict[str, str], device: torch.device
+):
     kv_cache_raw_tensors: dict[str, torch.Tensor] = {}
     for kv_cache_tensor in kv_cache_config.kv_cache_tensors:
         tensor = torch.zeros(kv_cache_tensor.size, dtype=torch.int8, device=device)
@@ -101,7 +173,7 @@ def _allocate_kv_cache(kv_cache_config: KVCacheConfig, device: torch.device):
     for group in kv_cache_config.kv_cache_groups:
         for layer_name in group.layer_names:
             layer_names.add(layer_name)
-    assert layer_names == set(kv_cache_raw_tensors.keys()), (
+    assert layer_names == (kv_cache_raw_tensors.keys() | shared_layers.keys()), (
         "Some layers are not correctly initialized"
     )
     return kv_cache_raw_tensors
@@ -110,61 +182,174 @@ def _allocate_kv_cache(kv_cache_config: KVCacheConfig, device: torch.device):
 def _reshape_kv_cache(
     kv_cache_config: KVCacheConfig,
     kv_cache_raw_tensors: dict[str, torch.Tensor],
-    attn_backends: dict[str, AttentionBackend],
+    attn_backends: dict[str, type[AttentionBackend]],
     cache_dtype: str,
-) -> dict[str, torch.Tensor]:
-    kv_caches: dict[str, torch.Tensor] = {}
-    for kv_cache_group_spec in kv_cache_config.kv_cache_groups:
+    kernel_block_sizes: list[int],
+    shared_kv_cache_layers: dict[str, str],
+) -> dict[str, Any]:
+    kv_caches: dict[str, Any] = {}
+    has_attn, has_mamba = False, False
+    for kv_cache_group_id, kv_cache_group_spec in enumerate(
+        kv_cache_config.kv_cache_groups
+    ):
         for layer_name in kv_cache_group_spec.layer_names:
+            if layer_name in shared_kv_cache_layers:
+                # Shared layer — tensor will be aliased to its target later.
+                continue
             kv_cache_spec = kv_cache_group_spec.kv_cache_spec
             if isinstance(kv_cache_spec, UniformTypeKVCacheSpecs):
                 kv_cache_spec = kv_cache_spec.kv_cache_specs[layer_name]
-            assert isinstance(kv_cache_spec, AttentionSpec)
-
-            raw_tensor = kv_cache_raw_tensors[layer_name]
-            assert raw_tensor.numel() % kv_cache_spec.page_size_bytes == 0
-            num_blocks = raw_tensor.numel() // kv_cache_spec.page_size_bytes
-
-            attn_backend = attn_backends[layer_name]
-            kv_cache_shape = attn_backend.get_kv_cache_shape(
-                num_blocks,
-                kv_cache_spec.block_size,
-                kv_cache_spec.num_kv_heads,
-                kv_cache_spec.head_size,
-                cache_dtype,
-            )
 
-            # FIXME(woosuk): Add kv_cache_stride_order to all attention backends.
-            try:
-                kv_cache_stride_order = attn_backend.get_kv_cache_stride_order()
-                assert len(kv_cache_stride_order) == len(kv_cache_shape)
-            except (AttributeError, NotImplementedError):
-                kv_cache_stride_order = tuple(range(len(kv_cache_shape)))
-
-            kv_cache_shape = tuple(kv_cache_shape[i] for i in kv_cache_stride_order)
-            inv_order = [
-                kv_cache_stride_order.index(i)
-                for i in range(len(kv_cache_stride_order))
-            ]
-
-            dtype = kv_cache_spec.dtype
-            raw_tensor = raw_tensor.view(dtype)
-            raw_tensor = raw_tensor.view(kv_cache_shape)
-            kv_caches[layer_name] = raw_tensor.permute(*inv_order)
+            kv_raw_tensor = kv_cache_raw_tensors[layer_name]
+            assert kv_raw_tensor.numel() % kv_cache_spec.page_size_bytes == 0
+            num_blocks = kv_raw_tensor.numel() // kv_cache_spec.page_size_bytes
+
+            if isinstance(kv_cache_spec, AttentionSpec):
+                has_attn = True
+                attn_backend = attn_backends[layer_name]
+
+                if kv_cache_group_id < len(kernel_block_sizes):
+                    kernel_block_size = kernel_block_sizes[kv_cache_group_id]
+                    num_blocks *= kv_cache_spec.block_size // kernel_block_size
+                else:
+                    kernel_block_size = kv_cache_spec.block_size
+
+                if kv_cache_spec.storage_block_size != kv_cache_spec.block_size:
+                    shape_block_size = kv_cache_spec.storage_block_size
+                else:
+                    shape_block_size = kernel_block_size
+
+                kv_cache_shape = attn_backend.get_kv_cache_shape(
+                    num_blocks,
+                    shape_block_size,
+                    kv_cache_spec.num_kv_heads,
+                    kv_cache_spec.head_size,
+                    cache_dtype_str=cache_dtype,
+                )
+
+                # FIXME(woosuk): Add kv_cache_stride_order to all attention backends.
+                try:
+                    kv_cache_stride_order = attn_backend.get_kv_cache_stride_order()
+                    assert len(kv_cache_stride_order) == len(kv_cache_shape)
+                except (AttributeError, NotImplementedError):
+                    kv_cache_stride_order = tuple(range(len(kv_cache_shape)))
+
+                kv_cache_shape = tuple(kv_cache_shape[i] for i in kv_cache_stride_order)
+                inv_order = [
+                    kv_cache_stride_order.index(i)
+                    for i in range(len(kv_cache_stride_order))
+                ]
+
+                dtype = kv_cache_spec.dtype
+                kv_tensor = kv_raw_tensor.view(dtype)
+                if kv_cache_spec.page_size_padded is not None:
+                    # Use strided view to handle page_size_bytes that
+                    # include padding. This follows the same pattern as
+                    # MambaSpec handling in gpu_model_runner.py.
+                    # NOTE: This assumes kv_cache_shape[0] == num_blocks
+                    # (i.e. the first physical dimension is the block
+                    # index), which holds for MLA backends but NOT for
+                    # standard attention backends whose shape starts with
+                    # a K/V dimension of size 2.
+                    dtype_size = get_dtype_size(dtype)
+                    page_stride = kv_cache_spec.page_size_bytes // dtype_size
+                    strides = list(torch.empty(kv_cache_shape).stride())
+                    strides[inv_order[0]] = page_stride
+                    kv_cache = torch.as_strided(
+                        kv_tensor,
+                        size=kv_cache_shape,
+                        stride=tuple(strides),
+                    )
+                else:
+                    # No padding — safe to use a contiguous view.
+                    kv_cache = kv_tensor.view(kv_cache_shape)
+                kv_caches[layer_name] = kv_cache.permute(*inv_order)
+
+            elif isinstance(kv_cache_spec, MambaSpec):
+                has_mamba = True
+                state_tensors = []
+                storage_offset_bytes = 0
+                for shape, dtype in zip(kv_cache_spec.shapes, kv_cache_spec.dtypes):
+                    dtype_size = get_dtype_size(dtype)
+                    num_element_per_page = kv_cache_spec.page_size_bytes // dtype_size
+                    target_shape = (num_blocks, *shape)
+                    stride = torch.empty(target_shape).stride()
+                    target_stride = (num_element_per_page, *stride[1:])
+                    assert storage_offset_bytes % dtype_size == 0
+                    tensor = torch.as_strided(
+                        kv_raw_tensor.view(dtype),
+                        size=target_shape,
+                        stride=target_stride,
+                        storage_offset=storage_offset_bytes // dtype_size,
+                    )
+                    state_tensors.append(tensor)
+                    storage_offset_bytes += stride[0] * dtype_size
+                kv_caches[layer_name] = state_tensors
+            else:
+                raise NotImplementedError(
+                    f"Unsupported KV cache spec type: {type(kv_cache_spec)}"
+                )
+
+    if has_attn and has_mamba:
+        _update_hybrid_attention_layout(kv_caches, kv_cache_config)
+
+    # Map any sharing layers to their target layer's KV cache.
+    for layer_name, target_layer_name in shared_kv_cache_layers.items():
+        kv_caches[layer_name] = kv_caches[target_layer_name]
+
     return kv_caches
 
 
+def _update_hybrid_attention_layout(
+    kv_caches: dict[str, Any], kv_cache_config: KVCacheConfig
+) -> None:
+    for kv_cache_group_spec in kv_cache_config.kv_cache_groups:
+        for layer_name in kv_cache_group_spec.layer_names:
+            if layer_name not in kv_caches:
+                # Shared layer — will be aliased to its target after this pass.
+                continue
+            kv_cache_spec = kv_cache_group_spec.kv_cache_spec
+            if isinstance(kv_cache_spec, UniformTypeKVCacheSpecs):
+                kv_cache_spec = kv_cache_spec.kv_cache_specs[layer_name]
+            if not isinstance(kv_cache_spec, AttentionSpec):
+                continue
+            kv_cache = kv_caches[layer_name]
+            if kv_cache.shape[0] == 2:
+                assert kv_cache.shape[1] != 2, (
+                    f"Cannot determine layout for tensor of shape {kv_cache.shape}"
+                )
+                hidden_size = kv_cache.shape[2:].numel()
+                kv_cache.as_strided_(
+                    size=kv_cache.shape,
+                    stride=(
+                        hidden_size,
+                        2 * hidden_size,
+                        *kv_cache.stride()[2:],
+                    ),
+                )
+
+
 def init_kv_cache(
     runner_kv_caches: list[torch.Tensor],
     forward_context: dict[str, Any],
     kv_cache_config: KVCacheConfig,
-    attn_backends: dict[str, AttentionBackend],
+    attn_backends: dict[str, type[AttentionBackend]],
     device: torch.device,
     cache_dtype: str,
-) -> dict[str, torch.Tensor]:
-    kv_cache_raw_tensors = _allocate_kv_cache(kv_cache_config, device)
+    kernel_block_sizes: list[int],
+    vllm_config: VllmConfig,
+) -> dict[str, Any]:
+    shared_kv_cache_layers = get_shared_kv_cache_layers(vllm_config)
+    kv_cache_raw_tensors = _allocate_kv_cache(
+        kv_cache_config, shared_kv_cache_layers, device
+    )
     kv_caches = _reshape_kv_cache(
-        kv_cache_config, kv_cache_raw_tensors, attn_backends, cache_dtype
+        kv_cache_config,
+        kv_cache_raw_tensors,
+        attn_backends,
+        cache_dtype,
+        kernel_block_sizes,
+        shared_kv_cache_layers,
     )
     bind_kv_cache(kv_caches, forward_context, runner_kv_caches)
     return kv_caches
@@ -193,12 +378,17 @@ def build_attn_metadata(
     block_tables: Sequence[torch.Tensor],
     slot_mappings: torch.Tensor,
     kv_cache_config: KVCacheConfig,
+    seq_lens_cpu_upper_bound: torch.Tensor | None = None,
     dcp_local_seq_lens: torch.Tensor | None = None,
-    encoder_seq_lens: dict[int, tuple[torch.Tensor, np.ndarray]] | None = None,
+    positions: torch.Tensor | None = None,
+    model_specific_attn_metadata: ModelSpecificAttnMetadata | None = None,
+    for_cudagraph_capture: bool = False,
 ) -> dict[str, Any]:
     seq_lens = seq_lens[:num_reqs]
     if dcp_local_seq_lens is not None:
         dcp_local_seq_lens = dcp_local_seq_lens[:num_reqs]
+    if seq_lens_cpu_upper_bound is not None:
+        seq_lens_cpu_upper_bound = seq_lens_cpu_upper_bound[:num_reqs]
 
     attn_metadata: dict[str, Any] = {}
     num_kv_cache_groups = len(kv_cache_config.kv_cache_groups)
@@ -206,10 +396,16 @@ def build_attn_metadata(
         block_table = block_tables[i]
         slot_mapping = slot_mappings[i]
 
+        common_attn_metadata_extra_kwargs = (
+            model_specific_attn_metadata.get_extra_common_attn_kwargs(i, num_reqs)
+            if model_specific_attn_metadata is not None
+            else {}
+        )
         common_attn_metadata = CommonAttentionMetadata(
             query_start_loc=query_start_loc_gpu,
             query_start_loc_cpu=query_start_loc_cpu,
             seq_lens=seq_lens,
+            seq_lens_cpu_upper_bound=seq_lens_cpu_upper_bound,
             max_seq_len=max_seq_len,
             num_reqs=num_reqs,
             num_actual_tokens=num_tokens,
@@ -218,17 +414,30 @@ def build_attn_metadata(
             slot_mapping=slot_mapping,
             causal=True,
             dcp_local_seq_lens=dcp_local_seq_lens,
+            positions=positions,
+            **common_attn_metadata_extra_kwargs,
         )
-        if encoder_seq_lens and i in encoder_seq_lens:
-            encoder_seq_lens_gpu, encoder_seq_lens_cpu = encoder_seq_lens[i]
-            common_attn_metadata.encoder_seq_lens = encoder_seq_lens_gpu
-            common_attn_metadata.encoder_seq_lens_cpu = encoder_seq_lens_cpu
 
         for attn_group in attn_groups[i]:
             attn_metadata_builder = attn_group.get_metadata_builder(0)
-            metadata = attn_metadata_builder.build(
-                common_prefix_len=0, common_attn_metadata=common_attn_metadata
-            )
+            if for_cudagraph_capture:
+                metadata = attn_metadata_builder.build_for_cudagraph_capture(
+                    common_attn_metadata
+                )
+            else:
+                attn_metadata_extra_kwargs = (
+                    model_specific_attn_metadata.get_extra_attn_kwargs(
+                        attn_metadata_builder,
+                        num_reqs,
+                    )
+                    if model_specific_attn_metadata is not None
+                    else {}
+                )
+                metadata = attn_metadata_builder.build(
+                    common_prefix_len=0,
+                    common_attn_metadata=common_attn_metadata,
+                    **attn_metadata_extra_kwargs,
+                )
             for layer_name in attn_group.layer_names:
                 attn_metadata[layer_name] = metadata
     return attn_metadata
diff --git a/vllm/v1/worker/gpu/block_table.py b/vllm/v1/worker/gpu/block_table.py
index e79a7afbd81e..2c8c551a48a9 100644
--- a/vllm/v1/worker/gpu/block_table.py
+++ b/vllm/v1/worker/gpu/block_table.py
@@ -5,7 +5,6 @@
 import torch
 
 from vllm.triton_utils import tl, triton
-from vllm.utils.math_utils import cdiv
 from vllm.v1.attention.backends.utils import PAD_SLOT_ID
 from vllm.v1.worker.gpu.buffer_utils import StagedWriteTensor, UvaBackedTensor
 
@@ -16,16 +15,17 @@ def __init__(
         block_sizes: list[int],
         max_num_reqs: int,
         max_num_batched_tokens: int,
-        max_model_len: int,
+        max_num_blocks_per_group: list[int],
         device: torch.device,
+        kernel_block_sizes: list[int],
         cp_size: int = 1,
         cp_rank: int = 0,
         cp_interleave: int = 1,
     ):
         self.block_sizes = block_sizes
+        self.kernel_block_sizes = kernel_block_sizes
         self.max_num_reqs = max_num_reqs
         self.max_num_batched_tokens = max_num_batched_tokens
-        self.max_model_len = max_model_len
         self.device = device
 
         self.cp_size = cp_size
@@ -33,32 +33,23 @@ def __init__(
         self.cp_interleave = cp_interleave
 
         self.num_kv_cache_groups = len(self.block_sizes)
+        assert len(max_num_blocks_per_group) == self.num_kv_cache_groups
+
+        self.blocks_per_kv_block = [
+            bs // kbs for bs, kbs in zip(block_sizes, kernel_block_sizes)
+        ]
+
         # num_kv_cache_groups x [max_num_reqs, max_num_blocks]
         self.block_tables: list[StagedWriteTensor] = []
         for i in range(self.num_kv_cache_groups):
-            block_size = self.block_sizes[i]
-            # When using DCP, each request's KV cache is sharded among different ranks.
-            # As a result, one block on the current rank covers `block_size * cp_size`
-            # tokens in the full, global (unsharded) sequence.
-            max_num_blocks = cdiv(self.max_model_len, block_size * self.cp_size)
+            max_num_blocks = max_num_blocks_per_group[i] * self.blocks_per_kv_block[i]
             block_table = StagedWriteTensor(
                 (self.max_num_reqs, max_num_blocks),
                 dtype=torch.int32,
                 device=device,
             )
             self.block_tables.append(block_table)
-        self.block_table_ptrs = self._make_ptr_tensor(
-            [b.gpu for b in self.block_tables]
-        )
-        self.block_table_strides = torch.tensor(
-            [b.gpu.stride(0) for b in self.block_tables],
-            dtype=torch.int64,
-            device=self.device,
-        )
 
-        self.block_sizes_tensor = torch.tensor(
-            self.block_sizes, dtype=torch.int32, device=self.device
-        )
         self.num_blocks = UvaBackedTensor(
             (self.num_kv_cache_groups, self.max_num_reqs),
             dtype=torch.int32,
@@ -69,7 +60,6 @@ def __init__(
         self.input_block_tables: list[torch.Tensor] = [
             torch.zeros_like(b.gpu) for b in self.block_tables
         ]
-        self.input_block_table_ptrs = self._make_ptr_tensor(self.input_block_tables)
 
         self.slot_mappings = torch.zeros(
             self.num_kv_cache_groups,
@@ -78,12 +68,33 @@ def __init__(
             device=self.device,
         )
 
+        self.init_block_table_layout_tensors()
+
     def _make_ptr_tensor(self, x: Iterable[torch.Tensor]) -> torch.Tensor:
         # NOTE(woosuk): Use uint64 instead of int64 to cover all possible addresses.
         return torch.tensor(
             [t.data_ptr() for t in x], dtype=torch.uint64, device=self.device
         )
 
+    def init_block_table_layout_tensors(self) -> None:
+        # Called at init and after a CuMem kv_cache wake-up. The ptr tensors
+        # cache raw data_ptr() values that go stale once the underlying tensors
+        # are reallocated on wake; block_sizes_tensor needs re-populating
+        # because its storage lives under the kv_cache pool tag and comes back
+        # with undefined contents.
+        self.block_table_ptrs = self._make_ptr_tensor(
+            [b.gpu for b in self.block_tables]
+        )
+        self.block_table_strides = torch.tensor(
+            [b.gpu.stride(0) for b in self.block_tables],
+            dtype=torch.int64,
+            device=self.device,
+        )
+        self.block_sizes_tensor = torch.tensor(
+            self.kernel_block_sizes, dtype=torch.int32, device=self.device
+        )
+        self.input_block_table_ptrs = self._make_ptr_tensor(self.input_block_tables)
+
     def append_block_ids(
         self,
         req_index: int,
@@ -93,6 +104,9 @@ def append_block_ids(
         for i in range(self.num_kv_cache_groups):
             start = self.num_blocks.np[i, req_index] if not overwrite else 0
             block_ids = new_block_ids[i]
+            bpk = self.blocks_per_kv_block[i]
+            if bpk > 1:
+                block_ids = [b * bpk + k for b in block_ids for k in range(bpk)]
             self.block_tables[i].stage_write(req_index, start, block_ids)
             self.num_blocks.np[i, req_index] = start + len(block_ids)
 
diff --git a/vllm/v1/worker/gpu/buffer_utils.py b/vllm/v1/worker/gpu/buffer_utils.py
index a653c262556c..5963790a7792 100644
--- a/vllm/v1/worker/gpu/buffer_utils.py
+++ b/vllm/v1/worker/gpu/buffer_utils.py
@@ -167,7 +167,7 @@ def apply_write(self) -> None:
 
         # Special handling for write_contents
         write_contents = async_tensor_h2d(
-            self._staged_write_contents, self.dtype, self.device, pin_memory=True
+            self._staged_write_contents, self.dtype, self.device
         )
 
         # Write diffs to the GPU buffer
diff --git a/vllm/v1/worker/gpu/cudagraph_utils.py b/vllm/v1/worker/gpu/cudagraph_utils.py
index d918131c68d4..c7a7ffe442d2 100644
--- a/vllm/v1/worker/gpu/cudagraph_utils.py
+++ b/vllm/v1/worker/gpu/cudagraph_utils.py
@@ -3,12 +3,13 @@
 from collections import defaultdict
 from collections.abc import Callable
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, NamedTuple
 
 import torch
 import torch.nn as nn
 from tqdm import tqdm
 
+from vllm.compilation.counter import compilation_counter
 from vllm.config import VllmConfig
 from vllm.config.compilation import CUDAGraphMode
 from vllm.distributed.parallel_state import (
@@ -32,6 +33,11 @@
 logger = init_logger(__name__)
 
 
+class CapturedAttentionState(NamedTuple):
+    attn_metadata: dict[str, Any] | None
+    slot_mappings: dict[str, torch.Tensor]
+
+
 @dataclass(frozen=True)
 class BatchExecutionDescriptor:
     """Describes the shape of the batch and CG mode to run; this is used to make shape
@@ -94,6 +100,7 @@ def __init__(
         self.decode_query_len = decode_query_len
 
         self.dp_size = vllm_config.parallel_config.data_parallel_size
+        self.tp_size = vllm_config.parallel_config.tensor_parallel_size
         self.is_first_pp_rank = get_pp_group().is_first_rank
         self.is_last_pp_rank = get_pp_group().is_last_rank
 
@@ -103,6 +110,10 @@ def __init__(
         self._graphs_captured = False
         self._candidates: list[list[BatchExecutionDescriptor]] = []
         self._capture_descs: dict[CUDAGraphMode, list[BatchExecutionDescriptor]] = {}
+        # adjust the cudagraph sizes to be a multiple of the uniform decode query length
+        self.compilation_config.adjust_cudagraph_sizes_for_spec_decode(
+            self.decode_query_len, self.tp_size
+        )
         self._init_candidates()
 
     def _init_candidates(self) -> None:
@@ -177,16 +188,20 @@ def needs_capture(self) -> bool:
     def capture(
         self,
         create_forward_fn: Callable[
-            [BatchExecutionDescriptor], Callable[[CUDAGraphMode], None]
+            [BatchExecutionDescriptor],
+            tuple[Callable[[CUDAGraphMode], None], CapturedAttentionState],
         ],
         progress_bar_desc: str = "Capturing CUDA graphs",
-    ) -> None:
+    ) -> dict[BatchExecutionDescriptor, CapturedAttentionState]:
         """Capture CUDA graphs.
 
         Args:
             create_forward_fn: Factory that prepares inputs (OUTSIDE graph) and
-                returns a function that runs forward with a given CUDAGraphMode.
+                returns a tuple of (forward_fn, captured_attn_state).
         """
+        captured_attn_states: dict[
+            BatchExecutionDescriptor, CapturedAttentionState
+        ] = {}
         with graph_capture(device=self.device):
             # Capture in order: PIECEWISE first, then FULL. PIECEWISE has larger
             # activations so FULL activations should fit in already allocated
@@ -200,7 +215,7 @@ def capture(
                     descs = tqdm(descs, desc=f"{progress_bar_desc} ({mode.name})")
                 for desc in descs:
                     # Prepare inputs and get forward function
-                    forward_fn = create_forward_fn(desc)
+                    forward_fn, attn_state = create_forward_fn(desc)
 
                     # Warmup
                     forward_fn(CUDAGraphMode.NONE)
@@ -210,8 +225,15 @@ def capture(
                         "CG Capture: mode=%s, batch_desc=%s", desc.cg_mode.name, desc
                     )
                     if desc.cg_mode == CUDAGraphMode.PIECEWISE:
+                        captured_attn_states[desc] = attn_state
                         forward_fn(CUDAGraphMode.PIECEWISE)
                     else:
+                        # Capture with fresh attention state. The warmup
+                        # attention state is discarded because some backends
+                        # (e.g. FlashMLA) perform lazy initializations that
+                        # must be captured in the graph.
+                        forward_fn, attn_state = create_forward_fn(desc)
+                        captured_attn_states[desc] = attn_state
                         assert desc not in self.graphs, (
                             f"Graph already captured for {desc}"
                         )
@@ -227,7 +249,9 @@ def capture(
                             # the next forward pass.
                             get_offloader().join_after_forward()
                         self.graphs[desc] = graph
+                        compilation_counter.num_cudagraph_captured += 1
         self._graphs_captured = True
+        return captured_attn_states
 
     def dispatch(
         self,
@@ -289,13 +313,16 @@ def capture(
         has_lora: bool = False,
         use_aux_hidden_state_outputs: bool = False,
         progress_bar_desc: str = "Capturing CUDA graphs",
-    ) -> None:
+    ) -> dict[BatchExecutionDescriptor, CapturedAttentionState]:
         """Capture CUDA graphs for model forward pass."""
         self.use_aux_hidden_state_outputs = use_aux_hidden_state_outputs
 
         def create_forward_fn(
             desc: BatchExecutionDescriptor,
-        ) -> Callable[[CUDAGraphMode], None]:
+        ) -> tuple[
+            Callable[[CUDAGraphMode], None],
+            CapturedAttentionState,
+        ]:
             num_tokens = desc.num_tokens
             num_reqs = desc.num_reqs or min(num_tokens, self.max_num_reqs)
             num_tokens_across_dp = (
@@ -324,16 +351,18 @@ def create_forward_fn(
                 block_tables,
                 attn_groups,
                 kv_cache_config,
+                skip_attn=(desc.cg_mode == CUDAGraphMode.PIECEWISE),
             )
 
             def forward_fn(cg_mode: CUDAGraphMode) -> None:
-                batch_descriptor = (
-                    BatchDescriptor(num_tokens=num_tokens)
-                    if cg_mode == CUDAGraphMode.PIECEWISE
-                    else None
-                )
+                batch_descriptor = None
+                if cg_mode == CUDAGraphMode.PIECEWISE:
+                    assert attn_metadata is None
+                    batch_descriptor = BatchDescriptor(
+                        num_tokens=num_tokens, has_lora=has_lora
+                    )
                 with set_forward_context(
-                    attn_metadata if cg_mode != CUDAGraphMode.PIECEWISE else None,
+                    attn_metadata,
                     self.vllm_config,
                     num_tokens=num_tokens,
                     cudagraph_runtime_mode=cg_mode,
@@ -375,9 +404,9 @@ def forward_fn(cg_mode: CUDAGraphMode) -> None:
                     for k, v in intermediate_tensors.tensors.items():
                         self.intermediate_tensors[k][:num_tokens] = v
 
-            return forward_fn
+            return forward_fn, CapturedAttentionState(attn_metadata, slot_mappings)
 
-        super().capture(create_forward_fn, progress_bar_desc)
+        return super().capture(create_forward_fn, progress_bar_desc)
 
     def run_fullgraph(
         self, desc: BatchExecutionDescriptor
@@ -403,7 +432,8 @@ def prepare_inputs_to_capture(
     block_tables: BlockTables,
     attn_groups: list[list[AttentionGroup]],
     kv_cache_config: KVCacheConfig,
-) -> tuple[dict[str, Any], dict[str, torch.Tensor]]:
+    skip_attn: bool = False,
+) -> CapturedAttentionState:
     input_batch = InputBatch.make_dummy(num_reqs, num_tokens, input_buffers)
     input_block_tables = block_tables.get_dummy_block_tables(num_reqs)
     slot_mappings = block_tables.get_dummy_slot_mappings(num_tokens)
@@ -423,13 +453,15 @@ def prepare_inputs_to_capture(
         )
         input_batch.dcp_local_seq_lens = input_buffers.dcp_local_seq_lens[:num_reqs]
 
-    attn_metadata = model_state.prepare_attn(
-        input_batch,
-        CUDAGraphMode.NONE,
-        input_block_tables,
-        slot_mappings,
-        attn_groups,
-        kv_cache_config,
-        for_capture=True,
-    )
-    return attn_metadata, slot_mappings_by_layer
+    attn_metadata = None
+    if not skip_attn:
+        attn_metadata = model_state.prepare_attn(
+            input_batch,
+            CUDAGraphMode.NONE,
+            input_block_tables,
+            slot_mappings,
+            attn_groups,
+            kv_cache_config,
+            for_capture=True,
+        )
+    return CapturedAttentionState(attn_metadata, slot_mappings_by_layer)
diff --git a/vllm/v1/worker/gpu/dp_utils.py b/vllm/v1/worker/gpu/dp_utils.py
index f0e2bfcf54b8..b3c172738c3a 100644
--- a/vllm/v1/worker/gpu/dp_utils.py
+++ b/vllm/v1/worker/gpu/dp_utils.py
@@ -13,14 +13,8 @@
 )
 
 
-def make_num_tokens_across_dp(dp_size: int, num_tokens: int) -> torch.Tensor | None:
-    if dp_size == 1:
-        return None
-    return torch.full((dp_size,), num_tokens, dtype=torch.int32, device="cpu")
-
-
 def sync_cudagraph_and_dp_padding(
-    cudagraph_manager: CudaGraphManager,
+    cudagraph_manager: CudaGraphManager | None,
     desired_batch_desc: BatchExecutionDescriptor,
     num_tokens: int,
     num_reqs: int,
@@ -61,6 +55,10 @@ def sync_cudagraph_and_dp_padding(
             num_reqs=num_reqs,
         ), num_tokens_across_dp
 
+    assert cudagraph_manager is not None, (
+        "cudagraph_manager should only be None during profile run, "
+        "where synced_cg_mode must be NONE across all DP ranks"
+    )
     synced_num_tokens = int(num_tokens_across_dp.max().item())
     synced_uniform_token_count = uniform_token_counts_across_dp[0]
     # If ranks disagree on the uniform token count, or its 0 (means None) set to None
@@ -79,3 +77,41 @@ def sync_cudagraph_and_dp_padding(
     num_tokens_across_dp[:] = synced_desc.num_tokens
 
     return synced_desc, num_tokens_across_dp
+
+
+def dispatch_cg_and_sync_dp(
+    cudagraph_manager: CudaGraphManager | None,
+    num_reqs: int,
+    num_tokens: int,
+    uniform_token_count: int | None,
+    dp_size: int,
+    dp_rank: int,
+    need_eager: bool = False,
+) -> tuple[BatchExecutionDescriptor, torch.Tensor | None]:
+    if need_eager:
+        batch_desc = BatchExecutionDescriptor(
+            cg_mode=CUDAGraphMode.NONE,
+            num_tokens=num_tokens,
+            num_reqs=num_reqs,
+        )
+    else:
+        assert cudagraph_manager is not None, (
+            "cudagraph_manager should only be None during profile run, "
+            "where need_eager must be True"
+        )
+        batch_desc = cudagraph_manager.dispatch(
+            num_reqs, num_tokens, uniform_token_count
+        )
+
+    if dp_size == 1:
+        return batch_desc, None
+
+    return sync_cudagraph_and_dp_padding(
+        cudagraph_manager,
+        batch_desc,
+        num_tokens,
+        num_reqs,
+        uniform_token_count,
+        dp_size,
+        dp_rank,
+    )
diff --git a/vllm/v1/worker/gpu/eplb_utils.py b/vllm/v1/worker/gpu/eplb_utils.py
index 61d70fafea33..8f04ce3577c5 100644
--- a/vllm/v1/worker/gpu/eplb_utils.py
+++ b/vllm/v1/worker/gpu/eplb_utils.py
@@ -10,11 +10,24 @@
 
 from vllm.distributed.eplb.eplb_state import EplbState
 from vllm.logger import init_logger
-from vllm.model_executor.models.interfaces import is_mixture_of_experts
+from vllm.model_executor.models.interfaces import (
+    SupportsMultiModal,
+    is_mixture_of_experts,
+)
 
 logger = init_logger(__name__)
 
 
+def _unwrap_moe(model: nn.Module) -> nn.Module:
+    # VLM wrappers (e.g. KimiK25ForConditionalGeneration) hold the MoE
+    # language model under `.language_model` but don't implement
+    # MixtureOfExperts themselves. Mirror the V1 path
+    # (see vllm/v1/worker/gpu_model_runner.py, PR #39805).
+    if not is_mixture_of_experts(model) and isinstance(model, SupportsMultiModal):
+        return model.get_language_model()
+    return model
+
+
 def step_eplb_after(*, is_dummy: bool = False) -> Callable:
     """Step EPLB after a model runner method completes successfully."""
 
@@ -89,12 +102,11 @@ def maybe_register_model(
         if not self.parallel_config.enable_eplb or load_dummy_weights:
             return False
 
+        model = _unwrap_moe(model)
         if not is_mixture_of_experts(model):
             return False
 
-        logger.info_once(
-            "EPLB is enabled for model %s.", model_config.model, scope="local"
-        )
+        logger.info_once("EPLB is enabled for model %s.", model_config.model)
         assert self.state is not None
         self.state.add_model(model, model_config)
         self._has_registered_models = True
@@ -130,6 +142,7 @@ def setup_from_mapping(
         expanded_physical_to_logical: torch.Tensor,
         old_num_physical_experts: int,
     ) -> None:
+        model = _unwrap_moe(model)
         assert is_mixture_of_experts(model)
 
         self.state = EplbState.from_mapping(
diff --git a/vllm/v1/worker/gpu/input_batch.py b/vllm/v1/worker/gpu/input_batch.py
index 24df137cb31e..b253d7d8c063 100644
--- a/vllm/v1/worker/gpu/input_batch.py
+++ b/vllm/v1/worker/gpu/input_batch.py
@@ -53,15 +53,22 @@ class InputBatch:
     # sum(num_scheduled_tokens)
     num_tokens: int
     num_tokens_after_padding: int
+    # Sum of draft tokens scheduled across requests.
     num_draft_tokens: int
+    # [num_reqs] number of draft tokens scheduled for each request, if any.
+    num_draft_tokens_per_req: np.ndarray | None
 
     # [num_reqs + 1]
     query_start_loc: torch.Tensor
     query_start_loc_np: np.ndarray
     # [num_reqs]
     seq_lens: torch.Tensor
+    # [num_reqs] CPU upper bound on seq_lens (see CommonAttentionMetadata).
+    seq_lens_cpu_upper_bound: torch.Tensor
     # [num_reqs]
     dcp_local_seq_lens: torch.Tensor | None
+    # [num_reqs] CPU bool array.
+    is_prefilling_np: np.ndarray
 
     # [num_tokens_after_padding]
     input_ids: torch.Tensor
@@ -121,6 +128,8 @@ def make_dummy(
         logits_indices = query_start_loc[1:] - 1
         cu_num_logits = torch.arange(num_reqs + 1, device=device, dtype=torch.int32)
         cu_num_logits_np = np.arange(num_reqs + 1, dtype=np.int32)
+        # Dummy: seq_len == query_len (fresh-prefill shape).
+        seq_lens_cpu_upper_bound = torch.from_numpy(num_scheduled_tokens.copy())
         return cls(
             req_ids=req_ids,
             num_reqs=num_reqs,
@@ -133,10 +142,13 @@ def make_dummy(
             num_tokens=num_tokens,
             num_tokens_after_padding=num_tokens,
             num_draft_tokens=0,
+            num_draft_tokens_per_req=None,
             query_start_loc=query_start_loc,
             query_start_loc_np=query_start_loc_np,
             seq_lens=seq_lens,
+            seq_lens_cpu_upper_bound=seq_lens_cpu_upper_bound,
             dcp_local_seq_lens=None,
+            is_prefilling_np=np.zeros(num_reqs, dtype=np.bool_),
             input_ids=input_ids,
             positions=positions,
             logits_indices=logits_indices,
diff --git a/vllm/v1/worker/gpu/kv_connector.py b/vllm/v1/worker/gpu/kv_connector.py
index bcbeef1ae99e..3c29ea132da3 100644
--- a/vllm/v1/worker/gpu/kv_connector.py
+++ b/vllm/v1/worker/gpu/kv_connector.py
@@ -65,8 +65,8 @@ def pre_forward(self, scheduler_output: "SchedulerOutput") -> None:
 
         kv_connector_metadata = scheduler_output.kv_connector_metadata
         assert kv_connector_metadata is not None
-        self.kv_connector.bind_connector_metadata(kv_connector_metadata)
         self.kv_connector.handle_preemptions(kv_connector_metadata)
+        self.kv_connector.bind_connector_metadata(kv_connector_metadata)
 
         # TODO: sort out KV Connectors' use of forward_context
         if is_forward_context_available():
@@ -93,15 +93,14 @@ def post_forward(
         output.invalid_block_ids = self.kv_connector.get_block_ids_with_load_errors()
         output.kv_connector_stats = self.kv_connector.get_kv_connector_stats()
         output.kv_cache_events = self.kv_connector.get_kv_connector_kv_cache_events()
+        output.kv_connector_worker_meta = (
+            self.kv_connector.build_connector_worker_meta()
+        )
+
         if clear_metadata:
             self.kv_connector.clear_connector_metadata()
         return output
 
-    def clear_metadata(self) -> None:
-        """Clear the connector metadata. Call this after draft model runs."""
-        if not self._disabled:
-            self.kv_connector.clear_connector_metadata()
-
     def no_forward(self, scheduler_output: "SchedulerOutput") -> ModelRunnerOutput:
         if self._disabled:
             return EMPTY_MODEL_RUNNER_OUTPUT
diff --git a/vllm/v1/worker/gpu/mm/encoder_runner.py b/vllm/v1/worker/gpu/mm/encoder_runner.py
index fb2a21ce43e6..1000dbe05a80 100644
--- a/vllm/v1/worker/gpu/mm/encoder_runner.py
+++ b/vllm/v1/worker/gpu/mm/encoder_runner.py
@@ -83,7 +83,7 @@ def gather_mm_embeddings(
 
         mm_embeds: list[torch.Tensor] = []
         is_mm_embed = torch.zeros(
-            total_num_scheduled_tokens, dtype=torch.bool, device="cpu", pin_memory=True
+            total_num_scheduled_tokens, dtype=torch.bool, device="cpu"
         )
         for i, req_id in enumerate(req_ids):
             if not is_prefilling[i]:
@@ -131,8 +131,6 @@ def gather_mm_embeddings(
                 )
                 mm_embeds.append(mm_embeds_item)
 
-        # Copy the is_mm_embed tensor to the GPU.
-        is_mm_embed = is_mm_embed.to(device=self.device, non_blocking=True)
         return mm_embeds, is_mm_embed
 
     @torch.inference_mode()
diff --git a/vllm/v1/worker/gpu/model_runner.py b/vllm/v1/worker/gpu/model_runner.py
index a2f83c52e951..c0a95618c111 100644
--- a/vllm/v1/worker/gpu/model_runner.py
+++ b/vllm/v1/worker/gpu/model_runner.py
@@ -27,6 +27,7 @@
 import torch
 import torch.nn as nn
 
+from vllm.compilation.counter import compilation_counter
 from vllm.config import VllmConfig
 from vllm.config.compilation import CUDAGraphMode
 from vllm.distributed.parallel_state import (
@@ -36,14 +37,19 @@
 )
 from vllm.forward_context import BatchDescriptor, set_forward_context
 from vllm.logger import init_logger
+from vllm.lora.layers import LoRAMapping
+from vllm.model_executor.layers.mamba.ops.ssu_dispatch import (
+    initialize_mamba_ssu_backend,
+)
 from vllm.model_executor.model_loader import get_model_loader
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.sequence import IntermediateTensors
 from vllm.tasks import SupportedTask
+from vllm.utils.math_utils import cdiv
 from vllm.utils.mem_utils import DeviceMemoryProfiler, format_gib
 from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
 from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
-from vllm.v1.kv_cache_interface import KVCacheConfig
+from vllm.v1.kv_cache_interface import KVCacheConfig, MambaSpec
 from vllm.v1.outputs import DraftTokenIds, KVConnectorOutput, ModelRunnerOutput
 from vllm.v1.worker.cp_utils import check_attention_cp_compatibility
 from vllm.v1.worker.gpu.async_utils import AsyncOutput, AsyncPoolingOutput
@@ -61,7 +67,7 @@
     ModelCudaGraphManager,
     get_uniform_token_count,
 )
-from vllm.v1.worker.gpu.dp_utils import sync_cudagraph_and_dp_padding
+from vllm.v1.worker.gpu.dp_utils import dispatch_cg_and_sync_dp
 from vllm.v1.worker.gpu.eplb_utils import EPLBController, step_eplb_after
 from vllm.v1.worker.gpu.input_batch import (
     InputBatch,
@@ -87,6 +93,7 @@
 from vllm.v1.worker.gpu.sample.output import SamplerOutput
 from vllm.v1.worker.gpu.sample.prompt_logprob import PromptLogprobsWorker
 from vllm.v1.worker.gpu.sample.sampler import Sampler
+from vllm.v1.worker.gpu.shutdown import free_before_shutdown
 from vllm.v1.worker.gpu.spec_decode import init_speculator
 from vllm.v1.worker.gpu.spec_decode.eagle.eagle3_utils import (
     set_eagle3_aux_hidden_state_layers,
@@ -130,7 +137,6 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
 
         self.use_async_scheduling = self.scheduler_config.async_scheduling
         self.output_copy_stream = torch.cuda.Stream(self.device)
-        self.output_copy_event = torch.cuda.Event()
 
         # Pipeline parallelism.
         self.use_pp = self.parallel_config.pipeline_parallel_size > 1
@@ -177,6 +183,7 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
 
         # Draft tokens propagation - for spec-dec + struct outputs.
         self.draft_tokens_handler = DraftTokensHandler(self.device)
+        self.uniform_decode_query_len = 1 + self.num_speculative_steps
 
         # Pooling models.
         self.is_pooling_model = self.model_config.runner_type == "pooling"
@@ -212,11 +219,13 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
                 req_states=self.req_states,
                 logprobs_mode=self.model_config.logprobs_mode,
                 num_speculative_tokens=self.num_speculative_steps + 1,
+                use_fp64_gumbel=self.model_config.use_fp64_gumbel,
             )
             if self.speculative_config is not None:
                 self.rejection_sampler = RejectionSampler(
                     self.sampler,
                     self.speculative_config,
+                    self.device,
                 )
             self.prompt_logprobs_worker = PromptLogprobsWorker(self.max_num_reqs)
             self.structured_outputs_worker = StructuredOutputsWorker(
@@ -225,14 +234,9 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
                 device=self.device,
             )
 
-        # CUDA graphs.
+        # For CUDA graphs, and will init cudagraph_manager after init_attn_backend.
         self.decode_query_len = self.num_speculative_steps + 1
-        self.cudagraph_manager = ModelCudaGraphManager(
-            self.vllm_config,
-            self.device,
-            self.compilation_config.cudagraph_mode,
-            decode_query_len=self.decode_query_len,
-        )
+        self.cudagraph_manager: ModelCudaGraphManager | None = None
         # LoRA-related workers.
         self.lora_state = LoraState(max_num_reqs=self.max_num_reqs)
         # KV Connector if configured.
@@ -326,6 +330,25 @@ def load_model(self, load_dummy_weights: bool = False, *args, **kwargs) -> None:
     def get_model(self) -> nn.Module:
         return self.model
 
+    def reload_weights(self, *args, **kwargs) -> None:
+        # TODO(Wentao): Use full version instead of import when fully migrated to v2
+        from vllm.v1.worker.gpu_model_runner import GPUModelRunner as GPUModelRunnerV1
+
+        GPUModelRunnerV1.reload_weights(self, *args, **kwargs)  # type: ignore[arg-type]
+        self.reset_encoder_cache()
+        self.reset_mm_cache()
+
+    def update_config(self, *args, **kwargs) -> None:
+        # TODO(Wentao): Use full version instead of import when fully migrated to v2
+        from vllm.v1.worker.gpu_model_runner import GPUModelRunner as GPUModelRunnerV1
+
+        GPUModelRunnerV1.update_config(self, *args, **kwargs)  # type: ignore[arg-type]
+
+        # v2 reads config via self.vllm_config (e.g. in load_model), so keep it
+        # in sync with the attributes the v1 helper just replaced.
+        self.vllm_config.model_config = self.model_config
+        self.vllm_config.load_config = self.load_config
+
     @functools.cached_property
     def main_stream(self) -> torch.cuda.Stream:
         # Cache the default CUDA stream to avoid lookup overhead.
@@ -337,10 +360,6 @@ def get_kv_cache_spec(self):
     def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
         kv_cache_config = deepcopy(kv_cache_config)
         self.kv_cache_config = kv_cache_config
-        block_sizes = [
-            kv_cache_group.kv_cache_spec.block_size
-            for kv_cache_group in kv_cache_config.kv_cache_groups
-        ]
 
         block_table_max_model_len = self.max_model_len
         if self.is_encoder_decoder:
@@ -351,27 +370,69 @@ def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
                 getattr(self.model_config.hf_config, "max_source_positions", 0),
             )
 
+        block_sizes = []
+        max_num_blocks_per_group = []
+        for kv_cache_group in kv_cache_config.kv_cache_groups:
+            spec = kv_cache_group.kv_cache_spec
+            block_sizes.append(spec.block_size)
+            # When using DCP, each request's KV cache is sharded among different ranks.
+            # As a result, one block on the current rank covers `block_size * cp_size`
+            # tokens in the full, global (unsharded) sequence.
+            max_num_blocks = cdiv(
+                block_table_max_model_len, spec.block_size * self.dcp_size
+            )
+            # Align to a multiple of (128 / block_size) as required by some attention
+            # backends such as TRTLLM (#39324)
+            if spec.block_size <= 128:
+                alignment = 128 // spec.block_size
+                max_num_blocks = cdiv(max_num_blocks, alignment) * alignment
+            # For Mamba/Hybrid Model, KVCaches need extra blocks for speculative tokens
+            if isinstance(spec, MambaSpec):
+                max_num_blocks = (
+                    max_num_blocks if self.cache_config.enable_prefix_caching else 1
+                ) + spec.num_speculative_blocks
+            max_num_blocks_per_group.append(max_num_blocks)
+
+        (self.attn_backends, self.attn_groups, attn_cg_support, kernel_block_sizes) = (
+            init_attn_backend(self.kv_cache_config, self.vllm_config, self.device)
+        )
+
         self.block_tables = BlockTables(
             block_sizes=block_sizes,
             max_num_reqs=self.max_num_reqs,
             max_num_batched_tokens=self.max_num_tokens,
-            max_model_len=block_table_max_model_len,
+            max_num_blocks_per_group=max_num_blocks_per_group,
             device=self.device,
+            kernel_block_sizes=kernel_block_sizes,
             cp_size=self.dcp_size,
             cp_rank=self.dcp_rank,
             cp_interleave=self.cp_interleave,
         )
-
-        self.attn_backends, self.attn_groups = init_attn_backend(
-            self.kv_cache_config, self.vllm_config, self.device
+        initialize_mamba_ssu_backend(
+            self.vllm_config.mamba_config, self.kv_cache_config
         )
+        cudagraph_mode = self.compilation_config.resolve_cudagraph_mode_and_sizes(
+            attn_cg_support.min_cg_support,
+            attn_cg_support.min_cg_attn_backend,
+            self.uniform_decode_query_len,
+            self.parallel_config.tensor_parallel_size,
+            self.kv_cache_config,
+            self.max_num_reqs,
+        )
+        self.cudagraph_manager = ModelCudaGraphManager(
+            self.vllm_config,
+            self.device,
+            cudagraph_mode,
+            decode_query_len=self.decode_query_len,
+        )
+        if self.speculator is not None:
+            self.speculator.init_cudagraph_manager(cudagraph_mode)
+
         check_attention_cp_compatibility(self.vllm_config)
         if self.speculator is not None:
             # HACK(woosuk)
             self.speculator.set_attn(
-                self.model_state,
-                self.kv_cache_config,
-                self.block_tables,
+                self.model_state, self.kv_cache_config, self.block_tables
             )
 
         self.kv_caches: list[torch.Tensor] = []
@@ -382,6 +443,8 @@ def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
             self.attn_backends,
             self.device,
             self.cache_config.cache_dtype,
+            kernel_block_sizes,
+            self.vllm_config,
         )
         self.kv_connector = get_kv_connector(self.vllm_config, kv_caches_dict)
 
@@ -391,12 +454,17 @@ def _dummy_run(
         self,
         num_tokens: int,
         *args,
-        skip_attn: bool = True,
+        skip_attn: bool = False,
         uniform_decode: bool = False,
         skip_eplb: bool = False,
         is_profile: bool = False,
         **kwargs,
     ) -> tuple[torch.Tensor | None, torch.Tensor | None]:
+        if skip_attn and not is_profile:
+            raise ValueError(
+                "skip_attn must only be True for initial memory profiling."
+            )
+
         # Create a dummy scheduler output.
         num_reqs = min(num_tokens, self.max_num_reqs)
         if uniform_decode:
@@ -433,6 +501,7 @@ def _dummy_run(
             intermediate_tensors=intermediate_tensors,
             dummy_run=True,
             skip_attn_for_dummy_run=skip_attn,
+            is_profile=is_profile,
         )
         self.kv_connector.set_disabled(False)
 
@@ -446,7 +515,6 @@ def _dummy_run(
         slot_mappings_by_layer = self.execute_model_state.slot_mappings_by_layer
         hidden_states = self.execute_model_state.hidden_states
         aux_hidden_states = self.execute_model_state.aux_hidden_states
-        num_tokens_across_dp = self.execute_model_state.num_tokens_across_dp
         self.execute_model_state = None
 
         # dummy run the eagle speculator's propose to ensure DP/EP sync.
@@ -462,11 +530,20 @@ def _dummy_run(
                         device=self.device,
                     ),
                 )
+
+            # Let the target override the hidden state fed to the drafter
+            # (e.g. DeepSeek V4 MTP needs the pre-hc_head residual). The
+            # target returns a persistent buffer sized at max_num_batched_tokens;
+            # slice to the active token count that propose() expects.
+            spec_hidden_states = hidden_states
+            if hasattr(self.model, "get_mtp_target_hidden_states"):
+                pre_hc_hidden_states = self.model.get_mtp_target_hidden_states()
+                spec_hidden_states = pre_hc_hidden_states[: hidden_states.shape[0]]  # type: ignore[union-attr]
             self.speculator.propose(
                 input_batch=input_batch,
                 attn_metadata=attn_metadata,
                 slot_mappings=slot_mappings_by_layer,
-                last_hidden_states=hidden_states,
+                last_hidden_states=spec_hidden_states,
                 aux_hidden_states=aux_hidden_states,
                 num_sampled=torch.ones(
                     input_batch.num_reqs, dtype=torch.int32, device=self.device
@@ -478,10 +555,10 @@ def _dummy_run(
                 next_prefill_tokens=self.req_states.next_prefill_tokens,
                 temperature=self.sampler.sampling_states.temperature.gpu,
                 seeds=self.sampler.sampling_states.seeds.gpu,
-                num_tokens_across_dp=num_tokens_across_dp,
                 dummy_run=True,
                 skip_attn_for_dummy_run=skip_attn,
                 mm_inputs=mm_inputs,
+                is_profile=is_profile,
             )
 
         assert hidden_states is not None  # Last PP rank always has hidden_states
@@ -525,6 +602,9 @@ def profile_run(self) -> None:
         del hidden_states, sample_hidden_states
         gc.collect()
 
+    def post_kv_cache_wake_up(self) -> None:
+        self.block_tables.init_block_table_layout_tensors()
+
     def reset_mm_cache(self) -> None:
         if self.encoder_cache is not None:
             self.encoder_cache.reset_mm_cache()
@@ -543,6 +623,7 @@ def profile_cudagraph_memory(self) -> int:
 
     @torch.inference_mode()
     def capture_model(self) -> int:
+        assert self.cudagraph_manager is not None
         if not self.cudagraph_manager.needs_capture():
             logger.warning(
                 "Skipping CUDA graph capture. To turn on CUDA graph capture, "
@@ -550,13 +631,15 @@ def capture_model(self) -> int:
             )
             return 0
 
+        compilation_counter.num_gpu_runner_capture_triggers += 1
+
         start_time = time.perf_counter()
         gc.collect()
         torch.accelerator.empty_cache()
         start_free_gpu_memory = torch.cuda.mem_get_info()[0]
 
         with self.maybe_setup_dummy_loras(self.lora_config):
-            self.cudagraph_manager.capture(
+            captured_attn_states = self.cudagraph_manager.capture(
                 self.model,
                 self.model_state,
                 self.input_buffers,
@@ -568,7 +651,7 @@ def capture_model(self) -> int:
                 use_aux_hidden_state_outputs=self.use_aux_hidden_state_outputs,
             )
             if self.speculator is not None:
-                self.speculator.capture_model()
+                self.speculator.capture(captured_attn_states)
 
         end_time = time.perf_counter()
         end_free_gpu_memory = torch.cuda.mem_get_info()[0]
@@ -651,15 +734,26 @@ def add_requests(self, scheduler_output: SchedulerOutput) -> None:
             self.sampler.apply_staged_writes()
 
     def update_requests(self, scheduler_output: SchedulerOutput) -> None:
-        # Add new blocks for the existing requests.
+        # Add new blocks and update num_computed_tokens for the existing requests.
         reqs = scheduler_output.scheduled_cached_reqs
-        for req_new_block_ids, req_id in zip(reqs.new_block_ids, reqs.req_ids):
+        num_computed_tokens_np = self.req_states.num_computed_tokens_np
+        for req_id, num_computed_tokens, req_new_block_ids in zip(
+            reqs.req_ids, reqs.num_computed_tokens, reqs.new_block_ids
+        ):
+            req_index = self.req_states.req_id_to_index[req_id]
+            num_computed_tokens_np[req_index] = num_computed_tokens
             if req_new_block_ids is not None:
-                req_index = self.req_states.req_id_to_index[req_id]
                 self.block_tables.append_block_ids(
                     req_index, req_new_block_ids, overwrite=False
                 )
 
+        # Update num_computed_prefill_tokens.
+        np.minimum(
+            self.req_states.num_computed_tokens_np,
+            self.req_states.prefill_len.np,
+            out=self.req_states.num_computed_prefill_tokens,
+        )
+
     def prepare_inputs(
         self, scheduler_output: SchedulerOutput, batch_desc: BatchExecutionDescriptor
     ) -> InputBatch:
@@ -681,6 +775,7 @@ def prepare_inputs(
 
         # Get the number of draft tokens for each request.
         draft_tokens = scheduler_output.scheduled_spec_decode_tokens
+        num_draft_tokens_per_req = None
         if not draft_tokens:
             # No draft token scheduled (common case).
             total_num_draft_tokens = 0
@@ -694,15 +789,15 @@ def prepare_inputs(
                 num_reqs, dtype=torch.int32, device=self.device
             )
         else:
-            num_draft_tokens = np.fromiter(
+            num_draft_tokens_per_req = np.fromiter(
                 (len(draft_tokens.get(req_id, ())) for req_id in req_ids),
                 dtype=np.int32,
                 count=num_reqs,
             )
-            total_num_draft_tokens = int(num_draft_tokens.sum())
+            total_num_draft_tokens = int(num_draft_tokens_per_req.sum())
             total_num_logits = num_reqs + total_num_draft_tokens
 
-            num_logits = num_draft_tokens + 1
+            num_logits = num_draft_tokens_per_req + 1
             cu_num_logits_np = np.empty(num_reqs + 1, dtype=np.int32)
             cu_num_logits_np[0] = 0
             np.cumsum(num_logits, out=cu_num_logits_np[1:])
@@ -725,9 +820,10 @@ def prepare_inputs(
         async_copy_to_gpu(query_start_loc_np, out=self.input_buffers.query_start_loc)
         query_start_loc_np = query_start_loc_np[: num_reqs_padded + 1]
         query_start_loc = self.input_buffers.query_start_loc[: num_reqs_padded + 1]
+        is_prefilling_np = self.req_states.is_prefilling(idx_mapping_np)
 
         # Get prefill tokens if any.
-        if self.req_states.any_prefills(idx_mapping_np):
+        if np.any(is_prefilling_np):
             prepare_prefill_inputs(
                 self.input_buffers.input_ids,
                 self.req_states.next_prefill_tokens,
@@ -775,6 +871,14 @@ def prepare_inputs(
             total_num_logits,
         )
 
+        # CPU upper bound on seq_lens; padded entries left at zero.
+        seq_lens_cpu_upper_bound_np = np.zeros(num_reqs_padded, dtype=np.int32)
+        np.add(
+            self.req_states.num_computed_tokens_np[idx_mapping_np],
+            num_scheduled_tokens,
+            out=seq_lens_cpu_upper_bound_np[:num_reqs],
+        )
+        seq_lens_cpu_upper_bound = torch.from_numpy(seq_lens_cpu_upper_bound_np)
         return InputBatch(
             req_ids=req_ids,
             num_reqs=num_reqs,
@@ -787,10 +891,13 @@ def prepare_inputs(
             num_tokens=num_tokens,
             num_tokens_after_padding=num_tokens_after_padding,
             num_draft_tokens=total_num_draft_tokens,
+            num_draft_tokens_per_req=num_draft_tokens_per_req,
             query_start_loc=query_start_loc,
             query_start_loc_np=query_start_loc_np,
             seq_lens=seq_lens,
+            seq_lens_cpu_upper_bound=seq_lens_cpu_upper_bound,
             dcp_local_seq_lens=dcp_local_seq_lens,
+            is_prefilling_np=is_prefilling_np,
             input_ids=self.input_buffers.input_ids[:num_tokens_after_padding],
             positions=self.input_buffers.positions[:num_tokens_after_padding],
             logits_indices=logits_indices,
@@ -896,13 +1003,7 @@ def postprocess(
             self.req_states.total_len.gpu,
         )
 
-        # Update the number of computed prefill tokens.
-        idx_mapping_np = input_batch.idx_mapping_np
-        computed_prefill = self.req_states.num_computed_prefill_tokens
-        computed_prefill[idx_mapping_np] += input_batch.num_scheduled_tokens
-        np.minimum(
-            computed_prefill, self.req_states.prefill_len.np, out=computed_prefill
-        )
+        self.model_state.postprocess_state(input_batch, num_sampled)
 
     @torch.inference_mode()
     def execute_model(
@@ -911,6 +1012,7 @@ def execute_model(
         intermediate_tensors: IntermediateTensors | None = None,
         dummy_run: bool = False,
         skip_attn_for_dummy_run: bool = False,
+        is_profile: bool = False,
     ) -> ModelRunnerOutput | IntermediateTensors | None:
         if not dummy_run:
             # Update the request states.
@@ -930,34 +1032,22 @@ def execute_model(
         max_query_len = max(scheduler_output.num_scheduled_tokens.values())
         uniform_tok_count = get_uniform_token_count(num_reqs, num_toks, max_query_len)
 
-        batch_desc = self.cudagraph_manager.dispatch(
-            num_reqs, num_toks, uniform_tok_count
-        )
-        num_tokens_across_dp = None
-
         skip_compiled = False
         if self.is_encoder_decoder and scheduler_output.scheduled_encoder_inputs:
             # Encoder-decoder models such as Whisper should run eager/non-compiled
             # when encoder inputs are scheduled, because this step updates
             # cross-attention cache with dynamic encoder outputs.
-            # Override batch_desc to NONE.
             skip_compiled = True
-            batch_desc = BatchExecutionDescriptor(
-                cg_mode=CUDAGraphMode.NONE,
-                num_tokens=num_toks,
-                num_reqs=num_reqs,
-            )
 
-        if self.dp_size > 1:
-            batch_desc, num_tokens_across_dp = sync_cudagraph_and_dp_padding(
-                self.cudagraph_manager,
-                batch_desc,
-                num_toks,
-                num_reqs,
-                uniform_tok_count,
-                self.dp_size,
-                self.dp_rank,
-            )
+        batch_desc, num_tokens_across_dp = dispatch_cg_and_sync_dp(
+            self.cudagraph_manager,
+            num_reqs,
+            num_toks,
+            uniform_tok_count,
+            self.dp_size,
+            self.dp_rank,
+            need_eager=is_profile or skip_compiled,
+        )
 
         if batch_desc.num_tokens == 0:
             # All DP ranks have zero tokens to run.
@@ -988,9 +1078,37 @@ def execute_model(
             if not skip_attn_for_dummy_run:
                 block_tables, slot_mappings = self.prepare_dummy_attn(input_batch)
             else:
+                assert batch_desc.cg_mode != CUDAGraphMode.FULL, (
+                    "Attention metadata must be prepared for dummy runs when using "
+                    "FULL cudagraph mode."
+                )
                 block_tables = None
                 slot_mappings = None
-            # FIXME(woosuk): Fix warmup for LoRA.
+            if self.lora_config:
+                # program a no-LoRA mapping here so kernels early-exit instead of
+                # reading uninitialized metadata during dummy runs.
+                # FIXME: Replace this with LoRA warmup:
+                # https://github.com/vllm-project/vllm/pull/35536
+                assert hasattr(self, "lora_manager")
+                adapter_manager = self.lora_manager._adapter_manager
+                adapter_manager.set_adapter_mapping(
+                    LoRAMapping(
+                        index_mapping=(0,) * input_batch.num_tokens_after_padding,
+                        prompt_mapping=(0,) * input_batch.num_reqs,
+                        is_prefill=True,
+                    )
+                )
+                seen_wrappers: set[int] = set()
+                for punica_wrapper in adapter_manager.punica_wrapper_mapping.values():
+                    if id(punica_wrapper) in seen_wrappers:
+                        continue
+                    seen_wrappers.add(id(punica_wrapper))
+                    for kernel_meta in (
+                        punica_wrapper.token_mapping_meta,  # type: ignore[attr-defined]
+                        punica_wrapper.prompt_mapping_meta,  # type: ignore[attr-defined]
+                    ):
+                        kernel_meta.no_lora_flag_cpu[0] = False
+                        kernel_meta.num_active_loras_cpu[0] = 1
 
         attn_metadata = None
         slot_mappings_by_layer = None
@@ -1051,6 +1169,7 @@ def execute_model(
             # Use explicit cudagraph replay for FULL mode.
             # NOTE(woosuk): Here, we don't need to pass the input tensors,
             # because they are already copied to the CUDA graph input buffers.
+            assert self.cudagraph_manager is not None
             self.kv_connector.pre_forward(scheduler_output)
             model_output = self.cudagraph_manager.run_fullgraph(batch_desc)
         else:
@@ -1096,7 +1215,6 @@ def execute_model(
             hidden_states=hidden_states,
             aux_hidden_states=aux_hidden_states,
             kv_connector_output=kv_connector_output,
-            num_tokens_across_dp=num_tokens_across_dp,
         )
 
         if not self.is_last_pp_rank:
@@ -1121,7 +1239,6 @@ def sample_tokens(
         hidden_states = self.execute_model_state.hidden_states
         aux_hidden_states = self.execute_model_state.aux_hidden_states
         kv_connector_output = self.execute_model_state.kv_connector_output
-        num_tokens_across_dp = self.execute_model_state.num_tokens_across_dp
         self.execute_model_state = None
 
         if not self.is_last_pp_rank:
@@ -1171,7 +1288,6 @@ def sample_tokens(
             num_sampled_tokens=num_sampled,
             main_stream=self.main_stream,
             copy_stream=self.output_copy_stream,
-            copy_event=self.output_copy_event,
         )
 
         mm_inputs: tuple[list[torch.Tensor], torch.Tensor] | None = None
@@ -1203,11 +1319,19 @@ def sample_tokens(
 
         if self.speculator is not None:
             assert self.sampler is not None
+            # Let the target override the hidden state fed to the drafter
+            # (e.g. DeepSeek V4 MTP needs the pre-hc_head residual). The
+            # target returns a persistent buffer sized at max_num_batched_tokens;
+            # slice to the active token count that propose() expects.
+            spec_hidden_states = hidden_states
+            if hasattr(self.model, "get_mtp_target_hidden_states"):
+                pre_hc_hidden_states = self.model.get_mtp_target_hidden_states()
+                spec_hidden_states = pre_hc_hidden_states[: hidden_states.shape[0]]  # type: ignore[union-attr]
             draft_tokens = self.speculator.propose(
                 input_batch,
                 attn_metadata,
                 slot_mappings_by_layer,
-                hidden_states,
+                spec_hidden_states,
                 aux_hidden_states,
                 num_sampled,
                 num_rejected,
@@ -1215,7 +1339,6 @@ def sample_tokens(
                 self.req_states.next_prefill_tokens,
                 self.sampler.sampling_states.temperature.gpu,
                 self.sampler.sampling_states.seeds.gpu,
-                num_tokens_across_dp=num_tokens_across_dp,
                 mm_inputs=mm_inputs,
             )
             self.req_states.draft_tokens[input_batch.idx_mapping] = draft_tokens
@@ -1261,7 +1384,6 @@ def pool(self) -> AsyncPoolingOutput | ModelRunnerOutput | None:
             is_valid=is_valid,
             main_stream=self.main_stream,
             copy_stream=self.output_copy_stream,
-            copy_event=self.output_copy_event,
         )
 
         self.postprocess_pool(input_batch)
@@ -1277,13 +1399,23 @@ def postprocess_pool(self, input_batch: InputBatch) -> None:
             input_batch.query_start_loc,
         )
 
-        # Update the number of computed prefill tokens.
-        idx_mapping_np = input_batch.idx_mapping_np
-        computed_prefill = self.req_states.num_computed_prefill_tokens
-        computed_prefill[idx_mapping_np] += input_batch.num_scheduled_tokens
-        np.minimum(
-            computed_prefill, self.req_states.prefill_len.np, out=computed_prefill
-        )
+    def shutdown(self) -> None:
+        """Release GPU tensors (model weights, KV caches, workspace) so that
+        memory is reclaimable when running in the same process."""
+        torch.accelerator.synchronize()
+        if hasattr(self, "kv_caches"):
+            self.kv_caches.clear()
+        if hasattr(self, "attn_groups"):
+            self.attn_groups.clear()
+        if hasattr(self, "kv_cache_config"):
+            del self.kv_cache_config
+        free_before_shutdown(self.vllm_config)
+        if hasattr(self, "model"):
+            del self.model
+
+        gc.collect()
+        torch.accelerator.empty_cache()
+        logger.debug("Cleaned up model weights, KV caches, and workspace")
 
     ########### EPLB methods start ###########
     @property
@@ -1324,4 +1456,3 @@ class ExecuteModelState(NamedTuple):
     hidden_states: torch.Tensor | None
     aux_hidden_states: list[torch.Tensor] | None
     kv_connector_output: KVConnectorOutput | None
-    num_tokens_across_dp: torch.Tensor | None
diff --git a/vllm/v1/worker/gpu/model_states/__init__.py b/vllm/v1/worker/gpu/model_states/__init__.py
index 651452553332..06b5a92c3952 100644
--- a/vllm/v1/worker/gpu/model_states/__init__.py
+++ b/vllm/v1/worker/gpu/model_states/__init__.py
@@ -18,6 +18,11 @@ def init_model_state(
 
         return WhisperModelState(vllm_config, model, encoder_cache, device)
 
+    if vllm_config.model_config.is_hybrid:
+        from vllm.v1.worker.gpu.model_states.mamba_hybrid import MambaHybridModelState
+
+        return MambaHybridModelState(vllm_config, model, encoder_cache, device)
+
     from vllm.v1.worker.gpu.model_states.default import DefaultModelState
 
     return DefaultModelState(vllm_config, model, encoder_cache, device)
diff --git a/vllm/v1/worker/gpu/model_states/default.py b/vllm/v1/worker/gpu/model_states/default.py
index 8e73867deb2e..7f7955a58ab3 100644
--- a/vllm/v1/worker/gpu/model_states/default.py
+++ b/vllm/v1/worker/gpu/model_states/default.py
@@ -173,6 +173,12 @@ def prepare_attn(
             num_tokens = input_batch.num_tokens
         query_start_loc_cpu = torch.from_numpy(input_batch.query_start_loc_np)
         max_query_len = input_batch.num_scheduled_tokens.max().item()
+        seq_lens_cpu_upper_bound = input_batch.seq_lens_cpu_upper_bound
+        if for_capture:
+            # Capture with worst-case max_seq_len so the graph is valid at any replay.
+            max_seq_len = self.max_model_len
+        else:
+            max_seq_len = int(seq_lens_cpu_upper_bound[:num_reqs].max().item())
         attn_metadata = build_attn_metadata(
             attn_groups=attn_groups,
             num_reqs=num_reqs,
@@ -181,10 +187,13 @@ def prepare_attn(
             query_start_loc_cpu=query_start_loc_cpu,
             max_query_len=max_query_len,
             seq_lens=input_batch.seq_lens,
-            max_seq_len=self.max_model_len,
+            max_seq_len=max_seq_len,
             block_tables=block_tables,
             slot_mappings=slot_mappings,
             kv_cache_config=kv_cache_config,
+            seq_lens_cpu_upper_bound=seq_lens_cpu_upper_bound,
             dcp_local_seq_lens=input_batch.dcp_local_seq_lens,
+            positions=input_batch.positions,
+            for_cudagraph_capture=for_capture,
         )
         return attn_metadata
diff --git a/vllm/v1/worker/gpu/model_states/interface.py b/vllm/v1/worker/gpu/model_states/interface.py
index d83ab2fc515f..721e5c2013de 100644
--- a/vllm/v1/worker/gpu/model_states/interface.py
+++ b/vllm/v1/worker/gpu/model_states/interface.py
@@ -17,6 +17,24 @@
 from vllm.v1.worker.utils import AttentionGroup
 
 
+class ModelSpecificAttnMetadata:
+    """Base class for model-specific attention metadata."""
+
+    def get_extra_common_attn_kwargs(
+        self,
+        kv_cache_group_id: int,
+        num_reqs: int,
+    ) -> dict[str, Any]:
+        return {}
+
+    def get_extra_attn_kwargs(
+        self,
+        attn_metadata_builder: Any,
+        num_reqs: int,
+    ) -> dict[str, Any]:
+        return {}
+
+
 class ModelState(ABC):
     @abstractmethod
     def __init__(
@@ -38,6 +56,13 @@ def add_request(self, req_index: int, new_req_data: NewRequestData) -> None:
     def apply_staged_writes(self) -> None:
         return None
 
+    def postprocess_state(
+        self,
+        input_batch: InputBatch,
+        num_sampled: torch.Tensor,
+    ) -> None:
+        return None
+
     @abstractmethod
     def get_mm_embeddings(
         self,
diff --git a/vllm/v1/worker/gpu/model_states/mamba_hybrid.py b/vllm/v1/worker/gpu/model_states/mamba_hybrid.py
new file mode 100644
index 000000000000..93115fdf64dd
--- /dev/null
+++ b/vllm/v1/worker/gpu/model_states/mamba_hybrid.py
@@ -0,0 +1,150 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from dataclasses import dataclass
+from typing import Any
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+from vllm.config import VllmConfig
+from vllm.config.compilation import CUDAGraphMode
+from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadataBuilder
+from vllm.v1.attention.backends.mamba2_attn import Mamba2AttentionMetadataBuilder
+from vllm.v1.kv_cache_interface import KVCacheConfig
+from vllm.v1.worker.gpu.attn_utils import build_attn_metadata
+from vllm.v1.worker.gpu.input_batch import InputBatch
+from vllm.v1.worker.gpu.mm.encoder_cache import EncoderCache
+from vllm.v1.worker.gpu.model_states.default import DefaultModelState
+from vllm.v1.worker.gpu.model_states.interface import ModelSpecificAttnMetadata
+from vllm.v1.worker.utils import AttentionGroup
+
+
+@dataclass
+class MambaHybridAttnMetadata(ModelSpecificAttnMetadata):
+    is_prefilling: torch.Tensor
+    num_accepted_tokens: torch.Tensor | None = None
+    num_decode_draft_tokens_cpu: torch.Tensor | None = None
+
+    def get_extra_common_attn_kwargs(
+        self,
+        kv_cache_group_id: int,
+        num_reqs: int,
+    ) -> dict[str, Any]:
+        return {"is_prefilling": self.is_prefilling[:num_reqs]}
+
+    def get_extra_attn_kwargs(
+        self,
+        attn_metadata_builder: Any,
+        num_reqs: int,
+    ) -> dict[str, Any]:
+        if not isinstance(
+            attn_metadata_builder,
+            (Mamba2AttentionMetadataBuilder, GDNAttentionMetadataBuilder),
+        ):
+            return {}
+        return {
+            "num_accepted_tokens": None
+            if self.num_accepted_tokens is None
+            else self.num_accepted_tokens[:num_reqs],
+            "num_decode_draft_tokens_cpu": None
+            if self.num_decode_draft_tokens_cpu is None
+            else self.num_decode_draft_tokens_cpu[:num_reqs],
+        }
+
+
+class MambaHybridModelState(DefaultModelState):
+    """Model state for hybrid attention + Mamba / linear-attention models."""
+
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        model: nn.Module,
+        encoder_cache: EncoderCache | None,
+        device: torch.device,
+    ) -> None:
+        super().__init__(vllm_config, model, encoder_cache, device)
+        self.num_accepted_tokens_gpu = torch.ones(
+            self.max_num_reqs, dtype=torch.int32, device=self.device
+        )
+
+    def prepare_attn(
+        self,
+        input_batch: InputBatch,
+        cudagraph_mode: CUDAGraphMode,
+        block_tables: tuple[torch.Tensor, ...],
+        slot_mappings: torch.Tensor,
+        attn_groups: list[list[AttentionGroup]],
+        kv_cache_config: KVCacheConfig,
+        for_capture: bool = False,
+    ) -> dict[str, Any]:
+        if cudagraph_mode == CUDAGraphMode.FULL:
+            num_reqs = input_batch.num_reqs_after_padding
+            num_tokens = input_batch.num_tokens_after_padding
+        else:
+            num_reqs = input_batch.num_reqs
+            num_tokens = input_batch.num_tokens
+        query_start_loc_cpu = torch.from_numpy(input_batch.query_start_loc_np)
+        max_query_len = input_batch.num_scheduled_tokens.max().item()
+
+        is_prefilling = torch.zeros(num_reqs, dtype=torch.bool, device="cpu")
+        is_prefilling[: input_batch.num_reqs] = torch.from_numpy(
+            input_batch.is_prefilling_np
+        )
+        # During CUDAGraph capture, num_decode_draft_tokens_cpu and num_accepted_tokens
+        # are created by attn_metadata_builder.build_for_cudagraph_capture, so we only
+        # compute them during actual (non-capture) forward execution.
+        num_accepted_tokens = None
+        num_decode_draft_tokens_cpu = None
+        if not for_capture:
+            num_accepted_tokens = self.num_accepted_tokens_gpu.new_ones(num_reqs)
+            num_accepted_tokens[: input_batch.num_reqs] = self.num_accepted_tokens_gpu[
+                input_batch.idx_mapping
+            ]
+
+            # GDN uses >= 0 to select spec-decode rows, so non-decode rows
+            # need the -1 sentinel rather than a raw zero draft count.
+            num_decode_draft_tokens_np = np.full(num_reqs, -1, dtype=np.int32)
+            if input_batch.num_draft_tokens_per_req is not None:
+                spec_decode_mask = (
+                    input_batch.num_draft_tokens_per_req > 0
+                ) & ~input_batch.is_prefilling_np
+                num_decode_draft_tokens_np[: input_batch.num_reqs] = np.where(
+                    spec_decode_mask,
+                    input_batch.num_draft_tokens_per_req,
+                    -1,
+                )
+            num_decode_draft_tokens_cpu = torch.from_numpy(num_decode_draft_tokens_np)
+
+        mamba_attn_metadata = MambaHybridAttnMetadata(
+            is_prefilling=is_prefilling,
+            num_accepted_tokens=num_accepted_tokens,
+            num_decode_draft_tokens_cpu=num_decode_draft_tokens_cpu,
+        )
+        return build_attn_metadata(
+            attn_groups=attn_groups,
+            num_reqs=num_reqs,
+            num_tokens=num_tokens,
+            query_start_loc_gpu=input_batch.query_start_loc,
+            query_start_loc_cpu=query_start_loc_cpu,
+            max_query_len=max_query_len,
+            seq_lens=input_batch.seq_lens,
+            max_seq_len=self.max_model_len,
+            block_tables=block_tables,
+            slot_mappings=slot_mappings,
+            kv_cache_config=kv_cache_config,
+            dcp_local_seq_lens=input_batch.dcp_local_seq_lens,
+            model_specific_attn_metadata=mamba_attn_metadata,
+            for_cudagraph_capture=for_capture,
+        )
+
+    def postprocess_state(
+        self,
+        input_batch: InputBatch,
+        num_sampled: torch.Tensor,
+    ) -> None:
+        # Chunked prefill does not sample a token, so num_sampled can be 0.
+        # Mamba treats num_accepted_tokens=1 as the neutral non-spec value.
+        self.num_accepted_tokens_gpu[input_batch.idx_mapping] = torch.clamp(
+            num_sampled, min=1
+        )
diff --git a/vllm/v1/worker/gpu/model_states/whisper.py b/vllm/v1/worker/gpu/model_states/whisper.py
index 1268fee88210..0ef3cadc87ab 100644
--- a/vllm/v1/worker/gpu/model_states/whisper.py
+++ b/vllm/v1/worker/gpu/model_states/whisper.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from dataclasses import dataclass
 from typing import Any
 
 import numpy as np
@@ -13,11 +14,33 @@
 from vllm.v1.worker.gpu.input_batch import InputBatch
 from vllm.v1.worker.gpu.mm.encoder_cache import EncoderCache
 from vllm.v1.worker.gpu.mm.encoder_runner import EncoderRunner
-from vllm.v1.worker.gpu.model_states.interface import ModelState
+from vllm.v1.worker.gpu.model_states.interface import (
+    ModelSpecificAttnMetadata,
+    ModelState,
+)
 from vllm.v1.worker.gpu.states import RequestState
 from vllm.v1.worker.utils import AttentionGroup
 
 
+@dataclass
+class WhisperAttnMetadata(ModelSpecificAttnMetadata):
+    encoder_seq_lens: dict[int, tuple[torch.Tensor, np.ndarray]]
+
+    def get_extra_common_attn_kwargs(
+        self,
+        kv_cache_group_id: int,
+        num_reqs: int,
+    ) -> dict[str, Any]:
+        encoder_seq_lens = self.encoder_seq_lens.get(kv_cache_group_id)
+        if encoder_seq_lens is None:
+            return {}
+        encoder_seq_lens_gpu, encoder_seq_lens_cpu = encoder_seq_lens
+        return {
+            "encoder_seq_lens": encoder_seq_lens_gpu[:num_reqs],
+            "encoder_seq_lens_cpu": encoder_seq_lens_cpu[:num_reqs],
+        }
+
+
 class WhisperModelState(ModelState):
     def __init__(
         self,
@@ -111,12 +134,17 @@ def prepare_attn(
         else:
             num_reqs = input_batch.num_reqs
             num_tokens = input_batch.num_tokens
-        encoder_seq_lens = self._get_encoder_seq_lens(
-            input_batch.req_ids, attn_groups, for_capture
+        whisper_attn_metadata = WhisperAttnMetadata(
+            self._get_encoder_seq_lens(input_batch.req_ids, attn_groups, for_capture)
         )
 
         query_start_loc_cpu = torch.from_numpy(input_batch.query_start_loc_np)
         max_query_len = input_batch.num_scheduled_tokens.max().item()
+        seq_lens_cpu_upper_bound = input_batch.seq_lens_cpu_upper_bound
+        if for_capture:
+            max_seq_len = self.max_model_len
+        else:
+            max_seq_len = int(seq_lens_cpu_upper_bound[:num_reqs].max().item())
         attn_metadata = build_attn_metadata(
             attn_groups=attn_groups,
             num_reqs=num_reqs,
@@ -125,12 +153,14 @@ def prepare_attn(
             query_start_loc_cpu=query_start_loc_cpu,
             max_query_len=max_query_len,
             seq_lens=input_batch.seq_lens,
-            max_seq_len=self.max_model_len,
+            max_seq_len=max_seq_len,
             block_tables=block_tables,
             slot_mappings=slot_mappings,
             kv_cache_config=kv_cache_config,
+            seq_lens_cpu_upper_bound=seq_lens_cpu_upper_bound,
             dcp_local_seq_lens=input_batch.dcp_local_seq_lens,
-            encoder_seq_lens=encoder_seq_lens,
+            model_specific_attn_metadata=whisper_attn_metadata,
+            for_cudagraph_capture=for_capture,
         )
         return attn_metadata
 
diff --git a/vllm/v1/worker/gpu/pool/late_interaction_runner.py b/vllm/v1/worker/gpu/pool/late_interaction_runner.py
index 221dee558699..da87c8f05d6b 100644
--- a/vllm/v1/worker/gpu/pool/late_interaction_runner.py
+++ b/vllm/v1/worker/gpu/pool/late_interaction_runner.py
@@ -9,7 +9,7 @@
 from vllm.v1.pool.late_interaction import (
     LATE_INTERACTION_MODE_CACHE_QUERY,
     LATE_INTERACTION_MODE_SCORE_DOC,
-    compute_maxsim_scores,
+    compute_maxsim_score_batched,
 )
 
 
@@ -116,7 +116,7 @@ def postprocess_pooler_output(
             raise ValueError(f"Unsupported late-interaction mode: {mode!r}")
 
         if score_indices:
-            score_values = compute_maxsim_scores(score_queries, score_docs)
+            score_values = compute_maxsim_score_batched(score_queries, score_docs)
             for i, req_id, query_key, score in zip(
                 score_indices, score_req_ids, score_query_keys, score_values
             ):
diff --git a/vllm/v1/worker/gpu/sample/gumbel.py b/vllm/v1/worker/gpu/sample/gumbel.py
index 0d08ceb83bc0..aaa49283d32a 100644
--- a/vllm/v1/worker/gpu/sample/gumbel.py
+++ b/vllm/v1/worker/gpu/sample/gumbel.py
@@ -2,7 +2,18 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import torch
 
-from vllm.triton_utils import tl, triton
+from vllm.triton_utils import HAS_TRITON, tl, triton
+
+# Smallest positive normal fp32 value. Used to clamp the uniform draw so that
+# `log(u)` cannot produce -inf (and thus `-log(-log(u))` stays finite).
+#
+# Triton requires globals accessed from `@triton.jit` functions to be wrapped
+# in `tl.constexpr(...)`. We can only do that when Triton is actually
+# available — on the CPU worker path `tl` is a placeholder whose `constexpr`
+# attribute is `None`, and `tl.constexpr(...)` would crash at import time.
+_FP32_TINY = (
+    tl.constexpr(float.fromhex("0x1p-126")) if HAS_TRITON else float.fromhex("0x1p-126")
+)
 
 
 @triton.jit
@@ -65,36 +76,23 @@ def tl_rand64(seed, offset, includes_zero: tl.constexpr):
 
 
 @triton.jit
-def _gumbel_sample_kernel(
-    local_argmax_ptr,
-    local_argmax_stride,
-    local_max_ptr,
-    local_max_stride,
-    processed_logits_ptr,
-    processed_logits_stride,
-    logits_ptr,
-    logits_stride,
+def gumbel_block_argmax(
+    logits,
+    block,
+    mask,
+    token_idx,
     expanded_idx_mapping_ptr,
+    temp_ptr,
     seeds_ptr,
     pos_ptr,
-    temp_ptr,
+    processed_logits_ptr,
+    processed_logits_stride,
+    processed_logits_col_ptr,
     vocab_size,
-    BLOCK_SIZE: tl.constexpr,
     APPLY_TEMPERATURE: tl.constexpr,
+    USE_FP64: tl.constexpr,
 ):
-    token_idx = tl.program_id(0)
     req_state_idx = tl.load(expanded_idx_mapping_ptr + token_idx)
-
-    block_idx = tl.program_id(1)
-    block = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
-    mask = block < vocab_size
-    logits = tl.load(
-        logits_ptr + token_idx * logits_stride + block,
-        mask=mask,
-        other=float("-inf"),
-    )
-    logits = logits.to(tl.float32)
-
     temp = tl.load(temp_ptr + req_state_idx).to(tl.float32)
     if temp != 0.0 and APPLY_TEMPERATURE:
         # Apply temperature.
@@ -102,30 +100,92 @@ def _gumbel_sample_kernel(
         # E.g., if the kernel uses tl.div_rn, we should use tl.div_rn here too.
         logits = logits / temp
 
-    # Store the temperature-applied logits.
     if processed_logits_ptr is not None:
+        # Store the temperature-applied logits.
+        if processed_logits_col_ptr is not None:
+            col = tl.load(processed_logits_col_ptr)
+        else:
+            col = 0
         tl.store(
-            processed_logits_ptr + req_state_idx * processed_logits_stride + block,
+            processed_logits_ptr
+            + req_state_idx * processed_logits_stride
+            + col * vocab_size
+            + block,
             logits,
             mask=mask,
         )
 
-    logits = logits.to(tl.float64)
+    # fp32 is the default reduction dtype; fp64 is ~1/32–1/64x the throughput
+    # on H100/Ada/Blackwell and empirically indistinguishable for Gumbel-max.
+    if USE_FP64:
+        logits = logits.to(tl.float64)
     if temp != 0.0:
         # Calculate the seed for gumbel noise.
         seed = tl.load(seeds_ptr + req_state_idx)
         pos = tl.load(pos_ptr + token_idx)
         gumbel_seed = tl.randint(seed, pos)
 
-        # tl.rand returns fp32, so build a true fp64 uniform from 64 random
-        # bits before applying the double-log transform.
-        u = tl_rand64(gumbel_seed, block, includes_zero=False)
+        if USE_FP64:
+            u = tl_rand64(gumbel_seed, block, includes_zero=False)
+        else:
+            u = tl.rand(gumbel_seed, block)
+            u = tl.maximum(u, _FP32_TINY)
         gumbel_noise = -tl.log(-tl.log(u))
 
         # Apply gumbel noise.
         logits = tl.where(mask, logits + gumbel_noise, float("-inf"))
 
     value, idx = tl.max(logits, axis=0, return_indices=True)
+    return value, idx
+
+
+@triton.jit
+def _gumbel_sample_kernel(
+    local_argmax_ptr,
+    local_argmax_stride,
+    local_max_ptr,
+    local_max_stride,
+    processed_logits_ptr,
+    processed_logits_stride,
+    processed_logits_col_ptr,
+    logits_ptr,
+    logits_stride,
+    expanded_idx_mapping_ptr,
+    seeds_ptr,
+    pos_ptr,
+    temp_ptr,
+    vocab_size,
+    BLOCK_SIZE: tl.constexpr,
+    APPLY_TEMPERATURE: tl.constexpr,
+    USE_FP64: tl.constexpr,
+):
+    token_idx = tl.program_id(0)
+    block_idx = tl.program_id(1)
+    block = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+    mask = block < vocab_size
+    logits = tl.load(
+        logits_ptr + token_idx * logits_stride + block,
+        mask=mask,
+        other=float("-inf"),
+    )
+    logits = logits.to(tl.float32)
+
+    value, idx = gumbel_block_argmax(
+        logits,
+        block,
+        mask,
+        token_idx,
+        expanded_idx_mapping_ptr,
+        temp_ptr,
+        seeds_ptr,
+        pos_ptr,
+        processed_logits_ptr,
+        processed_logits_stride,
+        processed_logits_col_ptr,
+        vocab_size,
+        APPLY_TEMPERATURE=APPLY_TEMPERATURE,
+        USE_FP64=USE_FP64,
+    )
     token_id = block_idx * BLOCK_SIZE + idx
     tl.store(local_argmax_ptr + token_idx * local_argmax_stride + block_idx, token_id)
     tl.store(local_max_ptr + token_idx * local_max_stride + block_idx, value)
@@ -138,20 +198,24 @@ def gumbel_sample(
     seed: torch.Tensor,  # [max_num_reqs]
     pos: torch.Tensor,  # [num_tokens]
     apply_temperature: bool,
-    processed_logits_out: torch.Tensor | None = None,  # [num_reqs, vocab_size]
+    output_processed_logits: torch.Tensor | None = None,
+    output_processed_logits_col: torch.Tensor | None = None,
+    use_fp64: bool = False,
 ) -> torch.Tensor:
     num_tokens, vocab_size = logits.shape
     BLOCK_SIZE = 1024
     num_blocks = triton.cdiv(vocab_size, BLOCK_SIZE)
     local_argmax = logits.new_empty(num_tokens, num_blocks, dtype=torch.int64)
-    local_max = logits.new_empty(num_tokens, num_blocks, dtype=torch.float64)
+    local_max_dtype = torch.float64 if use_fp64 else torch.float32
+    local_max = logits.new_empty(num_tokens, num_blocks, dtype=local_max_dtype)
     _gumbel_sample_kernel[(num_tokens, num_blocks)](
         local_argmax,
         local_argmax.stride(0),
         local_max,
         local_max.stride(0),
-        processed_logits_out,
-        processed_logits_out.stride(0) if processed_logits_out is not None else 0,
+        output_processed_logits,
+        output_processed_logits.stride(0) if output_processed_logits is not None else 0,
+        output_processed_logits_col,
         logits,
         logits.stride(0),
         expanded_idx_mapping,
@@ -161,6 +225,7 @@ def gumbel_sample(
         vocab_size,
         BLOCK_SIZE=BLOCK_SIZE,
         APPLY_TEMPERATURE=apply_temperature,
+        USE_FP64=use_fp64,
     )
     # NOTE(woosuk): Use int64 for later indexing.
     max_block_idx = local_max.argmax(dim=-1, keepdim=True)
diff --git a/vllm/v1/worker/gpu/sample/logprob.py b/vllm/v1/worker/gpu/sample/logprob.py
index 4317cad9ce7f..cf24c186e93a 100644
--- a/vllm/v1/worker/gpu/sample/logprob.py
+++ b/vllm/v1/worker/gpu/sample/logprob.py
@@ -1,10 +1,13 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import numpy as np
 import torch
 
+from vllm.sampling_params import MAX_LOGPROB_TOKEN_IDS, SamplingParams
 from vllm.triton_utils import tl, triton
 from vllm.v1.outputs import LogprobsTensors
+from vllm.v1.worker.gpu.buffer_utils import StagedWriteTensor, UvaBackedTensor
 
 
 @triton.jit
@@ -75,6 +78,9 @@ def _ranks_kernel(
 def compute_token_logprobs(
     logits: torch.Tensor, token_ids: torch.Tensor
 ) -> torch.Tensor:
+    # NOTE(woosuk): To save GPU memory, we do not materialize the full
+    # [batch_size, vocab_size] logprobs tensor. The kernel computes
+    # max + logsumexp per row and only emits logprobs at `token_ids`.
     batch_size, vocab_size = logits.shape
     token_ids = token_ids.to(torch.int64)
     num_logprobs = token_ids.shape[1]
@@ -97,18 +103,56 @@ def compute_topk_logprobs(
     num_logprobs: int,
     sampled_token_ids: torch.Tensor,
     cu_num_logits: list[int] | None = None,
+    logprob_token_ids_state: "LogprobTokenIdsState | None" = None,
+    expanded_idx_mapping: torch.Tensor | None = None,
+    max_per_req_token_ids: int = 0,
 ) -> LogprobsTensors:
     assert num_logprobs >= 0
     batch_size, vocab_size = logits.shape
-    logprob_token_ids = sampled_token_ids.unsqueeze(-1)
-    if num_logprobs > 0:
-        topk_indices = torch.topk(logits, num_logprobs, dim=-1).indices
-        logprob_token_ids = torch.cat((logprob_token_ids, topk_indices), dim=1)
-
-    # NOTE(woosuk): Here, to save GPU memory, we do not materialize the full
-    # logprobs tensor. Instead, we only compute and return the logprobs of
-    # the topk + 1 tokens.
-    logprobs = compute_token_logprobs(logits, logprob_token_ids)
+
+    if max_per_req_token_ids == 0:
+        # Fast path: no request asked for custom logprob_token_ids.
+        logprob_token_ids = sampled_token_ids.unsqueeze(-1)
+        if num_logprobs > 0:
+            topk_indices = torch.topk(logits, num_logprobs, dim=-1).indices
+            logprob_token_ids = torch.cat((logprob_token_ids, topk_indices), dim=1)
+        logprobs = compute_token_logprobs(logits, logprob_token_ids)
+    else:
+        # Some requests specified logprob_token_ids. Build the [batch_size,
+        # 1 + max_cols] token_ids matrix and validity mask on the GPU via a
+        # single triton kernel, overriding the topk columns with per-request
+        # tokens where applicable.
+        assert logprob_token_ids_state is not None
+        assert expanded_idx_mapping is not None
+
+        if num_logprobs > 0:
+            topk_token_ids = torch.topk(logits, num_logprobs, dim=-1).indices
+            topk_token_ids = topk_token_ids.to(torch.int32)
+        else:
+            # This tensor just used as an int32 pointer, data not accessed.
+            topk_token_ids = logprob_token_ids_state.token_ids.gpu
+
+        num_cols = max(num_logprobs, max_per_req_token_ids)
+        logprob_token_ids = sampled_token_ids.new_zeros((batch_size, 1 + num_cols))
+        valid_mask = torch.zeros_like(logprob_token_ids, dtype=torch.bool)
+        _fill_logprob_token_ids_kernel[(batch_size,)](
+            logprob_token_ids,
+            logprob_token_ids.stride(0),
+            valid_mask,
+            valid_mask.stride(0),
+            sampled_token_ids,
+            topk_token_ids,
+            topk_token_ids.stride(0),
+            expanded_idx_mapping,
+            logprob_token_ids_state.num_token_ids.gpu,
+            logprob_token_ids_state.token_ids.gpu,
+            logprob_token_ids_state.token_ids.gpu.stride(0),
+            NUM_TOPK=num_logprobs,
+            PADDED_COLS=triton.next_power_of_2(num_cols),
+        )
+        logprobs = compute_token_logprobs(logits, logprob_token_ids)
+        logprobs = logprobs.masked_fill(~valid_mask, float("-inf"))
+
     token_ranks = torch.empty(batch_size, dtype=torch.int64, device=logits.device)
     _ranks_kernel[(batch_size,)](
         token_ranks,
@@ -124,3 +168,85 @@ def compute_topk_logprobs(
         selected_token_ranks=token_ranks,
         cu_num_generated_tokens=cu_num_logits,
     )
+
+
+@triton.jit
+def _fill_logprob_token_ids_kernel(
+    # [batch_size, 1 + num_cols]
+    out_token_ids_ptr,
+    out_token_ids_stride,
+    # [batch_size, 1 + num_cols]
+    out_valid_mask_ptr,
+    out_valid_mask_stride,
+    sampled_token_ids_ptr,  # [batch_size]
+    topk_indices_ptr,  # [batch_size, NUM_TOPK] (unused when NUM_TOPK == 0)
+    topk_indices_stride,
+    expanded_idx_mapping_ptr,  # [batch_size] -> req_state_idx
+    num_per_req_token_ids_ptr,  # [max_num_reqs]
+    per_req_token_ids_ptr,  # [max_num_reqs, MAX_LOGPROB_TOKEN_IDS]
+    per_req_token_ids_stride,
+    NUM_TOPK: tl.constexpr,
+    PADDED_COLS: tl.constexpr,
+):
+    batch_idx = tl.program_id(0)
+
+    # Column 0: always the sampled token, always valid.
+    sampled = tl.load(sampled_token_ids_ptr + batch_idx)
+    tl.store(out_token_ids_ptr + batch_idx * out_token_ids_stride, sampled)
+    tl.store(out_valid_mask_ptr + batch_idx * out_valid_mask_stride, 1)
+
+    req_state_idx = tl.load(expanded_idx_mapping_ptr + batch_idx)
+    num_custom = tl.load(num_per_req_token_ids_ptr + req_state_idx)
+
+    col = tl.arange(0, PADDED_COLS)
+    tid_base = out_token_ids_ptr + batch_idx * out_token_ids_stride + 1
+    mask_base = out_valid_mask_ptr + batch_idx * out_valid_mask_stride + 1
+
+    if num_custom > 0:
+        # Override topk with per-request custom tokens.
+        src = per_req_token_ids_ptr + req_state_idx * per_req_token_ids_stride
+        valid = col < num_custom
+    else:
+        # Fill with topk indices (no-op when NUM_TOPK == 0).
+        src = topk_indices_ptr + batch_idx * topk_indices_stride
+        valid = col < NUM_TOPK
+
+    tokens = tl.load(src + col, mask=valid, other=0).to(tl.int64)
+    tl.store(tid_base + col, tokens, mask=valid)
+    tl.store(mask_base + col, tl.full([PADDED_COLS], 1, tl.int1), mask=valid)
+
+
+class LogprobTokenIdsState:
+    """Per-request override of which token ids' logprobs to return.
+
+    See `SamplingParams.logprob_token_ids`.
+    """
+
+    def __init__(self, max_num_reqs: int, device: torch.device):
+        self.max_num_reqs = max_num_reqs
+        self.num_token_ids = UvaBackedTensor(max_num_reqs, dtype=torch.int32)
+        self.token_ids = StagedWriteTensor(
+            (max_num_reqs, MAX_LOGPROB_TOKEN_IDS),
+            dtype=torch.int32,
+            device=device,
+        )
+
+    def add_request(self, req_idx: int, sampling_params: SamplingParams) -> None:
+        token_ids = sampling_params.logprob_token_ids
+        if not token_ids:
+            self.num_token_ids.np[req_idx] = 0
+            return
+        n = len(token_ids)
+        if n > MAX_LOGPROB_TOKEN_IDS:
+            raise ValueError(
+                f"Too many logprob_token_ids: {n}. The max is {MAX_LOGPROB_TOKEN_IDS}."
+            )
+        self.num_token_ids.np[req_idx] = n
+        self.token_ids.stage_write(req_idx, 0, token_ids)
+
+    def apply_staged_writes(self) -> None:
+        self.num_token_ids.copy_to_uva()
+        self.token_ids.apply_write()
+
+    def max_num_token_ids(self, idx_mapping_np: np.ndarray) -> int:
+        return int(self.num_token_ids.np[idx_mapping_np].max(initial=0))
diff --git a/vllm/v1/worker/gpu/sample/penalties.py b/vllm/v1/worker/gpu/sample/penalties.py
index 04adf9369233..b2ce2fb812a1 100644
--- a/vllm/v1/worker/gpu/sample/penalties.py
+++ b/vllm/v1/worker/gpu/sample/penalties.py
@@ -58,8 +58,7 @@ def apply_staged_writes(self) -> None:
             idx_mapping = async_tensor_h2d(
                 self._new_penalties_reqs,
                 dtype=torch.int32,
-                target_device=self.device,
-                pin_memory=True,
+                device=self.device,
             )
 
             prefill_lens = self.req_states.prefill_len.np[self._new_penalties_reqs]
@@ -86,7 +85,6 @@ def apply_penalties(
         idx_mapping_np: np.ndarray,
         input_ids: torch.Tensor,
         expanded_local_pos: torch.Tensor,
-        num_speculative_tokens: int,
     ) -> None:
         if not np.any(self.use_penalty[idx_mapping_np]):
             # No request uses penalties. Skip the kernel launch.
@@ -102,7 +100,6 @@ def apply_penalties(
             self.presence_penalty.gpu,
             self.prompt_bin_mask,
             self.output_bin_counts,
-            num_speculative_tokens,
         )
 
 
@@ -122,7 +119,6 @@ def _penalties_kernel(
     output_bin_counts_stride,
     vocab_size,
     BLOCK_SIZE: tl.constexpr,
-    MAX_SPEC_LEN: tl.constexpr,
 ):
     token_idx = tl.program_id(0)
     req_state_idx = tl.load(expanded_idx_mapping_ptr + token_idx)
@@ -150,18 +146,16 @@ def _penalties_kernel(
         other=0,
     )
 
-    # Compute cumulative draft_counts from previous positions in this request
+    # Accumulate draft token counts from previous positions directly into
+    # output_bin_counts (preserves its native tensor layout, avoiding an
+    # expensive shared-memory layout conversion after the loop).
     pos = tl.load(expanded_local_pos_ptr + token_idx)
     start_idx = token_idx - pos
-    draft_counts = tl.zeros((BLOCK_SIZE,), dtype=tl.int32)
-    for prev_pos in tl.static_range(MAX_SPEC_LEN):
-        if prev_pos < pos:
-            prev_token = tl.load(token_ids_ptr + start_idx + prev_pos + 1)
-            token_match = block == prev_token
-            draft_counts = draft_counts + token_match.to(tl.int32)
-
-    # Total counts = base output counts + cumulative draft counts
-    output_bin_counts = base_output_counts + draft_counts
+    output_bin_counts = base_output_counts
+    for prev_pos in tl.range(pos):
+        prev_token = tl.load(token_ids_ptr + start_idx + prev_pos + 1)
+        token_match = block == prev_token
+        output_bin_counts = output_bin_counts + token_match.to(tl.int32)
     output_bin_mask = output_bin_counts > 0
 
     # Apply repetition penalties.
@@ -199,7 +193,6 @@ def apply_penalties(
     presence_penalty: torch.Tensor,
     prompt_bin_mask: torch.Tensor,
     output_bin_counts: torch.Tensor,
-    num_speculative_tokens: int,
 ) -> None:
     num_tokens, vocab_size = logits.shape
     BLOCK_SIZE = 8192
@@ -219,7 +212,6 @@ def apply_penalties(
         output_bin_counts.stride(0),
         vocab_size,
         BLOCK_SIZE=BLOCK_SIZE,
-        MAX_SPEC_LEN=num_speculative_tokens,
     )
 
 
@@ -284,8 +276,10 @@ def bincount(
     output_bin_counts: torch.Tensor,
     max_prefill_len: int,
 ) -> None:
-    prompt_bin_mask[expanded_idx_mapping] = 0
-    output_bin_counts[expanded_idx_mapping] = 0
+    # Use index_fill_ instead of `tensor[idx] = 0` to avoid sync.
+    idx_long = expanded_idx_mapping.long()
+    prompt_bin_mask.index_fill_(0, idx_long, 0)
+    output_bin_counts.index_fill_(0, idx_long, 0)
     num_tokens = expanded_idx_mapping.shape[0]
     BLOCK_SIZE = 1024
     num_blocks = triton.cdiv(max_prefill_len, BLOCK_SIZE)
diff --git a/vllm/v1/worker/gpu/sample/prompt_logprob.py b/vllm/v1/worker/gpu/sample/prompt_logprob.py
index 1915a0539790..71feb7cf0e91 100644
--- a/vllm/v1/worker/gpu/sample/prompt_logprob.py
+++ b/vllm/v1/worker/gpu/sample/prompt_logprob.py
@@ -17,13 +17,14 @@ def __init__(self, max_num_reqs: int):
         self.max_num_reqs = max_num_reqs
 
         self.uses_prompt_logprobs = np.zeros(self.max_num_reqs, dtype=bool)
+        self.num_prompt_logprobs = np.zeros(self.max_num_reqs, dtype=np.int32)
         # req_idx -> list of in-progress LogprobsTensors
         self.in_progress_prompt_logprobs: dict[str, list[LogprobsTensors]] = {}
 
     def add_request(self, req_id: str, req_idx: int, sampling_params: SamplingParams):
-        # For now, only support prompt logprobs for the prompt tokens (not top-k).
         uses_prompt_logprobs = sampling_params.prompt_logprobs is not None
         self.uses_prompt_logprobs[req_idx] = uses_prompt_logprobs
+        self.num_prompt_logprobs[req_idx] = sampling_params.prompt_logprobs or 0
         if uses_prompt_logprobs:
             self.in_progress_prompt_logprobs[req_id] = []
 
@@ -52,11 +53,10 @@ def compute_prompt_logprobs(
             # Common case: No request asks for prompt logprobs.
             return {}
 
+        num_prompt_logprobs = self.num_prompt_logprobs[idx_mapping_np]
         prompt_lens = prompt_lens[idx_mapping_np]
-        # NOTE(woosuk): -1 because the last prompt token's hidden state is not
-        # needed for prompt logprobs.
         computed_prefill = num_computed_prefill_tokens[idx_mapping_np]
-        includes_prompt = computed_prefill < prompt_lens - 1
+        includes_prompt = computed_prefill < prompt_lens
         # NOTE(woosuk): If the request was resumed after preemption, its prompt
         # logprobs must have been computed before preemption. Skip.
         resumed_after_prompt = prompt_lens < prefill_lens[idx_mapping_np]
@@ -64,6 +64,14 @@ def compute_prompt_logprobs(
         if not np.any(needs_prompt_logprobs):
             return {}
 
+        # get the maximum number in this batch
+        requested_num_prompt_logprobs = num_prompt_logprobs[needs_prompt_logprobs]
+        max_num_prompt_logprobs = (
+            -1
+            if np.any(requested_num_prompt_logprobs == -1)
+            else int(requested_num_prompt_logprobs.max())
+        )
+
         # Get the prompt logprobs token_ids.
         prompt_logprobs_token_ids = get_prompt_logprobs_token_ids(
             input_batch.num_tokens,
@@ -72,45 +80,59 @@ def compute_prompt_logprobs(
             num_computed_tokens,
             all_token_ids,
         )
-        # Compute the prompt logprobs.
-        prompt_logprobs, prompt_ranks = compute_prompt_logprobs_with_chunking(
-            prompt_logprobs_token_ids,
-            hidden_states[: input_batch.num_tokens],
-            logits_fn,
+        prompt_token_ids, prompt_logprobs, prompt_ranks = (
+            compute_prompt_logprobs_with_chunking(
+                prompt_logprobs_token_ids,
+                hidden_states[: input_batch.num_tokens],
+                logits_fn,
+                max_num_prompt_logprobs,
+            )
         )
 
         pos_after_step = computed_prefill + input_batch.num_scheduled_tokens
         is_prompt_chunked = pos_after_step < prompt_lens
 
         query_start_loc_np = input_batch.query_start_loc_np
-        prompt_token_ids = prompt_logprobs_token_ids.unsqueeze(-1)
         prompt_logprobs_dict: dict[str, LogprobsTensors] = {}
         for i, req_id in enumerate(input_batch.req_ids):
             if not needs_prompt_logprobs[i]:
                 continue
 
+            req_is_prompt_chunked = is_prompt_chunked[i]
+            req_num_prompt_logprobs = int(num_prompt_logprobs[i])
             start_idx = query_start_loc_np[i]
             end_idx = query_start_loc_np[i + 1]
             assert start_idx < end_idx, (
                 f"start_idx ({start_idx}) >= end_idx ({end_idx})"
             )
-            if not is_prompt_chunked[i]:
+            if not req_is_prompt_chunked:
                 end_idx -= 1
-            logprobs = LogprobsTensors(
-                logprob_token_ids=prompt_token_ids[start_idx:end_idx],
-                logprobs=prompt_logprobs[start_idx:end_idx],
-                selected_token_ranks=prompt_ranks[start_idx:end_idx],
+
+            width = (
+                prompt_logprobs.shape[1]
+                if req_num_prompt_logprobs == -1
+                else req_num_prompt_logprobs + 1
+            )
+            # no logprobs if start_idx >= end_idx
+            logprobs = (
+                None
+                if start_idx >= end_idx
+                else LogprobsTensors(
+                    logprob_token_ids=prompt_token_ids[start_idx:end_idx, :width],
+                    logprobs=prompt_logprobs[start_idx:end_idx, :width],
+                    selected_token_ranks=prompt_ranks[start_idx:end_idx],
+                )
             )
 
             prompt_logprobs_list = self.in_progress_prompt_logprobs[req_id]
-            if is_prompt_chunked[i]:
-                # Prompt is chunked. Do not return the logprobs yet.
+            if logprobs is not None and (req_is_prompt_chunked or prompt_logprobs_list):
                 prompt_logprobs_list.append(logprobs)
+            if req_is_prompt_chunked:
+                # Prompt is chunked. Do not return the logprobs yet.
                 continue
 
             if prompt_logprobs_list:
                 # Merge the in-progress logprobs.
-                prompt_logprobs_list.append(logprobs)
                 logprobs = LogprobsTensors(
                     logprob_token_ids=torch.cat(
                         [x.logprob_token_ids for x in prompt_logprobs_list]
@@ -122,6 +144,9 @@ def compute_prompt_logprobs(
                 )
                 prompt_logprobs_list.clear()
 
+            if logprobs is None:
+                continue
+
             prompt_logprobs_dict[req_id] = logprobs
         return prompt_logprobs_dict
 
@@ -184,10 +209,12 @@ def compute_prompt_logprobs_with_chunking(
     prompt_token_ids: torch.Tensor,
     prompt_hidden_states: torch.Tensor,
     logits_fn: Callable[[torch.Tensor], torch.Tensor],
-) -> tuple[torch.Tensor, torch.Tensor]:
+    num_prompt_logprobs: int,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
     # Since materializing the full prompt logits can take too much memory,
     # we compute it in chunks.
     CHUNK_SIZE = 1024
+    token_ids = []
     logprobs = []
     ranks = []
     prompt_token_ids = prompt_token_ids.to(torch.int64)
@@ -195,14 +222,21 @@ def compute_prompt_logprobs_with_chunking(
         end_idx = start_idx + CHUNK_SIZE
         # NOTE(woosuk): logits_fn can be slow because it involves all-gather.
         prompt_logits = logits_fn(prompt_hidden_states[start_idx:end_idx])
+        requested_num_prompt_logprobs = (
+            prompt_logits.shape[-1]
+            if num_prompt_logprobs == -1
+            else num_prompt_logprobs
+        )
         prompt_logprobs = compute_topk_logprobs(
             prompt_logits,
-            0,  # num_logprobs
+            requested_num_prompt_logprobs,
             prompt_token_ids[start_idx:end_idx],
         )
+        token_ids.append(prompt_logprobs.logprob_token_ids)
         logprobs.append(prompt_logprobs.logprobs)
         ranks.append(prompt_logprobs.selected_token_ranks)
 
+    token_ids = torch.cat(token_ids, dim=0) if len(token_ids) > 1 else token_ids[0]
     logprobs = torch.cat(logprobs, dim=0) if len(logprobs) > 1 else logprobs[0]
     ranks = torch.cat(ranks, dim=0) if len(ranks) > 1 else ranks[0]
-    return logprobs, ranks
+    return token_ids, logprobs, ranks
diff --git a/vllm/v1/worker/gpu/sample/sampler.py b/vllm/v1/worker/gpu/sample/sampler.py
index 6f73ca87ac67..8bf884fd9b32 100644
--- a/vllm/v1/worker/gpu/sample/sampler.py
+++ b/vllm/v1/worker/gpu/sample/sampler.py
@@ -12,7 +12,10 @@
 from vllm.v1.worker.gpu.sample.bad_words import BadWordsState
 from vllm.v1.worker.gpu.sample.gumbel import gumbel_sample
 from vllm.v1.worker.gpu.sample.logit_bias import LogitBiasState
-from vllm.v1.worker.gpu.sample.logprob import compute_topk_logprobs
+from vllm.v1.worker.gpu.sample.logprob import (
+    LogprobTokenIdsState,
+    compute_topk_logprobs,
+)
 from vllm.v1.worker.gpu.sample.output import SamplerOutput
 from vllm.v1.worker.gpu.sample.penalties import PenaltiesState
 from vllm.v1.worker.gpu.sample.states import NO_LOGPROBS, SamplingStates
@@ -28,16 +31,19 @@ def __init__(
         req_states: RequestState,
         logprobs_mode: LogprobsMode = "raw_logprobs",
         num_speculative_tokens: int = 1,
+        use_fp64_gumbel: bool = False,
     ):
         if logprobs_mode not in ("processed_logprobs", "raw_logprobs"):
             raise NotImplementedError(f"Unsupported logprobs_mode: {logprobs_mode}")
         self.logprobs_mode = logprobs_mode
         self.compute_nans = envs.VLLM_COMPUTE_NANS_IN_LOGITS  # False by default.
+        self.use_fp64_gumbel = use_fp64_gumbel
 
         self.sampling_states = SamplingStates(max_num_reqs, vocab_size)
         self.penalties_state = PenaltiesState(req_states)
         self.logit_bias_state = LogitBiasState(max_num_reqs, device)
         self.bad_words_state = BadWordsState(req_states)
+        self.logprob_token_ids_state = LogprobTokenIdsState(max_num_reqs, device)
         self.num_speculative_tokens = num_speculative_tokens
 
     def add_request(
@@ -47,12 +53,14 @@ def add_request(
         self.penalties_state.add_request(req_idx, sampling_params)
         self.logit_bias_state.add_request(req_idx, prompt_len, sampling_params)
         self.bad_words_state.add_request(req_idx, sampling_params)
+        self.logprob_token_ids_state.add_request(req_idx, sampling_params)
 
     def apply_staged_writes(self) -> None:
         self.sampling_states.apply_staged_writes()
         self.penalties_state.apply_staged_writes()
         self.logit_bias_state.apply_staged_writes()
         self.bad_words_state.apply_staged_writes()
+        self.logprob_token_ids_state.apply_staged_writes()
 
     def __call__(
         self,
@@ -79,13 +87,23 @@ def __call__(
         )
 
         max_num_logprobs = self.sampling_states.max_num_logprobs(idx_mapping_np)
-        if max_num_logprobs != NO_LOGPROBS:
+        max_per_req_token_ids = self.logprob_token_ids_state.max_num_token_ids(
+            idx_mapping_np
+        )
+        if max_num_logprobs != NO_LOGPROBS or max_per_req_token_ids > 0:
             if self.logprobs_mode == "processed_logprobs":
                 logits = processed_logits
             expanded_logits = logits.shape[0] != idx_mapping_np.shape[0]
             cu_num_logits = cu_num_logits_np.tolist() if expanded_logits else None
+            num_logprobs = max_num_logprobs if max_num_logprobs != NO_LOGPROBS else 0
             logprobs_tensors = compute_topk_logprobs(
-                logits, max_num_logprobs, sampled, cu_num_logits
+                logits,
+                num_logprobs,
+                sampled,
+                cu_num_logits,
+                logprob_token_ids_state=self.logprob_token_ids_state,
+                expanded_idx_mapping=input_batch.expanded_idx_mapping,
+                max_per_req_token_ids=max_per_req_token_ids,
             )
         else:
             logprobs_tensors = None
@@ -126,7 +144,6 @@ def apply_sampling_params(
             idx_mapping_np,
             input_ids,
             expanded_local_pos,
-            self.num_speculative_tokens,
         )
 
         # Apply bad words masking in place.
@@ -177,5 +194,6 @@ def sample(
             self.sampling_states.seeds.gpu,
             pos,
             apply_temperature=False,
+            use_fp64=self.use_fp64_gumbel,
         )
         return sampled, processed_logits
diff --git a/vllm/v1/worker/gpu/shutdown.py b/vllm/v1/worker/gpu/shutdown.py
new file mode 100644
index 000000000000..830083962347
--- /dev/null
+++ b/vllm/v1/worker/gpu/shutdown.py
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from vllm.config import VllmConfig
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+def free_before_shutdown(vllm_config: VllmConfig) -> None:
+    from vllm.model_executor.layers.rotary_embedding import _ROPE_DICT
+    from vllm.v1.worker.workspace import reset_workspace_manager
+
+    cache_config = vllm_config.cache_config
+    cache_config.num_gpu_blocks = None
+
+    compilation_config = vllm_config.compilation_config
+    compilation_config.static_forward_context.clear()
+
+    _ROPE_DICT.clear()
+    reset_workspace_manager()
diff --git a/vllm/v1/worker/gpu/spec_decode/eagle/cudagraph.py b/vllm/v1/worker/gpu/spec_decode/eagle/cudagraph.py
index 1e75c48966b2..43bece01d0ec 100644
--- a/vllm/v1/worker/gpu/spec_decode/eagle/cudagraph.py
+++ b/vllm/v1/worker/gpu/spec_decode/eagle/cudagraph.py
@@ -10,6 +10,7 @@
 from vllm.v1.worker.gpu.block_table import BlockTables
 from vllm.v1.worker.gpu.cudagraph_utils import (
     BatchExecutionDescriptor,
+    CapturedAttentionState,
     CudaGraphManager,
     prepare_inputs_to_capture,
 )
@@ -18,22 +19,17 @@
 from vllm.v1.worker.utils import AttentionGroup
 
 
-class EagleCudaGraphManager(CudaGraphManager):
-    """CudaGraphManager for Eagle speculative decoding (FULL mode only)."""
+class EagleCudaGraphManagerBase(CudaGraphManager):
+    """Base CudaGraphManager for Eagle with a dedicated graph pool."""
 
     def __init__(
         self,
         vllm_config: VllmConfig,
         device: torch.device,
         cudagraph_mode: CUDAGraphMode,
-        draft_tokens: torch.Tensor,
+        decode_query_len: int,
     ):
-        assert not cudagraph_mode.has_mode(CUDAGraphMode.PIECEWISE), (
-            "EagleCudaGraphManager does not support PIECEWISE mode yet"
-        )
-        # Eagle always uses uniform decode with query_len=1
-        super().__init__(vllm_config, device, cudagraph_mode, decode_query_len=1)
-        self.draft_tokens = draft_tokens
+        super().__init__(vllm_config, device, cudagraph_mode, decode_query_len)
 
         # Use a dedicated pool for Eagle to avoid memory overlap with the main
         # model's cudagraph. The base class uses a shared global pool, but Eagle's
@@ -42,9 +38,49 @@ def __init__(
         if cudagraph_mode:
             self.pool = torch.cuda.graph_pool_handle()
 
+
+class PrefillEagleCudaGraphManager(EagleCudaGraphManagerBase):
+    """Eagle CudaGraphManager for prefill, using pre-built attention states
+    from the target model's capture."""
+
     def capture(
         self,
-        generate_fn: Callable,
+        forward_fn: Callable,
+        full_cg_attn_states: dict[BatchExecutionDescriptor, CapturedAttentionState],
+        progress_bar_desc: str = "Capturing CUDA graphs",
+    ) -> None:
+        def create_forward_fn(
+            desc: BatchExecutionDescriptor,
+        ) -> tuple[Callable[[CUDAGraphMode], None], CapturedAttentionState]:
+            num_tokens = desc.num_tokens
+            num_reqs = desc.num_reqs or min(num_tokens, self.max_num_reqs)
+            num_tokens_across_dp = (
+                torch.full((self.dp_size,), num_tokens, dtype=torch.int32, device="cpu")
+                if self.dp_size > 1
+                else None
+            )
+            attn_state = full_cg_attn_states[desc]
+            attn_metadata, slot_mappings = attn_state
+            fwd = lambda cg_mode: forward_fn(
+                num_reqs,
+                num_tokens,
+                attn_metadata,
+                slot_mappings,
+                num_tokens_across_dp,
+                cg_mode,
+            )
+            return fwd, attn_state
+
+        super().capture(create_forward_fn, progress_bar_desc)
+
+
+class DecodeEagleCudaGraphManager(EagleCudaGraphManagerBase):
+    """Eagle CudaGraphManager for decode draft generation, building its own
+    attention metadata from scratch."""
+
+    def capture(
+        self,
+        forward_fn: Callable,
         model_state: ModelState,
         input_buffers: InputBuffers,
         block_tables: BlockTables,
@@ -52,11 +88,9 @@ def capture(
         kv_cache_config: KVCacheConfig,
         progress_bar_desc: str = "Capturing CUDA graphs",
     ) -> None:
-        """Capture CUDA graphs for Eagle speculative decoding (FULL mode only)."""
-
         def create_forward_fn(
             desc: BatchExecutionDescriptor,
-        ) -> Callable[[CUDAGraphMode], None]:
+        ) -> tuple[Callable[[CUDAGraphMode], None], CapturedAttentionState]:
             num_tokens = desc.num_tokens
             num_reqs = desc.num_reqs or min(num_tokens, self.max_num_reqs)
             num_tokens_across_dp = (
@@ -64,7 +98,7 @@ def create_forward_fn(
                 if self.dp_size > 1
                 else None
             )
-            attn_metadata, slot_mappings = prepare_inputs_to_capture(
+            attn_state = prepare_inputs_to_capture(
                 num_reqs,
                 num_tokens,
                 model_state,
@@ -72,9 +106,11 @@ def create_forward_fn(
                 block_tables,
                 attn_groups,
                 kv_cache_config,
+                skip_attn=(desc.cg_mode == CUDAGraphMode.PIECEWISE),
             )
+            attn_metadata, slot_mappings = attn_state
 
-            return lambda cg_mode: generate_fn(
+            fwd = lambda cg_mode: forward_fn(
                 num_reqs,
                 num_tokens,
                 attn_metadata,
@@ -82,10 +118,6 @@ def create_forward_fn(
                 num_tokens_across_dp,
                 cg_mode,
             )
+            return fwd, attn_state
 
         super().capture(create_forward_fn, progress_bar_desc)
-
-    def run_fullgraph(self, desc: BatchExecutionDescriptor) -> torch.Tensor:
-        """Replay a captured FULL cudagraph and return draft tokens."""
-        super().run_fullgraph(desc)
-        return self.draft_tokens
diff --git a/vllm/v1/worker/gpu/spec_decode/eagle/speculator.py b/vllm/v1/worker/gpu/spec_decode/eagle/speculator.py
index 887fd52794cb..af1c3608da88 100644
--- a/vllm/v1/worker/gpu/spec_decode/eagle/speculator.py
+++ b/vllm/v1/worker/gpu/spec_decode/eagle/speculator.py
@@ -21,12 +21,17 @@
 from vllm.v1.worker.gpu.block_table import BlockTables
 from vllm.v1.worker.gpu.cudagraph_utils import (
     BatchExecutionDescriptor,
+    CapturedAttentionState,
+    get_uniform_token_count,
 )
-from vllm.v1.worker.gpu.dp_utils import sync_cudagraph_and_dp_padding
+from vllm.v1.worker.gpu.dp_utils import dispatch_cg_and_sync_dp
 from vllm.v1.worker.gpu.input_batch import InputBatch, InputBuffers
 from vllm.v1.worker.gpu.model_states.interface import ModelState
 from vllm.v1.worker.gpu.sample.gumbel import gumbel_sample
-from vllm.v1.worker.gpu.spec_decode.eagle.cudagraph import EagleCudaGraphManager
+from vllm.v1.worker.gpu.spec_decode.eagle.cudagraph import (
+    DecodeEagleCudaGraphManager,
+    PrefillEagleCudaGraphManager,
+)
 from vllm.v1.worker.gpu.spec_decode.eagle.utils import load_eagle_model
 
 logger = init_logger(__name__)
@@ -51,8 +56,14 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
         # the draft model's hidden size can be different from the target model's
         # hidden size (e.g., Llama 3.3 70B).
         self.hidden_size = self.draft_model_config.get_hidden_size()
+        # Widen for HC-multiplexed residuals (e.g. DeepSeek V4 feeds the MTP
+        # draft the target's pre-hc_head (T, hc_mult * hidden_size) residual).
+        # Non-HC models default to hc_mult=1 and are unaffected.
+        hc_mult = getattr(self.draft_model_config.hf_config, "hc_mult", 1)
+        self.hidden_size = self.hidden_size * hc_mult
         self.vocab_size = self.draft_model_config.get_vocab_size()
         self.dtype = vllm_config.model_config.dtype
+        self.use_fp64_gumbel = vllm_config.model_config.use_fp64_gumbel
 
         # DP configuration
         self.dp_size = vllm_config.parallel_config.data_parallel_size
@@ -79,6 +90,13 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
             dtype=torch.int64,
             device=device,
         )
+        self.current_draft_step = torch.tensor(0, dtype=torch.int64, device=device)
+        self.last_token_indices = torch.zeros(
+            self.max_num_reqs, dtype=torch.int64, device=device
+        )
+        self.arange = torch.arange(
+            self.max_num_reqs + 1, dtype=torch.int32, device="cpu"
+        )
 
         self.supports_mm_inputs = MULTIMODAL_REGISTRY.supports_multimodal_inputs(
             self.draft_model_config
@@ -89,7 +107,7 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
             )
 
         self.draft_logits: torch.Tensor | None = None
-        if self.speculative_config.rejection_sample_method == "probabilistic":
+        if self.speculative_config.draft_sample_method == "probabilistic":
             self.draft_logits = torch.zeros(
                 self.max_num_reqs,
                 self.num_speculative_steps,
@@ -98,16 +116,38 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
                 device=device,
             )
 
-        # currently we don't  support PIECEWISE for Eagle.
-        cudagraph_mode = vllm_config.compilation_config.cudagraph_mode
+        self.prefill_cudagraph_manager: PrefillEagleCudaGraphManager | None = None
+        self.decode_cudagraph_manager: DecodeEagleCudaGraphManager | None = None
+
+    def init_cudagraph_manager(self, cudagraph_mode: CUDAGraphMode) -> None:
+        cudagraph_mode = self.vllm_config.compilation_config.cudagraph_mode
+        # Initialize cudagraph manager for draft prefill (draft position 0).
+        self.prefill_cudagraph_manager = PrefillEagleCudaGraphManager(
+            self.vllm_config,
+            self.device,
+            cudagraph_mode,
+            self.num_speculative_steps + 1,
+        )
+
+        # PIECEWISE cudagraphs are not supported for eagle draft decodes.
+        # PIECEWISE pads num_tokens to the next capture size without padding
+        # num_reqs, which can cause attention backends to read past the
+        # valid per-request metadata (e.g. FlashInfer's kv_indptr buffer).
         if cudagraph_mode.decode_mode() == CUDAGraphMode.FULL:
             cudagraph_mode = CUDAGraphMode.FULL_DECODE_ONLY
         else:
             cudagraph_mode = CUDAGraphMode.NONE
 
-        self.cudagraph_manager = EagleCudaGraphManager(
-            vllm_config, device, cudagraph_mode, self.draft_tokens
+        # Initialize cudagraph manager for draft decodes (draft positions > 0).
+        self.decode_cudagraph_manager = DecodeEagleCudaGraphManager(
+            self.vllm_config,
+            self.device,
+            cudagraph_mode,
+            decode_query_len=1,
         )
+        # Share a single pool between prefill and decode since they never
+        # execute concurrently.
+        self.decode_cudagraph_manager.pool = self.prefill_cudagraph_manager.pool
 
     def load_model(self, target_model: nn.Module) -> None:
         target_attn_layer_names = get_layers_from_vllm_config(
@@ -133,7 +173,7 @@ def set_attn(
     ) -> None:
         self.model_state = model_state
         self.kv_cache_config = kv_cache_config
-        _, self.attn_groups = init_attn_backend(
+        _, self.attn_groups, _, _ = init_attn_backend(
             kv_cache_config,
             self.vllm_config,
             self.device,
@@ -188,98 +228,172 @@ def run_model(
             last_hidden_states, hidden_states = ret_hidden_states
         return last_hidden_states, hidden_states
 
-    def generate_draft(
+    def _sample_draft(
+        self,
+        logits: torch.Tensor,
+        idx_mapping: torch.Tensor,
+        pos: torch.Tensor,
+        draft_step: torch.Tensor,
+        draft_logits: torch.Tensor | None,
+    ) -> torch.Tensor:
+        if draft_logits is not None:
+            # NOTE(woosuk): We must add 1 to the positions to match the Gumbel noise
+            # used for draft and target sampling.
+            return gumbel_sample(
+                logits,
+                idx_mapping,
+                self.temperature,
+                self.seeds,
+                pos + 1,
+                apply_temperature=True,
+                output_processed_logits=draft_logits,
+                output_processed_logits_col=draft_step,
+                use_fp64=self.use_fp64_gumbel,
+            )
+        else:
+            return logits.argmax(dim=-1)
+
+    def prefill(
         self,
         num_reqs: int,
-        num_tokens_padded: int,
+        num_tokens: int,
         attn_metadata: dict[str, Any] | None,
         slot_mappings: dict[str, torch.Tensor] | None,
         num_tokens_across_dp: torch.Tensor | None,
         cudagraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE,
+        mm_inputs: tuple[list[torch.Tensor], torch.Tensor] | None = None,
     ) -> None:
-        pos = self.input_buffers.positions[:num_reqs]
+        last_token_indices = self.last_token_indices[:num_reqs]
+        pos = self.input_buffers.positions[last_token_indices]
+        idx_mapping = self.idx_mapping[:num_reqs]
+
+        last_hidden_states, hidden_states = self.run_model(
+            num_tokens,
+            attn_metadata,
+            slot_mappings,
+            num_tokens_across_dp=num_tokens_across_dp,
+            cudagraph_runtime_mode=cudagraph_runtime_mode,
+            mm_inputs=mm_inputs,
+        )
+        sample_hidden_states = last_hidden_states[last_token_indices]
+        logits = self.model.compute_logits(sample_hidden_states)
+
+        self.draft_tokens[:num_reqs, 0] = self._sample_draft(
+            logits,
+            idx_mapping,
+            pos,
+            self.current_draft_step,
+            self.draft_logits,
+        )
+        self.hidden_states[:num_reqs] = hidden_states[last_token_indices]
+        self.input_buffers.positions[:num_reqs] = pos
+
+    def multi_step_decode(
+        self,
+        num_reqs: int,
+        skip_attn: bool,
+        batch_desc: BatchExecutionDescriptor,
+        num_tokens_across_dp: torch.Tensor | None,
+    ) -> None:
+        positions = self.input_buffers.positions[:num_reqs]
         query_start_loc = self.input_buffers.query_start_loc[: num_reqs + 1]
         idx_mapping = self.idx_mapping[:num_reqs]
+
         for step in range(1, self.num_speculative_steps):
-            # Run the eagle model.
-            last_hidden_states, hidden_states = self.run_model(
-                num_tokens_padded,
-                attn_metadata,
-                slot_mappings,
-                num_tokens_across_dp,
-                cudagraph_runtime_mode,
-            )
-            last_hidden_states = last_hidden_states[:num_reqs]
-            hidden_states = hidden_states[:num_reqs]
-            logits = self.model.compute_logits(last_hidden_states)
+            attn_metadata = None
+            slot_mappings_by_layer = None
+            if not skip_attn:
+                # Build attention metadata and slot mappings for each draft
+                # decode step. It is necessary to rebuild the attention
+                # metadata even when replaying the FULL graph so that any
+                # attention metadata builder state is updated.
+                slot_mappings = self.block_tables.compute_slot_mappings(
+                    idx_mapping,
+                    query_start_loc,
+                    positions,
+                    batch_desc.num_tokens,
+                )
+                slot_mappings_by_layer = build_slot_mappings_by_layer(
+                    slot_mappings, self.kv_cache_config
+                )
+                attn_metadata = self._build_draft_attn_metadata(
+                    num_reqs=num_reqs,
+                    num_reqs_padded=batch_desc.num_reqs or num_reqs,
+                    num_tokens_padded=batch_desc.num_tokens,
+                )
 
-            # NOTE(woosuk): We must add 1 to the positions to match the Gumbel noise
-            # used for draft and target sampling.
-            draft_tokens = gumbel_sample(
-                logits,
-                idx_mapping,
-                self.temperature,
-                self.seeds,
-                pos + 1,
-                apply_temperature=True,
-                processed_logits_out=self.draft_logits[:, step]
-                if self.draft_logits is not None
-                else None,
-            )
-            self.draft_tokens[:num_reqs, step] = draft_tokens
-
-            if step < self.num_speculative_steps - 1:
-                # Update the inputs for the next step.
-                update_eagle_inputs(
-                    draft_tokens,
-                    hidden_states,
-                    self.input_buffers,
-                    self.hidden_states,
-                    self.max_model_len,
+            # Update the current draft step.
+            self.current_draft_step.fill_(step)
+
+            # Generate draft tokens for the current step.
+            if batch_desc.cg_mode == CUDAGraphMode.FULL:
+                assert self.decode_cudagraph_manager is not None
+                self.decode_cudagraph_manager.run_fullgraph(batch_desc)
+            else:
+                self.generate_draft(
+                    num_reqs,
+                    batch_desc.num_tokens,
+                    attn_metadata,
+                    slot_mappings_by_layer,
+                    num_tokens_across_dp=num_tokens_across_dp,
+                    cudagraph_runtime_mode=batch_desc.cg_mode,
                 )
-                if attn_metadata is not None:
-                    self.block_tables.compute_slot_mappings(
-                        idx_mapping, query_start_loc, pos, num_tokens_padded
-                    )
 
-    def _dispatch_and_sync_dp(
+    def generate_draft(
         self,
-        cudagraph_manager: EagleCudaGraphManager,
         num_reqs: int,
-        num_tokens: int,
-        uniform_token_count: int | None,
-    ) -> tuple[BatchExecutionDescriptor, torch.Tensor | None]:
-        batch_desc = cudagraph_manager.dispatch(
-            num_reqs, num_tokens, uniform_token_count
+        num_tokens_padded: int,
+        attn_metadata: dict[str, Any] | None,
+        slot_mappings: dict[str, torch.Tensor] | None,
+        num_tokens_across_dp: torch.Tensor | None,
+        cudagraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE,
+    ) -> None:
+        idx_mapping = self.idx_mapping[:num_reqs]
+        positions = self.input_buffers.positions[:num_reqs]
+        # Run the eagle model forward pass.
+        last_hidden_states, hidden_states = self.run_model(
+            num_tokens_padded,
+            attn_metadata,
+            slot_mappings,
+            num_tokens_across_dp,
+            cudagraph_runtime_mode,
+        )
+        last_hidden_states = last_hidden_states[:num_reqs]
+
+        # Sample the draft tokens.
+        logits = self.model.compute_logits(last_hidden_states)
+        draft_tokens = self._sample_draft(
+            logits,
+            idx_mapping,
+            positions,
+            self.current_draft_step,
+            self.draft_logits,
+        )
+
+        # Update the inputs for the next step.
+        update_eagle_draft_inputs(
+            draft_tokens,
+            self.current_draft_step,
+            hidden_states,
+            self.draft_tokens,
+            self.hidden_states,
+            self.input_buffers,
+            num_reqs,
+            self.max_model_len,
+            self.num_speculative_steps,
         )
-        num_tokens_across_dp = None
-        if self.dp_size > 1:
-            batch_desc, num_tokens_across_dp = sync_cudagraph_and_dp_padding(
-                cudagraph_manager,
-                batch_desc,
-                num_tokens,
-                num_reqs,
-                uniform_token_count,
-                self.dp_size,
-                self.dp_rank,
-            )
-        return batch_desc, num_tokens_across_dp
 
     def _build_draft_attn_metadata(
         self,
         num_reqs: int,
         num_reqs_padded: int,
         num_tokens_padded: int,
-        max_query_len: int,
     ) -> dict[str, Any] | None:
         if not self.draft_attn_layer_names:
             return None
 
-        query_start_loc_cpu = (
-            torch.arange(num_reqs_padded + 1, dtype=torch.int32, device="cpu").clamp_(
-                max=num_reqs
-            )
-            * max_query_len
+        query_start_loc_cpu = torch.clamp(
+            self.arange[: num_reqs_padded + 1], max=num_reqs
         )
         block_tables = [
             x[:num_reqs_padded] for x in self.block_tables.input_block_tables
@@ -293,7 +407,7 @@ def _build_draft_attn_metadata(
                 : num_reqs_padded + 1
             ],
             query_start_loc_cpu=query_start_loc_cpu,
-            max_query_len=max_query_len,
+            max_query_len=1,
             seq_lens=self.input_buffers.seq_lens[:num_reqs_padded],
             max_seq_len=self.max_model_len,
             block_tables=block_tables,
@@ -302,18 +416,42 @@ def _build_draft_attn_metadata(
         )
         return attn_metadata
 
-    def capture_model(self) -> None:
+    def capture(
+        self,
+        attn_states: dict[BatchExecutionDescriptor, CapturedAttentionState],
+    ) -> None:
+        logger.info("Capturing model for Eagle speculator...")
+        # Reset indices to zeros to prevent stale values from prior
+        # dummy runs to cause out-of-bounds indexing during capture.
+        self.last_token_indices.zero_()
+
+        # Capture the prefill routine (model forward + compute_logits +
+        # sample).
+        # For FULL graphs, the entire routine is recorded as one graph.
+        # For PIECEWISE, only the model's compiled regions are captured
+        # and the rest (compute_logits, gumbel_sample) runs eagerly.
+        assert self.prefill_cudagraph_manager is not None
+        self.prefill_cudagraph_manager.capture(
+            self.prefill,
+            attn_states,
+            progress_bar_desc="Capturing eagle prefill CUDA graphs",
+        )
+
         if self.num_speculative_steps == 1:
             return
-        logger.info("Capturing model for Eagle speculator...")
-        self.cudagraph_manager.capture(
+
+        # Capture the decode draft generation routine (model forward +
+        # compute_logits + sample + update_eagle_inputs) for a single
+        # step.
+        assert self.decode_cudagraph_manager is not None
+        self.decode_cudagraph_manager.capture(
             self.generate_draft,
             self.model_state,
             self.input_buffers,
             self.block_tables,
             self.attn_groups,
             self.kv_cache_config,
-            progress_bar_desc="Capturing eagle CUDA graphs",
+            progress_bar_desc="Capturing eagle decode CUDA graphs",
         )
 
     @torch.inference_mode()
@@ -342,7 +480,12 @@ def propose(
         dummy_run: bool = False,
         skip_attn_for_dummy_run: bool = False,
         mm_inputs: tuple[list[torch.Tensor], torch.Tensor] | None = None,
+        is_profile: bool = False,
     ) -> torch.Tensor:
+        num_tokens = input_batch.num_tokens_after_padding
+        num_reqs = input_batch.num_reqs
+        max_query_len = input_batch.num_scheduled_tokens.max()
+
         # NOTE(woosuk): To avoid CPU-GPU synchronization without CPU knowing the
         # number of rejected tokens, we maintain the size of eagle's input_ids and
         # hidden_states the same as the target model's. This means, we pad each
@@ -356,129 +499,113 @@ def propose(
             )
         else:
             hidden_states = last_hidden_states
-        num_tokens = input_batch.num_tokens_after_padding
-        self.hidden_states[:num_tokens] = hidden_states
+        self.hidden_states[:num_tokens].copy_(hidden_states)
+
+        # Copy temperature, seeds, and idx mapping to the pre-allocated buffers.
+        # NOTE(woosuk): For draft sampling, we only consider the temperature
+        # and ignore the other sampling parameters such as top_k and top_p,
+        # for simplicity and performance.
+        # While this may slightly degrade the acceptance rate, it does not
+        # affect the output distribution after rejection sampling.
+        self.temperature.copy_(temperature)
+        self.seeds.copy_(seeds)
+        self.idx_mapping[:num_reqs].copy_(input_batch.idx_mapping)
 
         # Get the input ids and last token indices for the speculator.
-        last_token_indices = prepare_eagle_inputs(
+        prepare_eagle_inputs(
+            self.last_token_indices,
+            self.current_draft_step,
             self.input_buffers,
             input_batch,
             num_sampled,
             num_rejected,
             last_sampled,
             next_prefill_tokens,
+            self.max_num_reqs,
         )
 
-        # Prefill: Run the eagle speculator with eager mode.
-        # TODO(woosuk): Support CUDA graph for prefill.
-        last_hidden_states, hidden_states = self.run_model(
+        # When all requests are decoding (no true prefills), each has
+        # num_speculative_steps + 1 tokens, enabling FULL graph replay.
+        uniform_token_count = get_uniform_token_count(
+            num_reqs,
+            # Use the actual number of tokens without padding added by
+            # the target model during FULL cudagraph.
+            input_batch.num_tokens,
+            max_query_len,
+        )
+        prefill_batch_desc, num_tokens_across_dp = dispatch_cg_and_sync_dp(
+            self.prefill_cudagraph_manager,
+            num_reqs,
             num_tokens,
-            attn_metadata,
-            slot_mappings,
-            num_tokens_across_dp=num_tokens_across_dp,
-            mm_inputs=mm_inputs,
+            uniform_token_count,
+            dp_size=self.dp_size,
+            dp_rank=self.dp_rank,
+            need_eager=is_profile,
         )
-        sample_hidden_states = last_hidden_states[last_token_indices]
-        logits = self.model.compute_logits(sample_hidden_states)
 
-        num_reqs = input_batch.num_reqs
-        # NOTE(woosuk): For draft sampling, we only consider the temperature
-        # and ignore the other sampling parameters such as top_k and top_p,
-        # for simplicity and performance.
-        # While this may slightly degrade the acceptance rate, it does not
-        # affect the output distribution after rejection sampling.
-        idx_mapping = self.idx_mapping[:num_reqs]
-        idx_mapping.copy_(input_batch.idx_mapping)
-        self.temperature.copy_(temperature)
-        self.seeds.copy_(seeds)
-
-        # Gather the values and copy them to the pre-allocated buffers.
-        pos = self.input_buffers.positions[:num_reqs]
-        torch.gather(input_batch.positions, 0, last_token_indices, out=pos)
-        # NOTE(woosuk): We must add 1 to the positions to match the Gumbel noise
-        # used for draft and target sampling.
-        draft_tokens = gumbel_sample(
-            logits,
-            idx_mapping,
-            self.temperature,
-            self.seeds,
-            pos + 1,
-            apply_temperature=True,
-            processed_logits_out=self.draft_logits[:, 0]
-            if self.draft_logits is not None
-            else None,
-        )
+        if prefill_batch_desc.cg_mode == CUDAGraphMode.FULL:
+            # Replay the full graph for draft prefill.
+            assert self.prefill_cudagraph_manager is not None
+            self.prefill_cudagraph_manager.run_fullgraph(prefill_batch_desc)
+        else:
+            # The target model's attention metadata and slot mappings
+            # can directly be used for draft prefill, because of the
+            # identical batch shape and KV cache layout.
+            self.prefill(
+                num_reqs,
+                prefill_batch_desc.num_tokens,
+                attn_metadata,
+                slot_mappings,
+                num_tokens_across_dp=num_tokens_across_dp,
+                cudagraph_runtime_mode=prefill_batch_desc.cg_mode,
+                mm_inputs=mm_inputs,
+            )
 
         if self.num_speculative_steps == 1:
             # Early exit.
-            return draft_tokens.view(-1, 1)
+            return self.draft_tokens[:num_reqs, :1]
 
-        # Save the draft tokens for the first step.
-        self.draft_tokens[:num_reqs, 0] = draft_tokens
         # Prepare the inputs for the decode steps.
         prepare_eagle_decode(
-            draft_tokens,
-            hidden_states,
-            last_token_indices,
+            self.draft_tokens[:num_reqs, 0],
             input_batch.seq_lens,
             num_rejected,
             self.input_buffers,
-            self.hidden_states,
             self.max_model_len,
             self.max_num_reqs,
         )
 
-        # Each request produces exactly 1 token per draft decode step,
-        # enabling FULL cudagraph.
-        decode_batch_desc, num_tokens_across_dp = self._dispatch_and_sync_dp(
-            self.cudagraph_manager,
+        # Each request produces exactly 1 token per draft generation step,
+        # enabling FULL graph replay.
+        decode_batch_desc, num_tokens_across_dp = dispatch_cg_and_sync_dp(
+            self.decode_cudagraph_manager,
             num_reqs,
             num_reqs,
             uniform_token_count=1,
+            dp_size=self.dp_size,
+            dp_rank=self.dp_rank,
+            need_eager=is_profile,
         )
 
-        attn_metadata_updated = None
-        slot_mappings_updated = None
-        if not (dummy_run and skip_attn_for_dummy_run):
-            # Build attention metadata and slot mappings for the draft
-            # decode steps. It is necessary to rebuild the attention
-            # metadata even when replaying the FULL cudagraph so that
-            # any attention metadata builder state is updated.
-            slot_mappings = self.block_tables.compute_slot_mappings(
-                idx_mapping,
-                self.input_buffers.query_start_loc[: num_reqs + 1],
-                pos,
-                decode_batch_desc.num_tokens,
-            )
-            slot_mappings_updated = build_slot_mappings_by_layer(
-                slot_mappings, self.kv_cache_config
-            )
-            attn_metadata_updated = self._build_draft_attn_metadata(
-                num_reqs=num_reqs,
-                num_reqs_padded=decode_batch_desc.num_reqs or num_reqs,
-                num_tokens_padded=decode_batch_desc.num_tokens,
-                max_query_len=1,
-            )
+        # Generate the remaining num_speculative_steps - 1 draft tokens.
+        self.multi_step_decode(
+            num_reqs,
+            dummy_run and skip_attn_for_dummy_run,
+            decode_batch_desc,
+            num_tokens_across_dp,
+        )
 
-        if decode_batch_desc.cg_mode == CUDAGraphMode.FULL:
-            self.cudagraph_manager.run_fullgraph(decode_batch_desc)
-        else:
-            self.generate_draft(
-                num_reqs,
-                decode_batch_desc.num_tokens,
-                attn_metadata_updated,
-                slot_mappings_updated,
-                num_tokens_across_dp=num_tokens_across_dp,
-                cudagraph_runtime_mode=decode_batch_desc.cg_mode,
-            )
         return self.draft_tokens[:num_reqs]
 
 
 @triton.jit
 def _prepare_eagle_inputs_kernel(
     last_token_indices_ptr,
+    eagle_current_draft_step_ptr,
     eagle_input_ids_ptr,
     eagle_positions_ptr,
+    eagle_query_start_loc_ptr,
+    eagle_seq_lens_ptr,
     target_input_ids_ptr,
     target_positions_ptr,
     idx_mapping_ptr,
@@ -487,20 +614,24 @@ def _prepare_eagle_inputs_kernel(
     num_sampled_ptr,
     num_rejected_ptr,
     query_start_loc_ptr,
+    seq_lens_ptr,
+    max_num_reqs,
     BLOCK_SIZE: tl.constexpr,
 ):
-    batch_idx = tl.program_id(0)
-    req_state_idx = tl.load(idx_mapping_ptr + batch_idx)
+    req_idx = tl.program_id(0)
+    num_reqs = tl.num_programs(0)
+    req_state_idx = tl.load(idx_mapping_ptr + req_idx)
 
-    query_start = tl.load(query_start_loc_ptr + batch_idx)
-    query_end = tl.load(query_start_loc_ptr + batch_idx + 1)
+    query_start = tl.load(query_start_loc_ptr + req_idx)
+    query_end = tl.load(query_start_loc_ptr + req_idx + 1)
     query_len = query_end - query_start
+    seq_len = tl.load(seq_lens_ptr + req_idx)
 
     # Get the true query length and next token after accounting for rejected tokens.
-    num_rejected = tl.load(num_rejected_ptr + batch_idx)
+    num_rejected = tl.load(num_rejected_ptr + req_idx)
     query_len -= num_rejected
 
-    num_sampled = tl.load(num_sampled_ptr + batch_idx)
+    num_sampled = tl.load(num_sampled_ptr + req_idx)
     if num_sampled > 0:
         next_token = tl.load(last_sampled_ptr + req_state_idx).to(tl.int32)
     else:
@@ -516,7 +647,7 @@ def _prepare_eagle_inputs_kernel(
         tl.store(eagle_input_ids_ptr + query_start + block - 1, input_ids, mask=mask)
 
     last_token_index = query_start + query_len - 1
-    tl.store(last_token_indices_ptr + batch_idx, last_token_index)
+    tl.store(last_token_indices_ptr + req_idx, last_token_index)
     tl.store(eagle_input_ids_ptr + last_token_index, next_token)
 
     # Copy positions.
@@ -526,8 +657,34 @@ def _prepare_eagle_inputs_kernel(
         target_pos = tl.load(target_positions_ptr + query_start + block, mask=mask)
         tl.store(eagle_positions_ptr + query_start + block, target_pos, mask=mask)
 
+    # Copy query start locations.
+    tl.store(eagle_query_start_loc_ptr + req_idx, query_start)
+    # Copy sequence lengths.
+    tl.store(eagle_seq_lens_ptr + req_idx, seq_len)
+    if req_idx == (num_reqs - 1):
+        # Reset the current draft step to 0.
+        tl.store(eagle_current_draft_step_ptr, 0)
+        # Pad query_start_loc for CUDA graphs.
+        for i in range(num_reqs, max_num_reqs + 1, BLOCK_SIZE):
+            block = i + tl.arange(0, BLOCK_SIZE)
+            mask = block < max_num_reqs + 1
+            tl.store(eagle_query_start_loc_ptr + block, query_end, mask=mask)
+        # Pad seq_lens for CUDA graphs.
+        for i in range(num_reqs, max_num_reqs, BLOCK_SIZE):
+            block = i + tl.arange(0, BLOCK_SIZE)
+            mask = block < max_num_reqs
+            tl.store(eagle_seq_lens_ptr + block, 0, mask=mask)
+        # Pad last_token_indices for CUDA graphs.
+        for i in range(num_reqs, max_num_reqs, BLOCK_SIZE):
+            block = i + tl.arange(0, BLOCK_SIZE)
+            mask = block < max_num_reqs
+            tl.store(last_token_indices_ptr + block, 0, mask=mask)
+
 
 def prepare_eagle_inputs(
+    # [num_reqs]
+    last_token_indices: torch.Tensor,
+    current_draft_step: torch.Tensor,
     input_buffers: InputBuffers,
     input_batch: InputBatch,
     # [num_reqs]
@@ -538,17 +695,16 @@ def prepare_eagle_inputs(
     last_sampled: torch.Tensor,
     # [max_num_reqs]
     next_prefill_tokens: torch.Tensor,
+    max_num_reqs,
 ) -> torch.Tensor:
     num_reqs = input_batch.num_reqs
-    last_token_indices = torch.empty(
-        num_reqs,
-        dtype=torch.int64,
-        device=num_sampled.device,
-    )
     _prepare_eagle_inputs_kernel[(num_reqs,)](
         last_token_indices,
+        current_draft_step,
         input_buffers.input_ids,
         input_buffers.positions,
+        input_buffers.query_start_loc,
+        input_buffers.seq_lens,
         input_batch.input_ids,
         input_batch.positions,
         input_batch.idx_mapping,
@@ -557,26 +713,23 @@ def prepare_eagle_inputs(
         num_sampled,
         num_rejected,
         input_batch.query_start_loc,
+        input_batch.seq_lens,
+        max_num_reqs,
         BLOCK_SIZE=1024,
     )
     return last_token_indices
 
 
 @triton.jit
-def _prepare_eagle_docode_kernel(
+def _prepare_eagle_decode_kernel(
     draft_tokens_ptr,
-    output_hidden_states_ptr,
-    output_hidden_states_stride,
-    last_token_indices_ptr,
+    draft_tokens_stride,
     target_seq_lens_ptr,
     num_rejected_ptr,
     input_ids_ptr,
     positions_ptr,
-    input_hidden_states_ptr,
-    input_hidden_states_stride,
     query_start_loc_ptr,
     seq_lens_ptr,
-    hidden_size,
     max_model_len,
     max_num_reqs,
     BLOCK_SIZE: tl.constexpr,
@@ -599,24 +752,9 @@ def _prepare_eagle_docode_kernel(
         return
 
     # draft token -> input id.
-    draft_token = tl.load(draft_tokens_ptr + req_idx)
+    draft_token = tl.load(draft_tokens_ptr + req_idx * draft_tokens_stride)
     tl.store(input_ids_ptr + req_idx, draft_token)
 
-    # output hidden states -> input hidden states.
-    src_idx = tl.load(last_token_indices_ptr + req_idx)
-    for i in range(0, hidden_size, BLOCK_SIZE):
-        block = i + tl.arange(0, BLOCK_SIZE)
-        mask = block < hidden_size
-        output_hidden_states = tl.load(
-            output_hidden_states_ptr + src_idx * output_hidden_states_stride + block,
-            mask=mask,
-        )
-        tl.store(
-            input_hidden_states_ptr + req_idx * input_hidden_states_stride + block,
-            output_hidden_states,
-            mask=mask,
-        )
-
     # Compute position and seq_lens.
     # NOTE(woosuk): To prevent out-of-range access, we clamp these values
     # if they reach the max model length.
@@ -633,31 +771,22 @@ def _prepare_eagle_docode_kernel(
 
 def prepare_eagle_decode(
     draft_tokens: torch.Tensor,
-    output_hidden_states: torch.Tensor,
-    last_token_indices: torch.Tensor,
     target_seq_lens: torch.Tensor,
     num_rejected: torch.Tensor,
     input_buffers: InputBuffers,
-    input_hidden_states: torch.Tensor,
     max_model_len: int,
     max_num_reqs: int,
 ):
     num_reqs = draft_tokens.shape[0]
-    hidden_size = output_hidden_states.shape[-1]
-    _prepare_eagle_docode_kernel[(num_reqs + 1,)](
+    _prepare_eagle_decode_kernel[(num_reqs + 1,)](
         draft_tokens,
-        output_hidden_states,
-        output_hidden_states.stride(0),
-        last_token_indices,
+        draft_tokens.stride(0),
         target_seq_lens,
         num_rejected,
         input_buffers.input_ids,
         input_buffers.positions,
-        input_hidden_states,
-        input_hidden_states.stride(0),
         input_buffers.query_start_loc,
         input_buffers.seq_lens,
-        hidden_size,
         max_model_len,
         max_num_reqs,
         BLOCK_SIZE=1024,
@@ -665,36 +794,55 @@ def prepare_eagle_decode(
 
 
 @triton.jit
-def _update_eagle_inputs_kernel(
+def _update_eagle_draft_inputs_kernel(
+    output_draft_tokens_ptr,
+    output_draft_tokens_stride,
+    next_input_hidden_states_ptr,
+    next_input_hidden_states_stride,
     input_ids_ptr,
     positions_ptr,
-    input_hidden_states_ptr,
-    input_hidden_states_stride,
     seq_lens_ptr,
-    max_model_len,
     draft_tokens_ptr,
-    output_hidden_states_ptr,
-    output_hidden_states_stride,
+    current_draft_step_ptr,
+    hidden_states_ptr,
+    hidden_states_stride,
     hidden_size,
+    max_model_len,
+    num_speculative_steps,
     BLOCK_SIZE: tl.constexpr,
 ):
     req_idx = tl.program_id(0)
 
-    # Draft token -> Input ID.
+    # Write the sampled draft token into self.draft_tokens[req_idx, step].
     draft_token = tl.load(draft_tokens_ptr + req_idx)
+    step = tl.load(current_draft_step_ptr)
+    tl.store(
+        output_draft_tokens_ptr + req_idx * output_draft_tokens_stride + step,
+        draft_token,
+    )
+
+    if step >= num_speculative_steps - 1:
+        # This is the final step. Skip updating draft forward inputs.
+        return
+
+    # Write the sampled draft token into the input ids tensor for the next
+    # forward pass.
     tl.store(input_ids_ptr + req_idx, draft_token)
 
-    # Output hidden states -> Input hidden states.
+    # Copy hidden states into the input hidden states tensor for the next
+    # forward pass.
     for i in range(0, hidden_size, BLOCK_SIZE):
         block = i + tl.arange(0, BLOCK_SIZE)
         mask = block < hidden_size
-        output_hidden_states = tl.load(
-            output_hidden_states_ptr + req_idx * output_hidden_states_stride + block,
+        hidden_states = tl.load(
+            hidden_states_ptr + req_idx * hidden_states_stride + block,
             mask=mask,
         )
         tl.store(
-            input_hidden_states_ptr + req_idx * input_hidden_states_stride + block,
-            output_hidden_states,
+            next_input_hidden_states_ptr
+            + req_idx * next_input_hidden_states_stride
+            + block,
+            hidden_states,
             mask=mask,
         )
 
@@ -710,24 +858,32 @@ def _update_eagle_inputs_kernel(
     tl.store(seq_lens_ptr + req_idx, seq_len)
 
 
-def update_eagle_inputs(
+def update_eagle_draft_inputs(
     draft_tokens: torch.Tensor,
-    output_hidden_states: torch.Tensor,
-    input_buffers: InputBuffers,
+    current_draft_step: torch.Tensor,
     hidden_states: torch.Tensor,
+    output_draft_tokens: torch.Tensor,
+    next_input_hidden_states: torch.Tensor,
+    input_buffers: InputBuffers,
+    num_reqs: int,
     max_model_len: int,
+    num_speculative_steps: int,
 ):
-    num_reqs, hidden_size = output_hidden_states.shape
-    _update_eagle_inputs_kernel[(num_reqs,)](
+    _, hidden_size = hidden_states.shape
+    _update_eagle_draft_inputs_kernel[(num_reqs,)](
+        output_draft_tokens,
+        output_draft_tokens.stride(0),
+        next_input_hidden_states,
+        next_input_hidden_states.stride(0),
         input_buffers.input_ids,
         input_buffers.positions,
-        hidden_states,
-        hidden_states.stride(0),
         input_buffers.seq_lens,
-        max_model_len,
         draft_tokens,
-        output_hidden_states,
-        output_hidden_states.stride(0),
+        current_draft_step,
+        hidden_states,
+        hidden_states.stride(0),
         hidden_size,
+        max_model_len,
+        num_speculative_steps,
         BLOCK_SIZE=1024,
     )
diff --git a/vllm/v1/worker/gpu/spec_decode/eagle/utils.py b/vllm/v1/worker/gpu/spec_decode/eagle/utils.py
index ee37eadb2a8e..fcbfc5569ef3 100644
--- a/vllm/v1/worker/gpu/spec_decode/eagle/utils.py
+++ b/vllm/v1/worker/gpu/spec_decode/eagle/utils.py
@@ -1,11 +1,29 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
 import torch.nn as nn
 
 from vllm.config import VllmConfig
+from vllm.distributed.parallel_state import get_pp_group
 from vllm.model_executor.model_loader import get_model
 
 
+def _should_share(eagle: nn.Module, flag: str, draft, target) -> bool:
+    """Share when the draft has no own copy, or its copy matches the target."""
+
+    if not getattr(eagle, flag, False) or draft is None:
+        return True
+    if target is None:
+        return False
+    # torch.equal on GPU allocates a bool mask the size of the input.
+    # Use the faster GPU path when there is plenty of headroom;
+    # otherwise compare on CPU.
+    w = draft.weight
+    if w.is_cuda and torch.cuda.mem_get_info(w.device)[0] < w.numel() * 2:
+        return torch.equal(w.cpu(), target.weight.cpu())
+    return torch.equal(w, target.weight)
+
+
 def load_eagle_model(target_model: nn.Module, vllm_config: VllmConfig) -> nn.Module:
     from vllm.compilation.backends import set_model_tag
 
@@ -17,36 +35,51 @@ def load_eagle_model(target_model: nn.Module, vllm_config: VllmConfig) -> nn.Mod
             vllm_config=vllm_config, model_config=draft_model_config
         )
 
-    # Share target embeddings when the draft checkpoint does not include
-    # its own vocab embedding table.
-    share_embeddings = True
-    if hasattr(eagle_model, "has_own_embed_tokens"):
-        share_embeddings = not eagle_model.has_own_embed_tokens
-    if share_embeddings:
-        target_language_model = (
-            target_model.get_language_model()
-            if hasattr(target_model, "get_language_model")
-            else target_model
+    target_language_model = (
+        target_model.get_language_model()
+        if hasattr(target_model, "get_language_model")
+        else target_model
+    )
+    target_inner = target_language_model.model
+    draft_inner = eagle_model.model
+
+    # Skip embedding sharing under PP — each rank owns its own embedding.
+    if get_pp_group().world_size == 1:
+        target_embed = getattr(target_inner, "embed_tokens", None) or getattr(
+            target_inner, "embedding", None
         )
-        inner_model = getattr(target_language_model, "model", None)
-        target_embed_tokens = None
-        if inner_model is not None:
-            if hasattr(inner_model, "embed_tokens"):
-                target_embed_tokens = inner_model.embed_tokens
-            elif hasattr(inner_model, "embedding"):
-                target_embed_tokens = inner_model.embedding
-        if target_embed_tokens is not None and hasattr(eagle_model, "model"):
-            if hasattr(eagle_model.model, "embed_tokens"):
-                del eagle_model.model.embed_tokens
-            eagle_model.model.embed_tokens = target_embed_tokens
-
-    # Only share target lm_head when the draft model does not own one.
-    share_lm_head = True
-    if hasattr(eagle_model, "has_own_lm_head"):
-        share_lm_head = not eagle_model.has_own_lm_head
-    if share_lm_head and hasattr(target_model, "lm_head"):
-        if hasattr(eagle_model, "lm_head"):
+        draft_embed = getattr(draft_inner, "embed_tokens", None)
+        if target_embed is not None and _should_share(
+            eagle_model, "has_own_embed_tokens", draft_embed, target_embed
+        ):
+            if draft_embed is not None:
+                del draft_inner.embed_tokens
+            draft_inner.embed_tokens = target_embed
+
+    target_lm_head = getattr(target_model, "lm_head", None)
+    draft_lm_head = getattr(eagle_model, "lm_head", None)
+    if target_lm_head is not None and _should_share(
+        eagle_model, "has_own_lm_head", draft_lm_head, target_lm_head
+    ):
+        if draft_lm_head is not None:
             del eagle_model.lm_head
-        eagle_model.lm_head = target_model.lm_head
+        eagle_model.lm_head = target_lm_head
+
+        # MTP layers route logits through layer.shared_head.head, not
+        # eagle_model.lm_head, so the per-layer copies need fixing up too.
+        layers = getattr(draft_inner, "layers", None)
+        if layers is not None:
+            items = layers.values() if isinstance(layers, nn.ModuleDict) else layers
+            for layer in items:
+                sh = getattr(layer, "shared_head", None)
+                if sh is not None and hasattr(sh, "head"):
+                    del sh.head
+                    sh.head = target_lm_head
+
+    # MTP also shares a topk_indices_buffer between target and draft.
+    if hasattr(target_inner, "topk_indices_buffer"):
+        if hasattr(draft_inner, "topk_indices_buffer"):
+            del draft_inner.topk_indices_buffer
+        draft_inner.topk_indices_buffer = target_inner.topk_indices_buffer
 
     return eagle_model
diff --git a/vllm/v1/worker/gpu/spec_decode/rejection_sampler.py b/vllm/v1/worker/gpu/spec_decode/rejection_sampler.py
index abb2b90f0884..1fe079a43e77 100644
--- a/vllm/v1/worker/gpu/spec_decode/rejection_sampler.py
+++ b/vllm/v1/worker/gpu/spec_decode/rejection_sampler.py
@@ -5,427 +5,18 @@
 from vllm.config import SpeculativeConfig
 from vllm.triton_utils import tl, triton
 from vllm.v1.outputs import LogprobsTensors
+from vllm.v1.spec_decode.utils import unconditional_to_conditional_rates
 from vllm.v1.worker.gpu.input_batch import InputBatch
 from vllm.v1.worker.gpu.metrics.logits import get_num_nans
-from vllm.v1.worker.gpu.sample.gumbel import gumbel_sample, tl_rand64
 from vllm.v1.worker.gpu.sample.logprob import compute_topk_logprobs
 from vllm.v1.worker.gpu.sample.output import SamplerOutput
 from vllm.v1.worker.gpu.sample.sampler import Sampler
 from vllm.v1.worker.gpu.sample.states import NO_LOGPROBS
-from vllm.v1.worker.gpu.spec_decode.synthetic_rejection_sampler_utils import (
-    compute_synthetic_rejection_sampler_params,
-    synthetic_rejection_sample,
+from vllm.v1.worker.gpu.spec_decode.rejection_sampler_utils import (
+    rejection_sample,
 )
 
 
-@triton.jit
-def _strict_rejection_sample_kernel(
-    sampled_ptr,  # [num_reqs, num_speculative_steps + 1]
-    sampled_stride,
-    num_sampled_ptr,  # [num_reqs]
-    target_sampled_ptr,  # [num_draft_tokens + num_reqs]
-    input_ids_ptr,  # [num_draft_tokens + num_reqs]
-    cu_num_logits_ptr,  # [num_reqs + 1]
-):
-    req_idx = tl.program_id(0)
-    start_idx = tl.load(cu_num_logits_ptr + req_idx)
-    end_idx = tl.load(cu_num_logits_ptr + req_idx + 1)
-    num_tokens = end_idx - start_idx
-
-    num_sampled = 0
-    rejected = False
-    for i in range(num_tokens - 1):
-        if not rejected:
-            target_sampled = tl.load(target_sampled_ptr + start_idx + i)
-            draft_sampled = tl.load(input_ids_ptr + start_idx + i + 1)
-            tl.store(sampled_ptr + req_idx * sampled_stride + i, target_sampled)
-            num_sampled += 1
-            if target_sampled != draft_sampled:
-                rejected = True
-    if not rejected:
-        target_sampled = tl.load(target_sampled_ptr + start_idx + num_tokens - 1)
-        tl.store(
-            sampled_ptr + req_idx * sampled_stride + num_tokens - 1, target_sampled
-        )
-        num_sampled += 1
-    tl.store(num_sampled_ptr + req_idx, num_sampled)
-
-
-def strict_rejection_sample(
-    # [num_draft_tokens + num_reqs]
-    target_sampled: torch.Tensor,
-    # [num_draft_tokens + num_reqs]
-    draft_sampled: torch.Tensor,
-    # [num_reqs + 1]
-    cu_num_logits: torch.Tensor,
-    num_speculative_steps,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    num_reqs = cu_num_logits.shape[0] - 1
-    sampled = target_sampled.new_empty(num_reqs, num_speculative_steps + 1)
-    num_sampled = target_sampled.new_empty(num_reqs, dtype=torch.int32)
-    _strict_rejection_sample_kernel[(num_reqs,)](
-        sampled,
-        sampled.stride(0),
-        num_sampled,
-        target_sampled,
-        draft_sampled,
-        cu_num_logits,
-        num_warps=1,
-    )
-    return sampled, num_sampled
-
-
-@triton.jit
-def _gather_draft_logits_and_target_argmax_kernel(
-    local_target_argmax_ptr,
-    local_target_argmax_stride,
-    local_target_max_ptr,
-    local_target_max_stride,
-    # [num_logits, V]
-    out_draft_logits_ptr,
-    out_draft_logits_stride,
-    # [num_logits, V]
-    target_logits_ptr,
-    target_logits_stride,
-    # [max_num_reqs, num_speculative_steps, V]
-    draft_logits_ptr,
-    draft_logits_stride_0,
-    draft_logits_stride_1,
-    # [num_logits]
-    expanded_idx_mapping_ptr,
-    # [num_logits]
-    expanded_local_pos_ptr,
-    # [max_num_reqs]
-    temp_ptr,
-    vocab_size,
-    num_speculative_steps,
-    BLOCK_SIZE: tl.constexpr,
-):
-    logit_idx = tl.program_id(0)
-    req_state_idx = tl.load(expanded_idx_mapping_ptr + logit_idx)
-    draft_step_idx = tl.load(expanded_local_pos_ptr + logit_idx)
-
-    block_idx = tl.program_id(1)
-    block_offsets = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
-    mask = block_offsets < vocab_size
-    temp = tl.load(temp_ptr + req_state_idx).to(tl.float32)
-
-    if temp == 0.0:
-        # Greedy sampling. Get the target logits argmax.
-        target_logits = tl.load(
-            target_logits_ptr + logit_idx * target_logits_stride + block_offsets,
-            mask=mask,
-            other=float("-inf"),
-        ).to(tl.float32)
-        value, idx = tl.max(target_logits, axis=0, return_indices=True)
-        token_id = block_idx * BLOCK_SIZE + idx
-        tl.store(
-            local_target_argmax_ptr
-            + logit_idx * local_target_argmax_stride
-            + block_idx,
-            token_id,
-        )
-        tl.store(
-            local_target_max_ptr + logit_idx * local_target_max_stride + block_idx,
-            value,
-        )
-    elif draft_step_idx < num_speculative_steps:
-        draft_logits = tl.load(
-            draft_logits_ptr
-            + req_state_idx * draft_logits_stride_0
-            + draft_step_idx * draft_logits_stride_1
-            + block_offsets,
-            mask=mask,
-            other=float("-inf"),
-        ).to(tl.float32)
-        tl.store(
-            out_draft_logits_ptr + logit_idx * out_draft_logits_stride + block_offsets,
-            draft_logits,
-            mask=mask,
-        )
-
-
-@triton.jit
-def _probabilistic_rejection_kernel(
-    # [num_reqs, num_speculative_steps + 1]
-    sampled_ptr,
-    sampled_stride,
-    # [num_reqs]
-    rejected_steps_ptr,
-    # [num_reqs]
-    rejected_pos_ptr,
-    # [num_logits]
-    draft_sampled_ptr,
-    # [num_logits, V]
-    target_probs_ptr,
-    target_probs_stride,
-    # [num_logits, V]
-    draft_probs_ptr,
-    draft_probs_stride,
-    # [num_logits, num_blocks]
-    local_target_argmax_ptr,
-    local_target_argmax_stride,
-    # [num_logits, num_blocks]
-    local_target_max_ptr,
-    local_target_max_stride,
-    # [num_reqs + 1]
-    cu_num_logits_ptr,
-    # [num_logits]
-    pos_ptr,
-    # [num_reqs]
-    idx_mapping_ptr,
-    # [max_num_reqs]
-    temp_ptr,
-    # [max_num_reqs]
-    seeds_ptr,
-    NUM_BLOCKS: tl.constexpr,
-    PADDED_NUM_BLOCKS: tl.constexpr,
-):
-    req_idx = tl.program_id(0)
-    start_idx = tl.load(cu_num_logits_ptr + req_idx)
-    num_tokens = tl.load(cu_num_logits_ptr + req_idx + 1) - start_idx
-    req_state_idx = tl.load(idx_mapping_ptr + req_idx)
-    seed = tl.load(seeds_ptr + req_state_idx)
-    temp = tl.load(temp_ptr + req_state_idx).to(tl.float32)
-
-    rejected_step = 0
-    accepted = True
-    for i in range(num_tokens - 1):
-        if accepted:
-            logit_idx = start_idx + i
-            draft_sampled = tl.load(draft_sampled_ptr + logit_idx + 1)
-            if temp == 0.0:
-                # Greedy sampling. Only accept the sampled draft token if
-                # it exactly matches the target argmax.
-                block_offsets = tl.arange(0, PADDED_NUM_BLOCKS)
-                block_mask = block_offsets < NUM_BLOCKS
-                local_max = tl.load(
-                    local_target_max_ptr
-                    + logit_idx * local_target_max_stride
-                    + block_offsets,
-                    mask=block_mask,
-                    other=float("-inf"),
-                )
-                max_block = tl.argmax(local_max, axis=0)
-                target_argmax = tl.load(
-                    local_target_argmax_ptr
-                    + logit_idx * local_target_argmax_stride
-                    + max_block
-                )
-                accepted &= target_argmax == draft_sampled
-            else:
-                target_prob = tl.load(
-                    target_probs_ptr + logit_idx * target_probs_stride + draft_sampled
-                ).to(tl.float64)
-                draft_prob = tl.load(
-                    draft_probs_ptr + logit_idx * draft_probs_stride + draft_sampled
-                ).to(tl.float64)
-                pos = tl.load(pos_ptr + logit_idx)
-                u = tl_rand64(seed, pos, includes_zero=False)
-                accepted &= target_prob > u * draft_prob
-            tl.store(sampled_ptr + req_idx * sampled_stride + i, draft_sampled)
-            rejected_step += accepted
-    tl.store(rejected_steps_ptr + req_idx, rejected_step)
-    pos_val = tl.load(pos_ptr + start_idx + rejected_step)
-    tl.store(rejected_pos_ptr + req_idx, pos_val)
-
-
-@triton.jit
-def _compute_residual_logits_kernel(
-    # [num_reqs, V]
-    residual_logits_ptr,
-    residual_logits_stride,
-    # [num_logits, V]
-    target_probs_ptr,
-    target_probs_stride,
-    # [num_logits, V]
-    draft_probs_ptr,
-    draft_probs_stride,
-    # [num_logits, V]
-    target_logits_ptr,
-    target_logits_stride,
-    # [num_reqs]
-    rejected_step_ptr,
-    # [num_reqs + 1]
-    cu_num_logits_ptr,
-    # [num_reqs]
-    idx_mapping_ptr,
-    # [max_num_reqs]
-    temp_ptr,
-    vocab_size,
-    BLOCK_SIZE: tl.constexpr,
-):
-    req_idx = tl.program_id(0)
-    block_idx = tl.program_id(1)
-
-    req_state_idx = tl.load(idx_mapping_ptr + req_idx)
-    start_idx = tl.load(cu_num_logits_ptr + req_idx)
-    end_idx = tl.load(cu_num_logits_ptr + req_idx + 1)
-    rejected_logit_idx = start_idx + tl.load(rejected_step_ptr + req_idx)
-    temp = tl.load(temp_ptr + req_state_idx).to(tl.float32)
-    block_offsets = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
-    mask = block_offsets < vocab_size
-
-    if temp == 0.0 or (rejected_logit_idx == end_idx - 1):
-        # Greedy sampling / bonus token. In either case, use the
-        # target logits directly to reduce numerical error.
-        residual_logits = tl.load(
-            target_logits_ptr
-            + rejected_logit_idx * target_logits_stride
-            + block_offsets,
-            mask=mask,
-            other=float("-inf"),
-        )
-    else:
-        target_probs = tl.load(
-            target_probs_ptr + rejected_logit_idx * target_probs_stride + block_offsets,
-            mask=mask,
-            other=0.0,
-        )
-        draft_probs = tl.load(
-            draft_probs_ptr + rejected_logit_idx * draft_probs_stride + block_offsets,
-            mask=mask,
-            other=0.0,
-        )
-        residual_probs = tl.maximum(target_probs - draft_probs, 0.0)
-        residual_logits = tl.log(residual_probs)
-
-    tl.store(
-        residual_logits_ptr + req_idx * residual_logits_stride + block_offsets,
-        residual_logits,
-        mask=mask,
-    )
-
-
-def probabilistic_rejection_sample(
-    # [num_logits, V]
-    target_logits: torch.Tensor,
-    # [max_num_reqs, num_speculative_steps, V]
-    draft_logits: torch.Tensor,
-    # [num_logits]
-    draft_sampled: torch.Tensor,
-    # [num_reqs + 1]
-    cu_num_logits: torch.Tensor,
-    # [num_logits]
-    pos: torch.Tensor,
-    # [num_reqs]
-    idx_mapping: torch.Tensor,
-    # [num_logits]
-    expanded_idx_mapping: torch.Tensor,
-    # [num_logits]
-    expanded_local_pos: torch.Tensor,
-    # [max_num_reqs]
-    temperature: torch.Tensor,
-    # [max_num_reqs]
-    seed: torch.Tensor,
-    num_speculative_steps: int,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    num_reqs = cu_num_logits.shape[0] - 1
-    num_logits, vocab_size = target_logits.shape
-
-    BLOCK_SIZE = 1024
-    num_blocks = triton.cdiv(vocab_size, BLOCK_SIZE)
-
-    # Gather draft logits and target argmax for greedy sampling.
-    gathered_draft_logits = target_logits.new_empty(target_logits.shape)
-    local_target_argmax = target_logits.new_empty(
-        num_logits, num_blocks, dtype=torch.int64
-    )
-    local_target_max = target_logits.new_empty(
-        num_logits, num_blocks, dtype=torch.float32
-    )
-    _gather_draft_logits_and_target_argmax_kernel[(num_logits, num_blocks)](
-        local_target_argmax,
-        local_target_argmax.stride(0),
-        local_target_max,
-        local_target_max.stride(0),
-        gathered_draft_logits,
-        gathered_draft_logits.stride(0),
-        target_logits,
-        target_logits.stride(0),
-        draft_logits,
-        draft_logits.stride(0),
-        draft_logits.stride(1),
-        expanded_idx_mapping,
-        expanded_local_pos,
-        temperature,
-        vocab_size,
-        num_speculative_steps,
-        BLOCK_SIZE=BLOCK_SIZE,
-    )
-
-    # Compute target and draft probs.
-    target_probs = torch.softmax(target_logits, dim=-1)
-    draft_probs = torch.softmax(gathered_draft_logits, dim=-1)
-
-    # Rejection sample.
-    # [num_reqs, num_speculative_steps + 1]
-    sampled = draft_sampled.new_empty(
-        num_reqs, num_speculative_steps + 1, dtype=torch.int64
-    )
-    # [num_reqs]
-    rejected_steps = sampled.new_empty(num_reqs)
-    # [num_reqs]
-    rejected_pos = pos.new_empty(num_reqs)
-    _probabilistic_rejection_kernel[(num_reqs,)](
-        sampled,
-        sampled.stride(0),
-        rejected_steps,
-        rejected_pos,
-        draft_sampled,
-        target_probs,
-        target_probs.stride(0),
-        draft_probs,
-        draft_probs.stride(0),
-        local_target_argmax,
-        local_target_argmax.stride(0),
-        local_target_max,
-        local_target_max.stride(0),
-        cu_num_logits,
-        pos,
-        idx_mapping,
-        temperature,
-        seed,
-        num_warps=1,
-        NUM_BLOCKS=num_blocks,
-        PADDED_NUM_BLOCKS=triton.next_power_of_2(num_blocks),
-    )
-
-    # Compute the logits and positions to resample the rejected/bonus
-    # tokens from.
-    # [num_reqs, vocab_size]
-    residual_logits = target_logits.new_empty(num_reqs, vocab_size)
-    _compute_residual_logits_kernel[(num_reqs, num_blocks)](
-        residual_logits,
-        residual_logits.stride(0),
-        target_probs,
-        target_probs.stride(0),
-        draft_probs,
-        draft_probs.stride(0),
-        target_logits,
-        target_logits.stride(0),
-        rejected_steps,
-        cu_num_logits,
-        idx_mapping,
-        temperature,
-        vocab_size,
-        BLOCK_SIZE=BLOCK_SIZE,
-    )
-
-    # Gumbel sample tokens from the residual distribution.
-    resampled = gumbel_sample(
-        residual_logits,
-        idx_mapping,
-        temperature,
-        seed,
-        rejected_pos,
-        apply_temperature=False,
-    )
-    sampled.scatter_(1, rejected_steps.unsqueeze(1), resampled.unsqueeze(1))
-
-    return sampled, rejected_steps + 1
-
-
 @triton.jit
 def _flatten_sampled_kernel(
     # [num_logits]
@@ -451,24 +42,20 @@ def __init__(
         self,
         sampler: Sampler,
         spec_config: SpeculativeConfig,
+        device: torch.device,
     ):
         self.sampler = sampler
         self.num_speculative_steps = spec_config.num_speculative_tokens
         self.rejection_sample_method = spec_config.rejection_sample_method
+        self.synthetic_conditional_rates: torch.Tensor | None = None
         if self.rejection_sample_method == "synthetic":
-            synthetic_acceptance_rate = spec_config.synthetic_acceptance_rate
-            if (
-                synthetic_acceptance_rate is None
-                or not 0.0 <= synthetic_acceptance_rate <= 1.0
-            ):
-                raise ValueError(
-                    f"synthetic_acceptance_rate must be in [0, 1], "
-                    f"but got {synthetic_acceptance_rate}"
-                )
-            self.base_acceptance_rate, self.decay_factor = (
-                compute_synthetic_rejection_sampler_params(
-                    synthetic_acceptance_rate, self.num_speculative_steps
-                )
+            assert spec_config.synthetic_acceptance_rates is not None
+            self.synthetic_conditional_rates = torch.tensor(
+                unconditional_to_conditional_rates(
+                    spec_config.synthetic_acceptance_rates
+                ),
+                dtype=torch.float32,
+                device=device,
             )
 
     def _get_logprobs_tensors(
@@ -511,70 +98,43 @@ def __call__(
         input_batch: InputBatch,
         draft_logits: torch.Tensor | None = None,
     ) -> SamplerOutput:
-        draft_sampled = input_batch.input_ids[input_batch.logits_indices]
         # NOTE(woosuk): We intentionally compute num_nans before sampling to make clear
         # that num_nans is computed before applying penalties and temperature.
         num_nans = get_num_nans(logits) if self.sampler.compute_nans else None
 
-        if self.rejection_sample_method == "strict":
-            sampler_output = self.sampler(logits, input_batch)
-            logprobs_tensors = sampler_output.logprobs_tensors
-            sampled, num_sampled = strict_rejection_sample(
-                sampler_output.sampled_token_ids.view(-1),
-                draft_sampled,
-                input_batch.cu_num_logits,
-                self.num_speculative_steps,
-            )
-        elif self.rejection_sample_method == "probabilistic":
-            assert draft_logits is not None
-            pos = input_batch.positions[input_batch.logits_indices]
-            processed_logits = self.sampler.apply_sampling_params(
-                logits,
-                input_batch.expanded_idx_mapping,
-                input_batch.idx_mapping_np,
-                pos,
-                draft_sampled,
-                input_batch.expanded_local_pos,
-            )
-            sampled, num_sampled = probabilistic_rejection_sample(
-                processed_logits,
-                draft_logits,
-                draft_sampled,
-                input_batch.cu_num_logits,
-                pos,
-                input_batch.idx_mapping,
-                input_batch.expanded_idx_mapping,
-                input_batch.expanded_local_pos,
-                self.sampler.sampling_states.temperature.gpu,
-                self.sampler.sampling_states.seeds.gpu,
-                self.num_speculative_steps,
-            )
-            logprobs_tensors = self._get_logprobs_tensors(
-                input_batch,
-                sampled,
-                num_sampled,
-                processed_logits
-                if self.sampler.logprobs_mode == "processed_logprobs"
-                else logits,
-            )
-        elif self.rejection_sample_method == "synthetic":
-            sampler_output = self.sampler(logits, input_batch)
-            logprobs_tensors = sampler_output.logprobs_tensors
-            sampled, num_sampled = synthetic_rejection_sample(
-                sampler_output.sampled_token_ids.view(-1),
-                draft_sampled,
-                input_batch.cu_num_logits,
-                input_batch.positions[input_batch.logits_indices],
-                input_batch.idx_mapping,
-                self.sampler.sampling_states.seeds.gpu,
-                self.base_acceptance_rate,
-                self.decay_factor,
-                self.num_speculative_steps,
-            )
-        else:
-            raise ValueError(
-                f"Unknown rejection sample method: {self.rejection_sample_method}"
-            )
+        draft_sampled = input_batch.input_ids[input_batch.logits_indices]
+        pos = input_batch.positions[input_batch.logits_indices]
+        processed_logits = self.sampler.apply_sampling_params(
+            logits,
+            input_batch.expanded_idx_mapping,
+            input_batch.idx_mapping_np,
+            pos,
+            draft_sampled,
+            input_batch.expanded_local_pos,
+        )
+        sampled, num_sampled = rejection_sample(
+            processed_logits,
+            draft_logits,
+            draft_sampled,
+            input_batch.cu_num_logits,
+            pos,
+            input_batch.idx_mapping,
+            input_batch.expanded_idx_mapping,
+            input_batch.expanded_local_pos,
+            self.sampler.sampling_states.temperature.gpu,
+            self.sampler.sampling_states.seeds.gpu,
+            self.num_speculative_steps,
+            self.synthetic_conditional_rates,
+            use_fp64=self.sampler.use_fp64_gumbel,
+        )
+        logprobs_tensors = self._get_logprobs_tensors(
+            input_batch,
+            sampled,
+            num_sampled,
+            processed_logits
+            if self.sampler.logprobs_mode == "processed_logprobs"
+            else logits,
+        )
 
         return SamplerOutput(
             sampled_token_ids=sampled,
diff --git a/vllm/v1/worker/gpu/spec_decode/rejection_sampler_utils.py b/vllm/v1/worker/gpu/spec_decode/rejection_sampler_utils.py
new file mode 100644
index 000000000000..0cfbdf4182be
--- /dev/null
+++ b/vllm/v1/worker/gpu/spec_decode/rejection_sampler_utils.py
@@ -0,0 +1,670 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import torch
+
+from vllm.triton_utils import tl, triton
+from vllm.v1.worker.gpu.sample.gumbel import gumbel_block_argmax, tl_rand64
+
+
+@triton.jit
+def _compute_block_max_and_sumexp(logits):
+    block_max = tl.max(logits, axis=0)
+    block_sumexp = tl.where(
+        block_max > float("-inf"),
+        tl.sum(tl.exp(logits - block_max)),
+        0.0,
+    )
+    return block_max, block_sumexp
+
+
+@triton.jit
+def _compute_global_lse(
+    local_max_ptr,
+    local_max_stride,
+    local_sumexp_ptr,
+    local_sumexp_stride,
+    logit_idx,
+    vocab_num_blocks,
+    PADDED_VOCAB_NUM_BLOCKS: tl.constexpr,
+):
+    blocks = tl.arange(0, PADDED_VOCAB_NUM_BLOCKS)
+    blocks_mask = blocks < vocab_num_blocks
+    maxes = tl.load(
+        local_max_ptr + logit_idx * local_max_stride + blocks,
+        mask=blocks_mask,
+        other=float("-inf"),
+    )
+    sumexps = tl.load(
+        local_sumexp_ptr + logit_idx * local_sumexp_stride + blocks,
+        mask=blocks_mask,
+        other=0.0,
+    )
+    global_max = tl.max(maxes, axis=0)
+    global_lse = global_max + tl.log(tl.sum(sumexps * tl.exp(maxes - global_max)))
+    return global_lse
+
+
+@triton.jit
+def _compute_block_stats_kernel(
+    # [num_logits, num_blocks]
+    target_local_argmax_ptr,
+    target_local_argmax_stride,
+    # [num_logits, num_blocks]
+    target_local_max_ptr,
+    target_local_max_stride,
+    # [num_logits, num_blocks]
+    target_local_sumexp_ptr,
+    target_local_sumexp_stride,
+    # [num_logits, num_blocks]
+    draft_local_max_ptr,
+    draft_local_max_stride,
+    # [num_logits, num_blocks]
+    draft_local_sumexp_ptr,
+    draft_local_sumexp_stride,
+    # [num_logits, V]
+    target_logits_ptr,
+    target_logits_stride,
+    # [max_num_reqs, num_speculative_steps, V]
+    draft_logits_ptr,
+    draft_logits_stride_0,
+    draft_logits_stride_1,
+    # [num_logits]
+    expanded_idx_mapping_ptr,
+    # [num_logits]
+    expanded_local_pos_ptr,
+    # [max_num_reqs]
+    temp_ptr,
+    vocab_size,
+    num_speculative_steps,
+    BLOCK_SIZE: tl.constexpr,
+    HAS_DRAFT_LOGITS: tl.constexpr,
+):
+    logit_idx = tl.program_id(0)
+    draft_step_idx = tl.load(expanded_local_pos_ptr + logit_idx)
+
+    if draft_step_idx >= num_speculative_steps:
+        # Bonus token. Max/argmax and summed exponentials are not needed.
+        return
+
+    req_state_idx = tl.load(expanded_idx_mapping_ptr + logit_idx)
+    temp = tl.load(temp_ptr + req_state_idx).to(tl.float32)
+
+    block_idx = tl.program_id(1)
+    block_offsets = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+    mask = block_offsets < vocab_size
+
+    if temp == 0.0:
+        # Greedy sampling. Only the target max/argmax are needed.
+        target_logits = tl.load(
+            target_logits_ptr + logit_idx * target_logits_stride + block_offsets,
+            mask=mask,
+            other=float("-inf"),
+        ).to(tl.float32)
+        value, idx = tl.max(target_logits, axis=0, return_indices=True)
+        token_id = block_idx * BLOCK_SIZE + idx
+        tl.store(
+            target_local_argmax_ptr
+            + logit_idx * target_local_argmax_stride
+            + block_idx,
+            token_id,
+        )
+        tl.store(
+            target_local_max_ptr + logit_idx * target_local_max_stride + block_idx,
+            value,
+        )
+    else:
+        # Get local target max and summed exponentials.
+        target_logits = tl.load(
+            target_logits_ptr + logit_idx * target_logits_stride + block_offsets,
+            mask=mask,
+            other=float("-inf"),
+        ).to(tl.float32)
+        target_max, target_sumexp = _compute_block_max_and_sumexp(target_logits)
+        tl.store(
+            target_local_max_ptr + logit_idx * target_local_max_stride + block_idx,
+            target_max,
+        )
+        tl.store(
+            target_local_sumexp_ptr
+            + logit_idx * target_local_sumexp_stride
+            + block_idx,
+            target_sumexp,
+        )
+        if HAS_DRAFT_LOGITS:
+            # Get local draft max and summed exponentials.
+            draft_logits = tl.load(
+                draft_logits_ptr
+                + req_state_idx * draft_logits_stride_0
+                + draft_step_idx * draft_logits_stride_1
+                + block_offsets,
+                mask=mask,
+                other=float("-inf"),
+            ).to(tl.float32)
+            draft_max, draft_sumexp = _compute_block_max_and_sumexp(draft_logits)
+            tl.store(
+                draft_local_max_ptr + logit_idx * draft_local_max_stride + block_idx,
+                draft_max,
+            )
+            tl.store(
+                draft_local_sumexp_ptr
+                + logit_idx * draft_local_sumexp_stride
+                + block_idx,
+                draft_sumexp,
+            )
+
+
+@triton.jit
+def _rejection_kernel(
+    # [num_reqs, num_speculative_steps + 1]
+    sampled_ptr,
+    sampled_stride,
+    # [num_reqs]
+    rejected_steps_ptr,
+    # [num_reqs]
+    target_rejected_logsumexp_ptr,
+    # [num_reqs]
+    draft_rejected_logsumexp_ptr,
+    # [num_logits, V]
+    target_logits_ptr,
+    target_logits_stride,
+    # [num_logits, num_blocks]
+    target_local_argmax_ptr,
+    target_local_argmax_stride,
+    # [num_logits, num_blocks]
+    target_local_max_ptr,
+    target_local_max_stride,
+    # [num_logits, num_blocks]
+    target_local_sumexp_ptr,
+    target_local_sumexp_stride,
+    # [num_logits]
+    draft_sampled_ptr,
+    # [max_num_reqs, num_speculative_steps, V]
+    draft_logits_ptr,
+    draft_logits_stride_0,
+    draft_logits_stride_1,
+    # [num_logits, num_blocks]
+    draft_local_max_ptr,
+    draft_local_max_stride,
+    # [num_logits, num_blocks]
+    draft_local_sumexp_ptr,
+    draft_local_sumexp_stride,
+    # [num_reqs + 1]
+    cu_num_logits_ptr,
+    # [num_reqs]
+    idx_mapping_ptr,
+    # [max_num_reqs]
+    temp_ptr,
+    # [max_num_reqs]
+    seed_ptr,
+    # [num_logits]
+    pos_ptr,
+    # [num_speculative_steps]
+    synthetic_conditional_rates_ptr,
+    vocab_num_blocks,
+    PADDED_VOCAB_NUM_BLOCKS: tl.constexpr,
+    HAS_DRAFT_LOGITS: tl.constexpr,
+    SYNTHETIC_MODE: tl.constexpr,
+):
+    req_idx = tl.program_id(0)
+    req_state_idx = tl.load(idx_mapping_ptr + req_idx)
+    start_idx = tl.load(cu_num_logits_ptr + req_idx)
+    end_idx = tl.load(cu_num_logits_ptr + req_idx + 1)
+    num_tokens = end_idx - start_idx
+    seed = tl.load(seed_ptr + req_state_idx)
+    temp = tl.load(temp_ptr + req_state_idx).to(tl.float32)
+
+    rejected_step = 0
+    target_lse = 0.0
+    draft_lse = 0.0
+    accepted = True
+    for i in range(num_tokens - 1):
+        if accepted:
+            logit_idx = start_idx + i
+            draft_sampled = tl.load(draft_sampled_ptr + logit_idx + 1).to(tl.int64)
+            if temp == 0.0:
+                # Greedy sampling. Accept IFF draft matches target argmax.
+                # NOTE: Target argmax is stored directly so that resampling
+                # can be skipped upon rejection.
+                target_blocks = tl.arange(0, PADDED_VOCAB_NUM_BLOCKS)
+                target_blocks_mask = target_blocks < vocab_num_blocks
+                target_local_max = tl.load(
+                    target_local_max_ptr
+                    + logit_idx * target_local_max_stride
+                    + target_blocks,
+                    mask=target_blocks_mask,
+                    other=float("-inf"),
+                )
+                max_target_block_idx = tl.argmax(target_local_max, axis=0)
+                target_argmax = tl.load(
+                    target_local_argmax_ptr
+                    + logit_idx * target_local_argmax_stride
+                    + max_target_block_idx
+                ).to(tl.int64)
+
+                if SYNTHETIC_MODE:
+                    pos = tl.load(pos_ptr + logit_idx)
+                    u = tl_rand64(seed, pos, includes_zero=False)
+                    rate = tl.load(synthetic_conditional_rates_ptr + i)
+                    accepted &= u < rate
+                else:
+                    accepted &= target_argmax == draft_sampled
+                tl.store(
+                    sampled_ptr + req_idx * sampled_stride + i,
+                    draft_sampled if accepted else target_argmax,
+                )
+            else:
+                target_logit = tl.load(
+                    target_logits_ptr + logit_idx * target_logits_stride + draft_sampled
+                ).to(tl.float32)
+                target_lse = _compute_global_lse(
+                    target_local_max_ptr,
+                    target_local_max_stride,
+                    target_local_sumexp_ptr,
+                    target_local_sumexp_stride,
+                    logit_idx,
+                    vocab_num_blocks,
+                    PADDED_VOCAB_NUM_BLOCKS,
+                )
+                target_log_prob = target_logit - target_lse
+                pos = tl.load(pos_ptr + logit_idx)
+                u = tl_rand64(seed, pos, includes_zero=False)
+                if HAS_DRAFT_LOGITS:
+                    draft_logit = tl.load(
+                        draft_logits_ptr
+                        + req_state_idx * draft_logits_stride_0
+                        + i * draft_logits_stride_1
+                        + draft_sampled
+                    ).to(tl.float32)
+                    draft_lse = _compute_global_lse(
+                        draft_local_max_ptr,
+                        draft_local_max_stride,
+                        draft_local_sumexp_ptr,
+                        draft_local_sumexp_stride,
+                        logit_idx,
+                        vocab_num_blocks,
+                        PADDED_VOCAB_NUM_BLOCKS,
+                    )
+                    draft_log_prob = draft_logit - draft_lse
+                else:
+                    # One-hot draft: q(draft_token) = 1, log_q = 0.
+                    draft_log_prob = 0
+
+                if SYNTHETIC_MODE:
+                    rate = tl.load(synthetic_conditional_rates_ptr + i)
+                    accepted &= u < rate
+                else:
+                    # Probability ratio test: p(x) > u * q(x)
+                    # Equivalent log form: log_p(x) > log(u) + log_q(x)
+                    accepted &= target_log_prob > tl.log(u) + draft_log_prob
+                tl.store(sampled_ptr + req_idx * sampled_stride + i, draft_sampled)
+            rejected_step += accepted
+    tl.store(rejected_steps_ptr + req_idx, rejected_step)
+    tl.store(target_rejected_logsumexp_ptr + req_idx, target_lse)
+    tl.store(draft_rejected_logsumexp_ptr + req_idx, draft_lse)
+
+
+@triton.jit
+def _resample_kernel(
+    # [num_reqs, num_blocks]
+    resampled_local_argmax_ptr,
+    resampled_local_argmax_stride,
+    # [num_reqs, num_blocks]
+    resampled_local_max_ptr,
+    resampled_local_max_stride,
+    # [num_logits, V]
+    target_logits_ptr,
+    target_logits_stride,
+    # [num_reqs]
+    target_rejected_logsumexp_ptr,
+    # [max_num_reqs, num_speculative_steps, V]
+    draft_logits_ptr,
+    draft_logits_stride_0,
+    draft_logits_stride_1,
+    # [num_reqs]
+    draft_rejected_logsumexp_ptr,
+    # [num_reqs]
+    rejected_step_ptr,
+    # [num_reqs + 1]
+    cu_num_logits_ptr,
+    # [num_logits]
+    expanded_idx_mapping_ptr,
+    # [num_logits]
+    draft_sampled_ptr,
+    # [max_num_reqs]
+    temp_ptr,
+    # [max_num_reqs]
+    seed_ptr,
+    # [num_logits]
+    pos_ptr,
+    vocab_size,
+    BLOCK_SIZE: tl.constexpr,
+    HAS_DRAFT_LOGITS: tl.constexpr,
+    USE_FP64: tl.constexpr,
+):
+    req_idx = tl.program_id(0)
+    resample_idx = tl.load(rejected_step_ptr + req_idx)
+    start_idx = tl.load(cu_num_logits_ptr + req_idx)
+    end_idx = tl.load(cu_num_logits_ptr + req_idx + 1)
+    resample_token_idx = start_idx + resample_idx
+    req_state_idx = tl.load(expanded_idx_mapping_ptr + resample_token_idx)
+
+    temp = tl.load(temp_ptr + req_state_idx).to(tl.float32)
+    is_bonus = resample_token_idx == end_idx - 1
+    if temp == 0.0 and not is_bonus:
+        # Greedy + non-bonus token. No resampling needed because
+        # the target argmax is already in the sampled tensor.
+        return
+
+    block_idx = tl.program_id(1)
+    block = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+    mask = block < vocab_size
+    target_logits = tl.load(
+        target_logits_ptr + resample_token_idx * target_logits_stride + block,
+        mask=mask,
+        other=float("-inf"),
+    ).to(tl.float32)
+
+    # Compute the residual logits to resample the rejected token from.
+    if is_bonus:
+        # Bonus token (no rejections). Directly use the target logits.
+        residual_logits = target_logits
+    elif HAS_DRAFT_LOGITS:
+        draft_logits = tl.load(
+            draft_logits_ptr
+            + req_state_idx * draft_logits_stride_0
+            + resample_idx * draft_logits_stride_1
+            + block,
+            mask=mask,
+            other=float("-inf"),
+        ).to(tl.float32)
+        target_lse = tl.load(target_rejected_logsumexp_ptr + req_idx)
+        draft_lse = tl.load(draft_rejected_logsumexp_ptr + req_idx)
+        target_log_probs = target_logits - target_lse
+        draft_log_probs = draft_logits - draft_lse
+        # Compute the residual: max(p(x) - q(x), 0)
+        # Equivalent log form: log(max(exp(log_p(x)) - exp(log_q(x)), 0))
+        # The more numerically stable form is:
+        # log(max(exp(a) - exp(b), 0)) = a + log(max(1 - exp(b - a), 0))
+        ratio = tl.exp(draft_log_probs - target_log_probs)
+        residual_logits = tl.where(
+            ratio < 1.0,
+            target_log_probs + tl.log(1 - ratio),
+            float("-inf"),
+        ).to(tl.float32)
+    else:
+        # One-hot draft. The residual is just the target distribution with
+        # the rejected draft token probability zeroed out.
+        rejected_draft_token = tl.load(draft_sampled_ptr + resample_token_idx + 1)
+        residual_logits = tl.where(
+            block != rejected_draft_token,
+            target_logits,
+            float("-inf"),
+        ).to(tl.float32)
+
+    # Resample the rejected/bonus token.
+    value, idx = gumbel_block_argmax(
+        residual_logits,
+        block,
+        mask,
+        resample_token_idx,
+        expanded_idx_mapping_ptr,
+        temp_ptr,
+        seed_ptr,
+        pos_ptr,
+        None,  # processed_logits_ptr
+        0,  # processed_logits_stride
+        None,  # processed_logits_col_ptr
+        vocab_size,
+        APPLY_TEMPERATURE=False,
+        USE_FP64=USE_FP64,
+    )
+    token_id = block_idx * BLOCK_SIZE + idx
+    tl.store(
+        resampled_local_argmax_ptr
+        + req_idx * resampled_local_argmax_stride
+        + block_idx,
+        token_id,
+    )
+    tl.store(
+        resampled_local_max_ptr + req_idx * resampled_local_max_stride + block_idx,
+        value,
+    )
+
+
+@triton.jit
+def _insert_resampled_kernel(
+    # [num_reqs, num_speculative_steps + 1]
+    sampled_ptr,
+    sampled_stride,
+    # [num_reqs]
+    num_sampled_ptr,
+    # [num_reqs, num_blocks]
+    resampled_local_argmax_ptr,
+    resampled_local_argmax_stride,
+    # [num_reqs, num_blocks]
+    resampled_local_max_ptr,
+    resampled_local_max_stride,
+    resample_num_blocks,
+    # [num_reqs + 1]
+    cu_num_logits_ptr,
+    # [num_reqs]
+    expanded_idx_mapping_ptr,
+    # [max_num_reqs]
+    temp_ptr,
+    PADDED_RESAMPLE_NUM_BLOCKS: tl.constexpr,
+):
+    req_idx = tl.program_id(0)
+    num_sampled = tl.load(num_sampled_ptr + req_idx)
+    start_idx = tl.load(cu_num_logits_ptr + req_idx)
+    end_idx = tl.load(cu_num_logits_ptr + req_idx + 1)
+    resample_token_idx = start_idx + num_sampled
+    req_state_idx = tl.load(expanded_idx_mapping_ptr + resample_token_idx)
+
+    # Increment the number of sampled tokens.
+    tl.store(num_sampled_ptr + req_idx, num_sampled + 1)
+
+    temp = tl.load(temp_ptr + req_state_idx).to(tl.float32)
+    is_bonus = resample_token_idx == end_idx - 1
+    if temp == 0.0 and not is_bonus:
+        # Greedy + non-bonus token. The target argmax is already
+        # in the sampled tensor.
+        return
+
+    # Insert the resampled token.
+    block = tl.arange(0, PADDED_RESAMPLE_NUM_BLOCKS)
+    mask = block < resample_num_blocks
+    resampled_local_max = tl.load(
+        resampled_local_max_ptr + req_idx * resampled_local_max_stride + block,
+        mask=mask,
+        other=float("-inf"),
+    )
+    resampled_max_block_idx = tl.argmax(resampled_local_max, axis=0)
+    resampled = tl.load(
+        resampled_local_argmax_ptr
+        + req_idx * resampled_local_argmax_stride
+        + resampled_max_block_idx,
+    )
+    tl.store(
+        sampled_ptr + req_idx * sampled_stride + num_sampled,
+        resampled,
+    )
+
+
+def rejection_sample(
+    # [num_logits, V]
+    target_logits: torch.Tensor,
+    # [max_num_reqs, num_speculative_steps, V]
+    draft_logits: torch.Tensor | None,
+    # [num_logits]
+    draft_sampled: torch.Tensor,
+    # [num_reqs + 1]
+    cu_num_logits: torch.Tensor,
+    # [num_logits]
+    pos: torch.Tensor,
+    # [num_reqs]
+    idx_mapping: torch.Tensor,
+    # [num_logits]
+    expanded_idx_mapping: torch.Tensor,
+    # [num_logits]
+    expanded_local_pos: torch.Tensor,
+    # [max_num_reqs]
+    temperature: torch.Tensor,
+    # [max_num_reqs]
+    seed: torch.Tensor,
+    num_speculative_steps: int,
+    # [num_speculative_steps]
+    synthetic_conditional_rates: torch.Tensor | None = None,
+    use_fp64: bool = False,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    num_reqs = cu_num_logits.shape[0] - 1
+    num_logits, vocab_size = target_logits.shape
+    has_draft_logits = draft_logits is not None
+
+    if draft_logits is None:
+        # When draft_logits is None, create a dummy tensor so that Triton
+        # kernel signatures receive valid pointers/strides. The kernels
+        # will never read from it when HAS_DRAFT_LOGITS=False.
+        draft_logits = target_logits.new_empty(1, 1, 1)
+
+    # Compute the block-level logits stats, such as target argmax
+    # (for greedy requests), and target max + softmax exponential
+    # (for non-greedy requests).
+    VOCAB_BLOCK_SIZE = 8192
+    vocab_num_blocks = triton.cdiv(vocab_size, VOCAB_BLOCK_SIZE)
+    padded_vocab_num_blocks = triton.next_power_of_2(vocab_num_blocks)
+    target_local_argmax = target_logits.new_empty(
+        num_logits, vocab_num_blocks, dtype=torch.int64
+    )
+    target_local_max = target_logits.new_empty(
+        num_logits, vocab_num_blocks, dtype=torch.float32
+    )
+    target_local_sumexp = target_logits.new_empty(
+        num_logits, vocab_num_blocks, dtype=torch.float32
+    )
+    draft_local_max = target_logits.new_empty(
+        num_logits, vocab_num_blocks, dtype=torch.float32
+    )
+    draft_local_sumexp = target_logits.new_empty(
+        num_logits, vocab_num_blocks, dtype=torch.float32
+    )
+    _compute_block_stats_kernel[(num_logits, vocab_num_blocks)](
+        target_local_argmax,
+        target_local_argmax.stride(0),
+        target_local_max,
+        target_local_max.stride(0),
+        target_local_sumexp,
+        target_local_sumexp.stride(0),
+        draft_local_max,
+        draft_local_max.stride(0),
+        draft_local_sumexp,
+        draft_local_sumexp.stride(0),
+        target_logits,
+        target_logits.stride(0),
+        draft_logits,
+        draft_logits.stride(0),
+        draft_logits.stride(1),
+        expanded_idx_mapping,
+        expanded_local_pos,
+        temperature,
+        vocab_size,
+        num_speculative_steps,
+        BLOCK_SIZE=VOCAB_BLOCK_SIZE,
+        HAS_DRAFT_LOGITS=has_draft_logits,
+    )
+
+    # Sample up until the first rejected/bonus token, and store
+    # the step.
+    sampled = draft_sampled.new_empty(
+        num_reqs, num_speculative_steps + 1, dtype=torch.int64
+    )
+    num_sampled = sampled.new_empty(num_reqs, dtype=torch.int32)
+    target_rejected_logsumexp = target_logits.new_empty(num_reqs, dtype=torch.float32)
+    draft_rejected_logsumexp = target_logits.new_empty(num_reqs, dtype=torch.float32)
+    _rejection_kernel[(num_reqs,)](
+        sampled,
+        sampled.stride(0),
+        num_sampled,
+        target_rejected_logsumexp,
+        draft_rejected_logsumexp,
+        target_logits,
+        target_logits.stride(0),
+        target_local_argmax,
+        target_local_argmax.stride(0),
+        target_local_max,
+        target_local_max.stride(0),
+        target_local_sumexp,
+        target_local_sumexp.stride(0),
+        draft_sampled,
+        draft_logits,
+        draft_logits.stride(0),
+        draft_logits.stride(1),
+        draft_local_max,
+        draft_local_max.stride(0),
+        draft_local_sumexp,
+        draft_local_sumexp.stride(0),
+        cu_num_logits,
+        idx_mapping,
+        temperature,
+        seed,
+        pos,
+        synthetic_conditional_rates,
+        vocab_num_blocks,
+        PADDED_VOCAB_NUM_BLOCKS=padded_vocab_num_blocks,
+        HAS_DRAFT_LOGITS=has_draft_logits,
+        SYNTHETIC_MODE=synthetic_conditional_rates is not None,
+        num_warps=1,
+    )
+
+    # Resample the rejected/bonus tokens.
+    RESAMPLE_BLOCK_SIZE = 1024
+    resample_num_blocks = triton.cdiv(vocab_size, RESAMPLE_BLOCK_SIZE)
+    padded_resample_num_blocks = triton.next_power_of_2(resample_num_blocks)
+    resampled_local_argmax = target_logits.new_empty(
+        num_reqs, resample_num_blocks, dtype=torch.int64
+    )
+    resampled_local_max = target_logits.new_empty(
+        num_reqs,
+        resample_num_blocks,
+        dtype=torch.float64 if use_fp64 else torch.float32,
+    )
+    _resample_kernel[(num_reqs, resample_num_blocks)](
+        resampled_local_argmax,
+        resampled_local_argmax.stride(0),
+        resampled_local_max,
+        resampled_local_max.stride(0),
+        target_logits,
+        target_logits.stride(0),
+        target_rejected_logsumexp,
+        draft_logits,
+        draft_logits.stride(0),
+        draft_logits.stride(1),
+        draft_rejected_logsumexp,
+        num_sampled,
+        cu_num_logits,
+        expanded_idx_mapping,
+        draft_sampled,
+        temperature,
+        seed,
+        pos,
+        vocab_size,
+        BLOCK_SIZE=RESAMPLE_BLOCK_SIZE,
+        HAS_DRAFT_LOGITS=has_draft_logits,
+        USE_FP64=use_fp64,
+    )
+
+    # Insert the resampled tokens into the output sampled.
+    _insert_resampled_kernel[(num_reqs,)](
+        sampled,
+        sampled.stride(0),
+        num_sampled,
+        resampled_local_argmax,
+        resampled_local_argmax.stride(0),
+        resampled_local_max,
+        resampled_local_max.stride(0),
+        resample_num_blocks,
+        cu_num_logits,
+        expanded_idx_mapping,
+        temperature,
+        PADDED_RESAMPLE_NUM_BLOCKS=padded_resample_num_blocks,
+    )
+    return sampled, num_sampled
diff --git a/vllm/v1/worker/gpu/spec_decode/synthetic_rejection_sampler_utils.py b/vllm/v1/worker/gpu/spec_decode/synthetic_rejection_sampler_utils.py
deleted file mode 100644
index f5388575baed..000000000000
--- a/vllm/v1/worker/gpu/spec_decode/synthetic_rejection_sampler_utils.py
+++ /dev/null
@@ -1,147 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import torch
-
-from vllm.triton_utils import tl, triton
-from vllm.v1.worker.gpu.sample.gumbel import tl_rand64
-
-MIN_ACCEPTANCE_DECAY_FACTOR = 0.85
-
-
-@triton.jit
-def _synthetic_rejection_sample_kernel(
-    # [num_reqs, num_speculative_steps + 1]
-    sampled_ptr,
-    sampled_stride,
-    # [num_reqs]
-    num_sampled_ptr,
-    # [num_draft_tokens + num_reqs]
-    target_sampled_ptr,
-    # [num_draft_tokens + num_reqs]
-    input_ids_ptr,
-    # [num_reqs + 1]
-    cu_num_logits_ptr,
-    # [num_logits]
-    pos_ptr,
-    # [num_reqs]
-    idx_mapping_ptr,
-    # [max_num_reqs]
-    seeds_ptr,
-    base_acceptance_rate,
-    decay_factor,
-):
-    req_idx = tl.program_id(0)
-    start_idx = tl.load(cu_num_logits_ptr + req_idx)
-    end_idx = tl.load(cu_num_logits_ptr + req_idx + 1)
-    num_tokens = end_idx - start_idx
-    req_state_idx = tl.load(idx_mapping_ptr + req_idx)
-    seed = tl.load(seeds_ptr + req_state_idx)
-
-    num_sampled = 0
-    acceptance_rate = base_acceptance_rate
-    rejected = False
-    for i in range(num_tokens - 1):
-        if not rejected:
-            logit_idx = start_idx + i
-            pos = tl.load(pos_ptr + logit_idx)
-            u = tl_rand64(seed, pos, includes_zero=False)
-            if u < acceptance_rate:
-                sampled = tl.load(input_ids_ptr + logit_idx + 1).to(tl.int64)
-            else:
-                sampled = tl.load(target_sampled_ptr + logit_idx)
-                rejected = True
-            tl.store(sampled_ptr + req_idx * sampled_stride + i, sampled)
-            num_sampled += 1
-            acceptance_rate *= decay_factor
-    if not rejected:
-        target_sampled = tl.load(target_sampled_ptr + start_idx + num_tokens - 1)
-        tl.store(
-            sampled_ptr + req_idx * sampled_stride + num_tokens - 1, target_sampled
-        )
-        num_sampled += 1
-    tl.store(num_sampled_ptr + req_idx, num_sampled)
-
-
-def synthetic_rejection_sample(
-    # [num_draft_tokens + num_reqs]
-    target_sampled: torch.Tensor,
-    # [num_draft_tokens + num_reqs]
-    draft_sampled: torch.Tensor,
-    # [num_reqs + 1]
-    cu_num_logits: torch.Tensor,
-    # [num_logits]
-    pos: torch.Tensor,
-    # [num_reqs]
-    idx_mapping: torch.Tensor,
-    # [max_num_reqs]
-    seed: torch.Tensor,
-    base_acceptance_rate: float,
-    decay_factor: float,
-    num_speculative_steps: int,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    num_reqs = cu_num_logits.shape[0] - 1
-    sampled = target_sampled.new_empty(num_reqs, num_speculative_steps + 1)
-    num_sampled = target_sampled.new_empty(num_reqs, dtype=torch.int32)
-    _synthetic_rejection_sample_kernel[(num_reqs,)](
-        sampled,
-        sampled.stride(0),
-        num_sampled,
-        target_sampled,
-        draft_sampled,
-        cu_num_logits,
-        pos,
-        idx_mapping,
-        seed,
-        base_acceptance_rate,
-        decay_factor,
-        num_warps=1,
-    )
-    return sampled, num_sampled
-
-
-def compute_synthetic_rejection_sampler_params(
-    p_avg: float, n: int, tol: float = 1e-9
-) -> tuple[float, float]:
-    def mean_joint_prob(a_0: float, gamma: float, n: int):
-        total = 0.0
-        for i in range(n):
-            total += a_0 ** (i + 1) * gamma ** (i * (i + 1) // 2)
-        return total / n
-
-    def min_valid_decay_factor(p: float, n: int, tol: float = 1e-9) -> float:
-        low, high = MIN_ACCEPTANCE_DECAY_FACTOR, 1.0
-        if mean_joint_prob(1, low, n) >= p:
-            return low
-
-        # Sweep for a gamma decay factor that is guaranteed
-        # to yield a base acceptance rate <= 1.
-        while (high - low) > tol:
-            mid = (low + high) / 2
-            if mean_joint_prob(1, mid, n) >= p:
-                high = mid
-            else:
-                low = mid
-        return high
-
-    def compute_base_acceptance_rate(
-        p_avg: float, gamma: float, n: int, tol: float = 1e-9
-    ) -> float:
-        if p_avg <= 0.0:
-            return 0.0
-        if p_avg >= 1.0:
-            return 1.0
-
-        # Sweep for a base acceptance rate that yields
-        # the desired mean joint probability.
-        low, high = 0.0, 1.0
-        while (high - low) > tol:
-            mid = (low + high) / 2
-            if mean_joint_prob(mid, gamma, n) >= p_avg:
-                high = mid
-            else:
-                low = mid
-        return high
-
-    decay_factor = min_valid_decay_factor(p_avg, n)
-    base_rate = compute_base_acceptance_rate(p_avg, decay_factor, n)
-    return base_rate, decay_factor
diff --git a/vllm/v1/worker/gpu/states.py b/vllm/v1/worker/gpu/states.py
index 24d225886106..cdd7286fa56e 100644
--- a/vllm/v1/worker/gpu/states.py
+++ b/vllm/v1/worker/gpu/states.py
@@ -57,6 +57,8 @@ def __init__(
         self.num_computed_tokens = StagedWriteTensor(
             self.max_num_reqs, dtype=torch.int32, device=device
         )
+        # Optimistic CPU mirror of num_computed_tokens (upper bound on GPU value).
+        self.num_computed_tokens_np = np.zeros(self.max_num_reqs, dtype=np.int32)
 
         # Last sampled tokens.
         self.last_sampled_tokens = torch.zeros(
@@ -100,8 +102,21 @@ def add_request(
         self.total_len.stage_write_elem(req_idx, prefill_len)
         self.all_token_ids.stage_write(req_idx, 0, all_token_ids)
         self.num_computed_prefill_tokens[req_idx] = num_computed_tokens
+        self.num_computed_tokens_np[req_idx] = num_computed_tokens
         self.num_computed_tokens.stage_write_elem(req_idx, num_computed_tokens)
 
+        if 0 < num_computed_tokens <= prefill_len:
+            # For PD disagg or resumed requests: set last_sampled to the last
+            # computed token so the first decode step gets the right input_id.
+            # For fresh prefill requests (num_computed_tokens == 0) the tensor
+            # is not read by combine_sampled_and_draft_tokens so we skip the
+            # write. Use a slice assignment rather than scalar indexing so the
+            # write is dispatched through fill_ without a host/device sync.
+            self.last_sampled_tokens[req_idx : req_idx + 1] = all_token_ids[
+                num_computed_tokens - 1
+            ]
+        self.draft_tokens[req_idx].zero_()
+
     def apply_staged_writes(self) -> None:
         self.prompt_len.copy_to_uva()
         self.prefill_len.copy_to_uva()
@@ -118,8 +133,8 @@ def remove_request(self, req_id: str) -> bool:
         self.free_indices.append(req_idx)
         return True
 
-    def any_prefills(self, idx_mapping_np: np.ndarray) -> bool:
-        return np.any(
+    def is_prefilling(self, idx_mapping_np: np.ndarray) -> np.ndarray:
+        return (
             self.num_computed_prefill_tokens[idx_mapping_np]
             < self.prefill_len.np[idx_mapping_np]
         )
diff --git a/vllm/v1/worker/gpu/warmup.py b/vllm/v1/worker/gpu/warmup.py
index 026b6a7d7eb9..83d87c74a4a0 100644
--- a/vllm/v1/worker/gpu/warmup.py
+++ b/vllm/v1/worker/gpu/warmup.py
@@ -29,13 +29,16 @@ def warmup_kernels(
     triton kernels. We must call the provided worker's execute_model for
     pipeline parallel coordination.
 
-    The first iteration simulates a prefill with requests of 2 prompt
-    tokens each. The second iteration simulates a decode step with all
-    requests generating 1 token each.
+    The first iteration simulates a prefill with requests of
+    2 + num_spec_steps prompt tokens each. The second iteration simulates
+    a decode step with all requests generating 1 + num_spec_steps tokens.
     """
-    prompt_token_ids = [0, 1]
-    prompt_len = len(prompt_token_ids)
     num_spec_steps = model_runner.num_speculative_steps
+    # Use 1 + num_spec_steps + 1 tokens so the prefill batch's per-request
+    # query length exceeds decode_query_len (= 1 + num_spec_steps), preventing
+    # it from being misclassified as a uniform decode batch.
+    prompt_len = 2 + num_spec_steps
+    prompt_token_ids = list(range(prompt_len))
     # After prefill, decode generates 1 verified + num_spec_steps draft tokens.
     decode_len = prompt_len + 1 + num_spec_steps
 
@@ -76,7 +79,7 @@ def _alloc_blocks(num_blocks: int) -> list[int]:
         nonlocal next_block_id
         return list(range(next_block_id, next_block_id := next_block_id + num_blocks))
 
-    # Step 1: Prefill all requests with 2 prompt tokens each.
+    # Step 1: Prefill all requests with 2 + num_spec_steps prompt tokens each.
     new_reqs = [
         NewRequestData.from_request(
             Request(req_ids[i], prompt_token_ids, sampling_params, pooling_params),
diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py
index b9cd10544826..89d69c0bde64 100644
--- a/vllm/v1/worker/gpu_input_batch.py
+++ b/vllm/v1/worker/gpu_input_batch.py
@@ -8,6 +8,7 @@
 import numpy as np
 import torch
 
+from vllm.config.reasoning import ReasoningConfig
 from vllm.lora.request import LoRARequest
 from vllm.multimodal.inputs import MultiModalFeatureSpec
 from vllm.pooling_params import PoolingParams
@@ -22,6 +23,9 @@
     MoveDirectionality,
 )
 from vllm.v1.sample.metadata import SamplingMetadata
+from vllm.v1.sample.thinking_budget_state import (
+    maybe_create_thinking_budget_state_holder,
+)
 from vllm.v1.utils import copy_slice
 from vllm.v1.worker.block_table import MultiGroupBlockTable
 
@@ -45,6 +49,12 @@ class CachedRequestState:
 
     lora_request: LoRARequest | None = None
     prompt_embeds: torch.Tensor | None = None
+    # To accumulate prompt logprobs tensor chunks across prefill steps.
+    in_progress_prompt_logprobs_cpu: LogprobsTensors | None = None
+
+    # Per-position mask for mixed-mode inputs (e.g chat completion with
+    # prompt_embeds content parts). See `Request.prompt_is_token_ids`.
+    prompt_is_token_ids: list[bool] | None = None
 
     # Used when both async_scheduling and spec_decode are enabled.
     prev_num_draft_len: int = 0
@@ -92,12 +102,20 @@ def __init__(
         max_num_blocks_per_req: list[int] | None = None,
         logitsprocs: LogitsProcessors | None = None,
         logitsprocs_need_output_token_ids: bool = False,
-        is_spec_decode: bool = False,
+        num_spec_tokens: int = 0,
         is_pooling_model: bool = False,
         cp_kv_cache_interleave_size: int = 1,
+        reasoning_config: ReasoningConfig | None = None,
     ):
+        self.thinking_budget_state_holder = maybe_create_thinking_budget_state_holder(
+            reasoning_config,
+            max_num_reqs,
+            num_spec_tokens,
+            device,
+            pin_memory,
+        )
+        self.thinking_token_budget_reqs: set[str] = set()
         self.is_pooling_model = is_pooling_model
-        self.is_spec_decode = is_spec_decode
         self.max_num_reqs = max_num_reqs
         self.max_model_len = max_model_len
         self.max_num_batched_tokens = max_num_batched_tokens
@@ -235,8 +253,9 @@ def __init__(
 
         self.num_logprobs: dict[str, int] = {}
 
-        # To accumulate prompt logprobs tensor chunks across prefill steps.
-        self.in_progress_prompt_logprobs_cpu: dict[str, LogprobsTensors] = {}
+        # req_id -> list of specific token IDs to compute logprobs for
+        # More efficient than num_logprobs=-1 when only a few tokens are needed
+        self.logprob_token_ids: dict[str, list[int]] = {}
 
         # Internal representation of per-step batch state changes, used for
         # reordering persistent batch and generating logitsprocs batch state
@@ -340,7 +359,12 @@ def add_request(
         end_idx = start_idx + len(request.output_token_ids)
         if request.prompt_token_ids is not None:
             self.token_ids_cpu[req_index, :num_prompt_tokens] = request.prompt_token_ids
-            self.is_token_ids[req_index, :num_prompt_tokens] = True
+            if request.prompt_is_token_ids is not None:
+                self.is_token_ids[req_index, :num_prompt_tokens] = (
+                    request.prompt_is_token_ids
+                )
+            else:
+                self.is_token_ids[req_index, :num_prompt_tokens] = True
         else:
             self.is_token_ids[req_index, :num_prompt_tokens] = False
         if request.prompt_embeds is not None:
@@ -395,6 +419,10 @@ def add_request(
                     else sampling_params.logprobs
                 )
 
+            # Store specific token IDs to compute logprobs for (more efficient)
+            if sampling_params.logprob_token_ids is not None:
+                self.logprob_token_ids[req_id] = sampling_params.logprob_token_ids
+
             if sampling_params.allowed_token_ids:
                 self.has_allowed_token_ids.add(req_id)
                 if self.allowed_token_ids_mask_cpu_tensor is None:
@@ -476,6 +504,7 @@ def update_req_spec_token_ids(
         start_index = self.num_tokens_no_spec[req_index]
         end_token_index = start_index + num_spec_tokens
         self.token_ids_cpu[req_index, start_index:end_token_index] = spec_token_ids
+        self.is_token_ids[req_index, start_index:end_token_index] = True
         cur_spec_token_ids.extend(spec_token_ids)
 
     def remove_request(self, req_id: str) -> int | None:
@@ -522,7 +551,7 @@ def remove_request(self, req_id: str) -> int | None:
         self.repetition_penalties_reqs.discard(req_id)
         self.generators.pop(req_index, None)
         self.num_logprobs.pop(req_id, None)
-        self.in_progress_prompt_logprobs_cpu.pop(req_id, None)
+        self.logprob_token_ids.pop(req_id, None)
         if self.prev_req_id_to_index is not None:
             self.prev_req_id_to_index.pop(req_id, None)
 
@@ -531,6 +560,7 @@ def remove_request(self, req_id: str) -> int | None:
             # False means we don't fill with -inf.
             self.allowed_token_ids_mask_cpu_tensor[req_index].fill_(False)
         self.bad_words_token_ids.pop(req_index, None)
+        self.thinking_token_budget_reqs.discard(req_id)
         return req_index
 
     def swap_states(self, i1: int, i2: int) -> None:
@@ -791,6 +821,8 @@ def refresh_metadata(self):
         # reset batch update tracking.
         # Update sampling metadata if batch state is changed.
         batch_update = self.batch_update_builder.get_and_reset(self.num_reqs)
+        if self.thinking_budget_state_holder is not None and batch_update:
+            self.thinking_budget_state_holder.sync_batch(batch_update)
         for logit_proc in self.logitsprocs.all:
             logit_proc.update_state(batch_update)
         if batch_update:
@@ -844,10 +876,15 @@ def _make_sampling_metadata(self) -> SamplingMetadata:
 
         # Only set output_token_ids if required by the current requests'
         # sampling parameters.
+        holder = self.thinking_budget_state_holder
+        thinking_budget_tracks_reqs = (
+            holder is not None and holder.has_tracked_requests()
+        )
         needs_output_token_ids = (
             not self.no_penalties
             or bool(self.bad_words_token_ids)
             or self.logitsprocs_need_output_token_ids
+            or thinking_budget_tracks_reqs
         )
         output_token_ids = (
             cast(list[list[int]], self.req_output_token_ids)
@@ -865,6 +902,15 @@ def _make_sampling_metadata(self) -> SamplingMetadata:
             )
             allowed_token_ids_mask = self.allowed_token_ids_mask[:num_reqs]
 
+        # Build per-request logprob_token_ids mapping: req_index -> token_ids
+        logprob_token_ids_by_index: dict[int, list[int]] | None = None
+        if self.logprob_token_ids:
+            logprob_token_ids_by_index = {}
+            for req_id, token_ids in self.logprob_token_ids.items():
+                if req_id in self.req_id_to_index:
+                    req_index = self.req_id_to_index[req_id]
+                    logprob_token_ids_by_index[req_index] = token_ids
+
         return SamplingMetadata(
             temperature=temperature,
             all_greedy=self.all_greedy,
@@ -873,6 +919,7 @@ def _make_sampling_metadata(self) -> SamplingMetadata:
             top_k=None if self.no_top_k else self.top_k[:num_reqs],
             generators=self.generators,
             max_num_logprobs=self.max_num_logprobs,
+            logprob_token_ids=logprob_token_ids_by_index,
             prompt_token_ids=prompt_token_ids,
             frequency_penalties=self.frequency_penalties[:num_reqs],
             presence_penalties=self.presence_penalties[:num_reqs],
@@ -883,6 +930,7 @@ def _make_sampling_metadata(self) -> SamplingMetadata:
             allowed_token_ids_mask=allowed_token_ids_mask,
             bad_words_token_ids=self.bad_words_token_ids,
             logitsprocs=self.logitsprocs,
+            thinking_budget_state_holder=self.thinking_budget_state_holder,
         )
 
     def get_pooling_params(self) -> list[PoolingParams]:
@@ -1002,7 +1050,12 @@ def update_async_output_token_ids(self) -> None:
             # output placeholders (tokens can be discarded after kv-load
             # failure) or a larger number (async spec decode adds optimistic
             # placeholders that may exceed the actual acceptance count).
-            first_placeholder = req_output_token_ids.index(-1)
+            first_placeholder = len(req_output_token_ids)
+            while (
+                first_placeholder > 0
+                and req_output_token_ids[first_placeholder - 1] == -1
+            ):
+                first_placeholder -= 1
             num_placeholders = len(req_output_token_ids) - first_placeholder
             num_to_replace = min(num_sampled_ids, num_placeholders)
             del new_ids[num_to_replace:]
@@ -1057,6 +1110,13 @@ def no_penalties(self) -> bool:
             and len(self.repetition_penalties_reqs) == 0
         )
 
+    @property
+    def no_thinking_budget(self) -> bool:
+        return (
+            self.thinking_budget_state_holder is None
+            or len(self.thinking_token_budget_reqs) == 0
+        )
+
     @property
     def max_num_logprobs(self) -> int | None:
         return max(self.num_logprobs.values()) if self.num_logprobs else None
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 8a43f43d0398..d51bf2284096 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -21,6 +21,10 @@
 from tqdm import tqdm
 
 import vllm.envs as envs
+from vllm.compilation.breakable_cudagraph import (
+    BreakableCUDAGraphWrapper,
+    is_breakable_cudagraph_enabled,
+)
 from vllm.compilation.counter import compilation_counter
 from vllm.compilation.cuda_graph import CUDAGraphStat, CUDAGraphWrapper
 from vllm.compilation.monitor import set_cudagraph_capturing_enabled
@@ -56,6 +60,9 @@
 from vllm.model_executor.layers.fused_moe.routed_experts_capturer import (
     RoutedExpertsCapturer,
 )
+from vllm.model_executor.layers.mamba.ops.ssu_dispatch import (
+    initialize_mamba_ssu_backend,
+)
 from vllm.model_executor.layers.rotary_embedding import (
     MRotaryEmbedding,
     XDRotaryEmbedding,
@@ -66,6 +73,7 @@
     initialize_layerwise_reload,
 )
 from vllm.model_executor.models.interfaces import (
+    MixtureOfExperts,
     MultiModalEmbeddings,
     SupportsMRoPE,
     SupportsMultiModal,
@@ -109,6 +117,7 @@
 from vllm.utils.platform_utils import is_pin_memory_available, num_compute_units
 from vllm.utils.torch_utils import (
     get_dtype_size,
+    is_quantized_kv_cache,
     kv_cache_dtype_str_to_dtype,
 )
 from vllm.v1.attention.backend import (
@@ -122,6 +131,7 @@
 from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadataBuilder
 from vllm.v1.attention.backends.mamba2_attn import Mamba2AttentionMetadataBuilder
 from vllm.v1.attention.backends.utils import (
+    NULL_BLOCK_ID,
     create_fast_prefill_custom_backend,
     get_dcp_local_seq_lens,
     reorder_batch_to_split_decodes_and_prefills,
@@ -151,6 +161,8 @@
     LogprobsTensors,
     ModelRunnerOutput,
     PoolerOutput,
+    RoutedExpertsLists,
+    RoutedExpertsTensors,
     SamplerOutput,
     make_empty_encoder_model_runner_output,
 )
@@ -160,9 +172,12 @@
 from vllm.v1.sample.metadata import SamplingMetadata
 from vllm.v1.sample.rejection_sampler import RejectionSampler
 from vllm.v1.sample.sampler import Sampler
+from vllm.v1.spec_decode.custom_class_proposer import create_custom_proposer
+from vllm.v1.spec_decode.dflash import DFlashProposer
 from vllm.v1.spec_decode.draft_model import DraftModelProposer
 from vllm.v1.spec_decode.eagle import EagleProposer
 from vllm.v1.spec_decode.extract_hidden_states import ExtractHiddenStatesProposer
+from vllm.v1.spec_decode.gemma4 import Gemma4Proposer
 from vllm.v1.spec_decode.medusa import MedusaProposer
 from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
 from vllm.v1.spec_decode.ngram_proposer_gpu import (
@@ -227,6 +242,7 @@ def __init__(
         invalid_req_indices: list[int],
         async_output_copy_stream: torch.cuda.Stream,
         vocab_size: int,
+        routed_experts: RoutedExpertsTensors | None = None,
     ):
         self._model_runner_output = model_runner_output
         self._invalid_req_indices = invalid_req_indices
@@ -239,6 +255,7 @@ def __init__(
         self._sampled_token_ids = sampled_token_ids
         self.vocab_size = vocab_size
         self._logprobs_tensors = logprobs_tensors
+        self._routed_experts = routed_experts
 
         # Initiate the copy on a separate stream, but do not synchronize it.
         default_stream = torch.cuda.current_stream()
@@ -252,6 +269,11 @@ def __init__(
                 if self._logprobs_tensors
                 else None
             )
+            self._routed_experts_cpu = (
+                self._routed_experts.to_cpu_nonblocking()
+                if self._routed_experts is not None
+                else None
+            )
             self.async_copy_ready_event.record()
 
     def get_output(self) -> ModelRunnerOutput:
@@ -283,6 +305,11 @@ def get_output(self) -> ModelRunnerOutput:
         output = self._model_runner_output
         output.sampled_token_ids = valid_sampled_token_ids
         output.logprobs = logprobs_lists
+
+        if self._routed_experts_cpu is not None:
+            output.routed_experts = self._routed_experts_cpu.tolists()
+        del self._routed_experts
+
         return output
 
 
@@ -449,7 +476,6 @@ def __init__(
         # Model-related.
         self.num_query_heads = model_config.get_num_attention_heads(parallel_config)
         self.inputs_embeds_size = model_config.get_inputs_embeds_size()
-        self.attention_chunk_size = model_config.attention_chunk_size
         # Only relevant for models using ALiBi (e.g, MPT)
         self.use_alibi = model_config.uses_alibi
 
@@ -478,6 +504,7 @@ def __init__(
         self.sampler = Sampler(logprobs_mode=self.model_config.logprobs_mode)
 
         self.eplb_state: EplbState | None = None
+        self._moe_model: MixtureOfExperts | None = None
         # NOTE(yongji): flag to temporarily disable EPLB during scaling up/down
         self.eep_eplb_suppressed = False
         """
@@ -515,11 +542,17 @@ def __init__(
                 | NgramProposerGPU
                 | SuffixDecodingProposer
                 | EagleProposer
+                | DFlashProposer
                 | DraftModelProposer
                 | MedusaProposer
                 | ExtractHiddenStatesProposer
+                | Gemma4Proposer
             )
-            if self.speculative_config.method == "ngram":
+            if self.speculative_config.method == "custom_class":
+                self.drafter = create_custom_proposer(  # type: ignore[assignment]
+                    self.vllm_config
+                )
+            elif self.speculative_config.method == "ngram":
                 from vllm.v1.spec_decode.ngram_proposer import NgramProposer
 
                 self.drafter = NgramProposer(self.vllm_config)
@@ -546,6 +579,11 @@ def __init__(
                 self._ngram_pinned_val_buf = torch.zeros(
                     self.max_num_reqs, dtype=torch.int32, pin_memory=True
                 )
+            elif self.speculative_config.use_gemma4_mtp():
+                self.drafter = Gemma4Proposer(self.vllm_config, self.device, self)
+            elif self.speculative_config.use_dflash():
+                self.drafter = DFlashProposer(self.vllm_config, self.device, self)
+                self.use_aux_hidden_state_outputs = True
             elif self.speculative_config.method == "suffix":
                 self.drafter = SuffixDecodingProposer(self.vllm_config)
             elif self.speculative_config.use_eagle():
@@ -568,7 +606,9 @@ def __init__(
                     "Unknown speculative decoding method: "
                     f"{self.speculative_config.method}"
                 )
-            self.rejection_sampler = RejectionSampler(self.sampler)
+            self.rejection_sampler = RejectionSampler(
+                self.sampler, self.speculative_config, self.device
+            )
 
         self.num_spec_tokens = 0
         self.valid_sampled_token_count_gpu: torch.Tensor | None = None
@@ -588,7 +628,6 @@ def __init__(
         # NOTE(rob): num_prompt_logprobs only includes reqs
         # that are currently in the prefill phase.
         self.num_prompt_logprobs: dict[str, int] = {}
-        self.comm_stream = torch.cuda.Stream()
 
         # Input Batch
         # NOTE(Chen): Ideally, we should initialize the input batch inside
@@ -619,7 +658,7 @@ def __init__(
             vocab_size=self.model_config.get_vocab_size(),
             block_sizes=[placeholder_block_size],
             kernel_block_sizes=[placeholder_block_size],
-            is_spec_decode=bool(self.vllm_config.speculative_config),
+            num_spec_tokens=self.num_spec_tokens,
             logitsprocs=build_logitsprocs(
                 self.vllm_config,
                 self.device,
@@ -635,6 +674,7 @@ def __init__(
             or self.vllm_config.reasoning_config is not None,
             is_pooling_model=self.is_pooling_model,
             cp_kv_cache_interleave_size=self.parallel_config.cp_kv_cache_interleave_size,
+            reasoning_config=self.vllm_config.reasoning_config,
         )
 
         # Separate cuda stream for overlapping transfer of sampled token ids from
@@ -714,16 +754,6 @@ def __init__(
             self.max_num_reqs, dtype=torch.int32
         )
 
-        # Only relevant for multimodal models
-        if self.supports_mm_inputs:
-            # Double buffer to avoid race condition: previous iteration's async
-            # copy may still be reading from CPU while current iteration writes.
-            self.is_mm_embed_buffers = [
-                self._make_buffer(self.max_num_tokens, dtype=torch.bool),
-                self._make_buffer(self.max_num_tokens, dtype=torch.bool),
-            ]
-            self.is_mm_embed_idx = 0
-
         # Only relevant for models using M-RoPE (e.g, Qwen2-VL)
         if self.uses_mrope:
             # NOTE: `mrope_positions` is implemented with one additional dummy
@@ -792,6 +822,8 @@ def __init__(
 
         # Cached outputs.
         self._draft_token_ids: list[list[int]] | torch.Tensor | None = None
+        self._draft_probs: torch.Tensor | None = None
+        self._draft_prob_req_ids: list[str] | None = None
         # N-gram GPU path: async D2H buffer/event for per-request valid draft counts.
         self._num_valid_draft_tokens: torch.Tensor | None = None
         self._num_valid_draft_tokens_cpu: torch.Tensor | None = None
@@ -855,7 +887,12 @@ def __init__(
         self.execute_model_state: ExecuteModelState | None = None
         self.kv_connector_output: KVConnectorOutput | None = None
         self.mamba_state_idx: dict[str, int] = {}
-        self._mamba_copy_bufs: mamba_utils.MambaCopyBuffers | None = None
+        self._mamba_bufs: mamba_utils.MambaBuffers | None = None
+        self.mamba_prev_last_scheduled_idx: CpuGpuBuffer | None = None
+        if self.cache_config.mamba_cache_mode == "all" and self.num_spec_tokens > 0:
+            self.mamba_prev_last_scheduled_idx = self._make_buffer(
+                self.max_num_reqs, dtype=torch.int32
+            )
         self.layerwise_nvtx_hooks_registered = False
 
     def update_max_model_len(self, max_model_len: int) -> None:
@@ -883,6 +920,9 @@ def reset_encoder_cache(self) -> None:
         self.encoder_cache.clear()
         self.late_interaction_runner.clear()
 
+    def post_kv_cache_wake_up(self) -> None:
+        self.init_fp8_kv_scales()
+
     @torch.inference_mode()
     def init_fp8_kv_scales(self) -> None:
         """
@@ -892,7 +932,7 @@ def init_fp8_kv_scales(self) -> None:
           If these are left at 0.0 (default after wake_up), all KV cache values
           become effectively zero, causing gibberish output.
         """
-        if not self.cache_config.cache_dtype.startswith("fp8"):
+        if not is_quantized_kv_cache(self.cache_config.cache_dtype):
             return
 
         kv_caches = getattr(self, "kv_caches", [])
@@ -952,15 +992,23 @@ def _make_buffer(
             with_numpy=numpy,
         )
 
-    def _get_mamba_copy_bufs(self) -> mamba_utils.MambaCopyBuffers:
-        if self._mamba_copy_bufs is None:
-            self._mamba_copy_bufs = mamba_utils.MambaCopyBuffers.create(
-                self.max_num_reqs,
-                self.kv_cache_config,
-                self.model.get_mamba_state_copy_func(),
-                self._make_buffer,
+    def _get_mamba_bufs(self) -> mamba_utils.MambaBuffers:
+        # Only reachable on the ``mamba_cache_mode == "align"`` path.
+        # The postprocess sub-object is additionally gated on spec
+        # decode + hybrid model.
+        assert self.cache_config.mamba_cache_mode == "align"
+        if self._mamba_bufs is None:
+            self._mamba_bufs = mamba_utils.MambaBuffers.create(
+                max_num_reqs=self.max_num_reqs,
+                kv_cache_config=self.kv_cache_config,
+                copy_funcs=self.model.get_mamba_state_copy_func(),
+                make_buffer=self._make_buffer,
+                device=self.device,
+                with_postprocess_align=(
+                    self.speculative_config is not None and self.model_config.is_hybrid
+                ),
             )
-        return self._mamba_copy_bufs
+        return self._mamba_bufs
 
     def _init_model_kwargs(self):
         model_kwargs = dict[str, Any]()
@@ -983,16 +1031,23 @@ def _init_model_kwargs(self):
         if len(token_type_id_requests) == 0:
             return model_kwargs
 
-        seq_lens = self.seq_lens[:num_reqs]
+        # Build ids on CPU using the CPU-resident upper bound for seq_lens;
+        # `torch.arange(seq_lens[i])` with a GPU scalar would force a sync.
+        seq_lens_cpu = self.optimistic_seq_lens_cpu[:num_reqs].tolist()
         token_type_ids = []
 
         for i in range(num_reqs):
-            pos = token_type_id_requests.get(i, seq_lens[i])
-            ids = (torch.arange(seq_lens[i]) >= pos).int()
+            seq_len_i = seq_lens_cpu[i]
+            pos = token_type_id_requests.get(i, seq_len_i)
+            ids = (torch.arange(seq_len_i) >= pos).int()
             token_type_ids.append(ids)
 
-        model_kwargs["token_type_ids"] = torch.concat(token_type_ids).to(
-            device=self.device
+        token_type_ids_cpu = torch.empty(
+            sum(seq_lens_cpu), dtype=torch.int32, pin_memory=self.pin_memory
+        )
+        torch.cat(token_type_ids, out=token_type_ids_cpu)
+        model_kwargs["token_type_ids"] = token_type_ids_cpu.to(
+            device=self.device, non_blocking=True
         )
         return model_kwargs
 
@@ -1051,6 +1106,13 @@ def _init_device_properties(self) -> None:
     def _sync_device(self) -> None:
         torch.accelerator.synchronize()
 
+    def _get_or_create_async_output_copy_stream(self) -> torch.cuda.Stream:
+        stream = self.async_output_copy_stream
+        if stream is None:
+            stream = torch.cuda.Stream()
+            self.async_output_copy_stream = stream
+        return stream
+
     def _update_states(self, scheduler_output: "SchedulerOutput") -> Callable | None:
         """Update the cached states and the persistent batch with the scheduler
         output.
@@ -1152,6 +1214,7 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> Callable | None
                 req_id=req_id,
                 prompt_token_ids=new_req_data.prompt_token_ids,
                 prompt_embeds=new_req_data.prompt_embeds,
+                prompt_is_token_ids=new_req_data.prompt_is_token_ids,
                 mm_features=new_req_data.mm_features,
                 sampling_params=sampling_params,
                 pooling_params=pooling_params,
@@ -1331,13 +1394,27 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> Callable | None
             # For the last rank, we don't need to update the token_ids_cpu
             # because the sampled tokens are already cached.
             if not is_last_rank:
-                # Add new_token_ids to token_ids_cpu.
-                start_token_index = num_computed_tokens
-                end_token_index = num_computed_tokens + len(new_token_ids)
-                self.input_batch.token_ids_cpu[
-                    req_index, start_token_index:end_token_index
-                ] = new_token_ids
-                self.input_batch.num_tokens_no_spec[req_index] = end_token_index
+                start_token_index = self.input_batch.num_tokens_no_spec[req_index]
+                # For chunked prefill, num_computed_tokens may less
+                # than num_tokens_no_spec.
+                # Async scheduled PP: no new_token_ids, advance num_tokens_no_spec
+                # according to num_computed_tokens.
+                end_token_index = max(
+                    start_token_index,
+                    num_computed_tokens + len(new_token_ids),
+                )
+                if end_token_index > start_token_index:
+                    if new_token_ids:
+                        # Add new_token_ids to token_ids_cpu.
+                        num_new_tokens = end_token_index - start_token_index
+                        tokens_to_append = new_token_ids[-num_new_tokens:]
+                        self.input_batch.token_ids_cpu[
+                            req_index, start_token_index:end_token_index
+                        ] = tokens_to_append
+                    self.input_batch.is_token_ids[
+                        req_index, start_token_index:end_token_index
+                    ] = True
+                    self.input_batch.num_tokens_no_spec[req_index] = end_token_index
 
             # Add spec_token_ids to token_ids_cpu.
             self.input_batch.update_req_spec_token_ids(req_state, scheduled_spec_tokens)
@@ -1420,45 +1497,32 @@ def _update_states_after_model_execute(
         if not self.speculative_config or not self.model_config.is_hybrid:
             return
 
-        # TODO: Remove .cpu() sync to enable fully async for hybrid model;
-        # Use num_computed_tokens.gpu instead of req.num_computed_tokens to
-        # support aligned mamba cache mode.
-        # Find the number of accepted tokens for each sequence.
+        # Count the number of accepted tokens for each sequence.
+        # Valid tokens are contiguous from position 0, so counting non-(-1)
+        # tokens gives us the first -1 position (i.e., number of accepted).
         num_reqs = output_token_ids.size(0)
-        self.num_accepted_tokens.gpu[:num_reqs] = (
-            (
-                torch.cat(
-                    [
-                        output_token_ids,
-                        torch.full(
-                            (num_reqs, 1),
-                            -1,
-                            device=output_token_ids.device,
-                        ),
-                    ],
-                    dim=1,
-                )
-                == -1
-            )
-            .int()
-            .argmax(-1)
-        )
+        self.num_accepted_tokens.gpu[:num_reqs] = (output_token_ids != -1).sum(dim=1)
 
         if self.cache_config.mamba_cache_mode == "align":
-            for i, num_tokens in enumerate(
-                self.num_accepted_tokens.gpu[:num_reqs].cpu().numpy()
-            ):
-                self.input_batch.num_accepted_tokens_cpu[i] = num_tokens
-            mamba_utils.postprocess_mamba(
-                scheduler_output,
-                self.kv_cache_config,
-                self.input_batch,
-                self.requests,
-                self.mamba_state_idx,
-                self.compilation_config.static_forward_context,
-                self.model.get_mamba_state_copy_func(),
-                self._get_mamba_copy_bufs(),
+            # Fused GPU postprocess: state copies + per-request accepted-token
+            # update without CPU-GPU sync. The metadata
+            # (num_scheduled_tokens, num_draft_tokens, num_computed_tokens) is
+            # pre-staged to GPU buffers in _prepare_inputs.
+            mamba_utils.postprocess_mamba_align_gpu(
+                bufs=self._get_mamba_bufs(),
+                num_reqs=num_reqs,
+                num_accepted_tokens_gpu=self.num_accepted_tokens.gpu,
+                num_accepted_tokens_cpu_tensor=(
+                    self.input_batch.num_accepted_tokens_cpu_tensor
+                ),
+                input_batch=self.input_batch,
+                kv_cache_config=self.kv_cache_config,
+                forward_context=self.compilation_config.static_forward_context,
+                mamba_state_copy_funcs=self.model.get_mamba_state_copy_func(),
             )
+
+            assert self.num_accepted_tokens_event is not None
+            self.num_accepted_tokens_event.record()
         else:
             self.input_batch.num_accepted_tokens_cpu_tensor[:num_reqs].copy_(
                 self.num_accepted_tokens.gpu[:num_reqs], non_blocking=True
@@ -1466,6 +1530,17 @@ def _update_states_after_model_execute(
             assert self.num_accepted_tokens_event is not None
             self.num_accepted_tokens_event.record()
 
+            if self.cache_config.mamba_cache_mode == "all":
+                mamba_utils.postprocess_mamba_all(
+                    scheduler_output,
+                    self.kv_cache_config,
+                    self.input_batch,
+                    self.requests,
+                    self.mamba_state_idx,
+                    self.num_spec_tokens,
+                    num_reqs,
+                )
+
     def _update_streaming_request(
         self, req_id: str, new_req_data: NewRequestData
     ) -> CachedRequestState:
@@ -1509,10 +1584,16 @@ def _init_mrope_positions(self, req_state: CachedRequestState):
         )
         mrope_model = cast(SupportsMRoPE, model)
 
+        # `prompt_embeds` is a passthrough modality (no grid_thw), models'
+        # M-RoPE code assumes per-feature grid info, so filter it out. The
+        # prompt_embeds positions are treated as text positions for M-RoPE.
+        mrope_features = [
+            f for f in req_state.mm_features if f.modality != "prompt_embeds"
+        ]
         req_state.mrope_positions, req_state.mrope_position_delta = (
             mrope_model.get_mrope_input_positions(
                 req_state.prompt_token_ids,
-                req_state.mm_features,
+                mrope_features,
             )
         )
 
@@ -1696,8 +1777,6 @@ def _prepare_input_ids(
                 self.input_batch.prev_sampled_token_ids[:num_common_tokens, 0],
                 non_blocking=True,
             )
-            if self.enable_prompt_embeds:
-                self.is_token_ids.gpu[:num_common_tokens] = True
             return
         # Upload the index tensors asynchronously so the scatter can be non-blocking.
         sampled_tokens_index_tensor = torch.tensor(
@@ -1933,15 +2012,39 @@ def _prepare_inputs(
         # _update_states_after_model_execute for hybrid models).
         if self.num_accepted_tokens_event is not None:
             self.num_accepted_tokens_event.synchronize()
-            self.num_accepted_tokens.np[:num_reqs] = (
-                self.input_batch.num_accepted_tokens_cpu[:num_reqs]
-            )
+            # Async mode: condense() reordered indices, use prev_positions mapping
+            if self.use_async_scheduling and prev_req_id_to_index:
+                prev_idx = self.prev_positions.np[:num_reqs]
+                new_mask = prev_idx < 0
+                self.num_accepted_tokens.np[:num_reqs] = (
+                    self.input_batch.num_accepted_tokens_cpu[
+                        np.where(new_mask, 0, prev_idx)
+                    ]
+                )
+                self.num_accepted_tokens.np[:num_reqs][new_mask] = 1
+                self.input_batch.num_accepted_tokens_cpu[:num_reqs] = (
+                    self.num_accepted_tokens.np[:num_reqs]
+                )
+            else:
+                # Non-async mode: use values directly
+                self.num_accepted_tokens.np[:num_reqs] = (
+                    self.input_batch.num_accepted_tokens_cpu[:num_reqs]
+                )
             self.num_accepted_tokens.np[num_reqs:].fill(1)
             self.num_accepted_tokens.copy_to_gpu()
         else:
             self.num_accepted_tokens.np.fill(1)
             self.num_accepted_tokens.gpu.fill_(1)
 
+        if self.mamba_prev_last_scheduled_idx is not None:
+            mamba_utils.preprocess_mamba_all_specdec(
+                scheduler_output,
+                self.input_batch,
+                self.mamba_state_idx,
+                num_reqs,
+                self.mamba_prev_last_scheduled_idx,
+            )
+
         # Update num_computed_tokens on GPU. In async spec decode,
         # CPU values are optimistic (all drafts accepted). The kernel
         # corrects on GPU using the previous step's
@@ -2130,9 +2233,9 @@ def _get_block_table(kv_cache_gid: int):
                 blk_table = self.input_batch.block_table[kv_cache_gid]
                 blk_table_tensor = blk_table.get_device_tensor(num_reqs_padded)
 
-            # Fill unused with -1. Needed for reshape_and_cache in full cuda
-            # graph mode. `blk_table_tensor` -1 to match mamba PAD_SLOT_ID
-            blk_table_tensor[num_reqs:num_reqs_padded].fill_(-1)
+            # Fill unused block table entries with NULL_BLOCK_ID (null block)
+            # for CUDAGraph padding. Block 0 is reserved for padding.
+            blk_table_tensor[num_reqs:num_reqs_padded].fill_(NULL_BLOCK_ID)
             return blk_table_tensor
 
         assert slot_mappings is not None
@@ -2140,9 +2243,18 @@ def _get_block_table(kv_cache_gid: int):
         slot_mapping_gid_0 = slot_mappings[0]
 
         if self.routed_experts_initialized:
+            # Copy this step's attention slot_mapping into our private
+            # device buffer. The shared ``slot_mappings[attn_gid]`` is
+            # owned by the attention block table and will be overwritten
+            # by the next ``_prepare_inputs``; we need a stable snapshot
+            # because the async D2H may still be in flight on the copy
+            # stream when the next step runs.
             attn_gid = self.routed_experts_attn_gid
             slot_mapping_attn = slot_mappings[attn_gid]
-            self.slot_mapping = slot_mapping_attn[:num_tokens].cpu().numpy()
+            self.routed_experts_slot_mapping_device[:num_tokens].copy_(
+                slot_mapping_attn[:num_tokens]
+            )
+
         num_computed_tokens_cpu = self.input_batch.num_computed_tokens_cpu_tensor[
             :num_reqs_padded
         ]
@@ -2150,11 +2262,15 @@ def _get_block_table(kv_cache_gid: int):
             :num_reqs_padded
         ]
         seq_lens_cpu = self.optimistic_seq_lens_cpu[:num_reqs_padded]
+        seq_lens_cpu_upper_bound = seq_lens_cpu
 
         # is_prefilling: True if request is still in prefill phase.
         # Used by mamba backends to distinguish actual decodes from
         # short extends.
         is_prefilling = num_computed_tokens_cpu < num_prompt_tokens_cpu
+        # Zero out padded rows so stale data from condense() doesn't
+        # misclassify padding as prefill in CUDA graph mode.
+        is_prefilling[num_reqs:] = False
 
         if self.use_async_spec_decode:
             # GPU tensors are authoritative in async mode.
@@ -2167,6 +2283,7 @@ def _get_block_table(kv_cache_gid: int):
             seq_lens=self.seq_lens[:num_reqs_padded],
             _seq_lens_cpu=seq_lens_cpu,
             _num_computed_tokens_cpu=num_computed_tokens_cpu,
+            seq_lens_cpu_upper_bound=seq_lens_cpu_upper_bound,
             num_reqs=num_reqs_padded,
             num_actual_tokens=num_tokens_padded,
             max_query_len=max_query_len,
@@ -2175,6 +2292,7 @@ def _get_block_table(kv_cache_gid: int):
             slot_mapping=slot_mapping_gid_0,
             causal=True,
             is_prefilling=is_prefilling,
+            positions=self.positions[:num_tokens_padded],
         )
 
         if self.dcp_world_size > 1:
@@ -2237,6 +2355,13 @@ def _build_attn_group_metadata(
                         :num_reqs_padded
                     ],
                 )
+                if (
+                    isinstance(builder, Mamba2AttentionMetadataBuilder)
+                    and self.mamba_prev_last_scheduled_idx is not None
+                ):
+                    extra_attn_metadata_args["prev_last_scheduled_idx"] = (
+                        self.mamba_prev_last_scheduled_idx.gpu[:num_reqs_padded]
+                    )
 
             if for_cudagraph_capture:
                 attn_metadata_i = builder.build_for_cudagraph_capture(
@@ -2289,11 +2414,24 @@ def _build_attn_group_metadata(
                 cm.slot_mapping = slot_mappings[kv_cache_gid]
 
             if self.speculative_config and spec_decode_common_attn_metadata is None:
-                if isinstance(self.drafter, EagleProposer):
+                if isinstance(
+                    self.drafter,
+                    (
+                        EagleProposer,
+                        DFlashProposer,
+                        Gemma4Proposer,
+                        ExtractHiddenStatesProposer,
+                    ),
+                ):
                     if self.drafter.kv_cache_gid == kv_cache_gid:
                         spec_decode_common_attn_metadata = cm
                 else:
                     spec_decode_common_attn_metadata = cm
+            # Capture per-group block tables for multi-group proposers.
+            if self.speculative_config and isinstance(self.drafter, Gemma4Proposer):
+                self.drafter.set_per_group_block_table(
+                    kv_cache_gid, cm.block_table_tensor
+                )
 
             for attn_gid in range(len(self.attn_groups[kv_cache_gid])):
                 if ubatch_slices is not None:
@@ -2305,23 +2443,31 @@ def _build_attn_group_metadata(
 
         if self.is_mm_prefix_lm:
             req_doc_ranges = {}
+
+            # Gemma4 bidi: skip ranges that exceed the sliding
+            # window. When image tokens > sliding_window, bidi causes
+            # early image tokens to attend to the entire image
+            # (e.g. 6 → 1092 targets), degrading spatial precision.
+            # Per-range filtering keeps bidi for small images/video
+            # frames while skipping oversized images.
+            hf_text_config = self.model_config.hf_text_config
+            _bidi_sw = getattr(hf_text_config, "sliding_window", None)
+
             for req_id in self.input_batch.req_ids:
                 image_doc_ranges = []
                 req_state = self.requests[req_id]
                 for mm_feature in req_state.mm_features:
                     pos_info = mm_feature.mm_position
                     img_doc_range = pos_info.extract_embeds_range()
-                    image_doc_ranges.extend(img_doc_range)
+                    for r in img_doc_range:
+                        if _bidi_sw is not None and (r[1] - r[0] + 1) > _bidi_sw:
+                            continue
+                        image_doc_ranges.append(r)
                 req_idx = self.input_batch.req_id_to_index[req_id]
                 req_doc_ranges[req_idx] = image_doc_ranges
 
-            if isinstance(attn_metadata, list):
-                for ub_metadata in attn_metadata:
-                    for _metadata in ub_metadata.values():
-                        _metadata.mm_prefix_range = req_doc_ranges  # type: ignore[attr-defined]
-            else:
-                for _metadata in attn_metadata.values():
-                    _metadata.mm_prefix_range = req_doc_ranges  # type: ignore[attr-defined]
+            # Set mm_prefix_range for all attention metadata
+            self._set_mm_prefix_range_for_metadata(attn_metadata, req_doc_ranges)
 
         if spec_decode_common_attn_metadata is not None and (
             num_reqs != num_reqs_padded or num_tokens != num_tokens_padded
@@ -2655,10 +2801,9 @@ def _prepare_kv_sharing_fast_prefill(
         # There might have leftover indices in logits_indices[num_logits:]
         # from previous iterations, whose values may be greater than the
         # batch size in the current iteration. To ensure indices are always
-        # valid, we fill the padded indices with the last index.
-        self.kv_sharing_fast_prefill_logits_indices[num_logits:].fill_(
-            logits_indices[-1].item()
-        )
+        # valid, fill the padded indices with the last index. Broadcast the
+        # scalar GPU-side to avoid a D2H sync on `.item()`.
+        self.kv_sharing_fast_prefill_logits_indices[num_logits:] = logits_indices[-1]
         # Dispatch for the decoder portion of the model.
         _, batch_desc = self.cudagraph_dispatcher.dispatch(
             num_logits, invalid_modes={CUDAGraphMode.FULL}
@@ -2722,6 +2867,33 @@ def _execute_mm_encoder(
         if not mm_kwargs:
             return []
 
+        # `prompt_embeds` is a passthrough modality, the tensor is already in
+        # the model embedding space, so no encoder runs. Inject each
+        # `prompt_embeds` tensor directly into the encoder cache here so that
+        # `_gather_mm_embeddings` can splice it via the standard `is_mm_embed`
+        # path.
+        pe_indices = [
+            i
+            for i, (modality, _) in enumerate(mm_kwargs)
+            if modality == "prompt_embeds"
+        ]
+        if pe_indices:
+            for i in pe_indices:
+                pe_tensor = mm_kwargs[i][1]["embedding"].data
+                assert isinstance(pe_tensor, torch.Tensor)
+
+                self.encoder_cache[mm_hashes[i]] = pe_tensor.to(self.device)
+                self.maybe_save_ec_to_connector(self.encoder_cache, mm_hashes[i])
+            # Filter out `prompt_embeds` items from mm_kwargs/mm_hashes/mm_lora_refs
+            # since they don't require further encoder processing.
+            mm_hashes = [h for i, h in enumerate(mm_hashes) if i not in pe_indices]
+            mm_kwargs = [k for i, k in enumerate(mm_kwargs) if i not in pe_indices]
+            mm_lora_refs = [
+                r for i, r in enumerate(mm_lora_refs) if i not in pe_indices
+            ]
+            if not mm_kwargs:
+                return []  # nothing left to encode after filtering out `prompt_embeds`
+
         should_time = bool(
             self.observability_config
             and self.observability_config.enable_mm_processor_stats
@@ -2771,7 +2943,20 @@ def _execute_mm_encoder(
             )
             self.lora_manager.set_active_adapters(lora_requests, tower_mapping)
 
-            if hasattr(self.model, "get_num_mm_connector_tokens"):
+            # Only set connector mapping if the model actually has a connector.
+            # Some multimodal models inherit a stub `get_num_mm_connector_tokens`
+            # from `SupportsMultiModal`, which returns None and should not be
+            # treated as a signal that connector LoRA is supported.
+            mm_mapping = (
+                self.model.get_mm_mapping()  # type: ignore[attr-defined]
+                if hasattr(self.model, "get_mm_mapping")
+                else None
+            )
+            if (
+                mm_mapping is not None
+                and mm_mapping.connector
+                and hasattr(self.model, "get_num_mm_connector_tokens")
+            ):
                 post_op_counts = [
                     self.model.get_num_mm_connector_tokens(num_tokens)  # type: ignore[attr-defined]
                     for num_tokens in encoder_token_counts
@@ -2890,14 +3075,10 @@ def _gather_mm_embeddings(
     ) -> tuple[list[torch.Tensor], torch.Tensor]:
         total_num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
 
-        # Swap to the other buffer to avoid race condition with previous
-        # iteration's async copy that may still be reading from CPU.
-        self.is_mm_embed_idx = 1 - self.is_mm_embed_idx
-        is_mm_embed_buf = self.is_mm_embed_buffers[self.is_mm_embed_idx]
-
         mm_embeds = list[torch.Tensor]()
-        is_mm_embed = is_mm_embed_buf.cpu
-        is_mm_embed[:total_num_scheduled_tokens] = False
+        is_mm_embed = torch.zeros(
+            total_num_scheduled_tokens, dtype=torch.bool, device="cpu"
+        )
 
         req_start_idx = 0
         should_sync_mrope_positions = False
@@ -2980,8 +3161,6 @@ def _gather_mm_embeddings(
             mm_embeds.extend(mm_embeds_req)
             req_start_idx += num_scheduled_tokens
 
-        is_mm_embed = is_mm_embed_buf.copy_to_gpu(total_num_scheduled_tokens)
-
         if should_sync_mrope_positions:
             self._calc_mrope_positions(scheduler_output)
             self.mrope_positions.copy_to_gpu(total_num_scheduled_tokens)
@@ -2995,7 +3174,9 @@ def _gather_mm_embeddings(
     def get_model(self) -> nn.Module:
         if not hasattr(self, "model"):
             raise ValueError("Cannot get model before model has been initialized")
-        if isinstance(self.model, (CUDAGraphWrapper, UBatchWrapper)):
+        if isinstance(
+            self.model, (CUDAGraphWrapper, UBatchWrapper, BreakableCUDAGraphWrapper)
+        ):
             # get raw model out of the cudagraph wrapper.
             return self.model.unwrap()
         return self.model
@@ -3035,7 +3216,7 @@ def get_supported_tasks(self) -> tuple[SupportedTask, ...]:
 
         return tuple(tasks)
 
-    def sync_and_slice_intermediate_tensors(
+    def sync_and_gather_intermediate_tensors(
         self,
         num_tokens: int,
         intermediate_tensors: IntermediateTensors | None,
@@ -3046,24 +3227,23 @@ def sync_and_slice_intermediate_tensors(
         tp = self.vllm_config.parallel_config.tensor_parallel_size
         is_rs = is_residual_scattered_for_sp(self.vllm_config, num_tokens)
 
-        # When sequence parallelism is enabled, the "residual" tensor is sharded
-        # across tensor parallel ranks, so each rank only needs its own slice.
+        # When sequence parallelism is enabled, the "residual" tensor is
+        # sharded across TP ranks. All-gather it here because downstream
+        # QKV + Attention needs the full residual before the SP split point.
         if sync_self:
             assert intermediate_tensors is not None
             for k, v in intermediate_tensors.items():
                 is_scattered = k == "residual" and is_rs
-                copy_len = num_tokens // tp if is_scattered else num_tokens
-                self.intermediate_tensors[k][:copy_len].copy_(
-                    v[:copy_len], non_blocking=True
+                if is_scattered:
+                    local_len = num_tokens // tp
+                    v = get_tp_group().all_gather(v[:local_len], dim=0)
+
+                self.intermediate_tensors[k][:num_tokens].copy_(
+                    v[:num_tokens], non_blocking=True
                 )
 
         return IntermediateTensors(
-            {
-                k: v[: num_tokens // tp]
-                if k == "residual" and is_rs
-                else v[:num_tokens]
-                for k, v in self.intermediate_tensors.items()
-            }
+            {k: v[:num_tokens] for k, v in self.intermediate_tensors.items()}
         )
 
     def eplb_step(self, is_dummy: bool = False, is_profile: bool = False) -> None:
@@ -3074,8 +3254,7 @@ def eplb_step(self, is_dummy: bool = False, is_profile: bool = False) -> None:
             return
 
         assert self.eplb_state is not None
-        model = self.get_model()
-        assert is_mixture_of_experts(model)
+        assert self._moe_model is not None
         self.eplb_state.step(
             is_dummy,
             is_profile,
@@ -3087,11 +3266,10 @@ def setup_eplb_from_mapping(
         expanded_physical_to_logical: torch.Tensor,
         old_num_physical_experts: int,
     ) -> None:
-        model = self.get_model()
-        assert is_mixture_of_experts(model)
+        assert self._moe_model is not None
 
         self.eplb_state = EplbState.from_mapping(
-            model=model,
+            model=self._moe_model,
             model_config=self.model_config,
             device=self.device,
             parallel_config=self.parallel_config,
@@ -3148,21 +3326,21 @@ def _pool(
             model_runner_output.pooler_output = [None] * num_reqs
             return model_runner_output
 
-        if self.use_async_scheduling:
-            return AsyncGPUPoolingModelRunnerOutput(
-                model_runner_output=model_runner_output,
+        if not current_platform.is_cuda_alike():
+            # cpu/xpu runners cannot use the CUDA stream/event-based wrapper.
+            model_runner_output.pooler_output = _copy_pooler_output_to_cpu(
                 raw_pooler_output=raw_pooler_output,
                 finished_mask=finished_mask,
-                async_output_copy_stream=self.async_output_copy_stream,
             )
+            self._sync_device()
+            return model_runner_output
 
-        model_runner_output.pooler_output = _copy_pooler_output_to_cpu(
+        return AsyncGPUPoolingModelRunnerOutput(
+            model_runner_output=model_runner_output,
             raw_pooler_output=raw_pooler_output,
             finished_mask=finished_mask,
+            async_output_copy_stream=self._get_or_create_async_output_copy_stream(),
         )
-        self._sync_device()
-
-        return model_runner_output
 
     def _pad_for_sequence_parallelism(self, num_scheduled_tokens: int) -> int:
         # Pad tokens to multiple of tensor_parallel_size when
@@ -3243,13 +3421,12 @@ def _preprocess(
             # If a batch only has token ids, then including the embedding layer
             # in the CUDA graph will be more performant (like in the else case
             # below).
-            token_ids_idx = (
-                self.is_token_ids.gpu[:num_scheduled_tokens]
-                .nonzero(as_tuple=False)
-                .squeeze(1)
-            )
+            is_token_ids = self.is_token_ids.np[:num_scheduled_tokens]
+            token_ids_idx_np = np.nonzero(is_token_ids)[0]
             # Some tokens ids may need to become embeds
-            if token_ids_idx.numel() > 0:
+            if token_ids_idx_np.size > 0:
+                token_ids_idx = torch.from_numpy(token_ids_idx_np)
+                token_ids_idx = token_ids_idx.to(self.device, non_blocking=True)
                 token_ids = self.input_ids.gpu[token_ids_idx]
                 tokens_to_embeds = self.model.embed_input_ids(input_ids=token_ids)
                 self.inputs_embeds.gpu[token_ids_idx] = tokens_to_embeds
@@ -3279,7 +3456,7 @@ def _preprocess(
             intermediate_tensors = None
         else:
             assert intermediate_tensors is not None
-            intermediate_tensors = self.sync_and_slice_intermediate_tensors(
+            intermediate_tensors = self.sync_and_gather_intermediate_tensors(
                 num_input_tokens, intermediate_tensors, True
             )
 
@@ -3323,9 +3500,10 @@ def _sample(
             draft_token_ids_cpu, _ = self._get_draft_token_ids_cpu()
             self.input_batch.update_async_spec_token_ids(draft_token_ids_cpu)
 
+        draft_probs = self._get_spec_decode_draft_probs(spec_decode_metadata)
         sampler_output = self.rejection_sampler(
             spec_decode_metadata,
-            None,  # draft_probs
+            draft_probs,
             logits,
             sampling_metadata,
         )
@@ -3338,7 +3516,6 @@ def _bookkeeping_sync(
         logits: torch.Tensor | None,
         hidden_states: torch.Tensor,
         num_scheduled_tokens: int,
-        spec_decode_metadata: SpecDecodeMetadata | None,
     ) -> tuple[
         dict[str, int],
         LogprobsLists | None,
@@ -3372,6 +3549,21 @@ def _bookkeeping_sync(
         invalid_req_indices = []
         logprobs_lists = None
         if not self.use_async_scheduling:
+            # Sync scheduling: issue routed experts D2H into the pinned
+            # CPU buffer BEFORE ``_to_list`` below. ``_to_list`` does
+            # ``event.synchronize()`` on the async copy stream which
+            # waits for every D2H queued on the default stream since
+            # the last sync, so this enqueue is naturally covered
+            # without requiring its own synchronize.
+            if self.routed_experts_initialized:
+                buf = self.routed_experts_capturer.get_device_buffer()
+                total = scheduler_output.total_num_scheduled_tokens
+                self.routed_experts_cpu[:total].copy_(buf[:total], non_blocking=True)
+                self.routed_experts_slot_mapping_cpu[:total].copy_(
+                    self.routed_experts_slot_mapping_device[:total],
+                    non_blocking=True,
+                )
+
             # Get the valid generated tokens.
             max_gen_len = sampled_token_ids.shape[-1]
             if max_gen_len == 1:
@@ -3606,7 +3798,6 @@ def dispatch_cudagraph(num_tokens, disable_full=False, valid_modes=None):
                     allow_microbatching=allow_microbatching,
                     num_tokens_padded=num_tokens_padded,
                     uniform_decode=uniform_decode,
-                    num_scheduled_tokens_per_request=num_scheduled_tokens_np,
                     cudagraph_mode=cudagraph_mode.value,
                 )
             )
@@ -3751,6 +3942,15 @@ def _get_slot_mapping(kv_cache_gid: int):
 
         return slot_mappings_by_gid, slot_mappings_by_layer
 
+    def _is_all_reqs_chunked_prefill(self) -> bool:
+        """Check if all scheduled requests are marked to discard sampled tokens.
+
+        This is true when `discard_request_mask` is set for every scheduled
+        request (e.g., for chunked prefill requests that are not the last
+        prefill chunk)."""
+        num_reqs = self.input_batch.num_reqs
+        return bool(self.discard_request_mask.np[:num_reqs].all())
+
     @torch.inference_mode()
     def execute_model(
         self,
@@ -3764,11 +3964,7 @@ def execute_model(
             )
 
         if self.routed_experts_initialized:
-            capturer = RoutedExpertsCapturer.get_instance()
-            if capturer is not None:
-                capturer.clear_buffer()  # noqa
-            else:
-                logger.error("RoutedExpertsCapturer not initialized.")
+            self.routed_experts_capturer.clear_buffer()
 
         # If ngram_gpu is used, we need to copy the scheduler_output to avoid
         # the modification has influence on the scheduler_output in engine core process.
@@ -3917,6 +4113,7 @@ def execute_model(
                 if deferred_state_corrections_fn:
                     deferred_state_corrections_fn()
                     deferred_state_corrections_fn = None
+                mamba_bufs = self._get_mamba_bufs()
                 mamba_utils.preprocess_mamba(
                     scheduler_output,
                     self.kv_cache_config,
@@ -3926,7 +4123,7 @@ def execute_model(
                     self.requests,
                     self.compilation_config.static_forward_context,
                     self.model.get_mamba_state_copy_func(),
-                    self._get_mamba_copy_bufs(),
+                    mamba_bufs.preprocess,
                 )
                 # preprocess_mamba resets num_accepted_tokens_cpu to 1
                 # for requests whose state was copied to a new block.
@@ -3937,6 +4134,21 @@ def execute_model(
                 )
                 self.num_accepted_tokens.copy_to_gpu(num_reqs)
 
+                # Stage per-request inputs for the fused postprocess kernel
+                # only when that kernel will actually run. The kernel is
+                # gated on spec-decode + hybrid (see MambaBuffers.create);
+                # without it, ``mamba_bufs.postprocess_align`` is None and
+                # the staging buffers don't exist.
+                if mamba_bufs.postprocess_align is not None:
+                    mamba_utils.stage_postprocess_inputs_to_gpu(
+                        mamba_bufs.postprocess_align,
+                        scheduler_output,
+                        self.input_batch.req_ids,
+                        num_reqs,
+                        self.requests,
+                        self.mamba_state_idx,
+                    )
+
             use_spec_decode = len(scheduler_output.scheduled_spec_decode_tokens) > 0
             ubatch_slices_attn = ubatch_slices_padded if pad_attn else ubatch_slices
 
@@ -4112,7 +4324,7 @@ def sample_tokens(
             kv_connector_output = self.kv_connector_output
             self.kv_connector_output = None
             # receive sampled token ids from the last PP rank.
-            if self.use_async_scheduling and get_pp_group().world_size > 1:
+            if self.use_async_scheduling and not get_pp_group().is_last_rank:
                 self._pp_receive_prev_sampled_token_ids_to_input_batch()
             if not kv_connector_output:
                 return None  # type: ignore[return-value]
@@ -4165,6 +4377,8 @@ def sample_tokens(
                 )
 
         self._draft_token_ids = None
+        self._draft_probs = None
+        self._draft_prob_req_ids = None
         self._draft_token_req_ids = None
         self.valid_sampled_token_count_gpu = None
         self.input_batch.prev_sampled_token_ids = None
@@ -4188,6 +4402,7 @@ def propose_draft_token_ids(sampled_token_ids):
         spec_config = self.speculative_config
         propose_drafts_after_bookkeeping = False
         if spec_config is not None:
+            # Decide whether to run the drafter or zero out draft tokens.
             input_fits_in_drafter = spec_decode_common_attn_metadata is not None and (
                 spec_decode_common_attn_metadata.max_seq_len + self.num_spec_tokens
                 <= self.effective_drafter_max_model_len
@@ -4202,7 +4417,11 @@ def propose_draft_token_ids(sampled_token_ids):
                 # as inputs, and does not need to wait for bookkeeping to finish.
                 assert isinstance(
                     self.drafter,
-                    EagleProposer | DraftModelProposer | ExtractHiddenStatesProposer,
+                    EagleProposer
+                    | DFlashProposer
+                    | DraftModelProposer
+                    | ExtractHiddenStatesProposer
+                    | Gemma4Proposer,
                 )
                 sampled_token_ids = sampler_output.sampled_token_ids
                 if input_fits_in_drafter:
@@ -4211,7 +4430,6 @@ def propose_draft_token_ids(sampled_token_ids):
                     assert spec_decode_common_attn_metadata is not None
                     next_token_ids, valid_sampled_tokens_count = (
                         self.drafter.prepare_next_token_ids_padded(
-                            self.optimistic_seq_lens_cpu,
                             sampled_token_ids,
                             self.requests,
                             self.input_batch,
@@ -4221,10 +4439,6 @@ def propose_draft_token_ids(sampled_token_ids):
                     self._copy_valid_sampled_token_count(
                         next_token_ids, valid_sampled_tokens_count
                     )
-                    self._draft_token_ids = torch.zeros(
-                        1, device=self.device, dtype=torch.int32
-                    ).expand(len(self.input_batch.req_ids), self.num_spec_tokens)
-                    self._copy_draft_token_ids_to_cpu(scheduler_output, zeros_only=True)
             elif (
                 spec_config.use_ngram_gpu()
                 and not spec_config.disable_padded_drafter_batch
@@ -4247,15 +4461,22 @@ def propose_draft_token_ids(sampled_token_ids):
                     self._copy_valid_sampled_token_count(
                         next_token_ids, valid_sampled_tokens_count
                     )
-                    # Since we couldn't run the drafter,
-                    # just use zeros for the draft tokens.
-                    self._draft_token_ids = torch.zeros(
-                        1, device=self.device, dtype=torch.int32
-                    ).expand(len(self.input_batch.req_ids), self.num_spec_tokens)
-                    self._copy_draft_token_ids_to_cpu(scheduler_output, zeros_only=True)
             else:
                 propose_drafts_after_bookkeeping = input_fits_in_drafter
 
+            if not input_fits_in_drafter:
+                # Zero out draft tokens so the scheduler doesn't schedule
+                # stale drafts from the previous step.
+                # For Nemotron-H: it is necessary to zero out the draft tokens,
+                # otherwise the stale tokens will corrupt Mamba recurrent
+                # state and logprobs for sequences near max_model_len.
+                self._draft_token_ids = torch.zeros(
+                    1, device=self.device, dtype=torch.int32
+                ).expand(len(self.input_batch.req_ids), self.num_spec_tokens)
+                self._draft_probs = None
+                self._draft_prob_req_ids = None
+                self._copy_draft_token_ids_to_cpu(scheduler_output, zeros_only=True)
+
         with record_function_or_nullcontext("gpu_model_runner: bookkeep"):
             (
                 num_nans_in_logits,
@@ -4271,7 +4492,6 @@ def propose_draft_token_ids(sampled_token_ids):
                 logits,
                 hidden_states,
                 scheduler_output.total_num_scheduled_tokens,
-                spec_decode_metadata,
             )
 
         if propose_drafts_after_bookkeeping:
@@ -4293,13 +4513,6 @@ def propose_draft_token_ids(sampled_token_ids):
         self.kv_connector_output = None
 
         with record_function_or_nullcontext("gpu_model_runner: ModelRunnerOutput"):
-            if self.routed_experts_initialized:
-                capturer = RoutedExpertsCapturer.get_instance()
-                if capturer is not None:
-                    capturer.save_captured_experts(indices=self.slot_mapping)  # noqa
-                else:
-                    logger.error("RoutedExpertsCapturer not initialized.")
-
             output = ModelRunnerOutput(
                 req_ids=req_ids_output_copy,
                 req_id_to_index=req_id_to_index_output_copy,
@@ -4312,21 +4525,55 @@ def propose_draft_token_ids(sampled_token_ids):
                 else None,
                 num_nans_in_logits=num_nans_in_logits,
                 cudagraph_stats=cudagraph_stats,
+                routed_experts=None,
             )
 
         if not self.use_async_scheduling:
+            if self.routed_experts_initialized:
+                # Sync path: D2H was issued in ``_bookkeeping_sync`` and
+                # synchronized by ``_to_list``'s event.synchronize(), so
+                # the pinned buffers are ready to be wrapped as numpy.
+                total = scheduler_output.total_num_scheduled_tokens
+                output.routed_experts = RoutedExpertsLists(
+                    routing_data=self.routed_experts_cpu[:total].numpy(),
+                    slot_mapping=self.routed_experts_slot_mapping_cpu[:total].numpy(),
+                )
             return output
 
         with record_function_or_nullcontext(
             "gpu_model_runner: AsyncGPUModelRunnerOutput"
         ):
+            # Async path: produce a device-side snapshot that the async
+            # copy stream can D2H later. Both tensors must be private
+            # clones because:
+            #   - ``routing_data`` source is the shared capturer buffer,
+            #     which is ``clear_buffer()``-ed at the start of the
+            #     next step on the default stream.
+            #   - ``slot_mapping`` source is our own
+            #     ``routed_experts_slot_mapping_device``, which the
+            #     next ``_prepare_inputs`` overwrites on the default
+            #     stream while the D2H is still pending on the copy
+            #     stream.
+            # Without clones, the copy stream would read torn data.
+            routed_experts_snapshot = None
+            if self.routed_experts_initialized:
+                buf = self.routed_experts_capturer.get_device_buffer()
+                total = scheduler_output.total_num_scheduled_tokens
+                routed_experts_snapshot = RoutedExpertsTensors(
+                    routing_data=buf[:total].clone(),
+                    slot_mapping=self.routed_experts_slot_mapping_device[
+                        :total
+                    ].clone(),
+                )
+
             async_output = AsyncGPUModelRunnerOutput(
                 model_runner_output=output,
                 sampled_token_ids=sampler_output.sampled_token_ids,
                 logprobs_tensors=sampler_output.logprobs_tensors,
                 invalid_req_indices=invalid_req_indices,
-                async_output_copy_stream=self.async_output_copy_stream,
+                async_output_copy_stream=self._get_or_create_async_output_copy_stream(),
                 vocab_size=self.input_batch.vocab_size,
+                routed_experts=routed_experts_snapshot,
             )
         with record_function_or_nullcontext(
             "gpu_model_runner: set_async_sampled_token_ids"
@@ -4350,9 +4597,12 @@ def _pp_broadcast_prev_sampled_token_ids(
         assert sampled_token_ids.dim() == 2 and sampled_token_ids.shape[-1] == 1, (
             "PP+async expects sampled_token_ids to have shape [num_reqs, 1]"
         )
-        torch.distributed.broadcast(
-            sampled_token_ids, src=pp.rank, group=pp.device_group
-        )
+        # Skip for chunked prefill: sampled tokens are dummy
+        # and will be discarded, no need to broadcast.
+        if not self._is_all_reqs_chunked_prefill():
+            torch.distributed.broadcast(
+                sampled_token_ids, src=pp.rank, group=pp.device_group
+            )
 
     def _pp_receive_prev_sampled_token_ids_to_input_batch(self) -> None:
         """Receive sampled token ids broadcast from last PP stage"""
@@ -4361,7 +4611,9 @@ def _pp_receive_prev_sampled_token_ids_to_input_batch(self) -> None:
         num_reqs = self.input_batch.num_reqs
         # `prev_sampled_token_ids` is expected to have shape [num_reqs, 1].
         recv = torch.empty((num_reqs, 1), dtype=torch.int32, device=self.device)
-        torch.distributed.broadcast(recv, src=pp.last_rank, group=pp.device_group)
+        # skip for chunked prefill.
+        if not self._is_all_reqs_chunked_prefill():
+            torch.distributed.broadcast(recv, src=pp.last_rank, group=pp.device_group)
         self.input_batch.prev_sampled_token_ids = recv
 
         # construct `prev_req_id_to_index` here so `_prepare_input_ids`
@@ -4377,6 +4629,9 @@ def _pp_receive_prev_sampled_token_ids_to_input_batch(self) -> None:
             # appending a placeholder (-1) token id.
             if (req_state := self.requests.get(req_id)) is not None:
                 req_state.output_token_ids.append(-1)
+            pos = self.input_batch.num_tokens_no_spec[i]
+            self.input_batch.is_token_ids[i, pos] = True
+            self.input_batch.num_tokens_no_spec[i] = pos + 1
         self.input_batch.prev_req_id_to_index = prev_req_id_to_index
 
     def take_draft_token_ids(self) -> DraftTokenIds | None:
@@ -4463,6 +4718,35 @@ def _get_valid_sampled_token_count(self) -> list[int]:
         sampled_count_event.synchronize()
         return counts_cpu[: prev_sampled_token_ids.shape[0]].tolist()
 
+    def _get_spec_decode_draft_probs(
+        self, spec_decode_metadata: SpecDecodeMetadata
+    ) -> torch.Tensor | None:
+        if self._draft_probs is None or self._draft_prob_req_ids is None:
+            return None
+
+        row_by_req_id = {
+            req_id: idx for idx, req_id in enumerate(self._draft_prob_req_ids)
+        }
+        draft_probs_rows: list[torch.Tensor] = []
+        for req_id, num_draft in zip(
+            self.input_batch.req_ids, spec_decode_metadata.num_draft_tokens
+        ):
+            if num_draft == 0:
+                continue
+            row_idx = row_by_req_id.get(req_id)
+            if row_idx is None:
+                logger.warning(
+                    "Missing cached draft probabilities for request %s; "
+                    "falling back to legacy speculative rejection behavior.",
+                    req_id,
+                )
+                return None
+            draft_probs_rows.append(self._draft_probs[row_idx, :num_draft])
+
+        if not draft_probs_rows:
+            return None
+        return torch.cat(draft_probs_rows, dim=0).contiguous()
+
     def propose_draft_token_ids(
         self,
         scheduler_output: "SchedulerOutput",
@@ -4478,6 +4762,8 @@ def propose_draft_token_ids(
         num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
         spec_config = self.speculative_config
         assert spec_config is not None
+        self._draft_probs = None
+        self._draft_prob_req_ids = None
         if spec_config.method == "ngram":
             from vllm.v1.spec_decode.ngram_proposer import NgramProposer
 
@@ -4489,6 +4775,14 @@ def propose_draft_token_ids(
                 self.input_batch.token_ids_cpu,
                 slot_mappings=slot_mappings,
             )
+        elif spec_config.method == "custom_class":
+            assert isinstance(sampled_token_ids, list)
+            draft_token_ids = cast(Any, self.drafter).propose(
+                sampled_token_ids,
+                self.input_batch.num_tokens_no_spec,
+                self.input_batch.token_ids_cpu,
+                slot_mappings=slot_mappings,
+            )
         elif spec_config.use_ngram_gpu():
             assert isinstance(self.drafter, NgramProposerGPU)
             (
@@ -4578,7 +4872,6 @@ def propose_draft_token_ids(
             )
             next_token_ids, valid_sampled_tokens_count = (
                 self.drafter.prepare_next_token_ids_padded(
-                    self.optimistic_seq_lens_cpu,
                     sampled_token_ids,
                     self.requests,
                     self.input_batch,
@@ -4589,8 +4882,15 @@ def propose_draft_token_ids(
                 next_token_ids, valid_sampled_tokens_count
             )
 
-        elif spec_config.use_eagle() or spec_config.uses_draft_model():
-            assert isinstance(self.drafter, EagleProposer | DraftModelProposer)
+        elif (
+            spec_config.use_eagle()
+            or spec_config.use_dflash()
+            or spec_config.uses_draft_model()
+        ):
+            assert isinstance(
+                self.drafter,
+                EagleProposer | DFlashProposer | DraftModelProposer | Gemma4Proposer,
+            )
 
             if spec_config.disable_padded_drafter_batch:
                 # When padded-batch is disabled, the sampled_token_ids should be
@@ -4617,7 +4917,6 @@ def propose_draft_token_ids(
                 )
                 next_token_ids, valid_sampled_tokens_count = (
                     self.drafter.prepare_next_token_ids_padded(
-                        self.optimistic_seq_lens_cpu,
                         sampled_token_ids,
                         self.requests,
                         self.input_batch,
@@ -4628,6 +4927,16 @@ def propose_draft_token_ids(
                     next_token_ids, valid_sampled_tokens_count
                 )
 
+            # Let the target override the hidden state fed to the drafter
+            # (e.g. DeepSeek V4 MTP needs the pre-hc_head residual). Safe to
+            # rebind here: hidden_states was already consumed for sampling
+            # above and is not used again in this branch.
+            alt = getattr(
+                self.get_model(), "get_mtp_target_hidden_states", lambda: None
+            )()
+            if alt is not None:
+                hidden_states = alt
+
             num_rejected_tokens_gpu = None
             if spec_decode_metadata is None:
                 token_indices_to_sample = None
@@ -4700,6 +5009,11 @@ def propose_draft_token_ids(
                 num_rejected_tokens_gpu=num_rejected_tokens_gpu,
                 slot_mappings=slot_mappings,
             )
+            if hasattr(self.drafter, "take_last_draft_probs"):
+                draft_probs = self.drafter.take_last_draft_probs()
+                if draft_probs is not None:
+                    self._draft_probs = draft_probs
+                    self._draft_prob_req_ids = self.input_batch.req_ids.copy()
 
         return draft_token_ids
 
@@ -4745,7 +5059,8 @@ def load_model(self, load_dummy_weights: bool = False) -> None:
                     )
                 if hasattr(self, "drafter"):
                     logger.info_once("Loading drafter model...")
-                    self.drafter.load_model(self.model)
+                    if hasattr(self.drafter, "load_model"):
+                        self.drafter.load_model(self.model)
                     if (
                         hasattr(self.drafter, "model")
                         and is_mixture_of_experts(self.drafter.model)
@@ -4771,27 +5086,36 @@ def load_model(self, load_dummy_weights: bool = False) -> None:
                         )
                         eplb_models += 1
 
-                if self.use_aux_hidden_state_outputs:
-                    if not supports_eagle3(self.get_model()):
-                        raise RuntimeError(
-                            "Model does not support EAGLE3 interface but "
-                            "aux_hidden_state_outputs was requested"
-                        )
+                self._setup_eagle3_aux_hidden_state_outputs()
 
-                    # Try to get auxiliary layers from speculative config,
-                    # otherwise use model's default layers
-                    aux_layers = self._get_eagle3_aux_layers_from_config()
-                    if aux_layers:
-                        logger.info(
-                            "Using auxiliary layers from speculative config: %s",
-                            aux_layers,
-                        )
-                    else:
-                        aux_layers = (
-                            self.model.get_eagle3_default_aux_hidden_state_layers()
-                        )
+                # Resolve the MoE model, unwrapping VLM wrappers if needed.
+                # VLM models (e.g. KimiK25ForConditionalGeneration) wrap the
+                # actual MoE language model but don't implement
+                # MixtureOfExperts themselves.
+                moe_candidate = self.model
+                if not is_mixture_of_experts(moe_candidate) and isinstance(
+                    moe_candidate, SupportsMultiModal
+                ):
+                    moe_candidate = moe_candidate.get_language_model()
+                if is_mixture_of_experts(moe_candidate):
+                    self._moe_model = moe_candidate
+
+                if (
+                    self._moe_model is not None
+                    and self.parallel_config.enable_eplb
+                    and not load_dummy_weights
+                ):
+                    logger.info_once(
+                        "EPLB is enabled for model %s.",
+                        self.model_config.model,
+                    )
+                    assert self.eplb_state is not None
+                    self.eplb_state.add_model(
+                        self._moe_model,
+                        self.model_config,
+                    )
+                    eplb_models += 1
 
-                    self.model.set_aux_hidden_state_layers(aux_layers)
                 time_after_load = time.perf_counter()
             self.model_memory_usage = m.consumed_memory
         except torch.cuda.OutOfMemoryError as e:
@@ -4809,7 +5133,6 @@ def load_model(self, load_dummy_weights: bool = False) -> None:
             "Model loading took %s GiB memory and %.6f seconds",
             format_gib(self.model_memory_usage),
             time_after_load - time_before_load,
-            scope="local",
         )
         if not load_dummy_weights:
             prepare_communication_buffer_for_model(self.model)
@@ -4828,23 +5151,21 @@ def load_model(self, load_dummy_weights: bool = False) -> None:
         )  # Temporary hack for dynamic res video w/o support for bs>1 yet
 
         if (
-            is_mixture_of_experts(self.model)
+            self._moe_model is not None
             and self.parallel_config.enable_eplb
             and not load_dummy_weights
+            and self.eplb_state is not None
+            and self.eplb_state.is_async
         ):
-            logger.info_once("EPLB is enabled for model %s.", self.model_config.model)
-            assert self.eplb_state is not None
-            self.eplb_state.add_model(
-                self.model,
-                self.model_config,
-            )
-            if self.eplb_state.is_async:
-                self.eplb_state.start_async_loop()
+            self.eplb_state.start_async_loop()
 
         if (
             self.vllm_config.compilation_config.mode
             == CompilationMode.STOCK_TORCH_COMPILE
         ):
+            from vllm.env_override import _apply_constrain_to_fx_strides_patch
+
+            _apply_constrain_to_fx_strides_patch()
             backend = self.vllm_config.compilation_config.init_backend(self.vllm_config)
             compilation_counter.stock_torch_compile_count += 1
             self.model.compile(fullgraph=True, backend=backend)
@@ -4856,6 +5177,12 @@ def load_model(self, load_dummy_weights: bool = False) -> None:
         cudagraph_mode = self.compilation_config.cudagraph_mode
         assert cudagraph_mode is not None
         if (
+            is_breakable_cudagraph_enabled()
+            and cudagraph_mode != CUDAGraphMode.NONE
+            and not self.parallel_config.use_ubatching
+        ):
+            self.model = BreakableCUDAGraphWrapper(self.model, self.vllm_config)
+        elif (
             cudagraph_mode.has_full_cudagraphs()
             and not self.parallel_config.use_ubatching
         ):
@@ -4874,6 +5201,27 @@ def load_model(self, load_dummy_weights: bool = False) -> None:
 
         get_offloader().post_init()
 
+    def _setup_eagle3_aux_hidden_state_outputs(self) -> None:
+        if not self.use_aux_hidden_state_outputs:
+            return
+
+        if not supports_eagle3(self.get_model()):
+            raise RuntimeError(
+                "Model does not support EAGLE3 interface but "
+                "aux_hidden_state_outputs was requested"
+            )
+        # Try to get auxiliary layers from speculative config,
+        # otherwise use model's default layers
+        aux_layers = self._get_eagle3_aux_layers_from_config()
+        if aux_layers:
+            logger.info(
+                "Using auxiliary layers from speculative config: %s", aux_layers
+            )
+        else:
+            aux_layers = self.model.get_eagle3_default_aux_hidden_state_layers()
+
+        self.model.set_aux_hidden_state_layers(aux_layers)
+
     def _get_eagle3_aux_layers_from_config(self) -> tuple[int, ...] | None:
         """Extract Eagle3 auxiliary layer indices from speculative config.
 
@@ -4889,10 +5237,21 @@ def _get_eagle3_aux_layers_from_config(self) -> tuple[int, ...] | None:
             return None
 
         hf_config = self.speculative_config.draft_model_config.hf_config
-        if not hasattr(hf_config, "eagle_aux_hidden_state_layer_ids"):
-            return None
 
-        layer_ids = hf_config.eagle_aux_hidden_state_layer_ids
+        layer_ids = getattr(hf_config, "eagle_aux_hidden_state_layer_ids", None)
+        if not layer_ids:
+            dflash_config = getattr(hf_config, "dflash_config", None)
+            eagle_config = getattr(hf_config, "eagle_config", None)
+
+            if dflash_config and isinstance(dflash_config, dict):
+                # Add 1 to convert DFlash's aux layer id semantics
+                layer_ids = [
+                    i + 1 for i in (dflash_config.get("target_layer_ids") or [])
+                ]
+
+            if eagle_config and isinstance(eagle_config, dict):
+                layer_ids = eagle_config.get("eagle_aux_hidden_state_layer_ids")
+
         if layer_ids and isinstance(layer_ids, (list, tuple)):
             return tuple(layer_ids)
 
@@ -4942,7 +5301,7 @@ def reload_weights(
             )
 
         # begin loading weights
-        logger.info_once("Reloading weights inplace...", scope="local")
+        logger.info_once("Reloading weights inplace...")
         if is_checkpoint_format:
             # load weights from checkpoint/ original model format
             initialize_layerwise_reload(model)
@@ -4954,7 +5313,6 @@ def reload_weights(
             logger.warning_once(
                 "Reloading with `is_checkpoint_format=True` requires that "
                 "weights be in kernel format and already sharded",
-                scope="local",
             )
             loaded_weights = set()
             for name, loaded_weight in weights_iterator:
@@ -4968,7 +5326,6 @@ def reload_weights(
         logger.info_once(
             "Reloading and processing weights took %.2f seconds",
             diff_seconds,
-            scope="local",
         )
         if self.model_config.quantization is None and loaded_weights is not None:
             weights_not_loaded = weights_to_load - loaded_weights
@@ -4987,7 +5344,6 @@ def _get_prompt_logprobs_dict(
         if not num_prompt_logprobs_dict:
             return {}
 
-        in_progress_dict = self.input_batch.in_progress_prompt_logprobs_cpu
         prompt_logprobs_dict: dict[str, LogprobsTensors | None] = {}
 
         # Since prompt logprobs are a rare feature, prioritize simple,
@@ -5011,14 +5367,14 @@ def _get_prompt_logprobs_dict(
             )
 
             # Set up target LogprobsTensors object.
-            logprobs_tensors = in_progress_dict.get(req_id)
-            if not logprobs_tensors:
+            logprobs_tensors = request.in_progress_prompt_logprobs_cpu
+            if logprobs_tensors is None:
                 # Create empty logprobs CPU tensors for the entire prompt.
                 # If chunked, we'll copy in slice by slice.
                 logprobs_tensors = LogprobsTensors.empty_cpu(
                     num_prompt_tokens - 1, num_prompt_logprobs + 1
                 )
-                in_progress_dict[req_id] = logprobs_tensors
+                request.in_progress_prompt_logprobs_cpu = logprobs_tensors
 
             # Determine number of logits to retrieve.
             start_idx = request.num_computed_tokens
@@ -5075,7 +5431,7 @@ def _get_prompt_logprobs_dict(
         # num_prompt_logprobs_dict.
         for req_id in completed_prefill_reqs:
             del num_prompt_logprobs_dict[req_id]
-            del in_progress_dict[req_id]
+            self.requests[req_id].in_progress_prompt_logprobs_cpu = None
 
         # Must synchronize the non-blocking GPU->CPU transfers.
         if prompt_logprobs_dict:
@@ -5336,12 +5692,18 @@ def _dummy_run(
         attn_metadata: PerLayerAttnMetadata | None = None
 
         slot_mappings_by_group, slot_mappings = self._get_slot_mappings(
-            num_tokens_padded=num_tokens,
+            num_tokens_padded=num_tokens_padded,
             num_reqs_padded=num_reqs_padded,
             num_tokens_unpadded=num_tokens_unpadded,
             ubatch_slices=ubatch_slices_padded,
         )
 
+        # Dummy runs have no real slot assignments — fill with -1 so
+        # concat_and_cache kernels skip the KV write.
+        if slot_mappings_by_group is not None:
+            for sm in slot_mappings_by_group.values():
+                sm.fill_(-1)
+
         # _dummy_run shares pinned CPU buffers (seq_lens, query_start_loc,
         # etc.) with execute_model.  It must participate in the same event
         # protocol so that back-to-back dummy/real steps don't overwrite
@@ -5434,7 +5796,7 @@ def _dummy_run(
                         )
                     )
 
-                intermediate_tensors = self.sync_and_slice_intermediate_tensors(
+                intermediate_tensors = self.sync_and_gather_intermediate_tensors(
                     num_tokens_padded, None, False
                 )
 
@@ -5479,7 +5841,11 @@ def _dummy_run(
             ):
                 assert isinstance(
                     self.drafter,
-                    EagleProposer | DraftModelProposer | ExtractHiddenStatesProposer,
+                    EagleProposer
+                    | DFlashProposer
+                    | DraftModelProposer
+                    | ExtractHiddenStatesProposer
+                    | Gemma4Proposer,
                 )
                 assert self.speculative_config is not None
                 # Eagle currently only supports PIECEWISE cudagraphs.
@@ -5569,6 +5935,7 @@ def _dummy_sampler_run(
             top_k=dummy_tensors(logits.size(1) - 1),
             generators={},
             max_num_logprobs=None,
+            logprob_token_ids=None,
             no_penalties=True,
             prompt_token_ids=None,
             frequency_penalties=dummy_tensors(0.1),
@@ -5584,6 +5951,26 @@ def _dummy_sampler_run(
             sampler_output = self.sampler(
                 logits=logits, sampling_metadata=dummy_metadata
             )
+            # Also warm forward_native (taken when generators dict is non-empty),
+            # but skip the extra call in 'processed_logits' / 'processed_logprobs'
+            # modes — there TopKTopPSampler binds forward = forward_native at
+            # init time, so the warmup call is redundant and only inflates peak
+            # memory during profile_run.
+            # No .clone() of logits: warmup output is discarded, so any in-place
+            # mutation by forward_native does not affect correctness.
+            if self.sampler.logprobs_mode not in (
+                "processed_logits",
+                "processed_logprobs",
+            ):
+                self.sampler(
+                    logits=logits,
+                    sampling_metadata=replace(
+                        dummy_metadata,
+                        generators={
+                            0: torch.Generator(device=self.device).manual_seed(0)
+                        },
+                    ),
+                )
         except RuntimeError as e:
             if "out of memory" in str(e):
                 raise RuntimeError(
@@ -5601,10 +5988,18 @@ def _dummy_sampler_run(
             )
 
             num_tokens = sum(len(ids) for ids in draft_token_ids)
-            # draft_probs = torch.randn(
-            #     num_tokens, logits.shape[-1], device=self.device,
-            #     dtype=logits.dtype)
             draft_probs = None
+            if (
+                self.speculative_config.rejection_sample_method == "standard"
+                and self.speculative_config.draft_sample_method == "probabilistic"
+            ):
+                draft_probs = torch.rand(
+                    num_tokens,
+                    logits.shape[-1],
+                    device=self.device,
+                    dtype=torch.float32,
+                )
+                draft_probs = torch.softmax(draft_probs, dim=-1)
             logits = torch.randn(
                 num_tokens + num_reqs,
                 logits.shape[-1],
@@ -5745,7 +6140,6 @@ def profile_run(self) -> None:
                             encoder_budget,
                             max_mm_items_per_batch,
                             dummy_modality,
-                            scope="local",
                         )
 
                         # Create dummy batch of multimodal inputs.
@@ -5800,7 +6194,7 @@ def _init_minimal_kv_cache_for_profiling(self) -> None:
         )
         self.cache_config.num_gpu_blocks_override = saved_override
 
-        self.initialize_kv_cache(minimal_config)
+        self.initialize_kv_cache(minimal_config, is_profiling=True)
         self.cache_config.num_gpu_blocks = minimal_config.num_blocks
 
         logger.debug("Initialized minimal KV cache for CUDA graph profiling")
@@ -5819,6 +6213,20 @@ def _freeze_gc():
                 gc.unfreeze()
                 gc.collect()
 
+    def shutdown(self) -> None:
+        """Release GPU tensors (model weights, KV caches, workspace) so that
+        memory is reclaimable when running in the same process."""
+        from vllm.model_executor.layers.rotary_embedding import _ROPE_DICT
+        from vllm.v1.worker.workspace import reset_workspace_manager
+
+        # Calls torch.accelerator.synchronize()
+        self._cleanup_profiling_kv_cache()
+        self.compilation_config.static_forward_context.clear()
+        self.model = None  # type: ignore[assignment]
+        _ROPE_DICT.clear()
+
+        reset_workspace_manager()
+
     def _cleanup_profiling_kv_cache(self) -> None:
         torch.accelerator.synchronize()
         if hasattr(self, "kv_caches") and self.kv_caches:
@@ -5840,6 +6248,13 @@ def _cleanup_profiling_kv_cache(self) -> None:
                 layer.kv_cache = (
                     torch.tensor([]) if isinstance(kv_cache, torch.Tensor) else []
                 )
+            # Clean up quantized KV cache scale views
+            # (int8_per_token_head, fp8_per_token_head)
+            if hasattr(layer, "impl"):
+                if hasattr(layer.impl, "_k_scale_cache"):
+                    layer.impl._k_scale_cache = None
+                if hasattr(layer.impl, "_v_scale_cache"):
+                    layer.impl._v_scale_cache = None
 
         gc.collect()
         torch.accelerator.empty_cache()
@@ -5873,7 +6288,10 @@ def profile_cudagraph_memory(self) -> int:
         # Use a temporary pool for profiling to avoid fragmentation in the main pool.
         profiling_pool = current_platform.graph_pool_handle()
         original_pools: dict[int, Any] = {}
-        for instance in list(CUDAGraphWrapper._all_instances):
+        all_wrappers = list(CUDAGraphWrapper._all_instances) + list(
+            BreakableCUDAGraphWrapper._all_instances
+        )
+        for instance in all_wrappers:
             original_pools[id(instance)] = instance.graph_pool
             instance.graph_pool = profiling_pool
 
@@ -5924,7 +6342,11 @@ def profile_cudagraph_memory(self) -> int:
 
         set_cudagraph_capturing_enabled(False)
         CUDAGraphWrapper.clear_all_graphs()
-        for instance in list(CUDAGraphWrapper._all_instances):
+        BreakableCUDAGraphWrapper.clear_all_graphs()
+        all_wrappers = list(CUDAGraphWrapper._all_instances) + list(
+            BreakableCUDAGraphWrapper._all_instances
+        )
+        for instance in all_wrappers:
             if id(instance) in original_pools:
                 instance.graph_pool = original_pools[id(instance)]
         for key_set in self.cudagraph_dispatcher.cudagraph_keys.values():
@@ -5969,7 +6391,9 @@ def capture_model(self) -> int:
                 SupportsEncoderCudaGraph,
                 supports_encoder_cudagraph,
             )
-            from vllm.v1.worker.encoder_cudagraph import EncoderCudaGraphManager
+            from vllm.v1.worker.encoder_cudagraph import (
+                EncoderCudaGraphManager,
+            )
 
             raw_model = self.get_model()
             if supports_encoder_cudagraph(raw_model):
@@ -6033,7 +6457,6 @@ def capture_model(self) -> int:
             "Graph capturing finished in %.0f secs, took %.2f GiB",
             elapsed_time,
             cuda_graph_size / (1 << 30),
-            scope="local",
         )
         return cuda_graph_size
 
@@ -6058,6 +6481,7 @@ def _warmup_and_capture(
                 skip_eplb=True,
                 remove_lora=False,
                 num_active_loras=desc.num_active_loras,
+                profile_seq_lens=profile_seq_lens,
             )
         self._dummy_run(
             desc.num_tokens,
@@ -6121,7 +6545,11 @@ def _capture_cudagraphs(
             torch.accelerator.synchronize()
         self.maybe_remove_all_loras(self.lora_config)
 
-    def initialize_attn_backend(self, kv_cache_config: KVCacheConfig) -> None:
+    def initialize_attn_backend(
+        self,
+        kv_cache_config: KVCacheConfig,
+        is_profiling: bool = False,
+    ) -> None:
         """
         Initialize the attention backends and attention metadata builders.
         """
@@ -6193,7 +6621,9 @@ def create_attn_groups(
 
         # Resolve cudagraph_mode before actually initialize metadata_builders
         self._check_and_update_cudagraph_mode(
-            attention_backend_list, kv_cache_config.kv_cache_groups
+            attention_backend_list,
+            kv_cache_config.kv_cache_groups,
+            is_profiling=is_profiling,
         )
 
         # Check if attention backend supports PCP&DCP and related features.
@@ -6230,13 +6660,17 @@ def initialize_metadata_builders(
             self.speculative_config.use_eagle()
             or self.speculative_config.uses_draft_model()
         ):
-            assert isinstance(self.drafter, EagleProposer | DraftModelProposer)
+            assert isinstance(
+                self.drafter,
+                EagleProposer | DFlashProposer | DraftModelProposer | Gemma4Proposer,
+            )
             self.drafter.initialize_attn_backend(kv_cache_config, kernel_block_sizes)
 
     def _check_and_update_cudagraph_mode(
         self,
         attention_backends: list[set[type[AttentionBackend]]],
         kv_cache_groups: list[KVCacheGroupSpec],
+        is_profiling: bool = False,
     ) -> None:
         """
         Resolve the cudagraph_mode when there are multiple attention
@@ -6245,7 +6679,7 @@ def _check_and_update_cudagraph_mode(
         cudagraph_mode.
         """
         min_cg_support = AttentionCGSupport.ALWAYS
-        min_cg_backend_name = None
+        min_cg_attn_backend = None
 
         for attn_backend_set, kv_cache_group in zip(
             attention_backends, kv_cache_groups
@@ -6258,144 +6692,18 @@ def _check_and_update_cudagraph_mode(
                 )
                 if cg_support.value < min_cg_support.value:
                     min_cg_support = cg_support
-                    min_cg_backend_name = attn_backend.__name__
-        # Flexible resolve the cudagraph mode
-        cudagraph_mode = self.compilation_config.cudagraph_mode
-        assert cudagraph_mode is not None
-        # check cudagraph for mixed batch is supported
-        if (
-            cudagraph_mode.mixed_mode() == CUDAGraphMode.FULL
-            and min_cg_support != AttentionCGSupport.ALWAYS
-        ):
-            msg = (
-                f"CUDAGraphMode.{cudagraph_mode.name} is not supported "
-                f"with {min_cg_backend_name} backend (support: "
-                f"{min_cg_support})"
-            )
-            if min_cg_support == AttentionCGSupport.NEVER:
-                # if not supported any full cudagraphs, just raise it.
-                msg += (
-                    "; please try cudagraph_mode=PIECEWISE, and "
-                    "make sure compilation mode is VLLM_COMPILE"
-                )
-                raise ValueError(msg)
-
-            # attempt to resolve the full cudagraph related mode
-            if self.compilation_config.splitting_ops_contain_attention():
-                msg += "; setting cudagraph_mode=FULL_AND_PIECEWISE"
-                cudagraph_mode = self.compilation_config.cudagraph_mode = (
-                    CUDAGraphMode.FULL_AND_PIECEWISE
-                )
-            else:
-                msg += "; setting cudagraph_mode=FULL_DECODE_ONLY"
-                cudagraph_mode = self.compilation_config.cudagraph_mode = (
-                    CUDAGraphMode.FULL_DECODE_ONLY
-                )
-            logger.warning(msg)
-
-        # check that if we are doing decode full-cudagraphs it is supported
-        if (
-            cudagraph_mode.decode_mode() == CUDAGraphMode.FULL
-            and min_cg_support == AttentionCGSupport.NEVER
-        ):
-            msg = (
-                f"CUDAGraphMode.{cudagraph_mode.name} is not supported "
-                f"with {min_cg_backend_name} backend (support: "
-                f"{min_cg_support})"
-            )
-            if self.compilation_config.mode == CompilationMode.VLLM_COMPILE and (
-                self.compilation_config.splitting_ops_contain_attention()
-                or self.compilation_config.use_inductor_graph_partition
-            ):
-                msg += (
-                    "; setting cudagraph_mode=PIECEWISE because "
-                    "attention is compiled piecewise"
-                )
-                cudagraph_mode = self.compilation_config.cudagraph_mode = (
-                    CUDAGraphMode.PIECEWISE
-                )
-            else:
-                msg += (
-                    "; setting cudagraph_mode=NONE because "
-                    "attention is not compiled piecewise"
-                )
-                cudagraph_mode = self.compilation_config.cudagraph_mode = (
-                    CUDAGraphMode.NONE
-                )
-            logger.warning(msg)
-
-        # check that if we are doing spec-decode + decode full-cudagraphs it is
-        # supported
-        if (
-            cudagraph_mode.decode_mode() == CUDAGraphMode.FULL
-            and self.uniform_decode_query_len > 1
-            and min_cg_support.value < AttentionCGSupport.UNIFORM_BATCH.value
-        ):
-            msg = (
-                f"CUDAGraphMode.{cudagraph_mode.name} is not supported"
-                f" with spec-decode for attention backend "
-                f"{min_cg_backend_name} (support: {min_cg_support})"
-            )
-            if self.compilation_config.splitting_ops_contain_attention():
-                msg += "; setting cudagraph_mode=PIECEWISE"
-                cudagraph_mode = self.compilation_config.cudagraph_mode = (
-                    CUDAGraphMode.PIECEWISE
-                )
-            else:
-                msg += "; setting cudagraph_mode=NONE"
-                cudagraph_mode = self.compilation_config.cudagraph_mode = (
-                    CUDAGraphMode.NONE
-                )
-            logger.warning(msg)
-
-        # double check that we can support full cudagraph if they are requested
-        # even after automatic downgrades
-        if (
-            cudagraph_mode.has_full_cudagraphs()
-            and min_cg_support == AttentionCGSupport.NEVER
-        ):
-            raise ValueError(
-                f"CUDAGraphMode.{cudagraph_mode.name} is not "
-                f"supported with {min_cg_backend_name} backend ("
-                f"support:{min_cg_support}) "
-                "; please try cudagraph_mode=PIECEWISE, "
-                "and make sure compilation mode is VLLM_COMPILE"
-            )
-
-        # if we have dedicated decode cudagraphs, and spec-decode is enabled,
-        # we need to adjust the cudagraph sizes to be a multiple of the uniform
-        # decode query length to avoid: https://github.com/vllm-project/vllm/issues/28207
-        # temp-fix: https://github.com/vllm-project/vllm/issues/28207#issuecomment-3504004536
-        # Will be removed in the near future when we have separate cudagraph capture
-        # sizes for decode and mixed prefill-decode.
-        if (
-            cudagraph_mode.decode_mode() == CUDAGraphMode.FULL
-            and cudagraph_mode.separate_routine()
-            and self.uniform_decode_query_len > 1
-        ):
-            self.compilation_config.adjust_cudagraph_sizes_for_spec_decode(
-                self.uniform_decode_query_len, self.parallel_config.tensor_parallel_size
-            )
-
-        # If the model has Mamba layers and cudagraph mode includes FULL
-        # decode, cap cudagraph capture sizes to the number of available
-        # Mamba cache blocks. Each decode request needs one conv_state
-        # cache line, so capture batch sizes cannot exceed num_blocks.
-        # Only FULL decode graphs are affected because PIECEWISE captures
-        # run GDN/Mamba ops eagerly (prefill path, no causal_conv1d_update).
-        # See: https://github.com/vllm-project/vllm/issues/34094
-        if cudagraph_mode.has_full_cudagraphs():
-            has_mamba = any(
-                isinstance(g.kv_cache_spec, MambaSpec) for g in kv_cache_groups
-            )
-            if has_mamba and self.kv_cache_config is not None:
-                self.compilation_config.adjust_cudagraph_sizes_for_mamba_cache(
-                    self.kv_cache_config.num_blocks
-                )
-
+                    min_cg_attn_backend = attn_backend.__name__
+        cudagraph_mode = self.compilation_config.resolve_cudagraph_mode_and_sizes(
+            min_cg_support,
+            min_cg_attn_backend,
+            self.uniform_decode_query_len,
+            self.parallel_config.tensor_parallel_size,
+            self.kv_cache_config,
+            self.max_num_reqs,
+            is_profiling=is_profiling,
+        )
         # Trigger cudagraph dispatching keys initialization after
         # resolved cudagraph mode.
-        self.compilation_config.cudagraph_mode = cudagraph_mode
         self.cudagraph_dispatcher.initialize_cudagraph_keys(
             cudagraph_mode, self.uniform_decode_query_len
         )
@@ -6405,7 +6713,13 @@ def _check_and_update_cudagraph_mode(
             self.speculative_config.use_eagle()
             or self.speculative_config.uses_extract_hidden_states()
         ):
-            assert isinstance(self.drafter, EagleProposer | ExtractHiddenStatesProposer)
+            assert isinstance(
+                self.drafter,
+                EagleProposer
+                | DFlashProposer
+                | ExtractHiddenStatesProposer
+                | Gemma4Proposer,
+            )
             self.drafter.initialize_cudagraph_keys(cudagraph_mode)
 
     def calculate_reorder_batch_threshold(self) -> None:
@@ -6428,6 +6742,46 @@ def calculate_reorder_batch_threshold(self) -> None:
             return
         self.reorder_batch_threshold = reduce(min_none_high, reorder_batch_thresholds)  # type: ignore[assignment]
 
+    def _set_mm_prefix_range_for_metadata(
+        self,
+        attn_metadata: Any,
+        req_doc_ranges: dict[int, list[tuple[int, int]]],
+    ) -> None:
+        """Set mm_prefix_range for all attention metadata objects.
+
+        This method handles both list and non-list attention metadata,
+        computing mm_prefix_range_tensor once and sharing it across all
+        metadata objects to avoid redundant host-to-device transfers.
+        """
+        from vllm.v1.attention.backends.triton_attn import (
+            TritonAttentionMetadata,
+        )
+
+        # Get all metadata objects from either list or dict structure
+        metadata_list = []
+        if isinstance(attn_metadata, list):
+            for ub_metadata in attn_metadata:
+                metadata_list.extend(ub_metadata.values())
+        else:
+            metadata_list.extend(attn_metadata.values())
+
+        # Set mm_prefix_range for all metadata and compute tensor once
+        shared_tensor = None
+        for metadata in metadata_list:
+            metadata.mm_prefix_range = req_doc_ranges  # type: ignore[attr-defined]
+
+            # Only compute tensor for TritonAttentionMetadata
+            if isinstance(metadata, TritonAttentionMetadata):
+                if shared_tensor is None:
+                    shared_tensor = (
+                        TritonAttentionMetadata.compute_mm_prefix_range_tensor(
+                            req_doc_ranges,
+                            metadata.seq_lens.shape[0],  # type: ignore[attr-defined]
+                            metadata.seq_lens.device,  # type: ignore[attr-defined]
+                        )
+                    )
+                metadata.mm_prefix_range_tensor = shared_tensor
+
     def may_reinitialize_input_batch(
         self, kv_cache_config: KVCacheConfig, kernel_block_sizes: list[int]
     ) -> None:
@@ -6465,11 +6819,6 @@ def may_reinitialize_input_batch(
             block_sizes != self._init_block_sizes
             or kernel_block_sizes != self._init_kernel_block_sizes
         ):
-            assert self.offload_config.uva.cpu_offload_gb == 0, (
-                "Cannot re-initialize the input batch when CPU weight "
-                "offloading is enabled. See https://github.com/vllm-project/vllm/pull/18298 "  # noqa: E501
-                "for more details."
-            )
             self._init_block_sizes = block_sizes
             self._init_kernel_block_sizes = kernel_block_sizes
             self.input_batch = InputBatch(
@@ -6482,10 +6831,11 @@ def may_reinitialize_input_batch(
                 block_sizes=block_sizes,
                 kernel_block_sizes=kernel_block_sizes,
                 max_num_blocks_per_req=max_num_blocks,
-                is_spec_decode=bool(self.vllm_config.speculative_config),
+                num_spec_tokens=self.num_spec_tokens,
                 logitsprocs=self.input_batch.logitsprocs,
                 logitsprocs_need_output_token_ids=self.input_batch.logitsprocs_need_output_token_ids,
                 is_pooling_model=self.is_pooling_model,
+                reasoning_config=self.vllm_config.reasoning_config,
             )
 
         assert self._init_block_sizes == block_sizes, (
@@ -6540,7 +6890,6 @@ def _kv_cache_spec_attn_group_iterator(self) -> Iterator[AttentionGroup]:
 
     def _reshape_kv_cache_tensors(
         self,
-        kv_cache_config: KVCacheConfig,
         kv_cache_raw_tensors: dict[str, torch.Tensor],
         kernel_block_sizes: list[int],
     ) -> dict[str, torch.Tensor]:
@@ -6548,7 +6897,6 @@ def _reshape_kv_cache_tensors(
         Reshape the KV cache tensors to the desired shape and dtype.
 
         Args:
-            kv_cache_config: The KV cache config
             kv_cache_raw_tensors: The KV cache buffer of each layer, with
                 correct size but uninitialized shape.
             kernel_block_sizes: The kernel block sizes for each KV cache group.
@@ -6578,9 +6926,15 @@ def _reshape_kv_cache_tensors(
                     )
                     kernel_num_blocks = num_blocks * num_blocks_per_kv_block
 
+                    # For MLA with compression, storage_block_size != block_size
+                    if kv_cache_spec.storage_block_size != kv_cache_spec.block_size:
+                        shape_block_size = kv_cache_spec.storage_block_size
+                    else:
+                        shape_block_size = kernel_block_size
+
                     kv_cache_shape = attn_backend.get_kv_cache_shape(
                         kernel_num_blocks,
-                        kernel_block_size,
+                        shape_block_size,
                         kv_cache_spec.num_kv_heads,
                         kv_cache_spec.head_size,
                         cache_dtype_str=self.cache_config.cache_dtype,
@@ -6604,12 +6958,31 @@ def _reshape_kv_cache_tensors(
                         kv_cache_stride_order.index(i)
                         for i in range(len(kv_cache_stride_order))
                     ]
-                    kv_caches[layer_name] = (
-                        kv_cache_raw_tensors[layer_name]
-                        .view(dtype)
-                        .view(kv_cache_shape)
-                        .permute(*inv_order)
-                    )
+
+                    raw_tensor = kv_cache_raw_tensors[layer_name].view(dtype)
+                    if kv_cache_spec.page_size_padded is not None:
+                        # Use strided view to handle page_size_bytes that
+                        # include padding. This follows
+                        # the same pattern as MambaSpec handling below.
+                        # NOTE: This assumes kv_cache_shape[0] == num_blocks
+                        # (i.e. the first physical dimension is the block
+                        # index), which holds for MLA backends but NOT for
+                        # standard attention backends whose shape starts with
+                        # a K/V dimension of size 2.
+                        dtype_size = get_dtype_size(dtype)
+                        page_stride = kv_cache_spec.page_size_bytes // dtype_size
+                        strides = list(torch.empty(kv_cache_shape).stride())
+                        strides[inv_order[0]] = page_stride
+                        kv_cache = torch.as_strided(
+                            raw_tensor,
+                            size=kv_cache_shape,
+                            stride=tuple(strides),
+                        )
+                    else:
+                        # No padding — safe to use a contiguous view.
+                        kv_cache = raw_tensor.view(kv_cache_shape)
+                    kv_caches[layer_name] = kv_cache.permute(*inv_order)
+
                 elif isinstance(kv_cache_spec, MambaSpec):
                     has_mamba = True
                     raw_tensor = kv_cache_raw_tensors[layer_name]
@@ -6638,12 +7011,12 @@ def _reshape_kv_cache_tensors(
                     raise NotImplementedError
 
         if has_attn and has_mamba:
-            self._update_hybrid_attention_mamba_layout(kv_caches)
+            self._update_hybrid_attention_mamba_layout(kv_caches, kernel_block_sizes)
 
         return kv_caches
 
     def _update_hybrid_attention_mamba_layout(
-        self, kv_caches: dict[str, torch.Tensor]
+        self, kv_caches: dict[str, torch.Tensor], kernel_block_sizes: list[int]
     ) -> None:
         """
         Update the layout of attention layers from (2, num_blocks, ...) to
@@ -6651,23 +7024,30 @@ def _update_hybrid_attention_mamba_layout(
 
         Args:
             kv_caches: The KV cache buffer of each layer.
+            kernel_block_sizes: The kernel block sizes for each KV cache group.
         """
 
         for group in self._kv_cache_spec_attn_group_iterator():
             kv_cache_spec = group.kv_cache_spec
+            if not isinstance(kv_cache_spec, AttentionSpec):
+                continue
+            block_dim = group.backend.get_kv_cache_block_dim(
+                kernel_block_sizes[group.kv_cache_group_id],
+                kv_cache_spec.num_kv_heads,
+                kv_cache_spec.head_size,
+                cache_dtype_str=self.cache_config.cache_dtype,
+            )
+            # block_dim: 0 means (num_blocks, 2, ...); 1 means (2, num_blocks, ...).
+            if block_dim == 0:
+                continue
+            assert block_dim == 1
             for layer_name in group.layer_names:
                 kv_cache = kv_caches[layer_name]
-                if isinstance(kv_cache_spec, AttentionSpec) and kv_cache.shape[0] == 2:
-                    assert kv_cache.shape[1] != 2, (
-                        "Fail to determine whether the layout is "
-                        "(2, num_blocks, ...) or (num_blocks, 2, ...) for "
-                        f"a tensor of shape {kv_cache.shape}"
-                    )
-                    hidden_size = kv_cache.shape[2:].numel()
-                    kv_cache.as_strided_(
-                        size=kv_cache.shape,
-                        stride=(hidden_size, 2 * hidden_size, *kv_cache.stride()[2:]),
-                    )
+                hidden_size = kv_cache.shape[2:].numel()
+                kv_cache.as_strided_(
+                    size=kv_cache.shape,
+                    stride=(hidden_size, 2 * hidden_size, *kv_cache.stride()[2:]),
+                )
 
     def initialize_kv_cache_tensors(
         self, kv_cache_config: KVCacheConfig, kernel_block_sizes: list[int]
@@ -6705,7 +7085,7 @@ def initialize_kv_cache_tensors(
 
             # Change the memory buffer to the desired shape
             kv_caches = self._reshape_kv_cache_tensors(
-                kv_cache_config, kv_cache_raw_tensors, kernel_block_sizes
+                kv_cache_raw_tensors, kernel_block_sizes
             )
 
         # Set up cross-layer KV cache sharing
@@ -6752,7 +7132,11 @@ def maybe_add_kv_sharing_layers_to_kv_cache_groups(
                 else:
                     break
 
-    def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
+    def initialize_kv_cache(
+        self,
+        kv_cache_config: KVCacheConfig,
+        is_profiling: bool = False,
+    ) -> None:
         """
         Initialize KV cache based on `kv_cache_config`.
         Args:
@@ -6761,10 +7145,13 @@ def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
         """
         kv_cache_config = deepcopy(kv_cache_config)
         self.kv_cache_config = kv_cache_config
-        self._mamba_copy_bufs = None
+        self._mamba_bufs = None
         self.may_add_encoder_only_layers_to_kv_cache_config()
         self.maybe_add_kv_sharing_layers_to_kv_cache_groups(kv_cache_config)
-        self.initialize_attn_backend(kv_cache_config)
+        self.initialize_attn_backend(kv_cache_config, is_profiling=is_profiling)
+        initialize_mamba_ssu_backend(
+            self.vllm_config.mamba_config, self.kv_cache_config
+        )
         # The kernel block size for all KV cache groups. For example, if
         # kv_cache_manager uses block_size 256 for a given group, but the attention
         # backends for that group only supports block_size 64, we will return
@@ -6793,7 +7180,7 @@ def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
             # group
             self.drafter.validate_same_kv_cache_group(kv_cache_config)
 
-        if has_kv_transfer_group():
+        if has_kv_transfer_group() and not is_profiling:
             kv_transfer_group = get_kv_transfer_group()
             if self.cross_layers_kv_cache is not None:
                 assert self.cross_layers_attn_backend is not None
@@ -6805,9 +7192,16 @@ def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
             kv_transfer_group.set_host_xfer_buffer_ops(copy_kv_blocks)
 
     def _get_attention_kv_cache_gid(self) -> int:
-        """Find the KV cache group index for attention layers."""
+        """Find the KV cache group index for attention layers.
+
+        Must match :attr:`RoutedExpertsManager.attn_gid` in the scheduler:
+        both pick the first ``FullAttentionSpec`` group so hybrid models
+        (Mamba / linear-attention layers that use other AttentionSpec
+        subclasses) end up indexing the same slot layout on both sides.
+        Falls back to 0 only for legacy single-group configs.
+        """
         for gid, group in enumerate(self.kv_cache_config.kv_cache_groups):
-            if isinstance(group.kv_cache_spec, AttentionSpec):
+            if isinstance(group.kv_cache_spec, FullAttentionSpec):
                 return gid
         return 0
 
@@ -6816,29 +7210,41 @@ def init_routed_experts_capturer(self):
             "Initializing routed experts capturer, enable_return_routed_experts: %s",
             self.model_config.enable_return_routed_experts,
         )
-        routed_experts_capturer = RoutedExpertsCapturer.create()
-        self.routed_experts_attn_gid = self._get_attention_kv_cache_gid()
-        min_block_size = min(
-            [
-                group.kv_cache_spec.block_size
-                for group in self.kv_cache_config.kv_cache_groups
-            ]
-        )
-        num_groups = len(self.kv_cache_config.kv_cache_groups)
-        self.max_num_kv_tokens = (
-            self.kv_cache_config.num_blocks // num_groups
-        ) * min_block_size
-        dcp_size = self.vllm_config.parallel_config.decode_context_parallel_size
-        pcp_size = self.vllm_config.parallel_config.prefill_context_parallel_size
-        if pcp_size * dcp_size > 1:
-            self.max_num_kv_tokens *= pcp_size * dcp_size
-
-        routed_experts_capturer.init_buffer(
+        self.routed_experts_capturer = RoutedExpertsCapturer(
             max_num_batched_tokens=self.scheduler_config.max_num_batched_tokens,
-            max_num_kv_tokens=self.max_num_kv_tokens,
             vllm_config=self.vllm_config,
         )
-        self._bind_routed_experts_capturer(routed_experts_capturer)
+        self.routed_experts_attn_gid = self._get_attention_kv_cache_gid()
+        self._bind_routed_experts_capturer(self.routed_experts_capturer)
+
+        # Pinned CPU buffer for non-blocking D2H of ``routing_data`` on
+        # the sync scheduling path. Shape / dtype mirror the device
+        # capturer exactly so ``copy_`` is a straight memcpy.
+        self.routed_experts_cpu = torch.empty(
+            self.routed_experts_capturer.device_buffer.shape,
+            dtype=self.routed_experts_capturer.device_buffer.dtype,
+            device="cpu",
+            pin_memory=self.pin_memory,
+        )
+        # ``slot_mapping`` dtype is fixed to int64 by
+        # ``block_table.slot_mapping``; we mirror that here.
+        max_tokens = self.scheduler_config.max_num_batched_tokens
+        self.routed_experts_slot_mapping_cpu = torch.empty(
+            (max_tokens,),
+            dtype=torch.int64,
+            device="cpu",
+            pin_memory=self.pin_memory,
+        )
+        # Private device buffer so the shared ``block_table.slot_mapping``
+        # can be overwritten by the next ``_prepare_inputs`` while the
+        # D2H is still pending on the copy stream. Written in
+        # ``_prepare_inputs``, read in ``_bookkeeping_sync`` (sync path)
+        # or cloned into a snapshot (async path).
+        self.routed_experts_slot_mapping_device = torch.empty(
+            (max_tokens,),
+            dtype=torch.int64,
+            device=self.device,
+        )
         self.routed_experts_initialized = True
 
     def _bind_routed_experts_capturer(self, capturer: RoutedExpertsCapturer) -> None:
diff --git a/vllm/v1/worker/gpu_ubatch_wrapper.py b/vllm/v1/worker/gpu_ubatch_wrapper.py
index 52faa2e88005..657fc8267345 100644
--- a/vllm/v1/worker/gpu_ubatch_wrapper.py
+++ b/vllm/v1/worker/gpu_ubatch_wrapper.py
@@ -23,6 +23,7 @@
 from vllm.model_executor.offloader.base import get_offloader
 from vllm.platforms import current_platform
 from vllm.sequence import IntermediateTensors
+from vllm.utils.deep_gemm import set_num_sms as deep_gemm_set_num_sms
 from vllm.utils.import_utils import has_deep_gemm
 from vllm.utils.platform_utils import num_compute_units
 from vllm.v1.worker.ubatching import UBatchContext, make_ubatch_contexts
@@ -30,6 +31,23 @@
 logger = init_logger(__name__)
 
 
+def _cat_ubatch_outputs(
+    sorted_results: list,
+) -> "torch.Tensor | tuple[torch.Tensor, ...]":
+    """Concatenate per-ubatch model outputs along the batch dim.
+
+    Most models return a single hidden-states tensor per ubatch. Target
+    models running with auxiliary output (e.g. EAGLE3 speculative decoding,
+    which collects aux hidden states for the drafter) return a tuple of
+    tensors instead. Fan out over tuple components so `torch.cat` sees
+    matching shapes and the caller receives the same structure the model
+    produced for a single ubatch (#40769).
+    """
+    if sorted_results and isinstance(sorted_results[0], tuple):
+        return tuple(torch.cat(parts, dim=0) for parts in zip(*sorted_results))
+    return torch.cat(sorted_results, dim=0)
+
+
 @dataclass
 class UbatchMetadata:
     context: UBatchContext
@@ -70,8 +88,8 @@ def __init__(
                 A function that sets the number of SMs for computation.
         """
 
-        assert current_platform.is_cuda(), (
-            "SM control is currently only supported on CUDA"
+        assert current_platform.is_cuda() or current_platform.is_rocm(), (
+            "SM/CU control is supported on CUDA and ROCm platforms"
         )
         device = torch.accelerator.current_device_index()
         total_sms = num_compute_units(device)
@@ -158,9 +176,7 @@ def _create_sm_control_context(vllm_config: VllmConfig):
         # TODO(lucas): support other kernels besides DeepGEMM
         set_compute_sms = lambda sms: None
         if has_deep_gemm() and comm_sms > 0:
-            import deep_gemm as dg
-
-            set_compute_sms = lambda sms: dg.set_num_sms(sms)
+            set_compute_sms = lambda sms: deep_gemm_set_num_sms(sms)
 
         return SMControlContextManager(
             comm_sms=comm_sms,
@@ -267,7 +283,7 @@ def _capture_ubatch_thread(results, ubatch_metadata):
                 for thread in ubatch_threads:
                     thread.join()
                 sorted_results = [value for position, value in sorted(results)]
-                result = torch.cat(sorted_results, dim=0)
+                result = _cat_ubatch_outputs(sorted_results)
                 cudagraph_metadata.outputs = result
                 # Join offloader's copy stream after forward to avoid unjoined
                 # stream error. The last layer's start_prefetch forks copy_stream,
@@ -311,7 +327,7 @@ def _ubatch_thread(results, model, ubatch_metadata):
             for thread in ubatch_threads:
                 thread.join()
         sorted_results = [value for position, value in sorted(results)]
-        result = torch.cat(sorted_results, dim=0)
+        result = _cat_ubatch_outputs(sorted_results)
         return result
 
     def _make_ubatch_metadata(
diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py
index 91dcdc2b9798..582c6a17cb4e 100644
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -5,12 +5,13 @@
 import gc
 import os
 from collections.abc import Callable
-from contextlib import AbstractContextManager, nullcontext
+from contextlib import AbstractContextManager, contextmanager, nullcontext
 from datetime import timedelta
 from types import NoneType
 from typing import TYPE_CHECKING, Any
 
 import numpy as np
+import regex as re
 import torch
 import torch.nn as nn
 
@@ -35,7 +36,10 @@
     get_pp_group,
     get_tp_group,
 )
-from vllm.distributed.weight_transfer import WeightTransferEngineFactory
+from vllm.distributed.weight_transfer import (
+    WeightTransferEngine,
+    WeightTransferEngineFactory,
+)
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.model_executor.warmup.kernel_warmup import kernel_warmup
@@ -56,7 +60,7 @@
 )
 from vllm.v1.utils import compute_iteration_details, report_usage_stats
 from vllm.v1.worker.utils import is_residual_scattered_for_sp
-from vllm.v1.worker.worker_base import WorkerBase
+from vllm.v1.worker.worker_base import CompilationTimes, WorkerBase
 from vllm.v1.worker.workspace import init_workspace_manager
 
 from ...model_executor.model_loader import TensorizerLoader
@@ -130,15 +134,11 @@ def __init__(
         # Buffers saved before sleep
         self._sleep_saved_buffers: dict[str, torch.Tensor] = {}
 
-        # Weight transfer engine (initialized on-demand)
-        self.weight_transfer_engine = (
-            WeightTransferEngineFactory.create_engine(
-                self.vllm_config.weight_transfer_config,
-                self.vllm_config.parallel_config,
-            )
-            if self.vllm_config.weight_transfer_config is not None
-            else None
-        )
+        # Weight transfer engine is created in `load_model` once the model
+        # is available, since the engine needs a reference to the model.
+        self.weight_transfer_engine: WeightTransferEngine | None = None
+        self._weight_update_active = False
+        self._is_checkpoint_format = True
 
         # Torch/CUDA profiler. Enabled and configured through profiler_config.
         # Profiler wrapper is created lazily in profile() when start is called,
@@ -150,7 +150,7 @@ def __init__(
         if self.profiler_config.profiler not in ("torch", "cuda", None):
             raise ValueError(f"Unknown profiler type: {self.profiler_config.profiler}")
 
-        self.use_v2_model_runner = envs.VLLM_USE_V2_MODEL_RUNNER
+        self.use_v2_model_runner = vllm_config.use_v2_model_runner
         # pending non-blocking PP send work from the previous iteration
         self._pp_send_work: list[Handle] = []
 
@@ -192,18 +192,11 @@ def wake_up(self, tags: list[str] | None = None) -> None:
                     buffer.data.copy_(self._sleep_saved_buffers[name].data)
             self._sleep_saved_buffers = {}
 
-        # If the KV cache has just been woken up,
-        # the internal state of cache_engine must be reset,
-        # especially the FP8 scaling factor.
-        if (
-            (tags is None or "kv_cache" in tags)
-            and self.cache_config.cache_dtype.startswith("fp8")
-            and hasattr(self.model_runner, "init_fp8_kv_scales")
-        ):
-            self.model_runner.init_fp8_kv_scales()
+        if tags is None or "kv_cache" in tags:
+            self.model_runner.post_kv_cache_wake_up()
 
     def _maybe_get_memory_pool_context(self, tag: str) -> AbstractContextManager:
-        if not self.vllm_config.model_config.enable_sleep_mode:
+        if not self.vllm_config.model_config.enable_cumem_allocator:
             return nullcontext()
 
         from vllm.device_allocator.cumem import CuMemAllocator
@@ -211,10 +204,34 @@ def _maybe_get_memory_pool_context(self, tag: str) -> AbstractContextManager:
         allocator = CuMemAllocator.get_instance()
         if tag == "weights":
             assert allocator.get_current_usage() == 0, (
-                "Sleep mode can only be used for one instance per process."
+                "CuMem allocator can only be used for one instance per process."
             )
         return allocator.use_memory_pool(tag=tag)
 
+    @contextmanager
+    def _scoped_allocator_max_split(self, max_split_size_mb: int):
+        """Temporarily set max_split_size_mb to reduce allocator fragmentation at the
+        cost of more cudaMalloc calls (negligible in practice). Restores the original
+        value on exit."""
+        if not current_platform.is_cuda():
+            yield
+            return
+
+        conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "")
+        match = re.search(r"max_split_size_mb:(\d+)", conf)
+        original_value = match.group(1) if match else None
+
+        torch._C._accelerator_setAllocatorSettings(
+            f"max_split_size_mb:{max_split_size_mb}"
+        )
+        try:
+            yield
+        finally:
+            # PyTorch defaults to SIZE_MAX (no limit).
+            _SIZE_MAX_MB = (2**64 - 1) // (1024 * 1024)
+            restore = original_value if original_value else str(_SIZE_MAX_MB)
+            torch._C._accelerator_setAllocatorSettings(f"max_split_size_mb:{restore}")
+
     @instrument(span_name="Init device")
     def init_device(self):
         if self.device_config.device_type == "cuda":
@@ -269,7 +286,7 @@ def init_device(self):
             )
 
             if self.use_v2_model_runner:
-                logger.info_once("Using V2 Model Runner", scope="local")
+                logger.info_once("Using V2 Model Runner")
 
             # Set random seed.
             set_random_seed(self.model_config.seed)
@@ -319,9 +336,18 @@ def load_model(self, *, load_dummy_weights: bool = False) -> None:
         with (
             self._maybe_get_memory_pool_context(tag="weights"),
             set_current_vllm_config(self.vllm_config),
+            # 20 MiB is the minimum PyTorch allows for max_split_size_mb.
+            self._scoped_allocator_max_split(max_split_size_mb=20),
         ):
             self.model_runner.load_model(load_dummy_weights=load_dummy_weights)
 
+        if self.vllm_config.weight_transfer_config is not None:
+            self.weight_transfer_engine = WeightTransferEngineFactory.create_engine(
+                self.vllm_config.weight_transfer_config,
+                self.vllm_config.parallel_config,
+                self.model_runner.get_model(),
+            )
+
     def update_config(self, overrides: dict[str, Any]) -> None:
         self.model_runner.update_config(overrides)
 
@@ -374,10 +400,14 @@ def determine_available_memory(self) -> int:
             )
 
             # Profile CUDA graph memory if graphs will be captured.
-            # Skip on ROCm/HIP as graph pool handles and mem_get_info behave
+            # Skip on ROCm/HIP/XPU as graph pool handles and mem_get_info behave
             # differently and can produce incorrect/negative estimates.
             cudagraph_memory_estimate = 0
-            if not self.model_config.enforce_eager and not current_platform.is_rocm():
+            if (
+                current_platform.is_cuda()
+                and self.vllm_config.compilation_config.cudagraph_mode
+                != CUDAGraphMode.NONE
+            ):
                 cudagraph_memory_estimate = self.model_runner.profile_cudagraph_memory()
 
         # Use the pre-cudagraph torch peak to avoid double-counting.
@@ -436,7 +466,6 @@ def determine_available_memory(self) -> int:
         logger.info_once(
             "Available KV cache memory: %s GiB",
             format_gib(self.available_kv_cache_memory_bytes),
-            scope="local",
         )
 
         if cudagraph_memory_estimate > 0:
@@ -450,14 +479,13 @@ def determine_available_memory(self) -> int:
                     1.0,
                 )
                 logger.info(
-                    "CUDA graph memory profiling is enabled "
-                    "(VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=1). "
-                    "This will become the default in v0.19. "
-                    "The current --gpu-memory-utilization=%.4f is equivalent "
-                    "to --gpu-memory-utilization=%.4f without CUDA graph "
-                    "memory profiling. To maintain the same effective KV "
-                    "cache size as before, increase "
-                    "--gpu-memory-utilization to %.4f.",
+                    "CUDA graph memory profiling is enabled (default since "
+                    "v0.21.0). The current --gpu-memory-utilization=%.4f is "
+                    "equivalent to --gpu-memory-utilization=%.4f without "
+                    "CUDA graph memory profiling. To maintain the same "
+                    "effective KV cache size as before, increase "
+                    "--gpu-memory-utilization to %.4f. To disable, set "
+                    "VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=0.",
                     current_util,
                     equiv_util,
                     suggested_util,
@@ -467,14 +495,14 @@ def determine_available_memory(self) -> int:
                     round(current_util + cg_util_delta, 4),
                     1.0,
                 )
-                logger.info(
-                    "In v0.19, CUDA graph memory profiling will be enabled "
-                    "by default (VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=1), "
-                    "which more accurately accounts for CUDA graph memory "
-                    "during KV cache allocation. To try it now, set "
-                    "VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=1 and increase "
-                    "--gpu-memory-utilization from %.4f to %.4f to maintain "
-                    "the same effective KV cache size.",
+                logger.warning(
+                    "CUDA graph memory profiling is disabled "
+                    "(VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=0). "
+                    "Without it, CUDA graph memory is not accounted for "
+                    "during KV cache allocation, which may require lowering "
+                    "--gpu-memory-utilization to avoid OOM. Consider "
+                    "re-enabling it (the default as of v0.21.0) and increasing "
+                    "--gpu-memory-utilization from %.4f to %.4f.",
                     current_util,
                     suggested_util,
                 )
@@ -526,13 +554,7 @@ def initialize_from_config(self, kv_cache_config: KVCacheConfig) -> None:
         # related to kv cache connector (e.g. kv cache sharing layers).
         ensure_kv_transfer_initialized(self.vllm_config, kv_cache_config)
 
-        if self.vllm_config.model_config.enable_sleep_mode:
-            from vllm.device_allocator.cumem import CuMemAllocator
-
-            allocator = CuMemAllocator.get_instance()
-            with allocator.use_memory_pool(tag="kv_cache"):
-                self.model_runner.initialize_kv_cache(kv_cache_config)
-        else:
+        with self._maybe_get_memory_pool_context(tag="kv_cache"):
             self.model_runner.initialize_kv_cache(kv_cache_config)
 
         if self.model_config.enable_return_routed_experts:
@@ -547,7 +569,7 @@ def initialize_from_config(self, kv_cache_config: KVCacheConfig) -> None:
             self.model_runner._init_kv_zero_meta()
 
     @instrument(span_name="Warmup (GPU)")
-    def compile_or_warm_up_model(self) -> float:
+    def compile_or_warm_up_model(self) -> CompilationTimes:
         warmup_sizes: list[int] = []
 
         if self.vllm_config.compilation_config.mode == CompilationMode.VLLM_COMPILE:
@@ -689,7 +711,18 @@ def compile_or_warm_up_model(self) -> float:
         # the model initialization and profiling.
         set_random_seed(self.model_config.seed)
 
-        return self.compilation_config.compilation_time
+        # All warmup is done — start monitoring for unexpected JIT
+        # compilations that would cause latency spikes during inference.
+        from vllm.triton_utils.jit_monitor import (
+            activate as activate_triton_jit_monitor,
+        )
+
+        activate_triton_jit_monitor()
+
+        return CompilationTimes(
+            language_model=self.compilation_config.compilation_time,
+            encoder=self.compilation_config.encoder_compilation_time,
+        )
 
     def reset_mm_cache(self) -> None:
         self.model_runner.reset_mm_cache()
@@ -703,6 +736,11 @@ def get_model(self) -> nn.Module:
     def get_supported_tasks(self) -> tuple[SupportedTask, ...]:
         return self.model_runner.get_supported_tasks()
 
+    def get_compilation_match_table(self) -> dict[str, int]:
+        from vllm.compilation.passes.vllm_inductor_pass import get_match_table
+
+        return get_match_table()
+
     def get_encoder_timing_stats(self) -> dict[str, dict[str, float | int]]:
         """Get encoder timing stats from model runner."""
         return self.model_runner.get_encoder_timing_stats()
@@ -928,6 +966,13 @@ def save_tensorized_model(self, tensorizer_config: "TensorizerConfig") -> None:
             model_config=self.model_config,
         )
 
+    def _check_weight_transfer_engine(self) -> None:
+        if self.weight_transfer_engine is None:
+            raise RuntimeError(
+                "Weight transfer not configured. "
+                "Please set weight_transfer_config to enable weight transfer."
+            )
+
     def init_weight_transfer_engine(self, init_info: dict) -> None:
         """
         Initialize weight transfer mechanism.
@@ -936,26 +981,62 @@ def init_weight_transfer_engine(self, init_info: dict) -> None:
         Args:
             init_info: Dictionary containing backend-specific initialization info
         """
-        if self.weight_transfer_engine is None:
-            raise RuntimeError(
-                "Weight transfer not configured. "
-                "Please set weight_transfer_config to enable weight transfer."
-            )
+        self._check_weight_transfer_engine()
+        assert self.weight_transfer_engine is not None
         # Parse dict into backend-specific typed dataclass
         typed_init_info = self.weight_transfer_engine.parse_init_info(init_info)
         self.weight_transfer_engine.init_transfer_engine(typed_init_info)
 
+    def start_weight_update(self, is_checkpoint_format: bool = True) -> None:
+        """
+        Start a new weight update.
+
+        Prepares the model for receiving weights. For checkpoint format,
+        this initializes state for layerwise processing. For kernel format, this is
+        a no-op but must still be called for consistency.
+
+        Args:
+            is_checkpoint_format: Whether incoming weights are in checkpoint
+                format (need layerwise processing) or kernel format (direct
+                copy). Stored as state for finish_weight_update.
+        """
+        self._check_weight_transfer_engine()
+
+        if self._weight_update_active:
+            raise RuntimeError(
+                "start_weight_update called while a weight update is "
+                "already active. Call finish_weight_update first."
+            )
+
+        if is_checkpoint_format:
+            from vllm.model_executor.model_loader.reload import (
+                initialize_layerwise_reload,
+            )
+
+            model = self.model_runner.model
+            with torch.device(self.device):
+                initialize_layerwise_reload(model)
+
+        # Store state so update_weights/finish_weight_update can check
+        self._is_checkpoint_format = is_checkpoint_format
+        self._weight_update_active = True
+
     def update_weights(self, update_info: dict) -> None:
         """
-        Batched weight update from the trainer.
+        Receive weights from the trainer (one or more chunks).
+
+        start_weight_update must be called before update_weights and
+        finish_weight_update must be called after.
 
         Args:
             update_info: Dictionary containing backend-specific update info
         """
-        if self.weight_transfer_engine is None:
+        self._check_weight_transfer_engine()
+        assert self.weight_transfer_engine is not None
+
+        if not self._weight_update_active:
             raise RuntimeError(
-                "Weight transfer not configured. "
-                "Please set weight_transfer_config to enable weight transfer."
+                "start_weight_update must be called before update_weights."
             )
 
         # Parse dict into backend-specific typed dataclass
@@ -963,38 +1044,59 @@ def update_weights(self, update_info: dict) -> None:
 
         model = self.model_runner.model
 
-        if typed_update_info.is_checkpoint_format:
-            from vllm.model_executor.model_loader.reload import (
-                finalize_layerwise_reload,
-                initialize_layerwise_reload,
-            )
-
-            # Use layerwise reload pattern for checkpoint format weights
-            with torch.device(self.device):
-                initialize_layerwise_reload(model)
+        with torch.device(self.device):
+            if self._is_checkpoint_format:
                 self.weight_transfer_engine.receive_weights(
                     typed_update_info,
                     load_weights=model.load_weights,
                 )
-                finalize_layerwise_reload(model, self.model_config)
-        else:
-            # Weights are already in kernel format, copy directly
-            def load_weights_direct(
-                weights: list[tuple[str, torch.Tensor]],
-            ) -> None:
-                for name, weight in weights:
-                    param = model.get_parameter(name)
-                    param.copy_(weight)
-
-            self.weight_transfer_engine.receive_weights(
-                typed_update_info,
-                load_weights=load_weights_direct,
-            )
+            else:
+                # Weights are already in kernel format, copy directly
+                def load_weights_direct(
+                    weights: list[tuple[str, torch.Tensor]],
+                ) -> None:
+                    for name, weight in weights:
+                        param = model.get_parameter(name)
+                        param.copy_(weight)
+
+                self.weight_transfer_engine.receive_weights(
+                    typed_update_info,
+                    load_weights=load_weights_direct,
+                )
 
         # NCCL broadcast/packed path are asynchronous.
         # Sync here so the next step uses the new weights.
         torch.accelerator.synchronize()
 
+    def finish_weight_update(self) -> None:
+        """
+        Finish the current weight update.
+
+        For checkpoint format, this runs layerwise postprocessing.
+        Uses the is_checkpoint_format state stored by start_weight_update.
+        """
+        self._check_weight_transfer_engine()
+
+        if not self._weight_update_active:
+            raise RuntimeError(
+                "start_weight_update must be called before finish_weight_update."
+            )
+
+        is_checkpoint_format = self._is_checkpoint_format
+
+        if is_checkpoint_format:
+            from vllm.model_executor.model_loader.reload import (
+                finalize_layerwise_reload,
+            )
+
+            model = self.model_runner.model
+            with torch.device(self.device):
+                finalize_layerwise_reload(model, self.model_config)
+
+        # Reset state
+        self._weight_update_active = False
+        self._is_checkpoint_format = True
+
     def shutdown(self) -> None:
         # has_kv_transfer_group can be None during interpreter shutdown.
         if ensure_kv_transfer_shutdown is not None:
@@ -1005,6 +1107,11 @@ def shutdown(self) -> None:
         if weight_transfer_engine := getattr(self, "weight_transfer_engine", None):
             weight_transfer_engine.shutdown()
 
+        # Release GPU resources held by the model runner so that memory
+        # can be reclaimed when running in-process
+        if model_runner := getattr(self, "model_runner", None):
+            model_runner.shutdown()
+
     def elastic_ep_execute(self, execute_method: str, *args, **kwargs):
         return self.elastic_ep_executor.execute(execute_method, *args, **kwargs)
 
@@ -1017,11 +1124,10 @@ def init_worker_distributed_environment(
     backend: str = "nccl",
 ) -> None:
     """Initialize the distributed environment."""
-    attention_config = vllm_config.attention_config
     parallel_config = vllm_config.parallel_config
     from vllm.model_executor.layers.batch_invariant import init_batch_invariance
 
-    init_batch_invariance(attention_config.backend)
+    init_batch_invariance()
     override_envs_for_eplb(parallel_config)
     set_custom_all_reduce(not parallel_config.disable_custom_all_reduce)
 
diff --git a/vllm/v1/worker/kv_connector_model_runner_mixin.py b/vllm/v1/worker/kv_connector_model_runner_mixin.py
index bc243906b22a..4fc1aff94fed 100644
--- a/vllm/v1/worker/kv_connector_model_runner_mixin.py
+++ b/vllm/v1/worker/kv_connector_model_runner_mixin.py
@@ -13,11 +13,7 @@
 
 from vllm.config import VllmConfig
 from vllm.config.cache import CacheDType
-from vllm.distributed.kv_transfer import (
-    ensure_kv_transfer_shutdown,
-    get_kv_transfer_group,
-    has_kv_transfer_group,
-)
+from vllm.distributed.kv_transfer import get_kv_transfer_group, has_kv_transfer_group
 from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase
 from vllm.forward_context import get_forward_context, set_forward_context
 from vllm.logger import init_logger
@@ -38,12 +34,6 @@
 
 # Defined as a kv connector functionality mixin for ModelRunner (GPU, TPU)
 class KVConnectorModelRunnerMixin:
-    @staticmethod
-    def ensure_kv_transfer_shutdown() -> None:
-        # has_kv_transfer_group can be None during interpreter shutdown.
-        if has_kv_transfer_group and has_kv_transfer_group():  # type: ignore[truthy-function]
-            ensure_kv_transfer_shutdown()
-
     @staticmethod
     def kv_connector_no_forward(
         scheduler_output: "SchedulerOutput", vllm_config: VllmConfig
diff --git a/vllm/v1/worker/lora_model_runner_mixin.py b/vllm/v1/worker/lora_model_runner_mixin.py
index 53873d156f88..3a14abfc3589 100644
--- a/vllm/v1/worker/lora_model_runner_mixin.py
+++ b/vllm/v1/worker/lora_model_runner_mixin.py
@@ -101,9 +101,12 @@ def maybe_setup_dummy_loras(
             assert self.lora_manager is not None, "LoRA is not enabled"
 
             num_loras = lora_config.max_loras
-            lora_warmup_rank = (
+            lora_warmup_rank: int = (
                 lora_config.max_lora_rank if lora_config.max_lora_rank < 8 else 8
             )
+            lora_warmup_rank = self.lora_manager.get_dummy_lora_warmup_rank(
+                lora_warmup_rank
+            )
             # Make dummy lora requests
             lora_requests: set[LoRARequest] = {
                 LoRARequest(
diff --git a/vllm/v1/worker/mamba_utils.py b/vllm/v1/worker/mamba_utils.py
index c832389b1b0a..485b274eabdf 100644
--- a/vllm/v1/worker/mamba_utils.py
+++ b/vllm/v1/worker/mamba_utils.py
@@ -10,6 +10,8 @@
 from vllm.config import CacheConfig
 from vllm.model_executor.layers.mamba.mamba_utils import (
     MambaStateCopyFunc,
+    get_conv_copy_spec,
+    get_temporal_copy_spec,
 )
 from vllm.triton_utils import tl, triton
 from vllm.utils.math_utils import cdiv
@@ -20,6 +22,157 @@
 from vllm.v1.worker.lora_model_runner_mixin import GPUInputBatch
 
 
+@triton.jit
+def postprocess_mamba_fused_kernel(
+    # Decision inputs (per-request)
+    num_accepted_tokens_ptr,
+    mamba_state_idx_ptr,
+    num_scheduled_tokens_ptr,
+    num_computed_tokens_ptr,
+    num_draft_tokens_ptr,
+    # Per-group block table base addresses: int64[num_groups]. Each entry is
+    # the data_ptr of that group's persistent [max_reqs, max_blocks] int32
+    # block table.
+    block_table_ptrs_ptr,
+    block_table_stride_req: tl.int64,  # stride between requests (in elements)
+    # Mamba state metadata (per-layer, per-state-type)
+    # These are 1D arrays indexed by (layer_idx * num_state_types + state_type_idx)
+    state_base_addrs_ptr,  # base address of each state tensor
+    state_block_strides_ptr,  # bytes per block for each state
+    state_elem_sizes_ptr,  # element size for each state
+    state_inner_sizes_ptr,  # number of elements in inner dimensions
+    state_conv_widths_ptr,  # conv width for conv states (0 for temporal)
+    state_group_indices_ptr,  # maps state_idx to group index in block table
+    # Output: num_accepted_tokens update (for src==dst case)
+    num_accepted_tokens_out_ptr,
+    # Runtime parameter (varies per batch - NOT constexpr to avoid recompilation)
+    num_reqs,
+    # Compile-time constants (fixed after model initialization)
+    # block_size: determined by model config, constant for all invocations
+    block_size: tl.constexpr,
+    # COPY_BLOCK_SIZE: fixed tuning parameter for memory copy loop
+    COPY_BLOCK_SIZE: tl.constexpr,
+):
+    """
+    Fused GPU kernel for postprocess_mamba that computes decisions AND performs
+    mamba state copies without any CPU-GPU synchronization.
+
+    Grid: (num_reqs, num_layers * num_state_types)
+    - program_id(0) = request index
+    - program_id(1) = state_idx (flattened index into layer/state_type metadata)
+
+    Note: num_layers and num_state_types are not passed as kernel parameters
+    because the kernel indexes directly into pre-flattened metadata arrays
+    using program_id(1). The grid dimensions encode the total state count.
+    """
+    req_idx = tl.program_id(0)
+    state_idx = tl.program_id(1)
+
+    # Bounds check
+    if req_idx >= num_reqs:
+        return
+
+    # Compute decision logic (mirrors postprocess_mamba Python reference)
+    num_accepted = tl.load(num_accepted_tokens_ptr + req_idx)
+    src_block_idx = tl.load(mamba_state_idx_ptr + req_idx)
+    num_scheduled = tl.load(num_scheduled_tokens_ptr + req_idx)
+    num_computed = tl.load(num_computed_tokens_ptr + req_idx)
+    num_draft = tl.load(num_draft_tokens_ptr + req_idx)
+
+    num_tokens_running_state = num_computed + num_scheduled - num_draft
+    new_num_computed = num_tokens_running_state + num_accepted - 1
+    aligned_new_computed = (new_num_computed // block_size) * block_size
+
+    needs_copy = aligned_new_computed >= num_tokens_running_state
+
+    if not needs_copy:
+        return
+
+    # Compute copy parameters
+    accept_token_bias = aligned_new_computed - num_tokens_running_state
+    dest_block_idx = aligned_new_computed // block_size - 1
+
+    # Load state metadata for this layer/state_type
+    state_base_addr = tl.load(state_base_addrs_ptr + state_idx)
+    state_block_stride = tl.load(state_block_strides_ptr + state_idx)
+    state_elem_size = tl.load(state_elem_sizes_ptr + state_idx)
+    state_inner_size = tl.load(state_inner_sizes_ptr + state_idx)
+    conv_width = tl.load(state_conv_widths_ptr + state_idx)
+
+    # Load the group index for this state, then index into the correct
+    # group's block table. Each mamba group has independently allocated
+    # physical blocks.
+    group_idx = tl.load(state_group_indices_ptr + state_idx).to(tl.int64)
+
+    # block_table_ptrs_ptr holds one pointer per group (each group owns its own
+    # block table). Reinterpret as int32* since block ids are int32.
+    group_base_addr = tl.load(block_table_ptrs_ptr + group_idx)
+    block_table_typed = group_base_addr.to(tl.pointer_type(tl.int32))
+    block_table_base = block_table_typed + req_idx * block_table_stride_req
+
+    # Widen block ids to int64 before they reach `block_id * state_block_stride`
+    # below: state_block_stride can exceed 2**31 bytes for large mamba caches,
+    # and Triton would otherwise do the multiply in int32 and wrap.
+    src_block_id = tl.load(block_table_base + src_block_idx).to(tl.int64)
+    dest_block_id = tl.load(block_table_base + dest_block_idx).to(tl.int64)
+
+    # Compute source and destination addresses based on state type
+    # conv_width > 0 means this is a conv state (get_conv_copy_spec logic)
+    # conv_width == 0 means this is a temporal state (get_temporal_copy_spec logic)
+    is_conv_state = conv_width > 0
+
+    if is_conv_state:
+        # Conv state: copy
+        #   state[block_table[req_idx, src_block_idx],  accept_token_bias:]
+        # to
+        #   state[block_table[req_idx, dest_block_idx], :conv_width - accept_token_bias]
+        src_offset = accept_token_bias.to(tl.int64) * state_inner_size * state_elem_size
+        src_addr = state_base_addr + src_block_id * state_block_stride + src_offset
+        dst_addr = state_base_addr + dest_block_id * state_block_stride
+        # Number of elements to copy:
+        # (conv_width - accept_token_bias) * inner_size
+        num_elems_to_copy = (conv_width - accept_token_bias).to(
+            tl.int64
+        ) * state_inner_size
+        copy_size = num_elems_to_copy * state_elem_size
+    else:
+        # Temporal state: copy
+        #   state[block_table[req_idx, src_block_idx + accept_token_bias]]
+        # to
+        #   state[block_table[req_idx, dest_block_idx]]
+        actual_src_block_idx = src_block_idx + accept_token_bias
+        actual_src_block_id = tl.load(block_table_base + actual_src_block_idx).to(
+            tl.int64
+        )
+        src_addr = state_base_addr + actual_src_block_id * state_block_stride
+        dst_addr = state_base_addr + dest_block_id * state_block_stride
+        # Use natural block data size (inner_size * elem_size), NOT
+        # state_block_stride which is the page stride and can exceed the
+        # actual data when the state tensor uses as_strided page padding.
+        copy_size = state_inner_size * state_elem_size
+
+    # Mirror postprocess_mamba's trailing
+    #     if src_block_idx == dest_block_idx: num_accepted_tokens_cpu[i] = 1
+    # This runs whether or not the copy below is skipped (it's per-request, so
+    # only state_idx == 0 writes).
+    if src_block_idx == dest_block_idx and state_idx == 0:
+        tl.store(num_accepted_tokens_out_ptr + req_idx, 1)
+
+    # Mirror collect_mamba_copy_meta's early return: src==dst with no token
+    # bias means source and destination ranges coincide, so the copy is a
+    # no-op.
+    if src_block_idx == dest_block_idx and accept_token_bias == 0:
+        return
+
+    offsets = tl.arange(0, COPY_BLOCK_SIZE)
+    for i in range(0, copy_size, COPY_BLOCK_SIZE):
+        mask = (i + offsets) < copy_size
+        curr_src = (src_addr + i + offsets).to(tl.pointer_type(tl.uint8))
+        curr_dst = (dst_addr + i + offsets).to(tl.pointer_type(tl.uint8))
+        data = tl.load(curr_src, mask=mask)
+        tl.store(curr_dst, data, mask=mask)
+
+
 @triton.jit
 def batch_memcpy_kernel(src_ptrs, dst_ptrs, sizes, BLOCK_SIZE: tl.constexpr):
     pid = tl.program_id(0)
@@ -94,6 +247,328 @@ def create(
         )
 
 
+@dataclasses.dataclass
+class MambaSpecDecodeGPUContext:
+    """
+    Context for GPU-side Mamba state copy operations during the
+    fused postprocess path.
+
+    Only used when speculative decoding is enabled on a hybrid model
+    (and the mamba_cache_config is in align mode).
+
+    Precomputes memory layout metadata (base addresses, strides, element sizes)
+    so the GPU kernel can perform state copies without CPU-GPU sync.
+
+    State types are distinguished by conv_width: >0 for conv states (sliding
+    window with offset-based copies), 0 for temporal states (full block copies).
+    """
+
+    # Per-state metadata tensors (shape: [num_layers * num_state_types])
+    # These are populated from forward_context during the first forward pass
+    state_base_addrs: torch.Tensor  # int64: base address of each state tensor
+    state_block_strides: torch.Tensor  # int64: bytes per block
+    state_elem_sizes: torch.Tensor  # int32: element size in bytes
+    state_inner_sizes: torch.Tensor  # int64: elements in inner dimensions
+    state_conv_widths: torch.Tensor  # int32: conv width (0 for temporal states)
+    state_group_indices: torch.Tensor  # int32: maps state_idx to group index
+
+    # Configuration
+    block_size: int
+    num_layers: int
+    num_state_types: int
+    mamba_group_ids: list[int]
+    num_groups: int
+
+    # Output buffer for num_accepted_tokens updates
+    num_accepted_tokens_out: torch.Tensor
+
+    # Per-group block-table base addresses: int64[num_groups]. Populated in
+    # initialize_from_forward_context from the persistent per-group block
+    # table tensors (whose data_ptr is stable across steps).
+    block_table_ptrs: torch.Tensor
+    block_table_stride_req: int = 0
+
+    # Per-request staging buffers (CPU+GPU mirrors). The runner stages
+    # values into the CPU view in ``_prepare_inputs`` and the fused kernel
+    # reads the GPU side. These only exist when the postprocess kernel is
+    # enabled (spec decode + hybrid + align mode).
+    mamba_state_idx_buf: CpuGpuBuffer | None = None
+    num_scheduled_tokens_buf: CpuGpuBuffer | None = None
+    num_computed_tokens_buf: CpuGpuBuffer | None = None
+    num_draft_tokens_buf: CpuGpuBuffer | None = None
+
+    # Flag to track if metadata has been populated
+    is_initialized: bool = False
+
+    @classmethod
+    def create(
+        cls,
+        max_num_reqs: int,
+        kv_cache_config: KVCacheConfig,
+        num_state_types: int,
+        device: torch.device,
+        make_buffer: Callable[..., CpuGpuBuffer],
+    ) -> "MambaSpecDecodeGPUContext":
+        """Create context with allocated buffers (metadata populated later)."""
+        mamba_group_ids, mamba_spec = get_mamba_groups(kv_cache_config)
+
+        # Count total layers across all mamba groups
+        num_layers = sum(
+            len(kv_cache_config.kv_cache_groups[gid].layer_names)
+            for gid in mamba_group_ids
+        )
+        total_states = num_layers * num_state_types
+
+        return cls(
+            state_base_addrs=torch.zeros(
+                total_states, dtype=torch.int64, device=device
+            ),
+            state_block_strides=torch.zeros(
+                total_states, dtype=torch.int64, device=device
+            ),
+            state_elem_sizes=torch.zeros(
+                total_states, dtype=torch.int32, device=device
+            ),
+            state_inner_sizes=torch.zeros(
+                total_states, dtype=torch.int64, device=device
+            ),
+            state_conv_widths=torch.zeros(
+                total_states, dtype=torch.int32, device=device
+            ),
+            state_group_indices=torch.zeros(
+                total_states, dtype=torch.int32, device=device
+            ),
+            block_size=mamba_spec.block_size,
+            num_layers=num_layers,
+            num_state_types=num_state_types,
+            mamba_group_ids=mamba_group_ids,
+            num_groups=len(mamba_group_ids),
+            num_accepted_tokens_out=torch.zeros(
+                max_num_reqs, dtype=torch.int32, device=device
+            ),
+            block_table_ptrs=torch.zeros(
+                len(mamba_group_ids), dtype=torch.int64, device=device
+            ),
+            mamba_state_idx_buf=make_buffer(max_num_reqs, dtype=torch.int32),
+            num_scheduled_tokens_buf=make_buffer(max_num_reqs, dtype=torch.int32),
+            num_computed_tokens_buf=make_buffer(max_num_reqs, dtype=torch.int32),
+            num_draft_tokens_buf=make_buffer(max_num_reqs, dtype=torch.int32),
+            is_initialized=False,
+        )
+
+    def initialize_from_forward_context(
+        self,
+        kv_cache_config: KVCacheConfig,
+        forward_context: dict[str, Any],
+        mamba_state_copy_funcs: tuple[MambaStateCopyFunc, ...],
+        block_tables: list[torch.Tensor],
+    ) -> None:
+        """
+        Extract and cache memory layout metadata from Mamba state tensors.
+
+        This method populates the pre-allocated metadata tensors with information
+        needed by `postprocess_mamba_fused_kernel` to perform state copies entirely
+        on the GPU without CPU-GPU synchronization.
+
+        For each Mamba layer and state type, the following metadata is extracted:
+        - state_base_addrs: GPU memory address (data_ptr) of the state tensor
+        - state_block_strides: Bytes between consecutive blocks (stride * elem_size)
+        - state_elem_sizes: Element size in bytes (e.g., 2 for float16)
+        - state_inner_sizes: For conv states, elements per conv position (stride(1)),
+          used to compute offset when slicing state[block, offset:]. For temporal
+          states, this field is unused (set to 1).
+        - state_conv_widths: Conv dimension size for conv states, 0 for temporal states
+
+        The conv vs temporal state type is detected by inspecting the copy function
+        name: functions containing "conv" are treated as conv states.
+
+        This method is idempotent - it only executes once (guarded by is_initialized
+        flag) since the metadata is static after model loading.
+
+        Args:
+            kv_cache_config: Configuration containing KV cache group info and
+                layer name mappings.
+            forward_context: Dictionary mapping layer names to attention objects,
+                populated after the model is loaded. Each attention object must
+                have a `kv_cache` attribute containing the list of state tensors.
+            mamba_state_copy_funcs: Tuple of copy functions (one per state type)
+                used to determine whether each state is a conv or temporal state.
+            block_tables: per-mamba-group persistent block-table tensors, in
+                the same order as `mamba_group_ids`. Their `data_ptr()` /
+                `stride(0)` are captured once for the kernel to index into.
+        """
+        if self.is_initialized:
+            return
+
+        idx = 0
+        for group_local_idx, mamba_group_id in enumerate(self.mamba_group_ids):
+            layer_names = kv_cache_config.kv_cache_groups[mamba_group_id].layer_names
+            for layer_name in layer_names:
+                attention = forward_context[layer_name]
+                kv_caches: list[torch.Tensor] = attention.kv_cache
+
+                for state_type_idx, state in enumerate(kv_caches):
+                    # Base address
+                    self.state_base_addrs[idx] = state.data_ptr()
+
+                    # Block stride (bytes between consecutive blocks)
+                    # state shape: [num_blocks, ...], stride(0) = elements per block
+                    if state.dim() > 1:
+                        block_stride_elems = state.stride(0)
+                    else:
+                        block_stride_elems = state.numel()
+                    self.state_block_strides[idx] = (
+                        block_stride_elems * state.element_size()
+                    )
+
+                    # Element size
+                    self.state_elem_sizes[idx] = state.element_size()
+
+                    copy_func = mamba_state_copy_funcs[state_type_idx]
+                    assert (
+                        copy_func is get_conv_copy_spec
+                        or copy_func is get_temporal_copy_spec
+                    ), f"unexpected copy func: {copy_func}"
+                    if copy_func is get_conv_copy_spec:
+                        # Conv state: conv_width is state.size(1)
+                        # inner_size is stride(1) = elements per conv position,
+                        # used to compute byte offset for state[block, offset:]
+                        conv_w = state.size(1) if state.dim() > 1 else 0
+                        self.state_conv_widths[idx] = conv_w
+                        if state.dim() > 2:
+                            # stride(1) = product of dims[2:] for contiguous tensor
+                            self.state_inner_sizes[idx] = state.stride(1)
+                        else:
+                            # 2D tensor: [num_blocks, conv_dim], no inner dims
+                            self.state_inner_sizes[idx] = 1
+                    else:
+                        # Temporal state: inner_size = natural elements per
+                        # block (prod of inner dims).  The kernel uses this
+                        # to compute copy_size = inner_size * elem_size,
+                        # which gives the correct byte count even when the
+                        # state tensor is as_strided with padded page strides
+                        # (state_block_stride would be the page size, too big).
+                        self.state_conv_widths[idx] = 0
+                        self.state_inner_sizes[idx] = (
+                            state[0].numel() if state.dim() > 1 else 1
+                        )
+
+                    self.state_group_indices[idx] = group_local_idx
+                    idx += 1
+
+        # Cache per-group block-table base addresses and per-request stride.
+        # `block_tables[i]` is the persistent 2D int32 block-table tensor for
+        # `mamba_group_ids[i]`; `data_ptr()` / `stride(0)` are stable for the
+        # engine's lifetime, so we capture them once here.
+        assert len(block_tables) == self.num_groups, (
+            f"expected {self.num_groups} block tables, got {len(block_tables)}"
+        )
+        strides = {bt.stride(0) for bt in block_tables}
+        assert len(strides) == 1, (
+            f"all mamba block tables must share stride(0), got {strides}"
+        )
+        self.block_table_stride_req = int(next(iter(strides)))
+        for i, bt in enumerate(block_tables):
+            self.block_table_ptrs[i] = bt.data_ptr()
+
+        self.is_initialized = True
+
+    def run_fused_postprocess(
+        self,
+        num_reqs: int,
+        num_accepted_tokens_gpu: torch.Tensor,
+        mamba_state_idx_gpu: torch.Tensor,
+        num_scheduled_tokens_gpu: torch.Tensor,
+        num_computed_tokens_gpu: torch.Tensor,
+        num_draft_tokens_gpu: torch.Tensor,
+    ) -> None:
+        """
+        Run the fused postprocess_mamba kernel on GPU.
+
+        This computes decisions and performs mamba state copies entirely on GPU,
+        eliminating the CPU-GPU sync that was previously needed.
+
+        Args:
+            num_reqs: Number of active requests
+            num_accepted_tokens_gpu: [num_reqs] accepted token counts
+            mamba_state_idx_gpu: [num_reqs] source block indices
+            num_scheduled_tokens_gpu: [num_reqs] scheduled token counts
+            num_computed_tokens_gpu: [num_reqs] computed token counts
+            num_draft_tokens_gpu: [num_reqs] draft token counts
+        """
+        if num_reqs == 0 or not self.is_initialized:
+            return
+
+        # Initialize output to current values (unchanged unless src==dst)
+        self.num_accepted_tokens_out[:num_reqs].copy_(
+            num_accepted_tokens_gpu[:num_reqs]
+        )
+
+        total_states = self.num_layers * self.num_state_types
+        grid = (num_reqs, total_states)
+
+        postprocess_mamba_fused_kernel[grid](
+            num_accepted_tokens_gpu,
+            mamba_state_idx_gpu,
+            num_scheduled_tokens_gpu,
+            num_computed_tokens_gpu,
+            num_draft_tokens_gpu,
+            self.block_table_ptrs,
+            self.block_table_stride_req,
+            self.state_base_addrs,
+            self.state_block_strides,
+            self.state_elem_sizes,
+            self.state_inner_sizes,
+            self.state_conv_widths,
+            self.state_group_indices,
+            self.num_accepted_tokens_out,
+            num_reqs,
+            block_size=self.block_size,
+            COPY_BLOCK_SIZE=1024,
+        )
+
+
+@dataclasses.dataclass
+class MambaBuffers:
+    """Single owner for all mamba-specific runner buffers.
+
+    The two sub-objects have different gates:
+    ``preprocess`` is needed whenever ``mamba_cache_mode == "align"``;
+    ``postprocess_align`` is needed only when align is combined with
+    speculative decoding on a hybrid model, and is ``None`` otherwise.
+    """
+
+    preprocess: MambaCopyBuffers
+    postprocess_align: MambaSpecDecodeGPUContext | None
+
+    @classmethod
+    def create(
+        cls,
+        max_num_reqs: int,
+        kv_cache_config: KVCacheConfig,
+        copy_funcs: tuple[MambaStateCopyFunc, ...],
+        make_buffer: Callable[..., CpuGpuBuffer],
+        device: torch.device,
+        with_postprocess_align: bool,
+    ) -> "MambaBuffers":
+        return cls(
+            preprocess=MambaCopyBuffers.create(
+                max_num_reqs, kv_cache_config, copy_funcs, make_buffer
+            ),
+            postprocess_align=(
+                MambaSpecDecodeGPUContext.create(
+                    max_num_reqs=max_num_reqs,
+                    kv_cache_config=kv_cache_config,
+                    num_state_types=len(copy_funcs),
+                    device=device,
+                    make_buffer=make_buffer,
+                )
+                if with_postprocess_align
+                else None
+            ),
+        )
+
+
 def collect_mamba_copy_meta(
     copy_bufs: MambaCopyBuffers,
     kv_cache_config: KVCacheConfig,
@@ -144,6 +619,24 @@ def do_mamba_copy_block(copy_bufs: MambaCopyBuffers):
     )
 
 
+def cleanup_mamba_state_idx(
+    scheduler_output: SchedulerOutput,
+    mamba_state_idx: dict[str, int],
+) -> None:
+    """Pop stale `mamba_state_idx` entries for finished/preempted/resumed reqs.
+
+    Force-preempted requests (e.g., during reset_prefix_cache / KV cache
+    flush) appear in resumed_req_ids without a corresponding entry in
+    preempted_req_ids, leaving stale entries that can point to block
+    indices beyond the new (smaller) block allocation.
+    """
+    finished_req_ids = scheduler_output.finished_req_ids
+    preempted_req_ids = scheduler_output.preempted_req_ids or set()
+    resumed_req_ids = scheduler_output.scheduled_cached_reqs.resumed_req_ids
+    for req_id in itertools.chain(finished_req_ids, preempted_req_ids, resumed_req_ids):
+        mamba_state_idx.pop(req_id, None)
+
+
 def preprocess_mamba(
     scheduler_output: SchedulerOutput,
     kv_cache_config: KVCacheConfig,
@@ -165,16 +658,7 @@ def preprocess_mamba(
     # TODO(Chen): we need to optimize this function a lot
     assert cache_config.enable_prefix_caching
     block_size = mamba_spec.block_size
-    finished_req_ids = scheduler_output.finished_req_ids
-    preempted_req_ids = scheduler_output.preempted_req_ids or set()
-    # We need to clear mamba_state_idx for resumed requests. When requests are
-    # force-preempted (e.g., during reset_prefix_cache / KV cache flush),
-    # they appear in resumed_req_ids without a corresponding entry in
-    # preempted_req_ids, leaving stale mamba_state_idx entries that can
-    # point to block indices beyond the new (smaller) block allocation.
-    resumed_req_ids = scheduler_output.scheduled_cached_reqs.resumed_req_ids
-    for req_id in itertools.chain(finished_req_ids, preempted_req_ids, resumed_req_ids):
-        mamba_state_idx.pop(req_id, None)
+    cleanup_mamba_state_idx(scheduler_output, mamba_state_idx)
 
     copy_bufs.offset = 0
     for i, req_id in enumerate(input_batch.req_ids):
@@ -219,55 +703,198 @@ def preprocess_mamba(
     do_mamba_copy_block(copy_bufs)
 
 
-def postprocess_mamba(
+def postprocess_mamba_all(
     scheduler_output: SchedulerOutput,
     kv_cache_config: KVCacheConfig,
     input_batch: GPUInputBatch,
     requests: dict[str, CachedRequestState],
     mamba_state_idx: dict[str, int],
-    forward_context: dict[str, Any],
-    mamba_state_copy_funcs: tuple[MambaStateCopyFunc, ...],
-    copy_bufs: MambaCopyBuffers,
+    num_spec_tokens: int,
+    num_reqs: int,
 ):
+    """All-mode postprocess (only meaningful with num_spec_tokens > 0):
+    record per-request the block index of the last token scheduled this
+    step, so the next step can anchor its in-place writes when accepted
+    drafts leave the sequence at a non-block-aligned position.
     """
-    If a blocks is converted from partial block to full block in this step, copy the
-    state from the block for running state to the new full block.
+    if num_spec_tokens <= 0:
+        return
+    _, mamba_spec = get_mamba_groups(kv_cache_config)
+    block_size = mamba_spec.block_size
+    full_decode_len = 1 + num_spec_tokens
+    scheduled = scheduler_output.num_scheduled_tokens
+    for req_id in input_batch.req_ids[:num_reqs]:
+        num_query = scheduled.get(req_id, 0)
+        if num_query == full_decode_len:
+            req = requests[req_id]
+            seq_len = req.num_computed_tokens + num_query
+            mamba_state_idx[req_id] = max(0, (seq_len - 1) // block_size)
+        else:
+            mamba_state_idx.pop(req_id, None)
+
+
+def preprocess_mamba_all_specdec(
+    scheduler_output: SchedulerOutput,
+    input_batch: GPUInputBatch,
+    mamba_state_idx: dict[str, int],
+    num_reqs: int,
+    prev_last_scheduled_idx_buf: CpuGpuBuffer,
+) -> None:
+    cleanup_mamba_state_idx(scheduler_output, mamba_state_idx)
+    np_view = prev_last_scheduled_idx_buf.np
+    for i, req_id in enumerate(input_batch.req_ids[:num_reqs]):
+        np_view[i] = mamba_state_idx.get(req_id, -1)
+    np_view[num_reqs:].fill(-1)
+    prev_last_scheduled_idx_buf.copy_to_gpu()
+
+
+def postprocess_mamba_align_gpu(
+    *,
+    bufs: "MambaBuffers",
+    num_reqs: int,
+    num_accepted_tokens_gpu: torch.Tensor,
+    num_accepted_tokens_cpu_tensor: torch.Tensor,
+    input_batch: GPUInputBatch,
+    kv_cache_config: KVCacheConfig,
+    forward_context: dict[str, Any],
+    mamba_state_copy_funcs: tuple[MambaStateCopyFunc, ...],
+) -> None:
+    """GPU-side mamba postprocess for spec decode + hybrid + align mode.
+
+    Lazily binds the fused-kernel context to the persistent block tables and
+    forward-context state pointers on the first call, runs the fused kernel,
+    and async-copies the per-request accepted-token counts back to the input
+    batch's CPU tensor for the next iteration's preprocess.
     """
-    num_scheduled_tokens_dict = scheduler_output.num_scheduled_tokens
-    scheduled_spec_decode_tokens_dict = scheduler_output.scheduled_spec_decode_tokens
-    num_accepted_tokens_cpu = input_batch.num_accepted_tokens_cpu
-    mamba_group_ids = copy_bufs.mamba_group_ids
-    mamba_spec = copy_bufs.mamba_spec
-    copy_bufs.offset = 0
-    for i, req_id in enumerate(input_batch.req_ids):
-        req_state = requests[req_id]
-        num_computed_tokens = req_state.num_computed_tokens
-        num_draft_tokens = len(scheduled_spec_decode_tokens_dict.get(req_id, []))
-        num_scheduled_tokens = num_scheduled_tokens_dict[req_id]
-        num_accepted_tokens = num_accepted_tokens_cpu[i]
-        num_tokens_running_state = (
-            num_computed_tokens + num_scheduled_tokens - num_draft_tokens
+    ctx = bufs.postprocess_align
+    # Caller is responsible for gating on spec decode + hybrid; this assert is
+    # a tripwire if those gates ever drift apart.
+    assert ctx is not None
+    assert ctx.mamba_state_idx_buf is not None
+    assert ctx.num_scheduled_tokens_buf is not None
+    assert ctx.num_computed_tokens_buf is not None
+    assert ctx.num_draft_tokens_buf is not None
+
+    if not ctx.is_initialized:
+        ctx.initialize_from_forward_context(
+            kv_cache_config,
+            forward_context,
+            mamba_state_copy_funcs,
+            [
+                input_batch.block_table[gid].get_device_tensor(num_reqs)
+                for gid in ctx.mamba_group_ids
+            ],
         )
-        new_num_computed_tokens = num_tokens_running_state + num_accepted_tokens - 1
-        aligned_new_computed_tokens = (
-            new_num_computed_tokens // mamba_spec.block_size * mamba_spec.block_size
+
+    ctx.run_fused_postprocess(
+        num_reqs=num_reqs,
+        num_accepted_tokens_gpu=num_accepted_tokens_gpu,
+        mamba_state_idx_gpu=ctx.mamba_state_idx_buf.gpu,
+        num_scheduled_tokens_gpu=ctx.num_scheduled_tokens_buf.gpu,
+        num_computed_tokens_gpu=ctx.num_computed_tokens_buf.gpu,
+        num_draft_tokens_gpu=ctx.num_draft_tokens_buf.gpu,
+    )
+
+    # ``num_accepted_tokens_out`` is pre-initialized from
+    # ``num_accepted_tokens_gpu``; the kernel only overwrites entries to 1
+    # when src_block_idx == dest_block_idx (copy within the same block), so
+    # the original count is preserved for everyone else.
+    num_accepted_tokens_cpu_tensor[:num_reqs].copy_(
+        ctx.num_accepted_tokens_out[:num_reqs], non_blocking=True
+    )
+
+
+def stage_postprocess_metadata_to_gpu(
+    scheduler_output: SchedulerOutput,
+    req_ids: list[str],
+    num_reqs: int,
+    requests: dict[str, CachedRequestState],
+    num_scheduled_tokens_buf: CpuGpuBuffer,
+    num_computed_tokens_buf: CpuGpuBuffer,
+    num_draft_tokens_buf: CpuGpuBuffer,
+) -> None:
+    """Stage per-request postprocess metadata into GPU buffers (non-blocking).
+
+    Walks ``req_ids[:num_reqs]`` in batch order and writes each request's
+    scheduled/computed/draft token counts into the matching pinned numpy
+    views, then issues three non-blocking H→D copies. These values don't
+    change between ``_prepare_inputs`` and ``_update_states_after_model_execute``.
+    The fused postprocess kernel indexes the resulting GPU tensors
+    by ``req_idx``.
+    """
+    scheduled_spec_tokens = scheduler_output.scheduled_spec_decode_tokens
+    num_scheduled = scheduler_output.num_scheduled_tokens
+    scheduled_np = num_scheduled_tokens_buf.np
+    computed_np = num_computed_tokens_buf.np
+    draft_np = num_draft_tokens_buf.np
+    for i in range(num_reqs):
+        req_id = req_ids[i]
+        scheduled_np[i] = num_scheduled[req_id]
+        computed_np[i] = requests[req_id].num_computed_tokens
+        draft_np[i] = len(scheduled_spec_tokens.get(req_id, []))
+    num_scheduled_tokens_buf.copy_to_gpu(num_reqs)
+    num_computed_tokens_buf.copy_to_gpu(num_reqs)
+    num_draft_tokens_buf.copy_to_gpu(num_reqs)
+
+
+def stage_mamba_state_idx_to_gpu(
+    mamba_state_idx: dict[str, int],
+    req_ids: list[str],
+    num_reqs: int,
+    gpu_buf: CpuGpuBuffer,
+) -> None:
+    """Materialize ``mamba_state_idx`` into ``gpu_buf`` and copy to GPU.
+
+    Walks ``req_ids[:num_reqs]`` in batch order, writing each request's block
+    index into the buffer's pinned numpy view, then issues a non-blocking H→D
+    copy. The fused kernel indexes the resulting GPU tensor by ``req_idx``.
+
+    Invariant: ``preprocess_mamba`` must have run first for the same batch so
+    that every ``req_ids[i]`` has an entry in ``mamba_state_idx``.
+    """
+    np_view = gpu_buf.np
+    for i in range(num_reqs):
+        req_id = req_ids[i]
+        state_idx = mamba_state_idx.get(req_id)
+        assert state_idx is not None, (
+            f"mamba_state_idx missing entry for {req_id!r}; "
+            "preprocess_mamba must run before stage_mamba_state_idx_to_gpu"
         )
-        # TODO: how to ensure all blocks that cache_blocks called are cached here?
-        if aligned_new_computed_tokens >= num_tokens_running_state:
-            accept_token_bias = aligned_new_computed_tokens - num_tokens_running_state
-            src_block_idx = mamba_state_idx[req_id]
-            dest_block_idx = aligned_new_computed_tokens // mamba_spec.block_size - 1
-            collect_mamba_copy_meta(
-                copy_bufs,
-                kv_cache_config,
-                mamba_state_copy_funcs,
-                mamba_group_ids,
-                src_block_idx,
-                dest_block_idx,
-                accept_token_bias,
-                req_state,
-                forward_context,
-            )
-            if src_block_idx == dest_block_idx:
-                num_accepted_tokens_cpu[i] = 1
-    do_mamba_copy_block(copy_bufs)
+        np_view[i] = state_idx
+    gpu_buf.copy_to_gpu(num_reqs)
+
+
+def stage_postprocess_inputs_to_gpu(
+    ctx: MambaSpecDecodeGPUContext,
+    scheduler_output: SchedulerOutput,
+    req_ids: list[str],
+    num_reqs: int,
+    requests: dict[str, CachedRequestState],
+    mamba_state_idx: dict[str, int],
+) -> None:
+    """Stage all per-request inputs the fused mamba postprocess kernel reads.
+
+    Bundles ``stage_mamba_state_idx_to_gpu`` and
+    ``stage_postprocess_metadata_to_gpu`` into a single call so the runner
+    has one entry point for postprocess staging. Buffers live on ``ctx``
+    and only exist when the postprocess kernel is enabled.
+    """
+    assert ctx.mamba_state_idx_buf is not None
+    assert ctx.num_scheduled_tokens_buf is not None
+    assert ctx.num_computed_tokens_buf is not None
+    assert ctx.num_draft_tokens_buf is not None
+    stage_mamba_state_idx_to_gpu(
+        mamba_state_idx,
+        req_ids,
+        num_reqs,
+        ctx.mamba_state_idx_buf,
+    )
+    stage_postprocess_metadata_to_gpu(
+        scheduler_output,
+        req_ids,
+        num_reqs,
+        requests,
+        ctx.num_scheduled_tokens_buf,
+        ctx.num_computed_tokens_buf,
+        ctx.num_draft_tokens_buf,
+    )
diff --git a/vllm/v1/worker/ubatch_utils.py b/vllm/v1/worker/ubatch_utils.py
index 7c41726472d5..f4a76529023c 100644
--- a/vllm/v1/worker/ubatch_utils.py
+++ b/vllm/v1/worker/ubatch_utils.py
@@ -177,7 +177,22 @@ def _make_metadata_with_slice(
         query_start_loc[1:] -= tokens_skipped
         query_start_loc_cpu[1:] -= tokens_skipped
     seq_lens = attn_metadata.seq_lens[request_slice]
-    seq_lens_cpu = attn_metadata.seq_lens_cpu[request_slice]
+    # Read raw fields to avoid triggering the deprecated D2H-syncing properties.
+    seq_lens_cpu = (
+        attn_metadata._seq_lens_cpu[request_slice]
+        if attn_metadata._seq_lens_cpu is not None
+        else None
+    )
+    seq_lens_cpu_upper_bound = (
+        attn_metadata.seq_lens_cpu_upper_bound[request_slice]
+        if attn_metadata.seq_lens_cpu_upper_bound is not None
+        else None
+    )
+    num_computed_tokens_cpu = (
+        attn_metadata._num_computed_tokens_cpu[request_slice]
+        if attn_metadata._num_computed_tokens_cpu is not None
+        else None
+    )
 
     if splits_last_request:
         # NOTE: We use start_locs (the original query_start_loc_cpu) to calculate
@@ -190,12 +205,18 @@ def _make_metadata_with_slice(
         # Make sure we don't modify the seq_lens tensors
         #  (not cudagraph compatible)
         seq_lens = seq_lens.clone()
-        seq_lens_cpu = seq_lens_cpu.clone()
         seq_lens[-1] -= tokens_skipped
-        seq_lens_cpu[-1] -= tokens_skipped
-
-    max_seq_len = int(seq_lens_cpu.max())
-    num_computed_tokens_cpu = attn_metadata.num_computed_tokens_cpu[request_slice]
+        if seq_lens_cpu is not None:
+            seq_lens_cpu = seq_lens_cpu.clone()
+            seq_lens_cpu[-1] -= tokens_skipped
+        if seq_lens_cpu_upper_bound is not None:
+            seq_lens_cpu_upper_bound = seq_lens_cpu_upper_bound.clone()
+            seq_lens_cpu_upper_bound[-1] -= tokens_skipped
+
+    assert seq_lens_cpu_upper_bound is not None
+    # Preserve the max_seq_len override set during CUDA-graph capture so
+    # the attention backend selects the correct kernel for SWA layers.
+    max_seq_len = max(int(seq_lens_cpu_upper_bound.max()), attn_metadata.max_seq_len)
 
     num_requests = request_slice.stop - request_slice.start
     num_actual_tokens = token_slice.stop - token_slice.start
@@ -221,6 +242,7 @@ def _make_metadata_with_slice(
         max_seq_len=max_seq_len,
         block_table_tensor=block_table_tensor,
         slot_mapping=slot_mapping,
+        seq_lens_cpu_upper_bound=seq_lens_cpu_upper_bound,
         _seq_lens_cpu=seq_lens_cpu,
         _num_computed_tokens_cpu=num_computed_tokens_cpu,
     )
diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py
index 83fc12cb5c3b..7cb1620c95ef 100644
--- a/vllm/v1/worker/utils.py
+++ b/vllm/v1/worker/utils.py
@@ -120,7 +120,7 @@ def init_meta(
 
         for group in attn_groups_iter:
             spec = group.kv_cache_spec
-            if type(spec) is not FullAttentionSpec:
+            if not isinstance(spec, FullAttentionSpec):
                 continue
             if group.kv_cache_group_id >= len(kernel_block_sizes):
                 continue
@@ -441,6 +441,9 @@ def add_kv_sharing_layers_to_kv_cache_groups(
             from the KV cache of `shared_kv_cache_layers[layer_name]`.
         kv_cache_groups: The KV cache groups of the model.
     """
+    if not shared_kv_cache_layers:
+        return
+
     layer_to_kv_cache_group: dict[str, KVCacheGroupSpec] = {}
     for kv_cache_group in kv_cache_groups:
         for layer_name in kv_cache_group.layer_names:
@@ -519,12 +522,8 @@ def is_residual_scattered_for_sp(
     """Check if the residual tensor is scattered for sequence parallelism.
 
     The residual tensor is scattered across tensor parallel ranks when sequence
-    parallelism and tensor parallelism is enabled.
-
-    This follows the same logic as SequenceParallelismPass.is_applicable_for_range():
-    - In full-graph compilation mode (no splitting ops or using inductor graph
-      partition), SP is always applied
-    - Otherwise, SP is only applied for specific shapes in compile_sizes
+    parallelism and tensor parallelism is enabled. SP is only supported in
+    full-graph compilation mode.
     """
     if not vllm_config.compilation_config.pass_config.enable_sp:
         return False
@@ -534,16 +533,13 @@ def is_residual_scattered_for_sp(
     if tp == 1:
         return False
 
+    assert (
+        vllm_config.compilation_config.use_inductor_graph_partition
+        or not vllm_config.compilation_config.splitting_ops
+    ), "Sequence parallelism requires full-graph compilation"
+
     # When sequence parallelism is enabled, we always pad num_input_tokens
     # to be a multiple of tensor_parallel_size (tp) earlier.
     assert num_input_tokens % tp == 0
 
-    if (
-        not vllm_config.compilation_config.splitting_ops
-        or vllm_config.compilation_config.use_inductor_graph_partition
-    ):
-        return True
-    compile_sizes = vllm_config.compilation_config.compile_sizes
-    if compile_sizes is None:
-        return False
-    return num_input_tokens in compile_sizes
+    return True
diff --git a/vllm/v1/worker/worker_base.py b/vllm/v1/worker/worker_base.py
index 041fff637b87..19bb18bd39f7 100644
--- a/vllm/v1/worker/worker_base.py
+++ b/vllm/v1/worker/worker_base.py
@@ -2,11 +2,12 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from collections.abc import Callable
-from typing import TYPE_CHECKING, Any, TypeVar
+from typing import TYPE_CHECKING, Any, NamedTuple, TypeVar
 
 import torch
 import torch.nn as nn
 
+import vllm.ir
 from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
@@ -30,6 +31,11 @@
 _R = TypeVar("_R")
 
 
+class CompilationTimes(NamedTuple):
+    language_model: float
+    encoder: float
+
+
 class WorkerBase:
     """Worker interface that allows vLLM to cleanly separate implementations for
     different hardware. Also abstracts control plane communication, e.g., to
@@ -82,15 +88,22 @@ def __init__(
         self.device: torch.device | None = None
         self.model_runner: nn.Module | None = None
 
+        # IR op priority and torch-wrap state are constant for the worker's
+        # lifetime.
+        vllm_config.kernel_config.ir_op_priority.set_default()
+        vllm.ir.set_default_torch_wrap(
+            vllm_config.compilation_config.ir_enable_torch_wrap
+        )
+
     def get_kv_cache_spec(self) -> dict[str, KVCacheSpec]:
         """Get specifications for KV cache implementation."""
         raise NotImplementedError
 
-    def compile_or_warm_up_model(self) -> float:
+    def compile_or_warm_up_model(self) -> CompilationTimes:
         """Prepare model for execution through compilation/warmup.
 
         Returns:
-            The accumulated compilation time in seconds.
+            Compilation times (language_model, encoder) in seconds.
         """
         raise NotImplementedError
 
@@ -195,8 +208,8 @@ def __init__(
         All workers have rpc_rank=0, but they have different ranks in the TP
         group.
         """
-        self.rpc_rank = rpc_rank
-        self.global_rank = self.rpc_rank if global_rank is None else global_rank
+        self.rpc_rank: int = rpc_rank
+        self.global_rank: int = self.rpc_rank if global_rank is None else global_rank
 
         # Initialized after init_worker is called
         self.worker: WorkerBase
diff --git a/vllm/v1/worker/workspace.py b/vllm/v1/worker/workspace.py
index 28ba85a26248..1c502bfd8ff1 100644
--- a/vllm/v1/worker/workspace.py
+++ b/vllm/v1/worker/workspace.py
@@ -31,7 +31,7 @@ def _compute_bytes(shape: tuple[int, ...], dtype: torch.dtype) -> int:
 class WorkspaceManager:
     """Manager for workspace allocation.
 
-    Manages workspace buffers for DBO (Dual Batch Overlap) execution.
+    Manages one workspace buffer per active ubatch slot.
     Can be locked to prevent further growth during execution.
     """
 
@@ -39,7 +39,9 @@ def __init__(self, device: torch.device, num_ubatches: int | None = None):
         self._device = device
         # Cache num ubatches at init based on configuration (default to 1)
         self._num_ubatches = num_ubatches if num_ubatches is not None else 1
-        self._current_workspaces: list[torch.Tensor | None] = [None, None]
+        self._current_workspaces: list[torch.Tensor | None] = [
+            None
+        ] * self._num_ubatches
         self._locked: bool = False
 
     @staticmethod
@@ -159,36 +161,33 @@ def get_caller_info() -> str:
                     "Workspace growth is not allowed after locking."
                 )
 
-            for ubatch_id in range(self._num_ubatches):
-                current_workspace = self._current_workspaces[ubatch_id]
-                if (
-                    current_workspace is None
-                    or self._workspace_size_bytes(current_workspace) < required_bytes
-                ):
-                    # Delete old tensor before allocating new one to avoid
-                    # memory spike from resize_(). resize_() allocates new
-                    # memory before freeing old, which can cause OOM.
-                    # Must clear the list reference first since local var
-                    # is just a copy of the reference.
-                    self._current_workspaces[ubatch_id] = None
-                    del current_workspace
-                    self._current_workspaces[ubatch_id] = torch.empty(
-                        (required_bytes,), dtype=torch.uint8, device=self._device
-                    )
+            # Only resize the requesting ubatch's workspace.  Other
+            # ubatches resize lazily on their next get_simultaneous call.
+            # Resizing all ubatches here would orphan the other ubatch's
+            # old tensor when it still holds views into it (DBO leak).
+            self._current_workspaces[ubatch_id] = None
+            del current_workspace
+            # Release the freed segment back to CUDA so the caching
+            # allocator can reuse the GPU memory for the larger
+            # allocation below. Without this, each resize may leave a
+            # dead segment in reserved memory which can cause higher peak
+            # memory usage.
+            torch.accelerator.empty_cache()
+            self._current_workspaces[ubatch_id] = torch.empty(
+                (required_bytes,), dtype=torch.uint8, device=self._device
+            )
+            current_workspace = self._current_workspaces[ubatch_id]
 
             if envs.VLLM_DEBUG_WORKSPACE:
                 logger.info(
                     "[WORKSPACE DEBUG] Resized workspace from '%s': %.2f MB -> "
-                    "%.2f MB (%d ubatches, total memory %.2f MB)",
+                    "%.2f MB (ubatch %d)",
                     get_caller_info(),
                     current_size / _MB,
                     required_bytes / _MB,
-                    self._num_ubatches,
-                    required_bytes * self._num_ubatches / _MB,
+                    ubatch_id,
                 )
 
-            current_workspace = self._current_workspaces[dbo_current_ubatch_id()]
-
         return current_workspace
 
 
@@ -224,7 +223,7 @@ def init_workspace_manager(
 
     Args:
         device: The device to allocate workspace on.
-        num_ubatches: Number of micro-batches. Defaults to 1.
+        num_ubatches: Number of workspace ubatch slots. Defaults to 1.
     """
     global _manager
     if _manager is not None:
diff --git a/vllm/v1/worker/xpu_model_runner.py b/vllm/v1/worker/xpu_model_runner.py
index 68041c5b3a5f..f93d04395571 100644
--- a/vllm/v1/worker/xpu_model_runner.py
+++ b/vllm/v1/worker/xpu_model_runner.py
@@ -1,23 +1,16 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from contextlib import contextmanager
-from typing import TYPE_CHECKING
 
 import torch
 
 from vllm.config import VllmConfig
-from vllm.logger import init_logger
 from vllm.utils.torch_utils import supports_xpu_graph
 from vllm.v1.worker.gpu.model_runner import (
     GPUModelRunner as GPUModelRunnerV2,
 )
 from vllm.v1.worker.gpu_model_runner import GPUModelRunner
 
-if TYPE_CHECKING:
-    pass
-
-logger = init_logger(__name__)
-
 
 class XPUModelRunner(GPUModelRunner):
     """A model runner for XPU devices."""
@@ -47,19 +40,16 @@ def __init__(
 
 @contextmanager
 def _torch_cuda_wrapper():
-    try:
-        # replace cuda APIs with xpu APIs, this should work by default
-        torch.cuda.Stream = torch.xpu.Stream
-        torch.cuda.default_stream = torch.xpu.current_stream
-        torch.cuda.current_stream = torch.xpu.current_stream
-        torch.cuda.stream = torch.xpu.stream
-        torch.cuda.mem_get_info = torch.xpu.mem_get_info
-        torch.cuda.Event = torch.Event
-        torch.cuda.set_stream = torch.xpu.set_stream
-        if supports_xpu_graph():
-            torch.cuda.graph = torch.xpu.graph
-            torch.cuda.CUDAGraph = torch.xpu.XPUGraph
-            torch.cuda.graph_pool_handle = torch.xpu.graph_pool_handle
-        yield
-    finally:
-        pass
+    # replace cuda APIs with xpu APIs, this should work by default
+    torch.cuda.Stream = torch.xpu.Stream
+    torch.cuda.default_stream = torch.xpu.current_stream
+    torch.cuda.current_stream = torch.xpu.current_stream
+    torch.cuda.stream = torch.xpu.stream
+    torch.cuda.mem_get_info = torch.xpu.mem_get_info
+    torch.cuda.Event = torch.Event
+    torch.cuda.set_stream = torch.xpu.set_stream
+    if supports_xpu_graph():
+        torch.cuda.graph = torch.xpu.graph
+        torch.cuda.CUDAGraph = torch.xpu.XPUGraph
+        torch.cuda.graph_pool_handle = torch.xpu.graph_pool_handle
+    yield
diff --git a/vllm/v1/worker/xpu_worker.py b/vllm/v1/worker/xpu_worker.py
index 4211059239df..555c60227861 100644
--- a/vllm/v1/worker/xpu_worker.py
+++ b/vllm/v1/worker/xpu_worker.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import gc
 import os
-from typing import Any
 
 import torch
 
@@ -40,19 +39,35 @@ def __init__(
         assert device_config.device_type == "xpu"
         assert current_platform.is_xpu()
 
-        # Torch profiler. Enabled and configured through profiler_config.
-        self.profiler: Any | None = None
-        profiler_config = vllm_config.profiler_config
-        if profiler_config.profiler == "torch":
-            worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
-            self.profiler = TorchProfilerWrapper(
-                profiler_config,
-                worker_name=worker_name,
-                local_rank=self.local_rank,
-                activities=["CPU", "XPU"],
+    def init_device(self):
+        # In DP mode, XPU workers see all visible devices.
+        # Offset local_rank by the local DP shard.
+        parallel_config = self.parallel_config
+        if (
+            parallel_config.distributed_executor_backend
+            not in ("ray", "external_launcher")
+            and parallel_config.data_parallel_backend != "ray"
+            and parallel_config.nnodes_within_dp == 1
+        ):
+            dp_local_rank = parallel_config.data_parallel_rank_local
+            if dp_local_rank is None:
+                dp_local_rank = parallel_config.data_parallel_index
+            tp_pp_world_size = (
+                parallel_config.pipeline_parallel_size
+                * parallel_config.tensor_parallel_size
+            )
+            self.local_rank += dp_local_rank * tp_pp_world_size
+
+            visible_device_count = torch.accelerator.device_count()
+            assert self.local_rank < visible_device_count, (
+                f"DP adjusted local rank {self.local_rank} is out of bounds. "
+            )
+            assert parallel_config.local_world_size <= visible_device_count, (
+                f"local_world_size ({parallel_config.local_world_size}) must "
+                f"be less than or equal to the number of visible devices "
+                f"({visible_device_count})."
             )
 
-    def init_device(self):
         device = self.device_config.device
         if (
             isinstance(device, torch.device)
@@ -86,7 +101,11 @@ def init_device(self):
         )
 
         # global all_reduce needed for overall oneccl warm up
-        torch.distributed.all_reduce(torch.zeros(1).xpu())
+        if torch.distributed.is_xccl_available():
+            torch.distributed.all_reduce(torch.zeros(1).xpu())
+
+        if self.use_v2_model_runner:
+            logger.info_once("Using V2 Model Runner")
 
         # Set random seed.
         set_random_seed(self.model_config.seed)
@@ -116,3 +135,30 @@ def init_device(self):
         if self.rank == 0:
             # If usage stat is enabled, collect relevant info.
             report_usage_stats(self.vllm_config)
+
+    def profile(self, is_start: bool = True, profile_prefix: str | None = None):
+        if self.profiler_config is None or self.profiler_config.profiler is None:
+            raise RuntimeError(
+                "Profiling is not enabled. Please set --profiler-config to enable "
+                "profiling. Example: "
+                "'--profiler-config.profiler=torch --profiler-config.torch_profiler_dir"
+                "=YOUR_DIR_PATH_TO_DUMP_TRACE'"
+            )
+
+        if is_start and self.profiler is None:
+            from vllm.distributed.utils import get_worker_rank_suffix
+
+            rank_suffix = get_worker_rank_suffix(global_rank=self.rank)
+            trace_name = (
+                f"{profile_prefix}_{rank_suffix}" if profile_prefix else rank_suffix
+            )
+
+            self.profiler = TorchProfilerWrapper(
+                self.profiler_config,
+                worker_name=trace_name,
+                local_rank=self.local_rank,
+                activities=["CPU", "XPU"],
+            )
+            logger.debug("Starting torch profiler with trace name: %s", trace_name)
+
+        super().profile(is_start=is_start, profile_prefix=profile_prefix)
diff --git a/vllm/vllm_flash_attn/__init__.py b/vllm/vllm_flash_attn/__init__.py
index 3507defabaea..7dea1f659b8c 100644
--- a/vllm/vllm_flash_attn/__init__.py
+++ b/vllm/vllm_flash_attn/__init__.py
@@ -1,7 +1,26 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from vllm.vllm_flash_attn.flash_attn_interface import (
+import importlib.machinery
+import os
+import sys
+import types
+
+# In symlink mode (VLLM_FLASH_ATTN_SRC_DIR), cute/ is a symlink to the real
+# source tree and its files use `flash_attn.cute.*` imports (not rewritten).
+# Register a virtual `flash_attn` package so those imports resolve.
+_cute_dir = os.path.join(os.path.dirname(__file__), "cute")
+if os.path.islink(_cute_dir) and "flash_attn" not in sys.modules:
+    _fa_mod = types.ModuleType("flash_attn")
+    _fa_mod.__path__ = [os.path.dirname(os.path.realpath(_cute_dir))]
+    _fa_mod.__package__ = "flash_attn"
+    _fa_mod.__spec__ = importlib.machinery.ModuleSpec(
+        "flash_attn", None, is_package=True
+    )
+    _fa_mod.__spec__.submodule_search_locations = _fa_mod.__path__
+    sys.modules["flash_attn"] = _fa_mod
+
+from vllm.vllm_flash_attn.flash_attn_interface import (  # noqa: E402
     FA2_AVAILABLE,
     FA3_AVAILABLE,
     fa_version_unsupported_reason,
diff --git a/vllm/vllm_flash_attn/flash_attn_interface.py b/vllm/vllm_flash_attn/flash_attn_interface.py
index 9d9a9be2f316..33955bb239ef 100644
--- a/vllm/vllm_flash_attn/flash_attn_interface.py
+++ b/vllm/vllm_flash_attn/flash_attn_interface.py
@@ -366,14 +366,7 @@ def flash_attn_varlen_func(
         )
     elif fa_version == 4:
         assert alibi_slopes is None, "Alibi is not supported in FA4"
-        # FA4 on SM90 doesn't support paged KV; SM100+ does
-        from vllm.platforms import current_platform
 
-        if block_table is not None and current_platform.is_device_capability_family(90):
-            raise NotImplementedError(
-                "FA4 with paged KV is not supported on SM90 (Hopper). "
-                "Use FA3 or upgrade to Blackwell (SM100+)."
-            )
         from vllm.vllm_flash_attn.cute.interface import _flash_attn_fwd
 
         out, softmax_lse = _flash_attn_fwd(
@@ -394,6 +387,7 @@ def flash_attn_varlen_func(
             num_splits=num_splits,
             return_lse=return_softmax_lse,
             out=out,
+            learnable_sink=s_aux,
         )
     else:
         raise ValueError(f"Unsupported FA version: {fa_version}")